From 00343a48d39d9ff74ceb662c5140048295f2610a Mon Sep 17 00:00:00 2001
From: TTrapper <mike.sk.traynor@gmail.com>
Date: Mon, 2 Oct 2017 17:51:09 -0300
Subject: [PATCH 0001/1225] sampled version of
 sparse_softmax_cross_entropy_with_logits

---
 tensorflow/python/ops/nn.py      |  1 +
 tensorflow/python/ops/nn_impl.py | 98 ++++++++++++++++++++++++++++++++
 2 files changed, 99 insertions(+)

diff --git a/tensorflow/python/ops/nn.py b/tensorflow/python/ops/nn.py
index a80662c8b5..f7edace5b1 100644
--- a/tensorflow/python/ops/nn.py
+++ b/tensorflow/python/ops/nn.py
@@ -90,6 +90,7 @@ See the @{$python/nn} guide.
 @@in_top_k
 @@nce_loss
 @@sampled_softmax_loss
+@@sampled_sparse_softmax_loss
 @@uniform_candidate_sampler
 @@log_uniform_candidate_sampler
 @@learned_unigram_candidate_sampler
diff --git a/tensorflow/python/ops/nn_impl.py b/tensorflow/python/ops/nn_impl.py
index db8e92831e..b2b57a055f 100644
--- a/tensorflow/python/ops/nn_impl.py
+++ b/tensorflow/python/ops/nn_impl.py
@@ -1258,3 +1258,101 @@ def sampled_softmax_loss(weights,
       labels=labels, logits=logits)
   # sampled_losses is a [batch_size] tensor.
   return sampled_losses
+
+
+def sampled_sparse_softmax_loss(weights,
+                                biases,
+                                labels,
+                                inputs,
+                                num_sampled,
+                                num_classes,
+                                sampled_values=None,
+                                remove_accidental_hits=True,
+                                partition_strategy="mod",
+                                name="sampled_sparse_softmax_loss"):
+  """Computes and returns the sampled sparse softmax training loss.
+
+  This is a faster way to train a softmax classifier over a huge number of
+  classes.
+
+  This operation is for training only.  It is generally an underestimate of
+  the full softmax loss.
+
+  A common use case is to use this method for training, and calculate the full
+  softmax loss for evaluation or inference. In this case, you must set
+  `partition_strategy="div"` for the two losses to be consistent, as in the
+  following example:
+
+  ```python
+  if mode == "train":
+    loss = tf.nn.sampled_sparse_softmax_loss(
+        weights=weights,
+        biases=biases,
+        labels=labels,
+        inputs=inputs,
+        ...,
+        partition_strategy="div")
+  elif mode == "eval":
+    logits = tf.matmul(inputs, tf.transpose(weights))
+    logits = tf.nn.bias_add(logits, biases)
+    labels_one_hot = tf.one_hot(labels, n_classes)
+    loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
+        labels=labels_one_hot,
+        logits=logits)
+  ```
+
+  See our [Candidate Sampling Algorithms Reference]
+  (https://www.tensorflow.org/extras/candidate_sampling.pdf)
+
+  Also see Section 3 of [Jean et al., 2014](http://arxiv.org/abs/1412.2007)
+  ([pdf](http://arxiv.org/pdf/1412.2007.pdf)) for the math.
+
+  Args:
+    weights: A `Tensor` of shape `[num_classes, dim]`, or a list of `Tensor`
+        objects whose concatenation along dimension 0 has shape
+        [num_classes, dim].  The (possibly-sharded) class embeddings.
+    biases: A `Tensor` of shape `[num_classes]`.  The class biases.
+    labels: A `Tensor` of type `int64` and shape `[batch_size, 1]`.
+        The index of the single target class for each row of logits.  Note that
+        this format differs from the `labels` argument of
+        `nn.sparse_softmax_cross_entropy_with_logits`.
+    inputs: A `Tensor` of shape `[batch_size, dim]`.  The forward
+        activations of the input network.
+    num_sampled: An `int`.  The number of classes to randomly sample per batch.
+    num_classes: An `int`. The number of possible classes.
+    sampled_values: a tuple of (`sampled_candidates`, `true_expected_count`,
+        `sampled_expected_count`) returned by a `*_candidate_sampler` function.
+        (if None, we default to `log_uniform_candidate_sampler`)
+    remove_accidental_hits:  A `bool`.  whether to remove "accidental hits"
+        where a sampled class equals one of the target classes.  Default is
+        True.
+    partition_strategy: A string specifying the partitioning strategy, relevant
+        if `len(weights) > 1`. Currently `"div"` and `"mod"` are supported.
+        Default is `"mod"`. See `tf.nn.embedding_lookup` for more details.
+    name: A name for the operation (optional).
+
+  Returns:
+    A `batch_size` 1-D tensor of per-example sampled softmax losses.
+
+  """
+  logits, labels = _compute_sampled_logits(
+      weights=weights,
+      biases=biases,
+      labels=labels,
+      inputs=inputs,
+      num_sampled=num_sampled,
+      num_classes=num_classes,
+      num_true=1,
+      sampled_values=sampled_values,
+      subtract_log_q=True,
+      remove_accidental_hits=remove_accidental_hits,
+      partition_strategy=partition_strategy,
+      name=name)
+
+  # labels returned by _compute_sampled_logits are one_hot. Convert to indices.
+  labels = array_ops.reshape(math_ops.argmax(labels, axis=1), [-1])
+
+  sampled_losses = nn_ops.sparse_softmax_cross_entropy_with_logits(
+      labels=labels, logits=logits)
+  # sampled_losses is a [batch_size] tensor.
+  return sampled_losses
-- 
GitLab


From 499376eb38b6b5b991e330d87c91d879a6f7bbbe Mon Sep 17 00:00:00 2001
From: Daniyar <daniyar.turmukhambetov.10@ucl.ac.uk>
Date: Mon, 2 Oct 2017 20:58:00 +0100
Subject: [PATCH 0002/1225] unpack for int64 tensors on gpu

---
 tensorflow/core/kernels/unpack_op.cc | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/tensorflow/core/kernels/unpack_op.cc b/tensorflow/core/kernels/unpack_op.cc
index 7fd1def1fe..7ece912557 100644
--- a/tensorflow/core/kernels/unpack_op.cc
+++ b/tensorflow/core/kernels/unpack_op.cc
@@ -153,6 +153,12 @@ REGISTER_KERNEL_BUILDER(Name("Unpack")
                             .HostMemory("output")
                             .TypeConstraint<int32>("T"),
                         UnpackOp<CPUDevice, int32>);
+REGISTER_KERNEL_BUILDER(Name("Unpack")
+                            .Device(DEVICE_GPU)
+                            .HostMemory("value")
+                            .HostMemory("output")
+                            .TypeConstraint<int64>("T"),
+                        UnpackOp<CPUDevice, int64>);
 
 #endif  // GOOGLE_CUDA
 
@@ -170,6 +176,12 @@ REGISTER_KERNEL_BUILDER(Name("Unpack")
                             .HostMemory("output")
                             .TypeConstraint<int32>("T"),
                         UnpackOp<CPUDevice, int32>);
+REGISTER_KERNEL_BUILDER(Name("Unpack")
+                            .Device(DEVICE_SYCL)
+                            .HostMemory("value")
+                            .HostMemory("output")
+                            .TypeConstraint<int64>("T"),
+                        UnpackOp<CPUDevice, int64>);
 #undef REGISTER_SYCL
 #endif  // TENSORFLOW_USE_SYCL
 
-- 
GitLab


From 7fe8a6decd3b1c077de5a3cdedff198195b16ee1 Mon Sep 17 00:00:00 2001
From: Daniyar <daniyar.turmukhambetov.10@ucl.ac.uk>
Date: Thu, 5 Oct 2017 14:34:12 +0100
Subject: [PATCH 0003/1225] unstack op tests for dtypes

---
 .../python/kernel_tests/unstack_op_test.py    | 37 ++++++++++++++-----
 1 file changed, 28 insertions(+), 9 deletions(-)

diff --git a/tensorflow/python/kernel_tests/unstack_op_test.py b/tensorflow/python/kernel_tests/unstack_op_test.py
index c2dcff978a..d937108599 100644
--- a/tensorflow/python/kernel_tests/unstack_op_test.py
+++ b/tensorflow/python/kernel_tests/unstack_op_test.py
@@ -22,6 +22,7 @@ import numpy as np
 from six.moves import xrange  # pylint: disable=redefined-builtin
 
 from tensorflow.python.framework import constant_op
+from tensorflow.python.framework import test_util
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import gradient_checker
 from tensorflow.python.platform import test
@@ -42,15 +43,33 @@ class UnstackOpTest(test.TestCase):
     np.random.seed(7)
     with self.test_session(use_gpu=True):
       for shape in (2,), (3,), (2, 3), (3, 2), (4, 3, 2):
-        data = np.random.randn(*shape)
-        # Convert data to a single tensorflow tensor
-        x = constant_op.constant(data)
-        # Unpack into a list of tensors
-        cs = array_ops.unstack(x, num=shape[0])
-        self.assertEqual(type(cs), list)
-        self.assertEqual(len(cs), shape[0])
-        cs = [c.eval() for c in cs]
-        self.assertAllEqual(cs, data)
+        for dtype in [np.bool, np.float16, np.float32, np.float64, np.int32, np.int64]:
+          data = np.random.randn(*shape).astype(dtype)
+          # Convert data to a single tensorflow tensor
+          x = constant_op.constant(data)
+          # Unpack into a list of tensors
+          cs = array_ops.unstack(x, num=shape[0])
+          self.assertEqual(type(cs), list)
+          self.assertEqual(len(cs), shape[0])
+          cs = [c.eval() for c in cs]
+          self.assertAllEqual(cs, data)
+
+  def testSimpleGpu(self):
+    if not test_util.is_gpu_available():
+      self.skipTest("No GPU available")
+    np.random.seed(7)
+    with self.test_session(use_gpu=True, force_gpu=True):
+      for shape in (2,), (3,), (2, 3), (3, 2), (4, 3, 2):
+        for dtype in [np.float16, np.float32, np.float64, np.int32, np.int64]:
+          data = np.random.randn(*shape).astype(dtype)
+          # Convert data to a single tensorflow tensor
+          x = constant_op.constant(data)
+          # Unpack into a list of tensors
+          cs = array_ops.unstack(x, num=shape[0])
+          self.assertEqual(type(cs), list)
+          self.assertEqual(len(cs), shape[0])
+          cs = [c.eval() for c in cs]
+          self.assertAllEqual(cs, data)
 
   def testGradientsAxis0(self):
     for shape in (2,), (3,), (2, 3), (3, 2), (4, 3, 2):
-- 
GitLab


From 03233a04cf07d639d8d2b5f3fbcab479b267ac4e Mon Sep 17 00:00:00 2001
From: TTrapper <mike.sk.traynor@gmail.com>
Date: Fri, 6 Oct 2017 00:21:08 -0300
Subject: [PATCH 0004/1225] Adressed reviewer comments: moved to contrib, fixed
 erroneous doc, modified _compute_sampled_logits to optionally return target
 indices

---
 tensorflow/contrib/nn/__init__.py             |   1 +
 .../contrib/nn/python/ops/sampling_ops.py     |  97 +++++++++++++
 tensorflow/python/ops/nn.py                   |   1 -
 tensorflow/python/ops/nn_impl.py              | 128 +++---------------
 4 files changed, 120 insertions(+), 107 deletions(-)

diff --git a/tensorflow/contrib/nn/__init__.py b/tensorflow/contrib/nn/__init__.py
index be0957f473..89b70ddfc2 100644
--- a/tensorflow/contrib/nn/__init__.py
+++ b/tensorflow/contrib/nn/__init__.py
@@ -19,6 +19,7 @@
 @@deprecated_flipped_sparse_softmax_cross_entropy_with_logits
 @@deprecated_flipped_sigmoid_cross_entropy_with_logits
 @@rank_sampled_softmax_loss
+@@sampled_sparse_softmax_loss
 """
 
 from __future__ import absolute_import
diff --git a/tensorflow/contrib/nn/python/ops/sampling_ops.py b/tensorflow/contrib/nn/python/ops/sampling_ops.py
index 2ae529e015..b26da52f01 100644
--- a/tensorflow/contrib/nn/python/ops/sampling_ops.py
+++ b/tensorflow/contrib/nn/python/ops/sampling_ops.py
@@ -24,6 +24,8 @@ from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import embedding_ops
 from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import nn
+from tensorflow.python.ops import nn_impl
+from tensorflow.python.ops import nn_ops
 
 
 def _rank_resample(weights, biases, inputs, sampled_values, num_resampled,
@@ -240,3 +242,98 @@ def rank_sampled_softmax_loss(weights,
         remove_accidental_hits=remove_accidental_hits,
         partition_strategy=partition_strategy,
         name=name)
+
+
+def sampled_sparse_softmax_loss(weights,
+                                biases,
+                                labels,
+                                inputs,
+                                num_sampled,
+                                num_classes,
+                                sampled_values=None,
+                                remove_accidental_hits=True,
+                                partition_strategy="mod",
+                                name="sampled_sparse_softmax_loss"):
+  """Computes and returns the sampled sparse softmax training loss.
+
+  This is a faster way to train a softmax classifier over a huge number of
+  classes.
+
+  This operation is for training only.  It is generally an underestimate of
+  the full softmax loss.
+
+  A common use case is to use this method for training, and calculate the full
+  softmax loss for evaluation or inference. In this case, you must set
+  `partition_strategy="div"` for the two losses to be consistent, as in the
+  following example:
+
+  ```python
+  if mode == "train":
+    loss = tf.nn.sampled_sparse_softmax_loss(
+        weights=weights,
+        biases=biases,
+        labels=labels,
+        inputs=inputs,
+        ...,
+        partition_strategy="div")
+  elif mode == "eval":
+    logits = tf.matmul(inputs, tf.transpose(weights))
+    logits = tf.nn.bias_add(logits, biases)
+    loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
+        labels=tf.squeeze(labels),
+        logits=logits)
+  ```
+
+  See our [Candidate Sampling Algorithms Reference]
+  (https://www.tensorflow.org/extras/candidate_sampling.pdf)
+
+  Also see Section 3 of [Jean et al., 2014](http://arxiv.org/abs/1412.2007)
+  ([pdf](http://arxiv.org/pdf/1412.2007.pdf)) for the math.
+
+  Args:
+    weights: A `Tensor` of shape `[num_classes, dim]`, or a list of `Tensor`
+        objects whose concatenation along dimension 0 has shape
+        [num_classes, dim].  The (possibly-sharded) class embeddings.
+    biases: A `Tensor` of shape `[num_classes]`.  The class biases.
+    labels: A `Tensor` of type `int64` and shape `[batch_size, 1]`.
+        The index of the single target class for each row of logits.  Note that
+        this format differs from the `labels` argument of
+        `nn.sparse_softmax_cross_entropy_with_logits`.
+    inputs: A `Tensor` of shape `[batch_size, dim]`.  The forward
+        activations of the input network.
+    num_sampled: An `int`.  The number of classes to randomly sample per batch.
+    num_classes: An `int`. The number of possible classes.
+    sampled_values: a tuple of (`sampled_candidates`, `true_expected_count`,
+        `sampled_expected_count`) returned by a `*_candidate_sampler` function.
+        (if None, we default to `log_uniform_candidate_sampler`)
+    remove_accidental_hits:  A `bool`.  whether to remove "accidental hits"
+        where a sampled class equals one of the target classes.  Default is
+        True.
+    partition_strategy: A string specifying the partitioning strategy, relevant
+        if `len(weights) > 1`. Currently `"div"` and `"mod"` are supported.
+        Default is `"mod"`. See `tf.nn.embedding_lookup` for more details.
+    name: A name for the operation (optional).
+
+  Returns:
+    A `batch_size` 1-D tensor of per-example sampled softmax losses.
+
+  """
+  logits, labels = nn_impl._compute_sampled_logits(
+      weights=weights,
+      biases=biases,
+      labels=labels,
+      inputs=inputs,
+      num_sampled=num_sampled,
+      num_classes=num_classes,
+      num_true=1,
+      sampled_values=sampled_values,
+      subtract_log_q=True,
+      remove_accidental_hits=remove_accidental_hits,
+      partition_strategy=partition_strategy,
+      labels_as_indices=True,
+      name=name)
+
+  sampled_losses = nn_ops.sparse_softmax_cross_entropy_with_logits(
+      labels=array_ops.squeeze(labels), logits=logits)
+  # sampled_losses is a [batch_size] tensor.
+  return sampled_losses
diff --git a/tensorflow/python/ops/nn.py b/tensorflow/python/ops/nn.py
index f7edace5b1..a80662c8b5 100644
--- a/tensorflow/python/ops/nn.py
+++ b/tensorflow/python/ops/nn.py
@@ -90,7 +90,6 @@ See the @{$python/nn} guide.
 @@in_top_k
 @@nce_loss
 @@sampled_softmax_loss
-@@sampled_sparse_softmax_loss
 @@uniform_candidate_sampler
 @@log_uniform_candidate_sampler
 @@learned_unigram_candidate_sampler
diff --git a/tensorflow/python/ops/nn_impl.py b/tensorflow/python/ops/nn_impl.py
index b2b57a055f..ad18eedfb0 100644
--- a/tensorflow/python/ops/nn_impl.py
+++ b/tensorflow/python/ops/nn_impl.py
@@ -26,6 +26,7 @@ from tensorflow.python.framework import ops
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import candidate_sampling_ops
 from tensorflow.python.ops import embedding_ops
+from tensorflow.python.ops import gen_array_ops
 from tensorflow.python.ops import gen_nn_ops
 from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import nn_ops
@@ -893,6 +894,7 @@ def _compute_sampled_logits(weights,
                             subtract_log_q=True,
                             remove_accidental_hits=False,
                             partition_strategy="mod",
+                            labels_as_indices=False,
                             name=None):
   """Helper function for nce_loss and sampled_softmax_loss functions.
 
@@ -930,12 +932,18 @@ def _compute_sampled_logits(weights,
     partition_strategy: A string specifying the partitioning strategy, relevant
         if `len(weights) > 1`. Currently `"div"` and `"mod"` are supported.
         Default is `"mod"`. See `tf.nn.embedding_lookup` for more details.
+    labels_as_indices: A `bool`. Whether the returned labels represent the
+        indices of the true classes. Default is `False`.
     name: A name for the operation (optional).
   Returns:
-    out_logits, out_labels: `Tensor` objects each with shape
+    out_logits: `Tensor` object with shape
         `[batch_size, num_true + num_sampled]`, for passing to either
         `nn.sigmoid_cross_entropy_with_logits` (NCE) or
         `nn.softmax_cross_entropy_with_logits` (sampled softmax).
+    out_labels: If `labels_as_indices` is `False`, a Tensor object with the same
+        shape as `out_logits`. Otherwise a `Tensor` of shape
+        `[batch_size, num_true]` with the indices of the target classes for each
+        row of `out_logits`.
   """
 
   if isinstance(weights, variables.PartitionedVariable):
@@ -1046,13 +1054,19 @@ def _compute_sampled_logits(weights,
 
     # Construct output logits and labels. The true labels/logits start at col 0.
     out_logits = array_ops.concat([true_logits, sampled_logits], 1)
-    # true_logits is a float tensor, ones_like(true_logits) is a float tensor
-    # of ones. We then divide by num_true to ensure the per-example labels sum
-    # to 1.0, i.e. form a proper probability distribution.
-    out_labels = array_ops.concat([
-        array_ops.ones_like(true_logits) / num_true,
-        array_ops.zeros_like(sampled_logits)
-    ], 1)
+    if labels_as_indices:
+        # We want each row of labels to be the indices of the targets, which
+        # start at col 0 and end at col num_true-1.
+        out_labels = gen_array_ops.tile(
+                [math_ops.range(num_true)], [array_ops.shape(true_logits)[0], 1])
+    else:
+        # true_logits is a float tensor, ones_like(true_logits) is a float
+        # tensor of ones. We then divide by num_true to ensure the per-example
+        # labels sum to 1.0, i.e. form a proper probability distribution.
+        out_labels = array_ops.concat([
+            array_ops.ones_like(true_logits) / num_true,
+            array_ops.zeros_like(sampled_logits)
+        ], 1)
 
   return out_logits, out_labels
 
@@ -1258,101 +1272,3 @@ def sampled_softmax_loss(weights,
       labels=labels, logits=logits)
   # sampled_losses is a [batch_size] tensor.
   return sampled_losses
-
-
-def sampled_sparse_softmax_loss(weights,
-                                biases,
-                                labels,
-                                inputs,
-                                num_sampled,
-                                num_classes,
-                                sampled_values=None,
-                                remove_accidental_hits=True,
-                                partition_strategy="mod",
-                                name="sampled_sparse_softmax_loss"):
-  """Computes and returns the sampled sparse softmax training loss.
-
-  This is a faster way to train a softmax classifier over a huge number of
-  classes.
-
-  This operation is for training only.  It is generally an underestimate of
-  the full softmax loss.
-
-  A common use case is to use this method for training, and calculate the full
-  softmax loss for evaluation or inference. In this case, you must set
-  `partition_strategy="div"` for the two losses to be consistent, as in the
-  following example:
-
-  ```python
-  if mode == "train":
-    loss = tf.nn.sampled_sparse_softmax_loss(
-        weights=weights,
-        biases=biases,
-        labels=labels,
-        inputs=inputs,
-        ...,
-        partition_strategy="div")
-  elif mode == "eval":
-    logits = tf.matmul(inputs, tf.transpose(weights))
-    logits = tf.nn.bias_add(logits, biases)
-    labels_one_hot = tf.one_hot(labels, n_classes)
-    loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
-        labels=labels_one_hot,
-        logits=logits)
-  ```
-
-  See our [Candidate Sampling Algorithms Reference]
-  (https://www.tensorflow.org/extras/candidate_sampling.pdf)
-
-  Also see Section 3 of [Jean et al., 2014](http://arxiv.org/abs/1412.2007)
-  ([pdf](http://arxiv.org/pdf/1412.2007.pdf)) for the math.
-
-  Args:
-    weights: A `Tensor` of shape `[num_classes, dim]`, or a list of `Tensor`
-        objects whose concatenation along dimension 0 has shape
-        [num_classes, dim].  The (possibly-sharded) class embeddings.
-    biases: A `Tensor` of shape `[num_classes]`.  The class biases.
-    labels: A `Tensor` of type `int64` and shape `[batch_size, 1]`.
-        The index of the single target class for each row of logits.  Note that
-        this format differs from the `labels` argument of
-        `nn.sparse_softmax_cross_entropy_with_logits`.
-    inputs: A `Tensor` of shape `[batch_size, dim]`.  The forward
-        activations of the input network.
-    num_sampled: An `int`.  The number of classes to randomly sample per batch.
-    num_classes: An `int`. The number of possible classes.
-    sampled_values: a tuple of (`sampled_candidates`, `true_expected_count`,
-        `sampled_expected_count`) returned by a `*_candidate_sampler` function.
-        (if None, we default to `log_uniform_candidate_sampler`)
-    remove_accidental_hits:  A `bool`.  whether to remove "accidental hits"
-        where a sampled class equals one of the target classes.  Default is
-        True.
-    partition_strategy: A string specifying the partitioning strategy, relevant
-        if `len(weights) > 1`. Currently `"div"` and `"mod"` are supported.
-        Default is `"mod"`. See `tf.nn.embedding_lookup` for more details.
-    name: A name for the operation (optional).
-
-  Returns:
-    A `batch_size` 1-D tensor of per-example sampled softmax losses.
-
-  """
-  logits, labels = _compute_sampled_logits(
-      weights=weights,
-      biases=biases,
-      labels=labels,
-      inputs=inputs,
-      num_sampled=num_sampled,
-      num_classes=num_classes,
-      num_true=1,
-      sampled_values=sampled_values,
-      subtract_log_q=True,
-      remove_accidental_hits=remove_accidental_hits,
-      partition_strategy=partition_strategy,
-      name=name)
-
-  # labels returned by _compute_sampled_logits are one_hot. Convert to indices.
-  labels = array_ops.reshape(math_ops.argmax(labels, axis=1), [-1])
-
-  sampled_losses = nn_ops.sparse_softmax_cross_entropy_with_logits(
-      labels=labels, logits=logits)
-  # sampled_losses is a [batch_size] tensor.
-  return sampled_losses
-- 
GitLab


From 7680d8d00dec8897b64ea864da71537b7be957de Mon Sep 17 00:00:00 2001
From: TTrapper <mike.sk.traynor@gmail.com>
Date: Fri, 6 Oct 2017 00:47:54 -0300
Subject: [PATCH 0005/1225] checkstyle fix

---
 tensorflow/python/ops/nn_impl.py | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/tensorflow/python/ops/nn_impl.py b/tensorflow/python/ops/nn_impl.py
index ad18eedfb0..8e64259143 100644
--- a/tensorflow/python/ops/nn_impl.py
+++ b/tensorflow/python/ops/nn_impl.py
@@ -1055,18 +1055,18 @@ def _compute_sampled_logits(weights,
     # Construct output logits and labels. The true labels/logits start at col 0.
     out_logits = array_ops.concat([true_logits, sampled_logits], 1)
     if labels_as_indices:
-        # We want each row of labels to be the indices of the targets, which
-        # start at col 0 and end at col num_true-1.
-        out_labels = gen_array_ops.tile(
-                [math_ops.range(num_true)], [array_ops.shape(true_logits)[0], 1])
+      # We want each row of labels to be the indices of the targets, which
+      # start at col 0 and end at col num_true-1.
+      out_labels = gen_array_ops.tile(
+          [math_ops.range(num_true)], [array_ops.shape(true_logits)[0], 1])
     else:
-        # true_logits is a float tensor, ones_like(true_logits) is a float
-        # tensor of ones. We then divide by num_true to ensure the per-example
-        # labels sum to 1.0, i.e. form a proper probability distribution.
-        out_labels = array_ops.concat([
-            array_ops.ones_like(true_logits) / num_true,
-            array_ops.zeros_like(sampled_logits)
-        ], 1)
+      # true_logits is a float tensor, ones_like(true_logits) is a float
+      # tensor of ones. We then divide by num_true to ensure the per-example
+      # labels sum to 1.0, i.e. form a proper probability distribution.
+      out_labels = array_ops.concat([
+          array_ops.ones_like(true_logits) / num_true,
+          array_ops.zeros_like(sampled_logits)
+      ], 1)
 
   return out_logits, out_labels
 
-- 
GitLab


From f300bcbb3419e7ad7130a84d5375ae53d92e1568 Mon Sep 17 00:00:00 2001
From: Changming Sun <chasun@microsoft.com>
Date: Sun, 22 Oct 2017 21:36:25 +0800
Subject: [PATCH 0006/1225] Propagate -DPCRE_STATIC from pcre.BUILD to
 swig.BUILD

To fix a build error on Windows:
ERROR: C:/os/t/external/swig/BUILD.bazel:5:1: Linking of rule '@swig//:swig'
  failed (Exit 1120): link.exe failed: error executing command
misc.o : error LNK2019: unresolved external symbol __imp_pcre_compile
  referenced in function Swig_string_regex
...
---
 third_party/pcre.BUILD | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/third_party/pcre.BUILD b/third_party/pcre.BUILD
index 68aadd1d40..e2cdec4029 100644
--- a/third_party/pcre.BUILD
+++ b/third_party/pcre.BUILD
@@ -50,12 +50,12 @@ cc_library(
         "-DNEWLINE=10",
         "-DNO_RECURSE",
         "-DPARENS_NEST_LIMIT=50",
-        "-DPCRE_STATIC=1",
         "-DPOSIX_MALLOC_THRESHOLD=10",
         "-DSTDC_HEADERS=1",
         "-DSUPPORT_UCP",
         "-DSUPPORT_UTF",
     ],
+    defines = ["PCRE_STATIC=1"],
     includes = ["."],
     visibility = ["@swig//:__pkg__"],  # Please use RE2
     alwayslink = 1,
-- 
GitLab


From 40fc0cb0258352b5d00f25bab55a6991b06b959b Mon Sep 17 00:00:00 2001
From: Yong Tang <yong.tang.github@outlook.com>
Date: Sun, 5 Nov 2017 14:52:42 +0000
Subject: [PATCH 0007/1225] Fix issue in the `Defun` docs

This fix fixes a couple of typos in the `Defun` docs:
`tf.Constant` -> `tf.constant`

Signed-off-by: Yong Tang <yong.tang.github@outlook.com>
---
 tensorflow/python/framework/function.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tensorflow/python/framework/function.py b/tensorflow/python/framework/function.py
index cef3f8d4c4..f55ee5b1e1 100644
--- a/tensorflow/python/framework/function.py
+++ b/tensorflow/python/framework/function.py
@@ -82,8 +82,8 @@ class Defun(object):
     return x + y, x - y
 
   # Building the graph.
-  a = tf.Constant([1.0])
-  b = tf.Constant([2.0])
+  a = tf.constant([1.0])
+  b = tf.constant([2.0])
   c, d = MyFunc(a, b, name='mycall')
   ```
   """
-- 
GitLab


From 1a94310a14d073fbc80d55b211a85e47a2f9c9c6 Mon Sep 17 00:00:00 2001
From: dariavel <daria@mellanox.com>
Date: Thu, 26 Oct 2017 17:06:00 +0300
Subject: [PATCH 0008/1225] Add connectivity check

Ping on each channel and count send+recv completions

Signed-off-by: dariavel <daria@mellanox.com>
---
 tensorflow/contrib/verbs/rdma.cc             | 18 ++--
 tensorflow/contrib/verbs/rdma.h              |  1 +
 tensorflow/contrib/verbs/rdma_mgr.cc         | 93 ++++++++++++++++++++
 tensorflow/contrib/verbs/rdma_mgr.h          |  7 +-
 tensorflow/contrib/verbs/verbs_server_lib.cc |  5 +-
 5 files changed, 111 insertions(+), 13 deletions(-)

diff --git a/tensorflow/contrib/verbs/rdma.cc b/tensorflow/contrib/verbs/rdma.cc
index 331943a3ef..d99cb34661 100644
--- a/tensorflow/contrib/verbs/rdma.cc
+++ b/tensorflow/contrib/verbs/rdma.cc
@@ -147,7 +147,7 @@ ibv_device* set_device() {
     // check validity of input device
     CHECK(false) << "The device " << env_p_rdma_device << " wasn't found";
   } else {
-  // set default device
+    // set default device
     str_port_num = get_env_var("RDMA_DEVICE_PORT");
     CHECK(str_port_num.empty())
         << "RDMA_DEVICE should be provided if RDMA_DEVICE_PORT is set by user";
@@ -177,7 +177,7 @@ ibv_device* set_device() {
 // Returns:
 //   port to use
 uint8_t set_port(ibv_context* context) {
-  uint8_t port_num = 0; //0 is illegal port number
+  uint8_t port_num = 0;  // 0 is illegal port number
   string str_port_num;
   ibv_device_attr device_att;
   ibv_port_attr port_attr;
@@ -419,9 +419,6 @@ RdmaAdapter::RdmaAdapter(const WorkerEnv* worker_env)
                       0);
   CHECK(cq_) << "Failed to create completion queue";
   CHECK(!ibv_req_notify_cq(cq_, 0)) << "Failed to request CQ notification";
-  polling_thread_.reset(Env::Default()->StartThread(
-      ThreadOptions(), "RdmaAdapterCQThread", [this] { Process_CQ(); }));
-  VLOG(2) << "Start RdmaAdapter: " << name();
 }
 
 RdmaAdapter::~RdmaAdapter() {
@@ -433,6 +430,12 @@ RdmaAdapter::~RdmaAdapter() {
   CHECK(!ibv_close_device(context_)) << "Failed to release context";
 }
 
+void RdmaAdapter::StartPolling() {
+  polling_thread_.reset(Env::Default()->StartThread(
+      ThreadOptions(), "RdmaAdapterCQThread", [this] { Process_CQ(); }));
+  VLOG(2) << "Start RdmaAdapter: " << name();
+}
+
 string RdmaAdapter::name() const { return string(context_->device->name); }
 
 // Function to process incoming messages
@@ -633,11 +636,6 @@ RdmaChannel::RdmaChannel(const RdmaAdapter* adapter, const string local_name,
       buffer_index_name_table_.insert({index, buffer_names[i]});
       buffer_name_index_table_.insert({buffer_names[i], index});
     }
-
-    // Initiate recv
-    for (int i = 0; i < 100; i++) {
-      Recv();
-    }
   }
 }
 
diff --git a/tensorflow/contrib/verbs/rdma.h b/tensorflow/contrib/verbs/rdma.h
index 52d92a7c5b..2e128961b6 100644
--- a/tensorflow/contrib/verbs/rdma.h
+++ b/tensorflow/contrib/verbs/rdma.h
@@ -107,6 +107,7 @@ class RdmaAdapter {
   ~RdmaAdapter();
   // Adapter name, e.g. mlx5_0.
   string name() const;
+  void StartPolling();
   void Process_CQ();
 
  protected:
diff --git a/tensorflow/contrib/verbs/rdma_mgr.cc b/tensorflow/contrib/verbs/rdma_mgr.cc
index 09b878843f..b3b3c4f31d 100644
--- a/tensorflow/contrib/verbs/rdma_mgr.cc
+++ b/tensorflow/contrib/verbs/rdma_mgr.cc
@@ -115,6 +115,99 @@ void RdmaMgr::SetupChannels() {
   }
 }
 
+#define PING_RECV_WRID 0
+#define PING_BUFF_SIZE 1024
+
+int RdmaMgr::PostRecv(RdmaChannel* rc, struct ibv_sge list) {
+  struct ibv_recv_wr wr, *bad_wr;
+  memset(&wr, 0, sizeof(wr));
+  wr.sg_list = &list;
+  wr.num_sge = 1;
+  wr.wr_id = PING_RECV_WRID;
+
+  return ibv_post_recv(rc->qp_, &wr, &bad_wr);
+}
+
+int RdmaMgr::PostSend(RdmaChannel* rc, struct ibv_sge list) {
+  struct ibv_send_wr wr, *bad_wr;
+  memset(&wr, 0, sizeof(wr));
+  wr.wr_id = (uint64_t)rc;
+  wr.sg_list = &list;
+  wr.num_sge = 1;
+  wr.opcode = IBV_WR_SEND;
+  wr.send_flags = IBV_SEND_SIGNALED;
+
+  return ibv_post_send(rc->qp_, &wr, &bad_wr);
+}
+
+// Check connectivity by pinging every channel
+bool RdmaMgr::ConnectivityCheck() {
+  int i, rcnt = 0, scnt = 0;
+  void* buff;
+  struct ibv_sge list;
+  buff = malloc(PING_BUFF_SIZE);
+  CHECK(buff) << "Malloc failed!";
+  struct ibv_mr* mr = ibv_reg_mr(rdma_adapter_->pd_, buff, PING_BUFF_SIZE,
+                                 IBV_ACCESS_LOCAL_WRITE);
+  CHECK(mr) << "Failed to register memory region";
+
+  memset(&list, 0, sizeof(list));
+  list.addr = (uintptr_t)buff;
+  list.length = PING_BUFF_SIZE;
+  list.lkey = mr->lkey;
+
+  for (const auto& p : channel_table_) {
+    string worker_name = p.first;
+    RdmaChannel* rc = p.second;
+
+    VLOG(2) << "Ping to " << worker_name;
+    CHECK(PostRecv(rc, list) == 0) << "Couldn't post receive from "
+                                   << worker_name << " with error "
+                                   << std::strerror(errno);
+    CHECK(PostSend(rc, list) == 0) << "Couldn't post send  to " << worker_name
+                                   << " with error: " << std::strerror(errno);
+    for (int i = 0; i < 100; i++) {
+      rc->Recv();
+    }
+  }
+
+  while (rcnt < num_remote_workers_ || scnt < num_remote_workers_) {
+    int ne;
+    do {
+      ne = ibv_poll_cq(rdma_adapter_->cq_, 2 * num_remote_workers_,
+                       rdma_adapter_->wc_);
+      CHECK(ne >= 0) << "poll CQ failed " << ne << "with error"
+                     << std::strerror(errno);
+    } while (ne < 1);
+
+    for (i = 0; i < ne; ++i) {
+      ibv_wc_status s = rdma_adapter_->wc_[i].status;
+      // recv complete
+      if ((int)rdma_adapter_->wc_[i].wr_id == PING_RECV_WRID) {
+        CHECK(s == IBV_WC_SUCCESS) << ": " << ibv_wc_status_str(
+                                                  rdma_adapter_->wc_[i].status)
+                                   << "(" << rdma_adapter_->wc_[i].status
+                                   << ") for PING_RECV_WRID";
+        ++rcnt;
+        // send complete
+      } else {
+        RdmaChannel* rc =
+            reinterpret_cast<RdmaChannel*>(rdma_adapter_->wc_[i].wr_id);
+        CHECK(s == IBV_WC_SUCCESS) << ": " << ibv_wc_status_str(
+                                                  rdma_adapter_->wc_[i].status)
+                                   << "(" << rdma_adapter_->wc_[i].status
+                                   << ") to " << rc->remote_name_;
+        ++scnt;
+      }
+    }  // for
+  }    // while
+  CHECK(rcnt == scnt) << "Connectivity check failed!";
+  ibv_dereg_mr(mr);
+  free(buff);
+  rdma_adapter_->StartPolling();
+  return (num_remote_workers_ == rcnt) && (num_remote_workers_ == scnt);
+}
+
 RdmaMgr::~RdmaMgr() {
   for (const auto& p : channel_table_) delete p.second;
   channel_table_.clear();
diff --git a/tensorflow/contrib/verbs/rdma_mgr.h b/tensorflow/contrib/verbs/rdma_mgr.h
index b156f64096..4ace70ba57 100644
--- a/tensorflow/contrib/verbs/rdma_mgr.h
+++ b/tensorflow/contrib/verbs/rdma_mgr.h
@@ -28,12 +28,16 @@ limitations under the License.
 namespace tensorflow {
 
 class RdmaMgr {
+  friend class RdmaChannel;
+  friend class RdmaAdapter;
+
  public:
   explicit RdmaMgr(const WorkerEnv* const worker_env,
                    GrpcChannelCache* const channel_cache);
   ~RdmaMgr();
   RdmaChannel* FindChannel(const string& key);
   void SetupChannels();
+  bool ConnectivityCheck();
   const string& local_worker() { return local_worker_; }
 
  private:
@@ -44,7 +48,8 @@ class RdmaMgr {
   RdmaAdapter* rdma_adapter_;
   typedef std::unordered_map<string, RdmaChannel*> ChannelTable;
   ChannelTable channel_table_;
-
+  int PostSend(RdmaChannel* rc, struct ibv_sge list);
+  int PostRecv(RdmaChannel* rc, struct ibv_sge list);
   TF_DISALLOW_COPY_AND_ASSIGN(RdmaMgr);
 };
 
diff --git a/tensorflow/contrib/verbs/verbs_server_lib.cc b/tensorflow/contrib/verbs/verbs_server_lib.cc
index 6d1c79c0fb..a606ef75a4 100644
--- a/tensorflow/contrib/verbs/verbs_server_lib.cc
+++ b/tensorflow/contrib/verbs/verbs_server_lib.cc
@@ -49,8 +49,8 @@ VerbsServer::~VerbsServer() {
 Status VerbsServer::ChannelCacheFactory(const ServerDef& server_def,
                                         GrpcChannelCache** channel_cache) {
   string name_prefix =
-      strings::StrCat("/job:", server_def.job_name(), "/replica:0",
-                      "/task:", server_def.task_index());
+      strings::StrCat("/job:", server_def.job_name(), "/replica:0", "/task:",
+                      server_def.task_index());
 
   GrpcChannelSpec channel_spec;
   TF_RETURN_IF_ERROR(ParseChannelSpec(server_def, &channel_spec));
@@ -103,6 +103,7 @@ Status VerbsServer::Start() {
           ThreadOptions(), "TF_verbs_service",
           [this] { verbs_service_->HandleRPCsLoop(); }));
       rdma_mgr_->SetupChannels();
+      CHECK(rdma_mgr_->ConnectivityCheck()) << "Connectivity check failed!";
       verbs_state_ = CONNECTED;
     }
   }
-- 
GitLab


From 734237891314132631bdd8adf03b8d7827f9c4ae Mon Sep 17 00:00:00 2001
From: dariavel <daria@mellanox.com>
Date: Tue, 31 Oct 2017 14:11:14 +0200
Subject: [PATCH 0009/1225] Move PostSend and PostRecv from mgr to channel,
 PostRecv upon channel creation before connectivity check

Signed-off-by: dariavel <daria@mellanox.com>
---
 tensorflow/contrib/verbs/rdma.cc     | 39 ++++++++++++++++++++++++
 tensorflow/contrib/verbs/rdma.h      |  9 ++++++
 tensorflow/contrib/verbs/rdma_mgr.cc | 45 ++--------------------------
 tensorflow/contrib/verbs/rdma_mgr.h  |  2 --
 4 files changed, 50 insertions(+), 45 deletions(-)

diff --git a/tensorflow/contrib/verbs/rdma.cc b/tensorflow/contrib/verbs/rdma.cc
index d99cb34661..55a8f20c29 100644
--- a/tensorflow/contrib/verbs/rdma.cc
+++ b/tensorflow/contrib/verbs/rdma.cc
@@ -561,9 +561,44 @@ void RdmaAdapter::Process_CQ() {
   }
 }
 
+int RdmaChannel::PingPostRecv() {
+  struct ibv_recv_wr wr, *bad_wr;
+  memset(&wr, 0, sizeof(wr));
+  wr.sg_list = &ping_sge_list_;
+  wr.num_sge = 1;
+  wr.wr_id = PingRecvWrid;
+
+  return ibv_post_recv(qp_, &wr, &bad_wr);
+}
+
+int RdmaChannel::PingPostSend() {
+  struct ibv_send_wr wr, *bad_wr;
+  memset(&wr, 0, sizeof(wr));
+  wr.wr_id = (uint64_t)this;
+  wr.sg_list = &ping_sge_list_;
+  wr.num_sge = 1;
+  wr.opcode = IBV_WR_SEND;
+  wr.send_flags = IBV_SEND_SIGNALED;
+
+  return ibv_post_send(qp_, &wr, &bad_wr);
+}
+
 RdmaChannel::RdmaChannel(const RdmaAdapter* adapter, const string local_name,
                          const string remote_name)
     : adapter_(adapter), local_name_(local_name), remote_name_(remote_name) {
+
+  struct ibv_sge list;
+
+  mr_ = ibv_reg_mr(adapter_->pd_, ping_buff_, PingBuffSize,
+                                 IBV_ACCESS_LOCAL_WRITE);
+  CHECK(mr_) << "Failed to register memory region";
+
+  memset(&list, 0, sizeof(list));
+  list.addr = (uintptr_t)ping_buff_;
+  list.length = PingBuffSize;
+  list.lkey = mr_->lkey;
+
+  ping_sge_list_ = list;
   // Create queue pair
   {
     struct ibv_qp_init_attr attr;
@@ -637,9 +672,13 @@ RdmaChannel::RdmaChannel(const RdmaAdapter* adapter, const string local_name,
       buffer_name_index_table_.insert({buffer_names[i], index});
     }
   }
+  CHECK(PingPostRecv() == 0) << "Couldn't post receive from "
+                             << remote_name_ << " with error "
+                             << std::strerror(errno);
 }
 
 RdmaChannel::~RdmaChannel() {
+  ibv_dereg_mr(mr_);
   CHECK(!ibv_destroy_qp(qp_)) << "Failed to destroy QP";
   delete tx_message_buffer_;
   delete rx_message_buffer_;
diff --git a/tensorflow/contrib/verbs/rdma.h b/tensorflow/contrib/verbs/rdma.h
index 2e128961b6..92391d6a57 100644
--- a/tensorflow/contrib/verbs/rdma.h
+++ b/tensorflow/contrib/verbs/rdma.h
@@ -162,6 +162,15 @@ class RdmaChannel {
   void RemoveRecvCallback(const string& key);
   void RunRecvCallback(const string& key);
   static const int kNumMessageBuffers = 4;
+  static const int PingRecvWrid = 0;
+
+ private:
+  static const int PingBuffSize = 1024;
+  char ping_buff_[PingBuffSize];
+  struct ibv_mr* mr_;
+  struct ibv_sge ping_sge_list_;
+  int PingPostRecv();
+  int PingPostSend();
 
  protected:
   const RdmaAdapter* adapter_;
diff --git a/tensorflow/contrib/verbs/rdma_mgr.cc b/tensorflow/contrib/verbs/rdma_mgr.cc
index b3b3c4f31d..3e2171f33d 100644
--- a/tensorflow/contrib/verbs/rdma_mgr.cc
+++ b/tensorflow/contrib/verbs/rdma_mgr.cc
@@ -115,56 +115,17 @@ void RdmaMgr::SetupChannels() {
   }
 }
 
-#define PING_RECV_WRID 0
-#define PING_BUFF_SIZE 1024
-
-int RdmaMgr::PostRecv(RdmaChannel* rc, struct ibv_sge list) {
-  struct ibv_recv_wr wr, *bad_wr;
-  memset(&wr, 0, sizeof(wr));
-  wr.sg_list = &list;
-  wr.num_sge = 1;
-  wr.wr_id = PING_RECV_WRID;
-
-  return ibv_post_recv(rc->qp_, &wr, &bad_wr);
-}
-
-int RdmaMgr::PostSend(RdmaChannel* rc, struct ibv_sge list) {
-  struct ibv_send_wr wr, *bad_wr;
-  memset(&wr, 0, sizeof(wr));
-  wr.wr_id = (uint64_t)rc;
-  wr.sg_list = &list;
-  wr.num_sge = 1;
-  wr.opcode = IBV_WR_SEND;
-  wr.send_flags = IBV_SEND_SIGNALED;
-
-  return ibv_post_send(rc->qp_, &wr, &bad_wr);
-}
 
 // Check connectivity by pinging every channel
 bool RdmaMgr::ConnectivityCheck() {
   int i, rcnt = 0, scnt = 0;
-  void* buff;
-  struct ibv_sge list;
-  buff = malloc(PING_BUFF_SIZE);
-  CHECK(buff) << "Malloc failed!";
-  struct ibv_mr* mr = ibv_reg_mr(rdma_adapter_->pd_, buff, PING_BUFF_SIZE,
-                                 IBV_ACCESS_LOCAL_WRITE);
-  CHECK(mr) << "Failed to register memory region";
-
-  memset(&list, 0, sizeof(list));
-  list.addr = (uintptr_t)buff;
-  list.length = PING_BUFF_SIZE;
-  list.lkey = mr->lkey;
 
   for (const auto& p : channel_table_) {
     string worker_name = p.first;
     RdmaChannel* rc = p.second;
 
     VLOG(2) << "Ping to " << worker_name;
-    CHECK(PostRecv(rc, list) == 0) << "Couldn't post receive from "
-                                   << worker_name << " with error "
-                                   << std::strerror(errno);
-    CHECK(PostSend(rc, list) == 0) << "Couldn't post send  to " << worker_name
+    CHECK(rc->PingPostSend() == 0) << "Couldn't post send  to " << worker_name
                                    << " with error: " << std::strerror(errno);
     for (int i = 0; i < 100; i++) {
       rc->Recv();
@@ -183,7 +144,7 @@ bool RdmaMgr::ConnectivityCheck() {
     for (i = 0; i < ne; ++i) {
       ibv_wc_status s = rdma_adapter_->wc_[i].status;
       // recv complete
-      if ((int)rdma_adapter_->wc_[i].wr_id == PING_RECV_WRID) {
+      if ((int)rdma_adapter_->wc_[i].wr_id == RdmaChannel::PingRecvWrid) {
         CHECK(s == IBV_WC_SUCCESS) << ": " << ibv_wc_status_str(
                                                   rdma_adapter_->wc_[i].status)
                                    << "(" << rdma_adapter_->wc_[i].status
@@ -202,8 +163,6 @@ bool RdmaMgr::ConnectivityCheck() {
     }  // for
   }    // while
   CHECK(rcnt == scnt) << "Connectivity check failed!";
-  ibv_dereg_mr(mr);
-  free(buff);
   rdma_adapter_->StartPolling();
   return (num_remote_workers_ == rcnt) && (num_remote_workers_ == scnt);
 }
diff --git a/tensorflow/contrib/verbs/rdma_mgr.h b/tensorflow/contrib/verbs/rdma_mgr.h
index 4ace70ba57..e711e60478 100644
--- a/tensorflow/contrib/verbs/rdma_mgr.h
+++ b/tensorflow/contrib/verbs/rdma_mgr.h
@@ -48,8 +48,6 @@ class RdmaMgr {
   RdmaAdapter* rdma_adapter_;
   typedef std::unordered_map<string, RdmaChannel*> ChannelTable;
   ChannelTable channel_table_;
-  int PostSend(RdmaChannel* rc, struct ibv_sge list);
-  int PostRecv(RdmaChannel* rc, struct ibv_sge list);
   TF_DISALLOW_COPY_AND_ASSIGN(RdmaMgr);
 };
 
-- 
GitLab


From 097d536c02d5e9f8ab0c2269161343471c2a00fe Mon Sep 17 00:00:00 2001
From: dariavel <daria@mellanox.com>
Date: Mon, 9 Oct 2017 15:54:32 +0300
Subject: [PATCH 0010/1225] Call done in case of not OK status fix + light code
 refactoring

Signed-off-by: dariavel <daria@mellanox.com>
---
 .../contrib/verbs/rdma_rendezvous_mgr.cc      | 40 +++++++------------
 1 file changed, 14 insertions(+), 26 deletions(-)

diff --git a/tensorflow/contrib/verbs/rdma_rendezvous_mgr.cc b/tensorflow/contrib/verbs/rdma_rendezvous_mgr.cc
index ce82ca2883..2bfa81c2ae 100644
--- a/tensorflow/contrib/verbs/rdma_rendezvous_mgr.cc
+++ b/tensorflow/contrib/verbs/rdma_rendezvous_mgr.cc
@@ -58,20 +58,13 @@ void RdmaRemoteRendezvous::RecvFromRemoteAsync(
   // parse src_name and dst_name
   string src_name, dst_name, unused;
   if (!DeviceNameUtils::SplitDeviceName(parsed.src_device, &src_name,
+                                        &unused) ||
+      !DeviceNameUtils::SplitDeviceName(parsed.dst_device, &dst_name,
                                         &unused)) {
     s = errors::Internal("Could not parse src name.");
   }
-  CHECK(s.ok()) << "s is not ok, error code " << s.error_message();
-  if (!s.ok()) {
-    done(s, Args(), recv_args, Tensor{}, false);
-    return;
-  }
-  if (!DeviceNameUtils::SplitDeviceName(parsed.dst_device, &dst_name,
-                                        &unused)) {
-    s = errors::Internal("Could not parse dst name.");
-  }
-  CHECK(s.ok()) << "s is not ok, error code " << s.error_message();
   if (!s.ok()) {
+    LOG(ERROR) << "s is not ok, error code " << s.error_message();
     done(s, Args(), recv_args, Tensor{}, false);
     return;
   }
@@ -82,18 +75,13 @@ void RdmaRemoteRendezvous::RecvFromRemoteAsync(
   // insert callback
   rc->InsertRecvCallback(key_with_step_id, [this, key, key_with_step_id, rc,
                                             recv_args, parsed, done]() {
-    Status s;
-    Device* src_dev;
-    s = env_->device_mgr->LookupDevice("CPU:0", &src_dev);
-    CHECK(s.ok()) << "s is not ok, error code " << s.error_message();
-    if (!s.ok()) {
-      done(s, Args(), recv_args, Tensor(), true);
-      return;
-    }
-    Device* dst_dev;
-    s = env_->device_mgr->LookupDevice(parsed.dst_device, &dst_dev);
-    CHECK(s.ok()) << "s is not ok, error code " << s.error_message();
-    if (!s.ok()) {
+    Status src_s, dst_s, s;
+    Device* src_dev, *dst_dev;
+    src_s = env_->device_mgr->LookupDevice("CPU:0", &src_dev);
+    dst_s = env_->device_mgr->LookupDevice(parsed.dst_device, &dst_dev);
+    if (!src_s.ok() || !dst_s.ok()) {
+      s = src_s.ok() ? dst_s : src_s;
+      LOG(ERROR) << "s is not ok, error code " << s.error_message();
       done(s, Args(), recv_args, Tensor(), true);
       return;
     }
@@ -111,8 +99,8 @@ void RdmaRemoteRendezvous::RecvFromRemoteAsync(
         if (dst_dev->tensorflow_gpu_device_info() &&
             (!recv_args.alloc_attrs.on_host())) {
           CHECK(recv_args.device_context)
-            << "send dev name: " << src_dev->name()
-            << " gpu_info: " << src_dev->tensorflow_gpu_device_info();
+              << "send dev name: " << src_dev->name()
+              << " gpu_info: " << src_dev->tensorflow_gpu_device_info();
           Allocator* alloc = ProcessState::singleton()->GetCUDAHostAllocator(0);
           Tensor copy(alloc, rm.data_type_, rm.tensor_shape_);
           memcpy(DMAHelper::base(&copy), input, rm.tensor_bytes_);
@@ -122,8 +110,8 @@ void RdmaRemoteRendezvous::RecvFromRemoteAsync(
 
           GPUUtil::CopyCPUTensorToGPU(
               &copy, recv_args.device_context, dst_dev, &gpu_copy,
-              [this, gpu_copy, key, key_with_step_id, recv_args, done, rm,
-               rc](const Status& s) {
+              [this, gpu_copy, key, key_with_step_id, recv_args, done, rm, rc](
+                  const Status& s) {
                 CHECK(s.ok()) << "copy tensor to gpu sync";
                 Tensor val;
                 val = std::move(gpu_copy);
-- 
GitLab


From 7edaa93308d7b4d03dd32c009c1ffe5847b9a8b8 Mon Sep 17 00:00:00 2001
From: Noa Ezra <noae@mellanox.com>
Date: Wed, 18 Oct 2017 10:25:04 +0300
Subject: [PATCH 0011/1225] fix compilation error when working without cuda

Signed-off-by: Noa Ezra <noae@mellanox.com>

adding cuda library to BUILD file in order to use GOOGLE_CUDA define

Signed-off-by: Noa Ezra <noae@mellanox.com>
---
 tensorflow/contrib/verbs/BUILD                  | 6 ++++--
 tensorflow/contrib/verbs/rdma.cc                | 5 +++++
 tensorflow/contrib/verbs/rdma_rendezvous_mgr.cc | 4 ++++
 3 files changed, 13 insertions(+), 2 deletions(-)

diff --git a/tensorflow/contrib/verbs/BUILD b/tensorflow/contrib/verbs/BUILD
index 746ff38b37..8b38fc1e85 100644
--- a/tensorflow/contrib/verbs/BUILD
+++ b/tensorflow/contrib/verbs/BUILD
@@ -7,6 +7,8 @@ package(default_visibility = [
 
 licenses(["notice"])  # Apache 2.0
 
+load("//tensorflow:tensorflow.bzl","tf_cuda_library")
+
 exports_files(["LICENSE"])
 
 filegroup(
@@ -97,7 +99,7 @@ cc_library(
     alwayslink = 1,
 )
 
-cc_library(
+tf_cuda_library(
     name = "rdma_rendezvous_mgr",
     srcs = ["rdma_rendezvous_mgr.cc"],
     hdrs = ["rdma_rendezvous_mgr.h"],
@@ -130,7 +132,7 @@ cc_library(
     ],
 )
 
-cc_library(
+tf_cuda_library(
     name = "rdma",
     srcs = ["rdma.cc"],
     hdrs = ["rdma.h"],
diff --git a/tensorflow/contrib/verbs/rdma.cc b/tensorflow/contrib/verbs/rdma.cc
index 55a8f20c29..79c6c1ab07 100644
--- a/tensorflow/contrib/verbs/rdma.cc
+++ b/tensorflow/contrib/verbs/rdma.cc
@@ -21,8 +21,10 @@ limitations under the License.
 #include "tensorflow/contrib/verbs/verbs_util.h"
 #include "tensorflow/core/common_runtime/device_mgr.h"
 #include "tensorflow/core/common_runtime/dma_helper.h"
+#if GOOGLE_CUDA
 #include "tensorflow/core/common_runtime/gpu/gpu_util.h"
 #include "tensorflow/core/common_runtime/gpu/process_state.h"
+#endif
 #include "tensorflow/core/distributed_runtime/rendezvous_mgr_interface.h"
 #include "tensorflow/core/distributed_runtime/session_mgr.h"
 #include "tensorflow/core/framework/rendezvous.h"
@@ -31,6 +33,7 @@ limitations under the License.
 #include "tensorflow/core/lib/core/stringpiece.h"
 #include "tensorflow/core/lib/hash/hash.h"
 #include "tensorflow/core/lib/random/random.h"
+#include "tensorflow/core/lib/core/threadpool.h"
 
 namespace tensorflow {
 
@@ -1063,6 +1066,7 @@ Rendezvous::DoneCallback RdmaTensorBuffer::getRecvTensorCallback(
     TensorProto proto;
     if (src_dev->tensorflow_gpu_device_info() &&
         (!send_args.alloc_attrs.on_host())) {
+#if GOOGLE_CUDA
       CHECK(send_args.device_context) << "send dev name: " << src_dev->name()
                                       << " gpu_info: "
                                       << src_dev->tensorflow_gpu_device_info();
@@ -1101,6 +1105,7 @@ Rendezvous::DoneCallback RdmaTensorBuffer::getRecvTensorCallback(
                                  &proto, NULL, send_args, recv_args);
             });
       }
+#endif  // GOOGLE_CUDA
     } else {
       // tensor is in CPU memory.
       StringPiece copy_buf;
diff --git a/tensorflow/contrib/verbs/rdma_rendezvous_mgr.cc b/tensorflow/contrib/verbs/rdma_rendezvous_mgr.cc
index 2bfa81c2ae..dbb3d25f45 100644
--- a/tensorflow/contrib/verbs/rdma_rendezvous_mgr.cc
+++ b/tensorflow/contrib/verbs/rdma_rendezvous_mgr.cc
@@ -21,8 +21,10 @@ limitations under the License.
 #include "tensorflow/core/common_runtime/device.h"
 #include "tensorflow/core/common_runtime/device_mgr.h"
 #include "tensorflow/core/common_runtime/dma_helper.h"
+#if GOOGLE_CUDA
 #include "tensorflow/core/common_runtime/gpu/gpu_util.h"
 #include "tensorflow/core/common_runtime/gpu/process_state.h"
+#endif  // GOOGLE_CUDA
 #include "tensorflow/core/lib/core/errors.h"
 #include "tensorflow/core/lib/strings/numbers.h"
 #include "tensorflow/core/lib/strings/str_util.h"
@@ -98,6 +100,7 @@ void RdmaRemoteRendezvous::RecvFromRemoteAsync(
       if (can_memcpy) {
         if (dst_dev->tensorflow_gpu_device_info() &&
             (!recv_args.alloc_attrs.on_host())) {
+#if GOOGLE_CUDA
           CHECK(recv_args.device_context)
               << "send dev name: " << src_dev->name()
               << " gpu_info: " << src_dev->tensorflow_gpu_device_info();
@@ -118,6 +121,7 @@ void RdmaRemoteRendezvous::RecvFromRemoteAsync(
                 RecvPostCopyOps(key, key_with_step_id, recv_args, done, rm, rc,
                                 val, s);
               });
+#endif  // GOOGLE_CUDA
           return;
         } else {
           AllocatorAttributes host_alloc_attrs;
-- 
GitLab


From d7dce09a100e29f63f2ac20740a061c9aaf27654 Mon Sep 17 00:00:00 2001
From: dariavel <daria@mellanox.com>
Date: Mon, 6 Nov 2017 11:38:15 +0200
Subject: [PATCH 0012/1225] Replace hardcoded 100 with RDMA_QP_QUEUE_DEPTH

Signed-off-by: dariavel <daria@mellanox.com>
---
 tensorflow/contrib/verbs/rdma_mgr.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/contrib/verbs/rdma_mgr.cc b/tensorflow/contrib/verbs/rdma_mgr.cc
index 3e2171f33d..8d26e022d0 100644
--- a/tensorflow/contrib/verbs/rdma_mgr.cc
+++ b/tensorflow/contrib/verbs/rdma_mgr.cc
@@ -127,7 +127,7 @@ bool RdmaMgr::ConnectivityCheck() {
     VLOG(2) << "Ping to " << worker_name;
     CHECK(rc->PingPostSend() == 0) << "Couldn't post send  to " << worker_name
                                    << " with error: " << std::strerror(errno);
-    for (int i = 0; i < 100; i++) {
+    for (i = 0; i < rc->adapter_->params_.queue_depth - 1; i++) {
       rc->Recv();
     }
   }
-- 
GitLab


From d6b267ac78fcb6a3250c24d466e8aa478c1fc783 Mon Sep 17 00:00:00 2001
From: dariavel <daria@mellanox.com>
Date: Tue, 7 Nov 2017 11:57:50 +0200
Subject: [PATCH 0013/1225] Clang formating

Signed-off-by: dariavel <daria@mellanox.com>
---
 tensorflow/contrib/verbs/BUILD       | 2 +-
 tensorflow/contrib/verbs/rdma.cc     | 9 ++++-----
 tensorflow/contrib/verbs/rdma_mgr.cc | 1 -
 3 files changed, 5 insertions(+), 7 deletions(-)

diff --git a/tensorflow/contrib/verbs/BUILD b/tensorflow/contrib/verbs/BUILD
index 8b38fc1e85..38a84ffb10 100644
--- a/tensorflow/contrib/verbs/BUILD
+++ b/tensorflow/contrib/verbs/BUILD
@@ -7,7 +7,7 @@ package(default_visibility = [
 
 licenses(["notice"])  # Apache 2.0
 
-load("//tensorflow:tensorflow.bzl","tf_cuda_library")
+load("//tensorflow:tensorflow.bzl", "tf_cuda_library")
 
 exports_files(["LICENSE"])
 
diff --git a/tensorflow/contrib/verbs/rdma.cc b/tensorflow/contrib/verbs/rdma.cc
index 79c6c1ab07..1fa98a1f01 100644
--- a/tensorflow/contrib/verbs/rdma.cc
+++ b/tensorflow/contrib/verbs/rdma.cc
@@ -577,7 +577,7 @@ int RdmaChannel::PingPostRecv() {
 int RdmaChannel::PingPostSend() {
   struct ibv_send_wr wr, *bad_wr;
   memset(&wr, 0, sizeof(wr));
-  wr.wr_id = (uint64_t)this;
+  wr.wr_id = (uint64_t) this;
   wr.sg_list = &ping_sge_list_;
   wr.num_sge = 1;
   wr.opcode = IBV_WR_SEND;
@@ -593,7 +593,7 @@ RdmaChannel::RdmaChannel(const RdmaAdapter* adapter, const string local_name,
   struct ibv_sge list;
 
   mr_ = ibv_reg_mr(adapter_->pd_, ping_buff_, PingBuffSize,
-                                 IBV_ACCESS_LOCAL_WRITE);
+                   IBV_ACCESS_LOCAL_WRITE);
   CHECK(mr_) << "Failed to register memory region";
 
   memset(&list, 0, sizeof(list));
@@ -675,9 +675,8 @@ RdmaChannel::RdmaChannel(const RdmaAdapter* adapter, const string local_name,
       buffer_name_index_table_.insert({buffer_names[i], index});
     }
   }
-  CHECK(PingPostRecv() == 0) << "Couldn't post receive from "
-                             << remote_name_ << " with error "
-                             << std::strerror(errno);
+  CHECK(PingPostRecv() == 0) << "Couldn't post receive from " << remote_name_
+                             << " with error " << std::strerror(errno);
 }
 
 RdmaChannel::~RdmaChannel() {
diff --git a/tensorflow/contrib/verbs/rdma_mgr.cc b/tensorflow/contrib/verbs/rdma_mgr.cc
index 8d26e022d0..e7df0528b5 100644
--- a/tensorflow/contrib/verbs/rdma_mgr.cc
+++ b/tensorflow/contrib/verbs/rdma_mgr.cc
@@ -115,7 +115,6 @@ void RdmaMgr::SetupChannels() {
   }
 }
 
-
 // Check connectivity by pinging every channel
 bool RdmaMgr::ConnectivityCheck() {
   int i, rcnt = 0, scnt = 0;
-- 
GitLab


From f552fb90e94ccfb72475327553c968412282eb26 Mon Sep 17 00:00:00 2001
From: Alex Rothberg <agrothberg@gmail.com>
Date: Tue, 7 Nov 2017 22:04:16 -0500
Subject: [PATCH 0014/1225] update create_train_op to use get_global_step

---
 tensorflow/contrib/training/python/training/training.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tensorflow/contrib/training/python/training/training.py b/tensorflow/contrib/training/python/training/training.py
index 6a4d79796d..59f02fa38f 100644
--- a/tensorflow/contrib/training/python/training/training.py
+++ b/tensorflow/contrib/training/python/training/training.py
@@ -255,6 +255,7 @@ from tensorflow.python.platform import tf_logging as logging
 from tensorflow.python.summary import summary
 from tensorflow.python.training import monitored_session
 from tensorflow.python.training import optimizer as tf_optimizer
+from tensorflow.python.training import training_util
 
 # TODO(nsilberman): move add_gradients_summaries, clip_gradient_norms and
 # multiply_gradients into contrib/summaries and contrib/optimizers.py
@@ -409,7 +410,7 @@ def create_train_op(total_loss,
       loss value.
   """
   if global_step is _USE_GLOBAL_STEP:
-    global_step = variables.get_or_create_global_step()
+    global_step = training_util.get_global_step()
 
   # Update ops use GraphKeys.UPDATE_OPS collection if update_ops is None.
   global_update_ops = set(ops.get_collection(ops.GraphKeys.UPDATE_OPS))
-- 
GitLab


From 56e0d5e0d8dab578f1c9ef723772ac79e9fc9583 Mon Sep 17 00:00:00 2001
From: Jay Young <yangjian@patsnap.com>
Date: Wed, 8 Nov 2017 16:22:59 +0800
Subject: [PATCH 0015/1225] [FIX]the estimator generate by
 tf.keras.model_to_estimator() cannot export saved_model because the model_fn
 provided by _create_keras_model_fn wasn't set export_outputs in the returned
 EstimatorSpec. Here I provide a default export_outputs with serve_default key
 and Predict API, and the result inside is same as predictions
 [FIX]_save_first_checkpoint call saver.save with only a path and without
 filename, that make the ckpt saved with name like `{model_dir}/.meta` and
 `{model_dir}/.index`, which can not be found by
 latest_checkpoint("{model_dir}"). As state by save method of Saver, save_path
 should be a path to the checkpoint name. So to fix this, I change the name to
 `{model_dir}/keras_model.ckpt`

---
 tensorflow/python/keras/_impl/keras/estimator.py | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/tensorflow/python/keras/_impl/keras/estimator.py b/tensorflow/python/keras/_impl/keras/estimator.py
index 125e63e1b8..a2a2fe0ead 100644
--- a/tensorflow/python/keras/_impl/keras/estimator.py
+++ b/tensorflow/python/keras/_impl/keras/estimator.py
@@ -19,10 +19,12 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
+import os
 
 from tensorflow.python.client import session
 from tensorflow.python.estimator import estimator as estimator_lib
 from tensorflow.python.estimator import model_fn as model_fn_lib
+from tensorflow.python.estimator import export as export_lib
 from tensorflow.python.framework import ops
 from tensorflow.python.framework import random_seed
 from tensorflow.python.framework import sparse_tensor as sparse_tensor_lib
@@ -33,6 +35,9 @@ from tensorflow.python.ops import metrics as metrics_module
 from tensorflow.python.platform import tf_logging as logging
 from tensorflow.python.training import saver as saver_lib
 from tensorflow.python.training import training_util
+from tensorflow.python.saved_model import signature_constants
+
+_DEFAULT_SERVING_KEY = signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY
 
 
 def _create_ordered_io(keras_model, estimator_io_dict, is_input=True):
@@ -184,7 +189,10 @@ def _create_keras_model_fn(keras_model, custom_objects=None):
         predictions=predictions,
         loss=loss,
         train_op=train_op,
-        eval_metric_ops=eval_metric_ops)
+        eval_metric_ops=eval_metric_ops,
+        export_outputs={
+            _DEFAULT_SERVING_KEY: export_lib.export_output.PredictOutput(predictions)
+        })
 
   return model_fn
 
@@ -222,7 +230,7 @@ def _save_first_checkpoint(keras_model, estimator, custom_objects,
           K._initialize_variables(sess)
           # pylint: enable=protected-access
         saver = saver_lib.Saver()
-        saver.save(sess, estimator.model_dir + '/')
+        saver.save(sess, os.path.join(estimator.model_dir, 'keras_model.ckpt'))
 
 
 def model_to_estimator(keras_model=None,
-- 
GitLab


From 5de6f68848b8bc431e18a53fa03700820bcee57f Mon Sep 17 00:00:00 2001
From: Cameron Thomas <cthom055@gold.ac.uk>
Date: Thu, 9 Nov 2017 01:19:51 +0000
Subject: [PATCH 0016/1225] Forward declare condition_variable

Necessary to enable friendship with mutex
---
 tensorflow/core/platform/default/mutex.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tensorflow/core/platform/default/mutex.h b/tensorflow/core/platform/default/mutex.h
index c3e44c42d9..044c754e80 100644
--- a/tensorflow/core/platform/default/mutex.h
+++ b/tensorflow/core/platform/default/mutex.h
@@ -31,6 +31,8 @@ namespace tensorflow {
 
 enum LinkerInitialized { LINKER_INITIALIZED };
 
+class condition_variable;
+
 // Mimic std::mutex + C++17's shared_mutex, adding a LinkerInitialized
 // constructor interface.  This type is as fast as mutex, but is also a shared
 // lock.
-- 
GitLab


From b58ee215e631b9c2a0400cbd5b52ea7a3a8bfca0 Mon Sep 17 00:00:00 2001
From: PW486 <ooqwe486@gmail.com>
Date: Thu, 9 Nov 2017 19:12:41 +0900
Subject: [PATCH 0017/1225] Fixed typos, comments

---
 tensorflow/core/public/session.h                 | 2 +-
 tensorflow/core/util/saved_tensor_slice.proto    | 2 +-
 tensorflow/core/util/strided_slice_op.cc         | 4 ++--
 tensorflow/core/util/tensor_slice_reader.h       | 1 -
 tensorflow/core/util/tensor_slice_reader_cache.h | 1 -
 tensorflow/core/util/tensor_slice_writer.h       | 1 -
 6 files changed, 4 insertions(+), 7 deletions(-)

diff --git a/tensorflow/core/public/session.h b/tensorflow/core/public/session.h
index bca384e59f..75ad50f6f2 100644
--- a/tensorflow/core/public/session.h
+++ b/tensorflow/core/public/session.h
@@ -186,7 +186,7 @@ class Session {
   /// the `SessionOptions::target` field).
   virtual Status Close() = 0;
 
-  // NOTE(ashankar): As of July 2017, this method was added to faciliate some
+  // NOTE(ashankar): As of July 2017, this method was added to facilitate some
   // experimentation. Reconsider/re-evaluate after September 2017.
   //
   // Sets `*output` to the `DeviceMgr` that owns accessible devices in the
diff --git a/tensorflow/core/util/saved_tensor_slice.proto b/tensorflow/core/util/saved_tensor_slice.proto
index 6278685957..8a6dd7bdb7 100644
--- a/tensorflow/core/util/saved_tensor_slice.proto
+++ b/tensorflow/core/util/saved_tensor_slice.proto
@@ -1,7 +1,7 @@
 // Protocol buffers for saved tensor slices. It's used for the brain tensor
 // ops checkpoints and the V3 checkpoints in dist_belief.
 
-// A checkpoint file is an sstable. The value for each record is a serialized
+// A checkpoint file is a stable. The value for each record is a serialized
 // SavedTensorSlices message (defined below).
 //
 // Each checkpoint file has a record with the empty key (""), which corresponds
diff --git a/tensorflow/core/util/strided_slice_op.cc b/tensorflow/core/util/strided_slice_op.cc
index cfe9275a09..d5bc676a9a 100644
--- a/tensorflow/core/util/strided_slice_op.cc
+++ b/tensorflow/core/util/strided_slice_op.cc
@@ -218,8 +218,8 @@ Status ValidateStridedSliceOp(
 
   // Step 2: Make a sparse spec into a full index spec
   //
-  // The sparse spec does not corresopnds to the number of dimensions
-  // Make a dense spec that corresponds to thte number of dimensions
+  // The sparse spec does not corresponds to the number of dimensions
+  // Make a dense spec that corresponds to the number of dimensions
   //
   // For example suppose foo[...,3:] on foo.shape=(2,2,3) then
   // we need to produce the missing begin_mask for the first two
diff --git a/tensorflow/core/util/tensor_slice_reader.h b/tensorflow/core/util/tensor_slice_reader.h
index 4bb2b24615..263f56c7fc 100644
--- a/tensorflow/core/util/tensor_slice_reader.h
+++ b/tensorflow/core/util/tensor_slice_reader.h
@@ -15,7 +15,6 @@ limitations under the License.
 
 // The utility to read checkpoints for google brain tensor ops and v3
 // checkpoints for dist_belief.
-//
 
 #ifndef TENSORFLOW_UTIL_TENSOR_SLICE_READER_H_
 #define TENSORFLOW_UTIL_TENSOR_SLICE_READER_H_
diff --git a/tensorflow/core/util/tensor_slice_reader_cache.h b/tensorflow/core/util/tensor_slice_reader_cache.h
index bdd36a2791..63a8d0b068 100644
--- a/tensorflow/core/util/tensor_slice_reader_cache.h
+++ b/tensorflow/core/util/tensor_slice_reader_cache.h
@@ -15,7 +15,6 @@ limitations under the License.
 
 // The utility to read checkpoints for google brain tensor ops and v3
 // checkpoints for dist_belief.
-//
 
 #ifndef TENSORFLOW_UTIL_TENSOR_SLICE_READER_CACHE_H_
 #define TENSORFLOW_UTIL_TENSOR_SLICE_READER_CACHE_H_
diff --git a/tensorflow/core/util/tensor_slice_writer.h b/tensorflow/core/util/tensor_slice_writer.h
index 95d6384afe..bdb4921e1b 100644
--- a/tensorflow/core/util/tensor_slice_writer.h
+++ b/tensorflow/core/util/tensor_slice_writer.h
@@ -15,7 +15,6 @@ limitations under the License.
 
 // The utility to write checkpoints for google brain tensor ops and v3
 // checkpoints for dist_belief.
-//
 
 #ifndef TENSORFLOW_UTIL_TENSOR_SLICE_WRITER_H_
 #define TENSORFLOW_UTIL_TENSOR_SLICE_WRITER_H_
-- 
GitLab


From c25cd200ddb2728aec1302f655ff220b08d60007 Mon Sep 17 00:00:00 2001
From: MyungJoo Ham <myungjoo.ham@samsung.com>
Date: Thu, 9 Nov 2017 19:23:07 +0900
Subject: [PATCH 0018/1225] CMake: configure default string values of options
 properly

Because cmake configures defaults values as ON or OFF only,
string values as default doesn't work.

Thus, when it is set "OFF", we need to re-set the values.

Fixes #14400

Signed-off-by: MyungJoo Ham <myungjoo.ham@samsung.com>
---
 tensorflow/contrib/cmake/CMakeLists.txt | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/tensorflow/contrib/cmake/CMakeLists.txt b/tensorflow/contrib/cmake/CMakeLists.txt
index 77a3fc0c83..846daf3213 100644
--- a/tensorflow/contrib/cmake/CMakeLists.txt
+++ b/tensorflow/contrib/cmake/CMakeLists.txt
@@ -53,7 +53,15 @@ if (NOT WIN32)
     set(tensorflow_CUDNN_INCLUDE /usr/include)
   endif (NOT tensorflow_CUDNN_INCLUDE)
   option(tensorflow_PATH_CUDNN_STATIC_LIB "Override PATH_STATIC_LIB for libcudnn_static.a" ${tensorflow_PATH_STATIC_LIB})
+  if (NOT tensorflow_PATH_CUDNN_STATIC_LIB)
+    # option's default value is OFF. Fill it with real default values
+    set (tensorflow_PATH_CUDNN_STATIC_LIB ${tensorflow_PATH_STATIC_LIB})
+  endif (NOT tensorflow_PATH_CUDNN_STATIC_LIB)
   option(tensorflow_PATH_NCCL_STATIC_LIB "Override PATH_STATIC_LIB for libnccl_static.a" ${tensorflow_PATH_STATIC_LIB})
+  if (NOT tensorflow_PATH_NCCL_STATIC_LIB)
+    # option's default value is OFF. Fill it with real default values
+    set (tensorflow_PATH_NCCL_STATIC_LIB ${tensorflow_PATH_STATIC_LIB})
+  endif (NOT tensorflow_PATH_NCCL_STATIC_LIB)
   option(tensorflow_CUDA_LIBRARY_PATH "Designate the default CUDA library paths" /usr/local/cuda/lib64)
   if (NOT tensorflow_CUDA_LIBRARY_PATH)
     # option's default value is OFF. Fill it with real default values
-- 
GitLab


From d45f27d4586ef2d2dcc405eaac97b1515dad9671 Mon Sep 17 00:00:00 2001
From: PW486 <ooqwe486@gmail.com>
Date: Thu, 9 Nov 2017 23:05:38 +0900
Subject: [PATCH 0019/1225] Fixed typos, comments

---
 tensorflow/core/util/saved_tensor_slice.proto | 2 +-
 tensorflow/core/util/strided_slice_op.cc      | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/tensorflow/core/util/saved_tensor_slice.proto b/tensorflow/core/util/saved_tensor_slice.proto
index 8a6dd7bdb7..6278685957 100644
--- a/tensorflow/core/util/saved_tensor_slice.proto
+++ b/tensorflow/core/util/saved_tensor_slice.proto
@@ -1,7 +1,7 @@
 // Protocol buffers for saved tensor slices. It's used for the brain tensor
 // ops checkpoints and the V3 checkpoints in dist_belief.
 
-// A checkpoint file is a stable. The value for each record is a serialized
+// A checkpoint file is an sstable. The value for each record is a serialized
 // SavedTensorSlices message (defined below).
 //
 // Each checkpoint file has a record with the empty key (""), which corresponds
diff --git a/tensorflow/core/util/strided_slice_op.cc b/tensorflow/core/util/strided_slice_op.cc
index d5bc676a9a..f0264c0a9d 100644
--- a/tensorflow/core/util/strided_slice_op.cc
+++ b/tensorflow/core/util/strided_slice_op.cc
@@ -218,8 +218,8 @@ Status ValidateStridedSliceOp(
 
   // Step 2: Make a sparse spec into a full index spec
   //
-  // The sparse spec does not corresponds to the number of dimensions
-  // Make a dense spec that corresponds to the number of dimensions
+  // The sparse spec does not correspond to the number of dimensions
+  // Make a dense spec that correspond to the number of dimensions
   //
   // For example suppose foo[...,3:] on foo.shape=(2,2,3) then
   // we need to produce the missing begin_mask for the first two
-- 
GitLab


From 6f6eb52a89ec6e360d8604fa68516cf2d819207f Mon Sep 17 00:00:00 2001
From: PW486 <ooqwe486@gmail.com>
Date: Thu, 9 Nov 2017 23:10:41 +0900
Subject: [PATCH 0020/1225] Fixed typos, comments

---
 tensorflow/core/util/strided_slice_op.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/core/util/strided_slice_op.cc b/tensorflow/core/util/strided_slice_op.cc
index f0264c0a9d..aca60b942d 100644
--- a/tensorflow/core/util/strided_slice_op.cc
+++ b/tensorflow/core/util/strided_slice_op.cc
@@ -219,7 +219,7 @@ Status ValidateStridedSliceOp(
   // Step 2: Make a sparse spec into a full index spec
   //
   // The sparse spec does not correspond to the number of dimensions
-  // Make a dense spec that correspond to the number of dimensions
+  // Make a dense spec that corresponds to the number of dimensions
   //
   // For example suppose foo[...,3:] on foo.shape=(2,2,3) then
   // we need to produce the missing begin_mask for the first two
-- 
GitLab


From 17626168cb05e9edc6cbbd57d04c1da8a43ecfb2 Mon Sep 17 00:00:00 2001
From: PW486 <ooqwe486@gmail.com>
Date: Fri, 10 Nov 2017 12:33:44 +0900
Subject: [PATCH 0021/1225] Fixed typos, comments

---
 tensorflow/contrib/batching/shared_batch_scheduler.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/contrib/batching/shared_batch_scheduler.h b/tensorflow/contrib/batching/shared_batch_scheduler.h
index 41a3f99137..1853827dc0 100644
--- a/tensorflow/contrib/batching/shared_batch_scheduler.h
+++ b/tensorflow/contrib/batching/shared_batch_scheduler.h
@@ -63,7 +63,7 @@ namespace serving {
 // instead of N independent ones, with their sharing deliberately coordinated.
 //
 // SharedBatchScheduler does not implement the BatchScheduler API; rather, it
-// presents an abstraction of "queues", where each queue coresponds to one type
+// presents an abstraction of "queues", where each queue corresponds to one type
 // of task. Tasks submitted to a given queue are placed in their own batches,
 // and cannot be mixed with other tasks. Queues can be added and deleted
 // dynamically, to accommodate e.g. versions of a model being brought up and
-- 
GitLab


From e058a030f88f19a60e3a4d5ed6b5cbcf85b1a5d6 Mon Sep 17 00:00:00 2001
From: PW486 <ooqwe486@gmail.com>
Date: Fri, 10 Nov 2017 14:19:31 +0900
Subject: [PATCH 0022/1225] Fixed typos

---
 tensorflow/c/c_test_util.h                                  | 2 +-
 tensorflow/compiler/xla/client/computation_builder.h        | 2 +-
 tensorflow/contrib/boosted_trees/lib/utils/batch_features.h | 2 +-
 tensorflow/core/grappler/costs/virtual_placer.h             | 2 +-
 tensorflow/python/util/util.cc                              | 2 +-
 5 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/tensorflow/c/c_test_util.h b/tensorflow/c/c_test_util.h
index d547337492..bc44a7b840 100644
--- a/tensorflow/c/c_test_util.h
+++ b/tensorflow/c/c_test_util.h
@@ -74,7 +74,7 @@ TF_Operation* Neg(TF_Operation* n, TF_Graph* graph, TF_Status* s,
 
 TF_Operation* LessThan(TF_Output l, TF_Output r, TF_Graph* graph, TF_Status* s);
 
-// Split `input` along the first dimention into 3 tensors
+// Split `input` along the first dimension into 3 tensors
 TF_Operation* Split3(TF_Operation* input, TF_Graph* graph, TF_Status* s,
                      const char* name = "split3");
 
diff --git a/tensorflow/compiler/xla/client/computation_builder.h b/tensorflow/compiler/xla/client/computation_builder.h
index 8e1b4be1f3..9159b26614 100644
--- a/tensorflow/compiler/xla/client/computation_builder.h
+++ b/tensorflow/compiler/xla/client/computation_builder.h
@@ -806,7 +806,7 @@ class ComputationBuilder {
   // The operand must represent a constant value, which in this case
   // means that it must not statically depend on any parameter of the
   // computation that is being built other then the ones specified on the
-  // paramtere list. The parameters in the list will be indexed by their
+  // parameter list. The parameters in the list will be indexed by their
   // parameter id property so the number of parameters specified should be at
   // least as many as the largest used parameter index.
   //
diff --git a/tensorflow/contrib/boosted_trees/lib/utils/batch_features.h b/tensorflow/contrib/boosted_trees/lib/utils/batch_features.h
index 7a550d6f73..badc629a11 100644
--- a/tensorflow/contrib/boosted_trees/lib/utils/batch_features.h
+++ b/tensorflow/contrib/boosted_trees/lib/utils/batch_features.h
@@ -56,7 +56,7 @@ class BatchFeatures {
     *num_sparse_int_features = sparse_int_feature_columns_.size();
     if (*num_dense_float_features == 0 && *num_sparse_float_features == 0 &&
         *num_sparse_int_features == 0) {
-      return errors::FailedPrecondition("Not intialized yet.");
+      return errors::FailedPrecondition("Not initialized yet.");
     }
     return Status::OK();
   }
diff --git a/tensorflow/core/grappler/costs/virtual_placer.h b/tensorflow/core/grappler/costs/virtual_placer.h
index 7ccb1ebb99..fee5ce0f51 100644
--- a/tensorflow/core/grappler/costs/virtual_placer.h
+++ b/tensorflow/core/grappler/costs/virtual_placer.h
@@ -41,7 +41,7 @@ class VirtualPlacer {
  private:
   // Converts given device name to Lowercase Fully-Qualified Name (LFQN) string.
   // This helps us disambiguate device names internally and simplify matching.
-  // If device_name couldn't be parsed succesfully, returns empty string.
+  // If device_name couldn't be parsed successfully, returns empty string.
   string to_lfqn_or_empty(const string& device_name) const;
 
   // Map based on the cluster info: cluster device name -> device properties.
diff --git a/tensorflow/python/util/util.cc b/tensorflow/python/util/util.cc
index c3d7611ad4..a41fa7df25 100644
--- a/tensorflow/python/util/util.cc
+++ b/tensorflow/python/util/util.cc
@@ -29,7 +29,7 @@ bool WarnedThatSetIsNotSequence = false;
 
 // Returns 1 if `o` is considered a sequence for the purposes of Flatten().
 // Returns 0 otherwise.
-// Returns -1 if an error occured.
+// Returns -1 if an error occurred.
 int IsSequenceHelper(PyObject* o) {
   if (PyDict_Check(o)) return true;
   if (PySet_Check(o) && !WarnedThatSetIsNotSequence) {
-- 
GitLab


From 95ed2e833abd80727164270fdc299e99ab86ffaf Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Yan=20Facai=20=28=E9=A2=9C=E5=8F=91=E6=89=8D=29?=
 <facai.yan@gmail.com>
Date: Mon, 13 Nov 2017 13:35:14 +0800
Subject: [PATCH 0023/1225] TST: add test case

---
 .../python/kernel_tests/lookup_ops_test.py    | 22 +++++++++++--------
 1 file changed, 13 insertions(+), 9 deletions(-)

diff --git a/tensorflow/python/kernel_tests/lookup_ops_test.py b/tensorflow/python/kernel_tests/lookup_ops_test.py
index 76c790a0a2..11778d8ddb 100644
--- a/tensorflow/python/kernel_tests/lookup_ops_test.py
+++ b/tensorflow/python/kernel_tests/lookup_ops_test.py
@@ -573,15 +573,19 @@ class IndexToStringTableFromFileTest(test.TestCase):
     return vocabulary_file
 
   def test_index_to_string_table(self):
-    vocabulary_file = self._createVocabFile("i2f_vocab1.txt")
-    with self.test_session():
-      table = lookup_ops.index_to_string_table_from_file(
-          vocabulary_file=vocabulary_file)
-      features = table.lookup(constant_op.constant([0, 1, 2, 3], dtypes.int64))
-      self.assertRaises(errors_impl.OpError, features.eval)
-      lookup_ops.tables_initializer().run()
-      self.assertAllEqual((b"brain", b"salad", b"surgery", b"UNK"),
-                          features.eval())
+    vocabulary_path = self._createVocabFile("i2f_vocab1.txt")
+    # vocabulary_file supports string and tensor
+    type_funcs = [str, constant_op.constant]
+    for type_func in type_funcs:
+      vocabulary_file = type_func(vocabulary_path)
+      with self.test_session():
+        table = lookup_ops.index_to_string_table_from_file(
+            vocabulary_file=vocabulary_file)
+        features = table.lookup(constant_op.constant([0, 1, 2, 3], dtypes.int64))
+        self.assertRaises(errors_impl.OpError, features.eval)
+        lookup_ops.tables_initializer().run()
+        self.assertAllEqual((b"brain", b"salad", b"surgery", b"UNK"),
+                            features.eval())
 
   def test_index_to_string_table_with_default_value(self):
     default_value = b"NONE"
-- 
GitLab


From 603a2f3db38753cb4281f367f413e8c1975835f6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Yan=20Facai=20=28=E9=A2=9C=E5=8F=91=E6=89=8D=29?=
 <facai.yan@gmail.com>
Date: Mon, 13 Nov 2017 13:37:00 +0800
Subject: [PATCH 0024/1225] BUG: don't check tensor

---
 tensorflow/python/ops/lookup_ops.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/tensorflow/python/ops/lookup_ops.py b/tensorflow/python/ops/lookup_ops.py
index fa58ffc37e..10b7cd7001 100644
--- a/tensorflow/python/ops/lookup_ops.py
+++ b/tensorflow/python/ops/lookup_ops.py
@@ -1123,8 +1123,10 @@ def index_to_string_table_from_file(vocabulary_file,
     ValueError: when `vocabulary_file` is empty.
     ValueError: when `vocab_size` is invalid.
   """
-  if not vocabulary_file:
-    raise ValueError("vocabulary_file must be specified.")
+  if vocabulary_file is None or (
+        isinstance(vocabulary_file, str) and not vocabulary_file):
+    raise ValueError("vocabulary_file must be specified and must not be empty.")
+
   if vocab_size is not None and vocab_size < 1:
     raise ValueError("vocab_size must be greater than 0, got %d." % vocab_size)
 
-- 
GitLab


From bd1074ab5d2bc87d4fc37e9f6941dc138a3fb961 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Yan=20Facai=20=28=E9=A2=9C=E5=8F=91=E6=89=8D=29?=
 <facai.yan@gmail.com>
Date: Mon, 13 Nov 2017 13:39:33 +0800
Subject: [PATCH 0025/1225] DOC: add docment

---
 tensorflow/python/ops/lookup_ops.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/python/ops/lookup_ops.py b/tensorflow/python/ops/lookup_ops.py
index 10b7cd7001..cb5e9d08c0 100644
--- a/tensorflow/python/ops/lookup_ops.py
+++ b/tensorflow/python/ops/lookup_ops.py
@@ -1110,7 +1110,7 @@ def index_to_string_table_from_file(vocabulary_file,
   ```
 
   Args:
-    vocabulary_file: The vocabulary filename.
+    vocabulary_file: The vocabulary filename, may be a constant scalar `Tensor`.
     vocab_size: Number of the elements in the vocabulary, if known.
     default_value: The value to use for out-of-vocabulary indices.
     name: A name for this op (optional).
-- 
GitLab


From 9e966e9e540d245950dcfccdb982304dac740294 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Yan=20Facai=20=28=E9=A2=9C=E5=8F=91=E6=89=8D=29?=
 <facai.yan@gmail.com>
Date: Mon, 13 Nov 2017 13:45:21 +0800
Subject: [PATCH 0026/1225] ENH: use six.string_types

---
 tensorflow/python/ops/lookup_ops.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/tensorflow/python/ops/lookup_ops.py b/tensorflow/python/ops/lookup_ops.py
index cb5e9d08c0..c489a8ab6b 100644
--- a/tensorflow/python/ops/lookup_ops.py
+++ b/tensorflow/python/ops/lookup_ops.py
@@ -20,6 +20,7 @@ from __future__ import print_function
 
 import collections
 import functools
+import six
 
 from tensorflow.python.eager import context
 from tensorflow.python.framework import constant_op
@@ -922,7 +923,7 @@ def index_table_from_file(vocabulary_file=None,
       than zero.
   """
   if vocabulary_file is None or (
-      isinstance(vocabulary_file, str) and not vocabulary_file):
+      isinstance(vocabulary_file, six.string_types) and not vocabulary_file):
     raise ValueError("vocabulary_file must be specified and must not be empty.")
   if num_oov_buckets < 0:
     raise ValueError("num_oov_buckets must be greater or equal than 0, got %d."
@@ -1124,7 +1125,7 @@ def index_to_string_table_from_file(vocabulary_file,
     ValueError: when `vocab_size` is invalid.
   """
   if vocabulary_file is None or (
-        isinstance(vocabulary_file, str) and not vocabulary_file):
+        isinstance(vocabulary_file, six.string_types) and not vocabulary_file):
     raise ValueError("vocabulary_file must be specified and must not be empty.")
 
   if vocab_size is not None and vocab_size < 1:
-- 
GitLab


From cedb85f2cbda30b9dada94930af9ba40bbbdcf86 Mon Sep 17 00:00:00 2001
From: TTrapper <mike.sk.traynor@gmail.com>
Date: Tue, 14 Nov 2017 12:41:15 -0400
Subject: [PATCH 0027/1225] Removing labels_as_indices logic from
 _compute_sampled_logits. Now computing 0-index labels in
 sampled_sparse_softmax_loss.

---
 .../contrib/nn/python/ops/sampling_ops.py     |  7 ++--
 tensorflow/python/ops/nn_impl.py              | 33 +++++++------------
 2 files changed, 16 insertions(+), 24 deletions(-)

diff --git a/tensorflow/contrib/nn/python/ops/sampling_ops.py b/tensorflow/contrib/nn/python/ops/sampling_ops.py
index b26da52f01..02aa1efc5a 100644
--- a/tensorflow/contrib/nn/python/ops/sampling_ops.py
+++ b/tensorflow/contrib/nn/python/ops/sampling_ops.py
@@ -318,7 +318,7 @@ def sampled_sparse_softmax_loss(weights,
     A `batch_size` 1-D tensor of per-example sampled softmax losses.
 
   """
-  logits, labels = nn_impl._compute_sampled_logits(
+  logits, _ = nn_impl._compute_sampled_logits(
       weights=weights,
       biases=biases,
       labels=labels,
@@ -330,9 +330,12 @@ def sampled_sparse_softmax_loss(weights,
       subtract_log_q=True,
       remove_accidental_hits=remove_accidental_hits,
       partition_strategy=partition_strategy,
-      labels_as_indices=True,
       name=name)
 
+  # There is only one true label. _compute_sampled_logits puts the true logit
+  # at index 0.
+  labels = tf.zeros([array_ops.shape(logits)[0], 1], dtype=dtypes.int64)
+
   sampled_losses = nn_ops.sparse_softmax_cross_entropy_with_logits(
       labels=array_ops.squeeze(labels), logits=logits)
   # sampled_losses is a [batch_size] tensor.
diff --git a/tensorflow/python/ops/nn_impl.py b/tensorflow/python/ops/nn_impl.py
index 8e64259143..2bf5514c64 100644
--- a/tensorflow/python/ops/nn_impl.py
+++ b/tensorflow/python/ops/nn_impl.py
@@ -894,7 +894,6 @@ def _compute_sampled_logits(weights,
                             subtract_log_q=True,
                             remove_accidental_hits=False,
                             partition_strategy="mod",
-                            labels_as_indices=False,
                             name=None):
   """Helper function for nce_loss and sampled_softmax_loss functions.
 
@@ -932,18 +931,13 @@ def _compute_sampled_logits(weights,
     partition_strategy: A string specifying the partitioning strategy, relevant
         if `len(weights) > 1`. Currently `"div"` and `"mod"` are supported.
         Default is `"mod"`. See `tf.nn.embedding_lookup` for more details.
-    labels_as_indices: A `bool`. Whether the returned labels represent the
-        indices of the true classes. Default is `False`.
     name: A name for the operation (optional).
   Returns:
     out_logits: `Tensor` object with shape
         `[batch_size, num_true + num_sampled]`, for passing to either
         `nn.sigmoid_cross_entropy_with_logits` (NCE) or
         `nn.softmax_cross_entropy_with_logits` (sampled softmax).
-    out_labels: If `labels_as_indices` is `False`, a Tensor object with the same
-        shape as `out_logits`. Otherwise a `Tensor` of shape
-        `[batch_size, num_true]` with the indices of the target classes for each
-        row of `out_logits`.
+    out_labels: A Tensor object with the same shape as `out_logits`.
   """
 
   if isinstance(weights, variables.PartitionedVariable):
@@ -1054,21 +1048,16 @@ def _compute_sampled_logits(weights,
 
     # Construct output logits and labels. The true labels/logits start at col 0.
     out_logits = array_ops.concat([true_logits, sampled_logits], 1)
-    if labels_as_indices:
-      # We want each row of labels to be the indices of the targets, which
-      # start at col 0 and end at col num_true-1.
-      out_labels = gen_array_ops.tile(
-          [math_ops.range(num_true)], [array_ops.shape(true_logits)[0], 1])
-    else:
-      # true_logits is a float tensor, ones_like(true_logits) is a float
-      # tensor of ones. We then divide by num_true to ensure the per-example
-      # labels sum to 1.0, i.e. form a proper probability distribution.
-      out_labels = array_ops.concat([
-          array_ops.ones_like(true_logits) / num_true,
-          array_ops.zeros_like(sampled_logits)
-      ], 1)
-
-  return out_logits, out_labels
+
+    # true_logits is a float tensor, ones_like(true_logits) is a float
+    # tensor of ones. We then divide by num_true to ensure the per-example
+    # labels sum to 1.0, i.e. form a proper probability distribution.
+    out_labels = array_ops.concat([
+        array_ops.ones_like(true_logits) / num_true,
+        array_ops.zeros_like(sampled_logits)
+    ], 1)
+
+    return out_logits, out_labels
 
 
 def nce_loss(weights,
-- 
GitLab


From 7ba5810c105640f218993d989142d7e91da6703e Mon Sep 17 00:00:00 2001
From: TTrapper <mike.sk.traynor@gmail.com>
Date: Tue, 14 Nov 2017 13:48:29 -0400
Subject: [PATCH 0028/1225] calling array_ops instead of erroneus tf

---
 tensorflow/contrib/nn/python/ops/sampling_ops.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/contrib/nn/python/ops/sampling_ops.py b/tensorflow/contrib/nn/python/ops/sampling_ops.py
index 02aa1efc5a..ca719ccaf3 100644
--- a/tensorflow/contrib/nn/python/ops/sampling_ops.py
+++ b/tensorflow/contrib/nn/python/ops/sampling_ops.py
@@ -334,7 +334,7 @@ def sampled_sparse_softmax_loss(weights,
 
   # There is only one true label. _compute_sampled_logits puts the true logit
   # at index 0.
-  labels = tf.zeros([array_ops.shape(logits)[0], 1], dtype=dtypes.int64)
+  labels = array_ops.zeros([array_ops.shape(logits)[0], 1], dtype=dtypes.int64)
 
   sampled_losses = nn_ops.sparse_softmax_cross_entropy_with_logits(
       labels=array_ops.squeeze(labels), logits=logits)
-- 
GitLab


From d43d00be13ff271eb8a2e6a14eb7ac01a51934ff Mon Sep 17 00:00:00 2001
From: dariavel <daria@mellanox.com>
Date: Thu, 16 Nov 2017 17:12:06 +0200
Subject: [PATCH 0029/1225] Renaming and comment fix

Signed-off-by: dariavel <daria@mellanox.com>
---
 tensorflow/contrib/verbs/rdma.cc                | 6 +++---
 tensorflow/contrib/verbs/rdma.h                 | 6 +++---
 tensorflow/contrib/verbs/rdma_mgr.cc            | 2 +-
 tensorflow/contrib/verbs/rdma_rendezvous_mgr.cc | 2 +-
 4 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/tensorflow/contrib/verbs/rdma.cc b/tensorflow/contrib/verbs/rdma.cc
index 1fa98a1f01..59bc65f937 100644
--- a/tensorflow/contrib/verbs/rdma.cc
+++ b/tensorflow/contrib/verbs/rdma.cc
@@ -569,7 +569,7 @@ int RdmaChannel::PingPostRecv() {
   memset(&wr, 0, sizeof(wr));
   wr.sg_list = &ping_sge_list_;
   wr.num_sge = 1;
-  wr.wr_id = PingRecvWrid;
+  wr.wr_id = kPingRecvWrid;
 
   return ibv_post_recv(qp_, &wr, &bad_wr);
 }
@@ -592,13 +592,13 @@ RdmaChannel::RdmaChannel(const RdmaAdapter* adapter, const string local_name,
 
   struct ibv_sge list;
 
-  mr_ = ibv_reg_mr(adapter_->pd_, ping_buff_, PingBuffSize,
+  mr_ = ibv_reg_mr(adapter_->pd_, ping_buff_, kPingBuffSize,
                    IBV_ACCESS_LOCAL_WRITE);
   CHECK(mr_) << "Failed to register memory region";
 
   memset(&list, 0, sizeof(list));
   list.addr = (uintptr_t)ping_buff_;
-  list.length = PingBuffSize;
+  list.length = kPingBuffSize;
   list.lkey = mr_->lkey;
 
   ping_sge_list_ = list;
diff --git a/tensorflow/contrib/verbs/rdma.h b/tensorflow/contrib/verbs/rdma.h
index 92391d6a57..fea2327d77 100644
--- a/tensorflow/contrib/verbs/rdma.h
+++ b/tensorflow/contrib/verbs/rdma.h
@@ -162,11 +162,11 @@ class RdmaChannel {
   void RemoveRecvCallback(const string& key);
   void RunRecvCallback(const string& key);
   static const int kNumMessageBuffers = 4;
-  static const int PingRecvWrid = 0;
+  static const int kPingRecvWrid = 0;
 
  private:
-  static const int PingBuffSize = 1024;
-  char ping_buff_[PingBuffSize];
+  static const int kPingBuffSize = 1024;
+  char ping_buff_[kPingBuffSize];
   struct ibv_mr* mr_;
   struct ibv_sge ping_sge_list_;
   int PingPostRecv();
diff --git a/tensorflow/contrib/verbs/rdma_mgr.cc b/tensorflow/contrib/verbs/rdma_mgr.cc
index e7df0528b5..9cb307bcfa 100644
--- a/tensorflow/contrib/verbs/rdma_mgr.cc
+++ b/tensorflow/contrib/verbs/rdma_mgr.cc
@@ -143,7 +143,7 @@ bool RdmaMgr::ConnectivityCheck() {
     for (i = 0; i < ne; ++i) {
       ibv_wc_status s = rdma_adapter_->wc_[i].status;
       // recv complete
-      if ((int)rdma_adapter_->wc_[i].wr_id == RdmaChannel::PingRecvWrid) {
+      if ((int)rdma_adapter_->wc_[i].wr_id == RdmaChannel::kPingRecvWrid) {
         CHECK(s == IBV_WC_SUCCESS) << ": " << ibv_wc_status_str(
                                                   rdma_adapter_->wc_[i].status)
                                    << "(" << rdma_adapter_->wc_[i].status
diff --git a/tensorflow/contrib/verbs/rdma_rendezvous_mgr.cc b/tensorflow/contrib/verbs/rdma_rendezvous_mgr.cc
index dbb3d25f45..74f6681af3 100644
--- a/tensorflow/contrib/verbs/rdma_rendezvous_mgr.cc
+++ b/tensorflow/contrib/verbs/rdma_rendezvous_mgr.cc
@@ -63,7 +63,7 @@ void RdmaRemoteRendezvous::RecvFromRemoteAsync(
                                         &unused) ||
       !DeviceNameUtils::SplitDeviceName(parsed.dst_device, &dst_name,
                                         &unused)) {
-    s = errors::Internal("Could not parse src name.");
+    s = errors::Internal("Could not parse src or dst name.");
   }
   if (!s.ok()) {
     LOG(ERROR) << "s is not ok, error code " << s.error_message();
-- 
GitLab


From 1a63168ff0196f1579a1f6b4cfae2d65f1e7c04e Mon Sep 17 00:00:00 2001
From: Dave MacLachlan <dmaclach@gmail.com>
Date: Thu, 16 Nov 2017 15:05:58 -0800
Subject: [PATCH 0030/1225] Add LICENSES to gitignore

Update gitignore file for ios to cover the license files that get
installed following the install instructions.
---
 tensorflow/examples/ios/.gitignore | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/tensorflow/examples/ios/.gitignore b/tensorflow/examples/ios/.gitignore
index e572b3012c..dbabfb33bf 100644
--- a/tensorflow/examples/ios/.gitignore
+++ b/tensorflow/examples/ios/.gitignore
@@ -2,3 +2,6 @@ project.xcworkspace
 xcuserdata
 imagenet_comp_graph_label_strings.txt
 tensorflow_inception_graph.pb
+simple/data/LICENSE
+camera/data/LICENSE
+benchmark/data/LICENSE
-- 
GitLab


From 0f9a9c854f7dfee904c4e88130cc496ec9f2611e Mon Sep 17 00:00:00 2001
From: Alex Rothberg <agrothberg@gmail.com>
Date: Thu, 16 Nov 2017 18:53:42 -0500
Subject: [PATCH 0031/1225] Use get_or_create_global_step

---
 tensorflow/contrib/training/python/training/training.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/contrib/training/python/training/training.py b/tensorflow/contrib/training/python/training/training.py
index 59f02fa38f..8e0139bdd6 100644
--- a/tensorflow/contrib/training/python/training/training.py
+++ b/tensorflow/contrib/training/python/training/training.py
@@ -410,7 +410,7 @@ def create_train_op(total_loss,
       loss value.
   """
   if global_step is _USE_GLOBAL_STEP:
-    global_step = training_util.get_global_step()
+    global_step = training_util.get_or_create_global_step()
 
   # Update ops use GraphKeys.UPDATE_OPS collection if update_ops is None.
   global_update_ops = set(ops.get_collection(ops.GraphKeys.UPDATE_OPS))
-- 
GitLab


From 55ee41a98d50e200eda314ebf08f092000477f6e Mon Sep 17 00:00:00 2001
From: Mark Heffernan <meheff@google.com>
Date: Thu, 16 Nov 2017 15:54:17 -0800
Subject: [PATCH 0032/1225] When constructing fusion computations from a proto,
 do not uniquify the names. The names are already unique and uniquifying them
 again will mutate them resulting in inconsistent names between the proto and
 the constructed HLO.

PiperOrigin-RevId: 176035108
---
 .../compiler/xla/service/hlo_computation.cc   | 12 ++++----
 .../compiler/xla/service/hlo_computation.h    | 12 +++++---
 .../compiler/xla/service/hlo_instruction.cc   | 28 +++++++++++--------
 .../compiler/xla/service/hlo_instruction.h    | 11 ++++++--
 tensorflow/compiler/xla/service/hlo_module.cc | 13 +++++++--
 5 files changed, 49 insertions(+), 27 deletions(-)

diff --git a/tensorflow/compiler/xla/service/hlo_computation.cc b/tensorflow/compiler/xla/service/hlo_computation.cc
index 8056bcf0f7..c215cc48d6 100644
--- a/tensorflow/compiler/xla/service/hlo_computation.cc
+++ b/tensorflow/compiler/xla/service/hlo_computation.cc
@@ -407,16 +407,18 @@ HloComputationProto HloComputation::ToProto() const {
 /* static */ StatusOr<std::unique_ptr<HloComputation>>
 HloComputation::CreateFromProto(
     HloModule* module, const HloComputationProto& proto,
-    tensorflow::gtl::FlatMap<string, HloComputation*>* computation_map,
+    const tensorflow::gtl::FlatMap<string, HloComputation*>& computation_map,
+    const std::function<void(std::unique_ptr<HloComputation>)>&
+        add_fused_computation,
     HloInstruction* fusion_instruction) {
   std::vector<std::unique_ptr<HloInstruction>> instructions;
   tensorflow::gtl::FlatMap<string, HloInstruction*> instruction_map;
   int64 parameter_count = 0;
   for (const HloInstructionProto& instruction_proto : proto.instructions()) {
-    TF_ASSIGN_OR_RETURN(
-        std::unique_ptr<HloInstruction> instruction,
-        HloInstruction::CreateFromProto(module, instruction_proto,
-                                        instruction_map, computation_map));
+    TF_ASSIGN_OR_RETURN(std::unique_ptr<HloInstruction> instruction,
+                        HloInstruction::CreateFromProto(
+                            module, instruction_proto, instruction_map,
+                            computation_map, add_fused_computation));
     if (instruction->opcode() == HloOpcode::kParameter) {
       parameter_count++;
     }
diff --git a/tensorflow/compiler/xla/service/hlo_computation.h b/tensorflow/compiler/xla/service/hlo_computation.h
index 2835dbbb84..353b30bc69 100644
--- a/tensorflow/compiler/xla/service/hlo_computation.h
+++ b/tensorflow/compiler/xla/service/hlo_computation.h
@@ -152,12 +152,16 @@ class HloComputation {
   //   computation_map: a map from computation name to HloComputation*. This map
   //     must contain all computations which the newly constructed computation
   //     calls.
-  //  fusion_instruction: if non-null then the newly created computation will be
-  //     constructed as a fused computation with this instruction as its fusion
-  //     parent.
+  //   add_fused_computation: A function to call to add a fused
+  //     computation. Used only when the instruction is a fusion instruction.
+  //   fusion_instruction: if non-null then the newly created computation will
+  //     be constructed as a fused computation with this instruction as its
+  //     fusion parent.
   static StatusOr<std::unique_ptr<HloComputation>> CreateFromProto(
       HloModule* module, const HloComputationProto& proto,
-      tensorflow::gtl::FlatMap<string, HloComputation*>* computation_map,
+      const tensorflow::gtl::FlatMap<string, HloComputation*>& computation_map,
+      const std::function<void(std::unique_ptr<HloComputation>)>&
+          add_fused_computation,
       HloInstruction* fusion_instruction = nullptr);
 
   // Gets the instructions in this computation.
diff --git a/tensorflow/compiler/xla/service/hlo_instruction.cc b/tensorflow/compiler/xla/service/hlo_instruction.cc
index c35ca1eb99..c046b6d9c8 100644
--- a/tensorflow/compiler/xla/service/hlo_instruction.cc
+++ b/tensorflow/compiler/xla/service/hlo_instruction.cc
@@ -52,7 +52,9 @@ using ::tensorflow::strings::StrCat;
 StatusOr<std::unique_ptr<HloInstruction>> HloInstruction::CreateFromProto(
     HloModule* module, const HloInstructionProto& proto,
     const tensorflow::gtl::FlatMap<string, HloInstruction*>& instruction_map,
-    tensorflow::gtl::FlatMap<string, HloComputation*>* computation_map) {
+    const tensorflow::gtl::FlatMap<string, HloComputation*>& computation_map,
+    const std::function<void(std::unique_ptr<HloComputation>)>&
+        add_fused_computation) {
   TF_RET_CHECK(!proto.opcode().empty());
   TF_ASSIGN_OR_RETURN(HloOpcode opcode, StringToHloOpcode(proto.opcode()));
   TF_RET_CHECK(proto.has_shape());
@@ -78,19 +80,19 @@ StatusOr<std::unique_ptr<HloInstruction>> HloInstruction::CreateFromProto(
     TF_RET_CHECK(!proto.fusion_kind().empty());
     TF_ASSIGN_OR_RETURN(instruction->fusion_kind_,
                         StringToFusionKind(proto.fusion_kind()));
-    TF_ASSIGN_OR_RETURN(
-        std::unique_ptr<HloComputation> fused_computation,
-        HloComputation::CreateFromProto(
-            module, proto.fused_instructions_computation(), computation_map,
-            /*fusion_instruction=*/instruction.get()));
-    instruction->called_computations_.push_back(
-        module->AddEmbeddedComputation(std::move(fused_computation)));
+    TF_ASSIGN_OR_RETURN(std::unique_ptr<HloComputation> fused_computation,
+                        HloComputation::CreateFromProto(
+                            module, proto.fused_instructions_computation(),
+                            computation_map, add_fused_computation,
+                            /*fusion_instruction=*/instruction.get()));
+    instruction->called_computations_.push_back(fused_computation.get());
+    add_fused_computation(std::move(fused_computation));
   } else {
     for (const string& computation_name : proto.called_computation_names()) {
-      TF_RET_CHECK(ContainsKey(*computation_map, computation_name))
+      TF_RET_CHECK(ContainsKey(computation_map, computation_name))
           << "No computation named " << computation_name;
       instruction->called_computations_.push_back(
-          computation_map->at(computation_name));
+          computation_map.at(computation_name));
     }
   }
 
@@ -2076,8 +2078,10 @@ string HloInstruction::ToCategory() const {
       bool saw_rank_1 = false;
       bool saw_higher_rank = false;
       for (const auto* operand : operands()) {
-        saw_rank_1 |= ShapeUtil::Rank(operand->shape()) == 1;
-        saw_higher_rank |= ShapeUtil::Rank(operand->shape()) > 1;
+        if (!ShapeUtil::IsTuple(operand->shape())) {
+          saw_rank_1 |= ShapeUtil::Rank(operand->shape()) == 1;
+          saw_higher_rank |= ShapeUtil::Rank(operand->shape()) > 1;
+        }
       }
       if (saw_rank_1 && saw_higher_rank) {
         return "rank-1-broadcast binary fusion";
diff --git a/tensorflow/compiler/xla/service/hlo_instruction.h b/tensorflow/compiler/xla/service/hlo_instruction.h
index 6b2762ff14..8c6449d73b 100644
--- a/tensorflow/compiler/xla/service/hlo_instruction.h
+++ b/tensorflow/compiler/xla/service/hlo_instruction.h
@@ -44,6 +44,7 @@ limitations under the License.
 #include "tensorflow/core/lib/core/status.h"
 #include "tensorflow/core/lib/core/stringpiece.h"
 #include "tensorflow/core/lib/gtl/array_slice.h"
+#include "tensorflow/core/lib/gtl/flatmap.h"
 #include "tensorflow/core/lib/gtl/inlined_vector.h"
 #include "tensorflow/core/lib/gtl/iterator_range.h"
 #include "tensorflow/core/platform/logging.h"
@@ -83,12 +84,16 @@ class HloInstruction {
   //     must contain all operands of the newly constructed instruction.
   //   computation_map: a map from computation name to HloComputation*. This map
   //     must contain all computations which the newly constructed instruction
-  //     calls. If the instruction is a fusion instruction, then the fusion
-  //     computation is added to this map and the module.
+  //     calls.
+  //   add_fused_computation: A function to call to add a fused
+  //     computation. Used (clearly) when the instruction is a fusion
+  //     instruction.
   static StatusOr<std::unique_ptr<HloInstruction>> CreateFromProto(
       HloModule* module, const HloInstructionProto& proto,
       const tensorflow::gtl::FlatMap<string, HloInstruction*>& instruction_map,
-      tensorflow::gtl::FlatMap<string, HloComputation*>* computation_map);
+      const tensorflow::gtl::FlatMap<string, HloComputation*>& computation_map,
+      const std::function<void(std::unique_ptr<HloComputation>)>&
+          add_fused_computation);
 
   // Creates a parameter-retrieving instruction.
   static std::unique_ptr<HloInstruction> CreateParameter(int64 parameter_number,
diff --git a/tensorflow/compiler/xla/service/hlo_module.cc b/tensorflow/compiler/xla/service/hlo_module.cc
index d9c223fbba..faaf73ea1c 100644
--- a/tensorflow/compiler/xla/service/hlo_module.cc
+++ b/tensorflow/compiler/xla/service/hlo_module.cc
@@ -290,9 +290,16 @@ StatusOr<std::unique_ptr<HloModule>> HloModule::CreateFromProto(
 
   tensorflow::gtl::FlatMap<string, HloComputation*> computation_map;
   for (const HloComputationProto& computation_proto : proto.computations()) {
-    TF_ASSIGN_OR_RETURN(std::unique_ptr<HloComputation> computation,
-                        HloComputation::CreateFromProto(
-                            module.get(), computation_proto, &computation_map));
+    TF_ASSIGN_OR_RETURN(
+        std::unique_ptr<HloComputation> computation,
+        HloComputation::CreateFromProto(
+            module.get(), computation_proto, computation_map,
+            /*add_fused_computation=*/
+            [&module](std::unique_ptr<HloComputation> fused_computation) {
+              module->AddComputationInternal(std::move(fused_computation),
+                                             /*is_entry=*/false,
+                                             /*uniquify_names=*/false);
+            }));
     CHECK_NE(computation.get(), nullptr);
     TF_RET_CHECK(!ContainsKey(computation_map, computation->name()));
     string computation_name = computation->name();
-- 
GitLab


From 9a72855893a7ca2832a08e1c5c4060f8674e0c7d Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 16 Nov 2017 16:10:25 -0800
Subject: [PATCH 0033/1225] Update fuse_op to eliminate duplicate nodes being
 created in the graph when injecting artificial dependency to the fused op.

PiperOrigin-RevId: 176037465
---
 .../framework/python/framework/graph_util.py  |  2 +-
 .../python/framework/graph_util_test.py       | 24 +++++++++++++++++++
 2 files changed, 25 insertions(+), 1 deletion(-)

diff --git a/tensorflow/contrib/framework/python/framework/graph_util.py b/tensorflow/contrib/framework/python/framework/graph_util.py
index 8ab8711db4..9ba9c77b92 100644
--- a/tensorflow/contrib/framework/python/framework/graph_util.py
+++ b/tensorflow/contrib/framework/python/framework/graph_util.py
@@ -91,7 +91,7 @@ def fuse_op(graph_def, input_nodes, output_nodes, output_dtypes,
                             (n, cur_node))
           if cur_node not in input_nodes_set:
             next_to_visit += name_to_input_name[cur_node]
-    else:
+    elif n not in reachable_by_input:
       nodes_post_output.append(n)
 
   # Add all nodes upto the input nodes
diff --git a/tensorflow/contrib/framework/python/framework/graph_util_test.py b/tensorflow/contrib/framework/python/framework/graph_util_test.py
index 87b992e22e..0c531fb290 100644
--- a/tensorflow/contrib/framework/python/framework/graph_util_test.py
+++ b/tensorflow/contrib/framework/python/framework/graph_util_test.py
@@ -56,6 +56,30 @@ class GraphUtilTest(test.TestCase):
     self.assertEqual(fused_graph_def.node[2].name, 'D')
     self.assertEqual(fused_graph_def.node[3].name, 'E')
 
+  def testGraphUtilArtificialDependencyInjection(self):
+    graph_def = graph_pb2.GraphDef()
+    node_a = GetNewNode('A', 'Placeholder', [])
+    node_a1 = GetNewNode('A1', 'Placeholder', [])
+    node_b = GetNewNode('B', 'Op1', ['A'])
+    node_c = GetNewNode('C', 'Op1', ['B'])
+    node_d = GetNewNode('D', 'Op1', ['C'])
+    node_e = GetNewNode('E', 'Op1', ['D'])
+    graph_def.node.extend([node_a, node_a1, node_b, node_c, node_d, node_e])
+    fused_graph_def = graph_util.fuse_op(graph_def, ['A', 'A1'], ['D'],
+                                         [types_pb2.DT_FLOAT], True, 'FusedOp',
+                                         'Op2')
+    self.assertEqual(len(fused_graph_def.node), 5)
+    self.assertEqual(fused_graph_def.node[0].name, 'A')
+    self.assertEqual(fused_graph_def.node[1].name, 'A1')
+    self.assertEqual(fused_graph_def.node[2].name, 'FusedOp')
+    self.assertEqual(fused_graph_def.node[2].input[0], 'A')
+    self.assertEqual(fused_graph_def.node[2].op, 'Op2')
+    self.assertEqual(fused_graph_def.node[2].attr['_output_quantized'].b, True)
+    self.assertEqual(fused_graph_def.node[2].attr['_output_types'].list.type,
+                     [types_pb2.DT_FLOAT])
+    self.assertEqual(fused_graph_def.node[3].name, 'D')
+    self.assertEqual(fused_graph_def.node[4].name, 'E')
+
 
 if __name__ == '__main__':
   test.main()
-- 
GitLab


From 780c64e3e872269e76efa27b5bb7fe2465c26dfe Mon Sep 17 00:00:00 2001
From: Yao Zhang <yaozhang@google.com>
Date: Thu, 16 Nov 2017 18:23:32 -0800
Subject: [PATCH 0034/1225] Turn off graph optimization in max pooling test
 because of the inconsistent behavior on handling NaN and -Inf in different
 MaxPooling implementations. Split the tests as ConfigProto could interfere
 with each other.

PiperOrigin-RevId: 176054079
---
 .../python/kernel_tests/pooling_ops_test.py   | 76 ++++++++++++++-----
 1 file changed, 59 insertions(+), 17 deletions(-)

diff --git a/tensorflow/python/kernel_tests/pooling_ops_test.py b/tensorflow/python/kernel_tests/pooling_ops_test.py
index c699d50c02..30c777d12f 100644
--- a/tensorflow/python/kernel_tests/pooling_ops_test.py
+++ b/tensorflow/python/kernel_tests/pooling_ops_test.py
@@ -20,6 +20,8 @@ from __future__ import print_function
 
 import numpy as np
 
+from tensorflow.core.protobuf import config_pb2
+from tensorflow.core.protobuf import rewriter_config_pb2
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import errors_impl
@@ -1172,12 +1174,27 @@ class PoolingTest(test.TestCase):
                      [1, window_rows, window_cols, 1],
                      [1, row_stride, col_stride, 1], padding)
 
-  def _testMaxPoolGradDirect(self, input_data, output_backprop,
-                             expected_input_backprop, input_sizes, output_sizes,
-                             window_rows, window_cols, row_stride, col_stride,
-                             padding, use_gpu, v2):
+  def _testMaxPoolGradDirect(self,
+                             input_data,
+                             output_backprop,
+                             expected_input_backprop,
+                             input_sizes,
+                             output_sizes,
+                             window_rows,
+                             window_cols,
+                             row_stride,
+                             col_stride,
+                             padding,
+                             use_gpu,
+                             v2,
+                             graph_optimization=False):
     pool_func = gen_nn_ops._max_pool_v2 if v2 else nn_ops.max_pool
-    with self.test_session(use_gpu=use_gpu):
+
+    config = config_pb2.ConfigProto()
+    if graph_optimization:
+      config.graph_options.rewrite_options.layout_optimizer = (
+          rewriter_config_pb2.RewriterConfig.ON)
+    with self.test_session(use_gpu=use_gpu, config=config):
       input_tensor = constant_op.constant(input_data, shape=input_sizes)
       output_tensor = pool_func(input_tensor,
                                 [1, window_rows, window_cols, 1],
@@ -1314,7 +1331,7 @@ class PoolingTest(test.TestCase):
             use_gpu=use_gpu,
             v2=v2)
 
-  def _testMaxPoolGradDirectWithNans2_1(self):
+  def _testMaxPoolGradDirectWithNans2_1CPU(self):
     input_data = [float("nan")] * 16
     output_backprop = [11.0, 12.0, 13.0, 15.0, 16.0, 17.0, 19.0, 20.0, 21.0]
     # Test the CPU implementation, which propagates diffs in case of NaN
@@ -1337,11 +1354,23 @@ class PoolingTest(test.TestCase):
           use_gpu=False,
           v2=v2)
 
+  def _testMaxPoolGradDirectWithNans2_1GPU(self):
     if not test.is_gpu_available():
       return
-
-    # Test the GPU implementation that uses cudnn for now.
-    # It does not propagate the diff in cases of NaNs
+    input_data = [float("nan")] * 16
+    output_backprop = [11.0, 12.0, 13.0, 15.0, 16.0, 17.0, 19.0, 20.0, 21.0]
+    # (1) For the NHWC format (used by default below), TensorFlow currently uses
+    # custom MaxPoolingNoMask for the forward op, cuDNN for the gradient op.
+    # With NaNs as input, MaxPoolingNoMask outputs -Inf, which is then fed into
+    # the gradient op. The cuDNN gradient op currently doesn't propagate the
+    # diff if input is -Inf and as a result outputs zeros.
+    # (2) For the NCHW format, TensorFlow currently uses
+    # cuDNN for both the forward and the gradient op. With NaNs as input, cuDNN
+    # forward op outputs NaNs, which is then fed into cuDNN gradient op. cuDNN
+    # gradient op is able to propagate NaNs and as a result the output is the
+    # same as expected_input_backprop_tf_cpu.
+    # We turn off graph optimization (layout optimizer) as the behavior of the
+    # above two cases are different.
     expected_input_backprop_cudnn = [
         0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
         0.0, 0.0
@@ -1359,9 +1388,10 @@ class PoolingTest(test.TestCase):
           col_stride=1,
           padding="VALID",
           use_gpu=True,
-          v2=v2)
+          v2=v2,
+          graph_optimization=False)
 
-  def _testMaxPoolGradDirectWithNans2_2(self):
+  def _testMaxPoolGradDirectWithNans2_2CPU(self):
     input_data = [float("nan")] * 16
     output_backprop = [
         float("nan"), 12.0, 13.0, 15.0, float("nan"), 17.0, 19.0, 20.0,
@@ -1387,11 +1417,16 @@ class PoolingTest(test.TestCase):
           use_gpu=False,
           v2=v2)
 
+  def _testMaxPoolGradDirectWithNans2_2GPU(self):
     if not test.is_gpu_available():
       return
-
-    # Test the GPU implementation that uses cudnn for now.
-    # It does not propagate the diff in cases of NaNs
+    input_data = [float("nan")] * 16
+    output_backprop = [
+        float("nan"), 12.0, 13.0, 15.0,
+        float("nan"), 17.0, 19.0, 20.0,
+        float("nan")
+    ]
+    # See the correspoinding comment in _testMaxPoolGradDirectWithNans2_1GPU().
     expected_input_backprop_cudnn = [
         0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
         0.0, 0.0
@@ -1409,14 +1444,21 @@ class PoolingTest(test.TestCase):
           col_stride=1,
           padding="VALID",
           use_gpu=True,
-          v2=v2)
+          v2=v2,
+          graph_optimization=False)
 
   def testMaxPoolGradDirect(self):
     self._testMaxPoolGradDirect1_1()
     self._testMaxPoolGradDirect1_2()
     self._testMaxPoolGradDirect1_3()
-    self._testMaxPoolGradDirectWithNans2_1()
-    self._testMaxPoolGradDirectWithNans2_2()
+    self._testMaxPoolGradDirectWithNans2_1CPU()
+    self._testMaxPoolGradDirectWithNans2_2CPU()
+
+  def testMaxPoolGradDirectNans2_1GPU(self):
+    self._testMaxPoolGradDirectWithNans2_1GPU()
+
+  def testMaxPoolGradDirectNans2_2GPU(self):
+    self._testMaxPoolGradDirectWithNans2_2GPU()
 
   def _testMaxPoolGradGradValidPadding1_1(self, data_format, use_gpu):
     for pool_func in [gen_nn_ops._max_pool_v2, nn_ops.max_pool]:
-- 
GitLab


From 0beff6bd1342f399173fc4e9d0e79afa3c54503b Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 16 Nov 2017 18:27:53 -0800
Subject: [PATCH 0035/1225]    [tpu:profiler] Add run environment to TfOpStats.

PiperOrigin-RevId: 176054460
---
 .../contrib/tpu/profiler/tf_op_stats.proto     | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/tensorflow/contrib/tpu/profiler/tf_op_stats.proto b/tensorflow/contrib/tpu/profiler/tf_op_stats.proto
index 2d2207a43f..6943ff5f47 100644
--- a/tensorflow/contrib/tpu/profiler/tf_op_stats.proto
+++ b/tensorflow/contrib/tpu/profiler/tf_op_stats.proto
@@ -104,6 +104,8 @@ message HloExtraInfoResult {
   optional string category = 1;
   // The long name of the HLO that includes the dimensions.
   optional string long_name = 2;
+  // The per-TPU-core batch size inferred from this HLO.
+  optional int64 per_core_batch_size = 3;
 }
 
 // Result proto for HloExtraInfoMap.
@@ -112,6 +114,20 @@ message HloExtraInfoMapResult {
   map<string, HloExtraInfoResult> hlo_extrainfo_map = 1;
 }
 
+// Result proto for RunEnvironment (the run environment of a profiling session).
+message RunEnvironmentResult {
+  // Number of hosts used.
+  optional int32 host_count = 1;
+  // The type of TPU used.
+  optional string tpu_type = 2;
+  // The number of TPU cores used.
+  optional int32 tpu_core_count = 3;
+  // The per-TPU-core batch size.
+  optional int32 per_core_batch_size = 4;
+  // Job information including build target and command line.
+  optional string job_info = 5;
+}
+
 // Result proto for TfStatsHelper.
 message TfOpStats {
   // The result for the TF-metric database.
@@ -126,4 +142,6 @@ message TfOpStats {
   optional HloExtraInfoMapResult hlo_extrainfo_map = 5;
   // Overall matrix unit utilization in percentage.
   optional double matrix_unit_utilization_percent = 6;
+  // The run environment of this profiling session.
+  optional RunEnvironmentResult run_environment = 7;
 }
-- 
GitLab


From 466040ca83a29d9842c4f44b56f51e99a16083dc Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 16 Nov 2017 18:39:43 -0800
Subject: [PATCH 0036/1225] Renaming feature_id to dimension_id in dense float
 split

PiperOrigin-RevId: 176055428
---
 .../kernels/split_handler_ops.cc              |  4 ++--
 .../boosted_trees/lib/trees/decision_tree.cc  | 24 +++++++++----------
 .../lib/trees/decision_tree_test.cc           |  6 ++---
 .../boosted_trees/proto/tree_config.proto     |  6 ++---
 .../kernel_tests/prediction_ops_test.py       |  2 +-
 .../kernel_tests/split_handler_ops_test.py    |  8 +++----
 6 files changed, 25 insertions(+), 25 deletions(-)

diff --git a/tensorflow/contrib/boosted_trees/kernels/split_handler_ops.cc b/tensorflow/contrib/boosted_trees/kernels/split_handler_ops.cc
index 3bd30d8678..5c31980359 100644
--- a/tensorflow/contrib/boosted_trees/kernels/split_handler_ops.cc
+++ b/tensorflow/contrib/boosted_trees/kernels/split_handler_ops.cc
@@ -490,11 +490,11 @@ class BuildSparseInequalitySplitsOp : public BaseBuildSplitOp {
       }
       dense_split->set_feature_column(feature_column_group_id_);
       // Set the feature index for the best feature column.
-      const int64 best_feature_id =
+      const int64 best_dimension_id =
           bucket_ids_and_dimensions(best_element_idx, 1);
       const int32 best_bucket_id =
           bucket_ids_and_dimensions(best_element_idx, 0);
-      dense_split->set_feature_id(best_feature_id);
+      dense_split->set_dimension_id(best_dimension_id);
       dense_split->set_threshold(bucket_boundaries(best_bucket_id));
 
       auto* left_child = split_info.mutable_left_child();
diff --git a/tensorflow/contrib/boosted_trees/lib/trees/decision_tree.cc b/tensorflow/contrib/boosted_trees/lib/trees/decision_tree.cc
index f8750e7191..0e5578693a 100644
--- a/tensorflow/contrib/boosted_trees/lib/trees/decision_tree.cc
+++ b/tensorflow/contrib/boosted_trees/lib/trees/decision_tree.cc
@@ -52,13 +52,13 @@ int DecisionTree::Traverse(const DecisionTreeConfig& config,
             example.sparse_float_features[split.feature_column()];
         // Feature id for the split when multivalent sparse float column, or 0
         // by default.
-        const int32 feature_id = split.feature_id();
+        const int32 dimension_id = split.dimension_id();
 
-        node_id =
-            !sparse_feature[feature_id].has_value() ||
-                    sparse_feature[feature_id].get_value() <= split.threshold()
-                ? split.left_id()
-                : split.right_id();
+        node_id = !sparse_feature[dimension_id].has_value() ||
+                          sparse_feature[dimension_id].get_value() <=
+                              split.threshold()
+                      ? split.left_id()
+                      : split.right_id();
         break;
       }
       case TreeNode::kSparseFloatBinarySplitDefaultRight: {
@@ -68,12 +68,12 @@ int DecisionTree::Traverse(const DecisionTreeConfig& config,
             example.sparse_float_features[split.feature_column()];
         // Feature id for the split when multivalent sparse float column, or 0
         // by default.
-        const int32 feature_id = split.feature_id();
-        node_id =
-            sparse_feature[feature_id].has_value() &&
-                    sparse_feature[feature_id].get_value() <= split.threshold()
-                ? split.left_id()
-                : split.right_id();
+        const int32 dimension_id = split.dimension_id();
+        node_id = sparse_feature[dimension_id].has_value() &&
+                          sparse_feature[dimension_id].get_value() <=
+                              split.threshold()
+                      ? split.left_id()
+                      : split.right_id();
         break;
       }
       case TreeNode::kCategoricalIdBinarySplit: {
diff --git a/tensorflow/contrib/boosted_trees/lib/trees/decision_tree_test.cc b/tensorflow/contrib/boosted_trees/lib/trees/decision_tree_test.cc
index 93924d429c..58fe8e335a 100644
--- a/tensorflow/contrib/boosted_trees/lib/trees/decision_tree_test.cc
+++ b/tensorflow/contrib/boosted_trees/lib/trees/decision_tree_test.cc
@@ -190,7 +190,7 @@ TEST_F(DecisionTreeTest, TraverseSparseBinarySplit) {
     tree_config.add_nodes()->mutable_leaf();
 
     // Split on first column
-    split_node->set_feature_id(0);
+    split_node->set_dimension_id(0);
     split_node->set_threshold(2.0f);
 
     // Both instances have this feature value.
@@ -199,7 +199,7 @@ TEST_F(DecisionTreeTest, TraverseSparseBinarySplit) {
     EXPECT_EQ(1, DecisionTree::Traverse(tree_config, 0, *++example_it));
 
     // Split on second column
-    split_node->set_feature_id(1);
+    split_node->set_dimension_id(1);
     split_node->set_threshold(5.0f);
 
     // First instance does not have it (default right), second does have it.
@@ -208,7 +208,7 @@ TEST_F(DecisionTreeTest, TraverseSparseBinarySplit) {
     EXPECT_EQ(1, DecisionTree::Traverse(tree_config, 0, *++example_it));
 
     // Split on third column
-    split_node->set_feature_id(2);
+    split_node->set_dimension_id(2);
     split_node->set_threshold(3.0f);
     example_it = example_iterable.begin();
 
diff --git a/tensorflow/contrib/boosted_trees/proto/tree_config.proto b/tensorflow/contrib/boosted_trees/proto/tree_config.proto
index f14abf45a5..fc570c1083 100644
--- a/tensorflow/contrib/boosted_trees/proto/tree_config.proto
+++ b/tensorflow/contrib/boosted_trees/proto/tree_config.proto
@@ -53,9 +53,9 @@ message DenseFloatBinarySplit {
   // Float feature column and split threshold describing
   // the rule feature <= threshold.
   int32 feature_column = 1;
-  // If feature column is multivalent, this holds the index of the feature for
-  // the split. Defaults to 0.
-  int32 feature_id = 5;
+  // If feature column is multivalent, this holds the index of the dimensiong
+  // for the split. Defaults to 0.
+  int32 dimension_id = 5;
   float threshold = 2;
 
   // Node children indexing into a contiguous
diff --git a/tensorflow/contrib/boosted_trees/python/kernel_tests/prediction_ops_test.py b/tensorflow/contrib/boosted_trees/python/kernel_tests/prediction_ops_test.py
index 9ada844601..c1acf35160 100644
--- a/tensorflow/contrib/boosted_trees/python/kernel_tests/prediction_ops_test.py
+++ b/tensorflow/contrib/boosted_trees/python/kernel_tests/prediction_ops_test.py
@@ -93,7 +93,7 @@ def _set_float_split(split, feat_col, thresh, l_id, r_id, feature_dim_id=None):
   split.left_id = l_id
   split.right_id = r_id
   if feature_dim_id is not None:
-    split.feature_id = feature_dim_id
+    split.dimension_id = feature_dim_id
 
 
 def _set_categorical_id_split(split, feat_col, feat_id, l_id, r_id):
diff --git a/tensorflow/contrib/boosted_trees/python/kernel_tests/split_handler_ops_test.py b/tensorflow/contrib/boosted_trees/python/kernel_tests/split_handler_ops_test.py
index 7c2e3a3b20..28834ef55b 100644
--- a/tensorflow/contrib/boosted_trees/python/kernel_tests/split_handler_ops_test.py
+++ b/tensorflow/contrib/boosted_trees/python/kernel_tests/split_handler_ops_test.py
@@ -240,7 +240,7 @@ class SplitHandlerOpsTest(test_util.TensorFlowTestCase):
 
     self.assertEqual(0, split_node.split.feature_column)
     # Sparse is one dimensional.
-    self.assertEqual(0, split_node.split.feature_id)
+    self.assertEqual(0, split_node.split.dimension_id)
 
     self.assertAllClose(0.52, split_node.split.threshold)
 
@@ -263,7 +263,7 @@ class SplitHandlerOpsTest(test_util.TensorFlowTestCase):
 
     self.assertEqual(0, split_node.split.feature_column)
     # Sparse is one dimensional.
-    self.assertEqual(0, split_node.split.feature_id)
+    self.assertEqual(0, split_node.split.dimension_id)
 
     self.assertAllClose(0.52, split_node.split.threshold)
 
@@ -373,7 +373,7 @@ class SplitHandlerOpsTest(test_util.TensorFlowTestCase):
 
     self.assertEqual(0, split_node.split.feature_column)
     # Split happened on second dimension.
-    self.assertEqual(1, split_node.split.feature_id)
+    self.assertEqual(1, split_node.split.dimension_id)
 
     self.assertAllClose(0.58, split_node.split.threshold)
 
@@ -395,7 +395,7 @@ class SplitHandlerOpsTest(test_util.TensorFlowTestCase):
     self.assertAllClose([expected_right_weight], right_child.value)
 
     self.assertEqual(0, split_node.split.feature_column)
-    self.assertEqual(2, split_node.split.feature_id)
+    self.assertEqual(2, split_node.split.dimension_id)
 
     self.assertAllClose(0.6, split_node.split.threshold)
 
-- 
GitLab


From 0833a3646f90ebaa9d92e90f4ae6326aac13a01c Mon Sep 17 00:00:00 2001
From: Jianwei Xie <xiejw@google.com>
Date: Thu, 16 Nov 2017 18:46:19 -0800
Subject: [PATCH 0037/1225] Adds sleep before close session in TPU Estimator

PiperOrigin-RevId: 176055885
---
 .../contrib/tpu/python/tpu/tpu_estimator.py   | 21 ++++++++++++++++++-
 1 file changed, 20 insertions(+), 1 deletion(-)

diff --git a/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py b/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py
index 97b2d25e0c..fe17664d7f 100644
--- a/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py
+++ b/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py
@@ -23,6 +23,8 @@ import collections
 from contextlib import contextmanager
 import copy
 import threading
+import time
+
 import six
 from six.moves import queue as Queue  # pylint: disable=redefined-builtin
 
@@ -490,11 +492,28 @@ class _InfeedThreadController(_InfeedOutfeedThreadBaseController):
           count += 1
 
     except Exception:  # pylint: disable=broad-except
+      # Close the session to avoid the main thread from hanging. If input
+      # pipeline triggers any error, the infeed thread dies but the main thread
+      # for TPU computation waits for the infeed enqueue forever. Close the
+      # Session to cancel the main thread Session.run execution.
+      #
+      # However, sleep for 2 minutes before explicit closing to give some time
+      # for the TPU compilation error, if any, propagating, from TPU to CPU
+      # host. Compilation errors should be reported by the main thread so that
+      # the program can be interrupted and users can take action.  Due to a race
+      # condition, the infeed thread might see an error first.  Closing the
+      # session here immediately would result in a session cancellation
+      # exception in the main thread, instead of the expected compile error.
+      # User code that depends on having the proper exception type will
+      # therefore be confused.
       logging.error(
           'Failed running infeed, closing session.\n'
-          'You may see an exception from your main session after this.',
+          'You may see an exception from your main session after this. '
+          'Sleep for 2 minutes before close Session from infeed thread to '
+          'allow the main thread returning an error first, if any.',
           exc_info=1
       )
+      time.sleep(120)
       session.close()
 
   def join(self):
-- 
GitLab


From 6434efb9a7db19171d7a3f6e4608af0f03882267 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 16 Nov 2017 19:04:39 -0800
Subject: [PATCH 0038/1225] Use idiomatic grpc::Slice API that allows use of
 different backing buffer

PiperOrigin-RevId: 176057178
---
 .../rpc/grpc_tensor_coding.cc                   | 17 ++++-------------
 1 file changed, 4 insertions(+), 13 deletions(-)

diff --git a/tensorflow/core/distributed_runtime/rpc/grpc_tensor_coding.cc b/tensorflow/core/distributed_runtime/rpc/grpc_tensor_coding.cc
index 5639691804..e51894b4c7 100644
--- a/tensorflow/core/distributed_runtime/rpc/grpc_tensor_coding.cc
+++ b/tensorflow/core/distributed_runtime/rpc/grpc_tensor_coding.cc
@@ -214,22 +214,13 @@ void EncodeTensorToByteBuffer(bool is_dead, const Tensor& val,
 
     if (tensor_data_is_large) {
       // (E) Encode tensor data, but by sharing backing store
-
-      // TODO(vpai): Use the pure C++ ::grpc::Slice constructor that uses
-      // grpc_slice_new_with_user_data once TensorFlow pins a version of gRPC
-      // that includes https://github.com/grpc/grpc/pull/12065
-
       const TensorBuffer* buf = DMAHelper::buffer(&val);
       buf->Ref();
       slices[1] = ::grpc::Slice(
-          grpc_slice_new_with_user_data(
-              const_cast<void*>(static_cast<const void*>(tdata.data())),
-              tdata.size(),
-              [](void* backing) {
-                static_cast<TensorBuffer*>(backing)->Unref();
-              },
-              const_cast<TensorBuffer*>(buf)),
-          ::grpc::Slice::STEAL_REF);
+          const_cast<void*>(static_cast<const void*>(tdata.data())),
+          tdata.size(),
+          [](void* backing) { static_cast<TensorBuffer*>(backing)->Unref(); },
+          const_cast<TensorBuffer*>(buf));
       num_slices += 1;
     }
     size_t total_bytes = 0;
-- 
GitLab


From 75775514239bbbf2916c5aa93ef2fbd29b02cb7f Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 16 Nov 2017 19:05:35 -0800
Subject: [PATCH 0039/1225] Hlo parser: allow empty convolution window. Window
 is not required for a convolution on a 2D shape.

PiperOrigin-RevId: 176057261
---
 tensorflow/compiler/xla/service/hlo_instruction.cc      | 2 +-
 tensorflow/compiler/xla/tools/parser/hlo_parser.cc      | 5 ++++-
 tensorflow/compiler/xla/tools/parser/hlo_parser_test.cc | 2 +-
 3 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/tensorflow/compiler/xla/service/hlo_instruction.cc b/tensorflow/compiler/xla/service/hlo_instruction.cc
index c046b6d9c8..a0795a7b36 100644
--- a/tensorflow/compiler/xla/service/hlo_instruction.cc
+++ b/tensorflow/compiler/xla/service/hlo_instruction.cc
@@ -1898,7 +1898,7 @@ std::vector<string> HloInstruction::ExtraAttributesToString() const {
   if (CanHaveDimensionsField()) {
     extra.push_back(StrCat("dimensions={", Join(dimensions(), ","), "}"));
   }
-  if (window_ != nullptr) {
+  if (window_ != nullptr && window_->dimensions_size() != 0) {
     extra.push_back(StrCat("window={", window_util::ToString(*window_), "}"));
   }
   if (padding_config_ != nullptr) {
diff --git a/tensorflow/compiler/xla/tools/parser/hlo_parser.cc b/tensorflow/compiler/xla/tools/parser/hlo_parser.cc
index 2112b3e710..1767d712d7 100644
--- a/tensorflow/compiler/xla/tools/parser/hlo_parser.cc
+++ b/tensorflow/compiler/xla/tools/parser/hlo_parser.cc
@@ -564,13 +564,16 @@ bool HloParser::ParseInstruction(HloComputation::Builder* builder,
     case HloOpcode::kConvolution: {
       optional<Window> window;
       optional<ConvolutionDimensionNumbers> dnums;
-      attrs["window"] = {/*required=*/true, AttrTy::kWindow, &window};
+      attrs["window"] = {/*required=*/false, AttrTy::kWindow, &window};
       attrs["dim_labels"] = {/*required=*/true,
                              AttrTy::kConvolutionDimensionNumbers, &dnums};
       if (!ParseOperands(&operands, /*expected_size=*/2) ||
           !ParseAttributes(attrs)) {
         return false;
       }
+      if (!window) {
+        window.emplace();
+      }
       instruction = builder->AddInstruction(HloInstruction::CreateConvolve(
           shape, /*lhs=*/operands[0], /*rhs=*/operands[1], *window, *dnums));
       break;
diff --git a/tensorflow/compiler/xla/tools/parser/hlo_parser_test.cc b/tensorflow/compiler/xla/tools/parser/hlo_parser_test.cc
index cb02ef84a9..3fbbfbdead 100644
--- a/tensorflow/compiler/xla/tools/parser/hlo_parser_test.cc
+++ b/tensorflow/compiler/xla/tools/parser/hlo_parser_test.cc
@@ -312,7 +312,7 @@ R"(HloModule ConvolveR2_module:
 ENTRY %ConvolveR2.v3 (input: f32[1,2], filter: f32[1,1]) -> f32[1,2] {
   %input = f32[1,2]{1,0} parameter(0)
   %filter = f32[1,1]{1,0} parameter(1)
-  ROOT %convolution = f32[1,2]{0,1} convolution(f32[1,2]{1,0} %input, f32[1,1]{1,0} %filter), window={size=1}, dim_labels=bf_io->bf
+  ROOT %convolution = f32[1,2]{0,1} convolution(f32[1,2]{1,0} %input, f32[1,1]{1,0} %filter), dim_labels=bf_io->bf
 }
 
 )"
-- 
GitLab


From 15907659888a3e36e8de3d5a95de8d3327cb7c46 Mon Sep 17 00:00:00 2001
From: Derek Murray <mrry@google.com>
Date: Thu, 16 Nov 2017 19:10:45 -0800
Subject: [PATCH 0040/1225] [tf.data] Add experimental API for gathering
 statistics from an Iterator.

PiperOrigin-RevId: 176057576
---
 .../contrib/data/python/kernel_tests/BUILD    |  13 ++
 .../kernel_tests/stats_dataset_ops_test.py    | 213 ++++++++++++++++++
 tensorflow/contrib/data/python/ops/BUILD      |   1 +
 .../contrib/data/python/ops/stats_ops.py      | 177 +++++++++++++++
 tensorflow/core/kernels/BUILD                 |  37 +++
 tensorflow/core/kernels/dataset.h             |  22 +-
 tensorflow/core/kernels/iterator_ops.cc       |  43 ++++
 tensorflow/core/kernels/stats_aggregator.h    |  84 +++++++
 .../core/kernels/stats_aggregator_ops.cc      | 108 +++++++++
 tensorflow/core/kernels/stats_dataset_ops.cc  | 181 +++++++++++++++
 tensorflow/core/ops/dataset_ops.cc            |  47 ++++
 11 files changed, 924 insertions(+), 2 deletions(-)
 create mode 100644 tensorflow/contrib/data/python/kernel_tests/stats_dataset_ops_test.py
 create mode 100644 tensorflow/contrib/data/python/ops/stats_ops.py
 create mode 100644 tensorflow/core/kernels/stats_aggregator.h
 create mode 100644 tensorflow/core/kernels/stats_aggregator_ops.cc
 create mode 100644 tensorflow/core/kernels/stats_dataset_ops.cc

diff --git a/tensorflow/contrib/data/python/kernel_tests/BUILD b/tensorflow/contrib/data/python/kernel_tests/BUILD
index c61f61263f..0dac03d7d8 100644
--- a/tensorflow/contrib/data/python/kernel_tests/BUILD
+++ b/tensorflow/contrib/data/python/kernel_tests/BUILD
@@ -424,6 +424,19 @@ py_test(
     ],
 )
 
+py_test(
+    name = "stats_dataset_ops_test",
+    size = "small",
+    srcs = ["stats_dataset_ops_test.py"],
+    srcs_version = "PY2AND3",
+    deps = [
+        "//tensorflow/contrib/data/python/ops:dataset_ops",
+        "//tensorflow/contrib/data/python/ops:transformation_ops",
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:errors",
+    ],
+)
+
 py_test(
     name = "zip_dataset_op_test",
     size = "small",
diff --git a/tensorflow/contrib/data/python/kernel_tests/stats_dataset_ops_test.py b/tensorflow/contrib/data/python/kernel_tests/stats_dataset_ops_test.py
new file mode 100644
index 0000000000..8f24d6b2f6
--- /dev/null
+++ b/tensorflow/contrib/data/python/kernel_tests/stats_dataset_ops_test.py
@@ -0,0 +1,213 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for the experimental input pipeline statistics gathering ops."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import numpy as np
+
+from tensorflow.contrib.data.python.ops import stats_ops
+from tensorflow.core.framework import summary_pb2
+from tensorflow.python.data.ops import dataset_ops
+from tensorflow.python.framework import errors
+from tensorflow.python.framework import ops
+from tensorflow.python.ops import array_ops
+from tensorflow.python.platform import test
+
+
+class StatsDatasetTest(test.TestCase):
+
+  def _assertSummaryHasCount(self, summary_str, tag, expected_value):
+    summary_proto = summary_pb2.Summary()
+    summary_proto.ParseFromString(summary_str)
+    for value in summary_proto.value:
+      if tag == value.tag:
+        self.assertEqual(expected_value, value.histo.num)
+        return
+    self.fail("Expected tag %r not found in summary %r" % (tag, summary_proto))
+
+  def _assertSummaryHasSum(self, summary_str, tag, expected_value):
+    summary_proto = summary_pb2.Summary()
+    summary_proto.ParseFromString(summary_str)
+    for value in summary_proto.value:
+      if tag == value.tag:
+        self.assertEqual(expected_value, value.histo.sum)
+        return
+    self.fail("Expected tag %r not found in summary %r" % (tag, summary_proto))
+
+  def testBytesProduced(self):
+    dataset = dataset_ops.Dataset.range(100).map(
+        lambda x: array_ops.tile([x], ops.convert_to_tensor([x]))).apply(
+            stats_ops.bytes_produced_stats("bytes_produced"))
+    iterator = dataset.make_initializable_iterator()
+    stats_aggregator = stats_ops.StatsAggregator()
+    stats_aggregator_subscriber = stats_aggregator.subscribe(iterator)
+    next_element = iterator.get_next()
+    summary_t = stats_aggregator.get_summary()
+
+    with self.test_session() as sess:
+      sess.run([iterator.initializer, stats_aggregator_subscriber])
+      expected_sum = 0.0
+      for i in range(100):
+        self.assertAllEqual(
+            np.array([i] * i, dtype=np.int64), sess.run(next_element))
+        summary_str = sess.run(summary_t)
+        self._assertSummaryHasCount(summary_str, "bytes_produced", float(i + 1))
+        expected_sum += i * 8.0
+        self._assertSummaryHasSum(summary_str, "bytes_produced", expected_sum)
+      with self.assertRaises(errors.OutOfRangeError):
+        sess.run(next_element)
+      summary_str = sess.run(summary_t)
+      self._assertSummaryHasCount(summary_str, "bytes_produced", 100.0)
+      self._assertSummaryHasSum(summary_str, "bytes_produced", expected_sum)
+
+  def testLatencyStats(self):
+    dataset = dataset_ops.Dataset.range(100).apply(
+        stats_ops.latency_stats("record_latency"))
+    iterator = dataset.make_initializable_iterator()
+    stats_aggregator = stats_ops.StatsAggregator()
+    stats_aggregator_subscriber = stats_aggregator.subscribe(iterator)
+    next_element = iterator.get_next()
+    summary_t = stats_aggregator.get_summary()
+
+    with self.test_session() as sess:
+      sess.run([iterator.initializer, stats_aggregator_subscriber])
+      for i in range(100):
+        self.assertEqual(i, sess.run(next_element))
+        self._assertSummaryHasCount(
+            sess.run(summary_t), "record_latency", float(i + 1))
+      with self.assertRaises(errors.OutOfRangeError):
+        sess.run(next_element)
+      self._assertSummaryHasCount(sess.run(summary_t), "record_latency", 100.0)
+
+  def testReinitialize(self):
+    dataset = dataset_ops.Dataset.range(100).apply(
+        stats_ops.latency_stats("record_latency"))
+    iterator = dataset.make_initializable_iterator()
+    stats_aggregator = stats_ops.StatsAggregator()
+    stats_aggregator_subscriber = stats_aggregator.subscribe(iterator)
+    next_element = iterator.get_next()
+    summary_t = stats_aggregator.get_summary()
+
+    with self.test_session() as sess:
+      sess.run(stats_aggregator_subscriber)
+      for j in range(5):
+        sess.run(iterator.initializer)
+        for i in range(100):
+          self.assertEqual(i, sess.run(next_element))
+          self._assertSummaryHasCount(
+              sess.run(summary_t), "record_latency", float((j * 100) + i + 1))
+        with self.assertRaises(errors.OutOfRangeError):
+          sess.run(next_element)
+        self._assertSummaryHasCount(
+            sess.run(summary_t), "record_latency", (j + 1) * 100.0)
+
+  def testNoAggregatorRegistered(self):
+    dataset = dataset_ops.Dataset.range(100).apply(
+        stats_ops.latency_stats("record_latency"))
+    iterator = dataset.make_initializable_iterator()
+    next_element = iterator.get_next()
+
+    with self.test_session() as sess:
+      sess.run(iterator.initializer)
+      for i in range(100):
+        self.assertEqual(i, sess.run(next_element))
+      with self.assertRaises(errors.OutOfRangeError):
+        sess.run(next_element)
+
+  def testMultipleTags(self):
+    dataset = dataset_ops.Dataset.range(100).apply(
+        stats_ops.latency_stats("record_latency")).apply(
+            stats_ops.latency_stats("record_latency_2"))
+    iterator = dataset.make_initializable_iterator()
+    stats_aggregator = stats_ops.StatsAggregator()
+    stats_aggregator_subscriber = stats_aggregator.subscribe(iterator)
+    next_element = iterator.get_next()
+    summary_t = stats_aggregator.get_summary()
+
+    with self.test_session() as sess:
+      sess.run([iterator.initializer, stats_aggregator_subscriber])
+      for i in range(100):
+        self.assertEqual(i, sess.run(next_element))
+        self._assertSummaryHasCount(
+            sess.run(summary_t), "record_latency", float(i + 1))
+        self._assertSummaryHasCount(
+            sess.run(summary_t), "record_latency_2", float(i + 1))
+      with self.assertRaises(errors.OutOfRangeError):
+        sess.run(next_element)
+      self._assertSummaryHasCount(sess.run(summary_t), "record_latency", 100.0)
+      self._assertSummaryHasCount(
+          sess.run(summary_t), "record_latency_2", 100.0)
+
+  def testRepeatedTags(self):
+    dataset = dataset_ops.Dataset.range(100).apply(
+        stats_ops.latency_stats("record_latency")).apply(
+            stats_ops.latency_stats("record_latency"))
+    iterator = dataset.make_initializable_iterator()
+    stats_aggregator = stats_ops.StatsAggregator()
+    stats_aggregator_subscriber = stats_aggregator.subscribe(iterator)
+    next_element = iterator.get_next()
+    summary_t = stats_aggregator.get_summary()
+
+    with self.test_session() as sess:
+      sess.run([iterator.initializer, stats_aggregator_subscriber])
+      for i in range(100):
+        self.assertEqual(i, sess.run(next_element))
+        self._assertSummaryHasCount(
+            sess.run(summary_t), "record_latency", float(2 * (i + 1)))
+      with self.assertRaises(errors.OutOfRangeError):
+        sess.run(next_element)
+      self._assertSummaryHasCount(sess.run(summary_t), "record_latency", 200.0)
+
+  def testMultipleIteratorsSameAggregator(self):
+    dataset = dataset_ops.Dataset.range(100).apply(
+        stats_ops.latency_stats("record_latency"))
+    iterator_0 = dataset.make_initializable_iterator()
+    iterator_1 = dataset.make_initializable_iterator()
+    stats_aggregator = stats_ops.StatsAggregator()
+    stats_aggregator_subscribers = [stats_aggregator.subscribe(iterator_0),
+                                    stats_aggregator.subscribe(iterator_1)]
+    next_element = iterator_0.get_next() + iterator_1.get_next()
+    summary_t = stats_aggregator.get_summary()
+
+    with self.test_session() as sess:
+      sess.run([iterator_0.initializer, iterator_1.initializer,
+                stats_aggregator_subscribers])
+      for i in range(100):
+        self.assertEqual(i * 2, sess.run(next_element))
+        self._assertSummaryHasCount(
+            sess.run(summary_t), "record_latency", float(2 * (i + 1)))
+      with self.assertRaises(errors.OutOfRangeError):
+        sess.run(next_element)
+      self._assertSummaryHasCount(sess.run(summary_t), "record_latency", 200.0)
+
+  def testMultipleStatsAggregatorsSameIteratorFail(self):
+    dataset = dataset_ops.Dataset.range(100).apply(
+        stats_ops.latency_stats("record_latency"))
+    iterator = dataset.make_initializable_iterator()
+    stats_aggregator_0 = stats_ops.StatsAggregator()
+    stats_aggregator_1 = stats_ops.StatsAggregator()
+
+    with self.test_session() as sess:
+      sess.run(stats_aggregator_0.subscribe(iterator))
+      # TODO(mrry): Consider making this allowable (and also allowing
+      # aggregators to unsubscribe).
+      with self.assertRaises(errors.FailedPreconditionError):
+        sess.run(stats_aggregator_1.subscribe(iterator))
+
+
+if __name__ == "__main__":
+  test.main()
diff --git a/tensorflow/contrib/data/python/ops/BUILD b/tensorflow/contrib/data/python/ops/BUILD
index d6aaa12f5b..86035f3a69 100644
--- a/tensorflow/contrib/data/python/ops/BUILD
+++ b/tensorflow/contrib/data/python/ops/BUILD
@@ -71,6 +71,7 @@ py_library(
         "interleave_ops.py",
         "resampling.py",
         "scan_ops.py",
+        "stats_ops.py",
     ],
     srcs_version = "PY2AND3",
     deps = [
diff --git a/tensorflow/contrib/data/python/ops/stats_ops.py b/tensorflow/contrib/data/python/ops/stats_ops.py
new file mode 100644
index 0000000000..b8875bd533
--- /dev/null
+++ b/tensorflow/contrib/data/python/ops/stats_ops.py
@@ -0,0 +1,177 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Experimental API for gathering statistics from `tf.data` pipelines."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from tensorflow.python.data.ops import dataset_ops
+from tensorflow.python.data.ops import iterator_ops
+from tensorflow.python.data.util import nest
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import ops
+from tensorflow.python.ops import gen_dataset_ops
+
+
+class StatsAggregator(object):
+  """A stateful resource that aggregates statistics from one or more iterators.
+
+  To record statistics, use one of the custom transformation functions defined
+  in this module when defining your @{tf.data.Dataset}. All statistics will be
+  aggregated by the `StatsAggregator` that is associated with a particular
+  iterator (see below). For example, to record the total number of bytes
+  produced by iterating over a dataset:
+
+  ```python
+  dataset = ...
+  dataset = dataset.apply(stats_ops.bytes_produced_stats("total_bytes"))
+  ```
+
+  To associate a `StatsAggregator` with a @{tf.data.Iterator} object, use
+  the following pattern:
+
+  ```python
+  dataset = ...
+  iterator = dataset.make_one_shot_iterator()
+  stats_aggregator = stats_ops.StatsAggregator()
+  set_op = stats_op.set_stats_aggregator_op(iterator, stats_aggregator)
+
+  with tf.Session() as sess:
+    # Running `set_op` will associate `iterator` with `stats_aggregator`.
+    sess.run(set_op)
+  ```
+
+  To get a protocol buffer summary of the currently aggregated statistics,
+  use the `StatsAggregator.get_summary()` tensor. The easiest way to do this
+  is to add the returned tensor to the @{tf.GraphKeys.SUMMARIES} collection,
+  so that the summaries will be included with any existing summaries.
+
+  ```python
+  stats_aggregator = stats_ops.StatsAggregator()
+  stats_summary = stats_aggregator.get_summary()
+  tf.add_to_collection(tf.GraphKeys.SUMMARIES, stats_summary)
+  ```
+
+  Note: This interface is experimental and expected to change. In particular,
+  we expect to add other implementations of `StatsAggregator` that provide
+  different ways of exporting statistics, and add more types of statistics.
+  """
+
+  def __init__(self):
+    """Creates a `StatsAggregator`."""
+    self._resource = gen_dataset_ops.stats_aggregator_handle()
+
+  def get_summary(self):
+    """Returns a string @{tf.Tensor} that summarizes the aggregated statistics.
+
+    The returned tensor will contain a serialized @{tf.summary.Summary} protocol
+    buffer, which can be used with the standard TensorBoard logging facilities.
+
+    Returns:
+      A scalar string @{tf.Tensor} that summarizes the aggregated statistics.
+    """
+    return gen_dataset_ops.stats_aggregator_summary(self._resource)
+
+  def subscribe(self, iterator):
+    """Returns a @{tf.Operation} to associate this aggregator with `iterator`.
+
+    Note: Each @{tf.data.Iterator} can be associated with at most one
+    `StatsAggregator`. After running the operation that this function
+    returns, all statistics recorded in the iteration of `iterator`
+    will be stored in `stats_aggregator`.
+
+    Args:
+      iterator: A @{tf.data.Iterator} object.
+
+    Returns:
+      A @{tf.Operation} that, when run, associates this aggregator with
+      `iterator`.
+    """
+    if not isinstance(iterator, iterator_ops.Iterator):
+      raise TypeError("`iterator` must be a `tf.data.Iterator` object.")
+    return gen_dataset_ops.iterator_set_stats_aggregator(
+        iterator._iterator_resource, self._resource)  # pylint: disable=protected-access
+
+
+def bytes_produced_stats(tag):
+  """Records the number of bytes produced by each element of the input dataset.
+
+  To consume the statistics, associate a `StatsAggregator` with an iterator
+  over the output dataset.
+
+  Args:
+    tag: String. All statistics recorded by the returned transformation will
+      be associated with the given `tag`.
+
+  Returns:
+    A `Dataset` transformation function, which can be passed to
+    @{tf.contrib.data.Dataset.apply}.
+  """
+
+  def _apply_fn(dataset):
+    return _StatsDataset(dataset, gen_dataset_ops.bytes_produced_stats_dataset,
+                         tag)
+
+  return _apply_fn
+
+
+def latency_stats(tag):
+  """Records the latency of producing each element of the input dataset.
+
+  To consume the statistics, associate a `StatsAggregator` with an iterator
+  over the output dataset.
+
+  Args:
+    tag: String. All statistics recorded by the returned transformation will
+      be associated with the given `tag`.
+
+  Returns:
+    A `Dataset` transformation function, which can be passed to
+    @{tf.contrib.data.Dataset.apply}.
+  """
+
+  def _apply_fn(dataset):
+    return _StatsDataset(dataset, gen_dataset_ops.latency_stats_dataset, tag)
+
+  return _apply_fn
+
+
+class _StatsDataset(dataset_ops.Dataset):
+  """A `Dataset` that acts as an identity, and also records statistics."""
+
+  def __init__(self, input_dataset, op_function, tag):
+    super(_StatsDataset, self).__init__()
+    self._input_dataset = input_dataset
+    self._op_function = op_function
+    self._tag = ops.convert_to_tensor(tag, dtype=dtypes.string)
+
+  def _as_variant_tensor(self):
+    return self._op_function(
+        self._input_dataset._as_variant_tensor(),  # pylint: disable=protected-access
+        self._tag,
+        output_shapes=nest.flatten(self.output_shapes),
+        output_types=nest.flatten(self.output_types))
+
+  @property
+  def output_shapes(self):
+    return self._input_dataset.output_shapes
+
+  @property
+  def output_types(self):
+    return self._input_dataset.output_types
+
+  @property
+  def output_classes(self):
+    return self._input_dataset.output_classes
diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD
index b7386abdea..00cf3f90e9 100644
--- a/tensorflow/core/kernels/BUILD
+++ b/tensorflow/core/kernels/BUILD
@@ -5752,6 +5752,26 @@ tf_mkl_kernel_library(
     ],
 )
 
+cc_library(
+    name = "stats_aggregator",
+    hdrs = ["stats_aggregator.h"],
+    deps = [
+        "//tensorflow/core:framework",
+        "//tensorflow/core:lib",
+    ],
+)
+
+tf_kernel_library(
+    name = "stats_aggregator_ops",
+    srcs = ["stats_aggregator_ops.cc"],
+    deps = [
+        ":stats_aggregator",
+        "//tensorflow/core:framework",
+        "//tensorflow/core:lib",
+        "//tensorflow/core:protos_all_cc",
+    ],
+)
+
 cc_library(
     name = "dataset",
     srcs = ["dataset.cc"],
@@ -5760,6 +5780,7 @@ cc_library(
         "//tensorflow/core:framework",
         "//tensorflow/core:lib",
         "//tensorflow/core:lib_internal",
+        "//tensorflow/core:protos_all_cc",
         "//tensorflow/core/util/tensor_bundle",
     ],
 )
@@ -6032,6 +6053,19 @@ tf_kernel_library(
     ],
 )
 
+tf_kernel_library(
+    name = "stats_dataset_ops",
+    srcs = ["stats_dataset_ops.cc"],
+    deps = [
+        ":dataset",
+        ":stats_aggregator",
+        "//tensorflow/core:dataset_ops_op_lib",
+        "//tensorflow/core:framework",
+        "//tensorflow/core:lib",
+        "//tensorflow/core:lib_internal",
+    ],
+)
+
 tf_kernel_library(
     name = "range_dataset_op",
     srcs = ["range_dataset_op.cc"],
@@ -6157,6 +6191,7 @@ tf_kernel_library(
     deps = [
         ":dataset",
         ":ops_util",
+        ":stats_aggregator",
         "//tensorflow/core:core_cpu_internal",
         "//tensorflow/core:dataset_ops_op_lib",
         "//tensorflow/core:framework",
@@ -6206,6 +6241,8 @@ tf_kernel_library(
         ":skip_dataset_op",
         ":sparse_tensor_slice_dataset_op",
         ":sql_dataset_ops",
+        ":stats_aggregator_ops",
+        ":stats_dataset_ops",
         ":take_dataset_op",
         ":tensor_dataset_op",
         ":tensor_slice_dataset_op",
diff --git a/tensorflow/core/kernels/dataset.h b/tensorflow/core/kernels/dataset.h
index df75deacbe..c266bc07c1 100644
--- a/tensorflow/core/kernels/dataset.h
+++ b/tensorflow/core/kernels/dataset.h
@@ -41,8 +41,6 @@ limitations under the License.
 
 namespace tensorflow {
 
-class ResourceMgr;
-
 // Interface for reading values from a key-value store.
 // Used for restoring iterator state.
 class IteratorStateReader {
@@ -308,6 +306,8 @@ class GraphDefBuilderWrapper {
   GraphDefBuilder* b_;
 };
 
+class StatsAggregator;
+
 // A cut-down version of OpKernelContext for running computations in
 // iterators. Note that we cannot simply use OpKernelContext here
 // because we might run computation in an iterator whose lifetime is
@@ -331,6 +331,16 @@ class IteratorContext {
 
     // Function call support.
     std::function<void(std::function<void()>)> runner = nullptr;
+
+    // A function that returns the current `StatsAggregator` instance to be
+    // used when recording statistics about the iterator.
+    //
+    // NOTE(mrry): This is somewhat awkward, because (i) the `StatsAggregator`
+    // is a property of the `IteratorResource` (which this class does not know
+    // about), and (ii) it can change after the `IteratorContext` has been
+    // created. Better suggestions are welcome!
+    std::function<std::shared_ptr<StatsAggregator>()> stats_aggregator_getter =
+        nullptr;
   };
 
   explicit IteratorContext(Params params) : params_(std::move(params)) {}
@@ -341,6 +351,14 @@ class IteratorContext {
     return &params_.runner;
   }
 
+  std::shared_ptr<StatsAggregator> stats_aggregator() {
+    if (params_.stats_aggregator_getter) {
+      return params_.stats_aggregator_getter();
+    } else {
+      return nullptr;
+    }
+  }
+
  private:
   Params params_;
 };
diff --git a/tensorflow/core/kernels/iterator_ops.cc b/tensorflow/core/kernels/iterator_ops.cc
index b48da5b326..439775157b 100644
--- a/tensorflow/core/kernels/iterator_ops.cc
+++ b/tensorflow/core/kernels/iterator_ops.cc
@@ -22,6 +22,7 @@ limitations under the License.
 #include "tensorflow/core/graph/graph_constructor.h"
 #include "tensorflow/core/kernels/dataset.h"
 #include "tensorflow/core/kernels/ops_util.h"
+#include "tensorflow/core/kernels/stats_aggregator.h"
 #include "tensorflow/core/lib/core/threadpool.h"
 #include "tensorflow/core/lib/gtl/cleanup.h"
 #include "tensorflow/core/lib/random/random.h"
@@ -168,6 +169,16 @@ class IteratorResource : public ResourceBase {
     return Status::OK();
   }
 
+  void set_stats_aggregator(std::shared_ptr<StatsAggregator> stats_aggregator) {
+    mutex_lock l(mu_);
+    stats_aggregator_ = std::move(stats_aggregator);
+  }
+
+  std::shared_ptr<StatsAggregator> stats_aggregator() {
+    tf_shared_lock l(mu_);
+    return stats_aggregator_;
+  }
+
   string DebugString() override { return "Iterator resource"; }
 
   const DataTypeVector& output_dtypes() const { return output_dtypes_; }
@@ -178,6 +189,8 @@ class IteratorResource : public ResourceBase {
 
  private:
   std::shared_ptr<IteratorBase> iterator_;
+  mutex mu_;
+  std::shared_ptr<StatsAggregator> stats_aggregator_ GUARDED_BY(mu_);
   const DataTypeVector output_dtypes_;
   const std::vector<PartialTensorShape> output_shapes_;
   const int graph_def_version_;
@@ -684,6 +697,9 @@ class IteratorGetNextOp : public AsyncOpKernel {
 
       IteratorContext::Params params;
       params.env = ctx->env();
+      params.stats_aggregator_getter = [iterator]() {
+        return iterator->stats_aggregator();
+      };
       params.runner = *(ctx->runner());
       IteratorContext iter_ctx(std::move(params));
 
@@ -835,6 +851,31 @@ class DeserializeIteratorOp : public OpKernel {
   }
 };
 
+class IteratorSetStatsAggregatorOp : public OpKernel {
+ public:
+  explicit IteratorSetStatsAggregatorOp(OpKernelConstruction* ctx)
+      : OpKernel(ctx) {}
+
+  void Compute(OpKernelContext* ctx) override {
+    IteratorResource* iterator_resource;
+    OP_REQUIRES_OK(
+        ctx, LookupResource(ctx, HandleFromInput(ctx, 0), &iterator_resource));
+    core::ScopedUnref unref_iterator(iterator_resource);
+
+    StatsAggregatorResource* stats_aggregator_resource;
+    OP_REQUIRES_OK(ctx, LookupResource(ctx, HandleFromInput(ctx, 1),
+                                       &stats_aggregator_resource));
+    core::ScopedUnref unref_stats_aggregator(stats_aggregator_resource);
+    // TODO(mrry): Consider allowing multiple StatsAggregator ops to
+    // subscribe to updates, and/or unsubscribing.
+    OP_REQUIRES(ctx, !iterator_resource->stats_aggregator(),
+                errors::FailedPrecondition(
+                    "Iterator already associated with a StatsAggregator"));
+    iterator_resource->set_stats_aggregator(
+        stats_aggregator_resource->stats_aggregator());
+  }
+};
+
 REGISTER_KERNEL_BUILDER(Name("Iterator").Device(DEVICE_CPU), IteratorHandleOp);
 REGISTER_KERNEL_BUILDER(Name("MakeIterator").Device(DEVICE_CPU),
                         MakeIteratorOp);
@@ -852,6 +893,8 @@ REGISTER_KERNEL_BUILDER(Name("SerializeIterator").Device(DEVICE_CPU),
                         SerializeIteratorOp);
 REGISTER_KERNEL_BUILDER(Name("DeserializeIterator").Device(DEVICE_CPU),
                         DeserializeIteratorOp);
+REGISTER_KERNEL_BUILDER(Name("IteratorSetStatsAggregator").Device(DEVICE_CPU),
+                        IteratorSetStatsAggregatorOp);
 
 }  // namespace
 
diff --git a/tensorflow/core/kernels/stats_aggregator.h b/tensorflow/core/kernels/stats_aggregator.h
new file mode 100644
index 0000000000..5f602c5f3b
--- /dev/null
+++ b/tensorflow/core/kernels/stats_aggregator.h
@@ -0,0 +1,84 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef THIRD_PARTY_TENSORFLOW_CORE_KERNELS_STATS_AGGREGATOR_H_
+#define THIRD_PARTY_TENSORFLOW_CORE_KERNELS_STATS_AGGREGATOR_H_
+
+#include <memory>
+#include <string>
+
+#include "tensorflow/core/framework/resource_mgr.h"
+#include "tensorflow/core/lib/gtl/array_slice.h"
+
+namespace tensorflow {
+
+class Summary;
+
+// A `StatsAggregator` accumulates statistics incrementally. A
+// `StatsAggregator` can accumulate multiple different statistics, distinguished
+// by a string name.
+//
+// The class currently supports accumulating `Histogram` objects, and we expect
+// to add other methods in future.
+//
+// NOTE(mrry): `StatsAggregator` is a virtual interface because we anticipate
+// that many different implementations will the same interface. For example, the
+// current implementation in "stats_aggregator_ops.cc" is a simple in-memory
+// implementation that integrates with the pull-based summary API, and we may
+// add implementations that work with the push-based `SummaryWriterInterface`,
+// as well as custom monitoring services.
+class StatsAggregator {
+ public:
+  virtual ~StatsAggregator() {}
+
+  // Add the given `values` to the histogram with the given `name`. Each
+  // element of `values` will be treated as a separate sample in the histogram.
+  virtual void AddToHistogram(const string& name,
+                              gtl::ArraySlice<double> values) = 0;
+
+  // Stores a protocol buffer representation of the aggregator state in the
+  // given `out_summary`.
+  // TODO(mrry): Consider separating this method from the `StatsAggregator`
+  // interface. It is possible that not all implementations will support
+  // encoding their state as a protocol buffer.
+  virtual void EncodeToProto(Summary* out_summary) = 0;
+};
+
+// A `StatsAggregatorResource` wraps a shareable `StatsAggregator` as a resource
+// in the TensorFlow resource manager.
+//
+// NOTE(mrry): This class is separate from `StatsAggregator` in order to
+// simplify the memory management of the shared object. Most users of
+// `StatsAggregator` interact with a `std::shared_ptr<StatsAggregator>` whereas
+// the `ResourceBase` API requires explicit reference counting.
+class StatsAggregatorResource : public ResourceBase {
+ public:
+  // Creates a new resource from the given `stats_aggregator`.
+  StatsAggregatorResource(std::unique_ptr<StatsAggregator> stats_aggregator)
+      : stats_aggregator_(stats_aggregator.release()) {}
+
+  // Returns the wrapped `StatsAggregator`.
+  std::shared_ptr<StatsAggregator> stats_aggregator() const {
+    return stats_aggregator_;
+  }
+
+  string DebugString() { return "StatsAggregatorResource"; }
+
+ private:
+  const std::shared_ptr<StatsAggregator> stats_aggregator_;
+};
+
+}  // namespace tensorflow
+
+#endif  // THIRD_PARTY_TENSORFLOW_CORE_KERNELS_STATS_AGGREGATOR_H_
diff --git a/tensorflow/core/kernels/stats_aggregator_ops.cc b/tensorflow/core/kernels/stats_aggregator_ops.cc
new file mode 100644
index 0000000000..037ec64a83
--- /dev/null
+++ b/tensorflow/core/kernels/stats_aggregator_ops.cc
@@ -0,0 +1,108 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include "tensorflow/core/kernels/stats_aggregator.h"
+
+#include <memory>
+
+#include "tensorflow/core/framework/op_kernel.h"
+#include "tensorflow/core/framework/resource_op_kernel.h"
+#include "tensorflow/core/framework/summary.pb.h"
+#include "tensorflow/core/lib/histogram/histogram.h"
+#include "tensorflow/core/platform/macros.h"
+
+namespace tensorflow {
+namespace {
+
+class StatsAggregatorImpl : public StatsAggregator {
+ public:
+  StatsAggregatorImpl() {}
+
+  void AddToHistogram(const string& name,
+                      gtl::ArraySlice<double> values) override {
+    mutex_lock l(mu_);
+    histogram::Histogram& histogram = histograms_[name];
+    for (double value : values) {
+      histogram.Add(value);
+    }
+  }
+
+  void EncodeToProto(Summary* out_summary) override {
+    mutex_lock l(mu_);
+    for (const auto& pair : histograms_) {
+      const string& name = pair.first;
+      const histogram::Histogram& histogram = pair.second;
+
+      Summary::Value* value = out_summary->add_value();
+      value->set_tag(name);
+      histogram.EncodeToProto(value->mutable_histo(),
+                              true /* preserve_zero_buckets */);
+    }
+  }
+
+ private:
+  mutex mu_;
+  std::unordered_map<string, histogram::Histogram> histograms_ GUARDED_BY(mu_);
+  TF_DISALLOW_COPY_AND_ASSIGN(StatsAggregatorImpl);
+};
+
+class StatsAggregatorHandleOp
+    : public ResourceOpKernel<StatsAggregatorResource> {
+ public:
+  explicit StatsAggregatorHandleOp(OpKernelConstruction* ctx)
+      : ResourceOpKernel<StatsAggregatorResource>(ctx) {}
+
+ private:
+  Status CreateResource(StatsAggregatorResource** ret) override
+      EXCLUSIVE_LOCKS_REQUIRED(mu_) {
+    *ret = new StatsAggregatorResource(
+        std::unique_ptr<StatsAggregator>(new StatsAggregatorImpl));
+    return Status::OK();
+  }
+
+  Status VerifyResource(StatsAggregatorResource* resource) override {
+    return Status::OK();
+  }
+};
+
+class StatsAggregatorSummaryOp : public OpKernel {
+ public:
+  explicit StatsAggregatorSummaryOp(OpKernelConstruction* ctx)
+      : OpKernel(ctx) {}
+
+  void Compute(OpKernelContext* ctx) override {
+    const Tensor& resource_handle_t = ctx->input(0);
+    OP_REQUIRES(ctx, TensorShapeUtils::IsScalar(resource_handle_t.shape()),
+                errors::InvalidArgument("resource_handle must be a scalar"));
+
+    StatsAggregatorResource* resource;
+    OP_REQUIRES_OK(ctx,
+                   LookupResource(ctx, HandleFromInput(ctx, 0), &resource));
+    core::ScopedUnref unref_iterator(resource);
+
+    Tensor* summary_t;
+    OP_REQUIRES_OK(ctx, ctx->allocate_output(0, TensorShape({}), &summary_t));
+    Summary summary;
+    resource->stats_aggregator()->EncodeToProto(&summary);
+    summary_t->scalar<string>()() = summary.SerializeAsString();
+  }
+};
+
+REGISTER_KERNEL_BUILDER(Name("StatsAggregatorHandle").Device(DEVICE_CPU),
+                        StatsAggregatorHandleOp);
+REGISTER_KERNEL_BUILDER(Name("StatsAggregatorSummary").Device(DEVICE_CPU),
+                        StatsAggregatorSummaryOp);
+
+}  // namespace
+}  // namespace tensorflow
diff --git a/tensorflow/core/kernels/stats_dataset_ops.cc b/tensorflow/core/kernels/stats_dataset_ops.cc
new file mode 100644
index 0000000000..7b1853aba6
--- /dev/null
+++ b/tensorflow/core/kernels/stats_dataset_ops.cc
@@ -0,0 +1,181 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/core/framework/partial_tensor_shape.h"
+#include "tensorflow/core/framework/tensor.h"
+#include "tensorflow/core/kernels/dataset.h"
+#include "tensorflow/core/kernels/stats_aggregator.h"
+#include "tensorflow/core/lib/random/random.h"
+
+namespace tensorflow {
+namespace {
+
+// This op defines a `Dataset` that passes through its input elements and
+// records the latency of producing each element in the context's
+// `StatsAggregator`.
+//
+// TODO(mrry): It is likely that many *StatsDatasetOp kernels will have the
+// same or similar structure. We should abstract the common boilerplate into
+// a base case and/or investigate how to make general-purpose *StatsDatasetOp
+// kernels that use TensorFlow functions to represent their logic. For example,
+// if the performance were adequate, we might replace this kernel with an
+// implementation that executes functions before and after the `GetNext()` call
+// on the input, each executing an op that gets the current time and performing
+// the subtraction.
+class LatencyStatsDatasetOp : public UnaryDatasetOpKernel {
+ public:
+  explicit LatencyStatsDatasetOp(OpKernelConstruction* ctx)
+      : UnaryDatasetOpKernel(ctx) {}
+
+  void MakeDataset(OpKernelContext* ctx, DatasetBase* input,
+                   DatasetBase** output) override {
+    string tag;
+    OP_REQUIRES_OK(ctx, ParseScalarArgument(ctx, "tag", &tag));
+    *output = new Dataset(input, std::move(tag));
+  }
+
+ private:
+  class Dataset : public DatasetBase {
+   public:
+    explicit Dataset(const DatasetBase* input, string tag)
+        : input_(input), tag_(std::move(tag)) {
+      input_->Ref();
+    }
+
+    ~Dataset() override { input_->Unref(); }
+
+    std::unique_ptr<IteratorBase> MakeIterator(
+        const string& prefix) const override {
+      return std::unique_ptr<IteratorBase>(
+          new Iterator({this, strings::StrCat(prefix, "::LatencyStats")}));
+    }
+
+    const DataTypeVector& output_dtypes() const override {
+      return input_->output_dtypes();
+    }
+    const std::vector<PartialTensorShape>& output_shapes() const override {
+      return input_->output_shapes();
+    }
+
+    string DebugString() override { return "LatencyStatsDatasetOp::Dataset"; }
+
+   private:
+    class Iterator : public DatasetIterator<Dataset> {
+     public:
+      explicit Iterator(const Params& params)
+          : DatasetIterator<Dataset>(params),
+            input_impl_(params.dataset->input_->MakeIterator(params.prefix)) {}
+
+      Status GetNextInternal(IteratorContext* ctx,
+                             std::vector<Tensor>* out_tensors,
+                             bool* end_of_sequence) override {
+        uint64 start = ctx->env()->NowMicros();
+        Status s = input_impl_->GetNext(ctx, out_tensors, end_of_sequence);
+        uint64 end = ctx->env()->NowMicros();
+        auto stats_aggregator = ctx->stats_aggregator();
+        if (stats_aggregator && !*end_of_sequence) {
+          ctx->stats_aggregator()->AddToHistogram(
+              dataset()->tag_, {static_cast<double>(end - start)});
+        }
+        return s;
+      }
+
+     private:
+      const std::unique_ptr<IteratorBase> input_impl_;
+    };
+
+    const DatasetBase* const input_;
+    const string tag_;
+  };
+};
+
+class BytesProducedStatsDatasetOp : public UnaryDatasetOpKernel {
+ public:
+  explicit BytesProducedStatsDatasetOp(OpKernelConstruction* ctx)
+      : UnaryDatasetOpKernel(ctx) {}
+
+  void MakeDataset(OpKernelContext* ctx, DatasetBase* input,
+                   DatasetBase** output) override {
+    string tag;
+    OP_REQUIRES_OK(ctx, ParseScalarArgument(ctx, "tag", &tag));
+    *output = new Dataset(input, std::move(tag));
+  }
+
+ private:
+  class Dataset : public DatasetBase {
+   public:
+    explicit Dataset(const DatasetBase* input, string tag)
+        : input_(input), tag_(std::move(tag)) {
+      input_->Ref();
+    }
+
+    ~Dataset() override { input_->Unref(); }
+
+    std::unique_ptr<IteratorBase> MakeIterator(
+        const string& prefix) const override {
+      return std::unique_ptr<IteratorBase>(new Iterator(
+          {this, strings::StrCat(prefix, "::BytesProducedStats")}));
+    }
+
+    const DataTypeVector& output_dtypes() const override {
+      return input_->output_dtypes();
+    }
+    const std::vector<PartialTensorShape>& output_shapes() const override {
+      return input_->output_shapes();
+    }
+
+    string DebugString() override {
+      return "BytesProducedStatsDatasetOp::Dataset";
+    }
+
+   private:
+    class Iterator : public DatasetIterator<Dataset> {
+     public:
+      explicit Iterator(const Params& params)
+          : DatasetIterator<Dataset>(params),
+            input_impl_(params.dataset->input_->MakeIterator(params.prefix)) {}
+
+      Status GetNextInternal(IteratorContext* ctx,
+                             std::vector<Tensor>* out_tensors,
+                             bool* end_of_sequence) override {
+        Status s = input_impl_->GetNext(ctx, out_tensors, end_of_sequence);
+        auto stats_aggregator = ctx->stats_aggregator();
+        if (stats_aggregator && s.ok() && !*end_of_sequence) {
+          size_t total_bytes = 0;
+          for (const Tensor& t : *out_tensors) {
+            total_bytes += t.TotalBytes();
+          }
+          ctx->stats_aggregator()->AddToHistogram(
+              dataset()->tag_, {static_cast<double>(total_bytes)});
+        }
+        return s;
+      }
+
+     private:
+      const std::unique_ptr<IteratorBase> input_impl_;
+    };
+
+    const DatasetBase* const input_;
+    const string tag_;
+  };
+};
+
+REGISTER_KERNEL_BUILDER(Name("LatencyStatsDataset").Device(DEVICE_CPU),
+                        LatencyStatsDatasetOp);
+REGISTER_KERNEL_BUILDER(Name("BytesProducedStatsDataset").Device(DEVICE_CPU),
+                        BytesProducedStatsDatasetOp);
+
+}  // namespace
+}  // namespace tensorflow
diff --git a/tensorflow/core/ops/dataset_ops.cc b/tensorflow/core/ops/dataset_ops.cc
index f512213964..6bf226e7a5 100644
--- a/tensorflow/core/ops/dataset_ops.cc
+++ b/tensorflow/core/ops/dataset_ops.cc
@@ -151,6 +151,28 @@ REGISTER_OP("IgnoreErrorsDataset")
 Creates a dataset that contains the elements of `input_dataset` ignoring errors.
 )doc");
 
+REGISTER_OP("BytesProducedStatsDataset")
+    .Input("input_dataset: variant")
+    .Input("tag: string")
+    .Output("handle: variant")
+    .Attr("output_types: list(type) >= 1")
+    .Attr("output_shapes: list(shape) >= 1")
+    .SetShapeFn(shape_inference::ScalarShape)
+    .Doc(R"doc(
+Records the bytes size of each element of `input_dataset` in a StatsAggregator.
+)doc");
+
+REGISTER_OP("LatencyStatsDataset")
+    .Input("input_dataset: variant")
+    .Input("tag: string")
+    .Output("handle: variant")
+    .Attr("output_types: list(type) >= 1")
+    .Attr("output_shapes: list(shape) >= 1")
+    .SetShapeFn(shape_inference::ScalarShape)
+    .Doc(R"doc(
+Records the latency of producing `input_dataset` elements in a StatsAggregator.
+)doc");
+
 REGISTER_OP("MapDataset")
     .Input("input_dataset: variant")
     .Input("other_arguments: Targuments")
@@ -744,4 +766,29 @@ serialized: A variant tensor storing the state of the iterator contained in the
   resource.
 )doc");
 
+REGISTER_OP("StatsAggregatorHandle")
+    .Output("handle: resource")
+    .SetShapeFn(shape_inference::ScalarShape)
+    .Attr("container: string = ''")
+    .Attr("shared_name: string = ''")
+    .Doc(R"doc(
+Creates a statistics manager resource.
+)doc");
+
+REGISTER_OP("IteratorSetStatsAggregator")
+    .Input("iterator_handle: resource")
+    .Input("stats_aggregator_handle: resource")
+    .SetShapeFn(shape_inference::NoOutputs)
+    .Doc(R"doc(
+Associates the given iterator with the given statistics aggregator.
+)doc");
+
+REGISTER_OP("StatsAggregatorSummary")
+    .Input("iterator: resource")
+    .Output("summary: string")
+    .SetShapeFn(shape_inference::ScalarShape)
+    .Doc(R"doc(
+Produces a summary of any statistics recorded by the given statistics manager.
+)doc");
+
 }  // namespace tensorflow
-- 
GitLab


From 929178e1046f6387d9245c3d89ba5c3c1f3078d5 Mon Sep 17 00:00:00 2001
From: Yifei Feng <yifeif@google.com>
Date: Thu, 16 Nov 2017 19:11:55 -0800
Subject: [PATCH 0041/1225] Add documentation for how to get input names for
 input_fn for keras model converted estimator.

PiperOrigin-RevId: 176057647
---
 .../docs_src/programmers_guide/estimators.md  | 26 ++++++++++++++++---
 .../python/keras/_impl/keras/estimator.py     |  3 +++
 2 files changed, 25 insertions(+), 4 deletions(-)

diff --git a/tensorflow/docs_src/programmers_guide/estimators.md b/tensorflow/docs_src/programmers_guide/estimators.md
index d465679817..6544a16f2b 100644
--- a/tensorflow/docs_src/programmers_guide/estimators.md
+++ b/tensorflow/docs_src/programmers_guide/estimators.md
@@ -166,11 +166,29 @@ keras_inception_v3 = tf.keras.applications.inception_v3.InceptionV3(weights=None
 keras_inception_v3.compile(optimizer=tf.keras.optimizers.SGD(lr=0.0001, momentum=0.9),
                           loss='categorical_crossentropy',
                           metric='accuracy')
-# Create an Estimator from the compiled Keras model.
+# Create an Estimator from the compiled Keras model. Note the initial model
+# state of the keras model is preserved in the created Estimator.
 est_inception_v3 = tf.keras.estimator.model_to_estimator(keras_model=keras_inception_v3)
-# Treat the derived Estimator as you would any other Estimator. For example,
-# the following derived Estimator calls the train method:
-est_inception_v3.train(input_fn=my_training_set, steps=2000)
+
+# Treat the derived Estimator as you would with any other Estimator.
+# First, recover the input name(s) of Keras model, so we can use them as the
+# feature column name(s) of the Estimator input function:
+keras_inception_v3.input_names  # print out: ['input_1']
+# Once we have the input name(s), we can create the input function, for example,
+# for input(s) in the format of numpy ndarray:
+train_input_fn = tf.estimator.inputs.numpy_input_fn(
+    x={"input_1": train_data},
+    y=train_labels,
+    num_epochs=1,
+    shuffle=False)
+# To train, we call Estimator's train function:
+est_inception_v3.train(input_fn=train_input_fn, steps=2000)
 ```
+Note that the names of feature columns and labels of a keras estimator come from
+the corresponding compiled keras model. For example, the input key names for
+@{$get_started/input_fn} in above `est_inception_v3` estimator can be obtained
+from `keras_inception_v3.input_names`, and similarily, the predicted output
+names can be obtained from `keras_inception_v3.output_names`.
+
 For more details, please refer to the documentation for
 @{tf.keras.estimator.model_to_estimator}.
diff --git a/tensorflow/python/keras/_impl/keras/estimator.py b/tensorflow/python/keras/_impl/keras/estimator.py
index 125e63e1b8..2e931769c7 100644
--- a/tensorflow/python/keras/_impl/keras/estimator.py
+++ b/tensorflow/python/keras/_impl/keras/estimator.py
@@ -232,6 +232,9 @@ def model_to_estimator(keras_model=None,
                        config=None):
   """Constructs an `Estimator` instance from given keras model.
 
+  For usage example, please see
+  @{$programmers_guide/estimators$creating_estimators_from_keras_models}.
+
   Args:
     keras_model: Keras model in memory.
     keras_model_path: Directory to a keras model on disk.
-- 
GitLab


From 7d17d27940aa915583b0b3e2ba77d9f708af6783 Mon Sep 17 00:00:00 2001
From: Justine Tunney <jart@google.com>
Date: Thu, 16 Nov 2017 19:30:05 -0800
Subject: [PATCH 0042/1225] Add WriteScalar support to SummaryDbWriter

PiperOrigin-RevId: 176058700
---
 tensorflow/contrib/summary/summary_ops.py     | 22 ++++-
 .../tensorboard/db/summary_db_writer.cc       | 81 ++++++++++++++-----
 .../tensorboard/db/summary_db_writer_test.cc  | 27 +++++++
 3 files changed, 109 insertions(+), 21 deletions(-)

diff --git a/tensorflow/contrib/summary/summary_ops.py b/tensorflow/contrib/summary/summary_ops.py
index a72c0c80aa..bf810744a1 100644
--- a/tensorflow/contrib/summary/summary_ops.py
+++ b/tensorflow/contrib/summary/summary_ops.py
@@ -364,16 +364,34 @@ def generic(name, tensor, metadata=None, family=None, global_step=None):
 
 
 def scalar(name, tensor, family=None, global_step=None):
-  """Writes a scalar summary if possible."""
+  """Writes a scalar summary if possible.
+
+  Unlike @{tf.contrib.summary.generic} this op may change the dtype
+  depending on the writer, for both practical and efficiency concerns.
+
+  Args:
+    name: An arbitrary name for this summary.
+    tensor: A @{tf.Tensor} Must be one of the following types:
+      `float32`, `float64`, `int32`, `int64`, `uint8`, `int16`,
+      `int8`, `uint16`, `half`, `uint32`, `uint64`.
+    family: Optional, the summary's family.
+    global_step: The `int64` monotonic step variable, which defaults
+      to @{tf.train.get_global_step}.
+
+  Returns:
+    The created @{tf.Operation} or a @{tf.no_op} if summary writing has
+    not been enabled for this context.
+  """
   if global_step is None:
     global_step = training_util.get_global_step()
+  else:
+    global_step = ops.convert_to_tensor(global_step, dtypes.int64)
   def function(tag, scope):
     # Note the identity to move the tensor to the CPU.
     return gen_summary_ops.write_scalar_summary(
         context.context().summary_writer_resource,
         global_step, tag, array_ops.identity(tensor),
         name=scope)
-
   return summary_writer_function(name, tensor, function, family=family)
 
 
diff --git a/tensorflow/contrib/tensorboard/db/summary_db_writer.cc b/tensorflow/contrib/tensorboard/db/summary_db_writer.cc
index ae063d24ef..857e731ef2 100644
--- a/tensorflow/contrib/tensorboard/db/summary_db_writer.cc
+++ b/tensorflow/contrib/tensorboard/db/summary_db_writer.cc
@@ -81,6 +81,55 @@ Status BindTensor(SqliteStatement* stmt, int parameter, const Tensor& t) {
   return BindProto(stmt, parameter, p);
 }
 
+// Tries to fudge shape and dtype to something with smaller storage.
+Status CoerceScalar(const Tensor& t, Tensor* out) {
+  switch (t.dtype()) {
+    case DT_DOUBLE:
+      *out = t;
+      break;
+    case DT_INT64:
+      *out = t;
+      break;
+    case DT_FLOAT:
+      *out = {DT_DOUBLE, {}};
+      out->scalar<double>()() = t.scalar<float>()();
+      break;
+    case DT_HALF:
+      *out = {DT_DOUBLE, {}};
+      out->scalar<double>()() = static_cast<double>(t.scalar<Eigen::half>()());
+      break;
+    case DT_INT32:
+      *out = {DT_INT64, {}};
+      out->scalar<int64>()() = t.scalar<int32>()();
+      break;
+    case DT_INT16:
+      *out = {DT_INT64, {}};
+      out->scalar<int64>()() = t.scalar<int16>()();
+      break;
+    case DT_INT8:
+      *out = {DT_INT64, {}};
+      out->scalar<int64>()() = t.scalar<int8>()();
+      break;
+    case DT_UINT32:
+      *out = {DT_INT64, {}};
+      out->scalar<int64>()() = t.scalar<uint32>()();
+      break;
+    case DT_UINT16:
+      *out = {DT_INT64, {}};
+      out->scalar<int64>()() = t.scalar<uint16>()();
+      break;
+    case DT_UINT8:
+      *out = {DT_INT64, {}};
+      out->scalar<int64>()() = t.scalar<uint8>()();
+      break;
+    default:
+      return errors::Unimplemented("Scalar summary for dtype ",
+                                   DataTypeString(t.dtype()),
+                                   " is not supported.");
+  }
+  return Status::OK();
+}
+
 class Transactor {
  public:
   explicit Transactor(std::shared_ptr<Sqlite> db)
@@ -280,20 +329,23 @@ class SummaryDbWriter : public SummaryWriterInterface {
     insert_tensor_.BindInt(1, tag_id);
     insert_tensor_.BindInt(2, global_step);
     insert_tensor_.BindDouble(3, GetWallTime(env_));
-    switch (t.dtype()) {
-      case DT_INT64:
-        insert_tensor_.BindInt(4, t.scalar<int64>()());
-        break;
-      case DT_DOUBLE:
-        insert_tensor_.BindDouble(4, t.scalar<double>()());
-        break;
-      default:
-        TF_RETURN_IF_ERROR(BindTensor(&insert_tensor_, 4, t));
-        break;
+    if (t.shape().dims() == 0 && t.dtype() == DT_INT64) {
+      insert_tensor_.BindInt(4, t.scalar<int64>()());
+    } else if (t.shape().dims() == 0 && t.dtype() == DT_DOUBLE) {
+      insert_tensor_.BindDouble(4, t.scalar<double>()());
+    } else {
+      TF_RETURN_IF_ERROR(BindTensor(&insert_tensor_, 4, t));
     }
     return insert_tensor_.StepAndReset();
   }
 
+  Status WriteScalar(int64 global_step, Tensor t, const string& tag) override {
+    Tensor t2;
+    TF_RETURN_IF_ERROR(CoerceScalar(t, &t2));
+    // TODO(jart): Generate scalars plugin metadata on this value.
+    return WriteTensor(global_step, std::move(t2), tag, "");
+  }
+
   Status WriteGraph(int64 global_step, std::unique_ptr<GraphDef> g) override {
     mutex_lock ml(mu_);
     TF_RETURN_IF_ERROR(InitializeParents());
@@ -325,15 +377,6 @@ class SummaryDbWriter : public SummaryWriterInterface {
     }
   }
 
-  Status WriteScalar(int64 global_step, Tensor t, const string& tag) override {
-    // TODO(@jart): Unlike WriteTensor, this method would be granted leniency
-    //              to change the dtype if it saves storage space. For example,
-    //              DT_UINT32 would be stored in the database as an INTEGER
-    //              rather than a serialized BLOB. But when reading it back,
-    //              the dtype would become DT_INT64.
-    return errors::Unimplemented("WriteScalar");
-  }
-
   Status WriteHistogram(int64 global_step, Tensor t,
                         const string& tag) override {
     return errors::Unimplemented(
diff --git a/tensorflow/contrib/tensorboard/db/summary_db_writer_test.cc b/tensorflow/contrib/tensorboard/db/summary_db_writer_test.cc
index 3431842ca2..625861fa6b 100644
--- a/tensorflow/contrib/tensorboard/db/summary_db_writer_test.cc
+++ b/tensorflow/contrib/tensorboard/db/summary_db_writer_test.cc
@@ -290,5 +290,32 @@ TEST_F(SummaryDbWriterTest, WriteGraph) {
   EXPECT_EQ(1LL, QueryInt("SELECT is_control FROM NodeInputs WHERE idx = 2"));
 }
 
+TEST_F(SummaryDbWriterTest, WriteScalarInt32_CoercesToInt64) {
+  TF_ASSERT_OK(CreateSummaryDbWriter(db_, "", "", "", &env_, &writer_));
+  Tensor t(DT_INT32, {});
+  t.scalar<int32>()() = -17;
+  TF_ASSERT_OK(writer_->WriteScalar(1, t, "t"));
+  TF_ASSERT_OK(writer_->Flush());
+  ASSERT_EQ(-17LL, QueryInt("SELECT tensor FROM Tensors"));
+}
+
+TEST_F(SummaryDbWriterTest, WriteScalarInt8_CoercesToInt64) {
+  TF_ASSERT_OK(CreateSummaryDbWriter(db_, "", "", "", &env_, &writer_));
+  Tensor t(DT_INT8, {});
+  t.scalar<int8>()() = static_cast<int8>(-17);
+  TF_ASSERT_OK(writer_->WriteScalar(1, t, "t"));
+  TF_ASSERT_OK(writer_->Flush());
+  ASSERT_EQ(-17LL, QueryInt("SELECT tensor FROM Tensors"));
+}
+
+TEST_F(SummaryDbWriterTest, WriteScalarUint8_CoercesToInt64) {
+  TF_ASSERT_OK(CreateSummaryDbWriter(db_, "", "", "", &env_, &writer_));
+  Tensor t(DT_UINT8, {});
+  t.scalar<uint8>()() = static_cast<uint8>(254);
+  TF_ASSERT_OK(writer_->WriteScalar(1, t, "t"));
+  TF_ASSERT_OK(writer_->Flush());
+  ASSERT_EQ(254LL, QueryInt("SELECT tensor FROM Tensors"));
+}
+
 }  // namespace
 }  // namespace tensorflow
-- 
GitLab


From 2397a7f375ab91b071126948dcae7abd3e775d3f Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 16 Nov 2017 19:34:35 -0800
Subject: [PATCH 0043/1225] Update ops-related pbtxt files.

PiperOrigin-RevId: 176059000
---
 .../core/ops/compat/ops_history.v1.pbtxt      | 100 +++++++++++++++++
 tensorflow/core/ops/ops.pbtxt                 | 105 ++++++++++++++++++
 2 files changed, 205 insertions(+)

diff --git a/tensorflow/core/ops/compat/ops_history.v1.pbtxt b/tensorflow/core/ops/compat/ops_history.v1.pbtxt
index ffb608d600..daeb6763c8 100644
--- a/tensorflow/core/ops/compat/ops_history.v1.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history.v1.pbtxt
@@ -6059,6 +6059,33 @@ op {
     type: "list(float)"
   }
 }
+op {
+  name: "BytesProducedStatsDataset"
+  input_arg {
+    name: "input_dataset"
+    type: DT_VARIANT
+  }
+  input_arg {
+    name: "tag"
+    type: DT_STRING
+  }
+  output_arg {
+    name: "handle"
+    type: DT_VARIANT
+  }
+  attr {
+    name: "output_types"
+    type: "list(type)"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "output_shapes"
+    type: "list(shape)"
+    has_minimum: true
+    minimum: 1
+  }
+}
 op {
   name: "CTCBeamSearchDecoder"
   input_arg {
@@ -15001,6 +15028,18 @@ op {
   }
   is_stateful: true
 }
+op {
+  name: "IteratorSetStatsAggregator"
+  input_arg {
+    name: "iterator_handle"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "stats_aggregator_handle"
+    type: DT_RESOURCE
+  }
+  is_stateful: true
+}
 op {
   name: "IteratorToStringHandle"
   input_arg {
@@ -15192,6 +15231,33 @@ op {
     }
   }
 }
+op {
+  name: "LatencyStatsDataset"
+  input_arg {
+    name: "input_dataset"
+    type: DT_VARIANT
+  }
+  input_arg {
+    name: "tag"
+    type: DT_STRING
+  }
+  output_arg {
+    name: "handle"
+    type: DT_VARIANT
+  }
+  attr {
+    name: "output_types"
+    type: "list(type)"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "output_shapes"
+    type: "list(shape)"
+    has_minimum: true
+    minimum: 1
+  }
+}
 op {
   name: "LearnedUnigramCandidateSampler"
   input_arg {
@@ -38418,6 +38484,40 @@ op {
     }
   }
 }
+op {
+  name: "StatsAggregatorHandle"
+  output_arg {
+    name: "handle"
+    type: DT_RESOURCE
+  }
+  attr {
+    name: "container"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  attr {
+    name: "shared_name"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  is_stateful: true
+}
+op {
+  name: "StatsAggregatorSummary"
+  input_arg {
+    name: "iterator"
+    type: DT_RESOURCE
+  }
+  output_arg {
+    name: "summary"
+    type: DT_STRING
+  }
+  is_stateful: true
+}
 op {
   name: "StopGradient"
   input_arg {
diff --git a/tensorflow/core/ops/ops.pbtxt b/tensorflow/core/ops/ops.pbtxt
index d9e3dbdbb7..55a8fc9032 100644
--- a/tensorflow/core/ops/ops.pbtxt
+++ b/tensorflow/core/ops/ops.pbtxt
@@ -4270,6 +4270,34 @@ op {
   summary: "Bucketizes \'input\' based on \'boundaries\'."
   description: "For example, if the inputs are\n    boundaries = [0, 10, 100]\n    input = [[-5, 10000]\n             [150,   10]\n             [5,    100]]\n\nthen the output will be\n    output = [[0, 3]\n              [3, 2]\n              [1, 3]]"
 }
+op {
+  name: "BytesProducedStatsDataset"
+  input_arg {
+    name: "input_dataset"
+    type: DT_VARIANT
+  }
+  input_arg {
+    name: "tag"
+    type: DT_STRING
+  }
+  output_arg {
+    name: "handle"
+    type: DT_VARIANT
+  }
+  attr {
+    name: "output_types"
+    type: "list(type)"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "output_shapes"
+    type: "list(shape)"
+    has_minimum: true
+    minimum: 1
+  }
+  summary: "Records the bytes size of each element of `input_dataset` in a StatsAggregator."
+}
 op {
   name: "CTCBeamSearchDecoder"
   input_arg {
@@ -11502,6 +11530,19 @@ op {
   summary: "Gets the next output from the given iterator."
   is_stateful: true
 }
+op {
+  name: "IteratorSetStatsAggregator"
+  input_arg {
+    name: "iterator_handle"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "stats_aggregator_handle"
+    type: DT_RESOURCE
+  }
+  summary: "Associates the given iterator with the given statistics aggregator."
+  is_stateful: true
+}
 op {
   name: "IteratorToStringHandle"
   input_arg {
@@ -11698,6 +11739,34 @@ op {
   }
   summary: "Gradients for Local Response Normalization."
 }
+op {
+  name: "LatencyStatsDataset"
+  input_arg {
+    name: "input_dataset"
+    type: DT_VARIANT
+  }
+  input_arg {
+    name: "tag"
+    type: DT_STRING
+  }
+  output_arg {
+    name: "handle"
+    type: DT_VARIANT
+  }
+  attr {
+    name: "output_types"
+    type: "list(type)"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "output_shapes"
+    type: "list(shape)"
+    has_minimum: true
+    minimum: 1
+  }
+  summary: "Records the latency of producing `input_dataset` elements in a StatsAggregator."
+}
 op {
   name: "LearnedUnigramCandidateSampler"
   input_arg {
@@ -29980,6 +30049,42 @@ op {
   summary: "Outputs deterministic pseudorandom values from a truncated normal distribution."
   description: "The generated values follow a normal distribution with mean 0 and standard\ndeviation 1, except that values whose magnitude is more than 2 standard\ndeviations from the mean are dropped and re-picked.\n\nThe outputs are a deterministic function of `shape` and `seed`."
 }
+op {
+  name: "StatsAggregatorHandle"
+  output_arg {
+    name: "handle"
+    type: DT_RESOURCE
+  }
+  attr {
+    name: "container"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  attr {
+    name: "shared_name"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  summary: "Creates a statistics manager resource."
+  is_stateful: true
+}
+op {
+  name: "StatsAggregatorSummary"
+  input_arg {
+    name: "iterator"
+    type: DT_RESOURCE
+  }
+  output_arg {
+    name: "summary"
+    type: DT_STRING
+  }
+  summary: "Produces a summary of any statistics recorded by the given statistics manager."
+  is_stateful: true
+}
 op {
   name: "StopGradient"
   input_arg {
-- 
GitLab


From a764ec152ce8a4ebe6faf42c55a3177182389c9f Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 16 Nov 2017 19:45:12 -0800
Subject: [PATCH 0044/1225] Go: Update generated wrapper functions for
 TensorFlow ops.

PiperOrigin-RevId: 176059622
---
 tensorflow/go/op/wrappers.go | 102 +++++++++++++++++++++++++++++++++++
 1 file changed, 102 insertions(+)

diff --git a/tensorflow/go/op/wrappers.go b/tensorflow/go/op/wrappers.go
index a910b51fb9..e650d25a32 100644
--- a/tensorflow/go/op/wrappers.go
+++ b/tensorflow/go/op/wrappers.go
@@ -5334,6 +5334,21 @@ func DynamicStitch(scope *Scope, indices []tf.Output, data []tf.Output) (merged
 	return op.Output(0)
 }
 
+// Produces a summary of any statistics recorded by the given statistics manager.
+func StatsAggregatorSummary(scope *Scope, iterator tf.Output) (summary tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "StatsAggregatorSummary",
+		Input: []tf.Input{
+			iterator,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
 // FIFOQueueV2Attr is an optional argument to FIFOQueueV2.
 type FIFOQueueV2Attr func(optionalAttr)
 
@@ -5950,6 +5965,23 @@ func DeserializeIterator(scope *Scope, resource_handle tf.Output, serialized tf.
 	return scope.AddOperation(opspec)
 }
 
+// Records the latency of producing `input_dataset` elements in a StatsAggregator.
+func LatencyStatsDataset(scope *Scope, input_dataset tf.Output, tag tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes}
+	opspec := tf.OpSpec{
+		Type: "LatencyStatsDataset",
+		Input: []tf.Input{
+			input_dataset, tag,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
 // Concatenates tensors along one dimension.
 //
 // Arguments:
@@ -6146,6 +6178,43 @@ func NonMaxSuppressionV2(scope *Scope, boxes tf.Output, scores tf.Output, max_ou
 	return op.Output(0)
 }
 
+// StatsAggregatorHandleAttr is an optional argument to StatsAggregatorHandle.
+type StatsAggregatorHandleAttr func(optionalAttr)
+
+// StatsAggregatorHandleContainer sets the optional container attribute to value.
+// If not specified, defaults to ""
+func StatsAggregatorHandleContainer(value string) StatsAggregatorHandleAttr {
+	return func(m optionalAttr) {
+		m["container"] = value
+	}
+}
+
+// StatsAggregatorHandleSharedName sets the optional shared_name attribute to value.
+// If not specified, defaults to ""
+func StatsAggregatorHandleSharedName(value string) StatsAggregatorHandleAttr {
+	return func(m optionalAttr) {
+		m["shared_name"] = value
+	}
+}
+
+// Creates a statistics manager resource.
+func StatsAggregatorHandle(scope *Scope, optional ...StatsAggregatorHandleAttr) (handle tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "StatsAggregatorHandle",
+
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
 // CropAndResizeGradBoxesAttr is an optional argument to CropAndResizeGradBoxes.
 type CropAndResizeGradBoxesAttr func(optionalAttr)
 
@@ -19067,6 +19136,22 @@ func ReadVariableOp(scope *Scope, resource tf.Output, dtype tf.DataType) (value
 	return op.Output(0)
 }
 
+// Associates the given iterator with the given statistics aggregator.
+//
+// Returns the created operation.
+func IteratorSetStatsAggregator(scope *Scope, iterator_handle tf.Output, stats_aggregator_handle tf.Output) (o *tf.Operation) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "IteratorSetStatsAggregator",
+		Input: []tf.Input{
+			iterator_handle, stats_aggregator_handle,
+		},
+	}
+	return scope.AddOperation(opspec)
+}
+
 // ResourceSparseApplyFtrlV2Attr is an optional argument to ResourceSparseApplyFtrlV2.
 type ResourceSparseApplyFtrlV2Attr func(optionalAttr)
 
@@ -24785,6 +24870,23 @@ func Save(scope *Scope, filename tf.Output, tensor_names tf.Output, data []tf.Ou
 	return scope.AddOperation(opspec)
 }
 
+// Records the bytes size of each element of `input_dataset` in a StatsAggregator.
+func BytesProducedStatsDataset(scope *Scope, input_dataset tf.Output, tag tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes}
+	opspec := tf.OpSpec{
+		Type: "BytesProducedStatsDataset",
+		Input: []tf.Input{
+			input_dataset, tag,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
 // QrAttr is an optional argument to Qr.
 type QrAttr func(optionalAttr)
 
-- 
GitLab


From 10581c8afee392f2455acb700ece8217a3a19a4b Mon Sep 17 00:00:00 2001
From: Justine Tunney <jart@google.com>
Date: Thu, 16 Nov 2017 20:50:28 -0800
Subject: [PATCH 0045/1225] Rename global_step -> step in contrib/summary API

Since it's more succinct and the API doesn't actually care if the provided step
is the one true global step.

PiperOrigin-RevId: 176063779
---
 tensorflow/contrib/summary/summary_ops.py     | 72 ++++++++++---------
 .../contrib/summary/summary_ops_test.py       |  4 +-
 tensorflow/core/kernels/summary_kernels.cc    | 40 +++++------
 tensorflow/core/ops/summary_ops.cc            | 24 +++----
 4 files changed, 73 insertions(+), 67 deletions(-)

diff --git a/tensorflow/contrib/summary/summary_ops.py b/tensorflow/contrib/summary/summary_ops.py
index bf810744a1..3e65f83051 100644
--- a/tensorflow/contrib/summary/summary_ops.py
+++ b/tensorflow/contrib/summary/summary_ops.py
@@ -344,10 +344,9 @@ def summary_writer_function(name, tensor, function, family=None):
   return op
 
 
-def generic(name, tensor, metadata=None, family=None, global_step=None):
+def generic(name, tensor, metadata=None, family=None, step=None):
   """Writes a tensor summary if possible."""
-  if global_step is None:
-    global_step = training_util.get_global_step()
+
   def function(tag, scope):
     if metadata is None:
       serialized_metadata = constant_op.constant("")
@@ -358,12 +357,15 @@ def generic(name, tensor, metadata=None, family=None, global_step=None):
     # Note the identity to move the tensor to the CPU.
     return gen_summary_ops.write_summary(
         context.context().summary_writer_resource,
-        global_step, array_ops.identity(tensor),
-        tag, serialized_metadata, name=scope)
+        _choose_step(step),
+        array_ops.identity(tensor),
+        tag,
+        serialized_metadata,
+        name=scope)
   return summary_writer_function(name, tensor, function, family=family)
 
 
-def scalar(name, tensor, family=None, global_step=None):
+def scalar(name, tensor, family=None, step=None):
   """Writes a scalar summary if possible.
 
   Unlike @{tf.contrib.summary.generic} this op may change the dtype
@@ -375,68 +377,68 @@ def scalar(name, tensor, family=None, global_step=None):
       `float32`, `float64`, `int32`, `int64`, `uint8`, `int16`,
       `int8`, `uint16`, `half`, `uint32`, `uint64`.
     family: Optional, the summary's family.
-    global_step: The `int64` monotonic step variable, which defaults
+    step: The `int64` monotonic step variable, which defaults
       to @{tf.train.get_global_step}.
 
   Returns:
     The created @{tf.Operation} or a @{tf.no_op} if summary writing has
     not been enabled for this context.
   """
-  if global_step is None:
-    global_step = training_util.get_global_step()
-  else:
-    global_step = ops.convert_to_tensor(global_step, dtypes.int64)
+
   def function(tag, scope):
     # Note the identity to move the tensor to the CPU.
     return gen_summary_ops.write_scalar_summary(
         context.context().summary_writer_resource,
-        global_step, tag, array_ops.identity(tensor),
+        _choose_step(step),
+        tag,
+        array_ops.identity(tensor),
         name=scope)
+
   return summary_writer_function(name, tensor, function, family=family)
 
 
-def histogram(name, tensor, family=None, global_step=None):
+def histogram(name, tensor, family=None, step=None):
   """Writes a histogram summary if possible."""
-  if global_step is None:
-    global_step = training_util.get_global_step()
+
   def function(tag, scope):
     # Note the identity to move the tensor to the CPU.
     return gen_summary_ops.write_histogram_summary(
         context.context().summary_writer_resource,
-        global_step, tag, array_ops.identity(tensor),
+        _choose_step(step),
+        tag,
+        array_ops.identity(tensor),
         name=scope)
 
   return summary_writer_function(name, tensor, function, family=family)
 
 
-def image(name, tensor, bad_color=None, max_images=3, family=None,
-          global_step=None):
+def image(name, tensor, bad_color=None, max_images=3, family=None, step=None):
   """Writes an image summary if possible."""
-  if global_step is None:
-    global_step = training_util.get_global_step()
+
   def function(tag, scope):
     bad_color_ = (constant_op.constant([255, 0, 0, 255], dtype=dtypes.uint8)
                   if bad_color is None else bad_color)
     # Note the identity to move the tensor to the CPU.
     return gen_summary_ops.write_image_summary(
         context.context().summary_writer_resource,
-        global_step, tag, array_ops.identity(tensor),
+        _choose_step(step),
+        tag,
+        array_ops.identity(tensor),
         bad_color_,
-        max_images, name=scope)
+        max_images,
+        name=scope)
 
   return summary_writer_function(name, tensor, function, family=family)
 
 
-def audio(name, tensor, sample_rate, max_outputs, family=None,
-          global_step=None):
+def audio(name, tensor, sample_rate, max_outputs, family=None, step=None):
   """Writes an audio summary if possible."""
-  if global_step is None:
-    global_step = training_util.get_global_step()
+
   def function(tag, scope):
     # Note the identity to move the tensor to the CPU.
     return gen_summary_ops.write_audio_summary(
         context.context().summary_writer_resource,
-        global_step,
+        _choose_step(step),
         tag,
         array_ops.identity(tensor),
         sample_rate=sample_rate,
@@ -483,15 +485,13 @@ def graph(param, step=None, name=None):
   if writer is None:
     return control_flow_ops.no_op()
   with ops.device("cpu:0"):
-    if step is None:
-      step = training_util.get_global_step()
-    else:
-      step = ops.convert_to_tensor(step, dtypes.int64)
     if isinstance(param, (ops.Graph, graph_pb2.GraphDef)):
       tensor = ops.convert_to_tensor(_serialize_graph(param), dtypes.string)
     else:
       tensor = array_ops.identity(param)
-    return gen_summary_ops.write_graph_summary(writer, step, tensor, name=name)
+    return gen_summary_ops.write_graph_summary(
+        writer, _choose_step(step), tensor, name=name)
+
 
 _graph = graph  # for functions with a graph parameter
 
@@ -527,3 +527,11 @@ def _serialize_graph(arbitrary_graph):
     return arbitrary_graph.as_graph_def(add_shapes=True).SerializeToString()
   else:
     return arbitrary_graph.SerializeToString()
+
+
+def _choose_step(step):
+  if step is None:
+    return training_util.get_global_step()
+  if not isinstance(step, ops.Tensor):
+    return ops.convert_to_tensor(step, dtypes.int64)
+  return step
diff --git a/tensorflow/contrib/summary/summary_ops_test.py b/tensorflow/contrib/summary/summary_ops_test.py
index c5ca054f77..ad89c0c36a 100644
--- a/tensorflow/contrib/summary/summary_ops_test.py
+++ b/tensorflow/contrib/summary/summary_ops_test.py
@@ -97,13 +97,13 @@ class TargetTest(test_util.TensorFlowTestCase):
       self.assertEqual(events[1].summary.value[0].tag, 'scalar')
 
   def testSummaryGlobalStep(self):
-    global_step = training_util.get_or_create_global_step()
+    step = training_util.get_or_create_global_step()
     logdir = tempfile.mkdtemp()
     with summary_ops.create_summary_file_writer(
         logdir, max_queue=0,
         name='t2').as_default(), summary_ops.always_record_summaries():
 
-      summary_ops.scalar('scalar', 2.0, global_step=global_step)
+      summary_ops.scalar('scalar', 2.0, step=step)
 
       events = summary_test_util.events_from_logdir(logdir)
       self.assertEqual(len(events), 2)
diff --git a/tensorflow/core/kernels/summary_kernels.cc b/tensorflow/core/kernels/summary_kernels.cc
index 3706f51cf4..7487e70acc 100644
--- a/tensorflow/core/kernels/summary_kernels.cc
+++ b/tensorflow/core/kernels/summary_kernels.cc
@@ -111,8 +111,8 @@ class WriteSummaryOp : public OpKernel {
     OP_REQUIRES_OK(ctx, LookupResource(ctx, HandleFromInput(ctx, 0), &s));
     core::ScopedUnref unref(s);
     const Tensor* tmp;
-    OP_REQUIRES_OK(ctx, ctx->input("global_step", &tmp));
-    const int64 global_step = tmp->scalar<int64>()();
+    OP_REQUIRES_OK(ctx, ctx->input("step", &tmp));
+    const int64 step = tmp->scalar<int64>()();
     OP_REQUIRES_OK(ctx, ctx->input("tag", &tmp));
     const string& tag = tmp->scalar<string>()();
     OP_REQUIRES_OK(ctx, ctx->input("summary_metadata", &tmp));
@@ -121,8 +121,7 @@ class WriteSummaryOp : public OpKernel {
     const Tensor* t;
     OP_REQUIRES_OK(ctx, ctx->input("tensor", &t));
 
-    OP_REQUIRES_OK(ctx,
-                   s->WriteTensor(global_step, *t, tag, serialized_metadata));
+    OP_REQUIRES_OK(ctx, s->WriteTensor(step, *t, tag, serialized_metadata));
   }
 };
 REGISTER_KERNEL_BUILDER(Name("WriteSummary").Device(DEVICE_CPU),
@@ -158,15 +157,15 @@ class WriteScalarSummaryOp : public OpKernel {
     OP_REQUIRES_OK(ctx, LookupResource(ctx, HandleFromInput(ctx, 0), &s));
     core::ScopedUnref unref(s);
     const Tensor* tmp;
-    OP_REQUIRES_OK(ctx, ctx->input("global_step", &tmp));
-    const int64 global_step = tmp->scalar<int64>()();
+    OP_REQUIRES_OK(ctx, ctx->input("step", &tmp));
+    const int64 step = tmp->scalar<int64>()();
     OP_REQUIRES_OK(ctx, ctx->input("tag", &tmp));
     const string& tag = tmp->scalar<string>()();
 
     const Tensor* t;
     OP_REQUIRES_OK(ctx, ctx->input("value", &t));
 
-    OP_REQUIRES_OK(ctx, s->WriteScalar(global_step, *t, tag));
+    OP_REQUIRES_OK(ctx, s->WriteScalar(step, *t, tag));
   }
 };
 REGISTER_KERNEL_BUILDER(Name("WriteScalarSummary").Device(DEVICE_CPU),
@@ -181,15 +180,15 @@ class WriteHistogramSummaryOp : public OpKernel {
     OP_REQUIRES_OK(ctx, LookupResource(ctx, HandleFromInput(ctx, 0), &s));
     core::ScopedUnref unref(s);
     const Tensor* tmp;
-    OP_REQUIRES_OK(ctx, ctx->input("global_step", &tmp));
-    const int64 global_step = tmp->scalar<int64>()();
+    OP_REQUIRES_OK(ctx, ctx->input("step", &tmp));
+    const int64 step = tmp->scalar<int64>()();
     OP_REQUIRES_OK(ctx, ctx->input("tag", &tmp));
     const string& tag = tmp->scalar<string>()();
 
     const Tensor* t;
     OP_REQUIRES_OK(ctx, ctx->input("values", &t));
 
-    OP_REQUIRES_OK(ctx, s->WriteHistogram(global_step, *t, tag));
+    OP_REQUIRES_OK(ctx, s->WriteHistogram(step, *t, tag));
   }
 };
 REGISTER_KERNEL_BUILDER(Name("WriteHistogramSummary").Device(DEVICE_CPU),
@@ -210,8 +209,8 @@ class WriteImageSummaryOp : public OpKernel {
     OP_REQUIRES_OK(ctx, LookupResource(ctx, HandleFromInput(ctx, 0), &s));
     core::ScopedUnref unref(s);
     const Tensor* tmp;
-    OP_REQUIRES_OK(ctx, ctx->input("global_step", &tmp));
-    const int64 global_step = tmp->scalar<int64>()();
+    OP_REQUIRES_OK(ctx, ctx->input("step", &tmp));
+    const int64 step = tmp->scalar<int64>()();
     OP_REQUIRES_OK(ctx, ctx->input("tag", &tmp));
     const string& tag = tmp->scalar<string>()();
     const Tensor* bad_color;
@@ -224,8 +223,7 @@ class WriteImageSummaryOp : public OpKernel {
     const Tensor* t;
     OP_REQUIRES_OK(ctx, ctx->input("tensor", &t));
 
-    OP_REQUIRES_OK(
-        ctx, s->WriteImage(global_step, *t, tag, max_images_, *bad_color));
+    OP_REQUIRES_OK(ctx, s->WriteImage(step, *t, tag, max_images_, *bad_color));
   }
 
  private:
@@ -247,8 +245,8 @@ class WriteAudioSummaryOp : public OpKernel {
     OP_REQUIRES_OK(ctx, LookupResource(ctx, HandleFromInput(ctx, 0), &s));
     core::ScopedUnref unref(s);
     const Tensor* tmp;
-    OP_REQUIRES_OK(ctx, ctx->input("global_step", &tmp));
-    const int64 global_step = tmp->scalar<int64>()();
+    OP_REQUIRES_OK(ctx, ctx->input("step", &tmp));
+    const int64 step = tmp->scalar<int64>()();
     OP_REQUIRES_OK(ctx, ctx->input("tag", &tmp));
     const string& tag = tmp->scalar<string>()();
     OP_REQUIRES_OK(ctx, ctx->input("sample_rate", &tmp));
@@ -257,8 +255,8 @@ class WriteAudioSummaryOp : public OpKernel {
     const Tensor* t;
     OP_REQUIRES_OK(ctx, ctx->input("tensor", &t));
 
-    OP_REQUIRES_OK(
-        ctx, s->WriteAudio(global_step, *t, tag, max_outputs_, sample_rate));
+    OP_REQUIRES_OK(ctx,
+                   s->WriteAudio(step, *t, tag, max_outputs_, sample_rate));
   }
 
  private:
@@ -278,8 +276,8 @@ class WriteGraphSummaryOp : public OpKernel {
     OP_REQUIRES_OK(ctx, LookupResource(ctx, HandleFromInput(ctx, 0), &s));
     core::ScopedUnref unref(s);
     const Tensor* t;
-    OP_REQUIRES_OK(ctx, ctx->input("global_step", &t));
-    const int64 global_step = t->scalar<int64>()();
+    OP_REQUIRES_OK(ctx, ctx->input("step", &t));
+    const int64 step = t->scalar<int64>()();
     OP_REQUIRES_OK(ctx, ctx->input("tensor", &t));
     std::unique_ptr<GraphDef> graph{new GraphDef};
     if (!ParseProtoUnlimited(graph.get(), t->scalar<string>()())) {
@@ -287,7 +285,7 @@ class WriteGraphSummaryOp : public OpKernel {
           errors::DataLoss("Bad tf.GraphDef binary proto tensor string"));
       return;
     }
-    OP_REQUIRES_OK(ctx, s->WriteGraph(global_step, std::move(graph)));
+    OP_REQUIRES_OK(ctx, s->WriteGraph(step, std::move(graph)));
   }
 };
 REGISTER_KERNEL_BUILDER(Name("WriteGraphSummary").Device(DEVICE_CPU),
diff --git a/tensorflow/core/ops/summary_ops.cc b/tensorflow/core/ops/summary_ops.cc
index 7f6d8b06cd..029ff09906 100644
--- a/tensorflow/core/ops/summary_ops.cc
+++ b/tensorflow/core/ops/summary_ops.cc
@@ -99,7 +99,7 @@ writer: A handle to the summary writer resource.
 
 REGISTER_OP("WriteSummary")
     .Input("writer: resource")
-    .Input("global_step: int64")
+    .Input("step: int64")
     .Input("tensor: T")
     .Input("tag: string")
     .Input("summary_metadata: string")
@@ -109,7 +109,7 @@ REGISTER_OP("WriteSummary")
 Outputs a `Summary` protocol buffer with a tensor.
 
 writer: A handle to a summary writer.
-global_step: The step to write the summary for.
+step: The step to write the summary for.
 tensor: A tensor to serialize.
 tag: The summary's tag.
 summary_metadata: Serialized SummaryMetadata protocol buffer containing
@@ -132,7 +132,7 @@ event: A string containing a binary-encoded tf.Event proto.
 
 REGISTER_OP("WriteScalarSummary")
     .Input("writer: resource")
-    .Input("global_step: int64")
+    .Input("step: int64")
     .Input("tag: string")
     .Input("value: T")
     .Attr("T: realnumbertype")
@@ -143,14 +143,14 @@ Writes a `Summary` protocol buffer with scalar values.
 The input `tag` and `value` must have the scalars.
 
 writer: A handle to a summary writer.
-global_step: The step to write the summary for.
+step: The step to write the summary for.
 tag: Tag for the summary.
 value: Value for the summary.
 )doc");
 
 REGISTER_OP("WriteHistogramSummary")
     .Input("writer: resource")
-    .Input("global_step: int64")
+    .Input("step: int64")
     .Input("tag: string")
     .Input("values: T")
     .Attr("T: realnumbertype = DT_FLOAT")
@@ -165,14 +165,14 @@ has one summary value containing a histogram for `values`.
 This op reports an `InvalidArgument` error if any value is not finite.
 
 writer: A handle to a summary writer.
-global_step: The step to write the summary for.
+step: The step to write the summary for.
 tag: Scalar.  Tag to use for the `Summary.Value`.
 values: Any shape. Values to use to build the histogram.
 )doc");
 
 REGISTER_OP("WriteImageSummary")
     .Input("writer: resource")
-    .Input("global_step: int64")
+    .Input("step: int64")
     .Input("tag: string")
     .Input("tensor: T")
     .Input("bad_color: uint8")
@@ -217,7 +217,7 @@ replaced by this tensor in the output image.  The default value is the color
 red.
 
 writer: A handle to a summary writer.
-global_step: The step to write the summary for.
+step: The step to write the summary for.
 tag: Scalar. Used to build the `tag` attribute of the summary values.
 tensor: 4-D of shape `[batch_size, height, width, channels]` where
   `channels` is 1, 3, or 4.
@@ -227,7 +227,7 @@ bad_color: Color to use for pixels with non-finite values.
 
 REGISTER_OP("WriteAudioSummary")
     .Input("writer: resource")
-    .Input("global_step: int64")
+    .Input("step: int64")
     .Input("tag: string")
     .Input("tensor: float")
     .Input("sample_rate: float")
@@ -249,7 +249,7 @@ build the `tag` of the summary values:
    generated sequentially as '*tag*/audio/0', '*tag*/audio/1', etc.
 
 writer: A handle to a summary writer.
-global_step: The step to write the summary for.
+step: The step to write the summary for.
 tag: Scalar. Used to build the `tag` attribute of the summary values.
 tensor: 2-D of shape `[batch_size, frames]`.
 sample_rate: The sample rate of the signal in hertz.
@@ -258,14 +258,14 @@ max_outputs: Max number of batch elements to generate audio for.
 
 REGISTER_OP("WriteGraphSummary")
     .Input("writer: resource")
-    .Input("global_step: int64")
+    .Input("step: int64")
     .Input("tensor: string")
     .SetShapeFn(shape_inference::NoOutputs)
     .Doc(R"doc(
 Writes a `GraphDef` protocol buffer to a `SummaryWriter`.
 
 writer: Handle of `SummaryWriter`.
-global_step: The step to write the summary for.
+step: The step to write the summary for.
 tensor: A scalar string of the serialized tf.GraphDef proto.
 )doc");
 
-- 
GitLab


From 7a2a3b40d518baa0c9bc4231df434fa09857cee4 Mon Sep 17 00:00:00 2001
From: Blake Hechtman <blakehechtman@google.com>
Date: Thu, 16 Nov 2017 21:04:50 -0800
Subject: [PATCH 0046/1225] [XLA] Rematerialization and fusion need to consider
 nested computations to determine if rematerializablity or Fusabiltiy.

PiperOrigin-RevId: 176064783
---
 .../compiler/xla/service/hlo_instruction.cc    | 18 +++---------------
 .../xla/service/hlo_rematerialization.cc       |  9 +--------
 2 files changed, 4 insertions(+), 23 deletions(-)

diff --git a/tensorflow/compiler/xla/service/hlo_instruction.cc b/tensorflow/compiler/xla/service/hlo_instruction.cc
index a0795a7b36..e3fdc53b7f 100644
--- a/tensorflow/compiler/xla/service/hlo_instruction.cc
+++ b/tensorflow/compiler/xla/service/hlo_instruction.cc
@@ -793,7 +793,7 @@ HloInstruction* HloInstruction::FuseInstructionInternal(
 HloInstruction* HloInstruction::CloneAndFuseInternal(
     HloInstruction* instruction_to_fuse, bool add_output) {
   CHECK_EQ(opcode_, HloOpcode::kFusion);
-  CHECK(instruction_to_fuse->IsFusable());
+  CHECK(instruction_to_fuse->IsFusable()) << instruction_to_fuse->ToString();
   VLOG(3) << "CloneAndFuseInternal:\n" << instruction_to_fuse->ToString();
   HloInstruction* clone = nullptr;
   if (called_computations_.empty()) {
@@ -2134,25 +2134,13 @@ bool HloInstruction::IsFusable() const {
   if (tracing()) {
     return false;
   }
-
   // Some kinds of instructions don't make sense to fuse.
   switch (opcode_) {
-    case HloOpcode::kInfeed:
-    case HloOpcode::kOutfeed:
     case HloOpcode::kParameter:
-    case HloOpcode::kTrace:
-    case HloOpcode::kRecv:
-    case HloOpcode::kRecvDone:
-    case HloOpcode::kSend:
-    case HloOpcode::kSendDone:
       return false;
-    // Only fuse Rng if it is used once, otherwise the random numbers generated
-    // will be different in each fusion. If it is the root (user count = 0)
-    // then it is the equivalent of having one user.
-    case HloOpcode::kRng:
-      return users_.size() <= 1;
+    // Side effecting instrutions cannot be fused.
     default:
-      return true;
+      return !HasSideEffect();
   }
 }
 
diff --git a/tensorflow/compiler/xla/service/hlo_rematerialization.cc b/tensorflow/compiler/xla/service/hlo_rematerialization.cc
index 828be8490c..017f996bc4 100644
--- a/tensorflow/compiler/xla/service/hlo_rematerialization.cc
+++ b/tensorflow/compiler/xla/service/hlo_rematerialization.cc
@@ -62,18 +62,11 @@ bool IsRematerializable(const HloInstruction* instruction) {
     case HloOpcode::kConstant:
     case HloOpcode::kCrossReplicaSum:
     case HloOpcode::kCustomCall:
-    case HloOpcode::kOutfeed:
-    case HloOpcode::kInfeed:
     case HloOpcode::kParameter:
-    case HloOpcode::kRecv:
-    case HloOpcode::kRecvDone:
-    case HloOpcode::kSend:
-    case HloOpcode::kSendDone:
-    case HloOpcode::kTrace:
     case HloOpcode::kWhile:
       return false;
     default:
-      return true;
+      return !instruction->HasSideEffect();
   }
 }
 
-- 
GitLab


From 389d4001261df5a0f0db1ed869e2c72fefb2297e Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 16 Nov 2017 21:13:31 -0800
Subject: [PATCH 0047/1225] Go: Update generated wrapper functions for
 TensorFlow ops.

PiperOrigin-RevId: 176065246
---
 tensorflow/go/op/wrappers.go | 36 ++++++++++++++++++------------------
 1 file changed, 18 insertions(+), 18 deletions(-)

diff --git a/tensorflow/go/op/wrappers.go b/tensorflow/go/op/wrappers.go
index e650d25a32..1d1383ec82 100644
--- a/tensorflow/go/op/wrappers.go
+++ b/tensorflow/go/op/wrappers.go
@@ -44,19 +44,19 @@ func makeOutputList(op *tf.Operation, start int, output string) ([]tf.Output, in
 //
 // Arguments:
 //	writer: A handle to a summary writer.
-//	global_step: The step to write the summary for.
+//	step: The step to write the summary for.
 //	tag: Tag for the summary.
 //	value: Value for the summary.
 //
 // Returns the created operation.
-func WriteScalarSummary(scope *Scope, writer tf.Output, global_step tf.Output, tag tf.Output, value tf.Output) (o *tf.Operation) {
+func WriteScalarSummary(scope *Scope, writer tf.Output, step tf.Output, tag tf.Output, value tf.Output) (o *tf.Operation) {
 	if scope.Err() != nil {
 		return
 	}
 	opspec := tf.OpSpec{
 		Type: "WriteScalarSummary",
 		Input: []tf.Input{
-			writer, global_step, tag, value,
+			writer, step, tag, value,
 		},
 	}
 	return scope.AddOperation(opspec)
@@ -89,21 +89,21 @@ func ImportEvent(scope *Scope, writer tf.Output, event tf.Output) (o *tf.Operati
 //
 // Arguments:
 //	writer: A handle to a summary writer.
-//	global_step: The step to write the summary for.
+//	step: The step to write the summary for.
 //	tensor: A tensor to serialize.
 //	tag: The summary's tag.
 //	summary_metadata: Serialized SummaryMetadata protocol buffer containing
 // plugin-related metadata for this summary.
 //
 // Returns the created operation.
-func WriteSummary(scope *Scope, writer tf.Output, global_step tf.Output, tensor tf.Output, tag tf.Output, summary_metadata tf.Output) (o *tf.Operation) {
+func WriteSummary(scope *Scope, writer tf.Output, step tf.Output, tensor tf.Output, tag tf.Output, summary_metadata tf.Output) (o *tf.Operation) {
 	if scope.Err() != nil {
 		return
 	}
 	opspec := tf.OpSpec{
 		Type: "WriteSummary",
 		Input: []tf.Input{
-			writer, global_step, tensor, tag, summary_metadata,
+			writer, step, tensor, tag, summary_metadata,
 		},
 	}
 	return scope.AddOperation(opspec)
@@ -2147,19 +2147,19 @@ func ConcatOffset(scope *Scope, concat_dim tf.Output, shape []tf.Output) (offset
 //
 // Arguments:
 //	writer: A handle to a summary writer.
-//	global_step: The step to write the summary for.
+//	step: The step to write the summary for.
 //	tag: Scalar.  Tag to use for the `Summary.Value`.
 //	values: Any shape. Values to use to build the histogram.
 //
 // Returns the created operation.
-func WriteHistogramSummary(scope *Scope, writer tf.Output, global_step tf.Output, tag tf.Output, values tf.Output) (o *tf.Operation) {
+func WriteHistogramSummary(scope *Scope, writer tf.Output, step tf.Output, tag tf.Output, values tf.Output) (o *tf.Operation) {
 	if scope.Err() != nil {
 		return
 	}
 	opspec := tf.OpSpec{
 		Type: "WriteHistogramSummary",
 		Input: []tf.Input{
-			writer, global_step, tag, values,
+			writer, step, tag, values,
 		},
 	}
 	return scope.AddOperation(opspec)
@@ -11101,13 +11101,13 @@ func WriteAudioSummaryMaxOutputs(value int64) WriteAudioSummaryAttr {
 //
 // Arguments:
 //	writer: A handle to a summary writer.
-//	global_step: The step to write the summary for.
+//	step: The step to write the summary for.
 //	tag: Scalar. Used to build the `tag` attribute of the summary values.
 //	tensor: 2-D of shape `[batch_size, frames]`.
 //	sample_rate: The sample rate of the signal in hertz.
 //
 // Returns the created operation.
-func WriteAudioSummary(scope *Scope, writer tf.Output, global_step tf.Output, tag tf.Output, tensor tf.Output, sample_rate tf.Output, optional ...WriteAudioSummaryAttr) (o *tf.Operation) {
+func WriteAudioSummary(scope *Scope, writer tf.Output, step tf.Output, tag tf.Output, tensor tf.Output, sample_rate tf.Output, optional ...WriteAudioSummaryAttr) (o *tf.Operation) {
 	if scope.Err() != nil {
 		return
 	}
@@ -11118,7 +11118,7 @@ func WriteAudioSummary(scope *Scope, writer tf.Output, global_step tf.Output, ta
 	opspec := tf.OpSpec{
 		Type: "WriteAudioSummary",
 		Input: []tf.Input{
-			writer, global_step, tag, tensor, sample_rate,
+			writer, step, tag, tensor, sample_rate,
 		},
 		Attrs: attrs,
 	}
@@ -18248,14 +18248,14 @@ func WriteImageSummaryMaxImages(value int64) WriteImageSummaryAttr {
 //
 // Arguments:
 //	writer: A handle to a summary writer.
-//	global_step: The step to write the summary for.
+//	step: The step to write the summary for.
 //	tag: Scalar. Used to build the `tag` attribute of the summary values.
 //	tensor: 4-D of shape `[batch_size, height, width, channels]` where
 // `channels` is 1, 3, or 4.
 //	bad_color: Color to use for pixels with non-finite values.
 //
 // Returns the created operation.
-func WriteImageSummary(scope *Scope, writer tf.Output, global_step tf.Output, tag tf.Output, tensor tf.Output, bad_color tf.Output, optional ...WriteImageSummaryAttr) (o *tf.Operation) {
+func WriteImageSummary(scope *Scope, writer tf.Output, step tf.Output, tag tf.Output, tensor tf.Output, bad_color tf.Output, optional ...WriteImageSummaryAttr) (o *tf.Operation) {
 	if scope.Err() != nil {
 		return
 	}
@@ -18266,7 +18266,7 @@ func WriteImageSummary(scope *Scope, writer tf.Output, global_step tf.Output, ta
 	opspec := tf.OpSpec{
 		Type: "WriteImageSummary",
 		Input: []tf.Input{
-			writer, global_step, tag, tensor, bad_color,
+			writer, step, tag, tensor, bad_color,
 		},
 		Attrs: attrs,
 	}
@@ -20657,18 +20657,18 @@ func Sub(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) {
 //
 // Arguments:
 //	writer: Handle of `SummaryWriter`.
-//	global_step: The step to write the summary for.
+//	step: The step to write the summary for.
 //	tensor: A scalar string of the serialized tf.GraphDef proto.
 //
 // Returns the created operation.
-func WriteGraphSummary(scope *Scope, writer tf.Output, global_step tf.Output, tensor tf.Output) (o *tf.Operation) {
+func WriteGraphSummary(scope *Scope, writer tf.Output, step tf.Output, tensor tf.Output) (o *tf.Operation) {
 	if scope.Err() != nil {
 		return
 	}
 	opspec := tf.OpSpec{
 		Type: "WriteGraphSummary",
 		Input: []tf.Input{
-			writer, global_step, tensor,
+			writer, step, tensor,
 		},
 	}
 	return scope.AddOperation(opspec)
-- 
GitLab


From 724ca9f1a5a7428e74b62c8e2e6061244af93ace Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 17 Nov 2017 07:21:52 -0800
Subject: [PATCH 0048/1225] [XLA] Enable operand buffer aliasing for Call
 instructions where the unique use of operand in the called computation is the
 root instruction which is elemenentwise on the operand. This eliminates
 copies of Call instruction result buffers induced by parallel computation
 outlining on the XLA:CPU backend.

PiperOrigin-RevId: 176106140
---
 .../compiler/xla/service/liveness_util.cc     | 52 ++++++++++++++++++-
 .../xla/service/liveness_util_test.cc         | 39 ++++++++++++++
 2 files changed, 90 insertions(+), 1 deletion(-)

diff --git a/tensorflow/compiler/xla/service/liveness_util.cc b/tensorflow/compiler/xla/service/liveness_util.cc
index 53d88eda7a..68c99256a2 100644
--- a/tensorflow/compiler/xla/service/liveness_util.cc
+++ b/tensorflow/compiler/xla/service/liveness_util.cc
@@ -103,7 +103,7 @@ namespace {
 
 // Returns all uses of all aliases of 'instruction' at 'index' in 'uses'.
 // Each use in 'uses' is a pair (HloInstruction* user, int64 operand_index)
-// where 'user' is a user of an alias of 'intruction' at 'index', and
+// where 'user' is a user of an alias of 'instruction' at 'index', and
 // 'operand_index' is the operand index at which the alias appears in the
 // operand list of 'user'.
 std::vector<std::pair<HloInstruction*, int64>> GetAllUsesOfInstructionAtIndex(
@@ -243,6 +243,31 @@ bool CanShareOperandBufferWithUser(
     std::vector<int64> operand_indices = user->OperandIndices(operand);
     return operand_indices.size() == 1 && operand_indices[0] == 0;
   }
+  if (user->opcode() == HloOpcode::kCall) {
+    // TODO(b/62548313): Remove when buffer assignment is module scoped and
+    // does not assign buffers to calls.
+    // Find called computation parameter associated with 'operand'.
+    const std::vector<int64> operand_indices = user->OperandIndices(operand);
+    if (operand_indices.size() > 1) {
+      return false;
+    }
+    CHECK_EQ(1, operand_indices.size());
+    auto* param = user->to_apply()->parameter_instruction(operand_indices[0]);
+    // Get all uses of 'operand' at 'index' in called computation.
+    auto param_uses = GetAllUsesOfInstructionAtIndex(param, operand_index,
+                                                     points_to_analysis);
+
+    // Return true iff:
+    // *) There exists exactly one use of 'operand' in called computation.
+    // *) The unique use is by the root instruction of called computation.
+    //    (Note: we check the root of the called computation, because the
+    //     root result buffer is required to alias with the Call result buffer).
+    // *) The root instruction of the called computation is element-wise on
+    //    'operand'.
+    auto* callee_root = user->to_apply()->root_instruction();
+    return param_uses.size() == 1 && param_uses[0].first == callee_root &&
+           callee_root->IsElementwiseOnOperand(param_uses[0].second);
+  }
   // Check if 'user' is element-wise.
   return user->IsElementwise();
 }
@@ -322,6 +347,31 @@ bool CanShareOperandBufferWithUser(HloInstruction* operand,
     std::vector<int64> operand_indices = user->OperandIndices(operand);
     return operand_indices.size() == 1 && operand_indices[0] == 0;
   }
+  if (user->opcode() == HloOpcode::kCall) {
+    // Get all uses of value defined by 'operand' at 'operand_index'.
+    const auto& uses =
+        dataflow.GetValueDefinedAt(operand, operand_index).uses();
+    // Return true iff:
+    // *) There exists two uses of 'operand'.
+    // *) One use is by 'user' (caller).
+    // *) One use is by root instruction of called computation (callee root).
+    //    (Note: we check the root of the called computation, because the
+    //     root result buffer is required to alias with the Call result buffer).
+    // *) The root instruction of the called computation is element-wise on
+    //    'operand'.
+    const bool found_caller_use =
+        std::find_if(uses.begin(), uses.end(), [user](const HloUse& use) {
+          return use.instruction == user;
+        }) != uses.end();
+    auto* callee_root = user->to_apply()->root_instruction();
+    const bool found_elementwise_callee_use =
+        std::find_if(
+            uses.begin(), uses.end(), [callee_root](const HloUse& use) {
+              return use.instruction == callee_root &&
+                     callee_root->IsElementwiseOnOperand(use.operand_number);
+            }) != uses.end();
+    return uses.size() == 2 && found_caller_use && found_elementwise_callee_use;
+  }
   // Check if 'user' is element-wise.
   return user->IsElementwise();
 }
diff --git a/tensorflow/compiler/xla/service/liveness_util_test.cc b/tensorflow/compiler/xla/service/liveness_util_test.cc
index b5e15906d3..476e86fa72 100644
--- a/tensorflow/compiler/xla/service/liveness_util_test.cc
+++ b/tensorflow/compiler/xla/service/liveness_util_test.cc
@@ -415,5 +415,44 @@ TEST_F(CanShareOperandBufferWithUserTest, WhileCanShare) {
       CanShareOperandBufferWithUser(data, {}, whil, {}, *dataflow_analysis_));
 }
 
+// Tests that Call can alias operand buffer if the only use of the operand
+// in the called computation is an elementwise instruction.
+TEST_F(CanShareOperandBufferWithUserTest, CallToComputationWithFusionRoot) {
+  Shape shape = ShapeUtil::MakeShape(F32, {8});
+  // Build sub-computation with fusion root.
+  auto sub_builder = HloComputation::Builder(TestName() + "_sub");
+  auto sub_param = sub_builder.AddInstruction(
+      HloInstruction::CreateParameter(0, shape, "sub_param"));
+  auto one = sub_builder.AddInstruction(
+      HloInstruction::CreateConstant(Literal::CreateR0<float>(1.0)));
+  auto ones = sub_builder.AddInstruction(
+      HloInstruction::CreateBroadcast(shape, one, {1}));
+  auto add = sub_builder.AddInstruction(
+      HloInstruction::CreateBinary(shape, HloOpcode::kAdd, sub_param, ones));
+
+  module_ = CreateNewModule();
+  auto sub_computation = module_->AddEmbeddedComputation(sub_builder.Build());
+  sub_computation->CreateFusionInstruction({add, ones},
+                                           HloInstruction::FusionKind::kLoop);
+
+  // Build entry-computation with kCall which calls 'sub_computation'.
+  auto builder = HloComputation::Builder(TestName());
+
+  auto param = builder.AddInstruction(
+      HloInstruction::CreateParameter(0, shape, "param"));
+  auto reverse =
+      builder.AddInstruction(HloInstruction::CreateReverse(shape, param, {0}));
+  auto call = builder.AddInstruction(
+      HloInstruction::CreateCall(shape, {reverse}, sub_computation));
+  computation_ = module_->AddEntryComputation(builder.Build());
+
+  RunAnalysis();
+
+  EXPECT_TRUE(CanShareOperandBufferWithUser(reverse, {}, call, {},
+                                            *points_to_analysis_));
+  EXPECT_TRUE(CanShareOperandBufferWithUser(reverse, {}, call, {},
+                                            *dataflow_analysis_));
+}
+
 }  // namespace
 }  // namespace xla
-- 
GitLab


From 881f84796f2559c0e7fd8081d7449a214a4cf7ac Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 17 Nov 2017 07:36:46 -0800
Subject: [PATCH 0049/1225] internal change

PiperOrigin-RevId: 176107131
---
 tensorflow/contrib/lite/toco/format_port.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/contrib/lite/toco/format_port.h b/tensorflow/contrib/lite/toco/format_port.h
index 3bc3295d04..0e999001e0 100644
--- a/tensorflow/contrib/lite/toco/format_port.h
+++ b/tensorflow/contrib/lite/toco/format_port.h
@@ -36,7 +36,7 @@ inline const char* IdentityOrConvertStringToRaw(const std::string& foo) {
   return foo.c_str();
 }
 
-#if defined(PLATFORM_GOOGLE)
+#if defined(PLATFORM_GOOGLE) && defined(HAS_GLOBAL_STRING)
 // Overloaded case where we return string.
 inline const char* IdentityOrConvertStringToRaw(const string& foo) {
   return foo.c_str();
-- 
GitLab


From 573a652ec5512a35d84d5b4b4400d7430baa854a Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 17 Nov 2017 09:26:32 -0800
Subject: [PATCH 0050/1225] Add Speech ASR Language Model test.

PiperOrigin-RevId: 176117985
---
 .../lite/models/speech_terse_lm_model_test.cc | 122 ++++++++++++++++++
 1 file changed, 122 insertions(+)
 create mode 100644 tensorflow/contrib/lite/models/speech_terse_lm_model_test.cc

diff --git a/tensorflow/contrib/lite/models/speech_terse_lm_model_test.cc b/tensorflow/contrib/lite/models/speech_terse_lm_model_test.cc
new file mode 100644
index 0000000000..04c54ffb22
--- /dev/null
+++ b/tensorflow/contrib/lite/models/speech_terse_lm_model_test.cc
@@ -0,0 +1,122 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+// Unit test for speech ASR LM model using TFLite Ops.
+
+#include <string.h>
+
+#include <memory>
+#include <string>
+
+#include "base/logging.h"
+#include "file/base/path.h"
+#include "testing/base/public/googletest.h"
+#include <gtest/gtest.h>
+#include "tensorflow/contrib/lite/context.h"
+#include "tensorflow/contrib/lite/interpreter.h"
+#include "tensorflow/contrib/lite/kernels/register.h"
+#include "tensorflow/contrib/lite/model.h"
+#include "tensorflow/contrib/lite/models/test_utils.h"
+
+namespace tflite {
+namespace models {
+
+constexpr int kModelInput1Tensor = 0;
+constexpr int kModelInput2Tensor = 66;
+constexpr int kLstmLayer1OutputStateTensor = 21;
+constexpr int kLstmLayer1CellStateTensor = 22;
+constexpr int kLstmLayer2OutputStateTensor = 42;
+constexpr int kLstmLayer2CellStateTensor = 43;
+constexpr int kLstmLayer3OutputStateTensor = 63;
+constexpr int kLstmLayer3CellStateTensor = 64;
+constexpr int kModelOutputTensor = 75;
+
+static void ClearLstmStates(Interpreter* interpreter) {
+  memset(interpreter->tensor(kLstmLayer1OutputStateTensor)->data.raw, 0,
+         interpreter->tensor(kLstmLayer1OutputStateTensor)->bytes);
+  memset(interpreter->tensor(kLstmLayer1CellStateTensor)->data.raw, 0,
+         interpreter->tensor(kLstmLayer1CellStateTensor)->bytes);
+
+  memset(interpreter->tensor(kLstmLayer2OutputStateTensor)->data.raw, 0,
+         interpreter->tensor(kLstmLayer2OutputStateTensor)->bytes);
+  memset(interpreter->tensor(kLstmLayer2CellStateTensor)->data.raw, 0,
+         interpreter->tensor(kLstmLayer2CellStateTensor)->bytes);
+
+  memset(interpreter->tensor(kLstmLayer3OutputStateTensor)->data.raw, 0,
+         interpreter->tensor(kLstmLayer3OutputStateTensor)->bytes);
+  memset(interpreter->tensor(kLstmLayer3CellStateTensor)->data.raw, 0,
+         interpreter->tensor(kLstmLayer3CellStateTensor)->bytes);
+}
+
+TEST(SpeechTerseLm, EndToEndTest) {
+  // Read the model.
+  string tflite_file_path =
+      file::JoinPath(TestDataPath(), "speech_terse_lm_model.tflite");
+  auto model = FlatBufferModel::BuildFromFile(tflite_file_path.c_str());
+  CHECK(model) << "Failed to mmap model " << tflite_file_path;
+
+  // Initialize the interpreter.
+  ops::builtin::BuiltinOpResolver builtins;
+  std::unique_ptr<Interpreter> interpreter;
+  InterpreterBuilder(*model, builtins)(&interpreter);
+  CHECK(interpreter != nullptr);
+  interpreter->AllocateTensors();
+
+  // Load the input frames.
+  Frames input_frames;
+  const string input_file_path =
+      file::JoinPath(TestDataPath(), "speech_terse_lm_model_in.csv");
+  ReadFrames(input_file_path, &input_frames);
+
+  // Load the golden output results.
+  Frames output_frames;
+  const string output_file_path =
+      file::JoinPath(TestDataPath(), "speech_terse_lm_model_out.csv");
+  ReadFrames(output_file_path, &output_frames);
+
+  CHECK_EQ(interpreter->tensor(kModelInput1Tensor)->dims->size, 1);
+  const int input1_size =
+      interpreter->tensor(kModelInput1Tensor)->dims->data[0];
+  CHECK_EQ(input1_size, 1);
+  CHECK_EQ(interpreter->tensor(kModelInput2Tensor)->dims->size, 1);
+  const int output_size =
+      interpreter->tensor(kModelOutputTensor)->dims->data[0];
+  CHECK_EQ(output_size, 1);
+
+  int* input_lookup_ptr = interpreter->tensor(kModelInput1Tensor)->data.i32;
+  int* output_lookup_ptr = interpreter->tensor(kModelInput2Tensor)->data.i32;
+  float* output_ptr = interpreter->tensor(kModelOutputTensor)->data.f;
+
+
+  for (int i = 0; i < input_frames.size(); i++) {
+    float output_score = 0.0f;
+    // Reset LSTM states for each sequence.
+    ClearLstmStates(interpreter.get());
+    // For subsequent inputs feed them sequentially, one-by-one.
+    for (int k = 1; k < input_frames[i].size(); k++) {
+      // Feed the inputs to model.
+      input_lookup_ptr[0] = static_cast<int32>(input_frames[i][k - 1]);
+      output_lookup_ptr[0] = static_cast<int32>(input_frames[i][k]);
+      // Run the model.
+      interpreter->Invoke();
+      // Sum up the outputs.
+      output_score += output_ptr[0];
+    }
+    // Validate the output.
+    ASSERT_NEAR(output_score, output_frames[i][0], 1.4e-5);
+  }
+}
+
+}  // namespace models
+}  // namespace tflite
-- 
GitLab


From be4295e796437d18ffb7242942c963a8857e5003 Mon Sep 17 00:00:00 2001
From: Benoit Steiner <bsteiner@google.com>
Date: Fri, 17 Nov 2017 10:10:37 -0800
Subject: [PATCH 0051/1225] Created new shared IsFreeOfSideEffect and
 ModifiedFrameInfo functions.

PiperOrigin-RevId: 176124088
---
 tensorflow/core/grappler/BUILD                |  2 ++
 tensorflow/core/grappler/op_types.cc          | 28 +++++++++++++++++++
 tensorflow/core/grappler/op_types.h           |  3 ++
 .../optimizers/arithmetic_optimizer.cc        | 26 ++++-------------
 .../optimizers/arithmetic_optimizer.h         |  8 ++----
 .../optimizers/dependency_optimizer.cc        |  9 ++++--
 6 files changed, 48 insertions(+), 28 deletions(-)

diff --git a/tensorflow/core/grappler/BUILD b/tensorflow/core/grappler/BUILD
index 7b18e79c8d..c81c6c0f21 100644
--- a/tensorflow/core/grappler/BUILD
+++ b/tensorflow/core/grappler/BUILD
@@ -21,6 +21,8 @@ cc_library(
     hdrs = ["op_types.h"],
     visibility = ["//visibility:public"],
     deps = [
+        "//tensorflow/core:framework",
+        "//tensorflow/core:lib",
         "//tensorflow/core:protos_all_cc",
     ],
 )
diff --git a/tensorflow/core/grappler/op_types.cc b/tensorflow/core/grappler/op_types.cc
index acb8498142..69bdef33c6 100644
--- a/tensorflow/core/grappler/op_types.cc
+++ b/tensorflow/core/grappler/op_types.cc
@@ -14,6 +14,8 @@ limitations under the License.
 ==============================================================================*/
 
 #include "tensorflow/core/grappler/op_types.h"
+#include "tensorflow/core/framework/op.h"
+#include "tensorflow/core/lib/core/status.h"
 
 namespace tensorflow {
 namespace grappler {
@@ -120,5 +122,31 @@ bool IsVariable(const NodeDef& node) {
          op == "VarHandleOp" || op == "ReadVariableOp";
 }
 
+bool IsFreeOfSideEffect(const NodeDef& node) {
+  // Placeholders must be preserved to keep the graph feedable.
+  if (IsPlaceholder(node)) {
+    return false;
+  }
+  const OpDef* op_def = nullptr;
+  Status status = OpRegistry::Global()->LookUpOpDef(node.op(), &op_def);
+  if (!status.ok()) {
+    return false;
+  }
+  if (op_def->is_stateful()) {
+    return false;
+  }
+  // Nodes such as Assign or AssignAdd modify one of their inputs.
+  for (const auto& input : op_def->input_arg()) {
+    if (input.is_ref()) {
+      return false;
+    }
+  }
+  return true;
+}
+
+bool ModifiesFrameInfo(const NodeDef& node) {
+  return IsEnter(node) || IsExit(node) || IsNextIteration(node);
+}
+
 }  // end namespace grappler
 }  // end namespace tensorflow
diff --git a/tensorflow/core/grappler/op_types.h b/tensorflow/core/grappler/op_types.h
index 0de954fcb4..a7c556c1ed 100644
--- a/tensorflow/core/grappler/op_types.h
+++ b/tensorflow/core/grappler/op_types.h
@@ -42,6 +42,9 @@ bool IsSwitch(const NodeDef& node);
 bool IsTranspose(const NodeDef& node);
 bool IsVariable(const NodeDef& node);
 
+bool IsFreeOfSideEffect(const NodeDef& node);
+bool ModifiesFrameInfo(const NodeDef& node);
+
 }  // end namespace grappler
 }  // end namespace tensorflow
 
diff --git a/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc b/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc
index 0cd0d4351e..2677888fcb 100644
--- a/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc
+++ b/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc
@@ -512,33 +512,17 @@ bool UniqueNodes::SameNode(const NodeDef& node1, const NodeDef& node2) const {
   return true;
 }
 
-// static
-bool ArithmeticOptimizer::CanDedup(
-    const NodeDef& node, const std::unordered_set<string>& nodes_to_preserve) {
-  if (nodes_to_preserve.find(node.name()) != nodes_to_preserve.end()) {
+bool ArithmeticOptimizer::CanDedup(const NodeDef& node) const {
+  if (nodes_to_preserve_.find(node.name()) != nodes_to_preserve_.end()) {
     return false;
   }
-  if (IsEnter(node) || IsExit(node) || IsPlaceholder(node)) {
+  if (IsEnter(node) || IsExit(node)) {
     return false;
   }
   if (node.device().find("SPU") != string::npos) {
     return false;
   }
-  const OpDef* op_def = nullptr;
-  Status status = OpRegistry::Global()->LookUpOpDef(node.op(), &op_def);
-  if (!status.ok()) {
-    return false;
-  }
-  if (op_def->is_stateful()) {
-    return false;
-  }
-  // Don't consolidate ops such as AssignAdd
-  for (const auto& input : op_def->input_arg()) {
-    if (input.is_ref()) {
-      return false;
-    }
-  }
-  return true;
+  return IsFreeOfSideEffect(node);
 }
 
 void ArithmeticOptimizer::DedupComputations(GraphDef* optimized_graph) const {
@@ -553,7 +537,7 @@ void ArithmeticOptimizer::DedupComputations(GraphDef* optimized_graph) const {
         continue;
       }
       NodeDef* node = optimized_graph->mutable_node(i);
-      if (!CanDedup(*node, nodes_to_preserve_)) {
+      if (!CanDedup(*node)) {
         continue;
       }
       NodeDef* rep = nodes.FindOrAddRepresentative(node);
diff --git a/tensorflow/core/grappler/optimizers/arithmetic_optimizer.h b/tensorflow/core/grappler/optimizers/arithmetic_optimizer.h
index c8cc292295..c22e2d5363 100644
--- a/tensorflow/core/grappler/optimizers/arithmetic_optimizer.h
+++ b/tensorflow/core/grappler/optimizers/arithmetic_optimizer.h
@@ -28,11 +28,6 @@ namespace grappler {
 // run a model.
 class ArithmeticOptimizer : public GraphOptimizer {
  public:
-  // Returns true if it is safe to dedup node from the graph.
-  // TODO(rmlarsen): Refactor to op_types.{h,cc}.
-  static bool CanDedup(const NodeDef& node,
-                       const std::unordered_set<string>& nodes_to_preserve);
-
   ArithmeticOptimizer() : opt_level_(RewriterConfig::ON) {}
   explicit ArithmeticOptimizer(RewriterConfig::Toggle opt_level)
       : opt_level_(opt_level) {}
@@ -47,6 +42,9 @@ class ArithmeticOptimizer : public GraphOptimizer {
                 const GraphDef& optimized_graph, double result) override;
 
  private:
+  // Returns true if it is safe to dedup node from the graph.
+  bool CanDedup(const NodeDef& node) const;
+
   void DedupComputations(GraphDef* optimized_graph) const;
   // Runs peep-hole optimizations on `optimized_graph`, e.g., removing inverse
   // transposes.
diff --git a/tensorflow/core/grappler/optimizers/dependency_optimizer.cc b/tensorflow/core/grappler/optimizers/dependency_optimizer.cc
index 49eb29d037..57eee60646 100644
--- a/tensorflow/core/grappler/optimizers/dependency_optimizer.cc
+++ b/tensorflow/core/grappler/optimizers/dependency_optimizer.cc
@@ -93,11 +93,16 @@ bool DependencyOptimizer::SafeToConvertToNoOp(const NodeDef& node) {
   if (!has_fetch_ || HasRegularOutputs(node, *node_map_)) {
     return false;
   }
-
+  if (nodes_to_preserve_.find(node.name()) != nodes_to_preserve_.end()) {
+    return false;
+  }
   if (IsMerge(node)) {
     return false;
   }
-  if (!ArithmeticOptimizer::CanDedup(node, nodes_to_preserve_)) {
+  if (ModifiesFrameInfo(node)) {
+    return false;
+  }
+  if (!IsFreeOfSideEffect(node)) {
     return false;
   }
 
-- 
GitLab


From 34d4986e661b8d45f7cec2a717c401c65f0a242f Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 17 Nov 2017 10:28:28 -0800
Subject: [PATCH 0052/1225] Fix the numbering of the LSTM layers in the figure.

PiperOrigin-RevId: 176126886
---
 tensorflow/contrib/lite/models/testdata/g3doc/asr_am.svg | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/contrib/lite/models/testdata/g3doc/asr_am.svg b/tensorflow/contrib/lite/models/testdata/g3doc/asr_am.svg
index ca96556422..9f841c219b 100644
--- a/tensorflow/contrib/lite/models/testdata/g3doc/asr_am.svg
+++ b/tensorflow/contrib/lite/models/testdata/g3doc/asr_am.svg
@@ -1,4 +1,4 @@
 <?xml version="1.0" standalone="yes"?>
 
-<svg version="1.1" viewBox="0.0 0.0 960.0 720.0" fill="none" stroke="none" stroke-linecap="square" stroke-miterlimit="10" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink"><clipPath id="p.0"><path d="m0 0l960.0 0l0 720.0l-960.0 0l0 -720.0z" clip-rule="nonzero"></path></clipPath><g clip-path="url(#p.0)"><path fill="#000000" fill-opacity="0.0" d="m0 0l960.0 0l0 720.0l-960.0 0z" fill-rule="evenodd"></path><path fill="#000000" fill-opacity="0.0" d="m392.0 30.700842l166.01575 0l0 42.110237l-166.01575 0z" fill-rule="evenodd"></path><path stroke="#000000" stroke-width="2.0" stroke-linejoin="round" stroke-linecap="butt" d="m392.0 30.700842l166.01575 0l0 42.110237l-166.01575 0z" fill-rule="evenodd"></path><path fill="#000000" d="m404.43954 57.620842l0 -13.59375l1.8125 0l0 13.59375l-1.8125 0zm4.6676636 0l0 -9.859375l1.5 0l0 1.40625q1.09375 -1.625 3.140625 -1.625q0.890625 0 1.640625 0.328125q0.75 0.3125 1.109375 0.84375q0.375 0.515625 0.53125 1.21875q0.09375 0.46875 0.09375 1.625l0 6.0625l-1.671875 0l0 -6.0q0 -1.015625 -0.203125 -1.515625q-0.1875 -0.515625 -0.6875 -0.8125q-0.5 -0.296875 -1.171875 -0.296875q-1.0625 0 -1.84375 0.671875q-0.765625 0.671875 -0.765625 2.578125l0 5.375l-1.671875 0zm10.375732 3.78125l0 -13.640625l1.53125 0l0 1.28125q0.53125 -0.75 1.203125 -1.125q0.6875 -0.375 1.640625 -0.375q1.265625 0 2.234375 0.65625q0.96875 0.640625 1.453125 1.828125q0.5 1.1875 0.5 2.59375q0 1.515625 -0.546875 2.734375q-0.546875 1.203125 -1.578125 1.84375q-1.03125 0.640625 -2.171875 0.640625q-0.84375 0 -1.515625 -0.34375q-0.65625 -0.359375 -1.078125 -0.890625l0 4.796875l-1.671875 0zm1.515625 -8.65625q0 1.90625 0.765625 2.8125q0.78125 0.90625 1.875 0.90625q1.109375 0 1.890625 -0.9375q0.796875 -0.9375 0.796875 -2.921875q0 -1.875 -0.78125 -2.8125q-0.765625 -0.9375 -1.84375 -0.9375q-1.0625 0 -1.890625 1.0q-0.8125 1.0 -0.8125 2.890625zm15.313202 4.875l0 -1.453125q-1.140625 1.671875 -3.125 1.671875q-0.859375 0 -1.625 -0.328125q-0.75 -0.34375 -1.125 -0.84375q-0.359375 -0.5 -0.515625 -1.234375q-0.09375 -0.5 -0.09375 -1.5625l0 -6.109375l1.671875 0l0 5.46875q0 1.3125 0.09375 1.765625q0.15625 0.65625 0.671875 1.03125q0.515625 0.375 1.265625 0.375q0.75 0 1.40625 -0.375q0.65625 -0.390625 0.921875 -1.046875q0.28125 -0.671875 0.28125 -1.9375l0 -5.28125l1.671875 0l0 9.859375l-1.5 0zm7.5788574 -1.5l0.234375 1.484375q-0.703125 0.140625 -1.265625 0.140625q-0.90625 0 -1.40625 -0.28125q-0.5 -0.296875 -0.703125 -0.75q-0.203125 -0.46875 -0.203125 -1.984375l0 -5.65625l-1.234375 0l0 -1.3125l1.234375 0l0 -2.4375l1.65625 -1.0l0 3.4375l1.6875 0l0 1.3125l-1.6875 0l0 5.75q0 0.71875 0.078125 0.921875q0.09375 0.203125 0.296875 0.328125q0.203125 0.125 0.578125 0.125q0.265625 0 0.734375 -0.078125zm9.897858 5.5q-1.375 -1.75 -2.328125 -4.078125q-0.953125 -2.34375 -0.953125 -4.84375q0 -2.21875 0.703125 -4.234375q0.84375 -2.34375 2.578125 -4.671875l1.203125 0q-1.125 1.921875 -1.484375 2.75q-0.5625 1.28125 -0.890625 2.671875q-0.40625 1.734375 -0.40625 3.484375q0 4.46875 2.78125 8.921875l-1.203125 0zm2.353302 -6.9375l1.65625 -0.265625q0.140625 1.0 0.765625 1.53125q0.640625 0.515625 1.78125 0.515625q1.15625 0 1.703125 -0.46875q0.5625 -0.46875 0.5625 -1.09375q0 -0.5625 -0.484375 -0.890625q-0.34375 -0.21875 -1.703125 -0.5625q-1.84375 -0.46875 -2.5625 -0.796875q-0.703125 -0.34375 -1.078125 -0.9375q-0.359375 -0.609375 -0.359375 -1.328125q0 -0.65625 0.296875 -1.21875q0.3125 -0.5625 0.828125 -0.9375q0.390625 -0.28125 1.0625 -0.484375q0.671875 -0.203125 1.4375 -0.203125q1.171875 0 2.046875 0.34375q0.875 0.328125 1.28125 0.90625q0.421875 0.5625 0.578125 1.515625l-1.625 0.21875q-0.109375 -0.75 -0.65625 -1.171875q-0.53125 -0.4375 -1.5 -0.4375q-1.15625 0 -1.640625 0.390625q-0.484375 0.375 -0.484375 0.875q0 0.328125 0.203125 0.59375q0.203125 0.265625 0.640625 0.4375q0.25 0.09375 1.46875 0.4375q1.765625 0.46875 2.46875 0.765625q0.703125 0.296875 1.09375 0.875q0.40625 0.578125 0.40625 1.4375q0 0.828125 -0.484375 1.578125q-0.484375 0.734375 -1.40625 1.140625q-0.921875 0.390625 -2.078125 0.390625q-1.921875 0 -2.9375 -0.796875q-1.0 -0.796875 -1.28125 -2.359375zm10.015625 -8.75l0 -1.90625l1.671875 0l0 1.90625l-1.671875 0zm0 11.6875l0 -9.859375l1.671875 0l0 9.859375l-1.671875 0zm3.254181 0l0 -1.359375l6.265625 -7.1875q-1.0625 0.046875 -1.875 0.046875l-4.015625 0l0 -1.359375l8.046875 0l0 1.109375l-5.34375 6.25l-1.015625 1.140625q1.109375 -0.078125 2.09375 -0.078125l4.5625 0l0 1.4375l-8.71875 0zm16.953125 -3.171875l1.71875 0.21875q-0.40625 1.5 -1.515625 2.34375q-1.09375 0.828125 -2.8125 0.828125q-2.15625 0 -3.421875 -1.328125q-1.265625 -1.328125 -1.265625 -3.734375q0 -2.484375 1.265625 -3.859375q1.28125 -1.375 3.328125 -1.375q1.984375 0 3.234375 1.34375q1.25 1.34375 1.25 3.796875q0 0.140625 -0.015625 0.4375l-7.34375 0q0.09375 1.625 0.921875 2.484375q0.828125 0.859375 2.0625 0.859375q0.90625 0 1.546875 -0.46875q0.65625 -0.484375 1.046875 -1.546875zm-5.484375 -2.703125l5.5 0q-0.109375 -1.234375 -0.625 -1.859375q-0.796875 -0.96875 -2.078125 -0.96875q-1.140625 0 -1.9375 0.78125q-0.78125 0.765625 -0.859375 2.046875zm23.074646 -2.125l-8.96875 0l0 -1.5625l8.96875 0l0 1.5625zm0 4.125l-8.96875 0l0 -1.546875l8.96875 0l0 1.546875zm6.8439026 0.28125l1.671875 -0.21875q0.28125 1.421875 0.96875 2.046875q0.703125 0.625 1.6875 0.625q1.1875 0 2.0 -0.8125q0.8125 -0.828125 0.8125 -2.03125q0 -1.140625 -0.765625 -1.890625q-0.75 -0.75 -1.90625 -0.75q-0.46875 0 -1.171875 0.1875l0.1875 -1.46875q0.15625 0.015625 0.265625 0.015625q1.0625 0 1.90625 -0.546875q0.859375 -0.5625 0.859375 -1.71875q0 -0.921875 -0.625 -1.515625q-0.609375 -0.609375 -1.59375 -0.609375q-0.96875 0 -1.625 0.609375q-0.640625 0.609375 -0.828125 1.84375l-1.671875 -0.296875q0.296875 -1.6875 1.375 -2.609375q1.09375 -0.921875 2.71875 -0.921875q1.109375 0 2.046875 0.484375q0.9375 0.46875 1.421875 1.296875q0.5 0.828125 0.5 1.75q0 0.890625 -0.46875 1.609375q-0.46875 0.71875 -1.40625 1.15625q1.21875 0.265625 1.875 1.15625q0.671875 0.875 0.671875 2.1875q0 1.78125 -1.296875 3.015625q-1.296875 1.234375 -3.28125 1.234375q-1.796875 0 -2.984375 -1.0625q-1.171875 -1.0625 -1.34375 -2.765625zm19.141296 1.984375l0 1.609375l-8.984375 0q-0.015625 -0.609375 0.1875 -1.15625q0.34375 -0.921875 1.09375 -1.8125q0.765625 -0.890625 2.1875 -2.0625q2.21875 -1.8125 3.0 -2.875q0.78125 -1.0625 0.78125 -2.015625q0 -0.984375 -0.71875 -1.671875q-0.703125 -0.6875 -1.84375 -0.6875q-1.203125 0 -1.9375 0.734375q-0.71875 0.71875 -0.71875 2.0l-1.71875 -0.171875q0.171875 -1.921875 1.328125 -2.921875q1.15625 -1.015625 3.09375 -1.015625q1.953125 0 3.09375 1.09375q1.140625 1.078125 1.140625 2.6875q0 0.8125 -0.34375 1.609375q-0.328125 0.78125 -1.109375 1.65625q-0.765625 0.859375 -2.5625 2.390625q-1.5 1.265625 -1.9375 1.71875q-0.421875 0.4375 -0.703125 0.890625l6.671875 0zm1.5944824 -5.09375q0 -2.421875 0.5 -3.890625q0.5 -1.46875 1.46875 -2.265625q0.984375 -0.796875 2.46875 -0.796875q1.09375 0 1.921875 0.4375q0.828125 0.4375 1.359375 1.28125q0.546875 0.828125 0.84375 2.015625q0.3125 1.1875 0.3125 3.21875q0 2.390625 -0.5 3.859375q-0.484375 1.46875 -1.46875 2.28125q-0.96875 0.796875 -2.46875 0.796875q-1.96875 0 -3.078125 -1.40625q-1.359375 -1.703125 -1.359375 -5.53125zm1.71875 0q0 3.34375 0.78125 4.453125q0.796875 1.109375 1.9375 1.109375q1.15625 0 1.9375 -1.109375q0.78125 -1.125 0.78125 -4.453125q0 -3.359375 -0.78125 -4.46875q-0.78125 -1.109375 -1.953125 -1.109375q-1.15625 0 -1.828125 0.984375q-0.875 1.234375 -0.875 4.59375zm10.219482 10.703125l-1.1875 0q2.765625 -4.453125 2.765625 -8.921875q0 -1.734375 -0.390625 -3.453125q-0.328125 -1.390625 -0.890625 -2.671875q-0.359375 -0.84375 -1.484375 -2.78125l1.1875 0q1.75 2.328125 2.578125 4.671875q0.71875 2.015625 0.71875 4.234375q0 2.5 -0.96875 4.84375q-0.953125 2.328125 -2.328125 4.078125z" fill-rule="nonzero"></path><path fill="#000000" fill-opacity="0.0" d="m359.0 102.02362l232.18896 0l0 58.992126l-232.18896 0z" fill-rule="evenodd"></path><path stroke="#000000" stroke-width="2.0" stroke-linejoin="round" stroke-linecap="butt" d="m359.0 102.02362l232.18896 0l0 58.992126l-232.18896 0z" fill-rule="evenodd"></path><path fill="#000000" d="m401.82367 128.94362l0 -13.59375l1.796875 0l0 11.984375l6.703125 0l0 1.609375l-8.5 0zm9.844482 -4.375l1.6875 -0.140625q0.125 1.015625 0.5625 1.671875q0.4375 0.65625 1.359375 1.0625q0.9375 0.40625 2.09375 0.40625q1.03125 0 1.8125 -0.3125q0.796875 -0.3125 1.1875 -0.84375q0.390625 -0.53125 0.390625 -1.15625q0 -0.640625 -0.375 -1.109375q-0.375 -0.484375 -1.234375 -0.8125q-0.546875 -0.21875 -2.421875 -0.65625q-1.875 -0.453125 -2.625 -0.859375q-0.96875 -0.515625 -1.453125 -1.265625q-0.46875 -0.75 -0.46875 -1.6875q0 -1.03125 0.578125 -1.921875q0.59375 -0.90625 1.703125 -1.359375q1.125 -0.46875 2.5 -0.46875q1.515625 0 2.671875 0.484375q1.15625 0.484375 1.765625 1.4375q0.625 0.9375 0.671875 2.140625l-1.71875 0.125q-0.140625 -1.28125 -0.953125 -1.9375q-0.796875 -0.671875 -2.359375 -0.671875q-1.625 0 -2.375 0.609375q-0.75 0.59375 -0.75 1.4375q0 0.734375 0.53125 1.203125q0.515625 0.46875 2.703125 0.96875q2.203125 0.5 3.015625 0.875q1.1875 0.546875 1.75 1.390625q0.578125 0.828125 0.578125 1.921875q0 1.09375 -0.625 2.0625q-0.625 0.953125 -1.796875 1.484375q-1.15625 0.53125 -2.609375 0.53125q-1.84375 0 -3.09375 -0.53125q-1.25 -0.546875 -1.96875 -1.625q-0.703125 -1.078125 -0.734375 -2.453125zm16.506073 4.375l0 -12.0l-4.46875 0l0 -1.59375l10.765625 0l0 1.59375l-4.5 0l0 12.0l-1.796875 0zm7.8803406 0l0 -13.59375l2.71875 0l3.21875 9.625q0.4375 1.34375 0.640625 2.015625q0.234375 -0.75 0.734375 -2.1875l3.25 -9.453125l2.421875 0l0 13.59375l-1.734375 0l0 -11.390625l-3.953125 11.390625l-1.625 0l-3.9375 -11.578125l0 11.578125l-1.734375 0zm21.212677 0l-1.671875 0l0 -10.640625q-0.59375 0.578125 -1.578125 1.15625q-0.984375 0.5625 -1.765625 0.859375l0 -1.625q1.40625 -0.65625 2.453125 -1.59375q1.046875 -0.9375 1.484375 -1.8125l1.078125 0l0 13.65625zm12.918396 4.0q-1.375 -1.75 -2.328125 -4.078125q-0.953125 -2.34375 -0.953125 -4.84375q0 -2.21875 0.703125 -4.234375q0.84375 -2.34375 2.578125 -4.671875l1.203125 0q-1.125 1.921875 -1.484375 2.75q-0.5625 1.28125 -0.890625 2.671875q-0.40625 1.734375 -0.40625 3.484375q0 4.46875 2.78125 8.921875l-1.203125 0zm2.572052 -7.59375l1.671875 -0.21875q0.28125 1.421875 0.96875 2.046875q0.703125 0.625 1.6875 0.625q1.1875 0 2.0 -0.8125q0.8125 -0.828125 0.8125 -2.03125q0 -1.140625 -0.765625 -1.890625q-0.75 -0.75 -1.90625 -0.75q-0.46875 0 -1.171875 0.1875l0.1875 -1.46875q0.15625 0.015625 0.265625 0.015625q1.0625 0 1.90625 -0.546875q0.859375 -0.5625 0.859375 -1.71875q0 -0.921875 -0.625 -1.515625q-0.609375 -0.609375 -1.59375 -0.609375q-0.96875 0 -1.625 0.609375q-0.640625 0.609375 -0.828125 1.84375l-1.671875 -0.296875q0.296875 -1.6875 1.375 -2.609375q1.09375 -0.921875 2.71875 -0.921875q1.109375 0 2.046875 0.484375q0.9375 0.46875 1.421875 1.296875q0.5 0.828125 0.5 1.75q0 0.890625 -0.46875 1.609375q-0.46875 0.71875 -1.40625 1.15625q1.21875 0.265625 1.875 1.15625q0.671875 0.875 0.671875 2.1875q0 1.78125 -1.296875 3.015625q-1.296875 1.234375 -3.28125 1.234375q-1.796875 0 -2.984375 -1.0625q-1.171875 -1.0625 -1.34375 -2.765625zm19.141357 1.984375l0 1.609375l-8.984375 0q-0.015625 -0.609375 0.1875 -1.15625q0.34375 -0.921875 1.09375 -1.8125q0.765625 -0.890625 2.1875 -2.0625q2.21875 -1.8125 3.0 -2.875q0.78125 -1.0625 0.78125 -2.015625q0 -0.984375 -0.71875 -1.671875q-0.703125 -0.6875 -1.84375 -0.6875q-1.203125 0 -1.9375 0.734375q-0.71875 0.71875 -0.71875 2.0l-1.71875 -0.171875q0.171875 -1.921875 1.328125 -2.921875q1.15625 -1.015625 3.09375 -1.015625q1.953125 0 3.09375 1.09375q1.140625 1.078125 1.140625 2.6875q0 0.8125 -0.34375 1.609375q-0.328125 0.78125 -1.109375 1.65625q-0.765625 0.859375 -2.5625 2.390625q-1.5 1.265625 -1.9375 1.71875q-0.421875 0.4375 -0.703125 0.890625l6.671875 0zm1.5944519 -5.09375q0 -2.421875 0.5 -3.890625q0.5 -1.46875 1.46875 -2.265625q0.984375 -0.796875 2.46875 -0.796875q1.09375 0 1.921875 0.4375q0.828125 0.4375 1.359375 1.28125q0.546875 0.828125 0.84375 2.015625q0.3125 1.1875 0.3125 3.21875q0 2.390625 -0.5 3.859375q-0.484375 1.46875 -1.46875 2.28125q-0.96875 0.796875 -2.46875 0.796875q-1.96875 0 -3.078125 -1.40625q-1.359375 -1.703125 -1.359375 -5.53125zm1.71875 0q0 3.34375 0.78125 4.453125q0.796875 1.109375 1.9375 1.109375q1.15625 0 1.9375 -1.109375q0.78125 -1.125 0.78125 -4.453125q0 -3.359375 -0.78125 -4.46875q-0.78125 -1.109375 -1.953125 -1.109375q-1.15625 0 -1.828125 0.984375q-0.875 1.234375 -0.875 4.59375zm8.016357 6.703125l3.59375 -5.125l-3.328125 -4.734375l2.09375 0l1.515625 2.3125q0.421875 0.65625 0.671875 1.109375q0.421875 -0.609375 0.765625 -1.09375l1.65625 -2.328125l1.9844055 0l-3.3906555 4.640625l3.6562805 5.21875l-2.0469055 0l-2.03125 -3.0625l-0.53125 -0.828125l-2.59375 3.890625l-2.015625 0zm9.9687805 -3.5625l1.765625 -0.15625q0.1875 1.28125 0.890625 1.9375q0.71875 0.640625 1.71875 0.640625q1.203125 0 2.03125 -0.90625q0.84375 -0.90625 0.84375 -2.421875q0 -1.421875 -0.8125 -2.25q-0.796875 -0.828125 -2.09375 -0.828125q-0.796875 0 -1.453125 0.375q-0.640625 0.359375 -1.015625 0.953125l-1.578125 -0.203125l1.328125 -7.0l6.765625 0l0 1.609375l-5.4375 0l-0.734375 3.640625q1.234375 -0.84375 2.578125 -0.84375q1.78125 0 3.0 1.234375q1.234375 1.234375 1.234375 3.171875q0 1.84375 -1.078125 3.1875q-1.3125 1.65625 -3.578125 1.65625q-1.859375 0 -3.03125 -1.03125q-1.171875 -1.046875 -1.34375 -2.765625zm10.375671 -3.140625q0 -2.421875 0.5 -3.890625q0.5 -1.46875 1.46875 -2.265625q0.984375 -0.796875 2.46875 -0.796875q1.09375 0 1.921875 0.4375q0.828125 0.4375 1.359375 1.28125q0.546875 0.828125 0.84375 2.015625q0.3125 1.1875 0.3125 3.21875q0 2.390625 -0.5 3.859375q-0.484375 1.46875 -1.46875 2.28125q-0.96875 0.796875 -2.46875 0.796875q-1.96875 0 -3.078125 -1.40625q-1.359375 -1.703125 -1.359375 -5.53125zm1.71875 0q0 3.34375 0.78125 4.453125q0.796875 1.109375 1.9375 1.109375q1.15625 0 1.9375 -1.109375q0.78125 -1.125 0.78125 -4.453125q0 -3.359375 -0.78125 -4.46875q-0.78125 -1.109375 -1.953125 -1.109375q-1.15625 0 -1.828125 0.984375q-0.875 1.234375 -0.875 4.59375zm8.656982 0q0 -2.421875 0.5 -3.890625q0.5 -1.46875 1.46875 -2.265625q0.984375 -0.796875 2.46875 -0.796875q1.09375 0 1.921875 0.4375q0.828125 0.4375 1.359375 1.28125q0.546875 0.828125 0.84375 2.015625q0.3125 1.1875 0.3125 3.21875q0 2.390625 -0.5 3.859375q-0.484375 1.46875 -1.46875 2.28125q-0.96875 0.796875 -2.46875 0.796875q-1.96875 0 -3.078125 -1.40625q-1.359375 -1.703125 -1.359375 -5.53125zm1.71875 0q0 3.34375 0.78125 4.453125q0.796875 1.109375 1.9375 1.109375q1.15625 0 1.9375 -1.109375q0.78125 -1.125 0.78125 -4.453125q0 -3.359375 -0.78125 -4.46875q-0.78125 -1.109375 -1.953125 -1.109375q-1.15625 0 -1.828125 0.984375q-0.875 1.234375 -0.875 4.59375zm10.219421 10.703125l-1.1875 0q2.765625 -4.453125 2.765625 -8.921875q0 -1.734375 -0.390625 -3.453125q-0.328125 -1.390625 -0.890625 -2.671875q-0.359375 -0.84375 -1.484375 -2.78125l1.1875 0q1.75 2.328125 2.578125 4.671875q0.71875 2.015625 0.71875 4.234375q0 2.5 -0.96875 4.84375q-0.953125 2.328125 -2.328125 4.078125z" fill-rule="nonzero"></path><path fill="#000000" d="m395.9714 154.72487l0 -13.640625l1.53125 0l0 1.28125q0.53125 -0.75 1.203125 -1.125q0.6875 -0.375 1.640625 -0.375q1.265625 0 2.234375 0.65625q0.96875 0.640625 1.453125 1.828125q0.5 1.1875 0.5 2.59375q0 1.515625 -0.546875 2.734375q-0.546875 1.203125 -1.578125 1.84375q-1.03125 0.640625 -2.171875 0.640625q-0.84375 0 -1.515625 -0.34375q-0.65625 -0.359375 -1.078125 -0.890625l0 4.796875l-1.671875 0zm1.515625 -8.65625q0 1.90625 0.765625 2.8125q0.78125 0.90625 1.875 0.90625q1.109375 0 1.890625 -0.9375q0.796875 -0.9375 0.796875 -2.921875q0 -1.875 -0.78125 -2.8125q-0.765625 -0.9375 -1.84375 -0.9375q-1.0625 0 -1.890625 1.0q-0.8125 1.0 -0.8125 2.890625zm8.844452 4.875l0 -9.859375l1.5 0l0 1.5q0.578125 -1.046875 1.0625 -1.375q0.484375 -0.34375 1.078125 -0.34375q0.84375 0 1.71875 0.546875l-0.578125 1.546875q-0.609375 -0.359375 -1.234375 -0.359375q-0.546875 0 -0.984375 0.328125q-0.421875 0.328125 -0.609375 0.90625q-0.28125 0.890625 -0.28125 1.953125l0 5.15625l-1.671875 0zm5.603302 -4.921875q0 -2.734375 1.53125 -4.0625q1.265625 -1.09375 3.09375 -1.09375q2.03125 0 3.3125 1.34375q1.296875 1.328125 1.296875 3.671875q0 1.90625 -0.578125 3.0q-0.5625 1.078125 -1.65625 1.6875q-1.078125 0.59375 -2.375 0.59375q-2.0625 0 -3.34375 -1.328125q-1.28125 -1.328125 -1.28125 -3.8125zm1.71875 0q0 1.890625 0.828125 2.828125q0.828125 0.9375 2.078125 0.9375q1.25 0 2.0625 -0.9375q0.828125 -0.953125 0.828125 -2.890625q0 -1.828125 -0.828125 -2.765625q-0.828125 -0.9375 -2.0625 -0.9375q-1.25 0 -2.078125 0.9375q-0.828125 0.9375 -0.828125 2.828125zm9.281982 -6.734375l0 -1.9375l1.65625 0l0 1.9375l-1.65625 0zm-2.125 15.484375l0.3125 -1.421875q0.5 0.125 0.796875 0.125q0.515625 0 0.765625 -0.34375q0.25 -0.328125 0.25 -1.6875l0 -10.359375l1.65625 0l0 10.390625q0 1.828125 -0.46875 2.546875q-0.59375 0.921875 -2.0 0.921875q-0.671875 0 -1.3125 -0.171875zm13.019806 -7.0l1.71875 0.21875q-0.40625 1.5 -1.515625 2.34375q-1.09375 0.828125 -2.8125 0.828125q-2.15625 0 -3.421875 -1.328125q-1.265625 -1.328125 -1.265625 -3.734375q0 -2.484375 1.265625 -3.859375q1.28125 -1.375 3.328125 -1.375q1.984375 0 3.234375 1.34375q1.25 1.34375 1.25 3.796875q0 0.140625 -0.015625 0.4375l-7.34375 0q0.09375 1.625 0.921875 2.484375q0.828125 0.859375 2.0625 0.859375q0.90625 0 1.546875 -0.46875q0.65625 -0.484375 1.046875 -1.546875zm-5.484375 -2.703125l5.5 0q-0.109375 -1.234375 -0.625 -1.859375q-0.796875 -0.96875 -2.078125 -0.96875q-1.140625 0 -1.9375 0.78125q-0.78125 0.765625 -0.859375 2.046875zm15.547607 2.265625l1.640625 0.21875q-0.265625 1.6875 -1.375 2.65625q-1.109375 0.953125 -2.734375 0.953125q-2.015625 0 -3.25 -1.3125q-1.21875 -1.328125 -1.21875 -3.796875q0 -1.59375 0.515625 -2.78125q0.53125 -1.203125 1.609375 -1.796875q1.09375 -0.609375 2.359375 -0.609375q1.609375 0 2.625 0.8125q1.015625 0.8125 1.3125 2.3125l-1.625 0.25q-0.234375 -1.0 -0.828125 -1.5q-0.59375 -0.5 -1.421875 -0.5q-1.265625 0 -2.0625 0.90625q-0.78125 0.90625 -0.78125 2.859375q0 1.984375 0.765625 2.890625q0.765625 0.890625 1.984375 0.890625q0.984375 0 1.640625 -0.59375q0.65625 -0.609375 0.84375 -1.859375zm6.546875 2.109375l0.234375 1.484375q-0.703125 0.140625 -1.265625 0.140625q-0.90625 0 -1.40625 -0.28125q-0.5 -0.296875 -0.703125 -0.75q-0.203125 -0.46875 -0.203125 -1.984375l0 -5.65625l-1.234375 0l0 -1.3125l1.234375 0l0 -2.4375l1.65625 -1.0l0 3.4375l1.6875 0l0 1.3125l-1.6875 0l0 5.75q0 0.71875 0.078125 0.921875q0.09375 0.203125 0.296875 0.328125q0.203125 0.125 0.578125 0.125q0.265625 0 0.734375 -0.078125zm1.5426636 -10.1875l0 -1.90625l1.671875 0l0 1.90625l-1.671875 0zm0 11.6875l0 -9.859375l1.671875 0l0 9.859375l-1.671875 0zm3.5042114 -4.921875q0 -2.734375 1.53125 -4.0625q1.265625 -1.09375 3.09375 -1.09375q2.03125 0 3.3125 1.34375q1.296875 1.328125 1.296875 3.671875q0 1.90625 -0.578125 3.0q-0.5625 1.078125 -1.65625 1.6875q-1.078125 0.59375 -2.375 0.59375q-2.0625 0 -3.34375 -1.328125q-1.28125 -1.328125 -1.28125 -3.8125zm1.71875 0q0 1.890625 0.828125 2.828125q0.828125 0.9375 2.078125 0.9375q1.25 0 2.0625 -0.9375q0.828125 -0.953125 0.828125 -2.890625q0 -1.828125 -0.828125 -2.765625q-0.828125 -0.9375 -2.0625 -0.9375q-1.25 0 -2.078125 0.9375q-0.828125 0.9375 -0.828125 2.828125zm9.281952 4.921875l0 -9.859375l1.5 0l0 1.40625q1.09375 -1.625 3.140625 -1.625q0.890625 0 1.640625 0.328125q0.75 0.3125 1.109375 0.84375q0.375 0.515625 0.53125 1.21875q0.09375 0.46875 0.09375 1.625l0 6.0625l-1.671875 0l0 -6.0q0 -1.015625 -0.203125 -1.515625q-0.1875 -0.515625 -0.6875 -0.8125q-0.5 -0.296875 -1.171875 -0.296875q-1.0625 0 -1.84375 0.671875q-0.765625 0.671875 -0.765625 2.578125l0 5.375l-1.671875 0zm14.887146 -2.9375l1.65625 -0.265625q0.140625 1.0 0.765625 1.53125q0.640625 0.515625 1.78125 0.515625q1.15625 0 1.703125 -0.46875q0.5625 -0.46875 0.5625 -1.09375q0 -0.5625 -0.484375 -0.890625q-0.34375 -0.21875 -1.703125 -0.5625q-1.84375 -0.46875 -2.5625 -0.796875q-0.703125 -0.34375 -1.078125 -0.9375q-0.359375 -0.609375 -0.359375 -1.328125q0 -0.65625 0.296875 -1.21875q0.3125 -0.5625 0.828125 -0.9375q0.390625 -0.28125 1.0625 -0.484375q0.671875 -0.203125 1.4375 -0.203125q1.171875 0 2.046875 0.34375q0.875 0.328125 1.28125 0.90625q0.421875 0.5625 0.578125 1.515625l-1.625 0.21875q-0.109375 -0.75 -0.65625 -1.171875q-0.53125 -0.4375 -1.5 -0.4375q-1.15625 0 -1.640625 0.390625q-0.484375 0.375 -0.484375 0.875q0 0.328125 0.203125 0.59375q0.203125 0.265625 0.640625 0.4375q0.25 0.09375 1.46875 0.4375q1.765625 0.46875 2.46875 0.765625q0.703125 0.296875 1.09375 0.875q0.40625 0.578125 0.40625 1.4375q0 0.828125 -0.484375 1.578125q-0.484375 0.734375 -1.40625 1.140625q-0.921875 0.390625 -2.078125 0.390625q-1.921875 0 -2.9375 -0.796875q-1.0 -0.796875 -1.28125 -2.359375zm10.015625 -8.75l0 -1.90625l1.671875 0l0 1.90625l-1.671875 0zm0 11.6875l0 -9.859375l1.671875 0l0 9.859375l-1.671875 0zm3.2542114 0l0 -1.359375l6.265625 -7.1875q-1.0625 0.046875 -1.875 0.046875l-4.015625 0l0 -1.359375l8.046875 0l0 1.109375l-5.34375 6.25l-1.015625 1.140625q1.109375 -0.078125 2.09375 -0.078125l4.5625 0l0 1.4375l-8.71875 0zm16.953125 -3.171875l1.7187805 0.21875q-0.40625 1.5 -1.5156555 2.34375q-1.09375 0.828125 -2.8125 0.828125q-2.15625 0 -3.421875 -1.328125q-1.265625 -1.328125 -1.265625 -3.734375q0 -2.484375 1.265625 -3.859375q1.28125 -1.375 3.328125 -1.375q1.984375 0 3.2344055 1.34375q1.25 1.34375 1.25 3.796875q0 0.140625 -0.015625 0.4375l-7.3437805 0q0.09375 1.625 0.921875 2.484375q0.828125 0.859375 2.0625 0.859375q0.90625 0 1.546875 -0.46875q0.65625 -0.484375 1.046875 -1.546875zm-5.484375 -2.703125l5.5 0q-0.109375 -1.234375 -0.625 -1.859375q-0.796875 -0.96875 -2.078125 -0.96875q-1.140625 0 -1.9375 0.78125q-0.78125 0.765625 -0.859375 2.046875zm9.578827 -2.078125l0 -1.90625l1.90625 0l0 1.90625l-1.90625 0zm0 7.953125l0 -1.90625l1.90625 0l0 1.90625l-1.90625 0zm18.210388 -1.609375l0 1.609375l-8.984375 0q-0.015625 -0.609375 0.1875 -1.15625q0.34375 -0.921875 1.09375 -1.8125q0.765625 -0.890625 2.1875 -2.0625q2.21875 -1.8125 3.0 -2.875q0.78125 -1.0625 0.78125 -2.015625q0 -0.984375 -0.71875 -1.671875q-0.703125 -0.6875 -1.84375 -0.6875q-1.203125 0 -1.9375 0.734375q-0.71875 0.71875 -0.71875 2.0l-1.71875 -0.171875q0.171875 -1.921875 1.328125 -2.921875q1.15625 -1.015625 3.09375 -1.015625q1.953125 0 3.09375 1.09375q1.140625 1.078125 1.140625 2.6875q0 0.8125 -0.34375 1.609375q-0.328125 0.78125 -1.109375 1.65625q-0.765625 0.859375 -2.5625 2.390625q-1.5 1.265625 -1.9375 1.71875q-0.421875 0.4375 -0.703125 0.890625l6.671875 0zm1.5944824 -5.09375q0 -2.421875 0.5 -3.890625q0.5 -1.46875 1.46875 -2.265625q0.984375 -0.796875 2.46875 -0.796875q1.09375 0 1.921875 0.4375q0.828125 0.4375 1.359375 1.28125q0.546875 0.828125 0.84375 2.015625q0.3125 1.1875 0.3125 3.21875q0 2.390625 -0.5 3.859375q-0.484375 1.46875 -1.46875 2.28125q-0.96875 0.796875 -2.46875 0.796875q-1.96875 0 -3.078125 -1.40625q-1.359375 -1.703125 -1.359375 -5.53125zm1.71875 0q0 3.34375 0.78125 4.453125q0.796875 1.109375 1.9375 1.109375q1.15625 0 1.9375 -1.109375q0.78125 -1.125 0.78125 -4.453125q0 -3.359375 -0.78125 -4.46875q-0.78125 -1.109375 -1.953125 -1.109375q-1.15625 0 -1.828125 0.984375q-0.875 1.234375 -0.875 4.59375zm8.656921 0q0 -2.421875 0.5 -3.890625q0.5 -1.46875 1.46875 -2.265625q0.984375 -0.796875 2.46875 -0.796875q1.09375 0 1.921875 0.4375q0.828125 0.4375 1.359375 1.28125q0.546875 0.828125 0.84375 2.015625q0.3125 1.1875 0.3125 3.21875q0 2.390625 -0.5 3.859375q-0.484375 1.46875 -1.46875 2.28125q-0.96875 0.796875 -2.46875 0.796875q-1.96875 0 -3.078125 -1.40625q-1.359375 -1.703125 -1.359375 -5.53125zm1.71875 0q0 3.34375 0.78125 4.453125q0.796875 1.109375 1.9375 1.109375q1.15625 0 1.9375 -1.109375q0.78125 -1.125 0.78125 -4.453125q0 -3.359375 -0.78125 -4.46875q-0.78125 -1.109375 -1.953125 -1.109375q-1.15625 0 -1.828125 0.984375q-0.875 1.234375 -0.875 4.59375z" fill-rule="nonzero"></path><path fill="#000000" fill-opacity="0.0" d="m385.80054 657.01575l180.0 0l0 42.11023l-180.0 0z" fill-rule="evenodd"></path><path stroke="#000000" stroke-width="2.0" stroke-linejoin="round" stroke-linecap="butt" d="m385.80054 657.01575l180.0 0l0 42.11023l-180.0 0z" fill-rule="evenodd"></path><path fill="#000000" d="m402.3206 677.3107q0 -3.390625 1.8125 -5.296875q1.828125 -1.921875 4.703125 -1.921875q1.875 0 3.390625 0.90625q1.515625 0.890625 2.296875 2.5q0.796875 1.609375 0.796875 3.65625q0 2.0625 -0.84375 3.703125q-0.828125 1.625 -2.359375 2.46875q-1.53125 0.84375 -3.296875 0.84375q-1.921875 0 -3.4375 -0.921875q-1.5 -0.9375 -2.28125 -2.53125q-0.78125 -1.609375 -0.78125 -3.40625zm1.859375 0.03125q0 2.453125 1.3125 3.875q1.328125 1.40625 3.3125 1.40625q2.03125 0 3.34375 -1.421875q1.3125 -1.4375 1.3125 -4.0625q0 -1.65625 -0.5625 -2.890625q-0.546875 -1.234375 -1.640625 -1.921875q-1.078125 -0.6875 -2.421875 -0.6875q-1.90625 0 -3.28125 1.3125q-1.375 1.3125 -1.375 4.390625zm19.433289 6.59375l0 -1.453125q-1.140625 1.671875 -3.125 1.671875q-0.859375 0 -1.625 -0.328125q-0.75 -0.34375 -1.125 -0.84375q-0.359375 -0.5 -0.515625 -1.234375q-0.09375 -0.5 -0.09375 -1.5625l0 -6.109375l1.671875 0l0 5.46875q0 1.3125 0.09375 1.765625q0.15625 0.65625 0.671875 1.03125q0.515625 0.375 1.265625 0.375q0.75 0 1.40625 -0.375q0.65625 -0.390625 0.921875 -1.046875q0.28125 -0.671875 0.28125 -1.9375l0 -5.28125l1.671875 0l0 9.859375l-1.5 0zm7.5788574 -1.5l0.234375 1.484375q-0.703125 0.140625 -1.265625 0.140625q-0.90625 0 -1.40625 -0.28125q-0.5 -0.296875 -0.703125 -0.75q-0.203125 -0.46875 -0.203125 -1.984375l0 -5.65625l-1.234375 0l0 -1.3125l1.234375 0l0 -2.4375l1.65625 -1.0l0 3.4375l1.6875 0l0 1.3125l-1.6875 0l0 5.75q0 0.71875 0.078125 0.921875q0.09375 0.203125 0.296875 0.328125q0.203125 0.125 0.578125 0.125q0.265625 0 0.734375 -0.078125zm1.5270386 5.28125l0 -13.640625l1.53125 0l0 1.28125q0.53125 -0.75 1.203125 -1.125q0.6875 -0.375 1.640625 -0.375q1.265625 0 2.234375 0.65625q0.96875 0.640625 1.453125 1.828125q0.5 1.1875 0.5 2.59375q0 1.515625 -0.546875 2.734375q-0.546875 1.203125 -1.578125 1.84375q-1.03125 0.640625 -2.171875 0.640625q-0.84375 0 -1.515625 -0.34375q-0.65625 -0.359375 -1.078125 -0.890625l0 4.796875l-1.671875 0zm1.515625 -8.65625q0 1.90625 0.765625 2.8125q0.78125 0.90625 1.875 0.90625q1.109375 0 1.890625 -0.9375q0.796875 -0.9375 0.796875 -2.921875q0 -1.875 -0.78125 -2.8125q-0.765625 -0.9375 -1.84375 -0.9375q-1.0625 0 -1.890625 1.0q-0.8125 1.0 -0.8125 2.890625zm15.313232 4.875l0 -1.453125q-1.140625 1.671875 -3.125 1.671875q-0.859375 0 -1.625 -0.328125q-0.75 -0.34375 -1.125 -0.84375q-0.359375 -0.5 -0.515625 -1.234375q-0.09375 -0.5 -0.09375 -1.5625l0 -6.109375l1.671875 0l0 5.46875q0 1.3125 0.09375 1.765625q0.15625 0.65625 0.671875 1.03125q0.515625 0.375 1.265625 0.375q0.75 0 1.40625 -0.375q0.65625 -0.390625 0.921875 -1.046875q0.28125 -0.671875 0.28125 -1.9375l0 -5.28125l1.671875 0l0 9.859375l-1.5 0zm7.578827 -1.5l0.234375 1.484375q-0.703125 0.140625 -1.265625 0.140625q-0.90625 0 -1.40625 -0.28125q-0.5 -0.296875 -0.703125 -0.75q-0.203125 -0.46875 -0.203125 -1.984375l0 -5.65625l-1.234375 0l0 -1.3125l1.234375 0l0 -2.4375l1.65625 -1.0l0 3.4375l1.6875 0l0 1.3125l-1.6875 0l0 5.75q0 0.71875 0.078125 0.921875q0.09375 0.203125 0.296875 0.328125q0.203125 0.125 0.578125 0.125q0.265625 0 0.734375 -0.078125zm9.897858 5.5q-1.375 -1.75 -2.328125 -4.078125q-0.953125 -2.34375 -0.953125 -4.84375q0 -2.21875 0.703125 -4.234375q0.84375 -2.34375 2.578125 -4.671875l1.203125 0q-1.125 1.921875 -1.484375 2.75q-0.5625 1.28125 -0.890625 2.671875q-0.40625 1.734375 -0.40625 3.484375q0 4.46875 2.78125 8.921875l-1.203125 0zm2.353302 -6.9375l1.65625 -0.265625q0.140625 1.0 0.765625 1.53125q0.640625 0.515625 1.78125 0.515625q1.15625 0 1.703125 -0.46875q0.5625 -0.46875 0.5625 -1.09375q0 -0.5625 -0.484375 -0.890625q-0.34375 -0.21875 -1.703125 -0.5625q-1.84375 -0.46875 -2.5625 -0.796875q-0.703125 -0.34375 -1.078125 -0.9375q-0.359375 -0.609375 -0.359375 -1.328125q0 -0.65625 0.296875 -1.21875q0.3125 -0.5625 0.828125 -0.9375q0.390625 -0.28125 1.0625 -0.484375q0.671875 -0.203125 1.4375 -0.203125q1.171875 0 2.046875 0.34375q0.875 0.328125 1.28125 0.90625q0.421875 0.5625 0.578125 1.515625l-1.625 0.21875q-0.109375 -0.75 -0.65625 -1.171875q-0.53125 -0.4375 -1.5 -0.4375q-1.15625 0 -1.640625 0.390625q-0.484375 0.375 -0.484375 0.875q0 0.328125 0.203125 0.59375q0.203125 0.265625 0.640625 0.4375q0.25 0.09375 1.46875 0.4375q1.765625 0.46875 2.46875 0.765625q0.703125 0.296875 1.09375 0.875q0.40625 0.578125 0.40625 1.4375q0 0.828125 -0.484375 1.578125q-0.484375 0.734375 -1.40625 1.140625q-0.921875 0.390625 -2.078125 0.390625q-1.921875 0 -2.9375 -0.796875q-1.0 -0.796875 -1.28125 -2.359375zm10.015625 -8.75l0 -1.90625l1.671875 0l0 1.90625l-1.671875 0zm0 11.6875l0 -9.859375l1.671875 0l0 9.859375l-1.671875 0zm3.2542114 0l0 -1.359375l6.265625 -7.1875q-1.0625 0.046875 -1.875 0.046875l-4.015625 0l0 -1.359375l8.046875 0l0 1.109375l-5.34375 6.25l-1.015625 1.140625q1.109375 -0.078125 2.09375 -0.078125l4.5625 0l0 1.4375l-8.71875 0zm16.953125 -3.171875l1.71875 0.21875q-0.40625 1.5 -1.515625 2.34375q-1.09375 0.828125 -2.8125 0.828125q-2.15625 0 -3.421875 -1.328125q-1.265625 -1.328125 -1.265625 -3.734375q0 -2.484375 1.265625 -3.859375q1.28125 -1.375 3.328125 -1.375q1.984375 0 3.234375 1.34375q1.25 1.34375 1.25 3.796875q0 0.140625 -0.015625 0.4375l-7.34375 0q0.09375 1.625 0.921875 2.484375q0.828125 0.859375 2.0625 0.859375q0.90625 0 1.546875 -0.46875q0.65625 -0.484375 1.046875 -1.546875zm-5.484375 -2.703125l5.5 0q-0.109375 -1.234375 -0.625 -1.859375q-0.796875 -0.96875 -2.078125 -0.96875q-1.140625 0 -1.9375 0.78125q-0.78125 0.765625 -0.859375 2.046875zm23.074646 -2.125l-8.96875 0l0 -1.5625l8.96875 0l0 1.5625zm0 4.125l-8.96875 0l0 -1.546875l8.96875 0l0 1.546875zm12.187622 3.875l0 -3.25l-5.90625 0l0 -1.53125l6.21875 -8.8125l1.359375 0l0 8.8125l1.84375 0l0 1.53125l-1.84375 0l0 3.25l-1.671875 0zm0 -4.78125l0 -6.140625l-4.25 6.140625l4.25 0zm13.797607 3.171875l0 1.609375l-8.984375 0q-0.015625 -0.609375 0.1875 -1.15625q0.34375 -0.921875 1.09375 -1.8125q0.765625 -0.890625 2.1875 -2.0625q2.21875 -1.8125 3.0 -2.875q0.78125 -1.0625 0.78125 -2.015625q0 -0.984375 -0.71875 -1.671875q-0.703125 -0.6875 -1.84375 -0.6875q-1.203125 0 -1.9375 0.734375q-0.71875 0.71875 -0.71875 2.0l-1.71875 -0.171875q0.171875 -1.921875 1.328125 -2.921875q1.15625 -1.015625 3.09375 -1.015625q1.953125 0 3.09375 1.09375q1.140625 1.078125 1.140625 2.6875q0 0.8125 -0.34375 1.609375q-0.328125 0.78125 -1.109375 1.65625q-0.765625 0.859375 -2.5625 2.390625q-1.5 1.265625 -1.9375 1.71875q-0.421875 0.4375 -0.703125 0.890625l6.671875 0zm3.1569824 5.609375l-1.1875 0q2.765625 -4.453125 2.765625 -8.921875q0 -1.734375 -0.390625 -3.453125q-0.328125 -1.390625 -0.890625 -2.671875q-0.359375 -0.84375 -1.484375 -2.78125l1.1875 0q1.75 2.328125 2.578125 4.671875q0.71875 2.015625 0.71875 4.234375q0 2.5 -0.96875 4.84375q-0.953125 2.328125 -2.328125 4.078125z" fill-rule="nonzero"></path><path fill="#000000" fill-opacity="0.0" d="m475.09448 161.01575l0 24.724411" fill-rule="evenodd"></path><path stroke="#000000" stroke-width="1.0" stroke-linejoin="round" stroke-linecap="butt" d="m475.09448 161.01575l0 18.724411" fill-rule="evenodd"></path><path fill="#000000" stroke="#000000" stroke-width="1.0" stroke-linecap="butt" d="m473.44275 179.74016l1.6517334 4.538101l1.6517334 -4.538101z" fill-rule="evenodd"></path><path fill="#000000" fill-opacity="0.0" d="m475.09448 244.72906l0 25.29132" fill-rule="evenodd"></path><path stroke="#000000" stroke-width="1.0" stroke-linejoin="round" stroke-linecap="butt" d="m475.09448 244.72906l0 19.291351" fill-rule="evenodd"></path><path fill="#000000" stroke="#000000" stroke-width="1.0" stroke-linecap="butt" d="m473.44275 264.02042l1.6517334 4.538086l1.6517334 -4.538086z" fill-rule="evenodd"></path><path fill="#000000" fill-opacity="0.0" d="m475.00787 72.81108l0.09448242 29.196846" fill-rule="evenodd"></path><path stroke="#000000" stroke-width="1.0" stroke-linejoin="round" stroke-linecap="butt" d="m475.00787 72.81108l0.07510376 23.196877" fill-rule="evenodd"></path><path fill="#000000" stroke="#000000" stroke-width="1.0" stroke-linecap="butt" d="m473.4312 96.013306l1.6664124 4.5327225l1.6370544 -4.543419z" fill-rule="evenodd"></path><path fill="#000000" fill-opacity="0.0" d="m359.0 526.4199l232.18896 0l0 42.11029l-232.18896 0z" fill-rule="evenodd"></path><path stroke="#000000" stroke-width="2.0" stroke-linejoin="round" stroke-linecap="butt" d="m359.0 526.4199l232.18896 0l0 42.11029l-232.18896 0z" fill-rule="evenodd"></path><path fill="#000000" d="m372.43524 553.33997l0 -13.59375l9.171875 0l0 1.59375l-7.375 0l0 4.21875l6.375 0l0 1.609375l-6.375 0l0 6.171875l-1.796875 0zm17.53659 0l0 -1.453125q-1.140625 1.671875 -3.125 1.671875q-0.859375 0 -1.625 -0.328125q-0.75 -0.34375 -1.125 -0.84375q-0.359375 -0.5 -0.515625 -1.234375q-0.09375 -0.5 -0.09375 -1.5625l0 -6.109375l1.671875 0l0 5.46875q0 1.3125 0.09375 1.765625q0.15625 0.65625 0.671875 1.03125q0.515625 0.375 1.265625 0.375q0.75 0 1.40625 -0.375q0.65625 -0.390625 0.921875 -1.046875q0.28125 -0.671875 0.28125 -1.9375l0 -5.28125l1.671875 0l0 9.859375l-1.5 0zm3.8913574 0l0 -13.59375l1.671875 0l0 13.59375l-1.671875 0zm4.144806 0l0 -13.59375l1.671875 0l0 13.59375l-1.671875 0zm4.0979614 3.796875l-0.171875 -1.5625q0.546875 0.140625 0.953125 0.140625q0.546875 0 0.875 -0.1875q0.34375 -0.1875 0.5625 -0.515625q0.15625 -0.25 0.5 -1.25q0.046875 -0.140625 0.15625 -0.40625l-3.734375 -9.875l1.796875 0l2.046875 5.71875q0.40625 1.078125 0.71875 2.28125q0.28125 -1.15625 0.6875 -2.25l2.09375 -5.75l1.671875 0l-3.75 10.03125q-0.59375 1.625 -0.9375 2.234375q-0.4375 0.828125 -1.015625 1.203125q-0.578125 0.390625 -1.375 0.390625q-0.484375 0 -1.078125 -0.203125zm19.328125 -8.5625l1.796875 0.453125q-0.5625 2.21875 -2.03125 3.390625q-1.46875 1.15625 -3.59375 1.15625q-2.203125 0 -3.578125 -0.890625q-1.375 -0.90625 -2.09375 -2.59375q-0.71875 -1.703125 -0.71875 -3.65625q0 -2.125 0.796875 -3.703125q0.8125 -1.578125 2.3125 -2.390625q1.5 -0.828125 3.296875 -0.828125q2.046875 0 3.4375 1.046875q1.390625 1.03125 1.9375 2.90625l-1.765625 0.421875q-0.46875 -1.484375 -1.375 -2.15625q-0.90625 -0.6875 -2.265625 -0.6875q-1.5625 0 -2.625 0.75q-1.046875 0.75 -1.484375 2.03125q-0.421875 1.265625 -0.421875 2.609375q0 1.734375 0.5 3.03125q0.515625 1.28125 1.578125 1.921875q1.078125 0.640625 2.3125 0.640625q1.515625 0 2.5625 -0.859375q1.046875 -0.875 1.421875 -2.59375zm2.926056 -0.15625q0 -2.734375 1.53125 -4.0625q1.265625 -1.09375 3.09375 -1.09375q2.03125 0 3.3125 1.34375q1.296875 1.328125 1.296875 3.671875q0 1.90625 -0.578125 3.0q-0.5625 1.078125 -1.65625 1.6875q-1.078125 0.59375 -2.375 0.59375q-2.0625 0 -3.34375 -1.328125q-1.28125 -1.328125 -1.28125 -3.8125zm1.71875 0q0 1.890625 0.828125 2.828125q0.828125 0.9375 2.078125 0.9375q1.25 0 2.0625 -0.9375q0.828125 -0.953125 0.828125 -2.890625q0 -1.828125 -0.828125 -2.765625q-0.828125 -0.9375 -2.0625 -0.9375q-1.25 0 -2.078125 0.9375q-0.828125 0.9375 -0.828125 2.828125zm9.281982 4.921875l0 -9.859375l1.5 0l0 1.40625q1.09375 -1.625 3.140625 -1.625q0.890625 0 1.640625 0.328125q0.75 0.3125 1.109375 0.84375q0.375 0.515625 0.53125 1.21875q0.09375 0.46875 0.09375 1.625l0 6.0625l-1.671875 0l0 -6.0q0 -1.015625 -0.203125 -1.515625q-0.1875 -0.515625 -0.6875 -0.8125q-0.5 -0.296875 -1.171875 -0.296875q-1.0625 0 -1.84375 0.671875q-0.765625 0.671875 -0.765625 2.578125l0 5.375l-1.671875 0zm10.375702 0l0 -9.859375l1.5 0l0 1.40625q1.09375 -1.625 3.140625 -1.625q0.890625 0 1.640625 0.328125q0.75 0.3125 1.109375 0.84375q0.375 0.515625 0.53125 1.21875q0.09375 0.46875 0.09375 1.625l0 6.0625l-1.671875 0l0 -6.0q0 -1.015625 -0.203125 -1.515625q-0.1875 -0.515625 -0.6875 -0.8125q-0.5 -0.296875 -1.171875 -0.296875q-1.0625 0 -1.84375 0.671875q-0.765625 0.671875 -0.765625 2.578125l0 5.375l-1.671875 0zm17.125732 -3.171875l1.71875 0.21875q-0.40625 1.5 -1.515625 2.34375q-1.09375 0.828125 -2.8125 0.828125q-2.15625 0 -3.421875 -1.328125q-1.265625 -1.328125 -1.265625 -3.734375q0 -2.484375 1.265625 -3.859375q1.28125 -1.375 3.328125 -1.375q1.984375 0 3.234375 1.34375q1.25 1.34375 1.25 3.796875q0 0.140625 -0.015625 0.4375l-7.34375 0q0.09375 1.625 0.921875 2.484375q0.828125 0.859375 2.0625 0.859375q0.90625 0 1.546875 -0.46875q0.65625 -0.484375 1.046875 -1.546875zm-5.484375 -2.703125l5.5 0q-0.109375 -1.234375 -0.625 -1.859375q-0.796875 -0.96875 -2.078125 -0.96875q-1.140625 0 -1.9375 0.78125q-0.78125 0.765625 -0.859375 2.046875zm15.547577 2.265625l1.640625 0.21875q-0.265625 1.6875 -1.375 2.65625q-1.109375 0.953125 -2.734375 0.953125q-2.015625 0 -3.25 -1.3125q-1.21875 -1.328125 -1.21875 -3.796875q0 -1.59375 0.515625 -2.78125q0.53125 -1.203125 1.609375 -1.796875q1.09375 -0.609375 2.359375 -0.609375q1.609375 0 2.625 0.8125q1.015625 0.8125 1.3125 2.3125l-1.625 0.25q-0.234375 -1.0 -0.828125 -1.5q-0.59375 -0.5 -1.421875 -0.5q-1.265625 0 -2.0625 0.90625q-0.78125 0.90625 -0.78125 2.859375q0 1.984375 0.765625 2.890625q0.765625 0.890625 1.984375 0.890625q0.984375 0 1.640625 -0.59375q0.65625 -0.609375 0.84375 -1.859375zm6.546875 2.109375l0.234375 1.484375q-0.703125 0.140625 -1.265625 0.140625q-0.90625 0 -1.40625 -0.28125q-0.5 -0.296875 -0.703125 -0.75q-0.203125 -0.46875 -0.203125 -1.984375l0 -5.65625l-1.234375 0l0 -1.3125l1.234375 0l0 -2.4375l1.65625 -1.0l0 3.4375l1.6875 0l0 1.3125l-1.6875 0l0 5.75q0 0.71875 0.078125 0.921875q0.09375 0.203125 0.296875 0.328125q0.203125 0.125 0.578125 0.125q0.265625 0 0.734375 -0.078125zm8.277069 -1.671875l1.71875 0.21875q-0.40625 1.5 -1.515625 2.34375q-1.09375 0.828125 -2.8125 0.828125q-2.15625 0 -3.421875 -1.328125q-1.265625 -1.328125 -1.265625 -3.734375q0 -2.484375 1.265625 -3.859375q1.28125 -1.375 3.328125 -1.375q1.984375 0 3.234375 1.34375q1.25 1.34375 1.25 3.796875q0 0.140625 -0.015625 0.4375l-7.34375 0q0.09375 1.625 0.921875 2.484375q0.828125 0.859375 2.0625 0.859375q0.90625 0 1.546875 -0.46875q0.65625 -0.484375 1.046875 -1.546875zm-5.484375 -2.703125l5.5 0q-0.109375 -1.234375 -0.625 -1.859375q-0.796875 -0.96875 -2.078125 -0.96875q-1.140625 0 -1.9375 0.78125q-0.78125 0.765625 -0.859375 2.046875zm15.500702 5.875l0 -1.25q-0.9375 1.46875 -2.75 1.46875q-1.171875 0 -2.171875 -0.640625q-0.984375 -0.65625 -1.53125 -1.8125q-0.53125 -1.171875 -0.53125 -2.6875q0 -1.46875 0.484375 -2.671875q0.5 -1.203125 1.46875 -1.84375q0.984375 -0.640625 2.203125 -0.640625q0.890625 0 1.578125 0.375q0.703125 0.375 1.140625 0.984375l0 -4.875l1.65625 0l0 13.59375l-1.546875 0zm-5.28125 -4.921875q0 1.890625 0.796875 2.828125q0.8125 0.9375 1.890625 0.9375q1.09375 0 1.859375 -0.890625q0.765625 -0.890625 0.765625 -2.734375q0 -2.015625 -0.78125 -2.953125q-0.78125 -0.953125 -1.921875 -0.953125q-1.109375 0 -1.859375 0.90625q-0.75 0.90625 -0.75 2.859375zm17.637146 8.921875q-1.375 -1.75 -2.328125 -4.078125q-0.953125 -2.34375 -0.953125 -4.84375q0 -2.21875 0.703125 -4.234375q0.84375 -2.34375 2.578125 -4.671875l1.203125 0q-1.125 1.921875 -1.484375 2.75q-0.5625 1.28125 -0.890625 2.671875q-0.40625 1.734375 -0.40625 3.484375q0 4.46875 2.78125 8.921875l-1.203125 0zm2.556427 -7.5625l1.765625 -0.15625q0.1875 1.28125 0.890625 1.9375q0.71875 0.640625 1.71875 0.640625q1.203125 0 2.03125 -0.90625q0.84375 -0.90625 0.84375 -2.421875q0 -1.421875 -0.8125 -2.25q-0.796875 -0.828125 -2.09375 -0.828125q-0.796875 0 -1.453125 0.375q-0.640625 0.359375 -1.015625 0.953125l-1.578125 -0.203125l1.328125 -7.0l6.765625 0l0 1.609375l-5.4375 0l-0.734375 3.640625q1.234375 -0.84375 2.578125 -0.84375q1.78125 0 3.0 1.234375q1.234375 1.234375 1.234375 3.171875q0 1.84375 -1.078125 3.1875q-1.3125 1.65625 -3.578125 1.65625q-1.859375 0 -3.03125 -1.03125q-1.171875 -1.046875 -1.34375 -2.765625zm10.375732 -3.140625q0 -2.421875 0.5 -3.890625q0.5 -1.46875 1.46875 -2.265625q0.984375 -0.796875 2.46875 -0.796875q1.09375 0 1.921875 0.4375q0.828125 0.4375 1.359375 1.28125q0.546875 0.828125 0.84375 2.015625q0.3125 1.1875 0.3125 3.21875q0 2.390625 -0.5 3.859375q-0.484375 1.46875 -1.46875 2.28125q-0.96875 0.796875 -2.46875 0.796875q-1.96875 0 -3.078125 -1.40625q-1.359375 -1.703125 -1.359375 -5.53125zm1.71875 0q0 3.34375 0.78125 4.453125q0.796875 1.109375 1.9375 1.109375q1.15625 0 1.9375 -1.109375q0.78125 -1.125 0.78125 -4.453125q0 -3.359375 -0.78125 -4.46875q-0.78125 -1.109375 -1.953125 -1.109375q-1.15625 0 -1.828125 0.984375q-0.875 1.234375 -0.875 4.59375zm8.656982 0q0 -2.421875 0.5 -3.890625q0.5 -1.46875 1.46875 -2.265625q0.984375 -0.796875 2.46875 -0.796875q1.09375 0 1.921875 0.4375q0.828125 0.4375 1.359375 1.28125q0.546875 0.828125 0.84375 2.015625q0.3125 1.1875 0.3125 3.21875q0 2.390625 -0.5 3.859375q-0.484375 1.46875 -1.46875 2.28125q-0.96875 0.796875 -2.46875 0.796875q-1.96875 0 -3.078125 -1.40625q-1.359375 -1.703125 -1.359375 -5.53125zm1.71875 0q0 3.34375 0.78125 4.453125q0.796875 1.109375 1.9375 1.109375q1.15625 0 1.9375 -1.109375q0.78125 -1.125 0.78125 -4.453125q0 -3.359375 -0.78125 -4.46875q-0.78125 -1.109375 -1.953125 -1.109375q-1.15625 0 -1.828125 0.984375q-0.875 1.234375 -0.875 4.59375zm8.016357 6.703125l3.59375 -5.125l-3.328125 -4.734375l2.09375 0l1.515625 2.3125q0.421875 0.65625 0.671875 1.109375q0.421875 -0.609375 0.765625 -1.09375l1.65625 -2.328125l1.984375 0l-3.390625 4.640625l3.65625 5.21875l-2.046875 0l-2.03125 -3.0625l-0.53125 -0.828125l-2.59375 3.890625l-2.015625 0zm15.328125 0l0 -3.25l-5.90625 0l0 -1.53125l6.21875 -8.8125l1.359375 0l0 8.8125l1.84375 0l0 1.53125l-1.84375 0l0 3.25l-1.671875 0zm0 -4.78125l0 -6.140625l-4.25 6.140625l4.25 0zm13.797546 3.171875l0 1.609375l-8.984375 0q-0.015625 -0.609375 0.1875 -1.15625q0.34375 -0.921875 1.09375 -1.8125q0.765625 -0.890625 2.1875 -2.0625q2.21875 -1.8125 3.0 -2.875q0.78125 -1.0625 0.78125 -2.015625q0 -0.984375 -0.71875 -1.671875q-0.703125 -0.6875 -1.84375 -0.6875q-1.203125 0 -1.9375 0.734375q-0.71875 0.71875 -0.71875 2.0l-1.71875 -0.171875q0.171875 -1.921875 1.328125 -2.921875q1.15625 -1.015625 3.09375 -1.015625q1.953125 0 3.09375 1.09375q1.140625 1.078125 1.140625 2.6875q0 0.8125 -0.34375 1.609375q-0.328125 0.78125 -1.109375 1.65625q-0.765625 0.859375 -2.5625 2.390625q-1.5 1.265625 -1.9375 1.71875q-0.421875 0.4375 -0.703125 0.890625l6.671875 0zm3.1569824 5.609375l-1.1875 0q2.765625 -4.453125 2.765625 -8.921875q0 -1.734375 -0.390625 -3.453125q-0.328125 -1.390625 -0.890625 -2.671875q-0.359375 -0.84375 -1.484375 -2.78125l1.1875 0q1.75 2.328125 2.578125 4.671875q0.71875 2.015625 0.71875 4.234375q0 2.5 -0.96875 4.84375q-0.953125 2.328125 -2.328125 4.078125z" fill-rule="nonzero"></path><path fill="#000000" fill-opacity="0.0" d="m475.09448 413.32974l0 24.125977" fill-rule="evenodd"></path><path stroke="#000000" stroke-width="1.0" stroke-linejoin="round" stroke-linecap="butt" d="m475.09448 413.3297l0 18.126007" fill-rule="evenodd"></path><path fill="#000000" stroke="#000000" stroke-width="1.0" stroke-linecap="butt" d="m473.44275 431.45572l1.6517334 4.538086l1.6517334 -4.538086z" fill-rule="evenodd"></path><path fill="#000000" fill-opacity="0.0" d="m475.09448 329.01575l0 25.322845" fill-rule="evenodd"></path><path stroke="#000000" stroke-width="1.0" stroke-linejoin="round" stroke-linecap="butt" d="m475.09448 329.01575l0 19.322845" fill-rule="evenodd"></path><path fill="#000000" stroke="#000000" stroke-width="1.0" stroke-linecap="butt" d="m473.44275 348.3386l1.6517334 4.538086l1.6517334 -4.538086z" fill-rule="evenodd"></path><path fill="#000000" fill-opacity="0.0" d="m475.09448 496.44235l0 29.984283" fill-rule="evenodd"></path><path stroke="#000000" stroke-width="1.0" stroke-linejoin="round" stroke-linecap="butt" d="m475.09448 496.44238l0 23.984253" fill-rule="evenodd"></path><path fill="#000000" stroke="#000000" stroke-width="1.0" stroke-linecap="butt" d="m473.44275 520.42664l1.6517334 4.538086l1.6517334 -4.538086z" fill-rule="evenodd"></path><path fill="#000000" fill-opacity="0.0" d="m359.0 185.73694l232.18896 0l0 58.992126l-232.18896 0z" fill-rule="evenodd"></path><path stroke="#000000" stroke-width="2.0" stroke-linejoin="round" stroke-linecap="butt" d="m359.0 185.73694l232.18896 0l0 58.992126l-232.18896 0z" fill-rule="evenodd"></path><path fill="#000000" d="m401.82367 212.65694l0 -13.59375l1.796875 0l0 11.984375l6.703125 0l0 1.609375l-8.5 0zm9.844482 -4.375l1.6875 -0.140625q0.125 1.015625 0.5625 1.671875q0.4375 0.65625 1.359375 1.0625q0.9375 0.40625 2.09375 0.40625q1.03125 0 1.8125 -0.3125q0.796875 -0.3125 1.1875 -0.84375q0.390625 -0.53125 0.390625 -1.15625q0 -0.640625 -0.375 -1.109375q-0.375 -0.484375 -1.234375 -0.8125q-0.546875 -0.21875 -2.421875 -0.65625q-1.875 -0.453125 -2.625 -0.859375q-0.96875 -0.515625 -1.453125 -1.265625q-0.46875 -0.75 -0.46875 -1.6875q0 -1.03125 0.578125 -1.921875q0.59375 -0.90625 1.703125 -1.359375q1.125 -0.46875 2.5 -0.46875q1.515625 0 2.671875 0.484375q1.15625 0.484375 1.765625 1.4375q0.625 0.9375 0.671875 2.140625l-1.71875 0.125q-0.140625 -1.28125 -0.953125 -1.9375q-0.796875 -0.671875 -2.359375 -0.671875q-1.625 0 -2.375 0.609375q-0.75 0.59375 -0.75 1.4375q0 0.734375 0.53125 1.203125q0.515625 0.46875 2.703125 0.96875q2.203125 0.5 3.015625 0.875q1.1875 0.546875 1.75 1.390625q0.578125 0.828125 0.578125 1.921875q0 1.09375 -0.625 2.0625q-0.625 0.953125 -1.796875 1.484375q-1.15625 0.53125 -2.609375 0.53125q-1.84375 0 -3.09375 -0.53125q-1.25 -0.546875 -1.96875 -1.625q-0.703125 -1.078125 -0.734375 -2.453125zm16.506073 4.375l0 -12.0l-4.46875 0l0 -1.59375l10.765625 0l0 1.59375l-4.5 0l0 12.0l-1.796875 0zm7.8803406 0l0 -13.59375l2.71875 0l3.21875 9.625q0.4375 1.34375 0.640625 2.015625q0.234375 -0.75 0.734375 -2.1875l3.25 -9.453125l2.421875 0l0 13.59375l-1.734375 0l0 -11.390625l-3.953125 11.390625l-1.625 0l-3.9375 -11.578125l0 11.578125l-1.734375 0zm23.697052 -1.609375l0 1.609375l-8.984375 0q-0.015625 -0.609375 0.1875 -1.15625q0.34375 -0.921875 1.09375 -1.8125q0.765625 -0.890625 2.1875 -2.0625q2.21875 -1.8125 3.0 -2.875q0.78125 -1.0625 0.78125 -2.015625q0 -0.984375 -0.71875 -1.671875q-0.703125 -0.6875 -1.84375 -0.6875q-1.203125 0 -1.9375 0.734375q-0.71875 0.71875 -0.71875 2.0l-1.71875 -0.171875q0.171875 -1.921875 1.328125 -2.921875q1.15625 -1.015625 3.09375 -1.015625q1.953125 0 3.09375 1.09375q1.140625 1.078125 1.140625 2.6875q0 0.8125 -0.34375 1.609375q-0.328125 0.78125 -1.109375 1.65625q-0.765625 0.859375 -2.5625 2.390625q-1.5 1.265625 -1.9375 1.71875q-0.421875 0.4375 -0.703125 0.890625l6.671875 0zm10.434021 5.609375q-1.375 -1.75 -2.328125 -4.078125q-0.953125 -2.34375 -0.953125 -4.84375q0 -2.21875 0.703125 -4.234375q0.84375 -2.34375 2.578125 -4.671875l1.203125 0q-1.125 1.921875 -1.484375 2.75q-0.5625 1.28125 -0.890625 2.671875q-0.40625 1.734375 -0.40625 3.484375q0 4.46875 2.78125 8.921875l-1.203125 0zm2.556427 -7.5625l1.765625 -0.15625q0.1875 1.28125 0.890625 1.9375q0.71875 0.640625 1.71875 0.640625q1.203125 0 2.03125 -0.90625q0.84375 -0.90625 0.84375 -2.421875q0 -1.421875 -0.8125 -2.25q-0.796875 -0.828125 -2.09375 -0.828125q-0.796875 0 -1.453125 0.375q-0.640625 0.359375 -1.015625 0.953125l-1.578125 -0.203125l1.328125 -7.0l6.765625 0l0 1.609375l-5.4375 0l-0.734375 3.640625q1.234375 -0.84375 2.578125 -0.84375q1.78125 0 3.0 1.234375q1.234375 1.234375 1.234375 3.171875q0 1.84375 -1.078125 3.1875q-1.3125 1.65625 -3.578125 1.65625q-1.859375 0 -3.03125 -1.03125q-1.171875 -1.046875 -1.34375 -2.765625zm10.375732 -3.140625q0 -2.421875 0.5 -3.890625q0.5 -1.46875 1.46875 -2.265625q0.984375 -0.796875 2.46875 -0.796875q1.09375 0 1.921875 0.4375q0.828125 0.4375 1.359375 1.28125q0.546875 0.828125 0.84375 2.015625q0.3125 1.1875 0.3125 3.21875q0 2.390625 -0.5 3.859375q-0.484375 1.46875 -1.46875 2.28125q-0.96875 0.796875 -2.46875 0.796875q-1.96875 0 -3.078125 -1.40625q-1.359375 -1.703125 -1.359375 -5.53125zm1.71875 0q0 3.34375 0.78125 4.453125q0.796875 1.109375 1.9375 1.109375q1.15625 0 1.9375 -1.109375q0.78125 -1.125 0.78125 -4.453125q0 -3.359375 -0.78125 -4.46875q-0.78125 -1.109375 -1.953125 -1.109375q-1.15625 0 -1.828125 0.984375q-0.875 1.234375 -0.875 4.59375zm8.656952 0q0 -2.421875 0.5 -3.890625q0.5 -1.46875 1.46875 -2.265625q0.984375 -0.796875 2.46875 -0.796875q1.09375 0 1.921875 0.4375q0.828125 0.4375 1.359375 1.28125q0.546875 0.828125 0.84375 2.015625q0.3125 1.1875 0.3125 3.21875q0 2.390625 -0.5 3.859375q-0.484375 1.46875 -1.46875 2.28125q-0.96875 0.796875 -2.46875 0.796875q-1.96875 0 -3.078125 -1.40625q-1.359375 -1.703125 -1.359375 -5.53125zm1.71875 0q0 3.34375 0.78125 4.453125q0.796875 1.109375 1.9375 1.109375q1.15625 0 1.9375 -1.109375q0.78125 -1.125 0.78125 -4.453125q0 -3.359375 -0.78125 -4.46875q-0.78125 -1.109375 -1.953125 -1.109375q-1.15625 0 -1.828125 0.984375q-0.875 1.234375 -0.875 4.59375zm8.016357 6.703125l3.59375 -5.125l-3.328125 -4.734375l2.09375 0l1.515625 2.3125q0.421875 0.65625 0.671875 1.109375q0.421875 -0.609375 0.765625 -1.09375l1.65625 -2.328125l1.9844055 0l-3.3906555 4.640625l3.6562805 5.21875l-2.0469055 0l-2.03125 -3.0625l-0.53125 -0.828125l-2.59375 3.890625l-2.015625 0zm9.9687805 -3.5625l1.765625 -0.15625q0.1875 1.28125 0.890625 1.9375q0.71875 0.640625 1.71875 0.640625q1.203125 0 2.03125 -0.90625q0.84375 -0.90625 0.84375 -2.421875q0 -1.421875 -0.8125 -2.25q-0.796875 -0.828125 -2.09375 -0.828125q-0.796875 0 -1.453125 0.375q-0.640625 0.359375 -1.015625 0.953125l-1.578125 -0.203125l1.328125 -7.0l6.765625 0l0 1.609375l-5.4375 0l-0.734375 3.640625q1.234375 -0.84375 2.578125 -0.84375q1.78125 0 3.0 1.234375q1.234375 1.234375 1.234375 3.171875q0 1.84375 -1.078125 3.1875q-1.3125 1.65625 -3.578125 1.65625q-1.859375 0 -3.03125 -1.03125q-1.171875 -1.046875 -1.34375 -2.765625zm10.375671 -3.140625q0 -2.421875 0.5 -3.890625q0.5 -1.46875 1.46875 -2.265625q0.984375 -0.796875 2.46875 -0.796875q1.09375 0 1.921875 0.4375q0.828125 0.4375 1.359375 1.28125q0.546875 0.828125 0.84375 2.015625q0.3125 1.1875 0.3125 3.21875q0 2.390625 -0.5 3.859375q-0.484375 1.46875 -1.46875 2.28125q-0.96875 0.796875 -2.46875 0.796875q-1.96875 0 -3.078125 -1.40625q-1.359375 -1.703125 -1.359375 -5.53125zm1.71875 0q0 3.34375 0.78125 4.453125q0.796875 1.109375 1.9375 1.109375q1.15625 0 1.9375 -1.109375q0.78125 -1.125 0.78125 -4.453125q0 -3.359375 -0.78125 -4.46875q-0.78125 -1.109375 -1.953125 -1.109375q-1.15625 0 -1.828125 0.984375q-0.875 1.234375 -0.875 4.59375zm8.656982 0q0 -2.421875 0.5 -3.890625q0.5 -1.46875 1.46875 -2.265625q0.984375 -0.796875 2.46875 -0.796875q1.09375 0 1.921875 0.4375q0.828125 0.4375 1.359375 1.28125q0.546875 0.828125 0.84375 2.015625q0.3125 1.1875 0.3125 3.21875q0 2.390625 -0.5 3.859375q-0.484375 1.46875 -1.46875 2.28125q-0.96875 0.796875 -2.46875 0.796875q-1.96875 0 -3.078125 -1.40625q-1.359375 -1.703125 -1.359375 -5.53125zm1.71875 0q0 3.34375 0.78125 4.453125q0.796875 1.109375 1.9375 1.109375q1.15625 0 1.9375 -1.109375q0.78125 -1.125 0.78125 -4.453125q0 -3.359375 -0.78125 -4.46875q-0.78125 -1.109375 -1.953125 -1.109375q-1.15625 0 -1.828125 0.984375q-0.875 1.234375 -0.875 4.59375zm10.219421 10.703125l-1.1875 0q2.765625 -4.453125 2.765625 -8.921875q0 -1.734375 -0.390625 -3.453125q-0.328125 -1.390625 -0.890625 -2.671875q-0.359375 -0.84375 -1.484375 -2.78125l1.1875 0q1.75 2.328125 2.578125 4.671875q0.71875 2.015625 0.71875 4.234375q0 2.5 -0.96875 4.84375q-0.953125 2.328125 -2.328125 4.078125z" fill-rule="nonzero"></path><path fill="#000000" d="m395.9714 238.43819l0 -13.640625l1.53125 0l0 1.28125q0.53125 -0.75 1.203125 -1.125q0.6875 -0.375 1.640625 -0.375q1.265625 0 2.234375 0.65625q0.96875 0.640625 1.453125 1.828125q0.5 1.1875 0.5 2.59375q0 1.515625 -0.546875 2.734375q-0.546875 1.203125 -1.578125 1.84375q-1.03125 0.640625 -2.171875 0.640625q-0.84375 0 -1.515625 -0.34375q-0.65625 -0.359375 -1.078125 -0.890625l0 4.796875l-1.671875 0zm1.515625 -8.65625q0 1.90625 0.765625 2.8125q0.78125 0.90625 1.875 0.90625q1.109375 0 1.890625 -0.9375q0.796875 -0.9375 0.796875 -2.921875q0 -1.875 -0.78125 -2.8125q-0.765625 -0.9375 -1.84375 -0.9375q-1.0625 0 -1.890625 1.0q-0.8125 1.0 -0.8125 2.890625zm8.844452 4.875l0 -9.859375l1.5 0l0 1.5q0.578125 -1.046875 1.0625 -1.375q0.484375 -0.34375 1.078125 -0.34375q0.84375 0 1.71875 0.546875l-0.578125 1.546875q-0.609375 -0.359375 -1.234375 -0.359375q-0.546875 0 -0.984375 0.328125q-0.421875 0.328125 -0.609375 0.90625q-0.28125 0.890625 -0.28125 1.953125l0 5.15625l-1.671875 0zm5.603302 -4.921875q0 -2.734375 1.53125 -4.0625q1.265625 -1.09375 3.09375 -1.09375q2.03125 0 3.3125 1.34375q1.296875 1.328125 1.296875 3.671875q0 1.90625 -0.578125 3.0q-0.5625 1.078125 -1.65625 1.6875q-1.078125 0.59375 -2.375 0.59375q-2.0625 0 -3.34375 -1.328125q-1.28125 -1.328125 -1.28125 -3.8125zm1.71875 0q0 1.890625 0.828125 2.828125q0.828125 0.9375 2.078125 0.9375q1.25 0 2.0625 -0.9375q0.828125 -0.953125 0.828125 -2.890625q0 -1.828125 -0.828125 -2.765625q-0.828125 -0.9375 -2.0625 -0.9375q-1.25 0 -2.078125 0.9375q-0.828125 0.9375 -0.828125 2.828125zm9.281982 -6.734375l0 -1.9375l1.65625 0l0 1.9375l-1.65625 0zm-2.125 15.484375l0.3125 -1.421875q0.5 0.125 0.796875 0.125q0.515625 0 0.765625 -0.34375q0.25 -0.328125 0.25 -1.6875l0 -10.359375l1.65625 0l0 10.390625q0 1.828125 -0.46875 2.546875q-0.59375 0.921875 -2.0 0.921875q-0.671875 0 -1.3125 -0.171875zm13.019806 -7.0l1.71875 0.21875q-0.40625 1.5 -1.515625 2.34375q-1.09375 0.828125 -2.8125 0.828125q-2.15625 0 -3.421875 -1.328125q-1.265625 -1.328125 -1.265625 -3.734375q0 -2.484375 1.265625 -3.859375q1.28125 -1.375 3.328125 -1.375q1.984375 0 3.234375 1.34375q1.25 1.34375 1.25 3.796875q0 0.140625 -0.015625 0.4375l-7.34375 0q0.09375 1.625 0.921875 2.484375q0.828125 0.859375 2.0625 0.859375q0.90625 0 1.546875 -0.46875q0.65625 -0.484375 1.046875 -1.546875zm-5.484375 -2.703125l5.5 0q-0.109375 -1.234375 -0.625 -1.859375q-0.796875 -0.96875 -2.078125 -0.96875q-1.140625 0 -1.9375 0.78125q-0.78125 0.765625 -0.859375 2.046875zm15.547607 2.265625l1.640625 0.21875q-0.265625 1.6875 -1.375 2.65625q-1.109375 0.953125 -2.734375 0.953125q-2.015625 0 -3.25 -1.3125q-1.21875 -1.328125 -1.21875 -3.796875q0 -1.59375 0.515625 -2.78125q0.53125 -1.203125 1.609375 -1.796875q1.09375 -0.609375 2.359375 -0.609375q1.609375 0 2.625 0.8125q1.015625 0.8125 1.3125 2.3125l-1.625 0.25q-0.234375 -1.0 -0.828125 -1.5q-0.59375 -0.5 -1.421875 -0.5q-1.265625 0 -2.0625 0.90625q-0.78125 0.90625 -0.78125 2.859375q0 1.984375 0.765625 2.890625q0.765625 0.890625 1.984375 0.890625q0.984375 0 1.640625 -0.59375q0.65625 -0.609375 0.84375 -1.859375zm6.546875 2.109375l0.234375 1.484375q-0.703125 0.140625 -1.265625 0.140625q-0.90625 0 -1.40625 -0.28125q-0.5 -0.296875 -0.703125 -0.75q-0.203125 -0.46875 -0.203125 -1.984375l0 -5.65625l-1.234375 0l0 -1.3125l1.234375 0l0 -2.4375l1.65625 -1.0l0 3.4375l1.6875 0l0 1.3125l-1.6875 0l0 5.75q0 0.71875 0.078125 0.921875q0.09375 0.203125 0.296875 0.328125q0.203125 0.125 0.578125 0.125q0.265625 0 0.734375 -0.078125zm1.5426636 -10.1875l0 -1.90625l1.671875 0l0 1.90625l-1.671875 0zm0 11.6875l0 -9.859375l1.671875 0l0 9.859375l-1.671875 0zm3.5042114 -4.921875q0 -2.734375 1.53125 -4.0625q1.265625 -1.09375 3.09375 -1.09375q2.03125 0 3.3125 1.34375q1.296875 1.328125 1.296875 3.671875q0 1.90625 -0.578125 3.0q-0.5625 1.078125 -1.65625 1.6875q-1.078125 0.59375 -2.375 0.59375q-2.0625 0 -3.34375 -1.328125q-1.28125 -1.328125 -1.28125 -3.8125zm1.71875 0q0 1.890625 0.828125 2.828125q0.828125 0.9375 2.078125 0.9375q1.25 0 2.0625 -0.9375q0.828125 -0.953125 0.828125 -2.890625q0 -1.828125 -0.828125 -2.765625q-0.828125 -0.9375 -2.0625 -0.9375q-1.25 0 -2.078125 0.9375q-0.828125 0.9375 -0.828125 2.828125zm9.281952 4.921875l0 -9.859375l1.5 0l0 1.40625q1.09375 -1.625 3.140625 -1.625q0.890625 0 1.640625 0.328125q0.75 0.3125 1.109375 0.84375q0.375 0.515625 0.53125 1.21875q0.09375 0.46875 0.09375 1.625l0 6.0625l-1.671875 0l0 -6.0q0 -1.015625 -0.203125 -1.515625q-0.1875 -0.515625 -0.6875 -0.8125q-0.5 -0.296875 -1.171875 -0.296875q-1.0625 0 -1.84375 0.671875q-0.765625 0.671875 -0.765625 2.578125l0 5.375l-1.671875 0zm14.887146 -2.9375l1.65625 -0.265625q0.140625 1.0 0.765625 1.53125q0.640625 0.515625 1.78125 0.515625q1.15625 0 1.703125 -0.46875q0.5625 -0.46875 0.5625 -1.09375q0 -0.5625 -0.484375 -0.890625q-0.34375 -0.21875 -1.703125 -0.5625q-1.84375 -0.46875 -2.5625 -0.796875q-0.703125 -0.34375 -1.078125 -0.9375q-0.359375 -0.609375 -0.359375 -1.328125q0 -0.65625 0.296875 -1.21875q0.3125 -0.5625 0.828125 -0.9375q0.390625 -0.28125 1.0625 -0.484375q0.671875 -0.203125 1.4375 -0.203125q1.171875 0 2.046875 0.34375q0.875 0.328125 1.28125 0.90625q0.421875 0.5625 0.578125 1.515625l-1.625 0.21875q-0.109375 -0.75 -0.65625 -1.171875q-0.53125 -0.4375 -1.5 -0.4375q-1.15625 0 -1.640625 0.390625q-0.484375 0.375 -0.484375 0.875q0 0.328125 0.203125 0.59375q0.203125 0.265625 0.640625 0.4375q0.25 0.09375 1.46875 0.4375q1.765625 0.46875 2.46875 0.765625q0.703125 0.296875 1.09375 0.875q0.40625 0.578125 0.40625 1.4375q0 0.828125 -0.484375 1.578125q-0.484375 0.734375 -1.40625 1.140625q-0.921875 0.390625 -2.078125 0.390625q-1.921875 0 -2.9375 -0.796875q-1.0 -0.796875 -1.28125 -2.359375zm10.015625 -8.75l0 -1.90625l1.671875 0l0 1.90625l-1.671875 0zm0 11.6875l0 -9.859375l1.671875 0l0 9.859375l-1.671875 0zm3.2542114 0l0 -1.359375l6.265625 -7.1875q-1.0625 0.046875 -1.875 0.046875l-4.015625 0l0 -1.359375l8.046875 0l0 1.109375l-5.34375 6.25l-1.015625 1.140625q1.109375 -0.078125 2.09375 -0.078125l4.5625 0l0 1.4375l-8.71875 0zm16.953125 -3.171875l1.7187805 0.21875q-0.40625 1.5 -1.5156555 2.34375q-1.09375 0.828125 -2.8125 0.828125q-2.15625 0 -3.421875 -1.328125q-1.265625 -1.328125 -1.265625 -3.734375q0 -2.484375 1.265625 -3.859375q1.28125 -1.375 3.328125 -1.375q1.984375 0 3.2344055 1.34375q1.25 1.34375 1.25 3.796875q0 0.140625 -0.015625 0.4375l-7.3437805 0q0.09375 1.625 0.921875 2.484375q0.828125 0.859375 2.0625 0.859375q0.90625 0 1.546875 -0.46875q0.65625 -0.484375 1.046875 -1.546875zm-5.484375 -2.703125l5.5 0q-0.109375 -1.234375 -0.625 -1.859375q-0.796875 -0.96875 -2.078125 -0.96875q-1.140625 0 -1.9375 0.78125q-0.78125 0.765625 -0.859375 2.046875zm9.578827 -2.078125l0 -1.90625l1.90625 0l0 1.90625l-1.90625 0zm0 7.953125l0 -1.90625l1.90625 0l0 1.90625l-1.90625 0zm18.210388 -1.609375l0 1.609375l-8.984375 0q-0.015625 -0.609375 0.1875 -1.15625q0.34375 -0.921875 1.09375 -1.8125q0.765625 -0.890625 2.1875 -2.0625q2.21875 -1.8125 3.0 -2.875q0.78125 -1.0625 0.78125 -2.015625q0 -0.984375 -0.71875 -1.671875q-0.703125 -0.6875 -1.84375 -0.6875q-1.203125 0 -1.9375 0.734375q-0.71875 0.71875 -0.71875 2.0l-1.71875 -0.171875q0.171875 -1.921875 1.328125 -2.921875q1.15625 -1.015625 3.09375 -1.015625q1.953125 0 3.09375 1.09375q1.140625 1.078125 1.140625 2.6875q0 0.8125 -0.34375 1.609375q-0.328125 0.78125 -1.109375 1.65625q-0.765625 0.859375 -2.5625 2.390625q-1.5 1.265625 -1.9375 1.71875q-0.421875 0.4375 -0.703125 0.890625l6.671875 0zm1.5944824 -5.09375q0 -2.421875 0.5 -3.890625q0.5 -1.46875 1.46875 -2.265625q0.984375 -0.796875 2.46875 -0.796875q1.09375 0 1.921875 0.4375q0.828125 0.4375 1.359375 1.28125q0.546875 0.828125 0.84375 2.015625q0.3125 1.1875 0.3125 3.21875q0 2.390625 -0.5 3.859375q-0.484375 1.46875 -1.46875 2.28125q-0.96875 0.796875 -2.46875 0.796875q-1.96875 0 -3.078125 -1.40625q-1.359375 -1.703125 -1.359375 -5.53125zm1.71875 0q0 3.34375 0.78125 4.453125q0.796875 1.109375 1.9375 1.109375q1.15625 0 1.9375 -1.109375q0.78125 -1.125 0.78125 -4.453125q0 -3.359375 -0.78125 -4.46875q-0.78125 -1.109375 -1.953125 -1.109375q-1.15625 0 -1.828125 0.984375q-0.875 1.234375 -0.875 4.59375zm8.656921 0q0 -2.421875 0.5 -3.890625q0.5 -1.46875 1.46875 -2.265625q0.984375 -0.796875 2.46875 -0.796875q1.09375 0 1.921875 0.4375q0.828125 0.4375 1.359375 1.28125q0.546875 0.828125 0.84375 2.015625q0.3125 1.1875 0.3125 3.21875q0 2.390625 -0.5 3.859375q-0.484375 1.46875 -1.46875 2.28125q-0.96875 0.796875 -2.46875 0.796875q-1.96875 0 -3.078125 -1.40625q-1.359375 -1.703125 -1.359375 -5.53125zm1.71875 0q0 3.34375 0.78125 4.453125q0.796875 1.109375 1.9375 1.109375q1.15625 0 1.9375 -1.109375q0.78125 -1.125 0.78125 -4.453125q0 -3.359375 -0.78125 -4.46875q-0.78125 -1.109375 -1.953125 -1.109375q-1.15625 0 -1.828125 0.984375q-0.875 1.234375 -0.875 4.59375z" fill-rule="nonzero"></path><path fill="#000000" fill-opacity="0.0" d="m359.0 270.02362l232.18896 0l0 58.992126l-232.18896 0z" fill-rule="evenodd"></path><path stroke="#000000" stroke-width="2.0" stroke-linejoin="round" stroke-linecap="butt" d="m359.0 270.02362l232.18896 0l0 58.992126l-232.18896 0z" fill-rule="evenodd"></path><path fill="#000000" d="m401.82367 296.94363l0 -13.59375l1.796875 0l0 11.984375l6.703125 0l0 1.609375l-8.5 0zm9.844482 -4.375l1.6875 -0.140625q0.125 1.015625 0.5625 1.671875q0.4375 0.65625 1.359375 1.0625q0.9375 0.40625 2.09375 0.40625q1.03125 0 1.8125 -0.3125q0.796875 -0.3125 1.1875 -0.84375q0.390625 -0.53125 0.390625 -1.15625q0 -0.640625 -0.375 -1.109375q-0.375 -0.484375 -1.234375 -0.8125q-0.546875 -0.21875 -2.421875 -0.65625q-1.875 -0.453125 -2.625 -0.859375q-0.96875 -0.515625 -1.453125 -1.265625q-0.46875 -0.75 -0.46875 -1.6875q0 -1.03125 0.578125 -1.921875q0.59375 -0.90625 1.703125 -1.359375q1.125 -0.46875 2.5 -0.46875q1.515625 0 2.671875 0.484375q1.15625 0.484375 1.765625 1.4375q0.625 0.9375 0.671875 2.140625l-1.71875 0.125q-0.140625 -1.28125 -0.953125 -1.9375q-0.796875 -0.671875 -2.359375 -0.671875q-1.625 0 -2.375 0.609375q-0.75 0.59375 -0.75 1.4375q0 0.734375 0.53125 1.203125q0.515625 0.46875 2.703125 0.96875q2.203125 0.5 3.015625 0.875q1.1875 0.546875 1.75 1.390625q0.578125 0.828125 0.578125 1.921875q0 1.09375 -0.625 2.0625q-0.625 0.953125 -1.796875 1.484375q-1.15625 0.53125 -2.609375 0.53125q-1.84375 0 -3.09375 -0.53125q-1.25 -0.546875 -1.96875 -1.625q-0.703125 -1.078125 -0.734375 -2.453125zm16.506073 4.375l0 -12.0l-4.46875 0l0 -1.59375l10.765625 0l0 1.59375l-4.5 0l0 12.0l-1.796875 0zm7.8803406 0l0 -13.59375l2.71875 0l3.21875 9.625q0.4375 1.34375 0.640625 2.015625q0.234375 -0.75 0.734375 -2.1875l3.25 -9.453125l2.421875 0l0 13.59375l-1.734375 0l0 -11.390625l-3.953125 11.390625l-1.625 0l-3.9375 -11.578125l0 11.578125l-1.734375 0zm23.697052 -1.609375l0 1.609375l-8.984375 0q-0.015625 -0.609375 0.1875 -1.15625q0.34375 -0.921875 1.09375 -1.8125q0.765625 -0.890625 2.1875 -2.0625q2.21875 -1.8125 3.0 -2.875q0.78125 -1.0625 0.78125 -2.015625q0 -0.984375 -0.71875 -1.671875q-0.703125 -0.6875 -1.84375 -0.6875q-1.203125 0 -1.9375 0.734375q-0.71875 0.71875 -0.71875 2.0l-1.71875 -0.171875q0.171875 -1.921875 1.328125 -2.921875q1.15625 -1.015625 3.09375 -1.015625q1.953125 0 3.09375 1.09375q1.140625 1.078125 1.140625 2.6875q0 0.8125 -0.34375 1.609375q-0.328125 0.78125 -1.109375 1.65625q-0.765625 0.859375 -2.5625 2.390625q-1.5 1.265625 -1.9375 1.71875q-0.421875 0.4375 -0.703125 0.890625l6.671875 0zm10.434021 5.609375q-1.375 -1.75 -2.328125 -4.078125q-0.953125 -2.34375 -0.953125 -4.84375q0 -2.21875 0.703125 -4.234375q0.84375 -2.34375 2.578125 -4.671875l1.203125 0q-1.125 1.921875 -1.484375 2.75q-0.5625 1.28125 -0.890625 2.671875q-0.40625 1.734375 -0.40625 3.484375q0 4.46875 2.78125 8.921875l-1.203125 0zm2.556427 -7.5625l1.765625 -0.15625q0.1875 1.28125 0.890625 1.9375q0.71875 0.640625 1.71875 0.640625q1.203125 0 2.03125 -0.90625q0.84375 -0.90625 0.84375 -2.421875q0 -1.421875 -0.8125 -2.25q-0.796875 -0.828125 -2.09375 -0.828125q-0.796875 0 -1.453125 0.375q-0.640625 0.359375 -1.015625 0.953125l-1.578125 -0.203125l1.328125 -7.0l6.765625 0l0 1.609375l-5.4375 0l-0.734375 3.640625q1.234375 -0.84375 2.578125 -0.84375q1.78125 0 3.0 1.234375q1.234375 1.234375 1.234375 3.171875q0 1.84375 -1.078125 3.1875q-1.3125 1.65625 -3.578125 1.65625q-1.859375 0 -3.03125 -1.03125q-1.171875 -1.046875 -1.34375 -2.765625zm10.375732 -3.140625q0 -2.421875 0.5 -3.890625q0.5 -1.46875 1.46875 -2.265625q0.984375 -0.796875 2.46875 -0.796875q1.09375 0 1.921875 0.4375q0.828125 0.4375 1.359375 1.28125q0.546875 0.828125 0.84375 2.015625q0.3125 1.1875 0.3125 3.21875q0 2.390625 -0.5 3.859375q-0.484375 1.46875 -1.46875 2.28125q-0.96875 0.796875 -2.46875 0.796875q-1.96875 0 -3.078125 -1.40625q-1.359375 -1.703125 -1.359375 -5.53125zm1.71875 0q0 3.34375 0.78125 4.453125q0.796875 1.109375 1.9375 1.109375q1.15625 0 1.9375 -1.109375q0.78125 -1.125 0.78125 -4.453125q0 -3.359375 -0.78125 -4.46875q-0.78125 -1.109375 -1.953125 -1.109375q-1.15625 0 -1.828125 0.984375q-0.875 1.234375 -0.875 4.59375zm8.656952 0q0 -2.421875 0.5 -3.890625q0.5 -1.46875 1.46875 -2.265625q0.984375 -0.796875 2.46875 -0.796875q1.09375 0 1.921875 0.4375q0.828125 0.4375 1.359375 1.28125q0.546875 0.828125 0.84375 2.015625q0.3125 1.1875 0.3125 3.21875q0 2.390625 -0.5 3.859375q-0.484375 1.46875 -1.46875 2.28125q-0.96875 0.796875 -2.46875 0.796875q-1.96875 0 -3.078125 -1.40625q-1.359375 -1.703125 -1.359375 -5.53125zm1.71875 0q0 3.34375 0.78125 4.453125q0.796875 1.109375 1.9375 1.109375q1.15625 0 1.9375 -1.109375q0.78125 -1.125 0.78125 -4.453125q0 -3.359375 -0.78125 -4.46875q-0.78125 -1.109375 -1.953125 -1.109375q-1.15625 0 -1.828125 0.984375q-0.875 1.234375 -0.875 4.59375zm8.016357 6.703125l3.59375 -5.125l-3.328125 -4.734375l2.09375 0l1.515625 2.3125q0.421875 0.65625 0.671875 1.109375q0.421875 -0.609375 0.765625 -1.09375l1.65625 -2.328125l1.9844055 0l-3.3906555 4.640625l3.6562805 5.21875l-2.0469055 0l-2.03125 -3.0625l-0.53125 -0.828125l-2.59375 3.890625l-2.015625 0zm9.9687805 -3.5625l1.765625 -0.15625q0.1875 1.28125 0.890625 1.9375q0.71875 0.640625 1.71875 0.640625q1.203125 0 2.03125 -0.90625q0.84375 -0.90625 0.84375 -2.421875q0 -1.421875 -0.8125 -2.25q-0.796875 -0.828125 -2.09375 -0.828125q-0.796875 0 -1.453125 0.375q-0.640625 0.359375 -1.015625 0.953125l-1.578125 -0.203125l1.328125 -7.0l6.765625 0l0 1.609375l-5.4375 0l-0.734375 3.640625q1.234375 -0.84375 2.578125 -0.84375q1.78125 0 3.0 1.234375q1.234375 1.234375 1.234375 3.171875q0 1.84375 -1.078125 3.1875q-1.3125 1.65625 -3.578125 1.65625q-1.859375 0 -3.03125 -1.03125q-1.171875 -1.046875 -1.34375 -2.765625zm10.375671 -3.140625q0 -2.421875 0.5 -3.890625q0.5 -1.46875 1.46875 -2.265625q0.984375 -0.796875 2.46875 -0.796875q1.09375 0 1.921875 0.4375q0.828125 0.4375 1.359375 1.28125q0.546875 0.828125 0.84375 2.015625q0.3125 1.1875 0.3125 3.21875q0 2.390625 -0.5 3.859375q-0.484375 1.46875 -1.46875 2.28125q-0.96875 0.796875 -2.46875 0.796875q-1.96875 0 -3.078125 -1.40625q-1.359375 -1.703125 -1.359375 -5.53125zm1.71875 0q0 3.34375 0.78125 4.453125q0.796875 1.109375 1.9375 1.109375q1.15625 0 1.9375 -1.109375q0.78125 -1.125 0.78125 -4.453125q0 -3.359375 -0.78125 -4.46875q-0.78125 -1.109375 -1.953125 -1.109375q-1.15625 0 -1.828125 0.984375q-0.875 1.234375 -0.875 4.59375zm8.656982 0q0 -2.421875 0.5 -3.890625q0.5 -1.46875 1.46875 -2.265625q0.984375 -0.796875 2.46875 -0.796875q1.09375 0 1.921875 0.4375q0.828125 0.4375 1.359375 1.28125q0.546875 0.828125 0.84375 2.015625q0.3125 1.1875 0.3125 3.21875q0 2.390625 -0.5 3.859375q-0.484375 1.46875 -1.46875 2.28125q-0.96875 0.796875 -2.46875 0.796875q-1.96875 0 -3.078125 -1.40625q-1.359375 -1.703125 -1.359375 -5.53125zm1.71875 0q0 3.34375 0.78125 4.453125q0.796875 1.109375 1.9375 1.109375q1.15625 0 1.9375 -1.109375q0.78125 -1.125 0.78125 -4.453125q0 -3.359375 -0.78125 -4.46875q-0.78125 -1.109375 -1.953125 -1.109375q-1.15625 0 -1.828125 0.984375q-0.875 1.234375 -0.875 4.59375zm10.219421 10.703125l-1.1875 0q2.765625 -4.453125 2.765625 -8.921875q0 -1.734375 -0.390625 -3.453125q-0.328125 -1.390625 -0.890625 -2.671875q-0.359375 -0.84375 -1.484375 -2.78125l1.1875 0q1.75 2.328125 2.578125 4.671875q0.71875 2.015625 0.71875 4.234375q0 2.5 -0.96875 4.84375q-0.953125 2.328125 -2.328125 4.078125z" fill-rule="nonzero"></path><path fill="#000000" d="m395.9714 322.72488l0 -13.640625l1.53125 0l0 1.28125q0.53125 -0.75 1.203125 -1.125q0.6875 -0.375 1.640625 -0.375q1.265625 0 2.234375 0.65625q0.96875 0.640625 1.453125 1.828125q0.5 1.1875 0.5 2.59375q0 1.515625 -0.546875 2.734375q-0.546875 1.203125 -1.578125 1.84375q-1.03125 0.640625 -2.171875 0.640625q-0.84375 0 -1.515625 -0.34375q-0.65625 -0.359375 -1.078125 -0.890625l0 4.796875l-1.671875 0zm1.515625 -8.65625q0 1.90625 0.765625 2.8125q0.78125 0.90625 1.875 0.90625q1.109375 0 1.890625 -0.9375q0.796875 -0.9375 0.796875 -2.921875q0 -1.875 -0.78125 -2.8125q-0.765625 -0.9375 -1.84375 -0.9375q-1.0625 0 -1.890625 1.0q-0.8125 1.0 -0.8125 2.890625zm8.844452 4.875l0 -9.859375l1.5 0l0 1.5q0.578125 -1.046875 1.0625 -1.375q0.484375 -0.34375 1.078125 -0.34375q0.84375 0 1.71875 0.546875l-0.578125 1.546875q-0.609375 -0.359375 -1.234375 -0.359375q-0.546875 0 -0.984375 0.328125q-0.421875 0.328125 -0.609375 0.90625q-0.28125 0.890625 -0.28125 1.953125l0 5.15625l-1.671875 0zm5.603302 -4.921875q0 -2.734375 1.53125 -4.0625q1.265625 -1.09375 3.09375 -1.09375q2.03125 0 3.3125 1.34375q1.296875 1.328125 1.296875 3.671875q0 1.90625 -0.578125 3.0q-0.5625 1.078125 -1.65625 1.6875q-1.078125 0.59375 -2.375 0.59375q-2.0625 0 -3.34375 -1.328125q-1.28125 -1.328125 -1.28125 -3.8125zm1.71875 0q0 1.890625 0.828125 2.828125q0.828125 0.9375 2.078125 0.9375q1.25 0 2.0625 -0.9375q0.828125 -0.953125 0.828125 -2.890625q0 -1.828125 -0.828125 -2.765625q-0.828125 -0.9375 -2.0625 -0.9375q-1.25 0 -2.078125 0.9375q-0.828125 0.9375 -0.828125 2.828125zm9.281982 -6.734375l0 -1.9375l1.65625 0l0 1.9375l-1.65625 0zm-2.125 15.484375l0.3125 -1.421875q0.5 0.125 0.796875 0.125q0.515625 0 0.765625 -0.34375q0.25 -0.328125 0.25 -1.6875l0 -10.359375l1.65625 0l0 10.390625q0 1.828125 -0.46875 2.546875q-0.59375 0.921875 -2.0 0.921875q-0.671875 0 -1.3125 -0.171875zm13.019806 -7.0l1.71875 0.21875q-0.40625 1.5 -1.515625 2.34375q-1.09375 0.828125 -2.8125 0.828125q-2.15625 0 -3.421875 -1.328125q-1.265625 -1.328125 -1.265625 -3.734375q0 -2.484375 1.265625 -3.859375q1.28125 -1.375 3.328125 -1.375q1.984375 0 3.234375 1.34375q1.25 1.34375 1.25 3.796875q0 0.140625 -0.015625 0.4375l-7.34375 0q0.09375 1.625 0.921875 2.484375q0.828125 0.859375 2.0625 0.859375q0.90625 0 1.546875 -0.46875q0.65625 -0.484375 1.046875 -1.546875zm-5.484375 -2.703125l5.5 0q-0.109375 -1.234375 -0.625 -1.859375q-0.796875 -0.96875 -2.078125 -0.96875q-1.140625 0 -1.9375 0.78125q-0.78125 0.765625 -0.859375 2.046875zm15.547607 2.265625l1.640625 0.21875q-0.265625 1.6875 -1.375 2.65625q-1.109375 0.953125 -2.734375 0.953125q-2.015625 0 -3.25 -1.3125q-1.21875 -1.328125 -1.21875 -3.796875q0 -1.59375 0.515625 -2.78125q0.53125 -1.203125 1.609375 -1.796875q1.09375 -0.609375 2.359375 -0.609375q1.609375 0 2.625 0.8125q1.015625 0.8125 1.3125 2.3125l-1.625 0.25q-0.234375 -1.0 -0.828125 -1.5q-0.59375 -0.5 -1.421875 -0.5q-1.265625 0 -2.0625 0.90625q-0.78125 0.90625 -0.78125 2.859375q0 1.984375 0.765625 2.890625q0.765625 0.890625 1.984375 0.890625q0.984375 0 1.640625 -0.59375q0.65625 -0.609375 0.84375 -1.859375zm6.546875 2.109375l0.234375 1.484375q-0.703125 0.140625 -1.265625 0.140625q-0.90625 0 -1.40625 -0.28125q-0.5 -0.296875 -0.703125 -0.75q-0.203125 -0.46875 -0.203125 -1.984375l0 -5.65625l-1.234375 0l0 -1.3125l1.234375 0l0 -2.4375l1.65625 -1.0l0 3.4375l1.6875 0l0 1.3125l-1.6875 0l0 5.75q0 0.71875 0.078125 0.921875q0.09375 0.203125 0.296875 0.328125q0.203125 0.125 0.578125 0.125q0.265625 0 0.734375 -0.078125zm1.5426636 -10.1875l0 -1.90625l1.671875 0l0 1.90625l-1.671875 0zm0 11.6875l0 -9.859375l1.671875 0l0 9.859375l-1.671875 0zm3.5042114 -4.921875q0 -2.734375 1.53125 -4.0625q1.265625 -1.09375 3.09375 -1.09375q2.03125 0 3.3125 1.34375q1.296875 1.328125 1.296875 3.671875q0 1.90625 -0.578125 3.0q-0.5625 1.078125 -1.65625 1.6875q-1.078125 0.59375 -2.375 0.59375q-2.0625 0 -3.34375 -1.328125q-1.28125 -1.328125 -1.28125 -3.8125zm1.71875 0q0 1.890625 0.828125 2.828125q0.828125 0.9375 2.078125 0.9375q1.25 0 2.0625 -0.9375q0.828125 -0.953125 0.828125 -2.890625q0 -1.828125 -0.828125 -2.765625q-0.828125 -0.9375 -2.0625 -0.9375q-1.25 0 -2.078125 0.9375q-0.828125 0.9375 -0.828125 2.828125zm9.281952 4.921875l0 -9.859375l1.5 0l0 1.40625q1.09375 -1.625 3.140625 -1.625q0.890625 0 1.640625 0.328125q0.75 0.3125 1.109375 0.84375q0.375 0.515625 0.53125 1.21875q0.09375 0.46875 0.09375 1.625l0 6.0625l-1.671875 0l0 -6.0q0 -1.015625 -0.203125 -1.515625q-0.1875 -0.515625 -0.6875 -0.8125q-0.5 -0.296875 -1.171875 -0.296875q-1.0625 0 -1.84375 0.671875q-0.765625 0.671875 -0.765625 2.578125l0 5.375l-1.671875 0zm14.887146 -2.9375l1.65625 -0.265625q0.140625 1.0 0.765625 1.53125q0.640625 0.515625 1.78125 0.515625q1.15625 0 1.703125 -0.46875q0.5625 -0.46875 0.5625 -1.09375q0 -0.5625 -0.484375 -0.890625q-0.34375 -0.21875 -1.703125 -0.5625q-1.84375 -0.46875 -2.5625 -0.796875q-0.703125 -0.34375 -1.078125 -0.9375q-0.359375 -0.609375 -0.359375 -1.328125q0 -0.65625 0.296875 -1.21875q0.3125 -0.5625 0.828125 -0.9375q0.390625 -0.28125 1.0625 -0.484375q0.671875 -0.203125 1.4375 -0.203125q1.171875 0 2.046875 0.34375q0.875 0.328125 1.28125 0.90625q0.421875 0.5625 0.578125 1.515625l-1.625 0.21875q-0.109375 -0.75 -0.65625 -1.171875q-0.53125 -0.4375 -1.5 -0.4375q-1.15625 0 -1.640625 0.390625q-0.484375 0.375 -0.484375 0.875q0 0.328125 0.203125 0.59375q0.203125 0.265625 0.640625 0.4375q0.25 0.09375 1.46875 0.4375q1.765625 0.46875 2.46875 0.765625q0.703125 0.296875 1.09375 0.875q0.40625 0.578125 0.40625 1.4375q0 0.828125 -0.484375 1.578125q-0.484375 0.734375 -1.40625 1.140625q-0.921875 0.390625 -2.078125 0.390625q-1.921875 0 -2.9375 -0.796875q-1.0 -0.796875 -1.28125 -2.359375zm10.015625 -8.75l0 -1.90625l1.671875 0l0 1.90625l-1.671875 0zm0 11.6875l0 -9.859375l1.671875 0l0 9.859375l-1.671875 0zm3.2542114 0l0 -1.359375l6.265625 -7.1875q-1.0625 0.046875 -1.875 0.046875l-4.015625 0l0 -1.359375l8.046875 0l0 1.109375l-5.34375 6.25l-1.015625 1.140625q1.109375 -0.078125 2.09375 -0.078125l4.5625 0l0 1.4375l-8.71875 0zm16.953125 -3.171875l1.7187805 0.21875q-0.40625 1.5 -1.5156555 2.34375q-1.09375 0.828125 -2.8125 0.828125q-2.15625 0 -3.421875 -1.328125q-1.265625 -1.328125 -1.265625 -3.734375q0 -2.484375 1.265625 -3.859375q1.28125 -1.375 3.328125 -1.375q1.984375 0 3.2344055 1.34375q1.25 1.34375 1.25 3.796875q0 0.140625 -0.015625 0.4375l-7.3437805 0q0.09375 1.625 0.921875 2.484375q0.828125 0.859375 2.0625 0.859375q0.90625 0 1.546875 -0.46875q0.65625 -0.484375 1.046875 -1.546875zm-5.484375 -2.703125l5.5 0q-0.109375 -1.234375 -0.625 -1.859375q-0.796875 -0.96875 -2.078125 -0.96875q-1.140625 0 -1.9375 0.78125q-0.78125 0.765625 -0.859375 2.046875zm9.578827 -2.078125l0 -1.90625l1.90625 0l0 1.90625l-1.90625 0zm0 7.953125l0 -1.90625l1.90625 0l0 1.90625l-1.90625 0zm18.210388 -1.609375l0 1.609375l-8.984375 0q-0.015625 -0.609375 0.1875 -1.15625q0.34375 -0.921875 1.09375 -1.8125q0.765625 -0.890625 2.1875 -2.0625q2.21875 -1.8125 3.0 -2.875q0.78125 -1.0625 0.78125 -2.015625q0 -0.984375 -0.71875 -1.671875q-0.703125 -0.6875 -1.84375 -0.6875q-1.203125 0 -1.9375 0.734375q-0.71875 0.71875 -0.71875 2.0l-1.71875 -0.171875q0.171875 -1.921875 1.328125 -2.921875q1.15625 -1.015625 3.09375 -1.015625q1.953125 0 3.09375 1.09375q1.140625 1.078125 1.140625 2.6875q0 0.8125 -0.34375 1.609375q-0.328125 0.78125 -1.109375 1.65625q-0.765625 0.859375 -2.5625 2.390625q-1.5 1.265625 -1.9375 1.71875q-0.421875 0.4375 -0.703125 0.890625l6.671875 0zm1.5944824 -5.09375q0 -2.421875 0.5 -3.890625q0.5 -1.46875 1.46875 -2.265625q0.984375 -0.796875 2.46875 -0.796875q1.09375 0 1.921875 0.4375q0.828125 0.4375 1.359375 1.28125q0.546875 0.828125 0.84375 2.015625q0.3125 1.1875 0.3125 3.21875q0 2.390625 -0.5 3.859375q-0.484375 1.46875 -1.46875 2.28125q-0.96875 0.796875 -2.46875 0.796875q-1.96875 0 -3.078125 -1.40625q-1.359375 -1.703125 -1.359375 -5.53125zm1.71875 0q0 3.34375 0.78125 4.453125q0.796875 1.109375 1.9375 1.109375q1.15625 0 1.9375 -1.109375q0.78125 -1.125 0.78125 -4.453125q0 -3.359375 -0.78125 -4.46875q-0.78125 -1.109375 -1.953125 -1.109375q-1.15625 0 -1.828125 0.984375q-0.875 1.234375 -0.875 4.59375zm8.656921 0q0 -2.421875 0.5 -3.890625q0.5 -1.46875 1.46875 -2.265625q0.984375 -0.796875 2.46875 -0.796875q1.09375 0 1.921875 0.4375q0.828125 0.4375 1.359375 1.28125q0.546875 0.828125 0.84375 2.015625q0.3125 1.1875 0.3125 3.21875q0 2.390625 -0.5 3.859375q-0.484375 1.46875 -1.46875 2.28125q-0.96875 0.796875 -2.46875 0.796875q-1.96875 0 -3.078125 -1.40625q-1.359375 -1.703125 -1.359375 -5.53125zm1.71875 0q0 3.34375 0.78125 4.453125q0.796875 1.109375 1.9375 1.109375q1.15625 0 1.9375 -1.109375q0.78125 -1.125 0.78125 -4.453125q0 -3.359375 -0.78125 -4.46875q-0.78125 -1.109375 -1.953125 -1.109375q-1.15625 0 -1.828125 0.984375q-0.875 1.234375 -0.875 4.59375z" fill-rule="nonzero"></path><path fill="#000000" fill-opacity="0.0" d="m359.0 354.33762l232.18896 0l0 58.992126l-232.18896 0z" fill-rule="evenodd"></path><path stroke="#000000" stroke-width="2.0" stroke-linejoin="round" stroke-linecap="butt" d="m359.0 354.33762l232.18896 0l0 58.992126l-232.18896 0z" fill-rule="evenodd"></path><path fill="#000000" d="m401.82367 381.2576l0 -13.59375l1.796875 0l0 11.984375l6.703125 0l0 1.609375l-8.5 0zm9.844482 -4.375l1.6875 -0.140625q0.125 1.015625 0.5625 1.671875q0.4375 0.65625 1.359375 1.0625q0.9375 0.40625 2.09375 0.40625q1.03125 0 1.8125 -0.3125q0.796875 -0.3125 1.1875 -0.84375q0.390625 -0.53125 0.390625 -1.15625q0 -0.640625 -0.375 -1.109375q-0.375 -0.484375 -1.234375 -0.8125q-0.546875 -0.21875 -2.421875 -0.65625q-1.875 -0.453125 -2.625 -0.859375q-0.96875 -0.515625 -1.453125 -1.265625q-0.46875 -0.75 -0.46875 -1.6875q0 -1.03125 0.578125 -1.921875q0.59375 -0.90625 1.703125 -1.359375q1.125 -0.46875 2.5 -0.46875q1.515625 0 2.671875 0.484375q1.15625 0.484375 1.765625 1.4375q0.625 0.9375 0.671875 2.140625l-1.71875 0.125q-0.140625 -1.28125 -0.953125 -1.9375q-0.796875 -0.671875 -2.359375 -0.671875q-1.625 0 -2.375 0.609375q-0.75 0.59375 -0.75 1.4375q0 0.734375 0.53125 1.203125q0.515625 0.46875 2.703125 0.96875q2.203125 0.5 3.015625 0.875q1.1875 0.546875 1.75 1.390625q0.578125 0.828125 0.578125 1.921875q0 1.09375 -0.625 2.0625q-0.625 0.953125 -1.796875 1.484375q-1.15625 0.53125 -2.609375 0.53125q-1.84375 0 -3.09375 -0.53125q-1.25 -0.546875 -1.96875 -1.625q-0.703125 -1.078125 -0.734375 -2.453125zm16.506073 4.375l0 -12.0l-4.46875 0l0 -1.59375l10.765625 0l0 1.59375l-4.5 0l0 12.0l-1.796875 0zm7.8803406 0l0 -13.59375l2.71875 0l3.21875 9.625q0.4375 1.34375 0.640625 2.015625q0.234375 -0.75 0.734375 -2.1875l3.25 -9.453125l2.421875 0l0 13.59375l-1.734375 0l0 -11.390625l-3.953125 11.390625l-1.625 0l-3.9375 -11.578125l0 11.578125l-1.734375 0zm23.697052 -1.609375l0 1.609375l-8.984375 0q-0.015625 -0.609375 0.1875 -1.15625q0.34375 -0.921875 1.09375 -1.8125q0.765625 -0.890625 2.1875 -2.0625q2.21875 -1.8125 3.0 -2.875q0.78125 -1.0625 0.78125 -2.015625q0 -0.984375 -0.71875 -1.671875q-0.703125 -0.6875 -1.84375 -0.6875q-1.203125 0 -1.9375 0.734375q-0.71875 0.71875 -0.71875 2.0l-1.71875 -0.171875q0.171875 -1.921875 1.328125 -2.921875q1.15625 -1.015625 3.09375 -1.015625q1.953125 0 3.09375 1.09375q1.140625 1.078125 1.140625 2.6875q0 0.8125 -0.34375 1.609375q-0.328125 0.78125 -1.109375 1.65625q-0.765625 0.859375 -2.5625 2.390625q-1.5 1.265625 -1.9375 1.71875q-0.421875 0.4375 -0.703125 0.890625l6.671875 0zm10.434021 5.609375q-1.375 -1.75 -2.328125 -4.078125q-0.953125 -2.34375 -0.953125 -4.84375q0 -2.21875 0.703125 -4.234375q0.84375 -2.34375 2.578125 -4.671875l1.203125 0q-1.125 1.921875 -1.484375 2.75q-0.5625 1.28125 -0.890625 2.671875q-0.40625 1.734375 -0.40625 3.484375q0 4.46875 2.78125 8.921875l-1.203125 0zm2.556427 -7.5625l1.765625 -0.15625q0.1875 1.28125 0.890625 1.9375q0.71875 0.640625 1.71875 0.640625q1.203125 0 2.03125 -0.90625q0.84375 -0.90625 0.84375 -2.421875q0 -1.421875 -0.8125 -2.25q-0.796875 -0.828125 -2.09375 -0.828125q-0.796875 0 -1.453125 0.375q-0.640625 0.359375 -1.015625 0.953125l-1.578125 -0.203125l1.328125 -7.0l6.765625 0l0 1.609375l-5.4375 0l-0.734375 3.640625q1.234375 -0.84375 2.578125 -0.84375q1.78125 0 3.0 1.234375q1.234375 1.234375 1.234375 3.171875q0 1.84375 -1.078125 3.1875q-1.3125 1.65625 -3.578125 1.65625q-1.859375 0 -3.03125 -1.03125q-1.171875 -1.046875 -1.34375 -2.765625zm10.375732 -3.140625q0 -2.421875 0.5 -3.890625q0.5 -1.46875 1.46875 -2.265625q0.984375 -0.796875 2.46875 -0.796875q1.09375 0 1.921875 0.4375q0.828125 0.4375 1.359375 1.28125q0.546875 0.828125 0.84375 2.015625q0.3125 1.1875 0.3125 3.21875q0 2.390625 -0.5 3.859375q-0.484375 1.46875 -1.46875 2.28125q-0.96875 0.796875 -2.46875 0.796875q-1.96875 0 -3.078125 -1.40625q-1.359375 -1.703125 -1.359375 -5.53125zm1.71875 0q0 3.34375 0.78125 4.453125q0.796875 1.109375 1.9375 1.109375q1.15625 0 1.9375 -1.109375q0.78125 -1.125 0.78125 -4.453125q0 -3.359375 -0.78125 -4.46875q-0.78125 -1.109375 -1.953125 -1.109375q-1.15625 0 -1.828125 0.984375q-0.875 1.234375 -0.875 4.59375zm8.656952 0q0 -2.421875 0.5 -3.890625q0.5 -1.46875 1.46875 -2.265625q0.984375 -0.796875 2.46875 -0.796875q1.09375 0 1.921875 0.4375q0.828125 0.4375 1.359375 1.28125q0.546875 0.828125 0.84375 2.015625q0.3125 1.1875 0.3125 3.21875q0 2.390625 -0.5 3.859375q-0.484375 1.46875 -1.46875 2.28125q-0.96875 0.796875 -2.46875 0.796875q-1.96875 0 -3.078125 -1.40625q-1.359375 -1.703125 -1.359375 -5.53125zm1.71875 0q0 3.34375 0.78125 4.453125q0.796875 1.109375 1.9375 1.109375q1.15625 0 1.9375 -1.109375q0.78125 -1.125 0.78125 -4.453125q0 -3.359375 -0.78125 -4.46875q-0.78125 -1.109375 -1.953125 -1.109375q-1.15625 0 -1.828125 0.984375q-0.875 1.234375 -0.875 4.59375zm8.016357 6.703125l3.59375 -5.125l-3.328125 -4.734375l2.09375 0l1.515625 2.3125q0.421875 0.65625 0.671875 1.109375q0.421875 -0.609375 0.765625 -1.09375l1.65625 -2.328125l1.9844055 0l-3.3906555 4.640625l3.6562805 5.21875l-2.0469055 0l-2.03125 -3.0625l-0.53125 -0.828125l-2.59375 3.890625l-2.015625 0zm9.9687805 -3.5625l1.765625 -0.15625q0.1875 1.28125 0.890625 1.9375q0.71875 0.640625 1.71875 0.640625q1.203125 0 2.03125 -0.90625q0.84375 -0.90625 0.84375 -2.421875q0 -1.421875 -0.8125 -2.25q-0.796875 -0.828125 -2.09375 -0.828125q-0.796875 0 -1.453125 0.375q-0.640625 0.359375 -1.015625 0.953125l-1.578125 -0.203125l1.328125 -7.0l6.765625 0l0 1.609375l-5.4375 0l-0.734375 3.640625q1.234375 -0.84375 2.578125 -0.84375q1.78125 0 3.0 1.234375q1.234375 1.234375 1.234375 3.171875q0 1.84375 -1.078125 3.1875q-1.3125 1.65625 -3.578125 1.65625q-1.859375 0 -3.03125 -1.03125q-1.171875 -1.046875 -1.34375 -2.765625zm10.375671 -3.140625q0 -2.421875 0.5 -3.890625q0.5 -1.46875 1.46875 -2.265625q0.984375 -0.796875 2.46875 -0.796875q1.09375 0 1.921875 0.4375q0.828125 0.4375 1.359375 1.28125q0.546875 0.828125 0.84375 2.015625q0.3125 1.1875 0.3125 3.21875q0 2.390625 -0.5 3.859375q-0.484375 1.46875 -1.46875 2.28125q-0.96875 0.796875 -2.46875 0.796875q-1.96875 0 -3.078125 -1.40625q-1.359375 -1.703125 -1.359375 -5.53125zm1.71875 0q0 3.34375 0.78125 4.453125q0.796875 1.109375 1.9375 1.109375q1.15625 0 1.9375 -1.109375q0.78125 -1.125 0.78125 -4.453125q0 -3.359375 -0.78125 -4.46875q-0.78125 -1.109375 -1.953125 -1.109375q-1.15625 0 -1.828125 0.984375q-0.875 1.234375 -0.875 4.59375zm8.656982 0q0 -2.421875 0.5 -3.890625q0.5 -1.46875 1.46875 -2.265625q0.984375 -0.796875 2.46875 -0.796875q1.09375 0 1.921875 0.4375q0.828125 0.4375 1.359375 1.28125q0.546875 0.828125 0.84375 2.015625q0.3125 1.1875 0.3125 3.21875q0 2.390625 -0.5 3.859375q-0.484375 1.46875 -1.46875 2.28125q-0.96875 0.796875 -2.46875 0.796875q-1.96875 0 -3.078125 -1.40625q-1.359375 -1.703125 -1.359375 -5.53125zm1.71875 0q0 3.34375 0.78125 4.453125q0.796875 1.109375 1.9375 1.109375q1.15625 0 1.9375 -1.109375q0.78125 -1.125 0.78125 -4.453125q0 -3.359375 -0.78125 -4.46875q-0.78125 -1.109375 -1.953125 -1.109375q-1.15625 0 -1.828125 0.984375q-0.875 1.234375 -0.875 4.59375zm10.219421 10.703125l-1.1875 0q2.765625 -4.453125 2.765625 -8.921875q0 -1.734375 -0.390625 -3.453125q-0.328125 -1.390625 -0.890625 -2.671875q-0.359375 -0.84375 -1.484375 -2.78125l1.1875 0q1.75 2.328125 2.578125 4.671875q0.71875 2.015625 0.71875 4.234375q0 2.5 -0.96875 4.84375q-0.953125 2.328125 -2.328125 4.078125z" fill-rule="nonzero"></path><path fill="#000000" d="m395.9714 407.03885l0 -13.640625l1.53125 0l0 1.28125q0.53125 -0.75 1.203125 -1.125q0.6875 -0.375 1.640625 -0.375q1.265625 0 2.234375 0.65625q0.96875 0.640625 1.453125 1.828125q0.5 1.1875 0.5 2.59375q0 1.515625 -0.546875 2.734375q-0.546875 1.203125 -1.578125 1.84375q-1.03125 0.640625 -2.171875 0.640625q-0.84375 0 -1.515625 -0.34375q-0.65625 -0.359375 -1.078125 -0.890625l0 4.796875l-1.671875 0zm1.515625 -8.65625q0 1.90625 0.765625 2.8125q0.78125 0.90625 1.875 0.90625q1.109375 0 1.890625 -0.9375q0.796875 -0.9375 0.796875 -2.921875q0 -1.875 -0.78125 -2.8125q-0.765625 -0.9375 -1.84375 -0.9375q-1.0625 0 -1.890625 1.0q-0.8125 1.0 -0.8125 2.890625zm8.844452 4.875l0 -9.859375l1.5 0l0 1.5q0.578125 -1.046875 1.0625 -1.375q0.484375 -0.34375 1.078125 -0.34375q0.84375 0 1.71875 0.546875l-0.578125 1.546875q-0.609375 -0.359375 -1.234375 -0.359375q-0.546875 0 -0.984375 0.328125q-0.421875 0.328125 -0.609375 0.90625q-0.28125 0.890625 -0.28125 1.953125l0 5.15625l-1.671875 0zm5.603302 -4.921875q0 -2.734375 1.53125 -4.0625q1.265625 -1.09375 3.09375 -1.09375q2.03125 0 3.3125 1.34375q1.296875 1.328125 1.296875 3.671875q0 1.90625 -0.578125 3.0q-0.5625 1.078125 -1.65625 1.6875q-1.078125 0.59375 -2.375 0.59375q-2.0625 0 -3.34375 -1.328125q-1.28125 -1.328125 -1.28125 -3.8125zm1.71875 0q0 1.890625 0.828125 2.828125q0.828125 0.9375 2.078125 0.9375q1.25 0 2.0625 -0.9375q0.828125 -0.953125 0.828125 -2.890625q0 -1.828125 -0.828125 -2.765625q-0.828125 -0.9375 -2.0625 -0.9375q-1.25 0 -2.078125 0.9375q-0.828125 0.9375 -0.828125 2.828125zm9.281982 -6.734375l0 -1.9375l1.65625 0l0 1.9375l-1.65625 0zm-2.125 15.484375l0.3125 -1.421875q0.5 0.125 0.796875 0.125q0.515625 0 0.765625 -0.34375q0.25 -0.328125 0.25 -1.6875l0 -10.359375l1.65625 0l0 10.390625q0 1.828125 -0.46875 2.546875q-0.59375 0.921875 -2.0 0.921875q-0.671875 0 -1.3125 -0.171875zm13.019806 -7.0l1.71875 0.21875q-0.40625 1.5 -1.515625 2.34375q-1.09375 0.828125 -2.8125 0.828125q-2.15625 0 -3.421875 -1.328125q-1.265625 -1.328125 -1.265625 -3.734375q0 -2.484375 1.265625 -3.859375q1.28125 -1.375 3.328125 -1.375q1.984375 0 3.234375 1.34375q1.25 1.34375 1.25 3.796875q0 0.140625 -0.015625 0.4375l-7.34375 0q0.09375 1.625 0.921875 2.484375q0.828125 0.859375 2.0625 0.859375q0.90625 0 1.546875 -0.46875q0.65625 -0.484375 1.046875 -1.546875zm-5.484375 -2.703125l5.5 0q-0.109375 -1.234375 -0.625 -1.859375q-0.796875 -0.96875 -2.078125 -0.96875q-1.140625 0 -1.9375 0.78125q-0.78125 0.765625 -0.859375 2.046875zm15.547607 2.265625l1.640625 0.21875q-0.265625 1.6875 -1.375 2.65625q-1.109375 0.953125 -2.734375 0.953125q-2.015625 0 -3.25 -1.3125q-1.21875 -1.328125 -1.21875 -3.796875q0 -1.59375 0.515625 -2.78125q0.53125 -1.203125 1.609375 -1.796875q1.09375 -0.609375 2.359375 -0.609375q1.609375 0 2.625 0.8125q1.015625 0.8125 1.3125 2.3125l-1.625 0.25q-0.234375 -1.0 -0.828125 -1.5q-0.59375 -0.5 -1.421875 -0.5q-1.265625 0 -2.0625 0.90625q-0.78125 0.90625 -0.78125 2.859375q0 1.984375 0.765625 2.890625q0.765625 0.890625 1.984375 0.890625q0.984375 0 1.640625 -0.59375q0.65625 -0.609375 0.84375 -1.859375zm6.546875 2.109375l0.234375 1.484375q-0.703125 0.140625 -1.265625 0.140625q-0.90625 0 -1.40625 -0.28125q-0.5 -0.296875 -0.703125 -0.75q-0.203125 -0.46875 -0.203125 -1.984375l0 -5.65625l-1.234375 0l0 -1.3125l1.234375 0l0 -2.4375l1.65625 -1.0l0 3.4375l1.6875 0l0 1.3125l-1.6875 0l0 5.75q0 0.71875 0.078125 0.921875q0.09375 0.203125 0.296875 0.328125q0.203125 0.125 0.578125 0.125q0.265625 0 0.734375 -0.078125zm1.5426636 -10.1875l0 -1.90625l1.671875 0l0 1.90625l-1.671875 0zm0 11.6875l0 -9.859375l1.671875 0l0 9.859375l-1.671875 0zm3.5042114 -4.921875q0 -2.734375 1.53125 -4.0625q1.265625 -1.09375 3.09375 -1.09375q2.03125 0 3.3125 1.34375q1.296875 1.328125 1.296875 3.671875q0 1.90625 -0.578125 3.0q-0.5625 1.078125 -1.65625 1.6875q-1.078125 0.59375 -2.375 0.59375q-2.0625 0 -3.34375 -1.328125q-1.28125 -1.328125 -1.28125 -3.8125zm1.71875 0q0 1.890625 0.828125 2.828125q0.828125 0.9375 2.078125 0.9375q1.25 0 2.0625 -0.9375q0.828125 -0.953125 0.828125 -2.890625q0 -1.828125 -0.828125 -2.765625q-0.828125 -0.9375 -2.0625 -0.9375q-1.25 0 -2.078125 0.9375q-0.828125 0.9375 -0.828125 2.828125zm9.281952 4.921875l0 -9.859375l1.5 0l0 1.40625q1.09375 -1.625 3.140625 -1.625q0.890625 0 1.640625 0.328125q0.75 0.3125 1.109375 0.84375q0.375 0.515625 0.53125 1.21875q0.09375 0.46875 0.09375 1.625l0 6.0625l-1.671875 0l0 -6.0q0 -1.015625 -0.203125 -1.515625q-0.1875 -0.515625 -0.6875 -0.8125q-0.5 -0.296875 -1.171875 -0.296875q-1.0625 0 -1.84375 0.671875q-0.765625 0.671875 -0.765625 2.578125l0 5.375l-1.671875 0zm14.887146 -2.9375l1.65625 -0.265625q0.140625 1.0 0.765625 1.53125q0.640625 0.515625 1.78125 0.515625q1.15625 0 1.703125 -0.46875q0.5625 -0.46875 0.5625 -1.09375q0 -0.5625 -0.484375 -0.890625q-0.34375 -0.21875 -1.703125 -0.5625q-1.84375 -0.46875 -2.5625 -0.796875q-0.703125 -0.34375 -1.078125 -0.9375q-0.359375 -0.609375 -0.359375 -1.328125q0 -0.65625 0.296875 -1.21875q0.3125 -0.5625 0.828125 -0.9375q0.390625 -0.28125 1.0625 -0.484375q0.671875 -0.203125 1.4375 -0.203125q1.171875 0 2.046875 0.34375q0.875 0.328125 1.28125 0.90625q0.421875 0.5625 0.578125 1.515625l-1.625 0.21875q-0.109375 -0.75 -0.65625 -1.171875q-0.53125 -0.4375 -1.5 -0.4375q-1.15625 0 -1.640625 0.390625q-0.484375 0.375 -0.484375 0.875q0 0.328125 0.203125 0.59375q0.203125 0.265625 0.640625 0.4375q0.25 0.09375 1.46875 0.4375q1.765625 0.46875 2.46875 0.765625q0.703125 0.296875 1.09375 0.875q0.40625 0.578125 0.40625 1.4375q0 0.828125 -0.484375 1.578125q-0.484375 0.734375 -1.40625 1.140625q-0.921875 0.390625 -2.078125 0.390625q-1.921875 0 -2.9375 -0.796875q-1.0 -0.796875 -1.28125 -2.359375zm10.015625 -8.75l0 -1.90625l1.671875 0l0 1.90625l-1.671875 0zm0 11.6875l0 -9.859375l1.671875 0l0 9.859375l-1.671875 0zm3.2542114 0l0 -1.359375l6.265625 -7.1875q-1.0625 0.046875 -1.875 0.046875l-4.015625 0l0 -1.359375l8.046875 0l0 1.109375l-5.34375 6.25l-1.015625 1.140625q1.109375 -0.078125 2.09375 -0.078125l4.5625 0l0 1.4375l-8.71875 0zm16.953125 -3.171875l1.7187805 0.21875q-0.40625 1.5 -1.5156555 2.34375q-1.09375 0.828125 -2.8125 0.828125q-2.15625 0 -3.421875 -1.328125q-1.265625 -1.328125 -1.265625 -3.734375q0 -2.484375 1.265625 -3.859375q1.28125 -1.375 3.328125 -1.375q1.984375 0 3.2344055 1.34375q1.25 1.34375 1.25 3.796875q0 0.140625 -0.015625 0.4375l-7.3437805 0q0.09375 1.625 0.921875 2.484375q0.828125 0.859375 2.0625 0.859375q0.90625 0 1.546875 -0.46875q0.65625 -0.484375 1.046875 -1.546875zm-5.484375 -2.703125l5.5 0q-0.109375 -1.234375 -0.625 -1.859375q-0.796875 -0.96875 -2.078125 -0.96875q-1.140625 0 -1.9375 0.78125q-0.78125 0.765625 -0.859375 2.046875zm9.578827 -2.078125l0 -1.90625l1.90625 0l0 1.90625l-1.90625 0zm0 7.953125l0 -1.90625l1.90625 0l0 1.90625l-1.90625 0zm18.210388 -1.609375l0 1.609375l-8.984375 0q-0.015625 -0.609375 0.1875 -1.15625q0.34375 -0.921875 1.09375 -1.8125q0.765625 -0.890625 2.1875 -2.0625q2.21875 -1.8125 3.0 -2.875q0.78125 -1.0625 0.78125 -2.015625q0 -0.984375 -0.71875 -1.671875q-0.703125 -0.6875 -1.84375 -0.6875q-1.203125 0 -1.9375 0.734375q-0.71875 0.71875 -0.71875 2.0l-1.71875 -0.171875q0.171875 -1.921875 1.328125 -2.921875q1.15625 -1.015625 3.09375 -1.015625q1.953125 0 3.09375 1.09375q1.140625 1.078125 1.140625 2.6875q0 0.8125 -0.34375 1.609375q-0.328125 0.78125 -1.109375 1.65625q-0.765625 0.859375 -2.5625 2.390625q-1.5 1.265625 -1.9375 1.71875q-0.421875 0.4375 -0.703125 0.890625l6.671875 0zm1.5944824 -5.09375q0 -2.421875 0.5 -3.890625q0.5 -1.46875 1.46875 -2.265625q0.984375 -0.796875 2.46875 -0.796875q1.09375 0 1.921875 0.4375q0.828125 0.4375 1.359375 1.28125q0.546875 0.828125 0.84375 2.015625q0.3125 1.1875 0.3125 3.21875q0 2.390625 -0.5 3.859375q-0.484375 1.46875 -1.46875 2.28125q-0.96875 0.796875 -2.46875 0.796875q-1.96875 0 -3.078125 -1.40625q-1.359375 -1.703125 -1.359375 -5.53125zm1.71875 0q0 3.34375 0.78125 4.453125q0.796875 1.109375 1.9375 1.109375q1.15625 0 1.9375 -1.109375q0.78125 -1.125 0.78125 -4.453125q0 -3.359375 -0.78125 -4.46875q-0.78125 -1.109375 -1.953125 -1.109375q-1.15625 0 -1.828125 0.984375q-0.875 1.234375 -0.875 4.59375zm8.656921 0q0 -2.421875 0.5 -3.890625q0.5 -1.46875 1.46875 -2.265625q0.984375 -0.796875 2.46875 -0.796875q1.09375 0 1.921875 0.4375q0.828125 0.4375 1.359375 1.28125q0.546875 0.828125 0.84375 2.015625q0.3125 1.1875 0.3125 3.21875q0 2.390625 -0.5 3.859375q-0.484375 1.46875 -1.46875 2.28125q-0.96875 0.796875 -2.46875 0.796875q-1.96875 0 -3.078125 -1.40625q-1.359375 -1.703125 -1.359375 -5.53125zm1.71875 0q0 3.34375 0.78125 4.453125q0.796875 1.109375 1.9375 1.109375q1.15625 0 1.9375 -1.109375q0.78125 -1.125 0.78125 -4.453125q0 -3.359375 -0.78125 -4.46875q-0.78125 -1.109375 -1.953125 -1.109375q-1.15625 0 -1.828125 0.984375q-0.875 1.234375 -0.875 4.59375z" fill-rule="nonzero"></path><path fill="#000000" fill-opacity="0.0" d="m359.0 437.45026l232.18896 0l0 58.992096l-232.18896 0z" fill-rule="evenodd"></path><path stroke="#000000" stroke-width="2.0" stroke-linejoin="round" stroke-linecap="butt" d="m359.0 437.45026l232.18896 0l0 58.992096l-232.18896 0z" fill-rule="evenodd"></path><path fill="#000000" d="m401.82367 464.37024l0 -13.59375l1.796875 0l0 11.984375l6.703125 0l0 1.609375l-8.5 0zm9.844482 -4.375l1.6875 -0.140625q0.125 1.015625 0.5625 1.671875q0.4375 0.65625 1.359375 1.0625q0.9375 0.40625 2.09375 0.40625q1.03125 0 1.8125 -0.3125q0.796875 -0.3125 1.1875 -0.84375q0.390625 -0.53125 0.390625 -1.15625q0 -0.640625 -0.375 -1.109375q-0.375 -0.484375 -1.234375 -0.8125q-0.546875 -0.21875 -2.421875 -0.65625q-1.875 -0.453125 -2.625 -0.859375q-0.96875 -0.515625 -1.453125 -1.265625q-0.46875 -0.75 -0.46875 -1.6875q0 -1.03125 0.578125 -1.921875q0.59375 -0.90625 1.703125 -1.359375q1.125 -0.46875 2.5 -0.46875q1.515625 0 2.671875 0.484375q1.15625 0.484375 1.765625 1.4375q0.625 0.9375 0.671875 2.140625l-1.71875 0.125q-0.140625 -1.28125 -0.953125 -1.9375q-0.796875 -0.671875 -2.359375 -0.671875q-1.625 0 -2.375 0.609375q-0.75 0.59375 -0.75 1.4375q0 0.734375 0.53125 1.203125q0.515625 0.46875 2.703125 0.96875q2.203125 0.5 3.015625 0.875q1.1875 0.546875 1.75 1.390625q0.578125 0.828125 0.578125 1.921875q0 1.09375 -0.625 2.0625q-0.625 0.953125 -1.796875 1.484375q-1.15625 0.53125 -2.609375 0.53125q-1.84375 0 -3.09375 -0.53125q-1.25 -0.546875 -1.96875 -1.625q-0.703125 -1.078125 -0.734375 -2.453125zm16.506073 4.375l0 -12.0l-4.46875 0l0 -1.59375l10.765625 0l0 1.59375l-4.5 0l0 12.0l-1.796875 0zm7.8803406 0l0 -13.59375l2.71875 0l3.21875 9.625q0.4375 1.34375 0.640625 2.015625q0.234375 -0.75 0.734375 -2.1875l3.25 -9.453125l2.421875 0l0 13.59375l-1.734375 0l0 -11.390625l-3.953125 11.390625l-1.625 0l-3.9375 -11.578125l0 11.578125l-1.734375 0zm23.697052 -1.609375l0 1.609375l-8.984375 0q-0.015625 -0.609375 0.1875 -1.15625q0.34375 -0.921875 1.09375 -1.8125q0.765625 -0.890625 2.1875 -2.0625q2.21875 -1.8125 3.0 -2.875q0.78125 -1.0625 0.78125 -2.015625q0 -0.984375 -0.71875 -1.671875q-0.703125 -0.6875 -1.84375 -0.6875q-1.203125 0 -1.9375 0.734375q-0.71875 0.71875 -0.71875 2.0l-1.71875 -0.171875q0.171875 -1.921875 1.328125 -2.921875q1.15625 -1.015625 3.09375 -1.015625q1.953125 0 3.09375 1.09375q1.140625 1.078125 1.140625 2.6875q0 0.8125 -0.34375 1.609375q-0.328125 0.78125 -1.109375 1.65625q-0.765625 0.859375 -2.5625 2.390625q-1.5 1.265625 -1.9375 1.71875q-0.421875 0.4375 -0.703125 0.890625l6.671875 0zm10.434021 5.609375q-1.375 -1.75 -2.328125 -4.078125q-0.953125 -2.34375 -0.953125 -4.84375q0 -2.21875 0.703125 -4.234375q0.84375 -2.34375 2.578125 -4.671875l1.203125 0q-1.125 1.921875 -1.484375 2.75q-0.5625 1.28125 -0.890625 2.671875q-0.40625 1.734375 -0.40625 3.484375q0 4.46875 2.78125 8.921875l-1.203125 0zm2.556427 -7.5625l1.765625 -0.15625q0.1875 1.28125 0.890625 1.9375q0.71875 0.640625 1.71875 0.640625q1.203125 0 2.03125 -0.90625q0.84375 -0.90625 0.84375 -2.421875q0 -1.421875 -0.8125 -2.25q-0.796875 -0.828125 -2.09375 -0.828125q-0.796875 0 -1.453125 0.375q-0.640625 0.359375 -1.015625 0.953125l-1.578125 -0.203125l1.328125 -7.0l6.765625 0l0 1.609375l-5.4375 0l-0.734375 3.640625q1.234375 -0.84375 2.578125 -0.84375q1.78125 0 3.0 1.234375q1.234375 1.234375 1.234375 3.171875q0 1.84375 -1.078125 3.1875q-1.3125 1.65625 -3.578125 1.65625q-1.859375 0 -3.03125 -1.03125q-1.171875 -1.046875 -1.34375 -2.765625zm10.375732 -3.140625q0 -2.421875 0.5 -3.890625q0.5 -1.46875 1.46875 -2.265625q0.984375 -0.796875 2.46875 -0.796875q1.09375 0 1.921875 0.4375q0.828125 0.4375 1.359375 1.28125q0.546875 0.828125 0.84375 2.015625q0.3125 1.1875 0.3125 3.21875q0 2.390625 -0.5 3.859375q-0.484375 1.46875 -1.46875 2.28125q-0.96875 0.796875 -2.46875 0.796875q-1.96875 0 -3.078125 -1.40625q-1.359375 -1.703125 -1.359375 -5.53125zm1.71875 0q0 3.34375 0.78125 4.453125q0.796875 1.109375 1.9375 1.109375q1.15625 0 1.9375 -1.109375q0.78125 -1.125 0.78125 -4.453125q0 -3.359375 -0.78125 -4.46875q-0.78125 -1.109375 -1.953125 -1.109375q-1.15625 0 -1.828125 0.984375q-0.875 1.234375 -0.875 4.59375zm8.656952 0q0 -2.421875 0.5 -3.890625q0.5 -1.46875 1.46875 -2.265625q0.984375 -0.796875 2.46875 -0.796875q1.09375 0 1.921875 0.4375q0.828125 0.4375 1.359375 1.28125q0.546875 0.828125 0.84375 2.015625q0.3125 1.1875 0.3125 3.21875q0 2.390625 -0.5 3.859375q-0.484375 1.46875 -1.46875 2.28125q-0.96875 0.796875 -2.46875 0.796875q-1.96875 0 -3.078125 -1.40625q-1.359375 -1.703125 -1.359375 -5.53125zm1.71875 0q0 3.34375 0.78125 4.453125q0.796875 1.109375 1.9375 1.109375q1.15625 0 1.9375 -1.109375q0.78125 -1.125 0.78125 -4.453125q0 -3.359375 -0.78125 -4.46875q-0.78125 -1.109375 -1.953125 -1.109375q-1.15625 0 -1.828125 0.984375q-0.875 1.234375 -0.875 4.59375zm8.016357 6.703125l3.59375 -5.125l-3.328125 -4.734375l2.09375 0l1.515625 2.3125q0.421875 0.65625 0.671875 1.109375q0.421875 -0.609375 0.765625 -1.09375l1.65625 -2.328125l1.9844055 0l-3.3906555 4.640625l3.6562805 5.21875l-2.0469055 0l-2.03125 -3.0625l-0.53125 -0.828125l-2.59375 3.890625l-2.015625 0zm9.9687805 -3.5625l1.765625 -0.15625q0.1875 1.28125 0.890625 1.9375q0.71875 0.640625 1.71875 0.640625q1.203125 0 2.03125 -0.90625q0.84375 -0.90625 0.84375 -2.421875q0 -1.421875 -0.8125 -2.25q-0.796875 -0.828125 -2.09375 -0.828125q-0.796875 0 -1.453125 0.375q-0.640625 0.359375 -1.015625 0.953125l-1.578125 -0.203125l1.328125 -7.0l6.765625 0l0 1.609375l-5.4375 0l-0.734375 3.640625q1.234375 -0.84375 2.578125 -0.84375q1.78125 0 3.0 1.234375q1.234375 1.234375 1.234375 3.171875q0 1.84375 -1.078125 3.1875q-1.3125 1.65625 -3.578125 1.65625q-1.859375 0 -3.03125 -1.03125q-1.171875 -1.046875 -1.34375 -2.765625zm10.375671 -3.140625q0 -2.421875 0.5 -3.890625q0.5 -1.46875 1.46875 -2.265625q0.984375 -0.796875 2.46875 -0.796875q1.09375 0 1.921875 0.4375q0.828125 0.4375 1.359375 1.28125q0.546875 0.828125 0.84375 2.015625q0.3125 1.1875 0.3125 3.21875q0 2.390625 -0.5 3.859375q-0.484375 1.46875 -1.46875 2.28125q-0.96875 0.796875 -2.46875 0.796875q-1.96875 0 -3.078125 -1.40625q-1.359375 -1.703125 -1.359375 -5.53125zm1.71875 0q0 3.34375 0.78125 4.453125q0.796875 1.109375 1.9375 1.109375q1.15625 0 1.9375 -1.109375q0.78125 -1.125 0.78125 -4.453125q0 -3.359375 -0.78125 -4.46875q-0.78125 -1.109375 -1.953125 -1.109375q-1.15625 0 -1.828125 0.984375q-0.875 1.234375 -0.875 4.59375zm8.656982 0q0 -2.421875 0.5 -3.890625q0.5 -1.46875 1.46875 -2.265625q0.984375 -0.796875 2.46875 -0.796875q1.09375 0 1.921875 0.4375q0.828125 0.4375 1.359375 1.28125q0.546875 0.828125 0.84375 2.015625q0.3125 1.1875 0.3125 3.21875q0 2.390625 -0.5 3.859375q-0.484375 1.46875 -1.46875 2.28125q-0.96875 0.796875 -2.46875 0.796875q-1.96875 0 -3.078125 -1.40625q-1.359375 -1.703125 -1.359375 -5.53125zm1.71875 0q0 3.34375 0.78125 4.453125q0.796875 1.109375 1.9375 1.109375q1.15625 0 1.9375 -1.109375q0.78125 -1.125 0.78125 -4.453125q0 -3.359375 -0.78125 -4.46875q-0.78125 -1.109375 -1.953125 -1.109375q-1.15625 0 -1.828125 0.984375q-0.875 1.234375 -0.875 4.59375zm10.219421 10.703125l-1.1875 0q2.765625 -4.453125 2.765625 -8.921875q0 -1.734375 -0.390625 -3.453125q-0.328125 -1.390625 -0.890625 -2.671875q-0.359375 -0.84375 -1.484375 -2.78125l1.1875 0q1.75 2.328125 2.578125 4.671875q0.71875 2.015625 0.71875 4.234375q0 2.5 -0.96875 4.84375q-0.953125 2.328125 -2.328125 4.078125z" fill-rule="nonzero"></path><path fill="#000000" d="m395.9714 490.1515l0 -13.640625l1.53125 0l0 1.28125q0.53125 -0.75 1.203125 -1.125q0.6875 -0.375 1.640625 -0.375q1.265625 0 2.234375 0.65625q0.96875 0.640625 1.453125 1.828125q0.5 1.1875 0.5 2.59375q0 1.515625 -0.546875 2.734375q-0.546875 1.203125 -1.578125 1.84375q-1.03125 0.640625 -2.171875 0.640625q-0.84375 0 -1.515625 -0.34375q-0.65625 -0.359375 -1.078125 -0.890625l0 4.796875l-1.671875 0zm1.515625 -8.65625q0 1.90625 0.765625 2.8125q0.78125 0.90625 1.875 0.90625q1.109375 0 1.890625 -0.9375q0.796875 -0.9375 0.796875 -2.921875q0 -1.875 -0.78125 -2.8125q-0.765625 -0.9375 -1.84375 -0.9375q-1.0625 0 -1.890625 1.0q-0.8125 1.0 -0.8125 2.890625zm8.844452 4.875l0 -9.859375l1.5 0l0 1.5q0.578125 -1.046875 1.0625 -1.375q0.484375 -0.34375 1.078125 -0.34375q0.84375 0 1.71875 0.546875l-0.578125 1.546875q-0.609375 -0.359375 -1.234375 -0.359375q-0.546875 0 -0.984375 0.328125q-0.421875 0.328125 -0.609375 0.90625q-0.28125 0.890625 -0.28125 1.953125l0 5.15625l-1.671875 0zm5.603302 -4.921875q0 -2.734375 1.53125 -4.0625q1.265625 -1.09375 3.09375 -1.09375q2.03125 0 3.3125 1.34375q1.296875 1.328125 1.296875 3.671875q0 1.90625 -0.578125 3.0q-0.5625 1.078125 -1.65625 1.6875q-1.078125 0.59375 -2.375 0.59375q-2.0625 0 -3.34375 -1.328125q-1.28125 -1.328125 -1.28125 -3.8125zm1.71875 0q0 1.890625 0.828125 2.828125q0.828125 0.9375 2.078125 0.9375q1.25 0 2.0625 -0.9375q0.828125 -0.953125 0.828125 -2.890625q0 -1.828125 -0.828125 -2.765625q-0.828125 -0.9375 -2.0625 -0.9375q-1.25 0 -2.078125 0.9375q-0.828125 0.9375 -0.828125 2.828125zm9.281982 -6.734375l0 -1.9375l1.65625 0l0 1.9375l-1.65625 0zm-2.125 15.484375l0.3125 -1.421875q0.5 0.125 0.796875 0.125q0.515625 0 0.765625 -0.34375q0.25 -0.328125 0.25 -1.6875l0 -10.359375l1.65625 0l0 10.390625q0 1.828125 -0.46875 2.546875q-0.59375 0.921875 -2.0 0.921875q-0.671875 0 -1.3125 -0.171875zm13.019806 -7.0l1.71875 0.21875q-0.40625 1.5 -1.515625 2.34375q-1.09375 0.828125 -2.8125 0.828125q-2.15625 0 -3.421875 -1.328125q-1.265625 -1.328125 -1.265625 -3.734375q0 -2.484375 1.265625 -3.859375q1.28125 -1.375 3.328125 -1.375q1.984375 0 3.234375 1.34375q1.25 1.34375 1.25 3.796875q0 0.140625 -0.015625 0.4375l-7.34375 0q0.09375 1.625 0.921875 2.484375q0.828125 0.859375 2.0625 0.859375q0.90625 0 1.546875 -0.46875q0.65625 -0.484375 1.046875 -1.546875zm-5.484375 -2.703125l5.5 0q-0.109375 -1.234375 -0.625 -1.859375q-0.796875 -0.96875 -2.078125 -0.96875q-1.140625 0 -1.9375 0.78125q-0.78125 0.765625 -0.859375 2.046875zm15.547607 2.265625l1.640625 0.21875q-0.265625 1.6875 -1.375 2.65625q-1.109375 0.953125 -2.734375 0.953125q-2.015625 0 -3.25 -1.3125q-1.21875 -1.328125 -1.21875 -3.796875q0 -1.59375 0.515625 -2.78125q0.53125 -1.203125 1.609375 -1.796875q1.09375 -0.609375 2.359375 -0.609375q1.609375 0 2.625 0.8125q1.015625 0.8125 1.3125 2.3125l-1.625 0.25q-0.234375 -1.0 -0.828125 -1.5q-0.59375 -0.5 -1.421875 -0.5q-1.265625 0 -2.0625 0.90625q-0.78125 0.90625 -0.78125 2.859375q0 1.984375 0.765625 2.890625q0.765625 0.890625 1.984375 0.890625q0.984375 0 1.640625 -0.59375q0.65625 -0.609375 0.84375 -1.859375zm6.546875 2.109375l0.234375 1.484375q-0.703125 0.140625 -1.265625 0.140625q-0.90625 0 -1.40625 -0.28125q-0.5 -0.296875 -0.703125 -0.75q-0.203125 -0.46875 -0.203125 -1.984375l0 -5.65625l-1.234375 0l0 -1.3125l1.234375 0l0 -2.4375l1.65625 -1.0l0 3.4375l1.6875 0l0 1.3125l-1.6875 0l0 5.75q0 0.71875 0.078125 0.921875q0.09375 0.203125 0.296875 0.328125q0.203125 0.125 0.578125 0.125q0.265625 0 0.734375 -0.078125zm1.5426636 -10.1875l0 -1.90625l1.671875 0l0 1.90625l-1.671875 0zm0 11.6875l0 -9.859375l1.671875 0l0 9.859375l-1.671875 0zm3.5042114 -4.921875q0 -2.734375 1.53125 -4.0625q1.265625 -1.09375 3.09375 -1.09375q2.03125 0 3.3125 1.34375q1.296875 1.328125 1.296875 3.671875q0 1.90625 -0.578125 3.0q-0.5625 1.078125 -1.65625 1.6875q-1.078125 0.59375 -2.375 0.59375q-2.0625 0 -3.34375 -1.328125q-1.28125 -1.328125 -1.28125 -3.8125zm1.71875 0q0 1.890625 0.828125 2.828125q0.828125 0.9375 2.078125 0.9375q1.25 0 2.0625 -0.9375q0.828125 -0.953125 0.828125 -2.890625q0 -1.828125 -0.828125 -2.765625q-0.828125 -0.9375 -2.0625 -0.9375q-1.25 0 -2.078125 0.9375q-0.828125 0.9375 -0.828125 2.828125zm9.281952 4.921875l0 -9.859375l1.5 0l0 1.40625q1.09375 -1.625 3.140625 -1.625q0.890625 0 1.640625 0.328125q0.75 0.3125 1.109375 0.84375q0.375 0.515625 0.53125 1.21875q0.09375 0.46875 0.09375 1.625l0 6.0625l-1.671875 0l0 -6.0q0 -1.015625 -0.203125 -1.515625q-0.1875 -0.515625 -0.6875 -0.8125q-0.5 -0.296875 -1.171875 -0.296875q-1.0625 0 -1.84375 0.671875q-0.765625 0.671875 -0.765625 2.578125l0 5.375l-1.671875 0zm14.887146 -2.9375l1.65625 -0.265625q0.140625 1.0 0.765625 1.53125q0.640625 0.515625 1.78125 0.515625q1.15625 0 1.703125 -0.46875q0.5625 -0.46875 0.5625 -1.09375q0 -0.5625 -0.484375 -0.890625q-0.34375 -0.21875 -1.703125 -0.5625q-1.84375 -0.46875 -2.5625 -0.796875q-0.703125 -0.34375 -1.078125 -0.9375q-0.359375 -0.609375 -0.359375 -1.328125q0 -0.65625 0.296875 -1.21875q0.3125 -0.5625 0.828125 -0.9375q0.390625 -0.28125 1.0625 -0.484375q0.671875 -0.203125 1.4375 -0.203125q1.171875 0 2.046875 0.34375q0.875 0.328125 1.28125 0.90625q0.421875 0.5625 0.578125 1.515625l-1.625 0.21875q-0.109375 -0.75 -0.65625 -1.171875q-0.53125 -0.4375 -1.5 -0.4375q-1.15625 0 -1.640625 0.390625q-0.484375 0.375 -0.484375 0.875q0 0.328125 0.203125 0.59375q0.203125 0.265625 0.640625 0.4375q0.25 0.09375 1.46875 0.4375q1.765625 0.46875 2.46875 0.765625q0.703125 0.296875 1.09375 0.875q0.40625 0.578125 0.40625 1.4375q0 0.828125 -0.484375 1.578125q-0.484375 0.734375 -1.40625 1.140625q-0.921875 0.390625 -2.078125 0.390625q-1.921875 0 -2.9375 -0.796875q-1.0 -0.796875 -1.28125 -2.359375zm10.015625 -8.75l0 -1.90625l1.671875 0l0 1.90625l-1.671875 0zm0 11.6875l0 -9.859375l1.671875 0l0 9.859375l-1.671875 0zm3.2542114 0l0 -1.359375l6.265625 -7.1875q-1.0625 0.046875 -1.875 0.046875l-4.015625 0l0 -1.359375l8.046875 0l0 1.109375l-5.34375 6.25l-1.015625 1.140625q1.109375 -0.078125 2.09375 -0.078125l4.5625 0l0 1.4375l-8.71875 0zm16.953125 -3.171875l1.7187805 0.21875q-0.40625 1.5 -1.5156555 2.34375q-1.09375 0.828125 -2.8125 0.828125q-2.15625 0 -3.421875 -1.328125q-1.265625 -1.328125 -1.265625 -3.734375q0 -2.484375 1.265625 -3.859375q1.28125 -1.375 3.328125 -1.375q1.984375 0 3.2344055 1.34375q1.25 1.34375 1.25 3.796875q0 0.140625 -0.015625 0.4375l-7.3437805 0q0.09375 1.625 0.921875 2.484375q0.828125 0.859375 2.0625 0.859375q0.90625 0 1.546875 -0.46875q0.65625 -0.484375 1.046875 -1.546875zm-5.484375 -2.703125l5.5 0q-0.109375 -1.234375 -0.625 -1.859375q-0.796875 -0.96875 -2.078125 -0.96875q-1.140625 0 -1.9375 0.78125q-0.78125 0.765625 -0.859375 2.046875zm9.578827 -2.078125l0 -1.90625l1.90625 0l0 1.90625l-1.90625 0zm0 7.953125l0 -1.90625l1.90625 0l0 1.90625l-1.90625 0zm18.210388 -1.609375l0 1.609375l-8.984375 0q-0.015625 -0.609375 0.1875 -1.15625q0.34375 -0.921875 1.09375 -1.8125q0.765625 -0.890625 2.1875 -2.0625q2.21875 -1.8125 3.0 -2.875q0.78125 -1.0625 0.78125 -2.015625q0 -0.984375 -0.71875 -1.671875q-0.703125 -0.6875 -1.84375 -0.6875q-1.203125 0 -1.9375 0.734375q-0.71875 0.71875 -0.71875 2.0l-1.71875 -0.171875q0.171875 -1.921875 1.328125 -2.921875q1.15625 -1.015625 3.09375 -1.015625q1.953125 0 3.09375 1.09375q1.140625 1.078125 1.140625 2.6875q0 0.8125 -0.34375 1.609375q-0.328125 0.78125 -1.109375 1.65625q-0.765625 0.859375 -2.5625 2.390625q-1.5 1.265625 -1.9375 1.71875q-0.421875 0.4375 -0.703125 0.890625l6.671875 0zm1.5944824 -5.09375q0 -2.421875 0.5 -3.890625q0.5 -1.46875 1.46875 -2.265625q0.984375 -0.796875 2.46875 -0.796875q1.09375 0 1.921875 0.4375q0.828125 0.4375 1.359375 1.28125q0.546875 0.828125 0.84375 2.015625q0.3125 1.1875 0.3125 3.21875q0 2.390625 -0.5 3.859375q-0.484375 1.46875 -1.46875 2.28125q-0.96875 0.796875 -2.46875 0.796875q-1.96875 0 -3.078125 -1.40625q-1.359375 -1.703125 -1.359375 -5.53125zm1.71875 0q0 3.34375 0.78125 4.453125q0.796875 1.109375 1.9375 1.109375q1.15625 0 1.9375 -1.109375q0.78125 -1.125 0.78125 -4.453125q0 -3.359375 -0.78125 -4.46875q-0.78125 -1.109375 -1.953125 -1.109375q-1.15625 0 -1.828125 0.984375q-0.875 1.234375 -0.875 4.59375zm8.656921 0q0 -2.421875 0.5 -3.890625q0.5 -1.46875 1.46875 -2.265625q0.984375 -0.796875 2.46875 -0.796875q1.09375 0 1.921875 0.4375q0.828125 0.4375 1.359375 1.28125q0.546875 0.828125 0.84375 2.015625q0.3125 1.1875 0.3125 3.21875q0 2.390625 -0.5 3.859375q-0.484375 1.46875 -1.46875 2.28125q-0.96875 0.796875 -2.46875 0.796875q-1.96875 0 -3.078125 -1.40625q-1.359375 -1.703125 -1.359375 -5.53125zm1.71875 0q0 3.34375 0.78125 4.453125q0.796875 1.109375 1.9375 1.109375q1.15625 0 1.9375 -1.109375q0.78125 -1.125 0.78125 -4.453125q0 -3.359375 -0.78125 -4.46875q-0.78125 -1.109375 -1.953125 -1.109375q-1.15625 0 -1.828125 0.984375q-0.875 1.234375 -0.875 4.59375z" fill-rule="nonzero"></path><path fill="#000000" fill-opacity="0.0" d="m405.46194 594.54596l140.06302 0l0 42.11023l-140.06302 0z" fill-rule="evenodd"></path><path stroke="#000000" stroke-width="2.0" stroke-linejoin="round" stroke-linecap="butt" d="m405.46194 594.54596l140.06302 0l0 42.11023l-140.06302 0z" fill-rule="evenodd"></path><path fill="#000000" d="m442.13754 617.09094l1.6875 -0.140625q0.125 1.015625 0.5625 1.671875q0.4375 0.65625 1.359375 1.0625q0.9375 0.40625 2.09375 0.40625q1.03125 0 1.8125 -0.3125q0.796875 -0.3125 1.1875 -0.84375q0.390625 -0.53125 0.390625 -1.15625q0 -0.640625 -0.375 -1.109375q-0.375 -0.484375 -1.234375 -0.8125q-0.546875 -0.21875 -2.421875 -0.65625q-1.875 -0.453125 -2.625 -0.859375q-0.96875 -0.515625 -1.453125 -1.265625q-0.46875 -0.75 -0.46875 -1.6875q0 -1.03125 0.578125 -1.921875q0.59375 -0.90625 1.703125 -1.359375q1.125 -0.46875 2.5 -0.46875q1.515625 0 2.671875 0.484375q1.15625 0.484375 1.765625 1.4375q0.625 0.9375 0.671875 2.140625l-1.71875 0.125q-0.140625 -1.28125 -0.953125 -1.9375q-0.796875 -0.671875 -2.359375 -0.671875q-1.625 0 -2.375 0.609375q-0.75 0.59375 -0.75 1.4375q0 0.734375 0.53125 1.203125q0.515625 0.46875 2.703125 0.96875q2.203125 0.5 3.015625 0.875q1.1875 0.546875 1.75 1.390625q0.578125 0.828125 0.578125 1.921875q0 1.09375 -0.625 2.0625q-0.625 0.953125 -1.796875 1.484375q-1.15625 0.53125 -2.609375 0.53125q-1.84375 0 -3.09375 -0.53125q-1.25 -0.546875 -1.96875 -1.625q-0.703125 -1.078125 -0.734375 -2.453125zm12.209198 -0.546875q0 -2.734375 1.53125 -4.0625q1.265625 -1.09375 3.09375 -1.09375q2.03125 0 3.3125 1.34375q1.296875 1.328125 1.296875 3.671875q0 1.90625 -0.578125 3.0q-0.5625 1.078125 -1.65625 1.6875q-1.078125 0.59375 -2.375 0.59375q-2.0625 0 -3.34375 -1.328125q-1.28125 -1.328125 -1.28125 -3.8125zm1.71875 0q0 1.890625 0.828125 2.828125q0.828125 0.9375 2.078125 0.9375q1.25 0 2.0625 -0.9375q0.828125 -0.953125 0.828125 -2.890625q0 -1.828125 -0.828125 -2.765625q-0.828125 -0.9375 -2.0625 -0.9375q-1.25 0 -2.078125 0.9375q-0.828125 0.9375 -0.828125 2.828125zm9.688232 4.921875l0 -8.546875l-1.484375 0l0 -1.3125l1.484375 0l0 -1.046875q0 -0.984375 0.171875 -1.46875q0.234375 -0.65625 0.84375 -1.046875q0.609375 -0.40625 1.703125 -0.40625q0.703125 0 1.5624695 0.15625l-0.25 1.46875q-0.5155945 -0.09375 -0.9843445 -0.09375q-0.765625 0 -1.078125 0.328125q-0.3125 0.3125 -0.3125 1.203125l0 0.90625l1.921875 0l0 1.3125l-1.921875 0l0 8.546875l-1.65625 0zm8.433289 -1.5l0.234375 1.484375q-0.703125 0.140625 -1.265625 0.140625q-0.90625 0 -1.40625 -0.28125q-0.5 -0.296875 -0.703125 -0.75q-0.203125 -0.46875 -0.203125 -1.984375l0 -5.65625l-1.234375 0l0 -1.3125l1.234375 0l0 -2.4375l1.65625 -1.0l0 3.4375l1.6875 0l0 1.3125l-1.6875 0l0 5.75q0 0.71875 0.078125 0.921875q0.09375 0.203125 0.296875 0.328125q0.203125 0.125 0.578125 0.125q0.265625 0 0.734375 -0.078125zm1.5270386 1.5l0 -9.859375l1.5 0l0 1.390625q0.453125 -0.71875 1.21875 -1.15625q0.78125 -0.453125 1.765625 -0.453125q1.09375 0 1.796875 0.453125q0.703125 0.453125 0.984375 1.28125q1.171875 -1.734375 3.046875 -1.734375q1.46875 0 2.25 0.8125q0.796875 0.8125 0.796875 2.5l0 6.765625l-1.671875 0l0 -6.203125q0 -1.0 -0.15625 -1.4375q-0.15625 -0.453125 -0.59375 -0.71875q-0.421875 -0.265625 -1.0 -0.265625q-1.03125 0 -1.71875 0.6875q-0.6875 0.6875 -0.6875 2.21875l0 5.71875l-1.671875 0l0 -6.40625q0 -1.109375 -0.40625 -1.65625q-0.40625 -0.5625 -1.34375 -0.5625q-0.703125 0 -1.3125 0.375q-0.59375 0.359375 -0.859375 1.078125q-0.265625 0.71875 -0.265625 2.0625l0 5.109375l-1.671875 0zm21.978302 -1.21875q-0.9375 0.796875 -1.796875 1.125q-0.859375 0.3125 -1.84375 0.3125q-1.609375 0 -2.484375 -0.78125q-0.875 -0.796875 -0.875 -2.03125q0 -0.734375 0.328125 -1.328125q0.328125 -0.59375 0.859375 -0.953125q0.53125 -0.359375 1.203125 -0.546875q0.5 -0.140625 1.484375 -0.25q2.03125 -0.25 2.984375 -0.578125q0 -0.34375 0 -0.4375q0 -1.015625 -0.46875 -1.4375q-0.640625 -0.5625 -1.90625 -0.5625q-1.171875 0 -1.734375 0.40625q-0.5625 0.40625 -0.828125 1.46875l-1.640625 -0.234375q0.234375 -1.046875 0.734375 -1.6875q0.515625 -0.640625 1.46875 -0.984375q0.96875 -0.359375 2.25 -0.359375q1.265625 0 2.046875 0.296875q0.78125 0.296875 1.15625 0.75q0.375 0.453125 0.515625 1.140625q0.09375 0.421875 0.09375 1.53125l0 2.234375q0 2.328125 0.09375 2.953125q0.109375 0.609375 0.4375 1.171875l-1.75 0q-0.265625 -0.515625 -0.328125 -1.21875zm-0.140625 -3.71875q-0.90625 0.359375 -2.734375 0.625q-1.03125 0.140625 -1.453125 0.328125q-0.421875 0.1875 -0.65625 0.546875q-0.234375 0.359375 -0.234375 0.796875q0 0.671875 0.5 1.125q0.515625 0.4375 1.484375 0.4375q0.96875 0 1.71875 -0.421875q0.75 -0.4375 1.109375 -1.15625q0.265625 -0.578125 0.265625 -1.671875l0 -0.609375zm2.9694824 4.9375l3.59375 -5.125l-3.328125 -4.734375l2.09375 0l1.515625 2.3125q0.421875 0.65625 0.671875 1.109375q0.421875 -0.609375 0.765625 -1.09375l1.65625 -2.328125l1.984375 0l-3.390625 4.640625l3.65625 5.21875l-2.046875 0l-2.03125 -3.0625l-0.53125 -0.828125l-2.59375 3.890625l-2.015625 0z" fill-rule="nonzero"></path><path fill="#000000" fill-opacity="0.0" d="m475.09448 568.5302l0.40945435 26.015747" fill-rule="evenodd"></path><path stroke="#000000" stroke-width="1.0" stroke-linejoin="round" stroke-linecap="butt" d="m475.09448 568.5302l0.31506348 20.01648" fill-rule="evenodd"></path><path fill="#000000" stroke="#000000" stroke-width="1.0" stroke-linecap="butt" d="m473.758 588.5727l1.7229309 4.5115356l1.5801086 -4.5635376z" fill-rule="evenodd"></path><path fill="#000000" fill-opacity="0.0" d="m475.49344 636.6562l0.31497192 20.346436" fill-rule="evenodd"></path><path stroke="#000000" stroke-width="1.0" stroke-linejoin="round" stroke-linecap="butt" d="m475.49344 636.6562l0.22210693 14.347168" fill-rule="evenodd"></path><path fill="#000000" stroke="#000000" stroke-width="1.0" stroke-linecap="butt" d="m474.064 651.02893l1.7217712 4.511963l1.5812988 -4.5631104z" fill-rule="evenodd"></path></g></svg>
+<svg version="1.1" viewBox="0.0 0.0 703.0 722.8005249343832" fill="none" stroke="none" stroke-linecap="square" stroke-miterlimit="10" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink"><clipPath id="p.0"><path d="m0 0l703.0 0l0 722.80054l-703.0 0l0 -722.80054z" clip-rule="nonzero"></path></clipPath><g clip-path="url(#p.0)"><path fill="#000000" fill-opacity="0.0" d="m0 0l703.0 0l0 722.80054l-703.0 0z" fill-rule="evenodd"></path><path fill="#000000" fill-opacity="0.0" d="m256.0 30.700842l166.01575 0l0 42.110237l-166.01575 0z" fill-rule="evenodd"></path><path stroke="#000000" stroke-width="2.0" stroke-linejoin="round" stroke-linecap="butt" d="m256.0 30.700842l166.01575 0l0 42.110237l-166.01575 0z" fill-rule="evenodd"></path><path fill="#000000" d="m268.43954 57.620842l0 -13.59375l1.8125 0l0 13.59375l-1.8125 0zm4.6676636 0l0 -9.859375l1.5 0l0 1.40625q1.09375 -1.625 3.140625 -1.625q0.890625 0 1.640625 0.328125q0.75 0.3125 1.109375 0.84375q0.375 0.515625 0.53125 1.21875q0.09375 0.46875 0.09375 1.625l0 6.0625l-1.671875 0l0 -6.0q0 -1.015625 -0.203125 -1.515625q-0.1875 -0.515625 -0.6875 -0.8125q-0.5 -0.296875 -1.171875 -0.296875q-1.0625 0 -1.84375 0.671875q-0.765625 0.671875 -0.765625 2.578125l0 5.375l-1.671875 0zm10.375732 3.78125l0 -13.640625l1.53125 0l0 1.28125q0.53125 -0.75 1.203125 -1.125q0.6875 -0.375 1.640625 -0.375q1.265625 0 2.234375 0.65625q0.96875 0.640625 1.453125 1.828125q0.5 1.1875 0.5 2.59375q0 1.515625 -0.546875 2.734375q-0.546875 1.203125 -1.578125 1.84375q-1.03125 0.640625 -2.171875 0.640625q-0.84375 0 -1.515625 -0.34375q-0.65625 -0.359375 -1.078125 -0.890625l0 4.796875l-1.671875 0zm1.515625 -8.65625q0 1.90625 0.765625 2.8125q0.78125 0.90625 1.875 0.90625q1.109375 0 1.890625 -0.9375q0.796875 -0.9375 0.796875 -2.921875q0 -1.875 -0.78125 -2.8125q-0.765625 -0.9375 -1.84375 -0.9375q-1.0625 0 -1.890625 1.0q-0.8125 1.0 -0.8125 2.890625zm15.313202 4.875l0 -1.453125q-1.140625 1.671875 -3.125 1.671875q-0.859375 0 -1.625 -0.328125q-0.75 -0.34375 -1.125 -0.84375q-0.359375 -0.5 -0.515625 -1.234375q-0.09375 -0.5 -0.09375 -1.5625l0 -6.109375l1.671875 0l0 5.46875q0 1.3125 0.09375 1.765625q0.15625 0.65625 0.671875 1.03125q0.515625 0.375 1.265625 0.375q0.75 0 1.40625 -0.375q0.65625 -0.390625 0.921875 -1.046875q0.28125 -0.671875 0.28125 -1.9375l0 -5.28125l1.671875 0l0 9.859375l-1.5 0zm7.5788574 -1.5l0.234375 1.484375q-0.703125 0.140625 -1.265625 0.140625q-0.90625 0 -1.40625 -0.28125q-0.5 -0.296875 -0.703125 -0.75q-0.203125 -0.46875 -0.203125 -1.984375l0 -5.65625l-1.234375 0l0 -1.3125l1.234375 0l0 -2.4375l1.65625 -1.0l0 3.4375l1.6875 0l0 1.3125l-1.6875 0l0 5.75q0 0.71875 0.078125 0.921875q0.09375 0.203125 0.296875 0.328125q0.203125 0.125 0.578125 0.125q0.265625 0 0.734375 -0.078125zm9.897858 5.5q-1.375 -1.75 -2.328125 -4.078125q-0.953125 -2.34375 -0.953125 -4.84375q0 -2.21875 0.703125 -4.234375q0.84375 -2.34375 2.578125 -4.671875l1.203125 0q-1.125 1.921875 -1.484375 2.75q-0.5625 1.28125 -0.890625 2.671875q-0.40625 1.734375 -0.40625 3.484375q0 4.46875 2.78125 8.921875l-1.203125 0zm2.353302 -6.9375l1.65625 -0.265625q0.140625 1.0 0.765625 1.53125q0.640625 0.515625 1.78125 0.515625q1.15625 0 1.703125 -0.46875q0.5625 -0.46875 0.5625 -1.09375q0 -0.5625 -0.484375 -0.890625q-0.34375 -0.21875 -1.703125 -0.5625q-1.84375 -0.46875 -2.5625 -0.796875q-0.703125 -0.34375 -1.078125 -0.9375q-0.359375 -0.609375 -0.359375 -1.328125q0 -0.65625 0.296875 -1.21875q0.3125 -0.5625 0.828125 -0.9375q0.390625 -0.28125 1.0625 -0.484375q0.671875 -0.203125 1.4375 -0.203125q1.171875 0 2.046875 0.34375q0.875 0.328125 1.28125 0.90625q0.421875 0.5625 0.578125 1.515625l-1.625 0.21875q-0.109375 -0.75 -0.65625 -1.171875q-0.53125 -0.4375 -1.5 -0.4375q-1.15625 0 -1.640625 0.390625q-0.484375 0.375 -0.484375 0.875q0 0.328125 0.203125 0.59375q0.203125 0.265625 0.640625 0.4375q0.25 0.09375 1.46875 0.4375q1.765625 0.46875 2.46875 0.765625q0.703125 0.296875 1.09375 0.875q0.40625 0.578125 0.40625 1.4375q0 0.828125 -0.484375 1.578125q-0.484375 0.734375 -1.40625 1.140625q-0.921875 0.390625 -2.078125 0.390625q-1.921875 0 -2.9375 -0.796875q-1.0 -0.796875 -1.28125 -2.359375zm10.015625 -8.75l0 -1.90625l1.671875 0l0 1.90625l-1.671875 0zm0 11.6875l0 -9.859375l1.671875 0l0 9.859375l-1.671875 0zm3.254181 0l0 -1.359375l6.265625 -7.1875q-1.0625 0.046875 -1.875 0.046875l-4.015625 0l0 -1.359375l8.046875 0l0 1.109375l-5.34375 6.25l-1.015625 1.140625q1.109375 -0.078125 2.09375 -0.078125l4.5625 0l0 1.4375l-8.71875 0zm16.953125 -3.171875l1.71875 0.21875q-0.40625 1.5 -1.515625 2.34375q-1.09375 0.828125 -2.8125 0.828125q-2.15625 0 -3.421875 -1.328125q-1.265625 -1.328125 -1.265625 -3.734375q0 -2.484375 1.265625 -3.859375q1.28125 -1.375 3.328125 -1.375q1.984375 0 3.234375 1.34375q1.25 1.34375 1.25 3.796875q0 0.140625 -0.015625 0.4375l-7.34375 0q0.09375 1.625 0.921875 2.484375q0.828125 0.859375 2.0625 0.859375q0.90625 0 1.546875 -0.46875q0.65625 -0.484375 1.046875 -1.546875zm-5.484375 -2.703125l5.5 0q-0.109375 -1.234375 -0.625 -1.859375q-0.796875 -0.96875 -2.078125 -0.96875q-1.140625 0 -1.9375 0.78125q-0.78125 0.765625 -0.859375 2.046875zm23.074646 -2.125l-8.96875 0l0 -1.5625l8.96875 0l0 1.5625zm0 4.125l-8.96875 0l0 -1.546875l8.96875 0l0 1.546875zm6.8439026 0.28125l1.671875 -0.21875q0.28125 1.421875 0.96875 2.046875q0.703125 0.625 1.6875 0.625q1.1875 0 2.0 -0.8125q0.8125 -0.828125 0.8125 -2.03125q0 -1.140625 -0.765625 -1.890625q-0.75 -0.75 -1.90625 -0.75q-0.46875 0 -1.171875 0.1875l0.1875 -1.46875q0.15625 0.015625 0.265625 0.015625q1.0625 0 1.90625 -0.546875q0.859375 -0.5625 0.859375 -1.71875q0 -0.921875 -0.625 -1.515625q-0.609375 -0.609375 -1.59375 -0.609375q-0.96875 0 -1.625 0.609375q-0.640625 0.609375 -0.828125 1.84375l-1.671875 -0.296875q0.296875 -1.6875 1.375 -2.609375q1.09375 -0.921875 2.71875 -0.921875q1.109375 0 2.046875 0.484375q0.9375 0.46875 1.421875 1.296875q0.5 0.828125 0.5 1.75q0 0.890625 -0.46875 1.609375q-0.46875 0.71875 -1.40625 1.15625q1.21875 0.265625 1.875 1.15625q0.671875 0.875 0.671875 2.1875q0 1.78125 -1.296875 3.015625q-1.296875 1.234375 -3.28125 1.234375q-1.796875 0 -2.984375 -1.0625q-1.171875 -1.0625 -1.34375 -2.765625zm19.141327 1.984375l0 1.609375l-8.984375 0q-0.015625 -0.609375 0.1875 -1.15625q0.34375 -0.921875 1.09375 -1.8125q0.765625 -0.890625 2.1875 -2.0625q2.21875 -1.8125 3.0 -2.875q0.78125 -1.0625 0.78125 -2.015625q0 -0.984375 -0.71875 -1.671875q-0.703125 -0.6875 -1.84375 -0.6875q-1.203125 0 -1.9375 0.734375q-0.71875 0.71875 -0.71875 2.0l-1.71875 -0.171875q0.171875 -1.921875 1.328125 -2.921875q1.15625 -1.015625 3.09375 -1.015625q1.953125 0 3.09375 1.09375q1.140625 1.078125 1.140625 2.6875q0 0.8125 -0.34375 1.609375q-0.328125 0.78125 -1.109375 1.65625q-0.765625 0.859375 -2.5625 2.390625q-1.5 1.265625 -1.9375 1.71875q-0.421875 0.4375 -0.703125 0.890625l6.671875 0zm1.5944824 -5.09375q0 -2.421875 0.5 -3.890625q0.5 -1.46875 1.46875 -2.265625q0.984375 -0.796875 2.46875 -0.796875q1.09375 0 1.921875 0.4375q0.828125 0.4375 1.359375 1.28125q0.546875 0.828125 0.84375 2.015625q0.3125 1.1875 0.3125 3.21875q0 2.390625 -0.5 3.859375q-0.484375 1.46875 -1.46875 2.28125q-0.96875 0.796875 -2.46875 0.796875q-1.96875 0 -3.078125 -1.40625q-1.359375 -1.703125 -1.359375 -5.53125zm1.71875 0q0 3.34375 0.78125 4.453125q0.796875 1.109375 1.9375 1.109375q1.15625 0 1.9375 -1.109375q0.78125 -1.125 0.78125 -4.453125q0 -3.359375 -0.78125 -4.46875q-0.78125 -1.109375 -1.953125 -1.109375q-1.15625 0 -1.828125 0.984375q-0.875 1.234375 -0.875 4.59375zm10.219452 10.703125l-1.1875 0q2.765625 -4.453125 2.765625 -8.921875q0 -1.734375 -0.390625 -3.453125q-0.328125 -1.390625 -0.890625 -2.671875q-0.359375 -0.84375 -1.484375 -2.78125l1.1875 0q1.75 2.328125 2.578125 4.671875q0.71875 2.015625 0.71875 4.234375q0 2.5 -0.96875 4.84375q-0.953125 2.328125 -2.328125 4.078125z" fill-rule="nonzero"></path><path fill="#000000" fill-opacity="0.0" d="m223.0 102.02362l232.18896 0l0 58.992126l-232.18896 0z" fill-rule="evenodd"></path><path stroke="#000000" stroke-width="2.0" stroke-linejoin="round" stroke-linecap="butt" d="m223.0 102.02362l232.18896 0l0 58.992126l-232.18896 0z" fill-rule="evenodd"></path><path fill="#000000" d="m265.82367 128.94362l0 -13.59375l1.796875 0l0 11.984375l6.703125 0l0 1.609375l-8.5 0zm9.844482 -4.375l1.6875 -0.140625q0.125 1.015625 0.5625 1.671875q0.4375 0.65625 1.359375 1.0625q0.9375 0.40625 2.09375 0.40625q1.03125 0 1.8125 -0.3125q0.796875 -0.3125 1.1875 -0.84375q0.390625 -0.53125 0.390625 -1.15625q0 -0.640625 -0.375 -1.109375q-0.375 -0.484375 -1.234375 -0.8125q-0.546875 -0.21875 -2.421875 -0.65625q-1.875 -0.453125 -2.625 -0.859375q-0.96875 -0.515625 -1.453125 -1.265625q-0.46875 -0.75 -0.46875 -1.6875q0 -1.03125 0.578125 -1.921875q0.59375 -0.90625 1.703125 -1.359375q1.125 -0.46875 2.5 -0.46875q1.515625 0 2.671875 0.484375q1.15625 0.484375 1.765625 1.4375q0.625 0.9375 0.671875 2.140625l-1.71875 0.125q-0.140625 -1.28125 -0.953125 -1.9375q-0.796875 -0.671875 -2.359375 -0.671875q-1.625 0 -2.375 0.609375q-0.75 0.59375 -0.75 1.4375q0 0.734375 0.53125 1.203125q0.515625 0.46875 2.703125 0.96875q2.203125 0.5 3.015625 0.875q1.1875 0.546875 1.75 1.390625q0.578125 0.828125 0.578125 1.921875q0 1.09375 -0.625 2.0625q-0.625 0.953125 -1.796875 1.484375q-1.15625 0.53125 -2.609375 0.53125q-1.84375 0 -3.09375 -0.53125q-1.25 -0.546875 -1.96875 -1.625q-0.703125 -1.078125 -0.734375 -2.453125zm16.506073 4.375l0 -12.0l-4.46875 0l0 -1.59375l10.765625 0l0 1.59375l-4.5 0l0 12.0l-1.796875 0zm7.8803406 0l0 -13.59375l2.71875 0l3.21875 9.625q0.4375 1.34375 0.640625 2.015625q0.234375 -0.75 0.734375 -2.1875l3.25 -9.453125l2.421875 0l0 13.59375l-1.734375 0l0 -11.390625l-3.953125 11.390625l-1.625 0l-3.9375 -11.578125l0 11.578125l-1.734375 0zm21.212677 0l-1.671875 0l0 -10.640625q-0.59375 0.578125 -1.578125 1.15625q-0.984375 0.5625 -1.765625 0.859375l0 -1.625q1.40625 -0.65625 2.453125 -1.59375q1.046875 -0.9375 1.484375 -1.8125l1.078125 0l0 13.65625zm12.918396 4.0q-1.375 -1.75 -2.328125 -4.078125q-0.953125 -2.34375 -0.953125 -4.84375q0 -2.21875 0.703125 -4.234375q0.84375 -2.34375 2.578125 -4.671875l1.203125 0q-1.125 1.921875 -1.484375 2.75q-0.5625 1.28125 -0.890625 2.671875q-0.40625 1.734375 -0.40625 3.484375q0 4.46875 2.78125 8.921875l-1.203125 0zm2.572052 -7.59375l1.671875 -0.21875q0.28125 1.421875 0.96875 2.046875q0.703125 0.625 1.6875 0.625q1.1875 0 2.0 -0.8125q0.8125 -0.828125 0.8125 -2.03125q0 -1.140625 -0.765625 -1.890625q-0.75 -0.75 -1.90625 -0.75q-0.46875 0 -1.171875 0.1875l0.1875 -1.46875q0.15625 0.015625 0.265625 0.015625q1.0625 0 1.90625 -0.546875q0.859375 -0.5625 0.859375 -1.71875q0 -0.921875 -0.625 -1.515625q-0.609375 -0.609375 -1.59375 -0.609375q-0.96875 0 -1.625 0.609375q-0.640625 0.609375 -0.828125 1.84375l-1.671875 -0.296875q0.296875 -1.6875 1.375 -2.609375q1.09375 -0.921875 2.71875 -0.921875q1.109375 0 2.046875 0.484375q0.9375 0.46875 1.421875 1.296875q0.5 0.828125 0.5 1.75q0 0.890625 -0.46875 1.609375q-0.46875 0.71875 -1.40625 1.15625q1.21875 0.265625 1.875 1.15625q0.671875 0.875 0.671875 2.1875q0 1.78125 -1.296875 3.015625q-1.296875 1.234375 -3.28125 1.234375q-1.796875 0 -2.984375 -1.0625q-1.171875 -1.0625 -1.34375 -2.765625zm19.141357 1.984375l0 1.609375l-8.984375 0q-0.015625 -0.609375 0.1875 -1.15625q0.34375 -0.921875 1.09375 -1.8125q0.765625 -0.890625 2.1875 -2.0625q2.21875 -1.8125 3.0 -2.875q0.78125 -1.0625 0.78125 -2.015625q0 -0.984375 -0.71875 -1.671875q-0.703125 -0.6875 -1.84375 -0.6875q-1.203125 0 -1.9375 0.734375q-0.71875 0.71875 -0.71875 2.0l-1.71875 -0.171875q0.171875 -1.921875 1.328125 -2.921875q1.15625 -1.015625 3.09375 -1.015625q1.953125 0 3.09375 1.09375q1.140625 1.078125 1.140625 2.6875q0 0.8125 -0.34375 1.609375q-0.328125 0.78125 -1.109375 1.65625q-0.765625 0.859375 -2.5625 2.390625q-1.5 1.265625 -1.9375 1.71875q-0.421875 0.4375 -0.703125 0.890625l6.671875 0zm1.5944519 -5.09375q0 -2.421875 0.5 -3.890625q0.5 -1.46875 1.46875 -2.265625q0.984375 -0.796875 2.46875 -0.796875q1.09375 0 1.921875 0.4375q0.828125 0.4375 1.359375 1.28125q0.546875 0.828125 0.84375 2.015625q0.3125 1.1875 0.3125 3.21875q0 2.390625 -0.5 3.859375q-0.484375 1.46875 -1.46875 2.28125q-0.96875 0.796875 -2.46875 0.796875q-1.96875 0 -3.078125 -1.40625q-1.359375 -1.703125 -1.359375 -5.53125zm1.71875 0q0 3.34375 0.78125 4.453125q0.796875 1.109375 1.9375 1.109375q1.15625 0 1.9375 -1.109375q0.78125 -1.125 0.78125 -4.453125q0 -3.359375 -0.78125 -4.46875q-0.78125 -1.109375 -1.953125 -1.109375q-1.15625 0 -1.828125 0.984375q-0.875 1.234375 -0.875 4.59375zm8.016357 6.703125l3.59375 -5.125l-3.328125 -4.734375l2.09375 0l1.515625 2.3125q0.421875 0.65625 0.671875 1.109375q0.421875 -0.609375 0.765625 -1.09375l1.65625 -2.328125l1.984375 0l-3.390625 4.640625l3.65625 5.21875l-2.046875 0l-2.03125 -3.0625l-0.53125 -0.828125l-2.59375 3.890625l-2.015625 0zm9.96875 -3.5625l1.765625 -0.15625q0.1875 1.28125 0.890625 1.9375q0.71875 0.640625 1.71875 0.640625q1.203125 0 2.03125 -0.90625q0.84375 -0.90625 0.84375 -2.421875q0 -1.421875 -0.8125 -2.25q-0.796875 -0.828125 -2.09375 -0.828125q-0.796875 0 -1.453125 0.375q-0.640625 0.359375 -1.015625 0.953125l-1.578125 -0.203125l1.328125 -7.0l6.765625 0l0 1.609375l-5.4375 0l-0.734375 3.640625q1.234375 -0.84375 2.578125 -0.84375q1.78125 0 3.0 1.234375q1.234375 1.234375 1.234375 3.171875q0 1.84375 -1.078125 3.1875q-1.3125 1.65625 -3.578125 1.65625q-1.859375 0 -3.03125 -1.03125q-1.171875 -1.046875 -1.34375 -2.765625zm10.375702 -3.140625q0 -2.421875 0.5 -3.890625q0.5 -1.46875 1.46875 -2.265625q0.984375 -0.796875 2.46875 -0.796875q1.09375 0 1.921875 0.4375q0.828125 0.4375 1.359375 1.28125q0.546875 0.828125 0.84375 2.015625q0.3125 1.1875 0.3125 3.21875q0 2.390625 -0.5 3.859375q-0.484375 1.46875 -1.46875 2.28125q-0.96875 0.796875 -2.46875 0.796875q-1.96875 0 -3.078125 -1.40625q-1.359375 -1.703125 -1.359375 -5.53125zm1.71875 0q0 3.34375 0.78125 4.453125q0.796875 1.109375 1.9375 1.109375q1.15625 0 1.9375 -1.109375q0.78125 -1.125 0.78125 -4.453125q0 -3.359375 -0.78125 -4.46875q-0.78125 -1.109375 -1.953125 -1.109375q-1.15625 0 -1.828125 0.984375q-0.875 1.234375 -0.875 4.59375zm8.656982 0q0 -2.421875 0.5 -3.890625q0.5 -1.46875 1.46875 -2.265625q0.984375 -0.796875 2.46875 -0.796875q1.09375 0 1.921875 0.4375q0.828125 0.4375 1.359375 1.28125q0.546875 0.828125 0.84375 2.015625q0.3125 1.1875 0.3125 3.21875q0 2.390625 -0.5 3.859375q-0.484375 1.46875 -1.46875 2.28125q-0.96875 0.796875 -2.46875 0.796875q-1.96875 0 -3.078125 -1.40625q-1.359375 -1.703125 -1.359375 -5.53125zm1.71875 0q0 3.34375 0.78125 4.453125q0.796875 1.109375 1.9375 1.109375q1.15625 0 1.9375 -1.109375q0.78125 -1.125 0.78125 -4.453125q0 -3.359375 -0.78125 -4.46875q-0.78125 -1.109375 -1.953125 -1.109375q-1.15625 0 -1.828125 0.984375q-0.875 1.234375 -0.875 4.59375zm10.219452 10.703125l-1.1875 0q2.765625 -4.453125 2.765625 -8.921875q0 -1.734375 -0.390625 -3.453125q-0.328125 -1.390625 -0.890625 -2.671875q-0.359375 -0.84375 -1.484375 -2.78125l1.1875 0q1.75 2.328125 2.578125 4.671875q0.71875 2.015625 0.71875 4.234375q0 2.5 -0.96875 4.84375q-0.953125 2.328125 -2.328125 4.078125z" fill-rule="nonzero"></path><path fill="#000000" d="m259.9714 154.72487l0 -13.640625l1.53125 0l0 1.28125q0.53125 -0.75 1.203125 -1.125q0.6875 -0.375 1.640625 -0.375q1.265625 0 2.234375 0.65625q0.96875 0.640625 1.453125 1.828125q0.5 1.1875 0.5 2.59375q0 1.515625 -0.546875 2.734375q-0.546875 1.203125 -1.578125 1.84375q-1.03125 0.640625 -2.171875 0.640625q-0.84375 0 -1.515625 -0.34375q-0.65625 -0.359375 -1.078125 -0.890625l0 4.796875l-1.671875 0zm1.515625 -8.65625q0 1.90625 0.765625 2.8125q0.78125 0.90625 1.875 0.90625q1.109375 0 1.890625 -0.9375q0.796875 -0.9375 0.796875 -2.921875q0 -1.875 -0.78125 -2.8125q-0.765625 -0.9375 -1.84375 -0.9375q-1.0625 0 -1.890625 1.0q-0.8125 1.0 -0.8125 2.890625zm8.844452 4.875l0 -9.859375l1.5 0l0 1.5q0.578125 -1.046875 1.0625 -1.375q0.484375 -0.34375 1.078125 -0.34375q0.84375 0 1.71875 0.546875l-0.578125 1.546875q-0.609375 -0.359375 -1.234375 -0.359375q-0.546875 0 -0.984375 0.328125q-0.421875 0.328125 -0.609375 0.90625q-0.28125 0.890625 -0.28125 1.953125l0 5.15625l-1.671875 0zm5.603302 -4.921875q0 -2.734375 1.53125 -4.0625q1.265625 -1.09375 3.09375 -1.09375q2.03125 0 3.3125 1.34375q1.296875 1.328125 1.296875 3.671875q0 1.90625 -0.578125 3.0q-0.5625 1.078125 -1.65625 1.6875q-1.078125 0.59375 -2.375 0.59375q-2.0625 0 -3.34375 -1.328125q-1.28125 -1.328125 -1.28125 -3.8125zm1.71875 0q0 1.890625 0.828125 2.828125q0.828125 0.9375 2.078125 0.9375q1.25 0 2.0625 -0.9375q0.828125 -0.953125 0.828125 -2.890625q0 -1.828125 -0.828125 -2.765625q-0.828125 -0.9375 -2.0625 -0.9375q-1.25 0 -2.078125 0.9375q-0.828125 0.9375 -0.828125 2.828125zm9.281982 -6.734375l0 -1.9375l1.65625 0l0 1.9375l-1.65625 0zm-2.125 15.484375l0.3125 -1.421875q0.5 0.125 0.796875 0.125q0.515625 0 0.765625 -0.34375q0.25 -0.328125 0.25 -1.6875l0 -10.359375l1.65625 0l0 10.390625q0 1.828125 -0.46875 2.546875q-0.59375 0.921875 -2.0 0.921875q-0.671875 0 -1.3125 -0.171875zm13.019806 -7.0l1.71875 0.21875q-0.40625 1.5 -1.515625 2.34375q-1.09375 0.828125 -2.8125 0.828125q-2.15625 0 -3.421875 -1.328125q-1.265625 -1.328125 -1.265625 -3.734375q0 -2.484375 1.265625 -3.859375q1.28125 -1.375 3.328125 -1.375q1.984375 0 3.234375 1.34375q1.25 1.34375 1.25 3.796875q0 0.140625 -0.015625 0.4375l-7.34375 0q0.09375 1.625 0.921875 2.484375q0.828125 0.859375 2.0625 0.859375q0.90625 0 1.546875 -0.46875q0.65625 -0.484375 1.046875 -1.546875zm-5.484375 -2.703125l5.5 0q-0.109375 -1.234375 -0.625 -1.859375q-0.796875 -0.96875 -2.078125 -0.96875q-1.140625 0 -1.9375 0.78125q-0.78125 0.765625 -0.859375 2.046875zm15.547607 2.265625l1.640625 0.21875q-0.265625 1.6875 -1.375 2.65625q-1.109375 0.953125 -2.734375 0.953125q-2.015625 0 -3.25 -1.3125q-1.21875 -1.328125 -1.21875 -3.796875q0 -1.59375 0.515625 -2.78125q0.53125 -1.203125 1.609375 -1.796875q1.09375 -0.609375 2.359375 -0.609375q1.609375 0 2.625 0.8125q1.015625 0.8125 1.3125 2.3125l-1.625 0.25q-0.234375 -1.0 -0.828125 -1.5q-0.59375 -0.5 -1.421875 -0.5q-1.265625 0 -2.0625 0.90625q-0.78125 0.90625 -0.78125 2.859375q0 1.984375 0.765625 2.890625q0.765625 0.890625 1.984375 0.890625q0.984375 0 1.640625 -0.59375q0.65625 -0.609375 0.84375 -1.859375zm6.546875 2.109375l0.234375 1.484375q-0.703125 0.140625 -1.265625 0.140625q-0.90625 0 -1.40625 -0.28125q-0.5 -0.296875 -0.703125 -0.75q-0.203125 -0.46875 -0.203125 -1.984375l0 -5.65625l-1.234375 0l0 -1.3125l1.234375 0l0 -2.4375l1.65625 -1.0l0 3.4375l1.6875 0l0 1.3125l-1.6875 0l0 5.75q0 0.71875 0.078125 0.921875q0.09375 0.203125 0.296875 0.328125q0.203125 0.125 0.578125 0.125q0.265625 0 0.734375 -0.078125zm1.5426636 -10.1875l0 -1.90625l1.671875 0l0 1.90625l-1.671875 0zm0 11.6875l0 -9.859375l1.671875 0l0 9.859375l-1.671875 0zm3.5042114 -4.921875q0 -2.734375 1.53125 -4.0625q1.265625 -1.09375 3.09375 -1.09375q2.03125 0 3.3125 1.34375q1.296875 1.328125 1.296875 3.671875q0 1.90625 -0.578125 3.0q-0.5625 1.078125 -1.65625 1.6875q-1.078125 0.59375 -2.375 0.59375q-2.0625 0 -3.34375 -1.328125q-1.28125 -1.328125 -1.28125 -3.8125zm1.71875 0q0 1.890625 0.828125 2.828125q0.828125 0.9375 2.078125 0.9375q1.25 0 2.0625 -0.9375q0.828125 -0.953125 0.828125 -2.890625q0 -1.828125 -0.828125 -2.765625q-0.828125 -0.9375 -2.0625 -0.9375q-1.25 0 -2.078125 0.9375q-0.828125 0.9375 -0.828125 2.828125zm9.281952 4.921875l0 -9.859375l1.5 0l0 1.40625q1.09375 -1.625 3.140625 -1.625q0.890625 0 1.640625 0.328125q0.75 0.3125 1.109375 0.84375q0.375 0.515625 0.53125 1.21875q0.09375 0.46875 0.09375 1.625l0 6.0625l-1.671875 0l0 -6.0q0 -1.015625 -0.203125 -1.515625q-0.1875 -0.515625 -0.6875 -0.8125q-0.5 -0.296875 -1.171875 -0.296875q-1.0625 0 -1.84375 0.671875q-0.765625 0.671875 -0.765625 2.578125l0 5.375l-1.671875 0zm14.887146 -2.9375l1.65625 -0.265625q0.140625 1.0 0.765625 1.53125q0.640625 0.515625 1.78125 0.515625q1.15625 0 1.703125 -0.46875q0.5625 -0.46875 0.5625 -1.09375q0 -0.5625 -0.484375 -0.890625q-0.34375 -0.21875 -1.703125 -0.5625q-1.84375 -0.46875 -2.5625 -0.796875q-0.703125 -0.34375 -1.078125 -0.9375q-0.359375 -0.609375 -0.359375 -1.328125q0 -0.65625 0.296875 -1.21875q0.3125 -0.5625 0.828125 -0.9375q0.390625 -0.28125 1.0625 -0.484375q0.671875 -0.203125 1.4375 -0.203125q1.171875 0 2.046875 0.34375q0.875 0.328125 1.28125 0.90625q0.421875 0.5625 0.578125 1.515625l-1.625 0.21875q-0.109375 -0.75 -0.65625 -1.171875q-0.53125 -0.4375 -1.5 -0.4375q-1.15625 0 -1.640625 0.390625q-0.484375 0.375 -0.484375 0.875q0 0.328125 0.203125 0.59375q0.203125 0.265625 0.640625 0.4375q0.25 0.09375 1.46875 0.4375q1.765625 0.46875 2.46875 0.765625q0.703125 0.296875 1.09375 0.875q0.40625 0.578125 0.40625 1.4375q0 0.828125 -0.484375 1.578125q-0.484375 0.734375 -1.40625 1.140625q-0.921875 0.390625 -2.078125 0.390625q-1.921875 0 -2.9375 -0.796875q-1.0 -0.796875 -1.28125 -2.359375zm10.015625 -8.75l0 -1.90625l1.671875 0l0 1.90625l-1.671875 0zm0 11.6875l0 -9.859375l1.671875 0l0 9.859375l-1.671875 0zm3.2542114 0l0 -1.359375l6.265625 -7.1875q-1.0625 0.046875 -1.875 0.046875l-4.015625 0l0 -1.359375l8.046875 0l0 1.109375l-5.34375 6.25l-1.015625 1.140625q1.109375 -0.078125 2.09375 -0.078125l4.5625 0l0 1.4375l-8.71875 0zm16.953125 -3.171875l1.71875 0.21875q-0.40625 1.5 -1.515625 2.34375q-1.09375 0.828125 -2.8125 0.828125q-2.15625 0 -3.421875 -1.328125q-1.265625 -1.328125 -1.265625 -3.734375q0 -2.484375 1.265625 -3.859375q1.28125 -1.375 3.328125 -1.375q1.984375 0 3.234375 1.34375q1.25 1.34375 1.25 3.796875q0 0.140625 -0.015625 0.4375l-7.34375 0q0.09375 1.625 0.921875 2.484375q0.828125 0.859375 2.0625 0.859375q0.90625 0 1.546875 -0.46875q0.65625 -0.484375 1.046875 -1.546875zm-5.484375 -2.703125l5.5 0q-0.109375 -1.234375 -0.625 -1.859375q-0.796875 -0.96875 -2.078125 -0.96875q-1.140625 0 -1.9375 0.78125q-0.78125 0.765625 -0.859375 2.046875zm9.578827 -2.078125l0 -1.90625l1.90625 0l0 1.90625l-1.90625 0zm0 7.953125l0 -1.90625l1.90625 0l0 1.90625l-1.90625 0zm18.210358 -1.609375l0 1.609375l-8.984375 0q-0.015625 -0.609375 0.1875 -1.15625q0.34375 -0.921875 1.09375 -1.8125q0.765625 -0.890625 2.1875 -2.0625q2.21875 -1.8125 3.0 -2.875q0.78125 -1.0625 0.78125 -2.015625q0 -0.984375 -0.71875 -1.671875q-0.703125 -0.6875 -1.84375 -0.6875q-1.203125 0 -1.9375 0.734375q-0.71875 0.71875 -0.71875 2.0l-1.71875 -0.171875q0.171875 -1.921875 1.328125 -2.921875q1.15625 -1.015625 3.09375 -1.015625q1.953125 0 3.09375 1.09375q1.140625 1.078125 1.140625 2.6875q0 0.8125 -0.34375 1.609375q-0.328125 0.78125 -1.109375 1.65625q-0.765625 0.859375 -2.5625 2.390625q-1.5 1.265625 -1.9375 1.71875q-0.421875 0.4375 -0.703125 0.890625l6.671875 0zm1.5944824 -5.09375q0 -2.421875 0.5 -3.890625q0.5 -1.46875 1.46875 -2.265625q0.984375 -0.796875 2.46875 -0.796875q1.09375 0 1.921875 0.4375q0.828125 0.4375 1.359375 1.28125q0.546875 0.828125 0.84375 2.015625q0.3125 1.1875 0.3125 3.21875q0 2.390625 -0.5 3.859375q-0.484375 1.46875 -1.46875 2.28125q-0.96875 0.796875 -2.46875 0.796875q-1.96875 0 -3.078125 -1.40625q-1.359375 -1.703125 -1.359375 -5.53125zm1.71875 0q0 3.34375 0.78125 4.453125q0.796875 1.109375 1.9375 1.109375q1.15625 0 1.9375 -1.109375q0.78125 -1.125 0.78125 -4.453125q0 -3.359375 -0.78125 -4.46875q-0.78125 -1.109375 -1.953125 -1.109375q-1.15625 0 -1.828125 0.984375q-0.875 1.234375 -0.875 4.59375zm8.656952 0q0 -2.421875 0.5 -3.890625q0.5 -1.46875 1.46875 -2.265625q0.984375 -0.796875 2.46875 -0.796875q1.09375 0 1.921875 0.4375q0.828125 0.4375 1.359375 1.28125q0.546875 0.828125 0.84375 2.015625q0.3125 1.1875 0.3125 3.21875q0 2.390625 -0.5 3.859375q-0.484375 1.46875 -1.46875 2.28125q-0.96875 0.796875 -2.46875 0.796875q-1.96875 0 -3.078125 -1.40625q-1.359375 -1.703125 -1.359375 -5.53125zm1.71875 0q0 3.34375 0.78125 4.453125q0.796875 1.109375 1.9375 1.109375q1.15625 0 1.9375 -1.109375q0.78125 -1.125 0.78125 -4.453125q0 -3.359375 -0.78125 -4.46875q-0.78125 -1.109375 -1.953125 -1.109375q-1.15625 0 -1.828125 0.984375q-0.875 1.234375 -0.875 4.59375z" fill-rule="nonzero"></path><path fill="#000000" fill-opacity="0.0" d="m249.80052 657.01575l180.00002 0l0 42.11023l-180.00002 0z" fill-rule="evenodd"></path><path stroke="#000000" stroke-width="2.0" stroke-linejoin="round" stroke-linecap="butt" d="m249.80052 657.01575l180.00002 0l0 42.11023l-180.00002 0z" fill-rule="evenodd"></path><path fill="#000000" d="m266.3206 677.3107q0 -3.390625 1.8125 -5.296875q1.828125 -1.921875 4.703125 -1.921875q1.875 0 3.390625 0.90625q1.515625 0.890625 2.296875 2.5q0.796875 1.609375 0.796875 3.65625q0 2.0625 -0.84375 3.703125q-0.828125 1.625 -2.359375 2.46875q-1.53125 0.84375 -3.296875 0.84375q-1.921875 0 -3.4375 -0.921875q-1.5 -0.9375 -2.28125 -2.53125q-0.78125 -1.609375 -0.78125 -3.40625zm1.859375 0.03125q0 2.453125 1.3125 3.875q1.328125 1.40625 3.3125 1.40625q2.03125 0 3.34375 -1.421875q1.3125 -1.4375 1.3125 -4.0625q0 -1.65625 -0.5625 -2.890625q-0.546875 -1.234375 -1.640625 -1.921875q-1.078125 -0.6875 -2.421875 -0.6875q-1.90625 0 -3.28125 1.3125q-1.375 1.3125 -1.375 4.390625zm19.433289 6.59375l0 -1.453125q-1.140625 1.671875 -3.125 1.671875q-0.859375 0 -1.625 -0.328125q-0.75 -0.34375 -1.125 -0.84375q-0.359375 -0.5 -0.515625 -1.234375q-0.09375 -0.5 -0.09375 -1.5625l0 -6.109375l1.671875 0l0 5.46875q0 1.3125 0.09375 1.765625q0.15625 0.65625 0.671875 1.03125q0.515625 0.375 1.265625 0.375q0.75 0 1.40625 -0.375q0.65625 -0.390625 0.921875 -1.046875q0.28125 -0.671875 0.28125 -1.9375l0 -5.28125l1.671875 0l0 9.859375l-1.5 0zm7.5788574 -1.5l0.234375 1.484375q-0.703125 0.140625 -1.265625 0.140625q-0.90625 0 -1.40625 -0.28125q-0.5 -0.296875 -0.703125 -0.75q-0.203125 -0.46875 -0.203125 -1.984375l0 -5.65625l-1.234375 0l0 -1.3125l1.234375 0l0 -2.4375l1.65625 -1.0l0 3.4375l1.6875 0l0 1.3125l-1.6875 0l0 5.75q0 0.71875 0.078125 0.921875q0.09375 0.203125 0.296875 0.328125q0.203125 0.125 0.578125 0.125q0.265625 0 0.734375 -0.078125zm1.5270386 5.28125l0 -13.640625l1.53125 0l0 1.28125q0.53125 -0.75 1.203125 -1.125q0.6875 -0.375 1.640625 -0.375q1.265625 0 2.234375 0.65625q0.96875 0.640625 1.453125 1.828125q0.5 1.1875 0.5 2.59375q0 1.515625 -0.546875 2.734375q-0.546875 1.203125 -1.578125 1.84375q-1.03125 0.640625 -2.171875 0.640625q-0.84375 0 -1.515625 -0.34375q-0.65625 -0.359375 -1.078125 -0.890625l0 4.796875l-1.671875 0zm1.515625 -8.65625q0 1.90625 0.765625 2.8125q0.78125 0.90625 1.875 0.90625q1.109375 0 1.890625 -0.9375q0.796875 -0.9375 0.796875 -2.921875q0 -1.875 -0.78125 -2.8125q-0.765625 -0.9375 -1.84375 -0.9375q-1.0625 0 -1.890625 1.0q-0.8125 1.0 -0.8125 2.890625zm15.313232 4.875l0 -1.453125q-1.140625 1.671875 -3.125 1.671875q-0.859375 0 -1.625 -0.328125q-0.75 -0.34375 -1.125 -0.84375q-0.359375 -0.5 -0.515625 -1.234375q-0.09375 -0.5 -0.09375 -1.5625l0 -6.109375l1.671875 0l0 5.46875q0 1.3125 0.09375 1.765625q0.15625 0.65625 0.671875 1.03125q0.515625 0.375 1.265625 0.375q0.75 0 1.40625 -0.375q0.65625 -0.390625 0.921875 -1.046875q0.28125 -0.671875 0.28125 -1.9375l0 -5.28125l1.671875 0l0 9.859375l-1.5 0zm7.578827 -1.5l0.234375 1.484375q-0.703125 0.140625 -1.265625 0.140625q-0.90625 0 -1.40625 -0.28125q-0.5 -0.296875 -0.703125 -0.75q-0.203125 -0.46875 -0.203125 -1.984375l0 -5.65625l-1.234375 0l0 -1.3125l1.234375 0l0 -2.4375l1.65625 -1.0l0 3.4375l1.6875 0l0 1.3125l-1.6875 0l0 5.75q0 0.71875 0.078125 0.921875q0.09375 0.203125 0.296875 0.328125q0.203125 0.125 0.578125 0.125q0.265625 0 0.734375 -0.078125zm9.897858 5.5q-1.375 -1.75 -2.328125 -4.078125q-0.953125 -2.34375 -0.953125 -4.84375q0 -2.21875 0.703125 -4.234375q0.84375 -2.34375 2.578125 -4.671875l1.203125 0q-1.125 1.921875 -1.484375 2.75q-0.5625 1.28125 -0.890625 2.671875q-0.40625 1.734375 -0.40625 3.484375q0 4.46875 2.78125 8.921875l-1.203125 0zm2.353302 -6.9375l1.65625 -0.265625q0.140625 1.0 0.765625 1.53125q0.640625 0.515625 1.78125 0.515625q1.15625 0 1.703125 -0.46875q0.5625 -0.46875 0.5625 -1.09375q0 -0.5625 -0.484375 -0.890625q-0.34375 -0.21875 -1.703125 -0.5625q-1.84375 -0.46875 -2.5625 -0.796875q-0.703125 -0.34375 -1.078125 -0.9375q-0.359375 -0.609375 -0.359375 -1.328125q0 -0.65625 0.296875 -1.21875q0.3125 -0.5625 0.828125 -0.9375q0.390625 -0.28125 1.0625 -0.484375q0.671875 -0.203125 1.4375 -0.203125q1.171875 0 2.046875 0.34375q0.875 0.328125 1.28125 0.90625q0.421875 0.5625 0.578125 1.515625l-1.625 0.21875q-0.109375 -0.75 -0.65625 -1.171875q-0.53125 -0.4375 -1.5 -0.4375q-1.15625 0 -1.640625 0.390625q-0.484375 0.375 -0.484375 0.875q0 0.328125 0.203125 0.59375q0.203125 0.265625 0.640625 0.4375q0.25 0.09375 1.46875 0.4375q1.765625 0.46875 2.46875 0.765625q0.703125 0.296875 1.09375 0.875q0.40625 0.578125 0.40625 1.4375q0 0.828125 -0.484375 1.578125q-0.484375 0.734375 -1.40625 1.140625q-0.921875 0.390625 -2.078125 0.390625q-1.921875 0 -2.9375 -0.796875q-1.0 -0.796875 -1.28125 -2.359375zm10.015625 -8.75l0 -1.90625l1.671875 0l0 1.90625l-1.671875 0zm0 11.6875l0 -9.859375l1.671875 0l0 9.859375l-1.671875 0zm3.2542114 0l0 -1.359375l6.265625 -7.1875q-1.0625 0.046875 -1.875 0.046875l-4.015625 0l0 -1.359375l8.046875 0l0 1.109375l-5.34375 6.25l-1.015625 1.140625q1.109375 -0.078125 2.09375 -0.078125l4.5625 0l0 1.4375l-8.71875 0zm16.953125 -3.171875l1.71875 0.21875q-0.40625 1.5 -1.515625 2.34375q-1.09375 0.828125 -2.8125 0.828125q-2.15625 0 -3.421875 -1.328125q-1.265625 -1.328125 -1.265625 -3.734375q0 -2.484375 1.265625 -3.859375q1.28125 -1.375 3.328125 -1.375q1.984375 0 3.234375 1.34375q1.25 1.34375 1.25 3.796875q0 0.140625 -0.015625 0.4375l-7.34375 0q0.09375 1.625 0.921875 2.484375q0.828125 0.859375 2.0625 0.859375q0.90625 0 1.546875 -0.46875q0.65625 -0.484375 1.046875 -1.546875zm-5.484375 -2.703125l5.5 0q-0.109375 -1.234375 -0.625 -1.859375q-0.796875 -0.96875 -2.078125 -0.96875q-1.140625 0 -1.9375 0.78125q-0.78125 0.765625 -0.859375 2.046875zm23.074646 -2.125l-8.96875 0l0 -1.5625l8.96875 0l0 1.5625zm0 4.125l-8.96875 0l0 -1.546875l8.96875 0l0 1.546875zm12.187653 3.875l0 -3.25l-5.90625 0l0 -1.53125l6.21875 -8.8125l1.359375 0l0 8.8125l1.84375 0l0 1.53125l-1.84375 0l0 3.25l-1.671875 0zm0 -4.78125l0 -6.140625l-4.25 6.140625l4.25 0zm13.797577 3.171875l0 1.609375l-8.984375 0q-0.015625 -0.609375 0.1875 -1.15625q0.34375 -0.921875 1.09375 -1.8125q0.765625 -0.890625 2.1875 -2.0625q2.21875 -1.8125 3.0 -2.875q0.78125 -1.0625 0.78125 -2.015625q0 -0.984375 -0.71875 -1.671875q-0.703125 -0.6875 -1.84375 -0.6875q-1.203125 0 -1.9375 0.734375q-0.71875 0.71875 -0.71875 2.0l-1.71875 -0.171875q0.171875 -1.921875 1.328125 -2.921875q1.15625 -1.015625 3.09375 -1.015625q1.953125 0 3.09375 1.09375q1.140625 1.078125 1.140625 2.6875q0 0.8125 -0.34375 1.609375q-0.328125 0.78125 -1.109375 1.65625q-0.765625 0.859375 -2.5625 2.390625q-1.5 1.265625 -1.9375 1.71875q-0.421875 0.4375 -0.703125 0.890625l6.671875 0zm3.1569824 5.609375l-1.1875 0q2.765625 -4.453125 2.765625 -8.921875q0 -1.734375 -0.390625 -3.453125q-0.328125 -1.390625 -0.890625 -2.671875q-0.359375 -0.84375 -1.484375 -2.78125l1.1875 0q1.75 2.328125 2.578125 4.671875q0.71875 2.015625 0.71875 4.234375q0 2.5 -0.96875 4.84375q-0.953125 2.328125 -2.328125 4.078125z" fill-rule="nonzero"></path><path fill="#000000" fill-opacity="0.0" d="m339.09448 161.01575l0 24.724411" fill-rule="evenodd"></path><path stroke="#000000" stroke-width="1.0" stroke-linejoin="round" stroke-linecap="butt" d="m339.09448 161.01575l0 18.724411" fill-rule="evenodd"></path><path fill="#000000" stroke="#000000" stroke-width="1.0" stroke-linecap="butt" d="m337.44275 179.74016l1.6517334 4.538101l1.6517334 -4.538101z" fill-rule="evenodd"></path><path fill="#000000" fill-opacity="0.0" d="m339.09448 244.72906l0 25.29132" fill-rule="evenodd"></path><path stroke="#000000" stroke-width="1.0" stroke-linejoin="round" stroke-linecap="butt" d="m339.09448 244.72906l0 19.291351" fill-rule="evenodd"></path><path fill="#000000" stroke="#000000" stroke-width="1.0" stroke-linecap="butt" d="m337.44275 264.02042l1.6517334 4.538086l1.6517334 -4.538086z" fill-rule="evenodd"></path><path fill="#000000" fill-opacity="0.0" d="m339.00787 72.81108l0.09448242 29.196846" fill-rule="evenodd"></path><path stroke="#000000" stroke-width="1.0" stroke-linejoin="round" stroke-linecap="butt" d="m339.00787 72.81108l0.07507324 23.196877" fill-rule="evenodd"></path><path fill="#000000" stroke="#000000" stroke-width="1.0" stroke-linecap="butt" d="m337.4312 96.013306l1.6664124 4.5327225l1.6370544 -4.543419z" fill-rule="evenodd"></path><path fill="#000000" fill-opacity="0.0" d="m223.0 526.4199l232.18896 0l0 42.11029l-232.18896 0z" fill-rule="evenodd"></path><path stroke="#000000" stroke-width="2.0" stroke-linejoin="round" stroke-linecap="butt" d="m223.0 526.4199l232.18896 0l0 42.11029l-232.18896 0z" fill-rule="evenodd"></path><path fill="#000000" d="m236.43524 553.33997l0 -13.59375l9.171875 0l0 1.59375l-7.375 0l0 4.21875l6.375 0l0 1.609375l-6.375 0l0 6.171875l-1.796875 0zm17.53659 0l0 -1.453125q-1.140625 1.671875 -3.125 1.671875q-0.859375 0 -1.625 -0.328125q-0.75 -0.34375 -1.125 -0.84375q-0.359375 -0.5 -0.515625 -1.234375q-0.09375 -0.5 -0.09375 -1.5625l0 -6.109375l1.671875 0l0 5.46875q0 1.3125 0.09375 1.765625q0.15625 0.65625 0.671875 1.03125q0.515625 0.375 1.265625 0.375q0.75 0 1.40625 -0.375q0.65625 -0.390625 0.921875 -1.046875q0.28125 -0.671875 0.28125 -1.9375l0 -5.28125l1.671875 0l0 9.859375l-1.5 0zm3.8913574 0l0 -13.59375l1.671875 0l0 13.59375l-1.671875 0zm4.144806 0l0 -13.59375l1.671875 0l0 13.59375l-1.671875 0zm4.0979614 3.796875l-0.171875 -1.5625q0.546875 0.140625 0.953125 0.140625q0.546875 0 0.875 -0.1875q0.34375 -0.1875 0.5625 -0.515625q0.15625 -0.25 0.5 -1.25q0.046875 -0.140625 0.15625 -0.40625l-3.734375 -9.875l1.796875 0l2.046875 5.71875q0.40625 1.078125 0.71875 2.28125q0.28125 -1.15625 0.6875 -2.25l2.09375 -5.75l1.671875 0l-3.75 10.03125q-0.59375 1.625 -0.9375 2.234375q-0.4375 0.828125 -1.015625 1.203125q-0.578125 0.390625 -1.375 0.390625q-0.484375 0 -1.078125 -0.203125zm19.328125 -8.5625l1.796875 0.453125q-0.5625 2.21875 -2.03125 3.390625q-1.46875 1.15625 -3.59375 1.15625q-2.203125 0 -3.578125 -0.890625q-1.375 -0.90625 -2.09375 -2.59375q-0.71875 -1.703125 -0.71875 -3.65625q0 -2.125 0.796875 -3.703125q0.8125 -1.578125 2.3125 -2.390625q1.5 -0.828125 3.296875 -0.828125q2.046875 0 3.4375 1.046875q1.390625 1.03125 1.9375 2.90625l-1.765625 0.421875q-0.46875 -1.484375 -1.375 -2.15625q-0.90625 -0.6875 -2.265625 -0.6875q-1.5625 0 -2.625 0.75q-1.046875 0.75 -1.484375 2.03125q-0.421875 1.265625 -0.421875 2.609375q0 1.734375 0.5 3.03125q0.515625 1.28125 1.578125 1.921875q1.078125 0.640625 2.3125 0.640625q1.515625 0 2.5625 -0.859375q1.046875 -0.875 1.421875 -2.59375zm2.926056 -0.15625q0 -2.734375 1.53125 -4.0625q1.265625 -1.09375 3.09375 -1.09375q2.03125 0 3.3125 1.34375q1.296875 1.328125 1.296875 3.671875q0 1.90625 -0.578125 3.0q-0.5625 1.078125 -1.65625 1.6875q-1.078125 0.59375 -2.375 0.59375q-2.0625 0 -3.34375 -1.328125q-1.28125 -1.328125 -1.28125 -3.8125zm1.71875 0q0 1.890625 0.828125 2.828125q0.828125 0.9375 2.078125 0.9375q1.25 0 2.0625 -0.9375q0.828125 -0.953125 0.828125 -2.890625q0 -1.828125 -0.828125 -2.765625q-0.828125 -0.9375 -2.0625 -0.9375q-1.25 0 -2.078125 0.9375q-0.828125 0.9375 -0.828125 2.828125zm9.281982 4.921875l0 -9.859375l1.5 0l0 1.40625q1.09375 -1.625 3.140625 -1.625q0.890625 0 1.640625 0.328125q0.75 0.3125 1.109375 0.84375q0.375 0.515625 0.53125 1.21875q0.09375 0.46875 0.09375 1.625l0 6.0625l-1.671875 0l0 -6.0q0 -1.015625 -0.203125 -1.515625q-0.1875 -0.515625 -0.6875 -0.8125q-0.5 -0.296875 -1.171875 -0.296875q-1.0625 0 -1.84375 0.671875q-0.765625 0.671875 -0.765625 2.578125l0 5.375l-1.671875 0zm10.375702 0l0 -9.859375l1.5 0l0 1.40625q1.09375 -1.625 3.140625 -1.625q0.890625 0 1.640625 0.328125q0.75 0.3125 1.109375 0.84375q0.375 0.515625 0.53125 1.21875q0.09375 0.46875 0.09375 1.625l0 6.0625l-1.671875 0l0 -6.0q0 -1.015625 -0.203125 -1.515625q-0.1875 -0.515625 -0.6875 -0.8125q-0.5 -0.296875 -1.171875 -0.296875q-1.0625 0 -1.84375 0.671875q-0.765625 0.671875 -0.765625 2.578125l0 5.375l-1.671875 0zm17.125732 -3.171875l1.71875 0.21875q-0.40625 1.5 -1.515625 2.34375q-1.09375 0.828125 -2.8125 0.828125q-2.15625 0 -3.421875 -1.328125q-1.265625 -1.328125 -1.265625 -3.734375q0 -2.484375 1.265625 -3.859375q1.28125 -1.375 3.328125 -1.375q1.984375 0 3.234375 1.34375q1.25 1.34375 1.25 3.796875q0 0.140625 -0.015625 0.4375l-7.34375 0q0.09375 1.625 0.921875 2.484375q0.828125 0.859375 2.0625 0.859375q0.90625 0 1.546875 -0.46875q0.65625 -0.484375 1.046875 -1.546875zm-5.484375 -2.703125l5.5 0q-0.109375 -1.234375 -0.625 -1.859375q-0.796875 -0.96875 -2.078125 -0.96875q-1.140625 0 -1.9375 0.78125q-0.78125 0.765625 -0.859375 2.046875zm15.547577 2.265625l1.640625 0.21875q-0.265625 1.6875 -1.375 2.65625q-1.109375 0.953125 -2.734375 0.953125q-2.015625 0 -3.25 -1.3125q-1.21875 -1.328125 -1.21875 -3.796875q0 -1.59375 0.515625 -2.78125q0.53125 -1.203125 1.609375 -1.796875q1.09375 -0.609375 2.359375 -0.609375q1.609375 0 2.625 0.8125q1.015625 0.8125 1.3125 2.3125l-1.625 0.25q-0.234375 -1.0 -0.828125 -1.5q-0.59375 -0.5 -1.421875 -0.5q-1.265625 0 -2.0625 0.90625q-0.78125 0.90625 -0.78125 2.859375q0 1.984375 0.765625 2.890625q0.765625 0.890625 1.984375 0.890625q0.984375 0 1.640625 -0.59375q0.65625 -0.609375 0.84375 -1.859375zm6.546875 2.109375l0.234375 1.484375q-0.703125 0.140625 -1.265625 0.140625q-0.90625 0 -1.40625 -0.28125q-0.5 -0.296875 -0.703125 -0.75q-0.203125 -0.46875 -0.203125 -1.984375l0 -5.65625l-1.234375 0l0 -1.3125l1.234375 0l0 -2.4375l1.65625 -1.0l0 3.4375l1.6875 0l0 1.3125l-1.6875 0l0 5.75q0 0.71875 0.078125 0.921875q0.09375 0.203125 0.296875 0.328125q0.203125 0.125 0.578125 0.125q0.265625 0 0.734375 -0.078125zm8.277069 -1.671875l1.71875 0.21875q-0.40625 1.5 -1.515625 2.34375q-1.09375 0.828125 -2.8125 0.828125q-2.15625 0 -3.421875 -1.328125q-1.265625 -1.328125 -1.265625 -3.734375q0 -2.484375 1.265625 -3.859375q1.28125 -1.375 3.328125 -1.375q1.984375 0 3.234375 1.34375q1.25 1.34375 1.25 3.796875q0 0.140625 -0.015625 0.4375l-7.34375 0q0.09375 1.625 0.921875 2.484375q0.828125 0.859375 2.0625 0.859375q0.90625 0 1.546875 -0.46875q0.65625 -0.484375 1.046875 -1.546875zm-5.484375 -2.703125l5.5 0q-0.109375 -1.234375 -0.625 -1.859375q-0.796875 -0.96875 -2.078125 -0.96875q-1.140625 0 -1.9375 0.78125q-0.78125 0.765625 -0.859375 2.046875zm15.500702 5.875l0 -1.25q-0.9375 1.46875 -2.75 1.46875q-1.171875 0 -2.171875 -0.640625q-0.984375 -0.65625 -1.53125 -1.8125q-0.53125 -1.171875 -0.53125 -2.6875q0 -1.46875 0.484375 -2.671875q0.5 -1.203125 1.46875 -1.84375q0.984375 -0.640625 2.203125 -0.640625q0.890625 0 1.578125 0.375q0.703125 0.375 1.140625 0.984375l0 -4.875l1.65625 0l0 13.59375l-1.546875 0zm-5.28125 -4.921875q0 1.890625 0.796875 2.828125q0.8125 0.9375 1.890625 0.9375q1.09375 0 1.859375 -0.890625q0.765625 -0.890625 0.765625 -2.734375q0 -2.015625 -0.78125 -2.953125q-0.78125 -0.953125 -1.921875 -0.953125q-1.109375 0 -1.859375 0.90625q-0.75 0.90625 -0.75 2.859375zm17.637146 8.921875q-1.375 -1.75 -2.328125 -4.078125q-0.953125 -2.34375 -0.953125 -4.84375q0 -2.21875 0.703125 -4.234375q0.84375 -2.34375 2.578125 -4.671875l1.203125 0q-1.125 1.921875 -1.484375 2.75q-0.5625 1.28125 -0.890625 2.671875q-0.40625 1.734375 -0.40625 3.484375q0 4.46875 2.78125 8.921875l-1.203125 0zm2.556427 -7.5625l1.765625 -0.15625q0.1875 1.28125 0.890625 1.9375q0.71875 0.640625 1.71875 0.640625q1.203125 0 2.03125 -0.90625q0.84375 -0.90625 0.84375 -2.421875q0 -1.421875 -0.8125 -2.25q-0.796875 -0.828125 -2.09375 -0.828125q-0.796875 0 -1.453125 0.375q-0.640625 0.359375 -1.015625 0.953125l-1.578125 -0.203125l1.328125 -7.0l6.765625 0l0 1.609375l-5.4375 0l-0.734375 3.640625q1.234375 -0.84375 2.578125 -0.84375q1.78125 0 3.0 1.234375q1.234375 1.234375 1.234375 3.171875q0 1.84375 -1.078125 3.1875q-1.3125 1.65625 -3.578125 1.65625q-1.859375 0 -3.03125 -1.03125q-1.171875 -1.046875 -1.34375 -2.765625zm10.375732 -3.140625q0 -2.421875 0.5 -3.890625q0.5 -1.46875 1.46875 -2.265625q0.984375 -0.796875 2.46875 -0.796875q1.09375 0 1.921875 0.4375q0.828125 0.4375 1.359375 1.28125q0.546875 0.828125 0.84375 2.015625q0.3125 1.1875 0.3125 3.21875q0 2.390625 -0.5 3.859375q-0.484375 1.46875 -1.46875 2.28125q-0.96875 0.796875 -2.46875 0.796875q-1.96875 0 -3.078125 -1.40625q-1.359375 -1.703125 -1.359375 -5.53125zm1.71875 0q0 3.34375 0.78125 4.453125q0.796875 1.109375 1.9375 1.109375q1.15625 0 1.9375 -1.109375q0.78125 -1.125 0.78125 -4.453125q0 -3.359375 -0.78125 -4.46875q-0.78125 -1.109375 -1.953125 -1.109375q-1.15625 0 -1.828125 0.984375q-0.875 1.234375 -0.875 4.59375zm8.656952 0q0 -2.421875 0.5 -3.890625q0.5 -1.46875 1.46875 -2.265625q0.984375 -0.796875 2.46875 -0.796875q1.09375 0 1.921875 0.4375q0.828125 0.4375 1.359375 1.28125q0.546875 0.828125 0.84375 2.015625q0.3125 1.1875 0.3125 3.21875q0 2.390625 -0.5 3.859375q-0.484375 1.46875 -1.46875 2.28125q-0.96875 0.796875 -2.46875 0.796875q-1.96875 0 -3.078125 -1.40625q-1.359375 -1.703125 -1.359375 -5.53125zm1.71875 0q0 3.34375 0.78125 4.453125q0.796875 1.109375 1.9375 1.109375q1.15625 0 1.9375 -1.109375q0.78125 -1.125 0.78125 -4.453125q0 -3.359375 -0.78125 -4.46875q-0.78125 -1.109375 -1.953125 -1.109375q-1.15625 0 -1.828125 0.984375q-0.875 1.234375 -0.875 4.59375zm8.016357 6.703125l3.59375 -5.125l-3.328125 -4.734375l2.09375 0l1.515625 2.3125q0.421875 0.65625 0.671875 1.109375q0.421875 -0.609375 0.765625 -1.09375l1.65625 -2.328125l1.984375 0l-3.390625 4.640625l3.65625 5.21875l-2.046875 0l-2.03125 -3.0625l-0.53125 -0.828125l-2.59375 3.890625l-2.015625 0zm15.328125 0l0 -3.25l-5.90625 0l0 -1.53125l6.21875 -8.8125l1.359375 0l0 8.8125l1.84375 0l0 1.53125l-1.84375 0l0 3.25l-1.671875 0zm0 -4.78125l0 -6.140625l-4.25 6.140625l4.25 0zm13.797577 3.171875l0 1.609375l-8.984375 0q-0.015625 -0.609375 0.1875 -1.15625q0.34375 -0.921875 1.09375 -1.8125q0.765625 -0.890625 2.1875 -2.0625q2.21875 -1.8125 3.0 -2.875q0.78125 -1.0625 0.78125 -2.015625q0 -0.984375 -0.71875 -1.671875q-0.703125 -0.6875 -1.84375 -0.6875q-1.203125 0 -1.9375 0.734375q-0.71875 0.71875 -0.71875 2.0l-1.71875 -0.171875q0.171875 -1.921875 1.328125 -2.921875q1.15625 -1.015625 3.09375 -1.015625q1.953125 0 3.09375 1.09375q1.140625 1.078125 1.140625 2.6875q0 0.8125 -0.34375 1.609375q-0.328125 0.78125 -1.109375 1.65625q-0.765625 0.859375 -2.5625 2.390625q-1.5 1.265625 -1.9375 1.71875q-0.421875 0.4375 -0.703125 0.890625l6.671875 0zm3.1569824 5.609375l-1.1875 0q2.765625 -4.453125 2.765625 -8.921875q0 -1.734375 -0.390625 -3.453125q-0.328125 -1.390625 -0.890625 -2.671875q-0.359375 -0.84375 -1.484375 -2.78125l1.1875 0q1.75 2.328125 2.578125 4.671875q0.71875 2.015625 0.71875 4.234375q0 2.5 -0.96875 4.84375q-0.953125 2.328125 -2.328125 4.078125z" fill-rule="nonzero"></path><path fill="#000000" fill-opacity="0.0" d="m339.09448 413.32974l0 24.125977" fill-rule="evenodd"></path><path stroke="#000000" stroke-width="1.0" stroke-linejoin="round" stroke-linecap="butt" d="m339.09448 413.3297l0 18.126007" fill-rule="evenodd"></path><path fill="#000000" stroke="#000000" stroke-width="1.0" stroke-linecap="butt" d="m337.44275 431.45572l1.6517334 4.538086l1.6517334 -4.538086z" fill-rule="evenodd"></path><path fill="#000000" fill-opacity="0.0" d="m339.09448 329.01575l0 25.322845" fill-rule="evenodd"></path><path stroke="#000000" stroke-width="1.0" stroke-linejoin="round" stroke-linecap="butt" d="m339.09448 329.01575l0 19.322845" fill-rule="evenodd"></path><path fill="#000000" stroke="#000000" stroke-width="1.0" stroke-linecap="butt" d="m337.44275 348.3386l1.6517334 4.538086l1.6517334 -4.538086z" fill-rule="evenodd"></path><path fill="#000000" fill-opacity="0.0" d="m339.09448 496.44235l0 29.984283" fill-rule="evenodd"></path><path stroke="#000000" stroke-width="1.0" stroke-linejoin="round" stroke-linecap="butt" d="m339.09448 496.44238l0 23.984253" fill-rule="evenodd"></path><path fill="#000000" stroke="#000000" stroke-width="1.0" stroke-linecap="butt" d="m337.44275 520.42664l1.6517334 4.538086l1.6517334 -4.538086z" fill-rule="evenodd"></path><path fill="#000000" fill-opacity="0.0" d="m223.0 185.73694l232.18896 0l0 58.992126l-232.18896 0z" fill-rule="evenodd"></path><path stroke="#000000" stroke-width="2.0" stroke-linejoin="round" stroke-linecap="butt" d="m223.0 185.73694l232.18896 0l0 58.992126l-232.18896 0z" fill-rule="evenodd"></path><path fill="#000000" d="m265.82367 212.65694l0 -13.59375l1.796875 0l0 11.984375l6.703125 0l0 1.609375l-8.5 0zm9.844482 -4.375l1.6875 -0.140625q0.125 1.015625 0.5625 1.671875q0.4375 0.65625 1.359375 1.0625q0.9375 0.40625 2.09375 0.40625q1.03125 0 1.8125 -0.3125q0.796875 -0.3125 1.1875 -0.84375q0.390625 -0.53125 0.390625 -1.15625q0 -0.640625 -0.375 -1.109375q-0.375 -0.484375 -1.234375 -0.8125q-0.546875 -0.21875 -2.421875 -0.65625q-1.875 -0.453125 -2.625 -0.859375q-0.96875 -0.515625 -1.453125 -1.265625q-0.46875 -0.75 -0.46875 -1.6875q0 -1.03125 0.578125 -1.921875q0.59375 -0.90625 1.703125 -1.359375q1.125 -0.46875 2.5 -0.46875q1.515625 0 2.671875 0.484375q1.15625 0.484375 1.765625 1.4375q0.625 0.9375 0.671875 2.140625l-1.71875 0.125q-0.140625 -1.28125 -0.953125 -1.9375q-0.796875 -0.671875 -2.359375 -0.671875q-1.625 0 -2.375 0.609375q-0.75 0.59375 -0.75 1.4375q0 0.734375 0.53125 1.203125q0.515625 0.46875 2.703125 0.96875q2.203125 0.5 3.015625 0.875q1.1875 0.546875 1.75 1.390625q0.578125 0.828125 0.578125 1.921875q0 1.09375 -0.625 2.0625q-0.625 0.953125 -1.796875 1.484375q-1.15625 0.53125 -2.609375 0.53125q-1.84375 0 -3.09375 -0.53125q-1.25 -0.546875 -1.96875 -1.625q-0.703125 -1.078125 -0.734375 -2.453125zm16.506073 4.375l0 -12.0l-4.46875 0l0 -1.59375l10.765625 0l0 1.59375l-4.5 0l0 12.0l-1.796875 0zm7.8803406 0l0 -13.59375l2.71875 0l3.21875 9.625q0.4375 1.34375 0.640625 2.015625q0.234375 -0.75 0.734375 -2.1875l3.25 -9.453125l2.421875 0l0 13.59375l-1.734375 0l0 -11.390625l-3.953125 11.390625l-1.625 0l-3.9375 -11.578125l0 11.578125l-1.734375 0zm23.697052 -1.609375l0 1.609375l-8.984375 0q-0.015625 -0.609375 0.1875 -1.15625q0.34375 -0.921875 1.09375 -1.8125q0.765625 -0.890625 2.1875 -2.0625q2.21875 -1.8125 3.0 -2.875q0.78125 -1.0625 0.78125 -2.015625q0 -0.984375 -0.71875 -1.671875q-0.703125 -0.6875 -1.84375 -0.6875q-1.203125 0 -1.9375 0.734375q-0.71875 0.71875 -0.71875 2.0l-1.71875 -0.171875q0.171875 -1.921875 1.328125 -2.921875q1.15625 -1.015625 3.09375 -1.015625q1.953125 0 3.09375 1.09375q1.140625 1.078125 1.140625 2.6875q0 0.8125 -0.34375 1.609375q-0.328125 0.78125 -1.109375 1.65625q-0.765625 0.859375 -2.5625 2.390625q-1.5 1.265625 -1.9375 1.71875q-0.421875 0.4375 -0.703125 0.890625l6.671875 0zm10.434021 5.609375q-1.375 -1.75 -2.328125 -4.078125q-0.953125 -2.34375 -0.953125 -4.84375q0 -2.21875 0.703125 -4.234375q0.84375 -2.34375 2.578125 -4.671875l1.203125 0q-1.125 1.921875 -1.484375 2.75q-0.5625 1.28125 -0.890625 2.671875q-0.40625 1.734375 -0.40625 3.484375q0 4.46875 2.78125 8.921875l-1.203125 0zm2.556427 -7.5625l1.765625 -0.15625q0.1875 1.28125 0.890625 1.9375q0.71875 0.640625 1.71875 0.640625q1.203125 0 2.03125 -0.90625q0.84375 -0.90625 0.84375 -2.421875q0 -1.421875 -0.8125 -2.25q-0.796875 -0.828125 -2.09375 -0.828125q-0.796875 0 -1.453125 0.375q-0.640625 0.359375 -1.015625 0.953125l-1.578125 -0.203125l1.328125 -7.0l6.765625 0l0 1.609375l-5.4375 0l-0.734375 3.640625q1.234375 -0.84375 2.578125 -0.84375q1.78125 0 3.0 1.234375q1.234375 1.234375 1.234375 3.171875q0 1.84375 -1.078125 3.1875q-1.3125 1.65625 -3.578125 1.65625q-1.859375 0 -3.03125 -1.03125q-1.171875 -1.046875 -1.34375 -2.765625zm10.375732 -3.140625q0 -2.421875 0.5 -3.890625q0.5 -1.46875 1.46875 -2.265625q0.984375 -0.796875 2.46875 -0.796875q1.09375 0 1.921875 0.4375q0.828125 0.4375 1.359375 1.28125q0.546875 0.828125 0.84375 2.015625q0.3125 1.1875 0.3125 3.21875q0 2.390625 -0.5 3.859375q-0.484375 1.46875 -1.46875 2.28125q-0.96875 0.796875 -2.46875 0.796875q-1.96875 0 -3.078125 -1.40625q-1.359375 -1.703125 -1.359375 -5.53125zm1.71875 0q0 3.34375 0.78125 4.453125q0.796875 1.109375 1.9375 1.109375q1.15625 0 1.9375 -1.109375q0.78125 -1.125 0.78125 -4.453125q0 -3.359375 -0.78125 -4.46875q-0.78125 -1.109375 -1.953125 -1.109375q-1.15625 0 -1.828125 0.984375q-0.875 1.234375 -0.875 4.59375zm8.656952 0q0 -2.421875 0.5 -3.890625q0.5 -1.46875 1.46875 -2.265625q0.984375 -0.796875 2.46875 -0.796875q1.09375 0 1.921875 0.4375q0.828125 0.4375 1.359375 1.28125q0.546875 0.828125 0.84375 2.015625q0.3125 1.1875 0.3125 3.21875q0 2.390625 -0.5 3.859375q-0.484375 1.46875 -1.46875 2.28125q-0.96875 0.796875 -2.46875 0.796875q-1.96875 0 -3.078125 -1.40625q-1.359375 -1.703125 -1.359375 -5.53125zm1.71875 0q0 3.34375 0.78125 4.453125q0.796875 1.109375 1.9375 1.109375q1.15625 0 1.9375 -1.109375q0.78125 -1.125 0.78125 -4.453125q0 -3.359375 -0.78125 -4.46875q-0.78125 -1.109375 -1.953125 -1.109375q-1.15625 0 -1.828125 0.984375q-0.875 1.234375 -0.875 4.59375zm8.016357 6.703125l3.59375 -5.125l-3.328125 -4.734375l2.09375 0l1.515625 2.3125q0.421875 0.65625 0.671875 1.109375q0.421875 -0.609375 0.765625 -1.09375l1.65625 -2.328125l1.984375 0l-3.390625 4.640625l3.65625 5.21875l-2.046875 0l-2.03125 -3.0625l-0.53125 -0.828125l-2.59375 3.890625l-2.015625 0zm9.96875 -3.5625l1.765625 -0.15625q0.1875 1.28125 0.890625 1.9375q0.71875 0.640625 1.71875 0.640625q1.203125 0 2.03125 -0.90625q0.84375 -0.90625 0.84375 -2.421875q0 -1.421875 -0.8125 -2.25q-0.796875 -0.828125 -2.09375 -0.828125q-0.796875 0 -1.453125 0.375q-0.640625 0.359375 -1.015625 0.953125l-1.578125 -0.203125l1.328125 -7.0l6.765625 0l0 1.609375l-5.4375 0l-0.734375 3.640625q1.234375 -0.84375 2.578125 -0.84375q1.78125 0 3.0 1.234375q1.234375 1.234375 1.234375 3.171875q0 1.84375 -1.078125 3.1875q-1.3125 1.65625 -3.578125 1.65625q-1.859375 0 -3.03125 -1.03125q-1.171875 -1.046875 -1.34375 -2.765625zm10.375702 -3.140625q0 -2.421875 0.5 -3.890625q0.5 -1.46875 1.46875 -2.265625q0.984375 -0.796875 2.46875 -0.796875q1.09375 0 1.921875 0.4375q0.828125 0.4375 1.359375 1.28125q0.546875 0.828125 0.84375 2.015625q0.3125 1.1875 0.3125 3.21875q0 2.390625 -0.5 3.859375q-0.484375 1.46875 -1.46875 2.28125q-0.96875 0.796875 -2.46875 0.796875q-1.96875 0 -3.078125 -1.40625q-1.359375 -1.703125 -1.359375 -5.53125zm1.71875 0q0 3.34375 0.78125 4.453125q0.796875 1.109375 1.9375 1.109375q1.15625 0 1.9375 -1.109375q0.78125 -1.125 0.78125 -4.453125q0 -3.359375 -0.78125 -4.46875q-0.78125 -1.109375 -1.953125 -1.109375q-1.15625 0 -1.828125 0.984375q-0.875 1.234375 -0.875 4.59375zm8.656982 0q0 -2.421875 0.5 -3.890625q0.5 -1.46875 1.46875 -2.265625q0.984375 -0.796875 2.46875 -0.796875q1.09375 0 1.921875 0.4375q0.828125 0.4375 1.359375 1.28125q0.546875 0.828125 0.84375 2.015625q0.3125 1.1875 0.3125 3.21875q0 2.390625 -0.5 3.859375q-0.484375 1.46875 -1.46875 2.28125q-0.96875 0.796875 -2.46875 0.796875q-1.96875 0 -3.078125 -1.40625q-1.359375 -1.703125 -1.359375 -5.53125zm1.71875 0q0 3.34375 0.78125 4.453125q0.796875 1.109375 1.9375 1.109375q1.15625 0 1.9375 -1.109375q0.78125 -1.125 0.78125 -4.453125q0 -3.359375 -0.78125 -4.46875q-0.78125 -1.109375 -1.953125 -1.109375q-1.15625 0 -1.828125 0.984375q-0.875 1.234375 -0.875 4.59375zm10.219452 10.703125l-1.1875 0q2.765625 -4.453125 2.765625 -8.921875q0 -1.734375 -0.390625 -3.453125q-0.328125 -1.390625 -0.890625 -2.671875q-0.359375 -0.84375 -1.484375 -2.78125l1.1875 0q1.75 2.328125 2.578125 4.671875q0.71875 2.015625 0.71875 4.234375q0 2.5 -0.96875 4.84375q-0.953125 2.328125 -2.328125 4.078125z" fill-rule="nonzero"></path><path fill="#000000" d="m259.9714 238.43819l0 -13.640625l1.53125 0l0 1.28125q0.53125 -0.75 1.203125 -1.125q0.6875 -0.375 1.640625 -0.375q1.265625 0 2.234375 0.65625q0.96875 0.640625 1.453125 1.828125q0.5 1.1875 0.5 2.59375q0 1.515625 -0.546875 2.734375q-0.546875 1.203125 -1.578125 1.84375q-1.03125 0.640625 -2.171875 0.640625q-0.84375 0 -1.515625 -0.34375q-0.65625 -0.359375 -1.078125 -0.890625l0 4.796875l-1.671875 0zm1.515625 -8.65625q0 1.90625 0.765625 2.8125q0.78125 0.90625 1.875 0.90625q1.109375 0 1.890625 -0.9375q0.796875 -0.9375 0.796875 -2.921875q0 -1.875 -0.78125 -2.8125q-0.765625 -0.9375 -1.84375 -0.9375q-1.0625 0 -1.890625 1.0q-0.8125 1.0 -0.8125 2.890625zm8.844452 4.875l0 -9.859375l1.5 0l0 1.5q0.578125 -1.046875 1.0625 -1.375q0.484375 -0.34375 1.078125 -0.34375q0.84375 0 1.71875 0.546875l-0.578125 1.546875q-0.609375 -0.359375 -1.234375 -0.359375q-0.546875 0 -0.984375 0.328125q-0.421875 0.328125 -0.609375 0.90625q-0.28125 0.890625 -0.28125 1.953125l0 5.15625l-1.671875 0zm5.603302 -4.921875q0 -2.734375 1.53125 -4.0625q1.265625 -1.09375 3.09375 -1.09375q2.03125 0 3.3125 1.34375q1.296875 1.328125 1.296875 3.671875q0 1.90625 -0.578125 3.0q-0.5625 1.078125 -1.65625 1.6875q-1.078125 0.59375 -2.375 0.59375q-2.0625 0 -3.34375 -1.328125q-1.28125 -1.328125 -1.28125 -3.8125zm1.71875 0q0 1.890625 0.828125 2.828125q0.828125 0.9375 2.078125 0.9375q1.25 0 2.0625 -0.9375q0.828125 -0.953125 0.828125 -2.890625q0 -1.828125 -0.828125 -2.765625q-0.828125 -0.9375 -2.0625 -0.9375q-1.25 0 -2.078125 0.9375q-0.828125 0.9375 -0.828125 2.828125zm9.281982 -6.734375l0 -1.9375l1.65625 0l0 1.9375l-1.65625 0zm-2.125 15.484375l0.3125 -1.421875q0.5 0.125 0.796875 0.125q0.515625 0 0.765625 -0.34375q0.25 -0.328125 0.25 -1.6875l0 -10.359375l1.65625 0l0 10.390625q0 1.828125 -0.46875 2.546875q-0.59375 0.921875 -2.0 0.921875q-0.671875 0 -1.3125 -0.171875zm13.019806 -7.0l1.71875 0.21875q-0.40625 1.5 -1.515625 2.34375q-1.09375 0.828125 -2.8125 0.828125q-2.15625 0 -3.421875 -1.328125q-1.265625 -1.328125 -1.265625 -3.734375q0 -2.484375 1.265625 -3.859375q1.28125 -1.375 3.328125 -1.375q1.984375 0 3.234375 1.34375q1.25 1.34375 1.25 3.796875q0 0.140625 -0.015625 0.4375l-7.34375 0q0.09375 1.625 0.921875 2.484375q0.828125 0.859375 2.0625 0.859375q0.90625 0 1.546875 -0.46875q0.65625 -0.484375 1.046875 -1.546875zm-5.484375 -2.703125l5.5 0q-0.109375 -1.234375 -0.625 -1.859375q-0.796875 -0.96875 -2.078125 -0.96875q-1.140625 0 -1.9375 0.78125q-0.78125 0.765625 -0.859375 2.046875zm15.547607 2.265625l1.640625 0.21875q-0.265625 1.6875 -1.375 2.65625q-1.109375 0.953125 -2.734375 0.953125q-2.015625 0 -3.25 -1.3125q-1.21875 -1.328125 -1.21875 -3.796875q0 -1.59375 0.515625 -2.78125q0.53125 -1.203125 1.609375 -1.796875q1.09375 -0.609375 2.359375 -0.609375q1.609375 0 2.625 0.8125q1.015625 0.8125 1.3125 2.3125l-1.625 0.25q-0.234375 -1.0 -0.828125 -1.5q-0.59375 -0.5 -1.421875 -0.5q-1.265625 0 -2.0625 0.90625q-0.78125 0.90625 -0.78125 2.859375q0 1.984375 0.765625 2.890625q0.765625 0.890625 1.984375 0.890625q0.984375 0 1.640625 -0.59375q0.65625 -0.609375 0.84375 -1.859375zm6.546875 2.109375l0.234375 1.484375q-0.703125 0.140625 -1.265625 0.140625q-0.90625 0 -1.40625 -0.28125q-0.5 -0.296875 -0.703125 -0.75q-0.203125 -0.46875 -0.203125 -1.984375l0 -5.65625l-1.234375 0l0 -1.3125l1.234375 0l0 -2.4375l1.65625 -1.0l0 3.4375l1.6875 0l0 1.3125l-1.6875 0l0 5.75q0 0.71875 0.078125 0.921875q0.09375 0.203125 0.296875 0.328125q0.203125 0.125 0.578125 0.125q0.265625 0 0.734375 -0.078125zm1.5426636 -10.1875l0 -1.90625l1.671875 0l0 1.90625l-1.671875 0zm0 11.6875l0 -9.859375l1.671875 0l0 9.859375l-1.671875 0zm3.5042114 -4.921875q0 -2.734375 1.53125 -4.0625q1.265625 -1.09375 3.09375 -1.09375q2.03125 0 3.3125 1.34375q1.296875 1.328125 1.296875 3.671875q0 1.90625 -0.578125 3.0q-0.5625 1.078125 -1.65625 1.6875q-1.078125 0.59375 -2.375 0.59375q-2.0625 0 -3.34375 -1.328125q-1.28125 -1.328125 -1.28125 -3.8125zm1.71875 0q0 1.890625 0.828125 2.828125q0.828125 0.9375 2.078125 0.9375q1.25 0 2.0625 -0.9375q0.828125 -0.953125 0.828125 -2.890625q0 -1.828125 -0.828125 -2.765625q-0.828125 -0.9375 -2.0625 -0.9375q-1.25 0 -2.078125 0.9375q-0.828125 0.9375 -0.828125 2.828125zm9.281952 4.921875l0 -9.859375l1.5 0l0 1.40625q1.09375 -1.625 3.140625 -1.625q0.890625 0 1.640625 0.328125q0.75 0.3125 1.109375 0.84375q0.375 0.515625 0.53125 1.21875q0.09375 0.46875 0.09375 1.625l0 6.0625l-1.671875 0l0 -6.0q0 -1.015625 -0.203125 -1.515625q-0.1875 -0.515625 -0.6875 -0.8125q-0.5 -0.296875 -1.171875 -0.296875q-1.0625 0 -1.84375 0.671875q-0.765625 0.671875 -0.765625 2.578125l0 5.375l-1.671875 0zm14.887146 -2.9375l1.65625 -0.265625q0.140625 1.0 0.765625 1.53125q0.640625 0.515625 1.78125 0.515625q1.15625 0 1.703125 -0.46875q0.5625 -0.46875 0.5625 -1.09375q0 -0.5625 -0.484375 -0.890625q-0.34375 -0.21875 -1.703125 -0.5625q-1.84375 -0.46875 -2.5625 -0.796875q-0.703125 -0.34375 -1.078125 -0.9375q-0.359375 -0.609375 -0.359375 -1.328125q0 -0.65625 0.296875 -1.21875q0.3125 -0.5625 0.828125 -0.9375q0.390625 -0.28125 1.0625 -0.484375q0.671875 -0.203125 1.4375 -0.203125q1.171875 0 2.046875 0.34375q0.875 0.328125 1.28125 0.90625q0.421875 0.5625 0.578125 1.515625l-1.625 0.21875q-0.109375 -0.75 -0.65625 -1.171875q-0.53125 -0.4375 -1.5 -0.4375q-1.15625 0 -1.640625 0.390625q-0.484375 0.375 -0.484375 0.875q0 0.328125 0.203125 0.59375q0.203125 0.265625 0.640625 0.4375q0.25 0.09375 1.46875 0.4375q1.765625 0.46875 2.46875 0.765625q0.703125 0.296875 1.09375 0.875q0.40625 0.578125 0.40625 1.4375q0 0.828125 -0.484375 1.578125q-0.484375 0.734375 -1.40625 1.140625q-0.921875 0.390625 -2.078125 0.390625q-1.921875 0 -2.9375 -0.796875q-1.0 -0.796875 -1.28125 -2.359375zm10.015625 -8.75l0 -1.90625l1.671875 0l0 1.90625l-1.671875 0zm0 11.6875l0 -9.859375l1.671875 0l0 9.859375l-1.671875 0zm3.2542114 0l0 -1.359375l6.265625 -7.1875q-1.0625 0.046875 -1.875 0.046875l-4.015625 0l0 -1.359375l8.046875 0l0 1.109375l-5.34375 6.25l-1.015625 1.140625q1.109375 -0.078125 2.09375 -0.078125l4.5625 0l0 1.4375l-8.71875 0zm16.953125 -3.171875l1.71875 0.21875q-0.40625 1.5 -1.515625 2.34375q-1.09375 0.828125 -2.8125 0.828125q-2.15625 0 -3.421875 -1.328125q-1.265625 -1.328125 -1.265625 -3.734375q0 -2.484375 1.265625 -3.859375q1.28125 -1.375 3.328125 -1.375q1.984375 0 3.234375 1.34375q1.25 1.34375 1.25 3.796875q0 0.140625 -0.015625 0.4375l-7.34375 0q0.09375 1.625 0.921875 2.484375q0.828125 0.859375 2.0625 0.859375q0.90625 0 1.546875 -0.46875q0.65625 -0.484375 1.046875 -1.546875zm-5.484375 -2.703125l5.5 0q-0.109375 -1.234375 -0.625 -1.859375q-0.796875 -0.96875 -2.078125 -0.96875q-1.140625 0 -1.9375 0.78125q-0.78125 0.765625 -0.859375 2.046875zm9.578827 -2.078125l0 -1.90625l1.90625 0l0 1.90625l-1.90625 0zm0 7.953125l0 -1.90625l1.90625 0l0 1.90625l-1.90625 0zm18.210358 -1.609375l0 1.609375l-8.984375 0q-0.015625 -0.609375 0.1875 -1.15625q0.34375 -0.921875 1.09375 -1.8125q0.765625 -0.890625 2.1875 -2.0625q2.21875 -1.8125 3.0 -2.875q0.78125 -1.0625 0.78125 -2.015625q0 -0.984375 -0.71875 -1.671875q-0.703125 -0.6875 -1.84375 -0.6875q-1.203125 0 -1.9375 0.734375q-0.71875 0.71875 -0.71875 2.0l-1.71875 -0.171875q0.171875 -1.921875 1.328125 -2.921875q1.15625 -1.015625 3.09375 -1.015625q1.953125 0 3.09375 1.09375q1.140625 1.078125 1.140625 2.6875q0 0.8125 -0.34375 1.609375q-0.328125 0.78125 -1.109375 1.65625q-0.765625 0.859375 -2.5625 2.390625q-1.5 1.265625 -1.9375 1.71875q-0.421875 0.4375 -0.703125 0.890625l6.671875 0zm1.5944824 -5.09375q0 -2.421875 0.5 -3.890625q0.5 -1.46875 1.46875 -2.265625q0.984375 -0.796875 2.46875 -0.796875q1.09375 0 1.921875 0.4375q0.828125 0.4375 1.359375 1.28125q0.546875 0.828125 0.84375 2.015625q0.3125 1.1875 0.3125 3.21875q0 2.390625 -0.5 3.859375q-0.484375 1.46875 -1.46875 2.28125q-0.96875 0.796875 -2.46875 0.796875q-1.96875 0 -3.078125 -1.40625q-1.359375 -1.703125 -1.359375 -5.53125zm1.71875 0q0 3.34375 0.78125 4.453125q0.796875 1.109375 1.9375 1.109375q1.15625 0 1.9375 -1.109375q0.78125 -1.125 0.78125 -4.453125q0 -3.359375 -0.78125 -4.46875q-0.78125 -1.109375 -1.953125 -1.109375q-1.15625 0 -1.828125 0.984375q-0.875 1.234375 -0.875 4.59375zm8.656952 0q0 -2.421875 0.5 -3.890625q0.5 -1.46875 1.46875 -2.265625q0.984375 -0.796875 2.46875 -0.796875q1.09375 0 1.921875 0.4375q0.828125 0.4375 1.359375 1.28125q0.546875 0.828125 0.84375 2.015625q0.3125 1.1875 0.3125 3.21875q0 2.390625 -0.5 3.859375q-0.484375 1.46875 -1.46875 2.28125q-0.96875 0.796875 -2.46875 0.796875q-1.96875 0 -3.078125 -1.40625q-1.359375 -1.703125 -1.359375 -5.53125zm1.71875 0q0 3.34375 0.78125 4.453125q0.796875 1.109375 1.9375 1.109375q1.15625 0 1.9375 -1.109375q0.78125 -1.125 0.78125 -4.453125q0 -3.359375 -0.78125 -4.46875q-0.78125 -1.109375 -1.953125 -1.109375q-1.15625 0 -1.828125 0.984375q-0.875 1.234375 -0.875 4.59375z" fill-rule="nonzero"></path><path fill="#000000" fill-opacity="0.0" d="m223.0 270.02362l232.18896 0l0 58.992126l-232.18896 0z" fill-rule="evenodd"></path><path stroke="#000000" stroke-width="2.0" stroke-linejoin="round" stroke-linecap="butt" d="m223.0 270.02362l232.18896 0l0 58.992126l-232.18896 0z" fill-rule="evenodd"></path><path fill="#000000" d="m265.82367 296.94363l0 -13.59375l1.796875 0l0 11.984375l6.703125 0l0 1.609375l-8.5 0zm9.844482 -4.375l1.6875 -0.140625q0.125 1.015625 0.5625 1.671875q0.4375 0.65625 1.359375 1.0625q0.9375 0.40625 2.09375 0.40625q1.03125 0 1.8125 -0.3125q0.796875 -0.3125 1.1875 -0.84375q0.390625 -0.53125 0.390625 -1.15625q0 -0.640625 -0.375 -1.109375q-0.375 -0.484375 -1.234375 -0.8125q-0.546875 -0.21875 -2.421875 -0.65625q-1.875 -0.453125 -2.625 -0.859375q-0.96875 -0.515625 -1.453125 -1.265625q-0.46875 -0.75 -0.46875 -1.6875q0 -1.03125 0.578125 -1.921875q0.59375 -0.90625 1.703125 -1.359375q1.125 -0.46875 2.5 -0.46875q1.515625 0 2.671875 0.484375q1.15625 0.484375 1.765625 1.4375q0.625 0.9375 0.671875 2.140625l-1.71875 0.125q-0.140625 -1.28125 -0.953125 -1.9375q-0.796875 -0.671875 -2.359375 -0.671875q-1.625 0 -2.375 0.609375q-0.75 0.59375 -0.75 1.4375q0 0.734375 0.53125 1.203125q0.515625 0.46875 2.703125 0.96875q2.203125 0.5 3.015625 0.875q1.1875 0.546875 1.75 1.390625q0.578125 0.828125 0.578125 1.921875q0 1.09375 -0.625 2.0625q-0.625 0.953125 -1.796875 1.484375q-1.15625 0.53125 -2.609375 0.53125q-1.84375 0 -3.09375 -0.53125q-1.25 -0.546875 -1.96875 -1.625q-0.703125 -1.078125 -0.734375 -2.453125zm16.506073 4.375l0 -12.0l-4.46875 0l0 -1.59375l10.765625 0l0 1.59375l-4.5 0l0 12.0l-1.796875 0zm7.8803406 0l0 -13.59375l2.71875 0l3.21875 9.625q0.4375 1.34375 0.640625 2.015625q0.234375 -0.75 0.734375 -2.1875l3.25 -9.453125l2.421875 0l0 13.59375l-1.734375 0l0 -11.390625l-3.953125 11.390625l-1.625 0l-3.9375 -11.578125l0 11.578125l-1.734375 0zm14.931427 -3.59375l1.671875 -0.21875q0.28125 1.421875 0.96875 2.046875q0.703125 0.625 1.6875 0.625q1.1875 0 2.0 -0.8125q0.8125 -0.828125 0.8125 -2.03125q0 -1.140625 -0.765625 -1.890625q-0.75 -0.75 -1.90625 -0.75q-0.46875 0 -1.171875 0.1875l0.1875 -1.46875q0.15625 0.015625 0.265625 0.015625q1.0625 0 1.90625 -0.546875q0.859375 -0.5625 0.859375 -1.71875q0 -0.921875 -0.625 -1.515625q-0.609375 -0.609375 -1.59375 -0.609375q-0.96875 0 -1.625 0.609375q-0.640625 0.609375 -0.828125 1.84375l-1.671875 -0.296875q0.296875 -1.6875 1.375 -2.609375q1.09375 -0.921875 2.71875 -0.921875q1.109375 0 2.046875 0.484375q0.9375 0.46875 1.421875 1.296875q0.5 0.828125 0.5 1.75q0 0.890625 -0.46875 1.609375q-0.46875 0.71875 -1.40625 1.15625q1.21875 0.265625 1.875 1.15625q0.671875 0.875 0.671875 2.1875q0 1.78125 -1.296875 3.015625q-1.296875 1.234375 -3.28125 1.234375q-1.796875 0 -2.984375 -1.0625q-1.171875 -1.0625 -1.34375 -2.765625zm19.199646 7.59375q-1.375 -1.75 -2.328125 -4.078125q-0.953125 -2.34375 -0.953125 -4.84375q0 -2.21875 0.703125 -4.234375q0.84375 -2.34375 2.578125 -4.671875l1.203125 0q-1.125 1.921875 -1.484375 2.75q-0.5625 1.28125 -0.890625 2.671875q-0.40625 1.734375 -0.40625 3.484375q0 4.46875 2.78125 8.921875l-1.203125 0zm2.556427 -7.5625l1.765625 -0.15625q0.1875 1.28125 0.890625 1.9375q0.71875 0.640625 1.71875 0.640625q1.203125 0 2.03125 -0.90625q0.84375 -0.90625 0.84375 -2.421875q0 -1.421875 -0.8125 -2.25q-0.796875 -0.828125 -2.09375 -0.828125q-0.796875 0 -1.453125 0.375q-0.640625 0.359375 -1.015625 0.953125l-1.578125 -0.203125l1.328125 -7.0l6.765625 0l0 1.609375l-5.4375 0l-0.734375 3.640625q1.234375 -0.84375 2.578125 -0.84375q1.78125 0 3.0 1.234375q1.234375 1.234375 1.234375 3.171875q0 1.84375 -1.078125 3.1875q-1.3125 1.65625 -3.578125 1.65625q-1.859375 0 -3.03125 -1.03125q-1.171875 -1.046875 -1.34375 -2.765625zm10.375732 -3.140625q0 -2.421875 0.5 -3.890625q0.5 -1.46875 1.46875 -2.265625q0.984375 -0.796875 2.46875 -0.796875q1.09375 0 1.921875 0.4375q0.828125 0.4375 1.359375 1.28125q0.546875 0.828125 0.84375 2.015625q0.3125 1.1875 0.3125 3.21875q0 2.390625 -0.5 3.859375q-0.484375 1.46875 -1.46875 2.28125q-0.96875 0.796875 -2.46875 0.796875q-1.96875 0 -3.078125 -1.40625q-1.359375 -1.703125 -1.359375 -5.53125zm1.71875 0q0 3.34375 0.78125 4.453125q0.796875 1.109375 1.9375 1.109375q1.15625 0 1.9375 -1.109375q0.78125 -1.125 0.78125 -4.453125q0 -3.359375 -0.78125 -4.46875q-0.78125 -1.109375 -1.953125 -1.109375q-1.15625 0 -1.828125 0.984375q-0.875 1.234375 -0.875 4.59375zm8.656952 0q0 -2.421875 0.5 -3.890625q0.5 -1.46875 1.46875 -2.265625q0.984375 -0.796875 2.46875 -0.796875q1.09375 0 1.921875 0.4375q0.828125 0.4375 1.359375 1.28125q0.546875 0.828125 0.84375 2.015625q0.3125 1.1875 0.3125 3.21875q0 2.390625 -0.5 3.859375q-0.484375 1.46875 -1.46875 2.28125q-0.96875 0.796875 -2.46875 0.796875q-1.96875 0 -3.078125 -1.40625q-1.359375 -1.703125 -1.359375 -5.53125zm1.71875 0q0 3.34375 0.78125 4.453125q0.796875 1.109375 1.9375 1.109375q1.15625 0 1.9375 -1.109375q0.78125 -1.125 0.78125 -4.453125q0 -3.359375 -0.78125 -4.46875q-0.78125 -1.109375 -1.953125 -1.109375q-1.15625 0 -1.828125 0.984375q-0.875 1.234375 -0.875 4.59375zm8.016357 6.703125l3.59375 -5.125l-3.328125 -4.734375l2.09375 0l1.515625 2.3125q0.421875 0.65625 0.671875 1.109375q0.421875 -0.609375 0.765625 -1.09375l1.65625 -2.328125l1.984375 0l-3.390625 4.640625l3.65625 5.21875l-2.046875 0l-2.03125 -3.0625l-0.53125 -0.828125l-2.59375 3.890625l-2.015625 0zm9.96875 -3.5625l1.765625 -0.15625q0.1875 1.28125 0.890625 1.9375q0.71875 0.640625 1.71875 0.640625q1.203125 0 2.03125 -0.90625q0.84375 -0.90625 0.84375 -2.421875q0 -1.421875 -0.8125 -2.25q-0.796875 -0.828125 -2.09375 -0.828125q-0.796875 0 -1.453125 0.375q-0.640625 0.359375 -1.015625 0.953125l-1.578125 -0.203125l1.328125 -7.0l6.765625 0l0 1.609375l-5.4375 0l-0.734375 3.640625q1.234375 -0.84375 2.578125 -0.84375q1.78125 0 3.0 1.234375q1.234375 1.234375 1.234375 3.171875q0 1.84375 -1.078125 3.1875q-1.3125 1.65625 -3.578125 1.65625q-1.859375 0 -3.03125 -1.03125q-1.171875 -1.046875 -1.34375 -2.765625zm10.375702 -3.140625q0 -2.421875 0.5 -3.890625q0.5 -1.46875 1.46875 -2.265625q0.984375 -0.796875 2.46875 -0.796875q1.09375 0 1.921875 0.4375q0.828125 0.4375 1.359375 1.28125q0.546875 0.828125 0.84375 2.015625q0.3125 1.1875 0.3125 3.21875q0 2.390625 -0.5 3.859375q-0.484375 1.46875 -1.46875 2.28125q-0.96875 0.796875 -2.46875 0.796875q-1.96875 0 -3.078125 -1.40625q-1.359375 -1.703125 -1.359375 -5.53125zm1.71875 0q0 3.34375 0.78125 4.453125q0.796875 1.109375 1.9375 1.109375q1.15625 0 1.9375 -1.109375q0.78125 -1.125 0.78125 -4.453125q0 -3.359375 -0.78125 -4.46875q-0.78125 -1.109375 -1.953125 -1.109375q-1.15625 0 -1.828125 0.984375q-0.875 1.234375 -0.875 4.59375zm8.656982 0q0 -2.421875 0.5 -3.890625q0.5 -1.46875 1.46875 -2.265625q0.984375 -0.796875 2.46875 -0.796875q1.09375 0 1.921875 0.4375q0.828125 0.4375 1.359375 1.28125q0.546875 0.828125 0.84375 2.015625q0.3125 1.1875 0.3125 3.21875q0 2.390625 -0.5 3.859375q-0.484375 1.46875 -1.46875 2.28125q-0.96875 0.796875 -2.46875 0.796875q-1.96875 0 -3.078125 -1.40625q-1.359375 -1.703125 -1.359375 -5.53125zm1.71875 0q0 3.34375 0.78125 4.453125q0.796875 1.109375 1.9375 1.109375q1.15625 0 1.9375 -1.109375q0.78125 -1.125 0.78125 -4.453125q0 -3.359375 -0.78125 -4.46875q-0.78125 -1.109375 -1.953125 -1.109375q-1.15625 0 -1.828125 0.984375q-0.875 1.234375 -0.875 4.59375zm10.219452 10.703125l-1.1875 0q2.765625 -4.453125 2.765625 -8.921875q0 -1.734375 -0.390625 -3.453125q-0.328125 -1.390625 -0.890625 -2.671875q-0.359375 -0.84375 -1.484375 -2.78125l1.1875 0q1.75 2.328125 2.578125 4.671875q0.71875 2.015625 0.71875 4.234375q0 2.5 -0.96875 4.84375q-0.953125 2.328125 -2.328125 4.078125z" fill-rule="nonzero"></path><path fill="#000000" d="m259.9714 322.72488l0 -13.640625l1.53125 0l0 1.28125q0.53125 -0.75 1.203125 -1.125q0.6875 -0.375 1.640625 -0.375q1.265625 0 2.234375 0.65625q0.96875 0.640625 1.453125 1.828125q0.5 1.1875 0.5 2.59375q0 1.515625 -0.546875 2.734375q-0.546875 1.203125 -1.578125 1.84375q-1.03125 0.640625 -2.171875 0.640625q-0.84375 0 -1.515625 -0.34375q-0.65625 -0.359375 -1.078125 -0.890625l0 4.796875l-1.671875 0zm1.515625 -8.65625q0 1.90625 0.765625 2.8125q0.78125 0.90625 1.875 0.90625q1.109375 0 1.890625 -0.9375q0.796875 -0.9375 0.796875 -2.921875q0 -1.875 -0.78125 -2.8125q-0.765625 -0.9375 -1.84375 -0.9375q-1.0625 0 -1.890625 1.0q-0.8125 1.0 -0.8125 2.890625zm8.844452 4.875l0 -9.859375l1.5 0l0 1.5q0.578125 -1.046875 1.0625 -1.375q0.484375 -0.34375 1.078125 -0.34375q0.84375 0 1.71875 0.546875l-0.578125 1.546875q-0.609375 -0.359375 -1.234375 -0.359375q-0.546875 0 -0.984375 0.328125q-0.421875 0.328125 -0.609375 0.90625q-0.28125 0.890625 -0.28125 1.953125l0 5.15625l-1.671875 0zm5.603302 -4.921875q0 -2.734375 1.53125 -4.0625q1.265625 -1.09375 3.09375 -1.09375q2.03125 0 3.3125 1.34375q1.296875 1.328125 1.296875 3.671875q0 1.90625 -0.578125 3.0q-0.5625 1.078125 -1.65625 1.6875q-1.078125 0.59375 -2.375 0.59375q-2.0625 0 -3.34375 -1.328125q-1.28125 -1.328125 -1.28125 -3.8125zm1.71875 0q0 1.890625 0.828125 2.828125q0.828125 0.9375 2.078125 0.9375q1.25 0 2.0625 -0.9375q0.828125 -0.953125 0.828125 -2.890625q0 -1.828125 -0.828125 -2.765625q-0.828125 -0.9375 -2.0625 -0.9375q-1.25 0 -2.078125 0.9375q-0.828125 0.9375 -0.828125 2.828125zm9.281982 -6.734375l0 -1.9375l1.65625 0l0 1.9375l-1.65625 0zm-2.125 15.484375l0.3125 -1.421875q0.5 0.125 0.796875 0.125q0.515625 0 0.765625 -0.34375q0.25 -0.328125 0.25 -1.6875l0 -10.359375l1.65625 0l0 10.390625q0 1.828125 -0.46875 2.546875q-0.59375 0.921875 -2.0 0.921875q-0.671875 0 -1.3125 -0.171875zm13.019806 -7.0l1.71875 0.21875q-0.40625 1.5 -1.515625 2.34375q-1.09375 0.828125 -2.8125 0.828125q-2.15625 0 -3.421875 -1.328125q-1.265625 -1.328125 -1.265625 -3.734375q0 -2.484375 1.265625 -3.859375q1.28125 -1.375 3.328125 -1.375q1.984375 0 3.234375 1.34375q1.25 1.34375 1.25 3.796875q0 0.140625 -0.015625 0.4375l-7.34375 0q0.09375 1.625 0.921875 2.484375q0.828125 0.859375 2.0625 0.859375q0.90625 0 1.546875 -0.46875q0.65625 -0.484375 1.046875 -1.546875zm-5.484375 -2.703125l5.5 0q-0.109375 -1.234375 -0.625 -1.859375q-0.796875 -0.96875 -2.078125 -0.96875q-1.140625 0 -1.9375 0.78125q-0.78125 0.765625 -0.859375 2.046875zm15.547607 2.265625l1.640625 0.21875q-0.265625 1.6875 -1.375 2.65625q-1.109375 0.953125 -2.734375 0.953125q-2.015625 0 -3.25 -1.3125q-1.21875 -1.328125 -1.21875 -3.796875q0 -1.59375 0.515625 -2.78125q0.53125 -1.203125 1.609375 -1.796875q1.09375 -0.609375 2.359375 -0.609375q1.609375 0 2.625 0.8125q1.015625 0.8125 1.3125 2.3125l-1.625 0.25q-0.234375 -1.0 -0.828125 -1.5q-0.59375 -0.5 -1.421875 -0.5q-1.265625 0 -2.0625 0.90625q-0.78125 0.90625 -0.78125 2.859375q0 1.984375 0.765625 2.890625q0.765625 0.890625 1.984375 0.890625q0.984375 0 1.640625 -0.59375q0.65625 -0.609375 0.84375 -1.859375zm6.546875 2.109375l0.234375 1.484375q-0.703125 0.140625 -1.265625 0.140625q-0.90625 0 -1.40625 -0.28125q-0.5 -0.296875 -0.703125 -0.75q-0.203125 -0.46875 -0.203125 -1.984375l0 -5.65625l-1.234375 0l0 -1.3125l1.234375 0l0 -2.4375l1.65625 -1.0l0 3.4375l1.6875 0l0 1.3125l-1.6875 0l0 5.75q0 0.71875 0.078125 0.921875q0.09375 0.203125 0.296875 0.328125q0.203125 0.125 0.578125 0.125q0.265625 0 0.734375 -0.078125zm1.5426636 -10.1875l0 -1.90625l1.671875 0l0 1.90625l-1.671875 0zm0 11.6875l0 -9.859375l1.671875 0l0 9.859375l-1.671875 0zm3.5042114 -4.921875q0 -2.734375 1.53125 -4.0625q1.265625 -1.09375 3.09375 -1.09375q2.03125 0 3.3125 1.34375q1.296875 1.328125 1.296875 3.671875q0 1.90625 -0.578125 3.0q-0.5625 1.078125 -1.65625 1.6875q-1.078125 0.59375 -2.375 0.59375q-2.0625 0 -3.34375 -1.328125q-1.28125 -1.328125 -1.28125 -3.8125zm1.71875 0q0 1.890625 0.828125 2.828125q0.828125 0.9375 2.078125 0.9375q1.25 0 2.0625 -0.9375q0.828125 -0.953125 0.828125 -2.890625q0 -1.828125 -0.828125 -2.765625q-0.828125 -0.9375 -2.0625 -0.9375q-1.25 0 -2.078125 0.9375q-0.828125 0.9375 -0.828125 2.828125zm9.281952 4.921875l0 -9.859375l1.5 0l0 1.40625q1.09375 -1.625 3.140625 -1.625q0.890625 0 1.640625 0.328125q0.75 0.3125 1.109375 0.84375q0.375 0.515625 0.53125 1.21875q0.09375 0.46875 0.09375 1.625l0 6.0625l-1.671875 0l0 -6.0q0 -1.015625 -0.203125 -1.515625q-0.1875 -0.515625 -0.6875 -0.8125q-0.5 -0.296875 -1.171875 -0.296875q-1.0625 0 -1.84375 0.671875q-0.765625 0.671875 -0.765625 2.578125l0 5.375l-1.671875 0zm14.887146 -2.9375l1.65625 -0.265625q0.140625 1.0 0.765625 1.53125q0.640625 0.515625 1.78125 0.515625q1.15625 0 1.703125 -0.46875q0.5625 -0.46875 0.5625 -1.09375q0 -0.5625 -0.484375 -0.890625q-0.34375 -0.21875 -1.703125 -0.5625q-1.84375 -0.46875 -2.5625 -0.796875q-0.703125 -0.34375 -1.078125 -0.9375q-0.359375 -0.609375 -0.359375 -1.328125q0 -0.65625 0.296875 -1.21875q0.3125 -0.5625 0.828125 -0.9375q0.390625 -0.28125 1.0625 -0.484375q0.671875 -0.203125 1.4375 -0.203125q1.171875 0 2.046875 0.34375q0.875 0.328125 1.28125 0.90625q0.421875 0.5625 0.578125 1.515625l-1.625 0.21875q-0.109375 -0.75 -0.65625 -1.171875q-0.53125 -0.4375 -1.5 -0.4375q-1.15625 0 -1.640625 0.390625q-0.484375 0.375 -0.484375 0.875q0 0.328125 0.203125 0.59375q0.203125 0.265625 0.640625 0.4375q0.25 0.09375 1.46875 0.4375q1.765625 0.46875 2.46875 0.765625q0.703125 0.296875 1.09375 0.875q0.40625 0.578125 0.40625 1.4375q0 0.828125 -0.484375 1.578125q-0.484375 0.734375 -1.40625 1.140625q-0.921875 0.390625 -2.078125 0.390625q-1.921875 0 -2.9375 -0.796875q-1.0 -0.796875 -1.28125 -2.359375zm10.015625 -8.75l0 -1.90625l1.671875 0l0 1.90625l-1.671875 0zm0 11.6875l0 -9.859375l1.671875 0l0 9.859375l-1.671875 0zm3.2542114 0l0 -1.359375l6.265625 -7.1875q-1.0625 0.046875 -1.875 0.046875l-4.015625 0l0 -1.359375l8.046875 0l0 1.109375l-5.34375 6.25l-1.015625 1.140625q1.109375 -0.078125 2.09375 -0.078125l4.5625 0l0 1.4375l-8.71875 0zm16.953125 -3.171875l1.71875 0.21875q-0.40625 1.5 -1.515625 2.34375q-1.09375 0.828125 -2.8125 0.828125q-2.15625 0 -3.421875 -1.328125q-1.265625 -1.328125 -1.265625 -3.734375q0 -2.484375 1.265625 -3.859375q1.28125 -1.375 3.328125 -1.375q1.984375 0 3.234375 1.34375q1.25 1.34375 1.25 3.796875q0 0.140625 -0.015625 0.4375l-7.34375 0q0.09375 1.625 0.921875 2.484375q0.828125 0.859375 2.0625 0.859375q0.90625 0 1.546875 -0.46875q0.65625 -0.484375 1.046875 -1.546875zm-5.484375 -2.703125l5.5 0q-0.109375 -1.234375 -0.625 -1.859375q-0.796875 -0.96875 -2.078125 -0.96875q-1.140625 0 -1.9375 0.78125q-0.78125 0.765625 -0.859375 2.046875zm9.578827 -2.078125l0 -1.90625l1.90625 0l0 1.90625l-1.90625 0zm0 7.953125l0 -1.90625l1.90625 0l0 1.90625l-1.90625 0zm18.210358 -1.609375l0 1.609375l-8.984375 0q-0.015625 -0.609375 0.1875 -1.15625q0.34375 -0.921875 1.09375 -1.8125q0.765625 -0.890625 2.1875 -2.0625q2.21875 -1.8125 3.0 -2.875q0.78125 -1.0625 0.78125 -2.015625q0 -0.984375 -0.71875 -1.671875q-0.703125 -0.6875 -1.84375 -0.6875q-1.203125 0 -1.9375 0.734375q-0.71875 0.71875 -0.71875 2.0l-1.71875 -0.171875q0.171875 -1.921875 1.328125 -2.921875q1.15625 -1.015625 3.09375 -1.015625q1.953125 0 3.09375 1.09375q1.140625 1.078125 1.140625 2.6875q0 0.8125 -0.34375 1.609375q-0.328125 0.78125 -1.109375 1.65625q-0.765625 0.859375 -2.5625 2.390625q-1.5 1.265625 -1.9375 1.71875q-0.421875 0.4375 -0.703125 0.890625l6.671875 0zm1.5944824 -5.09375q0 -2.421875 0.5 -3.890625q0.5 -1.46875 1.46875 -2.265625q0.984375 -0.796875 2.46875 -0.796875q1.09375 0 1.921875 0.4375q0.828125 0.4375 1.359375 1.28125q0.546875 0.828125 0.84375 2.015625q0.3125 1.1875 0.3125 3.21875q0 2.390625 -0.5 3.859375q-0.484375 1.46875 -1.46875 2.28125q-0.96875 0.796875 -2.46875 0.796875q-1.96875 0 -3.078125 -1.40625q-1.359375 -1.703125 -1.359375 -5.53125zm1.71875 0q0 3.34375 0.78125 4.453125q0.796875 1.109375 1.9375 1.109375q1.15625 0 1.9375 -1.109375q0.78125 -1.125 0.78125 -4.453125q0 -3.359375 -0.78125 -4.46875q-0.78125 -1.109375 -1.953125 -1.109375q-1.15625 0 -1.828125 0.984375q-0.875 1.234375 -0.875 4.59375zm8.656952 0q0 -2.421875 0.5 -3.890625q0.5 -1.46875 1.46875 -2.265625q0.984375 -0.796875 2.46875 -0.796875q1.09375 0 1.921875 0.4375q0.828125 0.4375 1.359375 1.28125q0.546875 0.828125 0.84375 2.015625q0.3125 1.1875 0.3125 3.21875q0 2.390625 -0.5 3.859375q-0.484375 1.46875 -1.46875 2.28125q-0.96875 0.796875 -2.46875 0.796875q-1.96875 0 -3.078125 -1.40625q-1.359375 -1.703125 -1.359375 -5.53125zm1.71875 0q0 3.34375 0.78125 4.453125q0.796875 1.109375 1.9375 1.109375q1.15625 0 1.9375 -1.109375q0.78125 -1.125 0.78125 -4.453125q0 -3.359375 -0.78125 -4.46875q-0.78125 -1.109375 -1.953125 -1.109375q-1.15625 0 -1.828125 0.984375q-0.875 1.234375 -0.875 4.59375z" fill-rule="nonzero"></path><path fill="#000000" fill-opacity="0.0" d="m223.0 354.33762l232.18896 0l0 58.992126l-232.18896 0z" fill-rule="evenodd"></path><path stroke="#000000" stroke-width="2.0" stroke-linejoin="round" stroke-linecap="butt" d="m223.0 354.33762l232.18896 0l0 58.992126l-232.18896 0z" fill-rule="evenodd"></path><path fill="#000000" d="m265.82367 381.2576l0 -13.59375l1.796875 0l0 11.984375l6.703125 0l0 1.609375l-8.5 0zm9.844482 -4.375l1.6875 -0.140625q0.125 1.015625 0.5625 1.671875q0.4375 0.65625 1.359375 1.0625q0.9375 0.40625 2.09375 0.40625q1.03125 0 1.8125 -0.3125q0.796875 -0.3125 1.1875 -0.84375q0.390625 -0.53125 0.390625 -1.15625q0 -0.640625 -0.375 -1.109375q-0.375 -0.484375 -1.234375 -0.8125q-0.546875 -0.21875 -2.421875 -0.65625q-1.875 -0.453125 -2.625 -0.859375q-0.96875 -0.515625 -1.453125 -1.265625q-0.46875 -0.75 -0.46875 -1.6875q0 -1.03125 0.578125 -1.921875q0.59375 -0.90625 1.703125 -1.359375q1.125 -0.46875 2.5 -0.46875q1.515625 0 2.671875 0.484375q1.15625 0.484375 1.765625 1.4375q0.625 0.9375 0.671875 2.140625l-1.71875 0.125q-0.140625 -1.28125 -0.953125 -1.9375q-0.796875 -0.671875 -2.359375 -0.671875q-1.625 0 -2.375 0.609375q-0.75 0.59375 -0.75 1.4375q0 0.734375 0.53125 1.203125q0.515625 0.46875 2.703125 0.96875q2.203125 0.5 3.015625 0.875q1.1875 0.546875 1.75 1.390625q0.578125 0.828125 0.578125 1.921875q0 1.09375 -0.625 2.0625q-0.625 0.953125 -1.796875 1.484375q-1.15625 0.53125 -2.609375 0.53125q-1.84375 0 -3.09375 -0.53125q-1.25 -0.546875 -1.96875 -1.625q-0.703125 -1.078125 -0.734375 -2.453125zm16.506073 4.375l0 -12.0l-4.46875 0l0 -1.59375l10.765625 0l0 1.59375l-4.5 0l0 12.0l-1.796875 0zm7.8803406 0l0 -13.59375l2.71875 0l3.21875 9.625q0.4375 1.34375 0.640625 2.015625q0.234375 -0.75 0.734375 -2.1875l3.25 -9.453125l2.421875 0l0 13.59375l-1.734375 0l0 -11.390625l-3.953125 11.390625l-1.625 0l-3.9375 -11.578125l0 11.578125l-1.734375 0zm20.275177 0l0 -3.25l-5.90625 0l0 -1.53125l6.21875 -8.8125l1.359375 0l0 8.8125l1.84375 0l0 1.53125l-1.84375 0l0 3.25l-1.671875 0zm0 -4.78125l0 -6.140625l-4.25 6.140625l4.25 0zm13.855896 8.78125q-1.375 -1.75 -2.328125 -4.078125q-0.953125 -2.34375 -0.953125 -4.84375q0 -2.21875 0.703125 -4.234375q0.84375 -2.34375 2.578125 -4.671875l1.203125 0q-1.125 1.921875 -1.484375 2.75q-0.5625 1.28125 -0.890625 2.671875q-0.40625 1.734375 -0.40625 3.484375q0 4.46875 2.78125 8.921875l-1.203125 0zm2.556427 -7.5625l1.765625 -0.15625q0.1875 1.28125 0.890625 1.9375q0.71875 0.640625 1.71875 0.640625q1.203125 0 2.03125 -0.90625q0.84375 -0.90625 0.84375 -2.421875q0 -1.421875 -0.8125 -2.25q-0.796875 -0.828125 -2.09375 -0.828125q-0.796875 0 -1.453125 0.375q-0.640625 0.359375 -1.015625 0.953125l-1.578125 -0.203125l1.328125 -7.0l6.765625 0l0 1.609375l-5.4375 0l-0.734375 3.640625q1.234375 -0.84375 2.578125 -0.84375q1.78125 0 3.0 1.234375q1.234375 1.234375 1.234375 3.171875q0 1.84375 -1.078125 3.1875q-1.3125 1.65625 -3.578125 1.65625q-1.859375 0 -3.03125 -1.03125q-1.171875 -1.046875 -1.34375 -2.765625zm10.375732 -3.140625q0 -2.421875 0.5 -3.890625q0.5 -1.46875 1.46875 -2.265625q0.984375 -0.796875 2.46875 -0.796875q1.09375 0 1.921875 0.4375q0.828125 0.4375 1.359375 1.28125q0.546875 0.828125 0.84375 2.015625q0.3125 1.1875 0.3125 3.21875q0 2.390625 -0.5 3.859375q-0.484375 1.46875 -1.46875 2.28125q-0.96875 0.796875 -2.46875 0.796875q-1.96875 0 -3.078125 -1.40625q-1.359375 -1.703125 -1.359375 -5.53125zm1.71875 0q0 3.34375 0.78125 4.453125q0.796875 1.109375 1.9375 1.109375q1.15625 0 1.9375 -1.109375q0.78125 -1.125 0.78125 -4.453125q0 -3.359375 -0.78125 -4.46875q-0.78125 -1.109375 -1.953125 -1.109375q-1.15625 0 -1.828125 0.984375q-0.875 1.234375 -0.875 4.59375zm8.656952 0q0 -2.421875 0.5 -3.890625q0.5 -1.46875 1.46875 -2.265625q0.984375 -0.796875 2.46875 -0.796875q1.09375 0 1.921875 0.4375q0.828125 0.4375 1.359375 1.28125q0.546875 0.828125 0.84375 2.015625q0.3125 1.1875 0.3125 3.21875q0 2.390625 -0.5 3.859375q-0.484375 1.46875 -1.46875 2.28125q-0.96875 0.796875 -2.46875 0.796875q-1.96875 0 -3.078125 -1.40625q-1.359375 -1.703125 -1.359375 -5.53125zm1.71875 0q0 3.34375 0.78125 4.453125q0.796875 1.109375 1.9375 1.109375q1.15625 0 1.9375 -1.109375q0.78125 -1.125 0.78125 -4.453125q0 -3.359375 -0.78125 -4.46875q-0.78125 -1.109375 -1.953125 -1.109375q-1.15625 0 -1.828125 0.984375q-0.875 1.234375 -0.875 4.59375zm8.016357 6.703125l3.59375 -5.125l-3.328125 -4.734375l2.09375 0l1.515625 2.3125q0.421875 0.65625 0.671875 1.109375q0.421875 -0.609375 0.765625 -1.09375l1.65625 -2.328125l1.984375 0l-3.390625 4.640625l3.65625 5.21875l-2.046875 0l-2.03125 -3.0625l-0.53125 -0.828125l-2.59375 3.890625l-2.015625 0zm9.96875 -3.5625l1.765625 -0.15625q0.1875 1.28125 0.890625 1.9375q0.71875 0.640625 1.71875 0.640625q1.203125 0 2.03125 -0.90625q0.84375 -0.90625 0.84375 -2.421875q0 -1.421875 -0.8125 -2.25q-0.796875 -0.828125 -2.09375 -0.828125q-0.796875 0 -1.453125 0.375q-0.640625 0.359375 -1.015625 0.953125l-1.578125 -0.203125l1.328125 -7.0l6.765625 0l0 1.609375l-5.4375 0l-0.734375 3.640625q1.234375 -0.84375 2.578125 -0.84375q1.78125 0 3.0 1.234375q1.234375 1.234375 1.234375 3.171875q0 1.84375 -1.078125 3.1875q-1.3125 1.65625 -3.578125 1.65625q-1.859375 0 -3.03125 -1.03125q-1.171875 -1.046875 -1.34375 -2.765625zm10.375702 -3.140625q0 -2.421875 0.5 -3.890625q0.5 -1.46875 1.46875 -2.265625q0.984375 -0.796875 2.46875 -0.796875q1.09375 0 1.921875 0.4375q0.828125 0.4375 1.359375 1.28125q0.546875 0.828125 0.84375 2.015625q0.3125 1.1875 0.3125 3.21875q0 2.390625 -0.5 3.859375q-0.484375 1.46875 -1.46875 2.28125q-0.96875 0.796875 -2.46875 0.796875q-1.96875 0 -3.078125 -1.40625q-1.359375 -1.703125 -1.359375 -5.53125zm1.71875 0q0 3.34375 0.78125 4.453125q0.796875 1.109375 1.9375 1.109375q1.15625 0 1.9375 -1.109375q0.78125 -1.125 0.78125 -4.453125q0 -3.359375 -0.78125 -4.46875q-0.78125 -1.109375 -1.953125 -1.109375q-1.15625 0 -1.828125 0.984375q-0.875 1.234375 -0.875 4.59375zm8.656982 0q0 -2.421875 0.5 -3.890625q0.5 -1.46875 1.46875 -2.265625q0.984375 -0.796875 2.46875 -0.796875q1.09375 0 1.921875 0.4375q0.828125 0.4375 1.359375 1.28125q0.546875 0.828125 0.84375 2.015625q0.3125 1.1875 0.3125 3.21875q0 2.390625 -0.5 3.859375q-0.484375 1.46875 -1.46875 2.28125q-0.96875 0.796875 -2.46875 0.796875q-1.96875 0 -3.078125 -1.40625q-1.359375 -1.703125 -1.359375 -5.53125zm1.71875 0q0 3.34375 0.78125 4.453125q0.796875 1.109375 1.9375 1.109375q1.15625 0 1.9375 -1.109375q0.78125 -1.125 0.78125 -4.453125q0 -3.359375 -0.78125 -4.46875q-0.78125 -1.109375 -1.953125 -1.109375q-1.15625 0 -1.828125 0.984375q-0.875 1.234375 -0.875 4.59375zm10.219452 10.703125l-1.1875 0q2.765625 -4.453125 2.765625 -8.921875q0 -1.734375 -0.390625 -3.453125q-0.328125 -1.390625 -0.890625 -2.671875q-0.359375 -0.84375 -1.484375 -2.78125l1.1875 0q1.75 2.328125 2.578125 4.671875q0.71875 2.015625 0.71875 4.234375q0 2.5 -0.96875 4.84375q-0.953125 2.328125 -2.328125 4.078125z" fill-rule="nonzero"></path><path fill="#000000" d="m259.9714 407.03885l0 -13.640625l1.53125 0l0 1.28125q0.53125 -0.75 1.203125 -1.125q0.6875 -0.375 1.640625 -0.375q1.265625 0 2.234375 0.65625q0.96875 0.640625 1.453125 1.828125q0.5 1.1875 0.5 2.59375q0 1.515625 -0.546875 2.734375q-0.546875 1.203125 -1.578125 1.84375q-1.03125 0.640625 -2.171875 0.640625q-0.84375 0 -1.515625 -0.34375q-0.65625 -0.359375 -1.078125 -0.890625l0 4.796875l-1.671875 0zm1.515625 -8.65625q0 1.90625 0.765625 2.8125q0.78125 0.90625 1.875 0.90625q1.109375 0 1.890625 -0.9375q0.796875 -0.9375 0.796875 -2.921875q0 -1.875 -0.78125 -2.8125q-0.765625 -0.9375 -1.84375 -0.9375q-1.0625 0 -1.890625 1.0q-0.8125 1.0 -0.8125 2.890625zm8.844452 4.875l0 -9.859375l1.5 0l0 1.5q0.578125 -1.046875 1.0625 -1.375q0.484375 -0.34375 1.078125 -0.34375q0.84375 0 1.71875 0.546875l-0.578125 1.546875q-0.609375 -0.359375 -1.234375 -0.359375q-0.546875 0 -0.984375 0.328125q-0.421875 0.328125 -0.609375 0.90625q-0.28125 0.890625 -0.28125 1.953125l0 5.15625l-1.671875 0zm5.603302 -4.921875q0 -2.734375 1.53125 -4.0625q1.265625 -1.09375 3.09375 -1.09375q2.03125 0 3.3125 1.34375q1.296875 1.328125 1.296875 3.671875q0 1.90625 -0.578125 3.0q-0.5625 1.078125 -1.65625 1.6875q-1.078125 0.59375 -2.375 0.59375q-2.0625 0 -3.34375 -1.328125q-1.28125 -1.328125 -1.28125 -3.8125zm1.71875 0q0 1.890625 0.828125 2.828125q0.828125 0.9375 2.078125 0.9375q1.25 0 2.0625 -0.9375q0.828125 -0.953125 0.828125 -2.890625q0 -1.828125 -0.828125 -2.765625q-0.828125 -0.9375 -2.0625 -0.9375q-1.25 0 -2.078125 0.9375q-0.828125 0.9375 -0.828125 2.828125zm9.281982 -6.734375l0 -1.9375l1.65625 0l0 1.9375l-1.65625 0zm-2.125 15.484375l0.3125 -1.421875q0.5 0.125 0.796875 0.125q0.515625 0 0.765625 -0.34375q0.25 -0.328125 0.25 -1.6875l0 -10.359375l1.65625 0l0 10.390625q0 1.828125 -0.46875 2.546875q-0.59375 0.921875 -2.0 0.921875q-0.671875 0 -1.3125 -0.171875zm13.019806 -7.0l1.71875 0.21875q-0.40625 1.5 -1.515625 2.34375q-1.09375 0.828125 -2.8125 0.828125q-2.15625 0 -3.421875 -1.328125q-1.265625 -1.328125 -1.265625 -3.734375q0 -2.484375 1.265625 -3.859375q1.28125 -1.375 3.328125 -1.375q1.984375 0 3.234375 1.34375q1.25 1.34375 1.25 3.796875q0 0.140625 -0.015625 0.4375l-7.34375 0q0.09375 1.625 0.921875 2.484375q0.828125 0.859375 2.0625 0.859375q0.90625 0 1.546875 -0.46875q0.65625 -0.484375 1.046875 -1.546875zm-5.484375 -2.703125l5.5 0q-0.109375 -1.234375 -0.625 -1.859375q-0.796875 -0.96875 -2.078125 -0.96875q-1.140625 0 -1.9375 0.78125q-0.78125 0.765625 -0.859375 2.046875zm15.547607 2.265625l1.640625 0.21875q-0.265625 1.6875 -1.375 2.65625q-1.109375 0.953125 -2.734375 0.953125q-2.015625 0 -3.25 -1.3125q-1.21875 -1.328125 -1.21875 -3.796875q0 -1.59375 0.515625 -2.78125q0.53125 -1.203125 1.609375 -1.796875q1.09375 -0.609375 2.359375 -0.609375q1.609375 0 2.625 0.8125q1.015625 0.8125 1.3125 2.3125l-1.625 0.25q-0.234375 -1.0 -0.828125 -1.5q-0.59375 -0.5 -1.421875 -0.5q-1.265625 0 -2.0625 0.90625q-0.78125 0.90625 -0.78125 2.859375q0 1.984375 0.765625 2.890625q0.765625 0.890625 1.984375 0.890625q0.984375 0 1.640625 -0.59375q0.65625 -0.609375 0.84375 -1.859375zm6.546875 2.109375l0.234375 1.484375q-0.703125 0.140625 -1.265625 0.140625q-0.90625 0 -1.40625 -0.28125q-0.5 -0.296875 -0.703125 -0.75q-0.203125 -0.46875 -0.203125 -1.984375l0 -5.65625l-1.234375 0l0 -1.3125l1.234375 0l0 -2.4375l1.65625 -1.0l0 3.4375l1.6875 0l0 1.3125l-1.6875 0l0 5.75q0 0.71875 0.078125 0.921875q0.09375 0.203125 0.296875 0.328125q0.203125 0.125 0.578125 0.125q0.265625 0 0.734375 -0.078125zm1.5426636 -10.1875l0 -1.90625l1.671875 0l0 1.90625l-1.671875 0zm0 11.6875l0 -9.859375l1.671875 0l0 9.859375l-1.671875 0zm3.5042114 -4.921875q0 -2.734375 1.53125 -4.0625q1.265625 -1.09375 3.09375 -1.09375q2.03125 0 3.3125 1.34375q1.296875 1.328125 1.296875 3.671875q0 1.90625 -0.578125 3.0q-0.5625 1.078125 -1.65625 1.6875q-1.078125 0.59375 -2.375 0.59375q-2.0625 0 -3.34375 -1.328125q-1.28125 -1.328125 -1.28125 -3.8125zm1.71875 0q0 1.890625 0.828125 2.828125q0.828125 0.9375 2.078125 0.9375q1.25 0 2.0625 -0.9375q0.828125 -0.953125 0.828125 -2.890625q0 -1.828125 -0.828125 -2.765625q-0.828125 -0.9375 -2.0625 -0.9375q-1.25 0 -2.078125 0.9375q-0.828125 0.9375 -0.828125 2.828125zm9.281952 4.921875l0 -9.859375l1.5 0l0 1.40625q1.09375 -1.625 3.140625 -1.625q0.890625 0 1.640625 0.328125q0.75 0.3125 1.109375 0.84375q0.375 0.515625 0.53125 1.21875q0.09375 0.46875 0.09375 1.625l0 6.0625l-1.671875 0l0 -6.0q0 -1.015625 -0.203125 -1.515625q-0.1875 -0.515625 -0.6875 -0.8125q-0.5 -0.296875 -1.171875 -0.296875q-1.0625 0 -1.84375 0.671875q-0.765625 0.671875 -0.765625 2.578125l0 5.375l-1.671875 0zm14.887146 -2.9375l1.65625 -0.265625q0.140625 1.0 0.765625 1.53125q0.640625 0.515625 1.78125 0.515625q1.15625 0 1.703125 -0.46875q0.5625 -0.46875 0.5625 -1.09375q0 -0.5625 -0.484375 -0.890625q-0.34375 -0.21875 -1.703125 -0.5625q-1.84375 -0.46875 -2.5625 -0.796875q-0.703125 -0.34375 -1.078125 -0.9375q-0.359375 -0.609375 -0.359375 -1.328125q0 -0.65625 0.296875 -1.21875q0.3125 -0.5625 0.828125 -0.9375q0.390625 -0.28125 1.0625 -0.484375q0.671875 -0.203125 1.4375 -0.203125q1.171875 0 2.046875 0.34375q0.875 0.328125 1.28125 0.90625q0.421875 0.5625 0.578125 1.515625l-1.625 0.21875q-0.109375 -0.75 -0.65625 -1.171875q-0.53125 -0.4375 -1.5 -0.4375q-1.15625 0 -1.640625 0.390625q-0.484375 0.375 -0.484375 0.875q0 0.328125 0.203125 0.59375q0.203125 0.265625 0.640625 0.4375q0.25 0.09375 1.46875 0.4375q1.765625 0.46875 2.46875 0.765625q0.703125 0.296875 1.09375 0.875q0.40625 0.578125 0.40625 1.4375q0 0.828125 -0.484375 1.578125q-0.484375 0.734375 -1.40625 1.140625q-0.921875 0.390625 -2.078125 0.390625q-1.921875 0 -2.9375 -0.796875q-1.0 -0.796875 -1.28125 -2.359375zm10.015625 -8.75l0 -1.90625l1.671875 0l0 1.90625l-1.671875 0zm0 11.6875l0 -9.859375l1.671875 0l0 9.859375l-1.671875 0zm3.2542114 0l0 -1.359375l6.265625 -7.1875q-1.0625 0.046875 -1.875 0.046875l-4.015625 0l0 -1.359375l8.046875 0l0 1.109375l-5.34375 6.25l-1.015625 1.140625q1.109375 -0.078125 2.09375 -0.078125l4.5625 0l0 1.4375l-8.71875 0zm16.953125 -3.171875l1.71875 0.21875q-0.40625 1.5 -1.515625 2.34375q-1.09375 0.828125 -2.8125 0.828125q-2.15625 0 -3.421875 -1.328125q-1.265625 -1.328125 -1.265625 -3.734375q0 -2.484375 1.265625 -3.859375q1.28125 -1.375 3.328125 -1.375q1.984375 0 3.234375 1.34375q1.25 1.34375 1.25 3.796875q0 0.140625 -0.015625 0.4375l-7.34375 0q0.09375 1.625 0.921875 2.484375q0.828125 0.859375 2.0625 0.859375q0.90625 0 1.546875 -0.46875q0.65625 -0.484375 1.046875 -1.546875zm-5.484375 -2.703125l5.5 0q-0.109375 -1.234375 -0.625 -1.859375q-0.796875 -0.96875 -2.078125 -0.96875q-1.140625 0 -1.9375 0.78125q-0.78125 0.765625 -0.859375 2.046875zm9.578827 -2.078125l0 -1.90625l1.90625 0l0 1.90625l-1.90625 0zm0 7.953125l0 -1.90625l1.90625 0l0 1.90625l-1.90625 0zm18.210358 -1.609375l0 1.609375l-8.984375 0q-0.015625 -0.609375 0.1875 -1.15625q0.34375 -0.921875 1.09375 -1.8125q0.765625 -0.890625 2.1875 -2.0625q2.21875 -1.8125 3.0 -2.875q0.78125 -1.0625 0.78125 -2.015625q0 -0.984375 -0.71875 -1.671875q-0.703125 -0.6875 -1.84375 -0.6875q-1.203125 0 -1.9375 0.734375q-0.71875 0.71875 -0.71875 2.0l-1.71875 -0.171875q0.171875 -1.921875 1.328125 -2.921875q1.15625 -1.015625 3.09375 -1.015625q1.953125 0 3.09375 1.09375q1.140625 1.078125 1.140625 2.6875q0 0.8125 -0.34375 1.609375q-0.328125 0.78125 -1.109375 1.65625q-0.765625 0.859375 -2.5625 2.390625q-1.5 1.265625 -1.9375 1.71875q-0.421875 0.4375 -0.703125 0.890625l6.671875 0zm1.5944824 -5.09375q0 -2.421875 0.5 -3.890625q0.5 -1.46875 1.46875 -2.265625q0.984375 -0.796875 2.46875 -0.796875q1.09375 0 1.921875 0.4375q0.828125 0.4375 1.359375 1.28125q0.546875 0.828125 0.84375 2.015625q0.3125 1.1875 0.3125 3.21875q0 2.390625 -0.5 3.859375q-0.484375 1.46875 -1.46875 2.28125q-0.96875 0.796875 -2.46875 0.796875q-1.96875 0 -3.078125 -1.40625q-1.359375 -1.703125 -1.359375 -5.53125zm1.71875 0q0 3.34375 0.78125 4.453125q0.796875 1.109375 1.9375 1.109375q1.15625 0 1.9375 -1.109375q0.78125 -1.125 0.78125 -4.453125q0 -3.359375 -0.78125 -4.46875q-0.78125 -1.109375 -1.953125 -1.109375q-1.15625 0 -1.828125 0.984375q-0.875 1.234375 -0.875 4.59375zm8.656952 0q0 -2.421875 0.5 -3.890625q0.5 -1.46875 1.46875 -2.265625q0.984375 -0.796875 2.46875 -0.796875q1.09375 0 1.921875 0.4375q0.828125 0.4375 1.359375 1.28125q0.546875 0.828125 0.84375 2.015625q0.3125 1.1875 0.3125 3.21875q0 2.390625 -0.5 3.859375q-0.484375 1.46875 -1.46875 2.28125q-0.96875 0.796875 -2.46875 0.796875q-1.96875 0 -3.078125 -1.40625q-1.359375 -1.703125 -1.359375 -5.53125zm1.71875 0q0 3.34375 0.78125 4.453125q0.796875 1.109375 1.9375 1.109375q1.15625 0 1.9375 -1.109375q0.78125 -1.125 0.78125 -4.453125q0 -3.359375 -0.78125 -4.46875q-0.78125 -1.109375 -1.953125 -1.109375q-1.15625 0 -1.828125 0.984375q-0.875 1.234375 -0.875 4.59375z" fill-rule="nonzero"></path><path fill="#000000" fill-opacity="0.0" d="m223.0 437.45026l232.18896 0l0 58.992096l-232.18896 0z" fill-rule="evenodd"></path><path stroke="#000000" stroke-width="2.0" stroke-linejoin="round" stroke-linecap="butt" d="m223.0 437.45026l232.18896 0l0 58.992096l-232.18896 0z" fill-rule="evenodd"></path><path fill="#000000" d="m265.82367 464.37024l0 -13.59375l1.796875 0l0 11.984375l6.703125 0l0 1.609375l-8.5 0zm9.844482 -4.375l1.6875 -0.140625q0.125 1.015625 0.5625 1.671875q0.4375 0.65625 1.359375 1.0625q0.9375 0.40625 2.09375 0.40625q1.03125 0 1.8125 -0.3125q0.796875 -0.3125 1.1875 -0.84375q0.390625 -0.53125 0.390625 -1.15625q0 -0.640625 -0.375 -1.109375q-0.375 -0.484375 -1.234375 -0.8125q-0.546875 -0.21875 -2.421875 -0.65625q-1.875 -0.453125 -2.625 -0.859375q-0.96875 -0.515625 -1.453125 -1.265625q-0.46875 -0.75 -0.46875 -1.6875q0 -1.03125 0.578125 -1.921875q0.59375 -0.90625 1.703125 -1.359375q1.125 -0.46875 2.5 -0.46875q1.515625 0 2.671875 0.484375q1.15625 0.484375 1.765625 1.4375q0.625 0.9375 0.671875 2.140625l-1.71875 0.125q-0.140625 -1.28125 -0.953125 -1.9375q-0.796875 -0.671875 -2.359375 -0.671875q-1.625 0 -2.375 0.609375q-0.75 0.59375 -0.75 1.4375q0 0.734375 0.53125 1.203125q0.515625 0.46875 2.703125 0.96875q2.203125 0.5 3.015625 0.875q1.1875 0.546875 1.75 1.390625q0.578125 0.828125 0.578125 1.921875q0 1.09375 -0.625 2.0625q-0.625 0.953125 -1.796875 1.484375q-1.15625 0.53125 -2.609375 0.53125q-1.84375 0 -3.09375 -0.53125q-1.25 -0.546875 -1.96875 -1.625q-0.703125 -1.078125 -0.734375 -2.453125zm16.506073 4.375l0 -12.0l-4.46875 0l0 -1.59375l10.765625 0l0 1.59375l-4.5 0l0 12.0l-1.796875 0zm7.8803406 0l0 -13.59375l2.71875 0l3.21875 9.625q0.4375 1.34375 0.640625 2.015625q0.234375 -0.75 0.734375 -2.1875l3.25 -9.453125l2.421875 0l0 13.59375l-1.734375 0l0 -11.390625l-3.953125 11.390625l-1.625 0l-3.9375 -11.578125l0 11.578125l-1.734375 0zm14.915802 -3.5625l1.765625 -0.15625q0.1875 1.28125 0.890625 1.9375q0.71875 0.640625 1.71875 0.640625q1.203125 0 2.03125 -0.90625q0.84375 -0.90625 0.84375 -2.421875q0 -1.421875 -0.8125 -2.25q-0.796875 -0.828125 -2.09375 -0.828125q-0.796875 0 -1.453125 0.375q-0.640625 0.359375 -1.015625 0.953125l-1.578125 -0.203125l1.328125 -7.0l6.765625 0l0 1.609375l-5.4375 0l-0.734375 3.640625q1.234375 -0.84375 2.578125 -0.84375q1.78125 0 3.0 1.234375q1.234375 1.234375 1.234375 3.171875q0 1.84375 -1.078125 3.1875q-1.3125 1.65625 -3.578125 1.65625q-1.859375 0 -3.03125 -1.03125q-1.171875 -1.046875 -1.34375 -2.765625zm19.215271 7.5625q-1.375 -1.75 -2.328125 -4.078125q-0.953125 -2.34375 -0.953125 -4.84375q0 -2.21875 0.703125 -4.234375q0.84375 -2.34375 2.578125 -4.671875l1.203125 0q-1.125 1.921875 -1.484375 2.75q-0.5625 1.28125 -0.890625 2.671875q-0.40625 1.734375 -0.40625 3.484375q0 4.46875 2.78125 8.921875l-1.203125 0zm2.556427 -7.5625l1.765625 -0.15625q0.1875 1.28125 0.890625 1.9375q0.71875 0.640625 1.71875 0.640625q1.203125 0 2.03125 -0.90625q0.84375 -0.90625 0.84375 -2.421875q0 -1.421875 -0.8125 -2.25q-0.796875 -0.828125 -2.09375 -0.828125q-0.796875 0 -1.453125 0.375q-0.640625 0.359375 -1.015625 0.953125l-1.578125 -0.203125l1.328125 -7.0l6.765625 0l0 1.609375l-5.4375 0l-0.734375 3.640625q1.234375 -0.84375 2.578125 -0.84375q1.78125 0 3.0 1.234375q1.234375 1.234375 1.234375 3.171875q0 1.84375 -1.078125 3.1875q-1.3125 1.65625 -3.578125 1.65625q-1.859375 0 -3.03125 -1.03125q-1.171875 -1.046875 -1.34375 -2.765625zm10.375732 -3.140625q0 -2.421875 0.5 -3.890625q0.5 -1.46875 1.46875 -2.265625q0.984375 -0.796875 2.46875 -0.796875q1.09375 0 1.921875 0.4375q0.828125 0.4375 1.359375 1.28125q0.546875 0.828125 0.84375 2.015625q0.3125 1.1875 0.3125 3.21875q0 2.390625 -0.5 3.859375q-0.484375 1.46875 -1.46875 2.28125q-0.96875 0.796875 -2.46875 0.796875q-1.96875 0 -3.078125 -1.40625q-1.359375 -1.703125 -1.359375 -5.53125zm1.71875 0q0 3.34375 0.78125 4.453125q0.796875 1.109375 1.9375 1.109375q1.15625 0 1.9375 -1.109375q0.78125 -1.125 0.78125 -4.453125q0 -3.359375 -0.78125 -4.46875q-0.78125 -1.109375 -1.953125 -1.109375q-1.15625 0 -1.828125 0.984375q-0.875 1.234375 -0.875 4.59375zm8.656952 0q0 -2.421875 0.5 -3.890625q0.5 -1.46875 1.46875 -2.265625q0.984375 -0.796875 2.46875 -0.796875q1.09375 0 1.921875 0.4375q0.828125 0.4375 1.359375 1.28125q0.546875 0.828125 0.84375 2.015625q0.3125 1.1875 0.3125 3.21875q0 2.390625 -0.5 3.859375q-0.484375 1.46875 -1.46875 2.28125q-0.96875 0.796875 -2.46875 0.796875q-1.96875 0 -3.078125 -1.40625q-1.359375 -1.703125 -1.359375 -5.53125zm1.71875 0q0 3.34375 0.78125 4.453125q0.796875 1.109375 1.9375 1.109375q1.15625 0 1.9375 -1.109375q0.78125 -1.125 0.78125 -4.453125q0 -3.359375 -0.78125 -4.46875q-0.78125 -1.109375 -1.953125 -1.109375q-1.15625 0 -1.828125 0.984375q-0.875 1.234375 -0.875 4.59375zm8.016357 6.703125l3.59375 -5.125l-3.328125 -4.734375l2.09375 0l1.515625 2.3125q0.421875 0.65625 0.671875 1.109375q0.421875 -0.609375 0.765625 -1.09375l1.65625 -2.328125l1.984375 0l-3.390625 4.640625l3.65625 5.21875l-2.046875 0l-2.03125 -3.0625l-0.53125 -0.828125l-2.59375 3.890625l-2.015625 0zm9.96875 -3.5625l1.765625 -0.15625q0.1875 1.28125 0.890625 1.9375q0.71875 0.640625 1.71875 0.640625q1.203125 0 2.03125 -0.90625q0.84375 -0.90625 0.84375 -2.421875q0 -1.421875 -0.8125 -2.25q-0.796875 -0.828125 -2.09375 -0.828125q-0.796875 0 -1.453125 0.375q-0.640625 0.359375 -1.015625 0.953125l-1.578125 -0.203125l1.328125 -7.0l6.765625 0l0 1.609375l-5.4375 0l-0.734375 3.640625q1.234375 -0.84375 2.578125 -0.84375q1.78125 0 3.0 1.234375q1.234375 1.234375 1.234375 3.171875q0 1.84375 -1.078125 3.1875q-1.3125 1.65625 -3.578125 1.65625q-1.859375 0 -3.03125 -1.03125q-1.171875 -1.046875 -1.34375 -2.765625zm10.375702 -3.140625q0 -2.421875 0.5 -3.890625q0.5 -1.46875 1.46875 -2.265625q0.984375 -0.796875 2.46875 -0.796875q1.09375 0 1.921875 0.4375q0.828125 0.4375 1.359375 1.28125q0.546875 0.828125 0.84375 2.015625q0.3125 1.1875 0.3125 3.21875q0 2.390625 -0.5 3.859375q-0.484375 1.46875 -1.46875 2.28125q-0.96875 0.796875 -2.46875 0.796875q-1.96875 0 -3.078125 -1.40625q-1.359375 -1.703125 -1.359375 -5.53125zm1.71875 0q0 3.34375 0.78125 4.453125q0.796875 1.109375 1.9375 1.109375q1.15625 0 1.9375 -1.109375q0.78125 -1.125 0.78125 -4.453125q0 -3.359375 -0.78125 -4.46875q-0.78125 -1.109375 -1.953125 -1.109375q-1.15625 0 -1.828125 0.984375q-0.875 1.234375 -0.875 4.59375zm8.656982 0q0 -2.421875 0.5 -3.890625q0.5 -1.46875 1.46875 -2.265625q0.984375 -0.796875 2.46875 -0.796875q1.09375 0 1.921875 0.4375q0.828125 0.4375 1.359375 1.28125q0.546875 0.828125 0.84375 2.015625q0.3125 1.1875 0.3125 3.21875q0 2.390625 -0.5 3.859375q-0.484375 1.46875 -1.46875 2.28125q-0.96875 0.796875 -2.46875 0.796875q-1.96875 0 -3.078125 -1.40625q-1.359375 -1.703125 -1.359375 -5.53125zm1.71875 0q0 3.34375 0.78125 4.453125q0.796875 1.109375 1.9375 1.109375q1.15625 0 1.9375 -1.109375q0.78125 -1.125 0.78125 -4.453125q0 -3.359375 -0.78125 -4.46875q-0.78125 -1.109375 -1.953125 -1.109375q-1.15625 0 -1.828125 0.984375q-0.875 1.234375 -0.875 4.59375zm10.219452 10.703125l-1.1875 0q2.765625 -4.453125 2.765625 -8.921875q0 -1.734375 -0.390625 -3.453125q-0.328125 -1.390625 -0.890625 -2.671875q-0.359375 -0.84375 -1.484375 -2.78125l1.1875 0q1.75 2.328125 2.578125 4.671875q0.71875 2.015625 0.71875 4.234375q0 2.5 -0.96875 4.84375q-0.953125 2.328125 -2.328125 4.078125z" fill-rule="nonzero"></path><path fill="#000000" d="m259.9714 490.1515l0 -13.640625l1.53125 0l0 1.28125q0.53125 -0.75 1.203125 -1.125q0.6875 -0.375 1.640625 -0.375q1.265625 0 2.234375 0.65625q0.96875 0.640625 1.453125 1.828125q0.5 1.1875 0.5 2.59375q0 1.515625 -0.546875 2.734375q-0.546875 1.203125 -1.578125 1.84375q-1.03125 0.640625 -2.171875 0.640625q-0.84375 0 -1.515625 -0.34375q-0.65625 -0.359375 -1.078125 -0.890625l0 4.796875l-1.671875 0zm1.515625 -8.65625q0 1.90625 0.765625 2.8125q0.78125 0.90625 1.875 0.90625q1.109375 0 1.890625 -0.9375q0.796875 -0.9375 0.796875 -2.921875q0 -1.875 -0.78125 -2.8125q-0.765625 -0.9375 -1.84375 -0.9375q-1.0625 0 -1.890625 1.0q-0.8125 1.0 -0.8125 2.890625zm8.844452 4.875l0 -9.859375l1.5 0l0 1.5q0.578125 -1.046875 1.0625 -1.375q0.484375 -0.34375 1.078125 -0.34375q0.84375 0 1.71875 0.546875l-0.578125 1.546875q-0.609375 -0.359375 -1.234375 -0.359375q-0.546875 0 -0.984375 0.328125q-0.421875 0.328125 -0.609375 0.90625q-0.28125 0.890625 -0.28125 1.953125l0 5.15625l-1.671875 0zm5.603302 -4.921875q0 -2.734375 1.53125 -4.0625q1.265625 -1.09375 3.09375 -1.09375q2.03125 0 3.3125 1.34375q1.296875 1.328125 1.296875 3.671875q0 1.90625 -0.578125 3.0q-0.5625 1.078125 -1.65625 1.6875q-1.078125 0.59375 -2.375 0.59375q-2.0625 0 -3.34375 -1.328125q-1.28125 -1.328125 -1.28125 -3.8125zm1.71875 0q0 1.890625 0.828125 2.828125q0.828125 0.9375 2.078125 0.9375q1.25 0 2.0625 -0.9375q0.828125 -0.953125 0.828125 -2.890625q0 -1.828125 -0.828125 -2.765625q-0.828125 -0.9375 -2.0625 -0.9375q-1.25 0 -2.078125 0.9375q-0.828125 0.9375 -0.828125 2.828125zm9.281982 -6.734375l0 -1.9375l1.65625 0l0 1.9375l-1.65625 0zm-2.125 15.484375l0.3125 -1.421875q0.5 0.125 0.796875 0.125q0.515625 0 0.765625 -0.34375q0.25 -0.328125 0.25 -1.6875l0 -10.359375l1.65625 0l0 10.390625q0 1.828125 -0.46875 2.546875q-0.59375 0.921875 -2.0 0.921875q-0.671875 0 -1.3125 -0.171875zm13.019806 -7.0l1.71875 0.21875q-0.40625 1.5 -1.515625 2.34375q-1.09375 0.828125 -2.8125 0.828125q-2.15625 0 -3.421875 -1.328125q-1.265625 -1.328125 -1.265625 -3.734375q0 -2.484375 1.265625 -3.859375q1.28125 -1.375 3.328125 -1.375q1.984375 0 3.234375 1.34375q1.25 1.34375 1.25 3.796875q0 0.140625 -0.015625 0.4375l-7.34375 0q0.09375 1.625 0.921875 2.484375q0.828125 0.859375 2.0625 0.859375q0.90625 0 1.546875 -0.46875q0.65625 -0.484375 1.046875 -1.546875zm-5.484375 -2.703125l5.5 0q-0.109375 -1.234375 -0.625 -1.859375q-0.796875 -0.96875 -2.078125 -0.96875q-1.140625 0 -1.9375 0.78125q-0.78125 0.765625 -0.859375 2.046875zm15.547607 2.265625l1.640625 0.21875q-0.265625 1.6875 -1.375 2.65625q-1.109375 0.953125 -2.734375 0.953125q-2.015625 0 -3.25 -1.3125q-1.21875 -1.328125 -1.21875 -3.796875q0 -1.59375 0.515625 -2.78125q0.53125 -1.203125 1.609375 -1.796875q1.09375 -0.609375 2.359375 -0.609375q1.609375 0 2.625 0.8125q1.015625 0.8125 1.3125 2.3125l-1.625 0.25q-0.234375 -1.0 -0.828125 -1.5q-0.59375 -0.5 -1.421875 -0.5q-1.265625 0 -2.0625 0.90625q-0.78125 0.90625 -0.78125 2.859375q0 1.984375 0.765625 2.890625q0.765625 0.890625 1.984375 0.890625q0.984375 0 1.640625 -0.59375q0.65625 -0.609375 0.84375 -1.859375zm6.546875 2.109375l0.234375 1.484375q-0.703125 0.140625 -1.265625 0.140625q-0.90625 0 -1.40625 -0.28125q-0.5 -0.296875 -0.703125 -0.75q-0.203125 -0.46875 -0.203125 -1.984375l0 -5.65625l-1.234375 0l0 -1.3125l1.234375 0l0 -2.4375l1.65625 -1.0l0 3.4375l1.6875 0l0 1.3125l-1.6875 0l0 5.75q0 0.71875 0.078125 0.921875q0.09375 0.203125 0.296875 0.328125q0.203125 0.125 0.578125 0.125q0.265625 0 0.734375 -0.078125zm1.5426636 -10.1875l0 -1.90625l1.671875 0l0 1.90625l-1.671875 0zm0 11.6875l0 -9.859375l1.671875 0l0 9.859375l-1.671875 0zm3.5042114 -4.921875q0 -2.734375 1.53125 -4.0625q1.265625 -1.09375 3.09375 -1.09375q2.03125 0 3.3125 1.34375q1.296875 1.328125 1.296875 3.671875q0 1.90625 -0.578125 3.0q-0.5625 1.078125 -1.65625 1.6875q-1.078125 0.59375 -2.375 0.59375q-2.0625 0 -3.34375 -1.328125q-1.28125 -1.328125 -1.28125 -3.8125zm1.71875 0q0 1.890625 0.828125 2.828125q0.828125 0.9375 2.078125 0.9375q1.25 0 2.0625 -0.9375q0.828125 -0.953125 0.828125 -2.890625q0 -1.828125 -0.828125 -2.765625q-0.828125 -0.9375 -2.0625 -0.9375q-1.25 0 -2.078125 0.9375q-0.828125 0.9375 -0.828125 2.828125zm9.281952 4.921875l0 -9.859375l1.5 0l0 1.40625q1.09375 -1.625 3.140625 -1.625q0.890625 0 1.640625 0.328125q0.75 0.3125 1.109375 0.84375q0.375 0.515625 0.53125 1.21875q0.09375 0.46875 0.09375 1.625l0 6.0625l-1.671875 0l0 -6.0q0 -1.015625 -0.203125 -1.515625q-0.1875 -0.515625 -0.6875 -0.8125q-0.5 -0.296875 -1.171875 -0.296875q-1.0625 0 -1.84375 0.671875q-0.765625 0.671875 -0.765625 2.578125l0 5.375l-1.671875 0zm14.887146 -2.9375l1.65625 -0.265625q0.140625 1.0 0.765625 1.53125q0.640625 0.515625 1.78125 0.515625q1.15625 0 1.703125 -0.46875q0.5625 -0.46875 0.5625 -1.09375q0 -0.5625 -0.484375 -0.890625q-0.34375 -0.21875 -1.703125 -0.5625q-1.84375 -0.46875 -2.5625 -0.796875q-0.703125 -0.34375 -1.078125 -0.9375q-0.359375 -0.609375 -0.359375 -1.328125q0 -0.65625 0.296875 -1.21875q0.3125 -0.5625 0.828125 -0.9375q0.390625 -0.28125 1.0625 -0.484375q0.671875 -0.203125 1.4375 -0.203125q1.171875 0 2.046875 0.34375q0.875 0.328125 1.28125 0.90625q0.421875 0.5625 0.578125 1.515625l-1.625 0.21875q-0.109375 -0.75 -0.65625 -1.171875q-0.53125 -0.4375 -1.5 -0.4375q-1.15625 0 -1.640625 0.390625q-0.484375 0.375 -0.484375 0.875q0 0.328125 0.203125 0.59375q0.203125 0.265625 0.640625 0.4375q0.25 0.09375 1.46875 0.4375q1.765625 0.46875 2.46875 0.765625q0.703125 0.296875 1.09375 0.875q0.40625 0.578125 0.40625 1.4375q0 0.828125 -0.484375 1.578125q-0.484375 0.734375 -1.40625 1.140625q-0.921875 0.390625 -2.078125 0.390625q-1.921875 0 -2.9375 -0.796875q-1.0 -0.796875 -1.28125 -2.359375zm10.015625 -8.75l0 -1.90625l1.671875 0l0 1.90625l-1.671875 0zm0 11.6875l0 -9.859375l1.671875 0l0 9.859375l-1.671875 0zm3.2542114 0l0 -1.359375l6.265625 -7.1875q-1.0625 0.046875 -1.875 0.046875l-4.015625 0l0 -1.359375l8.046875 0l0 1.109375l-5.34375 6.25l-1.015625 1.140625q1.109375 -0.078125 2.09375 -0.078125l4.5625 0l0 1.4375l-8.71875 0zm16.953125 -3.171875l1.71875 0.21875q-0.40625 1.5 -1.515625 2.34375q-1.09375 0.828125 -2.8125 0.828125q-2.15625 0 -3.421875 -1.328125q-1.265625 -1.328125 -1.265625 -3.734375q0 -2.484375 1.265625 -3.859375q1.28125 -1.375 3.328125 -1.375q1.984375 0 3.234375 1.34375q1.25 1.34375 1.25 3.796875q0 0.140625 -0.015625 0.4375l-7.34375 0q0.09375 1.625 0.921875 2.484375q0.828125 0.859375 2.0625 0.859375q0.90625 0 1.546875 -0.46875q0.65625 -0.484375 1.046875 -1.546875zm-5.484375 -2.703125l5.5 0q-0.109375 -1.234375 -0.625 -1.859375q-0.796875 -0.96875 -2.078125 -0.96875q-1.140625 0 -1.9375 0.78125q-0.78125 0.765625 -0.859375 2.046875zm9.578827 -2.078125l0 -1.90625l1.90625 0l0 1.90625l-1.90625 0zm0 7.953125l0 -1.90625l1.90625 0l0 1.90625l-1.90625 0zm18.210358 -1.609375l0 1.609375l-8.984375 0q-0.015625 -0.609375 0.1875 -1.15625q0.34375 -0.921875 1.09375 -1.8125q0.765625 -0.890625 2.1875 -2.0625q2.21875 -1.8125 3.0 -2.875q0.78125 -1.0625 0.78125 -2.015625q0 -0.984375 -0.71875 -1.671875q-0.703125 -0.6875 -1.84375 -0.6875q-1.203125 0 -1.9375 0.734375q-0.71875 0.71875 -0.71875 2.0l-1.71875 -0.171875q0.171875 -1.921875 1.328125 -2.921875q1.15625 -1.015625 3.09375 -1.015625q1.953125 0 3.09375 1.09375q1.140625 1.078125 1.140625 2.6875q0 0.8125 -0.34375 1.609375q-0.328125 0.78125 -1.109375 1.65625q-0.765625 0.859375 -2.5625 2.390625q-1.5 1.265625 -1.9375 1.71875q-0.421875 0.4375 -0.703125 0.890625l6.671875 0zm1.5944824 -5.09375q0 -2.421875 0.5 -3.890625q0.5 -1.46875 1.46875 -2.265625q0.984375 -0.796875 2.46875 -0.796875q1.09375 0 1.921875 0.4375q0.828125 0.4375 1.359375 1.28125q0.546875 0.828125 0.84375 2.015625q0.3125 1.1875 0.3125 3.21875q0 2.390625 -0.5 3.859375q-0.484375 1.46875 -1.46875 2.28125q-0.96875 0.796875 -2.46875 0.796875q-1.96875 0 -3.078125 -1.40625q-1.359375 -1.703125 -1.359375 -5.53125zm1.71875 0q0 3.34375 0.78125 4.453125q0.796875 1.109375 1.9375 1.109375q1.15625 0 1.9375 -1.109375q0.78125 -1.125 0.78125 -4.453125q0 -3.359375 -0.78125 -4.46875q-0.78125 -1.109375 -1.953125 -1.109375q-1.15625 0 -1.828125 0.984375q-0.875 1.234375 -0.875 4.59375zm8.656952 0q0 -2.421875 0.5 -3.890625q0.5 -1.46875 1.46875 -2.265625q0.984375 -0.796875 2.46875 -0.796875q1.09375 0 1.921875 0.4375q0.828125 0.4375 1.359375 1.28125q0.546875 0.828125 0.84375 2.015625q0.3125 1.1875 0.3125 3.21875q0 2.390625 -0.5 3.859375q-0.484375 1.46875 -1.46875 2.28125q-0.96875 0.796875 -2.46875 0.796875q-1.96875 0 -3.078125 -1.40625q-1.359375 -1.703125 -1.359375 -5.53125zm1.71875 0q0 3.34375 0.78125 4.453125q0.796875 1.109375 1.9375 1.109375q1.15625 0 1.9375 -1.109375q0.78125 -1.125 0.78125 -4.453125q0 -3.359375 -0.78125 -4.46875q-0.78125 -1.109375 -1.953125 -1.109375q-1.15625 0 -1.828125 0.984375q-0.875 1.234375 -0.875 4.59375z" fill-rule="nonzero"></path><path fill="#000000" fill-opacity="0.0" d="m269.46194 594.54596l140.06299 0l0 42.11023l-140.06299 0z" fill-rule="evenodd"></path><path stroke="#000000" stroke-width="2.0" stroke-linejoin="round" stroke-linecap="butt" d="m269.46194 594.54596l140.06299 0l0 42.11023l-140.06299 0z" fill-rule="evenodd"></path><path fill="#000000" d="m306.13754 617.09094l1.6875 -0.140625q0.125 1.015625 0.5625 1.671875q0.4375 0.65625 1.359375 1.0625q0.9375 0.40625 2.09375 0.40625q1.03125 0 1.8125 -0.3125q0.796875 -0.3125 1.1875 -0.84375q0.390625 -0.53125 0.390625 -1.15625q0 -0.640625 -0.375 -1.109375q-0.375 -0.484375 -1.234375 -0.8125q-0.546875 -0.21875 -2.421875 -0.65625q-1.875 -0.453125 -2.625 -0.859375q-0.96875 -0.515625 -1.453125 -1.265625q-0.46875 -0.75 -0.46875 -1.6875q0 -1.03125 0.578125 -1.921875q0.59375 -0.90625 1.703125 -1.359375q1.125 -0.46875 2.5 -0.46875q1.515625 0 2.671875 0.484375q1.15625 0.484375 1.765625 1.4375q0.625 0.9375 0.671875 2.140625l-1.71875 0.125q-0.140625 -1.28125 -0.953125 -1.9375q-0.796875 -0.671875 -2.359375 -0.671875q-1.625 0 -2.375 0.609375q-0.75 0.59375 -0.75 1.4375q0 0.734375 0.53125 1.203125q0.515625 0.46875 2.703125 0.96875q2.203125 0.5 3.015625 0.875q1.1875 0.546875 1.75 1.390625q0.578125 0.828125 0.578125 1.921875q0 1.09375 -0.625 2.0625q-0.625 0.953125 -1.796875 1.484375q-1.15625 0.53125 -2.609375 0.53125q-1.84375 0 -3.09375 -0.53125q-1.25 -0.546875 -1.96875 -1.625q-0.703125 -1.078125 -0.734375 -2.453125zm12.209198 -0.546875q0 -2.734375 1.53125 -4.0625q1.265625 -1.09375 3.09375 -1.09375q2.03125 0 3.3125 1.34375q1.296875 1.328125 1.296875 3.671875q0 1.90625 -0.578125 3.0q-0.5625 1.078125 -1.65625 1.6875q-1.078125 0.59375 -2.375 0.59375q-2.0625 0 -3.34375 -1.328125q-1.28125 -1.328125 -1.28125 -3.8125zm1.71875 0q0 1.890625 0.828125 2.828125q0.828125 0.9375 2.078125 0.9375q1.25 0 2.0625 -0.9375q0.828125 -0.953125 0.828125 -2.890625q0 -1.828125 -0.828125 -2.765625q-0.828125 -0.9375 -2.0625 -0.9375q-1.25 0 -2.078125 0.9375q-0.828125 0.9375 -0.828125 2.828125zm9.688232 4.921875l0 -8.546875l-1.484375 0l0 -1.3125l1.484375 0l0 -1.046875q0 -0.984375 0.171875 -1.46875q0.234375 -0.65625 0.84375 -1.046875q0.609375 -0.40625 1.703125 -0.40625q0.703125 0 1.5624695 0.15625l-0.25 1.46875q-0.5155945 -0.09375 -0.9843445 -0.09375q-0.765625 0 -1.078125 0.328125q-0.3125 0.3125 -0.3125 1.203125l0 0.90625l1.921875 0l0 1.3125l-1.921875 0l0 8.546875l-1.65625 0zm8.433289 -1.5l0.234375 1.484375q-0.703125 0.140625 -1.265625 0.140625q-0.90625 0 -1.40625 -0.28125q-0.5 -0.296875 -0.703125 -0.75q-0.203125 -0.46875 -0.203125 -1.984375l0 -5.65625l-1.234375 0l0 -1.3125l1.234375 0l0 -2.4375l1.65625 -1.0l0 3.4375l1.6875 0l0 1.3125l-1.6875 0l0 5.75q0 0.71875 0.078125 0.921875q0.09375 0.203125 0.296875 0.328125q0.203125 0.125 0.578125 0.125q0.265625 0 0.734375 -0.078125zm1.5270386 1.5l0 -9.859375l1.5 0l0 1.390625q0.453125 -0.71875 1.21875 -1.15625q0.78125 -0.453125 1.765625 -0.453125q1.09375 0 1.796875 0.453125q0.703125 0.453125 0.984375 1.28125q1.171875 -1.734375 3.046875 -1.734375q1.46875 0 2.25 0.8125q0.796875 0.8125 0.796875 2.5l0 6.765625l-1.671875 0l0 -6.203125q0 -1.0 -0.15625 -1.4375q-0.15625 -0.453125 -0.59375 -0.71875q-0.421875 -0.265625 -1.0 -0.265625q-1.03125 0 -1.71875 0.6875q-0.6875 0.6875 -0.6875 2.21875l0 5.71875l-1.671875 0l0 -6.40625q0 -1.109375 -0.40625 -1.65625q-0.40625 -0.5625 -1.34375 -0.5625q-0.703125 0 -1.3125 0.375q-0.59375 0.359375 -0.859375 1.078125q-0.265625 0.71875 -0.265625 2.0625l0 5.109375l-1.671875 0zm21.978302 -1.21875q-0.9375 0.796875 -1.796875 1.125q-0.859375 0.3125 -1.84375 0.3125q-1.609375 0 -2.484375 -0.78125q-0.875 -0.796875 -0.875 -2.03125q0 -0.734375 0.328125 -1.328125q0.328125 -0.59375 0.859375 -0.953125q0.53125 -0.359375 1.203125 -0.546875q0.5 -0.140625 1.484375 -0.25q2.03125 -0.25 2.984375 -0.578125q0 -0.34375 0 -0.4375q0 -1.015625 -0.46875 -1.4375q-0.640625 -0.5625 -1.90625 -0.5625q-1.171875 0 -1.734375 0.40625q-0.5625 0.40625 -0.828125 1.46875l-1.640625 -0.234375q0.234375 -1.046875 0.734375 -1.6875q0.515625 -0.640625 1.46875 -0.984375q0.96875 -0.359375 2.25 -0.359375q1.265625 0 2.046875 0.296875q0.78125 0.296875 1.15625 0.75q0.375 0.453125 0.515625 1.140625q0.09375 0.421875 0.09375 1.53125l0 2.234375q0 2.328125 0.09375 2.953125q0.109375 0.609375 0.4375 1.171875l-1.75 0q-0.265625 -0.515625 -0.328125 -1.21875zm-0.140625 -3.71875q-0.90625 0.359375 -2.734375 0.625q-1.03125 0.140625 -1.453125 0.328125q-0.421875 0.1875 -0.65625 0.546875q-0.234375 0.359375 -0.234375 0.796875q0 0.671875 0.5 1.125q0.515625 0.4375 1.484375 0.4375q0.96875 0 1.71875 -0.421875q0.75 -0.4375 1.109375 -1.15625q0.265625 -0.578125 0.265625 -1.671875l0 -0.609375zm2.9694824 4.9375l3.59375 -5.125l-3.328125 -4.734375l2.09375 0l1.515625 2.3125q0.421875 0.65625 0.671875 1.109375q0.421875 -0.609375 0.765625 -1.09375l1.65625 -2.328125l1.984375 0l-3.390625 4.640625l3.65625 5.21875l-2.046875 0l-2.03125 -3.0625l-0.53125 -0.828125l-2.59375 3.890625l-2.015625 0z" fill-rule="nonzero"></path><path fill="#000000" fill-opacity="0.0" d="m339.09448 568.5302l0.40945435 26.015747" fill-rule="evenodd"></path><path stroke="#000000" stroke-width="1.0" stroke-linejoin="round" stroke-linecap="butt" d="m339.09448 568.5302l0.31503296 20.01648" fill-rule="evenodd"></path><path fill="#000000" stroke="#000000" stroke-width="1.0" stroke-linecap="butt" d="m337.758 588.5727l1.7229309 4.5115356l1.5801086 -4.5635376z" fill-rule="evenodd"></path><path fill="#000000" fill-opacity="0.0" d="m339.49344 636.6562l0.31497192 20.346436" fill-rule="evenodd"></path><path stroke="#000000" stroke-width="1.0" stroke-linejoin="round" stroke-linecap="butt" d="m339.49344 636.6562l0.22210693 14.347168" fill-rule="evenodd"></path><path fill="#000000" stroke="#000000" stroke-width="1.0" stroke-linecap="butt" d="m338.064 651.02893l1.7217712 4.511963l1.5812988 -4.5631104z" fill-rule="evenodd"></path></g></svg>
 
-- 
GitLab


From 6fecbc39f37643f30ebd0681240b2c2fdede5b09 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 17 Nov 2017 10:32:07 -0800
Subject: [PATCH 0053/1225] Added tests for tensorflow::StringPiece::Hasher.

PiperOrigin-RevId: 176127449
---
 tensorflow/core/lib/core/stringpiece_test.cc | 72 ++++++++++++++++++++
 1 file changed, 72 insertions(+)

diff --git a/tensorflow/core/lib/core/stringpiece_test.cc b/tensorflow/core/lib/core/stringpiece_test.cc
index ad70d41873..11554554e8 100644
--- a/tensorflow/core/lib/core/stringpiece_test.cc
+++ b/tensorflow/core/lib/core/stringpiece_test.cc
@@ -14,6 +14,8 @@ limitations under the License.
 ==============================================================================*/
 
 #include "tensorflow/core/lib/core/stringpiece.h"
+
+#include <unordered_map>
 #include "tensorflow/core/platform/test.h"
 
 namespace tensorflow {
@@ -63,4 +65,74 @@ TEST(StringPiece, Contains) {
   EXPECT_TRUE(!a.contains(d));
 }
 
+TEST(StringPieceHasher, Equality) {
+  StringPiece::Hasher hasher;
+
+  StringPiece s1("foo");
+  StringPiece s2("bar");
+  StringPiece s3("baz");
+  StringPiece s4("zot");
+
+  EXPECT_TRUE(hasher(s1) != hasher(s2));
+  EXPECT_TRUE(hasher(s1) != hasher(s3));
+  EXPECT_TRUE(hasher(s1) != hasher(s4));
+  EXPECT_TRUE(hasher(s2) != hasher(s3));
+  EXPECT_TRUE(hasher(s2) != hasher(s4));
+  EXPECT_TRUE(hasher(s3) != hasher(s4));
+
+  EXPECT_TRUE(hasher(s1) == hasher(s1));
+  EXPECT_TRUE(hasher(s2) == hasher(s2));
+  EXPECT_TRUE(hasher(s3) == hasher(s3));
+  EXPECT_TRUE(hasher(s4) == hasher(s4));
+}
+
+TEST(StringPieceHasher, HashMap) {
+  string s1("foo");
+  string s2("bar");
+  string s3("baz");
+
+  StringPiece p1(s1);
+  StringPiece p2(s2);
+  StringPiece p3(s3);
+
+  std::unordered_map<StringPiece, int, StringPiece::Hasher> map;
+
+  map.insert(std::make_pair(p1, 0));
+  map.insert(std::make_pair(p2, 1));
+  map.insert(std::make_pair(p3, 2));
+  EXPECT_EQ(map.size(), 3);
+
+  bool found[3] = {false, false, false};
+  for (auto const& val : map) {
+    int x = val.second;
+    EXPECT_TRUE(x >= 0 && x < 3);
+    EXPECT_TRUE(!found[x]);
+    found[x] = true;
+  }
+  EXPECT_EQ(found[0], true);
+  EXPECT_EQ(found[1], true);
+  EXPECT_EQ(found[2], true);
+
+  auto new_iter = map.find("zot");
+  EXPECT_TRUE(new_iter == map.end());
+
+  new_iter = map.find("bar");
+  EXPECT_TRUE(new_iter != map.end());
+
+  map.erase(new_iter);
+  EXPECT_EQ(map.size(), 2);
+
+  found[0] = false;
+  found[1] = false;
+  found[2] = false;
+  for (const auto& iter : map) {
+    int x = iter.second;
+    EXPECT_TRUE(x >= 0 && x < 3);
+    EXPECT_TRUE(!found[x]);
+    found[x] = true;
+  }
+  EXPECT_EQ(found[0], true);
+  EXPECT_EQ(found[1], false);
+  EXPECT_EQ(found[2], true);
+}
 }  // namespace tensorflow
-- 
GitLab


From fedb844013194539e23cb971df793b4029396c2f Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 17 Nov 2017 10:44:57 -0800
Subject: [PATCH 0054/1225] Throw error if context exists but graph is empty
 when enabling eager mode.

PiperOrigin-RevId: 176129497
---
 tensorflow/python/framework/ops.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/tensorflow/python/framework/ops.py b/tensorflow/python/framework/ops.py
index 6ac3b862c8..7cca260d73 100644
--- a/tensorflow/python/framework/ops.py
+++ b/tensorflow/python/framework/ops.py
@@ -4703,6 +4703,9 @@ def enable_eager_execution(config=None, device_policy=None):
                      " policy: %s." % (config, context._context._config,
                                        device_policy,
                                        context._context._device_policy))
+  else:
+    raise ValueError(
+        "tfe.enable_eager_execution has to be called at program startup.")
 
 
 def eager_run(main=None, argv=None):
-- 
GitLab


From de1cd503235a32ec216533d198dd6f6318655ab2 Mon Sep 17 00:00:00 2001
From: Skye Wanderman-Milne <skyewm@google.com>
Date: Fri, 17 Nov 2017 10:58:28 -0800
Subject: [PATCH 0055/1225] Minor tf_session.i refactoring.

Moves inline C++ helper functions to top of file, and adds
CreateWrappedTFOutput and CreateWrappedTFOperation helper functions
(this is pulling out existing functionality that will be useful moving
forward).

PiperOrigin-RevId: 176131555
---
 tensorflow/python/client/tf_session.i | 82 +++++++++++++++------------
 1 file changed, 45 insertions(+), 37 deletions(-)

diff --git a/tensorflow/python/client/tf_session.i b/tensorflow/python/client/tf_session.i
index 40731aba7d..41c707ae63 100644
--- a/tensorflow/python/client/tf_session.i
+++ b/tensorflow/python/client/tf_session.i
@@ -24,6 +24,49 @@ limitations under the License.
 #include "tensorflow/core/lib/strings/stringprintf.h"
 #include "tensorflow/core/public/version.h"
 
+// Helper function to convert a Python list of Tensors to a C++ vector of
+// TF_Outputs.
+//
+// Returns true if successful. Otherwise, returns false and sets error_msg.
+bool PyTensorListToVector(PyObject* py_tensor_list,
+                          std::vector<TF_Output>* vec,
+                          string* error_msg) {
+  if (!PyList_Check(py_tensor_list)) {
+    *error_msg = "expected Python list.";
+    return false;
+  }
+  size_t size = PyList_Size(py_tensor_list);
+  for (int i = 0; i < size; ++i) {
+    PyObject* item = PyList_GetItem(py_tensor_list, i);
+    TF_Output* input_ptr;
+    if (!SWIG_IsOK(SWIG_ConvertPtr(item, reinterpret_cast<void**>(&input_ptr),
+                                   SWIGTYPE_p_TF_Output, 0))) {
+      *error_msg = "expected Python list of wrapped TF_Output objects. "
+          "Found python list of something else.";
+      return false;
+    }
+    vec->push_back(*input_ptr);
+  }
+  return true;
+}
+
+// Helper function to convert a TF_Output to a wrapped TF_Output Python object.
+PyObject* CreateWrappedTFOutput(TF_Output tf_output) {
+  // We used heap-allocated pointers in the Python runtime (this is what SWIG
+  // generates by default for functions returning TF_Output).
+  TF_Output* tf_output_ptr = new TF_Output(tf_output);
+  // Use SWIG_POINTER_OWN so the TF_Output* is deleted by Python.
+  return SWIG_NewPointerObj(tf_output_ptr, SWIGTYPE_p_TF_Output,
+                            SWIG_POINTER_OWN);
+}
+
+// Helper function to convert a TF_Operation to a wrapped TF_Operation Python
+// object.
+PyObject* CreateWrappedTFOperation(TF_Operation* tf_operation) {
+  // No flags since operation is owned by TF_Graph.
+  return SWIG_NewPointerObj(tf_operation, SWIGTYPE_p_TF_Operation, 0);
+}
+
 %}
 
 %include "tensorflow/python/client/tf_sessionrun_wrapper.i"
@@ -98,8 +141,7 @@ tensorflow::ImportNumpy();
   }
 
   for (size_t i = 0; i < $1.size(); ++i) {
-    PyList_SET_ITEM($result, i, SWIG_NewPointerObj(
-                            $1[i], SWIGTYPE_p_TF_Operation, 0));
+    PyList_SET_ITEM($result, i, CreateWrappedTFOperation($1[i]));
   }
 }
 
@@ -118,13 +160,7 @@ tensorflow::ImportNumpy();
   // Unwrap the generated SwigValueWrapper<std::vector<TF_Output>> via &
   std::vector<TF_Output>* tf_outputs = &$1;
   for (size_t i = 0; i < $1.size(); ++i) {
-    // We used wrapped heap-allocated pointers in the Python runtime (this is
-    // what SWIG generates by default for functions returning TF_Output).
-    TF_Output* tf_output_ptr = new TF_Output((*tf_outputs)[i]);
-    // Use SWIG_POINTER_OWN so the TF_Output* is deleted by Python.
-    PyList_SET_ITEM($result, i,
-                    SWIG_NewPointerObj(tf_output_ptr, SWIGTYPE_p_TF_Output,
-                                       SWIG_POINTER_OWN));
+    PyList_SET_ITEM($result, i, CreateWrappedTFOutput((*tf_outputs)[i]));
   }
 }
 
@@ -268,34 +304,6 @@ tensorflow::ImportNumpy();
       reinterpret_cast<const char*>($1.data), $1.length);
 }
 
-%inline %{
-// Helper function to convert a Python list of Tensors to a C++ vector of
-// TF_Outputs.
-//
-// Returns true if successful. Otherwise, returns false and sets error_msg.
-bool PyTensorListToVector(PyObject* py_tensor_list,
-                          std::vector<TF_Output>* vec,
-                          string* error_msg) {
-  if (!PyList_Check(py_tensor_list)) {
-    *error_msg = "expected Python list.";
-    return false;
-  }
-  size_t size = PyList_Size(py_tensor_list);
-  for (int i = 0; i < size; ++i) {
-    PyObject* item = PyList_GetItem(py_tensor_list, i);
-    TF_Output* input_ptr;
-    if (!SWIG_IsOK(SWIG_ConvertPtr(item, reinterpret_cast<void**>(&input_ptr),
-                                   SWIGTYPE_p_TF_Output, 0))) {
-      *error_msg = "expected Python list of wrapped TF_Output objects. "
-          "Found python list of something else.";
-      return false;
-    }
-    vec->push_back(*input_ptr);
-  }
-  return true;
-}
-%}
-
 // Converts input Python list of wrapped TF_Outputs into a single array
 %typemap(in) (const TF_Output* inputs, int num_inputs)
     (std::vector<TF_Output> inputs) {
-- 
GitLab


From 684c02d91116022bbceea13fc4a0cff9267d8534 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 17 Nov 2017 11:14:12 -0800
Subject: [PATCH 0056/1225] Add g3doc for the ASR LM model.

PiperOrigin-RevId: 176134219
---
 .../contrib/lite/models/testdata/g3doc/README.md    | 13 +++++++++++++
 .../contrib/lite/models/testdata/g3doc/asr_lm.svg   |  4 ++++
 2 files changed, 17 insertions(+)
 create mode 100644 tensorflow/contrib/lite/models/testdata/g3doc/asr_lm.svg

diff --git a/tensorflow/contrib/lite/models/testdata/g3doc/README.md b/tensorflow/contrib/lite/models/testdata/g3doc/README.md
index d0c21d2833..da4802b07d 100644
--- a/tensorflow/contrib/lite/models/testdata/g3doc/README.md
+++ b/tensorflow/contrib/lite/models/testdata/g3doc/README.md
@@ -61,6 +61,19 @@ the corresponding parameters as shown in the figure.
 
 ![asr_am_model](asr_am.svg "ASR AM model")
 
+### Automatic Speech Recognizer (ASR) Language Model (LM)
+
+The language model for automatic speech recognition is the neural network model
+for predicting the probability of a word given previous words in a sentence.
+It generates posterior probabilities of the next word based from a sequence of
+words. The words are encoded as indices in a fixed size dictionary.
+The model has two inputs both of size one (integer), an output size of one
+(float). It consits of three embedding layer, three LSTM layers, followed by a
+multiplication, a fully connected layers and an addition.
+The corresponding parameters as shown in the figure.
+
+![asr_lm_model](asr_lm.svg "ASR LM model")
+
 ## Speech models test input/output generation
 
 As mentioned above the input to models are generated from a pre-processing
diff --git a/tensorflow/contrib/lite/models/testdata/g3doc/asr_lm.svg b/tensorflow/contrib/lite/models/testdata/g3doc/asr_lm.svg
new file mode 100644
index 0000000000..84d5f95b6a
--- /dev/null
+++ b/tensorflow/contrib/lite/models/testdata/g3doc/asr_lm.svg
@@ -0,0 +1,4 @@
+<?xml version="1.0" standalone="yes"?>
+
+<svg version="1.1" viewBox="0.0 0.0 742.6010498687664 753.6010498687664" fill="none" stroke="none" stroke-linecap="square" stroke-miterlimit="10" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink"><clipPath id="p.0"><path d="m0 0l742.6011 0l0 753.6011l-742.6011 0l0 -753.6011z" clip-rule="nonzero"></path></clipPath><g clip-path="url(#p.0)"><path fill="#000000" fill-opacity="0.0" d="m0 0l742.6011 0l0 753.6011l-742.6011 0z" fill-rule="evenodd"></path><path fill="#000000" fill-opacity="0.0" d="m136.0 30.700842l166.01575 0l0 42.110237l-166.01575 0z" fill-rule="evenodd"></path><path stroke="#000000" stroke-width="2.0" stroke-linejoin="round" stroke-linecap="butt" d="m136.0 30.700842l166.01575 0l0 42.110237l-166.01575 0z" fill-rule="evenodd"></path><path fill="#000000" d="m153.6274 57.620842l0 -13.59375l1.8125 0l0 13.59375l-1.8125 0zm4.667679 0l0 -9.859375l1.5 0l0 1.40625q1.09375 -1.625 3.140625 -1.625q0.890625 0 1.640625 0.328125q0.75 0.3125 1.109375 0.84375q0.375 0.515625 0.53125 1.21875q0.09375 0.46875 0.09375 1.625l0 6.0625l-1.671875 0l0 -6.0q0 -1.015625 -0.203125 -1.515625q-0.1875 -0.515625 -0.6875 -0.8125q-0.5 -0.296875 -1.171875 -0.296875q-1.0625 0 -1.84375 0.671875q-0.765625 0.671875 -0.765625 2.578125l0 5.375l-1.671875 0zm10.375717 3.78125l0 -13.640625l1.53125 0l0 1.28125q0.53125 -0.75 1.203125 -1.125q0.6875 -0.375 1.640625 -0.375q1.265625 0 2.234375 0.65625q0.96875 0.640625 1.453125 1.828125q0.5 1.1875 0.5 2.59375q0 1.515625 -0.546875 2.734375q-0.546875 1.203125 -1.578125 1.84375q-1.03125 0.640625 -2.171875 0.640625q-0.84375 0 -1.515625 -0.34375q-0.65625 -0.359375 -1.078125 -0.890625l0 4.796875l-1.671875 0zm1.515625 -8.65625q0 1.90625 0.765625 2.8125q0.78125 0.90625 1.875 0.90625q1.109375 0 1.890625 -0.9375q0.796875 -0.9375 0.796875 -2.921875q0 -1.875 -0.78125 -2.8125q-0.765625 -0.9375 -1.84375 -0.9375q-1.0625 0 -1.890625 1.0q-0.8125 1.0 -0.8125 2.890625zm15.313217 4.875l0 -1.453125q-1.140625 1.671875 -3.125 1.671875q-0.859375 0 -1.625 -0.328125q-0.75 -0.34375 -1.125 -0.84375q-0.359375 -0.5 -0.515625 -1.234375q-0.09375 -0.5 -0.09375 -1.5625l0 -6.109375l1.671875 0l0 5.46875q0 1.3125 0.09375 1.765625q0.15625 0.65625 0.671875 1.03125q0.515625 0.375 1.265625 0.375q0.75 0 1.40625 -0.375q0.65625 -0.390625 0.921875 -1.046875q0.28125 -0.671875 0.28125 -1.9375l0 -5.28125l1.671875 0l0 9.859375l-1.5 0zm7.578842 -1.5l0.234375 1.484375q-0.703125 0.140625 -1.265625 0.140625q-0.90625 0 -1.40625 -0.28125q-0.5 -0.296875 -0.703125 -0.75q-0.203125 -0.46875 -0.203125 -1.984375l0 -5.65625l-1.234375 0l0 -1.3125l1.234375 0l0 -2.4375l1.65625 -1.0l0 3.4375l1.6875 0l0 1.3125l-1.6875 0l0 5.75q0 0.71875 0.078125 0.921875q0.09375 0.203125 0.296875 0.328125q0.203125 0.125 0.578125 0.125q0.265625 0 0.734375 -0.078125zm7.355179 1.5l-1.671875 0l0 -10.640625q-0.59375 0.578125 -1.578125 1.15625q-0.984375 0.5625 -1.765625 0.859375l0 -1.625q1.40625 -0.65625 2.453125 -1.59375q1.046875 -0.9375 1.484375 -1.8125l1.078125 0l0 13.65625zm12.918396 4.0q-1.375 -1.75 -2.328125 -4.078125q-0.953125 -2.34375 -0.953125 -4.84375q0 -2.21875 0.703125 -4.234375q0.84375 -2.34375 2.578125 -4.671875l1.203125 0q-1.125 1.921875 -1.484375 2.75q-0.5625 1.28125 -0.890625 2.671875q-0.40625 1.734375 -0.40625 3.484375q0 4.46875 2.78125 8.921875l-1.203125 0zm2.353302 -6.9375l1.65625 -0.265625q0.140625 1.0 0.765625 1.53125q0.640625 0.515625 1.78125 0.515625q1.15625 0 1.703125 -0.46875q0.5625 -0.46875 0.5625 -1.09375q0 -0.5625 -0.484375 -0.890625q-0.34375 -0.21875 -1.703125 -0.5625q-1.84375 -0.46875 -2.5625 -0.796875q-0.703125 -0.34375 -1.078125 -0.9375q-0.359375 -0.609375 -0.359375 -1.328125q0 -0.65625 0.296875 -1.21875q0.3125 -0.5625 0.828125 -0.9375q0.390625 -0.28125 1.0625 -0.484375q0.671875 -0.203125 1.4375 -0.203125q1.171875 0 2.046875 0.34375q0.875 0.328125 1.28125 0.90625q0.421875 0.5625 0.578125 1.515625l-1.625 0.21875q-0.109375 -0.75 -0.65625 -1.171875q-0.53125 -0.4375 -1.5 -0.4375q-1.15625 0 -1.640625 0.390625q-0.484375 0.375 -0.484375 0.875q0 0.328125 0.203125 0.59375q0.203125 0.265625 0.640625 0.4375q0.25 0.09375 1.46875 0.4375q1.765625 0.46875 2.46875 0.765625q0.703125 0.296875 1.09375 0.875q0.40625 0.578125 0.40625 1.4375q0 0.828125 -0.484375 1.578125q-0.484375 0.734375 -1.40625 1.140625q-0.921875 0.390625 -2.078125 0.390625q-1.921875 0 -2.9375 -0.796875q-1.0 -0.796875 -1.28125 -2.359375zm10.015625 -8.75l0 -1.90625l1.671875 0l0 1.90625l-1.671875 0zm0 11.6875l0 -9.859375l1.671875 0l0 9.859375l-1.671875 0zm3.2541962 0l0 -1.359375l6.265625 -7.1875q-1.0625 0.046875 -1.875 0.046875l-4.015625 0l0 -1.359375l8.046875 0l0 1.109375l-5.34375 6.25l-1.015625 1.140625q1.109375 -0.078125 2.09375 -0.078125l4.5625 0l0 1.4375l-8.71875 0zm16.953125 -3.171875l1.71875 0.21875q-0.40625 1.5 -1.515625 2.34375q-1.09375 0.828125 -2.8125 0.828125q-2.15625 0 -3.421875 -1.328125q-1.265625 -1.328125 -1.265625 -3.734375q0 -2.484375 1.265625 -3.859375q1.28125 -1.375 3.328125 -1.375q1.984375 0 3.234375 1.34375q1.25 1.34375 1.25 3.796875q0 0.140625 -0.015625 0.4375l-7.34375 0q0.09375 1.625 0.921875 2.484375q0.828125 0.859375 2.0625 0.859375q0.90625 0 1.546875 -0.46875q0.65625 -0.484375 1.046875 -1.546875zm-5.484375 -2.703125l5.5 0q-0.109375 -1.234375 -0.625 -1.859375q-0.796875 -0.96875 -2.078125 -0.96875q-1.140625 0 -1.9375 0.78125q-0.78125 0.765625 -0.859375 2.046875zm23.07463 -2.125l-8.968735 0l0 -1.5625l8.968735 0l0 1.5625zm0 4.125l-8.968735 0l0 -1.546875l8.968735 0l0 1.546875zm13.125153 3.875l-1.671875 0l0 -10.640625q-0.59375 0.578125 -1.578125 1.15625q-0.984375 0.5625 -1.765625 0.859375l0 -1.625q1.40625 -0.65625 2.453125 -1.59375q1.046875 -0.9375 1.484375 -1.8125l1.078125 0l0 13.65625zm5.641327 4.0l-1.1875 0q2.765625 -4.453125 2.765625 -8.921875q0 -1.734375 -0.390625 -3.453125q-0.328125 -1.390625 -0.890625 -2.671875q-0.359375 -0.84375 -1.484375 -2.78125l1.1875 0q1.75 2.328125 2.578125 4.671875q0.71875 2.015625 0.71875 4.234375q0 2.5 -0.96875 4.84375q-0.953125 2.328125 -2.328125 4.078125z" fill-rule="nonzero"></path><path fill="#000000" fill-opacity="0.0" d="m103.0 190.02362l232.18896 0l0 42.110245l-232.18896 0z" fill-rule="evenodd"></path><path stroke="#000000" stroke-width="2.0" stroke-linejoin="round" stroke-linecap="butt" d="m103.0 190.02362l232.18896 0l0 42.110245l-232.18896 0z" fill-rule="evenodd"></path><path fill="#000000" d="m151.01154 216.94362l0 -13.59375l1.796875 0l0 11.984375l6.703125 0l0 1.609375l-8.5 0zm9.844467 -4.375l1.6875 -0.140625q0.125 1.015625 0.5625 1.671875q0.4375 0.65625 1.359375 1.0625q0.9375 0.40625 2.09375 0.40625q1.03125 0 1.8125 -0.3125q0.796875 -0.3125 1.1875 -0.84375q0.390625 -0.53125 0.390625 -1.15625q0 -0.640625 -0.375 -1.109375q-0.375 -0.484375 -1.234375 -0.8125q-0.546875 -0.21875 -2.421875 -0.65625q-1.875 -0.453125 -2.625 -0.859375q-0.96875 -0.515625 -1.453125 -1.265625q-0.46875 -0.75 -0.46875 -1.6875q0 -1.03125 0.578125 -1.921875q0.59375 -0.90625 1.703125 -1.359375q1.125 -0.46875 2.5 -0.46875q1.515625 0 2.671875 0.484375q1.15625 0.484375 1.765625 1.4375q0.625 0.9375 0.671875 2.140625l-1.71875 0.125q-0.140625 -1.28125 -0.953125 -1.9375q-0.796875 -0.671875 -2.359375 -0.671875q-1.625 0 -2.375 0.609375q-0.75 0.59375 -0.75 1.4375q0 0.734375 0.53125 1.203125q0.515625 0.46875 2.703125 0.96875q2.203125 0.5 3.015625 0.875q1.1875 0.546875 1.75 1.390625q0.578125 0.828125 0.578125 1.921875q0 1.09375 -0.625 2.0625q-0.625 0.953125 -1.796875 1.484375q-1.15625 0.53125 -2.609375 0.53125q-1.84375 0 -3.09375 -0.53125q-1.25 -0.546875 -1.96875 -1.625q-0.703125 -1.078125 -0.734375 -2.453125zm16.506073 4.375l0 -12.0l-4.46875 0l0 -1.59375l10.765625 0l0 1.59375l-4.5 0l0 12.0l-1.796875 0zm7.880356 0l0 -13.59375l2.71875 0l3.21875 9.625q0.4375 1.34375 0.640625 2.015625q0.234375 -0.75 0.734375 -2.1875l3.25 -9.453125l2.421875 0l0 13.59375l-1.734375 0l0 -11.390625l-3.953125 11.390625l-1.625 0l-3.9375 -11.578125l0 11.578125l-1.734375 0zm21.212677 0l-1.671875 0l0 -10.640625q-0.59375 0.578125 -1.578125 1.15625q-0.984375 0.5625 -1.765625 0.859375l0 -1.625q1.40625 -0.65625 2.453125 -1.59375q1.046875 -0.9375 1.484375 -1.8125l1.078125 0l0 13.65625zm12.918396 4.0q-1.375 -1.75 -2.328125 -4.078125q-0.953125 -2.34375 -0.953125 -4.84375q0 -2.21875 0.703125 -4.234375q0.84375 -2.34375 2.578125 -4.671875l1.203125 0q-1.125 1.921875 -1.484375 2.75q-0.5625 1.28125 -0.890625 2.671875q-0.40625 1.734375 -0.40625 3.484375q0 4.46875 2.78125 8.921875l-1.203125 0zm2.556427 -7.5625l1.765625 -0.15625q0.1875 1.28125 0.890625 1.9375q0.71875 0.640625 1.71875 0.640625q1.203125 0 2.03125 -0.90625q0.84375 -0.90625 0.84375 -2.421875q0 -1.421875 -0.8125 -2.25q-0.796875 -0.828125 -2.09375 -0.828125q-0.796875 0 -1.453125 0.375q-0.640625 0.359375 -1.015625 0.953125l-1.578125 -0.203125l1.328125 -7.0l6.765625 0l0 1.609375l-5.4375 0l-0.734375 3.640625q1.234375 -0.84375 2.578125 -0.84375q1.78125 0 3.0 1.234375q1.234375 1.234375 1.234375 3.171875q0 1.84375 -1.078125 3.1875q-1.3125 1.65625 -3.578125 1.65625q-1.859375 0 -3.03125 -1.03125q-1.171875 -1.046875 -1.34375 -2.765625zm16.672592 3.5625l-1.671875 0l0 -10.640625q-0.59375 0.578125 -1.578125 1.15625q-0.984375 0.5625 -1.765625 0.859375l0 -1.625q1.40625 -0.65625 2.453125 -1.59375q1.046875 -0.9375 1.484375 -1.8125l1.078125 0l0 13.65625zm12.860092 -1.609375l0 1.609375l-8.984375 0q-0.015625 -0.609375 0.1875 -1.15625q0.34375 -0.921875 1.09375 -1.8125q0.765625 -0.890625 2.1875 -2.0625q2.21875 -1.8125 3.0 -2.875q0.78125 -1.0625 0.78125 -2.015625q0 -0.984375 -0.71875 -1.671875q-0.703125 -0.6875 -1.84375 -0.6875q-1.203125 0 -1.9375 0.734375q-0.71875 0.71875 -0.71875 2.0l-1.71875 -0.171875q0.171875 -1.921875 1.328125 -2.921875q1.15625 -1.015625 3.09375 -1.015625q1.953125 0 3.09375 1.09375q1.140625 1.078125 1.140625 2.6875q0 0.8125 -0.34375 1.609375q-0.328125 0.78125 -1.109375 1.65625q-0.765625 0.859375 -2.5625 2.390625q-1.5 1.265625 -1.9375 1.71875q-0.421875 0.4375 -0.703125 0.890625l6.671875 0zm0.95384216 1.609375l3.5937347 -5.125l-3.3281097 -4.734375l2.09375 0l1.5156097 2.3125q0.421875 0.65625 0.671875 1.109375q0.421875 -0.609375 0.765625 -1.09375l1.65625 -2.328125l1.984375 0l-3.390625 4.640625l3.65625 5.21875l-2.046875 0l-2.03125 -3.0625l-0.53125 -0.828125l-2.5937347 3.890625l-2.015625 0zm16.26561 0l-1.671875 0l0 -10.640625q-0.59375 0.578125 -1.578125 1.15625q-0.984375 0.5625 -1.765625 0.859375l0 -1.625q1.40625 -0.65625 2.453125 -1.59375q1.046875 -0.9375 1.484375 -1.8125l1.078125 0l0 13.65625zm12.750732 -10.265625l-1.65625 0.125q-0.21875 -0.984375 -0.640625 -1.421875q-0.671875 -0.71875 -1.65625 -0.71875q-0.8125 0 -1.40625 0.4375q-0.796875 0.578125 -1.25 1.6875q-0.453125 1.09375 -0.46875 3.140625q0.609375 -0.921875 1.46875 -1.359375q0.875 -0.453125 1.828125 -0.453125q1.671875 0 2.84375 1.234375q1.171875 1.234375 1.171875 3.171875q0 1.28125 -0.546875 2.390625q-0.546875 1.09375 -1.515625 1.6875q-0.96875 0.578125 -2.1875 0.578125q-2.09375 0 -3.40625 -1.53125q-1.3125 -1.546875 -1.3125 -5.0625q0 -3.953125 1.453125 -5.734375q1.265625 -1.5625 3.421875 -1.5625q1.609375 0 2.625 0.90625q1.03125 0.890625 1.234375 2.484375zm-6.8125 5.859375q0 0.859375 0.359375 1.65625q0.375 0.78125 1.03125 1.203125q0.65625 0.40625 1.375 0.40625q1.0625 0 1.8125 -0.84375q0.765625 -0.859375 0.765625 -2.328125q0 -1.40625 -0.75 -2.21875q-0.75 -0.8125 -1.890625 -0.8125q-1.125 0 -1.921875 0.8125q-0.78125 0.8125 -0.78125 2.125zm10.078857 8.40625l-1.1875 0q2.765625 -4.453125 2.765625 -8.921875q0 -1.734375 -0.390625 -3.453125q-0.328125 -1.390625 -0.890625 -2.671875q-0.359375 -0.84375 -1.484375 -2.78125l1.1875 0q1.75 2.328125 2.578125 4.671875q0.71875 2.015625 0.71875 4.234375q0 2.5 -0.96875 4.84375q-0.953125 2.328125 -2.328125 4.078125z" fill-rule="nonzero"></path><path fill="#000000" fill-opacity="0.0" d="m129.09448 653.0184l180.0 0l0 42.11023l-180.0 0z" fill-rule="evenodd"></path><path stroke="#000000" stroke-width="2.0" stroke-linejoin="round" stroke-linecap="butt" d="m129.09448 653.0184l180.0 0l0 42.11023l-180.0 0z" fill-rule="evenodd"></path><path fill="#000000" d="m150.8024 673.31335q0 -3.390625 1.8125 -5.296875q1.828125 -1.921875 4.703125 -1.921875q1.875 0 3.390625 0.90625q1.515625 0.890625 2.296875 2.5q0.796875 1.609375 0.796875 3.65625q0 2.0625 -0.84375 3.703125q-0.828125 1.625 -2.359375 2.46875q-1.53125 0.84375 -3.296875 0.84375q-1.921875 0 -3.4375 -0.921875q-1.5 -0.9375 -2.28125 -2.53125q-0.78125 -1.609375 -0.78125 -3.40625zm1.859375 0.03125q0 2.453125 1.3125 3.875q1.328125 1.40625 3.3125 1.40625q2.03125 0 3.34375 -1.421875q1.3125 -1.4375 1.3125 -4.0625q0 -1.65625 -0.5625 -2.890625q-0.546875 -1.234375 -1.640625 -1.921875q-1.078125 -0.6875 -2.421875 -0.6875q-1.90625 0 -3.28125 1.3125q-1.375 1.3125 -1.375 4.390625zm19.433304 6.59375l0 -1.453125q-1.140625 1.671875 -3.125 1.671875q-0.859375 0 -1.625 -0.328125q-0.75 -0.34375 -1.125 -0.84375q-0.359375 -0.5 -0.515625 -1.234375q-0.09375 -0.5 -0.09375 -1.5625l0 -6.109375l1.671875 0l0 5.46875q0 1.3125 0.09375 1.765625q0.15625 0.65625 0.671875 1.03125q0.515625 0.375 1.265625 0.375q0.75 0 1.40625 -0.375q0.65625 -0.390625 0.921875 -1.046875q0.28125 -0.671875 0.28125 -1.9375l0 -5.28125l1.671875 0l0 9.859375l-1.5 0zm7.578842 -1.5l0.234375 1.484375q-0.703125 0.140625 -1.265625 0.140625q-0.90625 0 -1.40625 -0.28125q-0.5 -0.296875 -0.703125 -0.75q-0.203125 -0.46875 -0.203125 -1.984375l0 -5.65625l-1.234375 0l0 -1.3125l1.234375 0l0 -2.4375l1.65625 -1.0l0 3.4375l1.6875 0l0 1.3125l-1.6875 0l0 5.75q0 0.71875 0.078125 0.921875q0.09375 0.203125 0.296875 0.328125q0.203125 0.125 0.578125 0.125q0.265625 0 0.734375 -0.078125zm1.5270538 5.28125l0 -13.640625l1.53125 0l0 1.28125q0.53125 -0.75 1.203125 -1.125q0.6875 -0.375 1.640625 -0.375q1.265625 0 2.234375 0.65625q0.96875 0.640625 1.453125 1.828125q0.5 1.1875 0.5 2.59375q0 1.515625 -0.546875 2.734375q-0.546875 1.203125 -1.578125 1.84375q-1.03125 0.640625 -2.171875 0.640625q-0.84375 0 -1.515625 -0.34375q-0.65625 -0.359375 -1.078125 -0.890625l0 4.796875l-1.671875 0zm1.515625 -8.65625q0 1.90625 0.765625 2.8125q0.78125 0.90625 1.875 0.90625q1.109375 0 1.890625 -0.9375q0.796875 -0.9375 0.796875 -2.921875q0 -1.875 -0.78125 -2.8125q-0.765625 -0.9375 -1.84375 -0.9375q-1.0625 0 -1.890625 1.0q-0.8125 1.0 -0.8125 2.890625zm15.313217 4.875l0 -1.453125q-1.140625 1.671875 -3.125 1.671875q-0.859375 0 -1.625 -0.328125q-0.75 -0.34375 -1.125 -0.84375q-0.359375 -0.5 -0.515625 -1.234375q-0.09375 -0.5 -0.09375 -1.5625l0 -6.109375l1.671875 0l0 5.46875q0 1.3125 0.09375 1.765625q0.15625 0.65625 0.671875 1.03125q0.515625 0.375 1.265625 0.375q0.75 0 1.40625 -0.375q0.65625 -0.390625 0.921875 -1.046875q0.28125 -0.671875 0.28125 -1.9375l0 -5.28125l1.671875 0l0 9.859375l-1.5 0zm7.578842 -1.5l0.234375 1.484375q-0.703125 0.140625 -1.265625 0.140625q-0.90625 0 -1.40625 -0.28125q-0.5 -0.296875 -0.703125 -0.75q-0.203125 -0.46875 -0.203125 -1.984375l0 -5.65625l-1.234375 0l0 -1.3125l1.234375 0l0 -2.4375l1.65625 -1.0l0 3.4375l1.6875 0l0 1.3125l-1.6875 0l0 5.75q0 0.71875 0.078125 0.921875q0.09375 0.203125 0.296875 0.328125q0.203125 0.125 0.578125 0.125q0.265625 0 0.734375 -0.078125zm9.897858 5.5q-1.375 -1.75 -2.328125 -4.078125q-0.953125 -2.34375 -0.953125 -4.84375q0 -2.21875 0.703125 -4.234375q0.84375 -2.34375 2.578125 -4.671875l1.203125 0q-1.125 1.921875 -1.484375 2.75q-0.5625 1.28125 -0.890625 2.671875q-0.40625 1.734375 -0.40625 3.484375q0 4.46875 2.78125 8.921875l-1.203125 0zm2.353302 -6.9375l1.65625 -0.265625q0.140625 1.0 0.765625 1.53125q0.640625 0.515625 1.78125 0.515625q1.15625 0 1.703125 -0.46875q0.5625 -0.46875 0.5625 -1.09375q0 -0.5625 -0.484375 -0.890625q-0.34375 -0.21875 -1.703125 -0.5625q-1.84375 -0.46875 -2.5625 -0.796875q-0.703125 -0.34375 -1.078125 -0.9375q-0.359375 -0.609375 -0.359375 -1.328125q0 -0.65625 0.296875 -1.21875q0.3125 -0.5625 0.828125 -0.9375q0.390625 -0.28125 1.0625 -0.484375q0.671875 -0.203125 1.4375 -0.203125q1.171875 0 2.046875 0.34375q0.875 0.328125 1.28125 0.90625q0.421875 0.5625 0.578125 1.515625l-1.625 0.21875q-0.109375 -0.75 -0.65625 -1.171875q-0.53125 -0.4375 -1.5 -0.4375q-1.15625 0 -1.640625 0.390625q-0.484375 0.375 -0.484375 0.875q0 0.328125 0.203125 0.59375q0.203125 0.265625 0.640625 0.4375q0.25 0.09375 1.46875 0.4375q1.765625 0.46875 2.46875 0.765625q0.703125 0.296875 1.09375 0.875q0.40625 0.578125 0.40625 1.4375q0 0.828125 -0.484375 1.578125q-0.484375 0.734375 -1.40625 1.140625q-0.921875 0.390625 -2.078125 0.390625q-1.921875 0 -2.9375 -0.796875q-1.0 -0.796875 -1.28125 -2.359375zm10.015625 -8.75l0 -1.90625l1.671875 0l0 1.90625l-1.671875 0zm0 11.6875l0 -9.859375l1.671875 0l0 9.859375l-1.671875 0zm3.2541962 0l0 -1.359375l6.265625 -7.1875q-1.0625 0.046875 -1.875 0.046875l-4.015625 0l0 -1.359375l8.046875 0l0 1.109375l-5.34375 6.25l-1.015625 1.140625q1.109375 -0.078125 2.09375 -0.078125l4.5625 0l0 1.4375l-8.71875 0zm16.953125 -3.171875l1.71875 0.21875q-0.40625 1.5 -1.515625 2.34375q-1.09375 0.828125 -2.8125 0.828125q-2.15625 0 -3.421875 -1.328125q-1.265625 -1.328125 -1.265625 -3.734375q0 -2.484375 1.265625 -3.859375q1.28125 -1.375 3.328125 -1.375q1.984375 0 3.234375 1.34375q1.25 1.34375 1.25 3.796875q0 0.140625 -0.015625 0.4375l-7.34375 0q0.09375 1.625 0.921875 2.484375q0.828125 0.859375 2.0625 0.859375q0.90625 0 1.546875 -0.46875q0.65625 -0.484375 1.046875 -1.546875zm-5.484375 -2.703125l5.5 0q-0.109375 -1.234375 -0.625 -1.859375q-0.796875 -0.96875 -2.078125 -0.96875q-1.140625 0 -1.9375 0.78125q-0.78125 0.765625 -0.859375 2.046875zm23.074646 -2.125l-8.96875 0l0 -1.5625l8.96875 0l0 1.5625zm0 4.125l-8.96875 0l0 -1.546875l8.96875 0l0 1.546875zm13.125153 3.875l-1.671875 0l0 -10.640625q-0.59375 0.578125 -1.578125 1.15625q-0.984375 0.5625 -1.765625 0.859375l0 -1.625q1.40625 -0.65625 2.453125 -1.59375q1.046875 -0.9375 1.484375 -1.8125l1.078125 0l0 13.65625zm5.641327 4.0l-1.1875 0q2.765625 -4.453125 2.765625 -8.921875q0 -1.734375 -0.390625 -3.453125q-0.328125 -1.390625 -0.890625 -2.671875q-0.359375 -0.84375 -1.484375 -2.78125l1.1875 0q1.75 2.328125 2.578125 4.671875q0.71875 2.015625 0.71875 4.234375q0 2.5 -0.96875 4.84375q-0.953125 2.328125 -2.328125 4.078125z" fill-rule="nonzero"></path><path fill="#000000" fill-opacity="0.0" d="m219.09448 232.13387l0 33.606277" fill-rule="evenodd"></path><path stroke="#000000" stroke-width="1.0" stroke-linejoin="round" stroke-linecap="butt" d="m219.09448 232.13387l0 27.606277" fill-rule="evenodd"></path><path fill="#000000" stroke="#000000" stroke-width="1.0" stroke-linecap="butt" d="m217.44275 259.74014l1.6517334 4.5381165l1.6517334 -4.5381165z" fill-rule="evenodd"></path><path fill="#000000" fill-opacity="0.0" d="m219.09448 307.8476l0 34.173218" fill-rule="evenodd"></path><path stroke="#000000" stroke-width="1.0" stroke-linejoin="round" stroke-linecap="butt" d="m219.09448 307.8476l0 28.173248" fill-rule="evenodd"></path><path fill="#000000" stroke="#000000" stroke-width="1.0" stroke-linecap="butt" d="m217.44275 336.02084l1.6517334 4.538086l1.6517334 -4.538086z" fill-rule="evenodd"></path><path fill="#000000" fill-opacity="0.0" d="m219.00787 72.81108l0.09448242 25.732285" fill-rule="evenodd"></path><path stroke="#000000" stroke-width="1.0" stroke-linejoin="round" stroke-linecap="butt" d="m219.00787 72.81108l0.07246399 19.732315" fill-rule="evenodd"></path><path fill="#000000" stroke="#000000" stroke-width="1.0" stroke-linecap="butt" d="m217.4286 92.54946l1.668396 4.5320053l1.6350555 -4.544136z" fill-rule="evenodd"></path><path fill="#000000" fill-opacity="0.0" d="m219.09448 384.13385l0 36.283478" fill-rule="evenodd"></path><path stroke="#000000" stroke-width="1.0" stroke-linejoin="round" stroke-linecap="butt" d="m219.09448 384.13385l0 30.283478" fill-rule="evenodd"></path><path fill="#000000" stroke="#000000" stroke-width="1.0" stroke-linecap="butt" d="m217.44275 414.41733l1.6517334 4.538086l1.6517334 -4.538086z" fill-rule="evenodd"></path><path fill="#000000" fill-opacity="0.0" d="m103.0 265.73737l232.18896 0l0 42.11023l-232.18896 0z" fill-rule="evenodd"></path><path stroke="#000000" stroke-width="2.0" stroke-linejoin="round" stroke-linecap="butt" d="m103.0 265.73737l232.18896 0l0 42.11023l-232.18896 0z" fill-rule="evenodd"></path><path fill="#000000" d="m151.01154 292.65735l0 -13.59375l1.796875 0l0 11.984375l6.703125 0l0 1.609375l-8.5 0zm9.844467 -4.375l1.6875 -0.140625q0.125 1.015625 0.5625 1.671875q0.4375 0.65625 1.359375 1.0625q0.9375 0.40625 2.09375 0.40625q1.03125 0 1.8125 -0.3125q0.796875 -0.3125 1.1875 -0.84375q0.390625 -0.53125 0.390625 -1.15625q0 -0.640625 -0.375 -1.109375q-0.375 -0.484375 -1.234375 -0.8125q-0.546875 -0.21875 -2.421875 -0.65625q-1.875 -0.453125 -2.625 -0.859375q-0.96875 -0.515625 -1.453125 -1.265625q-0.46875 -0.75 -0.46875 -1.6875q0 -1.03125 0.578125 -1.921875q0.59375 -0.90625 1.703125 -1.359375q1.125 -0.46875 2.5 -0.46875q1.515625 0 2.671875 0.484375q1.15625 0.484375 1.765625 1.4375q0.625 0.9375 0.671875 2.140625l-1.71875 0.125q-0.140625 -1.28125 -0.953125 -1.9375q-0.796875 -0.671875 -2.359375 -0.671875q-1.625 0 -2.375 0.609375q-0.75 0.59375 -0.75 1.4375q0 0.734375 0.53125 1.203125q0.515625 0.46875 2.703125 0.96875q2.203125 0.5 3.015625 0.875q1.1875 0.546875 1.75 1.390625q0.578125 0.828125 0.578125 1.921875q0 1.09375 -0.625 2.0625q-0.625 0.953125 -1.796875 1.484375q-1.15625 0.53125 -2.609375 0.53125q-1.84375 0 -3.09375 -0.53125q-1.25 -0.546875 -1.96875 -1.625q-0.703125 -1.078125 -0.734375 -2.453125zm16.506073 4.375l0 -12.0l-4.46875 0l0 -1.59375l10.765625 0l0 1.59375l-4.5 0l0 12.0l-1.796875 0zm7.880356 0l0 -13.59375l2.71875 0l3.21875 9.625q0.4375 1.34375 0.640625 2.015625q0.234375 -0.75 0.734375 -2.1875l3.25 -9.453125l2.421875 0l0 13.59375l-1.734375 0l0 -11.390625l-3.953125 11.390625l-1.625 0l-3.9375 -11.578125l0 11.578125l-1.734375 0zm23.697052 -1.609375l0 1.609375l-8.984375 0q-0.015625 -0.609375 0.1875 -1.15625q0.34375 -0.921875 1.09375 -1.8125q0.765625 -0.890625 2.1875 -2.0625q2.21875 -1.8125 3.0 -2.875q0.78125 -1.0625 0.78125 -2.015625q0 -0.984375 -0.71875 -1.671875q-0.703125 -0.6875 -1.84375 -0.6875q-1.203125 0 -1.9375 0.734375q-0.71875 0.71875 -0.71875 2.0l-1.71875 -0.171875q0.171875 -1.921875 1.328125 -2.921875q1.15625 -1.015625 3.09375 -1.015625q1.953125 0 3.09375 1.09375q1.140625 1.078125 1.140625 2.6875q0 0.8125 -0.34375 1.609375q-0.328125 0.78125 -1.109375 1.65625q-0.765625 0.859375 -2.5625 2.390625q-1.5 1.265625 -1.9375 1.71875q-0.421875 0.4375 -0.703125 0.890625l6.671875 0zm10.434021 5.609375q-1.375 -1.75 -2.328125 -4.078125q-0.953125 -2.34375 -0.953125 -4.84375q0 -2.21875 0.703125 -4.234375q0.84375 -2.34375 2.578125 -4.671875l1.203125 0q-1.125 1.921875 -1.484375 2.75q-0.5625 1.28125 -0.890625 2.671875q-0.40625 1.734375 -0.40625 3.484375q0 4.46875 2.78125 8.921875l-1.203125 0zm2.556427 -7.5625l1.765625 -0.15625q0.1875 1.28125 0.890625 1.9375q0.71875 0.640625 1.71875 0.640625q1.203125 0 2.03125 -0.90625q0.84375 -0.90625 0.84375 -2.421875q0 -1.421875 -0.8125 -2.25q-0.796875 -0.828125 -2.09375 -0.828125q-0.796875 0 -1.453125 0.375q-0.640625 0.359375 -1.015625 0.953125l-1.578125 -0.203125l1.328125 -7.0l6.765625 0l0 1.609375l-5.4375 0l-0.734375 3.640625q1.234375 -0.84375 2.578125 -0.84375q1.78125 0 3.0 1.234375q1.234375 1.234375 1.234375 3.171875q0 1.84375 -1.078125 3.1875q-1.3125 1.65625 -3.578125 1.65625q-1.859375 0 -3.03125 -1.03125q-1.171875 -1.046875 -1.34375 -2.765625zm16.672592 3.5625l-1.671875 0l0 -10.640625q-0.59375 0.578125 -1.578125 1.15625q-0.984375 0.5625 -1.765625 0.859375l0 -1.625q1.40625 -0.65625 2.453125 -1.59375q1.046875 -0.9375 1.484375 -1.8125l1.078125 0l0 13.65625zm12.860092 -1.609375l0 1.609375l-8.984375 0q-0.015625 -0.609375 0.1875 -1.15625q0.34375 -0.921875 1.09375 -1.8125q0.765625 -0.890625 2.1875 -2.0625q2.21875 -1.8125 3.0 -2.875q0.78125 -1.0625 0.78125 -2.015625q0 -0.984375 -0.71875 -1.671875q-0.703125 -0.6875 -1.84375 -0.6875q-1.203125 0 -1.9375 0.734375q-0.71875 0.71875 -0.71875 2.0l-1.71875 -0.171875q0.171875 -1.921875 1.328125 -2.921875q1.15625 -1.015625 3.09375 -1.015625q1.953125 0 3.09375 1.09375q1.140625 1.078125 1.140625 2.6875q0 0.8125 -0.34375 1.609375q-0.328125 0.78125 -1.109375 1.65625q-0.765625 0.859375 -2.5625 2.390625q-1.5 1.265625 -1.9375 1.71875q-0.421875 0.4375 -0.703125 0.890625l6.671875 0zm0.95384216 1.609375l3.5937347 -5.125l-3.3281097 -4.734375l2.09375 0l1.5156097 2.3125q0.421875 0.65625 0.671875 1.109375q0.421875 -0.609375 0.765625 -1.09375l1.65625 -2.328125l1.984375 0l-3.390625 4.640625l3.65625 5.21875l-2.046875 0l-2.03125 -3.0625l-0.53125 -0.828125l-2.5937347 3.890625l-2.015625 0zm9.98436 -3.59375l1.671875 -0.21875q0.28125 1.421875 0.96875 2.046875q0.703125 0.625 1.6875 0.625q1.1875 0 2.0 -0.8125q0.8125 -0.828125 0.8125 -2.03125q0 -1.140625 -0.765625 -1.890625q-0.75 -0.75 -1.90625 -0.75q-0.46875 0 -1.171875 0.1875l0.1875 -1.46875q0.15625 0.015625 0.265625 0.015625q1.0625 0 1.90625 -0.546875q0.859375 -0.5625 0.859375 -1.71875q0 -0.921875 -0.625 -1.515625q-0.609375 -0.609375 -1.59375 -0.609375q-0.96875 0 -1.625 0.609375q-0.640625 0.609375 -0.828125 1.84375l-1.671875 -0.296875q0.296875 -1.6875 1.375 -2.609375q1.09375 -0.921875 2.71875 -0.921875q1.109375 0 2.046875 0.484375q0.9375 0.46875 1.421875 1.296875q0.5 0.828125 0.5 1.75q0 0.890625 -0.46875 1.609375q-0.46875 0.71875 -1.40625 1.15625q1.21875 0.265625 1.875 1.15625q0.671875 0.875 0.671875 2.1875q0 1.78125 -1.296875 3.015625q-1.296875 1.234375 -3.28125 1.234375q-1.796875 0 -2.984375 -1.0625q-1.171875 -1.0625 -1.34375 -2.765625zm19.141357 1.984375l0 1.609375l-8.984375 0q-0.015625 -0.609375 0.1875 -1.15625q0.34375 -0.921875 1.09375 -1.8125q0.765625 -0.890625 2.1875 -2.0625q2.21875 -1.8125 3.0 -2.875q0.78125 -1.0625 0.78125 -2.015625q0 -0.984375 -0.71875 -1.671875q-0.703125 -0.6875 -1.84375 -0.6875q-1.203125 0 -1.9375 0.734375q-0.71875 0.71875 -0.71875 2.0l-1.71875 -0.171875q0.171875 -1.921875 1.328125 -2.921875q1.15625 -1.015625 3.09375 -1.015625q1.953125 0 3.09375 1.09375q1.140625 1.078125 1.140625 2.6875q0 0.8125 -0.34375 1.609375q-0.328125 0.78125 -1.109375 1.65625q-0.765625 0.859375 -2.5625 2.390625q-1.5 1.265625 -1.9375 1.71875q-0.421875 0.4375 -0.703125 0.890625l6.671875 0zm3.1569824 5.609375l-1.1875 0q2.765625 -4.453125 2.765625 -8.921875q0 -1.734375 -0.390625 -3.453125q-0.328125 -1.390625 -0.890625 -2.671875q-0.359375 -0.84375 -1.484375 -2.78125l1.1875 0q1.75 2.328125 2.578125 4.671875q0.71875 2.015625 0.71875 4.234375q0 2.5 -0.96875 4.84375q-0.953125 2.328125 -2.328125 4.078125z" fill-rule="nonzero"></path><path fill="#000000" fill-opacity="0.0" d="m103.0 342.02362l232.18896 0l0 42.11023l-232.18896 0z" fill-rule="evenodd"></path><path stroke="#000000" stroke-width="2.0" stroke-linejoin="round" stroke-linecap="butt" d="m103.0 342.02362l232.18896 0l0 42.11023l-232.18896 0z" fill-rule="evenodd"></path><path fill="#000000" d="m151.01154 368.94363l0 -13.59375l1.796875 0l0 11.984375l6.703125 0l0 1.609375l-8.5 0zm9.844467 -4.375l1.6875 -0.140625q0.125 1.015625 0.5625 1.671875q0.4375 0.65625 1.359375 1.0625q0.9375 0.40625 2.09375 0.40625q1.03125 0 1.8125 -0.3125q0.796875 -0.3125 1.1875 -0.84375q0.390625 -0.53125 0.390625 -1.15625q0 -0.640625 -0.375 -1.109375q-0.375 -0.484375 -1.234375 -0.8125q-0.546875 -0.21875 -2.421875 -0.65625q-1.875 -0.453125 -2.625 -0.859375q-0.96875 -0.515625 -1.453125 -1.265625q-0.46875 -0.75 -0.46875 -1.6875q0 -1.03125 0.578125 -1.921875q0.59375 -0.90625 1.703125 -1.359375q1.125 -0.46875 2.5 -0.46875q1.515625 0 2.671875 0.484375q1.15625 0.484375 1.765625 1.4375q0.625 0.9375 0.671875 2.140625l-1.71875 0.125q-0.140625 -1.28125 -0.953125 -1.9375q-0.796875 -0.671875 -2.359375 -0.671875q-1.625 0 -2.375 0.609375q-0.75 0.59375 -0.75 1.4375q0 0.734375 0.53125 1.203125q0.515625 0.46875 2.703125 0.96875q2.203125 0.5 3.015625 0.875q1.1875 0.546875 1.75 1.390625q0.578125 0.828125 0.578125 1.921875q0 1.09375 -0.625 2.0625q-0.625 0.953125 -1.796875 1.484375q-1.15625 0.53125 -2.609375 0.53125q-1.84375 0 -3.09375 -0.53125q-1.25 -0.546875 -1.96875 -1.625q-0.703125 -1.078125 -0.734375 -2.453125zm16.506073 4.375l0 -12.0l-4.46875 0l0 -1.59375l10.765625 0l0 1.59375l-4.5 0l0 12.0l-1.796875 0zm7.880356 0l0 -13.59375l2.71875 0l3.21875 9.625q0.4375 1.34375 0.640625 2.015625q0.234375 -0.75 0.734375 -2.1875l3.25 -9.453125l2.421875 0l0 13.59375l-1.734375 0l0 -11.390625l-3.953125 11.390625l-1.625 0l-3.9375 -11.578125l0 11.578125l-1.734375 0zm14.931427 -3.59375l1.671875 -0.21875q0.28125 1.421875 0.96875 2.046875q0.703125 0.625 1.6875 0.625q1.1875 0 2.0 -0.8125q0.8125 -0.828125 0.8125 -2.03125q0 -1.140625 -0.765625 -1.890625q-0.75 -0.75 -1.90625 -0.75q-0.46875 0 -1.171875 0.1875l0.1875 -1.46875q0.15625 0.015625 0.265625 0.015625q1.0625 0 1.90625 -0.546875q0.859375 -0.5625 0.859375 -1.71875q0 -0.921875 -0.625 -1.515625q-0.609375 -0.609375 -1.59375 -0.609375q-0.96875 0 -1.625 0.609375q-0.640625 0.609375 -0.828125 1.84375l-1.671875 -0.296875q0.296875 -1.6875 1.375 -2.609375q1.09375 -0.921875 2.71875 -0.921875q1.109375 0 2.046875 0.484375q0.9375 0.46875 1.421875 1.296875q0.5 0.828125 0.5 1.75q0 0.890625 -0.46875 1.609375q-0.46875 0.71875 -1.40625 1.15625q1.21875 0.265625 1.875 1.15625q0.671875 0.875 0.671875 2.1875q0 1.78125 -1.296875 3.015625q-1.296875 1.234375 -3.28125 1.234375q-1.796875 0 -2.984375 -1.0625q-1.171875 -1.0625 -1.34375 -2.765625zm19.199646 7.59375q-1.375 -1.75 -2.328125 -4.078125q-0.953125 -2.34375 -0.953125 -4.84375q0 -2.21875 0.703125 -4.234375q0.84375 -2.34375 2.578125 -4.671875l1.203125 0q-1.125 1.921875 -1.484375 2.75q-0.5625 1.28125 -0.890625 2.671875q-0.40625 1.734375 -0.40625 3.484375q0 4.46875 2.78125 8.921875l-1.203125 0zm2.556427 -7.5625l1.765625 -0.15625q0.1875 1.28125 0.890625 1.9375q0.71875 0.640625 1.71875 0.640625q1.203125 0 2.03125 -0.90625q0.84375 -0.90625 0.84375 -2.421875q0 -1.421875 -0.8125 -2.25q-0.796875 -0.828125 -2.09375 -0.828125q-0.796875 0 -1.453125 0.375q-0.640625 0.359375 -1.015625 0.953125l-1.578125 -0.203125l1.328125 -7.0l6.765625 0l0 1.609375l-5.4375 0l-0.734375 3.640625q1.234375 -0.84375 2.578125 -0.84375q1.78125 0 3.0 1.234375q1.234375 1.234375 1.234375 3.171875q0 1.84375 -1.078125 3.1875q-1.3125 1.65625 -3.578125 1.65625q-1.859375 0 -3.03125 -1.03125q-1.171875 -1.046875 -1.34375 -2.765625zm16.672592 3.5625l-1.671875 0l0 -10.640625q-0.59375 0.578125 -1.578125 1.15625q-0.984375 0.5625 -1.765625 0.859375l0 -1.625q1.40625 -0.65625 2.453125 -1.59375q1.046875 -0.9375 1.484375 -1.8125l1.078125 0l0 13.65625zm12.860092 -1.609375l0 1.609375l-8.984375 0q-0.015625 -0.609375 0.1875 -1.15625q0.34375 -0.921875 1.09375 -1.8125q0.765625 -0.890625 2.1875 -2.0625q2.21875 -1.8125 3.0 -2.875q0.78125 -1.0625 0.78125 -2.015625q0 -0.984375 -0.71875 -1.671875q-0.703125 -0.6875 -1.84375 -0.6875q-1.203125 0 -1.9375 0.734375q-0.71875 0.71875 -0.71875 2.0l-1.71875 -0.171875q0.171875 -1.921875 1.328125 -2.921875q1.15625 -1.015625 3.09375 -1.015625q1.953125 0 3.09375 1.09375q1.140625 1.078125 1.140625 2.6875q0 0.8125 -0.34375 1.609375q-0.328125 0.78125 -1.109375 1.65625q-0.765625 0.859375 -2.5625 2.390625q-1.5 1.265625 -1.9375 1.71875q-0.421875 0.4375 -0.703125 0.890625l6.671875 0zm0.95384216 1.609375l3.5937347 -5.125l-3.3281097 -4.734375l2.09375 0l1.5156097 2.3125q0.421875 0.65625 0.671875 1.109375q0.421875 -0.609375 0.765625 -1.09375l1.65625 -2.328125l1.984375 0l-3.390625 4.640625l3.65625 5.21875l-2.046875 0l-2.03125 -3.0625l-0.53125 -0.828125l-2.5937347 3.890625l-2.015625 0zm9.98436 -3.59375l1.671875 -0.21875q0.28125 1.421875 0.96875 2.046875q0.703125 0.625 1.6875 0.625q1.1875 0 2.0 -0.8125q0.8125 -0.828125 0.8125 -2.03125q0 -1.140625 -0.765625 -1.890625q-0.75 -0.75 -1.90625 -0.75q-0.46875 0 -1.171875 0.1875l0.1875 -1.46875q0.15625 0.015625 0.265625 0.015625q1.0625 0 1.90625 -0.546875q0.859375 -0.5625 0.859375 -1.71875q0 -0.921875 -0.625 -1.515625q-0.609375 -0.609375 -1.59375 -0.609375q-0.96875 0 -1.625 0.609375q-0.640625 0.609375 -0.828125 1.84375l-1.671875 -0.296875q0.296875 -1.6875 1.375 -2.609375q1.09375 -0.921875 2.71875 -0.921875q1.109375 0 2.046875 0.484375q0.9375 0.46875 1.421875 1.296875q0.5 0.828125 0.5 1.75q0 0.890625 -0.46875 1.609375q-0.46875 0.71875 -1.40625 1.15625q1.21875 0.265625 1.875 1.15625q0.671875 0.875 0.671875 2.1875q0 1.78125 -1.296875 3.015625q-1.296875 1.234375 -3.28125 1.234375q-1.796875 0 -2.984375 -1.0625q-1.171875 -1.0625 -1.34375 -2.765625zm19.141357 1.984375l0 1.609375l-8.984375 0q-0.015625 -0.609375 0.1875 -1.15625q0.34375 -0.921875 1.09375 -1.8125q0.765625 -0.890625 2.1875 -2.0625q2.21875 -1.8125 3.0 -2.875q0.78125 -1.0625 0.78125 -2.015625q0 -0.984375 -0.71875 -1.671875q-0.703125 -0.6875 -1.84375 -0.6875q-1.203125 0 -1.9375 0.734375q-0.71875 0.71875 -0.71875 2.0l-1.71875 -0.171875q0.171875 -1.921875 1.328125 -2.921875q1.15625 -1.015625 3.09375 -1.015625q1.953125 0 3.09375 1.09375q1.140625 1.078125 1.140625 2.6875q0 0.8125 -0.34375 1.609375q-0.328125 0.78125 -1.109375 1.65625q-0.765625 0.859375 -2.5625 2.390625q-1.5 1.265625 -1.9375 1.71875q-0.421875 0.4375 -0.703125 0.890625l6.671875 0zm3.1569824 5.609375l-1.1875 0q2.765625 -4.453125 2.765625 -8.921875q0 -1.734375 -0.390625 -3.453125q-0.328125 -1.390625 -0.890625 -2.671875q-0.359375 -0.84375 -1.484375 -2.78125l1.1875 0q1.75 2.328125 2.578125 4.671875q0.71875 2.015625 0.71875 4.234375q0 2.5 -0.96875 4.84375q-0.953125 2.328125 -2.328125 4.078125z" fill-rule="nonzero"></path><path fill="#000000" fill-opacity="0.0" d="m219.09448 618.4042l0 34.614197" fill-rule="evenodd"></path><path stroke="#000000" stroke-width="1.0" stroke-linejoin="round" stroke-linecap="butt" d="m219.09448 618.4042l0 28.614197" fill-rule="evenodd"></path><path fill="#000000" stroke="#000000" stroke-width="1.0" stroke-linecap="butt" d="m217.44275 647.0184l1.6517334 4.538086l1.6517334 -4.538086z" fill-rule="evenodd"></path><path fill="#000000" fill-opacity="0.0" d="m103.0 98.54593l232.18896 0l0 58.992126l-232.18896 0z" fill-rule="evenodd"></path><path stroke="#000000" stroke-width="2.0" stroke-linejoin="round" stroke-linecap="butt" d="m103.0 98.54593l232.18896 0l0 58.992126l-232.18896 0z" fill-rule="evenodd"></path><path fill="#000000" d="m143.32318 125.46593l0 -13.59375l9.84375 0l0 1.59375l-8.046875 0l0 4.171875l7.53125 0l0 1.59375l-7.53125 0l0 4.625l8.359375 0l0 1.609375l-10.15625 0zm12.193573 0l0 -9.859375l1.5 0l0 1.390625q0.453125 -0.71875 1.21875 -1.15625q0.78125 -0.453125 1.765625 -0.453125q1.09375 0 1.796875 0.453125q0.703125 0.453125 0.984375 1.28125q1.171875 -1.734375 3.046875 -1.734375q1.46875 0 2.25 0.8125q0.796875 0.8125 0.796875 2.5l0 6.765625l-1.671875 0l0 -6.203125q0 -1.0 -0.15625 -1.4375q-0.15625 -0.453125 -0.59375 -0.71875q-0.421875 -0.265625 -1.0 -0.265625q-1.03125 0 -1.71875 0.6875q-0.6875 0.6875 -0.6875 2.21875l0 5.71875l-1.671875 0l0 -6.40625q0 -1.109375 -0.40625 -1.65625q-0.40625 -0.5625 -1.34375 -0.5625q-0.703125 0 -1.3125 0.375q-0.59375 0.359375 -0.859375 1.078125q-0.265625 0.71875 -0.265625 2.0625l0 5.109375l-1.671875 0zm17.087677 0l-1.546875 0l0 -13.59375l1.65625 0l0 4.84375q1.0625 -1.328125 2.703125 -1.328125q0.90625 0 1.71875 0.375q0.8125 0.359375 1.328125 1.03125q0.53125 0.65625 0.828125 1.59375q0.296875 0.9375 0.296875 2.0q0 2.53125 -1.25 3.921875q-1.25 1.375 -3.0 1.375q-1.75 0 -2.734375 -1.453125l0 1.234375zm-0.015625 -5.0q0 1.765625 0.46875 2.5625q0.796875 1.28125 2.140625 1.28125q1.09375 0 1.890625 -0.9375q0.796875 -0.953125 0.796875 -2.84375q0 -1.921875 -0.765625 -2.84375q-0.765625 -0.921875 -1.84375 -0.921875q-1.09375 0 -1.890625 0.953125q-0.796875 0.953125 -0.796875 2.75zm15.594467 1.828125l1.71875 0.21875q-0.40625 1.5 -1.515625 2.34375q-1.09375 0.828125 -2.8125 0.828125q-2.15625 0 -3.421875 -1.328125q-1.265625 -1.328125 -1.265625 -3.734375q0 -2.484375 1.265625 -3.859375q1.28125 -1.375 3.328125 -1.375q1.984375 0 3.234375 1.34375q1.25 1.34375 1.25 3.796875q0 0.140625 -0.015625 0.4375l-7.34375 0q0.09375 1.625 0.921875 2.484375q0.828125 0.859375 2.0625 0.859375q0.90625 0 1.546875 -0.46875q0.65625 -0.484375 1.046875 -1.546875zm-5.484375 -2.703125l5.5 0q-0.109375 -1.234375 -0.625 -1.859375q-0.796875 -0.96875 -2.078125 -0.96875q-1.140625 0 -1.9375 0.78125q-0.78125 0.765625 -0.859375 2.046875zm15.500717 5.875l0 -1.25q-0.9375 1.46875 -2.75 1.46875q-1.171875 0 -2.171875 -0.640625q-0.984375 -0.65625 -1.53125 -1.8125q-0.53125 -1.171875 -0.53125 -2.6875q0 -1.46875 0.484375 -2.671875q0.5 -1.203125 1.46875 -1.84375q0.984375 -0.640625 2.203125 -0.640625q0.890625 0 1.578125 0.375q0.703125 0.375 1.140625 0.984375l0 -4.875l1.65625 0l0 13.59375l-1.546875 0zm-5.28125 -4.921875q0 1.890625 0.796875 2.828125q0.8125 0.9375 1.890625 0.9375q1.09375 0 1.859375 -0.890625q0.765625 -0.890625 0.765625 -2.734375q0 -2.015625 -0.78125 -2.953125q-0.78125 -0.953125 -1.921875 -0.953125q-1.109375 0 -1.859375 0.90625q-0.75 0.90625 -0.75 2.859375zm15.656967 4.921875l0 -1.25q-0.9375 1.46875 -2.75 1.46875q-1.171875 0 -2.171875 -0.640625q-0.984375 -0.65625 -1.53125 -1.8125q-0.53125 -1.171875 -0.53125 -2.6875q0 -1.46875 0.484375 -2.671875q0.5 -1.203125 1.46875 -1.84375q0.984375 -0.640625 2.203125 -0.640625q0.890625 0 1.578125 0.375q0.703125 0.375 1.140625 0.984375l0 -4.875l1.65625 0l0 13.59375l-1.546875 0zm-5.28125 -4.921875q0 1.890625 0.796875 2.828125q0.8125 0.9375 1.890625 0.9375q1.09375 0 1.859375 -0.890625q0.765625 -0.890625 0.765625 -2.734375q0 -2.015625 -0.78125 -2.953125q-0.78125 -0.953125 -1.921875 -0.953125q-1.109375 0 -1.859375 0.90625q-0.75 0.90625 -0.75 2.859375zm9.281967 -6.765625l0 -1.90625l1.671875 0l0 1.90625l-1.671875 0zm0 11.6875l0 -9.859375l1.671875 0l0 9.859375l-1.671875 0zm4.129196 0l0 -9.859375l1.5 0l0 1.40625q1.09375 -1.625 3.140625 -1.625q0.890625 0 1.640625 0.328125q0.75 0.3125 1.109375 0.84375q0.375 0.515625 0.53125 1.21875q0.09375 0.46875 0.09375 1.625l0 6.0625l-1.671875 0l0 -6.0q0 -1.015625 -0.203125 -1.515625q-0.1875 -0.515625 -0.6875 -0.8125q-0.5 -0.296875 -1.171875 -0.296875q-1.0625 0 -1.84375 0.671875q-0.765625 0.671875 -0.765625 2.578125l0 5.375l-1.671875 0zm10.078842 0.8125l1.609375 0.25q0.109375 0.75 0.578125 1.09375q0.609375 0.453125 1.6875 0.453125q1.171875 0 1.796875 -0.46875q0.625 -0.453125 0.859375 -1.28125q0.125 -0.515625 0.109375 -2.15625q-1.09375 1.296875 -2.71875 1.296875q-2.03125 0 -3.15625 -1.46875q-1.109375 -1.46875 -1.109375 -3.515625q0 -1.40625 0.515625 -2.59375q0.515625 -1.203125 1.484375 -1.84375q0.96875 -0.65625 2.265625 -0.65625q1.75 0 2.875 1.40625l0 -1.1875l1.546875 0l0 8.515625q0 2.3125 -0.46875 3.265625q-0.46875 0.96875 -1.484375 1.515625q-1.015625 0.5625 -2.5 0.5625q-1.765625 0 -2.859375 -0.796875q-1.078125 -0.796875 -1.03125 -2.390625zm1.375 -5.921875q0 1.953125 0.765625 2.84375q0.78125 0.890625 1.9375 0.890625q1.140625 0 1.921875 -0.890625q0.78125 -0.890625 0.78125 -2.78125q0 -1.8125 -0.8125 -2.71875q-0.796875 -0.921875 -1.921875 -0.921875q-1.109375 0 -1.890625 0.90625q-0.78125 0.890625 -0.78125 2.671875zm14.449646 5.109375l0 -13.59375l1.671875 0l0 13.59375l-1.671875 0zm3.5510712 -4.921875q0 -2.734375 1.53125 -4.0625q1.265625 -1.09375 3.09375 -1.09375q2.03125 0 3.3125 1.34375q1.296875 1.328125 1.296875 3.671875q0 1.90625 -0.578125 3.0q-0.5625 1.078125 -1.65625 1.6875q-1.078125 0.59375 -2.375 0.59375q-2.0625 0 -3.34375 -1.328125q-1.28125 -1.328125 -1.28125 -3.8125zm1.71875 0q0 1.890625 0.828125 2.828125q0.828125 0.9375 2.078125 0.9375q1.25 0 2.0625 -0.9375q0.828125 -0.953125 0.828125 -2.890625q0 -1.828125 -0.828125 -2.765625q-0.828125 -0.9375 -2.0625 -0.9375q-1.25 0 -2.078125 0.9375q-0.828125 0.9375 -0.828125 2.828125zm8.656967 0q0 -2.734375 1.53125 -4.0625q1.265625 -1.09375 3.09375 -1.09375q2.03125 0 3.3125 1.34375q1.296875 1.328125 1.296875 3.671875q0 1.90625 -0.578125 3.0q-0.5625 1.078125 -1.65625 1.6875q-1.078125 0.59375 -2.375 0.59375q-2.0625 0 -3.34375 -1.328125q-1.28125 -1.328125 -1.28125 -3.8125zm1.71875 0q0 1.890625 0.828125 2.828125q0.828125 0.9375 2.078125 0.9375q1.25 0 2.0625 -0.9375q0.828125 -0.953125 0.828125 -2.890625q0 -1.828125 -0.828125 -2.765625q-0.828125 -0.9375 -2.0625 -0.9375q-1.25 0 -2.078125 0.9375q-0.828125 0.9375 -0.828125 2.828125zm9.297607 4.921875l0 -13.59375l1.671875 0l0 7.75l3.953125 -4.015625l2.15625 0l-3.765625 3.65625l4.140625 6.203125l-2.0625 0l-3.25 -5.03125l-1.171875 1.125l0 3.90625l-1.671875 0zm15.765625 0l0 -1.453125q-1.140625 1.671875 -3.125 1.671875q-0.859375 0 -1.625 -0.328125q-0.75 -0.34375 -1.125 -0.84375q-0.359375 -0.5 -0.515625 -1.234375q-0.09375 -0.5 -0.09375 -1.5625l0 -6.109375l1.671875 0l0 5.46875q0 1.3125 0.09375 1.765625q0.15625 0.65625 0.671875 1.03125q0.515625 0.375 1.265625 0.375q0.75 0 1.40625 -0.375q0.65625 -0.390625 0.921875 -1.046875q0.28125 -0.671875 0.28125 -1.9375l0 -5.28125l1.671875 0l0 9.859375l-1.5 0zm3.922577 3.78125l0 -13.640625l1.53125 0l0 1.28125q0.53125 -0.75 1.203125 -1.125q0.6875 -0.375 1.640625 -0.375q1.265625 0 2.234375 0.65625q0.96875 0.640625 1.453125 1.828125q0.5 1.1875 0.5 2.59375q0 1.515625 -0.546875 2.734375q-0.546875 1.203125 -1.578125 1.84375q-1.03125 0.640625 -2.171875 0.640625q-0.84375 0 -1.515625 -0.34375q-0.65625 -0.359375 -1.078125 -0.890625l0 4.796875l-1.671875 0zm1.515625 -8.65625q0 1.90625 0.765625 2.8125q0.78125 0.90625 1.875 0.90625q1.109375 0 1.890625 -0.9375q0.796875 -0.9375 0.796875 -2.921875q0 -1.875 -0.78125 -2.8125q-0.765625 -0.9375 -1.84375 -0.9375q-1.0625 0 -1.890625 1.0q-0.8125 1.0 -0.8125 2.890625z" fill-rule="nonzero"></path><path fill="#000000" d="m176.34024 151.46593q-1.375 -1.75 -2.328125 -4.078125q-0.953125 -2.34375 -0.953125 -4.84375q0 -2.21875 0.703125 -4.234375q0.84375 -2.34375 2.578125 -4.671875l1.203125 0q-1.125 1.921875 -1.484375 2.75q-0.5625 1.28125 -0.890625 2.671875q-0.40625 1.734375 -0.40625 3.484375q0 4.46875 2.78125 8.921875l-1.203125 0zm8.853302 -4.0l-1.671875 0l0 -10.640625q-0.59375 0.578125 -1.578125 1.15625q-0.984375 0.5625 -1.765625 0.859375l0 -1.625q1.40625 -0.65625 2.453125 -1.59375q1.046875 -0.9375 1.484375 -1.8125l1.078125 0l0 13.65625zm12.750717 -10.265625l-1.65625 0.125q-0.21875 -0.984375 -0.640625 -1.421875q-0.671875 -0.71875 -1.65625 -0.71875q-0.8125 0 -1.40625 0.4375q-0.796875 0.578125 -1.25 1.6875q-0.453125 1.09375 -0.46875 3.140625q0.609375 -0.921875 1.46875 -1.359375q0.875 -0.453125 1.828125 -0.453125q1.671875 0 2.84375 1.234375q1.171875 1.234375 1.171875 3.171875q0 1.28125 -0.546875 2.390625q-0.546875 1.09375 -1.515625 1.6875q-0.96875 0.578125 -2.1875 0.578125q-2.09375 0 -3.40625 -1.53125q-1.3125 -1.546875 -1.3125 -5.0625q0 -3.953125 1.453125 -5.734375q1.265625 -1.5625 3.421875 -1.5625q1.609375 0 2.625 0.90625q1.03125 0.890625 1.234375 2.484375zm-6.8125 5.859375q0 0.859375 0.359375 1.65625q0.375 0.78125 1.03125 1.203125q0.65625 0.40625 1.375 0.40625q1.0625 0 1.8125 -0.84375q0.765625 -0.859375 0.765625 -2.328125q0 -1.40625 -0.75 -2.21875q-0.75 -0.8125 -1.890625 -0.8125q-1.125 0 -1.921875 0.8125q-0.78125 0.8125 -0.78125 2.125zm7.875717 4.40625l3.59375 -5.125l-3.328125 -4.734375l2.09375 0l1.515625 2.3125q0.421875 0.65625 0.671875 1.109375q0.421875 -0.609375 0.765625 -1.09375l1.65625 -2.328125l1.984375 0l-3.390625 4.640625l3.65625 5.21875l-2.046875 0l-2.03125 -3.0625l-0.53125 -0.828125l-2.59375 3.890625l-2.015625 0zm18.640625 -10.265625l-1.65625 0.125q-0.21875 -0.984375 -0.640625 -1.421875q-0.671875 -0.71875 -1.65625 -0.71875q-0.8125 0 -1.40625 0.4375q-0.796875 0.578125 -1.25 1.6875q-0.453125 1.09375 -0.46875 3.140625q0.609375 -0.921875 1.46875 -1.359375q0.875 -0.453125 1.828125 -0.453125q1.671875 0 2.84375 1.234375q1.171875 1.234375 1.171875 3.171875q0 1.28125 -0.546875 2.390625q-0.546875 1.09375 -1.515625 1.6875q-0.96875 0.578125 -2.1875 0.578125q-2.09375 0 -3.40625 -1.53125q-1.3125 -1.546875 -1.3125 -5.0625q0 -3.953125 1.453125 -5.734375q1.265625 -1.5625 3.421875 -1.5625q1.609375 0 2.625 0.90625q1.03125 0.890625 1.234375 2.484375zm-6.8125 5.859375q0 0.859375 0.359375 1.65625q0.375 0.78125 1.03125 1.203125q0.65625 0.40625 1.375 0.40625q1.0625 0 1.8125 -0.84375q0.765625 -0.859375 0.765625 -2.328125q0 -1.40625 -0.75 -2.21875q-0.75 -0.8125 -1.890625 -0.8125q-1.125 0 -1.921875 0.8125q-0.78125 0.8125 -0.78125 2.125zm8.531967 0.8125l1.671875 -0.21875q0.28125 1.421875 0.96875 2.046875q0.703125 0.625 1.6875 0.625q1.1875 0 2.0 -0.8125q0.8125 -0.828125 0.8125 -2.03125q0 -1.140625 -0.765625 -1.890625q-0.75 -0.75 -1.90625 -0.75q-0.46875 0 -1.171875 0.1875l0.1875 -1.46875q0.15625 0.015625 0.265625 0.015625q1.0625 0 1.90625 -0.546875q0.859375 -0.5625 0.859375 -1.71875q0 -0.921875 -0.625 -1.515625q-0.609375 -0.609375 -1.59375 -0.609375q-0.96875 0 -1.625 0.609375q-0.640625 0.609375 -0.828125 1.84375l-1.671875 -0.296875q0.296875 -1.6875 1.375 -2.609375q1.09375 -0.921875 2.71875 -0.921875q1.109375 0 2.046875 0.484375q0.9375 0.46875 1.421875 1.296875q0.5 0.828125 0.5 1.75q0 0.890625 -0.46875 1.609375q-0.46875 0.71875 -1.40625 1.15625q1.21875 0.265625 1.875 1.15625q0.671875 0.875 0.671875 2.1875q0 1.78125 -1.296875 3.015625q-1.296875 1.234375 -3.28125 1.234375q-1.796875 0 -2.984375 -1.0625q-1.171875 -1.0625 -1.34375 -2.765625zm10.625717 0.453125l1.59375 -0.15625q0.203125 1.140625 0.78125 1.65625q0.578125 0.5 1.484375 0.5q0.765625 0 1.34375 -0.34375q0.578125 -0.359375 0.953125 -0.953125q0.375 -0.59375 0.625 -1.59375q0.25 -1.0 0.25 -2.03125q0 -0.109375 -0.015625 -0.34375q-0.5 0.796875 -1.375 1.296875q-0.859375 0.5 -1.875 0.5q-1.6875 0 -2.859375 -1.21875q-1.171875 -1.234375 -1.171875 -3.234375q0 -2.078125 1.21875 -3.328125q1.234375 -1.265625 3.0625 -1.265625q1.328125 0 2.421875 0.71875q1.109375 0.703125 1.671875 2.03125q0.578125 1.328125 0.578125 3.828125q0 2.609375 -0.578125 4.15625q-0.5625 1.546875 -1.6875 2.359375q-1.109375 0.796875 -2.609375 0.796875q-1.59375 0 -2.609375 -0.890625q-1.0 -0.890625 -1.203125 -2.484375zm6.828125 -6.0q0 -1.4375 -0.765625 -2.28125q-0.765625 -0.859375 -1.84375 -0.859375q-1.109375 0 -1.9375 0.921875q-0.828125 0.90625 -0.828125 2.34375q0 1.3125 0.78125 2.125q0.796875 0.796875 1.9375 0.796875q1.171875 0 1.90625 -0.796875q0.75 -0.8125 0.75 -2.25zm5.860092 1.765625q-1.046875 -0.375 -1.546875 -1.078125q-0.5 -0.71875 -0.5 -1.703125q0 -1.484375 1.0625 -2.484375q1.078125 -1.015625 2.84375 -1.015625q1.78125 0 2.859375 1.03125q1.09375 1.03125 1.09375 2.515625q0 0.953125 -0.5 1.65625q-0.484375 0.703125 -1.5 1.078125q1.25 0.40625 1.90625 1.3125q0.65625 0.90625 0.65625 2.171875q0 1.75 -1.234375 2.9375q-1.234375 1.1875 -3.25 1.1875q-2.015625 0 -3.25 -1.1875q-1.234375 -1.203125 -1.234375 -2.984375q0 -1.328125 0.671875 -2.21875q0.671875 -0.890625 1.921875 -1.21875zm-0.328125 -2.828125q0 0.96875 0.609375 1.578125q0.625 0.609375 1.625 0.609375q0.953125 0 1.5625 -0.609375q0.625 -0.609375 0.625 -1.484375q0 -0.921875 -0.640625 -1.546875q-0.625 -0.625 -1.578125 -0.625q-0.953125 0 -1.578125 0.609375q-0.625 0.609375 -0.625 1.46875zm-0.546875 6.28125q0 0.71875 0.328125 1.390625q0.34375 0.65625 1.015625 1.03125q0.671875 0.359375 1.4375 0.359375q1.203125 0 1.984375 -0.765625q0.78125 -0.78125 0.78125 -1.96875q0 -1.203125 -0.8125 -1.984375q-0.796875 -0.796875 -2.0 -0.796875q-1.1875 0 -1.96875 0.78125q-0.765625 0.78125 -0.765625 1.953125zm8.688217 0.328125l1.671875 -0.21875q0.28125 1.421875 0.96875 2.046875q0.703125 0.625 1.6875 0.625q1.1875 0 2.0 -0.8125q0.8125 -0.828125 0.8125 -2.03125q0 -1.140625 -0.765625 -1.890625q-0.75 -0.75 -1.90625 -0.75q-0.46875 0 -1.171875 0.1875l0.1875 -1.46875q0.15625 0.015625 0.265625 0.015625q1.0625 0 1.90625 -0.546875q0.859375 -0.5625 0.859375 -1.71875q0 -0.921875 -0.625 -1.515625q-0.609375 -0.609375 -1.59375 -0.609375q-0.96875 0 -1.625 0.609375q-0.640625 0.609375 -0.828125 1.84375l-1.671875 -0.296875q0.296875 -1.6875 1.375 -2.609375q1.09375 -0.921875 2.71875 -0.921875q1.109375 0 2.046875 0.484375q0.9375 0.46875 1.421875 1.296875q0.5 0.828125 0.5 1.75q0 0.890625 -0.46875 1.609375q-0.46875 0.71875 -1.40625 1.15625q1.21875 0.265625 1.875 1.15625q0.671875 0.875 0.671875 2.1875q0 1.78125 -1.296875 3.015625q-1.296875 1.234375 -3.28125 1.234375q-1.796875 0 -2.984375 -1.0625q-1.171875 -1.0625 -1.34375 -2.765625zm11.922577 7.59375l-1.1875 0q2.765625 -4.453125 2.765625 -8.921875q0 -1.734375 -0.390625 -3.453125q-0.328125 -1.390625 -0.890625 -2.671875q-0.359375 -0.84375 -1.484375 -2.78125l1.1875 0q1.75 2.328125 2.578125 4.671875q0.71875 2.015625 0.71875 4.234375q0 2.5 -0.96875 4.84375q-0.953125 2.328125 -2.328125 4.078125z" fill-rule="nonzero"></path><path fill="#000000" fill-opacity="0.0" d="m219.09448 157.53806l0 32.472443" fill-rule="evenodd"></path><path stroke="#000000" stroke-width="1.0" stroke-linejoin="round" stroke-linecap="butt" d="m219.09448 157.53806l0 26.472443" fill-rule="evenodd"></path><path fill="#000000" stroke="#000000" stroke-width="1.0" stroke-linecap="butt" d="m217.44275 184.0105l1.6517334 4.538101l1.6517334 -4.538101z" fill-rule="evenodd"></path><path fill="#000000" fill-opacity="0.0" d="m395.48425 30.700842l166.01575 0l0 42.110237l-166.01575 0z" fill-rule="evenodd"></path><path stroke="#000000" stroke-width="2.0" stroke-linejoin="round" stroke-linecap="butt" d="m395.48425 30.700842l166.01575 0l0 42.110237l-166.01575 0z" fill-rule="evenodd"></path><path fill="#000000" d="m413.11163 57.620842l0 -13.59375l1.8125 0l0 13.59375l-1.8125 0zm4.667694 0l0 -9.859375l1.5 0l0 1.40625q1.09375 -1.625 3.140625 -1.625q0.890625 0 1.640625 0.328125q0.75 0.3125 1.109375 0.84375q0.375 0.515625 0.53125 1.21875q0.09375 0.46875 0.09375 1.625l0 6.0625l-1.671875 0l0 -6.0q0 -1.015625 -0.203125 -1.515625q-0.1875 -0.515625 -0.6875 -0.8125q-0.5 -0.296875 -1.171875 -0.296875q-1.0625 0 -1.84375 0.671875q-0.765625 0.671875 -0.765625 2.578125l0 5.375l-1.671875 0zm10.375702 3.78125l0 -13.640625l1.53125 0l0 1.28125q0.53125 -0.75 1.203125 -1.125q0.6875 -0.375 1.640625 -0.375q1.265625 0 2.234375 0.65625q0.96875 0.640625 1.453125 1.828125q0.5 1.1875 0.5 2.59375q0 1.515625 -0.546875 2.734375q-0.546875 1.203125 -1.578125 1.84375q-1.03125 0.640625 -2.171875 0.640625q-0.84375 0 -1.515625 -0.34375q-0.65625 -0.359375 -1.078125 -0.890625l0 4.796875l-1.671875 0zm1.515625 -8.65625q0 1.90625 0.765625 2.8125q0.78125 0.90625 1.875 0.90625q1.109375 0 1.890625 -0.9375q0.796875 -0.9375 0.796875 -2.921875q0 -1.875 -0.78125 -2.8125q-0.765625 -0.9375 -1.84375 -0.9375q-1.0625 0 -1.890625 1.0q-0.8125 1.0 -0.8125 2.890625zm15.313232 4.875l0 -1.453125q-1.140625 1.671875 -3.125 1.671875q-0.859375 0 -1.625 -0.328125q-0.75 -0.34375 -1.125 -0.84375q-0.359375 -0.5 -0.515625 -1.234375q-0.09375 -0.5 -0.09375 -1.5625l0 -6.109375l1.671875 0l0 5.46875q0 1.3125 0.09375 1.765625q0.15625 0.65625 0.671875 1.03125q0.515625 0.375 1.265625 0.375q0.75 0 1.40625 -0.375q0.65625 -0.390625 0.921875 -1.046875q0.28125 -0.671875 0.28125 -1.9375l0 -5.28125l1.671875 0l0 9.859375l-1.5 0zm7.578827 -1.5l0.234375 1.484375q-0.703125 0.140625 -1.265625 0.140625q-0.90625 0 -1.40625 -0.28125q-0.5 -0.296875 -0.703125 -0.75q-0.203125 -0.46875 -0.203125 -1.984375l0 -5.65625l-1.234375 0l0 -1.3125l1.234375 0l0 -2.4375l1.65625 -1.0l0 3.4375l1.6875 0l0 1.3125l-1.6875 0l0 5.75q0 0.71875 0.078125 0.921875q0.09375 0.203125 0.296875 0.328125q0.203125 0.125 0.578125 0.125q0.265625 0 0.734375 -0.078125zm9.839569 -0.109375l0 1.609375l-8.984375 0q-0.015625 -0.609375 0.1875 -1.15625q0.34375 -0.921875 1.09375 -1.8125q0.765625 -0.890625 2.1875 -2.0625q2.21875 -1.8125 3.0 -2.875q0.78125 -1.0625 0.78125 -2.015625q0 -0.984375 -0.71875 -1.671875q-0.703125 -0.6875 -1.84375 -0.6875q-1.203125 0 -1.9375 0.734375q-0.71875 0.71875 -0.71875 2.0l-1.71875 -0.171875q0.171875 -1.921875 1.328125 -2.921875q1.15625 -1.015625 3.09375 -1.015625q1.953125 0 3.09375 1.09375q1.140625 1.078125 1.140625 2.6875q0 0.8125 -0.34375 1.609375q-0.328125 0.78125 -1.109375 1.65625q-0.765625 0.859375 -2.5625 2.390625q-1.5 1.265625 -1.9375 1.71875q-0.421875 0.4375 -0.703125 0.890625l6.671875 0zm10.434021 5.609375q-1.375 -1.75 -2.328125 -4.078125q-0.953125 -2.34375 -0.953125 -4.84375q0 -2.21875 0.703125 -4.234375q0.84375 -2.34375 2.578125 -4.671875l1.203125 0q-1.125 1.921875 -1.484375 2.75q-0.5625 1.28125 -0.890625 2.671875q-0.40625 1.734375 -0.40625 3.484375q0 4.46875 2.78125 8.921875l-1.203125 0zm2.353302 -6.9375l1.65625 -0.265625q0.140625 1.0 0.765625 1.53125q0.640625 0.515625 1.78125 0.515625q1.15625 0 1.703125 -0.46875q0.5625 -0.46875 0.5625 -1.09375q0 -0.5625 -0.484375 -0.890625q-0.34375 -0.21875 -1.703125 -0.5625q-1.84375 -0.46875 -2.5625 -0.796875q-0.703125 -0.34375 -1.078125 -0.9375q-0.359375 -0.609375 -0.359375 -1.328125q0 -0.65625 0.296875 -1.21875q0.3125 -0.5625 0.828125 -0.9375q0.390625 -0.28125 1.0625 -0.484375q0.671875 -0.203125 1.4375 -0.203125q1.171875 0 2.046875 0.34375q0.875 0.328125 1.28125 0.90625q0.421875 0.5625 0.578125 1.515625l-1.625 0.21875q-0.109375 -0.75 -0.65625 -1.171875q-0.53125 -0.4375 -1.5 -0.4375q-1.15625 0 -1.640625 0.390625q-0.484375 0.375 -0.484375 0.875q0 0.328125 0.203125 0.59375q0.203125 0.265625 0.640625 0.4375q0.25 0.09375 1.46875 0.4375q1.765625 0.46875 2.46875 0.765625q0.703125 0.296875 1.09375 0.875q0.40625 0.578125 0.40625 1.4375q0 0.828125 -0.484375 1.578125q-0.484375 0.734375 -1.40625 1.140625q-0.921875 0.390625 -2.078125 0.390625q-1.921875 0 -2.9375 -0.796875q-1.0 -0.796875 -1.28125 -2.359375zm10.015625 -8.75l0 -1.90625l1.671875 0l0 1.90625l-1.671875 0zm0 11.6875l0 -9.859375l1.671875 0l0 9.859375l-1.671875 0zm3.254181 0l0 -1.359375l6.265625 -7.1875q-1.0625 0.046875 -1.875 0.046875l-4.015625 0l0 -1.359375l8.046875 0l0 1.109375l-5.34375 6.25l-1.015625 1.140625q1.109375 -0.078125 2.09375 -0.078125l4.5625 0l0 1.4375l-8.71875 0zm16.953125 -3.171875l1.71875 0.21875q-0.40625 1.5 -1.515625 2.34375q-1.09375 0.828125 -2.8125 0.828125q-2.15625 0 -3.421875 -1.328125q-1.265625 -1.328125 -1.265625 -3.734375q0 -2.484375 1.265625 -3.859375q1.28125 -1.375 3.328125 -1.375q1.984375 0 3.234375 1.34375q1.25 1.34375 1.25 3.796875q0 0.140625 -0.015625 0.4375l-7.34375 0q0.09375 1.625 0.921875 2.484375q0.828125 0.859375 2.0625 0.859375q0.90625 0 1.546875 -0.46875q0.65625 -0.484375 1.046875 -1.546875zm-5.484375 -2.703125l5.5 0q-0.109375 -1.234375 -0.625 -1.859375q-0.796875 -0.96875 -2.078125 -0.96875q-1.140625 0 -1.9375 0.78125q-0.78125 0.765625 -0.859375 2.046875zm23.074646 -2.125l-8.96875 0l0 -1.5625l8.96875 0l0 1.5625zm0 4.125l-8.96875 0l0 -1.546875l8.96875 0l0 1.546875zm13.125122 3.875l-1.671875 0l0 -10.640625q-0.59375 0.578125 -1.578125 1.15625q-0.984375 0.5625 -1.765625 0.859375l0 -1.625q1.40625 -0.65625 2.453125 -1.59375q1.046875 -0.9375 1.484375 -1.8125l1.078125 0l0 13.65625zm5.6413574 4.0l-1.1875 0q2.765625 -4.453125 2.765625 -8.921875q0 -1.734375 -0.390625 -3.453125q-0.328125 -1.390625 -0.890625 -2.671875q-0.359375 -0.84375 -1.484375 -2.78125l1.1875 0q1.75 2.328125 2.578125 4.671875q0.71875 2.015625 0.71875 4.234375q0 2.5 -0.96875 4.84375q-0.953125 2.328125 -2.328125 4.078125z" fill-rule="nonzero"></path><path fill="#000000" fill-opacity="0.0" d="m388.49344 411.97638l179.99997 0l0 58.992126l-179.99997 0z" fill-rule="evenodd"></path><path stroke="#000000" stroke-width="2.0" stroke-linejoin="round" stroke-linecap="butt" d="m388.49344 411.97638l179.99997 0l0 58.992126l-179.99997 0z" fill-rule="evenodd"></path><path fill="#000000" d="m402.72214 438.89636l0 -13.59375l9.84375 0l0 1.59375l-8.046875 0l0 4.171875l7.53125 0l0 1.59375l-7.53125 0l0 4.625l8.359375 0l0 1.609375l-10.15625 0zm12.193573 0l0 -9.859375l1.5 0l0 1.390625q0.453125 -0.71875 1.21875 -1.15625q0.78125 -0.453125 1.765625 -0.453125q1.09375 0 1.796875 0.453125q0.703125 0.453125 0.984375 1.28125q1.171875 -1.734375 3.046875 -1.734375q1.46875 0 2.25 0.8125q0.796875 0.8125 0.796875 2.5l0 6.765625l-1.671875 0l0 -6.203125q0 -1.0 -0.15625 -1.4375q-0.15625 -0.453125 -0.59375 -0.71875q-0.421875 -0.265625 -1.0 -0.265625q-1.03125 0 -1.71875 0.6875q-0.6875 0.6875 -0.6875 2.21875l0 5.71875l-1.671875 0l0 -6.40625q0 -1.109375 -0.40625 -1.65625q-0.40625 -0.5625 -1.34375 -0.5625q-0.703125 0 -1.3125 0.375q-0.59375 0.359375 -0.859375 1.078125q-0.265625 0.71875 -0.265625 2.0625l0 5.109375l-1.671875 0zm17.087677 0l-1.546875 0l0 -13.59375l1.65625 0l0 4.84375q1.0625 -1.328125 2.703125 -1.328125q0.90625 0 1.71875 0.375q0.8125 0.359375 1.328125 1.03125q0.53125 0.65625 0.828125 1.59375q0.296875 0.9375 0.296875 2.0q0 2.53125 -1.25 3.921875q-1.25 1.375 -3.0 1.375q-1.75 0 -2.734375 -1.453125l0 1.234375zm-0.015625 -5.0q0 1.765625 0.46875 2.5625q0.796875 1.28125 2.140625 1.28125q1.09375 0 1.890625 -0.9375q0.796875 -0.953125 0.796875 -2.84375q0 -1.921875 -0.765625 -2.84375q-0.765625 -0.921875 -1.84375 -0.921875q-1.09375 0 -1.890625 0.953125q-0.796875 0.953125 -0.796875 2.75zm15.594452 1.828125l1.71875 0.21875q-0.40625 1.5 -1.515625 2.34375q-1.09375 0.828125 -2.8125 0.828125q-2.15625 0 -3.421875 -1.328125q-1.265625 -1.328125 -1.265625 -3.734375q0 -2.484375 1.265625 -3.859375q1.28125 -1.375 3.328125 -1.375q1.984375 0 3.234375 1.34375q1.25 1.34375 1.25 3.796875q0 0.140625 -0.015625 0.4375l-7.34375 0q0.09375 1.625 0.921875 2.484375q0.828125 0.859375 2.0625 0.859375q0.90625 0 1.546875 -0.46875q0.65625 -0.484375 1.046875 -1.546875zm-5.484375 -2.703125l5.5 0q-0.109375 -1.234375 -0.625 -1.859375q-0.796875 -0.96875 -2.078125 -0.96875q-1.140625 0 -1.9375 0.78125q-0.78125 0.765625 -0.859375 2.046875zm15.500732 5.875l0 -1.25q-0.9375 1.46875 -2.75 1.46875q-1.171875 0 -2.171875 -0.640625q-0.984375 -0.65625 -1.53125 -1.8125q-0.53125 -1.171875 -0.53125 -2.6875q0 -1.46875 0.484375 -2.671875q0.5 -1.203125 1.46875 -1.84375q0.984375 -0.640625 2.203125 -0.640625q0.890625 0 1.578125 0.375q0.703125 0.375 1.140625 0.984375l0 -4.875l1.65625 0l0 13.59375l-1.546875 0zm-5.28125 -4.921875q0 1.890625 0.796875 2.828125q0.8125 0.9375 1.890625 0.9375q1.09375 0 1.859375 -0.890625q0.765625 -0.890625 0.765625 -2.734375q0 -2.015625 -0.78125 -2.953125q-0.78125 -0.953125 -1.921875 -0.953125q-1.109375 0 -1.859375 0.90625q-0.75 0.90625 -0.75 2.859375zm15.656952 4.921875l0 -1.25q-0.9375 1.46875 -2.75 1.46875q-1.171875 0 -2.171875 -0.640625q-0.984375 -0.65625 -1.53125 -1.8125q-0.53125 -1.171875 -0.53125 -2.6875q0 -1.46875 0.484375 -2.671875q0.5 -1.203125 1.46875 -1.84375q0.984375 -0.640625 2.203125 -0.640625q0.890625 0 1.578125 0.375q0.703125 0.375 1.140625 0.984375l0 -4.875l1.65625 0l0 13.59375l-1.546875 0zm-5.28125 -4.921875q0 1.890625 0.796875 2.828125q0.8125 0.9375 1.890625 0.9375q1.09375 0 1.859375 -0.890625q0.765625 -0.890625 0.765625 -2.734375q0 -2.015625 -0.78125 -2.953125q-0.78125 -0.953125 -1.921875 -0.953125q-1.109375 0 -1.859375 0.90625q-0.75 0.90625 -0.75 2.859375zm9.281982 -6.765625l0 -1.90625l1.671875 0l0 1.90625l-1.671875 0zm0 11.6875l0 -9.859375l1.671875 0l0 9.859375l-1.671875 0zm4.129181 0l0 -9.859375l1.5 0l0 1.40625q1.09375 -1.625 3.140625 -1.625q0.890625 0 1.640625 0.328125q0.75 0.3125 1.109375 0.84375q0.375 0.515625 0.53125 1.21875q0.09375 0.46875 0.09375 1.625l0 6.0625l-1.671875 0l0 -6.0q0 -1.015625 -0.203125 -1.515625q-0.1875 -0.515625 -0.6875 -0.8125q-0.5 -0.296875 -1.171875 -0.296875q-1.0625 0 -1.84375 0.671875q-0.765625 0.671875 -0.765625 2.578125l0 5.375l-1.671875 0zm10.078857 0.8125l1.609375 0.25q0.109375 0.75 0.578125 1.09375q0.609375 0.453125 1.6875 0.453125q1.171875 0 1.796875 -0.46875q0.625 -0.453125 0.859375 -1.28125q0.125 -0.515625 0.109375 -2.15625q-1.09375 1.296875 -2.71875 1.296875q-2.03125 0 -3.15625 -1.46875q-1.109375 -1.46875 -1.109375 -3.515625q0 -1.40625 0.515625 -2.59375q0.515625 -1.203125 1.484375 -1.84375q0.96875 -0.65625 2.265625 -0.65625q1.75 0 2.875 1.40625l0 -1.1875l1.546875 0l0 8.515625q0 2.3125 -0.46875 3.265625q-0.46875 0.96875 -1.484375 1.515625q-1.015625 0.5625 -2.5 0.5625q-1.765625 0 -2.859375 -0.796875q-1.078125 -0.796875 -1.03125 -2.390625zm1.375 -5.921875q0 1.953125 0.765625 2.84375q0.78125 0.890625 1.9375 0.890625q1.140625 0 1.921875 -0.890625q0.78125 -0.890625 0.78125 -2.78125q0 -1.8125 -0.8125 -2.71875q-0.796875 -0.921875 -1.921875 -0.921875q-1.109375 0 -1.890625 0.90625q-0.78125 0.890625 -0.78125 2.671875zm14.449646 5.109375l0 -13.59375l1.671875 0l0 13.59375l-1.671875 0zm3.551056 -4.921875q0 -2.734375 1.53125 -4.0625q1.265625 -1.09375 3.09375 -1.09375q2.0312805 0 3.3125305 1.34375q1.296875 1.328125 1.296875 3.671875q0 1.90625 -0.578125 3.0q-0.5625 1.078125 -1.65625 1.6875q-1.0781555 0.59375 -2.3750305 0.59375q-2.0625 0 -3.34375 -1.328125q-1.28125 -1.328125 -1.28125 -3.8125zm1.71875 0q0 1.890625 0.828125 2.828125q0.828125 0.9375 2.078125 0.9375q1.25 0 2.0625305 -0.9375q0.828125 -0.953125 0.828125 -2.890625q0 -1.828125 -0.828125 -2.765625q-0.8281555 -0.9375 -2.0625305 -0.9375q-1.25 0 -2.078125 0.9375q-0.828125 0.9375 -0.828125 2.828125zm8.656952 0q0 -2.734375 1.53125 -4.0625q1.265625 -1.09375 3.09375 -1.09375q2.03125 0 3.3125 1.34375q1.296875 1.328125 1.296875 3.671875q0 1.90625 -0.578125 3.0q-0.5625 1.078125 -1.65625 1.6875q-1.078125 0.59375 -2.375 0.59375q-2.0625 0 -3.34375 -1.328125q-1.28125 -1.328125 -1.28125 -3.8125zm1.71875 0q0 1.890625 0.828125 2.828125q0.828125 0.9375 2.078125 0.9375q1.25 0 2.0625 -0.9375q0.828125 -0.953125 0.828125 -2.890625q0 -1.828125 -0.828125 -2.765625q-0.828125 -0.9375 -2.0625 -0.9375q-1.25 0 -2.078125 0.9375q-0.828125 0.9375 -0.828125 2.828125zm9.297607 4.921875l0 -13.59375l1.671875 0l0 7.75l3.953125 -4.015625l2.15625 0l-3.765625 3.65625l4.140625 6.203125l-2.0625 0l-3.25 -5.03125l-1.171875 1.125l0 3.90625l-1.671875 0zm15.765625 0l0 -1.453125q-1.140625 1.671875 -3.125 1.671875q-0.859375 0 -1.625 -0.328125q-0.75 -0.34375 -1.125 -0.84375q-0.359375 -0.5 -0.515625 -1.234375q-0.09375 -0.5 -0.09375 -1.5625l0 -6.109375l1.671875 0l0 5.46875q0 1.3125 0.09375 1.765625q0.15625 0.65625 0.671875 1.03125q0.515625 0.375 1.265625 0.375q0.75 0 1.40625 -0.375q0.65625 -0.390625 0.921875 -1.046875q0.28125 -0.671875 0.28125 -1.9375l0 -5.28125l1.671875 0l0 9.859375l-1.5 0zm3.9226074 3.78125l0 -13.640625l1.53125 0l0 1.28125q0.53125 -0.75 1.203125 -1.125q0.6875 -0.375 1.640625 -0.375q1.265625 0 2.234375 0.65625q0.96875 0.640625 1.453125 1.828125q0.5 1.1875 0.5 2.59375q0 1.515625 -0.546875 2.734375q-0.546875 1.203125 -1.578125 1.84375q-1.03125 0.640625 -2.171875 0.640625q-0.84375 0 -1.515625 -0.34375q-0.65625 -0.359375 -1.078125 -0.890625l0 4.796875l-1.671875 0zm1.515625 -8.65625q0 1.90625 0.765625 2.8125q0.78125 0.90625 1.875 0.90625q1.109375 0 1.890625 -0.9375q0.796875 -0.9375 0.796875 -2.921875q0 -1.875 -0.78125 -2.8125q-0.765625 -0.9375 -1.84375 -0.9375q-1.0625 0 -1.890625 1.0q-0.8125 1.0 -0.8125 2.890625z" fill-rule="nonzero"></path><path fill="#000000" d="m435.7392 464.89636q-1.375 -1.75 -2.328125 -4.078125q-0.953125 -2.34375 -0.953125 -4.84375q0 -2.21875 0.703125 -4.234375q0.84375 -2.34375 2.578125 -4.671875l1.203125 0q-1.125 1.921875 -1.484375 2.75q-0.5625 1.28125 -0.890625 2.671875q-0.40625 1.734375 -0.40625 3.484375q0 4.46875 2.78125 8.921875l-1.203125 0zm2.572052 -7.59375l1.671875 -0.21875q0.28125 1.421875 0.96875 2.046875q0.703125 0.625 1.6875 0.625q1.1875 0 2.0 -0.8125q0.8125 -0.828125 0.8125 -2.03125q0 -1.140625 -0.765625 -1.890625q-0.75 -0.75 -1.90625 -0.75q-0.46875 0 -1.171875 0.1875l0.1875 -1.46875q0.15625 0.015625 0.265625 0.015625q1.0625 0 1.90625 -0.546875q0.859375 -0.5625 0.859375 -1.71875q0 -0.921875 -0.625 -1.515625q-0.609375 -0.609375 -1.59375 -0.609375q-0.96875 0 -1.625 0.609375q-0.640625 0.609375 -0.828125 1.84375l-1.671875 -0.296875q0.296875 -1.6875 1.375 -2.609375q1.09375 -0.921875 2.71875 -0.921875q1.109375 0 2.046875 0.484375q0.9375 0.46875 1.421875 1.296875q0.5 0.828125 0.5 1.75q0 0.890625 -0.46875 1.609375q-0.46875 0.71875 -1.40625 1.15625q1.21875 0.265625 1.875 1.15625q0.671875 0.875 0.671875 2.1875q0 1.78125 -1.296875 3.015625q-1.296875 1.234375 -3.28125 1.234375q-1.796875 0 -2.984375 -1.0625q-1.171875 -1.0625 -1.34375 -2.765625zm19.141327 1.984375l0 1.609375l-8.984375 0q-0.015625 -0.609375 0.1875 -1.15625q0.34375 -0.921875 1.09375 -1.8125q0.765625 -0.890625 2.1875 -2.0625q2.21875 -1.8125 3.0 -2.875q0.78125 -1.0625 0.78125 -2.015625q0 -0.984375 -0.71875 -1.671875q-0.703125 -0.6875 -1.84375 -0.6875q-1.203125 0 -1.9375 0.734375q-0.71875 0.71875 -0.71875 2.0l-1.71875 -0.171875q0.171875 -1.921875 1.328125 -2.921875q1.15625 -1.015625 3.09375 -1.015625q1.953125 0 3.09375 1.09375q1.140625 1.078125 1.140625 2.6875q0 0.8125 -0.34375 1.609375q-0.328125 0.78125 -1.109375 1.65625q-0.765625 0.859375 -2.5625 2.390625q-1.5 1.265625 -1.9375 1.71875q-0.421875 0.4375 -0.703125 0.890625l6.671875 0zm0.9538574 1.609375l3.59375 -5.125l-3.328125 -4.734375l2.09375 0l1.515625 2.3125q0.421875 0.65625 0.671875 1.109375q0.421875 -0.609375 0.765625 -1.09375l1.65625 -2.328125l1.984375 0l-3.390625 4.640625l3.65625 5.21875l-2.046875 0l-2.03125 -3.0625l-0.53125 -0.828125l-2.59375 3.890625l-2.015625 0zm18.640625 -10.265625l-1.65625 0.125q-0.21875 -0.984375 -0.640625 -1.421875q-0.671875 -0.71875 -1.65625 -0.71875q-0.8125 0 -1.40625 0.4375q-0.796875 0.578125 -1.25 1.6875q-0.453125 1.09375 -0.46875 3.140625q0.609375 -0.921875 1.46875 -1.359375q0.875 -0.453125 1.828125 -0.453125q1.671875 0 2.84375 1.234375q1.171875 1.234375 1.171875 3.171875q0 1.28125 -0.546875 2.390625q-0.546875 1.09375 -1.515625 1.6875q-0.96875 0.578125 -2.1875 0.578125q-2.09375 0 -3.40625 -1.53125q-1.3125 -1.546875 -1.3125 -5.0625q0 -3.953125 1.453125 -5.734375q1.265625 -1.5625 3.421875 -1.5625q1.609375 0 2.625 0.90625q1.03125 0.890625 1.234375 2.484375zm-6.8125 5.859375q0 0.859375 0.359375 1.65625q0.375 0.78125 1.03125 1.203125q0.65625 0.40625 1.375 0.40625q1.0625 0 1.8125 -0.84375q0.765625 -0.859375 0.765625 -2.328125q0 -1.40625 -0.75 -2.21875q-0.75 -0.8125 -1.890625 -0.8125q-1.125 0 -1.921875 0.8125q-0.78125 0.8125 -0.78125 2.125zm8.531952 0.8125l1.671875 -0.21875q0.28125 1.421875 0.96875 2.046875q0.703125 0.625 1.6875 0.625q1.1875 0 2.0 -0.8125q0.8125 -0.828125 0.8125 -2.03125q0 -1.140625 -0.765625 -1.890625q-0.75 -0.75 -1.90625 -0.75q-0.46875 0 -1.171875 0.1875l0.1875 -1.46875q0.15625 0.015625 0.265625 0.015625q1.0625 0 1.90625 -0.546875q0.859375 -0.5625 0.859375 -1.71875q0 -0.921875 -0.625 -1.515625q-0.609375 -0.609375 -1.59375 -0.609375q-0.96875 0 -1.625 0.609375q-0.640625 0.609375 -0.828125 1.84375l-1.671875 -0.296875q0.296875 -1.6875 1.375 -2.609375q1.09375 -0.921875 2.71875 -0.921875q1.109375 0 2.046875 0.484375q0.9375 0.46875 1.421875 1.296875q0.5 0.828125 0.5 1.75q0 0.890625 -0.46875 1.609375q-0.46875 0.71875 -1.40625 1.15625q1.21875 0.265625 1.875 1.15625q0.671875 0.875 0.671875 2.1875q0 1.78125 -1.296875 3.015625q-1.296875 1.234375 -3.28125 1.234375q-1.796875 0 -2.984375 -1.0625q-1.171875 -1.0625 -1.34375 -2.765625zm10.625732 0.453125l1.59375 -0.15625q0.203125 1.140625 0.78125 1.65625q0.578125 0.5 1.484375 0.5q0.765625 0 1.34375 -0.34375q0.578125 -0.359375 0.953125 -0.953125q0.375 -0.59375 0.625 -1.59375q0.25 -1.0 0.25 -2.03125q0 -0.109375 -0.015625 -0.34375q-0.5 0.796875 -1.375 1.296875q-0.859375 0.5 -1.875 0.5q-1.6875 0 -2.859375 -1.21875q-1.171875 -1.234375 -1.171875 -3.234375q0 -2.078125 1.21875 -3.328125q1.234375 -1.265625 3.0625 -1.265625q1.328125 0 2.421875 0.71875q1.109375 0.703125 1.671875 2.03125q0.578125 1.328125 0.578125 3.828125q0 2.609375 -0.578125 4.15625q-0.5625 1.546875 -1.6875 2.359375q-1.109375 0.796875 -2.609375 0.796875q-1.59375 0 -2.609375 -0.890625q-1.0 -0.890625 -1.203125 -2.484375zm6.828125 -6.0q0 -1.4375 -0.765625 -2.28125q-0.765625 -0.859375 -1.84375 -0.859375q-1.109375 0 -1.9375 0.921875q-0.828125 0.90625 -0.828125 2.34375q0 1.3125 0.78125 2.125q0.796875 0.796875 1.9375 0.796875q1.171875 0 1.90625 -0.796875q0.75 -0.8125 0.75 -2.25zm5.860077 1.765625q-1.046875 -0.375 -1.546875 -1.078125q-0.5 -0.71875 -0.5 -1.703125q0 -1.484375 1.0625 -2.484375q1.078125 -1.015625 2.84375 -1.015625q1.78125 0 2.859375 1.03125q1.09375 1.03125 1.09375 2.515625q0 0.953125 -0.5 1.65625q-0.484375 0.703125 -1.5 1.078125q1.25 0.40625 1.90625 1.3125q0.65625 0.90625 0.65625 2.171875q0 1.75 -1.234375 2.9375q-1.234375 1.1875 -3.25 1.1875q-2.015625 0 -3.25 -1.1875q-1.234375 -1.203125 -1.234375 -2.984375q0 -1.328125 0.671875 -2.21875q0.671875 -0.890625 1.921875 -1.21875zm-0.328125 -2.828125q0 0.96875 0.609375 1.578125q0.625 0.609375 1.625 0.609375q0.953125 0 1.5625 -0.609375q0.625 -0.609375 0.625 -1.484375q0 -0.921875 -0.640625 -1.546875q-0.625 -0.625 -1.578125 -0.625q-0.953125 0 -1.578125 0.609375q-0.625 0.609375 -0.625 1.46875zm-0.546875 6.28125q0 0.71875 0.328125 1.390625q0.34375 0.65625 1.015625 1.03125q0.671875 0.359375 1.4375 0.359375q1.203125 0 1.984375 -0.765625q0.78125 -0.78125 0.78125 -1.96875q0 -1.203125 -0.8125 -1.984375q-0.796875 -0.796875 -2.0 -0.796875q-1.1875 0 -1.96875 0.78125q-0.765625 0.78125 -0.765625 1.953125zm8.688232 0.328125l1.671875 -0.21875q0.28125 1.421875 0.96875 2.046875q0.703125 0.625 1.6875 0.625q1.1875 0 2.0 -0.8125q0.8125 -0.828125 0.8125 -2.03125q0 -1.140625 -0.765625 -1.890625q-0.75 -0.75 -1.90625 -0.75q-0.46875 0 -1.171875 0.1875l0.1875 -1.46875q0.15625 0.015625 0.265625 0.015625q1.0625 0 1.90625 -0.546875q0.859375 -0.5625 0.859375 -1.71875q0 -0.921875 -0.625 -1.515625q-0.609375 -0.609375 -1.59375 -0.609375q-0.96875 0 -1.625 0.609375q-0.640625 0.609375 -0.828125 1.84375l-1.671875 -0.296875q0.296875 -1.6875 1.375 -2.609375q1.09375 -0.921875 2.71875 -0.921875q1.109375 0 2.046875 0.484375q0.9375 0.46875 1.421875 1.296875q0.5 0.828125 0.5 1.75q0 0.890625 -0.46875 1.609375q-0.46875 0.71875 -1.40625 1.15625q1.21875 0.265625 1.875 1.15625q0.671875 0.875 0.671875 2.1875q0 1.78125 -1.296875 3.015625q-1.296875 1.234375 -3.28125 1.234375q-1.796875 0 -2.984375 -1.0625q-1.171875 -1.0625 -1.34375 -2.765625zm11.922546 7.59375l-1.1875 0q2.765625 -4.453125 2.765625 -8.921875q0 -1.734375 -0.390625 -3.453125q-0.328125 -1.390625 -0.890625 -2.671875q-0.359375 -0.84375 -1.484375 -2.78125l1.1875 0q1.75 2.328125 2.578125 4.671875q0.71875 2.015625 0.71875 4.234375q0 2.5 -0.96875 4.84375q-0.953125 2.328125 -2.328125 4.078125z" fill-rule="nonzero"></path><path fill="#000000" fill-opacity="0.0" d="m388.49344 567.8504l179.99997 0l0 58.992126l-179.99997 0z" fill-rule="evenodd"></path><path stroke="#000000" stroke-width="2.0" stroke-linejoin="round" stroke-linecap="butt" d="m388.49344 567.8504l179.99997 0l0 58.992126l-179.99997 0z" fill-rule="evenodd"></path><path fill="#000000" d="m402.72214 594.7704l0 -13.59375l9.84375 0l0 1.59375l-8.046875 0l0 4.171875l7.53125 0l0 1.59375l-7.53125 0l0 4.625l8.359375 0l0 1.609375l-10.15625 0zm12.193573 0l0 -9.859375l1.5 0l0 1.390625q0.453125 -0.71875 1.21875 -1.15625q0.78125 -0.453125 1.765625 -0.453125q1.09375 0 1.796875 0.453125q0.703125 0.453125 0.984375 1.28125q1.171875 -1.734375 3.046875 -1.734375q1.46875 0 2.25 0.8125q0.796875 0.8125 0.796875 2.5l0 6.765625l-1.671875 0l0 -6.203125q0 -1.0 -0.15625 -1.4375q-0.15625 -0.453125 -0.59375 -0.71875q-0.421875 -0.265625 -1.0 -0.265625q-1.03125 0 -1.71875 0.6875q-0.6875 0.6875 -0.6875 2.21875l0 5.71875l-1.671875 0l0 -6.40625q0 -1.109375 -0.40625 -1.65625q-0.40625 -0.5625 -1.34375 -0.5625q-0.703125 0 -1.3125 0.375q-0.59375 0.359375 -0.859375 1.078125q-0.265625 0.71875 -0.265625 2.0625l0 5.109375l-1.671875 0zm17.087677 0l-1.546875 0l0 -13.59375l1.65625 0l0 4.84375q1.0625 -1.328125 2.703125 -1.328125q0.90625 0 1.71875 0.375q0.8125 0.359375 1.328125 1.03125q0.53125 0.65625 0.828125 1.59375q0.296875 0.9375 0.296875 2.0q0 2.53125 -1.25 3.921875q-1.25 1.375 -3.0 1.375q-1.75 0 -2.734375 -1.453125l0 1.234375zm-0.015625 -5.0q0 1.765625 0.46875 2.5625q0.796875 1.28125 2.140625 1.28125q1.09375 0 1.890625 -0.9375q0.796875 -0.953125 0.796875 -2.84375q0 -1.921875 -0.765625 -2.84375q-0.765625 -0.921875 -1.84375 -0.921875q-1.09375 0 -1.890625 0.953125q-0.796875 0.953125 -0.796875 2.75zm15.594452 1.828125l1.71875 0.21875q-0.40625 1.5 -1.515625 2.34375q-1.09375 0.828125 -2.8125 0.828125q-2.15625 0 -3.421875 -1.328125q-1.265625 -1.328125 -1.265625 -3.734375q0 -2.484375 1.265625 -3.859375q1.28125 -1.375 3.328125 -1.375q1.984375 0 3.234375 1.34375q1.25 1.34375 1.25 3.796875q0 0.140625 -0.015625 0.4375l-7.34375 0q0.09375 1.625 0.921875 2.484375q0.828125 0.859375 2.0625 0.859375q0.90625 0 1.546875 -0.46875q0.65625 -0.484375 1.046875 -1.546875zm-5.484375 -2.703125l5.5 0q-0.109375 -1.234375 -0.625 -1.859375q-0.796875 -0.96875 -2.078125 -0.96875q-1.140625 0 -1.9375 0.78125q-0.78125 0.765625 -0.859375 2.046875zm15.500732 5.875l0 -1.25q-0.9375 1.46875 -2.75 1.46875q-1.171875 0 -2.171875 -0.640625q-0.984375 -0.65625 -1.53125 -1.8125q-0.53125 -1.171875 -0.53125 -2.6875q0 -1.46875 0.484375 -2.671875q0.5 -1.203125 1.46875 -1.84375q0.984375 -0.640625 2.203125 -0.640625q0.890625 0 1.578125 0.375q0.703125 0.375 1.140625 0.984375l0 -4.875l1.65625 0l0 13.59375l-1.546875 0zm-5.28125 -4.921875q0 1.890625 0.796875 2.828125q0.8125 0.9375 1.890625 0.9375q1.09375 0 1.859375 -0.890625q0.765625 -0.890625 0.765625 -2.734375q0 -2.015625 -0.78125 -2.953125q-0.78125 -0.953125 -1.921875 -0.953125q-1.109375 0 -1.859375 0.90625q-0.75 0.90625 -0.75 2.859375zm15.656952 4.921875l0 -1.25q-0.9375 1.46875 -2.75 1.46875q-1.171875 0 -2.171875 -0.640625q-0.984375 -0.65625 -1.53125 -1.8125q-0.53125 -1.171875 -0.53125 -2.6875q0 -1.46875 0.484375 -2.671875q0.5 -1.203125 1.46875 -1.84375q0.984375 -0.640625 2.203125 -0.640625q0.890625 0 1.578125 0.375q0.703125 0.375 1.140625 0.984375l0 -4.875l1.65625 0l0 13.59375l-1.546875 0zm-5.28125 -4.921875q0 1.890625 0.796875 2.828125q0.8125 0.9375 1.890625 0.9375q1.09375 0 1.859375 -0.890625q0.765625 -0.890625 0.765625 -2.734375q0 -2.015625 -0.78125 -2.953125q-0.78125 -0.953125 -1.921875 -0.953125q-1.109375 0 -1.859375 0.90625q-0.75 0.90625 -0.75 2.859375zm9.281982 -6.765625l0 -1.90625l1.671875 0l0 1.90625l-1.671875 0zm0 11.6875l0 -9.859375l1.671875 0l0 9.859375l-1.671875 0zm4.129181 0l0 -9.859375l1.5 0l0 1.40625q1.09375 -1.625 3.140625 -1.625q0.890625 0 1.640625 0.328125q0.75 0.3125 1.109375 0.84375q0.375 0.515625 0.53125 1.21875q0.09375 0.46875 0.09375 1.625l0 6.0625l-1.671875 0l0 -6.0q0 -1.015625 -0.203125 -1.515625q-0.1875 -0.515625 -0.6875 -0.8125q-0.5 -0.296875 -1.171875 -0.296875q-1.0625 0 -1.84375 0.671875q-0.765625 0.671875 -0.765625 2.578125l0 5.375l-1.671875 0zm10.078857 0.8125l1.609375 0.25q0.109375 0.75 0.578125 1.09375q0.609375 0.453125 1.6875 0.453125q1.171875 0 1.796875 -0.46875q0.625 -0.453125 0.859375 -1.28125q0.125 -0.515625 0.109375 -2.15625q-1.09375 1.296875 -2.71875 1.296875q-2.03125 0 -3.15625 -1.46875q-1.109375 -1.46875 -1.109375 -3.515625q0 -1.40625 0.515625 -2.59375q0.515625 -1.203125 1.484375 -1.84375q0.96875 -0.65625 2.265625 -0.65625q1.75 0 2.875 1.40625l0 -1.1875l1.546875 0l0 8.515625q0 2.3125 -0.46875 3.265625q-0.46875 0.96875 -1.484375 1.515625q-1.015625 0.5625 -2.5 0.5625q-1.765625 0 -2.859375 -0.796875q-1.078125 -0.796875 -1.03125 -2.390625zm1.375 -5.921875q0 1.953125 0.765625 2.84375q0.78125 0.890625 1.9375 0.890625q1.140625 0 1.921875 -0.890625q0.78125 -0.890625 0.78125 -2.78125q0 -1.8125 -0.8125 -2.71875q-0.796875 -0.921875 -1.921875 -0.921875q-1.109375 0 -1.890625 0.90625q-0.78125 0.890625 -0.78125 2.671875zm14.449646 5.109375l0 -13.59375l1.671875 0l0 13.59375l-1.671875 0zm3.551056 -4.921875q0 -2.734375 1.53125 -4.0625q1.265625 -1.09375 3.09375 -1.09375q2.0312805 0 3.3125305 1.34375q1.296875 1.328125 1.296875 3.671875q0 1.90625 -0.578125 3.0q-0.5625 1.078125 -1.65625 1.6875q-1.0781555 0.59375 -2.3750305 0.59375q-2.0625 0 -3.34375 -1.328125q-1.28125 -1.328125 -1.28125 -3.8125zm1.71875 0q0 1.890625 0.828125 2.828125q0.828125 0.9375 2.078125 0.9375q1.25 0 2.0625305 -0.9375q0.828125 -0.953125 0.828125 -2.890625q0 -1.828125 -0.828125 -2.765625q-0.8281555 -0.9375 -2.0625305 -0.9375q-1.25 0 -2.078125 0.9375q-0.828125 0.9375 -0.828125 2.828125zm8.656952 0q0 -2.734375 1.53125 -4.0625q1.265625 -1.09375 3.09375 -1.09375q2.03125 0 3.3125 1.34375q1.296875 1.328125 1.296875 3.671875q0 1.90625 -0.578125 3.0q-0.5625 1.078125 -1.65625 1.6875q-1.078125 0.59375 -2.375 0.59375q-2.0625 0 -3.34375 -1.328125q-1.28125 -1.328125 -1.28125 -3.8125zm1.71875 0q0 1.890625 0.828125 2.828125q0.828125 0.9375 2.078125 0.9375q1.25 0 2.0625 -0.9375q0.828125 -0.953125 0.828125 -2.890625q0 -1.828125 -0.828125 -2.765625q-0.828125 -0.9375 -2.0625 -0.9375q-1.25 0 -2.078125 0.9375q-0.828125 0.9375 -0.828125 2.828125zm9.297607 4.921875l0 -13.59375l1.671875 0l0 7.75l3.953125 -4.015625l2.15625 0l-3.765625 3.65625l4.140625 6.203125l-2.0625 0l-3.25 -5.03125l-1.171875 1.125l0 3.90625l-1.671875 0zm15.765625 0l0 -1.453125q-1.140625 1.671875 -3.125 1.671875q-0.859375 0 -1.625 -0.328125q-0.75 -0.34375 -1.125 -0.84375q-0.359375 -0.5 -0.515625 -1.234375q-0.09375 -0.5 -0.09375 -1.5625l0 -6.109375l1.671875 0l0 5.46875q0 1.3125 0.09375 1.765625q0.15625 0.65625 0.671875 1.03125q0.515625 0.375 1.265625 0.375q0.75 0 1.40625 -0.375q0.65625 -0.390625 0.921875 -1.046875q0.28125 -0.671875 0.28125 -1.9375l0 -5.28125l1.671875 0l0 9.859375l-1.5 0zm3.9226074 3.78125l0 -13.640625l1.53125 0l0 1.28125q0.53125 -0.75 1.203125 -1.125q0.6875 -0.375 1.640625 -0.375q1.265625 0 2.234375 0.65625q0.96875 0.640625 1.453125 1.828125q0.5 1.1875 0.5 2.59375q0 1.515625 -0.546875 2.734375q-0.546875 1.203125 -1.578125 1.84375q-1.03125 0.640625 -2.171875 0.640625q-0.84375 0 -1.515625 -0.34375q-0.65625 -0.359375 -1.078125 -0.890625l0 4.796875l-1.671875 0zm1.515625 -8.65625q0 1.90625 0.765625 2.8125q0.78125 0.90625 1.875 0.90625q1.109375 0 1.890625 -0.9375q0.796875 -0.9375 0.796875 -2.921875q0 -1.875 -0.78125 -2.8125q-0.765625 -0.9375 -1.84375 -0.9375q-1.0625 0 -1.890625 1.0q-0.8125 1.0 -0.8125 2.890625z" fill-rule="nonzero"></path><path fill="#000000" d="m440.92703 620.7704q-1.375 -1.75 -2.328125 -4.078125q-0.953125 -2.34375 -0.953125 -4.84375q0 -2.21875 0.703125 -4.234375q0.84375 -2.34375 2.578125 -4.671875l1.203125 0q-1.125 1.921875 -1.484375 2.75q-0.5625 1.28125 -0.890625 2.671875q-0.40625 1.734375 -0.40625 3.484375q0 4.46875 2.78125 8.921875l-1.203125 0zm8.853302 -4.0l-1.671875 0l0 -10.640625q-0.59375 0.578125 -1.578125 1.15625q-0.984375 0.5625 -1.765625 0.859375l0 -1.625q1.40625 -0.65625 2.453125 -1.59375q1.046875 -0.9375 1.484375 -1.8125l1.078125 0l0 13.65625zm3.4382324 0l3.59375 -5.125l-3.328125 -4.734375l2.09375 0l1.515625 2.3125q0.421875 0.65625 0.671875 1.109375q0.421875 -0.609375 0.765625 -1.09375l1.65625 -2.328125l1.984375 0l-3.390625 4.640625l3.65625 5.21875l-2.046875 0l-2.03125 -3.0625l-0.53125 -0.828125l-2.59375 3.890625l-2.015625 0zm18.640625 -10.265625l-1.65625 0.125q-0.21875 -0.984375 -0.640625 -1.421875q-0.671875 -0.71875 -1.65625 -0.71875q-0.8125 0 -1.40625 0.4375q-0.796875 0.578125 -1.25 1.6875q-0.453125 1.09375 -0.46875 3.140625q0.609375 -0.921875 1.46875 -1.359375q0.875 -0.453125 1.828125 -0.453125q1.671875 0 2.84375 1.234375q1.171875 1.234375 1.171875 3.171875q0 1.28125 -0.546875 2.390625q-0.546875 1.09375 -1.515625 1.6875q-0.96875 0.578125 -2.1875 0.578125q-2.09375 0 -3.40625 -1.53125q-1.3125 -1.546875 -1.3125 -5.0625q0 -3.953125 1.453125 -5.734375q1.265625 -1.5625 3.421875 -1.5625q1.609375 0 2.625 0.90625q1.03125 0.890625 1.234375 2.484375zm-6.8125 5.859375q0 0.859375 0.359375 1.65625q0.375 0.78125 1.03125 1.203125q0.65625 0.40625 1.375 0.40625q1.0625 0 1.8125 -0.84375q0.765625 -0.859375 0.765625 -2.328125q0 -1.40625 -0.75 -2.21875q-0.75 -0.8125 -1.890625 -0.8125q-1.125 0 -1.921875 0.8125q-0.78125 0.8125 -0.78125 2.125zm8.531952 0.8125l1.671875 -0.21875q0.28125 1.421875 0.96875 2.046875q0.703125 0.625 1.6875 0.625q1.1875 0 2.0 -0.8125q0.8125 -0.828125 0.8125 -2.03125q0 -1.140625 -0.765625 -1.890625q-0.75 -0.75 -1.90625 -0.75q-0.46875 0 -1.171875 0.1875l0.1875 -1.46875q0.15625 0.015625 0.265625 0.015625q1.0625 0 1.90625 -0.546875q0.859375 -0.5625 0.859375 -1.71875q0 -0.921875 -0.625 -1.515625q-0.609375 -0.609375 -1.59375 -0.609375q-0.96875 0 -1.625 0.609375q-0.640625 0.609375 -0.828125 1.84375l-1.671875 -0.296875q0.296875 -1.6875 1.375 -2.609375q1.09375 -0.921875 2.71875 -0.921875q1.109375 0 2.046875 0.484375q0.9375 0.46875 1.421875 1.296875q0.5 0.828125 0.5 1.75q0 0.890625 -0.46875 1.609375q-0.46875 0.71875 -1.40625 1.15625q1.21875 0.265625 1.875 1.15625q0.671875 0.875 0.671875 2.1875q0 1.78125 -1.296875 3.015625q-1.296875 1.234375 -3.28125 1.234375q-1.796875 0 -2.984375 -1.0625q-1.171875 -1.0625 -1.34375 -2.765625zm10.625732 0.453125l1.59375 -0.15625q0.203125 1.140625 0.78125 1.65625q0.578125 0.5 1.484375 0.5q0.765625 0 1.34375 -0.34375q0.578125 -0.359375 0.953125 -0.953125q0.375 -0.59375 0.625 -1.59375q0.25 -1.0 0.25 -2.03125q0 -0.109375 -0.015625 -0.34375q-0.5 0.796875 -1.375 1.296875q-0.859375 0.5 -1.875 0.5q-1.6875 0 -2.859375 -1.21875q-1.171875 -1.234375 -1.171875 -3.234375q0 -2.078125 1.21875 -3.328125q1.234375 -1.265625 3.0625 -1.265625q1.328125 0 2.421875 0.71875q1.109375 0.703125 1.671875 2.03125q0.578125 1.328125 0.578125 3.828125q0 2.609375 -0.578125 4.15625q-0.5625 1.546875 -1.6875 2.359375q-1.109375 0.796875 -2.609375 0.796875q-1.59375 0 -2.609375 -0.890625q-1.0 -0.890625 -1.203125 -2.484375zm6.828125 -6.0q0 -1.4375 -0.765625 -2.28125q-0.765625 -0.859375 -1.84375 -0.859375q-1.109375 0 -1.9375 0.921875q-0.828125 0.90625 -0.828125 2.34375q0 1.3125 0.78125 2.125q0.796875 0.796875 1.9375 0.796875q1.171875 0 1.90625 -0.796875q0.75 -0.8125 0.75 -2.25zm5.860077 1.765625q-1.046875 -0.375 -1.546875 -1.078125q-0.5 -0.71875 -0.5 -1.703125q0 -1.484375 1.0625 -2.484375q1.078125 -1.015625 2.84375 -1.015625q1.78125 0 2.859375 1.03125q1.09375 1.03125 1.09375 2.515625q0 0.953125 -0.5 1.65625q-0.484375 0.703125 -1.5 1.078125q1.25 0.40625 1.90625 1.3125q0.65625 0.90625 0.65625 2.171875q0 1.75 -1.234375 2.9375q-1.234375 1.1875 -3.25 1.1875q-2.015625 0 -3.25 -1.1875q-1.234375 -1.203125 -1.234375 -2.984375q0 -1.328125 0.671875 -2.21875q0.671875 -0.890625 1.921875 -1.21875zm-0.328125 -2.828125q0 0.96875 0.609375 1.578125q0.625 0.609375 1.625 0.609375q0.953125 0 1.5625 -0.609375q0.625 -0.609375 0.625 -1.484375q0 -0.921875 -0.640625 -1.546875q-0.625 -0.625 -1.578125 -0.625q-0.953125 0 -1.578125 0.609375q-0.625 0.609375 -0.625 1.46875zm-0.546875 6.28125q0 0.71875 0.328125 1.390625q0.34375 0.65625 1.015625 1.03125q0.671875 0.359375 1.4375 0.359375q1.203125 0 1.984375 -0.765625q0.78125 -0.78125 0.78125 -1.96875q0 -1.203125 -0.8125 -1.984375q-0.796875 -0.796875 -2.0 -0.796875q-1.1875 0 -1.96875 0.78125q-0.765625 0.78125 -0.765625 1.953125zm8.688232 0.328125l1.671875 -0.21875q0.28125 1.421875 0.96875 2.046875q0.703125 0.625 1.6875 0.625q1.1875 0 2.0 -0.8125q0.8125 -0.828125 0.8125 -2.03125q0 -1.140625 -0.765625 -1.890625q-0.75 -0.75 -1.90625 -0.75q-0.46875 0 -1.171875 0.1875l0.1875 -1.46875q0.15625 0.015625 0.265625 0.015625q1.0625 0 1.90625 -0.546875q0.859375 -0.5625 0.859375 -1.71875q0 -0.921875 -0.625 -1.515625q-0.609375 -0.609375 -1.59375 -0.609375q-0.96875 0 -1.625 0.609375q-0.640625 0.609375 -0.828125 1.84375l-1.671875 -0.296875q0.296875 -1.6875 1.375 -2.609375q1.09375 -0.921875 2.71875 -0.921875q1.109375 0 2.046875 0.484375q0.9375 0.46875 1.421875 1.296875q0.5 0.828125 0.5 1.75q0 0.890625 -0.46875 1.609375q-0.46875 0.71875 -1.40625 1.15625q1.21875 0.265625 1.875 1.15625q0.671875 0.875 0.671875 2.1875q0 1.78125 -1.296875 3.015625q-1.296875 1.234375 -3.28125 1.234375q-1.796875 0 -2.984375 -1.0625q-1.171875 -1.0625 -1.34375 -2.765625zm11.922607 7.59375l-1.1875 0q2.765625 -4.453125 2.765625 -8.921875q0 -1.734375 -0.390625 -3.453125q-0.328125 -1.390625 -0.890625 -2.671875q-0.359375 -0.84375 -1.484375 -2.78125l1.1875 0q1.75 2.328125 2.578125 4.671875q0.71875 2.015625 0.71875 4.234375q0 2.5 -0.96875 4.84375q-0.953125 2.328125 -2.328125 4.078125z" fill-rule="nonzero"></path><path fill="#000000" fill-opacity="0.0" d="m129.09448 420.41733l180.0 0l0 42.11023l-180.0 0z" fill-rule="evenodd"></path><path stroke="#000000" stroke-width="2.0" stroke-linejoin="round" stroke-linecap="butt" d="m129.09448 420.41733l180.0 0l0 42.11023l-180.0 0z" fill-rule="evenodd"></path><path fill="#000000" d="m166.58162 447.3373l0 -13.59375l2.71875 0l3.21875 9.625q0.4375 1.34375 0.640625 2.015625q0.234375 -0.75 0.734375 -2.1875l3.25 -9.453125l2.421875 0l0 13.59375l-1.734375 0l0 -11.390625l-3.953125 11.390625l-1.625 0l-3.9375 -11.578125l0 11.578125l-1.734375 0zm21.837677 0l0 -1.453125q-1.140625 1.671875 -3.125 1.671875q-0.859375 0 -1.625 -0.328125q-0.75 -0.34375 -1.125 -0.84375q-0.359375 -0.5 -0.515625 -1.234375q-0.09375 -0.5 -0.09375 -1.5625l0 -6.109375l1.671875 0l0 5.46875q0 1.3125 0.09375 1.765625q0.15625 0.65625 0.671875 1.03125q0.515625 0.375 1.265625 0.375q0.75 0 1.40625 -0.375q0.65625 -0.390625 0.921875 -1.046875q0.28125 -0.671875 0.28125 -1.9375l0 -5.28125l1.671875 0l0 9.859375l-1.5 0zm3.8913422 0l0 -13.59375l1.671875 0l0 13.59375l-1.671875 0zm7.832321 -1.5l0.234375 1.484375q-0.703125 0.140625 -1.265625 0.140625q-0.90625 0 -1.40625 -0.28125q-0.5 -0.296875 -0.703125 -0.75q-0.203125 -0.46875 -0.203125 -1.984375l0 -5.65625l-1.234375 0l0 -1.3125l1.234375 0l0 -2.4375l1.65625 -1.0l0 3.4375l1.6875 0l0 1.3125l-1.6875 0l0 5.75q0 0.71875 0.078125 0.921875q0.09375 0.203125 0.296875 0.328125q0.203125 0.125 0.578125 0.125q0.265625 0 0.734375 -0.078125zm1.5426788 -10.1875l0 -1.90625l1.671875 0l0 1.90625l-1.671875 0zm0 11.6875l0 -9.859375l1.671875 0l0 9.859375l-1.671875 0zm4.129196 3.78125l0 -13.640625l1.53125 0l0 1.28125q0.53125 -0.75 1.203125 -1.125q0.6875 -0.375 1.640625 -0.375q1.265625 0 2.234375 0.65625q0.96875 0.640625 1.453125 1.828125q0.5 1.1875 0.5 2.59375q0 1.515625 -0.546875 2.734375q-0.546875 1.203125 -1.578125 1.84375q-1.03125 0.640625 -2.171875 0.640625q-0.84375 0 -1.515625 -0.34375q-0.65625 -0.359375 -1.078125 -0.890625l0 4.796875l-1.671875 0zm1.515625 -8.65625q0 1.90625 0.765625 2.8125q0.78125 0.90625 1.875 0.90625q1.109375 0 1.890625 -0.9375q0.796875 -0.9375 0.796875 -2.921875q0 -1.875 -0.78125 -2.8125q-0.765625 -0.9375 -1.84375 -0.9375q-1.0625 0 -1.890625 1.0q-0.8125 1.0 -0.8125 2.890625zm8.828842 4.875l0 -13.59375l1.671875 0l0 13.59375l-1.671875 0zm4.191696 -11.6875l0 -1.90625l1.671875 0l0 1.90625l-1.671875 0zm0 11.6875l0 -9.859375l1.671875 0l0 9.859375l-1.671875 0zm10.566696 -3.609375l1.640625 0.21875q-0.265625 1.6875 -1.375 2.65625q-1.109375 0.953125 -2.734375 0.953125q-2.015625 0 -3.25 -1.3125q-1.21875 -1.328125 -1.21875 -3.796875q0 -1.59375 0.515625 -2.78125q0.53125 -1.203125 1.609375 -1.796875q1.09375 -0.609375 2.359375 -0.609375q1.609375 0 2.625 0.8125q1.015625 0.8125 1.3125 2.3125l-1.625 0.25q-0.234375 -1.0 -0.828125 -1.5q-0.59375 -0.5 -1.421875 -0.5q-1.265625 0 -2.0625 0.90625q-0.78125 0.90625 -0.78125 2.859375q0 1.984375 0.765625 2.890625q0.765625 0.890625 1.984375 0.890625q0.984375 0 1.640625 -0.59375q0.65625 -0.609375 0.84375 -1.859375zm9.328125 2.390625q-0.9375 0.796875 -1.796875 1.125q-0.859375 0.3125 -1.84375 0.3125q-1.609375 0 -2.484375 -0.78125q-0.875 -0.796875 -0.875 -2.03125q0 -0.734375 0.328125 -1.328125q0.328125 -0.59375 0.859375 -0.953125q0.53125 -0.359375 1.203125 -0.546875q0.5 -0.140625 1.484375 -0.25q2.03125 -0.25 2.984375 -0.578125q0 -0.34375 0 -0.4375q0 -1.015625 -0.46875 -1.4375q-0.640625 -0.5625 -1.90625 -0.5625q-1.171875 0 -1.734375 0.40625q-0.5625 0.40625 -0.828125 1.46875l-1.640625 -0.234375q0.234375 -1.046875 0.734375 -1.6875q0.515625 -0.640625 1.46875 -0.984375q0.96875 -0.359375 2.25 -0.359375q1.265625 0 2.046875 0.296875q0.78125 0.296875 1.15625 0.75q0.375 0.453125 0.515625 1.140625q0.09375 0.421875 0.09375 1.53125l0 2.234375q0 2.328125 0.09375 2.953125q0.109375 0.609375 0.4375 1.171875l-1.75 0q-0.265625 -0.515625 -0.328125 -1.21875zm-0.140625 -3.71875q-0.90625 0.359375 -2.734375 0.625q-1.03125 0.140625 -1.453125 0.328125q-0.421875 0.1875 -0.65625 0.546875q-0.234375 0.359375 -0.234375 0.796875q0 0.671875 0.5 1.125q0.515625 0.4375 1.484375 0.4375q0.96875 0 1.71875 -0.421875q0.75 -0.4375 1.109375 -1.15625q0.265625 -0.578125 0.265625 -1.671875l0 -0.609375zm7.735092 3.4375l0.234375 1.484375q-0.703125 0.140625 -1.265625 0.140625q-0.90625 0 -1.40625 -0.28125q-0.5 -0.296875 -0.703125 -0.75q-0.203125 -0.46875 -0.203125 -1.984375l0 -5.65625l-1.234375 0l0 -1.3125l1.234375 0l0 -2.4375l1.65625 -1.0l0 3.4375l1.6875 0l0 1.3125l-1.6875 0l0 5.75q0 0.71875 0.078125 0.921875q0.09375 0.203125 0.296875 0.328125q0.203125 0.125 0.578125 0.125q0.265625 0 0.734375 -0.078125zm1.5426788 -10.1875l0 -1.90625l1.671875 0l0 1.90625l-1.671875 0zm0 11.6875l0 -9.859375l1.671875 0l0 9.859375l-1.671875 0zm3.5041962 -4.921875q0 -2.734375 1.53125 -4.0625q1.265625 -1.09375 3.0937653 -1.09375q2.03125 0 3.3125 1.34375q1.296875 1.328125 1.296875 3.671875q0 1.90625 -0.578125 3.0q-0.5625 1.078125 -1.65625 1.6875q-1.078125 0.59375 -2.375 0.59375q-2.0625153 0 -3.3437653 -1.328125q-1.28125 -1.328125 -1.28125 -3.8125zm1.71875 0q0 1.890625 0.828125 2.828125q0.82814026 0.9375 2.0781403 0.9375q1.25 0 2.0625 -0.9375q0.828125 -0.953125 0.828125 -2.890625q0 -1.828125 -0.828125 -2.765625q-0.828125 -0.9375 -2.0625 -0.9375q-1.25 0 -2.0781403 0.9375q-0.828125 0.9375 -0.828125 2.828125zm9.281967 4.921875l0 -9.859375l1.5 0l0 1.40625q1.09375 -1.625 3.140625 -1.625q0.890625 0 1.640625 0.328125q0.75 0.3125 1.109375 0.84375q0.375 0.515625 0.53125 1.21875q0.09375 0.46875 0.09375 1.625l0 6.0625l-1.671875 0l0 -6.0q0 -1.015625 -0.203125 -1.515625q-0.1875 -0.515625 -0.6875 -0.8125q-0.5 -0.296875 -1.171875 -0.296875q-1.0625 0 -1.84375 0.671875q-0.765625 0.671875 -0.765625 2.578125l0 5.375l-1.671875 0z" fill-rule="nonzero"></path><path fill="#000000" fill-opacity="0.0" d="m129.09448 576.29395l180.0 0l0 42.11023l-180.0 0z" fill-rule="evenodd"></path><path stroke="#000000" stroke-width="2.0" stroke-linejoin="round" stroke-linecap="butt" d="m129.09448 576.29395l180.0 0l0 42.11023l-180.0 0z" fill-rule="evenodd"></path><path fill="#000000" d="m185.35355 603.214l5.234375 -13.59375l1.9375 0l5.5625 13.59375l-2.046875 0l-1.59375 -4.125l-5.6875 0l-1.484375 4.125l-1.921875 0zm3.921875 -5.578125l4.609375 0l-1.40625 -3.78125q-0.65625 -1.703125 -0.96875 -2.8125q-0.265625 1.3125 -0.734375 2.59375l-1.5 4.0zm16.193573 5.578125l0 -1.25q-0.9375 1.46875 -2.75 1.46875q-1.171875 0 -2.171875 -0.640625q-0.984375 -0.65625 -1.53125 -1.8125q-0.53125 -1.171875 -0.53125 -2.6875q0 -1.46875 0.484375 -2.671875q0.5 -1.203125 1.46875 -1.84375q0.984375 -0.640625 2.203125 -0.640625q0.890625 0 1.578125 0.375q0.703125 0.375 1.140625 0.984375l0 -4.875l1.65625 0l0 13.59375l-1.546875 0zm-5.28125 -4.921875q0 1.890625 0.796875 2.828125q0.8125 0.9375 1.890625 0.9375q1.09375 0 1.859375 -0.890625q0.765625 -0.890625 0.765625 -2.734375q0 -2.015625 -0.78125 -2.953125q-0.78125 -0.953125 -1.921875 -0.953125q-1.109375 0 -1.859375 0.90625q-0.75 0.90625 -0.75 2.859375zm15.656967 4.921875l0 -1.25q-0.9375 1.46875 -2.75 1.46875q-1.171875 0 -2.171875 -0.640625q-0.984375 -0.65625 -1.53125 -1.8125q-0.53125 -1.171875 -0.53125 -2.6875q0 -1.46875 0.484375 -2.671875q0.5 -1.203125 1.46875 -1.84375q0.984375 -0.640625 2.203125 -0.640625q0.890625 0 1.578125 0.375q0.703125 0.375 1.140625 0.984375l0 -4.875l1.65625 0l0 13.59375l-1.546875 0zm-5.28125 -4.921875q0 1.890625 0.796875 2.828125q0.8125 0.9375 1.890625 0.9375q1.09375 0 1.859375 -0.890625q0.765625 -0.890625 0.765625 -2.734375q0 -2.015625 -0.78125 -2.953125q-0.78125 -0.953125 -1.921875 -0.953125q-1.109375 0 -1.859375 0.90625q-0.75 0.90625 -0.75 2.859375zm9.281967 -6.765625l0 -1.90625l1.671875 0l0 1.90625l-1.671875 0zm0 11.6875l0 -9.859375l1.671875 0l0 9.859375l-1.671875 0zm7.785446 -1.5l0.234375 1.484375q-0.703125 0.140625 -1.265625 0.140625q-0.90625 0 -1.40625 -0.28125q-0.5 -0.296875 -0.703125 -0.75q-0.203125 -0.46875 -0.203125 -1.984375l0 -5.65625l-1.234375 0l0 -1.3125l1.234375 0l0 -2.4375l1.65625 -1.0l0 3.4375l1.6875 0l0 1.3125l-1.6875 0l0 5.75q0 0.71875 0.078125 0.921875q0.09375 0.203125 0.296875 0.328125q0.203125 0.125 0.578125 0.125q0.265625 0 0.734375 -0.078125zm1.5426788 -10.1875l0 -1.90625l1.671875 0l0 1.90625l-1.671875 0zm0 11.6875l0 -9.859375l1.671875 0l0 9.859375l-1.671875 0zm3.5041962 -4.921875q0 -2.734375 1.53125 -4.0625q1.265625 -1.09375 3.09375 -1.09375q2.03125 0 3.3125 1.34375q1.296875 1.328125 1.296875 3.671875q0 1.90625 -0.578125 3.0q-0.5625 1.078125 -1.65625 1.6875q-1.078125 0.59375 -2.375 0.59375q-2.0625 0 -3.34375 -1.328125q-1.28125 -1.328125 -1.28125 -3.8125zm1.71875 0q0 1.890625 0.828125 2.828125q0.828125 0.9375 2.078125 0.9375q1.25 0 2.0625 -0.9375q0.828125 -0.953125 0.828125 -2.890625q0 -1.828125 -0.828125 -2.765625q-0.828125 -0.9375 -2.0625 -0.9375q-1.25 0 -2.078125 0.9375q-0.828125 0.9375 -0.828125 2.828125zm9.281967 4.921875l0 -9.859375l1.5 0l0 1.40625q1.09375 -1.625 3.140625 -1.625q0.890625 0 1.640625 0.328125q0.75 0.3125 1.109375 0.84375q0.375 0.515625 0.53125 1.21875q0.09375 0.46875 0.09375 1.625l0 6.0625l-1.671875 0l0 -6.0q0 -1.015625 -0.203125 -1.515625q-0.1875 -0.515625 -0.6875 -0.8125q-0.5 -0.296875 -1.171875 -0.296875q-1.0625 0 -1.84375 0.671875q-0.765625 0.671875 -0.765625 2.578125l0 5.375l-1.671875 0z" fill-rule="nonzero"></path><path fill="#000000" fill-opacity="0.0" d="m388.49344 597.34644l-79.40158 0" fill-rule="evenodd"></path><path stroke="#000000" stroke-width="1.0" stroke-linejoin="round" stroke-linecap="butt" d="m388.49344 597.34644l-73.40158 0" fill-rule="evenodd"></path><path fill="#000000" stroke="#000000" stroke-width="1.0" stroke-linecap="butt" d="m315.09186 595.6947l-4.538086 1.6517334l4.538086 1.6517334z" fill-rule="evenodd"></path><path fill="#000000" fill-opacity="0.0" d="m388.49344 441.47244l-79.40158 0" fill-rule="evenodd"></path><path stroke="#000000" stroke-width="1.0" stroke-linejoin="round" stroke-linecap="butt" d="m388.49344 441.47244l-73.40158 0" fill-rule="evenodd"></path><path fill="#000000" stroke="#000000" stroke-width="1.0" stroke-linecap="butt" d="m315.09186 439.8207l-4.538086 1.6517334l4.538086 1.6517334z" fill-rule="evenodd"></path><path fill="#000000" fill-opacity="0.0" d="m219.09448 462.52756l0 31.84253" fill-rule="evenodd"></path><path stroke="#000000" stroke-width="1.0" stroke-linejoin="round" stroke-linecap="butt" d="m219.09448 462.52756l0 25.84253" fill-rule="evenodd"></path><path fill="#000000" stroke="#000000" stroke-width="1.0" stroke-linecap="butt" d="m217.44275 488.3701l1.6517334 4.538086l1.6517334 -4.538086z" fill-rule="evenodd"></path><path fill="#000000" fill-opacity="0.0" d="m561.5 51.755962l31.99347 0l0 545.57477l-25.001343 0" fill-rule="evenodd"></path><path stroke="#000000" stroke-width="1.0" stroke-linejoin="round" stroke-linecap="butt" d="m561.5 51.755962l31.99347 0l0 545.57477l-25.001343 0" fill-rule="evenodd"></path><path fill="#000000" fill-opacity="0.0" d="m478.49213 72.81108l0 339.1496" fill-rule="evenodd"></path><path stroke="#000000" stroke-width="1.0" stroke-linejoin="round" stroke-linecap="butt" d="m478.49213 72.81108l0 333.1496" fill-rule="evenodd"></path><path fill="#000000" stroke="#000000" stroke-width="1.0" stroke-linecap="butt" d="m476.8404 405.96066l1.6517334 4.5381165l1.6517334 -4.5381165z" fill-rule="evenodd"></path><path fill="#000000" fill-opacity="0.0" d="m590.00525 597.4094l-21.51184 -0.06298828" fill-rule="evenodd"></path><path stroke="#000000" stroke-width="1.0" stroke-linejoin="round" stroke-linecap="butt" d="m590.00525 597.4094l-15.511841 -0.045410156" fill-rule="evenodd"></path><path fill="#000000" stroke="#000000" stroke-width="1.0" stroke-linecap="butt" d="m574.4982 595.7123l-4.5429077 1.6384277l4.533264 1.6650391z" fill-rule="evenodd"></path><path fill="#000000" fill-opacity="0.0" d="m109.09449 494.357l220.0 0l0 42.11023l-220.0 0z" fill-rule="evenodd"></path><path stroke="#000000" stroke-width="2.0" stroke-linejoin="round" stroke-linecap="butt" d="m109.09449 494.357l220.0 0l0 42.11023l-220.0 0z" fill-rule="evenodd"></path><path fill="#000000" d="m126.81095 521.277l0 -13.59375l9.171867 0l0 1.59375l-7.375 0l0 4.21875l6.375 0l0 1.609375l-6.375 0l0 6.171875l-1.7968674 0zm17.536598 0l0 -1.453125q-1.140625 1.671875 -3.125 1.671875q-0.859375 0 -1.625 -0.328125q-0.75 -0.34375 -1.125 -0.84375q-0.359375 -0.5 -0.515625 -1.234375q-0.09375 -0.5 -0.09375 -1.5625l0 -6.109375l1.671875 0l0 5.46875q0 1.3125 0.09375 1.765625q0.15625 0.65625 0.671875 1.03125q0.515625 0.375 1.265625 0.375q0.75 0 1.40625 -0.375q0.65625 -0.390625 0.921875 -1.046875q0.28125 -0.671875 0.28125 -1.9375l0 -5.28125l1.671875 0l0 9.859375l-1.5 0zm3.8913422 0l0 -13.59375l1.671875 0l0 13.59375l-1.671875 0zm4.144821 0l0 -13.59375l1.671875 0l0 13.59375l-1.671875 0zm4.097946 3.796875l-0.171875 -1.5625q0.546875 0.140625 0.953125 0.140625q0.546875 0 0.875 -0.1875q0.34375 -0.1875 0.5625 -0.515625q0.15625 -0.25 0.5 -1.25q0.046875 -0.140625 0.15625 -0.40625l-3.734375 -9.875l1.796875 0l2.046875 5.71875q0.40625 1.078125 0.71875 2.28125q0.28125 -1.15625 0.6875 -2.25l2.09375 -5.75l1.671875 0l-3.75 10.03125q-0.59375 1.625 -0.9375 2.234375q-0.4375 0.828125 -1.015625 1.203125q-0.578125 0.390625 -1.375 0.390625q-0.484375 0 -1.078125 -0.203125zm19.328125 -8.5625l1.796875 0.453125q-0.5625 2.21875 -2.03125 3.390625q-1.46875 1.15625 -3.59375 1.15625q-2.203125 0 -3.578125 -0.890625q-1.375 -0.90625 -2.09375 -2.59375q-0.71875 -1.703125 -0.71875 -3.65625q0 -2.125 0.796875 -3.703125q0.8125 -1.578125 2.3125 -2.390625q1.5 -0.828125 3.296875 -0.828125q2.046875 0 3.4375 1.046875q1.390625 1.03125 1.9375 2.90625l-1.765625 0.421875q-0.46875 -1.484375 -1.375 -2.15625q-0.90625 -0.6875 -2.265625 -0.6875q-1.5625 0 -2.625 0.75q-1.046875 0.75 -1.484375 2.03125q-0.421875 1.265625 -0.421875 2.609375q0 1.734375 0.5 3.03125q0.515625 1.28125 1.578125 1.921875q1.078125 0.640625 2.3125 0.640625q1.515625 0 2.5625 -0.859375q1.046875 -0.875 1.421875 -2.59375zm2.9260712 -0.15625q0 -2.734375 1.53125 -4.0625q1.265625 -1.09375 3.09375 -1.09375q2.03125 0 3.3125 1.34375q1.296875 1.328125 1.296875 3.671875q0 1.90625 -0.578125 3.0q-0.5625 1.078125 -1.65625 1.6875q-1.078125 0.59375 -2.375 0.59375q-2.0625 0 -3.34375 -1.328125q-1.28125 -1.328125 -1.28125 -3.8125zm1.71875 0q0 1.890625 0.828125 2.828125q0.828125 0.9375 2.078125 0.9375q1.25 0 2.0625 -0.9375q0.828125 -0.953125 0.828125 -2.890625q0 -1.828125 -0.828125 -2.765625q-0.828125 -0.9375 -2.0625 -0.9375q-1.25 0 -2.078125 0.9375q-0.828125 0.9375 -0.828125 2.828125zm9.281967 4.921875l0 -9.859375l1.5 0l0 1.40625q1.09375 -1.625 3.140625 -1.625q0.890625 0 1.640625 0.328125q0.75 0.3125 1.109375 0.84375q0.375 0.515625 0.53125 1.21875q0.09375 0.46875 0.09375 1.625l0 6.0625l-1.671875 0l0 -6.0q0 -1.015625 -0.203125 -1.515625q-0.1875 -0.515625 -0.6875 -0.8125q-0.5 -0.296875 -1.171875 -0.296875q-1.0625 0 -1.84375 0.671875q-0.765625 0.671875 -0.765625 2.578125l0 5.375l-1.671875 0zm10.375717 0l0 -9.859375l1.5 0l0 1.40625q1.09375 -1.625 3.140625 -1.625q0.890625 0 1.640625 0.328125q0.75 0.3125 1.109375 0.84375q0.375 0.515625 0.53125 1.21875q0.09375 0.46875 0.09375 1.625l0 6.0625l-1.671875 0l0 -6.0q0 -1.015625 -0.203125 -1.515625q-0.1875 -0.515625 -0.6875 -0.8125q-0.5 -0.296875 -1.171875 -0.296875q-1.0625 0 -1.84375 0.671875q-0.765625 0.671875 -0.765625 2.578125l0 5.375l-1.671875 0zm17.125717 -3.171875l1.71875 0.21875q-0.40625 1.5 -1.515625 2.34375q-1.09375 0.828125 -2.8125 0.828125q-2.15625 0 -3.421875 -1.328125q-1.265625 -1.328125 -1.265625 -3.734375q0 -2.484375 1.265625 -3.859375q1.28125 -1.375 3.328125 -1.375q1.984375 0 3.234375 1.34375q1.25 1.34375 1.25 3.796875q0 0.140625 -0.015625 0.4375l-7.34375 0q0.09375 1.625 0.921875 2.484375q0.828125 0.859375 2.0625 0.859375q0.90625 0 1.546875 -0.46875q0.65625 -0.484375 1.046875 -1.546875zm-5.484375 -2.703125l5.5 0q-0.109375 -1.234375 -0.625 -1.859375q-0.796875 -0.96875 -2.078125 -0.96875q-1.140625 0 -1.9375 0.78125q-0.78125 0.765625 -0.859375 2.046875zm15.547592 2.265625l1.640625 0.21875q-0.265625 1.6875 -1.375 2.65625q-1.109375 0.953125 -2.734375 0.953125q-2.015625 0 -3.25 -1.3125q-1.21875 -1.328125 -1.21875 -3.796875q0 -1.59375 0.515625 -2.78125q0.53125 -1.203125 1.609375 -1.796875q1.09375 -0.609375 2.359375 -0.609375q1.609375 0 2.625 0.8125q1.015625 0.8125 1.3125 2.3125l-1.625 0.25q-0.234375 -1.0 -0.828125 -1.5q-0.59375 -0.5 -1.421875 -0.5q-1.265625 0 -2.0625 0.90625q-0.78125 0.90625 -0.78125 2.859375q0 1.984375 0.765625 2.890625q0.765625 0.890625 1.984375 0.890625q0.984375 0 1.640625 -0.59375q0.65625 -0.609375 0.84375 -1.859375zm6.546875 2.109375l0.234375 1.484375q-0.703125 0.140625 -1.265625 0.140625q-0.90625 0 -1.40625 -0.28125q-0.5 -0.296875 -0.703125 -0.75q-0.203125 -0.46875 -0.203125 -1.984375l0 -5.65625l-1.234375 0l0 -1.3125l1.234375 0l0 -2.4375l1.65625 -1.0l0 3.4375l1.6875 0l0 1.3125l-1.6875 0l0 5.75q0 0.71875 0.078125 0.921875q0.09375 0.203125 0.296875 0.328125q0.203125 0.125 0.578125 0.125q0.265625 0 0.734375 -0.078125zm8.277054 -1.671875l1.71875 0.21875q-0.40625 1.5 -1.515625 2.34375q-1.09375 0.828125 -2.8125 0.828125q-2.15625 0 -3.421875 -1.328125q-1.265625 -1.328125 -1.265625 -3.734375q0 -2.484375 1.265625 -3.859375q1.28125 -1.375 3.328125 -1.375q1.984375 0 3.234375 1.34375q1.25 1.34375 1.25 3.796875q0 0.140625 -0.015625 0.4375l-7.34375 0q0.09375 1.625 0.921875 2.484375q0.828125 0.859375 2.0625 0.859375q0.90625 0 1.546875 -0.46875q0.65625 -0.484375 1.046875 -1.546875zm-5.484375 -2.703125l5.5 0q-0.109375 -1.234375 -0.625 -1.859375q-0.796875 -0.96875 -2.078125 -0.96875q-1.140625 0 -1.9375 0.78125q-0.78125 0.765625 -0.859375 2.046875zm15.500717 5.875l0 -1.25q-0.9375 1.46875 -2.75 1.46875q-1.171875 0 -2.171875 -0.640625q-0.984375 -0.65625 -1.53125 -1.8125q-0.53125 -1.171875 -0.53125 -2.6875q0 -1.46875 0.484375 -2.671875q0.5 -1.203125 1.46875 -1.84375q0.984375 -0.640625 2.203125 -0.640625q0.890625 0 1.578125 0.375q0.703125 0.375 1.140625 0.984375l0 -4.875l1.65625 0l0 13.59375l-1.546875 0zm-5.28125 -4.921875q0 1.890625 0.796875 2.828125q0.8125 0.9375 1.890625 0.9375q1.09375 0 1.859375 -0.890625q0.765625 -0.890625 0.765625 -2.734375q0 -2.015625 -0.78125 -2.953125q-0.78125 -0.953125 -1.921875 -0.953125q-1.109375 0 -1.859375 0.90625q-0.75 0.90625 -0.75 2.859375zm17.637161 8.921875q-1.375 -1.75 -2.328125 -4.078125q-0.953125 -2.34375 -0.953125 -4.84375q0 -2.21875 0.703125 -4.234375q0.84375 -2.34375 2.578125 -4.671875l1.203125 0q-1.125 1.921875 -1.484375 2.75q-0.5625 1.28125 -0.890625 2.671875q-0.40625 1.734375 -0.40625 3.484375q0 4.46875 2.78125 8.921875l-1.203125 0zm2.572052 -7.59375l1.671875 -0.21875q0.28125 1.421875 0.96875 2.046875q0.703125 0.625 1.6875 0.625q1.1875 0 2.0 -0.8125q0.8125 -0.828125 0.8125 -2.03125q0 -1.140625 -0.765625 -1.890625q-0.75 -0.75 -1.90625 -0.75q-0.46875 0 -1.171875 0.1875l0.1875 -1.46875q0.15625 0.015625 0.265625 0.015625q1.0625 0 1.90625 -0.546875q0.859375 -0.5625 0.859375 -1.71875q0 -0.921875 -0.625 -1.515625q-0.609375 -0.609375 -1.59375 -0.609375q-0.96875 0 -1.625 0.609375q-0.640625 0.609375 -0.828125 1.84375l-1.671875 -0.296875q0.296875 -1.6875 1.375 -2.609375q1.09375 -0.921875 2.71875 -0.921875q1.109375 0 2.046875 0.484375q0.9375 0.46875 1.421875 1.296875q0.5 0.828125 0.5 1.75q0 0.890625 -0.46875 1.609375q-0.46875 0.71875 -1.40625 1.15625q1.21875 0.265625 1.875 1.15625q0.671875 0.875 0.671875 2.1875q0 1.78125 -1.296875 3.015625q-1.296875 1.234375 -3.28125 1.234375q-1.796875 0 -2.984375 -1.0625q-1.171875 -1.0625 -1.34375 -2.765625zm19.141327 1.984375l0 1.609375l-8.984375 0q-0.015625 -0.609375 0.1875 -1.15625q0.34375 -0.921875 1.09375 -1.8125q0.765625 -0.890625 2.1875 -2.0625q2.21875 -1.8125 3.0 -2.875q0.78125 -1.0625 0.78125 -2.015625q0 -0.984375 -0.71875 -1.671875q-0.703125 -0.6875 -1.84375 -0.6875q-1.203125 0 -1.9375 0.734375q-0.71875 0.71875 -0.71875 2.0l-1.71875 -0.171875q0.171875 -1.921875 1.328125 -2.921875q1.15625 -1.015625 3.09375 -1.015625q1.953125 0 3.09375 1.09375q1.140625 1.078125 1.140625 2.6875q0 0.8125 -0.34375 1.609375q-0.328125 0.78125 -1.109375 1.65625q-0.765625 0.859375 -2.5625 2.390625q-1.5 1.265625 -1.9375 1.71875q-0.421875 0.4375 -0.703125 0.890625l6.671875 0zm0.9538574 1.609375l3.59375 -5.125l-3.328125 -4.734375l2.09375 0l1.515625 2.3125q0.421875 0.65625 0.671875 1.109375q0.421875 -0.609375 0.765625 -1.09375l1.65625 -2.328125l1.984375 0l-3.390625 4.640625l3.65625 5.21875l-2.046875 0l-2.03125 -3.0625l-0.53125 -0.828125l-2.59375 3.890625l-2.015625 0zm16.265625 0l-1.671875 0l0 -10.640625q-0.59375 0.578125 -1.578125 1.15625q-0.984375 0.5625 -1.765625 0.859375l0 -1.625q1.40625 -0.65625 2.453125 -1.59375q1.046875 -0.9375 1.484375 -1.8125l1.078125 0l0 13.65625zm5.641327 4.0l-1.1875 0q2.765625 -4.453125 2.765625 -8.921875q0 -1.734375 -0.390625 -3.453125q-0.328125 -1.390625 -0.890625 -2.671875q-0.359375 -0.84375 -1.484375 -2.78125l1.1875 0q1.75 2.328125 2.578125 4.671875q0.71875 2.015625 0.71875 4.234375q0 2.5 -0.96875 4.84375q-0.953125 2.328125 -2.328125 4.078125z" fill-rule="nonzero"></path><path fill="#000000" fill-opacity="0.0" d="m219.09448 536.4672l0 39.811035" fill-rule="evenodd"></path><path stroke="#000000" stroke-width="1.0" stroke-linejoin="round" stroke-linecap="butt" d="m219.09448 536.4672l0 33.811035" fill-rule="evenodd"></path><path fill="#000000" stroke="#000000" stroke-width="1.0" stroke-linecap="butt" d="m217.44275 570.27826l1.6517334 4.538086l1.6517334 -4.538086z" fill-rule="evenodd"></path></g></svg>
+
-- 
GitLab


From b8bef6e6c89931768ac1f6b28d834d359e761410 Mon Sep 17 00:00:00 2001
From: Sanjoy Das <sanjoy@google.com>
Date: Fri, 17 Nov 2017 11:26:54 -0800
Subject: [PATCH 0057/1225] Rename optimal to optimal_seconds in
 HloCostAnalysis etc.

PiperOrigin-RevId: 176136105
---
 .../compiler/xla/service/hlo_cost_analysis.cc | 20 +++++++++----------
 .../compiler/xla/service/hlo_cost_analysis.h  |  6 +++---
 .../xla/service/hlo_cost_analysis_test.cc     |  2 +-
 .../xla/service/hlo_execution_profile.cc      |  2 +-
 .../xla/service/hlo_profile_printer.cc        |  2 +-
 .../xla/service/hlo_profile_printer.h         |  2 +-
 6 files changed, 17 insertions(+), 17 deletions(-)

diff --git a/tensorflow/compiler/xla/service/hlo_cost_analysis.cc b/tensorflow/compiler/xla/service/hlo_cost_analysis.cc
index 1877065f67..a24457edbf 100644
--- a/tensorflow/compiler/xla/service/hlo_cost_analysis.cc
+++ b/tensorflow/compiler/xla/service/hlo_cost_analysis.cc
@@ -28,7 +28,7 @@ namespace xla {
 constexpr char HloCostAnalysis::kFlopsKey[];
 constexpr char HloCostAnalysis::kTranscendentalsKey[];
 constexpr char HloCostAnalysis::kBytesAccessedKey[];
-constexpr char HloCostAnalysis::kSecondsKey[];
+constexpr char HloCostAnalysis::kOptimalSecondsKey[];
 
 HloCostAnalysis::HloCostAnalysis(const ShapeSizeFunction& shape_size)
     : HloCostAnalysis(shape_size, {}) {}
@@ -60,16 +60,16 @@ Status HloCostAnalysis::Postprocess(const HloInstruction* hlo) {
   if (current_should_compute_bottleneck_time_) {
     // Compute the time as the time of the bottleneck, i.e. the slowest property
     // given the per-second rate of each property.
-    float max_seconds = 0.0f;
+    float optimal_seconds = 0.0f;
     for (const auto& property : current_properties_) {
-      if (property.first != kSecondsKey) {
-        max_seconds = std::max(
-            max_seconds,
+      if (property.first != kOptimalSecondsKey) {
+        optimal_seconds = std::max(
+            optimal_seconds,
             property.second /
                 GetProperty(property.first, per_second_rates_, INFINITY));
       }
     }
-    current_properties_[kSecondsKey] = max_seconds;
+    current_properties_[kOptimalSecondsKey] = optimal_seconds;
   }
 
   TF_RET_CHECK(hlo_properties_.emplace(hlo, current_properties_).second);
@@ -496,8 +496,8 @@ float HloCostAnalysis::bytes_accessed() const {
   return GetProperty(kBytesAccessedKey, properties_sum_);
 }
 
-float HloCostAnalysis::seconds() const {
-  return GetProperty(kSecondsKey, properties_sum_);
+float HloCostAnalysis::optimal_seconds() const {
+  return GetProperty(kOptimalSecondsKey, properties_sum_);
 }
 
 int64 HloCostAnalysis::flop_count(const HloInstruction& hlo) const {
@@ -512,8 +512,8 @@ int64 HloCostAnalysis::bytes_accessed(const HloInstruction& hlo) const {
   return GetPropertyForHlo(hlo, kBytesAccessedKey, hlo_properties_);
 }
 
-float HloCostAnalysis::seconds(const HloInstruction& hlo) const {
-  return GetPropertyForHlo(hlo, kSecondsKey, hlo_properties_);
+float HloCostAnalysis::optimal_seconds(const HloInstruction& hlo) const {
+  return GetPropertyForHlo(hlo, kOptimalSecondsKey, hlo_properties_);
 }
 
 StatusOr<HloCostAnalysis::Properties> HloCostAnalysis::ProcessSubcomputation(
diff --git a/tensorflow/compiler/xla/service/hlo_cost_analysis.h b/tensorflow/compiler/xla/service/hlo_cost_analysis.h
index 0f44775378..e785596c8e 100644
--- a/tensorflow/compiler/xla/service/hlo_cost_analysis.h
+++ b/tensorflow/compiler/xla/service/hlo_cost_analysis.h
@@ -42,7 +42,7 @@ class HloCostAnalysis : public ConstDfsHloVisitor {
   static constexpr char kFlopsKey[] = "flops";
   static constexpr char kTranscendentalsKey[] = "transcendentals";
   static constexpr char kBytesAccessedKey[] = "bytes accessed";
-  static constexpr char kSecondsKey[] = "seconds";
+  static constexpr char kOptimalSecondsKey[] = "optimal_seconds";
 
   // shape_size is a function which returns the size in bytes of the top-level
   // buffer of a shape.
@@ -118,14 +118,14 @@ class HloCostAnalysis : public ConstDfsHloVisitor {
   float flop_count() const;
   float transcendental_count() const;
   float bytes_accessed() const;
-  float seconds() const;
+  float optimal_seconds() const;
 
   // Returns the respective cost computed for a particular HLO instruction, or 0
   // if the HLO was not found to have a cost in the analysis.
   int64 flop_count(const HloInstruction& hlo) const;
   int64 transcendental_count(const HloInstruction& hlo) const;
   int64 bytes_accessed(const HloInstruction& hlo) const;
-  float seconds(const HloInstruction& hlo) const;
+  float optimal_seconds(const HloInstruction& hlo) const;
 
   const Properties& properties() const { return properties_sum_; }
   const float property(const string& key) const {
diff --git a/tensorflow/compiler/xla/service/hlo_cost_analysis_test.cc b/tensorflow/compiler/xla/service/hlo_cost_analysis_test.cc
index 0eaa21ef25..3b289c240a 100644
--- a/tensorflow/compiler/xla/service/hlo_cost_analysis_test.cc
+++ b/tensorflow/compiler/xla/service/hlo_cost_analysis_test.cc
@@ -389,7 +389,7 @@ TEST_F(FusionCostAnalysis, LoopFusion) {
     static_assert(bytes_accessed == 64, "");
     EXPECT_EQ(fusion_analysis.bytes_accessed(), bytes_accessed);
 
-    EXPECT_EQ(fusion_analysis.seconds(), 1 << i);
+    EXPECT_EQ(fusion_analysis.optimal_seconds(), 1 << i);
   }
 }
 
diff --git a/tensorflow/compiler/xla/service/hlo_execution_profile.cc b/tensorflow/compiler/xla/service/hlo_execution_profile.cc
index 755374b91d..9e256b9b37 100644
--- a/tensorflow/compiler/xla/service/hlo_execution_profile.cc
+++ b/tensorflow/compiler/xla/service/hlo_execution_profile.cc
@@ -83,7 +83,7 @@ static HloProfilePrinter CreateOwnedHloProfilePrinter(
       instruction_info->transcendental_count =
           cost_analysis.transcendental_count(*hlo);
       instruction_info->bytes_accessed = cost_analysis.bytes_accessed(*hlo);
-      instruction_info->seconds = cost_analysis.seconds(*hlo);
+      instruction_info->optimal_seconds = cost_analysis.optimal_seconds(*hlo);
       instruction_info->profile_index =
           hlo_profile_index_map.GetProfileIndexFor(*hlo);
       CHECK_LT(instruction_info->profile_index, max_profile_index);
diff --git a/tensorflow/compiler/xla/service/hlo_profile_printer.cc b/tensorflow/compiler/xla/service/hlo_profile_printer.cc
index 071c5a6629..e944ad1513 100644
--- a/tensorflow/compiler/xla/service/hlo_profile_printer.cc
+++ b/tensorflow/compiler/xla/service/hlo_profile_printer.cc
@@ -50,7 +50,7 @@ string HloProfilePrinter::ToString(const int64* counters,
           /*short_name=*/instruction->short_name, instruction->category,
           counters[instruction->profile_index], instruction->flop_count,
           instruction->transcendental_count, instruction->bytes_accessed,
-          instruction->seconds);
+          instruction->optimal_seconds);
     }
 
     result += builder.ToString();
diff --git a/tensorflow/compiler/xla/service/hlo_profile_printer.h b/tensorflow/compiler/xla/service/hlo_profile_printer.h
index 45921c66f6..316753a82a 100644
--- a/tensorflow/compiler/xla/service/hlo_profile_printer.h
+++ b/tensorflow/compiler/xla/service/hlo_profile_printer.h
@@ -41,7 +41,7 @@ class HloProfilePrinter {
     float flop_count;
     float transcendental_count;
     float bytes_accessed;
-    float seconds;
+    float optimal_seconds;
 
     // The index into the profile counters array for the HloInstruction
     // corresponding to this HloInstructionInfo.
-- 
GitLab


From 9dd9246d76aeada08f07d8c9550d7eedb0809713 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 17 Nov 2017 12:08:27 -0800
Subject: [PATCH 0058/1225] Open-sourcing AddSign and PowerSign optimizers,
 found in Neural Optimizer Search with Reinforcement Learning [Bello et al,
 ICML2017]

PiperOrigin-RevId: 176142062
---
 tensorflow/contrib/opt/BUILD                  |  51 ++++
 tensorflow/contrib/opt/__init__.py            |   5 +-
 .../contrib/opt/python/training/addsign.py    | 169 +++++++++++
 .../opt/python/training/addsign_test.py       | 262 +++++++++++++++++
 .../contrib/opt/python/training/powersign.py  | 173 +++++++++++
 .../opt/python/training/powersign_test.py     | 268 ++++++++++++++++++
 .../contrib/opt/python/training/sign_decay.py | 158 +++++++++++
 .../opt/python/training/sign_decay_test.py    | 110 +++++++
 tensorflow/core/kernels/training_ops.cc       | 250 +++++++++++++++-
 tensorflow/core/kernels/training_ops.h        |  23 ++
 .../core/kernels/training_ops_gpu.cu.cc       |  74 +++++
 tensorflow/core/kernels/training_ops_test.cc  |  74 +++++
 tensorflow/core/ops/training_ops.cc           | 137 +++++++++
 tensorflow/core/ops/training_ops_test.cc      |  34 +++
 14 files changed, 1786 insertions(+), 2 deletions(-)
 create mode 100644 tensorflow/contrib/opt/python/training/addsign.py
 create mode 100644 tensorflow/contrib/opt/python/training/addsign_test.py
 create mode 100644 tensorflow/contrib/opt/python/training/powersign.py
 create mode 100644 tensorflow/contrib/opt/python/training/powersign_test.py
 create mode 100644 tensorflow/contrib/opt/python/training/sign_decay.py
 create mode 100644 tensorflow/contrib/opt/python/training/sign_decay_test.py

diff --git a/tensorflow/contrib/opt/BUILD b/tensorflow/contrib/opt/BUILD
index 096d2270e4..8c46becf2c 100644
--- a/tensorflow/contrib/opt/BUILD
+++ b/tensorflow/contrib/opt/BUILD
@@ -14,11 +14,14 @@ py_library(
     name = "opt_py",
     srcs = [
         "__init__.py",
+        "python/training/addsign.py",
         "python/training/drop_stale_gradient_optimizer.py",
         "python/training/external_optimizer.py",
         "python/training/lazy_adam_optimizer.py",
         "python/training/moving_average_optimizer.py",
         "python/training/nadam_optimizer.py",
+        "python/training/powersign.py",
+        "python/training/sign_decay.py",
         "python/training/variable_clipping_optimizer.py",
     ],
     srcs_version = "PY2AND3",
@@ -152,6 +155,54 @@ tf_py_test(
     ],
 )
 
+py_test(
+    name = "sign_decay_test",
+    srcs = ["python/training/sign_decay_test.py"],
+    srcs_version = "PY2AND3",
+    deps = [
+        ":opt_py",
+        "//tensorflow/python:client_testlib",
+    ],
+)
+
+py_test(
+    name = "addsign_test",
+    srcs = ["python/training/addsign_test.py"],
+    srcs_version = "PY2AND3",
+    deps = [
+        ":opt_py",
+        "//tensorflow/python:array_ops",
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:constant_op",
+        "//tensorflow/python:dtypes",
+        "//tensorflow/python:framework_ops",
+        "//tensorflow/python:math_ops",
+        "//tensorflow/python:resource_variable_ops",
+        "//tensorflow/python:session",
+        "//tensorflow/python:variables",
+        "//third_party/py/numpy",
+    ],
+)
+
+py_test(
+    name = "powersign_test",
+    srcs = ["python/training/powersign_test.py"],
+    srcs_version = "PY2AND3",
+    deps = [
+        ":opt_py",
+        "//tensorflow/python:array_ops",
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:constant_op",
+        "//tensorflow/python:dtypes",
+        "//tensorflow/python:framework_ops",
+        "//tensorflow/python:math_ops",
+        "//tensorflow/python:resource_variable_ops",
+        "//tensorflow/python:session",
+        "//tensorflow/python:variables",
+        "//third_party/py/numpy",
+    ],
+)
+
 filegroup(
     name = "all_files",
     srcs = glob(
diff --git a/tensorflow/contrib/opt/__init__.py b/tensorflow/contrib/opt/__init__.py
index e194fa2d4d..caf22536bb 100644
--- a/tensorflow/contrib/opt/__init__.py
+++ b/tensorflow/contrib/opt/__init__.py
@@ -19,12 +19,14 @@ from __future__ import division
 from __future__ import print_function
 
 # pylint: disable=wildcard-import
+from tensorflow.contrib.opt.python.training.addsign import *
 from tensorflow.contrib.opt.python.training.drop_stale_gradient_optimizer import *
 from tensorflow.contrib.opt.python.training.external_optimizer import *
 from tensorflow.contrib.opt.python.training.lazy_adam_optimizer import *
-from tensorflow.contrib.opt.python.training.nadam_optimizer import *
 from tensorflow.contrib.opt.python.training.moving_average_optimizer import *
 from tensorflow.contrib.opt.python.training.nadam_optimizer import *
+from tensorflow.contrib.opt.python.training.nadam_optimizer import *
+from tensorflow.contrib.opt.python.training.powersign import *
 from tensorflow.contrib.opt.python.training.variable_clipping_optimizer import *
 # pylint: enable=wildcard-import
 
@@ -32,6 +34,7 @@ from tensorflow.python.util.all_util import remove_undocumented
 
 
 _allowed_symbols = [
+    'PowerSignOptimizer', 'AddSignOptimizer'
     'DelayCompensatedGradientDescentOptimizer',
     'DropStaleGradientOptimizer', 'ExternalOptimizerInterface',
     'LazyAdamOptimizer', 'NadamOptimizer', 'MovingAverageOptimizer',
diff --git a/tensorflow/contrib/opt/python/training/addsign.py b/tensorflow/contrib/opt/python/training/addsign.py
new file mode 100644
index 0000000000..729e59cb0a
--- /dev/null
+++ b/tensorflow/contrib/opt/python/training/addsign.py
@@ -0,0 +1,169 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Implementation of AddSign."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from tensorflow.python.framework import ops
+from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import control_flow_ops
+from tensorflow.python.ops import math_ops
+from tensorflow.python.ops import state_ops
+from tensorflow.python.training import optimizer
+from tensorflow.python.training import training_ops
+
+
+class AddSignOptimizer(optimizer.Optimizer):
+  """Optimizer that implements the AddSign update.
+
+  See  Neural Optimizer Search with Reinforcement Learning
+  [Bello et al., ICML2017].
+  """
+
+  def __init__(self,
+               learning_rate=0.1,
+               alpha=1.0,
+               beta=0.9,
+               sign_decay_fn=None,
+               use_locking=False,
+               name='AddSignOptimizer'):
+    """Constructs a new AddSignOptimizer object.
+
+    Initialization:
+
+    m_0 <- 0 (Initialize initial 1st moment vector)
+    t <- 0 (Initialize timestep)
+    ```
+
+    Update:
+
+    ```
+    t <- t + 1
+    m_t <- beta1 * m_{t-1} + (1 - beta1) * g
+    sign_decay <- sign_decay(t)
+    update <- (alpha + sign_decay * sign(g) *sign(m)) * g
+    variable <- variable - lr_t * update
+    ```
+
+    Example for AddSign-ld (AddSign with linear sign decay)
+    ```
+    decay_steps = 1000
+    linear_decay_fn = sign_decays.get_linear_decay_fn(decay_steps)
+    opt = AddSignOptimizer(learning_rate=0.1, sign_decay_fn=linear_decay_fn)
+    ```
+
+    Args:
+      learning_rate: learning_rate used when taking a step.
+      alpha: alpha used in optimizer.
+      beta: decay used for computing the moving average m.
+      sign_decay_fn: decay function applied to the sign(g*m) quantity.
+          Takes global_step as an argument and returns the quantity to multiply
+          the sign(g*m) by.
+        compute (1.0 + alpha * decay * sign(g) * sign(m)) * m.
+      use_locking: If True use locks for update operations.
+      name: Optional name for the operations created when applying gradients.
+        Defaults to "AddSignOptimizer".
+    """
+    super(AddSignOptimizer, self).__init__(use_locking, name)
+    self._lr = learning_rate
+    self._alpha = alpha
+    self._beta = beta
+
+    self._sign_decay_fn = sign_decay_fn
+
+    # Tensor versions of the constructor arguments, created in _prepare().
+    self._lr_t = None
+    self._alpha_t = None
+    self._beta_t = None
+
+  def apply_gradients(self, grads_and_vars, global_step=None, name=None):
+    if self._sign_decay_fn is not None:
+      self._sign_decay_t = ops.convert_to_tensor(
+          self._sign_decay_fn(global_step), name='sign_decay')
+    return super(AddSignOptimizer, self).apply_gradients(
+        grads_and_vars, global_step=global_step, name=name)
+
+  def _create_slots(self, var_list):
+    # Create slots for the first moment.
+    for v in var_list:
+      self._zeros_slot(v, 'm', self._name)
+
+  def _prepare(self):
+    self._lr_t = ops.convert_to_tensor(self._lr, name='learning_rate')
+    self._beta_t = ops.convert_to_tensor(self._beta, name='beta')
+    self._alpha_t = ops.convert_to_tensor(self._alpha, name='alpha')
+    if self._sign_decay_fn is None:
+      self._sign_decay_t = ops.convert_to_tensor(1.0, name='sign_decay')
+
+  def _apply_dense(self, grad, var):
+    m = self.get_slot(var, 'm')
+    return training_ops.apply_add_sign(
+        var,
+        m,
+        math_ops.cast(self._lr_t, var.dtype.base_dtype),
+        math_ops.cast(self._alpha_t, var.dtype.base_dtype),
+        math_ops.cast(self._sign_decay_t, var.dtype.base_dtype),
+        math_ops.cast(self._beta_t, var.dtype.base_dtype),
+        grad,
+        use_locking=self._use_locking).op
+
+  def _resource_apply_dense(self, grad, var):
+    m = self.get_slot(var, 'm')
+    return training_ops.resource_apply_add_sign(
+        var.handle,
+        m.handle,
+        math_ops.cast(self._lr_t, var.dtype.base_dtype),
+        math_ops.cast(self._alpha_t, var.dtype.base_dtype),
+        math_ops.cast(self._sign_decay_t, var.dtype.base_dtype),
+        math_ops.cast(self._beta_t, var.dtype.base_dtype),
+        grad,
+        use_locking=self._use_locking)
+
+  def _apply_sparse(self, grad, var):
+    lr_t = math_ops.cast(self._lr_t, var.dtype.base_dtype)
+    alpha_t = math_ops.cast(self._alpha_t, var.dtype.base_dtype)
+    beta_t = math_ops.cast(self._beta_t, var.dtype.base_dtype)
+
+    m = self.get_slot(var, 'm')
+    m_t = state_ops.assign(
+        m, (m * beta_t) + (grad * (1 - beta_t)), use_locking=self._use_locking)
+
+    sign_g = ops.IndexedSlices(
+        math_ops.sign(grad.values), grad.indices, dense_shape=grad.dense_shape)
+    sign_gm = ops.IndexedSlices(
+        array_ops.gather(math_ops.sign(m_t), sign_g.indices) * sign_g.values,
+        sign_g.indices,
+        dense_shape=sign_g.dense_shape)
+
+    sign_decayed = math_ops.cast(
+        self._sign_decay_t, var.dtype.base_dtype)
+    multiplier_values = alpha_t + sign_decayed * sign_gm.values
+    multiplier = ops.IndexedSlices(
+        multiplier_values, sign_gm.indices, dense_shape=sign_gm.dense_shape)
+
+    final_update = ops.IndexedSlices(
+        lr_t * multiplier.values * grad.values,
+        multiplier.indices,
+        dense_shape=multiplier.dense_shape)
+
+    var_update = state_ops.scatter_sub(
+        var,
+        final_update.indices,
+        final_update.values,
+        use_locking=self._use_locking)
+
+    return control_flow_ops.group(* [var_update, m_t])
diff --git a/tensorflow/contrib/opt/python/training/addsign_test.py b/tensorflow/contrib/opt/python/training/addsign_test.py
new file mode 100644
index 0000000000..bd19ee3e7a
--- /dev/null
+++ b/tensorflow/contrib/opt/python/training/addsign_test.py
@@ -0,0 +1,262 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for AddSign."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import numpy as np
+
+from tensorflow.contrib.opt.python.training import addsign
+from tensorflow.contrib.opt.python.training import sign_decay
+from tensorflow.python.eager import context
+from tensorflow.python.framework import constant_op
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import ops
+from tensorflow.python.ops import resource_variable_ops
+from tensorflow.python.ops import variables
+from tensorflow.python.platform import test
+
+
+def py_linear_decay_fn(decay_steps):
+  def linear_decay(step):
+    step = min(step, decay_steps)
+    return float(decay_steps - step) / decay_steps
+  return linear_decay
+
+
+def addsign_update_numpy(params,
+                         g_t,
+                         m,
+                         lr,
+                         alpha=1.0,
+                         beta=0.9,
+                         py_sign_decay_fn=None,
+                         t=None):
+  m_t = beta * m + (1 - beta) * g_t
+  if py_sign_decay_fn is None:
+    sign_decayed = 1.0
+  else:
+    sign_decayed = py_sign_decay_fn(t-1)
+  multiplier = alpha + sign_decayed * np.sign(g_t) * np.sign(m_t)
+  params_t = params - lr * multiplier * g_t
+  return params_t, m_t
+
+
+class AddSignTest(test.TestCase):
+
+  def _testDense(self,
+                 use_resource=False,
+                 learning_rate=0.1,
+                 sign_decay_fn=None,
+                 py_sign_decay_fn=None,
+                 alpha=1.0,
+                 beta=0.9):
+    for dtype in [dtypes.half, dtypes.float32, dtypes.float64]:
+      with self.test_session(use_gpu=True):
+        # Initialize variables for numpy implementation.
+        m0, m1 = 0.0, 0.0
+        var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype)
+        grads0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype)
+        var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype)
+        grads1_np = np.array([0.01, 0.01], dtype=dtype.as_numpy_dtype)
+
+        if use_resource:
+          var0 = resource_variable_ops.ResourceVariable(var0_np)
+          var1 = resource_variable_ops.ResourceVariable(var1_np)
+          global_step = resource_variable_ops.ResourceVariable(
+              0, trainable=False)
+        else:
+          var0 = variables.Variable(var0_np)
+          var1 = variables.Variable(var1_np)
+          global_step = variables.Variable(
+              0, trainable=False)
+        grads0 = constant_op.constant(grads0_np)
+        grads1 = constant_op.constant(grads1_np)
+
+        opt = addsign.AddSignOptimizer(
+            learning_rate=learning_rate,
+            alpha=alpha,
+            beta=beta,
+            sign_decay_fn=sign_decay_fn,
+        )
+        update = opt.apply_gradients(zip([grads0, grads1], [var0, var1]),
+                                     global_step=global_step)
+        neg_update = opt.apply_gradients(zip([-grads0, -grads1], [var0, var1]),
+                                         global_step=global_step)
+        if context.in_graph_mode():
+          self.evaluate(variables.global_variables_initializer())
+          # Fetch params to validate initial values
+          self.assertAllClose([1.0, 2.0], self.evaluate(var0))
+          self.assertAllClose([3.0, 4.0], self.evaluate(var1))
+
+        # Run 7 steps of AddSign
+        # first 4 steps with positive gradient
+        # last 3 steps with negative gradient (sign(gm) should be -1)
+        for t in range(1, 8):
+          if t < 5:
+            if context.in_graph_mode():
+              self.evaluate(update)
+            elif t > 1:
+              opt.apply_gradients(zip([grads0, grads1], [var0, var1]),
+                                  global_step=global_step)
+          else:
+            if context.in_graph_mode():
+              self.evaluate(neg_update)
+            elif t > 1:
+              opt.apply_gradients(zip([-grads0, -grads1], [var0, var1]),
+                                  global_step=global_step)
+
+          var0_np, m0 = addsign_update_numpy(
+              var0_np,
+              grads0_np if t < 5 else -grads0_np,
+              m0,
+              learning_rate,
+              alpha=alpha,
+              beta=beta,
+              py_sign_decay_fn=py_sign_decay_fn,
+              t=t,
+          )
+          var1_np, m1 = addsign_update_numpy(
+              var1_np,
+              grads1_np if t < 5 else -grads1_np,
+              m1,
+              learning_rate,
+              alpha=alpha,
+              beta=beta,
+              py_sign_decay_fn=py_sign_decay_fn,
+              t=t,
+          )
+
+          # Validate updated params
+          self.assertAllCloseAccordingToType(var0_np, self.evaluate(var0))
+          self.assertAllCloseAccordingToType(var1_np, self.evaluate(var1))
+
+  def testDense(self):
+    decay_steps = 10
+    sign_decay_fn = sign_decay.get_linear_decay_fn(decay_steps)
+    py_sign_decay_fn = py_linear_decay_fn(decay_steps)
+    self._testDense(use_resource=False)
+    self._testDense(use_resource=False, learning_rate=0.01, alpha=0.1, beta=0.8)
+    self._testDense(use_resource=False,
+                    sign_decay_fn=sign_decay_fn,
+                    py_sign_decay_fn=py_sign_decay_fn)
+
+    self._testDense(use_resource=True)
+    self._testDense(use_resource=True, learning_rate=0.01, alpha=0.1, beta=0.8)
+    self._testDense(use_resource=True,
+                    sign_decay_fn=sign_decay_fn,
+                    py_sign_decay_fn=py_sign_decay_fn)
+
+  def _testSparse(self,
+                  use_resource=False,
+                  learning_rate=0.1,
+                  sign_decay_fn=None,
+                  py_sign_decay_fn=None,
+                  alpha=1.0,
+                  beta=0.9):
+    for dtype in [dtypes.half, dtypes.float32, dtypes.float64]:
+      with self.test_session(use_gpu=True):
+        # Initialize variables for numpy implementation.
+        m0, m1 = 0.0, 0.0
+        var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype)
+        grads0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype)
+        var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype)
+        grads1_np = np.array([0.01, 0.01], dtype=dtype.as_numpy_dtype)
+
+        if use_resource:
+          var0 = resource_variable_ops.ResourceVariable(var0_np)
+          var1 = resource_variable_ops.ResourceVariable(var1_np)
+          global_step = resource_variable_ops.ResourceVariable(
+              0, trainable=False)
+        else:
+          var0 = variables.Variable(var0_np)
+          var1 = variables.Variable(var1_np)
+          global_step = variables.Variable(
+              0, trainable=False)
+        grads0_np_indices = np.array([0, 1], dtype=np.int32)
+        grads0 = ops.IndexedSlices(
+            constant_op.constant(grads0_np),
+            constant_op.constant(grads0_np_indices), constant_op.constant([2]))
+        grads1_np_indices = np.array([0, 1], dtype=np.int32)
+        grads1 = ops.IndexedSlices(
+            constant_op.constant(grads1_np),
+            constant_op.constant(grads1_np_indices), constant_op.constant([2]))
+        opt = addsign.AddSignOptimizer(
+            learning_rate=learning_rate,
+            alpha=alpha,
+            beta=beta,
+            sign_decay_fn=sign_decay_fn,
+        )
+        update = opt.apply_gradients(zip([grads0, grads1], [var0, var1]),
+                                     global_step=global_step)
+        neg_update = opt.apply_gradients(zip([-grads0, -grads1], [var0, var1]),
+                                         global_step=global_step)
+        variables.global_variables_initializer().run()
+
+        # Fetch params to validate initial values
+        self.assertAllClose([1.0, 2.0], var0.eval())
+        self.assertAllClose([3.0, 4.0], var1.eval())
+
+        # Run 7 steps of AddSign
+        # first 4 steps with positive gradient
+        # last 3 steps with negative gradient (sign(gm) should be -1)
+        for t in range(1, 4):
+          if t < 5:
+            update.run()
+          else:
+            neg_update.run()
+
+          var0_np, m0 = addsign_update_numpy(
+              var0_np,
+              grads0_np,
+              m0,
+              learning_rate,
+              alpha=alpha,
+              beta=beta,
+              py_sign_decay_fn=py_sign_decay_fn,
+              t=t,
+          )
+          var1_np, m1 = addsign_update_numpy(
+              var1_np,
+              grads1_np,
+              m1,
+              learning_rate,
+              alpha=alpha,
+              beta=beta,
+              py_sign_decay_fn=py_sign_decay_fn,
+              t=t,
+          )
+
+          # Validate updated params
+          self.assertAllCloseAccordingToType(var0_np, var0.eval())
+          self.assertAllCloseAccordingToType(var1_np, var1.eval())
+
+  def testSparse(self):
+    decay_steps = 10
+    sign_decay_fn = sign_decay.get_linear_decay_fn(decay_steps)
+    py_sign_decay_fn = py_linear_decay_fn(decay_steps)
+    self._testSparse(use_resource=False)
+    self._testSparse(use_resource=False,
+                     learning_rate=0.01,
+                     alpha=0.1,
+                     beta=0.8)
+    self._testSparse(use_resource=False,
+                     sign_decay_fn=sign_decay_fn,
+                     py_sign_decay_fn=py_sign_decay_fn)
+
+if __name__ == '__main__':
+  test.main()
diff --git a/tensorflow/contrib/opt/python/training/powersign.py b/tensorflow/contrib/opt/python/training/powersign.py
new file mode 100644
index 0000000000..7f7521581f
--- /dev/null
+++ b/tensorflow/contrib/opt/python/training/powersign.py
@@ -0,0 +1,173 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Implementation of PowerSign."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import math
+
+from tensorflow.python.framework import ops
+from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import control_flow_ops
+from tensorflow.python.ops import math_ops
+from tensorflow.python.ops import state_ops
+from tensorflow.python.training import optimizer
+from tensorflow.python.training import training_ops
+
+
+class PowerSignOptimizer(optimizer.Optimizer):
+  """Optimizer that implements the PowerSign update.
+
+  See  Neural Optimizer Search with Reinforcement Learning
+  [Bello et al., ICML2017].
+  """
+
+  def __init__(self,
+               learning_rate=0.1,
+               base=math.e,
+               beta=0.9,
+               sign_decay_fn=None,
+               use_locking=False,
+               name='PowerSignOptimizer'):
+    """Constructs a new PowerSignOptimizer object.
+
+    Initialization:
+
+    ```
+    m_0 <- 0 (Initialize initial 1st moment vector)
+    t <- 0 (Initialize timestep)
+    ```
+
+    Update:
+
+    ```
+    t <- t + 1
+    m_t <- beta1 * m_{t-1} + (1 - beta1) * g
+    sign_decay <- sign_decay(t)
+    update <- base ** (sign_decay * sign(g) * sign(m)) * g
+    variable <- variable - lr_t * update
+    ```
+
+    Example usage for PowerSign-cd (PowerSign with cosine sign decay)
+    ```
+    decay_steps = 1000
+    linear_decay_fn = sign_decays.get_linear_decay_fn(decay_steps)
+    opt = PowerSignOptimizer(learning_rate=0.1, sign_decay_fn=linear_decay_fn)
+    ```
+
+    Args:
+      learning_rate: learning_rate used when taking a step.
+      base: base used in optimizer.
+      beta: decay used for computing the moving average m.
+      sign_decay_fn: decay function applied to the sign(g*m) quantity.
+          Takes global_step as an argument and returns the quantity to multiply
+          the sign(g*m) by.
+      use_locking: If True use locks for update operations.
+      name: Optional name for the operations created iwhen applying gradients.
+        Defaults to "PowerSignOptimizer".
+    """
+    super(PowerSignOptimizer, self).__init__(use_locking, name)
+    self._lr = learning_rate
+    self._beta = beta
+    self._logbase = math.log(base)
+
+    self._sign_decay_fn = sign_decay_fn
+
+    # Tensor versions of the constructor arguments, created in _prepare().
+    self._lr_t = None
+    self._beta_t = None
+    self._logbase_t = None
+
+  def apply_gradients(self, grads_and_vars, global_step=None, name=None):
+    if self._sign_decay_fn is not None:
+      self._sign_decay_t = ops.convert_to_tensor(
+          self._sign_decay_fn(global_step), name='sign_decay')
+    return super(PowerSignOptimizer, self).apply_gradients(
+        grads_and_vars, global_step=global_step, name=name)
+
+  def _create_slots(self, var_list):
+    # Create slots for the first moment.
+    for v in var_list:
+      self._zeros_slot(v, 'm', self._name)
+
+  def _prepare(self):
+    self._lr_t = ops.convert_to_tensor(self._lr, name='learning_rate')
+    self._beta_t = ops.convert_to_tensor(self._beta, name='beta')
+    self._logbase_t = ops.convert_to_tensor(self._logbase, name='logbase')
+    if self._sign_decay_fn is None:
+      self._sign_decay_t = ops.convert_to_tensor(1.0, name='sign_decay')
+
+  def _apply_dense(self, grad, var):
+    m = self.get_slot(var, 'm')
+    return training_ops.apply_power_sign(
+        var,
+        m,
+        math_ops.cast(self._lr_t, var.dtype.base_dtype),
+        math_ops.cast(self._logbase_t, var.dtype.base_dtype),
+        math_ops.cast(self._sign_decay_t, var.dtype.base_dtype),
+        math_ops.cast(self._beta_t, var.dtype.base_dtype),
+        grad,
+        use_locking=self._use_locking).op
+
+  def _resource_apply_dense(self, grad, var):
+    m = self.get_slot(var, 'm')
+    return training_ops.resource_apply_power_sign(
+        var.handle,
+        m.handle,
+        math_ops.cast(self._lr_t, var.dtype.base_dtype),
+        math_ops.cast(self._logbase_t, var.dtype.base_dtype),
+        math_ops.cast(self._sign_decay_t, var.dtype.base_dtype),
+        math_ops.cast(self._beta_t, var.dtype.base_dtype),
+        grad,
+        use_locking=self._use_locking)
+
+  def _apply_sparse(self, grad, var):
+    lr_t = math_ops.cast(self._lr_t, var.dtype.base_dtype)
+    beta_t = math_ops.cast(self._beta_t, var.dtype.base_dtype)
+    logbase_t = math_ops.cast(self._logbase_t, var.dtype.base_dtype)
+    e_t = math_ops.cast(math.e, var.dtype.base_dtype)
+
+    m = self.get_slot(var, 'm')
+    m_t = state_ops.assign(
+        m, (m * beta_t) + (grad * (1 - beta_t)), use_locking=self._use_locking)
+
+    sign_g = ops.IndexedSlices(
+        math_ops.sign(grad.values), grad.indices, dense_shape=grad.dense_shape)
+    sign_gm = ops.IndexedSlices(
+        array_ops.gather(math_ops.sign(m_t), sign_g.indices) * sign_g.values,
+        sign_g.indices,
+        dense_shape=sign_g.dense_shape)
+
+    sign_decayed = math_ops.cast(
+        self._sign_decay_t, var.dtype.base_dtype)
+    multiplier_values = math_ops.pow(
+        e_t, logbase_t * sign_decayed * sign_gm.values)
+    multiplier = ops.IndexedSlices(
+        multiplier_values, sign_gm.indices, dense_shape=sign_gm.dense_shape)
+
+    final_update = ops.IndexedSlices(
+        lr_t * multiplier.values * grad.values,
+        multiplier.indices,
+        dense_shape=multiplier.dense_shape)
+
+    var_update = state_ops.scatter_sub(
+        var,
+        final_update.indices,
+        final_update.values,
+        use_locking=self._use_locking)
+
+    return control_flow_ops.group(* [var_update, m_t])
diff --git a/tensorflow/contrib/opt/python/training/powersign_test.py b/tensorflow/contrib/opt/python/training/powersign_test.py
new file mode 100644
index 0000000000..ff7b1a72d4
--- /dev/null
+++ b/tensorflow/contrib/opt/python/training/powersign_test.py
@@ -0,0 +1,268 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for PowerSign."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import math
+import numpy as np
+
+from tensorflow.contrib.opt.python.training import powersign
+from tensorflow.contrib.opt.python.training import sign_decay
+from tensorflow.python.eager import context
+from tensorflow.python.framework import constant_op
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import ops
+from tensorflow.python.ops import resource_variable_ops
+from tensorflow.python.ops import variables
+from tensorflow.python.platform import test
+
+
+def py_linear_decay_fn(decay_steps):
+  def linear_decay(step):
+    step = min(step, decay_steps)
+    return float(decay_steps - step) / decay_steps
+  return linear_decay
+
+
+def powersign_update_numpy(params,
+                           g_t,
+                           m,
+                           lr,
+                           base=math.e,
+                           beta=0.9,
+                           py_sign_decay_fn=None,
+                           t=None):
+  m_t = beta * m + (1 - beta) * g_t
+  if py_sign_decay_fn is None:
+    sign_decayed = 1.0
+  else:
+    sign_decayed = py_sign_decay_fn(t-1)
+  multiplier = base ** (sign_decayed * np.sign(g_t) * np.sign(m_t))
+  params_t = params - lr * multiplier * g_t
+  return params_t, m_t
+
+
+class PowerSignTest(test.TestCase):
+
+  def _testDense(self,
+                 use_resource=False,
+                 learning_rate=0.1,
+                 sign_decay_fn=None,
+                 py_sign_decay_fn=None,
+                 base=math.e,
+                 beta=0.9):
+    for dtype in [dtypes.half, dtypes.float32, dtypes.float64]:
+      with self.test_session(use_gpu=True):
+        # Initialize variables for numpy implementation.
+        m0, m1 = 0.0, 0.0
+        var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype)
+        grads0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype)
+        var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype)
+        grads1_np = np.array([0.01, 0.01], dtype=dtype.as_numpy_dtype)
+
+        if use_resource:
+          var0 = resource_variable_ops.ResourceVariable(var0_np)
+          var1 = resource_variable_ops.ResourceVariable(var1_np)
+          global_step = resource_variable_ops.ResourceVariable(
+              0, trainable=False)
+        else:
+          var0 = variables.Variable(var0_np)
+          var1 = variables.Variable(var1_np)
+          global_step = variables.Variable(
+              0, trainable=False)
+        grads0 = constant_op.constant(grads0_np)
+        grads1 = constant_op.constant(grads1_np)
+
+        opt = powersign.PowerSignOptimizer(
+            learning_rate=learning_rate,
+            base=base,
+            beta=beta,
+            sign_decay_fn=sign_decay_fn,
+        )
+        update = opt.apply_gradients(zip([grads0, grads1], [var0, var1]),
+                                     global_step=global_step)
+        neg_update = opt.apply_gradients(zip([-grads0, -grads1], [var0, var1]),
+                                         global_step=global_step)
+
+        if context.in_graph_mode():
+          self.evaluate(variables.global_variables_initializer())
+          # Fetch params to validate initial values
+          self.assertAllClose([1.0, 2.0], self.evaluate(var0))
+          self.assertAllClose([3.0, 4.0], self.evaluate(var1))
+
+        # Run 7 steps of powersign
+        # first 4 steps with positive gradient
+        # last 3 steps with negative gradient (sign(gm) should be -1)
+        for t in range(1, 8):
+          if t < 5:
+            if context.in_graph_mode():
+              self.evaluate(update)
+            elif t > 1:
+              opt.apply_gradients(zip([grads0, grads1], [var0, var1]),
+                                  global_step=global_step)
+          else:
+            if context.in_graph_mode():
+              self.evaluate(neg_update)
+            elif t > 1:
+              opt.apply_gradients(zip([-grads0, -grads1], [var0, var1]),
+                                  global_step=global_step)
+
+          var0_np, m0 = powersign_update_numpy(
+              var0_np,
+              grads0_np if t < 5 else -grads0_np,
+              m0,
+              learning_rate,
+              base=base,
+              beta=beta,
+              py_sign_decay_fn=py_sign_decay_fn,
+              t=t,
+          )
+          var1_np, m1 = powersign_update_numpy(
+              var1_np,
+              grads1_np if t < 5 else -grads1_np,
+              m1,
+              learning_rate,
+              base=base,
+              beta=beta,
+              py_sign_decay_fn=py_sign_decay_fn,
+              t=t,
+          )
+
+          # Validate updated params
+          self.assertAllCloseAccordingToType(var0_np, self.evaluate(var0))
+          self.assertAllCloseAccordingToType(var1_np, self.evaluate(var1))
+
+  def testDense(self):
+    decay_steps = 10
+    sign_decay_fn = sign_decay.get_linear_decay_fn(decay_steps)
+    py_sign_decay_fn = py_linear_decay_fn(decay_steps)
+    self._testDense(use_resource=False)
+    self._testDense(use_resource=False,
+                    learning_rate=0.1,
+                    base=10.0,
+                    beta=0.8)
+    self._testDense(use_resource=False,
+                    sign_decay_fn=sign_decay_fn,
+                    py_sign_decay_fn=py_sign_decay_fn)
+
+    self._testDense(use_resource=True)
+    self._testDense(use_resource=True, learning_rate=0.1, base=10.0, beta=0.8)
+    self._testDense(use_resource=True,
+                    sign_decay_fn=sign_decay_fn,
+                    py_sign_decay_fn=py_sign_decay_fn)
+
+  def _testSparse(self,
+                  use_resource=False,
+                  learning_rate=0.1,
+                  sign_decay_fn=None,
+                  py_sign_decay_fn=None,
+                  base=math.e,
+                  beta=0.9):
+    with self.test_session(use_gpu=True):
+      for dtype in [dtypes.half, dtypes.float32, dtypes.float64]:
+        # Initialize variables for numpy implementation.
+        m0, m1 = 0.0, 0.0
+        var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype)
+        grads0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype)
+        var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype)
+        grads1_np = np.array([0.01, 0.01], dtype=dtype.as_numpy_dtype)
+
+        if use_resource:
+          var0 = resource_variable_ops.ResourceVariable(var0_np)
+          var1 = resource_variable_ops.ResourceVariable(var1_np)
+          global_step = resource_variable_ops.ResourceVariable(
+              0, trainable=False)
+        else:
+          var0 = variables.Variable(var0_np)
+          var1 = variables.Variable(var1_np)
+          global_step = variables.Variable(
+              0, trainable=False)
+        grads0_np_indices = np.array([0, 1], dtype=np.int32)
+        grads0 = ops.IndexedSlices(
+            constant_op.constant(grads0_np),
+            constant_op.constant(grads0_np_indices), constant_op.constant([2]))
+        grads1_np_indices = np.array([0, 1], dtype=np.int32)
+        grads1 = ops.IndexedSlices(
+            constant_op.constant(grads1_np),
+            constant_op.constant(grads1_np_indices), constant_op.constant([2]))
+        opt = powersign.PowerSignOptimizer(
+            learning_rate=learning_rate,
+            base=base,
+            beta=beta,
+            sign_decay_fn=sign_decay_fn,
+        )
+        update = opt.apply_gradients(zip([grads0, grads1], [var0, var1]),
+                                     global_step=global_step)
+        neg_update = opt.apply_gradients(zip([-grads0, -grads1], [var0, var1]),
+                                         global_step=global_step)
+        variables.global_variables_initializer().run()
+
+        # Fetch params to validate initial values
+        self.assertAllClose([1.0, 2.0], var0.eval())
+        self.assertAllClose([3.0, 4.0], var1.eval())
+
+        # Run 3 steps of powersign
+        # first 4 steps with positive gradient
+        # last 3 steps with negative gradient (sign(gm) should be -1)
+        for t in range(1, 8):
+          if t < 5:
+            update.run()
+          else:
+            neg_update.run()
+
+          var0_np, m0 = powersign_update_numpy(
+              var0_np,
+              grads0_np if t < 5 else -grads0_np,
+              m0,
+              learning_rate,
+              base=base,
+              beta=beta,
+              py_sign_decay_fn=py_sign_decay_fn,
+              t=t,
+          )
+          var1_np, m1 = powersign_update_numpy(
+              var1_np,
+              grads1_np if t < 5 else -grads1_np,
+              m1,
+              learning_rate,
+              base=base,
+              beta=beta,
+              py_sign_decay_fn=py_sign_decay_fn,
+              t=t,
+          )
+
+          # Validate updated params
+          self.assertAllCloseAccordingToType(var0_np, var0.eval())
+          self.assertAllCloseAccordingToType(var1_np, var1.eval())
+
+  def testSparse(self):
+    decay_steps = 10
+    sign_decay_fn = sign_decay.get_linear_decay_fn(decay_steps)
+    py_sign_decay_fn = py_linear_decay_fn(decay_steps)
+    self._testSparse(use_resource=False)
+    self._testSparse(use_resource=False,
+                     learning_rate=0.01,
+                     base=2.0,
+                     beta=0.8)
+    self._testSparse(use_resource=False,
+                     sign_decay_fn=sign_decay_fn,
+                     py_sign_decay_fn=py_sign_decay_fn)
+
+
+if __name__ == '__main__':
+  test.main()
diff --git a/tensorflow/contrib/opt/python/training/sign_decay.py b/tensorflow/contrib/opt/python/training/sign_decay.py
new file mode 100644
index 0000000000..e8870c0721
--- /dev/null
+++ b/tensorflow/contrib/opt/python/training/sign_decay.py
@@ -0,0 +1,158 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Implementation of the sign decay functions used in PowerSign and AddSign.
+
+See [Bello et al., ICML 2017] Neural Optimizer Search with Reinforcement
+Learning for details.
+"""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import math
+from tensorflow.python.framework import constant_op
+from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import math_ops
+
+
+def get_linear_decay_fn(decay_steps):
+  """Returns a function that computes a linear decay.
+
+  This decay computes linear annealing:
+    max(0, (decay_steps - global_step) / decay_steps)
+
+  Example usage:
+  ```
+  decay_steps = 1000
+  linear_decay_fn = get_linear_decay_fn(decay_steps)
+  decayed = linear_decay_fn(global_step)
+  x *= decayed
+  ```
+  Args:
+    decay_steps: number of steps to decay over.
+  Returns:
+    linear_decay_fn: a function that computes the linear decay.
+  """
+  # pylint:disable=missing-docstring
+  def linear_decay_fn(global_step):
+    if global_step is None:
+      raise ValueError("global_step is required for linear_decay.")
+    global_step = math_ops.minimum(global_step, decay_steps)
+    remaining_steps = math_ops.to_int32(decay_steps) - math_ops.to_int32(
+        global_step)
+    decayed = math_ops.to_float(remaining_steps) / math_ops.to_float(
+        decay_steps)
+    return math_ops.maximum(0.0, decayed)
+  # pylint:enable=missing-docstring
+  return linear_decay_fn
+
+
+def get_cosine_decay_fn(decay_steps, num_periods=0.5, zero_after=None):
+  """Returns a function that computes a cosine decay.
+
+  This decay computes cosine annealing:
+    0.5 * (1.0 + cos(2.0 * pi * num_periods * global_step / decay_steps))
+
+  This decay can be used to decay the sign quantity in the AddSign and PowerSign
+  optimizers discovered in
+  [Bello et al., ICML 2017] Neural Optimizer Search with RL.
+
+  Example usage:
+  ```
+  decay_steps = 1000
+  num_periods = 2
+  cosine_decay_fn = get_cosine_decay_fn(decay_steps, num_periods=num_periods)
+  decayed = cosine_decay_fn(global_step)
+  x *= decayed
+  ```
+  Args:
+    decay_steps: number of steps to decay over.
+    num_periods: number of periods for cosine signal. 0.5 by default,
+      which maps the last decay step to 0.
+    zero_after: if not None, number after which the decay function
+      will just return 0.
+  Returns:
+    cosine_decay_fn: a function that computes the cosine decay.
+  """
+  # pylint:disable=missing-docstring
+  def cosine_decay_fn(global_step):
+    if global_step is None:
+      raise ValueError("global_step is required for cosine_decay.")
+    global_step = math_ops.minimum(global_step, decay_steps)
+    completed_fraction = math_ops.to_float(global_step) / math_ops.to_float(
+        decay_steps)
+    fraction = 2.0 * num_periods * completed_fraction
+    decayed = 0.5 * (
+        1.0 + math_ops.cos(constant_op.constant(math.pi) * fraction))
+    if zero_after is not None:
+      decayed = array_ops.where(
+          math_ops.greater_equal(fraction, 2 * zero_after), 0.0, decayed)
+    return decayed
+  # pylint:enable=missing-docstring
+  return cosine_decay_fn
+
+
+def get_restart_decay_fn(decay_steps, num_periods=1, zero_after=None):
+  """Returns a function that computes a restart decay.
+
+  This decay computes
+    0.5 * (1.0 + cos(pi * (num_periods * global_step) % num_training_steps))
+
+  This is a simplified version of the restart decay introduced in
+  "SGDR: Stochastic Gradient Descent with Warm Restarts"
+  by Ilya Loshchilov & Frank Hutter, Proceedings of
+  ICLR'2017, available at https://arxiv.org/pdf/1608.03983.pdf
+
+  This decay can be used to decay the sign quantity in the AddSign and PowerSign
+  optimizers discovered in
+  [Bello et al., ICML 2017] Neural Optimizer Search with RL.
+
+  Example usage:
+  ```
+  decay_steps = 1000
+  num_periods = 2.0
+  restart_decay_fn = get_restart_decay_fn(decay_steps,
+                                          num_periods=num_periods)
+  decayed = restart_decay_fn(global_step)
+  x *= decayed
+  ```
+  Args:
+    decay_steps: number of steps to decay over.
+    num_periods: number of periods for cosine signal. 1 by default,
+      which maps the last decay step to 0.
+    zero_after: if not None, number after which the decay function
+      will return 0.
+  Returns:
+    restart_decay_fn: a function that computes the restart decay.
+  """
+  # pylint:disable=missing-docstring
+  def restart_decay_fn(global_step):
+    if global_step is None:
+      raise ValueError("global_step is required for cosine_decay.")
+    global_step = math_ops.minimum(global_step, decay_steps)
+    num = math_ops.mod(num_periods * math_ops.to_float(global_step),
+                       decay_steps)
+    fraction = num / math_ops.to_float(decay_steps)
+    decayed = 0.5 * (
+        1.0 + math_ops.cos(constant_op.constant(math.pi) * fraction))
+    if zero_after is not None:
+      tmp = math_ops.to_float(
+          num_periods * global_step) / math_ops.to_float(decay_steps)
+      decayed = array_ops.where(
+          math_ops.greater_equal(tmp, zero_after), 0.0, decayed)
+    return decayed
+  # pylint:enable=missing-docstring
+  return restart_decay_fn
diff --git a/tensorflow/contrib/opt/python/training/sign_decay_test.py b/tensorflow/contrib/opt/python/training/sign_decay_test.py
new file mode 100644
index 0000000000..c31cb924ea
--- /dev/null
+++ b/tensorflow/contrib/opt/python/training/sign_decay_test.py
@@ -0,0 +1,110 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for sign_decay."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import math
+
+from tensorflow.contrib.opt.python.training import sign_decay
+from tensorflow.python.platform import test
+
+
+def py_linear_decay_fn(decay_steps):
+
+  def linear_decay(step):
+    step = min(step, decay_steps)
+    return float(decay_steps - step) / decay_steps
+
+  return linear_decay
+
+
+def py_cosine_decay_fn(decay_steps, num_periods=0.5, zero_after=None):
+
+  def cosine_decay(step):
+    step = min(step, decay_steps)
+    fraction = 2.0 * num_periods * step / float(decay_steps)
+    if zero_after is not None and fraction >= 2 * zero_after:
+      return 0.0
+    return 0.5 * (1.0 + math.cos(math.pi * fraction))
+
+  return cosine_decay
+
+
+def py_restart_decay_fn(decay_steps, num_periods=1, zero_after=None):
+
+  def restart_decay(step):
+    step = min(step, decay_steps)
+    tmp = num_periods * step / float(decay_steps)
+    fraction = (
+        num_periods * step % decay_steps) / float(decay_steps)
+    if zero_after is not None and tmp >= zero_after:
+      return 0
+    return 0.5 * (1.0 + math.cos(math.pi * fraction))
+
+  return restart_decay
+
+
+class SignDecaysTest(test.TestCase):
+
+  def testLinearDecay(self):
+    num_training_steps = 1000
+    linear_decay_fn = sign_decay.get_linear_decay_fn(num_training_steps)
+
+    for step in range(0, 1000, 100):
+      with self.test_session():
+        tf_decayed = linear_decay_fn(step).eval()
+        py_decayed = py_linear_decay_fn(num_training_steps)(step)
+        self.assertAlmostEqual(tf_decayed, py_decayed, places=4)
+
+  def testCosineDecay(self):
+    num_training_steps = 1000
+    cosine_decay_fn = sign_decay.get_cosine_decay_fn(num_training_steps)
+    cosine_decay_2_fn = sign_decay.get_cosine_decay_fn(
+        num_training_steps, num_periods=5, zero_after=2)
+
+    for step in range(0, 1000, 100):
+      with self.test_session():
+        tf_decayed = cosine_decay_fn(step).eval()
+        py_decayed = py_cosine_decay_fn(num_training_steps)(step)
+        self.assertAlmostEqual(tf_decayed, py_decayed, places=4)
+
+        tf_decayed = cosine_decay_2_fn(step).eval()
+        py_decayed = py_cosine_decay_fn(
+            num_training_steps, num_periods=5, zero_after=2)(step)
+        self.assertAlmostEqual(tf_decayed, py_decayed, places=4)
+
+  def testRestartDecay(self):
+    num_training_steps = 1000
+    restart_decay_fn = sign_decay.get_restart_decay_fn(num_training_steps)
+    restart_decay_2_fn = sign_decay.get_restart_decay_fn(
+        num_training_steps, num_periods=5, zero_after=2)
+
+    for step in range(0, 1000, 100):
+      with self.test_session():
+        tf_decayed = restart_decay_fn(step).eval()
+        py_decayed = py_restart_decay_fn(num_training_steps)(step)
+        self.assertAlmostEqual(tf_decayed, py_decayed, places=4)
+
+        tf_decayed = restart_decay_2_fn(step).eval()
+        py_decayed = py_restart_decay_fn(
+            num_training_steps, num_periods=5, zero_after=2)(step)
+        self.assertAlmostEqual(tf_decayed, py_decayed, places=4)
+
+
+if __name__ == "__main__":
+  test.main()
diff --git a/tensorflow/core/kernels/training_ops.cc b/tensorflow/core/kernels/training_ops.cc
index 98dfa5a3dd..76c30c5a46 100644
--- a/tensorflow/core/kernels/training_ops.cc
+++ b/tensorflow/core/kernels/training_ops.cc
@@ -15,12 +15,13 @@ limitations under the License.
 
 #define EIGEN_USE_THREADS
 
-#include "tensorflow/core/kernels/training_ops.h"
 #include <algorithm>
+
 #include "tensorflow/core/framework/op_kernel.h"
 #include "tensorflow/core/framework/register_types.h"
 #include "tensorflow/core/kernels/bounds_check.h"
 #include "tensorflow/core/kernels/training_op_helpers.h"
+#include "tensorflow/core/kernels/training_ops.h"
 #include "tensorflow/core/kernels/variable_ops.h"
 
 #ifdef TENSORFLOW_USE_SYCL
@@ -361,6 +362,37 @@ struct ApplyCenteredRMSProp<CPUDevice, T> {
   }
 };
 
+template <typename T>
+struct ApplyAddSign<CPUDevice, T> {
+  void operator()(const CPUDevice& d, typename TTypes<T>::Flat var,
+                  typename TTypes<T>::Flat m,
+                  typename TTypes<T>::ConstScalar lr,
+                  typename TTypes<T>::ConstScalar alpha,
+                  typename TTypes<T>::ConstScalar sign_decay,
+                  typename TTypes<T>::ConstScalar beta,
+                  typename TTypes<T>::ConstFlat grad) {
+    m.device(d) = m * beta() + grad * (static_cast<T>(1) - beta());
+    auto sign_gm = grad.sign() * m.sign();
+    var.device(d) -= lr() * (alpha() + sign_decay() * sign_gm) * grad;
+  }
+};
+
+template <typename T>
+struct ApplyPowerSign<CPUDevice, T> {
+  void operator()(const CPUDevice& d, typename TTypes<T>::Flat var,
+                  typename TTypes<T>::Flat m,
+                  typename TTypes<T>::ConstScalar lr,
+                  typename TTypes<T>::ConstScalar logbase,
+                  typename TTypes<T>::ConstScalar sign_decay,
+                  typename TTypes<T>::ConstScalar beta,
+                  typename TTypes<T>::ConstFlat grad) {
+    m.device(d) = m * beta() + grad * (static_cast<T>(1) - beta());
+    auto sign_gm = grad.sign() * m.sign();
+    auto grad_scale = (logbase() * sign_decay() * sign_gm).exp();
+    var.device(d) -= lr() * grad_scale * grad;
+  }
+};
+
 }  // namespace functor
 
 template <typename Device, typename T>
@@ -3243,4 +3275,220 @@ REGISTER_KERNELS(double, int64);
 
 #undef REGISTER_KERNELS
 
+
+template <typename Device, typename T>
+class ApplyAddSignOp : public OpKernel {
+ public:
+  explicit ApplyAddSignOp(OpKernelConstruction* ctx) : OpKernel(ctx) {
+    OP_REQUIRES_OK(ctx, ctx->GetAttr("use_locking", &use_exclusive_lock_));
+  }
+
+  void Compute(OpKernelContext* ctx) override {
+    auto locks =
+        MaybeLockVariableInputMutexesInOrder(ctx, use_exclusive_lock_, {0, 1});
+
+    Tensor var;
+    OP_REQUIRES_OK(ctx, GetInputTensorFromVariable<Device, T>(
+                            ctx, 0, use_exclusive_lock_, false, &var));
+    Tensor m;
+    OP_REQUIRES_OK(ctx, GetInputTensorFromVariable<Device, T>(
+                            ctx, 1, use_exclusive_lock_, false, &m));
+    OP_REQUIRES(
+        ctx, var.IsInitialized(),
+        errors::FailedPrecondition(
+            "Attempting to use uninitialized variables: ", requested_input(0)));
+    OP_REQUIRES(
+        ctx, m.IsInitialized(),
+        errors::FailedPrecondition(
+            "Attempting to use uninitialized variables: ", requested_input(1)));
+    const Tensor& lr = ctx->input(2);
+    OP_REQUIRES(ctx, TensorShapeUtils::IsScalar(lr.shape()),
+                errors::InvalidArgument("lr is not a scalar: ",
+                                        lr.shape().DebugString()));
+    const Tensor& alpha = ctx->input(3);
+    OP_REQUIRES(ctx, TensorShapeUtils::IsScalar(alpha.shape()),
+                errors::InvalidArgument("alpha is not a scalar: ",
+                                        alpha.shape().DebugString()));
+    const Tensor& sign_decay = ctx->input(4);
+    OP_REQUIRES(ctx, TensorShapeUtils::IsScalar(alpha.shape()),
+                errors::InvalidArgument("sign_decay is not a scalar: ",
+                                        sign_decay.shape().DebugString()));
+    const Tensor& beta = ctx->input(5);
+    OP_REQUIRES(ctx, TensorShapeUtils::IsScalar(beta.shape()),
+                errors::InvalidArgument("beta is not a scalar: ",
+                                        beta.shape().DebugString()));
+    const Tensor& grad = ctx->input(6);
+    OP_REQUIRES(ctx, var.shape().IsSameSize(m.shape()),
+                errors::InvalidArgument("var and m do not have the same shape",
+                                        var.shape().DebugString(), " ",
+                                        m.shape().DebugString()));
+    OP_REQUIRES(
+        ctx, var.shape().IsSameSize(grad.shape()),
+        errors::InvalidArgument("var and grad do not have the same shape",
+                                var.shape().DebugString(), " ",
+                                grad.shape().DebugString()));
+
+    const Device& device = ctx->template eigen_device<Device>();
+    functor::ApplyAddSign<Device, T>()(
+        device, var.flat<T>(), m.flat<T>(), lr.scalar<T>(), alpha.scalar<T>(),
+        sign_decay.scalar<T>(), beta.scalar<T>(), grad.flat<T>());
+    MaybeForwardRefInputToRefOutput(ctx, 0, 0);
+  }
+
+ private:
+  bool use_exclusive_lock_;
+};
+
+#define REGISTER_KERNELS(D, T)                                        \
+  REGISTER_KERNEL_BUILDER(                                            \
+      Name("ApplyAddSign").Device(DEVICE_##D).TypeConstraint<T>("T"), \
+      ApplyAddSignOp<D##Device, T>);                                  \
+  REGISTER_KERNEL_BUILDER(Name("ResourceApplyAddSign")                \
+                              .Device(DEVICE_##D)                     \
+                              .HostMemory("var")                      \
+                              .HostMemory("m")                        \
+                              .TypeConstraint<T>("T"),                \
+                          ApplyAddSignOp<D##Device, T>);
+#define REGISTER_CPU_KERNELS(T) REGISTER_KERNELS(CPU, T);
+
+TF_CALL_half(REGISTER_CPU_KERNELS);
+TF_CALL_float(REGISTER_CPU_KERNELS);
+TF_CALL_double(REGISTER_CPU_KERNELS);
+
+#if GOOGLE_CUDA
+// Forward declarations of the functor specializations for GPU.
+namespace functor {
+#define DECLARE_GPU_SPEC(T)                                               \
+  template <>                                                             \
+  void ApplyAddSign<GPUDevice, T>::operator()(                            \
+      const GPUDevice& d,                                                 \
+      typename TTypes<T>::Flat var,                                       \
+      typename TTypes<T>::Flat m,                                         \
+      typename TTypes<T>::ConstScalar lr,                                 \
+      typename TTypes<T>::ConstScalar alpha,                              \
+      typename TTypes<T>::ConstScalar sign_decay,                         \
+      typename TTypes<T>::ConstScalar beta,                               \
+      typename TTypes<T>::ConstFlat grad);                                \
+  extern template struct ApplyAddSign<GPUDevice, T>;
+DECLARE_GPU_SPEC(Eigen::half);
+DECLARE_GPU_SPEC(float);
+DECLARE_GPU_SPEC(double);
+#undef DECLARE_GPU_SPEC
+}  // namespace functor
+
+REGISTER_KERNELS(GPU, Eigen::half);
+REGISTER_KERNELS(GPU, float);
+REGISTER_KERNELS(GPU, double);
+#endif
+#undef REGISTER_CPU_KERNELS
+#undef REGISTER_KERNELS
+
+
+template <typename Device, typename T>
+class ApplyPowerSignOp : public OpKernel {
+ public:
+  explicit ApplyPowerSignOp(OpKernelConstruction* ctx) : OpKernel(ctx) {
+    OP_REQUIRES_OK(ctx, ctx->GetAttr("use_locking", &use_exclusive_lock_));
+  }
+
+  void Compute(OpKernelContext* ctx) override {
+    auto locks =
+        MaybeLockVariableInputMutexesInOrder(ctx, use_exclusive_lock_, {0, 1});
+
+    Tensor var;
+    OP_REQUIRES_OK(ctx, GetInputTensorFromVariable<Device, T>(
+                            ctx, 0, use_exclusive_lock_, false, &var));
+    Tensor m;
+    OP_REQUIRES_OK(ctx, GetInputTensorFromVariable<Device, T>(
+                            ctx, 1, use_exclusive_lock_, false, &m));
+    OP_REQUIRES(
+        ctx, var.IsInitialized(),
+        errors::FailedPrecondition(
+            "Attempting to use uninitialized variables: ", requested_input(0)));
+    OP_REQUIRES(
+        ctx, m.IsInitialized(),
+        errors::FailedPrecondition(
+            "Attempting to use uninitialized variables: ", requested_input(1)));
+    const Tensor& lr = ctx->input(2);
+    OP_REQUIRES(ctx, TensorShapeUtils::IsScalar(lr.shape()),
+                errors::InvalidArgument("lr is not a scalar: ",
+                                        lr.shape().DebugString()));
+    const Tensor& logbase = ctx->input(3);
+    OP_REQUIRES(ctx, TensorShapeUtils::IsScalar(logbase.shape()),
+                errors::InvalidArgument("logbase is not a scalar: ",
+                                        logbase.shape().DebugString()));
+    const Tensor& sign_decay = ctx->input(4);
+    OP_REQUIRES(ctx, TensorShapeUtils::IsScalar(logbase.shape()),
+                errors::InvalidArgument("sign_decay is not a scalar: ",
+                                        sign_decay.shape().DebugString()));
+    const Tensor& beta = ctx->input(5);
+    OP_REQUIRES(ctx, TensorShapeUtils::IsScalar(beta.shape()),
+                errors::InvalidArgument("beta is not a scalar: ",
+                                        beta.shape().DebugString()));
+    const Tensor& grad = ctx->input(6);
+    OP_REQUIRES(ctx, var.shape().IsSameSize(m.shape()),
+                errors::InvalidArgument("var and m do not have the same shape",
+                                        var.shape().DebugString(), " ",
+                                        m.shape().DebugString()));
+    OP_REQUIRES(
+        ctx, var.shape().IsSameSize(grad.shape()),
+        errors::InvalidArgument("var and grad do not have the same shape",
+                                var.shape().DebugString(), " ",
+                                grad.shape().DebugString()));
+
+    const Device& device = ctx->template eigen_device<Device>();
+    functor::ApplyPowerSign<Device, T>()(
+        device, var.flat<T>(), m.flat<T>(), lr.scalar<T>(), logbase.scalar<T>(),
+        sign_decay.scalar<T>(), beta.scalar<T>(), grad.flat<T>());
+    MaybeForwardRefInputToRefOutput(ctx, 0, 0);
+  }
+
+ private:
+  bool use_exclusive_lock_;
+};
+
+#define REGISTER_KERNELS(D, T)                                          \
+  REGISTER_KERNEL_BUILDER(                                              \
+      Name("ApplyPowerSign").Device(DEVICE_##D).TypeConstraint<T>("T"), \
+      ApplyPowerSignOp<D##Device, T>);                                  \
+  REGISTER_KERNEL_BUILDER(Name("ResourceApplyPowerSign")                \
+                              .Device(DEVICE_##D)                       \
+                              .HostMemory("var")                        \
+                              .HostMemory("m")                          \
+                              .TypeConstraint<T>("T"),                  \
+                          ApplyPowerSignOp<D##Device, T>);
+#define REGISTER_CPU_KERNELS(T) REGISTER_KERNELS(CPU, T);
+
+TF_CALL_half(REGISTER_CPU_KERNELS);
+TF_CALL_float(REGISTER_CPU_KERNELS);
+TF_CALL_double(REGISTER_CPU_KERNELS);
+
+#if GOOGLE_CUDA
+// Forward declarations of the functor specializations for GPU.
+namespace functor {
+#define DECLARE_GPU_SPEC(T)                                               \
+  template <>                                                             \
+  void ApplyPowerSign<GPUDevice, T>::operator()(                          \
+      const GPUDevice& d,                                                 \
+      typename TTypes<T>::Flat var,                                       \
+      typename TTypes<T>::Flat m,                                         \
+      typename TTypes<T>::ConstScalar lr,                                 \
+      typename TTypes<T>::ConstScalar logbase,                            \
+      typename TTypes<T>::ConstScalar sign_decay,                         \
+      typename TTypes<T>::ConstScalar beta,                               \
+      typename TTypes<T>::ConstFlat grad);                                \
+  extern template struct ApplyPowerSign<GPUDevice, T>;
+DECLARE_GPU_SPEC(Eigen::half);
+DECLARE_GPU_SPEC(float);
+DECLARE_GPU_SPEC(double);
+#undef DECLARE_GPU_SPEC
+}  // namespace functor
+
+REGISTER_KERNELS(GPU, Eigen::half);
+REGISTER_KERNELS(GPU, float);
+REGISTER_KERNELS(GPU, double);
+#endif
+#undef REGISTER_CPU_KERNELS
+#undef REGISTER_KERNELS
+
 }  // namespace tensorflow
diff --git a/tensorflow/core/kernels/training_ops.h b/tensorflow/core/kernels/training_ops.h
index 99a714e0a2..7ee956053a 100644
--- a/tensorflow/core/kernels/training_ops.h
+++ b/tensorflow/core/kernels/training_ops.h
@@ -161,6 +161,29 @@ struct ApplyCenteredRMSProp {
                   typename TTypes<T>::ConstScalar epsilon,
                   typename TTypes<T>::ConstFlat grad);
 };
+
+template <typename Device, typename T>
+struct ApplyAddSign {
+  void operator()(const Device& d, typename TTypes<T>::Flat var,
+                  typename TTypes<T>::Flat m,
+                  typename TTypes<T>::ConstScalar lr,
+                  typename TTypes<T>::ConstScalar alpha,
+                  typename TTypes<T>::ConstScalar sign_decay,
+                  typename TTypes<T>::ConstScalar beta,
+                  typename TTypes<T>::ConstFlat grad);
+};
+
+template <typename Device, typename T>
+struct ApplyPowerSign {
+  void operator()(const Device& d, typename TTypes<T>::Flat var,
+                  typename TTypes<T>::Flat m,
+                  typename TTypes<T>::ConstScalar lr,
+                  typename TTypes<T>::ConstScalar logbase,
+                  typename TTypes<T>::ConstScalar sign_decay,
+                  typename TTypes<T>::ConstScalar beta,
+                  typename TTypes<T>::ConstFlat grad);
+};
+
 }  // end namespace functor
 }  // end namespace tensorflow
 
diff --git a/tensorflow/core/kernels/training_ops_gpu.cu.cc b/tensorflow/core/kernels/training_ops_gpu.cu.cc
index 3678b96e98..f501161095 100644
--- a/tensorflow/core/kernels/training_ops_gpu.cu.cc
+++ b/tensorflow/core/kernels/training_ops_gpu.cu.cc
@@ -193,6 +193,71 @@ struct ApplyCenteredRMSProp<GPUDevice, T> {
   }
 };
 
+template <typename T>
+struct ApplyAddSign<GPUDevice, T> {
+  void operator()(const GPUDevice& d, typename TTypes<T>::Flat var,
+                  typename TTypes<T>::Flat m,
+                  typename TTypes<T>::ConstScalar lr,
+                  typename TTypes<T>::ConstScalar alpha,
+                  typename TTypes<T>::ConstScalar sign_decay,
+                  typename TTypes<T>::ConstScalar beta,
+                  typename TTypes<T>::ConstFlat grad) {
+    Eigen::array<typename TTypes<T>::Tensor::Index, 1> bcast;
+    bcast[0] = grad.dimension(0);
+    Eigen::Sizes<1> single;
+
+    // The following is the GPU equivalent of the CPU version:
+    // m.device(d) = m * beta() + grad * (static_cast<T>(1) - beta());
+    const auto one = static_cast<T>(1.0);
+    auto beta_bcast = beta.reshape(single).broadcast(bcast);
+    auto one_minus_beta =
+        (beta.constant(one) - beta).reshape(single).broadcast(bcast);
+    m.device(d) =  m * beta_bcast + grad * one_minus_beta;
+
+    // The following is the GPU equivalent of the CPU version:
+    // var.device(d) -= lr() * (alpha() + sign_decay() * sign_gm) * grad;
+    auto sign_gm = grad.sign() * m.sign();
+    auto lr_bcast = lr.reshape(single).broadcast(bcast);
+    auto alpha_bcast = alpha.reshape(single).broadcast(bcast);
+    auto sign_decay_bcast = sign_decay.reshape(single).broadcast(bcast);
+    var.device(d) -=
+        lr_bcast * (alpha_bcast + sign_decay_bcast * sign_gm) * grad;
+  }
+};
+
+template <typename T>
+struct ApplyPowerSign<GPUDevice, T> {
+  void operator()(const GPUDevice& d, typename TTypes<T>::Flat var,
+                  typename TTypes<T>::Flat m,
+                  typename TTypes<T>::ConstScalar lr,
+                  typename TTypes<T>::ConstScalar logbase,
+                  typename TTypes<T>::ConstScalar sign_decay,
+                  typename TTypes<T>::ConstScalar beta,
+                  typename TTypes<T>::ConstFlat grad) {
+    Eigen::array<typename TTypes<T>::Tensor::Index, 1> bcast;
+    bcast[0] = grad.dimension(0);
+    Eigen::Sizes<1> single;
+
+    // The following is the GPU equivalent of the CPU version:
+    // m.device(d) = m * beta() + grad * (static_cast<T>(1) - beta());
+    const auto one = static_cast<T>(1.0);
+    auto beta_bcast = beta.reshape(single).broadcast(bcast);
+    auto one_minus_beta =
+        (beta.constant(one) - beta).reshape(single).broadcast(bcast);
+    m.device(d) =  m * beta_bcast + grad * one_minus_beta;
+
+    // The following is the GPU equivalent of the CPU version:
+    // auto grad_scale = (logbase() * sign_decay() * sign_gm).exp();
+    // var.device(d) -= lr() * grad_scale * grad;
+    auto sign_gm = grad.sign() * m.sign();
+    auto lr_bcast = lr.reshape(single).broadcast(bcast);
+    auto logbase_bcast = logbase.reshape(single).broadcast(bcast);
+    auto sign_decay_bcast = sign_decay.reshape(single).broadcast(bcast);
+    auto grad_scale =  (logbase_bcast * sign_decay_bcast * sign_gm).exp();
+    var.device(d) -= lr_bcast * grad_scale * grad;
+  }
+};
+
 }  // namespace functor
 
 template struct functor::ApplyGradientDescent<GPUDevice, Eigen::half>;
@@ -222,6 +287,15 @@ template struct functor::ApplyRMSProp<GPUDevice, double>;
 template struct functor::ApplyCenteredRMSProp<GPUDevice, Eigen::half>;
 template struct functor::ApplyCenteredRMSProp<GPUDevice, float>;
 template struct functor::ApplyCenteredRMSProp<GPUDevice, double>;
+
+template struct functor::ApplyAddSign<GPUDevice, Eigen::half>;
+template struct functor::ApplyAddSign<GPUDevice, float>;
+template struct functor::ApplyAddSign<GPUDevice, double>;
+
+template struct functor::ApplyPowerSign<GPUDevice, Eigen::half>;
+template struct functor::ApplyPowerSign<GPUDevice, float>;
+template struct functor::ApplyPowerSign<GPUDevice, double>;
+
 }  // end namespace tensorflow
 
 #endif  // GOOGLE_CUDA
diff --git a/tensorflow/core/kernels/training_ops_test.cc b/tensorflow/core/kernels/training_ops_test.cc
index 4b1c9eb8bb..ffa7f87c9e 100644
--- a/tensorflow/core/kernels/training_ops_test.cc
+++ b/tensorflow/core/kernels/training_ops_test.cc
@@ -233,4 +233,78 @@ static void BM_RMSProp(int iters, int params) {
 }
 BENCHMARK(BM_RMSProp)->Arg(128 << 10)->Arg(256 << 10);
 
+static void AddSign(int32 n, Graph** init_g, Graph** train_g) {
+  TensorShape shape({n});
+  {
+    Graph* g = new Graph(OpRegistry::Global());
+    auto var = Var(g, n);
+    auto m = Var(g, n);
+    auto zero = Zeros(g, n);
+    test::graph::Assign(g, var, zero);
+    test::graph::Assign(g, m, zero);
+    *init_g = g;
+  }
+  {
+    Graph* g = new Graph(OpRegistry::Global());
+    auto var = Var(g, n);
+    auto m = Var(g, n);
+    auto lr = Scalar(g, 0.01);
+    auto alpha = Scalar(g, 0.1);
+    auto sign_decay = Scalar(g, 0.9);
+    auto beta = Scalar(g, 0.8);
+    auto grad = Random(g, n);
+    test::graph::Multi(g, "ApplyAddSign",
+                       {var, m, lr, alpha, sign_decay, beta, grad});
+    *train_g = g;
+  }
+}
+
+static void BM_AddSign(int iters, int params) {
+  const int64 tot = static_cast<int64>(iters) * params;
+  testing::ItemsProcessed(tot);
+  testing::BytesProcessed(tot * sizeof(float));
+  Graph* init;
+  Graph* train;
+  AddSign(params, &init, &train);
+  test::Benchmark("cpu", train, GetOptions(), init).Run(iters);
+}
+BENCHMARK(BM_AddSign)->Arg(128 << 10)->Arg(256 << 10);
+
+static void PowerSign(int32 n, Graph** init_g, Graph** train_g) {
+  TensorShape shape({n});
+  {
+    Graph* g = new Graph(OpRegistry::Global());
+    auto var = Var(g, n);
+    auto m = Var(g, n);
+    auto zero = Zeros(g, n);
+    test::graph::Assign(g, var, zero);
+    test::graph::Assign(g, m, zero);
+    *init_g = g;
+  }
+  {
+    Graph* g = new Graph(OpRegistry::Global());
+    auto var = Var(g, n);
+    auto m = Var(g, n);
+    auto lr = Scalar(g, 0.01);
+    auto logbase = Scalar(g, 2);
+    auto sign_decay = Scalar(g, 0.9);
+    auto beta = Scalar(g, 0.8);
+    auto grad = Random(g, n);
+    test::graph::Multi(g, "ApplyPowerSign",
+                       {var, m, lr, logbase, sign_decay, beta, grad});
+    *train_g = g;
+  }
+}
+
+static void BM_PowerSign(int iters, int params) {
+  const int64 tot = static_cast<int64>(iters) * params;
+  testing::ItemsProcessed(tot);
+  testing::BytesProcessed(tot * sizeof(float));
+  Graph* init;
+  Graph* train;
+  PowerSign(params, &init, &train);
+  test::Benchmark("cpu", train, GetOptions(), init).Run(iters);
+}
+BENCHMARK(BM_PowerSign)->Arg(128 << 10)->Arg(256 << 10);
+
 }  // end namespace tensorflow
diff --git a/tensorflow/core/ops/training_ops.cc b/tensorflow/core/ops/training_ops.cc
index 6f06b87d58..405318caf2 100644
--- a/tensorflow/core/ops/training_ops.cc
+++ b/tensorflow/core/ops/training_ops.cc
@@ -22,6 +22,48 @@ using shape_inference::DimensionHandle;
 using shape_inference::InferenceContext;
 using shape_inference::ShapeHandle;
 
+const char kAddSignCommonDocStr[] = R"doc(
+Update '*var' according to the AddSign update.
+
+m_t <- beta1 * m_{t-1} + (1 - beta1) * g
+update <- (alpha + sign_decay * sign(g) *sign(m)) * g
+variable <- variable - lr_t * update
+
+var: Should be from a Variable().
+m: Should be from a Variable().
+lr: Scaling factor. Must be a scalar.
+sign_decay: Must be a scalar.
+alpha: Must be a scalar.
+beta: Must be a scalar.
+grad: The gradient.
+)doc";
+
+const char kPowerSignCommonDocStr[] = R"doc(
+Update '*var' according to the AddSign update.
+
+m_t <- beta1 * m_{t-1} + (1 - beta1) * g
+update <- exp(logbase * sign_decay * sign(g) * sign(m_t)) * g
+variable <- variable - lr_t * update
+
+var: Should be from a Variable().
+m: Should be from a Variable().
+lr: Scaling factor. Must be a scalar.
+logbase: Must be a scalar.
+sign_decay: Must be a scalar.
+beta: Must be a scalar.
+grad: The gradient.
+)doc";
+
+const char kOutDocStr[] = R"doc(
+out: Same as "var".
+)doc";
+
+const char kLockDocStr[] = R"doc(
+use_locking: If `True`, updating of the var and m tensors is
+  protected by a lock; otherwise the behavior is undefined, but may exhibit less
+  contention.
+)doc";
+
 static ShapeHandle ShapeOrHandleShape(InferenceContext* c, int input) {
   auto* handle_data = c->input_handle_shapes_and_types(input);
   if (handle_data != nullptr && !handle_data->empty() &&
@@ -1796,4 +1838,99 @@ use_locking: If `True`, updating of the var, mg, ms, and mom tensors is
   contention.
 )doc");
 
+static Status ApplyAddSignShapeFn(InferenceContext* c, bool sparse) {
+  ShapeHandle unused;
+  ShapeHandle s = ShapeOrHandleShape(c, 0);                       // var
+  TF_RETURN_IF_ERROR(c->Merge(s, ShapeOrHandleShape(c, 1), &s));  // m
+  TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 0, &unused));       // lr
+  TF_RETURN_IF_ERROR(c->WithRank(c->input(3), 0, &unused));       // alpha
+  TF_RETURN_IF_ERROR(c->WithRank(c->input(4), 0, &unused));       // sign_decay
+  TF_RETURN_IF_ERROR(c->WithRank(c->input(5), 0, &unused));       // beta
+  TF_RETURN_IF_ERROR(
+      HandleGradAndIndicesInputs(c, sparse, 6 /* grad_idx */, &s));
+  if (c->num_outputs() > 0) {
+    c->set_output(0, s);
+  }
+  return Status::OK();
+}
+
+REGISTER_OP("ApplyAddSign")
+    .Input("var: Ref(T)")
+    .Input("m: Ref(T)")
+    .Input("lr: T")
+    .Input("alpha: T")
+    .Input("sign_decay: T")
+    .Input("beta: T")
+    .Input("grad: T")
+    .Output("out: Ref(T)")
+    .Attr("T: numbertype")
+    .Attr("use_locking: bool = false")
+    .SetShapeFn([](InferenceContext* c) {
+      return ApplyAddSignShapeFn(c, /*sparse=*/false);
+    })
+    .Doc(strings::StrCat(kAddSignCommonDocStr, kOutDocStr, kLockDocStr));
+
+REGISTER_OP("ResourceApplyAddSign")
+    .Input("var: resource")
+    .Input("m: resource")
+    .Input("lr: T")
+    .Input("alpha: T")
+    .Input("sign_decay: T")
+    .Input("beta: T")
+    .Input("grad: T")
+    .Attr("T: numbertype")
+    .Attr("use_locking: bool = false")
+    .SetShapeFn([](InferenceContext* c) {
+      return ApplyAddSignShapeFn(c, /*sparse=*/false);
+    })
+    .Doc(strings::StrCat(kAddSignCommonDocStr, kLockDocStr));
+
+static Status ApplyPowerSignShapeFn(InferenceContext* c, bool sparse) {
+  ShapeHandle unused;
+  ShapeHandle s = ShapeOrHandleShape(c, 0);                       // var
+  TF_RETURN_IF_ERROR(c->Merge(s, ShapeOrHandleShape(c, 1), &s));  // m
+  TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 0, &unused));       // lr
+  TF_RETURN_IF_ERROR(c->WithRank(c->input(3), 0, &unused));       // logbase
+  TF_RETURN_IF_ERROR(c->WithRank(c->input(4), 0, &unused));       // sign_delay
+  TF_RETURN_IF_ERROR(c->WithRank(c->input(5), 0, &unused));       // beta
+  TF_RETURN_IF_ERROR(
+      HandleGradAndIndicesInputs(c, sparse, 6 /* grad_idx */, &s));
+  if (c->num_outputs() > 0) {
+    c->set_output(0, s);
+  }
+  return Status::OK();
+}
+
+REGISTER_OP("ApplyPowerSign")
+    .Input("var: Ref(T)")
+    .Input("m: Ref(T)")
+    .Input("lr: T")
+    .Input("logbase: T")
+    .Input("sign_decay: T")
+    .Input("beta: T")
+    .Input("grad: T")
+    .Output("out: Ref(T)")
+    .Attr("T: numbertype")
+    .Attr("use_locking: bool = false")
+    .SetShapeFn([](InferenceContext* c) {
+      return ApplyPowerSignShapeFn(c, /*sparse=*/false);
+    })
+    .Doc(strings::StrCat(kPowerSignCommonDocStr, kOutDocStr, kLockDocStr));
+
+REGISTER_OP("ResourceApplyPowerSign")
+    .Input("var: resource")
+    .Input("m: resource")
+    .Input("lr: T")
+    .Input("logbase: T")
+    .Input("sign_decay: T")
+    .Input("beta: T")
+    .Input("grad: T")
+    .Attr("T: numbertype")
+    .Attr("use_locking: bool = false")
+    .SetShapeFn([](InferenceContext* c) {
+      return ApplyPowerSignShapeFn(c, /*sparse=*/false);
+    })
+    .Doc(strings::StrCat(kPowerSignCommonDocStr, kLockDocStr));
+
+
 }  // namespace tensorflow
diff --git a/tensorflow/core/ops/training_ops_test.cc b/tensorflow/core/ops/training_ops_test.cc
index 92d5ad9964..de4e3cd9e7 100644
--- a/tensorflow/core/ops/training_ops_test.cc
+++ b/tensorflow/core/ops/training_ops_test.cc
@@ -332,4 +332,38 @@ TEST(TrainingOpsTest, SparseApplyRMSProp_ShapeFn) {
   INFER_ERROR("Shape must be rank 0 but is rank 1", op, "?;?;?;?;?;?;[?];?;?");
 }
 
+TEST(TrainingOpsTest, ApplyAddSign_ShapeFn) {
+  ShapeInferenceTestOp op("ApplyAddSign");
+
+  // Output is a merge of inputs 0, 1, and 6 (var, ms, and grad).
+  INFER_OK(op, "[1,?,?];[?,2,?];[];[];[];[];[?,?,2]", "[d0_0,d1_1,d6_2]");
+  INFER_ERROR("Dimension 0 in both shapes must be equal, but are 1 and 2", op,
+              "[1];[2];[];[];[];[];[1]");
+  INFER_ERROR("Dimension 0 in both shapes must be equal, but are 1 and 2", op,
+              "[1];[1];[];[];[];[];[2]");
+
+  // lr, alpha, sign_decay, and beta must be scalars.
+  INFER_ERROR("Shape must be rank 0 but is rank 1", op, "?;?;[?];?;?;?;?");
+  INFER_ERROR("Shape must be rank 0 but is rank 1", op, "?;?;?;[?];?;?;?");
+  INFER_ERROR("Shape must be rank 0 but is rank 1", op, "?;?;?;?;[?];?;?");
+  INFER_ERROR("Shape must be rank 0 but is rank 1", op, "?;?;?;?;?;[?];?");
+}
+
+TEST(TrainingOpsTest, ApplyPowerSign_ShapeFn) {
+  ShapeInferenceTestOp op("ApplyPowerSign");
+
+  // Output is a merge of inputs 0, 1, and 6 (var, ms, and grad).
+  INFER_OK(op, "[1,?,?];[?,2,?];[];[];[];[];[?,?,2]", "[d0_0,d1_1,d6_2]");
+  INFER_ERROR("Dimension 0 in both shapes must be equal, but are 1 and 2", op,
+              "[1];[2];[];[];[];[];[1]");
+  INFER_ERROR("Dimension 0 in both shapes must be equal, but are 1 and 2", op,
+              "[1];[1];[];[];[];[];[2]");
+
+  // lr, logbase, sign_decay, and beta must be scalars.
+  INFER_ERROR("Shape must be rank 0 but is rank 1", op, "?;?;[?];?;?;?;?");
+  INFER_ERROR("Shape must be rank 0 but is rank 1", op, "?;?;?;[?];?;?;?");
+  INFER_ERROR("Shape must be rank 0 but is rank 1", op, "?;?;?;?;[?];?;?");
+  INFER_ERROR("Shape must be rank 0 but is rank 1", op, "?;?;?;?;?;[?];?");
+}
+
 }  // end namespace tensorflow
-- 
GitLab


From aab5a41eb139812a50a728a9e888bb0290c4c95e Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 17 Nov 2017 12:20:44 -0800
Subject: [PATCH 0059/1225] Update ops-related pbtxt files.

PiperOrigin-RevId: 176143870
---
 .../core/ops/compat/ops_history.v1.pbtxt      | 264 +++++++++++++++
 tensorflow/core/ops/ops.pbtxt                 | 306 ++++++++++++++++++
 2 files changed, 570 insertions(+)

diff --git a/tensorflow/core/ops/compat/ops_history.v1.pbtxt b/tensorflow/core/ops/compat/ops_history.v1.pbtxt
index daeb6763c8..c7a296d938 100644
--- a/tensorflow/core/ops/compat/ops_history.v1.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history.v1.pbtxt
@@ -1536,6 +1536,75 @@ op {
     }
   }
 }
+op {
+  name: "ApplyAddSign"
+  input_arg {
+    name: "var"
+    type_attr: "T"
+    is_ref: true
+  }
+  input_arg {
+    name: "m"
+    type_attr: "T"
+    is_ref: true
+  }
+  input_arg {
+    name: "lr"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "alpha"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "sign_decay"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "beta"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "grad"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "out"
+    type_attr: "T"
+    is_ref: true
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+      }
+    }
+  }
+  attr {
+    name: "use_locking"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+}
 op {
   name: "ApplyCenteredRMSProp"
   input_arg {
@@ -2228,6 +2297,75 @@ op {
     }
   }
 }
+op {
+  name: "ApplyPowerSign"
+  input_arg {
+    name: "var"
+    type_attr: "T"
+    is_ref: true
+  }
+  input_arg {
+    name: "m"
+    type_attr: "T"
+    is_ref: true
+  }
+  input_arg {
+    name: "lr"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "logbase"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "sign_decay"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "beta"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "grad"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "out"
+    type_attr: "T"
+    is_ref: true
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+      }
+    }
+  }
+  attr {
+    name: "use_locking"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+}
 op {
   name: "ApplyProximalAdagrad"
   input_arg {
@@ -26847,6 +26985,69 @@ op {
   }
   is_stateful: true
 }
+op {
+  name: "ResourceApplyAddSign"
+  input_arg {
+    name: "var"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "m"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "lr"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "alpha"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "sign_decay"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "beta"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "grad"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+      }
+    }
+  }
+  attr {
+    name: "use_locking"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  is_stateful: true
+}
 op {
   name: "ResourceApplyCenteredRMSProp"
   input_arg {
@@ -27473,6 +27674,69 @@ op {
   }
   is_stateful: true
 }
+op {
+  name: "ResourceApplyPowerSign"
+  input_arg {
+    name: "var"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "m"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "lr"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "logbase"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "sign_decay"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "beta"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "grad"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+      }
+    }
+  }
+  attr {
+    name: "use_locking"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  is_stateful: true
+}
 op {
   name: "ResourceApplyProximalAdagrad"
   input_arg {
diff --git a/tensorflow/core/ops/ops.pbtxt b/tensorflow/core/ops/ops.pbtxt
index 55a8fc9032..d043696a94 100644
--- a/tensorflow/core/ops/ops.pbtxt
+++ b/tensorflow/core/ops/ops.pbtxt
@@ -1103,6 +1103,86 @@ op {
   summary: "Update \'*var\' according to the Adam algorithm."
   description: "lr_t <- learning_rate * sqrt(1 - beta2^t) / (1 - beta1^t)\nm_t <- beta1 * m_{t-1} + (1 - beta1) * g_t\nv_t <- beta2 * v_{t-1} + (1 - beta2) * g_t * g_t\nvariable <- variable - lr_t * m_t / (sqrt(v_t) + epsilon)"
 }
+op {
+  name: "ApplyAddSign"
+  input_arg {
+    name: "var"
+    description: "Should be from a Variable()."
+    type_attr: "T"
+    is_ref: true
+  }
+  input_arg {
+    name: "m"
+    description: "Should be from a Variable()."
+    type_attr: "T"
+    is_ref: true
+  }
+  input_arg {
+    name: "lr"
+    description: "Scaling factor. Must be a scalar."
+    type_attr: "T"
+  }
+  input_arg {
+    name: "alpha"
+    description: "Must be a scalar."
+    type_attr: "T"
+  }
+  input_arg {
+    name: "sign_decay"
+    description: "Must be a scalar."
+    type_attr: "T"
+  }
+  input_arg {
+    name: "beta"
+    description: "Must be a scalar."
+    type_attr: "T"
+  }
+  input_arg {
+    name: "grad"
+    description: "The gradient."
+    type_attr: "T"
+  }
+  output_arg {
+    name: "out"
+    description: "Same as \"var\"."
+    type_attr: "T"
+    is_ref: true
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+      }
+    }
+  }
+  attr {
+    name: "use_locking"
+    type: "bool"
+    default_value {
+      b: false
+    }
+    description: "If `True`, updating of the var and m tensors is\nprotected by a lock; otherwise the behavior is undefined, but may exhibit less\ncontention."
+  }
+  summary: "Update \'*var\' according to the AddSign update."
+  description: "m_t <- beta1 * m_{t-1} + (1 - beta1) * g\nupdate <- (alpha + sign_decay * sign(g) *sign(m)) * g\nvariable <- variable - lr_t * update"
+}
 op {
   name: "ApplyCenteredRMSProp"
   input_arg {
@@ -1506,6 +1586,86 @@ op {
   summary: "Update \'*var\' according to the momentum scheme. Set use_nesterov = True if you"
   description: "want to use Nesterov momentum.\n\naccum = accum * momentum + grad\nvar -= lr * accum"
 }
+op {
+  name: "ApplyPowerSign"
+  input_arg {
+    name: "var"
+    description: "Should be from a Variable()."
+    type_attr: "T"
+    is_ref: true
+  }
+  input_arg {
+    name: "m"
+    description: "Should be from a Variable()."
+    type_attr: "T"
+    is_ref: true
+  }
+  input_arg {
+    name: "lr"
+    description: "Scaling factor. Must be a scalar."
+    type_attr: "T"
+  }
+  input_arg {
+    name: "logbase"
+    description: "Must be a scalar."
+    type_attr: "T"
+  }
+  input_arg {
+    name: "sign_decay"
+    description: "Must be a scalar."
+    type_attr: "T"
+  }
+  input_arg {
+    name: "beta"
+    description: "Must be a scalar."
+    type_attr: "T"
+  }
+  input_arg {
+    name: "grad"
+    description: "The gradient."
+    type_attr: "T"
+  }
+  output_arg {
+    name: "out"
+    description: "Same as \"var\"."
+    type_attr: "T"
+    is_ref: true
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+      }
+    }
+  }
+  attr {
+    name: "use_locking"
+    type: "bool"
+    default_value {
+      b: false
+    }
+    description: "If `True`, updating of the var and m tensors is\nprotected by a lock; otherwise the behavior is undefined, but may exhibit less\ncontention."
+  }
+  summary: "Update \'*var\' according to the AddSign update."
+  description: "m_t <- beta1 * m_{t-1} + (1 - beta1) * g\nupdate <- exp(logbase * sign_decay * sign(g) * sign(m_t)) * g\nvariable <- variable - lr_t * update"
+}
 op {
   name: "ApplyProximalAdagrad"
   input_arg {
@@ -21774,6 +21934,79 @@ op {
   description: "lr_t <- learning_rate * sqrt(1 - beta2^t) / (1 - beta1^t)\nm_t <- beta1 * m_{t-1} + (1 - beta1) * g_t\nv_t <- beta2 * v_{t-1} + (1 - beta2) * g_t * g_t\nvariable <- variable - lr_t * m_t / (sqrt(v_t) + epsilon)"
   is_stateful: true
 }
+op {
+  name: "ResourceApplyAddSign"
+  input_arg {
+    name: "var"
+    description: "Should be from a Variable()."
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "m"
+    description: "Should be from a Variable()."
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "lr"
+    description: "Scaling factor. Must be a scalar."
+    type_attr: "T"
+  }
+  input_arg {
+    name: "alpha"
+    description: "Must be a scalar."
+    type_attr: "T"
+  }
+  input_arg {
+    name: "sign_decay"
+    description: "Must be a scalar."
+    type_attr: "T"
+  }
+  input_arg {
+    name: "beta"
+    description: "Must be a scalar."
+    type_attr: "T"
+  }
+  input_arg {
+    name: "grad"
+    description: "The gradient."
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+      }
+    }
+  }
+  attr {
+    name: "use_locking"
+    type: "bool"
+    default_value {
+      b: false
+    }
+    description: "If `True`, updating of the var and m tensors is\nprotected by a lock; otherwise the behavior is undefined, but may exhibit less\ncontention."
+  }
+  summary: "Update \'*var\' according to the AddSign update."
+  description: "m_t <- beta1 * m_{t-1} + (1 - beta1) * g\nupdate <- (alpha + sign_decay * sign(g) *sign(m)) * g\nvariable <- variable - lr_t * update"
+  is_stateful: true
+}
 op {
   name: "ResourceApplyCenteredRMSProp"
   input_arg {
@@ -22139,6 +22372,79 @@ op {
   description: "want to use Nesterov momentum.\n\naccum = accum * momentum + grad\nvar -= lr * accum"
   is_stateful: true
 }
+op {
+  name: "ResourceApplyPowerSign"
+  input_arg {
+    name: "var"
+    description: "Should be from a Variable()."
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "m"
+    description: "Should be from a Variable()."
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "lr"
+    description: "Scaling factor. Must be a scalar."
+    type_attr: "T"
+  }
+  input_arg {
+    name: "logbase"
+    description: "Must be a scalar."
+    type_attr: "T"
+  }
+  input_arg {
+    name: "sign_decay"
+    description: "Must be a scalar."
+    type_attr: "T"
+  }
+  input_arg {
+    name: "beta"
+    description: "Must be a scalar."
+    type_attr: "T"
+  }
+  input_arg {
+    name: "grad"
+    description: "The gradient."
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+      }
+    }
+  }
+  attr {
+    name: "use_locking"
+    type: "bool"
+    default_value {
+      b: false
+    }
+    description: "If `True`, updating of the var and m tensors is\nprotected by a lock; otherwise the behavior is undefined, but may exhibit less\ncontention."
+  }
+  summary: "Update \'*var\' according to the AddSign update."
+  description: "m_t <- beta1 * m_{t-1} + (1 - beta1) * g\nupdate <- exp(logbase * sign_decay * sign(g) * sign(m_t)) * g\nvariable <- variable - lr_t * update"
+  is_stateful: true
+}
 op {
   name: "ResourceApplyProximalAdagrad"
   input_arg {
-- 
GitLab


From d32150d14f1651e20bafa07e6f1b51a32fd75999 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 17 Nov 2017 12:30:54 -0800
Subject: [PATCH 0060/1225] Go: Update generated wrapper functions for
 TensorFlow ops.

PiperOrigin-RevId: 176145113
---
 tensorflow/go/op/wrappers.go | 2276 ++++++++++++++++++----------------
 1 file changed, 1187 insertions(+), 1089 deletions(-)

diff --git a/tensorflow/go/op/wrappers.go b/tensorflow/go/op/wrappers.go
index 1d1383ec82..664e37d3a1 100644
--- a/tensorflow/go/op/wrappers.go
+++ b/tensorflow/go/op/wrappers.go
@@ -2681,21 +2681,6 @@ func Abort(scope *Scope, optional ...AbortAttr) (o *tf.Operation) {
 	return scope.AddOperation(opspec)
 }
 
-// Does nothing. Serves as a control trigger for scheduling.
-//
-// Only useful as a placeholder for control edges.
-//
-// Returns the created operation.
-func ControlTrigger(scope *Scope) (o *tf.Operation) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "ControlTrigger",
-	}
-	return scope.AddOperation(opspec)
-}
-
 // SpaceToDepthAttr is an optional argument to SpaceToDepth.
 type SpaceToDepthAttr func(optionalAttr)
 
@@ -8123,88 +8108,82 @@ func MatrixExponential(scope *Scope, input tf.Output) (output tf.Output) {
 	return op.Output(0)
 }
 
-// QuantizeAndDequantizeV3Attr is an optional argument to QuantizeAndDequantizeV3.
-type QuantizeAndDequantizeV3Attr func(optionalAttr)
-
-// QuantizeAndDequantizeV3SignedInput sets the optional signed_input attribute to value.
-// If not specified, defaults to true
-func QuantizeAndDequantizeV3SignedInput(value bool) QuantizeAndDequantizeV3Attr {
-	return func(m optionalAttr) {
-		m["signed_input"] = value
-	}
-}
-
-// QuantizeAndDequantizeV3RangeGiven sets the optional range_given attribute to value.
-// If not specified, defaults to true
-func QuantizeAndDequantizeV3RangeGiven(value bool) QuantizeAndDequantizeV3Attr {
-	return func(m optionalAttr) {
-		m["range_given"] = value
-	}
-}
-
-// Quantizes then dequantizes a tensor.
+// Merges summaries.
 //
-// This is almost identical to QuantizeAndDequantizeV2, except that num_bits is a
-// tensor, so its value can change during training.
-func QuantizeAndDequantizeV3(scope *Scope, input tf.Output, input_min tf.Output, input_max tf.Output, num_bits tf.Output, optional ...QuantizeAndDequantizeV3Attr) (output tf.Output) {
+// This op creates a
+// [`Summary`](https://www.tensorflow.org/code/tensorflow/core/framework/summary.proto)
+// protocol buffer that contains the union of all the values in the input
+// summaries.
+//
+// When the Op is run, it reports an `InvalidArgument` error if multiple values
+// in the summaries to merge use the same tag.
+//
+// Arguments:
+//	inputs: Can be of any shape.  Each must contain serialized `Summary` protocol
+// buffers.
+//
+// Returns Scalar. Serialized `Summary` protocol buffer.
+func MergeSummary(scope *Scope, inputs []tf.Output) (summary tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{}
-	for _, a := range optional {
-		a(attrs)
-	}
 	opspec := tf.OpSpec{
-		Type: "QuantizeAndDequantizeV3",
+		Type: "MergeSummary",
 		Input: []tf.Input{
-			input, input_min, input_max, num_bits,
+			tf.OutputList(inputs),
 		},
-		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// AvgPool3DAttr is an optional argument to AvgPool3D.
-type AvgPool3DAttr func(optionalAttr)
+// AudioSummaryV2Attr is an optional argument to AudioSummaryV2.
+type AudioSummaryV2Attr func(optionalAttr)
 
-// AvgPool3DDataFormat sets the optional data_format attribute to value.
+// AudioSummaryV2MaxOutputs sets the optional max_outputs attribute to value.
 //
-// value: The data format of the input and output data. With the
-// default format "NDHWC", the data is stored in the order of:
-//     [batch, in_depth, in_height, in_width, in_channels].
-// Alternatively, the format could be "NCDHW", the data storage order is:
-//     [batch, in_channels, in_depth, in_height, in_width].
-// If not specified, defaults to "NDHWC"
-func AvgPool3DDataFormat(value string) AvgPool3DAttr {
+// value: Max number of batch elements to generate audio for.
+// If not specified, defaults to 3
+//
+// REQUIRES: value >= 1
+func AudioSummaryV2MaxOutputs(value int64) AudioSummaryV2Attr {
 	return func(m optionalAttr) {
-		m["data_format"] = value
+		m["max_outputs"] = value
 	}
 }
 
-// Performs 3D average pooling on the input.
+// Outputs a `Summary` protocol buffer with audio.
+//
+// The summary has up to `max_outputs` summary values containing audio. The
+// audio is built from `tensor` which must be 3-D with shape `[batch_size,
+// frames, channels]` or 2-D with shape `[batch_size, frames]`. The values are
+// assumed to be in the range of `[-1.0, 1.0]` with a sample rate of `sample_rate`.
+//
+// The `tag` argument is a scalar `Tensor` of type `string`.  It is used to
+// build the `tag` of the summary values:
+//
+// *  If `max_outputs` is 1, the summary value tag is '*tag*/audio'.
+// *  If `max_outputs` is greater than 1, the summary value tags are
+//    generated sequentially as '*tag*/audio/0', '*tag*/audio/1', etc.
 //
 // Arguments:
-//	input: Shape `[batch, depth, rows, cols, channels]` tensor to pool over.
-//	ksize: 1-D tensor of length 5. The size of the window for each dimension of
-// the input tensor. Must have `ksize[0] = ksize[4] = 1`.
-//	strides: 1-D tensor of length 5. The stride of the sliding window for each
-// dimension of `input`. Must have `strides[0] = strides[4] = 1`.
-//	padding: The type of padding algorithm to use.
+//	tag: Scalar. Used to build the `tag` attribute of the summary values.
+//	tensor: 2-D of shape `[batch_size, frames]`.
+//	sample_rate: The sample rate of the signal in hertz.
 //
-// Returns The average pooled output tensor.
-func AvgPool3D(scope *Scope, input tf.Output, ksize []int64, strides []int64, padding string, optional ...AvgPool3DAttr) (output tf.Output) {
+// Returns Scalar. Serialized `Summary` protocol buffer.
+func AudioSummaryV2(scope *Scope, tag tf.Output, tensor tf.Output, sample_rate tf.Output, optional ...AudioSummaryV2Attr) (summary tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{"ksize": ksize, "strides": strides, "padding": padding}
+	attrs := map[string]interface{}{}
 	for _, a := range optional {
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "AvgPool3D",
+		Type: "AudioSummaryV2",
 		Input: []tf.Input{
-			input,
+			tag, tensor, sample_rate,
 		},
 		Attrs: attrs,
 	}
@@ -8212,35 +8191,6 @@ func AvgPool3D(scope *Scope, input tf.Output, ksize []int64, strides []int64, pa
 	return op.Output(0)
 }
 
-// Produces the max pool of the input tensor for quantized types.
-//
-// Arguments:
-//	input: The 4D (batch x rows x cols x depth) Tensor to MaxReduce over.
-//	min_input: The float value that the lowest quantized input value represents.
-//	max_input: The float value that the highest quantized input value represents.
-//	ksize: The size of the window for each dimension of the input tensor.
-// The length must be 4 to match the number of dimensions of the input.
-//	strides: The stride of the sliding window for each dimension of the input
-// tensor. The length must be 4 to match the number of dimensions of the input.
-//	padding: The type of padding algorithm to use.
-//
-// Returns The float value that the lowest quantized output value represents.The float value that the highest quantized output value represents.
-func QuantizedMaxPool(scope *Scope, input tf.Output, min_input tf.Output, max_input tf.Output, ksize []int64, strides []int64, padding string) (output tf.Output, min_output tf.Output, max_output tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{"ksize": ksize, "strides": strides, "padding": padding}
-	opspec := tf.OpSpec{
-		Type: "QuantizedMaxPool",
-		Input: []tf.Input{
-			input, min_input, max_input,
-		},
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0), op.Output(1), op.Output(2)
-}
-
 // Conv3DBackpropInputV2Attr is an optional argument to Conv3DBackpropInputV2.
 type Conv3DBackpropInputV2Attr func(optionalAttr)
 
@@ -8725,32 +8675,6 @@ func MaxPoolGrad(scope *Scope, orig_input tf.Output, orig_output tf.Output, grad
 	return op.Output(0)
 }
 
-// Adds `bias` to `value`.
-//
-// This is a deprecated version of BiasAdd and will be soon removed.
-//
-// This is a special case of `tf.add` where `bias` is restricted to be 1-D.
-// Broadcasting is supported, so `value` may have any number of dimensions.
-//
-// Arguments:
-//	value: Any number of dimensions.
-//	bias: 1-D with size the last dimension of `value`.
-//
-// Returns Broadcasted sum of `value` and `bias`.
-func BiasAddV1(scope *Scope, value tf.Output, bias tf.Output) (output tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "BiasAddV1",
-		Input: []tf.Input{
-			value, bias,
-		},
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
 // EncodeJpegAttr is an optional argument to EncodeJpeg.
 type EncodeJpegAttr func(optionalAttr)
 
@@ -13128,41 +13052,264 @@ func SparseTensorDenseMatMul(scope *Scope, a_indices tf.Output, a_values tf.Outp
 	return op.Output(0)
 }
 
-// Batch normalization.
+// WriteImageSummaryAttr is an optional argument to WriteImageSummary.
+type WriteImageSummaryAttr func(optionalAttr)
+
+// WriteImageSummaryMaxImages sets the optional max_images attribute to value.
 //
-// DEPRECATED at GraphDef version 9: Use tf.nn.batch_normalization()
+// value: Max number of batch elements to generate images for.
+// If not specified, defaults to 3
 //
-// This op is deprecated. Prefer `tf.nn.batch_normalization`.
+// REQUIRES: value >= 1
+func WriteImageSummaryMaxImages(value int64) WriteImageSummaryAttr {
+	return func(m optionalAttr) {
+		m["max_images"] = value
+	}
+}
+
+// Writes a `Summary` protocol buffer with images.
+//
+// The summary has up to `max_images` summary values containing images. The
+// images are built from `tensor` which must be 4-D with shape `[batch_size,
+// height, width, channels]` and where `channels` can be:
+//
+// *  1: `tensor` is interpreted as Grayscale.
+// *  3: `tensor` is interpreted as RGB.
+// *  4: `tensor` is interpreted as RGBA.
+//
+// The images have the same number of channels as the input tensor. For float
+// input, the values are normalized one image at a time to fit in the range
+// `[0, 255]`.  `uint8` values are unchanged.  The op uses two different
+// normalization algorithms:
+//
+// *  If the input values are all positive, they are rescaled so the largest one
+//    is 255.
+//
+// *  If any input value is negative, the values are shifted so input value 0.0
+//    is at 127.  They are then rescaled so that either the smallest value is 0,
+//    or the largest one is 255.
+//
+// The `tag` argument is a scalar `Tensor` of type `string`.  It is used to
+// build the `tag` of the summary values:
+//
+// *  If `max_images` is 1, the summary value tag is '*tag*/image'.
+// *  If `max_images` is greater than 1, the summary value tags are
+//    generated sequentially as '*tag*/image/0', '*tag*/image/1', etc.
+//
+// The `bad_color` argument is the color to use in the generated images for
+// non-finite input values.  It is a `unit8` 1-D tensor of length `channels`.
+// Each element must be in the range `[0, 255]` (It represents the value of a
+// pixel in the output image).  Non-finite values in the input tensor are
+// replaced by this tensor in the output image.  The default value is the color
+// red.
 //
 // Arguments:
-//	t: A 4D input Tensor.
-//	m: A 1D mean Tensor with size matching the last dimension of t.
-// This is the first output from tf.nn.moments,
-// or a saved moving average thereof.
-//	v: A 1D variance Tensor with size matching the last dimension of t.
-// This is the second output from tf.nn.moments,
-// or a saved moving average thereof.
-//	beta: A 1D beta Tensor with size matching the last dimension of t.
-// An offset to be added to the normalized tensor.
-//	gamma: A 1D gamma Tensor with size matching the last dimension of t.
-// If "scale_after_normalization" is true, this tensor will be multiplied
-// with the normalized tensor.
-//	variance_epsilon: A small float number to avoid dividing by 0.
-//	scale_after_normalization: A bool indicating whether the resulted tensor
-// needs to be multiplied with gamma.
-func BatchNormWithGlobalNormalization(scope *Scope, t tf.Output, m tf.Output, v tf.Output, beta tf.Output, gamma tf.Output, variance_epsilon float32, scale_after_normalization bool) (result tf.Output) {
+//	writer: A handle to a summary writer.
+//	step: The step to write the summary for.
+//	tag: Scalar. Used to build the `tag` attribute of the summary values.
+//	tensor: 4-D of shape `[batch_size, height, width, channels]` where
+// `channels` is 1, 3, or 4.
+//	bad_color: Color to use for pixels with non-finite values.
+//
+// Returns the created operation.
+func WriteImageSummary(scope *Scope, writer tf.Output, step tf.Output, tag tf.Output, tensor tf.Output, bad_color tf.Output, optional ...WriteImageSummaryAttr) (o *tf.Operation) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{"variance_epsilon": variance_epsilon, "scale_after_normalization": scale_after_normalization}
+	attrs := map[string]interface{}{}
+	for _, a := range optional {
+		a(attrs)
+	}
 	opspec := tf.OpSpec{
-		Type: "BatchNormWithGlobalNormalization",
+		Type: "WriteImageSummary",
 		Input: []tf.Input{
-			t, m, v, beta, gamma,
+			writer, step, tag, tensor, bad_color,
 		},
 		Attrs: attrs,
 	}
-	op := scope.AddOperation(opspec)
+	return scope.AddOperation(opspec)
+}
+
+// Pads a tensor with zeros.
+//
+// This operation pads a `input` with zeros according to the `paddings` you
+// specify. `paddings` is an integer tensor with shape `[Dn, 2]`, where n is the
+// rank of `input`. For each dimension D of `input`, `paddings[D, 0]` indicates
+// how many zeros to add before the contents of `input` in that dimension, and
+// `paddings[D, 1]` indicates how many zeros to add after the contents of `input`
+// in that dimension.
+//
+// The padded size of each dimension D of the output is:
+//
+// `paddings(D, 0) + input.dim_size(D) + paddings(D, 1)`
+//
+// For example:
+//
+// ```
+// # 't' is [[1, 1], [2, 2]]
+// # 'paddings' is [[1, 1], [2, 2]]
+// # rank of 't' is 2
+// pad(t, paddings) ==> [[0, 0, 0, 0, 0, 0]
+//                       [0, 0, 1, 1, 0, 0]
+//                       [0, 0, 2, 2, 0, 0]
+//                       [0, 0, 0, 0, 0, 0]]
+// ```
+func Pad(scope *Scope, input tf.Output, paddings tf.Output) (output tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "Pad",
+		Input: []tf.Input{
+			input, paddings,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// Computes the number of elements in the given queue.
+//
+// Arguments:
+//	handle: The handle to a queue.
+//
+// Returns The number of elements in the given queue.
+func QueueSizeV2(scope *Scope, handle tf.Output) (size tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "QueueSizeV2",
+		Input: []tf.Input{
+			handle,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// Outputs a `Summary` protocol buffer with a histogram.
+//
+// The generated
+// [`Summary`](https://www.tensorflow.org/code/tensorflow/core/framework/summary.proto)
+// has one summary value containing a histogram for `values`.
+//
+// This op reports an `InvalidArgument` error if any value is not finite.
+//
+// Arguments:
+//	tag: Scalar.  Tag to use for the `Summary.Value`.
+//	values: Any shape. Values to use to build the histogram.
+//
+// Returns Scalar. Serialized `Summary` protocol buffer.
+func HistogramSummary(scope *Scope, tag tf.Output, values tf.Output) (summary tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "HistogramSummary",
+		Input: []tf.Input{
+			tag, values,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// Creates a dataset that emits the lines of one or more text files.
+//
+// Arguments:
+//	filenames: A scalar or a vector containing the name(s) of the file(s) to be
+// read.
+//	compression_type: A scalar containing either (i) the empty string (no
+// compression), (ii) "ZLIB", or (iii) "GZIP".
+//	buffer_size: A scalar containing the number of bytes to buffer.
+func TextLineDataset(scope *Scope, filenames tf.Output, compression_type tf.Output, buffer_size tf.Output) (handle tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "TextLineDataset",
+		Input: []tf.Input{
+			filenames, compression_type, buffer_size,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// Returns the number of records this Reader has produced.
+//
+// This is the same as the number of ReaderRead executions that have
+// succeeded.
+//
+// Arguments:
+//	reader_handle: Handle to a Reader.
+func ReaderNumRecordsProducedV2(scope *Scope, reader_handle tf.Output) (records_produced tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "ReaderNumRecordsProducedV2",
+		Input: []tf.Input{
+			reader_handle,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// Computes exponential of x - 1 element-wise.
+//
+// I.e., \\(y = (\exp x) - 1\\).
+func Expm1(scope *Scope, x tf.Output) (y tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "Expm1",
+		Input: []tf.Input{
+			x,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// Batch normalization.
+//
+// DEPRECATED at GraphDef version 9: Use tf.nn.batch_normalization()
+//
+// This op is deprecated. Prefer `tf.nn.batch_normalization`.
+//
+// Arguments:
+//	t: A 4D input Tensor.
+//	m: A 1D mean Tensor with size matching the last dimension of t.
+// This is the first output from tf.nn.moments,
+// or a saved moving average thereof.
+//	v: A 1D variance Tensor with size matching the last dimension of t.
+// This is the second output from tf.nn.moments,
+// or a saved moving average thereof.
+//	beta: A 1D beta Tensor with size matching the last dimension of t.
+// An offset to be added to the normalized tensor.
+//	gamma: A 1D gamma Tensor with size matching the last dimension of t.
+// If "scale_after_normalization" is true, this tensor will be multiplied
+// with the normalized tensor.
+//	variance_epsilon: A small float number to avoid dividing by 0.
+//	scale_after_normalization: A bool indicating whether the resulted tensor
+// needs to be multiplied with gamma.
+func BatchNormWithGlobalNormalization(scope *Scope, t tf.Output, m tf.Output, v tf.Output, beta tf.Output, gamma tf.Output, variance_epsilon float32, scale_after_normalization bool) (result tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{"variance_epsilon": variance_epsilon, "scale_after_normalization": scale_after_normalization}
+	opspec := tf.OpSpec{
+		Type: "BatchNormWithGlobalNormalization",
+		Input: []tf.Input{
+			t, m, v, beta, gamma,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
@@ -13284,35 +13431,6 @@ func OrderedMapUnstageNoKey(scope *Scope, indices tf.Output, dtypes []tf.DataTyp
 	return key, values
 }
 
-// Merges summaries.
-//
-// This op creates a
-// [`Summary`](https://www.tensorflow.org/code/tensorflow/core/framework/summary.proto)
-// protocol buffer that contains the union of all the values in the input
-// summaries.
-//
-// When the Op is run, it reports an `InvalidArgument` error if multiple values
-// in the summaries to merge use the same tag.
-//
-// Arguments:
-//	inputs: Can be of any shape.  Each must contain serialized `Summary` protocol
-// buffers.
-//
-// Returns Scalar. Serialized `Summary` protocol buffer.
-func MergeSummary(scope *Scope, inputs []tf.Output) (summary tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "MergeSummary",
-		Input: []tf.Input{
-			tf.OutputList(inputs),
-		},
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
 // Read an element from the TensorArray into output `value`.
 //
 // Arguments:
@@ -14006,56 +14124,120 @@ func FractionalMaxPoolGrad(scope *Scope, orig_input tf.Output, orig_output tf.Ou
 	return op.Output(0)
 }
 
-// Reorders a SparseTensor into the canonical, row-major ordering.
-//
-// Note that by convention, all sparse ops preserve the canonical ordering along
-// increasing dimension number. The only time ordering can be violated is during
-// manual manipulation of the indices and values vectors to add entries.
-//
-// Reordering does not affect the shape of the SparseTensor.
-//
-// If the tensor has rank `R` and `N` non-empty values, `input_indices` has
-// shape `[N, R]`, input_values has length `N`, and input_shape has length `R`.
+// Does nothing. Serves as a control trigger for scheduling.
 //
-// Arguments:
-//	input_indices: 2-D.  `N x R` matrix with the indices of non-empty values in a
-// SparseTensor, possibly not in canonical ordering.
-//	input_values: 1-D.  `N` non-empty values corresponding to `input_indices`.
-//	input_shape: 1-D.  Shape of the input SparseTensor.
+// Only useful as a placeholder for control edges.
 //
-// Returns 2-D.  `N x R` matrix with the same indices as input_indices, but
-// in canonical row-major ordering.1-D.  `N` non-empty values corresponding to `output_indices`.
-func SparseReorder(scope *Scope, input_indices tf.Output, input_values tf.Output, input_shape tf.Output) (output_indices tf.Output, output_values tf.Output) {
+// Returns the created operation.
+func ControlTrigger(scope *Scope) (o *tf.Operation) {
 	if scope.Err() != nil {
 		return
 	}
 	opspec := tf.OpSpec{
-		Type: "SparseReorder",
-		Input: []tf.Input{
-			input_indices, input_values, input_shape,
-		},
+		Type: "ControlTrigger",
 	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0), op.Output(1)
+	return scope.AddOperation(opspec)
 }
 
-// PackAttr is an optional argument to Pack.
-type PackAttr func(optionalAttr)
+// ResourceApplyAddSignAttr is an optional argument to ResourceApplyAddSign.
+type ResourceApplyAddSignAttr func(optionalAttr)
 
-// PackAxis sets the optional axis attribute to value.
+// ResourceApplyAddSignUseLocking sets the optional use_locking attribute to value.
 //
-// value: Dimension along which to pack.  Negative values wrap around, so the
-// valid range is `[-(R+1), R+1)`.
-// If not specified, defaults to 0
-func PackAxis(value int64) PackAttr {
+// value: If `True`, updating of the var and m tensors is
+// protected by a lock; otherwise the behavior is undefined, but may exhibit less
+// contention.
+// If not specified, defaults to false
+func ResourceApplyAddSignUseLocking(value bool) ResourceApplyAddSignAttr {
 	return func(m optionalAttr) {
-		m["axis"] = value
+		m["use_locking"] = value
 	}
 }
 
-// Packs a list of `N` rank-`R` tensors into one rank-`(R+1)` tensor.
+// Update '*var' according to the AddSign update.
 //
-// Packs the `N` tensors in `values` into a tensor with rank one higher than each
+// m_t <- beta1 * m_{t-1} + (1 - beta1) * g
+// update <- (alpha + sign_decay * sign(g) *sign(m)) * g
+// variable <- variable - lr_t * update
+//
+// Arguments:
+//	var_: Should be from a Variable().
+//	m: Should be from a Variable().
+//	lr: Scaling factor. Must be a scalar.
+//	alpha: Must be a scalar.
+//	sign_decay: Must be a scalar.
+//	beta: Must be a scalar.
+//	grad: The gradient.
+//
+// Returns the created operation.
+func ResourceApplyAddSign(scope *Scope, var_ tf.Output, m tf.Output, lr tf.Output, alpha tf.Output, sign_decay tf.Output, beta tf.Output, grad tf.Output, optional ...ResourceApplyAddSignAttr) (o *tf.Operation) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "ResourceApplyAddSign",
+		Input: []tf.Input{
+			var_, m, lr, alpha, sign_decay, beta, grad,
+		},
+		Attrs: attrs,
+	}
+	return scope.AddOperation(opspec)
+}
+
+// Reorders a SparseTensor into the canonical, row-major ordering.
+//
+// Note that by convention, all sparse ops preserve the canonical ordering along
+// increasing dimension number. The only time ordering can be violated is during
+// manual manipulation of the indices and values vectors to add entries.
+//
+// Reordering does not affect the shape of the SparseTensor.
+//
+// If the tensor has rank `R` and `N` non-empty values, `input_indices` has
+// shape `[N, R]`, input_values has length `N`, and input_shape has length `R`.
+//
+// Arguments:
+//	input_indices: 2-D.  `N x R` matrix with the indices of non-empty values in a
+// SparseTensor, possibly not in canonical ordering.
+//	input_values: 1-D.  `N` non-empty values corresponding to `input_indices`.
+//	input_shape: 1-D.  Shape of the input SparseTensor.
+//
+// Returns 2-D.  `N x R` matrix with the same indices as input_indices, but
+// in canonical row-major ordering.1-D.  `N` non-empty values corresponding to `output_indices`.
+func SparseReorder(scope *Scope, input_indices tf.Output, input_values tf.Output, input_shape tf.Output) (output_indices tf.Output, output_values tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "SparseReorder",
+		Input: []tf.Input{
+			input_indices, input_values, input_shape,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0), op.Output(1)
+}
+
+// PackAttr is an optional argument to Pack.
+type PackAttr func(optionalAttr)
+
+// PackAxis sets the optional axis attribute to value.
+//
+// value: Dimension along which to pack.  Negative values wrap around, so the
+// valid range is `[-(R+1), R+1)`.
+// If not specified, defaults to 0
+func PackAxis(value int64) PackAttr {
+	return func(m optionalAttr) {
+		m["axis"] = value
+	}
+}
+
+// Packs a list of `N` rank-`R` tensors into one rank-`(R+1)` tensor.
+//
+// Packs the `N` tensors in `values` into a tensor with rank one higher than each
 // tensor in `values`, by packing them along the `axis` dimension.
 // Given a list of tensors of shape `(A, B, C)`;
 //
@@ -14151,6 +14333,133 @@ func QuantizedRelu(scope *Scope, features tf.Output, min_features tf.Output, max
 	return op.Output(0), op.Output(1), op.Output(2)
 }
 
+// Records the bytes size of each element of `input_dataset` in a StatsAggregator.
+func BytesProducedStatsDataset(scope *Scope, input_dataset tf.Output, tag tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes}
+	opspec := tf.OpSpec{
+		Type: "BytesProducedStatsDataset",
+		Input: []tf.Input{
+			input_dataset, tag,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// QrAttr is an optional argument to Qr.
+type QrAttr func(optionalAttr)
+
+// QrFullMatrices sets the optional full_matrices attribute to value.
+//
+// value: If true, compute full-sized `q` and `r`. If false
+// (the default), compute only the leading `P` columns of `q`.
+// If not specified, defaults to false
+func QrFullMatrices(value bool) QrAttr {
+	return func(m optionalAttr) {
+		m["full_matrices"] = value
+	}
+}
+
+// Computes the QR decompositions of one or more matrices.
+//
+// Computes the QR decomposition of each inner matrix in `tensor` such that
+// `tensor[..., :, :] = q[..., :, :] * r[..., :,:])`
+//
+// ```python
+// # a is a tensor.
+// # q is a tensor of orthonormal matrices.
+// # r is a tensor of upper triangular matrices.
+// q, r = qr(a)
+// q_full, r_full = qr(a, full_matrices=True)
+// ```
+//
+// Arguments:
+//	input: A tensor of shape `[..., M, N]` whose inner-most 2 dimensions
+// form matrices of size `[M, N]`. Let `P` be the minimum of `M` and `N`.
+//
+// Returns Orthonormal basis for range of `a`. If `full_matrices` is `False` then
+// shape is `[..., M, P]`; if `full_matrices` is `True` then shape is
+// `[..., M, M]`.Triangular factor. If `full_matrices` is `False` then shape is
+// `[..., P, N]`. If `full_matrices` is `True` then shape is `[..., M, N]`.
+func Qr(scope *Scope, input tf.Output, optional ...QrAttr) (q tf.Output, r tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "Qr",
+		Input: []tf.Input{
+			input,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0), op.Output(1)
+}
+
+// AudioSummaryAttr is an optional argument to AudioSummary.
+type AudioSummaryAttr func(optionalAttr)
+
+// AudioSummaryMaxOutputs sets the optional max_outputs attribute to value.
+//
+// value: Max number of batch elements to generate audio for.
+// If not specified, defaults to 3
+//
+// REQUIRES: value >= 1
+func AudioSummaryMaxOutputs(value int64) AudioSummaryAttr {
+	return func(m optionalAttr) {
+		m["max_outputs"] = value
+	}
+}
+
+// Outputs a `Summary` protocol buffer with audio.
+//
+// DEPRECATED at GraphDef version 15: Use AudioSummaryV2.
+//
+// The summary has up to `max_outputs` summary values containing audio. The
+// audio is built from `tensor` which must be 3-D with shape `[batch_size,
+// frames, channels]` or 2-D with shape `[batch_size, frames]`. The values are
+// assumed to be in the range of `[-1.0, 1.0]` with a sample rate of `sample_rate`.
+//
+// The `tag` argument is a scalar `Tensor` of type `string`.  It is used to
+// build the `tag` of the summary values:
+//
+// *  If `max_outputs` is 1, the summary value tag is '*tag*/audio'.
+// *  If `max_outputs` is greater than 1, the summary value tags are
+//    generated sequentially as '*tag*/audio/0', '*tag*/audio/1', etc.
+//
+// Arguments:
+//	tag: Scalar. Used to build the `tag` attribute of the summary values.
+//	tensor: 2-D of shape `[batch_size, frames]`.
+//	sample_rate: The sample rate of the signal in hertz.
+//
+// Returns Scalar. Serialized `Summary` protocol buffer.
+func AudioSummary(scope *Scope, tag tf.Output, tensor tf.Output, sample_rate float32, optional ...AudioSummaryAttr) (summary tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{"sample_rate": sample_rate}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "AudioSummary",
+		Input: []tf.Input{
+			tag, tensor,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
 // Reverses specific dimensions of a tensor.
 //
 // NOTE `tf.reverse` has now changed behavior in preparation for 1.0.
@@ -14671,6 +14980,24 @@ func SoftmaxCrossEntropyWithLogits(scope *Scope, features tf.Output, labels tf.O
 	return op.Output(0), op.Output(1)
 }
 
+// Returns x - y element-wise.
+//
+// *NOTE*: `Sub` supports broadcasting. More about broadcasting
+// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)
+func Sub(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "Sub",
+		Input: []tf.Input{
+			x, y,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
 // Get the value of the tensor specified by its handle.
 //
 // Arguments:
@@ -17551,69 +17878,21 @@ func LRN(scope *Scope, input tf.Output, optional ...LRNAttr) (output tf.Output)
 	return op.Output(0)
 }
 
-// AvgPool3DGradAttr is an optional argument to AvgPool3DGrad.
-type AvgPool3DGradAttr func(optionalAttr)
-
-// AvgPool3DGradDataFormat sets the optional data_format attribute to value.
+// Inverse fast Fourier transform.
 //
-// value: The data format of the input and output data. With the
-// default format "NDHWC", the data is stored in the order of:
-//     [batch, in_depth, in_height, in_width, in_channels].
-// Alternatively, the format could be "NCDHW", the data storage order is:
-//     [batch, in_channels, in_depth, in_height, in_width].
-// If not specified, defaults to "NDHWC"
-func AvgPool3DGradDataFormat(value string) AvgPool3DGradAttr {
-	return func(m optionalAttr) {
-		m["data_format"] = value
-	}
-}
-
-// Computes gradients of average pooling function.
+// Computes the inverse 1-dimensional discrete Fourier transform over the
+// inner-most dimension of `input`.
 //
 // Arguments:
-//	orig_input_shape: The original input dimensions.
-//	grad: Output backprop of shape `[batch, depth, rows, cols, channels]`.
-//	ksize: 1-D tensor of length 5. The size of the window for each dimension of
-// the input tensor. Must have `ksize[0] = ksize[4] = 1`.
-//	strides: 1-D tensor of length 5. The stride of the sliding window for each
-// dimension of `input`. Must have `strides[0] = strides[4] = 1`.
-//	padding: The type of padding algorithm to use.
+//	input: A complex64 tensor.
 //
-// Returns The backprop for input.
-func AvgPool3DGrad(scope *Scope, orig_input_shape tf.Output, grad tf.Output, ksize []int64, strides []int64, padding string, optional ...AvgPool3DGradAttr) (output tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{"ksize": ksize, "strides": strides, "padding": padding}
-	for _, a := range optional {
-		a(attrs)
-	}
-	opspec := tf.OpSpec{
-		Type: "AvgPool3DGrad",
-		Input: []tf.Input{
-			orig_input_shape, grad,
-		},
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// Inverse fast Fourier transform.
-//
-// Computes the inverse 1-dimensional discrete Fourier transform over the
-// inner-most dimension of `input`.
-//
-// Arguments:
-//	input: A complex64 tensor.
-//
-// Returns A complex64 tensor of the same shape as `input`. The inner-most
-//   dimension of `input` is replaced with its inverse 1D Fourier transform.
-//
-// @compatibility(numpy)
-// Equivalent to np.fft.ifft
-// @end_compatibility
-func IFFT(scope *Scope, input tf.Output) (output tf.Output) {
+// Returns A complex64 tensor of the same shape as `input`. The inner-most
+//   dimension of `input` is replaced with its inverse 1D Fourier transform.
+//
+// @compatibility(numpy)
+// Equivalent to np.fft.ifft
+// @end_compatibility
+func IFFT(scope *Scope, input tf.Output) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
@@ -17850,6 +18129,55 @@ func Cumprod(scope *Scope, x tf.Output, axis tf.Output, optional ...CumprodAttr)
 	return op.Output(0)
 }
 
+// ResourceApplyPowerSignAttr is an optional argument to ResourceApplyPowerSign.
+type ResourceApplyPowerSignAttr func(optionalAttr)
+
+// ResourceApplyPowerSignUseLocking sets the optional use_locking attribute to value.
+//
+// value: If `True`, updating of the var and m tensors is
+// protected by a lock; otherwise the behavior is undefined, but may exhibit less
+// contention.
+// If not specified, defaults to false
+func ResourceApplyPowerSignUseLocking(value bool) ResourceApplyPowerSignAttr {
+	return func(m optionalAttr) {
+		m["use_locking"] = value
+	}
+}
+
+// Update '*var' according to the AddSign update.
+//
+// m_t <- beta1 * m_{t-1} + (1 - beta1) * g
+// update <- exp(logbase * sign_decay * sign(g) * sign(m_t)) * g
+// variable <- variable - lr_t * update
+//
+// Arguments:
+//	var_: Should be from a Variable().
+//	m: Should be from a Variable().
+//	lr: Scaling factor. Must be a scalar.
+//	logbase: Must be a scalar.
+//	sign_decay: Must be a scalar.
+//	beta: Must be a scalar.
+//	grad: The gradient.
+//
+// Returns the created operation.
+func ResourceApplyPowerSign(scope *Scope, var_ tf.Output, m tf.Output, lr tf.Output, logbase tf.Output, sign_decay tf.Output, beta tf.Output, grad tf.Output, optional ...ResourceApplyPowerSignAttr) (o *tf.Operation) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "ResourceApplyPowerSign",
+		Input: []tf.Input{
+			var_, m, lr, logbase, sign_decay, beta, grad,
+		},
+		Attrs: attrs,
+	}
+	return scope.AddOperation(opspec)
+}
+
 // DestroyResourceOpAttr is an optional argument to DestroyResourceOp.
 type DestroyResourceOpAttr func(optionalAttr)
 
@@ -17988,6 +18316,32 @@ func SparseAddGrad(scope *Scope, backprop_val_grad tf.Output, a_indices tf.Outpu
 	return op.Output(0), op.Output(1)
 }
 
+// Adds `bias` to `value`.
+//
+// This is a deprecated version of BiasAdd and will be soon removed.
+//
+// This is a special case of `tf.add` where `bias` is restricted to be 1-D.
+// Broadcasting is supported, so `value` may have any number of dimensions.
+//
+// Arguments:
+//	value: Any number of dimensions.
+//	bias: 1-D with size the last dimension of `value`.
+//
+// Returns Broadcasted sum of `value` and `bias`.
+func BiasAddV1(scope *Scope, value tf.Output, bias tf.Output) (output tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "BiasAddV1",
+		Input: []tf.Input{
+			value, bias,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
 // FixedLengthRecordReaderV2Attr is an optional argument to FixedLengthRecordReaderV2.
 type FixedLengthRecordReaderV2Attr func(optionalAttr)
 
@@ -18195,169 +18549,6 @@ func Cumsum(scope *Scope, x tf.Output, axis tf.Output, optional ...CumsumAttr) (
 	return op.Output(0)
 }
 
-// WriteImageSummaryAttr is an optional argument to WriteImageSummary.
-type WriteImageSummaryAttr func(optionalAttr)
-
-// WriteImageSummaryMaxImages sets the optional max_images attribute to value.
-//
-// value: Max number of batch elements to generate images for.
-// If not specified, defaults to 3
-//
-// REQUIRES: value >= 1
-func WriteImageSummaryMaxImages(value int64) WriteImageSummaryAttr {
-	return func(m optionalAttr) {
-		m["max_images"] = value
-	}
-}
-
-// Writes a `Summary` protocol buffer with images.
-//
-// The summary has up to `max_images` summary values containing images. The
-// images are built from `tensor` which must be 4-D with shape `[batch_size,
-// height, width, channels]` and where `channels` can be:
-//
-// *  1: `tensor` is interpreted as Grayscale.
-// *  3: `tensor` is interpreted as RGB.
-// *  4: `tensor` is interpreted as RGBA.
-//
-// The images have the same number of channels as the input tensor. For float
-// input, the values are normalized one image at a time to fit in the range
-// `[0, 255]`.  `uint8` values are unchanged.  The op uses two different
-// normalization algorithms:
-//
-// *  If the input values are all positive, they are rescaled so the largest one
-//    is 255.
-//
-// *  If any input value is negative, the values are shifted so input value 0.0
-//    is at 127.  They are then rescaled so that either the smallest value is 0,
-//    or the largest one is 255.
-//
-// The `tag` argument is a scalar `Tensor` of type `string`.  It is used to
-// build the `tag` of the summary values:
-//
-// *  If `max_images` is 1, the summary value tag is '*tag*/image'.
-// *  If `max_images` is greater than 1, the summary value tags are
-//    generated sequentially as '*tag*/image/0', '*tag*/image/1', etc.
-//
-// The `bad_color` argument is the color to use in the generated images for
-// non-finite input values.  It is a `unit8` 1-D tensor of length `channels`.
-// Each element must be in the range `[0, 255]` (It represents the value of a
-// pixel in the output image).  Non-finite values in the input tensor are
-// replaced by this tensor in the output image.  The default value is the color
-// red.
-//
-// Arguments:
-//	writer: A handle to a summary writer.
-//	step: The step to write the summary for.
-//	tag: Scalar. Used to build the `tag` attribute of the summary values.
-//	tensor: 4-D of shape `[batch_size, height, width, channels]` where
-// `channels` is 1, 3, or 4.
-//	bad_color: Color to use for pixels with non-finite values.
-//
-// Returns the created operation.
-func WriteImageSummary(scope *Scope, writer tf.Output, step tf.Output, tag tf.Output, tensor tf.Output, bad_color tf.Output, optional ...WriteImageSummaryAttr) (o *tf.Operation) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{}
-	for _, a := range optional {
-		a(attrs)
-	}
-	opspec := tf.OpSpec{
-		Type: "WriteImageSummary",
-		Input: []tf.Input{
-			writer, step, tag, tensor, bad_color,
-		},
-		Attrs: attrs,
-	}
-	return scope.AddOperation(opspec)
-}
-
-// Pads a tensor with zeros.
-//
-// This operation pads a `input` with zeros according to the `paddings` you
-// specify. `paddings` is an integer tensor with shape `[Dn, 2]`, where n is the
-// rank of `input`. For each dimension D of `input`, `paddings[D, 0]` indicates
-// how many zeros to add before the contents of `input` in that dimension, and
-// `paddings[D, 1]` indicates how many zeros to add after the contents of `input`
-// in that dimension.
-//
-// The padded size of each dimension D of the output is:
-//
-// `paddings(D, 0) + input.dim_size(D) + paddings(D, 1)`
-//
-// For example:
-//
-// ```
-// # 't' is [[1, 1], [2, 2]]
-// # 'paddings' is [[1, 1], [2, 2]]
-// # rank of 't' is 2
-// pad(t, paddings) ==> [[0, 0, 0, 0, 0, 0]
-//                       [0, 0, 1, 1, 0, 0]
-//                       [0, 0, 2, 2, 0, 0]
-//                       [0, 0, 0, 0, 0, 0]]
-// ```
-func Pad(scope *Scope, input tf.Output, paddings tf.Output) (output tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "Pad",
-		Input: []tf.Input{
-			input, paddings,
-		},
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// Computes the number of elements in the given queue.
-//
-// Arguments:
-//	handle: The handle to a queue.
-//
-// Returns The number of elements in the given queue.
-func QueueSizeV2(scope *Scope, handle tf.Output) (size tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "QueueSizeV2",
-		Input: []tf.Input{
-			handle,
-		},
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// Outputs a `Summary` protocol buffer with a histogram.
-//
-// The generated
-// [`Summary`](https://www.tensorflow.org/code/tensorflow/core/framework/summary.proto)
-// has one summary value containing a histogram for `values`.
-//
-// This op reports an `InvalidArgument` error if any value is not finite.
-//
-// Arguments:
-//	tag: Scalar.  Tag to use for the `Summary.Value`.
-//	values: Any shape. Values to use to build the histogram.
-//
-// Returns Scalar. Serialized `Summary` protocol buffer.
-func HistogramSummary(scope *Scope, tag tf.Output, values tf.Output) (summary tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "HistogramSummary",
-		Input: []tf.Input{
-			tag, values,
-		},
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
 // AsStringAttr is an optional argument to AsString.
 type AsStringAttr func(optionalAttr)
 
@@ -18989,7 +19180,158 @@ func StatelessRandomNormal(scope *Scope, shape tf.Output, seed tf.Output, option
 	return op.Output(0)
 }
 
-// Computes the sum along sparse segments of a tensor divided by the sqrt of N.
+// UniqueWithCountsAttr is an optional argument to UniqueWithCounts.
+type UniqueWithCountsAttr func(optionalAttr)
+
+// UniqueWithCountsOutIdx sets the optional out_idx attribute to value.
+// If not specified, defaults to DT_INT32
+func UniqueWithCountsOutIdx(value tf.DataType) UniqueWithCountsAttr {
+	return func(m optionalAttr) {
+		m["out_idx"] = value
+	}
+}
+
+// Finds unique elements in a 1-D tensor.
+//
+// This operation returns a tensor `y` containing all of the unique elements of `x`
+// sorted in the same order that they occur in `x`. This operation also returns a
+// tensor `idx` the same size as `x` that contains the index of each value of `x`
+// in the unique output `y`. Finally, it returns a third tensor `count` that
+// contains the count of each element of `y` in `x`. In other words:
+//
+// `y[idx[i]] = x[i] for i in [0, 1,...,rank(x) - 1]`
+//
+// For example:
+//
+// ```
+// # tensor 'x' is [1, 1, 2, 4, 4, 4, 7, 8, 8]
+// y, idx, count = unique_with_counts(x)
+// y ==> [1, 2, 4, 7, 8]
+// idx ==> [0, 0, 1, 2, 2, 2, 3, 4, 4]
+// count ==> [2, 1, 3, 1, 2]
+// ```
+//
+// Arguments:
+//	x: 1-D.
+//
+// Returns 1-D.1-D.1-D.
+func UniqueWithCounts(scope *Scope, x tf.Output, optional ...UniqueWithCountsAttr) (y tf.Output, idx tf.Output, count tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "UniqueWithCounts",
+		Input: []tf.Input{
+			x,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0), op.Output(1), op.Output(2)
+}
+
+// RestoreSliceAttr is an optional argument to RestoreSlice.
+type RestoreSliceAttr func(optionalAttr)
+
+// RestoreSlicePreferredShard sets the optional preferred_shard attribute to value.
+//
+// value: Index of file to open first if multiple files match
+// `file_pattern`. See the documentation for `Restore`.
+// If not specified, defaults to -1
+func RestoreSlicePreferredShard(value int64) RestoreSliceAttr {
+	return func(m optionalAttr) {
+		m["preferred_shard"] = value
+	}
+}
+
+// Restores a tensor from checkpoint files.
+//
+// This is like `Restore` except that restored tensor can be listed as filling
+// only a slice of a larger tensor.  `shape_and_slice` specifies the shape of the
+// larger tensor and the slice that the restored tensor covers.
+//
+// The `shape_and_slice` input has the same format as the
+// elements of the `shapes_and_slices` input of the `SaveSlices` op.
+//
+// Arguments:
+//	file_pattern: Must have a single element. The pattern of the files from
+// which we read the tensor.
+//	tensor_name: Must have a single element. The name of the tensor to be
+// restored.
+//	shape_and_slice: Scalar. The shapes and slice specifications to use when
+// restoring a tensors.
+//	dt: The type of the tensor to be restored.
+//
+// Returns The restored tensor.
+func RestoreSlice(scope *Scope, file_pattern tf.Output, tensor_name tf.Output, shape_and_slice tf.Output, dt tf.DataType, optional ...RestoreSliceAttr) (tensor tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{"dt": dt}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "RestoreSlice",
+		Input: []tf.Input{
+			file_pattern, tensor_name, shape_and_slice,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// StatelessTruncatedNormalAttr is an optional argument to StatelessTruncatedNormal.
+type StatelessTruncatedNormalAttr func(optionalAttr)
+
+// StatelessTruncatedNormalDtype sets the optional dtype attribute to value.
+//
+// value: The type of the output.
+// If not specified, defaults to DT_FLOAT
+func StatelessTruncatedNormalDtype(value tf.DataType) StatelessTruncatedNormalAttr {
+	return func(m optionalAttr) {
+		m["dtype"] = value
+	}
+}
+
+// Outputs deterministic pseudorandom values from a truncated normal distribution.
+//
+// The generated values follow a normal distribution with mean 0 and standard
+// deviation 1, except that values whose magnitude is more than 2 standard
+// deviations from the mean are dropped and re-picked.
+//
+// The outputs are a deterministic function of `shape` and `seed`.
+//
+// Arguments:
+//	shape: The shape of the output tensor.
+//	seed: 2 seeds (shape [2]).
+//
+// Returns Random values with specified shape.
+func StatelessTruncatedNormal(scope *Scope, shape tf.Output, seed tf.Output, optional ...StatelessTruncatedNormalAttr) (output tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "StatelessTruncatedNormal",
+		Input: []tf.Input{
+			shape, seed,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// Computes the sum along sparse segments of a tensor divided by the sqrt of N.
 //
 // N is the size of the segment being reduced.
 //
@@ -19891,92 +20233,205 @@ func Zeta(scope *Scope, x tf.Output, q tf.Output) (z tf.Output) {
 	return op.Output(0)
 }
 
-// Inverse real-valued fast Fourier transform.
-//
-// Computes the inverse 1-dimensional discrete Fourier transform of a real-valued
-// signal over the inner-most dimension of `input`.
-//
-// The inner-most dimension of `input` is assumed to be the result of `RFFT`: the
-// `fft_length / 2 + 1` unique components of the DFT of a real-valued signal. If
-// `fft_length` is not provided, it is computed from the size of the inner-most
-// dimension of `input` (`fft_length = 2 * (inner - 1)`). If the FFT length used to
-// compute `input` is odd, it should be provided since it cannot be inferred
-// properly.
-//
-// Along the axis `IRFFT` is computed on, if `fft_length / 2 + 1` is smaller
-// than the corresponding dimension of `input`, the dimension is cropped. If it is
-// larger, the dimension is padded with zeros.
+// Creates a dataset that skips `count` elements from the `input_dataset`.
 //
 // Arguments:
-//	input: A complex64 tensor.
-//	fft_length: An int32 tensor of shape [1]. The FFT length.
 //
-// Returns A float32 tensor of the same rank as `input`. The inner-most
-//   dimension of `input` is replaced with the `fft_length` samples of its inverse
-//   1D Fourier transform.
+//	count: A scalar representing the number of elements from the `input_dataset`
+// that should be skipped.  If count is -1, skips everything.
 //
-// @compatibility(numpy)
-// Equivalent to np.fft.irfft
-// @end_compatibility
-func IRFFT(scope *Scope, input tf.Output, fft_length tf.Output) (output tf.Output) {
+//
+func SkipDataset(scope *Scope, input_dataset tf.Output, count tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
+	attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes}
 	opspec := tf.OpSpec{
-		Type: "IRFFT",
+		Type: "SkipDataset",
 		Input: []tf.Input{
-			input, fft_length,
+			input_dataset, count,
 		},
+		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// Adds a value to the current value of a variable.
-//
-// Any ReadVariableOp which depends directly or indirectly on this assign is
-// guaranteed to see the incremented value or a subsequent newer one.
+// ImagAttr is an optional argument to Imag.
+type ImagAttr func(optionalAttr)
+
+// ImagTout sets the optional Tout attribute to value.
+// If not specified, defaults to DT_FLOAT
+func ImagTout(value tf.DataType) ImagAttr {
+	return func(m optionalAttr) {
+		m["Tout"] = value
+	}
+}
+
+// Returns the imaginary part of a complex number.
 //
-// Outputs the incremented value, which can be used to totally order the
-// increments to this variable.
+// Given a tensor `input` of complex numbers, this operation returns a tensor of
+// type `float` that is the imaginary part of each element in `input`. All
+// elements in `input` must be complex numbers of the form \\(a + bj\\), where *a*
+// is the real part and *b* is the imaginary part returned by this operation.
 //
-// Arguments:
-//	resource: handle to the resource in which to store the variable.
-//	value: the value by which the variable will be incremented.
+// For example:
 //
-// Returns the created operation.
-func AssignAddVariableOp(scope *Scope, resource tf.Output, value tf.Output) (o *tf.Operation) {
+// ```
+// # tensor 'input' is [-2.25 + 4.75j, 3.25 + 5.75j]
+// tf.imag(input) ==> [4.75, 5.75]
+// ```
+func Imag(scope *Scope, input tf.Output, optional ...ImagAttr) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	opspec := tf.OpSpec{
-		Type: "AssignAddVariableOp",
-		Input: []tf.Input{
-			resource, value,
-		},
-	}
-	return scope.AddOperation(opspec)
-}
-
-// Computes inverse hyperbolic sine of x element-wise.
-func Asinh(scope *Scope, x tf.Output) (y tf.Output) {
-	if scope.Err() != nil {
-		return
+	attrs := map[string]interface{}{}
+	for _, a := range optional {
+		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "Asinh",
+		Type: "Imag",
 		Input: []tf.Input{
-			x,
+			input,
 		},
+		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// Real-valued fast Fourier transform.
+// ComplexAttr is an optional argument to Complex.
+type ComplexAttr func(optionalAttr)
+
+// ComplexTout sets the optional Tout attribute to value.
+// If not specified, defaults to DT_COMPLEX64
+func ComplexTout(value tf.DataType) ComplexAttr {
+	return func(m optionalAttr) {
+		m["Tout"] = value
+	}
+}
+
+// Converts two real numbers to a complex number.
 //
-// Computes the 1-dimensional discrete Fourier transform of a real-valued signal
-// over the inner-most dimension of `input`.
+// Given a tensor `real` representing the real part of a complex number, and a
+// tensor `imag` representing the imaginary part of a complex number, this
+// operation returns complex numbers elementwise of the form \\(a + bj\\), where
+// *a* represents the `real` part and *b* represents the `imag` part.
+//
+// The input tensors `real` and `imag` must have the same shape.
+//
+// For example:
+//
+// ```
+// # tensor 'real' is [2.25, 3.25]
+// # tensor `imag` is [4.75, 5.75]
+// tf.complex(real, imag) ==> [[2.25 + 4.75j], [3.25 + 5.75j]]
+// ```
+func Complex(scope *Scope, real tf.Output, imag tf.Output, optional ...ComplexAttr) (out tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "Complex",
+		Input: []tf.Input{
+			real, imag,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// Inverse real-valued fast Fourier transform.
+//
+// Computes the inverse 1-dimensional discrete Fourier transform of a real-valued
+// signal over the inner-most dimension of `input`.
+//
+// The inner-most dimension of `input` is assumed to be the result of `RFFT`: the
+// `fft_length / 2 + 1` unique components of the DFT of a real-valued signal. If
+// `fft_length` is not provided, it is computed from the size of the inner-most
+// dimension of `input` (`fft_length = 2 * (inner - 1)`). If the FFT length used to
+// compute `input` is odd, it should be provided since it cannot be inferred
+// properly.
+//
+// Along the axis `IRFFT` is computed on, if `fft_length / 2 + 1` is smaller
+// than the corresponding dimension of `input`, the dimension is cropped. If it is
+// larger, the dimension is padded with zeros.
+//
+// Arguments:
+//	input: A complex64 tensor.
+//	fft_length: An int32 tensor of shape [1]. The FFT length.
+//
+// Returns A float32 tensor of the same rank as `input`. The inner-most
+//   dimension of `input` is replaced with the `fft_length` samples of its inverse
+//   1D Fourier transform.
+//
+// @compatibility(numpy)
+// Equivalent to np.fft.irfft
+// @end_compatibility
+func IRFFT(scope *Scope, input tf.Output, fft_length tf.Output) (output tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "IRFFT",
+		Input: []tf.Input{
+			input, fft_length,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// Adds a value to the current value of a variable.
+//
+// Any ReadVariableOp which depends directly or indirectly on this assign is
+// guaranteed to see the incremented value or a subsequent newer one.
+//
+// Outputs the incremented value, which can be used to totally order the
+// increments to this variable.
+//
+// Arguments:
+//	resource: handle to the resource in which to store the variable.
+//	value: the value by which the variable will be incremented.
+//
+// Returns the created operation.
+func AssignAddVariableOp(scope *Scope, resource tf.Output, value tf.Output) (o *tf.Operation) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "AssignAddVariableOp",
+		Input: []tf.Input{
+			resource, value,
+		},
+	}
+	return scope.AddOperation(opspec)
+}
+
+// Computes inverse hyperbolic sine of x element-wise.
+func Asinh(scope *Scope, x tf.Output) (y tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "Asinh",
+		Input: []tf.Input{
+			x,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// Real-valued fast Fourier transform.
+//
+// Computes the 1-dimensional discrete Fourier transform of a real-valued signal
+// over the inner-most dimension of `input`.
 //
 // Since the DFT of a real signal is Hermitian-symmetric, `RFFT` only returns the
 // `fft_length / 2 + 1` unique components of the FFT: the zero-frequency term,
@@ -20311,85 +20766,30 @@ func RFFT3D(scope *Scope, input tf.Output, fft_length tf.Output) (output tf.Outp
 	return op.Output(0)
 }
 
-// RestoreSliceAttr is an optional argument to RestoreSlice.
-type RestoreSliceAttr func(optionalAttr)
+// QuantizeAndDequantizeV3Attr is an optional argument to QuantizeAndDequantizeV3.
+type QuantizeAndDequantizeV3Attr func(optionalAttr)
 
-// RestoreSlicePreferredShard sets the optional preferred_shard attribute to value.
-//
-// value: Index of file to open first if multiple files match
-// `file_pattern`. See the documentation for `Restore`.
-// If not specified, defaults to -1
-func RestoreSlicePreferredShard(value int64) RestoreSliceAttr {
+// QuantizeAndDequantizeV3SignedInput sets the optional signed_input attribute to value.
+// If not specified, defaults to true
+func QuantizeAndDequantizeV3SignedInput(value bool) QuantizeAndDequantizeV3Attr {
 	return func(m optionalAttr) {
-		m["preferred_shard"] = value
+		m["signed_input"] = value
 	}
 }
 
-// Restores a tensor from checkpoint files.
-//
-// This is like `Restore` except that restored tensor can be listed as filling
-// only a slice of a larger tensor.  `shape_and_slice` specifies the shape of the
-// larger tensor and the slice that the restored tensor covers.
-//
-// The `shape_and_slice` input has the same format as the
-// elements of the `shapes_and_slices` input of the `SaveSlices` op.
-//
-// Arguments:
-//	file_pattern: Must have a single element. The pattern of the files from
-// which we read the tensor.
-//	tensor_name: Must have a single element. The name of the tensor to be
-// restored.
-//	shape_and_slice: Scalar. The shapes and slice specifications to use when
-// restoring a tensors.
-//	dt: The type of the tensor to be restored.
-//
-// Returns The restored tensor.
-func RestoreSlice(scope *Scope, file_pattern tf.Output, tensor_name tf.Output, shape_and_slice tf.Output, dt tf.DataType, optional ...RestoreSliceAttr) (tensor tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{"dt": dt}
-	for _, a := range optional {
-		a(attrs)
-	}
-	opspec := tf.OpSpec{
-		Type: "RestoreSlice",
-		Input: []tf.Input{
-			file_pattern, tensor_name, shape_and_slice,
-		},
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// StatelessTruncatedNormalAttr is an optional argument to StatelessTruncatedNormal.
-type StatelessTruncatedNormalAttr func(optionalAttr)
-
-// StatelessTruncatedNormalDtype sets the optional dtype attribute to value.
-//
-// value: The type of the output.
-// If not specified, defaults to DT_FLOAT
-func StatelessTruncatedNormalDtype(value tf.DataType) StatelessTruncatedNormalAttr {
+// QuantizeAndDequantizeV3RangeGiven sets the optional range_given attribute to value.
+// If not specified, defaults to true
+func QuantizeAndDequantizeV3RangeGiven(value bool) QuantizeAndDequantizeV3Attr {
 	return func(m optionalAttr) {
-		m["dtype"] = value
+		m["range_given"] = value
 	}
 }
 
-// Outputs deterministic pseudorandom values from a truncated normal distribution.
-//
-// The generated values follow a normal distribution with mean 0 and standard
-// deviation 1, except that values whose magnitude is more than 2 standard
-// deviations from the mean are dropped and re-picked.
-//
-// The outputs are a deterministic function of `shape` and `seed`.
-//
-// Arguments:
-//	shape: The shape of the output tensor.
-//	seed: 2 seeds (shape [2]).
+// Quantizes then dequantizes a tensor.
 //
-// Returns Random values with specified shape.
-func StatelessTruncatedNormal(scope *Scope, shape tf.Output, seed tf.Output, optional ...StatelessTruncatedNormalAttr) (output tf.Output) {
+// This is almost identical to QuantizeAndDequantizeV2, except that num_bits is a
+// tensor, so its value can change during training.
+func QuantizeAndDequantizeV3(scope *Scope, input tf.Output, input_min tf.Output, input_max tf.Output, num_bits tf.Output, optional ...QuantizeAndDequantizeV3Attr) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
@@ -20398,9 +20798,9 @@ func StatelessTruncatedNormal(scope *Scope, shape tf.Output, seed tf.Output, opt
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "StatelessTruncatedNormal",
+		Type: "QuantizeAndDequantizeV3",
 		Input: []tf.Input{
-			shape, seed,
+			input, input_min, input_max, num_bits,
 		},
 		Attrs: attrs,
 	}
@@ -20408,246 +20808,125 @@ func StatelessTruncatedNormal(scope *Scope, shape tf.Output, seed tf.Output, opt
 	return op.Output(0)
 }
 
-// UniqueWithCountsAttr is an optional argument to UniqueWithCounts.
-type UniqueWithCountsAttr func(optionalAttr)
+// AvgPool3DAttr is an optional argument to AvgPool3D.
+type AvgPool3DAttr func(optionalAttr)
 
-// UniqueWithCountsOutIdx sets the optional out_idx attribute to value.
-// If not specified, defaults to DT_INT32
-func UniqueWithCountsOutIdx(value tf.DataType) UniqueWithCountsAttr {
+// AvgPool3DDataFormat sets the optional data_format attribute to value.
+//
+// value: The data format of the input and output data. With the
+// default format "NDHWC", the data is stored in the order of:
+//     [batch, in_depth, in_height, in_width, in_channels].
+// Alternatively, the format could be "NCDHW", the data storage order is:
+//     [batch, in_channels, in_depth, in_height, in_width].
+// If not specified, defaults to "NDHWC"
+func AvgPool3DDataFormat(value string) AvgPool3DAttr {
 	return func(m optionalAttr) {
-		m["out_idx"] = value
+		m["data_format"] = value
 	}
 }
 
-// Finds unique elements in a 1-D tensor.
-//
-// This operation returns a tensor `y` containing all of the unique elements of `x`
-// sorted in the same order that they occur in `x`. This operation also returns a
-// tensor `idx` the same size as `x` that contains the index of each value of `x`
-// in the unique output `y`. Finally, it returns a third tensor `count` that
-// contains the count of each element of `y` in `x`. In other words:
-//
-// `y[idx[i]] = x[i] for i in [0, 1,...,rank(x) - 1]`
-//
-// For example:
-//
-// ```
-// # tensor 'x' is [1, 1, 2, 4, 4, 4, 7, 8, 8]
-// y, idx, count = unique_with_counts(x)
-// y ==> [1, 2, 4, 7, 8]
-// idx ==> [0, 0, 1, 2, 2, 2, 3, 4, 4]
-// count ==> [2, 1, 3, 1, 2]
-// ```
+// Performs 3D average pooling on the input.
 //
 // Arguments:
-//	x: 1-D.
+//	input: Shape `[batch, depth, rows, cols, channels]` tensor to pool over.
+//	ksize: 1-D tensor of length 5. The size of the window for each dimension of
+// the input tensor. Must have `ksize[0] = ksize[4] = 1`.
+//	strides: 1-D tensor of length 5. The stride of the sliding window for each
+// dimension of `input`. Must have `strides[0] = strides[4] = 1`.
+//	padding: The type of padding algorithm to use.
 //
-// Returns 1-D.1-D.1-D.
-func UniqueWithCounts(scope *Scope, x tf.Output, optional ...UniqueWithCountsAttr) (y tf.Output, idx tf.Output, count tf.Output) {
+// Returns The average pooled output tensor.
+func AvgPool3D(scope *Scope, input tf.Output, ksize []int64, strides []int64, padding string, optional ...AvgPool3DAttr) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{}
+	attrs := map[string]interface{}{"ksize": ksize, "strides": strides, "padding": padding}
 	for _, a := range optional {
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "UniqueWithCounts",
+		Type: "AvgPool3D",
 		Input: []tf.Input{
-			x,
+			input,
 		},
 		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
-	return op.Output(0), op.Output(1), op.Output(2)
+	return op.Output(0)
 }
 
-// Creates a dataset that skips `count` elements from the `input_dataset`.
+// Produces the max pool of the input tensor for quantized types.
 //
 // Arguments:
+//	input: The 4D (batch x rows x cols x depth) Tensor to MaxReduce over.
+//	min_input: The float value that the lowest quantized input value represents.
+//	max_input: The float value that the highest quantized input value represents.
+//	ksize: The size of the window for each dimension of the input tensor.
+// The length must be 4 to match the number of dimensions of the input.
+//	strides: The stride of the sliding window for each dimension of the input
+// tensor. The length must be 4 to match the number of dimensions of the input.
+//	padding: The type of padding algorithm to use.
 //
-//	count: A scalar representing the number of elements from the `input_dataset`
-// that should be skipped.  If count is -1, skips everything.
-//
-//
-func SkipDataset(scope *Scope, input_dataset tf.Output, count tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) {
+// Returns The float value that the lowest quantized output value represents.The float value that the highest quantized output value represents.
+func QuantizedMaxPool(scope *Scope, input tf.Output, min_input tf.Output, max_input tf.Output, ksize []int64, strides []int64, padding string) (output tf.Output, min_output tf.Output, max_output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes}
+	attrs := map[string]interface{}{"ksize": ksize, "strides": strides, "padding": padding}
 	opspec := tf.OpSpec{
-		Type: "SkipDataset",
+		Type: "QuantizedMaxPool",
 		Input: []tf.Input{
-			input_dataset, count,
+			input, min_input, max_input,
 		},
 		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
-	return op.Output(0)
+	return op.Output(0), op.Output(1), op.Output(2)
 }
 
-// ComplexAttr is an optional argument to Complex.
-type ComplexAttr func(optionalAttr)
-
-// ComplexTout sets the optional Tout attribute to value.
-// If not specified, defaults to DT_COMPLEX64
-func ComplexTout(value tf.DataType) ComplexAttr {
-	return func(m optionalAttr) {
-		m["Tout"] = value
-	}
-}
+// AvgPool3DGradAttr is an optional argument to AvgPool3DGrad.
+type AvgPool3DGradAttr func(optionalAttr)
 
-// Converts two real numbers to a complex number.
-//
-// Given a tensor `real` representing the real part of a complex number, and a
-// tensor `imag` representing the imaginary part of a complex number, this
-// operation returns complex numbers elementwise of the form \\(a + bj\\), where
-// *a* represents the `real` part and *b* represents the `imag` part.
-//
-// The input tensors `real` and `imag` must have the same shape.
-//
-// For example:
+// AvgPool3DGradDataFormat sets the optional data_format attribute to value.
 //
-// ```
-// # tensor 'real' is [2.25, 3.25]
-// # tensor `imag` is [4.75, 5.75]
-// tf.complex(real, imag) ==> [[2.25 + 4.75j], [3.25 + 5.75j]]
-// ```
-func Complex(scope *Scope, real tf.Output, imag tf.Output, optional ...ComplexAttr) (out tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{}
-	for _, a := range optional {
-		a(attrs)
-	}
-	opspec := tf.OpSpec{
-		Type: "Complex",
-		Input: []tf.Input{
-			real, imag,
-		},
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// ImagAttr is an optional argument to Imag.
-type ImagAttr func(optionalAttr)
-
-// ImagTout sets the optional Tout attribute to value.
-// If not specified, defaults to DT_FLOAT
-func ImagTout(value tf.DataType) ImagAttr {
+// value: The data format of the input and output data. With the
+// default format "NDHWC", the data is stored in the order of:
+//     [batch, in_depth, in_height, in_width, in_channels].
+// Alternatively, the format could be "NCDHW", the data storage order is:
+//     [batch, in_channels, in_depth, in_height, in_width].
+// If not specified, defaults to "NDHWC"
+func AvgPool3DGradDataFormat(value string) AvgPool3DGradAttr {
 	return func(m optionalAttr) {
-		m["Tout"] = value
-	}
-}
-
-// Returns the imaginary part of a complex number.
-//
-// Given a tensor `input` of complex numbers, this operation returns a tensor of
-// type `float` that is the imaginary part of each element in `input`. All
-// elements in `input` must be complex numbers of the form \\(a + bj\\), where *a*
-// is the real part and *b* is the imaginary part returned by this operation.
-//
-// For example:
-//
-// ```
-// # tensor 'input' is [-2.25 + 4.75j, 3.25 + 5.75j]
-// tf.imag(input) ==> [4.75, 5.75]
-// ```
-func Imag(scope *Scope, input tf.Output, optional ...ImagAttr) (output tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{}
-	for _, a := range optional {
-		a(attrs)
-	}
-	opspec := tf.OpSpec{
-		Type: "Imag",
-		Input: []tf.Input{
-			input,
-		},
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// Creates a dataset that emits the lines of one or more text files.
-//
-// Arguments:
-//	filenames: A scalar or a vector containing the name(s) of the file(s) to be
-// read.
-//	compression_type: A scalar containing either (i) the empty string (no
-// compression), (ii) "ZLIB", or (iii) "GZIP".
-//	buffer_size: A scalar containing the number of bytes to buffer.
-func TextLineDataset(scope *Scope, filenames tf.Output, compression_type tf.Output, buffer_size tf.Output) (handle tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "TextLineDataset",
-		Input: []tf.Input{
-			filenames, compression_type, buffer_size,
-		},
+		m["data_format"] = value
 	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
 }
 
-// Returns the number of records this Reader has produced.
-//
-// This is the same as the number of ReaderRead executions that have
-// succeeded.
+// Computes gradients of average pooling function.
 //
 // Arguments:
-//	reader_handle: Handle to a Reader.
-func ReaderNumRecordsProducedV2(scope *Scope, reader_handle tf.Output) (records_produced tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "ReaderNumRecordsProducedV2",
-		Input: []tf.Input{
-			reader_handle,
-		},
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// Computes exponential of x - 1 element-wise.
-//
-// I.e., \\(y = (\exp x) - 1\\).
-func Expm1(scope *Scope, x tf.Output) (y tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "Expm1",
-		Input: []tf.Input{
-			x,
-		},
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// Returns x - y element-wise.
+//	orig_input_shape: The original input dimensions.
+//	grad: Output backprop of shape `[batch, depth, rows, cols, channels]`.
+//	ksize: 1-D tensor of length 5. The size of the window for each dimension of
+// the input tensor. Must have `ksize[0] = ksize[4] = 1`.
+//	strides: 1-D tensor of length 5. The stride of the sliding window for each
+// dimension of `input`. Must have `strides[0] = strides[4] = 1`.
+//	padding: The type of padding algorithm to use.
 //
-// *NOTE*: `Sub` supports broadcasting. More about broadcasting
-// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)
-func Sub(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) {
+// Returns The backprop for input.
+func AvgPool3DGrad(scope *Scope, orig_input_shape tf.Output, grad tf.Output, ksize []int64, strides []int64, padding string, optional ...AvgPool3DGradAttr) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
+	attrs := map[string]interface{}{"ksize": ksize, "strides": strides, "padding": padding}
+	for _, a := range optional {
+		a(attrs)
+	}
 	opspec := tf.OpSpec{
-		Type: "Sub",
+		Type: "AvgPool3DGrad",
 		Input: []tf.Input{
-			x, y,
+			orig_input_shape, grad,
 		},
+		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
@@ -24870,133 +25149,6 @@ func Save(scope *Scope, filename tf.Output, tensor_names tf.Output, data []tf.Ou
 	return scope.AddOperation(opspec)
 }
 
-// Records the bytes size of each element of `input_dataset` in a StatsAggregator.
-func BytesProducedStatsDataset(scope *Scope, input_dataset tf.Output, tag tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes}
-	opspec := tf.OpSpec{
-		Type: "BytesProducedStatsDataset",
-		Input: []tf.Input{
-			input_dataset, tag,
-		},
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// QrAttr is an optional argument to Qr.
-type QrAttr func(optionalAttr)
-
-// QrFullMatrices sets the optional full_matrices attribute to value.
-//
-// value: If true, compute full-sized `q` and `r`. If false
-// (the default), compute only the leading `P` columns of `q`.
-// If not specified, defaults to false
-func QrFullMatrices(value bool) QrAttr {
-	return func(m optionalAttr) {
-		m["full_matrices"] = value
-	}
-}
-
-// Computes the QR decompositions of one or more matrices.
-//
-// Computes the QR decomposition of each inner matrix in `tensor` such that
-// `tensor[..., :, :] = q[..., :, :] * r[..., :,:])`
-//
-// ```python
-// # a is a tensor.
-// # q is a tensor of orthonormal matrices.
-// # r is a tensor of upper triangular matrices.
-// q, r = qr(a)
-// q_full, r_full = qr(a, full_matrices=True)
-// ```
-//
-// Arguments:
-//	input: A tensor of shape `[..., M, N]` whose inner-most 2 dimensions
-// form matrices of size `[M, N]`. Let `P` be the minimum of `M` and `N`.
-//
-// Returns Orthonormal basis for range of `a`. If `full_matrices` is `False` then
-// shape is `[..., M, P]`; if `full_matrices` is `True` then shape is
-// `[..., M, M]`.Triangular factor. If `full_matrices` is `False` then shape is
-// `[..., P, N]`. If `full_matrices` is `True` then shape is `[..., M, N]`.
-func Qr(scope *Scope, input tf.Output, optional ...QrAttr) (q tf.Output, r tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{}
-	for _, a := range optional {
-		a(attrs)
-	}
-	opspec := tf.OpSpec{
-		Type: "Qr",
-		Input: []tf.Input{
-			input,
-		},
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0), op.Output(1)
-}
-
-// AudioSummaryAttr is an optional argument to AudioSummary.
-type AudioSummaryAttr func(optionalAttr)
-
-// AudioSummaryMaxOutputs sets the optional max_outputs attribute to value.
-//
-// value: Max number of batch elements to generate audio for.
-// If not specified, defaults to 3
-//
-// REQUIRES: value >= 1
-func AudioSummaryMaxOutputs(value int64) AudioSummaryAttr {
-	return func(m optionalAttr) {
-		m["max_outputs"] = value
-	}
-}
-
-// Outputs a `Summary` protocol buffer with audio.
-//
-// DEPRECATED at GraphDef version 15: Use AudioSummaryV2.
-//
-// The summary has up to `max_outputs` summary values containing audio. The
-// audio is built from `tensor` which must be 3-D with shape `[batch_size,
-// frames, channels]` or 2-D with shape `[batch_size, frames]`. The values are
-// assumed to be in the range of `[-1.0, 1.0]` with a sample rate of `sample_rate`.
-//
-// The `tag` argument is a scalar `Tensor` of type `string`.  It is used to
-// build the `tag` of the summary values:
-//
-// *  If `max_outputs` is 1, the summary value tag is '*tag*/audio'.
-// *  If `max_outputs` is greater than 1, the summary value tags are
-//    generated sequentially as '*tag*/audio/0', '*tag*/audio/1', etc.
-//
-// Arguments:
-//	tag: Scalar. Used to build the `tag` attribute of the summary values.
-//	tensor: 2-D of shape `[batch_size, frames]`.
-//	sample_rate: The sample rate of the signal in hertz.
-//
-// Returns Scalar. Serialized `Summary` protocol buffer.
-func AudioSummary(scope *Scope, tag tf.Output, tensor tf.Output, sample_rate float32, optional ...AudioSummaryAttr) (summary tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{"sample_rate": sample_rate}
-	for _, a := range optional {
-		a(attrs)
-	}
-	opspec := tf.OpSpec{
-		Type: "AudioSummary",
-		Input: []tf.Input{
-			tag, tensor,
-		},
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
 // BiasAddAttr is an optional argument to BiasAdd.
 type BiasAddAttr func(optionalAttr)
 
@@ -25198,74 +25350,195 @@ func LogUniformCandidateSamplerSeed(value int64) LogUniformCandidateSamplerAttr
 	}
 }
 
-// LogUniformCandidateSamplerSeed2 sets the optional seed2 attribute to value.
+// LogUniformCandidateSamplerSeed2 sets the optional seed2 attribute to value.
+//
+// value: An second seed to avoid seed collision.
+// If not specified, defaults to 0
+func LogUniformCandidateSamplerSeed2(value int64) LogUniformCandidateSamplerAttr {
+	return func(m optionalAttr) {
+		m["seed2"] = value
+	}
+}
+
+// Generates labels for candidate sampling with a log-uniform distribution.
+//
+// See explanations of candidate sampling and the data formats at
+// go/candidate-sampling.
+//
+// For each batch, this op picks a single set of sampled candidate labels.
+//
+// The advantages of sampling candidates per-batch are simplicity and the
+// possibility of efficient dense matrix multiplication. The disadvantage is that
+// the sampled candidates must be chosen independently of the context and of the
+// true labels.
+//
+// Arguments:
+//	true_classes: A batch_size * num_true matrix, in which each row contains the
+// IDs of the num_true target_classes in the corresponding original label.
+//	num_true: Number of true labels per context.
+//	num_sampled: Number of candidates to randomly sample.
+//	unique: If unique is true, we sample with rejection, so that all sampled
+// candidates in a batch are unique. This requires some approximation to
+// estimate the post-rejection sampling probabilities.
+//	range_max: The sampler will sample integers from the interval [0, range_max).
+//
+// Returns A vector of length num_sampled, in which each element is
+// the ID of a sampled candidate.A batch_size * num_true matrix, representing
+// the number of times each candidate is expected to occur in a batch
+// of sampled candidates. If unique=true, then this is a probability.A vector of length num_sampled, for each sampled
+// candidate representing the number of times the candidate is expected
+// to occur in a batch of sampled candidates.  If unique=true, then this is a
+// probability.
+func LogUniformCandidateSampler(scope *Scope, true_classes tf.Output, num_true int64, num_sampled int64, unique bool, range_max int64, optional ...LogUniformCandidateSamplerAttr) (sampled_candidates tf.Output, true_expected_count tf.Output, sampled_expected_count tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{"num_true": num_true, "num_sampled": num_sampled, "unique": unique, "range_max": range_max}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "LogUniformCandidateSampler",
+		Input: []tf.Input{
+			true_classes,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0), op.Output(1), op.Output(2)
+}
+
+// Returns the truth value of (x < y) element-wise.
+//
+// *NOTE*: `Less` supports broadcasting. More about broadcasting
+// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)
+func Less(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "Less",
+		Input: []tf.Input{
+			x, y,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// FakeQuantWithMinMaxVarsGradientAttr is an optional argument to FakeQuantWithMinMaxVarsGradient.
+type FakeQuantWithMinMaxVarsGradientAttr func(optionalAttr)
+
+// FakeQuantWithMinMaxVarsGradientNumBits sets the optional num_bits attribute to value.
+//
+// value: The bitwidth of the quantization; between 2 and 8, inclusive.
+// If not specified, defaults to 8
+func FakeQuantWithMinMaxVarsGradientNumBits(value int64) FakeQuantWithMinMaxVarsGradientAttr {
+	return func(m optionalAttr) {
+		m["num_bits"] = value
+	}
+}
+
+// FakeQuantWithMinMaxVarsGradientNarrowRange sets the optional narrow_range attribute to value.
+//
+// value: Whether to quantize into 2^num_bits - 1 distinct values.
+// If not specified, defaults to false
+func FakeQuantWithMinMaxVarsGradientNarrowRange(value bool) FakeQuantWithMinMaxVarsGradientAttr {
+	return func(m optionalAttr) {
+		m["narrow_range"] = value
+	}
+}
+
+// Compute gradients for a FakeQuantWithMinMaxVars operation.
+//
+// Arguments:
+//	gradients: Backpropagated gradients above the FakeQuantWithMinMaxVars operation.
+//	inputs: Values passed as inputs to the FakeQuantWithMinMaxVars operation.
+// min, max: Quantization interval, scalar floats.
+//
+//
+//
+// Returns Backpropagated gradients w.r.t. inputs:
+// `gradients * (inputs >= min && inputs <= max)`.Backpropagated gradients w.r.t. min parameter:
+// `sum(gradients * (inputs < min))`.Backpropagated gradients w.r.t. max parameter:
+// `sum(gradients * (inputs > max))`.
+func FakeQuantWithMinMaxVarsGradient(scope *Scope, gradients tf.Output, inputs tf.Output, min tf.Output, max tf.Output, optional ...FakeQuantWithMinMaxVarsGradientAttr) (backprops_wrt_input tf.Output, backprop_wrt_min tf.Output, backprop_wrt_max tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "FakeQuantWithMinMaxVarsGradient",
+		Input: []tf.Input{
+			gradients, inputs, min, max,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0), op.Output(1), op.Output(2)
+}
+
+// MaxPoolGradV2Attr is an optional argument to MaxPoolGradV2.
+type MaxPoolGradV2Attr func(optionalAttr)
+
+// MaxPoolGradV2DataFormat sets the optional data_format attribute to value.
 //
-// value: An second seed to avoid seed collision.
-// If not specified, defaults to 0
-func LogUniformCandidateSamplerSeed2(value int64) LogUniformCandidateSamplerAttr {
+// value: Specify the data format of the input and output data. With the
+// default format "NHWC", the data is stored in the order of:
+//     [batch, in_height, in_width, in_channels].
+// Alternatively, the format could be "NCHW", the data storage order of:
+//     [batch, in_channels, in_height, in_width].
+// If not specified, defaults to "NHWC"
+func MaxPoolGradV2DataFormat(value string) MaxPoolGradV2Attr {
 	return func(m optionalAttr) {
-		m["seed2"] = value
+		m["data_format"] = value
 	}
 }
 
-// Generates labels for candidate sampling with a log-uniform distribution.
-//
-// See explanations of candidate sampling and the data formats at
-// go/candidate-sampling.
-//
-// For each batch, this op picks a single set of sampled candidate labels.
-//
-// The advantages of sampling candidates per-batch are simplicity and the
-// possibility of efficient dense matrix multiplication. The disadvantage is that
-// the sampled candidates must be chosen independently of the context and of the
-// true labels.
+// Computes gradients of the maxpooling function.
 //
 // Arguments:
-//	true_classes: A batch_size * num_true matrix, in which each row contains the
-// IDs of the num_true target_classes in the corresponding original label.
-//	num_true: Number of true labels per context.
-//	num_sampled: Number of candidates to randomly sample.
-//	unique: If unique is true, we sample with rejection, so that all sampled
-// candidates in a batch are unique. This requires some approximation to
-// estimate the post-rejection sampling probabilities.
-//	range_max: The sampler will sample integers from the interval [0, range_max).
+//	orig_input: The original input tensor.
+//	orig_output: The original output tensor.
+//	grad: 4-D.  Gradients w.r.t. the output of `max_pool`.
+//	ksize: The size of the window for each dimension of the input tensor.
+//	strides: The stride of the sliding window for each dimension of the
+// input tensor.
+//	padding: The type of padding algorithm to use.
 //
-// Returns A vector of length num_sampled, in which each element is
-// the ID of a sampled candidate.A batch_size * num_true matrix, representing
-// the number of times each candidate is expected to occur in a batch
-// of sampled candidates. If unique=true, then this is a probability.A vector of length num_sampled, for each sampled
-// candidate representing the number of times the candidate is expected
-// to occur in a batch of sampled candidates.  If unique=true, then this is a
-// probability.
-func LogUniformCandidateSampler(scope *Scope, true_classes tf.Output, num_true int64, num_sampled int64, unique bool, range_max int64, optional ...LogUniformCandidateSamplerAttr) (sampled_candidates tf.Output, true_expected_count tf.Output, sampled_expected_count tf.Output) {
+// Returns Gradients w.r.t. the input to `max_pool`.
+func MaxPoolGradV2(scope *Scope, orig_input tf.Output, orig_output tf.Output, grad tf.Output, ksize tf.Output, strides tf.Output, padding string, optional ...MaxPoolGradV2Attr) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{"num_true": num_true, "num_sampled": num_sampled, "unique": unique, "range_max": range_max}
+	attrs := map[string]interface{}{"padding": padding}
 	for _, a := range optional {
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "LogUniformCandidateSampler",
+		Type: "MaxPoolGradV2",
 		Input: []tf.Input{
-			true_classes,
+			orig_input, orig_output, grad, ksize, strides,
 		},
 		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
-	return op.Output(0), op.Output(1), op.Output(2)
+	return op.Output(0)
 }
 
-// Returns the truth value of (x < y) element-wise.
+// Returns the min of x and y (i.e. x < y ? x : y) element-wise.
 //
-// *NOTE*: `Less` supports broadcasting. More about broadcasting
+// *NOTE*: `Minimum` supports broadcasting. More about broadcasting
 // [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)
-func Less(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) {
+func Minimum(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
 	opspec := tf.OpSpec{
-		Type: "Less",
+		Type: "Minimum",
 		Input: []tf.Input{
 			x, y,
 		},
@@ -27136,127 +27409,6 @@ func InitializeTableV2(scope *Scope, table_handle tf.Output, keys tf.Output, val
 	return scope.AddOperation(opspec)
 }
 
-// FakeQuantWithMinMaxVarsGradientAttr is an optional argument to FakeQuantWithMinMaxVarsGradient.
-type FakeQuantWithMinMaxVarsGradientAttr func(optionalAttr)
-
-// FakeQuantWithMinMaxVarsGradientNumBits sets the optional num_bits attribute to value.
-//
-// value: The bitwidth of the quantization; between 2 and 8, inclusive.
-// If not specified, defaults to 8
-func FakeQuantWithMinMaxVarsGradientNumBits(value int64) FakeQuantWithMinMaxVarsGradientAttr {
-	return func(m optionalAttr) {
-		m["num_bits"] = value
-	}
-}
-
-// FakeQuantWithMinMaxVarsGradientNarrowRange sets the optional narrow_range attribute to value.
-//
-// value: Whether to quantize into 2^num_bits - 1 distinct values.
-// If not specified, defaults to false
-func FakeQuantWithMinMaxVarsGradientNarrowRange(value bool) FakeQuantWithMinMaxVarsGradientAttr {
-	return func(m optionalAttr) {
-		m["narrow_range"] = value
-	}
-}
-
-// Compute gradients for a FakeQuantWithMinMaxVars operation.
-//
-// Arguments:
-//	gradients: Backpropagated gradients above the FakeQuantWithMinMaxVars operation.
-//	inputs: Values passed as inputs to the FakeQuantWithMinMaxVars operation.
-// min, max: Quantization interval, scalar floats.
-//
-//
-//
-// Returns Backpropagated gradients w.r.t. inputs:
-// `gradients * (inputs >= min && inputs <= max)`.Backpropagated gradients w.r.t. min parameter:
-// `sum(gradients * (inputs < min))`.Backpropagated gradients w.r.t. max parameter:
-// `sum(gradients * (inputs > max))`.
-func FakeQuantWithMinMaxVarsGradient(scope *Scope, gradients tf.Output, inputs tf.Output, min tf.Output, max tf.Output, optional ...FakeQuantWithMinMaxVarsGradientAttr) (backprops_wrt_input tf.Output, backprop_wrt_min tf.Output, backprop_wrt_max tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{}
-	for _, a := range optional {
-		a(attrs)
-	}
-	opspec := tf.OpSpec{
-		Type: "FakeQuantWithMinMaxVarsGradient",
-		Input: []tf.Input{
-			gradients, inputs, min, max,
-		},
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0), op.Output(1), op.Output(2)
-}
-
-// MaxPoolGradV2Attr is an optional argument to MaxPoolGradV2.
-type MaxPoolGradV2Attr func(optionalAttr)
-
-// MaxPoolGradV2DataFormat sets the optional data_format attribute to value.
-//
-// value: Specify the data format of the input and output data. With the
-// default format "NHWC", the data is stored in the order of:
-//     [batch, in_height, in_width, in_channels].
-// Alternatively, the format could be "NCHW", the data storage order of:
-//     [batch, in_channels, in_height, in_width].
-// If not specified, defaults to "NHWC"
-func MaxPoolGradV2DataFormat(value string) MaxPoolGradV2Attr {
-	return func(m optionalAttr) {
-		m["data_format"] = value
-	}
-}
-
-// Computes gradients of the maxpooling function.
-//
-// Arguments:
-//	orig_input: The original input tensor.
-//	orig_output: The original output tensor.
-//	grad: 4-D.  Gradients w.r.t. the output of `max_pool`.
-//	ksize: The size of the window for each dimension of the input tensor.
-//	strides: The stride of the sliding window for each dimension of the
-// input tensor.
-//	padding: The type of padding algorithm to use.
-//
-// Returns Gradients w.r.t. the input to `max_pool`.
-func MaxPoolGradV2(scope *Scope, orig_input tf.Output, orig_output tf.Output, grad tf.Output, ksize tf.Output, strides tf.Output, padding string, optional ...MaxPoolGradV2Attr) (output tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{"padding": padding}
-	for _, a := range optional {
-		a(attrs)
-	}
-	opspec := tf.OpSpec{
-		Type: "MaxPoolGradV2",
-		Input: []tf.Input{
-			orig_input, orig_output, grad, ksize, strides,
-		},
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// Returns the min of x and y (i.e. x < y ? x : y) element-wise.
-//
-// *NOTE*: `Minimum` supports broadcasting. More about broadcasting
-// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)
-func Minimum(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "Minimum",
-		Input: []tf.Input{
-			x, y,
-		},
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
 // Creates a dataset that asynchronously prefetches elements from `input_dataset`.
 //
 // Arguments:
@@ -27301,57 +27453,3 @@ func TensorSummaryV2(scope *Scope, tag tf.Output, tensor tf.Output, serialized_s
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
-
-// AudioSummaryV2Attr is an optional argument to AudioSummaryV2.
-type AudioSummaryV2Attr func(optionalAttr)
-
-// AudioSummaryV2MaxOutputs sets the optional max_outputs attribute to value.
-//
-// value: Max number of batch elements to generate audio for.
-// If not specified, defaults to 3
-//
-// REQUIRES: value >= 1
-func AudioSummaryV2MaxOutputs(value int64) AudioSummaryV2Attr {
-	return func(m optionalAttr) {
-		m["max_outputs"] = value
-	}
-}
-
-// Outputs a `Summary` protocol buffer with audio.
-//
-// The summary has up to `max_outputs` summary values containing audio. The
-// audio is built from `tensor` which must be 3-D with shape `[batch_size,
-// frames, channels]` or 2-D with shape `[batch_size, frames]`. The values are
-// assumed to be in the range of `[-1.0, 1.0]` with a sample rate of `sample_rate`.
-//
-// The `tag` argument is a scalar `Tensor` of type `string`.  It is used to
-// build the `tag` of the summary values:
-//
-// *  If `max_outputs` is 1, the summary value tag is '*tag*/audio'.
-// *  If `max_outputs` is greater than 1, the summary value tags are
-//    generated sequentially as '*tag*/audio/0', '*tag*/audio/1', etc.
-//
-// Arguments:
-//	tag: Scalar. Used to build the `tag` attribute of the summary values.
-//	tensor: 2-D of shape `[batch_size, frames]`.
-//	sample_rate: The sample rate of the signal in hertz.
-//
-// Returns Scalar. Serialized `Summary` protocol buffer.
-func AudioSummaryV2(scope *Scope, tag tf.Output, tensor tf.Output, sample_rate tf.Output, optional ...AudioSummaryV2Attr) (summary tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{}
-	for _, a := range optional {
-		a(attrs)
-	}
-	opspec := tf.OpSpec{
-		Type: "AudioSummaryV2",
-		Input: []tf.Input{
-			tag, tensor, sample_rate,
-		},
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-- 
GitLab


From 859df2a2a1bdfb02cf370f7b68e3c6802e822b15 Mon Sep 17 00:00:00 2001
From: Nick Desaulniers <ndesaulniers@google.com>
Date: Fri, 17 Nov 2017 12:32:58 -0800
Subject: [PATCH 0061/1225] Remove the existence of unused HloProtos.

PiperOrigin-RevId: 176145413
---
 tensorflow/compiler/xla/service/hlo.proto | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/tensorflow/compiler/xla/service/hlo.proto b/tensorflow/compiler/xla/service/hlo.proto
index 79493c4112..e984bdb5f7 100644
--- a/tensorflow/compiler/xla/service/hlo.proto
+++ b/tensorflow/compiler/xla/service/hlo.proto
@@ -250,7 +250,3 @@ message HloProto {
   HloOrderingProto hlo_ordering = 2;
   BufferAssignmentProto buffer_assignment = 3;
 }
-
-message HloProtos {
-  repeated HloProto hlo_protos = 1;
-}
-- 
GitLab


From a715b06555a0c14e95f30569f40a97019af6a6b0 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 17 Nov 2017 12:34:28 -0800
Subject: [PATCH 0062/1225] [XLA:CPU/GPU] Revert back to previous buffer
 aliasing calculation for fused DynamicUpdateSlice in-place updates (fused
 instructions compared in current calculation are not assigned buffers, so I
 think the current calculation is always returning false).

PiperOrigin-RevId: 176145589
---
 tensorflow/compiler/xla/service/llvm_ir/ops.h | 23 +++++++++++++++----
 1 file changed, 18 insertions(+), 5 deletions(-)

diff --git a/tensorflow/compiler/xla/service/llvm_ir/ops.h b/tensorflow/compiler/xla/service/llvm_ir/ops.h
index 11e84d9cb5..f72f482e31 100644
--- a/tensorflow/compiler/xla/service/llvm_ir/ops.h
+++ b/tensorflow/compiler/xla/service/llvm_ir/ops.h
@@ -40,11 +40,24 @@ bool CanUpdateDynamicSliceInPlace(HloInstruction* dynamic_update_slice,
 inline bool CanEmitFusedDynamicUpdateSliceInPlace(
     HloInstruction* fusion, const BufferAssignment& assignment) {
   CHECK_EQ(fusion->opcode(), HloOpcode::kFusion);
-  return fusion->fusion_kind() == HloInstruction::FusionKind::kLoop &&
-         fusion->fused_expression_root()->opcode() ==
-             HloOpcode::kDynamicUpdateSlice &&
-         CanUpdateDynamicSliceInPlace(fusion->fused_expression_root(),
-                                      assignment);
+  HloInstruction* fused_root = fusion->fused_expression_root();
+  if (fused_root->opcode() != HloOpcode::kDynamicUpdateSlice ||
+      fusion->fusion_kind() != HloInstruction::FusionKind::kLoop) {
+    return false;
+  }
+  // Walk DynamicUpdateSlice operand(0) to fused parameter and get its
+  // associated operand. See if it shares an allocation with this operand.
+  HloInstruction* fusion_operand;
+  ShapeIndex index;
+  std::tie(fusion_operand, index) =
+      fused_root->mutable_operand(0)->LatestNonGteAncestorAndIndex();
+  if (fusion_operand->opcode() != HloOpcode::kParameter) {
+    return false;
+  }
+  auto* operand = fusion->operand(fusion_operand->parameter_number());
+  return assignment.HasAllocationAt(operand, index) &&
+         assignment.HasAllocationAt(fusion, {}) &&
+         assignment.SharesSliceAtIndex(fusion, {}, operand, index);
 }
 
 // Emits IR for running the given dynamic-update-slice op in-place -- that is,
-- 
GitLab


From 6a7cdfa8c973f3ce6a31664233fc8b096f2ba393 Mon Sep 17 00:00:00 2001
From: Benoit Steiner <bsteiner@google.com>
Date: Fri, 17 Nov 2017 12:46:18 -0800
Subject: [PATCH 0063/1225] Improved shape inference

PiperOrigin-RevId: 176147013
---
 .../core/common_runtime/shape_refiner.cc      |  31 +-
 .../core/common_runtime/shape_refiner_test.cc |  15 +-
 tensorflow/core/framework/shape_inference.cc  |  60 +-
 tensorflow/core/framework/shape_inference.h   |  27 +-
 .../core/framework/shape_inference_test.cc    |  13 +-
 .../core/grappler/costs/graph_properties.cc   | 669 ++++++++++++------
 .../core/grappler/costs/graph_properties.h    |  50 +-
 .../grappler/costs/graph_properties_test.cc   |  23 +-
 .../while_loop.pbtxt                          |  20 +-
 9 files changed, 591 insertions(+), 317 deletions(-)

diff --git a/tensorflow/core/common_runtime/shape_refiner.cc b/tensorflow/core/common_runtime/shape_refiner.cc
index 10901da192..d66865e45b 100644
--- a/tensorflow/core/common_runtime/shape_refiner.cc
+++ b/tensorflow/core/common_runtime/shape_refiner.cc
@@ -335,10 +335,14 @@ Status ShapeRefiner::UpdateNode(const Node* node, bool relax, bool* refined) {
     InferenceContext* c = iter->second->get_context();
     DCHECK_GE(dst_input, 0);
     ShapeHandle existing_input = node_context->input(dst_input);
-    if (!relax && node_context->MergeInput(dst_input, c->output(src_output)) &&
-        !existing_input.SameHandle(node_context->input(dst_input))) {
-      *refined = true;
-    } else if (relax) {
+    if (!relax) {
+      if (node_context->MergeInput(dst_input, c->output(src_output))) {
+        if (!SameDefinedShape(node_context, node_context->input(dst_input),
+                              existing_input)) {
+          *refined = true;
+        }
+      }
+    } else {
       if (node_context->RelaxInput(dst_input, c->output(src_output))) {
         if (!SameDefinedShape(node_context, node_context->input(dst_input),
                               existing_input)) {
@@ -865,15 +869,22 @@ Status ShapeRefiner::RunShapeFn(const Node* node,
 
 bool ShapeRefiner::SameDefinedShape(InferenceContext* c, ShapeHandle s0,
                                     ShapeHandle s1) {
-  if (!c->RankKnown(s0)) {
-    return !c->RankKnown(s1);
-  } else if (!c->RankKnown(s1) || c->Rank(s0) != c->Rank(s1)) {
+  if (s0.SameHandle(s1)) {
+    return true;
+  }
+  if (c->Rank(s0) != c->Rank(s1)) {
+    return false;
+  }
+  if (!c->RankKnown(s0) && !c->RankKnown(s1)) {
     return false;
   }
-
   for (int i = 0; i < c->Rank(s0); ++i) {
-    if (c->Value(c->Dim(s0, i)) != c->Value(c->Dim(s1, i))) {
-      return false;
+    if (!c->Dim(s0, i).SameHandle(c->Dim(s1, i))) {
+      int64 val0 = c->Value(c->Dim(s0, i));
+      int64 val1 = c->Value(c->Dim(s1, i));
+      if (val0 < 0 || val1 < 0 || val0 != val1) {
+        return false;
+      }
     }
   }
 
diff --git a/tensorflow/core/common_runtime/shape_refiner_test.cc b/tensorflow/core/common_runtime/shape_refiner_test.cc
index ff32e855d5..e4eef1dbe2 100644
--- a/tensorflow/core/common_runtime/shape_refiner_test.cc
+++ b/tensorflow/core/common_runtime/shape_refiner_test.cc
@@ -1161,11 +1161,13 @@ TEST_F(ShapeRefinerTest, SameDefinedShape) {
   auto s_unknown_2 = ctx->MakeShape({-1, 2});
   auto s_unknown_2_b = ctx->MakeShape({-1, 2});
 
-  EXPECT_TRUE(SameDefinedShape(ctx, unknown, unknown_b));
+  EXPECT_TRUE(SameDefinedShape(ctx, unknown, unknown));
+  EXPECT_FALSE(SameDefinedShape(ctx, unknown, unknown_b));
   EXPECT_FALSE(SameDefinedShape(ctx, unknown, s_1_2));
   EXPECT_TRUE(SameDefinedShape(ctx, s_1_2, s_1_2_b));
   EXPECT_FALSE(SameDefinedShape(ctx, s_1_2, s_2_2));
-  EXPECT_TRUE(SameDefinedShape(ctx, s_unknown_2, s_unknown_2_b));
+  EXPECT_TRUE(SameDefinedShape(ctx, s_unknown_2, s_unknown_2));
+  EXPECT_FALSE(SameDefinedShape(ctx, s_unknown_2, s_unknown_2_b));
 }
 
 TEST_F(ShapeRefinerTest, IsUpdatedShapesOrTypes) {
@@ -1178,14 +1180,15 @@ TEST_F(ShapeRefinerTest, IsUpdatedShapesOrTypes) {
   TF_ASSERT_OK(m.AddNode(test));
   shape_inference::InferenceContext* ctx = m.GetContext(test);
 
+  shape_inference::ShapeHandle unknown = ctx->UnknownShape();
   std::vector<shape_inference::ShapeAndType> t0{
       {ctx->MakeShape({1, 2, 3}), DT_FLOAT},
-      {ctx->UnknownShape(), DT_INVALID},
+      {unknown, DT_INVALID},
       {ctx->MakeShape({4, 3, 2, 1}), DT_INT32}};
 
   std::vector<shape_inference::ShapeAndType> t1{
       {ctx->MakeShape({1, 2, 3}), DT_FLOAT},
-      {ctx->UnknownShape(), DT_INVALID},
+      {unknown, DT_INVALID},
       {ctx->MakeShape({4, 3, 2, 1}), DT_INT32}};
 
   std::vector<shape_inference::ShapeAndType> t2{
@@ -1256,10 +1259,10 @@ TEST_F(ShapeRefinerTest, IncrementalUpdates) {
       0, std::vector<shape_inference::ShapeAndType>{{shp, DT_FLOAT}});
   refined = false;
   TF_ASSERT_OK(m.UpdateNode(dequeue, true /* relax */, &refined));
-  EXPECT_FALSE(refined);
+  EXPECT_TRUE(refined);
   ctx = m.GetContext(dequeue);
   EXPECT_EQ("[?,7]", ctx->DebugString(ctx->output(0)));
-  EXPECT_FALSE(SameHandle(ctx->Dim(ctx->output(0), 0), ctx->Dim(shp, 0)));
+  EXPECT_TRUE(SameHandle(ctx->Dim(ctx->output(0), 0), ctx->Dim(shp, 0)));
 
   // Inject a shape of the same handle and expect refined to not change.
   ctx = m.GetContext(queue);
diff --git a/tensorflow/core/framework/shape_inference.cc b/tensorflow/core/framework/shape_inference.cc
index fe0742e1db..f30272e250 100644
--- a/tensorflow/core/framework/shape_inference.cc
+++ b/tensorflow/core/framework/shape_inference.cc
@@ -403,15 +403,28 @@ Status InferenceContext::WithValue(DimensionHandle dim, int64 value,
                                  existing);
 }
 
-void InferenceContext::Relax(DimensionHandle d0, DimensionHandle d1,
+void InferenceContext::Relax(DimensionHandle d_old, DimensionHandle d_new,
                              DimensionHandle* out) {
-  if (d0.SameHandle(d1)) {
-    *out = d0;
-  } else if (!ValueKnown(d0) || !ValueKnown(d1)) {
-    *out = UnknownDim();
-  } else if (Value(d0) == Value(d1)) {
-    *out = d0;
+  if (d_old.SameHandle(d_new)) {
+    *out = d_old;
+  } else if (!ValueKnown(d_old) && !ValueKnown(d_new)) {
+    // The node will be fed by the dimension d_new instead of d_old: any
+    // equality assertion between d_old and other input dimension on this node
+    // may not be true anymore, so forget them all.
+    ForgetMerges();
+    // Return the new shape handle to force the relaxation to propagate to the
+    // fanout of the context.
+    *out = d_new;
+  } else if (!ValueKnown(d_new)) {
+    ForgetMerges();
+    *out = d_new;
+  } else if (Value(d_old) == Value(d_new)) {
+    // Return the old shape handle. This will stop the relaxation in the fanout
+    // of the context.
+    *out = d_old;
   } else {
+    // Return a new handle that encodes a different unknown dim.
+    ForgetMerges();
     *out = UnknownDim();
   }
 }
@@ -463,45 +476,48 @@ Status InferenceContext::MergePrefix(ShapeHandle s, ShapeHandle prefix,
   return Status::OK();
 }
 
-void InferenceContext::Relax(ShapeHandle s0, ShapeHandle s1, ShapeHandle* out) {
-  if (s0.SameHandle(s1)) {
-    *out = s0;
+void InferenceContext::Relax(ShapeHandle s_old, ShapeHandle s_new,
+                             ShapeHandle* out) {
+  if (s_old.SameHandle(s_new)) {
+    *out = s_old;
     return;
-  } else if (!RankKnown(s0) || !RankKnown(s1)) {
-    *out = UnknownShape();
+  } else if (!RankKnown(s_new) || !s_old.IsSet()) {
+    ForgetMerges();
+    *out = s_new;
     return;
   }
 
-  const int32 rank = Rank(s0);
-  if (rank != Rank(s1)) {
+  const int32 rank = Rank(s_old);
+  if (rank != Rank(s_new)) {
+    ForgetMerges();
     *out = UnknownShape();
     return;
   }
 
-  bool return_s0 = true;
+  bool return_s_old = true;
   for (int i = 0; i < rank; ++i) {
-    auto d0 = Dim(s0, i);
-    auto d1 = Dim(s1, i);
+    auto d0 = Dim(s_old, i);
+    auto d1 = Dim(s_new, i);
     if (d0.SameHandle(d1)) continue;
 
     auto v0 = Value(d0);
     auto v1 = Value(d1);
     if (v0 == kUnknownDim || v1 == kUnknownDim || v0 != v1) {
-      return_s0 = false;
+      return_s_old = false;
       break;
     }
   }
-  if (return_s0) {
-    *out = s0;
+  if (return_s_old) {
+    *out = s_old;
     return;
   }
 
   // Relax dims.
   std::vector<DimensionHandle> dims(rank);
   for (int i = 0; i < rank; ++i) {
-    // Invariant for relax was checked earlier, so CHECK is ok.
-    Relax(Dim(s0, i), Dim(s1, i), &dims[i]);
+    Relax(Dim(s_old, i), Dim(s_new, i), &dims[i]);
   }
+  ForgetMerges();
   *out = MakeShape(dims);
 }
 
diff --git a/tensorflow/core/framework/shape_inference.h b/tensorflow/core/framework/shape_inference.h
index b12d37b4c0..4a4ef12635 100644
--- a/tensorflow/core/framework/shape_inference.h
+++ b/tensorflow/core/framework/shape_inference.h
@@ -62,7 +62,7 @@ class DimensionHandle {
  private:
   DimensionHandle(const Dimension* dim) { ptr_ = dim; }
 
-  const Dimension* operator->() { return ptr_; }
+  const Dimension* operator->() const { return ptr_; }
   bool IsSet() const { return ptr_ != nullptr; }
 
   const Dimension* ptr_ = nullptr;
@@ -104,7 +104,7 @@ class ShapeHandle {
 
  private:
   ShapeHandle(const Shape* shape) { ptr_ = shape; }
-  const Shape* operator->() { return ptr_; }
+  const Shape* operator->() const { return ptr_; }
   bool IsSet() const { return ptr_ != nullptr; }
 
   const Shape* ptr_ = nullptr;
@@ -678,14 +678,17 @@ class InferenceContext {
   // Adds additional context to the given status.
   Status AttachContext(const Status& status);
 
-  // Relaxes <d0> and <d1> and returns the relaxed dimension in <*out>. If <d0>
-  // and <d1> have incompatible values, returns an error.
+  // Relaxes an existing value <d_old> with a new value <d_new> and returns the
+  // relaxed dimension in <*out>. If <d_old> and <d_new> have incompatible
+  // values, returns an error.
   //
-  // Note that <*out> may be set to <d0> or <d1>.
-  void Relax(DimensionHandle d0, DimensionHandle d1, DimensionHandle* out);
-  // Relaxes <s0> and <s1> and returns the relaxed shape in <*out>. See
-  // 'RelaxInput' function for full details and examples.
-  void Relax(ShapeHandle s0, ShapeHandle s1, ShapeHandle* out);
+  // Note that <*out> may be set to <d_old> or <d_new>.
+  void Relax(DimensionHandle d_old, DimensionHandle d_new,
+             DimensionHandle* out);
+  // Relaxes an existing shape <s_old> with a new shape <s_new> and returns the
+  // relaxed shape in <*out>. See 'RelaxInput' function for full details and
+  // examples.
+  void Relax(ShapeHandle s_old, ShapeHandle s_new, ShapeHandle* out);
 
   // Used to implement MergeInputHandleShapesAndTypes and
   // MergeOutputHandleShapesAndTypes.
@@ -698,6 +701,12 @@ class InferenceContext {
       const std::vector<ShapeAndType>& shapes_and_types,
       std::vector<ShapeAndType>* to_update) TF_MUST_USE_RESULT;
 
+  // Forget all the previous merged shapes and dims.
+  void ForgetMerges() {
+    merged_shapes_.clear();
+    merged_dims_.clear();
+  }
+
   ShapeManager shape_manager_;
 
   // inputs_, outputs_, and input_tensors_as_shapes_ refer to values from
diff --git a/tensorflow/core/framework/shape_inference_test.cc b/tensorflow/core/framework/shape_inference_test.cc
index d03cc8ce6d..68156e63ca 100644
--- a/tensorflow/core/framework/shape_inference_test.cc
+++ b/tensorflow/core/framework/shape_inference_test.cc
@@ -544,9 +544,10 @@ TEST_F(ShapeInferenceTest, RelaxDim) {
   auto d_unknown_b = c.Dim(c.input(0), 4);
   DimensionHandle out;
 
-  // Relaxing anything with unknown returns a new unknown.
+  // Relaxing anything with unknown returns a new unknown or the existing
+  // unknown.
   Relax(&c, d2, d_unknown, &out);
-  EXPECT_FALSE(SameHandle(d_unknown, out));
+  EXPECT_TRUE(SameHandle(d_unknown, out));
   EXPECT_FALSE(SameHandle(d_unknown_b, out));
   EXPECT_EQ(InferenceContext::kUnknownDim, c.Value(out));
   Relax(&c, d_unknown, d2, &out);
@@ -554,7 +555,7 @@ TEST_F(ShapeInferenceTest, RelaxDim) {
   EXPECT_EQ(InferenceContext::kUnknownDim, c.Value(out));
   Relax(&c, d_unknown, d_unknown_b, &out);
   EXPECT_FALSE(SameHandle(d_unknown, out));
-  EXPECT_FALSE(SameHandle(d_unknown_b, out));
+  EXPECT_TRUE(SameHandle(d_unknown_b, out));
   EXPECT_EQ(InferenceContext::kUnknownDim, c.Value(out));
 
   // Relaxing with self returns self.
@@ -602,7 +603,7 @@ TEST_F(ShapeInferenceTest, RelaxShape) {
   EXPECT_EQ("?", c.DebugString(out));
   Relax(&c, s_unknown, s_unknown_b, &out);
   EXPECT_FALSE(SameHandle(s_unknown, out));
-  EXPECT_FALSE(SameHandle(s_unknown_b, out));
+  EXPECT_TRUE(SameHandle(s_unknown_b, out));
   EXPECT_EQ("?", c.DebugString(out));
 
   // Relaxing with self returns self.
@@ -623,7 +624,7 @@ TEST_F(ShapeInferenceTest, RelaxShape) {
   Relax(&c, s_u_2, s_1_u, &out);
   EXPECT_EQ("[?,?]", c.DebugString(out));
   EXPECT_FALSE(SameHandle(c.Dim(s_u_2, 0), c.Dim(out, 0)));
-  EXPECT_FALSE(SameHandle(c.Dim(s_1_u, 1), c.Dim(out, 1)));
+  EXPECT_TRUE(SameHandle(c.Dim(s_1_u, 1), c.Dim(out, 1)));
   auto s_u1 = c.UnknownShapeOfRank(1);
   auto s_u2 = c.UnknownShapeOfRank(1);
   Relax(&c, s_u1, s_u2, &out);
@@ -637,7 +638,7 @@ TEST_F(ShapeInferenceTest, RelaxShape) {
   EXPECT_EQ("[?,?]", c.DebugString(out));
   out = s_unknown;
   Relax(&c, s_1_3, s_u_2, &out);
-  EXPECT_FALSE(SameHandle(c.Dim(s_u_2, 0), c.Dim(out, 0)));
+  EXPECT_TRUE(SameHandle(c.Dim(s_u_2, 0), c.Dim(out, 0)));
   EXPECT_EQ("[?,?]", c.DebugString(out));
   out = s_unknown;
 
diff --git a/tensorflow/core/grappler/costs/graph_properties.cc b/tensorflow/core/grappler/costs/graph_properties.cc
index d33d86df3a..46c6841023 100644
--- a/tensorflow/core/grappler/costs/graph_properties.cc
+++ b/tensorflow/core/grappler/costs/graph_properties.cc
@@ -249,106 +249,252 @@ typename DisjointSet<Handle>::Rep* DisjointSet<Handle>::Find(Handle value) {
   return root;
 }
 
-// If a Merge node has a NextIteration node as an input then that input will
-// try to forward an UnknownShape at graph construction time. However, the
-// Merge shape function will always propagate an UnknownShape if any of its
-// inputs are UnknownShapes. So we need to ignore the input from NextIteration
-// nodes to propagate any known shape from the Merge node.
-Status ShapeOfMergeNode(const Node* node, InferenceContext* c) {
-  ShapeHandle out = c->input(0);
-  if (!c->RankKnown(out)) {
-    out = c->UnknownShape();
-  } else {
-    int32 rank = c->Rank(out);
-    for (const Edge* e : node->in_edges()) {
-      if (e->src()->IsNextIteration() || e->dst_input() <= 0) {
-        continue;
-      }
-      ShapeHandle input = c->input(e->dst_input());
-      if (!c->RankKnown(input) || c->Rank(input) != rank) {
-        out = c->UnknownShape();
-        break;
-      }
+bool IsQueue(const Node& node) {
+  StringPiece type(node.type_string());
+  return type.ends_with("QueueV2");
+}
+
+// Returns true if the node is an Enter op AND its input is a Queue.
+bool IsEnterWithQueue(const Node& node) {
+  if (node.IsEnter()) {
+    const Node* in_node;
+    TF_CHECK_OK(node.input_node(0, &in_node));
+    return IsQueue(*in_node);
+  }
+  return false;
+}
+
+}  // namespace
+
+// Queue of nodes to process. Nodes can be enqueued in any order, but will be
+// dequeued in (roughly) topological order. Propagating shapes following a
+// topological ordering isn't required for correctness but helps speed things up
+// since it avoids processing the same node multiple times as its inputs
+// information is refined.
+class TopoQueue {
+ public:
+  void push(const Node* n) { queue_.insert(n); }
+  const Node* pop() {
+    CHECK(!empty());
+    auto it = queue_.begin();
+    const Node* n = *it;
+    queue_.erase(it);
+    return n;
+  }
+
+  bool empty() const { return queue_.empty(); }
 
+ private:
+  // Graph nodes are created in (roughly) topological order. Therefore we can
+  // use their id to ensure they're sorted topologically.
+  struct CompareNodes {
+    bool operator()(const Node* lhs, const Node* rhs) const {
+      return lhs->id() > rhs->id();
+    }
+  };
+  std::set<const Node*, CompareNodes> queue_;
+};
+
+// Merge and relax symbolic shapes.
+// Each symbolic shape or dimension is represented by a handle. Unlike the TF
+// shape refiner which creates new handles every time it processes an unknown
+// shape/dimension, the symbolic shape refiner assigns a specific handle to each
+// unknown shape/dimension of a given node.
+class SymbolicShapeRefiner {
+ public:
+  explicit SymbolicShapeRefiner(ShapeRefiner* shape_refiner)
+      : shape_refiner_(shape_refiner) {}
+
+  InferenceContext* GetContext(const Node* node) {
+    return shape_refiner_->GetContext(node);
+  }
+  Status UpdateNode(const Node* node, bool relax, bool* refined) {
+    return shape_refiner_->UpdateNode(node, relax, refined);
+  }
+  Status SetShape(const Node* node, int output_port,
+                  shape_inference::ShapeHandle shape) {
+    return shape_refiner_->SetShape(node, output_port, shape);
+  }
+
+  struct ShapeId {
+    const Node* node;
+    int port_id;
+    bool operator==(const ShapeId& other) const {
+      return node == other.node && port_id == other.port_id;
+    }
+  };
+  struct HashShapeId {
+    std::size_t operator()(const ShapeId& shp) const {
+      return std::hash<const Node*>{}(shp.node) + shp.port_id;
+    }
+  };
+
+  struct DimId {
+    const Node* node;
+    int port_id;
+    int dim_index;
+    bool operator==(const DimId& other) const {
+      return node == other.node && port_id == other.port_id &&
+             dim_index == other.dim_index;
+    }
+  };
+
+  struct HashDimId {
+    std::size_t operator()(const DimId& dim) const {
+      return std::hash<const Node*>{}(dim.node) + dim.port_id + dim.dim_index;
+    }
+  };
+
+  // Compute the shape of the tensors outputed by node 'node' at output port
+  // 'port_index' as the intersection of shape1 and shape2.
+  ShapeHandle OutputAsIntersection(const Node* node, int port_index,
+                                   ShapeHandle shape1, ShapeHandle shape2) {
+    if (shape1.SameHandle(shape2)) {
+      return shape1;
+    }
+    InferenceContext* ctx = shape_refiner_->GetContext(node);
+    ShapeHandle merged = shape1;
+    if (!ctx->RankKnown(shape2) && !ctx->RankKnown(shape1)) {
+      // Return either one since they're expected to represent the same value.
+      return shape1;
+    } else if (!ctx->RankKnown(shape2) && ctx->RankKnown(shape1)) {
+      return shape1;
+    } else if (ctx->RankKnown(shape2) && !ctx->RankKnown(shape1)) {
+      return shape2;
+    } else {
+      const int rank = ctx->Rank(shape1);
+      if (ctx->Rank(shape2) != rank) {
+        // We detected an inconsistency, return an unknown shape. This can
+        // happen in the fanout of a merge node since during the initial
+        // propagation we optimistically assume that all the inputs to the merge
+        // node have the same shape.
+        return GetUnknownOutputShape(node, port_index);
+      }
       for (int d = 0; d < rank; ++d) {
-        if (c->Value(c->Dim(input, d)) != c->Value(c->Dim(out, d))) {
-          TF_RETURN_IF_ERROR(c->ReplaceDim(out, d, c->UnknownDim(), &out));
+        if (!ctx->Dim(shape1, d).SameHandle(ctx->Dim(shape2, d))) {
+          if (ctx->Value(ctx->Dim(shape1, d)) !=
+              ctx->Value(ctx->Dim(shape2, d))) {
+            DimensionHandle new_dim;
+            if (ctx->Value(ctx->Dim(shape1, d)) < 0) {
+              new_dim = ctx->Dim(shape2, d);
+            } else if (ctx->Value(ctx->Dim(shape2, d)) < 0) {
+              new_dim = ctx->Dim(shape1, d);
+            } else {
+              new_dim = GetUnknownOutputDim(node, port_index, d);
+            }
+            TF_CHECK_OK(ctx->ReplaceDim(merged, d, new_dim, &merged));
+          }
         }
       }
     }
+    return merged;
   }
-  c->set_output(0, out);
-  c->set_output(1, c->Scalar());
-  return Status::OK();
-}
 
-// Manually propagate the input shape for Enter nodes and update any Merge node
-// outputs.
-Status UpdateEnter(ShapeRefiner* shape_refiner, const Node* node, bool relax,
-                   std::queue<const Node*>* new_shapes) {
-  auto enter_ctx = shape_refiner->GetContext(node);
-  CHECK_NE(enter_ctx, nullptr);
-  for (int i = 0; i < enter_ctx->num_outputs(); i++) {
-    TF_RETURN_IF_ERROR(shape_refiner->SetShape(node, i, enter_ctx->input(0)));
-  }
-  for (const Edge* e : node->out_edges()) {
-    Node* dst = e->dst();
-    if (dst->IsMerge()) {
-      bool updated = false;
-      TF_RETURN_IF_ERROR(shape_refiner->UpdateNode(dst, relax, &updated));
-      if (!updated) {
-        continue;
+  // Compute the shape of the tensors outputed by node 'node' at output port
+  // 'port_index' as the union of shape1 and shape2.
+  ShapeHandle OutputAsUnion(const Node* node, int port_index,
+                            ShapeHandle shape1, ShapeHandle shape2) {
+    if (shape1.SameHandle(shape2)) {
+      return shape1;
+    }
+    InferenceContext* ctx = shape_refiner_->GetContext(node);
+    ShapeHandle relaxed = shape1;
+    const int rank = ctx->Rank(shape1);
+    if (!ctx->RankKnown(shape2) || ctx->Rank(shape2) != rank) {
+      relaxed = GetUnknownOutputShape(node, port_index);
+    } else {
+      for (int d = 0; d < rank; ++d) {
+        if (!ctx->Dim(shape1, d).SameHandle(ctx->Dim(shape2, d))) {
+          int64 val1 = ctx->Value(ctx->Dim(shape1, d));
+          int64 val2 = ctx->Value(ctx->Dim(shape2, d));
+          if (val1 != val2 || (val1 < 0 && val2 < 0)) {
+            DimensionHandle new_dim = GetUnknownOutputDim(node, port_index, d);
+            TF_CHECK_OK(ctx->ReplaceDim(relaxed, d, new_dim, &relaxed));
+          }
+        }
       }
-      InferenceContext* merge_ctx = shape_refiner->GetContext(dst);
-      CHECK_NE(merge_ctx, nullptr);
-      TF_RETURN_IF_ERROR(ShapeOfMergeNode(dst, merge_ctx));
-      new_shapes->push(dst);
     }
+    return relaxed;
   }
-  return Status::OK();
-}
 
-// Propagates the shapes in the transitive fan-out of <new_shapes>.
-Status PropagateShapes(ShapeRefiner* shape_refiner, bool relax,
-                       std::queue<const Node*>* new_shapes) {
-  while (!new_shapes->empty()) {
-    const Node* n = new_shapes->front();
-    new_shapes->pop();
-    for (const Node* fanout : n->out_nodes()) {
-      bool updated = false;
-      TF_RETURN_IF_ERROR(shape_refiner->UpdateNode(fanout, relax, &updated));
-      if (fanout->IsEnter()) {
-        TF_RETURN_IF_ERROR(
-            UpdateEnter(shape_refiner, fanout, relax, new_shapes));
-      } else if (updated) {
-        // We want to avoid propagating through loops on the merge pass because
-        // the shapes are not guaranteed to converge.
-        if (!relax && fanout->IsNextIteration()) {
+  bool EquivalentShapes(ShapeHandle s1, ShapeHandle s2) const {
+    if (s1.SameHandle(s2)) {
+      return true;
+    }
+    if (InferenceContext::Rank(s1) != InferenceContext::Rank(s2)) {
+      return false;
+    }
+    if (!InferenceContext::RankKnown(s1) && !InferenceContext::RankKnown(s2)) {
+      return true;
+    }
+    const int rank = InferenceContext::Rank(s1);
+    for (int i = 0; i < rank; ++i) {
+      if (!InferenceContext::DimKnownRank(s1, i).SameHandle(
+              InferenceContext::DimKnownRank(s2, i))) {
+        int64 val1 =
+            InferenceContext::Value(InferenceContext::DimKnownRank(s1, i));
+        int64 val2 =
+            InferenceContext::Value(InferenceContext::DimKnownRank(s2, i));
+        if (val1 >= 0 && val2 >= 0 && val1 == val2) {
           continue;
         }
-        new_shapes->push(fanout);
+        return false;
       }
     }
+    return true;
   }
-  return Status::OK();
-}
 
-bool IsQueue(const Node& node) {
-  StringPiece type(node.type_string());
-  return type.ends_with("QueueV2");
-}
+  bool EquivalentShapesAndTypes(const std::vector<ShapeAndType>& st1,
+                                const std::vector<ShapeAndType>& st2) const {
+    if (st1.size() != st2.size()) {
+      return false;
+    }
+    for (int i = 0; i < st1.size(); ++i) {
+      const ShapeAndType& s1 = st1[i];
+      const ShapeAndType& s2 = st2[i];
+      if (s1.dtype != s2.dtype) {
+        return false;
+      }
+      if (!EquivalentShapes(s1.shape, s2.shape)) {
+        return false;
+      }
+    }
+    return true;
+  }
 
-// Returns true if the node is an Enter op AND its input is a Queue.
-bool IsEnterWithQueue(const Node& node) {
-  if (node.IsEnter()) {
-    const Node* in_node;
-    TF_CHECK_OK(node.input_node(0, &in_node));
-    return IsQueue(*in_node);
+ private:
+  // Return the one ShapeHandle used to denote a fully unknown shape for a node
+  // output.
+  ShapeHandle GetUnknownOutputShape(const Node* node, int index) {
+    ShapeId id{node, index};
+    auto it = unknown_shapes_.find(id);
+    if (it != unknown_shapes_.end()) {
+      return it->second;
+    }
+    InferenceContext* c = shape_refiner_->GetContext(node);
+    ShapeHandle shp = c->UnknownShape();
+    unknown_shapes_[id] = shp;
+    return shp;
+  }
+  // Return the one ShapeHandle used to denote a fully unknown dimension for a
+  // node output.
+  DimensionHandle GetUnknownOutputDim(const Node* node, int index, int dim_id) {
+    DimId id{node, index, dim_id};
+    auto it = unknown_dims_.find(id);
+    if (it != unknown_dims_.end()) {
+      return it->second;
+    }
+    InferenceContext* c = shape_refiner_->GetContext(node);
+    DimensionHandle dim = c->UnknownDim();
+    unknown_dims_[id] = dim;
+    return dim;
   }
-  return false;
-}
 
-}  // namespace
+  ShapeRefiner* shape_refiner_;
+
+  std::unordered_map<ShapeId, ShapeHandle, HashShapeId> unknown_shapes_;
+  std::unordered_map<DimId, DimensionHandle, HashDimId> unknown_dims_;
+};
 
 // Keep track of shapes and dimensions in a graph.
 // In particular, use disjoint sets to track equivalence between shapes and
@@ -401,24 +547,9 @@ class SymbolicShapeManager {
   DisjointSet<shape_inference::DimensionHandle> dims_;
 };
 
-void GraphProperties::Relax(InferenceContext* c, ShapeHandle s0, ShapeHandle s1,
-                            ShapeHandle* out) {
-  c->Relax(s0, s1, out);
-}
-
-bool GraphProperties::SameDefinedShape(InferenceContext* c, ShapeHandle s0,
-                                       ShapeHandle s1) {
-  return ShapeRefiner::SameDefinedShape(c, s0, s1);
-}
-
-bool GraphProperties::IsUpdatedShapesOrTypes(
-    InferenceContext* c, const std::vector<ShapeAndType>& existing,
-    const std::vector<ShapeAndType>& updated) {
-  return ShapeRefiner::IsUpdatedShapesOrTypes(c, existing, updated);
-}
-
 Status GraphProperties::MergeEnqueueShapesAndTypes(
-    const std::vector<ShapeAndType>& shapes_and_types, InferenceContext* qctx,
+    SymbolicShapeRefiner* shape_refiner, const Node* qnode,
+    const std::vector<ShapeAndType>& shapes_and_types,
     std::vector<ShapeAndType>* queue_shapes_and_types) {
   if (shapes_and_types.size() != queue_shapes_and_types->size()) {
     return errors::InvalidArgument(
@@ -434,13 +565,14 @@ Status GraphProperties::MergeEnqueueShapesAndTypes(
                                      DataTypeString(b.dtype));
     }
 
-    TF_RETURN_IF_ERROR(qctx->Merge(a.shape, b.shape, &b.shape));
+    b.shape = shape_refiner->OutputAsIntersection(qnode, i, a.shape, b.shape);
   }
   return Status::OK();
 }
 
 Status GraphProperties::RelaxEnqueueShapesAndMergeTypes(
-    const std::vector<ShapeAndType>& shapes_and_types, InferenceContext* qctx,
+    SymbolicShapeRefiner* shape_refiner, const Node* qnode,
+    const std::vector<ShapeAndType>& shapes_and_types,
     std::vector<ShapeAndType>* queue_shapes_and_types) {
   if (shapes_and_types.size() != queue_shapes_and_types->size()) {
     return errors::InvalidArgument(
@@ -456,11 +588,197 @@ Status GraphProperties::RelaxEnqueueShapesAndMergeTypes(
                                      DataTypeString(b.dtype));
     }
 
-    Relax(qctx, a.shape, b.shape, &b.shape);
+    b.shape = shape_refiner->OutputAsUnion(qnode, i, a.shape, b.shape);
   }
   return Status::OK();
 }
 
+// If a Merge node has a NextIteration node as an input then that input will
+// try to forward an UnknownShape at graph construction time. However, the
+// Merge shape function will always propagate an UnknownShape if any of its
+// inputs are UnknownShapes. So we need to ignore the input from NextIteration
+// nodes to propagate any known shape from the Merge node.
+Status GraphProperties::UpdateMergeNode(SymbolicShapeRefiner* shape_refiner,
+                                        const Node* node, bool relax,
+                                        TopoQueue* new_shapes) {
+  InferenceContext* c = shape_refiner->GetContext(node);
+  CHECK_NE(c, nullptr);
+
+  ShapeHandle out;
+  bool out_initialized = false;
+  for (const Edge* e : node->in_edges()) {
+    if (e->IsControlEdge()) {
+      continue;
+    }
+    // Skip back edges during the initial propagation phase. This is equivalent
+    // to assuming that all the inputs to the merge nodes are fed by the same
+    // shape, and will be corrected as needed in the relaxation phase.
+    if (!relax && e->src()->IsNextIteration()) {
+      continue;
+    }
+
+    InferenceContext* in = shape_refiner->GetContext(e->src());
+    ShapeHandle input = in->output(e->src_output());
+    if (relax) {
+      c->RelaxInput(e->dst_input(), input);
+    } else {
+      c->MergeInput(e->dst_input(), input);
+    }
+    if (!out_initialized) {
+      out_initialized = true;
+      out = input;
+      continue;
+    }
+    if (relax) {
+      out = shape_refiner->OutputAsUnion(node, 0, input, out);
+    } else {
+      out = shape_refiner->OutputAsIntersection(node, 0, input, out);
+    }
+  }
+
+  if (!shape_refiner->EquivalentShapes(out, c->output(0))) {
+    c->set_output(0, out);
+    c->set_output(1, c->Scalar());
+    new_shapes->push(node);
+  }
+
+  return Status::OK();
+}
+
+// Manually propagate the input shape for Enter nodes and update any Merge node
+// outputs.
+Status GraphProperties::UpdateEnter(SymbolicShapeRefiner* shape_refiner,
+                                    const Node* node, bool relax,
+                                    TopoQueue* new_shapes) {
+  auto enter_ctx = shape_refiner->GetContext(node);
+  CHECK_NE(enter_ctx, nullptr);
+
+  for (const Edge* e : node->in_edges()) {
+    if (e->IsControlEdge()) {
+      continue;
+    }
+    InferenceContext* in = shape_refiner->GetContext(e->src());
+    ShapeHandle input = in->output(e->src_output());
+    if (!enter_ctx->output(0).SameHandle(input)) {
+      if (relax) {
+        enter_ctx->RelaxInput(0, input);
+      } else {
+        enter_ctx->MergeInput(0, input);
+      }
+      enter_ctx->set_output(0, input);
+      new_shapes->push(node);
+    }
+  }
+  return Status::OK();
+}
+
+Status GraphProperties::UpdateShapes(SymbolicShapeRefiner* shape_refiner,
+                                     bool relax, const Node* n,
+                                     TopoQueue* new_shapes) {
+  if (n->IsEnter()) {
+    // The Enter shape function always forwards an UnknownShape, so do the right
+    // thing here.
+    TF_RETURN_IF_ERROR(UpdateEnter(shape_refiner, n, relax, new_shapes));
+  } else if (n->IsMerge()) {
+    // Properly handle merge nodes.
+    TF_RETURN_IF_ERROR(UpdateMergeNode(shape_refiner, n, relax, new_shapes));
+  } else {
+    // Rely on regular TF shape refinement for all the other nodes.
+    bool updated = false;
+    TF_RETURN_IF_ERROR(shape_refiner->UpdateNode(n, relax, &updated));
+    if (updated) {
+      // We want to avoid propagating through loops on the merge pass because
+      // the shapes are not guaranteed to converge.
+      if (relax || !n->IsNextIteration()) {
+        new_shapes->push(n);
+      }
+    }
+  }
+  return Status::OK();
+}
+
+// Propagates the shapes in the transitive fan-out of <new_shapes>.
+Status GraphProperties::PropagateShapes(
+    SymbolicShapeRefiner* shape_refiner, bool relax, TopoQueue* new_shapes,
+    const std::unordered_map<const Node*, std::unordered_set<const Node*>>&
+        resources) {
+  do {
+    while (!new_shapes->empty()) {
+      const Node* n = new_shapes->pop();
+      for (const Node* fanout : n->out_nodes()) {
+        TF_RETURN_IF_ERROR(
+            UpdateShapes(shape_refiner, relax, fanout, new_shapes));
+      }
+    }
+
+    for (const auto& resource : resources) {
+      // Resources need special handling: since the enqueue nodes are in the
+      // fanout of the queues, we need to manually propagate the shapes from
+      // enqueue node to the corresponding queue.
+      TF_RETURN_IF_ERROR(UpdateResource(resource.first, resource.second,
+                                        shape_refiner, relax, new_shapes));
+    }
+  } while (!new_shapes->empty());
+
+  return Status::OK();
+}
+
+Status GraphProperties::UpdateResource(
+    const Node* qnode, const std::unordered_set<const Node*>& queue_inputs,
+    SymbolicShapeRefiner* shape_refiner, bool relax, TopoQueue* new_shapes) {
+  // Proceed only if qnode is a queue or an Enter with queue input.
+  if (!IsQueue(*qnode) && !IsEnterWithQueue(*qnode)) {
+    return Status::OK();
+  }
+  auto qctx = shape_refiner->GetContext(qnode);
+  if (!qctx) {
+    return Status::OK();
+  }
+  auto* queue_handle_data = qctx->output_handle_shapes_and_types(0);
+
+  // Merge all inputs into the enqueue node, regardless of which phase we
+  // are in.
+  std::vector<ShapeAndType> queue_shapes_and_types;
+  if (queue_handle_data) {
+    queue_shapes_and_types = *queue_handle_data;
+  }
+  for (const auto& node : queue_inputs) {
+    auto ctx = shape_refiner->GetContext(node);
+    if (!ctx) {
+      continue;
+    }
+    // TODO(bsteiner): handle EnqueueMany as well.
+    if (node->type_string().find("Enqueue") != std::string::npos &&
+        node->type_string().find("EnqueueMany") == std::string::npos) {
+      std::vector<ShapeAndType> shapes_and_types;
+      for (int i = 1; i < ctx->num_inputs(); ++i) {
+        shapes_and_types.push_back({ctx->input(i), node->input_type(i)});
+      }
+      if (queue_shapes_and_types.empty()) {
+        queue_shapes_and_types = shapes_and_types;
+      } else {
+        if (relax) {
+          TF_RETURN_IF_ERROR(RelaxEnqueueShapesAndMergeTypes(
+              shape_refiner, qnode, shapes_and_types, &queue_shapes_and_types));
+        } else {
+          TF_RETURN_IF_ERROR(MergeEnqueueShapesAndTypes(
+              shape_refiner, qnode, shapes_and_types, &queue_shapes_and_types));
+        }
+      }
+    }
+  }
+
+  if (queue_handle_data == nullptr ||
+      !shape_refiner->EquivalentShapesAndTypes(*queue_handle_data,
+                                               queue_shapes_and_types)) {
+    qctx->set_output_handle_shapes_and_types(0, queue_shapes_and_types);
+
+    new_shapes->push(qnode);
+  }
+
+  return Status::OK();
+}
+
 Status GraphProperties::InferStatically() {
   Graph graph(OpRegistry::Global());
   FunctionLibraryDefinition function_library(graph.op_registry(),
@@ -493,146 +811,35 @@ Status GraphProperties::InferStatically() {
     }
     if (node->IsEnter()) {
       enter_nodes.insert(node);
-    } else if (node->IsNextIteration()) {
-      for (const Node* output : node->out_nodes()) {
-        if (output->IsMerge()) {
-          merge_nodes.insert(output);
-        }
-      }
+    } else if (node->IsMerge()) {
+      merge_nodes.insert(node);
     }
   }
 
-  // Propagate the initial shapes of Enter nodes manually (the Enter shape
-  // function always forwards an UnknownShape).
-  std::queue<const Node*> new_shapes;
-  for (const Node* node : enter_nodes) {
-    TF_RETURN_IF_ERROR(
-        UpdateEnter(&shape_refiner, node, false /* relax */, &new_shapes));
-  }
-  TF_RETURN_IF_ERROR(
-      PropagateShapes(&shape_refiner, false /* relax */, &new_shapes));
+  SymbolicShapeRefiner refiner(&shape_refiner);
 
   // We propagate shapes through the graph in two phases. In the first phase, we
-  // exclusively merge shapes but we do not propagate shapes through loops. Then
-  // on the second phase, we exclusively relax shapes and propagate shapes
-  // through loops until reaching fixed point.
+  // exclusively merge shapes but we do not propagate shapes through the
+  // backedge of loops (i.e. the NextIteration node). Then on the second phase,
+  // we exclusively relax shapes and propagate shapes through loops until
+  // reaching fixed point.
   for (int relax = 0; relax < 2; relax++) {
-    // We don't update Merge nodes with the input of NextIteration nodes on the
-    // merge pass. So we do that at the beginning of the relax pass instead.
-    if (relax) {
-      bool updated = false;
-      for (const Node* node : merge_nodes) {
-        TF_RETURN_IF_ERROR(
-            shape_refiner.UpdateNode(node, false /* relax */, &updated));
-      }
+    TopoQueue new_shapes;
+    // Force the propagation of shapes of Enter nodes manually (the Enter shape
+    // function always forwards an UnknownShape).
+    for (const Node* node : enter_nodes) {
+      TF_RETURN_IF_ERROR(UpdateShapes(&refiner, relax, node, &new_shapes));
     }
-
-    bool done = true;
-    do {
-      if (relax) {
-        // Propagate shapes through any loops in the graph by relaxing.
-        for (const Node* node : merge_nodes) {
-          new_shapes.push(node);
-        }
-        TF_RETURN_IF_ERROR(PropagateShapes(&shape_refiner, relax, &new_shapes));
-      }
-
-      // If we found a resource, try to propagate the shapes through it.
-      new_shapes = std::queue<const Node*>();
-      for (const auto& resource_data : resources) {
-        const Node* qnode = resource_data.first;
-        // Proceed only if qnode is a queue or an Enter with queue input.
-        if (!IsQueue(*qnode) && !IsEnterWithQueue(*qnode)) {
-          continue;
-        }
-        auto qctx = shape_refiner.GetContext(qnode);
-        if (!qctx) {
-          continue;
-        }
-
-        // Check to see if the shape is fully defined.
-        auto* queue_handle_data = qctx->output_handle_shapes_and_types(0);
-        if (queue_handle_data != nullptr) {
-          bool fully_defined = true;
-          for (const auto& shape_and_type : *queue_handle_data) {
-            if (!qctx->FullyDefined(shape_and_type.shape) ||
-                shape_and_type.dtype == DT_INVALID) {
-              fully_defined = false;
-            }
-          }
-          // If we are merging, then we are done. If we are relaxing, then we
-          // could potentially propagate a less specific shape.
-          if (fully_defined && !relax) {
-            continue;
-          }
-        }
-
-        // Merge all inputs into the enqueue node, regardless of which phase we
-        // are in.
-        std::vector<ShapeAndType> queue_shapes_and_types;
-        for (const auto& node : resource_data.second) {
-          auto ctx = shape_refiner.GetContext(node);
-          if (!ctx) {
-            continue;
-          }
-          // TODO(bsteiner): handle EnqueueMany as well.
-          if (node->type_string().find("Enqueue") != std::string::npos &&
-              node->type_string().find("EnqueueMany") == std::string::npos) {
-            std::vector<ShapeAndType> shapes_and_types;
-            for (int i = 1; i < ctx->num_inputs(); ++i) {
-              shapes_and_types.push_back({ctx->input(i), node->input_type(i)});
-            }
-
-            if (queue_shapes_and_types.empty()) {
-              queue_shapes_and_types = shapes_and_types;
-            } else {
-              TF_RETURN_IF_ERROR(MergeEnqueueShapesAndTypes(
-                  shapes_and_types, qctx, &queue_shapes_and_types));
-            }
-          }
-        }
-        // Combine the input shapes with the existing output shape. We either
-        // merge or relax depending on which phase we are in.
-        if (queue_handle_data != nullptr) {
-          if (relax) {
-            TF_RETURN_IF_ERROR(RelaxEnqueueShapesAndMergeTypes(
-                *queue_handle_data, qctx, &queue_shapes_and_types));
-          } else {
-            TF_RETURN_IF_ERROR(MergeEnqueueShapesAndTypes(
-                *queue_handle_data, qctx, &queue_shapes_and_types));
-          }
-        }
-        // Set the output ShapeAndType handles. If we successfully update the
-        // resource node, add its fan-out to the queue.
-        const std::vector<ShapeAndType>* outputs =
-            qctx->output_handle_shapes_and_types(0);
-        std::vector<ShapeAndType> existing_outputs;
-        if (outputs) {
-          existing_outputs = *outputs;
-        }
-        if (!queue_shapes_and_types.empty()) {
-          if (!relax && qctx->MergeOutputHandleShapesAndTypes(
-                            0, queue_shapes_and_types)) {
-            new_shapes.push(qnode);
-          } else if (relax && qctx->RelaxOutputHandleShapesAndMergeTypes(
-                                  0, queue_shapes_and_types)) {
-            if (IsUpdatedShapesOrTypes(
-                    qctx, existing_outputs,
-                    *qctx->output_handle_shapes_and_types(0))) {
-              new_shapes.push(qnode);
-            }
-          }
-        }
-      }
-      // Propagate the shapes in the transitive fan-out of the queue.
-      done = new_shapes.empty();
-      if (!done) {
-        TF_RETURN_IF_ERROR(PropagateShapes(&shape_refiner, relax, &new_shapes));
-      }
-    } while (!done);
+    // Seed the propagation of shapes through merge nodes.
+    for (const Node* node : merge_nodes) {
+      TF_RETURN_IF_ERROR(UpdateShapes(&refiner, relax, node, &new_shapes));
+    }
+    // Propagate shapes normally.
+    TF_RETURN_IF_ERROR(
+        PropagateShapes(&refiner, relax, &new_shapes, resources));
   }
 
-  // Track shapes globally accross the graph.
+  // Track shapes globally across the graph.
   SymbolicShapeManager shape_manager;
   bool found_error = false;
   for (const Node* const node : graph.nodes()) {
diff --git a/tensorflow/core/grappler/costs/graph_properties.h b/tensorflow/core/grappler/costs/graph_properties.h
index a6aed0bba6..37c8654541 100644
--- a/tensorflow/core/grappler/costs/graph_properties.h
+++ b/tensorflow/core/grappler/costs/graph_properties.h
@@ -26,6 +26,9 @@ limitations under the License.
 namespace tensorflow {
 namespace grappler {
 
+class SymbolicShapeRefiner;
+class TopoQueue;
+
 // A TensorFlow model to optimize.
 // Models are represented by the combination of a graph, one of more fetch
 // nodes, and potentially a set of nodes to feed.
@@ -64,31 +67,42 @@ class GraphProperties {
 
   // Merges shapes <shapes_and_types>, determined from an EnqueueV2 node, into
   // <*queue_shapes_and_types>.
-  Status MergeEnqueueShapesAndTypes(
+  static Status MergeEnqueueShapesAndTypes(
+      SymbolicShapeRefiner* shape_refiner, const Node* qnode,
       const std::vector<shape_inference::ShapeAndType>& shapes_and_types,
-      shape_inference::InferenceContext* qctx,
       std::vector<shape_inference::ShapeAndType>* queue_shapes_and_types);
   // Relaxes shapes <shapes_and_types>, determined from an EnqueueV2 node, into
   // <*queue_shapes_and_types>.
-  Status RelaxEnqueueShapesAndMergeTypes(
+  static Status RelaxEnqueueShapesAndMergeTypes(
+      SymbolicShapeRefiner* shape_refiner, const Node* qnode,
       const std::vector<shape_inference::ShapeAndType>& shapes_and_types,
-      shape_inference::InferenceContext* qctx,
       std::vector<shape_inference::ShapeAndType>* queue_shapes_and_types);
 
-  // This gives access to private function of InferenceContext.
-  static void Relax(shape_inference::InferenceContext* c,
-                    shape_inference::ShapeHandle s0,
-                    shape_inference::ShapeHandle s1,
-                    shape_inference::ShapeHandle* out);
-
-  // These give access to private functions of ShapeRefiner.
-  static bool SameDefinedShape(shape_inference::InferenceContext* c,
-                               shape_inference::ShapeHandle s0,
-                               shape_inference::ShapeHandle s1);
-  static bool IsUpdatedShapesOrTypes(
-      shape_inference::InferenceContext* c,
-      const std::vector<shape_inference::ShapeAndType>& existing,
-      const std::vector<shape_inference::ShapeAndType>& updated);
+  // Update the shapes for qnode. If output shapes of qnode have changed,
+  // enqueue its fanout in 'new_shapes'.
+  static Status UpdateResource(
+      const Node* qnode, const std::unordered_set<const Node*>& queue_inputs,
+      SymbolicShapeRefiner* shape_refiner, bool relax, TopoQueue* new_shapes);
+
+  // Update the output shapes of a Merge node, and enqueue its fanout in
+  // new_shapes if needed.
+  static Status UpdateMergeNode(SymbolicShapeRefiner* shape_refiner,
+                                const Node* node, bool relax,
+                                TopoQueue* new_shapes);
+  // Process the Enter node, and enqueue its fanout in new_shapes if needed.
+  static Status UpdateEnter(SymbolicShapeRefiner* shape_refiner,
+                            const Node* node, bool relax,
+                            TopoQueue* new_shapes);
+  // Update the shapes for node 'n'. If output shapes for n have changed,
+  // enqueue its fanout in 'new_shapes'.
+  static Status UpdateShapes(SymbolicShapeRefiner* shape_refiner, bool relax,
+                             const Node* n, TopoQueue* new_shapes);
+  // Propagate the shapes for the nodes enqueued in new_shapes and their
+  // transitive fanout until a fixed point is reached.
+  static Status PropagateShapes(
+      SymbolicShapeRefiner* shape_refiner, bool relax, TopoQueue* new_shapes,
+      const std::unordered_map<const Node*, std::unordered_set<const Node*>>&
+          resources);
 };
 
 }  // end namespace grappler
diff --git a/tensorflow/core/grappler/costs/graph_properties_test.cc b/tensorflow/core/grappler/costs/graph_properties_test.cc
index f785f627e1..74d48158a9 100644
--- a/tensorflow/core/grappler/costs/graph_properties_test.cc
+++ b/tensorflow/core/grappler/costs/graph_properties_test.cc
@@ -362,7 +362,7 @@ TEST_F(GraphPropertiesTest, WhileLoop) {
   /*
      with tf.Graph().as_default():
        i0 = tf.constant(0)
-       m0 = tf.ones([2, 2])
+       m0 = tf.placeholder([-1, 2])
        c = lambda i, m: i < 10
        b = lambda i, m: [i+1, tf.concat([m, m], axis=0)]
        r = tf.while_loop(
@@ -387,6 +387,14 @@ TEST_F(GraphPropertiesTest, WhileLoop) {
     EXPECT_EQ(DT_FLOAT, prop.dtype());
     EXPECT_EQ("float: [-1,2]", PropToString(prop));
   }
+
+  // The loop outputs batch dim should be different from the input batch dim
+  // since we concatenated along the batch dim.
+  auto shape_in = properties.GetOutputProperties("ones").at(0).shape();
+  auto shape_out = properties.GetOutputProperties("while/Exit_1").at(0).shape();
+  EXPECT_GE(-2, shape_in.dim(0).size());
+  EXPECT_GE(-2, shape_out.dim(0).size());
+  EXPECT_NE(shape_in.dim(0).size(), shape_out.dim(0).size());
 }
 
 TEST_F(GraphPropertiesTest, NestedLoop) {
@@ -750,6 +758,10 @@ TEST_F(GraphPropertiesTest, SymbolicShapes) {
   Output e = ops::Add(s.WithOpName("e"), c, d);
   Output f = ops::Add(s.WithOpName("f"), a, c);
 
+  Output zero = ops::Const(s.WithOpName("zero"), 0.0f, {});
+  Output g = ops::Shape(s.WithOpName("g"), c);
+  Output h = ops::Fill(s.WithOpName("h"), g, zero);
+
   GrapplerItem item;
   TF_CHECK_OK(s.ToGraphDef(&item.graph));
 
@@ -773,15 +785,20 @@ TEST_F(GraphPropertiesTest, SymbolicShapes) {
   EXPECT_EQ(shape_b.dim(0).size(), shape_d.dim(0).size());
 
   const auto shape_e = properties.GetOutputProperties("e").at(0).shape();
-  EXPECT_EQ(2, shape_e.dim_size());
+  ASSERT_EQ(2, shape_e.dim_size());
   EXPECT_EQ(shape_e.dim(0).size(), shape_c.dim(0).size());
   EXPECT_NE(shape_e.dim(1).size(), shape_c.dim(1).size());
   EXPECT_NE(shape_e.dim(0).size(), shape_d.dim(0).size());
 
   const auto shape_f = properties.GetOutputProperties("f").at(0).shape();
-  EXPECT_EQ(2, shape_f.dim_size());
+  ASSERT_EQ(2, shape_f.dim_size());
   EXPECT_EQ(shape_f.dim(0).size(), shape_a.dim(0).size());
   EXPECT_EQ(shape_f.dim(1).size(), shape_a.dim(1).size());
+
+  const auto shape_h = properties.GetOutputProperties("h").at(0).shape();
+  ASSERT_EQ(2, shape_f.dim_size());
+  EXPECT_EQ(shape_h.dim(0).size(), shape_c.dim(0).size());
+  EXPECT_EQ(shape_h.dim(1).size(), shape_c.dim(1).size());
 }
 
 TEST_F(GraphPropertiesTest, DoNotValidateColocationConstraints) {
diff --git a/tensorflow/core/grappler/costs/graph_properties_testdata/while_loop.pbtxt b/tensorflow/core/grappler/costs/graph_properties_testdata/while_loop.pbtxt
index c11833bd1a..fbc3659d9a 100644
--- a/tensorflow/core/grappler/costs/graph_properties_testdata/while_loop.pbtxt
+++ b/tensorflow/core/grappler/costs/graph_properties_testdata/while_loop.pbtxt
@@ -21,7 +21,7 @@ node {
 }
 node {
   name: "ones"
-  op: "Const"
+  op: "PlaceholderV2"
   attr {
     key: "dtype"
     value {
@@ -29,19 +29,15 @@ node {
     }
   }
   attr {
-    key: "value"
+    key: "shape"
     value {
-      tensor {
-        dtype: DT_FLOAT
-        tensor_shape {
-          dim {
-            size: 2
-          }
-          dim {
-            size: 2
-          }
+      shape {
+        dim {
+          size: -1
+        }
+        dim {
+          size: 2
         }
-        float_val: 1.0
       }
     }
   }
-- 
GitLab


From 98ef53d5541049655c9160130595253fdefd4590 Mon Sep 17 00:00:00 2001
From: Jiri Simsa <jsimsa@google.com>
Date: Fri, 17 Nov 2017 12:49:48 -0800
Subject: [PATCH 0064/1225] Remove `tf.data.SparseType` and instead use
 `tf.data.Dataset.output_classes` as the means for recording the class type of
 the dataset elements.

PiperOrigin-RevId: 176147440
---
 .../contrib/data/python/ops/batching.py       |  40 ++-
 .../contrib/data/python/ops/dataset_ops.py    |   4 +
 .../contrib/data/python/ops/error_ops.py      |   9 +-
 .../contrib/data/python/ops/grouping.py       |  36 +-
 .../contrib/data/python/ops/interleave_ops.py |  22 +-
 tensorflow/contrib/data/python/ops/readers.py |   4 +
 .../contrib/data/python/ops/scan_ops.py       |  23 +-
 tensorflow/python/data/__init__.py            |   2 -
 tensorflow/python/data/ops/dataset_ops.py     | 254 ++++++++++----
 tensorflow/python/data/ops/iterator_ops.py    |  93 +++--
 tensorflow/python/data/ops/readers.py         |  12 +
 tensorflow/python/data/util/BUILD             |   3 +
 tensorflow/python/data/util/sparse.py         | 150 ++++-----
 tensorflow/python/data/util/sparse_test.py    | 318 ++++++++++++++----
 tensorflow/python/kernel_tests/BUILD          |  17 +-
 tensorflow/python/ops/sparse_ops.py           |   2 +-
 .../api/golden/tensorflow.data.-dataset.pbtxt |   4 +
 ...ow.data.-fixed-length-record-dataset.pbtxt |   4 +
 .../golden/tensorflow.data.-iterator.pbtxt    |  10 +-
 .../golden/tensorflow.data.-sparse-type.pbtxt |  13 -
 .../tensorflow.data.-t-f-record-dataset.pbtxt |   4 +
 .../tensorflow.data.-text-line-dataset.pbtxt  |   4 +
 .../tools/api/golden/tensorflow.data.pbtxt    |   4 -
 23 files changed, 728 insertions(+), 304 deletions(-)
 delete mode 100644 tensorflow/tools/api/golden/tensorflow.data.-sparse-type.pbtxt

diff --git a/tensorflow/contrib/data/python/ops/batching.py b/tensorflow/contrib/data/python/ops/batching.py
index cc63baed81..1ac059b374 100644
--- a/tensorflow/contrib/data/python/ops/batching.py
+++ b/tensorflow/contrib/data/python/ops/batching.py
@@ -112,8 +112,10 @@ def filter_irregular_batches(batch_size):
     tensor_batch_size = ops.convert_to_tensor(
         batch_size, dtype=dtypes.int64, name="batch_size")
 
-    flattened = _RestructuredDataset(dataset,
-                                     tuple(nest.flatten(dataset.output_types)))
+    flattened = _RestructuredDataset(
+        dataset,
+        tuple(nest.flatten(dataset.output_types)),
+        output_classes=tuple(nest.flatten(dataset.output_classes)))
 
     def _predicate(*xs):
       """Return `True` if this element is a full batch."""
@@ -135,7 +137,11 @@ def filter_irregular_batches(batch_size):
 
     known_shapes = nest.map_structure(_set_first_dimension,
                                       dataset.output_shapes)
-    return _RestructuredDataset(filtered, dataset.output_types, known_shapes)
+    return _RestructuredDataset(
+        filtered,
+        dataset.output_types,
+        known_shapes,
+        output_classes=dataset.output_classes)
 
   return _apply_fn
 
@@ -237,6 +243,10 @@ class DenseToSparseBatchDataset(dataset_ops.Dataset):
         output_shapes=self.output_shapes,
         output_types=self.output_types)
 
+  @property
+  def output_classes(self):
+    return (ops.Tensor, ops.Tensor, ops.Tensor)
+
   @property
   def output_shapes(self):
     num_elements = tensor_shape.Dimension(None)
@@ -252,7 +262,11 @@ class DenseToSparseBatchDataset(dataset_ops.Dataset):
 class _RestructuredDataset(dataset_ops.Dataset):
   """An internal helper for changing the structure and shape of a dataset."""
 
-  def __init__(self, dataset, output_types, output_shapes=None):
+  def __init__(self,
+               dataset,
+               output_types,
+               output_shapes=None,
+               output_classes=None):
     """Creates a new dataset with the given output types and shapes.
 
     The given `dataset` must have a structure that is convertible:
@@ -268,6 +282,8 @@ class _RestructuredDataset(dataset_ops.Dataset):
       output_types: A nested structure of `tf.DType` objects.
       output_shapes: (Optional.) A nested structure of `tf.TensorShape` objects.
         If omitted, the shapes will be inherited from `dataset`.
+      output_classes: (Optional.) A nested structure of class types.
+        If omitted, the class types will be inherited from `dataset`.
 
     Raises:
       ValueError: If either `output_types` or `output_shapes` is not compatible
@@ -307,10 +323,21 @@ class _RestructuredDataset(dataset_ops.Dataset):
                                                  output_shapes))
       self._output_shapes = nest.map_structure_up_to(
           output_types, tensor_shape.as_shape, output_shapes)
+    if output_classes is None:
+      # Inherit class types from the original `dataset`.
+      self._output_classes = nest.pack_sequence_as(output_types,
+                                                   nest.flatten(
+                                                       dataset.output_classes))
+    else:
+      self._output_classes = output_classes
 
   def _as_variant_tensor(self):
     return self._dataset._as_variant_tensor()  # pylint: disable=protected-access
 
+  @property
+  def output_classes(self):
+    return self._output_classes
+
   @property
   def output_types(self):
     return self._output_types
@@ -345,8 +372,9 @@ class _MapAndBatchDataset(dataset_ops.MapDataset):
         batch_size=self._batch_size,
         num_parallel_batches=self._num_parallel_batches,
         output_types=nest.flatten(
-            sparse.unwrap_sparse_types(self.output_types)),
-        output_shapes=nest.flatten(self.output_shapes))
+            sparse.as_dense_types(self.output_types, self.output_classes)),
+        output_shapes=nest.flatten(
+            sparse.as_dense_shapes(self.output_shapes, self.output_classes)))
     # pylint: enable=protected-access
 
   @property
diff --git a/tensorflow/contrib/data/python/ops/dataset_ops.py b/tensorflow/contrib/data/python/ops/dataset_ops.py
index 45d6dbe743..863c94ef9f 100644
--- a/tensorflow/contrib/data/python/ops/dataset_ops.py
+++ b/tensorflow/contrib/data/python/ops/dataset_ops.py
@@ -48,6 +48,10 @@ class Dataset(dataset_ops.Dataset):
   def _as_variant_tensor(self):
     return self._dataset._as_variant_tensor()  # pylint: disable=protected-access
 
+  @property
+  def output_classes(self):
+    return self._dataset.output_classes
+
   @property
   def output_shapes(self):
     return self._dataset.output_shapes
diff --git a/tensorflow/contrib/data/python/ops/error_ops.py b/tensorflow/contrib/data/python/ops/error_ops.py
index 194b611513..aa629cba47 100644
--- a/tensorflow/contrib/data/python/ops/error_ops.py
+++ b/tensorflow/contrib/data/python/ops/error_ops.py
@@ -63,9 +63,14 @@ class IgnoreErrorsDataset(dataset_ops.Dataset):
   def _as_variant_tensor(self):
     return gen_dataset_ops.ignore_errors_dataset(
         self._input_dataset._as_variant_tensor(),  # pylint: disable=protected-access
-        output_shapes=nest.flatten(self.output_shapes),
+        output_shapes=nest.flatten(
+            sparse.as_dense_shapes(self.output_shapes, self.output_classes)),
         output_types=nest.flatten(
-            sparse.unwrap_sparse_types(self.output_types)))
+            sparse.as_dense_types(self.output_types, self.output_classes)))
+
+  @property
+  def output_classes(self):
+    return self._input_dataset.output_classes
 
   @property
   def output_shapes(self):
diff --git a/tensorflow/contrib/data/python/ops/grouping.py b/tensorflow/contrib/data/python/ops/grouping.py
index 86337271bc..ef91c56726 100644
--- a/tensorflow/contrib/data/python/ops/grouping.py
+++ b/tensorflow/contrib/data/python/ops/grouping.py
@@ -88,15 +88,21 @@ def group_by_window(key_func,
 class _VariantDataset(dataset_ops.Dataset):
   """A Dataset wrapper for a tf.variant-typed function argument."""
 
-  def __init__(self, dataset_variant, output_types, output_shapes):
+  def __init__(self, dataset_variant, output_types, output_shapes,
+               output_classes):
     super(_VariantDataset, self).__init__()
     self._dataset_variant = dataset_variant
     self._output_types = output_types
     self._output_shapes = output_shapes
+    self._output_classes = output_classes
 
   def _as_variant_tensor(self):
     return self._dataset_variant
 
+  @property
+  def output_classes(self):
+    return self._output_classes
+
   @property
   def output_shapes(self):
     return self._output_shapes
@@ -138,17 +144,21 @@ class GroupByWindowDataset(dataset_ops.Dataset):
   def _make_key_func(self, key_func, input_dataset):
     """Make wrapping Defun for key_func."""
 
-    @function.Defun(
-        *nest.flatten(sparse.unwrap_sparse_types(input_dataset.output_types)))
+    @function.Defun(*nest.flatten(
+        sparse.as_dense_types(input_dataset.output_types,
+                              input_dataset.output_classes)))
     def tf_key_func(*args):
       """A wrapper for Defun that facilitates shape inference."""
       # Pass in shape information from the input_dataset.
-      for arg, shape in zip(args, nest.flatten(input_dataset.output_shapes)):
+      dense_shapes = sparse.as_dense_shapes(input_dataset.output_shapes,
+                                            input_dataset.output_classes)
+      for arg, shape in zip(args, nest.flatten(dense_shapes)):
         arg.set_shape(shape)
 
       nested_args = nest.pack_sequence_as(input_dataset.output_types, args)
       nested_args = sparse.deserialize_sparse_tensors(
-          nested_args, input_dataset.output_types)
+          nested_args, input_dataset.output_types, input_dataset.output_shapes,
+          input_dataset.output_classes)
       # pylint: disable=protected-access
       if dataset_ops._should_unpack_args(nested_args):
         ret = key_func(*nested_args)
@@ -170,14 +180,15 @@ class GroupByWindowDataset(dataset_ops.Dataset):
     def tf_reduce_func(key, window_dataset_variant):
       """A wrapper for Defun that facilitates shape inference."""
       key.set_shape([])
-      window_dataset = _VariantDataset(window_dataset_variant,
-                                       input_dataset.output_types,
-                                       input_dataset.output_shapes)
+      window_dataset = _VariantDataset(
+          window_dataset_variant, input_dataset.output_types,
+          input_dataset.output_shapes, input_dataset.output_classes)
       if not isinstance(window_dataset, dataset_ops.Dataset):
         raise TypeError("`window_dataset` must return a `Dataset` object.")
       output_dataset = reduce_func(key, window_dataset)
       if not isinstance(output_dataset, dataset_ops.Dataset):
         raise TypeError("`reduce_func` must return a `Dataset` object.")
+      self._output_classes = output_dataset.output_classes
       self._output_types = output_dataset.output_types
       self._output_shapes = output_dataset.output_shapes
       return output_dataset._as_variant_tensor()  # pylint: disable=protected-access
@@ -185,6 +196,10 @@ class GroupByWindowDataset(dataset_ops.Dataset):
     self._reduce_func = tf_reduce_func
     self._reduce_func.add_to_graph(ops.get_default_graph())
 
+  @property
+  def output_classes(self):
+    return self._output_classes
+
   @property
   def output_shapes(self):
     return self._output_shapes
@@ -203,5 +218,6 @@ class GroupByWindowDataset(dataset_ops.Dataset):
         reduce_func=self._reduce_func,
         window_size_func=self._window_size_func,
         output_types=nest.flatten(
-            sparse.unwrap_sparse_types(self.output_types)),
-        output_shapes=nest.flatten(self.output_shapes))
+            sparse.as_dense_types(self.output_types, self.output_classes)),
+        output_shapes=nest.flatten(
+            sparse.as_dense_shapes(self.output_shapes, self.output_classes)))
diff --git a/tensorflow/contrib/data/python/ops/interleave_ops.py b/tensorflow/contrib/data/python/ops/interleave_ops.py
index 830642c040..53324e06e7 100644
--- a/tensorflow/contrib/data/python/ops/interleave_ops.py
+++ b/tensorflow/contrib/data/python/ops/interleave_ops.py
@@ -36,17 +36,21 @@ class ParallelInterleaveDataset(dataset_ops.Dataset):
     super(ParallelInterleaveDataset, self).__init__()
     self._input_dataset = input_dataset
 
-    @function.Defun(
-        *nest.flatten(sparse.unwrap_sparse_types(input_dataset.output_types)))
+    @function.Defun(*nest.flatten(
+        sparse.as_dense_types(input_dataset.output_types,
+                              input_dataset.output_classes)))
     def tf_map_func(*args):
       """A wrapper for Defun that facilitates shape inference."""
       # Pass in shape information from the input_dataset.
-      for arg, shape in zip(args, nest.flatten(input_dataset.output_shapes)):
+      dense_shapes = sparse.as_dense_shapes(input_dataset.output_shapes,
+                                            input_dataset.output_classes)
+      for arg, shape in zip(args, nest.flatten(dense_shapes)):
         arg.set_shape(shape)
 
       nested_args = nest.pack_sequence_as(input_dataset.output_types, args)
       nested_args = sparse.deserialize_sparse_tensors(
-          nested_args, input_dataset.output_types)
+          nested_args, input_dataset.output_types, input_dataset.output_shapes,
+          input_dataset.output_classes)
       if dataset_ops._should_unpack_args(nested_args):  # pylint: disable=protected-access
         dataset = map_func(*nested_args)
       else:
@@ -55,6 +59,7 @@ class ParallelInterleaveDataset(dataset_ops.Dataset):
       if not isinstance(dataset, dataset_ops.Dataset):
         raise TypeError("`map_func` must return a `Dataset` object.")
 
+      self._output_classes = dataset.output_classes
       self._output_types = dataset.output_types
       self._output_shapes = dataset.output_shapes
 
@@ -79,8 +84,13 @@ class ParallelInterleaveDataset(dataset_ops.Dataset):
         self._sloppy,
         f=self._map_func,
         output_types=nest.flatten(
-            sparse.unwrap_sparse_types(self.output_types)),
-        output_shapes=nest.flatten(self.output_shapes))
+            sparse.as_dense_types(self.output_types, self.output_classes)),
+        output_shapes=nest.flatten(
+            sparse.as_dense_shapes(self.output_shapes, self.output_classes)))
+
+  @property
+  def output_classes(self):
+    return self._output_classes
 
   @property
   def output_shapes(self):
diff --git a/tensorflow/contrib/data/python/ops/readers.py b/tensorflow/contrib/data/python/ops/readers.py
index 632082b5f1..bb47832fe9 100644
--- a/tensorflow/contrib/data/python/ops/readers.py
+++ b/tensorflow/contrib/data/python/ops/readers.py
@@ -269,6 +269,10 @@ class _SqlDataset(dataset_ops.Dataset):
                                        nest.flatten(self.output_types),
                                        nest.flatten(self.output_shapes))
 
+  @property
+  def output_classes(self):
+    return nest.map_structure(lambda _: ops.Tensor, self._output_types)
+
   @property
   def output_shapes(self):
     return nest.map_structure(lambda _: tensor_shape.TensorShape([]),
diff --git a/tensorflow/contrib/data/python/ops/scan_ops.py b/tensorflow/contrib/data/python/ops/scan_ops.py
index 2cfc0709cd..7c595b1814 100644
--- a/tensorflow/contrib/data/python/ops/scan_ops.py
+++ b/tensorflow/contrib/data/python/ops/scan_ops.py
@@ -53,6 +53,7 @@ class _ScanDataset(dataset_ops.Dataset):
         [t.dtype for t in nest.flatten(self._initial_state)])
 
     # Will be populated by calling `tf_scan_func`.
+    self._output_classes = None
     self._output_shapes = None
     self._output_types = None
 
@@ -68,13 +69,16 @@ class _ScanDataset(dataset_ops.Dataset):
       flat_new_state_shapes = []
 
       @function.Defun(*(flat_state_types + nest.flatten(
-          sparse.unwrap_sparse_types(input_dataset.output_types))))
+          sparse.as_dense_types(input_dataset.output_types,
+                                input_dataset.output_classes))))  # pylint: disable=protected-access
       def tf_scan_func(*args):
         """A wrapper for Defun that facilitates shape inference."""
         # Pass in shape information from the state and input_dataset.
-        for arg, shape in zip(
-            args,
-            flat_state_shapes + nest.flatten(input_dataset.output_shapes)):
+        # TODO(b/69424092): Check that neither inputs nor outputs are sparse.
+        dense_shapes = sparse.as_dense_shapes(input_dataset.output_shapes,
+                                              input_dataset.output_classes)  # pylint: disable=protected-access
+        for arg, shape in zip(args,
+                              flat_state_shapes + nest.flatten(dense_shapes)):
           arg.set_shape(shape)
 
         pivot = len(flat_state_shapes)
@@ -108,6 +112,8 @@ class _ScanDataset(dataset_ops.Dataset):
                 "state. Expected %s; got %s." %
                 (self._state_types, nest.pack_sequence_as(
                     self._state_types, [t.dtype for t in flat_new_state])))
+        self._output_classes = nest.pack_sequence_as(
+            output_value, [ops.Tensor for _ in flat_output_value])
         self._output_types = nest.pack_sequence_as(
             output_value, [t.dtype for t in flat_output_value])
 
@@ -147,8 +153,13 @@ class _ScanDataset(dataset_ops.Dataset):
         self._scan_func.captured_inputs,
         f=self._scan_func,
         output_types=nest.flatten(
-            sparse.unwrap_sparse_types(self.output_types)),
-        output_shapes=nest.flatten(self.output_shapes))
+            sparse.as_dense_types(self.output_types, self.output_classes)),
+        output_shapes=nest.flatten(
+            sparse.as_dense_shapes(self.output_shapes, self.output_classes)))
+
+  @property
+  def output_classes(self):
+    return self._output_classes
 
   @property
   def output_shapes(self):
diff --git a/tensorflow/python/data/__init__.py b/tensorflow/python/data/__init__.py
index 504500d245..239f9b0d59 100644
--- a/tensorflow/python/data/__init__.py
+++ b/tensorflow/python/data/__init__.py
@@ -21,7 +21,6 @@ See the @{$datasets$Importing Data} Programmer's Guide for an overview.
 @@FixedLengthRecordDataset
 @@TextLineDataset
 @@TFRecordDataset
-@@SparseType
 """
 
 from __future__ import absolute_import
@@ -34,7 +33,6 @@ from tensorflow.python.data.ops.iterator_ops import Iterator
 from tensorflow.python.data.ops.readers import FixedLengthRecordDataset
 from tensorflow.python.data.ops.readers import TextLineDataset
 from tensorflow.python.data.ops.readers import TFRecordDataset
-from tensorflow.python.data.util.sparse import SparseType
 # pylint: enable=unused-import
 
 from tensorflow.python.util.all_util import remove_undocumented
diff --git a/tensorflow/python/data/ops/dataset_ops.py b/tensorflow/python/data/ops/dataset_ops.py
index 5f981e2670..d434c8e522 100644
--- a/tensorflow/python/data/ops/dataset_ops.py
+++ b/tensorflow/python/data/ops/dataset_ops.py
@@ -97,13 +97,15 @@ class Dataset(object):
         container="",
         shared_name=shared_name,
         output_types=nest.flatten(
-            sparse.unwrap_sparse_types(self.output_types)),
-        output_shapes=nest.flatten(self.output_shapes))
+            sparse.as_dense_types(self.output_types, self.output_classes)),
+        output_shapes=nest.flatten(
+            sparse.as_dense_shapes(self.output_shapes, self.output_classes)))
     with ops.colocate_with(iterator_resource):
       initializer = gen_dataset_ops.make_iterator(self._as_variant_tensor(),
                                                   iterator_resource)
     return iterator_ops.Iterator(iterator_resource, initializer,
-                                 self.output_types, self.output_shapes)
+                                 self.output_types, self.output_shapes,
+                                 self.output_classes)
 
   def make_one_shot_iterator(self):
     """Creates an `Iterator` for enumerating the elements of this dataset.
@@ -144,9 +146,23 @@ class Dataset(object):
         gen_dataset_ops.one_shot_iterator(
             dataset_factory=_make_dataset,
             output_types=nest.flatten(
-                sparse.unwrap_sparse_types(self.output_types)),
-            output_shapes=nest.flatten(self.output_shapes)), None,
-        self.output_types, self.output_shapes)
+                sparse.as_dense_types(self.output_types, self.output_classes)),
+            output_shapes=nest.flatten(
+                sparse.as_dense_shapes(self.output_shapes,
+                                       self.output_classes))), None,
+        self.output_types, self.output_shapes, self.output_classes)
+
+  @abc.abstractproperty
+  def output_classes(self):
+    """Returns the class of each component of an element of this dataset.
+
+    The expected values are `tf.Tensor` and `tf.SparseTensor`.
+
+    Returns:
+      A nested structure of Python `type` objects corresponding to each
+      component of an element of this dataset.
+    """
+    raise NotImplementedError("Dataset.output_classes")
 
   @abc.abstractproperty
   def output_shapes(self):
@@ -163,9 +179,8 @@ class Dataset(object):
     """Returns the type of each component of an element of this dataset.
 
     Returns:
-      A nested structure of `tf.DType` (or `tf.data.SparseType`) objects
-      corresponding to each `tf.Tensor` (or `tf.SparseTensor`) component of an
-      element of this dataset.
+      A nested structure of `tf.DType` objects corresponding to each component
+      of an element of this dataset.
     """
     raise NotImplementedError("Dataset.output_types")
 
@@ -882,7 +897,13 @@ class TensorDataset(Dataset):
   def _as_variant_tensor(self):
     return gen_dataset_ops.tensor_dataset(
         nest.flatten(self._tensors),
-        output_shapes=nest.flatten(self.output_shapes))
+        output_shapes=nest.flatten(
+            sparse.as_dense_shapes(self.output_shapes, self.output_classes)))
+
+  @property
+  def output_classes(self):
+    return nest.pack_sequence_as(
+        self._tensors, [ops.Tensor for _ in nest.flatten(self._tensors)])
 
   @property
   def output_shapes(self):
@@ -915,7 +936,13 @@ class TensorSliceDataset(Dataset):
   def _as_variant_tensor(self):
     return gen_dataset_ops.tensor_slice_dataset(
         nest.flatten(self._tensors),
-        output_shapes=nest.flatten(self.output_shapes))
+        output_shapes=nest.flatten(
+            sparse.as_dense_shapes(self.output_shapes, self.output_classes)))
+
+  @property
+  def output_classes(self):
+    return nest.pack_sequence_as(
+        self._tensors, [ops.Tensor for _ in nest.flatten(self._tensors)])
 
   @property
   def output_shapes(self):
@@ -945,6 +972,10 @@ class SparseTensorSliceDataset(Dataset):
         self._sparse_tensor.indices, self._sparse_tensor.values,
         self._sparse_tensor.dense_shape)
 
+  @property
+  def output_classes(self):
+    return (ops.Tensor, ops.Tensor, ops.Tensor)
+
   @property
   def output_shapes(self):
     indices_shape = self._sparse_tensor.indices.get_shape()
@@ -994,6 +1025,12 @@ class ZipDataset(Dataset):
         ])
     # pylint: enable=protected-access
 
+  @property
+  def output_classes(self):
+    return nest.pack_sequence_as(
+        self._datasets,
+        [ds.output_classes for ds in nest.flatten(self._datasets)])
+
   @property
   def output_shapes(self):
     return nest.pack_sequence_as(
@@ -1030,11 +1067,16 @@ class ConcatenateDataset(Dataset):
     return gen_dataset_ops.concatenate_dataset(
         self._input_dataset._as_variant_tensor(),
         self._dataset_to_concatenate._as_variant_tensor(),
-        output_shapes=nest.flatten(self.output_shapes),
+        output_shapes=nest.flatten(
+            sparse.as_dense_shapes(self.output_shapes, self.output_classes)),
         output_types=nest.flatten(
-            sparse.unwrap_sparse_types(self.output_types)))
+            sparse.as_dense_types(self.output_types, self.output_classes)))
     # pylint: enable=protected-access
 
+  @property
+  def output_classes(self):
+    return self._input_dataset.output_classes
+
   @property
   def output_shapes(self):
     return nest.pack_sequence_as(self._input_dataset.output_shapes, [
@@ -1066,9 +1108,14 @@ class RepeatDataset(Dataset):
     return gen_dataset_ops.repeat_dataset(
         self._input_dataset._as_variant_tensor(),  # pylint: disable=protected-access
         count=self._count,
-        output_shapes=nest.flatten(self.output_shapes),
+        output_shapes=nest.flatten(
+            sparse.as_dense_shapes(self.output_shapes, self.output_classes)),
         output_types=nest.flatten(
-            sparse.unwrap_sparse_types(self.output_types)))
+            sparse.as_dense_types(self.output_types, self.output_classes)))
+
+  @property
+  def output_classes(self):
+    return self._input_dataset.output_classes
 
   @property
   def output_shapes(self):
@@ -1111,9 +1158,14 @@ class RangeDataset(Dataset):
         start=self._start,
         stop=self._stop,
         step=self._step,
-        output_shapes=nest.flatten(self.output_shapes),
+        output_shapes=nest.flatten(
+            sparse.as_dense_shapes(self.output_shapes, self.output_classes)),
         output_types=nest.flatten(
-            sparse.unwrap_sparse_types(self.output_types)))
+            sparse.as_dense_types(self.output_types, self.output_classes)))
+
+  @property
+  def output_classes(self):
+    return ops.Tensor
 
   @property
   def output_shapes(self):
@@ -1138,9 +1190,14 @@ class CacheDataset(Dataset):
     return gen_dataset_ops.cache_dataset(
         self._input_dataset._as_variant_tensor(),  # pylint: disable=protected-access
         filename=self._filename,
-        output_shapes=nest.flatten(self.output_shapes),
+        output_shapes=nest.flatten(
+            sparse.as_dense_shapes(self.output_shapes, self.output_classes)),
         output_types=nest.flatten(
-            sparse.unwrap_sparse_types(self.output_types)))
+            sparse.as_dense_types(self.output_types, self.output_classes)))
+
+  @property
+  def output_classes(self):
+    return self._input_dataset.output_classes
 
   @property
   def output_shapes(self):
@@ -1186,9 +1243,14 @@ class ShuffleDataset(Dataset):
         seed=self._seed,
         seed2=self._seed2,
         reshuffle_each_iteration=self._reshuffle_each_iteration,
-        output_shapes=nest.flatten(self.output_shapes),
+        output_shapes=nest.flatten(
+            sparse.as_dense_shapes(self.output_shapes, self.output_classes)),
         output_types=nest.flatten(
-            sparse.unwrap_sparse_types(self.output_types)))
+            sparse.as_dense_types(self.output_types, self.output_classes)))
+
+  @property
+  def output_classes(self):
+    return self._input_dataset.output_classes
 
   @property
   def output_shapes(self):
@@ -1212,9 +1274,14 @@ class TakeDataset(Dataset):
     return gen_dataset_ops.take_dataset(
         self._input_dataset._as_variant_tensor(),  # pylint: disable=protected-access
         count=self._count,
-        output_shapes=nest.flatten(self.output_shapes),
+        output_shapes=nest.flatten(
+            sparse.as_dense_shapes(self.output_shapes, self.output_classes)),
         output_types=nest.flatten(
-            sparse.unwrap_sparse_types(self.output_types)))
+            sparse.as_dense_types(self.output_types, self.output_classes)))
+
+  @property
+  def output_classes(self):
+    return self._input_dataset.output_classes
 
   @property
   def output_shapes(self):
@@ -1238,9 +1305,14 @@ class SkipDataset(Dataset):
     return gen_dataset_ops.skip_dataset(
         self._input_dataset._as_variant_tensor(),  # pylint: disable=protected-access
         count=self._count,
-        output_shapes=nest.flatten(self.output_shapes),
+        output_shapes=nest.flatten(
+            sparse.as_dense_shapes(self.output_shapes, self.output_classes)),
         output_types=nest.flatten(
-            sparse.unwrap_sparse_types(self.output_types)))
+            sparse.as_dense_types(self.output_types, self.output_classes)))
+
+  @property
+  def output_classes(self):
+    return self._input_dataset.output_classes
 
   @property
   def output_shapes(self):
@@ -1257,7 +1329,7 @@ class BatchDataset(Dataset):
   def __init__(self, input_dataset, batch_size):
     """See `Dataset.batch()` for details."""
     super(BatchDataset, self).__init__()
-    if sparse.any_sparse(input_dataset.output_types):
+    if sparse.any_sparse(input_dataset.output_classes):
       # TODO(b/63669786): support batching of sparse tensors
       raise TypeError("Batching of sparse tensors is not currently supported")
     self._input_dataset = input_dataset
@@ -1268,9 +1340,14 @@ class BatchDataset(Dataset):
     return gen_dataset_ops.batch_dataset(
         self._input_dataset._as_variant_tensor(),  # pylint: disable=protected-access
         batch_size=self._batch_size,
-        output_shapes=nest.flatten(self.output_shapes),
+        output_shapes=nest.flatten(
+            sparse.as_dense_shapes(self.output_shapes, self.output_classes)),
         output_types=nest.flatten(
-            sparse.unwrap_sparse_types(self.output_types)))
+            sparse.as_dense_types(self.output_types, self.output_classes)))
+
+  @property
+  def output_classes(self):
+    return self._input_dataset.output_classes
 
   @property
   def output_shapes(self):
@@ -1330,7 +1407,7 @@ class PaddedBatchDataset(Dataset):
   def __init__(self, input_dataset, batch_size, padded_shapes, padding_values):
     """See `Dataset.batch()` for details."""
     super(PaddedBatchDataset, self).__init__()
-    if sparse.any_sparse(input_dataset.output_types):
+    if sparse.any_sparse(input_dataset.output_classes):
       # TODO(b/63669786): support batching of sparse tensors
       raise TypeError("Batching of sparse tensors is not currently supported")
     self._input_dataset = input_dataset
@@ -1364,7 +1441,12 @@ class PaddedBatchDataset(Dataset):
             for s in nest.flatten(self._padded_shapes)
         ],
         padding_values=nest.flatten(self._padding_values),
-        output_shapes=nest.flatten(self.output_shapes))
+        output_shapes=nest.flatten(
+            sparse.as_dense_shapes(self.output_shapes, self.output_classes)))
+
+  @property
+  def output_classes(self):
+    return self._input_dataset.output_classes
 
   @property
   def output_shapes(self):
@@ -1393,20 +1475,25 @@ class MapDataset(Dataset):
     super(MapDataset, self).__init__()
     self._input_dataset = input_dataset
 
+    self._output_classes = None
     self._output_shapes = None
     self._output_types = None
 
-    @function.Defun(
-        *nest.flatten(sparse.unwrap_sparse_types(input_dataset.output_types)))
+    @function.Defun(*nest.flatten(
+        sparse.as_dense_types(input_dataset.output_types,
+                              input_dataset.output_classes)))
     def tf_map_func(*args):
       """A wrapper for Defun that facilitates shape inference."""
       # Pass in shape information from the input_dataset.
-      for arg, shape in zip(args, nest.flatten(input_dataset.output_shapes)):
+      dense_shapes = sparse.as_dense_shapes(input_dataset.output_shapes,
+                                            input_dataset.output_classes)
+      for arg, shape in zip(args, nest.flatten(dense_shapes)):
         arg.set_shape(shape)
 
       nested_args = nest.pack_sequence_as(input_dataset.output_types, args)
       nested_args = sparse.deserialize_sparse_tensors(
-          nested_args, input_dataset.output_types)
+          nested_args, input_dataset.output_types, input_dataset.output_shapes,
+          input_dataset.output_classes)
       if _should_unpack_args(nested_args):
         ret = map_func(*nested_args)
       else:
@@ -1425,16 +1512,17 @@ class MapDataset(Dataset):
       if isinstance(ret, list):
         ret = tuple(ret)
 
-      # Identify components that hold sparse tensor values.
-      types = sparse.get_sparse_types(ret)
+      self._output_classes = sparse.get_classes(ret)
+      self._output_shapes = nest.pack_sequence_as(
+          ret, [t.get_shape() for t in nest.flatten(ret)])
+      self._output_types = nest.pack_sequence_as(
+          ret, [t.dtype for t in nest.flatten(ret)])
+
       # Serialize any sparse tensors and convert result to tensors.
       ret = nest.pack_sequence_as(ret, [
           ops.convert_to_tensor(t)
           for t in nest.flatten(sparse.serialize_sparse_tensors(ret))
       ])
-      self._output_shapes = nest.pack_sequence_as(
-          types, [t.get_shape() for t in nest.flatten(ret)])
-      self._output_types = sparse.wrap_sparse_types(ret, types)
       return nest.flatten(ret)
 
     self._map_func = tf_map_func
@@ -1447,8 +1535,13 @@ class MapDataset(Dataset):
         self._map_func.captured_inputs,
         f=self._map_func,
         output_types=nest.flatten(
-            sparse.unwrap_sparse_types(self.output_types)),
-        output_shapes=nest.flatten(self.output_shapes))
+            sparse.as_dense_types(self.output_types, self.output_classes)),
+        output_shapes=nest.flatten(
+            sparse.as_dense_shapes(self.output_shapes, self.output_classes)))
+
+  @property
+  def output_classes(self):
+    return self._output_classes
 
   @property
   def output_shapes(self):
@@ -1478,8 +1571,9 @@ class ParallelMapDataset(MapDataset):
         f=self._map_func,
         num_parallel_calls=self._num_parallel_calls,
         output_types=nest.flatten(
-            sparse.unwrap_sparse_types(self.output_types)),
-        output_shapes=nest.flatten(self.output_shapes))
+            sparse.as_dense_types(self.output_types, self.output_classes)),
+        output_shapes=nest.flatten(
+            sparse.as_dense_shapes(self.output_shapes, self.output_classes)))
     # pylint: enable=protected-access
 
 
@@ -1491,17 +1585,21 @@ class FlatMapDataset(Dataset):
     super(FlatMapDataset, self).__init__()
     self._input_dataset = input_dataset
 
-    @function.Defun(
-        *nest.flatten(sparse.unwrap_sparse_types(input_dataset.output_types)))
+    @function.Defun(*nest.flatten(
+        sparse.as_dense_types(input_dataset.output_types,
+                              input_dataset.output_classes)))
     def tf_map_func(*args):
       """A wrapper for Defun that facilitates shape inference."""
       # Pass in shape information from the input_dataset.
-      for arg, shape in zip(args, nest.flatten(input_dataset.output_shapes)):
+      dense_shapes = sparse.as_dense_shapes(input_dataset.output_shapes,
+                                            input_dataset.output_classes)
+      for arg, shape in zip(args, nest.flatten(dense_shapes)):
         arg.set_shape(shape)
 
       nested_args = nest.pack_sequence_as(input_dataset.output_types, args)
       nested_args = sparse.deserialize_sparse_tensors(
-          nested_args, input_dataset.output_types)
+          nested_args, input_dataset.output_types, input_dataset.output_shapes,
+          input_dataset.output_classes)
       if _should_unpack_args(nested_args):
         dataset = map_func(*nested_args)
       else:
@@ -1510,6 +1608,7 @@ class FlatMapDataset(Dataset):
       if not isinstance(dataset, Dataset):
         raise TypeError("`map_func` must return a `Dataset` object.")
 
+      self._output_classes = dataset.output_classes
       self._output_types = dataset.output_types
       self._output_shapes = dataset.output_shapes
 
@@ -1524,8 +1623,13 @@ class FlatMapDataset(Dataset):
         self._map_func.captured_inputs,
         f=self._map_func,
         output_types=nest.flatten(
-            sparse.unwrap_sparse_types(self.output_types)),
-        output_shapes=nest.flatten(self.output_shapes))
+            sparse.as_dense_types(self.output_types, self.output_classes)),
+        output_shapes=nest.flatten(
+            sparse.as_dense_shapes(self.output_shapes, self.output_classes)))
+
+  @property
+  def output_classes(self):
+    return self._output_classes
 
   @property
   def output_shapes(self):
@@ -1545,17 +1649,21 @@ class InterleaveDataset(Dataset):
     super(InterleaveDataset, self).__init__()
     self._input_dataset = input_dataset
 
-    @function.Defun(
-        *nest.flatten(sparse.unwrap_sparse_types(input_dataset.output_types)))
+    @function.Defun(*nest.flatten(
+        sparse.as_dense_types(input_dataset.output_types,
+                              input_dataset.output_classes)))
     def tf_map_func(*args):
       """A wrapper for Defun that facilitates shape inference."""
       # Pass in shape information from the input_dataset.
-      for arg, shape in zip(args, nest.flatten(input_dataset.output_shapes)):
+      dense_shapes = sparse.as_dense_shapes(input_dataset.output_shapes,
+                                            input_dataset.output_classes)
+      for arg, shape in zip(args, nest.flatten(dense_shapes)):
         arg.set_shape(shape)
 
       nested_args = nest.pack_sequence_as(input_dataset.output_types, args)
       nested_args = sparse.deserialize_sparse_tensors(
-          nested_args, input_dataset.output_types)
+          nested_args, input_dataset.output_types, input_dataset.output_shapes,
+          input_dataset.output_classes)
       if _should_unpack_args(nested_args):
         dataset = map_func(*nested_args)
       else:
@@ -1564,6 +1672,7 @@ class InterleaveDataset(Dataset):
       if not isinstance(dataset, Dataset):
         raise TypeError("`map_func` must return a `Dataset` object.")
 
+      self._output_classes = dataset.output_classes
       self._output_types = dataset.output_types
       self._output_shapes = dataset.output_shapes
 
@@ -1585,8 +1694,13 @@ class InterleaveDataset(Dataset):
         self._block_length,
         f=self._map_func,
         output_types=nest.flatten(
-            sparse.unwrap_sparse_types(self.output_types)),
-        output_shapes=nest.flatten(self.output_shapes))
+            sparse.as_dense_types(self.output_types, self.output_classes)),
+        output_shapes=nest.flatten(
+            sparse.as_dense_shapes(self.output_shapes, self.output_classes)))
+
+  @property
+  def output_classes(self):
+    return self._output_classes
 
   @property
   def output_shapes(self):
@@ -1605,17 +1719,21 @@ class FilterDataset(Dataset):
     super(FilterDataset, self).__init__()
     self._input_dataset = input_dataset
 
-    @function.Defun(
-        *nest.flatten(sparse.unwrap_sparse_types(input_dataset.output_types)))
+    @function.Defun(*nest.flatten(
+        sparse.as_dense_types(input_dataset.output_types,
+                              input_dataset.output_classes)))
     def tf_predicate(*args):
       """A wrapper for Defun that facilitates shape inference."""
       # Pass in shape information from the input_dataset.
-      for arg, shape in zip(args, nest.flatten(input_dataset.output_shapes)):
+      dense_shapes = sparse.as_dense_shapes(input_dataset.output_shapes,
+                                            input_dataset.output_classes)
+      for arg, shape in zip(args, nest.flatten(dense_shapes)):
         arg.set_shape(shape)
 
       nested_args = nest.pack_sequence_as(input_dataset.output_types, args)
       nested_args = sparse.deserialize_sparse_tensors(
-          nested_args, input_dataset.output_types)
+          nested_args, input_dataset.output_types, input_dataset.output_shapes,
+          input_dataset.output_classes)
       if _should_unpack_args(nested_args):
         ret = predicate(*nested_args)
       else:
@@ -1637,8 +1755,13 @@ class FilterDataset(Dataset):
         other_arguments=self._predicate.captured_inputs,
         predicate=self._predicate,
         output_types=nest.flatten(
-            sparse.unwrap_sparse_types(self.output_types)),
-        output_shapes=nest.flatten(self.output_shapes))
+            sparse.as_dense_types(self.output_types, self.output_classes)),
+        output_shapes=nest.flatten(
+            sparse.as_dense_shapes(self.output_shapes, self.output_classes)))
+
+  @property
+  def output_classes(self):
+    return self._input_dataset.output_classes
 
   @property
   def output_shapes(self):
@@ -1663,9 +1786,14 @@ class PrefetchDataset(Dataset):
     return gen_dataset_ops.prefetch_dataset(
         self._input_dataset._as_variant_tensor(),  # pylint: disable=protected-access
         buffer_size=self._buffer_size,
-        output_shapes=nest.flatten(self.output_shapes),
+        output_shapes=nest.flatten(
+            sparse.as_dense_shapes(self.output_shapes, self.output_classes)),
         output_types=nest.flatten(
-            sparse.unwrap_sparse_types(self.output_types)))
+            sparse.as_dense_types(self.output_types, self.output_classes)))
+
+  @property
+  def output_classes(self):
+    return self._input_dataset.output_classes
 
   @property
   def output_shapes(self):
diff --git a/tensorflow/python/data/ops/iterator_ops.py b/tensorflow/python/data/ops/iterator_ops.py
index 987a9b53ad..663bed07b2 100644
--- a/tensorflow/python/data/ops/iterator_ops.py
+++ b/tensorflow/python/data/ops/iterator_ops.py
@@ -29,7 +29,7 @@ class Iterator(object):
   """Represents the state of iterating through a `Dataset`."""
 
   def __init__(self, iterator_resource, initializer, output_types,
-               output_shapes):
+               output_shapes, output_classes):
     """Creates a new iterator from the given iterator resource.
 
     Note: Most users will not call this initializer directly, and will
@@ -41,21 +41,27 @@ class Iterator(object):
         iterator.
       initializer: A `tf.Operation` that should be run to initialize this
         iterator.
-      output_types: A nested structure of `tf.DType` (or `tf.data.SparseType`)
-        objects corresponding to each `tf.Tensor` (or `tf.SparseTensor`)
-        component of an element of this dataset.
+      output_types: A nested structure of `tf.DType` objects corresponding to
+        each component of an element of this dataset.
       output_shapes: A nested structure of `tf.TensorShape` objects
         corresponding to each component of an element of this dataset.
+      output_classes: A nested structure of Python `type` object corresponding
+        to each
+        component of an element of this iterator.
     """
     self._iterator_resource = iterator_resource
     self._initializer = initializer
+    self._output_classes = output_classes
     self._output_types = output_types
     self._output_shapes = output_shapes
     self._string_handle = gen_dataset_ops.iterator_to_string_handle(
         self._iterator_resource)
 
   @staticmethod
-  def from_structure(output_types, output_shapes=None, shared_name=None):
+  def from_structure(output_types,
+                     output_shapes=None,
+                     shared_name=None,
+                     output_classes=None):
     """Creates a new, uninitialized `Iterator` with the given structure.
 
     This iterator-constructing method can be used to create an iterator that
@@ -102,15 +108,17 @@ class Iterator(object):
     ```
 
     Args:
-      output_types: A nested structure of `tf.DType` (or `tf.data.SparseType`)
-        objects corresponding to each `tf.Tensor` (or `tf.SparseTensor`)
-        component of an element of this dataset.
+      output_types: A nested structure of `tf.DType` objects corresponding to
+        each component of an element of this dataset.
       output_shapes: (Optional.) A nested structure of `tf.TensorShape` objects
         corresponding to each component of an element of this dataset. If
         omitted, each component will have an unconstrainted shape.
       shared_name: (Optional.) If non-empty, this iterator will be shared under
         the given name across multiple sessions that share the same devices
         (e.g. when using a remote server).
+      output_classes: (Optional.) A nested structure of Python `type` objects
+        corresponding to each component of an element of this iterator. If
+        omitted, each component is assumed to be of type `tf.Tensor`.
 
     Returns:
       An `Iterator`.
@@ -126,18 +134,24 @@ class Iterator(object):
     else:
       output_shapes = nest.map_structure_up_to(
           output_types, tensor_shape.as_shape, output_shapes)
+    if output_classes is None:
+      output_classes = nest.map_structure(lambda _: ops.Tensor, output_types)
     nest.assert_same_structure(output_types, output_shapes)
     if shared_name is None:
       shared_name = ""
     iterator_resource = gen_dataset_ops.iterator(
         container="",
         shared_name=shared_name,
-        output_types=nest.flatten(sparse.unwrap_sparse_types(output_types)),
+        output_types=nest.flatten(output_types),
         output_shapes=nest.flatten(output_shapes))
-    return Iterator(iterator_resource, None, output_types, output_shapes)
+    return Iterator(iterator_resource, None, output_types, output_shapes,
+                    output_classes)
 
   @staticmethod
-  def from_string_handle(string_handle, output_types, output_shapes=None):
+  def from_string_handle(string_handle,
+                         output_types,
+                         output_shapes=None,
+                         output_classes=None):
     """Creates a new, uninitialized `Iterator` based on the given handle.
 
     This method allows you to define a "feedable" iterator where you can choose
@@ -170,12 +184,14 @@ class Iterator(object):
     Args:
       string_handle: A scalar `tf.Tensor` of type `tf.string` that evaluates
         to a handle produced by the `Iterator.string_handle()` method.
-      output_types: A nested structure of `tf.DType` (or `tf.data.SparseType`)
-        objects corresponding to each `tf.Tensor` (or `tf.SparseTensor`)
-        component of an element of this dataset.
+      output_types: A nested structure of `tf.DType` objects corresponding to
+        each component of an element of this dataset.
       output_shapes: (Optional.) A nested structure of `tf.TensorShape` objects
         corresponding to each component of an element of this dataset. If
         omitted, each component will have an unconstrainted shape.
+      output_classes: (Optional.) A nested structure of Python `type` objects
+        corresponding to each component of an element of this iterator. If
+        omitted, each component is assumed to be of type `tf.Tensor`.
 
     Returns:
       An `Iterator`.
@@ -187,13 +203,16 @@ class Iterator(object):
     else:
       output_shapes = nest.map_structure_up_to(
           output_types, tensor_shape.as_shape, output_shapes)
+    if output_classes is None:
+      output_classes = nest.map_structure(lambda _: ops.Tensor, output_types)
     nest.assert_same_structure(output_types, output_shapes)
     string_handle = ops.convert_to_tensor(string_handle, dtype=dtypes.string)
     iterator_resource = gen_dataset_ops.iterator_from_string_handle(
         string_handle,
-        output_types=nest.flatten(sparse.unwrap_sparse_types(output_types)),
+        output_types=nest.flatten(output_types),
         output_shapes=nest.flatten(output_shapes))
-    return Iterator(iterator_resource, None, output_types, output_shapes)
+    return Iterator(iterator_resource, None, output_types, output_shapes,
+                    output_classes)
 
   @property
   def initializer(self):
@@ -230,6 +249,13 @@ class Iterator(object):
     with ops.name_scope(name, "make_initializer") as name:
       nest.assert_same_structure(self._output_types, dataset.output_types)
       nest.assert_same_structure(self._output_shapes, dataset.output_shapes)
+      for iterator_class, dataset_class in zip(
+          nest.flatten(self._output_classes),
+          nest.flatten(dataset.output_classes)):
+        if iterator_class is not dataset_class:
+          raise TypeError(
+              "Expected output classes %r but got dataset with output class %r."
+              % (self._output_classes, dataset.output_classes))
       for iterator_dtype, dataset_dtype in zip(
           nest.flatten(self._output_types), nest.flatten(dataset.output_types)):
         if iterator_dtype != dataset_dtype:
@@ -237,8 +263,8 @@ class Iterator(object):
               "Expected output types %r but got dataset with output types %r." %
               (self._output_types, dataset.output_types))
       for iterator_shape, dataset_shape in zip(
-          nest.flatten(self._output_shapes),
-          nest.flatten(dataset.output_shapes)):
+          nest.flatten(self._output_shapes), nest.flatten(
+              dataset.output_shapes)):
         if not iterator_shape.is_compatible_with(dataset_shape):
           raise TypeError("Expected output shapes compatible with %r but got "
                           "dataset with output shapes %r." %
@@ -261,11 +287,15 @@ class Iterator(object):
                               gen_dataset_ops.iterator_get_next(
                                   self._iterator_resource,
                                   output_types=nest.flatten(
-                                      sparse.unwrap_sparse_types(
-                                          self._output_types)),
+                                      sparse.as_dense_types(
+                                          self._output_types,
+                                          self._output_classes)),
                                   output_shapes=nest.flatten(
-                                      self._output_shapes),
-                                  name=name)), self._output_types)
+                                      sparse.as_dense_shapes(
+                                          self._output_shapes,
+                                          self._output_classes)),
+                                  name=name)), self._output_types,
+        self._output_shapes, self._output_classes)
 
   def string_handle(self, name=None):
     """Returns a string-valued `tf.Tensor` that represents this iterator.
@@ -282,13 +312,25 @@ class Iterator(object):
       return gen_dataset_ops.iterator_to_string_handle(
           self._iterator_resource, name=name)
 
+  @property
+  def output_classes(self):
+    """Returns the class of each component of an element of this iterator.
+
+    The expected values are `tf.Tensor` and `tf.SparseTensor`.
+
+    Returns:
+      A nested structure of Python `type` objects corresponding to each
+      component of an element of this dataset.
+    """
+    return self._output_classes
+
   @property
   def output_shapes(self):
     """Returns the shape of each component of an element of this iterator.
 
     Returns:
       A nested structure of `tf.TensorShape` objects corresponding to each
-      component of an element of this iterator.
+      component of an element of this dataset.
     """
     return self._output_shapes
 
@@ -297,8 +339,7 @@ class Iterator(object):
     """Returns the type of each component of an element of this iterator.
 
     Returns:
-      A nested structure of `tf.DType` (or `tf.data.SparseType`) objects
-      corresponding to each `tf.Tensor` (or `tf.SparseTensor`) component of an
-      element of this dataset.
+      A nested structure of `tf.DType` objects corresponding to each component
+      of an element of this dataset.
     """
     return self._output_types
diff --git a/tensorflow/python/data/ops/readers.py b/tensorflow/python/data/ops/readers.py
index 531716581f..c6fb8531ae 100644
--- a/tensorflow/python/data/ops/readers.py
+++ b/tensorflow/python/data/ops/readers.py
@@ -70,6 +70,10 @@ class TextLineDataset(Dataset):
     return gen_dataset_ops.text_line_dataset(
         self._filenames, self._compression_type, self._buffer_size)
 
+  @property
+  def output_classes(self):
+    return ops.Tensor
+
   @property
   def output_shapes(self):
     return tensor_shape.scalar()
@@ -110,6 +114,10 @@ class TFRecordDataset(Dataset):
     return gen_dataset_ops.tf_record_dataset(
         self._filenames, self._compression_type, self._buffer_size)
 
+  @property
+  def output_classes(self):
+    return ops.Tensor
+
   @property
   def output_shapes(self):
     return tensor_shape.TensorShape([])
@@ -159,6 +167,10 @@ class FixedLengthRecordDataset(Dataset):
         self._filenames, self._header_bytes, self._record_bytes,
         self._footer_bytes, self._buffer_size)
 
+  @property
+  def output_classes(self):
+    return ops.Tensor
+
   @property
   def output_shapes(self):
     return tensor_shape.scalar()
diff --git a/tensorflow/python/data/util/BUILD b/tensorflow/python/data/util/BUILD
index 41d8513b16..f7d7fe98d3 100644
--- a/tensorflow/python/data/util/BUILD
+++ b/tensorflow/python/data/util/BUILD
@@ -38,8 +38,10 @@ py_library(
     deps = [
         ":nest",
         "//tensorflow/python:dtypes",
+        "//tensorflow/python:ops",
         "//tensorflow/python:sparse_ops",
         "//tensorflow/python:sparse_tensor",
+        "//tensorflow/python:tensor_shape",
         "//tensorflow/python:util",
         "@six_archive//:six",
     ],
@@ -56,6 +58,7 @@ py_test(
         "//tensorflow/python:client_testlib",
         "//tensorflow/python:dtypes",
         "//tensorflow/python:sparse_tensor",
+        "//tensorflow/python:tensor_shape",
     ],
 )
 
diff --git a/tensorflow/python/data/util/sparse.py b/tensorflow/python/data/util/sparse.py
index 673fac095c..4d25f6a963 100644
--- a/tensorflow/python/data/util/sparse.py
+++ b/tensorflow/python/data/util/sparse.py
@@ -19,29 +19,70 @@ from __future__ import print_function
 
 from tensorflow.python.data.util import nest
 from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import ops
 from tensorflow.python.framework import sparse_tensor
+from tensorflow.python.framework import tensor_shape
 from tensorflow.python.ops import sparse_ops
 
 
-def any_sparse(types):
-  """Checks for sparse tensor types.
+def any_sparse(classes):
+  """Checks for sparse tensor.
 
   Args:
-    types: a structure with tensor types.
+    classes: a structure of objects that identify the dataset item classes
 
   Returns:
-    `True` if `types` contains a sparse tensor type and `False` otherwise.
+    `True` if `classes` contains a sparse tensor type and `False` otherwise.
   """
-  return any([isinstance(ty, SparseType) for ty in nest.flatten(types)])
+  return any([c is sparse_tensor.SparseTensor for c in nest.flatten(classes)])
 
 
-def deserialize_sparse_tensors(tensors, types):
+def as_dense_shapes(shapes, classes):
+  """Converts sparse tensor shapes to their physical shapes.
+
+  Args:
+    shapes: a structure of shapes to convert.
+    classes: a structure of objects that identify the dataset item classes
+
+  Returns:
+    a structure matching the nested structure of `shapes`, containing
+    `tensor_shape.unknown_shape()` at positions where `classes` contains
+    `tf.SparseTensor` and matching contents of `shapes` otherwise
+  """
+  ret = nest.pack_sequence_as(shapes, [
+      tensor_shape.unknown_shape() if c is sparse_tensor.SparseTensor else shape
+      for shape, c in zip(nest.flatten(shapes), nest.flatten(classes))
+  ])
+  return ret
+
+
+def as_dense_types(types, classes):
+  """Converts sparse tensor types to `dtypes.string`.
+
+  Args:
+    types: a structure of types to convert.
+    classes: a structure of objects that identify the dataset item classes
+
+  Returns:
+    a structure matching the nested structure of `types`, containing
+    `dtypes.string` at positions where `classes` contains `tf.SparseTensor` and
+    matching contents of `types` otherwise
+  """
+  ret = nest.pack_sequence_as(types, [
+      dtypes.string if c is sparse_tensor.SparseTensor else ty
+      for ty, c in zip(nest.flatten(types), nest.flatten(classes))
+  ])
+  return ret
+
+
+def deserialize_sparse_tensors(tensors, types, shapes, classes):
   """Deserializes sparse tensors.
 
   Args:
     tensors: a structure of tensors to deserialize.
-    types: a structure object the holds information about which tensors in
-      `tensors` represent serialized sparse tensors
+    types: a structure that holds information about types of `tensors`
+    shapes: a structure that holds information about shapes of `tensors`
+    classes: a structure of objects that identify the dataset item classes
 
   Returns:
     `tensors` with any serialized sparse tensors replaced by their deserialized
@@ -49,27 +90,29 @@ def deserialize_sparse_tensors(tensors, types):
   """
   # TODO(b/63669786): support batching of sparse tensors
   ret = nest.pack_sequence_as(types, [
-      sparse_ops.deserialize_sparse(tensor, ty.dtype)
-      if isinstance(ty, SparseType) else tensor
-      for (tensor, ty) in zip(nest.flatten(tensors), nest.flatten(types))
+      sparse_ops.deserialize_sparse(tensor, dtype=ty, rank=shape.ndims)
+      if c is sparse_tensor.SparseTensor else tensor
+      for (tensor, ty, shape, c) in zip(
+          nest.flatten(tensors), nest.flatten(types), nest.flatten(shapes),
+          nest.flatten(classes))
   ])
   return ret
 
 
-def get_sparse_types(tensors):
-  """Gets sparse types for a structure of tensors.
+def get_classes(tensors):
+  """Gets classes for a structure of tensors.
 
   Args:
-    tensors: the tensor structure to get sparse types for.
+    tensors: the tensor structure to get classes for.
 
   Returns:
     a structure matching the nested structure of `tensors`, containing
-    `SparseType` at positions where `tensors` contains a sparse tensor and
-    `None` otherwise
+    `tf.SparseTensor` at positions where `tensors` contains a sparse tensor and
+    `tf.Tensor` otherwise
   """
   return nest.pack_sequence_as(tensors, [
-      SparseType(tensor.dtype)
-      if isinstance(tensor, sparse_tensor.SparseTensor) else None
+      sparse_tensor.SparseTensor
+      if isinstance(tensor, sparse_tensor.SparseTensor) else ops.Tensor
       for tensor in nest.flatten(tensors)
   ])
 
@@ -90,74 +133,3 @@ def serialize_sparse_tensors(tensors):
       for tensor in nest.flatten(tensors)
   ])
   return ret
-
-
-def unwrap_sparse_types(types):
-  """Unwraps sparse tensor types as `dtypes.string`.
-
-  Args:
-    types: a structure of types to unwrap.
-
-  Returns:
-    a structure matching the nested structure of `types`, containing
-    `dtypes.string` at positions where `types` contains a sparse tensor and
-    matching contents of `types` otherwise
-  """
-  ret = nest.pack_sequence_as(types, [
-      dtypes.string if isinstance(ty, SparseType) else ty
-      for ty in nest.flatten(types)
-  ])
-  return ret
-
-
-def wrap_sparse_types(tensors, types):
-  """Wraps sparse tensor types in `SparseType`.
-
-  Args:
-    tensors: a structure of tensors for which to wrap types.
-    types: a structure that holds information about which tensors in
-      `tensors` represent serialized sparse tensors
-
-  Returns:
-    a structure matching the nested structure of `tensors`, containing
-    `SparseType` at positions where `tensors` contains a sparse tensor and
-    `DType` otherwise
-  """
-  ret = nest.pack_sequence_as(types, [
-      tensor.dtype if ty is None else ty
-      for tensor, ty in zip(nest.flatten(tensors), nest.flatten(types))
-  ])
-  return ret
-
-
-class SparseType(object):
-  """Wrapper class for representing types of sparse tensors in tf.data."""
-
-  def __init__(self, dtype):
-    """Creates a new instace of `SparseType`.
-
-    Args:
-      dtype: the sparse tensor type to wrap.
-    """
-    self._dtype = dtype
-
-  def __repr__(self):
-    return "SparseType({0!r})".format(self._dtype)
-
-  def __eq__(self, other):
-    """Returns `True` iff `self == other`."""
-    if not isinstance(other, SparseType):
-      return False
-    return self._dtype == other.dtype
-
-  def __ne__(self, other):
-    """Returns `True` iff `self != other`."""
-    return not self.__eq__(other)
-
-  def __hash__(self):
-    return self._dtype.__hash__()
-
-  @property
-  def dtype(self):
-    """Returns the wrapped sparse tensor type."""
-    return self._dtype
diff --git a/tensorflow/python/data/util/sparse_test.py b/tensorflow/python/data/util/sparse_test.py
index e30ed639c2..a707570bab 100644
--- a/tensorflow/python/data/util/sparse_test.py
+++ b/tensorflow/python/data/util/sparse_test.py
@@ -22,7 +22,9 @@ from tensorflow.python.data.util import nest
 from tensorflow.python.data.util import sparse
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import ops
 from tensorflow.python.framework import sparse_tensor
+from tensorflow.python.framework import tensor_shape
 from tensorflow.python.platform import test
 
 
@@ -30,17 +32,200 @@ class SparseTest(test.TestCase):
 
   def testAnySparse(self):
     test_cases = (
-        ((), False),
-        ((None), False),
-        ((dtypes.string), False),
-        ((None, -1, dtypes.string), False),
-        ((sparse.SparseType(dtypes.string)), True),
-        ((None, sparse.SparseType(dtypes.string)), True),
-        ((sparse.SparseType(dtypes.string), dtypes.string), True),
-        ((((sparse.SparseType(dtypes.string)))), True)
+        {
+            "classes": (),
+            "expected": False
+        },
+        {
+            "classes": (ops.Tensor),
+            "expected": False
+        },
+        {
+            "classes": (((ops.Tensor))),
+            "expected": False
+        },
+        {
+            "classes": (ops.Tensor, ops.Tensor),
+            "expected": False
+        },
+        {
+            "classes": (ops.Tensor, sparse_tensor.SparseTensor),
+            "expected": True
+        },
+        {
+            "classes": (sparse_tensor.SparseTensor, sparse_tensor.SparseTensor),
+            "expected":
+                True
+        },
+        {
+            "classes": (sparse_tensor.SparseTensor, ops.Tensor),
+            "expected": True
+        },
+        {
+            "classes": (((sparse_tensor.SparseTensor))),
+            "expected": True
+        },
     )
     for test_case in test_cases:
-      self.assertEqual(sparse.any_sparse(test_case[0]), test_case[1])
+      self.assertEqual(
+          sparse.any_sparse(test_case["classes"]), test_case["expected"])
+
+  def assertShapesEqual(self, a, b):
+    for a, b in zip(nest.flatten(a), nest.flatten(b)):
+      self.assertEqual(a.ndims, b.ndims)
+      if a.ndims is None:
+        continue
+      for c, d in zip(a.as_list(), b.as_list()):
+        self.assertEqual(c, d)
+
+  def testAsDenseShapes(self):
+    test_cases = (
+        {
+            "types": (),
+            "classes": (),
+            "expected": ()
+        },
+        {
+            "types": tensor_shape.scalar(),
+            "classes": ops.Tensor,
+            "expected": tensor_shape.scalar()
+        },
+        {
+            "types": tensor_shape.scalar(),
+            "classes": sparse_tensor.SparseTensor,
+            "expected": tensor_shape.unknown_shape()
+        },
+        {
+            "types": (tensor_shape.scalar()),
+            "classes": (ops.Tensor),
+            "expected": (tensor_shape.scalar())
+        },
+        {
+            "types": (tensor_shape.scalar()),
+            "classes": (sparse_tensor.SparseTensor),
+            "expected": (tensor_shape.unknown_shape())
+        },
+        {
+            "types": (tensor_shape.scalar(), ()),
+            "classes": (ops.Tensor, ()),
+            "expected": (tensor_shape.scalar(), ())
+        },
+        {
+            "types": ((), tensor_shape.scalar()),
+            "classes": ((), ops.Tensor),
+            "expected": ((), tensor_shape.scalar())
+        },
+        {
+            "types": (tensor_shape.scalar(), ()),
+            "classes": (sparse_tensor.SparseTensor, ()),
+            "expected": (tensor_shape.unknown_shape(), ())
+        },
+        {
+            "types": ((), tensor_shape.scalar()),
+            "classes": ((), sparse_tensor.SparseTensor),
+            "expected": ((), tensor_shape.unknown_shape())
+        },
+        {
+            "types": (tensor_shape.scalar(), (), tensor_shape.scalar()),
+            "classes": (ops.Tensor, (), ops.Tensor),
+            "expected": (tensor_shape.scalar(), (), tensor_shape.scalar())
+        },
+        {
+            "types": (tensor_shape.scalar(), (), tensor_shape.scalar()),
+            "classes": (sparse_tensor.SparseTensor, (),
+                        sparse_tensor.SparseTensor),
+            "expected": (tensor_shape.unknown_shape(), (),
+                         tensor_shape.unknown_shape())
+        },
+        {
+            "types": ((), tensor_shape.scalar(), ()),
+            "classes": ((), ops.Tensor, ()),
+            "expected": ((), tensor_shape.scalar(), ())
+        },
+        {
+            "types": ((), tensor_shape.scalar(), ()),
+            "classes": ((), sparse_tensor.SparseTensor, ()),
+            "expected": ((), tensor_shape.unknown_shape(), ())
+        },
+    )
+    for test_case in test_cases:
+      self.assertShapesEqual(
+          sparse.as_dense_shapes(test_case["types"], test_case["classes"]),
+          test_case["expected"])
+
+  def testAsDenseTypes(self):
+    test_cases = (
+        {
+            "types": (),
+            "classes": (),
+            "expected": ()
+        },
+        {
+            "types": dtypes.int32,
+            "classes": ops.Tensor,
+            "expected": dtypes.int32
+        },
+        {
+            "types": dtypes.int32,
+            "classes": sparse_tensor.SparseTensor,
+            "expected": dtypes.string
+        },
+        {
+            "types": (dtypes.int32),
+            "classes": (ops.Tensor),
+            "expected": (dtypes.int32)
+        },
+        {
+            "types": (dtypes.int32),
+            "classes": (sparse_tensor.SparseTensor),
+            "expected": (dtypes.string)
+        },
+        {
+            "types": (dtypes.int32, ()),
+            "classes": (ops.Tensor, ()),
+            "expected": (dtypes.int32, ())
+        },
+        {
+            "types": ((), dtypes.int32),
+            "classes": ((), ops.Tensor),
+            "expected": ((), dtypes.int32)
+        },
+        {
+            "types": (dtypes.int32, ()),
+            "classes": (sparse_tensor.SparseTensor, ()),
+            "expected": (dtypes.string, ())
+        },
+        {
+            "types": ((), dtypes.int32),
+            "classes": ((), sparse_tensor.SparseTensor),
+            "expected": ((), dtypes.string)
+        },
+        {
+            "types": (dtypes.int32, (), dtypes.int32),
+            "classes": (ops.Tensor, (), ops.Tensor),
+            "expected": (dtypes.int32, (), dtypes.int32)
+        },
+        {
+            "types": (dtypes.int32, (), dtypes.int32),
+            "classes": (sparse_tensor.SparseTensor, (),
+                        sparse_tensor.SparseTensor),
+            "expected": (dtypes.string, (), dtypes.string)
+        },
+        {
+            "types": ((), dtypes.int32, ()),
+            "classes": ((), ops.Tensor, ()),
+            "expected": ((), dtypes.int32, ())
+        },
+        {
+            "types": ((), dtypes.int32, ()),
+            "classes": ((), sparse_tensor.SparseTensor, ()),
+            "expected": ((), dtypes.string, ())
+        },
+    )
+    for test_case in test_cases:
+      self.assertEqual(
+          sparse.as_dense_types(test_case["types"], test_case["classes"]),
+          test_case["expected"])
 
   def assertSparseValuesEqual(self, a, b):
     if not isinstance(a, sparse_tensor.SparseTensor):
@@ -70,71 +255,74 @@ class SparseTest(test.TestCase):
             indices=[[0, 0]], values=[1], dense_shape=[1, 1])),
     )
     for expected in test_cases:
+      classes = sparse.get_classes(expected)
+      shapes = nest.map_structure(lambda _: tensor_shape.TensorShape(None),
+                                  classes)
+      types = nest.map_structure(lambda _: dtypes.int32, classes)
       actual = sparse.deserialize_sparse_tensors(
-          sparse.serialize_sparse_tensors(expected),
-          sparse.get_sparse_types(expected))
+          sparse.serialize_sparse_tensors(expected), types, shapes,
+          sparse.get_classes(expected))
       nest.assert_same_structure(expected, actual)
       for a, e in zip(nest.flatten(actual), nest.flatten(expected)):
         self.assertSparseValuesEqual(a, e)
 
-  def testGetSparseTypes(self):
-    s = sparse_tensor.SparseTensor(
-        indices=[[0, 0]], values=[1], dense_shape=[1, 1])
-    t = sparse.SparseType(dtypes.int32)
-    test_cases = (
-        ((), ()),
-        (s, t),
-        ((s), (t)),
-        ((s, ()), (t, ())),
-        (((), s), ((), t)),
-    )
-    for test_case in test_cases:
-      self.assertEqual(sparse.get_sparse_types(test_case[0]), test_case[1])
-
-  def testWrapSparseTypes(self):
-    c = constant_op.constant([1])
-    d = dtypes.int32
-    s = sparse_tensor.SparseTensor(
-        indices=[[0, 0]], values=[1], dense_shape=[1, 1])
-    t = sparse.SparseType(dtypes.int32)
+  def testGetClasses(self):
+    s = sparse_tensor.SparseTensor(indices=[[0]], values=[1], dense_shape=[1])
+    d = ops.Tensor
+    t = sparse_tensor.SparseTensor
     test_cases = (
-        ((), ()),
-        (s, t),
-        (c, d),
-        ((s), (t)),
-        ((c), (d)),
-        ((s, ()), (t, ())),
-        (((), s), ((), t)),
-        ((c, ()), (d, ())),
-        (((), c), ((), d)),
-        ((s, (), c), (t, (), d)),
-        (((), s, ()), ((), t, ())),
-        (((), c, ()), ((), d, ())),
+        {
+            "classes": (),
+            "expected": ()
+        },
+        {
+            "classes": s,
+            "expected": t
+        },
+        {
+            "classes": constant_op.constant([1]),
+            "expected": d
+        },
+        {
+            "classes": (s),
+            "expected": (t)
+        },
+        {
+            "classes": (constant_op.constant([1])),
+            "expected": (d)
+        },
+        {
+            "classes": (s, ()),
+            "expected": (t, ())
+        },
+        {
+            "classes": ((), s),
+            "expected": ((), t)
+        },
+        {
+            "classes": (constant_op.constant([1]), ()),
+            "expected": (d, ())
+        },
+        {
+            "classes": ((), constant_op.constant([1])),
+            "expected": ((), d)
+        },
+        {
+            "classes": (s, (), constant_op.constant([1])),
+            "expected": (t, (), d)
+        },
+        {
+            "classes": ((), s, ()),
+            "expected": ((), t, ())
+        },
+        {
+            "classes": ((), constant_op.constant([1]), ()),
+            "expected": ((), d, ())
+        },
     )
     for test_case in test_cases:
       self.assertEqual(
-          sparse.wrap_sparse_types(test_case[0], sparse.get_sparse_types(
-              test_case[0])), test_case[1])
-
-  def testUnwrapSparseTypes(self):
-    d = dtypes.string
-    t = sparse.SparseType(dtypes.int32)
-    test_cases = (
-        ((), ()),
-        (t, d),
-        (d, d),
-        ((t), (d)),
-        ((d), (d)),
-        ((t, ()), (d, ())),
-        (((), t), ((), d)),
-        ((d, ()), (d, ())),
-        (((), d), ((), d)),
-        ((t, (), d), (d, (), d)),
-        (((), t, ()), ((), d, ())),
-        (((), d, ()), ((), d, ())),
-    )
-    for test_case in test_cases:
-      self.assertEqual(sparse.unwrap_sparse_types(test_case[0]), test_case[1])
+          sparse.get_classes(test_case["classes"]), test_case["expected"])
 
 
 if __name__ == "__main__":
diff --git a/tensorflow/python/kernel_tests/BUILD b/tensorflow/python/kernel_tests/BUILD
index d3fa5cb778..7643cf2ddc 100644
--- a/tensorflow/python/kernel_tests/BUILD
+++ b/tensorflow/python/kernel_tests/BUILD
@@ -2807,11 +2807,14 @@ tf_py_test(
         "//tensorflow/python:errors",
         "//tensorflow/python:framework_ops",
         "//tensorflow/python:math_ops",
+        "//tensorflow/python:random_ops",
         "//tensorflow/python:resource_variable_ops",
         "//tensorflow/python:session",
         "//tensorflow/python:sparse_tensor",
+        "//tensorflow/python:tensor_shape",
         "//tensorflow/python/data/ops:dataset_ops",
         "//tensorflow/python/data/util:nest",
+        "//tensorflow/python/data/util:sparse",
     ],
     tags = [
         "manual",
@@ -2825,18 +2828,12 @@ tf_py_test(
     srcs = ["dataset_from_generator_op_test.py"],
     additional_deps = [
         "//third_party/py/numpy",
-        "//tensorflow/core:protos_all_py",
-        "//tensorflow/python:array_ops",
         "//tensorflow/python:client_testlib",
         "//tensorflow/python:dtypes",
         "//tensorflow/python:errors",
-        "//tensorflow/python:framework_ops",
-        "//tensorflow/python:math_ops",
-        "//tensorflow/python:resource_variable_ops",
-        "//tensorflow/python:session",
-        "//tensorflow/python:sparse_tensor",
+        "//tensorflow/python:tensor_shape",
         "//tensorflow/python/data/ops:dataset_ops",
-        "//tensorflow/python/data/util:nest",
+        "//tensorflow/python/data/util:sparse",
     ],
 )
 
@@ -3082,6 +3079,7 @@ tf_py_test(
         "//tensorflow/core:protos_all_py",
         "//tensorflow/python/data/ops:dataset_ops",
         "//tensorflow/python/data/ops:iterator_ops",
+        "//tensorflow/python/data/util:sparse",
         "//tensorflow/python:array_ops",
         "//tensorflow/python:client_testlib",
         "//tensorflow/python:constant_op",
@@ -3096,8 +3094,11 @@ tf_py_test(
         "//tensorflow/python:io_ops",
         "//tensorflow/python:math_ops",
         "//tensorflow/python:parsing_ops",
+        "//tensorflow/python:random_ops",
         "//tensorflow/python:script_ops",
         "//tensorflow/python:session",
+        "//tensorflow/python:sparse_tensor",
+        "//tensorflow/python:tensor_shape",
         "//tensorflow/python:training",
     ],
 )
diff --git a/tensorflow/python/ops/sparse_ops.py b/tensorflow/python/ops/sparse_ops.py
index 2ef6a0015b..3d6f942dca 100644
--- a/tensorflow/python/ops/sparse_ops.py
+++ b/tensorflow/python/ops/sparse_ops.py
@@ -1442,7 +1442,7 @@ def deserialize_sparse(serialized_sparse, dtype, rank=None, name=None):
       Must have 3 columns.
     dtype: The `dtype` of the serialized `SparseTensor` object.
     rank: (optional) Python int, the rank of the `SparseTensor` object.
-    name: A name prefix for the returned tensors (optional)
+    name: A name prefix for the returned tensors (optional).
 
   Returns:
     A `SparseTensor` representing the deserialized `SparseTensor` object.
diff --git a/tensorflow/tools/api/golden/tensorflow.data.-dataset.pbtxt b/tensorflow/tools/api/golden/tensorflow.data.-dataset.pbtxt
index d12514fe77..42de5c0c80 100644
--- a/tensorflow/tools/api/golden/tensorflow.data.-dataset.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.data.-dataset.pbtxt
@@ -2,6 +2,10 @@ path: "tensorflow.data.Dataset"
 tf_class {
   is_instance: "<class \'tensorflow.python.data.ops.dataset_ops.Dataset\'>"
   is_instance: "<type \'object\'>"
+  member {
+    name: "output_classes"
+    mtype: "<class \'abc.abstractproperty\'>"
+  }
   member {
     name: "output_shapes"
     mtype: "<class \'abc.abstractproperty\'>"
diff --git a/tensorflow/tools/api/golden/tensorflow.data.-fixed-length-record-dataset.pbtxt b/tensorflow/tools/api/golden/tensorflow.data.-fixed-length-record-dataset.pbtxt
index 002d0c6a9f..e2fc8d6cb1 100644
--- a/tensorflow/tools/api/golden/tensorflow.data.-fixed-length-record-dataset.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.data.-fixed-length-record-dataset.pbtxt
@@ -3,6 +3,10 @@ tf_class {
   is_instance: "<class \'tensorflow.python.data.ops.readers.FixedLengthRecordDataset\'>"
   is_instance: "<class \'tensorflow.python.data.ops.dataset_ops.Dataset\'>"
   is_instance: "<type \'object\'>"
+  member {
+    name: "output_classes"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "output_shapes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/tensorflow.data.-iterator.pbtxt b/tensorflow/tools/api/golden/tensorflow.data.-iterator.pbtxt
index e62f6b247a..1f9aeb6ad6 100644
--- a/tensorflow/tools/api/golden/tensorflow.data.-iterator.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.data.-iterator.pbtxt
@@ -6,6 +6,10 @@ tf_class {
     name: "initializer"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "output_classes"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "output_shapes"
     mtype: "<type \'property\'>"
@@ -16,15 +20,15 @@ tf_class {
   }
   member_method {
     name: "__init__"
-    argspec: "args=[\'self\', \'iterator_resource\', \'initializer\', \'output_types\', \'output_shapes\'], varargs=None, keywords=None, defaults=None"
+    argspec: "args=[\'self\', \'iterator_resource\', \'initializer\', \'output_types\', \'output_shapes\', \'output_classes\'], varargs=None, keywords=None, defaults=None"
   }
   member_method {
     name: "from_string_handle"
-    argspec: "args=[\'string_handle\', \'output_types\', \'output_shapes\'], varargs=None, keywords=None, defaults=[\'None\'], "
+    argspec: "args=[\'string_handle\', \'output_types\', \'output_shapes\', \'output_classes\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
   }
   member_method {
     name: "from_structure"
-    argspec: "args=[\'output_types\', \'output_shapes\', \'shared_name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
+    argspec: "args=[\'output_types\', \'output_shapes\', \'shared_name\', \'output_classes\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], "
   }
   member_method {
     name: "get_next"
diff --git a/tensorflow/tools/api/golden/tensorflow.data.-sparse-type.pbtxt b/tensorflow/tools/api/golden/tensorflow.data.-sparse-type.pbtxt
deleted file mode 100644
index b25f9a029f..0000000000
--- a/tensorflow/tools/api/golden/tensorflow.data.-sparse-type.pbtxt
+++ /dev/null
@@ -1,13 +0,0 @@
-path: "tensorflow.data.SparseType"
-tf_class {
-  is_instance: "<class \'tensorflow.python.data.util.sparse.SparseType\'>"
-  is_instance: "<type \'object\'>"
-  member {
-    name: "dtype"
-    mtype: "<type \'property\'>"
-  }
-  member_method {
-    name: "__init__"
-    argspec: "args=[\'self\', \'dtype\'], varargs=None, keywords=None, defaults=None"
-  }
-}
diff --git a/tensorflow/tools/api/golden/tensorflow.data.-t-f-record-dataset.pbtxt b/tensorflow/tools/api/golden/tensorflow.data.-t-f-record-dataset.pbtxt
index 2b476dab66..9770389e5e 100644
--- a/tensorflow/tools/api/golden/tensorflow.data.-t-f-record-dataset.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.data.-t-f-record-dataset.pbtxt
@@ -3,6 +3,10 @@ tf_class {
   is_instance: "<class \'tensorflow.python.data.ops.readers.TFRecordDataset\'>"
   is_instance: "<class \'tensorflow.python.data.ops.dataset_ops.Dataset\'>"
   is_instance: "<type \'object\'>"
+  member {
+    name: "output_classes"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "output_shapes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/tensorflow.data.-text-line-dataset.pbtxt b/tensorflow/tools/api/golden/tensorflow.data.-text-line-dataset.pbtxt
index c4c5ac0775..7263230c1c 100644
--- a/tensorflow/tools/api/golden/tensorflow.data.-text-line-dataset.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.data.-text-line-dataset.pbtxt
@@ -3,6 +3,10 @@ tf_class {
   is_instance: "<class \'tensorflow.python.data.ops.readers.TextLineDataset\'>"
   is_instance: "<class \'tensorflow.python.data.ops.dataset_ops.Dataset\'>"
   is_instance: "<type \'object\'>"
+  member {
+    name: "output_classes"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "output_shapes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/tensorflow.data.pbtxt b/tensorflow/tools/api/golden/tensorflow.data.pbtxt
index b9f54a4d72..56fb270a49 100644
--- a/tensorflow/tools/api/golden/tensorflow.data.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.data.pbtxt
@@ -12,10 +12,6 @@ tf_module {
     name: "Iterator"
     mtype: "<type \'type\'>"
   }
-  member {
-    name: "SparseType"
-    mtype: "<type \'type\'>"
-  }
   member {
     name: "TFRecordDataset"
     mtype: "<class \'abc.ABCMeta\'>"
-- 
GitLab


From cdb9f312f1a00e3fb90f14d79aca2fa9dcab8f21 Mon Sep 17 00:00:00 2001
From: Blake Hechtman <blakehechtman@google.com>
Date: Fri, 17 Nov 2017 13:05:02 -0800
Subject: [PATCH 0065/1225] Add field to HLO proto field to enable reversing a
 convolution filter.

PiperOrigin-RevId: 176149369
---
 .../xla/client/computation_builder.cc         |  1 +
 .../compiler/xla/service/hlo_evaluator.cc     |  4 +-
 .../xla/service/hlo_evaluator_test.cc         | 77 +++++++++++++++++++
 tensorflow/compiler/xla/window_util.cc        |  3 +
 tensorflow/compiler/xla/xla_data.proto        |  4 +
 5 files changed, 88 insertions(+), 1 deletion(-)

diff --git a/tensorflow/compiler/xla/client/computation_builder.cc b/tensorflow/compiler/xla/client/computation_builder.cc
index 763d94e94c..b1f4ea8ab6 100644
--- a/tensorflow/compiler/xla/client/computation_builder.cc
+++ b/tensorflow/compiler/xla/client/computation_builder.cc
@@ -153,6 +153,7 @@ bool ComputationBuilder::MakeWindow(
     } else {
       dim->set_window_dilation(1);
     }
+    dim->set_window_reversal(false);
   }
   return true;
 }
diff --git a/tensorflow/compiler/xla/service/hlo_evaluator.cc b/tensorflow/compiler/xla/service/hlo_evaluator.cc
index a722d1b3d9..2bd9723dbe 100644
--- a/tensorflow/compiler/xla/service/hlo_evaluator.cc
+++ b/tensorflow/compiler/xla/service/hlo_evaluator.cc
@@ -814,7 +814,9 @@ class HloEvaluator::TypedVisitor : public DfsHloVisitorWithDefault {
             }
 
             rhs_index[dnums.kernel_spatial_dimensions(ki)] =
-                rhs_spatial_index[ki];
+                window_dim.window_reversal()
+                    ? ((window_dim.size() - 1) - rhs_spatial_index[ki])
+                    : rhs_spatial_index[ki];
           }
 
           result_val += lhs_literal.Get<ReturnT>(lhs_index) *
diff --git a/tensorflow/compiler/xla/service/hlo_evaluator_test.cc b/tensorflow/compiler/xla/service/hlo_evaluator_test.cc
index 85477af6fe..94929dda6a 100644
--- a/tensorflow/compiler/xla/service/hlo_evaluator_test.cc
+++ b/tensorflow/compiler/xla/service/hlo_evaluator_test.cc
@@ -794,6 +794,83 @@ TEST_F(HloEvaluatorTest, Simple4x4Conv2DWith2x2Kernel) {
   LiteralTestUtil::ExpectEqual(*expected, *result);
 }
 
+TEST_F(HloEvaluatorTest, Conv2DGeneralDimensionsReversed) {
+  HloComputation::Builder b(TestName());
+
+  // clang-format off
+  // Input dimensions: [feature=2, height=3, batch=1, width=4]
+  Array4D<float> input({
+    {{{1, 2, 3, 4}},
+     {{5, 6, 7, 8}},
+     {{9, 10, 11, 12}}},
+    {{{13, 14, 15, 16}},
+     {{17, 18, 19, 20}},
+     {{21, 22, 23, 24}}}
+  });
+  // Weight dimensions:
+  // [kernel_output_feature=1, width=3, kernel_input_feature=2, height=3]
+  Array4D<float> weight({{
+    {{1, 7, 13},
+     {4, 10, 16}},
+    {{2, 8, 14},
+     {5, 11, 17}},
+    {{3, 9, 15},
+     {6, 12, 18}}
+  }});
+  // clang-format on
+
+  auto lhs_literal = Literal::CreateR4FromArray4D<float>(input);
+  HloInstruction* lhs_instruction =
+      b.AddInstruction(HloInstruction::CreateConstant(std::move(lhs_literal)));
+
+  auto rhs_literal = Literal::CreateR4FromArray4D<float>(weight);
+  HloInstruction* rhs_instruction =
+      b.AddInstruction(HloInstruction::CreateConstant(std::move(rhs_literal)));
+  rhs_instruction = b.AddInstruction(HloInstruction::CreateReverse(
+      rhs_instruction->shape(), rhs_instruction, {3, 1}));
+
+  Window window;
+  WindowDimension dim;
+  dim.set_size(3);
+  dim.set_stride(1);
+  dim.set_padding_low(0);
+  dim.set_padding_high(0);
+  dim.set_window_dilation(1);
+  dim.set_base_dilation(1);
+  dim.set_window_reversal(true);
+  *window.add_dimensions() = dim;
+  *window.add_dimensions() = dim;
+
+  ConvolutionDimensionNumbers dnums;
+  dnums.set_input_batch_dimension(2);
+  dnums.set_output_batch_dimension(2);
+  dnums.set_input_feature_dimension(0);
+  dnums.set_output_feature_dimension(0);
+  dnums.add_spatial_dimensions(1);
+  dnums.add_spatial_dimensions(3);
+
+  dnums.set_kernel_output_feature_dimension(0);
+  dnums.set_kernel_input_feature_dimension(2);
+  dnums.add_kernel_spatial_dimensions(3);
+  dnums.add_kernel_spatial_dimensions(1);
+
+  const Shape& shape = ShapeUtil::MakeShape(F32, {1, 1, 1, 2});
+  b.AddInstruction(HloInstruction::CreateConvolve(
+      shape, lhs_instruction, rhs_instruction, window, dnums));
+  auto computation = module().AddEntryComputation(b.Build());
+
+  std::unique_ptr<Literal> result =
+      evaluator_->Evaluate(*computation, {}).ConsumeValueOrDie();
+
+  // clang-format off
+  // Result dimensions: [feature=1, height=1, batch=1, width=2]
+  Array4D<float> expected_array({{{{2514, 2685}}}});
+  // clang-format on
+  auto expected = Literal::CreateR4FromArray4D<float>(expected_array);
+
+  LiteralTestUtil::ExpectEqual(*expected, *result);
+}
+
 TEST_F(HloEvaluatorTest, Conv2DGeneralDimensions) {
   HloComputation::Builder b(TestName());
 
diff --git a/tensorflow/compiler/xla/window_util.cc b/tensorflow/compiler/xla/window_util.cc
index 6f7f1479b9..2e0eba8de0 100644
--- a/tensorflow/compiler/xla/window_util.cc
+++ b/tensorflow/compiler/xla/window_util.cc
@@ -44,6 +44,9 @@ namespace window_util {
   if (dim.window_dilation() != 1) {
     StrAppend(&str, ",window_dilation=", dim.window_dilation());
   }
+  if (dim.window_reversal()) {
+    StrAppend(&str, ",window_reversal");
+  }
   StrAppend(&str, ")");
   return str;
 }
diff --git a/tensorflow/compiler/xla/xla_data.proto b/tensorflow/compiler/xla/xla_data.proto
index eac8f2ff07..39f5806739 100644
--- a/tensorflow/compiler/xla/xla_data.proto
+++ b/tensorflow/compiler/xla/xla_data.proto
@@ -357,6 +357,10 @@ message WindowDimension {
   // means no dilation. base_dilation - 1 no-op entries ("holes") are implicitly
   // placed between each base area element. See documentation for convolution.
   int64 base_dilation = 6;
+
+  // Window reversal means that this dimension was logically reversed before the
+  // operation.
+  bool window_reversal = 7;
 }
 
 // Describes the windowing in an operation such as convolution.
-- 
GitLab


From 704d66d66508d10bd12f39d2f99de4eb8c8ad7b0 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 17 Nov 2017 13:11:10 -0800
Subject: [PATCH 0066/1225] Temporarily disable tsan for wals_test.

PiperOrigin-RevId: 176150090
---
 tensorflow/contrib/factorization/BUILD | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tensorflow/contrib/factorization/BUILD b/tensorflow/contrib/factorization/BUILD
index fe86a20ab1..29a0a4221a 100644
--- a/tensorflow/contrib/factorization/BUILD
+++ b/tensorflow/contrib/factorization/BUILD
@@ -270,6 +270,7 @@ tf_py_test(
         "manual",
         "noasan",  # times out b/63678675
         "nomsan",
+        "notsan",  # b/69374301
     ],
 )
 
-- 
GitLab


From d79dd4993061670c1ec5ea01db3022f28d72d0a3 Mon Sep 17 00:00:00 2001
From: Derek Murray <mrry@google.com>
Date: Fri, 17 Nov 2017 13:55:17 -0800
Subject: [PATCH 0067/1225] Fix shutdown race in ClusterSpec propagation.

Previously, the DeregisterGraph and DeleteWorkerSession RPCs could
race against each other, leading to undefined behavior. This change
inhibits the unnecessary DeregisterGraph RPCs when DeleteWorkerSession
is being used, which both fixes the race and cuts down on unnecessary
network traffic on the Session::Close path.

PiperOrigin-RevId: 176155626
---
 .../core/distributed_runtime/master_session.cc     | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/tensorflow/core/distributed_runtime/master_session.cc b/tensorflow/core/distributed_runtime/master_session.cc
index 91a1fa7d1e..b3e499be79 100644
--- a/tensorflow/core/distributed_runtime/master_session.cc
+++ b/tensorflow/core/distributed_runtime/master_session.cc
@@ -67,13 +67,14 @@ class MasterSession::ReffedClientGraph : public core::RefCounted {
                     const SessionOptions& session_opts,
                     const StatsPublisherFactory& stats_publisher_factory,
                     GraphExecutionState* execution_state, bool is_partial,
-                    WorkerCacheInterface* worker_cache)
+                    WorkerCacheInterface* worker_cache, bool should_deregister)
       : session_handle_(handle),
         client_graph_(std::move(cg)),
         session_opts_(session_opts),
         is_partial_(is_partial),
         debug_opts_(bopts.debug_options),
-        worker_cache_(worker_cache) {
+        worker_cache_(worker_cache),
+        should_deregister_(should_deregister) {
     VLOG(1) << "Created ReffedClientGraph for node with "
             << client_graph()->graph.num_node_ids();
 
@@ -85,7 +86,11 @@ class MasterSession::ReffedClientGraph : public core::RefCounted {
     }
   }
 
-  ~ReffedClientGraph() override { DeregisterPartitions(); }
+  ~ReffedClientGraph() override {
+    if (should_deregister_) {
+      DeregisterPartitions();
+    }
+  }
 
   const ClientGraph* client_graph() { return client_graph_.get(); }
 
@@ -209,6 +214,7 @@ class MasterSession::ReffedClientGraph : public core::RefCounted {
   const DebugOptions& debug_opts_;
   WorkerCacheInterface* const worker_cache_;  // Not owned.
   std::unordered_map<StringPiece, Node*, StringPiece::Hasher> name_to_node_;
+  const bool should_deregister_;
 
   // Graph partitioned into per-location subgraphs.
   struct Part {
@@ -1262,7 +1268,7 @@ Status MasterSession::StartStep(const BuildGraphOptions& opts, int64* count,
       auto entry = new ReffedClientGraph(
           handle_, opts, std::move(client_graph), session_opts_,
           stats_publisher_factory_, execution_state_.get(), is_partial,
-          worker_cache);
+          worker_cache, !should_delete_worker_sessions_);
       iter = m->insert({hash, entry}).first;
       VLOG(1) << "Preparing to execute new graph";
     }
-- 
GitLab


From 3f888e1539db5551cfcf9ee837a0555c224e0018 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 17 Nov 2017 14:16:09 -0800
Subject: [PATCH 0068/1225] Add a Compiler::BuildExecutable interface that
 compiles the given Hlo module without optimizations.

PiperOrigin-RevId: 176158846
---
 tensorflow/compiler/xla/service/compiler.h    | 17 +++++++++---
 .../compiler/xla/service/cpu/cpu_compiler.cc  | 26 ++++++++++++-------
 .../compiler/xla/service/cpu/cpu_compiler.h   |  6 ++++-
 .../compiler/xla/service/gpu/gpu_compiler.cc  | 22 +++++++++-------
 .../compiler/xla/service/gpu/gpu_compiler.h   |  6 ++++-
 tensorflow/compiler/xla/service/hlo_runner.cc | 19 +++++++++-----
 tensorflow/compiler/xla/service/hlo_runner.h  | 15 +++++++----
 .../xla/service/interpreter/compiler.cc       | 12 +++++++--
 .../xla/service/interpreter/compiler.h        |  8 ++++--
 .../compiler/xla/service/llvm_compiler.cc     |  4 ++-
 .../compiler/xla/service/llvm_compiler.h      | 12 ++++++---
 tensorflow/compiler/xla/service/service.cc    |  5 +++-
 .../compiler/xla/tests/codegen_test_base.cc   |  7 +++--
 .../compiler/xla/tests/llvm_compiler_test.cc  |  4 +--
 14 files changed, 114 insertions(+), 49 deletions(-)

diff --git a/tensorflow/compiler/xla/service/compiler.h b/tensorflow/compiler/xla/service/compiler.h
index 5f021900c8..fc67330f5c 100644
--- a/tensorflow/compiler/xla/service/compiler.h
+++ b/tensorflow/compiler/xla/service/compiler.h
@@ -97,21 +97,32 @@ class Compiler {
   // Returns the ID of the platform that this compiler targets.
   virtual perftools::gputools::Platform::Id PlatformId() const = 0;
 
+  // Runs Hlo passes to optimize the given Hlo module, returns the optimized
+  // module.
+  virtual StatusOr<std::unique_ptr<HloModule>> RunHloPasses(
+      std::unique_ptr<HloModule> module,
+      perftools::gputools::StreamExecutor* executor) = 0;
+
   // Compiles the HLO module for execution on a device given by the executor,
-  // and returns an executable object or an error status. Takes ownership of the
-  // HLO module and is free to transform it.
+  // and returns an executable object or an error status. No HLO passes are
+  // applied to module. Generally a module should be passed through RunHloPasses
+  // prior to calling this method because the some HLO passes are required for
+  // correctness. Takes ownership of the HLO module and is free to transform it.
   //
   // The compiler may optionally specialize to the individual device
   // (not just type of device) indicated by the executor.
   //
   // Use the overload below to compile computations that run in parallel.
-  virtual StatusOr<std::unique_ptr<Executable>> Compile(
+  virtual StatusOr<std::unique_ptr<Executable>> RunBackend(
       std::unique_ptr<HloModule> module,
       perftools::gputools::StreamExecutor* executor) = 0;
 
   // Compiles a set of HLO modules that can run in parallel, potentially
   // communicating data between the modules, and returns a corresponding
   // sequence of executable objects.
+  //
+  // TODO(b/68666782): Remove this method after adding support for multiple
+  // modules to RunHloPasses and RunBackends.
   virtual StatusOr<std::vector<std::unique_ptr<Executable>>> Compile(
       std::vector<std::unique_ptr<HloModule>> modules,
       std::vector<std::vector<perftools::gputools::StreamExecutor*>>
diff --git a/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc b/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc
index f5b95d3657..b04a279395 100644
--- a/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc
+++ b/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc
@@ -426,8 +426,22 @@ Status InitializeModuleHooks(
 
 }  // namespace
 
-StatusOr<std::unique_ptr<Executable>> CpuCompiler::Compile(
-    std::unique_ptr<HloModule> module, se::StreamExecutor* stream_exec) {
+StatusOr<std::unique_ptr<HloModule>> CpuCompiler::RunHloPasses(
+    std::unique_ptr<HloModule> module,
+    perftools::gputools::StreamExecutor* /*stream_exec*/) {
+  VLOG(2) << "Before optimization:";
+  XLA_VLOG_LINES(2, module->ToString());
+
+  TF_RETURN_IF_ERROR(RunHloPasses(module.get(), /*is_aot_compile=*/false));
+
+  VLOG(2) << "After optimization:";
+  XLA_VLOG_LINES(2, module->ToString());
+  return std::move(module);
+}
+
+StatusOr<std::unique_ptr<Executable>> CpuCompiler::RunBackend(
+    std::unique_ptr<HloModule> module,
+    perftools::gputools::StreamExecutor* stream_exec) {
   const string timer_message =
       "Compiling [" + module->name() + "] for CPU using JIT";
   ScopedLoggingTimer compiling_timer(timer_message, 1);
@@ -458,14 +472,6 @@ StatusOr<std::unique_ptr<Executable>> CpuCompiler::Compile(
   llvm_module->setDataLayout(jit->data_layout());
   llvm_module->setTargetTriple(jit->target_triple().getTriple());
 
-  VLOG(2) << "Before optimization:";
-  XLA_VLOG_LINES(2, module->ToString());
-
-  TF_RETURN_IF_ERROR(RunHloPasses(module.get(), /*is_aot_compile=*/false));
-
-  VLOG(2) << "After optimization:";
-  XLA_VLOG_LINES(2, module->ToString());
-
   HloComputation* computation = module->entry_computation();
   std::unordered_map<const HloInstruction*, size_t> hlo_to_profile_idx;
   if (module->config().hlo_profiling_enabled()) {
diff --git a/tensorflow/compiler/xla/service/cpu/cpu_compiler.h b/tensorflow/compiler/xla/service/cpu/cpu_compiler.h
index 963aced208..ebed7058d8 100644
--- a/tensorflow/compiler/xla/service/cpu/cpu_compiler.h
+++ b/tensorflow/compiler/xla/service/cpu/cpu_compiler.h
@@ -116,7 +116,11 @@ class CpuCompiler : public LLVMCompiler {
   //        stream_execs)
   using LLVMCompiler::Compile;
 
-  StatusOr<std::unique_ptr<Executable>> Compile(
+  StatusOr<std::unique_ptr<HloModule>> RunHloPasses(
+      std::unique_ptr<HloModule> module,
+      perftools::gputools::StreamExecutor* stream_exec) override;
+
+  StatusOr<std::unique_ptr<Executable>> RunBackend(
       std::unique_ptr<HloModule> module,
       perftools::gputools::StreamExecutor* stream_exec) override;
 
diff --git a/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc b/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc
index 23fb308ec6..937d453a5c 100644
--- a/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc
+++ b/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc
@@ -126,7 +126,7 @@ string GetLibdeviceDir(const string& config_cuda_data_dir) {
 
 // Runs optimization passes on the given HLO module.
 tensorflow::Status OptimizeHloModule(
-    HloModule* hlo_module, const se::DeviceDescription& device_desc,
+    HloModule* hlo_module,
     const HloCostAnalysis::ShapeSizeFunction& shape_size_function) {
   {
     HloPassPipeline pipeline("optimization");
@@ -297,19 +297,23 @@ StatusOr<std::vector<uint8>> CompilePtx(const string& ptx, int cc_major,
 GpuCompiler::GpuCompiler()
     : pointer_size_(llvm::DataLayout(kDataLayout).getPointerSize()) {}
 
-StatusOr<std::unique_ptr<Executable>> GpuCompiler::Compile(
-    std::unique_ptr<HloModule> module, se::StreamExecutor* stream_exec) {
-  TF_RET_CHECK(stream_exec != nullptr);
-
+StatusOr<std::unique_ptr<HloModule>> GpuCompiler::RunHloPasses(
+    std::unique_ptr<HloModule> module, se::StreamExecutor* /*stream_exec*/) {
   {
     Tracing::TraceMe annotation("HLO Transforms", module->name(),
                                 /*is_expensive=*/true);
-    TF_RETURN_IF_ERROR(OptimizeHloModule(module.get(),
-                                         stream_exec->GetDeviceDescription(),
-                                         ShapeSizeBytesFunction()));
     TF_RETURN_IF_ERROR(
-        PrepareHloModuleForIrEmitting(module.get(), ShapeSizeBytesFunction()));
+        OptimizeHloModule(module.get(), ShapeSizeBytesFunction()));
   }
+  return std::move(module);
+}
+
+StatusOr<std::unique_ptr<Executable>> GpuCompiler::RunBackend(
+    std::unique_ptr<HloModule> module, se::StreamExecutor* stream_exec) {
+  TF_RET_CHECK(stream_exec != nullptr);
+
+  TF_RETURN_IF_ERROR(
+      PrepareHloModuleForIrEmitting(module.get(), ShapeSizeBytesFunction()));
 
   llvm::LLVMContext llvm_context;
   std::string buffer;
diff --git a/tensorflow/compiler/xla/service/gpu/gpu_compiler.h b/tensorflow/compiler/xla/service/gpu/gpu_compiler.h
index fe5fce615f..18e3434020 100644
--- a/tensorflow/compiler/xla/service/gpu/gpu_compiler.h
+++ b/tensorflow/compiler/xla/service/gpu/gpu_compiler.h
@@ -49,7 +49,11 @@ class GpuCompiler : public LLVMCompiler {
   //        stream_execs)
   using LLVMCompiler::Compile;
 
-  StatusOr<std::unique_ptr<Executable>> Compile(
+  StatusOr<std::unique_ptr<HloModule>> RunHloPasses(
+      std::unique_ptr<HloModule> module,
+      perftools::gputools::StreamExecutor* stream_exec) override;
+
+  StatusOr<std::unique_ptr<Executable>> RunBackend(
       std::unique_ptr<HloModule> module,
       perftools::gputools::StreamExecutor* stream_exec) override;
 
diff --git a/tensorflow/compiler/xla/service/hlo_runner.cc b/tensorflow/compiler/xla/service/hlo_runner.cc
index 63f2b1296e..6b6d48233a 100644
--- a/tensorflow/compiler/xla/service/hlo_runner.cc
+++ b/tensorflow/compiler/xla/service/hlo_runner.cc
@@ -114,11 +114,16 @@ HloRunner::~HloRunner() {
 StatusOr<se::DeviceMemoryBase> HloRunner::Execute(
     std::unique_ptr<HloModule> module,
     tensorflow::gtl::ArraySlice<se::DeviceMemoryBase> arguments,
-    Shape* result_shape) {
+    Shape* result_shape, bool run_hlo_passes) {
+  if (run_hlo_passes) {
+    TF_ASSIGN_OR_RETURN(
+        module, backend().compiler()->RunHloPasses(
+                    std::move(module), backend().default_stream_executor()));
+  }
   TF_ASSIGN_OR_RETURN(
       std::unique_ptr<Executable> executable,
-      backend().compiler()->Compile(std::move(module),
-                                    backend().default_stream_executor()));
+      backend().compiler()->RunBackend(std::move(module),
+                                       backend().default_stream_executor()));
 
   se::Stream stream(backend().default_stream_executor());
   stream.Init();
@@ -193,10 +198,12 @@ StatusOr<std::unique_ptr<Literal>> HloRunner::TransferFromDevice(
 
 StatusOr<std::unique_ptr<Literal>> HloRunner::ExecuteAndTransfer(
     std::unique_ptr<HloModule> module,
-    tensorflow::gtl::ArraySlice<se::DeviceMemoryBase> arguments) {
+    tensorflow::gtl::ArraySlice<se::DeviceMemoryBase> arguments,
+    bool run_hlo_passes) {
   Shape result_shape;
-  TF_ASSIGN_OR_RETURN(se::DeviceMemoryBase device_base,
-                      Execute(std::move(module), arguments, &result_shape));
+  TF_ASSIGN_OR_RETURN(
+      se::DeviceMemoryBase device_base,
+      Execute(std::move(module), arguments, &result_shape, run_hlo_passes));
   return TransferFromDevice(result_shape, device_base);
 }
 
diff --git a/tensorflow/compiler/xla/service/hlo_runner.h b/tensorflow/compiler/xla/service/hlo_runner.h
index a5732848c6..95cddafc91 100644
--- a/tensorflow/compiler/xla/service/hlo_runner.h
+++ b/tensorflow/compiler/xla/service/hlo_runner.h
@@ -65,17 +65,20 @@ class HloRunner {
   // Executes the given module with given literals as input and returns the
   // result as a Literal. The LiteralPtr type accepts Literal* or
   // std::unique_ptr<Literal>.
+  // If run_hlo_passes is true, the module will be executed without Hlo
+  // optimization.
   template <typename LiteralPtr>
   StatusOr<std::unique_ptr<Literal>> Execute(
       std::unique_ptr<HloModule> module,
-      const tensorflow::gtl::ArraySlice<LiteralPtr> literals);
+      const tensorflow::gtl::ArraySlice<LiteralPtr> literals,
+      bool run_hlo_passes = true);
 
   // Executes the given module and returns a global data handle.
   StatusOr<perftools::gputools::DeviceMemoryBase> Execute(
       std::unique_ptr<HloModule> module,
       tensorflow::gtl::ArraySlice<perftools::gputools::DeviceMemoryBase>
           arguments,
-      Shape* result_shape);
+      Shape* result_shape, bool run_hlo_passes = true);
 
   // Transfers the given literal to the device and returns the data handle.
   StatusOr<perftools::gputools::DeviceMemoryBase> TransferToDevice(
@@ -90,7 +93,8 @@ class HloRunner {
   StatusOr<std::unique_ptr<Literal>> ExecuteAndTransfer(
       std::unique_ptr<HloModule> module,
       tensorflow::gtl::ArraySlice<perftools::gputools::DeviceMemoryBase>
-          arguments);
+          arguments,
+      bool run_hlo_passes = true);
 
   // If backend is not created in the constructor, creates and returns the
   // default backend. If creation fails, crashes the program.
@@ -112,14 +116,15 @@ class HloRunner {
 template <typename LiteralPtr>
 StatusOr<std::unique_ptr<Literal>> HloRunner::Execute(
     std::unique_ptr<HloModule> module,
-    const tensorflow::gtl::ArraySlice<LiteralPtr> literals) {
+    const tensorflow::gtl::ArraySlice<LiteralPtr> literals,
+    bool run_hlo_passes) {
   std::vector<perftools::gputools::DeviceMemoryBase> arguments;
   for (const auto& literal : literals) {
     TF_ASSIGN_OR_RETURN(perftools::gputools::DeviceMemoryBase argument,
                         TransferToDevice(*literal));
     arguments.push_back(argument);
   }
-  return ExecuteAndTransfer(std::move(module), arguments);
+  return ExecuteAndTransfer(std::move(module), arguments, run_hlo_passes);
 }
 
 }  // namespace xla
diff --git a/tensorflow/compiler/xla/service/interpreter/compiler.cc b/tensorflow/compiler/xla/service/interpreter/compiler.cc
index 6d5796a24b..c9a5285a4f 100644
--- a/tensorflow/compiler/xla/service/interpreter/compiler.cc
+++ b/tensorflow/compiler/xla/service/interpreter/compiler.cc
@@ -69,11 +69,19 @@ Status InterpreterCompiler::RunHloOptimization(HloModule* hlo_module) {
   return pipeline.Run(hlo_module).status();
 }
 
-StatusOr<std::unique_ptr<Executable>> InterpreterCompiler::Compile(
+StatusOr<std::unique_ptr<HloModule>> InterpreterCompiler::RunHloPasses(
+    std::unique_ptr<HloModule> hlo_module,
+    se::StreamExecutor* /*stream_exec*/) {
+  VLOG(1) << "Run hlo passes on graph " << hlo_module->name();
+  TF_RETURN_IF_ERROR(RunHloOptimization(hlo_module.get()));
+  return std::move(hlo_module);
+}
+
+StatusOr<std::unique_ptr<Executable>> InterpreterCompiler::RunBackend(
     std::unique_ptr<HloModule> hlo_module, se::StreamExecutor* stream_exec) {
   TF_RET_CHECK(stream_exec != nullptr);
 
-  VLOG(1) << "Generate graph " << hlo_module->name();
+  VLOG(1) << "Run backend " << hlo_module->name();
 
   TF_RETURN_IF_ERROR(RunHloOptimization(hlo_module.get()));
 
diff --git a/tensorflow/compiler/xla/service/interpreter/compiler.h b/tensorflow/compiler/xla/service/interpreter/compiler.h
index cfdc9b6256..278cf51842 100644
--- a/tensorflow/compiler/xla/service/interpreter/compiler.h
+++ b/tensorflow/compiler/xla/service/interpreter/compiler.h
@@ -43,8 +43,12 @@ class InterpreterCompiler : public Compiler {
   InterpreterCompiler() {}
   ~InterpreterCompiler() override {}
 
-  StatusOr<std::unique_ptr<Executable>> Compile(
-      std::unique_ptr<HloModule> hlo_modules,
+  StatusOr<std::unique_ptr<HloModule>> RunHloPasses(
+      std::unique_ptr<HloModule> hlo_module,
+      perftools::gputools::StreamExecutor* stream_exec) override;
+
+  StatusOr<std::unique_ptr<Executable>> RunBackend(
+      std::unique_ptr<HloModule> hlo_module,
       perftools::gputools::StreamExecutor* stream_exec) override;
 
   StatusOr<std::vector<std::unique_ptr<Executable>>> Compile(
diff --git a/tensorflow/compiler/xla/service/llvm_compiler.cc b/tensorflow/compiler/xla/service/llvm_compiler.cc
index ba0304fb8c..34f3419269 100644
--- a/tensorflow/compiler/xla/service/llvm_compiler.cc
+++ b/tensorflow/compiler/xla/service/llvm_compiler.cc
@@ -27,8 +27,10 @@ StatusOr<std::vector<std::unique_ptr<Executable>>> LLVMCompiler::Compile(
           "Model partitioning not implemented for the CPU/GPU compilers!");
     }
 
+    TF_ASSIGN_OR_RETURN(
+        modules[i], RunHloPasses(std::move(modules[i]), stream_execs[i][0]));
     TF_ASSIGN_OR_RETURN(std::unique_ptr<Executable> executable,
-                        Compile(std::move(modules[i]), stream_execs[i][0]));
+                        RunBackend(std::move(modules[i]), stream_execs[i][0]));
     result.push_back(std::move(executable));
   }
 
diff --git a/tensorflow/compiler/xla/service/llvm_compiler.h b/tensorflow/compiler/xla/service/llvm_compiler.h
index c4f689eabe..c5393cef4f 100644
--- a/tensorflow/compiler/xla/service/llvm_compiler.h
+++ b/tensorflow/compiler/xla/service/llvm_compiler.h
@@ -58,10 +58,14 @@ class LLVMCompiler : public Compiler {
   void RemovePostOptimizationHook() { user_post_optimization_hook_ = nullptr; }
 
   // Bring in
-  // StatusOr<std::unique_ptr<Executable>> Compile(
-  //    std::unique_ptr<HloModule> module,
-  //    perftools::gputools::StreamExecutor* executor)
-  using Compiler::Compile;
+  //   StatusOr<std::unique_ptr<Executable>> RunBackend(
+  //       std::unique_ptr<HloModule> module,
+  //       perftools::gputools::StreamExecutor* stream_exec)
+  //   StatusOr<std::unique_ptr<HloModule>> RunHloPasses(
+  //       std::unique_ptr<HloModule> module,
+  //       perftools::gputools::StreamExecutor* stream_exec)
+  using Compiler::RunBackend;
+  using Compiler::RunHloPasses;
 
   StatusOr<std::vector<std::unique_ptr<Executable>>> Compile(
       std::vector<std::unique_ptr<HloModule>> modules,
diff --git a/tensorflow/compiler/xla/service/service.cc b/tensorflow/compiler/xla/service/service.cc
index ee9501dd48..0544a1697b 100644
--- a/tensorflow/compiler/xla/service/service.cc
+++ b/tensorflow/compiler/xla/service/service.cc
@@ -430,9 +430,12 @@ StatusOr<std::unique_ptr<Executable>> Service::BuildExecutable(
                                           /*include_unreachable_instructions=*/
                                           true));
 
+  TF_ASSIGN_OR_RETURN(
+      module, backend->compiler()->RunHloPasses(std::move(module), executor));
+
   TF_ASSIGN_OR_RETURN(
       std::unique_ptr<Executable> executable,
-      backend->compiler()->Compile(std::move(module), executor));
+      backend->compiler()->RunBackend(std::move(module), executor));
 
   if (!other_directory_path.empty()) {
     executable->set_session_module(std::move(session_module));
diff --git a/tensorflow/compiler/xla/tests/codegen_test_base.cc b/tensorflow/compiler/xla/tests/codegen_test_base.cc
index 43ea7f6019..e472408dcf 100644
--- a/tensorflow/compiler/xla/tests/codegen_test_base.cc
+++ b/tensorflow/compiler/xla/tests/codegen_test_base.cc
@@ -19,8 +19,11 @@ namespace xla {
 
 StatusOr<std::unique_ptr<Executable>> CodegenTestBase::CompileToExecutable(
     std::unique_ptr<HloModule> hlo_module) {
-  return backend().compiler()->Compile(std::move(hlo_module),
-                                       backend().default_stream_executor());
+  TF_ASSIGN_OR_RETURN(hlo_module, backend().compiler()->RunHloPasses(
+                                      std::move(hlo_module),
+                                      backend().default_stream_executor()));
+  return backend().compiler()->RunBackend(std::move(hlo_module),
+                                          backend().default_stream_executor());
 }
 
 StatusOr<std::unique_ptr<AotCompilationResult>>
diff --git a/tensorflow/compiler/xla/tests/llvm_compiler_test.cc b/tensorflow/compiler/xla/tests/llvm_compiler_test.cc
index 62fab6a224..b5b95967ff 100644
--- a/tensorflow/compiler/xla/tests/llvm_compiler_test.cc
+++ b/tensorflow/compiler/xla/tests/llvm_compiler_test.cc
@@ -73,8 +73,8 @@ class LLVMCompilerTest : public ::testing::Test {
     compiler->SetPostOptimizationHook(post_opt_hook);
 
     ASSERT_TRUE(compiler
-                    ->Compile(std::move(hlo_module),
-                              backend_->default_stream_executor())
+                    ->RunBackend(std::move(hlo_module),
+                                 backend_->default_stream_executor())
                     .ok());
 
     // Test that hooks were called.
-- 
GitLab


From 6610eb74981ffcbaaba6befc241ad6d34aded81e Mon Sep 17 00:00:00 2001
From: Shanqing Cai <cais@google.com>
Date: Fri, 17 Nov 2017 14:17:22 -0800
Subject: [PATCH 0069/1225] tfdbg: fix missing space in grpc error message

PiperOrigin-RevId: 176159019
---
 tensorflow/core/kernels/debug_ops.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/core/kernels/debug_ops.h b/tensorflow/core/kernels/debug_ops.h
index 2c21053121..381add3fb3 100644
--- a/tensorflow/core/kernels/debug_ops.h
+++ b/tensorflow/core/kernels/debug_ops.h
@@ -185,7 +185,7 @@ class BaseDebugOp : public OpKernel {
       if (!status.ok()) {
         LOG(ERROR) << "Debug node of watch key "
                    << debug_watch_key_->debug_node_name
-                   << "failed to publish debug tensor data to all URLs "
+                   << " failed to publish debug tensor data to all URLs "
                    << str_util::Join(debug_urls_, ", ")
                    << ", due to: " << status.error_message();
       }
-- 
GitLab


From 61b0ddca2570215a625e22f76348f51ffd661ddf Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 17 Nov 2017 14:23:01 -0800
Subject: [PATCH 0070/1225] Modify QuantizeAddContexts so that ops are added
 deterministically. This is needed when using multiple worker replicas so that
 the ops can be initialized consistently.

PiperOrigin-RevId: 176159819
---
 tensorflow/contrib/quantize/python/quantize.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/tensorflow/contrib/quantize/python/quantize.py b/tensorflow/contrib/quantize/python/quantize.py
index 7db2d863aa..50a2b4c91c 100644
--- a/tensorflow/contrib/quantize/python/quantize.py
+++ b/tensorflow/contrib/quantize/python/quantize.py
@@ -164,7 +164,10 @@ class _QuantizeContext(object):
 
   def QuantizeAddContexts(self):
     """Quantizes all add ops in self.add_contexts."""
-    for add_context in self.add_contexts:
+    # Loop through sorted self.add_contexts so that op creation is
+    # deterministic. This is needed when using multiple worker replicas so that
+    # the ops can be initialized consistently.
+    for add_context in sorted(self.add_contexts):
       add_op = self.GetOperationByNamesDontThrow([
           add_context + '/Add', add_context + '/add'])
       if add_op is not None:
-- 
GitLab


From 3094dfcf387c122b678230b6c0df778aad594d1e Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 17 Nov 2017 14:36:39 -0800
Subject: [PATCH 0071/1225] [XLA:GPU] Partially enable Winograd convolution
 algorithm. Diable the algorithm for certain inputs to avoid a known bug in
 cuDNNv5 and cuDNNv6.

PiperOrigin-RevId: 176161830
---
 .../xla/service/gpu/convolution_thunk.cc      | 37 +++++++++++++++----
 .../xla/service/gpu/convolution_thunk.h       |  1 +
 2 files changed, 30 insertions(+), 8 deletions(-)

diff --git a/tensorflow/compiler/xla/service/gpu/convolution_thunk.cc b/tensorflow/compiler/xla/service/gpu/convolution_thunk.cc
index e79d0a4c79..5fe5f55857 100644
--- a/tensorflow/compiler/xla/service/gpu/convolution_thunk.cc
+++ b/tensorflow/compiler/xla/service/gpu/convolution_thunk.cc
@@ -258,22 +258,19 @@ tensorflow::Status ConvolutionThunk::Convolve(
 }
 
 std::vector<AlgorithmDesc> ConvolutionThunk::GetAlgorithms(
-    se::StreamExecutor* stream_exec) const {
+    bool with_winograd_nonfused, se::StreamExecutor* stream_exec) const {
   std::vector<AlgorithmDesc> algorithms;
-  // TODO(yangzihao): Currently disable the use of winograd nonfused in XLA
-  // by default. Should send in conv parameters and enable it when
-  // ShouldIncludeWinogradNonfusedAlgo() returns true.
   switch (convolution_kind_) {
     case ConvolutionKind::kBackwardFilter:
       CHECK(stream_exec->GetConvolveBackwardFilterAlgorithms(
-          /*with_winograd_nonfused=*/false, &algorithms));
+          with_winograd_nonfused, &algorithms));
       break;
     case ConvolutionKind::kBackwardInput:
       CHECK(stream_exec->GetConvolveBackwardDataAlgorithms(
-          /*with_winograd_nonfused=*/false, &algorithms));
+          with_winograd_nonfused, &algorithms));
       break;
     case ConvolutionKind::kForward:
-      CHECK(stream_exec->GetConvolveAlgorithms(/*with_winograd_nonfused=*/false,
+      CHECK(stream_exec->GetConvolveAlgorithms(with_winograd_nonfused,
                                                &algorithms));
       break;
   }
@@ -287,6 +284,26 @@ static string AlgorithmToString(const se::dnn::AlgorithmDesc& algo) {
   return tensorflow::strings::StrCat(algo.algo_id());
 }
 
+// Determines whether we can safely perform a winograd non-fused convolution for
+// the given input and output descriptors.  This works around b/68264959, an
+// integer overflow in cuDNNv5 and cuDNNv6.
+static bool ShouldIncludeWinogradNonfusedAlgo(
+    const BatchDescriptor& input_descriptor,
+    const BatchDescriptor& output_descriptor) {
+  int64 batch = input_descriptor.count();
+  int64 in_depths = input_descriptor.feature_map_count();
+  int64 in_rows = input_descriptor.height();
+  int64 in_cols = input_descriptor.width();
+  int64 out_depths = output_descriptor.feature_map_count();
+
+  int64 total_size = 16 * std::ceil(batch / 16.0) *
+                     std::max(in_depths, out_depths) * in_cols * in_rows *
+                     sizeof(float);
+  int64 threshold = 1L << 31;
+
+  return total_size < threshold;
+}
+
 tensorflow::Status ConvolutionThunk::ConvolveWithTune(
     const BatchDescriptor& input_descriptor, se::DeviceMemory<float> input_data,
     const FilterDescriptor& filter_descriptor,
@@ -303,9 +320,13 @@ tensorflow::Status ConvolutionThunk::ConvolveWithTune(
                "ConvolutionThunk: "
             << this;
 
+    bool with_winograd_nonfused =
+        ShouldIncludeWinogradNonfusedAlgo(input_descriptor, output_descriptor);
+
     se::dnn::ProfileResult best_result;
     se::dnn::ProfileResult best_result_without_scratch;
-    std::vector<AlgorithmDesc> algorithms = GetAlgorithms(stream->parent());
+    std::vector<AlgorithmDesc> algorithms =
+        GetAlgorithms(with_winograd_nonfused, stream->parent());
     for (auto algorithm : algorithms) {
       ConvolveScratchAllocator scratch_allocator(
           buffer_allocations.device_ordinal(),
diff --git a/tensorflow/compiler/xla/service/gpu/convolution_thunk.h b/tensorflow/compiler/xla/service/gpu/convolution_thunk.h
index 13432301b2..5ac5db2f04 100644
--- a/tensorflow/compiler/xla/service/gpu/convolution_thunk.h
+++ b/tensorflow/compiler/xla/service/gpu/convolution_thunk.h
@@ -116,6 +116,7 @@ class ConvolutionThunk : public Thunk {
 
   // Returns the convolve algorithms that can be used for this ConvolutionThunk.
   std::vector<perftools::gputools::dnn::AlgorithmDesc> GetAlgorithms(
+      bool with_winograd_nonfused,
       perftools::gputools::StreamExecutor* stream_exec) const;
 
   // Fastest cuDNN convolution algorithm for this thunk learned from
-- 
GitLab


From 9b858b88784b6a9232d23d3a13353cd6ef43cd18 Mon Sep 17 00:00:00 2001
From: Jacques Pienaar <jpienaar@google.com>
Date: Fri, 17 Nov 2017 14:53:59 -0800
Subject: [PATCH 0072/1225] Avoid reordering in ports in SwitchGrad for
 CondContext.

PiperOrigin-RevId: 176164285
---
 tensorflow/python/ops/control_flow_grad.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/tensorflow/python/ops/control_flow_grad.py b/tensorflow/python/ops/control_flow_grad.py
index 3c082b19b6..22dc6771ec 100644
--- a/tensorflow/python/ops/control_flow_grad.py
+++ b/tensorflow/python/ops/control_flow_grad.py
@@ -69,13 +69,12 @@ def _SwitchGrad(op, *grad):
       # meaning the output is not differentiable.
       return None, None
   elif isinstance(op_ctxt, CondContext):
-    good_grad = grad[op_ctxt.branch]
     zero_grad = grad[1 - op_ctxt.branch]
     # At this point, we have created zero_grad guarded by the right switch.
     # Unfortunately, we may still get None here for not trainable data types.
     if zero_grad is None:
       return None, None
-    return merge([good_grad, zero_grad], name="cond_grad")[0], None
+    return merge(grad, name="cond_grad")[0], None
   else:
     false_grad = switch(grad[0], op.inputs[1])[0]
     true_grad = switch(grad[1], op.inputs[1])[1]
-- 
GitLab


From c86793dd597649fdf64964f87e6f8e896966e490 Mon Sep 17 00:00:00 2001
From: Rui Zhao <rzhao@google.com>
Date: Fri, 17 Nov 2017 15:11:09 -0800
Subject: [PATCH 0073/1225] Register tile_ops GPU kernel for bool types.

PiperOrigin-RevId: 176166731
---
 tensorflow/core/kernels/tile_functor_gpu.cu.cc |  1 +
 tensorflow/core/kernels/tile_ops.cc            | 14 ++++++++++++--
 tensorflow/core/util/cuda_kernel_helper.h      | 10 ++++++++++
 3 files changed, 23 insertions(+), 2 deletions(-)

diff --git a/tensorflow/core/kernels/tile_functor_gpu.cu.cc b/tensorflow/core/kernels/tile_functor_gpu.cu.cc
index 5a36e7567b..84a5060fc3 100644
--- a/tensorflow/core/kernels/tile_functor_gpu.cu.cc
+++ b/tensorflow/core/kernels/tile_functor_gpu.cu.cc
@@ -90,6 +90,7 @@ typedef Eigen::GpuDevice GPUDevice;
   template struct Tile<GPUDevice, T, int32>; \
   template struct Tile<GPUDevice, T, int64>;
 
+TF_CALL_bool(DEFINE_TYPE);
 TF_CALL_int16(DEFINE_TYPE);
 TF_CALL_int32(DEFINE_TYPE);
 TF_CALL_int64(DEFINE_TYPE);
diff --git a/tensorflow/core/kernels/tile_ops.cc b/tensorflow/core/kernels/tile_ops.cc
index fa5afe6a31..68cdae3249 100644
--- a/tensorflow/core/kernels/tile_ops.cc
+++ b/tensorflow/core/kernels/tile_ops.cc
@@ -222,6 +222,7 @@ TF_CALL_complex128(HANDLE_TYPE_NAME_CPU);
 TF_CALL_string(HANDLE_TYPE_NAME_CPU);
 
 #if GOOGLE_CUDA
+TF_CALL_bool(HANDLE_TYPE_NAME_GPU);
 TF_CALL_float(HANDLE_TYPE_NAME_GPU);
 TF_CALL_double(HANDLE_TYPE_NAME_GPU);
 TF_CALL_int16(HANDLE_TYPE_NAME_GPU);
@@ -534,7 +535,7 @@ REGISTER_KERNEL_BUILDER(Name("TileGrad")
                         TileGradientOp<CPUDevice, int64>);
 
 #if GOOGLE_CUDA
-#define REGISTER_GPU(type)                                         \
+#define REGISTER_GPU_TILE(type)                                    \
   REGISTER_KERNEL_BUILDER(Name("Tile")                             \
                               .Device(DEVICE_GPU)                  \
                               .TypeConstraint<type>("T")           \
@@ -546,7 +547,9 @@ REGISTER_KERNEL_BUILDER(Name("TileGrad")
                               .TypeConstraint<type>("T")           \
                               .TypeConstraint<int64>("Tmultiples") \
                               .HostMemory("multiples"),            \
-                          TileOp<GPUDevice, int64>);               \
+                          TileOp<GPUDevice, int64>);
+
+#define REGISTER_GPU_TILE_GRAD(type)                               \
   REGISTER_KERNEL_BUILDER(Name("TileGrad")                         \
                               .Device(DEVICE_GPU)                  \
                               .TypeConstraint<type>("T")           \
@@ -560,6 +563,11 @@ REGISTER_KERNEL_BUILDER(Name("TileGrad")
                               .HostMemory("multiples"),            \
                           TileGradientOp<GPUDevice, int64>);
 
+#define REGISTER_GPU(type) \
+  REGISTER_GPU_TILE(type); \
+  REGISTER_GPU_TILE_GRAD(type);
+
+TF_CALL_bool(REGISTER_GPU_TILE);
 TF_CALL_float(REGISTER_GPU);
 TF_CALL_double(REGISTER_GPU);
 TF_CALL_half(REGISTER_GPU);
@@ -568,6 +576,8 @@ TF_CALL_int32(REGISTER_GPU);
 TF_CALL_complex64(REGISTER_GPU);
 TF_CALL_complex128(REGISTER_GPU)
 
+#undef REGISTER_GPU_TILE
+#undef REGISTER_GPU_TILE_GRAD
 #undef REGISTER_GPU
 #endif  // GOOGLE_CUDA
 
diff --git a/tensorflow/core/util/cuda_kernel_helper.h b/tensorflow/core/util/cuda_kernel_helper.h
index 8315f208e7..8fa0dfbed9 100644
--- a/tensorflow/core/util/cuda_kernel_helper.h
+++ b/tensorflow/core/util/cuda_kernel_helper.h
@@ -374,6 +374,16 @@ __device__ __host__ inline Eigen::half ldg(const Eigen::half* address) {
 #endif
 }
 
+template <>
+__device__ __host__ inline bool ldg(const bool* address) {
+#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 350
+  return *reinterpret_cast<const bool*>(
+      __ldg(reinterpret_cast<const char*>(address)));
+#else
+  return *address;
+#endif
+}
+
 // CUDA provides atomic ops, but not for all types.  We provide wrappers
 // for some ops and provide implementation for all reasonable types.
 #define CUDA_ATOMIC_WRAPPER(op, T) \
-- 
GitLab


From 3cc43816cda27c497399bf94429b174db5ed6d6b Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 17 Nov 2017 15:19:17 -0800
Subject: [PATCH 0074/1225] Adds validation for categorical_columns in
 shared_embedding_columns.

PiperOrigin-RevId: 176167775
---
 .../python/feature_column/feature_column.py   | 28 +++++++++++++---
 .../feature_column/feature_column_test.py     | 32 +++++++++++++++++++
 2 files changed, 55 insertions(+), 5 deletions(-)

diff --git a/tensorflow/python/feature_column/feature_column.py b/tensorflow/python/feature_column/feature_column.py
index 5ee93be7c3..a19636474b 100644
--- a/tensorflow/python/feature_column/feature_column.py
+++ b/tensorflow/python/feature_column/feature_column.py
@@ -689,12 +689,30 @@ def _shared_embedding_columns(
     raise ValueError('initializer must be callable if specified.')
   if initializer is None:
     initializer = init_ops.truncated_normal_initializer(
-        mean=0.0, stddev=1 / math.sqrt(dimension))
-  # TODO(b/67952670): Validate categorical_columns.
+        mean=0.0, stddev=1. / math.sqrt(dimension))
+
+  # Sort the columns so the default collection name is deterministic even if the
+  # user passes columns from an unsorted collection, such as dict.values().
+  sorted_columns = sorted(categorical_columns, key=lambda x: x.name)
+
+  c0 = sorted_columns[0]
+  if not isinstance(c0, _CategoricalColumn):
+    raise ValueError(
+        'All categorical_columns must be subclasses of _CategoricalColumn. '
+        'Given: {}, of type: {}'.format(c0, type(c0)))
+  if isinstance(c0, _WeightedCategoricalColumn):
+    c0 = c0.categorical_column
+  for c in sorted_columns[1:]:
+    if isinstance(c, _WeightedCategoricalColumn):
+      c = c.categorical_column
+    if not isinstance(c, type(c0)):
+      raise ValueError(
+          'To use shared_embedding_column, all categorical_columns must have '
+          'the same type, or be weighted_categorical_column of the same type. '
+          'Given column: {} of type: {} does not match given column: {} of '
+          'type: {}'.format(c0, type(c0), c, type(c)))
+
   if not shared_embedding_collection_name:
-    # Sort the columns so the name is deterministic even if the user passes
-    # columns from an unsorted collection, such as dict.values().
-    sorted_columns = sorted(categorical_columns, key=lambda x: x.name)
     shared_embedding_collection_name = '_'.join(c.name for c in sorted_columns)
     shared_embedding_collection_name += '_shared_embedding'
 
diff --git a/tensorflow/python/feature_column/feature_column_test.py b/tensorflow/python/feature_column/feature_column_test.py
index 9981f358b1..6ac5ce8757 100644
--- a/tensorflow/python/feature_column/feature_column_test.py
+++ b/tensorflow/python/feature_column/feature_column_test.py
@@ -4162,6 +4162,38 @@ class SharedEmbeddingColumnTest(test.TestCase):
           [categorical_column_a, categorical_column_b], dimension=2,
           initializer='not_fn')
 
+  def test_incompatible_column_type(self):
+    categorical_column_a = fc.categorical_column_with_identity(
+        key='aaa', num_buckets=3)
+    categorical_column_b = fc.categorical_column_with_identity(
+        key='bbb', num_buckets=3)
+    categorical_column_c = fc.categorical_column_with_hash_bucket(
+        key='ccc', hash_bucket_size=3)
+    with self.assertRaisesRegexp(
+        ValueError,
+        'all categorical_columns must have the same type.*'
+        '_IdentityCategoricalColumn.*_HashedCategoricalColumn'):
+      fc_lib._shared_embedding_columns(
+          [categorical_column_a, categorical_column_b, categorical_column_c],
+          dimension=2)
+
+  def test_weighted_categorical_column_ok(self):
+    categorical_column_a = fc.categorical_column_with_identity(
+        key='aaa', num_buckets=3)
+    weighted_categorical_column_a = fc.weighted_categorical_column(
+        categorical_column_a, weight_feature_key='aaa_weights')
+    categorical_column_b = fc.categorical_column_with_identity(
+        key='bbb', num_buckets=3)
+    weighted_categorical_column_b = fc.weighted_categorical_column(
+        categorical_column_b, weight_feature_key='bbb_weights')
+    fc_lib._shared_embedding_columns(
+        [weighted_categorical_column_a, categorical_column_b], dimension=2)
+    fc_lib._shared_embedding_columns(
+        [categorical_column_a, weighted_categorical_column_b], dimension=2)
+    fc_lib._shared_embedding_columns(
+        [weighted_categorical_column_a, weighted_categorical_column_b],
+        dimension=2)
+
   def test_parse_example(self):
     a = fc.categorical_column_with_vocabulary_list(
         key='aaa', vocabulary_list=('omar', 'stringer', 'marlo'))
-- 
GitLab


From cb12ebe044ad8fb8515bc9d95d27c0ab19ec314b Mon Sep 17 00:00:00 2001
From: Anna R <annarev@google.com>
Date: Fri, 17 Nov 2017 15:20:49 -0800
Subject: [PATCH 0075/1225] Provide an option to use ApiDef instead of
 OpGenOverrides when generating C++ API. Also, updating UpdateDocs method to
 ApiDef to replace names in docs.

PiperOrigin-RevId: 176167953
---
 tensorflow/cc/BUILD                           |  27 +++
 tensorflow/cc/framework/cc_op_gen.cc          | 218 ++++++++++--------
 tensorflow/cc/framework/cc_op_gen.h           |   6 +-
 tensorflow/cc/framework/cc_op_gen_main.cc     |  38 ++-
 tensorflow/cc/framework/cc_op_gen_test.cc     | 195 ++++++++++++++++
 tensorflow/contrib/cmake/tf_cc_ops.cmake      |   2 +-
 tensorflow/core/BUILD                         |   6 +-
 tensorflow/core/api_def/api_test.cc           |   9 +
 .../base_api/api_def_ApplyAddSign.pbtxt       |  65 ++++++
 .../base_api/api_def_ApplyPowerSign.pbtxt     |  65 ++++++
 .../api_def_BytesProducedStatsDataset.pbtxt   |   4 +
 .../base_api/api_def_DeserializeSparse.pbtxt  |  19 ++
 .../api_def_GenerateVocabRemapping.pbtxt      |  13 +-
 .../api_def_IteratorSetStatsAggregator.pbtxt  |   4 +
 .../api_def_LatencyStatsDataset.pbtxt         |   4 +
 .../base_api/api_def_MatrixExponential.pbtxt  |  32 +++
 .../api_def/base_api/api_def_NthElement.pbtxt |   2 +-
 .../api_def_ResourceApplyAddSign.pbtxt        |  59 +++++
 .../api_def_ResourceApplyPowerSign.pbtxt      |  59 +++++
 .../api_def_StatsAggregatorHandle.pbtxt       |   4 +
 .../api_def_StatsAggregatorSummary.pbtxt      |   4 +
 .../base_api/api_def_TensorArrayV3.pbtxt      |  11 +
 .../api_def_DeserializeSparse.pbtxt           |   4 +
 .../api_def_MatrixExponential.pbtxt           |   4 +
 tensorflow/core/framework/op_gen_lib.cc       |  60 +++++
 tensorflow/core/framework/op_gen_lib.h        |   6 +
 tensorflow/core/framework/op_gen_lib_test.cc  |  57 +++++
 tensorflow/tensorflow.bzl                     |  31 ++-
 28 files changed, 894 insertions(+), 114 deletions(-)
 create mode 100644 tensorflow/cc/framework/cc_op_gen_test.cc
 create mode 100644 tensorflow/core/api_def/base_api/api_def_ApplyAddSign.pbtxt
 create mode 100644 tensorflow/core/api_def/base_api/api_def_ApplyPowerSign.pbtxt
 create mode 100644 tensorflow/core/api_def/base_api/api_def_BytesProducedStatsDataset.pbtxt
 create mode 100644 tensorflow/core/api_def/base_api/api_def_DeserializeSparse.pbtxt
 create mode 100644 tensorflow/core/api_def/base_api/api_def_IteratorSetStatsAggregator.pbtxt
 create mode 100644 tensorflow/core/api_def/base_api/api_def_LatencyStatsDataset.pbtxt
 create mode 100644 tensorflow/core/api_def/base_api/api_def_MatrixExponential.pbtxt
 create mode 100644 tensorflow/core/api_def/base_api/api_def_ResourceApplyAddSign.pbtxt
 create mode 100644 tensorflow/core/api_def/base_api/api_def_ResourceApplyPowerSign.pbtxt
 create mode 100644 tensorflow/core/api_def/base_api/api_def_StatsAggregatorHandle.pbtxt
 create mode 100644 tensorflow/core/api_def/base_api/api_def_StatsAggregatorSummary.pbtxt
 create mode 100644 tensorflow/core/api_def/python_api/api_def_DeserializeSparse.pbtxt
 create mode 100644 tensorflow/core/api_def/python_api/api_def_MatrixExponential.pbtxt

diff --git a/tensorflow/cc/BUILD b/tensorflow/cc/BUILD
index 80112f9b44..e354831d7d 100644
--- a/tensorflow/cc/BUILD
+++ b/tensorflow/cc/BUILD
@@ -421,6 +421,7 @@ tf_cc_test(
 
 tf_gen_op_wrappers_cc(
     name = "cc_ops",
+    api_def_srcs = ["//tensorflow/core:base_api_def"],
     op_lib_names = [
         "array_ops",
         "audio_ops",
@@ -525,6 +526,30 @@ cc_library_with_android_deps(
         "//tensorflow/core:android_tensorflow_lib",
     ],
     copts = tf_copts(),
+    data = [
+        "//tensorflow/core:base_api_def",
+    ],
+    deps = [
+        "//tensorflow/core:framework",
+        "//tensorflow/core:lib",
+        "//tensorflow/core:lib_internal",
+        "//tensorflow/core:op_gen_lib",
+        "//tensorflow/core:op_gen_overrides_proto_cc",
+        "//tensorflow/core:proto_text",
+        "//tensorflow/core:protos_all_cc",
+    ],
+)
+
+tf_cc_test(
+    name = "cc_op_gen_test",
+    srcs = [
+        "framework/cc_op_gen.cc",
+        "framework/cc_op_gen.h",
+        "framework/cc_op_gen_test.cc",
+    ],
+    data = [
+        "//tensorflow/cc:ops/op_gen_overrides.pbtxt",
+    ],
     deps = [
         "//tensorflow/core:framework",
         "//tensorflow/core:lib",
@@ -533,6 +558,8 @@ cc_library_with_android_deps(
         "//tensorflow/core:op_gen_overrides_proto_cc",
         "//tensorflow/core:proto_text",
         "//tensorflow/core:protos_all_cc",
+        "//tensorflow/core:test",
+        "//tensorflow/core:test_main",
     ],
 )
 
diff --git a/tensorflow/cc/framework/cc_op_gen.cc b/tensorflow/cc/framework/cc_op_gen.cc
index 38a17598b8..6f2b7acb82 100644
--- a/tensorflow/cc/framework/cc_op_gen.cc
+++ b/tensorflow/cc/framework/cc_op_gen.cc
@@ -18,8 +18,10 @@ limitations under the License.
 #include <vector>
 
 #include "tensorflow/cc/framework/cc_op_gen.h"
+#include "tensorflow/core/framework/api_def.pb.h"
 #include "tensorflow/core/framework/attr_value.pb.h"
 #include "tensorflow/core/framework/attr_value_util.h"
+#include "tensorflow/core/framework/op_def_util.h"
 #include "tensorflow/core/framework/op_gen_lib.h"
 #include "tensorflow/core/framework/op_gen_overrides.pb.h"
 #include "tensorflow/core/framework/tensor.pb.h"
@@ -385,10 +387,10 @@ bool ArgIsList(const OpDef::ArgDef& arg) {
 }
 
 bool HasOptionalAttrs(
-    const OpDef& op_def,
+    const ApiDef& api_def,
     const std::unordered_map<string, string>& inferred_input_attrs) {
-  for (int i = 0; i < op_def.attr_size(); ++i) {
-    const auto& attr(op_def.attr(i));
+  for (int i = 0; i < api_def.attr_size(); ++i) {
+    const auto& attr(api_def.attr(i));
     if ((inferred_input_attrs.find(attr.name()) ==
          inferred_input_attrs.end()) &&
         attr.has_default_value()) {
@@ -398,12 +400,21 @@ bool HasOptionalAttrs(
   return false;
 }
 
+const ApiDef::Arg* FindInputArg(StringPiece name, const ApiDef& api_def) {
+  for (int i = 0; i < api_def.in_arg_size(); ++i) {
+    if (api_def.in_arg(i).name() == name) {
+      return &api_def.in_arg(i);
+    }
+  }
+  return nullptr;
+}
+
 struct OpInfo {
   // graph_op_def: The OpDef used by the runtime, has the names that
   //   must be used when calling NodeBuilder.
   // interface_op_def: The OpDef used in the interface in the generated
   //   code, with possibly overridden names and defaults.
-  explicit OpInfo(const OpDef& graph_op_def, const OpDef& inteface_op_def,
+  explicit OpInfo(const OpDef& graph_op_def, const ApiDef& api_def,
                   const std::vector<string>& aliases);
   string GetOpAttrStruct() const;
   string GetConstructorDecl(StringPiece op_name_prefix,
@@ -423,74 +434,81 @@ struct OpInfo {
   string comment;
 
   const OpDef& graph_op_def;
-  const OpDef& op_def;
+  const ApiDef& api_def;
   const std::vector<string>& aliases;
+  // Map from type attribute to corresponding original argument name.
   std::unordered_map<string, string> inferred_input_attrs;
 };
 
-OpInfo::OpInfo(const OpDef& g_op_def, const OpDef& i_op_def,
-               const std::vector<string>& a)
-    : graph_op_def(g_op_def), op_def(i_op_def), aliases(a) {
-  op_name = op_def.name();
-  InferOpAttributes(op_def, &inferred_input_attrs);
-  has_optional_attrs = HasOptionalAttrs(op_def, inferred_input_attrs);
+OpInfo::OpInfo(const OpDef& graph_op_def, const ApiDef& api_def,
+               const std::vector<string>& aliases)
+    : graph_op_def(graph_op_def), api_def(api_def), aliases(aliases) {
+  op_name = api_def.endpoint(0).name();
+  InferOpAttributes(graph_op_def, &inferred_input_attrs);
+  has_optional_attrs = HasOptionalAttrs(api_def, inferred_input_attrs);
   arg_types.push_back("const ::tensorflow::Scope&");
   arg_names.push_back("scope");
 
-  if (op_def.has_deprecation()) {
-    if (!op_def.summary().empty()) {
-      comment = strings::StrCat(op_def.summary(), "\n");
+  if (graph_op_def.has_deprecation()) {
+    if (!api_def.summary().empty()) {
+      comment = strings::StrCat(api_def.summary(), "\n");
     }
     strings::StrAppend(&comment, "DEPRECATED at GraphDef version ",
-                       op_def.deprecation().version(), ":\n",
-                       op_def.deprecation().explanation(), ".\n");
-  } else if (op_def.summary().empty()) {
+                       graph_op_def.deprecation().version(), ":\n",
+                       graph_op_def.deprecation().explanation(), ".\n");
+  } else if (api_def.summary().empty()) {
     comment = "TODO: add doc.\n";
   } else {
-    comment = strings::StrCat(op_def.summary(), "\n");
+    comment = strings::StrCat(api_def.summary(), "\n");
   }
-  if (!op_def.description().empty()) {
-    strings::StrAppend(&comment, "\n", op_def.description(), "\n");
+  if (!api_def.description().empty()) {
+    strings::StrAppend(&comment, "\n", api_def.description(), "\n");
   }
   strings::StrAppend(&comment, "\nArguments:\n* scope: A Scope object\n");
 
   // Process inputs
-  for (int i = 0; i < op_def.input_arg_size(); ++i) {
-    const auto& arg(op_def.input_arg(i));
+  for (int i = 0; i < api_def.arg_order_size(); ++i) {
+    const auto& arg = *FindInputArg(api_def.arg_order(i), graph_op_def);
+    const auto& api_def_arg = *FindInputArg(api_def.arg_order(i), api_def);
     arg_types.push_back(strings::StrCat(
         "::tensorflow::", ArgIsList(arg) ? "InputList" : "Input"));
-    arg_names.push_back(AvoidCPPKeywords(arg.name()));
+    arg_names.push_back(AvoidCPPKeywords(api_def_arg.rename_to()));
 
     // TODO(keveman): Include input type information.
-    StringPiece description = arg.description();
+    StringPiece description = api_def_arg.description();
     if (!description.empty()) {
       ConsumeEquals(&description);
-      strings::StrAppend(&comment, "* ", AvoidCPPKeywords(arg.name()), ": ",
-                         arg.description(), "\n");
+      strings::StrAppend(&comment, "* ",
+                         AvoidCPPKeywords(api_def_arg.rename_to()), ": ",
+                         api_def_arg.description(), "\n");
     }
   }
 
   // Process attrs
   string required_attrs_comment;
   string optional_attrs_comment;
-  for (int i = 0; i < op_def.attr_size(); ++i) {
-    const auto& attr(op_def.attr(i));
+  for (int i = 0; i < graph_op_def.attr_size(); ++i) {
+    // ApiDef attributes must be in the same order as in OpDef since
+    // we initialize ApiDef based on OpDef.
+    const auto& attr(graph_op_def.attr(i));
+    const auto& api_def_attr(api_def.attr(i));
+    CHECK_EQ(attr.name(), api_def_attr.name());
     // Skip inferred arguments
     if (inferred_input_attrs.count(attr.name()) > 0) continue;
 
     const auto entry = AttrTypeName(attr.type());
     const auto attr_type_name = entry.first;
     const bool use_const = entry.second;
-    string attr_name = AvoidCPPKeywords(attr.name());
+    string attr_name = AvoidCPPKeywords(api_def_attr.rename_to());
 
     string attr_comment;
-    if (!attr.description().empty()) {
+    if (!api_def_attr.description().empty()) {
       // TODO(keveman): Word wrap and indent this, to handle multi-line
       // descriptions.
       strings::StrAppend(&attr_comment, "* ", attr_name, ": ",
-                         attr.description(), "\n");
+                         api_def_attr.description(), "\n");
     }
-    if (attr.has_default_value()) {
+    if (api_def_attr.has_default_value()) {
       strings::StrAppend(&optional_attrs_comment, attr_comment);
     } else {
       strings::StrAppend(&required_attrs_comment, attr_comment);
@@ -508,44 +526,49 @@ OpInfo::OpInfo(const OpDef& g_op_def, const OpDef& i_op_def,
   }
 
   // Process outputs
-  for (int i = 0; i < op_def.output_arg_size(); ++i) {
-    const auto& arg = op_def.output_arg(i);
+  for (int i = 0; i < graph_op_def.output_arg_size(); ++i) {
+    // ApiDef arguments must be in the same order as in OpDef since
+    // we initialize ApiDef based on OpDef.
+    const auto& arg = graph_op_def.output_arg(i);
+    const auto& api_def_arg(api_def.out_arg(i));
+    CHECK_EQ(arg.name(), api_def_arg.name());
+
     bool is_list = ArgIsList(arg);
     output_types.push_back(
         strings::StrCat("::tensorflow::", is_list ? "OutputList" : "Output"));
-    output_names.push_back(AvoidCPPKeywords(arg.name()));
+    output_names.push_back(AvoidCPPKeywords(api_def_arg.rename_to()));
     is_list_output.push_back(is_list);
   }
 
   strings::StrAppend(&comment, "\nReturns:\n");
-  if (op_def.output_arg_size() == 0) {  // No outputs.
+  if (graph_op_def.output_arg_size() == 0) {  // No outputs.
     strings::StrAppend(&comment, "* the created `Operation`\n");
-  } else if (op_def.output_arg_size() == 1) {  // One output
+  } else if (graph_op_def.output_arg_size() == 1) {  // One output
     if (is_list_output[0]) {
       strings::StrAppend(&comment, "* `OutputList`: ");
     } else {
       strings::StrAppend(&comment, "* `Output`: ");
     }
-    if (op_def.output_arg(0).description().empty()) {
-      strings::StrAppend(&comment, "The ", op_def.output_arg(0).name(),
+    if (api_def.out_arg(0).description().empty()) {
+      strings::StrAppend(&comment, "The ", api_def.out_arg(0).name(),
                          " tensor.\n");
     } else {
       // TODO(josh11b): Word wrap this.
-      strings::StrAppend(&comment, op_def.output_arg(0).description(), "\n");
+      strings::StrAppend(&comment, api_def.out_arg(0).description(), "\n");
     }
   } else {  // Multiple outputs.
-    for (int i = 0; i < op_def.output_arg_size(); ++i) {
+    for (int i = 0; i < graph_op_def.output_arg_size(); ++i) {
       if (is_list_output[i]) {
         strings::StrAppend(&comment, "* `OutputList`");
       } else {
         strings::StrAppend(&comment, "* `Output`");
       }
       strings::StrAppend(&comment, " ", output_names[i]);
-      if (op_def.output_arg(i).description().empty()) {
+      if (api_def.out_arg(i).description().empty()) {
         strings::StrAppend(&comment, "\n");
       } else {
         // TODO(josh11b): Word wrap this.
-        strings::StrAppend(&comment, ": ", op_def.output_arg(i).description(),
+        strings::StrAppend(&comment, ": ", api_def.out_arg(i).description(),
                            "\n");
       }
     }
@@ -564,19 +587,20 @@ string OpInfo::GetOpAttrStruct() const {
   string struct_fields;
   string setters;
 
-  for (int i = 0; i < op_def.attr_size(); ++i) {
-    const auto& attr(op_def.attr(i));
+  for (int i = 0; i < graph_op_def.attr_size(); ++i) {
+    const auto& attr(graph_op_def.attr(i));
+    const auto& api_def_attr(api_def.attr(i));
     // If attr will be inferred or it doesn't have a default value, don't
     // add it to the struct.
     if ((inferred_input_attrs.find(attr.name()) !=
          inferred_input_attrs.end()) ||
-        !attr.has_default_value()) {
+        !api_def_attr.has_default_value()) {
       continue;
     }
     const auto entry = AttrTypeName(attr.type());
     const auto attr_type_name = entry.first;
     const bool use_const = entry.second;
-    const string camel_case_name = ToCamelCase(attr.name());
+    const string camel_case_name = ToCamelCase(api_def_attr.rename_to());
     const string suffix =
         (camel_case_name == op_name || camel_case_name == "Attrs") ? "_" : "";
     const string attr_func_def =
@@ -584,22 +608,25 @@ string OpInfo::GetOpAttrStruct() const {
                         attr_type_name, use_const ? "&" : "");
 
     string attr_comment;
-    if (!attr.description().empty()) {
-      strings::StrAppend(&attr_comment, attr.description(), "\n\n");
+    if (!api_def_attr.description().empty()) {
+      strings::StrAppend(&attr_comment, api_def_attr.description(), "\n\n");
     }
     strings::StrAppend(&attr_comment, "Defaults to ",
-                       SummarizeAttrValue(attr.default_value()), "\n");
+                       SummarizeAttrValue(api_def_attr.default_value()), "\n");
     attr_comment = MakeComment(attr_comment, "    ");
 
     strings::StrAppend(&setters, attr_comment);
     strings::StrAppend(&setters, "    Attrs ", attr_func_def, " x) {\n");
     strings::StrAppend(&setters, "      Attrs ret = *this;\n");
-    strings::StrAppend(&setters, "      ret.", attr.name(), "_ = x;\n");
+    strings::StrAppend(&setters, "      ret.", api_def_attr.rename_to(),
+                       "_ = x;\n");
     strings::StrAppend(&setters, "      return ret;\n    }\n\n");
 
     strings::StrAppend(
-        &struct_fields, "    ", attr_type_name, " ", attr.name(), "_ = ",
-        PrintAttrValue(op_def.name(), attr.default_value()), ";\n");
+        &struct_fields, "    ", attr_type_name, " ", api_def_attr.rename_to(),
+        "_ = ",
+        PrintAttrValue(graph_op_def.name(), api_def_attr.default_value()),
+        ";\n");
   }
 
   if (struct_fields.empty()) {
@@ -676,17 +703,18 @@ void OpInfo::WriteClassDecl(WritableFile* h) const {
   // Add the static functions to set optional attrs
   if (has_optional_attrs) {
     strings::StrAppend(&class_decl, "\n");
-    for (int i = 0; i < op_def.attr_size(); ++i) {
-      const auto& attr(op_def.attr(i));
+    for (int i = 0; i < graph_op_def.attr_size(); ++i) {
+      const auto& attr(graph_op_def.attr(i));
+      const auto& api_def_attr(api_def.attr(i));
       if ((inferred_input_attrs.find(attr.name()) !=
            inferred_input_attrs.end()) ||
-          !attr.has_default_value()) {
+          !api_def_attr.has_default_value()) {
         continue;
       }
       const auto entry = AttrTypeName(attr.type());
       const auto attr_type_name = entry.first;
       const bool use_const = entry.second;
-      const string camel_case_name = ToCamelCase(attr.name());
+      const string camel_case_name = ToCamelCase(api_def_attr.rename_to());
       const string suffix =
           (camel_case_name == op_name || camel_case_name == "Attrs") ? "_" : "";
       const string attr_func_def = strings::StrCat(
@@ -726,11 +754,11 @@ void OpInfo::GetOutput(string* out) const {
       strings::StrCat("if (!", scope_str, ".ok()) return;");
 
   // No outputs.
-  if (op_def.output_arg_size() == 0) {
+  if (graph_op_def.output_arg_size() == 0) {
     strings::StrAppend(out, "  this->operation = Operation(ret);\n  return;\n");
     return;
   }
-  if (op_def.output_arg_size() == 1) {
+  if (graph_op_def.output_arg_size() == 1) {
     // One output, no need for NameRangeMap
     if (is_list_output[0]) {
       strings::StrAppend(out,
@@ -752,7 +780,7 @@ void OpInfo::GetOutput(string* out) const {
                      ".UpdateStatus(_status_);\n", "    return;\n");
   strings::StrAppend(out, "  }\n\n");
 
-  for (int i = 0; i < op_def.output_arg_size(); ++i) {
+  for (int i = 0; i < graph_op_def.output_arg_size(); ++i) {
     const string arg_range = strings::StrCat(
         "_outputs_range[\"", graph_op_def.output_arg(i).name(), "\"]");
     if (is_list_output[i]) {
@@ -776,11 +804,13 @@ string OpInfo::GetConstructorBody() const {
 
   strings::StrAppend(&body, "  ", return_on_error, "\n");
 
-  for (int i = 0; i < op_def.input_arg_size(); ++i) {
-    const auto& arg(op_def.input_arg(i));
-    strings::StrAppend(&body, "  auto _", arg.name(), " = ::tensorflow::ops::",
-                       ArgIsList(arg) ? "AsNodeOutList" : "AsNodeOut", "(",
-                       scope_str, ", ", AvoidCPPKeywords(arg.name()), ");\n");
+  for (int i = 0; i < graph_op_def.input_arg_size(); ++i) {
+    const auto& arg(graph_op_def.input_arg(i));
+    const auto& api_def_arg(api_def.in_arg(i));
+    strings::StrAppend(
+        &body, "  auto _", api_def_arg.rename_to(), " = ::tensorflow::ops::",
+        ArgIsList(arg) ? "AsNodeOutList" : "AsNodeOut", "(", scope_str, ", ",
+        AvoidCPPKeywords(api_def_arg.rename_to()), ");\n");
     strings::StrAppend(&body, "  ", return_on_error, "\n");
   }
 
@@ -791,19 +821,21 @@ string OpInfo::GetConstructorBody() const {
       &body, "  auto builder = ::tensorflow::NodeBuilder(unique_name, \"",
       graph_op_def.name(), "\")\n");
   const string spaces = "                     ";
-  for (int i = 0; i < op_def.input_arg_size(); ++i) {
-    const auto& arg(op_def.input_arg(i));
-    strings::StrAppend(&body, spaces, ".Input(_", arg.name(), ")\n");
+  for (int i = 0; i < api_def.in_arg_size(); ++i) {
+    const auto& arg(api_def.in_arg(i));
+    strings::StrAppend(&body, spaces, ".Input(_", arg.rename_to(), ")\n");
   }
-  for (int i = 0; i < op_def.attr_size(); ++i) {
+  for (int i = 0; i < api_def.attr_size(); ++i) {
     const auto& graph_attr(graph_op_def.attr(i));
-    const auto& attr(op_def.attr(i));
-    if (inferred_input_attrs.find(attr.name()) != inferred_input_attrs.end()) {
+    const auto& api_def_attr(api_def.attr(i));
+    if (inferred_input_attrs.find(api_def_attr.name()) !=
+        inferred_input_attrs.end()) {
       continue;
     }
-    const string attr_name = attr.has_default_value()
-                                 ? strings::StrCat("attrs.", attr.name(), "_")
-                                 : AvoidCPPKeywords(attr.name());
+    const string attr_name =
+        api_def_attr.has_default_value()
+            ? strings::StrCat("attrs.", api_def_attr.rename_to(), "_")
+            : AvoidCPPKeywords(api_def_attr.rename_to());
     strings::StrAppend(&body, spaces, ".Attr(\"", graph_attr.name(), "\", ",
                        attr_name, ")\n");
   }
@@ -845,10 +877,10 @@ void OpInfo::WriteClassDef(WritableFile* cc) const {
   TF_CHECK_OK(cc->Append(class_def));
 }
 
-void WriteCCOp(const OpDef& graph_op_def, const OpDef& interface_op_def,
+void WriteCCOp(const OpDef& graph_op_def, const ApiDef& api_def,
                const std::vector<string>& aliases, WritableFile* h,
                WritableFile* cc) {
-  OpInfo op_info(graph_op_def, interface_op_def, aliases);
+  OpInfo op_info(graph_op_def, api_def, aliases);
 
   op_info.WriteClassDecl(h);
   op_info.WriteClassDef(cc);
@@ -943,8 +975,9 @@ string MakeInternal(const string& fname) {
 
 }  // namespace
 
-void WriteCCOps(const OpList& ops, const string& dot_h_fname,
-                const string& dot_cc_fname, const string& overrides_fnames) {
+void WriteCCOps(const OpList& ops, const ApiDefMap& api_def_map,
+                const string& dot_h_fname, const string& dot_cc_fname,
+                const string& overrides_fnames) {
   Env* env = Env::Default();
 
   // Load the override map.
@@ -984,24 +1017,23 @@ void WriteCCOps(const OpList& ops, const string& dot_h_fname,
     // code depends on it.
     if (graph_op_def.name() == "Const") continue;
 
-    // Incorporate overrides from override_map.
-    OpDef interface_op_def = graph_op_def;
-    const OpGenOverride* op_override =
-        override_map.ApplyOverride(&interface_op_def);
+    const auto* api_def = api_def_map.GetApiDef(graph_op_def.name());
+
     std::vector<string> aliases;
-    if (op_override) {
-      if (op_override->skip()) continue;
-      aliases.assign(op_override->alias().begin(), op_override->alias().end());
-      if (op_override->hide()) {
-        // Write hidden ops to _internal.h and _internal.cc.
-        WriteCCOp(graph_op_def, interface_op_def, aliases, internal_h.get(),
-                  internal_cc.get());
-        continue;
-      }
+    if (api_def->visibility() == ApiDef::SKIP) continue;
+    // First endpoint is canonical, the rest are aliases.
+    for (int endpoint_i = 1; endpoint_i < api_def->endpoint_size();
+         ++endpoint_i) {
+      aliases.push_back(api_def->endpoint(endpoint_i).name());
+    }
+    if (api_def->visibility() == ApiDef::HIDDEN) {
+      // Write hidden ops to _internal.h and _internal.cc.
+      WriteCCOp(graph_op_def, *api_def, aliases, internal_h.get(),
+                internal_cc.get());
+      continue;
     }
-
     // This isn't a hidden op, write it to the main files.
-    WriteCCOp(graph_op_def, interface_op_def, aliases, h.get(), cc.get());
+    WriteCCOp(graph_op_def, *api_def, aliases, h.get(), cc.get());
   }
 
   FinishFiles(false, h.get(), cc.get(), op_header_guard);
diff --git a/tensorflow/cc/framework/cc_op_gen.h b/tensorflow/cc/framework/cc_op_gen.h
index fa5e004f03..cea2899014 100644
--- a/tensorflow/cc/framework/cc_op_gen.h
+++ b/tensorflow/cc/framework/cc_op_gen.h
@@ -17,13 +17,15 @@ limitations under the License.
 #define THIRD_PARTY_TENSORFLOW_CC_FRAMEWORK_CC_OP_GEN_H_
 
 #include "tensorflow/core/framework/op_def.pb.h"
+#include "tensorflow/core/framework/op_gen_lib.h"
 #include "tensorflow/core/platform/types.h"
 
 namespace tensorflow {
 
 /// Result is written to files dot_h and dot_cc.
-void WriteCCOps(const OpList& ops, const string& dot_h_fname,
-                const string& dot_cc_fname, const string& overrides_fnames);
+void WriteCCOps(const OpList& ops, const ApiDefMap& api_def_map,
+                const string& dot_h_fname, const string& dot_cc_fname,
+                const string& overrides_fnames);
 
 }  // namespace tensorflow
 
diff --git a/tensorflow/cc/framework/cc_op_gen_main.cc b/tensorflow/cc/framework/cc_op_gen_main.cc
index 3b80cf993e..326d5668b8 100644
--- a/tensorflow/cc/framework/cc_op_gen_main.cc
+++ b/tensorflow/cc/framework/cc_op_gen_main.cc
@@ -16,7 +16,11 @@ limitations under the License.
 #include "tensorflow/cc/framework/cc_op_gen.h"
 #include "tensorflow/core/framework/op.h"
 #include "tensorflow/core/framework/op_def.pb.h"
+#include "tensorflow/core/framework/op_gen_lib.h"
 #include "tensorflow/core/lib/core/stringpiece.h"
+#include "tensorflow/core/lib/io/path.h"
+#include "tensorflow/core/lib/strings/str_util.h"
+#include "tensorflow/core/platform/env.h"
 #include "tensorflow/core/platform/init_main.h"
 #include "tensorflow/core/platform/types.h"
 
@@ -24,10 +28,28 @@ namespace tensorflow {
 namespace {
 
 void PrintAllCCOps(const std::string& dot_h, const std::string& dot_cc,
-                   const std::string& overrides_fnames, bool include_internal) {
+                   const std::string& overrides_fnames, bool include_internal,
+                   const std::vector<string>& api_def_dirs) {
   OpList ops;
   OpRegistry::Global()->Export(include_internal, &ops);
-  WriteCCOps(ops, dot_h, dot_cc, overrides_fnames);
+  ApiDefMap api_def_map(ops);
+  if (!api_def_dirs.empty()) {
+    Env* env = Env::Default();
+    // Only load files that correspond to "ops".
+    for (const auto& op : ops.op()) {
+      for (const auto& api_def_dir : api_def_dirs) {
+        const std::string api_def_file_pattern =
+            io::JoinPath(api_def_dir, "api_def_" + op.name() + ".pbtxt");
+        if (env->FileExists(api_def_file_pattern).ok()) {
+          TF_CHECK_OK(api_def_map.LoadFile(env, api_def_file_pattern));
+        }
+      }
+    }
+  }
+
+  api_def_map.UpdateDocs();
+
+  WriteCCOps(ops, api_def_map, dot_h, dot_cc, overrides_fnames);
 }
 
 }  // namespace
@@ -35,18 +57,24 @@ void PrintAllCCOps(const std::string& dot_h, const std::string& dot_cc,
 
 int main(int argc, char* argv[]) {
   tensorflow::port::InitMain(argv[0], &argc, &argv);
-  if (argc != 5) {
+  // TODO(annarev): Update this file to no longer take op_gen_overrides.pbtxt
+  // as an argument.
+  if (argc != 6) {
     for (int i = 1; i < argc; ++i) {
       fprintf(stderr, "Arg %d = %s\n", i, argv[i]);
     }
     fprintf(stderr,
-            "Usage: %s out.h out.cc overrides1.pbtxt,2.pbtxt include_internal\n"
+            "Usage: %s out.h out.cc overrides1.pbtxt,2.pbtxt include_internal "
+            "api_def_dirs1,api_def_dir2 ...\n"
             "  include_internal: 1 means include internal ops\n",
             argv[0]);
     exit(1);
   }
 
   bool include_internal = tensorflow::StringPiece("1") == argv[4];
-  tensorflow::PrintAllCCOps(argv[1], argv[2], argv[3], include_internal);
+  std::vector<tensorflow::string> api_def_dirs = tensorflow::str_util::Split(
+      argv[5], ",", tensorflow::str_util::SkipEmpty());
+  tensorflow::PrintAllCCOps(argv[1], argv[2], argv[3], include_internal,
+                            api_def_dirs);
   return 0;
 }
diff --git a/tensorflow/cc/framework/cc_op_gen_test.cc b/tensorflow/cc/framework/cc_op_gen_test.cc
new file mode 100644
index 0000000000..0b7e720a5c
--- /dev/null
+++ b/tensorflow/cc/framework/cc_op_gen_test.cc
@@ -0,0 +1,195 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/cc/framework/cc_op_gen.h"
+
+#include "tensorflow/core/framework/op_def.pb.h"
+#include "tensorflow/core/framework/op_gen_lib.h"
+#include "tensorflow/core/lib/core/status_test_util.h"
+#include "tensorflow/core/lib/io/path.h"
+#include "tensorflow/core/platform/test.h"
+
+namespace tensorflow {
+namespace {
+
+// TODO(annarev): Remove this op_gen_overrides.pbtxt reference.
+// It is needed only because WriteCCOps takes it as an argument.
+constexpr char kOverridesFnames[] =
+    "tensorflow/cc/ops/op_gen_overrides.pbtxt";
+constexpr char kBaseOpDef[] = R"(
+op {
+  name: "Foo"
+  input_arg {
+    name: "images"
+    description: "Images to process."
+  }
+  input_arg {
+    name: "dim"
+    description: "Description for dim."
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "output"
+    description: "Description for output."
+    type: DT_FLOAT
+  }
+  attr {
+    name: "T"
+    type: "type"
+    description: "Type for images"
+    allowed_values {
+      list {
+        type: DT_UINT8
+        type: DT_INT8
+      }
+    }
+    default_value {
+      i: 1
+    }
+  }
+  summary: "Summary for op Foo."
+  description: "Description for op Foo."
+}
+)";
+
+void ExpectHasSubstr(StringPiece s, StringPiece expected) {
+  EXPECT_TRUE(s.contains(expected))
+      << "'" << s << "' does not contain '" << expected << "'";
+}
+
+void ExpectDoesNotHaveSubstr(StringPiece s, StringPiece expected) {
+  EXPECT_FALSE(s.contains(expected))
+      << "'" << s << "' contains '" << expected << "'";
+}
+
+void ExpectSubstrOrder(const string& s, const string& before,
+                       const string& after) {
+  int before_pos = s.find(before);
+  int after_pos = s.find(after);
+  ASSERT_NE(std::string::npos, before_pos);
+  ASSERT_NE(std::string::npos, after_pos);
+  EXPECT_LT(before_pos, after_pos)
+      << before << " is not before " << after << " in " << s;
+}
+
+// Runs WriteCCOps and stores output in (internal_)cc_file_path and
+// (internal_)h_file_path.
+void GenerateCcOpFiles(Env* env, const OpList& ops,
+                       const ApiDefMap& api_def_map, string* h_file_text,
+                       string* internal_h_file_text) {
+  const string& tmpdir = testing::TmpDir();
+
+  const auto h_file_path = io::JoinPath(tmpdir, "test.h");
+  const auto cc_file_path = io::JoinPath(tmpdir, "test.cc");
+  const auto internal_h_file_path = io::JoinPath(tmpdir, "test_internal.h");
+  const auto internal_cc_file_path = io::JoinPath(tmpdir, "test_internal.cc");
+
+  WriteCCOps(ops, api_def_map, h_file_path, cc_file_path, kOverridesFnames);
+
+  TF_ASSERT_OK(ReadFileToString(env, h_file_path, h_file_text));
+  TF_ASSERT_OK(
+      ReadFileToString(env, internal_h_file_path, internal_h_file_text));
+}
+
+TEST(CcOpGenTest, TestVisibilityChangedToHidden) {
+  const string api_def = R"(
+op {
+  graph_op_name: "Foo"
+  visibility: HIDDEN
+}
+)";
+  Env* env = Env::Default();
+  OpList op_defs;
+  protobuf::TextFormat::ParseFromString(kBaseOpDef, &op_defs);  // NOLINT
+  ApiDefMap api_def_map(op_defs);
+
+  string h_file_text, internal_h_file_text;
+  // Without ApiDef
+  GenerateCcOpFiles(env, op_defs, api_def_map, &h_file_text,
+                    &internal_h_file_text);
+  ExpectHasSubstr(h_file_text, "class Foo");
+  ExpectDoesNotHaveSubstr(internal_h_file_text, "class Foo");
+
+  // With ApiDef
+  TF_ASSERT_OK(api_def_map.LoadApiDef(api_def));
+  GenerateCcOpFiles(env, op_defs, api_def_map, &h_file_text,
+                    &internal_h_file_text);
+  ExpectHasSubstr(internal_h_file_text, "class Foo");
+  ExpectDoesNotHaveSubstr(h_file_text, "class Foo");
+}
+
+TEST(CcOpGenTest, TestArgNameChanges) {
+  const string api_def = R"(
+op {
+  graph_op_name: "Foo"
+  arg_order: "dim"
+  arg_order: "images"
+}
+)";
+  Env* env = Env::Default();
+  OpList op_defs;
+  protobuf::TextFormat::ParseFromString(kBaseOpDef, &op_defs);  // NOLINT
+
+  ApiDefMap api_def_map(op_defs);
+  string cc_file_text, h_file_text;
+  string internal_cc_file_text, internal_h_file_text;
+  // Without ApiDef
+  GenerateCcOpFiles(env, op_defs, api_def_map, &h_file_text,
+                    &internal_h_file_text);
+  ExpectSubstrOrder(h_file_text, "Input images", "Input dim");
+
+  // With ApiDef
+  TF_ASSERT_OK(api_def_map.LoadApiDef(api_def));
+  GenerateCcOpFiles(env, op_defs, api_def_map, &h_file_text,
+                    &internal_h_file_text);
+  ExpectSubstrOrder(h_file_text, "Input dim", "Input images");
+}
+
+TEST(CcOpGenTest, TestEndpoints) {
+  const string api_def = R"(
+op {
+  graph_op_name: "Foo"
+  endpoint {
+    name: "Foo1"
+  }
+  endpoint {
+    name: "Foo2"
+  }
+}
+)";
+  Env* env = Env::Default();
+  OpList op_defs;
+  protobuf::TextFormat::ParseFromString(kBaseOpDef, &op_defs);  // NOLINT
+
+  ApiDefMap api_def_map(op_defs);
+  string cc_file_text, h_file_text;
+  string internal_cc_file_text, internal_h_file_text;
+  // Without ApiDef
+  GenerateCcOpFiles(env, op_defs, api_def_map, &h_file_text,
+                    &internal_h_file_text);
+  ExpectHasSubstr(h_file_text, "class Foo {");
+  ExpectDoesNotHaveSubstr(h_file_text, "class Foo1");
+  ExpectDoesNotHaveSubstr(h_file_text, "class Foo2");
+
+  // With ApiDef
+  TF_ASSERT_OK(api_def_map.LoadApiDef(api_def));
+  GenerateCcOpFiles(env, op_defs, api_def_map, &h_file_text,
+                    &internal_h_file_text);
+  ExpectHasSubstr(h_file_text, "class Foo1");
+  ExpectHasSubstr(h_file_text, "typedef Foo1 Foo2");
+  ExpectDoesNotHaveSubstr(h_file_text, "class Foo {");
+}
+}  // namespace
+}  // namespace tensorflow
diff --git a/tensorflow/contrib/cmake/tf_cc_ops.cmake b/tensorflow/contrib/cmake/tf_cc_ops.cmake
index a5f5ae5478..45eeb11062 100644
--- a/tensorflow/contrib/cmake/tf_cc_ops.cmake
+++ b/tensorflow/contrib/cmake/tf_cc_ops.cmake
@@ -83,7 +83,7 @@ foreach(tf_cc_op_lib_name ${tf_cc_op_lib_names})
                ${cc_ops_target_dir}/${tf_cc_op_lib_name}.cc
                ${cc_ops_target_dir}/${tf_cc_op_lib_name}_internal.h
                ${cc_ops_target_dir}/${tf_cc_op_lib_name}_internal.cc
-        COMMAND ${tf_cc_op_lib_name}_gen_cc ${cc_ops_target_dir}/${tf_cc_op_lib_name}.h ${cc_ops_target_dir}/${tf_cc_op_lib_name}.cc ${tensorflow_source_dir}/tensorflow/cc/ops/op_gen_overrides.pbtxt ${cc_ops_include_internal}
+        COMMAND ${tf_cc_op_lib_name}_gen_cc ${cc_ops_target_dir}/${tf_cc_op_lib_name}.h ${cc_ops_target_dir}/${tf_cc_op_lib_name}.cc ${tensorflow_source_dir}/tensorflow/cc/ops/op_gen_overrides.pbtxt ${cc_ops_include_internal} ${tensorflow_source_dir}/tensorflow/core/api_def/base_api
         DEPENDS ${tf_cc_op_lib_name}_gen_cc create_cc_ops_header_dir
     )
 
diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD
index 01ddbfc2d4..ee14078496 100644
--- a/tensorflow/core/BUILD
+++ b/tensorflow/core/BUILD
@@ -3371,7 +3371,7 @@ tf_cc_test(
 
 filegroup(
     name = "base_api_def",
-    data = glob(["api_def/base_api/*"]),
+    srcs = glob(["api_def/base_api/*"]),
 )
 
 filegroup(
@@ -3386,10 +3386,6 @@ tf_cc_test(
         ":base_api_def",
         "//tensorflow/cc:ops/op_gen_overrides.pbtxt",
     ],
-    tags = [
-        "manual",
-        "notap",
-    ],
     deps = [
         ":framework",
         ":framework_internal",
diff --git a/tensorflow/core/api_def/api_test.cc b/tensorflow/core/api_def/api_test.cc
index f222d345ab..2cdc14843f 100644
--- a/tensorflow/core/api_def/api_test.cc
+++ b/tensorflow/core/api_def/api_test.cc
@@ -221,9 +221,18 @@ std::unordered_map<string, ApiDefs> GenerateApiDef(
 
   std::unordered_map<string, ApiDefs> api_defs_map;
 
+  // These ops are included in OpList only if TF_NEED_GCP
+  // is set to true. So, we skip them for now so that this test passes
+  // whether TF_NEED_GCP is set or not.
+  const std::unordered_set<string> ops_to_exclude = {
+      "BigQueryReader", "GenerateBigQueryReaderPartitions"};
   for (const auto& op : ops.op()) {
     CHECK(!op.name().empty())
         << "Encountered empty op name: %s" << op.DebugString();
+    if (ops_to_exclude.find(op.name()) != ops_to_exclude.end()) {
+      LOG(INFO) << "Skipping " << op.name();
+      continue;
+    }
     string file_path = io::JoinPath(api_def_dir, kApiDefFileFormat);
     file_path = strings::Printf(file_path.c_str(), op.name().c_str());
     ApiDef* api_def = api_defs_map[file_path].add_op();
diff --git a/tensorflow/core/api_def/base_api/api_def_ApplyAddSign.pbtxt b/tensorflow/core/api_def/base_api/api_def_ApplyAddSign.pbtxt
new file mode 100644
index 0000000000..dd46095252
--- /dev/null
+++ b/tensorflow/core/api_def/base_api/api_def_ApplyAddSign.pbtxt
@@ -0,0 +1,65 @@
+op {
+  graph_op_name: "ApplyAddSign"
+  in_arg {
+    name: "var"
+    description: <<END
+Should be from a Variable().
+END
+  }
+  in_arg {
+    name: "m"
+    description: <<END
+Should be from a Variable().
+END
+  }
+  in_arg {
+    name: "lr"
+    description: <<END
+Scaling factor. Must be a scalar.
+END
+  }
+  in_arg {
+    name: "alpha"
+    description: <<END
+Must be a scalar.
+END
+  }
+  in_arg {
+    name: "sign_decay"
+    description: <<END
+Must be a scalar.
+END
+  }
+  in_arg {
+    name: "beta"
+    description: <<END
+Must be a scalar.
+END
+  }
+  in_arg {
+    name: "grad"
+    description: <<END
+The gradient.
+END
+  }
+  out_arg {
+    name: "out"
+    description: <<END
+Same as "var".
+END
+  }
+  attr {
+    name: "use_locking"
+    description: <<END
+If `True`, updating of the var and m tensors is
+protected by a lock; otherwise the behavior is undefined, but may exhibit less
+contention.
+END
+  }
+  summary: "Update \'*var\' according to the AddSign update."
+  description: <<END
+m_t <- beta1 * m_{t-1} + (1 - beta1) * g
+update <- (alpha + sign_decay * sign(g) *sign(m)) * g
+variable <- variable - lr_t * update
+END
+}
diff --git a/tensorflow/core/api_def/base_api/api_def_ApplyPowerSign.pbtxt b/tensorflow/core/api_def/base_api/api_def_ApplyPowerSign.pbtxt
new file mode 100644
index 0000000000..cfa5619b87
--- /dev/null
+++ b/tensorflow/core/api_def/base_api/api_def_ApplyPowerSign.pbtxt
@@ -0,0 +1,65 @@
+op {
+  graph_op_name: "ApplyPowerSign"
+  in_arg {
+    name: "var"
+    description: <<END
+Should be from a Variable().
+END
+  }
+  in_arg {
+    name: "m"
+    description: <<END
+Should be from a Variable().
+END
+  }
+  in_arg {
+    name: "lr"
+    description: <<END
+Scaling factor. Must be a scalar.
+END
+  }
+  in_arg {
+    name: "logbase"
+    description: <<END
+Must be a scalar.
+END
+  }
+  in_arg {
+    name: "sign_decay"
+    description: <<END
+Must be a scalar.
+END
+  }
+  in_arg {
+    name: "beta"
+    description: <<END
+Must be a scalar.
+END
+  }
+  in_arg {
+    name: "grad"
+    description: <<END
+The gradient.
+END
+  }
+  out_arg {
+    name: "out"
+    description: <<END
+Same as "var".
+END
+  }
+  attr {
+    name: "use_locking"
+    description: <<END
+If `True`, updating of the var and m tensors is
+protected by a lock; otherwise the behavior is undefined, but may exhibit less
+contention.
+END
+  }
+  summary: "Update \'*var\' according to the AddSign update."
+  description: <<END
+m_t <- beta1 * m_{t-1} + (1 - beta1) * g
+update <- exp(logbase * sign_decay * sign(g) * sign(m_t)) * g
+variable <- variable - lr_t * update
+END
+}
diff --git a/tensorflow/core/api_def/base_api/api_def_BytesProducedStatsDataset.pbtxt b/tensorflow/core/api_def/base_api/api_def_BytesProducedStatsDataset.pbtxt
new file mode 100644
index 0000000000..73df11b2f7
--- /dev/null
+++ b/tensorflow/core/api_def/base_api/api_def_BytesProducedStatsDataset.pbtxt
@@ -0,0 +1,4 @@
+op {
+  graph_op_name: "BytesProducedStatsDataset"
+  summary: "Records the bytes size of each element of `input_dataset` in a StatsAggregator."
+}
diff --git a/tensorflow/core/api_def/base_api/api_def_DeserializeSparse.pbtxt b/tensorflow/core/api_def/base_api/api_def_DeserializeSparse.pbtxt
new file mode 100644
index 0000000000..c86f059eb3
--- /dev/null
+++ b/tensorflow/core/api_def/base_api/api_def_DeserializeSparse.pbtxt
@@ -0,0 +1,19 @@
+op {
+  graph_op_name: "DeserializeSparse"
+  in_arg {
+    name: "serialized_sparse"
+    description: <<END
+1-D, The serialized `SparseTensor` object. Must have 3 columns.
+END
+  }
+  attr {
+    name: "dtype"
+    description: <<END
+The `dtype` of the serialized `SparseTensor` object.
+END
+  }
+  summary: "Deserialize `SparseTensor` from a (serialized) string 3-vector (1-D `Tensor`)"
+  description: <<END
+object.
+END
+}
diff --git a/tensorflow/core/api_def/base_api/api_def_GenerateVocabRemapping.pbtxt b/tensorflow/core/api_def/base_api/api_def_GenerateVocabRemapping.pbtxt
index 085acf7ff1..662e4c54b6 100644
--- a/tensorflow/core/api_def/base_api/api_def_GenerateVocabRemapping.pbtxt
+++ b/tensorflow/core/api_def/base_api/api_def_GenerateVocabRemapping.pbtxt
@@ -36,6 +36,13 @@ END
     name: "num_new_vocab"
     description: <<END
 Number of entries in the new vocab file to remap.
+END
+  }
+  attr {
+    name: "old_vocab_size"
+    description: <<END
+Number of entries in the old vocab file to consider.  If -1,
+use the entire old vocabulary.
 END
   }
   summary: "Given a path to new and old vocabulary files, returns a remapping Tensor of"
@@ -43,7 +50,11 @@ END
 length `num_new_vocab`, where `remapping[i]` contains the row number in the old
 vocabulary that corresponds to row `i` in the new vocabulary (starting at line
 `new_vocab_offset` and up to `num_new_vocab` entities), or `-1` if entry `i`
-in the new vocabulary is not in the old vocabulary.  `num_vocab_offset` enables
+in the new vocabulary is not in the old vocabulary.  The old vocabulary is
+constrained to the first `old_vocab_size` entries if `old_vocab_size` is not the
+default value of -1.
+
+`num_vocab_offset` enables
 use in the partitioned variable case, and should generally be set through
 examining partitioning info.  The format of the files should be a text file,
 with each line containing a single entity within the vocabulary.
diff --git a/tensorflow/core/api_def/base_api/api_def_IteratorSetStatsAggregator.pbtxt b/tensorflow/core/api_def/base_api/api_def_IteratorSetStatsAggregator.pbtxt
new file mode 100644
index 0000000000..c6f2212cd4
--- /dev/null
+++ b/tensorflow/core/api_def/base_api/api_def_IteratorSetStatsAggregator.pbtxt
@@ -0,0 +1,4 @@
+op {
+  graph_op_name: "IteratorSetStatsAggregator"
+  summary: "Associates the given iterator with the given statistics aggregator."
+}
diff --git a/tensorflow/core/api_def/base_api/api_def_LatencyStatsDataset.pbtxt b/tensorflow/core/api_def/base_api/api_def_LatencyStatsDataset.pbtxt
new file mode 100644
index 0000000000..78d946b0b4
--- /dev/null
+++ b/tensorflow/core/api_def/base_api/api_def_LatencyStatsDataset.pbtxt
@@ -0,0 +1,4 @@
+op {
+  graph_op_name: "LatencyStatsDataset"
+  summary: "Records the latency of producing `input_dataset` elements in a StatsAggregator."
+}
diff --git a/tensorflow/core/api_def/base_api/api_def_MatrixExponential.pbtxt b/tensorflow/core/api_def/base_api/api_def_MatrixExponential.pbtxt
new file mode 100644
index 0000000000..0d680f6531
--- /dev/null
+++ b/tensorflow/core/api_def/base_api/api_def_MatrixExponential.pbtxt
@@ -0,0 +1,32 @@
+op {
+  graph_op_name: "MatrixExponential"
+  in_arg {
+    name: "input"
+    description: <<END
+Shape is `[..., M, M]`.
+END
+  }
+  out_arg {
+    name: "output"
+    description: <<END
+Shape is `[..., M, M]`.
+
+@compatibility(scipy)
+Equivalent to scipy.linalg.expm
+@end_compatibility
+END
+  }
+  summary: "Computes the matrix exponential of one or more square matrices:"
+  description: <<END
+exp(A) = \sum_{n=0}^\infty A^n/n!
+
+The exponential is computed using a combination of the scaling and squaring
+method and the Pade approximation. Details can be founds in:
+Nicholas J. Higham, "The scaling and squaring method for the matrix exponential
+revisited," SIAM J. Matrix Anal. Applic., 26:1179-1193, 2005.
+
+The input is a tensor of shape `[..., M, M]` whose inner-most 2 dimensions
+form square matrices. The output is a tensor of the same shape as the input
+containing the exponential for all input submatrices `[..., :, :]`.
+END
+}
diff --git a/tensorflow/core/api_def/base_api/api_def_NthElement.pbtxt b/tensorflow/core/api_def/base_api/api_def_NthElement.pbtxt
index 9ef20a26db..2f5d849619 100644
--- a/tensorflow/core/api_def/base_api/api_def_NthElement.pbtxt
+++ b/tensorflow/core/api_def/base_api/api_def_NthElement.pbtxt
@@ -26,7 +26,7 @@ When set to True, find the nth-largest value in the vector and vice
 versa.
 END
   }
-  summary: "Finds values of the `n`-th order statistic for the last dmension."
+  summary: "Finds values of the `n`-th order statistic for the last dimension."
   description: <<END
 If the input is a vector (rank-1), finds the entries which is the nth-smallest
 value in the vector and outputs their values as scalar tensor.
diff --git a/tensorflow/core/api_def/base_api/api_def_ResourceApplyAddSign.pbtxt b/tensorflow/core/api_def/base_api/api_def_ResourceApplyAddSign.pbtxt
new file mode 100644
index 0000000000..94ba3a8d81
--- /dev/null
+++ b/tensorflow/core/api_def/base_api/api_def_ResourceApplyAddSign.pbtxt
@@ -0,0 +1,59 @@
+op {
+  graph_op_name: "ResourceApplyAddSign"
+  in_arg {
+    name: "var"
+    description: <<END
+Should be from a Variable().
+END
+  }
+  in_arg {
+    name: "m"
+    description: <<END
+Should be from a Variable().
+END
+  }
+  in_arg {
+    name: "lr"
+    description: <<END
+Scaling factor. Must be a scalar.
+END
+  }
+  in_arg {
+    name: "alpha"
+    description: <<END
+Must be a scalar.
+END
+  }
+  in_arg {
+    name: "sign_decay"
+    description: <<END
+Must be a scalar.
+END
+  }
+  in_arg {
+    name: "beta"
+    description: <<END
+Must be a scalar.
+END
+  }
+  in_arg {
+    name: "grad"
+    description: <<END
+The gradient.
+END
+  }
+  attr {
+    name: "use_locking"
+    description: <<END
+If `True`, updating of the var and m tensors is
+protected by a lock; otherwise the behavior is undefined, but may exhibit less
+contention.
+END
+  }
+  summary: "Update \'*var\' according to the AddSign update."
+  description: <<END
+m_t <- beta1 * m_{t-1} + (1 - beta1) * g
+update <- (alpha + sign_decay * sign(g) *sign(m)) * g
+variable <- variable - lr_t * update
+END
+}
diff --git a/tensorflow/core/api_def/base_api/api_def_ResourceApplyPowerSign.pbtxt b/tensorflow/core/api_def/base_api/api_def_ResourceApplyPowerSign.pbtxt
new file mode 100644
index 0000000000..909861e668
--- /dev/null
+++ b/tensorflow/core/api_def/base_api/api_def_ResourceApplyPowerSign.pbtxt
@@ -0,0 +1,59 @@
+op {
+  graph_op_name: "ResourceApplyPowerSign"
+  in_arg {
+    name: "var"
+    description: <<END
+Should be from a Variable().
+END
+  }
+  in_arg {
+    name: "m"
+    description: <<END
+Should be from a Variable().
+END
+  }
+  in_arg {
+    name: "lr"
+    description: <<END
+Scaling factor. Must be a scalar.
+END
+  }
+  in_arg {
+    name: "logbase"
+    description: <<END
+Must be a scalar.
+END
+  }
+  in_arg {
+    name: "sign_decay"
+    description: <<END
+Must be a scalar.
+END
+  }
+  in_arg {
+    name: "beta"
+    description: <<END
+Must be a scalar.
+END
+  }
+  in_arg {
+    name: "grad"
+    description: <<END
+The gradient.
+END
+  }
+  attr {
+    name: "use_locking"
+    description: <<END
+If `True`, updating of the var and m tensors is
+protected by a lock; otherwise the behavior is undefined, but may exhibit less
+contention.
+END
+  }
+  summary: "Update \'*var\' according to the AddSign update."
+  description: <<END
+m_t <- beta1 * m_{t-1} + (1 - beta1) * g
+update <- exp(logbase * sign_decay * sign(g) * sign(m_t)) * g
+variable <- variable - lr_t * update
+END
+}
diff --git a/tensorflow/core/api_def/base_api/api_def_StatsAggregatorHandle.pbtxt b/tensorflow/core/api_def/base_api/api_def_StatsAggregatorHandle.pbtxt
new file mode 100644
index 0000000000..9b30d64afe
--- /dev/null
+++ b/tensorflow/core/api_def/base_api/api_def_StatsAggregatorHandle.pbtxt
@@ -0,0 +1,4 @@
+op {
+  graph_op_name: "StatsAggregatorHandle"
+  summary: "Creates a statistics manager resource."
+}
diff --git a/tensorflow/core/api_def/base_api/api_def_StatsAggregatorSummary.pbtxt b/tensorflow/core/api_def/base_api/api_def_StatsAggregatorSummary.pbtxt
new file mode 100644
index 0000000000..bcaf9fea1a
--- /dev/null
+++ b/tensorflow/core/api_def/base_api/api_def_StatsAggregatorSummary.pbtxt
@@ -0,0 +1,4 @@
+op {
+  graph_op_name: "StatsAggregatorSummary"
+  summary: "Produces a summary of any statistics recorded by the given statistics manager."
+}
diff --git a/tensorflow/core/api_def/base_api/api_def_TensorArrayV3.pbtxt b/tensorflow/core/api_def/base_api/api_def_TensorArrayV3.pbtxt
index d1de753ee5..48ac6f5e7d 100644
--- a/tensorflow/core/api_def/base_api/api_def_TensorArrayV3.pbtxt
+++ b/tensorflow/core/api_def/base_api/api_def_TensorArrayV3.pbtxt
@@ -48,6 +48,17 @@ END
 If true (default), Tensors in the TensorArray are cleared
 after being read.  This disables multiple read semantics but allows early
 release of memory.
+END
+  }
+  attr {
+    name: "identical_element_shapes"
+    description: <<END
+If true (default is false), then all
+elements in the TensorArray will be expected to have have identical shapes.
+This allows certain behaviors, like dynamically checking for
+consistent shapes on write, and being able to fill in properly
+shaped zero tensors on stack -- even if the element_shape attribute
+is not fully defined.
 END
   }
   attr {
diff --git a/tensorflow/core/api_def/python_api/api_def_DeserializeSparse.pbtxt b/tensorflow/core/api_def/python_api/api_def_DeserializeSparse.pbtxt
new file mode 100644
index 0000000000..d067990780
--- /dev/null
+++ b/tensorflow/core/api_def/python_api/api_def_DeserializeSparse.pbtxt
@@ -0,0 +1,4 @@
+op {
+  graph_op_name: "DeserializeSparse"
+  visibility: HIDDEN
+}
diff --git a/tensorflow/core/api_def/python_api/api_def_MatrixExponential.pbtxt b/tensorflow/core/api_def/python_api/api_def_MatrixExponential.pbtxt
new file mode 100644
index 0000000000..d215b86c72
--- /dev/null
+++ b/tensorflow/core/api_def/python_api/api_def_MatrixExponential.pbtxt
@@ -0,0 +1,4 @@
+op {
+  graph_op_name: "MatrixExponential"
+  visibility: HIDDEN
+}
diff --git a/tensorflow/core/framework/op_gen_lib.cc b/tensorflow/core/framework/op_gen_lib.cc
index d84d5431e9..95a9b763f9 100644
--- a/tensorflow/core/framework/op_gen_lib.cc
+++ b/tensorflow/core/framework/op_gen_lib.cc
@@ -281,6 +281,9 @@ static void StringReplace(const string& from, const string& to, string* s) {
     } else {
       split.push_back(s->substr(pos, found - pos));
       pos = found + from.size();
+      if (pos == s->size()) {  // handle case where `from` is at the very end.
+        split.push_back("");
+      }
     }
   }
   // Join the pieces back together with a new delimiter.
@@ -316,6 +319,36 @@ static void RenameInDocs(const string& from, const string& to, OpDef* op_def) {
   }
 }
 
+static void RenameInDocs(const string& from, const string& to,
+                         ApiDef* api_def) {
+  const string from_quoted = strings::StrCat("`", from, "`");
+  const string to_quoted = strings::StrCat("`", to, "`");
+  for (int i = 0; i < api_def->in_arg_size(); ++i) {
+    if (!api_def->in_arg(i).description().empty()) {
+      StringReplace(from_quoted, to_quoted,
+                    api_def->mutable_in_arg(i)->mutable_description());
+    }
+  }
+  for (int i = 0; i < api_def->out_arg_size(); ++i) {
+    if (!api_def->out_arg(i).description().empty()) {
+      StringReplace(from_quoted, to_quoted,
+                    api_def->mutable_out_arg(i)->mutable_description());
+    }
+  }
+  for (int i = 0; i < api_def->attr_size(); ++i) {
+    if (!api_def->attr(i).description().empty()) {
+      StringReplace(from_quoted, to_quoted,
+                    api_def->mutable_attr(i)->mutable_description());
+    }
+  }
+  if (!api_def->summary().empty()) {
+    StringReplace(from_quoted, to_quoted, api_def->mutable_summary());
+  }
+  if (!api_def->description().empty()) {
+    StringReplace(from_quoted, to_quoted, api_def->mutable_description());
+  }
+}
+
 const OpGenOverride* OpGenOverrideMap::ApplyOverride(OpDef* op_def) const {
   // Look up
   const auto iter = map_.find(op_def->name());
@@ -521,6 +554,7 @@ Status MergeApiDefs(ApiDef* base_api_def, const ApiDef& new_api_def) {
           ". All elements in arg_order override must match base arg_order: ",
           str_util::Join(base_api_def->arg_order(), ", "));
     }
+
     base_api_def->clear_arg_order();
     std::copy(
         new_api_def.arg_order().begin(), new_api_def.arg_order().end(),
@@ -608,6 +642,32 @@ Status ApiDefMap::LoadApiDef(const string& api_def_file_contents) {
   return Status::OK();
 }
 
+void ApiDefMap::UpdateDocs() {
+  for (auto& name_and_api_def : map_) {
+    auto& api_def = name_and_api_def.second;
+    CHECK_GT(api_def.endpoint_size(), 0);
+    const string canonical_name = api_def.endpoint(0).name();
+    if (api_def.graph_op_name() != canonical_name) {
+      RenameInDocs(api_def.graph_op_name(), canonical_name, &api_def);
+    }
+    for (const auto& in_arg : api_def.in_arg()) {
+      if (in_arg.name() != in_arg.rename_to()) {
+        RenameInDocs(in_arg.name(), in_arg.rename_to(), &api_def);
+      }
+    }
+    for (const auto& out_arg : api_def.out_arg()) {
+      if (out_arg.name() != out_arg.rename_to()) {
+        RenameInDocs(out_arg.name(), out_arg.rename_to(), &api_def);
+      }
+    }
+    for (const auto& attr : api_def.attr()) {
+      if (attr.name() != attr.rename_to()) {
+        RenameInDocs(attr.name(), attr.rename_to(), &api_def);
+      }
+    }
+  }
+}
+
 const tensorflow::ApiDef* ApiDefMap::GetApiDef(const string& name) const {
   return gtl::FindOrNull(map_, name);
 }
diff --git a/tensorflow/core/framework/op_gen_lib.h b/tensorflow/core/framework/op_gen_lib.h
index efb287477b..1ede3af8d7 100644
--- a/tensorflow/core/framework/op_gen_lib.h
+++ b/tensorflow/core/framework/op_gen_lib.h
@@ -106,6 +106,12 @@ class ApiDefMap {
   // passed to the constructor.
   Status LoadApiDef(const string& api_def_file_contents);
 
+  // Updates ApiDef docs. For example, if ApiDef renames an argument
+  // or attribute, applies these renames to descriptions as well.
+  // UpdateDocs should only be called once after all ApiDefs are loaded
+  // since it replaces original op names.
+  void UpdateDocs();
+
   // Look up ApiDef proto based on the given graph op name.
   // If graph op name is not in this ApiDefMap, returns nullptr.
   //
diff --git a/tensorflow/core/framework/op_gen_lib_test.cc b/tensorflow/core/framework/op_gen_lib_test.cc
index da9b4dfbb1..bbe57bdd62 100644
--- a/tensorflow/core/framework/op_gen_lib_test.cc
+++ b/tensorflow/core/framework/op_gen_lib_test.cc
@@ -455,5 +455,62 @@ op {
   status = api_map.LoadApiDef(api_def3);
   ASSERT_EQ(tensorflow::error::FAILED_PRECONDITION, status.code());
 }
+
+TEST(OpGenLibTest, ApiDefUpdateDocs) {
+  const string op_list1 = R"(op {
+  name: "testop"
+  input_arg {
+    name: "arg_a"
+    description: "`arg_a`, `arg_c`, `attr_a`, `testop`"
+  }
+  output_arg {
+    name: "arg_c"
+    description: "`arg_a`, `arg_c`, `attr_a`, `testop`"
+  }
+  attr {
+    name: "attr_a"
+    description: "`arg_a`, `arg_c`, `attr_a`, `testop`"
+  }
+  description: "`arg_a`, `arg_c`, `attr_a`, `testop`"
+}
+)";
+
+  const string api_def1 = R"(
+op {
+  graph_op_name: "testop"
+  endpoint {
+    name: "testop2"
+  }
+  in_arg {
+    name: "arg_a"
+    rename_to: "arg_aa"
+  }
+  out_arg {
+    name: "arg_c"
+    rename_to: "arg_cc"
+    description: "New description: `arg_a`, `arg_c`, `attr_a`, `testop`"
+  }
+  attr {
+    name: "attr_a"
+    rename_to: "attr_aa"
+  }
+}
+)";
+  OpList op_list;
+  protobuf::TextFormat::ParseFromString(op_list1, &op_list);  // NOLINT
+  ApiDefMap api_map(op_list);
+  TF_CHECK_OK(api_map.LoadApiDef(api_def1));
+  api_map.UpdateDocs();
+
+  const string expected_description =
+      "`arg_aa`, `arg_cc`, `attr_aa`, `testop2`";
+  EXPECT_EQ(expected_description, api_map.GetApiDef("testop")->description());
+  EXPECT_EQ(expected_description,
+            api_map.GetApiDef("testop")->in_arg(0).description());
+  EXPECT_EQ("New description: " + expected_description,
+            api_map.GetApiDef("testop")->out_arg(0).description());
+  EXPECT_EQ(expected_description,
+            api_map.GetApiDef("testop")->attr(0).description());
+}
 }  // namespace
 }  // namespace tensorflow
diff --git a/tensorflow/tensorflow.bzl b/tensorflow/tensorflow.bzl
index a3ba363469..8d392fb36d 100644
--- a/tensorflow/tensorflow.bzl
+++ b/tensorflow/tensorflow.bzl
@@ -316,7 +316,9 @@ def tf_gen_op_wrapper_cc(name,
                          op_gen=clean_dep("//tensorflow/cc:cc_op_gen_main"),
                          deps=None,
                          override_file=None,
-                         include_internal_ops=0):
+                         include_internal_ops=0,
+                         # ApiDefs will be loaded in the order specified in this list.
+                         api_def_srcs=[]):
   # Construct an op generator binary for these ops.
   tool = out_ops_file + "_gen_cc"
   if deps == None:
@@ -328,12 +330,26 @@ def tf_gen_op_wrapper_cc(name,
       linkstatic=1,  # Faster to link this one-time-use binary dynamically
       deps=[op_gen] + deps)
 
+  srcs = api_def_srcs[:]
+
   if override_file == None:
-    srcs = []
     override_arg = ","
   else:
-    srcs = [override_file]
+    srcs += [override_file]
     override_arg = "$(location " + override_file + ")"
+
+  if not api_def_srcs:
+    api_def_args_str = ","
+  else:
+    api_def_args = []
+    for api_def_src in api_def_srcs:
+      # Add directory of the first ApiDef source to args.
+      # We are assuming all ApiDefs in a single api_def_src are in the
+      # same directory.
+      api_def_args.append(
+          " $$(dirname $$(echo $(locations " + api_def_src +
+          ") | cut -d\" \" -f1))")
+    api_def_args_str = ",".join(api_def_args)
   native.genrule(
       name=name + "_genrule",
       outs=[
@@ -344,7 +360,7 @@ def tf_gen_op_wrapper_cc(name,
       tools=[":" + tool] + tf_binary_additional_srcs(),
       cmd=("$(location :" + tool + ") $(location :" + out_ops_file + ".h) " +
            "$(location :" + out_ops_file + ".cc) " + override_arg + " " +
-           str(include_internal_ops)))
+           str(include_internal_ops) + " " + api_def_args_str))
 
 
 # Given a list of "op_lib_names" (a list of files in the ops directory
@@ -387,7 +403,9 @@ def tf_gen_op_wrappers_cc(name,
                           op_gen=clean_dep("//tensorflow/cc:cc_op_gen_main"),
                           override_file=None,
                           include_internal_ops=0,
-                          visibility=None):
+                          visibility=None,
+                          # ApiDefs will be loaded in the order apecified in this list.
+                          api_def_srcs=[]):
   subsrcs = other_srcs[:]
   subhdrs = other_hdrs[:]
   internalsrcs = []
@@ -399,7 +417,8 @@ def tf_gen_op_wrappers_cc(name,
         pkg=pkg,
         op_gen=op_gen,
         override_file=override_file,
-        include_internal_ops=include_internal_ops)
+        include_internal_ops=include_internal_ops,
+        api_def_srcs=api_def_srcs)
     subsrcs += ["ops/" + n + ".cc"]
     subhdrs += ["ops/" + n + ".h"]
     internalsrcs += ["ops/" + n + "_internal.cc"]
-- 
GitLab


From 58f31aab00b916a5351781ddd5499219ae3bb210 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 17 Nov 2017 15:21:59 -0800
Subject: [PATCH 0076/1225] Don't prepend '%' in HloInstruction constructor.
 And update the ToString and logging.

PiperOrigin-RevId: 176168081
---
 .../xla/service/hlo_execution_profile_test.cc |  4 +--
 .../compiler/xla/service/hlo_instruction.cc   | 36 +++++++++----------
 .../xla/service/hlo_instruction_test.cc       | 21 ++++++-----
 3 files changed, 29 insertions(+), 32 deletions(-)

diff --git a/tensorflow/compiler/xla/service/hlo_execution_profile_test.cc b/tensorflow/compiler/xla/service/hlo_execution_profile_test.cc
index 0628444b34..5ba31296ea 100644
--- a/tensorflow/compiler/xla/service/hlo_execution_profile_test.cc
+++ b/tensorflow/compiler/xla/service/hlo_execution_profile_test.cc
@@ -90,10 +90,10 @@ TEST_F(HloExecutionProfileTest, Basic) {
   const std::vector<string>& line_3 = lines_and_words[3];
 
   EXPECT_EQ(line_2[kInstructionCyclesIndex], std::to_string(dot_cycles));
-  EXPECT_EQ(line_2[kInstructionNameIndex], dot_instruction->name());
+  EXPECT_EQ(line_2[kInstructionNameIndex], '%' + dot_instruction->name());
 
   EXPECT_EQ(line_3[kInstructionCyclesIndex], std::to_string(add_cycles));
-  EXPECT_EQ(line_3[kInstructionNameIndex], add_instruction->name());
+  EXPECT_EQ(line_3[kInstructionNameIndex], '%' + add_instruction->name());
 }
 }  // namespace
 }  // namespace xla
diff --git a/tensorflow/compiler/xla/service/hlo_instruction.cc b/tensorflow/compiler/xla/service/hlo_instruction.cc
index e3fdc53b7f..95c14ee7a8 100644
--- a/tensorflow/compiler/xla/service/hlo_instruction.cc
+++ b/tensorflow/compiler/xla/service/hlo_instruction.cc
@@ -151,7 +151,7 @@ StatusOr<std::unique_ptr<HloInstruction>> HloInstruction::CreateFromProto(
       WrapUnique(new HloInstruction(HloOpcode::kParameter, shape));
   instruction->parameter_number_ = parameter_number;
   instruction->parameter_name_ = name;
-  instruction->name_ = "%" + name;
+  instruction->name_ = name;
   return instruction;
 }
 
@@ -871,10 +871,8 @@ HloInstruction* HloInstruction::CloneAndFuseInternal(
       // parameter instruction.
       int64 param_no = fused_parameters.size();
       // Name the parameter after the instruction it represents in the outer
-      // (non-fusion) computation. Strip the leading "%" from the operand name
-      // to avoid a double %%.
-      string param_name =
-          StrCat(operand->name().substr(1), ".param_", param_no);
+      // (non-fusion) computation.
+      string param_name = StrCat(operand->name(), ".param_", param_no);
       fused_param = fused_instructions_computation()->AddParameter(
           CreateParameter(param_no, operand->shape(), param_name));
       AppendOperand(operand);
@@ -1015,7 +1013,7 @@ std::unique_ptr<HloInstruction> HloInstruction::CloneWithNewOperands(
   VLOG(3) << "CloneWithNewOperands:\n  " << ToString();
   VLOG(3) << "  new operands:";
   for (const HloInstruction* new_operand : new_operands) {
-    VLOG(3) << "    " << new_operand->name();
+    VLOG(3) << "    %" << new_operand->name();
   }
 
   std::unique_ptr<HloInstruction> clone;
@@ -1827,7 +1825,7 @@ string HloInstruction::SignatureString() const {
 string HloInstruction::ToString(bool compact_operands, bool include_metadata,
                                 bool include_large_constants) const {
   string result =
-      StrCat(name(), " = ", ShapeUtil::HumanStringWithLayout(shape()), " ",
+      StrCat("%", name(), " = ", ShapeUtil::HumanStringWithLayout(shape()), " ",
              HloOpcodeString(opcode()), "(",
              OperandsToString(compact_operands, include_large_constants), ")");
   for (const string& extra : ExtraAttributesToString()) {
@@ -1879,7 +1877,7 @@ string HloInstruction::OperandsToString(bool compact,
     operands = Join(slice, ", ", [&](string* out, HloInstruction* operand) {
       *out += ShapeUtil::HumanStringWithLayout(operand->shape());
       if (!compact) {
-        StrAppend(out, " ", operand->name());
+        StrAppend(out, " %", operand->name());
       }
     });
     const int64 remaining = operands_.size() - slice.size();
@@ -1966,7 +1964,7 @@ std::vector<string> HloInstruction::ExtraAttributesToString() const {
     extra.push_back(StrCat("control-predecessors={",
                            Join(control_predecessors_, ", ",
                                 [](string* out, HloInstruction* pre) {
-                                  StrAppend(out, pre->name());
+                                  StrAppend(out, "%", pre->name());
                                 }),
                            "}"));
   }
@@ -1981,10 +1979,10 @@ std::vector<string> HloInstruction::ExtraAttributesToString() const {
 }
 
 string HloInstruction::ToShortString() const {
-  return StrCat(name(), " = ", HloOpcodeString(opcode()), "(",
+  return StrCat("%", name(), " = ", HloOpcodeString(opcode()), "(",
                 Join(operands_, ", ",
                      [](string* out, HloInstruction* operand) {
-                       StrAppend(out, operand->name());
+                       StrAppend(out, "%", operand->name());
                      }),
                 ")");
 }
@@ -2191,7 +2189,7 @@ HloInstruction::HloInstruction(HloOpcode opcode, const Shape& shape)
     : unique_id_(-1),
       opcode_(opcode),
       shape_(shape),
-      name_("%" + HloOpcodeString(opcode)) {
+      name_(HloOpcodeString(opcode)) {
   TF_DCHECK_OK(ShapeUtil::ValidateShapeWithOptionalLayout(shape_));
 }
 
@@ -2415,7 +2413,7 @@ static Status PostOrderDFS(HloInstruction* root, Visitor* visitor,
         visitor->GetVisitState(current_id);
     if (visit_state == Visitor::kVisited) {
       dfs_stack.pop_back();
-      VLOG(3) << "Not visiting HLO " << current_node->name()
+      VLOG(3) << "Not visiting HLO %" << current_node->name()
               << " as it was already visited.";
       continue;
     }
@@ -2424,7 +2422,7 @@ static Status PostOrderDFS(HloInstruction* root, Visitor* visitor,
       dfs_stack.pop_back();
 
       TF_RETURN_IF_ERROR(visitor->Preprocess(current_node));
-      VLOG(2) << "Visiting HLO " << current_node->name();
+      VLOG(2) << "Visiting HLO %" << current_node->name();
       TF_RETURN_IF_ERROR(current_node->Visit(visitor));
       visitor->SetVisitState(current_id, Visitor::kVisited);
       TF_RETURN_IF_ERROR(visitor->Postprocess(current_node));
@@ -2469,7 +2467,7 @@ template <typename HloInstructionPtr>
 Status HloInstruction::Accept(DfsHloVisitorBase<HloInstructionPtr>* visitor,
                               bool call_finish_visit,
                               bool ignore_control_predecessors) {
-  VLOG(3) << "HloInstruction::Accept(" << name() << ")";
+  VLOG(3) << "HloInstruction::Accept(%" << name() << ")";
   TF_RETURN_IF_ERROR(
       PostOrderDFS(this, visitor, nullptr, ignore_control_predecessors));
   if (call_finish_visit) {
@@ -2485,7 +2483,7 @@ template Status HloInstruction::Accept(ConstDfsHloVisitor*, bool, bool);
 Status HloInstruction::AcceptWithOperandOrder(
     DfsHloVisitor* visitor, const CompareFunction& operand_order,
     bool call_finish_visit) {
-  VLOG(2) << "HloInstruction::AcceptWithOperandOrder(" << name() << ")";
+  VLOG(2) << "HloInstruction::AcceptWithOperandOrder(%" << name() << ")";
   InternalCompareFunction func = [&operand_order](
                                      std::pair<int, const HloInstruction*> a,
                                      std::pair<int, const HloInstruction*> b) {
@@ -2548,7 +2546,7 @@ Status HloInstruction::Accept(
 
 Status HloInstruction::AcceptOrdered(
     DfsHloVisitor* visitor, const std::vector<const HloInstruction*>& order) {
-  VLOG(2) << "HloInstruction::AcceptOrdered(" << name() << ")";
+  VLOG(2) << "HloInstruction::AcceptOrdered(%" << name() << ")";
   TF_RET_CHECK(OrderIsTopologicalSort(order));
 
   // Compute the predecessors of this instruction.
@@ -2567,7 +2565,7 @@ Status HloInstruction::AcceptOrdered(
     // The visitor can mark instructions as visited to skip particular
     // instructions.
     if (visitor->DidVisit(*const_instruction)) {
-      VLOG(3) << "Not visiting HLO " << const_instruction->name()
+      VLOG(3) << "Not visiting HLO %" << const_instruction->name()
               << " as it was already visited.";
       continue;
     }
@@ -2576,7 +2574,7 @@ Status HloInstruction::AcceptOrdered(
         const_cast<HloInstruction*>(const_instruction);
 
     TF_RETURN_IF_ERROR(visitor->Preprocess(instruction));
-    VLOG(2) << "Visiting HLO " << instruction->name();
+    VLOG(2) << "Visiting HLO %" << instruction->name();
     TF_RETURN_IF_ERROR(instruction->Visit(visitor));
     visitor->SetVisited(*instruction);
     TF_RETURN_IF_ERROR(visitor->Postprocess(instruction));
diff --git a/tensorflow/compiler/xla/service/hlo_instruction_test.cc b/tensorflow/compiler/xla/service/hlo_instruction_test.cc
index 41b916e2c7..070bb4bc42 100644
--- a/tensorflow/compiler/xla/service/hlo_instruction_test.cc
+++ b/tensorflow/compiler/xla/service/hlo_instruction_test.cc
@@ -1138,35 +1138,34 @@ TEST_F(HloInstructionTest, CloneSuffixNames) {
   // Test cloning the same instruction multiple times.
   auto foo =
       HloInstruction::CreateParameter(0, ShapeUtil::MakeShape(F32, {}), "foo");
-  EXPECT_EQ(foo->Clone()->name(), "%foo.clone");
-  EXPECT_EQ(foo->Clone()->Clone()->name(), "%foo.clone2");
-  EXPECT_EQ(foo->Clone()->Clone()->Clone()->name(), "%foo.clone3");
+  EXPECT_EQ(foo->Clone()->name(), "foo.clone");
+  EXPECT_EQ(foo->Clone()->Clone()->name(), "foo.clone2");
+  EXPECT_EQ(foo->Clone()->Clone()->Clone()->name(), "foo.clone3");
 
   // Test custom suffixes.
-  EXPECT_EQ(foo->Clone("bar")->name(), "%foo.bar");
-  EXPECT_EQ(foo->Clone("bar")->Clone("bar")->name(), "%foo.bar2");
-  EXPECT_EQ(foo->Clone("bar")->Clone("bar")->Clone()->name(),
-            "%foo.bar2.clone");
+  EXPECT_EQ(foo->Clone("bar")->name(), "foo.bar");
+  EXPECT_EQ(foo->Clone("bar")->Clone("bar")->name(), "foo.bar2");
+  EXPECT_EQ(foo->Clone("bar")->Clone("bar")->Clone()->name(), "foo.bar2.clone");
 
   // Test instruction name with a dot.
   auto foo_baz = HloInstruction::CreateParameter(
       0, ShapeUtil::MakeShape(F32, {}), "foo.baz");
-  EXPECT_EQ(foo_baz->Clone()->name(), "%foo.baz.clone");
+  EXPECT_EQ(foo_baz->Clone()->name(), "foo.baz.clone");
 
   // Test incrementing a large number after the suffix.
   auto foo_clone234 = HloInstruction::CreateParameter(
       0, ShapeUtil::MakeShape(F32, {}), "foo.clone234");
-  EXPECT_EQ(foo_clone234->Clone()->name(), "%foo.clone235");
+  EXPECT_EQ(foo_clone234->Clone()->name(), "foo.clone235");
 
   // Test a non-numeric string after the cloning suffix.
   auto foo_clonexyz = HloInstruction::CreateParameter(
       0, ShapeUtil::MakeShape(F32, {}), "foo.clonexyz");
-  EXPECT_EQ(foo_clonexyz->Clone()->name(), "%foo.clonexyz.clone");
+  EXPECT_EQ(foo_clonexyz->Clone()->name(), "foo.clonexyz.clone");
 
   // Test a name with multiple appearances of the suffix.
   auto foo_clone_clone3 = HloInstruction::CreateParameter(
       0, ShapeUtil::MakeShape(F32, {}), "foo.clone.clone3");
-  EXPECT_EQ(foo_clone_clone3->Clone()->name(), "%foo.clone.clone4");
+  EXPECT_EQ(foo_clone_clone3->Clone()->name(), "foo.clone.clone4");
 }
 
 TEST_F(HloInstructionTest, Stringification) {
-- 
GitLab


From 7eaa14b885124cbe1577d58d071db6bba5ee5cbe Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 17 Nov 2017 15:27:57 -0800
Subject: [PATCH 0077/1225] [tf.data] Move existing Saveable MapDataset tests
 to use dataset_serialization_test_base; add verify_restore_in_empty_graph and
 verify_save_with_errors methods to dataset_serialization_test_base.

PiperOrigin-RevId: 176168789
---
 .../contrib/data/python/kernel_tests/BUILD    |  11 +-
 .../dataset_serialization_test_base.py        | 112 +++++
 .../kernel_tests/map_dataset_op_test.py       | 432 +-----------------
 3 files changed, 130 insertions(+), 425 deletions(-)

diff --git a/tensorflow/contrib/data/python/kernel_tests/BUILD b/tensorflow/contrib/data/python/kernel_tests/BUILD
index 0dac03d7d8..3b5f2db322 100644
--- a/tensorflow/contrib/data/python/kernel_tests/BUILD
+++ b/tensorflow/contrib/data/python/kernel_tests/BUILD
@@ -130,6 +130,8 @@ py_library(
         "//tensorflow/python:sparse_tensor",
         "//tensorflow/python:training",
         "//tensorflow/python:util",
+        "//tensorflow/python:variables",
+        "//tensorflow/python/data/ops:iterator_ops",
         "//third_party/py/numpy",
     ],
 )
@@ -267,8 +269,8 @@ py_test(
     srcs = ["map_dataset_op_test.py"],
     srcs_version = "PY2AND3",
     deps = [
+        ":dataset_serialization_test",
         "//tensorflow/contrib/data/python/ops:dataset_ops",
-        "//tensorflow/contrib/data/python/ops:iterator_ops",
         "//tensorflow/contrib/data/python/ops:transformation_ops",
         "//tensorflow/python:array_ops",
         "//tensorflow/python:client_testlib",
@@ -277,19 +279,18 @@ py_test(
         "//tensorflow/python:dtypes",
         "//tensorflow/python:errors",
         "//tensorflow/python:framework_ops",
+        "//tensorflow/python:function",
         "//tensorflow/python:functional_ops",
         "//tensorflow/python:io_ops",
         "//tensorflow/python:lookup_ops",
         "//tensorflow/python:math_ops",
-        "//tensorflow/python:platform",
         "//tensorflow/python:random_ops",
         "//tensorflow/python:script_ops",
+        "//tensorflow/python:sparse_ops",
+        "//tensorflow/python:sparse_tensor",
         "//tensorflow/python:string_ops",
-        "//tensorflow/python:training",
         "//tensorflow/python:util",
         "//tensorflow/python:variable_scope",
-        "//tensorflow/python:variables",
-        "//tensorflow/python/data/ops:iterator_ops",
         "//third_party/py/numpy",
     ],
 )
diff --git a/tensorflow/contrib/data/python/kernel_tests/dataset_serialization_test_base.py b/tensorflow/contrib/data/python/kernel_tests/dataset_serialization_test_base.py
index 0a9e99fd99..a24a16a5f8 100644
--- a/tensorflow/contrib/data/python/kernel_tests/dataset_serialization_test_base.py
+++ b/tensorflow/contrib/data/python/kernel_tests/dataset_serialization_test_base.py
@@ -23,9 +23,11 @@ import os
 import numpy as np
 
 from tensorflow.contrib.data.python.ops import iterator_ops as contrib_iterator_ops
+from tensorflow.python.data.ops import iterator_ops
 from tensorflow.python.framework import errors
 from tensorflow.python.framework import ops
 from tensorflow.python.framework import sparse_tensor
+from tensorflow.python.ops import variables
 from tensorflow.python.platform import gfile
 from tensorflow.python.platform import test
 from tensorflow.python.training import saver as saver_lib
@@ -63,6 +65,8 @@ class DatasetSerializationTestBase(test.TestCase):
         ds_fn1, num_outputs, sparse_tensors=sparse_tensors)
     self.verify_reset_restored_iterator(
         ds_fn1, num_outputs, sparse_tensors=sparse_tensors)
+    self.verify_restore_in_empty_graph(
+        ds_fn1, num_outputs, sparse_tensors=sparse_tensors)
     if ds_fn2:
       self.verify_restore_in_modified_graph(
           ds_fn1, ds_fn2, num_outputs, sparse_tensors=sparse_tensors)
@@ -299,6 +303,97 @@ class DatasetSerializationTestBase(test.TestCase):
 
     self.match(expected, actual)
 
+  def verify_restore_in_empty_graph(self,
+                                    ds_fn,
+                                    num_outputs,
+                                    break_point=None,
+                                    sparse_tensors=False,
+                                    verify_exhausted=True):
+    """Attempts to restore an iterator in an empty graph.
+
+    Builds an input pipeline using ds_fn, runs it for `break_point` steps
+    and saves a checkpoint. Then builds a new empty graph, restores
+    the checkpoint from ds_fn and verifies that the restore is successful.
+
+    Args:
+      ds_fn: See `run_core_tests`.
+      num_outputs: See `run_core_tests`.
+      break_point: Break point. Optional. Defaults to num_outputs/2.
+      sparse_tensors: See `run_core_tests`.
+      verify_exhausted: See `gen_outputs`.
+
+    Raises:
+      AssertionError if any test fails.
+    """
+    break_point = num_outputs // 2 if not break_point else break_point
+
+    # Skip `break_point` items and store the remaining produced from ds_fn
+    # in `expected`.
+    self.gen_outputs(
+        ds_fn, [],
+        break_point,
+        sparse_tensors=sparse_tensors,
+        verify_exhausted=False)
+    expected = self.gen_outputs(
+        ds_fn, [],
+        num_outputs - break_point,
+        ckpt_saved=True,
+        sparse_tensors=sparse_tensors,
+        verify_exhausted=verify_exhausted)
+
+    # Generate `break_point` items from ds_fn and save checkpoint.
+    self.gen_outputs(
+        ds_fn, [],
+        break_point,
+        sparse_tensors=sparse_tensors,
+        verify_exhausted=False)
+
+    actual = []
+    # Build an empty graph but load checkpoint for ds_fn.
+    with ops.Graph().as_default() as g:
+      get_next_op, saver = self._build_empty_graph(
+          ds_fn, sparse_tensors=sparse_tensors)
+      with self.test_session(graph=g) as sess:
+        self._restore(saver, sess)
+        for _ in range(num_outputs - break_point):
+          actual.append(sess.run(get_next_op))
+        if verify_exhausted:
+          with self.assertRaises(errors.OutOfRangeError):
+            sess.run(get_next_op)
+
+    self.match(expected, actual)
+
+  def verify_error_on_save(self,
+                           ds_fn,
+                           num_outputs,
+                           error,
+                           break_point=None,
+                           sparse_tensors=False):
+    """Attempts to save a non-saveable iterator.
+
+    Args:
+      ds_fn: See `run_core_tests`.
+      num_outputs: See `run_core_tests`.
+      error: Declared error when trying to save iterator.
+      break_point: Break point. Optional. Defaults to num_outputs/2.
+      sparse_tensors: See `run_core_tests`.
+
+    Raises:
+      AssertionError if any test fails.
+    """
+
+    break_point = num_outputs // 2 if not break_point else break_point
+    with ops.Graph().as_default() as g:
+      init_op, get_next_op, saver = self._build_graph(
+          ds_fn, sparse_tensors=sparse_tensors)
+      with self.test_session(graph=g) as sess:
+        sess.run(variables.global_variables_initializer())
+        sess.run(init_op)
+        for _ in range(break_point):
+          sess.run(get_next_op)
+        with self.assertRaises(error):
+          self._save(sess, saver)
+
   def verify_run_with_breaks(self,
                              ds_fn,
                              break_points,
@@ -395,6 +490,7 @@ class DatasetSerializationTestBase(test.TestCase):
         with self.test_session(graph=g) as sess:
           if ckpt_saved:
             if init_before_restore:
+              sess.run(variables.global_variables_initializer())
               sess.run(init_op)
             self._restore(saver, sess)
           else:
@@ -466,6 +562,18 @@ class DatasetSerializationTestBase(test.TestCase):
     saver = saver_lib.Saver(allow_empty=True)
     return init_op, get_next, saver
 
+  def _build_empty_graph(self, ds_fn, sparse_tensors=False):
+    iterator = iterator_ops.Iterator.from_structure(
+        self._get_output_types(ds_fn), self._get_output_shapes(ds_fn))
+    saveable = contrib_iterator_ops.make_saveable_from_iterator(iterator)
+    ops.add_to_collection(ops.GraphKeys.SAVEABLE_OBJECTS, saveable)
+    if sparse_tensors:
+      get_next = sparse_tensor.SparseTensor(*iterator.get_next())
+    else:
+      get_next = iterator.get_next()
+    saver = saver_lib.Saver(allow_empty=True)
+    return get_next, saver
+
   def _add_iterator_ops_to_collection(self,
                                       init_op,
                                       get_next,
@@ -495,6 +603,10 @@ class DatasetSerializationTestBase(test.TestCase):
     with ops.Graph().as_default():
       return ds_fn().output_types
 
+  def _get_output_shapes(self, ds_fn):
+    with ops.Graph().as_default():
+      return ds_fn().output_shapes
+
   def _ckpt_path(self):
     return os.path.join(self.get_temp_dir(), "iterator")
 
diff --git a/tensorflow/contrib/data/python/kernel_tests/map_dataset_op_test.py b/tensorflow/contrib/data/python/kernel_tests/map_dataset_op_test.py
index d8e7f9d593..3c07a5571a 100644
--- a/tensorflow/contrib/data/python/kernel_tests/map_dataset_op_test.py
+++ b/tensorflow/contrib/data/python/kernel_tests/map_dataset_op_test.py
@@ -23,10 +23,9 @@ import threading
 
 import numpy as np
 
+from tensorflow.contrib.data.python.kernel_tests import dataset_serialization_test_base
 from tensorflow.contrib.data.python.ops import dataset_ops
 from tensorflow.contrib.data.python.ops import error_ops
-from tensorflow.contrib.data.python.ops import iterator_ops as contrib_iterator_ops
-from tensorflow.python.data.ops import iterator_ops
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import errors
@@ -44,10 +43,7 @@ from tensorflow.python.ops import script_ops
 from tensorflow.python.ops import sparse_ops
 from tensorflow.python.ops import string_ops
 from tensorflow.python.ops import variable_scope
-from tensorflow.python.ops import variables
-from tensorflow.python.platform import gfile
 from tensorflow.python.platform import test
-from tensorflow.python.training import saver as saver_lib
 from tensorflow.python.util import compat
 
 
@@ -702,20 +698,14 @@ class MapDatasetTest(test.TestCase):
           sess.run(init_op)
 
 
-class MapDatasetSerializationTest(test.TestCase):
+class MapDatasetSerializationTest(
+    dataset_serialization_test_base.DatasetSerializationTestBase):
 
   def setUp(self):
     self._tensor_slice_len = 7
     self._num_epochs = 14
     self._num_outputs = self._tensor_slice_len * self._num_epochs
 
-  def tearDown(self):
-    # Remove all checkpoint files.
-    prefix = self._ckpt_path()
-    pattern = prefix + "*"
-    files = gfile.Glob(pattern)
-    map(gfile.Remove, files)
-
   def _build_ds(self, multiplier=37.0):
     components = (np.arange(self._tensor_slice_len), np.array([[1, 2, 3]]) *
                   np.arange(self._tensor_slice_len)[:, np.newaxis],
@@ -727,292 +717,11 @@ class MapDatasetSerializationTest(test.TestCase):
     return (dataset_ops.Dataset.from_tensor_slices(components).map(_map_fn)
             .repeat(self._num_epochs))
 
-  def _build_graph(self, multiplier=37.0, build_saveable=True):
-    ds = self._build_ds(multiplier)
-    iterator = ds.make_initializable_iterator()
-
-    if build_saveable:
-      saveable = contrib_iterator_ops.make_saveable_from_iterator(iterator)
-      ops.add_to_collection(ops.GraphKeys.SAVEABLE_OBJECTS, saveable)
-    init_op = iterator.initializer
-    get_next = iterator.get_next()
-    self._add_iterator_ops_to_collection(init_op, get_next)
-    saver = saver_lib.Saver(allow_empty=True)
-    return init_op, get_next, saver
-
-  def _build_empty_graph(self, output_types, output_shapes):
-    iterator = iterator_ops.Iterator.from_structure(output_types, output_shapes)
-    saveable = contrib_iterator_ops.make_saveable_from_iterator(iterator)
-    ops.add_to_collection(ops.GraphKeys.SAVEABLE_OBJECTS, saveable)
-    saver = saver_lib.Saver()
-    get_next = iterator.get_next()
-    return get_next, saver
-
-  def _add_iterator_ops_to_collection(self, init_op, get_next):
-    ops.add_to_collection("iterator_ops", init_op)
-    ops.add_to_collection("iterator_ops", get_next[0])
-    ops.add_to_collection("iterator_ops", get_next[1])
-    ops.add_to_collection("iterator_ops", get_next[2])
-
-  def _get_iterator_ops_from_collection(self):
-    init_op, get_next_1, get_next_2, get_next_3 = ops.get_collection(
-        "iterator_ops")
-    return init_op, (get_next_1, get_next_2, get_next_3)
-
-  def _ckpt_path(self):
-    return os.path.join(self.get_temp_dir(), "iterator")
-
-  def _latest_ckpt(self):
-    return saver_lib.latest_checkpoint(self.get_temp_dir())
-
-  def _save(self, sess, saver):
-    saver.save(sess, self._ckpt_path())
-
-  def _restore(self, saver, sess):
-    saver.restore(sess, self._latest_ckpt())
-
-  def _import_meta_graph(self):
-    meta_file_path = self._ckpt_path() + ".meta"
-    return saver_lib.import_meta_graph(meta_file_path)
-
-  def _testReadWithBreaks(self, break_points, init_before_restore=False):
-    expected = []
-    actual = []
-    # Generate the ground truth.
-    with ops.Graph().as_default() as g:
-      init_op, get_next_op, _ = self._build_graph()
-      with self.test_session(graph=g) as sess:
-        sess.run(init_op)
-        for _ in range(self._num_outputs):
-          expected.append(sess.run(get_next_op))
-        with self.assertRaises(errors.OutOfRangeError):
-          sess.run(get_next_op)
-
-    # Run and checkpoint after first break_point.
-    with ops.Graph().as_default() as g:
-      init_op, get_next_op, saver = self._build_graph()
-      with self.test_session(graph=g) as sess:
-        sess.run(init_op)
-        for _ in range(break_points[0]):
-          actual.append(sess.run(get_next_op))
-        self._save(sess, saver)
-
-    # Load from checkpoint and continue running while stopping at each
-    # subsequent checkpoint.
-    for i in range(len(break_points)):
-      with ops.Graph().as_default() as g:
-        saver = self._import_meta_graph()
-        init_op, get_next_op = self._get_iterator_ops_from_collection()
-        with self.test_session(graph=g) as sess:
-          if init_before_restore:
-            sess.run(init_op)
-          self._restore(saver, sess)
-          start = break_points[i]
-          end = break_points[
-              i + 1] if i < len(break_points) - 1 else self._num_outputs
-          for _ in range(end - start):
-            actual.append(sess.run(get_next_op))
-          self._save(sess, saver)
-          if end == self._num_outputs:
-            with self.assertRaises(errors.OutOfRangeError):
-              sess.run(get_next_op)
-    self._match(expected, actual)
-
-  def _match(self, expected, actual):
-    self.assertEqual(len(expected), len(actual))
-    for expected_tuple, actual_tuple in zip(expected, actual):
-      self.assertEqual(expected_tuple[0], actual_tuple[0])
-      self.assertSequenceEqual(expected_tuple[1].tolist(),
-                               actual_tuple[1].tolist())
-      self.assertEqual(expected_tuple[2], actual_tuple[2])
-
-  def _does_not_match(self, expected, actual):
-    with self.assertRaises(AssertionError):
-      self._match(expected, actual)
-
-  def testSaveRestore(self):
-    self._testReadWithBreaks([4])
-    self._testReadWithBreaks([13])
-    self._testReadWithBreaks([18])
-    self._testReadWithBreaks([23])
-
-  def testSaveUnusedIterator(self):
-    self._testReadWithBreaks([0])
-
-  def testSaveFullyUsedIterator(self):
-    self._testReadWithBreaks([self._num_outputs])
-
-  def testMultipleBreaks(self):
-    self._testReadWithBreaks([0, 5, 9, 15, 25, 32])
-
-  def testIdempotence(self):
-    # Attempt to save iterator immediately after restoring.
-    self._testReadWithBreaks([1, 1, 5, 5, 5, 25, 32])
-
-  def testInitThenRestore(self):
-    self._testReadWithBreaks([0, 5, 9, 15, 25, 32], init_before_restore=True)
-
-  def testRestoreExhaustedIterator(self):
-    with ops.Graph().as_default() as g:
-      init_op, get_next_op, saver = self._build_graph()
-      with self.test_session(graph=g) as sess:
-        sess.run(init_op)
-        for _ in range(self._num_outputs):
-          sess.run(get_next_op)
-        with self.assertRaises(errors.OutOfRangeError):
-          sess.run(get_next_op)
-        self._save(sess, saver)
-
-      with ops.Graph().as_default() as g:
-        saver = self._import_meta_graph()
-        init_op, get_next_op = self._get_iterator_ops_from_collection()
-        with self.test_session(graph=g) as sess:
-          self._restore(saver, sess)
-          with self.assertRaises(errors.OutOfRangeError):
-            sess.run(get_next_op)
-
-  def testResetRestoredIterator(self):
-    expected = []
-    # Collect ground truth containing all outputs.
-    with ops.Graph().as_default() as g:
-      init_op, get_next_op, saver = self._build_graph()
-      break_point = self._num_outputs // 2
-      with self.test_session(graph=g) as sess:
-        sess.run(init_op)
-        for _ in range(break_point):
-          expected.append(sess.run(get_next_op))
-        self._save(sess, saver)
-        for _ in range(self._num_outputs - break_point):
-          expected.append(sess.run(get_next_op))
-
-    actual = []
-    # Restore from checkpoint and then run init_op.
-    with ops.Graph().as_default() as g:
-      saver = self._import_meta_graph()
-      init_op, get_next_op = self._get_iterator_ops_from_collection()
-      with self.test_session(graph=g) as sess:
-        self._restore(saver, sess)
-        sess.run(init_op)
-        for _ in range(self._num_outputs):
-          actual.append(sess.run(get_next_op))
-        with self.assertRaises(errors.OutOfRangeError):
-          sess.run(get_next_op)
-    self._match(expected, actual)
-
-  def testRestoreInModifiedGraph(self):
-    expected = []
-    actual_without_restore = []
-    actual = []
-    break_point = 10
-    with ops.Graph().as_default() as g:
-      init_op, get_next_op, saver = self._build_graph(multiplier=15.0)
-      with self.test_session(graph=g) as sess:
-        sess.run(init_op)
-        for _ in range(break_point):
-          expected.append(sess.run(get_next_op))
-        actual.extend(expected)
-        self._save(sess, saver)
-        for _ in range(self._num_outputs - break_point):
-          expected.append(sess.run(get_next_op))
-        with self.assertRaises(errors.OutOfRangeError):
-          sess.run(get_next_op)
-
-    # Collect outputs by running modified graph.
-    with ops.Graph().as_default() as g:
-      init_op, get_next_op, saver = self._build_graph(multiplier=30.0)
-      with self.test_session(graph=g) as sess:
-        sess.run(init_op)
-        for _ in range(self._num_outputs):
-          actual_without_restore.append(sess.run(get_next_op))
-        with self.assertRaises(errors.OutOfRangeError):
-          sess.run(get_next_op)
-
-    # Restore the checkpoint in the modified graph.
-    with ops.Graph().as_default() as g:
-      init_op, get_next_op, saver = self._build_graph(multiplier=30.0)
-      with self.test_session(graph=g) as sess:
-        self._restore(saver, sess)
-        for _ in range(self._num_outputs - break_point):
-          actual.append(sess.run(get_next_op))
-        with self.assertRaises(errors.OutOfRangeError):
-          sess.run(get_next_op)
-
-    # Ensure the modified graph gets overridden when restoring checkpoint.
-    self._does_not_match(expected, actual_without_restore)
-    # Expect that the outputs are what we would expect if we ran the old
-    # graph.
-    self._match(expected, actual)
-
-  # TODO(srbs): Add this test to dataset_serialization_test_base.py.
-  def testRestoreInEmptyGraph(self):
-    expected = []
-    actual = []
-    break_point = 10
-    with ops.Graph().as_default() as g:
-      init_op, get_next_op, saver = self._build_graph(multiplier=15.0)
-      with self.test_session(graph=g) as sess:
-        sess.run(init_op)
-        for _ in range(break_point):
-          sess.run(get_next_op)
-        self._save(sess, saver)
-        for _ in range(self._num_outputs - break_point):
-          expected.append(sess.run(get_next_op))
-        with self.assertRaises(errors.OutOfRangeError):
-          sess.run(get_next_op)
-
-    with ops.Graph().as_default() as g:
-      ds = self._build_ds()
-      output_types = ds.output_types
-      output_shapes = ds.output_shapes
-
-    with ops.Graph().as_default() as g:
-      get_next_op, saver = self._build_empty_graph(output_types, output_shapes)
-      with self.test_session(graph=g) as sess:
-        self._restore(saver, sess)
-        for _ in range(self._num_outputs - break_point):
-          actual.append(sess.run(get_next_op))
-        with self.assertRaises(errors.OutOfRangeError):
-          sess.run(get_next_op)
-
-    # Expect that the outputs are what we would expect if we ran the old
-    # graph.
-    self._match(expected, actual)
-
-  def testDoNotBuildSaveable(self):
-    break_point = 10
-    with ops.Graph().as_default() as g:
-      init_op, get_next_op, saver = self._build_graph(multiplier=15.0)
-      with self.test_session(graph=g) as sess:
-        sess.run(init_op)
-        for _ in range(break_point):
-          sess.run(get_next_op)
-        self._save(sess, saver)
-
-    expected = []
-    # Collect ground truth by running modified graph.
-    with ops.Graph().as_default() as g:
-      init_op, get_next_op, saver = self._build_graph(multiplier=30.0)
-      with self.test_session(graph=g) as sess:
-        sess.run(init_op)
-        for _ in range(self._num_outputs):
-          expected.append(sess.run(get_next_op))
-
-    actual = []
-    with ops.Graph().as_default() as g:
-      init_op, get_next_op, saver = self._build_graph(
-          multiplier=30.0, build_saveable=False)
-      with self.test_session(graph=g) as sess:
-        # Since the SaveableObject was not added to Saver's list
-        # of saveables, iterator state is not restored by saver.restore().
-        self._restore(saver, sess)
-        with self.assertRaises(errors.FailedPreconditionError):
-          sess.run(get_next_op)
-        sess.run(init_op)
-        for _ in range(self._num_outputs):
-          actual.append(sess.run(get_next_op))
-        with self.assertRaises(errors.OutOfRangeError):
-          sess.run(get_next_op)
-    self._match(expected, actual)
+  def testSaveRestoreCore(self):
+    self.run_core_tests(
+        self._build_ds,
+        lambda: self._build_ds(multiplier=15.0),
+        self._num_outputs)
 
   def testSaveStatefulFunction(self):
 
@@ -1024,26 +733,7 @@ class MapDatasetSerializationTest(test.TestCase):
 
       return dataset_ops.Dataset.range(100).map(_map_fn)
 
-    def _build_graph():
-      ds = _build_ds()
-      iterator = ds.make_initializable_iterator()
-
-      saveable = contrib_iterator_ops.make_saveable_from_iterator(iterator)
-      ops.add_to_collection(ops.GraphKeys.SAVEABLE_OBJECTS, saveable)
-      init_op = iterator.initializer
-      get_next = iterator.get_next()
-      saver = saver_lib.Saver(allow_empty=True)
-      return init_op, get_next, saver
-
-    break_point = 10
-    with ops.Graph().as_default() as g:
-      init_op, get_next_op, saver = _build_graph()
-      with self.test_session(graph=g) as sess:
-        sess.run(init_op)
-        for _ in range(break_point):
-          sess.run(get_next_op)
-        with self.assertRaises(errors.InvalidArgumentError):
-          self._save(sess, saver)
+    self.verify_error_on_save(_build_ds, 15, errors.InvalidArgumentError)
 
   def testCaptureVariableInMapFn(self):
 
@@ -1053,27 +743,7 @@ class MapDatasetSerializationTest(test.TestCase):
       return (dataset_ops.Dataset.from_tensors(0).repeat(10).map(
           lambda _: counter_var.assign_add(1)))
 
-    def _build_graph():
-      ds = _build_ds()
-      iterator = ds.make_initializable_iterator()
-
-      saveable = contrib_iterator_ops.make_saveable_from_iterator(iterator)
-      ops.add_to_collection(ops.GraphKeys.SAVEABLE_OBJECTS, saveable)
-      init_op = iterator.initializer
-      get_next = iterator.get_next()
-      saver = saver_lib.Saver(allow_empty=True)
-      return init_op, get_next, saver
-
-    break_point = 10
-    with ops.Graph().as_default() as g:
-      init_op, get_next_op, saver = _build_graph()
-      with self.test_session(graph=g) as sess:
-        sess.run(variables.global_variables_initializer())
-        sess.run(init_op)
-        for _ in range(break_point):
-          sess.run(get_next_op)
-        with self.assertRaises(errors.InvalidArgumentError):
-          self._save(sess, saver)
+    self.verify_error_on_save(_build_ds, 15, errors.InvalidArgumentError)
 
   def testCaptureDefunInMapFn(self):
     num_outputs = 100
@@ -1086,46 +756,7 @@ class MapDatasetSerializationTest(test.TestCase):
 
       return dataset_ops.Dataset.range(num_outputs).map(defun_fn)
 
-    def _build_graph():
-      ds = _build_ds()
-      iterator = ds.make_initializable_iterator()
-
-      saveable = contrib_iterator_ops.make_saveable_from_iterator(iterator)
-      ops.add_to_collection(ops.GraphKeys.SAVEABLE_OBJECTS, saveable)
-      init_op = iterator.initializer
-      get_next = iterator.get_next()
-      saver = saver_lib.Saver(allow_empty=True)
-      return init_op, get_next, saver
-
-    break_point = 10
-    expected = []
-    with ops.Graph().as_default() as g:
-      init_op, get_next_op, saver = _build_graph()
-      with self.test_session(graph=g) as sess:
-        sess.run(variables.global_variables_initializer())
-        sess.run(init_op)
-        for _ in range(break_point):
-          sess.run(get_next_op)
-        self._save(sess, saver)
-        for _ in range(num_outputs - break_point):
-          expected.append(sess.run(get_next_op))
-
-    with ops.Graph().as_default() as g:
-      ds = _build_ds()
-      output_types = ds.output_types
-      output_shapes = ds.output_shapes
-
-    actual = []
-    with ops.Graph().as_default() as g:
-      get_next_op, saver = self._build_empty_graph(output_types, output_shapes)
-      with self.test_session(graph=g) as sess:
-        self._restore(saver, sess)
-        for _ in range(num_outputs - break_point):
-          actual.append(sess.run(get_next_op))
-        with self.assertRaises(errors.OutOfRangeError):
-          sess.run(get_next_op)
-
-    self.assertSequenceEqual(expected, actual)
+    self.run_core_tests(_build_ds, None, num_outputs)
 
   def testBuildDefunInMapFn(self):
     num_outputs = 100
@@ -1143,46 +774,7 @@ class MapDatasetSerializationTest(test.TestCase):
 
       return dataset_ops.Dataset.range(num_outputs).map(defun_fn)
 
-    def _build_graph():
-      ds = _build_ds()
-      iterator = ds.make_initializable_iterator()
-
-      saveable = contrib_iterator_ops.make_saveable_from_iterator(iterator)
-      ops.add_to_collection(ops.GraphKeys.SAVEABLE_OBJECTS, saveable)
-      init_op = iterator.initializer
-      get_next = iterator.get_next()
-      saver = saver_lib.Saver(allow_empty=True)
-      return init_op, get_next, saver
-
-    break_point = 10
-    expected = []
-    with ops.Graph().as_default() as g:
-      init_op, get_next_op, saver = _build_graph()
-      with self.test_session(graph=g) as sess:
-        sess.run(variables.global_variables_initializer())
-        sess.run(init_op)
-        for _ in range(break_point):
-          sess.run(get_next_op)
-        self._save(sess, saver)
-        for _ in range(num_outputs - break_point):
-          expected.append(sess.run(get_next_op))
-
-    with ops.Graph().as_default() as g:
-      ds = _build_ds()
-      output_types = ds.output_types
-      output_shapes = ds.output_shapes
-
-    actual = []
-    with ops.Graph().as_default() as g:
-      get_next_op, saver = self._build_empty_graph(output_types, output_shapes)
-      with self.test_session(graph=g) as sess:
-        self._restore(saver, sess)
-        for _ in range(num_outputs - break_point):
-          actual.append(sess.run(get_next_op))
-        with self.assertRaises(errors.OutOfRangeError):
-          sess.run(get_next_op)
-
-    self.assertSequenceEqual(expected, actual)
+    self.run_core_tests(_build_ds, None, num_outputs)
 
 
 if __name__ == "__main__":
-- 
GitLab


From d9a1d5d17029c84c99af5383d9b2a0a02985fd20 Mon Sep 17 00:00:00 2001
From: Gunhan Gulsoy <gunan@google.com>
Date: Fri, 17 Nov 2017 15:39:15 -0800
Subject: [PATCH 0078/1225] Fix parameter pack expansion in ptr_util. (#14674)

* Fix parameter pack expansion in ptr_util.

* Explicitly pick xla::MakeUnique to call in XLA code.
---
 tensorflow/compiler/xla/ptr_util.h            |  2 +-
 .../xla/service/buffer_assignment_test.cc     | 23 ++++++++-------
 .../xla/service/buffer_liveness_test.cc       | 29 ++++++++++---------
 .../compiler/xla/service/cpu/cpu_compiler.cc  | 14 ++++-----
 4 files changed, 36 insertions(+), 32 deletions(-)

diff --git a/tensorflow/compiler/xla/ptr_util.h b/tensorflow/compiler/xla/ptr_util.h
index 627ddf535f..c58c19db2c 100644
--- a/tensorflow/compiler/xla/ptr_util.h
+++ b/tensorflow/compiler/xla/ptr_util.h
@@ -37,7 +37,7 @@ std::unique_ptr<T> WrapUnique(T* ptr) {
 template <typename T, typename... Args>
 typename tensorflow::helper::MakeUniqueResult<T>::scalar MakeUnique(
     Args&&... args) {
-  return tensorflow::MakeUnique<T, Args>(std::forward<Args>(args)...);
+  return tensorflow::MakeUnique<T, Args...>(std::forward<Args>(args)...);
 }
 
 // Overload for array of unknown bound.
diff --git a/tensorflow/compiler/xla/service/buffer_assignment_test.cc b/tensorflow/compiler/xla/service/buffer_assignment_test.cc
index 89410f42bd..f1b3c2ed75 100644
--- a/tensorflow/compiler/xla/service/buffer_assignment_test.cc
+++ b/tensorflow/compiler/xla/service/buffer_assignment_test.cc
@@ -85,7 +85,7 @@ class BufferAssignmentTest : public HloTestBase {
   std::unique_ptr<BufferAssignment> RunBufferAssignment(HloModule* module,
                                                         int64 alignment = 1) {
     return BufferAssigner::Run(
-               module, MakeUnique<DependencyHloOrdering>(module),
+               module, xla::MakeUnique<DependencyHloOrdering>(module),
                backend().compiler()->BufferSizeBytesFunction(),
                [alignment](LogicalBuffer::Color) { return alignment; })
         .ConsumeValueOrDie();
@@ -94,7 +94,7 @@ class BufferAssignmentTest : public HloTestBase {
   std::unique_ptr<BufferAssignment> RunColoredBufferAssignment(
       HloModule* module, BufferLiveness::Colorer colorer, int64 alignment = 1) {
     return BufferAssigner::Run(
-               module, MakeUnique<DependencyHloOrdering>(module),
+               module, xla::MakeUnique<DependencyHloOrdering>(module),
                backend().compiler()->BufferSizeBytesFunction(),
                [alignment](LogicalBuffer::Color) { return alignment; }, false,
                std::move(colorer))
@@ -1448,7 +1448,7 @@ class WhileBufferAssignmentTest : public HloTestBase {
     auto sequence =
         CreateMemoryMinimizingSequence(*module, ByteSizeOf).ConsumeValueOrDie();
     return BufferAssigner::Run(
-               module, MakeUnique<SequentialHloOrdering>(module, sequence),
+               module, xla::MakeUnique<SequentialHloOrdering>(module, sequence),
                ByteSizeOf,
                [alignment](LogicalBuffer::Color) { return alignment; })
         .ConsumeValueOrDie();
@@ -1469,7 +1469,7 @@ static void RunCopyInsertion(HloModule* module) {
 }
 
 TEST_F(WhileBufferAssignmentTest, TwoForwardWhileLoops) {
-  auto module = MakeUnique<HloModule>(TestName());
+  auto module = xla::MakeUnique<HloModule>(TestName());
   auto builder = HloComputation::Builder("entry");
 
   auto input0 = builder.AddInstruction(
@@ -1526,7 +1526,7 @@ TEST_F(WhileBufferAssignmentTest, TwoForwardWhileLoops) {
 }
 
 TEST_F(WhileBufferAssignmentTest, OneForwardBackwardWhileLoopSet) {
-  auto module = MakeUnique<HloModule>(TestName());
+  auto module = xla::MakeUnique<HloModule>(TestName());
   auto builder = HloComputation::Builder("entry");
 
   auto input0 = builder.AddInstruction(
@@ -1575,7 +1575,7 @@ TEST_F(WhileBufferAssignmentTest, OneForwardBackwardWhileLoopSet) {
 }
 
 TEST_F(BufferAssignmentTest, TwoCalls) {
-  auto module = MakeUnique<HloModule>(TestName());
+  auto module = xla::MakeUnique<HloModule>(TestName());
   Shape r0f32 = ShapeUtil::MakeShape(xla::F32, {});
   HloComputation* sub_computation;
   {
@@ -1640,7 +1640,7 @@ static bool IsPostOrderTraversal(
 }
 
 TEST_F(WhileBufferAssignmentTest, WhileLoopsInterferingResultRange) {
-  auto module = MakeUnique<HloModule>(TestName());
+  auto module = xla::MakeUnique<HloModule>(TestName());
   auto builder = HloComputation::Builder(TestName());
 
   auto zero = builder.AddInstruction(
@@ -1708,9 +1708,10 @@ TEST_F(WhileBufferAssignmentTest, WhileLoopsInterferingResultRange) {
   auto assignment =
       BufferAssigner::Run(
           module.get(),
-          MakeUnique<SequentialHloOrdering>(module.get(), sequence), ByteSizeOf,
+          xla::MakeUnique<SequentialHloOrdering>(module.get(), sequence),
+          ByteSizeOf,
           [](LogicalBuffer::Color) { return 1; })
-          .ConsumeValueOrDie();
+      .ConsumeValueOrDie();
 
   EXPECT_TRUE(BuffersDistinct({while0}, {while1}, *assignment));
 }
@@ -1718,7 +1719,7 @@ TEST_F(WhileBufferAssignmentTest, WhileLoopsInterferingResultRange) {
 // Test buffer assignment for while nodes with multiple uses.
 // TODO(b/37245345): Fix buffer assignment for this case.
 TEST_F(WhileBufferAssignmentTest, DISABLED_TwoWhiles) {
-  auto module = MakeUnique<HloModule>(TestName());
+  auto module = xla::MakeUnique<HloModule>(TestName());
   auto builder = HloComputation::Builder(TestName());
 
   auto input0 = builder.AddInstruction(
@@ -1765,7 +1766,7 @@ TEST_F(WhileBufferAssignmentTest, DISABLED_TwoWhiles) {
 }
 
 TEST_F(WhileBufferAssignmentTest, WhilesDontShareEntryParamIfLiveOut) {
-  auto module = MakeUnique<HloModule>(TestName());
+  auto module = xla::MakeUnique<HloModule>(TestName());
   auto builder = HloComputation::Builder("entry");
 
   auto input0 = builder.AddInstruction(
diff --git a/tensorflow/compiler/xla/service/buffer_liveness_test.cc b/tensorflow/compiler/xla/service/buffer_liveness_test.cc
index 56600b5838..bbb42d494b 100644
--- a/tensorflow/compiler/xla/service/buffer_liveness_test.cc
+++ b/tensorflow/compiler/xla/service/buffer_liveness_test.cc
@@ -120,7 +120,7 @@ TEST_F(BufferLivenessTest, ElementwiseChain) {
 
   auto liveness =
       BufferLiveness::Run(module.get(),
-                          MakeUnique<DependencyHloOrdering>(module.get()))
+                          xla::MakeUnique<DependencyHloOrdering>(module.get()))
           .ConsumeValueOrDie();
 
   EXPECT_FALSE(InstructionsMayInterfere(*liveness, param, negate));
@@ -169,7 +169,8 @@ TEST_F(BufferLivenessTest, MultipleEntryParameters_Sequential) {
   sequence.insert({entry, {param0, negate, param1, exp, add}});
   auto liveness = BufferLiveness::Run(
                       module.get(),
-                      MakeUnique<SequentialHloOrdering>(module.get(), sequence))
+                      xla::MakeUnique<SequentialHloOrdering>(
+                          module.get(), sequence))
                       .ConsumeValueOrDie();
 
   // Entry parameters interfere as if they are defined simultaneously at
@@ -216,7 +217,7 @@ TEST_F(BufferLivenessTest, NonElementwiseOperand) {
 
   auto liveness =
       BufferLiveness::Run(module.get(),
-                          MakeUnique<DependencyHloOrdering>(module.get()))
+                          xla::MakeUnique<DependencyHloOrdering>(module.get()))
           .ConsumeValueOrDie();
 
   EXPECT_FALSE(InstructionsMayInterfere(*liveness, param, exp));
@@ -250,7 +251,7 @@ TEST_F(BufferLivenessTest, OverlappedBuffers) {
 
   auto liveness =
       BufferLiveness::Run(module.get(),
-                          MakeUnique<DependencyHloOrdering>(module.get()))
+                          xla::MakeUnique<DependencyHloOrdering>(module.get()))
           .ConsumeValueOrDie();
 
   EXPECT_TRUE(InstructionsMayInterfere(*liveness, param, negate));
@@ -294,8 +295,8 @@ TEST_F(BufferLivenessTest, OverlappedBuffersSequentialOrder) {
   std::vector<const HloInstruction*> order = {param, negate, exp, add};
   module_sequence.emplace(computation, order);
   auto liveness =
-      BufferLiveness::Run(module.get(), MakeUnique<SequentialHloOrdering>(
-                                            module.get(), module_sequence))
+      BufferLiveness::Run(module.get(), xla::MakeUnique<SequentialHloOrdering>(
+          module.get(), module_sequence))
           .ConsumeValueOrDie();
 
   EXPECT_TRUE(InstructionsMayInterfere(*liveness, param, negate));
@@ -334,7 +335,7 @@ TEST_F(BufferLivenessTest, TupleLiveOut) {
 
   auto liveness =
       BufferLiveness::Run(module.get(),
-                          MakeUnique<DependencyHloOrdering>(module.get()))
+                          xla::MakeUnique<DependencyHloOrdering>(module.get()))
           .ConsumeValueOrDie();
 
   // All buffers should be live out except the param
@@ -370,7 +371,7 @@ TEST_F(BufferLivenessTest, EmbeddedComputation) {
 
   auto liveness =
       BufferLiveness::Run(module.get(),
-                          MakeUnique<DependencyHloOrdering>(module.get()))
+                          xla::MakeUnique<DependencyHloOrdering>(module.get()))
           .ConsumeValueOrDie();
 
   // Buffers in different computations should always interfere.
@@ -409,7 +410,7 @@ TEST_F(BufferLivenessTest, TupleConstantLiveOut) {
 
   auto liveness =
       BufferLiveness::Run(module.get(),
-                          MakeUnique<DependencyHloOrdering>(module.get()))
+                          xla::MakeUnique<DependencyHloOrdering>(module.get()))
           .ConsumeValueOrDie();
 
   // Only the element buffers of the tuple constant which are pointed to by
@@ -474,7 +475,7 @@ TEST_F(BufferLivenessTest, IndependentTupleElements) {
 
   auto liveness =
       BufferLiveness::Run(module.get(),
-                          MakeUnique<DependencyHloOrdering>(module.get()))
+                          xla::MakeUnique<DependencyHloOrdering>(module.get()))
           .ConsumeValueOrDie();
 
   // We compare tuple element pairs that are input/output to the computation:
@@ -536,7 +537,7 @@ TEST_F(BufferLivenessTest, DependentTupleElements) {
 
   auto liveness =
       BufferLiveness::Run(module.get(),
-                          MakeUnique<DependencyHloOrdering>(module.get()))
+                          xla::MakeUnique<DependencyHloOrdering>(module.get()))
           .ConsumeValueOrDie();
 
   // We compare tuple element pairs that are input/output to the computation:
@@ -625,7 +626,8 @@ class FusedDynamicUpdateSliceLivenessTest : public BufferLivenessTest {
     // Run BufferLiveness on 'module'.
     auto liveness =
         BufferLiveness::Run(module.get(),
-                            MakeUnique<DependencyHloOrdering>(module.get()))
+                            xla::MakeUnique<DependencyHloOrdering>(
+                                module.get()))
             .ConsumeValueOrDie();
     // Return whether or not buffers interference is detected between
     // 'tuple_param0' and 'tuple_root' at shape index '{1}'.
@@ -737,7 +739,8 @@ class DynamicUpdateSliceLivenessTest : public BufferLivenessTest {
     // Run BufferLiveness on 'module'.
     auto liveness =
         BufferLiveness::Run(module.get(),
-                            MakeUnique<DependencyHloOrdering>(module.get()))
+                            xla::MakeUnique<DependencyHloOrdering>(
+                                module.get()))
             .ConsumeValueOrDie();
     // Return whether or not buffers interference is detected between
     // 'tuple_param0' and 'tuple_root' at shape index '{1}'.
diff --git a/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc b/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc
index f5b95d3657..4e39612ff6 100644
--- a/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc
+++ b/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc
@@ -444,11 +444,11 @@ StatusOr<std::unique_ptr<Executable>> CpuCompiler::Compile(
       &pre_optimization_ir_hook, &post_optimization_ir_hook));
 
   // Compile must be thread-safe so create a new LLVM context for the module.
-  auto llvm_context = MakeUnique<llvm::LLVMContext>();
+  auto llvm_context = xla::MakeUnique<llvm::LLVMContext>();
   auto llvm_module =
-      MakeUnique<llvm::Module>("__compute_module", *llvm_context);
+      xla::MakeUnique<llvm::Module>("__compute_module", *llvm_context);
 
-  auto jit = MakeUnique<SimpleOrcJIT>(
+  auto jit = xla::MakeUnique<SimpleOrcJIT>(
       CompilerTargetOptions(module->config()),
       CodeGenOptLevel(module->config()),
       options::OptimizeForSizeRequested(module->config()),
@@ -495,7 +495,7 @@ StatusOr<std::unique_ptr<Executable>> CpuCompiler::Compile(
     TF_ASSIGN_OR_RETURN(
         std::unique_ptr<BufferAssignment> assignment,
         BufferAssigner::Run(module.get(),
-                            MakeUnique<DependencyHloOrdering>(module.get()),
+                            xla::MakeUnique<DependencyHloOrdering>(module.get()),
                             BufferSizeBytesFunction(), memory_alignment));
     // BufferAssignment::ToString() includes a header, so no need for us to
     // print one ourselves.
@@ -523,7 +523,7 @@ StatusOr<std::unique_ptr<Executable>> CpuCompiler::Compile(
         const void* data = instruction->literal().InternalData();
         int64 size = CpuExecutable::ShapeSizeBytes(instruction->shape());
         auto iter = aligned_constants.emplace(
-            instruction, MakeUnique<unsigned char[]>(size));
+            instruction, xla::MakeUnique<unsigned char[]>(size));
         CHECK_EQ(iter.second, true);
         unsigned char* aligned_data = iter.first->second.get();
         memcpy(aligned_data, data, size);
@@ -604,7 +604,7 @@ StatusOr<std::unique_ptr<Executable>> CpuCompiler::Compile(
         std::unique_ptr<BufferAssignment> assignment,
         BufferAssigner::Run(
             module.get(),
-            MakeUnique<SequentialHloOrdering>(module.get(), module_sequence),
+            xla::MakeUnique<SequentialHloOrdering>(module.get(), module_sequence),
             BufferSizeBytesFunction(), memory_alignment));
     // BufferAssignment::ToString() includes a header, so no need for us to
     // print one ourselves.
@@ -776,7 +776,7 @@ CpuCompiler::CompileAheadOfTime(std::vector<std::unique_ptr<HloModule>> modules,
     TF_ASSIGN_OR_RETURN(
         std::unique_ptr<BufferAssignment> assignment,
         BufferAssigner::Run(
-            module, MakeUnique<SequentialHloOrdering>(module, module_sequence),
+            module, xla::MakeUnique<SequentialHloOrdering>(module, module_sequence),
             BufferSizeBytesFunction(), memory_alignment));
     // BufferAssignment::ToString() includes a header, so no need for us to
     // print one ourselves.
-- 
GitLab


From 9f1772a2fd989bf511743ad652e8eac395054079 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 17 Nov 2017 15:29:36 -0800
Subject: [PATCH 0079/1225] [XLA] Change array_elementwise_ops_test to test
 against the HLO evaluator instead of hand computed values.

HLO Evaluator Fixes:
1. Use Bitwise Not instead of Logical Not for integral types.
2. Use Bitwise And instead of Logical And for integral types.
3. Use Bitwise Or  instead of Logical Or  for integral types.
4. Implement Cos.
5. Implement Sin.
6. Add disabled test for special broadcast rules for Clamp.
7. Negate signed values by casting to unsigned, because there are platforms where negating MIN_INT is a runtime error and adding one to MAX_UINT is undefined.
8. Multiply signed values by casting to unsigned, because there are platforms where signed overflow is undefined.

PiperOrigin-RevId: 176168969
---
 .../compiler/xla/service/hlo_evaluator.cc     | 160 +++++++++++--
 .../xla/service/hlo_evaluator_test.cc         | 222 +++++++++++-------
 2 files changed, 277 insertions(+), 105 deletions(-)

diff --git a/tensorflow/compiler/xla/service/hlo_evaluator.cc b/tensorflow/compiler/xla/service/hlo_evaluator.cc
index 2bd9723dbe..4fffb6127e 100644
--- a/tensorflow/compiler/xla/service/hlo_evaluator.cc
+++ b/tensorflow/compiler/xla/service/hlo_evaluator.cc
@@ -335,9 +335,31 @@ class HloEvaluator::TypedVisitor : public DfsHloVisitorWithDefault {
     return Status::OK();
   }
 
-  template <
-      typename NativeT,
-      typename std::enable_if<!is_complex_t<NativeT>::value>::type* = nullptr>
+  template <typename NativeT,
+            typename std::enable_if<
+                std::is_integral<NativeT>::value &&
+                !std::is_same<NativeT, bool>::value>::type* = nullptr>
+  Status HandleNot(HloInstruction* not_) {
+    TF_ASSIGN_OR_RETURN(parent_->evaluated_[not_],
+                        ElementWiseUnaryOp(not_, [](ReturnT elem_operand) {
+                          return ~elem_operand;
+                        }));
+    return Status::OK();
+  }
+
+  template <typename NativeT, typename std::enable_if<std::is_floating_point<
+                                  NativeT>::value>::type* = nullptr>
+  Status HandleNot(HloInstruction* not_) {
+    TF_ASSIGN_OR_RETURN(parent_->evaluated_[not_],
+                        ElementWiseUnaryOp(not_, [](ReturnT elem_operand) {
+                          return !elem_operand;
+                        }));
+    return Status::OK();
+  }
+
+  template <typename NativeT,
+            typename std::enable_if<std::is_same<NativeT, bool>::value>::type* =
+                nullptr>
   Status HandleNot(HloInstruction* not_) {
     TF_ASSIGN_OR_RETURN(parent_->evaluated_[not_],
                         ElementWiseUnaryOp(not_, [](ReturnT elem_operand) {
@@ -357,7 +379,24 @@ class HloEvaluator::TypedVisitor : public DfsHloVisitorWithDefault {
     return HandleNot<ReturnT>(not_);
   }
 
-  Status HandleNegate(HloInstruction* negate) override {
+  template <typename NativeT,
+            typename std::enable_if<
+                std::is_signed<NativeT>::value &&
+                !std::is_floating_point<NativeT>::value>::type* = nullptr>
+  Status HandleNegate(HloInstruction* negate) {
+    using type = typename std::make_unsigned<NativeT>::type;
+    TF_ASSIGN_OR_RETURN(parent_->evaluated_[negate],
+                        ElementWiseUnaryOp(negate, [](ReturnT elem_operand) {
+                          return NativeT(-type(elem_operand));
+                        }));
+    return Status::OK();
+  }
+
+  template <typename NativeT,
+            typename std::enable_if<
+                !std::is_signed<NativeT>::value ||
+                std::is_floating_point<NativeT>::value>::type* = nullptr>
+  Status HandleNegate(HloInstruction* negate) {
     TF_ASSIGN_OR_RETURN(parent_->evaluated_[negate],
                         ElementWiseUnaryOp(negate, [](ReturnT elem_operand) {
                           return -elem_operand;
@@ -365,6 +404,10 @@ class HloEvaluator::TypedVisitor : public DfsHloVisitorWithDefault {
     return Status::OK();
   }
 
+  Status HandleNegate(HloInstruction* negate) override {
+    return HandleNegate<ReturnT>(negate);
+  }
+
   template <
       typename NativeT,
       typename std::enable_if<!is_complex_t<NativeT>::value>::type* = nullptr>
@@ -402,7 +445,26 @@ class HloEvaluator::TypedVisitor : public DfsHloVisitorWithDefault {
     return Status::OK();
   }
 
-  Status HandleMultiply(HloInstruction* multiply) override {
+  template <typename NativeT,
+            typename std::enable_if<
+                std::is_signed<NativeT>::value &&
+                !std::is_floating_point<NativeT>::value>::type* = nullptr>
+  Status HandleMultiply(HloInstruction* multiply) {
+    using type = typename std::make_unsigned<NativeT>::type;
+    TF_ASSIGN_OR_RETURN(
+        parent_->evaluated_[multiply],
+        ElementWiseBinaryOp(multiply, [](ReturnT lhs_elem, ReturnT rhs_elem) {
+          return NativeT(type(lhs_elem) * type(rhs_elem));
+        }));
+    return Status::OK();
+  }
+
+  template <
+      typename NativeT,
+      typename std::enable_if<std::is_unsigned<NativeT>::value ||
+                              std::is_floating_point<NativeT>::value ||
+                              is_complex_t<NativeT>::value>::type* = nullptr>
+  Status HandleMultiply(HloInstruction* multiply) {
     TF_ASSIGN_OR_RETURN(
         parent_->evaluated_[multiply],
         ElementWiseBinaryOp(multiply, [](ReturnT lhs_elem, ReturnT rhs_elem) {
@@ -411,6 +473,10 @@ class HloEvaluator::TypedVisitor : public DfsHloVisitorWithDefault {
     return Status::OK();
   }
 
+  Status HandleMultiply(HloInstruction* multiply) override {
+    return HandleMultiply<ReturnT>(multiply);
+  }
+
   Status HandleSubtract(HloInstruction* subtract) override {
     TF_ASSIGN_OR_RETURN(
         parent_->evaluated_[subtract],
@@ -516,9 +582,20 @@ class HloEvaluator::TypedVisitor : public DfsHloVisitorWithDefault {
     return HandleRemainder<ReturnT>(remainder);
   }
 
-  template <
-      typename NativeT,
-      typename std::enable_if<!is_complex_t<NativeT>::value>::type* = nullptr>
+  template <typename NativeT,
+            typename std::enable_if<std::is_integral<NativeT>::value>::type* =
+                nullptr>
+  Status HandleAnd(HloInstruction* and_) {
+    TF_ASSIGN_OR_RETURN(
+        parent_->evaluated_[and_],
+        ElementWiseBinaryOp(and_, [](ReturnT lhs_el, ReturnT rhs_el) {
+          return lhs_el & rhs_el;
+        }));
+    return Status::OK();
+  }
+
+  template <typename NativeT, typename std::enable_if<std::is_floating_point<
+                                  NativeT>::value>::type* = nullptr>
   Status HandleAnd(HloInstruction* and_) {
     TF_ASSIGN_OR_RETURN(
         parent_->evaluated_[and_],
@@ -539,9 +616,20 @@ class HloEvaluator::TypedVisitor : public DfsHloVisitorWithDefault {
     return HandleAnd<ReturnT>(and_);
   }
 
-  template <
-      typename NativeT,
-      typename std::enable_if<!is_complex_t<NativeT>::value>::type* = nullptr>
+  template <typename NativeT,
+            typename std::enable_if<std::is_integral<NativeT>::value>::type* =
+                nullptr>
+  Status HandleOr(HloInstruction* or_) {
+    TF_ASSIGN_OR_RETURN(
+        parent_->evaluated_[or_],
+        ElementWiseBinaryOp(or_, [](ReturnT lhs_el, ReturnT rhs_el) {
+          return lhs_el | rhs_el;
+        }));
+    return Status::OK();
+  }
+
+  template <typename NativeT, typename std::enable_if<std::is_floating_point<
+                                  NativeT>::value>::type* = nullptr>
   Status HandleOr(HloInstruction* or_) {
     TF_ASSIGN_OR_RETURN(
         parent_->evaluated_[or_],
@@ -645,7 +733,7 @@ class HloEvaluator::TypedVisitor : public DfsHloVisitorWithDefault {
       typename std::enable_if<!is_complex_t<NativeT>::value>::type* = nullptr>
   Status HandleClamp(HloInstruction* clamp) {
     std::function<ReturnT(ReturnT, ReturnT, ReturnT)> clamp_op =
-        [](ReturnT low, ReturnT high, ReturnT value) {
+        [](ReturnT low, ReturnT value, ReturnT high) {
           return std::fmax(low, std::fmin(value, high));
         };
     TF_ASSIGN_OR_RETURN(parent_->evaluated_[clamp],
@@ -1289,6 +1377,50 @@ class HloEvaluator::TypedVisitor : public DfsHloVisitorWithDefault {
     return Status::OK();
   }
 
+  template <typename NativeT, typename std::enable_if<std::is_floating_point<
+                                  NativeT>::value>::type* = nullptr>
+  Status HandleSin(HloInstruction* sin) {
+    TF_ASSIGN_OR_RETURN(parent_->evaluated_[sin],
+                        ElementWiseUnaryOp(sin, [](ReturnT elem_operand) {
+                          return std::sin(elem_operand);
+                        }));
+    return Status::OK();
+  }
+
+  template <
+      typename NativeT,
+      typename std::enable_if<std::is_integral<NativeT>::value ||
+                              is_complex_t<NativeT>::value>::type* = nullptr>
+  Status HandleSin(HloInstruction* sin) {
+    return InvalidArgument("Unsupported type for Sin");
+  }
+
+  Status HandleSin(HloInstruction* sin) override {
+    return HandleSin<ReturnT>(sin);
+  }
+
+  template <typename NativeT, typename std::enable_if<std::is_floating_point<
+                                  NativeT>::value>::type* = nullptr>
+  Status HandleCos(HloInstruction* cos) {
+    TF_ASSIGN_OR_RETURN(parent_->evaluated_[cos],
+                        ElementWiseUnaryOp(cos, [](ReturnT elem_operand) {
+                          return std::cos(elem_operand);
+                        }));
+    return Status::OK();
+  }
+
+  template <
+      typename NativeT,
+      typename std::enable_if<std::is_integral<NativeT>::value ||
+                              is_complex_t<NativeT>::value>::type* = nullptr>
+  Status HandleCos(HloInstruction* cos) {
+    return InvalidArgument("Unsupported type for Cos");
+  }
+
+  Status HandleCos(HloInstruction* cos) override {
+    return HandleCos<ReturnT>(cos);
+  }
+
  private:
   template <typename IndexT>
   StatusOr<std::unique_ptr<Literal>> DynamicSlice(
@@ -1399,8 +1531,8 @@ class HloEvaluator::TypedVisitor : public DfsHloVisitorWithDefault {
     const auto* rhs = instruction->operand(1);
     const auto* ehs = instruction->operand(2);
 
-    // TODO(b/35950897, b/27796129): add DCHECK back once implicit broadcast is
-    // removed.
+    // TODO(b/35950897, b/27796129): add DCHECK back once implicit
+    // broadcast is removed.
     if (!(ShapeUtil::SameDimensions(shape, lhs->shape()) &&
           ShapeUtil::SameDimensions(lhs->shape(), rhs->shape()) &&
           ShapeUtil::SameDimensions(rhs->shape(), ehs->shape()))) {
diff --git a/tensorflow/compiler/xla/service/hlo_evaluator_test.cc b/tensorflow/compiler/xla/service/hlo_evaluator_test.cc
index 94929dda6a..d0d6029d5f 100644
--- a/tensorflow/compiler/xla/service/hlo_evaluator_test.cc
+++ b/tensorflow/compiler/xla/service/hlo_evaluator_test.cc
@@ -46,20 +46,57 @@ class HloEvaluatorTest : public HloVerifiedTestBase {
   HloEvaluatorTest() { evaluator_ = MakeUnique<HloEvaluator>(); }
 
   std::unique_ptr<HloEvaluator> evaluator_;
+
+  void TestUnaryOp(HloOpcode opcode, std::unique_ptr<Literal> expected,
+                   std::unique_ptr<Literal> input, float aabs = 0) {
+    HloComputation::Builder b(TestName());
+    auto c1 =
+        b.AddInstruction(HloInstruction::CreateConstant(std::move(input)));
+    auto instruction = b.AddInstruction(
+        HloInstruction::CreateUnary(expected->shape(), opcode, c1));
+    module().AddEntryComputation(b.Build());
+
+    std::unique_ptr<Literal> result =
+        evaluator_->Evaluate(instruction, {}).ConsumeValueOrDie();
+
+    auto element_type = expected->shape().element_type();
+    if (element_type == F32 || element_type == F64) {
+      ErrorSpec error(aabs);
+      LiteralTestUtil::ExpectNear(*expected, *result, error);
+    } else {
+      LiteralTestUtil::ExpectEqual(*expected, *result);
+    }
+  }
+
+  void TestBinaryOp(HloOpcode opcode, std::unique_ptr<Literal> expected,
+                    std::unique_ptr<Literal> lhs,
+                    std::unique_ptr<Literal> rhs) {
+    HloComputation::Builder b(TestName());
+    auto c1 = b.AddInstruction(HloInstruction::CreateConstant(std::move(lhs)));
+    auto c2 = b.AddInstruction(HloInstruction::CreateConstant(std::move(rhs)));
+    auto instruction = b.AddInstruction(
+        HloInstruction::CreateBinary(expected->shape(), opcode, c1, c2));
+    module().AddEntryComputation(b.Build());
+
+    std::unique_ptr<Literal> result =
+        evaluator_->Evaluate(instruction, {}).ConsumeValueOrDie();
+
+    LiteralTestUtil::ExpectEqual(*expected, *result);
+  }
 };
 
 // Verifies that HloEvaluator evaluates a HLO instruction that performs clamp
 // with 3 operands.
 TEST_F(HloEvaluatorTest, DoesClamp) {
   auto low = Literal::CreateR2<float>({{0.f, 2.f}, {2.f, 4.f}});
-  auto high = Literal::CreateR2<float>({{2.f, 4.f}, {4.f, 4.f}});
   auto value = Literal::CreateR2<float>({{0.f, 5.f}, {0.f, 4.f}});
+  auto high = Literal::CreateR2<float>({{2.f, 4.f}, {4.f, 4.f}});
 
   Shape shape = low->shape();
   HloComputation::Builder b(TestName());
   auto c1 = b.AddInstruction(HloInstruction::CreateConstant(std::move(low)));
-  auto c2 = b.AddInstruction(HloInstruction::CreateConstant(std::move(high)));
-  auto c3 = b.AddInstruction(HloInstruction::CreateConstant(std::move(value)));
+  auto c2 = b.AddInstruction(HloInstruction::CreateConstant(std::move(value)));
+  auto c3 = b.AddInstruction(HloInstruction::CreateConstant(std::move(high)));
   auto instruction = b.AddInstruction(
       HloInstruction::CreateTernary(shape, HloOpcode::kClamp, c1, c2, c3));
   module().AddEntryComputation(b.Build());
@@ -72,6 +109,28 @@ TEST_F(HloEvaluatorTest, DoesClamp) {
   LiteralTestUtil::ExpectEqual(*expected, *result);
 }
 
+TEST_F(HloEvaluatorTest, DISABLED_DoesClampSpecialBroadcast) {
+  auto low = Literal::CreateR0<float>(0.f);
+  auto value = Literal::CreateR2<float>({{-1.f, 0.f}, {1.f, 2.f}});
+  auto high = Literal::CreateR0<float>(1.f);
+
+  Shape shape = value->shape();
+  HloComputation::Builder b(TestName());
+  auto c1 = b.AddInstruction(HloInstruction::CreateConstant(std::move(low)));
+  auto c2 = b.AddInstruction(HloInstruction::CreateConstant(std::move(value)));
+  auto c3 = b.AddInstruction(HloInstruction::CreateConstant(std::move(high)));
+  auto instruction = b.AddInstruction(
+      HloInstruction::CreateTernary(shape, HloOpcode::kClamp, c1, c2, c3));
+  module().AddEntryComputation(b.Build());
+
+  std::unique_ptr<Literal> result =
+      evaluator_->Evaluate(instruction, {}).ConsumeValueOrDie();
+
+  auto expected = Literal::CreateR2<float>({{0, 0}, {1, 1}});
+
+  LiteralTestUtil::ExpectEqual(*expected, *result);
+}
+
 // Verifies that HloEvaluator evaluates a HLO instruction that performs select
 // with 3 operands.
 TEST_F(HloEvaluatorTest, DoesSelect) {
@@ -103,120 +162,101 @@ TEST_F(HloEvaluatorTest, DoesSelect) {
 TEST_F(HloEvaluatorTest, DoesAdd) {
   auto lhs = Literal::CreateR2<int64>({{1, 0}, {-100, 4}});
   auto rhs = Literal::CreateR2<int64>({{2, 4}, {4, 4}});
-
-  Shape shape = ShapeUtil::MakeShape(S64, {2, 2});
-  HloComputation::Builder b(TestName());
-  auto c1 = b.AddInstruction(HloInstruction::CreateConstant(std::move(lhs)));
-  auto c2 = b.AddInstruction(HloInstruction::CreateConstant(std::move(rhs)));
-  auto instruction = b.AddInstruction(
-      HloInstruction::CreateBinary(shape, HloOpcode::kAdd, c1, c2));
-  module().AddEntryComputation(b.Build());
-
-  std::unique_ptr<Literal> result =
-      evaluator_->Evaluate(instruction, {}).ConsumeValueOrDie();
-
   auto expected = Literal::CreateR2<int64>({{3, 4}, {-96, 8}});
-
-  LiteralTestUtil::ExpectEqual(*expected, *result);
+  TestBinaryOp(HloOpcode::kAdd, std::move(expected), std::move(lhs),
+               std::move(rhs));
+}
+// Verifies that HloEvaluator evaluates a HLO instruction that performs
+// element-wise and with 2 operands.
+TEST_F(HloEvaluatorTest, DoesAnd) {
+  auto lhs = Literal::CreateR2<int64>({{1, 0}, {-100, 4}});
+  auto rhs = Literal::CreateR2<int64>({{2, 4}, {4, 4}});
+  auto expected = Literal::CreateR2<int64>({{0, 0}, {4, 4}});
+  TestBinaryOp(HloOpcode::kAnd, std::move(expected), std::move(lhs),
+               std::move(rhs));
+}
+// Verifies that HloEvaluator evaluates a HLO instruction that performs
+// element-wise or with 2 operands.
+TEST_F(HloEvaluatorTest, DoesOr) {
+  auto lhs = Literal::CreateR2<int64>({{1, 0}, {-100, 4}});
+  auto rhs = Literal::CreateR2<int64>({{2, 4}, {4, 4}});
+  auto expected = Literal::CreateR2<int64>({{3, 4}, {-100, 4}});
+  TestBinaryOp(HloOpcode::kOr, std::move(expected), std::move(lhs),
+               std::move(rhs));
+}
+// Verifies that HloEvaluator evaluates a HLO instruction that performs
+// element-wise multiply with 2 operands.
+TEST_F(HloEvaluatorTest, DoesMultiply) {
+  auto lhs = Literal::CreateR2<int32>({{-1, 0}, {-100, 4}});
+  auto rhs = Literal::CreateR2<int32>(
+      {{std::numeric_limits<int32>::min(), 4}, {4, 4}});
+  auto expected = Literal::CreateR2<int32>(
+      {{std::numeric_limits<int32>::min(), 0}, {-400, 16}});
+  TestBinaryOp(HloOpcode::kMultiply, std::move(expected), std::move(lhs),
+               std::move(rhs));
 }
-
 // Verifies that HloEvaluator evaluates a HLO instruction that performs
 // element-wise divide with 2 operands.
 TEST_F(HloEvaluatorTest, DoesDivideInt64) {
-  auto lhs_s64 = Literal::CreateR2<int64>({{1, 0}, {-100, 4}});
-  auto rhs_s64 = Literal::CreateR2<int64>({{2, 4}, {4, 4}});
-
-  Shape shape_s64 = ShapeUtil::MakeShape(S64, {2, 2});
-  HloComputation::Builder b(TestName());
-  auto c1_s64 =
-      b.AddInstruction(HloInstruction::CreateConstant(std::move(lhs_s64)));
-  auto c2_s64 =
-      b.AddInstruction(HloInstruction::CreateConstant(std::move(rhs_s64)));
-  auto instruction = b.AddInstruction(HloInstruction::CreateBinary(
-      shape_s64, HloOpcode::kDivide, c1_s64, c2_s64));
-  module().AddEntryComputation(b.Build());
-
-  std::unique_ptr<Literal> result =
-      evaluator_->Evaluate(instruction, {}).ConsumeValueOrDie();
-
+  auto lhs = Literal::CreateR2<int64>({{1, 0}, {-100, 4}});
+  auto rhs = Literal::CreateR2<int64>({{2, 4}, {4, 4}});
   auto expected = Literal::CreateR2<int64>({{0, 0}, {-25, 1}});
-
-  LiteralTestUtil::ExpectEqual(*expected, *result);
+  TestBinaryOp(HloOpcode::kDivide, std::move(expected), std::move(lhs),
+               std::move(rhs));
 }
 TEST_F(HloEvaluatorTest, DoesDivideDouble) {
-  auto lhs_f64 = Literal::CreateR2<double>({{1.0, 0.0}, {-100.0, 4.0}});
-  auto rhs_f64 = Literal::CreateR2<double>({{2.2, 4.0}, {4.0, 4.0}});
-
-  Shape shape_f64 = ShapeUtil::MakeShape(F64, {2, 2});
-  HloComputation::Builder b(TestName());
-  auto c1_f64 =
-      b.AddInstruction(HloInstruction::CreateConstant(std::move(lhs_f64)));
-  auto c2_f64 =
-      b.AddInstruction(HloInstruction::CreateConstant(std::move(rhs_f64)));
-  auto instruction = b.AddInstruction(HloInstruction::CreateBinary(
-      shape_f64, HloOpcode::kDivide, c1_f64, c2_f64));
-  module().AddEntryComputation(b.Build());
-
-  auto result = evaluator_->Evaluate(instruction, {}).ConsumeValueOrDie();
-
+  auto lhs = Literal::CreateR2<double>({{1.0, 0.0}, {-100.0, 4.0}});
+  auto rhs = Literal::CreateR2<double>({{2.2, 4.0}, {4.0, 4.0}});
   auto expected =
       Literal::CreateR2<double>({{0.45454545454545453, 0}, {-25, 1}});
-
-  LiteralTestUtil::ExpectEqual(*expected, *result);
+  TestBinaryOp(HloOpcode::kDivide, std::move(expected), std::move(lhs),
+               std::move(rhs));
 }
 
 // Verifies that HloEvaluator evaluates a HLO instruction that performs
 // element-wise abs op with 1 operand.
 TEST_F(HloEvaluatorTest, DoesAbsR2) {
   auto operand = Literal::CreateR2<int64>({{1, -20}, {-100, 4}});
-  const Shape& shape = ShapeUtil::MakeShape(S64, {2, 2});
-  HloComputation::Builder b(TestName());
-  auto c1 =
-      b.AddInstruction(HloInstruction::CreateConstant(std::move(operand)));
-  auto instruction =
-      b.AddInstruction(HloInstruction::CreateUnary(shape, HloOpcode::kAbs, c1));
-  module().AddEntryComputation(b.Build());
-
-  std::unique_ptr<Literal> result =
-      evaluator_->Evaluate(instruction, {}).ConsumeValueOrDie();
-
   auto expected = Literal::CreateR2<int64>({{1, 20}, {100, 4}});
-
-  LiteralTestUtil::ExpectEqual(*expected, *result);
+  TestUnaryOp(HloOpcode::kAbs, std::move(expected), std::move(operand));
 }
 TEST_F(HloEvaluatorTest, DoesAbsR0) {
-  // For R0 literal.
-  const Shape& r0 = ShapeUtil::MakeShape(F32, {});
   auto operand = Literal::CreateR0<float>(-1.0f);
-  HloComputation::Builder b(TestName());
-  auto c1 =
-      b.AddInstruction(HloInstruction::CreateConstant(std::move(operand)));
-  auto instruction =
-      b.AddInstruction(HloInstruction::CreateUnary(r0, HloOpcode::kAbs, c1));
-  module().AddEntryComputation(b.Build());
-
-  auto result = evaluator_->Evaluate(instruction).ConsumeValueOrDie();
   auto expected = Literal::CreateR0<float>(1.0f);
-
-  LiteralTestUtil::ExpectEqual(*expected, *result);
+  TestUnaryOp(HloOpcode::kAbs, std::move(expected), std::move(operand));
 }
 TEST_F(HloEvaluatorTest, DoesAbsR1WithZeroSize) {
-  // For R1 literal with dimension of size 0.
-  Shape empty_r1 = ShapeUtil::MakeShape(F32, {0});
   auto operand = Literal::CreateR1<float>({});
-  HloComputation::Builder b(TestName());
-  auto c1 =
-      b.AddInstruction(HloInstruction::CreateConstant(std::move(operand)));
-  auto instruction = b.AddInstruction(
-      HloInstruction::CreateUnary(empty_r1, HloOpcode::kAbs, c1));
-  module().AddEntryComputation(b.Build());
-
-  auto result = evaluator_->Evaluate(instruction).ConsumeValueOrDie();
   auto expected = Literal::CreateR1<float>({});
-
-  LiteralTestUtil::ExpectEqual(*expected, *result);
+  TestUnaryOp(HloOpcode::kAbs, std::move(expected), std::move(operand));
+}
+TEST_F(HloEvaluatorTest, DoesNegateR2) {
+  auto operand = Literal::CreateR2<int32>(
+      {{0, std::numeric_limits<int32>::min()}, {-1, 4}});
+  auto expected =
+      Literal::CreateR2<int32>({{0, std::numeric_limits<int>::min()}, {1, -4}});
+  TestUnaryOp(HloOpcode::kNegate, std::move(expected), std::move(operand));
+}
+TEST_F(HloEvaluatorTest, DoesCosR2) {
+  auto operand = Literal::CreateR2<float>({{0, M_PI}, {-M_PI, 2 * M_PI}});
+  auto expected = Literal::CreateR2<float>({{1, -1}, {-1, 1}});
+  TestUnaryOp(HloOpcode::kCos, std::move(expected), std::move(operand));
+}
+TEST_F(HloEvaluatorTest, DoesSinR2) {
+  auto operand = Literal::CreateR2<float>({{0, M_PI}, {-M_PI, 2 * M_PI}});
+  auto expected = Literal::CreateR2<float>({{0, 0}, {0, 0}});
+  TestUnaryOp(HloOpcode::kSin, std::move(expected), std::move(operand),
+              0x1.0P-20);
+}
+TEST_F(HloEvaluatorTest, DoesNotR2) {
+  auto operand =
+      Literal::CreateR2<int32>({{0, std::numeric_limits<int>::min()},
+                                {-1, std::numeric_limits<int>::max()}});
+  auto expected =
+      Literal::CreateR2<int32>({{-1, std::numeric_limits<int>::max()},
+                                {0, std::numeric_limits<int>::min()}});
+  TestUnaryOp(HloOpcode::kNot, std::move(expected), std::move(operand));
 }
-
 // Verifies that HloEvaluator evaluates a HLO Computation with non-parameter nor
 // constant operands.
 TEST_F(HloEvaluatorTest, DoesTraverseInstructions) {
-- 
GitLab


From 441c9d38224f5010aae9ac38462bf308008c5036 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 17 Nov 2017 16:03:23 -0800
Subject: [PATCH 0080/1225] Hlo parser: support empty literals.

PiperOrigin-RevId: 176173427
---
 .../compiler/xla/tools/parser/hlo_parser.cc   |  6 -----
 .../xla/tools/parser/hlo_parser_test.cc       | 22 +++++++++++++++++++
 2 files changed, 22 insertions(+), 6 deletions(-)

diff --git a/tensorflow/compiler/xla/tools/parser/hlo_parser.cc b/tensorflow/compiler/xla/tools/parser/hlo_parser.cc
index 1767d712d7..6f5c7b8d0f 100644
--- a/tensorflow/compiler/xla/tools/parser/hlo_parser.cc
+++ b/tensorflow/compiler/xla/tools/parser/hlo_parser.cc
@@ -1167,12 +1167,6 @@ bool HloParser::ParseTupleLiteral(std::unique_ptr<Literal>* literal,
 // rank2345 ::= shape nested_array
 bool HloParser::ParseNonTupleLiteral(std::unique_ptr<Literal>* literal,
                                      const Shape& shape) {
-  const int64 size = ShapeUtil::ElementsIn(shape);
-  if (size == 0) {
-    *literal = Literal::CreateFromShape(shape);
-    return true;
-  }
-
   const int64 rank = ShapeUtil::Rank(shape);
   if (rank > 1 && !EatShapeAndCheckCompatible(shape)) {
     return false;
diff --git a/tensorflow/compiler/xla/tools/parser/hlo_parser_test.cc b/tensorflow/compiler/xla/tools/parser/hlo_parser_test.cc
index 3fbbfbdead..b67b4b816d 100644
--- a/tensorflow/compiler/xla/tools/parser/hlo_parser_test.cc
+++ b/tensorflow/compiler/xla/tools/parser/hlo_parser_test.cc
@@ -90,6 +90,28 @@ ENTRY %ConstantF32.v4 () -> f32[] {
   ROOT %constant = f32[] constant(42)
 }
 
+)"
+},
+// f32 constant, rank 1 empty array.
+{
+"ConstantF32R1Empty",
+R"(HloModule ConstantF32Empty_module:
+
+ENTRY %ConstantF32Empty.v4 () -> f32[0] {
+  ROOT %constant = f32[0]{0} constant({})
+}
+
+)"
+},
+// f32 constant, rank 4 empty array.
+{
+"ConstantF32R4Empty",
+R"(HloModule ConstantF32R4Empty_module:
+
+ENTRY %ConstantF32R4Empty.v4 () -> f32[2,0,4,3] {
+  ROOT %constant = f32[2,0,4,3]{3,2,1,0} constant(f32[2,0,4,3] { { /*i0=0*/ }, { /*i0=1*/ } })
+}
+
 )"
 },
 // constant 4D
-- 
GitLab


From 8547044d4dacaa0d6001578634a44b488dd23601 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 17 Nov 2017 16:16:09 -0800
Subject: [PATCH 0081/1225] [XLA] Add conditional HloInstruction and handle
 conditional in DFS visitors.

PiperOrigin-RevId: 176175297
---
 tensorflow/compiler/xla/service/BUILD         |  1 +
 .../compiler/xla/service/dfs_hlo_visitor.h    |  1 +
 .../service/dfs_hlo_visitor_with_default.h    |  3 ++
 .../compiler/xla/service/hlo_cost_analysis.cc | 20 ++++++++
 .../compiler/xla/service/hlo_cost_analysis.h  |  1 +
 .../compiler/xla/service/hlo_instruction.cc   | 46 ++++++++++++++++++-
 .../compiler/xla/service/hlo_instruction.h    | 18 ++++++++
 .../compiler/xla/service/hlo_matchers.h       |  1 +
 .../compiler/xla/service/hlo_verifier.cc      |  9 ++++
 9 files changed, 99 insertions(+), 1 deletion(-)

diff --git a/tensorflow/compiler/xla/service/BUILD b/tensorflow/compiler/xla/service/BUILD
index 7bb4479ce0..db265510f2 100644
--- a/tensorflow/compiler/xla/service/BUILD
+++ b/tensorflow/compiler/xla/service/BUILD
@@ -1306,6 +1306,7 @@ cc_library(
         "//tensorflow/compiler/xla:util",
         "//tensorflow/compiler/xla:xla_data_proto",
         "//tensorflow/core:lib",
+        "//tensorflow/core:lib_internal",
     ],
 )
 
diff --git a/tensorflow/compiler/xla/service/dfs_hlo_visitor.h b/tensorflow/compiler/xla/service/dfs_hlo_visitor.h
index bc73839a88..7b95325601 100644
--- a/tensorflow/compiler/xla/service/dfs_hlo_visitor.h
+++ b/tensorflow/compiler/xla/service/dfs_hlo_visitor.h
@@ -208,6 +208,7 @@ class DfsHloVisitorBase {
   virtual Status HandleReduceWindow(HloInstructionPtr hlo) = 0;
   virtual Status HandleSelectAndScatter(HloInstructionPtr hlo) = 0;
   virtual Status HandleWhile(HloInstructionPtr hlo) = 0;
+  virtual Status HandleConditional(HloInstructionPtr hlo) = 0;
 
   virtual Status HandlePad(HloInstructionPtr hlo) = 0;
 
diff --git a/tensorflow/compiler/xla/service/dfs_hlo_visitor_with_default.h b/tensorflow/compiler/xla/service/dfs_hlo_visitor_with_default.h
index 5415bab5b3..133aa25094 100644
--- a/tensorflow/compiler/xla/service/dfs_hlo_visitor_with_default.h
+++ b/tensorflow/compiler/xla/service/dfs_hlo_visitor_with_default.h
@@ -167,6 +167,9 @@ class DfsHloVisitorWithDefaultBase
   Status HandleWhile(HloInstructionPtr xla_while) override {
     return DefaultAction(xla_while);
   }
+  Status HandleConditional(HloInstructionPtr conditional) override {
+    return DefaultAction(conditional);
+  }
   Status HandleRecv(HloInstructionPtr recv) override {
     return DefaultAction(recv);
   }
diff --git a/tensorflow/compiler/xla/service/hlo_cost_analysis.cc b/tensorflow/compiler/xla/service/hlo_cost_analysis.cc
index a24457edbf..6fcc01dd64 100644
--- a/tensorflow/compiler/xla/service/hlo_cost_analysis.cc
+++ b/tensorflow/compiler/xla/service/hlo_cost_analysis.cc
@@ -22,6 +22,7 @@ limitations under the License.
 #include "tensorflow/compiler/xla/util.h"
 #include "tensorflow/core/lib/core/bits.h"
 #include "tensorflow/core/lib/core/errors.h"
+#include "tensorflow/core/lib/gtl/map_util.h"
 
 namespace xla {
 
@@ -480,6 +481,25 @@ Status HloCostAnalysis::HandleWhile(const HloInstruction* xla_while) {
   return Status::OK();
 }
 
+Status HloCostAnalysis::HandleConditional(const HloInstruction* conditional) {
+  // Compute the cost of the true and false computations and take the maximum
+  // from those for each property.
+  TF_ASSIGN_OR_RETURN(const Properties true_computation_properties,
+                      ProcessSubcomputation(conditional->true_computation()));
+  TF_ASSIGN_OR_RETURN(const Properties false_computation_properties,
+                      ProcessSubcomputation(conditional->false_computation()));
+  current_properties_ = true_computation_properties;
+  for (const auto& property : false_computation_properties) {
+    if (!tensorflow::gtl::InsertIfNotPresent(&current_properties_, property)) {
+      current_properties_[property.first] =
+          std::max(current_properties_[property.first], property.second);
+    }
+  }
+  current_should_compute_bottleneck_time_ = false;
+
+  return Status::OK();
+}
+
 Status HloCostAnalysis::FinishVisit(const HloInstruction*) {
   return Status::OK();
 }
diff --git a/tensorflow/compiler/xla/service/hlo_cost_analysis.h b/tensorflow/compiler/xla/service/hlo_cost_analysis.h
index e785596c8e..fade19522c 100644
--- a/tensorflow/compiler/xla/service/hlo_cost_analysis.h
+++ b/tensorflow/compiler/xla/service/hlo_cost_analysis.h
@@ -97,6 +97,7 @@ class HloCostAnalysis : public ConstDfsHloVisitor {
   Status HandleReshape(const HloInstruction* reshape) override;
   Status HandleTranspose(const HloInstruction* transpose) override;
   Status HandleWhile(const HloInstruction* xla_while) override;
+  Status HandleConditional(const HloInstruction* conditional) override;
   Status FinishVisit(const HloInstruction* root) override;
 
   Status Preprocess(const HloInstruction* hlo) override;
diff --git a/tensorflow/compiler/xla/service/hlo_instruction.cc b/tensorflow/compiler/xla/service/hlo_instruction.cc
index 95c14ee7a8..ff16f7558e 100644
--- a/tensorflow/compiler/xla/service/hlo_instruction.cc
+++ b/tensorflow/compiler/xla/service/hlo_instruction.cc
@@ -438,6 +438,23 @@ HloInstruction::CreateCrossReplicaSum(const Shape& shape,
   return instruction;
 }
 
+/* static */ std::unique_ptr<HloInstruction> HloInstruction::CreateConditional(
+    const Shape& shape, HloInstruction* pred,
+    HloInstruction* true_computation_arg, HloComputation* true_computation,
+    HloInstruction* false_computation_arg, HloComputation* false_computation) {
+  auto instruction =
+      WrapUnique(new HloInstruction(HloOpcode::kConditional, shape));
+  instruction->AppendOperand(pred);
+  instruction->AppendOperand(true_computation_arg);
+  instruction->AppendOperand(false_computation_arg);
+  // In called_computations_, the index of true_computation must be 0 and that
+  // of false computation must be 1, as defined by kTrueComputationIndex and
+  // kFalseComputationIndex.
+  instruction->called_computations_.push_back(true_computation);
+  instruction->called_computations_.push_back(false_computation);
+  return instruction;
+}
+
 /* static */ std::unique_ptr<HloInstruction> HloInstruction::CreateSlice(
     const Shape& shape, HloInstruction* operand,
     tensorflow::gtl::ArraySlice<int64> start_indices,
@@ -1814,6 +1831,32 @@ void HloInstruction::set_scatter(HloComputation* computation) {
   called_computations_[kScatterComputationIndex] = computation;
 }
 
+HloComputation* HloInstruction::true_computation() const {
+  CHECK_EQ(HloOpcode::kConditional, opcode_);
+  return called_computations_[kTrueComputationIndex];
+}
+
+HloComputation* HloInstruction::false_computation() const {
+  CHECK_EQ(HloOpcode::kConditional, opcode_);
+  return called_computations_[kFalseComputationIndex];
+}
+
+void HloInstruction::set_true_computation(HloComputation* true_computation) {
+  // Don't allow changing the computation for fused instructions so we don't
+  // have to recompute called_instructions for the entire fusion instruction.
+  CHECK(!IsFused());
+  CHECK_EQ(HloOpcode::kConditional, opcode_);
+  called_computations_[kTrueComputationIndex] = true_computation;
+}
+
+void HloInstruction::set_false_computation(HloComputation* false_computation) {
+  // Don't allow changing the computation for fused instructions so we don't
+  // have to recompute called_instructions for the entire fusion instruction.
+  CHECK(!IsFused());
+  CHECK_EQ(HloOpcode::kConditional, opcode_);
+  called_computations_[kFalseComputationIndex] = false_computation;
+}
+
 string HloInstruction::SignatureString() const {
   string operands =
       Join(operands_, ", ", [](string* out, HloInstruction* operand) {
@@ -2335,6 +2378,8 @@ Status HloInstruction::Visit(DfsHloVisitorBase<HloInstructionPtr>* visitor) {
       return visitor->HandleFusion(this);
     case HloOpcode::kCall:
       return visitor->HandleCall(this);
+    case HloOpcode::kConditional:
+      return visitor->HandleConditional(this);
     case HloOpcode::kCustomCall:
       return visitor->HandleCustomCall(this);
     case HloOpcode::kRecv:
@@ -2347,7 +2392,6 @@ Status HloInstruction::Visit(DfsHloVisitorBase<HloInstructionPtr>* visitor) {
       return visitor->HandleSendDone(this);
 
     // These opcodes are not handled here.
-    case HloOpcode::kConditional:
     case HloOpcode::kTrace:
       break;
   }
diff --git a/tensorflow/compiler/xla/service/hlo_instruction.h b/tensorflow/compiler/xla/service/hlo_instruction.h
index 8c6449d73b..f3dbe9e33f 100644
--- a/tensorflow/compiler/xla/service/hlo_instruction.h
+++ b/tensorflow/compiler/xla/service/hlo_instruction.h
@@ -310,6 +310,11 @@ class HloInstruction {
                                                      HloComputation* body,
                                                      HloInstruction* init);
 
+  static std::unique_ptr<HloInstruction> CreateConditional(
+      const Shape& shape, HloInstruction* pred,
+      HloInstruction* true_computation_arg, HloComputation* true_computation,
+      HloInstruction* false_computation_arg, HloComputation* false_computation);
+
   // Creates a fusion instruction. A fusion instruction contains one or more
   // fused instructions forming an expression with a single root
   // "fused_root". Additional instructions can be added to the fusion
@@ -613,6 +618,15 @@ class HloInstruction {
   void set_select(HloComputation* select);
   void set_scatter(HloComputation* scatter);
 
+  // Gets/sets the true and false HloComputation for Conditional. The setters
+  // should only be called by HloModule or HloComputation methods.
+  //
+  // Precondition: The instruction is a Conditional instruction.
+  HloComputation* true_computation() const;
+  HloComputation* false_computation() const;
+  void set_true_computation(HloComputation* true_computation);
+  void set_false_computation(HloComputation* false_computation);
+
   // Returns a string for the signature of this instruction if considered as a
   // function, e.g. the signature of an F32 add is (F32, F32) -> F32.
   string SignatureString() const;
@@ -1197,6 +1211,10 @@ class HloInstruction {
     // kSelectAndScatter computations.
     kSelectComputationIndex = 0,
     kScatterComputationIndex = 1,
+
+    // kConditional computations.
+    kTrueComputationIndex = 0,
+    kFalseComputationIndex = 1,
   };
 
   // Outfeed configuration information, only present for kOutfeed.
diff --git a/tensorflow/compiler/xla/service/hlo_matchers.h b/tensorflow/compiler/xla/service/hlo_matchers.h
index 268fa0f632..992f55788b 100644
--- a/tensorflow/compiler/xla/service/hlo_matchers.h
+++ b/tensorflow/compiler/xla/service/hlo_matchers.h
@@ -87,6 +87,7 @@ HLO_MATCHER(Call);
 HLO_MATCHER(Ceil);
 HLO_MATCHER(Clamp);
 HLO_MATCHER(Concatenate);
+HLO_MATCHER(Conditional);
 HLO_MATCHER(Constant);
 HLO_MATCHER(Convert);
 HLO_MATCHER(Convolution);
diff --git a/tensorflow/compiler/xla/service/hlo_verifier.cc b/tensorflow/compiler/xla/service/hlo_verifier.cc
index c938450891..e353a75cab 100644
--- a/tensorflow/compiler/xla/service/hlo_verifier.cc
+++ b/tensorflow/compiler/xla/service/hlo_verifier.cc
@@ -263,6 +263,15 @@ class ShapeVerifier : public DfsHloVisitor {
                       xla_while->while_body()->ComputeProgramShape().result());
   }
 
+  Status HandleConditional(HloInstruction* conditional) override {
+    TF_RETURN_IF_ERROR(CheckShape(
+        conditional,
+        conditional->true_computation()->ComputeProgramShape().result()));
+    return CheckShape(
+        conditional,
+        conditional->false_computation()->ComputeProgramShape().result());
+  }
+
   Status HandlePad(HloInstruction* pad) override {
     return CheckShape(pad,
                       ShapeInference::InferPadShape(pad->operand(0)->shape(),
-- 
GitLab


From 172d475ffbe9a88379a44a8fb32b8029e01ac11a Mon Sep 17 00:00:00 2001
From: Yao Zhang <yaozhang@google.com>
Date: Fri, 17 Nov 2017 16:23:40 -0800
Subject: [PATCH 0082/1225] Simplify the interface of NodeProcessor
 constructor.

PiperOrigin-RevId: 176176185
---
 .../grappler/optimizers/layout_optimizer.cc   | 177 ++++++++----------
 1 file changed, 78 insertions(+), 99 deletions(-)

diff --git a/tensorflow/core/grappler/optimizers/layout_optimizer.cc b/tensorflow/core/grappler/optimizers/layout_optimizer.cc
index ba5d13eeaf..e363b8f27b 100644
--- a/tensorflow/core/grappler/optimizers/layout_optimizer.cc
+++ b/tensorflow/core/grappler/optimizers/layout_optimizer.cc
@@ -31,6 +31,7 @@ limitations under the License.
 
 namespace tensorflow {
 namespace grappler {
+namespace {
 
 const char kConcatConst[] = "LayoutOptimizerConcatConst";
 const char kPermNHWCToNCHW[] = "LayoutOptimizerPermConstNHWCToNCHW";
@@ -158,13 +159,25 @@ class GraphProcessor {
  private:
 };
 
+struct OptimizeContext {
+  OptimizeContext(GraphDef* graph, NodeDef* node, NodeMap* node_map,
+                  bool is_in_frame)
+      : graph(graph),
+        node(node),
+        node_map(node_map),
+        is_in_frame(is_in_frame) {}
+  GraphDef* graph;
+  NodeDef* node;
+  NodeMap* node_map;
+  bool is_in_frame;
+};
+
 class NodeProcessor : public GraphProcessor {
  public:
-  NodeProcessor(GraphDef* graph, NodeDef* node, NodeMap* node_map,
-                bool is_in_frame)
-      : GraphProcessor(graph, node_map),
-        node_(node),
-        is_in_frame_(is_in_frame) {}
+  explicit NodeProcessor(const OptimizeContext& opt_cxt)
+      : GraphProcessor(opt_cxt.graph, opt_cxt.node_map),
+        node_(opt_cxt.node),
+        is_in_frame_(opt_cxt.is_in_frame) {}
   virtual ~NodeProcessor() {}
   virtual Status ConvertNode() {
     if (ShouldProcess()) {
@@ -473,9 +486,8 @@ class NodeProcessor : public GraphProcessor {
 
 class AvgPoolGradProcessor : public NodeProcessor {
  public:
-  AvgPoolGradProcessor(GraphDef* graph, NodeDef* node, NodeMap* node_map,
-                       bool is_in_frame)
-      : NodeProcessor(graph, node, node_map, is_in_frame) {}
+  explicit AvgPoolGradProcessor(const OptimizeContext& opt_cxt)
+      : NodeProcessor(opt_cxt) {}
 
  protected:
   std::vector<int> GetInputPos() const override {
@@ -487,9 +499,8 @@ class AvgPoolGradProcessor : public NodeProcessor {
 
 class BiasAddGradProcessor : public NodeProcessor {
  public:
-  BiasAddGradProcessor(GraphDef* graph, NodeDef* node, NodeMap* node_map,
-                       bool is_in_frame)
-      : NodeProcessor(graph, node, node_map, is_in_frame) {}
+  explicit BiasAddGradProcessor(const OptimizeContext& opt_cxt)
+      : NodeProcessor(opt_cxt) {}
 
  protected:
   bool ShouldProcess() const override {
@@ -507,9 +518,8 @@ class BiasAddGradProcessor : public NodeProcessor {
 
 class Conv2DProcessor : public NodeProcessor {
  public:
-  Conv2DProcessor(GraphDef* graph, NodeDef* node, NodeMap* node_map,
-                  bool no_gemm, bool is_in_frame)
-      : NodeProcessor(graph, node, node_map, is_in_frame), no_gemm_(no_gemm) {}
+  Conv2DProcessor(const OptimizeContext& opt_cxt, bool no_gemm)
+      : NodeProcessor(opt_cxt), no_gemm_(no_gemm) {}
 
  protected:
   bool ShouldProcess() const override {
@@ -577,10 +587,8 @@ class Conv2DProcessor : public NodeProcessor {
 
 class Conv2DBackpropFilterProcessor : public Conv2DProcessor {
  public:
-  Conv2DBackpropFilterProcessor(GraphDef* graph, NodeDef* node,
-                                NodeMap* node_map, bool no_gemm,
-                                bool is_in_frame)
-      : Conv2DProcessor(graph, node, node_map, no_gemm, is_in_frame) {}
+  Conv2DBackpropFilterProcessor(const OptimizeContext& opt_cxt, bool no_gemm)
+      : Conv2DProcessor(opt_cxt, no_gemm) {}
 
  protected:
   bool IsGemmUsed() const override {
@@ -603,10 +611,8 @@ class Conv2DBackpropFilterProcessor : public Conv2DProcessor {
 
 class Conv2DBackpropInputProcessor : public Conv2DProcessor {
  public:
-  Conv2DBackpropInputProcessor(GraphDef* graph, NodeDef* node,
-                               NodeMap* node_map, bool no_gemm,
-                               bool is_in_frame)
-      : Conv2DProcessor(graph, node, node_map, no_gemm, is_in_frame) {}
+  Conv2DBackpropInputProcessor(const OptimizeContext& opt_cxt, bool no_gemm)
+      : Conv2DProcessor(opt_cxt, no_gemm) {}
 
  protected:
   bool IsGemmUsed() const override {
@@ -625,9 +631,8 @@ class Conv2DBackpropInputProcessor : public Conv2DProcessor {
 
 class FusedBatchNormGradProcessor : public NodeProcessor {
  public:
-  FusedBatchNormGradProcessor(GraphDef* graph, NodeDef* node, NodeMap* node_map,
-                              bool is_in_frame)
-      : NodeProcessor(graph, node, node_map, is_in_frame) {}
+  explicit FusedBatchNormGradProcessor(const OptimizeContext& opt_cxt)
+      : NodeProcessor(opt_cxt) {}
 
  protected:
   std::vector<int> GetInputPos() const override {
@@ -638,9 +643,8 @@ class FusedBatchNormGradProcessor : public NodeProcessor {
 
 class MaxPoolGradProcessor : public NodeProcessor {
  public:
-  MaxPoolGradProcessor(GraphDef* graph, NodeDef* node, NodeMap* node_map,
-                       bool is_in_frame)
-      : NodeProcessor(graph, node, node_map, is_in_frame) {}
+  explicit MaxPoolGradProcessor(const OptimizeContext& opt_cxt)
+      : NodeProcessor(opt_cxt) {}
 
  protected:
   std::vector<int> GetInputPos() const override {
@@ -651,9 +655,8 @@ class MaxPoolGradProcessor : public NodeProcessor {
 
 class AgnosticNodeProcessor : public NodeProcessor {
  public:
-  AgnosticNodeProcessor(GraphDef* graph, NodeDef* node, NodeMap* node_map,
-                        bool is_in_frame)
-      : NodeProcessor(graph, node, node_map, is_in_frame) {}
+  explicit AgnosticNodeProcessor(const OptimizeContext& opt_cxt)
+      : NodeProcessor(opt_cxt) {}
 
  protected:
   bool ShouldProcess() const override {
@@ -684,9 +687,8 @@ class AgnosticNodeProcessor : public NodeProcessor {
 
 class AddNProcessor : public AgnosticNodeProcessor {
  public:
-  AddNProcessor(GraphDef* graph, NodeDef* node, NodeMap* node_map,
-                bool is_in_frame)
-      : AgnosticNodeProcessor(graph, node, node_map, is_in_frame) {}
+  explicit AddNProcessor(const OptimizeContext& opt_cxt)
+      : AgnosticNodeProcessor(opt_cxt) {}
 
  protected:
   std::vector<int> GetInputPos() const override {
@@ -701,9 +703,8 @@ class AddNProcessor : public AgnosticNodeProcessor {
 
 class BinaryOpProcessor : public AgnosticNodeProcessor {
  public:
-  BinaryOpProcessor(GraphDef* graph, NodeDef* node, NodeMap* node_map,
-                    bool is_in_frame)
-      : AgnosticNodeProcessor(graph, node, node_map, is_in_frame) {
+  explicit BinaryOpProcessor(const OptimizeContext& opt_cxt)
+      : AgnosticNodeProcessor(opt_cxt) {
     is_4d_with_vector_ = Is4DOperateWithVector();
   }
 
@@ -810,9 +811,8 @@ class BinaryOpProcessor : public AgnosticNodeProcessor {
 
 class ConcatProcessor : public AgnosticNodeProcessor {
  public:
-  ConcatProcessor(GraphDef* graph, NodeDef* node, NodeMap* node_map,
-                  bool is_in_frame)
-      : AgnosticNodeProcessor(graph, node, node_map, is_in_frame) {
+  explicit ConcatProcessor(const OptimizeContext& opt_cxt)
+      : AgnosticNodeProcessor(opt_cxt) {
     // For Concat,  the concat axis is the first input; for ConcatV2,
     // the last input.
     axis_node_pos_ =
@@ -881,9 +881,8 @@ class ConcatProcessor : public AgnosticNodeProcessor {
 
 class PadProcessor : public AgnosticNodeProcessor {
  public:
-  PadProcessor(GraphDef* graph, NodeDef* node, NodeMap* node_map,
-               bool is_in_frame)
-      : AgnosticNodeProcessor(graph, node, node_map, is_in_frame) {}
+  explicit PadProcessor(const OptimizeContext& opt_cxt)
+      : AgnosticNodeProcessor(opt_cxt) {}
 
  protected:
   bool ShouldProcess() const override {
@@ -913,9 +912,8 @@ class PadProcessor : public AgnosticNodeProcessor {
 
 class ReluGradProcessor : public AgnosticNodeProcessor {
  public:
-  ReluGradProcessor(GraphDef* graph, NodeDef* node, NodeMap* node_map,
-                    bool is_in_frame)
-      : AgnosticNodeProcessor(graph, node, node_map, is_in_frame) {}
+  explicit ReluGradProcessor(const OptimizeContext& opt_cxt)
+      : AgnosticNodeProcessor(opt_cxt) {}
 
  protected:
   std::vector<int> GetInputPos() const override {
@@ -926,9 +924,8 @@ class ReluGradProcessor : public AgnosticNodeProcessor {
 
 class SliceProcessor : public AgnosticNodeProcessor {
  public:
-  SliceProcessor(GraphDef* graph, NodeDef* node, NodeMap* node_map,
-                 bool is_in_frame)
-      : AgnosticNodeProcessor(graph, node, node_map, is_in_frame) {}
+  explicit SliceProcessor(const OptimizeContext& opt_cxt)
+      : AgnosticNodeProcessor(opt_cxt) {}
 
  protected:
   Status CustomizedProcessing() override {
@@ -1029,9 +1026,8 @@ class SliceProcessor : public AgnosticNodeProcessor {
 // before this optimization.
 class SliceProcessorConst : public AgnosticNodeProcessor {
  public:
-  SliceProcessorConst(GraphDef* graph, NodeDef* node, NodeMap* node_map,
-                      bool is_in_frame)
-      : AgnosticNodeProcessor(graph, node, node_map, is_in_frame) {}
+  explicit SliceProcessorConst(const OptimizeContext& opt_cxt)
+      : AgnosticNodeProcessor(opt_cxt) {}
 
  protected:
   Status CustomizedProcessing() override {
@@ -1047,9 +1043,8 @@ class SliceProcessorConst : public AgnosticNodeProcessor {
 // example use case is in the gradient computation of Concat for InceptionV3.
 class SliceProcessorConcatOffset : public AgnosticNodeProcessor {
  public:
-  SliceProcessorConcatOffset(GraphDef* graph, NodeDef* node, NodeMap* node_map,
-                             bool is_in_frame)
-      : AgnosticNodeProcessor(graph, node, node_map, is_in_frame) {}
+  explicit SliceProcessorConcatOffset(const OptimizeContext& opt_cxt)
+      : AgnosticNodeProcessor(opt_cxt) {}
 
  protected:
   Status CustomizedProcessing() override {
@@ -1098,9 +1093,8 @@ class SliceProcessorConcatOffset : public AgnosticNodeProcessor {
 
 class SqueezeProcessor : public AgnosticNodeProcessor {
  public:
-  SqueezeProcessor(GraphDef* graph, NodeDef* node, NodeMap* node_map,
-                   bool is_in_frame)
-      : AgnosticNodeProcessor(graph, node, node_map, is_in_frame) {}
+  explicit SqueezeProcessor(const OptimizeContext& opt_cxt)
+      : AgnosticNodeProcessor(opt_cxt) {}
 
  protected:
   bool ShouldProcess() const override {
@@ -1148,9 +1142,8 @@ class SqueezeProcessor : public AgnosticNodeProcessor {
 
 class SumProcessor : public AgnosticNodeProcessor {
  public:
-  SumProcessor(GraphDef* graph, NodeDef* node, NodeMap* node_map,
-               bool is_in_frame)
-      : AgnosticNodeProcessor(graph, node, node_map, is_in_frame) {}
+  explicit SumProcessor(const OptimizeContext& opt_cxt)
+      : AgnosticNodeProcessor(opt_cxt) {}
 
  protected:
   bool ShouldProcess() const override {
@@ -1268,31 +1261,26 @@ class DataLayoutOptimizer : GraphProcessor {
           ops_format_supported.end()) {
         auto node = graph_->mutable_node(i);
         bool is_in_frame = !frames[node].empty();
+        OptimizeContext opt_cxt(graph_, node, node_map_, is_in_frame);
         std::unique_ptr<NodeProcessor> node_processor;
         if (node->op().compare("AvgPoolGrad") == 0) {
-          node_processor.reset(
-              new AvgPoolGradProcessor(graph_, node, node_map_, is_in_frame));
+          node_processor.reset(new AvgPoolGradProcessor(opt_cxt));
         } else if (node->op().compare("BiasAddGrad") == 0) {
-          node_processor.reset(
-              new BiasAddGradProcessor(graph_, node, node_map_, is_in_frame));
+          node_processor.reset(new BiasAddGradProcessor(opt_cxt));
         } else if (node->op().compare("Conv2D") == 0) {
-          node_processor.reset(new Conv2DProcessor(
-              graph_, node, node_map_, config_.no_gemm, is_in_frame));
+          node_processor.reset(new Conv2DProcessor(opt_cxt, config_.no_gemm));
         } else if (node->op().compare("Conv2DBackpropFilter") == 0) {
-          node_processor.reset(new Conv2DBackpropFilterProcessor(
-              graph_, node, node_map_, config_.no_gemm, is_in_frame));
+          node_processor.reset(
+              new Conv2DBackpropFilterProcessor(opt_cxt, config_.no_gemm));
         } else if (node->op().compare("Conv2DBackpropInput") == 0) {
-          node_processor.reset(new Conv2DBackpropInputProcessor(
-              graph_, node, node_map_, config_.no_gemm, is_in_frame));
+          node_processor.reset(
+              new Conv2DBackpropInputProcessor(opt_cxt, config_.no_gemm));
         } else if (node->op().compare("FusedBatchNormGrad") == 0) {
-          node_processor.reset(new FusedBatchNormGradProcessor(
-              graph_, node, node_map_, is_in_frame));
+          node_processor.reset(new FusedBatchNormGradProcessor(opt_cxt));
         } else if (node->op().compare("MaxPoolGrad") == 0) {
-          node_processor.reset(
-              new MaxPoolGradProcessor(graph_, node, node_map_, is_in_frame));
+          node_processor.reset(new MaxPoolGradProcessor(opt_cxt));
         } else {
-          node_processor.reset(
-              new NodeProcessor(graph_, node, node_map_, is_in_frame));
+          node_processor.reset(new NodeProcessor(opt_cxt));
         }
         TF_RETURN_IF_ERROR(node_processor->ConvertNode());
       }
@@ -1313,49 +1301,39 @@ class DataLayoutOptimizer : GraphProcessor {
             ops_format_agnostic.end()) {
           auto node = graph_->mutable_node(i);
           bool is_in_frame = !frames[node].empty();
+          OptimizeContext opt_cxt(graph_, node, node_map_, is_in_frame);
           std::unique_ptr<NodeProcessor> node_processor;
           if (node->op().compare("AddN") == 0) {
-            node_processor.reset(
-                new AddNProcessor(graph_, node, node_map_, is_in_frame));
+            node_processor.reset(new AddNProcessor(opt_cxt));
           } else if (node->op().compare("Add") == 0 ||
                      node->op().compare("Mul") == 0 ||
                      node->op().compare("RealDiv") == 0 ||
                      node->op().compare("SquaredDifference") == 0 ||
                      node->op().compare("Sub") == 0) {
-            node_processor.reset(
-                new BinaryOpProcessor(graph_, node, node_map_, is_in_frame));
+            node_processor.reset(new BinaryOpProcessor(opt_cxt));
           } else if (node->op().compare("Concat") == 0 ||
                      node->op().compare("ConcatV2") == 0) {
-            node_processor.reset(
-                new ConcatProcessor(graph_, node, node_map_, is_in_frame));
+            node_processor.reset(new ConcatProcessor(opt_cxt));
           } else if (node->op().compare("Pad") == 0) {
-            node_processor.reset(
-                new PadProcessor(graph_, node, node_map_, is_in_frame));
+            node_processor.reset(new PadProcessor(opt_cxt));
           } else if (node->op().compare("ReluGrad") == 0) {
-            node_processor.reset(
-                new ReluGradProcessor(graph_, node, node_map_, is_in_frame));
+            node_processor.reset(new ReluGradProcessor(opt_cxt));
           } else if (node->op().compare("Slice") == 0) {
             auto input1 = node_map_->GetNode(NodeName(node->input(1)));
             auto input2 = node_map_->GetNode(NodeName(node->input(2)));
             if (input1->op() == "ConcatOffset") {
-              node_processor.reset(new SliceProcessorConcatOffset(
-                  graph_, node, node_map_, is_in_frame));
+              node_processor.reset(new SliceProcessorConcatOffset(opt_cxt));
             } else if (input1->op() == "Const" && input2->op() == "Const") {
-              node_processor.reset(new SliceProcessorConst(
-                  graph_, node, node_map_, is_in_frame));
+              node_processor.reset(new SliceProcessorConst(opt_cxt));
             } else {
-              node_processor.reset(
-                  new SliceProcessor(graph_, node, node_map_, is_in_frame));
+              node_processor.reset(new SliceProcessor(opt_cxt));
             }
           } else if (node->op().compare("Squeeze") == 0) {
-            node_processor.reset(
-                new SqueezeProcessor(graph_, node, node_map_, is_in_frame));
+            node_processor.reset(new SqueezeProcessor(opt_cxt));
           } else if (node->op().compare("Sum") == 0) {
-            node_processor.reset(
-                new SumProcessor(graph_, node, node_map_, is_in_frame));
+            node_processor.reset(new SumProcessor(opt_cxt));
           } else {
-            node_processor.reset(new AgnosticNodeProcessor(
-                graph_, node, node_map_, is_in_frame));
+            node_processor.reset(new AgnosticNodeProcessor(opt_cxt));
           }
           TF_RETURN_IF_ERROR(node_processor->ConvertNode());
         }
@@ -1416,6 +1394,7 @@ int GetNumTranspose(const GraphDef& graph) {
   LOG(INFO) << "Number of Transpose nodes: " << number;
   return number;
 }
+}  // namespace
 
 Status LayoutOptimizer::Tune(const GrapplerItem& item,
                              const GraphProperties& graph_properties,
-- 
GitLab


From c59b194370d626d41263dc616ccf5e453078e195 Mon Sep 17 00:00:00 2001
From: Youssef Hesham <youssefheshamhassan@gmail.com>
Date: Sat, 18 Nov 2017 02:28:44 +0200
Subject: [PATCH 0083/1225] typo fixed (#14660)

---
 tensorflow/docs_src/tutorials/image_recognition.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/docs_src/tutorials/image_recognition.md b/tensorflow/docs_src/tutorials/image_recognition.md
index ddb771700a..f74bc3107e 100644
--- a/tensorflow/docs_src/tutorials/image_recognition.md
+++ b/tensorflow/docs_src/tutorials/image_recognition.md
@@ -5,7 +5,7 @@ tell apart a lion and a jaguar, read a sign, or recognize a human's face.
 But these are actually hard problems to solve with a computer: they only
 seem easy because our brains are incredibly good at understanding images.
 
-In the last few years the field of machine learning has made tremendous
+In the last few years, the field of machine learning has made tremendous
 progress on addressing these difficult problems. In particular, we've
 found that a kind of model called a deep
 [convolutional neural network](https://colah.github.io/posts/2014-07-Conv-Nets-Modular/)
-- 
GitLab


From 69620e12bf3403a11a47b26006ebb22656a9f036 Mon Sep 17 00:00:00 2001
From: dmaclach <dmaclach@gmail.com>
Date: Fri, 17 Nov 2017 16:29:14 -0800
Subject: [PATCH 0084/1225] Fix up link to ios.md in docs. (#14640)

---
 tensorflow/contrib/lite/README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/contrib/lite/README.md b/tensorflow/contrib/lite/README.md
index 827c5d0baa..0deff7c8f6 100644
--- a/tensorflow/contrib/lite/README.md
+++ b/tensorflow/contrib/lite/README.md
@@ -198,4 +198,4 @@ The [demo app](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/c
 Note that you’d need to follow instructions for installing TensorFlow on Android, setting up bazel and Android Studio outlined [here](https://www.tensorflow.org/mobile/android_build).
 
 ### For iOS
-Follow the documentation [here](https://github.com/TensorFlow/TensorFlow/blob/master/TensorFlow/contrib/lite/g3doc/ios.md) to get integrate a TFLite model into your app.
+Follow the documentation [here](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/lite/g3doc/ios.md) to get integrate a TFLite model into your app.
-- 
GitLab


From 3f65709fc058d280ddf25043de0fc171cad1eaa2 Mon Sep 17 00:00:00 2001
From: Austin Anderson <angerson@google.com>
Date: Fri, 17 Nov 2017 16:39:49 -0800
Subject: [PATCH 0085/1225] Change some Quantized op tests to use TEST() macros
 and main()

PiperOrigin-RevId: 176178241
---
 tensorflow/core/kernels/BUILD                 | 10 ----
 .../core/kernels/quantization_utils_test.cc   | 59 +++++++++----------
 .../core/kernels/quantized_add_op_test.cc     | 28 ++++-----
 .../kernels/quantized_instance_norm_test.cc   | 21 ++-----
 .../core/kernels/quantized_mul_op_test.cc     | 28 ++++-----
 .../quantized_resize_bilinear_op_test.cc      | 24 ++++----
 6 files changed, 65 insertions(+), 105 deletions(-)

diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD
index 00cf3f90e9..39e8e499cd 100644
--- a/tensorflow/core/kernels/BUILD
+++ b/tensorflow/core/kernels/BUILD
@@ -4992,7 +4992,6 @@ tf_cc_test(
         "//tensorflow/core:nn_ops_op_lib",
         "//tensorflow/core:protos_all_cc",
         "//tensorflow/core:test",
-        "//tensorflow/core:test_main",
         "//tensorflow/core:testlib",
         "//third_party/eigen3",
     ],
@@ -5035,7 +5034,6 @@ tf_cc_binary(
             "//tensorflow/cc:client_session",
             "//tensorflow/core:framework",
             "//tensorflow/core:tensor_testutil",
-            "//tensorflow/core:test_main",
         ],
     }),
 )
@@ -5095,7 +5093,6 @@ cc_binary(
             "//tensorflow/core:tensor_testutil",
             "//tensorflow/core:tensorflow",
             "//tensorflow/core:test",
-            "//tensorflow/core:test_main",
         ],
     }),
 )
@@ -5119,7 +5116,6 @@ tf_cc_test(
         "//tensorflow/core:nn_ops_op_lib",
         "//tensorflow/core:protos_all_cc",
         "//tensorflow/core:test",
-        "//tensorflow/core:test_main",
         "//tensorflow/core:testlib",
     ],
 )
@@ -5140,7 +5136,6 @@ tf_cc_test(
         "//tensorflow/core:image_ops_op_lib",
         "//tensorflow/core:protos_all_cc",
         "//tensorflow/core:test",
-        "//tensorflow/core:test_main",
         "//tensorflow/core:testlib",
     ],
 )
@@ -5184,7 +5179,6 @@ cc_binary(
             "//tensorflow/core:image_ops_op_lib",
             "//tensorflow/core:protos_all_cc",
             "//tensorflow/core:test",
-            "//tensorflow/core:test_main",
             "//tensorflow/core:testlib",
         ],
     }),
@@ -5298,7 +5292,6 @@ cc_binary(
             ":quantized_ops",
             "//tensorflow/core:framework",
             "//tensorflow/core:tensor_testutil",
-            "//tensorflow/core:test_main",
             "//tensorflow/core:protos_all_cc",
             "//tensorflow/core:test",
         ],
@@ -5324,7 +5317,6 @@ tf_cc_test(
         "//tensorflow/core:nn_ops_op_lib",
         "//tensorflow/core:protos_all_cc",
         "//tensorflow/core:test",
-        "//tensorflow/core:test_main",
         "//tensorflow/core:testlib",
     ],
 )
@@ -5436,7 +5428,6 @@ cc_binary(
         "//conditions:default": [
             "//tensorflow/core:framework",
             "//tensorflow/core:tensor_testutil",
-            "//tensorflow/core:test_main",
         ],
     }),
 )
@@ -5457,7 +5448,6 @@ tf_cc_test(
         "//tensorflow/core:lib",
         "//tensorflow/core:protos_all_cc",
         "//tensorflow/core:test",
-        "//tensorflow/core:test_main",
         "//tensorflow/core:testlib",
     ],
 )
diff --git a/tensorflow/core/kernels/quantization_utils_test.cc b/tensorflow/core/kernels/quantization_utils_test.cc
index eae303b85e..a73581fbbc 100644
--- a/tensorflow/core/kernels/quantization_utils_test.cc
+++ b/tensorflow/core/kernels/quantization_utils_test.cc
@@ -910,42 +910,41 @@ void TestComputeLerp4xAll() {
 
 }  // namespace tensorflow
 
-#if defined(__ANDROID__)
-int main(int argc, char** argv) {
-#define RUN_TEST(t)            \
-  LOG(INFO) << "Test: " << #t; \
-  tensorflow::t();
-#else
 #define RUN_TEST(t) \
   TEST(QuantizationUtilsTest, t) { tensorflow::t(); }
-#endif
 
-  RUN_TEST(TestFloatToQuantized);
-  RUN_TEST(TestQuantizedToFloat);
-  RUN_TEST(TestAvoidBias);
-  RUN_TEST(TestRequantizeInNewRange);
-  RUN_TEST(TestRequantizeInNewRangeRealData);
-  RUN_TEST(TestRequantizeInNewRange32To8Bit);
-  RUN_TEST(TestRequantizeManyInNewRange32To8Bit);
-  RUN_TEST(TestRequantizeManyInNewRange32To8BitUsingEigen);
-  RUN_TEST(TestRequantizeManyInNewRange32To8BitEigenVsNonEigen);
-  RUN_TEST(TestRequantizeManyInNewRange32To8BitSignedEigenVsNonEigen);
-  RUN_TEST(TestFloatTensorToQuantized);
-  RUN_TEST(TestRequantizeManyInNewRange8To32Bit);
-  RUN_TEST(TestFloatToQuantizedInPlaceUsingEigen);
-  RUN_TEST(TestOverflowWithEigen);
-  RUN_TEST(TestQuantizedTensorToFloat);
-  RUN_TEST(TestQuantizedToFloatInPlaceUsingEigen);
+RUN_TEST(TestFloatToQuantized);
+RUN_TEST(TestQuantizedToFloat);
+RUN_TEST(TestAvoidBias);
+RUN_TEST(TestRequantizeInNewRange);
+RUN_TEST(TestRequantizeInNewRangeRealData);
+RUN_TEST(TestRequantizeInNewRange32To8Bit);
+RUN_TEST(TestRequantizeManyInNewRange32To8Bit);
+RUN_TEST(TestRequantizeManyInNewRange32To8BitUsingEigen);
+RUN_TEST(TestRequantizeManyInNewRange32To8BitEigenVsNonEigen);
+RUN_TEST(TestRequantizeManyInNewRange32To8BitSignedEigenVsNonEigen);
+RUN_TEST(TestFloatTensorToQuantized);
+RUN_TEST(TestRequantizeManyInNewRange8To32Bit);
+RUN_TEST(TestFloatToQuantizedInPlaceUsingEigen);
+RUN_TEST(TestOverflowWithEigen);
+RUN_TEST(TestQuantizedTensorToFloat);
+RUN_TEST(TestQuantizedToFloatInPlaceUsingEigen);
 
 #if defined(__ANDROID__)
+
+RUN_TEST(BenchmarkRequantizeManyInNewRange);
+
 #ifdef QUANTIZATION_UTILS_USE_NEON
-  RUN_TEST(TestDivide64x2PowAll);
-  RUN_TEST(TestComputeLerp4xAll);
-#endif
 
-  tensorflow::BenchmarkRequantizeManyInNewRange();
+RUN_TEST(TestDivide64x2PowAll);
+RUN_TEST(TestComputeLerp4xAll);
+
+#endif  // QUANTIZATION_UTILS_USE_NEON
+
+#endif  // __ANDROID__
 
-  LOG(INFO) << "All tests complete.";
-  return 0;
+int main(int argc, char** argv) {
+  // On Linux, add: FLAGS_logtostderr = true;
+  ::testing::InitGoogleTest(&argc, argv);
+  return RUN_ALL_TESTS();
 }
-#endif
diff --git a/tensorflow/core/kernels/quantized_add_op_test.cc b/tensorflow/core/kernels/quantized_add_op_test.cc
index 74d16b282d..90bd145ad0 100644
--- a/tensorflow/core/kernels/quantized_add_op_test.cc
+++ b/tensorflow/core/kernels/quantized_add_op_test.cc
@@ -276,10 +276,10 @@ void BenchmarkVectorPlusTensor() {
   TimeAdd({100000, 100}, {100}, 1);
 }
 
-#if !defined(__ANDROID__)
+}  // end namespace tensorflow
 
 #define RUN_TEST(t) \
-  TEST(QuantizedAddOpTest, t) { t(); }
+  TEST(QuantizedAddOpTest, t) { tensorflow::t(); }
 
 RUN_TEST(TestManualScalar);
 RUN_TEST(TestManualVector);
@@ -288,24 +288,16 @@ RUN_TEST(TestScalar);
 RUN_TEST(TestVector);
 RUN_TEST(TestVectorPlusTensor);
 
-#undef RUN_TEST
+#if defined(__ANDROID__)
 
-#endif  // __ANDROID__
+RUN_TEST(BenchmarkTensorScalar);
+RUN_TEST(BenchmarkVector);
+RUN_TEST(BenchmarkVectorPlusTensor);
 
-}  // end namespace tensorflow
+#endif  // __ANDROID__
 
-#if defined(__ANDROID__)
 int main(int argc, char** argv) {
-  LOG(INFO) << "TestManualScalar:";
-  tensorflow::TestManualScalar();
-  LOG(INFO) << "TestManualVector:";
-  tensorflow::TestManualVector();
-  LOG(INFO) << "TestManualVectorPlusTensor:";
-  tensorflow::TestManualVectorPlusTensor();
-  tensorflow::BenchmarkTensorScalar();
-  tensorflow::BenchmarkVector();
-  tensorflow::BenchmarkVectorPlusTensor();
-  LOG(INFO) << "All tests complete";
-  return 0;
+  // On Linux, add: FLAGS_logtostderr = true;
+  ::testing::InitGoogleTest(&argc, argv);
+  return RUN_ALL_TESTS();
 }
-#endif  // __ANDROID__
diff --git a/tensorflow/core/kernels/quantized_instance_norm_test.cc b/tensorflow/core/kernels/quantized_instance_norm_test.cc
index 29d8dbc0df..d2b15ee20b 100644
--- a/tensorflow/core/kernels/quantized_instance_norm_test.cc
+++ b/tensorflow/core/kernels/quantized_instance_norm_test.cc
@@ -173,10 +173,10 @@ void TestClamp() {
   Expect(input_tensor, -10.0f, 10.0f, true, 0.0f, 1.0f);
 }
 
-#if !defined(__ANDROID__)
+}  // end namespace tensorflow
 
 #define RUN_TEST(t) \
-  TEST(QuantizedInstanceNormTest, t) { t(); }
+  TEST(QuantizedAddOpTest, t) { tensorflow::t(); }
 
 RUN_TEST(TestBasic);
 RUN_TEST(TestZeroInput);
@@ -184,19 +184,8 @@ RUN_TEST(TestMaxInput);
 RUN_TEST(TestOutputRangeGiven);
 RUN_TEST(TestClamp);
 
-#undef RUN_TEST
-
-#endif  // __ANDROID__
-
-}  // end namespace tensorflow
-
-#if defined(__ANDROID__)
 int main(int argc, char** argv) {
-  tensorflow::TestBasic();
-  tensorflow::TestZeroInput();
-  tensorflow::TestMaxInput();
-  tensorflow::TestOutputRangeGiven();
-  tensorflow::TestClamp();
-  return 0;
+  // On Linux, add: FLAGS_logtostderr = true;
+  ::testing::InitGoogleTest(&argc, argv);
+  return RUN_ALL_TESTS();
 }
-#endif  // __ANDROID__
diff --git a/tensorflow/core/kernels/quantized_mul_op_test.cc b/tensorflow/core/kernels/quantized_mul_op_test.cc
index 45d6c51444..5f858eb8ce 100644
--- a/tensorflow/core/kernels/quantized_mul_op_test.cc
+++ b/tensorflow/core/kernels/quantized_mul_op_test.cc
@@ -276,10 +276,10 @@ void BenchmarkVectorTimesTensor() {
   TimeMul({100000, 100}, {100}, 100);
 }
 
-#if !defined(__ANDROID__)
+}  // end namespace tensorflow
 
 #define RUN_TEST(t) \
-  TEST(QuantizedMulOpTest, t) { t(); }
+  TEST(QuantizedAddOpTest, t) { tensorflow::t(); }
 
 RUN_TEST(TestManualScalar);
 RUN_TEST(TestManualVector);
@@ -288,24 +288,16 @@ RUN_TEST(TestScalar);
 RUN_TEST(TestVector);
 RUN_TEST(TestVectorTimesTensor);
 
-#undef RUN_TEST
+#if defined(__ANDROID__)
 
-#endif  // __ANDROID__
+RUN_TEST(BenchmarkTensorScalar);
+RUN_TEST(BenchmarkVector);
+RUN_TEST(BenchmarkVectorTimesTensor);
 
-}  // end namespace tensorflow
+#endif  // __ANDROID__
 
-#if defined(__ANDROID__)
 int main(int argc, char** argv) {
-  LOG(INFO) << "TestManualScalar:";
-  tensorflow::TestManualScalar();
-  LOG(INFO) << "TestManualVector:";
-  tensorflow::TestManualVector();
-  LOG(INFO) << "TestManualVectorTimesTensor:";
-  tensorflow::TestManualVectorTimesTensor();
-  tensorflow::BenchmarkTensorScalar();
-  tensorflow::BenchmarkVector();
-  tensorflow::BenchmarkVectorTimesTensor();
-  LOG(INFO) << "All tests complete";
-  return 0;
+  // On Linux, add: FLAGS_logtostderr = true;
+  ::testing::InitGoogleTest(&argc, argv);
+  return RUN_ALL_TESTS();
 }
-#endif  // __ANDROID__
diff --git a/tensorflow/core/kernels/quantized_resize_bilinear_op_test.cc b/tensorflow/core/kernels/quantized_resize_bilinear_op_test.cc
index 8d3d7105a4..e6133415d0 100644
--- a/tensorflow/core/kernels/quantized_resize_bilinear_op_test.cc
+++ b/tensorflow/core/kernels/quantized_resize_bilinear_op_test.cc
@@ -373,22 +373,20 @@ void RunBenchmarkResizeBilinearTwoDims() {
 
 }  // namespace tensorflow
 
-#if defined(__ANDROID__)
-int main(int argc, char** argv) {
-#define RUN_TEST(t)            \
-  LOG(INFO) << "Test: " << #t; \
-  tensorflow::t();
-#else
 #define RUN_TEST(t) \
   TEST(QuantizationResizeBilenarTest, t) { tensorflow::t(); }
-#endif
 
-  RUN_TEST(TestResizeBilinearOneDim);
-  RUN_TEST(TestResizeBilinearTwoDims);
+RUN_TEST(TestResizeBilinearOneDim);
+RUN_TEST(TestResizeBilinearTwoDims);
 
 #if defined(__ANDROID__)
-  RUN_TEST(RunBenchmarkResizeBilinearTwoDims);
-  LOG(INFO) << "All tests complete.";
-  return 0;
+
+RUN_TEST(RunBenchmarkResizeBilinearTwoDims);
+
+#endif  // __ANDROID__
+
+int main(int argc, char** argv) {
+  // On Linux, add: FLAGS_logtostderr = true;
+  ::testing::InitGoogleTest(&argc, argv);
+  return RUN_ALL_TESTS();
 }
-#endif
-- 
GitLab


From ee9b4eb5ee09bc505a51b1ba9325823444ac1dd2 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 17 Nov 2017 16:43:53 -0800
Subject: [PATCH 0086/1225] Rename
 streaming_precision_recall_at_equal_thresholds to
 precision_recall_at_equal_thresholds.

Add the metric to the tf.contrib.metrics namespace.

Re-order its parameters to be more in line with that of other metrics.

PiperOrigin-RevId: 176178651
---
 tensorflow/contrib/metrics/__init__.py        |  2 ++
 .../contrib/metrics/python/ops/metric_ops.py  | 25 ++++++++++---------
 .../metrics/python/ops/metric_ops_test.py     | 23 ++++++++---------
 3 files changed, 26 insertions(+), 24 deletions(-)

diff --git a/tensorflow/contrib/metrics/__init__.py b/tensorflow/contrib/metrics/__init__.py
index 8eed45c4b3..27dad5379a 100644
--- a/tensorflow/contrib/metrics/__init__.py
+++ b/tensorflow/contrib/metrics/__init__.py
@@ -67,6 +67,7 @@ See the @{$python/contrib.metrics} guide.
 @@set_size
 @@set_union
 @@count
+@@precision_recall_at_equal_thresholds
 @@recall_at_precision
 
 """
@@ -82,6 +83,7 @@ from tensorflow.contrib.metrics.python.ops.histogram_ops import auc_using_histog
 from tensorflow.contrib.metrics.python.ops.metric_ops import aggregate_metric_map
 from tensorflow.contrib.metrics.python.ops.metric_ops import aggregate_metrics
 from tensorflow.contrib.metrics.python.ops.metric_ops import count
+from tensorflow.contrib.metrics.python.ops.metric_ops import precision_recall_at_equal_thresholds
 from tensorflow.contrib.metrics.python.ops.metric_ops import recall_at_precision
 from tensorflow.contrib.metrics.python.ops.metric_ops import sparse_recall_at_top_k
 from tensorflow.contrib.metrics.python.ops.metric_ops import streaming_accuracy
diff --git a/tensorflow/contrib/metrics/python/ops/metric_ops.py b/tensorflow/contrib/metrics/python/ops/metric_ops.py
index 6e2190cb7a..6b08b749f8 100644
--- a/tensorflow/contrib/metrics/python/ops/metric_ops.py
+++ b/tensorflow/contrib/metrics/python/ops/metric_ops.py
@@ -927,7 +927,7 @@ def streaming_curve_points(labels=None,
       tuple.
 
   TODO(chizeng): Consider rewriting this method to make use of logic within the
-  streaming_precision_recall_at_equal_thresholds method (to improve run time).
+  precision_recall_at_equal_thresholds method (to improve run time).
   """
   with variable_scope.variable_scope(name, 'curve_points',
                                      (labels, predictions, weights)):
@@ -1196,12 +1196,12 @@ def streaming_dynamic_auc(labels,
       return auc, update_op
 
 
-def streaming_precision_recall_at_equal_thresholds(predictions,
-                                                   labels,
-                                                   num_thresholds=None,
-                                                   weights=None,
-                                                   name=None,
-                                                   use_locking=None):
+def precision_recall_at_equal_thresholds(labels,
+                                         predictions,
+                                         weights=None,
+                                         num_thresholds=None,
+                                         use_locking=None,
+                                         name=None):
   """A helper method for creating metrics related to precision-recall curves.
 
   These values are true positives, false negatives, true negatives, false
@@ -1222,20 +1222,20 @@ def streaming_precision_recall_at_equal_thresholds(predictions,
   reweight certain values, or more commonly used for masking values.
 
   Args:
+    labels: A bool `Tensor` whose shape matches `predictions`.
     predictions: A floating point `Tensor` of arbitrary shape and whose values
       are in the range `[0, 1]`.
-    labels: A bool `Tensor` whose shape matches `predictions`.
+    weights: Optional; If provided, a `Tensor` that has the same dtype as,
+      and broadcastable to, `predictions`. This tensor is multplied by counts.
     num_thresholds: Optional; Number of thresholds, evenly distributed in
       `[0, 1]`. Should be `>= 2`. Defaults to 201. Note that the number of bins
       is 1 less than `num_thresholds`. Using an even `num_thresholds` value
       instead of an odd one may yield unfriendly edges for bins.
-    weights: Optional; If provided, a `Tensor` that has the same dtype as,
-      and broadcastable to, `predictions`. This tensor is multplied by counts.
-    name: Optional; variable_scope name. If not provided, the string
-      'precision_recall_at_equal_threshold' is used.
     use_locking: Optional; If True, the op will be protected by a lock.
       Otherwise, the behavior is undefined, but may exhibit less contention.
       Defaults to True.
+    name: Optional; variable_scope name. If not provided, the string
+      'precision_recall_at_equal_threshold' is used.
 
   Returns:
     result: A named tuple (See PrecisionRecallData within the implementation of
@@ -3301,6 +3301,7 @@ __all__ = [
     'aggregate_metric_map',
     'aggregate_metrics',
     'count',
+    'precision_recall_at_equal_thresholds',
     'recall_at_precision',
     'sparse_recall_at_top_k',
     'streaming_accuracy',
diff --git a/tensorflow/contrib/metrics/python/ops/metric_ops_test.py b/tensorflow/contrib/metrics/python/ops/metric_ops_test.py
index 5d0463e1f7..7db06609de 100644
--- a/tensorflow/contrib/metrics/python/ops/metric_ops_test.py
+++ b/tensorflow/contrib/metrics/python/ops/metric_ops_test.py
@@ -2218,11 +2218,11 @@ class StreamingPrecisionRecallAtEqualThresholdsTest(test.TestCase):
       if weights:
         weights_tensor = constant_op.constant(weights, dtype=dtypes_lib.float32)
       gotten_result, update_op = (
-          metric_ops.streaming_precision_recall_at_equal_thresholds(
-              predictions=predictions_tensor,
+          metric_ops.precision_recall_at_equal_thresholds(
               labels=labels_tensor,
-              num_thresholds=3,
-              weights=weights_tensor))
+              predictions=predictions_tensor,
+              weights=weights_tensor,
+              num_thresholds=3))
 
       sess.run(variables.local_variables_initializer())
       sess.run(update_op)
@@ -2230,17 +2230,17 @@ class StreamingPrecisionRecallAtEqualThresholdsTest(test.TestCase):
       self._testResultsEqual(expected_result, gotten_result)
 
   def testVars(self):
-    metric_ops.streaming_precision_recall_at_equal_thresholds(
-        predictions=constant_op.constant([0.42], dtype=dtypes_lib.float32),
-        labels=constant_op.constant([True], dtype=dtypes_lib.bool))
+    metric_ops.precision_recall_at_equal_thresholds(
+        labels=constant_op.constant([True], dtype=dtypes_lib.bool),
+        predictions=constant_op.constant([0.42], dtype=dtypes_lib.float32))
     _assert_metric_variables(
         self, ('precision_recall_at_equal_thresholds/variables/tp_buckets:0',
                'precision_recall_at_equal_thresholds/variables/fp_buckets:0'))
 
   def testVarsWithName(self):
-    metric_ops.streaming_precision_recall_at_equal_thresholds(
-        predictions=constant_op.constant([0.42], dtype=dtypes_lib.float32),
+    metric_ops.precision_recall_at_equal_thresholds(
         labels=constant_op.constant([True], dtype=dtypes_lib.bool),
+        predictions=constant_op.constant([0.42], dtype=dtypes_lib.float32),
         name='foo')
     _assert_metric_variables(
         self, ('foo/variables/tp_buckets:0', 'foo/variables/fp_buckets:0'))
@@ -2251,9 +2251,8 @@ class StreamingPrecisionRecallAtEqualThresholdsTest(test.TestCase):
     labels = constant_op.constant(
         np.random.uniform(size=(10, 3)) > 0.5, dtype=dtypes_lib.bool)
 
-    result, update_op = (
-        metric_ops.streaming_precision_recall_at_equal_thresholds(
-            predictions=predictions, labels=labels))
+    result, update_op = metric_ops.precision_recall_at_equal_thresholds(
+        labels=labels, predictions=predictions)
 
     with self.test_session() as sess:
       # Run several updates.
-- 
GitLab


From f386693307fca2ef09ef7618d214f148c2944ff2 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 17 Nov 2017 16:53:39 -0800
Subject: [PATCH 0087/1225] Avoid a dependency between the interpreter and
 tensorflow

PiperOrigin-RevId: 176179748
---
 tensorflow/contrib/lite/interpreter.h | 2 --
 tensorflow/contrib/lite/string.h      | 3 +--
 2 files changed, 1 insertion(+), 4 deletions(-)

diff --git a/tensorflow/contrib/lite/interpreter.h b/tensorflow/contrib/lite/interpreter.h
index 8bf60e91f7..65c61e44be 100644
--- a/tensorflow/contrib/lite/interpreter.h
+++ b/tensorflow/contrib/lite/interpreter.h
@@ -24,7 +24,6 @@ limitations under the License.
 #include "tensorflow/contrib/lite/context.h"
 #include "tensorflow/contrib/lite/error_reporter.h"
 #include "tensorflow/contrib/lite/simple_memory_arena.h"
-#include "tensorflow/core/platform/platform.h"
 
 namespace tflite {
 
@@ -232,7 +231,6 @@ class Interpreter {
   // If you know that your sizes are not changing, you need not call this.
 
   // Returns status of success or failure.
-  // TODO(aselle): Madde
   TfLiteStatus AllocateTensors();
 
   // Invoke the interpreter (run the whole graph in dependency order).
diff --git a/tensorflow/contrib/lite/string.h b/tensorflow/contrib/lite/string.h
index ecd6f04ec2..7f8f4e851e 100644
--- a/tensorflow/contrib/lite/string.h
+++ b/tensorflow/contrib/lite/string.h
@@ -17,11 +17,10 @@ limitations under the License.
 #define _THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_STRING_H_
 
 #include <string>
-#include "tensorflow/core/platform/platform.h"
 
 namespace tflite {
 
-#ifndef PLATFORM_GOOGLE
+#ifndef HAS_GLOBAL_STRING
 using std::string;
 #endif
 
-- 
GitLab


From ee26ea9ffde6773863badc00001342a368fae39a Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 17 Nov 2017 16:56:15 -0800
Subject: [PATCH 0088/1225] Fix use-after-free error in summary_db_writer.cc

BindBlobUnsafe() requires that its second argument's lifetime
last at least until the next call to Reset() or Close().
So that can't be used with a local variable in BindProto().
This CL fixes this by using BindBlob() rather than BindBlobUnsafe().

PiperOrigin-RevId: 176180007
---
 tensorflow/contrib/tensorboard/db/summary_db_writer.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/contrib/tensorboard/db/summary_db_writer.cc b/tensorflow/contrib/tensorboard/db/summary_db_writer.cc
index 857e731ef2..18f0f1e97a 100644
--- a/tensorflow/contrib/tensorboard/db/summary_db_writer.cc
+++ b/tensorflow/contrib/tensorboard/db/summary_db_writer.cc
@@ -67,7 +67,7 @@ Status BindProto(SqliteStatement* stmt, int parameter,
   TF_RETURN_IF_ERROR(Serialize(proto, &serialized));
   string compressed;
   TF_RETURN_IF_ERROR(Compress(serialized, &compressed));
-  stmt->BindBlobUnsafe(parameter, compressed);
+  stmt->BindBlob(parameter, compressed);
   return Status::OK();
 }
 
-- 
GitLab


From f4c18a0eb05e21bae397c9c16527ff8080cae6b8 Mon Sep 17 00:00:00 2001
From: Skye Wanderman-Milne <skyewm@google.com>
Date: Fri, 17 Nov 2017 17:00:30 -0800
Subject: [PATCH 0089/1225] Call Graph._add_op in Operation.__init__ (and
 remove existing calls).

Without this change, ops manually constructed via Operation.__init__
must be passed to Graph._add_op to keep the graph in a consistent
state. Failure to do so is particularly disasterous with the C API
enabled, as more Operation methods rely on Graph._nodes_by_name, which
is updated in Graph._add_op (e.g. Operation.inputs will fail if the
inputs have not been added to the graph).

An alternative to this change is to require that all
Operation.__init__ callers also call Graph._add_op (we don't currently
do this in ops_test.py, although I imagine all non-test callers
do). While this is effectively the current contract, it forces callers
of Operation.__init__, which is a public API, to use _add_op, which is
private.

One downside of this change is that it will break existing
Graph._add_op calls, since the op will already have been
added. However, _add_op is a private API.

PiperOrigin-RevId: 176180386
---
 .../contrib/copy_graph/python/util/copy_elements.py  |  3 +--
 tensorflow/contrib/graph_editor/transform.py         |  3 ---
 tensorflow/python/framework/ops.py                   |  3 ++-
 tensorflow/python/framework/ops_test.py              |  2 ++
 tensorflow/python/framework/test_util_test.py        | 12 +++++++-----
 5 files changed, 12 insertions(+), 11 deletions(-)

diff --git a/tensorflow/contrib/copy_graph/python/util/copy_elements.py b/tensorflow/contrib/copy_graph/python/util/copy_elements.py
index 8c2528f548..d060eda0a7 100644
--- a/tensorflow/contrib/copy_graph/python/util/copy_elements.py
+++ b/tensorflow/contrib/copy_graph/python/util/copy_elements.py
@@ -19,7 +19,7 @@ from one graph to another. The copied elements are initialized inside a
 user-specified scope in the other graph. There are separate functions to
 copy ops and variables.
 There is also a function to retrive the copied version of an op from the
-first graph inside a scope in the second graph. 
+first graph inside a scope in the second graph.
 
 @@copy_op_to_graph
 @@copy_variable_to_graph
@@ -225,7 +225,6 @@ def copy_op_to_graph(org_instance, to_graph, variables,
                            new_original_op,
                            op_def)
     #Use Graph's hidden methods to add the op
-    to_graph._add_op(new_op)
     to_graph._record_op_seen_by_control_dependencies(new_op)
     for device_function in reversed(to_graph._device_function_stack):
       new_op._set_device(device_function(new_op))
diff --git a/tensorflow/contrib/graph_editor/transform.py b/tensorflow/contrib/graph_editor/transform.py
index 14ac529665..2a97a79070 100644
--- a/tensorflow/contrib/graph_editor/transform.py
+++ b/tensorflow/contrib/graph_editor/transform.py
@@ -173,9 +173,6 @@ def copy_op_handler(info, op, copy_shape=True):
   if op._original_op:
     op_._original_op = op._original_op
 
-  # Add op to the graph
-  info.graph_._add_op(op_)
-
   return op_, op_.outputs
 
 
diff --git a/tensorflow/python/framework/ops.py b/tensorflow/python/framework/ops.py
index 7cca260d73..5e57245b7d 100644
--- a/tensorflow/python/framework/ops.py
+++ b/tensorflow/python/framework/ops.py
@@ -1592,6 +1592,8 @@ class Operation(object):
     self._id_value = self._graph._next_id()  # pylint: disable=protected-access
     self._recompute_node_def()
 
+    self._graph._add_op(self)  # pylint: disable=protected-access
+
   def _reconstruct_sequence_inputs(self, op_def, inputs, attrs):
     """Regroups a flat list of input tensors into scalar and sequence inputs.
 
@@ -3072,7 +3074,6 @@ class Graph(object):
         op_def=op_def)
     if compute_shapes:
       set_shapes_for_outputs(ret)
-    self._add_op(ret)
     self._record_op_seen_by_control_dependencies(ret)
 
     if compute_device:
diff --git a/tensorflow/python/framework/ops_test.py b/tensorflow/python/framework/ops_test.py
index 1be306ddc5..7db5d133ed 100644
--- a/tensorflow/python/framework/ops_test.py
+++ b/tensorflow/python/framework/ops_test.py
@@ -237,6 +237,7 @@ class OperationTest(test_util.TensorFlowTestCase):
         ops._NodeDef("RefOutputFloatOutput", "op1"), g, [],
         [dtypes.float32_ref, dtypes.float32])
     self.assertProtoEquals("op:'RefOutputFloatOutput' name:'op1'", op1.node_def)
+    self.assertEquals([], list(op1.inputs))
     ref_t, nonref_t = op1.values()
     # NOTE(mrry): Must specify input_types to preserve ref-typed input.
     op2 = ops.Operation(
@@ -246,6 +247,7 @@ class OperationTest(test_util.TensorFlowTestCase):
     self.assertProtoEquals(
         "op:'RefInputFloatInput' name:'op2' input:'op1' input:'op1:1'",
         op2.node_def)
+    self.assertEquals([ref_t, nonref_t], list(op2.inputs))
     op3 = ops.Operation(
         ops._NodeDef("TwoFloatInputs", "op3"), g, [ref_t, nonref_t], [])
     self.assertProtoEquals(
diff --git a/tensorflow/python/framework/test_util_test.py b/tensorflow/python/framework/test_util_test.py
index 1c5db94500..3ea28e6334 100644
--- a/tensorflow/python/framework/test_util_test.py
+++ b/tensorflow/python/framework/test_util_test.py
@@ -183,11 +183,13 @@ class TestUtilTest(test_util.TensorFlowTestCase):
 
   def _WeMustGoDeeper(self, msg):
     with self.assertRaisesOpError(msg):
-      node_def = ops._NodeDef("op_type", "name")
-      node_def_orig = ops._NodeDef("op_type_orig", "orig")
-      op_orig = ops.Operation(node_def_orig, ops.get_default_graph())
-      op = ops.Operation(node_def, ops.get_default_graph(), original_op=op_orig)
-      raise errors.UnauthenticatedError(node_def, op, "true_err")
+      with ops.Graph().as_default():
+        node_def = ops._NodeDef("op_type", "name")
+        node_def_orig = ops._NodeDef("op_type_orig", "orig")
+        op_orig = ops.Operation(node_def_orig, ops.get_default_graph())
+        op = ops.Operation(node_def, ops.get_default_graph(),
+                           original_op=op_orig)
+        raise errors.UnauthenticatedError(node_def, op, "true_err")
 
   def testAssertRaisesOpErrorDoesNotPassMessageDueToLeakedStack(self):
     with self.assertRaises(AssertionError):
-- 
GitLab


From 0e0cee30e74ee374104dcd15b787dac89dd9ed5f Mon Sep 17 00:00:00 2001
From: Yao Zhang <yaozhang@google.com>
Date: Fri, 17 Nov 2017 17:27:17 -0800
Subject: [PATCH 0090/1225] Add the missing use_gpu=True to make the GPU test
 take effect.

PiperOrigin-RevId: 176183039
---
 tensorflow/python/kernel_tests/conv1d_test.py      |  2 +-
 tensorflow/python/kernel_tests/pool_test.py        | 14 +++++++-------
 tensorflow/python/kernel_tests/pooling_ops_test.py |  2 +-
 3 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/tensorflow/python/kernel_tests/conv1d_test.py b/tensorflow/python/kernel_tests/conv1d_test.py
index 662c94eea7..b67a4e3f89 100644
--- a/tensorflow/python/kernel_tests/conv1d_test.py
+++ b/tensorflow/python/kernel_tests/conv1d_test.py
@@ -37,7 +37,7 @@ class Conv1DTest(test.TestCase):
     filters = array_ops.expand_dims(filters, 2)  # out_channels
     # Filters is 2x1x1
     for stride in [1, 2]:
-      with self.test_session():
+      with self.test_session(use_gpu=test.is_gpu_available()):
         c = nn_ops.conv1d(x, filters, stride, padding="VALID")
         reduced = array_ops.squeeze(c)
         output = reduced.eval()
diff --git a/tensorflow/python/kernel_tests/pool_test.py b/tensorflow/python/kernel_tests/pool_test.py
index 563815b7d8..6384897633 100644
--- a/tensorflow/python/kernel_tests/pool_test.py
+++ b/tensorflow/python/kernel_tests/pool_test.py
@@ -154,7 +154,7 @@ class PoolingTest(test.TestCase):
     self.assertAllClose(y1, y2.eval(), rtol=1e-2, atol=1e-2)
 
   def testPoolSimple(self):
-    with self.test_session():
+    with self.test_session(use_gpu=test.is_gpu_available()):
       for padding in ["SAME", "VALID"]:
         for pooling_type in ["MAX", "AVG"]:
           self._test(
@@ -166,7 +166,7 @@ class PoolingTest(test.TestCase):
               strides=[1, 2])
 
   def testPool1D(self):
-    with self.test_session():
+    with self.test_session(use_gpu=test.is_gpu_available()):
       for padding in ["SAME", "VALID"]:
         for pooling_type in ["MAX", "AVG"]:
           for input_shape in [[2, 9, 2], [2, 10, 2]]:
@@ -192,7 +192,7 @@ class PoolingTest(test.TestCase):
                     strides=strides)
 
   def testPool2D(self):
-    with self.test_session():
+    with self.test_session(use_gpu=test.is_gpu_available()):
       for padding in ["SAME", "VALID"]:
         for pooling_type in ["MAX", "AVG"]:
           for input_shape in [[2, 9, 10, 2], [2, 10, 9, 2]]:
@@ -218,7 +218,7 @@ class PoolingTest(test.TestCase):
                     strides=strides)
 
   def testPool3D(self):
-    with self.test_session():
+    with self.test_session(use_gpu=test.is_gpu_available()):
       for padding in ["SAME", "VALID"]:
         for pooling_type in ["MAX", "AVG"]:
           for input_shape in [[2, 9, 10, 11, 2], [2, 10, 9, 11, 2]]:
@@ -302,7 +302,7 @@ class PoolingTest(test.TestCase):
     self.assertLess(err, err_tolerance)
 
   def testGradient1D(self):
-    with self.test_session():
+    with self.test_session(use_gpu=test.is_gpu_available()):
       for padding in ["SAME", "VALID"]:
         for pooling_type in ["AVG", "MAX"]:
           for input_shape in [[2, 5, 2], [1, 4, 1]]:
@@ -328,7 +328,7 @@ class PoolingTest(test.TestCase):
                     strides=strides)
 
   def testGradient2D(self):
-    with self.test_session():
+    with self.test_session(use_gpu=test.is_gpu_available()):
       for padding in ["SAME", "VALID"]:
         for pooling_type in ["AVG", "MAX"]:
           for input_shape in [[2, 4, 5, 2], [1, 5, 4, 1]]:
@@ -354,7 +354,7 @@ class PoolingTest(test.TestCase):
                     strides=strides)
 
   def testGradient3D(self):
-    with self.test_session():
+    with self.test_session(use_gpu=test.is_gpu_available()):
       for padding in ["SAME", "VALID"]:
         for pooling_type in ["AVG", "MAX"]:
           for input_shape in [[1, 3, 5, 4, 1], [1, 5, 4, 3, 1]]:
diff --git a/tensorflow/python/kernel_tests/pooling_ops_test.py b/tensorflow/python/kernel_tests/pooling_ops_test.py
index 30c777d12f..c97ad864ee 100644
--- a/tensorflow/python/kernel_tests/pooling_ops_test.py
+++ b/tensorflow/python/kernel_tests/pooling_ops_test.py
@@ -1765,7 +1765,7 @@ class PoolingTest(test.TestCase):
             padding="SAME")
 
   def testOpEdgeCases(self):
-    with self.test_session() as sess:
+    with self.test_session(use_gpu=test.is_gpu_available()) as sess:
       pool_funcs = [nn_ops.max_pool, nn_ops.avg_pool]
       if test.is_gpu_available():
         pool_funcs.append(nn_ops.max_pool_with_argmax)
-- 
GitLab


From c980a04552c9a12a9a403aa0bcf97a6d1a755e12 Mon Sep 17 00:00:00 2001
From: Chris Ying <chrisying@google.com>
Date: Fri, 17 Nov 2017 18:08:01 -0800
Subject: [PATCH 0091/1225] make deep copy of axis parameter for batch norm

PiperOrigin-RevId: 176187824
---
 tensorflow/python/layers/normalization.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/tensorflow/python/layers/normalization.py b/tensorflow/python/layers/normalization.py
index a9d59b25a3..9d9b2b3941 100644
--- a/tensorflow/python/layers/normalization.py
+++ b/tensorflow/python/layers/normalization.py
@@ -141,7 +141,10 @@ class BatchNormalization(base.Layer):
                **kwargs):
     super(BatchNormalization, self).__init__(
         name=name, trainable=trainable, **kwargs)
-    self.axis = axis
+    if isinstance(axis, list):
+      self.axis = axis[:]
+    else:
+      self.axis = axis
     self.momentum = momentum
     self.epsilon = epsilon
     self.center = center
-- 
GitLab


From bfcc2325117b82bba55994796a0c5f8fa6ea334c Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 17 Nov 2017 18:14:39 -0800
Subject: [PATCH 0092/1225] Update g3doc for ASR LM model.

PiperOrigin-RevId: 176188673
---
 tensorflow/contrib/lite/models/testdata/g3doc/README.md  | 7 ++++---
 tensorflow/contrib/lite/models/testdata/g3doc/asr_lm.svg | 2 +-
 2 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/tensorflow/contrib/lite/models/testdata/g3doc/README.md b/tensorflow/contrib/lite/models/testdata/g3doc/README.md
index da4802b07d..83760e420f 100644
--- a/tensorflow/contrib/lite/models/testdata/g3doc/README.md
+++ b/tensorflow/contrib/lite/models/testdata/g3doc/README.md
@@ -67,9 +67,10 @@ The language model for automatic speech recognition is the neural network model
 for predicting the probability of a word given previous words in a sentence.
 It generates posterior probabilities of the next word based from a sequence of
 words. The words are encoded as indices in a fixed size dictionary.
-The model has two inputs both of size one (integer), an output size of one
-(float). It consits of three embedding layer, three LSTM layers, followed by a
-multiplication, a fully connected layers and an addition.
+The model has two inputs both of size one (integer): the current word index and
+next word index, an output size of one (float): the log probability. It consits
+of three embedding layer, three LSTM layers, followed by a multiplication, a
+fully connected layers and an addition.
 The corresponding parameters as shown in the figure.
 
 ![asr_lm_model](asr_lm.svg "ASR LM model")
diff --git a/tensorflow/contrib/lite/models/testdata/g3doc/asr_lm.svg b/tensorflow/contrib/lite/models/testdata/g3doc/asr_lm.svg
index 84d5f95b6a..2662f77269 100644
--- a/tensorflow/contrib/lite/models/testdata/g3doc/asr_lm.svg
+++ b/tensorflow/contrib/lite/models/testdata/g3doc/asr_lm.svg
@@ -1,4 +1,4 @@
 <?xml version="1.0" standalone="yes"?>
 
-<svg version="1.1" viewBox="0.0 0.0 742.6010498687664 753.6010498687664" fill="none" stroke="none" stroke-linecap="square" stroke-miterlimit="10" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink"><clipPath id="p.0"><path d="m0 0l742.6011 0l0 753.6011l-742.6011 0l0 -753.6011z" clip-rule="nonzero"></path></clipPath><g clip-path="url(#p.0)"><path fill="#000000" fill-opacity="0.0" d="m0 0l742.6011 0l0 753.6011l-742.6011 0z" fill-rule="evenodd"></path><path fill="#000000" fill-opacity="0.0" d="m136.0 30.700842l166.01575 0l0 42.110237l-166.01575 0z" fill-rule="evenodd"></path><path stroke="#000000" stroke-width="2.0" stroke-linejoin="round" stroke-linecap="butt" d="m136.0 30.700842l166.01575 0l0 42.110237l-166.01575 0z" fill-rule="evenodd"></path><path fill="#000000" d="m153.6274 57.620842l0 -13.59375l1.8125 0l0 13.59375l-1.8125 0zm4.667679 0l0 -9.859375l1.5 0l0 1.40625q1.09375 -1.625 3.140625 -1.625q0.890625 0 1.640625 0.328125q0.75 0.3125 1.109375 0.84375q0.375 0.515625 0.53125 1.21875q0.09375 0.46875 0.09375 1.625l0 6.0625l-1.671875 0l0 -6.0q0 -1.015625 -0.203125 -1.515625q-0.1875 -0.515625 -0.6875 -0.8125q-0.5 -0.296875 -1.171875 -0.296875q-1.0625 0 -1.84375 0.671875q-0.765625 0.671875 -0.765625 2.578125l0 5.375l-1.671875 0zm10.375717 3.78125l0 -13.640625l1.53125 0l0 1.28125q0.53125 -0.75 1.203125 -1.125q0.6875 -0.375 1.640625 -0.375q1.265625 0 2.234375 0.65625q0.96875 0.640625 1.453125 1.828125q0.5 1.1875 0.5 2.59375q0 1.515625 -0.546875 2.734375q-0.546875 1.203125 -1.578125 1.84375q-1.03125 0.640625 -2.171875 0.640625q-0.84375 0 -1.515625 -0.34375q-0.65625 -0.359375 -1.078125 -0.890625l0 4.796875l-1.671875 0zm1.515625 -8.65625q0 1.90625 0.765625 2.8125q0.78125 0.90625 1.875 0.90625q1.109375 0 1.890625 -0.9375q0.796875 -0.9375 0.796875 -2.921875q0 -1.875 -0.78125 -2.8125q-0.765625 -0.9375 -1.84375 -0.9375q-1.0625 0 -1.890625 1.0q-0.8125 1.0 -0.8125 2.890625zm15.313217 4.875l0 -1.453125q-1.140625 1.671875 -3.125 1.671875q-0.859375 0 -1.625 -0.328125q-0.75 -0.34375 -1.125 -0.84375q-0.359375 -0.5 -0.515625 -1.234375q-0.09375 -0.5 -0.09375 -1.5625l0 -6.109375l1.671875 0l0 5.46875q0 1.3125 0.09375 1.765625q0.15625 0.65625 0.671875 1.03125q0.515625 0.375 1.265625 0.375q0.75 0 1.40625 -0.375q0.65625 -0.390625 0.921875 -1.046875q0.28125 -0.671875 0.28125 -1.9375l0 -5.28125l1.671875 0l0 9.859375l-1.5 0zm7.578842 -1.5l0.234375 1.484375q-0.703125 0.140625 -1.265625 0.140625q-0.90625 0 -1.40625 -0.28125q-0.5 -0.296875 -0.703125 -0.75q-0.203125 -0.46875 -0.203125 -1.984375l0 -5.65625l-1.234375 0l0 -1.3125l1.234375 0l0 -2.4375l1.65625 -1.0l0 3.4375l1.6875 0l0 1.3125l-1.6875 0l0 5.75q0 0.71875 0.078125 0.921875q0.09375 0.203125 0.296875 0.328125q0.203125 0.125 0.578125 0.125q0.265625 0 0.734375 -0.078125zm7.355179 1.5l-1.671875 0l0 -10.640625q-0.59375 0.578125 -1.578125 1.15625q-0.984375 0.5625 -1.765625 0.859375l0 -1.625q1.40625 -0.65625 2.453125 -1.59375q1.046875 -0.9375 1.484375 -1.8125l1.078125 0l0 13.65625zm12.918396 4.0q-1.375 -1.75 -2.328125 -4.078125q-0.953125 -2.34375 -0.953125 -4.84375q0 -2.21875 0.703125 -4.234375q0.84375 -2.34375 2.578125 -4.671875l1.203125 0q-1.125 1.921875 -1.484375 2.75q-0.5625 1.28125 -0.890625 2.671875q-0.40625 1.734375 -0.40625 3.484375q0 4.46875 2.78125 8.921875l-1.203125 0zm2.353302 -6.9375l1.65625 -0.265625q0.140625 1.0 0.765625 1.53125q0.640625 0.515625 1.78125 0.515625q1.15625 0 1.703125 -0.46875q0.5625 -0.46875 0.5625 -1.09375q0 -0.5625 -0.484375 -0.890625q-0.34375 -0.21875 -1.703125 -0.5625q-1.84375 -0.46875 -2.5625 -0.796875q-0.703125 -0.34375 -1.078125 -0.9375q-0.359375 -0.609375 -0.359375 -1.328125q0 -0.65625 0.296875 -1.21875q0.3125 -0.5625 0.828125 -0.9375q0.390625 -0.28125 1.0625 -0.484375q0.671875 -0.203125 1.4375 -0.203125q1.171875 0 2.046875 0.34375q0.875 0.328125 1.28125 0.90625q0.421875 0.5625 0.578125 1.515625l-1.625 0.21875q-0.109375 -0.75 -0.65625 -1.171875q-0.53125 -0.4375 -1.5 -0.4375q-1.15625 0 -1.640625 0.390625q-0.484375 0.375 -0.484375 0.875q0 0.328125 0.203125 0.59375q0.203125 0.265625 0.640625 0.4375q0.25 0.09375 1.46875 0.4375q1.765625 0.46875 2.46875 0.765625q0.703125 0.296875 1.09375 0.875q0.40625 0.578125 0.40625 1.4375q0 0.828125 -0.484375 1.578125q-0.484375 0.734375 -1.40625 1.140625q-0.921875 0.390625 -2.078125 0.390625q-1.921875 0 -2.9375 -0.796875q-1.0 -0.796875 -1.28125 -2.359375zm10.015625 -8.75l0 -1.90625l1.671875 0l0 1.90625l-1.671875 0zm0 11.6875l0 -9.859375l1.671875 0l0 9.859375l-1.671875 0zm3.2541962 0l0 -1.359375l6.265625 -7.1875q-1.0625 0.046875 -1.875 0.046875l-4.015625 0l0 -1.359375l8.046875 0l0 1.109375l-5.34375 6.25l-1.015625 1.140625q1.109375 -0.078125 2.09375 -0.078125l4.5625 0l0 1.4375l-8.71875 0zm16.953125 -3.171875l1.71875 0.21875q-0.40625 1.5 -1.515625 2.34375q-1.09375 0.828125 -2.8125 0.828125q-2.15625 0 -3.421875 -1.328125q-1.265625 -1.328125 -1.265625 -3.734375q0 -2.484375 1.265625 -3.859375q1.28125 -1.375 3.328125 -1.375q1.984375 0 3.234375 1.34375q1.25 1.34375 1.25 3.796875q0 0.140625 -0.015625 0.4375l-7.34375 0q0.09375 1.625 0.921875 2.484375q0.828125 0.859375 2.0625 0.859375q0.90625 0 1.546875 -0.46875q0.65625 -0.484375 1.046875 -1.546875zm-5.484375 -2.703125l5.5 0q-0.109375 -1.234375 -0.625 -1.859375q-0.796875 -0.96875 -2.078125 -0.96875q-1.140625 0 -1.9375 0.78125q-0.78125 0.765625 -0.859375 2.046875zm23.07463 -2.125l-8.968735 0l0 -1.5625l8.968735 0l0 1.5625zm0 4.125l-8.968735 0l0 -1.546875l8.968735 0l0 1.546875zm13.125153 3.875l-1.671875 0l0 -10.640625q-0.59375 0.578125 -1.578125 1.15625q-0.984375 0.5625 -1.765625 0.859375l0 -1.625q1.40625 -0.65625 2.453125 -1.59375q1.046875 -0.9375 1.484375 -1.8125l1.078125 0l0 13.65625zm5.641327 4.0l-1.1875 0q2.765625 -4.453125 2.765625 -8.921875q0 -1.734375 -0.390625 -3.453125q-0.328125 -1.390625 -0.890625 -2.671875q-0.359375 -0.84375 -1.484375 -2.78125l1.1875 0q1.75 2.328125 2.578125 4.671875q0.71875 2.015625 0.71875 4.234375q0 2.5 -0.96875 4.84375q-0.953125 2.328125 -2.328125 4.078125z" fill-rule="nonzero"></path><path fill="#000000" fill-opacity="0.0" d="m103.0 190.02362l232.18896 0l0 42.110245l-232.18896 0z" fill-rule="evenodd"></path><path stroke="#000000" stroke-width="2.0" stroke-linejoin="round" stroke-linecap="butt" d="m103.0 190.02362l232.18896 0l0 42.110245l-232.18896 0z" fill-rule="evenodd"></path><path fill="#000000" d="m151.01154 216.94362l0 -13.59375l1.796875 0l0 11.984375l6.703125 0l0 1.609375l-8.5 0zm9.844467 -4.375l1.6875 -0.140625q0.125 1.015625 0.5625 1.671875q0.4375 0.65625 1.359375 1.0625q0.9375 0.40625 2.09375 0.40625q1.03125 0 1.8125 -0.3125q0.796875 -0.3125 1.1875 -0.84375q0.390625 -0.53125 0.390625 -1.15625q0 -0.640625 -0.375 -1.109375q-0.375 -0.484375 -1.234375 -0.8125q-0.546875 -0.21875 -2.421875 -0.65625q-1.875 -0.453125 -2.625 -0.859375q-0.96875 -0.515625 -1.453125 -1.265625q-0.46875 -0.75 -0.46875 -1.6875q0 -1.03125 0.578125 -1.921875q0.59375 -0.90625 1.703125 -1.359375q1.125 -0.46875 2.5 -0.46875q1.515625 0 2.671875 0.484375q1.15625 0.484375 1.765625 1.4375q0.625 0.9375 0.671875 2.140625l-1.71875 0.125q-0.140625 -1.28125 -0.953125 -1.9375q-0.796875 -0.671875 -2.359375 -0.671875q-1.625 0 -2.375 0.609375q-0.75 0.59375 -0.75 1.4375q0 0.734375 0.53125 1.203125q0.515625 0.46875 2.703125 0.96875q2.203125 0.5 3.015625 0.875q1.1875 0.546875 1.75 1.390625q0.578125 0.828125 0.578125 1.921875q0 1.09375 -0.625 2.0625q-0.625 0.953125 -1.796875 1.484375q-1.15625 0.53125 -2.609375 0.53125q-1.84375 0 -3.09375 -0.53125q-1.25 -0.546875 -1.96875 -1.625q-0.703125 -1.078125 -0.734375 -2.453125zm16.506073 4.375l0 -12.0l-4.46875 0l0 -1.59375l10.765625 0l0 1.59375l-4.5 0l0 12.0l-1.796875 0zm7.880356 0l0 -13.59375l2.71875 0l3.21875 9.625q0.4375 1.34375 0.640625 2.015625q0.234375 -0.75 0.734375 -2.1875l3.25 -9.453125l2.421875 0l0 13.59375l-1.734375 0l0 -11.390625l-3.953125 11.390625l-1.625 0l-3.9375 -11.578125l0 11.578125l-1.734375 0zm21.212677 0l-1.671875 0l0 -10.640625q-0.59375 0.578125 -1.578125 1.15625q-0.984375 0.5625 -1.765625 0.859375l0 -1.625q1.40625 -0.65625 2.453125 -1.59375q1.046875 -0.9375 1.484375 -1.8125l1.078125 0l0 13.65625zm12.918396 4.0q-1.375 -1.75 -2.328125 -4.078125q-0.953125 -2.34375 -0.953125 -4.84375q0 -2.21875 0.703125 -4.234375q0.84375 -2.34375 2.578125 -4.671875l1.203125 0q-1.125 1.921875 -1.484375 2.75q-0.5625 1.28125 -0.890625 2.671875q-0.40625 1.734375 -0.40625 3.484375q0 4.46875 2.78125 8.921875l-1.203125 0zm2.556427 -7.5625l1.765625 -0.15625q0.1875 1.28125 0.890625 1.9375q0.71875 0.640625 1.71875 0.640625q1.203125 0 2.03125 -0.90625q0.84375 -0.90625 0.84375 -2.421875q0 -1.421875 -0.8125 -2.25q-0.796875 -0.828125 -2.09375 -0.828125q-0.796875 0 -1.453125 0.375q-0.640625 0.359375 -1.015625 0.953125l-1.578125 -0.203125l1.328125 -7.0l6.765625 0l0 1.609375l-5.4375 0l-0.734375 3.640625q1.234375 -0.84375 2.578125 -0.84375q1.78125 0 3.0 1.234375q1.234375 1.234375 1.234375 3.171875q0 1.84375 -1.078125 3.1875q-1.3125 1.65625 -3.578125 1.65625q-1.859375 0 -3.03125 -1.03125q-1.171875 -1.046875 -1.34375 -2.765625zm16.672592 3.5625l-1.671875 0l0 -10.640625q-0.59375 0.578125 -1.578125 1.15625q-0.984375 0.5625 -1.765625 0.859375l0 -1.625q1.40625 -0.65625 2.453125 -1.59375q1.046875 -0.9375 1.484375 -1.8125l1.078125 0l0 13.65625zm12.860092 -1.609375l0 1.609375l-8.984375 0q-0.015625 -0.609375 0.1875 -1.15625q0.34375 -0.921875 1.09375 -1.8125q0.765625 -0.890625 2.1875 -2.0625q2.21875 -1.8125 3.0 -2.875q0.78125 -1.0625 0.78125 -2.015625q0 -0.984375 -0.71875 -1.671875q-0.703125 -0.6875 -1.84375 -0.6875q-1.203125 0 -1.9375 0.734375q-0.71875 0.71875 -0.71875 2.0l-1.71875 -0.171875q0.171875 -1.921875 1.328125 -2.921875q1.15625 -1.015625 3.09375 -1.015625q1.953125 0 3.09375 1.09375q1.140625 1.078125 1.140625 2.6875q0 0.8125 -0.34375 1.609375q-0.328125 0.78125 -1.109375 1.65625q-0.765625 0.859375 -2.5625 2.390625q-1.5 1.265625 -1.9375 1.71875q-0.421875 0.4375 -0.703125 0.890625l6.671875 0zm0.95384216 1.609375l3.5937347 -5.125l-3.3281097 -4.734375l2.09375 0l1.5156097 2.3125q0.421875 0.65625 0.671875 1.109375q0.421875 -0.609375 0.765625 -1.09375l1.65625 -2.328125l1.984375 0l-3.390625 4.640625l3.65625 5.21875l-2.046875 0l-2.03125 -3.0625l-0.53125 -0.828125l-2.5937347 3.890625l-2.015625 0zm16.26561 0l-1.671875 0l0 -10.640625q-0.59375 0.578125 -1.578125 1.15625q-0.984375 0.5625 -1.765625 0.859375l0 -1.625q1.40625 -0.65625 2.453125 -1.59375q1.046875 -0.9375 1.484375 -1.8125l1.078125 0l0 13.65625zm12.750732 -10.265625l-1.65625 0.125q-0.21875 -0.984375 -0.640625 -1.421875q-0.671875 -0.71875 -1.65625 -0.71875q-0.8125 0 -1.40625 0.4375q-0.796875 0.578125 -1.25 1.6875q-0.453125 1.09375 -0.46875 3.140625q0.609375 -0.921875 1.46875 -1.359375q0.875 -0.453125 1.828125 -0.453125q1.671875 0 2.84375 1.234375q1.171875 1.234375 1.171875 3.171875q0 1.28125 -0.546875 2.390625q-0.546875 1.09375 -1.515625 1.6875q-0.96875 0.578125 -2.1875 0.578125q-2.09375 0 -3.40625 -1.53125q-1.3125 -1.546875 -1.3125 -5.0625q0 -3.953125 1.453125 -5.734375q1.265625 -1.5625 3.421875 -1.5625q1.609375 0 2.625 0.90625q1.03125 0.890625 1.234375 2.484375zm-6.8125 5.859375q0 0.859375 0.359375 1.65625q0.375 0.78125 1.03125 1.203125q0.65625 0.40625 1.375 0.40625q1.0625 0 1.8125 -0.84375q0.765625 -0.859375 0.765625 -2.328125q0 -1.40625 -0.75 -2.21875q-0.75 -0.8125 -1.890625 -0.8125q-1.125 0 -1.921875 0.8125q-0.78125 0.8125 -0.78125 2.125zm10.078857 8.40625l-1.1875 0q2.765625 -4.453125 2.765625 -8.921875q0 -1.734375 -0.390625 -3.453125q-0.328125 -1.390625 -0.890625 -2.671875q-0.359375 -0.84375 -1.484375 -2.78125l1.1875 0q1.75 2.328125 2.578125 4.671875q0.71875 2.015625 0.71875 4.234375q0 2.5 -0.96875 4.84375q-0.953125 2.328125 -2.328125 4.078125z" fill-rule="nonzero"></path><path fill="#000000" fill-opacity="0.0" d="m129.09448 653.0184l180.0 0l0 42.11023l-180.0 0z" fill-rule="evenodd"></path><path stroke="#000000" stroke-width="2.0" stroke-linejoin="round" stroke-linecap="butt" d="m129.09448 653.0184l180.0 0l0 42.11023l-180.0 0z" fill-rule="evenodd"></path><path fill="#000000" d="m150.8024 673.31335q0 -3.390625 1.8125 -5.296875q1.828125 -1.921875 4.703125 -1.921875q1.875 0 3.390625 0.90625q1.515625 0.890625 2.296875 2.5q0.796875 1.609375 0.796875 3.65625q0 2.0625 -0.84375 3.703125q-0.828125 1.625 -2.359375 2.46875q-1.53125 0.84375 -3.296875 0.84375q-1.921875 0 -3.4375 -0.921875q-1.5 -0.9375 -2.28125 -2.53125q-0.78125 -1.609375 -0.78125 -3.40625zm1.859375 0.03125q0 2.453125 1.3125 3.875q1.328125 1.40625 3.3125 1.40625q2.03125 0 3.34375 -1.421875q1.3125 -1.4375 1.3125 -4.0625q0 -1.65625 -0.5625 -2.890625q-0.546875 -1.234375 -1.640625 -1.921875q-1.078125 -0.6875 -2.421875 -0.6875q-1.90625 0 -3.28125 1.3125q-1.375 1.3125 -1.375 4.390625zm19.433304 6.59375l0 -1.453125q-1.140625 1.671875 -3.125 1.671875q-0.859375 0 -1.625 -0.328125q-0.75 -0.34375 -1.125 -0.84375q-0.359375 -0.5 -0.515625 -1.234375q-0.09375 -0.5 -0.09375 -1.5625l0 -6.109375l1.671875 0l0 5.46875q0 1.3125 0.09375 1.765625q0.15625 0.65625 0.671875 1.03125q0.515625 0.375 1.265625 0.375q0.75 0 1.40625 -0.375q0.65625 -0.390625 0.921875 -1.046875q0.28125 -0.671875 0.28125 -1.9375l0 -5.28125l1.671875 0l0 9.859375l-1.5 0zm7.578842 -1.5l0.234375 1.484375q-0.703125 0.140625 -1.265625 0.140625q-0.90625 0 -1.40625 -0.28125q-0.5 -0.296875 -0.703125 -0.75q-0.203125 -0.46875 -0.203125 -1.984375l0 -5.65625l-1.234375 0l0 -1.3125l1.234375 0l0 -2.4375l1.65625 -1.0l0 3.4375l1.6875 0l0 1.3125l-1.6875 0l0 5.75q0 0.71875 0.078125 0.921875q0.09375 0.203125 0.296875 0.328125q0.203125 0.125 0.578125 0.125q0.265625 0 0.734375 -0.078125zm1.5270538 5.28125l0 -13.640625l1.53125 0l0 1.28125q0.53125 -0.75 1.203125 -1.125q0.6875 -0.375 1.640625 -0.375q1.265625 0 2.234375 0.65625q0.96875 0.640625 1.453125 1.828125q0.5 1.1875 0.5 2.59375q0 1.515625 -0.546875 2.734375q-0.546875 1.203125 -1.578125 1.84375q-1.03125 0.640625 -2.171875 0.640625q-0.84375 0 -1.515625 -0.34375q-0.65625 -0.359375 -1.078125 -0.890625l0 4.796875l-1.671875 0zm1.515625 -8.65625q0 1.90625 0.765625 2.8125q0.78125 0.90625 1.875 0.90625q1.109375 0 1.890625 -0.9375q0.796875 -0.9375 0.796875 -2.921875q0 -1.875 -0.78125 -2.8125q-0.765625 -0.9375 -1.84375 -0.9375q-1.0625 0 -1.890625 1.0q-0.8125 1.0 -0.8125 2.890625zm15.313217 4.875l0 -1.453125q-1.140625 1.671875 -3.125 1.671875q-0.859375 0 -1.625 -0.328125q-0.75 -0.34375 -1.125 -0.84375q-0.359375 -0.5 -0.515625 -1.234375q-0.09375 -0.5 -0.09375 -1.5625l0 -6.109375l1.671875 0l0 5.46875q0 1.3125 0.09375 1.765625q0.15625 0.65625 0.671875 1.03125q0.515625 0.375 1.265625 0.375q0.75 0 1.40625 -0.375q0.65625 -0.390625 0.921875 -1.046875q0.28125 -0.671875 0.28125 -1.9375l0 -5.28125l1.671875 0l0 9.859375l-1.5 0zm7.578842 -1.5l0.234375 1.484375q-0.703125 0.140625 -1.265625 0.140625q-0.90625 0 -1.40625 -0.28125q-0.5 -0.296875 -0.703125 -0.75q-0.203125 -0.46875 -0.203125 -1.984375l0 -5.65625l-1.234375 0l0 -1.3125l1.234375 0l0 -2.4375l1.65625 -1.0l0 3.4375l1.6875 0l0 1.3125l-1.6875 0l0 5.75q0 0.71875 0.078125 0.921875q0.09375 0.203125 0.296875 0.328125q0.203125 0.125 0.578125 0.125q0.265625 0 0.734375 -0.078125zm9.897858 5.5q-1.375 -1.75 -2.328125 -4.078125q-0.953125 -2.34375 -0.953125 -4.84375q0 -2.21875 0.703125 -4.234375q0.84375 -2.34375 2.578125 -4.671875l1.203125 0q-1.125 1.921875 -1.484375 2.75q-0.5625 1.28125 -0.890625 2.671875q-0.40625 1.734375 -0.40625 3.484375q0 4.46875 2.78125 8.921875l-1.203125 0zm2.353302 -6.9375l1.65625 -0.265625q0.140625 1.0 0.765625 1.53125q0.640625 0.515625 1.78125 0.515625q1.15625 0 1.703125 -0.46875q0.5625 -0.46875 0.5625 -1.09375q0 -0.5625 -0.484375 -0.890625q-0.34375 -0.21875 -1.703125 -0.5625q-1.84375 -0.46875 -2.5625 -0.796875q-0.703125 -0.34375 -1.078125 -0.9375q-0.359375 -0.609375 -0.359375 -1.328125q0 -0.65625 0.296875 -1.21875q0.3125 -0.5625 0.828125 -0.9375q0.390625 -0.28125 1.0625 -0.484375q0.671875 -0.203125 1.4375 -0.203125q1.171875 0 2.046875 0.34375q0.875 0.328125 1.28125 0.90625q0.421875 0.5625 0.578125 1.515625l-1.625 0.21875q-0.109375 -0.75 -0.65625 -1.171875q-0.53125 -0.4375 -1.5 -0.4375q-1.15625 0 -1.640625 0.390625q-0.484375 0.375 -0.484375 0.875q0 0.328125 0.203125 0.59375q0.203125 0.265625 0.640625 0.4375q0.25 0.09375 1.46875 0.4375q1.765625 0.46875 2.46875 0.765625q0.703125 0.296875 1.09375 0.875q0.40625 0.578125 0.40625 1.4375q0 0.828125 -0.484375 1.578125q-0.484375 0.734375 -1.40625 1.140625q-0.921875 0.390625 -2.078125 0.390625q-1.921875 0 -2.9375 -0.796875q-1.0 -0.796875 -1.28125 -2.359375zm10.015625 -8.75l0 -1.90625l1.671875 0l0 1.90625l-1.671875 0zm0 11.6875l0 -9.859375l1.671875 0l0 9.859375l-1.671875 0zm3.2541962 0l0 -1.359375l6.265625 -7.1875q-1.0625 0.046875 -1.875 0.046875l-4.015625 0l0 -1.359375l8.046875 0l0 1.109375l-5.34375 6.25l-1.015625 1.140625q1.109375 -0.078125 2.09375 -0.078125l4.5625 0l0 1.4375l-8.71875 0zm16.953125 -3.171875l1.71875 0.21875q-0.40625 1.5 -1.515625 2.34375q-1.09375 0.828125 -2.8125 0.828125q-2.15625 0 -3.421875 -1.328125q-1.265625 -1.328125 -1.265625 -3.734375q0 -2.484375 1.265625 -3.859375q1.28125 -1.375 3.328125 -1.375q1.984375 0 3.234375 1.34375q1.25 1.34375 1.25 3.796875q0 0.140625 -0.015625 0.4375l-7.34375 0q0.09375 1.625 0.921875 2.484375q0.828125 0.859375 2.0625 0.859375q0.90625 0 1.546875 -0.46875q0.65625 -0.484375 1.046875 -1.546875zm-5.484375 -2.703125l5.5 0q-0.109375 -1.234375 -0.625 -1.859375q-0.796875 -0.96875 -2.078125 -0.96875q-1.140625 0 -1.9375 0.78125q-0.78125 0.765625 -0.859375 2.046875zm23.074646 -2.125l-8.96875 0l0 -1.5625l8.96875 0l0 1.5625zm0 4.125l-8.96875 0l0 -1.546875l8.96875 0l0 1.546875zm13.125153 3.875l-1.671875 0l0 -10.640625q-0.59375 0.578125 -1.578125 1.15625q-0.984375 0.5625 -1.765625 0.859375l0 -1.625q1.40625 -0.65625 2.453125 -1.59375q1.046875 -0.9375 1.484375 -1.8125l1.078125 0l0 13.65625zm5.641327 4.0l-1.1875 0q2.765625 -4.453125 2.765625 -8.921875q0 -1.734375 -0.390625 -3.453125q-0.328125 -1.390625 -0.890625 -2.671875q-0.359375 -0.84375 -1.484375 -2.78125l1.1875 0q1.75 2.328125 2.578125 4.671875q0.71875 2.015625 0.71875 4.234375q0 2.5 -0.96875 4.84375q-0.953125 2.328125 -2.328125 4.078125z" fill-rule="nonzero"></path><path fill="#000000" fill-opacity="0.0" d="m219.09448 232.13387l0 33.606277" fill-rule="evenodd"></path><path stroke="#000000" stroke-width="1.0" stroke-linejoin="round" stroke-linecap="butt" d="m219.09448 232.13387l0 27.606277" fill-rule="evenodd"></path><path fill="#000000" stroke="#000000" stroke-width="1.0" stroke-linecap="butt" d="m217.44275 259.74014l1.6517334 4.5381165l1.6517334 -4.5381165z" fill-rule="evenodd"></path><path fill="#000000" fill-opacity="0.0" d="m219.09448 307.8476l0 34.173218" fill-rule="evenodd"></path><path stroke="#000000" stroke-width="1.0" stroke-linejoin="round" stroke-linecap="butt" d="m219.09448 307.8476l0 28.173248" fill-rule="evenodd"></path><path fill="#000000" stroke="#000000" stroke-width="1.0" stroke-linecap="butt" d="m217.44275 336.02084l1.6517334 4.538086l1.6517334 -4.538086z" fill-rule="evenodd"></path><path fill="#000000" fill-opacity="0.0" d="m219.00787 72.81108l0.09448242 25.732285" fill-rule="evenodd"></path><path stroke="#000000" stroke-width="1.0" stroke-linejoin="round" stroke-linecap="butt" d="m219.00787 72.81108l0.07246399 19.732315" fill-rule="evenodd"></path><path fill="#000000" stroke="#000000" stroke-width="1.0" stroke-linecap="butt" d="m217.4286 92.54946l1.668396 4.5320053l1.6350555 -4.544136z" fill-rule="evenodd"></path><path fill="#000000" fill-opacity="0.0" d="m219.09448 384.13385l0 36.283478" fill-rule="evenodd"></path><path stroke="#000000" stroke-width="1.0" stroke-linejoin="round" stroke-linecap="butt" d="m219.09448 384.13385l0 30.283478" fill-rule="evenodd"></path><path fill="#000000" stroke="#000000" stroke-width="1.0" stroke-linecap="butt" d="m217.44275 414.41733l1.6517334 4.538086l1.6517334 -4.538086z" fill-rule="evenodd"></path><path fill="#000000" fill-opacity="0.0" d="m103.0 265.73737l232.18896 0l0 42.11023l-232.18896 0z" fill-rule="evenodd"></path><path stroke="#000000" stroke-width="2.0" stroke-linejoin="round" stroke-linecap="butt" d="m103.0 265.73737l232.18896 0l0 42.11023l-232.18896 0z" fill-rule="evenodd"></path><path fill="#000000" d="m151.01154 292.65735l0 -13.59375l1.796875 0l0 11.984375l6.703125 0l0 1.609375l-8.5 0zm9.844467 -4.375l1.6875 -0.140625q0.125 1.015625 0.5625 1.671875q0.4375 0.65625 1.359375 1.0625q0.9375 0.40625 2.09375 0.40625q1.03125 0 1.8125 -0.3125q0.796875 -0.3125 1.1875 -0.84375q0.390625 -0.53125 0.390625 -1.15625q0 -0.640625 -0.375 -1.109375q-0.375 -0.484375 -1.234375 -0.8125q-0.546875 -0.21875 -2.421875 -0.65625q-1.875 -0.453125 -2.625 -0.859375q-0.96875 -0.515625 -1.453125 -1.265625q-0.46875 -0.75 -0.46875 -1.6875q0 -1.03125 0.578125 -1.921875q0.59375 -0.90625 1.703125 -1.359375q1.125 -0.46875 2.5 -0.46875q1.515625 0 2.671875 0.484375q1.15625 0.484375 1.765625 1.4375q0.625 0.9375 0.671875 2.140625l-1.71875 0.125q-0.140625 -1.28125 -0.953125 -1.9375q-0.796875 -0.671875 -2.359375 -0.671875q-1.625 0 -2.375 0.609375q-0.75 0.59375 -0.75 1.4375q0 0.734375 0.53125 1.203125q0.515625 0.46875 2.703125 0.96875q2.203125 0.5 3.015625 0.875q1.1875 0.546875 1.75 1.390625q0.578125 0.828125 0.578125 1.921875q0 1.09375 -0.625 2.0625q-0.625 0.953125 -1.796875 1.484375q-1.15625 0.53125 -2.609375 0.53125q-1.84375 0 -3.09375 -0.53125q-1.25 -0.546875 -1.96875 -1.625q-0.703125 -1.078125 -0.734375 -2.453125zm16.506073 4.375l0 -12.0l-4.46875 0l0 -1.59375l10.765625 0l0 1.59375l-4.5 0l0 12.0l-1.796875 0zm7.880356 0l0 -13.59375l2.71875 0l3.21875 9.625q0.4375 1.34375 0.640625 2.015625q0.234375 -0.75 0.734375 -2.1875l3.25 -9.453125l2.421875 0l0 13.59375l-1.734375 0l0 -11.390625l-3.953125 11.390625l-1.625 0l-3.9375 -11.578125l0 11.578125l-1.734375 0zm23.697052 -1.609375l0 1.609375l-8.984375 0q-0.015625 -0.609375 0.1875 -1.15625q0.34375 -0.921875 1.09375 -1.8125q0.765625 -0.890625 2.1875 -2.0625q2.21875 -1.8125 3.0 -2.875q0.78125 -1.0625 0.78125 -2.015625q0 -0.984375 -0.71875 -1.671875q-0.703125 -0.6875 -1.84375 -0.6875q-1.203125 0 -1.9375 0.734375q-0.71875 0.71875 -0.71875 2.0l-1.71875 -0.171875q0.171875 -1.921875 1.328125 -2.921875q1.15625 -1.015625 3.09375 -1.015625q1.953125 0 3.09375 1.09375q1.140625 1.078125 1.140625 2.6875q0 0.8125 -0.34375 1.609375q-0.328125 0.78125 -1.109375 1.65625q-0.765625 0.859375 -2.5625 2.390625q-1.5 1.265625 -1.9375 1.71875q-0.421875 0.4375 -0.703125 0.890625l6.671875 0zm10.434021 5.609375q-1.375 -1.75 -2.328125 -4.078125q-0.953125 -2.34375 -0.953125 -4.84375q0 -2.21875 0.703125 -4.234375q0.84375 -2.34375 2.578125 -4.671875l1.203125 0q-1.125 1.921875 -1.484375 2.75q-0.5625 1.28125 -0.890625 2.671875q-0.40625 1.734375 -0.40625 3.484375q0 4.46875 2.78125 8.921875l-1.203125 0zm2.556427 -7.5625l1.765625 -0.15625q0.1875 1.28125 0.890625 1.9375q0.71875 0.640625 1.71875 0.640625q1.203125 0 2.03125 -0.90625q0.84375 -0.90625 0.84375 -2.421875q0 -1.421875 -0.8125 -2.25q-0.796875 -0.828125 -2.09375 -0.828125q-0.796875 0 -1.453125 0.375q-0.640625 0.359375 -1.015625 0.953125l-1.578125 -0.203125l1.328125 -7.0l6.765625 0l0 1.609375l-5.4375 0l-0.734375 3.640625q1.234375 -0.84375 2.578125 -0.84375q1.78125 0 3.0 1.234375q1.234375 1.234375 1.234375 3.171875q0 1.84375 -1.078125 3.1875q-1.3125 1.65625 -3.578125 1.65625q-1.859375 0 -3.03125 -1.03125q-1.171875 -1.046875 -1.34375 -2.765625zm16.672592 3.5625l-1.671875 0l0 -10.640625q-0.59375 0.578125 -1.578125 1.15625q-0.984375 0.5625 -1.765625 0.859375l0 -1.625q1.40625 -0.65625 2.453125 -1.59375q1.046875 -0.9375 1.484375 -1.8125l1.078125 0l0 13.65625zm12.860092 -1.609375l0 1.609375l-8.984375 0q-0.015625 -0.609375 0.1875 -1.15625q0.34375 -0.921875 1.09375 -1.8125q0.765625 -0.890625 2.1875 -2.0625q2.21875 -1.8125 3.0 -2.875q0.78125 -1.0625 0.78125 -2.015625q0 -0.984375 -0.71875 -1.671875q-0.703125 -0.6875 -1.84375 -0.6875q-1.203125 0 -1.9375 0.734375q-0.71875 0.71875 -0.71875 2.0l-1.71875 -0.171875q0.171875 -1.921875 1.328125 -2.921875q1.15625 -1.015625 3.09375 -1.015625q1.953125 0 3.09375 1.09375q1.140625 1.078125 1.140625 2.6875q0 0.8125 -0.34375 1.609375q-0.328125 0.78125 -1.109375 1.65625q-0.765625 0.859375 -2.5625 2.390625q-1.5 1.265625 -1.9375 1.71875q-0.421875 0.4375 -0.703125 0.890625l6.671875 0zm0.95384216 1.609375l3.5937347 -5.125l-3.3281097 -4.734375l2.09375 0l1.5156097 2.3125q0.421875 0.65625 0.671875 1.109375q0.421875 -0.609375 0.765625 -1.09375l1.65625 -2.328125l1.984375 0l-3.390625 4.640625l3.65625 5.21875l-2.046875 0l-2.03125 -3.0625l-0.53125 -0.828125l-2.5937347 3.890625l-2.015625 0zm9.98436 -3.59375l1.671875 -0.21875q0.28125 1.421875 0.96875 2.046875q0.703125 0.625 1.6875 0.625q1.1875 0 2.0 -0.8125q0.8125 -0.828125 0.8125 -2.03125q0 -1.140625 -0.765625 -1.890625q-0.75 -0.75 -1.90625 -0.75q-0.46875 0 -1.171875 0.1875l0.1875 -1.46875q0.15625 0.015625 0.265625 0.015625q1.0625 0 1.90625 -0.546875q0.859375 -0.5625 0.859375 -1.71875q0 -0.921875 -0.625 -1.515625q-0.609375 -0.609375 -1.59375 -0.609375q-0.96875 0 -1.625 0.609375q-0.640625 0.609375 -0.828125 1.84375l-1.671875 -0.296875q0.296875 -1.6875 1.375 -2.609375q1.09375 -0.921875 2.71875 -0.921875q1.109375 0 2.046875 0.484375q0.9375 0.46875 1.421875 1.296875q0.5 0.828125 0.5 1.75q0 0.890625 -0.46875 1.609375q-0.46875 0.71875 -1.40625 1.15625q1.21875 0.265625 1.875 1.15625q0.671875 0.875 0.671875 2.1875q0 1.78125 -1.296875 3.015625q-1.296875 1.234375 -3.28125 1.234375q-1.796875 0 -2.984375 -1.0625q-1.171875 -1.0625 -1.34375 -2.765625zm19.141357 1.984375l0 1.609375l-8.984375 0q-0.015625 -0.609375 0.1875 -1.15625q0.34375 -0.921875 1.09375 -1.8125q0.765625 -0.890625 2.1875 -2.0625q2.21875 -1.8125 3.0 -2.875q0.78125 -1.0625 0.78125 -2.015625q0 -0.984375 -0.71875 -1.671875q-0.703125 -0.6875 -1.84375 -0.6875q-1.203125 0 -1.9375 0.734375q-0.71875 0.71875 -0.71875 2.0l-1.71875 -0.171875q0.171875 -1.921875 1.328125 -2.921875q1.15625 -1.015625 3.09375 -1.015625q1.953125 0 3.09375 1.09375q1.140625 1.078125 1.140625 2.6875q0 0.8125 -0.34375 1.609375q-0.328125 0.78125 -1.109375 1.65625q-0.765625 0.859375 -2.5625 2.390625q-1.5 1.265625 -1.9375 1.71875q-0.421875 0.4375 -0.703125 0.890625l6.671875 0zm3.1569824 5.609375l-1.1875 0q2.765625 -4.453125 2.765625 -8.921875q0 -1.734375 -0.390625 -3.453125q-0.328125 -1.390625 -0.890625 -2.671875q-0.359375 -0.84375 -1.484375 -2.78125l1.1875 0q1.75 2.328125 2.578125 4.671875q0.71875 2.015625 0.71875 4.234375q0 2.5 -0.96875 4.84375q-0.953125 2.328125 -2.328125 4.078125z" fill-rule="nonzero"></path><path fill="#000000" fill-opacity="0.0" d="m103.0 342.02362l232.18896 0l0 42.11023l-232.18896 0z" fill-rule="evenodd"></path><path stroke="#000000" stroke-width="2.0" stroke-linejoin="round" stroke-linecap="butt" d="m103.0 342.02362l232.18896 0l0 42.11023l-232.18896 0z" fill-rule="evenodd"></path><path fill="#000000" d="m151.01154 368.94363l0 -13.59375l1.796875 0l0 11.984375l6.703125 0l0 1.609375l-8.5 0zm9.844467 -4.375l1.6875 -0.140625q0.125 1.015625 0.5625 1.671875q0.4375 0.65625 1.359375 1.0625q0.9375 0.40625 2.09375 0.40625q1.03125 0 1.8125 -0.3125q0.796875 -0.3125 1.1875 -0.84375q0.390625 -0.53125 0.390625 -1.15625q0 -0.640625 -0.375 -1.109375q-0.375 -0.484375 -1.234375 -0.8125q-0.546875 -0.21875 -2.421875 -0.65625q-1.875 -0.453125 -2.625 -0.859375q-0.96875 -0.515625 -1.453125 -1.265625q-0.46875 -0.75 -0.46875 -1.6875q0 -1.03125 0.578125 -1.921875q0.59375 -0.90625 1.703125 -1.359375q1.125 -0.46875 2.5 -0.46875q1.515625 0 2.671875 0.484375q1.15625 0.484375 1.765625 1.4375q0.625 0.9375 0.671875 2.140625l-1.71875 0.125q-0.140625 -1.28125 -0.953125 -1.9375q-0.796875 -0.671875 -2.359375 -0.671875q-1.625 0 -2.375 0.609375q-0.75 0.59375 -0.75 1.4375q0 0.734375 0.53125 1.203125q0.515625 0.46875 2.703125 0.96875q2.203125 0.5 3.015625 0.875q1.1875 0.546875 1.75 1.390625q0.578125 0.828125 0.578125 1.921875q0 1.09375 -0.625 2.0625q-0.625 0.953125 -1.796875 1.484375q-1.15625 0.53125 -2.609375 0.53125q-1.84375 0 -3.09375 -0.53125q-1.25 -0.546875 -1.96875 -1.625q-0.703125 -1.078125 -0.734375 -2.453125zm16.506073 4.375l0 -12.0l-4.46875 0l0 -1.59375l10.765625 0l0 1.59375l-4.5 0l0 12.0l-1.796875 0zm7.880356 0l0 -13.59375l2.71875 0l3.21875 9.625q0.4375 1.34375 0.640625 2.015625q0.234375 -0.75 0.734375 -2.1875l3.25 -9.453125l2.421875 0l0 13.59375l-1.734375 0l0 -11.390625l-3.953125 11.390625l-1.625 0l-3.9375 -11.578125l0 11.578125l-1.734375 0zm14.931427 -3.59375l1.671875 -0.21875q0.28125 1.421875 0.96875 2.046875q0.703125 0.625 1.6875 0.625q1.1875 0 2.0 -0.8125q0.8125 -0.828125 0.8125 -2.03125q0 -1.140625 -0.765625 -1.890625q-0.75 -0.75 -1.90625 -0.75q-0.46875 0 -1.171875 0.1875l0.1875 -1.46875q0.15625 0.015625 0.265625 0.015625q1.0625 0 1.90625 -0.546875q0.859375 -0.5625 0.859375 -1.71875q0 -0.921875 -0.625 -1.515625q-0.609375 -0.609375 -1.59375 -0.609375q-0.96875 0 -1.625 0.609375q-0.640625 0.609375 -0.828125 1.84375l-1.671875 -0.296875q0.296875 -1.6875 1.375 -2.609375q1.09375 -0.921875 2.71875 -0.921875q1.109375 0 2.046875 0.484375q0.9375 0.46875 1.421875 1.296875q0.5 0.828125 0.5 1.75q0 0.890625 -0.46875 1.609375q-0.46875 0.71875 -1.40625 1.15625q1.21875 0.265625 1.875 1.15625q0.671875 0.875 0.671875 2.1875q0 1.78125 -1.296875 3.015625q-1.296875 1.234375 -3.28125 1.234375q-1.796875 0 -2.984375 -1.0625q-1.171875 -1.0625 -1.34375 -2.765625zm19.199646 7.59375q-1.375 -1.75 -2.328125 -4.078125q-0.953125 -2.34375 -0.953125 -4.84375q0 -2.21875 0.703125 -4.234375q0.84375 -2.34375 2.578125 -4.671875l1.203125 0q-1.125 1.921875 -1.484375 2.75q-0.5625 1.28125 -0.890625 2.671875q-0.40625 1.734375 -0.40625 3.484375q0 4.46875 2.78125 8.921875l-1.203125 0zm2.556427 -7.5625l1.765625 -0.15625q0.1875 1.28125 0.890625 1.9375q0.71875 0.640625 1.71875 0.640625q1.203125 0 2.03125 -0.90625q0.84375 -0.90625 0.84375 -2.421875q0 -1.421875 -0.8125 -2.25q-0.796875 -0.828125 -2.09375 -0.828125q-0.796875 0 -1.453125 0.375q-0.640625 0.359375 -1.015625 0.953125l-1.578125 -0.203125l1.328125 -7.0l6.765625 0l0 1.609375l-5.4375 0l-0.734375 3.640625q1.234375 -0.84375 2.578125 -0.84375q1.78125 0 3.0 1.234375q1.234375 1.234375 1.234375 3.171875q0 1.84375 -1.078125 3.1875q-1.3125 1.65625 -3.578125 1.65625q-1.859375 0 -3.03125 -1.03125q-1.171875 -1.046875 -1.34375 -2.765625zm16.672592 3.5625l-1.671875 0l0 -10.640625q-0.59375 0.578125 -1.578125 1.15625q-0.984375 0.5625 -1.765625 0.859375l0 -1.625q1.40625 -0.65625 2.453125 -1.59375q1.046875 -0.9375 1.484375 -1.8125l1.078125 0l0 13.65625zm12.860092 -1.609375l0 1.609375l-8.984375 0q-0.015625 -0.609375 0.1875 -1.15625q0.34375 -0.921875 1.09375 -1.8125q0.765625 -0.890625 2.1875 -2.0625q2.21875 -1.8125 3.0 -2.875q0.78125 -1.0625 0.78125 -2.015625q0 -0.984375 -0.71875 -1.671875q-0.703125 -0.6875 -1.84375 -0.6875q-1.203125 0 -1.9375 0.734375q-0.71875 0.71875 -0.71875 2.0l-1.71875 -0.171875q0.171875 -1.921875 1.328125 -2.921875q1.15625 -1.015625 3.09375 -1.015625q1.953125 0 3.09375 1.09375q1.140625 1.078125 1.140625 2.6875q0 0.8125 -0.34375 1.609375q-0.328125 0.78125 -1.109375 1.65625q-0.765625 0.859375 -2.5625 2.390625q-1.5 1.265625 -1.9375 1.71875q-0.421875 0.4375 -0.703125 0.890625l6.671875 0zm0.95384216 1.609375l3.5937347 -5.125l-3.3281097 -4.734375l2.09375 0l1.5156097 2.3125q0.421875 0.65625 0.671875 1.109375q0.421875 -0.609375 0.765625 -1.09375l1.65625 -2.328125l1.984375 0l-3.390625 4.640625l3.65625 5.21875l-2.046875 0l-2.03125 -3.0625l-0.53125 -0.828125l-2.5937347 3.890625l-2.015625 0zm9.98436 -3.59375l1.671875 -0.21875q0.28125 1.421875 0.96875 2.046875q0.703125 0.625 1.6875 0.625q1.1875 0 2.0 -0.8125q0.8125 -0.828125 0.8125 -2.03125q0 -1.140625 -0.765625 -1.890625q-0.75 -0.75 -1.90625 -0.75q-0.46875 0 -1.171875 0.1875l0.1875 -1.46875q0.15625 0.015625 0.265625 0.015625q1.0625 0 1.90625 -0.546875q0.859375 -0.5625 0.859375 -1.71875q0 -0.921875 -0.625 -1.515625q-0.609375 -0.609375 -1.59375 -0.609375q-0.96875 0 -1.625 0.609375q-0.640625 0.609375 -0.828125 1.84375l-1.671875 -0.296875q0.296875 -1.6875 1.375 -2.609375q1.09375 -0.921875 2.71875 -0.921875q1.109375 0 2.046875 0.484375q0.9375 0.46875 1.421875 1.296875q0.5 0.828125 0.5 1.75q0 0.890625 -0.46875 1.609375q-0.46875 0.71875 -1.40625 1.15625q1.21875 0.265625 1.875 1.15625q0.671875 0.875 0.671875 2.1875q0 1.78125 -1.296875 3.015625q-1.296875 1.234375 -3.28125 1.234375q-1.796875 0 -2.984375 -1.0625q-1.171875 -1.0625 -1.34375 -2.765625zm19.141357 1.984375l0 1.609375l-8.984375 0q-0.015625 -0.609375 0.1875 -1.15625q0.34375 -0.921875 1.09375 -1.8125q0.765625 -0.890625 2.1875 -2.0625q2.21875 -1.8125 3.0 -2.875q0.78125 -1.0625 0.78125 -2.015625q0 -0.984375 -0.71875 -1.671875q-0.703125 -0.6875 -1.84375 -0.6875q-1.203125 0 -1.9375 0.734375q-0.71875 0.71875 -0.71875 2.0l-1.71875 -0.171875q0.171875 -1.921875 1.328125 -2.921875q1.15625 -1.015625 3.09375 -1.015625q1.953125 0 3.09375 1.09375q1.140625 1.078125 1.140625 2.6875q0 0.8125 -0.34375 1.609375q-0.328125 0.78125 -1.109375 1.65625q-0.765625 0.859375 -2.5625 2.390625q-1.5 1.265625 -1.9375 1.71875q-0.421875 0.4375 -0.703125 0.890625l6.671875 0zm3.1569824 5.609375l-1.1875 0q2.765625 -4.453125 2.765625 -8.921875q0 -1.734375 -0.390625 -3.453125q-0.328125 -1.390625 -0.890625 -2.671875q-0.359375 -0.84375 -1.484375 -2.78125l1.1875 0q1.75 2.328125 2.578125 4.671875q0.71875 2.015625 0.71875 4.234375q0 2.5 -0.96875 4.84375q-0.953125 2.328125 -2.328125 4.078125z" fill-rule="nonzero"></path><path fill="#000000" fill-opacity="0.0" d="m219.09448 618.4042l0 34.614197" fill-rule="evenodd"></path><path stroke="#000000" stroke-width="1.0" stroke-linejoin="round" stroke-linecap="butt" d="m219.09448 618.4042l0 28.614197" fill-rule="evenodd"></path><path fill="#000000" stroke="#000000" stroke-width="1.0" stroke-linecap="butt" d="m217.44275 647.0184l1.6517334 4.538086l1.6517334 -4.538086z" fill-rule="evenodd"></path><path fill="#000000" fill-opacity="0.0" d="m103.0 98.54593l232.18896 0l0 58.992126l-232.18896 0z" fill-rule="evenodd"></path><path stroke="#000000" stroke-width="2.0" stroke-linejoin="round" stroke-linecap="butt" d="m103.0 98.54593l232.18896 0l0 58.992126l-232.18896 0z" fill-rule="evenodd"></path><path fill="#000000" d="m143.32318 125.46593l0 -13.59375l9.84375 0l0 1.59375l-8.046875 0l0 4.171875l7.53125 0l0 1.59375l-7.53125 0l0 4.625l8.359375 0l0 1.609375l-10.15625 0zm12.193573 0l0 -9.859375l1.5 0l0 1.390625q0.453125 -0.71875 1.21875 -1.15625q0.78125 -0.453125 1.765625 -0.453125q1.09375 0 1.796875 0.453125q0.703125 0.453125 0.984375 1.28125q1.171875 -1.734375 3.046875 -1.734375q1.46875 0 2.25 0.8125q0.796875 0.8125 0.796875 2.5l0 6.765625l-1.671875 0l0 -6.203125q0 -1.0 -0.15625 -1.4375q-0.15625 -0.453125 -0.59375 -0.71875q-0.421875 -0.265625 -1.0 -0.265625q-1.03125 0 -1.71875 0.6875q-0.6875 0.6875 -0.6875 2.21875l0 5.71875l-1.671875 0l0 -6.40625q0 -1.109375 -0.40625 -1.65625q-0.40625 -0.5625 -1.34375 -0.5625q-0.703125 0 -1.3125 0.375q-0.59375 0.359375 -0.859375 1.078125q-0.265625 0.71875 -0.265625 2.0625l0 5.109375l-1.671875 0zm17.087677 0l-1.546875 0l0 -13.59375l1.65625 0l0 4.84375q1.0625 -1.328125 2.703125 -1.328125q0.90625 0 1.71875 0.375q0.8125 0.359375 1.328125 1.03125q0.53125 0.65625 0.828125 1.59375q0.296875 0.9375 0.296875 2.0q0 2.53125 -1.25 3.921875q-1.25 1.375 -3.0 1.375q-1.75 0 -2.734375 -1.453125l0 1.234375zm-0.015625 -5.0q0 1.765625 0.46875 2.5625q0.796875 1.28125 2.140625 1.28125q1.09375 0 1.890625 -0.9375q0.796875 -0.953125 0.796875 -2.84375q0 -1.921875 -0.765625 -2.84375q-0.765625 -0.921875 -1.84375 -0.921875q-1.09375 0 -1.890625 0.953125q-0.796875 0.953125 -0.796875 2.75zm15.594467 1.828125l1.71875 0.21875q-0.40625 1.5 -1.515625 2.34375q-1.09375 0.828125 -2.8125 0.828125q-2.15625 0 -3.421875 -1.328125q-1.265625 -1.328125 -1.265625 -3.734375q0 -2.484375 1.265625 -3.859375q1.28125 -1.375 3.328125 -1.375q1.984375 0 3.234375 1.34375q1.25 1.34375 1.25 3.796875q0 0.140625 -0.015625 0.4375l-7.34375 0q0.09375 1.625 0.921875 2.484375q0.828125 0.859375 2.0625 0.859375q0.90625 0 1.546875 -0.46875q0.65625 -0.484375 1.046875 -1.546875zm-5.484375 -2.703125l5.5 0q-0.109375 -1.234375 -0.625 -1.859375q-0.796875 -0.96875 -2.078125 -0.96875q-1.140625 0 -1.9375 0.78125q-0.78125 0.765625 -0.859375 2.046875zm15.500717 5.875l0 -1.25q-0.9375 1.46875 -2.75 1.46875q-1.171875 0 -2.171875 -0.640625q-0.984375 -0.65625 -1.53125 -1.8125q-0.53125 -1.171875 -0.53125 -2.6875q0 -1.46875 0.484375 -2.671875q0.5 -1.203125 1.46875 -1.84375q0.984375 -0.640625 2.203125 -0.640625q0.890625 0 1.578125 0.375q0.703125 0.375 1.140625 0.984375l0 -4.875l1.65625 0l0 13.59375l-1.546875 0zm-5.28125 -4.921875q0 1.890625 0.796875 2.828125q0.8125 0.9375 1.890625 0.9375q1.09375 0 1.859375 -0.890625q0.765625 -0.890625 0.765625 -2.734375q0 -2.015625 -0.78125 -2.953125q-0.78125 -0.953125 -1.921875 -0.953125q-1.109375 0 -1.859375 0.90625q-0.75 0.90625 -0.75 2.859375zm15.656967 4.921875l0 -1.25q-0.9375 1.46875 -2.75 1.46875q-1.171875 0 -2.171875 -0.640625q-0.984375 -0.65625 -1.53125 -1.8125q-0.53125 -1.171875 -0.53125 -2.6875q0 -1.46875 0.484375 -2.671875q0.5 -1.203125 1.46875 -1.84375q0.984375 -0.640625 2.203125 -0.640625q0.890625 0 1.578125 0.375q0.703125 0.375 1.140625 0.984375l0 -4.875l1.65625 0l0 13.59375l-1.546875 0zm-5.28125 -4.921875q0 1.890625 0.796875 2.828125q0.8125 0.9375 1.890625 0.9375q1.09375 0 1.859375 -0.890625q0.765625 -0.890625 0.765625 -2.734375q0 -2.015625 -0.78125 -2.953125q-0.78125 -0.953125 -1.921875 -0.953125q-1.109375 0 -1.859375 0.90625q-0.75 0.90625 -0.75 2.859375zm9.281967 -6.765625l0 -1.90625l1.671875 0l0 1.90625l-1.671875 0zm0 11.6875l0 -9.859375l1.671875 0l0 9.859375l-1.671875 0zm4.129196 0l0 -9.859375l1.5 0l0 1.40625q1.09375 -1.625 3.140625 -1.625q0.890625 0 1.640625 0.328125q0.75 0.3125 1.109375 0.84375q0.375 0.515625 0.53125 1.21875q0.09375 0.46875 0.09375 1.625l0 6.0625l-1.671875 0l0 -6.0q0 -1.015625 -0.203125 -1.515625q-0.1875 -0.515625 -0.6875 -0.8125q-0.5 -0.296875 -1.171875 -0.296875q-1.0625 0 -1.84375 0.671875q-0.765625 0.671875 -0.765625 2.578125l0 5.375l-1.671875 0zm10.078842 0.8125l1.609375 0.25q0.109375 0.75 0.578125 1.09375q0.609375 0.453125 1.6875 0.453125q1.171875 0 1.796875 -0.46875q0.625 -0.453125 0.859375 -1.28125q0.125 -0.515625 0.109375 -2.15625q-1.09375 1.296875 -2.71875 1.296875q-2.03125 0 -3.15625 -1.46875q-1.109375 -1.46875 -1.109375 -3.515625q0 -1.40625 0.515625 -2.59375q0.515625 -1.203125 1.484375 -1.84375q0.96875 -0.65625 2.265625 -0.65625q1.75 0 2.875 1.40625l0 -1.1875l1.546875 0l0 8.515625q0 2.3125 -0.46875 3.265625q-0.46875 0.96875 -1.484375 1.515625q-1.015625 0.5625 -2.5 0.5625q-1.765625 0 -2.859375 -0.796875q-1.078125 -0.796875 -1.03125 -2.390625zm1.375 -5.921875q0 1.953125 0.765625 2.84375q0.78125 0.890625 1.9375 0.890625q1.140625 0 1.921875 -0.890625q0.78125 -0.890625 0.78125 -2.78125q0 -1.8125 -0.8125 -2.71875q-0.796875 -0.921875 -1.921875 -0.921875q-1.109375 0 -1.890625 0.90625q-0.78125 0.890625 -0.78125 2.671875zm14.449646 5.109375l0 -13.59375l1.671875 0l0 13.59375l-1.671875 0zm3.5510712 -4.921875q0 -2.734375 1.53125 -4.0625q1.265625 -1.09375 3.09375 -1.09375q2.03125 0 3.3125 1.34375q1.296875 1.328125 1.296875 3.671875q0 1.90625 -0.578125 3.0q-0.5625 1.078125 -1.65625 1.6875q-1.078125 0.59375 -2.375 0.59375q-2.0625 0 -3.34375 -1.328125q-1.28125 -1.328125 -1.28125 -3.8125zm1.71875 0q0 1.890625 0.828125 2.828125q0.828125 0.9375 2.078125 0.9375q1.25 0 2.0625 -0.9375q0.828125 -0.953125 0.828125 -2.890625q0 -1.828125 -0.828125 -2.765625q-0.828125 -0.9375 -2.0625 -0.9375q-1.25 0 -2.078125 0.9375q-0.828125 0.9375 -0.828125 2.828125zm8.656967 0q0 -2.734375 1.53125 -4.0625q1.265625 -1.09375 3.09375 -1.09375q2.03125 0 3.3125 1.34375q1.296875 1.328125 1.296875 3.671875q0 1.90625 -0.578125 3.0q-0.5625 1.078125 -1.65625 1.6875q-1.078125 0.59375 -2.375 0.59375q-2.0625 0 -3.34375 -1.328125q-1.28125 -1.328125 -1.28125 -3.8125zm1.71875 0q0 1.890625 0.828125 2.828125q0.828125 0.9375 2.078125 0.9375q1.25 0 2.0625 -0.9375q0.828125 -0.953125 0.828125 -2.890625q0 -1.828125 -0.828125 -2.765625q-0.828125 -0.9375 -2.0625 -0.9375q-1.25 0 -2.078125 0.9375q-0.828125 0.9375 -0.828125 2.828125zm9.297607 4.921875l0 -13.59375l1.671875 0l0 7.75l3.953125 -4.015625l2.15625 0l-3.765625 3.65625l4.140625 6.203125l-2.0625 0l-3.25 -5.03125l-1.171875 1.125l0 3.90625l-1.671875 0zm15.765625 0l0 -1.453125q-1.140625 1.671875 -3.125 1.671875q-0.859375 0 -1.625 -0.328125q-0.75 -0.34375 -1.125 -0.84375q-0.359375 -0.5 -0.515625 -1.234375q-0.09375 -0.5 -0.09375 -1.5625l0 -6.109375l1.671875 0l0 5.46875q0 1.3125 0.09375 1.765625q0.15625 0.65625 0.671875 1.03125q0.515625 0.375 1.265625 0.375q0.75 0 1.40625 -0.375q0.65625 -0.390625 0.921875 -1.046875q0.28125 -0.671875 0.28125 -1.9375l0 -5.28125l1.671875 0l0 9.859375l-1.5 0zm3.922577 3.78125l0 -13.640625l1.53125 0l0 1.28125q0.53125 -0.75 1.203125 -1.125q0.6875 -0.375 1.640625 -0.375q1.265625 0 2.234375 0.65625q0.96875 0.640625 1.453125 1.828125q0.5 1.1875 0.5 2.59375q0 1.515625 -0.546875 2.734375q-0.546875 1.203125 -1.578125 1.84375q-1.03125 0.640625 -2.171875 0.640625q-0.84375 0 -1.515625 -0.34375q-0.65625 -0.359375 -1.078125 -0.890625l0 4.796875l-1.671875 0zm1.515625 -8.65625q0 1.90625 0.765625 2.8125q0.78125 0.90625 1.875 0.90625q1.109375 0 1.890625 -0.9375q0.796875 -0.9375 0.796875 -2.921875q0 -1.875 -0.78125 -2.8125q-0.765625 -0.9375 -1.84375 -0.9375q-1.0625 0 -1.890625 1.0q-0.8125 1.0 -0.8125 2.890625z" fill-rule="nonzero"></path><path fill="#000000" d="m176.34024 151.46593q-1.375 -1.75 -2.328125 -4.078125q-0.953125 -2.34375 -0.953125 -4.84375q0 -2.21875 0.703125 -4.234375q0.84375 -2.34375 2.578125 -4.671875l1.203125 0q-1.125 1.921875 -1.484375 2.75q-0.5625 1.28125 -0.890625 2.671875q-0.40625 1.734375 -0.40625 3.484375q0 4.46875 2.78125 8.921875l-1.203125 0zm8.853302 -4.0l-1.671875 0l0 -10.640625q-0.59375 0.578125 -1.578125 1.15625q-0.984375 0.5625 -1.765625 0.859375l0 -1.625q1.40625 -0.65625 2.453125 -1.59375q1.046875 -0.9375 1.484375 -1.8125l1.078125 0l0 13.65625zm12.750717 -10.265625l-1.65625 0.125q-0.21875 -0.984375 -0.640625 -1.421875q-0.671875 -0.71875 -1.65625 -0.71875q-0.8125 0 -1.40625 0.4375q-0.796875 0.578125 -1.25 1.6875q-0.453125 1.09375 -0.46875 3.140625q0.609375 -0.921875 1.46875 -1.359375q0.875 -0.453125 1.828125 -0.453125q1.671875 0 2.84375 1.234375q1.171875 1.234375 1.171875 3.171875q0 1.28125 -0.546875 2.390625q-0.546875 1.09375 -1.515625 1.6875q-0.96875 0.578125 -2.1875 0.578125q-2.09375 0 -3.40625 -1.53125q-1.3125 -1.546875 -1.3125 -5.0625q0 -3.953125 1.453125 -5.734375q1.265625 -1.5625 3.421875 -1.5625q1.609375 0 2.625 0.90625q1.03125 0.890625 1.234375 2.484375zm-6.8125 5.859375q0 0.859375 0.359375 1.65625q0.375 0.78125 1.03125 1.203125q0.65625 0.40625 1.375 0.40625q1.0625 0 1.8125 -0.84375q0.765625 -0.859375 0.765625 -2.328125q0 -1.40625 -0.75 -2.21875q-0.75 -0.8125 -1.890625 -0.8125q-1.125 0 -1.921875 0.8125q-0.78125 0.8125 -0.78125 2.125zm7.875717 4.40625l3.59375 -5.125l-3.328125 -4.734375l2.09375 0l1.515625 2.3125q0.421875 0.65625 0.671875 1.109375q0.421875 -0.609375 0.765625 -1.09375l1.65625 -2.328125l1.984375 0l-3.390625 4.640625l3.65625 5.21875l-2.046875 0l-2.03125 -3.0625l-0.53125 -0.828125l-2.59375 3.890625l-2.015625 0zm18.640625 -10.265625l-1.65625 0.125q-0.21875 -0.984375 -0.640625 -1.421875q-0.671875 -0.71875 -1.65625 -0.71875q-0.8125 0 -1.40625 0.4375q-0.796875 0.578125 -1.25 1.6875q-0.453125 1.09375 -0.46875 3.140625q0.609375 -0.921875 1.46875 -1.359375q0.875 -0.453125 1.828125 -0.453125q1.671875 0 2.84375 1.234375q1.171875 1.234375 1.171875 3.171875q0 1.28125 -0.546875 2.390625q-0.546875 1.09375 -1.515625 1.6875q-0.96875 0.578125 -2.1875 0.578125q-2.09375 0 -3.40625 -1.53125q-1.3125 -1.546875 -1.3125 -5.0625q0 -3.953125 1.453125 -5.734375q1.265625 -1.5625 3.421875 -1.5625q1.609375 0 2.625 0.90625q1.03125 0.890625 1.234375 2.484375zm-6.8125 5.859375q0 0.859375 0.359375 1.65625q0.375 0.78125 1.03125 1.203125q0.65625 0.40625 1.375 0.40625q1.0625 0 1.8125 -0.84375q0.765625 -0.859375 0.765625 -2.328125q0 -1.40625 -0.75 -2.21875q-0.75 -0.8125 -1.890625 -0.8125q-1.125 0 -1.921875 0.8125q-0.78125 0.8125 -0.78125 2.125zm8.531967 0.8125l1.671875 -0.21875q0.28125 1.421875 0.96875 2.046875q0.703125 0.625 1.6875 0.625q1.1875 0 2.0 -0.8125q0.8125 -0.828125 0.8125 -2.03125q0 -1.140625 -0.765625 -1.890625q-0.75 -0.75 -1.90625 -0.75q-0.46875 0 -1.171875 0.1875l0.1875 -1.46875q0.15625 0.015625 0.265625 0.015625q1.0625 0 1.90625 -0.546875q0.859375 -0.5625 0.859375 -1.71875q0 -0.921875 -0.625 -1.515625q-0.609375 -0.609375 -1.59375 -0.609375q-0.96875 0 -1.625 0.609375q-0.640625 0.609375 -0.828125 1.84375l-1.671875 -0.296875q0.296875 -1.6875 1.375 -2.609375q1.09375 -0.921875 2.71875 -0.921875q1.109375 0 2.046875 0.484375q0.9375 0.46875 1.421875 1.296875q0.5 0.828125 0.5 1.75q0 0.890625 -0.46875 1.609375q-0.46875 0.71875 -1.40625 1.15625q1.21875 0.265625 1.875 1.15625q0.671875 0.875 0.671875 2.1875q0 1.78125 -1.296875 3.015625q-1.296875 1.234375 -3.28125 1.234375q-1.796875 0 -2.984375 -1.0625q-1.171875 -1.0625 -1.34375 -2.765625zm10.625717 0.453125l1.59375 -0.15625q0.203125 1.140625 0.78125 1.65625q0.578125 0.5 1.484375 0.5q0.765625 0 1.34375 -0.34375q0.578125 -0.359375 0.953125 -0.953125q0.375 -0.59375 0.625 -1.59375q0.25 -1.0 0.25 -2.03125q0 -0.109375 -0.015625 -0.34375q-0.5 0.796875 -1.375 1.296875q-0.859375 0.5 -1.875 0.5q-1.6875 0 -2.859375 -1.21875q-1.171875 -1.234375 -1.171875 -3.234375q0 -2.078125 1.21875 -3.328125q1.234375 -1.265625 3.0625 -1.265625q1.328125 0 2.421875 0.71875q1.109375 0.703125 1.671875 2.03125q0.578125 1.328125 0.578125 3.828125q0 2.609375 -0.578125 4.15625q-0.5625 1.546875 -1.6875 2.359375q-1.109375 0.796875 -2.609375 0.796875q-1.59375 0 -2.609375 -0.890625q-1.0 -0.890625 -1.203125 -2.484375zm6.828125 -6.0q0 -1.4375 -0.765625 -2.28125q-0.765625 -0.859375 -1.84375 -0.859375q-1.109375 0 -1.9375 0.921875q-0.828125 0.90625 -0.828125 2.34375q0 1.3125 0.78125 2.125q0.796875 0.796875 1.9375 0.796875q1.171875 0 1.90625 -0.796875q0.75 -0.8125 0.75 -2.25zm5.860092 1.765625q-1.046875 -0.375 -1.546875 -1.078125q-0.5 -0.71875 -0.5 -1.703125q0 -1.484375 1.0625 -2.484375q1.078125 -1.015625 2.84375 -1.015625q1.78125 0 2.859375 1.03125q1.09375 1.03125 1.09375 2.515625q0 0.953125 -0.5 1.65625q-0.484375 0.703125 -1.5 1.078125q1.25 0.40625 1.90625 1.3125q0.65625 0.90625 0.65625 2.171875q0 1.75 -1.234375 2.9375q-1.234375 1.1875 -3.25 1.1875q-2.015625 0 -3.25 -1.1875q-1.234375 -1.203125 -1.234375 -2.984375q0 -1.328125 0.671875 -2.21875q0.671875 -0.890625 1.921875 -1.21875zm-0.328125 -2.828125q0 0.96875 0.609375 1.578125q0.625 0.609375 1.625 0.609375q0.953125 0 1.5625 -0.609375q0.625 -0.609375 0.625 -1.484375q0 -0.921875 -0.640625 -1.546875q-0.625 -0.625 -1.578125 -0.625q-0.953125 0 -1.578125 0.609375q-0.625 0.609375 -0.625 1.46875zm-0.546875 6.28125q0 0.71875 0.328125 1.390625q0.34375 0.65625 1.015625 1.03125q0.671875 0.359375 1.4375 0.359375q1.203125 0 1.984375 -0.765625q0.78125 -0.78125 0.78125 -1.96875q0 -1.203125 -0.8125 -1.984375q-0.796875 -0.796875 -2.0 -0.796875q-1.1875 0 -1.96875 0.78125q-0.765625 0.78125 -0.765625 1.953125zm8.688217 0.328125l1.671875 -0.21875q0.28125 1.421875 0.96875 2.046875q0.703125 0.625 1.6875 0.625q1.1875 0 2.0 -0.8125q0.8125 -0.828125 0.8125 -2.03125q0 -1.140625 -0.765625 -1.890625q-0.75 -0.75 -1.90625 -0.75q-0.46875 0 -1.171875 0.1875l0.1875 -1.46875q0.15625 0.015625 0.265625 0.015625q1.0625 0 1.90625 -0.546875q0.859375 -0.5625 0.859375 -1.71875q0 -0.921875 -0.625 -1.515625q-0.609375 -0.609375 -1.59375 -0.609375q-0.96875 0 -1.625 0.609375q-0.640625 0.609375 -0.828125 1.84375l-1.671875 -0.296875q0.296875 -1.6875 1.375 -2.609375q1.09375 -0.921875 2.71875 -0.921875q1.109375 0 2.046875 0.484375q0.9375 0.46875 1.421875 1.296875q0.5 0.828125 0.5 1.75q0 0.890625 -0.46875 1.609375q-0.46875 0.71875 -1.40625 1.15625q1.21875 0.265625 1.875 1.15625q0.671875 0.875 0.671875 2.1875q0 1.78125 -1.296875 3.015625q-1.296875 1.234375 -3.28125 1.234375q-1.796875 0 -2.984375 -1.0625q-1.171875 -1.0625 -1.34375 -2.765625zm11.922577 7.59375l-1.1875 0q2.765625 -4.453125 2.765625 -8.921875q0 -1.734375 -0.390625 -3.453125q-0.328125 -1.390625 -0.890625 -2.671875q-0.359375 -0.84375 -1.484375 -2.78125l1.1875 0q1.75 2.328125 2.578125 4.671875q0.71875 2.015625 0.71875 4.234375q0 2.5 -0.96875 4.84375q-0.953125 2.328125 -2.328125 4.078125z" fill-rule="nonzero"></path><path fill="#000000" fill-opacity="0.0" d="m219.09448 157.53806l0 32.472443" fill-rule="evenodd"></path><path stroke="#000000" stroke-width="1.0" stroke-linejoin="round" stroke-linecap="butt" d="m219.09448 157.53806l0 26.472443" fill-rule="evenodd"></path><path fill="#000000" stroke="#000000" stroke-width="1.0" stroke-linecap="butt" d="m217.44275 184.0105l1.6517334 4.538101l1.6517334 -4.538101z" fill-rule="evenodd"></path><path fill="#000000" fill-opacity="0.0" d="m395.48425 30.700842l166.01575 0l0 42.110237l-166.01575 0z" fill-rule="evenodd"></path><path stroke="#000000" stroke-width="2.0" stroke-linejoin="round" stroke-linecap="butt" d="m395.48425 30.700842l166.01575 0l0 42.110237l-166.01575 0z" fill-rule="evenodd"></path><path fill="#000000" d="m413.11163 57.620842l0 -13.59375l1.8125 0l0 13.59375l-1.8125 0zm4.667694 0l0 -9.859375l1.5 0l0 1.40625q1.09375 -1.625 3.140625 -1.625q0.890625 0 1.640625 0.328125q0.75 0.3125 1.109375 0.84375q0.375 0.515625 0.53125 1.21875q0.09375 0.46875 0.09375 1.625l0 6.0625l-1.671875 0l0 -6.0q0 -1.015625 -0.203125 -1.515625q-0.1875 -0.515625 -0.6875 -0.8125q-0.5 -0.296875 -1.171875 -0.296875q-1.0625 0 -1.84375 0.671875q-0.765625 0.671875 -0.765625 2.578125l0 5.375l-1.671875 0zm10.375702 3.78125l0 -13.640625l1.53125 0l0 1.28125q0.53125 -0.75 1.203125 -1.125q0.6875 -0.375 1.640625 -0.375q1.265625 0 2.234375 0.65625q0.96875 0.640625 1.453125 1.828125q0.5 1.1875 0.5 2.59375q0 1.515625 -0.546875 2.734375q-0.546875 1.203125 -1.578125 1.84375q-1.03125 0.640625 -2.171875 0.640625q-0.84375 0 -1.515625 -0.34375q-0.65625 -0.359375 -1.078125 -0.890625l0 4.796875l-1.671875 0zm1.515625 -8.65625q0 1.90625 0.765625 2.8125q0.78125 0.90625 1.875 0.90625q1.109375 0 1.890625 -0.9375q0.796875 -0.9375 0.796875 -2.921875q0 -1.875 -0.78125 -2.8125q-0.765625 -0.9375 -1.84375 -0.9375q-1.0625 0 -1.890625 1.0q-0.8125 1.0 -0.8125 2.890625zm15.313232 4.875l0 -1.453125q-1.140625 1.671875 -3.125 1.671875q-0.859375 0 -1.625 -0.328125q-0.75 -0.34375 -1.125 -0.84375q-0.359375 -0.5 -0.515625 -1.234375q-0.09375 -0.5 -0.09375 -1.5625l0 -6.109375l1.671875 0l0 5.46875q0 1.3125 0.09375 1.765625q0.15625 0.65625 0.671875 1.03125q0.515625 0.375 1.265625 0.375q0.75 0 1.40625 -0.375q0.65625 -0.390625 0.921875 -1.046875q0.28125 -0.671875 0.28125 -1.9375l0 -5.28125l1.671875 0l0 9.859375l-1.5 0zm7.578827 -1.5l0.234375 1.484375q-0.703125 0.140625 -1.265625 0.140625q-0.90625 0 -1.40625 -0.28125q-0.5 -0.296875 -0.703125 -0.75q-0.203125 -0.46875 -0.203125 -1.984375l0 -5.65625l-1.234375 0l0 -1.3125l1.234375 0l0 -2.4375l1.65625 -1.0l0 3.4375l1.6875 0l0 1.3125l-1.6875 0l0 5.75q0 0.71875 0.078125 0.921875q0.09375 0.203125 0.296875 0.328125q0.203125 0.125 0.578125 0.125q0.265625 0 0.734375 -0.078125zm9.839569 -0.109375l0 1.609375l-8.984375 0q-0.015625 -0.609375 0.1875 -1.15625q0.34375 -0.921875 1.09375 -1.8125q0.765625 -0.890625 2.1875 -2.0625q2.21875 -1.8125 3.0 -2.875q0.78125 -1.0625 0.78125 -2.015625q0 -0.984375 -0.71875 -1.671875q-0.703125 -0.6875 -1.84375 -0.6875q-1.203125 0 -1.9375 0.734375q-0.71875 0.71875 -0.71875 2.0l-1.71875 -0.171875q0.171875 -1.921875 1.328125 -2.921875q1.15625 -1.015625 3.09375 -1.015625q1.953125 0 3.09375 1.09375q1.140625 1.078125 1.140625 2.6875q0 0.8125 -0.34375 1.609375q-0.328125 0.78125 -1.109375 1.65625q-0.765625 0.859375 -2.5625 2.390625q-1.5 1.265625 -1.9375 1.71875q-0.421875 0.4375 -0.703125 0.890625l6.671875 0zm10.434021 5.609375q-1.375 -1.75 -2.328125 -4.078125q-0.953125 -2.34375 -0.953125 -4.84375q0 -2.21875 0.703125 -4.234375q0.84375 -2.34375 2.578125 -4.671875l1.203125 0q-1.125 1.921875 -1.484375 2.75q-0.5625 1.28125 -0.890625 2.671875q-0.40625 1.734375 -0.40625 3.484375q0 4.46875 2.78125 8.921875l-1.203125 0zm2.353302 -6.9375l1.65625 -0.265625q0.140625 1.0 0.765625 1.53125q0.640625 0.515625 1.78125 0.515625q1.15625 0 1.703125 -0.46875q0.5625 -0.46875 0.5625 -1.09375q0 -0.5625 -0.484375 -0.890625q-0.34375 -0.21875 -1.703125 -0.5625q-1.84375 -0.46875 -2.5625 -0.796875q-0.703125 -0.34375 -1.078125 -0.9375q-0.359375 -0.609375 -0.359375 -1.328125q0 -0.65625 0.296875 -1.21875q0.3125 -0.5625 0.828125 -0.9375q0.390625 -0.28125 1.0625 -0.484375q0.671875 -0.203125 1.4375 -0.203125q1.171875 0 2.046875 0.34375q0.875 0.328125 1.28125 0.90625q0.421875 0.5625 0.578125 1.515625l-1.625 0.21875q-0.109375 -0.75 -0.65625 -1.171875q-0.53125 -0.4375 -1.5 -0.4375q-1.15625 0 -1.640625 0.390625q-0.484375 0.375 -0.484375 0.875q0 0.328125 0.203125 0.59375q0.203125 0.265625 0.640625 0.4375q0.25 0.09375 1.46875 0.4375q1.765625 0.46875 2.46875 0.765625q0.703125 0.296875 1.09375 0.875q0.40625 0.578125 0.40625 1.4375q0 0.828125 -0.484375 1.578125q-0.484375 0.734375 -1.40625 1.140625q-0.921875 0.390625 -2.078125 0.390625q-1.921875 0 -2.9375 -0.796875q-1.0 -0.796875 -1.28125 -2.359375zm10.015625 -8.75l0 -1.90625l1.671875 0l0 1.90625l-1.671875 0zm0 11.6875l0 -9.859375l1.671875 0l0 9.859375l-1.671875 0zm3.254181 0l0 -1.359375l6.265625 -7.1875q-1.0625 0.046875 -1.875 0.046875l-4.015625 0l0 -1.359375l8.046875 0l0 1.109375l-5.34375 6.25l-1.015625 1.140625q1.109375 -0.078125 2.09375 -0.078125l4.5625 0l0 1.4375l-8.71875 0zm16.953125 -3.171875l1.71875 0.21875q-0.40625 1.5 -1.515625 2.34375q-1.09375 0.828125 -2.8125 0.828125q-2.15625 0 -3.421875 -1.328125q-1.265625 -1.328125 -1.265625 -3.734375q0 -2.484375 1.265625 -3.859375q1.28125 -1.375 3.328125 -1.375q1.984375 0 3.234375 1.34375q1.25 1.34375 1.25 3.796875q0 0.140625 -0.015625 0.4375l-7.34375 0q0.09375 1.625 0.921875 2.484375q0.828125 0.859375 2.0625 0.859375q0.90625 0 1.546875 -0.46875q0.65625 -0.484375 1.046875 -1.546875zm-5.484375 -2.703125l5.5 0q-0.109375 -1.234375 -0.625 -1.859375q-0.796875 -0.96875 -2.078125 -0.96875q-1.140625 0 -1.9375 0.78125q-0.78125 0.765625 -0.859375 2.046875zm23.074646 -2.125l-8.96875 0l0 -1.5625l8.96875 0l0 1.5625zm0 4.125l-8.96875 0l0 -1.546875l8.96875 0l0 1.546875zm13.125122 3.875l-1.671875 0l0 -10.640625q-0.59375 0.578125 -1.578125 1.15625q-0.984375 0.5625 -1.765625 0.859375l0 -1.625q1.40625 -0.65625 2.453125 -1.59375q1.046875 -0.9375 1.484375 -1.8125l1.078125 0l0 13.65625zm5.6413574 4.0l-1.1875 0q2.765625 -4.453125 2.765625 -8.921875q0 -1.734375 -0.390625 -3.453125q-0.328125 -1.390625 -0.890625 -2.671875q-0.359375 -0.84375 -1.484375 -2.78125l1.1875 0q1.75 2.328125 2.578125 4.671875q0.71875 2.015625 0.71875 4.234375q0 2.5 -0.96875 4.84375q-0.953125 2.328125 -2.328125 4.078125z" fill-rule="nonzero"></path><path fill="#000000" fill-opacity="0.0" d="m388.49344 411.97638l179.99997 0l0 58.992126l-179.99997 0z" fill-rule="evenodd"></path><path stroke="#000000" stroke-width="2.0" stroke-linejoin="round" stroke-linecap="butt" d="m388.49344 411.97638l179.99997 0l0 58.992126l-179.99997 0z" fill-rule="evenodd"></path><path fill="#000000" d="m402.72214 438.89636l0 -13.59375l9.84375 0l0 1.59375l-8.046875 0l0 4.171875l7.53125 0l0 1.59375l-7.53125 0l0 4.625l8.359375 0l0 1.609375l-10.15625 0zm12.193573 0l0 -9.859375l1.5 0l0 1.390625q0.453125 -0.71875 1.21875 -1.15625q0.78125 -0.453125 1.765625 -0.453125q1.09375 0 1.796875 0.453125q0.703125 0.453125 0.984375 1.28125q1.171875 -1.734375 3.046875 -1.734375q1.46875 0 2.25 0.8125q0.796875 0.8125 0.796875 2.5l0 6.765625l-1.671875 0l0 -6.203125q0 -1.0 -0.15625 -1.4375q-0.15625 -0.453125 -0.59375 -0.71875q-0.421875 -0.265625 -1.0 -0.265625q-1.03125 0 -1.71875 0.6875q-0.6875 0.6875 -0.6875 2.21875l0 5.71875l-1.671875 0l0 -6.40625q0 -1.109375 -0.40625 -1.65625q-0.40625 -0.5625 -1.34375 -0.5625q-0.703125 0 -1.3125 0.375q-0.59375 0.359375 -0.859375 1.078125q-0.265625 0.71875 -0.265625 2.0625l0 5.109375l-1.671875 0zm17.087677 0l-1.546875 0l0 -13.59375l1.65625 0l0 4.84375q1.0625 -1.328125 2.703125 -1.328125q0.90625 0 1.71875 0.375q0.8125 0.359375 1.328125 1.03125q0.53125 0.65625 0.828125 1.59375q0.296875 0.9375 0.296875 2.0q0 2.53125 -1.25 3.921875q-1.25 1.375 -3.0 1.375q-1.75 0 -2.734375 -1.453125l0 1.234375zm-0.015625 -5.0q0 1.765625 0.46875 2.5625q0.796875 1.28125 2.140625 1.28125q1.09375 0 1.890625 -0.9375q0.796875 -0.953125 0.796875 -2.84375q0 -1.921875 -0.765625 -2.84375q-0.765625 -0.921875 -1.84375 -0.921875q-1.09375 0 -1.890625 0.953125q-0.796875 0.953125 -0.796875 2.75zm15.594452 1.828125l1.71875 0.21875q-0.40625 1.5 -1.515625 2.34375q-1.09375 0.828125 -2.8125 0.828125q-2.15625 0 -3.421875 -1.328125q-1.265625 -1.328125 -1.265625 -3.734375q0 -2.484375 1.265625 -3.859375q1.28125 -1.375 3.328125 -1.375q1.984375 0 3.234375 1.34375q1.25 1.34375 1.25 3.796875q0 0.140625 -0.015625 0.4375l-7.34375 0q0.09375 1.625 0.921875 2.484375q0.828125 0.859375 2.0625 0.859375q0.90625 0 1.546875 -0.46875q0.65625 -0.484375 1.046875 -1.546875zm-5.484375 -2.703125l5.5 0q-0.109375 -1.234375 -0.625 -1.859375q-0.796875 -0.96875 -2.078125 -0.96875q-1.140625 0 -1.9375 0.78125q-0.78125 0.765625 -0.859375 2.046875zm15.500732 5.875l0 -1.25q-0.9375 1.46875 -2.75 1.46875q-1.171875 0 -2.171875 -0.640625q-0.984375 -0.65625 -1.53125 -1.8125q-0.53125 -1.171875 -0.53125 -2.6875q0 -1.46875 0.484375 -2.671875q0.5 -1.203125 1.46875 -1.84375q0.984375 -0.640625 2.203125 -0.640625q0.890625 0 1.578125 0.375q0.703125 0.375 1.140625 0.984375l0 -4.875l1.65625 0l0 13.59375l-1.546875 0zm-5.28125 -4.921875q0 1.890625 0.796875 2.828125q0.8125 0.9375 1.890625 0.9375q1.09375 0 1.859375 -0.890625q0.765625 -0.890625 0.765625 -2.734375q0 -2.015625 -0.78125 -2.953125q-0.78125 -0.953125 -1.921875 -0.953125q-1.109375 0 -1.859375 0.90625q-0.75 0.90625 -0.75 2.859375zm15.656952 4.921875l0 -1.25q-0.9375 1.46875 -2.75 1.46875q-1.171875 0 -2.171875 -0.640625q-0.984375 -0.65625 -1.53125 -1.8125q-0.53125 -1.171875 -0.53125 -2.6875q0 -1.46875 0.484375 -2.671875q0.5 -1.203125 1.46875 -1.84375q0.984375 -0.640625 2.203125 -0.640625q0.890625 0 1.578125 0.375q0.703125 0.375 1.140625 0.984375l0 -4.875l1.65625 0l0 13.59375l-1.546875 0zm-5.28125 -4.921875q0 1.890625 0.796875 2.828125q0.8125 0.9375 1.890625 0.9375q1.09375 0 1.859375 -0.890625q0.765625 -0.890625 0.765625 -2.734375q0 -2.015625 -0.78125 -2.953125q-0.78125 -0.953125 -1.921875 -0.953125q-1.109375 0 -1.859375 0.90625q-0.75 0.90625 -0.75 2.859375zm9.281982 -6.765625l0 -1.90625l1.671875 0l0 1.90625l-1.671875 0zm0 11.6875l0 -9.859375l1.671875 0l0 9.859375l-1.671875 0zm4.129181 0l0 -9.859375l1.5 0l0 1.40625q1.09375 -1.625 3.140625 -1.625q0.890625 0 1.640625 0.328125q0.75 0.3125 1.109375 0.84375q0.375 0.515625 0.53125 1.21875q0.09375 0.46875 0.09375 1.625l0 6.0625l-1.671875 0l0 -6.0q0 -1.015625 -0.203125 -1.515625q-0.1875 -0.515625 -0.6875 -0.8125q-0.5 -0.296875 -1.171875 -0.296875q-1.0625 0 -1.84375 0.671875q-0.765625 0.671875 -0.765625 2.578125l0 5.375l-1.671875 0zm10.078857 0.8125l1.609375 0.25q0.109375 0.75 0.578125 1.09375q0.609375 0.453125 1.6875 0.453125q1.171875 0 1.796875 -0.46875q0.625 -0.453125 0.859375 -1.28125q0.125 -0.515625 0.109375 -2.15625q-1.09375 1.296875 -2.71875 1.296875q-2.03125 0 -3.15625 -1.46875q-1.109375 -1.46875 -1.109375 -3.515625q0 -1.40625 0.515625 -2.59375q0.515625 -1.203125 1.484375 -1.84375q0.96875 -0.65625 2.265625 -0.65625q1.75 0 2.875 1.40625l0 -1.1875l1.546875 0l0 8.515625q0 2.3125 -0.46875 3.265625q-0.46875 0.96875 -1.484375 1.515625q-1.015625 0.5625 -2.5 0.5625q-1.765625 0 -2.859375 -0.796875q-1.078125 -0.796875 -1.03125 -2.390625zm1.375 -5.921875q0 1.953125 0.765625 2.84375q0.78125 0.890625 1.9375 0.890625q1.140625 0 1.921875 -0.890625q0.78125 -0.890625 0.78125 -2.78125q0 -1.8125 -0.8125 -2.71875q-0.796875 -0.921875 -1.921875 -0.921875q-1.109375 0 -1.890625 0.90625q-0.78125 0.890625 -0.78125 2.671875zm14.449646 5.109375l0 -13.59375l1.671875 0l0 13.59375l-1.671875 0zm3.551056 -4.921875q0 -2.734375 1.53125 -4.0625q1.265625 -1.09375 3.09375 -1.09375q2.0312805 0 3.3125305 1.34375q1.296875 1.328125 1.296875 3.671875q0 1.90625 -0.578125 3.0q-0.5625 1.078125 -1.65625 1.6875q-1.0781555 0.59375 -2.3750305 0.59375q-2.0625 0 -3.34375 -1.328125q-1.28125 -1.328125 -1.28125 -3.8125zm1.71875 0q0 1.890625 0.828125 2.828125q0.828125 0.9375 2.078125 0.9375q1.25 0 2.0625305 -0.9375q0.828125 -0.953125 0.828125 -2.890625q0 -1.828125 -0.828125 -2.765625q-0.8281555 -0.9375 -2.0625305 -0.9375q-1.25 0 -2.078125 0.9375q-0.828125 0.9375 -0.828125 2.828125zm8.656952 0q0 -2.734375 1.53125 -4.0625q1.265625 -1.09375 3.09375 -1.09375q2.03125 0 3.3125 1.34375q1.296875 1.328125 1.296875 3.671875q0 1.90625 -0.578125 3.0q-0.5625 1.078125 -1.65625 1.6875q-1.078125 0.59375 -2.375 0.59375q-2.0625 0 -3.34375 -1.328125q-1.28125 -1.328125 -1.28125 -3.8125zm1.71875 0q0 1.890625 0.828125 2.828125q0.828125 0.9375 2.078125 0.9375q1.25 0 2.0625 -0.9375q0.828125 -0.953125 0.828125 -2.890625q0 -1.828125 -0.828125 -2.765625q-0.828125 -0.9375 -2.0625 -0.9375q-1.25 0 -2.078125 0.9375q-0.828125 0.9375 -0.828125 2.828125zm9.297607 4.921875l0 -13.59375l1.671875 0l0 7.75l3.953125 -4.015625l2.15625 0l-3.765625 3.65625l4.140625 6.203125l-2.0625 0l-3.25 -5.03125l-1.171875 1.125l0 3.90625l-1.671875 0zm15.765625 0l0 -1.453125q-1.140625 1.671875 -3.125 1.671875q-0.859375 0 -1.625 -0.328125q-0.75 -0.34375 -1.125 -0.84375q-0.359375 -0.5 -0.515625 -1.234375q-0.09375 -0.5 -0.09375 -1.5625l0 -6.109375l1.671875 0l0 5.46875q0 1.3125 0.09375 1.765625q0.15625 0.65625 0.671875 1.03125q0.515625 0.375 1.265625 0.375q0.75 0 1.40625 -0.375q0.65625 -0.390625 0.921875 -1.046875q0.28125 -0.671875 0.28125 -1.9375l0 -5.28125l1.671875 0l0 9.859375l-1.5 0zm3.9226074 3.78125l0 -13.640625l1.53125 0l0 1.28125q0.53125 -0.75 1.203125 -1.125q0.6875 -0.375 1.640625 -0.375q1.265625 0 2.234375 0.65625q0.96875 0.640625 1.453125 1.828125q0.5 1.1875 0.5 2.59375q0 1.515625 -0.546875 2.734375q-0.546875 1.203125 -1.578125 1.84375q-1.03125 0.640625 -2.171875 0.640625q-0.84375 0 -1.515625 -0.34375q-0.65625 -0.359375 -1.078125 -0.890625l0 4.796875l-1.671875 0zm1.515625 -8.65625q0 1.90625 0.765625 2.8125q0.78125 0.90625 1.875 0.90625q1.109375 0 1.890625 -0.9375q0.796875 -0.9375 0.796875 -2.921875q0 -1.875 -0.78125 -2.8125q-0.765625 -0.9375 -1.84375 -0.9375q-1.0625 0 -1.890625 1.0q-0.8125 1.0 -0.8125 2.890625z" fill-rule="nonzero"></path><path fill="#000000" d="m435.7392 464.89636q-1.375 -1.75 -2.328125 -4.078125q-0.953125 -2.34375 -0.953125 -4.84375q0 -2.21875 0.703125 -4.234375q0.84375 -2.34375 2.578125 -4.671875l1.203125 0q-1.125 1.921875 -1.484375 2.75q-0.5625 1.28125 -0.890625 2.671875q-0.40625 1.734375 -0.40625 3.484375q0 4.46875 2.78125 8.921875l-1.203125 0zm2.572052 -7.59375l1.671875 -0.21875q0.28125 1.421875 0.96875 2.046875q0.703125 0.625 1.6875 0.625q1.1875 0 2.0 -0.8125q0.8125 -0.828125 0.8125 -2.03125q0 -1.140625 -0.765625 -1.890625q-0.75 -0.75 -1.90625 -0.75q-0.46875 0 -1.171875 0.1875l0.1875 -1.46875q0.15625 0.015625 0.265625 0.015625q1.0625 0 1.90625 -0.546875q0.859375 -0.5625 0.859375 -1.71875q0 -0.921875 -0.625 -1.515625q-0.609375 -0.609375 -1.59375 -0.609375q-0.96875 0 -1.625 0.609375q-0.640625 0.609375 -0.828125 1.84375l-1.671875 -0.296875q0.296875 -1.6875 1.375 -2.609375q1.09375 -0.921875 2.71875 -0.921875q1.109375 0 2.046875 0.484375q0.9375 0.46875 1.421875 1.296875q0.5 0.828125 0.5 1.75q0 0.890625 -0.46875 1.609375q-0.46875 0.71875 -1.40625 1.15625q1.21875 0.265625 1.875 1.15625q0.671875 0.875 0.671875 2.1875q0 1.78125 -1.296875 3.015625q-1.296875 1.234375 -3.28125 1.234375q-1.796875 0 -2.984375 -1.0625q-1.171875 -1.0625 -1.34375 -2.765625zm19.141327 1.984375l0 1.609375l-8.984375 0q-0.015625 -0.609375 0.1875 -1.15625q0.34375 -0.921875 1.09375 -1.8125q0.765625 -0.890625 2.1875 -2.0625q2.21875 -1.8125 3.0 -2.875q0.78125 -1.0625 0.78125 -2.015625q0 -0.984375 -0.71875 -1.671875q-0.703125 -0.6875 -1.84375 -0.6875q-1.203125 0 -1.9375 0.734375q-0.71875 0.71875 -0.71875 2.0l-1.71875 -0.171875q0.171875 -1.921875 1.328125 -2.921875q1.15625 -1.015625 3.09375 -1.015625q1.953125 0 3.09375 1.09375q1.140625 1.078125 1.140625 2.6875q0 0.8125 -0.34375 1.609375q-0.328125 0.78125 -1.109375 1.65625q-0.765625 0.859375 -2.5625 2.390625q-1.5 1.265625 -1.9375 1.71875q-0.421875 0.4375 -0.703125 0.890625l6.671875 0zm0.9538574 1.609375l3.59375 -5.125l-3.328125 -4.734375l2.09375 0l1.515625 2.3125q0.421875 0.65625 0.671875 1.109375q0.421875 -0.609375 0.765625 -1.09375l1.65625 -2.328125l1.984375 0l-3.390625 4.640625l3.65625 5.21875l-2.046875 0l-2.03125 -3.0625l-0.53125 -0.828125l-2.59375 3.890625l-2.015625 0zm18.640625 -10.265625l-1.65625 0.125q-0.21875 -0.984375 -0.640625 -1.421875q-0.671875 -0.71875 -1.65625 -0.71875q-0.8125 0 -1.40625 0.4375q-0.796875 0.578125 -1.25 1.6875q-0.453125 1.09375 -0.46875 3.140625q0.609375 -0.921875 1.46875 -1.359375q0.875 -0.453125 1.828125 -0.453125q1.671875 0 2.84375 1.234375q1.171875 1.234375 1.171875 3.171875q0 1.28125 -0.546875 2.390625q-0.546875 1.09375 -1.515625 1.6875q-0.96875 0.578125 -2.1875 0.578125q-2.09375 0 -3.40625 -1.53125q-1.3125 -1.546875 -1.3125 -5.0625q0 -3.953125 1.453125 -5.734375q1.265625 -1.5625 3.421875 -1.5625q1.609375 0 2.625 0.90625q1.03125 0.890625 1.234375 2.484375zm-6.8125 5.859375q0 0.859375 0.359375 1.65625q0.375 0.78125 1.03125 1.203125q0.65625 0.40625 1.375 0.40625q1.0625 0 1.8125 -0.84375q0.765625 -0.859375 0.765625 -2.328125q0 -1.40625 -0.75 -2.21875q-0.75 -0.8125 -1.890625 -0.8125q-1.125 0 -1.921875 0.8125q-0.78125 0.8125 -0.78125 2.125zm8.531952 0.8125l1.671875 -0.21875q0.28125 1.421875 0.96875 2.046875q0.703125 0.625 1.6875 0.625q1.1875 0 2.0 -0.8125q0.8125 -0.828125 0.8125 -2.03125q0 -1.140625 -0.765625 -1.890625q-0.75 -0.75 -1.90625 -0.75q-0.46875 0 -1.171875 0.1875l0.1875 -1.46875q0.15625 0.015625 0.265625 0.015625q1.0625 0 1.90625 -0.546875q0.859375 -0.5625 0.859375 -1.71875q0 -0.921875 -0.625 -1.515625q-0.609375 -0.609375 -1.59375 -0.609375q-0.96875 0 -1.625 0.609375q-0.640625 0.609375 -0.828125 1.84375l-1.671875 -0.296875q0.296875 -1.6875 1.375 -2.609375q1.09375 -0.921875 2.71875 -0.921875q1.109375 0 2.046875 0.484375q0.9375 0.46875 1.421875 1.296875q0.5 0.828125 0.5 1.75q0 0.890625 -0.46875 1.609375q-0.46875 0.71875 -1.40625 1.15625q1.21875 0.265625 1.875 1.15625q0.671875 0.875 0.671875 2.1875q0 1.78125 -1.296875 3.015625q-1.296875 1.234375 -3.28125 1.234375q-1.796875 0 -2.984375 -1.0625q-1.171875 -1.0625 -1.34375 -2.765625zm10.625732 0.453125l1.59375 -0.15625q0.203125 1.140625 0.78125 1.65625q0.578125 0.5 1.484375 0.5q0.765625 0 1.34375 -0.34375q0.578125 -0.359375 0.953125 -0.953125q0.375 -0.59375 0.625 -1.59375q0.25 -1.0 0.25 -2.03125q0 -0.109375 -0.015625 -0.34375q-0.5 0.796875 -1.375 1.296875q-0.859375 0.5 -1.875 0.5q-1.6875 0 -2.859375 -1.21875q-1.171875 -1.234375 -1.171875 -3.234375q0 -2.078125 1.21875 -3.328125q1.234375 -1.265625 3.0625 -1.265625q1.328125 0 2.421875 0.71875q1.109375 0.703125 1.671875 2.03125q0.578125 1.328125 0.578125 3.828125q0 2.609375 -0.578125 4.15625q-0.5625 1.546875 -1.6875 2.359375q-1.109375 0.796875 -2.609375 0.796875q-1.59375 0 -2.609375 -0.890625q-1.0 -0.890625 -1.203125 -2.484375zm6.828125 -6.0q0 -1.4375 -0.765625 -2.28125q-0.765625 -0.859375 -1.84375 -0.859375q-1.109375 0 -1.9375 0.921875q-0.828125 0.90625 -0.828125 2.34375q0 1.3125 0.78125 2.125q0.796875 0.796875 1.9375 0.796875q1.171875 0 1.90625 -0.796875q0.75 -0.8125 0.75 -2.25zm5.860077 1.765625q-1.046875 -0.375 -1.546875 -1.078125q-0.5 -0.71875 -0.5 -1.703125q0 -1.484375 1.0625 -2.484375q1.078125 -1.015625 2.84375 -1.015625q1.78125 0 2.859375 1.03125q1.09375 1.03125 1.09375 2.515625q0 0.953125 -0.5 1.65625q-0.484375 0.703125 -1.5 1.078125q1.25 0.40625 1.90625 1.3125q0.65625 0.90625 0.65625 2.171875q0 1.75 -1.234375 2.9375q-1.234375 1.1875 -3.25 1.1875q-2.015625 0 -3.25 -1.1875q-1.234375 -1.203125 -1.234375 -2.984375q0 -1.328125 0.671875 -2.21875q0.671875 -0.890625 1.921875 -1.21875zm-0.328125 -2.828125q0 0.96875 0.609375 1.578125q0.625 0.609375 1.625 0.609375q0.953125 0 1.5625 -0.609375q0.625 -0.609375 0.625 -1.484375q0 -0.921875 -0.640625 -1.546875q-0.625 -0.625 -1.578125 -0.625q-0.953125 0 -1.578125 0.609375q-0.625 0.609375 -0.625 1.46875zm-0.546875 6.28125q0 0.71875 0.328125 1.390625q0.34375 0.65625 1.015625 1.03125q0.671875 0.359375 1.4375 0.359375q1.203125 0 1.984375 -0.765625q0.78125 -0.78125 0.78125 -1.96875q0 -1.203125 -0.8125 -1.984375q-0.796875 -0.796875 -2.0 -0.796875q-1.1875 0 -1.96875 0.78125q-0.765625 0.78125 -0.765625 1.953125zm8.688232 0.328125l1.671875 -0.21875q0.28125 1.421875 0.96875 2.046875q0.703125 0.625 1.6875 0.625q1.1875 0 2.0 -0.8125q0.8125 -0.828125 0.8125 -2.03125q0 -1.140625 -0.765625 -1.890625q-0.75 -0.75 -1.90625 -0.75q-0.46875 0 -1.171875 0.1875l0.1875 -1.46875q0.15625 0.015625 0.265625 0.015625q1.0625 0 1.90625 -0.546875q0.859375 -0.5625 0.859375 -1.71875q0 -0.921875 -0.625 -1.515625q-0.609375 -0.609375 -1.59375 -0.609375q-0.96875 0 -1.625 0.609375q-0.640625 0.609375 -0.828125 1.84375l-1.671875 -0.296875q0.296875 -1.6875 1.375 -2.609375q1.09375 -0.921875 2.71875 -0.921875q1.109375 0 2.046875 0.484375q0.9375 0.46875 1.421875 1.296875q0.5 0.828125 0.5 1.75q0 0.890625 -0.46875 1.609375q-0.46875 0.71875 -1.40625 1.15625q1.21875 0.265625 1.875 1.15625q0.671875 0.875 0.671875 2.1875q0 1.78125 -1.296875 3.015625q-1.296875 1.234375 -3.28125 1.234375q-1.796875 0 -2.984375 -1.0625q-1.171875 -1.0625 -1.34375 -2.765625zm11.922546 7.59375l-1.1875 0q2.765625 -4.453125 2.765625 -8.921875q0 -1.734375 -0.390625 -3.453125q-0.328125 -1.390625 -0.890625 -2.671875q-0.359375 -0.84375 -1.484375 -2.78125l1.1875 0q1.75 2.328125 2.578125 4.671875q0.71875 2.015625 0.71875 4.234375q0 2.5 -0.96875 4.84375q-0.953125 2.328125 -2.328125 4.078125z" fill-rule="nonzero"></path><path fill="#000000" fill-opacity="0.0" d="m388.49344 567.8504l179.99997 0l0 58.992126l-179.99997 0z" fill-rule="evenodd"></path><path stroke="#000000" stroke-width="2.0" stroke-linejoin="round" stroke-linecap="butt" d="m388.49344 567.8504l179.99997 0l0 58.992126l-179.99997 0z" fill-rule="evenodd"></path><path fill="#000000" d="m402.72214 594.7704l0 -13.59375l9.84375 0l0 1.59375l-8.046875 0l0 4.171875l7.53125 0l0 1.59375l-7.53125 0l0 4.625l8.359375 0l0 1.609375l-10.15625 0zm12.193573 0l0 -9.859375l1.5 0l0 1.390625q0.453125 -0.71875 1.21875 -1.15625q0.78125 -0.453125 1.765625 -0.453125q1.09375 0 1.796875 0.453125q0.703125 0.453125 0.984375 1.28125q1.171875 -1.734375 3.046875 -1.734375q1.46875 0 2.25 0.8125q0.796875 0.8125 0.796875 2.5l0 6.765625l-1.671875 0l0 -6.203125q0 -1.0 -0.15625 -1.4375q-0.15625 -0.453125 -0.59375 -0.71875q-0.421875 -0.265625 -1.0 -0.265625q-1.03125 0 -1.71875 0.6875q-0.6875 0.6875 -0.6875 2.21875l0 5.71875l-1.671875 0l0 -6.40625q0 -1.109375 -0.40625 -1.65625q-0.40625 -0.5625 -1.34375 -0.5625q-0.703125 0 -1.3125 0.375q-0.59375 0.359375 -0.859375 1.078125q-0.265625 0.71875 -0.265625 2.0625l0 5.109375l-1.671875 0zm17.087677 0l-1.546875 0l0 -13.59375l1.65625 0l0 4.84375q1.0625 -1.328125 2.703125 -1.328125q0.90625 0 1.71875 0.375q0.8125 0.359375 1.328125 1.03125q0.53125 0.65625 0.828125 1.59375q0.296875 0.9375 0.296875 2.0q0 2.53125 -1.25 3.921875q-1.25 1.375 -3.0 1.375q-1.75 0 -2.734375 -1.453125l0 1.234375zm-0.015625 -5.0q0 1.765625 0.46875 2.5625q0.796875 1.28125 2.140625 1.28125q1.09375 0 1.890625 -0.9375q0.796875 -0.953125 0.796875 -2.84375q0 -1.921875 -0.765625 -2.84375q-0.765625 -0.921875 -1.84375 -0.921875q-1.09375 0 -1.890625 0.953125q-0.796875 0.953125 -0.796875 2.75zm15.594452 1.828125l1.71875 0.21875q-0.40625 1.5 -1.515625 2.34375q-1.09375 0.828125 -2.8125 0.828125q-2.15625 0 -3.421875 -1.328125q-1.265625 -1.328125 -1.265625 -3.734375q0 -2.484375 1.265625 -3.859375q1.28125 -1.375 3.328125 -1.375q1.984375 0 3.234375 1.34375q1.25 1.34375 1.25 3.796875q0 0.140625 -0.015625 0.4375l-7.34375 0q0.09375 1.625 0.921875 2.484375q0.828125 0.859375 2.0625 0.859375q0.90625 0 1.546875 -0.46875q0.65625 -0.484375 1.046875 -1.546875zm-5.484375 -2.703125l5.5 0q-0.109375 -1.234375 -0.625 -1.859375q-0.796875 -0.96875 -2.078125 -0.96875q-1.140625 0 -1.9375 0.78125q-0.78125 0.765625 -0.859375 2.046875zm15.500732 5.875l0 -1.25q-0.9375 1.46875 -2.75 1.46875q-1.171875 0 -2.171875 -0.640625q-0.984375 -0.65625 -1.53125 -1.8125q-0.53125 -1.171875 -0.53125 -2.6875q0 -1.46875 0.484375 -2.671875q0.5 -1.203125 1.46875 -1.84375q0.984375 -0.640625 2.203125 -0.640625q0.890625 0 1.578125 0.375q0.703125 0.375 1.140625 0.984375l0 -4.875l1.65625 0l0 13.59375l-1.546875 0zm-5.28125 -4.921875q0 1.890625 0.796875 2.828125q0.8125 0.9375 1.890625 0.9375q1.09375 0 1.859375 -0.890625q0.765625 -0.890625 0.765625 -2.734375q0 -2.015625 -0.78125 -2.953125q-0.78125 -0.953125 -1.921875 -0.953125q-1.109375 0 -1.859375 0.90625q-0.75 0.90625 -0.75 2.859375zm15.656952 4.921875l0 -1.25q-0.9375 1.46875 -2.75 1.46875q-1.171875 0 -2.171875 -0.640625q-0.984375 -0.65625 -1.53125 -1.8125q-0.53125 -1.171875 -0.53125 -2.6875q0 -1.46875 0.484375 -2.671875q0.5 -1.203125 1.46875 -1.84375q0.984375 -0.640625 2.203125 -0.640625q0.890625 0 1.578125 0.375q0.703125 0.375 1.140625 0.984375l0 -4.875l1.65625 0l0 13.59375l-1.546875 0zm-5.28125 -4.921875q0 1.890625 0.796875 2.828125q0.8125 0.9375 1.890625 0.9375q1.09375 0 1.859375 -0.890625q0.765625 -0.890625 0.765625 -2.734375q0 -2.015625 -0.78125 -2.953125q-0.78125 -0.953125 -1.921875 -0.953125q-1.109375 0 -1.859375 0.90625q-0.75 0.90625 -0.75 2.859375zm9.281982 -6.765625l0 -1.90625l1.671875 0l0 1.90625l-1.671875 0zm0 11.6875l0 -9.859375l1.671875 0l0 9.859375l-1.671875 0zm4.129181 0l0 -9.859375l1.5 0l0 1.40625q1.09375 -1.625 3.140625 -1.625q0.890625 0 1.640625 0.328125q0.75 0.3125 1.109375 0.84375q0.375 0.515625 0.53125 1.21875q0.09375 0.46875 0.09375 1.625l0 6.0625l-1.671875 0l0 -6.0q0 -1.015625 -0.203125 -1.515625q-0.1875 -0.515625 -0.6875 -0.8125q-0.5 -0.296875 -1.171875 -0.296875q-1.0625 0 -1.84375 0.671875q-0.765625 0.671875 -0.765625 2.578125l0 5.375l-1.671875 0zm10.078857 0.8125l1.609375 0.25q0.109375 0.75 0.578125 1.09375q0.609375 0.453125 1.6875 0.453125q1.171875 0 1.796875 -0.46875q0.625 -0.453125 0.859375 -1.28125q0.125 -0.515625 0.109375 -2.15625q-1.09375 1.296875 -2.71875 1.296875q-2.03125 0 -3.15625 -1.46875q-1.109375 -1.46875 -1.109375 -3.515625q0 -1.40625 0.515625 -2.59375q0.515625 -1.203125 1.484375 -1.84375q0.96875 -0.65625 2.265625 -0.65625q1.75 0 2.875 1.40625l0 -1.1875l1.546875 0l0 8.515625q0 2.3125 -0.46875 3.265625q-0.46875 0.96875 -1.484375 1.515625q-1.015625 0.5625 -2.5 0.5625q-1.765625 0 -2.859375 -0.796875q-1.078125 -0.796875 -1.03125 -2.390625zm1.375 -5.921875q0 1.953125 0.765625 2.84375q0.78125 0.890625 1.9375 0.890625q1.140625 0 1.921875 -0.890625q0.78125 -0.890625 0.78125 -2.78125q0 -1.8125 -0.8125 -2.71875q-0.796875 -0.921875 -1.921875 -0.921875q-1.109375 0 -1.890625 0.90625q-0.78125 0.890625 -0.78125 2.671875zm14.449646 5.109375l0 -13.59375l1.671875 0l0 13.59375l-1.671875 0zm3.551056 -4.921875q0 -2.734375 1.53125 -4.0625q1.265625 -1.09375 3.09375 -1.09375q2.0312805 0 3.3125305 1.34375q1.296875 1.328125 1.296875 3.671875q0 1.90625 -0.578125 3.0q-0.5625 1.078125 -1.65625 1.6875q-1.0781555 0.59375 -2.3750305 0.59375q-2.0625 0 -3.34375 -1.328125q-1.28125 -1.328125 -1.28125 -3.8125zm1.71875 0q0 1.890625 0.828125 2.828125q0.828125 0.9375 2.078125 0.9375q1.25 0 2.0625305 -0.9375q0.828125 -0.953125 0.828125 -2.890625q0 -1.828125 -0.828125 -2.765625q-0.8281555 -0.9375 -2.0625305 -0.9375q-1.25 0 -2.078125 0.9375q-0.828125 0.9375 -0.828125 2.828125zm8.656952 0q0 -2.734375 1.53125 -4.0625q1.265625 -1.09375 3.09375 -1.09375q2.03125 0 3.3125 1.34375q1.296875 1.328125 1.296875 3.671875q0 1.90625 -0.578125 3.0q-0.5625 1.078125 -1.65625 1.6875q-1.078125 0.59375 -2.375 0.59375q-2.0625 0 -3.34375 -1.328125q-1.28125 -1.328125 -1.28125 -3.8125zm1.71875 0q0 1.890625 0.828125 2.828125q0.828125 0.9375 2.078125 0.9375q1.25 0 2.0625 -0.9375q0.828125 -0.953125 0.828125 -2.890625q0 -1.828125 -0.828125 -2.765625q-0.828125 -0.9375 -2.0625 -0.9375q-1.25 0 -2.078125 0.9375q-0.828125 0.9375 -0.828125 2.828125zm9.297607 4.921875l0 -13.59375l1.671875 0l0 7.75l3.953125 -4.015625l2.15625 0l-3.765625 3.65625l4.140625 6.203125l-2.0625 0l-3.25 -5.03125l-1.171875 1.125l0 3.90625l-1.671875 0zm15.765625 0l0 -1.453125q-1.140625 1.671875 -3.125 1.671875q-0.859375 0 -1.625 -0.328125q-0.75 -0.34375 -1.125 -0.84375q-0.359375 -0.5 -0.515625 -1.234375q-0.09375 -0.5 -0.09375 -1.5625l0 -6.109375l1.671875 0l0 5.46875q0 1.3125 0.09375 1.765625q0.15625 0.65625 0.671875 1.03125q0.515625 0.375 1.265625 0.375q0.75 0 1.40625 -0.375q0.65625 -0.390625 0.921875 -1.046875q0.28125 -0.671875 0.28125 -1.9375l0 -5.28125l1.671875 0l0 9.859375l-1.5 0zm3.9226074 3.78125l0 -13.640625l1.53125 0l0 1.28125q0.53125 -0.75 1.203125 -1.125q0.6875 -0.375 1.640625 -0.375q1.265625 0 2.234375 0.65625q0.96875 0.640625 1.453125 1.828125q0.5 1.1875 0.5 2.59375q0 1.515625 -0.546875 2.734375q-0.546875 1.203125 -1.578125 1.84375q-1.03125 0.640625 -2.171875 0.640625q-0.84375 0 -1.515625 -0.34375q-0.65625 -0.359375 -1.078125 -0.890625l0 4.796875l-1.671875 0zm1.515625 -8.65625q0 1.90625 0.765625 2.8125q0.78125 0.90625 1.875 0.90625q1.109375 0 1.890625 -0.9375q0.796875 -0.9375 0.796875 -2.921875q0 -1.875 -0.78125 -2.8125q-0.765625 -0.9375 -1.84375 -0.9375q-1.0625 0 -1.890625 1.0q-0.8125 1.0 -0.8125 2.890625z" fill-rule="nonzero"></path><path fill="#000000" d="m440.92703 620.7704q-1.375 -1.75 -2.328125 -4.078125q-0.953125 -2.34375 -0.953125 -4.84375q0 -2.21875 0.703125 -4.234375q0.84375 -2.34375 2.578125 -4.671875l1.203125 0q-1.125 1.921875 -1.484375 2.75q-0.5625 1.28125 -0.890625 2.671875q-0.40625 1.734375 -0.40625 3.484375q0 4.46875 2.78125 8.921875l-1.203125 0zm8.853302 -4.0l-1.671875 0l0 -10.640625q-0.59375 0.578125 -1.578125 1.15625q-0.984375 0.5625 -1.765625 0.859375l0 -1.625q1.40625 -0.65625 2.453125 -1.59375q1.046875 -0.9375 1.484375 -1.8125l1.078125 0l0 13.65625zm3.4382324 0l3.59375 -5.125l-3.328125 -4.734375l2.09375 0l1.515625 2.3125q0.421875 0.65625 0.671875 1.109375q0.421875 -0.609375 0.765625 -1.09375l1.65625 -2.328125l1.984375 0l-3.390625 4.640625l3.65625 5.21875l-2.046875 0l-2.03125 -3.0625l-0.53125 -0.828125l-2.59375 3.890625l-2.015625 0zm18.640625 -10.265625l-1.65625 0.125q-0.21875 -0.984375 -0.640625 -1.421875q-0.671875 -0.71875 -1.65625 -0.71875q-0.8125 0 -1.40625 0.4375q-0.796875 0.578125 -1.25 1.6875q-0.453125 1.09375 -0.46875 3.140625q0.609375 -0.921875 1.46875 -1.359375q0.875 -0.453125 1.828125 -0.453125q1.671875 0 2.84375 1.234375q1.171875 1.234375 1.171875 3.171875q0 1.28125 -0.546875 2.390625q-0.546875 1.09375 -1.515625 1.6875q-0.96875 0.578125 -2.1875 0.578125q-2.09375 0 -3.40625 -1.53125q-1.3125 -1.546875 -1.3125 -5.0625q0 -3.953125 1.453125 -5.734375q1.265625 -1.5625 3.421875 -1.5625q1.609375 0 2.625 0.90625q1.03125 0.890625 1.234375 2.484375zm-6.8125 5.859375q0 0.859375 0.359375 1.65625q0.375 0.78125 1.03125 1.203125q0.65625 0.40625 1.375 0.40625q1.0625 0 1.8125 -0.84375q0.765625 -0.859375 0.765625 -2.328125q0 -1.40625 -0.75 -2.21875q-0.75 -0.8125 -1.890625 -0.8125q-1.125 0 -1.921875 0.8125q-0.78125 0.8125 -0.78125 2.125zm8.531952 0.8125l1.671875 -0.21875q0.28125 1.421875 0.96875 2.046875q0.703125 0.625 1.6875 0.625q1.1875 0 2.0 -0.8125q0.8125 -0.828125 0.8125 -2.03125q0 -1.140625 -0.765625 -1.890625q-0.75 -0.75 -1.90625 -0.75q-0.46875 0 -1.171875 0.1875l0.1875 -1.46875q0.15625 0.015625 0.265625 0.015625q1.0625 0 1.90625 -0.546875q0.859375 -0.5625 0.859375 -1.71875q0 -0.921875 -0.625 -1.515625q-0.609375 -0.609375 -1.59375 -0.609375q-0.96875 0 -1.625 0.609375q-0.640625 0.609375 -0.828125 1.84375l-1.671875 -0.296875q0.296875 -1.6875 1.375 -2.609375q1.09375 -0.921875 2.71875 -0.921875q1.109375 0 2.046875 0.484375q0.9375 0.46875 1.421875 1.296875q0.5 0.828125 0.5 1.75q0 0.890625 -0.46875 1.609375q-0.46875 0.71875 -1.40625 1.15625q1.21875 0.265625 1.875 1.15625q0.671875 0.875 0.671875 2.1875q0 1.78125 -1.296875 3.015625q-1.296875 1.234375 -3.28125 1.234375q-1.796875 0 -2.984375 -1.0625q-1.171875 -1.0625 -1.34375 -2.765625zm10.625732 0.453125l1.59375 -0.15625q0.203125 1.140625 0.78125 1.65625q0.578125 0.5 1.484375 0.5q0.765625 0 1.34375 -0.34375q0.578125 -0.359375 0.953125 -0.953125q0.375 -0.59375 0.625 -1.59375q0.25 -1.0 0.25 -2.03125q0 -0.109375 -0.015625 -0.34375q-0.5 0.796875 -1.375 1.296875q-0.859375 0.5 -1.875 0.5q-1.6875 0 -2.859375 -1.21875q-1.171875 -1.234375 -1.171875 -3.234375q0 -2.078125 1.21875 -3.328125q1.234375 -1.265625 3.0625 -1.265625q1.328125 0 2.421875 0.71875q1.109375 0.703125 1.671875 2.03125q0.578125 1.328125 0.578125 3.828125q0 2.609375 -0.578125 4.15625q-0.5625 1.546875 -1.6875 2.359375q-1.109375 0.796875 -2.609375 0.796875q-1.59375 0 -2.609375 -0.890625q-1.0 -0.890625 -1.203125 -2.484375zm6.828125 -6.0q0 -1.4375 -0.765625 -2.28125q-0.765625 -0.859375 -1.84375 -0.859375q-1.109375 0 -1.9375 0.921875q-0.828125 0.90625 -0.828125 2.34375q0 1.3125 0.78125 2.125q0.796875 0.796875 1.9375 0.796875q1.171875 0 1.90625 -0.796875q0.75 -0.8125 0.75 -2.25zm5.860077 1.765625q-1.046875 -0.375 -1.546875 -1.078125q-0.5 -0.71875 -0.5 -1.703125q0 -1.484375 1.0625 -2.484375q1.078125 -1.015625 2.84375 -1.015625q1.78125 0 2.859375 1.03125q1.09375 1.03125 1.09375 2.515625q0 0.953125 -0.5 1.65625q-0.484375 0.703125 -1.5 1.078125q1.25 0.40625 1.90625 1.3125q0.65625 0.90625 0.65625 2.171875q0 1.75 -1.234375 2.9375q-1.234375 1.1875 -3.25 1.1875q-2.015625 0 -3.25 -1.1875q-1.234375 -1.203125 -1.234375 -2.984375q0 -1.328125 0.671875 -2.21875q0.671875 -0.890625 1.921875 -1.21875zm-0.328125 -2.828125q0 0.96875 0.609375 1.578125q0.625 0.609375 1.625 0.609375q0.953125 0 1.5625 -0.609375q0.625 -0.609375 0.625 -1.484375q0 -0.921875 -0.640625 -1.546875q-0.625 -0.625 -1.578125 -0.625q-0.953125 0 -1.578125 0.609375q-0.625 0.609375 -0.625 1.46875zm-0.546875 6.28125q0 0.71875 0.328125 1.390625q0.34375 0.65625 1.015625 1.03125q0.671875 0.359375 1.4375 0.359375q1.203125 0 1.984375 -0.765625q0.78125 -0.78125 0.78125 -1.96875q0 -1.203125 -0.8125 -1.984375q-0.796875 -0.796875 -2.0 -0.796875q-1.1875 0 -1.96875 0.78125q-0.765625 0.78125 -0.765625 1.953125zm8.688232 0.328125l1.671875 -0.21875q0.28125 1.421875 0.96875 2.046875q0.703125 0.625 1.6875 0.625q1.1875 0 2.0 -0.8125q0.8125 -0.828125 0.8125 -2.03125q0 -1.140625 -0.765625 -1.890625q-0.75 -0.75 -1.90625 -0.75q-0.46875 0 -1.171875 0.1875l0.1875 -1.46875q0.15625 0.015625 0.265625 0.015625q1.0625 0 1.90625 -0.546875q0.859375 -0.5625 0.859375 -1.71875q0 -0.921875 -0.625 -1.515625q-0.609375 -0.609375 -1.59375 -0.609375q-0.96875 0 -1.625 0.609375q-0.640625 0.609375 -0.828125 1.84375l-1.671875 -0.296875q0.296875 -1.6875 1.375 -2.609375q1.09375 -0.921875 2.71875 -0.921875q1.109375 0 2.046875 0.484375q0.9375 0.46875 1.421875 1.296875q0.5 0.828125 0.5 1.75q0 0.890625 -0.46875 1.609375q-0.46875 0.71875 -1.40625 1.15625q1.21875 0.265625 1.875 1.15625q0.671875 0.875 0.671875 2.1875q0 1.78125 -1.296875 3.015625q-1.296875 1.234375 -3.28125 1.234375q-1.796875 0 -2.984375 -1.0625q-1.171875 -1.0625 -1.34375 -2.765625zm11.922607 7.59375l-1.1875 0q2.765625 -4.453125 2.765625 -8.921875q0 -1.734375 -0.390625 -3.453125q-0.328125 -1.390625 -0.890625 -2.671875q-0.359375 -0.84375 -1.484375 -2.78125l1.1875 0q1.75 2.328125 2.578125 4.671875q0.71875 2.015625 0.71875 4.234375q0 2.5 -0.96875 4.84375q-0.953125 2.328125 -2.328125 4.078125z" fill-rule="nonzero"></path><path fill="#000000" fill-opacity="0.0" d="m129.09448 420.41733l180.0 0l0 42.11023l-180.0 0z" fill-rule="evenodd"></path><path stroke="#000000" stroke-width="2.0" stroke-linejoin="round" stroke-linecap="butt" d="m129.09448 420.41733l180.0 0l0 42.11023l-180.0 0z" fill-rule="evenodd"></path><path fill="#000000" d="m166.58162 447.3373l0 -13.59375l2.71875 0l3.21875 9.625q0.4375 1.34375 0.640625 2.015625q0.234375 -0.75 0.734375 -2.1875l3.25 -9.453125l2.421875 0l0 13.59375l-1.734375 0l0 -11.390625l-3.953125 11.390625l-1.625 0l-3.9375 -11.578125l0 11.578125l-1.734375 0zm21.837677 0l0 -1.453125q-1.140625 1.671875 -3.125 1.671875q-0.859375 0 -1.625 -0.328125q-0.75 -0.34375 -1.125 -0.84375q-0.359375 -0.5 -0.515625 -1.234375q-0.09375 -0.5 -0.09375 -1.5625l0 -6.109375l1.671875 0l0 5.46875q0 1.3125 0.09375 1.765625q0.15625 0.65625 0.671875 1.03125q0.515625 0.375 1.265625 0.375q0.75 0 1.40625 -0.375q0.65625 -0.390625 0.921875 -1.046875q0.28125 -0.671875 0.28125 -1.9375l0 -5.28125l1.671875 0l0 9.859375l-1.5 0zm3.8913422 0l0 -13.59375l1.671875 0l0 13.59375l-1.671875 0zm7.832321 -1.5l0.234375 1.484375q-0.703125 0.140625 -1.265625 0.140625q-0.90625 0 -1.40625 -0.28125q-0.5 -0.296875 -0.703125 -0.75q-0.203125 -0.46875 -0.203125 -1.984375l0 -5.65625l-1.234375 0l0 -1.3125l1.234375 0l0 -2.4375l1.65625 -1.0l0 3.4375l1.6875 0l0 1.3125l-1.6875 0l0 5.75q0 0.71875 0.078125 0.921875q0.09375 0.203125 0.296875 0.328125q0.203125 0.125 0.578125 0.125q0.265625 0 0.734375 -0.078125zm1.5426788 -10.1875l0 -1.90625l1.671875 0l0 1.90625l-1.671875 0zm0 11.6875l0 -9.859375l1.671875 0l0 9.859375l-1.671875 0zm4.129196 3.78125l0 -13.640625l1.53125 0l0 1.28125q0.53125 -0.75 1.203125 -1.125q0.6875 -0.375 1.640625 -0.375q1.265625 0 2.234375 0.65625q0.96875 0.640625 1.453125 1.828125q0.5 1.1875 0.5 2.59375q0 1.515625 -0.546875 2.734375q-0.546875 1.203125 -1.578125 1.84375q-1.03125 0.640625 -2.171875 0.640625q-0.84375 0 -1.515625 -0.34375q-0.65625 -0.359375 -1.078125 -0.890625l0 4.796875l-1.671875 0zm1.515625 -8.65625q0 1.90625 0.765625 2.8125q0.78125 0.90625 1.875 0.90625q1.109375 0 1.890625 -0.9375q0.796875 -0.9375 0.796875 -2.921875q0 -1.875 -0.78125 -2.8125q-0.765625 -0.9375 -1.84375 -0.9375q-1.0625 0 -1.890625 1.0q-0.8125 1.0 -0.8125 2.890625zm8.828842 4.875l0 -13.59375l1.671875 0l0 13.59375l-1.671875 0zm4.191696 -11.6875l0 -1.90625l1.671875 0l0 1.90625l-1.671875 0zm0 11.6875l0 -9.859375l1.671875 0l0 9.859375l-1.671875 0zm10.566696 -3.609375l1.640625 0.21875q-0.265625 1.6875 -1.375 2.65625q-1.109375 0.953125 -2.734375 0.953125q-2.015625 0 -3.25 -1.3125q-1.21875 -1.328125 -1.21875 -3.796875q0 -1.59375 0.515625 -2.78125q0.53125 -1.203125 1.609375 -1.796875q1.09375 -0.609375 2.359375 -0.609375q1.609375 0 2.625 0.8125q1.015625 0.8125 1.3125 2.3125l-1.625 0.25q-0.234375 -1.0 -0.828125 -1.5q-0.59375 -0.5 -1.421875 -0.5q-1.265625 0 -2.0625 0.90625q-0.78125 0.90625 -0.78125 2.859375q0 1.984375 0.765625 2.890625q0.765625 0.890625 1.984375 0.890625q0.984375 0 1.640625 -0.59375q0.65625 -0.609375 0.84375 -1.859375zm9.328125 2.390625q-0.9375 0.796875 -1.796875 1.125q-0.859375 0.3125 -1.84375 0.3125q-1.609375 0 -2.484375 -0.78125q-0.875 -0.796875 -0.875 -2.03125q0 -0.734375 0.328125 -1.328125q0.328125 -0.59375 0.859375 -0.953125q0.53125 -0.359375 1.203125 -0.546875q0.5 -0.140625 1.484375 -0.25q2.03125 -0.25 2.984375 -0.578125q0 -0.34375 0 -0.4375q0 -1.015625 -0.46875 -1.4375q-0.640625 -0.5625 -1.90625 -0.5625q-1.171875 0 -1.734375 0.40625q-0.5625 0.40625 -0.828125 1.46875l-1.640625 -0.234375q0.234375 -1.046875 0.734375 -1.6875q0.515625 -0.640625 1.46875 -0.984375q0.96875 -0.359375 2.25 -0.359375q1.265625 0 2.046875 0.296875q0.78125 0.296875 1.15625 0.75q0.375 0.453125 0.515625 1.140625q0.09375 0.421875 0.09375 1.53125l0 2.234375q0 2.328125 0.09375 2.953125q0.109375 0.609375 0.4375 1.171875l-1.75 0q-0.265625 -0.515625 -0.328125 -1.21875zm-0.140625 -3.71875q-0.90625 0.359375 -2.734375 0.625q-1.03125 0.140625 -1.453125 0.328125q-0.421875 0.1875 -0.65625 0.546875q-0.234375 0.359375 -0.234375 0.796875q0 0.671875 0.5 1.125q0.515625 0.4375 1.484375 0.4375q0.96875 0 1.71875 -0.421875q0.75 -0.4375 1.109375 -1.15625q0.265625 -0.578125 0.265625 -1.671875l0 -0.609375zm7.735092 3.4375l0.234375 1.484375q-0.703125 0.140625 -1.265625 0.140625q-0.90625 0 -1.40625 -0.28125q-0.5 -0.296875 -0.703125 -0.75q-0.203125 -0.46875 -0.203125 -1.984375l0 -5.65625l-1.234375 0l0 -1.3125l1.234375 0l0 -2.4375l1.65625 -1.0l0 3.4375l1.6875 0l0 1.3125l-1.6875 0l0 5.75q0 0.71875 0.078125 0.921875q0.09375 0.203125 0.296875 0.328125q0.203125 0.125 0.578125 0.125q0.265625 0 0.734375 -0.078125zm1.5426788 -10.1875l0 -1.90625l1.671875 0l0 1.90625l-1.671875 0zm0 11.6875l0 -9.859375l1.671875 0l0 9.859375l-1.671875 0zm3.5041962 -4.921875q0 -2.734375 1.53125 -4.0625q1.265625 -1.09375 3.0937653 -1.09375q2.03125 0 3.3125 1.34375q1.296875 1.328125 1.296875 3.671875q0 1.90625 -0.578125 3.0q-0.5625 1.078125 -1.65625 1.6875q-1.078125 0.59375 -2.375 0.59375q-2.0625153 0 -3.3437653 -1.328125q-1.28125 -1.328125 -1.28125 -3.8125zm1.71875 0q0 1.890625 0.828125 2.828125q0.82814026 0.9375 2.0781403 0.9375q1.25 0 2.0625 -0.9375q0.828125 -0.953125 0.828125 -2.890625q0 -1.828125 -0.828125 -2.765625q-0.828125 -0.9375 -2.0625 -0.9375q-1.25 0 -2.0781403 0.9375q-0.828125 0.9375 -0.828125 2.828125zm9.281967 4.921875l0 -9.859375l1.5 0l0 1.40625q1.09375 -1.625 3.140625 -1.625q0.890625 0 1.640625 0.328125q0.75 0.3125 1.109375 0.84375q0.375 0.515625 0.53125 1.21875q0.09375 0.46875 0.09375 1.625l0 6.0625l-1.671875 0l0 -6.0q0 -1.015625 -0.203125 -1.515625q-0.1875 -0.515625 -0.6875 -0.8125q-0.5 -0.296875 -1.171875 -0.296875q-1.0625 0 -1.84375 0.671875q-0.765625 0.671875 -0.765625 2.578125l0 5.375l-1.671875 0z" fill-rule="nonzero"></path><path fill="#000000" fill-opacity="0.0" d="m129.09448 576.29395l180.0 0l0 42.11023l-180.0 0z" fill-rule="evenodd"></path><path stroke="#000000" stroke-width="2.0" stroke-linejoin="round" stroke-linecap="butt" d="m129.09448 576.29395l180.0 0l0 42.11023l-180.0 0z" fill-rule="evenodd"></path><path fill="#000000" d="m185.35355 603.214l5.234375 -13.59375l1.9375 0l5.5625 13.59375l-2.046875 0l-1.59375 -4.125l-5.6875 0l-1.484375 4.125l-1.921875 0zm3.921875 -5.578125l4.609375 0l-1.40625 -3.78125q-0.65625 -1.703125 -0.96875 -2.8125q-0.265625 1.3125 -0.734375 2.59375l-1.5 4.0zm16.193573 5.578125l0 -1.25q-0.9375 1.46875 -2.75 1.46875q-1.171875 0 -2.171875 -0.640625q-0.984375 -0.65625 -1.53125 -1.8125q-0.53125 -1.171875 -0.53125 -2.6875q0 -1.46875 0.484375 -2.671875q0.5 -1.203125 1.46875 -1.84375q0.984375 -0.640625 2.203125 -0.640625q0.890625 0 1.578125 0.375q0.703125 0.375 1.140625 0.984375l0 -4.875l1.65625 0l0 13.59375l-1.546875 0zm-5.28125 -4.921875q0 1.890625 0.796875 2.828125q0.8125 0.9375 1.890625 0.9375q1.09375 0 1.859375 -0.890625q0.765625 -0.890625 0.765625 -2.734375q0 -2.015625 -0.78125 -2.953125q-0.78125 -0.953125 -1.921875 -0.953125q-1.109375 0 -1.859375 0.90625q-0.75 0.90625 -0.75 2.859375zm15.656967 4.921875l0 -1.25q-0.9375 1.46875 -2.75 1.46875q-1.171875 0 -2.171875 -0.640625q-0.984375 -0.65625 -1.53125 -1.8125q-0.53125 -1.171875 -0.53125 -2.6875q0 -1.46875 0.484375 -2.671875q0.5 -1.203125 1.46875 -1.84375q0.984375 -0.640625 2.203125 -0.640625q0.890625 0 1.578125 0.375q0.703125 0.375 1.140625 0.984375l0 -4.875l1.65625 0l0 13.59375l-1.546875 0zm-5.28125 -4.921875q0 1.890625 0.796875 2.828125q0.8125 0.9375 1.890625 0.9375q1.09375 0 1.859375 -0.890625q0.765625 -0.890625 0.765625 -2.734375q0 -2.015625 -0.78125 -2.953125q-0.78125 -0.953125 -1.921875 -0.953125q-1.109375 0 -1.859375 0.90625q-0.75 0.90625 -0.75 2.859375zm9.281967 -6.765625l0 -1.90625l1.671875 0l0 1.90625l-1.671875 0zm0 11.6875l0 -9.859375l1.671875 0l0 9.859375l-1.671875 0zm7.785446 -1.5l0.234375 1.484375q-0.703125 0.140625 -1.265625 0.140625q-0.90625 0 -1.40625 -0.28125q-0.5 -0.296875 -0.703125 -0.75q-0.203125 -0.46875 -0.203125 -1.984375l0 -5.65625l-1.234375 0l0 -1.3125l1.234375 0l0 -2.4375l1.65625 -1.0l0 3.4375l1.6875 0l0 1.3125l-1.6875 0l0 5.75q0 0.71875 0.078125 0.921875q0.09375 0.203125 0.296875 0.328125q0.203125 0.125 0.578125 0.125q0.265625 0 0.734375 -0.078125zm1.5426788 -10.1875l0 -1.90625l1.671875 0l0 1.90625l-1.671875 0zm0 11.6875l0 -9.859375l1.671875 0l0 9.859375l-1.671875 0zm3.5041962 -4.921875q0 -2.734375 1.53125 -4.0625q1.265625 -1.09375 3.09375 -1.09375q2.03125 0 3.3125 1.34375q1.296875 1.328125 1.296875 3.671875q0 1.90625 -0.578125 3.0q-0.5625 1.078125 -1.65625 1.6875q-1.078125 0.59375 -2.375 0.59375q-2.0625 0 -3.34375 -1.328125q-1.28125 -1.328125 -1.28125 -3.8125zm1.71875 0q0 1.890625 0.828125 2.828125q0.828125 0.9375 2.078125 0.9375q1.25 0 2.0625 -0.9375q0.828125 -0.953125 0.828125 -2.890625q0 -1.828125 -0.828125 -2.765625q-0.828125 -0.9375 -2.0625 -0.9375q-1.25 0 -2.078125 0.9375q-0.828125 0.9375 -0.828125 2.828125zm9.281967 4.921875l0 -9.859375l1.5 0l0 1.40625q1.09375 -1.625 3.140625 -1.625q0.890625 0 1.640625 0.328125q0.75 0.3125 1.109375 0.84375q0.375 0.515625 0.53125 1.21875q0.09375 0.46875 0.09375 1.625l0 6.0625l-1.671875 0l0 -6.0q0 -1.015625 -0.203125 -1.515625q-0.1875 -0.515625 -0.6875 -0.8125q-0.5 -0.296875 -1.171875 -0.296875q-1.0625 0 -1.84375 0.671875q-0.765625 0.671875 -0.765625 2.578125l0 5.375l-1.671875 0z" fill-rule="nonzero"></path><path fill="#000000" fill-opacity="0.0" d="m388.49344 597.34644l-79.40158 0" fill-rule="evenodd"></path><path stroke="#000000" stroke-width="1.0" stroke-linejoin="round" stroke-linecap="butt" d="m388.49344 597.34644l-73.40158 0" fill-rule="evenodd"></path><path fill="#000000" stroke="#000000" stroke-width="1.0" stroke-linecap="butt" d="m315.09186 595.6947l-4.538086 1.6517334l4.538086 1.6517334z" fill-rule="evenodd"></path><path fill="#000000" fill-opacity="0.0" d="m388.49344 441.47244l-79.40158 0" fill-rule="evenodd"></path><path stroke="#000000" stroke-width="1.0" stroke-linejoin="round" stroke-linecap="butt" d="m388.49344 441.47244l-73.40158 0" fill-rule="evenodd"></path><path fill="#000000" stroke="#000000" stroke-width="1.0" stroke-linecap="butt" d="m315.09186 439.8207l-4.538086 1.6517334l4.538086 1.6517334z" fill-rule="evenodd"></path><path fill="#000000" fill-opacity="0.0" d="m219.09448 462.52756l0 31.84253" fill-rule="evenodd"></path><path stroke="#000000" stroke-width="1.0" stroke-linejoin="round" stroke-linecap="butt" d="m219.09448 462.52756l0 25.84253" fill-rule="evenodd"></path><path fill="#000000" stroke="#000000" stroke-width="1.0" stroke-linecap="butt" d="m217.44275 488.3701l1.6517334 4.538086l1.6517334 -4.538086z" fill-rule="evenodd"></path><path fill="#000000" fill-opacity="0.0" d="m561.5 51.755962l31.99347 0l0 545.57477l-25.001343 0" fill-rule="evenodd"></path><path stroke="#000000" stroke-width="1.0" stroke-linejoin="round" stroke-linecap="butt" d="m561.5 51.755962l31.99347 0l0 545.57477l-25.001343 0" fill-rule="evenodd"></path><path fill="#000000" fill-opacity="0.0" d="m478.49213 72.81108l0 339.1496" fill-rule="evenodd"></path><path stroke="#000000" stroke-width="1.0" stroke-linejoin="round" stroke-linecap="butt" d="m478.49213 72.81108l0 333.1496" fill-rule="evenodd"></path><path fill="#000000" stroke="#000000" stroke-width="1.0" stroke-linecap="butt" d="m476.8404 405.96066l1.6517334 4.5381165l1.6517334 -4.5381165z" fill-rule="evenodd"></path><path fill="#000000" fill-opacity="0.0" d="m590.00525 597.4094l-21.51184 -0.06298828" fill-rule="evenodd"></path><path stroke="#000000" stroke-width="1.0" stroke-linejoin="round" stroke-linecap="butt" d="m590.00525 597.4094l-15.511841 -0.045410156" fill-rule="evenodd"></path><path fill="#000000" stroke="#000000" stroke-width="1.0" stroke-linecap="butt" d="m574.4982 595.7123l-4.5429077 1.6384277l4.533264 1.6650391z" fill-rule="evenodd"></path><path fill="#000000" fill-opacity="0.0" d="m109.09449 494.357l220.0 0l0 42.11023l-220.0 0z" fill-rule="evenodd"></path><path stroke="#000000" stroke-width="2.0" stroke-linejoin="round" stroke-linecap="butt" d="m109.09449 494.357l220.0 0l0 42.11023l-220.0 0z" fill-rule="evenodd"></path><path fill="#000000" d="m126.81095 521.277l0 -13.59375l9.171867 0l0 1.59375l-7.375 0l0 4.21875l6.375 0l0 1.609375l-6.375 0l0 6.171875l-1.7968674 0zm17.536598 0l0 -1.453125q-1.140625 1.671875 -3.125 1.671875q-0.859375 0 -1.625 -0.328125q-0.75 -0.34375 -1.125 -0.84375q-0.359375 -0.5 -0.515625 -1.234375q-0.09375 -0.5 -0.09375 -1.5625l0 -6.109375l1.671875 0l0 5.46875q0 1.3125 0.09375 1.765625q0.15625 0.65625 0.671875 1.03125q0.515625 0.375 1.265625 0.375q0.75 0 1.40625 -0.375q0.65625 -0.390625 0.921875 -1.046875q0.28125 -0.671875 0.28125 -1.9375l0 -5.28125l1.671875 0l0 9.859375l-1.5 0zm3.8913422 0l0 -13.59375l1.671875 0l0 13.59375l-1.671875 0zm4.144821 0l0 -13.59375l1.671875 0l0 13.59375l-1.671875 0zm4.097946 3.796875l-0.171875 -1.5625q0.546875 0.140625 0.953125 0.140625q0.546875 0 0.875 -0.1875q0.34375 -0.1875 0.5625 -0.515625q0.15625 -0.25 0.5 -1.25q0.046875 -0.140625 0.15625 -0.40625l-3.734375 -9.875l1.796875 0l2.046875 5.71875q0.40625 1.078125 0.71875 2.28125q0.28125 -1.15625 0.6875 -2.25l2.09375 -5.75l1.671875 0l-3.75 10.03125q-0.59375 1.625 -0.9375 2.234375q-0.4375 0.828125 -1.015625 1.203125q-0.578125 0.390625 -1.375 0.390625q-0.484375 0 -1.078125 -0.203125zm19.328125 -8.5625l1.796875 0.453125q-0.5625 2.21875 -2.03125 3.390625q-1.46875 1.15625 -3.59375 1.15625q-2.203125 0 -3.578125 -0.890625q-1.375 -0.90625 -2.09375 -2.59375q-0.71875 -1.703125 -0.71875 -3.65625q0 -2.125 0.796875 -3.703125q0.8125 -1.578125 2.3125 -2.390625q1.5 -0.828125 3.296875 -0.828125q2.046875 0 3.4375 1.046875q1.390625 1.03125 1.9375 2.90625l-1.765625 0.421875q-0.46875 -1.484375 -1.375 -2.15625q-0.90625 -0.6875 -2.265625 -0.6875q-1.5625 0 -2.625 0.75q-1.046875 0.75 -1.484375 2.03125q-0.421875 1.265625 -0.421875 2.609375q0 1.734375 0.5 3.03125q0.515625 1.28125 1.578125 1.921875q1.078125 0.640625 2.3125 0.640625q1.515625 0 2.5625 -0.859375q1.046875 -0.875 1.421875 -2.59375zm2.9260712 -0.15625q0 -2.734375 1.53125 -4.0625q1.265625 -1.09375 3.09375 -1.09375q2.03125 0 3.3125 1.34375q1.296875 1.328125 1.296875 3.671875q0 1.90625 -0.578125 3.0q-0.5625 1.078125 -1.65625 1.6875q-1.078125 0.59375 -2.375 0.59375q-2.0625 0 -3.34375 -1.328125q-1.28125 -1.328125 -1.28125 -3.8125zm1.71875 0q0 1.890625 0.828125 2.828125q0.828125 0.9375 2.078125 0.9375q1.25 0 2.0625 -0.9375q0.828125 -0.953125 0.828125 -2.890625q0 -1.828125 -0.828125 -2.765625q-0.828125 -0.9375 -2.0625 -0.9375q-1.25 0 -2.078125 0.9375q-0.828125 0.9375 -0.828125 2.828125zm9.281967 4.921875l0 -9.859375l1.5 0l0 1.40625q1.09375 -1.625 3.140625 -1.625q0.890625 0 1.640625 0.328125q0.75 0.3125 1.109375 0.84375q0.375 0.515625 0.53125 1.21875q0.09375 0.46875 0.09375 1.625l0 6.0625l-1.671875 0l0 -6.0q0 -1.015625 -0.203125 -1.515625q-0.1875 -0.515625 -0.6875 -0.8125q-0.5 -0.296875 -1.171875 -0.296875q-1.0625 0 -1.84375 0.671875q-0.765625 0.671875 -0.765625 2.578125l0 5.375l-1.671875 0zm10.375717 0l0 -9.859375l1.5 0l0 1.40625q1.09375 -1.625 3.140625 -1.625q0.890625 0 1.640625 0.328125q0.75 0.3125 1.109375 0.84375q0.375 0.515625 0.53125 1.21875q0.09375 0.46875 0.09375 1.625l0 6.0625l-1.671875 0l0 -6.0q0 -1.015625 -0.203125 -1.515625q-0.1875 -0.515625 -0.6875 -0.8125q-0.5 -0.296875 -1.171875 -0.296875q-1.0625 0 -1.84375 0.671875q-0.765625 0.671875 -0.765625 2.578125l0 5.375l-1.671875 0zm17.125717 -3.171875l1.71875 0.21875q-0.40625 1.5 -1.515625 2.34375q-1.09375 0.828125 -2.8125 0.828125q-2.15625 0 -3.421875 -1.328125q-1.265625 -1.328125 -1.265625 -3.734375q0 -2.484375 1.265625 -3.859375q1.28125 -1.375 3.328125 -1.375q1.984375 0 3.234375 1.34375q1.25 1.34375 1.25 3.796875q0 0.140625 -0.015625 0.4375l-7.34375 0q0.09375 1.625 0.921875 2.484375q0.828125 0.859375 2.0625 0.859375q0.90625 0 1.546875 -0.46875q0.65625 -0.484375 1.046875 -1.546875zm-5.484375 -2.703125l5.5 0q-0.109375 -1.234375 -0.625 -1.859375q-0.796875 -0.96875 -2.078125 -0.96875q-1.140625 0 -1.9375 0.78125q-0.78125 0.765625 -0.859375 2.046875zm15.547592 2.265625l1.640625 0.21875q-0.265625 1.6875 -1.375 2.65625q-1.109375 0.953125 -2.734375 0.953125q-2.015625 0 -3.25 -1.3125q-1.21875 -1.328125 -1.21875 -3.796875q0 -1.59375 0.515625 -2.78125q0.53125 -1.203125 1.609375 -1.796875q1.09375 -0.609375 2.359375 -0.609375q1.609375 0 2.625 0.8125q1.015625 0.8125 1.3125 2.3125l-1.625 0.25q-0.234375 -1.0 -0.828125 -1.5q-0.59375 -0.5 -1.421875 -0.5q-1.265625 0 -2.0625 0.90625q-0.78125 0.90625 -0.78125 2.859375q0 1.984375 0.765625 2.890625q0.765625 0.890625 1.984375 0.890625q0.984375 0 1.640625 -0.59375q0.65625 -0.609375 0.84375 -1.859375zm6.546875 2.109375l0.234375 1.484375q-0.703125 0.140625 -1.265625 0.140625q-0.90625 0 -1.40625 -0.28125q-0.5 -0.296875 -0.703125 -0.75q-0.203125 -0.46875 -0.203125 -1.984375l0 -5.65625l-1.234375 0l0 -1.3125l1.234375 0l0 -2.4375l1.65625 -1.0l0 3.4375l1.6875 0l0 1.3125l-1.6875 0l0 5.75q0 0.71875 0.078125 0.921875q0.09375 0.203125 0.296875 0.328125q0.203125 0.125 0.578125 0.125q0.265625 0 0.734375 -0.078125zm8.277054 -1.671875l1.71875 0.21875q-0.40625 1.5 -1.515625 2.34375q-1.09375 0.828125 -2.8125 0.828125q-2.15625 0 -3.421875 -1.328125q-1.265625 -1.328125 -1.265625 -3.734375q0 -2.484375 1.265625 -3.859375q1.28125 -1.375 3.328125 -1.375q1.984375 0 3.234375 1.34375q1.25 1.34375 1.25 3.796875q0 0.140625 -0.015625 0.4375l-7.34375 0q0.09375 1.625 0.921875 2.484375q0.828125 0.859375 2.0625 0.859375q0.90625 0 1.546875 -0.46875q0.65625 -0.484375 1.046875 -1.546875zm-5.484375 -2.703125l5.5 0q-0.109375 -1.234375 -0.625 -1.859375q-0.796875 -0.96875 -2.078125 -0.96875q-1.140625 0 -1.9375 0.78125q-0.78125 0.765625 -0.859375 2.046875zm15.500717 5.875l0 -1.25q-0.9375 1.46875 -2.75 1.46875q-1.171875 0 -2.171875 -0.640625q-0.984375 -0.65625 -1.53125 -1.8125q-0.53125 -1.171875 -0.53125 -2.6875q0 -1.46875 0.484375 -2.671875q0.5 -1.203125 1.46875 -1.84375q0.984375 -0.640625 2.203125 -0.640625q0.890625 0 1.578125 0.375q0.703125 0.375 1.140625 0.984375l0 -4.875l1.65625 0l0 13.59375l-1.546875 0zm-5.28125 -4.921875q0 1.890625 0.796875 2.828125q0.8125 0.9375 1.890625 0.9375q1.09375 0 1.859375 -0.890625q0.765625 -0.890625 0.765625 -2.734375q0 -2.015625 -0.78125 -2.953125q-0.78125 -0.953125 -1.921875 -0.953125q-1.109375 0 -1.859375 0.90625q-0.75 0.90625 -0.75 2.859375zm17.637161 8.921875q-1.375 -1.75 -2.328125 -4.078125q-0.953125 -2.34375 -0.953125 -4.84375q0 -2.21875 0.703125 -4.234375q0.84375 -2.34375 2.578125 -4.671875l1.203125 0q-1.125 1.921875 -1.484375 2.75q-0.5625 1.28125 -0.890625 2.671875q-0.40625 1.734375 -0.40625 3.484375q0 4.46875 2.78125 8.921875l-1.203125 0zm2.572052 -7.59375l1.671875 -0.21875q0.28125 1.421875 0.96875 2.046875q0.703125 0.625 1.6875 0.625q1.1875 0 2.0 -0.8125q0.8125 -0.828125 0.8125 -2.03125q0 -1.140625 -0.765625 -1.890625q-0.75 -0.75 -1.90625 -0.75q-0.46875 0 -1.171875 0.1875l0.1875 -1.46875q0.15625 0.015625 0.265625 0.015625q1.0625 0 1.90625 -0.546875q0.859375 -0.5625 0.859375 -1.71875q0 -0.921875 -0.625 -1.515625q-0.609375 -0.609375 -1.59375 -0.609375q-0.96875 0 -1.625 0.609375q-0.640625 0.609375 -0.828125 1.84375l-1.671875 -0.296875q0.296875 -1.6875 1.375 -2.609375q1.09375 -0.921875 2.71875 -0.921875q1.109375 0 2.046875 0.484375q0.9375 0.46875 1.421875 1.296875q0.5 0.828125 0.5 1.75q0 0.890625 -0.46875 1.609375q-0.46875 0.71875 -1.40625 1.15625q1.21875 0.265625 1.875 1.15625q0.671875 0.875 0.671875 2.1875q0 1.78125 -1.296875 3.015625q-1.296875 1.234375 -3.28125 1.234375q-1.796875 0 -2.984375 -1.0625q-1.171875 -1.0625 -1.34375 -2.765625zm19.141327 1.984375l0 1.609375l-8.984375 0q-0.015625 -0.609375 0.1875 -1.15625q0.34375 -0.921875 1.09375 -1.8125q0.765625 -0.890625 2.1875 -2.0625q2.21875 -1.8125 3.0 -2.875q0.78125 -1.0625 0.78125 -2.015625q0 -0.984375 -0.71875 -1.671875q-0.703125 -0.6875 -1.84375 -0.6875q-1.203125 0 -1.9375 0.734375q-0.71875 0.71875 -0.71875 2.0l-1.71875 -0.171875q0.171875 -1.921875 1.328125 -2.921875q1.15625 -1.015625 3.09375 -1.015625q1.953125 0 3.09375 1.09375q1.140625 1.078125 1.140625 2.6875q0 0.8125 -0.34375 1.609375q-0.328125 0.78125 -1.109375 1.65625q-0.765625 0.859375 -2.5625 2.390625q-1.5 1.265625 -1.9375 1.71875q-0.421875 0.4375 -0.703125 0.890625l6.671875 0zm0.9538574 1.609375l3.59375 -5.125l-3.328125 -4.734375l2.09375 0l1.515625 2.3125q0.421875 0.65625 0.671875 1.109375q0.421875 -0.609375 0.765625 -1.09375l1.65625 -2.328125l1.984375 0l-3.390625 4.640625l3.65625 5.21875l-2.046875 0l-2.03125 -3.0625l-0.53125 -0.828125l-2.59375 3.890625l-2.015625 0zm16.265625 0l-1.671875 0l0 -10.640625q-0.59375 0.578125 -1.578125 1.15625q-0.984375 0.5625 -1.765625 0.859375l0 -1.625q1.40625 -0.65625 2.453125 -1.59375q1.046875 -0.9375 1.484375 -1.8125l1.078125 0l0 13.65625zm5.641327 4.0l-1.1875 0q2.765625 -4.453125 2.765625 -8.921875q0 -1.734375 -0.390625 -3.453125q-0.328125 -1.390625 -0.890625 -2.671875q-0.359375 -0.84375 -1.484375 -2.78125l1.1875 0q1.75 2.328125 2.578125 4.671875q0.71875 2.015625 0.71875 4.234375q0 2.5 -0.96875 4.84375q-0.953125 2.328125 -2.328125 4.078125z" fill-rule="nonzero"></path><path fill="#000000" fill-opacity="0.0" d="m219.09448 536.4672l0 39.811035" fill-rule="evenodd"></path><path stroke="#000000" stroke-width="1.0" stroke-linejoin="round" stroke-linecap="butt" d="m219.09448 536.4672l0 33.811035" fill-rule="evenodd"></path><path fill="#000000" stroke="#000000" stroke-width="1.0" stroke-linecap="butt" d="m217.44275 570.27826l1.6517334 4.538086l1.6517334 -4.538086z" fill-rule="evenodd"></path></g></svg>
+<svg version="1.1" viewBox="0.0 0.0 742.6010498687664 753.6010498687664" fill="none" stroke="none" stroke-linecap="square" stroke-miterlimit="10" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink"><clipPath id="p.0"><path d="m0 0l742.6011 0l0 753.6011l-742.6011 0l0 -753.6011z" clip-rule="nonzero"></path></clipPath><g clip-path="url(#p.0)"><path fill="#000000" fill-opacity="0.0" d="m0 0l742.6011 0l0 753.6011l-742.6011 0z" fill-rule="evenodd"></path><path fill="#000000" fill-opacity="0.0" d="m136.0 30.700842l166.01575 0l0 42.110237l-166.01575 0z" fill-rule="evenodd"></path><path stroke="#000000" stroke-width="2.0" stroke-linejoin="round" stroke-linecap="butt" d="m136.0 30.700842l166.01575 0l0 42.110237l-166.01575 0z" fill-rule="evenodd"></path><path fill="#000000" d="m153.6274 57.620842l0 -13.59375l1.8125 0l0 13.59375l-1.8125 0zm4.667679 0l0 -9.859375l1.5 0l0 1.40625q1.09375 -1.625 3.140625 -1.625q0.890625 0 1.640625 0.328125q0.75 0.3125 1.109375 0.84375q0.375 0.515625 0.53125 1.21875q0.09375 0.46875 0.09375 1.625l0 6.0625l-1.671875 0l0 -6.0q0 -1.015625 -0.203125 -1.515625q-0.1875 -0.515625 -0.6875 -0.8125q-0.5 -0.296875 -1.171875 -0.296875q-1.0625 0 -1.84375 0.671875q-0.765625 0.671875 -0.765625 2.578125l0 5.375l-1.671875 0zm10.375717 3.78125l0 -13.640625l1.53125 0l0 1.28125q0.53125 -0.75 1.203125 -1.125q0.6875 -0.375 1.640625 -0.375q1.265625 0 2.234375 0.65625q0.96875 0.640625 1.453125 1.828125q0.5 1.1875 0.5 2.59375q0 1.515625 -0.546875 2.734375q-0.546875 1.203125 -1.578125 1.84375q-1.03125 0.640625 -2.171875 0.640625q-0.84375 0 -1.515625 -0.34375q-0.65625 -0.359375 -1.078125 -0.890625l0 4.796875l-1.671875 0zm1.515625 -8.65625q0 1.90625 0.765625 2.8125q0.78125 0.90625 1.875 0.90625q1.109375 0 1.890625 -0.9375q0.796875 -0.9375 0.796875 -2.921875q0 -1.875 -0.78125 -2.8125q-0.765625 -0.9375 -1.84375 -0.9375q-1.0625 0 -1.890625 1.0q-0.8125 1.0 -0.8125 2.890625zm15.313217 4.875l0 -1.453125q-1.140625 1.671875 -3.125 1.671875q-0.859375 0 -1.625 -0.328125q-0.75 -0.34375 -1.125 -0.84375q-0.359375 -0.5 -0.515625 -1.234375q-0.09375 -0.5 -0.09375 -1.5625l0 -6.109375l1.671875 0l0 5.46875q0 1.3125 0.09375 1.765625q0.15625 0.65625 0.671875 1.03125q0.515625 0.375 1.265625 0.375q0.75 0 1.40625 -0.375q0.65625 -0.390625 0.921875 -1.046875q0.28125 -0.671875 0.28125 -1.9375l0 -5.28125l1.671875 0l0 9.859375l-1.5 0zm7.578842 -1.5l0.234375 1.484375q-0.703125 0.140625 -1.265625 0.140625q-0.90625 0 -1.40625 -0.28125q-0.5 -0.296875 -0.703125 -0.75q-0.203125 -0.46875 -0.203125 -1.984375l0 -5.65625l-1.234375 0l0 -1.3125l1.234375 0l0 -2.4375l1.65625 -1.0l0 3.4375l1.6875 0l0 1.3125l-1.6875 0l0 5.75q0 0.71875 0.078125 0.921875q0.09375 0.203125 0.296875 0.328125q0.203125 0.125 0.578125 0.125q0.265625 0 0.734375 -0.078125zm7.355179 1.5l-1.671875 0l0 -10.640625q-0.59375 0.578125 -1.578125 1.15625q-0.984375 0.5625 -1.765625 0.859375l0 -1.625q1.40625 -0.65625 2.453125 -1.59375q1.046875 -0.9375 1.484375 -1.8125l1.078125 0l0 13.65625zm12.918396 4.0q-1.375 -1.75 -2.328125 -4.078125q-0.953125 -2.34375 -0.953125 -4.84375q0 -2.21875 0.703125 -4.234375q0.84375 -2.34375 2.578125 -4.671875l1.203125 0q-1.125 1.921875 -1.484375 2.75q-0.5625 1.28125 -0.890625 2.671875q-0.40625 1.734375 -0.40625 3.484375q0 4.46875 2.78125 8.921875l-1.203125 0zm2.353302 -6.9375l1.65625 -0.265625q0.140625 1.0 0.765625 1.53125q0.640625 0.515625 1.78125 0.515625q1.15625 0 1.703125 -0.46875q0.5625 -0.46875 0.5625 -1.09375q0 -0.5625 -0.484375 -0.890625q-0.34375 -0.21875 -1.703125 -0.5625q-1.84375 -0.46875 -2.5625 -0.796875q-0.703125 -0.34375 -1.078125 -0.9375q-0.359375 -0.609375 -0.359375 -1.328125q0 -0.65625 0.296875 -1.21875q0.3125 -0.5625 0.828125 -0.9375q0.390625 -0.28125 1.0625 -0.484375q0.671875 -0.203125 1.4375 -0.203125q1.171875 0 2.046875 0.34375q0.875 0.328125 1.28125 0.90625q0.421875 0.5625 0.578125 1.515625l-1.625 0.21875q-0.109375 -0.75 -0.65625 -1.171875q-0.53125 -0.4375 -1.5 -0.4375q-1.15625 0 -1.640625 0.390625q-0.484375 0.375 -0.484375 0.875q0 0.328125 0.203125 0.59375q0.203125 0.265625 0.640625 0.4375q0.25 0.09375 1.46875 0.4375q1.765625 0.46875 2.46875 0.765625q0.703125 0.296875 1.09375 0.875q0.40625 0.578125 0.40625 1.4375q0 0.828125 -0.484375 1.578125q-0.484375 0.734375 -1.40625 1.140625q-0.921875 0.390625 -2.078125 0.390625q-1.921875 0 -2.9375 -0.796875q-1.0 -0.796875 -1.28125 -2.359375zm10.015625 -8.75l0 -1.90625l1.671875 0l0 1.90625l-1.671875 0zm0 11.6875l0 -9.859375l1.671875 0l0 9.859375l-1.671875 0zm3.2541962 0l0 -1.359375l6.265625 -7.1875q-1.0625 0.046875 -1.875 0.046875l-4.015625 0l0 -1.359375l8.046875 0l0 1.109375l-5.34375 6.25l-1.015625 1.140625q1.109375 -0.078125 2.09375 -0.078125l4.5625 0l0 1.4375l-8.71875 0zm16.953125 -3.171875l1.71875 0.21875q-0.40625 1.5 -1.515625 2.34375q-1.09375 0.828125 -2.8125 0.828125q-2.15625 0 -3.421875 -1.328125q-1.265625 -1.328125 -1.265625 -3.734375q0 -2.484375 1.265625 -3.859375q1.28125 -1.375 3.328125 -1.375q1.984375 0 3.234375 1.34375q1.25 1.34375 1.25 3.796875q0 0.140625 -0.015625 0.4375l-7.34375 0q0.09375 1.625 0.921875 2.484375q0.828125 0.859375 2.0625 0.859375q0.90625 0 1.546875 -0.46875q0.65625 -0.484375 1.046875 -1.546875zm-5.484375 -2.703125l5.5 0q-0.109375 -1.234375 -0.625 -1.859375q-0.796875 -0.96875 -2.078125 -0.96875q-1.140625 0 -1.9375 0.78125q-0.78125 0.765625 -0.859375 2.046875zm23.07463 -2.125l-8.968735 0l0 -1.5625l8.968735 0l0 1.5625zm0 4.125l-8.968735 0l0 -1.546875l8.968735 0l0 1.546875zm13.125153 3.875l-1.671875 0l0 -10.640625q-0.59375 0.578125 -1.578125 1.15625q-0.984375 0.5625 -1.765625 0.859375l0 -1.625q1.40625 -0.65625 2.453125 -1.59375q1.046875 -0.9375 1.484375 -1.8125l1.078125 0l0 13.65625zm5.641327 4.0l-1.1875 0q2.765625 -4.453125 2.765625 -8.921875q0 -1.734375 -0.390625 -3.453125q-0.328125 -1.390625 -0.890625 -2.671875q-0.359375 -0.84375 -1.484375 -2.78125l1.1875 0q1.75 2.328125 2.578125 4.671875q0.71875 2.015625 0.71875 4.234375q0 2.5 -0.96875 4.84375q-0.953125 2.328125 -2.328125 4.078125z" fill-rule="nonzero"></path><path fill="#000000" fill-opacity="0.0" d="m103.0 180.96326l232.18896 0l0 58.992126l-232.18896 0z" fill-rule="evenodd"></path><path stroke="#000000" stroke-width="2.0" stroke-linejoin="round" stroke-linecap="butt" d="m103.0 180.96326l232.18896 0l0 58.992126l-232.18896 0z" fill-rule="evenodd"></path><path fill="#000000" d="m151.01154 207.88326l0 -13.59375l1.796875 0l0 11.984375l6.703125 0l0 1.609375l-8.5 0zm9.844467 -4.375l1.6875 -0.140625q0.125 1.015625 0.5625 1.671875q0.4375 0.65625 1.359375 1.0625q0.9375 0.40625 2.09375 0.40625q1.03125 0 1.8125 -0.3125q0.796875 -0.3125 1.1875 -0.84375q0.390625 -0.53125 0.390625 -1.15625q0 -0.640625 -0.375 -1.109375q-0.375 -0.484375 -1.234375 -0.8125q-0.546875 -0.21875 -2.421875 -0.65625q-1.875 -0.453125 -2.625 -0.859375q-0.96875 -0.515625 -1.453125 -1.265625q-0.46875 -0.75 -0.46875 -1.6875q0 -1.03125 0.578125 -1.921875q0.59375 -0.90625 1.703125 -1.359375q1.125 -0.46875 2.5 -0.46875q1.515625 0 2.671875 0.484375q1.15625 0.484375 1.765625 1.4375q0.625 0.9375 0.671875 2.140625l-1.71875 0.125q-0.140625 -1.28125 -0.953125 -1.9375q-0.796875 -0.671875 -2.359375 -0.671875q-1.625 0 -2.375 0.609375q-0.75 0.59375 -0.75 1.4375q0 0.734375 0.53125 1.203125q0.515625 0.46875 2.703125 0.96875q2.203125 0.5 3.015625 0.875q1.1875 0.546875 1.75 1.390625q0.578125 0.828125 0.578125 1.921875q0 1.09375 -0.625 2.0625q-0.625 0.953125 -1.796875 1.484375q-1.15625 0.53125 -2.609375 0.53125q-1.84375 0 -3.09375 -0.53125q-1.25 -0.546875 -1.96875 -1.625q-0.703125 -1.078125 -0.734375 -2.453125zm16.506073 4.375l0 -12.0l-4.46875 0l0 -1.59375l10.765625 0l0 1.59375l-4.5 0l0 12.0l-1.796875 0zm7.880356 0l0 -13.59375l2.71875 0l3.21875 9.625q0.4375 1.34375 0.640625 2.015625q0.234375 -0.75 0.734375 -2.1875l3.25 -9.453125l2.421875 0l0 13.59375l-1.734375 0l0 -11.390625l-3.953125 11.390625l-1.625 0l-3.9375 -11.578125l0 11.578125l-1.734375 0zm21.212677 0l-1.671875 0l0 -10.640625q-0.59375 0.578125 -1.578125 1.15625q-0.984375 0.5625 -1.765625 0.859375l0 -1.625q1.40625 -0.65625 2.453125 -1.59375q1.046875 -0.9375 1.484375 -1.8125l1.078125 0l0 13.65625zm12.918396 4.0q-1.375 -1.75 -2.328125 -4.078125q-0.953125 -2.34375 -0.953125 -4.84375q0 -2.21875 0.703125 -4.234375q0.84375 -2.34375 2.578125 -4.671875l1.203125 0q-1.125 1.921875 -1.484375 2.75q-0.5625 1.28125 -0.890625 2.671875q-0.40625 1.734375 -0.40625 3.484375q0 4.46875 2.78125 8.921875l-1.203125 0zm2.556427 -7.5625l1.765625 -0.15625q0.1875 1.28125 0.890625 1.9375q0.71875 0.640625 1.71875 0.640625q1.203125 0 2.03125 -0.90625q0.84375 -0.90625 0.84375 -2.421875q0 -1.421875 -0.8125 -2.25q-0.796875 -0.828125 -2.09375 -0.828125q-0.796875 0 -1.453125 0.375q-0.640625 0.359375 -1.015625 0.953125l-1.578125 -0.203125l1.328125 -7.0l6.765625 0l0 1.609375l-5.4375 0l-0.734375 3.640625q1.234375 -0.84375 2.578125 -0.84375q1.78125 0 3.0 1.234375q1.234375 1.234375 1.234375 3.171875q0 1.84375 -1.078125 3.1875q-1.3125 1.65625 -3.578125 1.65625q-1.859375 0 -3.03125 -1.03125q-1.171875 -1.046875 -1.34375 -2.765625zm16.672592 3.5625l-1.671875 0l0 -10.640625q-0.59375 0.578125 -1.578125 1.15625q-0.984375 0.5625 -1.765625 0.859375l0 -1.625q1.40625 -0.65625 2.453125 -1.59375q1.046875 -0.9375 1.484375 -1.8125l1.078125 0l0 13.65625zm12.860092 -1.609375l0 1.609375l-8.984375 0q-0.015625 -0.609375 0.1875 -1.15625q0.34375 -0.921875 1.09375 -1.8125q0.765625 -0.890625 2.1875 -2.0625q2.21875 -1.8125 3.0 -2.875q0.78125 -1.0625 0.78125 -2.015625q0 -0.984375 -0.71875 -1.671875q-0.703125 -0.6875 -1.84375 -0.6875q-1.203125 0 -1.9375 0.734375q-0.71875 0.71875 -0.71875 2.0l-1.71875 -0.171875q0.171875 -1.921875 1.328125 -2.921875q1.15625 -1.015625 3.09375 -1.015625q1.953125 0 3.09375 1.09375q1.140625 1.078125 1.140625 2.6875q0 0.8125 -0.34375 1.609375q-0.328125 0.78125 -1.109375 1.65625q-0.765625 0.859375 -2.5625 2.390625q-1.5 1.265625 -1.9375 1.71875q-0.421875 0.4375 -0.703125 0.890625l6.671875 0zm0.95384216 1.609375l3.5937347 -5.125l-3.3281097 -4.734375l2.09375 0l1.5156097 2.3125q0.421875 0.65625 0.671875 1.109375q0.421875 -0.609375 0.765625 -1.09375l1.65625 -2.328125l1.984375 0l-3.390625 4.640625l3.65625 5.21875l-2.046875 0l-2.03125 -3.0625l-0.53125 -0.828125l-2.5937347 3.890625l-2.015625 0zm16.26561 0l-1.671875 0l0 -10.640625q-0.59375 0.578125 -1.578125 1.15625q-0.984375 0.5625 -1.765625 0.859375l0 -1.625q1.40625 -0.65625 2.453125 -1.59375q1.046875 -0.9375 1.484375 -1.8125l1.078125 0l0 13.65625zm12.750732 -10.265625l-1.65625 0.125q-0.21875 -0.984375 -0.640625 -1.421875q-0.671875 -0.71875 -1.65625 -0.71875q-0.8125 0 -1.40625 0.4375q-0.796875 0.578125 -1.25 1.6875q-0.453125 1.09375 -0.46875 3.140625q0.609375 -0.921875 1.46875 -1.359375q0.875 -0.453125 1.828125 -0.453125q1.671875 0 2.84375 1.234375q1.171875 1.234375 1.171875 3.171875q0 1.28125 -0.546875 2.390625q-0.546875 1.09375 -1.515625 1.6875q-0.96875 0.578125 -2.1875 0.578125q-2.09375 0 -3.40625 -1.53125q-1.3125 -1.546875 -1.3125 -5.0625q0 -3.953125 1.453125 -5.734375q1.265625 -1.5625 3.421875 -1.5625q1.609375 0 2.625 0.90625q1.03125 0.890625 1.234375 2.484375zm-6.8125 5.859375q0 0.859375 0.359375 1.65625q0.375 0.78125 1.03125 1.203125q0.65625 0.40625 1.375 0.40625q1.0625 0 1.8125 -0.84375q0.765625 -0.859375 0.765625 -2.328125q0 -1.40625 -0.75 -2.21875q-0.75 -0.8125 -1.890625 -0.8125q-1.125 0 -1.921875 0.8125q-0.78125 0.8125 -0.78125 2.125zm10.078857 8.40625l-1.1875 0q2.765625 -4.453125 2.765625 -8.921875q0 -1.734375 -0.390625 -3.453125q-0.328125 -1.390625 -0.890625 -2.671875q-0.359375 -0.84375 -1.484375 -2.78125l1.1875 0q1.75 2.328125 2.578125 4.671875q0.71875 2.015625 0.71875 4.234375q0 2.5 -0.96875 4.84375q-0.953125 2.328125 -2.328125 4.078125z" fill-rule="nonzero"></path><path fill="#000000" d="m145.15926 233.6645l0 -13.640625l1.53125 0l0 1.28125q0.53125 -0.75 1.203125 -1.125q0.6875 -0.375 1.640625 -0.375q1.265625 0 2.234375 0.65625q0.96875 0.640625 1.453125 1.828125q0.5 1.1875 0.5 2.59375q0 1.515625 -0.546875 2.734375q-0.546875 1.203125 -1.578125 1.84375q-1.03125 0.640625 -2.171875 0.640625q-0.84375 0 -1.515625 -0.34375q-0.65625 -0.359375 -1.078125 -0.890625l0 4.796875l-1.671875 0zm1.515625 -8.65625q0 1.90625 0.765625 2.8125q0.78125 0.90625 1.875 0.90625q1.109375 0 1.890625 -0.9375q0.796875 -0.9375 0.796875 -2.921875q0 -1.875 -0.78125 -2.8125q-0.765625 -0.9375 -1.84375 -0.9375q-1.0625 0 -1.890625 1.0q-0.8125 1.0 -0.8125 2.890625zm8.844467 4.875l0 -9.859375l1.5 0l0 1.5q0.578125 -1.046875 1.0625 -1.375q0.484375 -0.34375 1.078125 -0.34375q0.84375 0 1.71875 0.546875l-0.578125 1.546875q-0.609375 -0.359375 -1.234375 -0.359375q-0.546875 0 -0.984375 0.328125q-0.421875 0.328125 -0.609375 0.90625q-0.28125 0.890625 -0.28125 1.953125l0 5.15625l-1.671875 0zm5.603302 -4.921875q0 -2.734375 1.53125 -4.0625q1.265625 -1.09375 3.09375 -1.09375q2.03125 0 3.3125 1.34375q1.296875 1.328125 1.296875 3.671875q0 1.90625 -0.578125 3.0q-0.5625 1.078125 -1.65625 1.6875q-1.078125 0.59375 -2.375 0.59375q-2.0625 0 -3.34375 -1.328125q-1.28125 -1.328125 -1.28125 -3.8125zm1.71875 0q0 1.890625 0.828125 2.828125q0.828125 0.9375 2.078125 0.9375q1.25 0 2.0625 -0.9375q0.828125 -0.953125 0.828125 -2.890625q0 -1.828125 -0.828125 -2.765625q-0.828125 -0.9375 -2.0625 -0.9375q-1.25 0 -2.078125 0.9375q-0.828125 0.9375 -0.828125 2.828125zm9.281967 -6.734375l0 -1.9375l1.65625 0l0 1.9375l-1.65625 0zm-2.125 15.484375l0.3125 -1.421875q0.5 0.125 0.796875 0.125q0.515625 0 0.765625 -0.34375q0.25 -0.328125 0.25 -1.6875l0 -10.359375l1.65625 0l0 10.390625q0 1.828125 -0.46875 2.546875q-0.59375 0.921875 -2.0 0.921875q-0.671875 0 -1.3125 -0.171875zm13.019821 -7.0l1.71875 0.21875q-0.40625 1.5 -1.515625 2.34375q-1.09375 0.828125 -2.8125 0.828125q-2.15625 0 -3.421875 -1.328125q-1.265625 -1.328125 -1.265625 -3.734375q0 -2.484375 1.265625 -3.859375q1.28125 -1.375 3.328125 -1.375q1.984375 0 3.234375 1.34375q1.25 1.34375 1.25 3.796875q0 0.140625 -0.015625 0.4375l-7.34375 0q0.09375 1.625 0.921875 2.484375q0.828125 0.859375 2.0625 0.859375q0.90625 0 1.546875 -0.46875q0.65625 -0.484375 1.046875 -1.546875zm-5.484375 -2.703125l5.5 0q-0.109375 -1.234375 -0.625 -1.859375q-0.796875 -0.96875 -2.078125 -0.96875q-1.140625 0 -1.9375 0.78125q-0.78125 0.765625 -0.859375 2.046875zm15.547592 2.265625l1.640625 0.21875q-0.265625 1.6875 -1.375 2.65625q-1.109375 0.953125 -2.734375 0.953125q-2.015625 0 -3.25 -1.3125q-1.21875 -1.328125 -1.21875 -3.796875q0 -1.59375 0.515625 -2.78125q0.53125 -1.203125 1.609375 -1.796875q1.09375 -0.609375 2.359375 -0.609375q1.609375 0 2.625 0.8125q1.015625 0.8125 1.3125 2.3125l-1.625 0.25q-0.234375 -1.0 -0.828125 -1.5q-0.59375 -0.5 -1.421875 -0.5q-1.265625 0 -2.0625 0.90625q-0.78125 0.90625 -0.78125 2.859375q0 1.984375 0.765625 2.890625q0.765625 0.890625 1.984375 0.890625q0.984375 0 1.640625 -0.59375q0.65625 -0.609375 0.84375 -1.859375zm6.546875 2.109375l0.234375 1.484375q-0.703125 0.140625 -1.265625 0.140625q-0.90625 0 -1.40625 -0.28125q-0.5 -0.296875 -0.703125 -0.75q-0.203125 -0.46875 -0.203125 -1.984375l0 -5.65625l-1.234375 0l0 -1.3125l1.234375 0l0 -2.4375l1.65625 -1.0l0 3.4375l1.6875 0l0 1.3125l-1.6875 0l0 5.75q0 0.71875 0.078125 0.921875q0.09375 0.203125 0.296875 0.328125q0.203125 0.125 0.578125 0.125q0.265625 0 0.734375 -0.078125zm1.5426788 -10.1875l0 -1.90625l1.671875 0l0 1.90625l-1.671875 0zm0 11.6875l0 -9.859375l1.671875 0l0 9.859375l-1.671875 0zm3.5041962 -4.921875q0 -2.734375 1.53125 -4.0625q1.265625 -1.09375 3.09375 -1.09375q2.03125 0 3.3125 1.34375q1.296875 1.328125 1.296875 3.671875q0 1.90625 -0.578125 3.0q-0.5625 1.078125 -1.65625 1.6875q-1.078125 0.59375 -2.375 0.59375q-2.0625 0 -3.34375 -1.328125q-1.28125 -1.328125 -1.28125 -3.8125zm1.71875 0q0 1.890625 0.828125 2.828125q0.828125 0.9375 2.078125 0.9375q1.25 0 2.0625 -0.9375q0.828125 -0.953125 0.828125 -2.890625q0 -1.828125 -0.828125 -2.765625q-0.828125 -0.9375 -2.0625 -0.9375q-1.25 0 -2.078125 0.9375q-0.828125 0.9375 -0.828125 2.828125zm9.281967 4.921875l0 -9.859375l1.5 0l0 1.40625q1.09375 -1.625 3.140625 -1.625q0.890625 0 1.640625 0.328125q0.75 0.3125 1.109375 0.84375q0.375 0.515625 0.53125 1.21875q0.09375 0.46875 0.09375 1.625l0 6.0625l-1.671875 0l0 -6.0q0 -1.015625 -0.203125 -1.515625q-0.1875 -0.515625 -0.6875 -0.8125q-0.5 -0.296875 -1.171875 -0.296875q-1.0625 0 -1.84375 0.671875q-0.765625 0.671875 -0.765625 2.578125l0 5.375l-1.671875 0zm14.887146 -2.9375l1.65625 -0.265625q0.140625 1.0 0.765625 1.53125q0.640625 0.515625 1.78125 0.515625q1.15625 0 1.703125 -0.46875q0.5625 -0.46875 0.5625 -1.09375q0 -0.5625 -0.484375 -0.890625q-0.34375 -0.21875 -1.703125 -0.5625q-1.84375 -0.46875 -2.5625 -0.796875q-0.703125 -0.34375 -1.078125 -0.9375q-0.359375 -0.609375 -0.359375 -1.328125q0 -0.65625 0.296875 -1.21875q0.3125 -0.5625 0.828125 -0.9375q0.390625 -0.28125 1.0625 -0.484375q0.671875 -0.203125 1.4375 -0.203125q1.171875 0 2.046875 0.34375q0.875 0.328125 1.28125 0.90625q0.421875 0.5625 0.578125 1.515625l-1.625 0.21875q-0.109375 -0.75 -0.65625 -1.171875q-0.53125 -0.4375 -1.5 -0.4375q-1.15625 0 -1.640625 0.390625q-0.484375 0.375 -0.484375 0.875q0 0.328125 0.203125 0.59375q0.203125 0.265625 0.640625 0.4375q0.25 0.09375 1.46875 0.4375q1.765625 0.46875 2.46875 0.765625q0.703125 0.296875 1.09375 0.875q0.40625 0.578125 0.40625 1.4375q0 0.828125 -0.484375 1.578125q-0.484375 0.734375 -1.40625 1.140625q-0.921875 0.390625 -2.078125 0.390625q-1.921875 0 -2.9375 -0.796875q-1.0 -0.796875 -1.28125 -2.359375zm10.015625 -8.75l0 -1.90625l1.671875 0l0 1.90625l-1.671875 0zm0 11.6875l0 -9.859375l1.671875 0l0 9.859375l-1.671875 0zm3.2541962 0l0 -1.359375l6.265625 -7.1875q-1.0625 0.046875 -1.875 0.046875l-4.015625 0l0 -1.359375l8.046875 0l0 1.109375l-5.34375 6.25l-1.015625 1.140625q1.109375 -0.078125 2.09375 -0.078125l4.5625 0l0 1.4375l-8.71875 0zm16.95311 -3.171875l1.71875 0.21875q-0.40625 1.5 -1.515625 2.34375q-1.09375 0.828125 -2.8125 0.828125q-2.15625 0 -3.4218597 -1.328125q-1.265625 -1.328125 -1.265625 -3.734375q0 -2.484375 1.265625 -3.859375q1.2812347 -1.375 3.3281097 -1.375q1.984375 0 3.234375 1.34375q1.25 1.34375 1.25 3.796875q0 0.140625 -0.015625 0.4375l-7.3437347 0q0.09375 1.625 0.92185974 2.484375q0.828125 0.859375 2.0625 0.859375q0.90625 0 1.546875 -0.46875q0.65625 -0.484375 1.046875 -1.546875zm-5.4843597 -2.703125l5.4999847 0q-0.109375 -1.234375 -0.625 -1.859375q-0.796875 -0.96875 -2.078125 -0.96875q-1.140625 0 -1.9375 0.78125q-0.78123474 0.765625 -0.85935974 2.046875zm9.578842 -2.078125l0 -1.90625l1.90625 0l0 1.90625l-1.90625 0zm0 7.953125l0 -1.90625l1.90625 0l0 1.90625l-1.90625 0zm9.444733 -3.59375l1.671875 -0.21875q0.28125 1.421875 0.96875 2.046875q0.703125 0.625 1.6875 0.625q1.1875 0 2.0 -0.8125q0.8125 -0.828125 0.8125 -2.03125q0 -1.140625 -0.765625 -1.890625q-0.75 -0.75 -1.90625 -0.75q-0.46875 0 -1.171875 0.1875l0.1875 -1.46875q0.15625 0.015625 0.265625 0.015625q1.0625 0 1.90625 -0.546875q0.859375 -0.5625 0.859375 -1.71875q0 -0.921875 -0.625 -1.515625q-0.609375 -0.609375 -1.59375 -0.609375q-0.96875 0 -1.625 0.609375q-0.640625 0.609375 -0.828125 1.84375l-1.671875 -0.296875q0.296875 -1.6875 1.375 -2.609375q1.09375 -0.921875 2.71875 -0.921875q1.109375 0 2.046875 0.484375q0.9375 0.46875 1.421875 1.296875q0.5 0.828125 0.5 1.75q0 0.890625 -0.46875 1.609375q-0.46875 0.71875 -1.40625 1.15625q1.21875 0.265625 1.875 1.15625q0.671875 0.875 0.671875 2.1875q0 1.78125 -1.296875 3.015625q-1.296875 1.234375 -3.28125 1.234375q-1.796875 0 -2.984375 -1.0625q-1.171875 -1.0625 -1.34375 -2.765625zm19.141327 1.984375l0 1.609375l-8.984375 0q-0.015625 -0.609375 0.1875 -1.15625q0.34375 -0.921875 1.09375 -1.8125q0.765625 -0.890625 2.1875 -2.0625q2.21875 -1.8125 3.0 -2.875q0.78125 -1.0625 0.78125 -2.015625q0 -0.984375 -0.71875 -1.671875q-0.703125 -0.6875 -1.84375 -0.6875q-1.203125 0 -1.9375 0.734375q-0.71875 0.71875 -0.71875 2.0l-1.71875 -0.171875q0.171875 -1.921875 1.328125 -2.921875q1.15625 -1.015625 3.09375 -1.015625q1.953125 0 3.09375 1.09375q1.140625 1.078125 1.140625 2.6875q0 0.8125 -0.34375 1.609375q-0.328125 0.78125 -1.109375 1.65625q-0.765625 0.859375 -2.5625 2.390625q-1.5 1.265625 -1.9375 1.71875q-0.421875 0.4375 -0.703125 0.890625l6.671875 0z" fill-rule="nonzero"></path><path fill="#000000" fill-opacity="0.0" d="m129.09448 653.0184l180.0 0l0 42.11023l-180.0 0z" fill-rule="evenodd"></path><path stroke="#000000" stroke-width="2.0" stroke-linejoin="round" stroke-linecap="butt" d="m129.09448 653.0184l180.0 0l0 42.11023l-180.0 0z" fill-rule="evenodd"></path><path fill="#000000" d="m150.8024 673.31335q0 -3.390625 1.8125 -5.296875q1.828125 -1.921875 4.703125 -1.921875q1.875 0 3.390625 0.90625q1.515625 0.890625 2.296875 2.5q0.796875 1.609375 0.796875 3.65625q0 2.0625 -0.84375 3.703125q-0.828125 1.625 -2.359375 2.46875q-1.53125 0.84375 -3.296875 0.84375q-1.921875 0 -3.4375 -0.921875q-1.5 -0.9375 -2.28125 -2.53125q-0.78125 -1.609375 -0.78125 -3.40625zm1.859375 0.03125q0 2.453125 1.3125 3.875q1.328125 1.40625 3.3125 1.40625q2.03125 0 3.34375 -1.421875q1.3125 -1.4375 1.3125 -4.0625q0 -1.65625 -0.5625 -2.890625q-0.546875 -1.234375 -1.640625 -1.921875q-1.078125 -0.6875 -2.421875 -0.6875q-1.90625 0 -3.28125 1.3125q-1.375 1.3125 -1.375 4.390625zm19.433304 6.59375l0 -1.453125q-1.140625 1.671875 -3.125 1.671875q-0.859375 0 -1.625 -0.328125q-0.75 -0.34375 -1.125 -0.84375q-0.359375 -0.5 -0.515625 -1.234375q-0.09375 -0.5 -0.09375 -1.5625l0 -6.109375l1.671875 0l0 5.46875q0 1.3125 0.09375 1.765625q0.15625 0.65625 0.671875 1.03125q0.515625 0.375 1.265625 0.375q0.75 0 1.40625 -0.375q0.65625 -0.390625 0.921875 -1.046875q0.28125 -0.671875 0.28125 -1.9375l0 -5.28125l1.671875 0l0 9.859375l-1.5 0zm7.578842 -1.5l0.234375 1.484375q-0.703125 0.140625 -1.265625 0.140625q-0.90625 0 -1.40625 -0.28125q-0.5 -0.296875 -0.703125 -0.75q-0.203125 -0.46875 -0.203125 -1.984375l0 -5.65625l-1.234375 0l0 -1.3125l1.234375 0l0 -2.4375l1.65625 -1.0l0 3.4375l1.6875 0l0 1.3125l-1.6875 0l0 5.75q0 0.71875 0.078125 0.921875q0.09375 0.203125 0.296875 0.328125q0.203125 0.125 0.578125 0.125q0.265625 0 0.734375 -0.078125zm1.5270538 5.28125l0 -13.640625l1.53125 0l0 1.28125q0.53125 -0.75 1.203125 -1.125q0.6875 -0.375 1.640625 -0.375q1.265625 0 2.234375 0.65625q0.96875 0.640625 1.453125 1.828125q0.5 1.1875 0.5 2.59375q0 1.515625 -0.546875 2.734375q-0.546875 1.203125 -1.578125 1.84375q-1.03125 0.640625 -2.171875 0.640625q-0.84375 0 -1.515625 -0.34375q-0.65625 -0.359375 -1.078125 -0.890625l0 4.796875l-1.671875 0zm1.515625 -8.65625q0 1.90625 0.765625 2.8125q0.78125 0.90625 1.875 0.90625q1.109375 0 1.890625 -0.9375q0.796875 -0.9375 0.796875 -2.921875q0 -1.875 -0.78125 -2.8125q-0.765625 -0.9375 -1.84375 -0.9375q-1.0625 0 -1.890625 1.0q-0.8125 1.0 -0.8125 2.890625zm15.313217 4.875l0 -1.453125q-1.140625 1.671875 -3.125 1.671875q-0.859375 0 -1.625 -0.328125q-0.75 -0.34375 -1.125 -0.84375q-0.359375 -0.5 -0.515625 -1.234375q-0.09375 -0.5 -0.09375 -1.5625l0 -6.109375l1.671875 0l0 5.46875q0 1.3125 0.09375 1.765625q0.15625 0.65625 0.671875 1.03125q0.515625 0.375 1.265625 0.375q0.75 0 1.40625 -0.375q0.65625 -0.390625 0.921875 -1.046875q0.28125 -0.671875 0.28125 -1.9375l0 -5.28125l1.671875 0l0 9.859375l-1.5 0zm7.578842 -1.5l0.234375 1.484375q-0.703125 0.140625 -1.265625 0.140625q-0.90625 0 -1.40625 -0.28125q-0.5 -0.296875 -0.703125 -0.75q-0.203125 -0.46875 -0.203125 -1.984375l0 -5.65625l-1.234375 0l0 -1.3125l1.234375 0l0 -2.4375l1.65625 -1.0l0 3.4375l1.6875 0l0 1.3125l-1.6875 0l0 5.75q0 0.71875 0.078125 0.921875q0.09375 0.203125 0.296875 0.328125q0.203125 0.125 0.578125 0.125q0.265625 0 0.734375 -0.078125zm9.897858 5.5q-1.375 -1.75 -2.328125 -4.078125q-0.953125 -2.34375 -0.953125 -4.84375q0 -2.21875 0.703125 -4.234375q0.84375 -2.34375 2.578125 -4.671875l1.203125 0q-1.125 1.921875 -1.484375 2.75q-0.5625 1.28125 -0.890625 2.671875q-0.40625 1.734375 -0.40625 3.484375q0 4.46875 2.78125 8.921875l-1.203125 0zm2.353302 -6.9375l1.65625 -0.265625q0.140625 1.0 0.765625 1.53125q0.640625 0.515625 1.78125 0.515625q1.15625 0 1.703125 -0.46875q0.5625 -0.46875 0.5625 -1.09375q0 -0.5625 -0.484375 -0.890625q-0.34375 -0.21875 -1.703125 -0.5625q-1.84375 -0.46875 -2.5625 -0.796875q-0.703125 -0.34375 -1.078125 -0.9375q-0.359375 -0.609375 -0.359375 -1.328125q0 -0.65625 0.296875 -1.21875q0.3125 -0.5625 0.828125 -0.9375q0.390625 -0.28125 1.0625 -0.484375q0.671875 -0.203125 1.4375 -0.203125q1.171875 0 2.046875 0.34375q0.875 0.328125 1.28125 0.90625q0.421875 0.5625 0.578125 1.515625l-1.625 0.21875q-0.109375 -0.75 -0.65625 -1.171875q-0.53125 -0.4375 -1.5 -0.4375q-1.15625 0 -1.640625 0.390625q-0.484375 0.375 -0.484375 0.875q0 0.328125 0.203125 0.59375q0.203125 0.265625 0.640625 0.4375q0.25 0.09375 1.46875 0.4375q1.765625 0.46875 2.46875 0.765625q0.703125 0.296875 1.09375 0.875q0.40625 0.578125 0.40625 1.4375q0 0.828125 -0.484375 1.578125q-0.484375 0.734375 -1.40625 1.140625q-0.921875 0.390625 -2.078125 0.390625q-1.921875 0 -2.9375 -0.796875q-1.0 -0.796875 -1.28125 -2.359375zm10.015625 -8.75l0 -1.90625l1.671875 0l0 1.90625l-1.671875 0zm0 11.6875l0 -9.859375l1.671875 0l0 9.859375l-1.671875 0zm3.2541962 0l0 -1.359375l6.265625 -7.1875q-1.0625 0.046875 -1.875 0.046875l-4.015625 0l0 -1.359375l8.046875 0l0 1.109375l-5.34375 6.25l-1.015625 1.140625q1.109375 -0.078125 2.09375 -0.078125l4.5625 0l0 1.4375l-8.71875 0zm16.953125 -3.171875l1.71875 0.21875q-0.40625 1.5 -1.515625 2.34375q-1.09375 0.828125 -2.8125 0.828125q-2.15625 0 -3.421875 -1.328125q-1.265625 -1.328125 -1.265625 -3.734375q0 -2.484375 1.265625 -3.859375q1.28125 -1.375 3.328125 -1.375q1.984375 0 3.234375 1.34375q1.25 1.34375 1.25 3.796875q0 0.140625 -0.015625 0.4375l-7.34375 0q0.09375 1.625 0.921875 2.484375q0.828125 0.859375 2.0625 0.859375q0.90625 0 1.546875 -0.46875q0.65625 -0.484375 1.046875 -1.546875zm-5.484375 -2.703125l5.5 0q-0.109375 -1.234375 -0.625 -1.859375q-0.796875 -0.96875 -2.078125 -0.96875q-1.140625 0 -1.9375 0.78125q-0.78125 0.765625 -0.859375 2.046875zm23.074646 -2.125l-8.96875 0l0 -1.5625l8.96875 0l0 1.5625zm0 4.125l-8.96875 0l0 -1.546875l8.96875 0l0 1.546875zm13.125153 3.875l-1.671875 0l0 -10.640625q-0.59375 0.578125 -1.578125 1.15625q-0.984375 0.5625 -1.765625 0.859375l0 -1.625q1.40625 -0.65625 2.453125 -1.59375q1.046875 -0.9375 1.484375 -1.8125l1.078125 0l0 13.65625zm5.641327 4.0l-1.1875 0q2.765625 -4.453125 2.765625 -8.921875q0 -1.734375 -0.390625 -3.453125q-0.328125 -1.390625 -0.890625 -2.671875q-0.359375 -0.84375 -1.484375 -2.78125l1.1875 0q1.75 2.328125 2.578125 4.671875q0.71875 2.015625 0.71875 4.234375q0 2.5 -0.96875 4.84375q-0.953125 2.328125 -2.328125 4.078125z" fill-rule="nonzero"></path><path fill="#000000" fill-opacity="0.0" d="m219.09448 239.95538l0 21.543304" fill-rule="evenodd"></path><path stroke="#000000" stroke-width="1.0" stroke-linejoin="round" stroke-linecap="butt" d="m219.09448 239.95538l0 15.543304" fill-rule="evenodd"></path><path fill="#000000" stroke="#000000" stroke-width="1.0" stroke-linecap="butt" d="m217.44275 255.49869l1.6517334 4.538086l1.6517334 -4.538086z" fill-rule="evenodd"></path><path fill="#000000" fill-opacity="0.0" d="m219.09448 320.48557l0 21.543304" fill-rule="evenodd"></path><path stroke="#000000" stroke-width="1.0" stroke-linejoin="round" stroke-linecap="butt" d="m219.09448 320.48557l0 15.543304" fill-rule="evenodd"></path><path fill="#000000" stroke="#000000" stroke-width="1.0" stroke-linecap="butt" d="m217.44275 336.02887l1.6517334 4.538086l1.6517334 -4.538086z" fill-rule="evenodd"></path><path fill="#000000" fill-opacity="0.0" d="m219.00787 72.81108l0.09448242 25.732285" fill-rule="evenodd"></path><path stroke="#000000" stroke-width="1.0" stroke-linejoin="round" stroke-linecap="butt" d="m219.00787 72.81108l0.07246399 19.732315" fill-rule="evenodd"></path><path fill="#000000" stroke="#000000" stroke-width="1.0" stroke-linecap="butt" d="m217.4286 92.54946l1.668396 4.5320053l1.6350555 -4.544136z" fill-rule="evenodd"></path><path fill="#000000" fill-opacity="0.0" d="m219.09448 401.01575l0 19.40158" fill-rule="evenodd"></path><path stroke="#000000" stroke-width="1.0" stroke-linejoin="round" stroke-linecap="butt" d="m219.09448 401.01575l0 13.401581" fill-rule="evenodd"></path><path fill="#000000" stroke="#000000" stroke-width="1.0" stroke-linecap="butt" d="m217.44275 414.41733l1.6517334 4.538086l1.6517334 -4.538086z" fill-rule="evenodd"></path><path fill="#000000" fill-opacity="0.0" d="m103.0 261.49344l232.18896 0l0 58.992126l-232.18896 0z" fill-rule="evenodd"></path><path stroke="#000000" stroke-width="2.0" stroke-linejoin="round" stroke-linecap="butt" d="m103.0 261.49344l232.18896 0l0 58.992126l-232.18896 0z" fill-rule="evenodd"></path><path fill="#000000" d="m145.82367 288.41342l0 -13.59375l1.796875 0l0 11.984375l6.703125 0l0 1.609375l-8.5 0zm9.844482 -4.375l1.6875 -0.140625q0.125 1.015625 0.5625 1.671875q0.4375 0.65625 1.359375 1.0625q0.9375 0.40625 2.09375 0.40625q1.03125 0 1.8125 -0.3125q0.796875 -0.3125 1.1875 -0.84375q0.390625 -0.53125 0.390625 -1.15625q0 -0.640625 -0.375 -1.109375q-0.375 -0.484375 -1.234375 -0.8125q-0.546875 -0.21875 -2.421875 -0.65625q-1.875 -0.453125 -2.625 -0.859375q-0.96875 -0.515625 -1.453125 -1.265625q-0.46875 -0.75 -0.46875 -1.6875q0 -1.03125 0.578125 -1.921875q0.59375 -0.90625 1.703125 -1.359375q1.125 -0.46875 2.5 -0.46875q1.515625 0 2.671875 0.484375q1.15625 0.484375 1.765625 1.4375q0.625 0.9375 0.671875 2.140625l-1.71875 0.125q-0.140625 -1.28125 -0.953125 -1.9375q-0.796875 -0.671875 -2.359375 -0.671875q-1.625 0 -2.375 0.609375q-0.75 0.59375 -0.75 1.4375q0 0.734375 0.53125 1.203125q0.515625 0.46875 2.703125 0.96875q2.203125 0.5 3.015625 0.875q1.1875 0.546875 1.75 1.390625q0.578125 0.828125 0.578125 1.921875q0 1.09375 -0.625 2.0625q-0.625 0.953125 -1.796875 1.484375q-1.15625 0.53125 -2.609375 0.53125q-1.84375 0 -3.09375 -0.53125q-1.25 -0.546875 -1.96875 -1.625q-0.703125 -1.078125 -0.734375 -2.453125zm16.506073 4.375l0 -12.0l-4.46875 0l0 -1.59375l10.765625 0l0 1.59375l-4.5 0l0 12.0l-1.796875 0zm7.8803406 0l0 -13.59375l2.71875 0l3.21875 9.625q0.4375 1.34375 0.640625 2.015625q0.234375 -0.75 0.734375 -2.1875l3.25 -9.453125l2.421875 0l0 13.59375l-1.734375 0l0 -11.390625l-3.953125 11.390625l-1.625 0l-3.9375 -11.578125l0 11.578125l-1.734375 0zm23.697052 -1.609375l0 1.609375l-8.984375 0q-0.015625 -0.609375 0.1875 -1.15625q0.34375 -0.921875 1.09375 -1.8125q0.765625 -0.890625 2.1875 -2.0625q2.21875 -1.8125 3.0 -2.875q0.78125 -1.0625 0.78125 -2.015625q0 -0.984375 -0.71875 -1.671875q-0.703125 -0.6875 -1.84375 -0.6875q-1.203125 0 -1.9375 0.734375q-0.71875 0.71875 -0.71875 2.0l-1.71875 -0.171875q0.171875 -1.921875 1.328125 -2.921875q1.15625 -1.015625 3.09375 -1.015625q1.953125 0 3.09375 1.09375q1.140625 1.078125 1.140625 2.6875q0 0.8125 -0.34375 1.609375q-0.328125 0.78125 -1.109375 1.65625q-0.765625 0.859375 -2.5625 2.390625q-1.5 1.265625 -1.9375 1.71875q-0.421875 0.4375 -0.703125 0.890625l6.671875 0zm10.434021 5.609375q-1.375 -1.75 -2.328125 -4.078125q-0.953125 -2.34375 -0.953125 -4.84375q0 -2.21875 0.703125 -4.234375q0.84375 -2.34375 2.578125 -4.671875l1.203125 0q-1.125 1.921875 -1.484375 2.75q-0.5625 1.28125 -0.890625 2.671875q-0.40625 1.734375 -0.40625 3.484375q0 4.46875 2.78125 8.921875l-1.203125 0zm2.556427 -7.5625l1.765625 -0.15625q0.1875 1.28125 0.890625 1.9375q0.71875 0.640625 1.71875 0.640625q1.203125 0 2.03125 -0.90625q0.84375 -0.90625 0.84375 -2.421875q0 -1.421875 -0.8125 -2.25q-0.796875 -0.828125 -2.09375 -0.828125q-0.796875 0 -1.453125 0.375q-0.640625 0.359375 -1.015625 0.953125l-1.578125 -0.203125l1.328125 -7.0l6.765625 0l0 1.609375l-5.4375 0l-0.734375 3.640625q1.234375 -0.84375 2.578125 -0.84375q1.78125 0 3.0 1.234375q1.234375 1.234375 1.234375 3.171875q0 1.84375 -1.078125 3.1875q-1.3125 1.65625 -3.578125 1.65625q-1.859375 0 -3.03125 -1.03125q-1.171875 -1.046875 -1.34375 -2.765625zm16.672607 3.5625l-1.671875 0l0 -10.640625q-0.59375 0.578125 -1.578125 1.15625q-0.984375 0.5625 -1.765625 0.859375l0 -1.625q1.40625 -0.65625 2.453125 -1.59375q1.046875 -0.9375 1.484375 -1.8125l1.078125 0l0 13.65625zm12.860077 -1.609375l0 1.609375l-8.984375 0q-0.015625 -0.609375 0.1875 -1.15625q0.34375 -0.921875 1.09375 -1.8125q0.765625 -0.890625 2.1875 -2.0625q2.21875 -1.8125 3.0 -2.875q0.78125 -1.0625 0.78125 -2.015625q0 -0.984375 -0.71875 -1.671875q-0.703125 -0.6875 -1.84375 -0.6875q-1.203125 0 -1.9375 0.734375q-0.71875 0.71875 -0.71875 2.0l-1.71875 -0.171875q0.171875 -1.921875 1.328125 -2.921875q1.15625 -1.015625 3.09375 -1.015625q1.953125 0 3.09375 1.09375q1.140625 1.078125 1.140625 2.6875q0 0.8125 -0.34375 1.609375q-0.328125 0.78125 -1.109375 1.65625q-0.765625 0.859375 -2.5625 2.390625q-1.5 1.265625 -1.9375 1.71875q-0.421875 0.4375 -0.703125 0.890625l6.671875 0zm0.9538574 1.609375l3.59375 -5.125l-3.328125 -4.734375l2.09375 0l1.515625 2.3125q0.421875 0.65625 0.671875 1.109375q0.421875 -0.609375 0.765625 -1.09375l1.65625 -2.328125l1.984375 0l-3.390625 4.640625l3.65625 5.21875l-2.046875 0l-2.03125 -3.0625l-0.53125 -0.828125l-2.59375 3.890625l-2.015625 0zm9.96875 -3.5625l1.765625 -0.15625q0.1875 1.28125 0.890625 1.9375q0.71875 0.640625 1.71875 0.640625q1.203125 0 2.03125 -0.90625q0.84375 -0.90625 0.84375 -2.421875q0 -1.421875 -0.8125 -2.25q-0.796875 -0.828125 -2.09375 -0.828125q-0.796875 0 -1.453125 0.375q-0.640625 0.359375 -1.015625 0.953125l-1.578125 -0.203125l1.328125 -7.0l6.765625 0l0 1.609375l-5.4375 0l-0.734375 3.640625q1.234375 -0.84375 2.578125 -0.84375q1.78125 0 3.0 1.234375q1.234375 1.234375 1.234375 3.171875q0 1.84375 -1.078125 3.1875q-1.3125 1.65625 -3.578125 1.65625q-1.859375 0 -3.03125 -1.03125q-1.171875 -1.046875 -1.34375 -2.765625zm16.672577 3.5625l-1.671875 0l0 -10.640625q-0.59375 0.578125 -1.578125 1.15625q-0.984375 0.5625 -1.765625 0.859375l0 -1.625q1.40625 -0.65625 2.453125 -1.59375q1.046875 -0.9375 1.484375 -1.8125l1.078125 0l0 13.65625zm12.860107 -1.609375l0 1.609375l-8.984375 0q-0.015625 -0.609375 0.1875 -1.15625q0.34375 -0.921875 1.09375 -1.8125q0.765625 -0.890625 2.1875 -2.0625q2.21875 -1.8125 3.0 -2.875q0.78125 -1.0625 0.78125 -2.015625q0 -0.984375 -0.71875 -1.671875q-0.703125 -0.6875 -1.84375 -0.6875q-1.203125 0 -1.9375 0.734375q-0.71875 0.71875 -0.71875 2.0l-1.71875 -0.171875q0.171875 -1.921875 1.328125 -2.921875q1.15625 -1.015625 3.09375 -1.015625q1.953125 0 3.09375 1.09375q1.140625 1.078125 1.140625 2.6875q0 0.8125 -0.34375 1.609375q-0.328125 0.78125 -1.109375 1.65625q-0.765625 0.859375 -2.5625 2.390625q-1.5 1.265625 -1.9375 1.71875q-0.421875 0.4375 -0.703125 0.890625l6.671875 0zm3.156952 5.609375l-1.1875 0q2.765625 -4.453125 2.765625 -8.921875q0 -1.734375 -0.390625 -3.453125q-0.328125 -1.390625 -0.890625 -2.671875q-0.359375 -0.84375 -1.484375 -2.78125l1.1875 0q1.75 2.328125 2.578125 4.671875q0.71875 2.015625 0.71875 4.234375q0 2.5 -0.96875 4.84375q-0.953125 2.328125 -2.328125 4.078125z" fill-rule="nonzero"></path><path fill="#000000" d="m145.15926 314.19467l0 -13.640625l1.53125 0l0 1.28125q0.53125 -0.75 1.203125 -1.125q0.6875 -0.375 1.640625 -0.375q1.265625 0 2.234375 0.65625q0.96875 0.640625 1.453125 1.828125q0.5 1.1875 0.5 2.59375q0 1.515625 -0.546875 2.734375q-0.546875 1.203125 -1.578125 1.84375q-1.03125 0.640625 -2.171875 0.640625q-0.84375 0 -1.515625 -0.34375q-0.65625 -0.359375 -1.078125 -0.890625l0 4.796875l-1.671875 0zm1.515625 -8.65625q0 1.90625 0.765625 2.8125q0.78125 0.90625 1.875 0.90625q1.109375 0 1.890625 -0.9375q0.796875 -0.9375 0.796875 -2.921875q0 -1.875 -0.78125 -2.8125q-0.765625 -0.9375 -1.84375 -0.9375q-1.0625 0 -1.890625 1.0q-0.8125 1.0 -0.8125 2.890625zm8.844467 4.875l0 -9.859375l1.5 0l0 1.5q0.578125 -1.046875 1.0625 -1.375q0.484375 -0.34375 1.078125 -0.34375q0.84375 0 1.71875 0.546875l-0.578125 1.546875q-0.609375 -0.359375 -1.234375 -0.359375q-0.546875 0 -0.984375 0.328125q-0.421875 0.328125 -0.609375 0.90625q-0.28125 0.890625 -0.28125 1.953125l0 5.15625l-1.671875 0zm5.603302 -4.921875q0 -2.734375 1.53125 -4.0625q1.265625 -1.09375 3.09375 -1.09375q2.03125 0 3.3125 1.34375q1.296875 1.328125 1.296875 3.671875q0 1.90625 -0.578125 3.0q-0.5625 1.078125 -1.65625 1.6875q-1.078125 0.59375 -2.375 0.59375q-2.0625 0 -3.34375 -1.328125q-1.28125 -1.328125 -1.28125 -3.8125zm1.71875 0q0 1.890625 0.828125 2.828125q0.828125 0.9375 2.078125 0.9375q1.25 0 2.0625 -0.9375q0.828125 -0.953125 0.828125 -2.890625q0 -1.828125 -0.828125 -2.765625q-0.828125 -0.9375 -2.0625 -0.9375q-1.25 0 -2.078125 0.9375q-0.828125 0.9375 -0.828125 2.828125zm9.281967 -6.734375l0 -1.9375l1.65625 0l0 1.9375l-1.65625 0zm-2.125 15.484375l0.3125 -1.421875q0.5 0.125 0.796875 0.125q0.515625 0 0.765625 -0.34375q0.25 -0.328125 0.25 -1.6875l0 -10.359375l1.65625 0l0 10.390625q0 1.828125 -0.46875 2.546875q-0.59375 0.921875 -2.0 0.921875q-0.671875 0 -1.3125 -0.171875zm13.019821 -7.0l1.71875 0.21875q-0.40625 1.5 -1.515625 2.34375q-1.09375 0.828125 -2.8125 0.828125q-2.15625 0 -3.421875 -1.328125q-1.265625 -1.328125 -1.265625 -3.734375q0 -2.484375 1.265625 -3.859375q1.28125 -1.375 3.328125 -1.375q1.984375 0 3.234375 1.34375q1.25 1.34375 1.25 3.796875q0 0.140625 -0.015625 0.4375l-7.34375 0q0.09375 1.625 0.921875 2.484375q0.828125 0.859375 2.0625 0.859375q0.90625 0 1.546875 -0.46875q0.65625 -0.484375 1.046875 -1.546875zm-5.484375 -2.703125l5.5 0q-0.109375 -1.234375 -0.625 -1.859375q-0.796875 -0.96875 -2.078125 -0.96875q-1.140625 0 -1.9375 0.78125q-0.78125 0.765625 -0.859375 2.046875zm15.547592 2.265625l1.640625 0.21875q-0.265625 1.6875 -1.375 2.65625q-1.109375 0.953125 -2.734375 0.953125q-2.015625 0 -3.25 -1.3125q-1.21875 -1.328125 -1.21875 -3.796875q0 -1.59375 0.515625 -2.78125q0.53125 -1.203125 1.609375 -1.796875q1.09375 -0.609375 2.359375 -0.609375q1.609375 0 2.625 0.8125q1.015625 0.8125 1.3125 2.3125l-1.625 0.25q-0.234375 -1.0 -0.828125 -1.5q-0.59375 -0.5 -1.421875 -0.5q-1.265625 0 -2.0625 0.90625q-0.78125 0.90625 -0.78125 2.859375q0 1.984375 0.765625 2.890625q0.765625 0.890625 1.984375 0.890625q0.984375 0 1.640625 -0.59375q0.65625 -0.609375 0.84375 -1.859375zm6.546875 2.109375l0.234375 1.484375q-0.703125 0.140625 -1.265625 0.140625q-0.90625 0 -1.40625 -0.28125q-0.5 -0.296875 -0.703125 -0.75q-0.203125 -0.46875 -0.203125 -1.984375l0 -5.65625l-1.234375 0l0 -1.3125l1.234375 0l0 -2.4375l1.65625 -1.0l0 3.4375l1.6875 0l0 1.3125l-1.6875 0l0 5.75q0 0.71875 0.078125 0.921875q0.09375 0.203125 0.296875 0.328125q0.203125 0.125 0.578125 0.125q0.265625 0 0.734375 -0.078125zm1.5426788 -10.1875l0 -1.90625l1.671875 0l0 1.90625l-1.671875 0zm0 11.6875l0 -9.859375l1.671875 0l0 9.859375l-1.671875 0zm3.5041962 -4.921875q0 -2.734375 1.53125 -4.0625q1.265625 -1.09375 3.09375 -1.09375q2.03125 0 3.3125 1.34375q1.296875 1.328125 1.296875 3.671875q0 1.90625 -0.578125 3.0q-0.5625 1.078125 -1.65625 1.6875q-1.078125 0.59375 -2.375 0.59375q-2.0625 0 -3.34375 -1.328125q-1.28125 -1.328125 -1.28125 -3.8125zm1.71875 0q0 1.890625 0.828125 2.828125q0.828125 0.9375 2.078125 0.9375q1.25 0 2.0625 -0.9375q0.828125 -0.953125 0.828125 -2.890625q0 -1.828125 -0.828125 -2.765625q-0.828125 -0.9375 -2.0625 -0.9375q-1.25 0 -2.078125 0.9375q-0.828125 0.9375 -0.828125 2.828125zm9.281967 4.921875l0 -9.859375l1.5 0l0 1.40625q1.09375 -1.625 3.140625 -1.625q0.890625 0 1.640625 0.328125q0.75 0.3125 1.109375 0.84375q0.375 0.515625 0.53125 1.21875q0.09375 0.46875 0.09375 1.625l0 6.0625l-1.671875 0l0 -6.0q0 -1.015625 -0.203125 -1.515625q-0.1875 -0.515625 -0.6875 -0.8125q-0.5 -0.296875 -1.171875 -0.296875q-1.0625 0 -1.84375 0.671875q-0.765625 0.671875 -0.765625 2.578125l0 5.375l-1.671875 0zm14.887146 -2.9375l1.65625 -0.265625q0.140625 1.0 0.765625 1.53125q0.640625 0.515625 1.78125 0.515625q1.15625 0 1.703125 -0.46875q0.5625 -0.46875 0.5625 -1.09375q0 -0.5625 -0.484375 -0.890625q-0.34375 -0.21875 -1.703125 -0.5625q-1.84375 -0.46875 -2.5625 -0.796875q-0.703125 -0.34375 -1.078125 -0.9375q-0.359375 -0.609375 -0.359375 -1.328125q0 -0.65625 0.296875 -1.21875q0.3125 -0.5625 0.828125 -0.9375q0.390625 -0.28125 1.0625 -0.484375q0.671875 -0.203125 1.4375 -0.203125q1.171875 0 2.046875 0.34375q0.875 0.328125 1.28125 0.90625q0.421875 0.5625 0.578125 1.515625l-1.625 0.21875q-0.109375 -0.75 -0.65625 -1.171875q-0.53125 -0.4375 -1.5 -0.4375q-1.15625 0 -1.640625 0.390625q-0.484375 0.375 -0.484375 0.875q0 0.328125 0.203125 0.59375q0.203125 0.265625 0.640625 0.4375q0.25 0.09375 1.46875 0.4375q1.765625 0.46875 2.46875 0.765625q0.703125 0.296875 1.09375 0.875q0.40625 0.578125 0.40625 1.4375q0 0.828125 -0.484375 1.578125q-0.484375 0.734375 -1.40625 1.140625q-0.921875 0.390625 -2.078125 0.390625q-1.921875 0 -2.9375 -0.796875q-1.0 -0.796875 -1.28125 -2.359375zm10.015625 -8.75l0 -1.90625l1.671875 0l0 1.90625l-1.671875 0zm0 11.6875l0 -9.859375l1.671875 0l0 9.859375l-1.671875 0zm3.2541962 0l0 -1.359375l6.265625 -7.1875q-1.0625 0.046875 -1.875 0.046875l-4.015625 0l0 -1.359375l8.046875 0l0 1.109375l-5.34375 6.25l-1.015625 1.140625q1.109375 -0.078125 2.09375 -0.078125l4.5625 0l0 1.4375l-8.71875 0zm16.95311 -3.171875l1.71875 0.21875q-0.40625 1.5 -1.515625 2.34375q-1.09375 0.828125 -2.8125 0.828125q-2.15625 0 -3.4218597 -1.328125q-1.265625 -1.328125 -1.265625 -3.734375q0 -2.484375 1.265625 -3.859375q1.2812347 -1.375 3.3281097 -1.375q1.984375 0 3.234375 1.34375q1.25 1.34375 1.25 3.796875q0 0.140625 -0.015625 0.4375l-7.3437347 0q0.09375 1.625 0.92185974 2.484375q0.828125 0.859375 2.0625 0.859375q0.90625 0 1.546875 -0.46875q0.65625 -0.484375 1.046875 -1.546875zm-5.4843597 -2.703125l5.4999847 0q-0.109375 -1.234375 -0.625 -1.859375q-0.796875 -0.96875 -2.078125 -0.96875q-1.140625 0 -1.9375 0.78125q-0.78123474 0.765625 -0.85935974 2.046875zm9.578842 -2.078125l0 -1.90625l1.90625 0l0 1.90625l-1.90625 0zm0 7.953125l0 -1.90625l1.90625 0l0 1.90625l-1.90625 0zm9.444733 -3.59375l1.671875 -0.21875q0.28125 1.421875 0.96875 2.046875q0.703125 0.625 1.6875 0.625q1.1875 0 2.0 -0.8125q0.8125 -0.828125 0.8125 -2.03125q0 -1.140625 -0.765625 -1.890625q-0.75 -0.75 -1.90625 -0.75q-0.46875 0 -1.171875 0.1875l0.1875 -1.46875q0.15625 0.015625 0.265625 0.015625q1.0625 0 1.90625 -0.546875q0.859375 -0.5625 0.859375 -1.71875q0 -0.921875 -0.625 -1.515625q-0.609375 -0.609375 -1.59375 -0.609375q-0.96875 0 -1.625 0.609375q-0.640625 0.609375 -0.828125 1.84375l-1.671875 -0.296875q0.296875 -1.6875 1.375 -2.609375q1.09375 -0.921875 2.71875 -0.921875q1.109375 0 2.046875 0.484375q0.9375 0.46875 1.421875 1.296875q0.5 0.828125 0.5 1.75q0 0.890625 -0.46875 1.609375q-0.46875 0.71875 -1.40625 1.15625q1.21875 0.265625 1.875 1.15625q0.671875 0.875 0.671875 2.1875q0 1.78125 -1.296875 3.015625q-1.296875 1.234375 -3.28125 1.234375q-1.796875 0 -2.984375 -1.0625q-1.171875 -1.0625 -1.34375 -2.765625zm19.141327 1.984375l0 1.609375l-8.984375 0q-0.015625 -0.609375 0.1875 -1.15625q0.34375 -0.921875 1.09375 -1.8125q0.765625 -0.890625 2.1875 -2.0625q2.21875 -1.8125 3.0 -2.875q0.78125 -1.0625 0.78125 -2.015625q0 -0.984375 -0.71875 -1.671875q-0.703125 -0.6875 -1.84375 -0.6875q-1.203125 0 -1.9375 0.734375q-0.71875 0.71875 -0.71875 2.0l-1.71875 -0.171875q0.171875 -1.921875 1.328125 -2.921875q1.15625 -1.015625 3.09375 -1.015625q1.953125 0 3.09375 1.09375q1.140625 1.078125 1.140625 2.6875q0 0.8125 -0.34375 1.609375q-0.328125 0.78125 -1.109375 1.65625q-0.765625 0.859375 -2.5625 2.390625q-1.5 1.265625 -1.9375 1.71875q-0.421875 0.4375 -0.703125 0.890625l6.671875 0z" fill-rule="nonzero"></path><path fill="#000000" fill-opacity="0.0" d="m103.0 342.02362l232.18896 0l0 58.992126l-232.18896 0z" fill-rule="evenodd"></path><path stroke="#000000" stroke-width="2.0" stroke-linejoin="round" stroke-linecap="butt" d="m103.0 342.02362l232.18896 0l0 58.992126l-232.18896 0z" fill-rule="evenodd"></path><path fill="#000000" d="m145.82367 368.94363l0 -13.59375l1.796875 0l0 11.984375l6.703125 0l0 1.609375l-8.5 0zm9.844482 -4.375l1.6875 -0.140625q0.125 1.015625 0.5625 1.671875q0.4375 0.65625 1.359375 1.0625q0.9375 0.40625 2.09375 0.40625q1.03125 0 1.8125 -0.3125q0.796875 -0.3125 1.1875 -0.84375q0.390625 -0.53125 0.390625 -1.15625q0 -0.640625 -0.375 -1.109375q-0.375 -0.484375 -1.234375 -0.8125q-0.546875 -0.21875 -2.421875 -0.65625q-1.875 -0.453125 -2.625 -0.859375q-0.96875 -0.515625 -1.453125 -1.265625q-0.46875 -0.75 -0.46875 -1.6875q0 -1.03125 0.578125 -1.921875q0.59375 -0.90625 1.703125 -1.359375q1.125 -0.46875 2.5 -0.46875q1.515625 0 2.671875 0.484375q1.15625 0.484375 1.765625 1.4375q0.625 0.9375 0.671875 2.140625l-1.71875 0.125q-0.140625 -1.28125 -0.953125 -1.9375q-0.796875 -0.671875 -2.359375 -0.671875q-1.625 0 -2.375 0.609375q-0.75 0.59375 -0.75 1.4375q0 0.734375 0.53125 1.203125q0.515625 0.46875 2.703125 0.96875q2.203125 0.5 3.015625 0.875q1.1875 0.546875 1.75 1.390625q0.578125 0.828125 0.578125 1.921875q0 1.09375 -0.625 2.0625q-0.625 0.953125 -1.796875 1.484375q-1.15625 0.53125 -2.609375 0.53125q-1.84375 0 -3.09375 -0.53125q-1.25 -0.546875 -1.96875 -1.625q-0.703125 -1.078125 -0.734375 -2.453125zm16.506073 4.375l0 -12.0l-4.46875 0l0 -1.59375l10.765625 0l0 1.59375l-4.5 0l0 12.0l-1.796875 0zm7.8803406 0l0 -13.59375l2.71875 0l3.21875 9.625q0.4375 1.34375 0.640625 2.015625q0.234375 -0.75 0.734375 -2.1875l3.25 -9.453125l2.421875 0l0 13.59375l-1.734375 0l0 -11.390625l-3.953125 11.390625l-1.625 0l-3.9375 -11.578125l0 11.578125l-1.734375 0zm14.931427 -3.59375l1.671875 -0.21875q0.28125 1.421875 0.96875 2.046875q0.703125 0.625 1.6875 0.625q1.1875 0 2.0 -0.8125q0.8125 -0.828125 0.8125 -2.03125q0 -1.140625 -0.765625 -1.890625q-0.75 -0.75 -1.90625 -0.75q-0.46875 0 -1.171875 0.1875l0.1875 -1.46875q0.15625 0.015625 0.265625 0.015625q1.0625 0 1.90625 -0.546875q0.859375 -0.5625 0.859375 -1.71875q0 -0.921875 -0.625 -1.515625q-0.609375 -0.609375 -1.59375 -0.609375q-0.96875 0 -1.625 0.609375q-0.640625 0.609375 -0.828125 1.84375l-1.671875 -0.296875q0.296875 -1.6875 1.375 -2.609375q1.09375 -0.921875 2.71875 -0.921875q1.109375 0 2.046875 0.484375q0.9375 0.46875 1.421875 1.296875q0.5 0.828125 0.5 1.75q0 0.890625 -0.46875 1.609375q-0.46875 0.71875 -1.40625 1.15625q1.21875 0.265625 1.875 1.15625q0.671875 0.875 0.671875 2.1875q0 1.78125 -1.296875 3.015625q-1.296875 1.234375 -3.28125 1.234375q-1.796875 0 -2.984375 -1.0625q-1.171875 -1.0625 -1.34375 -2.765625zm19.199646 7.59375q-1.375 -1.75 -2.328125 -4.078125q-0.953125 -2.34375 -0.953125 -4.84375q0 -2.21875 0.703125 -4.234375q0.84375 -2.34375 2.578125 -4.671875l1.203125 0q-1.125 1.921875 -1.484375 2.75q-0.5625 1.28125 -0.890625 2.671875q-0.40625 1.734375 -0.40625 3.484375q0 4.46875 2.78125 8.921875l-1.203125 0zm2.556427 -7.5625l1.765625 -0.15625q0.1875 1.28125 0.890625 1.9375q0.71875 0.640625 1.71875 0.640625q1.203125 0 2.03125 -0.90625q0.84375 -0.90625 0.84375 -2.421875q0 -1.421875 -0.8125 -2.25q-0.796875 -0.828125 -2.09375 -0.828125q-0.796875 0 -1.453125 0.375q-0.640625 0.359375 -1.015625 0.953125l-1.578125 -0.203125l1.328125 -7.0l6.765625 0l0 1.609375l-5.4375 0l-0.734375 3.640625q1.234375 -0.84375 2.578125 -0.84375q1.78125 0 3.0 1.234375q1.234375 1.234375 1.234375 3.171875q0 1.84375 -1.078125 3.1875q-1.3125 1.65625 -3.578125 1.65625q-1.859375 0 -3.03125 -1.03125q-1.171875 -1.046875 -1.34375 -2.765625zm16.672607 3.5625l-1.671875 0l0 -10.640625q-0.59375 0.578125 -1.578125 1.15625q-0.984375 0.5625 -1.765625 0.859375l0 -1.625q1.40625 -0.65625 2.453125 -1.59375q1.046875 -0.9375 1.484375 -1.8125l1.078125 0l0 13.65625zm12.860077 -1.609375l0 1.609375l-8.984375 0q-0.015625 -0.609375 0.1875 -1.15625q0.34375 -0.921875 1.09375 -1.8125q0.765625 -0.890625 2.1875 -2.0625q2.21875 -1.8125 3.0 -2.875q0.78125 -1.0625 0.78125 -2.015625q0 -0.984375 -0.71875 -1.671875q-0.703125 -0.6875 -1.84375 -0.6875q-1.203125 0 -1.9375 0.734375q-0.71875 0.71875 -0.71875 2.0l-1.71875 -0.171875q0.171875 -1.921875 1.328125 -2.921875q1.15625 -1.015625 3.09375 -1.015625q1.953125 0 3.09375 1.09375q1.140625 1.078125 1.140625 2.6875q0 0.8125 -0.34375 1.609375q-0.328125 0.78125 -1.109375 1.65625q-0.765625 0.859375 -2.5625 2.390625q-1.5 1.265625 -1.9375 1.71875q-0.421875 0.4375 -0.703125 0.890625l6.671875 0zm0.9538574 1.609375l3.59375 -5.125l-3.328125 -4.734375l2.09375 0l1.515625 2.3125q0.421875 0.65625 0.671875 1.109375q0.421875 -0.609375 0.765625 -1.09375l1.65625 -2.328125l1.984375 0l-3.390625 4.640625l3.65625 5.21875l-2.046875 0l-2.03125 -3.0625l-0.53125 -0.828125l-2.59375 3.890625l-2.015625 0zm9.96875 -3.5625l1.765625 -0.15625q0.1875 1.28125 0.890625 1.9375q0.71875 0.640625 1.71875 0.640625q1.203125 0 2.03125 -0.90625q0.84375 -0.90625 0.84375 -2.421875q0 -1.421875 -0.8125 -2.25q-0.796875 -0.828125 -2.09375 -0.828125q-0.796875 0 -1.453125 0.375q-0.640625 0.359375 -1.015625 0.953125l-1.578125 -0.203125l1.328125 -7.0l6.765625 0l0 1.609375l-5.4375 0l-0.734375 3.640625q1.234375 -0.84375 2.578125 -0.84375q1.78125 0 3.0 1.234375q1.234375 1.234375 1.234375 3.171875q0 1.84375 -1.078125 3.1875q-1.3125 1.65625 -3.578125 1.65625q-1.859375 0 -3.03125 -1.03125q-1.171875 -1.046875 -1.34375 -2.765625zm16.672577 3.5625l-1.671875 0l0 -10.640625q-0.59375 0.578125 -1.578125 1.15625q-0.984375 0.5625 -1.765625 0.859375l0 -1.625q1.40625 -0.65625 2.453125 -1.59375q1.046875 -0.9375 1.484375 -1.8125l1.078125 0l0 13.65625zm12.860107 -1.609375l0 1.609375l-8.984375 0q-0.015625 -0.609375 0.1875 -1.15625q0.34375 -0.921875 1.09375 -1.8125q0.765625 -0.890625 2.1875 -2.0625q2.21875 -1.8125 3.0 -2.875q0.78125 -1.0625 0.78125 -2.015625q0 -0.984375 -0.71875 -1.671875q-0.703125 -0.6875 -1.84375 -0.6875q-1.203125 0 -1.9375 0.734375q-0.71875 0.71875 -0.71875 2.0l-1.71875 -0.171875q0.171875 -1.921875 1.328125 -2.921875q1.15625 -1.015625 3.09375 -1.015625q1.953125 0 3.09375 1.09375q1.140625 1.078125 1.140625 2.6875q0 0.8125 -0.34375 1.609375q-0.328125 0.78125 -1.109375 1.65625q-0.765625 0.859375 -2.5625 2.390625q-1.5 1.265625 -1.9375 1.71875q-0.421875 0.4375 -0.703125 0.890625l6.671875 0zm3.156952 5.609375l-1.1875 0q2.765625 -4.453125 2.765625 -8.921875q0 -1.734375 -0.390625 -3.453125q-0.328125 -1.390625 -0.890625 -2.671875q-0.359375 -0.84375 -1.484375 -2.78125l1.1875 0q1.75 2.328125 2.578125 4.671875q0.71875 2.015625 0.71875 4.234375q0 2.5 -0.96875 4.84375q-0.953125 2.328125 -2.328125 4.078125z" fill-rule="nonzero"></path><path fill="#000000" d="m145.15926 394.72488l0 -13.640625l1.53125 0l0 1.28125q0.53125 -0.75 1.203125 -1.125q0.6875 -0.375 1.640625 -0.375q1.265625 0 2.234375 0.65625q0.96875 0.640625 1.453125 1.828125q0.5 1.1875 0.5 2.59375q0 1.515625 -0.546875 2.734375q-0.546875 1.203125 -1.578125 1.84375q-1.03125 0.640625 -2.171875 0.640625q-0.84375 0 -1.515625 -0.34375q-0.65625 -0.359375 -1.078125 -0.890625l0 4.796875l-1.671875 0zm1.515625 -8.65625q0 1.90625 0.765625 2.8125q0.78125 0.90625 1.875 0.90625q1.109375 0 1.890625 -0.9375q0.796875 -0.9375 0.796875 -2.921875q0 -1.875 -0.78125 -2.8125q-0.765625 -0.9375 -1.84375 -0.9375q-1.0625 0 -1.890625 1.0q-0.8125 1.0 -0.8125 2.890625zm8.844467 4.875l0 -9.859375l1.5 0l0 1.5q0.578125 -1.046875 1.0625 -1.375q0.484375 -0.34375 1.078125 -0.34375q0.84375 0 1.71875 0.546875l-0.578125 1.546875q-0.609375 -0.359375 -1.234375 -0.359375q-0.546875 0 -0.984375 0.328125q-0.421875 0.328125 -0.609375 0.90625q-0.28125 0.890625 -0.28125 1.953125l0 5.15625l-1.671875 0zm5.603302 -4.921875q0 -2.734375 1.53125 -4.0625q1.265625 -1.09375 3.09375 -1.09375q2.03125 0 3.3125 1.34375q1.296875 1.328125 1.296875 3.671875q0 1.90625 -0.578125 3.0q-0.5625 1.078125 -1.65625 1.6875q-1.078125 0.59375 -2.375 0.59375q-2.0625 0 -3.34375 -1.328125q-1.28125 -1.328125 -1.28125 -3.8125zm1.71875 0q0 1.890625 0.828125 2.828125q0.828125 0.9375 2.078125 0.9375q1.25 0 2.0625 -0.9375q0.828125 -0.953125 0.828125 -2.890625q0 -1.828125 -0.828125 -2.765625q-0.828125 -0.9375 -2.0625 -0.9375q-1.25 0 -2.078125 0.9375q-0.828125 0.9375 -0.828125 2.828125zm9.281967 -6.734375l0 -1.9375l1.65625 0l0 1.9375l-1.65625 0zm-2.125 15.484375l0.3125 -1.421875q0.5 0.125 0.796875 0.125q0.515625 0 0.765625 -0.34375q0.25 -0.328125 0.25 -1.6875l0 -10.359375l1.65625 0l0 10.390625q0 1.828125 -0.46875 2.546875q-0.59375 0.921875 -2.0 0.921875q-0.671875 0 -1.3125 -0.171875zm13.019821 -7.0l1.71875 0.21875q-0.40625 1.5 -1.515625 2.34375q-1.09375 0.828125 -2.8125 0.828125q-2.15625 0 -3.421875 -1.328125q-1.265625 -1.328125 -1.265625 -3.734375q0 -2.484375 1.265625 -3.859375q1.28125 -1.375 3.328125 -1.375q1.984375 0 3.234375 1.34375q1.25 1.34375 1.25 3.796875q0 0.140625 -0.015625 0.4375l-7.34375 0q0.09375 1.625 0.921875 2.484375q0.828125 0.859375 2.0625 0.859375q0.90625 0 1.546875 -0.46875q0.65625 -0.484375 1.046875 -1.546875zm-5.484375 -2.703125l5.5 0q-0.109375 -1.234375 -0.625 -1.859375q-0.796875 -0.96875 -2.078125 -0.96875q-1.140625 0 -1.9375 0.78125q-0.78125 0.765625 -0.859375 2.046875zm15.547592 2.265625l1.640625 0.21875q-0.265625 1.6875 -1.375 2.65625q-1.109375 0.953125 -2.734375 0.953125q-2.015625 0 -3.25 -1.3125q-1.21875 -1.328125 -1.21875 -3.796875q0 -1.59375 0.515625 -2.78125q0.53125 -1.203125 1.609375 -1.796875q1.09375 -0.609375 2.359375 -0.609375q1.609375 0 2.625 0.8125q1.015625 0.8125 1.3125 2.3125l-1.625 0.25q-0.234375 -1.0 -0.828125 -1.5q-0.59375 -0.5 -1.421875 -0.5q-1.265625 0 -2.0625 0.90625q-0.78125 0.90625 -0.78125 2.859375q0 1.984375 0.765625 2.890625q0.765625 0.890625 1.984375 0.890625q0.984375 0 1.640625 -0.59375q0.65625 -0.609375 0.84375 -1.859375zm6.546875 2.109375l0.234375 1.484375q-0.703125 0.140625 -1.265625 0.140625q-0.90625 0 -1.40625 -0.28125q-0.5 -0.296875 -0.703125 -0.75q-0.203125 -0.46875 -0.203125 -1.984375l0 -5.65625l-1.234375 0l0 -1.3125l1.234375 0l0 -2.4375l1.65625 -1.0l0 3.4375l1.6875 0l0 1.3125l-1.6875 0l0 5.75q0 0.71875 0.078125 0.921875q0.09375 0.203125 0.296875 0.328125q0.203125 0.125 0.578125 0.125q0.265625 0 0.734375 -0.078125zm1.5426788 -10.1875l0 -1.90625l1.671875 0l0 1.90625l-1.671875 0zm0 11.6875l0 -9.859375l1.671875 0l0 9.859375l-1.671875 0zm3.5041962 -4.921875q0 -2.734375 1.53125 -4.0625q1.265625 -1.09375 3.09375 -1.09375q2.03125 0 3.3125 1.34375q1.296875 1.328125 1.296875 3.671875q0 1.90625 -0.578125 3.0q-0.5625 1.078125 -1.65625 1.6875q-1.078125 0.59375 -2.375 0.59375q-2.0625 0 -3.34375 -1.328125q-1.28125 -1.328125 -1.28125 -3.8125zm1.71875 0q0 1.890625 0.828125 2.828125q0.828125 0.9375 2.078125 0.9375q1.25 0 2.0625 -0.9375q0.828125 -0.953125 0.828125 -2.890625q0 -1.828125 -0.828125 -2.765625q-0.828125 -0.9375 -2.0625 -0.9375q-1.25 0 -2.078125 0.9375q-0.828125 0.9375 -0.828125 2.828125zm9.281967 4.921875l0 -9.859375l1.5 0l0 1.40625q1.09375 -1.625 3.140625 -1.625q0.890625 0 1.640625 0.328125q0.75 0.3125 1.109375 0.84375q0.375 0.515625 0.53125 1.21875q0.09375 0.46875 0.09375 1.625l0 6.0625l-1.671875 0l0 -6.0q0 -1.015625 -0.203125 -1.515625q-0.1875 -0.515625 -0.6875 -0.8125q-0.5 -0.296875 -1.171875 -0.296875q-1.0625 0 -1.84375 0.671875q-0.765625 0.671875 -0.765625 2.578125l0 5.375l-1.671875 0zm14.887146 -2.9375l1.65625 -0.265625q0.140625 1.0 0.765625 1.53125q0.640625 0.515625 1.78125 0.515625q1.15625 0 1.703125 -0.46875q0.5625 -0.46875 0.5625 -1.09375q0 -0.5625 -0.484375 -0.890625q-0.34375 -0.21875 -1.703125 -0.5625q-1.84375 -0.46875 -2.5625 -0.796875q-0.703125 -0.34375 -1.078125 -0.9375q-0.359375 -0.609375 -0.359375 -1.328125q0 -0.65625 0.296875 -1.21875q0.3125 -0.5625 0.828125 -0.9375q0.390625 -0.28125 1.0625 -0.484375q0.671875 -0.203125 1.4375 -0.203125q1.171875 0 2.046875 0.34375q0.875 0.328125 1.28125 0.90625q0.421875 0.5625 0.578125 1.515625l-1.625 0.21875q-0.109375 -0.75 -0.65625 -1.171875q-0.53125 -0.4375 -1.5 -0.4375q-1.15625 0 -1.640625 0.390625q-0.484375 0.375 -0.484375 0.875q0 0.328125 0.203125 0.59375q0.203125 0.265625 0.640625 0.4375q0.25 0.09375 1.46875 0.4375q1.765625 0.46875 2.46875 0.765625q0.703125 0.296875 1.09375 0.875q0.40625 0.578125 0.40625 1.4375q0 0.828125 -0.484375 1.578125q-0.484375 0.734375 -1.40625 1.140625q-0.921875 0.390625 -2.078125 0.390625q-1.921875 0 -2.9375 -0.796875q-1.0 -0.796875 -1.28125 -2.359375zm10.015625 -8.75l0 -1.90625l1.671875 0l0 1.90625l-1.671875 0zm0 11.6875l0 -9.859375l1.671875 0l0 9.859375l-1.671875 0zm3.2541962 0l0 -1.359375l6.265625 -7.1875q-1.0625 0.046875 -1.875 0.046875l-4.015625 0l0 -1.359375l8.046875 0l0 1.109375l-5.34375 6.25l-1.015625 1.140625q1.109375 -0.078125 2.09375 -0.078125l4.5625 0l0 1.4375l-8.71875 0zm16.95311 -3.171875l1.71875 0.21875q-0.40625 1.5 -1.515625 2.34375q-1.09375 0.828125 -2.8125 0.828125q-2.15625 0 -3.4218597 -1.328125q-1.265625 -1.328125 -1.265625 -3.734375q0 -2.484375 1.265625 -3.859375q1.2812347 -1.375 3.3281097 -1.375q1.984375 0 3.234375 1.34375q1.25 1.34375 1.25 3.796875q0 0.140625 -0.015625 0.4375l-7.3437347 0q0.09375 1.625 0.92185974 2.484375q0.828125 0.859375 2.0625 0.859375q0.90625 0 1.546875 -0.46875q0.65625 -0.484375 1.046875 -1.546875zm-5.4843597 -2.703125l5.4999847 0q-0.109375 -1.234375 -0.625 -1.859375q-0.796875 -0.96875 -2.078125 -0.96875q-1.140625 0 -1.9375 0.78125q-0.78123474 0.765625 -0.85935974 2.046875zm9.578842 -2.078125l0 -1.90625l1.90625 0l0 1.90625l-1.90625 0zm0 7.953125l0 -1.90625l1.90625 0l0 1.90625l-1.90625 0zm9.444733 -3.59375l1.671875 -0.21875q0.28125 1.421875 0.96875 2.046875q0.703125 0.625 1.6875 0.625q1.1875 0 2.0 -0.8125q0.8125 -0.828125 0.8125 -2.03125q0 -1.140625 -0.765625 -1.890625q-0.75 -0.75 -1.90625 -0.75q-0.46875 0 -1.171875 0.1875l0.1875 -1.46875q0.15625 0.015625 0.265625 0.015625q1.0625 0 1.90625 -0.546875q0.859375 -0.5625 0.859375 -1.71875q0 -0.921875 -0.625 -1.515625q-0.609375 -0.609375 -1.59375 -0.609375q-0.96875 0 -1.625 0.609375q-0.640625 0.609375 -0.828125 1.84375l-1.671875 -0.296875q0.296875 -1.6875 1.375 -2.609375q1.09375 -0.921875 2.71875 -0.921875q1.109375 0 2.046875 0.484375q0.9375 0.46875 1.421875 1.296875q0.5 0.828125 0.5 1.75q0 0.890625 -0.46875 1.609375q-0.46875 0.71875 -1.40625 1.15625q1.21875 0.265625 1.875 1.15625q0.671875 0.875 0.671875 2.1875q0 1.78125 -1.296875 3.015625q-1.296875 1.234375 -3.28125 1.234375q-1.796875 0 -2.984375 -1.0625q-1.171875 -1.0625 -1.34375 -2.765625zm19.141327 1.984375l0 1.609375l-8.984375 0q-0.015625 -0.609375 0.1875 -1.15625q0.34375 -0.921875 1.09375 -1.8125q0.765625 -0.890625 2.1875 -2.0625q2.21875 -1.8125 3.0 -2.875q0.78125 -1.0625 0.78125 -2.015625q0 -0.984375 -0.71875 -1.671875q-0.703125 -0.6875 -1.84375 -0.6875q-1.203125 0 -1.9375 0.734375q-0.71875 0.71875 -0.71875 2.0l-1.71875 -0.171875q0.171875 -1.921875 1.328125 -2.921875q1.15625 -1.015625 3.09375 -1.015625q1.953125 0 3.09375 1.09375q1.140625 1.078125 1.140625 2.6875q0 0.8125 -0.34375 1.609375q-0.328125 0.78125 -1.109375 1.65625q-0.765625 0.859375 -2.5625 2.390625q-1.5 1.265625 -1.9375 1.71875q-0.421875 0.4375 -0.703125 0.890625l6.671875 0z" fill-rule="nonzero"></path><path fill="#000000" fill-opacity="0.0" d="m219.09448 618.4042l0 34.614197" fill-rule="evenodd"></path><path stroke="#000000" stroke-width="1.0" stroke-linejoin="round" stroke-linecap="butt" d="m219.09448 618.4042l0 28.614197" fill-rule="evenodd"></path><path fill="#000000" stroke="#000000" stroke-width="1.0" stroke-linecap="butt" d="m217.44275 647.0184l1.6517334 4.538086l1.6517334 -4.538086z" fill-rule="evenodd"></path><path fill="#000000" fill-opacity="0.0" d="m103.0 98.54593l232.18896 0l0 58.992126l-232.18896 0z" fill-rule="evenodd"></path><path stroke="#000000" stroke-width="2.0" stroke-linejoin="round" stroke-linecap="butt" d="m103.0 98.54593l232.18896 0l0 58.992126l-232.18896 0z" fill-rule="evenodd"></path><path fill="#000000" d="m143.32318 125.46593l0 -13.59375l9.84375 0l0 1.59375l-8.046875 0l0 4.171875l7.53125 0l0 1.59375l-7.53125 0l0 4.625l8.359375 0l0 1.609375l-10.15625 0zm12.193573 0l0 -9.859375l1.5 0l0 1.390625q0.453125 -0.71875 1.21875 -1.15625q0.78125 -0.453125 1.765625 -0.453125q1.09375 0 1.796875 0.453125q0.703125 0.453125 0.984375 1.28125q1.171875 -1.734375 3.046875 -1.734375q1.46875 0 2.25 0.8125q0.796875 0.8125 0.796875 2.5l0 6.765625l-1.671875 0l0 -6.203125q0 -1.0 -0.15625 -1.4375q-0.15625 -0.453125 -0.59375 -0.71875q-0.421875 -0.265625 -1.0 -0.265625q-1.03125 0 -1.71875 0.6875q-0.6875 0.6875 -0.6875 2.21875l0 5.71875l-1.671875 0l0 -6.40625q0 -1.109375 -0.40625 -1.65625q-0.40625 -0.5625 -1.34375 -0.5625q-0.703125 0 -1.3125 0.375q-0.59375 0.359375 -0.859375 1.078125q-0.265625 0.71875 -0.265625 2.0625l0 5.109375l-1.671875 0zm17.087677 0l-1.546875 0l0 -13.59375l1.65625 0l0 4.84375q1.0625 -1.328125 2.703125 -1.328125q0.90625 0 1.71875 0.375q0.8125 0.359375 1.328125 1.03125q0.53125 0.65625 0.828125 1.59375q0.296875 0.9375 0.296875 2.0q0 2.53125 -1.25 3.921875q-1.25 1.375 -3.0 1.375q-1.75 0 -2.734375 -1.453125l0 1.234375zm-0.015625 -5.0q0 1.765625 0.46875 2.5625q0.796875 1.28125 2.140625 1.28125q1.09375 0 1.890625 -0.9375q0.796875 -0.953125 0.796875 -2.84375q0 -1.921875 -0.765625 -2.84375q-0.765625 -0.921875 -1.84375 -0.921875q-1.09375 0 -1.890625 0.953125q-0.796875 0.953125 -0.796875 2.75zm15.594467 1.828125l1.71875 0.21875q-0.40625 1.5 -1.515625 2.34375q-1.09375 0.828125 -2.8125 0.828125q-2.15625 0 -3.421875 -1.328125q-1.265625 -1.328125 -1.265625 -3.734375q0 -2.484375 1.265625 -3.859375q1.28125 -1.375 3.328125 -1.375q1.984375 0 3.234375 1.34375q1.25 1.34375 1.25 3.796875q0 0.140625 -0.015625 0.4375l-7.34375 0q0.09375 1.625 0.921875 2.484375q0.828125 0.859375 2.0625 0.859375q0.90625 0 1.546875 -0.46875q0.65625 -0.484375 1.046875 -1.546875zm-5.484375 -2.703125l5.5 0q-0.109375 -1.234375 -0.625 -1.859375q-0.796875 -0.96875 -2.078125 -0.96875q-1.140625 0 -1.9375 0.78125q-0.78125 0.765625 -0.859375 2.046875zm15.500717 5.875l0 -1.25q-0.9375 1.46875 -2.75 1.46875q-1.171875 0 -2.171875 -0.640625q-0.984375 -0.65625 -1.53125 -1.8125q-0.53125 -1.171875 -0.53125 -2.6875q0 -1.46875 0.484375 -2.671875q0.5 -1.203125 1.46875 -1.84375q0.984375 -0.640625 2.203125 -0.640625q0.890625 0 1.578125 0.375q0.703125 0.375 1.140625 0.984375l0 -4.875l1.65625 0l0 13.59375l-1.546875 0zm-5.28125 -4.921875q0 1.890625 0.796875 2.828125q0.8125 0.9375 1.890625 0.9375q1.09375 0 1.859375 -0.890625q0.765625 -0.890625 0.765625 -2.734375q0 -2.015625 -0.78125 -2.953125q-0.78125 -0.953125 -1.921875 -0.953125q-1.109375 0 -1.859375 0.90625q-0.75 0.90625 -0.75 2.859375zm15.656967 4.921875l0 -1.25q-0.9375 1.46875 -2.75 1.46875q-1.171875 0 -2.171875 -0.640625q-0.984375 -0.65625 -1.53125 -1.8125q-0.53125 -1.171875 -0.53125 -2.6875q0 -1.46875 0.484375 -2.671875q0.5 -1.203125 1.46875 -1.84375q0.984375 -0.640625 2.203125 -0.640625q0.890625 0 1.578125 0.375q0.703125 0.375 1.140625 0.984375l0 -4.875l1.65625 0l0 13.59375l-1.546875 0zm-5.28125 -4.921875q0 1.890625 0.796875 2.828125q0.8125 0.9375 1.890625 0.9375q1.09375 0 1.859375 -0.890625q0.765625 -0.890625 0.765625 -2.734375q0 -2.015625 -0.78125 -2.953125q-0.78125 -0.953125 -1.921875 -0.953125q-1.109375 0 -1.859375 0.90625q-0.75 0.90625 -0.75 2.859375zm9.281967 -6.765625l0 -1.90625l1.671875 0l0 1.90625l-1.671875 0zm0 11.6875l0 -9.859375l1.671875 0l0 9.859375l-1.671875 0zm4.129196 0l0 -9.859375l1.5 0l0 1.40625q1.09375 -1.625 3.140625 -1.625q0.890625 0 1.640625 0.328125q0.75 0.3125 1.109375 0.84375q0.375 0.515625 0.53125 1.21875q0.09375 0.46875 0.09375 1.625l0 6.0625l-1.671875 0l0 -6.0q0 -1.015625 -0.203125 -1.515625q-0.1875 -0.515625 -0.6875 -0.8125q-0.5 -0.296875 -1.171875 -0.296875q-1.0625 0 -1.84375 0.671875q-0.765625 0.671875 -0.765625 2.578125l0 5.375l-1.671875 0zm10.078842 0.8125l1.609375 0.25q0.109375 0.75 0.578125 1.09375q0.609375 0.453125 1.6875 0.453125q1.171875 0 1.796875 -0.46875q0.625 -0.453125 0.859375 -1.28125q0.125 -0.515625 0.109375 -2.15625q-1.09375 1.296875 -2.71875 1.296875q-2.03125 0 -3.15625 -1.46875q-1.109375 -1.46875 -1.109375 -3.515625q0 -1.40625 0.515625 -2.59375q0.515625 -1.203125 1.484375 -1.84375q0.96875 -0.65625 2.265625 -0.65625q1.75 0 2.875 1.40625l0 -1.1875l1.546875 0l0 8.515625q0 2.3125 -0.46875 3.265625q-0.46875 0.96875 -1.484375 1.515625q-1.015625 0.5625 -2.5 0.5625q-1.765625 0 -2.859375 -0.796875q-1.078125 -0.796875 -1.03125 -2.390625zm1.375 -5.921875q0 1.953125 0.765625 2.84375q0.78125 0.890625 1.9375 0.890625q1.140625 0 1.921875 -0.890625q0.78125 -0.890625 0.78125 -2.78125q0 -1.8125 -0.8125 -2.71875q-0.796875 -0.921875 -1.921875 -0.921875q-1.109375 0 -1.890625 0.90625q-0.78125 0.890625 -0.78125 2.671875zm14.449646 5.109375l0 -13.59375l1.671875 0l0 13.59375l-1.671875 0zm3.5510712 -4.921875q0 -2.734375 1.53125 -4.0625q1.265625 -1.09375 3.09375 -1.09375q2.03125 0 3.3125 1.34375q1.296875 1.328125 1.296875 3.671875q0 1.90625 -0.578125 3.0q-0.5625 1.078125 -1.65625 1.6875q-1.078125 0.59375 -2.375 0.59375q-2.0625 0 -3.34375 -1.328125q-1.28125 -1.328125 -1.28125 -3.8125zm1.71875 0q0 1.890625 0.828125 2.828125q0.828125 0.9375 2.078125 0.9375q1.25 0 2.0625 -0.9375q0.828125 -0.953125 0.828125 -2.890625q0 -1.828125 -0.828125 -2.765625q-0.828125 -0.9375 -2.0625 -0.9375q-1.25 0 -2.078125 0.9375q-0.828125 0.9375 -0.828125 2.828125zm8.656967 0q0 -2.734375 1.53125 -4.0625q1.265625 -1.09375 3.09375 -1.09375q2.03125 0 3.3125 1.34375q1.296875 1.328125 1.296875 3.671875q0 1.90625 -0.578125 3.0q-0.5625 1.078125 -1.65625 1.6875q-1.078125 0.59375 -2.375 0.59375q-2.0625 0 -3.34375 -1.328125q-1.28125 -1.328125 -1.28125 -3.8125zm1.71875 0q0 1.890625 0.828125 2.828125q0.828125 0.9375 2.078125 0.9375q1.25 0 2.0625 -0.9375q0.828125 -0.953125 0.828125 -2.890625q0 -1.828125 -0.828125 -2.765625q-0.828125 -0.9375 -2.0625 -0.9375q-1.25 0 -2.078125 0.9375q-0.828125 0.9375 -0.828125 2.828125zm9.297607 4.921875l0 -13.59375l1.671875 0l0 7.75l3.953125 -4.015625l2.15625 0l-3.765625 3.65625l4.140625 6.203125l-2.0625 0l-3.25 -5.03125l-1.171875 1.125l0 3.90625l-1.671875 0zm15.765625 0l0 -1.453125q-1.140625 1.671875 -3.125 1.671875q-0.859375 0 -1.625 -0.328125q-0.75 -0.34375 -1.125 -0.84375q-0.359375 -0.5 -0.515625 -1.234375q-0.09375 -0.5 -0.09375 -1.5625l0 -6.109375l1.671875 0l0 5.46875q0 1.3125 0.09375 1.765625q0.15625 0.65625 0.671875 1.03125q0.515625 0.375 1.265625 0.375q0.75 0 1.40625 -0.375q0.65625 -0.390625 0.921875 -1.046875q0.28125 -0.671875 0.28125 -1.9375l0 -5.28125l1.671875 0l0 9.859375l-1.5 0zm3.922577 3.78125l0 -13.640625l1.53125 0l0 1.28125q0.53125 -0.75 1.203125 -1.125q0.6875 -0.375 1.640625 -0.375q1.265625 0 2.234375 0.65625q0.96875 0.640625 1.453125 1.828125q0.5 1.1875 0.5 2.59375q0 1.515625 -0.546875 2.734375q-0.546875 1.203125 -1.578125 1.84375q-1.03125 0.640625 -2.171875 0.640625q-0.84375 0 -1.515625 -0.34375q-0.65625 -0.359375 -1.078125 -0.890625l0 4.796875l-1.671875 0zm1.515625 -8.65625q0 1.90625 0.765625 2.8125q0.78125 0.90625 1.875 0.90625q1.109375 0 1.890625 -0.9375q0.796875 -0.9375 0.796875 -2.921875q0 -1.875 -0.78125 -2.8125q-0.765625 -0.9375 -1.84375 -0.9375q-1.0625 0 -1.890625 1.0q-0.8125 1.0 -0.8125 2.890625z" fill-rule="nonzero"></path><path fill="#000000" d="m176.34024 151.46593q-1.375 -1.75 -2.328125 -4.078125q-0.953125 -2.34375 -0.953125 -4.84375q0 -2.21875 0.703125 -4.234375q0.84375 -2.34375 2.578125 -4.671875l1.203125 0q-1.125 1.921875 -1.484375 2.75q-0.5625 1.28125 -0.890625 2.671875q-0.40625 1.734375 -0.40625 3.484375q0 4.46875 2.78125 8.921875l-1.203125 0zm11.228302 -14.265625l-1.65625 0.125q-0.21875 -0.984375 -0.640625 -1.421875q-0.671875 -0.71875 -1.65625 -0.71875q-0.8125 0 -1.40625 0.4375q-0.796875 0.578125 -1.25 1.6875q-0.453125 1.09375 -0.46875 3.140625q0.609375 -0.921875 1.46875 -1.359375q0.875 -0.453125 1.828125 -0.453125q1.671875 0 2.84375 1.234375q1.171875 1.234375 1.171875 3.171875q0 1.28125 -0.546875 2.390625q-0.546875 1.09375 -1.515625 1.6875q-0.96875 0.578125 -2.1875 0.578125q-2.09375 0 -3.40625 -1.53125q-1.3125 -1.546875 -1.3125 -5.0625q0 -3.953125 1.453125 -5.734375q1.265625 -1.5625 3.421875 -1.5625q1.609375 0 2.625 0.90625q1.03125 0.890625 1.234375 2.484375zm-6.8125 5.859375q0 0.859375 0.359375 1.65625q0.375 0.78125 1.03125 1.203125q0.65625 0.40625 1.375 0.40625q1.0625 0 1.8125 -0.84375q0.765625 -0.859375 0.765625 -2.328125q0 -1.40625 -0.75 -2.21875q-0.75 -0.8125 -1.890625 -0.8125q-1.125 0 -1.921875 0.8125q-0.78125 0.8125 -0.78125 2.125zm8.531967 0.8125l1.671875 -0.21875q0.28125 1.421875 0.96875 2.046875q0.703125 0.625 1.6875 0.625q1.1875 0 2.0 -0.8125q0.8125 -0.828125 0.8125 -2.03125q0 -1.140625 -0.765625 -1.890625q-0.75 -0.75 -1.90625 -0.75q-0.46875 0 -1.171875 0.1875l0.1875 -1.46875q0.15625 0.015625 0.265625 0.015625q1.0625 0 1.90625 -0.546875q0.859375 -0.5625 0.859375 -1.71875q0 -0.921875 -0.625 -1.515625q-0.609375 -0.609375 -1.59375 -0.609375q-0.96875 0 -1.625 0.609375q-0.640625 0.609375 -0.828125 1.84375l-1.671875 -0.296875q0.296875 -1.6875 1.375 -2.609375q1.09375 -0.921875 2.71875 -0.921875q1.109375 0 2.046875 0.484375q0.9375 0.46875 1.421875 1.296875q0.5 0.828125 0.5 1.75q0 0.890625 -0.46875 1.609375q-0.46875 0.71875 -1.40625 1.15625q1.21875 0.265625 1.875 1.15625q0.671875 0.875 0.671875 2.1875q0 1.78125 -1.296875 3.015625q-1.296875 1.234375 -3.28125 1.234375q-1.796875 0 -2.984375 -1.0625q-1.171875 -1.0625 -1.34375 -2.765625zm10.625717 0.453125l1.59375 -0.15625q0.203125 1.140625 0.78125 1.65625q0.578125 0.5 1.484375 0.5q0.765625 0 1.34375 -0.34375q0.578125 -0.359375 0.953125 -0.953125q0.375 -0.59375 0.625 -1.59375q0.25 -1.0 0.25 -2.03125q0 -0.109375 -0.015625 -0.34375q-0.5 0.796875 -1.375 1.296875q-0.859375 0.5 -1.875 0.5q-1.6875 0 -2.859375 -1.21875q-1.171875 -1.234375 -1.171875 -3.234375q0 -2.078125 1.21875 -3.328125q1.234375 -1.265625 3.0625 -1.265625q1.328125 0 2.421875 0.71875q1.109375 0.703125 1.671875 2.03125q0.578125 1.328125 0.578125 3.828125q0 2.609375 -0.578125 4.15625q-0.5625 1.546875 -1.6875 2.359375q-1.109375 0.796875 -2.609375 0.796875q-1.59375 0 -2.609375 -0.890625q-1.0 -0.890625 -1.203125 -2.484375zm6.828125 -6.0q0 -1.4375 -0.765625 -2.28125q-0.765625 -0.859375 -1.84375 -0.859375q-1.109375 0 -1.9375 0.921875q-0.828125 0.90625 -0.828125 2.34375q0 1.3125 0.78125 2.125q0.796875 0.796875 1.9375 0.796875q1.171875 0 1.90625 -0.796875q0.75 -0.8125 0.75 -2.25zm5.860092 1.765625q-1.046875 -0.375 -1.546875 -1.078125q-0.5 -0.71875 -0.5 -1.703125q0 -1.484375 1.0625 -2.484375q1.078125 -1.015625 2.84375 -1.015625q1.78125 0 2.859375 1.03125q1.09375 1.03125 1.09375 2.515625q0 0.953125 -0.5 1.65625q-0.484375 0.703125 -1.5 1.078125q1.25 0.40625 1.90625 1.3125q0.65625 0.90625 0.65625 2.171875q0 1.75 -1.234375 2.9375q-1.234375 1.1875 -3.25 1.1875q-2.015625 0 -3.25 -1.1875q-1.234375 -1.203125 -1.234375 -2.984375q0 -1.328125 0.671875 -2.21875q0.671875 -0.890625 1.921875 -1.21875zm-0.328125 -2.828125q0 0.96875 0.609375 1.578125q0.625 0.609375 1.625 0.609375q0.953125 0 1.5625 -0.609375q0.625 -0.609375 0.625 -1.484375q0 -0.921875 -0.640625 -1.546875q-0.625 -0.625 -1.578125 -0.625q-0.953125 0 -1.578125 0.609375q-0.625 0.609375 -0.625 1.46875zm-0.546875 6.28125q0 0.71875 0.328125 1.390625q0.34375 0.65625 1.015625 1.03125q0.671875 0.359375 1.4375 0.359375q1.203125 0 1.984375 -0.765625q0.78125 -0.78125 0.78125 -1.96875q0 -1.203125 -0.8125 -1.984375q-0.796875 -0.796875 -2.0 -0.796875q-1.1875 0 -1.96875 0.78125q-0.765625 0.78125 -0.765625 1.953125zm8.688217 0.328125l1.671875 -0.21875q0.28125 1.421875 0.96875 2.046875q0.703125 0.625 1.6875 0.625q1.1875 0 2.0 -0.8125q0.8125 -0.828125 0.8125 -2.03125q0 -1.140625 -0.765625 -1.890625q-0.75 -0.75 -1.90625 -0.75q-0.46875 0 -1.171875 0.1875l0.1875 -1.46875q0.15625 0.015625 0.265625 0.015625q1.0625 0 1.90625 -0.546875q0.859375 -0.5625 0.859375 -1.71875q0 -0.921875 -0.625 -1.515625q-0.609375 -0.609375 -1.59375 -0.609375q-0.96875 0 -1.625 0.609375q-0.640625 0.609375 -0.828125 1.84375l-1.671875 -0.296875q0.296875 -1.6875 1.375 -2.609375q1.09375 -0.921875 2.71875 -0.921875q1.109375 0 2.046875 0.484375q0.9375 0.46875 1.421875 1.296875q0.5 0.828125 0.5 1.75q0 0.890625 -0.46875 1.609375q-0.46875 0.71875 -1.40625 1.15625q1.21875 0.265625 1.875 1.15625q0.671875 0.875 0.671875 2.1875q0 1.78125 -1.296875 3.015625q-1.296875 1.234375 -3.28125 1.234375q-1.796875 0 -2.984375 -1.0625q-1.171875 -1.0625 -1.34375 -2.765625zm9.719467 3.59375l3.59375 -5.125l-3.328125 -4.734375l2.09375 0l1.515625 2.3125q0.421875 0.65625 0.671875 1.109375q0.421875 -0.609375 0.765625 -1.09375l1.65625 -2.328125l1.984375 0l-3.390625 4.640625l3.65625 5.21875l-2.046875 0l-2.03125 -3.0625l-0.53125 -0.828125l-2.59375 3.890625l-2.015625 0zm16.265625 0l-1.671875 0l0 -10.640625q-0.59375 0.578125 -1.578125 1.15625q-0.984375 0.5625 -1.765625 0.859375l0 -1.625q1.40625 -0.65625 2.453125 -1.59375q1.046875 -0.9375 1.484375 -1.8125l1.078125 0l0 13.65625zm12.750717 -10.265625l-1.65625 0.125q-0.21875 -0.984375 -0.640625 -1.421875q-0.671875 -0.71875 -1.65625 -0.71875q-0.8125 0 -1.40625 0.4375q-0.796875 0.578125 -1.25 1.6875q-0.453125 1.09375 -0.46875 3.140625q0.609375 -0.921875 1.46875 -1.359375q0.875 -0.453125 1.828125 -0.453125q1.671875 0 2.84375 1.234375q1.171875 1.234375 1.171875 3.171875q0 1.28125 -0.546875 2.390625q-0.546875 1.09375 -1.515625 1.6875q-0.96875 0.578125 -2.1875 0.578125q-2.09375 0 -3.40625 -1.53125q-1.3125 -1.546875 -1.3125 -5.0625q0 -3.953125 1.453125 -5.734375q1.265625 -1.5625 3.421875 -1.5625q1.609375 0 2.625 0.90625q1.03125 0.890625 1.234375 2.484375zm-6.8125 5.859375q0 0.859375 0.359375 1.65625q0.375 0.78125 1.03125 1.203125q0.65625 0.40625 1.375 0.40625q1.0625 0 1.8125 -0.84375q0.765625 -0.859375 0.765625 -2.328125q0 -1.40625 -0.75 -2.21875q-0.75 -0.8125 -1.890625 -0.8125q-1.125 0 -1.921875 0.8125q-0.78125 0.8125 -0.78125 2.125zm10.078827 8.40625l-1.1875 0q2.765625 -4.453125 2.765625 -8.921875q0 -1.734375 -0.390625 -3.453125q-0.328125 -1.390625 -0.890625 -2.671875q-0.359375 -0.84375 -1.484375 -2.78125l1.1875 0q1.75 2.328125 2.578125 4.671875q0.71875 2.015625 0.71875 4.234375q0 2.5 -0.96875 4.84375q-0.953125 2.328125 -2.328125 4.078125z" fill-rule="nonzero"></path><path fill="#000000" fill-opacity="0.0" d="m219.09448 157.53806l0 23.433075" fill-rule="evenodd"></path><path stroke="#000000" stroke-width="1.0" stroke-linejoin="round" stroke-linecap="butt" d="m219.09448 157.53806l0 17.433075" fill-rule="evenodd"></path><path fill="#000000" stroke="#000000" stroke-width="1.0" stroke-linecap="butt" d="m217.44275 174.97113l1.6517334 4.538101l1.6517334 -4.538101z" fill-rule="evenodd"></path><path fill="#000000" fill-opacity="0.0" d="m395.48425 30.700842l166.01575 0l0 42.110237l-166.01575 0z" fill-rule="evenodd"></path><path stroke="#000000" stroke-width="2.0" stroke-linejoin="round" stroke-linecap="butt" d="m395.48425 30.700842l166.01575 0l0 42.110237l-166.01575 0z" fill-rule="evenodd"></path><path fill="#000000" d="m413.11163 57.620842l0 -13.59375l1.8125 0l0 13.59375l-1.8125 0zm4.667694 0l0 -9.859375l1.5 0l0 1.40625q1.09375 -1.625 3.140625 -1.625q0.890625 0 1.640625 0.328125q0.75 0.3125 1.109375 0.84375q0.375 0.515625 0.53125 1.21875q0.09375 0.46875 0.09375 1.625l0 6.0625l-1.671875 0l0 -6.0q0 -1.015625 -0.203125 -1.515625q-0.1875 -0.515625 -0.6875 -0.8125q-0.5 -0.296875 -1.171875 -0.296875q-1.0625 0 -1.84375 0.671875q-0.765625 0.671875 -0.765625 2.578125l0 5.375l-1.671875 0zm10.375702 3.78125l0 -13.640625l1.53125 0l0 1.28125q0.53125 -0.75 1.203125 -1.125q0.6875 -0.375 1.640625 -0.375q1.265625 0 2.234375 0.65625q0.96875 0.640625 1.453125 1.828125q0.5 1.1875 0.5 2.59375q0 1.515625 -0.546875 2.734375q-0.546875 1.203125 -1.578125 1.84375q-1.03125 0.640625 -2.171875 0.640625q-0.84375 0 -1.515625 -0.34375q-0.65625 -0.359375 -1.078125 -0.890625l0 4.796875l-1.671875 0zm1.515625 -8.65625q0 1.90625 0.765625 2.8125q0.78125 0.90625 1.875 0.90625q1.109375 0 1.890625 -0.9375q0.796875 -0.9375 0.796875 -2.921875q0 -1.875 -0.78125 -2.8125q-0.765625 -0.9375 -1.84375 -0.9375q-1.0625 0 -1.890625 1.0q-0.8125 1.0 -0.8125 2.890625zm15.313232 4.875l0 -1.453125q-1.140625 1.671875 -3.125 1.671875q-0.859375 0 -1.625 -0.328125q-0.75 -0.34375 -1.125 -0.84375q-0.359375 -0.5 -0.515625 -1.234375q-0.09375 -0.5 -0.09375 -1.5625l0 -6.109375l1.671875 0l0 5.46875q0 1.3125 0.09375 1.765625q0.15625 0.65625 0.671875 1.03125q0.515625 0.375 1.265625 0.375q0.75 0 1.40625 -0.375q0.65625 -0.390625 0.921875 -1.046875q0.28125 -0.671875 0.28125 -1.9375l0 -5.28125l1.671875 0l0 9.859375l-1.5 0zm7.578827 -1.5l0.234375 1.484375q-0.703125 0.140625 -1.265625 0.140625q-0.90625 0 -1.40625 -0.28125q-0.5 -0.296875 -0.703125 -0.75q-0.203125 -0.46875 -0.203125 -1.984375l0 -5.65625l-1.234375 0l0 -1.3125l1.234375 0l0 -2.4375l1.65625 -1.0l0 3.4375l1.6875 0l0 1.3125l-1.6875 0l0 5.75q0 0.71875 0.078125 0.921875q0.09375 0.203125 0.296875 0.328125q0.203125 0.125 0.578125 0.125q0.265625 0 0.734375 -0.078125zm9.839569 -0.109375l0 1.609375l-8.984375 0q-0.015625 -0.609375 0.1875 -1.15625q0.34375 -0.921875 1.09375 -1.8125q0.765625 -0.890625 2.1875 -2.0625q2.21875 -1.8125 3.0 -2.875q0.78125 -1.0625 0.78125 -2.015625q0 -0.984375 -0.71875 -1.671875q-0.703125 -0.6875 -1.84375 -0.6875q-1.203125 0 -1.9375 0.734375q-0.71875 0.71875 -0.71875 2.0l-1.71875 -0.171875q0.171875 -1.921875 1.328125 -2.921875q1.15625 -1.015625 3.09375 -1.015625q1.953125 0 3.09375 1.09375q1.140625 1.078125 1.140625 2.6875q0 0.8125 -0.34375 1.609375q-0.328125 0.78125 -1.109375 1.65625q-0.765625 0.859375 -2.5625 2.390625q-1.5 1.265625 -1.9375 1.71875q-0.421875 0.4375 -0.703125 0.890625l6.671875 0zm10.434021 5.609375q-1.375 -1.75 -2.328125 -4.078125q-0.953125 -2.34375 -0.953125 -4.84375q0 -2.21875 0.703125 -4.234375q0.84375 -2.34375 2.578125 -4.671875l1.203125 0q-1.125 1.921875 -1.484375 2.75q-0.5625 1.28125 -0.890625 2.671875q-0.40625 1.734375 -0.40625 3.484375q0 4.46875 2.78125 8.921875l-1.203125 0zm2.353302 -6.9375l1.65625 -0.265625q0.140625 1.0 0.765625 1.53125q0.640625 0.515625 1.78125 0.515625q1.15625 0 1.703125 -0.46875q0.5625 -0.46875 0.5625 -1.09375q0 -0.5625 -0.484375 -0.890625q-0.34375 -0.21875 -1.703125 -0.5625q-1.84375 -0.46875 -2.5625 -0.796875q-0.703125 -0.34375 -1.078125 -0.9375q-0.359375 -0.609375 -0.359375 -1.328125q0 -0.65625 0.296875 -1.21875q0.3125 -0.5625 0.828125 -0.9375q0.390625 -0.28125 1.0625 -0.484375q0.671875 -0.203125 1.4375 -0.203125q1.171875 0 2.046875 0.34375q0.875 0.328125 1.28125 0.90625q0.421875 0.5625 0.578125 1.515625l-1.625 0.21875q-0.109375 -0.75 -0.65625 -1.171875q-0.53125 -0.4375 -1.5 -0.4375q-1.15625 0 -1.640625 0.390625q-0.484375 0.375 -0.484375 0.875q0 0.328125 0.203125 0.59375q0.203125 0.265625 0.640625 0.4375q0.25 0.09375 1.46875 0.4375q1.765625 0.46875 2.46875 0.765625q0.703125 0.296875 1.09375 0.875q0.40625 0.578125 0.40625 1.4375q0 0.828125 -0.484375 1.578125q-0.484375 0.734375 -1.40625 1.140625q-0.921875 0.390625 -2.078125 0.390625q-1.921875 0 -2.9375 -0.796875q-1.0 -0.796875 -1.28125 -2.359375zm10.015625 -8.75l0 -1.90625l1.671875 0l0 1.90625l-1.671875 0zm0 11.6875l0 -9.859375l1.671875 0l0 9.859375l-1.671875 0zm3.254181 0l0 -1.359375l6.265625 -7.1875q-1.0625 0.046875 -1.875 0.046875l-4.015625 0l0 -1.359375l8.046875 0l0 1.109375l-5.34375 6.25l-1.015625 1.140625q1.109375 -0.078125 2.09375 -0.078125l4.5625 0l0 1.4375l-8.71875 0zm16.953125 -3.171875l1.71875 0.21875q-0.40625 1.5 -1.515625 2.34375q-1.09375 0.828125 -2.8125 0.828125q-2.15625 0 -3.421875 -1.328125q-1.265625 -1.328125 -1.265625 -3.734375q0 -2.484375 1.265625 -3.859375q1.28125 -1.375 3.328125 -1.375q1.984375 0 3.234375 1.34375q1.25 1.34375 1.25 3.796875q0 0.140625 -0.015625 0.4375l-7.34375 0q0.09375 1.625 0.921875 2.484375q0.828125 0.859375 2.0625 0.859375q0.90625 0 1.546875 -0.46875q0.65625 -0.484375 1.046875 -1.546875zm-5.484375 -2.703125l5.5 0q-0.109375 -1.234375 -0.625 -1.859375q-0.796875 -0.96875 -2.078125 -0.96875q-1.140625 0 -1.9375 0.78125q-0.78125 0.765625 -0.859375 2.046875zm23.074646 -2.125l-8.96875 0l0 -1.5625l8.96875 0l0 1.5625zm0 4.125l-8.96875 0l0 -1.546875l8.96875 0l0 1.546875zm13.125122 3.875l-1.671875 0l0 -10.640625q-0.59375 0.578125 -1.578125 1.15625q-0.984375 0.5625 -1.765625 0.859375l0 -1.625q1.40625 -0.65625 2.453125 -1.59375q1.046875 -0.9375 1.484375 -1.8125l1.078125 0l0 13.65625zm5.6413574 4.0l-1.1875 0q2.765625 -4.453125 2.765625 -8.921875q0 -1.734375 -0.390625 -3.453125q-0.328125 -1.390625 -0.890625 -2.671875q-0.359375 -0.84375 -1.484375 -2.78125l1.1875 0q1.75 2.328125 2.578125 4.671875q0.71875 2.015625 0.71875 4.234375q0 2.5 -0.96875 4.84375q-0.953125 2.328125 -2.328125 4.078125z" fill-rule="nonzero"></path><path fill="#000000" fill-opacity="0.0" d="m388.49344 411.97638l179.99997 0l0 58.992126l-179.99997 0z" fill-rule="evenodd"></path><path stroke="#000000" stroke-width="2.0" stroke-linejoin="round" stroke-linecap="butt" d="m388.49344 411.97638l179.99997 0l0 58.992126l-179.99997 0z" fill-rule="evenodd"></path><path fill="#000000" d="m402.72214 438.89636l0 -13.59375l9.84375 0l0 1.59375l-8.046875 0l0 4.171875l7.53125 0l0 1.59375l-7.53125 0l0 4.625l8.359375 0l0 1.609375l-10.15625 0zm12.193573 0l0 -9.859375l1.5 0l0 1.390625q0.453125 -0.71875 1.21875 -1.15625q0.78125 -0.453125 1.765625 -0.453125q1.09375 0 1.796875 0.453125q0.703125 0.453125 0.984375 1.28125q1.171875 -1.734375 3.046875 -1.734375q1.46875 0 2.25 0.8125q0.796875 0.8125 0.796875 2.5l0 6.765625l-1.671875 0l0 -6.203125q0 -1.0 -0.15625 -1.4375q-0.15625 -0.453125 -0.59375 -0.71875q-0.421875 -0.265625 -1.0 -0.265625q-1.03125 0 -1.71875 0.6875q-0.6875 0.6875 -0.6875 2.21875l0 5.71875l-1.671875 0l0 -6.40625q0 -1.109375 -0.40625 -1.65625q-0.40625 -0.5625 -1.34375 -0.5625q-0.703125 0 -1.3125 0.375q-0.59375 0.359375 -0.859375 1.078125q-0.265625 0.71875 -0.265625 2.0625l0 5.109375l-1.671875 0zm17.087677 0l-1.546875 0l0 -13.59375l1.65625 0l0 4.84375q1.0625 -1.328125 2.703125 -1.328125q0.90625 0 1.71875 0.375q0.8125 0.359375 1.328125 1.03125q0.53125 0.65625 0.828125 1.59375q0.296875 0.9375 0.296875 2.0q0 2.53125 -1.25 3.921875q-1.25 1.375 -3.0 1.375q-1.75 0 -2.734375 -1.453125l0 1.234375zm-0.015625 -5.0q0 1.765625 0.46875 2.5625q0.796875 1.28125 2.140625 1.28125q1.09375 0 1.890625 -0.9375q0.796875 -0.953125 0.796875 -2.84375q0 -1.921875 -0.765625 -2.84375q-0.765625 -0.921875 -1.84375 -0.921875q-1.09375 0 -1.890625 0.953125q-0.796875 0.953125 -0.796875 2.75zm15.594452 1.828125l1.71875 0.21875q-0.40625 1.5 -1.515625 2.34375q-1.09375 0.828125 -2.8125 0.828125q-2.15625 0 -3.421875 -1.328125q-1.265625 -1.328125 -1.265625 -3.734375q0 -2.484375 1.265625 -3.859375q1.28125 -1.375 3.328125 -1.375q1.984375 0 3.234375 1.34375q1.25 1.34375 1.25 3.796875q0 0.140625 -0.015625 0.4375l-7.34375 0q0.09375 1.625 0.921875 2.484375q0.828125 0.859375 2.0625 0.859375q0.90625 0 1.546875 -0.46875q0.65625 -0.484375 1.046875 -1.546875zm-5.484375 -2.703125l5.5 0q-0.109375 -1.234375 -0.625 -1.859375q-0.796875 -0.96875 -2.078125 -0.96875q-1.140625 0 -1.9375 0.78125q-0.78125 0.765625 -0.859375 2.046875zm15.500732 5.875l0 -1.25q-0.9375 1.46875 -2.75 1.46875q-1.171875 0 -2.171875 -0.640625q-0.984375 -0.65625 -1.53125 -1.8125q-0.53125 -1.171875 -0.53125 -2.6875q0 -1.46875 0.484375 -2.671875q0.5 -1.203125 1.46875 -1.84375q0.984375 -0.640625 2.203125 -0.640625q0.890625 0 1.578125 0.375q0.703125 0.375 1.140625 0.984375l0 -4.875l1.65625 0l0 13.59375l-1.546875 0zm-5.28125 -4.921875q0 1.890625 0.796875 2.828125q0.8125 0.9375 1.890625 0.9375q1.09375 0 1.859375 -0.890625q0.765625 -0.890625 0.765625 -2.734375q0 -2.015625 -0.78125 -2.953125q-0.78125 -0.953125 -1.921875 -0.953125q-1.109375 0 -1.859375 0.90625q-0.75 0.90625 -0.75 2.859375zm15.656952 4.921875l0 -1.25q-0.9375 1.46875 -2.75 1.46875q-1.171875 0 -2.171875 -0.640625q-0.984375 -0.65625 -1.53125 -1.8125q-0.53125 -1.171875 -0.53125 -2.6875q0 -1.46875 0.484375 -2.671875q0.5 -1.203125 1.46875 -1.84375q0.984375 -0.640625 2.203125 -0.640625q0.890625 0 1.578125 0.375q0.703125 0.375 1.140625 0.984375l0 -4.875l1.65625 0l0 13.59375l-1.546875 0zm-5.28125 -4.921875q0 1.890625 0.796875 2.828125q0.8125 0.9375 1.890625 0.9375q1.09375 0 1.859375 -0.890625q0.765625 -0.890625 0.765625 -2.734375q0 -2.015625 -0.78125 -2.953125q-0.78125 -0.953125 -1.921875 -0.953125q-1.109375 0 -1.859375 0.90625q-0.75 0.90625 -0.75 2.859375zm9.281982 -6.765625l0 -1.90625l1.671875 0l0 1.90625l-1.671875 0zm0 11.6875l0 -9.859375l1.671875 0l0 9.859375l-1.671875 0zm4.129181 0l0 -9.859375l1.5 0l0 1.40625q1.09375 -1.625 3.140625 -1.625q0.890625 0 1.640625 0.328125q0.75 0.3125 1.109375 0.84375q0.375 0.515625 0.53125 1.21875q0.09375 0.46875 0.09375 1.625l0 6.0625l-1.671875 0l0 -6.0q0 -1.015625 -0.203125 -1.515625q-0.1875 -0.515625 -0.6875 -0.8125q-0.5 -0.296875 -1.171875 -0.296875q-1.0625 0 -1.84375 0.671875q-0.765625 0.671875 -0.765625 2.578125l0 5.375l-1.671875 0zm10.078857 0.8125l1.609375 0.25q0.109375 0.75 0.578125 1.09375q0.609375 0.453125 1.6875 0.453125q1.171875 0 1.796875 -0.46875q0.625 -0.453125 0.859375 -1.28125q0.125 -0.515625 0.109375 -2.15625q-1.09375 1.296875 -2.71875 1.296875q-2.03125 0 -3.15625 -1.46875q-1.109375 -1.46875 -1.109375 -3.515625q0 -1.40625 0.515625 -2.59375q0.515625 -1.203125 1.484375 -1.84375q0.96875 -0.65625 2.265625 -0.65625q1.75 0 2.875 1.40625l0 -1.1875l1.546875 0l0 8.515625q0 2.3125 -0.46875 3.265625q-0.46875 0.96875 -1.484375 1.515625q-1.015625 0.5625 -2.5 0.5625q-1.765625 0 -2.859375 -0.796875q-1.078125 -0.796875 -1.03125 -2.390625zm1.375 -5.921875q0 1.953125 0.765625 2.84375q0.78125 0.890625 1.9375 0.890625q1.140625 0 1.921875 -0.890625q0.78125 -0.890625 0.78125 -2.78125q0 -1.8125 -0.8125 -2.71875q-0.796875 -0.921875 -1.921875 -0.921875q-1.109375 0 -1.890625 0.90625q-0.78125 0.890625 -0.78125 2.671875zm14.449646 5.109375l0 -13.59375l1.671875 0l0 13.59375l-1.671875 0zm3.551056 -4.921875q0 -2.734375 1.53125 -4.0625q1.265625 -1.09375 3.09375 -1.09375q2.0312805 0 3.3125305 1.34375q1.296875 1.328125 1.296875 3.671875q0 1.90625 -0.578125 3.0q-0.5625 1.078125 -1.65625 1.6875q-1.0781555 0.59375 -2.3750305 0.59375q-2.0625 0 -3.34375 -1.328125q-1.28125 -1.328125 -1.28125 -3.8125zm1.71875 0q0 1.890625 0.828125 2.828125q0.828125 0.9375 2.078125 0.9375q1.25 0 2.0625305 -0.9375q0.828125 -0.953125 0.828125 -2.890625q0 -1.828125 -0.828125 -2.765625q-0.8281555 -0.9375 -2.0625305 -0.9375q-1.25 0 -2.078125 0.9375q-0.828125 0.9375 -0.828125 2.828125zm8.656952 0q0 -2.734375 1.53125 -4.0625q1.265625 -1.09375 3.09375 -1.09375q2.03125 0 3.3125 1.34375q1.296875 1.328125 1.296875 3.671875q0 1.90625 -0.578125 3.0q-0.5625 1.078125 -1.65625 1.6875q-1.078125 0.59375 -2.375 0.59375q-2.0625 0 -3.34375 -1.328125q-1.28125 -1.328125 -1.28125 -3.8125zm1.71875 0q0 1.890625 0.828125 2.828125q0.828125 0.9375 2.078125 0.9375q1.25 0 2.0625 -0.9375q0.828125 -0.953125 0.828125 -2.890625q0 -1.828125 -0.828125 -2.765625q-0.828125 -0.9375 -2.0625 -0.9375q-1.25 0 -2.078125 0.9375q-0.828125 0.9375 -0.828125 2.828125zm9.297607 4.921875l0 -13.59375l1.671875 0l0 7.75l3.953125 -4.015625l2.15625 0l-3.765625 3.65625l4.140625 6.203125l-2.0625 0l-3.25 -5.03125l-1.171875 1.125l0 3.90625l-1.671875 0zm15.765625 0l0 -1.453125q-1.140625 1.671875 -3.125 1.671875q-0.859375 0 -1.625 -0.328125q-0.75 -0.34375 -1.125 -0.84375q-0.359375 -0.5 -0.515625 -1.234375q-0.09375 -0.5 -0.09375 -1.5625l0 -6.109375l1.671875 0l0 5.46875q0 1.3125 0.09375 1.765625q0.15625 0.65625 0.671875 1.03125q0.515625 0.375 1.265625 0.375q0.75 0 1.40625 -0.375q0.65625 -0.390625 0.921875 -1.046875q0.28125 -0.671875 0.28125 -1.9375l0 -5.28125l1.671875 0l0 9.859375l-1.5 0zm3.9226074 3.78125l0 -13.640625l1.53125 0l0 1.28125q0.53125 -0.75 1.203125 -1.125q0.6875 -0.375 1.640625 -0.375q1.265625 0 2.234375 0.65625q0.96875 0.640625 1.453125 1.828125q0.5 1.1875 0.5 2.59375q0 1.515625 -0.546875 2.734375q-0.546875 1.203125 -1.578125 1.84375q-1.03125 0.640625 -2.171875 0.640625q-0.84375 0 -1.515625 -0.34375q-0.65625 -0.359375 -1.078125 -0.890625l0 4.796875l-1.671875 0zm1.515625 -8.65625q0 1.90625 0.765625 2.8125q0.78125 0.90625 1.875 0.90625q1.109375 0 1.890625 -0.9375q0.796875 -0.9375 0.796875 -2.921875q0 -1.875 -0.78125 -2.8125q-0.765625 -0.9375 -1.84375 -0.9375q-1.0625 0 -1.890625 1.0q-0.8125 1.0 -0.8125 2.890625z" fill-rule="nonzero"></path><path fill="#000000" d="m435.7392 464.89636q-1.375 -1.75 -2.328125 -4.078125q-0.953125 -2.34375 -0.953125 -4.84375q0 -2.21875 0.703125 -4.234375q0.84375 -2.34375 2.578125 -4.671875l1.203125 0q-1.125 1.921875 -1.484375 2.75q-0.5625 1.28125 -0.890625 2.671875q-0.40625 1.734375 -0.40625 3.484375q0 4.46875 2.78125 8.921875l-1.203125 0zm11.228302 -14.265625l-1.65625 0.125q-0.21875 -0.984375 -0.640625 -1.421875q-0.671875 -0.71875 -1.65625 -0.71875q-0.8125 0 -1.40625 0.4375q-0.796875 0.578125 -1.25 1.6875q-0.453125 1.09375 -0.46875 3.140625q0.609375 -0.921875 1.46875 -1.359375q0.875 -0.453125 1.828125 -0.453125q1.671875 0 2.84375 1.234375q1.171875 1.234375 1.171875 3.171875q0 1.28125 -0.546875 2.390625q-0.546875 1.09375 -1.515625 1.6875q-0.96875 0.578125 -2.1875 0.578125q-2.09375 0 -3.40625 -1.53125q-1.3125 -1.546875 -1.3125 -5.0625q0 -3.953125 1.453125 -5.734375q1.265625 -1.5625 3.421875 -1.5625q1.609375 0 2.625 0.90625q1.03125 0.890625 1.234375 2.484375zm-6.8125 5.859375q0 0.859375 0.359375 1.65625q0.375 0.78125 1.03125 1.203125q0.65625 0.40625 1.375 0.40625q1.0625 0 1.8125 -0.84375q0.765625 -0.859375 0.765625 -2.328125q0 -1.40625 -0.75 -2.21875q-0.75 -0.8125 -1.890625 -0.8125q-1.125 0 -1.921875 0.8125q-0.78125 0.8125 -0.78125 2.125zm8.531952 0.8125l1.671875 -0.21875q0.28125 1.421875 0.96875 2.046875q0.703125 0.625 1.6875 0.625q1.1875 0 2.0 -0.8125q0.8125 -0.828125 0.8125 -2.03125q0 -1.140625 -0.765625 -1.890625q-0.75 -0.75 -1.90625 -0.75q-0.46875 0 -1.171875 0.1875l0.1875 -1.46875q0.15625 0.015625 0.265625 0.015625q1.0625 0 1.90625 -0.546875q0.859375 -0.5625 0.859375 -1.71875q0 -0.921875 -0.625 -1.515625q-0.609375 -0.609375 -1.59375 -0.609375q-0.96875 0 -1.625 0.609375q-0.640625 0.609375 -0.828125 1.84375l-1.671875 -0.296875q0.296875 -1.6875 1.375 -2.609375q1.09375 -0.921875 2.71875 -0.921875q1.109375 0 2.046875 0.484375q0.9375 0.46875 1.421875 1.296875q0.5 0.828125 0.5 1.75q0 0.890625 -0.46875 1.609375q-0.46875 0.71875 -1.40625 1.15625q1.21875 0.265625 1.875 1.15625q0.671875 0.875 0.671875 2.1875q0 1.78125 -1.296875 3.015625q-1.296875 1.234375 -3.28125 1.234375q-1.796875 0 -2.984375 -1.0625q-1.171875 -1.0625 -1.34375 -2.765625zm10.625732 0.453125l1.59375 -0.15625q0.203125 1.140625 0.78125 1.65625q0.578125 0.5 1.484375 0.5q0.765625 0 1.34375 -0.34375q0.578125 -0.359375 0.953125 -0.953125q0.375 -0.59375 0.625 -1.59375q0.25 -1.0 0.25 -2.03125q0 -0.109375 -0.015625 -0.34375q-0.5 0.796875 -1.375 1.296875q-0.859375 0.5 -1.875 0.5q-1.6875 0 -2.859375 -1.21875q-1.171875 -1.234375 -1.171875 -3.234375q0 -2.078125 1.21875 -3.328125q1.234375 -1.265625 3.0625 -1.265625q1.328125 0 2.421875 0.71875q1.109375 0.703125 1.671875 2.03125q0.578125 1.328125 0.578125 3.828125q0 2.609375 -0.578125 4.15625q-0.5625 1.546875 -1.6875 2.359375q-1.109375 0.796875 -2.609375 0.796875q-1.59375 0 -2.609375 -0.890625q-1.0 -0.890625 -1.203125 -2.484375zm6.828125 -6.0q0 -1.4375 -0.765625 -2.28125q-0.765625 -0.859375 -1.84375 -0.859375q-1.109375 0 -1.9375 0.921875q-0.828125 0.90625 -0.828125 2.34375q0 1.3125 0.78125 2.125q0.796875 0.796875 1.9375 0.796875q1.171875 0 1.90625 -0.796875q0.75 -0.8125 0.75 -2.25zm5.860077 1.765625q-1.046875 -0.375 -1.546875 -1.078125q-0.5 -0.71875 -0.5 -1.703125q0 -1.484375 1.0625 -2.484375q1.078125 -1.015625 2.84375 -1.015625q1.78125 0 2.859375 1.03125q1.09375 1.03125 1.09375 2.515625q0 0.953125 -0.5 1.65625q-0.484375 0.703125 -1.5 1.078125q1.25 0.40625 1.90625 1.3125q0.65625 0.90625 0.65625 2.171875q0 1.75 -1.234375 2.9375q-1.234375 1.1875 -3.25 1.1875q-2.015625 0 -3.25 -1.1875q-1.234375 -1.203125 -1.234375 -2.984375q0 -1.328125 0.671875 -2.21875q0.671875 -0.890625 1.921875 -1.21875zm-0.328125 -2.828125q0 0.96875 0.609375 1.578125q0.625 0.609375 1.625 0.609375q0.953125 0 1.5625 -0.609375q0.625 -0.609375 0.625 -1.484375q0 -0.921875 -0.640625 -1.546875q-0.625 -0.625 -1.578125 -0.625q-0.953125 0 -1.578125 0.609375q-0.625 0.609375 -0.625 1.46875zm-0.546875 6.28125q0 0.71875 0.328125 1.390625q0.34375 0.65625 1.015625 1.03125q0.671875 0.359375 1.4375 0.359375q1.203125 0 1.984375 -0.765625q0.78125 -0.78125 0.78125 -1.96875q0 -1.203125 -0.8125 -1.984375q-0.796875 -0.796875 -2.0 -0.796875q-1.1875 0 -1.96875 0.78125q-0.765625 0.78125 -0.765625 1.953125zm8.688232 0.328125l1.671875 -0.21875q0.28125 1.421875 0.96875 2.046875q0.703125 0.625 1.6875 0.625q1.1875 0 2.0 -0.8125q0.8125 -0.828125 0.8125 -2.03125q0 -1.140625 -0.765625 -1.890625q-0.75 -0.75 -1.90625 -0.75q-0.46875 0 -1.171875 0.1875l0.1875 -1.46875q0.15625 0.015625 0.265625 0.015625q1.0625 0 1.90625 -0.546875q0.859375 -0.5625 0.859375 -1.71875q0 -0.921875 -0.625 -1.515625q-0.609375 -0.609375 -1.59375 -0.609375q-0.96875 0 -1.625 0.609375q-0.640625 0.609375 -0.828125 1.84375l-1.671875 -0.296875q0.296875 -1.6875 1.375 -2.609375q1.09375 -0.921875 2.71875 -0.921875q1.109375 0 2.046875 0.484375q0.9375 0.46875 1.421875 1.296875q0.5 0.828125 0.5 1.75q0 0.890625 -0.46875 1.609375q-0.46875 0.71875 -1.40625 1.15625q1.21875 0.265625 1.875 1.15625q0.671875 0.875 0.671875 2.1875q0 1.78125 -1.296875 3.015625q-1.296875 1.234375 -3.28125 1.234375q-1.796875 0 -2.984375 -1.0625q-1.171875 -1.0625 -1.34375 -2.765625zm9.719452 3.59375l3.59375 -5.125l-3.328125 -4.734375l2.09375 0l1.515625 2.3125q0.421875 0.65625 0.671875 1.109375q0.421875 -0.609375 0.765625 -1.09375l1.65625 -2.328125l1.984375 0l-3.390625 4.640625l3.65625 5.21875l-2.046875 0l-2.03125 -3.0625l-0.53125 -0.828125l-2.59375 3.890625l-2.015625 0zm9.984375 -3.59375l1.671875 -0.21875q0.28125 1.421875 0.96875 2.046875q0.703125 0.625 1.6875 0.625q1.1875 0 2.0 -0.8125q0.8125 -0.828125 0.8125 -2.03125q0 -1.140625 -0.765625 -1.890625q-0.75 -0.75 -1.90625 -0.75q-0.46875 0 -1.171875 0.1875l0.1875 -1.46875q0.15625 0.015625 0.265625 0.015625q1.0625 0 1.90625 -0.546875q0.859375 -0.5625 0.859375 -1.71875q0 -0.921875 -0.625 -1.515625q-0.609375 -0.609375 -1.59375 -0.609375q-0.96875 0 -1.625 0.609375q-0.640625 0.609375 -0.828125 1.84375l-1.671875 -0.296875q0.296875 -1.6875 1.375 -2.609375q1.09375 -0.921875 2.71875 -0.921875q1.109375 0 2.046875 0.484375q0.9375 0.46875 1.421875 1.296875q0.5 0.828125 0.5 1.75q0 0.890625 -0.46875 1.609375q-0.46875 0.71875 -1.40625 1.15625q1.21875 0.265625 1.875 1.15625q0.671875 0.875 0.671875 2.1875q0 1.78125 -1.296875 3.015625q-1.296875 1.234375 -3.28125 1.234375q-1.796875 0 -2.984375 -1.0625q-1.171875 -1.0625 -1.34375 -2.765625zm19.141357 1.984375l0 1.609375l-8.984375 0q-0.015625 -0.609375 0.1875 -1.15625q0.34375 -0.921875 1.09375 -1.8125q0.765625 -0.890625 2.1875 -2.0625q2.21875 -1.8125 3.0 -2.875q0.78125 -1.0625 0.78125 -2.015625q0 -0.984375 -0.71875 -1.671875q-0.703125 -0.6875 -1.84375 -0.6875q-1.203125 0 -1.9375 0.734375q-0.71875 0.71875 -0.71875 2.0l-1.71875 -0.171875q0.171875 -1.921875 1.328125 -2.921875q1.15625 -1.015625 3.09375 -1.015625q1.953125 0 3.09375 1.09375q1.140625 1.078125 1.140625 2.6875q0 0.8125 -0.34375 1.609375q-0.328125 0.78125 -1.109375 1.65625q-0.765625 0.859375 -2.5625 2.390625q-1.5 1.265625 -1.9375 1.71875q-0.421875 0.4375 -0.703125 0.890625l6.671875 0zm3.1569214 5.609375l-1.1875 0q2.765625 -4.453125 2.765625 -8.921875q0 -1.734375 -0.390625 -3.453125q-0.328125 -1.390625 -0.890625 -2.671875q-0.359375 -0.84375 -1.484375 -2.78125l1.1875 0q1.75 2.328125 2.578125 4.671875q0.71875 2.015625 0.71875 4.234375q0 2.5 -0.96875 4.84375q-0.953125 2.328125 -2.328125 4.078125z" fill-rule="nonzero"></path><path fill="#000000" fill-opacity="0.0" d="m388.49344 567.8504l179.99997 0l0 58.992126l-179.99997 0z" fill-rule="evenodd"></path><path stroke="#000000" stroke-width="2.0" stroke-linejoin="round" stroke-linecap="butt" d="m388.49344 567.8504l179.99997 0l0 58.992126l-179.99997 0z" fill-rule="evenodd"></path><path fill="#000000" d="m402.72214 594.7704l0 -13.59375l9.84375 0l0 1.59375l-8.046875 0l0 4.171875l7.53125 0l0 1.59375l-7.53125 0l0 4.625l8.359375 0l0 1.609375l-10.15625 0zm12.193573 0l0 -9.859375l1.5 0l0 1.390625q0.453125 -0.71875 1.21875 -1.15625q0.78125 -0.453125 1.765625 -0.453125q1.09375 0 1.796875 0.453125q0.703125 0.453125 0.984375 1.28125q1.171875 -1.734375 3.046875 -1.734375q1.46875 0 2.25 0.8125q0.796875 0.8125 0.796875 2.5l0 6.765625l-1.671875 0l0 -6.203125q0 -1.0 -0.15625 -1.4375q-0.15625 -0.453125 -0.59375 -0.71875q-0.421875 -0.265625 -1.0 -0.265625q-1.03125 0 -1.71875 0.6875q-0.6875 0.6875 -0.6875 2.21875l0 5.71875l-1.671875 0l0 -6.40625q0 -1.109375 -0.40625 -1.65625q-0.40625 -0.5625 -1.34375 -0.5625q-0.703125 0 -1.3125 0.375q-0.59375 0.359375 -0.859375 1.078125q-0.265625 0.71875 -0.265625 2.0625l0 5.109375l-1.671875 0zm17.087677 0l-1.546875 0l0 -13.59375l1.65625 0l0 4.84375q1.0625 -1.328125 2.703125 -1.328125q0.90625 0 1.71875 0.375q0.8125 0.359375 1.328125 1.03125q0.53125 0.65625 0.828125 1.59375q0.296875 0.9375 0.296875 2.0q0 2.53125 -1.25 3.921875q-1.25 1.375 -3.0 1.375q-1.75 0 -2.734375 -1.453125l0 1.234375zm-0.015625 -5.0q0 1.765625 0.46875 2.5625q0.796875 1.28125 2.140625 1.28125q1.09375 0 1.890625 -0.9375q0.796875 -0.953125 0.796875 -2.84375q0 -1.921875 -0.765625 -2.84375q-0.765625 -0.921875 -1.84375 -0.921875q-1.09375 0 -1.890625 0.953125q-0.796875 0.953125 -0.796875 2.75zm15.594452 1.828125l1.71875 0.21875q-0.40625 1.5 -1.515625 2.34375q-1.09375 0.828125 -2.8125 0.828125q-2.15625 0 -3.421875 -1.328125q-1.265625 -1.328125 -1.265625 -3.734375q0 -2.484375 1.265625 -3.859375q1.28125 -1.375 3.328125 -1.375q1.984375 0 3.234375 1.34375q1.25 1.34375 1.25 3.796875q0 0.140625 -0.015625 0.4375l-7.34375 0q0.09375 1.625 0.921875 2.484375q0.828125 0.859375 2.0625 0.859375q0.90625 0 1.546875 -0.46875q0.65625 -0.484375 1.046875 -1.546875zm-5.484375 -2.703125l5.5 0q-0.109375 -1.234375 -0.625 -1.859375q-0.796875 -0.96875 -2.078125 -0.96875q-1.140625 0 -1.9375 0.78125q-0.78125 0.765625 -0.859375 2.046875zm15.500732 5.875l0 -1.25q-0.9375 1.46875 -2.75 1.46875q-1.171875 0 -2.171875 -0.640625q-0.984375 -0.65625 -1.53125 -1.8125q-0.53125 -1.171875 -0.53125 -2.6875q0 -1.46875 0.484375 -2.671875q0.5 -1.203125 1.46875 -1.84375q0.984375 -0.640625 2.203125 -0.640625q0.890625 0 1.578125 0.375q0.703125 0.375 1.140625 0.984375l0 -4.875l1.65625 0l0 13.59375l-1.546875 0zm-5.28125 -4.921875q0 1.890625 0.796875 2.828125q0.8125 0.9375 1.890625 0.9375q1.09375 0 1.859375 -0.890625q0.765625 -0.890625 0.765625 -2.734375q0 -2.015625 -0.78125 -2.953125q-0.78125 -0.953125 -1.921875 -0.953125q-1.109375 0 -1.859375 0.90625q-0.75 0.90625 -0.75 2.859375zm15.656952 4.921875l0 -1.25q-0.9375 1.46875 -2.75 1.46875q-1.171875 0 -2.171875 -0.640625q-0.984375 -0.65625 -1.53125 -1.8125q-0.53125 -1.171875 -0.53125 -2.6875q0 -1.46875 0.484375 -2.671875q0.5 -1.203125 1.46875 -1.84375q0.984375 -0.640625 2.203125 -0.640625q0.890625 0 1.578125 0.375q0.703125 0.375 1.140625 0.984375l0 -4.875l1.65625 0l0 13.59375l-1.546875 0zm-5.28125 -4.921875q0 1.890625 0.796875 2.828125q0.8125 0.9375 1.890625 0.9375q1.09375 0 1.859375 -0.890625q0.765625 -0.890625 0.765625 -2.734375q0 -2.015625 -0.78125 -2.953125q-0.78125 -0.953125 -1.921875 -0.953125q-1.109375 0 -1.859375 0.90625q-0.75 0.90625 -0.75 2.859375zm9.281982 -6.765625l0 -1.90625l1.671875 0l0 1.90625l-1.671875 0zm0 11.6875l0 -9.859375l1.671875 0l0 9.859375l-1.671875 0zm4.129181 0l0 -9.859375l1.5 0l0 1.40625q1.09375 -1.625 3.140625 -1.625q0.890625 0 1.640625 0.328125q0.75 0.3125 1.109375 0.84375q0.375 0.515625 0.53125 1.21875q0.09375 0.46875 0.09375 1.625l0 6.0625l-1.671875 0l0 -6.0q0 -1.015625 -0.203125 -1.515625q-0.1875 -0.515625 -0.6875 -0.8125q-0.5 -0.296875 -1.171875 -0.296875q-1.0625 0 -1.84375 0.671875q-0.765625 0.671875 -0.765625 2.578125l0 5.375l-1.671875 0zm10.078857 0.8125l1.609375 0.25q0.109375 0.75 0.578125 1.09375q0.609375 0.453125 1.6875 0.453125q1.171875 0 1.796875 -0.46875q0.625 -0.453125 0.859375 -1.28125q0.125 -0.515625 0.109375 -2.15625q-1.09375 1.296875 -2.71875 1.296875q-2.03125 0 -3.15625 -1.46875q-1.109375 -1.46875 -1.109375 -3.515625q0 -1.40625 0.515625 -2.59375q0.515625 -1.203125 1.484375 -1.84375q0.96875 -0.65625 2.265625 -0.65625q1.75 0 2.875 1.40625l0 -1.1875l1.546875 0l0 8.515625q0 2.3125 -0.46875 3.265625q-0.46875 0.96875 -1.484375 1.515625q-1.015625 0.5625 -2.5 0.5625q-1.765625 0 -2.859375 -0.796875q-1.078125 -0.796875 -1.03125 -2.390625zm1.375 -5.921875q0 1.953125 0.765625 2.84375q0.78125 0.890625 1.9375 0.890625q1.140625 0 1.921875 -0.890625q0.78125 -0.890625 0.78125 -2.78125q0 -1.8125 -0.8125 -2.71875q-0.796875 -0.921875 -1.921875 -0.921875q-1.109375 0 -1.890625 0.90625q-0.78125 0.890625 -0.78125 2.671875zm14.449646 5.109375l0 -13.59375l1.671875 0l0 13.59375l-1.671875 0zm3.551056 -4.921875q0 -2.734375 1.53125 -4.0625q1.265625 -1.09375 3.09375 -1.09375q2.0312805 0 3.3125305 1.34375q1.296875 1.328125 1.296875 3.671875q0 1.90625 -0.578125 3.0q-0.5625 1.078125 -1.65625 1.6875q-1.0781555 0.59375 -2.3750305 0.59375q-2.0625 0 -3.34375 -1.328125q-1.28125 -1.328125 -1.28125 -3.8125zm1.71875 0q0 1.890625 0.828125 2.828125q0.828125 0.9375 2.078125 0.9375q1.25 0 2.0625305 -0.9375q0.828125 -0.953125 0.828125 -2.890625q0 -1.828125 -0.828125 -2.765625q-0.8281555 -0.9375 -2.0625305 -0.9375q-1.25 0 -2.078125 0.9375q-0.828125 0.9375 -0.828125 2.828125zm8.656952 0q0 -2.734375 1.53125 -4.0625q1.265625 -1.09375 3.09375 -1.09375q2.03125 0 3.3125 1.34375q1.296875 1.328125 1.296875 3.671875q0 1.90625 -0.578125 3.0q-0.5625 1.078125 -1.65625 1.6875q-1.078125 0.59375 -2.375 0.59375q-2.0625 0 -3.34375 -1.328125q-1.28125 -1.328125 -1.28125 -3.8125zm1.71875 0q0 1.890625 0.828125 2.828125q0.828125 0.9375 2.078125 0.9375q1.25 0 2.0625 -0.9375q0.828125 -0.953125 0.828125 -2.890625q0 -1.828125 -0.828125 -2.765625q-0.828125 -0.9375 -2.0625 -0.9375q-1.25 0 -2.078125 0.9375q-0.828125 0.9375 -0.828125 2.828125zm9.297607 4.921875l0 -13.59375l1.671875 0l0 7.75l3.953125 -4.015625l2.15625 0l-3.765625 3.65625l4.140625 6.203125l-2.0625 0l-3.25 -5.03125l-1.171875 1.125l0 3.90625l-1.671875 0zm15.765625 0l0 -1.453125q-1.140625 1.671875 -3.125 1.671875q-0.859375 0 -1.625 -0.328125q-0.75 -0.34375 -1.125 -0.84375q-0.359375 -0.5 -0.515625 -1.234375q-0.09375 -0.5 -0.09375 -1.5625l0 -6.109375l1.671875 0l0 5.46875q0 1.3125 0.09375 1.765625q0.15625 0.65625 0.671875 1.03125q0.515625 0.375 1.265625 0.375q0.75 0 1.40625 -0.375q0.65625 -0.390625 0.921875 -1.046875q0.28125 -0.671875 0.28125 -1.9375l0 -5.28125l1.671875 0l0 9.859375l-1.5 0zm3.9226074 3.78125l0 -13.640625l1.53125 0l0 1.28125q0.53125 -0.75 1.203125 -1.125q0.6875 -0.375 1.640625 -0.375q1.265625 0 2.234375 0.65625q0.96875 0.640625 1.453125 1.828125q0.5 1.1875 0.5 2.59375q0 1.515625 -0.546875 2.734375q-0.546875 1.203125 -1.578125 1.84375q-1.03125 0.640625 -2.171875 0.640625q-0.84375 0 -1.515625 -0.34375q-0.65625 -0.359375 -1.078125 -0.890625l0 4.796875l-1.671875 0zm1.515625 -8.65625q0 1.90625 0.765625 2.8125q0.78125 0.90625 1.875 0.90625q1.109375 0 1.890625 -0.9375q0.796875 -0.9375 0.796875 -2.921875q0 -1.875 -0.78125 -2.8125q-0.765625 -0.9375 -1.84375 -0.9375q-1.0625 0 -1.890625 1.0q-0.8125 1.0 -0.8125 2.890625z" fill-rule="nonzero"></path><path fill="#000000" d="m440.92703 620.7704q-1.375 -1.75 -2.328125 -4.078125q-0.953125 -2.34375 -0.953125 -4.84375q0 -2.21875 0.703125 -4.234375q0.84375 -2.34375 2.578125 -4.671875l1.203125 0q-1.125 1.921875 -1.484375 2.75q-0.5625 1.28125 -0.890625 2.671875q-0.40625 1.734375 -0.40625 3.484375q0 4.46875 2.78125 8.921875l-1.203125 0zm11.228302 -14.265625l-1.65625 0.125q-0.21875 -0.984375 -0.640625 -1.421875q-0.671875 -0.71875 -1.65625 -0.71875q-0.8125 0 -1.40625 0.4375q-0.796875 0.578125 -1.25 1.6875q-0.453125 1.09375 -0.46875 3.140625q0.609375 -0.921875 1.46875 -1.359375q0.875 -0.453125 1.828125 -0.453125q1.671875 0 2.84375 1.234375q1.171875 1.234375 1.171875 3.171875q0 1.28125 -0.546875 2.390625q-0.546875 1.09375 -1.515625 1.6875q-0.96875 0.578125 -2.1875 0.578125q-2.09375 0 -3.40625 -1.53125q-1.3125 -1.546875 -1.3125 -5.0625q0 -3.953125 1.453125 -5.734375q1.265625 -1.5625 3.421875 -1.5625q1.609375 0 2.625 0.90625q1.03125 0.890625 1.234375 2.484375zm-6.8125 5.859375q0 0.859375 0.359375 1.65625q0.375 0.78125 1.03125 1.203125q0.65625 0.40625 1.375 0.40625q1.0625 0 1.8125 -0.84375q0.765625 -0.859375 0.765625 -2.328125q0 -1.40625 -0.75 -2.21875q-0.75 -0.8125 -1.890625 -0.8125q-1.125 0 -1.921875 0.8125q-0.78125 0.8125 -0.78125 2.125zm8.531982 0.8125l1.671875 -0.21875q0.28125 1.421875 0.96875 2.046875q0.703125 0.625 1.6875 0.625q1.1875 0 2.0 -0.8125q0.8125 -0.828125 0.8125 -2.03125q0 -1.140625 -0.765625 -1.890625q-0.75 -0.75 -1.90625 -0.75q-0.46875 0 -1.171875 0.1875l0.1875 -1.46875q0.15625 0.015625 0.265625 0.015625q1.0625 0 1.90625 -0.546875q0.859375 -0.5625 0.859375 -1.71875q0 -0.921875 -0.625 -1.515625q-0.609375 -0.609375 -1.59375 -0.609375q-0.96875 0 -1.625 0.609375q-0.640625 0.609375 -0.828125 1.84375l-1.671875 -0.296875q0.296875 -1.6875 1.375 -2.609375q1.09375 -0.921875 2.71875 -0.921875q1.109375 0 2.046875 0.484375q0.9375 0.46875 1.421875 1.296875q0.5 0.828125 0.5 1.75q0 0.890625 -0.46875 1.609375q-0.46875 0.71875 -1.40625 1.15625q1.21875 0.265625 1.875 1.15625q0.671875 0.875 0.671875 2.1875q0 1.78125 -1.296875 3.015625q-1.296875 1.234375 -3.28125 1.234375q-1.796875 0 -2.984375 -1.0625q-1.171875 -1.0625 -1.34375 -2.765625zm10.625702 0.453125l1.59375 -0.15625q0.203125 1.140625 0.78125 1.65625q0.578125 0.5 1.484375 0.5q0.765625 0 1.34375 -0.34375q0.578125 -0.359375 0.953125 -0.953125q0.375 -0.59375 0.625 -1.59375q0.25 -1.0 0.25 -2.03125q0 -0.109375 -0.015625 -0.34375q-0.5 0.796875 -1.375 1.296875q-0.859375 0.5 -1.875 0.5q-1.6875 0 -2.859375 -1.21875q-1.171875 -1.234375 -1.171875 -3.234375q0 -2.078125 1.21875 -3.328125q1.234375 -1.265625 3.0625 -1.265625q1.328125 0 2.421875 0.71875q1.109375 0.703125 1.671875 2.03125q0.578125 1.328125 0.578125 3.828125q0 2.609375 -0.578125 4.15625q-0.5625 1.546875 -1.6875 2.359375q-1.109375 0.796875 -2.609375 0.796875q-1.59375 0 -2.609375 -0.890625q-1.0 -0.890625 -1.203125 -2.484375zm6.828125 -6.0q0 -1.4375 -0.765625 -2.28125q-0.765625 -0.859375 -1.84375 -0.859375q-1.109375 0 -1.9375 0.921875q-0.828125 0.90625 -0.828125 2.34375q0 1.3125 0.78125 2.125q0.796875 0.796875 1.9375 0.796875q1.171875 0 1.90625 -0.796875q0.75 -0.8125 0.75 -2.25zm5.8601074 1.765625q-1.046875 -0.375 -1.546875 -1.078125q-0.5 -0.71875 -0.5 -1.703125q0 -1.484375 1.0625 -2.484375q1.078125 -1.015625 2.84375 -1.015625q1.78125 0 2.859375 1.03125q1.09375 1.03125 1.09375 2.515625q0 0.953125 -0.5 1.65625q-0.484375 0.703125 -1.5 1.078125q1.25 0.40625 1.90625 1.3125q0.65625 0.90625 0.65625 2.171875q0 1.75 -1.234375 2.9375q-1.234375 1.1875 -3.25 1.1875q-2.015625 0 -3.25 -1.1875q-1.234375 -1.203125 -1.234375 -2.984375q0 -1.328125 0.671875 -2.21875q0.671875 -0.890625 1.921875 -1.21875zm-0.328125 -2.828125q0 0.96875 0.609375 1.578125q0.625 0.609375 1.625 0.609375q0.953125 0 1.5625 -0.609375q0.625 -0.609375 0.625 -1.484375q0 -0.921875 -0.640625 -1.546875q-0.625 -0.625 -1.578125 -0.625q-0.953125 0 -1.578125 0.609375q-0.625 0.609375 -0.625 1.46875zm-0.546875 6.28125q0 0.71875 0.328125 1.390625q0.34375 0.65625 1.015625 1.03125q0.671875 0.359375 1.4375 0.359375q1.203125 0 1.984375 -0.765625q0.78125 -0.78125 0.78125 -1.96875q0 -1.203125 -0.8125 -1.984375q-0.796875 -0.796875 -2.0 -0.796875q-1.1875 0 -1.96875 0.78125q-0.765625 0.78125 -0.765625 1.953125zm8.688202 0.328125l1.671875 -0.21875q0.28125 1.421875 0.96875 2.046875q0.703125 0.625 1.6875 0.625q1.1875 0 2.0 -0.8125q0.8125 -0.828125 0.8125 -2.03125q0 -1.140625 -0.765625 -1.890625q-0.75 -0.75 -1.90625 -0.75q-0.46875 0 -1.171875 0.1875l0.1875 -1.46875q0.15625 0.015625 0.265625 0.015625q1.0625 0 1.90625 -0.546875q0.859375 -0.5625 0.859375 -1.71875q0 -0.921875 -0.625 -1.515625q-0.609375 -0.609375 -1.59375 -0.609375q-0.96875 0 -1.625 0.609375q-0.640625 0.609375 -0.828125 1.84375l-1.671875 -0.296875q0.296875 -1.6875 1.375 -2.609375q1.09375 -0.921875 2.71875 -0.921875q1.109375 0 2.046875 0.484375q0.9375 0.46875 1.421875 1.296875q0.5 0.828125 0.5 1.75q0 0.890625 -0.46875 1.609375q-0.46875 0.71875 -1.40625 1.15625q1.21875 0.265625 1.875 1.15625q0.671875 0.875 0.671875 2.1875q0 1.78125 -1.296875 3.015625q-1.296875 1.234375 -3.28125 1.234375q-1.796875 0 -2.984375 -1.0625q-1.171875 -1.0625 -1.34375 -2.765625zm9.719482 3.59375l3.59375 -5.125l-3.328125 -4.734375l2.09375 0l1.515625 2.3125q0.421875 0.65625 0.671875 1.109375q0.421875 -0.609375 0.765625 -1.09375l1.65625 -2.328125l1.984375 0l-3.390625 4.640625l3.65625 5.21875l-2.046875 0l-2.03125 -3.0625l-0.53125 -0.828125l-2.59375 3.890625l-2.015625 0zm16.265625 0l-1.671875 0l0 -10.640625q-0.59375 0.578125 -1.578125 1.15625q-0.984375 0.5625 -1.765625 0.859375l0 -1.625q1.40625 -0.65625 2.453125 -1.59375q1.046875 -0.9375 1.484375 -1.8125l1.078125 0l0 13.65625zm5.6413574 4.0l-1.1875 0q2.765625 -4.453125 2.765625 -8.921875q0 -1.734375 -0.390625 -3.453125q-0.328125 -1.390625 -0.890625 -2.671875q-0.359375 -0.84375 -1.484375 -2.78125l1.1875 0q1.75 2.328125 2.578125 4.671875q0.71875 2.015625 0.71875 4.234375q0 2.5 -0.96875 4.84375q-0.953125 2.328125 -2.328125 4.078125z" fill-rule="nonzero"></path><path fill="#000000" fill-opacity="0.0" d="m129.09448 420.41733l180.0 0l0 42.11023l-180.0 0z" fill-rule="evenodd"></path><path stroke="#000000" stroke-width="2.0" stroke-linejoin="round" stroke-linecap="butt" d="m129.09448 420.41733l180.0 0l0 42.11023l-180.0 0z" fill-rule="evenodd"></path><path fill="#000000" d="m147.40158 447.3373l0 -13.59375l2.71875 0l3.21875 9.625q0.4375 1.34375 0.640625 2.015625q0.234375 -0.75 0.734375 -2.1875l3.25 -9.453125l2.421875 0l0 13.59375l-1.734375 0l0 -11.390625l-3.953125 11.390625l-1.625 0l-3.9375 -11.578125l0 11.578125l-1.734375 0zm21.837677 0l0 -1.453125q-1.140625 1.671875 -3.125 1.671875q-0.859375 0 -1.625 -0.328125q-0.75 -0.34375 -1.125 -0.84375q-0.359375 -0.5 -0.515625 -1.234375q-0.09375 -0.5 -0.09375 -1.5625l0 -6.109375l1.671875 0l0 5.46875q0 1.3125 0.09375 1.765625q0.15625 0.65625 0.671875 1.03125q0.515625 0.375 1.265625 0.375q0.75 0 1.40625 -0.375q0.65625 -0.390625 0.921875 -1.046875q0.28125 -0.671875 0.28125 -1.9375l0 -5.28125l1.671875 0l0 9.859375l-1.5 0zm3.8913422 0l0 -13.59375l1.671875 0l0 13.59375l-1.671875 0zm7.832321 -1.5l0.234375 1.484375q-0.703125 0.140625 -1.265625 0.140625q-0.90625 0 -1.40625 -0.28125q-0.5 -0.296875 -0.703125 -0.75q-0.203125 -0.46875 -0.203125 -1.984375l0 -5.65625l-1.234375 0l0 -1.3125l1.234375 0l0 -2.4375l1.65625 -1.0l0 3.4375l1.6875 0l0 1.3125l-1.6875 0l0 5.75q0 0.71875 0.078125 0.921875q0.09375 0.203125 0.296875 0.328125q0.203125 0.125 0.578125 0.125q0.265625 0 0.734375 -0.078125zm1.5426788 -10.1875l0 -1.90625l1.671875 0l0 1.90625l-1.671875 0zm0 11.6875l0 -9.859375l1.671875 0l0 9.859375l-1.671875 0zm4.129196 3.78125l0 -13.640625l1.53125 0l0 1.28125q0.53125 -0.75 1.203125 -1.125q0.6875 -0.375 1.640625 -0.375q1.265625 0 2.234375 0.65625q0.96875 0.640625 1.453125 1.828125q0.5 1.1875 0.5 2.59375q0 1.515625 -0.546875 2.734375q-0.546875 1.203125 -1.578125 1.84375q-1.03125 0.640625 -2.171875 0.640625q-0.84375 0 -1.515625 -0.34375q-0.65625 -0.359375 -1.078125 -0.890625l0 4.796875l-1.671875 0zm1.515625 -8.65625q0 1.90625 0.765625 2.8125q0.78125 0.90625 1.875 0.90625q1.109375 0 1.890625 -0.9375q0.796875 -0.9375 0.796875 -2.921875q0 -1.875 -0.78125 -2.8125q-0.765625 -0.9375 -1.84375 -0.9375q-1.0625 0 -1.890625 1.0q-0.8125 1.0 -0.8125 2.890625zm8.828842 4.875l0 -13.59375l1.671875 0l0 13.59375l-1.671875 0zm4.191696 -11.6875l0 -1.90625l1.671875 0l0 1.90625l-1.671875 0zm0 11.6875l0 -9.859375l1.671875 0l0 9.859375l-1.671875 0zm10.566696 -3.609375l1.640625 0.21875q-0.265625 1.6875 -1.375 2.65625q-1.109375 0.953125 -2.734375 0.953125q-2.015625 0 -3.25 -1.3125q-1.21875 -1.328125 -1.21875 -3.796875q0 -1.59375 0.515625 -2.78125q0.53125 -1.203125 1.609375 -1.796875q1.09375 -0.609375 2.359375 -0.609375q1.609375 0 2.625 0.8125q1.015625 0.8125 1.3125 2.3125l-1.625 0.25q-0.234375 -1.0 -0.828125 -1.5q-0.59375 -0.5 -1.421875 -0.5q-1.265625 0 -2.0625 0.90625q-0.78125 0.90625 -0.78125 2.859375q0 1.984375 0.765625 2.890625q0.765625 0.890625 1.984375 0.890625q0.984375 0 1.640625 -0.59375q0.65625 -0.609375 0.84375 -1.859375zm9.328125 2.390625q-0.9375 0.796875 -1.796875 1.125q-0.859375 0.3125 -1.84375 0.3125q-1.609375 0 -2.484375 -0.78125q-0.875 -0.796875 -0.875 -2.03125q0 -0.734375 0.328125 -1.328125q0.328125 -0.59375 0.859375 -0.953125q0.53125 -0.359375 1.203125 -0.546875q0.5 -0.140625 1.484375 -0.25q2.03125 -0.25 2.984375 -0.578125q0 -0.34375 0 -0.4375q0 -1.015625 -0.46875 -1.4375q-0.640625 -0.5625 -1.90625 -0.5625q-1.171875 0 -1.734375 0.40625q-0.5625 0.40625 -0.828125 1.46875l-1.640625 -0.234375q0.234375 -1.046875 0.734375 -1.6875q0.515625 -0.640625 1.46875 -0.984375q0.96875 -0.359375 2.25 -0.359375q1.265625 0 2.046875 0.296875q0.78125 0.296875 1.15625 0.75q0.375 0.453125 0.515625 1.140625q0.09375 0.421875 0.09375 1.53125l0 2.234375q0 2.328125 0.09375 2.953125q0.109375 0.609375 0.4375 1.171875l-1.75 0q-0.265625 -0.515625 -0.328125 -1.21875zm-0.140625 -3.71875q-0.90625 0.359375 -2.734375 0.625q-1.03125 0.140625 -1.453125 0.328125q-0.421875 0.1875 -0.65625 0.546875q-0.234375 0.359375 -0.234375 0.796875q0 0.671875 0.5 1.125q0.515625 0.4375 1.484375 0.4375q0.96875 0 1.71875 -0.421875q0.75 -0.4375 1.109375 -1.15625q0.265625 -0.578125 0.265625 -1.671875l0 -0.609375zm7.735092 3.4375l0.234375 1.484375q-0.703125 0.140625 -1.265625 0.140625q-0.90625 0 -1.40625 -0.28125q-0.5 -0.296875 -0.703125 -0.75q-0.203125 -0.46875 -0.203125 -1.984375l0 -5.65625l-1.234375 0l0 -1.3125l1.234375 0l0 -2.4375l1.65625 -1.0l0 3.4375l1.6875 0l0 1.3125l-1.6875 0l0 5.75q0 0.71875 0.078125 0.921875q0.09375 0.203125 0.296875 0.328125q0.203125 0.125 0.578125 0.125q0.265625 0 0.734375 -0.078125zm1.5426788 -10.1875l0 -1.90625l1.671875 0l0 1.90625l-1.671875 0zm0 11.6875l0 -9.859375l1.671875 0l0 9.859375l-1.671875 0zm3.5041962 -4.921875q0 -2.734375 1.53125 -4.0625q1.265625 -1.09375 3.09375 -1.09375q2.03125 0 3.3125 1.34375q1.296875 1.328125 1.296875 3.671875q0 1.90625 -0.578125 3.0q-0.5625 1.078125 -1.65625 1.6875q-1.078125 0.59375 -2.375 0.59375q-2.0625 0 -3.34375 -1.328125q-1.28125 -1.328125 -1.28125 -3.8125zm1.71875 0q0 1.890625 0.828125 2.828125q0.828125 0.9375 2.078125 0.9375q1.25 0 2.0625 -0.9375q0.828125 -0.953125 0.828125 -2.890625q0 -1.828125 -0.828125 -2.765625q-0.828125 -0.9375 -2.0625 -0.9375q-1.25 0 -2.078125 0.9375q-0.828125 0.9375 -0.828125 2.828125zm9.281967 4.921875l0 -9.859375l1.5 0l0 1.40625q1.09375 -1.625 3.140625 -1.625q0.890625 0 1.640625 0.328125q0.75 0.3125 1.109375 0.84375q0.375 0.515625 0.53125 1.21875q0.09375 0.46875 0.09375 1.625l0 6.0625l-1.671875 0l0 -6.0q0 -1.015625 -0.203125 -1.515625q-0.1875 -0.515625 -0.6875 -0.8125q-0.5 -0.296875 -1.171875 -0.296875q-1.0625 0 -1.84375 0.671875q-0.765625 0.671875 -0.765625 2.578125l0 5.375l-1.671875 0zm18.746506 4.0q-1.375 -1.75 -2.328125 -4.078125q-0.953125 -2.34375 -0.953125 -4.84375q0 -2.21875 0.703125 -4.234375q0.84375 -2.34375 2.578125 -4.671875l1.203125 0q-1.125 1.921875 -1.484375 2.75q-0.5625 1.28125 -0.890625 2.671875q-0.40625 1.734375 -0.40625 3.484375q0 4.46875 2.78125 8.921875l-1.203125 0zm2.572052 -7.59375l1.671875 -0.21875q0.28125 1.421875 0.96875 2.046875q0.703125 0.625 1.6875 0.625q1.1875 0 2.0 -0.8125q0.8125 -0.828125 0.8125 -2.03125q0 -1.140625 -0.765625 -1.890625q-0.75 -0.75 -1.90625 -0.75q-0.46875 0 -1.171875 0.1875l0.1875 -1.46875q0.15625 0.015625 0.265625 0.015625q1.0625 0 1.90625 -0.546875q0.859375 -0.5625 0.859375 -1.71875q0 -0.921875 -0.625 -1.515625q-0.609375 -0.609375 -1.59375 -0.609375q-0.96875 0 -1.625 0.609375q-0.640625 0.609375 -0.828125 1.84375l-1.671875 -0.296875q0.296875 -1.6875 1.375 -2.609375q1.09375 -0.921875 2.71875 -0.921875q1.109375 0 2.046875 0.484375q0.9375 0.46875 1.421875 1.296875q0.5 0.828125 0.5 1.75q0 0.890625 -0.46875 1.609375q-0.46875 0.71875 -1.40625 1.15625q1.21875 0.265625 1.875 1.15625q0.671875 0.875 0.671875 2.1875q0 1.78125 -1.296875 3.015625q-1.296875 1.234375 -3.28125 1.234375q-1.796875 0 -2.984375 -1.0625q-1.171875 -1.0625 -1.34375 -2.765625zm19.141357 1.984375l0 1.609375l-8.984375 0q-0.015625 -0.609375 0.1875 -1.15625q0.34375 -0.921875 1.09375 -1.8125q0.765625 -0.890625 2.1875 -2.0625q2.21875 -1.8125 3.0 -2.875q0.78125 -1.0625 0.78125 -2.015625q0 -0.984375 -0.71875 -1.671875q-0.703125 -0.6875 -1.84375 -0.6875q-1.203125 0 -1.9375 0.734375q-0.71875 0.71875 -0.71875 2.0l-1.71875 -0.171875q0.171875 -1.921875 1.328125 -2.921875q1.15625 -1.015625 3.09375 -1.015625q1.953125 0 3.09375 1.09375q1.140625 1.078125 1.140625 2.6875q0 0.8125 -0.34375 1.609375q-0.328125 0.78125 -1.109375 1.65625q-0.765625 0.859375 -2.5625 2.390625q-1.5 1.265625 -1.9375 1.71875q-0.421875 0.4375 -0.703125 0.890625l6.671875 0zm3.156952 5.609375l-1.1875 0q2.765625 -4.453125 2.765625 -8.921875q0 -1.734375 -0.390625 -3.453125q-0.328125 -1.390625 -0.890625 -2.671875q-0.359375 -0.84375 -1.484375 -2.78125l1.1875 0q1.75 2.328125 2.578125 4.671875q0.71875 2.015625 0.71875 4.234375q0 2.5 -0.96875 4.84375q-0.953125 2.328125 -2.328125 4.078125z" fill-rule="nonzero"></path><path fill="#000000" fill-opacity="0.0" d="m129.09448 576.29395l180.0 0l0 42.11023l-180.0 0z" fill-rule="evenodd"></path><path stroke="#000000" stroke-width="2.0" stroke-linejoin="round" stroke-linecap="butt" d="m129.09448 576.29395l180.0 0l0 42.11023l-180.0 0z" fill-rule="evenodd"></path><path fill="#000000" d="m171.36136 603.214l5.234375 -13.59375l1.9375 0l5.5625 13.59375l-2.046875 0l-1.59375 -4.125l-5.6875 0l-1.484375 4.125l-1.921875 0zm3.921875 -5.578125l4.609375 0l-1.40625 -3.78125q-0.65625 -1.703125 -0.96875 -2.8125q-0.265625 1.3125 -0.734375 2.59375l-1.5 4.0zm16.193573 5.578125l0 -1.25q-0.9375 1.46875 -2.75 1.46875q-1.171875 0 -2.171875 -0.640625q-0.984375 -0.65625 -1.53125 -1.8125q-0.53125 -1.171875 -0.53125 -2.6875q0 -1.46875 0.484375 -2.671875q0.5 -1.203125 1.46875 -1.84375q0.984375 -0.640625 2.203125 -0.640625q0.890625 0 1.578125 0.375q0.703125 0.375 1.140625 0.984375l0 -4.875l1.65625 0l0 13.59375l-1.546875 0zm-5.28125 -4.921875q0 1.890625 0.796875 2.828125q0.8125 0.9375 1.890625 0.9375q1.09375 0 1.859375 -0.890625q0.765625 -0.890625 0.765625 -2.734375q0 -2.015625 -0.78125 -2.953125q-0.78125 -0.953125 -1.921875 -0.953125q-1.109375 0 -1.859375 0.90625q-0.75 0.90625 -0.75 2.859375zm15.656967 4.921875l0 -1.25q-0.9375 1.46875 -2.75 1.46875q-1.171875 0 -2.171875 -0.640625q-0.984375 -0.65625 -1.53125 -1.8125q-0.53125 -1.171875 -0.53125 -2.6875q0 -1.46875 0.484375 -2.671875q0.5 -1.203125 1.46875 -1.84375q0.984375 -0.640625 2.203125 -0.640625q0.890625 0 1.578125 0.375q0.703125 0.375 1.140625 0.984375l0 -4.875l1.65625 0l0 13.59375l-1.546875 0zm-5.28125 -4.921875q0 1.890625 0.796875 2.828125q0.8125 0.9375 1.890625 0.9375q1.09375 0 1.859375 -0.890625q0.765625 -0.890625 0.765625 -2.734375q0 -2.015625 -0.78125 -2.953125q-0.78125 -0.953125 -1.921875 -0.953125q-1.109375 0 -1.859375 0.90625q-0.75 0.90625 -0.75 2.859375zm9.281967 -6.765625l0 -1.90625l1.671875 0l0 1.90625l-1.671875 0zm0 11.6875l0 -9.859375l1.671875 0l0 9.859375l-1.671875 0zm7.785446 -1.5l0.234375 1.484375q-0.703125 0.140625 -1.265625 0.140625q-0.90625 0 -1.40625 -0.28125q-0.5 -0.296875 -0.703125 -0.75q-0.203125 -0.46875 -0.203125 -1.984375l0 -5.65625l-1.234375 0l0 -1.3125l1.234375 0l0 -2.4375l1.65625 -1.0l0 3.4375l1.6875 0l0 1.3125l-1.6875 0l0 5.75q0 0.71875 0.078125 0.921875q0.09375 0.203125 0.296875 0.328125q0.203125 0.125 0.578125 0.125q0.265625 0 0.734375 -0.078125zm1.5426788 -10.1875l0 -1.90625l1.671875 0l0 1.90625l-1.671875 0zm0 11.6875l0 -9.859375l1.671875 0l0 9.859375l-1.671875 0zm3.5041962 -4.921875q0 -2.734375 1.53125 -4.0625q1.265625 -1.09375 3.09375 -1.09375q2.03125 0 3.3125 1.34375q1.296875 1.328125 1.296875 3.671875q0 1.90625 -0.578125 3.0q-0.5625 1.078125 -1.65625 1.6875q-1.078125 0.59375 -2.375 0.59375q-2.0625 0 -3.34375 -1.328125q-1.28125 -1.328125 -1.28125 -3.8125zm1.71875 0q0 1.890625 0.828125 2.828125q0.828125 0.9375 2.078125 0.9375q1.25 0 2.0625 -0.9375q0.828125 -0.953125 0.828125 -2.890625q0 -1.828125 -0.828125 -2.765625q-0.828125 -0.9375 -2.0625 -0.9375q-1.25 0 -2.078125 0.9375q-0.828125 0.9375 -0.828125 2.828125zm9.281967 4.921875l0 -9.859375l1.5 0l0 1.40625q1.09375 -1.625 3.140625 -1.625q0.890625 0 1.640625 0.328125q0.75 0.3125 1.109375 0.84375q0.375 0.515625 0.53125 1.21875q0.09375 0.46875 0.09375 1.625l0 6.0625l-1.671875 0l0 -6.0q0 -1.015625 -0.203125 -1.515625q-0.1875 -0.515625 -0.6875 -0.8125q-0.5 -0.296875 -1.171875 -0.296875q-1.0625 0 -1.84375 0.671875q-0.765625 0.671875 -0.765625 2.578125l0 5.375l-1.671875 0zm18.746521 4.0q-1.375 -1.75 -2.328125 -4.078125q-0.953125 -2.34375 -0.953125 -4.84375q0 -2.21875 0.703125 -4.234375q0.84375 -2.34375 2.578125 -4.671875l1.203125 0q-1.125 1.921875 -1.484375 2.75q-0.5625 1.28125 -0.890625 2.671875q-0.40625 1.734375 -0.40625 3.484375q0 4.46875 2.78125 8.921875l-1.203125 0zm8.853302 -4.0l-1.671875 0l0 -10.640625q-0.59375 0.578125 -1.578125 1.15625q-0.984375 0.5625 -1.765625 0.859375l0 -1.625q1.40625 -0.65625 2.453125 -1.59375q1.046875 -0.9375 1.484375 -1.8125l1.078125 0l0 13.65625zm5.641327 4.0l-1.1875 0q2.765625 -4.453125 2.765625 -8.921875q0 -1.734375 -0.390625 -3.453125q-0.328125 -1.390625 -0.890625 -2.671875q-0.359375 -0.84375 -1.484375 -2.78125l1.1875 0q1.75 2.328125 2.578125 4.671875q0.71875 2.015625 0.71875 4.234375q0 2.5 -0.96875 4.84375q-0.953125 2.328125 -2.328125 4.078125z" fill-rule="nonzero"></path><path fill="#000000" fill-opacity="0.0" d="m388.49344 597.34644l-79.40158 0" fill-rule="evenodd"></path><path stroke="#000000" stroke-width="1.0" stroke-linejoin="round" stroke-linecap="butt" d="m388.49344 597.34644l-73.40158 0" fill-rule="evenodd"></path><path fill="#000000" stroke="#000000" stroke-width="1.0" stroke-linecap="butt" d="m315.09186 595.6947l-4.538086 1.6517334l4.538086 1.6517334z" fill-rule="evenodd"></path><path fill="#000000" fill-opacity="0.0" d="m388.49344 441.47244l-79.40158 0" fill-rule="evenodd"></path><path stroke="#000000" stroke-width="1.0" stroke-linejoin="round" stroke-linecap="butt" d="m388.49344 441.47244l-73.40158 0" fill-rule="evenodd"></path><path fill="#000000" stroke="#000000" stroke-width="1.0" stroke-linecap="butt" d="m315.09186 439.8207l-4.538086 1.6517334l4.538086 1.6517334z" fill-rule="evenodd"></path><path fill="#000000" fill-opacity="0.0" d="m219.09448 462.52756l0 31.84253" fill-rule="evenodd"></path><path stroke="#000000" stroke-width="1.0" stroke-linejoin="round" stroke-linecap="butt" d="m219.09448 462.52756l0 25.84253" fill-rule="evenodd"></path><path fill="#000000" stroke="#000000" stroke-width="1.0" stroke-linecap="butt" d="m217.44275 488.3701l1.6517334 4.538086l1.6517334 -4.538086z" fill-rule="evenodd"></path><path fill="#000000" fill-opacity="0.0" d="m561.5 51.755962l31.99347 0l0 545.57477l-25.001343 0" fill-rule="evenodd"></path><path stroke="#000000" stroke-width="1.0" stroke-linejoin="round" stroke-linecap="butt" d="m561.5 51.755962l31.99347 0l0 545.57477l-25.001343 0" fill-rule="evenodd"></path><path fill="#000000" fill-opacity="0.0" d="m478.49213 72.81108l0 339.1496" fill-rule="evenodd"></path><path stroke="#000000" stroke-width="1.0" stroke-linejoin="round" stroke-linecap="butt" d="m478.49213 72.81108l0 333.1496" fill-rule="evenodd"></path><path fill="#000000" stroke="#000000" stroke-width="1.0" stroke-linecap="butt" d="m476.8404 405.96066l1.6517334 4.5381165l1.6517334 -4.5381165z" fill-rule="evenodd"></path><path fill="#000000" fill-opacity="0.0" d="m590.00525 597.4094l-21.51184 -0.06298828" fill-rule="evenodd"></path><path stroke="#000000" stroke-width="1.0" stroke-linejoin="round" stroke-linecap="butt" d="m590.00525 597.4094l-15.511841 -0.045410156" fill-rule="evenodd"></path><path fill="#000000" stroke="#000000" stroke-width="1.0" stroke-linecap="butt" d="m574.4982 595.7123l-4.5429077 1.6384277l4.533264 1.6650391z" fill-rule="evenodd"></path><path fill="#000000" fill-opacity="0.0" d="m109.09449 494.357l220.0 0l0 42.11023l-220.0 0z" fill-rule="evenodd"></path><path stroke="#000000" stroke-width="2.0" stroke-linejoin="round" stroke-linecap="butt" d="m109.09449 494.357l220.0 0l0 42.11023l-220.0 0z" fill-rule="evenodd"></path><path fill="#000000" d="m126.81095 521.277l0 -13.59375l9.171867 0l0 1.59375l-7.375 0l0 4.21875l6.375 0l0 1.609375l-6.375 0l0 6.171875l-1.7968674 0zm17.536598 0l0 -1.453125q-1.140625 1.671875 -3.125 1.671875q-0.859375 0 -1.625 -0.328125q-0.75 -0.34375 -1.125 -0.84375q-0.359375 -0.5 -0.515625 -1.234375q-0.09375 -0.5 -0.09375 -1.5625l0 -6.109375l1.671875 0l0 5.46875q0 1.3125 0.09375 1.765625q0.15625 0.65625 0.671875 1.03125q0.515625 0.375 1.265625 0.375q0.75 0 1.40625 -0.375q0.65625 -0.390625 0.921875 -1.046875q0.28125 -0.671875 0.28125 -1.9375l0 -5.28125l1.671875 0l0 9.859375l-1.5 0zm3.8913422 0l0 -13.59375l1.671875 0l0 13.59375l-1.671875 0zm4.144821 0l0 -13.59375l1.671875 0l0 13.59375l-1.671875 0zm4.097946 3.796875l-0.171875 -1.5625q0.546875 0.140625 0.953125 0.140625q0.546875 0 0.875 -0.1875q0.34375 -0.1875 0.5625 -0.515625q0.15625 -0.25 0.5 -1.25q0.046875 -0.140625 0.15625 -0.40625l-3.734375 -9.875l1.796875 0l2.046875 5.71875q0.40625 1.078125 0.71875 2.28125q0.28125 -1.15625 0.6875 -2.25l2.09375 -5.75l1.671875 0l-3.75 10.03125q-0.59375 1.625 -0.9375 2.234375q-0.4375 0.828125 -1.015625 1.203125q-0.578125 0.390625 -1.375 0.390625q-0.484375 0 -1.078125 -0.203125zm19.328125 -8.5625l1.796875 0.453125q-0.5625 2.21875 -2.03125 3.390625q-1.46875 1.15625 -3.59375 1.15625q-2.203125 0 -3.578125 -0.890625q-1.375 -0.90625 -2.09375 -2.59375q-0.71875 -1.703125 -0.71875 -3.65625q0 -2.125 0.796875 -3.703125q0.8125 -1.578125 2.3125 -2.390625q1.5 -0.828125 3.296875 -0.828125q2.046875 0 3.4375 1.046875q1.390625 1.03125 1.9375 2.90625l-1.765625 0.421875q-0.46875 -1.484375 -1.375 -2.15625q-0.90625 -0.6875 -2.265625 -0.6875q-1.5625 0 -2.625 0.75q-1.046875 0.75 -1.484375 2.03125q-0.421875 1.265625 -0.421875 2.609375q0 1.734375 0.5 3.03125q0.515625 1.28125 1.578125 1.921875q1.078125 0.640625 2.3125 0.640625q1.515625 0 2.5625 -0.859375q1.046875 -0.875 1.421875 -2.59375zm2.9260712 -0.15625q0 -2.734375 1.53125 -4.0625q1.265625 -1.09375 3.09375 -1.09375q2.03125 0 3.3125 1.34375q1.296875 1.328125 1.296875 3.671875q0 1.90625 -0.578125 3.0q-0.5625 1.078125 -1.65625 1.6875q-1.078125 0.59375 -2.375 0.59375q-2.0625 0 -3.34375 -1.328125q-1.28125 -1.328125 -1.28125 -3.8125zm1.71875 0q0 1.890625 0.828125 2.828125q0.828125 0.9375 2.078125 0.9375q1.25 0 2.0625 -0.9375q0.828125 -0.953125 0.828125 -2.890625q0 -1.828125 -0.828125 -2.765625q-0.828125 -0.9375 -2.0625 -0.9375q-1.25 0 -2.078125 0.9375q-0.828125 0.9375 -0.828125 2.828125zm9.281967 4.921875l0 -9.859375l1.5 0l0 1.40625q1.09375 -1.625 3.140625 -1.625q0.890625 0 1.640625 0.328125q0.75 0.3125 1.109375 0.84375q0.375 0.515625 0.53125 1.21875q0.09375 0.46875 0.09375 1.625l0 6.0625l-1.671875 0l0 -6.0q0 -1.015625 -0.203125 -1.515625q-0.1875 -0.515625 -0.6875 -0.8125q-0.5 -0.296875 -1.171875 -0.296875q-1.0625 0 -1.84375 0.671875q-0.765625 0.671875 -0.765625 2.578125l0 5.375l-1.671875 0zm10.375717 0l0 -9.859375l1.5 0l0 1.40625q1.09375 -1.625 3.140625 -1.625q0.890625 0 1.640625 0.328125q0.75 0.3125 1.109375 0.84375q0.375 0.515625 0.53125 1.21875q0.09375 0.46875 0.09375 1.625l0 6.0625l-1.671875 0l0 -6.0q0 -1.015625 -0.203125 -1.515625q-0.1875 -0.515625 -0.6875 -0.8125q-0.5 -0.296875 -1.171875 -0.296875q-1.0625 0 -1.84375 0.671875q-0.765625 0.671875 -0.765625 2.578125l0 5.375l-1.671875 0zm17.125717 -3.171875l1.71875 0.21875q-0.40625 1.5 -1.515625 2.34375q-1.09375 0.828125 -2.8125 0.828125q-2.15625 0 -3.421875 -1.328125q-1.265625 -1.328125 -1.265625 -3.734375q0 -2.484375 1.265625 -3.859375q1.28125 -1.375 3.328125 -1.375q1.984375 0 3.234375 1.34375q1.25 1.34375 1.25 3.796875q0 0.140625 -0.015625 0.4375l-7.34375 0q0.09375 1.625 0.921875 2.484375q0.828125 0.859375 2.0625 0.859375q0.90625 0 1.546875 -0.46875q0.65625 -0.484375 1.046875 -1.546875zm-5.484375 -2.703125l5.5 0q-0.109375 -1.234375 -0.625 -1.859375q-0.796875 -0.96875 -2.078125 -0.96875q-1.140625 0 -1.9375 0.78125q-0.78125 0.765625 -0.859375 2.046875zm15.547592 2.265625l1.640625 0.21875q-0.265625 1.6875 -1.375 2.65625q-1.109375 0.953125 -2.734375 0.953125q-2.015625 0 -3.25 -1.3125q-1.21875 -1.328125 -1.21875 -3.796875q0 -1.59375 0.515625 -2.78125q0.53125 -1.203125 1.609375 -1.796875q1.09375 -0.609375 2.359375 -0.609375q1.609375 0 2.625 0.8125q1.015625 0.8125 1.3125 2.3125l-1.625 0.25q-0.234375 -1.0 -0.828125 -1.5q-0.59375 -0.5 -1.421875 -0.5q-1.265625 0 -2.0625 0.90625q-0.78125 0.90625 -0.78125 2.859375q0 1.984375 0.765625 2.890625q0.765625 0.890625 1.984375 0.890625q0.984375 0 1.640625 -0.59375q0.65625 -0.609375 0.84375 -1.859375zm6.546875 2.109375l0.234375 1.484375q-0.703125 0.140625 -1.265625 0.140625q-0.90625 0 -1.40625 -0.28125q-0.5 -0.296875 -0.703125 -0.75q-0.203125 -0.46875 -0.203125 -1.984375l0 -5.65625l-1.234375 0l0 -1.3125l1.234375 0l0 -2.4375l1.65625 -1.0l0 3.4375l1.6875 0l0 1.3125l-1.6875 0l0 5.75q0 0.71875 0.078125 0.921875q0.09375 0.203125 0.296875 0.328125q0.203125 0.125 0.578125 0.125q0.265625 0 0.734375 -0.078125zm8.277054 -1.671875l1.71875 0.21875q-0.40625 1.5 -1.515625 2.34375q-1.09375 0.828125 -2.8125 0.828125q-2.15625 0 -3.421875 -1.328125q-1.265625 -1.328125 -1.265625 -3.734375q0 -2.484375 1.265625 -3.859375q1.28125 -1.375 3.328125 -1.375q1.984375 0 3.234375 1.34375q1.25 1.34375 1.25 3.796875q0 0.140625 -0.015625 0.4375l-7.34375 0q0.09375 1.625 0.921875 2.484375q0.828125 0.859375 2.0625 0.859375q0.90625 0 1.546875 -0.46875q0.65625 -0.484375 1.046875 -1.546875zm-5.484375 -2.703125l5.5 0q-0.109375 -1.234375 -0.625 -1.859375q-0.796875 -0.96875 -2.078125 -0.96875q-1.140625 0 -1.9375 0.78125q-0.78125 0.765625 -0.859375 2.046875zm15.500717 5.875l0 -1.25q-0.9375 1.46875 -2.75 1.46875q-1.171875 0 -2.171875 -0.640625q-0.984375 -0.65625 -1.53125 -1.8125q-0.53125 -1.171875 -0.53125 -2.6875q0 -1.46875 0.484375 -2.671875q0.5 -1.203125 1.46875 -1.84375q0.984375 -0.640625 2.203125 -0.640625q0.890625 0 1.578125 0.375q0.703125 0.375 1.140625 0.984375l0 -4.875l1.65625 0l0 13.59375l-1.546875 0zm-5.28125 -4.921875q0 1.890625 0.796875 2.828125q0.8125 0.9375 1.890625 0.9375q1.09375 0 1.859375 -0.890625q0.765625 -0.890625 0.765625 -2.734375q0 -2.015625 -0.78125 -2.953125q-0.78125 -0.953125 -1.921875 -0.953125q-1.109375 0 -1.859375 0.90625q-0.75 0.90625 -0.75 2.859375zm17.637161 8.921875q-1.375 -1.75 -2.328125 -4.078125q-0.953125 -2.34375 -0.953125 -4.84375q0 -2.21875 0.703125 -4.234375q0.84375 -2.34375 2.578125 -4.671875l1.203125 0q-1.125 1.921875 -1.484375 2.75q-0.5625 1.28125 -0.890625 2.671875q-0.40625 1.734375 -0.40625 3.484375q0 4.46875 2.78125 8.921875l-1.203125 0zm2.572052 -7.59375l1.671875 -0.21875q0.28125 1.421875 0.96875 2.046875q0.703125 0.625 1.6875 0.625q1.1875 0 2.0 -0.8125q0.8125 -0.828125 0.8125 -2.03125q0 -1.140625 -0.765625 -1.890625q-0.75 -0.75 -1.90625 -0.75q-0.46875 0 -1.171875 0.1875l0.1875 -1.46875q0.15625 0.015625 0.265625 0.015625q1.0625 0 1.90625 -0.546875q0.859375 -0.5625 0.859375 -1.71875q0 -0.921875 -0.625 -1.515625q-0.609375 -0.609375 -1.59375 -0.609375q-0.96875 0 -1.625 0.609375q-0.640625 0.609375 -0.828125 1.84375l-1.671875 -0.296875q0.296875 -1.6875 1.375 -2.609375q1.09375 -0.921875 2.71875 -0.921875q1.109375 0 2.046875 0.484375q0.9375 0.46875 1.421875 1.296875q0.5 0.828125 0.5 1.75q0 0.890625 -0.46875 1.609375q-0.46875 0.71875 -1.40625 1.15625q1.21875 0.265625 1.875 1.15625q0.671875 0.875 0.671875 2.1875q0 1.78125 -1.296875 3.015625q-1.296875 1.234375 -3.28125 1.234375q-1.796875 0 -2.984375 -1.0625q-1.171875 -1.0625 -1.34375 -2.765625zm19.141327 1.984375l0 1.609375l-8.984375 0q-0.015625 -0.609375 0.1875 -1.15625q0.34375 -0.921875 1.09375 -1.8125q0.765625 -0.890625 2.1875 -2.0625q2.21875 -1.8125 3.0 -2.875q0.78125 -1.0625 0.78125 -2.015625q0 -0.984375 -0.71875 -1.671875q-0.703125 -0.6875 -1.84375 -0.6875q-1.203125 0 -1.9375 0.734375q-0.71875 0.71875 -0.71875 2.0l-1.71875 -0.171875q0.171875 -1.921875 1.328125 -2.921875q1.15625 -1.015625 3.09375 -1.015625q1.953125 0 3.09375 1.09375q1.140625 1.078125 1.140625 2.6875q0 0.8125 -0.34375 1.609375q-0.328125 0.78125 -1.109375 1.65625q-0.765625 0.859375 -2.5625 2.390625q-1.5 1.265625 -1.9375 1.71875q-0.421875 0.4375 -0.703125 0.890625l6.671875 0zm0.9538574 1.609375l3.59375 -5.125l-3.328125 -4.734375l2.09375 0l1.515625 2.3125q0.421875 0.65625 0.671875 1.109375q0.421875 -0.609375 0.765625 -1.09375l1.65625 -2.328125l1.984375 0l-3.390625 4.640625l3.65625 5.21875l-2.046875 0l-2.03125 -3.0625l-0.53125 -0.828125l-2.59375 3.890625l-2.015625 0zm16.265625 0l-1.671875 0l0 -10.640625q-0.59375 0.578125 -1.578125 1.15625q-0.984375 0.5625 -1.765625 0.859375l0 -1.625q1.40625 -0.65625 2.453125 -1.59375q1.046875 -0.9375 1.484375 -1.8125l1.078125 0l0 13.65625zm5.641327 4.0l-1.1875 0q2.765625 -4.453125 2.765625 -8.921875q0 -1.734375 -0.390625 -3.453125q-0.328125 -1.390625 -0.890625 -2.671875q-0.359375 -0.84375 -1.484375 -2.78125l1.1875 0q1.75 2.328125 2.578125 4.671875q0.71875 2.015625 0.71875 4.234375q0 2.5 -0.96875 4.84375q-0.953125 2.328125 -2.328125 4.078125z" fill-rule="nonzero"></path><path fill="#000000" fill-opacity="0.0" d="m219.09448 536.4672l0 39.811035" fill-rule="evenodd"></path><path stroke="#000000" stroke-width="1.0" stroke-linejoin="round" stroke-linecap="butt" d="m219.09448 536.4672l0 33.811035" fill-rule="evenodd"></path><path fill="#000000" stroke="#000000" stroke-width="1.0" stroke-linecap="butt" d="m217.44275 570.27826l1.6517334 4.538086l1.6517334 -4.538086z" fill-rule="evenodd"></path></g></svg>
 
-- 
GitLab


From dae0a14311ed958e114932a5abcb2de8d20f95fb Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 17 Nov 2017 18:33:17 -0800
Subject: [PATCH 0093/1225] [tf.data] Saveable Iterator for
 dataset.ignore_errors(..)

PiperOrigin-RevId: 176190698
---
 .../dataset_serialization_test_base.py        |  2 +
 .../kernel_tests/map_dataset_op_test.py       | 16 +++++++
 .../core/kernels/ignore_errors_dataset_op.cc  | 44 +++++++++++++++++--
 3 files changed, 58 insertions(+), 4 deletions(-)

diff --git a/tensorflow/contrib/data/python/kernel_tests/dataset_serialization_test_base.py b/tensorflow/contrib/data/python/kernel_tests/dataset_serialization_test_base.py
index a24a16a5f8..bf25cc60a1 100644
--- a/tensorflow/contrib/data/python/kernel_tests/dataset_serialization_test_base.py
+++ b/tensorflow/contrib/data/python/kernel_tests/dataset_serialization_test_base.py
@@ -233,6 +233,7 @@ class DatasetSerializationTestBase(test.TestCase):
           ds_fn, sparse_tensors=sparse_tensors)
       with self.test_session(graph=g) as sess:
         self._restore(saver, sess)
+        sess.run(variables.global_variables_initializer())
         sess.run(init_op)
         for _ in range(num_outputs):
           actual.append(sess.run(get_next_op))
@@ -494,6 +495,7 @@ class DatasetSerializationTestBase(test.TestCase):
               sess.run(init_op)
             self._restore(saver, sess)
           else:
+            sess.run(variables.global_variables_initializer())
             sess.run(init_op)
           start = break_points[i - 1] if i > 0 else 0
           end = break_points[i] if i < len(break_points) else num_outputs
diff --git a/tensorflow/contrib/data/python/kernel_tests/map_dataset_op_test.py b/tensorflow/contrib/data/python/kernel_tests/map_dataset_op_test.py
index 3c07a5571a..514b08b874 100644
--- a/tensorflow/contrib/data/python/kernel_tests/map_dataset_op_test.py
+++ b/tensorflow/contrib/data/python/kernel_tests/map_dataset_op_test.py
@@ -777,5 +777,21 @@ class MapDatasetSerializationTest(
     self.run_core_tests(_build_ds, None, num_outputs)
 
 
+class IgnoreErrorsSerializationTest(
+    dataset_serialization_test_base.DatasetSerializationTestBase):
+
+  def _build_ds(self, components):
+    return dataset_ops.Dataset.from_tensor_slices(components).map(
+        lambda x: array_ops.check_numerics(x, "message")).apply(
+            error_ops.ignore_errors())
+
+  def testIgnoreErrorsCore(self):
+    components = np.array([1., 2., 3., np.nan, 5.]).astype(np.float32)
+    diff_components = np.array([1., 2., 3., np.nan]).astype(np.float32)
+    num_outputs = 4
+    self.run_core_tests(lambda: self._build_ds(components),
+                        lambda: self._build_ds(diff_components), num_outputs)
+
+
 if __name__ == "__main__":
   test.main()
diff --git a/tensorflow/core/kernels/ignore_errors_dataset_op.cc b/tensorflow/core/kernels/ignore_errors_dataset_op.cc
index 568e7ade0e..43ba5ab7dd 100644
--- a/tensorflow/core/kernels/ignore_errors_dataset_op.cc
+++ b/tensorflow/core/kernels/ignore_errors_dataset_op.cc
@@ -32,13 +32,14 @@ class IgnoreErrorsDatasetOp : public UnaryDatasetOpKernel {
 
   void MakeDataset(OpKernelContext* ctx, DatasetBase* input,
                    DatasetBase** output) override {
-    *output = new Dataset(input);
+    *output = new Dataset(ctx, input);
   }
 
  private:
-  class Dataset : public DatasetBase {
+  class Dataset : public GraphDatasetBase {
    public:
-    explicit Dataset(const DatasetBase* input) : input_(input) {
+    explicit Dataset(OpKernelContext* ctx, const DatasetBase* input)
+        : GraphDatasetBase(ctx), input_(input) {
       input_->Ref();
     }
 
@@ -59,6 +60,15 @@ class IgnoreErrorsDatasetOp : public UnaryDatasetOpKernel {
 
     string DebugString() override { return "IgnoreErrorsDatasetOp::Dataset"; }
 
+   protected:
+    Status AsGraphDefInternal(OpKernelContext* ctx, DatasetGraphDefBuilder* b,
+                              Node** output) const override {
+      Node* input_graph_node = nullptr;
+      TF_RETURN_IF_ERROR(b->AddParentDataset(ctx, input_, &input_graph_node));
+      TF_RETURN_IF_ERROR(b->AddDataset(this, {input_graph_node}, output));
+      return Status::OK();
+    }
+
    private:
     class Iterator : public DatasetIterator<Dataset> {
      public:
@@ -69,16 +79,42 @@ class IgnoreErrorsDatasetOp : public UnaryDatasetOpKernel {
       Status GetNextInternal(IteratorContext* ctx,
                              std::vector<Tensor>* out_tensors,
                              bool* end_of_sequence) override {
+        if (!input_impl_) {
+          *end_of_sequence = true;
+          return Status::OK();
+        }
         Status s = input_impl_->GetNext(ctx, out_tensors, end_of_sequence);
         while (!s.ok()) {
           out_tensors->clear();
           s = input_impl_->GetNext(ctx, out_tensors, end_of_sequence);
         }
+        if (*end_of_sequence) {
+          input_impl_.reset();
+        }
+        return Status::OK();
+      }
+
+     protected:
+      Status SaveInternal(IteratorStateWriter* writer) override {
+        if (input_impl_)
+          TF_RETURN_IF_ERROR(SaveParent(writer, input_impl_));
+        else
+          TF_RETURN_IF_ERROR(
+              writer->WriteScalar(full_name("input_impls_empty"), ""));
+        return Status::OK();
+      }
+
+      Status RestoreInternal(OpKernelContext* ctx,
+                             IteratorStateReader* reader) override {
+        if (reader->Contains(full_name("input_impls_empty")))
+          input_impl_.reset();
+        else
+          TF_RETURN_IF_ERROR(RestoreParent(ctx, reader, input_impl_));
         return Status::OK();
       }
 
      private:
-      const std::unique_ptr<IteratorBase> input_impl_;
+      std::unique_ptr<IteratorBase> input_impl_;
     };
 
     const DatasetBase* const input_;
-- 
GitLab


From 7fd7d75e300b5776a448f7083fe0fd05ba415fb5 Mon Sep 17 00:00:00 2001
From: Yao Zhang <yaozhang@google.com>
Date: Fri, 17 Nov 2017 18:37:29 -0800
Subject: [PATCH 0094/1225] Add VLOG for optimizer return status.

PiperOrigin-RevId: 176190934
---
 tensorflow/core/grappler/optimizers/meta_optimizer.cc | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/tensorflow/core/grappler/optimizers/meta_optimizer.cc b/tensorflow/core/grappler/optimizers/meta_optimizer.cc
index 1fa639ad33..d2df8cacb7 100644
--- a/tensorflow/core/grappler/optimizers/meta_optimizer.cc
+++ b/tensorflow/core/grappler/optimizers/meta_optimizer.cc
@@ -134,6 +134,8 @@ Status MetaOptimizer::Optimize(Cluster* cluster, const GrapplerItem& item,
             ". Graph size after: ", optimized_graph->node_size());
       }
       result_.push_back(std::make_pair(optimizer->name(), result));
+      VLOG(1) << "Optimizer " << optimizer->name()
+              << " return status: " << result;
     } else {
       GrapplerItem optimized_item(item, std::move(*optimized_graph));
       auto status =
@@ -152,6 +154,8 @@ Status MetaOptimizer::Optimize(Cluster* cluster, const GrapplerItem& item,
             ". Graph size after: ", optimized_graph->node_size());
       }
       result_.push_back(std::make_pair(optimizer->name(), result));
+      VLOG(1) << "Optimizer " << optimizer->name()
+              << " return status: " << result;
     }
   }
 
-- 
GitLab


From 9fd424e4871e5a60b6e1985d70c31960a2df80d8 Mon Sep 17 00:00:00 2001
From: Sergio Guadarrama <sguada@google.com>
Date: Fri, 17 Nov 2017 20:14:51 -0800
Subject: [PATCH 0095/1225] Make sure local_variables_initializer returns a
 no_op in Eager mode.

PiperOrigin-RevId: 176197040
---
 tensorflow/python/ops/variables.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tensorflow/python/ops/variables.py b/tensorflow/python/ops/variables.py
index f906b7b3c4..e9b1c67d16 100644
--- a/tensorflow/python/ops/variables.py
+++ b/tensorflow/python/ops/variables.py
@@ -1447,6 +1447,8 @@ def local_variables_initializer():
   Returns:
     An Op that initializes all local variables in the graph.
   """
+  if context.in_eager_mode():
+    return control_flow_ops.no_op(name="local_variables_initializer")
   return variables_initializer(local_variables())
 
 
-- 
GitLab


From 091291b70b567a37d33bf03b71bee9715e7a80bb Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 17 Nov 2017 21:35:52 -0800
Subject: [PATCH 0096/1225] In WALSMatrixFactorization: moves the op that
 updates the global_step to a session run hook.

PiperOrigin-RevId: 176200549
---
 tensorflow/contrib/factorization/BUILD        |  1 -
 .../contrib/factorization/python/ops/wals.py  | 34 +++++++++++--------
 2 files changed, 20 insertions(+), 15 deletions(-)

diff --git a/tensorflow/contrib/factorization/BUILD b/tensorflow/contrib/factorization/BUILD
index 29a0a4221a..fe86a20ab1 100644
--- a/tensorflow/contrib/factorization/BUILD
+++ b/tensorflow/contrib/factorization/BUILD
@@ -270,7 +270,6 @@ tf_py_test(
         "manual",
         "noasan",  # times out b/63678675
         "nomsan",
-        "notsan",  # b/69374301
     ],
 )
 
diff --git a/tensorflow/contrib/factorization/python/ops/wals.py b/tensorflow/contrib/factorization/python/ops/wals.py
index b2f22eb2fc..2bde3e0dd7 100644
--- a/tensorflow/contrib/factorization/python/ops/wals.py
+++ b/tensorflow/contrib/factorization/python/ops/wals.py
@@ -77,6 +77,7 @@ class _SweepHook(session_run_hook.SessionRunHook):
       logging.info("SweepHook running init op.")
       sess.run(self._init_op)
     if is_sweep_done:
+      logging.info("SweepHook starting the next sweep.")
       sess.run(self._switch_op)
     is_row_sweep = sess.run(self._is_row_sweep_var)
     if is_sweep_done or not self._is_initialized:
@@ -91,6 +92,22 @@ class _SweepHook(session_run_hook.SessionRunHook):
         fetches=[self._row_train_op if is_row_sweep else self._col_train_op])
 
 
+class _IncrementGlobalStepHook(session_run_hook.SessionRunHook):
+  """Hook that increments the global step."""
+
+  def __init__(self):
+    global_step = training_util.get_global_step()
+    if global_step:
+      self._global_step_incr_op = state_ops.assign_add(
+          global_step, 1, name="global_step_incr").op
+    else:
+      self._global_step_incr_op = None
+
+  def before_run(self, run_context):
+    if self._global_step_incr_op:
+      run_context.session.run(self._global_step_incr_op)
+
+
 class _StopAtSweepHook(session_run_hook.SessionRunHook):
   """Hook that requests stop at a given sweep."""
 
@@ -210,14 +227,6 @@ def _wals_factorization_model_function(features, labels, mode, params):
     summary.scalar("root_weighted_squared_error", rwse_var)
     summary.scalar("completed_sweeps", completed_sweeps_var)
 
-    # Increments global step.
-    global_step = training_util.get_global_step()
-    if global_step:
-      global_step_incr_op = state_ops.assign_add(
-          global_step, 1, name="global_step_incr").op
-    else:
-      global_step_incr_op = control_flow_ops.no_op()
-
     def create_axis_ops(sp_input, num_items, update_fn, axis_name):
       """Creates book-keeping and training ops for a given axis.
 
@@ -246,9 +255,6 @@ def _wals_factorization_model_function(features, labels, mode, params):
             collections=[ops.GraphKeys.GLOBAL_VARIABLES],
             trainable=False,
             name="processed_" + axis_name)
-      reset_processed_items_op = state_ops.assign(
-          processed_items, processed_items_init,
-          name="reset_processed_" + axis_name)
       _, update_op, loss, reg, sum_weights = update_fn(sp_input)
       input_indices = sp_input.indices[:, 0]
       with ops.control_dependencies([
@@ -264,13 +270,12 @@ def _wals_factorization_model_function(features, labels, mode, params):
         with ops.control_dependencies([update_processed_items]):
           is_sweep_done = math_ops.reduce_all(processed_items)
           axis_train_op = control_flow_ops.group(
-              global_step_incr_op,
               state_ops.assign(is_sweep_done_var, is_sweep_done),
               state_ops.assign_add(
                   completed_sweeps_var,
                   math_ops.cast(is_sweep_done, dtypes.int32)),
               name="{}_sweep_train_op".format(axis_name))
-      return reset_processed_items_op, axis_train_op
+      return processed_items.initializer, axis_train_op
 
     reset_processed_rows_op, row_train_op = create_axis_ops(
         input_rows,
@@ -296,7 +301,8 @@ def _wals_factorization_model_function(features, labels, mode, params):
     sweep_hook = _SweepHook(
         is_row_sweep_var, is_sweep_done_var, init_op,
         row_prep_ops, col_prep_ops, row_train_op, col_train_op, switch_op)
-    training_hooks = [sweep_hook]
+    global_step_hook = _IncrementGlobalStepHook()
+    training_hooks = [sweep_hook, global_step_hook]
     if max_sweeps is not None:
       training_hooks.append(_StopAtSweepHook(max_sweeps))
 
-- 
GitLab


From 1ffb45e7112addad325084084b93897918ed9a54 Mon Sep 17 00:00:00 2001
From: Yao Zhang <yaozhang@google.com>
Date: Fri, 17 Nov 2017 22:00:34 -0800
Subject: [PATCH 0097/1225] Use the op type utility.

PiperOrigin-RevId: 176201390
---
 tensorflow/core/grappler/op_types.cc          | 89 ++++++++++++++++++-
 tensorflow/core/grappler/op_types.h           | 19 +++-
 .../grappler/optimizers/layout_optimizer.cc   | 72 ++++++++-------
 3 files changed, 146 insertions(+), 34 deletions(-)

diff --git a/tensorflow/core/grappler/op_types.cc b/tensorflow/core/grappler/op_types.cc
index 69bdef33c6..48b17fd20f 100644
--- a/tensorflow/core/grappler/op_types.cc
+++ b/tensorflow/core/grappler/op_types.cc
@@ -20,14 +20,29 @@ limitations under the License.
 namespace tensorflow {
 namespace grappler {
 
+bool IsAdd(const NodeDef& node) {
+  const auto op = node.op();
+  return op == "Add";
+}
+
 bool IsAddN(const NodeDef& node) {
   const auto op = node.op();
   return op == "AddN";
 }
 
-bool IsConcat(const NodeDef& node) {
+bool IsAvgPoolGrad(const NodeDef& node) {
   const auto op = node.op();
-  return op == "Concat" || op == "ConcatV2";
+  return op == "AvgPoolGrad";
+}
+
+bool IsBiasAddGrad(const NodeDef& node) {
+  const auto op = node.op();
+  return op == "BiasAddGrad";
+}
+
+bool IsConcatOffset(const NodeDef& node) {
+  const auto op = node.op();
+  return op == "ConcatOffset";
 }
 
 bool IsConstant(const NodeDef& node) {
@@ -35,6 +50,21 @@ bool IsConstant(const NodeDef& node) {
   return op == "Const";
 }
 
+bool IsConv2D(const NodeDef& node) {
+  const auto op = node.op();
+  return op == "Conv2D";
+}
+
+bool IsConv2DBackpropFilter(const NodeDef& node) {
+  const auto op = node.op();
+  return op == "Conv2DBackpropFilter";
+}
+
+bool IsConv2DBackpropInput(const NodeDef& node) {
+  const auto op = node.op();
+  return op == "Conv2DBackpropInput";
+}
+
 bool IsDequeueOp(const NodeDef& node) {
   const auto& op = node.op();
   return op == "QueueDequeueManyV2" || op == "QueueDequeueMany" ||
@@ -52,6 +82,16 @@ bool IsExit(const NodeDef& node) {
   return op == "Exit" || op == "RefExit";
 }
 
+bool IsFloorMod(const NodeDef& node) {
+  const auto& op = node.op();
+  return op == "FloorMod";
+}
+
+bool IsFusedBatchNormGradV1(const NodeDef& node) {
+  const auto& op = node.op();
+  return op == "FusedBatchNormGrad";
+}
+
 bool IsIdentity(const NodeDef& node) {
   const auto& op = node.op();
   return op == "Identity" || op == "RefIdentity";
@@ -62,6 +102,11 @@ bool IsMerge(const NodeDef& node) {
   return op == "Merge" || op == "RefMerge";
 }
 
+bool IsMul(const NodeDef& node) {
+  const auto op = node.op();
+  return op == "Mul";
+}
+
 bool IsNoOp(const NodeDef& node) {
   const auto op = node.op();
   return op == "NoOp";
@@ -72,12 +117,27 @@ bool IsNextIteration(const NodeDef& node) {
   return op == "NextIteration" || op == "RefNextIteration";
 }
 
+bool IsPad(const NodeDef& node) {
+  const auto op = node.op();
+  return op == "Pad";
+}
+
 bool IsPlaceholder(const NodeDef& node) {
   const auto op = node.op();
   return op == "Placeholder" || op == "PlaceholderV2" ||
          op == "PlaceholderWithDefault";
 }
 
+bool IsRealDiv(const NodeDef& node) {
+  const auto op = node.op();
+  return op == "RealDiv";
+}
+
+bool IsReluGrad(const NodeDef& node) {
+  const auto op = node.op();
+  return op == "ReluGrad";
+}
+
 bool IsRecv(const NodeDef& node) {
   const auto op = node.op();
   return op == "_Recv";
@@ -101,11 +161,36 @@ bool IsSend(const NodeDef& node) {
   return op == "_Send";
 }
 
+bool IsSlice(const NodeDef& node) {
+  const auto op = node.op();
+  return op == "Slice";
+}
+
+bool IsSquaredDifference(const NodeDef& node) {
+  const auto op = node.op();
+  return op == "SquaredDifference";
+}
+
+bool IsSqueeze(const NodeDef& node) {
+  const auto op = node.op();
+  return op == "Squeeze";
+}
+
 bool IsStopGradient(const NodeDef& node) {
   const auto& op = node.op();
   return op == "StopGradient" || op == "PreventGradient";
 }
 
+bool IsSub(const NodeDef& node) {
+  const auto op = node.op();
+  return op == "Sub";
+}
+
+bool IsSum(const NodeDef& node) {
+  const auto op = node.op();
+  return op == "Sum";
+}
+
 bool IsSwitch(const NodeDef& node) {
   const auto& op = node.op();
   return op == "Switch" || op == "RefSwitch";
diff --git a/tensorflow/core/grappler/op_types.h b/tensorflow/core/grappler/op_types.h
index a7c556c1ed..17ba3603c5 100644
--- a/tensorflow/core/grappler/op_types.h
+++ b/tensorflow/core/grappler/op_types.h
@@ -21,23 +21,40 @@ limitations under the License.
 namespace tensorflow {
 namespace grappler {
 
+bool IsAdd(const NodeDef& node);
 bool IsAddN(const NodeDef& node);
-bool IsConcat(const NodeDef& node);
+bool IsAvgPoolGrad(const NodeDef& node);
+bool IsBiasAddGrad(const NodeDef& node);
+bool IsConcatOffset(const NodeDef& node);
 bool IsConstant(const NodeDef& node);
+bool IsConv2D(const NodeDef& node);
+bool IsConv2DBackpropFilter(const NodeDef& node);
+bool IsConv2DBackpropInput(const NodeDef& node);
 bool IsDequeueOp(const NodeDef& node);
 bool IsEnter(const NodeDef& node);
 bool IsExit(const NodeDef& node);
+bool IsFloorMod(const NodeDef& node);
+bool IsFusedBatchNormGradV1(const NodeDef& node);
 bool IsIdentity(const NodeDef& node);
 bool IsMerge(const NodeDef& node);
+bool IsMul(const NodeDef& node);
 bool IsNextIteration(const NodeDef& node);
+bool IsPad(const NodeDef& node);
 bool IsNoOp(const NodeDef& node);
 bool IsPlaceholder(const NodeDef& node);
+bool IsRealDiv(const NodeDef& node);
+bool IsReluGrad(const NodeDef& node);
 bool IsRecv(const NodeDef& node);
 bool IsReduction(const NodeDef& node);
 bool IsReshape(const NodeDef& node);
 bool IsRestore(const NodeDef& node);
 bool IsSend(const NodeDef& node);
+bool IsSlice(const NodeDef& node);
+bool IsSquaredDifference(const NodeDef& node);
+bool IsSqueeze(const NodeDef& node);
 bool IsStopGradient(const NodeDef& node);
+bool IsSub(const NodeDef& node);
+bool IsSum(const NodeDef& node);
 bool IsSwitch(const NodeDef& node);
 bool IsTranspose(const NodeDef& node);
 bool IsVariable(const NodeDef& node);
diff --git a/tensorflow/core/grappler/optimizers/layout_optimizer.cc b/tensorflow/core/grappler/optimizers/layout_optimizer.cc
index e363b8f27b..f186fdb895 100644
--- a/tensorflow/core/grappler/optimizers/layout_optimizer.cc
+++ b/tensorflow/core/grappler/optimizers/layout_optimizer.cc
@@ -88,6 +88,21 @@ bool IsNodeNCHWToNHWC(const string& node_name) {
   return false;
 }
 
+bool IsConcat(const NodeDef& node) {
+  const auto op = node.op();
+  return op == "Concat" || op == "ConcatV2";
+}
+
+bool IsConcatV1(const NodeDef& node) {
+  const auto op = node.op();
+  return op == "Concat";
+}
+
+bool IsMaxPoolGradV1(const NodeDef& node) {
+  const auto& op = node.op();
+  return op == "MaxPoolGrad";
+}
+
 class GraphProcessor {
  public:
   GraphProcessor(GraphDef* graph, NodeMap* node_map)
@@ -668,7 +683,7 @@ class AgnosticNodeProcessor : public NodeProcessor {
     auto node = node_map_->GetNode(node_->name());
     while (node->input_size() > 0) {
       int data_input_pos = 0;
-      if (node->op().compare("Concat") == 0) {
+      if (IsConcatV1(*node)) {
         data_input_pos = 1;
       }
       node = node_map_->GetNode(node->input(data_input_pos));
@@ -815,8 +830,7 @@ class ConcatProcessor : public AgnosticNodeProcessor {
       : AgnosticNodeProcessor(opt_cxt) {
     // For Concat,  the concat axis is the first input; for ConcatV2,
     // the last input.
-    axis_node_pos_ =
-        (node_->op().compare("Concat") == 0) ? 0 : (node_->input_size() - 1);
+    axis_node_pos_ = (IsConcatV1(*node_)) ? 0 : (node_->input_size() - 1);
   }
 
  protected:
@@ -827,9 +841,9 @@ class ConcatProcessor : public AgnosticNodeProcessor {
 
   std::vector<int> GetInputPos() const override {
     std::vector<int> input_pos;
-    int start = (node_->op().compare("Concat") == 0) ? 1 : 0;
-    int end = (node_->op().compare("Concat") == 0) ? node_->input_size()
-                                                   : (node_->input_size() - 1);
+    int start = (IsConcatV1(*node_)) ? 1 : 0;
+    int end =
+        (IsConcatV1(*node_)) ? node_->input_size() : (node_->input_size() - 1);
     for (int i = start; i < end; i++) {
       input_pos.push_back(i);
     }
@@ -1050,17 +1064,17 @@ class SliceProcessorConcatOffset : public AgnosticNodeProcessor {
   Status CustomizedProcessing() override {
     auto maybe_concatoffset_node =
         node_map_->GetNode(NodeName(node_->input(1)));
-    if (maybe_concatoffset_node->op() == "ConcatOffset") {
+    if (IsConcatOffset(*maybe_concatoffset_node)) {
       auto maybe_axis_node =
           node_map_->GetNode(maybe_concatoffset_node->input(0));
       NodeDef* axis_node;
-      if (maybe_axis_node->op() == "Const") {
+      if (IsConstant(*maybe_axis_node)) {
         axis_node = maybe_axis_node;
         // A FloorMod node might be added between ConcatOffset and the concat
         // dimension const node to handle a negative dimension index -1, meaning
         // the last dimension, which is consistent with the python's notation
         // for negative index.
-      } else if (maybe_axis_node->op() == "FloorMod") {
+      } else if (IsFloorMod(*maybe_axis_node)) {
         axis_node = node_map_->GetNode(maybe_axis_node->input(0));
       } else {
         return Status(error::INVALID_ARGUMENT,
@@ -1263,21 +1277,21 @@ class DataLayoutOptimizer : GraphProcessor {
         bool is_in_frame = !frames[node].empty();
         OptimizeContext opt_cxt(graph_, node, node_map_, is_in_frame);
         std::unique_ptr<NodeProcessor> node_processor;
-        if (node->op().compare("AvgPoolGrad") == 0) {
+        if (IsAvgPoolGrad(*node)) {
           node_processor.reset(new AvgPoolGradProcessor(opt_cxt));
-        } else if (node->op().compare("BiasAddGrad") == 0) {
+        } else if (IsBiasAddGrad(*node)) {
           node_processor.reset(new BiasAddGradProcessor(opt_cxt));
-        } else if (node->op().compare("Conv2D") == 0) {
+        } else if (IsConv2D(*node)) {
           node_processor.reset(new Conv2DProcessor(opt_cxt, config_.no_gemm));
-        } else if (node->op().compare("Conv2DBackpropFilter") == 0) {
+        } else if (IsConv2DBackpropFilter(*node)) {
           node_processor.reset(
               new Conv2DBackpropFilterProcessor(opt_cxt, config_.no_gemm));
-        } else if (node->op().compare("Conv2DBackpropInput") == 0) {
+        } else if (IsConv2DBackpropInput(*node)) {
           node_processor.reset(
               new Conv2DBackpropInputProcessor(opt_cxt, config_.no_gemm));
-        } else if (node->op().compare("FusedBatchNormGrad") == 0) {
+        } else if (IsFusedBatchNormGradV1(*node)) {
           node_processor.reset(new FusedBatchNormGradProcessor(opt_cxt));
-        } else if (node->op().compare("MaxPoolGrad") == 0) {
+        } else if (IsMaxPoolGradV1(*node)) {
           node_processor.reset(new MaxPoolGradProcessor(opt_cxt));
         } else {
           node_processor.reset(new NodeProcessor(opt_cxt));
@@ -1303,34 +1317,30 @@ class DataLayoutOptimizer : GraphProcessor {
           bool is_in_frame = !frames[node].empty();
           OptimizeContext opt_cxt(graph_, node, node_map_, is_in_frame);
           std::unique_ptr<NodeProcessor> node_processor;
-          if (node->op().compare("AddN") == 0) {
+          if (IsAddN(*node)) {
             node_processor.reset(new AddNProcessor(opt_cxt));
-          } else if (node->op().compare("Add") == 0 ||
-                     node->op().compare("Mul") == 0 ||
-                     node->op().compare("RealDiv") == 0 ||
-                     node->op().compare("SquaredDifference") == 0 ||
-                     node->op().compare("Sub") == 0) {
+          } else if (IsAdd(*node) || IsMul(*node) || IsRealDiv(*node) ||
+                     IsSquaredDifference(*node) || IsSub(*node)) {
             node_processor.reset(new BinaryOpProcessor(opt_cxt));
-          } else if (node->op().compare("Concat") == 0 ||
-                     node->op().compare("ConcatV2") == 0) {
+          } else if (IsConcat(*node)) {
             node_processor.reset(new ConcatProcessor(opt_cxt));
-          } else if (node->op().compare("Pad") == 0) {
+          } else if (IsPad(*node)) {
             node_processor.reset(new PadProcessor(opt_cxt));
-          } else if (node->op().compare("ReluGrad") == 0) {
+          } else if (IsReluGrad(*node)) {
             node_processor.reset(new ReluGradProcessor(opt_cxt));
-          } else if (node->op().compare("Slice") == 0) {
+          } else if (IsSlice(*node)) {
             auto input1 = node_map_->GetNode(NodeName(node->input(1)));
             auto input2 = node_map_->GetNode(NodeName(node->input(2)));
-            if (input1->op() == "ConcatOffset") {
+            if (IsConcatOffset(*input1)) {
               node_processor.reset(new SliceProcessorConcatOffset(opt_cxt));
-            } else if (input1->op() == "Const" && input2->op() == "Const") {
+            } else if (IsConstant(*input1) && IsConstant(*input2)) {
               node_processor.reset(new SliceProcessorConst(opt_cxt));
             } else {
               node_processor.reset(new SliceProcessor(opt_cxt));
             }
-          } else if (node->op().compare("Squeeze") == 0) {
+          } else if (IsSqueeze(*node)) {
             node_processor.reset(new SqueezeProcessor(opt_cxt));
-          } else if (node->op().compare("Sum") == 0) {
+          } else if (IsSum(*node)) {
             node_processor.reset(new SumProcessor(opt_cxt));
           } else {
             node_processor.reset(new AgnosticNodeProcessor(opt_cxt));
-- 
GitLab


From 63a69b8c4a30f4458b2696993463d1c78b5c922e Mon Sep 17 00:00:00 2001
From: Benoit Steiner <bsteiner@google.com>
Date: Fri, 17 Nov 2017 22:13:21 -0800
Subject: [PATCH 0098/1225] Added the ability to list the hardware resources
 available in a cluster.

PiperOrigin-RevId: 176202165
---
 .../core/protobuf/device_properties.proto     |  5 +++++
 tensorflow/python/grappler/cluster.i          | 21 +++++++++++++++++++
 tensorflow/python/grappler/cluster.py         | 12 +++++++++++
 3 files changed, 38 insertions(+)

diff --git a/tensorflow/core/protobuf/device_properties.proto b/tensorflow/core/protobuf/device_properties.proto
index 9b1497c710..3bd3015900 100644
--- a/tensorflow/core/protobuf/device_properties.proto
+++ b/tensorflow/core/protobuf/device_properties.proto
@@ -49,3 +49,8 @@ message DeviceProperties {
   // Memory bandwidth in KB/s
   int64 bandwidth = 13;
 }
+
+message NamedDevice {
+  string name = 1;
+  DeviceProperties properties = 2;
+}
diff --git a/tensorflow/python/grappler/cluster.i b/tensorflow/python/grappler/cluster.i
index 3df9431282..1e06074188 100644
--- a/tensorflow/python/grappler/cluster.i
+++ b/tensorflow/python/grappler/cluster.i
@@ -41,12 +41,14 @@ limitations under the License.
 }
 
 %{
+#include <vector>
 #include "tensorflow/core/grappler/devices.h"
 #include "tensorflow/core/grappler/clusters/single_machine.h"
 #include "tensorflow/core/grappler/costs/graph_memory.h"
 #include "tensorflow/core/grappler/costs/op_performance_data.pb.h"
 #include "tensorflow/core/grappler/costs/measuring_cost_estimator.h"
 #include "tensorflow/core/grappler/costs/utils.h"
+#include "tensorflow/core/protobuf/device_properties.pb.h"
 
 static tensorflow::grappler::Cluster* TF_NewCluster(
     bool allow_soft_placement, bool disable_detailed_stats, TF_Status* out_status) {
@@ -85,6 +87,23 @@ tensorflow::Status _GetOpPerformanceDataAndRunTime(const tensorflow::grappler::G
   return tensorflow::Status::OK();
 }
 
+static PyObject* TF_ListDevices(tensorflow::grappler::Cluster* cluster) {
+  const std::unordered_map<string, tensorflow::DeviceProperties>& devices = cluster->GetDevices();
+  PyObject* result = PyList_New(devices.size());
+  int i = 0;
+  for (auto& dev : devices) {
+    tensorflow::NamedDevice d;
+    d.set_name(dev.first);
+    *d.mutable_properties() = dev.second;
+    string dev_str = d.SerializeAsString();
+    PyObject* dev_obj = PyBytes_FromStringAndSize(dev_str.data(),
+                                                  dev_str.size());
+    PyList_SetItem(result, i, dev_obj);
+    ++i;
+  }
+  return result;
+}
+
 static PyObject* TF_MeasureCosts(
     const tensorflow::grappler::GrapplerItem* item, tensorflow::grappler::Cluster* cluster,
     bool generate_timeline, TF_Status* out_status) {
@@ -198,9 +217,11 @@ static PyObject* TF_DeterminePeakMemoryUsage(
 static tensorflow::grappler::Cluster* TF_NewCluster(
     bool allow_soft_placement, bool disable_detailed_stats, TF_Status* out_status);
 static void TF_DeleteCluster(tensorflow::grappler::Cluster* cluster);
+static PyObject* TF_ListDevices(tensorflow::grappler::Cluster* cluster);
 static PyObject* TF_MeasureCosts(
     const tensorflow::grappler::GrapplerItem* item, tensorflow::grappler::Cluster* cluster,
     bool generate_timeline, TF_Status* out_status);
 static PyObject* TF_DeterminePeakMemoryUsage(
     const tensorflow::grappler::GrapplerItem* item, tensorflow::grappler::Cluster* cluster,
     TF_Status* out_status);
+
diff --git a/tensorflow/python/grappler/cluster.py b/tensorflow/python/grappler/cluster.py
index baac604f41..c6ddb803f4 100644
--- a/tensorflow/python/grappler/cluster.py
+++ b/tensorflow/python/grappler/cluster.py
@@ -20,6 +20,7 @@ from __future__ import print_function
 
 from tensorflow.core.framework import step_stats_pb2
 from tensorflow.core.grappler.costs import op_performance_data_pb2
+from tensorflow.core.protobuf import device_properties_pb2
 from tensorflow.python import pywrap_tensorflow as tf_cluster
 from tensorflow.python.framework import errors
 
@@ -51,6 +52,17 @@ class Cluster(object):
     if self._tf_cluster is not None:
       tf_cluster.TF_DeleteCluster(self._tf_cluster)
 
+  def ListDevices(self):
+    """Returns the list of available hardware devices."""
+    devices = []
+    if self._tf_cluster is not None:
+      ret_from_swig = tf_cluster.TF_ListDevices(self._tf_cluster)
+      devices = []
+      for raw_dev in ret_from_swig:
+        devices.append(device_properties_pb2.NamedDevice.FromString(raw_dev))
+    print(str(devices))
+    return devices
+
   def MeasureCosts(self, item):
     """Returns the cost of running the specified item.
 
-- 
GitLab


From 1002af67b9cca81a1cf700aaebf338e14c8c5c04 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Sat, 18 Nov 2017 02:32:30 -0800
Subject: [PATCH 0099/1225] Tuples weren't handled by the sharding validator.
 Add more tuple validation tests and improve the validation error messages
 given.

PiperOrigin-RevId: 176214090
---
 .../compiler/xla/service/hlo_sharding.cc      | 76 +++++++++++++++----
 .../compiler/xla/service/hlo_sharding.h       |  5 ++
 .../compiler/xla/service/hlo_sharding_test.cc | 14 +++-
 3 files changed, 80 insertions(+), 15 deletions(-)

diff --git a/tensorflow/compiler/xla/service/hlo_sharding.cc b/tensorflow/compiler/xla/service/hlo_sharding.cc
index 7356663454..d1adec31c2 100644
--- a/tensorflow/compiler/xla/service/hlo_sharding.cc
+++ b/tensorflow/compiler/xla/service/hlo_sharding.cc
@@ -160,7 +160,59 @@ bool HloSharding::HasUniqueDevice() const {
   }
 }
 
+Status HloSharding::ValidateTuple(const Shape& shape, int64 num_devices) const {
+  if (!ShapeUtil::IsTuple(shape)) {
+    return tensorflow::errors::InvalidArgument(
+        StrCat("Sharding is tuple-shaped but validation shape is not."));
+  }
+  // The easiest way to get the number of elements in a nested tuple is just to
+  // create a shape tree. We could call GetAsShapeTree, but that will try and
+  // apply our tuple_shardings_ to the shape tree, and that might cause a crash
+  // at this point as we haven't validated them.
+  ShapeTree<bool> bool_shape_tree(shape, false);
+  int64 num_leaves =
+      std::distance(bool_shape_tree.leaf_begin(), bool_shape_tree.leaf_end());
+  if (num_leaves != tuple_elements_.size()) {
+    return tensorflow::errors::InvalidArgument(
+        StrCat("Validation tuple shape has ", num_leaves,
+               " leaf elements, but this sharding contains ",
+               tuple_elements_.size(), " elements."));
+  }
+
+  // Now we've validated the number of tuple elements, it's safe to request a
+  // shape tree.
+  ShapeTree<HloSharding> shape_tree = GetAsShapeTree(shape);
+  for (const auto& index_to_sharding : shape_tree.leaves()) {
+    Status status = index_to_sharding.second.ValidateNonTuple(
+        ShapeUtil::GetSubshape(shape, index_to_sharding.first), num_devices);
+    if (!status.ok()) {
+      tensorflow::errors::AppendToMessage(
+          &status, StrCat("Note: While validating sharding tuple element ",
+                          index_to_sharding.first.ToString(), " which is ",
+                          index_to_sharding.second.ToString()));
+      return status;
+    }
+  }
+  return Status::OK();
+}
+
 Status HloSharding::Validate(const Shape& shape, int64 num_devices) const {
+  Status status = IsTuple() ? ValidateTuple(shape, num_devices)
+                            : ValidateNonTuple(shape, num_devices);
+  if (!status.ok()) {
+    tensorflow::errors::AppendToMessage(
+        &status, StrCat("Note: While validating sharding ", ToString(),
+                        " against shape ", ShapeUtil::HumanString(shape)));
+  }
+  return status;
+}
+
+Status HloSharding::ValidateNonTuple(const Shape& shape,
+                                     int64 num_devices) const {
+  if (ShapeUtil::IsTuple(shape)) {
+    return tensorflow::errors::InvalidArgument(
+        StrCat("Validation shape is a tuple but sharding is not."));
+  }
   if (replicated_) {
     return Status::OK();
   }
@@ -174,13 +226,11 @@ Status HloSharding::Validate(const Shape& shape, int64 num_devices) const {
         // Don't overwrite a bad status, so we report the first error.
         if (status.ok()) {
           if (core >= num_devices) {
-            status =
-                tensorflow::errors::InvalidArgument(tensorflow::strings::StrCat(
-                    "core ", core, " > ", num_devices, " in tile assignment"));
+            status = tensorflow::errors::InvalidArgument(StrCat(
+                "core ", core, " > ", num_devices, " in tile assignment"));
           } else if (seen_cores.count(core) != 0) {
-            status =
-                tensorflow::errors::InvalidArgument(tensorflow::strings::StrCat(
-                    "core ", core, " is not unique in tile assignment"));
+            status = tensorflow::errors::InvalidArgument(
+                StrCat("core ", core, " is not unique in tile assignment"));
           }
         }
         seen_cores.insert(core);
@@ -214,9 +264,9 @@ Status HloSharding::Validate(const Shape& shape, int64 num_devices) const {
     auto tile_dim = tile_shape_.dimensions(i);
     auto shape_dim = shape.dimensions(i);
     if (tile_dim > shape_dim) {
-      return tensorflow::errors::InvalidArgument(tensorflow::strings::StrCat(
-          "Tile is larger than input shape (dimension ", i, ", ", tile_dim,
-          " > ", shape_dim));
+      return tensorflow::errors::InvalidArgument(
+          StrCat("Tile is larger than input shape (dimension ", i, ", ",
+                 tile_dim, " > ", shape_dim));
     }
   }
 
@@ -226,10 +276,10 @@ Status HloSharding::Validate(const Shape& shape, int64 num_devices) const {
     int64 expected_dim =
         CeilOfRatio(shape.dimensions(i), tile_shape_.dimensions(i));
     if (tile_assignment_.dimensions()[i] != expected_dim) {
-      return tensorflow::errors::InvalidArgument(tensorflow::strings::StrCat(
-          "Tile assignment tensor has incorrect shape. Dimension ", i,
-          " expected ", expected_dim, " but got ",
-          tile_assignment_.dimensions()[i]));
+      return tensorflow::errors::InvalidArgument(
+          StrCat("Tile assignment tensor has incorrect shape. Dimension ", i,
+                 " expected ", expected_dim, " but got ",
+                 tile_assignment_.dimensions()[i]));
     }
   }
 
diff --git a/tensorflow/compiler/xla/service/hlo_sharding.h b/tensorflow/compiler/xla/service/hlo_sharding.h
index dbd16b7c9d..1a6988a2dc 100644
--- a/tensorflow/compiler/xla/service/hlo_sharding.h
+++ b/tensorflow/compiler/xla/service/hlo_sharding.h
@@ -222,6 +222,11 @@ class HloSharding {
         tile_assignment_({0}),
         tuple_elements_(tuple_shardings) {}
 
+  // Internal helper to validate a tuple sharding.
+  Status ValidateTuple(const Shape& shape, int64 num_devices) const;
+  // Internal helper to validate a non-tuple (leaf) sharding.
+  Status ValidateNonTuple(const Shape& shape, int64 num_devices) const;
+
   bool replicated_;
   bool maximal_;
   bool tuple_;
diff --git a/tensorflow/compiler/xla/service/hlo_sharding_test.cc b/tensorflow/compiler/xla/service/hlo_sharding_test.cc
index 3161dda271..0c7487b3ac 100644
--- a/tensorflow/compiler/xla/service/hlo_sharding_test.cc
+++ b/tensorflow/compiler/xla/service/hlo_sharding_test.cc
@@ -145,11 +145,13 @@ TEST_F(HloShardingTest, NestedTuple) {
       ShapeUtil::MakeShape(F32, {4, 6}),
   });
 
+  HloSharding tiled_sharding = HloSharding::Tile(
+      ShapeUtil::MakeShape(F32, {4, 3}), Array<int64>({{0, 1}}));
   OpSharding proto;
   proto.set_type(OpSharding::Type::OpSharding_Type_TUPLE);
   *proto.add_tuple_shardings() = HloSharding::Replicate().ToProto();
   *proto.add_tuple_shardings() = HloSharding::AssignDevice(0).ToProto();
-  *proto.add_tuple_shardings() = HloSharding::AssignDevice(1).ToProto();
+  *proto.add_tuple_shardings() = tiled_sharding.ToProto();
   HloSharding tuple_sharding =
       HloSharding::FromProto(proto).ConsumeValueOrDie();
 
@@ -157,7 +159,15 @@ TEST_F(HloShardingTest, NestedTuple) {
       tuple_sharding.GetAsShapeTree(nested_tuple_shape);
   EXPECT_EQ(shape_tree.element({0}), HloSharding::Replicate());
   EXPECT_EQ(shape_tree.element({1, 0}), HloSharding::AssignDevice(0));
-  EXPECT_EQ(shape_tree.element({2}), HloSharding::AssignDevice(1));
+  EXPECT_EQ(shape_tree.element({2}), tiled_sharding);
+
+  EXPECT_IS_OK(tuple_sharding.Validate(nested_tuple_shape, /*num_devices=*/5));
+  // Test should fail because tuple element count does not match.
+  EXPECT_IS_NOT_OK(tuple_sharding.Validate(ShapeUtil::MakeTupleShape({}),
+                                           /*num_devices=*/5));
+  // Test should fail because the input type is not a tuple.
+  EXPECT_IS_NOT_OK(tuple_sharding.Validate(ShapeUtil::MakeShape(F32, {}),
+                                           /*num_devices=*/5));
 }
 
 TEST_F(HloShardingTest, Hash) {
-- 
GitLab


From b6b11e6c44019dc785fa5c957ab20f610b022623 Mon Sep 17 00:00:00 2001
From: Justine Tunney <jart@google.com>
Date: Sat, 18 Nov 2017 07:13:27 -0800
Subject: [PATCH 0100/1225] Reduce Kokoro log output by 83%

-Winconsistent-missing-override caused this macro to output 1.5MB of
linter warnings, since it's expanded 45 times and included by 51 .cc
files.

PiperOrigin-RevId: 176224735
---
 .../lite/toco/graph_transformations/graph_transformations.h     | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.h b/tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.h
index 2cc24ff361..9ad1b9622f 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.h
+++ b/tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.h
@@ -108,7 +108,7 @@ void RunGraphTransformations(Model* model, const string& message,
   class GTName : public GraphTransformation {              \
    public:                                                 \
     bool Run(Model* model, std::size_t op_index) override; \
-    const char* Name() const { return #GTName; }           \
+    const char* Name() const override { return #GTName; }  \
   };
 
 // List of all graph transformations
-- 
GitLab


From 80e7c9f45c7911b97c1da92d0a40063f9d2ae50c Mon Sep 17 00:00:00 2001
From: Gunhan Gulsoy <gunan@google.com>
Date: Sat, 18 Nov 2017 23:52:32 -0800
Subject: [PATCH 0101/1225] Fix pip package tests. (#14685)

* Disable contrib/summary tests from pip runs.

They depend on a testonly package, so they break pip test runs for
nightly and releases.

* Disable reader_ops_test from pip tests.

It depends on a testonly module, which breaks nightly and release tests.

* Instead of disabling tests, merge test_internal package into test_util.

* Add import for sqlite3 to summary_test_util.

* Add functools import to summary_test_util.
---
 .../contrib/data/python/kernel_tests/BUILD    |  1 +
 tensorflow/contrib/summary/BUILD              | 15 +----
 .../contrib/summary/summary_ops_graph_test.py |  6 +-
 .../contrib/summary/summary_ops_test.py       |  7 +--
 .../contrib/summary/summary_test_internal.py  | 59 -------------------
 .../contrib/summary/summary_test_util.py      | 37 ++++++++++++
 .../tools/pip_package/pip_smoke_test.py       |  3 -
 7 files changed, 45 insertions(+), 83 deletions(-)
 delete mode 100644 tensorflow/contrib/summary/summary_test_internal.py

diff --git a/tensorflow/contrib/data/python/kernel_tests/BUILD b/tensorflow/contrib/data/python/kernel_tests/BUILD
index 3d4e46408e..b947b450ce 100644
--- a/tensorflow/contrib/data/python/kernel_tests/BUILD
+++ b/tensorflow/contrib/data/python/kernel_tests/BUILD
@@ -326,6 +326,7 @@ py_test(
     size = "small",
     srcs = ["reader_dataset_ops_test.py"],
     srcs_version = "PY2AND3",
+    tags = ["no_pip"],
     deps = [
         ":dataset_serialization_test",
         "//tensorflow/contrib/data/python/ops:readers",
diff --git a/tensorflow/contrib/summary/BUILD b/tensorflow/contrib/summary/BUILD
index 3892654f25..cbe2d34d0d 100644
--- a/tensorflow/contrib/summary/BUILD
+++ b/tensorflow/contrib/summary/BUILD
@@ -25,7 +25,6 @@ py_test(
     srcs_version = "PY2AND3",
     deps = [
         ":summary_ops",
-        ":summary_test_internal",
         ":summary_test_util",
         "//tensorflow/python:array_ops",
         "//tensorflow/python:errors",
@@ -46,7 +45,7 @@ py_test(
     srcs_version = "PY2AND3",
     deps = [
         ":summary_ops",
-        ":summary_test_internal",
+        ":summary_test_util",
         "//tensorflow/python:client_testlib",
         "//tensorflow/python:ops",
         "//tensorflow/python:platform",
@@ -111,15 +110,3 @@ py_library(
         "//tensorflow/python:platform",
     ],
 )
-
-py_library(
-    name = "summary_test_internal",
-    testonly = 1,
-    srcs = ["summary_test_internal.py"],
-    srcs_version = "PY2AND3",
-    visibility = ["//visibility:private"],
-    deps = [
-        "//tensorflow/python:lib",
-        "//tensorflow/python:platform",
-    ],
-)
diff --git a/tensorflow/contrib/summary/summary_ops_graph_test.py b/tensorflow/contrib/summary/summary_ops_graph_test.py
index 8f85f67a25..3df87491ef 100644
--- a/tensorflow/contrib/summary/summary_ops_graph_test.py
+++ b/tensorflow/contrib/summary/summary_ops_graph_test.py
@@ -19,17 +19,17 @@ from __future__ import print_function
 import six
 
 from tensorflow.contrib.summary import summary_ops
-from tensorflow.contrib.summary import summary_test_internal
+from tensorflow.contrib.summary import summary_test_util
 from tensorflow.core.framework import graph_pb2
 from tensorflow.core.framework import node_def_pb2
 from tensorflow.python.framework import ops
 from tensorflow.python.platform import test
 from tensorflow.python.training import training_util
 
-get_all = summary_test_internal.get_all
+get_all = summary_test_util.get_all
 
 
-class DbTest(summary_test_internal.SummaryDbTest):
+class DbTest(summary_test_util.SummaryDbTest):
 
   def testGraphPassedToGraph_isForbiddenForThineOwnSafety(self):
     with self.assertRaises(TypeError):
diff --git a/tensorflow/contrib/summary/summary_ops_test.py b/tensorflow/contrib/summary/summary_ops_test.py
index c5ca054f77..7c4c55bdb1 100644
--- a/tensorflow/contrib/summary/summary_ops_test.py
+++ b/tensorflow/contrib/summary/summary_ops_test.py
@@ -21,7 +21,6 @@ import tempfile
 import six
 
 from tensorflow.contrib.summary import summary_ops
-from tensorflow.contrib.summary import summary_test_internal
 from tensorflow.contrib.summary import summary_test_util
 from tensorflow.core.framework import graph_pb2
 from tensorflow.core.framework import node_def_pb2
@@ -35,8 +34,8 @@ from tensorflow.python.ops import state_ops
 from tensorflow.python.platform import gfile
 from tensorflow.python.training import training_util
 
-get_all = summary_test_internal.get_all
-get_one = summary_test_internal.get_one
+get_all = summary_test_util.get_all
+get_one = summary_test_util.get_one
 
 
 class TargetTest(test_util.TensorFlowTestCase):
@@ -110,7 +109,7 @@ class TargetTest(test_util.TensorFlowTestCase):
       self.assertEqual(events[1].summary.value[0].tag, 'scalar')
 
 
-class DbTest(summary_test_internal.SummaryDbTest):
+class DbTest(summary_test_util.SummaryDbTest):
 
   def testIntegerSummaries(self):
     step = training_util.create_global_step()
diff --git a/tensorflow/contrib/summary/summary_test_internal.py b/tensorflow/contrib/summary/summary_test_internal.py
deleted file mode 100644
index 54233f2f50..0000000000
--- a/tensorflow/contrib/summary/summary_test_internal.py
+++ /dev/null
@@ -1,59 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Internal helpers for tests in this directory."""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import functools
-import os
-import sqlite3
-
-from tensorflow.contrib.summary import summary_ops
-from tensorflow.python.framework import test_util
-
-
-class SummaryDbTest(test_util.TensorFlowTestCase):
-  """Helper for summary database testing."""
-
-  def setUp(self):
-    super(SummaryDbTest, self).setUp()
-    self.db_path = os.path.join(self.get_temp_dir(), 'DbTest.sqlite')
-    if os.path.exists(self.db_path):
-      os.unlink(self.db_path)
-    self.db = sqlite3.connect(self.db_path)
-    self.create_summary_db_writer = functools.partial(
-        summary_ops.create_summary_db_writer,
-        db_uri=self.db_path,
-        experiment_name='experiment',
-        run_name='run',
-        user_name='user')
-
-  def tearDown(self):
-    self.db.close()
-    super(SummaryDbTest, self).tearDown()
-
-
-def get_one(db, q, *p):
-  return db.execute(q, p).fetchone()[0]
-
-
-def get_all(db, q, *p):
-  return unroll(db.execute(q, p).fetchall())
-
-
-def unroll(list_of_tuples):
-  return sum(list_of_tuples, ())
diff --git a/tensorflow/contrib/summary/summary_test_util.py b/tensorflow/contrib/summary/summary_test_util.py
index 794c5b8bab..94767c8df2 100644
--- a/tensorflow/contrib/summary/summary_test_util.py
+++ b/tensorflow/contrib/summary/summary_test_util.py
@@ -19,13 +19,38 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
+import functools
 import os
+import sqlite3
 
+from tensorflow.contrib.summary import summary_ops
 from tensorflow.core.util import event_pb2
+from tensorflow.python.framework import test_util
 from tensorflow.python.lib.io import tf_record
 from tensorflow.python.platform import gfile
 
 
+class SummaryDbTest(test_util.TensorFlowTestCase):
+  """Helper for summary database testing."""
+
+  def setUp(self):
+    super(SummaryDbTest, self).setUp()
+    self.db_path = os.path.join(self.get_temp_dir(), 'DbTest.sqlite')
+    if os.path.exists(self.db_path):
+      os.unlink(self.db_path)
+    self.db = sqlite3.connect(self.db_path)
+    self.create_summary_db_writer = functools.partial(
+        summary_ops.create_summary_db_writer,
+        db_uri=self.db_path,
+        experiment_name='experiment',
+        run_name='run',
+        user_name='user')
+
+  def tearDown(self):
+    self.db.close()
+    super(SummaryDbTest, self).tearDown()
+
+
 def events_from_file(filepath):
   """Returns all events in a single event file.
 
@@ -60,3 +85,15 @@ def events_from_logdir(logdir):
   files = gfile.ListDirectory(logdir)
   assert len(files) == 1, "Found not exactly one file in logdir: %s" % files
   return events_from_file(os.path.join(logdir, files[0]))
+
+
+def get_one(db, q, *p):
+  return db.execute(q, p).fetchone()[0]
+
+
+def get_all(db, q, *p):
+  return unroll(db.execute(q, p).fetchall())
+
+
+def unroll(list_of_tuples):
+  return sum(list_of_tuples, ())
diff --git a/tensorflow/tools/pip_package/pip_smoke_test.py b/tensorflow/tools/pip_package/pip_smoke_test.py
index 3677aaa886..cc46dd5162 100644
--- a/tensorflow/tools/pip_package/pip_smoke_test.py
+++ b/tensorflow/tools/pip_package/pip_smoke_test.py
@@ -66,9 +66,6 @@ BLACKLIST = [
     "//tensorflow/contrib/timeseries/examples:data/period_trend.csv",  # pylint:disable=line-too-long
     "//tensorflow/contrib/timeseries/python/timeseries:test_utils",
     "//tensorflow/contrib/timeseries/python/timeseries/state_space_models:test_utils",  # pylint:disable=line-too-long
-
-    # TODO(yifeif): Remove when py_library(testonly=1) is ignored.
-    "//tensorflow/contrib/summary:summary_test_internal",
 ]
 
 
-- 
GitLab


From b3b95253f6c4dcdff24cf32dd7c9910401498b89 Mon Sep 17 00:00:00 2001
From: Gunhan Gulsoy <gunan@google.com>
Date: Sun, 19 Nov 2017 09:12:30 -0800
Subject: [PATCH 0102/1225] Fix issues in replicate_model_fn_test.

PiperOrigin-RevId: 176280861
---
 tensorflow/contrib/estimator/BUILD                           | 1 -
 .../estimator/python/estimator/replicate_model_fn_test.py    | 5 ++---
 2 files changed, 2 insertions(+), 4 deletions(-)

diff --git a/tensorflow/contrib/estimator/BUILD b/tensorflow/contrib/estimator/BUILD
index 008ca7a5d1..fe643659d8 100644
--- a/tensorflow/contrib/estimator/BUILD
+++ b/tensorflow/contrib/estimator/BUILD
@@ -297,5 +297,4 @@ cuda_py_test(
         "//tensorflow/python:variables",
         ":replicate_model_fn",
     ],
-    tags = ["requires-gpu-sm35"],
 )
diff --git a/tensorflow/contrib/estimator/python/estimator/replicate_model_fn_test.py b/tensorflow/contrib/estimator/python/estimator/replicate_model_fn_test.py
index 5a1982f5eb..ffe69f89b4 100644
--- a/tensorflow/contrib/estimator/python/estimator/replicate_model_fn_test.py
+++ b/tensorflow/contrib/estimator/python/estimator/replicate_model_fn_test.py
@@ -843,9 +843,8 @@ class GetLocalDevicesTest(test_util.TensorFlowTestCase):
         replicate_model_fn._get_local_devices('XPU'))  # XPU doesn't exist.
 
   def test_whether_there_is_a_gpu(self):
-    self.assertEqual(
-        len(replicate_model_fn._get_local_devices('GPU')),
-        test.is_gpu_available())
+    if test.is_gpu_available():
+      self.assertTrue(len(replicate_model_fn._get_local_devices('GPU')))
 
 
 class LocalDeviceSetterTest(test_util.TensorFlowTestCase):
-- 
GitLab


From 2c5a88cbb002f6c8cb1df37e1e0d958543fd010b Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 20 Nov 2017 01:33:58 -0800
Subject: [PATCH 0103/1225] Strip explicit zero output index from array names.

PiperOrigin-RevId: 176327703
---
 .../contrib/lite/toco/import_tensorflow.cc    | 21 ++++++++++++++++++-
 1 file changed, 20 insertions(+), 1 deletion(-)

diff --git a/tensorflow/contrib/lite/toco/import_tensorflow.cc b/tensorflow/contrib/lite/toco/import_tensorflow.cc
index c889149ada..0135c3e2f9 100644
--- a/tensorflow/contrib/lite/toco/import_tensorflow.cc
+++ b/tensorflow/contrib/lite/toco/import_tensorflow.cc
@@ -19,6 +19,7 @@ limitations under the License.
 
 #include "google/protobuf/map.h"
 #include "google/protobuf/text_format.h"
+#include "absl/strings/match.h"
 #include "absl/strings/numbers.h"
 #include "absl/strings/str_cat.h"
 #include "absl/strings/str_split.h"
@@ -1260,6 +1261,12 @@ void StripCaretFromArrayNames(Model* model) {
   }
 }
 
+void StripZeroOutputIndexFromInputs(NodeDef* node) {
+  for (auto& input : *node->mutable_input()) {
+    input = StripSuffixString(input, ":0");
+  }
+}
+
 void AddExtraOutputsFedIntoOtherOps(Model* model) {
   for (const auto& consumer_op : model->operators) {
     for (const string& input : consumer_op->inputs) {
@@ -1347,10 +1354,22 @@ std::unique_ptr<Model> ImportTensorFlowGraphDef(const ModelFlags& model_flags,
     LogDumpGraphDef(kLogLevelModelChanged, "AFTER INLINING", inlined_graph);
   }
 
+  // Check input and output specification.
+  for (const auto& specified_input_array : model_flags.input_arrays()) {
+    CHECK(!absl::EndsWith(specified_input_array.name(), ":0"))
+        << "Unsupported explicit zero output index: "
+        << specified_input_array.name();
+  }
+  for (const string& specified_output_array : model_flags.output_arrays()) {
+    CHECK(!absl::EndsWith(specified_output_array, ":0"))
+        << "Unsupported explicit zero output index: " << specified_output_array;
+  }
+
   Model* model = new Model;
   ResolveModelFlags(model_flags, model);
 
-  for (const auto& node : inlined_graph.node()) {
+  for (auto node : inlined_graph.node()) {
+    StripZeroOutputIndexFromInputs(&node);
     if (node.op() == "Const") {
       ConvertConstOperator(node, model);
     } else if (node.op() == "Conv2D") {
-- 
GitLab


From a9be434f9b6b4492ae990df1153d774a80dc0a3b Mon Sep 17 00:00:00 2001
From: Benjamin Kramer <kramerb@google.com>
Date: Mon, 20 Nov 2017 02:26:49 -0800
Subject: [PATCH 0104/1225] Upstream LLVM moved headers from llvm/Target to
 llvm/CodeGen/.

PiperOrigin-RevId: 176331872
---
 tensorflow/compiler/xla/service/cpu/BUILD         | 1 +
 tensorflow/compiler/xla/service/cpu/ir_emitter.cc | 4 ++--
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/tensorflow/compiler/xla/service/cpu/BUILD b/tensorflow/compiler/xla/service/cpu/BUILD
index 78216f2ffb..6b62ee5ee7 100644
--- a/tensorflow/compiler/xla/service/cpu/BUILD
+++ b/tensorflow/compiler/xla/service/cpu/BUILD
@@ -273,6 +273,7 @@ cc_library(
         "//tensorflow/compiler/xla/service/llvm_ir:ops",
         "//tensorflow/compiler/xla/service/llvm_ir:tuple_ops",
         "//tensorflow/core:lib",
+        "//third_party/llvm/llvm:codegen",
         "@llvm//:core",
         "@llvm//:support",
         "@llvm//:target",
diff --git a/tensorflow/compiler/xla/service/cpu/ir_emitter.cc b/tensorflow/compiler/xla/service/cpu/ir_emitter.cc
index c00f1d5c1d..8fba823b97 100644
--- a/tensorflow/compiler/xla/service/cpu/ir_emitter.cc
+++ b/tensorflow/compiler/xla/service/cpu/ir_emitter.cc
@@ -26,14 +26,14 @@ limitations under the License.
 
 #include "tensorflow/core/platform/logging.h"
 // IWYU pragma: no_include "llvm/IR/Intrinsics.gen.inc"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
 #include "llvm/IR/BasicBlock.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/GlobalVariable.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/Intrinsics.h"
 #include "llvm/IR/LLVMContext.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
 #include "tensorflow/compiler/xla/layout_util.h"
 #include "tensorflow/compiler/xla/map_util.h"
 #include "tensorflow/compiler/xla/service/buffer_assignment.h"
-- 
GitLab


From b57961353c5b9433923d4b8f0b1ec1731bd6b1a7 Mon Sep 17 00:00:00 2001
From: Benjamin Kramer <kramerb@google.com>
Date: Mon, 20 Nov 2017 04:42:04 -0800
Subject: [PATCH 0105/1225] Unbreak the build

PiperOrigin-RevId: 176341793
---
 tensorflow/contrib/lite/toco/import_tensorflow.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/contrib/lite/toco/import_tensorflow.cc b/tensorflow/contrib/lite/toco/import_tensorflow.cc
index 0135c3e2f9..b00365d5de 100644
--- a/tensorflow/contrib/lite/toco/import_tensorflow.cc
+++ b/tensorflow/contrib/lite/toco/import_tensorflow.cc
@@ -1263,7 +1263,7 @@ void StripCaretFromArrayNames(Model* model) {
 
 void StripZeroOutputIndexFromInputs(NodeDef* node) {
   for (auto& input : *node->mutable_input()) {
-    input = StripSuffixString(input, ":0");
+    input = string(absl::StripSuffix(input, ":0"));
   }
 }
 
-- 
GitLab


From 6040ed631ba8e95b97c0e3edb1dd31e04569b521 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 20 Nov 2017 07:25:17 -0800
Subject: [PATCH 0106/1225] Input types flags refactoring. 1.  --input_type[s]
 is deprecated. Its semantics were not clearly defined,     and included both
 ModelFlags-like semantics (describing a property     of the input file) and
 TocoFlags-like semantics (describing a requested     property of the output
 file). 2.  New ModelFlags: --input_data_type[s], represented as a new 'type' 
    field on each input array proto. This is unambiguously describing a    
 property of the input file, similar to the existing input_array[s],    
 input_shape[s] etc. 3.  New TocoFlag: --inference_input_type.  This is
 essentially the new     name of --input_type, except that it's purely a
 transformation flag,     only describing a property of the requested output
 file, not anymore     mixed with ModelFlags-like semantics (now taken care of
 by 2.).     The name --inference_input_type makes it clear that it's a    
 companion of --inference_type. Also, --inference_input_type is now    
 optional, defaulting to using the same value as --inference_type.     This
 reflects the fact that most users want to do either float     inference on a
 float input, or quantized inference on a quantized     input. The only case
 at the moment where --inference_input_type     is needed, is for doing float
 inference on a quantized input,     which is typically done in some vision
 applications where the     input is a bitmap image with integer-quantized
 channels.

PiperOrigin-RevId: 176356352
---
 tensorflow/contrib/lite/python/lite.py        |  2 +-
 tensorflow/contrib/lite/toco/args.h           | 11 ++-
 .../contrib/lite/toco/model_cmdline_flags.cc  | 46 +++++++++--
 .../contrib/lite/toco/model_flags.proto       | 27 ++++++-
 .../lite/toco/python/toco_from_protos_test.py |  2 +-
 .../contrib/lite/toco/toco_cmdline_flags.cc   | 81 ++++++++-----------
 tensorflow/contrib/lite/toco/toco_flags.proto | 54 +++++++++----
 tensorflow/contrib/lite/toco/toco_tooling.cc  | 70 +++++++---------
 tensorflow/contrib/lite/toco/tooling_util.cc  | 34 ++++++++
 tensorflow/contrib/lite/toco/tooling_util.h   |  2 +
 10 files changed, 214 insertions(+), 115 deletions(-)

diff --git a/tensorflow/contrib/lite/python/lite.py b/tensorflow/contrib/lite/python/lite.py
index 759677121f..3cfee443e5 100644
--- a/tensorflow/contrib/lite/python/lite.py
+++ b/tensorflow/contrib/lite/python/lite.py
@@ -188,7 +188,7 @@ def toco_convert(input_data,
 
     input_array.name = _tensor_name(input_tensor)
     input_array.shape.extend(map(int, input_tensor.get_shape()))
-    toco.input_types.append(tflite_input_type)
+    toco.inference_input_type = tflite_input_type
 
   for output_tensor in output_tensors:
     model.output_arrays.append(_tensor_name(output_tensor))
diff --git a/tensorflow/contrib/lite/toco/args.h b/tensorflow/contrib/lite/toco/args.h
index 28661d4ff0..88e0a29350 100644
--- a/tensorflow/contrib/lite/toco/args.h
+++ b/tensorflow/contrib/lite/toco/args.h
@@ -191,12 +191,15 @@ struct ParsedModelFlags {
   Arg<string> mean_values;
   Arg<float> std_value = Arg<float>(1.f);
   Arg<string> std_values;
+  Arg<string> input_data_type;
+  Arg<string> input_data_types;
   Arg<bool> variable_batch = Arg<bool>(false);
   Arg<bool> drop_control_dependency = Arg<bool>(false);
   Arg<toco::IntList> input_shape;
   Arg<toco::StringMapList> rnn_states;
   Arg<toco::StringMapList> model_checks;
-  // Debugging output options
+  // Debugging output options.
+  // TODO(benoitjacob): these shouldn't be ModelFlags.
   Arg<string> graphviz_first_array;
   Arg<string> graphviz_last_array;
   Arg<string> dump_graphviz;
@@ -213,12 +216,14 @@ struct ParsedTocoFlags {
   // TODO(aselle): command_line_flags  doesn't support doubles
   Arg<float> default_ranges_min = Arg<float>(0.);
   Arg<float> default_ranges_max = Arg<float>(0.);
-  Arg<string> input_type;
-  Arg<string> input_types;
   Arg<string> inference_type;
+  Arg<string> inference_input_type;
   Arg<bool> drop_fake_quant = Arg<bool>(false);
   Arg<bool> reorder_across_fake_quant = Arg<bool>(false);
   Arg<bool> allow_custom_ops = Arg<bool>(false);
+  // Deprecated flags
+  Arg<string> input_type;
+  Arg<string> input_types;
 };
 
 }  // namespace toco
diff --git a/tensorflow/contrib/lite/toco/model_cmdline_flags.cc b/tensorflow/contrib/lite/toco/model_cmdline_flags.cc
index 699c95753f..287a5d563d 100644
--- a/tensorflow/contrib/lite/toco/model_cmdline_flags.cc
+++ b/tensorflow/contrib/lite/toco/model_cmdline_flags.cc
@@ -43,7 +43,8 @@ bool ParseModelFlagsFromCommandLineFlags(
   std::vector<tensorflow::Flag> flags = {
       Flag("input_array", parsed_flags.input_array.bind(),
            parsed_flags.input_array.default_value(),
-           "Name of the input array. If not specified, will try to read "
+           "Deprecated: use --input_arrays instead. Name of the input array. "
+           "If not specified, will try to read "
            "that information from the input file."),
       Flag("input_arrays", parsed_flags.input_arrays.bind(),
            parsed_flags.input_arrays.default_value(),
@@ -51,7 +52,8 @@ bool ParseModelFlagsFromCommandLineFlags(
            "will try to read that information from the input file."),
       Flag("output_array", parsed_flags.output_array.bind(),
            parsed_flags.output_array.default_value(),
-           "Name of the output array, when specifying a unique output array. "
+           "Deprecated: use --output_arrays instead. Name of the output array, "
+           "when specifying a unique output array. "
            "If not specified, will try to read that information from the "
            "input file."),
       Flag("output_arrays", parsed_flags.output_arrays.bind(),
@@ -60,8 +62,9 @@ bool ParseModelFlagsFromCommandLineFlags(
            "If not specified, will try to read "
            "that information from the input file."),
       Flag("input_shape", parsed_flags.input_shape.bind(),
-           parsed_flags.output_arrays.default_value(),
-           "Input array shape. For many models the shape takes the form "
+           parsed_flags.input_shape.default_value(),
+           "Deprecated: use --input_shapes instead. Input array shape. For "
+           "many models the shape takes the form "
            "batch size, input array height, input array width, input array "
            "depth."),
       Flag("input_shapes", parsed_flags.input_shapes.bind(),
@@ -69,9 +72,22 @@ bool ParseModelFlagsFromCommandLineFlags(
            "Shapes corresponding to --input_arrays, colon-separated. For "
            "many models each shape takes the form batch size, input array "
            "height, input array width, input array depth."),
+      Flag("input_data_type", parsed_flags.input_data_type.bind(),
+           parsed_flags.input_data_type.default_value(),
+           "Deprecated: use --input_data_types instead. Input array type, if "
+           "not already provided in the graph. "
+           "Typically needs to be specified when passing arbitrary arrays "
+           "to --input_array."),
+      Flag("input_data_types", parsed_flags.input_data_types.bind(),
+           parsed_flags.input_data_types.default_value(),
+           "Input arrays types, comma-separated, if not already provided in "
+           "the graph. "
+           "Typically needs to be specified when passing arbitrary arrays "
+           "to --input_arrays."),
       Flag("mean_value", parsed_flags.mean_value.bind(),
            parsed_flags.mean_value.default_value(),
-           "mean_value parameter for image models, used to compute input "
+           "Deprecated: use --mean_values instead. mean_value parameter for "
+           "image models, used to compute input "
            "activations from input pixel data."),
       Flag("mean_values", parsed_flags.mean_values.bind(),
            parsed_flags.mean_values.default_value(),
@@ -81,7 +97,8 @@ bool ParseModelFlagsFromCommandLineFlags(
            "--input_arrays."),
       Flag("std_value", parsed_flags.std_value.bind(),
            parsed_flags.std_value.default_value(),
-           "std_value parameter for image models, used to compute input "
+           "Deprecated: use --std_values instead. std_value parameter for "
+           "image models, used to compute input "
            "activations from input pixel data."),
       Flag("std_values", parsed_flags.std_values.bind(),
            parsed_flags.std_values.default_value(),
@@ -232,6 +249,23 @@ void ReadModelFlagsFromCommandLineFlags(
       CHECK(last != std_values[i].data());
     }
   }
+  if (parsed_model_flags.input_data_type.specified()) {
+    QCHECK(uses_single_input_flags);
+    IODataType type;
+    QCHECK(IODataType_Parse(parsed_model_flags.input_data_type.value(), &type));
+    model_flags->mutable_input_arrays(0)->set_data_type(type);
+  }
+  if (parsed_model_flags.input_data_types.specified()) {
+    QCHECK(uses_multi_input_flags);
+    std::vector<string> input_data_types =
+        absl::StrSplit(parsed_model_flags.input_data_types.value(), ',');
+    QCHECK(input_data_types.size() == model_flags->input_arrays_size());
+    for (int i = 0; i < input_data_types.size(); ++i) {
+      IODataType type;
+      QCHECK(IODataType_Parse(input_data_types[i], &type));
+      model_flags->mutable_input_arrays(i)->set_data_type(type);
+    }
+  }
   if (parsed_model_flags.input_shape.specified()) {
     QCHECK(uses_single_input_flags);
     if (model_flags->input_arrays().empty()) {
diff --git a/tensorflow/contrib/lite/toco/model_flags.proto b/tensorflow/contrib/lite/toco/model_flags.proto
index b016f34621..bd6e374e8c 100644
--- a/tensorflow/contrib/lite/toco/model_flags.proto
+++ b/tensorflow/contrib/lite/toco/model_flags.proto
@@ -16,7 +16,7 @@ import "tensorflow/contrib/lite/toco/types.proto";
 
 package toco;
 
-// Next ID to USE: 5.
+// Next ID to USE: 6.
 message InputArray {
   // Name of the input arrays, i.e. the arrays from which input activations
   // will be read.
@@ -46,6 +46,31 @@ message InputArray {
   // (TensorFlow via LegacyFedInput).
   optional float mean_value = 3;
   optional float std_value = 4 [default = 1.];
+
+  // Data type of the input.
+  //
+  // In many graphs, the input arrays already have defined data types,
+  // e.g. Placeholder nodes in a TensorFlow GraphDef have a dtype attribute.
+  // In those cases, it is not needed to specify this data_type flag.
+  // The purpose of this flag is only to define the data type of input
+  // arrays whose type isn't defined in the input graph file. For example,
+  // when specifying an arbitrary (not Placeholder) --input_array into
+  // a TensorFlow GraphDef.
+  //
+  // When this data_type is quantized (e.g. QUANTIZED_UINT8), the
+  // corresponding quantization parameters are the mean_value, std_value
+  // fields.
+  //
+  // It is also important to understand the nuance between this data_type
+  // flag and the inference_input_type in TocoFlags. The basic difference
+  // is that this data_type (like all ModelFlags) describes a property
+  // of the input graph, while inference_input_type (like all TocoFlags)
+  // describes an aspect of the toco transformation process and thus of
+  // the output file. The types of input arrays may be different between
+  // the input and output files if quantization or dequantization occurred.
+  // Such differences can only occur for real-number data i.e. only
+  // between FLOAT and quantized types (e.g. QUANTIZED_UINT8).
+  optional IODataType data_type = 5;
 }
 
 // ModelFlags encodes properties of a model that, depending on the file
diff --git a/tensorflow/contrib/lite/toco/python/toco_from_protos_test.py b/tensorflow/contrib/lite/toco/python/toco_from_protos_test.py
index ce19b7efbe..28d52067a9 100644
--- a/tensorflow/contrib/lite/toco/python/toco_from_protos_test.py
+++ b/tensorflow/contrib/lite/toco/python/toco_from_protos_test.py
@@ -48,7 +48,7 @@ class TocoFromProtosTest(googletest.TestCase):
     toco_flags = toco_flags_pb2.TocoFlags()
     toco_flags.input_format = toco_flags_pb2.TENSORFLOW_GRAPHDEF
     toco_flags.output_format = toco_flags_pb2.TFLITE
-    toco_flags.input_types.append(types_pb2.FLOAT)
+    toco_flags.inference_input_type = types_pb2.FLOAT
     toco_flags.inference_type = types_pb2.FLOAT
     model_flags = model_flags_pb2.ModelFlags()
     input_array = model_flags.input_arrays.add()
diff --git a/tensorflow/contrib/lite/toco/toco_cmdline_flags.cc b/tensorflow/contrib/lite/toco/toco_cmdline_flags.cc
index d43c3b4a8e..e97f59eb3f 100644
--- a/tensorflow/contrib/lite/toco/toco_cmdline_flags.cc
+++ b/tensorflow/contrib/lite/toco/toco_cmdline_flags.cc
@@ -55,25 +55,23 @@ bool ParseTocoFlagsFromCommandLineFlags(
            parsed_flags.default_ranges_max.default_value(),
            "If defined, will be used as the default value for the max bound "
            "of min/max ranges used for quantization."),
+      Flag("inference_type", parsed_flags.inference_type.bind(),
+           parsed_flags.inference_type.default_value(),
+           "Target data type of arrays in the output file (for input_arrays, "
+           "this may be overridden by inference_input_type)."),
+      Flag("inference_input_type", parsed_flags.inference_input_type.bind(),
+           parsed_flags.inference_input_type.default_value(),
+           "Target data type of input arrays. If not specified, inference_type "
+           "is used."),
       Flag("input_type", parsed_flags.input_type.bind(),
            parsed_flags.input_type.default_value(),
-           "Data type of the input array in the "
-           "output file. "),
+           "Deprecated old name of inference_input_type."),
       Flag("input_types", parsed_flags.input_types.bind(),
            parsed_flags.input_types.default_value(),
-           "Data types of the input arrays in the "
-           "output file. "
-           "Comma-separated list matching the enumeration order of "
-           "input_arrays."),
-      Flag("inference_type", parsed_flags.inference_type.bind(),
-           parsed_flags.inference_type.default_value(),
-           "Data type, in the output file, of internal and output arrays "
-           "that are FLOAT in the input file. Thus, the value FLOAT means "
-           "keep doing floating-point inference, while the value "
-           "QUANTIZED_UINT8 means replace all internal floating-point "
-           "arithmetic by integer arithmetic producing 8-bit integer "
-           "activations instead of float activations --- which we call "
-           "\'quantized inference\'."),
+           "Deprecated old name of inference_input_type. Was meant to be a "
+           "comma-separated list, but this was deprecated before "
+           "multiple-input-types was ever properly supported."),
+
       Flag("drop_fake_quant", parsed_flags.drop_fake_quant.bind(),
            parsed_flags.drop_fake_quant.default_value(),
            "Ignore and discard FakeQuant nodes. For instance, that can be used "
@@ -158,49 +156,36 @@ void ReadTocoFlagsFromCommandLineFlags(const ParsedTocoFlags& parsed_toco_flags,
 
   PARSE_TOCO_FLAG(FileFormat, input_format, FlagRequirement::kMustBeSpecified);
   PARSE_TOCO_FLAG(FileFormat, output_format, FlagRequirement::kMustBeSpecified);
-  FlagRequirement tflite_flags_requirement =
-      toco_flags->output_format() == TFLITE
-          ? FlagRequirement::kMustBeSpecified
-          : FlagRequirement::kMustNotBeSpecified;
-  PARSE_TOCO_FLAG(IODataType, inference_type, tflite_flags_requirement);
+  PARSE_TOCO_FLAG(IODataType, inference_type, FlagRequirement::kNone);
+  PARSE_TOCO_FLAG(IODataType, inference_input_type, FlagRequirement::kNone);
   READ_TOCO_FLAG(default_ranges_min, FlagRequirement::kNone);
   READ_TOCO_FLAG(default_ranges_max, FlagRequirement::kNone);
   READ_TOCO_FLAG(drop_fake_quant, FlagRequirement::kNone);
   READ_TOCO_FLAG(reorder_across_fake_quant, FlagRequirement::kNone);
   READ_TOCO_FLAG(allow_custom_ops, FlagRequirement::kNone);
 
-#undef READ_TOCO_FLAG
-#undef PARSE_TOCO_FLAG
-
-  const bool input_type_specified = parsed_toco_flags.input_type.specified();
-  const bool input_types_specified = parsed_toco_flags.input_types.specified();
-  if (toco_flags->output_format() == TFLITE) {
-    QCHECK(input_type_specified || input_types_specified)
-        << "When output_format=TFLITE, either input_type or input_types needs "
-           "to be specified.";
-  } else {
-    QCHECK(!input_type_specified && !input_types_specified)
-        << "With this output_format, neither input_type nor input_types must "
-           "be specified.";
-  }
-  QCHECK(!(input_type_specified && input_types_specified))
-      << "input_type and input_types are mutually exclusive";
-  if (input_type_specified) {
-    IODataType type;
-    QCHECK(IODataType_Parse(parsed_toco_flags.input_type.value(), &type))
-        << "Unrecognized input_type: " << parsed_toco_flags.input_type.value();
-    toco_flags->add_input_types(type);
+  // Deprecated flag handling.
+  if (parsed_toco_flags.input_type.specified()) {
+    LOG(WARNING) << "--input_type is deprecated. Use --inference_input_type.";
+    toco::IODataType input_type;
+    QCHECK(toco::IODataType_Parse(parsed_toco_flags.input_type.value(),
+                                  &input_type));
+    toco_flags->set_inference_input_type(input_type);
   }
-  if (input_types_specified) {
+  if (parsed_toco_flags.input_types.specified()) {
+    LOG(WARNING) << "--input_types is deprecated. Use --inference_input_type.";
     std::vector<string> input_types =
         absl::StrSplit(parsed_toco_flags.input_types.value(), ',');
-    for (const string& t : input_types) {
-      IODataType type;
-      QCHECK(IODataType_Parse(t, &type))
-          << "Unrecognized input_types value " << t
-          << " in input_types=" << parsed_toco_flags.input_types.value();
-      toco_flags->add_input_types(type);
+    QCHECK(!input_types.empty());
+    for (int i = 1; i < input_types.size(); i++) {
+      QCHECK_EQ(input_types[i], input_types[0]);
     }
+    toco::IODataType input_type;
+    QCHECK(toco::IODataType_Parse(input_types[0], &input_type));
+    toco_flags->set_inference_input_type(input_type);
   }
+
+#undef READ_TOCO_FLAG
+#undef PARSE_TOCO_FLAG
 }
 }  // namespace toco
diff --git a/tensorflow/contrib/lite/toco/toco_flags.proto b/tensorflow/contrib/lite/toco/toco_flags.proto
index e900e1a25a..7bddce5b03 100644
--- a/tensorflow/contrib/lite/toco/toco_flags.proto
+++ b/tensorflow/contrib/lite/toco/toco_flags.proto
@@ -36,7 +36,7 @@ enum FileFormat {
 // are not normally encoded in model files and in general may not be thought
 // of as properties of models, instead describing how models are to be
 // processed in the context of the present tooling job.
-// Next Id: 11
+// Next Id: 12
 message TocoFlags {
   // Input file format
   optional FileFormat input_format = 1;
@@ -44,23 +44,47 @@ message TocoFlags {
   // Output file format
   optional FileFormat output_format = 2;
 
-  // Numeric data types of the input arrays in the output format.
-  // This controls what input types the output file will be expecting.
-  // This is not a description of the input types of the input file.
-  // For example, the input file may have a float input placeholder,
-  // but we may want to generate a quantized TFLite file from it,
-  // or a float TFLite file taking a quantized input.
+  // Similar to inference_type, but allows to control specifically the
+  // quantization of input arrays, separately from other arrays.
   //
-  // The length of this list should match the length of the input_arrays
-  // list in ModelFlags.
-  repeated IODataType input_types = 9;
+  // If not set, then the value of inference_type is implicitly used, i.e.
+  // by default input arrays are quantized like other arrays.
+  //
+  // Like inference_type, this only affects real-number arrays. By "real-number"
+  // we mean float arrays, and quantized arrays. This excludes plain
+  // integer arrays, strings arrays, and every other data type.
+  //
+  // The typical use for this flag is for vision models taking a bitmap
+  // as input, typically with uint8 channels, yet still requiring floating-point
+  // inference. For such image models, the uint8 input is quantized, i.e.
+  // the uint8 values are interpreted as real numbers, and the quantization
+  // parameters used for such input arrays are their mean_value, std_value
+  // parameters.
+  optional IODataType inference_input_type = 11;
 
-  // Numeric data type of the internal activations array and output array.
+  // Sets the type of real-number arrays in the output file, that is, controls
+  // the representation (quantization) of real numbers in the output file,
+  // except for input arrays, which are controlled by inference_input_type.
+  //
+  // NOTE: this flag only impacts real-number arrays. By "real-number"
+  // we mean float arrays, and quantized arrays. This excludes plain
+  // integer arrays, strings arrays, and every other data type.
+  //
+  // For real-number arrays, the impact of this flag is to allow the output
+  // file to choose a different real-numbers representation (quantization)
+  // from what the input file used. For any other types of arrays, changing
+  // the data type would not make sense.
+  //
+  // Specifically:
+  //    - If FLOAT, then real-numbers arrays will be of type float in
+  //      the output file. If they were quantized in the input file, then
+  //      they get dequantized.
+  //    - If QUANTIZED_UINT8, then real-numbers arrays will be quantized
+  //      as uint8 in the output file. If they were float in the input file,
+  //      then they get quantized.
+  //    - If not set, then all real-numbers arrays retain the same type in the
+  //      output file as they have in the input file.
   //
-  // As a matter of implementation detail, most model
-  // parameter arrays (weights, etc) will tend to also use this data type.
-  // Not all will, though: for instance, bias vectors will typically
-  // get quantized as int32 when weights and activations get quantized as uint8.
   optional IODataType inference_type = 4;
 
   // default_ranges_min and default_ranges_max are helpers to experiment
diff --git a/tensorflow/contrib/lite/toco/toco_tooling.cc b/tensorflow/contrib/lite/toco/toco_tooling.cc
index 232538a841..1408f7cd7b 100644
--- a/tensorflow/contrib/lite/toco/toco_tooling.cc
+++ b/tensorflow/contrib/lite/toco/toco_tooling.cc
@@ -86,38 +86,36 @@ void MakeGeneralGraphTransformationsSet(
 }
 
 void SetArrayFinalDataTypes(const TocoFlags& toco_flags, Model* model) {
-  const bool output_is_tflite = toco_flags.output_format() == TFLITE;
+  const bool output_supports_only_float =
+      toco_flags.output_format() == TENSORFLOW_GRAPHDEF;
 
-  if (output_is_tflite) {
-    if (!toco_flags.input_types().empty()) {
-      for (int i = 0; i < model->flags.input_arrays_size(); i++) {
-        int input_types_index = toco_flags.input_types_size() == 1 ? 0 : i;
-        const auto input_type = toco_flags.input_types(input_types_index);
-        ArrayDataType final_data_type = ArrayDataType::kNone;
-        switch (input_type) {
-          case FLOAT:
-            final_data_type = ArrayDataType::kFloat;
-            break;
-          case QUANTIZED_UINT8:
-            final_data_type = ArrayDataType::kUint8;
-            break;
-          case INT32:
-            final_data_type = ArrayDataType::kInt32;
-            break;
-          case INT64:
-            final_data_type = ArrayDataType::kInt64;
-            break;
-          default:
-            LOG(FATAL) << "Unknown data type";
-        }
-        model->arrays[model->flags.input_arrays(i).name()]->final_data_type =
-            final_data_type;
-      }
-    }
+  ArrayDataType specified_final_data_type = ArrayDataType::kNone;
+  if (toco_flags.has_inference_input_type()) {
+    specified_final_data_type =
+        ConvertIODataTypeToArrayDataType(toco_flags.inference_input_type());
+  } else if (toco_flags.has_inference_type()) {
+    specified_final_data_type =
+        ConvertIODataTypeToArrayDataType(toco_flags.inference_type());
+  }
+  ArrayDataType final_data_type = ArrayDataType::kNone;
+  if (output_supports_only_float) {
+    QCHECK(specified_final_data_type == ArrayDataType::kNone ||
+           specified_final_data_type == ArrayDataType::kFloat);
+    final_data_type = ArrayDataType::kFloat;
   } else {
-    for (int i = 0; i < model->flags.input_arrays_size(); i++) {
-      model->arrays[model->flags.input_arrays(i).name()]->final_data_type =
-          ArrayDataType::kFloat;
+    final_data_type = specified_final_data_type;
+  }
+  for (int i = 0; i < model->flags.input_arrays_size(); i++) {
+    auto* array = model->arrays[model->flags.input_arrays(i).name()].get();
+    // Note that the notion of changing data types only applies to real-numbers
+    // arrays (see the documentation for inference_input_type).
+    // TODO(benoitjacob) this is assuming that uint8 arrays are quantized,
+    // i.e. represent real numbers by means of quantization parameters,
+    // and not plain integer uint8 input arrays.
+    const bool is_real_numbers = array->data_type == ArrayDataType::kFloat ||
+                                 array->data_type == ArrayDataType::kUint8;
+    if (is_real_numbers) {
+      array->final_data_type = final_data_type;
     }
   }
 }
@@ -155,17 +153,9 @@ void Transform(const TocoFlags& toco_flags, Model* model) {
   const bool output_is_tflite_quantized =
       output_is_tflite && inference_type == QUANTIZED_UINT8;
 
-  if (output_is_tflite) {
-    QCHECK(toco_flags.input_types_size() == 1 ||
-           toco_flags.input_types_size() == model->flags.input_arrays_size())
-        << "Mismatched numbers of input_arrays and input_types";
-  }
-
   if (output_is_tflite_quantized) {
-    for (const auto& input_type : toco_flags.input_types()) {
-      QCHECK_NE(input_type, FLOAT)
-          << "Quantized inference is not allowed with float inputs.";
-    }
+    QCHECK_NE(toco_flags.inference_input_type(), FLOAT)
+        << "Quantized inference is not allowed with float inputs.";
   }
 
   SetArrayFinalDataTypes(toco_flags, model);
diff --git a/tensorflow/contrib/lite/toco/tooling_util.cc b/tensorflow/contrib/lite/toco/tooling_util.cc
index bcbfed62d3..ec24f76dc8 100644
--- a/tensorflow/contrib/lite/toco/tooling_util.cc
+++ b/tensorflow/contrib/lite/toco/tooling_util.cc
@@ -991,6 +991,11 @@ void ResolveModelFlags(const ModelFlags& model_flags, Model* model) {
             specified_input_array.shape());
       }
     }
+
+    if (specified_input_array.has_data_type()) {
+      QCHECK(!dst_input_array->has_data_type());
+      dst_input_array->set_data_type(specified_input_array.data_type());
+    }
   }
 
   if (model_flags.output_arrays_size() > 0) {
@@ -1046,6 +1051,20 @@ void ResolveModelFlags(const ModelFlags& model_flags, Model* model) {
            "command-line flag.";
 
     auto& input_array = model->GetOrCreateArray(input_array_proto.name());
+    if (input_array_proto.has_data_type()) {
+      const ArrayDataType specified_type =
+          ConvertIODataTypeToArrayDataType(input_array_proto.data_type());
+      QCHECK(specified_type != ArrayDataType::kNone);
+      if (input_array.data_type != ArrayDataType::kNone) {
+        QCHECK(specified_type == input_array.data_type)
+            << "For input array " << input_array_proto.name()
+            << " the specified input data type "
+            << IODataType_Name(input_array_proto.data_type())
+            << " conflicts with the existing type.";
+      }
+      input_array.data_type = specified_type;
+    }
+
     if (input_array.data_type == ArrayDataType::kNone) {
       // We start out with a float input array;
       // that may get replaced by a uint8 array later, by
@@ -1549,4 +1568,19 @@ void CheckFinalDataTypesSatisfied(const Model& model) {
   }
 }
 
+ArrayDataType ConvertIODataTypeToArrayDataType(IODataType type) {
+  switch (type) {
+    case FLOAT:
+      return ArrayDataType::kFloat;
+    case QUANTIZED_UINT8:
+      return ArrayDataType::kUint8;
+    case INT32:
+      return ArrayDataType::kInt32;
+    case INT64:
+      return ArrayDataType::kInt64;
+    default:
+      return ArrayDataType::kNone;
+  }
+}
+
 }  // namespace toco
diff --git a/tensorflow/contrib/lite/toco/tooling_util.h b/tensorflow/contrib/lite/toco/tooling_util.h
index e863996d7b..d820d619d0 100644
--- a/tensorflow/contrib/lite/toco/tooling_util.h
+++ b/tensorflow/contrib/lite/toco/tooling_util.h
@@ -288,6 +288,8 @@ bool IsDiscardableArray(const Model& model, const string& array_name);
 
 void CheckFinalDataTypesSatisfied(const Model& model);
 
+ArrayDataType ConvertIODataTypeToArrayDataType(IODataType type);
+
 }  // namespace toco
 
 #endif  // THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_TOCO_TOOLING_UTIL_H_
-- 
GitLab


From 27441400018afcac84c3f0fcc1f96dd9c065b1ab Mon Sep 17 00:00:00 2001
From: Alan Du <alanhdu@gmail.com>
Date: Mon, 20 Nov 2017 11:36:08 -0500
Subject: [PATCH 0107/1225] Only install enum34 on Python <3.4 versions

Python 3.6 sometimes has issues with enum34 because the standard library
relies on enum features not in enum34 (see
https://bitbucket.org/stoneleaf/enum34/issues/19/enum34-isnt-compatible-with-python-36
for more details).
---
 tensorflow/tools/pip_package/setup.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tensorflow/tools/pip_package/setup.py b/tensorflow/tools/pip_package/setup.py
index a493c6f2aa..726fc92bf6 100644
--- a/tensorflow/tools/pip_package/setup.py
+++ b/tensorflow/tools/pip_package/setup.py
@@ -33,7 +33,6 @@ _VERSION = '1.4.0'
 
 REQUIRED_PACKAGES = [
     'absl-py',
-    'enum34 >= 1.1.6',
     'numpy >= 1.12.1',
     'six >= 1.10.0',
     'protobuf >= 3.4.0',
@@ -62,9 +61,10 @@ if 'tf_nightly' in project_name:
       REQUIRED_PACKAGES.remove(package)
       break
 
-# weakref.finalize was introduced in Python 3.4
+# weakref.finalize and enum were introduced in Python 3.4
 if sys.version_info < (3, 4):
   REQUIRED_PACKAGES.append('backports.weakref >= 1.0rc1')
+  REQUIRED_PACKAGES.append('enum34 >= 1.1.6')
 
 # pylint: disable=line-too-long
 CONSOLE_SCRIPTS = [
-- 
GitLab


From 728d4b347fd928b1d2d8f13884924c2e7f3e37ad Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 20 Nov 2017 09:28:11 -0800
Subject: [PATCH 0108/1225] Update documentation to the input_type changes.

PiperOrigin-RevId: 176371086
---
 .../lite/toco/g3doc/cmdline_examples.md       |  37 ++-----
 .../lite/toco/g3doc/cmdline_reference.md      | 103 ++++++++++--------
 2 files changed, 69 insertions(+), 71 deletions(-)

diff --git a/tensorflow/contrib/lite/toco/g3doc/cmdline_examples.md b/tensorflow/contrib/lite/toco/g3doc/cmdline_examples.md
index b9f8c8d152..7e152f5ba8 100644
--- a/tensorflow/contrib/lite/toco/g3doc/cmdline_examples.md
+++ b/tensorflow/contrib/lite/toco/g3doc/cmdline_examples.md
@@ -26,7 +26,6 @@ bazel run --config=opt \
   --output_file=/tmp/foo.lite \
   --input_format=TENSORFLOW_GRAPHDEF \
   --output_format=TFLITE \
-  --input_type=FLOAT \
   --inference_type=FLOAT \
   --input_shape=1,128,128,3 \
   --input_array=input \
@@ -58,19 +57,9 @@ To explain each of these flags:
     allowing to defer the specification of the input shape until runtime. The
     format of `input_shape` is always a comma-separated list of dimensions,
     always in TensorFlow convention.
-*   `--input_type` specifies what should be the type of the input arrays in the
-    **output** file. `--input_type` does not describe a property of the input
-    file: the type of input arrays is already encoded in the input graph.
-    Rather, `--input_type` is how you specify what should be the type of the
-    inputs to be provided to the output converted graph. This only affects
-    arrays of real numbers: this flag allows to quantized/dequantize
-    real-numbers inputs, switching between floating-point and quantized forms.
-    This flag has no incidence on all other types of input arrays, such as plain
-    integers or strings.
 *   `--inference_type` specifies what type of arithmetic the output file should
     be relying on. It implies in particular the choice of type of the output
-    arrays in the output file. Like `--input_type`, `--inference_type` does not
-    describe a property of the input file.
+    arrays in the output file.
 
 ## Just optimize a TensorFlow GraphDef
 
@@ -94,11 +83,11 @@ bazel run --config=opt \
   --output_array=MobilenetV1/Predictions/Reshape_1
 ```
 
-Here we did not pass `--input_type` and `--inference_type` because they are
-considered not applicable to the TensorFlow GraphDef format (as far as we are
-concerned, TensorFlow GraphDefs are technically always float, and the only
-flavor of "quantized" GraphDef that the converter deals with is "FakeQuantized"
-graphs that are still technically float graphs).
+Here we did not pass `--inference_type` because it is not considered applicable
+to the TensorFlow GraphDef format (as far as we are concerned, TensorFlow
+GraphDefs are technically always float, and the only flavor of "quantized"
+GraphDef that the converter deals with is "FakeQuantized" graphs that are still
+technically float graphs).
 
 Below in the section about passing arbitrary input/output arrays we give another
 example, using the converter to extract just a sub-graph from a TensorFlow
@@ -144,7 +133,6 @@ bazel run --config=opt \
   --output_file=/tmp/foo.lite \
   --input_format=TENSORFLOW_GRAPHDEF \
   --output_format=TFLITE \
-  --input_type=QUANTIZED_UINT8 \
   --inference_type=QUANTIZED_UINT8 \
   --input_shape=1,128,128,3 \
   --input_array=input \
@@ -156,11 +144,9 @@ bazel run --config=opt \
 Here, besides changing `--input_file` to point to a (fake-)quantized GraphDef,
 the only other changes are:
 
-*   To change `--input_type` and `--inference_type` to `QUANTIZED_UINT8`. This
-    effectively tells the converter to generate an output file that can take a
-    quantized uint8 array as input (`--input_type=QUANTIZED_UINT8`), and have
-    quantized uint8 internal and output arrays as well
-    (`--inference_type=QUANTIZED_UINT8`).
+*   To change `--inference_type` to `QUANTIZED_UINT8`. This effectively tells
+    the converter to generate an output file that performs quantized inference
+    on a quantized input.
 *   To pass `--mean_value` and `--std_value` flags to describe how the quantized
     uint8 input array values are to be interpreted as the mathematical real
     numbers that the graph is concerned with (keep in mind that even a
@@ -195,7 +181,6 @@ bazel run --config=opt \
   --output_file=/tmp/foo.cc \
   --input_format=TENSORFLOW_GRAPHDEF \
   --output_format=TFLITE \
-  --input_type=QUANTIZED_UINT8 \
   --inference_type=QUANTIZED_UINT8 \
   --input_shape=1,128,128,3 \
   --input_array=input \
@@ -225,7 +210,6 @@ bazel run --config=opt \
   --output_file=/tmp/foo.lite \
   --input_format=TENSORFLOW_GRAPHDEF \
   --output_format=TFLITE \
-  --input_type=FLOAT \
   --inference_type=FLOAT \
   --input_shape=1,224,224,3 \
   --input_array=input \
@@ -254,7 +238,6 @@ bazel run --config=opt \
   --output_file=/tmp/foo.lite \
   --input_format=TENSORFLOW_GRAPHDEF \
   --output_format=TFLITE \
-  --input_type=FLOAT \
   --inference_type=FLOAT \
   --input_shapes=1,28,28,96:1,28,28,16:1,28,28,192:1,28,28,64 \
   --input_arrays=InceptionV1/InceptionV1/Mixed_3b/Branch_1/Conv2d_0a_1x1/Relu,InceptionV1/InceptionV1/Mixed_3b/Branch_2/Conv2d_0a_1x1/Relu,InceptionV1/InceptionV1/Mixed_3b/Branch_3/MaxPool_0a_3x3/MaxPool,InceptionV1/InceptionV1/Mixed_3b/Branch_0/Conv2d_0a_1x1/Relu \
@@ -328,7 +311,6 @@ bazel run --config=opt \
   --output_file=/tmp/foo.lite \
   --input_format=TENSORFLOW_GRAPHDEF \
   --output_format=TFLITE \
-  --input_type=FLOAT \
   --inference_type=FLOAT \
   --input_shape=1,128,128,3 \
   --input_array=input \
@@ -436,7 +418,6 @@ bazel run --config=opt \
   --output_file=/tmp/foo.lite \
   --input_format=TENSORFLOW_GRAPHDEF \
   --output_format=TFLITE \
-  --input_type=FLOAT \
   --inference_type=FLOAT \
   --input_shape=1,128,128,3 \
   --input_array=input \
diff --git a/tensorflow/contrib/lite/toco/g3doc/cmdline_reference.md b/tensorflow/contrib/lite/toco/g3doc/cmdline_reference.md
index cc6d416959..4776741ab9 100644
--- a/tensorflow/contrib/lite/toco/g3doc/cmdline_reference.md
+++ b/tensorflow/contrib/lite/toco/g3doc/cmdline_reference.md
@@ -38,23 +38,27 @@ on the input and output formats, additional flags may be allowed or mandatory:
         file are to be considered the input activations.
     *   `--input_shape` or `--input_shapes` specify the shapes of the input
         arrays.
+    *   `--input_data_type` or `--input_data_types` specify the data types of
+        input arrays, which can be used if the input file does not already
+        specify them.
     *   `--mean_value` or `--mean_values`, and `--std_value` or `--std_values`,
         give the dequantization parameters of the input arrays, for the case
         when the output file will accept quantized input arrays.
 *   *Transformation flags* specify options of the transformations to be applied
     to the graph, i.e. they specify requested properties that the output file
     should have.
-    *   `--input_type` specifies the type that the input arrays should have
-        after transformations, in the output file. This is where you choose
-        whether you want runtime inference code to accept float or quantized
-        inputs. This flag only applies to float or quantized inputs, and allows
-        to convert between the two. This flag has no effect on all other types
-        of inputs, such as ordinary integer arrays.
-    *   `--inference_type` or `--inference_types` specify the type that generic
-        intermediate and output activation arrays should have after
-        transformations, in the output file. This is where you choose whether
-        you want runtime inference code to perform float or quantized inference
-        arithmetic.
+    *   `--inference_type` specifies the type of real-numbers arrays in the
+        output file. This only affects arrays of real numbers and allows to
+        control their quantization or dequantization, effectively switching
+        between floating-point and quantized arithmetic for the inference
+        workload, as far as real numbers are concerned. Other data types are
+        unaffected (e.g. plain integers, and strings).
+    *   `--inference_input_type` is like `--inference_type` but specifically
+        controlling input arrays, separately from other arrays. If not
+        specified, then `--inference_type` is used. The use case for specifying
+        `--inference_input_type` is when one wants to perform floating-point
+        inference on a quantized input, as is common in image models operating
+        on bitmap image inputs.
     *   Some transformation flags allow to carry on with quantization when the
         input graph is not properly quantized: `--default_ranges_min`,
         `--default_ranges_max`, `--drop_fake_quant`,
@@ -77,8 +81,6 @@ on the input and output formats, additional flags may be allowed or mandatory:
     *   `TFLITE` &mdash; The TensorFlow Lite flatbuffers format.
         *   Whether a float or quantized TensorFlow Lite file will be produced
             depends on the `--inference_type` flag.
-        *   Whether the produced TensorFlow Lite file will accept a float or
-            quantized input depends on the `--input_type` flag.
     *   `GRAPHVIZ_DOT` &mdash; The GraphViz `.dot` format. This asks the
         converter to generate a reasonable graphical representation of the graph
         after simplification by a generic set of transformation.
@@ -126,9 +128,7 @@ additional information about the single input array:
         next innermost dimension after 'depth').
 *   `--mean_value` and `--std_value`. Type: floating-point. The decimal point
     character is always the dot (`.`) regardless of the locale. These specify
-    the (de-)quantization parameters of the input array, to use when the output
-    file will take a quantized input array (that is, when passing
-    `--input_type=QUANTIZED_UINT8`).
+    the (de-)quantization parameters of the input array, when it is quantized.
     *   The meaning of mean_value and std_value is as follows: each quantized
         value in the quantized input array will be interpreted as a mathematical
         real number (i.e. as an input activation value) according to the
@@ -162,33 +162,48 @@ additional information about the multiple input arrays:
 
 ### Transformation flags
 
-*   `--input_type`. Type: string. Specifies what should be the type of the
-    entries in the input array(s) in the output file, after transformations, for
-    those input arrays that are originally either floating-point or quantized
-    real numbers in the input file. If there are multiple such input arrays,
-    then they all use this type. Input arrays of other types, such as arrays of
-    plain integers or strings, are not concerned with this flag. Allowed values:
-    *   `FLOAT` &mdash; Keep floating-point input arrays as such. Dequantize any
-        quantized input array. entries ("float32").
-    *   `QUANTIZED_UINT8` &mdash; Quantize floating-point input arrays, to have
-        8-bit unsigned integer entries. The quantization params are specified by
-        `--mean_value`, `--std_value` flags as explained in the documentation of
-        these flags.
-*   `--inference_type`. Type: string. Specifies what to do with floating-point
-    arrays found in the input file, besides input arrays. In other words, this
-    controls the possible quantization of floating-point weights, intermediate
-    activations, and output activations. Has no effect on arrays that aren't
-    floating-point in the input file. Allowed values:
-    *   `FLOAT` &mdash; Keep floating-point arrays as floating-point in the
-        output file. This corresponds to what is commonly called "floating-point
-        inference".
-    *   `QUANTIZED_UINT8` &mdash; Quantize floating-point arrays, changing their
-        storage data type from float to some integer type:
-        *   All float activations are quantized as `uint8`.
-        *   Almost all float weights are quantized as `uint8`.
-            *   A few exceptions exist. In particular, the bias-vectors in
-                "Conv" and "FullyConnected" layers are quantized as `int32`
-                instead for technical reasons.
+*   `--inference_type`. Type: string. Sets the type of real-number arrays in the
+    output file, that is, controls the representation (quantization) of real
+    numbers in the output file, except for input arrays, which are controlled by
+    `--inference_input_type`.
+
+    This flag only impacts real-number arrays. By "real-number" we mean float
+    arrays, and quantized arrays. This excludes plain integer arrays, strings
+    arrays, and every other data type.
+
+    For real-number arrays, the impact of this flag is to allow the output file
+    to choose a different real-numbers representation (quantization) from what
+    the input file used. For any other types of arrays, changing the data type
+    would not make sense.
+
+    Specifically:
+
+    *   If `FLOAT`, then real-numbers arrays will be of type float in the output
+        file. If they were quantized in the input file, then they get
+        dequantized.
+    *   If `QUANTIZED_UINT8`, then real-numbers arrays will be quantized as
+        uint8 in the output file. If they were float in the input file, then
+        they get quantized.
+    *   If not set, then all real-numbers arrays retain the same type in the
+        output file as they have in the input file.
+
+*   `--inference_input_type`. Type: string. Similar to inference_type, but
+    allows to control specifically the quantization of input arrays, separately
+    from other arrays.
+
+    If not set, then the value of `--inference_type` is implicitly used, i.e. by
+    default input arrays are quantized like other arrays.
+
+    Like `--inference_type`, this only affects real-number arrays. By
+    "real-number" we mean float arrays, and quantized arrays. This excludes
+    plain integer arrays, strings arrays, and every other data type.
+
+    The typical use for this flag is for vision models taking a bitmap as input,
+    typically with uint8 channels, yet still requiring floating-point inference.
+    For such image models, the uint8 input is quantized, i.e. the uint8 values
+    are interpreted as real numbers, and the quantization parameters used for
+    such input arrays are their `mean_value`, `std_value` parameters.
+
 *   `--default_ranges_min`, `--default_ranges_max`. Type: floating-point. The
     decimal point character is always the dot (`.`) regardless of the locale.
     These flags enable what is called "dummy quantization". If defined, their
@@ -198,9 +213,11 @@ additional information about the multiple input arrays:
     incorrectly-quantized input files. This enables easy performance prototyping
     ("how fast would my model run if I quantized it?") but should never be used
     in production as the resulting quantized arithmetic is inaccurate.
+
 *   `--drop_fake_quant`. Type: boolean. Default: false. Causes fake-quantization
     nodes to be dropped from the graph. This may be used to recover a plain
     float graph from a fake-quantized graph.
+
 *   `--reorder_across_fake_quant`. Type: boolean. Default: false. Normally,
     fake-quantization nodes must be strict boundaries for graph transformations,
     in order to ensure that quantized inference has the exact same arithmetic
-- 
GitLab


From 68ffc85450d328cf9e1323dd0021c6671110c5fb Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 20 Nov 2017 10:31:08 -0800
Subject: [PATCH 0109/1225] Adds eager compatability message for
 embedding_column.

PiperOrigin-RevId: 176380794
---
 tensorflow/python/feature_column/feature_column.py | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/tensorflow/python/feature_column/feature_column.py b/tensorflow/python/feature_column/feature_column.py
index a19636474b..55969c4b75 100644
--- a/tensorflow/python/feature_column/feature_column.py
+++ b/tensorflow/python/feature_column/feature_column.py
@@ -134,6 +134,7 @@ import math
 import numpy as np
 import six
 
+from tensorflow.python.eager import context
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
 from tensorflow.python.framework import sparse_tensor as sparse_tensor_lib
@@ -554,6 +555,11 @@ def embedding_column(
     ValueError: if exactly one of `ckpt_to_load_from` and `tensor_name_in_ckpt`
       is specified.
     ValueError: if `initializer` is specified and is not callable.
+    RuntimeError: If eager execution is enabled.
+
+  @compatibility(eager)
+  Not compatible with eager execution.
+  @end_compatibility
   """
   if (dimension is None) or (dimension < 1):
     raise ValueError('Invalid dimension {}.'.format(dimension))
@@ -565,6 +571,8 @@ def embedding_column(
     raise ValueError('initializer must be callable if specified. '
                      'Embedding of column_name: {}'.format(
                          categorical_column.name))
+  if not context.in_graph_mode():
+    raise RuntimeError('Embedding_column not supported in eager mode.')
   if initializer is None:
     initializer = init_ops.truncated_normal_initializer(
         mean=0.0, stddev=1 / math.sqrt(dimension))
-- 
GitLab


From e1d6b9cce89eba0831b4e594099f393a13c6e5a8 Mon Sep 17 00:00:00 2001
From: Benjamin Kramer <kramerb@google.com>
Date: Mon, 20 Nov 2017 11:20:20 -0800
Subject: [PATCH 0110/1225] Bump LLVM snapshot to r318630.

PiperOrigin-RevId: 176388678
---
 tensorflow/compiler/xla/service/cpu/BUILD | 2 +-
 tensorflow/workspace.bzl                  | 8 ++++----
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/tensorflow/compiler/xla/service/cpu/BUILD b/tensorflow/compiler/xla/service/cpu/BUILD
index 6b62ee5ee7..8005cfac8c 100644
--- a/tensorflow/compiler/xla/service/cpu/BUILD
+++ b/tensorflow/compiler/xla/service/cpu/BUILD
@@ -273,7 +273,7 @@ cc_library(
         "//tensorflow/compiler/xla/service/llvm_ir:ops",
         "//tensorflow/compiler/xla/service/llvm_ir:tuple_ops",
         "//tensorflow/core:lib",
-        "//third_party/llvm/llvm:codegen",
+        "@llvm//:code_gen",
         "@llvm//:core",
         "@llvm//:support",
         "@llvm//:target",
diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl
index 8e62228c1b..9bbc0cb1c4 100644
--- a/tensorflow/workspace.bzl
+++ b/tensorflow/workspace.bzl
@@ -578,11 +578,11 @@ def tf_workspace(path_prefix="", tf_repo_name=""):
   temp_workaround_http_archive(
       name = "llvm",
       urls = [
-          "https://mirror.bazel.build/github.com/llvm-mirror/llvm/archive/823bedeb8e23a095173389fa05680597eba3f569.tar.gz",
-          "https://github.com/llvm-mirror/llvm/archive/823bedeb8e23a095173389fa05680597eba3f569.tar.gz",
+          "https://mirror.bazel.build/github.com/llvm-mirror/llvm/archive/8d26b8bee4d8e7230870a600bc968c7ee8cf6f67.tar.gz",
+          "https://github.com/llvm-mirror/llvm/archive/8d26b8bee4d8e7230870a600bc968c7ee8cf6f67.tar.gz",
       ],
-      sha256 = "93464bc760fd0319ebd0a5831fe477fdc4954f3612a29cc64d7405eaee8e00b2",
-      strip_prefix = "llvm-823bedeb8e23a095173389fa05680597eba3f569",
+      sha256 = "ff5ddbe5af5e264426c8d489e7fddfc5ad7e0975f19cefe9db8c0a5d0faeb23e",
+      strip_prefix = "llvm-8d26b8bee4d8e7230870a600bc968c7ee8cf6f67",
       build_file = str(Label("//third_party/llvm:llvm.BUILD")),
       repository = tf_repo_name,
   )
-- 
GitLab


From 70ba44b46bb9e5f5e55b2357676ffa7196b9bda7 Mon Sep 17 00:00:00 2001
From: Alan Du <alanhdu@gmail.com>
Date: Mon, 20 Nov 2017 14:29:17 -0500
Subject: [PATCH 0111/1225] Use new platform-specific dependencies

---
 tensorflow/tools/pip_package/setup.py | 10 +++-------
 1 file changed, 3 insertions(+), 7 deletions(-)

diff --git a/tensorflow/tools/pip_package/setup.py b/tensorflow/tools/pip_package/setup.py
index 726fc92bf6..2df568a811 100644
--- a/tensorflow/tools/pip_package/setup.py
+++ b/tensorflow/tools/pip_package/setup.py
@@ -33,6 +33,9 @@ _VERSION = '1.4.0'
 
 REQUIRED_PACKAGES = [
     'absl-py',
+    'backports.weakref >= 1.0rc1; python_version < "3.4"',
+    'enum34 >= 1.1.6; python_version < "3.4"',
+    'mock >= 2.0.0; python_version < "3.0"',
     'numpy >= 1.12.1',
     'six >= 1.10.0',
     'protobuf >= 3.4.0',
@@ -51,8 +54,6 @@ if sys.version_info.major == 3:
   REQUIRED_PACKAGES.append('wheel >= 0.26')
 else:
   REQUIRED_PACKAGES.append('wheel')
-  # mock comes with unittest.mock for python3, need to install for python2
-  REQUIRED_PACKAGES.append('mock >= 2.0.0')
 
 # remove tensorboard from tf-nightly packages
 if 'tf_nightly' in project_name:
@@ -61,11 +62,6 @@ if 'tf_nightly' in project_name:
       REQUIRED_PACKAGES.remove(package)
       break
 
-# weakref.finalize and enum were introduced in Python 3.4
-if sys.version_info < (3, 4):
-  REQUIRED_PACKAGES.append('backports.weakref >= 1.0rc1')
-  REQUIRED_PACKAGES.append('enum34 >= 1.1.6')
-
 # pylint: disable=line-too-long
 CONSOLE_SCRIPTS = [
     'freeze_graph = tensorflow.python.tools.freeze_graph:main',
-- 
GitLab


From e5126dbcfc9015649e110e368e1ec3b359833f78 Mon Sep 17 00:00:00 2001
From: Gunhan Gulsoy <gunan@google.com>
Date: Mon, 20 Nov 2017 11:33:07 -0800
Subject: [PATCH 0112/1225] Avoid using illegal characters in checkpoint file
 names in normalization_test. (#14677)

---
 tensorflow/python/layers/normalization_test.py | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/tensorflow/python/layers/normalization_test.py b/tensorflow/python/layers/normalization_test.py
index b2876c58c2..6f6d49fe91 100644
--- a/tensorflow/python/layers/normalization_test.py
+++ b/tensorflow/python/layers/normalization_test.py
@@ -108,8 +108,15 @@ class BNTest(test.TestCase):
                          infer_use_gpu):
     batch, height, width, input_channels = 2, 4, 5, 3
     shape = [batch, height, width, input_channels]
-    checkpoint = os.path.join(self.get_temp_dir(), 'cp_%s_%s_%s_%s' %
-        (dtype, train1_use_gpu, train2_use_gpu, infer_use_gpu))
+
+    # To use in paths, "sanitize" the dtype string representation.
+    dtype_to_str = {
+        dtypes.float16: 'float16',
+        dtypes.float32: 'float32',
+    }
+    checkpoint = os.path.join(
+        self.get_temp_dir(), 'cp_%s_%s_%s_%s' % (
+            dtype_to_str[dtype], train1_use_gpu, train2_use_gpu, infer_use_gpu))
 
     self._train(
         checkpoint,
@@ -144,7 +151,7 @@ class BNTest(test.TestCase):
                                                  train1_use_gpu=True,
                                                  train2_use_gpu=True,
                                                  infer_use_gpu=True)
- 
+
     self.assertEqual(len(ref_vars), 5)
 
     for train1_use_gpu in [True, False]:
-- 
GitLab


From e832c2eeec7597287a883a51b6c54a6511cdedc4 Mon Sep 17 00:00:00 2001
From: Alan Du <alanhdu@gmail.com>
Date: Mon, 20 Nov 2017 14:34:44 -0500
Subject: [PATCH 0113/1225] Add comments back in

---
 tensorflow/tools/pip_package/setup.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/tensorflow/tools/pip_package/setup.py b/tensorflow/tools/pip_package/setup.py
index 2df568a811..5b4c091d1e 100644
--- a/tensorflow/tools/pip_package/setup.py
+++ b/tensorflow/tools/pip_package/setup.py
@@ -33,8 +33,11 @@ _VERSION = '1.4.0'
 
 REQUIRED_PACKAGES = [
     'absl-py',
+    # weakref.finalize introduced in Python 3.4
     'backports.weakref >= 1.0rc1; python_version < "3.4"',
+    # enum module introduced in Python 3.4
     'enum34 >= 1.1.6; python_version < "3.4"',
+    # Needed for unittest.mock in Python 2
     'mock >= 2.0.0; python_version < "3.0"',
     'numpy >= 1.12.1',
     'six >= 1.10.0',
-- 
GitLab


From 6be3d1c95c2821276392469150724970f6b1108e Mon Sep 17 00:00:00 2001
From: Allen Lavoie <allenl@google.com>
Date: Mon, 20 Nov 2017 11:30:36 -0800
Subject: [PATCH 0114/1225] Update the documentation for GraphNetwork to refer
 to tf.keras.models rather than "model".

PiperOrigin-RevId: 176390315
---
 tensorflow/python/layers/network.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/tensorflow/python/layers/network.py b/tensorflow/python/layers/network.py
index 9a33a5c726..edc52545f9 100644
--- a/tensorflow/python/layers/network.py
+++ b/tensorflow/python/layers/network.py
@@ -181,11 +181,11 @@ def Input(  # pylint: disable=invalid-name
 class GraphNetwork(base.Layer):
   """A GraphNetwork is a directed acyclic graph of layers.
 
-  It is the topological form of a "model".
-  A Model is simply a GraphNetwork with added training/evaluation routines.
+  It is the topological form of a `tf.keras.models.Model`. A `Model` is simply a
+  `GraphNetwork` with added training/evaluation routines.
 
-  A GraphNetwork instance implements the full Layer API. In particular, a
-  GraphNetwork can be called on new inputs.
+  A `GraphNetwork` instance implements the full `Layer` API. In particular, a
+  `GraphNetwork` can be called on new inputs.
 
   Example:
 
-- 
GitLab


From 1b4ad65bc5830513d10ecde6d3e96e96117f7bad Mon Sep 17 00:00:00 2001
From: Daniyar <daniyar.turmukhambetov.10@ucl.ac.uk>
Date: Mon, 20 Nov 2017 19:36:58 +0000
Subject: [PATCH 0115/1225] restart tests

---
 tensorflow/core/kernels/unpack_op.cc | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tensorflow/core/kernels/unpack_op.cc b/tensorflow/core/kernels/unpack_op.cc
index 7ece912557..e4963f9d4c 100644
--- a/tensorflow/core/kernels/unpack_op.cc
+++ b/tensorflow/core/kernels/unpack_op.cc
@@ -176,6 +176,7 @@ REGISTER_KERNEL_BUILDER(Name("Unpack")
                             .HostMemory("output")
                             .TypeConstraint<int32>("T"),
                         UnpackOp<CPUDevice, int32>);
+
 REGISTER_KERNEL_BUILDER(Name("Unpack")
                             .Device(DEVICE_SYCL)
                             .HostMemory("value")
-- 
GitLab


From 892febff7e1655d368a1b354a0f4563f65124d2d Mon Sep 17 00:00:00 2001
From: Max Galkin <maxgalkin@google.com>
Date: Mon, 20 Nov 2017 12:03:11 -0800
Subject: [PATCH 0116/1225] Clarifications in the doc comment for
 `tf.strided_slice`. Also removed the angle brackets, which caused some words
 to disappear in the web version of the doc.

PiperOrigin-RevId: 176395531
---
 tensorflow/python/ops/array_ops.py | 60 ++++++++++++++----------------
 1 file changed, 28 insertions(+), 32 deletions(-)

diff --git a/tensorflow/python/ops/array_ops.py b/tensorflow/python/ops/array_ops.py
index 61bd41e7de..c3c7ecd080 100644
--- a/tensorflow/python/ops/array_ops.py
+++ b/tensorflow/python/ops/array_ops.py
@@ -641,40 +641,35 @@ def strided_slice(input_,
                   name=None):
   """Extracts a strided slice of a tensor (generalized python array indexing).
 
-  **Most users will want to use @{tf.Tensor.__getitem__} and
-  @{tf.Variable.__getitem__}.** That allows  NumPy style slicing syntax (i.e.
-  `tensor[..., 3:4:-1, tf.newaxis, 3]`).
-  This op is the low-level interface that are used to implement operators.
-  Those interfaces are much more friendly, and highly recommended.
-
-  To a first order, this operation extracts a slice of size `end - begin`
-  from a tensor `input`
-  starting at the location specified by `begin`. The slice continues by adding
-  `stride` to the `begin` index until all dimensions are not less than `end`.
-  Note that components of stride can be negative, which causes a reverse
-  slice.
-
-  This operation can be thought of an encoding of a numpy style sliced
-  range. Given a python slice input[<spec0>, <spec1>, ..., <specn>]
+  **Instead of calling this op directly most users will want to use the
+  NumPy-style slicing syntax (e.g. `tensor[..., 3:4:-1, tf.newaxis, 3]`), which
+  is supported via @{tf.Tensor.__getitem__} and @{tf.Variable.__getitem__}.**
+  The interface of this op is a low-level encoding of the slicing syntax.
+
+  Roughly speaking, this op extracts a slice of size `(end-begin)/stride`
+  from the given `input_` tensor. Starting at the location specified by `begin`
+  the slice continues by adding `stride` to the index until all dimensions are
+  not less than `end`.
+  Note that a stride can be negative, which causes a reverse slice.
+
+  Given a Python slice `input[spec0, spec1, ..., specn]`,
   this function will be called as follows.
 
-  `begin`, `end`, and `strides` will be all length n. n is in general
-  not the same dimensionality as `input`.
+  `begin`, `end`, and `strides` will be vectors of length n.
+  n in general is not equal to the rank of the `input_` tensor.
 
-  For the ith spec,
-  `begin_mask`, `end_mask`, `ellipsis_mask`, `new_axis_mask`,
-  and `shrink_axis_mask` will have the ith bit corresponding to
+  In each mask field (`begin_mask`, `end_mask`, `ellipsis_mask`,
+  `new_axis_mask`, `shrink_axis_mask`) the ith bit will correspond to
   the ith spec.
 
-  If the ith bit of `begin_mask` is non-zero, `begin[i]` is ignored and
+  If the ith bit of `begin_mask` is set, `begin[i]` is ignored and
   the fullest possible range in that dimension is used instead.
   `end_mask` works analogously, except with the end range.
 
   `foo[5:,:,:3]` on a 7x8x9 tensor is equivalent to `foo[5:7,0:8,0:3]`.
   `foo[::-1]` reverses a tensor with shape 8.
 
-
-  If the ith bit of `ellipsis_mask` is non-zero, as many unspecified dimensions
+  If the ith bit of `ellipsis_mask` is set, as many unspecified dimensions
   as needed will be inserted between other dimensions. Only one
   non-zero bit is allowed in `ellipsis_mask`.
 
@@ -682,20 +677,21 @@ def strided_slice(input_,
   equivalent to `foo[3:5,:,:,4:5]` and
   `foo[3:5,...]` is equivalent to `foo[3:5,:,:,:]`.
 
-  If the ith bit of `new_axis_mask` is one, then `begin`,
+  If the ith bit of `new_axis_mask` is set, then `begin`,
   `end`, and `stride` are ignored and a new length 1 dimension is
   added at this point in the output tensor.
 
-  For example `foo[3:5,4]` on a 10x8 tensor produces a shape 2 tensor
-  whereas `foo[3:5,4:5]` produces a shape 2x1 tensor with shrink_mask
-  being 1<<1 == 2.
+  For example,
+  `foo[:4, tf.newaxis, :2]` would produce a shape `(4, 1, 2)` tensor.
+
+  If the ith bit of `shrink_axis_mask` is set, it implies that the ith
+  specification shrinks the dimensionality by 1. `begin[i]`, `end[i]` and
+  `strides[i]` must imply a slice of size 1 in the dimension. For example in
+  Python one might do `foo[:, 3, :]` which would result in
+  `shrink_axis_mask` equal to 2.
 
-  If the ith bit of `shrink_axis_mask` is one, then `begin`,
-  `end[i]`, and `stride[i]` are used to do a slice in the appropriate
-  dimension, but the output tensor will be reduced in dimensionality
-  by one. This is only valid if the ith entry of slice[i]==1.
 
-  NOTE: `begin` and `end` are zero-indexed`.
+  NOTE: `begin` and `end` are zero-indexed.
   `strides` entries must be non-zero.
 
 
-- 
GitLab


From 9a6999c9bd067b507e1052d7c84d8d49329a81b2 Mon Sep 17 00:00:00 2001
From: Jayaram Bobba <jayaram.bobba@intel.com>
Date: Mon, 20 Nov 2017 12:13:06 -0800
Subject: [PATCH 0117/1225] MKL: Faster CPU implementation of batch matmul
 kernel using MKL cblas apis (#14335)

* Faster CPU implementation of batch matmul kernel using MKL cblas apis

* - Added MKL batched gemm calls for complex data types
- Addressed PR feedback
---
 tensorflow/core/kernels/BUILD                 |   7 +-
 .../core/kernels/batch_matmul_op_complex.cc   |   2 +
 .../core/kernels/batch_matmul_op_real.cc      |   2 +
 .../core/kernels/mkl_batch_matmul_op.cc       | 238 ++++++++++++++++++
 4 files changed, 248 insertions(+), 1 deletion(-)
 create mode 100644 tensorflow/core/kernels/mkl_batch_matmul_op.cc

diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD
index f491132777..b2f505ff0c 100644
--- a/tensorflow/core/kernels/BUILD
+++ b/tensorflow/core/kernels/BUILD
@@ -2564,8 +2564,13 @@ tf_kernel_library(
 
 tf_kernel_library(
     name = "batch_matmul_op",
+    srcs = [] + if_mkl([
+        "mkl_batch_matmul_op.cc",
+    ]),
     prefix = "batch_matmul_op",
-    deps = MATH_DEPS,
+    deps = MATH_DEPS + if_mkl([
+        "//third_party/mkl:intel_binary_blob",
+    ]),
 )
 
 tf_kernel_library(
diff --git a/tensorflow/core/kernels/batch_matmul_op_complex.cc b/tensorflow/core/kernels/batch_matmul_op_complex.cc
index a58ec02726..96216764fd 100644
--- a/tensorflow/core/kernels/batch_matmul_op_complex.cc
+++ b/tensorflow/core/kernels/batch_matmul_op_complex.cc
@@ -17,8 +17,10 @@ limitations under the License.
 
 namespace tensorflow {
 
+#if !defined(INTEL_MKL)
 TF_CALL_complex64(REGISTER_BATCH_MATMUL_CPU);
 TF_CALL_complex128(REGISTER_BATCH_MATMUL_CPU);
+#endif
 
 #if GOOGLE_CUDA
 TF_CALL_complex64(REGISTER_BATCH_MATMUL_GPU);
diff --git a/tensorflow/core/kernels/batch_matmul_op_real.cc b/tensorflow/core/kernels/batch_matmul_op_real.cc
index 1900ed8e31..8d155ca62b 100644
--- a/tensorflow/core/kernels/batch_matmul_op_real.cc
+++ b/tensorflow/core/kernels/batch_matmul_op_real.cc
@@ -17,8 +17,10 @@ limitations under the License.
 
 namespace tensorflow {
 
+#if !defined(INTEL_MKL)
 TF_CALL_float(REGISTER_BATCH_MATMUL_CPU);
 TF_CALL_double(REGISTER_BATCH_MATMUL_CPU);
+#endif
 TF_CALL_half(REGISTER_BATCH_MATMUL_CPU);
 TF_CALL_int32(REGISTER_BATCH_MATMUL_CPU);
 
diff --git a/tensorflow/core/kernels/mkl_batch_matmul_op.cc b/tensorflow/core/kernels/mkl_batch_matmul_op.cc
new file mode 100644
index 0000000000..138acdf298
--- /dev/null
+++ b/tensorflow/core/kernels/mkl_batch_matmul_op.cc
@@ -0,0 +1,238 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+// See docs in ../ops/math_ops.cc.
+
+// This file uses MKL CBLAS batched xGEMM for acceleration of TF Batch
+// Matrix-Matrix Multiplication (MatMul) operations.
+// We currently register this kernel only for MKL supported data
+// types (float, double, complex64, complex128). The macro INTEL_MKL is defined
+// by the build system only when MKL is chosen as an option at configure stage
+// and when it is undefined at build time, this file becomes an empty
+// compilation unit
+
+#define EIGEN_USE_THREADS
+
+#if defined(INTEL_MKL)
+#include <vector>
+#include "mkl_cblas.h"
+#include "tensorflow/core/framework/numeric_types.h"
+#include "tensorflow/core/framework/op.h"
+#include "tensorflow/core/framework/op_kernel.h"
+#include "tensorflow/core/framework/register_types.h"
+#include "tensorflow/core/framework/tensor.h"
+#include "tensorflow/core/framework/tensor_shape.h"
+#include "tensorflow/core/framework/type_traits.h"
+#include "tensorflow/core/framework/types.h"
+#include "tensorflow/core/kernels/fill_functor.h"
+#include "tensorflow/core/platform/logging.h"
+#include "tensorflow/core/platform/types.h"
+#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
+
+#define MKL_Complex8 tensorflow::complex64
+#define MKL_Complex16 tensorflow::complex128
+
+namespace tensorflow {
+
+typedef Eigen::ThreadPoolDevice CPUDevice;
+
+template <typename Device, typename Scalar>
+class BatchMatMulMkl : public OpKernel {
+ public:
+  explicit BatchMatMulMkl(OpKernelConstruction *context) : OpKernel(context) {
+    OP_REQUIRES_OK(context, context->GetAttr("adj_x", &adj_x_));
+    OP_REQUIRES_OK(context, context->GetAttr("adj_y", &adj_y_));
+  }
+
+  virtual ~BatchMatMulMkl() {}
+
+  void Compute(OpKernelContext *ctx) override {
+    const Tensor &lhs = ctx->input(0);
+    const Tensor &rhs = ctx->input(1);
+    OP_REQUIRES(ctx, lhs.dims() == rhs.dims(),
+                errors::InvalidArgument("lhs and rhs has different ndims: ",
+                                        lhs.shape().DebugString(), " vs. ",
+                                        rhs.shape().DebugString()));
+    const int ndims = lhs.dims();
+    OP_REQUIRES(
+        ctx, ndims >= 2,
+        errors::InvalidArgument("lhs and rhs ndims must be >= 2: ", ndims));
+    TensorShape out_shape;
+    for (int i = 0; i < ndims - 2; ++i) {
+      OP_REQUIRES(ctx, lhs.dim_size(i) == rhs.dim_size(i),
+                  errors::InvalidArgument("lhs.dim(", i, ") and rhs.dim(", i,
+                                          ") must be the same: ",
+                                          lhs.shape().DebugString(), " vs ",
+                                          rhs.shape().DebugString()));
+      out_shape.AddDim(lhs.dim_size(i));
+    }
+    auto batch_size = (ndims == 2) ? 1 : out_shape.num_elements();
+    auto lhs_rows = lhs.dim_size(ndims - 2);
+    auto lhs_cols = lhs.dim_size(ndims - 1);
+    auto rhs_rows = rhs.dim_size(ndims - 2);
+    auto rhs_cols = rhs.dim_size(ndims - 1);
+    if (adj_x_) std::swap(lhs_rows, lhs_cols);
+    if (adj_y_) std::swap(rhs_rows, rhs_cols);
+    OP_REQUIRES(ctx, lhs_cols == rhs_rows,
+                errors::InvalidArgument(
+                    "lhs mismatch rhs shape: ", lhs_cols, " vs. ", rhs_rows,
+                    ": ", lhs.shape().DebugString(), " ",
+                    rhs.shape().DebugString(), " ", adj_x_, " ", adj_y_));
+    out_shape.AddDim(lhs_rows);
+    out_shape.AddDim(rhs_cols);
+    Tensor *out = nullptr;
+    OP_REQUIRES_OK(ctx, ctx->allocate_output(0, out_shape, &out));
+    if (out->NumElements() == 0) {
+      return;
+    }
+    if (lhs.NumElements() == 0 || rhs.NumElements() == 0) {
+      functor::SetZeroFunctor<Device, Scalar> f;
+      f(ctx->eigen_device<Device>(), out->flat<Scalar>());
+      return;
+    }
+
+    auto rhs_reshaped = rhs.template flat_inner_dims<Scalar, 3>();
+    auto lhs_reshaped = lhs.template flat_inner_dims<Scalar, 3>();
+    auto out_reshaped = out->template flat_inner_dims<Scalar, 3>();
+    const uint64 M = lhs_reshaped.dimension(adj_x_ ? 2 : 1);
+    const uint64 K = lhs_reshaped.dimension(adj_x_ ? 1 : 2);
+    const uint64 N = rhs_reshaped.dimension(adj_y_ ? 1 : 2);
+    
+    std::vector<MKL_INT> m_array(batch_size, M);
+    std::vector<MKL_INT> n_array(batch_size, N);
+    std::vector<MKL_INT> k_array(batch_size, K);
+    std::vector<MKL_INT> lda_array(batch_size, adj_x_ ? M : K);
+    std::vector<MKL_INT> ldb_array(batch_size, adj_y_ ? K : N);
+    std::vector<MKL_INT> ldc_array(batch_size, N);
+    std::vector<MKL_INT> group_size(1, batch_size);
+    std::vector<const Scalar *> a_array;
+    std::vector<const Scalar *> b_array;
+    std::vector<Scalar *> c_array;
+    a_array.reserve(batch_size);
+    b_array.reserve(batch_size);
+    c_array.reserve(batch_size);
+    for (int64 i = 0; i < batch_size; i++) {
+      a_array.push_back(&lhs_reshaped(i, 0, 0));
+      b_array.push_back(&rhs_reshaped(i, 0, 0));
+      c_array.push_back(&out_reshaped(i, 0, 0));
+    }
+    
+    MklCblasGemmBatch(CblasRowMajor, adj_x_, adj_y_, &m_array[0], &n_array[0],
+                      &k_array[0], &a_array[0], &lda_array[0], &b_array[0],
+                      &ldb_array[0], &c_array[0], &ldc_array[0], 1,
+                      &group_size[0]);
+  }
+
+ private:
+  bool adj_x_;
+  bool adj_y_;
+
+  void MklCblasGemmBatch(const CBLAS_LAYOUT Layout, const bool TransA,
+                         const bool TransB, const MKL_INT *M_Array,
+                         const MKL_INT *N_Array, const MKL_INT *K_Array,
+                         const float **A_Array, const MKL_INT *lda_Array,
+                         const float **B_Array, const MKL_INT *ldb_Array,
+                         float **C_Array, const MKL_INT *ldc_Array,
+                         const MKL_INT group_count, const MKL_INT *group_size) {
+    std::vector<CBLAS_TRANSPOSE> TransA_Array(
+        group_size[0], TransA ? CblasTrans : CblasNoTrans);
+    std::vector<CBLAS_TRANSPOSE> TransB_Array(
+        group_size[0], TransB ? CblasTrans : CblasNoTrans);
+    std::vector<float> alpha_Array(group_size[0], 1.0);
+    std::vector<float> beta_Array(group_size[0], 0.0);
+    cblas_sgemm_batch(Layout, &TransA_Array[0], &TransB_Array[0], M_Array,
+                      N_Array, K_Array, &alpha_Array[0], A_Array, lda_Array,
+                      B_Array, ldb_Array, &beta_Array[0], C_Array, ldc_Array,
+                      group_count, group_size);
+  }
+
+  void MklCblasGemmBatch(const CBLAS_LAYOUT Layout, const bool TransA,
+                         const bool TransB, const MKL_INT *M_Array,
+                         const MKL_INT *N_Array, const MKL_INT *K_Array,
+                         const double **A_Array, const MKL_INT *lda_Array,
+                         const double **B_Array, const MKL_INT *ldb_Array,
+                         double **C_Array, const MKL_INT *ldc_Array,
+                         const MKL_INT group_count, const MKL_INT *group_size) {
+    std::vector<CBLAS_TRANSPOSE> TransA_array(
+        group_size[0], TransA ? CblasTrans : CblasNoTrans);
+    std::vector<CBLAS_TRANSPOSE> TransB_array(
+        group_size[0], TransB ? CblasTrans : CblasNoTrans);
+    std::vector<double> alpha_Array(group_size[0], 1.0);
+    std::vector<double> beta_Array(group_size[0], 0.0);
+    cblas_dgemm_batch(Layout, &TransA_array[0], &TransB_array[0], M_Array,
+                      N_Array, K_Array, &alpha_Array[0], A_Array, lda_Array,
+                      B_Array, ldb_Array, &beta_Array[0], C_Array, ldc_Array,
+                      group_count, group_size);
+  }
+
+  void MklCblasGemmBatch(const CBLAS_LAYOUT Layout, const bool TransA,
+                         const bool TransB, const MKL_INT *M_Array,
+                         const MKL_INT *N_Array, const MKL_INT *K_Array,
+                         const MKL_Complex8 **A_Array, const MKL_INT *lda_Array,
+                         const MKL_Complex8 **B_Array, const MKL_INT *ldb_Array,
+                         MKL_Complex8 **C_Array, const MKL_INT *ldc_Array,
+                         const MKL_INT group_count, const MKL_INT *group_size) {
+    std::vector<CBLAS_TRANSPOSE> TransA_array(
+        group_size[0], TransA ? CblasConjTrans : CblasNoTrans);
+    std::vector<CBLAS_TRANSPOSE> TransB_array(
+        group_size[0], TransB ? CblasConjTrans : CblasNoTrans);
+    std::vector<MKL_Complex8> alpha_Array(group_size[0], {1.0f, 0.0f});
+    std::vector<MKL_Complex8> beta_Array(group_size[0], {0.0f, 0.0f});
+    cblas_cgemm_batch(
+        Layout, &TransA_array[0], &TransB_array[0], M_Array, N_Array, K_Array,
+        static_cast<const void *>(&alpha_Array[0]),
+        reinterpret_cast<const void **>(A_Array), lda_Array,
+        reinterpret_cast<const void **>(B_Array), ldb_Array,
+        static_cast<const void *>(&beta_Array[0]),
+        reinterpret_cast<void **>(C_Array), ldc_Array, group_count, group_size);
+  }
+
+  void MklCblasGemmBatch(const CBLAS_LAYOUT Layout, const bool TransA,
+                         const bool TransB, const MKL_INT *M_Array,
+                         const MKL_INT *N_Array, const MKL_INT *K_Array,
+                         const MKL_Complex16 **A_Array,
+                         const MKL_INT *lda_Array,
+                         const MKL_Complex16 **B_Array,
+                         const MKL_INT *ldb_Array, MKL_Complex16 **C_Array,
+                         const MKL_INT *ldc_Array, const MKL_INT group_count,
+                         const MKL_INT *group_size) {
+    std::vector<CBLAS_TRANSPOSE> TransA_array(
+        group_size[0], TransA ? CblasConjTrans : CblasNoTrans);
+    std::vector<CBLAS_TRANSPOSE> TransB_array(
+        group_size[0], TransB ? CblasConjTrans : CblasNoTrans);
+    std::vector<MKL_Complex16> alpha_Array(group_size[0], {1.0f, 0.0f});
+    std::vector<MKL_Complex16> beta_Array(group_size[0], {0.0f, 0.0f});
+    cblas_zgemm_batch(
+        Layout, &TransA_array[0], &TransB_array[0], M_Array, N_Array, K_Array,
+        static_cast<const void *>(&alpha_Array[0]),
+        reinterpret_cast<const void **>(A_Array), lda_Array,
+        reinterpret_cast<const void **>(B_Array), ldb_Array,
+        static_cast<const void *>(&beta_Array[0]),
+        reinterpret_cast<void **>(C_Array), ldc_Array, group_count, group_size);
+  }
+};
+
+#define REGISTER_BATCH_MATMUL_MKL(TYPE)                                 \
+  REGISTER_KERNEL_BUILDER(                                              \
+      Name("BatchMatMul").Device(DEVICE_CPU).TypeConstraint<TYPE>("T"), \
+      BatchMatMulMkl<CPUDevice, TYPE>)
+
+TF_CALL_float(REGISTER_BATCH_MATMUL_MKL);
+TF_CALL_double(REGISTER_BATCH_MATMUL_MKL);
+TF_CALL_complex64(REGISTER_BATCH_MATMUL_MKL);
+TF_CALL_complex128(REGISTER_BATCH_MATMUL_MKL);
+
+}  // end namespace tensorflow
+#endif
-- 
GitLab


From ea69b406d86176f9370c51b9ed9963a28ab93c74 Mon Sep 17 00:00:00 2001
From: Sanjoy Das <sanjoy@google.com>
Date: Mon, 20 Nov 2017 12:13:08 -0800
Subject: [PATCH 0118/1225] Avoid std::move 'ing the hlo_to_profile_idx into
 IrEmitter

The current code is incorrect since we're using hlo_to_profile_idx after
std::moving out of it.
PiperOrigin-RevId: 176397126
---
 .../compiler/xla/service/cpu/cpu_compiler.cc     | 16 ++++++----------
 1 file changed, 6 insertions(+), 10 deletions(-)

diff --git a/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc b/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc
index b04a279395..592751e118 100644
--- a/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc
+++ b/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc
@@ -543,11 +543,9 @@ StatusOr<std::unique_ptr<Executable>> CpuCompiler::RunBackend(
       parallel_computations.emplace(to_apply, instruction);
     }
 
-    size_t entry_computation_profile_idx = hlo_to_profile_idx.size();
-    IrEmitter ir_emitter(
-        *module, *assignment, llvm_module.get(), std::move(hlo_to_profile_idx),
-        /*entry_computation_profile_idx=*/entry_computation_profile_idx,
-        jit->target_machine(), jit->external_constant_pool());
+    IrEmitter ir_emitter(*module, *assignment, llvm_module.get(),
+                         hlo_to_profile_idx, hlo_to_profile_idx.size(),
+                         jit->target_machine(), jit->external_constant_pool());
 
     std::unique_ptr<HloInstructionMap<string>> function_names(
         new HloInstructionMap<string>());
@@ -625,11 +623,9 @@ StatusOr<std::unique_ptr<Executable>> CpuCompiler::RunBackend(
     // before the entry computation. The order of computations returned from
     // GetEmbeddedComputations guarantees that a called computation occurs
     // before a caller computation.
-    size_t entry_computation_profile_idx = hlo_to_profile_idx.size();
-    IrEmitter ir_emitter(
-        *module, *assignment, llvm_module.get(), std::move(hlo_to_profile_idx),
-        /*entry_computation_profile_idx=*/entry_computation_profile_idx,
-        jit->target_machine(), jit->external_constant_pool());
+    IrEmitter ir_emitter(*module, *assignment, llvm_module.get(),
+                         hlo_to_profile_idx, hlo_to_profile_idx.size(),
+                         jit->target_machine(), jit->external_constant_pool());
 
     for (auto embedded_computation :
          computation->MakeEmbeddedComputationsList()) {
-- 
GitLab


From f7a6c4294dfb1603da33a952080bc4c46935a461 Mon Sep 17 00:00:00 2001
From: Russell Power <power@google.com>
Date: Mon, 20 Nov 2017 12:26:08 -0800
Subject: [PATCH 0119/1225] Raise an exception if a user attempts to use
 unsupported operations on the TPU.

PiperOrigin-RevId: 176399081
---
 tensorflow/contrib/tpu/python/tpu/tpu.py | 30 +++++++++++++++++++-----
 1 file changed, 24 insertions(+), 6 deletions(-)

diff --git a/tensorflow/contrib/tpu/python/tpu/tpu.py b/tensorflow/contrib/tpu/python/tpu/tpu.py
index f3ddc09754..77977b3c94 100644
--- a/tensorflow/contrib/tpu/python/tpu/tpu.py
+++ b/tensorflow/contrib/tpu/python/tpu/tpu.py
@@ -32,8 +32,25 @@ from tensorflow.python.ops import variable_scope
 from tensorflow.python.platform import tf_logging as logging
 
 
-_SUMMARY_OPS = ("ScalarSummary",)
-_PLACEHOLDER_OPS = ("Placeholder",)
+# Operations that indicate some error in the users graph, e.g. a placeholder
+# that's introduced outside of the infeed.
+_BLACKLISTED_OPS = set([
+    "Placeholder",
+])
+
+# These operations will currently fail to compile, but we should be able to
+# support them eventually via CPU offload or extending our operation set.
+_NOT_IMPLEMENTED_OPS = set([
+    "AudioSummary",
+    "AudioSummaryV2",
+    "HistogramSummary",
+    "ImageSummary",
+    "MergeSummary",
+    "Print",
+    "ScalarSummary",
+    "TensorSummary",
+    "TensorSummaryV2",
+    ])
 
 
 def initialize_system(embedding_config=None, job=None):
@@ -108,12 +125,13 @@ class TPUReplicateContext(control_flow_ops.ControlFlowContext):
 
   def _AddOpInternal(self, op):
     # pylint: disable=protected-access
-    if op.type in _PLACEHOLDER_OPS:
-      raise ValueError("Placeholder %s is not supported." % op.name)
+    if op.type in _BLACKLISTED_OPS:
+      raise ValueError("Operation of type %s (%s) is not supported on the TPU" %
+                       (op.type, op.name))
 
-    if op.type in _SUMMARY_OPS:
+    if op.type in _NOT_IMPLEMENTED_OPS:
       logging.warning(
-          "Summary operations are not currently supported (%s)" % op.name)
+          "Operation %s (%s) is not currently supported", op.type, op.name)
 
     if any(x.dtype._is_ref_dtype for x in op.inputs):
       raise NotImplementedError(
-- 
GitLab


From 1d97fe0abdf2279f3f9187e279646732f2264940 Mon Sep 17 00:00:00 2001
From: Olivia Nordquist <nolivia@google.com>
Date: Mon, 20 Nov 2017 12:27:49 -0800
Subject: [PATCH 0120/1225] fix bug in c_api TF_GraphSetTensorShape to be able
 to handle unknown shapes as per the documentation.

PiperOrigin-RevId: 176399293
---
 tensorflow/c/c_api.cc      | 16 ++++++++++------
 tensorflow/c/c_api_test.cc | 19 ++++++++++++++++++-
 2 files changed, 28 insertions(+), 7 deletions(-)

diff --git a/tensorflow/c/c_api.cc b/tensorflow/c/c_api.cc
index dd638de3c6..bb41f92306 100644
--- a/tensorflow/c/c_api.cc
+++ b/tensorflow/c/c_api.cc
@@ -939,13 +939,17 @@ void TF_GraphSetTensorShape(TF_Graph* graph, TF_Output output,
     return;
   }
 
-  std::vector<tensorflow::shape_inference::DimensionHandle> dim_vec;
-  dim_vec.reserve(num_dims);
-  for (int i = 0; i < num_dims; ++i) {
-    dim_vec.push_back(ic->MakeDim(dims[i]));
+  tensorflow::shape_inference::ShapeHandle new_shape;
+  if (num_dims != -1) {
+    std::vector<tensorflow::shape_inference::DimensionHandle> dim_vec;
+    dim_vec.reserve(num_dims);
+    for (int i = 0; i < num_dims; ++i) {
+      dim_vec.push_back(ic->MakeDim(dims[i]));
+    }
+    new_shape = ic->MakeShape(dim_vec);
+  } else {
+    new_shape = ic->UnknownShape();
   }
-
-  tensorflow::shape_inference::ShapeHandle new_shape = ic->MakeShape(dim_vec);
   status->status = graph->refiner.SetShape(node, output.index, new_shape);
 }
 
diff --git a/tensorflow/c/c_api_test.cc b/tensorflow/c/c_api_test.cc
index e0057eb51c..6ec1db8ccf 100644
--- a/tensorflow/c/c_api_test.cc
+++ b/tensorflow/c/c_api_test.cc
@@ -287,6 +287,13 @@ TEST(CAPI, SetShape) {
   ASSERT_EQ(TF_OK, TF_GetCode(s)) << TF_Message(s);
   EXPECT_EQ(-1, num_dims);
 
+  // Set the shape to be unknown, expect no change.
+  TF_GraphSetTensorShape(graph, feed_out_0, /*dims=*/nullptr, -1, s);
+  ASSERT_EQ(TF_OK, TF_GetCode(s)) << TF_Message(s);
+  num_dims = TF_GraphGetTensorNumDims(graph, feed_out_0, s);
+  ASSERT_EQ(TF_OK, TF_GetCode(s)) << TF_Message(s);
+  EXPECT_EQ(-1, num_dims);
+
   // Set the shape to be 2 x Unknown
   int64_t dims[] = {2, -1};
   TF_GraphSetTensorShape(graph, feed_out_0, dims, 2, s);
@@ -315,7 +322,17 @@ TEST(CAPI, SetShape) {
   EXPECT_EQ(dims[0], returned_dims[0]);
   EXPECT_EQ(dims[1], returned_dims[1]);
 
-  // Try to set 'unknown' on the shape and see that
+  // Try to set 'unknown' with unknown rank on the shape and see that
+  // it doesn't change.
+  TF_GraphSetTensorShape(graph, feed_out_0, /*dims=*/nullptr, -1, s);
+  EXPECT_EQ(TF_OK, TF_GetCode(s)) << TF_Message(s);
+  TF_GraphGetTensorShape(graph, feed_out_0, returned_dims, num_dims, s);
+  ASSERT_EQ(TF_OK, TF_GetCode(s)) << TF_Message(s);
+  EXPECT_EQ(2, num_dims);
+  EXPECT_EQ(2, returned_dims[0]);
+  EXPECT_EQ(3, returned_dims[1]);
+
+  // Try to set 'unknown' with same rank on the shape and see that
   // it doesn't change.
   dims[0] = -1;
   dims[1] = -1;
-- 
GitLab


From b7a74edb5e6e134df4d66ad66b486aafd29c4ac4 Mon Sep 17 00:00:00 2001
From: codrut3 <grosu.codrut@gmail.com>
Date: Mon, 20 Nov 2017 22:58:53 +0200
Subject: [PATCH 0121/1225] Use cub::ReduceByKey to count partition indices
 (#14665)

* Use cub::ReduceByKey to count how many times each partition index appears.

This implements a suggestion by @ekelsen. It replaces the
previously custom-made counting method and is likely more
efficient.

* Remove CubReduceAdd and use instead cub::Sum.
---
 .../kernels/dynamic_partition_op_gpu.cu.cc    | 221 ++++++++++++------
 .../core/util/transform_output_iterator.h     |   2 +-
 .../kernel_tests/dynamic_partition_op_test.py |  91 ++++++++
 3 files changed, 247 insertions(+), 67 deletions(-)

diff --git a/tensorflow/core/kernels/dynamic_partition_op_gpu.cu.cc b/tensorflow/core/kernels/dynamic_partition_op_gpu.cu.cc
index fc98556440..872921efa5 100644
--- a/tensorflow/core/kernels/dynamic_partition_op_gpu.cu.cc
+++ b/tensorflow/core/kernels/dynamic_partition_op_gpu.cu.cc
@@ -19,11 +19,12 @@ limitations under the License.
 // 2. We apply cub::DeviceRadixSort::SortPairs to the key - value pairs given
 //    by partitions and indices_in. This will result in two new vectors
 //    partitions_out and indices_out, with partitions_out sorted.
-// 3. The first dimension of outputs[i] is equal to the length of the interval
-//    of i-values in partitions_out. We determine it in two steps:
-//    - compute the starting and ending point of each interval,
-//    - subtract the starting and ending points to find the length.
-//    The result is placed in partition_count.
+// 3. The first dimension of outputs[i] is equal to the number of i-values in
+//    partitions_out. We determine it in two steps:
+//    - apply cub::DeviceReduce::ReduceByKey to count how many times each value
+//      appears in partitions_out,
+//    - move the results to partition_count. This handles missing values
+//      (corresponding to empty parts).
 // 4. Because partition_count is on the GPU, we bring it asynchronously to
 //    the CPU. Then we can allocate the output tensors.
 // 5. Finally, we use indices_out and the gather functor to collect the output.
@@ -35,6 +36,9 @@ limitations under the License.
 #define EIGEN_USE_GPU
 
 #include "external/cub_archive/cub/device/device_radix_sort.cuh"
+#include "external/cub_archive/cub/device/device_reduce.cuh"
+#include "external/cub_archive/cub/iterator/constant_input_iterator.cuh"
+#include "external/cub_archive/cub/thread/thread_operators.cuh"
 #include "tensorflow/core/common_runtime/gpu/gpu_event_mgr.h"
 #include "tensorflow/core/framework/op_kernel.h"
 #include "tensorflow/core/framework/register_types.h"
@@ -44,6 +48,7 @@ limitations under the License.
 #include "tensorflow/core/kernels/fill_functor.h"
 #include "tensorflow/core/kernels/gather_functor_gpu.cu.h"
 #include "tensorflow/core/util/cuda_kernel_helper.h"
+#include "tensorflow/core/util/transform_output_iterator.h"
 
 namespace tensorflow {
 
@@ -57,34 +62,14 @@ __global__ void RangeInitKernel(const T start, const T delta, const int32 size,
   CUDA_1D_KERNEL_LOOP(i, size) { out[i] = start + i * delta; }
 }
 
-__global__ void FindEndpointsKernel(const int32* partitions, int32 size,
-                                    int32 nump, int32* start, int32* end) {
-  CUDA_1D_KERNEL_LOOP(i, size) {
-    int32 current = ldg(partitions + i);
-    if (FastBoundsCheck(current, nump)) {
-      if (i == 0)
-        start[current] = i;
-      else {
-        int32 before = ldg(partitions + i - 1);
-        if (before != current) start[current] = i;
-      }
-      if (i == size - 1)
-        end[current] = i + 1;
-      else {
-        int32 after = ldg(partitions + i + 1);
-        if (after != current) end[current] = i + 1;
-      }
-    }
-  }
-}
-
-// We create a local version of subtract, because the tf.subtract kernel
-// is not defined for int32. We use it to compute the length of an interval
-// by subtracting the endpoints.
-__global__ void IntervalLengthKernel(int32* start, int32 size, int32* end) {
-  CUDA_1D_KERNEL_LOOP(i, size) {
-    int32 start_point = ldg(start + i);
-    end[i] = end[i] - start_point;
+__global__ void MoveValuesKernel(const int32* keys, const int32* values,
+                                 const int32* size, int32 out_size,
+                                 int32* out) {
+  int32 N = min(ldg(size), out_size);
+  CUDA_1D_KERNEL_LOOP(i, N) {
+    int32 key = ldg(keys + i);
+    int32 value = ldg(values + i);
+    if (FastBoundsCheck(key, out_size)) out[key] = value;
   }
 }
 
@@ -99,23 +84,18 @@ void RangeInit(const GPUDevice& d, const T start, const T delta,
       start, delta, size, out.data());
 }
 
-// Partitions is a sorted vector of N non-negative integer numbers.
-// This function computes the starting and ending points of each interval
-// of values.
-void ComputeIntervals(const GPUDevice& d, Tensor* partitions, int32 N,
-                      int32 nump, int32* start_ptr, int32* end_ptr) {
-  CudaLaunchConfig config = GetCudaLaunchConfig(N, d);
-  FindEndpointsKernel<<<config.block_count, config.thread_per_block, 0,
-                        d.stream()>>>(partitions->flat<int32>().data(), N, nump,
-                                      start_ptr, end_ptr);
-}
-
-// Subtract the ending points of each interval to obtain the interval length.
-void ComputeItvLength(const GPUDevice& d, int32 num, int32* start_ptr,
-                      int32* end_ptr) {
-  CudaLaunchConfig config = GetCudaLaunchConfig(num, d);
-  IntervalLengthKernel<<<config.block_count, config.thread_per_block, 0,
-                         d.stream()>>>(start_ptr, num, end_ptr);
+// Given *num_runs pairs (key, value), this function moves the value
+// corresponding to key i at position i in the array out.
+void MoveValues(const GPUDevice& d, int32* keys, int32* values, int32* num_runs,
+                int32 out_size, int32* out) {
+  // Because num_runs is located on the GPU, we can not access it directly.
+  // So we launch the kernel with size = out_size.
+  // This is valid for correct inputs, because then out_size >= *num_runs.
+  // For wrong inputs, we may have out_size < *num_runs. In this case we will
+  // only handle the first out_size values.
+  CudaLaunchConfig config = GetCudaLaunchConfig(out_size, d);
+  MoveValuesKernel<<<config.block_count, config.thread_per_block, 0,
+                     d.stream()>>>(keys, values, num_runs, out_size, out);
 }
 
 template <typename T>
@@ -130,10 +110,75 @@ void CallGatherKernel(const GPUDevice& d, const T* params, const int32* indices,
       out_size);
 }
 
+struct IdentityOp {
+  __device__ int32 __forceinline__ operator()(const int32& a) const {
+    return a;
+  }
+};
+
+// Define an output iterator that only allows assignment to
+// positions between [base, base + limit).
+class BoundedOutputIterator
+    : public TransformOutputIterator<int32, int32, IdentityOp> {
+ private:
+  int32 limit;
+  int32* base;
+
+  struct BoundedReference : Reference {
+    int32 limit;
+    int32* base;
+    // Constructor
+    __host__ __device__ __forceinline__
+    BoundedReference(int32* ptr, int32* base, IdentityOp op, int32 limit)
+        : Reference(ptr, op), base(base), limit(limit) {}
+
+    // Assignment
+    __host__ __device__ __forceinline__ int32 operator=(int32 val) {
+      if (ptr - base < limit && ptr - base >= 0) *ptr = val;
+      return val;
+    }
+  };
+
+ public:
+  typedef BoundedOutputIterator self_type;
+  typedef BoundedReference reference;
+
+  __host__ __device__ __forceinline__ BoundedOutputIterator(int32* ptr,
+                                                            IdentityOp op,
+                                                            int32 size)
+      : TransformOutputIterator(ptr, op), base(ptr), limit(size) {}
+
+  __host__ __device__ __forceinline__
+  BoundedOutputIterator(int32* ptr, int32* base, IdentityOp op, int32 size)
+      : TransformOutputIterator(ptr, op), base(base), limit(size) {}
+
+  // Indirection
+  __host__ __device__ __forceinline__ reference operator*() const {
+    return BoundedReference(ptr, base, conversion_op, limit);
+  }
+
+  // Array subscript
+  __host__ __device__ __forceinline__ reference operator[](int32 n) const {
+    return BoundedReference(ptr + n, base, conversion_op, limit);
+  }
+
+  // Addition
+  __host__ __device__ __forceinline__ self_type operator+(int32 n) const {
+    self_type retval(ptr + n, base, conversion_op, limit);
+    return retval;
+  }
+
+  // Subtraction
+  __host__ __device__ __forceinline__ self_type operator-(int32 n) const {
+    self_type retval(ptr - n, base, conversion_op, limit);
+    return retval;
+  }
+};
+
 }  // namespace
 
 // The current implementation has memory cost on GPU
-// I + P + max(3N + R, O + N), where:
+// I + P + max(3N + R + P, O + N), where:
 // I - the size of the input
 // N - the size of the partitions tensor
 // R - the temporary storage used by cub::RadixSort, about 2N
@@ -310,9 +355,11 @@ class DynamicPartitionOpGPU : public AsyncOpKernel {
                          Tensor* partition_count, Tensor* indices_out,
                          DoneCallback done) {
     const GPUDevice& device = c->eigen_device<GPUDevice>();
+    const cudaStream_t& cu_stream = GetCudaStream(c);
     int32 N = partitions->NumElements();
     Tensor indices_in;
     Tensor partitions_out;
+    Tensor aggregates_out;
 
     // Allocate memory for Radix-Sort.
     this->AllocateTempSpace(c, N, &indices_in, &partitions_out, indices_out,
@@ -321,24 +368,66 @@ class DynamicPartitionOpGPU : public AsyncOpKernel {
     this->RadixSort(c, partitions, &indices_in, &partitions_out, indices_out,
                     done);
     if (!c->status().ok()) return;
-    // We still need a little bit of additional memory. However,
-    // we can reuse the indices_in tensor. We could also use atomic
-    // operations and no additional memory, but this approach seems faster.
+    // We will now apply a reduce operation to count how many times
+    // each index appears in partitions.
 
-    // Zero-out the allocated memory.
+    // Zero-out the partition_count tensor.
     functor::SetZeroFunctor<GPUDevice, int32> zero_functor;
     zero_functor(device, partition_count->flat<int32>());
-    zero_functor(device, indices_in.flat<int32>());
+    // Allocate memory for aggregates_out.
+    OP_REQUIRES_OK_ASYNC(
+        c, c->allocate_temp(DT_INT32, TensorShape({num_partitions_}),
+                            &aggregates_out),
+        done);
     // Obtain the pointers to inner buffers.
-    int32* start_ptr = indices_in.flat<int32>().data();
-    int32* end_ptr = partition_count->flat<int32>().data();
-    // Obtain the starting and ending points of each interval.
-    ComputeIntervals(device, &partitions_out, N, num_partitions_, start_ptr,
-                     end_ptr);
-    // Subtract to compute the number of appearances of each id.
-    ComputeItvLength(device, num_partitions_, start_ptr, end_ptr);
-  }  // At this point indices_in and partitions_out will be marked
-     // for deallocation.
+    int32* keys_in_ptr = partitions_out.flat<int32>().data();
+    // Here we reuse the indices_in tensor for the unique keys output.
+    int32* unique_out_ptr = indices_in.flat<int32>().data();
+    int32* aggregates_out_ptr = aggregates_out.flat<int32>().data();
+    // We wrap the pointers in bounded output iterators to guard against
+    // wrong inputs (more than num_partitions distinct indices).
+    IdentityOp id_op;
+    BoundedOutputIterator unique_out_it(unique_out_ptr, id_op, num_partitions_);
+    BoundedOutputIterator aggregates_out_it(aggregates_out_ptr, id_op,
+                                            num_partitions_);
+
+    cub::ConstantInputIterator<int32> values_in(1);
+    cub::Sum reduction_op;
+
+    // Allocate space on GPU for the number of runs. This is required by CUB.
+    Tensor num_runs;
+    OP_REQUIRES_OK_ASYNC(
+        c, c->allocate_temp(DT_INT32, TensorShape({1}), &num_runs), done);
+    int32* num_runs_ptr = num_runs.flat<int32>().data();
+
+    // Determine temporary device storage requirements
+    Tensor cub_temp_storage;
+    size_t temp_storage_bytes = 0;
+    cub::DeviceReduce::ReduceByKey(NULL, temp_storage_bytes, keys_in_ptr,
+                                   unique_out_it, values_in, aggregates_out_it,
+                                   num_runs_ptr, reduction_op, N, cu_stream);
+    // Allocate temporary storage.
+    OP_REQUIRES_OK_ASYNC(
+        c, c->allocate_temp(
+               DT_INT8, TensorShape({static_cast<int64>(temp_storage_bytes)}),
+               &cub_temp_storage),
+        done);
+    // Run reduce-by-key. The effect is that we count how many times
+    // each index appears in partitions. The distinct indices are stored
+    // in unique_out, while the count is stored in aggregates_out.
+    // The total number of distinct indices is stored in num_runs.
+    cub::DeviceReduce::ReduceByKey(cub_temp_storage.flat<int8>().data(),
+                                   temp_storage_bytes, keys_in_ptr,
+                                   unique_out_it, values_in, aggregates_out_it,
+                                   num_runs_ptr, reduction_op, N, cu_stream);
+    // We are not done yet. unique_out only contains the indices that appeared
+    // at least once in partitions. We move each value from aggregates_out
+    // to the corresponding position in partition_count. This will handle
+    // possibly empty parts.
+    MoveValues(device, unique_out_ptr, aggregates_out_ptr, num_runs_ptr,
+               num_partitions_, partition_count->flat<int32>().data());
+  }  // At this point indices_in, partitions_out, aggregates_out
+     // and cub_temp_storage will be marked for deallocation.
 
   void GatherSlices(OpKernelContext* c, const Tensor* data,
                     const Tensor* indices, int32 N, int64 slice_size,
@@ -358,7 +447,7 @@ class DynamicPartitionOpGPU : public AsyncOpKernel {
     }
   }
 
-  int num_partitions_;
+  int32 num_partitions_;
 };
 
 #define REGISTER_DYNAMIC_PARTITION_GPU(T)                                 \
diff --git a/tensorflow/core/util/transform_output_iterator.h b/tensorflow/core/util/transform_output_iterator.h
index 1640791ad1..059206c75b 100644
--- a/tensorflow/core/util/transform_output_iterator.h
+++ b/tensorflow/core/util/transform_output_iterator.h
@@ -24,7 +24,7 @@ namespace tensorflow {
 template <typename StoreType, typename InputType, typename ConversionOp,
           typename OffsetT = ptrdiff_t>
 class TransformOutputIterator {
- private:
+ protected:
   // Proxy object
   struct Reference {
     StoreType* ptr;
diff --git a/tensorflow/python/kernel_tests/dynamic_partition_op_test.py b/tensorflow/python/kernel_tests/dynamic_partition_op_test.py
index 2460950aa9..b4fb5aa411 100644
--- a/tensorflow/python/kernel_tests/dynamic_partition_op_test.py
+++ b/tensorflow/python/kernel_tests/dynamic_partition_op_test.py
@@ -40,6 +40,7 @@ class DynamicPartitionTest(test.TestCase):
           data, indices, num_partitions=4)
       partition_vals = sess.run(partitions)
 
+    self.assertEqual(4, len(partition_vals))
     self.assertAllEqual([0, 13], partition_vals[0])
     self.assertAllEqual([17], partition_vals[1])
     self.assertAllEqual([2, 4], partition_vals[2])
@@ -61,6 +62,7 @@ class DynamicPartitionTest(test.TestCase):
           data, indices, num_partitions=4)
       partition_vals = sess.run(partitions)
 
+    self.assertEqual(4, len(partition_vals))
     self.assertAllEqual([[0, 1, 2], [3, 4, 5]], partition_vals[0])
     self.assertAllEqual([[15, 16, 17]], partition_vals[1])
     self.assertAllEqual([[6, 7, 8], [12, 13, 14]], partition_vals[2])
@@ -85,6 +87,7 @@ class DynamicPartitionTest(test.TestCase):
           data, indices, num_partitions=2)
       partition_vals = sess.run(partitions)
 
+    self.assertEqual(2, len(partition_vals))
     self.assertAllEqual(part1, partition_vals[0])
     self.assertAllEqual(part2, partition_vals[1])
 
@@ -106,6 +109,7 @@ class DynamicPartitionTest(test.TestCase):
           data, indices, num_partitions=num_partitions)
       partition_vals = sess.run(partitions)
 
+    self.assertEqual(num_partitions, len(partition_vals))
     for i in range(num_partitions):
       # reshape because of empty parts
       parts_np = np.array(parts[i], dtype=np.float).reshape(-1, cols)
@@ -121,9 +125,30 @@ class DynamicPartitionTest(test.TestCase):
           data, indices, num_partitions=2)
       partition_vals = sess.run(partitions)
 
+    self.assertEqual(2, len(partition_vals))
     self.assertAllEqual([3 + 4j, 7 + 8j], partition_vals[0])
     self.assertAllEqual([1 + 2j, 5 + 6j], partition_vals[1])
 
+  def testScalarPartitions(self):
+    data_list = [10, 13, 12, 11]
+    with self.test_session(use_gpu=True) as sess:
+      data = constant_op.constant(data_list, dtype=dtypes.float64)
+      indices = 3
+      partitions = data_flow_ops.dynamic_partition(
+          data, indices, num_partitions=4)
+      partition_vals = sess.run(partitions)
+
+    self.assertEqual(4, len(partition_vals))
+    self.assertAllEqual(np.array([], dtype=np.float64).reshape(-1, 4),
+                        partition_vals[0])
+    self.assertAllEqual(np.array([], dtype=np.float64).reshape(-1, 4),
+                        partition_vals[1])
+    self.assertAllEqual(np.array([], dtype=np.float64).reshape(-1, 4),
+                        partition_vals[2])
+    self.assertAllEqual(np.array([10, 13, 12, 11],
+                                 dtype=np.float64).reshape(-1, 4),
+                        partition_vals[3])
+
   def testHigherRank(self):
     np.random.seed(7)
     with self.test_session(use_gpu=True) as sess:
@@ -158,6 +183,7 @@ class DynamicPartitionTest(test.TestCase):
           data, indices, num_partitions=4)
       partition_vals = sess.run(partitions)
 
+    self.assertEqual(4, len(partition_vals))
     self.assertAllEqual([], partition_vals[0])
     self.assertAllEqual([1, 3], partition_vals[1])
     self.assertAllEqual([], partition_vals[2])
@@ -173,6 +199,7 @@ class DynamicPartitionTest(test.TestCase):
           data, indices, num_partitions=3)
       partition_vals = sess.run(partitions)
 
+    self.assertEqual(3, len(partition_vals))
     self.assertAllEqual([[]], partition_vals[0])
     self.assertAllEqual([[]], partition_vals[1])
     self.assertAllEqual(np.array([], dtype=np.float).reshape(0, 0),
@@ -188,9 +215,73 @@ class DynamicPartitionTest(test.TestCase):
           data, indices, num_partitions=2)
       partition_vals = sess.run(partitions)
 
+    self.assertEqual(2, len(partition_vals))
     self.assertAllEqual([], partition_vals[0])
     self.assertAllEqual([], partition_vals[1])
 
+  def testGPUTooManyParts(self):
+    # This test only makes sense on the GPU. There we do not check
+    # for errors. In this case, we should discard all but the first
+    # num_partitions indices.
+    if not test.is_gpu_available():
+      return
+
+    data_list = [1, 2, 3, 4, 5, 6]
+    indices_list = [6, 5, 4, 3, 1, 0]
+    with self.test_session(use_gpu=True) as sess:
+      data = constant_op.constant(data_list, dtype=dtypes.float32)
+      indices = constant_op.constant(indices_list, dtype=dtypes.int32)
+      partitions = data_flow_ops.dynamic_partition(
+          data, indices, num_partitions=2)
+      partition_vals = sess.run(partitions)
+
+    self.assertEqual(2, len(partition_vals))
+    self.assertAllEqual([6], partition_vals[0])
+    self.assertAllEqual([5], partition_vals[1])
+
+  def testGPUPartsTooLarge(self):
+    # This test only makes sense on the GPU. There we do not check
+    # for errors. In this case, we should discard all the values
+    # larger than num_partitions.
+    if not test.is_gpu_available():
+      return
+
+    data_list = [1, 2, 3, 4, 5, 6]
+    indices_list = [10, 11, 2, 12, 0, 1000]
+    with self.test_session(use_gpu=True) as sess:
+      data = constant_op.constant(data_list, dtype=dtypes.float32)
+      indices = constant_op.constant(indices_list, dtype=dtypes.int32)
+      partitions = data_flow_ops.dynamic_partition(
+          data, indices, num_partitions=5)
+      partition_vals = sess.run(partitions)
+
+    self.assertEqual(5, len(partition_vals))
+    self.assertAllEqual([5], partition_vals[0])
+    self.assertAllEqual([], partition_vals[1])
+    self.assertAllEqual([3], partition_vals[2])
+    self.assertAllEqual([], partition_vals[3])
+    self.assertAllEqual([], partition_vals[4])
+
+  def testGPUAllIndicesBig(self):
+    # This test only makes sense on the GPU. There we do not check
+    # for errors. In this case, we should discard all the values
+    # and have an empty output.
+    if not test.is_gpu_available():
+      return
+
+    data_list = [1.1, 2.1, 3.1, 4.1, 5.1, 6.1]
+    indices_list = [90, 70, 60, 100, 110, 40]
+    with self.test_session(use_gpu=True) as sess:
+      data = constant_op.constant(data_list, dtype=dtypes.float32)
+      indices = constant_op.constant(indices_list, dtype=dtypes.int32)
+      partitions = data_flow_ops.dynamic_partition(
+          data, indices, num_partitions=40)
+      partition_vals = sess.run(partitions)
+
+    self.assertEqual(40, len(partition_vals))
+    for i in range(40):
+      self.assertAllEqual([], partition_vals[i])
+
   def testErrorIndexOutOfRange(self):
     with self.test_session() as sess:
       data = constant_op.constant([[0, 1, 2], [3, 4, 5], [6, 7, 8], [9, 10, 11],
-- 
GitLab


From 09e0e117ef04d3ad3d654dc242b8e2817b6a79c9 Mon Sep 17 00:00:00 2001
From: David Norman <DavidNorman@users.noreply.github.com>
Date: Mon, 20 Nov 2017 20:59:10 +0000
Subject: [PATCH 0122/1225] Fix build issue with TensorShape constructor
 (#14649)

---
 tensorflow/compiler/tf2xla/xla_op_kernel.cc | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tensorflow/compiler/tf2xla/xla_op_kernel.cc b/tensorflow/compiler/tf2xla/xla_op_kernel.cc
index a052bb105e..2b4cc9ba2d 100644
--- a/tensorflow/compiler/tf2xla/xla_op_kernel.cc
+++ b/tensorflow/compiler/tf2xla/xla_op_kernel.cc
@@ -346,9 +346,9 @@ void XlaOpKernelContext::SetConstantOutput(int index, const Tensor& constant) {
 }
 
 void XlaOpKernelContext::SetInvalidOutput(int index) {
-  const TensorShape shape;
   Tensor* output = nullptr;
-  OP_REQUIRES_OK(context_, context_->allocate_output(index, shape, &output));
+  OP_REQUIRES_OK(context_,
+                 context_->allocate_output(index, TensorShape({}), &output));
   XlaExpression* expression = CastExpressionFromUninitializedTensor(output);
   xla::ComputationDataHandle handle;
   handle.set_handle(0);
-- 
GitLab


From ee446103347fa75a59799a704b058b6ee6dba78d Mon Sep 17 00:00:00 2001
From: CSJY <qmick@live.cn>
Date: Tue, 21 Nov 2017 04:59:40 +0800
Subject: [PATCH 0123/1225] Add feature get_placeholders() (#14541)

* Add feature get_placeholders()

* improve test case

* Improve code style according to pylint and review
---
 .../framework/python/framework/graph_util.py  | 28 ++++++++++++++++++-
 .../python/framework/graph_util_test.py       | 13 +++++++++
 2 files changed, 40 insertions(+), 1 deletion(-)

diff --git a/tensorflow/contrib/framework/python/framework/graph_util.py b/tensorflow/contrib/framework/python/framework/graph_util.py
index 8ab8711db4..2533e9ecc9 100644
--- a/tensorflow/contrib/framework/python/framework/graph_util.py
+++ b/tensorflow/contrib/framework/python/framework/graph_util.py
@@ -24,12 +24,14 @@ import six
 # pylint: disable=unused-import
 from tensorflow.core.framework import graph_pb2
 from tensorflow.core.framework import node_def_pb2
+from tensorflow.python.framework import ops
 from tensorflow.python.framework.graph_util_impl import _assert_nodes_are_present
 from tensorflow.python.framework.graph_util_impl import _bfs_for_reachable_nodes
 from tensorflow.python.framework.graph_util_impl import _extract_graph_summary
 from tensorflow.python.framework.graph_util_impl import _node_name
 
-__all__ = ["fuse_op"]
+
+__all__ = ["fuse_op", "get_placeholders"]
 
 
 def fuse_op(graph_def, input_nodes, output_nodes, output_dtypes,
@@ -126,3 +128,27 @@ def fuse_op(graph_def, input_nodes, output_nodes, output_dtypes,
   out.library.CopyFrom(graph_def.library)
   out.versions.CopyFrom(graph_def.versions)
   return out
+
+
+def get_placeholders(graph):
+  """Get placeholders of a graph.
+
+  Args:
+    graph: A tf.Graph.
+  Returns:
+    A list contains all placeholders of given graph.
+
+  Raises:
+    TypeError: If `graph` is not a tensorflow graph.
+  """
+
+  if not isinstance(graph, ops.Graph):
+    raise TypeError("Input graph needs to be a Graph: %s" % graph)
+
+  # For each placeholder() call, there is a corresponding
+  # operation of type 'Placeholder' registered to the graph.
+  # The return value (a Tensor) of placeholder() is the
+  # first output of this operation in fact.
+  operations = graph.get_operations()
+  result = [i.outputs[0] for i in operations if i.type == 'Placeholder']
+  return result
diff --git a/tensorflow/contrib/framework/python/framework/graph_util_test.py b/tensorflow/contrib/framework/python/framework/graph_util_test.py
index 87b992e22e..0105374c1b 100644
--- a/tensorflow/contrib/framework/python/framework/graph_util_test.py
+++ b/tensorflow/contrib/framework/python/framework/graph_util_test.py
@@ -21,6 +21,9 @@ from tensorflow.contrib.framework.python.framework import graph_util
 from tensorflow.core.framework import graph_pb2
 from tensorflow.core.framework import node_def_pb2
 from tensorflow.core.framework import types_pb2
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import ops
+from tensorflow.python.ops import array_ops
 from tensorflow.python.platform import test
 
 
@@ -57,5 +60,15 @@ class GraphUtilTest(test.TestCase):
     self.assertEqual(fused_graph_def.node[3].name, 'E')
 
 
+class GetPlaceholdersTest(test.TestCase):
+
+  def test_get_placeholders(self):
+    with ops.Graph().as_default() as g:
+      placeholders = [array_ops.placeholder(dtypes.float32) for _ in range(5)]
+      results = graph_util.get_placeholders(g)
+      self.assertEqual(sorted(placeholders, key=lambda x: x._id),  # pylint: disable=protected-access
+                       sorted(results, key=lambda x: x._id))  # pylint: disable=protected-access
+
+
 if __name__ == '__main__':
   test.main()
-- 
GitLab


From 0931164a60c5c9b0c0054c6051ed9387e28f8404 Mon Sep 17 00:00:00 2001
From: ManHyuk <manhyuk@kw.ac.kr>
Date: Tue, 21 Nov 2017 05:59:57 +0900
Subject: [PATCH 0124/1225] Fix typo (#14435)

* FIx typo

* Fix typo
---
 tensorflow/compiler/xla/xla_data.proto                      | 2 +-
 tensorflow/contrib/boosted_trees/lib/utils/batch_features.h | 2 +-
 tensorflow/tools/benchmark/benchmark_model.cc               | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/tensorflow/compiler/xla/xla_data.proto b/tensorflow/compiler/xla/xla_data.proto
index eac8f2ff07..e0f57b701b 100644
--- a/tensorflow/compiler/xla/xla_data.proto
+++ b/tensorflow/compiler/xla/xla_data.proto
@@ -440,7 +440,7 @@ message ConvolutionDimensionNumbers {
 message ConvolveRequest {
   ComputationDataHandle lhs = 2;
   ComputationDataHandle rhs = 3;  // This is the filter/kernel.
-  Window window = 4;              // Describes the filter/kenel.
+  Window window = 4;              // Describes the filter/kernel.
   ConvolutionDimensionNumbers dimension_numbers = 5;
 }
 
diff --git a/tensorflow/contrib/boosted_trees/lib/utils/batch_features.h b/tensorflow/contrib/boosted_trees/lib/utils/batch_features.h
index 7a550d6f73..badc629a11 100644
--- a/tensorflow/contrib/boosted_trees/lib/utils/batch_features.h
+++ b/tensorflow/contrib/boosted_trees/lib/utils/batch_features.h
@@ -56,7 +56,7 @@ class BatchFeatures {
     *num_sparse_int_features = sparse_int_feature_columns_.size();
     if (*num_dense_float_features == 0 && *num_sparse_float_features == 0 &&
         *num_sparse_int_features == 0) {
-      return errors::FailedPrecondition("Not intialized yet.");
+      return errors::FailedPrecondition("Not initialized yet.");
     }
     return Status::OK();
   }
diff --git a/tensorflow/tools/benchmark/benchmark_model.cc b/tensorflow/tools/benchmark/benchmark_model.cc
index 2d59299da4..9809ad52de 100644
--- a/tensorflow/tools/benchmark/benchmark_model.cc
+++ b/tensorflow/tools/benchmark/benchmark_model.cc
@@ -622,7 +622,7 @@ int Main(int argc, char** argv) {
     RecordBenchmarkEntry(output_prefix, benchmark_name, "meta-first-inference",
                          warmup_runs, warmup_time_us / 1000000.0);
 
-    // Time from starting to intialize TF to getting the first result back.
+    // Time from starting to initialize TF to getting the first result back.
     // This also assumes that only one warmup run is performed.
     RecordBenchmarkEntry(
         output_prefix, benchmark_name, "meta-init-plus-first-inference", 1,
-- 
GitLab


From 3814a7fb65bbdef853036a2d63d5791c035a3132 Mon Sep 17 00:00:00 2001
From: cinqS <ci.song@cisong.eu>
Date: Tue, 21 Nov 2017 05:00:08 +0800
Subject: [PATCH 0125/1225] code comments error in docker notebook (#14440)

* 1. changed x_with_bias to bias_with_x for clear understanding 2. changed weights updated comments in code for an error

* added lecun's repo for easier access for those who can't access google
---
 .../tools/docker/notebooks/2_getting_started.ipynb   | 12 ++++++------
 .../docker/notebooks/3_mnist_from_scratch.ipynb      |  2 ++
 2 files changed, 8 insertions(+), 6 deletions(-)

diff --git a/tensorflow/tools/docker/notebooks/2_getting_started.ipynb b/tensorflow/tools/docker/notebooks/2_getting_started.ipynb
index e171b439fe..b0963ebc3f 100644
--- a/tensorflow/tools/docker/notebooks/2_getting_started.ipynb
+++ b/tensorflow/tools/docker/notebooks/2_getting_started.ipynb
@@ -159,7 +159,7 @@
         "X = np.array([np.linspace(-2, 4, num_examples), np.linspace(-6, 6, num_examples)])\n",
         "X += np.random.randn(2, num_examples)\n",
         "x, y = X\n",
-        "x_with_bias = np.array([(1., a) for a in x]).astype(np.float32)\n",
+        "bias_with_x = np.array([(1., a) for a in x]).astype(np.float32)\n",
         "\n",
         "losses = []\n",
         "training_steps = 50\n",
@@ -167,7 +167,7 @@
         "\n",
         "with tf.Session() as sess:\n",
         "    # Set up all the tensors, variables, and operations.\n",
-        "    input = tf.constant(x_with_bias)\n",
+        "    input = tf.constant(bias_with_x)\n",
         "    target = tf.constant(np.transpose([y]).astype(np.float32))\n",
         "    weights = tf.Variable(tf.random_normal([2, 1], 0, 0.1))\n",
         "\n",
@@ -583,7 +583,7 @@
         "# Split into x and y\n",
         "x, y = X\n",
         "# Add the bias node which always has a value of 1\n",
-        "x_with_bias = np.array([(1., a) for a in x]).astype(np.float32)\n",
+        "bias_with_x = np.array([(1., a) for a in x]).astype(np.float32)\n",
         "\n",
         "# Keep track of the loss at each iteration so we can chart it later\n",
         "losses = []\n",
@@ -598,7 +598,7 @@
         "with tf.Session() as sess:\n",
         "    # Set up all the tensors.\n",
         "    # Our input layer is the x value and the bias node.\n",
-        "    input = tf.constant(x_with_bias)\n",
+        "    input = tf.constant(bias_with_x)\n",
         "    # Our target is the y values. They need to be massaged to the right shape.\n",
         "    target = tf.constant(np.transpose([y]).astype(np.float32))\n",
         "    # Weights are a variable. They change every time through the loop.\n",
@@ -621,7 +621,7 @@
         "    loss = tf.nn.l2_loss(yerror)\n",
         "\n",
         "    # Perform gradient descent. \n",
-        "    # This essentially just updates weights, like weights += grads * learning_rate\n",
+        "    # This essentially just updates weights, like weights -= grads * learning_rate\n",
         "    # using the partial derivative of the loss with respect to the\n",
         "    # weights. It's the direction we want to go to move toward lower error.\n",
         "    update_weights = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss)\n",
@@ -743,7 +743,7 @@
         "with tf.Session() as sess:\n",
         "    # Set up all the tensors.\n",
         "    # The input is the x values with the bias appended on to each x.\n",
-        "    input = tf.constant(x_with_bias)\n",
+        "    input = tf.constant(bias_with_x)\n",
         "    # We're trying to find the best fit for the target y values.\n",
         "    target = tf.constant(np.transpose([y]).astype(np.float32))\n",
         "    # Let's set up the weights randomly\n",
diff --git a/tensorflow/tools/docker/notebooks/3_mnist_from_scratch.ipynb b/tensorflow/tools/docker/notebooks/3_mnist_from_scratch.ipynb
index 614a19c178..5585ebdcd3 100644
--- a/tensorflow/tools/docker/notebooks/3_mnist_from_scratch.ipynb
+++ b/tensorflow/tools/docker/notebooks/3_mnist_from_scratch.ipynb
@@ -135,6 +135,8 @@
     "from six.moves.urllib.request import urlretrieve\n",
     "\n",
     "SOURCE_URL = 'https://storage.googleapis.com/cvdf-datasets/mnist/'\n",
+    "#SOURCE_URL = 'http://yann.lecun.com/exdb/mnist/'\n",
+    "# for those who have no access to google storage, use lecun's repo please\n",
     "WORK_DIRECTORY = \"/tmp/mnist-data\"\n",
     "\n",
     "def maybe_download(filename):\n",
-- 
GitLab


From 2c7685161e8afb33e6d3c9cfba5dcf2634c1cb8f Mon Sep 17 00:00:00 2001
From: ted chang <htchang@us.ibm.com>
Date: Mon, 20 Nov 2017 13:00:19 -0800
Subject: [PATCH 0126/1225] Added checkpoint V1 test for SaveRestoreShardedTest
 (#14473)

---
 tensorflow/python/training/saver_test.py | 16 ++++++++++++++--
 1 file changed, 14 insertions(+), 2 deletions(-)

diff --git a/tensorflow/python/training/saver_test.py b/tensorflow/python/training/saver_test.py
index 744b17dd22..0c827849e4 100644
--- a/tensorflow/python/training/saver_test.py
+++ b/tensorflow/python/training/saver_test.py
@@ -714,6 +714,8 @@ class SaverTest(test.TestCase):
 
 class SaveRestoreShardedTest(test.TestCase):
 
+  _WRITE_VERSION = saver_pb2.SaverDef.V1
+
   def _get_test_dir(self, dirname):
     test_dir = os.path.join(self.get_temp_dir(), dirname)
     gfile.MakeDirs(test_dir)
@@ -739,6 +741,7 @@ class SaveRestoreShardedTest(test.TestCase):
               "t0": t0.saveable,
               "t1": t1.saveable
           },
+          write_version=self._WRITE_VERSION,
           sharded=True)
       variables.global_variables_initializer().run()
       t0.insert("k1", 30.0).run()
@@ -759,7 +762,9 @@ class SaveRestoreShardedTest(test.TestCase):
         with sess.graph.device("/cpu:0"):
           v0 = variables.Variable(111, name="v0")
           t0 = saver_test_utils.CheckpointedOp(name="t0")
-        save = saver_module.Saver({"v0": v0, "t0": t0.saveable}, sharded=True)
+        save = saver_module.Saver({"v0": v0, "t0": t0.saveable},
+                                  write_version=self._WRITE_VERSION,
+                                  sharded=True)
         variables.global_variables_initializer().run()
         t0.insert("k11", 33.0).run()
         self.assertEqual(111, v0.eval())
@@ -777,7 +782,9 @@ class SaveRestoreShardedTest(test.TestCase):
         with sess.graph.device("/cpu:0"):
           v1 = variables.Variable(222)
           t1 = saver_test_utils.CheckpointedOp(name="t1")
-        save = saver_module.Saver({"v1": v1, "t1": t1.saveable}, sharded=True)
+        save = saver_module.Saver({"v1": v1, "t1": t1.saveable},
+                                  write_version=self._WRITE_VERSION,
+                                  sharded=True)
         variables.global_variables_initializer().run()
         t1.insert("k22", 44.0).run()
         self.assertEqual(222, v1.eval())
@@ -805,6 +812,7 @@ class SaveRestoreShardedTest(test.TestCase):
               "t0": t0.saveable,
               "t1": t1.saveable
           },
+          write_version=self._WRITE_VERSION,
           sharded=True)
       variables.global_variables_initializer().run()
       t0.insert("k11", 33.0).run()
@@ -970,6 +978,10 @@ class SaveRestoreShardedTest(test.TestCase):
     self._testPartitionedVariables(use_resource=True)
 
 
+class SaveRestoreShardedTestV2(SaveRestoreShardedTest):
+  _WRITE_VERSION = saver_pb2.SaverDef.V2
+
+
 class MaxToKeepTest(test.TestCase):
 
   def _get_test_dir(self, dirname):
-- 
GitLab


From 4a423d13c6814ede1941e35c7cdcdc21ca13e8ef Mon Sep 17 00:00:00 2001
From: Justin Lebar <jlebar@google.com>
Date: Mon, 20 Nov 2017 13:04:39 -0800
Subject: [PATCH 0127/1225] [XLA:CPU] Handle convolutions with no spatial dims.

Fixes two bugs:

 a) In ir_emitter, we'd dereference a null pointer if we had a conv with
    no spacial dims.
 b) In conv-canonicalization, we incorrectly assumed that the input
    permutation was equal to the output permutation.  This isn't true if
    the output batch/feature dims don't match the input batch/feature
    dims.

The testcase for (a) has to run with conv-canonicalization disabled,
because otherwise we canonicalize the conv to an eigen conv, which
doesn't hit the bug in the ir_emitter.

While we're here, we also add additional logging to a relevant CHECK in
HloInstruction.

PiperOrigin-RevId: 176404070
---
 .../xla/service/cpu/conv_canonicalization.cc  | 13 ++---
 .../compiler/xla/service/cpu/ir_emitter.cc    |  8 ++--
 .../compiler/xla/service/hlo_instruction.cc   |  5 +-
 .../compiler/xla/tests/convolution_test.cc    | 48 +++++++++++++++++++
 4 files changed, 62 insertions(+), 12 deletions(-)

diff --git a/tensorflow/compiler/xla/service/cpu/conv_canonicalization.cc b/tensorflow/compiler/xla/service/cpu/conv_canonicalization.cc
index 44cd2171af..80760356e3 100644
--- a/tensorflow/compiler/xla/service/cpu/conv_canonicalization.cc
+++ b/tensorflow/compiler/xla/service/cpu/conv_canonicalization.cc
@@ -53,7 +53,7 @@ StatusOr<bool> ConvCanonicalization::Run(HloModule* module) {
       //   kernel and output.
       //
       // For simplicity, as a first step, we reshape the input and filter to
-      // NHWC and HWIO order, respectively. This may lose precision but not
+      // NHWC and HWIO order, respectively. This may lose precision but won't
       // break the soundness.
       HloInstruction* input = hlo->mutable_operand(0);
 
@@ -98,14 +98,18 @@ StatusOr<bool> ConvCanonicalization::Run(HloModule* module) {
           HloInstruction::CreateTranspose(new_kernel_shape, kernel,
                                           new_kernel_dim_order));
 
+      std::vector<int64> new_output_dim_order(num_dims);
       std::vector<int64> new_conv_dims(num_dims);
       auto output_batch_dim = dnums.output_batch_dimension();
       auto output_feature_dim = dnums.output_feature_dimension();
+      new_output_dim_order[0] = output_batch_dim;
       new_conv_dims[0] = hlo->shape().dimensions(output_batch_dim);
       for (int i = 0; i < num_spatial_dims; ++i) {
+        new_output_dim_order[i + 1] = dnums.spatial_dimensions(i);
         new_conv_dims[i + 1] =
             hlo->shape().dimensions(dnums.spatial_dimensions(i));
       }
+      new_output_dim_order[num_dims - 1] = output_feature_dim;
       new_conv_dims[num_dims - 1] = hlo->shape().dimensions(output_feature_dim);
       Shape new_conv_shape =
           ShapeUtil::MakeShape(hlo->shape().element_type(), new_conv_dims);
@@ -129,14 +133,11 @@ StatusOr<bool> ConvCanonicalization::Run(HloModule* module) {
           HloInstruction::CreateConvolve(new_conv_shape, new_input, new_kernel,
                                          hlo->window(), new_dnums));
 
-      // kConvolution inherits the dimension mapping of its input, so we need to
-      // reshape the output back to the shape of the original convolution. This
-      // is done by apply the inverse permutation of the collapsing order of the
-      // input reshape.
+      // Reshape the output back to the shape of the original convolution.
       TF_RETURN_IF_ERROR(module->entry_computation()->ReplaceWithNewInstruction(
           hlo, HloInstruction::CreateTranspose(
                    hlo->shape(), new_conv,
-                   InversePermutation(new_input_dim_order))));
+                   InversePermutation(new_output_dim_order))));
       changed = true;
     }
   }
diff --git a/tensorflow/compiler/xla/service/cpu/ir_emitter.cc b/tensorflow/compiler/xla/service/cpu/ir_emitter.cc
index 8fba823b97..49f4782693 100644
--- a/tensorflow/compiler/xla/service/cpu/ir_emitter.cc
+++ b/tensorflow/compiler/xla/service/cpu/ir_emitter.cc
@@ -795,7 +795,7 @@ Status IrEmitter::HandleSelectAndScatter(HloInstruction* select_and_scatter) {
   // operand index is within the bounds. The unsigned comparison includes
   // checking whether the operand index >= 0.
   llvm_ir::IrArray::Index operand_index(source_index.size());
-  llvm::Value* in_bounds_condition = ir_builder_.getInt1(true);
+  llvm::Value* in_bounds_condition = ir_builder_.getTrue();
   for (int64 i = 0; i < rank; ++i) {
     llvm::Value* strided_index = ir_builder_.CreateNSWMul(
         source_index[i], ir_builder_.getInt64(window.dimensions(i).stride()));
@@ -1140,7 +1140,7 @@ Status IrEmitter::HandleConvolution(HloInstruction* convolution) {
           return ir_builder_.CreateICmpEQ(remainder, ir_builder_.getInt64(0));
         };
 
-        llvm::Value* in_bounds_condition = nullptr;
+        llvm::Value* in_bounds_condition = ir_builder_.getInt1(true);
         for (int i = 0; i < num_spatial_dims; ++i) {
           llvm::ConstantInt* input_bound =
               ir_builder_.getInt64(window_util::DilatedBound(
@@ -1153,9 +1153,7 @@ Status IrEmitter::HandleConvolution(HloInstruction* convolution) {
           llvm::Value* dim_ok =
               ir_builder_.CreateAnd(dim_in_bound, dim_not_in_hole);
           in_bounds_condition =
-              in_bounds_condition
-                  ? ir_builder_.CreateAnd(in_bounds_condition, dim_ok)
-                  : dim_ok;
+              ir_builder_.CreateAnd(in_bounds_condition, dim_ok);
         }
 
         // Now we need to map the dilated base coordinates back to the actual
diff --git a/tensorflow/compiler/xla/service/hlo_instruction.cc b/tensorflow/compiler/xla/service/hlo_instruction.cc
index ff16f7558e..464af7c554 100644
--- a/tensorflow/compiler/xla/service/hlo_instruction.cc
+++ b/tensorflow/compiler/xla/service/hlo_instruction.cc
@@ -650,7 +650,10 @@ HloInstruction::CreateSelectAndScatter(
   CHECK_EQ(shape.dimensions().size(), operand->shape().dimensions().size());
   CHECK(std::equal(operand->shape().dimensions().begin(),
                    operand->shape().dimensions().end(),
-                   Permute(dimensions, shape.dimensions()).begin()));
+                   Permute(dimensions, shape.dimensions()).begin()))
+      << "shape: " << ShapeUtil::HumanString(shape)
+      << ", operand->shape(): " << ShapeUtil::HumanString(shape)
+      << ", dimensions: {" << Join(dimensions, ", ") << "}";
   auto instruction =
       WrapUnique(new HloInstruction(HloOpcode::kTranspose, shape));
   instruction->AppendOperand(operand);
diff --git a/tensorflow/compiler/xla/tests/convolution_test.cc b/tensorflow/compiler/xla/tests/convolution_test.cc
index 7425f778a6..8de7c9ffdc 100644
--- a/tensorflow/compiler/xla/tests/convolution_test.cc
+++ b/tensorflow/compiler/xla/tests/convolution_test.cc
@@ -458,6 +458,54 @@ XLA_TEST_F(ConvolutionTest, Convolve2D_1x3x3x5_3x3x5x5_Valid) {
                            error_spec_);
 }
 
+// Test fixture to run convolution tests with and without convolution
+// canonicalization enabled.
+class ConvolveWithAndWithoutCanonicalization
+    : public ConvolutionTest,
+      public ::testing::WithParamInterface<bool> {};
+
+XLA_TEST_P(ConvolveWithAndWithoutCanonicalization,
+           DISABLED_ON_GPU(Convolve2D_NoSpatialDims)) {
+  if (GetParam()) {
+    execution_options_.mutable_debug_options()->add_xla_disable_hlo_passes(
+        "convolution-canonicalization");
+  }
+  ComputationBuilder builder(client_, TestName());
+  Shape input_shape = ShapeUtil::MakeShape(F32, {4, 29});
+  Shape filter_shape = ShapeUtil::MakeShape(F32, {4, 10});
+
+  auto input = builder.Parameter(0, input_shape, "input");
+  auto filter = builder.Parameter(1, filter_shape, "filter");
+
+  ConvolutionDimensionNumbers dnums;
+  dnums.set_input_feature_dimension(0);
+  dnums.set_input_batch_dimension(1);
+  dnums.set_kernel_input_feature_dimension(0);
+  dnums.set_kernel_output_feature_dimension(1);
+  dnums.set_output_batch_dimension(0);
+  dnums.set_output_feature_dimension(1);
+  auto conv = builder.ConvWithGeneralDimensions(input, filter, {},
+                                                Padding::kValid, dnums);
+
+  Array2D<float> param0(4, 29);
+  param0.FillUnique();
+
+  Array2D<float> param1(4, 10);
+  param1.FillUnique();
+
+  Array2D<float> expected_result(29, 10);
+  expected_result.Fill(0);
+
+  ComputeAndCompare(
+      &builder, conv,
+      {*Literal::CreateFromArray(param0), *Literal::CreateFromArray(param1)},
+      error_spec_);
+}
+
+INSTANTIATE_TEST_CASE_P(ConvolveWithAndWithoutCanonicalization_Instantiation,
+                        ConvolveWithAndWithoutCanonicalization,
+                        ::testing::Values(true, false));
+
 struct Convolve1DTestParam {
   int64 input_feature;
   int64 output_feature;
-- 
GitLab


From 3e462d8efd076e70bec0db596051a548a765e1c2 Mon Sep 17 00:00:00 2001
From: James Keeling <jtkeeling@google.com>
Date: Mon, 20 Nov 2017 13:04:42 -0800
Subject: [PATCH 0128/1225] K-FAC: Allow sharding of cov and inv update ops and
 colocation of grads with ops

PiperOrigin-RevId: 176404077
---
 .../contrib/kfac/python/kernel_tests/BUILD    |   2 +
 .../python/kernel_tests/estimator_test.py     |  25 +++
 .../kernel_tests/fisher_factors_test.py       |  20 ++
 tensorflow/contrib/kfac/python/ops/BUILD      |   1 +
 .../contrib/kfac/python/ops/estimator.py      |  94 +++++++-
 .../contrib/kfac/python/ops/fisher_factors.py | 207 ++++++++++++------
 .../kfac/python/ops/layer_collection.py       |  11 +-
 .../contrib/kfac/python/ops/optimizer.py      |  44 ++--
 8 files changed, 316 insertions(+), 88 deletions(-)

diff --git a/tensorflow/contrib/kfac/python/kernel_tests/BUILD b/tensorflow/contrib/kfac/python/kernel_tests/BUILD
index 7d65ac9a43..95fba59e3c 100644
--- a/tensorflow/contrib/kfac/python/kernel_tests/BUILD
+++ b/tensorflow/contrib/kfac/python/kernel_tests/BUILD
@@ -16,6 +16,7 @@ py_test(
         "//tensorflow/contrib/kfac/python/ops:utils",
         "//tensorflow/python:array_ops",
         "//tensorflow/python:client_testlib",
+        "//tensorflow/python:constant_op",
         "//tensorflow/python:dtypes",
         "//tensorflow/python:framework_ops",
         "//tensorflow/python:init_ops",
@@ -33,6 +34,7 @@ py_test(
         "//tensorflow/contrib/kfac/python/ops:fisher_factors",
         "//tensorflow/python:array_ops",
         "//tensorflow/python:client_testlib",
+        "//tensorflow/python:constant_op",
         "//tensorflow/python:dtypes",
         "//tensorflow/python:framework_ops",
         "//tensorflow/python:gradients",
diff --git a/tensorflow/contrib/kfac/python/kernel_tests/estimator_test.py b/tensorflow/contrib/kfac/python/kernel_tests/estimator_test.py
index b52a7b52a7..9b28c45c72 100644
--- a/tensorflow/contrib/kfac/python/kernel_tests/estimator_test.py
+++ b/tensorflow/contrib/kfac/python/kernel_tests/estimator_test.py
@@ -21,6 +21,7 @@ from __future__ import print_function
 from tensorflow.contrib.kfac.python.ops import estimator
 from tensorflow.contrib.kfac.python.ops import layer_collection as lc
 from tensorflow.contrib.kfac.python.ops import utils
+from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
 from tensorflow.python.ops import array_ops
@@ -33,6 +34,30 @@ from tensorflow.python.platform import test
 _ALL_ESTIMATION_MODES = ["gradients", "empirical", "curvature_prop", "exact"]
 
 
+class DeviceContextGeneratorTest(test.TestCase):
+
+  def testNoDevice(self):
+    device_context_generator = estimator._DeviceContextGenerator(None)
+    with ops.device("/device:CPU:0"):  # This is what will be used
+      with device_context_generator():  # Does nothing
+        a = constant_op.constant([2.0], name="a")
+    self.assertEqual("/device:CPU:0", a.op.device)
+
+  def testTwoDevices(self):
+    device_context_generator = estimator._DeviceContextGenerator(
+        ["/device:GPU:0", "/device:GPU:1"])
+    with ops.device("/device:CPU:0"):  # Will be over-ridden by the inner scopes
+      with device_context_generator():
+        a = constant_op.constant([2.0], name="a")
+      with device_context_generator():
+        b = constant_op.constant([2.0], name="b")
+      with device_context_generator():
+        c = constant_op.constant([2.0], name="c")
+    self.assertEqual("/device:GPU:0", a.op.device)
+    self.assertEqual("/device:GPU:1", b.op.device)
+    self.assertEqual("/device:GPU:0", c.op.device)
+
+
 class EstimatorTest(test.TestCase):
 
   def setUp(self):
diff --git a/tensorflow/contrib/kfac/python/kernel_tests/fisher_factors_test.py b/tensorflow/contrib/kfac/python/kernel_tests/fisher_factors_test.py
index fbb3d21913..5e2ce5a309 100644
--- a/tensorflow/contrib/kfac/python/kernel_tests/fisher_factors_test.py
+++ b/tensorflow/contrib/kfac/python/kernel_tests/fisher_factors_test.py
@@ -22,6 +22,7 @@ import numpy as np
 import numpy.random as npr
 
 from tensorflow.contrib.kfac.python.ops import fisher_factors as ff
+from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops as tf_ops
 from tensorflow.python.framework import random_seed
@@ -32,6 +33,25 @@ from tensorflow.python.ops import variables as tf_variables
 from tensorflow.python.platform import test
 
 
+class MaybeColocateTest(test.TestCase):
+
+  def testFalse(self):
+    with tf_ops.Graph().as_default():
+      a = constant_op.constant([2.0], name='a')
+      with ff._maybe_colocate_with(a, False):
+        b = constant_op.constant(3.0, name='b')
+      self.assertEqual([b'loc:@a'], a.op.colocation_groups())
+      self.assertEqual([b'loc:@b'], b.op.colocation_groups())
+
+  def testTrue(self):
+    with tf_ops.Graph().as_default():
+      a = constant_op.constant([2.0], name='a')
+      with ff._maybe_colocate_with(a, True):
+        b = constant_op.constant(3.0, name='b')
+      self.assertEqual([b'loc:@a'], a.op.colocation_groups())
+      self.assertEqual([b'loc:@a'], b.op.colocation_groups())
+
+
 class FisherFactorTestingDummy(ff.FisherFactor):
   """Dummy class to test the non-abstract methods on ff.FisherFactor."""
 
diff --git a/tensorflow/contrib/kfac/python/ops/BUILD b/tensorflow/contrib/kfac/python/ops/BUILD
index de4b8920b8..b2272a4cee 100644
--- a/tensorflow/contrib/kfac/python/ops/BUILD
+++ b/tensorflow/contrib/kfac/python/ops/BUILD
@@ -171,6 +171,7 @@ py_library(
     deps = [
         ":utils",
         "//tensorflow/python:control_flow_ops",
+        "//tensorflow/python:framework_ops",
         "//tensorflow/python:gradients",
         "//tensorflow/python:util",
         "//third_party/py/numpy",
diff --git a/tensorflow/contrib/kfac/python/ops/estimator.py b/tensorflow/contrib/kfac/python/ops/estimator.py
index ce4e776324..c353f3592f 100644
--- a/tensorflow/contrib/kfac/python/ops/estimator.py
+++ b/tensorflow/contrib/kfac/python/ops/estimator.py
@@ -18,16 +18,54 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
+import contextlib
+import itertools
 import math
 
 import numpy as np
 
 from tensorflow.contrib.kfac.python.ops import utils
+from tensorflow.python.framework import ops as tf_ops
 from tensorflow.python.ops import control_flow_ops
 from tensorflow.python.ops import gradients_impl
 from tensorflow.python.util import nest
 
 
+class _DeviceContextGenerator(object):
+  """Class for generating device contexts in a round-robin fashion."""
+
+  def __init__(self, devices):
+    """Creates a _DeviceContextGenerator object.
+
+    Example usage:
+
+    ```python
+    dcg = _DeviceContextGenerator(['/gpu:0', 'gpu:1'])
+    with dcg():
+      # All operations in this context will be placed on GPU 0
+      ...
+    with dcg():
+      # All operations in this context will be placed on GPU 1
+      ...
+    ```
+
+    Args:
+      devices: An iterable of device strings (or None). Successive calls to
+          __call__ will give contexts which place devices on these devices in
+          a round-robin fashion.
+    """
+    self._cycle = None if devices is None else itertools.cycle(devices)
+
+  @contextlib.contextmanager
+  def __call__(self):
+    """Returns a context manager specifying the default device."""
+    if self._cycle is None:
+      yield
+    else:
+      with tf_ops.device(next(self._cycle)):
+        yield
+
+
 class FisherEstimator(object):
   """Fisher estimator class supporting various approximations of the Fisher."""
 
@@ -36,7 +74,10 @@ class FisherEstimator(object):
                cov_ema_decay,
                damping,
                layer_collection,
-               estimation_mode="gradients"):
+               estimation_mode="gradients",
+               colocate_gradients_with_ops=False,
+               cov_devices=None,
+               inv_devices=None):
     """Create a FisherEstimator object.
 
     Args:
@@ -69,6 +110,14 @@ class FisherEstimator(object):
           for each coordinate of the output instead of using 1/-1 vectors.  It
           is more expensive to compute than the other three options by a factor
           equal to the output dimension, roughly speaking.
+      colocate_gradients_with_ops: Whether we should request gradients be
+          colocated with their respective ops.
+      cov_devices: Iterable of device strings (e.g. '/gpu:0'). Covariance
+          computations will be placed on these devices in a round-robin fashion.
+          Can be None, which means that no devices are specified.
+      inv_devices: Iterable of device strings (e.g. '/gpu:0'). Inversion
+          computations will be placed on these devices in a round-robin fashion.
+          Can be None, which means that no devices are specified.
 
     Raises:
       ValueError: If no losses have been registered with layer_collection.
@@ -86,6 +135,12 @@ class FisherEstimator(object):
         "curvature_prop": self._get_grads_lists_curvature_prop,
         "exact": self._get_grads_lists_exact
     }
+    self._colocate_gradients_with_ops = colocate_gradients_with_ops
+    self._cov_device_context_generator = _DeviceContextGenerator(cov_devices)
+    if inv_devices == cov_devices:
+      self._inv_device_context_generator = self._cov_device_context_generator
+    else:
+      self._inv_device_context_generator = _DeviceContextGenerator(inv_devices)
     setup = self._setup(cov_ema_decay)
     self.cov_update_op, self.inv_update_op, self.inv_updates_dict = setup
 
@@ -219,8 +274,13 @@ class FisherEstimator(object):
       raise ValueError("Unrecognized value {} for estimation_mode.".format(
           self._estimation_mode))
 
+    # TODO(b/68033310): This loop round-robins the "concat" operations which
+    # gather the inputs for the cov_updates. In future, we might do these
+    # computations locally then communicate the results, which would require a
+    # modification to this code.
     for grads_list, fb in zip(grads_lists, fisher_blocks_list):
-      fb.instantiate_factors(grads_list, self.damping)
+      with self._cov_device_context_generator():
+        fb.instantiate_factors(grads_list, self.damping)
 
     cov_updates = [
         factor.make_covariance_update_op(cov_ema_decay)
@@ -233,18 +293,23 @@ class FisherEstimator(object):
 
   def _get_all_inverse_update_ops(self):
     for factor in self._layers.get_factors():
-      for op in factor.make_inverse_update_ops():
-        yield op
+      with self._inv_device_context_generator():
+        for op in factor.make_inverse_update_ops():
+          yield op
 
   def _get_grads_lists_gradients(self, tensors):
-    grads_flat = gradients_impl.gradients(self._layers.total_sampled_loss(),
-                                          nest.flatten(tensors))
+    grads_flat = gradients_impl.gradients(
+        self._layers.total_sampled_loss(),
+        nest.flatten(tensors),
+        colocate_gradients_with_ops=self._colocate_gradients_with_ops)
     grads_all = nest.pack_sequence_as(tensors, grads_flat)
     return tuple((grad,) for grad in grads_all)
 
   def _get_grads_lists_empirical(self, tensors):
-    grads_flat = gradients_impl.gradients(self._layers.total_loss(),
-                                          nest.flatten(tensors))
+    grads_flat = gradients_impl.gradients(
+        self._layers.total_loss(),
+        nest.flatten(tensors),
+        colocate_gradients_with_ops=self._colocate_gradients_with_ops)
     grads_all = nest.pack_sequence_as(tensors, grads_flat)
     return tuple((grad,) for grad in grads_all)
 
@@ -262,11 +327,17 @@ class FisherEstimator(object):
     grads_flat = gradients_impl.gradients(
         nest.flatten(loss_inputs),
         nest.flatten(tensors),
-        grad_ys=nest.flatten(transformed_random_signs))
+        grad_ys=nest.flatten(transformed_random_signs),
+        colocate_gradients_with_ops=self._colocate_gradients_with_ops)
     grads_all = nest.pack_sequence_as(tensors, grads_flat)
     return tuple((grad,) for grad in grads_all)
 
   def _get_grads_lists_exact(self, tensors):
+    """Returns a list of all gradients, computing them exactly.
+
+    Args:
+      tensors: Tensors for which to compute gradients.
+    """
     # Loop over all coordinates of all losses.
     grads_all = []
     for loss in self._layers.losses:
@@ -274,6 +345,9 @@ class FisherEstimator(object):
         transformed_one_hot = loss.multiply_fisher_factor_replicated_one_hot(
             index)
         grads_flat = gradients_impl.gradients(
-            loss.inputs, nest.flatten(tensors), grad_ys=transformed_one_hot)
+            loss.inputs,
+            nest.flatten(tensors),
+            grad_ys=transformed_one_hot,
+            colocate_gradients_with_ops=self._colocate_gradients_with_ops)
         grads_all.append(nest.pack_sequence_as(tensors, grads_flat))
     return zip(*grads_all)
diff --git a/tensorflow/contrib/kfac/python/ops/fisher_factors.py b/tensorflow/contrib/kfac/python/ops/fisher_factors.py
index 4e36813369..fbc192f1dc 100644
--- a/tensorflow/contrib/kfac/python/ops/fisher_factors.py
+++ b/tensorflow/contrib/kfac/python/ops/fisher_factors.py
@@ -19,6 +19,7 @@ from __future__ import division
 from __future__ import print_function
 
 import abc
+import contextlib
 
 import numpy as np
 import six
@@ -50,7 +51,22 @@ EIGENVALUE_DECOMPOSITION_THRESHOLD = 2
 EIGENVALUE_CLIPPING_THRESHOLD = 0.0
 
 
-def set_global_constants(init_covariances_at_zero=None, zero_debias=None,
+@contextlib.contextmanager
+def _maybe_colocate_with(op, colocate_cov_ops_with_inputs):
+  """Context to colocate with `op` if `colocate_cov_ops_with_inputs`."""
+  if colocate_cov_ops_with_inputs:
+    if isinstance(op, (list, tuple)):
+      with tf_ops.colocate_with(op[0]):
+        yield
+    else:
+      with tf_ops.colocate_with(op):
+        yield
+  else:
+    yield
+
+
+def set_global_constants(init_covariances_at_zero=None,
+                         zero_debias=None,
                          eigenvalue_decomposition_threshold=None,
                          eigenvalue_clipping_threshold=None):
   """Sets various global constants used by the classes in this module."""
@@ -356,12 +372,21 @@ class FullFactor(InverseProvidingFactor):
   to any type of parameter in principle, but has very high variance.
   """
 
-  def __init__(self, params_grads, batch_size):
+  def __init__(self,
+               params_grads,
+               batch_size,
+               colocate_cov_ops_with_inputs=False):
     self._batch_size = batch_size
+    self._colocate_cov_ops_with_inputs = colocate_cov_ops_with_inputs
     self._orig_params_grads_name = scope_string_from_params(
         [params_grads, self._batch_size])
-    self._params_grads_flat = tuple(
-        utils.tensors_to_column(params_grad) for params_grad in params_grads)
+    params_grads_flat = []
+    for params_grad in params_grads:
+      with _maybe_colocate_with(params_grad,
+                                self._colocate_cov_ops_with_inputs):
+        col = utils.tensors_to_column(params_grad)
+        params_grads_flat.append(col)
+    self._params_grads_flat = tuple(params_grads_flat)
     super(FullFactor, self).__init__()
 
   @property
@@ -379,9 +404,11 @@ class FullFactor(InverseProvidingFactor):
 
   def _compute_new_cov(self, idx=0):
     # This will be a very basic rank 1 estimate
-    return ((self._params_grads_flat[idx] * array_ops.transpose(
-        self._params_grads_flat[idx])) / math_ops.cast(
-            self._batch_size, self._params_grads_flat[idx].dtype))
+    with _maybe_colocate_with(self._params_grads_flat[idx],
+                              self._colocate_cov_ops_with_inputs):
+      return ((self._params_grads_flat[idx] * array_ops.transpose(
+          self._params_grads_flat[idx])) / math_ops.cast(
+              self._batch_size, self._params_grads_flat[idx].dtype))
 
 
 class DiagonalFactor(FisherFactor):
@@ -402,10 +429,19 @@ class NaiveDiagonalFactor(DiagonalFactor):
   to any type of parameter in principle, but has very high variance.
   """
 
-  def __init__(self, params_grads, batch_size):
+  def __init__(self,
+               params_grads,
+               batch_size,
+               colocate_cov_ops_with_inputs=False):
     self._batch_size = batch_size
-    self._params_grads = tuple(
-        utils.tensors_to_column(params_grad) for params_grad in params_grads)
+    self._colocate_cov_ops_with_inputs = colocate_cov_ops_with_inputs
+    params_grads_flat = []
+    for params_grad in params_grads:
+      with _maybe_colocate_with(params_grad,
+                                self._colocate_cov_ops_with_inputs):
+        col = utils.tensors_to_column(params_grad)
+        params_grads_flat.append(col)
+    self._params_grads = tuple(params_grads_flat)
     self._orig_params_grads_name = scope_string_from_params(
         [self._params_grads, self._batch_size])
     super(NaiveDiagonalFactor, self).__init__()
@@ -423,8 +459,10 @@ class NaiveDiagonalFactor(DiagonalFactor):
     return len(self._params_grads)
 
   def _compute_new_cov(self, idx=0):
-    return (math_ops.square(self._params_grads[idx]) / math_ops.cast(
-        self._batch_size, self._params_grads[idx].dtype))
+    with _maybe_colocate_with(self._params_grads[idx],
+                              self._colocate_cov_ops_with_inputs):
+      return (math_ops.square(self._params_grads[idx]) / math_ops.cast(
+          self._batch_size, self._params_grads[idx].dtype))
 
 
 class FullyConnectedDiagonalFactor(DiagonalFactor):
@@ -440,7 +478,11 @@ class FullyConnectedDiagonalFactor(DiagonalFactor):
 
   # TODO(jamesmartens): add units tests for this class
 
-  def __init__(self, inputs, outputs_grads, has_bias=False):
+  def __init__(self,
+               inputs,
+               outputs_grads,
+               has_bias=False,
+               colocate_cov_ops_with_inputs=False):
     """Instantiate FullyConnectedDiagonalFactor.
 
     Args:
@@ -449,8 +491,11 @@ class FullyConnectedDiagonalFactor(DiagonalFactor):
       outputs_grads: List of Tensors of shape [batch_size, output_size].
         Gradient of loss with respect to layer's preactivations.
       has_bias: bool. If True, append '1' to each input.
+      colocate_cov_ops_with_inputs: Whether to colocate cov_update ops with
+          their inputs.
     """
     self._outputs_grads = outputs_grads
+    self._colocate_cov_ops_with_inputs = colocate_cov_ops_with_inputs
     self._batch_size = array_ops.shape(inputs)[0]
     self._orig_tensors_name = scope_string_from_params((inputs,) +
                                                        tuple(outputs_grads))
@@ -458,9 +503,10 @@ class FullyConnectedDiagonalFactor(DiagonalFactor):
     # Note that we precompute the required operations on the inputs since the
     # inputs don't change with the 'idx' argument to _compute_new_cov.  (Only
     # the target entry of _outputs_grads changes with idx.)
-    if has_bias:
-      inputs = _append_homog(inputs)
-    self._squared_inputs = math_ops.square(inputs)
+    with _maybe_colocate_with(inputs, self._colocate_cov_ops_with_inputs):
+      if has_bias:
+        inputs = _append_homog(inputs)
+      self._squared_inputs = math_ops.square(inputs)
 
     super(FullyConnectedDiagonalFactor, self).__init__()
 
@@ -481,12 +527,14 @@ class FullyConnectedDiagonalFactor(DiagonalFactor):
     # square of an outer product is the outer-product of the entry-wise squares.
     # The gradient is the outer product of the input and the output gradients,
     # so we just square both and then take their outer-product.
-    new_cov = math_ops.matmul(
-        self._squared_inputs,
-        math_ops.square(self._outputs_grads[idx]),
-        transpose_a=True)
-    new_cov /= math_ops.cast(self._batch_size, new_cov.dtype)
-    return new_cov
+    with _maybe_colocate_with(self._squared_inputs,
+                              self._colocate_cov_ops_with_inputs):
+      new_cov = math_ops.matmul(
+          self._squared_inputs,
+          math_ops.square(self._outputs_grads[idx]),
+          transpose_a=True)
+      new_cov /= math_ops.cast(self._batch_size, new_cov.dtype)
+      return new_cov
 
 
 class ConvDiagonalFactor(DiagonalFactor):
@@ -494,8 +542,14 @@ class ConvDiagonalFactor(DiagonalFactor):
 
   # TODO(jamesmartens): add units tests for this class
 
-  def __init__(self, inputs, outputs_grads, filter_shape, strides, padding,
-               has_bias=False):
+  def __init__(self,
+               inputs,
+               outputs_grads,
+               filter_shape,
+               strides,
+               padding,
+               has_bias=False,
+               colocate_cov_ops_with_inputs=False):
     """Creates a ConvDiagonalFactor object.
 
     Args:
@@ -510,10 +564,13 @@ class ConvDiagonalFactor(DiagonalFactor):
       padding: The padding in this layer (1-D of Tensor length 4).
       has_bias: Python bool. If True, the layer is assumed to have a bias
         parameter in addition to its filter parameter.
+      colocate_cov_ops_with_inputs: Whether to colocate cov_update ops with
+          their inputs.
     """
     self._filter_shape = filter_shape
     self._has_bias = has_bias
     self._outputs_grads = outputs_grads
+    self._colocate_cov_ops_with_inputs = colocate_cov_ops_with_inputs
 
     self._orig_tensors_name = scope_string_from_name((inputs,)
                                                      + tuple(outputs_grads))
@@ -521,18 +578,19 @@ class ConvDiagonalFactor(DiagonalFactor):
     # Note that we precompute the required operations on the inputs since the
     # inputs don't change with the 'idx' argument to _compute_new_cov.  (Only
     # the target entry of _outputs_grads changes with idx.)
-    filter_height, filter_width, _, _ = self._filter_shape
-    patches = array_ops.extract_image_patches(
-        inputs,
-        ksizes=[1, filter_height, filter_width, 1],
-        strides=strides,
-        rates=[1, 1, 1, 1],
-        padding=padding)
+    with _maybe_colocate_with(inputs, self._colocate_cov_ops_with_inputs):
+      filter_height, filter_width, _, _ = self._filter_shape
+      patches = array_ops.extract_image_patches(
+          inputs,
+          ksizes=[1, filter_height, filter_width, 1],
+          strides=strides,
+          rates=[1, 1, 1, 1],
+          padding=padding)
 
-    if has_bias:
-      patches = _append_homog(patches)
+      if has_bias:
+        patches = _append_homog(patches)
 
-    self._patches = patches
+      self._patches = patches
 
     super(ConvDiagonalFactor, self).__init__()
 
@@ -551,13 +609,15 @@ class ConvDiagonalFactor(DiagonalFactor):
     return len(self._outputs_grads)
 
   def _compute_new_cov(self, idx=0):
-    outputs_grad = self._outputs_grads[idx]
-    batch_size = array_ops.shape(self._patches)[0]
+    with _maybe_colocate_with(self._outputs_grads[idx],
+                              self._colocate_cov_ops_with_inputs):
+      outputs_grad = self._outputs_grads[idx]
+      batch_size = array_ops.shape(self._patches)[0]
 
-    new_cov = self._convdiag_sum_of_squares(self._patches, outputs_grad)
-    new_cov /= math_ops.cast(batch_size, new_cov.dtype)
+      new_cov = self._convdiag_sum_of_squares(self._patches, outputs_grad)
+      new_cov /= math_ops.cast(batch_size, new_cov.dtype)
 
-    return new_cov
+      return new_cov
 
   def _convdiag_sum_of_squares(self, patches, outputs_grad):
     # This computes the sum of the squares of the per-training-case "gradients".
@@ -572,7 +632,10 @@ class FullyConnectedKroneckerFactor(InverseProvidingFactor):
   """Kronecker factor for the input or output side of a fully-connected layer.
   """
 
-  def __init__(self, tensors, has_bias=False):
+  def __init__(self,
+               tensors,
+               has_bias=False,
+               colocate_cov_ops_with_inputs=False):
     """Instantiate FullyConnectedKroneckerFactor.
 
     Args:
@@ -580,11 +643,14 @@ class FullyConnectedKroneckerFactor(InverseProvidingFactor):
         layer's inputs or its output's gradients.
       has_bias: bool. If True, assume this factor is for the layer's inputs and
         append '1' to each row.
+      colocate_cov_ops_with_inputs: Whether to colocate cov_update ops with
+          their inputs.
     """
     # The tensor argument is either a tensor of input activations or a tensor of
     # output pre-activation gradients.
     self._has_bias = has_bias
     self._tensors = tensors
+    self._colocate_cov_ops_with_inputs = colocate_cov_ops_with_inputs
     super(FullyConnectedKroneckerFactor, self).__init__()
 
   @property
@@ -602,10 +668,12 @@ class FullyConnectedKroneckerFactor(InverseProvidingFactor):
     return len(self._tensors)
 
   def _compute_new_cov(self, idx=0):
-    tensor = self._tensors[idx]
-    if self._has_bias:
-      tensor = _append_homog(tensor)
-    return _compute_cov(tensor)
+    with _maybe_colocate_with(self._tensors[idx],
+                              self._colocate_cov_ops_with_inputs):
+      tensor = self._tensors[idx]
+      if self._has_bias:
+        tensor = _append_homog(tensor)
+      return _compute_cov(tensor)
 
 
 class ConvInputKroneckerFactor(InverseProvidingFactor):
@@ -618,7 +686,13 @@ class ConvInputKroneckerFactor(InverseProvidingFactor):
   Section 3.1 Estimating the factors.
   """
 
-  def __init__(self, inputs, filter_shape, strides, padding, has_bias=False):
+  def __init__(self,
+               inputs,
+               filter_shape,
+               strides,
+               padding,
+               has_bias=False,
+               colocate_cov_ops_with_inputs=False):
     """Initializes ConvInputKroneckerFactor.
 
     Args:
@@ -630,12 +704,15 @@ class ConvInputKroneckerFactor(InverseProvidingFactor):
         width_stride, in_channel_stride].
       padding: str. Padding method for layer. "SAME" or "VALID".
       has_bias: bool. If True, append 1 to in_channel.
+      colocate_cov_ops_with_inputs: Whether to colocate cov_update ops with
+          their inputs.
     """
     self._filter_shape = filter_shape
     self._strides = strides
     self._padding = padding
     self._has_bias = has_bias
     self._inputs = inputs
+    self._colocate_cov_ops_with_inputs = colocate_cov_ops_with_inputs
     super(ConvInputKroneckerFactor, self).__init__()
 
   @property
@@ -660,21 +737,22 @@ class ConvInputKroneckerFactor(InverseProvidingFactor):
       raise ValueError("ConvInputKroneckerFactor only supports idx = 0")
 
     # TODO(jamesmartens): factor this patches stuff out into a utility function
-    filter_height, filter_width, in_channels, _ = self._filter_shape
-    patches = array_ops.extract_image_patches(
-        self._inputs,
-        ksizes=[1, filter_height, filter_width, 1],
-        strides=self._strides,
-        rates=[1, 1, 1, 1],
-        padding=self._padding)
+    with _maybe_colocate_with(self._inputs, self._colocate_cov_ops_with_inputs):
+      filter_height, filter_width, in_channels, _ = self._filter_shape
+      patches = array_ops.extract_image_patches(
+          self._inputs,
+          ksizes=[1, filter_height, filter_width, 1],
+          strides=self._strides,
+          rates=[1, 1, 1, 1],
+          padding=self._padding)
 
-    flatten_size = (filter_height * filter_width * in_channels)
-    patches_flat = array_ops.reshape(patches, [-1, flatten_size])
+      flatten_size = (filter_height * filter_width * in_channels)
+      patches_flat = array_ops.reshape(patches, [-1, flatten_size])
 
-    if self._has_bias:
-      patches_flat = _append_homog(patches_flat)
+      if self._has_bias:
+        patches_flat = _append_homog(patches_flat)
 
-    return _compute_cov(patches_flat)
+      return _compute_cov(patches_flat)
 
 
 class ConvOutputKroneckerFactor(InverseProvidingFactor):
@@ -688,15 +766,18 @@ class ConvOutputKroneckerFactor(InverseProvidingFactor):
   Section 3.1 Estimating the factors.
   """
 
-  def __init__(self, outputs_grads):
+  def __init__(self, outputs_grads, colocate_cov_ops_with_inputs=False):
     """Initializes ConvOutputKroneckerFactor.
 
     Args:
       outputs_grads: list of Tensors. Each Tensor is of shape
-        [batch_size, height, width, out_channels].
+          [batch_size, height, width, out_channels].
+      colocate_cov_ops_with_inputs: Whether to colocate cov_update ops with
+          their inputs.
     """
     self._out_channels = outputs_grads[0].shape.as_list()[3]
     self._outputs_grads = outputs_grads
+    self._colocate_cov_ops_with_inputs = colocate_cov_ops_with_inputs
     super(ConvOutputKroneckerFactor, self).__init__()
 
   @property
@@ -713,6 +794,8 @@ class ConvOutputKroneckerFactor(InverseProvidingFactor):
     return len(self._outputs_grads)
 
   def _compute_new_cov(self, idx=0):
-    reshaped_tensor = array_ops.reshape(self._outputs_grads[idx],
-                                        [-1, self._out_channels])
-    return _compute_cov(reshaped_tensor)
+    with _maybe_colocate_with(self._outputs_grads[idx],
+                              self._colocate_cov_ops_with_inputs):
+      reshaped_tensor = array_ops.reshape(self._outputs_grads[idx],
+                                          [-1, self._out_channels])
+      return _compute_cov(reshaped_tensor)
diff --git a/tensorflow/contrib/kfac/python/ops/layer_collection.py b/tensorflow/contrib/kfac/python/ops/layer_collection.py
index 2139a261e0..cead023c15 100644
--- a/tensorflow/contrib/kfac/python/ops/layer_collection.py
+++ b/tensorflow/contrib/kfac/python/ops/layer_collection.py
@@ -129,7 +129,10 @@ class LayerCollection(object):
         sum.
   """
 
-  def __init__(self, graph=None, name="LayerCollection"):
+  def __init__(self,
+               graph=None,
+               colocate_cov_ops_with_inputs=False,
+               name="LayerCollection"):
     self.fisher_blocks = LayerParametersDict()
     self.fisher_factors = OrderedDict()
     self._linked_parameters = dict(
@@ -140,6 +143,7 @@ class LayerCollection(object):
     self._default_generic_approximation = APPROX_FULL_NAME
     self._default_fully_connected_approximation = APPROX_KRONECKER_NAME
     self._default_convolution_2d_approximation = APPROX_KRONECKER_NAME
+    self._colocate_cov_ops_with_inputs = colocate_cov_ops_with_inputs
 
     with variable_scope.variable_scope(None, default_name=name) as scope:
       self._var_scope = scope.name
@@ -710,6 +714,9 @@ class LayerCollection(object):
            "LayerCollection.fisher_factors. The pair cannot be hashed.").format(
                cls, args))
 
+    kwargs = {
+        "colocate_cov_ops_with_inputs": self._colocate_cov_ops_with_inputs
+    }
     with variable_scope.variable_scope(self._var_scope):
       return utils.setdefault(self.fisher_factors, (cls, args),
-                              lambda: cls(*args))
+                              lambda: cls(*args, **kwargs))
diff --git a/tensorflow/contrib/kfac/python/ops/optimizer.py b/tensorflow/contrib/kfac/python/ops/optimizer.py
index 88299e495c..a0e2fedc5c 100644
--- a/tensorflow/contrib/kfac/python/ops/optimizer.py
+++ b/tensorflow/contrib/kfac/python/ops/optimizer.py
@@ -35,17 +35,19 @@ from tensorflow.python.training import gradient_descent
 class KfacOptimizer(gradient_descent.GradientDescentOptimizer):
   """The KFAC Optimizer (https://arxiv.org/abs/1503.05671)."""
 
-  def __init__(
-      self,
-      learning_rate,
-      cov_ema_decay,
-      damping,
-      layer_collection,
-      momentum=0.,
-      momentum_type="regular",
-      norm_constraint=None,
-      name="KFAC",
-      estimation_mode="gradients"):
+  def __init__(self,
+               learning_rate,
+               cov_ema_decay,
+               damping,
+               layer_collection,
+               momentum=0.,
+               momentum_type="regular",
+               norm_constraint=None,
+               name="KFAC",
+               estimation_mode="gradients",
+               colocate_gradients_with_ops=False,
+               cov_devices=None,
+               inv_devices=None):
     """Initializes the KFAC optimizer with the given settings.
 
     Args:
@@ -77,6 +79,14 @@ class KfacOptimizer(gradient_descent.GradientDescentOptimizer):
           'gradients', 'empirical', 'curvature_propagation', or 'exact'.
           (Default: 'gradients'). See the doc-string for FisherEstimator for
           more a more detailed description of these options.
+      colocate_gradients_with_ops: Whether we should request gradients we
+          compute in the estimator be colocated with their respective ops.
+      cov_devices: Iterable of device strings (e.g. '/gpu:0'). Covariance
+          computations will be placed on these devices in a round-robin fashion.
+          Can be None, which means that no devices are specified.
+      inv_devices: Iterable of device strings (e.g. '/gpu:0'). Inversion
+          computations will be placed on these devices in a round-robin fashion.
+          Can be None, which means that no devices are specified.
 
     Raises:
       ValueError: If the momentum type is unsupported.
@@ -90,9 +100,15 @@ class KfacOptimizer(gradient_descent.GradientDescentOptimizer):
     # now it's just all the trainable variables.
     variables = tf_variables.trainable_variables()
 
-    self._fisher_est = est.FisherEstimator(variables, cov_ema_decay, damping,
-                                           layer_collection,
-                                           estimation_mode=estimation_mode)
+    self._fisher_est = est.FisherEstimator(
+        variables,
+        cov_ema_decay,
+        damping,
+        layer_collection,
+        estimation_mode=estimation_mode,
+        colocate_gradients_with_ops=colocate_gradients_with_ops,
+        cov_devices=cov_devices,
+        inv_devices=inv_devices)
 
     momentum_type = momentum_type.lower()
     legal_momentum_types = ["regular", "adam", "qmodel"]
-- 
GitLab


From 7479dc02a8b9314316a91ea1dd55f990ac2df0bb Mon Sep 17 00:00:00 2001
From: Benoit Steiner <bsteiner@google.com>
Date: Mon, 20 Nov 2017 13:14:33 -0800
Subject: [PATCH 0129/1225] Added an option to make the graph optimizer less
 verbose

PiperOrigin-RevId: 176405416
---
 tensorflow/python/grappler/tf_optimizer.i  | 8 +++++---
 tensorflow/python/grappler/tf_optimizer.py | 7 +++++--
 2 files changed, 10 insertions(+), 5 deletions(-)

diff --git a/tensorflow/python/grappler/tf_optimizer.i b/tensorflow/python/grappler/tf_optimizer.i
index f3d8fe194b..719ddaae21 100644
--- a/tensorflow/python/grappler/tf_optimizer.i
+++ b/tensorflow/python/grappler/tf_optimizer.i
@@ -93,7 +93,7 @@ void DetectDevices(std::unordered_map<string, tensorflow::DeviceProperties>* dev
 PyObject* TF_OptimizeGraph(
       const tensorflow::RewriterConfig& rewriter_config,
       const tensorflow::MetaGraphDef& metagraph,
-      const string& graph_id, TF_Status* out_status) {
+      bool verbose, const string& graph_id, TF_Status* out_status) {
     tensorflow::grappler::ItemConfig item_config;
     item_config.inline_functions = false;
     item_config.apply_optimizations = false;
@@ -106,7 +106,9 @@ PyObject* TF_OptimizeGraph(
     tensorflow::GraphDef out_graph;
     tensorflow::grappler::MetaOptimizer optimizer(cpu_device, rewriter_config);
     tensorflow::Status status = optimizer.Optimize(&cluster, *grappler_item, &out_graph);
-    optimizer.PrintResult();
+    if (verbose) {
+      optimizer.PrintResult();
+    }
     tensorflow::Set_TF_Status_from_Status(out_status, status);
     string out_graph_str = out_graph.SerializeAsString();
     PyObject* ret = PyBytes_FromStringAndSize(out_graph_str.data(),
@@ -119,7 +121,7 @@ PyObject* TF_OptimizeGraph(
 // Wrap this function
 PyObject* TF_OptimizeGraph(
     const tensorflow::RewriterConfig& rewriter_config,
-    const tensorflow::MetaGraphDef& metagraph,
+    const tensorflow::MetaGraphDef& metagraph, bool verbose,
     const string& graph_id, TF_Status* out_status);
 
 
diff --git a/tensorflow/python/grappler/tf_optimizer.py b/tensorflow/python/grappler/tf_optimizer.py
index d0464c6054..1c608ce319 100644
--- a/tensorflow/python/grappler/tf_optimizer.py
+++ b/tensorflow/python/grappler/tf_optimizer.py
@@ -23,12 +23,15 @@ from tensorflow.python import pywrap_tensorflow as tf_opt
 from tensorflow.python.framework import errors
 
 
-def OptimizeGraph(rewriter_config, metagraph, graph_id=b'graph_to_optimize'):
+def OptimizeGraph(rewriter_config,
+                  metagraph,
+                  verbose=True,
+                  graph_id=b'graph_to_optimize'):
   """Optimize the provided metagraph."""
   with errors.raise_exception_on_not_ok_status() as status:
     ret_from_swig = tf_opt.TF_OptimizeGraph(rewriter_config.SerializeToString(),
                                             metagraph.SerializeToString(),
-                                            graph_id, status)
+                                            verbose, graph_id, status)
   if ret_from_swig is None:
     return None
   out_graph = graph_pb2.GraphDef().FromString(ret_from_swig)
-- 
GitLab


From 5912cc53590c9e013076fbf7c97837558abe8a5e Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 20 Nov 2017 13:56:03 -0800
Subject: [PATCH 0130/1225] Clean up initialization, declarations and access of
 TransferManagerTest

PiperOrigin-RevId: 176411773
---
 .../xla/tests/transfer_manager_test.cc         | 18 +++++++-----------
 1 file changed, 7 insertions(+), 11 deletions(-)

diff --git a/tensorflow/compiler/xla/tests/transfer_manager_test.cc b/tensorflow/compiler/xla/tests/transfer_manager_test.cc
index c30cd1b7b8..f2a6474948 100644
--- a/tensorflow/compiler/xla/tests/transfer_manager_test.cc
+++ b/tensorflow/compiler/xla/tests/transfer_manager_test.cc
@@ -33,29 +33,26 @@ limitations under the License.
 #include "tensorflow/core/platform/stream_executor_no_cuda.h"
 #include "tensorflow/core/platform/types.h"
 
-namespace se = ::perftools::gputools;
-
 namespace xla {
-
 namespace {
 
 class TransferManagerTest : public LocalClientTestBase {
  protected:
-  TransferManagerTest() {
-    shape_size_fn_ = [this](const Shape& shape) {
-      return transfer_manager_->GetByteSizeRequirement(shape);
-    };
-  }
+  TransferManagerTest()
+      : shape_size_fn_([this](const Shape& shape) {
+          return transfer_manager_->GetByteSizeRequirement(shape);
+        }) {}
 
-  ~TransferManagerTest() override {}
+  ~TransferManagerTest() override = default;
 
   std::unique_ptr<ScopedShapedBuffer> AllocateDeviceBuffer(const Shape& shape) {
     return ScopedShapedBuffer::Allocate(
                shape, GetOrCreateAllocator(local_client_->platform()),
                /*device_ordinal=*/0, shape_size_fn_)
-        .ConsumeValueOrDie();
+        .ValueOrDie();
   }
 
+ private:
   std::function<int64(const Shape&)> shape_size_fn_;
 };
 
@@ -215,5 +212,4 @@ XLA_TEST_F(TransferManagerTest, TransferNestedTuple) {
 }
 
 }  // namespace
-
 }  // namespace xla
-- 
GitLab


From d5cd77276013aea1c22c0f0f85e56a62c2a7ad96 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 20 Nov 2017 13:59:10 -0800
Subject: [PATCH 0131/1225] Add a test for multidimensional sparse quantiles.

PiperOrigin-RevId: 176412183
---
 .../python/kernel_tests/quantile_ops_test.py  | 36 ++++++++++++-------
 1 file changed, 23 insertions(+), 13 deletions(-)

diff --git a/tensorflow/contrib/boosted_trees/python/kernel_tests/quantile_ops_test.py b/tensorflow/contrib/boosted_trees/python/kernel_tests/quantile_ops_test.py
index 2a72961504..888d5c57ed 100644
--- a/tensorflow/contrib/boosted_trees/python/kernel_tests/quantile_ops_test.py
+++ b/tensorflow/contrib/boosted_trees/python/kernel_tests/quantile_ops_test.py
@@ -48,15 +48,16 @@ class QuantileBucketsOpTest(test_util.TensorFlowTestCase):
   def testBasicQuantileBuckets(self):
     """Sets up the quantile summary op test as follows.
 
-    Create a batch of 6 examples having a dense and sparse features.
+    Create a batch of 6 examples having a dense and sparse features. SparseM is
+    a sparse multi-dimensional (multivalent) feature.
     The data looks like this
-    | Instance | instance weights | Dense 0  | Sparse 0
-    | 0        |     10           |   1      |
-    | 1        |     1            |   2      |    2
-    | 2        |     1            |   3      |    3
-    | 3        |     1            |   4      |    4
-    | 4        |     1            |   4      |    5
-    | 5        |     1            |   5      |    6
+    | Instance | instance weights | Dense 0  | Sparse 0 | SparseM
+    | 0        |     10           |   1      |          |   |   |
+    | 1        |     1            |   2      |    2     | 2 |   |
+    | 2        |     1            |   3      |    3     | 3 |   |
+    | 3        |     1            |   4      |    4     |   | 4 |
+    | 4        |     1            |   4      |    5     |   | 5 |
+    | 5        |     1            |   5      |    6     |   | 6 |
     """
 
     dense_float_tensor_0 = constant_op.constant(
@@ -66,20 +67,29 @@ class QuantileBucketsOpTest(test_util.TensorFlowTestCase):
     sparse_values_0 = constant_op.constant(
         [2, 3, 4, 5, 6], dtype=dtypes.float32)
     sparse_shape_0 = constant_op.constant([6, 1], dtype=dtypes.int64)
+    # Multi-dimensional feature that should have the same quantiles as Sparse 0.
+    sparse_indices_m = constant_op.constant(
+        [[1, 1], [2, 0], [3, 1], [4, 1], [5, 1]], dtype=dtypes.int64)
+    sparse_values_m = constant_op.constant(
+        [2, 3, 4, 5, 6], dtype=dtypes.float32)
+    sparse_shape_m = constant_op.constant([6, 2], dtype=dtypes.int64)
+
     example_weights = constant_op.constant(
         [10, 1, 1, 1, 1, 1], dtype=dtypes.float32)
 
     with self.test_session():
       config = self._gen_config(0.33, 3)
       dense_buckets, sparse_buckets = quantile_ops.quantile_buckets(
-          [dense_float_tensor_0], [sparse_indices_0], [sparse_values_0],
-          [sparse_shape_0],
+          [dense_float_tensor_0], [sparse_indices_0, sparse_indices_m],
+          [sparse_values_0, sparse_values_m], [sparse_shape_0, sparse_shape_m],
           example_weights=example_weights,
           dense_config=[config],
-          sparse_config=[config])
+          sparse_config=[config, config])
 
       self.assertAllEqual([1, 3, 5], dense_buckets[0].eval())
       self.assertAllEqual([2, 4, 6.], sparse_buckets[0].eval())
+      # Multidimensional sparse.
+      self.assertAllEqual([2, 4, 6.], sparse_buckets[1].eval())
 
   def testStreamingQuantileBucketsWithVaryingBatch(self):
     """Sets up the quantile summary op test as follows.
@@ -214,10 +224,10 @@ class QuantileBucketsOpTest(test_util.TensorFlowTestCase):
       resources.initialize_resources(resources.shared_resources()).run()
 
       sparse_indices_0 = constant_op.constant(
-          [[1, 0], [2, 0], [3, 0], [4, 0], [5, 0]], dtype=dtypes.int64)
+          [[1, 0], [2, 1], [3, 0], [4, 2], [5, 0]], dtype=dtypes.int64)
       sparse_values_0 = constant_op.constant(
           [2.0, 3.0, 4.0, 5.0, 6.0], dtype=dtypes.float32)
-      sparse_shape_0 = constant_op.constant([6, 1], dtype=dtypes.int64)
+      sparse_shape_0 = constant_op.constant([6, 3], dtype=dtypes.int64)
       example_weights = constant_op.constant(
           [10, 1, 1, 1, 1, 1], dtype=dtypes.float32, shape=[6, 1])
       update = accumulator.add_summary(
-- 
GitLab


From d48e510392e84962593b5c710aa9fca5c4f67e4c Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 20 Nov 2017 14:00:24 -0800
Subject: [PATCH 0132/1225] Remove "smart" registration functionality from
 LayerCollection.

Currently, register_block contains rules for registering blocks with keys
that have some overlap with existing keys: if the key is a subset of an
existing key it's skipped, and if it's a superset the new entry replaces the
old one. This is dangerous and is likely to cause silent failure, since a user
may attempt to register a block with no effect. If the user is registering
multiple blocks with overlapping parameters, then they should be the one to
specify which registration is intended.

This CL removes the subset and superset checks, and instead raises an error if
register_block is called with any key that intersects but is not equal to any
existing key.

PiperOrigin-RevId: 176412365
---
 .../kernel_tests/layer_collection_test.py     |  46 ++++--
 .../kfac/python/ops/layer_collection.py       | 144 +++---------------
 2 files changed, 59 insertions(+), 131 deletions(-)

diff --git a/tensorflow/contrib/kfac/python/kernel_tests/layer_collection_test.py b/tensorflow/contrib/kfac/python/kernel_tests/layer_collection_test.py
index c5ad90d1dc..b8ccbeadd0 100644
--- a/tensorflow/contrib/kfac/python/kernel_tests/layer_collection_test.py
+++ b/tensorflow/contrib/kfac/python/kernel_tests/layer_collection_test.py
@@ -128,8 +128,9 @@ class LayerCollectionTest(test.TestCase):
       key = array_ops.constant(1)
       lc.register_fully_connected(key, array_ops.constant(2),
                                   array_ops.constant(3))
-      with self.assertRaises(ValueError):
+      with self.assertRaises(ValueError) as cm:
         lc.register_generic(key, 16)
+      self.assertIn('already in LayerCollection', str(cm.exception))
 
   def testRegisterSingleParamNotRegistered(self):
     x = variable_scope.get_variable('x', initializer=array_ops.constant(1,))
@@ -144,16 +145,18 @@ class LayerCollectionTest(test.TestCase):
     x = variable_scope.get_variable('x', initializer=array_ops.constant(1,))
     lc = layer_collection.LayerCollection()
     lc.fisher_blocks = {x: '1'}
-    with self.assertRaises(ValueError):
+    with self.assertRaises(ValueError) as cm:
       lc.register_block(x, 'foo')
+    self.assertIn('already in LayerCollection', str(cm.exception))
 
   def testRegisterSingleParamRegisteredInTuple(self):
     x = variable_scope.get_variable('x', initializer=array_ops.constant(1,))
     y = variable_scope.get_variable('y', initializer=array_ops.constant(1,))
     lc = layer_collection.LayerCollection()
     lc.fisher_blocks = {(x, y): '1'}
-    lc.register_block(x, 'foo')
-    self.assertEqual(set(['1']), set(lc.get_blocks()))
+    with self.assertRaises(ValueError) as cm:
+      lc.register_block(x, 'foo')
+    self.assertIn('was already registered', str(cm.exception))
 
   def testRegisterTupleParamNotRegistered(self):
     x = variable_scope.get_variable('x', initializer=array_ops.constant(1,))
@@ -173,8 +176,9 @@ class LayerCollectionTest(test.TestCase):
     lc = layer_collection.LayerCollection()
     lc.fisher_blocks = {(x, y): '1'}
 
-    with self.assertRaises(ValueError):
+    with self.assertRaises(ValueError) as cm:
       lc.register_block((x, y), 'foo')
+    self.assertIn('already in LayerCollection', str(cm.exception))
 
   def testRegisterTupleParamRegisteredInSuperset(self):
     x = variable_scope.get_variable('x', initializer=array_ops.constant(1,))
@@ -183,8 +187,9 @@ class LayerCollectionTest(test.TestCase):
     lc = layer_collection.LayerCollection()
     lc.fisher_blocks = {(x, y, z): '1'}
 
-    lc.register_block((x, y), 'foo')
-    self.assertEqual(set(['1']), set(lc.get_blocks()))
+    with self.assertRaises(ValueError) as cm:
+      lc.register_block((x, y), 'foo')
+    self.assertIn('was already registered', str(cm.exception))
 
   def testRegisterTupleParamSomeRegistered(self):
     x = variable_scope.get_variable('x', initializer=array_ops.constant(1,))
@@ -193,10 +198,9 @@ class LayerCollectionTest(test.TestCase):
     lc = layer_collection.LayerCollection()
     lc.fisher_blocks = {x: MockFisherBlock('1'), z: MockFisherBlock('2')}
 
-    lc.register_block((x, y), MockFisherBlock('foo'))
-    self.assertEqual(
-        set([MockFisherBlock('2'), MockFisherBlock('foo')]), set(
-            lc.get_blocks()))
+    with self.assertRaises(ValueError) as cm:
+      lc.register_block((x, y), MockFisherBlock('foo'))
+    self.assertIn('was already registered', str(cm.exception))
 
   def testRegisterTupleVarSomeRegisteredInOtherTuples(self):
     x = variable_scope.get_variable('x', initializer=array_ops.constant(1,))
@@ -206,8 +210,9 @@ class LayerCollectionTest(test.TestCase):
     lc = layer_collection.LayerCollection()
     lc.fisher_blocks = {(x, z): '1', (z, w): '2'}
 
-    with self.assertRaises(ValueError):
+    with self.assertRaises(ValueError) as cm:
       lc.register_block((x, y), 'foo')
+    self.assertIn('was already registered', str(cm.exception))
 
   def testRegisterCategoricalPredictiveDistribution(self):
     with ops.Graph().as_default(), self.test_session() as sess:
@@ -427,6 +432,23 @@ class LayerCollectionTest(test.TestCase):
 
       self.ensureLayerReuseWorks(register_fn)
 
+  def testReuseWithInvalidRegistration(self):
+    """Invalid registrations shouldn't overwrite existing blocks."""
+    with ops.Graph().as_default():
+      inputs = array_ops.ones([2, 5, 5, 10])
+      outputs = array_ops.zeros([2, 5, 5, 3])
+      w = variable_scope.get_variable('w', [1, 1, 10, 3])
+      b = variable_scope.get_variable('b', [3])
+      lc = layer_collection.LayerCollection()
+      lc.register_fully_connected(w, inputs, outputs)
+      self.assertEqual(lc.fisher_blocks[w].num_registered_minibatches, 1)
+      with self.assertRaises(KeyError):
+        lc.register_fully_connected((w, b), inputs, outputs, reuse=True)
+      self.assertNotIn((w, b), lc.fisher_blocks)
+      self.assertEqual(lc.fisher_blocks[w].num_registered_minibatches, 1)
+      lc.register_fully_connected(w, inputs, outputs, reuse=True)
+      self.assertEqual(lc.fisher_blocks[w].num_registered_minibatches, 2)
+
   def testMakeOrGetFactor(self):
     with ops.Graph().as_default():
       random_seed.set_random_seed(200)
diff --git a/tensorflow/contrib/kfac/python/ops/layer_collection.py b/tensorflow/contrib/kfac/python/ops/layer_collection.py
index cead023c15..04f5a869bd 100644
--- a/tensorflow/contrib/kfac/python/ops/layer_collection.py
+++ b/tensorflow/contrib/kfac/python/ops/layer_collection.py
@@ -35,7 +35,6 @@ from tensorflow.contrib.kfac.python.ops import utils
 from tensorflow.python.framework import ops
 from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import variable_scope
-from tensorflow.python.platform import tf_logging as logging
 from tensorflow.python.util import nest
 
 # Names for various approximations that can be requested for Fisher blocks.
@@ -220,39 +219,24 @@ class LayerCollection(object):
   def register_block(self, layer_key, fisher_block, reuse=VARIABLE_SCOPE):
     """Validates and registers the layer_key associated with the fisher_block.
 
-    Validation consists of checking whether the key was already registered or
-    if any of the elements of layer_key (if it's a tuple) were already
-    registered as part of another tuple (throws an error if so). If any of the
-    elements were registered by themselves, or as part of tuples that are
-    subsets of this layer_key, those registrations are first removed.
-
-    If the layer_key is a subset of an existing registration, registration of
-    the new, smaller layer_key is skipped.
-
-    e.g. If registrations include {'a': foo, ('b', 'c'): bar}, then
-      - register_layer('a', baz) -> ValueError
-      - register_layer(('b', 'c', 'd'), baz) ->
-        {'a': foo, ('b', 'c', 'd'): baz}
-      - register_layer('b', baz) ->
-        {'a': foo, ('b', 'c'): bar} (No change)
-      - register_layer(('a', 'd'), baz) ->
-        {('a', 'd'): baz, ('b', 'c'): bar}
-      - register_layer(('b', 'd'), baz) -> ValueError
-
     Args:
-      layer_key: The key to check for in existing registrations and to register
-          if valid.
-      fisher_block: The associated fisher block.
-      reuse: Method to use for inserting new FisherBlocks. One of True, False,
+      layer_key: A variable or tuple of variables. The key to check for in
+          existing registrations and to register if valid.
+      fisher_block: The associated `FisherBlock`.
+      reuse: Method to use for inserting new `FisherBlock`s. One of True, False,
         or VARIABLE_SCOPE.
 
     Raises:
-      ValueError: If the layer_key was already registered, or if a subset of the
-          layer_key has already been registered as part of a different tuple.
+      ValueError: If `layer_key` was already registered and reuse is `False`,
+        if `layer_key` was registered with a different block type, or if
+        `layer_key` shares any variables with but is not equal to a previously
+        registered key.
+      KeyError: If `reuse` is `True` but `layer_key` was not previously
+        registered.
 
     Returns:
-      FisherBlock registered under 'layer_key'. May or may not be the same as
-      'fisher_block'.
+      The `FisherBlock` registered under `layer_key`. If `layer_key` was already
+      registered, this will be the previously registered `FisherBlock`.
     """
     if reuse is VARIABLE_SCOPE:
       reuse = variable_scope.get_variable_scope().reuse
@@ -272,101 +256,23 @@ class LayerCollection(object):
     # Insert fisher_block into self.fisher_blocks.
     if layer_key in self.fisher_blocks:
       raise ValueError("Duplicate registration: {}".format(layer_key))
-    if isinstance(layer_key, (tuple, list)):
-      return self._register_block_with_sequence_key(layer_key, fisher_block)
-    else:
-      return self._register_block_with_nonsequence_key(layer_key, fisher_block)
-
-  def _register_block_with_sequence_key(self, layer_key, fisher_block):
-    """Validates and registers the layer_key if it's a sequence."""
-    # Find all keys that are either supersets or subsets of 'layer_key'.
-    inclusions = {
-        fisher_elt
-        for layer_elt in layer_key
-        for fisher_elt in self.fisher_blocks
-        if self._equal_or_subset(layer_elt, fisher_elt)
+    # Raise an error if any variable in layer_key has been registered in any
+    # other blocks.
+    variable_to_block = {
+        var: (params, block)
+        for (params, block) in self.fisher_blocks.items()
+        for var in ensure_sequence(params)
     }
-
-    if not inclusions:
-      self.fisher_blocks[layer_key] = fisher_block
-      return fisher_block
-
-    result_key = None
-    for key in inclusions:
-      fisher_block_key = key if isinstance(key, (tuple, list)) else (key,)
-      in_existing_only = set(fisher_block_key) - set(layer_key)
-      in_new_only = set(layer_key) - set(fisher_block_key)
-
-      if in_existing_only and in_new_only:
-        # Existing and new key have an intersection but neither is a subset of
-        # the other. This is an error.
+    for variable in ensure_sequence(layer_key):
+      if variable in variable_to_block:
+        prev_key, prev_block = variable_to_block[variable]
         raise ValueError(
-            "Inconsistent registration, expected new key to be a subset or "
-            "superset of the existing key: existing is {}, new is {}".format(
-                key, layer_key))
-      elif in_existing_only and not in_new_only:
-        # Existing key is strict superset of new key. Return existing
-        # FisherBlock.
-        logging.warning("Graph Registration Warning: tried to register "
-                        "a subset ({}) of an already registered tuple "
-                        "({}), skipping".format(layer_key, fisher_block_key))
-        assert result_key is None
-        result_key = key
-      elif in_new_only and not in_existing_only:
-        # Existing key is a strict subset of new key. Replace existing
-        # FisherBlock with new one.
-        #
-        # TODO(b/68715045): This is dangerous. If there are existing
-        # registrations for a minibatch from elsewhere in the graph, they won't
-        # be re-registered with this new FisherBlock. The type of FisherBlock
-        # could also change here.
-        logging.warning(
-            "Replacing existing FisherBlock for key {} with new FisherBlock "
-            "for key {}. {} registered minibatches from the existing "
-            "FisherBlock will not be migrated.".format(
-                key, layer_key,
-                self.fisher_blocks[key].num_registered_minibatches))
-        self.fisher_blocks.pop(key)
-        self.fisher_blocks[layer_key] = fisher_block
-        assert result_key is None
-        result_key = layer_key
-      elif not in_new_only and not in_existing_only:
-        # Existing and new are identical. Reuse the old FisherBlock.
-        #
-        # TODO(b/68715045): This is dangerous. If the new FisherBlock has
-        # existing registered minibatches, they will not be migrated to the
-        # existing FisherBlock.
-        assert result_key is None
-        result_key = key
-      else:
-        raise ValueError("Unexpected layer key conflict: {} vs. {}".format(
-            layer_key, key))
-
-    return self.fisher_blocks[result_key]
-
-  def _register_block_with_nonsequence_key(self, layer_key, fisher_block):
-    """Validates and registers the layer_key if it's not a sequence."""
-    inclusions = {
-        fisher_elt
-        for fisher_elt in self.fisher_blocks
-        if self._equal_or_subset(layer_key, fisher_elt)
-    }
-
-    if not inclusions:
-      self.fisher_blocks[layer_key] = fisher_block
-    else:
-      logging.warning("Graph Registration Warning: tried to register "
-                      "variable ({}) but a containing tuple was already "
-                      "registered ({}), skipping".format(layer_key, inclusions))
-
+            "Attempted to register layer_key {} with block {}, but variable {}"
+            " was already registered in key {} with block {}.".format(
+                layer_key, fisher_block, variable, prev_key, prev_block))
+    self.fisher_blocks[layer_key] = fisher_block
     return fisher_block
 
-  def _equal_or_subset(self, elt1, elt2):
-    """Checks if the elements are equal or one is contained in the other."""
-    return (elt1 == elt2 or (isinstance(elt1,
-                                        (tuple, list)) and elt2 in elt1) or
-            (isinstance(elt2, (tuple, list)) and elt1 in elt2))
-
   def get_use_count_map(self):
     """Returns a dict of variables to their number of registrations."""
     vars_to_uses = defaultdict(int)
-- 
GitLab


From 74faf5003bb3d0ed885a96b7a517072465d9e579 Mon Sep 17 00:00:00 2001
From: Skye Wanderman-Milne <skyewm@google.com>
Date: Mon, 20 Nov 2017 14:07:01 -0800
Subject: [PATCH 0133/1225] Add ability to create Python Operation from
 already-constructed TF_Operation.

Currently, Operation.__init__ always creates a corresponding
TF_Operation if the C API is enabled. This is the desired behavior for
Operations created directly from Python code. However, we also need
the ability to create Operations from TF_Operations created indirectly
by the C API (e.g. TF_ImportGraphDef, TF_FinishWhile). This change:

- Adds a new method, Graph._create_op_from_tf_operation, which is like
  Graph.create_op() except it takes an existing TF_Operation. This
  involves refactoring create_op so that as much logic as possible is
  shared between these two methods.

- Allow Operation.__init__ to take an already-constructed TF_Operation
  for its 'node_def' argument, which it will use instead of creating a
  new TF_Operation. This is passed through 'node_def' to avoid visibly
  changing the public API (Operation.__init__ is technically part of
  the public API, even though we don't expect users to directly call
  it).

- Removes some C API sanity checks that depend on the Python Operation
  state being initialized, and similar changes to move away from the
  Python state. When an Operation is constructed from a TF_Operation,
  much of this state isn't initialized (we instead fetch it using the
  C API). Eventually we'll remove this redundant state altogether.

PiperOrigin-RevId: 176413495
---
 tensorflow/python/framework/c_api_util.py |  16 ++
 tensorflow/python/framework/ops.py        | 230 +++++++++++++---------
 tensorflow/python/framework/ops_test.py   | 160 ++++++++++++++-
 tensorflow/python/framework/test_ops.cc   |   5 +
 4 files changed, 321 insertions(+), 90 deletions(-)

diff --git a/tensorflow/python/framework/c_api_util.py b/tensorflow/python/framework/c_api_util.py
index 1d0dd88dc5..814436fc7a 100644
--- a/tensorflow/python/framework/c_api_util.py
+++ b/tensorflow/python/framework/c_api_util.py
@@ -94,3 +94,19 @@ def tf_buffer(data=None):
     yield buf
   finally:
     c_api.TF_DeleteBuffer(buf)
+
+
+def tf_output(c_op, index):
+  """Returns a wrapped TF_Output with specified operation and index.
+
+  Args:
+    c_op: wrapped TF_Operation
+    index: integer
+
+  Returns:
+    Wrapped TF_Output
+  """
+  ret = c_api.TF_Output()
+  ret.oper = c_op
+  ret.index = index
+  return ret
diff --git a/tensorflow/python/framework/ops.py b/tensorflow/python/framework/ops.py
index 5e57245b7d..746b35b87f 100644
--- a/tensorflow/python/framework/ops.py
+++ b/tensorflow/python/framework/ops.py
@@ -491,11 +491,10 @@ class Tensor(_TensorLike):
       return "%s:%d" % (self._op.name, self._value_index)
 
   def _as_tf_output(self):
-    assert self.op._c_op  # pylint: disable=protected-access
-    tf_output = c_api.TF_Output()
-    tf_output.oper = self.op._c_op  # pylint: disable=protected-access
-    tf_output.index = self.value_index
-    return tf_output
+    # pylint: disable=protected-access
+    assert self.op._c_op
+    return c_api_util.tf_output(self.op._c_op, self.value_index)
+    # pylint: enable=protected-access
 
   def __str__(self):
     return "Tensor(\"%s\"%s%s%s)" % (
@@ -1505,16 +1504,33 @@ class Operation(object):
         or if `inputs` and `input_types` are incompatible.
       ValueError: if the `node_def` name is not valid.
     """
-    if not isinstance(node_def, node_def_pb2.NodeDef):
+    # For internal use only: `node_def` can be set to a TF_Operation to create
+    # an Operation for that op. This is useful for creating Operations for ops
+    # indirectly created by C API methods, e.g. the ops created by
+    # TF_ImportGraphDef. When `node_def` is a TF_Operation, all optional fields
+    # except `control_inputs` should be None.
+
+    if isinstance(node_def, node_def_pb2.NodeDef):
+      if node_def.ByteSize() >= (1 << 31) or node_def.ByteSize() < 0:
+        raise ValueError(
+            "Cannot create a tensor proto whose content is larger than 2GB.")
+      if not _VALID_OP_NAME_REGEX.match(node_def.name):
+        raise ValueError("'%s' is not a valid node name" % node_def.name)
+      self._node_def = copy.deepcopy(node_def)
+      c_op = None
+    elif type(node_def).__name__ == "SwigPyObject":
+      assert inputs is None
+      assert output_types is None
+      assert input_types is None
+      assert original_op is None
+      assert op_def is None
+      self._node_def = None
+      c_op = node_def
+    else:
       raise TypeError("node_def needs to be a NodeDef: %s" % node_def)
-    if node_def.ByteSize() >= (1 << 31) or node_def.ByteSize() < 0:
-      raise ValueError(
-          "Cannot create a tensor proto whose content is larger than 2GB.")
-    if not _VALID_OP_NAME_REGEX.match(node_def.name):
-      raise ValueError("'%s' is not a valid node name" % node_def.name)
+
     if not isinstance(g, Graph):
       raise TypeError("g needs to be a Graph: %s" % g)
-    self._node_def = copy.deepcopy(node_def)
     self._graph = g
     if inputs is None:
       inputs = []
@@ -1524,8 +1540,6 @@ class Operation(object):
     for a in self._inputs:
       if not isinstance(a, Tensor):
         raise TypeError("input needs to be a Tensor: %s" % a)
-      # Mark that we consume the inputs.
-      a._add_consumer(self)  # pylint: disable=protected-access
     if input_types is None:
       input_types = [i.dtype.base_dtype for i in self._inputs]
     else:
@@ -1542,21 +1556,27 @@ class Operation(object):
     self._control_inputs = []
     if control_inputs:
       for c in control_inputs:
-        c_op = None
+        control_op = None
         if isinstance(c, Operation):
-          c_op = c
+          control_op = c
         elif isinstance(c, (Tensor, IndexedSlices)):
-          c_op = c.op
+          control_op = c.op
         else:
           raise TypeError("Control input must be an Operation, "
                           "a Tensor, or IndexedSlices: %s" % c)
-        self._control_inputs.append(c_op)
+        self._control_inputs.append(control_op)
 
     self._original_op = original_op
     self._op_def = op_def
     self._traceback = self._graph._extract_stack()  # pylint: disable=protected-access
 
-    if self._graph._c_graph:  # pylint: disable=protected-access
+    # Initialize self._c_op.
+    if c_op:
+      # TODO(skyewm): remove this assert when we remove USE_C_API
+      assert self._graph._c_graph  # pylint: disable=protected-access
+      self._c_op = c_op
+      self._add_control_inputs(self._control_inputs)
+    elif self._graph._c_graph:  # pylint: disable=protected-access
       if self._op_def:
         # TODO(skyewm): op_def_library.apply_op() flattens the incoming
         # inputs. Refactor so we don't have to do this here.
@@ -1571,8 +1591,18 @@ class Operation(object):
     else:
       self._c_op = None
 
-    # Initialize self._outputs
-    if output_types is None:
+    # Mark that we consume the inputs.
+    for input_tensor in self.inputs:
+      input_tensor._add_consumer(self)  # pylint: disable=protected-access
+
+    # Initialize self._outputs.
+    if self._c_op:
+      num_outputs = c_api.TF_OperationNumOutputs(self._c_op)
+      output_types = [
+          c_api.TF_OperationOutputType(c_api_util.tf_output(self._c_op, i))
+          for i in range(num_outputs)]
+      assert output_types is not None
+    elif output_types is None:
       output_types = []
     self._output_types_val = output_types
     self._outputs = [
@@ -1580,7 +1610,7 @@ class Operation(object):
         for i, output_type in enumerate(output_types)
     ]
 
-    # Add this op to the current control flow context:
+    # Add this op to the current control flow context.
     self._control_flow_context = g._get_control_flow_context()  # pylint: disable=protected-access
     if self._control_flow_context is not None:
       self._control_flow_context.AddOp(self)
@@ -1674,9 +1704,6 @@ class Operation(object):
   def name(self):
     """The full name of this operation."""
     if self._c_op:
-      # TODO(iga): Remove this assert after converting to C API by default.
-      # Just being a bit paranoid here.
-      assert self._node_def.name == c_api.TF_OperationName(self._c_op)
       return c_api.TF_OperationName(self._c_op)
     else:
       return self._node_def.name
@@ -1696,9 +1723,6 @@ class Operation(object):
       device.
     """
     if self._c_op:
-      # TODO(iga): Remove this assert after converting to C API by default.
-      # Just being a bit paranoid here
-      assert self._node_def.device == c_api.TF_OperationDevice(self._c_op)
       return c_api.TF_OperationDevice(self._c_op)
     else:
       return self._node_def.device
@@ -1760,8 +1784,8 @@ class Operation(object):
           self._graph._c_graph,  # pylint: disable=protected-access
           self._c_op,  # pylint: disable=protected-access
           _device_string(device))
-    # TODO(nolivia): remove this line when switch to C api
-    self._node_def.device = _device_string(device)
+    else:
+      self._node_def.device = _device_string(device)
 
   def _add_input(self, tensor, dtype=None):
     """Add a new input to this operation.
@@ -1868,6 +1892,9 @@ class Operation(object):
 
   # Methods below are used when building the NodeDef and Graph proto.
   def _recompute_node_def(self):
+    # TODO(skyewm): remove this function when we switch to C API
+    if self._c_op: return
+
     del self._node_def.input[:]
     # pylint: disable=protected-access
     self._node_def.input.extend([t._as_node_def_input() for t in self._inputs])
@@ -1937,9 +1964,6 @@ class Operation(object):
           dtypes.as_dtype(c_api.TF_OperationInputType(self._tf_input(i)))
           for i in xrange(num_inputs)
       ]
-      # TODO(iga): Remove this assert after converting to C API by default.
-      # Just being a bit paranoid here.
-      assert self._input_types_val == input_types
       return input_types
     else:
       return self._input_types_val
@@ -1974,14 +1998,6 @@ class Operation(object):
     """The type of the op (e.g. `"MatMul"`)."""
     if self._c_op:
       op_type = c_api.TF_OperationOpType(self._c_op)
-      # TODO(iga): Remove these asserts after converting to C API by default.
-      # Just being a bit paranoid here.
-      # pylint: disable=unidiomatic-typecheck
-      assert type(op_type) == type(self._node_def.op), (
-          "Expected same types %s vs %s" % (type(op_type),
-                                            type(self._node_def.op)))
-      # pylint: enable=unidiomatic-typecheck
-      assert op_type == self._node_def.op
       return op_type
     else:
       return self._node_def.op
@@ -3032,74 +3048,105 @@ class Graph(object):
 
     node_def = _NodeDef(op_type, name, device=None, attrs=attrs)
 
+    input_ops = set([t.op for t in inputs])
+    control_inputs = self._control_dependencies_for_inputs(input_ops)
+    ret = Operation(
+        node_def,
+        self,
+        inputs=inputs,
+        output_types=dtypes,
+        control_inputs=control_inputs,
+        input_types=input_types,
+        original_op=self._default_original_op,
+        op_def=op_def)
+    if compute_shapes:
+      set_shapes_for_outputs(ret)
+    self._create_op_helper(ret, compute_device=compute_device)
+    return ret
+
+  def _create_op_from_tf_operation(self, c_op):
+    """Creates an `Operation` in this graph from the supplied TF_Operation.
+
+    This method is like create_op() except the new Operation is constructed
+    using `c_op`. The returned Operation will have `c_op` as its _c_op
+    field. This is used to create Operation objects around TF_Operations created
+    indirectly by the C API (e.g. by TF_ImportGraphDef, TF_FinishWhile).
+
+    Args:
+      c_op: a wrapped TF_Operation
+
+    Returns:
+      An `Operation` object.
+    """
+    self._check_not_finalized()
+    tf_outputs = c_api.GetOperationInputs(c_op)
+    input_ops = set(self._get_operation_by_tf_operation(output.oper)
+                    for output in tf_outputs)
+    control_inputs = self._control_dependencies_for_inputs(input_ops)
+    ret = Operation(c_op, self, control_inputs=control_inputs)
+    self._create_op_helper(ret)
+    return ret
+
+  def _create_op_helper(self, op, compute_device=True):
+    """Common logic for creating an op in this graph."""
     # Apply any additional attributes requested. Do not overwrite any existing
     # attributes.
     for key, value in self._attr_scope_map.items():
-      if key not in node_def.attr:
+      try:
+        op.get_attr(key)
+      except ValueError:
         if callable(value):
-          value = value(node_def)
+          value = value(op.node_def)
           if not isinstance(value, (type(None), attr_value_pb2.AttrValue)):
             raise TypeError(
                 "Callable for scope map key '%s' must return either None or "
                 "an AttrValue protocol buffer; but it returned: %s" % (key,
                                                                        value))
-        node_def.attr[key].CopyFrom(value)
+        if value:
+          op._set_attr(key, value)  # pylint: disable=protected-access
 
-    # Apply a kernel label if one has been specified for this op_type.
+    # Apply a kernel label if one has been specified for this op type.
     try:
-      kernel_label = self._op_to_kernel_label_map[op_type]
-      node_def.attr["_kernel"].CopyFrom(
-          attr_value_pb2.AttrValue(s=compat.as_bytes(kernel_label)))
+      kernel_label = self._op_to_kernel_label_map[op.type]
+      op._set_attr("_kernel",  # pylint: disable=protected-access
+                   attr_value_pb2.AttrValue(s=compat.as_bytes(kernel_label)))
     except KeyError:
       pass
 
-    # Apply the overriding op_type for gradients if one has been
-    # specified for this op_type.
+    # Apply the overriding op type for gradients if one has been specified for
+    # this op type.
     try:
-      mapped_op_type = self._gradient_override_map[op_type]
-      node_def.attr["_gradient_op_type"].CopyFrom(
-          attr_value_pb2.AttrValue(s=compat.as_bytes(mapped_op_type)))
+      mapped_op_type = self._gradient_override_map[op.type]
+      op._set_attr("_gradient_op_type",  # pylint: disable=protected-access
+                   attr_value_pb2.AttrValue(s=compat.as_bytes(mapped_op_type)))
     except KeyError:
       pass
 
-    control_inputs = self._control_dependencies_for_inputs(inputs)
-    ret = Operation(
-        node_def,
-        self,
-        inputs=inputs,
-        output_types=dtypes,
-        control_inputs=control_inputs,
-        input_types=input_types,
-        original_op=self._default_original_op,
-        op_def=op_def)
-    if compute_shapes:
-      set_shapes_for_outputs(ret)
-    self._record_op_seen_by_control_dependencies(ret)
+    self._record_op_seen_by_control_dependencies(op)
 
     if compute_device:
-      self._apply_device_functions(ret)
+      self._apply_device_functions(op)
 
     if self._colocation_stack:
       all_colocation_groups = []
       for colocation_op in self._colocation_stack:
         all_colocation_groups.extend(colocation_op.colocation_groups())
         if colocation_op.device:
-          # Make this device match the device of the colocated op, to
-          # provide consistency between the device and the colocation
-          # property.
-          if (ret.device and pydev.canonical_name(ret.device) !=
+          # Make this device match the device of the colocated op, to provide
+          # consistency between the device and the colocation property.
+          if (op.device and pydev.canonical_name(op.device) !=
               pydev.canonical_name(colocation_op.device)):
             logging.warning("Tried to colocate %s with an op %s that had "
                             "a different device: %s vs %s. "
-                            "Ignoring colocation property.", name,
-                            colocation_op.name, ret.device,
+                            "Ignoring colocation property.", op.name,
+                            colocation_op.name, op.device,
                             colocation_op.device)
           else:
-            ret._set_device(colocation_op.device)  # pylint: disable=protected-access
+            op._set_device(colocation_op.device)  # pylint: disable=protected-access
 
       all_colocation_groups = sorted(set(all_colocation_groups))
       # pylint: disable=protected-access
-      ret._set_attr("_class", attr_value_pb2.AttrValue(
+      op._set_attr("_class", attr_value_pb2.AttrValue(
           list=attr_value_pb2.AttrValue.ListValue(s=all_colocation_groups)))
       # pylint: enable=protected-access
 
@@ -3108,14 +3155,17 @@ class Graph(object):
     # (2) "is_stateful" is set in OpDef
     # (3) "container" attribute is in OpDef
     # (4) "container" attribute is None
-    if (self._container and op_type in self._registered_ops and
-        self._registered_ops[op_type].is_stateful and
-        "container" in ret.node_def.attr and
-        not ret.node_def.attr["container"].s):
-      ret.node_def.attr["container"].CopyFrom(
-          attr_value_pb2.AttrValue(s=compat.as_bytes(self._container)))
-
-    return ret
+    if (self._container and op.type in self._registered_ops and
+        self._registered_ops[op.type].is_stateful):
+      try:
+        container_attr = op.get_attr("container")
+      except ValueError:
+        # "container" attribute is not in OpDef
+        pass
+      else:
+        if not container_attr:
+          op._set_attr("container", attr_value_pb2.AttrValue(  # pylint: disable=protected-access
+              s=compat.as_bytes(self._container)))
 
   def as_graph_element(self, obj, allow_tensor=True, allow_operation=True):
     """Returns the object referred to by `obj`, as an `Operation` or `Tensor`.
@@ -3303,6 +3353,10 @@ class Graph(object):
     with self._lock:
       return self._nodes_by_name[name]
 
+  def _get_operation_by_tf_operation(self, tf_oper):
+    op_name = c_api.TF_OperationName(tf_oper)
+    return self._get_operation_by_name_unsafe(op_name)
+
   def get_tensor_by_name(self, name):
     """Returns the `Tensor` with the given `name`.
 
@@ -3337,8 +3391,7 @@ class Graph(object):
     Returns:
       The `Tensor` that represents `tf_output`.
     """
-    op_name = c_api.TF_OperationName(tf_output.oper)
-    op = self._get_operation_by_name_unsafe(op_name)
+    op = self._get_operation_by_tf_operation(tf_output.oper)
     return op.outputs[tf_output.index]
 
   def _next_id(self):
@@ -4004,8 +4057,8 @@ class Graph(object):
         ret.add(op)
     return ret
 
-  def _control_dependencies_for_inputs(self, input_tensors):
-    """For an op that takes `input_tensors` as inputs, compute control inputs.
+  def _control_dependencies_for_inputs(self, input_ops):
+    """For an op that takes `input_ops` as inputs, compute control inputs.
 
     The returned control dependencies should yield an execution that
     is equivalent to adding all control inputs in
@@ -4016,13 +4069,12 @@ class Graph(object):
     the explicit approach redundant.
 
     Args:
-      input_tensors: The direct data dependencies for an op to be created.
+      input_ops: The data input ops for an op to be created.
 
     Returns:
       A list of control inputs for the op to be created.
     """
     ret = []
-    input_ops = set([t.op for t in input_tensors])
     for controller in self._control_dependencies_stack:
       # If any of the input_ops already depends on the inputs from controller,
       # we say that the new op is dominated (by that input), and we therefore
diff --git a/tensorflow/python/framework/ops_test.py b/tensorflow/python/framework/ops_test.py
index 7db5d133ed..7ecdea8275 100644
--- a/tensorflow/python/framework/ops_test.py
+++ b/tensorflow/python/framework/ops_test.py
@@ -636,6 +636,164 @@ class CreateOpTest(test_util.TensorFlowTestCase):
     g.create_op("FloatOutput", [], [dtypes.float32], None, name="myop1")
 
 
+# NOTE(skyewm): these cases test the private Graph._create_op_from_tf_operation
+# method. Arguably we should only test the public APIs that depend on this
+# method. However, this logic is complex and tricky, and it can be difficult to
+# ascertain if we have adequate coverage (e.g. a graph may run successfully if
+# the control flow context isn't set properly, but a more complicated use case
+# that might not be obvious to test will fail). Thus we instead explicitly test
+# the low-level behavior.
+@test_util.with_c_api
+class CreateOpFromTFOperationTest(test_util.TensorFlowTestCase):
+
+  def testBasic(self):
+    g = ops.Graph()
+    with g.as_default():
+      x = test_ops.int_output()
+      if ops._USE_C_API:
+        c_op = ops._create_c_op(
+            g, ops._NodeDef("IntInputIntOutput", "myop"), [x], [])
+        op = g._create_op_from_tf_operation(c_op)
+      else:
+        # Test pure-Python version to make sure C API has same behavior.
+        op = test_ops.int_input_int_output(x, name="myop").op
+
+    self.assertEqual(op.name, "myop")
+    self.assertEqual(op.type, "IntInputIntOutput")
+    self.assertEqual(len(op.outputs), 1)
+    self.assertEqual(list(op.inputs), [x])
+    self.assertEqual(op.control_inputs, [])
+    self.assertEqual(op.graph, g)
+    self.assertEqual(x.consumers(), [op])
+    self.assertIsNotNone(op.traceback)
+    self.assertEqual(g.get_operation_by_name("myop"), op)
+    self.assertEqual(g.get_tensor_by_name("myop:0"), op.outputs[0])
+
+  def testCond(self):
+    g = ops.Graph()
+    with g.as_default():
+      x = test_ops.int_output()
+
+      def true_fn():
+        if ops._USE_C_API:
+          c_op = ops._create_c_op(ops.get_default_graph(),
+                                  ops._NodeDef("IntInput", "cond/myop"), [x],
+                                  [])
+          ops.get_default_graph()._create_op_from_tf_operation(c_op)
+        else:
+        # Test pure-Python version to make sure C API has same behavior.
+          test_ops.int_input(x, name="myop")
+        return x
+
+      control_flow_ops.cond(x < 10, true_fn, lambda: x)
+
+    op = g.get_operation_by_name("cond/myop")
+    self.assertIsNotNone(op)
+    self.assertEqual(op.name, "cond/myop")
+    self.assertEqual(op.type, "IntInput")
+    self.assertEqual(op.outputs, [])
+    op_input = op.inputs[0].op
+    self.assertEqual(op_input.type, "Switch")
+    self.assertEqual(op_input.inputs[0], x)
+    self.assertEqual(op.graph, g)
+    # pylint: disable=protected-access
+    self.assertIsNotNone(op._get_control_flow_context())
+    self.assertEqual(op._get_control_flow_context().name,
+                     "cond/cond_text")
+    # pylint: enable=protected-access
+
+  def testWhileLoop(self):
+    g = ops.Graph()
+    with g.as_default():
+      x = test_ops.int_output()
+
+      def body(i):
+        if ops._USE_C_API:
+          c_op = ops._create_c_op(ops.get_default_graph(),
+                                  ops._NodeDef("IntInput", "myloop/myop"), [x],
+                                  [])
+          ops.get_default_graph()._create_op_from_tf_operation(c_op)
+        else:
+          # Test pure-Python version to make sure C API has same behavior.
+          test_ops.int_input(x, name="myop")
+        return i
+
+      control_flow_ops.while_loop(lambda i: i < 10, body, [0], name="myloop")
+
+    op = g.get_operation_by_name("myloop/myop")
+    self.assertIsNotNone(op)
+    self.assertEqual(op.name, "myloop/myop")
+    self.assertEqual(op.type, "IntInput")
+    self.assertEqual(op.outputs, [])
+    op_input = op.inputs[0].op
+    self.assertEqual(op_input.type, "Enter")
+    self.assertEqual(list(op_input.inputs), [x])
+    self.assertEqual(op.graph, g)
+    # pylint: disable=protected-access
+    self.assertIsNotNone(op._get_control_flow_context())
+    self.assertEqual(op._get_control_flow_context().name,
+                     "myloop/while_context")
+    # pylint: enable=protected-access
+
+  def testWhileLoopWithInternalControlDep(self):
+    g = ops.Graph()
+    with g.as_default():
+      x = test_ops.int_output()
+
+      def body(i):
+        c = constant_op.constant(1.0, name="c")
+        if ops._USE_C_API:
+          c_op = ops._create_c_op(ops.get_default_graph(),
+                                  ops._NodeDef("IntInput", "myloop/myop"), [x],
+                                  [])
+          with ops.control_dependencies([c]):
+            ops.get_default_graph()._create_op_from_tf_operation(c_op)
+        else:
+          with ops.control_dependencies([c]):
+            test_ops.int_input(x, name="myop")
+        return i
+
+      control_flow_ops.while_loop(lambda i: i < 10, body, [0], name="myloop")
+
+    op = g.get_operation_by_name("myloop/myop")
+    self.assertIsNotNone(op)
+    c = g.get_operation_by_name("myloop/c")
+    self.assertIsNotNone(c)
+    # Internal control dep is preserved
+    self.assertEqual(op.control_inputs, [c])
+
+  def testWhileLoopWithExternalControlDep(self):
+    # TODO(skyewm): enable once ControlFlowContext._RemoveExternalControlEdges
+    # works with C API enabled
+    if ops._USE_C_API: self.skipTest("Not yet implemented with C API enabled")
+
+    g = ops.Graph()
+    with g.as_default():
+      x = test_ops.int_output()
+      c = constant_op.constant(1.0)
+
+      def body(i):
+        if ops._USE_C_API:
+          c_op = ops._create_c_op(ops.get_default_graph(),
+                                  ops._NodeDef("IntInput", "myloop/myop"), [x],
+                                  [])
+          with ops.control_dependencies([c]):
+            ops.get_default_graph()._create_op_from_tf_operation(c_op)
+        else:
+          with ops.control_dependencies([c]):
+            test_ops.int_input(x, name="myop")
+        return i
+
+      control_flow_ops.while_loop(lambda i: i < 10, body, [0], name="myloop")
+
+    op = g.get_operation_by_name("myloop/myop")
+    self.assertIsNotNone(op)
+    self.assertEqual(len(op.control_inputs), 1)
+    # External control dep is removed and replaced with internal control dep
+    self.assertNotEqual(op.control_inputs[0], c.op)
+    self.assertIsNotNone(op.control_inputs[0]._get_control_flow_context())
+
+
 @test_util.with_c_api
 class ApplyOpTest(test_util.TensorFlowTestCase):
 
@@ -1936,7 +2094,7 @@ class DenseTensorLikeTypeTest(test_util.TensorFlowTestCase):
 
   def testSuccess(self):
     op = ops.Operation(
-        ops._NodeDef("None", "myop"), ops.Graph(), [], [dtypes.float32])
+        ops._NodeDef("FloatOutput", "myop"), ops.Graph(), [], [dtypes.float32])
     t = op.outputs[0]
     self.assertTrue(ops.is_dense_tensor_like(t))
 
diff --git a/tensorflow/python/framework/test_ops.cc b/tensorflow/python/framework/test_ops.cc
index 35e0167b26..25bb7af20c 100644
--- a/tensorflow/python/framework/test_ops.cc
+++ b/tensorflow/python/framework/test_ops.cc
@@ -252,6 +252,11 @@ REGISTER_OP("IntInput")
     .Input("a: int32")
     .SetShapeFn(shape_inference::UnknownShape);
 
+REGISTER_OP("IntInputIntOutput")
+    .Input("a: int32")
+    .Output("b: int32")
+    .SetShapeFn(shape_inference::UnknownShape);
+
 REGISTER_OP("FloatInput")
     .Input("a: float32")
     .SetShapeFn(shape_inference::UnknownShape);
-- 
GitLab


From dd8ad028e79005377d326a02fa77f655a0f62699 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 20 Nov 2017 14:11:14 -0800
Subject: [PATCH 0134/1225] Moved tensorflow::StringPiece::Hasher out of
 tensorflow::StringPiece (to tensorflow::StringPieceHasher) and replaced it
 with an alias. This will allow tensorflow::StringPiece to be more easily
 replaced with absl::string_view (which does not contain a Hasher struct)
 after all references to tensorflow::StringPiece::Hasher are removed.

PiperOrigin-RevId: 176414198
---
 tensorflow/core/lib/core/stringpiece.cc |  2 +-
 tensorflow/core/lib/core/stringpiece.h  | 10 +++++++---
 2 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/tensorflow/core/lib/core/stringpiece.cc b/tensorflow/core/lib/core/stringpiece.cc
index 984f4404ce..29b727fc44 100644
--- a/tensorflow/core/lib/core/stringpiece.cc
+++ b/tensorflow/core/lib/core/stringpiece.cc
@@ -21,7 +21,7 @@ limitations under the License.
 
 namespace tensorflow {
 
-size_t StringPiece::Hasher::operator()(StringPiece s) const {
+size_t StringPieceHasher::operator()(StringPiece s) const {
   return Hash64(s.data(), s.size());
 }
 
diff --git a/tensorflow/core/lib/core/stringpiece.h b/tensorflow/core/lib/core/stringpiece.h
index 94f4a377f1..b2c6842151 100644
--- a/tensorflow/core/lib/core/stringpiece.h
+++ b/tensorflow/core/lib/core/stringpiece.h
@@ -35,6 +35,8 @@ limitations under the License.
 
 namespace tensorflow {
 
+struct StringPieceHasher;
+
 class StringPiece {
  public:
   typedef size_t size_type;
@@ -103,9 +105,7 @@ class StringPiece {
 
   StringPiece substr(size_t pos, size_t n = npos) const;
 
-  struct Hasher {
-    size_t operator()(StringPiece arg) const;
-  };
+  using Hasher = ::tensorflow::StringPieceHasher;
 
   // Return a string that contains the copy of the referenced data.
   std::string ToString() const { return std::string(data_, size_); }
@@ -133,6 +133,10 @@ class StringPiece {
   // Intentionally copyable
 };
 
+struct StringPieceHasher {
+  size_t operator()(StringPiece s) const;
+};
+
 inline bool operator==(StringPiece x, StringPiece y) {
   return ((x.size() == y.size()) &&
           (memcmp(x.data(), y.data(), x.size()) == 0));
-- 
GitLab


From 5439c1e2de01a8684b62aba224d44c392176ac32 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 20 Nov 2017 14:12:59 -0800
Subject: [PATCH 0135/1225] Remove nonfunctional and accidentally pessimizing
 value category casts.

The conditional expression only has ONE fixed value category. Because in the code as written the two operands are of different value categories, the result is in fact a prvalue, i.e. a copy. This seems unintended, and we should simply preserve the existing lvalue.

If we do want to allow moving, we need multiple statements:

    if (num == 1) {
      f(std::move(copier));
    } else {
      f(copier);
    }

PiperOrigin-RevId: 176414503
---
 tensorflow/core/common_runtime/copy_tensor.cc    | 12 +++++-------
 tensorflow/core/common_runtime/gpu/gpu_device.cc |  6 ++----
 2 files changed, 7 insertions(+), 11 deletions(-)

diff --git a/tensorflow/core/common_runtime/copy_tensor.cc b/tensorflow/core/common_runtime/copy_tensor.cc
index 9084081119..e35548729b 100644
--- a/tensorflow/core/common_runtime/copy_tensor.cc
+++ b/tensorflow/core/common_runtime/copy_tensor.cc
@@ -90,8 +90,7 @@ void CopyHostToDevice(const Tensor* input, Allocator* cpu_allocator,
     Status s_copy_init;
     for (int64 i = 0; i < input->NumElements(); ++i) {
       s_copy_init = VariantDeviceCopy(
-          VariantDeviceCopyDirection::HOST_TO_DEVICE, v[i], &v_out[i],
-          (input->NumElements() == 1) ? std::move(copier) : copier);
+          VariantDeviceCopyDirection::HOST_TO_DEVICE, v[i], &v_out[i], copier);
       if (!s_copy_init.ok()) {
         status_cb->UpdateStatus(s_copy_init);
         break;
@@ -149,8 +148,7 @@ void CopyDeviceToHost(const Tensor* input, Allocator* cpu_allocator,
     Status s_copy_init;
     for (int64 i = 0; i < input->NumElements(); ++i) {
       s_copy_init = VariantDeviceCopy(
-          VariantDeviceCopyDirection::DEVICE_TO_HOST, v[i], &v_out[i],
-          (input->NumElements() == 1) ? std::move(copier) : copier);
+          VariantDeviceCopyDirection::DEVICE_TO_HOST, v[i], &v_out[i], copier);
       if (!s_copy_init.ok()) {
         status_cb->UpdateStatus(s_copy_init);
         break;
@@ -213,9 +211,9 @@ void CopyDeviceToDevice(CopyTensor::CopyFunction copy_function,
     Variant* v_out = copy.flat<Variant>().data();
     Status s_copy_init;
     for (int64 i = 0; i < input->NumElements(); ++i) {
-      s_copy_init = VariantDeviceCopy(
-          VariantDeviceCopyDirection::DEVICE_TO_DEVICE, v[i], &v_out[i],
-          (input->NumElements() == 1) ? std::move(copier) : copier);
+      s_copy_init =
+          VariantDeviceCopy(VariantDeviceCopyDirection::DEVICE_TO_DEVICE, v[i],
+                            &v_out[i], copier);
       if (!s_copy_init.ok()) {
         status_cb->UpdateStatus(s_copy_init);
         break;
diff --git a/tensorflow/core/common_runtime/gpu/gpu_device.cc b/tensorflow/core/common_runtime/gpu/gpu_device.cc
index 5a7d96445e..eff169640f 100644
--- a/tensorflow/core/common_runtime/gpu/gpu_device.cc
+++ b/tensorflow/core/common_runtime/gpu/gpu_device.cc
@@ -561,10 +561,8 @@ Status BaseGPUDevice::MakeTensorFromProto(const TensorProto& tensor_proto,
     };
     Status s;
     for (int64 ix = 0; ix < parsed.NumElements(); ++ix) {
-      s = VariantDeviceCopy(
-          VariantDeviceCopyDirection::HOST_TO_DEVICE, from[ix],
-          &copy_variant[ix],
-          parsed.NumElements() == 1 ? std::move(copier) : copier);
+      s = VariantDeviceCopy(VariantDeviceCopyDirection::HOST_TO_DEVICE,
+                            from[ix], &copy_variant[ix], copier);
       if (!s.ok()) {
         break;
       }
-- 
GitLab


From d3f1adc0394c4954328ba03f3bcb6ee378b97068 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 20 Nov 2017 14:13:16 -0800
Subject: [PATCH 0136/1225] Reserve vector capacity when the final size is
 known

PiperOrigin-RevId: 176414557
---
 tensorflow/compiler/xla/service/cpu/cpu_executable.cc    | 5 +++--
 tensorflow/compiler/xla/service/hlo_evaluator.cc         | 3 ++-
 tensorflow/compiler/xla/service/while_loop_simplifier.cc | 3 +++
 tensorflow/core/ops/array_ops.cc                         | 1 +
 4 files changed, 9 insertions(+), 3 deletions(-)

diff --git a/tensorflow/compiler/xla/service/cpu/cpu_executable.cc b/tensorflow/compiler/xla/service/cpu/cpu_executable.cc
index f62353bee7..ddbe7ab341 100644
--- a/tensorflow/compiler/xla/service/cpu/cpu_executable.cc
+++ b/tensorflow/compiler/xla/service/cpu/cpu_executable.cc
@@ -147,8 +147,9 @@ Status CpuExecutable::ExecuteComputeFunction(
     tensorflow::gtl::ArraySlice<se::DeviceMemoryBase> buffers,
     HloExecutionProfile* hlo_execution_profile) {
   std::vector<se::DeviceMemoryBase> argument_buffers;
-  for (int i = 0; i < arguments.size(); ++i) {
-    argument_buffers.push_back(arguments[i]->buffer(/*index=*/{}));
+  argument_buffers.reserve(arguments.size());
+  for (const auto* argument : arguments) {
+    argument_buffers.push_back(argument->buffer(/*index=*/{}));
   }
   return ExecuteComputeFunction(run_options, argument_buffers, buffers,
                                 hlo_execution_profile);
diff --git a/tensorflow/compiler/xla/service/hlo_evaluator.cc b/tensorflow/compiler/xla/service/hlo_evaluator.cc
index 4fffb6127e..0a1ebe3416 100644
--- a/tensorflow/compiler/xla/service/hlo_evaluator.cc
+++ b/tensorflow/compiler/xla/service/hlo_evaluator.cc
@@ -910,7 +910,7 @@ class HloEvaluator::TypedVisitor : public DfsHloVisitorWithDefault {
           result_val += lhs_literal.Get<ReturnT>(lhs_index) *
                         rhs_literal.Get<ReturnT>(rhs_index);
         }
-      cnt:;
+      cnt : {}
       } while (IndexUtil::BumpIndices(window_shape, &rhs_spatial_index));
 
       return result_val;
@@ -1699,6 +1699,7 @@ StatusOr<std::unique_ptr<Literal>> HloEvaluator::EvaluateWithSubstitutions(
   }
 
   std::vector<HloInstruction*> operands;
+  operands.reserve(owned_operands.size());
   for (auto& operand : owned_operands) {
     operands.push_back(operand.get());
   }
diff --git a/tensorflow/compiler/xla/service/while_loop_simplifier.cc b/tensorflow/compiler/xla/service/while_loop_simplifier.cc
index 2fac914892..8f335be794 100644
--- a/tensorflow/compiler/xla/service/while_loop_simplifier.cc
+++ b/tensorflow/compiler/xla/service/while_loop_simplifier.cc
@@ -403,6 +403,7 @@ static StatusOr<bool> TryRemoveDeadWhileParams(HloInstruction* while_op) {
 
   // Compute the shape of the while op after we remove the dead indices.
   std::vector<Shape> new_while_tuple_elem_shapes;
+  new_while_tuple_elem_shapes.reserve(new_to_old_tuple_idx.size());
   for (int64 old_idx : new_to_old_tuple_idx) {
     new_while_tuple_elem_shapes.push_back(
         while_init->shape().tuple_shapes(old_idx));
@@ -469,6 +470,7 @@ static StatusOr<bool> TryRemoveDeadWhileParams(HloInstruction* while_op) {
   std::unordered_map<const HloInstruction*, std::unique_ptr<HloInstruction>>
       while_body_replacements = make_while_computation_replacements(while_body);
   std::vector<HloInstruction*> new_while_body_root_elems;
+  new_while_body_root_elems.reserve(new_to_old_tuple_idx.size());
   for (int64 old_idx : new_to_old_tuple_idx) {
     new_while_body_root_elems.push_back(
         while_body_root->mutable_operand(old_idx));
@@ -483,6 +485,7 @@ static StatusOr<bool> TryRemoveDeadWhileParams(HloInstruction* while_op) {
   // clean this up in the common case where while_init is a tuple op.  (It's
   // definitely tuple-shaped, but it's not necessarily a tuple op.)
   std::vector<HloInstruction*> new_while_init_elems;
+  new_while_init_elems.reserve(new_to_old_tuple_idx.size());
   for (int64 old_idx : new_to_old_tuple_idx) {
     new_while_init_elems.push_back(
         computation->AddInstruction(HloInstruction::CreateGetTupleElement(
diff --git a/tensorflow/core/ops/array_ops.cc b/tensorflow/core/ops/array_ops.cc
index cdf370399c..be2916f154 100644
--- a/tensorflow/core/ops/array_ops.cc
+++ b/tensorflow/core/ops/array_ops.cc
@@ -2378,6 +2378,7 @@ REGISTER_OP("Slice")
           TF_RETURN_IF_ERROR(
               c->WithRank(begin_value, c->Rank(sizes_value), &begin_value));
           std::vector<DimensionHandle> dims;
+          dims.reserve(c->Rank(sizes_value));
           for (int i = 0; i < c->Rank(sizes_value); ++i) {
             dims.emplace_back(c->Dim(sizes_value, i));
           }
-- 
GitLab


From 248d7c26c2a3f8d3f45b3498eff1d639e7cb0077 Mon Sep 17 00:00:00 2001
From: Yifei Feng <yifeif@google.com>
Date: Mon, 20 Nov 2017 14:40:23 -0800
Subject: [PATCH 0137/1225] Automated g4 rollback of changelist 176054079

PiperOrigin-RevId: 176418959
---
 .../python/kernel_tests/pooling_ops_test.py   | 76 +++++--------------
 1 file changed, 17 insertions(+), 59 deletions(-)

diff --git a/tensorflow/python/kernel_tests/pooling_ops_test.py b/tensorflow/python/kernel_tests/pooling_ops_test.py
index c97ad864ee..a126180414 100644
--- a/tensorflow/python/kernel_tests/pooling_ops_test.py
+++ b/tensorflow/python/kernel_tests/pooling_ops_test.py
@@ -20,8 +20,6 @@ from __future__ import print_function
 
 import numpy as np
 
-from tensorflow.core.protobuf import config_pb2
-from tensorflow.core.protobuf import rewriter_config_pb2
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import errors_impl
@@ -1174,27 +1172,12 @@ class PoolingTest(test.TestCase):
                      [1, window_rows, window_cols, 1],
                      [1, row_stride, col_stride, 1], padding)
 
-  def _testMaxPoolGradDirect(self,
-                             input_data,
-                             output_backprop,
-                             expected_input_backprop,
-                             input_sizes,
-                             output_sizes,
-                             window_rows,
-                             window_cols,
-                             row_stride,
-                             col_stride,
-                             padding,
-                             use_gpu,
-                             v2,
-                             graph_optimization=False):
+  def _testMaxPoolGradDirect(self, input_data, output_backprop,
+                             expected_input_backprop, input_sizes, output_sizes,
+                             window_rows, window_cols, row_stride, col_stride,
+                             padding, use_gpu, v2):
     pool_func = gen_nn_ops._max_pool_v2 if v2 else nn_ops.max_pool
-
-    config = config_pb2.ConfigProto()
-    if graph_optimization:
-      config.graph_options.rewrite_options.layout_optimizer = (
-          rewriter_config_pb2.RewriterConfig.ON)
-    with self.test_session(use_gpu=use_gpu, config=config):
+    with self.test_session(use_gpu=use_gpu):
       input_tensor = constant_op.constant(input_data, shape=input_sizes)
       output_tensor = pool_func(input_tensor,
                                 [1, window_rows, window_cols, 1],
@@ -1331,7 +1314,7 @@ class PoolingTest(test.TestCase):
             use_gpu=use_gpu,
             v2=v2)
 
-  def _testMaxPoolGradDirectWithNans2_1CPU(self):
+  def _testMaxPoolGradDirectWithNans2_1(self):
     input_data = [float("nan")] * 16
     output_backprop = [11.0, 12.0, 13.0, 15.0, 16.0, 17.0, 19.0, 20.0, 21.0]
     # Test the CPU implementation, which propagates diffs in case of NaN
@@ -1354,23 +1337,11 @@ class PoolingTest(test.TestCase):
           use_gpu=False,
           v2=v2)
 
-  def _testMaxPoolGradDirectWithNans2_1GPU(self):
     if not test.is_gpu_available():
       return
-    input_data = [float("nan")] * 16
-    output_backprop = [11.0, 12.0, 13.0, 15.0, 16.0, 17.0, 19.0, 20.0, 21.0]
-    # (1) For the NHWC format (used by default below), TensorFlow currently uses
-    # custom MaxPoolingNoMask for the forward op, cuDNN for the gradient op.
-    # With NaNs as input, MaxPoolingNoMask outputs -Inf, which is then fed into
-    # the gradient op. The cuDNN gradient op currently doesn't propagate the
-    # diff if input is -Inf and as a result outputs zeros.
-    # (2) For the NCHW format, TensorFlow currently uses
-    # cuDNN for both the forward and the gradient op. With NaNs as input, cuDNN
-    # forward op outputs NaNs, which is then fed into cuDNN gradient op. cuDNN
-    # gradient op is able to propagate NaNs and as a result the output is the
-    # same as expected_input_backprop_tf_cpu.
-    # We turn off graph optimization (layout optimizer) as the behavior of the
-    # above two cases are different.
+
+    # Test the GPU implementation that uses cudnn for now.
+    # It does not propagate the diff in cases of NaNs
     expected_input_backprop_cudnn = [
         0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
         0.0, 0.0
@@ -1388,10 +1359,9 @@ class PoolingTest(test.TestCase):
           col_stride=1,
           padding="VALID",
           use_gpu=True,
-          v2=v2,
-          graph_optimization=False)
+          v2=v2)
 
-  def _testMaxPoolGradDirectWithNans2_2CPU(self):
+  def _testMaxPoolGradDirectWithNans2_2(self):
     input_data = [float("nan")] * 16
     output_backprop = [
         float("nan"), 12.0, 13.0, 15.0, float("nan"), 17.0, 19.0, 20.0,
@@ -1417,16 +1387,11 @@ class PoolingTest(test.TestCase):
           use_gpu=False,
           v2=v2)
 
-  def _testMaxPoolGradDirectWithNans2_2GPU(self):
     if not test.is_gpu_available():
       return
-    input_data = [float("nan")] * 16
-    output_backprop = [
-        float("nan"), 12.0, 13.0, 15.0,
-        float("nan"), 17.0, 19.0, 20.0,
-        float("nan")
-    ]
-    # See the correspoinding comment in _testMaxPoolGradDirectWithNans2_1GPU().
+
+    # Test the GPU implementation that uses cudnn for now.
+    # It does not propagate the diff in cases of NaNs
     expected_input_backprop_cudnn = [
         0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
         0.0, 0.0
@@ -1444,21 +1409,14 @@ class PoolingTest(test.TestCase):
           col_stride=1,
           padding="VALID",
           use_gpu=True,
-          v2=v2,
-          graph_optimization=False)
+          v2=v2)
 
   def testMaxPoolGradDirect(self):
     self._testMaxPoolGradDirect1_1()
     self._testMaxPoolGradDirect1_2()
     self._testMaxPoolGradDirect1_3()
-    self._testMaxPoolGradDirectWithNans2_1CPU()
-    self._testMaxPoolGradDirectWithNans2_2CPU()
-
-  def testMaxPoolGradDirectNans2_1GPU(self):
-    self._testMaxPoolGradDirectWithNans2_1GPU()
-
-  def testMaxPoolGradDirectNans2_2GPU(self):
-    self._testMaxPoolGradDirectWithNans2_2GPU()
+    self._testMaxPoolGradDirectWithNans2_1()
+    self._testMaxPoolGradDirectWithNans2_2()
 
   def _testMaxPoolGradGradValidPadding1_1(self, data_format, use_gpu):
     for pool_func in [gen_nn_ops._max_pool_v2, nn_ops.max_pool]:
-- 
GitLab


From 775b496167bef8067d6c03f42809a96d565727f9 Mon Sep 17 00:00:00 2001
From: Alexandre Passos <apassos@google.com>
Date: Mon, 20 Nov 2017 14:59:34 -0800
Subject: [PATCH 0138/1225] Do not swallow exceptions in gradient functions in
 eager.

PiperOrigin-RevId: 176422128
---
 tensorflow/c/eager/tape.h                 | 1 -
 tensorflow/python/eager/backprop_test.py  | 3 +--
 tensorflow/python/eager/pywrap_tfe_src.cc | 9 +++++----
 3 files changed, 6 insertions(+), 7 deletions(-)

diff --git a/tensorflow/c/eager/tape.h b/tensorflow/c/eager/tape.h
index 29d73c5ca4..84b40a1819 100644
--- a/tensorflow/c/eager/tape.h
+++ b/tensorflow/c/eager/tape.h
@@ -485,7 +485,6 @@ Status GradientTape<Gradient, BackwardFunction>::ComputeGradient(
     Status s = vspace.CallBackwardFunction(trace.backward_function,
                                            out_gradients, &in_gradients);
     if (!s.ok()) {
-      VLOG(1) << "Gradient function failed.";
       cleanup();
       return s;
     }
diff --git a/tensorflow/python/eager/backprop_test.py b/tensorflow/python/eager/backprop_test.py
index 86c9cce3fd..e18ebba785 100644
--- a/tensorflow/python/eager/backprop_test.py
+++ b/tensorflow/python/eager/backprop_test.py
@@ -28,7 +28,6 @@ from tensorflow.python.eager import tape
 from tensorflow.python.eager import test
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
-from tensorflow.python.framework import errors_impl
 from tensorflow.python.framework import ops
 from tensorflow.python.framework import tensor_shape
 from tensorflow.python.ops import array_ops
@@ -111,7 +110,7 @@ class BackpropTest(test.TestCase):
       return x, grad
 
     # TODO(apassos) raise the right error here
-    with self.assertRaises(errors_impl.InternalError):
+    with self.assertRaises(RuntimeError):
       backprop.gradients_function(f)(constant_op.constant(1.0))
 
   def testImplicitGradOverEmbeddingLookup(self):
diff --git a/tensorflow/python/eager/pywrap_tfe_src.cc b/tensorflow/python/eager/pywrap_tfe_src.cc
index 387eec1358..0a0749fd4b 100644
--- a/tensorflow/python/eager/pywrap_tfe_src.cc
+++ b/tensorflow/python/eager/pywrap_tfe_src.cc
@@ -891,10 +891,6 @@ class PyVSpace : public tensorflow::eager::VSpace<PyObject, PyObject> {
     Py_DECREF(grads);
     Py_DECREF(backward_function);
     if (py_result == nullptr) {
-      VLOG(1) << "Gradient function threw exceptions";
-      if (VLOG_IS_ON(1)) {
-        PyErr_Print();
-      }
       return tensorflow::errors::Internal("gradient function threw exceptions");
     }
     result->clear();
@@ -981,6 +977,11 @@ PyObject* TFE_Py_TapeGradient(PyObject* tape, PyObject* vspace,
   status->status = tape_obj->tape->ComputeGradient(
       c_vspace, target_vec, sources_vec, outgrad_vec, &result);
   if (!status->status.ok()) {
+    if (PyErr_Occurred()) {
+      // Do not propagate the erroneous status as that would swallow the
+      // exception which caused the problem.
+      status->status = tensorflow::Status::OK();
+    }
     return nullptr;
   }
   if (!result.empty()) {
-- 
GitLab


From ecc2754b21572ebb3f43417995822f6ccd98ea7d Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 20 Nov 2017 15:05:03 -0800
Subject: [PATCH 0139/1225] Remove unused inclusions

PiperOrigin-RevId: 176423279
---
 tensorflow/cc/framework/cc_op_gen.cc                         | 2 --
 .../toco/graph_transformations/remove_trivial_passthrough.cc | 5 ++---
 2 files changed, 2 insertions(+), 5 deletions(-)

diff --git a/tensorflow/cc/framework/cc_op_gen.cc b/tensorflow/cc/framework/cc_op_gen.cc
index 6f2b7acb82..c0b8cc2e41 100644
--- a/tensorflow/cc/framework/cc_op_gen.cc
+++ b/tensorflow/cc/framework/cc_op_gen.cc
@@ -23,7 +23,6 @@ limitations under the License.
 #include "tensorflow/core/framework/attr_value_util.h"
 #include "tensorflow/core/framework/op_def_util.h"
 #include "tensorflow/core/framework/op_gen_lib.h"
-#include "tensorflow/core/framework/op_gen_overrides.pb.h"
 #include "tensorflow/core/framework/tensor.pb.h"
 #include "tensorflow/core/framework/tensor_shape.pb.h"
 #include "tensorflow/core/framework/types.pb_text.h"
@@ -37,7 +36,6 @@ limitations under the License.
 #include "tensorflow/core/public/version.h"
 
 namespace tensorflow {
-
 namespace {
 
 const int kRightMargin = 79;
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_passthrough.cc b/tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_passthrough.cc
index a0d1338298..d998dcd9f3 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_passthrough.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_passthrough.cc
@@ -19,13 +19,12 @@ limitations under the License.
 
 #include "tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.h"
 #include "tensorflow/contrib/lite/toco/model.h"
-#include "tensorflow/contrib/lite/toco/model_flags.pb.h"
 #include "tensorflow/contrib/lite/toco/tooling_util.h"
 #include "tensorflow/core/platform/logging.h"
 
 namespace toco {
-
 namespace {
+
 // Reroute all edges involving a given discardable array to another
 // array instead. from_array is assumed to be discardable, and consequently
 // this only updates operator edges (since discardable arrays only
@@ -46,7 +45,7 @@ void RerouteEdges(const string& from_array, const string& to_array,
   }
 }
 
-}  // end anonymous namespace
+}  // namespace
 
 bool RemoveTrivialPassthroughOp(GraphTransformation* transformation,
                                 Model* model, std::size_t op_index) {
-- 
GitLab


From 75e6675c19024a70d27015f9f52f8eba60024803 Mon Sep 17 00:00:00 2001
From: Alexandre Passos <apassos@google.com>
Date: Mon, 20 Nov 2017 15:23:43 -0800
Subject: [PATCH 0140/1225] Shape inference function for
 CreateSummaryFileWriter.

PiperOrigin-RevId: 176426462
---
 tensorflow/core/ops/summary_ops.cc | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tensorflow/core/ops/summary_ops.cc b/tensorflow/core/ops/summary_ops.cc
index 029ff09906..aa7458f903 100644
--- a/tensorflow/core/ops/summary_ops.cc
+++ b/tensorflow/core/ops/summary_ops.cc
@@ -38,6 +38,7 @@ REGISTER_OP("CreateSummaryFileWriter")
     .Input("max_queue: int32")
     .Input("flush_millis: int32")
     .Input("filename_suffix: string")
+    .SetShapeFn(shape_inference::NoOutputs)
     .Doc(R"doc(
 Creates a summary file writer accessible by the given resource handle.
 
-- 
GitLab


From 901f3af1891804d6a5f211346a867dbb4167653d Mon Sep 17 00:00:00 2001
From: Allen Lavoie <allenl@google.com>
Date: Mon, 20 Nov 2017 15:46:58 -0800
Subject: [PATCH 0141/1225] Add a bit of documentation to tfe.Network

PiperOrigin-RevId: 176429590
---
 tensorflow/contrib/eager/python/network.py | 81 +++++++++++++++++++---
 1 file changed, 73 insertions(+), 8 deletions(-)

diff --git a/tensorflow/contrib/eager/python/network.py b/tensorflow/contrib/eager/python/network.py
index 97eded7dca..0388aaa849 100644
--- a/tensorflow/contrib/eager/python/network.py
+++ b/tensorflow/contrib/eager/python/network.py
@@ -54,16 +54,81 @@ def _network_name_scope_naming(current_variable_scope):
 class Network(base.Layer):
   """Represents the composition of a set of Layers.
 
-  TODO(josh11b,ashankar):
-  - Should "trainable" be changeable on the Network object?
-  - Do we allow add_variable in Network?
-  - Detect layers used in __call__ that weren't registered with track_layer.
-  - Convert inputs to __call__ to tensors.
-  - Prevent variables from being created after the first __call__?
-    (Think about restoring from a checkpoint).
+  `Network` implements the `Layer` interface and adds convenience methods for
+  managing sub-`Layer`s, such as listing variables.
+
+  `Layer`s (including other `Network`s) should be added via `track_layer`. They
+  can then be used when overriding the `Network.call` method:
+
+  ```python
+  class TwoLayerNetwork(tfe.Network):
+
+    def __init__(self, name):
+      super(TwoLayerNetwork, self).__init__(name=name)
+      self.layer_one = self.track_layer(tf.layers.Dense(16, input_shape=(8,)))
+      self.layer_two = self.track_layer(tf.layers.Dense(1, input_shape=(16,)))
+
+    def call(self, inputs):
+      return self.layer_two(self.layer_one(inputs))
+  ```
+
+  After constructing an object and calling the `Network`, a list of variables
+  created by tracked `Layer`s is available via `Network.variables`:
+
+  ```python
+  net = TwoLayerNetwork(name="net")
+  output = net(tf.ones([1, 8]))
+  print([v.name for v in net.variables])
+  ```
+
+  This example prints variable names, one kernel and one bias per
+  `tf.layers.Dense` layer:
+
+  ```
+  ['net/dense/kernel:0',
+   'net/dense/bias:0',
+   'net/dense_1/kernel:0',
+   'net/dense_1/bias:0']
+  ```
+
+  These variables can be passed to a `Saver` (`tf.train.Saver`, or
+  `tf.contrib.eager.Saver` when executing eagerly) to save or restore the
+  `Network`, typically alongside a global step and `tf.train.Optimizer`
+  variables when checkpointing during training.
+
+  Note that the semantics of calling a `Network` with graph execution (i.e. not
+  executing eagerly) may change slightly in the future. Currently stateful ops
+  are pruned from the graph unless they or something that depends on them is
+  executed in a session, but this behavior is not consistent with eager
+  execution (where stateful ops are executed eagerly). `Layer`s from `tf.layers`
+  do not depend on this pruning and so will not be affected, but `Network`s
+  which rely on stateful ops being added to the graph but not executed (e.g. via
+  custom `Layer`s which manage stateful ops) may break with this change.
   """
+  # TODO(josh11b,ashankar,allenl):
+  # - Should 'trainable' be changeable on the Network object?
+  # - Do we allow add_variable in Network?
+  # - Detect layers used in __call__ that weren't registered with track_layer.
+  # - Convert inputs to __call__ to tensors.
 
   def __init__(self, name=None):
+    """Configure the `Network`.
+
+    Args:
+      name: The name to use for this `Network`. If specified, it must be unique
+        in the context where this `Network` is first
+         (1) added to another `Network` (in which case it must not share a name
+           with other `Layers` added to that `Network`), or
+         (2) built/called (in which case no other 'top-level' `Network`s may
+          share this name).
+        If unspecified or None, the `Network` will be named using its class
+        name, with a number appended if necessary for uniqueness (e.g. MyNetwork
+        -> 'my_network_1').
+
+    Raises:
+      ValueError: If `name` is not valid. Note that some naming errors will
+        instead be raised when the `Network` is called.
+    """
     if isinstance(name, variable_scope.VariableScope):
       raise ValueError("VariableScopes are not valid Network names.")
     if name is not None and "/" in name:
@@ -386,7 +451,7 @@ class Network(base.Layer):
         "at https://github.com/tensorflow/tensorflow/issues/new if this is "
         "important to you")
 
-  # TODO(josh11b): Support other Layer methods needed for graph mode, such as for
+  # TODO(allenl): Support other Layer methods needed for graph mode, such as for
   # losses and updates
 
 
-- 
GitLab


From b99ba0d749f04311d2c8e8c5843d78e427edb832 Mon Sep 17 00:00:00 2001
From: Alexandre Passos <apassos@google.com>
Date: Mon, 20 Nov 2017 15:50:44 -0800
Subject: [PATCH 0142/1225] Contrib summaries always try to run when inside
 loops or conditionals.

PiperOrigin-RevId: 176430089
---
 tensorflow/contrib/summary/BUILD              | 12 ++++-
 tensorflow/contrib/summary/summary_ops.py     |  5 +-
 .../contrib/summary/summary_ops_graph_test.py | 50 +++++++++++++++++++
 tensorflow/python/framework/ops.py            |  3 ++
 tensorflow/python/ops/control_flow_ops.py     | 26 ++++++++++
 5 files changed, 91 insertions(+), 5 deletions(-)

diff --git a/tensorflow/contrib/summary/BUILD b/tensorflow/contrib/summary/BUILD
index 3892654f25..45d6454526 100644
--- a/tensorflow/contrib/summary/BUILD
+++ b/tensorflow/contrib/summary/BUILD
@@ -47,10 +47,16 @@ py_test(
     deps = [
         ":summary_ops",
         ":summary_test_internal",
+        ":summary_test_util",
+        "//tensorflow/core:protos_all_py",
+        "//tensorflow/python:array_ops",
         "//tensorflow/python:client_testlib",
-        "//tensorflow/python:ops",
-        "//tensorflow/python:platform",
+        "//tensorflow/python:constant_op",
+        "//tensorflow/python:control_flow_ops",
+        "//tensorflow/python:dtypes",
+        "//tensorflow/python:framework_ops",
         "//tensorflow/python:training",
+        "@six_archive//:six",
     ],
 )
 
@@ -61,6 +67,7 @@ py_library(
     visibility = ["//tensorflow:internal"],
     deps = [
         ":gen_summary_ops",
+        "//tensorflow/core:protos_all_py",
         "//tensorflow/python:array_ops",
         "//tensorflow/python:constant_op",
         "//tensorflow/python:control_flow_ops",
@@ -73,6 +80,7 @@ py_library(
         "//tensorflow/python:training",
         "//tensorflow/python:util",
         "//tensorflow/python/eager:context",
+        "@six_archive//:six",
     ],
 )
 
diff --git a/tensorflow/contrib/summary/summary_ops.py b/tensorflow/contrib/summary/summary_ops.py
index 3e65f83051..8e37987cb7 100644
--- a/tensorflow/contrib/summary/summary_ops.py
+++ b/tensorflow/contrib/summary/summary_ops.py
@@ -45,7 +45,6 @@ from tensorflow.python.util import tf_contextlib
 # Tensor. If this tensor is True the summary ops will record summaries.
 _SHOULD_RECORD_SUMMARIES_NAME = "ShouldRecordSummaries"
 
-_SUMMARY_COLLECTION_NAME = "_SUMMARY_V2"
 _SUMMARY_WRITER_INIT_COLLECTION_NAME = "_SUMMARY_WRITER_V2"
 
 _EXPERIMENT_NAME_PATTERNS = re.compile(r"^[^\x00-\x1F<>]{0,256}$")
@@ -298,7 +297,7 @@ def all_summary_ops():
   if context.in_eager_mode():
     raise RuntimeError(
         "tf.contrib.summary.all_summary_ops is only supported in graph mode.")
-  return ops.get_collection(_SUMMARY_COLLECTION_NAME)
+  return ops.get_collection(ops.GraphKeys._SUMMARY_COLLECTION)  # pylint: disable=protected-access
 
 
 def summary_writer_initializer_op():
@@ -340,7 +339,7 @@ def summary_writer_function(name, tensor, function, family=None):
   with ops.device("cpu:0"):
     op = utils.smart_cond(
         should_record_summaries(), record, _nothing, name="")
-    ops.add_to_collection(_SUMMARY_COLLECTION_NAME, op)
+    ops.add_to_collection(ops.GraphKeys._SUMMARY_COLLECTION, op)  # pylint: disable=protected-access
   return op
 
 
diff --git a/tensorflow/contrib/summary/summary_ops_graph_test.py b/tensorflow/contrib/summary/summary_ops_graph_test.py
index 8f85f67a25..fe55bf93e2 100644
--- a/tensorflow/contrib/summary/summary_ops_graph_test.py
+++ b/tensorflow/contrib/summary/summary_ops_graph_test.py
@@ -16,13 +16,20 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
+import tempfile
+
 import six
 
 from tensorflow.contrib.summary import summary_ops
 from tensorflow.contrib.summary import summary_test_internal
+from tensorflow.contrib.summary import summary_test_util
 from tensorflow.core.framework import graph_pb2
 from tensorflow.core.framework import node_def_pb2
+from tensorflow.python.framework import constant_op
+from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
+from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import control_flow_ops
 from tensorflow.python.platform import test
 from tensorflow.python.training import training_util
 
@@ -47,6 +54,49 @@ class DbTest(summary_test_internal.SummaryDbTest):
     six.assertCountEqual(self, [name],
                          get_all(self.db, 'SELECT node_name FROM Nodes'))
 
+  def testSummaryGraphModeCond(self):
+    with ops.Graph().as_default(), self.test_session():
+      training_util.get_or_create_global_step()
+      logdir = tempfile.mkdtemp()
+      with summary_ops.create_summary_file_writer(
+          logdir, max_queue=0,
+          name='t2').as_default(), summary_ops.always_record_summaries():
+        summary_ops.initialize()
+        training_util.get_or_create_global_step().initializer.run()
+        def f():
+          summary_ops.scalar('scalar', 2.0)
+          return constant_op.constant(True)
+        pred = array_ops.placeholder(dtypes.bool)
+        x = control_flow_ops.cond(pred, f,
+                                  lambda: constant_op.constant(False))
+        x.eval(feed_dict={pred: True})
+
+      events = summary_test_util.events_from_logdir(logdir)
+      self.assertEqual(len(events), 2)
+      self.assertEqual(events[1].summary.value[0].tag, 'cond/scalar')
+
+  def testSummaryGraphModeWhile(self):
+    with ops.Graph().as_default(), self.test_session():
+      training_util.get_or_create_global_step()
+      logdir = tempfile.mkdtemp()
+      with summary_ops.create_summary_file_writer(
+          logdir, max_queue=0,
+          name='t2').as_default(), summary_ops.always_record_summaries():
+        summary_ops.initialize()
+        training_util.get_or_create_global_step().initializer.run()
+        def body(unused_pred):
+          summary_ops.scalar('scalar', 2.0)
+          return constant_op.constant(False)
+        def cond(pred):
+          return pred
+        pred = array_ops.placeholder(dtypes.bool)
+        x = control_flow_ops.while_loop(cond, body, [pred])
+        x.eval(feed_dict={pred: True})
+
+      events = summary_test_util.events_from_logdir(logdir)
+      self.assertEqual(len(events), 2)
+      self.assertEqual(events[1].summary.value[0].tag, 'while/scalar')
+
 
 if __name__ == '__main__':
   test.main()
diff --git a/tensorflow/python/framework/ops.py b/tensorflow/python/framework/ops.py
index 746b35b87f..132571dd05 100644
--- a/tensorflow/python/framework/ops.py
+++ b/tensorflow/python/framework/ops.py
@@ -5047,6 +5047,9 @@ class GraphKeys(object):
   COND_CONTEXT = "cond_context"
   WHILE_CONTEXT = "while_context"
 
+  # Used to store v2 summary names.
+  _SUMMARY_COLLECTION = "_SUMMARY_V2"
+
   # List of all collections that keep track of variables.
   _VARIABLE_COLLECTIONS = [
       GLOBAL_VARIABLES,
diff --git a/tensorflow/python/ops/control_flow_ops.py b/tensorflow/python/ops/control_flow_ops.py
index d33d4cd597..194df5957c 100644
--- a/tensorflow/python/ops/control_flow_ops.py
+++ b/tensorflow/python/ops/control_flow_ops.py
@@ -1764,7 +1764,19 @@ class CondContext(ControlFlowContext):
 
   def BuildCondBranch(self, fn):
     """Add the subgraph defined by fn() to the graph."""
+    pre_summaries = ops.get_collection(ops.GraphKeys._SUMMARY_COLLECTION)  # pylint: disable=protected-access
     original_result = fn()
+    post_summaries = ops.get_collection(ops.GraphKeys._SUMMARY_COLLECTION)  # pylint: disable=protected-access
+    if len(post_summaries) > len(pre_summaries):
+      new_summaries = post_summaries[len(pre_summaries):]
+      summary_ref = ops.get_collection_ref(ops.GraphKeys._SUMMARY_COLLECTION)  # pylint: disable=protected-access
+      summary_ref[:] = pre_summaries
+      with ops.control_dependencies(new_summaries):
+        if original_result is None:
+          return no_op(), None
+        else:
+          original_result = nest.map_structure(
+              array_ops.identity, original_result)
     if original_result is None:
       return None, None
 
@@ -2629,9 +2641,23 @@ class WhileContext(ControlFlowContext):
     packed_vars_for_body = nest.pack_sequence_as(
         structure=original_loop_vars,
         flat_sequence=vars_for_body_with_tensor_arrays)
+    pre_summaries = ops.get_collection(ops.GraphKeys._SUMMARY_COLLECTION)  # pylint: disable=protected-access
     body_result = body(*packed_vars_for_body)
+    post_summaries = ops.get_collection(ops.GraphKeys._SUMMARY_COLLECTION)  # pylint: disable=protected-access
     if not nest.is_sequence(body_result):
       body_result = [body_result]
+    if len(post_summaries) > len(pre_summaries):
+      new_summaries = post_summaries[len(pre_summaries):]
+      summary_ref = ops.get_collection_ref(ops.GraphKeys._SUMMARY_COLLECTION)  # pylint: disable=protected-access
+      summary_ref[:] = pre_summaries
+      with ops.control_dependencies(new_summaries):
+        def map_fn(x):
+          # TODO(apassos) figure out how to trigger with tensor arrays as well
+          if isinstance(x, tensor_array_ops.TensorArray):
+            return x
+          return array_ops.identity(x)
+        body_result = nest.map_structure(map_fn, body_result)
+
     # Compare the structure types of input and output of body.
     # For backwards compatibility, the first layer is forced to a list
     # during this comparison, because inputs are typically lists and
-- 
GitLab


From ab400e98f1a206aa57e219d00dc9856d151ad676 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 20 Nov 2017 16:00:38 -0800
Subject: [PATCH 0143/1225] When both axis and reduction_axes are None and
 keep_dims is False, tf.reduce_* must return a scalar even if the tensor being
 reduced has a dynamic shape.

It's not possible for the shape fn to apply this logic because the input's
rank is unknown and axes is not a constant.

PiperOrigin-RevId: 176431338
---
 .../python/kernel_tests/reduction_ops_test.py |  30 +++-
 tensorflow/python/ops/math_ops.py             | 159 ++++++++++--------
 2 files changed, 117 insertions(+), 72 deletions(-)

diff --git a/tensorflow/python/kernel_tests/reduction_ops_test.py b/tensorflow/python/kernel_tests/reduction_ops_test.py
index 2dc65b1384..4231a79b2d 100644
--- a/tensorflow/python/kernel_tests/reduction_ops_test.py
+++ b/tensorflow/python/kernel_tests/reduction_ops_test.py
@@ -50,7 +50,7 @@ def _powerset(iterable):
   """
   s = list(iterable)
   return itertools.chain.from_iterable(
-      itertools.combinations(s, r) for r in range(len(s)+1))
+      itertools.combinations(s, r) for r in range(len(s) + 1))
 
 
 class ReducedShapeTest(test.TestCase):
@@ -91,6 +91,23 @@ class ReducedShapeTest(test.TestCase):
       self._check([10, 10, 10], [-3], [1, 10, 10])
 
 
+class ReductionUnknownShape(test.TestCase):
+
+  def testBasic(self):
+    with self.test_session():
+      for dtype, reductions in [(dtypes.float32,
+                                 (math_ops.reduce_sum, math_ops.reduce_mean,
+                                  math_ops.reduce_prod, math_ops.reduce_max,
+                                  math_ops.reduce_min)),
+                                (dtypes.bool, (math_ops.reduce_all,
+                                               math_ops.reduce_any))]:
+        for reduction in reductions:
+          x = array_ops.placeholder(
+              dtype=dtype, shape=None)  # Some tensor w/ unknown shape.
+          y = reduction(x)
+          self.assertEqual(y.shape, ())
+
+
 class BaseReductionTest(test.TestCase):
 
   def _tf_reduce(self, x, reduction_axes, keep_dims):
@@ -200,7 +217,6 @@ class SumReductionTest(BaseReductionTest):
       tf_out_mean = sess.run(tf_mean)
     self.assertAllClose(tf_out_mean, 1.)
 
-
   def testFloat32(self):
     for rank in range(1, _MAX_RANK + 1):
       np_arr = self._makeIncremental((2,) * rank, dtypes.float32)
@@ -309,8 +325,9 @@ class SumReductionTest(BaseReductionTest):
   # Int64??
 
   def testGradient(self):
-    for dtype in [dtypes.float32, dtypes.float64, dtypes.complex64,
-                  dtypes.complex128]:
+    for dtype in [
+        dtypes.float32, dtypes.float64, dtypes.complex64, dtypes.complex128
+    ]:
       x = self._makeIncremental([2, 3, 4, 2], dtype)
       self._compareGradientAxes(x)
 
@@ -913,8 +930,9 @@ class CountNonzeroReductionTest(test.TestCase):
   def testFloatReduce4D(self):
     # Create a 4D array of floats and reduce across some
     # dimensions
-    np_arr = np.floor(np.arange(0.0, 210.0) / 100.0).reshape(
-        [2, 3, 5, 7]).astype(np.float32)
+    np_arr = np.floor(np.arange(0.0, 210.0) / 100.0).reshape([2, 3, 5,
+                                                              7]).astype(
+                                                                  np.float32)
     self._compareAll(np_arr, None)
     self._compareAll(np_arr, [])
     self._compareAll(np_arr, [0])
diff --git a/tensorflow/python/ops/math_ops.py b/tensorflow/python/ops/math_ops.py
index 886b2048f9..4c400423b6 100644
--- a/tensorflow/python/ops/math_ops.py
+++ b/tensorflow/python/ops/math_ops.py
@@ -181,17 +181,19 @@ arg_min = deprecated(None, "Use `argmin` instead")(arg_min)  # pylint: disable=u
 
 
 def _set_doc(doc):
+
   def _decorator(func):
     func.__doc__ = doc
     return func
+
   return _decorator
 
 
 # pylint: disable=redefined-builtin
 @deprecated_args(None, "Use the `axis` argument instead", "dimension")
-@_set_doc(gen_math_ops.arg_max.__doc__
-          .replace("dimensions", "axes")
-          .replace("dimension", "axis"))
+@_set_doc(
+    gen_math_ops.arg_max.__doc__.replace("dimensions", "axes").replace(
+        "dimension", "axis"))
 def argmax(input,
            axis=None,
            name=None,
@@ -207,9 +209,9 @@ def argmax(input,
 
 
 @deprecated_args(None, "Use the `axis` argument instead", "dimension")
-@_set_doc(gen_math_ops.arg_min.__doc__
-          .replace("dimensions", "axes")
-          .replace("dimension", "axis"))
+@_set_doc(
+    gen_math_ops.arg_min.__doc__.replace("dimensions", "axes").replace(
+        "dimension", "axis"))
 def argmin(input,
            axis=None,
            name=None,
@@ -275,6 +277,8 @@ def abs(x, name=None):
 # pylint: disable=redefined-builtin
 def _bucketize(input, boundaries, name=None):
   return gen_math_ops._bucketize(input=input, boundaries=boundaries, name=name)
+
+
 # pylint: enable=redefined-builtin
 
 
@@ -327,8 +331,8 @@ def _mul(x, y, name=None):
   return gen_math_ops._mul(x, y, name)
 
 
-_mul.__doc__ = (gen_math_ops._mul.__doc__ +
-                ("" if _mul.__doc__ is None else _mul.__doc__))
+_mul.__doc__ = (
+    gen_math_ops._mul.__doc__ + ("" if _mul.__doc__ is None else _mul.__doc__))
 
 
 def subtract(x, y, name=None):
@@ -346,8 +350,8 @@ def _sub(x, y, name=None):
   return gen_math_ops._sub(x, y, name)
 
 
-_sub.__doc__ = (gen_math_ops._sub.__doc__ +
-                ("" if _sub.__doc__ is None else _sub.__doc__))
+_sub.__doc__ = (
+    gen_math_ops._sub.__doc__ + ("" if _sub.__doc__ is None else _sub.__doc__))
 
 
 # pylint: disable=g-docstring-has-escape
@@ -957,8 +961,8 @@ _TRUEDIV_TABLE = {
 # to explicitly use the "/" operator to invoke either truediv or div.
 def _sparse_dense_truediv(sp_indices, sp_values, sp_shape, y, name=None):
   """Internal helper function for 'sp_t / dense_t'."""
-  with ops.name_scope(name, "truediv", [sp_indices, sp_values, sp_shape,
-                                        y]) as name:
+  with ops.name_scope(name, "truediv",
+                      [sp_indices, sp_values, sp_shape, y]) as name:
     sp_values = ops.convert_to_tensor(sp_values, name="sp_values")
     y = ops.convert_to_tensor(y, name="y")
     x_dtype = sp_values.dtype.base_dtype
@@ -1265,6 +1269,14 @@ def _ReductionDims(x, axis, reduction_indices):
     return range(0, array_ops.rank(x))
 
 
+def _may_reduce_to_scalar(keep_dims, axis, reduction_indices, output):
+  """Set a reduction's output's shape to be a scalar if we are certain."""
+  if (not output.shape.is_fully_defined()) and (not keep_dims) and (
+      axis is None) and (reduction_indices is None):
+    output.set_shape(())
+  return output
+
+
 def reduce_sum(input_tensor,
                axis=None,
                keep_dims=False,
@@ -1307,11 +1319,13 @@ def reduce_sum(input_tensor,
   Equivalent to np.sum
   @end_compatibility
   """
-  return gen_math_ops._sum(
-      input_tensor,
-      _ReductionDims(input_tensor, axis, reduction_indices),
-      keep_dims,
-      name=name)
+  return _may_reduce_to_scalar(keep_dims, axis, reduction_indices,
+                               gen_math_ops._sum(
+                                   input_tensor,
+                                   _ReductionDims(input_tensor, axis,
+                                                  reduction_indices),
+                                   keep_dims,
+                                   name=name))
 
 
 def count_nonzero(input_tensor,
@@ -1411,11 +1425,13 @@ def reduce_mean(input_tensor,
   Equivalent to np.mean
   @end_compatibility
   """
-  return gen_math_ops._mean(
-      input_tensor,
-      _ReductionDims(input_tensor, axis, reduction_indices),
-      keep_dims,
-      name=name)
+  return _may_reduce_to_scalar(keep_dims, axis, reduction_indices,
+                               gen_math_ops._mean(
+                                   input_tensor,
+                                   _ReductionDims(input_tensor, axis,
+                                                  reduction_indices),
+                                   keep_dims,
+                                   name=name))
 
 
 def reduce_prod(input_tensor,
@@ -1449,11 +1465,13 @@ def reduce_prod(input_tensor,
   Equivalent to np.prod
   @end_compatibility
   """
-  return gen_math_ops._prod(
-      input_tensor,
-      _ReductionDims(input_tensor, axis, reduction_indices),
-      keep_dims,
-      name=name)
+  return _may_reduce_to_scalar(keep_dims, axis, reduction_indices,
+                               gen_math_ops._prod(
+                                   input_tensor,
+                                   _ReductionDims(input_tensor, axis,
+                                                  reduction_indices),
+                                   keep_dims,
+                                   name=name))
 
 
 def reduce_min(input_tensor,
@@ -1487,11 +1505,13 @@ def reduce_min(input_tensor,
   Equivalent to np.min
   @end_compatibility
   """
-  return gen_math_ops._min(
-      input_tensor,
-      _ReductionDims(input_tensor, axis, reduction_indices),
-      keep_dims,
-      name=name)
+  return _may_reduce_to_scalar(keep_dims, axis, reduction_indices,
+                               gen_math_ops._min(
+                                   input_tensor,
+                                   _ReductionDims(input_tensor, axis,
+                                                  reduction_indices),
+                                   keep_dims,
+                                   name=name))
 
 
 def reduce_max(input_tensor,
@@ -1525,11 +1545,13 @@ def reduce_max(input_tensor,
   Equivalent to np.max
   @end_compatibility
   """
-  return gen_math_ops._max(
-      input_tensor,
-      _ReductionDims(input_tensor, axis, reduction_indices),
-      keep_dims,
-      name=name)
+  return _may_reduce_to_scalar(keep_dims, axis, reduction_indices,
+                               gen_math_ops._max(
+                                   input_tensor,
+                                   _ReductionDims(input_tensor, axis,
+                                                  reduction_indices),
+                                   keep_dims,
+                                   name=name))
 
 
 def reduce_all(input_tensor,
@@ -1572,11 +1594,13 @@ def reduce_all(input_tensor,
   Equivalent to np.all
   @end_compatibility
   """
-  return gen_math_ops._all(
-      input_tensor,
-      _ReductionDims(input_tensor, axis, reduction_indices),
-      keep_dims,
-      name=name)
+  return _may_reduce_to_scalar(keep_dims, axis, reduction_indices,
+                               gen_math_ops._all(
+                                   input_tensor,
+                                   _ReductionDims(input_tensor, axis,
+                                                  reduction_indices),
+                                   keep_dims,
+                                   name=name))
 
 
 def reduce_any(input_tensor,
@@ -1619,11 +1643,13 @@ def reduce_any(input_tensor,
   Equivalent to np.any
   @end_compatibility
   """
-  return gen_math_ops._any(
-      input_tensor,
-      _ReductionDims(input_tensor, axis, reduction_indices),
-      keep_dims,
-      name=name)
+  return _may_reduce_to_scalar(keep_dims, axis, reduction_indices,
+                               gen_math_ops._any(
+                                   input_tensor,
+                                   _ReductionDims(input_tensor, axis,
+                                                  reduction_indices),
+                                   keep_dims,
+                                   name=name))
 
 
 def reduce_logsumexp(input_tensor,
@@ -1676,8 +1702,7 @@ def reduce_logsumexp(input_tensor,
         keep_dims=True)
     my_max = array_ops.stop_gradient(
         array_ops.where(
-            gen_math_ops.is_finite(raw_max),
-            raw_max,
+            gen_math_ops.is_finite(raw_max), raw_max,
             array_ops.zeros_like(raw_max)))
     result = gen_math_ops.log(
         reduce_sum(
@@ -1689,7 +1714,7 @@ def reduce_logsumexp(input_tensor,
       if isinstance(axis, int):
         axis = [axis]
       result = array_ops.squeeze(result, axis)
-    return result
+    return _may_reduce_to_scalar(keep_dims, axis, reduction_indices, result)
 
 
 def trace(x, name=None):
@@ -1853,9 +1878,9 @@ def matmul(a,
     # TODO(apassos) remove _shape_tuple here when it is not needed.
     a_shape = a._shape_tuple()  # pylint: disable=protected-access
     b_shape = b._shape_tuple()  # pylint: disable=protected-access
-    if (not a_is_sparse and not b_is_sparse) and (
-        (a_shape is None or len(a_shape) > 2) and
-        (b_shape is None or len(b_shape) > 2)):
+    if (not a_is_sparse and
+        not b_is_sparse) and ((a_shape is None or len(a_shape) > 2) and
+                              (b_shape is None or len(b_shape) > 2)):
       # BatchMatmul does not support transpose, so we conjugate the matrix and
       # use adjoint instead. Conj() is a noop for real matrices.
       if transpose_a:
@@ -1880,8 +1905,8 @@ def matmul(a,
     use_sparse_matmul = False
     if a_is_sparse or b_is_sparse:
       sparse_matmul_types = [dtypes.bfloat16, dtypes.float32]
-      use_sparse_matmul = (a.dtype in sparse_matmul_types and
-                           b.dtype in sparse_matmul_types)
+      use_sparse_matmul = (
+          a.dtype in sparse_matmul_types and b.dtype in sparse_matmul_types)
     if a.dtype == dtypes.bfloat16 or b.dtype == dtypes.bfloat16:
       # matmul currently doesn't handle bfloat16 inputs.
       use_sparse_matmul = True
@@ -1972,8 +1997,8 @@ def _as_indexed_slices_list(inputs, optimize=True):
   for o in outputs:
     if o.indices.dtype == dtypes.int32:
       casted_outputs.append(
-          ops.IndexedSlices(o.values,
-                            cast(o.indices, dtypes.int64), o.dense_shape))
+          ops.IndexedSlices(o.values, cast(o.indices, dtypes.int64),
+                            o.dense_shape))
     else:
       casted_outputs.append(o)
   return casted_outputs
@@ -2072,8 +2097,8 @@ def accumulate_n(inputs, shape=None, tensor_dtype=None, name=None):
   if tensor_dtype is None:
     tensor_dtype = inputs[0].dtype
   if tensor_dtype != inputs[0].dtype:
-    raise TypeError("tensor_dtype is {}, but input is of type {}"
-                    .format(tensor_dtype, inputs[0].dtype))
+    raise TypeError("tensor_dtype is {}, but input is of type {}".format(
+        tensor_dtype, inputs[0].dtype))
   if len(inputs) == 1:
     return inputs[0]
   with ops.name_scope(name, "AccumulateN", inputs) as name:
@@ -2191,8 +2216,9 @@ def bincount(arr,
     maxlength = ops.convert_to_tensor(
         maxlength, name="maxlength", dtype=dtypes.int32)
     output_size = gen_math_ops.minimum(maxlength, output_size)
-  weights = (ops.convert_to_tensor(weights, name="weights")
-             if weights is not None else constant_op.constant([], dtype))
+  weights = (
+      ops.convert_to_tensor(weights, name="weights")
+      if weights is not None else constant_op.constant([], dtype))
   return gen_math_ops.bincount(arr, output_size, weights)
 
 
@@ -2472,7 +2498,8 @@ def tensordot(a, b, axes, name=None):
       rank_a = array_ops.rank(a)
       axes = ops.convert_to_tensor(axes, dtype=dtypes.int32, name="axes")
       axes = cast(axes >= 0, dtypes.int32) * axes + cast(
-          axes < 0, dtypes.int32) * (axes + rank_a)
+          axes < 0, dtypes.int32) * (
+              axes + rank_a)
       free, _ = array_ops.setdiff1d(range(rank_a), axes)
       free_dims = array_ops.gather(shape_a, free)
       axes_dims = array_ops.gather(shape_a, axes)
@@ -2498,8 +2525,8 @@ def tensordot(a, b, axes, name=None):
         return range(a_shape.ndims - axes, a_shape.ndims), range(axes)
       else:
         rank = array_ops.rank(a)
-        return (range(rank - axes, rank, dtype=dtypes.int32), range(
-            axes, dtype=dtypes.int32))
+        return (range(rank - axes, rank, dtype=dtypes.int32),
+                range(axes, dtype=dtypes.int32))
     elif isinstance(axes, (list, tuple)):
       if len(axes) != 2:
         raise ValueError("'axes' must be an integer or have length 2.")
@@ -2523,8 +2550,8 @@ def tensordot(a, b, axes, name=None):
     b = ops.convert_to_tensor(b, name="b")
     a_axes, b_axes = _tensordot_axes(a, axes)
     a_reshape, a_free_dims, a_free_dims_static = _tensordot_reshape(a, a_axes)
-    b_reshape, b_free_dims, b_free_dims_static = _tensordot_reshape(b, b_axes,
-                                                                    True)
+    b_reshape, b_free_dims, b_free_dims_static = _tensordot_reshape(
+        b, b_axes, True)
     ab_matmul = matmul(a_reshape, b_reshape)
     if isinstance(a_free_dims, list) and isinstance(b_free_dims, list):
       return array_ops.reshape(ab_matmul, a_free_dims + b_free_dims, name=name)
-- 
GitLab


From ab00df9b0b74910ca738e6ee850982f62ad42e55 Mon Sep 17 00:00:00 2001
From: Gunhan Gulsoy <gunan@google.com>
Date: Mon, 20 Nov 2017 16:07:16 -0800
Subject: [PATCH 0144/1225] Fix all tests under python/keras on windows
 (#14439)

* Add keras tests to cmake build.

* Make sure to close all file handles before cleanup in models_test.

* Try to fix callbacks_test failures on windows.

* Add a destructor for io_utils.HDF5Matrix class.

* Clear the FileWriterCache before deleting test folders in estiamator_test.

* Actually enable keras tests this time.

* Fix typo in models_test

* Make sure writer cache is clear before trying to delete files.

* Disable failing test cases on windows in data_utils_test.

* Disable failing test case on windows in training_test.

* Make sure the file writer is closed in callbacks_test.

* Remove the unused positional arg for on_train_end for Tensorboard callback.

* Skip empty rows in test_stop_training_csv.

* Skip tests that use use_multiprocessing=True on windows.

* Address review comments.

* Fix typo in callbacks_test.py.

* Update TensorBoard callback API to be consistent with the other callbacks.

* Revert callbacks_test to skip empty lines.

Opening with 'rU' does not seem to work.

* Add keras tests to cmake build.

* Make sure to close all file handles before cleanup in models_test.

* Try to fix callbacks_test failures on windows.

* Add a destructor for io_utils.HDF5Matrix class.

* Clear the FileWriterCache before deleting test folders in estiamator_test.

* Actually enable keras tests this time.

* Fix typo in models_test

* Make sure writer cache is clear before trying to delete files.

* Disable failing test cases on windows in data_utils_test.

* Disable failing test case on windows in training_test.

* Make sure the file writer is closed in callbacks_test.

* Remove the unused positional arg for on_train_end for Tensorboard callback.

* Skip empty rows in test_stop_training_csv.

* Skip tests that use use_multiprocessing=True on windows.

* Address review comments.

* Fix typo in callbacks_test.py.

* Update TensorBoard callback API to be consistent with the other callbacks.

* Revert callbacks_test to skip empty lines.

Opening with 'rU' does not seem to work.
---
 tensorflow/contrib/cmake/tf_tests.cmake       |  3 +-
 .../python/keras/_impl/keras/callbacks.py     |  2 +-
 .../keras/_impl/keras/callbacks_test.py       | 48 ++++++++++++++-----
 .../keras/_impl/keras/engine/training_test.py |  6 +++
 .../keras/_impl/keras/estimator_test.py       |  5 ++
 .../python/keras/_impl/keras/models_test.py   | 21 +++++---
 .../_impl/keras/utils/data_utils_test.py      |  7 +++
 .../keras/_impl/keras/utils/io_utils.py       | 11 +++--
 ...orflow.keras.callbacks.-tensor-board.pbtxt |  2 +-
 9 files changed, 78 insertions(+), 27 deletions(-)

diff --git a/tensorflow/contrib/cmake/tf_tests.cmake b/tensorflow/contrib/cmake/tf_tests.cmake
index 5d6ba9ca8d..ba9e307835 100644
--- a/tensorflow/contrib/cmake/tf_tests.cmake
+++ b/tensorflow/contrib/cmake/tf_tests.cmake
@@ -153,7 +153,7 @@ if (tensorflow_BUILD_PYTHON_TESTS)
     "${tensorflow_source_dir}/tensorflow/contrib/data/*_test.py"
     "${tensorflow_source_dir}/tensorflow/contrib/factorization/*_test.py"
     "${tensorflow_source_dir}/tensorflow/contrib/image/*_test.py"
-    "${tensorflow_source_dir}/tensorflow/contrib/keras/python/keras/integration_test.py"
+    "${tensorflow_source_dir}/tensorflow/python/keras/_impl/keras/*_test.py"
     "${tensorflow_source_dir}/tensorflow/contrib/nearest_neighbor/python/kernel_tests/*_test.py"
     "${tensorflow_source_dir}/tensorflow/contrib/seq2seq/python/kernel_tests/*_test.py"
     "${tensorflow_source_dir}/tensorflow/contrib/stateless/python/kernel_tests/*_test.py"
@@ -171,7 +171,6 @@ if (tensorflow_BUILD_PYTHON_TESTS)
       "${tensorflow_source_dir}/tensorflow/contrib/graph_editor/*_test.py"
       "${tensorflow_source_dir}/tensorflow/contrib/bayesflow/*_test.py"
       "${tensorflow_source_dir}/tensorflow/contrib/framework/*_test.py"
-      "${tensorflow_source_dir}/tensorflow/contrib/keras/*_test.py"
       "${tensorflow_source_dir}/tensorflow/contrib/distributions/*_test.py"
       "${tensorflow_source_dir}/tensorflow/contrib/learn/*_test.py"
     )
diff --git a/tensorflow/python/keras/_impl/keras/callbacks.py b/tensorflow/python/keras/_impl/keras/callbacks.py
index 40a996a03f..16109b52b3 100644
--- a/tensorflow/python/keras/_impl/keras/callbacks.py
+++ b/tensorflow/python/keras/_impl/keras/callbacks.py
@@ -768,7 +768,7 @@ class TensorBoard(Callback):
       self.writer.add_summary(summary, epoch)
     self.writer.flush()
 
-  def on_train_end(self, _):
+  def on_train_end(self, logs=None):
     self.writer.close()
 
 
diff --git a/tensorflow/python/keras/_impl/keras/callbacks_test.py b/tensorflow/python/keras/_impl/keras/callbacks_test.py
index 97a650a992..9c17fbb4a7 100644
--- a/tensorflow/python/keras/_impl/keras/callbacks_test.py
+++ b/tensorflow/python/keras/_impl/keras/callbacks_test.py
@@ -19,16 +19,18 @@ from __future__ import division
 from __future__ import print_function
 
 import csv
-import multiprocessing
 import os
 import re
 import shutil
+import threading
+import unittest
 
 import numpy as np
 
 from tensorflow.python.keras._impl import keras
 from tensorflow.python.keras._impl.keras import testing_utils
 from tensorflow.python.platform import test
+from tensorflow.python.summary.writer import writer_cache
 
 try:
   import h5py  # pylint:disable=g-import-not-at-top
@@ -498,7 +500,10 @@ class KerasCallbacksTest(test.TestCase):
       values = []
       with open(fp) as f:
         for x in csv.reader(f):
-          values.append(x)
+          # In windows, due to \r\n line ends we may end up reading empty lines
+          # after each line. Skip empty lines.
+          if x:
+            values.append(x)
       assert 'nan' in values[-1], 'The last epoch was not logged.'
 
   def test_TerminateOnNaN(self):
@@ -678,23 +683,38 @@ class KerasCallbacksTest(test.TestCase):
             batch_size=5)]
 
       # fit w/o validation data should raise ValueError if histogram_freq > 0
+      cbs = callbacks_factory(histogram_freq=1)
       with self.assertRaises(ValueError):
         model.fit(x_train, y_train, batch_size=BATCH_SIZE,
-                  callbacks=callbacks_factory(histogram_freq=1), epochs=3)
+                  callbacks=cbs, epochs=3)
+
+      for cb in cbs:
+        cb.on_train_end()
 
       # fit generator without validation data should raise ValueError if
       # histogram_freq > 0
+      cbs = callbacks_factory(histogram_freq=1)
       with self.assertRaises(ValueError):
         model.fit_generator(data_generator(True), len(x_train), epochs=2,
-                            callbacks=callbacks_factory(histogram_freq=1))
+                            callbacks=cbs)
+
+      for cb in cbs:
+        cb.on_train_end()
 
       # fit generator with validation data generator should raise ValueError if
       # histogram_freq > 0
+      cbs = callbacks_factory(histogram_freq=1)
       with self.assertRaises(ValueError):
         model.fit_generator(data_generator(True), len(x_train), epochs=2,
                             validation_data=data_generator(False),
                             validation_steps=1,
-                            callbacks=callbacks_factory(histogram_freq=1))
+                            callbacks=cbs)
+
+      for cb in cbs:
+        cb.on_train_end()
+
+      # Make sure file writer cache is clear to avoid failures during cleanup.
+      writer_cache.FileWriterCache.clear()
 
   def test_TensorBoard_multi_input_output(self):
     np.random.seed(1337)
@@ -767,6 +787,9 @@ class KerasCallbacksTest(test.TestCase):
                           callbacks=callbacks_factory(histogram_freq=1))
       assert os.path.isdir(filepath)
 
+  @unittest.skipIf(
+      os.name == 'nt',
+      'use_multiprocessing=True does not work on windows properly.')
   def test_LambdaCallback(self):
     with self.test_session():
       np.random.seed(1337)
@@ -789,14 +812,15 @@ class KerasCallbacksTest(test.TestCase):
 
       # Start an arbitrary process that should run during model
       # training and be terminated after training has completed.
+      e = threading.Event()
+
       def target():
-        while True:
-          pass
+        e.wait()
 
-      p = multiprocessing.Process(target=target)
-      p.start()
+      t = threading.Thread(target=target)
+      t.start()
       cleanup_callback = keras.callbacks.LambdaCallback(
-          on_train_end=lambda logs: p.terminate())
+          on_train_end=lambda logs: e.set())
 
       cbks = [cleanup_callback]
       model.fit(
@@ -807,8 +831,8 @@ class KerasCallbacksTest(test.TestCase):
           callbacks=cbks,
           epochs=5,
           verbose=0)
-      p.join()
-      assert not p.is_alive()
+      t.join()
+      assert not t.is_alive()
 
   def test_TensorBoard_with_ReduceLROnPlateau(self):
     with self.test_session():
diff --git a/tensorflow/python/keras/_impl/keras/engine/training_test.py b/tensorflow/python/keras/_impl/keras/engine/training_test.py
index e2a06e8e77..17a26f978e 100644
--- a/tensorflow/python/keras/_impl/keras/engine/training_test.py
+++ b/tensorflow/python/keras/_impl/keras/engine/training_test.py
@@ -18,6 +18,9 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
+import os
+import unittest
+
 import numpy as np
 
 from tensorflow.python.keras._impl import keras
@@ -783,6 +786,9 @@ class TestDynamicTrainability(test.TestCase):
 
 class TestGeneratorMethods(test.TestCase):
 
+  @unittest.skipIf(
+      os.name == 'nt',
+      'use_multiprocessing=True does not work on windows properly.')
   def test_generator_methods(self):
     arr_data = np.random.random((50, 2))
     arr_labels = np.random.random((50,))
diff --git a/tensorflow/python/keras/_impl/keras/estimator_test.py b/tensorflow/python/keras/_impl/keras/estimator_test.py
index 1144aa3152..a7ea3b48a3 100644
--- a/tensorflow/python/keras/_impl/keras/estimator_test.py
+++ b/tensorflow/python/keras/_impl/keras/estimator_test.py
@@ -33,6 +33,7 @@ from tensorflow.python.keras._impl.keras import testing_utils
 from tensorflow.python.keras._impl.keras.applications import mobilenet
 from tensorflow.python.platform import gfile
 from tensorflow.python.platform import test
+from tensorflow.python.summary.writer import writer_cache
 
 
 try:
@@ -132,6 +133,8 @@ class TestKerasEstimator(test_util.TensorFlowTestCase):
         tf_random_seed=_RANDOM_SEED, model_dir=self._base_dir)
 
   def tearDown(self):
+    # Make sure nothing is stuck in limbo.
+    writer_cache.FileWriterCache.clear()
     if os.path.isdir(self._base_dir):
       gfile.DeleteRecursively(self._base_dir)
 
@@ -153,6 +156,8 @@ class TestKerasEstimator(test_util.TensorFlowTestCase):
         est_keras.train(input_fn=train_input_fn, steps=_TRAIN_SIZE / 16)
         after_eval_results = est_keras.evaluate(input_fn=eval_input_fn, steps=1)
         self.assertLess(after_eval_results['loss'], before_eval_results['loss'])
+
+      writer_cache.FileWriterCache.clear()
       gfile.DeleteRecursively(self._config.model_dir)
 
   def test_evaluate(self):
diff --git a/tensorflow/python/keras/_impl/keras/models_test.py b/tensorflow/python/keras/_impl/keras/models_test.py
index 86acac4604..61938066b9 100644
--- a/tensorflow/python/keras/_impl/keras/models_test.py
+++ b/tensorflow/python/keras/_impl/keras/models_test.py
@@ -54,10 +54,11 @@ class TestModelSaving(test.TestCase):
       model.train_on_batch(x, y)
 
       out = model.predict(x)
-      _, fname = tempfile.mkstemp('.h5')
+      fd, fname = tempfile.mkstemp('.h5')
       keras.models.save_model(model, fname)
 
       new_model = keras.models.load_model(fname)
+      os.close(fd)
       os.remove(fname)
 
       out2 = new_model.predict(x)
@@ -95,13 +96,14 @@ class TestModelSaving(test.TestCase):
       model.train_on_batch(x, y)
 
       out = model.predict(x)
-      _, fname = tempfile.mkstemp('.h5')
+      fd, fname = tempfile.mkstemp('.h5')
       keras.models.save_model(model, fname)
 
       model = keras.models.load_model(
           fname,
           custom_objects={'CustomOp': CustomOp,
                           'custom_loss': custom_loss})
+      os.close(fd)
       os.remove(fname)
 
       out2 = model.predict(x)
@@ -125,10 +127,11 @@ class TestModelSaving(test.TestCase):
       model.train_on_batch(x, y)
 
       out = model.predict(x)
-      _, fname = tempfile.mkstemp('.h5')
+      fd, fname = tempfile.mkstemp('.h5')
       keras.models.save_model(model, fname)
 
       model = keras.models.load_model(fname)
+      os.close(fd)
       os.remove(fname)
 
       out2 = model.predict(x)
@@ -144,9 +147,10 @@ class TestModelSaving(test.TestCase):
       model.add(keras.layers.Dense(3))
       model.compile(loss='mse', optimizer='sgd', metrics=['acc'])
 
-      _, fname = tempfile.mkstemp('.h5')
+      fd, fname = tempfile.mkstemp('.h5')
       keras.models.save_model(model, fname)
       model = keras.models.load_model(fname)
+      os.close(fd)
       os.remove(fname)
 
   def test_saving_with_tf_optimizer(self):
@@ -161,9 +165,10 @@ class TestModelSaving(test.TestCase):
                     optimizer=training_module.AdadeltaOptimizer(0.1),
                     metrics=['acc'])
 
-      _, fname = tempfile.mkstemp('.h5')
+      fd, fname = tempfile.mkstemp('.h5')
       keras.models.save_model(model, fname)
       model = keras.models.load_model(fname)
+      os.close(fd)
       os.remove(fname)
 
   def test_saving_right_after_compilation(self):
@@ -177,9 +182,10 @@ class TestModelSaving(test.TestCase):
       model.compile(loss='mse', optimizer='sgd', metrics=['acc'])
       model.model._make_train_function()
 
-      _, fname = tempfile.mkstemp('.h5')
+      fd, fname = tempfile.mkstemp('.h5')
       keras.models.save_model(model, fname)
       model = keras.models.load_model(fname)
+      os.close(fd)
       os.remove(fname)
 
   def test_saving_lambda_numpy_array_arguments(self):
@@ -194,10 +200,11 @@ class TestModelSaving(test.TestCase):
     model = keras.models.Model(inputs, output)
     model.compile(loss='mse', optimizer='sgd', metrics=['acc'])
 
-    _, fname = tempfile.mkstemp('.h5')
+    fd, fname = tempfile.mkstemp('.h5')
     keras.models.save_model(model, fname)
 
     model = keras.models.load_model(fname)
+    os.close(fd)
     os.remove(fname)
 
     self.assertAllClose(mean, model.layers[1].arguments['mu'])
diff --git a/tensorflow/python/keras/_impl/keras/utils/data_utils_test.py b/tensorflow/python/keras/_impl/keras/utils/data_utils_test.py
index 14b2f08442..47c5b4cff0 100644
--- a/tensorflow/python/keras/_impl/keras/utils/data_utils_test.py
+++ b/tensorflow/python/keras/_impl/keras/utils/data_utils_test.py
@@ -22,6 +22,7 @@ from itertools import cycle
 import os
 import tarfile
 import threading
+import unittest
 import zipfile
 
 import numpy as np
@@ -164,6 +165,9 @@ class TestEnqueuers(test.TestCase):
     self.assertEqual(len(set(acc) - set(range(100))), 0)
     enqueuer.stop()
 
+  @unittest.skipIf(
+      os.name == 'nt',
+      'use_multiprocessing=True does not work on windows properly.')
   def test_generator_enqueuer_processes(self):
     enqueuer = keras.utils.data_utils.GeneratorEnqueuer(
         create_generator_from_sequence_pcs(TestSequence([3, 200, 200, 3])),
@@ -185,6 +189,9 @@ class TestEnqueuers(test.TestCase):
     with self.assertRaises(StopIteration):
       next(gen_output)
 
+  @unittest.skipIf(
+      os.name == 'nt',
+      'use_multiprocessing=True does not work on windows properly.')
   def test_generator_enqueuer_fail_processes(self):
     enqueuer = keras.utils.data_utils.GeneratorEnqueuer(
         create_generator_from_sequence_pcs(FaultSequence()),
diff --git a/tensorflow/python/keras/_impl/keras/utils/io_utils.py b/tensorflow/python/keras/_impl/keras/utils/io_utils.py
index 1c8299c27d..2003e19a0a 100644
--- a/tensorflow/python/keras/_impl/keras/utils/io_utils.py
+++ b/tensorflow/python/keras/_impl/keras/utils/io_utils.py
@@ -63,11 +63,11 @@ class HDF5Matrix(object):
                         'HDF5 and h5py installed.')
 
     if datapath not in list(self.refs.keys()):
-      f = h5py.File(datapath)
-      self.refs[datapath] = f
+      self._f = h5py.File(datapath)
+      self.refs[datapath] = self._f
     else:
-      f = self.refs[datapath]
-    self.data = f[dataset]
+      self._f = self.refs[datapath]
+    self.data = self._f[dataset]
     self.start = start
     if end is None:
       self.end = self.data.shape[0]
@@ -78,6 +78,9 @@ class HDF5Matrix(object):
   def __len__(self):
     return self.end - self.start
 
+  def  __del__(self):
+    self._f.close()
+
   def __getitem__(self, key):
     if isinstance(key, slice):
       start, stop = key.start, key.stop
diff --git a/tensorflow/tools/api/golden/tensorflow.keras.callbacks.-tensor-board.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.callbacks.-tensor-board.pbtxt
index 6620a9d308..7de4008c45 100644
--- a/tensorflow/tools/api/golden/tensorflow.keras.callbacks.-tensor-board.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.keras.callbacks.-tensor-board.pbtxt
@@ -29,7 +29,7 @@ tf_class {
   }
   member_method {
     name: "on_train_end"
-    argspec: "args=[\'self\', \'_\'], varargs=None, keywords=None, defaults=None"
+    argspec: "args=[\'self\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
   member_method {
     name: "set_model"
-- 
GitLab


From 1ac6383d976f0b0b099a340820c36b825126fc9c Mon Sep 17 00:00:00 2001
From: Derek Murray <mrry@google.com>
Date: Mon, 20 Nov 2017 16:10:19 -0800
Subject: [PATCH 0145/1225] Add a fast path for batching strings when it is
 possible to move them.

This optimization applies to any TensorFlow queues that contain
strings, plus the `Dataset.batch()` transformation.

PiperOrigin-RevId: 176432783
---
 tensorflow/contrib/makefile/tf_op_files.txt   |  1 +
 tensorflow/core/framework/tensor.h            |  6 ++
 tensorflow/core/kernels/BUILD                 | 22 ++++-
 tensorflow/core/kernels/barrier_ops.cc        |  8 +-
 tensorflow/core/kernels/batch_dataset_op.cc   | 44 +---------
 tensorflow/core/kernels/batch_util.cc         | 80 +++++++++++++++++++
 tensorflow/core/kernels/batch_util.h          | 35 ++++++++
 tensorflow/core/kernels/fifo_queue.cc         |  5 +-
 tensorflow/core/kernels/padding_fifo_queue.cc |  9 ++-
 tensorflow/core/kernels/priority_queue.cc     |  5 +-
 tensorflow/core/kernels/queue_base.cc         | 49 +-----------
 tensorflow/core/kernels/queue_base.h          |  3 +
 .../core/kernels/random_shuffle_queue_op.cc   |  5 +-
 13 files changed, 173 insertions(+), 99 deletions(-)
 create mode 100644 tensorflow/core/kernels/batch_util.cc
 create mode 100644 tensorflow/core/kernels/batch_util.h

diff --git a/tensorflow/contrib/makefile/tf_op_files.txt b/tensorflow/contrib/makefile/tf_op_files.txt
index fbcda0421e..97351b2c51 100644
--- a/tensorflow/contrib/makefile/tf_op_files.txt
+++ b/tensorflow/contrib/makefile/tf_op_files.txt
@@ -284,3 +284,4 @@ tensorflow/core/kernels/spacetobatch_op.cc
 tensorflow/core/kernels/batchtospace_op.cc
 tensorflow/core/kernels/warn_about_ints.cc
 tensorflow/core/kernels/segment_reduction_ops.cc
+tensorflow/core/kernels/batch_util.cc
diff --git a/tensorflow/core/framework/tensor.h b/tensorflow/core/framework/tensor.h
index 3a7df6a478..c195623b27 100644
--- a/tensorflow/core/framework/tensor.h
+++ b/tensorflow/core/framework/tensor.h
@@ -42,6 +42,9 @@ class TensorCApi;
 class TensorDescription;
 class TensorProto;
 class VariantTensorData;
+namespace batch_util {
+Status CopyElementToSlice(Tensor element, Tensor* parent, int64 index);
+}  // namespace batch_util
 
 /// @ingroup core
 /// Represents an n-dimensional array of values.
@@ -487,6 +490,9 @@ class Tensor {
   template <typename Device, typename T>
   friend Status PrepareToUpdateVariable(
       OpKernelContext* ctx, Tensor* tensor);  // For access to RefCountIsOne().
+  friend Status batch_util::CopyElementToSlice(
+      Tensor element, Tensor* parent,
+      int64 index);                // For access to RefCountIsOne().
   friend class NumpyTensorBuffer;  // For access to the private constructor
                                    // taking the buffer.
 
diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD
index 39e8e499cd..3cab6acc42 100644
--- a/tensorflow/core/kernels/BUILD
+++ b/tensorflow/core/kernels/BUILD
@@ -329,6 +329,7 @@ cc_library(
     srcs = ["queue_base.cc"],
     hdrs = ["queue_base.h"],
     deps = [
+        ":batch_util",
         "//tensorflow/core:framework",
         "//tensorflow/core:lib",
         "//tensorflow/core:protos_all_cc",
@@ -350,6 +351,7 @@ cc_library(
     srcs = ["priority_queue.cc"],
     hdrs = ["priority_queue.h"],
     deps = [
+        ":batch_util",
         ":queue_base",
         ":typed_queue",
         "//tensorflow/core:framework",
@@ -1562,7 +1564,10 @@ tf_kernel_library(
 tf_kernel_library(
     name = "random_shuffle_queue_op",
     prefix = "random_shuffle_queue_op",
-    deps = DATA_FLOW_DEPS + ["//tensorflow/core:protos_all_cc"],
+    deps = DATA_FLOW_DEPS + [
+        ":batch_util",
+        "//tensorflow/core:protos_all_cc",
+    ],
 )
 
 tf_kernel_library(
@@ -1712,6 +1717,7 @@ cc_library(
     hdrs = ["fifo_queue.h"],
     visibility = ["//visibility:private"],
     deps = [
+        ":batch_util",
         ":queue_base",
         ":typed_queue",
         "//tensorflow/core:framework",
@@ -1726,6 +1732,7 @@ cc_library(
     hdrs = ["padding_fifo_queue.h"],
     visibility = ["//visibility:private"],
     deps = [
+        ":batch_util",
         ":fifo_queue",
         ":queue_base",
         ":typed_queue",
@@ -4343,6 +4350,7 @@ filegroup(
     name = "mobile_srcs",
     srcs = [
         "avgpooling_op.h",
+        "batch_util.h",
         "bounds_check.h",
         "cwise_ops.h",
         "cwise_ops_common.h",
@@ -4654,6 +4662,7 @@ filegroup(
 filegroup(
     name = "android_extended_ops_group2",
     srcs = [
+        "batch_util.cc",
         "batchtospace_op.cc",
         "ctc_decoder_ops.cc",
         "decode_bmp_op.cc",
@@ -5762,6 +5771,16 @@ tf_kernel_library(
     ],
 )
 
+cc_library(
+    name = "batch_util",
+    srcs = ["batch_util.cc"],
+    hdrs = ["batch_util.h"],
+    deps = [
+        "//tensorflow/core:framework",
+        "//tensorflow/core:lib",
+    ],
+)
+
 cc_library(
     name = "dataset",
     srcs = ["dataset.cc"],
@@ -5821,6 +5840,7 @@ tf_kernel_library(
     name = "batch_dataset_op",
     srcs = ["batch_dataset_op.cc"],
     deps = [
+        ":batch_util",
         ":dataset",
         "//tensorflow/core:dataset_ops_op_lib",
         "//tensorflow/core:framework",
diff --git a/tensorflow/core/kernels/barrier_ops.cc b/tensorflow/core/kernels/barrier_ops.cc
index 3b880a9635..d0bbea9fe2 100644
--- a/tensorflow/core/kernels/barrier_ops.cc
+++ b/tensorflow/core/kernels/barrier_ops.cc
@@ -161,9 +161,11 @@ class Barrier : public ResourceBase {
         component_shape.InsertDim(0, insertion_size);
         Tensor component(ready_tuples[0][i].dtype(), component_shape);
         for (int b = 0; b < insertion_size; ++b) {
-          OP_REQUIRES_OK_ASYNC(ctx, QueueBase::CopyElementToSlice(
-                                        ready_tuples[b][i], &component, b),
-                               callback);
+          OP_REQUIRES_OK_ASYNC(
+              ctx,
+              batch_util::CopyElementToSlice(std::move(ready_tuples[b][i]),
+                                             &component, b),
+              callback);
         }
         insert_tuple.push_back(component);
       }
diff --git a/tensorflow/core/kernels/batch_dataset_op.cc b/tensorflow/core/kernels/batch_dataset_op.cc
index 46412a554b..3dec4f71d8 100644
--- a/tensorflow/core/kernels/batch_dataset_op.cc
+++ b/tensorflow/core/kernels/batch_dataset_op.cc
@@ -16,6 +16,7 @@ limitations under the License.
 
 #include "tensorflow/core/framework/partial_tensor_shape.h"
 #include "tensorflow/core/framework/tensor.h"
+#include "tensorflow/core/kernels/batch_util.h"
 
 namespace tensorflow {
 
@@ -92,44 +93,6 @@ class BatchDatasetOp : public UnaryDatasetOpKernel {
     }
 
    private:
-    // Copies element into the index^th slice of parent (in the 0th dimension).
-    //
-    // TODO(mrry): Reconcile this method with the similar method in
-    // the queue implementation.
-    template <typename T>
-    static Status HandleElementToSlice(const Tensor& element, Tensor* parent,
-                                       int64 index) {
-      if (element.NumElements() !=
-          (parent->NumElements() / parent->dim_size(0))) {
-        TensorShape chip_shape = parent->shape();
-        chip_shape.RemoveDim(0);
-        return errors::InvalidArgument(
-            "HandleElementToSlice Cannot copy slice: number of elements does "
-            "not match. Shapes are: [element]: ",
-            element.shape().DebugString(),
-            ", [parent slice]: ", chip_shape.DebugString());
-      }
-      auto parent_as_matrix = parent->flat_outer_dims<T>();
-      parent_as_matrix.chip(index, 0) = element.flat<T>();
-      return Status::OK();
-    }
-
-    // Copies element into the index^th slice of parent (in the 0th dimension).
-    static Status CopyElementToSlice(const Tensor& element, Tensor* parent,
-                                     int64 index) {
-#define HANDLE_TYPE(T)                                      \
-  case DataTypeToEnum<T>::value: {                          \
-    return HandleElementToSlice<T>(element, parent, index); \
-  }
-
-      switch (element.dtype()) {
-        TF_CALL_DATASET_TYPES(HANDLE_TYPE);
-#undef HANDLE_TYPE
-        default:
-          return errors::Unimplemented(
-              "CopyElementToSlice Unhandled data type: ", element.dtype());
-      }
-    }
 
     class Iterator : public DatasetIterator<Dataset> {
      public:
@@ -188,8 +151,9 @@ class BatchDatasetOp : public UnaryDatasetOpKernel {
           // Build the output tuple component by copying one slice
           // from each input element in the batch.
           for (size_t i = 0; i < num_batch_elements; ++i) {
-            TF_RETURN_IF_ERROR(CopyElementToSlice(
-                batch_elements[i][component_index], &batch_component, i));
+            TF_RETURN_IF_ERROR(batch_util::CopyElementToSlice(
+                std::move(batch_elements[i][component_index]), &batch_component,
+                i));
           }
           out_tensors->emplace_back(std::move(batch_component));
         }
diff --git a/tensorflow/core/kernels/batch_util.cc b/tensorflow/core/kernels/batch_util.cc
new file mode 100644
index 0000000000..298e156579
--- /dev/null
+++ b/tensorflow/core/kernels/batch_util.cc
@@ -0,0 +1,80 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/core/kernels/batch_util.h"
+
+#include "tensorflow/core/framework/register_types.h"
+#include "tensorflow/core/framework/types.h"
+#include "tensorflow/core/lib/core/errors.h"
+
+namespace tensorflow {
+namespace batch_util {
+
+namespace {
+
+// Copies element into the index^th slice of parent (in the 0th dimension).
+template <typename T>
+Status HandleElementToSlice(Tensor element, Tensor* parent, int64 index,
+                            bool /* can_move */) {
+  parent->flat_outer_dims<T>().chip(index, 0) = element.flat<T>();
+  return Status::OK();
+}
+
+template <>
+Status HandleElementToSlice<string>(Tensor element, Tensor* parent, int64 index,
+                                    bool can_move) {
+  auto parent_as_matrix = parent->flat_outer_dims<string>();
+  auto element_flat = element.flat<string>();
+  if (can_move) {
+    for (int64 i = 0; i < element.NumElements(); ++i) {
+      parent_as_matrix(index, i) = std::move(element_flat(i));
+    }
+  } else {
+    parent_as_matrix.chip(index, 0) = element_flat;
+  }
+  return Status::OK();
+}
+
+}  // namespace
+
+Status CopyElementToSlice(Tensor element, Tensor* parent, int64 index) {
+  if (element.NumElements() != (parent->NumElements() / parent->dim_size(0))) {
+    TensorShape chip_shape = parent->shape();
+    chip_shape.RemoveDim(0);
+    return errors::InvalidArgument(
+        "HandleElementToSlice Cannot copy slice: number of elements does "
+        "not match. Shapes are: [element]: ",
+        element.shape().DebugString(),
+        ", [parent slice]: ", chip_shape.DebugString());
+  }
+  bool can_move = element.RefCountIsOne();
+#define HANDLE_TYPE(T)                                                \
+  case DataTypeToEnum<T>::value: {                                    \
+    return HandleElementToSlice<T>(std::move(element), parent, index, \
+                                   can_move);                         \
+  }
+
+  switch (element.dtype()) {
+    TF_CALL_ALL_TYPES(HANDLE_TYPE);
+    TF_CALL_QUANTIZED_TYPES(HANDLE_TYPE);
+#undef HANDLE_TYPE
+    default:
+      return errors::Unimplemented("CopyElementToSlice Unhandled data type: ",
+                                   element.dtype());
+  }
+}
+
+}  // namespace batch_util
+}  // namespace tensorflow
diff --git a/tensorflow/core/kernels/batch_util.h b/tensorflow/core/kernels/batch_util.h
new file mode 100644
index 0000000000..065011a699
--- /dev/null
+++ b/tensorflow/core/kernels/batch_util.h
@@ -0,0 +1,35 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef THIRD_PARTY_TENSORFLOW_CORE_KERNELS_BATCH_UTIL_H_
+#define THIRD_PARTY_TENSORFLOW_CORE_KERNELS_BATCH_UTIL_H_
+
+#include "tensorflow/core/framework/tensor.h"
+#include "tensorflow/core/lib/core/status.h"
+
+namespace tensorflow {
+namespace batch_util {
+
+// Copies element into the index^th slice of parent (in the 0th dimension).
+//
+// NOTE(mrry): The `element` argument is taken by value. Use `std::move()`
+// to move the `element` argument into this function, and the implementation
+// may be able to optimize the copy to a move. This is particularly important
+// for DT_STRING tensors.
+Status CopyElementToSlice(Tensor element, Tensor* parent, int64 index);
+
+}  // namespace batch_util
+}  // namespace tensorflow
+
+#endif  // THIRD_PARTY_TENSORFLOW_CORE_KERNELS_BATCH_UTIL_H_
diff --git a/tensorflow/core/kernels/fifo_queue.cc b/tensorflow/core/kernels/fifo_queue.cc
index ea86b04762..9fd82e2168 100644
--- a/tensorflow/core/kernels/fifo_queue.cc
+++ b/tensorflow/core/kernels/fifo_queue.cc
@@ -23,6 +23,7 @@ limitations under the License.
 #include "tensorflow/core/framework/tensor.h"
 #include "tensorflow/core/framework/tensor_shape.h"
 #include "tensorflow/core/framework/types.h"
+#include "tensorflow/core/kernels/batch_util.h"
 #include "tensorflow/core/kernels/fifo_queue.h"
 #include "tensorflow/core/kernels/queue_base.h"
 #include "tensorflow/core/lib/core/errors.h"
@@ -329,8 +330,8 @@ void FIFOQueue::TryDequeueMany(int num_elements, OpKernelContext* ctx,
                   const int64 index = attempt->tuple[0].dim_size(0) -
                                       attempt->elements_requested;
                   for (int i = 0; i < num_components(); ++i) {
-                    attempt->context->SetStatus(CopyElementToSlice(
-                        tuple[i], &attempt->tuple[i], index));
+                    attempt->context->SetStatus(batch_util::CopyElementToSlice(
+                        std::move(tuple[i]), &attempt->tuple[i], index));
                     if (!attempt->context->status().ok()) return kComplete;
                   }
                   tuple.clear();
diff --git a/tensorflow/core/kernels/padding_fifo_queue.cc b/tensorflow/core/kernels/padding_fifo_queue.cc
index d0f7683f3d..9d35ecb66c 100644
--- a/tensorflow/core/kernels/padding_fifo_queue.cc
+++ b/tensorflow/core/kernels/padding_fifo_queue.cc
@@ -23,6 +23,7 @@ limitations under the License.
 #include "tensorflow/core/framework/tensor.h"
 #include "tensorflow/core/framework/tensor_shape.h"
 #include "tensorflow/core/framework/types.h"
+#include "tensorflow/core/kernels/batch_util.h"
 #include "tensorflow/core/kernels/padding_fifo_queue.h"
 #include "tensorflow/core/kernels/queue_base.h"
 #include "tensorflow/core/lib/core/errors.h"
@@ -156,7 +157,7 @@ void PaddingFIFOQueue::TryDequeueMany(int num_elements, OpKernelContext* ctx,
                 // Finished.  Allocate attempt->tuple and
                 // copy from attempt->tuples to attempt->tuple.
                 attempt->tuple.reserve(num_components());
-                const std::vector<Tuple>& tuples = attempt->tuples;
+                std::vector<Tuple>& tuples = attempt->tuples;
 
                 std::vector<bool> dynamic_shape;
                 const int64 batch_size = tuples.size();
@@ -206,8 +207,10 @@ void PaddingFIFOQueue::TryDequeueMany(int num_elements, OpKernelContext* ctx,
                       attempt->context->SetStatus(CopyElementToLargerSlice(
                           tuples[index][i], &attempt->tuple[i], index));
                     } else {
-                      attempt->context->SetStatus(CopyElementToSlice(
-                          tuples[index][i], &attempt->tuple[i], index));
+                      attempt->context->SetStatus(
+                          batch_util::CopyElementToSlice(
+                              std::move(tuples[index][i]), &attempt->tuple[i],
+                              index));
                     }
                     if (!attempt->context->status().ok()) return kComplete;
                   }
diff --git a/tensorflow/core/kernels/priority_queue.cc b/tensorflow/core/kernels/priority_queue.cc
index 4c406fc1ed..5c487edbe3 100644
--- a/tensorflow/core/kernels/priority_queue.cc
+++ b/tensorflow/core/kernels/priority_queue.cc
@@ -22,6 +22,7 @@ limitations under the License.
 #include "tensorflow/core/framework/tensor.h"
 #include "tensorflow/core/framework/tensor_shape.h"
 #include "tensorflow/core/framework/types.h"
+#include "tensorflow/core/kernels/batch_util.h"
 #include "tensorflow/core/kernels/priority_queue.h"
 #include "tensorflow/core/kernels/queue_base.h"
 #include "tensorflow/core/lib/core/errors.h"
@@ -358,8 +359,8 @@ void PriorityQueue::TryDequeueMany(int num_elements, OpKernelContext* ctx,
               const int index =
                   attempt->tuple[0].dim_size(0) - attempt->elements_requested;
               for (int i = 0; i < num_components(); ++i) {
-                attempt->context->SetStatus(
-                    CopyElementToSlice(tuple[i], &attempt->tuple[i], index));
+                attempt->context->SetStatus(batch_util::CopyElementToSlice(
+                    std::move(tuple[i]), &attempt->tuple[i], index));
                 if (!attempt->context->status().ok()) return kComplete;
               }
               tuple.clear();
diff --git a/tensorflow/core/kernels/queue_base.cc b/tensorflow/core/kernels/queue_base.cc
index 8a9af39e1f..6c91d0cd94 100644
--- a/tensorflow/core/kernels/queue_base.cc
+++ b/tensorflow/core/kernels/queue_base.cc
@@ -18,6 +18,7 @@ limitations under the License.
 #include <vector>
 #include "tensorflow/core/framework/node_def.pb.h"
 #include "tensorflow/core/framework/tensor_shape.h"
+#include "tensorflow/core/kernels/batch_util.h"
 #include "tensorflow/core/lib/core/errors.h"
 #include "tensorflow/core/platform/mutex.h"
 #include "tensorflow/core/platform/types.h"
@@ -46,25 +47,6 @@ Status HandleSliceToElement(const Tensor& parent, Tensor* element,
   return Status::OK();
 }
 
-template <DataType DT>
-Status HandleElementToSlice(const Tensor& element, Tensor* parent, int index) {
-  typedef typename EnumToDataType<DT>::Type T;
-  DCHECK_NE(parent->dim_size(0), 0);
-  DCHECK_GE(index, 0);
-  if (element.NumElements() != (parent->NumElements() / parent->dim_size(0))) {
-    TensorShape chip_shape = parent->shape();
-    chip_shape.RemoveDim(0);
-    return errors::Internal(
-        "HandleElementToSlice Cannot copy slice: number of elements does not "
-        "match.  Shapes are: [element]: ",
-        element.shape().DebugString(), ", [parent slice]: ",
-        chip_shape.DebugString());
-  }
-  auto parent_as_matrix = parent->flat_outer_dims<T>();
-  parent_as_matrix.chip(index, 0) = element.flat<T>();
-  return Status::OK();
-}
-
 }  // namespace
 
 QueueBase::QueueBase(int32 capacity, const DataTypeVector& component_dtypes,
@@ -382,35 +364,10 @@ Status QueueBase::CopySliceToElement(const Tensor& parent, Tensor* element,
                                parent.dtype());
 }
 
-// Static method
+/* static */
 Status QueueBase::CopyElementToSlice(const Tensor& element, Tensor* parent,
                                      int64 index) {
-#define HANDLE_TYPE(DT)                                                   \
-  if (element.dtype() == DT) {                                            \
-    TF_RETURN_IF_ERROR(HandleElementToSlice<DT>(element, parent, index)); \
-    return Status::OK();                                                  \
-  }
-  HANDLE_TYPE(DT_FLOAT);
-  HANDLE_TYPE(DT_HALF);
-  HANDLE_TYPE(DT_DOUBLE);
-  HANDLE_TYPE(DT_INT32);
-  HANDLE_TYPE(DT_UINT8);
-  HANDLE_TYPE(DT_INT16);
-  HANDLE_TYPE(DT_INT8);
-  HANDLE_TYPE(DT_STRING);
-  HANDLE_TYPE(DT_COMPLEX64);
-  HANDLE_TYPE(DT_COMPLEX128);
-  HANDLE_TYPE(DT_INT64);
-  HANDLE_TYPE(DT_BOOL);
-  HANDLE_TYPE(DT_QINT8);
-  HANDLE_TYPE(DT_QUINT8);
-  HANDLE_TYPE(DT_QINT32);
-  HANDLE_TYPE(DT_QINT16);
-  HANDLE_TYPE(DT_QUINT16);
-  HANDLE_TYPE(DT_UINT16);
-#undef HANDLE_TYPE
-  return errors::Unimplemented("CopyElementToSlice Unhandled data type: ",
-                               element.dtype());
+  return batch_util::CopyElementToSlice(element, parent, index);
 }
 
 }  // namespace tensorflow
diff --git a/tensorflow/core/kernels/queue_base.h b/tensorflow/core/kernels/queue_base.h
index c101fb3579..5fb1c92f94 100644
--- a/tensorflow/core/kernels/queue_base.h
+++ b/tensorflow/core/kernels/queue_base.h
@@ -79,6 +79,9 @@ class QueueBase : public QueueInterface {
                                    int64 index);
 
   // Copies element into the index^th slice (in the first dimension) of parent.
+  // NOTE(mrry): This method is deprecated. Use
+  // `tensorflow::batch_util::CopySliceToElement()` defined in
+  // "./batch_util.h" instead.
   static Status CopyElementToSlice(const Tensor& element, Tensor* parent,
                                    int64 index);
 
diff --git a/tensorflow/core/kernels/random_shuffle_queue_op.cc b/tensorflow/core/kernels/random_shuffle_queue_op.cc
index 30bbbd4aed..7a40e9ddf2 100644
--- a/tensorflow/core/kernels/random_shuffle_queue_op.cc
+++ b/tensorflow/core/kernels/random_shuffle_queue_op.cc
@@ -24,6 +24,7 @@ limitations under the License.
 #include "tensorflow/core/framework/tensor.h"
 #include "tensorflow/core/framework/tensor_shape.h"
 #include "tensorflow/core/framework/types.h"
+#include "tensorflow/core/kernels/batch_util.h"
 #include "tensorflow/core/kernels/queue_op.h"
 #include "tensorflow/core/kernels/typed_queue.h"
 #include "tensorflow/core/lib/core/errors.h"
@@ -407,8 +408,8 @@ void RandomShuffleQueue::TryDequeueMany(int num_elements, OpKernelContext* ctx,
                   const int index = attempt->tuple[0].dim_size(0) -
                                     attempt->elements_requested;
                   for (int i = 0; i < num_components(); ++i) {
-                    attempt->context->SetStatus(CopyElementToSlice(
-                        tuple[i], &attempt->tuple[i], index));
+                    attempt->context->SetStatus(batch_util::CopyElementToSlice(
+                        std::move(tuple[i]), &attempt->tuple[i], index));
                     if (!attempt->context->status().ok()) return kComplete;
                   }
                   tuple.clear();
-- 
GitLab


From 46b383781f731ff3dab757e53278874780729167 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 20 Nov 2017 16:16:55 -0800
Subject: [PATCH 0146/1225] Hlo parser: make the window attribute optional for
 other ops.

We printed the window attribute only when it's not empty, regardless of the opcode. Before this change, window is only optional for the convolution op, but empty window could happen as well to the other ops (reduce-window, select-and-scatter).

PiperOrigin-RevId: 176433808
---
 .../compiler/xla/tools/parser/hlo_parser.cc   | 10 ++++-
 .../xla/tools/parser/hlo_parser_test.cc       | 45 +++++++++++++++++++
 2 files changed, 53 insertions(+), 2 deletions(-)

diff --git a/tensorflow/compiler/xla/tools/parser/hlo_parser.cc b/tensorflow/compiler/xla/tools/parser/hlo_parser.cc
index 6f5c7b8d0f..a102bdc3aa 100644
--- a/tensorflow/compiler/xla/tools/parser/hlo_parser.cc
+++ b/tensorflow/compiler/xla/tools/parser/hlo_parser.cc
@@ -549,13 +549,16 @@ bool HloParser::ParseInstruction(HloComputation::Builder* builder,
     case HloOpcode::kReduceWindow: {
       optional<HloComputation*> reduce_computation;
       optional<Window> window;
-      attrs["window"] = {/*required=*/true, AttrTy::kWindow, &window};
+      attrs["window"] = {/*required=*/false, AttrTy::kWindow, &window};
       attrs["to_apply"] = {/*required=*/true, AttrTy::kHloComputation,
                            &reduce_computation};
       if (!ParseOperands(&operands, /*expected_size=*/2) ||
           !ParseAttributes(attrs)) {
         return false;
       }
+      if (!window) {
+        window.emplace();
+      }
       instruction = builder->AddInstruction(HloInstruction::CreateReduceWindow(
           shape, /*operand=*/operands[0], /*init_value=*/operands[1], *window,
           *reduce_computation));
@@ -647,11 +650,14 @@ bool HloParser::ParseInstruction(HloComputation::Builder* builder,
       optional<HloComputation*> scatter;
       attrs["scatter"] = {/*required=*/true, AttrTy::kHloComputation, &scatter};
       optional<Window> window;
-      attrs["window"] = {/*required=*/true, AttrTy::kWindow, &window};
+      attrs["window"] = {/*required=*/false, AttrTy::kWindow, &window};
       if (!ParseOperands(&operands, /*expected_size=*/3) ||
           !ParseAttributes(attrs)) {
         return false;
       }
+      if (!window) {
+        window.emplace();
+      }
       instruction =
           builder->AddInstruction(HloInstruction::CreateSelectAndScatter(
               shape, /*operand=*/operands[0], *select, *window,
diff --git a/tensorflow/compiler/xla/tools/parser/hlo_parser_test.cc b/tensorflow/compiler/xla/tools/parser/hlo_parser_test.cc
index b67b4b816d..e56f120def 100644
--- a/tensorflow/compiler/xla/tools/parser/hlo_parser_test.cc
+++ b/tensorflow/compiler/xla/tools/parser/hlo_parser_test.cc
@@ -310,6 +310,25 @@ ENTRY %R4UnitWindow.v3 (operand: f32[13,12,8,15]) -> f32[13,3,8,15] {
   ROOT %reduce-window = f32[13,3,8,15]{0,3,2,1} reduce-window(f32[13,12,8,15]{0,3,2,1} %operand, f32[] %constant), window={size=1x1x7x1 stride=1x4x1x1 pad=0_0x0_0x3_3x0_0}, to_apply=%add_F32.v3
 }
 
+)"
+},
+// reduce window on scalar
+{
+"ReduceWindowScalar",
+R"(HloModule reduce_window_scalar:
+
+%add_F32.v3 (lhs: f32[], rhs: f32[]) -> f32[] {
+  %lhs = f32[] parameter(0)
+  %rhs = f32[] parameter(1)
+  ROOT %add = f32[] add(f32[] %lhs, f32[] %rhs)
+}
+
+ENTRY %R4UnitWindowScalar () -> f32[] {
+  %constant = f32[] constant(42)
+  %constant.1 = f32[] constant(1)
+  ROOT %reduce-window = f32[] reduce-window(f32[] %constant, f32[] %constant.1), to_apply=%add_F32.v3
+}
+
 )"
 },
 // convolution
@@ -426,6 +445,32 @@ ENTRY %R4F32OverlapSmall.v4 () -> f32[4,5,1,1] {
   ROOT %select-and-scatter = f32[4,5,1,1]{3,2,1,0} select-and-scatter(f32[4,5,1,1]{3,2,1,0} %constant, f32[2,2,1,1]{3,2,1,0} %constant.1, f32[] %constant.2), window={size=2x3x1x1 stride=2x2x1x1}, select=%ge_F32.v3, scatter=%add_F32.v3
 }
 
+)"
+},
+// select and scatter on scalar
+{
+"SelectAndScatterScalar",
+R"(HloModule select_and_scatter_scalar:
+
+%ge_F32.v3 (lhs: f32[], rhs: f32[]) -> pred[] {
+  %lhs = f32[] parameter(0)
+  %rhs = f32[] parameter(1)
+  ROOT %greater-than-or-equal-to = pred[] greater-than-or-equal-to(f32[] %lhs, f32[] %rhs)
+}
+
+%add_F32.v3 (lhs.1: f32[], rhs.1: f32[]) -> f32[] {
+  %lhs.1 = f32[] parameter(0)
+  %rhs.1 = f32[] parameter(1)
+  ROOT %add = f32[] add(f32[] %lhs.1, f32[] %rhs.1)
+}
+
+ENTRY %SelectAndScatterScalar () -> f32[] {
+  %constant = f32[] constant(42)
+  %constant.1 = f32[] constant(1)
+  %constant.2 = f32[] constant(2)
+  ROOT %select-and-scatter = f32[] select-and-scatter(f32[] %constant, f32[] %constant.1, f32[] %constant.2), select=%ge_F32.v3, scatter=%add_F32.v3
+}
+
 )"
 },
 // slice
-- 
GitLab


From 138b00934a436b9207afde330731a49f2187ea9d Mon Sep 17 00:00:00 2001
From: Eugene Brevdo <ebrevdo@google.com>
Date: Mon, 20 Nov 2017 16:18:31 -0800
Subject: [PATCH 0147/1225] Rename GPUTracer to DeviceTracer.

PiperOrigin-RevId: 176434090
---
 tensorflow/core/BUILD                         | 31 ++++-----
 .../core/common_runtime/direct_session.cc     | 29 ++++-----
 tensorflow/core/debug/BUILD                   |  2 +-
 .../core/platform/default/build_config.bzl    |  8 +--
 .../{gpu_tracer.cc => device_tracer.cc}       | 64 +++++++++----------
 .../{gpu_tracer.h => device_tracer.h}         | 32 +++++-----
 ...u_tracer_test.cc => device_tracer_test.cc} | 44 ++++++-------
 .../ci_build/windows/bazel/bazel_test_lib.sh  |  2 +-
 8 files changed, 104 insertions(+), 108 deletions(-)
 rename tensorflow/core/platform/default/{gpu_tracer.cc => device_tracer.cc} (93%)
 rename tensorflow/core/platform/{gpu_tracer.h => device_tracer.h} (69%)
 rename tensorflow/core/platform/{gpu_tracer_test.cc => device_tracer_test.cc} (84%)

diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD
index ee14078496..d71f314e11 100644
--- a/tensorflow/core/BUILD
+++ b/tensorflow/core/BUILD
@@ -127,9 +127,9 @@ load(
     "tf_additional_verbs_lib_defines",
     "tf_additional_mpi_lib_defines",
     "tf_additional_gdr_lib_defines",
-    "tf_additional_gpu_tracer_srcs",
-    "tf_additional_gpu_tracer_deps",
-    "tf_additional_gpu_tracer_cuda_deps",
+    "tf_additional_device_tracer_srcs",
+    "tf_additional_device_tracer_deps",
+    "tf_additional_device_tracer_cuda_deps",
     "tf_pyclif_proto_library",
     "tf_jspb_proto_library",
     "tf_nano_proto_library",
@@ -1461,7 +1461,7 @@ cc_library(
             "lib/jpeg/**/*",
             "platform/**/env_time.cc",
             "platform/**/cuda_libdevice_path.cc",
-            "platform/**/gpu_tracer.cc",
+            "platform/**/device_tracer.cc",
             "platform/variant_coding.cc",
             "platform/**/variant_cord_coding.cc",
         ],
@@ -1472,7 +1472,7 @@ cc_library(
             "platform/**/cuda_libdevice_path.cc",
             "platform/**/stream_executor.h",
             "platform/**/env_time.cc",
-            "platform/**/gpu_tracer.cc",
+            "platform/**/device_tracer.cc",
             "platform/variant_coding.cc",
             "platform/**/variant_cord_coding.cc",
         ] +
@@ -2085,12 +2085,9 @@ tf_cuda_library(
         "util/env_var.h",
     ],
     copts = tf_copts(),
-    cuda_deps = [
-        ":gpu_tracer",
-    ],
-    linkstatic = 1,
     deps = [
         ":core_cpu_internal",
+        ":device_tracer",
         ":framework",
         ":lib",
         ":lib_internal",
@@ -2122,18 +2119,18 @@ cc_library(
 )
 
 tf_cuda_library(
-    name = "gpu_tracer",
-    srcs = tf_additional_gpu_tracer_srcs(),
+    name = "device_tracer",
+    srcs = tf_additional_device_tracer_srcs(),
     hdrs = [
-        "platform/gpu_tracer.h",
+        "platform/device_tracer.h",
     ],
     copts = tf_copts(),
-    cuda_deps = tf_additional_cupti_wrapper_deps() + tf_additional_gpu_tracer_cuda_deps(),
+    cuda_deps = tf_additional_cupti_wrapper_deps() + tf_additional_device_tracer_cuda_deps(),
     deps = [
         ":core_cpu_internal",
         ":lib",
         ":protos_all_cc",
-    ] + tf_additional_gpu_tracer_deps(),
+    ] + tf_additional_device_tracer_deps(),
 )
 
 GPU_RUNTIME_HEADERS = [
@@ -3401,9 +3398,9 @@ tf_cc_test(
 )
 
 tf_cc_test_gpu(
-    name = "gpu_tracer_test",
+    name = "device_tracer_test",
     size = "small",
-    srcs = ["platform/gpu_tracer_test.cc"],
+    srcs = ["platform/device_tracer_test.cc"],
     args = ["--heap_check=local"],
     linkstatic = tf_kernel_tests_linkstatic(),
     tags = tf_cuda_tests_tags() + ["nomac"],
@@ -3411,12 +3408,12 @@ tf_cc_test_gpu(
         ":all_kernels",
         ":core_cpu",
         ":core_cpu_internal",
+        ":device_tracer",
         ":direct_session",
         ":direct_session_internal",
         ":framework",
         ":framework_internal",
         ":gpu_runtime",
-        ":gpu_tracer",
         ":lib",
         ":lib_internal",
         ":protos_all_cc",
diff --git a/tensorflow/core/common_runtime/direct_session.cc b/tensorflow/core/common_runtime/direct_session.cc
index 2f57164dcd..6dfe17405c 100644
--- a/tensorflow/core/common_runtime/direct_session.cc
+++ b/tensorflow/core/common_runtime/direct_session.cc
@@ -54,15 +54,13 @@ limitations under the License.
 #include "tensorflow/core/lib/strings/str_util.h"
 #include "tensorflow/core/lib/strings/strcat.h"
 #include "tensorflow/core/platform/cpu_info.h"
+#include "tensorflow/core/platform/device_tracer.h"
 #include "tensorflow/core/platform/logging.h"
 #include "tensorflow/core/platform/mutex.h"
 #include "tensorflow/core/platform/types.h"
 #include "tensorflow/core/util/device_name_utils.h"
 #include "tensorflow/core/util/env_var.h"
 
-#if GOOGLE_CUDA
-#include "tensorflow/core/platform/gpu_tracer.h"
-#endif  // GOOGLE_CUDA
 
 namespace tensorflow {
 
@@ -555,15 +553,19 @@ Status DirectSession::Run(const RunOptions& run_options,
     args.stats_collector = run_state.collector.get();
   }
 
-#if GOOGLE_CUDA
-  std::unique_ptr<GPUTracer> tracer;
+  std::unique_ptr<DeviceTracer> tracer;
   if (run_options.trace_level() >= RunOptions::HARDWARE_TRACE) {
-    tracer = CreateGPUTracer();
-    // tracer will be NULL on non-GPU platforms.
-    // TODO(b/32704451): Don't just ignore the ::tensorflow::Status object!
-    if (tracer) tracer->Start().IgnoreError();
+    tracer = CreateDeviceTracer();
+    // tracer may be NULL on platforms without accelerators.
+    if (tracer) {
+      Status s = tracer->Start();
+      if (!s.ok()) {
+        run_state.executors_done.Notify();
+        delete barrier;
+        return s;
+      }
+    }
   }
-#endif  // GOOGLE_CUDA
 
   // Register this step with session's cancellation manager, so that
   // `Session::Close()` will cancel the step.
@@ -598,13 +600,10 @@ Status DirectSession::Run(const RunOptions& run_options,
     run_state.status.Update(errors::Cancelled("Run call was cancelled"));
   }
 
-#if GOOGLE_CUDA
   if (tracer) {
-    // TODO(b/32704451): Don't just ignore the ::tensorflow::Status object!
-    tracer->Stop().IgnoreError();
-    tracer->Collect(args.stats_collector).IgnoreError();
+    TF_RETURN_IF_ERROR(tracer->Stop());
+    TF_RETURN_IF_ERROR(tracer->Collect(args.stats_collector));
   }
-#endif  // GOOGLE_CUDA
 
   {
     mutex_lock l(run_state.mu_);
diff --git a/tensorflow/core/debug/BUILD b/tensorflow/core/debug/BUILD
index 525f96a3de..6d796768de 100644
--- a/tensorflow/core/debug/BUILD
+++ b/tensorflow/core/debug/BUILD
@@ -89,9 +89,9 @@ tf_cuda_library(
     deps = [
         ":debug",
         "//tensorflow/core:core_cpu_internal",
+        "//tensorflow/core:device_tracer",
         "//tensorflow/core:direct_session_internal",
         "//tensorflow/core:framework",
-        "//tensorflow/core:gpu_tracer",
         "//tensorflow/core:lib",
         "//tensorflow/core:lib_internal",
         "//tensorflow/core:proto_text",
diff --git a/tensorflow/core/platform/default/build_config.bzl b/tensorflow/core/platform/default/build_config.bzl
index 5eeb861bdd..0f8cf8f122 100644
--- a/tensorflow/core/platform/default/build_config.bzl
+++ b/tensorflow/core/platform/default/build_config.bzl
@@ -399,13 +399,13 @@ def tf_env_time_srcs():
 def tf_additional_cupti_wrapper_deps():
   return ["//tensorflow/core/platform/default/gpu:cupti_wrapper"]
 
-def tf_additional_gpu_tracer_srcs():
-  return ["platform/default/gpu_tracer.cc"]
+def tf_additional_device_tracer_srcs():
+  return ["platform/default/device_tracer.cc"]
 
-def tf_additional_gpu_tracer_cuda_deps():
+def tf_additional_device_tracer_cuda_deps():
   return []
 
-def tf_additional_gpu_tracer_deps():
+def tf_additional_device_tracer_deps():
   return []
 
 def tf_additional_libdevice_data():
diff --git a/tensorflow/core/platform/default/gpu_tracer.cc b/tensorflow/core/platform/default/device_tracer.cc
similarity index 93%
rename from tensorflow/core/platform/default/gpu_tracer.cc
rename to tensorflow/core/platform/default/device_tracer.cc
index d6489f2f00..f4b0f16393 100644
--- a/tensorflow/core/platform/default/gpu_tracer.cc
+++ b/tensorflow/core/platform/default/device_tracer.cc
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#include "tensorflow/core/platform/gpu_tracer.h"
+#include "tensorflow/core/platform/device_tracer.h"
 
 #if GOOGLE_CUDA
 
@@ -101,7 +101,7 @@ const char *getActivityOverheadKindString(CUpti_ActivityOverheadKind kind) {
 }  // namespace
 
 namespace tensorflow {
-namespace gputracer {
+namespace devicetracer {
 
 // Forward declaration.
 class CUPTIManager;
@@ -286,14 +286,14 @@ CUPTIManager *GetCUPTIManager() {
 // for the duration of the CUPTI API callback.
 TF_STATIC_THREAD_LOCAL_POD(const char *, tls_current_annotation);
 
-class GPUTracerImpl : public GPUTracer,
-                      public CUPTIClient,
-                      public port::Tracing::Engine {
+class DeviceTracerImpl : public DeviceTracer,
+                         public CUPTIClient,
+                         public port::Tracing::Engine {
  public:
-  GPUTracerImpl();
-  ~GPUTracerImpl() override;
+  DeviceTracerImpl();
+  ~DeviceTracerImpl() override;
 
-  // GPUTracer interface:
+  // DeviceTracer interface:
   Status Start() override;
   Status Stop() override;
   Status Collect(StepStatsCollector *collector) override;
@@ -348,7 +348,7 @@ class GPUTracerImpl : public GPUTracer,
   };
 
   // This is the subscriber callback which is invoked directly by CUPTI.
-  // The 'userdata' argument will be a pointer to the active 'GPUTracerImpl'.
+  // The 'userdata' argument will be a pointer to the active 'DeviceTracerImpl'.
   static void CUPTIAPI ApiCallback(void *userdata, CUpti_CallbackDomain domain,
                                    CUpti_CallbackId cbid, const void *cbdata);
 
@@ -375,28 +375,28 @@ class GPUTracerImpl : public GPUTracer,
   uint64_t start_timestamp_ GUARDED_BY(mu_);
   uint64_t end_timestamp_ GUARDED_BY(mu_);
 
-  TF_DISALLOW_COPY_AND_ASSIGN(GPUTracerImpl);
+  TF_DISALLOW_COPY_AND_ASSIGN(DeviceTracerImpl);
 };
 
-GPUTracerImpl::GPUTracerImpl() {
-  VLOG(1) << "GPUTracer created.";
+DeviceTracerImpl::DeviceTracerImpl() {
+  VLOG(1) << "DeviceTracer created.";
   cupti_manager_ = GetCUPTIManager();
   CHECK(cupti_manager_);
   cupti_wrapper_.reset(new perftools::gputools::profiler::CuptiWrapper());
   enabled_ = false;
 }
 
-GPUTracerImpl::~GPUTracerImpl() {
+DeviceTracerImpl::~DeviceTracerImpl() {
   // Unregister the CUPTI callbacks if needed to prevent them from accessing
   // freed memory.
   Stop().IgnoreError();
 }
 
-Status GPUTracerImpl::Start() {
-  VLOG(1) << "GPUTracer::Start";
+Status DeviceTracerImpl::Start() {
+  VLOG(1) << "DeviceTracer::Start";
   mutex_lock l(mu_);
   if (enabled_) {
-    return errors::FailedPrecondition("GPUTracer is already enabled.");
+    return errors::FailedPrecondition("DeviceTracer is already enabled.");
   }
   // There can only be one CUPTI subscriber.  If we can't create one then
   // there is another trace in progress (possibly by external code).
@@ -451,8 +451,8 @@ Status GPUTracerImpl::Start() {
   return Status::OK();
 }
 
-Status GPUTracerImpl::Stop() {
-  VLOG(1) << "GPUTracer::Stop";
+Status DeviceTracerImpl::Stop() {
+  VLOG(1) << "DeviceTracer::Stop";
   mutex_lock l(mu_);
   if (!enabled_) {
     return Status::OK();
@@ -466,20 +466,20 @@ Status GPUTracerImpl::Stop() {
   return Status::OK();
 }
 
-void GPUTracerImpl::AddCorrelationId(uint32 correlation_id,
-                                     const string &name) {
+void DeviceTracerImpl::AddCorrelationId(uint32 correlation_id,
+                                        const string &name) {
   VLOG(2) << correlation_id << " : " << name;
   mutex_lock l(trace_mu_);
   if (correlations_.size() >= kMaxRecords) return;
   correlations_.emplace(correlation_id, name);
 }
 
-/*static*/ void GPUTracerImpl::ApiCallback(void *userdata,
-                                           CUpti_CallbackDomain domain,
-                                           CUpti_CallbackId cbid,
-                                           const void *cbdata) {
+/*static*/ void DeviceTracerImpl::ApiCallback(void *userdata,
+                                              CUpti_CallbackDomain domain,
+                                              CUpti_CallbackId cbid,
+                                              const void *cbdata) {
   auto *cbInfo = reinterpret_cast<const CUpti_CallbackData *>(cbdata);
-  GPUTracerImpl *tracer = reinterpret_cast<GPUTracerImpl *>(userdata);
+  DeviceTracerImpl *tracer = reinterpret_cast<DeviceTracerImpl *>(userdata);
   VLOG(2) << "ApiCallback " << domain << ":" << cbid
           << " func: " << cbInfo->functionName;
 
@@ -533,7 +533,7 @@ void GPUTracerImpl::AddCorrelationId(uint32 correlation_id,
   }
 }
 
-void GPUTracerImpl::ActivityCallback(const CUpti_Activity &record) {
+void DeviceTracerImpl::ActivityCallback(const CUpti_Activity &record) {
   VLOG(2) << "ActivityCallback " << record.kind;
   mutex_lock l(trace_mu_);
   switch (record.kind) {
@@ -570,10 +570,10 @@ void GPUTracerImpl::ActivityCallback(const CUpti_Activity &record) {
   }
 }
 
-Status GPUTracerImpl::Collect(StepStatsCollector *collector) {
+Status DeviceTracerImpl::Collect(StepStatsCollector *collector) {
   mutex_lock l(mu_);
   if (enabled_) {
-    return errors::FailedPrecondition("GPUTracer is still enabled.");
+    return errors::FailedPrecondition("DeviceTracer is still enabled.");
   }
 
   // TODO(pbar) Handle device IDs and prefix properly.
@@ -630,10 +630,10 @@ Status GPUTracerImpl::Collect(StepStatsCollector *collector) {
   return Status::OK();
 }
 
-}  // namespace gputracer
+}  // namespace devicetracer
 
-std::unique_ptr<GPUTracer> CreateGPUTracer() {
-  std::unique_ptr<GPUTracer> tracer(new gputracer::GPUTracerImpl());
+std::unique_ptr<DeviceTracer> CreateDeviceTracer() {
+  std::unique_ptr<DeviceTracer> tracer(new devicetracer::DeviceTracerImpl());
   return tracer;
 }
 
@@ -643,7 +643,7 @@ std::unique_ptr<GPUTracer> CreateGPUTracer() {
 
 namespace tensorflow {
 
-std::unique_ptr<GPUTracer> CreateGPUTracer() { return nullptr; }
+std::unique_ptr<DeviceTracer> CreateDeviceTracer() { return nullptr; }
 
 }  // namespace tensorflow
 
diff --git a/tensorflow/core/platform/gpu_tracer.h b/tensorflow/core/platform/device_tracer.h
similarity index 69%
rename from tensorflow/core/platform/gpu_tracer.h
rename to tensorflow/core/platform/device_tracer.h
index 3373d974e3..d0f86a5103 100644
--- a/tensorflow/core/platform/gpu_tracer.h
+++ b/tensorflow/core/platform/device_tracer.h
@@ -13,8 +13,8 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#ifndef TENSORFLOW_CORE_PLATFORM_GPU_TRACER_H_
-#define TENSORFLOW_CORE_PLATFORM_GPU_TRACER_H_
+#ifndef TENSORFLOW_CORE_PLATFORM_DEVICE_TRACER_H_
+#define TENSORFLOW_CORE_PLATFORM_DEVICE_TRACER_H_
 
 #include <memory>
 
@@ -24,16 +24,16 @@ namespace tensorflow {
 
 class StepStatsCollector;
 
-// 'GPUTracer' is an interface for collecting low-level execution timings
-// of GPU computation and DMA transfers.
+// 'DeviceTracer' is an interface for collecting low-level execution timings
+// of hardware accelerator (e.g. GPU) computation and DMA transfers.
 //
 // Typical usage pattern is as follows:
 //
-// GPUTracer* tracer = CreateGPUTracer();
+// DeviceTracer* tracer = CreateDeviceTracer();
 // if (tracer) {
 //   tracer->Start();
 //
-//   ... perform some GPU computations.
+//   ... perform some computations on a hardware accelerator.
 //
 //   tracer->Stop();
 //
@@ -44,23 +44,23 @@ class StepStatsCollector;
 //
 // Notes:
 // Tracing is not supported on all plaforms.  On platforms
-// with no GPU tracing support, 'CreateGPUTracer' will return 'nullptr'.
-// On most plaforms, GPU tracing will be a system-wide activity and
-// a single 'GPUTracer' will collect activity from all GPUs.
+// with no tracing support, 'CreateDeviceTracer' will return 'nullptr'.
+// On most plaforms, hardware tracing will be a system-wide activity and
+// a single 'DeviceTracer' will collect activity from all devices.
 // It is also common that only a single tracer may be active at any
 // given time.  The 'Start' method will return an error if tracing is
 // already in progress elsewhere.
 //
-class GPUTracer {
+class DeviceTracer {
  public:
-  virtual ~GPUTracer() {}
+  virtual ~DeviceTracer() {}
 
-  // Start GPU tracing.
+  // Start device tracing.
   // Note that only a single trace can be active, in which case this
   // methods will return an 'Unavailable' error.
   virtual Status Start() = 0;
 
-  // Stop GPU tracing.
+  // Stop device tracing.
   // It is safe to call 'Stop' on a tracer which is not enabled.
   virtual Status Stop() = 0;
 
@@ -70,10 +70,10 @@ class GPUTracer {
   virtual Status Collect(StepStatsCollector* collector) = 0;
 };
 
-// Creates a platform-specific GPUTracer.
+// Creates a platform-specific DeviceTracer.
 // Returns 'nullptr' on platforms where tracing is not supported.
-std::unique_ptr<GPUTracer> CreateGPUTracer();
+std::unique_ptr<DeviceTracer> CreateDeviceTracer();
 
 }  // namespace tensorflow
 
-#endif  // TENSORFLOW_CORE_PLATFORM_GPU_TRACER_H_
+#endif  // TENSORFLOW_CORE_PLATFORM_DEVICE_TRACER_H_
diff --git a/tensorflow/core/platform/gpu_tracer_test.cc b/tensorflow/core/platform/device_tracer_test.cc
similarity index 84%
rename from tensorflow/core/platform/gpu_tracer_test.cc
rename to tensorflow/core/platform/device_tracer_test.cc
index ce2985fd47..c0c08dabac 100644
--- a/tensorflow/core/platform/gpu_tracer_test.cc
+++ b/tensorflow/core/platform/device_tracer_test.cc
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#include "tensorflow/core/platform/gpu_tracer.h"
+#include "tensorflow/core/platform/device_tracer.h"
 
 #include <map>
 #include <memory>
@@ -50,7 +50,7 @@ std::unique_ptr<Session> CreateSession() {
   return std::unique_ptr<Session>(NewSession(options));
 }
 
-class GPUTracerTest : public ::testing::Test {
+class DeviceTracerTest : public ::testing::Test {
  public:
   void Initialize(std::initializer_list<float> a_values) {
     Graph graph(OpRegistry::Global());
@@ -84,10 +84,10 @@ class GPUTracerTest : public ::testing::Test {
 
  protected:
   void ExpectFailure(const Status& status, error::Code code) {
-    EXPECT_FALSE(status.ok());
+    EXPECT_FALSE(status.ok()) << status.ToString();
     if (!status.ok()) {
       LOG(INFO) << "Status message: " << status.error_message();
-      EXPECT_EQ(code, status.code());
+      EXPECT_EQ(code, status.code()) << status.ToString();
     }
   }
 
@@ -97,22 +97,22 @@ class GPUTracerTest : public ::testing::Test {
   GraphDef def_;
 };
 
-TEST_F(GPUTracerTest, StartStop) {
-  std::unique_ptr<GPUTracer> tracer(CreateGPUTracer());
+TEST_F(DeviceTracerTest, StartStop) {
+  std::unique_ptr<DeviceTracer> tracer(CreateDeviceTracer());
   if (!tracer) return;
   TF_EXPECT_OK(tracer->Start());
   TF_EXPECT_OK(tracer->Stop());
 }
 
-TEST_F(GPUTracerTest, StopBeforeStart) {
-  std::unique_ptr<GPUTracer> tracer(CreateGPUTracer());
+TEST_F(DeviceTracerTest, StopBeforeStart) {
+  std::unique_ptr<DeviceTracer> tracer(CreateDeviceTracer());
   if (!tracer) return;
   TF_EXPECT_OK(tracer->Stop());
   TF_EXPECT_OK(tracer->Stop());
 }
 
-TEST_F(GPUTracerTest, CollectBeforeStart) {
-  std::unique_ptr<GPUTracer> tracer(CreateGPUTracer());
+TEST_F(DeviceTracerTest, CollectBeforeStart) {
+  std::unique_ptr<DeviceTracer> tracer(CreateDeviceTracer());
   if (!tracer) return;
   StepStats stats;
   StepStatsCollector collector(&stats);
@@ -120,8 +120,8 @@ TEST_F(GPUTracerTest, CollectBeforeStart) {
   EXPECT_EQ(stats.dev_stats_size(), 0);
 }
 
-TEST_F(GPUTracerTest, CollectBeforeStop) {
-  std::unique_ptr<GPUTracer> tracer(CreateGPUTracer());
+TEST_F(DeviceTracerTest, CollectBeforeStop) {
+  std::unique_ptr<DeviceTracer> tracer(CreateDeviceTracer());
   if (!tracer) return;
   TF_EXPECT_OK(tracer->Start());
   StepStats stats;
@@ -131,9 +131,9 @@ TEST_F(GPUTracerTest, CollectBeforeStop) {
   TF_EXPECT_OK(tracer->Stop());
 }
 
-TEST_F(GPUTracerTest, StartTwoTracers) {
-  std::unique_ptr<GPUTracer> tracer1(CreateGPUTracer());
-  std::unique_ptr<GPUTracer> tracer2(CreateGPUTracer());
+TEST_F(DeviceTracerTest, StartTwoTracers) {
+  std::unique_ptr<DeviceTracer> tracer1(CreateDeviceTracer());
+  std::unique_ptr<DeviceTracer> tracer2(CreateDeviceTracer());
   if (!tracer1 || !tracer2) return;
 
   TF_EXPECT_OK(tracer1->Start());
@@ -144,9 +144,9 @@ TEST_F(GPUTracerTest, StartTwoTracers) {
   TF_EXPECT_OK(tracer2->Stop());
 }
 
-TEST_F(GPUTracerTest, RunWithTracer) {
-  // On non-GPU platforms, we may not support GPUTracer.
-  std::unique_ptr<GPUTracer> tracer(CreateGPUTracer());
+TEST_F(DeviceTracerTest, RunWithTracer) {
+  // On non-GPU platforms, we may not support DeviceTracer.
+  std::unique_ptr<DeviceTracer> tracer(CreateDeviceTracer());
   if (!tracer) return;
 
   Initialize({3, 2, -1, 0});
@@ -172,8 +172,8 @@ TEST_F(GPUTracerTest, RunWithTracer) {
   EXPECT_FLOAT_EQ(5.0, mat(0, 0));
 }
 
-TEST_F(GPUTracerTest, TraceToStepStatsCollector) {
-  std::unique_ptr<GPUTracer> tracer(CreateGPUTracer());
+TEST_F(DeviceTracerTest, TraceToStepStatsCollector) {
+  std::unique_ptr<DeviceTracer> tracer(CreateDeviceTracer());
   if (!tracer) return;
 
   Initialize({3, 2, -1, 0});
@@ -198,10 +198,10 @@ TEST_F(GPUTracerTest, TraceToStepStatsCollector) {
   collector.Finalize();
   // Depending on whether this runs on CPU or GPU, we will have a
   // different number of devices.
-  EXPECT_GE(stats.dev_stats_size(), 1);
+  EXPECT_GE(stats.dev_stats_size(), 1) << "Saw stats: " << stats.DebugString();
 }
 
-TEST_F(GPUTracerTest, RunWithTraceOption) {
+TEST_F(DeviceTracerTest, RunWithTraceOption) {
   Initialize({3, 2, -1, 0});
   auto session = CreateSession();
   ASSERT_TRUE(session != nullptr);
diff --git a/tensorflow/tools/ci_build/windows/bazel/bazel_test_lib.sh b/tensorflow/tools/ci_build/windows/bazel/bazel_test_lib.sh
index 6a8b6417d6..924ab1a4ae 100644
--- a/tensorflow/tools/ci_build/windows/bazel/bazel_test_lib.sh
+++ b/tensorflow/tools/ci_build/windows/bazel/bazel_test_lib.sh
@@ -88,7 +88,7 @@ extra_failing_gpu_cc_tests="\
     //tensorflow/core:cuda_libdevice_path_test + \
     //tensorflow/core:common_runtime_direct_session_test + \
     //tensorflow/core:common_runtime_direct_session_with_tracking_alloc_test + \
-    //tensorflow/core:gpu_tracer_test + \
+    //tensorflow/core:device_tracer_test + \
     //tensorflow/core:ops_math_grad_test \
 "
 
-- 
GitLab


From 8fc41bbabcc041460cf3a123f0595ea0df6bc30f Mon Sep 17 00:00:00 2001
From: Derek Murray <mrry@google.com>
Date: Mon, 20 Nov 2017 16:20:46 -0800
Subject: [PATCH 0148/1225] Fix potential use-after-move in `RecvOp`.

Note that has not been a problem in practice because the callback
would only be used-after-move in an error condition
(`Rendezvous::ParseKey()` failing) that would only occur if a
malformed graph was fed to a TensorFlow server.

PiperOrigin-RevId: 176434460
---
 tensorflow/core/kernels/sendrecv_ops.cc | 33 +++++++++++++++----------
 1 file changed, 20 insertions(+), 13 deletions(-)

diff --git a/tensorflow/core/kernels/sendrecv_ops.cc b/tensorflow/core/kernels/sendrecv_ops.cc
index 542382872c..206fd40fa6 100644
--- a/tensorflow/core/kernels/sendrecv_ops.cc
+++ b/tensorflow/core/kernels/sendrecv_ops.cc
@@ -142,17 +142,12 @@ RecvOp::RecvOp(OpKernelConstruction* ctx) : AsyncOpKernel(ctx) {
   }
 }
 
-void RecvOp::ComputeAsync(OpKernelContext* ctx, DoneCallback done) {
-  OP_REQUIRES(
-      ctx, ctx->rendezvous() != nullptr,
-      errors::Internal("Op kernel context needs to provide a rendezvous."));
-
-  Rendezvous::Args args;
-  args.device_context = ctx->op_device_context();
-  args.alloc_attrs = ctx->output_alloc_attr(0);
+namespace {
+Rendezvous::DoneCallback make_recv_callback(OpKernelContext* ctx,
+                                            AsyncOpKernel::DoneCallback done) {
   using namespace std::placeholders;
-  Rendezvous::DoneCallback done_cb = std::bind(
-      [ctx](DoneCallback done,
+  return std::bind(
+      [ctx](AsyncOpKernel::DoneCallback done,
             // Begin unbound arguments.
             const Status& s, const Rendezvous::Args& send_args,
             const Rendezvous::Args& recv_args, const Tensor& val,
@@ -170,19 +165,31 @@ void RecvOp::ComputeAsync(OpKernelContext* ctx, DoneCallback done) {
         done();
       },
       std::move(done), _1, _2, _3, _4, _5);
+}
+}  // namespace
+
+void RecvOp::ComputeAsync(OpKernelContext* ctx, DoneCallback done) {
+  OP_REQUIRES(
+      ctx, ctx->rendezvous() != nullptr,
+      errors::Internal("Op kernel context needs to provide a rendezvous."));
+
+  Rendezvous::Args args;
+  args.device_context = ctx->op_device_context();
+  args.alloc_attrs = ctx->output_alloc_attr(0);
 
   FrameAndIter frame_iter = GetFrameAndIter(ctx, hostmem_sendrecv_);
   if (frame_iter == FrameAndIter(0, 0)) {
     VLOG(2) << "Recv " << parsed_key_.buf_;
-    ctx->rendezvous()->RecvAsync(parsed_key_, args, std::move(done_cb));
+    ctx->rendezvous()->RecvAsync(parsed_key_, args,
+                                 make_recv_callback(ctx, std::move(done)));
   } else {
     Rendezvous::ParsedKey in_loop_parsed;
     GetRendezvousKey(key_prefix_, frame_iter, &in_loop_parsed.buf_);
     VLOG(2) << "Recv " << in_loop_parsed.buf_;
     OP_REQUIRES_OK_ASYNC(
         ctx, Rendezvous::ParseKey(in_loop_parsed.buf_, &in_loop_parsed), done);
-
-    ctx->rendezvous()->RecvAsync(in_loop_parsed, args, std::move(done_cb));
+    ctx->rendezvous()->RecvAsync(in_loop_parsed, args,
+                                 make_recv_callback(ctx, std::move(done)));
   }
 }
 
-- 
GitLab


From dad3670237d9943c7780c5daa3a171e6ca1bf959 Mon Sep 17 00:00:00 2001
From: Kay Zhu <kayzhu@google.com>
Date: Mon, 20 Nov 2017 16:34:19 -0800
Subject: [PATCH 0149/1225] [XLA:Doc] Correct parameter ordering for Clamp
 operation: operand should be in the middle of min and max to be consistent
 with the actual implementation.

PiperOrigin-RevId: 176436283
---
 tensorflow/docs_src/performance/xla/operation_semantics.md | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tensorflow/docs_src/performance/xla/operation_semantics.md b/tensorflow/docs_src/performance/xla/operation_semantics.md
index ccced8792e..dfd9c12c89 100644
--- a/tensorflow/docs_src/performance/xla/operation_semantics.md
+++ b/tensorflow/docs_src/performance/xla/operation_semantics.md
@@ -75,14 +75,14 @@ Clamps an operand to within the range between a minimum and maximum value.
 | `computation` | `Computation`           | computation of type `T_0, T_1,   |
 :               :                         : ..., T_N -> S` with N parameters :
 :               :                         : of arbitrary type                :
-| `operand`     | `ComputationDataHandle` | array of type T                  |
 | `min`         | `ComputationDataHandle` | array of type T                  |
+| `operand`     | `ComputationDataHandle` | array of type T                  |
 | `max`         | `ComputationDataHandle` | array of type T                  |
 
 Given an operand and minimum and maximum values, returns the operand if it is in
 the range between the minimum and maximum, else returns the minimum value if the
 operand is below this range or the maximum value if the operand is above this
-range.  That is, `clamp(x, a, b) =  max(min(x, a), b)`.
+range.  That is, `clamp(a, x, b) =  max(min(a, x), b)`.
 
 All three arrays must be the same shape. Alternately, as a restricted form of
 [broadcasting](broadcasting.md), `min` and/or `max` can be a scalar of type `T`.
@@ -94,7 +94,7 @@ let operand: s32[3] = {-1, 5, 9};
 let min: s32 = 0;
 let max: s32 = 6;
 ==>
-Clamp(operand, min, max) = s32[3]{0, 5, 6};
+Clamp(min, operand, max) = s32[3]{0, 5, 6};
 ```
 
 ## Collapse
-- 
GitLab


From 3b7ff59bf10680a1520272bd3a738bd7c741f61f Mon Sep 17 00:00:00 2001
From: Jiri Simsa <jsimsa@google.com>
Date: Mon, 20 Nov 2017 16:38:28 -0800
Subject: [PATCH 0150/1225] Improving documentation of
 `tf.data.Dataset.repeat`.

PiperOrigin-RevId: 176436826
---
 tensorflow/python/data/ops/dataset_ops.py | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/tensorflow/python/data/ops/dataset_ops.py b/tensorflow/python/data/ops/dataset_ops.py
index d434c8e522..d6efb7fa9a 100644
--- a/tensorflow/python/data/ops/dataset_ops.py
+++ b/tensorflow/python/data/ops/dataset_ops.py
@@ -549,11 +549,14 @@ class Dataset(object):
   def repeat(self, count=None):
     """Repeats this dataset `count` times.
 
+    NOTE: If this dataset is a function of global state (e.g. a random number
+    generator), then different repetitions may produce different elements.
+
     Args:
       count: (Optional.) A `tf.int64` scalar `tf.Tensor`, representing the
-        number of times the elements of this dataset should be repeated. The
-        default behavior (if `count` is `None` or `-1`) is for the elements to
-        be repeated indefinitely.
+        number of times the dataset should be repeated. The default behavior
+        (if `count` is `None` or `-1`) is for the dataset be repeated
+        indefinitely.
 
     Returns:
       A `Dataset`.
-- 
GitLab


From 76195b6a84d89e5648a6911a5c3577a9e2bf0ce5 Mon Sep 17 00:00:00 2001
From: Sergio Guadarrama <sguada@google.com>
Date: Mon, 20 Nov 2017 16:56:12 -0800
Subject: [PATCH 0151/1225] Allow test_util to eval callables in Eager mode.

PiperOrigin-RevId: 176438865
---
 tensorflow/python/framework/test_util.py      |  2 ++
 tensorflow/python/framework/test_util_test.py | 10 +++++++++-
 2 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/tensorflow/python/framework/test_util.py b/tensorflow/python/framework/test_util.py
index 6e3a35af3c..cfa5fe5e3e 100644
--- a/tensorflow/python/framework/test_util.py
+++ b/tensorflow/python/framework/test_util.py
@@ -730,6 +730,8 @@ class TensorFlowTestCase(googletest.TestCase):
       return dict()
     elif tensors is None:
       return None
+    elif callable(tensors):
+      return self._eval_helper(tensors())
     else:
       raise ValueError("Unsupported type %s." % type(tensors))
 
diff --git a/tensorflow/python/framework/test_util_test.py b/tensorflow/python/framework/test_util_test.py
index 3ea28e6334..9aed3457a6 100644
--- a/tensorflow/python/framework/test_util_test.py
+++ b/tensorflow/python/framework/test_util_test.py
@@ -330,6 +330,15 @@ class TestUtilTest(test_util.TensorFlowTestCase):
     self.assertEqual(a_np_rand, b_np_rand)
     self.assertEqual(a_rand, b_rand)
 
+  @test_util.run_in_graph_and_eager_modes()
+  def test_callable_evaluate(self):
+    def model():
+      return resource_variable_ops.ResourceVariable(
+          name="same_name",
+          initial_value=1) + 1
+    with context.eager_mode():
+      self.assertEqual(2, self.evaluate(model))
+
 
 class GarbageCollectionTest(test_util.TensorFlowTestCase):
 
@@ -421,6 +430,5 @@ class IsolationTest(test_util.TensorFlowTestCase):
         with self.assertRaises(ValueError):
           first_container_variable.read_value()
 
-
 if __name__ == "__main__":
   googletest.main()
-- 
GitLab


From 60a0666a40767baea6b21e57714a553c217cad8f Mon Sep 17 00:00:00 2001
From: Derek Murray <mrry@google.com>
Date: Mon, 20 Nov 2017 16:59:00 -0800
Subject: [PATCH 0152/1225] [tf.data] Add two simple microbenchmarks for
 `Dataset.map()` performance.

PiperOrigin-RevId: 176439139
---
 .../kernel_tests/map_dataset_op_test.py       | 62 +++++++++++++++++++
 1 file changed, 62 insertions(+)

diff --git a/tensorflow/python/kernel_tests/map_dataset_op_test.py b/tensorflow/python/kernel_tests/map_dataset_op_test.py
index c6c36d133c..51f43bfd89 100644
--- a/tensorflow/python/kernel_tests/map_dataset_op_test.py
+++ b/tensorflow/python/kernel_tests/map_dataset_op_test.py
@@ -19,13 +19,16 @@ from __future__ import print_function
 
 from collections import namedtuple
 import threading
+import time
 
 import numpy as np
 
+from tensorflow.python.client import session
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import errors
+from tensorflow.python.framework import ops
 from tensorflow.python.framework import sparse_tensor
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import data_flow_ops
@@ -595,5 +598,64 @@ class MapDatasetTest(test.TestCase):
       with self.assertRaises(errors.OutOfRangeError):
         sess.run(get_next)
 
+
+class MapDatasetBenchmark(test.Benchmark):
+
+  def benchmarkChainOfMaps(self):
+    chain_lengths = [0, 1, 2, 5, 10, 20, 50]
+    for chain_length in chain_lengths:
+      with ops.Graph().as_default():
+        dataset = dataset_ops.Dataset.from_tensors(0).repeat(None)
+        for _ in range(chain_length):
+          dataset = dataset.map(lambda x: x)
+        iterator = dataset.make_one_shot_iterator()
+        next_element = iterator.get_next()
+
+        with session.Session() as sess:
+          for _ in range(5):
+            sess.run(next_element.op)
+          deltas = []
+          for _ in range(100):
+            start = time.time()
+            for _ in range(100):
+              sess.run(next_element.op)
+            end = time.time()
+            deltas.append(end - start)
+
+          median_wall_time = np.median(deltas) / 100
+          print("Map dataset chain length: %d Median wall time: %f"
+                % (chain_length, median_wall_time))
+          self.report_benchmark(
+              iters=1000, wall_time=median_wall_time,
+              name="benchmark_map_dataset_chain_latency_%d" % chain_length)
+
+  def benchmarkMapFanOut(self):
+    fan_outs = [1, 2, 5, 10, 20, 50, 100]
+    for fan_out in fan_outs:
+      with ops.Graph().as_default():
+        dataset = dataset_ops.Dataset.from_tensors(
+            tuple(0 for _ in range(fan_out))).repeat(None).map(lambda *xs: xs)
+        iterator = dataset.make_one_shot_iterator()
+        next_element = iterator.get_next()
+
+        with session.Session() as sess:
+          for _ in range(5):
+            sess.run(next_element[0].op)
+          deltas = []
+          for _ in range(100):
+            start = time.time()
+            for _ in range(100):
+              sess.run(next_element[0].op)
+            end = time.time()
+            deltas.append(end - start)
+
+          median_wall_time = np.median(deltas) / 100
+          print("Map dataset fan out: %d Median wall time: %f"
+                % (fan_out, median_wall_time))
+          self.report_benchmark(
+              iters=1000, wall_time=median_wall_time,
+              name="benchmark_map_dataset_fan_out_%d" % fan_out)
+
+
 if __name__ == "__main__":
   test.main()
-- 
GitLab


From 385da92355a1b24d081843d4ffb509fd0a7a983e Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 20 Nov 2017 17:02:25 -0800
Subject: [PATCH 0153/1225] [TF:XLA] Make two test cases to run only with
 NDEBUG defined.

PiperOrigin-RevId: 176439578
---
 tensorflow/compiler/xla/tests/BUILD          | 1 -
 tensorflow/compiler/xla/tests/params_test.cc | 4 ++--
 2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/tensorflow/compiler/xla/tests/BUILD b/tensorflow/compiler/xla/tests/BUILD
index f3885e9021..c64d5aca4f 100644
--- a/tensorflow/compiler/xla/tests/BUILD
+++ b/tensorflow/compiler/xla/tests/BUILD
@@ -382,7 +382,6 @@ xla_test(
     name = "params_test",
     srcs = ["params_test.cc"],
     shard_count = 30,
-    tags = ["optonly"],
     deps = [
         "//tensorflow/compiler/xla:array2d",
         "//tensorflow/compiler/xla:literal_util",
diff --git a/tensorflow/compiler/xla/tests/params_test.cc b/tensorflow/compiler/xla/tests/params_test.cc
index fda4389f47..b7f62b8aa1 100644
--- a/tensorflow/compiler/xla/tests/params_test.cc
+++ b/tensorflow/compiler/xla/tests/params_test.cc
@@ -252,8 +252,8 @@ XLA_TEST_F(ParamsTest, HundredLargeR1Parameters) {
 }
 
 // Only run the 3,000-parameter tests in opt mode to avoid test timeouts.
-// Timeout last observed on 2017-09-12.
-#ifndef NDEBUG
+// Timeout last observed on 2017-11-20.
+#ifdef NDEBUG
 
 // TODO(b/65525254) Fails on GPU on 2017-09-10 because we try to reserve too
 // much space in parameter memory for the kernel.
-- 
GitLab


From fd92829df41984de014fd5f6807ad061fa45090a Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 20 Nov 2017 17:27:35 -0800
Subject: [PATCH 0154/1225] [tf.data] Remove redundant else statement.

PiperOrigin-RevId: 176442381
---
 tensorflow/core/kernels/zip_dataset_op.cc | 2 --
 1 file changed, 2 deletions(-)

diff --git a/tensorflow/core/kernels/zip_dataset_op.cc b/tensorflow/core/kernels/zip_dataset_op.cc
index 96080863ea..9381915ae9 100644
--- a/tensorflow/core/kernels/zip_dataset_op.cc
+++ b/tensorflow/core/kernels/zip_dataset_op.cc
@@ -128,8 +128,6 @@ class ZipDatasetOp : public DatasetOpKernel {
         if (*end_of_sequence) {
           out_tensors->clear();
           input_impls_.clear();
-        } else {
-          *end_of_sequence = false;
         }
         return Status::OK();
       }
-- 
GitLab


From 55672b52559c73b5bf12c4827277959bda765e59 Mon Sep 17 00:00:00 2001
From: Alexandre Passos <apassos@google.com>
Date: Mon, 20 Nov 2017 17:34:19 -0800
Subject: [PATCH 0155/1225] TFE_ContextAddFunction to interface with the
 TFE_Function* API

PiperOrigin-RevId: 176443014
---
 tensorflow/c/eager/c_api.cc      |  6 ++++
 tensorflow/c/eager/c_api.h       |  7 ++++
 tensorflow/c/eager/c_api_test.cc | 60 ++++++++++++++++++++++++++++++++
 3 files changed, 73 insertions(+)

diff --git a/tensorflow/c/eager/c_api.cc b/tensorflow/c/eager/c_api.cc
index 8359de62b7..706c89536d 100644
--- a/tensorflow/c/eager/c_api.cc
+++ b/tensorflow/c/eager/c_api.cc
@@ -571,6 +571,12 @@ void TFE_ContextAddFunctionDef(TFE_Context* ctx,
   status->status = ctx->func_lib_def.AddFunctionDef(function_def);
 }
 
+void TFE_ContextAddFunction(TFE_Context* ctx, TF_Function* function,
+                            TF_Status* status) {
+  tensorflow::mutex_lock l(ctx->functions_mu);
+  status->status = ctx->func_lib_def.AddFunctionDef(function->fdef);
+}
+
 }  // extern "C"
 
 TFE_TensorHandle* TFE_NewTensorHandle(const tensorflow::Tensor& t) {
diff --git a/tensorflow/c/eager/c_api.h b/tensorflow/c/eager/c_api.h
index 865580c5f3..ca105962df 100644
--- a/tensorflow/c/eager/c_api.h
+++ b/tensorflow/c/eager/c_api.h
@@ -200,6 +200,13 @@ TF_CAPI_EXPORT extern void TFE_ContextAddFunctionDef(TFE_Context* ctx,
                                                      const char* serialized_function_def,
                                                      size_t size, TF_Status* status);
 
+// Adds a function (created from TF_GraphToFunction or
+// TF_FunctionImportFunctionDef) to the context, allowing it to be executed with
+// TFE_Execute by creating an op with the same name as the function.
+TF_CAPI_EXPORT extern void TFE_ContextAddFunction(TFE_Context* ctx,
+                                                  TF_Function* function,
+                                                  TF_Status* status);
+
 #ifdef __cplusplus
 } /* end extern "C" */
 #endif
diff --git a/tensorflow/c/eager/c_api_test.cc b/tensorflow/c/eager/c_api_test.cc
index 4af91b8853..03843fa913 100644
--- a/tensorflow/c/eager/c_api_test.cc
+++ b/tensorflow/c/eager/c_api_test.cc
@@ -295,6 +295,66 @@ TEST(CAPI, Execute) {
   TF_DeleteStatus(status);
 }
 
+TEST(CAPI, Function) {
+  // First create a simple identity function.
+  TF_Graph* function_graph = TF_NewGraph();
+  TF_OperationDescription* arg_descr =
+      TF_NewOperation(function_graph, "Placeholder", "arg");
+  TF_SetAttrType(arg_descr, "dtype", TF_INT32);
+  TF_Status* status = TF_NewStatus();
+  TF_Operation* arg = TF_FinishOperation(arg_descr, status);
+  ASSERT_TRUE(TF_GetCode(status) == TF_OK) << TF_Message(status);
+  TF_OperationDescription* id_descr =
+      TF_NewOperation(function_graph, "Identity", "id");
+  TF_SetAttrType(id_descr, "T", TF_INT32);
+  TF_AddInput(id_descr, {arg, 0});
+  TF_Operation* id = TF_FinishOperation(id_descr, status);
+  ASSERT_TRUE(TF_GetCode(status) == TF_OK) << TF_Message(status);
+  TF_Output input{arg, 0};
+  TF_Output output{id, 0};
+  TF_Function* fn =
+      TF_GraphToFunction(function_graph, "ident", 0, 1, &id, 1, &input, 1,
+                         &output, nullptr, nullptr, "test", status);
+  ASSERT_TRUE(TF_GetCode(status) == TF_OK) << TF_Message(status);
+  TF_DeleteGraph(function_graph);
+  TFE_ContextOptions* opts = TFE_NewContextOptions();
+  TFE_Context* ctx = TFE_NewContext(opts, status);
+  ASSERT_TRUE(TF_GetCode(status) == TF_OK) << TF_Message(status);
+  TFE_DeleteContextOptions(opts);
+  TFE_ContextAddFunction(ctx, fn, status);
+  ASSERT_TRUE(TF_GetCode(status) == TF_OK) << TF_Message(status);
+  TF_DeleteFunction(fn);
+
+  TF_Tensor* t = TF_AllocateTensor(TF_INT32, nullptr, 0, 1);
+  *reinterpret_cast<tensorflow::int32*>(TF_TensorData(t)) = 42;
+  TFE_TensorHandle* h = TFE_NewTensorHandle(t, status);
+  ASSERT_TRUE(TF_GetCode(status) == TF_OK) << TF_Message(status);
+  TF_DeleteTensor(t);
+
+  TFE_Op* op = TFE_NewOp(ctx, "ident", status);
+  ASSERT_TRUE(TF_GetCode(status) == TF_OK) << TF_Message(status);
+  TFE_OpAddInput(op, h, status);
+  ASSERT_TRUE(TF_GetCode(status) == TF_OK) << TF_Message(status);
+
+  std::vector<TFE_TensorHandle*> result;
+  result.push_back(nullptr);
+  int num_retvals = 1;
+  TFE_Execute(op, result.data(), &num_retvals, status);
+  TFE_DeleteOp(op);
+  ASSERT_TRUE(TF_GetCode(status) == TF_OK) << TF_Message(status);
+  ASSERT_EQ(num_retvals, 1);
+
+  TF_Tensor* r = TFE_TensorHandleResolve(result[0], status);
+  ASSERT_TRUE(TF_GetCode(status) == TF_OK) << TF_Message(status);
+  EXPECT_EQ(*reinterpret_cast<tensorflow::int32*>(TF_TensorData(r)), 42);
+  TFE_DeleteTensorHandle(h);
+  TF_DeleteTensor(r);
+  TFE_DeleteTensorHandle(result[0]);
+  TFE_DeleteContext(ctx, status);
+  ASSERT_TRUE(TF_GetCode(status) == TF_OK) << TF_Message(status);
+  TF_DeleteStatus(status);
+}
+
 string MatMulFunction() {
   tensorflow::FunctionDef def;
   CHECK(tensorflow::protobuf::TextFormat::ParseFromString(
-- 
GitLab


From c1885d49cb83801b6e8ca60c7ed9289f9f2ae13d Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 20 Nov 2017 17:40:21 -0800
Subject: [PATCH 0156/1225] removing unused code

PiperOrigin-RevId: 176443600
---
 tensorflow/contrib/boosted_trees/BUILD        |   6 +-
 .../kernels/split_handler_ops.cc              |   2 +-
 tensorflow/contrib/boosted_trees/lib/BUILD    |  42 -----
 .../handlers/bias-feature-column-handler.cc   |  59 ------
 .../handlers/bias-feature-column-handler.h    |  57 ------
 .../bias-feature-column-handler_test.cc       | 135 --------------
 .../categorical-feature-column-handler.cc     | 140 --------------
 .../categorical-feature-column-handler.h      |  64 -------
 ...categorical-feature-column-handler_test.cc | 165 -----------------
 .../dense-quantized-feature-column-handler.cc | 116 ------------
 .../dense-quantized-feature-column-handler.h  |  62 -------
 ...e-quantized-feature-column-handler_test.cc | 155 ----------------
 .../handlers/feature-column-handler.h         |  83 ---------
 ...sparse-quantized-feature-column-handler.cc | 172 ------------------
 .../sparse-quantized-feature-column-handler.h |  67 -------
 ...e-quantized-feature-column-handler_test.cc | 162 -----------------
 16 files changed, 5 insertions(+), 1482 deletions(-)
 delete mode 100644 tensorflow/contrib/boosted_trees/lib/learner/stochastic/handlers/bias-feature-column-handler.cc
 delete mode 100644 tensorflow/contrib/boosted_trees/lib/learner/stochastic/handlers/bias-feature-column-handler.h
 delete mode 100644 tensorflow/contrib/boosted_trees/lib/learner/stochastic/handlers/bias-feature-column-handler_test.cc
 delete mode 100644 tensorflow/contrib/boosted_trees/lib/learner/stochastic/handlers/categorical-feature-column-handler.cc
 delete mode 100644 tensorflow/contrib/boosted_trees/lib/learner/stochastic/handlers/categorical-feature-column-handler.h
 delete mode 100644 tensorflow/contrib/boosted_trees/lib/learner/stochastic/handlers/categorical-feature-column-handler_test.cc
 delete mode 100644 tensorflow/contrib/boosted_trees/lib/learner/stochastic/handlers/dense-quantized-feature-column-handler.cc
 delete mode 100644 tensorflow/contrib/boosted_trees/lib/learner/stochastic/handlers/dense-quantized-feature-column-handler.h
 delete mode 100644 tensorflow/contrib/boosted_trees/lib/learner/stochastic/handlers/dense-quantized-feature-column-handler_test.cc
 delete mode 100644 tensorflow/contrib/boosted_trees/lib/learner/stochastic/handlers/feature-column-handler.h
 delete mode 100644 tensorflow/contrib/boosted_trees/lib/learner/stochastic/handlers/sparse-quantized-feature-column-handler.cc
 delete mode 100644 tensorflow/contrib/boosted_trees/lib/learner/stochastic/handlers/sparse-quantized-feature-column-handler.h
 delete mode 100644 tensorflow/contrib/boosted_trees/lib/learner/stochastic/handlers/sparse-quantized-feature-column-handler_test.cc

diff --git a/tensorflow/contrib/boosted_trees/BUILD b/tensorflow/contrib/boosted_trees/BUILD
index 66a04d42e9..7072f56420 100644
--- a/tensorflow/contrib/boosted_trees/BUILD
+++ b/tensorflow/contrib/boosted_trees/BUILD
@@ -359,8 +359,8 @@ tf_custom_op_library(
     ],
     deps = [
         "//tensorflow/contrib/boosted_trees/lib:example_partitioner",
-        "//tensorflow/contrib/boosted_trees/lib:feature-column-handlers",
         "//tensorflow/contrib/boosted_trees/lib:models",
+        "//tensorflow/contrib/boosted_trees/lib:node-stats",
         "//tensorflow/contrib/boosted_trees/lib:utils",
         "//tensorflow/contrib/boosted_trees/lib:weighted_quantiles",
         "//tensorflow/contrib/boosted_trees/proto:learner_proto_cc",
@@ -404,10 +404,12 @@ tf_kernel_library(
     name = "split_handler_ops_kernels",
     srcs = ["kernels/split_handler_ops.cc"],
     deps = [
-        "//tensorflow/contrib/boosted_trees/lib:feature-column-handlers",
+        "//tensorflow/contrib/boosted_trees/lib:node-stats",
         "//tensorflow/contrib/boosted_trees/proto:split_info_proto_cc",
         "//tensorflow/contrib/boosted_trees/proto:tree_config_proto_cc",
         "//tensorflow/core:framework_headers_lib",
+        "//tensorflow/core:protos_all_cc",
+        "//third_party/eigen3",
     ],
     alwayslink = 1,
 )
diff --git a/tensorflow/contrib/boosted_trees/kernels/split_handler_ops.cc b/tensorflow/contrib/boosted_trees/kernels/split_handler_ops.cc
index 5c31980359..a5de1340b9 100644
--- a/tensorflow/contrib/boosted_trees/kernels/split_handler_ops.cc
+++ b/tensorflow/contrib/boosted_trees/kernels/split_handler_ops.cc
@@ -16,7 +16,7 @@
 #include <string>
 #include <vector>
 
-#include "tensorflow/contrib/boosted_trees/lib/learner/stochastic/handlers/feature-column-handler.h"
+#include "tensorflow/contrib/boosted_trees/lib/learner/stochastic/stats/node-stats.h"
 #include "tensorflow/contrib/boosted_trees/proto/split_info.pb.h"
 #include "tensorflow/contrib/boosted_trees/proto/tree_config.pb.h"
 #include "tensorflow/core/framework/device_base.h"
diff --git a/tensorflow/contrib/boosted_trees/lib/BUILD b/tensorflow/contrib/boosted_trees/lib/BUILD
index 107ff0d295..af389849b4 100644
--- a/tensorflow/contrib/boosted_trees/lib/BUILD
+++ b/tensorflow/contrib/boosted_trees/lib/BUILD
@@ -406,48 +406,6 @@ tf_cc_test(
 )
 
 # Learner/stochastic
-
-cc_library(
-    name = "feature-column-handlers",
-    srcs = [
-        "learner/stochastic/handlers/bias-feature-column-handler.cc",
-        "learner/stochastic/handlers/categorical-feature-column-handler.cc",
-        "learner/stochastic/handlers/dense-quantized-feature-column-handler.cc",
-        "learner/stochastic/handlers/sparse-quantized-feature-column-handler.cc",
-    ],
-    hdrs = [
-        "learner/stochastic/handlers/bias-feature-column-handler.h",
-        "learner/stochastic/handlers/categorical-feature-column-handler.h",
-        "learner/stochastic/handlers/dense-quantized-feature-column-handler.h",
-        "learner/stochastic/handlers/feature-column-handler.h",
-        "learner/stochastic/handlers/sparse-quantized-feature-column-handler.h",
-    ],
-    deps = [
-        ":feature-split-candidate",
-        ":feature-stats-accumulator",
-        "//tensorflow/contrib/boosted_trees/proto:learner_proto_cc",
-        "//tensorflow/core:framework_headers_lib",
-        "//tensorflow/core:protos_all_cc",
-    ],
-)
-
-tf_cc_test(
-    name = "feature-column-handlers_test",
-    size = "small",
-    srcs = [
-        "learner/stochastic/handlers/bias-feature-column-handler_test.cc",
-        "learner/stochastic/handlers/categorical-feature-column-handler_test.cc",
-        "learner/stochastic/handlers/dense-quantized-feature-column-handler_test.cc",
-        "learner/stochastic/handlers/sparse-quantized-feature-column-handler_test.cc",
-    ],
-    deps = [
-        ":feature-column-handlers",
-        "//tensorflow/core:tensor_testutil",
-        "//tensorflow/core:test",
-        "//tensorflow/core:test_main",
-    ],
-)
-
 cc_library(
     name = "gradient-stats",
     hdrs = ["learner/stochastic/stats/gradient-stats.h"],
diff --git a/tensorflow/contrib/boosted_trees/lib/learner/stochastic/handlers/bias-feature-column-handler.cc b/tensorflow/contrib/boosted_trees/lib/learner/stochastic/handlers/bias-feature-column-handler.cc
deleted file mode 100644
index b880cf2c47..0000000000
--- a/tensorflow/contrib/boosted_trees/lib/learner/stochastic/handlers/bias-feature-column-handler.cc
+++ /dev/null
@@ -1,59 +0,0 @@
-// Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-// =============================================================================
-#include "tensorflow/contrib/boosted_trees/lib/learner/stochastic/handlers/bias-feature-column-handler.h"
-
-namespace tensorflow {
-namespace boosted_trees {
-namespace learner {
-namespace stochastic {
-
-void BiasFeatureColumnHandler::AggregateGradientStats(
-    const std::vector<int32>& example_partition_ids,
-    const Tensor& example_first_order_gradients,
-    const Tensor& example_second_order_gradients,
-    FeatureStatsAccumulator<GradientStats, GradientStatsAccumulator>*
-        gradient_stats_accumulator) const {
-  // Pass over all examples and aggregate gradient stats for each sub-root.
-  for (int64 example_idx = 0; example_idx < batch_size_; ++example_idx) {
-    auto partition_id = example_partition_ids[example_idx];
-    gradient_stats_accumulator->AddStats(
-        slot_id_, class_id_, partition_id, kBiasFeatureId,
-        GradientStats(example_first_order_gradients,
-                      example_second_order_gradients, example_idx));
-  }
-}
-
-void BiasFeatureColumnHandler::GenerateFeatureSplitCandidates(
-    const LearnerConfig& learner_config, const std::vector<int32>& roots,
-    const std::vector<NodeStats>& root_stats,
-    const FeatureStatsAccumulator<GradientStats, GradientStatsAccumulator>&
-        gradient_stats_accumulator,
-    std::vector<FeatureSplitCandidate>* split_candidates) const {
-  split_candidates->clear();
-  split_candidates->reserve(roots.size());
-  boosted_trees::trees::TreeNode tree_node;
-  for (size_t root_idx = 0; root_idx < roots.size(); ++root_idx) {
-    const NodeStats& root_node_stats = root_stats[root_idx];
-    tree_node.Clear();
-    root_node_stats.FillLeaf(class_id_, tree_node.mutable_leaf());
-    split_candidates->emplace_back(slot_id_, tree_node,
-                                   SplitStats(learner_config, root_node_stats));
-  }
-}
-
-}  // namespace stochastic
-}  // namespace learner
-}  // namespace boosted_trees
-}  // namespace tensorflow
diff --git a/tensorflow/contrib/boosted_trees/lib/learner/stochastic/handlers/bias-feature-column-handler.h b/tensorflow/contrib/boosted_trees/lib/learner/stochastic/handlers/bias-feature-column-handler.h
deleted file mode 100644
index 5c0f99185a..0000000000
--- a/tensorflow/contrib/boosted_trees/lib/learner/stochastic/handlers/bias-feature-column-handler.h
+++ /dev/null
@@ -1,57 +0,0 @@
-// Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-// =============================================================================
-#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_BOOSTED_TREES_LIB_LEARNER_STOCHASTIC_HANDLERS_H_  // NOLINT
-#define THIRD_PARTY_TENSORFLOW_CONTRIB_BOOSTED_TREES_LIB_LEARNER_STOCHASTIC_HANDLERS_H_  // NOLINT
-
-#include "tensorflow/contrib/boosted_trees/lib/learner/stochastic/handlers/feature-column-handler.h"
-
-namespace tensorflow {
-namespace boosted_trees {
-namespace learner {
-namespace stochastic {
-
-// Handler for a bias feature column in the single class case.
-// This handler is useful even if we don't introduce a bias feature because
-// it allows us to aggregate stats per partition which in turn allows us
-// to compute node stats for each root to split.
-class BiasFeatureColumnHandler : public FeatureColumnHandler {
- public:
-  BiasFeatureColumnHandler(const uint32 class_id, const uint32 slot_id,
-                           const int64 batch_size)
-      : FeatureColumnHandler(class_id, slot_id, batch_size) {}
-
-  void AggregateGradientStats(
-      const std::vector<int32>& example_partition_ids,
-      const Tensor& example_first_order_gradients,
-      const Tensor& example_second_order_gradients,
-      FeatureStatsAccumulator<GradientStats, GradientStatsAccumulator>*
-          gradient_stats_accumulator) const override;
-
-  void GenerateFeatureSplitCandidates(
-      const LearnerConfig& learner_config, const std::vector<int32>& roots,
-      const std::vector<NodeStats>& root_stats,
-      const FeatureStatsAccumulator<GradientStats, GradientStatsAccumulator>&
-          gradient_stats_accumulator,
-      std::vector<FeatureSplitCandidate>* split_candidates) const override;
-
-  static constexpr auto kBiasFeatureId = 0;
-};
-
-}  // namespace stochastic
-}  // namespace learner
-}  // namespace boosted_trees
-}  // namespace tensorflow
-
-#endif  // THIRD_PARTY_TENSORFLOW_CONTRIB_BOOSTED_TREES_LIB_LEARNER_STOCHASTIC_HANDLERS_H_  // NOLINT
diff --git a/tensorflow/contrib/boosted_trees/lib/learner/stochastic/handlers/bias-feature-column-handler_test.cc b/tensorflow/contrib/boosted_trees/lib/learner/stochastic/handlers/bias-feature-column-handler_test.cc
deleted file mode 100644
index f4c7df7fab..0000000000
--- a/tensorflow/contrib/boosted_trees/lib/learner/stochastic/handlers/bias-feature-column-handler_test.cc
+++ /dev/null
@@ -1,135 +0,0 @@
-// Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-// =============================================================================
-#include "tensorflow/contrib/boosted_trees/lib/learner/stochastic/handlers/bias-feature-column-handler.h"
-
-#include "tensorflow/core/framework/tensor_testutil.h"
-#include "tensorflow/core/platform/test.h"
-
-namespace tensorflow {
-namespace boosted_trees {
-namespace learner {
-namespace stochastic {
-namespace {
-
-using boosted_trees::learner::LearnerConfig;
-
-const auto kClassId = 7;
-const auto kSlotId = 0;
-const auto kBatchSize = 4;
-
-using FeatureStatsAccumulator =
-    FeatureStatsAccumulator<GradientStats, GradientStatsAccumulator>;
-
-class BiasFeatureColumnHandlerTest : public ::testing::Test {
- protected:
-  BiasFeatureColumnHandlerTest()
-      : example_first_order_gradients_(
-            test::AsTensor<float>({0.2f, -0.5f, 1.2f, 4.0f}, {4})),
-        example_second_order_gradients_(
-            test::AsTensor<float>({0.12f, 0.07f, 0.2f, 0.13f}, {4})),
-        example_partitions_({0, 0, 1, 3}) {
-    // Set L2 regularization.
-    learner_config_.mutable_regularization()->set_l2(2.0f);
-    learner_config_.set_multi_class_strategy(LearnerConfig::TREE_PER_CLASS);
-
-    // Create handler.
-    handler_.reset(new BiasFeatureColumnHandler(kClassId, kSlotId, kBatchSize));
-  }
-
-  LearnerConfig learner_config_;
-  const Tensor example_first_order_gradients_;
-  const Tensor example_second_order_gradients_;
-  const std::vector<int32> example_partitions_;
-  std::unique_ptr<BiasFeatureColumnHandler> handler_;
-};
-
-TEST_F(BiasFeatureColumnHandlerTest, AggregateGradientStats) {
-  // Create handler.
-  FeatureStatsAccumulator accumulator(1);
-  handler_->AggregateGradientStats(
-      example_partitions_, example_first_order_gradients_,
-      example_second_order_gradients_, &accumulator);
-
-  // Check stats for each partition.
-  // Partition 0.
-  EXPECT_GRADIENT_STATS_EQ(
-      GradientStats(-0.3f, 0.19f),
-      accumulator.GetStats(kSlotId, kClassId, 0,
-                           BiasFeatureColumnHandler::kBiasFeatureId));
-  // Partition 1.
-  EXPECT_GRADIENT_STATS_EQ(
-      GradientStats(1.2f, 0.2f),
-      accumulator.GetStats(kSlotId, kClassId, 1,
-                           BiasFeatureColumnHandler::kBiasFeatureId));
-  // Partition 2.
-  EXPECT_GRADIENT_STATS_EQ(
-      GradientStats(0.0f, 0.0f),
-      accumulator.GetStats(kSlotId, kClassId, 2,
-                           BiasFeatureColumnHandler::kBiasFeatureId));
-  // Partition 3.
-  EXPECT_GRADIENT_STATS_EQ(
-      GradientStats(4.0f, 0.13f),
-      accumulator.GetStats(kSlotId, kClassId, 3,
-                           BiasFeatureColumnHandler::kBiasFeatureId));
-}
-
-TEST_F(BiasFeatureColumnHandlerTest, GenerateFeatureSplitCandidates) {
-  // Create handler.
-  FeatureStatsAccumulator accumulator(1);
-  handler_->AggregateGradientStats(
-      example_partitions_, example_first_order_gradients_,
-      example_second_order_gradients_, &accumulator);
-
-  // Get feature split candidates for two roots 0 and 3.
-  // Root 0 has zero gain and root 3 has the same gain as the leaf.
-  const std::vector<int32> roots = {0, 3};
-  const std::vector<NodeStats>& root_stats = {
-      NodeStats(1), NodeStats(learner_config_, GradientStats(4.0f, 0.13f))};
-  std::vector<FeatureSplitCandidate> split_candidates;
-  handler_->GenerateFeatureSplitCandidates(learner_config_, roots, root_stats,
-                                           accumulator, &split_candidates);
-  // Expect two candidate splits (one per root).
-  EXPECT_EQ(2, split_candidates.size());
-
-  // Verify first candidate for root 0, gain is expected to be the same as
-  // the left child since the root node gain is zero.
-  const SplitStats expected_split_stats0(learner_config_, root_stats[0]);
-  EXPECT_SPLIT_STATS_EQ(expected_split_stats0, split_candidates[0].split_stats);
-  const auto& tree_node0 = split_candidates[0].tree_node;
-  EXPECT_EQ(boosted_trees::trees::TreeNode::kLeaf, tree_node0.node_case());
-  EXPECT_EQ(1, tree_node0.leaf().sparse_vector().index_size());
-  EXPECT_EQ(kClassId, tree_node0.leaf().sparse_vector().index(0));
-  EXPECT_EQ(1, tree_node0.leaf().sparse_vector().value_size());
-  EXPECT_EQ(root_stats[0].weight_contribution[0],
-            tree_node0.leaf().sparse_vector().value(0));
-
-  // Verify second candidate for root 3, gain is expected to be zero as
-  // the left child gain is equal to the parent gain.
-  const SplitStats expected_split_stats1(learner_config_, root_stats[1]);
-  EXPECT_SPLIT_STATS_EQ(expected_split_stats1, split_candidates[1].split_stats);
-  const auto& tree_node1 = split_candidates[1].tree_node;
-  EXPECT_EQ(boosted_trees::trees::TreeNode::kLeaf, tree_node1.node_case());
-  EXPECT_EQ(1, tree_node1.leaf().sparse_vector().index_size());
-  EXPECT_EQ(kClassId, tree_node1.leaf().sparse_vector().index(0));
-  EXPECT_EQ(1, tree_node1.leaf().sparse_vector().value_size());
-  EXPECT_EQ(root_stats[1].weight_contribution[0],
-            tree_node1.leaf().sparse_vector().value(0));
-}
-
-}  // namespace
-}  // namespace stochastic
-}  // namespace learner
-}  // namespace boosted_trees
-}  // namespace tensorflow
diff --git a/tensorflow/contrib/boosted_trees/lib/learner/stochastic/handlers/categorical-feature-column-handler.cc b/tensorflow/contrib/boosted_trees/lib/learner/stochastic/handlers/categorical-feature-column-handler.cc
deleted file mode 100644
index 3a6c409f84..0000000000
--- a/tensorflow/contrib/boosted_trees/lib/learner/stochastic/handlers/categorical-feature-column-handler.cc
+++ /dev/null
@@ -1,140 +0,0 @@
-// Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-// =============================================================================
-#include "tensorflow/contrib/boosted_trees/lib/learner/stochastic/handlers/categorical-feature-column-handler.h"
-
-#include "tensorflow/core/platform/macros.h"
-
-namespace tensorflow {
-namespace boosted_trees {
-namespace learner {
-namespace stochastic {
-
-namespace {
-
-// Creates a categorical Id split node without assigning children.
-boosted_trees::trees::TreeNode CreateCategoricalIdNode(
-    const int32 feature_column, const int32 id) {
-  boosted_trees::trees::TreeNode split_node;
-  auto* split = split_node.mutable_categorical_id_binary_split();
-  split->set_feature_column(feature_column);
-  split->set_feature_id(id);
-  return split_node;
-}
-
-}  // namespace
-
-void CategoricalFeatureColumnHandler::AggregateGradientStats(
-    const std::vector<int32>& example_partition_ids,
-    const Tensor& example_first_order_gradients,
-    const Tensor& example_second_order_gradients,
-    FeatureStatsAccumulator<GradientStats, GradientStatsAccumulator>*
-        gradient_stats_accumulator) const {
-  // Pass over all rows and aggregate gradient stats for each feature id.
-  const int64 num_rows = indices_.dimension(0);
-  for (int64 row_idx = 0; row_idx < num_rows; ++row_idx) {
-    auto example_idx = indices_(row_idx, 0);
-    auto feature_id = values_(row_idx);
-    const GradientStats norm_gradient_stats(example_first_order_gradients,
-                                            example_second_order_gradients,
-                                            example_idx);
-    auto partition_id = example_partition_ids[example_idx];
-    gradient_stats_accumulator->AddStats(slot_id_, class_id_, partition_id,
-                                         feature_id, norm_gradient_stats);
-  }
-}
-
-void CategoricalFeatureColumnHandler::GenerateFeatureSplitCandidates(
-    const LearnerConfig& learner_config, const std::vector<int32>& roots,
-    const std::vector<NodeStats>& root_stats,
-    const FeatureStatsAccumulator<GradientStats, GradientStatsAccumulator>&
-        gradient_stats_accumulator,
-    std::vector<FeatureSplitCandidate>* split_candidates) const {
-  // Build a reverse lookup of partition id to root idx.
-  std::unordered_map<int32, size_t> partition_id_to_root_idx;
-  partition_id_to_root_idx.reserve(roots.size());
-  for (size_t root_idx = 0; root_idx < roots.size(); ++root_idx) {
-    partition_id_to_root_idx[roots[root_idx]] = root_idx;
-  }
-
-  // Initialize split candidates.
-  split_candidates->clear();
-  if (!roots.empty()) {
-    FeatureSplitCandidate empty_candidate(
-        root_stats[0].weight_contribution.size());
-    split_candidates->resize(roots.size(), empty_candidate);
-  }
-  for (auto& split_candidate : *split_candidates) {
-    split_candidate.split_stats.gain = std::numeric_limits<float>::lowest();
-  }
-
-  // Evaluate split candidates for every root as each is a separate
-  // logical partition over the examples.
-  // Then for each root, we evaluate every feature id as an equality split
-  // and pick the highest split gain.
-  for (const auto& entry :
-       gradient_stats_accumulator.GetFeatureStats(slot_id_)) {
-    DCHECK_EQ(entry.first.class_id, class_id_);
-
-    // Get partition id and root node stats.
-    const int32 partition_id = entry.first.partition_id;
-    auto root_idx_it = partition_id_to_root_idx.find(partition_id);
-    if (root_idx_it == partition_id_to_root_idx.end()) {
-      // Inactive partition.
-      continue;
-    }
-    size_t root_idx = root_idx_it->second;
-    const NodeStats& root_node_stats = root_stats[root_idx];
-
-    // Get gradient stats.
-    const auto& left_gradient_stats = entry.second;
-    auto right_gradient_stats =
-        root_node_stats.gradient_stats - left_gradient_stats;
-
-    // Get node stats.
-    NodeStats left_node_stats(learner_config, left_gradient_stats);
-    NodeStats right_node_stats(learner_config, right_gradient_stats);
-
-    // Generate split candidate and update best split candidate for the
-    // current root if needed.
-    FeatureSplitCandidate split_candidate(
-        slot_id_,
-        CreateCategoricalIdNode(feature_column_, entry.first.feature_id),
-        SplitStats(learner_config, root_node_stats, left_node_stats,
-                   right_node_stats));
-    FeatureSplitCandidate& best_split_candidate = (*split_candidates)[root_idx];
-    if (TF_PREDICT_FALSE(best_split_candidate.tree_node.node_case() ==
-                         boosted_trees::trees::TreeNode::NODE_NOT_SET)) {
-      // Always replace candidates with no node set.
-      best_split_candidate = std::move(split_candidate);
-    } else if (TF_PREDICT_FALSE(split_candidate.split_stats.gain ==
-                                best_split_candidate.split_stats.gain)) {
-      // Tie break on feature id.
-      auto best_split_feature_id =
-          best_split_candidate.tree_node.categorical_id_binary_split()
-              .feature_id();
-      if (entry.first.feature_id < best_split_feature_id) {
-        best_split_candidate = std::move(split_candidate);
-      }
-    } else if (split_candidate.split_stats.gain >
-               best_split_candidate.split_stats.gain) {
-      best_split_candidate = std::move(split_candidate);
-    }
-  }
-}
-
-}  // namespace stochastic
-}  // namespace learner
-}  // namespace boosted_trees
-}  // namespace tensorflow
diff --git a/tensorflow/contrib/boosted_trees/lib/learner/stochastic/handlers/categorical-feature-column-handler.h b/tensorflow/contrib/boosted_trees/lib/learner/stochastic/handlers/categorical-feature-column-handler.h
deleted file mode 100644
index ef964ba716..0000000000
--- a/tensorflow/contrib/boosted_trees/lib/learner/stochastic/handlers/categorical-feature-column-handler.h
+++ /dev/null
@@ -1,64 +0,0 @@
-// Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-// =============================================================================
-
-#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_BOOSTED_TREES_LIB_LEARNER_STOCHASTIC_HANDLERS_CATEGORICAL_FEATURE_COLUMN_HANDLER_H_
-#define THIRD_PARTY_TENSORFLOW_CONTRIB_BOOSTED_TREES_LIB_LEARNER_STOCHASTIC_HANDLERS_CATEGORICAL_FEATURE_COLUMN_HANDLER_H_
-
-#include "tensorflow/contrib/boosted_trees/lib/learner/stochastic/handlers/feature-column-handler.h"
-
-namespace tensorflow {
-namespace boosted_trees {
-namespace learner {
-namespace stochastic {
-
-// Handler for a categorical feature column in the single class case.
-class CategoricalFeatureColumnHandler : public FeatureColumnHandler {
- public:
-  CategoricalFeatureColumnHandler(const int32 class_id, const int32 slot_id,
-                                  const int64 batch_size,
-                                  const int32 feature_column,
-                                  TTypes<int64>::ConstMatrix indices,
-                                  TTypes<int64>::ConstVec values)
-      : FeatureColumnHandler(class_id, slot_id, batch_size),
-        feature_column_(feature_column),
-        indices_(indices),
-        values_(values) {}
-
-  void AggregateGradientStats(
-      const std::vector<int32>& example_partition_ids,
-      const Tensor& example_first_order_gradients,
-      const Tensor& example_second_order_gradients,
-      FeatureStatsAccumulator<GradientStats, GradientStatsAccumulator>*
-          gradient_stats_accumulator) const override;
-
-  void GenerateFeatureSplitCandidates(
-      const LearnerConfig& learner_config, const std::vector<int32>& roots,
-      const std::vector<NodeStats>& root_stats,
-      const FeatureStatsAccumulator<GradientStats, GradientStatsAccumulator>&
-          gradient_stats_accumulator,
-      std::vector<FeatureSplitCandidate>* split_candidates) const override;
-
- protected:
-  const int32 feature_column_;
-  TTypes<int64>::ConstMatrix indices_;
-  TTypes<int64>::ConstVec values_;
-};
-
-}  // namespace stochastic
-}  // namespace learner
-}  // namespace boosted_trees
-}  // namespace tensorflow
-
-#endif  // THIRD_PARTY_TENSORFLOW_CONTRIB_BOOSTED_TREES_LIB_LEARNER_STOCHASTIC_HANDLERS_CATEGORICAL_FEATURE_COLUMN_HANDLER_H_
diff --git a/tensorflow/contrib/boosted_trees/lib/learner/stochastic/handlers/categorical-feature-column-handler_test.cc b/tensorflow/contrib/boosted_trees/lib/learner/stochastic/handlers/categorical-feature-column-handler_test.cc
deleted file mode 100644
index ea82b3f086..0000000000
--- a/tensorflow/contrib/boosted_trees/lib/learner/stochastic/handlers/categorical-feature-column-handler_test.cc
+++ /dev/null
@@ -1,165 +0,0 @@
-// Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-// =============================================================================
-#include "tensorflow/contrib/boosted_trees/lib/learner/stochastic/handlers/categorical-feature-column-handler.h"
-
-#include "tensorflow/core/framework/tensor_testutil.h"
-#include "tensorflow/core/platform/test.h"
-
-namespace tensorflow {
-namespace boosted_trees {
-namespace learner {
-namespace stochastic {
-namespace {
-
-using boosted_trees::learner::LearnerConfig;
-
-const auto kClassId = 7;
-const auto kSlotId = 0;
-const auto kBatchSize = 4;
-const auto kFeatureColumn = 3;
-
-using FeatureStatsAccumulator =
-    FeatureStatsAccumulator<GradientStats, GradientStatsAccumulator>;
-
-class CategoricalFeatureColumnHandlerTest : public ::testing::Test {
- protected:
-  // The data looks like the following:
-  // Example |  Gradients    | Partition | Feature Id |
-  // i0      |  (0.2, 0.12)  |     0     |    1,2     |
-  // i1      |  (-0.5, 0.07) |     0     |            |
-  // i2      |  (1.2, 0.2)   |     0     |     2      |
-  // i3      |  (4.0, 0.13)  |     1     |     0      |
-  CategoricalFeatureColumnHandlerTest()
-      : example_first_order_gradients_(
-            test::AsTensor<float>({0.2f, -0.5f, 1.2f, 4.0f}, {4})),
-        example_second_order_gradients_(
-            test::AsTensor<float>({0.12f, 0.07f, 0.2f, 0.13f}, {4})),
-        example_partitions_({0, 0, 0, 1}),
-        indices_(test::AsTensor<int64>({0, 0, 0, 1, 2, 0, 3, 0}, {4, 2})),
-        values_(test::AsTensor<int64>({1, 2, 2, 0}, {4})) {
-    // Set L2 regularization.
-    learner_config_.mutable_regularization()->set_l2(2.0f);
-    learner_config_.set_multi_class_strategy(LearnerConfig::TREE_PER_CLASS);
-    // Create handler.
-    handler_.reset(new CategoricalFeatureColumnHandler(
-        kClassId, kSlotId, kBatchSize, kFeatureColumn, indices_.matrix<int64>(),
-        values_.vec<int64>()));
-  }
-
-  LearnerConfig learner_config_;
-  const Tensor example_first_order_gradients_;
-  const Tensor example_second_order_gradients_;
-  const std::vector<int32> example_partitions_;
-  const Tensor indices_;
-  const Tensor values_;
-  std::unique_ptr<FeatureColumnHandler> handler_;
-};
-
-TEST_F(CategoricalFeatureColumnHandlerTest, AggregateGradientStats) {
-  // Create handler.
-  FeatureStatsAccumulator accumulator(1);
-  handler_->AggregateGradientStats(
-      example_partitions_, example_first_order_gradients_,
-      example_second_order_gradients_, &accumulator);
-
-  // Check stats for each partition and feature.
-  // Partition 0, Feature 0.
-  EXPECT_GRADIENT_STATS_EQ(GradientStats(0.0f, 0.0f),
-                           accumulator.GetStats(kSlotId, kClassId, 0, 0));
-  // Partition 0, Feature 1.
-  EXPECT_GRADIENT_STATS_EQ(GradientStats(0.2f, 0.12f),
-                           accumulator.GetStats(kSlotId, kClassId, 0, 1));
-  // Partition 0, Feature 2.
-  EXPECT_GRADIENT_STATS_EQ(GradientStats(0.2f + 1.2f, 0.12f + 0.2f),
-                           accumulator.GetStats(kSlotId, kClassId, 0, 2));
-
-  // Partition 1, Feature 0.
-  EXPECT_GRADIENT_STATS_EQ(GradientStats(4.0f, 0.13f),
-                           accumulator.GetStats(kSlotId, kClassId, 1, 0));
-  // Partition 1, Feature 1.
-  EXPECT_GRADIENT_STATS_EQ(GradientStats(0.0f, 0.0f),
-                           accumulator.GetStats(kSlotId, kClassId, 1, 1));
-  // Partition 1, Feature 2.
-  EXPECT_GRADIENT_STATS_EQ(GradientStats(0.0f, 0.0f),
-                           accumulator.GetStats(kSlotId, kClassId, 1, 2));
-}
-
-TEST_F(CategoricalFeatureColumnHandlerTest, GenerateFeatureSplitCandidates) {
-  // Create handler.
-  FeatureStatsAccumulator accumulator(1);
-  handler_->AggregateGradientStats(
-      example_partitions_, example_first_order_gradients_,
-      example_second_order_gradients_, &accumulator);
-
-  // Get feature split candidates for two roots 0 and 1.
-  // The root stats are derived from the per-partition total gradient stats.
-  const std::vector<int32> roots = {0, 1, 5};
-  const std::vector<NodeStats>& root_stats = {
-      NodeStats(learner_config_, GradientStats(0.9f, 0.39f)),
-      NodeStats(learner_config_, GradientStats(4.0f, 0.13f)), NodeStats(1)};
-  std::vector<FeatureSplitCandidate> split_candidates;
-  handler_->GenerateFeatureSplitCandidates(learner_config_, roots, root_stats,
-                                           accumulator, &split_candidates);
-  // Expect three candidate splits (one per root).
-  EXPECT_EQ(3, split_candidates.size());
-
-  // Verify candidate for root 0, the best split occurs when we route
-  // example i0, i2 left and i1 right.
-  const NodeStats expected_left_node0(learner_config_,
-                                      GradientStats(0.2f + 1.2f, 0.12f + 0.2f));
-  const NodeStats expected_right_node0(
-      learner_config_,
-      root_stats[0].gradient_stats - expected_left_node0.gradient_stats);
-  const SplitStats expected_split_stats0(learner_config_, root_stats[0],
-                                         expected_left_node0,
-                                         expected_right_node0);
-  EXPECT_SPLIT_STATS_EQ(expected_split_stats0, split_candidates[0].split_stats);
-
-  const auto& tree_node0 = split_candidates[0].tree_node;
-  EXPECT_EQ(
-      boosted_trees::trees::TreeNode::kCategoricalIdBinarySplitFieldNumber,
-      tree_node0.node_case());
-  const auto& split0 = tree_node0.categorical_id_binary_split();
-  EXPECT_EQ(2, split0.feature_id());
-  EXPECT_EQ(kFeatureColumn, split0.feature_column());
-
-  // Verify candidate for root 1, there's only one active feature here
-  // so zero gain is expected.
-  const NodeStats expected_left_node1(learner_config_,
-                                      root_stats[1].gradient_stats);
-  const NodeStats expected_right_node1(learner_config_, GradientStats(0, 0));
-  const SplitStats expected_split_stats1(learner_config_, root_stats[1],
-                                         expected_left_node1,
-                                         expected_right_node1);
-  EXPECT_SPLIT_STATS_EQ(expected_split_stats1, split_candidates[1].split_stats);
-  const auto& tree_node1 = split_candidates[1].tree_node;
-  EXPECT_EQ(
-      boosted_trees::trees::TreeNode::kCategoricalIdBinarySplitFieldNumber,
-      tree_node1.node_case());
-  const auto& split1 = tree_node1.categorical_id_binary_split();
-  EXPECT_EQ(0, split1.feature_id());
-  EXPECT_EQ(kFeatureColumn, split1.feature_column());
-
-  // Verify there are no candidate splits for root 5.
-  const auto& tree_node2 = split_candidates[2].tree_node;
-  EXPECT_EQ(boosted_trees::trees::TreeNode::NODE_NOT_SET,
-            tree_node2.node_case());
-}
-
-}  // namespace
-}  // namespace stochastic
-}  // namespace learner
-}  // namespace boosted_trees
-}  // namespace tensorflow
diff --git a/tensorflow/contrib/boosted_trees/lib/learner/stochastic/handlers/dense-quantized-feature-column-handler.cc b/tensorflow/contrib/boosted_trees/lib/learner/stochastic/handlers/dense-quantized-feature-column-handler.cc
deleted file mode 100644
index ca7bb71e7d..0000000000
--- a/tensorflow/contrib/boosted_trees/lib/learner/stochastic/handlers/dense-quantized-feature-column-handler.cc
+++ /dev/null
@@ -1,116 +0,0 @@
-// Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-// =============================================================================
-#include "tensorflow/contrib/boosted_trees/lib/learner/stochastic/handlers/dense-quantized-feature-column-handler.h"
-
-namespace tensorflow {
-namespace boosted_trees {
-namespace learner {
-namespace stochastic {
-
-namespace {
-
-// Creates a dense split node without assigning children.
-boosted_trees::trees::TreeNode CreateDenseSplitNode(const int32 feature_column,
-                                                    const float threshold) {
-  boosted_trees::trees::TreeNode split_node;
-  auto* split = split_node.mutable_dense_float_binary_split();
-  split->set_feature_column(feature_column);
-  split->set_threshold(threshold);
-  return split_node;
-}
-
-}  // namespace
-
-void DenseQuantizedFeatureColumnHandler::AggregateGradientStats(
-    const std::vector<int32>& example_partition_ids,
-    const Tensor& example_first_order_gradients,
-    const Tensor& example_second_order_gradients,
-    FeatureStatsAccumulator<GradientStats, GradientStatsAccumulator>*
-        gradient_stats_accumulator) const {
-  // Pass over all examples and aggregate gradient stats for each partition
-  // and quantized feature bucket.
-  for (int64 example_idx = 0; example_idx < batch_size_; ++example_idx) {
-    auto partition_id = example_partition_ids[example_idx];
-    auto feature_id = dense_quantized_values_(example_idx);
-    gradient_stats_accumulator->AddStats(
-        slot_id_, class_id_, partition_id, feature_id,
-        GradientStats(example_first_order_gradients,
-                      example_second_order_gradients, example_idx));
-  }
-}
-
-void DenseQuantizedFeatureColumnHandler::GenerateFeatureSplitCandidates(
-    const LearnerConfig& learner_config, const std::vector<int32>& roots,
-    const std::vector<NodeStats>& root_stats,
-    const FeatureStatsAccumulator<GradientStats, GradientStatsAccumulator>&
-        gradient_stats_accumulator,
-    std::vector<FeatureSplitCandidate>* split_candidates) const {
-  // Evaluate split candidates for every root as each is a separate
-  // logical partition over the examples.
-  // Then for each root, we do a forward-only pass over the quantized
-  // feature buckets accumulating gradients from left to right.
-  // Split gains are evaluated at every threshold and the best split is picked.
-  split_candidates->clear();
-  split_candidates->reserve(roots.size());
-  for (size_t root_idx = 0; root_idx < roots.size(); ++root_idx) {
-    // Get partition Id and root node stats.
-    const int32 partition_id = roots[root_idx];
-    const NodeStats& root_node_stats = root_stats[root_idx];
-
-    // Forward left to right pass over quantiles.
-    GradientStats left_gradient_stats;
-    GradientStats right_gradient_stats(root_node_stats.gradient_stats);
-    FeatureSplitCandidate best_split_candidate(
-        root_node_stats.weight_contribution.size());
-    best_split_candidate.split_stats.gain =
-        std::numeric_limits<float>::lowest();
-    for (int bucket_id = 0; bucket_id < dense_quantiles_.size(); ++bucket_id) {
-      // Get gradient stats.
-      auto gradient_stats = gradient_stats_accumulator.GetStats(
-          slot_id_, class_id_, partition_id, bucket_id);
-      if (gradient_stats.IsZero()) {
-        continue;
-      }
-
-      // Update gradient stats.
-      left_gradient_stats += gradient_stats;
-      right_gradient_stats =
-          root_node_stats.gradient_stats - left_gradient_stats;
-
-      // Get node stats
-      NodeStats left_node_stats(learner_config, left_gradient_stats);
-      NodeStats right_node_stats(learner_config, right_gradient_stats);
-
-      // Generate split candidate.
-      const float threshold = dense_quantiles_(bucket_id);
-      FeatureSplitCandidate split_candidate(
-          slot_id_, CreateDenseSplitNode(dense_feature_column_, threshold),
-          SplitStats(learner_config, root_node_stats, left_node_stats,
-                     right_node_stats));
-      if (split_candidate.split_stats.gain >
-          best_split_candidate.split_stats.gain) {
-        best_split_candidate = std::move(split_candidate);
-      }
-    }
-
-    // Add best candidate for partition.
-    split_candidates->push_back(std::move(best_split_candidate));
-  }
-}
-
-}  // namespace stochastic
-}  // namespace learner
-}  // namespace boosted_trees
-}  // namespace tensorflow
diff --git a/tensorflow/contrib/boosted_trees/lib/learner/stochastic/handlers/dense-quantized-feature-column-handler.h b/tensorflow/contrib/boosted_trees/lib/learner/stochastic/handlers/dense-quantized-feature-column-handler.h
deleted file mode 100644
index 0f3858e4d8..0000000000
--- a/tensorflow/contrib/boosted_trees/lib/learner/stochastic/handlers/dense-quantized-feature-column-handler.h
+++ /dev/null
@@ -1,62 +0,0 @@
-// Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-// =============================================================================
-#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_BOOSTED_TREES_LIB_LEARNER_STOCHASTIC_HANDLERS_DENSE_QUANTIZED_FEATURE_COLUMN_HANDLER_H_
-#define THIRD_PARTY_TENSORFLOW_CONTRIB_BOOSTED_TREES_LIB_LEARNER_STOCHASTIC_HANDLERS_DENSE_QUANTIZED_FEATURE_COLUMN_HANDLER_H_
-
-#include "tensorflow/contrib/boosted_trees/lib/learner/stochastic/handlers/feature-column-handler.h"
-
-namespace tensorflow {
-namespace boosted_trees {
-namespace learner {
-namespace stochastic {
-
-// Handler for a dense quantized feature column in the single class case.
-class DenseQuantizedFeatureColumnHandler : public FeatureColumnHandler {
- public:
-  DenseQuantizedFeatureColumnHandler(
-      const int32 class_id, const int32 slot_id, const int64 batch_size,
-      const int32 dense_feature_column, TTypes<float>::ConstVec dense_quantiles,
-      TTypes<int32>::ConstVec dense_quantized_values)
-      : FeatureColumnHandler(class_id, slot_id, batch_size),
-        dense_feature_column_(dense_feature_column),
-        dense_quantiles_(dense_quantiles),
-        dense_quantized_values_(dense_quantized_values) {}
-
-  void AggregateGradientStats(
-      const std::vector<int32>& example_partition_ids,
-      const Tensor& example_first_order_gradients,
-      const Tensor& example_second_order_gradients,
-      FeatureStatsAccumulator<GradientStats, GradientStatsAccumulator>*
-          gradient_stats_accumulator) const override;
-
-  void GenerateFeatureSplitCandidates(
-      const LearnerConfig& learner_config, const std::vector<int32>& roots,
-      const std::vector<NodeStats>& root_stats,
-      const FeatureStatsAccumulator<GradientStats, GradientStatsAccumulator>&
-          gradient_stats_accumulator,
-      std::vector<FeatureSplitCandidate>* split_candidates) const override;
-
- protected:
-  const int32 dense_feature_column_;
-  TTypes<float>::ConstVec dense_quantiles_;
-  TTypes<int32>::ConstVec dense_quantized_values_;
-};
-
-}  // namespace stochastic
-}  // namespace learner
-}  // namespace boosted_trees
-}  // namespace tensorflow
-
-#endif  // THIRD_PARTY_TENSORFLOW_CONTRIB_BOOSTED_TREES_LIB_LEARNER_STOCHASTIC_HANDLERS_DENSE_QUANTIZED_FEATURE_COLUMN_HANDLER_H_
diff --git a/tensorflow/contrib/boosted_trees/lib/learner/stochastic/handlers/dense-quantized-feature-column-handler_test.cc b/tensorflow/contrib/boosted_trees/lib/learner/stochastic/handlers/dense-quantized-feature-column-handler_test.cc
deleted file mode 100644
index 1bc9d733ad..0000000000
--- a/tensorflow/contrib/boosted_trees/lib/learner/stochastic/handlers/dense-quantized-feature-column-handler_test.cc
+++ /dev/null
@@ -1,155 +0,0 @@
-// Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-// =============================================================================
-#include "tensorflow/contrib/boosted_trees/lib/learner/stochastic/handlers/dense-quantized-feature-column-handler.h"
-
-#include "tensorflow/core/framework/tensor_testutil.h"
-#include "tensorflow/core/platform/test.h"
-
-namespace tensorflow {
-namespace boosted_trees {
-namespace learner {
-namespace stochastic {
-namespace {
-
-using boosted_trees::learner::LearnerConfig;
-
-const auto kClassId = 1;
-const auto kSlotId = 0;
-const auto kBatchSize = 4;
-const auto kFeatureColumn = 2;
-
-using FeatureStatsAccumulator =
-    FeatureStatsAccumulator<GradientStats, GradientStatsAccumulator>;
-
-class DenseQuantizedFeatureColumnHandlerTest : public ::testing::Test {
- protected:
-  // The data looks like the following:
-  // Example |  Gradients    | Partition | Dense Quantile |
-  // i0      |  (0.2, 0.12)  | 0         | 1              |
-  // i1      |  (-0.5, 0.07) | 0         | 1              |
-  // i2      |  (1.2, 0.2)   | 0         | 0              |
-  // i3      |  (4.0, 0.13)  | 1         | 1              |
-  DenseQuantizedFeatureColumnHandlerTest()
-      : example_first_order_gradients_(
-            test::AsTensor<float>({0.2f, -0.5f, 1.2f, 4.0f}, {4})),
-        example_second_order_gradients_(
-            test::AsTensor<float>({0.12f, 0.07f, 0.2f, 0.13f}, {4})),
-        example_partitions_({0, 0, 0, 1}),
-        dense_quantiles_(test::AsTensor<float>({0.3f, 0.52f}, {2})),
-        dense_quantized_values_(test::AsTensor<int32>({1, 1, 0, 1}, {4})) {
-    // Set L2 regularization.
-    learner_config_.mutable_regularization()->set_l2(2.0f);
-    learner_config_.set_multi_class_strategy(LearnerConfig::TREE_PER_CLASS);
-    // Create handler.
-    handler_.reset(new DenseQuantizedFeatureColumnHandler(
-        kClassId, kSlotId, kBatchSize, kFeatureColumn,
-        dense_quantiles_.vec<float>(), dense_quantized_values_.vec<int32>()));
-  }
-
-  LearnerConfig learner_config_;
-  const Tensor example_first_order_gradients_;
-  const Tensor example_second_order_gradients_;
-  const std::vector<int32> example_partitions_;
-  const Tensor dense_quantiles_;
-  const Tensor dense_quantized_values_;
-  std::unique_ptr<FeatureColumnHandler> handler_;
-};
-
-TEST_F(DenseQuantizedFeatureColumnHandlerTest, AggregateGradientStats) {
-  // Create handler.
-  FeatureStatsAccumulator accumulator(1);
-  handler_->AggregateGradientStats(
-      example_partitions_, example_first_order_gradients_,
-      example_second_order_gradients_, &accumulator);
-
-  // Check stats for each partition and feature.
-  // Partition 0, Feature 0.
-  EXPECT_GRADIENT_STATS_EQ(GradientStats(1.2f, 0.2f),
-                           accumulator.GetStats(kSlotId, kClassId, 0, 0));
-  // Partition 0, Feature 1.
-  EXPECT_GRADIENT_STATS_EQ(GradientStats(-0.3f, 0.19f),
-                           accumulator.GetStats(kSlotId, kClassId, 0, 1));
-  // Partition 1, Feature 0.
-  EXPECT_GRADIENT_STATS_EQ(GradientStats(0.0f, 0.0f),
-                           accumulator.GetStats(kSlotId, kClassId, 1, 0));
-  // Partition 1, Feature 1.
-  EXPECT_GRADIENT_STATS_EQ(GradientStats(4.0f, 0.13f),
-                           accumulator.GetStats(kSlotId, kClassId, 1, 1));
-}
-
-TEST_F(DenseQuantizedFeatureColumnHandlerTest, GenerateFeatureSplitCandidates) {
-  // Create handler.
-  FeatureStatsAccumulator accumulator(1);
-  handler_->AggregateGradientStats(
-      example_partitions_, example_first_order_gradients_,
-      example_second_order_gradients_, &accumulator);
-
-  // Get feature split candidates for two roots 0 and 1.
-  // The root stats are derived from the per-partition total gradient stats.
-  const std::vector<int32> roots = {0, 1, 5};
-  const std::vector<NodeStats>& root_stats = {
-      NodeStats(learner_config_, GradientStats(0.9f, 0.39f)),
-      NodeStats(learner_config_, GradientStats(4.0f, 0.13f)), NodeStats(1)};
-  std::vector<FeatureSplitCandidate> split_candidates;
-  handler_->GenerateFeatureSplitCandidates(learner_config_, roots, root_stats,
-                                           accumulator, &split_candidates);
-  // Expect three candidate splits (one per root).
-  EXPECT_EQ(3, split_candidates.size());
-
-  // Verify candidate for root 0, the best split occurs when we route
-  // example i2 left and i0, i1 right.
-  const NodeStats expected_left_node0(learner_config_,
-                                      GradientStats(1.2f, 0.2f));
-  const NodeStats expected_right_node0(
-      learner_config_,
-      root_stats[0].gradient_stats - expected_left_node0.gradient_stats);
-  const SplitStats expected_split_stats0(learner_config_, root_stats[0],
-                                         expected_left_node0,
-                                         expected_right_node0);
-  EXPECT_SPLIT_STATS_EQ(expected_split_stats0, split_candidates[0].split_stats);
-  const auto& tree_node0 = split_candidates[0].tree_node;
-  EXPECT_EQ(boosted_trees::trees::TreeNode::kDenseFloatBinarySplit,
-            tree_node0.node_case());
-  const auto& split0 = tree_node0.dense_float_binary_split();
-  EXPECT_FLOAT_EQ(dense_quantiles_.vec<float>()(0), split0.threshold());
-  EXPECT_EQ(kFeatureColumn, split0.feature_column());
-
-  // Verify candidate for root 1, there's only one active bucket here
-  // so zero gain is expected.
-  const NodeStats expected_left_node1(learner_config_,
-                                      root_stats[1].gradient_stats);
-  const NodeStats expected_right_node1(learner_config_, GradientStats(0, 0));
-  const SplitStats expected_split_stats1(learner_config_, root_stats[1],
-                                         expected_left_node1,
-                                         expected_right_node1);
-  EXPECT_SPLIT_STATS_EQ(expected_split_stats1, split_candidates[1].split_stats);
-  const auto& tree_node1 = split_candidates[1].tree_node;
-  EXPECT_EQ(boosted_trees::trees::TreeNode::kDenseFloatBinarySplit,
-            tree_node1.node_case());
-  const auto& split1 = tree_node1.dense_float_binary_split();
-  EXPECT_FLOAT_EQ(dense_quantiles_.vec<float>()(1), split1.threshold());
-  EXPECT_EQ(kFeatureColumn, split1.feature_column());
-
-  // Verify there are no candidate splits for root 5.
-  const auto& tree_node2 = split_candidates[2].tree_node;
-  EXPECT_EQ(boosted_trees::trees::TreeNode::NODE_NOT_SET,
-            tree_node2.node_case());
-}
-
-}  // namespace
-}  // namespace stochastic
-}  // namespace learner
-}  // namespace boosted_trees
-}  // namespace tensorflow
diff --git a/tensorflow/contrib/boosted_trees/lib/learner/stochastic/handlers/feature-column-handler.h b/tensorflow/contrib/boosted_trees/lib/learner/stochastic/handlers/feature-column-handler.h
deleted file mode 100644
index 8bd2092f96..0000000000
--- a/tensorflow/contrib/boosted_trees/lib/learner/stochastic/handlers/feature-column-handler.h
+++ /dev/null
@@ -1,83 +0,0 @@
-// Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-// =============================================================================
-
-#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_BOOSTED_TREES_LIB_LEARNER_STOCHASTIC_HANDLERS_FEATURE_COLUMN_HANDLER_H_
-#define THIRD_PARTY_TENSORFLOW_CONTRIB_BOOSTED_TREES_LIB_LEARNER_STOCHASTIC_HANDLERS_FEATURE_COLUMN_HANDLER_H_
-
-#include <vector>
-#include "tensorflow/contrib/boosted_trees/lib/learner/common/accumulators/feature-stats-accumulator.h"
-#include "tensorflow/contrib/boosted_trees/lib/learner/stochastic/stats/feature-split-candidate.h"
-#include "tensorflow/contrib/boosted_trees/proto/learner.pb.h"
-#include "tensorflow/core/framework/attr_value.pb.h"
-#include "tensorflow/core/framework/tensor.h"
-#include "tensorflow/core/framework/tensor_types.h"
-
-namespace tensorflow {
-namespace boosted_trees {
-namespace learner {
-namespace stochastic {
-
-// Handler interface for feature columns. Each feature column type may
-// have its own handler which encapsulates the logic of aggregating gradient
-// stats as well as generating split candidates for each partition.
-// Handlers can be stateful and must be thread compatible.
-class FeatureColumnHandler {
- public:
-  FeatureColumnHandler(const int32 class_id, const int32 slot_id,
-                       const int64 batch_size)
-      : class_id_(class_id), slot_id_(slot_id), batch_size_(batch_size) {}
-
-  virtual ~FeatureColumnHandler() {}
-  FeatureColumnHandler(const FeatureColumnHandler& other) = delete;
-  FeatureColumnHandler& operator=(const FeatureColumnHandler& other) = delete;
-
-  // Aggregates example gradient stats for the feature column.
-  virtual void AggregateGradientStats(
-      const std::vector<int32>& example_partition_ids,
-      const Tensor& example_first_order_gradients,
-      const Tensor& example_second_order_gradients,
-      FeatureStatsAccumulator<GradientStats, GradientStatsAccumulator>*
-          gradient_stats_accumulator) const = 0;
-
-  // Generates feature column split candidates for the specified roots.
-  virtual void GenerateFeatureSplitCandidates(
-      const LearnerConfig& learner_config, const std::vector<int32>& roots,
-      const std::vector<NodeStats>& root_stats,
-      const FeatureStatsAccumulator<GradientStats, GradientStatsAccumulator>&
-          gradient_stats_accumulator,
-      std::vector<FeatureSplitCandidate>* split_candidates) const = 0;
-
-  // Accessors.
-  int32 class_id() const { return class_id_; }
-  int32 slot_id() const { return slot_id_; }
-  int64 batch_size() const { return batch_size_; }
-
- protected:
-  // The class Id.
-  const int32 class_id_;
-
-  // The slod Id for use as a unique Id across all feature columns.
-  const int32 slot_id_;
-
-  // Size of the batch of examples.
-  const int64 batch_size_;
-};
-
-}  // namespace stochastic
-}  // namespace learner
-}  // namespace boosted_trees
-}  // namespace tensorflow
-
-#endif  //  THIRD_PARTY_TENSORFLOW_CONTRIB_BOOSTED_TREES_LIB_LEARNER_STOCHASTIC_HANDLERS_FEATURE_COLUMN_HANDLER_H_
diff --git a/tensorflow/contrib/boosted_trees/lib/learner/stochastic/handlers/sparse-quantized-feature-column-handler.cc b/tensorflow/contrib/boosted_trees/lib/learner/stochastic/handlers/sparse-quantized-feature-column-handler.cc
deleted file mode 100644
index a0e9efbbc5..0000000000
--- a/tensorflow/contrib/boosted_trees/lib/learner/stochastic/handlers/sparse-quantized-feature-column-handler.cc
+++ /dev/null
@@ -1,172 +0,0 @@
-// Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-// =============================================================================
-#include "tensorflow/contrib/boosted_trees/lib/learner/stochastic/handlers/sparse-quantized-feature-column-handler.h"
-
-namespace tensorflow {
-namespace boosted_trees {
-namespace learner {
-namespace stochastic {
-
-namespace {
-
-// Creates a sparse default right split node without assigning children.
-boosted_trees::trees::TreeNode CreateSparseSplitNodeDefaultRight(
-    int32 feature_column, float threshold) {
-  boosted_trees::trees::TreeNode split_node;
-  auto* split = split_node.mutable_sparse_float_binary_split_default_right()
-                    ->mutable_split();
-  split->set_feature_column(feature_column);
-  split->set_threshold(threshold);
-  return split_node;
-}
-
-// Creates a sparse default left split node without assigning children.
-boosted_trees::trees::TreeNode CreateSparseSplitNodeDefaultLeft(
-    int32 feature_column, float threshold) {
-  boosted_trees::trees::TreeNode split_node;
-  auto* split = split_node.mutable_sparse_float_binary_split_default_left()
-                    ->mutable_split();
-  split->set_feature_column(feature_column);
-  split->set_threshold(threshold);
-  return split_node;
-}
-
-}  // namespace
-
-void SparseQuantizedFeatureColumnHandler::AggregateGradientStats(
-    const std::vector<int32>& example_partition_ids,
-    const Tensor& example_first_order_gradients,
-    const Tensor& example_second_order_gradients,
-    FeatureStatsAccumulator<GradientStats, GradientStatsAccumulator>*
-        gradient_stats_accumulator) const {
-  // Pass over all rows and aggregate gradient stats for each partition
-  // and quantized feature bucket.
-  const int64 num_rows = sparse_indices_.dimension(0);
-  for (int64 row_idx = 0; row_idx < num_rows; ++row_idx) {
-    auto example_idx = sparse_indices_(row_idx, 0);
-    auto partition_id = example_partition_ids[example_idx];
-    auto feature_id = sparse_quantized_values_(row_idx);
-    gradient_stats_accumulator->AddStats(
-        slot_id_, class_id_, partition_id, feature_id,
-        GradientStats(example_first_order_gradients,
-                      example_second_order_gradients, example_idx));
-  }
-}
-
-void SparseQuantizedFeatureColumnHandler::GenerateFeatureSplitCandidates(
-    const LearnerConfig& learner_config, const std::vector<int32>& roots,
-    const std::vector<NodeStats>& root_stats,
-    const FeatureStatsAccumulator<GradientStats, GradientStatsAccumulator>&
-        gradient_stats_accumulator,
-    std::vector<FeatureSplitCandidate>* split_candidates) const {
-  // Evaluate split candidates for every root as each is a separate
-  // logical partition over the examples.
-  // Then for each root, we do both a forward left to right pass and a backward
-  // right to left pass over the quantized feature buckets accumulating
-  // gradients on one side and using the root aggregate gradients to get the
-  // gradients for the other side. Split gains are evaluated for each pass at
-  // every threshold and the best split is picked.
-  split_candidates->clear();
-  split_candidates->reserve(roots.size());
-  for (size_t root_idx = 0; root_idx < roots.size(); ++root_idx) {
-    // Get partition Id and root node stats.
-    const int32 partition_id = roots[root_idx];
-    const NodeStats& root_node_stats = root_stats[root_idx];
-
-    // Forward pass with right default direction.
-    GradientStats left_gradient_stats;
-    GradientStats right_gradient_stats(root_node_stats.gradient_stats);
-    FeatureSplitCandidate best_split_candidate(
-        root_node_stats.weight_contribution.size());
-    best_split_candidate.split_stats.gain =
-        std::numeric_limits<float>::lowest();
-    for (int bucket_id = 0; bucket_id < sparse_quantiles_.size(); ++bucket_id) {
-      // Get gradient stats.
-      auto gradient_stats = gradient_stats_accumulator.GetStats(
-          slot_id_, class_id_, partition_id, bucket_id);
-      if (gradient_stats.IsZero()) {
-        continue;
-      }
-
-      // Update gradient stats.
-      left_gradient_stats += gradient_stats;
-      right_gradient_stats =
-          root_node_stats.gradient_stats - left_gradient_stats;
-
-      // Get node stats
-      NodeStats left_node_stats(learner_config, left_gradient_stats);
-      NodeStats right_node_stats(learner_config, right_gradient_stats);
-
-      // Generate split candidate.
-      const float threshold = sparse_quantiles_(bucket_id);
-      FeatureSplitCandidate split_candidate(
-          slot_id_,
-          CreateSparseSplitNodeDefaultRight(sparse_feature_column_, threshold),
-          SplitStats(learner_config, root_node_stats, left_node_stats,
-                     right_node_stats));
-      if (split_candidate.split_stats.gain >
-          best_split_candidate.split_stats.gain) {
-        best_split_candidate = std::move(split_candidate);
-      }
-    }
-
-    // Determine if we need a backward pass by checking if the residual gradient
-    // after forward aggregation is almost the same as the aggregated gradient.
-    // for the current root. This helps avoid unnecessary computation as well
-    // as consistency due to floating point precision.
-    if (!right_gradient_stats.IsAlmostZero()) {
-      // Backward pass with left default direction.
-      right_gradient_stats = GradientStats();
-      left_gradient_stats = root_node_stats.gradient_stats;
-      for (int bucket_id = sparse_quantiles_.size() - 1; bucket_id > 0;
-           --bucket_id) {
-        // Get gradient stats.
-        auto gradient_stats = gradient_stats_accumulator.GetStats(
-            slot_id_, class_id_, partition_id, bucket_id);
-        if (gradient_stats.IsZero()) {
-          continue;
-        }
-
-        // Update gradient stats.
-        right_gradient_stats += gradient_stats;
-        left_gradient_stats = root_node_stats.gradient_stats - gradient_stats;
-
-        // Get node stats
-        NodeStats left_node_stats(learner_config, left_gradient_stats);
-        NodeStats right_node_stats(learner_config, right_gradient_stats);
-
-        // Generate split candidate.
-        const float threshold = sparse_quantiles_(bucket_id - 1);
-        FeatureSplitCandidate split_candidate(
-            slot_id_,
-            CreateSparseSplitNodeDefaultLeft(sparse_feature_column_, threshold),
-            SplitStats(learner_config, root_node_stats, left_node_stats,
-                       right_node_stats));
-        if (split_candidate.split_stats.gain >
-            best_split_candidate.split_stats.gain) {
-          best_split_candidate = std::move(split_candidate);
-        }
-      }
-    }
-
-    // Add best candidate for partition.
-    split_candidates->push_back(std::move(best_split_candidate));
-  }
-}
-
-}  // namespace stochastic
-}  // namespace learner
-}  // namespace boosted_trees
-}  // namespace tensorflow
diff --git a/tensorflow/contrib/boosted_trees/lib/learner/stochastic/handlers/sparse-quantized-feature-column-handler.h b/tensorflow/contrib/boosted_trees/lib/learner/stochastic/handlers/sparse-quantized-feature-column-handler.h
deleted file mode 100644
index eb63e70547..0000000000
--- a/tensorflow/contrib/boosted_trees/lib/learner/stochastic/handlers/sparse-quantized-feature-column-handler.h
+++ /dev/null
@@ -1,67 +0,0 @@
-// Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-// =============================================================================
-
-#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_BOOSTED_TREES_LIB_LEARNER_STOCHASTIC_HANDLERS_SPARSE_QUANTIZED_FEATURE_COLUMN_HANDLER_H_
-#define THIRD_PARTY_TENSORFLOW_CONTRIB_BOOSTED_TREES_LIB_LEARNER_STOCHASTIC_HANDLERS_SPARSE_QUANTIZED_FEATURE_COLUMN_HANDLER_H_
-
-#include "tensorflow/contrib/boosted_trees/lib/learner/stochastic/handlers/feature-column-handler.h"
-
-namespace tensorflow {
-namespace boosted_trees {
-namespace learner {
-namespace stochastic {
-
-// Handler for a sparse quantized feature column in the single class case.
-class SparseQuantizedFeatureColumnHandler : public FeatureColumnHandler {
- public:
-  SparseQuantizedFeatureColumnHandler(
-      const int32 class_id, const int32 slot_id, const int64 batch_size,
-      const int32 sparse_feature_column,
-      TTypes<float>::ConstVec sparse_quantiles,
-      TTypes<int64>::ConstMatrix sparse_indices,
-      TTypes<int32>::ConstVec sparse_quantized_values)
-      : FeatureColumnHandler(class_id, slot_id, batch_size),
-        sparse_feature_column_(sparse_feature_column),
-        sparse_quantiles_(sparse_quantiles),
-        sparse_indices_(sparse_indices),
-        sparse_quantized_values_(sparse_quantized_values) {}
-
-  void AggregateGradientStats(
-      const std::vector<int32>& example_partition_ids,
-      const Tensor& example_first_order_gradients,
-      const Tensor& example_second_order_gradients,
-      FeatureStatsAccumulator<GradientStats, GradientStatsAccumulator>*
-          gradient_stats_accumulator) const override;
-
-  void GenerateFeatureSplitCandidates(
-      const LearnerConfig& learner_config, const std::vector<int32>& roots,
-      const std::vector<NodeStats>& root_stats,
-      const FeatureStatsAccumulator<GradientStats, GradientStatsAccumulator>&
-          gradient_stats_accumulator,
-      std::vector<FeatureSplitCandidate>* split_candidates) const override;
-
- protected:
-  const int32 sparse_feature_column_;
-  TTypes<float>::ConstVec sparse_quantiles_;
-  TTypes<int64>::ConstMatrix sparse_indices_;
-  TTypes<int32>::ConstVec sparse_quantized_values_;
-};
-
-}  // namespace stochastic
-}  // namespace learner
-}  // namespace boosted_trees
-}  // namespace tensorflow
-
-#endif  //  THIRD_PARTY_TENSORFLOW_CONTRIB_BOOSTED_TREES_LIB_LEARNER_STOCHASTIC_HANDLERS_SPARSE_QUANTIZED_FEATURE_COLUMN_HANDLER_H_
diff --git a/tensorflow/contrib/boosted_trees/lib/learner/stochastic/handlers/sparse-quantized-feature-column-handler_test.cc b/tensorflow/contrib/boosted_trees/lib/learner/stochastic/handlers/sparse-quantized-feature-column-handler_test.cc
deleted file mode 100644
index 643d936ad2..0000000000
--- a/tensorflow/contrib/boosted_trees/lib/learner/stochastic/handlers/sparse-quantized-feature-column-handler_test.cc
+++ /dev/null
@@ -1,162 +0,0 @@
-// Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-// =============================================================================
-
-#include "tensorflow/contrib/boosted_trees/lib/learner/stochastic/handlers/sparse-quantized-feature-column-handler.h"
-
-#include "tensorflow/core/framework/tensor_testutil.h"
-#include "tensorflow/core/platform/test.h"
-
-namespace tensorflow {
-namespace boosted_trees {
-namespace learner {
-namespace stochastic {
-namespace {
-
-using boosted_trees::learner::LearnerConfig;
-
-const auto kClassId = 3;
-const auto kSlotId = 0;
-const auto kBatchSize = 4;
-const auto kFeatureColumn = 4;
-
-using FeatureStatsAccumulator =
-    FeatureStatsAccumulator<GradientStats, GradientStatsAccumulator>;
-
-class SparseQuantizedFeatureColumnHandlerTest : public ::testing::Test {
- protected:
-  // The data looks like the following:
-  // Example |  Gradients    | Partition | Sparse Quantile |
-  // i0      |  (0.2, 0.12)  | 0         | 1               |
-  // i1      |  (-0.5, 0.07) | 0         | N/A             |
-  // i2      |  (1.2, 0.2)   | 0         | 0               |
-  // i3      |  (4.0, 0.13)  | 1         | 1               |
-  SparseQuantizedFeatureColumnHandlerTest()
-      : example_first_order_gradients_(
-            test::AsTensor<float>({0.2f, -0.5f, 1.2f, 4.0f}, {4})),
-        example_second_order_gradients_(
-            test::AsTensor<float>({0.12f, 0.07f, 0.2f, 0.13f}, {4})),
-        example_partitions_({0, 0, 0, 1}),
-        sparse_quantiles_(test::AsTensor<float>({0.3f, 0.52f}, {2})),
-        sparse_indices_(test::AsTensor<int64>({0, 0, 2, 0, 3, 0}, {3, 2})),
-        sparse_quantized_values_(test::AsTensor<int32>({1, 0, 1}, {3})) {
-    // Set L2 regularization.
-    learner_config_.mutable_regularization()->set_l2(2.0f);
-    learner_config_.set_multi_class_strategy(LearnerConfig::TREE_PER_CLASS);
-    // Create handler.
-    handler_.reset(new SparseQuantizedFeatureColumnHandler(
-        kClassId, kSlotId, kBatchSize, kFeatureColumn,
-        sparse_quantiles_.vec<float>(), sparse_indices_.matrix<int64>(),
-        sparse_quantized_values_.vec<int32>()));
-  }
-
-  LearnerConfig learner_config_;
-  const Tensor example_first_order_gradients_;
-  const Tensor example_second_order_gradients_;
-  const std::vector<int32> example_partitions_;
-  const Tensor sparse_quantiles_;
-  const Tensor sparse_indices_;
-  const Tensor sparse_quantized_values_;
-  std::unique_ptr<FeatureColumnHandler> handler_;
-};
-
-TEST_F(SparseQuantizedFeatureColumnHandlerTest, AggregateGradientStats) {
-  // Create handler.
-  FeatureStatsAccumulator accumulator(1);
-  handler_->AggregateGradientStats(
-      example_partitions_, example_first_order_gradients_,
-      example_second_order_gradients_, &accumulator);
-
-  // Check stats for each partition and feature.
-  // Partition 0, Feature 0.
-  EXPECT_GRADIENT_STATS_EQ(GradientStats(1.2f, 0.2f),
-                           accumulator.GetStats(kSlotId, kClassId, 0, 0));
-  // Partition 0, Feature 1.
-  EXPECT_GRADIENT_STATS_EQ(GradientStats(0.2f, 0.12f),
-                           accumulator.GetStats(kSlotId, kClassId, 0, 1));
-  // Partition 1, Feature 0.
-  EXPECT_GRADIENT_STATS_EQ(GradientStats(0.0f, 0.0f),
-                           accumulator.GetStats(kSlotId, kClassId, 1, 0));
-  // Partition 1, Feature 1.
-  EXPECT_GRADIENT_STATS_EQ(GradientStats(4.0f, 0.13f),
-                           accumulator.GetStats(kSlotId, kClassId, 1, 1));
-}
-
-TEST_F(SparseQuantizedFeatureColumnHandlerTest,
-       GenerateFeatureSplitCandidates) {
-  // Create handler.
-  FeatureStatsAccumulator accumulator(1);
-  handler_->AggregateGradientStats(
-      example_partitions_, example_first_order_gradients_,
-      example_second_order_gradients_, &accumulator);
-
-  // Get feature split candidates for two roots 0 and 1.
-  // The root stats are derived from the per-partition total gradient stats.
-  const std::vector<int32> roots = {0, 1, 9};
-  const std::vector<NodeStats>& root_stats = {
-      NodeStats(learner_config_, GradientStats(0.9f, 0.39f)),
-      NodeStats(learner_config_, GradientStats(4.0f, 0.13f)), NodeStats(1)};
-  std::vector<FeatureSplitCandidate> split_candidates;
-  handler_->GenerateFeatureSplitCandidates(learner_config_, roots, root_stats,
-                                           accumulator, &split_candidates);
-  // Expect three candidate splits (one per root).
-  EXPECT_EQ(3, split_candidates.size());
-
-  // Verify candidate for root 0, the best split occurs when we route
-  // example i0 and i2 to the left and i1 to the right (by default direction).
-  const NodeStats expected_left_node0(learner_config_,
-                                      GradientStats(0.2f + 1.2f, 0.12f + 0.2f));
-  const NodeStats expected_right_node0(
-      learner_config_,
-      root_stats[0].gradient_stats - expected_left_node0.gradient_stats);
-  const SplitStats expected_split_stats0(learner_config_, root_stats[0],
-                                         expected_left_node0,
-                                         expected_right_node0);
-  EXPECT_SPLIT_STATS_EQ(expected_split_stats0, split_candidates[0].split_stats);
-  const auto& tree_node0 = split_candidates[0].tree_node;
-  EXPECT_EQ(boosted_trees::trees::TreeNode::kSparseFloatBinarySplitDefaultRight,
-            tree_node0.node_case());
-  const auto& split0 =
-      tree_node0.sparse_float_binary_split_default_right().split();
-  EXPECT_FLOAT_EQ(sparse_quantiles_.vec<float>()(1), split0.threshold());
-  EXPECT_EQ(kFeatureColumn, split0.feature_column());
-
-  // Verify candidate for root 1, there's only one active bucket here
-  // so zero gain is expected.
-  const NodeStats expected_left_node1(learner_config_,
-                                      root_stats[1].gradient_stats);
-  const NodeStats expected_right_node1(learner_config_, GradientStats(0, 0));
-  const SplitStats expected_split_stats1(learner_config_, root_stats[1],
-                                         expected_left_node1,
-                                         expected_right_node1);
-  EXPECT_SPLIT_STATS_EQ(expected_split_stats1, split_candidates[1].split_stats);
-  const auto& tree_node1 = split_candidates[1].tree_node;
-  EXPECT_EQ(boosted_trees::trees::TreeNode::kSparseFloatBinarySplitDefaultRight,
-            tree_node1.node_case());
-  const auto& split1 =
-      tree_node1.sparse_float_binary_split_default_right().split();
-  EXPECT_FLOAT_EQ(sparse_quantiles_.vec<float>()(1), split1.threshold());
-  EXPECT_EQ(kFeatureColumn, split1.feature_column());
-
-  // Verify there are no candidate splits for root 9.
-  const auto& tree_node2 = split_candidates[2].tree_node;
-  EXPECT_EQ(boosted_trees::trees::TreeNode::NODE_NOT_SET,
-            tree_node2.node_case());
-}
-
-}  // namespace
-}  // namespace stochastic
-}  // namespace learner
-}  // namespace boosted_trees
-}  // namespace tensorflow
-- 
GitLab


From 7c7ccb0ba476d12814b5be2a0b87f30784977a7e Mon Sep 17 00:00:00 2001
From: Jiri Simsa <jsimsa@google.com>
Date: Mon, 20 Nov 2017 17:53:18 -0800
Subject: [PATCH 0157/1225] Adding support for (nested) batching of sparse
 tensor for tf.data.

PiperOrigin-RevId: 176444931
---
 .../kernel_tests/batch_dataset_op_test.py     | 122 +++++++--
 .../contrib/data/python/ops/batching.py       |   7 -
 tensorflow/contrib/makefile/tf_op_files.txt   |   1 +
 .../base_api/api_def_DeserializeSparse.pbtxt  |  10 +-
 tensorflow/core/kernels/BUILD                 |  22 +-
 tensorflow/core/kernels/reshape_util.cc       | 149 +++++++++++
 tensorflow/core/kernels/reshape_util.h        |  31 +++
 .../core/kernels/serialize_sparse_op.cc       | 238 +++++++++++++-----
 tensorflow/core/kernels/sparse_reshape_op.cc  | 123 +--------
 tensorflow/core/ops/sparse_ops.cc             |  16 +-
 tensorflow/python/data/ops/dataset_ops.py     |   6 +-
 tensorflow/python/data/util/sparse.py         |   1 -
 .../kernel_tests/batch_dataset_op_test.py     |  55 +++-
 .../kernel_tests/sparse_reshape_op_test.py    |   2 +-
 .../sparse_serialization_ops_test.py          |  81 ++++++
 tensorflow/python/ops/sparse_ops.py           |  17 +-
 16 files changed, 638 insertions(+), 243 deletions(-)
 create mode 100644 tensorflow/core/kernels/reshape_util.cc
 create mode 100644 tensorflow/core/kernels/reshape_util.h

diff --git a/tensorflow/contrib/data/python/kernel_tests/batch_dataset_op_test.py b/tensorflow/contrib/data/python/kernel_tests/batch_dataset_op_test.py
index 09416f8302..d7437cba73 100644
--- a/tensorflow/contrib/data/python/kernel_tests/batch_dataset_op_test.py
+++ b/tensorflow/contrib/data/python/kernel_tests/batch_dataset_op_test.py
@@ -104,14 +104,58 @@ class BatchDatasetTest(test.TestCase):
       with self.assertRaises(errors.InvalidArgumentError):
         sess.run(init_op, feed_dict={count: 14, batch_size: 0})
 
-  def testBatchSparseError(self):
+  def assertSparseValuesEqual(self, a, b):
+    self.assertAllEqual(a.indices, b.indices)
+    self.assertAllEqual(a.values, b.values)
+    self.assertAllEqual(a.dense_shape, b.dense_shape)
 
-    def _map_fn(i):
+  def testBatchSparse(self):
+
+    def _sparse(i):
       return sparse_tensor.SparseTensor(
-          indices=[[0, 0]], values=(i * [1]), dense_shape=[1, 1]), i
+          indices=[[0]], values=(i * [1]), dense_shape=[1])
 
-    with self.assertRaises(TypeError):
-      _ = dataset_ops.Dataset.range(10).map(_map_fn).batch(10)
+    iterator = dataset_ops.Dataset.range(10).map(_sparse).batch(
+        5).make_initializable_iterator()
+    init_op = iterator.initializer
+    get_next = iterator.get_next()
+
+    with self.test_session() as sess:
+      sess.run(init_op)
+      for i in range(2):
+        actual = sess.run(get_next)
+        expected = sparse_tensor.SparseTensor(
+            indices=[[0, 0], [1, 0], [2, 0], [3, 0], [4, 0]],
+            values=[i * 5, i * 5 + 1, i * 5 + 2, i * 5 + 3, i * 5 + 4],
+            dense_shape=[5, 1])
+        self.assertTrue(isinstance(actual, sparse_tensor.SparseTensorValue))
+        self.assertSparseValuesEqual(actual, expected.eval())
+      with self.assertRaises(errors.OutOfRangeError):
+        sess.run(get_next)
+
+  def testNestedBatchSparse(self):
+
+    def _sparse(i):
+      return sparse_tensor.SparseTensor(
+          indices=[[0]], values=(i * [1]), dense_shape=[1])
+
+    iterator = dataset_ops.Dataset.range(10).map(_sparse).batch(5).batch(
+        2).make_initializable_iterator()
+    init_op = iterator.initializer
+    get_next = iterator.get_next()
+
+    with self.test_session() as sess:
+      sess.run(init_op)
+      actual = sess.run(get_next)
+      expected = sparse_tensor.SparseTensor(
+          indices=[[0, 0, 0], [0, 1, 0], [0, 2, 0], [0, 3, 0], [0, 4, 0],
+                   [1, 0, 0], [1, 1, 0], [1, 2, 0], [1, 3, 0], [1, 4, 0]],
+          values=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9],
+          dense_shape=[2, 5, 1])
+      self.assertTrue(isinstance(actual, sparse_tensor.SparseTensorValue))
+      self.assertSparseValuesEqual(actual, expected.eval())
+      with self.assertRaises(errors.OutOfRangeError):
+        sess.run(get_next)
 
   def testPaddedBatchDataset(self):
     seq_lens = array_ops.placeholder(dtypes.int32, shape=[None])
@@ -438,6 +482,30 @@ class BatchDatasetTest(test.TestCase):
         with self.assertRaises(errors.OutOfRangeError):
           sess.run(next_element)
 
+  def testBatchAndDropRemainderSparse(self):
+
+    def _sparse(i):
+      return sparse_tensor.SparseTensor(
+          indices=[[0]], values=(i * [1]), dense_shape=[1])
+
+    iterator = dataset_ops.Dataset.range(12).map(_sparse).apply(
+        batching.batch_and_drop_remainder(5)).make_initializable_iterator()
+    init_op = iterator.initializer
+    get_next = iterator.get_next()
+
+    with self.test_session() as sess:
+      sess.run(init_op)
+      for i in range(2):
+        actual = sess.run(get_next)
+        expected = sparse_tensor.SparseTensor(
+            indices=[[0, 0], [1, 0], [2, 0], [3, 0], [4, 0]],
+            values=[i * 5, i * 5 + 1, i * 5 + 2, i * 5 + 3, i * 5 + 4],
+            dense_shape=[5, 1])
+        self.assertTrue(isinstance(actual, sparse_tensor.SparseTensorValue))
+        self.assertSparseValuesEqual(actual, expected.eval())
+      with self.assertRaises(errors.OutOfRangeError):
+        sess.run(get_next)
+
   def testPaddedBatchAndDropRemainder(self):
     els = []
     for length in [3, 6, 9, 4, 12, 10, 2]:
@@ -474,6 +542,16 @@ class BatchDatasetTest(test.TestCase):
         with self.assertRaises(errors.OutOfRangeError):
           sess.run(next_element)
 
+  def testPaddedBatchAndDropRemainderSparseError(self):
+
+    def _map_fn(i):
+      return sparse_tensor.SparseTensor(
+          indices=[[0, 0]], values=(i * [1]), dense_shape=[1, 1]), i
+
+    with self.assertRaises(TypeError):
+      _ = dataset_ops.Dataset.range(10).map(_map_fn).apply(
+          batching.padded_batch_and_drop_remainder(5))
+
   def testBatchAndDropRemainderShapeInference(self):
     components = (array_ops.placeholder(dtypes.int32),
                   (array_ops.placeholder(dtypes.int32, shape=[None]),
@@ -499,16 +577,6 @@ class BatchDatasetTest(test.TestCase):
     self.assertEqual([None], dataset.output_shapes[1][0].as_list())
     self.assertEqual([None, 30], dataset.output_shapes[1][1].as_list())
 
-  def testBatchAndDropRemainderSparseError(self):
-
-    def _map_fn(i):
-      return sparse_tensor.SparseTensor(
-          indices=[[0, 0]], values=(i * [1]), dense_shape=[1, 1]), i
-
-    with self.assertRaises(TypeError):
-      _ = dataset_ops.Dataset.range(10).map(_map_fn).apply(
-          batching.batch_and_drop_remainder(10))
-
   def testBatchAndMapDataset(self):
     """Test a dataset that maps a TF function across its input elements."""
     # The pipeline is TensorSliceDataset ->
@@ -572,6 +640,30 @@ class BatchDatasetTest(test.TestCase):
       with self.assertRaises(errors.InvalidArgumentError):
         sess.run(init_op, feed_dict={count: 14, batch_size: 0})
 
+  def testMapAndBatchSparse(self):
+
+    def _sparse(i):
+      return sparse_tensor.SparseTensor(
+          indices=[[0]], values=(i * [1]), dense_shape=[1])
+
+    iterator = dataset_ops.Dataset.range(10).apply(
+        batching.map_and_batch(_sparse, 5)).make_initializable_iterator()
+    init_op = iterator.initializer
+    get_next = iterator.get_next()
+
+    with self.test_session() as sess:
+      sess.run(init_op)
+      for i in range(2):
+        actual = sess.run(get_next)
+        expected = sparse_tensor.SparseTensor(
+            indices=[[0, 0], [1, 0], [2, 0], [3, 0], [4, 0]],
+            values=[i * 5, i * 5 + 1, i * 5 + 2, i * 5 + 3, i * 5 + 4],
+            dense_shape=[5, 1])
+        self.assertTrue(isinstance(actual, sparse_tensor.SparseTensorValue))
+        self.assertSparseValuesEqual(actual, expected.eval())
+      with self.assertRaises(errors.OutOfRangeError):
+        sess.run(get_next)
+
   def testBatchAndMapDatasetFails(self):
     """Test a dataset that maps a TF function across its input elements."""
     dataset = dataset_ops.Dataset.from_tensors(
diff --git a/tensorflow/contrib/data/python/ops/batching.py b/tensorflow/contrib/data/python/ops/batching.py
index 1ac059b374..63782d229e 100644
--- a/tensorflow/contrib/data/python/ops/batching.py
+++ b/tensorflow/contrib/data/python/ops/batching.py
@@ -353,10 +353,6 @@ class _MapAndBatchDataset(dataset_ops.MapDataset):
   def __init__(self, input_dataset, map_func, batch_size, num_parallel_batches):
     """See `Dataset.map()` for details."""
     super(_MapAndBatchDataset, self).__init__(input_dataset, map_func)
-    if sparse.any_sparse(self._output_types):
-      # TODO(b/63669786): support batching of sparse tensors
-      raise TypeError("Batching of sparse tensors is not currently supported")
-
     self._batch_size = ops.convert_to_tensor(
         batch_size, dtype=dtypes.int64, name="batch_size")
     self._num_parallel_batches = ops.convert_to_tensor(
@@ -422,9 +418,6 @@ def map_and_batch(map_func, batch_size, num_parallel_batches=1):
   """
 
   def _apply_fn(dataset):
-    if sparse.any_sparse(dataset.output_types):
-      # TODO(b/63669786): support batching of sparse tensors
-      raise TypeError("Batching of sparse tensors is not currently supported")
     return _MapAndBatchDataset(dataset, map_func, batch_size,
                                num_parallel_batches)
 
diff --git a/tensorflow/contrib/makefile/tf_op_files.txt b/tensorflow/contrib/makefile/tf_op_files.txt
index 97351b2c51..ff612f1fdf 100644
--- a/tensorflow/contrib/makefile/tf_op_files.txt
+++ b/tensorflow/contrib/makefile/tf_op_files.txt
@@ -78,6 +78,7 @@ tensorflow/core/kernels/reverse_op.cc
 tensorflow/core/kernels/restore_op.cc
 tensorflow/core/kernels/resize_nearest_neighbor_op.cc
 tensorflow/core/kernels/resize_bilinear_op.cc
+tensorflow/core/kernels/reshape_util.cc
 tensorflow/core/kernels/reshape_op.cc
 tensorflow/core/kernels/relu_op.cc
 tensorflow/core/kernels/reduction_ops_sum.cc
diff --git a/tensorflow/core/api_def/base_api/api_def_DeserializeSparse.pbtxt b/tensorflow/core/api_def/base_api/api_def_DeserializeSparse.pbtxt
index c86f059eb3..00e96c8a15 100644
--- a/tensorflow/core/api_def/base_api/api_def_DeserializeSparse.pbtxt
+++ b/tensorflow/core/api_def/base_api/api_def_DeserializeSparse.pbtxt
@@ -3,17 +3,15 @@ op {
   in_arg {
     name: "serialized_sparse"
     description: <<END
-1-D, The serialized `SparseTensor` object. Must have 3 columns.
+The serialized `SparseTensor` objects. The last dimension
+must have 3 columns.
 END
   }
   attr {
     name: "dtype"
     description: <<END
-The `dtype` of the serialized `SparseTensor` object.
+The `dtype` of the serialized `SparseTensor` objects.
 END
   }
-  summary: "Deserialize `SparseTensor` from a (serialized) string 3-vector (1-D `Tensor`)"
-  description: <<END
-object.
-END
+  summary: "Deserialize `SparseTensor` objects."
 }
diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD
index 3cab6acc42..d7b457eab7 100644
--- a/tensorflow/core/kernels/BUILD
+++ b/tensorflow/core/kernels/BUILD
@@ -291,6 +291,17 @@ tf_cc_test(
     ],
 )
 
+cc_library(
+    name = "reshape_util",
+    srcs = ["reshape_util.cc"],
+    hdrs = ["reshape_util.h"],
+    deps = [
+        "//tensorflow/core:framework",
+        "//tensorflow/core:lib",
+        "//tensorflow/core:protos_all_cc",
+    ],
+)
+
 tf_cc_test(
     name = "variable_ops_test",
     size = "small",
@@ -3666,7 +3677,9 @@ tf_kernel_library(
 tf_kernel_library(
     name = "sparse_reshape_op",
     prefix = "sparse_reshape_op",
-    deps = SPARSE_DEPS,
+    deps = SPARSE_DEPS + [
+        ":reshape_util",
+    ],
 )
 
 tf_kernel_library(
@@ -3714,7 +3727,10 @@ tf_kernel_library(
 tf_kernel_library(
     name = "serialize_sparse_op",
     prefix = "serialize_sparse_op",
-    deps = SPARSE_DEPS + ["//tensorflow/core:protos_all_cc"],
+    deps = SPARSE_DEPS + [
+        ":reshape_util",
+        "//tensorflow/core:protos_all_cc",
+    ],
 )
 
 tf_kernel_library(
@@ -4546,6 +4562,7 @@ filegroup(
         "reduction_ops_common.h",
         "relu_op.h",
         "relu_op_functor.h",
+        "reshape_util.h",
         "resize_bilinear_op.h",
         "resize_nearest_neighbor_op.h",
         "reverse_op.h",
@@ -4701,6 +4718,7 @@ filegroup(
         "reduction_ops_prod.cc",
         "reduction_ops_sum.cc",
         "relu_op.cc",
+        "reshape_util.cc",
         "resize_bilinear_op.cc",
         "resize_nearest_neighbor_op.cc",
         "restore_op.cc",
diff --git a/tensorflow/core/kernels/reshape_util.cc b/tensorflow/core/kernels/reshape_util.cc
new file mode 100644
index 0000000000..4188ad233e
--- /dev/null
+++ b/tensorflow/core/kernels/reshape_util.cc
@@ -0,0 +1,149 @@
+/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#define EIGEN_USE_THREADS
+
+#include <algorithm>
+#include <numeric>
+#include <unordered_map>
+#include <utility>
+#include <vector>
+
+#include "tensorflow/core/framework/op_kernel.h"
+#include "tensorflow/core/framework/register_types.h"
+#include "tensorflow/core/framework/tensor.h"
+#include "tensorflow/core/framework/tensor_util.h"
+#include "tensorflow/core/framework/types.h"
+#include "tensorflow/core/lib/gtl/inlined_vector.h"
+#include "tensorflow/core/util/sparse/sparse_tensor.h"
+
+namespace tensorflow {
+
+void Reshape(OpKernelContext *context, const Tensor &input_indices_in,
+             const Tensor &input_shape_in, const Tensor &target_shape_in,
+             int output_indices_idx, int output_shape_idx) {
+  OP_REQUIRES(context, TensorShapeUtils::IsMatrix(input_indices_in.shape()),
+              errors::InvalidArgument(
+                  "Input indices should be a matrix but received shape ",
+                  input_indices_in.shape().DebugString()));
+  OP_REQUIRES(context, TensorShapeUtils::IsVector(input_shape_in.shape()),
+              errors::InvalidArgument(
+                  "Input shape should be a vector but received shape ",
+                  input_shape_in.shape().DebugString()));
+  OP_REQUIRES(context, TensorShapeUtils::IsVector(target_shape_in.shape()),
+              errors::InvalidArgument(
+                  "Target shape should be a vector but received shape ",
+                  target_shape_in.shape().DebugString()));
+
+  const int64 input_rank = input_shape_in.NumElements();
+  const int64 output_rank = target_shape_in.NumElements();
+  const TensorShape input_shape(input_shape_in.vec<int64>());
+  const int64 dense_size = input_shape.num_elements();
+  const int64 nnz = input_indices_in.shape().dim_size(0);
+
+  // Compute the output shape. Determine product of specified dimensions, and
+  // find the index of the unspecified one.
+  TensorShape output_shape;
+  int64 product = 1;
+  int unknown_index = -1;
+  auto target_shape = target_shape_in.vec<int64>();
+  for (int d = 0; d < output_rank; ++d) {
+    const int64 size = target_shape(d);
+    if (size == -1) {
+      OP_REQUIRES(
+          context, unknown_index == -1,
+          errors::InvalidArgument("only one output dimension may be -1, "
+                                  "not both ",
+                                  unknown_index, " and ", d));
+      unknown_index = d;
+      output_shape.AddDim(1);
+    } else {
+      OP_REQUIRES(context, size >= 0,
+                  errors::InvalidArgument("size ", d,
+                                          " must be non-negative, not ", size));
+      product *= size;
+      output_shape.AddDim(size);
+    }
+  }
+  if (unknown_index != -1) {
+    OP_REQUIRES(
+        context, product > 0,
+        errors::InvalidArgument("reshape cannot infer the missing "
+                                "input size for an empty tensor unless all "
+                                "specified input sizes are non-zero"));
+    const int64 missing = dense_size / product;
+    OP_REQUIRES(
+        context, product * missing == dense_size,
+        errors::InvalidArgument(
+            "Input to reshape is a SparseTensor with ", dense_size,
+            " dense values, but the requested shape requires a multiple of ",
+            product));
+    output_shape.set_dim(unknown_index, missing);
+  }
+
+  OP_REQUIRES(
+      context, output_shape.num_elements() == dense_size,
+      errors::InvalidArgument("Input to reshape is a tensor with ", dense_size,
+                              " dense values, but the requested shape has ",
+                              output_shape.num_elements()));
+
+  // Optimize for reshaping to the same shape.
+  if (input_shape == output_shape) {
+    context->set_output(output_indices_idx, input_indices_in);
+    context->set_output(output_shape_idx, input_shape_in);
+    return;
+  }
+
+  gtl::InlinedVector<int64, 8> input_strides(input_rank);
+  input_strides[input_rank - 1] = 1;
+  for (int d = input_rank - 2; d >= 0; --d) {
+    input_strides[d] = input_strides[d + 1] * input_shape.dim_size(d + 1);
+  }
+
+  gtl::InlinedVector<int64, 8> output_strides(output_rank);
+  output_strides[output_rank - 1] = 1;
+  for (int d = output_rank - 2; d >= 0; --d) {
+    output_strides[d] = output_strides[d + 1] * output_shape.dim_size(d + 1);
+  }
+
+  Tensor *result_indices = nullptr;
+  OP_REQUIRES_OK(context,
+                 context->allocate_output(output_indices_idx,
+                                          TensorShape({nnz, output_rank}),
+                                          &result_indices));
+  auto input_ind = input_indices_in.matrix<int64>();
+  auto output_ind = result_indices->matrix<int64>();
+  for (int i = 0; i < nnz; ++i) {
+    int64 id = 0;
+    for (int j = 0; j < input_rank; ++j) {
+      id += input_ind(i, j) * input_strides[j];
+    }
+    for (int j = 0; j < output_rank; ++j) {
+      output_ind(i, j) = id / output_strides[j];
+      id %= output_strides[j];
+    }
+  }
+
+  Tensor *result_shape = nullptr;
+  OP_REQUIRES_OK(context, context->allocate_output(output_shape_idx,
+                                                   TensorShape({output_rank}),
+                                                   &result_shape));
+  auto output_shape_vec = result_shape->vec<int64>();
+  for (int j = 0; j < output_shape.dims(); ++j) {
+    output_shape_vec(j) = output_shape.dim_size(j);
+  }
+}
+
+}  // namespace tensorflow
diff --git a/tensorflow/core/kernels/reshape_util.h b/tensorflow/core/kernels/reshape_util.h
new file mode 100644
index 0000000000..ed583afd13
--- /dev/null
+++ b/tensorflow/core/kernels/reshape_util.h
@@ -0,0 +1,31 @@
+
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef THIRD_PARTY_TENSORFLOW_CORE_KERNELS_RESHAPE_UTIL_H_
+#define THIRD_PARTY_TENSORFLOW_CORE_KERNELS_RESHAPE_UTIL_H_
+
+#include "tensorflow/core/framework/tensor.h"
+#include "tensorflow/core/lib/core/status.h"
+
+namespace tensorflow {
+
+// Reshapes the input indices and input shape to the target shape.
+void Reshape(OpKernelContext *context, const Tensor &input_indices_in,
+             const Tensor &input_shape_in, const Tensor &target_shape_in,
+             int output_indices_idx, int output_shape_idx);
+
+}  // namespace tensorflow
+
+#endif  // THIRD_PARTY_TENSORFLOW_CORE_KERNELS_RESHAPE_UTIL_H_
diff --git a/tensorflow/core/kernels/serialize_sparse_op.cc b/tensorflow/core/kernels/serialize_sparse_op.cc
index ac58c3d1ea..161c505e84 100644
--- a/tensorflow/core/kernels/serialize_sparse_op.cc
+++ b/tensorflow/core/kernels/serialize_sparse_op.cc
@@ -27,6 +27,7 @@ limitations under the License.
 #include "tensorflow/core/framework/tensor.pb.h"
 #include "tensorflow/core/framework/tensor_util.h"
 #include "tensorflow/core/framework/types.h"
+#include "tensorflow/core/kernels/reshape_util.h"
 #include "tensorflow/core/lib/gtl/inlined_vector.h"
 #include "tensorflow/core/util/sparse/sparse_tensor.h"
 
@@ -215,84 +216,185 @@ class DeserializeSparseOp : public OpKernel {
 
   void Compute(OpKernelContext* context) override {
     const Tensor& serialized_sparse = context->input(0);
-    OP_REQUIRES(context, TensorShapeUtils::IsVector(serialized_sparse.shape()),
+    const int ndims = serialized_sparse.shape().dims();
+
+    OP_REQUIRES(
+        context, ndims > 0,
+        errors::InvalidArgument("Serialized sparse should have non-zero rank ",
+                                serialized_sparse.shape().DebugString()));
+
+    OP_REQUIRES(context, serialized_sparse.shape().dim_size(ndims - 1) == 3,
                 errors::InvalidArgument(
-                    "Serialized sparse should be a vector but received shape ",
+                    "Serialized sparse should have 3 as the last dimension ",
                     serialized_sparse.shape().DebugString()));
+
+    int num_sparse_tensors = 1;
+    for (int i = 0; i < ndims - 1; ++i) {
+      num_sparse_tensors *= serialized_sparse.shape().dim_size(i);
+    }
+
     OP_REQUIRES(
-        context, serialized_sparse.shape().dim_size(0) == 3,
+        context, num_sparse_tensors > 0,
         errors::InvalidArgument(
-            "Serialize sparse should have 3 columns but received shape ",
+            "Serialized sparse should have at least 1 serialized tensor, "
+            "but has a zero dimension ",
             serialized_sparse.shape().DebugString()));
 
-    Tensor output_indices(DT_INT64);
-    Tensor output_values(DataTypeToEnum<T>::value);
-    Tensor output_shape(DT_INT64);
-    TensorProto proto_indices;
-    TensorProto proto_values;
-    TensorProto proto_shape;
+    std::vector<Tensor> indices;
+    std::vector<Tensor> values;
+    TensorShape shape;
+    indices.reserve(num_sparse_tensors);
+    values.reserve(num_sparse_tensors);
 
-    const auto& serialized_sparse_t = serialized_sparse.vec<string>();
+    const auto& serialized_sparse_t =
+        serialized_sparse.flat_inner_dims<string, 2>();
 
-    OP_REQUIRES(
-        context, ParseProtoUnlimited(&proto_indices, serialized_sparse_t(0)),
-        errors::InvalidArgument("Could not parse serialized_sparse[0]"));
-    OP_REQUIRES(
-        context, ParseProtoUnlimited(&proto_values, serialized_sparse_t(1)),
-        errors::InvalidArgument("Could not parse serialized_sparse[1]"));
-    OP_REQUIRES(
-        context, ParseProtoUnlimited(&proto_shape, serialized_sparse_t(2)),
-        errors::InvalidArgument("Could not parse serialized_sparse[2]"));
+    for (int i = 0; i < num_sparse_tensors; ++i) {
+      Tensor output_indices(DT_INT64);
+      Tensor output_values(DataTypeToEnum<T>::value);
+      Tensor output_shape(DT_INT64);
+      TensorProto proto_indices;
+      TensorProto proto_values;
+      TensorProto proto_shape;
 
-    OP_REQUIRES(
-        context, output_indices.FromProto(proto_indices),
-        errors::InvalidArgument(
-            "Could not construct Tensor serialized_sparse[0] (indices)"));
-    OP_REQUIRES(
-        context, TensorShapeUtils::IsMatrix(output_indices.shape()),
-        errors::InvalidArgument("Expected serialized_sparse[0] to represent an "
-                                "index matrix but received shape ",
-                                output_indices.shape().DebugString()));
-    OP_REQUIRES(
-        context, output_values.FromProto(proto_values),
-        errors::InvalidArgument(
-            "Could not construct Tensor serialized_sparse[1] (values)"));
-    OP_REQUIRES(
-        context, TensorShapeUtils::IsVector(output_values.shape()),
-        errors::InvalidArgument("Expected serialized_sparse[1] to represent a "
-                                "values vector but received shape ",
-                                output_values.shape().DebugString()));
-    OP_REQUIRES(context, output_shape.FromProto(proto_shape),
-                errors::InvalidArgument(
-                    "Could not construct Tensor serialized_sparse[2] (shape)"));
-    OP_REQUIRES(context, TensorShapeUtils::IsVector(output_shape.shape()),
-                errors::InvalidArgument("Expected serialized_sparse[2] to be a "
-                                        "shape vector but its shape is ",
-                                        output_shape.shape().DebugString()));
+      OP_REQUIRES(
+          context,
+          ParseProtoUnlimited(&proto_indices, serialized_sparse_t(i, 0)),
+          errors::InvalidArgument("Could not parse serialized_sparse[", i,
+                                  ", 0]"));
+      OP_REQUIRES(context,
+                  ParseProtoUnlimited(&proto_values, serialized_sparse_t(i, 1)),
+                  errors::InvalidArgument("Could not parse serialized_sparse[",
+                                          i, ", 1]"));
+      OP_REQUIRES(context,
+                  ParseProtoUnlimited(&proto_shape, serialized_sparse_t(i, 2)),
+                  errors::InvalidArgument("Could not parse serialized_sparse[",
+                                          i, ", 2]"));
 
-    OP_REQUIRES(
-        context, DataTypeToEnum<T>::value == output_values.dtype(),
-        errors::InvalidArgument("Requested SparseTensor of type ",
-                                DataTypeString(DataTypeToEnum<T>::value),
-                                " but SparseTensor.values.dtype() == ",
-                                DataTypeString(output_values.dtype())));
-
-    int64 num_entries = output_indices.dim_size(0);
-    OP_REQUIRES(context, num_entries == output_values.dim_size(0),
-                errors::InvalidArgument(
-                    "Expected row counts of SparseTensor.indices and "
-                    "SparseTensor.values to match but they do not: ",
-                    num_entries, " vs. ", output_values.dim_size(0)));
-    int rank = output_indices.dim_size(1);
-    OP_REQUIRES(context, rank == output_shape.dim_size(0),
-                errors::InvalidArgument(
-                    "Expected column counts of SparseTensor.indices to match "
-                    "size of SparseTensor.shape but they do not: ",
-                    rank, " vs. ", output_shape.dim_size(0)));
+      OP_REQUIRES(context, output_indices.FromProto(proto_indices),
+                  errors::InvalidArgument(
+                      "Could not construct Tensor serialized_sparse[", i,
+                      ", 0] (indices)"));
+      OP_REQUIRES(context, TensorShapeUtils::IsMatrix(output_indices.shape()),
+                  errors::InvalidArgument(
+                      "Expected serialized_sparse[", i,
+                      ", 0] to represent an index matrix but received shape ",
+                      output_indices.shape().DebugString()));
+      OP_REQUIRES(context, output_values.FromProto(proto_values),
+                  errors::InvalidArgument(
+                      "Could not construct Tensor serialized_sparse[", i,
+                      ", 1] (values)"));
+      OP_REQUIRES(context, TensorShapeUtils::IsVector(output_values.shape()),
+                  errors::InvalidArgument(
+                      "Expected serialized_sparse[", i,
+                      ", 1] to represent a values vector but received shape ",
+                      output_values.shape().DebugString()));
+      OP_REQUIRES(context, output_shape.FromProto(proto_shape),
+                  errors::InvalidArgument(
+                      "Could not construct Tensor serialized_sparse[", i,
+                      ", 2] (shape)"));
+      OP_REQUIRES(
+          context, TensorShapeUtils::IsVector(output_shape.shape()),
+          errors::InvalidArgument("Expected serialized_sparse[", i,
+                                  ", 1] to be a shape vector but its shape is ",
+                                  output_shape.shape().DebugString()));
+
+      OP_REQUIRES(
+          context, DataTypeToEnum<T>::value == output_values.dtype(),
+          errors::InvalidArgument(
+              "Requested SparseTensor of type ",
+              DataTypeString(DataTypeToEnum<T>::value), " but SparseTensor[", i,
+              "].values.dtype() == ", DataTypeString(output_values.dtype())));
+
+      int64 num_entries = output_indices.dim_size(0);
+      OP_REQUIRES(context, num_entries == output_values.dim_size(0),
+                  errors::InvalidArgument(
+                      "Expected row counts of SparseTensor[", i,
+                      "].indices and SparseTensor[", i,
+                      "].values to match but they do not: ", num_entries,
+                      " vs. ", output_values.dim_size(0)));
+      int rank = output_indices.dim_size(1);
+      OP_REQUIRES(
+          context, rank == output_shape.dim_size(0),
+          errors::InvalidArgument("Expected column counts of SparseTensor[", i,
+                                  "].indices to match size of SparseTensor[", i,
+                                  "].shape but they do not: ", rank, " vs. ",
+                                  output_shape.dim_size(0)));
+
+      // Now we expand each SparseTensors' indices and shape by
+      // prefixing a dimension
+      Tensor expanded_indices(DT_INT64, TensorShape({num_entries, 1 + rank}));
+      const auto& output_indices_t = output_indices.matrix<int64>();
+      auto expanded_indices_t = expanded_indices.matrix<int64>();
+      expanded_indices_t.chip<1>(0).setZero();
+      Eigen::DSizes<Eigen::DenseIndex, 2> indices_start(0, 1);
+      Eigen::DSizes<Eigen::DenseIndex, 2> indices_sizes(num_entries, rank);
+      expanded_indices_t.slice(indices_start, indices_sizes) = output_indices_t;
+
+      Tensor expanded_shape(DT_INT64, TensorShape({1 + rank}));
+      const auto& output_shape_t = output_shape.vec<int64>();
+      auto expanded_shape_t = expanded_shape.vec<int64>();
+      expanded_shape_t(0) = 1;
+      std::copy_n(&output_shape_t(0), rank, &expanded_shape_t(1));
+
+      TensorShape expanded_tensor_shape(expanded_shape.vec<int64>());
 
-    context->set_output(0, output_indices);
-    context->set_output(1, output_values);
-    context->set_output(2, output_shape);
+      indices.push_back(expanded_indices);
+      values.push_back(output_values);
+      if (i == 0) {
+        shape = expanded_tensor_shape;
+      } else {
+        OP_REQUIRES(
+            context, shape.dims() == expanded_tensor_shape.dims(),
+            errors::InvalidArgument(
+                "Inconsistent shape across SparseTensors: rank prior to "
+                "SparseTensor[",
+                i, "] was: ", shape.dims() - 1, " but rank of SparseTensor[", i,
+                "] is: ", expanded_tensor_shape.dims() - 1));
+        for (int j = 1; j < shape.dims(); ++j) {
+          OP_REQUIRES(
+              context, shape.dim_size(j) == expanded_tensor_shape.dim_size(j),
+              errors::InvalidArgument(
+                  "Inconsistent shape across SparseTensors: dimension ", j - 1,
+                  " prior to SparseTensor[", i, "] was: ", shape.dim_size(j),
+                  " but rank of SparseTensor[", i,
+                  "] is: ", expanded_tensor_shape.dim_size(j)));
+        }
+      }
+    }
+
+    // Dimension 0 is the primary dimension.
+    int rank = shape.dims();
+    gtl::InlinedVector<int64, 8> std_order(rank);
+    std::iota(std_order.begin(), std_order.end(), 0);
+
+    std::vector<SparseTensor> tensors;
+    tensors.reserve(num_sparse_tensors);
+    for (int i = 0; i < num_sparse_tensors; ++i) {
+      tensors.emplace_back(indices[i], values[i], shape, std_order);
+    }
+
+    SparseTensor output = SparseTensor::Concat<T>(tensors);
+
+    // Compute the input shape for the reshape operation.
+    Tensor input_shape(DT_INT64, TensorShape({output.dims()}));
+    std::copy_n(output.shape().data(), output.dims(),
+                input_shape.vec<int64>().data());
+
+    // Compute the target shape for the reshape operation.
+    Tensor target_shape(DT_INT64, TensorShape({ndims + output.dims() - 2}));
+    for (int i = 0; i < ndims - 1; ++i) {
+      target_shape.vec<int64>()(i) = serialized_sparse.shape().dim_size(i);
+    }
+    for (int i = 0; i < output.dims() - 1; ++i) {
+      target_shape.vec<int64>()(i + ndims - 1) = output.shape().data()[i + 1];
+    }
+
+    Tensor output_indices;
+    Tensor output_shape;
+    Reshape(context, output.indices(), input_shape, target_shape,
+            0 /* output indices index */, 2 /* output shape index */);
+    context->set_output(1, output.values());
   }
 };
 
@@ -320,7 +422,7 @@ class DeserializeManySparseOp : public OpKernel {
     OP_REQUIRES(
         context, serialized_sparse.shape().dim_size(1) == 3,
         errors::InvalidArgument(
-            "Serialize sparse should have 3 columns but received shape ",
+            "Serialized sparse should have 3 columns but received shape ",
             serialized_sparse.shape().DebugString()));
 
     int num_sparse_tensors = serialized_sparse.shape().dim_size(0);
diff --git a/tensorflow/core/kernels/sparse_reshape_op.cc b/tensorflow/core/kernels/sparse_reshape_op.cc
index f0f353871d..939d404aa4 100644
--- a/tensorflow/core/kernels/sparse_reshape_op.cc
+++ b/tensorflow/core/kernels/sparse_reshape_op.cc
@@ -24,6 +24,7 @@ limitations under the License.
 #include "tensorflow/core/framework/register_types.h"
 #include "tensorflow/core/framework/tensor.h"
 #include "tensorflow/core/framework/types.h"
+#include "tensorflow/core/kernels/reshape_util.h"
 #include "tensorflow/core/lib/gtl/inlined_vector.h"
 
 namespace tensorflow {
@@ -33,124 +34,10 @@ class SparseReshapeOp : public OpKernel {
   explicit SparseReshapeOp(OpKernelConstruction* context) : OpKernel(context) {}
 
   void Compute(OpKernelContext* context) override {
-    const Tensor& input_ind_in = context->input(0);
-    OP_REQUIRES(context, TensorShapeUtils::IsMatrix(input_ind_in.shape()),
-                errors::InvalidArgument(
-                    "Input indices should be a matrix but received shape ",
-                    input_ind_in.shape().DebugString()));
-
-    const Tensor& input_shape_in = context->input(1);
-    OP_REQUIRES(context, TensorShapeUtils::IsVector(input_shape_in.shape()),
-                errors::InvalidArgument(
-                    "Input shape should be a vector but received shape ",
-                    input_shape_in.shape().DebugString()));
-
-    const Tensor& new_shape_in = context->input(2);
-    OP_REQUIRES(context, TensorShapeUtils::IsVector(new_shape_in.shape()),
-                errors::InvalidArgument(
-                    "New shape should be a vector but received shape ",
-                    new_shape_in.shape().DebugString()));
-
-    const int64 input_rank = input_shape_in.NumElements();
-    const int64 output_rank = new_shape_in.NumElements();
-
-    const TensorShape input_shape(input_shape_in.vec<int64>());
-    const int64 dense_size = input_shape.num_elements();
-
-    const int64 nnz = input_ind_in.shape().dim_size(0);
-
-    // Compute the output shape.  Determine product of specified
-    // dimensions, and find the index of the unspecified one. Largely the
-    // same calculation as reshape_op
-    TensorShape output_shape;
-    int64 product = 1;
-    int unknown_index = -1;
-    auto new_shape = new_shape_in.vec<int64>();
-    for (int d = 0; d < output_rank; ++d) {
-      const int64 size = new_shape(d);
-      if (size == -1) {
-        OP_REQUIRES(
-            context, unknown_index == -1,
-            errors::InvalidArgument("only one output shape size may be -1, "
-                                    "not both ",
-                                    unknown_index, " and ", d));
-        unknown_index = d;
-        output_shape.AddDim(1);
-      } else {
-        OP_REQUIRES(context, size >= 0,
-                    errors::InvalidArgument(
-                        "size ", d, " must be non-negative, not ", size));
-        output_shape.AddDim(size);
-        product *= size;
-      }
-    }
-    if (unknown_index != -1) {
-      OP_REQUIRES(
-          context, product > 0,
-          errors::InvalidArgument("SparseReshape cannot infer the missing "
-                                  "input size for an empty tensor unless all "
-                                  "specified input sizes are non-zero"));
-      const int64 missing = dense_size / product;
-      OP_REQUIRES(
-          context, product * missing == dense_size,
-          errors::InvalidArgument(
-              "Input to reshape is a SparseTensor with ", dense_size,
-              " dense values, but the requested shape requires a multiple of ",
-              product));
-      output_shape.set_dim(unknown_index, missing);
-    }
-
-    OP_REQUIRES(context, output_shape.num_elements() == dense_size,
-                errors::InvalidArgument("Input to reshape is a tensor with ",
-                                        dense_size,
-                                        " dense values, but the "
-                                        "requested shape has ",
-                                        output_shape.num_elements()));
-
-    // Optimize for reshaping to the same shape.
-    if (input_shape == output_shape) {
-      context->set_output(0, input_ind_in);
-      context->set_output(1, input_shape_in);
-      return;
-    }
-
-    gtl::InlinedVector<int64, 8> input_strides(input_rank);
-    input_strides[input_rank - 1] = 1;
-    for (int d = input_rank - 2; d >= 0; --d) {
-      input_strides[d] = input_strides[d + 1] * input_shape.dim_size(d + 1);
-    }
-
-    gtl::InlinedVector<int64, 8> output_strides(output_rank);
-    output_strides[output_rank - 1] = 1;
-    for (int d = output_rank - 2; d >= 0; --d) {
-      output_strides[d] = output_strides[d + 1] * output_shape.dim_size(d + 1);
-    }
-
-    Tensor* output_ind_out = nullptr;
-    OP_REQUIRES_OK(context,
-                   context->allocate_output(0, TensorShape({nnz, output_rank}),
-                                            &output_ind_out));
-    auto input_ind = input_ind_in.matrix<int64>();
-    auto output_ind = output_ind_out->matrix<int64>();
-    for (int i = 0; i < nnz; ++i) {
-      int64 id = 0;
-      for (int j = 0; j < input_rank; ++j) {
-        id += input_ind(i, j) * input_strides[j];
-      }
-      for (int j = 0; j < output_rank; ++j) {
-        output_ind(i, j) = id / output_strides[j];
-        id %= output_strides[j];
-      }
-    }
-
-    Tensor* output_shape_out = nullptr;
-    OP_REQUIRES_OK(context,
-                   context->allocate_output(1, TensorShape({output_rank}),
-                                            &output_shape_out));
-    auto output_shape_vec = output_shape_out->vec<int64>();
-    for (int j = 0; j < output_shape.dims(); ++j) {
-      output_shape_vec(j) = output_shape.dim_size(j);
-    }
+    Tensor output_indices;
+    Tensor output_shape;
+    Reshape(context, context->input(0), context->input(1), context->input(2),
+            0 /* output indices index */, 1 /* output shape index */);
   }
 };
 
diff --git a/tensorflow/core/ops/sparse_ops.cc b/tensorflow/core/ops/sparse_ops.cc
index 8b6106f2a4..8414519f0b 100644
--- a/tensorflow/core/ops/sparse_ops.cc
+++ b/tensorflow/core/ops/sparse_ops.cc
@@ -244,13 +244,9 @@ REGISTER_OP("DeserializeSparse")
     .Output("sparse_values: dtype")
     .Output("sparse_shape: int64")
     .SetShapeFn([](InferenceContext* c) {
-      // serialized sparse is [3] vector.
-      ShapeHandle serialized_sparse;
-      TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 1, &serialized_sparse));
+      // serialized sparse is [?, ..., ?, 3] vector.
       DimensionHandle unused;
-      TF_RETURN_IF_ERROR(
-          c->WithValue(c->Dim(serialized_sparse, 0), 3, &unused));
-
+      TF_RETURN_IF_ERROR(c->WithValue(c->Dim(c->input(0), -1), 3, &unused));
       c->set_output(0, c->Matrix(InferenceContext::kUnknownDim,
                                  InferenceContext::kUnknownDim));
       c->set_output(1, c->Vector(InferenceContext::kUnknownDim));
@@ -258,11 +254,11 @@ REGISTER_OP("DeserializeSparse")
       return Status::OK();
     })
     .Doc(R"doc(
-Deserialize `SparseTensor` from a (serialized) string 3-vector (1-D `Tensor`)
-object.
+Deserialize `SparseTensor` objects.
 
-serialized_sparse: 1-D, The serialized `SparseTensor` object. Must have 3 columns.
-dtype: The `dtype` of the serialized `SparseTensor` object.
+serialized_sparse: The serialized `SparseTensor` objects. The last dimension
+  must have 3 columns.
+dtype: The `dtype` of the serialized `SparseTensor` objects.
 )doc");
 
 REGISTER_OP("DeserializeManySparse")
diff --git a/tensorflow/python/data/ops/dataset_ops.py b/tensorflow/python/data/ops/dataset_ops.py
index d6efb7fa9a..00ac3334b0 100644
--- a/tensorflow/python/data/ops/dataset_ops.py
+++ b/tensorflow/python/data/ops/dataset_ops.py
@@ -1332,9 +1332,6 @@ class BatchDataset(Dataset):
   def __init__(self, input_dataset, batch_size):
     """See `Dataset.batch()` for details."""
     super(BatchDataset, self).__init__()
-    if sparse.any_sparse(input_dataset.output_classes):
-      # TODO(b/63669786): support batching of sparse tensors
-      raise TypeError("Batching of sparse tensors is not currently supported")
     self._input_dataset = input_dataset
     self._batch_size = ops.convert_to_tensor(
         batch_size, dtype=dtypes.int64, name="batch_size")
@@ -1412,7 +1409,8 @@ class PaddedBatchDataset(Dataset):
     super(PaddedBatchDataset, self).__init__()
     if sparse.any_sparse(input_dataset.output_classes):
       # TODO(b/63669786): support batching of sparse tensors
-      raise TypeError("Batching of sparse tensors is not currently supported")
+      raise TypeError(
+          "Batching of padded sparse tensors is not currently supported")
     self._input_dataset = input_dataset
     self._batch_size = ops.convert_to_tensor(
         batch_size, dtype=dtypes.int64, name="batch_size")
diff --git a/tensorflow/python/data/util/sparse.py b/tensorflow/python/data/util/sparse.py
index 4d25f6a963..b4219198d3 100644
--- a/tensorflow/python/data/util/sparse.py
+++ b/tensorflow/python/data/util/sparse.py
@@ -88,7 +88,6 @@ def deserialize_sparse_tensors(tensors, types, shapes, classes):
     `tensors` with any serialized sparse tensors replaced by their deserialized
     version.
   """
-  # TODO(b/63669786): support batching of sparse tensors
   ret = nest.pack_sequence_as(types, [
       sparse_ops.deserialize_sparse(tensor, dtype=ty, rank=shape.ndims)
       if c is sparse_tensor.SparseTensor else tensor
diff --git a/tensorflow/python/kernel_tests/batch_dataset_op_test.py b/tensorflow/python/kernel_tests/batch_dataset_op_test.py
index 236c5bc4ff..513dfb1ec3 100644
--- a/tensorflow/python/kernel_tests/batch_dataset_op_test.py
+++ b/tensorflow/python/kernel_tests/batch_dataset_op_test.py
@@ -101,13 +101,58 @@ class BatchDatasetTest(test.TestCase):
       with self.assertRaises(errors.InvalidArgumentError):
         sess.run(init_op, feed_dict={count: 14, batch_size: 0})
 
-  def testBatchSparseError(self):
-    def _map_fn(i):
+  def assertSparseValuesEqual(self, a, b):
+    self.assertAllEqual(a.indices, b.indices)
+    self.assertAllEqual(a.values, b.values)
+    self.assertAllEqual(a.dense_shape, b.dense_shape)
+
+  def testBatchSparse(self):
+
+    def _sparse(i):
       return sparse_tensor.SparseTensor(
-          indices=[[0, 0]], values=(i * [1]), dense_shape=[1, 1]), i
+          indices=[[0]], values=(i * [1]), dense_shape=[1])
 
-    with self.assertRaises(TypeError):
-      _ = dataset_ops.Dataset.range(10).map(_map_fn).batch(10)
+    iterator = dataset_ops.Dataset.range(10).map(_sparse).batch(
+        5).make_initializable_iterator()
+    init_op = iterator.initializer
+    get_next = iterator.get_next()
+
+    with self.test_session() as sess:
+      sess.run(init_op)
+      for i in range(2):
+        actual = sess.run(get_next)
+        expected = sparse_tensor.SparseTensor(
+            indices=[[0, 0], [1, 0], [2, 0], [3, 0], [4, 0]],
+            values=[i * 5, i * 5 + 1, i * 5 + 2, i * 5 + 3, i * 5 + 4],
+            dense_shape=[5, 1])
+        self.assertTrue(isinstance(actual, sparse_tensor.SparseTensorValue))
+        self.assertSparseValuesEqual(actual, expected.eval())
+      with self.assertRaises(errors.OutOfRangeError):
+        sess.run(get_next)
+
+  def testNestedBatchSparse(self):
+
+    def _sparse(i):
+      return sparse_tensor.SparseTensor(
+          indices=[[0]], values=(i * [1]), dense_shape=[1])
+
+    iterator = dataset_ops.Dataset.range(10).map(_sparse).batch(5).batch(
+        2).make_initializable_iterator()
+    init_op = iterator.initializer
+    get_next = iterator.get_next()
+
+    with self.test_session() as sess:
+      sess.run(init_op)
+      actual = sess.run(get_next)
+      expected = sparse_tensor.SparseTensor(
+          indices=[[0, 0, 0], [0, 1, 0], [0, 2, 0], [0, 3, 0], [0, 4, 0],
+                   [1, 0, 0], [1, 1, 0], [1, 2, 0], [1, 3, 0], [1, 4, 0]],
+          values=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9],
+          dense_shape=[2, 5, 1])
+      self.assertTrue(isinstance(actual, sparse_tensor.SparseTensorValue))
+      self.assertSparseValuesEqual(actual, expected.eval())
+      with self.assertRaises(errors.OutOfRangeError):
+        sess.run(get_next)
 
   def testPaddedBatchDataset(self):
     seq_lens = array_ops.placeholder(dtypes.int32, shape=[None])
diff --git a/tensorflow/python/kernel_tests/sparse_reshape_op_test.py b/tensorflow/python/kernel_tests/sparse_reshape_op_test.py
index e87fa0c94c..0d2887f3ce 100644
--- a/tensorflow/python/kernel_tests/sparse_reshape_op_test.py
+++ b/tensorflow/python/kernel_tests/sparse_reshape_op_test.py
@@ -196,7 +196,7 @@ class SparseReshapeTest(test.TestCase):
       sp_input = self._SparseTensorPlaceholder()
       input_val = self._SparseTensorValue_5x6()
       sp_output = sparse_ops.sparse_reshape(sp_input, [4, -1, -1])
-      with self.assertRaisesOpError("only one output shape size may be -1"):
+      with self.assertRaisesOpError("only one output dimension may be -1"):
         sess.run(sp_output, {sp_input: input_val})
 
   def testProvideStaticallyMismatchedSizes(self):
diff --git a/tensorflow/python/kernel_tests/sparse_serialization_ops_test.py b/tensorflow/python/kernel_tests/sparse_serialization_ops_test.py
index af395b31bf..d0d6cc4c0f 100644
--- a/tensorflow/python/kernel_tests/sparse_serialization_ops_test.py
+++ b/tensorflow/python/kernel_tests/sparse_serialization_ops_test.py
@@ -64,6 +64,87 @@ class SerializeSparseTest(test.TestCase):
     shape = np.array([3, 4, 5]).astype(np.int64)
     return sparse_tensor_lib.SparseTensorValue(ind, val, shape)
 
+  def testSerializeDeserialize(self):
+    with self.test_session(use_gpu=False) as sess:
+      sp_input = self._SparseTensorValue_5x6(np.arange(6))
+      serialized = sparse_ops.serialize_sparse(sp_input)
+      sp_deserialized = sparse_ops.deserialize_sparse(
+          serialized, dtype=dtypes.int32)
+
+      indices, values, shape = sess.run(sp_deserialized)
+
+      self.assertAllEqual(indices, sp_input[0])
+      self.assertAllEqual(values, sp_input[1])
+      self.assertAllEqual(shape, sp_input[2])
+
+  def testSerializeDeserializeBatch(self):
+    with self.test_session(use_gpu=False) as sess:
+      sp_input = self._SparseTensorValue_5x6(np.arange(6))
+      serialized = sparse_ops.serialize_sparse(sp_input)
+      serialized = array_ops.stack([serialized, serialized])
+
+      sp_deserialized = sparse_ops.deserialize_sparse(
+          serialized, dtype=dtypes.int32)
+
+      combined_indices, combined_values, combined_shape = sess.run(
+          sp_deserialized)
+
+      self.assertAllEqual(combined_indices[:6, 0], [0] * 6)  # minibatch 0
+      self.assertAllEqual(combined_indices[:6, 1:], sp_input[0])
+      self.assertAllEqual(combined_indices[6:, 0], [1] * 6)  # minibatch 1
+      self.assertAllEqual(combined_indices[6:, 1:], sp_input[0])
+      self.assertAllEqual(combined_values[:6], sp_input[1])
+      self.assertAllEqual(combined_values[6:], sp_input[1])
+      self.assertAllEqual(combined_shape, [2, 5, 6])
+
+  def testSerializeDeserializeBatchInconsistentShape(self):
+    with self.test_session(use_gpu=False) as sess:
+      sp_input0 = self._SparseTensorValue_5x6(np.arange(6))
+      sp_input1 = self._SparseTensorValue_3x4(np.arange(6))
+      serialized0 = sparse_ops.serialize_sparse(sp_input0)
+      serialized1 = sparse_ops.serialize_sparse(sp_input1)
+      serialized = array_ops.stack([serialized0, serialized1])
+
+      sp_deserialized = sparse_ops.deserialize_sparse(
+          serialized, dtype=dtypes.int32)
+
+      with self.assertRaisesOpError(
+          r"Inconsistent shape across SparseTensors: dimension 0 prior to "
+          r"SparseTensor\[1\] was: 5 but rank of SparseTensor\[1\] is: 3"):
+        sess.run(sp_deserialized)
+
+  def testSerializeDeserializeNestedBatch(self):
+    with self.test_session(use_gpu=False) as sess:
+      sp_input = self._SparseTensorValue_5x6(np.arange(6))
+      serialized = sparse_ops.serialize_sparse(sp_input)
+      serialized = array_ops.stack([serialized, serialized])
+      serialized = array_ops.stack([serialized, serialized])
+
+      sp_deserialized = sparse_ops.deserialize_sparse(
+          serialized, dtype=dtypes.int32)
+
+      combined_indices, combined_values, combined_shape = sess.run(
+          sp_deserialized)
+
+      # minibatch 0
+      self.assertAllEqual(combined_indices[:6, :2], [[0, 0]] * 6)
+      self.assertAllEqual(combined_indices[:6, 2:], sp_input[0])
+      self.assertAllEqual(combined_values[:6], sp_input[1])
+      # minibatch 1
+      self.assertAllEqual(combined_indices[6:12, :2], [[0, 1]] * 6)
+      self.assertAllEqual(combined_indices[6:12, 2:], sp_input[0])
+      self.assertAllEqual(combined_values[6:12], sp_input[1])
+      # minibatch 2
+      self.assertAllEqual(combined_indices[12:18, :2], [[1, 0]] * 6)
+      self.assertAllEqual(combined_indices[12:18, 2:], sp_input[0])
+      self.assertAllEqual(combined_values[12:18], sp_input[1])
+      # minibatch 3
+      self.assertAllEqual(combined_indices[18:, :2], [[1, 1]] * 6)
+      self.assertAllEqual(combined_indices[18:, 2:], sp_input[0])
+      self.assertAllEqual(combined_values[18:], sp_input[1])
+
+      self.assertAllEqual(combined_shape, [2, 2, 5, 6])
+
   def testSerializeDeserializeMany(self):
     with self.test_session(use_gpu=False) as sess:
       sp_input0 = self._SparseTensorValue_5x6(np.arange(6))
diff --git a/tensorflow/python/ops/sparse_ops.py b/tensorflow/python/ops/sparse_ops.py
index 3d6f942dca..cdfe9e1c1e 100644
--- a/tensorflow/python/ops/sparse_ops.py
+++ b/tensorflow/python/ops/sparse_ops.py
@@ -1435,17 +1435,22 @@ def serialize_many_sparse(sp_input, name=None):
 
 
 def deserialize_sparse(serialized_sparse, dtype, rank=None, name=None):
-  """Deserialize `SparseTensor` from a string 3-vector (1-D `Tensor`) object.
+  """Deserialize `SparseTensor` objects.
+
+  The input is expected to have shape [d_1, ..., d_m, 3], where the last
+  dimension stores a serialized `SparseTensor`. The method deserializes
+  all input `SparseTensor`s, concatenates them into a single tensor, and
+  reshapes the sparse tensor to preserve the structure of the input.
 
   Args:
-    serialized_sparse: 1-D, The serialized `SparseTensor` object.
-      Must have 3 columns.
-    dtype: The `dtype` of the serialized `SparseTensor` object.
-    rank: (optional) Python int, the rank of the `SparseTensor` object.
+    serialized_sparse: The serialized `SparseTensor` objects.
+      The last dimension must have 3 columns.
+    dtype: The `dtype` of the serialized `SparseTensor` objects.
+    rank: (optional) Python int, the rank of the `SparseTensor` objects.
     name: A name prefix for the returned tensors (optional).
 
   Returns:
-    A `SparseTensor` representing the deserialized `SparseTensor` object.
+    A `SparseTensor` representing the deserialized `SparseTensor` objects.
 
   """
   output_indices, output_values, output_shape = (
-- 
GitLab


From 20e2fdc2f95f213eef5a736a140d8591ef7a5b6e Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 20 Nov 2017 17:54:51 -0800
Subject: [PATCH 0158/1225] Update the Boston example to include the feature
 importance and a custom export format for the model.

PiperOrigin-RevId: 176445066
---
 .../contrib/boosted_trees/examples/boston.py  | 38 +++++++++++++------
 1 file changed, 27 insertions(+), 11 deletions(-)

diff --git a/tensorflow/contrib/boosted_trees/examples/boston.py b/tensorflow/contrib/boosted_trees/examples/boston.py
index 2c0a3c4912..e9dbdb0fd7 100644
--- a/tensorflow/contrib/boosted_trees/examples/boston.py
+++ b/tensorflow/contrib/boosted_trees/examples/boston.py
@@ -22,7 +22,7 @@ r"""Demonstrates a regression on Boston housing data.
 
   python tensorflow/contrib/boosted_trees/examples/boston.py \
   --batch_size=404 --output_dir="/tmp/boston" --depth=4 --learning_rate=0.1 \
-  --num_eval_steps=1 --num_trees=500 --l2=4 \
+  --num_eval_steps=1 --num_trees=500 --l2=0.001 \
   --vmodule=training_ops=1
 
   When training is done, mean squared error on eval data is reported.
@@ -37,8 +37,10 @@ from __future__ import division
 from __future__ import print_function
 
 import argparse
+import os
 import sys
 import tensorflow as tf
+from tensorflow.contrib.boosted_trees.estimator_batch import custom_export_strategy
 from tensorflow.contrib.boosted_trees.estimator_batch.estimator import GradientBoostedDecisionTreeRegressor
 from tensorflow.contrib.boosted_trees.proto import learner_pb2
 from tensorflow.contrib.layers.python.layers import feature_column
@@ -51,22 +53,18 @@ _BOSTON_NUM_FEATURES = 13
 def _get_tfbt(output_dir, feature_cols):
   """Configures TF Boosted Trees estimator based on flags."""
   learner_config = learner_pb2.LearnerConfig()
-
   learner_config.learning_rate_tuner.fixed.learning_rate = FLAGS.learning_rate
   learner_config.regularization.l1 = 0.0
-  # Set the regularization per instance in such a way that
-  # regularization for the full training data is equal to l2 flag.
-  learner_config.regularization.l2 = FLAGS.l2 / FLAGS.batch_size
+  learner_config.regularization.l2 = FLAGS.l2
   learner_config.constraints.max_tree_depth = FLAGS.depth
-  learner_config.growing_mode = learner_pb2.LearnerConfig.WHOLE_TREE
 
   run_config = tf.contrib.learn.RunConfig(save_checkpoints_secs=300)
 
   # Create a TF Boosted trees regression estimator.
   estimator = GradientBoostedDecisionTreeRegressor(
       learner_config=learner_config,
-      # For the WHOLE_TREE strategy, set the examples_per_layer to be equal to
-      # batch size.
+      # This should be the number of examples. For large datasets it can be
+      # larger than the batch_size.
       examples_per_layer=FLAGS.batch_size,
       feature_columns=feature_cols,
       label_dimension=1,
@@ -77,6 +75,14 @@ def _get_tfbt(output_dir, feature_cols):
   return estimator
 
 
+def _convert_fn(dtec, sorted_feature_names, num_dense, num_sparse_float,
+                num_sparse_int, export_dir, unused_eval_result):
+  universal_format = custom_export_strategy.convert_to_universal_format(
+      dtec, sorted_feature_names, num_dense, num_sparse_float, num_sparse_int)
+  with tf.gfile.GFile(os.path.join(export_dir, "tree_proto"), "w") as f:
+    f.write(str(universal_format))
+
+
 def _make_experiment_fn(output_dir):
   """Creates experiment for gradient boosted decision trees."""
   (x_train, y_train), (x_test,
@@ -88,21 +94,31 @@ def _make_experiment_fn(output_dir):
       batch_size=FLAGS.batch_size,
       num_epochs=None,
       shuffle=True)
-
   eval_input_fn = tf.estimator.inputs.numpy_input_fn(
       x={"x": x_test}, y=y_test, num_epochs=1, shuffle=False)
 
   feature_columns = [
       feature_column.real_valued_column("x", dimension=_BOSTON_NUM_FEATURES)
   ]
-
+  feature_spec = tf.contrib.layers.create_feature_spec_for_parsing(
+      feature_columns)
+  serving_input_fn = tf.contrib.learn.utils.build_parsing_serving_input_fn(
+      feature_spec)
+  # An export strategy that outputs the feature importance and also exports
+  # the internal tree representation in another format.
+  export_strategy = custom_export_strategy.make_custom_export_strategy(
+      "exports",
+      convert_fn=_convert_fn,
+      feature_columns=feature_columns,
+      export_input_fn=serving_input_fn)
   return tf.contrib.learn.Experiment(
       estimator=_get_tfbt(output_dir, feature_columns),
       train_input_fn=train_input_fn,
       eval_input_fn=eval_input_fn,
       train_steps=None,
       eval_steps=FLAGS.num_eval_steps,
-      eval_metrics=None)
+      eval_metrics=None,
+      export_strategies=[export_strategy])
 
 
 def main(unused_argv):
-- 
GitLab


From cf57817f554fa7bfe7c134453ef9cf4374aef23d Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 20 Nov 2017 17:56:28 -0800
Subject: [PATCH 0159/1225] Automated g4 rollback of changelist 175593063

PiperOrigin-RevId: 176445215
---
 tensorflow/cc/framework/cc_op_gen.cc          | 119 +++++++++++++++---
 tensorflow/core/common_runtime/device_mgr.h   |   2 +-
 .../core/common_runtime/direct_session.cc     |   2 +-
 .../core/common_runtime/direct_session.h      |   3 +-
 tensorflow/core/common_runtime/placer.cc      |   4 +-
 .../common_runtime/step_stats_collector.cc    |   4 +-
 .../distributed_runtime/master_session.cc     |   4 +-
 .../core/framework/variant_op_registry.h      |   9 +-
 tensorflow/core/graph/costmodel.h             |   2 +-
 tensorflow/core/graph/graph_constructor.cc    |   6 +-
 tensorflow/core/graph/quantize_training.cc    |   4 +-
 tensorflow/core/graph/subgraph.h              |   2 +-
 .../graph_transforms/fold_constants_lib.cc    |   4 +-
 13 files changed, 123 insertions(+), 42 deletions(-)

diff --git a/tensorflow/cc/framework/cc_op_gen.cc b/tensorflow/cc/framework/cc_op_gen.cc
index c0b8cc2e41..d889c518f9 100644
--- a/tensorflow/cc/framework/cc_op_gen.cc
+++ b/tensorflow/cc/framework/cc_op_gen.cc
@@ -297,7 +297,7 @@ string ToCamelCase(const string& str) {
 // argument to a function.
 std::pair<const char*, bool> AttrTypeName(StringPiece attr_type) {
   static const std::unordered_map<StringPiece, std::pair<const char*, bool>,
-                                  StringPiece::Hasher>
+                                  StringPieceHasher>
       attr_type_map{
           {"string", {"StringPiece", false}},
           {"list(string)", {"gtl::ArraySlice<string>", true}},
@@ -325,29 +325,112 @@ std::pair<const char*, bool> AttrTypeName(StringPiece attr_type) {
 }
 
 bool IsCPPKeyword(StringPiece name) {
-  static const std::unordered_set<StringPiece, StringPiece::Hasher>
+  static const std::unordered_set<StringPiece, StringPieceHasher>
       // Keywords obtained from http://en.cppreference.com/w/cpp/keyword
       kCPPReserved{
-          "alignas", "alignof", "and", "and_eq", "asm", "atomic_cancel",
-          "atomic_commit", "atomic_noexcept", "auto", "bitand", "bitor", "bool",
-          "break", "case", "catch", "char", "char16_t", "char32_t", "class",
-          "compl", "concept", "const", "const_cast", "constexpr", "continue",
-          "decltype", "default", "delete", "do", "double", "dynamic_cast",
-          "else", "enum", "explicit", "export", "extern", "false", "final",
-          "float", "for", "friend", "goto", "if", "import", "inline", "int",
-          "long", "module", "mutable", "namespace", "new", "noexcept", "not",
-          "not_eq", "nullptr", "operator", "or", "or_eq", "override", "private",
-          "protected", "public", "register", "reinterpret_cast", "requires",
-          "return", "short", "signed", "sizeof", "static", "static_assert",
-          "static_cast", "struct", "switch", "synchronized", "template", "this",
-          "thread_local", "throw", "true", "try", "typedef", "typeid",
-          "typename", "union", "unsigned", "using", "virtual", "void",
-          "volatile", "wchar_t", "while", "xor", "xor_eq",
+          "alignas",
+          "alignof",
+          "and",
+          "and_eq",
+          "asm",
+          "atomic_cancel",
+          "atomic_commit",
+          "atomic_noexcept",
+          "auto",
+          "bitand",
+          "bitor",
+          "bool",
+          "break",
+          "case",
+          "catch",
+          "char",
+          "char16_t",
+          "char32_t",
+          "class",
+          "compl",
+          "concept",
+          "const",
+          "const_cast",
+          "constexpr",
+          "continue",
+          "decltype",
+          "default",
+          "delete",
+          "do",
+          "double",
+          "dynamic_cast",
+          "else",
+          "enum",
+          "explicit",
+          "export",
+          "extern",
+          "false",
+          "final",
+          "float",
+          "for",
+          "friend",
+          "goto",
+          "if",
+          "import",
+          "inline",
+          "int",
+          "long",
+          "module",
+          "mutable",
+          "namespace",
+          "new",
+          "noexcept",
+          "not",
+          "not_eq",
+          "nullptr",
+          "operator",
+          "or",
+          "or_eq",
+          "override",
+          "private",
+          "protected",
+          "public",
+          "register",
+          "reinterpret_cast",
+          "requires",
+          "return",
+          "short",
+          "signed",
+          "sizeof",
+          "static",
+          "static_assert",
+          "static_cast",
+          "struct",
+          "switch",
+          "synchronized",
+          "template",
+          "this",
+          "thread_local",
+          "throw",
+          "true",
+          "try",
+          "typedef",
+          "typeid",
+          "typename",
+          "union",
+          "unsigned",
+          "using",
+          "virtual",
+          "void",
+          "volatile",
+          "wchar_t",
+          "while",
+          "xor",
+          "xor_eq",
 
           // The following are not C++ keywords, but names of local variables
           // and parameters used in the op constructor. Treating them as
           // keywords, so that other parameter names don't conflict with these.
-          "builder", "node", "ret", "scope", "unique_name",
+          "builder",
+          "node",
+          "ret",
+          "scope",
+          "unique_name",
       };
   return kCPPReserved.count(name) > 0;
 }
diff --git a/tensorflow/core/common_runtime/device_mgr.h b/tensorflow/core/common_runtime/device_mgr.h
index d16681ac59..cd93f76324 100644
--- a/tensorflow/core/common_runtime/device_mgr.h
+++ b/tensorflow/core/common_runtime/device_mgr.h
@@ -68,7 +68,7 @@ class DeviceMgr {
 
   StringPiece CopyToBackingStore(StringPiece s);
 
-  std::unordered_map<StringPiece, Device*, StringPiece::Hasher> device_map_;
+  std::unordered_map<StringPiece, Device*, StringPieceHasher> device_map_;
   core::Arena name_backing_store_;  // Storage for keys in device_map_
   std::unordered_map<string, int> device_type_counts_;
 
diff --git a/tensorflow/core/common_runtime/direct_session.cc b/tensorflow/core/common_runtime/direct_session.cc
index 6dfe17405c..2d4f2a2d90 100644
--- a/tensorflow/core/common_runtime/direct_session.cc
+++ b/tensorflow/core/common_runtime/direct_session.cc
@@ -1135,7 +1135,7 @@ Status DirectSession::GetOrCreateExecutors(
 
   if (run_state_args->is_partial_run) {
     ek->graph = std::move(run_state_args->graph);
-    std::unordered_set<StringPiece, StringPiece::Hasher> names;
+    std::unordered_set<StringPiece, StringPieceHasher> names;
     for (const string& input : inputs) {
       TensorId id(ParseTensorName(input));
       names.emplace(id.first);
diff --git a/tensorflow/core/common_runtime/direct_session.h b/tensorflow/core/common_runtime/direct_session.h
index 7fbabf6d81..780d0b46a8 100644
--- a/tensorflow/core/common_runtime/direct_session.h
+++ b/tensorflow/core/common_runtime/direct_session.h
@@ -64,8 +64,7 @@ class DirectSession : public Session {
   ~DirectSession() override;
 
   typedef std::vector<std::pair<string, Tensor>> NamedTensorList;
-  typedef std::unordered_map<StringPiece, Node*, StringPiece::Hasher>
-      NameNodeMap;
+  typedef std::unordered_map<StringPiece, Node*, StringPieceHasher> NameNodeMap;
 
   ::tensorflow::Status Create(const GraphDef& graph) override;
   ::tensorflow::Status Extend(const GraphDef& graph) override;
diff --git a/tensorflow/core/common_runtime/placer.cc b/tensorflow/core/common_runtime/placer.cc
index 73fdf60fd5..54f082e823 100644
--- a/tensorflow/core/common_runtime/placer.cc
+++ b/tensorflow/core/common_runtime/placer.cc
@@ -129,7 +129,7 @@ class ColocationGraph {
     // 'string' values stored in NodeDef attribute lists, as well as StringPiece
     // values that refer to 'string' values from NodeDef::name(), without
     // performing any string allocations.
-    std::unordered_map<StringPiece, const Node*, StringPiece::Hasher>
+    std::unordered_map<StringPiece, const Node*, StringPieceHasher>
         colocation_group_root;
 
     for (Node* node : graph_->nodes()) {
@@ -171,7 +171,7 @@ class ColocationGraph {
   }
 
   Status ColocateNodeToGroup(
-      std::unordered_map<StringPiece, const Node*, StringPiece::Hasher>*
+      std::unordered_map<StringPiece, const Node*, StringPieceHasher>*
           colocation_group_root,
       Node* node, StringPiece colocation_group) {
     const Node*& root_node = (*colocation_group_root)[colocation_group];
diff --git a/tensorflow/core/common_runtime/step_stats_collector.cc b/tensorflow/core/common_runtime/step_stats_collector.cc
index bfe7a32b1b..d7e01144c9 100644
--- a/tensorflow/core/common_runtime/step_stats_collector.cc
+++ b/tensorflow/core/common_runtime/step_stats_collector.cc
@@ -150,7 +150,7 @@ void StepStatsCollector::BuildCostModel(
     const DeviceStepStats* hardware_stats;
   };
 
-  std::unordered_map<StringPiece, DeviceStats, StringPiece::Hasher>
+  std::unordered_map<StringPiece, DeviceStats, StringPieceHasher>
       per_device_stats;
   std::unordered_map<int, const DeviceStepStats*> gpu_hardware_stats;
 
@@ -190,7 +190,7 @@ void StepStatsCollector::BuildCostModel(
     CostModel* cm = cost_model_manager->FindOrCreateCostModel(graph);
     cm->IncrementUpdateTimes();
 
-    std::unordered_map<StringPiece, Node*, StringPiece::Hasher> name_to_node;
+    std::unordered_map<StringPiece, Node*, StringPieceHasher> name_to_node;
     for (Node* n : graph->nodes()) {
       name_to_node.emplace(n->name(), n);
     }
diff --git a/tensorflow/core/distributed_runtime/master_session.cc b/tensorflow/core/distributed_runtime/master_session.cc
index b3e499be79..3379302b9b 100644
--- a/tensorflow/core/distributed_runtime/master_session.cc
+++ b/tensorflow/core/distributed_runtime/master_session.cc
@@ -213,7 +213,7 @@ class MasterSession::ReffedClientGraph : public core::RefCounted {
   const bool is_partial_;
   const DebugOptions& debug_opts_;
   WorkerCacheInterface* const worker_cache_;  // Not owned.
-  std::unordered_map<StringPiece, Node*, StringPiece::Hasher> name_to_node_;
+  std::unordered_map<StringPiece, Node*, StringPieceHasher> name_to_node_;
   const bool should_deregister_;
 
   // Graph partitioned into per-location subgraphs.
@@ -492,7 +492,7 @@ Status MasterSession::ReffedClientGraph::RunPartitions(
   VLOG(2) << "RunPartitions step_id " << step_id << " execution_count "
           << execution_count;
   // Maps the names of fed tensors to their index in `req`.
-  std::unordered_map<StringPiece, size_t, StringPiece::Hasher> feeds(3);
+  std::unordered_map<StringPiece, size_t, StringPieceHasher> feeds(3);
 
   for (size_t i = 0; i < req.num_feeds(); ++i) {
     if (!feeds.insert({req.feed_name(i), i}).second) {
diff --git a/tensorflow/core/framework/variant_op_registry.h b/tensorflow/core/framework/variant_op_registry.h
index 831dbd3dff..13f6908cae 100644
--- a/tensorflow/core/framework/variant_op_registry.h
+++ b/tensorflow/core/framework/variant_op_registry.h
@@ -145,9 +145,8 @@ class UnaryVariantOpRegistry {
   static std::unordered_set<string>* PersistentStringStorage();
 
  private:
-  std::unordered_map<StringPiece, VariantShapeFn, StringPiece::Hasher>
-      shape_fns;
-  std::unordered_map<StringPiece, VariantDecodeFn, StringPiece::Hasher>
+  std::unordered_map<StringPiece, VariantShapeFn, StringPieceHasher> shape_fns;
+  std::unordered_map<StringPiece, VariantDecodeFn, StringPieceHasher>
       decode_fns;
 
   // Map std::pair<Direction, type_name> to function.
@@ -159,7 +158,7 @@ class UnaryVariantOpRegistry {
       ret = Hash64Combine(ret, sp_hasher_(std::get<1>(x)));
       return ret;
     }
-    StringPiece::Hasher sp_hasher_;
+    StringPieceHasher sp_hasher_;
   };
 
   std::unordered_map<std::pair<VariantDeviceCopyDirection, StringPiece>,
@@ -177,7 +176,7 @@ class UnaryVariantOpRegistry {
       ret = Hash64Combine(ret, sp_hasher_(std::get<2>(x)));
       return ret;
     }
-    StringPiece::Hasher sp_hasher_;
+    StringPieceHasher sp_hasher_;
   };
   std::unordered_map<std::tuple<VariantUnaryOp, StringPiece, StringPiece>,
                      VariantUnaryOpFn, TupleHash>
diff --git a/tensorflow/core/graph/costmodel.h b/tensorflow/core/graph/costmodel.h
index a908a4843c..8afa4971ad 100644
--- a/tensorflow/core/graph/costmodel.h
+++ b/tensorflow/core/graph/costmodel.h
@@ -30,7 +30,7 @@ limitations under the License.
 #include "tensorflow/core/platform/protobuf.h"
 
 namespace tensorflow {
-typedef std::unordered_map<StringPiece, int32, StringPiece::Hasher>
+typedef std::unordered_map<StringPiece, int32, StringPieceHasher>
     NodeNameToCostIdMap;
 
 class StepStats;
diff --git a/tensorflow/core/graph/graph_constructor.cc b/tensorflow/core/graph/graph_constructor.cc
index e45828b7ba..8890a9fb0f 100644
--- a/tensorflow/core/graph/graph_constructor.cc
+++ b/tensorflow/core/graph/graph_constructor.cc
@@ -241,13 +241,13 @@ class GraphConstructor {
   };
   // TODO(vrv): Profile this data structure to see if we should use an
   // alternative implementation of std::unordered_map.
-  std::unordered_map<StringPiece, NodeInfo, StringPiece::Hasher> gdef_nodes_;
+  std::unordered_map<StringPiece, NodeInfo, StringPieceHasher> gdef_nodes_;
 
   // Mapping from node name to the existing node in g_.
-  std::unordered_map<StringPiece, Node*, StringPiece::Hasher> existing_nodes_;
+  std::unordered_map<StringPiece, Node*, StringPieceHasher> existing_nodes_;
 
   // Prefixes already used in the graph.
-  std::unordered_set<StringPiece, StringPiece::Hasher> existing_prefixes_;
+  std::unordered_set<StringPiece, StringPieceHasher> existing_prefixes_;
 
   // Imported node names that have been uniquified. The key is the original
   // name, the value is the new unique name.
diff --git a/tensorflow/core/graph/quantize_training.cc b/tensorflow/core/graph/quantize_training.cc
index d9cb55f448..cb0fc8a154 100644
--- a/tensorflow/core/graph/quantize_training.cc
+++ b/tensorflow/core/graph/quantize_training.cc
@@ -42,7 +42,7 @@ const float kEMADecay = 0.999;
 
 // Node types to rewrite. Insert quantize_and_dequantize op for their inputs.
 const auto* nodes_to_rewrite =
-    new std::unordered_set<string, StringPiece::Hasher>{"MatMul", "Conv2D"};
+    new std::unordered_set<string, StringPieceHasher>{"MatMul", "Conv2D"};
 
 // Contains necessary parameters to convert an edge.
 struct EdgeToConvert {
@@ -563,7 +563,7 @@ Status ProcessTargetEdges(Graph* graph, const string& quant_op_type,
                           const std::vector<EdgeToConvert>& target_edges) {
   // Remember previously converted ops to avoid duplicated conversion on the
   // same input.
-  std::unordered_map<string, Node*, StringPiece::Hasher> name_index;
+  std::unordered_map<string, Node*, StringPieceHasher> name_index;
   std::vector<Node*> added_variables;
   for (const EdgeToConvert edge : target_edges) {
     Node* convert_node;
diff --git a/tensorflow/core/graph/subgraph.h b/tensorflow/core/graph/subgraph.h
index 8ccc27914b..3c1f8870f5 100644
--- a/tensorflow/core/graph/subgraph.h
+++ b/tensorflow/core/graph/subgraph.h
@@ -71,7 +71,7 @@ Status RewriteGraphForExecution(
     const DeviceAttributes& device_info, bool use_function_convention,
     RewriteGraphMetadata* out_metadata);
 
-typedef std::unordered_map<StringPiece, Node*, StringPiece::Hasher> NameIndex;
+typedef std::unordered_map<StringPiece, Node*, StringPieceHasher> NameIndex;
 
 // Augment "*g" by adding special "fetch" nodes that connect to the
 // tensor outputs specified in "fetch_outputs" to retrieve the output
diff --git a/tensorflow/tools/graph_transforms/fold_constants_lib.cc b/tensorflow/tools/graph_transforms/fold_constants_lib.cc
index f2934a79bd..250f54e20f 100644
--- a/tensorflow/tools/graph_transforms/fold_constants_lib.cc
+++ b/tensorflow/tools/graph_transforms/fold_constants_lib.cc
@@ -39,9 +39,9 @@ limitations under the License.
 namespace tensorflow {
 namespace graph_transforms {
 namespace {
-using StringPieceSet = std::unordered_set<StringPiece, StringPiece::Hasher>;
+using StringPieceSet = std::unordered_set<StringPiece, StringPieceHasher>;
 template <typename T>
-using StringPieceMap = std::unordered_map<StringPiece, T, StringPiece::Hasher>;
+using StringPieceMap = std::unordered_map<StringPiece, T, StringPieceHasher>;
 }  // namespace
 
 Status ReplaceSendRecvs(const GraphDef& original_graph_def,
-- 
GitLab


From 1c7661be3337d5ab6c44300aee6a2d4001c81b27 Mon Sep 17 00:00:00 2001
From: Jacques Pienaar <jpienaar@google.com>
Date: Mon, 20 Nov 2017 18:04:51 -0800
Subject: [PATCH 0160/1225] [TF2XLA] Flow down across switch edges separately.
 * Change the way that the clustering was done by flowing down along the
 branches of the switch node separately;   - It was previously wrong to assume
 that the operands of an op are in the same control scope if they are not a
 switch or a merge node, as a zero-input op (such as a const) could be
 referenced by both "branches" of a switch without this op not being
 exclusively in either branch. * Change from matching a switch for a merge
 cluster, to matching a merge for a switch cluster:   - The new matching
 considers switch-merge subgraphs where all nodes within the subgraph are
 dominated by the switch nodes, so reversing the matching makes it easier to
 perform the dominance checking.   - This allows for cases where there is a
 cluster with a control dependency on a switch node and used by a branch of
 the switch.

PiperOrigin-RevId: 176446211
---
 .../tf2xla/functionalize_control_flow.cc      | 329 ++++++++++++------
 1 file changed, 220 insertions(+), 109 deletions(-)

diff --git a/tensorflow/compiler/tf2xla/functionalize_control_flow.cc b/tensorflow/compiler/tf2xla/functionalize_control_flow.cc
index 40a484da09..5726d8294a 100644
--- a/tensorflow/compiler/tf2xla/functionalize_control_flow.cc
+++ b/tensorflow/compiler/tf2xla/functionalize_control_flow.cc
@@ -623,11 +623,12 @@ class FunctionalizeCond {
   FunctionalizeCond(Graph* graph, FunctionLibraryDefinition* library)
       : clusters_(graph->num_node_ids()), library_(library), graph_(graph) {}
 
-  // Returns a vector of Merge nodes from the clustered graph where the nodes
+  // Returns a vector of Switch nodes from the clustered graph where the nodes
   // are sorted by the number of switch nodes minus number of merge nodes
   // from a root of the clustered graph to the given Merge node, with ties
-  // broken by the representative of the Cluster.
-  std::vector<std::pair<int, Cluster*>> SortedMergeNodes();
+  // broken by the representative of the Cluster. This corresponds to sorting by
+  // nesting depth, from deepest nested to outermost.
+  std::vector<std::pair<int, Cluster*>> SortedSwitchNodes();
 
   // Returns whether the graph has no conditionals.
   bool NoConditionals() const { return merge_nodes_.empty(); }
@@ -654,15 +655,17 @@ class FunctionalizeCond {
   // extracting the bodies needed for the then and else branch, creates a XlaIf
   // node, removing the nodes of the branches from the graph and replacing the
   // merge node with a XlaIf.
-  Status ConvertMergeToXlaIf(Cluster* merge_cluster);
+  Status ConvertCorrespondingMergeToXlaIf(Cluster* switch_cluster);
 
   // Removes a Switch cluster feeding directly into a Merge cluster by removing
   // the Switch and Merge nodes and collapsing into a single cluster.
-  Status RemoveTrivialMerge(Cluster* merge_cluster);
+  Status RemoveTrivialSwitch(Cluster* switch_cluster);
 
-  // Returns the switch cluster corresponding to the merge node. This function
-  // only returns the switch cluster in the simple case where we have a switch
-  // node is the entry of a diamond corresponding to a conditional:
+  // Returns the merge cluster corresponding to the switch node. This function
+  // only returns the merge cluster in the case where we have a switch node that
+  // is the single entry point for all paths to a common merge cluster, this
+  // merge cluster may be created by combining multiple merge clusters, that
+  // share the switch cluster as common ancestor, together.
   //
   //           Switch
   //          /      \
@@ -671,8 +674,9 @@ class FunctionalizeCond {
   //        merge_cluster
   //
   // Note: either of the branches may be empty. The case where both branches are
-  // empty is handled by RemoveTrivialMerge.
-  gtl::optional<Cluster*> GetSwitchCluster(const Cluster& merge_cluster);
+  // empty is handled by RemoveTrivialSwitch.
+  gtl::optional<Cluster*> CreateCorrespondingMergeCluster(
+      Cluster* switch_cluster);
 
   // Determines the arguments needed as input to the Merge cluster originating
   // from the Switch cluster.
@@ -793,6 +797,10 @@ bool IsDeadSwitch(const Node* node) {
 }
 
 void FunctionalizeCond::CreateClusters() {
+  ClusterHandle source_cluster = ClusterHandle(Graph::kSourceId);
+  auto& source = clusters_.at(source_cluster);
+  std::deque<std::pair<ClusterHandle, std::deque<Node*>>> workqueue;
+  workqueue.push_back({source_cluster, {}});
   for (Node* node : graph_->nodes()) {
     if (IsSwitch(node)) {
       switch_nodes_.insert(node);
@@ -801,6 +809,12 @@ void FunctionalizeCond::CreateClusters() {
     }
     ClusterHandle& cluster = clusters_.at(node).Get();
     cluster = ClusterHandle(node->id());
+    // Group all source clusters together.
+    if (node->IsSource() || node->in_edges().empty()) {
+      clusters_.at(node).Merge(&source);
+      source.Merge(&clusters_.at(node));
+      workqueue.front().second.push_back(node);
+    }
   }
 
   // If there are no Merge nodes, then terminate.
@@ -815,20 +829,118 @@ void FunctionalizeCond::CreateClusters() {
   // conservatively assuming all merge nodes become XlaIf nodes.
   clusters_.resize(clusters_.size() + merge_nodes_.size());
 
-  // Merge a cluster with its input, unless the input is a Switch node or
-  // the node is a Merge node.
-  for (const Node* node : graph_->nodes()) {
-    if (IsMerge(node) || IsSwitch(node) || !node->IsOp()) {
-      continue;
+  std::unordered_set<Node*> marked;
+  while (!workqueue.empty()) {
+    auto cluster_queue = workqueue.front();
+    VLOG(4) << "Cluster: " << cluster_queue.first << " Queue: {"
+            << str_util::Join(cluster_queue.second, ",",
+                              [](string* output, const Node* node) {
+                                strings::StrAppend(output, node->id());
+                              })
+            << "}";
+
+    UnionFind<ClusterHandle>& repr = clusters_.at(cluster_queue.first);
+    workqueue.pop_front();
+    std::deque<Node*> switch_nodes;
+    std::deque<Node*> merge_nodes;
+    std::unordered_set<Node*> cluster_member;
+    while (!cluster_queue.second.empty()) {
+      // Iterate node workqueue and flow forward merging all nodes reachable
+      // that are neither a Switch or a Merge and whose inputs are all part of
+      // the same cluster.
+      Node* cur = cluster_queue.second.front();
+      cluster_queue.second.pop_front();
+      if (marked.find(cur) != marked.end()) {
+        continue;
+      }
+      if (IsMerge(cur)) {
+        merge_nodes.push_back(cur);
+        marked.insert(cur);
+        continue;
+      }
+      if (IsSwitch(cur)) {
+        switch_nodes.push_back(cur);
+        marked.insert(cur);
+        continue;
+      }
+      clusters_.at(cur).Merge(&repr);
+      cluster_member.insert(cur);
+      for (Node* out : cur->out_nodes()) {
+        bool all_ancestors_in_cluster = true;
+        for (Node* in : out->in_nodes()) {
+          if (IsMerge(out)) {
+            merge_nodes.push_back(out);
+          }
+          if (IsSwitch(out)) {
+            switch_nodes.push_back(out);
+          }
+          if (cluster_member.find(in) == cluster_member.end()) {
+            all_ancestors_in_cluster = false;
+            break;
+          }
+        }
+        if (all_ancestors_in_cluster && out->IsOp()) {
+          cluster_queue.second.push_back(out);
+          marked.insert(cur);
+        }
+      }
     }
-    for (const Node* in : node->in_nodes()) {
-      if (in->IsOp() && !IsSwitch(in) && !IsMerge(in)) {
-        clusters_.at(node).Merge(&clusters_.at(in));
+
+    VLOG(4) << "Switches: {"
+            << str_util::Join(switch_nodes, ",",
+                              [](string* output, const Node* node) {
+                                strings::StrAppend(output, node->id());
+                              })
+            << "}";
+
+    // Merge Switch nodes with common predicate.
+    std::unordered_map<Node*, std::vector<Node*>> predicate_to_switch;
+    for (Node* node : switch_nodes) {
+      Node* tmp;
+      TF_CHECK_OK(node->input_node(1, &tmp));
+      predicate_to_switch[tmp].push_back(node);
+    }
+    for (auto kv : predicate_to_switch) {
+      Node* first = kv.second.front();
+      for (Node* switch_node : kv.second) {
+        clusters_.at(first).Merge(&clusters_.at(switch_node));
       }
     }
-    // Group all source clusters together.
-    if (node->IsSource() || node->in_edges().empty()) {
-      clusters_.at(node).Merge(&clusters_.at(ClusterHandle(Graph::kSourceId)));
+
+    // Enqueue each edge of the switch node separately. That is, group all the
+    // nodes that are due to the true/false edge of the switch together and
+    // consider all nodes that only have a control dependency on the switch node
+    // separately. We want to group together all nodes that are part of the same
+    // branch, as these will be extracted into the `then` and `else` functions
+    // of the functional if. The ops due to control edges are different as they
+    // could be involved with either branch and merging them here could result
+    // in invalid graphs.
+    for (auto kv : predicate_to_switch) {
+      ClusterHandle none = ClusterHandle(-1);
+      ClusterHandle first[2] = {none, none};
+      std::deque<Node*>* queue[2];
+      for (auto switch_node : kv.second) {
+        for (const auto e : switch_node->out_edges()) {
+          if (IsSwitch(e->dst()) || IsMerge(e->dst())) {
+            continue;
+          }
+          // Control edges are enqueued on their own.
+          if (e->IsControlEdge()) {
+            workqueue.push_back({Representative(e->dst()), {e->dst()}});
+            continue;
+          }
+          // Combine all outputs of the same output port of a switch cluster
+          // into the same workqueue entry.
+          if (first[e->src_output()] == none) {
+            ClusterHandle repr = Representative(e->dst());
+            first[e->src_output()] = repr;
+            workqueue.push_back({repr, {}});
+            queue[e->src_output()] = &workqueue.back().second;
+          }
+          clusters_.at(first[e->src_output()]).Merge(&clusters_.at(e->dst()));
+          queue[e->src_output()]->push_back(e->dst());
+        }
+      }
     }
   }
 }
@@ -910,74 +1022,60 @@ void FunctionalizeCond::CreateClusteredGraph() {
     update_cluster_for_node(node).merge_nodes.insert(node);
   }
 
-  // Merge Switch nodes with common predicate.
-  std::unordered_map<Node*, std::vector<Node*>> predicate_to_switch;
-  for (Node* node : switch_nodes_) {
-    Node* tmp;
-    TF_CHECK_OK(node->input_node(1, &tmp));
-    predicate_to_switch[tmp].push_back(node);
-  }
-  for (auto kv : predicate_to_switch) {
-    Cluster& first = clustered_graph_.at(Representative(kv.second.front()));
-    for (Node* switch_node : kv.second) {
-      ClusterHandle handle = Representative(switch_node);
-      Cluster& cluster = clustered_graph_.at(handle);
-      ContractEdge(&cluster, &first, /*remove_from_graph=*/true);
-    }
-  }
-
-  // Merge Merge nodes with common input together.
-  for (Node* node : merge_nodes_) {
-    Cluster& cluster = clustered_graph_.at(Representative(node));
-    for (const Node* in : node->in_nodes()) {
-      if (!in->IsOp()) {
-        continue;
-      }
-      Cluster& cluster_node_in = clustered_graph_.at(Representative(in));
-      // ContractEdge can modify out_nodes of cluster_node_in, so traverse
-      // over out_nodes assuming it does.
-      for (auto it = cluster_node_in.out_nodes.begin();
-           it != cluster_node_in.out_nodes.end();) {
-        if (!(*it)->merge_nodes.empty()) {
-          ContractEdge(*it++, &cluster, /*remove_from_graph=*/true);
-        } else {
-          ++it;
-        }
-      }
-    }
-  }
-
   VLOG(3) << "Graph with clusters: " << DebugString(*graph_, &clusters_);
   VLOG(3) << "ClusteredGraph: " << DebugString(clustered_graph_);
 }
 
-gtl::optional<FunctionalizeCond::Cluster*> FunctionalizeCond::GetSwitchCluster(
-    const Cluster& merge_cluster) {
-  VLOG(3) << "GetSwitchCluster for " << merge_cluster.representative;
-  gtl::optional<Cluster*> switch_cluster;
-  if (merge_cluster.in_nodes.size() > 2) {
-    return gtl::nullopt;
+gtl::optional<FunctionalizeCond::Cluster*>
+FunctionalizeCond::CreateCorrespondingMergeCluster(Cluster* switch_cluster) {
+  VLOG(3) << "CreateCorrespondingMergeCluster for "
+          << switch_cluster->representative;
+  std::unordered_set<Cluster*> merges;
+  std::unordered_set<Cluster*> dominated;
+  dominated.insert(switch_cluster);
+  std::deque<Cluster*> queue;
+  auto enqueue_or_update_merge = [this, &queue, &merges](Cluster* c) {
+    if (c->merge_nodes.empty()) {
+      queue.push_back(c);
+    } else {
+      merges.insert(c);
+    }
+  };
+  // Enqueue all the outputs of the switch cluster in the workqueue.
+  for (auto* out : switch_cluster->out_nodes) {
+    enqueue_or_update_merge(out);
   }
-  for (Cluster* in : merge_cluster.in_nodes) {
-    Cluster* cluster = in;
-    if (in->switch_nodes.empty()) {
-      if (in->in_nodes.size() != 1 || in->out_nodes.size() != 1) {
+  std::unordered_set<Cluster*> visited;
+  while (!queue.empty()) {
+    Cluster* cur = queue.front();
+    queue.pop_front();
+    if (visited.find(cur) != visited.end()) {
+      continue;
+    }
+    visited.insert(cur);
+    // Ensure all inputs to the current node are in the dominated set.
+    for (Cluster* in : cur->in_nodes) {
+      if (dominated.find(in) == dominated.end()) {
         return gtl::nullopt;
       }
-      // There is only a single `in` cluster.
-      cluster = *in->in_nodes.begin();
-    }
-    if (cluster->switch_nodes.empty()) {
-      return gtl::nullopt;
     }
-
-    if (switch_cluster.has_value() && *switch_cluster != cluster) {
-      return gtl::nullopt;
-    } else {
-      switch_cluster = cluster;
+    for (Cluster* out : cur->out_nodes) {
+      // No switch nodes beyond the entry one is expected.
+      if (!out->switch_nodes.empty()) {
+        return gtl::nullopt;
+      }
+      enqueue_or_update_merge(out);
     }
   }
-  return switch_cluster;
+  auto it = merges.begin();
+  Cluster* merge_cluster = *it;
+  for (++it; it != merges.end(); ++it) {
+    ContractEdge(*it, merge_cluster);
+  }
+
+  // TODO(jpienaar): Clean up graph, merging nodes.
+
+  return merge_cluster;
 }
 
 xla::StatusOr<FunctionalizeCond::CondArgs> FunctionalizeCond::DetermineCondArgs(
@@ -1221,11 +1319,11 @@ void FunctionalizeCond::RemoveMergeNodes(Cluster* merge_cluster) {
   }
 }
 
-Status FunctionalizeCond::RemoveTrivialMerge(Cluster* merge_cluster) {
-  Cluster* switch_cluster = *merge_cluster->in_nodes.begin();
-  if (switch_cluster->switch_nodes.empty()) {
+Status FunctionalizeCond::RemoveTrivialSwitch(Cluster* switch_cluster) {
+  Cluster* merge_cluster = *switch_cluster->out_nodes.begin();
+  if (merge_cluster->merge_nodes.empty()) {
     return errors::FailedPrecondition(
-        "Not a trivial merge: no Switch node feeding into Merge node");
+        "Not a trivial switch: no Merge node feeding into Switch node");
   }
 
   for (auto it = merge_cluster->merge_nodes.begin();
@@ -1252,17 +1350,25 @@ Status FunctionalizeCond::RemoveTrivialMerge(Cluster* merge_cluster) {
   return Status::OK();
 }
 
-Status FunctionalizeCond::ConvertMergeToXlaIf(Cluster* merge_cluster) {
-  VLOG(1) << "ConvertMergeToXlaIf for " << merge_cluster->representative;
-  gtl::optional<Cluster*> switch_cluster = GetSwitchCluster(*merge_cluster);
-  if (!switch_cluster.has_value()) {
+Status FunctionalizeCond::ConvertCorrespondingMergeToXlaIf(
+    Cluster* switch_cluster) {
+  VLOG(1) << "ConvertMergeToXlaIf for " << switch_cluster->representative;
+  gtl::optional<Cluster*> maybe_merge =
+      CreateCorrespondingMergeCluster(switch_cluster);
+  if (!maybe_merge.has_value()) {
     return errors::FailedPrecondition(
-        "Merge cluster was not part of a simple conditional in the clustered "
-        "graph. Graph nodes in merge cluster ",
-        NodesToString(merge_cluster->merge_nodes));
+        "Switch cluster was not part of a simple conditional in the clustered "
+        "graph. Graph nodes in switch cluster ",
+        NodesToString(switch_cluster->switch_nodes));
+  }
+  Cluster* merge_cluster = *maybe_merge;
+  if (merge_cluster->merge_nodes.empty()) {
+    return errors::Internal(
+        "Merge node in clustered graph contains no merge nodes: ",
+        merge_cluster->representative.ToString());
   }
   TF_ASSIGN_OR_RETURN(auto cond_args,
-                      DetermineCondArgs(*merge_cluster, **switch_cluster));
+                      DetermineCondArgs(*merge_cluster, *switch_cluster));
 
   // Sort the outputs by ID to produce more stable output.
   std::vector<Node*> outputs(merge_cluster->merge_nodes.begin(),
@@ -1278,7 +1384,7 @@ Status FunctionalizeCond::ConvertMergeToXlaIf(Cluster* merge_cluster) {
   // Remove the old nodes from the graph_ and contract the edges of the
   // clustered graph.
   for (auto in : merge_cluster->in_nodes) {
-    if (in != *switch_cluster) {
+    if (in != switch_cluster) {
       RemoveClusterNodes(in);
     }
   }
@@ -1286,20 +1392,20 @@ Status FunctionalizeCond::ConvertMergeToXlaIf(Cluster* merge_cluster) {
   RemoveUnusedArgs(cond_args.args);
   auto in_nodes = merge_cluster->in_nodes;
   for (auto it = in_nodes.begin(); it != in_nodes.end();) {
-    ContractEdge(*it++, merge_cluster);
+    ContractEdge(*it++, switch_cluster);
   }
-  ContractEdge(*switch_cluster, merge_cluster);
-  clusters_[if_node].Get() = ClusterHandle(merge_cluster->representative);
+  ContractEdge(merge_cluster, switch_cluster);
+  clusters_[if_node].Get() = ClusterHandle(switch_cluster->representative);
 
   return Status::OK();
 }
 
 std::vector<std::pair<int, FunctionalizeCond::Cluster*>>
-FunctionalizeCond::SortedMergeNodes() {
+FunctionalizeCond::SortedSwitchNodes() {
   VLOG(2) << "ProcessClusteredGraph";
   std::stack<std::pair<int, Cluster*>> stack;
   // Initialize with the source node.
-  stack.push({0, &clustered_graph_[ClusterHandle(Graph::kSourceId)]});
+  stack.push({0, &clustered_graph_[Representative(graph_->source_node())]});
 
   // Perform a depth-first traversal of the clustered graph computing the
   // switch-merge depth.
@@ -1317,10 +1423,10 @@ FunctionalizeCond::SortedMergeNodes() {
 
     size_t new_depth = depth;
     if (!n->merge_nodes.empty()) {
-      queue.emplace_back(depth, n);
       --new_depth;
     }
     if (!n->switch_nodes.empty()) {
+      queue.emplace_back(depth, n);
       ++new_depth;
     }
     for (Cluster* e : n->out_nodes) {
@@ -1350,25 +1456,30 @@ Status FunctionalizeCond::Functionalize(Graph* graph,
   }
   fc.CreateClusteredGraph();
 
-  auto queue = fc.SortedMergeNodes();
+  auto queue = fc.SortedSwitchNodes();
   for (auto it = queue.begin(); it != queue.end();) {
-    Cluster* merge_cluster = (*it).second;
+    Cluster* switch_cluster = (*it).second;
     ++it;
-    if (merge_cluster->in_nodes.size() == 1) {
-      TF_RETURN_IF_ERROR(fc.RemoveTrivialMerge(merge_cluster));
+    if (switch_cluster->out_nodes.size() == 1) {
+      TF_RETURN_IF_ERROR(fc.RemoveTrivialSwitch(switch_cluster));
     } else {
-      TF_RETURN_IF_ERROR(fc.ConvertMergeToXlaIf(merge_cluster));
+      TF_RETURN_IF_ERROR(fc.ConvertCorrespondingMergeToXlaIf(switch_cluster));
     }
 
-    // Contract newly Merge free merge_cluster with incoming nodes without
+    // Contract newly Switch free switch_cluster with outgoing nodes without
     // Switch or Merge nodes.
-    std::vector<Cluster*> in_nodes(merge_cluster->in_nodes.begin(),
-                                   merge_cluster->in_nodes.end());
-    for (auto in : in_nodes) {
-      if (in->merge_nodes.empty() && in->switch_nodes.empty()) {
-        fc.ContractEdge(in, merge_cluster);
+    for (auto& nodes : {switch_cluster->out_nodes, switch_cluster->in_nodes}) {
+      std::vector<Cluster*> copy_nodes(nodes.begin(), nodes.end());
+      for (auto* node : copy_nodes) {
+        if (node->merge_nodes.empty() && node->switch_nodes.empty()) {
+          fc.ContractEdge(node, switch_cluster);
+        }
       }
     }
+
+    VLOG(3) << "Graph with clusters: "
+            << DebugString(*fc.graph_, &fc.clusters_);
+    VLOG(3) << "ClusteredGraph: " << DebugString(fc.clustered_graph_);
   }
 
   if (!fc.switch_nodes_.empty()) {
-- 
GitLab


From 3a164021037b005452d07d325bdd4f5e8ce8465e Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 20 Nov 2017 18:18:25 -0800
Subject: [PATCH 0161/1225] Update ops-related pbtxt files.

PiperOrigin-RevId: 176447787
---
 tensorflow/core/ops/ops.pbtxt | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/tensorflow/core/ops/ops.pbtxt b/tensorflow/core/ops/ops.pbtxt
index d043696a94..6ce0b70c9d 100644
--- a/tensorflow/core/ops/ops.pbtxt
+++ b/tensorflow/core/ops/ops.pbtxt
@@ -7323,7 +7323,7 @@ op {
   name: "DeserializeSparse"
   input_arg {
     name: "serialized_sparse"
-    description: "1-D, The serialized `SparseTensor` object. Must have 3 columns."
+    description: "The serialized `SparseTensor` objects. The last dimension\nmust have 3 columns."
     type: DT_STRING
   }
   output_arg {
@@ -7341,10 +7341,9 @@ op {
   attr {
     name: "dtype"
     type: "type"
-    description: "The `dtype` of the serialized `SparseTensor` object."
+    description: "The `dtype` of the serialized `SparseTensor` objects."
   }
-  summary: "Deserialize `SparseTensor` from a (serialized) string 3-vector (1-D `Tensor`)"
-  description: "object."
+  summary: "Deserialize `SparseTensor` objects."
 }
 op {
   name: "DestroyResourceOp"
-- 
GitLab


From bb96a309730b9ae409ca5107535493ae40bc58e1 Mon Sep 17 00:00:00 2001
From: Benoit Steiner <bsteiner@google.com>
Date: Mon, 20 Nov 2017 18:48:29 -0800
Subject: [PATCH 0162/1225] Added the ability to report peak memory usage

PiperOrigin-RevId: 176450440
---
 tensorflow/python/BUILD                       |  6 ++-
 tensorflow/python/grappler/cluster.py         |  4 ++
 tensorflow/python/grappler/cost_analyzer.i    | 17 ++-----
 tensorflow/python/grappler/cost_analyzer.py   | 51 +++++++++++++++++--
 .../python/grappler/cost_analyzer_test.py     | 32 ++++++++++--
 5 files changed, 88 insertions(+), 22 deletions(-)

diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD
index a20898e40e..590dbcd462 100644
--- a/tensorflow/python/BUILD
+++ b/tensorflow/python/BUILD
@@ -4422,7 +4422,11 @@ py_library(
         "grappler/cost_analyzer.py",
     ],
     srcs_version = "PY2AND3",
-    deps = [":pywrap_tensorflow_internal"],
+    deps = [
+        ":pywrap_tensorflow_internal",
+        ":tf_cluster",
+        ":tf_item",
+    ],
 )
 
 py_binary(
diff --git a/tensorflow/python/grappler/cluster.py b/tensorflow/python/grappler/cluster.py
index c6ddb803f4..58c7bbbac1 100644
--- a/tensorflow/python/grappler/cluster.py
+++ b/tensorflow/python/grappler/cluster.py
@@ -52,6 +52,10 @@ class Cluster(object):
     if self._tf_cluster is not None:
       tf_cluster.TF_DeleteCluster(self._tf_cluster)
 
+  @property
+  def tf_cluster(self):
+    return self._tf_cluster
+
   def ListDevices(self):
     """Returns the list of available hardware devices."""
     devices = []
diff --git a/tensorflow/python/grappler/cost_analyzer.i b/tensorflow/python/grappler/cost_analyzer.i
index 1f024e439d..0318ff762c 100644
--- a/tensorflow/python/grappler/cost_analyzer.i
+++ b/tensorflow/python/grappler/cost_analyzer.i
@@ -43,7 +43,7 @@ limitations under the License.
 
 %{
 string GenerateCostReport(const tensorflow::MetaGraphDef& metagraph, bool
-per_node_report) {
+per_node_report, tensorflow::grappler::Cluster* cluster) {
   tensorflow::grappler::ItemConfig cfg;
   cfg.apply_optimizations = false;
   std::unique_ptr<tensorflow::grappler::GrapplerItem> item =
@@ -51,20 +51,9 @@ per_node_report) {
   if (!item) {
     return "Error: failed to preprocess metagraph: check your log file for errors";
   }
-  
-  // TODO(bsteiner): we should wrap the tf session instead to properly handle the case of a
-  // distributed setup.
-  const int timeout_s = 3600;
-  int num_cpu_cores = tensorflow::grappler::GetNumAvailableLogicalCPUCores();
-  int num_gpus = tensorflow::grappler::GetNumAvailableGPUs();
-  tensorflow::grappler::SingleMachine cluster(timeout_s, num_cpu_cores, num_gpus);
-  cluster.SetNumWarmupSteps(10);
-  cluster.AllowSoftPlacement(true);
-  cluster.DisableDetailedStats(false);
-  TF_CHECK_OK(cluster.Provision());
 
   string suffix;
-  tensorflow::grappler::CostAnalyzer analyzer(*item, &cluster, suffix);
+  tensorflow::grappler::CostAnalyzer analyzer(*item, cluster, suffix);
 
   std::stringstream os;
   analyzer.GenerateReport(os, per_node_report);
@@ -74,4 +63,4 @@ per_node_report) {
 %}
 
 string GenerateCostReport(const tensorflow::MetaGraphDef& metagraph, bool
-per_node_report);
+                          per_node_report, tensorflow::grappler::Cluster* cluster);
diff --git a/tensorflow/python/grappler/cost_analyzer.py b/tensorflow/python/grappler/cost_analyzer.py
index 75c21e5727..a1ff915c61 100644
--- a/tensorflow/python/grappler/cost_analyzer.py
+++ b/tensorflow/python/grappler/cost_analyzer.py
@@ -20,21 +20,64 @@ from __future__ import print_function
 
 from tensorflow.python import pywrap_tensorflow as tf_wrap
 from tensorflow.python.framework import errors
+from tensorflow.python.grappler import cluster as gcluster
+from tensorflow.python.grappler import item as gitem
 
 
-def GenerateCostReport(metagraph, per_node_report=False):
+def GenerateCostReport(metagraph, per_node_report=False, cluster=None):
   """Analyze the cost of each TensorFlow op and node in the provided metagraph.
 
   Args:
-    metagraph: An TensorFlow MetaGraphDef.
+    metagraph: A TensorFlow MetaGraphDef.
     per_node_report: by default the report contains stats aggregated on a per op
       type basis, setting per_node_report to True adds results for each
       individual node to the report.
+    cluster: Analyze the costs using the specified cluster, or the local machine
+      if no cluster was specified.
 
   Returns:
     A string of cost report.
   """
+  if cluster is None:
+    cluster = gcluster.Cluster(disable_detailed_stats=False)
+
   with errors.raise_exception_on_not_ok_status():
-    ret_from_swig = tf_wrap.GenerateCostReport(metagraph.SerializeToString(),
-                                               per_node_report)
+    ret_from_swig = tf_wrap.GenerateCostReport(
+        metagraph.SerializeToString(), per_node_report, cluster.tf_cluster)
   return ret_from_swig
+
+
+def GenerateMemoryReport(metagraph, detailed_report=True, cluster=None):
+  """Analyze the peak memory usage for the provided metagraph.
+
+  Args:
+    metagraph: A TensorFlow MetaGraphDef.
+    detailed_report: print the live tensors in addition to the peak memory
+      usage.
+    cluster: Analyze the memory using the specified cluster, or the local
+      machine if no cluster was specified.
+
+  Returns:
+    A string with the formatted memory usage.
+  """
+  if cluster is None:
+    cluster = gcluster.Cluster(
+        disable_detailed_stats=True, disable_timeline=True)
+
+  item = gitem.Item(metagraph)
+  peak_usage = cluster.DeterminePeakMemoryUsage(item)
+  report = ""
+  for device, snapshot in peak_usage.items():
+    peak_usage = snapshot[0]
+    report += "Peak usage for device " + device + ": " + str(
+        peak_usage) + " bytes\n"
+    if detailed_report:
+      live_tensors = snapshot[1]
+      for tensor in live_tensors:
+        op_name = tensor[0]
+        output_id = tensor[1]
+        mem_used = tensor[2]
+        report += "  " + str(op_name) + ":" + str(output_id) + " uses " + str(
+            mem_used) + " bytes\n"
+
+  return report
diff --git a/tensorflow/python/grappler/cost_analyzer_test.py b/tensorflow/python/grappler/cost_analyzer_test.py
index d59f1d04f6..f4933a4514 100644
--- a/tensorflow/python/grappler/cost_analyzer_test.py
+++ b/tensorflow/python/grappler/cost_analyzer_test.py
@@ -24,6 +24,7 @@ from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import meta_graph
 from tensorflow.python.framework import ops
+from tensorflow.python.framework import test_util
 from tensorflow.python.grappler import cost_analyzer
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import math_ops
@@ -35,9 +36,9 @@ from tensorflow.python.platform import test
 from tensorflow.python.training import adam
 
 
-class PyWrapOptimizeGraphTest(test.TestCase):
+class CostAnalysisTest(test.TestCase):
 
-  def testBasic(self):
+  def testBasicCost(self):
     """Make sure arguments can be passed correctly."""
     a = constant_op.constant(10, name="a")
     b = constant_op.constant(20, name="b")
@@ -60,7 +61,7 @@ class PyWrapOptimizeGraphTest(test.TestCase):
     # Also print the report to make it easier to debug
     print("{}".format(report))
 
-  def testSmallNetwork(self):
+  def testSmallNetworkCost(self):
     image = array_ops.placeholder(dtypes.float32, shape=[1, 28, 28, 1])
     label = array_ops.placeholder(dtypes.float32, shape=[1, 10])
     w = variables.Variable(
@@ -111,6 +112,31 @@ class PyWrapOptimizeGraphTest(test.TestCase):
       # self.assertTrue(0 < upper)
       # self.assertTrue(lower <= upper)
 
+  def testBasicMemory(self):
+    """Make sure arguments can be passed correctly."""
+    with test_util.device(use_gpu=False):
+      a = constant_op.constant(10, name="a")
+      b = constant_op.constant(20, name="b")
+      c = math_ops.add_n([a, b], name="c")
+      d = math_ops.add_n([b, c], name="d")
+      train_op = ops.get_collection_ref(ops.GraphKeys.TRAIN_OP)
+      train_op.append(d)
+      mg = meta_graph.create_meta_graph_def(graph=ops.get_default_graph())
+
+    report = cost_analyzer.GenerateMemoryReport(mg)
+
+    # Print the report to make it easier to debug
+    print("{}".format(report))
+
+    # Check the report
+    self.assertTrue(
+        "Peak usage for device /job:localhost/replica:0/task:0/cpu:0: 16 bytes"
+        in report)
+    self.assertTrue("  a:0 uses 4 bytes" in report)
+    self.assertTrue("  b:0 uses 4 bytes" in report)
+    self.assertTrue("  c:0 uses 4 bytes" in report)
+    self.assertTrue("  d:0 uses 4 bytes" in report)
+
 
 if __name__ == "__main__":
   test.main()
-- 
GitLab


From b525ea6798175f4c95996a3666c70de5c00a9a0c Mon Sep 17 00:00:00 2001
From: Justin Lebar <jlebar@google.com>
Date: Mon, 20 Nov 2017 20:10:19 -0800
Subject: [PATCH 0163/1225] [XLA] Rework ScopedLoggingTimer into macros, and
 add some tracing to gpu_compiler.

PiperOrigin-RevId: 176455799
---
 .../compiler/xla/service/cpu/cpu_compiler.cc  |  2 +-
 .../compiler/xla/service/gpu/gpu_compiler.cc  | 29 +++++++++------
 .../gpu/llvm_gpu_backend/gpu_backend_lib.cc   |  5 ++-
 tensorflow/compiler/xla/util.cc               |  8 ++---
 tensorflow/compiler/xla/util.h                | 36 +++++++++++++++++--
 5 files changed, 58 insertions(+), 22 deletions(-)

diff --git a/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc b/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc
index 592751e118..88f7e7a93f 100644
--- a/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc
+++ b/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc
@@ -444,7 +444,7 @@ StatusOr<std::unique_ptr<Executable>> CpuCompiler::RunBackend(
     perftools::gputools::StreamExecutor* stream_exec) {
   const string timer_message =
       "Compiling [" + module->name() + "] for CPU using JIT";
-  ScopedLoggingTimer compiling_timer(timer_message, 1);
+  XLA_SCOPED_LOGGING_TIMER(timer_message);
 
   VLOG(1) << "Compiling: " << module->name();
   TF_RET_CHECK(stream_exec != nullptr);
diff --git a/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc b/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc
index 937d453a5c..e84c390745 100644
--- a/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc
+++ b/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc
@@ -299,17 +299,17 @@ GpuCompiler::GpuCompiler()
 
 StatusOr<std::unique_ptr<HloModule>> GpuCompiler::RunHloPasses(
     std::unique_ptr<HloModule> module, se::StreamExecutor* /*stream_exec*/) {
-  {
-    Tracing::TraceMe annotation("HLO Transforms", module->name(),
-                                /*is_expensive=*/true);
-    TF_RETURN_IF_ERROR(
-        OptimizeHloModule(module.get(), ShapeSizeBytesFunction()));
-  }
+  XLA_SCOPED_LOGGING_TIMER("GpuCompiler::RunHloPasses");
+  Tracing::TraceMe annotation("HLO Transforms", module->name(),
+                              /*is_expensive=*/true);
+  TF_RETURN_IF_ERROR(OptimizeHloModule(module.get(), ShapeSizeBytesFunction()));
   return std::move(module);
 }
 
 StatusOr<std::unique_ptr<Executable>> GpuCompiler::RunBackend(
     std::unique_ptr<HloModule> module, se::StreamExecutor* stream_exec) {
+  XLA_SCOPED_LOGGING_TIMER("GpuCompiler::RunBackend");
+
   TF_RET_CHECK(stream_exec != nullptr);
 
   TF_RETURN_IF_ERROR(
@@ -366,8 +366,11 @@ StatusOr<std::unique_ptr<Executable>> GpuCompiler::RunBackend(
   HloComputation* entry_computation = module->entry_computation();
   IrEmitterUnnested ir_emitter(module->config(), entry_computation,
                                &ir_emitter_context);
-  TF_RETURN_IF_ERROR(
-      entry_computation->root_instruction()->Accept(&ir_emitter));
+  {
+    XLA_SCOPED_LOGGING_TIMER("GpuCompiler::RunBackend - IR emission");
+    TF_RETURN_IF_ERROR(
+        entry_computation->root_instruction()->Accept(&ir_emitter));
+  }
 
   if (user_pre_optimization_hook_) {
     TF_CHECK_OK(user_pre_optimization_hook_(llvm_module));
@@ -416,9 +419,12 @@ StatusOr<std::unique_ptr<Executable>> GpuCompiler::RunBackend(
     cc_minor = 0;
   }
 
-  TF_ASSIGN_OR_RETURN(string ptx,
-                      CompileToPtx(&llvm_module, {cc_major, cc_minor},
-                                   module->config(), libdevice_dir));
+  string ptx;
+  {
+    XLA_SCOPED_LOGGING_TIMER("GpuCompiler::RunBackend - CompileToPtx");
+    TF_ASSIGN_OR_RETURN(ptx, CompileToPtx(&llvm_module, {cc_major, cc_minor},
+                                          module->config(), libdevice_dir));
+  }
 
   if (!ir_dump_directory.empty()) {
     TF_RETURN_IF_ERROR(llvm_ir::DumpIRToDirectory(
@@ -474,6 +480,7 @@ StatusOr<std::unique_ptr<Executable>> GpuCompiler::RunBackend(
 std::vector<uint8> GpuCompiler::CompilePtxOrGetCachedResult(const string& ptx,
                                                             int cc_major,
                                                             int cc_minor) {
+  XLA_SCOPED_LOGGING_TIMER("GpuCompiler::CompilePtxOrGetCachedResult");
   Tracing::TraceMe annotation("PTX->CUBIN", /*is_expensive=*/true);
   bool inserted;
   decltype(compilation_cache_.begin()) iter;
diff --git a/tensorflow/compiler/xla/service/gpu/llvm_gpu_backend/gpu_backend_lib.cc b/tensorflow/compiler/xla/service/gpu/llvm_gpu_backend/gpu_backend_lib.cc
index 1cb963be61..a574123d6b 100644
--- a/tensorflow/compiler/xla/service/gpu/llvm_gpu_backend/gpu_backend_lib.cc
+++ b/tensorflow/compiler/xla/service/gpu/llvm_gpu_backend/gpu_backend_lib.cc
@@ -492,9 +492,8 @@ StatusOr<string> CompileToPtx(llvm::Module* module,
     tensorflow::port::Tracing::TraceMe annotation(
         "Compiling IR", llvm_ir::AsString(module->getName()),
         /*is_expensive=*/true);
-    ScopedLoggingTimer compilation_timer(
-        "Compile module " + llvm_ir::AsString(module->getName()),
-        /*vlog_level=*/2);
+    XLA_SCOPED_LOGGING_TIMER("Compile module " +
+                             llvm_ir::AsString(module->getName()));
     TF_ASSIGN_OR_RETURN(
         ptx, CompileModuleToPtx(module, compute_capability, hlo_module_config,
                                 libdevice_dir_path));
diff --git a/tensorflow/compiler/xla/util.cc b/tensorflow/compiler/xla/util.cc
index 2624ef0252..e595df3052 100644
--- a/tensorflow/compiler/xla/util.cc
+++ b/tensorflow/compiler/xla/util.cc
@@ -42,15 +42,15 @@ Status WithLogBacktrace(const Status& status) {
 
 }  // namespace
 
-ScopedLoggingTimer::ScopedLoggingTimer(const string& label, int32 vlog_level)
-    : label(label), vlog_level(vlog_level) {
-  if (VLOG_IS_ON(vlog_level)) {
+ScopedLoggingTimer::ScopedLoggingTimer(const string& label, bool enabled)
+    : enabled(enabled), label(label) {
+  if (enabled) {
     start_micros = tensorflow::Env::Default()->NowMicros();
   }
 }
 
 ScopedLoggingTimer::~ScopedLoggingTimer() {
-  if (VLOG_IS_ON(vlog_level)) {
+  if (enabled) {
     uint64 end_micros = tensorflow::Env::Default()->NowMicros();
     double secs = (end_micros - start_micros) / 1000000.0;
 
diff --git a/tensorflow/compiler/xla/util.h b/tensorflow/compiler/xla/util.h
index f58f57b443..b722095d1f 100644
--- a/tensorflow/compiler/xla/util.h
+++ b/tensorflow/compiler/xla/util.h
@@ -50,13 +50,43 @@ using DimensionVector = tensorflow::gtl::InlinedVector<int64, kInlineRank>;
 // RAII timer that logs with a given label the wall clock time duration in human
 // readable form. This differs from base's ElapsedTimer primarily in that it
 // spits out the human-readable duration form.
+//
+// By default, the timing traces are only printed at VLOG(1) and above:
+//
+//   XLA_SCOPED_LOGGING_TIMER("fooing bar");  // nop if !VLOG_IS_ON(1).
+//
+// but you can control this via:
+//
+//   XLA_SCOPED_LOGGING_TIMER_LEVEL("fooing bar", 2);  // nop if !VLOG_IS_ON(2)
+//
+#define XLA_SCOPED_LOGGING_TIMER(label) \
+  XLA_SCOPED_LOGGING_TIMER_HELPER(label, 1, __COUNTER__)
+#define XLA_SCOPED_LOGGING_TIMER_LEVEL(label, level) \
+  XLA_SCOPED_LOGGING_TIMER_HELPER(label, level, __COUNTER__)
+
+// Helper for implementing macros above.  Do not use directly.
+//
+// Forces the evaluation of "counter", which we expect is equal to __COUNTER__.
+#define XLA_SCOPED_LOGGING_TIMER_HELPER(label, level, counter) \
+  XLA_SCOPED_LOGGING_TIMER_HELPER2(label, level, counter)
+
+// Helper for macros above.  Don't use directly.
+#define XLA_SCOPED_LOGGING_TIMER_HELPER2(label, level, counter)      \
+  ::xla::ScopedLoggingTimer XLA_ScopedLoggingTimerInstance##counter( \
+      label, VLOG_IS_ON(level))
+
+// RAII timer for XLA_SCOPED_LOGGING_TIMER and XLA_SCOPED_LOGGING_TIMER_LEVEL
+// macros above.  Recommended usage is via the macros so you don't have to give
+// the timer a name or worry about calling VLOG_IS_ON yourself.
 struct ScopedLoggingTimer {
-  explicit ScopedLoggingTimer(const string& label, int32 vlog_level = 1);
+  // The timer does nothing if enabled is false.  This lets you pass in your
+  // file's VLOG_IS_ON value.
+  ScopedLoggingTimer(const string& label, bool enabled);
   ~ScopedLoggingTimer();
 
-  uint64 start_micros;
+  bool enabled;
   string label;
-  int32 vlog_level;
+  uint64 start_micros;
 };
 
 // Given a vector<T>, returns a MutableArraySlice<char> that points at its
-- 
GitLab


From 9a267fe324044578ec997510d9b2859b5e67842e Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 20 Nov 2017 20:27:40 -0800
Subject: [PATCH 0164/1225] [XLA] Handle reduce window on a scalar value.

PiperOrigin-RevId: 176457058
---
 .../compiler/xla/service/algebraic_simplifier.cc       |  9 +++++++++
 tensorflow/compiler/xla/tests/reduce_window_test.cc    | 10 ++++++++++
 2 files changed, 19 insertions(+)

diff --git a/tensorflow/compiler/xla/service/algebraic_simplifier.cc b/tensorflow/compiler/xla/service/algebraic_simplifier.cc
index bc9a3ac43d..1764f7f3dc 100644
--- a/tensorflow/compiler/xla/service/algebraic_simplifier.cc
+++ b/tensorflow/compiler/xla/service/algebraic_simplifier.cc
@@ -1398,6 +1398,15 @@ Status AlgebraicSimplifierVisitor::HandleReduceWindow(
   auto operand = reduce_window->mutable_operand(0);
   const Window& window = reduce_window->window();
   auto function = reduce_window->to_apply();
+  if (ShapeUtil::IsScalar(operand->shape())) {
+    TF_RET_CHECK(ShapeUtil::IsScalar(reduce_window->shape()));
+    return ReplaceWithNewInstruction(
+        reduce_window,
+        HloInstruction::CreateMap(reduce_window->shape(),
+                                  {operand, reduce_window->mutable_operand(1)},
+                                  function));
+  }
+
   VLOG(10) << "Considering folding Pad: " << operand->ToString()
            << "\ninto reduce-window: " << reduce_window->ToString();
 
diff --git a/tensorflow/compiler/xla/tests/reduce_window_test.cc b/tensorflow/compiler/xla/tests/reduce_window_test.cc
index 6c9b62b48d..0601a1466b 100644
--- a/tensorflow/compiler/xla/tests/reduce_window_test.cc
+++ b/tensorflow/compiler/xla/tests/reduce_window_test.cc
@@ -90,6 +90,16 @@ TEST_F(ReduceWindowTest, MismatchedRanksGivesErrorStatus) {
               ::testing::HasSubstr("Want input dimensions size"));
 }
 
+// Regression test for b/68964348.
+TEST_F(ReduceWindowTest, R0ReduceWindow) {
+  auto input = builder_.ConstantR0<float>(42);
+  auto init = builder_.ConstantR0<float>(1.0);
+  builder_.ReduceWindow(input, init, CreateScalarAddComputation(F32, &builder_),
+                        /*window_dimensions=*/{},
+                        /*window_strides=*/{}, Padding::kSame);
+  ComputeAndCompareR0<float>(&builder_, 43, {}, ErrorSpec(0.00001));
+}
+
 TEST_F(ReduceWindowTest, Min3In5Stride2) {
   const auto input = builder_.ConstantR1<float>({10000, 1000, 100, 10, 1});
   ReduceWindowMin(input, {3}, {2}, Padding::kValid);
-- 
GitLab


From 044cb401046401b7956234b31ecdafe4d86cc6d9 Mon Sep 17 00:00:00 2001
From: scott <scotthuang1989@163.com>
Date: Tue, 21 Nov 2017 21:52:48 +0800
Subject: [PATCH 0165/1225] fix: import error

---
 tensorflow/contrib/slim/README.md | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/tensorflow/contrib/slim/README.md b/tensorflow/contrib/slim/README.md
index f7a85557ca..dc92ae0c85 100644
--- a/tensorflow/contrib/slim/README.md
+++ b/tensorflow/contrib/slim/README.md
@@ -441,7 +441,8 @@ module. Consider the simple case where we want to train the VGG network:
 
 ```python
 import tensorflow as tf
-vgg = tf.contrib.slim.nets.vgg
+import tensorflow.contrib.slim.nets as nets
+vgg = nets.vgg
 
 # Load the images and labels.
 images, labels = ...
@@ -559,9 +560,10 @@ examine the following sample of training the VGG network:
 
 ```python
 import tensorflow as tf
+import tensorflow.contrib.slim.nets as nets
 
 slim = tf.contrib.slim
-vgg = tf.contrib.slim.nets.vgg
+vgg = nets.vgg
 
 ...
 
@@ -809,9 +811,10 @@ Putting it all together:
 
 ```python
 import tensorflow as tf
+import tensorflow.contrib.slim.nets as nets
 
 slim = tf.contrib.slim
-vgg = tf.contrib.slim.nets.vgg
+vgg = nets.vgg
 
 
 # Load the data
-- 
GitLab


From 3def704f255c3af59fd3225dba862834e35b3493 Mon Sep 17 00:00:00 2001
From: Christopher Shallue <shallue@google.com>
Date: Tue, 21 Nov 2017 09:40:32 -0800
Subject: [PATCH 0166/1225] Add method HParams.get(key, default=None)

PiperOrigin-RevId: 176520519
---
 .../training/python/training/hparam.py        | 27 ++++++++++++
 .../training/python/training/hparam_test.py   | 43 +++++++++++++++++++
 2 files changed, 70 insertions(+)

diff --git a/tensorflow/contrib/training/python/training/hparam.py b/tensorflow/contrib/training/python/training/hparam.py
index 7db625cdd5..8d5f47ca4d 100644
--- a/tensorflow/contrib/training/python/training/hparam.py
+++ b/tensorflow/contrib/training/python/training/hparam.py
@@ -582,6 +582,33 @@ class HParams(object):
     """
     return {n: getattr(self, n) for n in self._hparam_types.keys()}
 
+  def get(self, key, default=None):
+    """Returns the value of `key` if it exists, else `default`."""
+    if key in self._hparam_types:
+      # Ensure that default is compatible with the parameter type.
+      if default is not None:
+        param_type, is_param_list = self._hparam_types[key]
+        type_str = 'list<%s>' % param_type if is_param_list else str(param_type)
+        fail_msg = ("Hparam '%s' of type '%s' is incompatible with "
+                    'default=%s' % (key, type_str, default))
+
+        is_default_list = isinstance(default, list)
+        if is_param_list != is_default_list:
+          raise ValueError(fail_msg)
+
+        try:
+          if is_default_list:
+            for value in default:
+              _cast_to_type_if_compatible(key, param_type, value)
+          else:
+            _cast_to_type_if_compatible(key, param_type, default)
+        except ValueError as e:
+          raise ValueError('%s. %s' % (fail_msg, e))
+
+      return getattr(self, key)
+
+    return default
+
   def __contains__(self, key):
     return key in self._hparam_types
 
diff --git a/tensorflow/contrib/training/python/training/hparam_test.py b/tensorflow/contrib/training/python/training/hparam_test.py
index 949c262f5b..643905d3a6 100644
--- a/tensorflow/contrib/training/python/training/hparam_test.py
+++ b/tensorflow/contrib/training/python/training/hparam_test.py
@@ -364,6 +364,49 @@ class HParamsTest(test.TestCase):
     with self.assertRaisesRegexp(AssertionError, ''):
       hparam.HParams(hparam_def=[1, 2, 3])
 
+  def testGet(self):
+    hparams = hparam.HParams(aaa=1, b=2.0, c_c='relu6', d=True, e=[5.0, 6.0])
+
+    # Existing parameters with default=None.
+    self.assertEqual(1, hparams.get('aaa'))
+    self.assertEqual(2.0, hparams.get('b'))
+    self.assertEqual('relu6', hparams.get('c_c'))
+    self.assertEqual(True, hparams.get('d'))
+    self.assertEqual([5.0, 6.0], hparams.get('e', None))
+
+    # Existing parameters with compatible defaults.
+    self.assertEqual(1, hparams.get('aaa', 2))
+    self.assertEqual(2.0, hparams.get('b', 3.0))
+    self.assertEqual(2.0, hparams.get('b', 3))
+    self.assertEqual('relu6', hparams.get('c_c', 'default'))
+    self.assertEqual(True, hparams.get('d', True))
+    self.assertEqual([5.0, 6.0], hparams.get('e', [1.0, 2.0, 3.0]))
+    self.assertEqual([5.0, 6.0], hparams.get('e', [1, 2, 3]))
+
+    # Existing parameters with incompatible defaults.
+    with self.assertRaises(ValueError):
+      hparams.get('aaa', 2.0)
+
+    with self.assertRaises(ValueError):
+      hparams.get('b', False)
+
+    with self.assertRaises(ValueError):
+      hparams.get('c_c', [1, 2, 3])
+
+    with self.assertRaises(ValueError):
+      hparams.get('d', 'relu')
+
+    with self.assertRaises(ValueError):
+      hparams.get('e', 123.0)
+
+    with self.assertRaises(ValueError):
+      hparams.get('e', ['a', 'b', 'c'])
+
+    # Nonexistent parameters.
+    self.assertEqual(None, hparams.get('unknown'))
+    self.assertEqual(123, hparams.get('unknown', 123))
+    self.assertEqual([1, 2, 3], hparams.get('unknown', [1, 2, 3]))
+
 
 if __name__ == '__main__':
   test.main()
-- 
GitLab


From 745eb9242d3b3b8e860abce018c74444a02e0926 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 21 Nov 2017 10:01:57 -0800
Subject: [PATCH 0167/1225] Fix tests that prevent enabling the dependency
 optimizer in grappler.   - Most of these use constant inputs that cause the
 new optimizer to turn various backprop nodes into NoOps because their outputs
 are not consumed.

Improve or fix a few issues in the dependency optimizer:
  - Prune duplicate control inputs. Don't add new control inputs if the target already has the source as a regular input.
  - Don't turn Merge, Switch, Enter, Exit, NextIteration, _TPUExecute or _TPUCompile nodes into NoOps.
  - Don't call ConstantFolding::AddControlDependency() when re-routing, since the nodes
    in questions already have control inputs, so cannot be Switch nodes.
  - Don't clear inputs from NoOps unless we know the fetch nodes.

PiperOrigin-RevId: 176523299
---
 tensorflow/core/debug/debug_gateway_test.cc   |  13 +-
 tensorflow/core/grappler/optimizers/BUILD     |   2 -
 .../optimizers/dependency_optimizer.cc        | 150 ++++++++++++------
 .../optimizers/dependency_optimizer.h         |   2 +-
 .../optimizers/dependency_optimizer_test.cc   |   1 +
 .../lib/debug_graph_reconstruction_test.py    |   9 +-
 .../python/debug/lib/session_debug_testlib.py |   5 +-
 7 files changed, 122 insertions(+), 60 deletions(-)

diff --git a/tensorflow/core/debug/debug_gateway_test.cc b/tensorflow/core/debug/debug_gateway_test.cc
index 3903040e4d..5758334906 100644
--- a/tensorflow/core/debug/debug_gateway_test.cc
+++ b/tensorflow/core/debug/debug_gateway_test.cc
@@ -40,6 +40,9 @@ std::unique_ptr<DirectSession> CreateSession() {
   options.config.mutable_graph_options()
       ->mutable_rewrite_options()
       ->set_constant_folding(RewriterConfig::OFF);
+  options.config.mutable_graph_options()
+      ->mutable_rewrite_options()
+      ->set_dependency_optimization(RewriterConfig::OFF);
 
   return std::unique_ptr<DirectSession>(
       dynamic_cast<DirectSession*>(NewSession(options)));
@@ -55,7 +58,7 @@ class SessionDebugMinusAXTest : public ::testing::Test {
 #elif defined(TENSORFLOW_USE_SYCL)
     const string kDeviceName = "/job:localhost/replica:0/task:0/device:SYCL:0";
 #else
-    const string kDeviceName = "/job:localhost/replica:0/task:0/cpu:0";
+    const string kDeviceName = "/job:localhost/replica:0/task:0/device:CPU:0";
 #endif
 
     Tensor a_tensor(DT_FLOAT, TensorShape({2, 2}));
@@ -503,7 +506,7 @@ TEST_F(SessionDebugMinusAXTest,
 }
 #endif
 
-class SessionDebugOutputSlotWithoutOngoingEdgeTest : public ::testing::Test {
+class SessionDebugOutputSlotWithoutOutgoingEdgeTest : public ::testing::Test {
  public:
   void Initialize() {
     Graph graph(OpRegistry::Global());
@@ -513,7 +516,7 @@ class SessionDebugOutputSlotWithoutOngoingEdgeTest : public ::testing::Test {
 #elif defined(TENSORFLOW_USE_SYCL)
     const string kDeviceName = "/job:localhost/replica:0/task:0/device:SYCL:0";
 #else
-    const string kDeviceName = "/job:localhost/replica:0/task:0/cpu:0";
+    const string kDeviceName = "/job:localhost/replica:0/task:0/device:CPU:0";
 #endif
 
     Tensor a_tensor(DT_FLOAT, TensorShape({1, 1}));
@@ -540,7 +543,7 @@ class SessionDebugOutputSlotWithoutOngoingEdgeTest : public ::testing::Test {
   GraphDef def_;
 };
 
-TEST_F(SessionDebugOutputSlotWithoutOngoingEdgeTest,
+TEST_F(SessionDebugOutputSlotWithoutOutgoingEdgeTest,
        WatchSlotWithoutOutgoingEdge) {
   Initialize();
   auto session = CreateSession();
@@ -615,7 +618,7 @@ class SessionDebugVariableTest : public ::testing::Test {
 #elif defined(TENSORFLOW_USE_SYCL)
     const string kDeviceName = "/job:localhost/replica:0/task:0/device:SYCL:0";
 #else
-    const string kDeviceName = "/job:localhost/replica:0/task:0/cpu:0";
+    const string kDeviceName = "/job:localhost/replica:0/task:0/device:CPU:0";
 #endif
 
     // Define variable node.
diff --git a/tensorflow/core/grappler/optimizers/BUILD b/tensorflow/core/grappler/optimizers/BUILD
index dbfa8ae503..08344b0ada 100644
--- a/tensorflow/core/grappler/optimizers/BUILD
+++ b/tensorflow/core/grappler/optimizers/BUILD
@@ -202,7 +202,6 @@ cc_library(
     ],
     visibility = ["//visibility:public"],
     deps = [
-        ":arithmetic_optimizer",
         ":constant_folding",
         ":graph_optimizer",
         "//tensorflow/core:framework",
@@ -213,7 +212,6 @@ cc_library(
         "//tensorflow/core/grappler:op_types",
         "//tensorflow/core/grappler:utils",
         "//tensorflow/core/grappler/costs:graph_properties",
-        "//tensorflow/core/grappler/utils:frame",
     ],
 )
 
diff --git a/tensorflow/core/grappler/optimizers/dependency_optimizer.cc b/tensorflow/core/grappler/optimizers/dependency_optimizer.cc
index 57eee60646..0cc4585ba4 100644
--- a/tensorflow/core/grappler/optimizers/dependency_optimizer.cc
+++ b/tensorflow/core/grappler/optimizers/dependency_optimizer.cc
@@ -22,9 +22,7 @@ limitations under the License.
 #include "tensorflow/core/grappler/costs/graph_properties.h"
 #include "tensorflow/core/grappler/grappler_item.h"
 #include "tensorflow/core/grappler/op_types.h"
-#include "tensorflow/core/grappler/optimizers/arithmetic_optimizer.h"
 #include "tensorflow/core/grappler/optimizers/constant_folding.h"
-#include "tensorflow/core/grappler/utils/frame.h"
 #include "tensorflow/core/lib/core/errors.h"
 #include "tensorflow/core/lib/core/stringpiece.h"
 #include "tensorflow/core/lib/strings/strcat.h"
@@ -70,7 +68,7 @@ class SetVector {
 bool HasRegularOutputs(const NodeDef& node, const NodeMap& node_map) {
   for (const NodeDef* output : node_map.GetOutputs(node.name())) {
     for (const string& input : output->input()) {
-      if (input == node.name()) {
+      if (!IsControlInput(input) && NodeName(input) == node.name()) {
         return true;
       }
     }
@@ -78,25 +76,53 @@ bool HasRegularOutputs(const NodeDef& node, const NodeMap& node_map) {
   return false;
 }
 
-int FindInputSlot(const NodeDef& node, const string& input) {
-  for (int i = 0; i < node.input_size(); ++i) {
-    if (node.input(i) == input) {
-      return i;
+int RemoveInput(NodeDef* node, const string& input, NodeMap* node_map) {
+  int num_removed = 0;
+  int pos = 0;
+  while (pos < node->input_size()) {
+    if (node->input(pos) == input) {
+      node->mutable_input()->SwapElements(pos, node->input_size() - 1);
+      node->mutable_input()->RemoveLast();
+      node_map->RemoveOutput(node->name(), NodeName(input));
+    } else {
+      ++pos;
+    }
+    ++num_removed;
+  }
+  return num_removed;
+}
+
+// Remove dulicate control inputs.
+void PruneControlInputs(NodeDef* node) {
+  std::unordered_set<string> inputs;
+  int pos = 0;
+  while (pos < node->input_size()) {
+    const string& input = node->input(pos);
+    // TODO(rmlarsen): Remove control inputs that also appears as a regular
+    // inputs. Currently, doing so breaks testControlFlowStrictness in
+    // python/framework/function_test.
+    //    if (!inputs.insert(NodeName(input)).second && IsControlInput(input)) {
+    if (IsControlInput(input) && !inputs.insert(input).second) {
+      VLOG(1) << "**** Removing duplicate control input: " << input
+              << " from node " << node->DebugString();
+      node->mutable_input()->SwapElements(pos, node->input_size() - 1);
+      node->mutable_input()->RemoveLast();
+    } else {
+      ++pos;
     }
   }
-  return -1;
 }
 
 }  // namespace
 
 bool DependencyOptimizer::SafeToConvertToNoOp(const NodeDef& node) {
-  if (!has_fetch_ || HasRegularOutputs(node, *node_map_)) {
+  if (nodes_to_preserve_.find(node.name()) != nodes_to_preserve_.end()) {
     return false;
   }
-  if (nodes_to_preserve_.find(node.name()) != nodes_to_preserve_.end()) {
+  if (!fetch_nodes_known_ || HasRegularOutputs(node, *node_map_)) {
     return false;
   }
-  if (IsMerge(node)) {
+  if (IsMerge(node) || IsSwitch(node)) {
     return false;
   }
   if (ModifiesFrameInfo(node)) {
@@ -105,21 +131,21 @@ bool DependencyOptimizer::SafeToConvertToNoOp(const NodeDef& node) {
   if (!IsFreeOfSideEffect(node)) {
     return false;
   }
-
+  if (node.op().rfind("Submodel", 0) == 0) {
+    return false;
+  }
   const OpDef* op_def = nullptr;
   Status status = OpRegistry::Global()->LookUpOpDef(node.op(), &op_def);
   if (!status.ok() || op_def->output_arg_size() == 0) {
     return false;
   }
 
-  // TODO(rmlarsen): We have to skip Const nodes to make
-  // core/debug/debug_gateway_test pass. See if we can fix that test.
   // TODO(rmlarsen): We have to skip Identity nodes to make an obsolete test in
   // python/training/session_manager_test.py pass. See if we can fix or get rid
   // of that test.
-  const std::unordered_set<string> do_not_rewrite_ops = {
-      "Assert", "CheckNumerics",         "Const",      "Identity", "_Retval",
-      "_Arg",   "_ParallelConcatUpdate", "_TPUExecute"};
+  const std::unordered_set<string> do_not_rewrite_ops{
+      "Assert", "CheckNumerics",         "Identity",    "_Retval",
+      "_Arg",   "_ParallelConcatUpdate", "_TPUExecute", "_TPUCompile"};
   return do_not_rewrite_ops.find(node.op()) == do_not_rewrite_ops.end();
 }
 
@@ -127,20 +153,33 @@ string DependencyOptimizer::TryOptimizeDependencies(
     NodeDef* node, GraphDef* graph, std::vector<NodeDef*>* new_nodes) {
   // Change ops that only have control dependencies as outputs to NoOps.
   if (node->op() != "NoOp" && SafeToConvertToNoOp(*node)) {
-    VLOG(2) << "***** Replacing  " << node->name() << " (" << node->op()
+    VLOG(1) << "***** Replacing  " << node->name() << " (" << node->op()
             << ") with NoOp.";
     // The outputs of this node are not consumed. Replace its inputs with
     // control dependencies and replace the op itself with the NoOp op.
-    for (int i = 0; i < node->input_size(); ++i) {
-      const string& old_input = node->input(i);
+    std::unordered_set<string> ctrl_inputs;
+    int pos = 0;
+    while (pos < node->input_size()) {
+      const string& old_input = node->input(pos);
       if (IsControlInput(old_input)) {
+        if (!ctrl_inputs.insert(old_input).second) {
+          // We found a duplicate control input. Remove it.
+          node->mutable_input()->SwapElements(pos, node->input_size() - 1);
+          node->mutable_input()->RemoveLast();
+        } else {
+          ++pos;
+        }
         continue;
       }
       const string ctrl_input = ConstantFolding::AddControlDependency(
           old_input, graph, node_map_.get());
-      node->set_input(i, ctrl_input);
-      node_map_->UpdateInput(node->name(), old_input, ctrl_input);
-      new_nodes->push_back(node_map_->GetNode(old_input));
+      if (ctrl_inputs.insert(ctrl_input).second) {
+        node->set_input(pos, ctrl_input);
+        node_map_->UpdateInput(node->name(), old_input, ctrl_input);
+        auto old_input_node = node_map_->GetNode(old_input);
+        new_nodes->push_back(old_input_node);
+      }
+      ++pos;
     }
     node->set_op("NoOp");
     node->clear_attr();
@@ -164,40 +203,50 @@ string DependencyOptimizer::TryOptimizeDependencies(
   //           +------+ --^> c         +---+ --^> c
   if (node->op() == "NoOp" &&
       nodes_to_preserve_.find(node->name()) == nodes_to_preserve_.end()) {
-    auto outputs = node_map_->GetOutputs(node->name());
-    const int num_outputs = outputs.size();
+    const auto output_nodes = node_map_->GetOutputs(node->name());
+    const int num_outputs = output_nodes.size();
     const int num_inputs = node->input_size();
     if (num_inputs > 1 && num_outputs > 1) {
       return "";
     }
-
-    for (auto consumer : outputs) {
+    VLOG(1) << "***** Rerouting input around  " << node->name();
+    std::vector<NodeDef*> input_nodes;
+    for (int i = 0; i < num_inputs; ++i) {
+      NodeDef* tmp = node_map_->GetNode(node->input(i));
+      if (tmp != nullptr) {
+        input_nodes.push_back(tmp);
+      }
+    }
+    for (auto consumer : output_nodes) {
+      bool updated_consumer = false;
+      VLOG(1) << "***** Considering consumer  " << consumer->name() << "\n"
+              << consumer->DebugString();
       for (int i = 0; i < num_inputs; ++i) {
         const string& input = node->input(i);
-        // Forward dependencies from inputs to consumer if it doesn't already
+        // Forward dependency from input to consumer if it doesn't already
         // depend on it.
-        if (node_map_->GetOutputs(input).count(consumer) == 0) {
-          consumer->add_input(ConstantFolding::AddControlDependency(
-              input, graph, node_map_.get()));
+        if (node_map_->GetOutputs(NodeName(input)).count(consumer) == 0) {
+          consumer->add_input(input);
+          updated_consumer = true;
           node_map_->AddOutput(NodeName(input), consumer->name());
+          new_nodes->push_back(input_nodes[i]);
         }
-        new_nodes->push_back(node_map_->GetNode(input));
       }
       // Remove dependency on node from consumer.
-      int pos = FindInputSlot(*consumer, AsControlDependency(node->name()));
-      if (pos >= 0) {
-        consumer->mutable_input()->SwapElements(pos,
-                                                consumer->input_size() - 1);
-        consumer->mutable_input()->RemoveLast();
-        node_map_->RemoveOutput(node->name(), consumer->name());
+      updated_consumer |= RemoveInput(
+          consumer, AsControlDependency(node->name()), node_map_.get());
+      if (updated_consumer) {
+        VLOG(1) << "***** Updated consumer  " << consumer->name() << " ("
+                << consumer->op() << ")";
         new_nodes->push_back(consumer);
       }
     }
 
     // Clear all control inputs to node.
-    node_map_->RemoveInputs(node->name());
-    node->clear_input();
-    return "";
+    if (fetch_nodes_known_) {
+      node_map_->RemoveInputs(node->name());
+      node->clear_input();
+    }
   }
 
   return "";
@@ -208,9 +257,10 @@ Status DependencyOptimizer::OptimizeDependencies(GraphDef* optimized_graph) {
   // in the ArithmeticOptimizer. Dedup this.
   SetVector<NodeDef*> nodes_to_simplify;
   for (int i = 0; i < optimized_graph->node_size(); ++i) {
-    const NodeDef& node = optimized_graph->node(i);
-    if (node.op() == "NoOp" || SafeToConvertToNoOp(node)) {
-      nodes_to_simplify.PushBack(optimized_graph->mutable_node()->Mutable(i));
+    NodeDef* node = optimized_graph->mutable_node(i);
+    if (node->op() == "NoOp" || SafeToConvertToNoOp(*node)) {
+      PruneControlInputs(node);
+      nodes_to_simplify.PushBack(node);
     }
   }
   while (!nodes_to_simplify.Empty()) {
@@ -244,8 +294,6 @@ Status DependencyOptimizer::OptimizeDependencies(GraphDef* optimized_graph) {
                      ? AsControlDependency(NodeName(simplified_tensor))
                      : simplified_tensor);
           }
-          VLOG(2) << "Update input " << consumer->input(i) << " of "
-                  << consumer->name() << " to " << simplified_tensor;
         }
         node_map_->UpdateInput(consumer->name(), node->name(),
                                simplified_tensor);
@@ -256,6 +304,10 @@ Status DependencyOptimizer::OptimizeDependencies(GraphDef* optimized_graph) {
       nodes_to_simplify.PushBack(new_node);
     }
   }
+  for (int i = 0; i < optimized_graph->node_size(); ++i) {
+    NodeDef* node = optimized_graph->mutable_node(i);
+    PruneControlInputs(node);
+  }
   return Status::OK();
 }
 
@@ -264,10 +316,10 @@ Status DependencyOptimizer::Optimize(Cluster* cluster, const GrapplerItem& item,
   *optimized_graph = item.graph;
   nodes_to_preserve_ = item.NodesToPreserve();
   node_map_.reset(new NodeMap(optimized_graph));
-  has_fetch_ = !item.fetch.empty();
-  VLOG(2) << "Graph before optimization:\n" << optimized_graph->DebugString();
+  fetch_nodes_known_ = !item.fetch.empty();
+  VLOG(1) << "Graph before optimization:\n" << optimized_graph->DebugString();
   TF_RETURN_IF_ERROR(OptimizeDependencies(optimized_graph));
-  VLOG(2) << "Graph after optimization:\n" << optimized_graph->DebugString();
+  VLOG(1) << "Graph after optimization:\n" << optimized_graph->DebugString();
 
   return Status::OK();
 }
diff --git a/tensorflow/core/grappler/optimizers/dependency_optimizer.h b/tensorflow/core/grappler/optimizers/dependency_optimizer.h
index 13ece87aff..cab9383b94 100644
--- a/tensorflow/core/grappler/optimizers/dependency_optimizer.h
+++ b/tensorflow/core/grappler/optimizers/dependency_optimizer.h
@@ -56,7 +56,7 @@ class DependencyOptimizer : public GraphOptimizer {
 
   bool HasOnlyControlOutputs(const NodeDef* node);
 
-  bool has_fetch_;
+  bool fetch_nodes_known_;
   RewriterConfig::Toggle opt_level_;
   std::unordered_set<string> nodes_to_preserve_;
   std::unique_ptr<NodeMap> node_map_;
diff --git a/tensorflow/core/grappler/optimizers/dependency_optimizer_test.cc b/tensorflow/core/grappler/optimizers/dependency_optimizer_test.cc
index d54d7b2093..90f5ec8c3f 100644
--- a/tensorflow/core/grappler/optimizers/dependency_optimizer_test.cc
+++ b/tensorflow/core/grappler/optimizers/dependency_optimizer_test.cc
@@ -104,6 +104,7 @@ TEST_F(DependencyOptimizerTest, ChangeToNoop) {
   }
 }
 
+// TODO(rmlarsen): Add test to make sure we skip Switch and Merge.
 TEST_F(DependencyOptimizerTest, ChangeToNoop_NoFetch) {
   tensorflow::Scope s = tensorflow::Scope::NewRootScope();
   Output x = ops::Const(s.WithOpName("x"), {1.0f, 2.0f}, {1, 2});
diff --git a/tensorflow/python/debug/lib/debug_graph_reconstruction_test.py b/tensorflow/python/debug/lib/debug_graph_reconstruction_test.py
index 442dfb7b3f..cc1a380538 100644
--- a/tensorflow/python/debug/lib/debug_graph_reconstruction_test.py
+++ b/tensorflow/python/debug/lib/debug_graph_reconstruction_test.py
@@ -22,6 +22,7 @@ import tempfile
 
 from tensorflow.core.framework import graph_pb2
 from tensorflow.core.protobuf import config_pb2
+from tensorflow.core.protobuf import rewriter_config_pb2
 from tensorflow.python.client import session
 from tensorflow.python.debug.lib import debug_data
 from tensorflow.python.debug.lib import debug_graphs
@@ -41,6 +42,12 @@ class ReconstructNonDebugGraphTest(test_util.TensorFlowTestCase):
   _OP_TYPE_BLACKLIST = (
       "_Send", "_Recv", "_HostSend", "_HostRecv", "_Retval")
 
+  def _no_rewrite_session_config(self):
+    rewriter_config = rewriter_config_pb2.RewriterConfig(
+        dependency_optimization=rewriter_config_pb2.RewriterConfig.OFF)
+    graph_options = config_pb2.GraphOptions(rewrite_options=rewriter_config)
+    return config_pb2.ConfigProto(graph_options=graph_options)
+
   def setUp(self):
     super(ReconstructNonDebugGraphTest, self).setUp()
     self._dump_dir = tempfile.mkdtemp()
@@ -136,7 +143,7 @@ class ReconstructNonDebugGraphTest(test_util.TensorFlowTestCase):
           sess, c, expected_output=400.0)
 
   def testReonstructGraphWithCond(self):
-    with session.Session() as sess:
+    with session.Session(config=self._no_rewrite_session_config()) as sess:
       x = variables.Variable(10.0, name="x")
       y = variables.Variable(20.0, name="y")
       cond = control_flow_ops.cond(
diff --git a/tensorflow/python/debug/lib/session_debug_testlib.py b/tensorflow/python/debug/lib/session_debug_testlib.py
index ed31a8c8cd..20a40018bf 100644
--- a/tensorflow/python/debug/lib/session_debug_testlib.py
+++ b/tensorflow/python/debug/lib/session_debug_testlib.py
@@ -58,7 +58,8 @@ from tensorflow.python.training import gradient_descent
 def no_rewrite_session_config():
   rewriter_config = rewriter_config_pb2.RewriterConfig(
       disable_model_pruning=True,
-      arithmetic_optimization=rewriter_config_pb2.RewriterConfig.OFF)
+      arithmetic_optimization=rewriter_config_pb2.RewriterConfig.OFF,
+      dependency_optimization=rewriter_config_pb2.RewriterConfig.OFF)
   graph_options = config_pb2.GraphOptions(rewrite_options=rewriter_config)
   return config_pb2.ConfigProto(graph_options=graph_options)
 
@@ -963,7 +964,7 @@ class SessionDebugTestBase(test_util.TensorFlowTestCase):
   def testOutputSlotWithoutOutgoingEdgeCanBeWatched(self):
     """Test watching output slots not attached to any outgoing edges."""
 
-    with session.Session() as sess:
+    with session.Session(config=no_rewrite_session_config()) as sess:
       u_init_val = np.array([[5.0, 3.0], [-1.0, 0.0]])
       u = constant_op.constant(u_init_val, shape=[2, 2], name="u")
 
-- 
GitLab


From c33fc377309eb72e94077eb091dc51d198cb1afb Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 21 Nov 2017 10:07:57 -0800
Subject: [PATCH 0168/1225] Remove deleted files from CMake

PiperOrigin-RevId: 176524446
---
 tensorflow/contrib/cmake/tf_core_kernels.cmake | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/tensorflow/contrib/cmake/tf_core_kernels.cmake b/tensorflow/contrib/cmake/tf_core_kernels.cmake
index f978c8ccd5..d6b8990664 100644
--- a/tensorflow/contrib/cmake/tf_core_kernels.cmake
+++ b/tensorflow/contrib/cmake/tf_core_kernels.cmake
@@ -55,10 +55,6 @@ if(tensorflow_BUILD_CONTRIB_KERNELS)
       "${tensorflow_source_dir}/tensorflow/contrib/boosted_trees/lib/utils/sparse_column_iterable.cc"
       "${tensorflow_source_dir}/tensorflow/contrib/boosted_trees/lib/utils/tensor_utils.cc"
       "${tensorflow_source_dir}/tensorflow/contrib/boosted_trees/lib/learner/common/partitioners/example_partitioner.cc"
-      "${tensorflow_source_dir}/tensorflow/contrib/boosted_trees/lib/learner/stochastic/handlers/bias-feature-column-handler.cc"
-      "${tensorflow_source_dir}/tensorflow/contrib/boosted_trees/lib/learner/stochastic/handlers/categorical-feature-column-handler.cc"
-      "${tensorflow_source_dir}/tensorflow/contrib/boosted_trees/lib/learner/stochastic/handlers/dense-quantized-feature-column-handler.cc"
-      "${tensorflow_source_dir}/tensorflow/contrib/boosted_trees/lib/learner/stochastic/handlers/sparse-quantized-feature-column-handler.cc"
       "${tensorflow_source_dir}/tensorflow/contrib/boosted_trees/lib/models/multiple_additive_trees.cc"
       "${tensorflow_source_dir}/tensorflow/contrib/boosted_trees/lib/trees/decision_tree.cc"
       "${tensorflow_source_dir}/tensorflow/contrib/boosted_trees/ops/model_ops.cc"
-- 
GitLab


From ff47768027a66d550667015e1f238541169414c5 Mon Sep 17 00:00:00 2001
From: Benoit Steiner <bsteiner@google.com>
Date: Tue, 21 Nov 2017 10:18:13 -0800
Subject: [PATCH 0169/1225] Limit the number of iterations to avoid creating
 infinite loops if a shape function isn't implemented correctly.

PiperOrigin-RevId: 176526135
---
 .../core/grappler/costs/graph_properties.cc   | 23 ++++++++++++++++---
 .../core/grappler/costs/graph_properties.h    |  4 ++--
 2 files changed, 22 insertions(+), 5 deletions(-)

diff --git a/tensorflow/core/grappler/costs/graph_properties.cc b/tensorflow/core/grappler/costs/graph_properties.cc
index 46c6841023..c28498ef6f 100644
--- a/tensorflow/core/grappler/costs/graph_properties.cc
+++ b/tensorflow/core/grappler/costs/graph_properties.cc
@@ -283,6 +283,7 @@ class TopoQueue {
   }
 
   bool empty() const { return queue_.empty(); }
+  std::size_t size() const { return queue_.size(); }
 
  private:
   // Graph nodes are created in (roughly) topological order. Therefore we can
@@ -701,9 +702,24 @@ Status GraphProperties::UpdateShapes(SymbolicShapeRefiner* shape_refiner,
 Status GraphProperties::PropagateShapes(
     SymbolicShapeRefiner* shape_refiner, bool relax, TopoQueue* new_shapes,
     const std::unordered_map<const Node*, std::unordered_set<const Node*>>&
-        resources) {
+        resources) const {
+  // Limit the number of iterations to prevent infinite loops in the presence of
+  // incorrect shape functions. The algoritm should converge in at most
+  // num_nested_loops^2 * max_rank. We approximate max_rank with the constant 4.
+  // The same applies to resources.
+  const int num_loops = new_shapes->size();
+  const int max_loop_length = item_.graph.node_size();
+  const int max_rank = 4;
+  const int max_loop_iterations =
+      max_rank * max_loop_length * std::max(1, num_loops * num_loops);
+  const int num_queues = resources.size();
+  const int max_resource_iterations = num_queues * num_queues * max_rank;
+
+  int num_resource_iterations = 0;
   do {
-    while (!new_shapes->empty()) {
+    int num_loop_iterations = 0;
+    while (!new_shapes->empty() &&
+           num_loop_iterations++ < max_loop_iterations) {
       const Node* n = new_shapes->pop();
       for (const Node* fanout : n->out_nodes()) {
         TF_RETURN_IF_ERROR(
@@ -718,7 +734,8 @@ Status GraphProperties::PropagateShapes(
       TF_RETURN_IF_ERROR(UpdateResource(resource.first, resource.second,
                                         shape_refiner, relax, new_shapes));
     }
-  } while (!new_shapes->empty());
+  } while (!new_shapes->empty() &&
+           num_resource_iterations++ < max_resource_iterations);
 
   return Status::OK();
 }
diff --git a/tensorflow/core/grappler/costs/graph_properties.h b/tensorflow/core/grappler/costs/graph_properties.h
index 37c8654541..ee279b7e0a 100644
--- a/tensorflow/core/grappler/costs/graph_properties.h
+++ b/tensorflow/core/grappler/costs/graph_properties.h
@@ -99,10 +99,10 @@ class GraphProperties {
                              const Node* n, TopoQueue* new_shapes);
   // Propagate the shapes for the nodes enqueued in new_shapes and their
   // transitive fanout until a fixed point is reached.
-  static Status PropagateShapes(
+  Status PropagateShapes(
       SymbolicShapeRefiner* shape_refiner, bool relax, TopoQueue* new_shapes,
       const std::unordered_map<const Node*, std::unordered_set<const Node*>>&
-          resources);
+          resources) const;
 };
 
 }  // end namespace grappler
-- 
GitLab


From 893fb86ab81503f2b608e700874c76f83a4b07a7 Mon Sep 17 00:00:00 2001
From: Kiril Gorovoy <kgorovoy@google.com>
Date: Tue, 21 Nov 2017 10:28:38 -0800
Subject: [PATCH 0170/1225] Fix flatbuffers workspace.bzl definition to work
 when TF is imported as a submodule.

PiperOrigin-RevId: 176527761
---
 tensorflow/workspace.bzl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl
index 9bbc0cb1c4..dd5dc37a87 100644
--- a/tensorflow/workspace.bzl
+++ b/tensorflow/workspace.bzl
@@ -819,7 +819,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""):
 
   native.new_http_archive(
       name = "flatbuffers",
-      build_file = "third_party/flatbuffers/flatbuffers.BUILD",
+      build_file = str(Label("//third_party/flatbuffers:flatbuffers.BUILD")),
       strip_prefix = "flatbuffers-971a68110e4fc1bace10fcb6deeb189e7e1a34ce",
       sha256 = "874088d2ee0d9f8524191f77209556415f03dd44e156276edf19e5b90ceb5f55",
       urls = [
-- 
GitLab


From f282ad32cf4bd2cba873ab2ebbc98cc1a7329a7f Mon Sep 17 00:00:00 2001
From: Kyle Mills <kyle.mills@uoit.net>
Date: Tue, 21 Nov 2017 14:24:32 -0500
Subject: [PATCH 0171/1225] Fixed typo in usage docstring

Changed tf.SyncReplicasOptimizer to tf.train.SyncReplicasOptimizer.
---
 tensorflow/python/training/sync_replicas_optimizer.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/python/training/sync_replicas_optimizer.py b/tensorflow/python/training/sync_replicas_optimizer.py
index 2a97d45daa..b52d101a21 100644
--- a/tensorflow/python/training/sync_replicas_optimizer.py
+++ b/tensorflow/python/training/sync_replicas_optimizer.py
@@ -99,7 +99,7 @@ class SyncReplicasOptimizer(optimizer.Optimizer):
   # Note that if you want to have 2 backup replicas, you can change
   # total_num_replicas=52 and make sure this number matches how many physical
   # replicas you started in your job.
-  opt = tf.SyncReplicasOptimizer(opt, replicas_to_aggregate=50,
+  opt = tf.train.SyncReplicasOptimizer(opt, replicas_to_aggregate=50,
                                  total_num_replicas=50)
 
   # Some models have startup_delays to help stabilize the model but when using
-- 
GitLab


From 0d4b94b7eddfff07f3a722ec2747568894256428 Mon Sep 17 00:00:00 2001
From: Skye Wanderman-Milne <skyewm@google.com>
Date: Tue, 21 Nov 2017 11:56:56 -0800
Subject: [PATCH 0172/1225] Fix bug in WhileLoopContext.

PiperOrigin-RevId: 176540820
---
 tensorflow/python/ops/control_flow_ops.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/python/ops/control_flow_ops.py b/tensorflow/python/ops/control_flow_ops.py
index 194df5957c..38c959df8d 100644
--- a/tensorflow/python/ops/control_flow_ops.py
+++ b/tensorflow/python/ops/control_flow_ops.py
@@ -2734,7 +2734,7 @@ class WhileContext(ControlFlowContext):
         if shape is not None:
           xs.append(shape)
       for x in xs:
-        inp_op = x.op.inputs[0]
+        inp_op = x.op.inputs[0].op
         control_inputs = graph._control_dependencies_for_inputs([inp_op])
         outer_control_inputs = [op for op in control_inputs
                                 if self._IsInOuterContext(op)]
-- 
GitLab


From 01fec325b3b4b26bf5338930eb37a252a16786df Mon Sep 17 00:00:00 2001
From: Sergio Guadarrama <sguada@google.com>
Date: Tue, 21 Nov 2017 12:06:36 -0800
Subject: [PATCH 0173/1225] Allow creating contrib.framework.local_variable
 that use_resource. Allow creating contrib.framework.global_variable that
 use_resource.

PiperOrigin-RevId: 176542393
---
 tensorflow/contrib/framework/BUILD            |  1 +
 tensorflow/contrib/framework/__init__.py      |  1 +
 .../contrib/framework/python/ops/variables.py | 35 ++++++++-
 .../framework/python/ops/variables_test.py    | 77 +++++++++++++++++++
 tensorflow/python/ops/variable_scope.py       |  6 +-
 5 files changed, 115 insertions(+), 5 deletions(-)

diff --git a/tensorflow/contrib/framework/BUILD b/tensorflow/contrib/framework/BUILD
index e8dad886a1..5b659ddaa1 100644
--- a/tensorflow/contrib/framework/BUILD
+++ b/tensorflow/contrib/framework/BUILD
@@ -276,6 +276,7 @@ py_test(
         "//tensorflow/python:nn_ops",
         "//tensorflow/python:partitioned_variables",
         "//tensorflow/python:platform",
+        "//tensorflow/python:resource_variable_ops",
         "//tensorflow/python:session",
         "//tensorflow/python:training",
         "//tensorflow/python:variable_scope",
diff --git a/tensorflow/contrib/framework/__init__.py b/tensorflow/contrib/framework/__init__.py
index 3f59261183..4edc77f86b 100644
--- a/tensorflow/contrib/framework/__init__.py
+++ b/tensorflow/contrib/framework/__init__.py
@@ -65,6 +65,7 @@ See the @{$python/contrib.framework} guide.
 @@get_variable_full_name
 @@get_variables_to_restore
 @@get_variables
+@@global_variable
 @@local_variable
 @@model_variable
 @@variable
diff --git a/tensorflow/contrib/framework/python/ops/variables.py b/tensorflow/contrib/framework/python/ops/variables.py
index b766837968..07b7857e7b 100644
--- a/tensorflow/contrib/framework/python/ops/variables.py
+++ b/tensorflow/contrib/framework/python/ops/variables.py
@@ -60,6 +60,7 @@ __all__ = ['add_model_variable',
            'get_variable_full_name',
            'get_variables_to_restore',
            'get_variables',
+           'global_variable',
            'local_variable',
            'model_variable',
            'variable',
@@ -147,20 +148,48 @@ def get_or_create_global_step(graph=None):
   return training_util.get_or_create_global_step(graph)
 
 
-def local_variable(initial_value, validate_shape=True, name=None):
-  """Create variable and add it to `GraphKeys.LOCAL_VARIABLES` collection.
+def local_variable(initial_value,
+                   validate_shape=True,
+                   name=None,
+                   use_resource=None):
+  """Create a variable with a value and add it to `GraphKeys.LOCAL_VARIABLES`.
 
   Args:
     initial_value: See variables.Variable.__init__.
     validate_shape: See variables.Variable.__init__.
     name: See variables.Variable.__init__.
+    use_resource: If `True` use a ResourceVariable instead of a Variable.
   Returns:
     New variable.
   """
   return variable_scope.variable(
       initial_value, trainable=False,
       collections=[ops.GraphKeys.LOCAL_VARIABLES],
-      validate_shape=validate_shape, name=name)
+      validate_shape=validate_shape,
+      use_resource=use_resource,
+      name=name)
+
+
+def global_variable(initial_value,
+                    validate_shape=True,
+                    name=None,
+                    use_resource=None):
+  """Create a variable with a value and add it to `GraphKeys.GLOBAL_VARIABLES`.
+
+  Args:
+    initial_value: See variables.Variable.__init__.
+    validate_shape: See variables.Variable.__init__.
+    name: See variables.Variable.__init__.
+    use_resource: If `True` use a ResourceVariable instead of a Variable.
+  Returns:
+    New variable.
+  """
+  return variable_scope.variable(
+      initial_value, trainable=False,
+      collections=[ops.GraphKeys.GLOBAL_VARIABLES],
+      validate_shape=validate_shape,
+      use_resource=use_resource,
+      name=name)
 
 
 @contrib_add_arg_scope
diff --git a/tensorflow/contrib/framework/python/ops/variables_test.py b/tensorflow/contrib/framework/python/ops/variables_test.py
index 6a74e4e866..2f06df93ac 100644
--- a/tensorflow/contrib/framework/python/ops/variables_test.py
+++ b/tensorflow/contrib/framework/python/ops/variables_test.py
@@ -33,6 +33,7 @@ from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import init_ops
 from tensorflow.python.ops import nn_ops
 from tensorflow.python.ops import partitioned_variables
+from tensorflow.python.ops import resource_variable_ops
 from tensorflow.python.ops import variable_scope
 from tensorflow.python.ops import variables as variables_lib
 from tensorflow.python.platform import gfile
@@ -102,6 +103,82 @@ class LocalVariableTest(test.TestCase):
       sess.run(variables_lib.local_variables_initializer())
       self.assertAllEqual(a.eval(), [0] * 5)
 
+  def testResourceVariable(self):
+    a = variables_lib2.local_variable(0)
+    b = variables_lib2.local_variable(0, use_resource=True)
+    self.assertEqual(type(a), variables_lib.Variable)
+    self.assertEqual(type(b), resource_variable_ops.ResourceVariable)
+
+
+class GlobalVariableTest(test.TestCase):
+
+  def test_global_variable(self):
+    with self.test_session() as sess:
+      self.assertEquals([], variables_lib.global_variables())
+      value0 = 42
+      variables_lib2.global_variable(value0)
+      value1 = 43
+      variables_lib2.global_variable(value1)
+      variables = variables_lib.global_variables()
+      self.assertEquals(2, len(variables))
+      with self.assertRaisesOpError(
+          'Attempting to use uninitialized value Variable'):
+        sess.run(variables)
+      variables_lib.variables_initializer(variables).run()
+      self.assertAllEqual(set([value0, value1]), set(sess.run(variables)))
+
+  def testVariableNameAndShape(self):
+    with self.test_session():
+      with variable_scope.variable_scope('A'):
+        a = variables_lib2.global_variable([1, 1, 1, 1, 1], name='a')
+        self.assertEquals(a.op.name, 'A/a')
+        self.assertListEqual(a.get_shape().as_list(), [5])
+        self.assertListEqual([a], variables_lib.global_variables())
+
+  def testGlobalVariableNotInLocalVariables(self):
+    with self.test_session():
+      with variable_scope.variable_scope('A'):
+        a = variables_lib2.global_variable(0)
+        self.assertFalse(a in variables_lib.local_variables())
+        self.assertTrue(a in variables_lib.global_variables())
+
+  def testGlobalVariableInVariablesToRestore(self):
+    with self.test_session():
+      with variable_scope.variable_scope('A'):
+        a = variables_lib2.global_variable(0)
+        self.assertFalse(a in variables_lib.local_variables())
+        self.assertTrue(a in variables_lib2.get_variables_to_restore())
+
+  def testGetVariablesReturnsThem(self):
+    with self.test_session():
+      with variable_scope.variable_scope('A'):
+        a = variables_lib2.global_variable(0)
+      with variable_scope.variable_scope('B'):
+        b = variables_lib2.global_variable(0)
+      self.assertEquals([a], variables_lib2.get_variables('A'))
+      self.assertEquals([b], variables_lib2.get_variables('B'))
+
+  def testGetLocalVariablesDontReturnsThem(self):
+    with self.test_session():
+      with variable_scope.variable_scope('A'):
+        variables_lib2.global_variable(0)
+      with variable_scope.variable_scope('B'):
+        variables_lib2.global_variable(0)
+      self.assertEquals([], variables_lib2.get_local_variables('A'))
+      self.assertEquals([], variables_lib2.get_local_variables('B'))
+
+  def testInitializedVariableValue(self):
+    with self.test_session() as sess:
+      a = variables_lib2.global_variable([0, 0, 0, 0, 0], name='a')
+      sess.run(variables_lib.global_variables_initializer())
+      self.assertAllEqual(a.eval(), [0] * 5)
+
+  def testResourceVariable(self):
+    a = variables_lib2.global_variable(0)
+    b = variables_lib2.global_variable(0, use_resource=True)
+    self.assertEqual(type(a), variables_lib.Variable)
+    self.assertEqual(type(b), resource_variable_ops.ResourceVariable)
+
 
 class GlobalStepTest(test.TestCase):
 
diff --git a/tensorflow/python/ops/variable_scope.py b/tensorflow/python/ops/variable_scope.py
index 91dea12da2..dd435249f4 100644
--- a/tensorflow/python/ops/variable_scope.py
+++ b/tensorflow/python/ops/variable_scope.py
@@ -1985,8 +1985,10 @@ def variable(initial_value=None,
              validate_shape=True,
              caching_device=None,
              name=None,
-             dtype=None):
-  use_resource = get_variable_scope().use_resource
+             dtype=None,
+             use_resource=None):
+  if use_resource is None:
+    use_resource = get_variable_scope().use_resource
   if use_resource or (use_resource is None and context.in_eager_mode()):
     return resource_variable_ops.ResourceVariable(
         initial_value=initial_value, trainable=trainable,
-- 
GitLab


From 89449ee332974650f39b5cee468ee20a53f8ef33 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 21 Nov 2017 12:11:14 -0800
Subject: [PATCH 0174/1225] Fix error messages in
 ops.register_dense_tensor_like_type().

PiperOrigin-RevId: 176543190
---
 tensorflow/python/framework/ops.py | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/tensorflow/python/framework/ops.py b/tensorflow/python/framework/ops.py
index 132571dd05..2785aed13e 100644
--- a/tensorflow/python/framework/ops.py
+++ b/tensorflow/python/framework/ops.py
@@ -157,14 +157,18 @@ def register_dense_tensor_like_type(tensor_type):
   """
   try:
     if not isinstance(tensor_type.name, property):
-      raise TypeError("Type %s does not define a `name` property")
+      raise TypeError("Type %s does not define a `name` property" %
+                      tensor_type.__name__)
   except AttributeError:
-    raise TypeError("Type %s does not define a `name` property")
+    raise TypeError("Type %s does not define a `name` property" %
+                    tensor_type.__name__)
   try:
     if not isinstance(tensor_type.dtype, property):
-      raise TypeError("Type %s does not define a `dtype` property")
+      raise TypeError("Type %s does not define a `dtype` property" %
+                      tensor_type.__name__)
   except AttributeError:
-    raise TypeError("Type %s does not define a `dtype` property")
+    raise TypeError("Type %s does not define a `dtype` property" %
+                    tensor_type.__name__)
   # We expect this list to be small, so choose quadratic complexity
   # for registration, so that we have a tuple that can be used for
   # more efficient `isinstance` checks later.
-- 
GitLab


From 9ad26eb766ac6e742503c1533efa324815ee4653 Mon Sep 17 00:00:00 2001
From: Christopher Shallue <shallue@google.com>
Date: Tue, 21 Nov 2017 12:16:33 -0800
Subject: [PATCH 0175/1225] Add **kwargs to HParams.to_json() to be passed to
 json.dumps()

PiperOrigin-RevId: 176543810
---
 .../contrib/training/python/training/hparam.py  | 17 +++++++++++++++--
 .../training/python/training/hparam_test.py     | 10 ++++++++++
 2 files changed, 25 insertions(+), 2 deletions(-)

diff --git a/tensorflow/contrib/training/python/training/hparam.py b/tensorflow/contrib/training/python/training/hparam.py
index 8d5f47ca4d..80de0f6eb7 100644
--- a/tensorflow/contrib/training/python/training/hparam.py
+++ b/tensorflow/contrib/training/python/training/hparam.py
@@ -550,13 +550,26 @@ class HParams(object):
   def get_model_structure(self):
     return self._model_structure
 
-  def to_json(self):
+  def to_json(self, indent=None, separators=None, sort_keys=False):
     """Serializes the hyperparameters into JSON.
 
+    Args:
+      indent: If a non-negative integer, JSON array elements and object members
+        will be pretty-printed with that indent level. An indent level of 0, or
+        negative, will only insert newlines. `None` (the default) selects the
+        most compact representation.
+      separators: Optional `(item_separator, key_separator)` tuple. Default is
+        `(', ', ': ')`.
+      sort_keys: If `True`, the output dictionaries will be sorted by key.
+
     Returns:
       A JSON string.
     """
-    return json.dumps(self.values())
+    return json.dumps(
+        self.values(),
+        indent=indent,
+        separators=separators,
+        sort_keys=sort_keys)
 
   def parse_json(self, values_json):
     """Override hyperparameter values, parsing new values from a json object.
diff --git a/tensorflow/contrib/training/python/training/hparam_test.py b/tensorflow/contrib/training/python/training/hparam_test.py
index 643905d3a6..28e4b4d01e 100644
--- a/tensorflow/contrib/training/python/training/hparam_test.py
+++ b/tensorflow/contrib/training/python/training/hparam_test.py
@@ -292,6 +292,16 @@ class HParamsTest(test.TestCase):
     self.assertEqual('relu4', hparams2.c_c)
     self.assertEqual(False, hparams2.d)
 
+    hparams3 = hparam.HParams(aaa=123)
+    self.assertEqual('{"aaa": 123}', hparams3.to_json())
+    self.assertEqual('{\n  "aaa": 123\n}', hparams3.to_json(indent=2))
+    self.assertEqual('{"aaa"=123}', hparams3.to_json(separators=(';', '=')))
+
+    hparams4 = hparam.HParams(aaa=123, b='hello', c_c=False)
+    self.assertEqual(
+        '{"aaa": 123, "b": "hello", "c_c": false}',
+        hparams4.to_json(sort_keys=True))
+
   def testSetHParam(self):
     hparams = hparam.HParams(aaa=1, b=2.0, c_c='relu6', d=True)
     self.assertDictEqual({
-- 
GitLab


From b5dcb0161942c467be6cba19aa0ee05aef742d2e Mon Sep 17 00:00:00 2001
From: Igor Ganichev <iga@google.com>
Date: Tue, 21 Nov 2017 12:18:17 -0800
Subject: [PATCH 0176/1225] Make all assert_* ops work in eager mode

PiperOrigin-RevId: 176544038
---
 .../python/kernel_tests/check_ops_test.py     | 701 ++++++++++--------
 tensorflow/python/ops/check_ops.py            | 102 ++-
 2 files changed, 456 insertions(+), 347 deletions(-)

diff --git a/tensorflow/python/kernel_tests/check_ops_test.py b/tensorflow/python/kernel_tests/check_ops_test.py
index 43785adcee..7ce0f1e7b8 100644
--- a/tensorflow/python/kernel_tests/check_ops_test.py
+++ b/tensorflow/python/kernel_tests/check_ops_test.py
@@ -34,38 +34,45 @@ from tensorflow.python.platform import test
 
 class AssertProperIterableTest(test.TestCase):
 
+  @test_util.run_in_graph_and_eager_modes()
   def test_single_tensor_raises(self):
     tensor = constant_op.constant(1)
     with self.assertRaisesRegexp(TypeError, "proper"):
       check_ops.assert_proper_iterable(tensor)
 
+  @test_util.run_in_graph_and_eager_modes()
   def test_single_sparse_tensor_raises(self):
     ten = sparse_tensor.SparseTensor(
         indices=[[0, 0], [1, 2]], values=[1, 2], dense_shape=[3, 4])
     with self.assertRaisesRegexp(TypeError, "proper"):
       check_ops.assert_proper_iterable(ten)
 
+  @test_util.run_in_graph_and_eager_modes()
   def test_single_ndarray_raises(self):
     array = np.array([1, 2, 3])
     with self.assertRaisesRegexp(TypeError, "proper"):
       check_ops.assert_proper_iterable(array)
 
+  @test_util.run_in_graph_and_eager_modes()
   def test_single_string_raises(self):
     mystr = "hello"
     with self.assertRaisesRegexp(TypeError, "proper"):
       check_ops.assert_proper_iterable(mystr)
 
+  @test_util.run_in_graph_and_eager_modes()
   def test_non_iterable_object_raises(self):
     non_iterable = 1234
     with self.assertRaisesRegexp(TypeError, "to be iterable"):
       check_ops.assert_proper_iterable(non_iterable)
 
+  @test_util.run_in_graph_and_eager_modes()
   def test_list_does_not_raise(self):
     list_of_stuff = [
         constant_op.constant([11, 22]), constant_op.constant([1, 2])
     ]
     check_ops.assert_proper_iterable(list_of_stuff)
 
+  @test_util.run_in_graph_and_eager_modes()
   def test_generator_does_not_raise(self):
     generator_of_stuff = (constant_op.constant([11, 22]), constant_op.constant(
         [1, 2]))
@@ -333,265 +340,283 @@ class AssertLessTest(test.TestCase):
 
 class AssertLessEqualTest(test.TestCase):
 
+  @test_util.run_in_graph_and_eager_modes()
   def test_doesnt_raise_when_equal(self):
-    with self.test_session():
-      small = constant_op.constant([1, 2], name="small")
-      with ops.control_dependencies(
-          [check_ops.assert_less_equal(small, small)]):
-        out = array_ops.identity(small)
-      out.eval()
+    small = constant_op.constant([1, 2], name="small")
+    with ops.control_dependencies(
+        [check_ops.assert_less_equal(small, small)]):
+      out = array_ops.identity(small)
+    self.evaluate(out)
 
+  @test_util.run_in_graph_and_eager_modes()
   def test_raises_when_greater(self):
-    with self.test_session():
-      small = constant_op.constant([1, 2], name="small")
-      big = constant_op.constant([3, 4], name="big")
+    small = constant_op.constant([1, 2], name="small")
+    big = constant_op.constant([3, 4], name="big")
+    with self.assertRaisesOpError("fail"):
       with ops.control_dependencies(
           [check_ops.assert_less_equal(
               big, small, message="fail")]):
         out = array_ops.identity(small)
-      with self.assertRaisesOpError("fail.*big.*small"):
-        out.eval()
+      self.evaluate(out)
 
+  @test_util.run_in_graph_and_eager_modes()
   def test_doesnt_raise_when_less_equal(self):
-    with self.test_session():
-      small = constant_op.constant([1, 2], name="small")
-      big = constant_op.constant([3, 2], name="big")
-      with ops.control_dependencies([check_ops.assert_less_equal(small, big)]):
-        out = array_ops.identity(small)
-      out.eval()
+    small = constant_op.constant([1, 2], name="small")
+    big = constant_op.constant([3, 2], name="big")
+    with ops.control_dependencies([check_ops.assert_less_equal(small, big)]):
+      out = array_ops.identity(small)
+    self.evaluate(out)
 
+  @test_util.run_in_graph_and_eager_modes()
   def test_doesnt_raise_when_less_equal_and_broadcastable_shapes(self):
-    with self.test_session():
-      small = constant_op.constant([1], name="small")
-      big = constant_op.constant([3, 1], name="big")
-      with ops.control_dependencies([check_ops.assert_less_equal(small, big)]):
-        out = array_ops.identity(small)
-      out.eval()
+    small = constant_op.constant([1], name="small")
+    big = constant_op.constant([3, 1], name="big")
+    with ops.control_dependencies([check_ops.assert_less_equal(small, big)]):
+      out = array_ops.identity(small)
+    self.evaluate(out)
 
+  @test_util.run_in_graph_and_eager_modes()
   def test_raises_when_less_equal_but_non_broadcastable_shapes(self):
-    with self.test_session():
-      small = constant_op.constant([1, 1, 1], name="small")
-      big = constant_op.constant([3, 1], name="big")
-      with self.assertRaisesRegexp(ValueError, "must be"):
-        with ops.control_dependencies(
-            [check_ops.assert_less_equal(small, big)]):
-          out = array_ops.identity(small)
-        out.eval()
+    small = constant_op.constant([3, 1], name="small")
+    big = constant_op.constant([1, 1, 1], name="big")
+    # The exception in eager and non-eager mode is different because
+    # eager mode relies on shape check done as part of the C++ op, while
+    # graph mode does shape checks when creating the `Operation` instance.
+    with self.assertRaisesRegexp(
+        (errors.InvalidArgumentError, ValueError),
+        (r"Incompatible shapes: \[2\] vs. \[3\]|"
+         r"Dimensions must be equal, but are 2 and 3")):
+      with ops.control_dependencies(
+          [check_ops.assert_less_equal(small, big)]):
+        out = array_ops.identity(small)
+      self.evaluate(out)
 
+  @test_util.run_in_graph_and_eager_modes()
   def test_doesnt_raise_when_both_empty(self):
-    with self.test_session():
-      larry = constant_op.constant([])
-      curly = constant_op.constant([])
-      with ops.control_dependencies(
-          [check_ops.assert_less_equal(larry, curly)]):
-        out = array_ops.identity(larry)
-      out.eval()
+    larry = constant_op.constant([])
+    curly = constant_op.constant([])
+    with ops.control_dependencies(
+        [check_ops.assert_less_equal(larry, curly)]):
+      out = array_ops.identity(larry)
+    self.evaluate(out)
 
 
 class AssertGreaterTest(test.TestCase):
 
+  @test_util.run_in_graph_and_eager_modes()
   def test_raises_when_equal(self):
-    with self.test_session():
-      small = constant_op.constant([1, 2], name="small")
+    small = constant_op.constant([1, 2], name="small")
+    with self.assertRaisesOpError("fail"):
       with ops.control_dependencies(
           [check_ops.assert_greater(
               small, small, message="fail")]):
         out = array_ops.identity(small)
-      with self.assertRaisesOpError("fail.*small.*small"):
-        out.eval()
+      self.evaluate(out)
 
+  @test_util.run_in_graph_and_eager_modes()
   def test_raises_when_less(self):
-    with self.test_session():
-      small = constant_op.constant([1, 2], name="small")
-      big = constant_op.constant([3, 4], name="big")
+    small = constant_op.constant([1, 2], name="small")
+    big = constant_op.constant([3, 4], name="big")
+    with self.assertRaisesOpError("x > y did not hold"):
       with ops.control_dependencies([check_ops.assert_greater(small, big)]):
         out = array_ops.identity(big)
-      with self.assertRaisesOpError("small.*big"):
-        out.eval()
+      self.evaluate(out)
 
+  @test_util.run_in_graph_and_eager_modes()
   def test_doesnt_raise_when_greater(self):
-    with self.test_session():
-      small = constant_op.constant([3, 1], name="small")
-      big = constant_op.constant([4, 2], name="big")
-      with ops.control_dependencies([check_ops.assert_greater(big, small)]):
-        out = array_ops.identity(small)
-      out.eval()
+    small = constant_op.constant([3, 1], name="small")
+    big = constant_op.constant([4, 2], name="big")
+    with ops.control_dependencies([check_ops.assert_greater(big, small)]):
+      out = array_ops.identity(small)
+    self.evaluate(out)
 
+  @test_util.run_in_graph_and_eager_modes()
   def test_doesnt_raise_when_greater_and_broadcastable_shapes(self):
-    with self.test_session():
-      small = constant_op.constant([1], name="small")
-      big = constant_op.constant([3, 2], name="big")
-      with ops.control_dependencies([check_ops.assert_greater(big, small)]):
-        out = array_ops.identity(small)
-      out.eval()
+    small = constant_op.constant([1], name="small")
+    big = constant_op.constant([3, 2], name="big")
+    with ops.control_dependencies([check_ops.assert_greater(big, small)]):
+      out = array_ops.identity(small)
+    self.evaluate(out)
 
+  @test_util.run_in_graph_and_eager_modes()
   def test_raises_when_greater_but_non_broadcastable_shapes(self):
-    with self.test_session():
-      small = constant_op.constant([1, 1, 1], name="small")
-      big = constant_op.constant([3, 2], name="big")
-      with self.assertRaisesRegexp(ValueError, "must be"):
-        with ops.control_dependencies([check_ops.assert_greater(big, small)]):
-          out = array_ops.identity(small)
-        out.eval()
+    small = constant_op.constant([1, 1, 1], name="small")
+    big = constant_op.constant([3, 2], name="big")
+    # The exception in eager and non-eager mode is different because
+    # eager mode relies on shape check done as part of the C++ op, while
+    # graph mode does shape checks when creating the `Operation` instance.
+    with self.assertRaisesRegexp(
+        (errors.InvalidArgumentError, ValueError),
+        (r"Incompatible shapes: \[2\] vs. \[3\]|"
+         r"Dimensions must be equal, but are 2 and 3")):
+      with ops.control_dependencies([check_ops.assert_greater(big, small)]):
+        out = array_ops.identity(small)
+      self.evaluate(out)
 
+  @test_util.run_in_graph_and_eager_modes()
   def test_doesnt_raise_when_both_empty(self):
-    with self.test_session():
-      larry = constant_op.constant([])
-      curly = constant_op.constant([])
-      with ops.control_dependencies([check_ops.assert_greater(larry, curly)]):
-        out = array_ops.identity(larry)
-      out.eval()
+    larry = constant_op.constant([])
+    curly = constant_op.constant([])
+    with ops.control_dependencies([check_ops.assert_greater(larry, curly)]):
+      out = array_ops.identity(larry)
+    self.evaluate(out)
 
 
 class AssertGreaterEqualTest(test.TestCase):
 
+  @test_util.run_in_graph_and_eager_modes()
   def test_doesnt_raise_when_equal(self):
-    with self.test_session():
-      small = constant_op.constant([1, 2], name="small")
-      with ops.control_dependencies(
-          [check_ops.assert_greater_equal(small, small)]):
-        out = array_ops.identity(small)
-      out.eval()
+    small = constant_op.constant([1, 2], name="small")
+    with ops.control_dependencies(
+        [check_ops.assert_greater_equal(small, small)]):
+      out = array_ops.identity(small)
+    self.evaluate(out)
 
+  @test_util.run_in_graph_and_eager_modes()
   def test_raises_when_less(self):
-    with self.test_session():
-      small = constant_op.constant([1, 2], name="small")
-      big = constant_op.constant([3, 4], name="big")
+    small = constant_op.constant([1, 2], name="small")
+    big = constant_op.constant([3, 4], name="big")
+    with self.assertRaisesOpError("fail"):
       with ops.control_dependencies(
           [check_ops.assert_greater_equal(
               small, big, message="fail")]):
         out = array_ops.identity(small)
-      with self.assertRaisesOpError("fail.*small.*big"):
-        out.eval()
+      self.evaluate(out)
 
+  @test_util.run_in_graph_and_eager_modes()
   def test_doesnt_raise_when_greater_equal(self):
-    with self.test_session():
-      small = constant_op.constant([1, 2], name="small")
-      big = constant_op.constant([3, 2], name="big")
-      with ops.control_dependencies(
-          [check_ops.assert_greater_equal(big, small)]):
-        out = array_ops.identity(small)
-      out.eval()
+    small = constant_op.constant([1, 2], name="small")
+    big = constant_op.constant([3, 2], name="big")
+    with ops.control_dependencies(
+        [check_ops.assert_greater_equal(big, small)]):
+      out = array_ops.identity(small)
+    self.evaluate(out)
 
+  @test_util.run_in_graph_and_eager_modes()
   def test_doesnt_raise_when_greater_equal_and_broadcastable_shapes(self):
-    with self.test_session():
-      small = constant_op.constant([1], name="small")
-      big = constant_op.constant([3, 1], name="big")
+    small = constant_op.constant([1], name="small")
+    big = constant_op.constant([3, 1], name="big")
+    with ops.control_dependencies(
+        [check_ops.assert_greater_equal(big, small)]):
+      out = array_ops.identity(small)
+    self.evaluate(out)
+
+  @test_util.run_in_graph_and_eager_modes()
+  def test_raises_when_less_equal_but_non_broadcastable_shapes(self):
+    small = constant_op.constant([1, 1, 1], name="big")
+    big = constant_op.constant([3, 1], name="small")
+    # The exception in eager and non-eager mode is different because
+    # eager mode relies on shape check done as part of the C++ op, while
+    # graph mode does shape checks when creating the `Operation` instance.
+    with self.assertRaisesRegexp(
+        (errors.InvalidArgumentError, ValueError),
+        (r"Incompatible shapes: \[2\] vs. \[3\]|"
+         r"Dimensions must be equal, but are 2 and 3")):
       with ops.control_dependencies(
           [check_ops.assert_greater_equal(big, small)]):
         out = array_ops.identity(small)
-      out.eval()
-
-  def test_raises_when_less_equal_but_non_broadcastable_shapes(self):
-    with self.test_session():
-      small = constant_op.constant([1, 1, 1], name="big")
-      big = constant_op.constant([3, 1], name="small")
-      with self.assertRaisesRegexp(ValueError, "Dimensions must be equal"):
-        with ops.control_dependencies(
-            [check_ops.assert_greater_equal(big, small)]):
-          out = array_ops.identity(small)
-        out.eval()
+      self.evaluate(out)
 
+  @test_util.run_in_graph_and_eager_modes()
   def test_doesnt_raise_when_both_empty(self):
-    with self.test_session():
-      larry = constant_op.constant([])
-      curly = constant_op.constant([])
-      with ops.control_dependencies(
-          [check_ops.assert_greater_equal(larry, curly)]):
-        out = array_ops.identity(larry)
-      out.eval()
+    larry = constant_op.constant([])
+    curly = constant_op.constant([])
+    with ops.control_dependencies(
+        [check_ops.assert_greater_equal(larry, curly)]):
+      out = array_ops.identity(larry)
+    self.evaluate(out)
 
 
 class AssertNegativeTest(test.TestCase):
 
+  @test_util.run_in_graph_and_eager_modes()
   def test_doesnt_raise_when_negative(self):
-    with self.test_session():
-      frank = constant_op.constant([-1, -2], name="frank")
-      with ops.control_dependencies([check_ops.assert_negative(frank)]):
-        out = array_ops.identity(frank)
-      out.eval()
+    frank = constant_op.constant([-1, -2], name="frank")
+    with ops.control_dependencies([check_ops.assert_negative(frank)]):
+      out = array_ops.identity(frank)
+    self.evaluate(out)
 
+  @test_util.run_in_graph_and_eager_modes()
   def test_raises_when_positive(self):
-    with self.test_session():
-      doug = constant_op.constant([1, 2], name="doug")
+    doug = constant_op.constant([1, 2], name="doug")
+    with self.assertRaisesOpError("fail"):
       with ops.control_dependencies(
           [check_ops.assert_negative(
               doug, message="fail")]):
         out = array_ops.identity(doug)
-      with self.assertRaisesOpError("fail.*doug"):
-        out.eval()
+      self.evaluate(out)
 
+  @test_util.run_in_graph_and_eager_modes()
   def test_raises_when_zero(self):
-    with self.test_session():
-      claire = constant_op.constant([0], name="claire")
+    claire = constant_op.constant([0], name="claire")
+    with self.assertRaisesOpError("x < 0 did not hold"):
       with ops.control_dependencies([check_ops.assert_negative(claire)]):
         out = array_ops.identity(claire)
-      with self.assertRaisesOpError("claire"):
-        out.eval()
+      self.evaluate(out)
 
+  @test_util.run_in_graph_and_eager_modes()
   def test_empty_tensor_doesnt_raise(self):
     # A tensor is negative when it satisfies:
     #   For every element x_i in x, x_i < 0
     # and an empty tensor has no elements, so this is trivially satisfied.
     # This is standard set theory.
-    with self.test_session():
-      empty = constant_op.constant([], name="empty")
-      with ops.control_dependencies([check_ops.assert_negative(empty)]):
-        out = array_ops.identity(empty)
-      out.eval()
+    empty = constant_op.constant([], name="empty")
+    with ops.control_dependencies([check_ops.assert_negative(empty)]):
+      out = array_ops.identity(empty)
+    self.evaluate(out)
 
 
 class AssertPositiveTest(test.TestCase):
 
+  @test_util.run_in_graph_and_eager_modes()
   def test_raises_when_negative(self):
-    with self.test_session():
-      freddie = constant_op.constant([-1, -2], name="freddie")
+    freddie = constant_op.constant([-1, -2], name="freddie")
+    with self.assertRaisesOpError("fail"):
       with ops.control_dependencies(
           [check_ops.assert_positive(
               freddie, message="fail")]):
         out = array_ops.identity(freddie)
-      with self.assertRaisesOpError("fail.*freddie"):
-        out.eval()
+      self.evaluate(out)
 
+  @test_util.run_in_graph_and_eager_modes()
   def test_doesnt_raise_when_positive(self):
-    with self.test_session():
-      remmy = constant_op.constant([1, 2], name="remmy")
-      with ops.control_dependencies([check_ops.assert_positive(remmy)]):
-        out = array_ops.identity(remmy)
-      out.eval()
+    remmy = constant_op.constant([1, 2], name="remmy")
+    with ops.control_dependencies([check_ops.assert_positive(remmy)]):
+      out = array_ops.identity(remmy)
+    self.evaluate(out)
 
+  @test_util.run_in_graph_and_eager_modes()
   def test_raises_when_zero(self):
-    with self.test_session():
-      meechum = constant_op.constant([0], name="meechum")
+    meechum = constant_op.constant([0], name="meechum")
+    with self.assertRaisesOpError("x > 0 did not hold"):
       with ops.control_dependencies([check_ops.assert_positive(meechum)]):
         out = array_ops.identity(meechum)
-      with self.assertRaisesOpError("meechum"):
-        out.eval()
+      self.evaluate(out)
 
+  @test_util.run_in_graph_and_eager_modes()
   def test_empty_tensor_doesnt_raise(self):
     # A tensor is positive when it satisfies:
     #   For every element x_i in x, x_i > 0
     # and an empty tensor has no elements, so this is trivially satisfied.
     # This is standard set theory.
-    with self.test_session():
-      empty = constant_op.constant([], name="empty")
-      with ops.control_dependencies([check_ops.assert_positive(empty)]):
-        out = array_ops.identity(empty)
-      out.eval()
+    empty = constant_op.constant([], name="empty")
+    with ops.control_dependencies([check_ops.assert_positive(empty)]):
+      out = array_ops.identity(empty)
+    self.evaluate(out)
 
 
 class AssertRankTest(test.TestCase):
 
+  @test_util.run_in_graph_and_eager_modes()
   def test_rank_zero_tensor_raises_if_rank_too_small_static_rank(self):
-    with self.test_session():
-      tensor = constant_op.constant(1, name="my_tensor")
-      desired_rank = 1
-      with self.assertRaisesRegexp(ValueError,
-                                   "fail.*my_tensor.*must have rank 1"):
-        with ops.control_dependencies(
-            [check_ops.assert_rank(
-                tensor, desired_rank, message="fail")]):
-          array_ops.identity(tensor).eval()
+    tensor = constant_op.constant(1, name="my_tensor")
+    desired_rank = 1
+    with self.assertRaisesRegexp(ValueError,
+                                 "fail.*must have rank 1"):
+      with ops.control_dependencies(
+          [check_ops.assert_rank(
+              tensor, desired_rank, message="fail")]):
+        self.evaluate(array_ops.identity(tensor))
 
   def test_rank_zero_tensor_raises_if_rank_too_small_dynamic_rank(self):
     with self.test_session():
@@ -603,13 +628,13 @@ class AssertRankTest(test.TestCase):
         with self.assertRaisesOpError("fail.*my_tensor.*rank"):
           array_ops.identity(tensor).eval(feed_dict={tensor: 0})
 
+  @test_util.run_in_graph_and_eager_modes()
   def test_rank_zero_tensor_doesnt_raise_if_rank_just_right_static_rank(self):
-    with self.test_session():
-      tensor = constant_op.constant(1, name="my_tensor")
-      desired_rank = 0
-      with ops.control_dependencies(
-          [check_ops.assert_rank(tensor, desired_rank)]):
-        array_ops.identity(tensor).eval()
+    tensor = constant_op.constant(1, name="my_tensor")
+    desired_rank = 0
+    with ops.control_dependencies(
+        [check_ops.assert_rank(tensor, desired_rank)]):
+      self.evaluate(array_ops.identity(tensor))
 
   def test_rank_zero_tensor_doesnt_raise_if_rank_just_right_dynamic_rank(self):
     with self.test_session():
@@ -619,14 +644,14 @@ class AssertRankTest(test.TestCase):
           [check_ops.assert_rank(tensor, desired_rank)]):
         array_ops.identity(tensor).eval(feed_dict={tensor: 0})
 
+  @test_util.run_in_graph_and_eager_modes()
   def test_rank_one_tensor_raises_if_rank_too_large_static_rank(self):
-    with self.test_session():
-      tensor = constant_op.constant([1, 2], name="my_tensor")
-      desired_rank = 0
-      with self.assertRaisesRegexp(ValueError, "my_tensor.*rank"):
-        with ops.control_dependencies(
-            [check_ops.assert_rank(tensor, desired_rank)]):
-          array_ops.identity(tensor).eval()
+    tensor = constant_op.constant([1, 2], name="my_tensor")
+    desired_rank = 0
+    with self.assertRaisesRegexp(ValueError, "rank"):
+      with ops.control_dependencies(
+          [check_ops.assert_rank(tensor, desired_rank)]):
+        self.evaluate(array_ops.identity(tensor))
 
   def test_rank_one_tensor_raises_if_rank_too_large_dynamic_rank(self):
     with self.test_session():
@@ -637,13 +662,13 @@ class AssertRankTest(test.TestCase):
         with self.assertRaisesOpError("my_tensor.*rank"):
           array_ops.identity(tensor).eval(feed_dict={tensor: [1, 2]})
 
+  @test_util.run_in_graph_and_eager_modes()
   def test_rank_one_tensor_doesnt_raise_if_rank_just_right_static_rank(self):
-    with self.test_session():
-      tensor = constant_op.constant([1, 2], name="my_tensor")
-      desired_rank = 1
-      with ops.control_dependencies(
-          [check_ops.assert_rank(tensor, desired_rank)]):
-        array_ops.identity(tensor).eval()
+    tensor = constant_op.constant([1, 2], name="my_tensor")
+    desired_rank = 1
+    with ops.control_dependencies(
+        [check_ops.assert_rank(tensor, desired_rank)]):
+      self.evaluate(array_ops.identity(tensor))
 
   def test_rank_one_tensor_doesnt_raise_if_rank_just_right_dynamic_rank(self):
     with self.test_session():
@@ -653,14 +678,14 @@ class AssertRankTest(test.TestCase):
           [check_ops.assert_rank(tensor, desired_rank)]):
         array_ops.identity(tensor).eval(feed_dict={tensor: [1, 2]})
 
+  @test_util.run_in_graph_and_eager_modes()
   def test_rank_one_tensor_raises_if_rank_too_small_static_rank(self):
-    with self.test_session():
-      tensor = constant_op.constant([1, 2], name="my_tensor")
-      desired_rank = 2
-      with self.assertRaisesRegexp(ValueError, "my_tensor.*rank"):
-        with ops.control_dependencies(
-            [check_ops.assert_rank(tensor, desired_rank)]):
-          array_ops.identity(tensor).eval()
+    tensor = constant_op.constant([1, 2], name="my_tensor")
+    desired_rank = 2
+    with self.assertRaisesRegexp(ValueError, "rank"):
+      with ops.control_dependencies(
+          [check_ops.assert_rank(tensor, desired_rank)]):
+        self.evaluate(array_ops.identity(tensor))
 
   def test_rank_one_tensor_raises_if_rank_too_small_dynamic_rank(self):
     with self.test_session():
@@ -671,11 +696,11 @@ class AssertRankTest(test.TestCase):
         with self.assertRaisesOpError("my_tensor.*rank"):
           array_ops.identity(tensor).eval(feed_dict={tensor: [1, 2]})
 
+  @test_util.run_in_graph_and_eager_modes()
   def test_raises_if_rank_is_not_scalar_static(self):
-    with self.test_session():
-      tensor = constant_op.constant([1, 2], name="my_tensor")
-      with self.assertRaisesRegexp(ValueError, "Rank must be a scalar"):
-        check_ops.assert_rank(tensor, np.array([], dtype=np.int32))
+    tensor = constant_op.constant([1, 2], name="my_tensor")
+    with self.assertRaisesRegexp(ValueError, "Rank must be a scalar"):
+      check_ops.assert_rank(tensor, np.array([], dtype=np.int32))
 
   def test_raises_if_rank_is_not_scalar_dynamic(self):
     with self.test_session():
@@ -687,12 +712,12 @@ class AssertRankTest(test.TestCase):
             [check_ops.assert_rank(tensor, rank_tensor)]):
           array_ops.identity(tensor).eval(feed_dict={rank_tensor: [1, 2]})
 
+  @test_util.run_in_graph_and_eager_modes()
   def test_raises_if_rank_is_not_integer_static(self):
-    with self.test_session():
-      tensor = constant_op.constant([1, 2], name="my_tensor")
-      with self.assertRaisesRegexp(TypeError,
-                                   "must be of type <dtype: 'int32'>"):
-        check_ops.assert_rank(tensor, .5)
+    tensor = constant_op.constant([1, 2], name="my_tensor")
+    with self.assertRaisesRegexp(TypeError,
+                                 "must be of type <dtype: 'int32'>"):
+      check_ops.assert_rank(tensor, .5)
 
   def test_raises_if_rank_is_not_integer_dynamic(self):
     with self.test_session():
@@ -708,14 +733,14 @@ class AssertRankTest(test.TestCase):
 
 class AssertRankInTest(test.TestCase):
 
+  @test_util.run_in_graph_and_eager_modes()
   def test_rank_zero_tensor_raises_if_rank_mismatch_static_rank(self):
-    with self.test_session():
-      tensor_rank0 = constant_op.constant(42, name="my_tensor")
-      with self.assertRaisesRegexp(
-          ValueError, "fail.*my_tensor.*must have rank.*in.*1.*2"):
-        with ops.control_dependencies([
-            check_ops.assert_rank_in(tensor_rank0, (1, 2), message="fail")]):
-          array_ops.identity(tensor_rank0).eval()
+    tensor_rank0 = constant_op.constant(42, name="my_tensor")
+    with self.assertRaisesRegexp(
+        ValueError, "fail.*must have rank.*in.*1.*2"):
+      with ops.control_dependencies([
+          check_ops.assert_rank_in(tensor_rank0, (1, 2), message="fail")]):
+        self.evaluate(array_ops.identity(tensor_rank0))
 
   def test_rank_zero_tensor_raises_if_rank_mismatch_dynamic_rank(self):
     with self.test_session():
@@ -725,13 +750,13 @@ class AssertRankInTest(test.TestCase):
         with self.assertRaisesOpError("fail.*my_tensor.*rank"):
           array_ops.identity(tensor_rank0).eval(feed_dict={tensor_rank0: 42.0})
 
+  @test_util.run_in_graph_and_eager_modes()
   def test_rank_zero_tensor_doesnt_raise_if_rank_matches_static_rank(self):
-    with self.test_session():
-      tensor_rank0 = constant_op.constant(42, name="my_tensor")
-      for desired_ranks in ((0, 1, 2), (1, 0, 2), (1, 2, 0)):
-        with ops.control_dependencies([
-            check_ops.assert_rank_in(tensor_rank0, desired_ranks)]):
-          array_ops.identity(tensor_rank0).eval()
+    tensor_rank0 = constant_op.constant(42, name="my_tensor")
+    for desired_ranks in ((0, 1, 2), (1, 0, 2), (1, 2, 0)):
+      with ops.control_dependencies([
+          check_ops.assert_rank_in(tensor_rank0, desired_ranks)]):
+        self.evaluate(array_ops.identity(tensor_rank0))
 
   def test_rank_zero_tensor_doesnt_raise_if_rank_matches_dynamic_rank(self):
     with self.test_session():
@@ -741,13 +766,13 @@ class AssertRankInTest(test.TestCase):
             check_ops.assert_rank_in(tensor_rank0, desired_ranks)]):
           array_ops.identity(tensor_rank0).eval(feed_dict={tensor_rank0: 42.0})
 
+  @test_util.run_in_graph_and_eager_modes()
   def test_rank_one_tensor_doesnt_raise_if_rank_matches_static_rank(self):
-    with self.test_session():
-      tensor_rank1 = constant_op.constant([42, 43], name="my_tensor")
-      for desired_ranks in ((0, 1, 2), (1, 0, 2), (1, 2, 0)):
-        with ops.control_dependencies([
-            check_ops.assert_rank_in(tensor_rank1, desired_ranks)]):
-          array_ops.identity(tensor_rank1).eval()
+    tensor_rank1 = constant_op.constant([42, 43], name="my_tensor")
+    for desired_ranks in ((0, 1, 2), (1, 0, 2), (1, 2, 0)):
+      with ops.control_dependencies([
+          check_ops.assert_rank_in(tensor_rank1, desired_ranks)]):
+        self.evaluate(array_ops.identity(tensor_rank1))
 
   def test_rank_one_tensor_doesnt_raise_if_rank_matches_dynamic_rank(self):
     with self.test_session():
@@ -759,13 +784,13 @@ class AssertRankInTest(test.TestCase):
               tensor_rank1: (42.0, 43.0)
           })
 
+  @test_util.run_in_graph_and_eager_modes()
   def test_rank_one_tensor_raises_if_rank_mismatches_static_rank(self):
-    with self.test_session():
-      tensor_rank1 = constant_op.constant((42, 43), name="my_tensor")
-      with self.assertRaisesRegexp(ValueError, "my_tensor.*rank"):
-        with ops.control_dependencies([
-            check_ops.assert_rank_in(tensor_rank1, (0, 2))]):
-          array_ops.identity(tensor_rank1).eval()
+    tensor_rank1 = constant_op.constant((42, 43), name="my_tensor")
+    with self.assertRaisesRegexp(ValueError, "rank"):
+      with ops.control_dependencies([
+          check_ops.assert_rank_in(tensor_rank1, (0, 2))]):
+        self.evaluate(array_ops.identity(tensor_rank1))
 
   def test_rank_one_tensor_raises_if_rank_mismatches_dynamic_rank(self):
     with self.test_session():
@@ -777,14 +802,14 @@ class AssertRankInTest(test.TestCase):
               tensor_rank1: (42.0, 43.0)
           })
 
+  @test_util.run_in_graph_and_eager_modes()
   def test_raises_if_rank_is_not_scalar_static(self):
-    with self.test_session():
-      tensor = constant_op.constant((42, 43), name="my_tensor")
-      desired_ranks = (
-          np.array(1, dtype=np.int32),
-          np.array((2, 1), dtype=np.int32))
-      with self.assertRaisesRegexp(ValueError, "Rank must be a scalar"):
-        check_ops.assert_rank_in(tensor, desired_ranks)
+    tensor = constant_op.constant((42, 43), name="my_tensor")
+    desired_ranks = (
+        np.array(1, dtype=np.int32),
+        np.array((2, 1), dtype=np.int32))
+    with self.assertRaisesRegexp(ValueError, "Rank must be a scalar"):
+      check_ops.assert_rank_in(tensor, desired_ranks)
 
   def test_raises_if_rank_is_not_scalar_dynamic(self):
     with self.test_session():
@@ -801,12 +826,12 @@ class AssertRankInTest(test.TestCase):
               desired_ranks[1]: [2, 1],
           })
 
+  @test_util.run_in_graph_and_eager_modes()
   def test_raises_if_rank_is_not_integer_static(self):
-    with self.test_session():
-      tensor = constant_op.constant((42, 43), name="my_tensor")
-      with self.assertRaisesRegexp(TypeError,
-                                   "must be of type <dtype: 'int32'>"):
-        check_ops.assert_rank_in(tensor, (1, .5,))
+    tensor = constant_op.constant((42, 43), name="my_tensor")
+    with self.assertRaisesRegexp(TypeError,
+                                 "must be of type <dtype: 'int32'>"):
+      check_ops.assert_rank_in(tensor, (1, .5,))
 
   def test_raises_if_rank_is_not_integer_dynamic(self):
     with self.test_session():
@@ -822,14 +847,14 @@ class AssertRankInTest(test.TestCase):
 
 class AssertRankAtLeastTest(test.TestCase):
 
+  @test_util.run_in_graph_and_eager_modes()
   def test_rank_zero_tensor_raises_if_rank_too_small_static_rank(self):
-    with self.test_session():
-      tensor = constant_op.constant(1, name="my_tensor")
-      desired_rank = 1
-      with self.assertRaisesRegexp(ValueError, "my_tensor.*rank at least 1"):
-        with ops.control_dependencies(
-            [check_ops.assert_rank_at_least(tensor, desired_rank)]):
-          array_ops.identity(tensor).eval()
+    tensor = constant_op.constant(1, name="my_tensor")
+    desired_rank = 1
+    with self.assertRaisesRegexp(ValueError, "rank at least 1"):
+      with ops.control_dependencies(
+          [check_ops.assert_rank_at_least(tensor, desired_rank)]):
+        self.evaluate(array_ops.identity(tensor))
 
   def test_rank_zero_tensor_raises_if_rank_too_small_dynamic_rank(self):
     with self.test_session():
@@ -840,13 +865,13 @@ class AssertRankAtLeastTest(test.TestCase):
         with self.assertRaisesOpError("my_tensor.*rank"):
           array_ops.identity(tensor).eval(feed_dict={tensor: 0})
 
+  @test_util.run_in_graph_and_eager_modes()
   def test_rank_zero_tensor_doesnt_raise_if_rank_just_right_static_rank(self):
-    with self.test_session():
-      tensor = constant_op.constant(1, name="my_tensor")
-      desired_rank = 0
-      with ops.control_dependencies(
-          [check_ops.assert_rank_at_least(tensor, desired_rank)]):
-        array_ops.identity(tensor).eval()
+    tensor = constant_op.constant(1, name="my_tensor")
+    desired_rank = 0
+    with ops.control_dependencies(
+        [check_ops.assert_rank_at_least(tensor, desired_rank)]):
+      self.evaluate(array_ops.identity(tensor))
 
   def test_rank_zero_tensor_doesnt_raise_if_rank_just_right_dynamic_rank(self):
     with self.test_session():
@@ -856,13 +881,13 @@ class AssertRankAtLeastTest(test.TestCase):
           [check_ops.assert_rank_at_least(tensor, desired_rank)]):
         array_ops.identity(tensor).eval(feed_dict={tensor: 0})
 
+  @test_util.run_in_graph_and_eager_modes()
   def test_rank_one_ten_doesnt_raise_raise_if_rank_too_large_static_rank(self):
-    with self.test_session():
-      tensor = constant_op.constant([1, 2], name="my_tensor")
-      desired_rank = 0
-      with ops.control_dependencies(
-          [check_ops.assert_rank_at_least(tensor, desired_rank)]):
-        array_ops.identity(tensor).eval()
+    tensor = constant_op.constant([1, 2], name="my_tensor")
+    desired_rank = 0
+    with ops.control_dependencies(
+        [check_ops.assert_rank_at_least(tensor, desired_rank)]):
+      self.evaluate(array_ops.identity(tensor))
 
   def test_rank_one_ten_doesnt_raise_if_rank_too_large_dynamic_rank(self):
     with self.test_session():
@@ -872,13 +897,13 @@ class AssertRankAtLeastTest(test.TestCase):
           [check_ops.assert_rank_at_least(tensor, desired_rank)]):
         array_ops.identity(tensor).eval(feed_dict={tensor: [1, 2]})
 
+  @test_util.run_in_graph_and_eager_modes()
   def test_rank_one_tensor_doesnt_raise_if_rank_just_right_static_rank(self):
-    with self.test_session():
-      tensor = constant_op.constant([1, 2], name="my_tensor")
-      desired_rank = 1
-      with ops.control_dependencies(
-          [check_ops.assert_rank_at_least(tensor, desired_rank)]):
-        array_ops.identity(tensor).eval()
+    tensor = constant_op.constant([1, 2], name="my_tensor")
+    desired_rank = 1
+    with ops.control_dependencies(
+        [check_ops.assert_rank_at_least(tensor, desired_rank)]):
+      self.evaluate(array_ops.identity(tensor))
 
   def test_rank_one_tensor_doesnt_raise_if_rank_just_right_dynamic_rank(self):
     with self.test_session():
@@ -888,14 +913,14 @@ class AssertRankAtLeastTest(test.TestCase):
           [check_ops.assert_rank_at_least(tensor, desired_rank)]):
         array_ops.identity(tensor).eval(feed_dict={tensor: [1, 2]})
 
+  @test_util.run_in_graph_and_eager_modes()
   def test_rank_one_tensor_raises_if_rank_too_small_static_rank(self):
-    with self.test_session():
-      tensor = constant_op.constant([1, 2], name="my_tensor")
-      desired_rank = 2
-      with self.assertRaisesRegexp(ValueError, "my_tensor.*rank"):
-        with ops.control_dependencies(
-            [check_ops.assert_rank_at_least(tensor, desired_rank)]):
-          array_ops.identity(tensor).eval()
+    tensor = constant_op.constant([1, 2], name="my_tensor")
+    desired_rank = 2
+    with self.assertRaisesRegexp(ValueError, "rank at least 2"):
+      with ops.control_dependencies(
+          [check_ops.assert_rank_at_least(tensor, desired_rank)]):
+        self.evaluate(array_ops.identity(tensor))
 
   def test_rank_one_tensor_raises_if_rank_too_small_dynamic_rank(self):
     with self.test_session():
@@ -909,144 +934,165 @@ class AssertRankAtLeastTest(test.TestCase):
 
 class AssertNonNegativeTest(test.TestCase):
 
+  @test_util.run_in_graph_and_eager_modes()
   def test_raises_when_negative(self):
-    with self.test_session():
-      zoe = constant_op.constant([-1, -2], name="zoe")
+    zoe = constant_op.constant([-1, -2], name="zoe")
+    with self.assertRaisesOpError("x >= 0 did not hold"):
       with ops.control_dependencies([check_ops.assert_non_negative(zoe)]):
         out = array_ops.identity(zoe)
-      with self.assertRaisesOpError("zoe"):
-        out.eval()
+      self.evaluate(out)
 
+  @test_util.run_in_graph_and_eager_modes()
   def test_doesnt_raise_when_zero_and_positive(self):
-    with self.test_session():
-      lucas = constant_op.constant([0, 2], name="lucas")
-      with ops.control_dependencies([check_ops.assert_non_negative(lucas)]):
-        out = array_ops.identity(lucas)
-      out.eval()
+    lucas = constant_op.constant([0, 2], name="lucas")
+    with ops.control_dependencies([check_ops.assert_non_negative(lucas)]):
+      out = array_ops.identity(lucas)
+    self.evaluate(out)
 
+  @test_util.run_in_graph_and_eager_modes()
   def test_empty_tensor_doesnt_raise(self):
     # A tensor is non-negative when it satisfies:
     #   For every element x_i in x, x_i >= 0
     # and an empty tensor has no elements, so this is trivially satisfied.
     # This is standard set theory.
-    with self.test_session():
-      empty = constant_op.constant([], name="empty")
-      with ops.control_dependencies([check_ops.assert_non_negative(empty)]):
-        out = array_ops.identity(empty)
-      out.eval()
+    empty = constant_op.constant([], name="empty")
+    with ops.control_dependencies([check_ops.assert_non_negative(empty)]):
+      out = array_ops.identity(empty)
+    self.evaluate(out)
 
 
 class AssertNonPositiveTest(test.TestCase):
 
+  @test_util.run_in_graph_and_eager_modes()
   def test_doesnt_raise_when_zero_and_negative(self):
-    with self.test_session():
-      tom = constant_op.constant([0, -2], name="tom")
-      with ops.control_dependencies([check_ops.assert_non_positive(tom)]):
-        out = array_ops.identity(tom)
-      out.eval()
+    tom = constant_op.constant([0, -2], name="tom")
+    with ops.control_dependencies([check_ops.assert_non_positive(tom)]):
+      out = array_ops.identity(tom)
+    self.evaluate(out)
 
+  @test_util.run_in_graph_and_eager_modes()
   def test_raises_when_positive(self):
-    with self.test_session():
-      rachel = constant_op.constant([0, 2], name="rachel")
+    rachel = constant_op.constant([0, 2], name="rachel")
+    with self.assertRaisesOpError("x <= 0 did not hold"):
       with ops.control_dependencies([check_ops.assert_non_positive(rachel)]):
         out = array_ops.identity(rachel)
-      with self.assertRaisesOpError("rachel"):
-        out.eval()
+      self.evaluate(out)
 
+  @test_util.run_in_graph_and_eager_modes()
   def test_empty_tensor_doesnt_raise(self):
     # A tensor is non-positive when it satisfies:
     #   For every element x_i in x, x_i <= 0
     # and an empty tensor has no elements, so this is trivially satisfied.
     # This is standard set theory.
-    with self.test_session():
-      empty = constant_op.constant([], name="empty")
-      with ops.control_dependencies([check_ops.assert_non_positive(empty)]):
-        out = array_ops.identity(empty)
-      out.eval()
+    empty = constant_op.constant([], name="empty")
+    with ops.control_dependencies([check_ops.assert_non_positive(empty)]):
+      out = array_ops.identity(empty)
+    self.evaluate(out)
 
 
 class AssertIntegerTest(test.TestCase):
 
+  @test_util.run_in_graph_and_eager_modes()
   def test_doesnt_raise_when_integer(self):
-    with self.test_session():
-      integers = constant_op.constant([1, 2], name="integers")
-      with ops.control_dependencies([check_ops.assert_integer(integers)]):
-        out = array_ops.identity(integers)
-      out.eval()
+    integers = constant_op.constant([1, 2], name="integers")
+    with ops.control_dependencies([check_ops.assert_integer(integers)]):
+      out = array_ops.identity(integers)
+    self.evaluate(out)
 
+  @test_util.run_in_graph_and_eager_modes()
   def test_raises_when_float(self):
-    with self.test_session():
-      floats = constant_op.constant([1.0, 2.0], name="floats")
-      with self.assertRaisesRegexp(TypeError, "Expected.*integer"):
-        check_ops.assert_integer(floats)
+    floats = constant_op.constant([1.0, 2.0], name="floats")
+    with self.assertRaisesRegexp(TypeError, "Expected.*integer"):
+      check_ops.assert_integer(floats)
+
+
+class AssertTypeTest(test.TestCase):
+
+  @test_util.run_in_graph_and_eager_modes()
+  def test_doesnt_raise_when_correct_type(self):
+    integers = constant_op.constant([1, 2], dtype=dtypes.int64)
+    with ops.control_dependencies([
+        check_ops.assert_type(integers, dtypes.int64)]):
+      out = array_ops.identity(integers)
+    self.evaluate(out)
+
+  @test_util.run_in_graph_and_eager_modes()
+  def test_raises_when_wrong_type(self):
+    floats = constant_op.constant([1.0, 2.0], dtype=dtypes.float16)
+    with self.assertRaisesRegexp(TypeError, "must be of type.*float32"):
+      check_ops.assert_type(floats, dtypes.float32)
 
 
 class IsStrictlyIncreasingTest(test.TestCase):
 
+  @test_util.run_in_graph_and_eager_modes()
   def test_constant_tensor_is_not_strictly_increasing(self):
-    with self.test_session():
-      self.assertFalse(check_ops.is_strictly_increasing([1, 1, 1]).eval())
+    self.assertFalse(self.evaluate(check_ops.is_strictly_increasing([1, 1, 1])))
 
+  @test_util.run_in_graph_and_eager_modes()
   def test_decreasing_tensor_is_not_strictly_increasing(self):
-    with self.test_session():
-      self.assertFalse(check_ops.is_strictly_increasing([1, 0, -1]).eval())
+    self.assertFalse(self.evaluate(
+        check_ops.is_strictly_increasing([1, 0, -1])))
 
+  @test_util.run_in_graph_and_eager_modes()
   def test_2d_decreasing_tensor_is_not_strictly_increasing(self):
-    with self.test_session():
-      self.assertFalse(
-          check_ops.is_strictly_increasing([[1, 3], [2, 4]]).eval())
+    self.assertFalse(
+        self.evaluate(check_ops.is_strictly_increasing([[1, 3], [2, 4]])))
 
+  @test_util.run_in_graph_and_eager_modes()
   def test_increasing_tensor_is_increasing(self):
-    with self.test_session():
-      self.assertTrue(check_ops.is_strictly_increasing([1, 2, 3]).eval())
+    self.assertTrue(self.evaluate(check_ops.is_strictly_increasing([1, 2, 3])))
 
+  @test_util.run_in_graph_and_eager_modes()
   def test_increasing_rank_two_tensor(self):
-    with self.test_session():
-      self.assertTrue(
-          check_ops.is_strictly_increasing([[-1, 2], [3, 4]]).eval())
+    self.assertTrue(
+        self.evaluate(check_ops.is_strictly_increasing([[-1, 2], [3, 4]])))
 
+  @test_util.run_in_graph_and_eager_modes()
   def test_tensor_with_one_element_is_strictly_increasing(self):
-    with self.test_session():
-      self.assertTrue(check_ops.is_strictly_increasing([1]).eval())
+    self.assertTrue(self.evaluate(check_ops.is_strictly_increasing([1])))
 
+  @test_util.run_in_graph_and_eager_modes()
   def test_empty_tensor_is_strictly_increasing(self):
-    with self.test_session():
-      self.assertTrue(check_ops.is_strictly_increasing([]).eval())
+    self.assertTrue(self.evaluate(check_ops.is_strictly_increasing([])))
 
 
 class IsNonDecreasingTest(test.TestCase):
 
+  @test_util.run_in_graph_and_eager_modes()
   def test_constant_tensor_is_non_decreasing(self):
-    with self.test_session():
-      self.assertTrue(check_ops.is_non_decreasing([1, 1, 1]).eval())
+    self.assertTrue(self.evaluate(check_ops.is_non_decreasing([1, 1, 1])))
 
+  @test_util.run_in_graph_and_eager_modes()
   def test_decreasing_tensor_is_not_non_decreasing(self):
-    with self.test_session():
-      self.assertFalse(check_ops.is_non_decreasing([3, 2, 1]).eval())
+    self.assertFalse(self.evaluate(check_ops.is_non_decreasing([3, 2, 1])))
 
+  @test_util.run_in_graph_and_eager_modes()
   def test_2d_decreasing_tensor_is_not_non_decreasing(self):
-    with self.test_session():
-      self.assertFalse(check_ops.is_non_decreasing([[1, 3], [2, 4]]).eval())
+    self.assertFalse(self.evaluate(
+        check_ops.is_non_decreasing([[1, 3], [2, 4]])))
 
+  @test_util.run_in_graph_and_eager_modes()
   def test_increasing_rank_one_tensor_is_non_decreasing(self):
-    with self.test_session():
-      self.assertTrue(check_ops.is_non_decreasing([1, 2, 3]).eval())
+    self.assertTrue(self.evaluate(check_ops.is_non_decreasing([1, 2, 3])))
 
+  @test_util.run_in_graph_and_eager_modes()
   def test_increasing_rank_two_tensor(self):
-    with self.test_session():
-      self.assertTrue(check_ops.is_non_decreasing([[-1, 2], [3, 3]]).eval())
+    self.assertTrue(self.evaluate(
+        check_ops.is_non_decreasing([[-1, 2], [3, 3]])))
 
+  @test_util.run_in_graph_and_eager_modes()
   def test_tensor_with_one_element_is_non_decreasing(self):
-    with self.test_session():
-      self.assertTrue(check_ops.is_non_decreasing([1]).eval())
+    self.assertTrue(self.evaluate(check_ops.is_non_decreasing([1])))
 
+  @test_util.run_in_graph_and_eager_modes()
   def test_empty_tensor_is_non_decreasing(self):
-    with self.test_session():
-      self.assertTrue(check_ops.is_non_decreasing([]).eval())
+    self.assertTrue(self.evaluate(check_ops.is_non_decreasing([])))
 
 
 class FloatDTypeTest(test.TestCase):
 
+  @test_util.run_in_graph_and_eager_modes()
   def test_assert_same_float_dtype(self):
     self.assertIs(dtypes.float32,
                   check_ops.assert_same_float_dtype(None, None))
@@ -1100,6 +1146,7 @@ class FloatDTypeTest(test.TestCase):
 
 class AssertScalarTest(test.TestCase):
 
+  @test_util.run_in_graph_and_eager_modes()
   def test_assert_scalar(self):
     check_ops.assert_scalar(constant_op.constant(3))
     check_ops.assert_scalar(constant_op.constant("foo"))
diff --git a/tensorflow/python/ops/check_ops.py b/tensorflow/python/ops/check_ops.py
index 7e509f72c1..1377af3eac 100644
--- a/tensorflow/python/ops/check_ops.py
+++ b/tensorflow/python/ops/check_ops.py
@@ -104,6 +104,11 @@ def _assert_static(condition, data):
                                       message='\n'.join(data_static))
 
 
+def _shape_and_dtype_str(tensor):
+  """Returns a string containing tensor's shape and dtype."""
+  return 'shape=%s dtype=%s' % (tensor.shape, tensor.dtype.name)
+
+
 def assert_proper_iterable(values):
   """Static assert that values is a "proper" iterable.
 
@@ -159,10 +164,14 @@ def assert_negative(x, data=None, summarize=None, message=None, name=None):
   with ops.name_scope(name, 'assert_negative', [x, data]):
     x = ops.convert_to_tensor(x, name='x')
     if data is None:
+      if context.in_eager_mode():
+        name = _shape_and_dtype_str(x)
+      else:
+        name = x.name
       data = [
           message,
           'Condition x < 0 did not hold element-wise:',
-          'x (%s) = ' % x.name, x]
+          'x (%s) = ' % name, x]
     zero = ops.convert_to_tensor(0, dtype=x.dtype)
     return assert_less(x, zero, data=data, summarize=summarize)
 
@@ -195,9 +204,13 @@ def assert_positive(x, data=None, summarize=None, message=None, name=None):
   with ops.name_scope(name, 'assert_positive', [x, data]):
     x = ops.convert_to_tensor(x, name='x')
     if data is None:
+      if context.in_eager_mode():
+        name = _shape_and_dtype_str(x)
+      else:
+        name = x.name
       data = [
           message, 'Condition x > 0 did not hold element-wise:',
-          'x (%s) = ' % x.name, x]
+          'x (%s) = ' % name, x]
     zero = ops.convert_to_tensor(0, dtype=x.dtype)
     return assert_less(zero, x, data=data, summarize=summarize)
 
@@ -232,7 +245,7 @@ def assert_non_negative(x, data=None, summarize=None, message=None, name=None):
     x = ops.convert_to_tensor(x, name='x')
     if data is None:
       if context.in_eager_mode():
-        name = str(x)
+        name = _shape_and_dtype_str(x)
       else:
         name = x.name
       data = [
@@ -272,10 +285,14 @@ def assert_non_positive(x, data=None, summarize=None, message=None, name=None):
   with ops.name_scope(name, 'assert_non_positive', [x, data]):
     x = ops.convert_to_tensor(x, name='x')
     if data is None:
+      if context.in_eager_mode():
+        name = _shape_and_dtype_str(x)
+      else:
+        name = x.name
       data = [
           message,
           'Condition x <= 0 did not hold element-wise:'
-          'x (%s) = ' % x.name, x]
+          'x (%s) = ' % name, x]
     zero = ops.convert_to_tensor(0, dtype=x.dtype)
     return assert_less_equal(x, zero, data=data, summarize=summarize)
 
@@ -408,8 +425,8 @@ def assert_none_equal(
     x = ops.convert_to_tensor(x, name='x')
     y = ops.convert_to_tensor(y, name='y')
     if context.in_eager_mode():
-      x_name = 'x'
-      y_name = 'y'
+      x_name = _shape_and_dtype_str(x)
+      y_name = _shape_and_dtype_str(y)
     else:
       x_name = x.name
       y_name = y.name
@@ -456,8 +473,8 @@ def assert_less(x, y, data=None, summarize=None, message=None, name=None):
     x = ops.convert_to_tensor(x, name='x')
     y = ops.convert_to_tensor(y, name='y')
     if context.in_eager_mode():
-      x_name = 'x'
-      y_name = 'y'
+      x_name = _shape_and_dtype_str(x)
+      y_name = _shape_and_dtype_str(y)
     else:
       x_name = x.name
       y_name = y.name
@@ -502,11 +519,18 @@ def assert_less_equal(x, y, data=None, summarize=None, message=None, name=None):
   with ops.name_scope(name, 'assert_less_equal', [x, y, data]):
     x = ops.convert_to_tensor(x, name='x')
     y = ops.convert_to_tensor(y, name='y')
+    if context.in_eager_mode():
+      x_name = _shape_and_dtype_str(x)
+      y_name = _shape_and_dtype_str(y)
+    else:
+      x_name = x.name
+      y_name = y.name
+
     if data is None:
       data = [
           message,
           'Condition x <= y did not hold element-wise:'
-          'x (%s) = ' % x.name, x, 'y (%s) = ' % y.name, y
+          'x (%s) = ' % x_name, x, 'y (%s) = ' % y_name, y
       ]
     condition = math_ops.reduce_all(math_ops.less_equal(x, y))
     return control_flow_ops.Assert(condition, data, summarize=summarize)
@@ -542,11 +566,18 @@ def assert_greater(x, y, data=None, summarize=None, message=None, name=None):
   with ops.name_scope(name, 'assert_greater', [x, y, data]):
     x = ops.convert_to_tensor(x, name='x')
     y = ops.convert_to_tensor(y, name='y')
+    if context.in_eager_mode():
+      x_name = _shape_and_dtype_str(x)
+      y_name = _shape_and_dtype_str(y)
+    else:
+      x_name = x.name
+      y_name = y.name
+
     if data is None:
       data = [
           message,
           'Condition x > y did not hold element-wise:'
-          'x (%s) = ' % x.name, x, 'y (%s) = ' % y.name, y
+          'x (%s) = ' % x_name, x, 'y (%s) = ' % y_name, y
       ]
     condition = math_ops.reduce_all(math_ops.greater(x, y))
     return control_flow_ops.Assert(condition, data, summarize=summarize)
@@ -584,11 +615,18 @@ def assert_greater_equal(x, y, data=None, summarize=None, message=None,
   with ops.name_scope(name, 'assert_greater_equal', [x, y, data]):
     x = ops.convert_to_tensor(x, name='x')
     y = ops.convert_to_tensor(y, name='y')
+    if context.in_eager_mode():
+      x_name = _shape_and_dtype_str(x)
+      y_name = _shape_and_dtype_str(y)
+    else:
+      x_name = x.name
+      y_name = y.name
+
     if data is None:
       data = [
           message,
           'Condition x >= y did not hold element-wise:'
-          'x (%s) = ' % x.name, x, 'y (%s) = ' % y.name, y
+          'x (%s) = ' % x_name, x, 'y (%s) = ' % y_name, y
       ]
     condition = math_ops.reduce_all(math_ops.greater_equal(x, y))
     return control_flow_ops.Assert(condition, data, summarize=summarize)
@@ -676,10 +714,15 @@ def assert_rank(x, rank, data=None, summarize=None, message=None, name=None):
     static_condition = lambda actual_rank, given_rank: actual_rank == given_rank
     dynamic_condition = math_ops.equal
 
+    if context.in_eager_mode():
+      name = ''
+    else:
+      name = x.name
+
     if data is None:
       data = [
           message,
-          'Tensor %s must have rank' % x.name, rank, 'Received shape: ',
+          'Tensor %s must have rank' % name, rank, 'Received shape: ',
           array_ops.shape(x)
       ]
 
@@ -691,7 +734,7 @@ def assert_rank(x, rank, data=None, summarize=None, message=None, name=None):
       if e.args[0] == 'Static rank condition failed':
         raise ValueError(
             '%s.  Tensor %s must have rank %d.  Received rank %d, shape %s' %
-            (message, x.name, e.args[2], e.args[1], x.get_shape()))
+            (message, name, e.args[2], e.args[1], x.get_shape()))
       else:
         raise
 
@@ -734,10 +777,16 @@ def assert_rank_at_least(
 
     static_condition = lambda actual_rank, given_rank: actual_rank >= given_rank
     dynamic_condition = math_ops.greater_equal
+
+    if context.in_eager_mode():
+      name = ''
+    else:
+      name = x.name
+
     if data is None:
       data = [
           message,
-          'Tensor %s must have rank at least' % x.name, rank,
+          'Tensor %s must have rank at least' % name, rank,
           'Received shape: ', array_ops.shape(x)
       ]
 
@@ -749,7 +798,7 @@ def assert_rank_at_least(
       if e.args[0] == 'Static rank condition failed':
         raise ValueError(
             '%s.  Tensor %s must have rank at least %d.  Received rank %d, '
-            'shape %s' % (message, x.name, e.args[2], e.args[1], x.get_shape()))
+            'shape %s' % (message, name, e.args[2], e.args[1], x.get_shape()))
       else:
         raise
 
@@ -856,9 +905,14 @@ def assert_rank_in(
     ranks = tuple([ops.convert_to_tensor(rank, name='rank') for rank in ranks])
     message = message or ''
 
+    if context.in_eager_mode():
+      name = ''
+    else:
+      name = x.name
+
     if data is None:
       data = [
-          message, 'Tensor %s must have rank in' % x.name
+          message, 'Tensor %s must have rank in' % name
       ] + list(ranks) + [
           'Received shape: ', array_ops.shape(x)
       ]
@@ -871,7 +925,7 @@ def assert_rank_in(
       if e.args[0] == 'Static rank condition failed':
         raise ValueError(
             '%s.  Tensor %s must have rank in %s.  Received rank %d, '
-            'shape %s' % (message, x.name, e.args[2], e.args[1], x.get_shape()))
+            'shape %s' % (message, name, e.args[2], e.args[1], x.get_shape()))
       else:
         raise
 
@@ -903,9 +957,13 @@ def assert_integer(x, message=None, name=None):
   with ops.name_scope(name, 'assert_integer', [x]):
     x = ops.convert_to_tensor(x, name='x')
     if not x.dtype.is_integer:
+      if context.in_eager_mode():
+        name = 'tensor'
+      else:
+        name = x.name
       err_msg = (
           '%s  Expected "x" to be integer type.  Found: %s of dtype %s'
-          % (message, x.name, x.dtype))
+          % (message, name, x.dtype))
       raise TypeError(err_msg)
 
     return control_flow_ops.no_op('statically_determined_was_integer')
@@ -1079,6 +1137,10 @@ def assert_scalar(tensor, name=None):
     tensor = ops.convert_to_tensor(tensor, name=name_scope)
     shape = tensor.get_shape()
     if shape.ndims != 0:
-      raise ValueError('Expected scalar shape for %s, saw shape: %s.'
-                       % (tensor.name, shape))
+      if context.in_eager_mode():
+        raise ValueError('Expected scalar shape, saw shape: %s.'
+                         % (shape,))
+      else:
+        raise ValueError('Expected scalar shape for %s, saw shape: %s.'
+                         % (tensor.name, shape))
     return tensor
-- 
GitLab


From 6c7bd707ce26cc89d542bbb326882026a613748c Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 21 Nov 2017 12:24:12 -0800
Subject: [PATCH 0177/1225] Add tpu saved model tags. No cpu tag is added
 because cpu is assumed to be the implicit device.

PiperOrigin-RevId: 176544698
---
 tensorflow/cc/saved_model/tag_constants.h          |  3 +++
 .../saved_model/python/saved_model/reader_test.py  | 10 +++++++++-
 tensorflow/python/saved_model/saved_model_test.py  | 14 ++++++++++++++
 tensorflow/python/saved_model/tag_constants.py     |  6 +++++-
 .../tensorflow.saved_model.tag_constants.pbtxt     |  4 ++++
 5 files changed, 35 insertions(+), 2 deletions(-)

diff --git a/tensorflow/cc/saved_model/tag_constants.h b/tensorflow/cc/saved_model/tag_constants.h
index 2b0b2d5c7f..b71cb263ca 100644
--- a/tensorflow/cc/saved_model/tag_constants.h
+++ b/tensorflow/cc/saved_model/tag_constants.h
@@ -21,6 +21,9 @@ namespace tensorflow {
 /// Tag for the `gpu` graph.
 constexpr char kSavedModelTagGpu[] = "gpu";
 
+/// Tag for the `tpu` graph.
+constexpr char kSavedModelTagTpu[] = "tpu";
+
 /// Tag for the `serving` graph.
 constexpr char kSavedModelTagServe[] = "serve";
 
diff --git a/tensorflow/contrib/saved_model/python/saved_model/reader_test.py b/tensorflow/contrib/saved_model/python/saved_model/reader_test.py
index a8331cbc8f..d10ec9cf0c 100644
--- a/tensorflow/contrib/saved_model/python/saved_model/reader_test.py
+++ b/tensorflow/contrib/saved_model/python/saved_model/reader_test.py
@@ -86,6 +86,13 @@ class ReaderTest(test.TestCase):
       self._init_and_validate_variable(sess, "v", 44)
       builder.add_meta_graph([tag_constants.SERVING, tag_constants.GPU])
 
+    # Graph that updates the single variable. SavedModel is invoked:
+    # - to add the model (weights are not updated).
+    # - multiple predefined tags for serving on TPU.
+    with self.test_session(graph=ops.Graph()) as sess:
+      self._init_and_validate_variable(sess, "v", 44)
+      builder.add_meta_graph([tag_constants.SERVING, tag_constants.TPU])
+
     # Graph that updates the single variable. SavedModel is invoked:
     # - to add the model (weights are not updated).
     # - multiple custom tags.
@@ -97,7 +104,8 @@ class ReaderTest(test.TestCase):
     builder.save()
 
     actual_tags = reader.get_saved_model_tag_sets(saved_model_dir)
-    expected_tags = [["train"], ["serve"], ["serve", "gpu"], ["foo", "bar"]]
+    expected_tags = [["train"], ["serve"], ["serve", "gpu"], ["serve", "tpu"],
+                     ["foo", "bar"]]
     self.assertEqual(expected_tags, actual_tags)
 
 
diff --git a/tensorflow/python/saved_model/saved_model_test.py b/tensorflow/python/saved_model/saved_model_test.py
index c6d2c32293..92ca7dec6f 100644
--- a/tensorflow/python/saved_model/saved_model_test.py
+++ b/tensorflow/python/saved_model/saved_model_test.py
@@ -214,6 +214,13 @@ class SavedModelTest(test.TestCase):
       self._init_and_validate_variable(sess, "v", 45)
       builder.add_meta_graph([tag_constants.SERVING, tag_constants.GPU])
 
+    # Graph that updates the single variable. SavedModel invoked to:
+    # - simply add the model (weights are not updated).
+    # - multiple tags (from predefined constants for serving on TPU).
+    with self.test_session(graph=ops.Graph()) as sess:
+      self._init_and_validate_variable(sess, "v", 45)
+      builder.add_meta_graph([tag_constants.SERVING, tag_constants.TPU])
+
     # Graph that updates the single variable. SavedModel is invoked:
     # - to add the model (weights are not updated).
     # - multiple custom tags.
@@ -244,6 +251,13 @@ class SavedModelTest(test.TestCase):
       self.assertEqual(
           42, ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)[0].eval())
 
+    # Restore the graph with multiple predefined tags (for serving on TPU)
+    # whose variables were not saved.
+    with self.test_session(graph=ops.Graph()) as sess:
+      loader.load(sess, [tag_constants.SERVING, tag_constants.TPU], export_dir)
+      self.assertEqual(
+          42, ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)[0].eval())
+
     # Restore the graph with multiple tags. Provide duplicate tags to test set
     # semantics.
     with self.test_session(graph=ops.Graph()) as sess:
diff --git a/tensorflow/python/saved_model/tag_constants.py b/tensorflow/python/saved_model/tag_constants.py
index 52868bdf99..e2facafda5 100644
--- a/tensorflow/python/saved_model/tag_constants.py
+++ b/tensorflow/python/saved_model/tag_constants.py
@@ -31,9 +31,13 @@ TRAINING = "train"
 # Tag for the `gpu` graph.
 GPU = "gpu"
 
+# Tag for the `tpu` graph.
+TPU = "tpu"
+
 _allowed_symbols = [
     "SERVING",
     "TRAINING",
-    "GPU"
+    "GPU",
+    "TPU"
 ]
 remove_undocumented(__name__, _allowed_symbols)
diff --git a/tensorflow/tools/api/golden/tensorflow.saved_model.tag_constants.pbtxt b/tensorflow/tools/api/golden/tensorflow.saved_model.tag_constants.pbtxt
index 35e49ee9f4..6af72498d7 100644
--- a/tensorflow/tools/api/golden/tensorflow.saved_model.tag_constants.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.saved_model.tag_constants.pbtxt
@@ -8,6 +8,10 @@ tf_module {
     name: "SERVING"
     mtype: "<type \'str\'>"
   }
+  member {
+    name: "TPU"
+    mtype: "<type \'str\'>"
+  }
   member {
     name: "TRAINING"
     mtype: "<type \'str\'>"
-- 
GitLab


From e8366158a2ed2c971315e1e4b9b4f235ea97f8da Mon Sep 17 00:00:00 2001
From: RJ Ryan <rjryan@google.com>
Date: Tue, 21 Nov 2017 13:01:38 -0800
Subject: [PATCH 0178/1225] Replace
 tf.contrib.framework.get_or_create_global_step with
 tf.train.get_or_create_global_step in tf.contrib.training.create_train_op.

PiperOrigin-RevId: 176549279
---
 tensorflow/contrib/training/python/training/training.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tensorflow/contrib/training/python/training/training.py b/tensorflow/contrib/training/python/training/training.py
index eee2b88812..f72e0a3f83 100644
--- a/tensorflow/contrib/training/python/training/training.py
+++ b/tensorflow/contrib/training/python/training/training.py
@@ -244,7 +244,6 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-from tensorflow.contrib.framework.python.ops import variables
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import ops
 from tensorflow.python.ops import array_ops
@@ -255,6 +254,7 @@ from tensorflow.python.platform import tf_logging as logging
 from tensorflow.python.summary import summary
 from tensorflow.python.training import monitored_session
 from tensorflow.python.training import optimizer as tf_optimizer
+from tensorflow.python.training import training_util
 
 # TODO(nsilberman): move add_gradients_summaries, clip_gradient_norms and
 # multiply_gradients into contrib/summaries and contrib/optimizers.py
@@ -409,7 +409,7 @@ def create_train_op(total_loss,
       loss value.
   """
   if global_step is _USE_GLOBAL_STEP:
-    global_step = variables.get_or_create_global_step()
+    global_step = training_util.get_or_create_global_step()
 
   # Update ops use GraphKeys.UPDATE_OPS collection if update_ops is None.
   global_update_ops = set(ops.get_collection(ops.GraphKeys.UPDATE_OPS))
-- 
GitLab


From 1a9212a7eda7c347e6320991ac219165d0eb9788 Mon Sep 17 00:00:00 2001
From: Jonathan Hseu <jhseu@google.com>
Date: Tue, 21 Nov 2017 13:16:32 -0800
Subject: [PATCH 0179/1225] Delete duplicate mpi.bzl

PiperOrigin-RevId: 176551183
---
 tensorflow/BUILD                   |  1 +
 tensorflow/third_party/mpi/mpi.bzl | 17 -----------------
 2 files changed, 1 insertion(+), 17 deletions(-)
 delete mode 100644 tensorflow/third_party/mpi/mpi.bzl

diff --git a/tensorflow/BUILD b/tensorflow/BUILD
index 6a66d1d44b..49828cd4d6 100644
--- a/tensorflow/BUILD
+++ b/tensorflow/BUILD
@@ -628,6 +628,7 @@ filegroup(
         "//tensorflow/tools/test:all_files",
         "//tensorflow/user_ops:all_files",
         "//third_party/hadoop:all_files",
+        "//third_party/mpi:all_files",
         "//third_party/sycl:all_files",
         "//third_party/sycl/sycl:all_files",
     ],
diff --git a/tensorflow/third_party/mpi/mpi.bzl b/tensorflow/third_party/mpi/mpi.bzl
deleted file mode 100644
index 38ce91c4d0..0000000000
--- a/tensorflow/third_party/mpi/mpi.bzl
+++ /dev/null
@@ -1,17 +0,0 @@
-#OpenMPI and Mvapich/mpich require different headers
-#based on the configuration options return one or the other
-
-def mpi_hdr():
-    MPI_LIB_IS_OPENMPI=True
-    hdrs = []    
-    if MPI_LIB_IS_OPENMPI:
-        hdrs = ["mpi.h", "mpi_portable_platform.h"]   #When using OpenMPI
-    else:
-        hdrs = ["mpi.h",  "mpio.h", "mpicxx.h"]        #When using MVAPICH
-    return hdrs
-
-def if_mpi(if_true, if_false = []):
-    return select({
-        "//tensorflow:with_mpi_support": if_true,
-        "//conditions:default": if_false
-    })
-- 
GitLab


From a784520596cdf1e62702ff656dcd7cc0137d6d67 Mon Sep 17 00:00:00 2001
From: Benoit Steiner <bsteiner@google.com>
Date: Tue, 21 Nov 2017 13:25:30 -0800
Subject: [PATCH 0180/1225] Update the tf_item whenever the underlying
 metagraph is updated

PiperOrigin-RevId: 176552317
---
 tensorflow/python/grappler/item.py      | 23 ++++++++++++++++------
 tensorflow/python/grappler/item_test.py | 26 +++++++++++++++++++++++++
 2 files changed, 43 insertions(+), 6 deletions(-)

diff --git a/tensorflow/python/grappler/item.py b/tensorflow/python/grappler/item.py
index f53fc7f337..cfbe014de5 100644
--- a/tensorflow/python/grappler/item.py
+++ b/tensorflow/python/grappler/item.py
@@ -19,6 +19,7 @@ from __future__ import division
 from __future__ import print_function
 
 from tensorflow.core.grappler.costs import op_performance_data_pb2
+from tensorflow.core.protobuf import meta_graph_pb2
 from tensorflow.python import pywrap_tensorflow as tf_item
 from tensorflow.python.framework import errors
 
@@ -42,21 +43,22 @@ class Item(object):
       ValueError: the metagraph is incomplete or invalid.
     """
     self._metagraph = metagraph
+    self._item_graph = meta_graph_pb2.MetaGraphDef()
+    self._item_graph.CopyFrom(metagraph)
+    self._ignore_colocation = ignore_colocation
+    self._ignore_user_placement = ignore_user_placement
     self._tf_item = None
-    with errors.raise_exception_on_not_ok_status() as status:
-      self._tf_item = tf_item.TF_NewItem(metagraph.SerializeToString(),
-                                         ignore_colocation,
-                                         ignore_user_placement, status)
+    self._BuildTFItem()
 
   def __del__(self):
     if self._tf_item:
       tf_item.TF_DeleteItem(self._tf_item)
 
   def IdentifyImportantOps(self):
-    return tf_item.TF_IdentifyImportantOps(self._tf_item)
+    return tf_item.TF_IdentifyImportantOps(self.tf_item)
 
   def GetOpProperties(self):
-    ret_from_swig = tf_item.TF_GetOpProperties(self._tf_item)
+    ret_from_swig = tf_item.TF_GetOpProperties(self.tf_item)
     properties = {}
     for key, values in ret_from_swig.items():
       prop = []
@@ -72,4 +74,13 @@ class Item(object):
 
   @property
   def tf_item(self):
+    if self._item_graph != self._metagraph:
+      self._BuildTFItem()
+      self._item_graph.CopyFrom(self._metagraph)
     return self._tf_item
+
+  def _BuildTFItem(self):
+    with errors.raise_exception_on_not_ok_status() as status:
+      self._tf_item = tf_item.TF_NewItem(self._metagraph.SerializeToString(),
+                                         self._ignore_colocation,
+                                         self._ignore_user_placement, status)
diff --git a/tensorflow/python/grappler/item_test.py b/tensorflow/python/grappler/item_test.py
index 0739a7a0e4..69835761bc 100644
--- a/tensorflow/python/grappler/item_test.py
+++ b/tensorflow/python/grappler/item_test.py
@@ -73,6 +73,32 @@ class ItemTest(test.TestCase):
         self.assertEqual(dtypes.int32, node_prop[0].dtype)
         self.assertEqual(tensor_shape.scalar(), node_prop[0].shape)
 
+  def testUpdates(self):
+    with ops.Graph().as_default() as g:
+      a = constant_op.constant(10)
+      b = constant_op.constant(20)
+      c = a + b
+      train_op = ops.get_collection_ref(ops.GraphKeys.TRAIN_OP)
+      train_op.append(c)
+      mg = meta_graph.create_meta_graph_def(graph=g)
+      grappler_item = item.Item(mg)
+
+    initial_tf_item = grappler_item.tf_item
+    no_change_tf_item = grappler_item.tf_item
+    self.assertEqual(initial_tf_item, no_change_tf_item)
+
+    # Modify the placement.
+    for node in grappler_item.metagraph.graph_def.node:
+      node.device = '/cpu:0'
+    new_tf_item = grappler_item.tf_item
+    self.assertNotEqual(initial_tf_item, new_tf_item)
+
+    # Assign the same placement.
+    for node in grappler_item.metagraph.graph_def.node:
+      node.device = '/cpu:0'
+    newest_tf_item = grappler_item.tf_item
+    self.assertEqual(new_tf_item, newest_tf_item)
+
 
 if __name__ == '__main__':
   test.main()
-- 
GitLab


From b5198a3c9b093f0d574c21b5496f045e18c74bae Mon Sep 17 00:00:00 2001
From: Eli Bendersky <eliben@google.com>
Date: Tue, 21 Nov 2017 13:28:16 -0800
Subject: [PATCH 0181/1225] Make parameter names in declarations consistent
 with names in definitions

PiperOrigin-RevId: 176552613
---
 tensorflow/compiler/xla/service/user_computation.h | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/tensorflow/compiler/xla/service/user_computation.h b/tensorflow/compiler/xla/service/user_computation.h
index ac879ce55a..473a8b8f73 100644
--- a/tensorflow/compiler/xla/service/user_computation.h
+++ b/tensorflow/compiler/xla/service/user_computation.h
@@ -70,7 +70,7 @@ class UserComputation {
 
   // Enqueues a pad instruction onto this user computation.
   StatusOr<ComputationDataHandle> AddPadInstruction(
-      const PadRequest& parameter_request);
+      const PadRequest& pad_request);
 
   // Enqueues a tracing instruction onto this user computation.
   // Returns an error status if the operand cannot be resolved.
@@ -105,7 +105,7 @@ class UserComputation {
   // Enqueues a ternary instruction onto this user computation.
   // Returns an error status if the operand indices are out of bounds.
   StatusOr<ComputationDataHandle> AddTernaryInstruction(
-      const TernaryOpRequest& request);
+      const TernaryOpRequest& ternary_request);
 
   // Enqueues a variadic instruction onto this user computation.
   // Returns an error status if the operand indices are out of bounds.
@@ -179,7 +179,7 @@ class UserComputation {
 
   // Enqueues a concatenate instruction onto this user computation.
   StatusOr<ComputationDataHandle> AddConcatenateInstruction(
-      const ConcatenateRequest& slice_request);
+      const ConcatenateRequest& concatenate_request);
 
   // Enqueues a convert instruction onto this user computation.
   StatusOr<ComputationDataHandle> AddConvertInstruction(
@@ -188,17 +188,17 @@ class UserComputation {
   // Enqueues a reduce instruction onto this user computation.
   StatusOr<ComputationDataHandle> AddReduceInstruction(
       const ReduceRequest& reduce_request,
-      const UserComputation& reduction_computation);
+      const UserComputation& to_apply_computation);
 
   // Enqueues a windowed reduce instruction onto this user computation.
   StatusOr<ComputationDataHandle> AddReduceWindowInstruction(
       const ReduceWindowRequest& reduce_window_request,
-      const UserComputation& reduction_computation);
+      const UserComputation& to_apply_computation);
 
   // Enqueues a select-and-scatter instruction onto this user
   // computation.
   StatusOr<ComputationDataHandle> AddSelectAndScatterInstruction(
-      const SelectAndScatterRequest& scatter_to_selected_window_element_request,
+      const SelectAndScatterRequest& select_and_scatter_request,
       const UserComputation& select_computation,
       const UserComputation& scatter_computation);
 
-- 
GitLab


From fe69b11b578b40bcf6127e54ef307d822d13e123 Mon Sep 17 00:00:00 2001
From: Igor Ganichev <iga@google.com>
Date: Tue, 21 Nov 2017 14:34:09 -0800
Subject: [PATCH 0182/1225] Run xla FunctionTest with (and without) C API

PiperOrigin-RevId: 176562355
---
 tensorflow/compiler/tests/function_test.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tensorflow/compiler/tests/function_test.py b/tensorflow/compiler/tests/function_test.py
index cbe2888696..11d8a99ffe 100644
--- a/tensorflow/compiler/tests/function_test.py
+++ b/tensorflow/compiler/tests/function_test.py
@@ -24,10 +24,12 @@ from tensorflow.compiler.tests.xla_test import XLATestCase
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import function
+from tensorflow.python.framework import test_util
 from tensorflow.python.ops import array_ops
 from tensorflow.python.platform import googletest
 
 
+@test_util.with_c_api
 class FunctionTest(XLATestCase):
 
   def testFunction(self):
-- 
GitLab


From c3ea6ed46123a719b478c508ecf1c1b5628d3b10 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 21 Nov 2017 14:36:54 -0800
Subject: [PATCH 0183/1225] Add support for saving_listeners to Experiment.

PiperOrigin-RevId: 176562782
---
 .../contrib/learn/python/learn/experiment.py  | 47 +++++++++++++------
 .../learn/python/learn/experiment_test.py     | 43 ++++++++++++-----
 2 files changed, 64 insertions(+), 26 deletions(-)

diff --git a/tensorflow/contrib/learn/python/learn/experiment.py b/tensorflow/contrib/learn/python/learn/experiment.py
index 307db76afe..fc4bd1f461 100644
--- a/tensorflow/contrib/learn/python/learn/experiment.py
+++ b/tensorflow/contrib/learn/python/learn/experiment.py
@@ -140,7 +140,8 @@ class Experiment(object):
                delay_workers_by_global_step=False,
                export_strategies=None,
                train_steps_per_iteration=None,
-               checkpoint_and_export=False):
+               checkpoint_and_export=False,
+               saving_listeners=None):
     """Constructor for `Experiment`.
 
     Creates an Experiment instance. None of the functions passed to this
@@ -200,6 +201,9 @@ class Experiment(object):
         `save_checkpoints_steps`. Also, this parameter leads to the creation of
         a default `CheckpointSaverHook` instead of a `ValidationMonitor`, so the
         provided `train_monitors` will need to be adjusted accordingly.
+      saving_listeners: list of `CheckpointSaverListener` objects. Used by
+        tf.estimator.Estimator for callbacks that run immediately before or
+        after checkpoint savings.
 
     Raises:
       ValueError: if `estimator` does not implement Estimator interface,
@@ -221,6 +225,9 @@ class Experiment(object):
         raise ValueError(
             "`estimator` must implement `tf.contrib.learn.Trainable`"
             "or `tf.estimator.`Estimator`.")
+      if saving_listeners is not None:
+        raise ValueError("`saving_listeners` must be `None` with "
+                         "`tf.contrib.learn.Estimator`.")
 
     if isinstance(estimator, tpu_estimator.TPUEstimator):
       logging.warn(
@@ -242,6 +249,7 @@ class Experiment(object):
     self._eval_delay_secs = eval_delay_secs
     self._continuous_eval_throttle_secs = continuous_eval_throttle_secs
     self._checkpoint_and_export = checkpoint_and_export
+    self._saving_listeners = saving_listeners
     # Using 1 on a non-cached file system requires a lot of overhead to
     # read the checkpoint state file. This is particular bad on GCS, so
     # we use a different default. This is a temporary band-aid, to be
@@ -362,9 +370,11 @@ class Experiment(object):
       logging.info("Waiting %d secs before starting training.", remaining)
       time.sleep(delay_secs)
 
-    return self._call_train(input_fn=self._train_input_fn,
-                            max_steps=self._train_steps,
-                            hooks=self._train_monitors + extra_hooks)
+    return self._call_train(
+        input_fn=self._train_input_fn,
+        max_steps=self._train_steps,
+        hooks=self._train_monitors + extra_hooks,
+        saving_listeners=self._saving_listeners)
 
   def evaluate(self, delay_secs=None, name=None):
     """Evaluate on the evaluation data.
@@ -712,9 +722,11 @@ class Experiment(object):
         break
 
       logging.info("Training model for %s steps", train_steps_per_iteration)
-      self._call_train(input_fn=self._train_input_fn,
-                       steps=train_steps_per_iteration,
-                       hooks=self._train_monitors)
+      self._call_train(
+          input_fn=self._train_input_fn,
+          steps=train_steps_per_iteration,
+          hooks=self._train_monitors,
+          saving_listeners=self._saving_listeners)
 
       logging.info("Evaluating model now.")
       eval_result = self._call_evaluate(input_fn=self._eval_input_fn,
@@ -762,9 +774,11 @@ class Experiment(object):
     Returns:
       The result of the `evaluate` call to the `Estimator`.
     """
-    self._call_train(input_fn=self._train_input_fn,
-                     steps=1,
-                     hooks=self._train_monitors)
+    self._call_train(
+        input_fn=self._train_input_fn,
+        steps=1,
+        hooks=self._train_monitors,
+        saving_listeners=self._saving_listeners)
 
     eval_result = self._call_evaluate(input_fn=self._eval_input_fn,
                                       steps=1,
@@ -792,7 +806,8 @@ class Experiment(object):
     return server
 
   def _call_train(self, _sentinel=None,  # pylint: disable=invalid-name,
-                  input_fn=None, steps=None, hooks=None, max_steps=None):
+                  input_fn=None, steps=None, hooks=None, max_steps=None,
+                  saving_listeners=None):
     if _sentinel is not None:
       raise ValueError("_call_train should be called with keyword args only")
 
@@ -801,10 +816,12 @@ class Experiment(object):
     # safe to convert for both cases.
     hooks = monitors.replace_monitors_with_hooks(hooks, self._estimator)
     if self._core_estimator_used:
-      return self._estimator.train(input_fn=input_fn,
-                                   steps=steps,
-                                   max_steps=max_steps,
-                                   hooks=hooks)
+      return self._estimator.train(
+          input_fn=input_fn,
+          steps=steps,
+          max_steps=max_steps,
+          hooks=hooks,
+          saving_listeners=saving_listeners)
     else:
       return self._estimator.fit(input_fn=input_fn,
                                  steps=steps,
diff --git a/tensorflow/contrib/learn/python/learn/experiment_test.py b/tensorflow/contrib/learn/python/learn/experiment_test.py
index fe40d27c44..c29c198d09 100644
--- a/tensorflow/contrib/learn/python/learn/experiment_test.py
+++ b/tensorflow/contrib/learn/python/learn/experiment_test.py
@@ -232,14 +232,19 @@ class ExperimentTest(test.TestCase):
 
   def test_train(self):
     for est in self._estimators_for_tests():
-      eval_metrics = 'eval_metrics' if not isinstance(
-          est, core_estimator.Estimator) else None
+      if isinstance(est, core_estimator.Estimator):
+        eval_metrics = None
+        saving_listeners = 'saving_listeners'
+      else:
+        eval_metrics = 'eval_metrics'
+        saving_listeners = None
       ex = experiment.Experiment(
           est,
           train_input_fn='train_input',
           train_steps='train_steps',
           eval_input_fn='eval_input',
-          eval_metrics=eval_metrics)
+          eval_metrics=eval_metrics,
+          saving_listeners=saving_listeners)
       fit_args = ex.train(delay_secs=0)
       self.assertEqual(1, est.fit_count)
       self.assertIn(('max_steps', 'train_steps'), fit_args)
@@ -675,8 +680,12 @@ class ExperimentTest(test.TestCase):
 
   def test_continuous_train_and_eval(self):
     for est in self._estimators_for_tests(eval_dict={'global_step': 100}):
-      eval_metrics = 'eval_metrics' if not isinstance(
-          est, core_estimator.Estimator) else None
+      if isinstance(est, core_estimator.Estimator):
+        eval_metrics = None
+        saving_listeners = 'saving_listeners'
+      else:
+        eval_metrics = 'eval_metrics'
+        saving_listeners = None
       noop_hook = _NoopHook()
       export_strategy = saved_model_export_utils.make_export_strategy(
           est,
@@ -690,7 +699,8 @@ class ExperimentTest(test.TestCase):
           eval_hooks=[noop_hook],
           train_steps=100,
           eval_steps=100,
-          export_strategies=export_strategy)
+          export_strategies=export_strategy,
+          saving_listeners=saving_listeners)
       ex.continuous_train_and_eval()
       self.assertEqual(1, est.fit_count)
       self.assertEqual(1, est.eval_count)
@@ -742,9 +752,10 @@ class ExperimentTest(test.TestCase):
     ex.continuous_train_and_eval(continuous_eval_predicate_fn=predicate_fn)
     mock_estimator.train.assert_called_once_with(
         input_fn='train_input',
-        steps=int(total_steps/10),
+        steps=int(total_steps / 10),
         max_steps=test.mock.ANY,
-        hooks=test.mock.ANY)
+        hooks=test.mock.ANY,
+        saving_listeners=test.mock.ANY)
 
   def test_continuous_train_and_eval_with_steps_per_iteration_from_user(self):
     mock_estimator = test.mock.Mock(core_estimator.Estimator)
@@ -768,7 +779,8 @@ class ExperimentTest(test.TestCase):
         input_fn='train_input',
         steps=1234,
         max_steps=test.mock.ANY,
-        hooks=test.mock.ANY)
+        hooks=test.mock.ANY,
+        saving_listeners=test.mock.ANY)
 
   def test_continuous_train_and_eval_with_default_steps_per_iteration(self):
     mock_estimator = test.mock.Mock(core_estimator.Estimator)
@@ -791,7 +803,8 @@ class ExperimentTest(test.TestCase):
         input_fn='train_input',
         steps=1000,
         max_steps=test.mock.ANY,
-        hooks=test.mock.ANY)
+        hooks=test.mock.ANY,
+        saving_listeners=test.mock.ANY)
 
   def test_continuous_train_and_eval_with_invalid_predicate_fn(self):
     for est in self._estimators_for_tests():
@@ -857,11 +870,19 @@ class ExperimentTest(test.TestCase):
           est,
           None if isinstance(est, core_estimator.Estimator) else 'export_input',
           exports_to_keep=None)
+      if isinstance(est, core_estimator.Estimator):
+        eval_metrics = None
+        saving_listeners = 'saving_listeners'
+      else:
+        eval_metrics = 'eval_metrics'
+        saving_listeners = None
       ex = experiment.Experiment(
           est,
           train_input_fn='train_input',
           eval_input_fn='eval_input',
-          export_strategies=(exp_strategy,))
+          export_strategies=(exp_strategy,),
+          eval_metrics=eval_metrics,
+          saving_listeners=saving_listeners)
       ex.test()
       self.assertEqual(1, est.fit_count)
       self.assertEqual(1, est.eval_count)
-- 
GitLab


From c6117cacf6ca91e442aea9bacb88a9444be60b33 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 21 Nov 2017 14:42:24 -0800
Subject: [PATCH 0184/1225] Adds name_scope for each column in
 shared_embedding_columns.

PiperOrigin-RevId: 176563698
---
 .../python/feature_column/feature_column.py   | 84 +++++++++++++++----
 1 file changed, 69 insertions(+), 15 deletions(-)

diff --git a/tensorflow/python/feature_column/feature_column.py b/tensorflow/python/feature_column/feature_column.py
index 55969c4b75..452f84192c 100644
--- a/tensorflow/python/feature_column/feature_column.py
+++ b/tensorflow/python/feature_column/feature_column.py
@@ -236,20 +236,29 @@ def input_layer(features,
       ordered_columns.append(column)
       with variable_scope.variable_scope(
           None, default_name=column._var_scope_name):  # pylint: disable=protected-access
-        tensor = column._get_dense_tensor(  # pylint: disable=protected-access
-            builder,
-            weight_collections=weight_collections,
-            trainable=trainable)
+        if column._var_scope_name == column.name:  # pylint: disable=protected-access
+          tensor = _get_dense_tensor(
+              column=column,
+              builder=builder,
+              weight_collections=weight_collections,
+              trainable=trainable)
+        else:
+          # This is typically the case for shared_embedding_columns. The
+          # embedding weights variable will be under the common variable_scope,
+          # but the ops for each column will be under a separate name_scope.
+          with ops.name_scope(column.name):
+            tensor = _get_dense_tensor(
+                column=column,
+                builder=builder,
+                weight_collections=weight_collections,
+                trainable=trainable)
+        output_tensors.append(tensor)
         if cols_to_vars is not None:
           # Retrieve any variables created (some _DenseColumn's don't create
           # variables, in which case an empty list is returned).
           cols_to_vars[column] = ops.get_collection(
               ops.GraphKeys.GLOBAL_VARIABLES,
               scope=variable_scope.get_variable_scope().name)
-        num_elements = column._variable_shape.num_elements()  # pylint: disable=protected-access
-        batch_size = array_ops.shape(tensor)[0]
-        tensor = array_ops.reshape(tensor, shape=(batch_size, num_elements))
-        output_tensors.append(tensor)
     _verify_static_batch_size_equality(output_tensors, ordered_columns)
     return array_ops.concat(output_tensors, 1)
 
@@ -345,13 +354,26 @@ def linear_model(features,
       with variable_scope.variable_scope(
           None, default_name=column._var_scope_name):  # pylint: disable=protected-access
         ordered_columns.append(column)
-        if isinstance(column, _CategoricalColumn):
-          weighted_sum = _create_categorical_column_weighted_sum(
-              column, builder, units, sparse_combiner, weight_collections,
-              trainable)
+        if column._var_scope_name == column.name:  # pylint: disable=protected-access
+          weighted_sum = _create_weighted_sum(
+              column=column,
+              builder=builder,
+              units=units,
+              sparse_combiner=sparse_combiner,
+              weight_collections=weight_collections,
+              trainable=trainable)
         else:
-          weighted_sum = _create_dense_column_weighted_sum(
-              column, builder, units, weight_collections, trainable)
+          # This is typically the case for shared_embedding_columns. The
+          # embedding weights variable will be under the common variable_scope,
+          # but the ops for each column will be under a separate name_scope.
+          with ops.name_scope(column.name):
+            weighted_sum = _create_weighted_sum(
+                column=column,
+                builder=builder,
+                units=units,
+                sparse_combiner=sparse_combiner,
+                weight_collections=weight_collections,
+                trainable=trainable)
         weighted_sums.append(weighted_sum)
         if cols_to_vars is not None:
           # Retrieve the variables created.
@@ -1488,7 +1510,7 @@ class _FeatureColumn(object):
 
   @abc.abstractproperty
   def name(self):
-    """Returns string. Used for naming."""
+    """Returns string. Used for naming and for name_scope."""
     pass
 
   @property
@@ -1586,6 +1608,38 @@ class _DenseColumn(_FeatureColumn):
     pass
 
 
+def _get_dense_tensor(
+    column,
+    builder,
+    weight_collections,
+    trainable):
+  """Creates a dense Tensor for a _DenseColumn for input_layer."""
+  tensor = column._get_dense_tensor(  # pylint: disable=protected-access
+      builder,
+      weight_collections=weight_collections,
+      trainable=trainable)
+  num_elements = column._variable_shape.num_elements()  # pylint: disable=protected-access
+  batch_size = array_ops.shape(tensor)[0]
+  return array_ops.reshape(tensor, shape=(batch_size, num_elements))
+
+
+def _create_weighted_sum(
+    column,
+    builder,
+    units,
+    sparse_combiner,
+    weight_collections,
+    trainable):
+  """Creates a weighted sum for a dense or sparse column for linear_model."""
+  if isinstance(column, _CategoricalColumn):
+    return _create_categorical_column_weighted_sum(
+        column, builder, units, sparse_combiner, weight_collections,
+        trainable)
+  else:
+    return _create_dense_column_weighted_sum(
+        column, builder, units, weight_collections, trainable)
+
+
 def _create_dense_column_weighted_sum(
     column, builder, units, weight_collections, trainable):
   """Create a weighted sum of a dense column for linear_model."""
-- 
GitLab


From f0d1abbf2389aa2a29fe6fd090ba68ab6b8fd76f Mon Sep 17 00:00:00 2001
From: Yao Zhang <yaozhang@google.com>
Date: Tue, 21 Nov 2017 14:47:00 -0800
Subject: [PATCH 0185/1225] Pass VirtualCluster to tf_optimizer to enable
 cross-optimization of models.

PiperOrigin-RevId: 176564391
---
 .../grappler/costs/op_level_cost_estimator.cc |  3 +-
 tensorflow/core/grappler/optimizers/BUILD     |  1 +
 .../grappler/optimizers/layout_optimizer.cc   | 23 ++++-
 .../grappler/optimizers/layout_optimizer.h    |  4 -
 .../optimizers/layout_optimizer_test.cc       | 36 +++----
 tensorflow/python/BUILD                       |  3 +
 tensorflow/python/grappler/cluster.i          | 87 ++++++++++++++---
 tensorflow/python/grappler/cluster.py         | 30 +++---
 tensorflow/python/grappler/cluster_test.py    | 21 +++++
 .../python/grappler/layout_optimizer_test.py  | 93 +++++++++++--------
 tensorflow/python/grappler/tf_optimizer.i     | 16 +++-
 tensorflow/python/grappler/tf_optimizer.py    |  7 +-
 12 files changed, 226 insertions(+), 98 deletions(-)

diff --git a/tensorflow/core/grappler/costs/op_level_cost_estimator.cc b/tensorflow/core/grappler/costs/op_level_cost_estimator.cc
index bd84331b67..f7905d7798 100644
--- a/tensorflow/core/grappler/costs/op_level_cost_estimator.cc
+++ b/tensorflow/core/grappler/costs/op_level_cost_estimator.cc
@@ -324,7 +324,8 @@ OpLevelCostEstimator::DeviceInfo OpLevelCostEstimator::GetDeviceInfo(
       // Maxwell
       cores_per_multiprocessor = 128;
     } else {
-      // Pascal
+      // Pascal (compute capability version 6) and Volta (compute capability
+      // version 7)
       cores_per_multiprocessor = 64;
     }
     gflops = device.num_cores() * device.frequency() * 1e-3 *
diff --git a/tensorflow/core/grappler/optimizers/BUILD b/tensorflow/core/grappler/optimizers/BUILD
index 08344b0ada..e127556054 100644
--- a/tensorflow/core/grappler/optimizers/BUILD
+++ b/tensorflow/core/grappler/optimizers/BUILD
@@ -336,6 +336,7 @@ tf_cc_test(
         "//tensorflow/core:testlib",
         "//tensorflow/core/grappler:grappler_item",
         "//tensorflow/core/grappler:utils",
+        "//tensorflow/core/grappler/clusters:virtual_cluster",
     ],
 )
 
diff --git a/tensorflow/core/grappler/optimizers/layout_optimizer.cc b/tensorflow/core/grappler/optimizers/layout_optimizer.cc
index f186fdb895..aaa1b7a316 100644
--- a/tensorflow/core/grappler/optimizers/layout_optimizer.cc
+++ b/tensorflow/core/grappler/optimizers/layout_optimizer.cc
@@ -1404,6 +1404,24 @@ int GetNumTranspose(const GraphDef& graph) {
   LOG(INFO) << "Number of Transpose nodes: " << number;
   return number;
 }
+
+int GetNumGPUs(const Cluster& cluster) {
+  auto devices = cluster.GetDevices();
+  int num_gpus = 0;
+  for (const auto& device : devices) {
+    if (device.second.type() == "GPU") {
+      if (device.second.environment().find("architecture") !=
+          device.second.environment().end()) {
+        const string arch = device.second.environment().at("architecture");
+        // TODO(yaozhang): Enable for Volta GPUs (compute capability version 7).
+        if (arch < "7") {
+          num_gpus++;
+        }
+      }
+    }
+  }
+  return num_gpus;
+}
 }  // namespace
 
 Status LayoutOptimizer::Tune(const GrapplerItem& item,
@@ -1424,10 +1442,7 @@ Status LayoutOptimizer::Tune(const GrapplerItem& item,
 
 Status LayoutOptimizer::Optimize(Cluster* cluster, const GrapplerItem& item,
                                  GraphDef* output) {
-  if (num_gpus_ == 0) {
-    num_gpus_ = GetNumAvailableGPUs();
-  }
-  if (num_gpus_ < 1) {
+  if (GetNumGPUs(*cluster) < 1) {
     // LayoutOptimizer is currently only tuned for GPU.
     *output = item.graph;
     return Status::OK();
diff --git a/tensorflow/core/grappler/optimizers/layout_optimizer.h b/tensorflow/core/grappler/optimizers/layout_optimizer.h
index 621c286976..a22fadd9e7 100644
--- a/tensorflow/core/grappler/optimizers/layout_optimizer.h
+++ b/tensorflow/core/grappler/optimizers/layout_optimizer.h
@@ -29,9 +29,6 @@ class LayoutOptimizer : public GraphOptimizer {
 
   string name() const override { return "layout"; };
 
-  // This is for testing only.
-  void set_num_gpus(int num_gpus) { num_gpus_ = num_gpus; };
-
   struct TuningConfig {
     // If true, do not use the NHWC GEMM implementation. When filter size is
     // one or filter size is equal to input image size,
@@ -50,7 +47,6 @@ class LayoutOptimizer : public GraphOptimizer {
                 const GraphDef& optimize_output, double result) override;
 
  private:
-  int num_gpus_ = 0;
   Status Tune(const GrapplerItem& item, const GraphProperties& graph_properties,
               const string& default_device, const TuningConfig& config,
               GraphDef* output);
diff --git a/tensorflow/core/grappler/optimizers/layout_optimizer_test.cc b/tensorflow/core/grappler/optimizers/layout_optimizer_test.cc
index b760cf2ff2..156e6710a6 100644
--- a/tensorflow/core/grappler/optimizers/layout_optimizer_test.cc
+++ b/tensorflow/core/grappler/optimizers/layout_optimizer_test.cc
@@ -17,10 +17,12 @@ limitations under the License.
 #include "tensorflow/cc/ops/standard_ops.h"
 #include "tensorflow/core/framework/node_def.pb.h"
 #include "tensorflow/core/framework/tensor_testutil.h"
+#include "tensorflow/core/grappler/clusters/virtual_cluster.h"
 #include "tensorflow/core/grappler/grappler_item.h"
 #include "tensorflow/core/grappler/utils.h"
 #include "tensorflow/core/lib/core/status_test_util.h"
 #include "tensorflow/core/platform/test.h"
+#include "tensorflow/core/protobuf/device_properties.pb.h"
 
 namespace tensorflow {
 namespace grappler {
@@ -28,6 +30,13 @@ namespace {
 
 class LayoutOptimizerTest : public ::testing::Test {
  protected:
+  void SetUp() override {
+    DeviceProperties device_properties;
+    device_properties.set_type("GPU");
+    device_properties.mutable_environment()->insert({"architecture", "6"});
+    virtual_cluster_.reset(new VirtualCluster({{"/GPU:0", device_properties}}));
+  }
+
   Output SimpleConv2D(tensorflow::Scope* s, int input_size, int filter_size,
                       const string& padding) {
     int batch_size = 128;
@@ -99,6 +108,8 @@ class LayoutOptimizerTest : public ::testing::Test {
     CHECK(tensor.FromProto(node.attr().at({"value"}).tensor()));
     return tensor;
   }
+
+  std::unique_ptr<VirtualCluster> virtual_cluster_;
 };
 
 TEST_F(LayoutOptimizerTest, Conv2DBackpropInput) {
@@ -108,9 +119,9 @@ TEST_F(LayoutOptimizerTest, Conv2DBackpropInput) {
   GrapplerItem item;
   TF_CHECK_OK(s.ToGraphDef(&item.graph));
   LayoutOptimizer optimizer;
-  optimizer.set_num_gpus(1);
   GraphDef output;
-  Status status = optimizer.Optimize(nullptr, item, &output);
+
+  Status status = optimizer.Optimize(virtual_cluster_.get(), item, &output);
   NodeMap node_map(&output);
   string input_name = AddPrefixToNodeName("Conv2DBackpropInput-InputSizes",
                                           "LayoutOptimizer", "-");
@@ -132,9 +143,8 @@ TEST_F(LayoutOptimizerTest, FilterSizeIsOne) {
   GrapplerItem item;
   TF_CHECK_OK(s.ToGraphDef(&item.graph));
   LayoutOptimizer optimizer;
-  optimizer.set_num_gpus(1);
   GraphDef output;
-  Status status = optimizer.Optimize(nullptr, item, &output);
+  Status status = optimizer.Optimize(virtual_cluster_.get(), item, &output);
   NodeMap node_map(&output);
   EXPECT_FALSE(
       node_map.GetNode("LayoutOptimizerTransposeNHWCToNCHW-Conv2D-Input"));
@@ -147,9 +157,8 @@ TEST_F(LayoutOptimizerTest, FilterSizeNotOne) {
   GrapplerItem item;
   TF_CHECK_OK(s.ToGraphDef(&item.graph));
   LayoutOptimizer optimizer;
-  optimizer.set_num_gpus(1);
   GraphDef output;
-  Status status = optimizer.Optimize(nullptr, item, &output);
+  Status status = optimizer.Optimize(virtual_cluster_.get(), item, &output);
   NodeMap node_map(&output);
   EXPECT_FALSE(
       node_map.GetNode("LayoutOptimizerTransposeNHWCToNCHW-Conv2D-Input"));
@@ -162,9 +171,8 @@ TEST_F(LayoutOptimizerTest, EqualSizeWithValidPadding) {
   GrapplerItem item;
   TF_CHECK_OK(s.ToGraphDef(&item.graph));
   LayoutOptimizer optimizer;
-  optimizer.set_num_gpus(1);
   GraphDef output;
-  Status status = optimizer.Optimize(nullptr, item, &output);
+  Status status = optimizer.Optimize(virtual_cluster_.get(), item, &output);
   NodeMap node_map(&output);
   EXPECT_FALSE(
       node_map.GetNode("LayoutOptimizerTransposeNHWCToNCHW-Conv2D-Input"));
@@ -177,9 +185,8 @@ TEST_F(LayoutOptimizerTest, EqualSizeWithSamePadding) {
   GrapplerItem item;
   TF_CHECK_OK(s.ToGraphDef(&item.graph));
   LayoutOptimizer optimizer;
-  optimizer.set_num_gpus(1);
   GraphDef output;
-  Status status = optimizer.Optimize(nullptr, item, &output);
+  Status status = optimizer.Optimize(virtual_cluster_.get(), item, &output);
   NodeMap node_map(&output);
   EXPECT_TRUE(
       node_map.GetNode("LayoutOptimizerTransposeNHWCToNCHW-Conv2D-Input-0"));
@@ -192,9 +199,8 @@ TEST_F(LayoutOptimizerTest, NotEqualSizeWithValidPadding) {
   GrapplerItem item;
   TF_CHECK_OK(s.ToGraphDef(&item.graph));
   LayoutOptimizer optimizer;
-  optimizer.set_num_gpus(1);
   GraphDef output;
-  Status status = optimizer.Optimize(nullptr, item, &output);
+  Status status = optimizer.Optimize(virtual_cluster_.get(), item, &output);
   NodeMap node_map(&output);
   EXPECT_TRUE(
       node_map.GetNode("LayoutOptimizerTransposeNHWCToNCHW-Conv2D-Input-0"));
@@ -209,9 +215,8 @@ TEST_F(LayoutOptimizerTest, Pad) {
   GrapplerItem item;
   TF_CHECK_OK(s.ToGraphDef(&item.graph));
   LayoutOptimizer optimizer;
-  optimizer.set_num_gpus(1);
   GraphDef output;
-  Status status = optimizer.Optimize(nullptr, item, &output);
+  Status status = optimizer.Optimize(virtual_cluster_.get(), item, &output);
   NodeMap node_map(&output);
 
   auto pad = node_map.GetNode("p");
@@ -246,9 +251,8 @@ TEST_F(LayoutOptimizerTest, Connectivity) {
   auto node_i2 = node_map_original.GetNode("i2");
   node_i2->Swap(node_i1);
   LayoutOptimizer optimizer;
-  optimizer.set_num_gpus(1);
   GraphDef output;
-  Status status = optimizer.Optimize(nullptr, item, &output);
+  Status status = optimizer.Optimize(virtual_cluster_.get(), item, &output);
   NodeMap node_map_output(&output);
   auto node_i2_output = node_map_output.GetNode("i2");
   // Layout optimizer should process i2, as it detects i2 is connected with the
diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD
index 590dbcd462..5ae4aace16 100644
--- a/tensorflow/python/BUILD
+++ b/tensorflow/python/BUILD
@@ -3042,7 +3042,9 @@ tf_py_wrap_cc(
         "//tensorflow/core/distributed_runtime/rpc:grpc_session",
         "//tensorflow/core/grappler:grappler_item",
         "//tensorflow/core/grappler:grappler_item_builder",
+        "//tensorflow/core/grappler/clusters:cluster",
         "//tensorflow/core/grappler/clusters:single_machine",
+        "//tensorflow/core/grappler/clusters:virtual_cluster",
         "//tensorflow/core/grappler/costs:graph_memory",
         "//tensorflow/core/grappler/optimizers:meta_optimizer",
         "//tensorflow/core:lib",
@@ -4408,6 +4410,7 @@ cuda_py_test(
         ":nn",
         ":ops",
         ":random_ops",
+        ":tf_cluster",
         ":tf_optimizer",
         ":training",
         "//third_party/py/numpy",
diff --git a/tensorflow/python/grappler/cluster.i b/tensorflow/python/grappler/cluster.i
index 1e06074188..1838c40e46 100644
--- a/tensorflow/python/grappler/cluster.i
+++ b/tensorflow/python/grappler/cluster.i
@@ -15,6 +15,37 @@ limitations under the License.
 
 %include "tensorflow/python/platform/base.i"
 
+%{
+#include "tensorflow/core/protobuf/device_properties.pb.h"
+
+template <>
+bool _PyObjAs(PyObject *input, tensorflow::NamedDevice *out) {
+  char* c_string;
+  Py_ssize_t py_size;
+  if (PyBytes_AsStringAndSize(input, &c_string, &py_size) == -1) {
+    // Python has raised an error (likely TypeError or UnicodeEncodeError).
+    return false;
+  }
+
+  tensorflow::NamedDevice named_device;
+  if (!named_device.ParseFromString(string(c_string, py_size))) {
+    PyErr_SetString(
+        PyExc_TypeError,
+        "The NamedDevice could not be parsed as a valid protocol buffer");
+    return false;
+  }
+  if (out) *out = named_device;
+  return true;
+}
+%}
+
+%typemap(in) const std::vector<tensorflow::NamedDevice>& (std::vector<tensorflow::NamedDevice> temp) {
+  if (!tf_vector_input_helper($input, &temp, &_PyObjAs<tensorflow::NamedDevice>)) {
+    SWIG_fail;
+  }
+  $1 = &temp;
+}
+
 %typemap(in) const tensorflow::RunMetadata& (tensorflow::RunMetadata temp) {
   char* c_string;
   Py_ssize_t py_size;
@@ -26,7 +57,7 @@ limitations under the License.
   if (!temp.ParseFromString(string(c_string, py_size))) {
     PyErr_SetString(
         PyExc_TypeError,
-        "The MetaGraphDef could not be parsed as a valid protocol buffer");
+        "The RunMetadata could not be parsed as a valid protocol buffer");
     SWIG_fail;
   }
   $1 = &temp;
@@ -44,6 +75,7 @@ limitations under the License.
 #include <vector>
 #include "tensorflow/core/grappler/devices.h"
 #include "tensorflow/core/grappler/clusters/single_machine.h"
+#include "tensorflow/core/grappler/clusters/virtual_cluster.h"
 #include "tensorflow/core/grappler/costs/graph_memory.h"
 #include "tensorflow/core/grappler/costs/op_performance_data.pb.h"
 #include "tensorflow/core/grappler/costs/measuring_cost_estimator.h"
@@ -51,12 +83,14 @@ limitations under the License.
 #include "tensorflow/core/protobuf/device_properties.pb.h"
 
 static tensorflow::grappler::Cluster* TF_NewCluster(
-    bool allow_soft_placement, bool disable_detailed_stats, TF_Status* out_status) {
+    bool allow_soft_placement,
+    bool disable_detailed_stats, TF_Status* out_status) {
   int num_cpu_cores = tensorflow::grappler::GetNumAvailableLogicalCPUCores();
   int num_gpus = tensorflow::grappler::GetNumAvailableGPUs();;
   int timeout_s = 60 * 10;
-  tensorflow::grappler::Cluster* cluster = new tensorflow::grappler::SingleMachine(
-      timeout_s, num_cpu_cores, num_gpus);
+  tensorflow::grappler::Cluster* cluster =
+      new tensorflow::grappler::SingleMachine(
+          timeout_s, num_cpu_cores, num_gpus);
   cluster->DisableDetailedStats(disable_detailed_stats);
   cluster->AllowSoftPlacement(allow_soft_placement);
   tensorflow::Status status = cluster->Provision();
@@ -64,15 +98,30 @@ static tensorflow::grappler::Cluster* TF_NewCluster(
   return cluster;
 }
 
+static tensorflow::grappler::Cluster* TF_NewVirtualCluster(
+    const std::vector<tensorflow::NamedDevice>& named_devices,
+    TF_Status* out_status) {
+  std::unordered_map<string, tensorflow::DeviceProperties> devices;
+  for (const auto& named_device : named_devices) {
+    devices[named_device.name()]= named_device.properties();
+  }
+  tensorflow::grappler::Cluster* cluster =
+      new tensorflow::grappler::VirtualCluster(devices);
+  tensorflow::Status status = cluster->Provision();
+  tensorflow::Set_TF_Status_from_Status(out_status, status);
+  return cluster;
+}
+
 static void TF_DeleteCluster(tensorflow::grappler::Cluster* cluster) {
   cluster->Shutdown();
   delete cluster;
 }
 
-tensorflow::Status _GetOpPerformanceDataAndRunTime(const tensorflow::grappler::GrapplerItem& item,
-                                       tensorflow::grappler::CostEstimator* cost_measure,
-                                       tensorflow::OpPerformanceList* op_performance_data,
-                                       tensorflow::grappler::Costs* costs) {
+tensorflow::Status _GetOpPerformanceDataAndRunTime(
+    const tensorflow::grappler::GrapplerItem& item,
+    tensorflow::grappler::CostEstimator* cost_measure,
+    tensorflow::OpPerformanceList* op_performance_data,
+    tensorflow::grappler::Costs* costs) {
   tensorflow::Status status = cost_measure->Initialize(item);
   if (!status.ok()) return status;
 
@@ -105,7 +154,8 @@ static PyObject* TF_ListDevices(tensorflow::grappler::Cluster* cluster) {
 }
 
 static PyObject* TF_MeasureCosts(
-    const tensorflow::grappler::GrapplerItem* item, tensorflow::grappler::Cluster* cluster,
+    const tensorflow::grappler::GrapplerItem* item,
+    tensorflow::grappler::Cluster* cluster,
     bool generate_timeline, TF_Status* out_status) {
   tensorflow::OpPerformanceList op_performance_data;
   tensorflow::StepStats step_stats;
@@ -113,15 +163,16 @@ static PyObject* TF_MeasureCosts(
   tensorflow::grappler::MeasuringCostEstimator cost_measure(cluster, 10, 0);
 
   tensorflow::grappler::Costs costs;
-  tensorflow::Status status = _GetOpPerformanceDataAndRunTime(*item, &cost_measure,
-                                                 &op_performance_data, &costs);
+  tensorflow::Status status = _GetOpPerformanceDataAndRunTime(
+      *item, &cost_measure, &op_performance_data, &costs);
   double run_time = FLT_MAX;
   if (status.ok()) {
     run_time = static_cast<double>(costs.execution_time.count()) / 1e9;
   }
   if (generate_timeline) {
     tensorflow::RunMetadata metadata;
-    tensorflow::Status s = cluster->Run(item->graph, item->feed, item->fetch, &metadata);
+    tensorflow::Status s = cluster->Run(
+        item->graph, item->feed, item->fetch, &metadata);
     if (s.ok()) {
       step_stats = metadata.step_stats();
     } else {
@@ -133,9 +184,11 @@ static PyObject* TF_MeasureCosts(
   if (!status.ok()) {
     Py_RETURN_NONE;
   }
-  PyObject* op_perf_objs = PyList_New(op_performance_data.op_performance_size());
+  PyObject* op_perf_objs = PyList_New(
+      op_performance_data.op_performance_size());
   for (int i = 0; i < op_performance_data.op_performance_size(); i++) {
-    string op_perf_str = op_performance_data.op_performance(i).SerializeAsString();
+    string op_perf_str =
+        op_performance_data.op_performance(i).SerializeAsString();
     PyObject* op_perf_obj = PyBytes_FromStringAndSize(op_perf_str.data(),
                                                       op_perf_str.size());
     PyList_SetItem(op_perf_objs, i, op_perf_obj);
@@ -165,7 +218,8 @@ static PyObject* TF_MeasureCosts(
 
 
 static PyObject* TF_DeterminePeakMemoryUsage(
-    const tensorflow::grappler::GrapplerItem* item, tensorflow::grappler::Cluster* cluster,
+    const tensorflow::grappler::GrapplerItem* item,
+    tensorflow::grappler::Cluster* cluster,
     TF_Status* out_status) {
   if (!item || !cluster) {
     tensorflow::Status status(tensorflow::error::Code::INTERNAL,
@@ -216,6 +270,9 @@ static PyObject* TF_DeterminePeakMemoryUsage(
 
 static tensorflow::grappler::Cluster* TF_NewCluster(
     bool allow_soft_placement, bool disable_detailed_stats, TF_Status* out_status);
+static tensorflow::grappler::Cluster* TF_NewVirtualCluster(
+    const std::vector<tensorflow::NamedDevice>& named_devices,
+    TF_Status* out_status);
 static void TF_DeleteCluster(tensorflow::grappler::Cluster* cluster);
 static PyObject* TF_ListDevices(tensorflow::grappler::Cluster* cluster);
 static PyObject* TF_MeasureCosts(
diff --git a/tensorflow/python/grappler/cluster.py b/tensorflow/python/grappler/cluster.py
index 58c7bbbac1..9864e86811 100644
--- a/tensorflow/python/grappler/cluster.py
+++ b/tensorflow/python/grappler/cluster.py
@@ -31,21 +31,29 @@ class Cluster(object):
   def __init__(self,
                allow_soft_placement=True,
                disable_detailed_stats=True,
-               disable_timeline=True):
+               disable_timeline=True,
+               devices=None):
     """Creates a Cluster.
 
     Args:
-      allow_soft_placement: if True, TF will automatically fix illegal
+      allow_soft_placement: If True, TF will automatically fix illegal
         placements instead of erroring out if the placement isn't legal.
-      disable_detailed_stats: if True, detailed statistics will not be
+      disable_detailed_stats: If True, detailed statistics will not be
         available.
-      disable_timeline: if True, the timeline information will not be
-        reported.
+      disable_timeline: If True, the timeline information will not be reported.
+      devices: A list of devices of type device_properties_pb2.NamedDevice.
+        If None, a device list will be created based on the spec of
+        the local machine.
     """
     self._tf_cluster = None
     with errors.raise_exception_on_not_ok_status() as status:
-      self._tf_cluster = tf_cluster.TF_NewCluster(
-          allow_soft_placement, disable_detailed_stats, status)
+      if devices is None:
+        self._tf_cluster = tf_cluster.TF_NewCluster(
+            allow_soft_placement, disable_detailed_stats, status)
+      else:
+        devices_serialized = [device.SerializeToString() for device in devices]
+        self._tf_cluster = tf_cluster.TF_NewVirtualCluster(
+            devices_serialized, status)
     self._generate_timeline = not disable_timeline
 
   def __del__(self):
@@ -71,8 +79,8 @@ class Cluster(object):
     """Returns the cost of running the specified item.
 
     Args:
-      item: the item for which to measure the costs.
-    Returns: the triplet op_perfs, runtime, step_stats.
+      item: The item for which to measure the costs.
+    Returns: The triplet op_perfs, runtime, step_stats.
     """
     with errors.raise_exception_on_not_ok_status() as status:
       ret_from_swig = tf_cluster.TF_MeasureCosts(
@@ -93,8 +101,8 @@ class Cluster(object):
     """Returns a snapshot of the peak memory usage.
 
     Args:
-      item: the item for which to measure the costs.
-    Returns: a hashtable indexed by device name.
+      item: The item for which to measure the costs.
+    Returns: A hashtable indexed by device name.
     """
     with errors.raise_exception_on_not_ok_status() as status:
       ret_from_swig = tf_cluster.TF_DeterminePeakMemoryUsage(
diff --git a/tensorflow/python/grappler/cluster_test.py b/tensorflow/python/grappler/cluster_test.py
index de4ded571f..a71a860a59 100644
--- a/tensorflow/python/grappler/cluster_test.py
+++ b/tensorflow/python/grappler/cluster_test.py
@@ -18,6 +18,7 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
+from tensorflow.core.protobuf import device_properties_pb2
 from tensorflow.python.framework import meta_graph
 from tensorflow.python.framework import ops
 from tensorflow.python.grappler import cluster
@@ -82,6 +83,26 @@ class ClusterTest(test.TestCase):
         live_tensors = snapshot[1]
         self.assertEqual(15, len(live_tensors))
 
+  def testVirtualCluster(self):
+    with ops.Graph().as_default() as g:
+      a = random_ops.random_uniform(shape=())
+      b = random_ops.random_uniform(shape=())
+      c = a + b
+      train_op = ops.get_collection_ref(ops.GraphKeys.TRAIN_OP)
+      train_op.append(c)
+      mg = meta_graph.create_meta_graph_def(graph=g)
+      grappler_item = item.Item(mg)
+      device_properties = device_properties_pb2.DeviceProperties(
+          type='GPU', environment={
+              'architecture': '7'
+          })
+      named_device = device_properties_pb2.NamedDevice(
+          properties=device_properties, name='/GPU:0')
+      grappler_cluster = cluster.Cluster(devices=[named_device])
+      op_perfs, run_time, _ = grappler_cluster.MeasureCosts(grappler_item)
+      self.assertGreater(run_time, 0)
+      self.assertEqual(len(op_perfs), 15)
+
 
 if __name__ == '__main__':
   test.main()
diff --git a/tensorflow/python/grappler/layout_optimizer_test.py b/tensorflow/python/grappler/layout_optimizer_test.py
index 99a4d23b6a..350c8434ce 100644
--- a/tensorflow/python/grappler/layout_optimizer_test.py
+++ b/tensorflow/python/grappler/layout_optimizer_test.py
@@ -21,6 +21,7 @@ from __future__ import print_function
 import numpy as np
 
 from tensorflow.core.protobuf import config_pb2
+from tensorflow.core.protobuf import device_properties_pb2
 from tensorflow.core.protobuf import rewriter_config_pb2
 from tensorflow.core.protobuf import saver_pb2
 from tensorflow.python.client import session
@@ -28,6 +29,7 @@ from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
 from tensorflow.python.framework import random_seed
+from tensorflow.python.grappler import cluster as gcluster
 from tensorflow.python.grappler import tf_optimizer
 from tensorflow.python.layers import convolutional as conv_layers
 from tensorflow.python.ops import array_ops
@@ -41,53 +43,53 @@ from tensorflow.python.training import gradient_descent
 from tensorflow.python.training import saver as saver_lib
 
 
-def weight(shape):
-  """weights generates a weight of a given shape."""
+def _weight(shape):
+  """Generates a weight of a given shape."""
   return random_ops.truncated_normal(shape, seed=0, stddev=0.1)
 
 
-def bias(shape):
-  """bias generates a bias of a given shape."""
+def _bias(shape):
+  """Generates a bias of a given shape."""
   return constant_op.constant(0.1, shape=shape)
 
 
-def conv2d(x, w):
-  """conv2d returns a 2d convolution layer with full stride."""
+def _conv2d(x, w):
+  """Returns a 2d convolution layer with full stride."""
   return nn.conv2d(x, w, strides=[1, 1, 1, 1], padding='SAME')
 
 
-def max_pool_2x2(x):
-  """max_pool_2x2 downsamples a feature map by 2X."""
+def _max_pool_2x2(x):
+  """Downsamples a feature map by 2X."""
   return nn.max_pool(
       x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
 
 
 # Taken from tensorflow/examples/tutorials/mnist/mnist_deep.py
-def two_layer_model(x):
+def _two_layer_model(x):
   x_image = array_ops.reshape(x, [-1, 28, 28, 1])
-  w_conv1 = weight([5, 5, 1, 32])
-  b_conv1 = bias([32])
-  h_conv1 = nn.relu(conv2d(x_image, w_conv1) + b_conv1)
-  h_pool1 = max_pool_2x2(h_conv1)
-  w_conv2 = weight([5, 5, 32, 64])
-  b_conv2 = bias([64])
-  h_conv2 = nn.relu(conv2d(h_pool1, w_conv2) + b_conv2)
-  h_pool2 = max_pool_2x2(h_conv2)
+  w_conv1 = _weight([5, 5, 1, 32])
+  b_conv1 = _bias([32])
+  h_conv1 = nn.relu(_conv2d(x_image, w_conv1) + b_conv1)
+  h_pool1 = _max_pool_2x2(h_conv1)
+  w_conv2 = _weight([5, 5, 32, 64])
+  b_conv2 = _bias([64])
+  h_conv2 = nn.relu(_conv2d(h_pool1, w_conv2) + b_conv2)
+  h_pool2 = _max_pool_2x2(h_conv2)
   return h_pool2
 
 
-def loop():
+def _loop():
   random_seed.set_random_seed(0)
   x1 = random_ops.truncated_normal([1, 784], seed=0)
   x2 = random_ops.truncated_normal([1, 784], seed=0)
   x3 = random_ops.truncated_normal([1, 784], seed=0)
   x4 = random_ops.truncated_normal([1, 784], seed=0)
   elems = (x1, x2, x3, x4)
-  outputs = functional_ops.map_fn(two_layer_model, elems, dtype=dtypes.float32)
+  outputs = functional_ops.map_fn(_two_layer_model, elems, dtype=dtypes.float32)
   return outputs
 
 
-def get_config(layout_optimizer=True):
+def _get_config(layout_optimizer=True):
   if layout_optimizer:
     rewrite_options = rewriter_config_pb2.RewriterConfig(
         layout_optimizer=rewriter_config_pb2.RewriterConfig.ON)
@@ -100,6 +102,20 @@ def get_config(layout_optimizer=True):
   return config
 
 
+def _simple_metagraph():
+  random_seed.set_random_seed(0)
+  x = variables.Variable(random_ops.truncated_normal([1, 200, 200, 3], seed=0))
+  y = conv_layers.conv2d(x, 32, [3, 3])
+  z = conv_layers.conv2d(y, 32, [3, 3])
+  optimizer = gradient_descent.GradientDescentOptimizer(1e-4)
+  loss = math_ops.reduce_mean(z)
+  train_op = optimizer.minimize(loss)
+  graph = ops.get_default_graph()
+  graph.add_to_collection('train_op', train_op)
+  meta_graph = saver_lib.export_meta_graph(graph_def=graph.as_graph_def())
+  return meta_graph
+
+
 class LayoutOptimizerTest(test.TestCase):
   """Tests the Grappler layout optimizer."""
 
@@ -107,7 +123,7 @@ class LayoutOptimizerTest(test.TestCase):
     ops.reset_default_graph()
     graph = ops.get_default_graph()
     with session.Session(
-        config=get_config(layout_optimizer), graph=graph) as sess:
+        config=_get_config(layout_optimizer), graph=graph) as sess:
       batch = 2
       height = 6
       width = 7
@@ -142,12 +158,12 @@ class LayoutOptimizerTest(test.TestCase):
     if test.is_gpu_available(cuda_only=True):
       random_seed.set_random_seed(0)
       x = random_ops.truncated_normal([1, 784], seed=0)
-      output = two_layer_model(x)
+      output = _two_layer_model(x)
 
       with session.Session() as sess:
         output_val_ref = sess.run(output)
 
-      with session.Session(config=get_config()) as sess:
+      with session.Session(config=_get_config()) as sess:
         metadata = config_pb2.RunMetadata()
         output_val = sess.run(output, run_metadata=metadata)
 
@@ -171,36 +187,28 @@ class LayoutOptimizerTest(test.TestCase):
 
   def testLoop(self):
     if test.is_gpu_available(cuda_only=True):
-      output = loop()
+      output = _loop()
 
       with session.Session() as sess:
         output_val_ref = sess.run(output)
 
-      with session.Session(config=get_config()) as sess:
+      with session.Session(config=_get_config()) as sess:
         metadata = config_pb2.RunMetadata()
         output_val = sess.run(output, run_metadata=metadata)
 
       self.assertAllClose(output_val_ref, output_val, atol=1e-3)
 
   def testGradient(self):
-    if not test.is_gpu_available(cuda_only=True):
-      self.skipTest('GPU required')
-
-    random_seed.set_random_seed(0)
-    x = variables.Variable(
-        random_ops.truncated_normal([1, 200, 200, 3], seed=0))
-    y = conv_layers.conv2d(x, 32, [3, 3])
-    z = conv_layers.conv2d(y, 32, [3, 3])
-    optimizer = gradient_descent.GradientDescentOptimizer(1e-4)
-    loss = math_ops.reduce_mean(z)
-    train_op = optimizer.minimize(loss)
-    graph = ops.get_default_graph()
-    graph.add_to_collection('train_op', train_op)
-    meta_graph = saver_lib.export_meta_graph(graph_def=graph.as_graph_def())
-
+    meta_graph = _simple_metagraph()
     rewrite_options = rewriter_config_pb2.RewriterConfig(
         layout_optimizer=rewriter_config_pb2.RewriterConfig.ON)
-    optimized_graph = tf_optimizer.OptimizeGraph(rewrite_options, meta_graph)
+    named_device = device_properties_pb2.NamedDevice()
+    named_device.name = '/GPU:0'
+    named_device.properties.type = 'GPU'
+    named_device.properties.environment['architecture'] = '4'
+    cluster = gcluster.Cluster(devices=[named_device])
+    optimized_graph = tf_optimizer.OptimizeGraph(
+        rewrite_options, meta_graph, cluster=cluster)
 
     found = 0
     for node in optimized_graph.node:
@@ -210,6 +218,9 @@ class LayoutOptimizerTest(test.TestCase):
     self.assertEqual(found, 5)
 
   def testCheckpointCompatibility(self):
+    if not test.is_gpu_available(cuda_only=True):
+      self.skipTest('GPU required')
+
     checkpoint_path = self.get_temp_dir()
     self._train(checkpoint_path)
     vars_expected = self._train(checkpoint_path, restore=True)
diff --git a/tensorflow/python/grappler/tf_optimizer.i b/tensorflow/python/grappler/tf_optimizer.i
index 719ddaae21..3965c65bb9 100644
--- a/tensorflow/python/grappler/tf_optimizer.i
+++ b/tensorflow/python/grappler/tf_optimizer.i
@@ -62,6 +62,7 @@ limitations under the License.
   #include "tensorflow/core/framework/graph.pb.h"
   #include "tensorflow/core/grappler/grappler_item.h"
   #include "tensorflow/core/grappler/grappler_item_builder.h"
+  #include "tensorflow/core/grappler/clusters/cluster.h"
   #include "tensorflow/core/grappler/clusters/utils.h"
   #include "tensorflow/core/grappler/clusters/virtual_cluster.h"
   #include "tensorflow/core/grappler/optimizers/meta_optimizer.h"
@@ -91,6 +92,7 @@ void DetectDevices(std::unordered_map<string, tensorflow::DeviceProperties>* dev
 }
 
 PyObject* TF_OptimizeGraph(
+      tensorflow::grappler::Cluster* cluster,
       const tensorflow::RewriterConfig& rewriter_config,
       const tensorflow::MetaGraphDef& metagraph,
       bool verbose, const string& graph_id, TF_Status* out_status) {
@@ -99,13 +101,18 @@ PyObject* TF_OptimizeGraph(
     item_config.apply_optimizations = false;
     std::unique_ptr<tensorflow::grappler::GrapplerItem> grappler_item =
         tensorflow::grappler::GrapplerItemFromMetaGraphDef(graph_id, metagraph, item_config);
-    std::unordered_map<string, tensorflow::DeviceProperties> device_map;
-    DetectDevices(&device_map);
+
+    std::unique_ptr<tensorflow::grappler::VirtualCluster> virtual_cluster;
+    if (cluster == nullptr) {
+      std::unordered_map<string, tensorflow::DeviceProperties> device_map;
+      DetectDevices(&device_map);
+      virtual_cluster.reset(new tensorflow::grappler::VirtualCluster(device_map));
+      cluster = virtual_cluster.get();
+    }
     tensorflow::DeviceBase* cpu_device = nullptr;
-    tensorflow::grappler::VirtualCluster cluster(device_map);
     tensorflow::GraphDef out_graph;
     tensorflow::grappler::MetaOptimizer optimizer(cpu_device, rewriter_config);
-    tensorflow::Status status = optimizer.Optimize(&cluster, *grappler_item, &out_graph);
+    tensorflow::Status status = optimizer.Optimize(cluster, *grappler_item, &out_graph);
     if (verbose) {
       optimizer.PrintResult();
     }
@@ -120,6 +127,7 @@ PyObject* TF_OptimizeGraph(
 
 // Wrap this function
 PyObject* TF_OptimizeGraph(
+    tensorflow::grappler::Cluster* cluster,
     const tensorflow::RewriterConfig& rewriter_config,
     const tensorflow::MetaGraphDef& metagraph, bool verbose,
     const string& graph_id, TF_Status* out_status);
diff --git a/tensorflow/python/grappler/tf_optimizer.py b/tensorflow/python/grappler/tf_optimizer.py
index 1c608ce319..d430dd9e2f 100644
--- a/tensorflow/python/grappler/tf_optimizer.py
+++ b/tensorflow/python/grappler/tf_optimizer.py
@@ -26,10 +26,13 @@ from tensorflow.python.framework import errors
 def OptimizeGraph(rewriter_config,
                   metagraph,
                   verbose=True,
-                  graph_id=b'graph_to_optimize'):
+                  graph_id=b'graph_to_optimize',
+                  cluster=None):
   """Optimize the provided metagraph."""
   with errors.raise_exception_on_not_ok_status() as status:
-    ret_from_swig = tf_opt.TF_OptimizeGraph(rewriter_config.SerializeToString(),
+    ret_from_swig = tf_opt.TF_OptimizeGraph(None if cluster is None else
+                                            cluster.tf_cluster,
+                                            rewriter_config.SerializeToString(),
                                             metagraph.SerializeToString(),
                                             verbose, graph_id, status)
   if ret_from_swig is None:
-- 
GitLab


From db8447528c1f7d6055d9a0145aa35bbea7bfd810 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 21 Nov 2017 14:47:49 -0800
Subject: [PATCH 0186/1225] Removed forward declarations of
 tensorflow::StringPiece so that it may more easily be replaced with
 absl::string_view.

PiperOrigin-RevId: 176564520
---
 tensorflow/core/lib/io/path.h                | 20 ++++++++++----------
 tensorflow/core/lib/io/proto_encode_helper.h |  2 +-
 tensorflow/core/lib/strings/ordered_code.h   |  2 +-
 3 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/tensorflow/core/lib/io/path.h b/tensorflow/core/lib/io/path.h
index 955098f5b5..8d02baa5bb 100644
--- a/tensorflow/core/lib/io/path.h
+++ b/tensorflow/core/lib/io/path.h
@@ -20,10 +20,9 @@ limitations under the License.
 #include "tensorflow/core/lib/core/stringpiece.h"
 
 namespace tensorflow {
-class StringPiece;
 namespace io {
 namespace internal {
-string JoinPathImpl(std::initializer_list<StringPiece> paths);
+string JoinPathImpl(std::initializer_list<tensorflow::StringPiece> paths);
 }
 
 // Utility routines for processing filenames
@@ -50,20 +49,20 @@ string JoinPath(const T&... args) {
 #endif /* SWIG */
 
 // Return true if path is absolute.
-bool IsAbsolutePath(StringPiece path);
+bool IsAbsolutePath(tensorflow::StringPiece path);
 
 // Returns the part of the path before the final "/".  If there is a single
 // leading "/" in the path, the result will be the leading "/".  If there is
 // no "/" in the path, the result is the empty prefix of the input.
-StringPiece Dirname(StringPiece path);
+tensorflow::StringPiece Dirname(tensorflow::StringPiece path);
 
 // Returns the part of the path after the final "/".  If there is no
 // "/" in the path, the result is the same as the input.
-StringPiece Basename(StringPiece path);
+tensorflow::StringPiece Basename(tensorflow::StringPiece path);
 
 // Returns the part of the basename of path after the final ".".  If
 // there is no "." in the basename, the result is empty.
-StringPiece Extension(StringPiece path);
+tensorflow::StringPiece Extension(tensorflow::StringPiece path);
 
 // Collapse duplicate "/"s, resolve ".." and "." path elements, remove
 // trailing "/".
@@ -72,7 +71,7 @@ StringPiece Extension(StringPiece path);
 // invoke any system calls (getcwd(2)) in order to resolve relative
 // paths with respect to the actual working directory.  That is, this is purely
 // string manipulation, completely independent of process state.
-string CleanPath(StringPiece path);
+string CleanPath(tensorflow::StringPiece path);
 
 // Populates the scheme, host, and path from a URI. scheme, host, and path are
 // guaranteed by this function to point into the contents of uri, even if
@@ -82,12 +81,13 @@ string CleanPath(StringPiece path);
 // - If the URI is invalid, scheme and host are set to empty strings and the
 //   passed string is assumed to be a path
 // - If the URI omits the path (e.g. file://host), then the path is left empty.
-void ParseURI(StringPiece uri, StringPiece* scheme, StringPiece* host,
-              StringPiece* path);
+void ParseURI(tensorflow::StringPiece uri, tensorflow::StringPiece* scheme,
+              tensorflow::StringPiece* host, tensorflow::StringPiece* path);
 
 // Creates a URI from a scheme, host, and path. If the scheme is empty, we just
 // return the path.
-string CreateURI(StringPiece scheme, StringPiece host, StringPiece path);
+string CreateURI(tensorflow::StringPiece scheme, tensorflow::StringPiece host,
+                 tensorflow::StringPiece path);
 
 }  // namespace io
 }  // namespace tensorflow
diff --git a/tensorflow/core/lib/io/proto_encode_helper.h b/tensorflow/core/lib/io/proto_encode_helper.h
index 5d30dda901..f70e1cbaab 100644
--- a/tensorflow/core/lib/io/proto_encode_helper.h
+++ b/tensorflow/core/lib/io/proto_encode_helper.h
@@ -17,6 +17,7 @@ limitations under the License.
 #define TENSORFLOW_LIB_IO_PROTO_ENCODE_HELPER_H_
 
 #include "tensorflow/core/lib/core/coding.h"
+#include "tensorflow/core/lib/core/stringpiece.h"
 #include "tensorflow/core/platform/protobuf.h"
 
 // A helper class for appending various kinds of values in protocol
@@ -24,7 +25,6 @@ limitations under the License.
 // a buffer and a maximum size guarantee for the number of bytes they
 // will add to this buffer.
 namespace tensorflow {
-class StringPiece;
 namespace io {
 
 class ProtoEncodeHelper {
diff --git a/tensorflow/core/lib/strings/ordered_code.h b/tensorflow/core/lib/strings/ordered_code.h
index ce823c3f87..91870cfec6 100644
--- a/tensorflow/core/lib/strings/ordered_code.h
+++ b/tensorflow/core/lib/strings/ordered_code.h
@@ -39,11 +39,11 @@ limitations under the License.
 #define TENSORFLOW_LIB_STRINGS_ORDERED_CODE_H__
 
 #include <string>
+#include "tensorflow/core/lib/core/stringpiece.h"
 #include "tensorflow/core/platform/macros.h"
 #include "tensorflow/core/platform/types.h"
 
 namespace tensorflow {
-class StringPiece;
 
 namespace strings {
 
-- 
GitLab


From 54dec6e7f5a790460d54ae68568fa5546942b1fe Mon Sep 17 00:00:00 2001
From: Justine Tunney <jart@google.com>
Date: Mon, 20 Nov 2017 19:38:01 -0800
Subject: [PATCH 0187/1225] Have tf-nightly depend on tb-nightly

TensorBoard now has an automated nightly release process!
---
 tensorflow/tools/pip_package/setup.py | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/tensorflow/tools/pip_package/setup.py b/tensorflow/tools/pip_package/setup.py
index a493c6f2aa..c18f20910a 100644
--- a/tensorflow/tools/pip_package/setup.py
+++ b/tensorflow/tools/pip_package/setup.py
@@ -37,7 +37,7 @@ REQUIRED_PACKAGES = [
     'numpy >= 1.12.1',
     'six >= 1.10.0',
     'protobuf >= 3.4.0',
-    'tensorflow-tensorboard >= 0.4.0rc1, < 0.5.0',
+    'tensorflow-tensorboard',
 ]
 
 project_name = 'tensorflow'
@@ -55,11 +55,11 @@ else:
   # mock comes with unittest.mock for python3, need to install for python2
   REQUIRED_PACKAGES.append('mock >= 2.0.0')
 
-# remove tensorboard from tf-nightly packages
+# tf-nightly should depend on tb-nightly
 if 'tf_nightly' in project_name:
-  for package in REQUIRED_PACKAGES:
-    if 'tensorflow-tensorboard' in package:
-      REQUIRED_PACKAGES.remove(package)
+  for i, pkg in enumerate(REQUIRED_PACKAGES):
+    if 'tensorboard' in pkg:
+      REQUIRED_PACKAGES[i] = 'tb-nightly >= 1.5.0a0, < 1.6.0a0'
       break
 
 # weakref.finalize was introduced in Python 3.4
@@ -76,13 +76,13 @@ CONSOLE_SCRIPTS = [
     # is now declared by the tensorboard pip package. If we remove the
     # TensorBoard command, pip will inappropriately remove it during install,
     # even though the command is not removed, just moved to a different wheel.
-    'tensorboard = tensorboard.main:main',
+    'tensorboard = tensorboard.main:run_main',
 ]
 # pylint: enable=line-too-long
 
 # remove the tensorboard console script if building tf_nightly
 if 'tf_nightly' in project_name:
-  CONSOLE_SCRIPTS.remove('tensorboard = tensorboard.main:main')
+  CONSOLE_SCRIPTS.remove('tensorboard = tensorboard.main:run_main')
 
 TEST_PACKAGES = [
     'scipy >= 0.15.1',
-- 
GitLab


From 34a96722c9d3ee53ed3be9db5522307637877d29 Mon Sep 17 00:00:00 2001
From: Yunxing Dai <yunxing@google.com>
Date: Tue, 21 Nov 2017 15:10:21 -0800
Subject: [PATCH 0188/1225] Add the first e2e scalar test with bfloat16.

This test doesn't pass yet, but it's good to use it to drive future development work.

PiperOrigin-RevId: 176568226
---
 tensorflow/compiler/xla/tests/BUILD           | 32 ++++++++
 .../compiler/xla/tests/bfloat16_test.cc       | 75 +++++++++++++++++++
 .../xla/tests/client_library_test_base.h      |  1 +
 .../compiler/xla/tests/literal_test_util.cc   | 10 +++
 4 files changed, 118 insertions(+)
 create mode 100644 tensorflow/compiler/xla/tests/bfloat16_test.cc

diff --git a/tensorflow/compiler/xla/tests/BUILD b/tensorflow/compiler/xla/tests/BUILD
index c64d5aca4f..2e220e7293 100644
--- a/tensorflow/compiler/xla/tests/BUILD
+++ b/tensorflow/compiler/xla/tests/BUILD
@@ -769,6 +769,38 @@ xla_test(
     ],
 )
 
+xla_test(
+    name = "bfloat16_test",
+    srcs = ["bfloat16_test.cc"],
+    shard_count = 40,
+    deps = [
+        ":test_utils",
+        "//tensorflow/compiler/xla:array2d",
+        "//tensorflow/compiler/xla:array4d",
+        "//tensorflow/compiler/xla:literal_util",
+        "//tensorflow/compiler/xla:reference_util",
+        "//tensorflow/compiler/xla:shape_util",
+        "//tensorflow/compiler/xla:status_macros",
+        "//tensorflow/compiler/xla:statusor",
+        "//tensorflow/compiler/xla:test",
+        "//tensorflow/compiler/xla:test_helpers",
+        "//tensorflow/compiler/xla:util",
+        "//tensorflow/compiler/xla:xla_data_proto",
+        "//tensorflow/compiler/xla/client:computation",
+        "//tensorflow/compiler/xla/client:computation_builder",
+        "//tensorflow/compiler/xla/client:global_data",
+        "//tensorflow/compiler/xla/client:local_client",
+        "//tensorflow/compiler/xla/client/lib:arithmetic",
+        "//tensorflow/compiler/xla/service:hlo",
+        "//tensorflow/compiler/xla/tests:client_library_test_base",
+        "//tensorflow/compiler/xla/tests:hlo_test_base",
+        "//tensorflow/compiler/xla/tests:literal_test_util",
+        "//tensorflow/compiler/xla/tests:xla_internal_test_main",
+        "//tensorflow/core:lib",
+        "//tensorflow/core:test",
+    ],
+)
+
 xla_test(
     name = "slice_test",
     srcs = ["slice_test.cc"],
diff --git a/tensorflow/compiler/xla/tests/bfloat16_test.cc b/tensorflow/compiler/xla/tests/bfloat16_test.cc
new file mode 100644
index 0000000000..26e2b1a95b
--- /dev/null
+++ b/tensorflow/compiler/xla/tests/bfloat16_test.cc
@@ -0,0 +1,75 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include <cmath>
+#include <memory>
+#include <vector>
+
+#include "tensorflow/compiler/xla/array2d.h"
+#include "tensorflow/compiler/xla/array4d.h"
+#include "tensorflow/compiler/xla/client/computation.h"
+#include "tensorflow/compiler/xla/client/computation_builder.h"
+#include "tensorflow/compiler/xla/client/lib/arithmetic.h"
+#include "tensorflow/compiler/xla/client/local_client.h"
+#include "tensorflow/compiler/xla/literal_util.h"
+#include "tensorflow/compiler/xla/reference_util.h"
+#include "tensorflow/compiler/xla/service/hlo_computation.h"
+#include "tensorflow/compiler/xla/service/hlo_instruction.h"
+#include "tensorflow/compiler/xla/service/hlo_module.h"
+#include "tensorflow/compiler/xla/shape_util.h"
+#include "tensorflow/compiler/xla/statusor.h"
+#include "tensorflow/compiler/xla/test.h"
+#include "tensorflow/compiler/xla/test_helpers.h"
+#include "tensorflow/compiler/xla/tests/client_library_test_base.h"
+#include "tensorflow/compiler/xla/tests/hlo_test_base.h"
+#include "tensorflow/compiler/xla/tests/literal_test_util.h"
+#include "tensorflow/compiler/xla/tests/test_macros.h"
+#include "tensorflow/compiler/xla/tests/test_utils.h"
+#include "tensorflow/compiler/xla/util.h"
+#include "tensorflow/compiler/xla/xla_data.pb.h"
+#include "tensorflow/core/platform/logging.h"
+#include "tensorflow/core/platform/test.h"
+#include "tensorflow/core/platform/types.h"
+
+namespace xla {
+namespace {
+
+class Bfloat16Test : public ClientLibraryTestBase {
+ protected:
+  const ErrorSpec error_spec_{0.001, 0.001};
+};
+
+XLA_TEST_F(Bfloat16Test, DISABLED_ON_GPU(DISABLED_ON_CPU_PARALLEL(
+                             DISABLED_ON_CPU(ScalarOperation)))) {
+  ComputationBuilder builder(client_, TestName());
+  auto x = builder.ConstantR0<bfloat16>(static_cast<bfloat16>(2.0f));
+  auto y = builder.ConstantR0<bfloat16>(static_cast<bfloat16>(1.0f));
+  builder.Add(x, y);
+
+  ComputeAndCompareR0<bfloat16>(&builder, static_cast<bfloat16>(3.0f), {},
+                                error_spec_);
+}
+
+XLA_TEST_F(Bfloat16Test, DISABLED_ON_GPU(DISABLED_ON_CPU_PARALLEL(
+                             DISABLED_ON_CPU(NegateScalarF16)))) {
+  ComputationBuilder builder(client_, TestName());
+  builder.Neg(builder.ConstantR0<bfloat16>(static_cast<bfloat16>(2.1f)));
+
+  ComputeAndCompareR0<bfloat16>(&builder, static_cast<bfloat16>(-2.1f), {},
+                                error_spec_);
+}
+
+}  // namespace
+}  // namespace xla
diff --git a/tensorflow/compiler/xla/tests/client_library_test_base.h b/tensorflow/compiler/xla/tests/client_library_test_base.h
index 1dc274c591..af22c12684 100644
--- a/tensorflow/compiler/xla/tests/client_library_test_base.h
+++ b/tensorflow/compiler/xla/tests/client_library_test_base.h
@@ -333,6 +333,7 @@ void ClientLibraryTestBase::ComputeAndCompareR0(
     tensorflow::gtl::ArraySlice<GlobalData*> arguments, ErrorSpec error) {
   static_assert(std::is_same<NativeT, float>::value ||
                     std::is_same<NativeT, double>::value ||
+                    std::is_same<NativeT, bfloat16>::value ||
                     std::is_same<NativeT, complex64>::value,
                 "Float or complex type required when specifying an ErrorSpec");
   std::unique_ptr<Literal> expected_literal =
diff --git a/tensorflow/compiler/xla/tests/literal_test_util.cc b/tensorflow/compiler/xla/tests/literal_test_util.cc
index 75c9a0d3fb..9ae5c7b6f0 100644
--- a/tensorflow/compiler/xla/tests/literal_test_util.cc
+++ b/tensorflow/compiler/xla/tests/literal_test_util.cc
@@ -340,6 +340,9 @@ class NearComparator {
     multi_index_.resize(expected.shape().dimensions_size(), 0);
 
     switch (expected.shape().element_type()) {
+      case BF16:
+        ExpectLiteralsNear<bfloat16>(expected, actual, 0);
+        break;
       case F32:
         ExpectLiteralsNear<float>(expected, actual, 0);
         break;
@@ -525,6 +528,13 @@ void NearComparator::ExpectNear<complex64>(complex64 expected, complex64 actual,
       << message;
 }
 
+template <>
+bool NearComparator::ExpectValuesNear<bfloat16>(bfloat16 expected,
+                                                bfloat16 actual) {
+  return ExpectValuesNear(static_cast<float>(expected),
+                          static_cast<float>(actual));
+}
+
 }  // namespace
 
 /* static */ ::testing::AssertionResult LiteralTestUtil::Near(
-- 
GitLab


From cc003b7315b30a66567f749b35c120f5af768615 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 21 Nov 2017 15:27:22 -0800
Subject: [PATCH 0189/1225] Remove vestigial utils.setdefault function.

PiperOrigin-RevId: 176570863
---
 .../contrib/kfac/python/ops/layer_collection.py      | 12 ++++++------
 tensorflow/contrib/kfac/python/ops/utils.py          |  7 -------
 tensorflow/contrib/kfac/python/ops/utils_lib.py      |  1 -
 3 files changed, 6 insertions(+), 14 deletions(-)

diff --git a/tensorflow/contrib/kfac/python/ops/layer_collection.py b/tensorflow/contrib/kfac/python/ops/layer_collection.py
index 04f5a869bd..d8781231ed 100644
--- a/tensorflow/contrib/kfac/python/ops/layer_collection.py
+++ b/tensorflow/contrib/kfac/python/ops/layer_collection.py
@@ -620,9 +620,9 @@ class LayerCollection(object):
            "LayerCollection.fisher_factors. The pair cannot be hashed.").format(
                cls, args))
 
-    kwargs = {
-        "colocate_cov_ops_with_inputs": self._colocate_cov_ops_with_inputs
-    }
-    with variable_scope.variable_scope(self._var_scope):
-      return utils.setdefault(self.fisher_factors, (cls, args),
-                              lambda: cls(*args, **kwargs))
+    key = cls, args
+    if key not in self.fisher_factors:
+      colo = self._colocate_cov_ops_with_inputs
+      with variable_scope.variable_scope(self._var_scope):
+        self.fisher_factors[key] = cls(*args, colocate_cov_ops_with_inputs=colo)
+    return self.fisher_factors[key]
diff --git a/tensorflow/contrib/kfac/python/ops/utils.py b/tensorflow/contrib/kfac/python/ops/utils.py
index 0fd7f51477..ca6fb655b4 100644
--- a/tensorflow/contrib/kfac/python/ops/utils.py
+++ b/tensorflow/contrib/kfac/python/ops/utils.py
@@ -64,13 +64,6 @@ class SequenceDict(object):
     return list(self._dict.items())
 
 
-def setdefault(dct, key, thunk):
-  """Like dict.setdefault but delays evaluation of the value to be set."""
-  if key not in dct:
-    dct[key] = thunk()
-  return dct[key]
-
-
 def tensors_to_column(tensors):
   """Converts a tensor or list of tensors to a column vector.
 
diff --git a/tensorflow/contrib/kfac/python/ops/utils_lib.py b/tensorflow/contrib/kfac/python/ops/utils_lib.py
index ddbb4485ce..9df07d69aa 100644
--- a/tensorflow/contrib/kfac/python/ops/utils_lib.py
+++ b/tensorflow/contrib/kfac/python/ops/utils_lib.py
@@ -25,7 +25,6 @@ from tensorflow.python.util.all_util import remove_undocumented
 
 _allowed_symbols = [
     "SequenceDict",
-    "setdefault",
     "tensors_to_column",
     "column_to_tensors",
     "kronecker_product",
-- 
GitLab


From 9305349a4a6b6c2d265ac81091f855e5560041c4 Mon Sep 17 00:00:00 2001
From: Brennan Saeta <saeta@google.com>
Date: Tue, 21 Nov 2017 15:35:13 -0800
Subject: [PATCH 0190/1225] Improve caching behavior of GCS filesystem

On a number of Cloud TPU-related workloads, these changes improve the time to
restore from a checkpoint by ~20x.

PiperOrigin-RevId: 176571963
---
 .../core/platform/cloud/expiring_lru_cache.h  |  79 ++++++---
 .../platform/cloud/expiring_lru_cache_test.cc |  64 +++++++
 .../core/platform/cloud/file_block_cache.cc   | 158 +++++++++++------
 .../core/platform/cloud/file_block_cache.h    |  50 +++++-
 .../platform/cloud/file_block_cache_test.cc   |  35 ++++
 .../core/platform/cloud/gcs_file_system.cc    | 164 ++++++++++--------
 .../platform/cloud/gcs_file_system_test.cc    |  11 +-
 7 files changed, 396 insertions(+), 165 deletions(-)

diff --git a/tensorflow/core/platform/cloud/expiring_lru_cache.h b/tensorflow/core/platform/cloud/expiring_lru_cache.h
index 4fe4234e22..3fc23a4306 100644
--- a/tensorflow/core/platform/cloud/expiring_lru_cache.h
+++ b/tensorflow/core/platform/cloud/expiring_lru_cache.h
@@ -28,7 +28,7 @@ limitations under the License.
 namespace tensorflow {
 
 /// \brief An LRU cache of string keys and arbitrary values, with configurable
-/// max item age and max entries.
+/// max item age (in seconds) and max entries.
 ///
 /// This class is thread safe.
 template <typename T>
@@ -48,16 +48,7 @@ class ExpiringLRUCache {
       return;
     }
     mutex_lock lock(mu_);
-    lru_list_.push_front(key);
-    Entry entry{env_->NowSeconds(), value, lru_list_.begin()};
-    auto insert = cache_.insert(std::make_pair(key, entry));
-    if (!insert.second) {
-      lru_list_.erase(insert.first->second.lru_iterator);
-      insert.first->second = entry;
-    } else if (max_entries_ > 0 && cache_.size() > max_entries_) {
-      cache_.erase(lru_list_.back());
-      lru_list_.pop_back();
-    }
+    InsertLocked(key, value);
   }
 
   /// Look up the entry with key `key` and copy it to `value` if found. Returns
@@ -68,19 +59,33 @@ class ExpiringLRUCache {
       return false;
     }
     mutex_lock lock(mu_);
-    auto it = cache_.find(key);
-    if (it == cache_.end()) {
-      return false;
+    return LookupLocked(key, value);
+  }
+
+  typedef std::function<Status(const string&, T*)> ComputeFunc;
+
+  /// Look up the entry with key `key` and copy it to `value` if found. If not
+  /// found, call `compute_func`. If `compute_func` returns successfully, store
+  /// a copy of the output parameter in the cache, and another copy in `value`.
+  Status LookupOrCompute(const string& key, T* value,
+                         const ComputeFunc& compute_func) {
+    if (max_age_ == 0) {
+      return compute_func(key, value);
     }
-    lru_list_.erase(it->second.lru_iterator);
-    if (env_->NowSeconds() - it->second.timestamp > max_age_) {
-      cache_.erase(it);
-      return false;
+
+    // Note: we hold onto mu_ for the rest of this function. In practice, this
+    // is okay, as stat requests are typically fast, and concurrent requests are
+    // often for the same file. Future work can split this up into one lock per
+    // key if this proves to be a significant performance bottleneck.
+    mutex_lock lock(mu_);
+    if (LookupLocked(key, value)) {
+      return Status::OK();
     }
-    *value = it->second.value;
-    lru_list_.push_front(it->first);
-    it->second.lru_iterator = lru_list_.begin();
-    return true;
+    Status s = compute_func(key, value);
+    if (s.ok()) {
+      InsertLocked(key, *value);
+    }
+    return s;
   }
 
   /// Accessors for cache parameters.
@@ -99,6 +104,36 @@ class ExpiringLRUCache {
     std::list<string>::iterator lru_iterator;
   };
 
+  bool LookupLocked(const string& key, T* value) EXCLUSIVE_LOCKS_REQUIRED(mu_) {
+    auto it = cache_.find(key);
+    if (it == cache_.end()) {
+      return false;
+    }
+    lru_list_.erase(it->second.lru_iterator);
+    if (env_->NowSeconds() - it->second.timestamp > max_age_) {
+      cache_.erase(it);
+      return false;
+    }
+    *value = it->second.value;
+    lru_list_.push_front(it->first);
+    it->second.lru_iterator = lru_list_.begin();
+    return true;
+  }
+
+  void InsertLocked(const string& key, const T& value)
+      EXCLUSIVE_LOCKS_REQUIRED(mu_) {
+    lru_list_.push_front(key);
+    Entry entry{env_->NowSeconds(), value, lru_list_.begin()};
+    auto insert = cache_.insert(std::make_pair(key, entry));
+    if (!insert.second) {
+      lru_list_.erase(insert.first->second.lru_iterator);
+      insert.first->second = entry;
+    } else if (max_entries_ > 0 && cache_.size() > max_entries_) {
+      cache_.erase(lru_list_.back());
+      lru_list_.pop_back();
+    }
+  }
+
   /// The maximum age of entries in the cache, in seconds. A value of 0 means
   /// that no entry is ever placed in the cache.
   const uint64 max_age_;
diff --git a/tensorflow/core/platform/cloud/expiring_lru_cache_test.cc b/tensorflow/core/platform/cloud/expiring_lru_cache_test.cc
index bf9bfcd67e..8f8d5744a4 100644
--- a/tensorflow/core/platform/cloud/expiring_lru_cache_test.cc
+++ b/tensorflow/core/platform/cloud/expiring_lru_cache_test.cc
@@ -88,5 +88,69 @@ TEST(ExpiringLRUCacheTest, MaxEntries) {
   EXPECT_EQ(value, 5);
 }
 
+TEST(ExpiringLRUCacheTest, LookupOrCompute) {
+  // max_age of 0 means we should always compute.
+  uint64 num_compute_calls = 0;
+  ExpiringLRUCache<int>::ComputeFunc compute_func =
+      [&num_compute_calls](const string& key, int* value) {
+        *value = num_compute_calls;
+        num_compute_calls++;
+        return Status::OK();
+      };
+  ExpiringLRUCache<int> cache1(0, 4);
+
+  int value = -1;
+  TF_EXPECT_OK(cache1.LookupOrCompute("a", &value, compute_func));
+  EXPECT_EQ(value, 0);
+  EXPECT_EQ(num_compute_calls, 1);
+  // re-read the same value, expect another lookup
+  TF_EXPECT_OK(cache1.LookupOrCompute("a", &value, compute_func));
+  EXPECT_EQ(value, 1);
+  EXPECT_EQ(num_compute_calls, 2);
+
+  // Define a new cache with max_age > 0 and verify correct behavior.
+  ExpiringLRUCache<int> cache2(2, 4);
+  num_compute_calls = 0;
+  value = -1;
+
+  // Read our first value
+  TF_EXPECT_OK(cache2.LookupOrCompute("a", &value, compute_func));
+  EXPECT_EQ(value, 0);
+  EXPECT_EQ(num_compute_calls, 1);
+  // Re-read, exepct no additional function compute_func calls.
+  TF_EXPECT_OK(cache2.LookupOrCompute("a", &value, compute_func));
+  EXPECT_EQ(value, 0);
+  EXPECT_EQ(num_compute_calls, 1);
+
+  // Read a sequence of additional values, eventually evicting "a".
+  TF_EXPECT_OK(cache2.LookupOrCompute("b", &value, compute_func));
+  EXPECT_EQ(value, 1);
+  EXPECT_EQ(num_compute_calls, 2);
+  TF_EXPECT_OK(cache2.LookupOrCompute("c", &value, compute_func));
+  EXPECT_EQ(value, 2);
+  EXPECT_EQ(num_compute_calls, 3);
+  TF_EXPECT_OK(cache2.LookupOrCompute("d", &value, compute_func));
+  EXPECT_EQ(value, 3);
+  EXPECT_EQ(num_compute_calls, 4);
+  TF_EXPECT_OK(cache2.LookupOrCompute("e", &value, compute_func));
+  EXPECT_EQ(value, 4);
+  EXPECT_EQ(num_compute_calls, 5);
+  // Verify the other values remain in the cache.
+  TF_EXPECT_OK(cache2.LookupOrCompute("b", &value, compute_func));
+  EXPECT_EQ(value, 1);
+  EXPECT_EQ(num_compute_calls, 5);
+  TF_EXPECT_OK(cache2.LookupOrCompute("c", &value, compute_func));
+  EXPECT_EQ(value, 2);
+  EXPECT_EQ(num_compute_calls, 5);
+  TF_EXPECT_OK(cache2.LookupOrCompute("d", &value, compute_func));
+  EXPECT_EQ(value, 3);
+  EXPECT_EQ(num_compute_calls, 5);
+
+  // Re-read "a", ensure it is re-computed.
+  TF_EXPECT_OK(cache2.LookupOrCompute("a", &value, compute_func));
+  EXPECT_EQ(value, 5);
+  EXPECT_EQ(num_compute_calls, 6);
+}
+
 }  // namespace
 }  // namespace tensorflow
diff --git a/tensorflow/core/platform/cloud/file_block_cache.cc b/tensorflow/core/platform/cloud/file_block_cache.cc
index a05c18c069..a472ae52fc 100644
--- a/tensorflow/core/platform/cloud/file_block_cache.cc
+++ b/tensorflow/core/platform/cloud/file_block_cache.cc
@@ -16,79 +16,137 @@ limitations under the License.
 #include "tensorflow/core/platform/cloud/file_block_cache.h"
 #include <cstring>
 #include <memory>
+#include "tensorflow/core/lib/gtl/cleanup.h"
 #include "tensorflow/core/platform/env.h"
 
 namespace tensorflow {
 
-std::shared_ptr<FileBlockCache::Block> FileBlockCache::Lookup(const Key& key) {
-  mutex_lock lock(mu_);
-  auto entry = block_map_.find(key);
-  if (entry == block_map_.end()) {
-    return std::shared_ptr<Block>();
-  }
-  // If we're enforcing max staleness and the block is stale, remove all of the
-  // file's cached blocks so we reload them.
-  if (max_staleness_ > 0 &&
-      env_->NowSeconds() - entry->second->timestamp > max_staleness_) {
-    RemoveFile_Locked(key.first);
-    return std::shared_ptr<Block>();
+bool FileBlockCache::BlockNotStale(const std::shared_ptr<Block>& block) {
+  mutex_lock l(block->mu);
+  if (block->state != FetchState::FINISHED) {
+    return true;  // No need to check for staleness.
   }
-  return entry->second;
+  if (max_staleness_ == 0) return true;  // Not enforcing staleness.
+  return env_->NowSeconds() - block->timestamp <= max_staleness_;
 }
 
-std::shared_ptr<FileBlockCache::Block> FileBlockCache::Insert(
-    const Key& key, std::shared_ptr<Block> block) {
+std::shared_ptr<FileBlockCache::Block> FileBlockCache::Lookup(const Key& key) {
   mutex_lock lock(mu_);
   auto entry = block_map_.find(key);
   if (entry != block_map_.end()) {
-    // Use the block that's already in the cache.
-    return entry->second;
-  }
-  // Sanity check to detect interrupted reads leading to partial blocks: a
-  // partial block must have a higher key than the highest existing key in the
-  // block map for the file. Note that since this check relies on the existence
-  // of a cached block with a higher key, some incomplete reads may still go
-  // undetected (if their key happens to be higher than anything in the cache).
-  if (block->data.size() < block_size_ && !block_map_.empty()) {
-    Key fmax = std::make_pair(key.first, std::numeric_limits<size_t>::max());
-    auto fcmp = block_map_.upper_bound(fmax);
-    if (fcmp != block_map_.begin() && key < (--fcmp)->first) {
-      // We expected to read a full block at this position.
-      return std::shared_ptr<Block>();
+    if (BlockNotStale(entry->second)) {
+      return entry->second;
+    } else {
+      // Remove the stale block and continue.
+      RemoveFile_Locked(key.first);
     }
   }
-  // Add the block to the cache (with necessary bookkeeping).
+
+  // Insert a new empty block, setting the bookkeeping to sentinel values
+  // in order to update them as appropriate.
+  auto new_entry = std::make_shared<Block>();
   lru_list_.push_front(key);
   lra_list_.push_front(key);
-  block->lru_iterator = lru_list_.begin();
-  block->lra_iterator = lra_list_.begin();
-  block->timestamp = env_->NowSeconds();
-  cache_size_ += block->data.size();
-  block_map_.emplace(std::make_pair(key, block));
-  return block;
+  new_entry->lru_iterator = lru_list_.begin();
+  new_entry->lra_iterator = lra_list_.begin();
+  new_entry->timestamp = env_->NowSeconds();
+  block_map_.emplace(std::make_pair(key, new_entry));
+  return new_entry;
 }
 
-// Remove blocks from the cache until there is space for a full sized block.
+// Remove blocks from the cache until we do not exceed our maximum size.
 void FileBlockCache::Trim() {
-  mutex_lock lock(mu_);
-  while (!lru_list_.empty() && cache_size_ + block_size_ > max_bytes_) {
+  while (!lru_list_.empty() && cache_size_ > max_bytes_) {
     RemoveBlock(block_map_.find(lru_list_.back()));
   }
 }
 
 /// Move the block to the front of the LRU list if it isn't already there.
-void FileBlockCache::UpdateLRU(const Key& key,
-                               const std::shared_ptr<Block>& block) {
+Status FileBlockCache::UpdateLRU(const Key& key,
+                                 const std::shared_ptr<Block>& block) {
   mutex_lock lock(mu_);
   if (block->timestamp == 0) {
     // The block was evicted from another thread. Allow it to remain evicted.
-    return;
+    return Status::OK();
   }
   if (block->lru_iterator != lru_list_.begin()) {
     lru_list_.erase(block->lru_iterator);
     lru_list_.push_front(key);
     block->lru_iterator = lru_list_.begin();
   }
+
+  // Check for inconsistent state. If there is a block later in the same file
+  // in the cache, and our current block is not block size, this likely means
+  // we have inconsistent state within the cache. Note: it's possible some
+  // incomplete reads may still go undetected.
+  if (block->data.size() < block_size_) {
+    Key fmax = std::make_pair(key.first, std::numeric_limits<size_t>::max());
+    auto fcmp = block_map_.upper_bound(fmax);
+    if (fcmp != block_map_.begin() && key < (--fcmp)->first) {
+      return errors::Internal("Block cache contents are inconsistent.");
+    }
+  }
+
+  Trim();
+
+  return Status::OK();
+}
+
+Status FileBlockCache::MaybeFetch(const Key& key,
+                                  const std::shared_ptr<Block>& block) {
+  bool downloaded_block = false;
+  auto reconcile_state =
+      gtl::MakeCleanup([this, &downloaded_block, &key, &block] {
+        // Perform this action in a cleanup callback to avoid locking mu_ after
+        // locking block->mu.
+        if (downloaded_block) {
+          mutex_lock l(mu_);
+          // Do not update state if the block is already to be evicted.
+          if (block->timestamp != 0) {
+            cache_size_ += block->data.size();
+            // Put to beginning of LRA list.
+            lra_list_.erase(block->lra_iterator);
+            lra_list_.push_front(key);
+            block->lra_iterator = lra_list_.begin();
+            block->timestamp = env_->NowSeconds();
+          }
+        }
+      });
+  // Loop until either block content is successfully fetched, or our request
+  // encounters an error.
+  mutex_lock l(block->mu);
+  Status status = Status::OK();
+  while (true) {
+    switch (block->state) {
+      case FetchState::ERROR:
+        TF_FALLTHROUGH_INTENDED;
+      case FetchState::CREATED:
+        block->state = FetchState::FETCHING;
+        block->mu.unlock();  // Release the lock while making the API call.
+        status.Update(
+            block_fetcher_(key.first, key.second, block_size_, &block->data));
+        block->mu.lock();  // Reacquire the lock immediately afterwards
+        if (status.ok()) {
+          downloaded_block = true;
+          block->state = FetchState::FINISHED;
+        } else {
+          block->state = FetchState::ERROR;
+        }
+        block->cond_var.notify_all();
+        return status;
+      case FetchState::FETCHING:
+        block->cond_var.wait_for(l, std::chrono::seconds(60));
+        if (block->state == FetchState::FINISHED) {
+          return Status::OK();
+        }
+        // Re-loop in case of errors.
+        break;
+      case FetchState::FINISHED:
+        return Status::OK();
+    }
+  }
+  return errors::Internal(
+      "Control flow should never reach the end of FileBlockCache::Fetch.");
 }
 
 Status FileBlockCache::Read(const string& filename, size_t offset, size_t n,
@@ -114,15 +172,9 @@ Status FileBlockCache::Read(const string& filename, size_t offset, size_t n,
     // Look up the block, fetching and inserting it if necessary, and update the
     // LRU iterator for the key and block.
     std::shared_ptr<Block> block = Lookup(key);
-    if (!block) {
-      Trim();
-      auto fetch = std::make_shared<Block>();
-      auto status = block_fetcher_(filename, pos, block_size_, &fetch->data);
-      if (!(block = Insert(key, fetch))) {
-        return errors::Internal("File contents are inconsistent");
-      }
-    }
-    UpdateLRU(key, block);
+    DCHECK(block) << "No block for key " << key.first << "@" << key.second;
+    TF_RETURN_IF_ERROR(MaybeFetch(key, block));
+    TF_RETURN_IF_ERROR(UpdateLRU(key, block));
     // Copy the relevant portion of the block into the result buffer.
     const auto& data = block->data;
     if (offset >= pos + data.size()) {
@@ -190,11 +242,11 @@ void FileBlockCache::RemoveFile_Locked(const string& filename) {
 }
 
 void FileBlockCache::RemoveBlock(BlockMap::iterator entry) {
-  lru_list_.erase(entry->second->lru_iterator);
-  lra_list_.erase(entry->second->lra_iterator);
   // This signals that the block is removed, and should not be inadvertently
   // reinserted into the cache in UpdateLRU.
   entry->second->timestamp = 0;
+  lru_list_.erase(entry->second->lru_iterator);
+  lra_list_.erase(entry->second->lra_iterator);
   cache_size_ -= entry->second->data.size();
   block_map_.erase(entry);
 }
diff --git a/tensorflow/core/platform/cloud/file_block_cache.h b/tensorflow/core/platform/cloud/file_block_cache.h
index b45d226095..36dbf9db83 100644
--- a/tensorflow/core/platform/cloud/file_block_cache.h
+++ b/tensorflow/core/platform/cloud/file_block_cache.h
@@ -115,11 +115,35 @@ class FileBlockCache {
   /// The file block cache key is a {filename, offset} pair.
   typedef std::pair<string, size_t> Key;
 
+  /// \brief The state of a block.
+  ///
+  /// A block begins in the CREATED stage. The first thread will attempt to read
+  /// the block from the filesystem, transitioning the state of the block to
+  /// FETCHING. After completing, if the read was successful the state should
+  /// be FINISHED. Otherwise the state should be ERROR. A subsequent read can
+  /// re-fetch the block if the state is ERROR.
+  enum class FetchState {
+    CREATED,
+    FETCHING,
+    FINISHED,
+    ERROR,
+  };
+
   /// \brief A block of a file.
   ///
   /// A file block consists of the block data, the block's current position in
-  /// the LRU cache, and the timestamp (seconds since epoch) at which the block
-  /// was cached.
+  /// the LRU cache, the timestamp (seconds since epoch) at which the block
+  /// was cached, a coordination lock, and state & condition variables.
+  ///
+  /// Thread safety:
+  /// The iterator and timestamp fields should only be accessed while holding
+  /// the block-cache-wide mu_ instance variable. The state variable should only
+  /// be accessed while holding the Block's mu lock. The data vector should only
+  /// be accessed after state == FINISHED, and it should never be modified.
+  ///
+  /// In order to prevent deadlocks, never grab the block-cache-wide mu_ lock
+  /// AFTER grabbing any block's mu lock. It is safe to grab mu without locking
+  /// mu_.
   struct Block {
     /// The block data.
     std::vector<char> data;
@@ -129,6 +153,12 @@ class FileBlockCache {
     std::list<Key>::iterator lra_iterator;
     /// The timestamp (seconds since epoch) at which the block was cached.
     uint64 timestamp;
+    /// Mutex to guard state variable
+    mutex mu;
+    /// The state of the block.
+    FetchState state GUARDED_BY(mu) = FetchState::CREATED;
+    /// Wait on cond_var if state is FETCHING.
+    condition_variable cond_var;
   };
 
   /// \brief The block map type for the file block cache.
@@ -139,19 +169,20 @@ class FileBlockCache {
   /// Prune the cache by removing files with expired blocks.
   void Prune() LOCKS_EXCLUDED(mu_);
 
+  bool BlockNotStale(const std::shared_ptr<Block>& block)
+      EXCLUSIVE_LOCKS_REQUIRED(mu_);
+
   /// Look up a Key in the block cache.
   std::shared_ptr<Block> Lookup(const Key& key) LOCKS_EXCLUDED(mu_);
 
-  /// Insert a block in the block cache with the given key.
-  std::shared_ptr<FileBlockCache::Block> Insert(const Key& key,
-                                                std::shared_ptr<Block> block)
+  Status MaybeFetch(const Key& key, const std::shared_ptr<Block>& block)
       LOCKS_EXCLUDED(mu_);
 
   /// Trim the block cache to make room for another entry.
-  void Trim() LOCKS_EXCLUDED(mu_);
+  void Trim() EXCLUSIVE_LOCKS_REQUIRED(mu_);
 
-  /// Update LRU and LRA iterators for the block at `key`.
-  void UpdateLRU(const Key& key, const std::shared_ptr<Block>& block)
+  /// Update the LRU iterator for the block at `key`.
+  Status UpdateLRU(const Key& key, const std::shared_ptr<Block>& block)
       LOCKS_EXCLUDED(mu_);
 
   /// Remove all blocks of a file, with mu_ already held.
@@ -179,6 +210,9 @@ class FileBlockCache {
 
   /// The LRA (least recently added) list of block keys. The front of the list
   /// identifies the most recently added block.
+  ///
+  /// Note: blocks are added to lra_list_ only after they have successfully been
+  /// fetched from the underlying block store.
   std::list<Key> lra_list_ GUARDED_BY(mu_);
 
   /// The combined number of bytes in all of the cached blocks.
diff --git a/tensorflow/core/platform/cloud/file_block_cache_test.cc b/tensorflow/core/platform/cloud/file_block_cache_test.cc
index 5fa738b452..2a9eb7d524 100644
--- a/tensorflow/core/platform/cloud/file_block_cache_test.cc
+++ b/tensorflow/core/platform/cloud/file_block_cache_test.cc
@@ -19,6 +19,7 @@ limitations under the License.
 #include "tensorflow/core/lib/core/status_test_util.h"
 #include "tensorflow/core/platform/cloud/now_seconds_env.h"
 #include "tensorflow/core/platform/env.h"
+#include "tensorflow/core/platform/notification.h"
 #include "tensorflow/core/platform/test.h"
 
 namespace tensorflow {
@@ -435,5 +436,39 @@ TEST(FileBlockCacheTest, ParallelReads) {
   // executed, or 10 seconds have passed).
 }
 
+TEST(FileBlockCacheTest, CoalesceConcurrentReads) {
+  // Concurrent reads to the same file blocks should be de-duplicated.
+  const size_t block_size = 16;
+  int num_requests = 0;
+  Notification notification;
+  auto fetcher = [&num_requests, &notification, block_size](
+                     const string& filename, size_t offset, size_t n,
+                     std::vector<char>* out) {
+    EXPECT_EQ(n, block_size);
+    EXPECT_EQ(offset, 0);
+    num_requests++;
+    out->resize(n, 'x');
+    notification.Notify();
+    // Wait for other thread to issue read.
+    Env::Default()->SleepForMicroseconds(100000);  // 0.1 secs
+    return Status::OK();
+  };
+  FileBlockCache cache(block_size, block_size, 0, fetcher);
+  // Fork off thread for parallel read.
+  std::unique_ptr<Thread> concurrent(
+      Env::Default()->StartThread({}, "concurrent", [&cache] {
+        std::vector<char> out;
+        TF_EXPECT_OK(cache.Read("", 0, block_size / 2, &out));
+        EXPECT_EQ(out.size(), block_size / 2);
+      }));
+  EXPECT_TRUE(WaitForNotificationWithTimeout(&notification, 1000))
+      << "Timeout waiting for concurrent thread to start.";
+  std::vector<char> out;
+  TF_EXPECT_OK(cache.Read("", block_size / 2, block_size / 2, &out));
+  EXPECT_EQ(out.size(), block_size / 2);
+
+  EXPECT_EQ(1, num_requests);
+}
+
 }  // namespace
 }  // namespace tensorflow
diff --git a/tensorflow/core/platform/cloud/gcs_file_system.cc b/tensorflow/core/platform/cloud/gcs_file_system.cc
index 9287de7237..d5e2a518e9 100644
--- a/tensorflow/core/platform/cloud/gcs_file_system.cc
+++ b/tensorflow/core/platform/cloud/gcs_file_system.cc
@@ -695,6 +695,7 @@ Status GcsFileSystem::LoadBufferFromGCS(const string& filename, size_t offset,
 
   TF_RETURN_WITH_CONTEXT_IF_ERROR(request->Send(), " when reading gs://",
                                   bucket, "/", object);
+
   return Status::OK();
 }
 
@@ -814,53 +815,55 @@ Status GcsFileSystem::StatForObject(const string& fname, const string& bucket,
   if (!stat) {
     return errors::Internal("'stat' cannot be nullptr.");
   }
-  if (stat_cache_->Lookup(fname, stat)) {
-    if (stat->is_directory) {
-      return errors::NotFound(fname, " is a directory.");
-    } else {
-      return Status::OK();
-    }
-  }
   if (object.empty()) {
     return errors::InvalidArgument("'object' must be a non-empty string.");
   }
 
-  string auth_token;
-  TF_RETURN_IF_ERROR(AuthProvider::GetToken(auth_provider_.get(), &auth_token));
+  StatCache::ComputeFunc compute_func =
+      [this, &bucket, &object](const string& fname, FileStatistics* stat) {
+        string auth_token;
+        TF_RETURN_IF_ERROR(
+            AuthProvider::GetToken(auth_provider_.get(), &auth_token));
 
-  std::vector<char> output_buffer;
-  std::unique_ptr<HttpRequest> request(http_request_factory_->Create());
-  TF_RETURN_IF_ERROR(request->Init());
-  TF_RETURN_IF_ERROR(request->SetUri(strings::StrCat(
-      kGcsUriBase, "b/", bucket, "/o/", request->EscapeString(object),
-      "?fields=size%2Cupdated")));
-  TF_RETURN_IF_ERROR(request->AddAuthBearerHeader(auth_token));
-  TF_RETURN_IF_ERROR(request->SetResultBuffer(&output_buffer));
+        std::vector<char> output_buffer;
+        std::unique_ptr<HttpRequest> request(http_request_factory_->Create());
+        TF_RETURN_IF_ERROR(request->Init());
+        TF_RETURN_IF_ERROR(request->SetUri(strings::StrCat(
+            kGcsUriBase, "b/", bucket, "/o/", request->EscapeString(object),
+            "?fields=size%2Cupdated")));
+        TF_RETURN_IF_ERROR(request->AddAuthBearerHeader(auth_token));
+        TF_RETURN_IF_ERROR(request->SetResultBuffer(&output_buffer));
 
-  if (dns_cache_) {
-    TF_RETURN_IF_ERROR(dns_cache_->AnnotateRequest(request.get()));
+        if (dns_cache_) {
+          TF_RETURN_IF_ERROR(dns_cache_->AnnotateRequest(request.get()));
+        }
+        TF_RETURN_WITH_CONTEXT_IF_ERROR(request->Send(),
+                                        " when reading metadata of gs://",
+                                        bucket, "/", object);
+
+        StringPiece response_piece =
+            StringPiece(output_buffer.data(), output_buffer.size());
+        Json::Value root;
+        TF_RETURN_IF_ERROR(ParseJson(response_piece, &root));
+
+        // Parse file size.
+        TF_RETURN_IF_ERROR(GetInt64Value(root, "size", &(stat->length)));
+
+        // Parse file modification time.
+        string updated;
+        TF_RETURN_IF_ERROR(GetStringValue(root, "updated", &updated));
+        TF_RETURN_IF_ERROR(ParseRfc3339Time(updated, &(stat->mtime_nsec)));
+
+        stat->is_directory = false;
+        return Status::OK();
+      };
+
+  TF_RETURN_IF_ERROR(stat_cache_->LookupOrCompute(fname, stat, compute_func));
+  if (stat->is_directory) {
+    return errors::NotFound(fname, " is a directory.");
+  } else {
+    return Status::OK();
   }
-
-  TF_RETURN_WITH_CONTEXT_IF_ERROR(
-      request->Send(), " when reading metadata of gs://", bucket, "/", object);
-
-  StringPiece response_piece =
-      StringPiece(output_buffer.data(), output_buffer.size());
-  Json::Value root;
-  TF_RETURN_IF_ERROR(ParseJson(response_piece, &root));
-
-  // Parse file size.
-  TF_RETURN_IF_ERROR(GetInt64Value(root, "size", &(stat->length)));
-
-  // Parse file modification time.
-  string updated;
-  TF_RETURN_IF_ERROR(GetStringValue(root, "updated", &updated));
-  TF_RETURN_IF_ERROR(ParseRfc3339Time(updated, &(stat->mtime_nsec)));
-
-  stat->is_directory = false;
-  stat_cache_->Insert(fname, *stat);
-
-  return Status::OK();
 }
 
 Status GcsFileSystem::BucketExists(const string& bucket, bool* result) {
@@ -892,19 +895,30 @@ Status GcsFileSystem::FolderExists(const string& dirname, bool* result) {
   if (!result) {
     return errors::Internal("'result' cannot be nullptr.");
   }
+  StatCache::ComputeFunc compute_func = [this](const string& dirname,
+                                               FileStatistics* stat) {
+    std::vector<string> children;
+    TF_RETURN_IF_ERROR(
+        GetChildrenBounded(dirname, 1, &children, true /* recursively */,
+                           true /* include_self_directory_marker */));
+    if (!children.empty()) {
+      *stat = DIRECTORY_STAT;
+      return Status::OK();
+    } else {
+      return errors::InvalidArgument("Not a directory!");
+    }
+  };
   FileStatistics stat;
-  if (stat_cache_->Lookup(dirname, &stat)) {
+  Status s = stat_cache_->LookupOrCompute(dirname, &stat, compute_func);
+  if (s.ok()) {
     *result = stat.is_directory;
     return Status::OK();
   }
-  std::vector<string> children;
-  TF_RETURN_IF_ERROR(
-      GetChildrenBounded(dirname, 1, &children, true /* recursively */,
-                         true /* include_self_directory_marker */));
-  if ((*result = !children.empty())) {
-    stat_cache_->Insert(dirname, DIRECTORY_STAT);
+  if (errors::IsInvalidArgument(s)) {
+    *result = false;
+    return Status::OK();
   }
-  return Status::OK();
+  return s;
 }
 
 Status GcsFileSystem::GetChildren(const string& dirname,
@@ -916,33 +930,35 @@ Status GcsFileSystem::GetChildren(const string& dirname,
 
 Status GcsFileSystem::GetMatchingPaths(const string& pattern,
                                        std::vector<string>* results) {
-  if (matching_paths_cache_->Lookup(pattern, results)) {
-    return Status::OK();
-  }
-  results->clear();
-  // Find the fixed prefix by looking for the first wildcard.
-  const string& fixed_prefix =
-      pattern.substr(0, pattern.find_first_of("*?[\\"));
-  const string& dir = io::Dirname(fixed_prefix).ToString();
-  if (dir.empty()) {
-    return errors::InvalidArgument("A GCS pattern doesn't have a bucket name: ",
-                                   pattern);
-  }
-  std::vector<string> all_files;
+  MatchingPathsCache::ComputeFunc compute_func =
+      [this](const string& pattern, std::vector<string>* results) {
+        results->clear();
+        // Find the fixed prefix by looking for the first wildcard.
+        const string& fixed_prefix =
+            pattern.substr(0, pattern.find_first_of("*?[\\"));
+        const string& dir = io::Dirname(fixed_prefix).ToString();
+        if (dir.empty()) {
+          return errors::InvalidArgument(
+              "A GCS pattern doesn't have a bucket name: ", pattern);
+        }
+        std::vector<string> all_files;
+        TF_RETURN_IF_ERROR(GetChildrenBounded(
+            dir, UINT64_MAX, &all_files, true /* recursively */,
+            false /* include_self_directory_marker */));
+
+        const auto& files_and_folders = AddAllSubpaths(all_files);
+
+        // Match all obtained paths to the input pattern.
+        for (const auto& path : files_and_folders) {
+          const string& full_path = io::JoinPath(dir, path);
+          if (Env::Default()->MatchPath(full_path, pattern)) {
+            results->push_back(full_path);
+          }
+        }
+        return Status::OK();
+      };
   TF_RETURN_IF_ERROR(
-      GetChildrenBounded(dir, UINT64_MAX, &all_files, true /* recursively */,
-                         false /* include_self_directory_marker */));
-
-  const auto& files_and_folders = AddAllSubpaths(all_files);
-
-  // Match all obtained paths to the input pattern.
-  for (const auto& path : files_and_folders) {
-    const string& full_path = io::JoinPath(dir, path);
-    if (Env::Default()->MatchPath(full_path, pattern)) {
-      results->push_back(full_path);
-    }
-  }
-  matching_paths_cache_->Insert(pattern, *results);
+      matching_paths_cache_->LookupOrCompute(pattern, results, compute_func));
   return Status::OK();
 }
 
diff --git a/tensorflow/core/platform/cloud/gcs_file_system_test.cc b/tensorflow/core/platform/cloud/gcs_file_system_test.cc
index 911176365f..7614ec4d7f 100644
--- a/tensorflow/core/platform/cloud/gcs_file_system_test.cc
+++ b/tensorflow/core/platform/cloud/gcs_file_system_test.cc
@@ -127,12 +127,7 @@ TEST(GcsFileSystemTest, NewRandomAccessFile_WithBlockCache) {
            "Uri: https://storage.googleapis.com/bucket/random_access.txt\n"
            "Auth Token: fake_token\n"
            "Range: 18-26\n",
-           ""),
-       new FakeHttpRequest(
-           "Uri: https://storage.googleapis.com/bucket/random_access.txt\n"
-           "Auth Token: fake_token\n"
-           "Range: 0-8\n",
-           "012345678")});
+           "")});
   GcsFileSystem fs(
       std::unique_ptr<AuthProvider>(new FakeAuthProvider),
       std::unique_ptr<HttpRequest::Factory>(
@@ -182,8 +177,8 @@ TEST(GcsFileSystemTest, NewRandomAccessFile_WithBlockCache) {
               file->Read(20, 10, &result, scratch).code());
     EXPECT_TRUE(result.empty());
 
-    // The beginning of the file has been evicted from the LRU cache.  This will
-    // result in another request. The buffer size is still 15.
+    // The beginning of the file should still be in the LRU cache. There should
+    // not be another request. The buffer size is still 15.
     TF_EXPECT_OK(file->Read(0, 4, &result, scratch));
   }
 
-- 
GitLab


From c4ec569953069f689fd42bae92e15f6ccf40e364 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 21 Nov 2017 16:21:50 -0800
Subject: [PATCH 0191/1225] Get rid of some code duplication in Grappler
 optimizers by refactoring some utilities to a shared location. Generalize the
 GetTailOfXXXChain to a more generic graph walker that takes a predicate
 functor that controls when to stop.

PiperOrigin-RevId: 176577743
---
 tensorflow/core/grappler/BUILD                |   2 +
 tensorflow/core/grappler/op_types.cc          |  41 +++++-
 tensorflow/core/grappler/op_types.h           |  18 +++
 .../optimizers/arithmetic_optimizer.cc        | 139 ++----------------
 .../optimizers/dependency_optimizer.cc        |  45 +-----
 .../core/grappler/optimizers/model_pruner.cc  |  10 --
 tensorflow/core/grappler/utils.cc             |  52 +++++++
 tensorflow/core/grappler/utils.h              |  65 +++++++-
 tensorflow/core/grappler/utils_test.cc        |  62 +++++++-
 9 files changed, 251 insertions(+), 183 deletions(-)

diff --git a/tensorflow/core/grappler/BUILD b/tensorflow/core/grappler/BUILD
index c81c6c0f21..99f1318072 100644
--- a/tensorflow/core/grappler/BUILD
+++ b/tensorflow/core/grappler/BUILD
@@ -21,6 +21,7 @@ cc_library(
     hdrs = ["op_types.h"],
     visibility = ["//visibility:public"],
     deps = [
+        ":utils",
         "//tensorflow/core:framework",
         "//tensorflow/core:lib",
         "//tensorflow/core:protos_all_cc",
@@ -45,6 +46,7 @@ tf_cc_test(
     srcs = ["utils_test.cc"],
     deps = [
         ":utils",
+        "//tensorflow/cc:cc_ops",
         "//tensorflow/core:all_kernels",
         "//tensorflow/core:lib",
         "//tensorflow/core:protos_all_cc",
diff --git a/tensorflow/core/grappler/op_types.cc b/tensorflow/core/grappler/op_types.cc
index 48b17fd20f..3a39045a4a 100644
--- a/tensorflow/core/grappler/op_types.cc
+++ b/tensorflow/core/grappler/op_types.cc
@@ -13,8 +13,12 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#include "tensorflow/core/grappler/op_types.h"
+#include <unordered_set>
+
 #include "tensorflow/core/framework/op.h"
+#include "tensorflow/core/framework/types.h"
+#include "tensorflow/core/grappler/op_types.h"
+#include "tensorflow/core/grappler/utils.h"
 #include "tensorflow/core/lib/core/status.h"
 
 namespace tensorflow {
@@ -233,5 +237,38 @@ bool ModifiesFrameInfo(const NodeDef& node) {
   return IsEnter(node) || IsExit(node) || IsNextIteration(node);
 }
 
-}  // end namespace grappler
+#define OPDEF_PROPERTY_HELPER(PROPERTY_CAP, PROPERTY)                      \
+  bool Is##PROPERTY_CAP(const NodeDef& node) {                             \
+    if (node.op() == "Add") {                                              \
+      /* Workaround for "Add" not being marked is_commutative and */       \
+      /* is_aggregate. (See cl/173915048). */                              \
+      const auto type = GetDataTypeFromAttr(node, "T");                    \
+      return type != DT_INVALID && type != DT_STRING;                      \
+    }                                                                      \
+    const OpDef* op_def = nullptr;                                         \
+    Status status = OpRegistry::Global()->LookUpOpDef(node.op(), &op_def); \
+    return status.ok() && op_def->is_##PROPERTY();                         \
+  }
+
+OPDEF_PROPERTY_HELPER(Aggregate, aggregate)
+OPDEF_PROPERTY_HELPER(Commutative, commutative)
+
+bool IsInvolution(const NodeDef& node) {
+  const std::unordered_set<string> involution_ops{
+      "Conj", "Reciprocal", "Invert", "Neg", "LogicalNot"};
+  return involution_ops.count(node.op()) > 0;
+}
+
+bool IsValuePreserving(const NodeDef& node) {
+  if (NumNonControlInputs(node) == 1 && IsAggregate(node)) {
+    return true;
+  }
+  const std::unordered_set<string> value_preserving_ops{
+      "Transpose",  "Reshape",      "Identity",        "InvertPermutation",
+      "Reverse",    "StopGradient", "PreventGradient", "CheckNumerics",
+      "ExpandDims", "Squeeze"};
+  return value_preserving_ops.count(node.op()) > 0;
+}
+
+}  // namespace grappler
 }  // end namespace tensorflow
diff --git a/tensorflow/core/grappler/op_types.h b/tensorflow/core/grappler/op_types.h
index 17ba3603c5..b7a55f3f21 100644
--- a/tensorflow/core/grappler/op_types.h
+++ b/tensorflow/core/grappler/op_types.h
@@ -17,6 +17,7 @@ limitations under the License.
 #define TENSORFLOW_GRAPPLER_OP_TYPES_H_
 
 #include "tensorflow/core/framework/node_def.pb.h"
+#include "tensorflow/core/lib/core/status.h"
 
 namespace tensorflow {
 namespace grappler {
@@ -59,9 +60,26 @@ bool IsSwitch(const NodeDef& node);
 bool IsTranspose(const NodeDef& node);
 bool IsVariable(const NodeDef& node);
 
+// Return true if the op is an aggregation (e.g. Add, AddN).
+// Returns false if it could not be determined to be so.
+bool IsAggregate(const NodeDef& node);
+
+// Return true if the op is commutative (e.g. Mul, Add).
+// Returns false if it could not be determined to be so.
+bool IsCommutative(const NodeDef& node);
+
 bool IsFreeOfSideEffect(const NodeDef& node);
 bool ModifiesFrameInfo(const NodeDef& node);
 
+// Returns true if the op is an element-wise involution, i.e. if it is its
+// own inverse such that f(f(x)) == x.
+bool IsInvolution(const NodeDef& node);
+
+// Returns true if the op in node only rearranges the order of elements in its
+// first input tensor and possible changes its shape. More precisely, this
+// function returns true if the op commutes with all element-wise operations.
+bool IsValuePreserving(const NodeDef& node);
+
 }  // end namespace grappler
 }  // end namespace tensorflow
 
diff --git a/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc b/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc
index 2677888fcb..33eac79c01 100644
--- a/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc
+++ b/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc
@@ -25,6 +25,7 @@ limitations under the License.
 #include "tensorflow/core/framework/node_def.pb.h"
 #include "tensorflow/core/framework/op.h"
 #include "tensorflow/core/framework/tensor_shape.pb.h"
+#include "tensorflow/core/framework/types.h"
 #include "tensorflow/core/grappler/costs/graph_properties.h"
 #include "tensorflow/core/grappler/grappler_item.h"
 #include "tensorflow/core/grappler/op_types.h"
@@ -80,22 +81,6 @@ Status SetTensorValue(DataType dtype, int value, Tensor* tensor) {
   return Status::OK();
 }
 
-bool IsInvolution(const NodeDef& node) {
-  const std::unordered_set<string> involution_ops = {
-      "Conj", "Reciprocal", "Invert", "Neg", "LogicalNot"};
-  return involution_ops.count(node.op()) > 0;
-}
-
-// Returns true if the op in node only rearranges the order of elements in an
-// input tensor, or more specifically, if it commutes with all element-wise
-// operations on the values.
-bool IsValuePreserving(const NodeDef& node) {
-  const std::unordered_set<string> value_preserving_ops = {
-      "Transpose",  "Reshape",      "Identity",        "InvertPermutation",
-      "Reverse",    "StopGradient", "PreventGradient", "CheckNumerics",
-      "ExpandDims", "Squeeze"};
-  return value_preserving_ops.count(node.op()) > 0;
-}
 
 template <typename T>
 bool AreInversePermutations(const std::vector<T>& a, const std::vector<T>& b) {
@@ -185,39 +170,6 @@ bool IsInnerMatrixTransposeNode(const NodeDef& transpose_node,
   return false;
 }
 
-// Follow a chain (through input(0)) of ops starting at `source->input(0)` as
-// long as they
-//  1. preserve the values of their first input,
-//  2. have a single (non-control) output,
-//  3. are not in nodes_to_preserve.
-// Returns the last node in the chain satisfying these properties or source
-// itself if a chain of length zero was found.
-//
-// source <- vp <- vp <- vp <- non_vp
-//                       ^^
-//                   return value
-NodeDef* GetTailOfValuePreservingChain(
-    const NodeDef* source, const NodeMap* node_map,
-    const std::unordered_set<string>& nodes_to_preserve) {
-  const NodeDef* source_parent = source;
-  if (!IsControlInput(source->input(0))) {
-    source = node_map->GetNode(source->input(0));
-    while (IsValuePreserving(*source) &&
-           node_map->GetOutputs(source->name()).size() == 1 &&
-           // Do not skip over preserved nodes, because folding will change
-           // the results of these skipped data-reordering nodes.
-           // TODO(jingyue): A more elegant way is to copy this chain of
-           // data-reordering nodes and modify only the copy.
-           !nodes_to_preserve.count(source->name())) {
-      source_parent = source;
-      if (IsControlInput(source->input(0))) {
-        break;
-      }
-      source = node_map->GetNode(source->input(0));
-    }
-  }
-  return const_cast<NodeDef*>(source_parent);
-}
 
 bool MaybeAddControlInput(const string& new_input, NodeDef* node,
                           GraphDef* graph, NodeMap* node_map) {
@@ -249,43 +201,6 @@ int CopyControlInputs(const NodeDef& from, NodeDef* to, GraphDef* graph,
   return num_copied;
 }
 
-// Returns the data type in attribute `attr_name` of `node`. If that attribute
-// doesn't exist, returns DT_INVALID.
-DataType GetDataTypeFromAttr(const NodeDef& node, const string& attr_name) {
-  if (!node.attr().count(attr_name)) {
-    return DT_INVALID;
-  }
-  const auto& attr = node.attr().at(attr_name);
-  if (attr.value_case() != AttrValue::kType) {
-    return DT_INVALID;
-  }
-  return attr.type();
-}
-
-bool IsCommutative(const NodeDef& node) {
-  if (node.op() == "Add" && node.input_size() > 0) {
-    // Workaround for "Add" not being marked is_commutative and is_aggregate.
-    // (See cl/173915048).
-    const auto type = GetDataTypeFromAttr(node, "T");
-    return type != DT_INVALID && type != DT_STRING;
-  }
-  const OpDef* op_def = nullptr;
-  const Status status = OpRegistry::Global()->LookUpOpDef(node.op(), &op_def);
-  return status.ok() && op_def->is_commutative();
-}
-
-bool IsAggregate(const NodeDef& node) {
-  if (node.op() == "Add" && node.input_size() > 0) {
-    // Workaround for "Add" not being marked is_commutative and is_aggregate.
-    // (See cl/173915048).
-    const auto type = GetDataTypeFromAttr(node, "T");
-    return type != DT_INVALID && type != DT_STRING;
-  }
-  const OpDef* op_def = nullptr;
-  const Status status = OpRegistry::Global()->LookUpOpDef(node.op(), &op_def);
-  return status.ok() && op_def->is_aggregate();
-}
-
 void SetDataTypeToAttr(DataType dtype, const string& attr_name, NodeDef* node) {
   (*node->mutable_attr())[attr_name].set_type(dtype);
 }
@@ -407,6 +322,18 @@ void AddFrameControlDeps(const NodeDef* old_node,
   }
 }
 
+NodeDef* GetTailOfValuePreservingChain(
+    const NodeDef& node, const NodeMap& node_map,
+    const std::unordered_set<string>& nodes_to_preserve) {
+  auto is_value_preserving_non_branching = [&](const NodeDef& node) {
+    return IsValuePreserving(node) &&
+           NumNonControlOutputs(node, node_map) == 1 &&
+           nodes_to_preserve.count(node.name()) == 0;
+  };
+  return GetTailOfChain(node, node_map, /*follow_control_input=*/false,
+                        is_value_preserving_non_branching);
+}
+
 }  // namespace
 
 class UniqueNodes {
@@ -591,7 +518,7 @@ string ArithmeticOptimizer::TrySimplifyAndReplaceUses(
     // the two instances of the involution from the graph, since they cancel
     // each other.
     NodeDef* tail =
-        GetTailOfValuePreservingChain(node, node_map, nodes_to_preserve_);
+        GetTailOfValuePreservingChain(*node, *node_map, nodes_to_preserve_);
     NodeDef* involution = node_map->GetNode(tail->input(0));
     if (involution->op() == node->op()) {
       // Skip both *node and *involution since they cancel each other.
@@ -609,7 +536,7 @@ string ArithmeticOptimizer::TrySimplifyAndReplaceUses(
 
   // Remove inverse transposes.
   if (node->op() == "Transpose" || node->op() == "ConjugateTranspose") {
-    const NodeDef* input = node_map->GetNode(node->input(0));
+    NodeDef* input = node_map->GetNode(node->input(0));
     if (input->op() == node->op()) {
       const NodeDef* node_perm = node_map->GetNode(node->input(1));
       const NodeDef* input_perm = node_map->GetNode(input->input(1));
@@ -798,7 +725,7 @@ string ArithmeticOptimizer::TrySimplifyAndReplaceUses(
     // since the weights tend to be smaller than the activations.
     if (weights->op() == "Const") {
       const NodeDef* source = node_map->GetNode(
-          GetTailOfValuePreservingChain(node, node_map, nodes_to_preserve_)
+          GetTailOfValuePreservingChain(*node, *node_map, nodes_to_preserve_)
               ->input(0));
       if (source->op() == "Mul" &&
           node_map->GetOutputs(source->name()).size() == 1) {
@@ -1066,40 +993,6 @@ string ArithmeticOptimizer::TrySimplifyAndReplaceUses(
   return "";
 }
 
-namespace {
-// A vector with a set. The set stores the same elements as the vector, and
-// quickly answers whether a value is in the vector. Duplicated elements are not
-// allowed for now.
-template <class T>
-class SetVector {
- public:
-  // Returns false if value already existed in the set, true otherwise.
-  bool PushBack(const T& value) {
-    if (!set_.insert(value).second) {
-      VLOG(2) << "Value " << value << " is already in the set.";
-      return false;
-    }
-    vector_.push_back(value);
-    return true;
-  }
-
-  T PopBack() {
-    T back = vector_.back();
-    set_.erase(back);
-    vector_.pop_back();
-    return back;
-  }
-
-  bool Exists(const T& value) const { return set_.count(value); }
-
-  bool Empty() const { return vector_.empty(); }
-
- private:
-  std::unordered_set<T> set_;
-  std::vector<T> vector_;
-};
-}  // namespace
-
 Status ArithmeticOptimizer::SimplifyArithmeticOps(
     GraphDef* optimized_graph) const {
   NodeMap node_map(optimized_graph);
diff --git a/tensorflow/core/grappler/optimizers/dependency_optimizer.cc b/tensorflow/core/grappler/optimizers/dependency_optimizer.cc
index 0cc4585ba4..7a9db9bebb 100644
--- a/tensorflow/core/grappler/optimizers/dependency_optimizer.cc
+++ b/tensorflow/core/grappler/optimizers/dependency_optimizer.cc
@@ -32,49 +32,6 @@ namespace tensorflow {
 namespace grappler {
 
 namespace {
-// A vector with a set. The set stores the same elements as the vector, and
-// quickly answers whether a value is in the vector. Duplicated elements are not
-// allowed for now.
-template <class T>
-class SetVector {
- public:
-  // Returns false if value already existed in the set, true otherwise.
-  bool PushBack(const T& value) {
-    if (!set_.insert(value).second) {
-      return false;
-    }
-    vector_.push_back(value);
-    return true;
-  }
-
-  T PopBack() {
-    T back = vector_.back();
-    set_.erase(back);
-    vector_.pop_back();
-    return back;
-  }
-
-  bool Exists(const T& value) const { return set_.count(value); }
-
-  bool Empty() const { return vector_.empty(); }
-
-  void Reserve(int64 size) { vector_.reserve(size); }
-
- private:
-  std::unordered_set<T> set_;
-  std::vector<T> vector_;
-};
-
-bool HasRegularOutputs(const NodeDef& node, const NodeMap& node_map) {
-  for (const NodeDef* output : node_map.GetOutputs(node.name())) {
-    for (const string& input : output->input()) {
-      if (!IsControlInput(input) && NodeName(input) == node.name()) {
-        return true;
-      }
-    }
-  }
-  return false;
-}
 
 int RemoveInput(NodeDef* node, const string& input, NodeMap* node_map) {
   int num_removed = 0;
@@ -119,7 +76,7 @@ bool DependencyOptimizer::SafeToConvertToNoOp(const NodeDef& node) {
   if (nodes_to_preserve_.find(node.name()) != nodes_to_preserve_.end()) {
     return false;
   }
-  if (!fetch_nodes_known_ || HasRegularOutputs(node, *node_map_)) {
+  if (!fetch_nodes_known_ || NumNonControlOutputs(node, *node_map_) > 0) {
     return false;
   }
   if (IsMerge(node) || IsSwitch(node)) {
diff --git a/tensorflow/core/grappler/optimizers/model_pruner.cc b/tensorflow/core/grappler/optimizers/model_pruner.cc
index b9df196f83..c9bec7890e 100644
--- a/tensorflow/core/grappler/optimizers/model_pruner.cc
+++ b/tensorflow/core/grappler/optimizers/model_pruner.cc
@@ -26,16 +26,6 @@ limitations under the License.
 namespace tensorflow {
 namespace grappler {
 
-int NumNonControlInputs(const NodeDef& node) {
-  int num_inputs = node.input_size();
-  for (int i = 0; i < node.input_size(); ++i) {
-    if (!node.input(i).empty() && node.input(i)[0] == '^') {
-      num_inputs--;
-    }
-  }
-  return num_inputs;
-}
-
 bool IsTrivialOp(const NodeDef& node) {
   // Remove the stop gradient nodes since they serve no purpose once the graph
   // is built. Also remove Identity ops.
diff --git a/tensorflow/core/grappler/utils.cc b/tensorflow/core/grappler/utils.cc
index 66f8c537ed..7fd1876371 100644
--- a/tensorflow/core/grappler/utils.cc
+++ b/tensorflow/core/grappler/utils.cc
@@ -18,6 +18,7 @@ limitations under the License.
 #include "tensorflow/core/framework/attr_value.pb.h"
 #include "tensorflow/core/framework/op.h"
 #include "tensorflow/core/framework/op_def.pb.h"
+#include "tensorflow/core/framework/types.h"
 #include "tensorflow/core/grappler/utils.h"
 #include "tensorflow/core/lib/strings/numbers.h"
 #include "tensorflow/core/lib/strings/scanner.h"
@@ -247,5 +248,56 @@ int NumOutputs(const NodeDef& node) {
   return num_outputs;
 }
 
+int NumNonControlInputs(const NodeDef& node) {
+  int num_inputs = node.input_size();
+  for (int i = 0; i < node.input_size(); ++i) {
+    if (IsControlInput(node.input(i))) {
+      --num_inputs;
+    }
+  }
+  return num_inputs;
+}
+
+int NumNonControlOutputs(const NodeDef& node, const NodeMap& node_map) {
+  int num_outputs = 0;
+  for (const NodeDef* output : node_map.GetOutputs(node.name())) {
+    for (const string& input : output->input()) {
+      if (input == node.name()) {
+        ++num_outputs;
+      }
+    }
+  }
+  return num_outputs;
+}
+
+// Returns the data type in attribute `attr_name` of `node`. If that attribute
+// doesn't exist, returns DT_INVALID.
+DataType GetDataTypeFromAttr(const NodeDef& node, const string& attr_name) {
+  if (!node.attr().count(attr_name)) {
+    return DT_INVALID;
+  }
+  const auto& attr = node.attr().at(attr_name);
+  if (attr.value_case() != AttrValue::kType) {
+    return DT_INVALID;
+  }
+  return attr.type();
+}
+
+NodeDef* GetTailOfChain(const NodeDef& source, const NodeMap& node_map,
+                        bool follow_control_input,
+                        const std::function<bool(const NodeDef&)>& pred_fn) {
+  const NodeDef* current = &source;
+  const NodeDef* next = current;
+  while (next == &source || pred_fn(*next)) {
+    current = next;
+    if (current->input_size() == 0 ||
+        (!follow_control_input && IsControlInput(current->input(0)))) {
+      break;
+    }
+    next = node_map.GetNode(current->input(0));
+  }
+  return const_cast<NodeDef*>(current);
+}
+
 }  // end namespace grappler
 }  // end namespace tensorflow
diff --git a/tensorflow/core/grappler/utils.h b/tensorflow/core/grappler/utils.h
index f9fb418140..b98b8656e2 100644
--- a/tensorflow/core/grappler/utils.h
+++ b/tensorflow/core/grappler/utils.h
@@ -17,12 +17,15 @@ limitations under the License.
 #define TENSORFLOW_GRAPPLER_UTILS_H_
 
 #include <functional>
+#include <unordered_map>
+#include <unordered_set>
+#include <vector>
 
 #include "tensorflow/core/framework/graph.pb.h"
 #include "tensorflow/core/framework/node_def.pb.h"
+#include "tensorflow/core/framework/types.h"
 #include "tensorflow/core/lib/core/status.h"
 #include "tensorflow/core/lib/core/threadpool.h"
-#include "tensorflow/core/platform/types.h"
 
 namespace tensorflow {
 namespace grappler {
@@ -68,6 +71,39 @@ class OutputMap {
   std::unordered_map<string, std::unordered_map<NodeDef*, int>> outputs_;
 };
 
+// A vector with a set. The set stores the same elements as the vector, and
+// quickly answers whether a value is in the vector. Duplicated elements are not
+// allowed for now.
+template <class T>
+class SetVector {
+ public:
+  // Returns false if value already existed in the set, true otherwise.
+  bool PushBack(const T& value) {
+    if (!set_.insert(value).second) {
+      return false;
+    }
+    vector_.push_back(value);
+    return true;
+  }
+
+  T PopBack() {
+    T back = vector_.back();
+    set_.erase(back);
+    vector_.pop_back();
+    return back;
+  }
+
+  bool Exists(const T& value) const { return set_.find(value) != set_.end(); }
+
+  bool Empty() const { return vector_.empty(); }
+
+  void Reserve(int64 size) { vector_.reserve(size); }
+
+ private:
+  std::unordered_set<T> set_;
+  std::vector<T> vector_;
+};
+
 // True iff 'name' refers to a control inputs, i.e. a node name prefixed with
 // the ^ character.
 bool IsControlInput(const string& name);
@@ -109,10 +145,33 @@ string AsControlDependency(const NodeDef& node);
 // for control dependency, given a node name
 string AsControlDependency(const string& node);
 
-// Returns the number of outputs of a node. Note that some of the outputs may be
-// unconnected.
+// Returns the number of outputs of a node according to its OpDef. Note that
+// some of the outputs may be unconnected.
 int NumOutputs(const NodeDef& node);
 
+// Number of connected non-control inputs.
+int NumNonControlInputs(const NodeDef& node);
+
+// Number of connected non-control outputs.
+int NumNonControlOutputs(const NodeDef& node, const NodeMap& node_map);
+
+// Returns the data type in attribute `attr_name` of `node`. If that attribute
+// doesn't exist, returns DT_INVALID.
+DataType GetDataTypeFromAttr(const NodeDef& node, const string& attr_name);
+
+// Returns the last node in the simple chain starting at source and traversing
+// through the input(0) edge from each node as long as the next node satisfies
+// the predicate given in pred_fn. If no nodes satisfy the predicate, &source
+// will be returned. Example: For the chain
+//    source <- a <- b <- ... <- y <- z
+// where
+//    pred_fn(a) = pred_fn(b) = ... = pred_fn(y) = true,
+//    pred_fn(z) = false,
+// the return value will be a pointer to y.
+NodeDef* GetTailOfChain(const NodeDef& source, const NodeMap& node_map,
+                        bool follow_control_input,
+                        const std::function<bool(const NodeDef&)>& pred_fn);
+
 }  // end namespace grappler
 }  // end namespace tensorflow
 
diff --git a/tensorflow/core/grappler/utils_test.cc b/tensorflow/core/grappler/utils_test.cc
index 9d747fe7dc..77371c399e 100644
--- a/tensorflow/core/grappler/utils_test.cc
+++ b/tensorflow/core/grappler/utils_test.cc
@@ -14,6 +14,7 @@ limitations under the License.
 ==============================================================================*/
 
 #include "tensorflow/core/grappler/utils.h"
+#include "tensorflow/cc/ops/standard_ops.h"
 #include "tensorflow/core/framework/node_def.pb.h"
 #include "tensorflow/core/lib/core/status.h"
 #include "tensorflow/core/lib/core/threadpool.h"
@@ -181,7 +182,7 @@ TEST_F(UtilsTest, NumOutputs) {
   EXPECT_EQ(1, NumOutputs(CreateDequeueNode()));
 }
 
-TEST(AsControlDependency, BasicTest) {
+TEST_F(UtilsTest, AsControlDependency) {
   NodeDef node;
   node.set_name("foo");
   EXPECT_EQ("^foo", AsControlDependency(node));
@@ -189,6 +190,65 @@ TEST(AsControlDependency, BasicTest) {
   EXPECT_EQ("^foo", AsControlDependency("^foo"));
 }
 
+TEST_F(UtilsTest, GetTailOfChain) {
+  tensorflow::Scope s = tensorflow::Scope::NewRootScope();
+  Output c0 = ops::Const(s.WithOpName("c0"), {1.0f, 2.0f}, {1, 2});
+  Output c1 = ops::Const(s.WithOpName("c1"), {3.0f, 4.0f}, {1, 2});
+  // Add a node with only connected by control output.
+  Output neg0 = ops::Neg(s.WithOpName("neg0"), c1);
+  // Add a node with two outputs.
+  Output neg1 =
+      ops::Neg(s.WithControlDependencies(neg0).WithOpName("neg1"), c0);
+  Output neg2 = ops::Neg(s.WithOpName("neg2"), neg1);
+  Output id1 = ops::Identity(s.WithOpName("id1"), neg2);
+  Output id2 = ops::Identity(s.WithOpName("id2"), neg1);
+  auto noop = ops::NoOp(s.WithControlDependencies(neg0).WithOpName("noop"));
+  GraphDef graph;
+  TF_CHECK_OK(s.ToGraphDef(&graph));
+  LOG(INFO) << graph.DebugString();
+
+  ASSERT_EQ("c0", graph.node(0).name());
+  ASSERT_EQ("c1", graph.node(1).name());
+  ASSERT_EQ("neg0", graph.node(2).name());
+  ASSERT_EQ("neg1", graph.node(3).name());
+  ASSERT_EQ("neg2", graph.node(4).name());
+  ASSERT_EQ("id1", graph.node(5).name());
+  ASSERT_EQ("id2", graph.node(6).name());
+  ASSERT_EQ("noop", graph.node(7).name());
+
+  NodeMap node_map(&graph);
+  auto is_neg = [&](const NodeDef& node) { return node.op() == "Neg"; };
+  // We walk backwards, starting as "id1", so tail should be "neg1".
+  NodeDef* tail = GetTailOfChain(graph.node(5), node_map,
+                                 /*follow_control_input=*/false, is_neg);
+  EXPECT_NE(tail, nullptr);
+  EXPECT_EQ("neg1", tail->name());
+
+  // We stop at branching nodes, so tail should be "neg2".
+  auto is_neg_and_non_branching = [&](const NodeDef& node) {
+    return node.op() == "Neg" && NumNonControlOutputs(node, node_map) == 1;
+  };
+  tail =
+      GetTailOfChain(graph.node(5), node_map,
+                     /*follow_control_input=*/false, is_neg_and_non_branching);
+  EXPECT_NE(tail, nullptr);
+  EXPECT_EQ("neg2", tail->name());
+
+  // We walk backwards, starting from "noop", also following control inputs,
+  // so tail should be "neg0".
+  tail = GetTailOfChain(graph.node(7), node_map,
+                        /*follow_control_input=*/true, is_neg);
+  EXPECT_NE(tail, nullptr);
+  EXPECT_EQ("neg0", tail->name());
+
+  // We walk backwards, starting from "noop", not following control inputs,
+  // so tail should be "noop" itself.
+  tail = GetTailOfChain(graph.node(7), node_map,
+                        /*follow_control_input=*/false, is_neg);
+  EXPECT_NE(tail, nullptr);
+  EXPECT_EQ("noop", tail->name());
+}
+
 }  // namespace
 }  // namespace grappler
 }  // namespace tensorflow
-- 
GitLab


From 94c122b25f2d5c0695cb9e73c0f8eee9992286ed Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 21 Nov 2017 16:27:36 -0800
Subject: [PATCH 0192/1225] Adds LinearEstimator and
 DNNLinearCombinedEstimator.

PiperOrigin-RevId: 176578390
---
 tensorflow/contrib/estimator/BUILD            |  77 ++++++
 tensorflow/contrib/estimator/__init__.py      |   4 +
 .../python/estimator/dnn_linear_combined.py   | 164 +++++++++++++
 .../estimator/dnn_linear_combined_test.py     | 220 ++++++++++++++++++
 .../estimator/python/estimator/linear.py      | 118 ++++++++++
 .../estimator/python/estimator/linear_test.py | 153 ++++++++++++
 6 files changed, 736 insertions(+)
 create mode 100644 tensorflow/contrib/estimator/python/estimator/dnn_linear_combined.py
 create mode 100644 tensorflow/contrib/estimator/python/estimator/dnn_linear_combined_test.py
 create mode 100644 tensorflow/contrib/estimator/python/estimator/linear.py
 create mode 100644 tensorflow/contrib/estimator/python/estimator/linear_test.py

diff --git a/tensorflow/contrib/estimator/BUILD b/tensorflow/contrib/estimator/BUILD
index fe643659d8..197cf7e56f 100644
--- a/tensorflow/contrib/estimator/BUILD
+++ b/tensorflow/contrib/estimator/BUILD
@@ -27,8 +27,10 @@ py_library(
     srcs_version = "PY2AND3",
     deps = [
         ":dnn",
+        ":dnn_linear_combined",
         ":extenders",
         ":head",
+        ":linear",
         ":logit_fns",
         ":multi_head",
         ":replicate_model_fn",
@@ -73,6 +75,45 @@ py_test(
     ],
 )
 
+py_library(
+    name = "dnn_linear_combined",
+    srcs = ["python/estimator/dnn_linear_combined.py"],
+    srcs_version = "PY2AND3",
+    deps = [
+        "//tensorflow/python:nn",
+        "//tensorflow/python/estimator",
+        "//tensorflow/python/estimator:dnn_linear_combined",
+    ],
+)
+
+py_test(
+    name = "dnn_linear_combined_test",
+    size = "small",
+    srcs = ["python/estimator/dnn_linear_combined_test.py"],
+    srcs_version = "PY2AND3",
+    tags = [
+        "no_pip",
+        "notsan",
+    ],
+    deps = [
+        ":dnn_linear_combined",
+        ":head",
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:framework_ops",
+        "//tensorflow/python:nn",
+        "//tensorflow/python:platform",
+        "//tensorflow/python:summary",
+        "//tensorflow/python/estimator:dnn_testing_utils",
+        "//tensorflow/python/estimator:export_export",
+        "//tensorflow/python/estimator:linear_testing_utils",
+        "//tensorflow/python/estimator:numpy_io",
+        "//tensorflow/python/estimator:prediction_keys",
+        "//tensorflow/python/feature_column",
+        "//third_party/py/numpy",
+        "@six_archive//:six",
+    ],
+)
+
 py_library(
     name = "extenders",
     srcs = [
@@ -169,6 +210,42 @@ py_test(
     ],
 )
 
+py_library(
+    name = "linear",
+    srcs = ["python/estimator/linear.py"],
+    srcs_version = "PY2AND3",
+    deps = [
+        "//tensorflow/python/estimator",
+        "//tensorflow/python/estimator:linear",
+    ],
+)
+
+py_test(
+    name = "linear_test",
+    size = "small",
+    srcs = ["python/estimator/linear_test.py"],
+    srcs_version = "PY2AND3",
+    tags = [
+        "no_pip",
+        "notsan",
+    ],
+    deps = [
+        ":head",
+        ":linear",
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:framework_ops",
+        "//tensorflow/python:platform",
+        "//tensorflow/python:summary",
+        "//tensorflow/python/estimator:export_export",
+        "//tensorflow/python/estimator:linear_testing_utils",
+        "//tensorflow/python/estimator:numpy_io",
+        "//tensorflow/python/estimator:prediction_keys",
+        "//tensorflow/python/feature_column",
+        "//third_party/py/numpy",
+        "@six_archive//:six",
+    ],
+)
+
 py_library(
     name = "logit_fns",
     srcs = [
diff --git a/tensorflow/contrib/estimator/__init__.py b/tensorflow/contrib/estimator/__init__.py
index cf727264cd..8191e06fae 100644
--- a/tensorflow/contrib/estimator/__init__.py
+++ b/tensorflow/contrib/estimator/__init__.py
@@ -20,8 +20,10 @@ from __future__ import print_function
 
 # pylint: disable=unused-import,line-too-long,wildcard-import
 from tensorflow.contrib.estimator.python.estimator.dnn import *
+from tensorflow.contrib.estimator.python.estimator.dnn_linear_combined import *
 from tensorflow.contrib.estimator.python.estimator.extenders import *
 from tensorflow.contrib.estimator.python.estimator.head import *
+from tensorflow.contrib.estimator.python.estimator.linear import *
 from tensorflow.contrib.estimator.python.estimator.logit_fns import *
 from tensorflow.contrib.estimator.python.estimator.multi_head import *
 
@@ -38,6 +40,8 @@ _allowed_symbols = [
     'multi_label_head',
     'regression_head',
     'DNNEstimator',
+    'DNNLinearCombinedEstimator',
+    'LinearEstimator',
     'call_logit_fn',
     'dnn_logit_fn_builder',
     'linear_logit_fn_builder',
diff --git a/tensorflow/contrib/estimator/python/estimator/dnn_linear_combined.py b/tensorflow/contrib/estimator/python/estimator/dnn_linear_combined.py
new file mode 100644
index 0000000000..ccaf1128bf
--- /dev/null
+++ b/tensorflow/contrib/estimator/python/estimator/dnn_linear_combined.py
@@ -0,0 +1,164 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""TensorFlow estimator for Linear and DNN joined training models."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from tensorflow.python.estimator import estimator
+from tensorflow.python.estimator.canned import dnn_linear_combined as dnn_linear_combined_lib
+from tensorflow.python.ops import nn
+
+
+class DNNLinearCombinedEstimator(estimator.Estimator):
+  """An estimator for TensorFlow Linear and DNN joined models with custom head.
+
+  Note: This estimator is also known as wide-n-deep.
+
+  Example:
+
+  ```python
+  numeric_feature = numeric_column(...)
+  categorical_column_a = categorical_column_with_hash_bucket(...)
+  categorical_column_b = categorical_column_with_hash_bucket(...)
+
+  categorical_feature_a_x_categorical_feature_b = crossed_column(...)
+  categorical_feature_a_emb = embedding_column(
+      categorical_column=categorical_feature_a, ...)
+  categorical_feature_b_emb = embedding_column(
+      categorical_column=categorical_feature_b, ...)
+
+  estimator = DNNLinearCombinedEstimator(
+      head=tf.contrib.estimator.multi_label_head(n_classes=3),
+      # wide settings
+      linear_feature_columns=[categorical_feature_a_x_categorical_feature_b],
+      linear_optimizer=tf.train.FtrlOptimizer(...),
+      # deep settings
+      dnn_feature_columns=[
+          categorical_feature_a_emb, categorical_feature_b_emb,
+          numeric_feature],
+      dnn_hidden_units=[1000, 500, 100],
+      dnn_optimizer=tf.train.ProximalAdagradOptimizer(...))
+
+  # To apply L1 and L2 regularization, you can set optimizers as follows:
+  tf.train.ProximalAdagradOptimizer(
+      learning_rate=0.1,
+      l1_regularization_strength=0.001,
+      l2_regularization_strength=0.001)
+  # It is same for FtrlOptimizer.
+
+  # Input builders
+  def input_fn_train: # returns x, y
+    pass
+  estimator.train(input_fn=input_fn_train, steps=100)
+
+  def input_fn_eval: # returns x, y
+    pass
+  metrics = estimator.evaluate(input_fn=input_fn_eval, steps=10)
+  def input_fn_predict: # returns x, None
+    pass
+  predictions = estimator.predict(input_fn=input_fn_predict)
+  ```
+
+  Input of `train` and `evaluate` should have following features,
+  otherwise there will be a `KeyError`:
+
+  * for each `column` in `dnn_feature_columns` + `linear_feature_columns`:
+    - if `column` is a `_CategoricalColumn`, a feature with `key=column.name`
+      whose `value` is a `SparseTensor`.
+    - if `column` is a `_WeightedCategoricalColumn`, two features: the first
+      with `key` the id column name, the second with `key` the weight column
+      name. Both features' `value` must be a `SparseTensor`.
+    - if `column` is a `_DenseColumn`, a feature with `key=column.name`
+      whose `value` is a `Tensor`.
+
+  Loss is calculated by using mean squared error.
+
+  @compatibility(eager)
+  Estimators are not compatible with eager execution.
+  @end_compatibility
+  """
+
+  def __init__(self,
+               head,
+               model_dir=None,
+               linear_feature_columns=None,
+               linear_optimizer='Ftrl',
+               dnn_feature_columns=None,
+               dnn_optimizer='Adagrad',
+               dnn_hidden_units=None,
+               dnn_activation_fn=nn.relu,
+               dnn_dropout=None,
+               input_layer_partitioner=None,
+               config=None):
+    """Initializes a DNNLinearCombinedEstimator instance.
+
+    Args:
+      head: A `_Head` instance constructed with a method such as
+        `tf.contrib.estimator.multi_label_head`.
+      model_dir: Directory to save model parameters, graph and etc. This can
+        also be used to load checkpoints from the directory into a estimator
+        to continue training a previously saved model.
+      linear_feature_columns: An iterable containing all the feature columns
+        used by linear part of the model. All items in the set must be
+        instances of classes derived from `FeatureColumn`.
+      linear_optimizer: An instance of `tf.Optimizer` used to apply gradients to
+        the linear part of the model. Defaults to FTRL optimizer.
+      dnn_feature_columns: An iterable containing all the feature columns used
+        by deep part of the model. All items in the set must be instances of
+        classes derived from `FeatureColumn`.
+      dnn_optimizer: An instance of `tf.Optimizer` used to apply gradients to
+        the deep part of the model. Defaults to Adagrad optimizer.
+      dnn_hidden_units: List of hidden units per layer. All layers are fully
+        connected.
+      dnn_activation_fn: Activation function applied to each layer. If None,
+        will use `tf.nn.relu`.
+      dnn_dropout: When not None, the probability we will drop out
+        a given coordinate.
+      input_layer_partitioner: Partitioner for input layer. Defaults to
+        `min_max_variable_partitioner` with `min_slice_size` 64 << 20.
+      config: RunConfig object to configure the runtime settings.
+
+    Raises:
+      ValueError: If both linear_feature_columns and dnn_features_columns are
+        empty at the same time.
+    """
+    linear_feature_columns = linear_feature_columns or []
+    dnn_feature_columns = dnn_feature_columns or []
+    self._feature_columns = (
+        list(linear_feature_columns) + list(dnn_feature_columns))
+    if not self._feature_columns:
+      raise ValueError('Either linear_feature_columns or dnn_feature_columns '
+                       'must be defined.')
+
+    def _model_fn(features, labels, mode, config):
+      return dnn_linear_combined_lib._dnn_linear_combined_model_fn(  # pylint: disable=protected-access
+          features=features,
+          labels=labels,
+          mode=mode,
+          head=head,
+          linear_feature_columns=linear_feature_columns,
+          linear_optimizer=linear_optimizer,
+          dnn_feature_columns=dnn_feature_columns,
+          dnn_optimizer=dnn_optimizer,
+          dnn_hidden_units=dnn_hidden_units,
+          dnn_activation_fn=dnn_activation_fn,
+          dnn_dropout=dnn_dropout,
+          input_layer_partitioner=input_layer_partitioner,
+          config=config)
+
+    super(DNNLinearCombinedEstimator, self).__init__(
+        model_fn=_model_fn, model_dir=model_dir, config=config)
diff --git a/tensorflow/contrib/estimator/python/estimator/dnn_linear_combined_test.py b/tensorflow/contrib/estimator/python/estimator/dnn_linear_combined_test.py
new file mode 100644
index 0000000000..b5e4d34dc7
--- /dev/null
+++ b/tensorflow/contrib/estimator/python/estimator/dnn_linear_combined_test.py
@@ -0,0 +1,220 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for dnn_linear_combined.py."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import shutil
+import tempfile
+
+import numpy as np
+import six
+
+from tensorflow.contrib.estimator.python.estimator import dnn_linear_combined
+from tensorflow.contrib.estimator.python.estimator import head as head_lib
+from tensorflow.python.estimator.canned import dnn_testing_utils
+from tensorflow.python.estimator.canned import linear_testing_utils
+from tensorflow.python.estimator.canned import prediction_keys
+from tensorflow.python.estimator.export import export
+from tensorflow.python.estimator.inputs import numpy_io
+from tensorflow.python.feature_column import feature_column
+from tensorflow.python.framework import ops
+from tensorflow.python.ops import nn
+from tensorflow.python.platform import gfile
+from tensorflow.python.platform import test
+from tensorflow.python.summary.writer import writer_cache
+
+
+def _dnn_only_estimator_fn(
+    hidden_units,
+    feature_columns,
+    model_dir=None,
+    label_dimension=1,
+    weight_column=None,
+    optimizer='Adagrad',
+    activation_fn=nn.relu,
+    dropout=None,
+    input_layer_partitioner=None,
+    config=None):
+  return dnn_linear_combined.DNNLinearCombinedEstimator(
+      head=head_lib.regression_head(
+          weight_column=weight_column, label_dimension=label_dimension),
+      model_dir=model_dir,
+      dnn_feature_columns=feature_columns,
+      dnn_optimizer=optimizer,
+      dnn_hidden_units=hidden_units,
+      dnn_activation_fn=activation_fn,
+      dnn_dropout=dropout,
+      input_layer_partitioner=input_layer_partitioner,
+      config=config)
+
+
+class DNNOnlyEstimatorEvaluateTest(
+    dnn_testing_utils.BaseDNNRegressorEvaluateTest, test.TestCase):
+
+  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
+    test.TestCase.__init__(self, methodName)
+    dnn_testing_utils.BaseDNNRegressorEvaluateTest.__init__(
+        self, _dnn_only_estimator_fn)
+
+
+class DNNOnlyEstimatorPredictTest(
+    dnn_testing_utils.BaseDNNRegressorPredictTest, test.TestCase):
+
+  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
+    test.TestCase.__init__(self, methodName)
+    dnn_testing_utils.BaseDNNRegressorPredictTest.__init__(
+        self, _dnn_only_estimator_fn)
+
+
+class DNNOnlyEstimatorTrainTest(
+    dnn_testing_utils.BaseDNNRegressorTrainTest, test.TestCase):
+
+  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
+    test.TestCase.__init__(self, methodName)
+    dnn_testing_utils.BaseDNNRegressorTrainTest.__init__(
+        self, _dnn_only_estimator_fn)
+
+
+def _linear_only_estimator_fn(
+    feature_columns,
+    model_dir=None,
+    label_dimension=1,
+    weight_column=None,
+    optimizer='Ftrl',
+    config=None,
+    partitioner=None):
+  return dnn_linear_combined.DNNLinearCombinedEstimator(
+      head=head_lib.regression_head(
+          weight_column=weight_column, label_dimension=label_dimension),
+      model_dir=model_dir,
+      linear_feature_columns=feature_columns,
+      linear_optimizer=optimizer,
+      input_layer_partitioner=partitioner,
+      config=config)
+
+
+class LinearOnlyEstimatorEvaluateTest(
+    linear_testing_utils.BaseLinearRegressorEvaluationTest, test.TestCase):
+
+  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
+    test.TestCase.__init__(self, methodName)
+    linear_testing_utils.BaseLinearRegressorEvaluationTest.__init__(
+        self, _linear_only_estimator_fn)
+
+
+class LinearOnlyEstimatorPredictTest(
+    linear_testing_utils.BaseLinearRegressorPredictTest, test.TestCase):
+
+  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
+    test.TestCase.__init__(self, methodName)
+    linear_testing_utils.BaseLinearRegressorPredictTest.__init__(
+        self, _linear_only_estimator_fn)
+
+
+class LinearOnlyEstimatorTrainTest(
+    linear_testing_utils.BaseLinearRegressorTrainingTest, test.TestCase):
+
+  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
+    test.TestCase.__init__(self, methodName)
+    linear_testing_utils.BaseLinearRegressorTrainingTest.__init__(
+        self, _linear_only_estimator_fn)
+
+
+class DNNLinearCombinedEstimatorIntegrationTest(test.TestCase):
+
+  def setUp(self):
+    self._model_dir = tempfile.mkdtemp()
+
+  def tearDown(self):
+    if self._model_dir:
+      writer_cache.FileWriterCache.clear()
+      shutil.rmtree(self._model_dir)
+
+  def _test_complete_flow(
+      self, train_input_fn, eval_input_fn, predict_input_fn, input_dimension,
+      label_dimension, batch_size):
+    linear_feature_columns = [
+        feature_column.numeric_column('x', shape=(input_dimension,))]
+    dnn_feature_columns = [
+        feature_column.numeric_column('x', shape=(input_dimension,))]
+    feature_columns = linear_feature_columns + dnn_feature_columns
+    est = dnn_linear_combined.DNNLinearCombinedEstimator(
+        head=head_lib.regression_head(label_dimension=label_dimension),
+        linear_feature_columns=linear_feature_columns,
+        dnn_feature_columns=dnn_feature_columns,
+        dnn_hidden_units=(2, 2),
+        model_dir=self._model_dir)
+
+    # TRAIN
+    num_steps = 10
+    est.train(train_input_fn, steps=num_steps)
+
+    # EVALUTE
+    scores = est.evaluate(eval_input_fn)
+    self.assertEqual(num_steps, scores[ops.GraphKeys.GLOBAL_STEP])
+    self.assertIn('loss', six.iterkeys(scores))
+
+    # PREDICT
+    predictions = np.array([
+        x[prediction_keys.PredictionKeys.PREDICTIONS]
+        for x in est.predict(predict_input_fn)
+    ])
+    self.assertAllEqual((batch_size, label_dimension), predictions.shape)
+
+    # EXPORT
+    feature_spec = feature_column.make_parse_example_spec(feature_columns)
+    serving_input_receiver_fn = export.build_parsing_serving_input_receiver_fn(
+        feature_spec)
+    export_dir = est.export_savedmodel(tempfile.mkdtemp(),
+                                       serving_input_receiver_fn)
+    self.assertTrue(gfile.Exists(export_dir))
+
+  def test_numpy_input_fn(self):
+    """Tests complete flow with numpy_input_fn."""
+    label_dimension = 2
+    batch_size = 10
+    data = np.linspace(0., 2., batch_size * label_dimension, dtype=np.float32)
+    data = data.reshape(batch_size, label_dimension)
+    # learn y = x
+    train_input_fn = numpy_io.numpy_input_fn(
+        x={'x': data},
+        y=data,
+        batch_size=batch_size,
+        num_epochs=None,
+        shuffle=True)
+    eval_input_fn = numpy_io.numpy_input_fn(
+        x={'x': data},
+        y=data,
+        batch_size=batch_size,
+        shuffle=False)
+    predict_input_fn = numpy_io.numpy_input_fn(
+        x={'x': data},
+        batch_size=batch_size,
+        shuffle=False)
+
+    self._test_complete_flow(
+        train_input_fn=train_input_fn,
+        eval_input_fn=eval_input_fn,
+        predict_input_fn=predict_input_fn,
+        input_dimension=label_dimension,
+        label_dimension=label_dimension,
+        batch_size=batch_size)
+
+
+if __name__ == '__main__':
+  test.main()
diff --git a/tensorflow/contrib/estimator/python/estimator/linear.py b/tensorflow/contrib/estimator/python/estimator/linear.py
new file mode 100644
index 0000000000..3bf4abe83d
--- /dev/null
+++ b/tensorflow/contrib/estimator/python/estimator/linear.py
@@ -0,0 +1,118 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Linear estimator."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from tensorflow.python.estimator import estimator
+from tensorflow.python.estimator.canned import linear as linear_lib
+
+
+class LinearEstimator(estimator.Estimator):
+  """An estimator for TensorFlow linear models with user-specified head.
+
+  Example:
+
+  ```python
+  categorical_column_a = categorical_column_with_hash_bucket(...)
+  categorical_column_b = categorical_column_with_hash_bucket(...)
+
+  categorical_feature_a_x_categorical_feature_b = crossed_column(...)
+
+  # Estimator using the default optimizer.
+  estimator = LinearEstimator(
+      head=tf.contrib.estimator.multi_label_head(n_classes=3),
+      feature_columns=[categorical_column_a,
+                       categorical_feature_a_x_categorical_feature_b])
+
+  # Or estimator using the FTRL optimizer with regularization.
+  estimator = LinearEstimator(
+      head=tf.contrib.estimator.multi_label_head(n_classes=3),
+      feature_columns=[categorical_column_a,
+                       categorical_feature_a_x_categorical_feature_b])
+      optimizer=tf.train.FtrlOptimizer(
+          learning_rate=0.1,
+          l1_regularization_strength=0.001
+      ))
+
+  def input_fn_train: # returns x, y (where y represents label's class index).
+    ...
+  estimator.train(input_fn=input_fn_train, steps=100)
+  def input_fn_eval: # returns x, y (where y represents label's class index).
+    ...
+  metrics = estimator.evaluate(input_fn=input_fn_eval, steps=10)
+  def input_fn_predict: # returns x, None
+    ...
+  predictions = estimator.predict(input_fn=input_fn_predict)
+  ```
+
+  Input of `train` and `evaluate` should have following features,
+  otherwise there will be a `KeyError`:
+
+  * if `weight_column` is not `None`, a feature with
+    `key=weight_column` whose value is a `Tensor`.
+  * for each `column` in `feature_columns`:
+    - if `column` is a `_CategoricalColumn`, a feature with `key=column.name`
+      whose `value` is a `SparseTensor`.
+    - if `column` is a `_WeightedCategoricalColumn`, two features: the first
+      with `key` the id column name, the second with `key` the weight column
+      name. Both features' `value` must be a `SparseTensor`.
+    - if `column` is a `_DenseColumn`, a feature with `key=column.name`
+      whose `value` is a `Tensor`.
+
+  Loss and predicted output are determined by the specified head.
+
+  @compatibility(eager)
+  Estimators are not compatible with eager execution.
+  @end_compatibility
+  """
+
+  def __init__(self,
+               head,
+               feature_columns,
+               model_dir=None,
+               optimizer='Ftrl',
+               config=None,
+               partitioner=None):
+    """Initializes a `LinearEstimator` instance.
+
+    Args:
+      head: A `_Head` instance constructed with a method such as
+        `tf.contrib.estimator.multi_label_head`.
+      feature_columns: An iterable containing all the feature columns used by
+        the model. All items in the set should be instances of classes derived
+        from `FeatureColumn`.
+      model_dir: Directory to save model parameters, graph and etc. This can
+        also be used to load checkpoints from the directory into a estimator
+        to continue training a previously saved model.
+      optimizer: An instance of `tf.Optimizer` used to train the model. Defaults
+        to FTRL optimizer.
+      config: `RunConfig` object to configure the runtime settings.
+      partitioner: Optional. Partitioner for input layer.
+    """
+    def _model_fn(features, labels, mode, config):
+      return linear_lib._linear_model_fn(  # pylint: disable=protected-access
+          features=features,
+          labels=labels,
+          mode=mode,
+          head=head,
+          feature_columns=tuple(feature_columns or []),
+          optimizer=optimizer,
+          partitioner=partitioner,
+          config=config)
+    super(LinearEstimator, self).__init__(
+        model_fn=_model_fn, model_dir=model_dir, config=config)
diff --git a/tensorflow/contrib/estimator/python/estimator/linear_test.py b/tensorflow/contrib/estimator/python/estimator/linear_test.py
new file mode 100644
index 0000000000..c63514eb68
--- /dev/null
+++ b/tensorflow/contrib/estimator/python/estimator/linear_test.py
@@ -0,0 +1,153 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for linear.py."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import shutil
+import tempfile
+
+import numpy as np
+import six
+
+from tensorflow.contrib.estimator.python.estimator import head as head_lib
+from tensorflow.contrib.estimator.python.estimator import linear
+from tensorflow.python.estimator.canned import linear_testing_utils
+from tensorflow.python.estimator.canned import prediction_keys
+from tensorflow.python.estimator.export import export
+from tensorflow.python.estimator.inputs import numpy_io
+from tensorflow.python.feature_column import feature_column
+from tensorflow.python.framework import ops
+from tensorflow.python.platform import gfile
+from tensorflow.python.platform import test
+from tensorflow.python.summary.writer import writer_cache
+
+
+def _linear_estimator_fn(
+    weight_column=None, label_dimension=1, *args, **kwargs):
+  """Returns a LinearEstimator that uses regression_head."""
+  return linear.LinearEstimator(
+      head=head_lib.regression_head(
+          weight_column=weight_column, label_dimension=label_dimension),
+      *args, **kwargs)
+
+
+class LinearEstimatorEvaluateTest(
+    linear_testing_utils.BaseLinearRegressorEvaluationTest, test.TestCase):
+
+  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
+    test.TestCase.__init__(self, methodName)
+    linear_testing_utils.BaseLinearRegressorEvaluationTest.__init__(
+        self, _linear_estimator_fn)
+
+
+class LinearEstimatorPredictTest(
+    linear_testing_utils.BaseLinearRegressorPredictTest, test.TestCase):
+
+  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
+    test.TestCase.__init__(self, methodName)
+    linear_testing_utils.BaseLinearRegressorPredictTest.__init__(
+        self, _linear_estimator_fn)
+
+
+class LinearEstimatorTrainTest(
+    linear_testing_utils.BaseLinearRegressorTrainingTest, test.TestCase):
+
+  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
+    test.TestCase.__init__(self, methodName)
+    linear_testing_utils.BaseLinearRegressorTrainingTest.__init__(
+        self, _linear_estimator_fn)
+
+
+class LinearEstimatorIntegrationTest(test.TestCase):
+
+  def setUp(self):
+    self._model_dir = tempfile.mkdtemp()
+
+  def tearDown(self):
+    if self._model_dir:
+      writer_cache.FileWriterCache.clear()
+      shutil.rmtree(self._model_dir)
+
+  def _test_complete_flow(
+      self, train_input_fn, eval_input_fn, predict_input_fn, input_dimension,
+      label_dimension, batch_size):
+    feature_columns = [
+        feature_column.numeric_column('x', shape=(input_dimension,))]
+    est = linear.LinearEstimator(
+        head=head_lib.regression_head(label_dimension=label_dimension),
+        feature_columns=feature_columns,
+        model_dir=self._model_dir)
+
+    # TRAIN
+    num_steps = 10
+    est.train(train_input_fn, steps=num_steps)
+
+    # EVALUTE
+    scores = est.evaluate(eval_input_fn)
+    self.assertEqual(num_steps, scores[ops.GraphKeys.GLOBAL_STEP])
+    self.assertIn('loss', six.iterkeys(scores))
+
+    # PREDICT
+    predictions = np.array([
+        x[prediction_keys.PredictionKeys.PREDICTIONS]
+        for x in est.predict(predict_input_fn)
+    ])
+    self.assertAllEqual((batch_size, label_dimension), predictions.shape)
+
+    # EXPORT
+    feature_spec = feature_column.make_parse_example_spec(feature_columns)
+    serving_input_receiver_fn = export.build_parsing_serving_input_receiver_fn(
+        feature_spec)
+    export_dir = est.export_savedmodel(tempfile.mkdtemp(),
+                                       serving_input_receiver_fn)
+    self.assertTrue(gfile.Exists(export_dir))
+
+  def test_numpy_input_fn(self):
+    """Tests complete flow with numpy_input_fn."""
+    label_dimension = 2
+    batch_size = 10
+    data = np.linspace(0., 2., batch_size * label_dimension, dtype=np.float32)
+    data = data.reshape(batch_size, label_dimension)
+    # learn y = x
+    train_input_fn = numpy_io.numpy_input_fn(
+        x={'x': data},
+        y=data,
+        batch_size=batch_size,
+        num_epochs=None,
+        shuffle=True)
+    eval_input_fn = numpy_io.numpy_input_fn(
+        x={'x': data},
+        y=data,
+        batch_size=batch_size,
+        shuffle=False)
+    predict_input_fn = numpy_io.numpy_input_fn(
+        x={'x': data},
+        batch_size=batch_size,
+        shuffle=False)
+
+    self._test_complete_flow(
+        train_input_fn=train_input_fn,
+        eval_input_fn=eval_input_fn,
+        predict_input_fn=predict_input_fn,
+        input_dimension=label_dimension,
+        label_dimension=label_dimension,
+        batch_size=batch_size)
+
+
+if __name__ == '__main__':
+  test.main()
-- 
GitLab


From 6c0dd242e76f494dbab42b5b7621d2513168c5bb Mon Sep 17 00:00:00 2001
From: Yu-Cheng Ling <ycling@google.com>
Date: Tue, 21 Nov 2017 16:58:23 -0800
Subject: [PATCH 0193/1225] Fix a non-critical problem in TFLite README

Currently it uses `bazel build` to build non-optimized binary,
then `bazel run` to build and run optimized binary. It doubles
the required build time to try Toco converter.

PiperOrigin-RevId: 176581770
---
 tensorflow/contrib/lite/README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/contrib/lite/README.md b/tensorflow/contrib/lite/README.md
index b5df986686..385ccf4680 100644
--- a/tensorflow/contrib/lite/README.md
+++ b/tensorflow/contrib/lite/README.md
@@ -154,7 +154,7 @@ Here is a sample command line to convert the frozen Graphdef to '.lite' format f
 ```
 bazel build tensorflow/contrib/lite/toco:toco
 
-bazel run --config=opt tensorflow/contrib/lite/toco:toco -- \
+bazel-bin/tensorflow/contrib/lite/toco/toco -- \
   --input_file=(pwd)/mobilenet_v1_1.0_224/frozen_graph.pb \
   --input_format=TENSORFLOW_GRAPHDEF  --output_format=TFLITE \
   --output_file=/tmp/mobilenet_v1_1.0_224.lite --inference_type=FLOAT \
-- 
GitLab


From 88fe67d1432124cb74df8318603e4a82e3763fb3 Mon Sep 17 00:00:00 2001
From: Saurabh Saxena <srbs@google.com>
Date: Tue, 21 Nov 2017 17:01:33 -0800
Subject: [PATCH 0194/1225] Make FlapMapDataset saveable. Add test to verify
 restoring iterator in an empty graph works to the testing base class. Fix a
 bug in GraphDefBuilderWrapper::AddFunction where we were not adding functions
 referenced by the attrs of the NodeDefs in the FunctionDef.
 FlatMapDatasetSerializationTest.testMapThenFlatMap makes sure this works now.

PiperOrigin-RevId: 176582131
---
 .../contrib/data/python/kernel_tests/BUILD    |   6 +
 .../kernel_tests/flat_map_dataset_op_test.py  |  79 ++++++++++
 tensorflow/core/kernels/dataset.h             |  42 +++--
 .../core/kernels/flat_map_dataset_op.cc       | 147 ++++++++++++++++--
 4 files changed, 254 insertions(+), 20 deletions(-)

diff --git a/tensorflow/contrib/data/python/kernel_tests/BUILD b/tensorflow/contrib/data/python/kernel_tests/BUILD
index 3b5f2db322..1923c0586a 100644
--- a/tensorflow/contrib/data/python/kernel_tests/BUILD
+++ b/tensorflow/contrib/data/python/kernel_tests/BUILD
@@ -159,13 +159,19 @@ py_test(
     srcs = ["flat_map_dataset_op_test.py"],
     srcs_version = "PY2AND3",
     deps = [
+        ":dataset_serialization_test",
         "//tensorflow/contrib/data/python/ops:dataset_ops",
         "//tensorflow/python:array_ops",
         "//tensorflow/python:client_testlib",
+        "//tensorflow/python:constant_op",
         "//tensorflow/python:dtypes",
         "//tensorflow/python:errors",
+        "//tensorflow/python:function",
+        "//tensorflow/python:math_ops",
+        "//tensorflow/python:random_ops",
         "//tensorflow/python:session",
         "//tensorflow/python:training",
+        "//tensorflow/python:variable_scope",
         "//third_party/py/numpy",
     ],
 )
diff --git a/tensorflow/contrib/data/python/kernel_tests/flat_map_dataset_op_test.py b/tensorflow/contrib/data/python/kernel_tests/flat_map_dataset_op_test.py
index c950e4857e..ddb4bc34f3 100644
--- a/tensorflow/contrib/data/python/kernel_tests/flat_map_dataset_op_test.py
+++ b/tensorflow/contrib/data/python/kernel_tests/flat_map_dataset_op_test.py
@@ -21,11 +21,18 @@ import random
 
 import numpy as np
 
+from tensorflow.contrib.data.python.kernel_tests import dataset_serialization_test_base
 from tensorflow.contrib.data.python.ops import dataset_ops
 from tensorflow.python.client import session
+from tensorflow.python.framework import constant_op
+from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import errors
+from tensorflow.python.framework import function
 from tensorflow.python.framework import sparse_tensor
+from tensorflow.python.ops import math_ops
+from tensorflow.python.ops import random_ops
 from tensorflow.python.ops import sparse_ops
+from tensorflow.python.ops import variable_scope
 from tensorflow.python.platform import test
 from tensorflow.python.training import server_lib
 
@@ -147,5 +154,77 @@ class FlatMapDatasetTest(test.TestCase):
         sess.run(get_next)
 
 
+class FlatMapDatasetSerializationTest(
+    dataset_serialization_test_base.DatasetSerializationTestBase):
+
+  def testCore(self):
+    # Complicated way of saying range(start, start+25).
+    def build_ds(start):
+
+      def map_fn(x):
+        return dataset_ops.Dataset.range(x, x + 5)
+
+      return dataset_ops.Dataset.range(start, start + 5 * 5, 5).flat_map(map_fn)
+
+    self.run_core_tests(lambda: build_ds(0), lambda: build_ds(10), 25)
+
+  def testMapThenFlatMap(self):
+
+    def build_ds():
+
+      def flat_map_fn(_):
+
+        def map_fn(y):
+          return 10 * math_ops.to_int32(y)
+
+        return dataset_ops.Dataset.range(100).map(map_fn)
+
+      return dataset_ops.Dataset.range(5).flat_map(flat_map_fn)
+
+    self.run_core_tests(build_ds, None, 500)
+
+  def testCaptureDefunInMapFn(self):
+
+    def build_ds():
+
+      def map_fn(x):
+
+        @function.Defun(dtypes.int64)
+        def defun_fn(x):
+          return constant_op.constant(1000) + math_ops.to_int32(x)
+
+        return dataset_ops.Dataset.from_tensor_slices([defun_fn(x)])
+
+      return dataset_ops.Dataset.range(100).flat_map(map_fn)
+
+    self.run_core_tests(build_ds, None, 100)
+
+  def testDisallowVariableCapture(self):
+
+    def build_ds():
+      test_var = variable_scope.get_variable(
+          name="test_var", shape=(), use_resource=True)
+      return dataset_ops.Dataset.range(5).flat_map(
+          lambda _: dataset_ops.Dataset.from_tensor_slices([test_var]))
+
+    self.verify_error_on_save(build_ds, 5, errors.InvalidArgumentError)
+
+  def testDisallowCapturingStatefulOps(self):
+
+    def build_ds():
+
+      def flat_map_fn(_):
+
+        def map_fn(x):
+          return random_ops.random_uniform(
+              (), 0, 10, dtype=dtypes.int32) * math_ops.to_int32(x)
+
+        return dataset_ops.Dataset.range(100).map(map_fn)
+
+      return dataset_ops.Dataset.range(5).flat_map(flat_map_fn)
+
+    self.verify_error_on_save(build_ds, 500, errors.InvalidArgumentError)
+
+
 if __name__ == "__main__":
   test.main()
diff --git a/tensorflow/core/kernels/dataset.h b/tensorflow/core/kernels/dataset.h
index c266bc07c1..18b57ec97a 100644
--- a/tensorflow/core/kernels/dataset.h
+++ b/tensorflow/core/kernels/dataset.h
@@ -238,19 +238,16 @@ class GraphDefBuilderWrapper {
       if (op_reg_data->is_function_op) {
         TF_RETURN_IF_ERROR(AddFunction(ctx, op_reg_data->op_def.name()));
       }
+      // Recursively add functions in attrs of this NodeDef.
+      for (const auto& pair : node_def.attr()) {
+        TF_RETURN_IF_ERROR(AddAttrFunctions(pair.second, ctx));
+      }
     }
 
     // Recursively add functions in attrs of function_name.
     for (auto iter = f_def->attr().begin(); iter != f_def->attr().end();
          iter++) {
-      const AttrValue& attr_value = iter->second;
-      if (attr_value.has_func()) {
-        TF_RETURN_IF_ERROR(AddFunction(ctx, attr_value.func().name()));
-      } else if (attr_value.has_list()) {
-        for (const NameAttrList& name_attr_list : attr_value.list().func()) {
-          TF_RETURN_IF_ERROR(AddFunction(ctx, name_attr_list.name()));
-        }
-      }
+      TF_RETURN_IF_ERROR(AddAttrFunctions(iter->second, ctx));
     }
     return Status::OK();
   }
@@ -279,6 +276,13 @@ class GraphDefBuilderWrapper {
     for (const NodeDef& node_def : function_def->node_def()) {
       const OpDef* op_def;
       TF_RETURN_IF_ERROR(lib_def->LookUpOpDef(node_def.op(), &op_def));
+      // TODO(b/65524810): Hack to allow functions to capture Dataset op
+      // nodes needed for FlatMap. Currently, source datasets nodes have been
+      // marked stateful to avoid constant folding since we do not have a
+      // good way of serializing them.
+      if (IsOpWhitelisted(op_def)) {
+        continue;
+      }
       if (op_def->is_stateful()) {
         return errors::InvalidArgument(
             "Op[name: ", node_def.name(), ", type: ", node_def.op(), "] ",
@@ -289,12 +293,21 @@ class GraphDefBuilderWrapper {
     return Status::OK();
   }
 
-  bool HasAttr(const string& op_type_name, const string& attr_name) {
+  bool IsOpWhitelisted(const OpDef* op_def) const {
+    return StringPiece(op_def->name()).ends_with("Dataset") &&
+           HasAttr(op_def, "output_shapes");
+  }
+
+  bool HasAttr(const string& op_type_name, const string& attr_name) const {
     const OpDef* op_def = nullptr;
     Status s = b_->opts().op_registry()->LookUpOpDef(op_type_name, &op_def);
     if (!s.ok() || op_def == nullptr) {
       return false;
     }
+    return HasAttr(op_def, attr_name);
+  }
+
+  bool HasAttr(const OpDef* op_def, const string& attr_name) const {
     for (auto attr : op_def->attr()) {
       if (attr.name() == attr_name) {
         return true;
@@ -303,6 +316,17 @@ class GraphDefBuilderWrapper {
     return false;
   }
 
+  Status AddAttrFunctions(const AttrValue& attr_value, OpKernelContext* ctx) {
+    if (attr_value.has_func()) {
+      TF_RETURN_IF_ERROR(AddFunction(ctx, attr_value.func().name()));
+    } else if (attr_value.has_list()) {
+      for (const NameAttrList& name_attr_list : attr_value.list().func()) {
+        TF_RETURN_IF_ERROR(AddFunction(ctx, name_attr_list.name()));
+      }
+    }
+    return Status::OK();
+  }
+
   GraphDefBuilder* b_;
 };
 
diff --git a/tensorflow/core/kernels/flat_map_dataset_op.cc b/tensorflow/core/kernels/flat_map_dataset_op.cc
index e62a43e94c..ac1689e5bf 100644
--- a/tensorflow/core/kernels/flat_map_dataset_op.cc
+++ b/tensorflow/core/kernels/flat_map_dataset_op.cc
@@ -54,18 +54,21 @@ class FlatMapDatasetOp : public UnaryDatasetOpKernel {
                                                  std::move(other_arguments),
                                                  &captured_func));
 
-    *output = new Dataset(input, std::move(captured_func), output_types_,
-                          output_shapes_);
+    *output = new Dataset(ctx, input, func_, std::move(captured_func),
+                          output_types_, output_shapes_);
   }
 
  private:
-  class Dataset : public DatasetBase {
+  class Dataset : public GraphDatasetBase {
    public:
-    Dataset(const DatasetBase* input,
+    Dataset(OpKernelContext* ctx, const DatasetBase* input,
+            const NameAttrList& func,
             std::unique_ptr<CapturedFunction> captured_func,
             const DataTypeVector& output_types,
             const std::vector<PartialTensorShape>& output_shapes)
-        : input_(input),
+        : GraphDatasetBase(ctx),
+          input_(input),
+          func_(func),
           captured_func_(std::move(captured_func)),
           output_types_(output_types),
           output_shapes_(output_shapes) {
@@ -90,6 +93,37 @@ class FlatMapDatasetOp : public UnaryDatasetOpKernel {
 
     string DebugString() override { return "FlatMapDatasetOp::Dataset"; }
 
+   protected:
+    Status AsGraphDefInternal(OpKernelContext* ctx, DatasetGraphDefBuilder* b,
+                              Node** output) const override {
+      TF_RETURN_IF_ERROR(b->AddFunction(ctx, func_.name()));
+      Node* input_graph_node = nullptr;
+      TF_RETURN_IF_ERROR(b->AddParentDataset(ctx, input_, &input_graph_node));
+
+      DataTypeVector other_arguments_types;
+      other_arguments_types.reserve(captured_func_->captured_inputs().size());
+      std::vector<NodeBuilder::NodeOut> other_arguments;
+      other_arguments.reserve(captured_func_->captured_inputs().size());
+      for (const Tensor& t : captured_func_->captured_inputs()) {
+        Node* node;
+        TF_RETURN_IF_ERROR(b->AddTensor(t, &node));
+        other_arguments.emplace_back(node);
+        other_arguments_types.emplace_back(t.dtype());
+      }
+      AttrValue f;
+      b->BuildAttrValue(func_, &f);
+      AttrValue other_arguments_types_attr;
+      b->BuildAttrValue(other_arguments_types, &other_arguments_types_attr);
+
+      TF_RETURN_IF_ERROR(b->AddDataset(
+          this, {std::make_pair(0, input_graph_node)},  // Single tensor inputs.
+          {std::make_pair(1, other_arguments)},         // Tensor list inputs.
+          {std::make_pair("f", f),
+           std::make_pair("Targuments", other_arguments_types_attr)},  // Attrs
+          output));
+      return Status::OK();
+    }
+
    private:
     class Iterator : public DatasetIterator<Dataset> {
      public:
@@ -102,6 +136,10 @@ class FlatMapDatasetOp : public UnaryDatasetOpKernel {
                              bool* end_of_sequence) override {
         mutex_lock l(mu_);
         do {
+          if (!input_impl_) {
+            *end_of_sequence = true;
+            return Status::OK();
+          }
           if (current_element_iterator_) {
             // We are currently precessing a mapped element, so try to get the
             // next subelement.
@@ -120,26 +158,113 @@ class FlatMapDatasetOp : public UnaryDatasetOpKernel {
           }
 
           // Get the next element from the input dataset.
-          std::vector<Tensor> args;
-          TF_RETURN_IF_ERROR(input_impl_->GetNext(ctx, &args, end_of_sequence));
+          captured_func_inputs_.clear();
+          TF_RETURN_IF_ERROR(input_impl_->GetNext(ctx, &captured_func_inputs_,
+                                                  end_of_sequence));
           if (*end_of_sequence) {
+            input_impl_.reset();
             return Status::OK();
           }
 
-          TF_RETURN_IF_ERROR(dataset::MakeIteratorFromInputElement(
-              ctx, args, element_index_++, dataset()->captured_func_.get(),
-              prefix(), &current_element_iterator_));
+          TF_RETURN_IF_ERROR(BuildCurrentElementIteratorLocked(ctx));
         } while (true);
       }
 
+     protected:
+      Status SaveInternal(IteratorStateWriter* writer) override {
+        mutex_lock l(mu_);
+        if (input_impl_) {
+          TF_RETURN_IF_ERROR(SaveParent(writer, input_impl_));
+          TF_RETURN_IF_ERROR(
+              writer->WriteScalar(full_name("element_index"), element_index_));
+          if (current_element_iterator_) {
+            TF_RETURN_IF_ERROR(
+                writer->WriteScalar(full_name("captured_func_inputs_size"),
+                                    captured_func_inputs_.size()));
+            for (int i = 0; i < captured_func_inputs_.size(); i++) {
+              TF_RETURN_IF_ERROR(writer->WriteTensor(
+                  full_name(strings::StrCat("captured_func_inputs[", i, "]")),
+                  captured_func_inputs_[i]));
+            }
+            TF_RETURN_IF_ERROR(SaveParent(writer, current_element_iterator_));
+          } else {
+            TF_RETURN_IF_ERROR(writer->WriteScalar(
+                full_name("current_element_iterator_uninitialized"), ""));
+          }
+        } else {
+          TF_RETURN_IF_ERROR(writer->WriteScalar(full_name("exhausted"), ""));
+        }
+        return Status::OK();
+      }
+
+      Status RestoreInternal(OpKernelContext* ctx,
+                             IteratorStateReader* reader) override {
+        mutex_lock l(mu_);
+        input_impl_.reset();
+        element_index_ = 0;
+        current_element_iterator_.reset();
+        captured_func_inputs_.clear();
+        if (!reader->Contains(full_name("exhausted"))) {
+          input_impl_ = dataset()->input_->MakeIterator(prefix());
+          TF_RETURN_IF_ERROR(RestoreParent(ctx, reader, input_impl_));
+          {
+            int64 temp;
+            TF_RETURN_IF_ERROR(
+                reader->ReadScalar(full_name("element_index"), &temp));
+            element_index_ = temp;
+          }
+          if (!reader->Contains(
+                  full_name("current_element_iterator_uninitialized"))) {
+            size_t captured_func_inputs_size;
+            {
+              int64 temp;
+              TF_RETURN_IF_ERROR(reader->ReadScalar(
+                  full_name("captured_func_inputs_size"), &temp));
+              captured_func_inputs_size = static_cast<size_t>(temp);
+            }
+            captured_func_inputs_.reserve(captured_func_inputs_size);
+            for (int i = 0; i < captured_func_inputs_size; i++) {
+              captured_func_inputs_.emplace_back();
+              TF_RETURN_IF_ERROR(reader->ReadTensor(
+                  full_name(strings::StrCat("captured_func_inputs[", i, "]")),
+                  &captured_func_inputs_.back()));
+            }
+            element_index_--;
+            TF_RETURN_IF_ERROR(BuildCurrentElementIteratorLocked(ctx));
+            TF_RETURN_IF_ERROR(
+                RestoreParent(ctx, reader, current_element_iterator_));
+          }
+        }
+        return Status::OK();
+      }
+
      private:
+      Status BuildCurrentElementIteratorLocked(IteratorContext* ctx)
+          EXCLUSIVE_LOCKS_REQUIRED(mu_) {
+        return dataset::MakeIteratorFromInputElement(
+            ctx, captured_func_inputs_, element_index_++,
+            dataset()->captured_func_.get(), prefix(),
+            &current_element_iterator_);
+      }
+
+      Status BuildCurrentElementIteratorLocked(OpKernelContext* ctx)
+          EXCLUSIVE_LOCKS_REQUIRED(mu_) {
+        IteratorContext::Params params;
+        params.env = ctx->env();
+        params.runner = *(ctx->runner());
+        IteratorContext iter_ctx(std::move(params));
+        return BuildCurrentElementIteratorLocked(&iter_ctx);
+      }
+
       mutex mu_;
       size_t element_index_ GUARDED_BY(mu_) = 0;
-      const std::unique_ptr<IteratorBase> input_impl_ GUARDED_BY(mu_);
+      std::unique_ptr<IteratorBase> input_impl_ GUARDED_BY(mu_);
       std::unique_ptr<IteratorBase> current_element_iterator_ GUARDED_BY(mu_);
+      std::vector<Tensor> captured_func_inputs_ GUARDED_BY(mu_);
     };
 
     const DatasetBase* const input_;
+    const NameAttrList func_;
     const std::unique_ptr<CapturedFunction> captured_func_;
     const DataTypeVector output_types_;
     const std::vector<PartialTensorShape> output_shapes_;
-- 
GitLab


From c518d35b9077bd193321f8b66dfb958ce9ab61cd Mon Sep 17 00:00:00 2001
From: Kay Zhu <kayzhu@google.com>
Date: Tue, 21 Nov 2017 17:02:29 -0800
Subject: [PATCH 0195/1225] [XLA] Enable explicit broadcast for ternary
 operations.

Also explicitly broadcast constant 1 in algsimp for pow(x, -1) => 1/x transformation, so that:
  - we can avoid implicit broadcast which we are trying to eliminate at HLO level.
  - interpreter, which does not support implicit broadcast, now passes the PowSpecialF32 test case in array_elementwise_ops_test which generates a divide(1.F32[], param.F[4]) instruction that requires implicit broadcast.

PiperOrigin-RevId: 176582286
---
 .../xla/service/algebraic_simplifier.cc       |  8 +++++++-
 .../xla/service/algebraic_simplifier_test.cc  |  6 ++++--
 .../compiler/xla/service/user_computation.cc  | 19 +++++++++++++++++++
 tensorflow/compiler/xla/tests/BUILD           |  1 +
 4 files changed, 31 insertions(+), 3 deletions(-)

diff --git a/tensorflow/compiler/xla/service/algebraic_simplifier.cc b/tensorflow/compiler/xla/service/algebraic_simplifier.cc
index 1764f7f3dc..5dcc1318c9 100644
--- a/tensorflow/compiler/xla/service/algebraic_simplifier.cc
+++ b/tensorflow/compiler/xla/service/algebraic_simplifier.cc
@@ -1108,9 +1108,15 @@ Status AlgebraicSimplifierVisitor::HandlePower(HloInstruction* power) {
   if (IsAll(rhs, -1)) {
     auto* one = computation_->AddInstruction(HloInstruction::CreateConstant(
         Literal::One(rhs->shape().element_type()).CloneToUnique()));
+
+    // Explicitly broadcast scalar 1 to the output shape, to avoid implicit
+    // broadcast in divide HLO as we are trying to eliminate implicit
+    // broadcasting at HLO level.
+    auto* broadcast_one = computation_->AddInstruction(
+        HloInstruction::CreateBroadcast(power->shape(), one, {}));
     return ReplaceWithNewInstruction(
         power, HloInstruction::CreateBinary(power->shape(), HloOpcode::kDivide,
-                                            one, lhs));
+                                            broadcast_one, lhs));
   }
   return Status::OK();
 }
diff --git a/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc b/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc
index 620f0a54fa..097f30be32 100644
--- a/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc
+++ b/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc
@@ -761,8 +761,10 @@ TEST_F(AlgebraicSimplifierTest, PowNegative1) {
   ASSERT_TRUE(simplifier.Run(module.get()).ValueOrDie());
 
   HloInstruction* root = computation->root_instruction();
-  EXPECT_THAT(root, op::Divide(op::Constant(), param0));
-  EXPECT_EQ(root->operand(0)->literal().GetFirstElement<float>(), 1);
+  EXPECT_THAT(root, op::Divide(op::Broadcast(), param0));
+  EXPECT_EQ(root->operand(0)->opcode(), HloOpcode::kBroadcast);
+  EXPECT_EQ(root->operand(0)->operand(0)->literal().GetFirstElement<float>(),
+            1);
 }
 
 TEST_F(AlgebraicSimplifierTest, ReshapeBroadcast) {
diff --git a/tensorflow/compiler/xla/service/user_computation.cc b/tensorflow/compiler/xla/service/user_computation.cc
index 8f63c92e5b..b449b4f288 100644
--- a/tensorflow/compiler/xla/service/user_computation.cc
+++ b/tensorflow/compiler/xla/service/user_computation.cc
@@ -2978,6 +2978,25 @@ void ComputationLowerer::Visit(
       HloInstruction* rhs = lookup_instruction(ternary_op_request.rhs());
       HloInstruction* ehs = lookup_instruction(ternary_op_request.ehs());
       auto hlo_opcode = TernaryOperationToHloOpcode(ternary_op_request.triop());
+
+      if (debug_options_.xla_eliminate_hlo_implicit_broadcast()) {
+        if (!ShapeUtil::SameDimensions(request.output_shape(), lhs->shape())) {
+          // lhs side is being implicitly broadcast. Change to explicit.
+          lhs =
+              ImplicitBroadcastToExplicitBroadcast(lhs, request.output_shape());
+        }
+
+        if (!ShapeUtil::SameDimensions(request.output_shape(), rhs->shape())) {
+          rhs =
+              ImplicitBroadcastToExplicitBroadcast(rhs, request.output_shape());
+        }
+
+        if (!ShapeUtil::SameDimensions(request.output_shape(), ehs->shape())) {
+          ehs =
+              ImplicitBroadcastToExplicitBroadcast(ehs, request.output_shape());
+        }
+      }
+
       hlo_instruction = add_instruction(HloInstruction::CreateTernary(
           request.output_shape(), hlo_opcode, lhs, rhs, ehs));
       break;
diff --git a/tensorflow/compiler/xla/tests/BUILD b/tensorflow/compiler/xla/tests/BUILD
index 2e220e7293..aa1804cc21 100644
--- a/tensorflow/compiler/xla/tests/BUILD
+++ b/tensorflow/compiler/xla/tests/BUILD
@@ -511,6 +511,7 @@ xla_test(
     name = "array_elementwise_ops_test",
     srcs = ["array_elementwise_ops_test.cc"],
     shard_count = 25,
+    tags = ["enable_for_xla_interpreter"],
     deps = [
         "//tensorflow/compiler/xla:array2d",
         "//tensorflow/compiler/xla:array3d",
-- 
GitLab


From ecfd154cacf9d886b02b91bc7f518e75e5f9c6b9 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 21 Nov 2017 17:22:18 -0800
Subject: [PATCH 0196/1225] Changed StringPiece::Hasher to StringPieceHasher in
 various places. This will allow the Hasher alias to be removed from
 StringPiece.

PiperOrigin-RevId: 176584316
---
 tensorflow/contrib/tensorboard/db/summary_db_writer.cc | 2 +-
 tensorflow/core/lib/core/stringpiece_test.cc           | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/tensorflow/contrib/tensorboard/db/summary_db_writer.cc b/tensorflow/contrib/tensorboard/db/summary_db_writer.cc
index 18f0f1e97a..37a32acb1e 100644
--- a/tensorflow/contrib/tensorboard/db/summary_db_writer.cc
+++ b/tensorflow/contrib/tensorboard/db/summary_db_writer.cc
@@ -278,7 +278,7 @@ class GraphSaver {
   GraphDef* graph_;
   int64 graph_id_;
   std::vector<string> name_copies_;
-  std::unordered_map<StringPiece, int64, StringPiece::Hasher> name_to_node_id_;
+  std::unordered_map<StringPiece, int64, StringPieceHasher> name_to_node_id_;
 };
 
 class SummaryDbWriter : public SummaryWriterInterface {
diff --git a/tensorflow/core/lib/core/stringpiece_test.cc b/tensorflow/core/lib/core/stringpiece_test.cc
index 11554554e8..a1d335c4e1 100644
--- a/tensorflow/core/lib/core/stringpiece_test.cc
+++ b/tensorflow/core/lib/core/stringpiece_test.cc
@@ -95,7 +95,7 @@ TEST(StringPieceHasher, HashMap) {
   StringPiece p2(s2);
   StringPiece p3(s3);
 
-  std::unordered_map<StringPiece, int, StringPiece::Hasher> map;
+  std::unordered_map<StringPiece, int, StringPieceHasher> map;
 
   map.insert(std::make_pair(p1, 0));
   map.insert(std::make_pair(p2, 1));
-- 
GitLab


From 6ad9e45abf68f62caf7a9fe7adb7ef30da5ea6ca Mon Sep 17 00:00:00 2001
From: Yao Zhang <yaozhang@google.com>
Date: Tue, 21 Nov 2017 17:22:49 -0800
Subject: [PATCH 0197/1225] Support depthwise conv ops.

PiperOrigin-RevId: 176584368
---
 tensorflow/core/grappler/op_types.cc          | 15 +++++++
 tensorflow/core/grappler/op_types.h           |  3 ++
 .../grappler/optimizers/layout_optimizer.cc   | 35 ++++++++++------
 .../python/grappler/layout_optimizer_test.py  | 40 ++++++++++++++-----
 4 files changed, 72 insertions(+), 21 deletions(-)

diff --git a/tensorflow/core/grappler/op_types.cc b/tensorflow/core/grappler/op_types.cc
index 3a39045a4a..1b23a4caba 100644
--- a/tensorflow/core/grappler/op_types.cc
+++ b/tensorflow/core/grappler/op_types.cc
@@ -69,6 +69,21 @@ bool IsConv2DBackpropInput(const NodeDef& node) {
   return op == "Conv2DBackpropInput";
 }
 
+bool IsDepthwiseConv2dNative(const NodeDef& node) {
+  const auto op = node.op();
+  return op == "DepthwiseConv2dNative";
+}
+
+bool IsDepthwiseConv2dNativeBackpropFilter(const NodeDef& node) {
+  const auto op = node.op();
+  return op == "DepthwiseConv2dNativeBackpropFilter";
+}
+
+bool IsDepthwiseConv2dNativeBackpropInput(const NodeDef& node) {
+  const auto op = node.op();
+  return op == "DepthwiseConv2dNativeBackpropInput";
+}
+
 bool IsDequeueOp(const NodeDef& node) {
   const auto& op = node.op();
   return op == "QueueDequeueManyV2" || op == "QueueDequeueMany" ||
diff --git a/tensorflow/core/grappler/op_types.h b/tensorflow/core/grappler/op_types.h
index b7a55f3f21..85260efa93 100644
--- a/tensorflow/core/grappler/op_types.h
+++ b/tensorflow/core/grappler/op_types.h
@@ -31,6 +31,9 @@ bool IsConstant(const NodeDef& node);
 bool IsConv2D(const NodeDef& node);
 bool IsConv2DBackpropFilter(const NodeDef& node);
 bool IsConv2DBackpropInput(const NodeDef& node);
+bool IsDepthwiseConv2dNative(const NodeDef& node);
+bool IsDepthwiseConv2dNativeBackpropFilter(const NodeDef& node);
+bool IsDepthwiseConv2dNativeBackpropInput(const NodeDef& node);
 bool IsDequeueOp(const NodeDef& node);
 bool IsEnter(const NodeDef& node);
 bool IsExit(const NodeDef& node);
diff --git a/tensorflow/core/grappler/optimizers/layout_optimizer.cc b/tensorflow/core/grappler/optimizers/layout_optimizer.cc
index aaa1b7a316..b00e1cb011 100644
--- a/tensorflow/core/grappler/optimizers/layout_optimizer.cc
+++ b/tensorflow/core/grappler/optimizers/layout_optimizer.cc
@@ -45,18 +45,22 @@ const char kReshapeConst[] = "LayoutOptimizerReshapeConst";
 const char kReductionConst[] = "LayoutOptimizerReductionConst";
 
 std::set<string> GetOpsFormatSupported() {
-  std::set<string> ops_format_supported = {"AvgPool",
-                                           "AvgPoolGrad",
-                                           "Conv2D",
-                                           "Conv2DBackpropFilter",
-                                           "Conv2DBackpropInput",
-                                           "BiasAdd",
-                                           "BiasAddGrad",
-                                           "FusedBatchNorm",
-                                           "FusedBatchNormGrad",
-                                           "FusedConv2DBiasActivation",
-                                           "MaxPool",
-                                           "MaxPoolGrad"};
+  std::set<string> ops_format_supported = {
+      "AvgPool",
+      "AvgPoolGrad",
+      "Conv2D",
+      "Conv2DBackpropFilter",
+      "Conv2DBackpropInput",
+      "BiasAdd",
+      "BiasAddGrad",
+      "DepthwiseConv2dNative",
+      "DepthwiseConv2dNativeBackpropInput",
+      "DepthwiseConv2dNativeBackpropFilter",
+      "FusedBatchNorm",
+      "FusedBatchNormGrad",
+      "FusedConv2DBiasActivation",
+      "MaxPool",
+      "MaxPoolGrad"};
   return ops_format_supported;
 }
 
@@ -1289,6 +1293,13 @@ class DataLayoutOptimizer : GraphProcessor {
         } else if (IsConv2DBackpropInput(*node)) {
           node_processor.reset(
               new Conv2DBackpropInputProcessor(opt_cxt, config_.no_gemm));
+        } else if (IsDepthwiseConv2dNative(*node)) {
+          node_processor.reset(new Conv2DProcessor(opt_cxt, true));
+        } else if (IsDepthwiseConv2dNativeBackpropFilter(*node)) {
+          node_processor.reset(
+              new Conv2DBackpropFilterProcessor(opt_cxt, true));
+        } else if (IsDepthwiseConv2dNativeBackpropInput(*node)) {
+          node_processor.reset(new Conv2DBackpropInputProcessor(opt_cxt, true));
         } else if (IsFusedBatchNormGradV1(*node)) {
           node_processor.reset(new FusedBatchNormGradProcessor(opt_cxt));
         } else if (IsMaxPoolGradV1(*node)) {
diff --git a/tensorflow/python/grappler/layout_optimizer_test.py b/tensorflow/python/grappler/layout_optimizer_test.py
index 350c8434ce..626e0502cb 100644
--- a/tensorflow/python/grappler/layout_optimizer_test.py
+++ b/tensorflow/python/grappler/layout_optimizer_test.py
@@ -102,11 +102,12 @@ def _get_config(layout_optimizer=True):
   return config
 
 
-def _simple_metagraph():
+def _simple_metagraph(depthwise=False):
   random_seed.set_random_seed(0)
   x = variables.Variable(random_ops.truncated_normal([1, 200, 200, 3], seed=0))
-  y = conv_layers.conv2d(x, 32, [3, 3])
-  z = conv_layers.conv2d(y, 32, [3, 3])
+  conv = conv_layers.separable_conv2d if depthwise else conv_layers.conv2d
+  y = conv(x, 32, [3, 3])
+  z = conv(y, 32, [3, 3])
   optimizer = gradient_descent.GradientDescentOptimizer(1e-4)
   loss = math_ops.reduce_mean(z)
   train_op = optimizer.minimize(loss)
@@ -116,6 +117,15 @@ def _simple_metagraph():
   return meta_graph
 
 
+def _get_cluster():
+  named_device = device_properties_pb2.NamedDevice()
+  named_device.name = '/GPU:0'
+  named_device.properties.type = 'GPU'
+  named_device.properties.environment['architecture'] = '4'
+  cluster = gcluster.Cluster(devices=[named_device])
+  return cluster
+
+
 class LayoutOptimizerTest(test.TestCase):
   """Tests the Grappler layout optimizer."""
 
@@ -202,13 +212,8 @@ class LayoutOptimizerTest(test.TestCase):
     meta_graph = _simple_metagraph()
     rewrite_options = rewriter_config_pb2.RewriterConfig(
         layout_optimizer=rewriter_config_pb2.RewriterConfig.ON)
-    named_device = device_properties_pb2.NamedDevice()
-    named_device.name = '/GPU:0'
-    named_device.properties.type = 'GPU'
-    named_device.properties.environment['architecture'] = '4'
-    cluster = gcluster.Cluster(devices=[named_device])
     optimized_graph = tf_optimizer.OptimizeGraph(
-        rewrite_options, meta_graph, cluster=cluster)
+        rewrite_options, meta_graph, cluster=_get_cluster())
 
     found = 0
     for node in optimized_graph.node:
@@ -217,6 +222,23 @@ class LayoutOptimizerTest(test.TestCase):
         self.assertEqual(node.attr['data_format'].s, 'NCHW')
     self.assertEqual(found, 5)
 
+  def testDepthwise(self):
+    meta_graph = _simple_metagraph(depthwise=True)
+    rewrite_options = rewriter_config_pb2.RewriterConfig(
+        layout_optimizer=rewriter_config_pb2.RewriterConfig.ON)
+    optimized_graph = tf_optimizer.OptimizeGraph(
+        rewrite_options, meta_graph, cluster=_get_cluster())
+
+    found = 0
+    for node in optimized_graph.node:
+      if node.op in [
+          'DepthwiseConv2dNative', 'DepthwiseConv2dNativeBackpropFilter',
+          'DepthwiseConv2dNativeBackpropInput'
+      ]:
+        found += 1
+        self.assertEqual(node.attr['data_format'].s, 'NCHW')
+    self.assertEqual(found, 6)
+
   def testCheckpointCompatibility(self):
     if not test.is_gpu_available(cuda_only=True):
       self.skipTest('GPU required')
-- 
GitLab


From f1c2ed214dc470ef22ecd7a7c977f783c533e977 Mon Sep 17 00:00:00 2001
From: Igor Ganichev <iga@google.com>
Date: Tue, 21 Nov 2017 18:16:29 -0800
Subject: [PATCH 0198/1225] Test combining tensors from different graphs

PiperOrigin-RevId: 176589756
---
 tensorflow/python/framework/ops_test.py | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/tensorflow/python/framework/ops_test.py b/tensorflow/python/framework/ops_test.py
index 7ecdea8275..a4780fdc05 100644
--- a/tensorflow/python/framework/ops_test.py
+++ b/tensorflow/python/framework/ops_test.py
@@ -577,6 +577,16 @@ class OperationTest(test_util.TensorFlowTestCase):
     self.assertEqual(len(z.op.op_def.input_arg), 2)
     self.assertEqual(len(z.op.op_def.output_arg), 1)
 
+  def testInputFromDifferentGraphError(self):
+    g_0 = ops.Graph()
+    g_1 = ops.Graph()
+    with g_0.as_default():
+      x = constant_op.constant(1)
+    with g_1.as_default():
+      y = constant_op.constant(2)
+      with self.assertRaisesRegexp(ValueError, "must be from the same graph"):
+        y * x  # pylint: disable=pointless-statement
+
 
 @test_util.with_c_api
 class CreateOpTest(test_util.TensorFlowTestCase):
-- 
GitLab


From 59998d9150271e6e051a2ca638fb6041f8018224 Mon Sep 17 00:00:00 2001
From: Yao Zhang <yaozhang@google.com>
Date: Tue, 21 Nov 2017 18:53:53 -0800
Subject: [PATCH 0199/1225] Preserve fetch nodes.

PiperOrigin-RevId: 176594442
---
 tensorflow/core/grappler/grappler_item.cc     |  2 +
 .../grappler/optimizers/layout_optimizer.cc   | 73 ++++++++++++-------
 .../grappler/optimizers/layout_optimizer.h    |  1 +
 .../optimizers/layout_optimizer_test.cc       | 15 ++++
 4 files changed, 64 insertions(+), 27 deletions(-)

diff --git a/tensorflow/core/grappler/grappler_item.cc b/tensorflow/core/grappler/grappler_item.cc
index 844a1fa328..149f6fc735 100644
--- a/tensorflow/core/grappler/grappler_item.cc
+++ b/tensorflow/core/grappler/grappler_item.cc
@@ -72,9 +72,11 @@ std::vector<const NodeDef*> GrapplerItem::MainVariables() const {
 std::unordered_set<string> GrapplerItem::NodesToPreserve() const {
   std::unordered_set<string> result;
   for (const string& f : fetch) {
+    VLOG(1) << "Add fetch " << f;
     result.insert(NodeName(f));
   }
   for (const auto& f : feed) {
+    VLOG(1) << "Add feed " << f.first;
     result.insert(NodeName(f.first));
   }
   for (const auto& node : init_ops) {
diff --git a/tensorflow/core/grappler/optimizers/layout_optimizer.cc b/tensorflow/core/grappler/optimizers/layout_optimizer.cc
index b00e1cb011..89ebd8e98f 100644
--- a/tensorflow/core/grappler/optimizers/layout_optimizer.cc
+++ b/tensorflow/core/grappler/optimizers/layout_optimizer.cc
@@ -109,8 +109,11 @@ bool IsMaxPoolGradV1(const NodeDef& node) {
 
 class GraphProcessor {
  public:
-  GraphProcessor(GraphDef* graph, NodeMap* node_map)
-      : graph_(graph), node_map_(node_map) {}
+  GraphProcessor(GraphDef* graph, NodeMap* node_map,
+                 const std::unordered_set<string>& nodes_to_preserve)
+      : graph_(graph),
+        node_map_(node_map),
+        nodes_to_preserve_(nodes_to_preserve) {}
 
  protected:
   NodeDef* AddNodePermConst(const string& name, const string& device,
@@ -174,27 +177,30 @@ class GraphProcessor {
 
   GraphDef* graph_;
   NodeMap* node_map_;
-
- private:
+  const std::unordered_set<string>& nodes_to_preserve_;
 };
 
 struct OptimizeContext {
   OptimizeContext(GraphDef* graph, NodeDef* node, NodeMap* node_map,
+                  const std::unordered_set<string>& nodes_to_preserve,
                   bool is_in_frame)
       : graph(graph),
         node(node),
         node_map(node_map),
+        nodes_to_preserve(nodes_to_preserve),
         is_in_frame(is_in_frame) {}
   GraphDef* graph;
   NodeDef* node;
   NodeMap* node_map;
+  const std::unordered_set<string>& nodes_to_preserve;
   bool is_in_frame;
 };
 
 class NodeProcessor : public GraphProcessor {
  public:
   explicit NodeProcessor(const OptimizeContext& opt_cxt)
-      : GraphProcessor(opt_cxt.graph, opt_cxt.node_map),
+      : GraphProcessor(opt_cxt.graph, opt_cxt.node_map,
+                       opt_cxt.nodes_to_preserve),
         node_(opt_cxt.node),
         is_in_frame_(opt_cxt.is_in_frame) {}
   virtual ~NodeProcessor() {}
@@ -246,8 +252,12 @@ class NodeProcessor : public GraphProcessor {
     return Status::OK();
   }
 
+  bool MustPreserve() const {
+    return nodes_to_preserve_.find(node_->name()) != nodes_to_preserve_.end();
+  }
+
   virtual bool ShouldProcess() const {
-    return IsNHWC() && IsDimsFour(*node_) && HasOutputs();
+    return !MustPreserve() && IsNHWC() && IsDimsFour(*node_) && HasOutputs();
   }
 
   void UpdateAttrDataFormat() {
@@ -523,6 +533,9 @@ class BiasAddGradProcessor : public NodeProcessor {
 
  protected:
   bool ShouldProcess() const override {
+    if (MustPreserve()) {
+      return false;
+    }
     auto input = node_map_->GetNode(node_->input(0));
     if (input) {
       if ((IsNHWC() && IsDimsFour(*input)) || IsNodeNCHWToNHWC(input->name())) {
@@ -542,7 +555,7 @@ class Conv2DProcessor : public NodeProcessor {
 
  protected:
   bool ShouldProcess() const override {
-    return IsNHWC() && IsDimsFour(*node_) && HasOutputs() &&
+    return !MustPreserve() && IsNHWC() && IsDimsFour(*node_) && HasOutputs() &&
            (!IsGemmUsed() || no_gemm_);
   }
 
@@ -679,7 +692,8 @@ class AgnosticNodeProcessor : public NodeProcessor {
 
  protected:
   bool ShouldProcess() const override {
-    return IsDimsFour(*node_) && HasOutputs() && IsNodeAfterNCHWToNHWC();
+    return !MustPreserve() && IsDimsFour(*node_) && HasOutputs() &&
+           IsNodeAfterNCHWToNHWC();
   }
 
   bool IsNodeAfterNCHWToNHWC() const {
@@ -729,7 +743,8 @@ class BinaryOpProcessor : public AgnosticNodeProcessor {
 
  protected:
   bool ShouldProcess() const override {
-    return IsDimsFour(*node_) && HasOutputs() && IsNodeAfterNCHWToNHWC() &&
+    return !MustPreserve() && IsDimsFour(*node_) && HasOutputs() &&
+           IsNodeAfterNCHWToNHWC() &&
            (Is4DOperateWithND(4) || Is4DOperateWithScalar() ||
             Is4DOperateWithVector());
   }
@@ -839,8 +854,8 @@ class ConcatProcessor : public AgnosticNodeProcessor {
 
  protected:
   bool ShouldProcess() const override {
-    return IsDimsFour(*node_) && HasOutputs() && IsNodeAfterNCHWToNHWC() &&
-           IsAlongDimC();
+    return !MustPreserve() && IsDimsFour(*node_) && HasOutputs() &&
+           IsNodeAfterNCHWToNHWC() && IsAlongDimC();
   }
 
   std::vector<int> GetInputPos() const override {
@@ -904,8 +919,8 @@ class PadProcessor : public AgnosticNodeProcessor {
 
  protected:
   bool ShouldProcess() const override {
-    return IsDimsFour(*node_) && HasOutputs() && IsNodeAfterNCHWToNHWC() &&
-           PaddingSupported();
+    return !MustPreserve() && IsDimsFour(*node_) && HasOutputs() &&
+           IsNodeAfterNCHWToNHWC() && PaddingSupported();
   }
   Status CustomizedProcessing() override { return UpdateAttrValueOfInput(1); }
 
@@ -1116,8 +1131,8 @@ class SqueezeProcessor : public AgnosticNodeProcessor {
 
  protected:
   bool ShouldProcess() const override {
-    return IsDimsN(*node_, 2) && HasOutputs() && IsNodeAfterNCHWToNHWC() &&
-           IsInputConvertible() && IsAlongDimHW();
+    return !MustPreserve() && IsDimsN(*node_, 2) && HasOutputs() &&
+           IsNodeAfterNCHWToNHWC() && IsInputConvertible() && IsAlongDimHW();
   }
 
   Status AddLayoutTransposeToOutputs() override { return Status::OK(); }
@@ -1166,7 +1181,7 @@ class SumProcessor : public AgnosticNodeProcessor {
  protected:
   bool ShouldProcess() const override {
     auto input0 = node_map_->GetNode(node_->input(0));
-    return HasOutputs() && IsNodeAfterNCHWToNHWC() &&
+    return !MustPreserve() && HasOutputs() && IsNodeAfterNCHWToNHWC() &&
            (IsDimsFour(*input0) || IsNodeNCHWToNHWC(input0->name())) &&
            IsAlongDimNHW();
   }
@@ -1227,12 +1242,13 @@ class SumProcessor : public AgnosticNodeProcessor {
 
 class DataLayoutOptimizer : GraphProcessor {
  public:
-  explicit DataLayoutOptimizer(const string& default_device, GraphDef* graph,
-                               NodeMap* node_map,
-                               LayoutOptimizer::TuningConfig config)
-      : GraphProcessor(graph, node_map),
-        default_device_(default_device),
-        config_(config) {}
+  explicit DataLayoutOptimizer(
+      LayoutOptimizer::TuningConfig config,
+      const std::unordered_set<string>& nodes_to_preserve,
+      const string& default_device, GraphDef* graph, NodeMap* node_map)
+      : GraphProcessor(graph, node_map, nodes_to_preserve),
+        config_(config),
+        default_device_(default_device) {}
 
   Status Optimize() {
     LOG(INFO) << "Number of nodes for original graph: " << graph_->node_size();
@@ -1279,7 +1295,8 @@ class DataLayoutOptimizer : GraphProcessor {
           ops_format_supported.end()) {
         auto node = graph_->mutable_node(i);
         bool is_in_frame = !frames[node].empty();
-        OptimizeContext opt_cxt(graph_, node, node_map_, is_in_frame);
+        OptimizeContext opt_cxt(graph_, node, node_map_, nodes_to_preserve_,
+                                is_in_frame);
         std::unique_ptr<NodeProcessor> node_processor;
         if (IsAvgPoolGrad(*node)) {
           node_processor.reset(new AvgPoolGradProcessor(opt_cxt));
@@ -1326,7 +1343,8 @@ class DataLayoutOptimizer : GraphProcessor {
             ops_format_agnostic.end()) {
           auto node = graph_->mutable_node(i);
           bool is_in_frame = !frames[node].empty();
-          OptimizeContext opt_cxt(graph_, node, node_map_, is_in_frame);
+          OptimizeContext opt_cxt(graph_, node, node_map_, nodes_to_preserve_,
+                                  is_in_frame);
           std::unique_ptr<NodeProcessor> node_processor;
           if (IsAddN(*node)) {
             node_processor.reset(new AddNProcessor(opt_cxt));
@@ -1401,8 +1419,8 @@ class DataLayoutOptimizer : GraphProcessor {
     return Status::OK();
   }
 
-  string default_device_;
   LayoutOptimizer::TuningConfig config_;
+  string default_device_;
 };
 
 int GetNumTranspose(const GraphDef& graph) {
@@ -1445,8 +1463,8 @@ Status LayoutOptimizer::Tune(const GrapplerItem& item,
     return status;
   }
   NodeMap node_map(output);
-  DataLayoutOptimizer layout_optimizer(default_device, output, &node_map,
-                                       config);
+  DataLayoutOptimizer layout_optimizer(config, nodes_to_preserve_,
+                                       default_device, output, &node_map);
   status = layout_optimizer.Optimize();
   return status;
 }
@@ -1459,6 +1477,7 @@ Status LayoutOptimizer::Optimize(Cluster* cluster, const GrapplerItem& item,
     return Status::OK();
   }
 
+  nodes_to_preserve_ = item.NodesToPreserve();
   GraphProperties graph_properties(item);
   auto status = graph_properties.InferStatically();
   if (!status.ok()) {
diff --git a/tensorflow/core/grappler/optimizers/layout_optimizer.h b/tensorflow/core/grappler/optimizers/layout_optimizer.h
index a22fadd9e7..f5dd70356a 100644
--- a/tensorflow/core/grappler/optimizers/layout_optimizer.h
+++ b/tensorflow/core/grappler/optimizers/layout_optimizer.h
@@ -47,6 +47,7 @@ class LayoutOptimizer : public GraphOptimizer {
                 const GraphDef& optimize_output, double result) override;
 
  private:
+  std::unordered_set<string> nodes_to_preserve_;
   Status Tune(const GrapplerItem& item, const GraphProperties& graph_properties,
               const string& default_device, const TuningConfig& config,
               GraphDef* output);
diff --git a/tensorflow/core/grappler/optimizers/layout_optimizer_test.cc b/tensorflow/core/grappler/optimizers/layout_optimizer_test.cc
index 156e6710a6..5d2d90b193 100644
--- a/tensorflow/core/grappler/optimizers/layout_optimizer_test.cc
+++ b/tensorflow/core/grappler/optimizers/layout_optimizer_test.cc
@@ -263,6 +263,21 @@ TEST_F(LayoutOptimizerTest, Connectivity) {
   EXPECT_EQ(node_i2_output->input(0), "i1");
 }
 
+TEST_F(LayoutOptimizerTest, PreserveFetch) {
+  tensorflow::Scope s = tensorflow::Scope::NewRootScope();
+  auto conv = SimpleConv2D(&s, 3, 2, "VALID");
+  auto i = ops::Identity(s.WithOpName("i"), conv);
+  GrapplerItem item;
+  item.fetch.push_back("Conv2D");
+  TF_CHECK_OK(s.ToGraphDef(&item.graph));
+  LayoutOptimizer optimizer;
+  GraphDef output;
+  Status status = optimizer.Optimize(virtual_cluster_.get(), item, &output);
+  NodeMap node_map(&output);
+  auto conv_node = node_map.GetNode("Conv2D");
+  EXPECT_EQ(conv_node->attr().at({"data_format"}).s(), "NHWC");
+}
+
 }  // namespace
 }  // namespace grappler
 }  // namespace tensorflow
-- 
GitLab


From 8b4b63b9710a3d242dad7640bc3fd7c6515b41fa Mon Sep 17 00:00:00 2001
From: Eugene Brevdo <ebrevdo@google.com>
Date: Tue, 21 Nov 2017 19:34:35 -0800
Subject: [PATCH 0200/1225] Add tf.contrib.data.Counter.

PiperOrigin-RevId: 176597546
---
 tensorflow/contrib/data/__init__.py           |  4 ++
 .../kernel_tests/range_dataset_op_test.py     | 22 ++++++++
 tensorflow/contrib/data/python/ops/BUILD      |  1 +
 tensorflow/contrib/data/python/ops/counter.py | 52 +++++++++++++++++++
 .../contrib/data/python/ops/scan_ops.py       |  4 +-
 5 files changed, 81 insertions(+), 2 deletions(-)
 create mode 100644 tensorflow/contrib/data/python/ops/counter.py

diff --git a/tensorflow/contrib/data/__init__.py b/tensorflow/contrib/data/__init__.py
index 6e43ae0e63..7c6244f22b 100644
--- a/tensorflow/contrib/data/__init__.py
+++ b/tensorflow/contrib/data/__init__.py
@@ -17,6 +17,7 @@
 See the @{$datasets$Importing Data} Programmer's Guide for an overview.
 
 @@Dataset
+@@Counter
 @@Iterator
 @@TFRecordDataset
 @@FixedLengthRecordDataset
@@ -33,6 +34,7 @@ See the @{$datasets$Importing Data} Programmer's Guide for an overview.
 @@unbatch
 @@parallel_interleave
 @@rejection_resample
+@@scan
 @@sloppy_interleave
 
 @@get_single_element
@@ -48,6 +50,7 @@ from tensorflow.contrib.data.python.ops.batching import batch_and_drop_remainder
 from tensorflow.contrib.data.python.ops.batching import dense_to_sparse_batch
 from tensorflow.contrib.data.python.ops.batching import padded_batch_and_drop_remainder
 from tensorflow.contrib.data.python.ops.batching import unbatch
+from tensorflow.contrib.data.python.ops.counter import Counter
 from tensorflow.contrib.data.python.ops.dataset_ops import Dataset
 from tensorflow.contrib.data.python.ops.dataset_ops import get_single_element
 from tensorflow.contrib.data.python.ops.enumerate_ops import enumerate_dataset
@@ -62,6 +65,7 @@ from tensorflow.contrib.data.python.ops.readers import SqlDataset
 from tensorflow.contrib.data.python.ops.readers import TextLineDataset
 from tensorflow.contrib.data.python.ops.readers import TFRecordDataset
 from tensorflow.contrib.data.python.ops.resampling import rejection_resample
+from tensorflow.contrib.data.python.ops.scan_ops import scan
 from tensorflow.python.data.ops.iterator_ops import Iterator
 # pylint: enable=unused-import
 
diff --git a/tensorflow/contrib/data/python/kernel_tests/range_dataset_op_test.py b/tensorflow/contrib/data/python/kernel_tests/range_dataset_op_test.py
index f59ac760dc..8e6ad061a1 100644
--- a/tensorflow/contrib/data/python/kernel_tests/range_dataset_op_test.py
+++ b/tensorflow/contrib/data/python/kernel_tests/range_dataset_op_test.py
@@ -19,6 +19,7 @@ from __future__ import print_function
 
 import os
 
+from tensorflow.contrib.data.python.ops import counter
 from tensorflow.contrib.data.python.ops import dataset_ops
 from tensorflow.contrib.data.python.ops import enumerate_ops
 from tensorflow.contrib.data.python.ops import iterator_ops as contrib_iterator_ops
@@ -194,6 +195,27 @@ class RangeDatasetTest(test.TestCase):
       with self.assertRaises(errors.OutOfRangeError):
         sess.run(get_next)
 
+  def testCounter(self):
+    """Test dataset construction using `count`."""
+    iterator = (counter.Counter(start=3, step=4)
+                .make_one_shot_iterator())
+    get_next = iterator.get_next()
+    self.assertEqual([], get_next.shape.as_list())
+    self.assertEqual(dtypes.int64, get_next.dtype)
+
+    negative_iterator = (counter.Counter(start=0, step=-1)
+                         .make_one_shot_iterator())
+    negative_get_next = negative_iterator.get_next()
+
+    with self.test_session() as sess:
+      self.assertEqual(3, sess.run(get_next))
+      self.assertEqual(3 + 4, sess.run(get_next))
+      self.assertEqual(3 + 2 * 4, sess.run(get_next))
+
+      self.assertEqual(0, sess.run(negative_get_next))
+      self.assertEqual(-1, sess.run(negative_get_next))
+      self.assertEqual(-2, sess.run(negative_get_next))
+
   def _iterator_checkpoint_prefix(self):
     return os.path.join(self.get_temp_dir(), "iterator")
 
diff --git a/tensorflow/contrib/data/python/ops/BUILD b/tensorflow/contrib/data/python/ops/BUILD
index 86035f3a69..25ed58cdf5 100644
--- a/tensorflow/contrib/data/python/ops/BUILD
+++ b/tensorflow/contrib/data/python/ops/BUILD
@@ -14,6 +14,7 @@ load("//tensorflow:tensorflow.bzl", "tf_custom_op_py_library")
 py_library(
     name = "dataset_ops",
     srcs = [
+        "counter.py",
         "dataset_ops.py",
     ],
     srcs_version = "PY2AND3",
diff --git a/tensorflow/contrib/data/python/ops/counter.py b/tensorflow/contrib/data/python/ops/counter.py
new file mode 100644
index 0000000000..63226fe781
--- /dev/null
+++ b/tensorflow/contrib/data/python/ops/counter.py
@@ -0,0 +1,52 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""The Counter Dataset."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from tensorflow.contrib.data.python.ops import scan_ops
+
+from tensorflow.python.data.ops import dataset_ops
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import ops
+
+
+def Counter(start=0, step=1, dtype=dtypes.int64):
+  """Creates a `Dataset` of a `step`-separated count startin from `start`.
+
+  For example:
+
+  ```python
+  Dataset.count() == [0, 1, 2, ...)
+  Dataset.count(2) == [2, 3, ...)
+  Dataset.count(2, 5) == [2, 7, 12, ...)
+  Dataset.count(0, -1) == [0, -1, -2, ...)
+  Dataset.count(10, -1) == [10, 9, ...)
+  ```
+
+  Args:
+    start: starting value for count.
+    step: step size.
+    dtype: counter data type.
+
+  Returns:
+    A `Dataset` of scalar elements.
+  """
+  with ops.name_scope("counter"):
+    start = ops.convert_to_tensor(start, dtype=dtype, name="start")
+    step = ops.convert_to_tensor(step, dtype=dtype, name="step")
+    return dataset_ops.Dataset.from_tensors(0).repeat(None).apply(
+        scan_ops.scan(start, lambda state, _: (state + step, state)))
diff --git a/tensorflow/contrib/data/python/ops/scan_ops.py b/tensorflow/contrib/data/python/ops/scan_ops.py
index 7c595b1814..2744786e9e 100644
--- a/tensorflow/contrib/data/python/ops/scan_ops.py
+++ b/tensorflow/contrib/data/python/ops/scan_ops.py
@@ -70,13 +70,13 @@ class _ScanDataset(dataset_ops.Dataset):
 
       @function.Defun(*(flat_state_types + nest.flatten(
           sparse.as_dense_types(input_dataset.output_types,
-                                input_dataset.output_classes))))  # pylint: disable=protected-access
+                                input_dataset.output_classes))))
       def tf_scan_func(*args):
         """A wrapper for Defun that facilitates shape inference."""
         # Pass in shape information from the state and input_dataset.
         # TODO(b/69424092): Check that neither inputs nor outputs are sparse.
         dense_shapes = sparse.as_dense_shapes(input_dataset.output_shapes,
-                                              input_dataset.output_classes)  # pylint: disable=protected-access
+                                              input_dataset.output_classes)
         for arg, shape in zip(args,
                               flat_state_shapes + nest.flatten(dense_shapes)):
           arg.set_shape(shape)
-- 
GitLab


From 7f88363810e77a39db919fb4000583ad0138e53c Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 21 Nov 2017 20:19:18 -0800
Subject: [PATCH 0201/1225] Fix an integer overflow problem in PropagateShapes.

PiperOrigin-RevId: 176601510
---
 .../core/grappler/costs/graph_properties.cc   | 20 +++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/tensorflow/core/grappler/costs/graph_properties.cc b/tensorflow/core/grappler/costs/graph_properties.cc
index c28498ef6f..bf49d78a1a 100644
--- a/tensorflow/core/grappler/costs/graph_properties.cc
+++ b/tensorflow/core/grappler/costs/graph_properties.cc
@@ -707,17 +707,17 @@ Status GraphProperties::PropagateShapes(
   // incorrect shape functions. The algoritm should converge in at most
   // num_nested_loops^2 * max_rank. We approximate max_rank with the constant 4.
   // The same applies to resources.
-  const int num_loops = new_shapes->size();
-  const int max_loop_length = item_.graph.node_size();
-  const int max_rank = 4;
-  const int max_loop_iterations =
-      max_rank * max_loop_length * std::max(1, num_loops * num_loops);
-  const int num_queues = resources.size();
-  const int max_resource_iterations = num_queues * num_queues * max_rank;
-
-  int num_resource_iterations = 0;
+  const int64 num_loops = new_shapes->size();
+  const int64 max_loop_length = item_.graph.node_size();
+  const int64 max_rank = 4;
+  const int64 max_loop_iterations =
+      max_rank * max_loop_length * std::max<int64>(1, num_loops * num_loops);
+  const int64 num_queues = resources.size();
+  const int64 max_resource_iterations = num_queues * num_queues * max_rank;
+
+  int64 num_resource_iterations = 0;
   do {
-    int num_loop_iterations = 0;
+    int64 num_loop_iterations = 0;
     while (!new_shapes->empty() &&
            num_loop_iterations++ < max_loop_iterations) {
       const Node* n = new_shapes->pop();
-- 
GitLab


From b06b072e787740c15ef49b0f2285bb170abea020 Mon Sep 17 00:00:00 2001
From: Seungil You <31752931+si-you@users.noreply.github.com>
Date: Wed, 22 Nov 2017 13:52:11 +0900
Subject: [PATCH 0202/1225] Add str(Label(...)) to bazel macros (#14737)

* Add Label to bazel configurations to make bazel macros work in supermodule.

* Roll back unnecessary auto formatting
---
 .../platform/default/build_config_root.bzl    | 24 ++++++++++---------
 third_party/mkl/build_defs.bzl                |  2 +-
 2 files changed, 14 insertions(+), 12 deletions(-)

diff --git a/tensorflow/core/platform/default/build_config_root.bzl b/tensorflow/core/platform/default/build_config_root.bzl
index caeed0aa4a..6e98f12114 100644
--- a/tensorflow/core/platform/default/build_config_root.bzl
+++ b/tensorflow/core/platform/default/build_config_root.bzl
@@ -10,7 +10,9 @@ def tf_sycl_tests_tags():
 
 def tf_additional_plugin_deps():
   return select({
-      "//tensorflow:with_xla_support": ["//tensorflow/compiler/jit"],
+      str(Label("//tensorflow:with_xla_support")): [
+          str(Label("//tensorflow/compiler/jit"))
+      ],
       "//conditions:default": [],
   })
 
@@ -19,37 +21,37 @@ def tf_additional_xla_deps_py():
 
 def tf_additional_license_deps():
   return select({
-      "//tensorflow:with_xla_support": ["@llvm//:LICENSE.TXT"],
+      str(Label("//tensorflow:with_xla_support")): ["@llvm//:LICENSE.TXT"],
       "//conditions:default": [],
   })
 
 def tf_additional_verbs_deps():
   return select({
-      "//tensorflow:with_verbs_support": [
-          "//tensorflow/contrib/verbs:verbs_server_lib",
-          "//tensorflow/contrib/verbs:grpc_verbs_client",
-      ], 
+      str(Label("//tensorflow:with_verbs_support")): [
+          str(Label("//tensorflow/contrib/verbs:verbs_server_lib")),
+          str(Label("//tensorflow/contrib/verbs:grpc_verbs_client")),
+      ],
       "//conditions:default": [],
   })
 
 def tf_additional_mpi_deps():
   return select({
-      "//tensorflow:with_mpi_support": [
-          "//tensorflow/contrib/mpi:mpi_server_lib",
+      str(Label("//tensorflow:with_mpi_support")): [
+          str(Label("//tensorflow/contrib/mpi:mpi_server_lib")),
       ],
       "//conditions:default": [],
   })
 
 def tf_additional_gdr_deps():
   return select({
-      "//tensorflow:with_gdr_support": [
-          "//tensorflow/contrib/gdr:gdr_server_lib",
+      str(Label("//tensorflow:with_gdr_support")): [
+          str(Label("//tensorflow/contrib/gdr:gdr_server_lib")),
       ],
       "//conditions:default": [],
   })
 
 def if_static(extra_deps, otherwise=[]):
   return select({
-      "//tensorflow:framework_shared_object": otherwise,
+      str(Label("//tensorflow:framework_shared_object")): otherwise,
       "//conditions:default": extra_deps,
   })
diff --git a/third_party/mkl/build_defs.bzl b/third_party/mkl/build_defs.bzl
index 533c0766c7..6574f25092 100644
--- a/third_party/mkl/build_defs.bzl
+++ b/third_party/mkl/build_defs.bzl
@@ -20,7 +20,7 @@ def if_mkl(if_true, if_false = []):
 
     """
     return select({
-        "//third_party/mkl:using_mkl": if_true,
+        str(Label("//third_party/mkl:using_mkl")): if_true,
         "//conditions:default": if_false
     })
 
-- 
GitLab


From 4816a8c641e55253ec95767e08440dffe0c65bb9 Mon Sep 17 00:00:00 2001
From: Yu-Cheng Ling <ycling@google.com>
Date: Tue, 21 Nov 2017 20:53:59 -0800
Subject: [PATCH 0203/1225] Fixing download_dependencies.sh bugs for generating
 TFLite iOS exmaples (#14734)

* Fix: Can't build TFLite after running download_dependencies.sh.

Root cause: The script downloads files for building TFLite for iOS
example. It writes to `downloads/` directory and conflicts with the
visibility rule "**/*" in BUILD

* Retain lite/examples/ios/camera/data directory in git.

* Fix some bugs in download_dependencies.sh

* Handle both the cases that the zip file has nested directories
  or not.
* Always use `curl` since `wget` sometimes has certificate problem
  in some Mac machines.
---
 tensorflow/contrib/lite/BUILD                  |  3 +++
 .../contrib/lite/download_dependencies.sh      | 18 +++++++++++++-----
 .../lite/examples/ios/camera/data/.gitignore   |  0
 3 files changed, 16 insertions(+), 5 deletions(-)
 create mode 100644 tensorflow/contrib/lite/examples/ios/camera/data/.gitignore

diff --git a/tensorflow/contrib/lite/BUILD b/tensorflow/contrib/lite/BUILD
index 96a9e281ad..52460123cc 100644
--- a/tensorflow/contrib/lite/BUILD
+++ b/tensorflow/contrib/lite/BUILD
@@ -191,6 +191,9 @@ filegroup(
         exclude = [
             "**/METADATA",
             "**/OWNERS",
+            "downloads",
+            "examples",
+            "gen",
         ],
     ),
     visibility = ["//tensorflow:__subpackages__"],
diff --git a/tensorflow/contrib/lite/download_dependencies.sh b/tensorflow/contrib/lite/download_dependencies.sh
index 41480c2007..e13df2fa1c 100755
--- a/tensorflow/contrib/lite/download_dependencies.sh
+++ b/tensorflow/contrib/lite/download_dependencies.sh
@@ -56,11 +56,19 @@ download_and_extract() {
   elif [[ "${url}" == *zip ]]; then
     tempdir=$(mktemp -d)
     tempdir2=$(mktemp -d)
-    wget -P ${tempdir} ${url}
-    unzip ${tempdir}/* -d ${tempdir2}
-    # unzip has no strip components, so unzip to a temp dir, and move the files
-    # we want from the tempdir to destination.
-    echo cp `find ${tempdir2} -type f` ${dir}/
+
+    curl -L ${url} > ${tempdir}/zipped.zip
+    unzip ${tempdir}/zipped.zip -d ${tempdir2}
+
+    # If the zip file contains nested directories, extract the files from the
+    # inner directory.
+    if ls ${tempdir2}/*/* 1> /dev/null 2>&1; then
+      # unzip has no strip components, so unzip to a temp dir, and move the
+      # files we want from the tempdir to destination.
+      cp -R ${tempdir2}/*/* ${dir}/
+    else
+      cp -R ${tempdir2}/* ${dir}/
+    fi
     rm -rf ${tempdir2} ${tempdir}
   fi
 
diff --git a/tensorflow/contrib/lite/examples/ios/camera/data/.gitignore b/tensorflow/contrib/lite/examples/ios/camera/data/.gitignore
new file mode 100644
index 0000000000..e69de29bb2
-- 
GitLab


From 6a35171131331a31d70e67e6d244422f3d15aafb Mon Sep 17 00:00:00 2001
From: Derek Murray <derek.murray@gmail.com>
Date: Tue, 21 Nov 2017 20:55:22 -0800
Subject: [PATCH 0204/1225] [CMake] Don't build tests for RE2 (#14696)

* [CMake] Don't build tests for RE2

Issue #14691 shows a build error on Windows in the RE2 tests. Since we do not run these tests, and they seem to be causing problems on some platforms, do not build them as part of the TensorFlow build.

* replace tab with spaces
---
 tensorflow/contrib/cmake/external/re2.cmake | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tensorflow/contrib/cmake/external/re2.cmake b/tensorflow/contrib/cmake/external/re2.cmake
index b56f4b0898..d10f5959f7 100644
--- a/tensorflow/contrib/cmake/external/re2.cmake
+++ b/tensorflow/contrib/cmake/external/re2.cmake
@@ -45,4 +45,5 @@ ExternalProject_Add(re2
 		endif()
         -DCMAKE_BUILD_TYPE:STRING=Release
         -DCMAKE_INSTALL_PREFIX:STRING=${re2_INSTALL}
+        -DRE2_BUILD_TESTING:BOOL=OFF
 )
-- 
GitLab


From 6e3e1d317a5d157da7773d35cb18c23892d931e4 Mon Sep 17 00:00:00 2001
From: Chris Hoyean Song <sjhshy@gmail.com>
Date: Wed, 22 Nov 2017 13:56:37 +0900
Subject: [PATCH 0205/1225] fix misspellings (#14702)

---
 .../contrib/distributions/python/ops/poisson_lognormal.py   | 2 +-
 tensorflow/contrib/eager/python/metrics_impl.py             | 2 +-
 tensorflow/contrib/factorization/python/ops/wals.py         | 2 +-
 tensorflow/contrib/gan/python/train.py                      | 2 +-
 tensorflow/contrib/lite/schema/upgrade_schema_test.py       | 2 +-
 tensorflow/contrib/nn/python/ops/sampling_ops.py            | 2 +-
 tensorflow/contrib/slim/python/slim/evaluation.py           | 4 ++--
 .../hybrid/python/models/decisions_to_data_then_nn_test.py  | 6 +++---
 tensorflow/python/estimator/export/export.py                | 2 +-
 tensorflow/python/estimator/training_test.py                | 4 ++--
 tensorflow/python/keras/_impl/keras/backend.py              | 2 +-
 tensorflow/python/ops/math_ops_test.py                      | 2 +-
 tensorflow/python/ops/variables.py                          | 4 ++--
 tensorflow/python/profiler/model_analyzer_test.py           | 2 +-
 tensorflow/python/util/nest.py                              | 4 ++--
 15 files changed, 21 insertions(+), 21 deletions(-)

diff --git a/tensorflow/contrib/distributions/python/ops/poisson_lognormal.py b/tensorflow/contrib/distributions/python/ops/poisson_lognormal.py
index 8a95038a3c..e1118ed431 100644
--- a/tensorflow/contrib/distributions/python/ops/poisson_lognormal.py
+++ b/tensorflow/contrib/distributions/python/ops/poisson_lognormal.py
@@ -292,7 +292,7 @@ class PoissonLogNormalQuadratureCompound(distribution_lib.Distribution):
     # where,
     #
     # Z|v ~ interpolate_affine[v](distribution)
-    # V ~ mixture_distrubution
+    # V ~ mixture_distribution
     #
     # thus,
     #
diff --git a/tensorflow/contrib/eager/python/metrics_impl.py b/tensorflow/contrib/eager/python/metrics_impl.py
index aa359b7a0d..2f8016ede3 100644
--- a/tensorflow/contrib/eager/python/metrics_impl.py
+++ b/tensorflow/contrib/eager/python/metrics_impl.py
@@ -73,7 +73,7 @@ class Metric(object):
   * `result()`: Computes and returns a final value for the metric
     from the variables in `self`.
 
-  Decendants may override `aggregate()`, but usually won't need to.  It
+  Descendants may override `aggregate()`, but usually won't need to.  It
   adds in the state from a list of metrics of the same type as `self`.
   (Default is to sum all the variables.) Note that users should not call
   `aggregate()`, it is for use by TensorFlow infrastructure.
diff --git a/tensorflow/contrib/factorization/python/ops/wals.py b/tensorflow/contrib/factorization/python/ops/wals.py
index b2f22eb2fc..0c5661e4a1 100644
--- a/tensorflow/contrib/factorization/python/ops/wals.py
+++ b/tensorflow/contrib/factorization/python/ops/wals.py
@@ -166,7 +166,7 @@ def _wals_factorization_model_function(features, labels, mode, params):
 
   # TRAIN mode:
   if mode == model_fn.ModeKeys.TRAIN:
-    # Training consists of the folowing ops (controlled using a SweepHook).
+    # Training consists of the following ops (controlled using a SweepHook).
     # Before a row sweep:
     #   row_update_prep_gramian_op
     #   initialize_row_update_op
diff --git a/tensorflow/contrib/gan/python/train.py b/tensorflow/contrib/gan/python/train.py
index ad2d5eb86c..e9443f766b 100644
--- a/tensorflow/contrib/gan/python/train.py
+++ b/tensorflow/contrib/gan/python/train.py
@@ -422,7 +422,7 @@ def gan_loss(
     ac_disc_loss = tfgan_losses.acgan_discriminator_loss(
         model, add_summaries=add_summaries)
     dis_loss += aux_cond_discriminator_weight * ac_disc_loss
-  # Gathers auxilliary losses.
+  # Gathers auxiliary losses.
   if model.generator_scope:
     gen_reg_loss = losses.get_regularization_loss(model.generator_scope.name)
   else:
diff --git a/tensorflow/contrib/lite/schema/upgrade_schema_test.py b/tensorflow/contrib/lite/schema/upgrade_schema_test.py
index 754400e888..b5002e6f75 100644
--- a/tensorflow/contrib/lite/schema/upgrade_schema_test.py
+++ b/tensorflow/contrib/lite/schema/upgrade_schema_test.py
@@ -252,7 +252,7 @@ def JsonDumpAndFlush(data, fp):
 
 class TestSchemaUpgrade(test_util.TensorFlowTestCase):
 
-  def testNonExistantFile(self):
+  def testNonExistentFile(self):
     converter = upgrade_schema_lib.Converter()
     non_existent = tempfile.mktemp(suffix=".json")
     with self.assertRaisesRegexp(IOError, "No such file or directory"):
diff --git a/tensorflow/contrib/nn/python/ops/sampling_ops.py b/tensorflow/contrib/nn/python/ops/sampling_ops.py
index 2ae529e015..98749cff7e 100644
--- a/tensorflow/contrib/nn/python/ops/sampling_ops.py
+++ b/tensorflow/contrib/nn/python/ops/sampling_ops.py
@@ -34,7 +34,7 @@ def _rank_resample(weights, biases, inputs, sampled_values, num_resampled,
 
       log(sum_j exp((w_i * x_j + b_i) / resampling_temperature))
 
-  where w_i, b_i are the weight and bias of the i-th class, repsectively,
+  where w_i, b_i are the weight and bias of the i-th class, respectively,
   and j ranges over the rows of `inputs`. For efficiency, we rearrange the
   computation to
 
diff --git a/tensorflow/contrib/slim/python/slim/evaluation.py b/tensorflow/contrib/slim/python/slim/evaluation.py
index cdb720b36b..3caf4e02da 100644
--- a/tensorflow/contrib/slim/python/slim/evaluation.py
+++ b/tensorflow/contrib/slim/python/slim/evaluation.py
@@ -34,7 +34,7 @@ the metrics and finally call the `evaluation` method:
       "mse": slim.metrics.mean_squared_error(predictions, labels),
   })
 
-  inital_op = tf.group(
+  initial_op = tf.group(
       tf.global_variables_initializer(),
       tf.local_variables_initializer())
 
@@ -42,7 +42,7 @@ the metrics and finally call the `evaluation` method:
     metric_values = slim.evaluation(
         sess,
         num_evals=1,
-        inital_op=initial_op,
+        initial_op=initial_op,
         eval_op=names_to_updates.values(),
         final_op=name_to_values.values())
 
diff --git a/tensorflow/contrib/tensor_forest/hybrid/python/models/decisions_to_data_then_nn_test.py b/tensorflow/contrib/tensor_forest/hybrid/python/models/decisions_to_data_then_nn_test.py
index cccf444db8..a56beeeb2c 100644
--- a/tensorflow/contrib/tensor_forest/hybrid/python/models/decisions_to_data_then_nn_test.py
+++ b/tensorflow/contrib/tensor_forest/hybrid/python/models/decisions_to_data_then_nn_test.py
@@ -80,7 +80,7 @@ class DecisionsToDataThenNNTest(test_util.TensorFlowTestCase):
         isinstance(self.params.num_trees, tensor_forest.ForestHParams))
 
     with variable_scope.variable_scope(
-        "DecisionsToDataThenNNTest_testContructionPollution"):
+        "DecisionsToDataThenNNTest_testConstructionPollution"):
       graph_builder = decisions_to_data_then_nn.DecisionsToDataThenNN(
           self.params)
 
@@ -95,7 +95,7 @@ class DecisionsToDataThenNNTest(test_util.TensorFlowTestCase):
          for _ in range(100)])
 
     with variable_scope.variable_scope(
-        "DecisionsToDataThenNNTest_testInferenceContruction"):
+        "DecisionsToDataThenNNTest_testInferenceConstruction"):
       graph_builder = decisions_to_data_then_nn.DecisionsToDataThenNN(
           self.params)
       graph = graph_builder.inference_graph(data, None)
@@ -111,7 +111,7 @@ class DecisionsToDataThenNNTest(test_util.TensorFlowTestCase):
     labels = [1 for _ in range(100)]
 
     with variable_scope.variable_scope(
-        "DecisionsToDataThenNNTest_testTrainingContruction"):
+        "DecisionsToDataThenNNTest_testTrainingConstruction"):
       graph_builder = decisions_to_data_then_nn.DecisionsToDataThenNN(
           self.params)
       graph = graph_builder.training_graph(data, labels, None)
diff --git a/tensorflow/python/estimator/export/export.py b/tensorflow/python/estimator/export/export.py
index 31e9933c6f..3b295a7e35 100644
--- a/tensorflow/python/estimator/export/export.py
+++ b/tensorflow/python/estimator/export/export.py
@@ -57,7 +57,7 @@ class ServingInputReceiver(collections.namedtuple(
       groups of receiver tensors, each of which may be a `Tensor` or a dict of
       string to `Tensor`.  These named receiver tensor alternatives generate
       additional serving signatures, which may be used to feed inputs at
-      different points within the input reciever subgraph.  A typical usage is
+      different points within the input receiver subgraph.  A typical usage is
       to allow feeding raw feature `Tensor`s *downstream* of the
       tf.parse_example() op.  Defaults to None.
   """
diff --git a/tensorflow/python/estimator/training_test.py b/tensorflow/python/estimator/training_test.py
index 1862e325e2..17d018aa88 100644
--- a/tensorflow/python/estimator/training_test.py
+++ b/tensorflow/python/estimator/training_test.py
@@ -1016,7 +1016,7 @@ class TrainingExecutorRunEvaluatorTest(test.TestCase):
                is_the_final_export):
       del export_path, checkpoint_path, eval_result
       estimator.times_export_was_called += 1
-      # final_export is happend at the end.
+      # final_export is happened at the end.
       self.assertEqual(0, estimator.times_final_export_was_called)
       if is_the_final_export:
         estimator.times_final_export_was_called += 1
@@ -1361,7 +1361,7 @@ class TrainingExecutorRunLocalTest(test.TestCase):
                is_the_final_export):
       del export_path, checkpoint_path, eval_result
       estimator.times_export_was_called += 1
-      # final_export is happend at the end.
+      # final_export is happened at the end.
       self.assertEqual(0, estimator.times_final_export_was_called)
       if is_the_final_export:
         estimator.times_final_export_was_called += 1
diff --git a/tensorflow/python/keras/_impl/keras/backend.py b/tensorflow/python/keras/_impl/keras/backend.py
index b029e5161f..ec7a5dcffd 100644
--- a/tensorflow/python/keras/_impl/keras/backend.py
+++ b/tensorflow/python/keras/_impl/keras/backend.py
@@ -2487,7 +2487,7 @@ class Function(object):
   """Runs a computation graph.
 
   It's possible to pass arguments to `tf.Session.run()` via `session_kwargs`.
-  In particular additonal operations via `fetches` argument and additional
+  In particular additional operations via `fetches` argument and additional
   tensor substitutions via `feed_dict` arguments. Note that given
   substitutions are merged with substitutions from `inputs`. Even though
   `feed_dict` is passed once in the constructor (called in `model.compile()`)
diff --git a/tensorflow/python/ops/math_ops_test.py b/tensorflow/python/ops/math_ops_test.py
index 4642f4c580..39be804eee 100644
--- a/tensorflow/python/ops/math_ops_test.py
+++ b/tensorflow/python/ops/math_ops_test.py
@@ -62,7 +62,7 @@ class ReduceTest(test_util.TensorFlowTestCase):
   @test_util.run_in_graph_and_eager_modes()
   def testReduceInvalidAxis(self):
     if context.in_eager_mode():
-      # The shape check is in run a graph contruction time. In eager mode,
+      # The shape check is in run a graph construction time. In eager mode,
       # it misses the check, magically return result given wrong shape.
       return
     x = np.array([[1, 2, 3], [4, 5, 6]], dtype=np.int32)
diff --git a/tensorflow/python/ops/variables.py b/tensorflow/python/ops/variables.py
index eab7c3828f..36c03ec2a1 100644
--- a/tensorflow/python/ops/variables.py
+++ b/tensorflow/python/ops/variables.py
@@ -200,7 +200,7 @@ class Variable(object):
 
     @compatibility(eager)
     `tf.Variable` is not compatible with eager execution.  Use
-    `tfe.Variable` instead which is compatable with both eager execution
+    `tfe.Variable` instead which is compatible with both eager execution
     and graph construction.  See [the TensorFlow Eager Execution
     guide](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/eager/python/g3doc/guide.md#variables-and-optimizers)
     for details on how variables work in eager execution.
@@ -1064,7 +1064,7 @@ class PartitionedVariable(object):
   """A container for partitioned `Variable` objects.
 
   @compatibility(eager) `tf.PartitionedVariable` is not compatible with
-  eager execution.  Use `tfe.Variable` instead which is compatable
+  eager execution.  Use `tfe.Variable` instead which is compatible
   with both eager execution and graph construction.  See [the
   TensorFlow Eager Execution
   guide](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/eager/python/g3doc/guide.md#variables-and-optimizers)
diff --git a/tensorflow/python/profiler/model_analyzer_test.py b/tensorflow/python/profiler/model_analyzer_test.py
index 698f8906d4..26fb99efe6 100644
--- a/tensorflow/python/profiler/model_analyzer_test.py
+++ b/tensorflow/python/profiler/model_analyzer_test.py
@@ -62,7 +62,7 @@ class PrintModelAnalysisTest(test.TestCase):
                          '  ScalarW (1, 1/1 params)\n',
                          f.read())
 
-  def testSelectEverthingDetail(self):
+  def testSelectEverythingDetail(self):
     ops.reset_default_graph()
     dev = '/device:GPU:0' if test.is_gpu_available() else '/device:CPU:0'
     outfile = os.path.join(test.get_temp_dir(), 'dump')
diff --git a/tensorflow/python/util/nest.py b/tensorflow/python/util/nest.py
index 25dbc78d7a..cdd53fb995 100644
--- a/tensorflow/python/util/nest.py
+++ b/tensorflow/python/util/nest.py
@@ -116,7 +116,7 @@ def flatten(nest):
   used instead. The same convention is followed in `pack_sequence_as`. This
   correctly repacks dicts and `OrderedDict`s after they have been flattened,
   and also allows flattening an `OrderedDict` and then repacking it back using
-  a correponding plain dict, or vice-versa.
+  a corresponding plain dict, or vice-versa.
   Dictionaries with non-sortable keys cannot be flattened.
 
   Users must not modify any collections used in `nest` while this function is
@@ -296,7 +296,7 @@ def pack_sequence_as(structure, flat_sequence):
   keys is used instead. The same convention is followed in `pack_sequence_as`.
   This correctly repacks dicts and `OrderedDict`s after they have been
   flattened, and also allows flattening an `OrderedDict` and then repacking it
-  back using a correponding plain dict, or vice-versa.
+  back using a corresponding plain dict, or vice-versa.
   Dictionaries with non-sortable keys cannot be flattened.
 
   Args:
-- 
GitLab


From fc34cf3a4a7fb946ea0e15d33dfe05f042db7f9f Mon Sep 17 00:00:00 2001
From: CSJY <qmick@live.cn>
Date: Wed, 22 Nov 2017 12:57:23 +0800
Subject: [PATCH 0206/1225] Fix docstring of variable_scope() (#14707)

---
 tensorflow/python/ops/variable_scope.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/python/ops/variable_scope.py b/tensorflow/python/ops/variable_scope.py
index 91dea12da2..0ebebc7131 100644
--- a/tensorflow/python/ops/variable_scope.py
+++ b/tensorflow/python/ops/variable_scope.py
@@ -1691,7 +1691,7 @@ class variable_scope(object):  # pylint: disable=invalid-name
   v1 = foo()  # Creates v.
   v2 = foo()  # Gets the same, existing v.
   assert v1 == v2
-
+  ```
 
   Basic example of sharing a variable with reuse=True:
 
-- 
GitLab


From 4ddc2866e2ae1aa4ac4b345fccd97990b6ccca01 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Carl=20Thom=C3=A9?= <carlthome@gmail.com>
Date: Wed, 22 Nov 2017 06:44:27 +0100
Subject: [PATCH 0207/1225] Add back whitespace (#14721)

---
 tensorflow/compiler/aot/tfcompile.bzl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/compiler/aot/tfcompile.bzl b/tensorflow/compiler/aot/tfcompile.bzl
index 1e22b760b8..e6ca7a2750 100644
--- a/tensorflow/compiler/aot/tfcompile.bzl
+++ b/tensorflow/compiler/aot/tfcompile.bzl
@@ -152,7 +152,7 @@ def tf_library(name, graph, config,
            " --target_triple=" + target_llvm_triple() +
            " --out_header=$(@D)/" + header_file +
            " --out_object=$(@D)/" + object_file +
-           flags),
+           " " + flags),
       tools=[tfcompile_tool],
       visibility=visibility,
       testonly=testonly,
-- 
GitLab


From ef3ee202659a2a49afcd9898451bf9b1256a2757 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 21 Nov 2017 22:27:21 -0800
Subject: [PATCH 0208/1225] [XLA] Add BitcastConvert HLO op to enable bitwise
 operations on floating point types.

PiperOrigin-RevId: 176610007
---
 .../xla/client/computation_builder.cc         |  28 ++++
 .../compiler/xla/client/computation_builder.h |  15 +-
 .../compiler/xla/service/dfs_hlo_visitor.h    |   3 +
 .../xla/service/elemental_ir_emitter.cc       |  49 +++++-
 .../compiler/xla/service/hlo_graph_dumper.cc  |  35 ++---
 .../compiler/xla/service/hlo_instruction.cc   |  17 +++
 .../compiler/xla/service/hlo_instruction.h    |   5 +
 tensorflow/compiler/xla/service/hlo_opcode.h  |   1 +
 .../compiler/xla/service/hlo_verifier.cc      |  12 +-
 .../xla/service/instruction_fusion.cc         |  19 +--
 tensorflow/compiler/xla/service/service.cc    |   4 +
 .../compiler/xla/service/shape_inference.cc   |  38 +++++
 .../compiler/xla/service/shape_inference.h    |   7 +
 .../compiler/xla/service/user_computation.cc  |  44 +++++-
 .../compiler/xla/service/user_computation.h   |   4 +
 tensorflow/compiler/xla/tests/BUILD           |  17 +++
 .../xla/tests/bitcast_convert_test.cc         | 141 ++++++++++++++++++
 .../compiler/xla/tools/parser/hlo_parser.cc   |   9 ++
 tensorflow/compiler/xla/xla_data.proto        |   3 +-
 .../performance/xla/operation_semantics.md    |  42 ++++--
 20 files changed, 439 insertions(+), 54 deletions(-)
 create mode 100644 tensorflow/compiler/xla/tests/bitcast_convert_test.cc

diff --git a/tensorflow/compiler/xla/client/computation_builder.cc b/tensorflow/compiler/xla/client/computation_builder.cc
index b1f4ea8ab6..b17d221ef5 100644
--- a/tensorflow/compiler/xla/client/computation_builder.cc
+++ b/tensorflow/compiler/xla/client/computation_builder.cc
@@ -1164,6 +1164,34 @@ ComputationDataHandle ComputationBuilder::ConvertElementType(
   return ParseOpResponse(s, &response);
 }
 
+ComputationDataHandle ComputationBuilder::BitcastConvertType(
+    const ComputationDataHandle& operand, PrimitiveType new_element_type) {
+  if (!first_error_.ok() || !PrepareComputation().ok()) {
+    return ComputationDataHandle();
+  }
+
+  StatusOr<std::unique_ptr<Shape>> shape_status = GetShape(operand);
+  if (!shape_status.ok()) {
+    first_error_ = shape_status.status();
+    return ComputationDataHandle();
+  }
+  std::unique_ptr<Shape> original = shape_status.ConsumeValueOrDie();
+
+  ConvertRequest request;
+  *request.mutable_operand() = operand;
+  request.set_new_element_type(new_element_type);
+  OpRequest op_request;
+  *op_request.mutable_computation() = computation_.handle();
+  *op_request.mutable_bitcast_convert_request() = request;
+  AddCommonFieldsToOpRequest(&op_request);
+  OpResponse response;
+
+  VLOG(2) << "making bitcast convert request";
+  Status s = client_->stub()->Op(&op_request, &response);
+
+  return ParseOpResponse(s, &response);
+}
+
 ComputationDataHandle ComputationBuilder::SquareF32(
     const ComputationDataHandle& operand) {
   return BinaryOp(BINOP_POW, operand, ConstantR0<float>(2.0),
diff --git a/tensorflow/compiler/xla/client/computation_builder.h b/tensorflow/compiler/xla/client/computation_builder.h
index 4c6e320557..3a34010e6a 100644
--- a/tensorflow/compiler/xla/client/computation_builder.h
+++ b/tensorflow/compiler/xla/client/computation_builder.h
@@ -121,14 +121,10 @@ class ComputationBuilder {
   // result, OpMetadata is set on the Computation Builder. All subsequent
   // instructions generated via this Computation Builder will have the same
   // OpMetadata attached until a call to ClearOpMetdata.
-  void SetOpMetadata(const OpMetadata& metadata) {
-    metadata_ = metadata;
-  }
+  void SetOpMetadata(const OpMetadata& metadata) { metadata_ = metadata; }
 
   // Clears the HloMetadata state.
-  void ClearOpMetadata() {
-    metadata_.Clear();
-  }
+  void ClearOpMetadata() { metadata_.Clear(); }
 
   // Sets an OpSharding that will be attached to all instructions until cleared.
   void SetSharding(const OpSharding& sharding) { sharding_ = sharding; }
@@ -673,6 +669,13 @@ class ComputationBuilder {
   ComputationDataHandle ConvertElementType(const ComputationDataHandle& operand,
                                            PrimitiveType new_element_type);
 
+  // Enqueues a no-op instruction onto the computation that changes
+  // the element type of the operand array to primitive_type. The
+  // bit-widths of the source and destination element types must be
+  // identical.
+  ComputationDataHandle BitcastConvertType(const ComputationDataHandle& operand,
+                                           PrimitiveType new_element_type);
+
   // Enqueues a float32 reciprocal instruction onto the computation.
   // (float32 is specified as there is an implicit float32 -1.0f constant
   // exponent).
diff --git a/tensorflow/compiler/xla/service/dfs_hlo_visitor.h b/tensorflow/compiler/xla/service/dfs_hlo_visitor.h
index 7b95325601..91086fd4a5 100644
--- a/tensorflow/compiler/xla/service/dfs_hlo_visitor.h
+++ b/tensorflow/compiler/xla/service/dfs_hlo_visitor.h
@@ -86,6 +86,9 @@ class DfsHloVisitorBase {
   virtual Status HandleConvert(HloInstructionPtr hlo) {
     return HandleElementwiseUnary(hlo);
   }
+  virtual Status HandleBitcastConvert(HloInstructionPtr hlo) {
+    return HandleElementwiseUnary(hlo);
+  }
   virtual Status HandleCopy(HloInstructionPtr hlo) {
     return HandleElementwiseUnary(hlo);
   }
diff --git a/tensorflow/compiler/xla/service/elemental_ir_emitter.cc b/tensorflow/compiler/xla/service/elemental_ir_emitter.cc
index 606868034a..97ced5dfdc 100644
--- a/tensorflow/compiler/xla/service/elemental_ir_emitter.cc
+++ b/tensorflow/compiler/xla/service/elemental_ir_emitter.cc
@@ -110,6 +110,26 @@ StatusOr<llvm::Value*> ElementalIrEmitter::EmitIntegerUnaryOp(
                            PrimitiveType_Name(from_type).c_str(),
                            PrimitiveType_Name(to_type).c_str());
     }
+    case HloOpcode::kBitcastConvert: {
+      PrimitiveType from_type = op->operand(0)->shape().element_type();
+      PrimitiveType to_type = op->shape().element_type();
+      CHECK(primitive_util::IsIntegralType(from_type));
+      if (from_type == to_type) {
+        return operand_value;
+      }
+      if (primitive_util::BitWidth(from_type) ==
+          primitive_util::BitWidth(to_type)) {
+        return ir_builder_->CreateBitCast(
+            operand_value, llvm_ir::PrimitiveTypeToIrType(to_type, module_));
+      }
+      return InvalidArgument(
+          "bitcast conversion from primitive type %s to %s with unequal "
+          "bit-widths (%u versus %u) ",
+          PrimitiveType_Name(from_type).c_str(),
+          PrimitiveType_Name(to_type).c_str(),
+          primitive_util::BitWidth(from_type),
+          primitive_util::BitWidth(to_type));
+    }
     case HloOpcode::kAbs: {
       bool is_signed =
           primitive_util::IsSignedIntegralType(op->shape().element_type());
@@ -203,6 +223,26 @@ StatusOr<llvm::Value*> ElementalIrEmitter::EmitFloatUnaryOp(
                            PrimitiveType_Name(from_type).c_str(),
                            PrimitiveType_Name(to_type).c_str());
     }
+    case HloOpcode::kBitcastConvert: {
+      PrimitiveType from_type = op->operand(0)->shape().element_type();
+      PrimitiveType to_type = op->shape().element_type();
+      CHECK(primitive_util::IsFloatingPointType(from_type));
+      if (from_type == to_type) {
+        return operand_value;
+      }
+      if (primitive_util::BitWidth(from_type) ==
+          primitive_util::BitWidth(to_type)) {
+        return ir_builder_->CreateBitCast(
+            operand_value, llvm_ir::PrimitiveTypeToIrType(to_type, module_));
+      }
+      return InvalidArgument(
+          "bitcast conversion from primitive type %s to %s with unequal "
+          "bit-widths (%u versus %u) ",
+          PrimitiveType_Name(from_type).c_str(),
+          PrimitiveType_Name(to_type).c_str(),
+          primitive_util::BitWidth(from_type),
+          primitive_util::BitWidth(to_type));
+    }
     case HloOpcode::kExp:
       return llvm_ir::EmitCallToIntrinsic(llvm::Intrinsic::exp, {operand_value},
                                           {operand_value->getType()},
@@ -1073,6 +1113,7 @@ llvm_ir::ElementGenerator ElementalIrEmitter::MakeElementGenerator(
     case HloOpcode::kRoundNearestAfz:
     case HloOpcode::kCeil:
     case HloOpcode::kConvert:
+    case HloOpcode::kBitcastConvert:
     case HloOpcode::kCopy:
     case HloOpcode::kCos:
     case HloOpcode::kExp:
@@ -1081,11 +1122,11 @@ llvm_ir::ElementGenerator ElementalIrEmitter::MakeElementGenerator(
     case HloOpcode::kIsFinite:
     case HloOpcode::kLog:
     case HloOpcode::kNegate:
+    case HloOpcode::kNot:
     case HloOpcode::kReal:
     case HloOpcode::kSign:
     case HloOpcode::kSin:
     case HloOpcode::kTanh:
-    case HloOpcode::kNot:
       return [this, hlo, &operand_to_generator](
                  const IrArray::Index& index) -> StatusOr<llvm::Value*> {
         TF_ASSIGN_OR_RETURN(llvm::Value * operand_value,
@@ -1094,6 +1135,7 @@ llvm_ir::ElementGenerator ElementalIrEmitter::MakeElementGenerator(
         return EmitUnaryOp(hlo, operand_value);
       };
     case HloOpcode::kAdd:
+    case HloOpcode::kAnd:
     case HloOpcode::kAtan2:
     case HloOpcode::kComplex:
     case HloOpcode::kDivide:
@@ -1106,14 +1148,13 @@ llvm_ir::ElementGenerator ElementalIrEmitter::MakeElementGenerator(
     case HloOpcode::kMinimum:
     case HloOpcode::kMultiply:
     case HloOpcode::kNe:
+    case HloOpcode::kOr:
     case HloOpcode::kPower:
     case HloOpcode::kRemainder:
-    case HloOpcode::kSubtract:
-    case HloOpcode::kAnd:
-    case HloOpcode::kOr:
     case HloOpcode::kShiftLeft:
     case HloOpcode::kShiftRightArithmetic:
     case HloOpcode::kShiftRightLogical:
+    case HloOpcode::kSubtract:
       return [this, hlo, &operand_to_generator](
                  const IrArray::Index& index) -> StatusOr<llvm::Value*> {
         const HloInstruction* lhs = hlo->operand(0);
diff --git a/tensorflow/compiler/xla/service/hlo_graph_dumper.cc b/tensorflow/compiler/xla/service/hlo_graph_dumper.cc
index d71a4b42c7..84187d5783 100644
--- a/tensorflow/compiler/xla/service/hlo_graph_dumper.cc
+++ b/tensorflow/compiler/xla/service/hlo_graph_dumper.cc
@@ -864,9 +864,10 @@ ColorScheme HloDotDumper::GetInstructionColor(const HloInstruction* instr) {
   // (eg, parameter).
   switch (instr->opcode()) {
     case HloOpcode::kAbs:
-    case HloOpcode::kRoundNearestAfz:
     case HloOpcode::kAdd:
+    case HloOpcode::kAnd:
     case HloOpcode::kAtan2:
+    case HloOpcode::kBitcastConvert:
     case HloOpcode::kCeil:
     case HloOpcode::kClamp:
     case HloOpcode::kComplex:
@@ -882,18 +883,19 @@ ColorScheme HloDotDumper::GetInstructionColor(const HloInstruction* instr) {
     case HloOpcode::kIsFinite:
     case HloOpcode::kLe:
     case HloOpcode::kLog:
-    case HloOpcode::kAnd:
-    case HloOpcode::kNot:
-    case HloOpcode::kOr:
     case HloOpcode::kLt:
     case HloOpcode::kMaximum:
     case HloOpcode::kMinimum:
     case HloOpcode::kMultiply:
     case HloOpcode::kNe:
     case HloOpcode::kNegate:
+    case HloOpcode::kNot:
+    case HloOpcode::kOr:
     case HloOpcode::kPower:
     case HloOpcode::kReal:
     case HloOpcode::kRemainder:
+    case HloOpcode::kRng:
+    case HloOpcode::kRoundNearestAfz:
     case HloOpcode::kShiftLeft:
     case HloOpcode::kShiftRightArithmetic:
     case HloOpcode::kShiftRightLogical:
@@ -903,7 +905,6 @@ ColorScheme HloDotDumper::GetInstructionColor(const HloInstruction* instr) {
     case HloOpcode::kSort:
     case HloOpcode::kSubtract:
     case HloOpcode::kTanh:
-    case HloOpcode::kRng:
       // De-emphasize scalar-shaped elementwise ops -- they're generally
       // uninteresting.
       if (ShapeUtil::IsEffectiveScalar(instr->shape())) {
@@ -911,9 +912,9 @@ ColorScheme HloDotDumper::GetInstructionColor(const HloInstruction* instr) {
       }
       return kYellow;
     case HloOpcode::kBitcast:
-    case HloOpcode::kTuple:
-    case HloOpcode::kTrace:
     case HloOpcode::kGetTupleElement:
+    case HloOpcode::kTrace:
+    case HloOpcode::kTuple:
       return kWhite;
     case HloOpcode::kBroadcast:
       // De-emphasize nodes which broadcast a scalar within a fusion node --
@@ -952,28 +953,28 @@ ColorScheme HloDotDumper::GetInstructionColor(const HloInstruction* instr) {
       return kRed;
     case HloOpcode::kParameter:
       return kParameterColor;
-    case HloOpcode::kBatchNormTraining:
-    case HloOpcode::kBatchNormInference:
     case HloOpcode::kBatchNormGrad:
+    case HloOpcode::kBatchNormInference:
+    case HloOpcode::kBatchNormTraining:
     case HloOpcode::kReduce:
-    case HloOpcode::kSelectAndScatter:
     case HloOpcode::kReduceWindow:
+    case HloOpcode::kSelectAndScatter:
       return kPurple;
-    case HloOpcode::kMap:
     case HloOpcode::kFusion:
+    case HloOpcode::kMap:
       return kGray;
-    case HloOpcode::kSend:
-    case HloOpcode::kSendDone:
-    case HloOpcode::kRecv:
-    case HloOpcode::kRecvDone:
+    case HloOpcode::kCrossReplicaSum:
     case HloOpcode::kInfeed:
     case HloOpcode::kOutfeed:
-    case HloOpcode::kCrossReplicaSum:
+    case HloOpcode::kRecv:
+    case HloOpcode::kRecvDone:
+    case HloOpcode::kSend:
+    case HloOpcode::kSendDone:
       return kBrown;
+    case HloOpcode::kCall:
     case HloOpcode::kConditional:
     case HloOpcode::kCustomCall:
     case HloOpcode::kWhile:
-    case HloOpcode::kCall:
       return kDarkGreen;
     case HloOpcode::kConstant:
       LOG(FATAL) << "Constants don't get their own nodes in the graph.";
diff --git a/tensorflow/compiler/xla/service/hlo_instruction.cc b/tensorflow/compiler/xla/service/hlo_instruction.cc
index 464af7c554..854185af56 100644
--- a/tensorflow/compiler/xla/service/hlo_instruction.cc
+++ b/tensorflow/compiler/xla/service/hlo_instruction.cc
@@ -518,6 +518,15 @@ HloInstruction::CreateDynamicUpdateSlice(const Shape& shape,
   return instruction;
 }
 
+/* static */ std::unique_ptr<HloInstruction>
+HloInstruction::CreateBitcastConvert(const Shape& shape,
+                                     HloInstruction* operand) {
+  auto instruction =
+      WrapUnique(new HloInstruction(HloOpcode::kBitcastConvert, shape));
+  instruction->AppendOperand(operand);
+  return instruction;
+}
+
 /* static */ std::unique_ptr<HloInstruction> HloInstruction::CreateReduce(
     const Shape& shape, HloInstruction* arg, HloInstruction* init_value,
     tensorflow::gtl::ArraySlice<int64> dimensions_to_reduce,
@@ -1115,6 +1124,10 @@ std::unique_ptr<HloInstruction> HloInstruction::CloneWithNewOperands(
       CHECK_EQ(new_operands.size(), 1);
       clone = CreateConvert(shape, new_operands[0]);
       break;
+    case HloOpcode::kBitcastConvert:
+      CHECK_EQ(new_operands.size(), 1);
+      clone = CreateBitcastConvert(shape, new_operands[0]);
+      break;
     case HloOpcode::kReducePrecision:
       CHECK_EQ(new_operands.size(), 1);
       clone = CreateReducePrecision(shape, new_operands[0], exponent_bits_,
@@ -1555,6 +1568,7 @@ bool HloInstruction::IdenticalSlowPath(
     // A convert result is determined by the primitive type that the operand is
     // converted into.
     case HloOpcode::kConvert:
+    case HloOpcode::kBitcastConvert:
       return shape().element_type() == other.shape().element_type();
 
     // A reduce-precision operation is determined by the bit sizes.
@@ -2295,6 +2309,8 @@ Status HloInstruction::Visit(DfsHloVisitorBase<HloInstructionPtr>* visitor) {
       return visitor->HandleConcatenate(this);
     case HloOpcode::kConvert:
       return visitor->HandleConvert(this);
+    case HloOpcode::kBitcastConvert:
+      return visitor->HandleBitcastConvert(this);
     case HloOpcode::kCopy:
       return visitor->HandleCopy(this);
     case HloOpcode::kMultiply:
@@ -2667,6 +2683,7 @@ bool HloInstruction::IsElementwise() const {
     case HloOpcode::kRoundNearestAfz:
     case HloOpcode::kCeil:
     case HloOpcode::kConvert:
+    case HloOpcode::kBitcastConvert:
     case HloOpcode::kCopy:
     case HloOpcode::kCos:
     case HloOpcode::kExp:
diff --git a/tensorflow/compiler/xla/service/hlo_instruction.h b/tensorflow/compiler/xla/service/hlo_instruction.h
index f3dbe9e33f..1bd0cca945 100644
--- a/tensorflow/compiler/xla/service/hlo_instruction.h
+++ b/tensorflow/compiler/xla/service/hlo_instruction.h
@@ -176,6 +176,11 @@ class HloInstruction {
   static std::unique_ptr<HloInstruction> CreateConvert(const Shape& shape,
                                                        HloInstruction* operand);
 
+  // Creates a bitcast conversion instruction, where operand is the data to
+  // convert and shape is the target shape for the conversion.
+  static std::unique_ptr<HloInstruction> CreateBitcastConvert(
+      const Shape& shape, HloInstruction* operand);
+
   // Creates an infeed instruction, which reads data of the given shape from the
   // Infeed interface of the device.
   static std::unique_ptr<HloInstruction> CreateInfeed(const Shape& shape,
diff --git a/tensorflow/compiler/xla/service/hlo_opcode.h b/tensorflow/compiler/xla/service/hlo_opcode.h
index 7b07027441..f3f7935758 100644
--- a/tensorflow/compiler/xla/service/hlo_opcode.h
+++ b/tensorflow/compiler/xla/service/hlo_opcode.h
@@ -52,6 +52,7 @@ namespace xla {
   V(kBatchNormInference, "batch-norm-inference")             \
   V(kBatchNormTraining, "batch-norm-training")               \
   V(kBitcast, "bitcast")                                     \
+  V(kBitcastConvert, "bitcast-convert")                      \
   V(kBroadcast, "broadcast")                                 \
   V(kCall, "call", kHloOpcodeIsVariadic)                     \
   V(kCeil, "ceil")                                           \
diff --git a/tensorflow/compiler/xla/service/hlo_verifier.cc b/tensorflow/compiler/xla/service/hlo_verifier.cc
index e353a75cab..f2a739c1e2 100644
--- a/tensorflow/compiler/xla/service/hlo_verifier.cc
+++ b/tensorflow/compiler/xla/service/hlo_verifier.cc
@@ -59,15 +59,17 @@ class ShapeVerifier : public DfsHloVisitor {
   }
 
   Status HandleConvert(HloInstruction* convert) override {
-    if (ShapeUtil::ElementIsComplex(convert->operand(0)->shape())) {
-      TF_RET_CHECK(ShapeUtil::ElementIsComplex(convert->shape()))
-          << "Unsupported complex->real kConvert";
-    }
     return CheckShape(convert, ShapeInference::InferConvertShape(
                                    convert->operand(0)->shape(),
                                    convert->shape().element_type()));
   }
 
+  Status HandleBitcastConvert(HloInstruction* convert) override {
+    return CheckShape(convert, ShapeInference::InferBitcastConvertShape(
+                                   convert->operand(0)->shape(),
+                                   convert->shape().element_type()));
+  }
+
   Status HandleCopy(HloInstruction* copy) override {
     return CheckUnaryShape(copy);
   }
@@ -580,7 +582,7 @@ StatusOr<bool> HloVerifier::Run(HloModule* module) {
         // or ComputationLowerer::Visit()
         TF_RET_CHECK(instruction->dimensions().size() ==
                      ShapeUtil::Rank(instruction->operand(0)->shape()))
-                << "Broadcast HLO has invalid number of dimensions.";
+            << "Broadcast HLO has invalid number of dimensions.";
       } else if (instruction->opcode() == HloOpcode::kWhile) {
         auto* while_cond = instruction->while_condition();
         auto* while_body = instruction->while_body();
diff --git a/tensorflow/compiler/xla/service/instruction_fusion.cc b/tensorflow/compiler/xla/service/instruction_fusion.cc
index de4804996f..ba901b99e4 100644
--- a/tensorflow/compiler/xla/service/instruction_fusion.cc
+++ b/tensorflow/compiler/xla/service/instruction_fusion.cc
@@ -33,7 +33,9 @@ namespace xla {
   switch (instruction.opcode()) {
     // Cheap instructions.
     case HloOpcode::kAdd:
+    case HloOpcode::kAnd:
     case HloOpcode::kBitcast:
+    case HloOpcode::kBitcastConvert:
     case HloOpcode::kBroadcast:
     case HloOpcode::kCeil:
     case HloOpcode::kClamp:
@@ -53,15 +55,14 @@ namespace xla {
     case HloOpcode::kInfeed:
     case HloOpcode::kIsFinite:
     case HloOpcode::kLe:
-    case HloOpcode::kAnd:
-    case HloOpcode::kNot:
-    case HloOpcode::kOr:
     case HloOpcode::kLt:
     case HloOpcode::kMaximum:
     case HloOpcode::kMinimum:
     case HloOpcode::kMultiply:
     case HloOpcode::kNe:
     case HloOpcode::kNegate:
+    case HloOpcode::kNot:
+    case HloOpcode::kOr:
     case HloOpcode::kOutfeed:
     case HloOpcode::kPad:
     case HloOpcode::kReal:
@@ -88,9 +89,9 @@ namespace xla {
 
     // Expensive instructions.
     case HloOpcode::kAtan2:
-    case HloOpcode::kBatchNormTraining:
-    case HloOpcode::kBatchNormInference:
     case HloOpcode::kBatchNormGrad:
+    case HloOpcode::kBatchNormInference:
+    case HloOpcode::kBatchNormTraining:
     case HloOpcode::kCall:
     case HloOpcode::kConditional:
     case HloOpcode::kConvolution:
@@ -104,19 +105,19 @@ namespace xla {
     case HloOpcode::kMap:
     case HloOpcode::kParameter:
     case HloOpcode::kPower:
+    case HloOpcode::kRecv:
+    case HloOpcode::kRecvDone:
     case HloOpcode::kReduce:
     case HloOpcode::kReduceWindow:
     case HloOpcode::kRemainder:
     case HloOpcode::kRng:
     case HloOpcode::kSelectAndScatter:
+    case HloOpcode::kSend:
+    case HloOpcode::kSendDone:
     case HloOpcode::kSort:
     case HloOpcode::kTanh:
     case HloOpcode::kTrace:
     case HloOpcode::kWhile:
-    case HloOpcode::kSend:
-    case HloOpcode::kSendDone:
-    case HloOpcode::kRecv:
-    case HloOpcode::kRecvDone:
       return true;
   }
 
diff --git a/tensorflow/compiler/xla/service/service.cc b/tensorflow/compiler/xla/service/service.cc
index 0544a1697b..902a1afb45 100644
--- a/tensorflow/compiler/xla/service/service.cc
+++ b/tensorflow/compiler/xla/service/service.cc
@@ -1364,6 +1364,10 @@ tensorflow::Status Service::Op(const OpRequest* arg, OpResponse* result) {
       handle_status =
           computation->AddConvertInstruction(arg->convert_request());
       break;
+    case OpRequest::kBitcastConvertRequest:
+      handle_status = computation->AddBitcastConvertInstruction(
+          arg->bitcast_convert_request());
+      break;
     case OpRequest::kConvolveRequest:
       handle_status =
           computation->AddConvolveInstruction(arg->convolve_request());
diff --git a/tensorflow/compiler/xla/service/shape_inference.cc b/tensorflow/compiler/xla/service/shape_inference.cc
index dcd726f22c..0a2bf939c1 100644
--- a/tensorflow/compiler/xla/service/shape_inference.cc
+++ b/tensorflow/compiler/xla/service/shape_inference.cc
@@ -441,6 +441,14 @@ StatusOr<Shape> InferWindowOutputShape(const Shape& base_shape,
 
 /* static */ StatusOr<Shape> ShapeInference::InferConvertShape(
     const Shape& operand_shape, PrimitiveType new_element_type) {
+  auto old_element_type = operand_shape.element_type();
+  if (primitive_util::IsComplexType(old_element_type) &&
+      !primitive_util::IsComplexType(new_element_type)) {
+    return Unimplemented(
+        "Unsupported conversion from complex to real type: %s => %s",
+        ShapeUtil::HumanString(operand_shape).c_str(),
+        PrimitiveType_Name(new_element_type).c_str());
+  }
   if (ShapeUtil::IsTuple(operand_shape) || new_element_type == TUPLE) {
     // Note: we may want to support tuple conversions via this operation in the
     // future, by recursing into the tuple elements to check all sub-conversions
@@ -454,6 +462,36 @@ StatusOr<Shape> InferWindowOutputShape(const Shape& base_shape,
   return ShapeUtil::ChangeElementType(operand_shape, new_element_type);
 }
 
+/* static */ StatusOr<Shape> ShapeInference::InferBitcastConvertShape(
+    const Shape& operand_shape, PrimitiveType new_element_type) {
+  auto old_element_type = operand_shape.element_type();
+  if (primitive_util::IsComplexType(old_element_type) !=
+      primitive_util::IsComplexType(new_element_type)) {
+    return Unimplemented(
+        "Unsupported conversion between real and complex types: %s => %s",
+        ShapeUtil::HumanString(operand_shape).c_str(),
+        PrimitiveType_Name(new_element_type).c_str());
+  }
+  if (ShapeUtil::IsTuple(operand_shape) || new_element_type == TUPLE) {
+    // Note: we may want to support tuple conversions via this operation in the
+    // future, by recursing into the tuple elements to check all sub-conversions
+    // are valid. For now we just reject them, though.
+    return InvalidArgument(
+        "cannot convert from or to tuple type; requested conversion: %s => %s",
+        ShapeUtil::HumanString(operand_shape).c_str(),
+        PrimitiveType_Name(new_element_type).c_str());
+  }
+  if (primitive_util::BitWidth(old_element_type) !=
+      primitive_util::BitWidth(new_element_type)) {
+    return InvalidArgument(
+        "cannot bitcast types with different bit-widths: %s => %s",
+        PrimitiveType_Name(old_element_type).c_str(),
+        PrimitiveType_Name(new_element_type).c_str());
+  }
+
+  return ShapeUtil::ChangeElementType(operand_shape, new_element_type);
+}
+
 /* static */ StatusOr<Shape> ShapeInference::InferReducePrecisionShape(
     const Shape& operand_shape, const int exponent_bits,
     const int mantissa_bits) {
diff --git a/tensorflow/compiler/xla/service/shape_inference.h b/tensorflow/compiler/xla/service/shape_inference.h
index d5d497176d..0aadb98a40 100644
--- a/tensorflow/compiler/xla/service/shape_inference.h
+++ b/tensorflow/compiler/xla/service/shape_inference.h
@@ -204,6 +204,13 @@ class ShapeInference {
   static StatusOr<Shape> InferConvertShape(const Shape& operand_shape,
                                            PrimitiveType new_element_type);
 
+  // Helper that validates the given operand shape can be bitcast converted to
+  // the target output_shape via a bitcast convert instruction -- the
+  // requirement is that the shape is identical except for the element type and
+  // the element types have identical bit-widths.
+  static StatusOr<Shape> InferBitcastConvertShape(
+      const Shape& operand_shape, PrimitiveType new_element_type);
+
   // Helper that validates the input data type for a reduce-precision operation,
   // and returns the result shape.
   static StatusOr<Shape> InferReducePrecisionShape(const Shape& operand_shape,
diff --git a/tensorflow/compiler/xla/service/user_computation.cc b/tensorflow/compiler/xla/service/user_computation.cc
index b449b4f288..b0b15bb571 100644
--- a/tensorflow/compiler/xla/service/user_computation.cc
+++ b/tensorflow/compiler/xla/service/user_computation.cc
@@ -994,6 +994,32 @@ StatusOr<ComputationDataHandle> UserComputation::AddConvertInstruction(
   return handle;
 }
 
+StatusOr<ComputationDataHandle> UserComputation::AddBitcastConvertInstruction(
+    const ConvertRequest& convert_request) {
+  tensorflow::mutex_lock lock(mutex_);
+
+  TF_ASSIGN_OR_RETURN(const OperationRequest* operand,
+                      LookUpRequest(convert_request.operand()));
+
+  TF_ASSIGN_OR_RETURN(Shape new_shape, ShapeInference::InferConvertShape(
+                                           operand->output_shape(),
+                                           convert_request.new_element_type()));
+
+  ComputationDataHandle handle = CreateComputationDataHandle();
+
+  OperationRequest& request =
+      (*session_computation_.mutable_requests())[handle.handle()];
+  *request.mutable_output_handle() = handle;
+  *request.mutable_output_shape() = new_shape;
+  *request.mutable_request()->mutable_bitcast_convert_request() =
+      convert_request;
+
+  VLOG(1) << "AddBitcastConvertInstruction (" << GetVersionedHandleInternal()
+          << "), data handle " << handle.handle() << ": "
+          << convert_request.ShortDebugString();
+  return handle;
+}
+
 StatusOr<ComputationDataHandle> UserComputation::AddReducePrecisionInstruction(
     const ReducePrecisionRequest& reduce_precision_request) {
   tensorflow::mutex_lock lock(mutex_);
@@ -2370,6 +2396,13 @@ static void ForEachOperand(
       break;
     }
 
+    case OpRequest::kBitcastConvertRequest: {
+      const ConvertRequest& convert_request =
+          request.request().bitcast_convert_request();
+      apply(convert_request.operand());
+      break;
+    }
+
     case OpRequest::kWhileRequest: {
       const WhileRequest& while_request = request.request().while_request();
       apply(while_request.init());
@@ -2954,6 +2987,15 @@ void ComputationLowerer::Visit(
       break;
     }
 
+    case OpRequest::kBitcastConvertRequest: {
+      const ConvertRequest& convert_request =
+          request.request().bitcast_convert_request();
+      HloInstruction* operand = lookup_instruction(convert_request.operand());
+      hlo_instruction = add_instruction(HloInstruction::CreateBitcastConvert(
+          request.output_shape(), operand));
+      break;
+    }
+
     case OpRequest::kWhileRequest: {
       const WhileRequest& while_request = request.request().while_request();
       CHECK_EQ(2, request.embedded_computation_versions_size());
@@ -3156,7 +3198,7 @@ void ComputationLowerer::Visit(
       LOG(FATAL) << "Unexpected request type: " << request.request().op_case();
   }
   (*instructions)[handle.handle()] = hlo_instruction;
-}
+}  // NOLINT(readability/fn_size)
 
 }  // namespace
 
diff --git a/tensorflow/compiler/xla/service/user_computation.h b/tensorflow/compiler/xla/service/user_computation.h
index 473a8b8f73..317c631dca 100644
--- a/tensorflow/compiler/xla/service/user_computation.h
+++ b/tensorflow/compiler/xla/service/user_computation.h
@@ -185,6 +185,10 @@ class UserComputation {
   StatusOr<ComputationDataHandle> AddConvertInstruction(
       const ConvertRequest& convert_request);
 
+  // Enqueues a bitcast element instruction onto this user computation.
+  StatusOr<ComputationDataHandle> AddBitcastConvertInstruction(
+      const ConvertRequest& convert_request);
+
   // Enqueues a reduce instruction onto this user computation.
   StatusOr<ComputationDataHandle> AddReduceInstruction(
       const ReduceRequest& reduce_request,
diff --git a/tensorflow/compiler/xla/tests/BUILD b/tensorflow/compiler/xla/tests/BUILD
index aa1804cc21..6811dbb39f 100644
--- a/tensorflow/compiler/xla/tests/BUILD
+++ b/tensorflow/compiler/xla/tests/BUILD
@@ -1262,6 +1262,23 @@ xla_test(
     ],
 )
 
+xla_test(
+    name = "bitcast_convert_test",
+    srcs = ["bitcast_convert_test.cc"],
+    deps = [
+        "//tensorflow/compiler/xla:shape_util",
+        "//tensorflow/compiler/xla:xla_data_proto",
+        "//tensorflow/compiler/xla/client:computation_builder",
+        "//tensorflow/compiler/xla/client:local_client",
+        "//tensorflow/compiler/xla/tests:client_library_test_base",
+        "//tensorflow/compiler/xla/tests:literal_test_util",
+        "//tensorflow/compiler/xla/tests:xla_internal_test_main",
+        "//tensorflow/core:lib",
+        "//tensorflow/core:stream_executor_no_cuda",
+        "//tensorflow/core:test",
+    ],
+)
+
 xla_test(
     name = "compilation_cache_test",
     srcs = ["compilation_cache_test.cc"],
diff --git a/tensorflow/compiler/xla/tests/bitcast_convert_test.cc b/tensorflow/compiler/xla/tests/bitcast_convert_test.cc
new file mode 100644
index 0000000000..0d94d65c10
--- /dev/null
+++ b/tensorflow/compiler/xla/tests/bitcast_convert_test.cc
@@ -0,0 +1,141 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include <cstdint>
+#include <limits>
+#include <memory>
+#include <vector>
+
+#include "tensorflow/compiler/xla/client/computation_builder.h"
+#include "tensorflow/compiler/xla/client/local_client.h"
+#include "tensorflow/compiler/xla/shape_util.h"
+#include "tensorflow/compiler/xla/tests/client_library_test_base.h"
+#include "tensorflow/compiler/xla/tests/literal_test_util.h"
+#include "tensorflow/compiler/xla/tests/test_macros.h"
+#include "tensorflow/compiler/xla/xla_data.pb.h"
+#include "tensorflow/core/platform/stream_executor_no_cuda.h"
+#include "tensorflow/core/platform/test.h"
+#include "tensorflow/core/platform/types.h"
+
+namespace xla {
+namespace {
+
+class BitcastConvertTest : public ClientLibraryTestBase {
+ public:
+  explicit BitcastConvertTest(perftools::gputools::Platform* platform = nullptr)
+      : ClientLibraryTestBase(platform) {
+    mutable_debug_options()->add_xla_disable_hlo_passes("algsimp");
+    mutable_debug_options()->add_xla_disable_hlo_passes("inline");
+  }
+};
+
+TEST_F(BitcastConvertTest, ConvertR1S32ToR1S32) {
+  ComputationBuilder builder(client_, TestName());
+  auto a = builder.ConstantR1<int32>({42, 64});
+  builder.BitcastConvertType(a, S32);
+
+  std::vector<int32> expected = {42, 64};
+  ComputeAndCompareR1<int32>(&builder, expected, {});
+}
+
+TEST_F(BitcastConvertTest, ConvertR1F32ToR1F32) {
+  ComputationBuilder builder(client_, TestName());
+  auto a = builder.ConstantR1<float>({42.0f, 64.0f});
+  builder.BitcastConvertType(a, F32);
+
+  std::vector<float> expected = {42.0f, 64.0f};
+  ComputeAndCompareR1<float>(&builder, expected, {});
+}
+
+TEST_F(BitcastConvertTest, BitcastR1S32ToR1F32) {
+  ComputationBuilder builder(client_, TestName());
+  auto a =
+      builder.ConstantR1<int32>({0, static_cast<int32>(0x80000000), 0x3F800000,
+                                 static_cast<int32>(0xBF800000), 0x3F000000,
+                                 static_cast<int32>(0xBF000000)});
+  builder.BitcastConvertType(a, F32);
+
+  std::vector<float> expected = {0.0f, -0.0f, 1.0f, -1.0f, 0.5f, -0.5f};
+  ComputeAndCompareR1<float>(&builder, expected, {});
+}
+
+XLA_TEST_F(BitcastConvertTest, ConvertR1S0S32ToR1S0F32) {
+  ComputationBuilder builder(client_, TestName());
+  auto a = builder.ConstantR1<int32>({});
+  builder.BitcastConvertType(a, F32);
+
+  std::vector<float> expected = {};
+  ComputeAndCompareR1<float>(&builder, expected, {});
+}
+
+TEST_F(BitcastConvertTest, ConvertR1F32ToR1S32) {
+  ComputationBuilder builder(client_, TestName());
+  auto a = builder.ConstantR1<float>({42.6, 64.4});
+  builder.BitcastConvertType(a, S32);
+
+  std::vector<int32> expected = {0x422a6666, 0x4280cccd};
+  ComputeAndCompareR1<int32>(&builder, expected, {});
+}
+
+TEST_F(BitcastConvertTest, ConvertS32Extremes) {
+  ComputationBuilder builder(client_, TestName());
+  auto a = builder.ConstantR1<int32>(
+      {std::numeric_limits<int32>::min(), std::numeric_limits<int32>::max()});
+  builder.BitcastConvertType(a, F32);
+
+  std::vector<float> expected = {-0.0f, NAN};
+  ComputeAndCompareR1<float>(&builder, expected, {}, ErrorSpec(0, 0));
+}
+
+TEST_F(BitcastConvertTest, ConvertMapToS32) {
+  ComputationBuilder builder(client_, TestName());
+  auto b = builder.CreateSubBuilder("convert");
+  auto param = b->Parameter(0, ShapeUtil::MakeShape(F32, {}), "in");
+  b->BitcastConvertType(param, S32);
+  auto a = builder.ConstantR1<float>({42.0f, 64.0f});
+  builder.Map({a}, b->BuildAndNoteError(), {0});
+
+  std::vector<int32> expected = {0x42280000, 0x42800000};
+  ComputeAndCompareR1<int32>(&builder, expected, {});
+}
+
+TEST_F(BitcastConvertTest, ConvertMapToF32) {
+  ComputationBuilder builder(client_, TestName());
+  auto b = builder.CreateSubBuilder("convert");
+  auto param = b->Parameter(0, ShapeUtil::MakeShape(S32, {}), "in");
+  b->BitcastConvertType(param, F32);
+  auto a = builder.ConstantR1<int32>({0x42280000, 0x42800000});
+  builder.Map({a}, b->BuildAndNoteError(), {0});
+
+  std::vector<float> expected = {42.0f, 64.0f};
+  ComputeAndCompareR1<float>(&builder, expected, {});
+}
+
+// Regression test for b/31758660. When ReshapeMover transforms
+//   input -> reshape -> convert
+// to
+//   input -> convert -> reshape
+// the new convert should have the same element type as the old convert.
+TEST_F(BitcastConvertTest, ConvertReshape) {
+  ComputationBuilder builder(client_, TestName());
+  auto input = builder.ConstantR1<int32>({0x42280000});
+  auto reshape = builder.Reshape(input, /*dimensions=*/{0}, /*new_sizes=*/{});
+  builder.BitcastConvertType(reshape, F32);
+
+  ComputeAndCompareR0<float>(&builder, 42.0f, {});
+}
+
+}  // namespace
+}  // namespace xla
diff --git a/tensorflow/compiler/xla/tools/parser/hlo_parser.cc b/tensorflow/compiler/xla/tools/parser/hlo_parser.cc
index a102bdc3aa..a10497665a 100644
--- a/tensorflow/compiler/xla/tools/parser/hlo_parser.cc
+++ b/tensorflow/compiler/xla/tools/parser/hlo_parser.cc
@@ -434,6 +434,15 @@ bool HloParser::ParseInstruction(HloComputation::Builder* builder,
           HloInstruction::CreateConvert(shape, operands[0]));
       break;
     }
+    case HloOpcode::kBitcastConvert: {
+      if (!ParseOperands(&operands, /*expected_size=*/1) ||
+          !ParseAttributes(attrs)) {
+        return false;
+      }
+      instruction = builder->AddInstruction(
+          HloInstruction::CreateBitcastConvert(shape, operands[0]));
+      break;
+    }
     case HloOpcode::kCrossReplicaSum: {
       if (!ParseOperands(&operands, /*expected_size=*/1) ||
           !ParseAttributes(attrs)) {
diff --git a/tensorflow/compiler/xla/xla_data.proto b/tensorflow/compiler/xla/xla_data.proto
index 39f5806739..d3c5a88807 100644
--- a/tensorflow/compiler/xla/xla_data.proto
+++ b/tensorflow/compiler/xla/xla_data.proto
@@ -907,7 +907,8 @@ message OpRequest {
     BatchNormGradRequest batch_norm_grad_request = 37;
     BatchNormInferenceRequest batch_norm_inference_request = 38;
     FftRequest fft_request = 41;
-    // Next: 42
+    ConvertRequest bitcast_convert_request = 42;
+    // Next: 43
   }
 }
 
diff --git a/tensorflow/docs_src/performance/xla/operation_semantics.md b/tensorflow/docs_src/performance/xla/operation_semantics.md
index dfd9c12c89..8831b3d0fd 100644
--- a/tensorflow/docs_src/performance/xla/operation_semantics.md
+++ b/tensorflow/docs_src/performance/xla/operation_semantics.md
@@ -13,6 +13,28 @@ arbitrary-dimensional array. For convenience, special cases have more specific
 and familiar names; for example a *vector* is a 1-dimensional array and a
 *matrix* is a 2-dimensional array.
 
+## BitcastConvertType
+
+See also
+[`ComputationBuilder::BitcastConvertType`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/computation_builder.h).
+
+Similar to a `tf.bitcast` in TensorFlow, performs an element-wise bitcast
+operation from a data shape to a target shape. The dimensions must match, and
+the conversion is an element-wise one; e.g. `s32` elements become `f32` elements
+via bitcast routine. Bitcast is implemented as a low-level cast, so machines
+with different floating point representations will give different results.
+
+<b> `BitcastConvertType(operand, new_element_type)` </b>
+
+Arguments          | Type                    | Semantics
+------------------ | ----------------------- | ---------------------------
+`operand`          | `ComputationDataHandle` | array of type T with dims D
+`new_element_type` | `PrimitiveType`         | type U
+
+The dimensions of the operand and the target shape must match. The bit-width of
+the source and destination element types must be equal. The source
+and destination element types must not be tuples.
+
 ## Broadcast
 
 See also
@@ -234,9 +256,8 @@ Arguments          | Type                    | Semantics
 `operand`          | `ComputationDataHandle` | array of type T with dims D
 `new_element_type` | `PrimitiveType`         | type U
 
-If the dimensions of the operand and the target shape do not match, or an
-invalid conversion is requested (e.g. to/from a tuple) an error will be
-produced.
+The dimensions of the operand and the target shape must match. The source and
+destination element types must not be tuples.
 
 A conversion such as `T=s32` to `U=f32` will perform a normalizing int-to-float
 conversion routine such as round-to-nearest-even.
@@ -646,8 +667,8 @@ Normalizes an array across batch and spatial dimensions.
 For each feature in the feature dimension (`feature_index` is the index for the
 feature dimension in `operand`), the operation calculates the mean and variance
 across all the other dimensions and use the mean and variance to normalize each
-element in `operand`. If an invalid `feature_index` is passed, an error is
-produced.
+element in `operand`. The `feature_index` must be a valid index for the feature
+dimension in `operand`.
 
 The algorithm goes as follows for each batch in `operand` \\(x\\) that
 contains `m` elements with `w` and `h` as the size of spatial dimensions (
@@ -702,8 +723,8 @@ Normalizes an array across batch and spatial dimensions.
 For each feature in the feature dimension (`feature_index` is the index for the
 feature dimension in `operand`), the operation calculates the mean and variance
 across all the other dimensions and use the mean and variance to normalize each
-element in `operand`. If an invalid `feature_index` is passed, an error is
-produced.
+element in `operand`. The `feature_index` must be a valid index for the feature
+dimension in `operand`.
 
 `BatchNormInference`  is equivalent to calling `BatchNormTraining` without
 computing `mean` and `variance` for each batch. It uses the input `mean` and
@@ -742,8 +763,8 @@ Calculates gradients of batch norm.
 
 For each feature in the feature dimension (`feature_index` is the index for the
 feature dimension in `operand`), the operation calculates the gradients with
-respect to `operand`, `offset` and `scale` across all the other dimensions. If
-an invalid `feature_index` is passed, an error is produced.
+respect to `operand`, `offset` and `scale` across all the other dimensions. The
+`feature_index` must be a valid index for the feature dimension in `operand`.
 
 The three gradients are defined by the following formulas:
 
@@ -808,8 +829,7 @@ device, interpreting the data as the given shape and its layout, and returns a
 `ComputationDataHandle` of the data. Multiple Infeed operations are allowed in a
 computation, but there must be a total order among the Infeed operations. For
 example, two Infeeds in the code below have a total order since there is a
-dependency between the while loops. The compiler issues an error if there isn't
-a total order.
+dependency between the while loops.
 
 ```
 result1 = while (condition, init = init_value) {
-- 
GitLab


From f93c48dc061d23495a4425fcad17d55159cb02b1 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 21 Nov 2017 23:05:17 -0800
Subject: [PATCH 0209/1225] Use LINKER_INITIALIZED for mutexes with static
 storage class.

This was causing exit-time races as some threads were accessing the
mutex as it was being destructed.

			---------------

It is illegal to use any static type with a constructor/destructor with static
storage class in a multithreaded C++ programme that can exit(), even if the
constructor is protected by C++11's function-scope static initialization rules,
because exit-time destruction is unsafe in the presence of multiple threads.

For things that are not function-scope, the construction is also unsafe,
because global contruction ordering is undefined in general.

The LINKER_INITIALIZED variant constructor for TensorFlow's mutex avoids these
problems, at the cost of relying on the linker to zero-initialize the BSS
region.

PiperOrigin-RevId: 176612772
---
 tensorflow/contrib/ffmpeg/default/ffmpeg_lib_test.cc | 2 +-
 tensorflow/contrib/nccl/kernels/nccl_manager.cc      | 2 +-
 tensorflow/core/common_runtime/device_factory.cc     | 2 +-
 tensorflow/core/common_runtime/session_factory.cc    | 2 +-
 tensorflow/core/debug/debug_io_utils.cc              | 2 +-
 tensorflow/core/distributed_runtime/local_master.cc  | 2 +-
 tensorflow/core/distributed_runtime/server_lib.cc    | 2 +-
 tensorflow/core/framework/load_library.cc            | 2 +-
 tensorflow/core/framework/op_def_util.cc             | 2 +-
 tensorflow/core/kernels/meta_support.cc              | 2 +-
 tensorflow/core/lib/random/random.cc                 | 4 ++--
 tensorflow/core/platform/s3/s3_file_system.cc        | 2 +-
 tensorflow/python/lib/core/py_func.cc                | 2 +-
 13 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/tensorflow/contrib/ffmpeg/default/ffmpeg_lib_test.cc b/tensorflow/contrib/ffmpeg/default/ffmpeg_lib_test.cc
index 2871c14628..85b61b2616 100644
--- a/tensorflow/contrib/ffmpeg/default/ffmpeg_lib_test.cc
+++ b/tensorflow/contrib/ffmpeg/default/ffmpeg_lib_test.cc
@@ -39,7 +39,7 @@ const char kTestMp3Filename[] =
 
 // Set to true via a command line flag iff the test is expected to have FFmpeg
 // installed.
-mutex mu;
+mutex mu(LINKER_INITIALIZED);
 bool should_ffmpeg_be_installed GUARDED_BY(mu) = false;
 
 string ParseTestFlags(int* argc, char** argv) {
diff --git a/tensorflow/contrib/nccl/kernels/nccl_manager.cc b/tensorflow/contrib/nccl/kernels/nccl_manager.cc
index 1eb1481675..31a35b0d53 100644
--- a/tensorflow/contrib/nccl/kernels/nccl_manager.cc
+++ b/tensorflow/contrib/nccl/kernels/nccl_manager.cc
@@ -370,7 +370,7 @@ void NcclManager::AddParticipant(int num_devices, const string& key,
 }
 
 void NcclManager::RunCollective(const string& key, Collective* collective) {
-  static mutex collective_mu;
+  static mutex collective_mu(LINKER_INITIALIZED);
 
   auto* communicator = GetCommunicator(collective);
   collective->communicator = communicator;
diff --git a/tensorflow/core/common_runtime/device_factory.cc b/tensorflow/core/common_runtime/device_factory.cc
index fa12c48fb9..b43c718817 100644
--- a/tensorflow/core/common_runtime/device_factory.cc
+++ b/tensorflow/core/common_runtime/device_factory.cc
@@ -32,7 +32,7 @@ namespace tensorflow {
 namespace {
 
 static mutex* get_device_factory_lock() {
-  static mutex device_factory_lock;
+  static mutex device_factory_lock(LINKER_INITIALIZED);
   return &device_factory_lock;
 }
 
diff --git a/tensorflow/core/common_runtime/session_factory.cc b/tensorflow/core/common_runtime/session_factory.cc
index dba7a9253e..0234d4c372 100644
--- a/tensorflow/core/common_runtime/session_factory.cc
+++ b/tensorflow/core/common_runtime/session_factory.cc
@@ -29,7 +29,7 @@ namespace tensorflow {
 namespace {
 
 static mutex* get_session_factory_lock() {
-  static mutex session_factory_lock;
+  static mutex session_factory_lock(LINKER_INITIALIZED);
   return &session_factory_lock;
 }
 
diff --git a/tensorflow/core/debug/debug_io_utils.cc b/tensorflow/core/debug/debug_io_utils.cc
index 85d04daa65..f81445c20b 100644
--- a/tensorflow/core/debug/debug_io_utils.cc
+++ b/tensorflow/core/debug/debug_io_utils.cc
@@ -736,7 +736,7 @@ Status DebugGrpcChannel::ReceiveServerRepliesAndClose() {
   }
 }
 
-mutex DebugGrpcIO::streams_mu;
+mutex DebugGrpcIO::streams_mu(LINKER_INITIALIZED);
 
 int64 DebugGrpcIO::channel_connection_timeout_micros = 900 * 1000 * 1000;
 // TODO(cais): Make this configurable?
diff --git a/tensorflow/core/distributed_runtime/local_master.cc b/tensorflow/core/distributed_runtime/local_master.cc
index c7ba7abeaf..aaa4cfa734 100644
--- a/tensorflow/core/distributed_runtime/local_master.cc
+++ b/tensorflow/core/distributed_runtime/local_master.cc
@@ -159,7 +159,7 @@ Status LocalMaster::Reset(CallOptions* call_options,
 
 namespace {
 mutex* get_local_master_registry_lock() {
-  static mutex local_master_registry_lock;
+  static mutex local_master_registry_lock(LINKER_INITIALIZED);
   return &local_master_registry_lock;
 }
 
diff --git a/tensorflow/core/distributed_runtime/server_lib.cc b/tensorflow/core/distributed_runtime/server_lib.cc
index 0b7fed79cd..7d308bb723 100644
--- a/tensorflow/core/distributed_runtime/server_lib.cc
+++ b/tensorflow/core/distributed_runtime/server_lib.cc
@@ -24,7 +24,7 @@ namespace tensorflow {
 
 namespace {
 mutex* get_server_factory_lock() {
-  static mutex server_factory_lock;
+  static mutex server_factory_lock(LINKER_INITIALIZED);
   return &server_factory_lock;
 }
 
diff --git a/tensorflow/core/framework/load_library.cc b/tensorflow/core/framework/load_library.cc
index f825335300..b9e33b148f 100644
--- a/tensorflow/core/framework/load_library.cc
+++ b/tensorflow/core/framework/load_library.cc
@@ -45,7 +45,7 @@ struct Library {
 // perform initialization again, so the OpList would be empty.
 Status LoadLibrary(const char* library_filename, void** result,
                    const void** buf, size_t* len) {
-  static mutex mu;
+  static mutex mu(LINKER_INITIALIZED);
   static std::unordered_map<string, Library> loaded_libs;
   Env* env = Env::Default();
   Library library;
diff --git a/tensorflow/core/framework/op_def_util.cc b/tensorflow/core/framework/op_def_util.cc
index f7d4166f97..29feda499f 100644
--- a/tensorflow/core/framework/op_def_util.cc
+++ b/tensorflow/core/framework/op_def_util.cc
@@ -332,7 +332,7 @@ Status CheckOpDeprecation(const OpDef& op_def, int graph_def_version) {
           ". ", dep.explanation(), ".");
     } else {
       // Warn only once for each op name, and do it in a threadsafe manner.
-      static mutex mu;
+      static mutex mu(LINKER_INITIALIZED);
       static std::unordered_set<string> warned;
       bool warn;
       {
diff --git a/tensorflow/core/kernels/meta_support.cc b/tensorflow/core/kernels/meta_support.cc
index b29feb0032..9fed01189f 100644
--- a/tensorflow/core/kernels/meta_support.cc
+++ b/tensorflow/core/kernels/meta_support.cc
@@ -82,7 +82,7 @@ gemmlowp::WorkersPool* GetWorkersPool() {
 }
 
 mutex& GetMutex() {
-  static mutex mu;
+  static mutex mu(LINKER_INITIALIZED);
   return mu;
 }
 
diff --git a/tensorflow/core/lib/random/random.cc b/tensorflow/core/lib/random/random.cc
index 723c1100f8..82dc829507 100644
--- a/tensorflow/core/lib/random/random.cc
+++ b/tensorflow/core/lib/random/random.cc
@@ -33,14 +33,14 @@ std::mt19937_64 InitRngWithDefaultSeed() { return std::mt19937_64(); }
 
 uint64 New64() {
   static std::mt19937_64* rng = InitRngWithRandomSeed();
-  static mutex mu;
+  static mutex mu(LINKER_INITIALIZED);
   mutex_lock l(mu);
   return (*rng)();
 }
 
 uint64 New64DefaultSeed() {
   static std::mt19937_64 rng = InitRngWithDefaultSeed();
-  static mutex mu;
+  static mutex mu(LINKER_INITIALIZED);
   mutex_lock l(mu);
   return rng();
 }
diff --git a/tensorflow/core/platform/s3/s3_file_system.cc b/tensorflow/core/platform/s3/s3_file_system.cc
index 51c85592bf..234f3c3aed 100644
--- a/tensorflow/core/platform/s3/s3_file_system.cc
+++ b/tensorflow/core/platform/s3/s3_file_system.cc
@@ -38,7 +38,7 @@ static const size_t kS3ReadAppendableFileBufferSize = 1024 * 1024;
 static const int kS3GetChildrenMaxKeys = 100;
 
 Aws::Client::ClientConfiguration& GetDefaultClientConfig() {
-  static mutex cfg_lock;
+  static mutex cfg_lock(LINKER_INITIALIZED);
   static bool init(false);
   static Aws::Client::ClientConfiguration cfg;
 
diff --git a/tensorflow/python/lib/core/py_func.cc b/tensorflow/python/lib/core/py_func.cc
index b30125761f..8bf831f8ba 100644
--- a/tensorflow/python/lib/core/py_func.cc
+++ b/tensorflow/python/lib/core/py_func.cc
@@ -32,7 +32,7 @@ limitations under the License.
 namespace tensorflow {
 namespace {
 
-static mutex mu;
+static mutex mu(LINKER_INITIALIZED);
 static PyObject* py_trampoline GUARDED_BY(mu) = nullptr;
 
 // Returns the py_trampoline that is used to pass the control to the
-- 
GitLab


From 5fbda9d8da7b98f62e83a392f047adf307b48b02 Mon Sep 17 00:00:00 2001
From: zxcqwe4906 <b00902042@ntu.edu.tw>
Date: Wed, 22 Nov 2017 15:13:18 +0800
Subject: [PATCH 0210/1225] Change ndimage.imread to imageio.imread. (#14710)

Scipy will not support imread from 1.0.0 as its document says:
https://docs.scipy.org/doc/scipy-1.0.0/reference/generated/scipy.misc.imread.html

Change to imageio.imread and add its correspond exception.
---
 tensorflow/examples/udacity/1_notmnist.ipynb | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tensorflow/examples/udacity/1_notmnist.ipynb b/tensorflow/examples/udacity/1_notmnist.ipynb
index 39674e1aa4..dffe5d37c6 100644
--- a/tensorflow/examples/udacity/1_notmnist.ipynb
+++ b/tensorflow/examples/udacity/1_notmnist.ipynb
@@ -46,13 +46,13 @@
         "# These are all the modules we'll be using later. Make sure you can import them\n",
         "# before proceeding further.\n",
         "from __future__ import print_function\n",
+        "import imageio\n",
         "import matplotlib.pyplot as plt\n",
         "import numpy as np\n",
         "import os\n",
         "import sys\n",
         "import tarfile\n",
         "from IPython.display import display, Image\n",
-        "from scipy import ndimage\n",
         "from sklearn.linear_model import LogisticRegression\n",
         "from six.moves.urllib.request import urlretrieve\n",
         "from six.moves import cPickle as pickle\n",
@@ -325,13 +325,13 @@
         "  for image in image_files:\n",
         "    image_file = os.path.join(folder, image)\n",
         "    try:\n",
-        "      image_data = (ndimage.imread(image_file).astype(float) - \n",
+        "      image_data = (imageio.imread(image_file).astype(float) - \n",
         "                    pixel_depth / 2) / pixel_depth\n",
         "      if image_data.shape != (image_size, image_size):\n",
         "        raise Exception('Unexpected image shape: %s' % str(image_data.shape))\n",
         "      dataset[num_images, :, :] = image_data\n",
         "      num_images = num_images + 1\n",
-        "    except IOError as e:\n",
+        "    except (IOError, ValueError) as e:\n",
         "      print('Could not read:', image_file, ':', e, '- it\\'s ok, skipping.')\n",
         "    \n",
         "  dataset = dataset[0:num_images, :, :]\n",
-- 
GitLab


From c6d603f02e1a98f871912cda6716cdcbed6b439e Mon Sep 17 00:00:00 2001
From: Yifei Feng <yifeif@google.com>
Date: Tue, 21 Nov 2017 23:32:43 -0800
Subject: [PATCH 0211/1225] Merge changes from github.

PiperOrigin-RevId: 176615107
---
 CODE_OF_CONDUCT.md                            |   6 +-
 README.md                                     |   4 +-
 configure.py                                  |  38 +-
 tensorflow/BUILD                              |  16 +
 tensorflow/compiler/aot/tfcompile.bzl         |  15 +-
 tensorflow/compiler/tests/BUILD               |   2 +-
 .../compiler/tests/fused_batchnorm_test.py    |  25 +-
 tensorflow/compiler/xla/service/BUILD         |   2 -
 .../compiler/xla/service/hlo_instruction.h    |   2 +-
 .../xla/service/hlo_instruction_test.cc       |   4 +-
 tensorflow/contrib/batching/BUILD             |   1 +
 .../contrib/batching/kernels/batch_kernels.cc |   2 +-
 .../kernel_tests/csiszar_divergence_test.py   |   2 +-
 tensorflow/contrib/cmake/CMakeLists.txt       | 147 +++-
 .../contrib/cmake/external/boringssl.cmake    |   6 +-
 .../contrib/cmake/external/jsoncpp.cmake      |   6 +-
 tensorflow/contrib/cmake/external/lmdb.cmake  |   6 +-
 tensorflow/contrib/cmake/external/png.cmake   |   6 +-
 .../contrib/cmake/external/protobuf.cmake     |   6 +-
 tensorflow/contrib/cmake/external/re2.cmake   |   8 +-
 .../contrib/cmake/external/snappy.cmake       |   8 +-
 .../contrib/cmake/external/sqlite.cmake       |   6 +-
 tensorflow/contrib/cmake/external/zlib.cmake  |   6 +-
 tensorflow/contrib/cmake/tf_cc_ops.cmake      |  36 +-
 .../contrib/cmake/tf_core_kernels.cmake       |  23 +-
 .../cmake/tf_label_image_example.cmake        |   5 +
 tensorflow/contrib/cmake/tf_python.cmake      |  38 +-
 tensorflow/contrib/cmake/tf_shared_lib.cmake  |  45 +-
 .../contrib/cmake/tf_stream_executor.cmake    |   3 +
 tensorflow/contrib/cmake/tf_tools.cmake       |  13 +-
 tensorflow/contrib/cmake/tf_tutorials.cmake   |   5 +
 tensorflow/contrib/crf/python/ops/crf.py      |  19 +-
 .../contrib/data/python/kernel_tests/BUILD    |   8 +-
 tensorflow/contrib/distributions/BUILD        |  17 +
 tensorflow/contrib/distributions/__init__.py  |   2 +
 .../python/kernel_tests/cauchy_test.py        | 437 +++++++++++
 .../distributions/python/ops/cauchy.py        | 223 ++++++
 .../python/examples/notebooks/1_basics.ipynb  |   4 +-
 .../examples/notebooks/2_gradients.ipynb      |   6 +-
 .../examples/notebooks/3_datasets.ipynb       |  10 +-
 .../contrib/layers/python/layers/layers.py    |  18 +-
 .../layers/python/layers/layers_test.py       |  73 +-
 .../learn/python/learn/estimators/head.py     |   2 +-
 .../learn/python/learn/estimators/model_fn.py |   6 +-
 .../python/learn/learn_io/data_feeder.py      |  12 +-
 .../linear_optimizer/python/ops/sdca_ops.py   |  11 +-
 tensorflow/contrib/lite/python/BUILD          |   1 +
 .../contrib/lite/testing/generate_examples.py |  17 +-
 tensorflow/contrib/lite/toco/python/BUILD     |   1 +
 tensorflow/contrib/makefile/Makefile          |   3 +-
 tensorflow/contrib/makefile/README.md         |  41 +-
 tensorflow/contrib/makefile/build_all_ios.sh  |  54 +-
 .../contrib/makefile/compile_ios_protobuf.sh  | 369 ++++++----
 .../makefile/compile_ios_tensorflow.sh        | 155 ++--
 tensorflow/contrib/makefile/compile_nsync.sh  |   5 +-
 tensorflow/contrib/nn/__init__.py             |   2 +
 tensorflow/contrib/opt/BUILD                  |  18 +
 tensorflow/contrib/opt/__init__.py            |   5 +-
 .../training/multitask_optimizer_wrapper.py   | 138 ++++
 .../multitask_optimizer_wrapper_test.py       | 119 +++
 .../python/kernel_tests/core_rnn_cell_test.py |  42 ++
 .../rnn/python/kernel_tests/rnn_cell_test.py  |  44 ++
 tensorflow/contrib/rnn/python/ops/rnn_cell.py | 344 ++++++++-
 .../seq2seq/python/ops/attention_wrapper.py   |  51 +-
 tensorflow/contrib/slim/README.md             |   2 +-
 .../slim/python/slim/nets/resnet_v1_test.py   |   2 +-
 tensorflow/contrib/verbs/README.md            |  14 +-
 tensorflow/contrib/verbs/rdma.cc              | 413 +++++++++--
 tensorflow/contrib/verbs/rdma.h               |  40 +-
 tensorflow/core/BUILD                         |   1 +
 .../core/common_runtime/mkl_cpu_allocator.h   |   2 +-
 .../core/common_runtime/sycl/sycl_device.h    |  22 +-
 tensorflow/core/graph/graph.cc                |  15 +
 tensorflow/core/graph/graph.h                 |   5 +
 tensorflow/core/graph/graph_partition.cc      |   4 +-
 tensorflow/core/graph/graph_test.cc           |  64 +-
 tensorflow/core/graph/mkl_graph_util.h        | 179 ++---
 tensorflow/core/graph/mkl_layout_pass.cc      |   2 +-
 .../core/graph/mkl_tfconversion_pass.cc       |   4 +-
 .../core/grappler/costs/graph_properties.h    |   6 +
 tensorflow/core/grappler/utils.cc             |   2 +-
 tensorflow/core/kernels/BUILD                 |  31 +-
 tensorflow/core/kernels/avgpooling_op.cc      |   7 +-
 tensorflow/core/kernels/bincount_op.cc        | 115 +--
 tensorflow/core/kernels/bincount_op.h         |  41 ++
 tensorflow/core/kernels/bincount_op_gpu.cu.cc | 114 +++
 tensorflow/core/kernels/bincount_op_test.cc   |  75 ++
 tensorflow/core/kernels/bucketize_op.cc       |  66 +-
 tensorflow/core/kernels/bucketize_op.h        |  41 ++
 .../core/kernels/bucketize_op_gpu.cu.cc       | 101 +++
 tensorflow/core/kernels/conv_grad_ops_3d.cc   |  42 +-
 tensorflow/core/kernels/conv_ops_3d.cc        |   5 +
 tensorflow/core/kernels/cwise_op_acosh.cc     |  12 +-
 tensorflow/core/kernels/cwise_op_asinh.cc     |  14 +-
 tensorflow/core/kernels/cwise_op_atanh.cc     |  14 +-
 tensorflow/core/kernels/cwise_ops.h           |  12 +
 .../core/kernels/depthwise_conv_grad_op.cc    |  10 +-
 tensorflow/core/kernels/depthwise_conv_op.cc  |  10 +-
 tensorflow/core/kernels/depthwise_conv_op.h   |   4 +-
 .../core/kernels/depthwise_conv_op_gpu.cu.cc  |  19 +-
 .../kernels/dynamic_partition_op_gpu.cu.cc    | 376 ++++++++++
 .../core/kernels/dynamic_partition_op_test.cc |  58 ++
 .../core/kernels/fused_batch_norm_op.cc       |  70 +-
 tensorflow/core/kernels/fused_batch_norm_op.h |  22 +-
 tensorflow/core/kernels/lmdb_reader_op.cc     |   7 +-
 tensorflow/core/kernels/maxpooling_op.cc      |  47 +-
 .../core/kernels/maxpooling_op_gpu.cu.cc      |  40 +-
 tensorflow/core/kernels/maxpooling_op_gpu.h   |   2 +-
 .../core/kernels/mkl_conv_grad_filter_ops.cc  |  78 +-
 .../core/kernels/mkl_conv_grad_input_ops.cc   |  86 +--
 tensorflow/core/kernels/mkl_conv_ops.cc       |  82 ++-
 tensorflow/core/kernels/mkl_conv_ops.h        | 140 ++--
 tensorflow/core/kernels/mkl_tfconv_op.h       |  80 +-
 tensorflow/core/kernels/pooling_ops_common.cc |  10 +-
 .../core/kernels/pooling_ops_common_gpu.h     |   4 +-
 tensorflow/core/kernels/quantized_add_op.cc   |   2 +-
 tensorflow/core/kernels/random_op.cc          |   4 +-
 .../core/kernels/segment_reduction_ops.cc     |   3 +
 .../core/kernels/segment_reduction_ops.h      |  36 +-
 tensorflow/core/kernels/shape_ops.cc          |  43 +-
 tensorflow/core/kernels/shape_ops.h           |  13 +-
 tensorflow/core/kernels/slice_op.cc           | 116 ++-
 tensorflow/core/kernels/slice_op.h            | 109 ++-
 tensorflow/core/kernels/slice_op_gpu.cu.cc    |  56 ++
 tensorflow/core/kernels/strided_slice_op.cc   |   1 -
 .../core/kernels/strided_slice_op_impl.h      |  25 +-
 .../core/kernels/strided_slice_op_test.cc     |  49 ++
 tensorflow/core/kernels/transpose_op.cc       |  35 +-
 tensorflow/core/kernels/unique_op.cc          | 113 ++-
 tensorflow/core/ops/array_ops.cc              |  44 +-
 tensorflow/core/ops/math_ops.cc               |   2 +
 tensorflow/core/ops/nn_ops.cc                 |  12 +-
 tensorflow/core/ops/ops.pbtxt                 |   5 +
 .../core/platform/default/build_config/BUILD  |  20 +-
 .../core/platform/default/notification.h      |   2 +-
 tensorflow/core/platform/posix/error.cc       |  11 +-
 tensorflow/core/platform/posix/port.cc        |   6 +-
 tensorflow/core/public/version.h              |   2 +-
 tensorflow/core/util/mkl_util.h               | 691 ++++++++++++++++--
 tensorflow/core/util/mkl_util_test.cc         |  92 +++
 .../api_guides/python/threading_and_queues.md |   2 +-
 .../docs_src/get_started/get_started.md       |   6 +-
 tensorflow/docs_src/get_started/input_fn.md   |   6 +-
 tensorflow/docs_src/install/install_c.md      |   2 +-
 tensorflow/docs_src/install/install_go.md     |   2 +-
 tensorflow/docs_src/install/install_java.md   |  18 +-
 tensorflow/docs_src/install/install_linux.md  |  22 +-
 tensorflow/docs_src/install/install_mac.md    |  10 +-
 .../docs_src/install/install_sources.md       |  19 +-
 tensorflow/docs_src/mobile/prepare_models.md  |   2 +-
 .../docs_src/programmers_guide/debugger.md    |  19 +-
 .../docs_src/programmers_guide/tensors.md     |  12 +-
 tensorflow/examples/speech_commands/models.py |   2 +-
 tensorflow/go/android.go                      |  20 +
 tensorflow/go/operation_test.go               |   8 +
 tensorflow/go/tensor.go                       |   9 +-
 tensorflow/go/tensor_test.go                  |   9 +-
 .../src/main/java/org/tensorflow/Shape.java   |  32 +
 .../test/java/org/tensorflow/ShapeTest.java   |  26 +
 tensorflow/python/BUILD                       |   4 +
 tensorflow/python/estimator/canned/head.py    |   2 +-
 .../python/estimator/inputs/numpy_io.py       |  83 ++-
 .../python/estimator/inputs/numpy_io_test.py  |  87 +++
 tensorflow/python/framework/ops.py            |   4 +
 tensorflow/python/framework/tensor_util.py    |   1 +
 tensorflow/python/framework/test_util.py      |   3 +-
 .../python/kernel_tests/array_ops_test.py     |  52 +-
 .../python/kernel_tests/bincount_op_test.py   |  25 +-
 .../python/kernel_tests/bucketize_op_test.py  |   8 +-
 .../python/kernel_tests/constant_op_test.py   |  14 +-
 tensorflow/python/kernel_tests/conv1d_test.py |  43 ++
 .../python/kernel_tests/conv_ops_3d_test.py   | 267 +++----
 .../kernel_tests/depthwise_conv_op_test.py    |  20 +-
 .../python/kernel_tests/distributions/BUILD   |   1 +
 .../distributions/multinomial_test.py         |  12 +-
 .../kernel_tests/dynamic_partition_op_test.py | 106 ++-
 .../python/kernel_tests/pooling_ops_test.py   |  60 +-
 .../python/kernel_tests/reader_ops_test.py    |  41 ++
 .../segment_reduction_ops_test.py             |  29 +-
 .../python/kernel_tests/shape_ops_test.py     |  10 +
 .../python/kernel_tests/slice_op_test.py      |  25 +-
 .../python/kernel_tests/unique_op_test.py     |  26 +
 tensorflow/python/layers/base.py              |   8 +-
 tensorflow/python/layers/convolutional.py     |   2 +
 tensorflow/python/layers/normalization.py     |  22 +-
 .../python/layers/normalization_test.py       |  98 ++-
 tensorflow/python/ops/array_ops.py            |  38 +-
 .../python/ops/distributions/dirichlet.py     |   2 +-
 .../python/ops/distributions/multinomial.py   |  49 +-
 tensorflow/python/ops/image_ops_impl.py       |  23 +-
 tensorflow/python/ops/linalg_ops.py           |  31 +-
 tensorflow/python/ops/math_grad_test.py       |  17 +
 tensorflow/python/ops/math_ops.py             | 258 ++++---
 tensorflow/python/ops/metrics_impl.py         |   2 +-
 .../python/ops/nn_fused_batchnorm_test.py     | 119 +--
 tensorflow/python/ops/nn_impl.py              |  16 +-
 tensorflow/python/ops/nn_ops.py               | 125 +++-
 tensorflow/python/ops/variables.py            |   4 +-
 .../python/tools/import_pb_to_tensorboard.py  |   0
 tensorflow/stream_executor/cuda/cuda_dnn.cc   |   9 +-
 tensorflow/stream_executor/dnn.cc             |  16 +-
 tensorflow/stream_executor/dnn.h              |   6 +
 .../tools/api/golden/tensorflow.linalg.pbtxt  |   2 +-
 .../tools/api/golden/tensorflow.nn.pbtxt      |   6 +-
 tensorflow/tools/api/golden/tensorflow.pbtxt  |  22 +-
 .../tools/ci_build/ci_parameterized_build.sh  |   2 +-
 .../tools/ci_build/install/install_golang.sh  |   2 +-
 .../ci_build/linux/libtensorflow_docker.sh    |   2 +-
 .../tools/ci_build/osx/libtensorflow_cpu.sh   |   2 +-
 .../tools/ci_build/osx/libtensorflow_gpu.sh   |   2 +-
 .../tools/ci_build/pi/build_raspberry_pi.sh   |   6 +
 .../ci_build/windows/bazel/bazel_test_lib.sh  |   4 +-
 .../docker/Dockerfile.devel-gpu-cuda9-cudnn7  |   7 +-
 tensorflow/tools/docker/Dockerfile.gpu        |   2 +-
 tensorflow/tools/docker/README.md             |  14 +
 tensorflow/tools/graph_transforms/BUILD       |   2 +
 .../tools/graph_transforms/quantize_nodes.cc  |   2 +
 tensorflow/tools/pip_package/setup.py         |   2 +-
 third_party/aws.BUILD                         |   3 +
 third_party/curl.BUILD                        |   1 -
 third_party/sycl/crosstool/CROSSTOOL.tpl      |   8 +-
 third_party/sycl/crosstool/trisycl.tpl        |  73 ++
 third_party/sycl/sycl/BUILD.tpl               |  17 +-
 third_party/sycl/sycl/build_defs.bzl.tpl      |  17 +-
 third_party/sycl/sycl_configure.bzl           |  86 ++-
 third_party/zlib.BUILD                        |   2 +-
 tools/bazel.rc                                |   7 +-
 util/python/BUILD                             |   2 +-
 228 files changed, 7332 insertions(+), 1810 deletions(-)
 create mode 100644 tensorflow/contrib/distributions/python/kernel_tests/cauchy_test.py
 create mode 100644 tensorflow/contrib/distributions/python/ops/cauchy.py
 create mode 100644 tensorflow/contrib/opt/python/training/multitask_optimizer_wrapper.py
 create mode 100644 tensorflow/contrib/opt/python/training/multitask_optimizer_wrapper_test.py
 create mode 100644 tensorflow/core/kernels/bincount_op.h
 create mode 100644 tensorflow/core/kernels/bincount_op_gpu.cu.cc
 create mode 100644 tensorflow/core/kernels/bincount_op_test.cc
 create mode 100644 tensorflow/core/kernels/bucketize_op.h
 create mode 100644 tensorflow/core/kernels/bucketize_op_gpu.cu.cc
 create mode 100644 tensorflow/core/kernels/dynamic_partition_op_gpu.cu.cc
 create mode 100644 tensorflow/core/util/mkl_util_test.cc
 create mode 100644 tensorflow/go/android.go
 mode change 100644 => 100755 tensorflow/python/tools/import_pb_to_tensorboard.py
 create mode 100644 third_party/sycl/crosstool/trisycl.tpl

diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md
index 10fd595fec..ff11d13140 100644
--- a/CODE_OF_CONDUCT.md
+++ b/CODE_OF_CONDUCT.md
@@ -42,7 +42,7 @@ The Code of Conduct also applies within project spaces and in public spaces when
 
 Conflicts in an open source project can take many forms, from someone having a bad day and using harsh and hurtful language in the issue queue, to more serious instances such as sexist/racist statements or threats of violence, and everything in between.
 
-If the behaviour is threatening or harassing, or for other reasons requires immediate escalation, please see below.
+If the behavior is threatening or harassing, or for other reasons requires immediate escalation, please see below.
 
 However, for the vast majority of issues, we aim to empower individuals to first resolve conflicts themselves, asking for help when needed, and only after that fails to escalate further. This approach gives people more control over the outcome of their dispute. 
 
@@ -55,14 +55,14 @@ If you are experiencing or witnessing conflict, we ask you to use the following
 
 ## Reporting Violations
 
-Violations of the Code of Conduct can be reported to TensorFlow’s Project Steward at conduct@tensorflow.org. The Project Steward will determine whether the Code of Conduct was violated, and will issue an appropriate sanction, possibly including a written warning or expulsion from the project, project sponsored spaces, or project forums. We ask that you make a good-faith effort to resolve your conflict via the conflict resolution policy before submitting a report.
+Violations of the Code of Conduct can be reported to TensorFlow’s Project Stewards, Edd Wilder-James (ewj@google.com) and Sarah Novotny (sarahnovotny@google.com). The Project Steward will determine whether the Code of Conduct was violated, and will issue an appropriate sanction, possibly including a written warning or expulsion from the project, project sponsored spaces, or project forums. We ask that you make a good-faith effort to resolve your conflict via the conflict resolution policy before submitting a report.
 
 Violations of the Code of Conduct can occur in any setting, even those unrelated to the project. We will only consider complaints about conduct that has occurred within one year of the report.
 
 
 ## Enforcement
 
-If the Project Steward receives a report alleging a violation of the Code of Conduct, the Project Steward will notify the accused of the report, and provide them an opportunity to discuss the report before a sanction is issued. The Project Steward will do their utmost to keep the reporter anonymous. If the act is ongoing (such as someone engaging in harassment), or involves a threat to anyone's safety (e.g. threats of violence), the Project Steward may issue sanctions without notice.
+If the Project Stewards receive a report alleging a violation of the Code of Conduct, the Project Stewards will notify the accused of the report, and provide them an opportunity to discuss the report before a sanction is issued. The Project Stewards will do their utmost to keep the reporter anonymous. If the act is ongoing (such as someone engaging in harassment), or involves a threat to anyone's safety (e.g. threats of violence), the Project Stewards may issue sanctions without notice.
 
 
 ## Attribution
diff --git a/README.md b/README.md
index 24bbb6cec1..aff3427bdd 100644
--- a/README.md
+++ b/README.md
@@ -73,11 +73,11 @@ $ python
 
 ## For more information
 
-* [TensorFlow website](https://www.tensorflow.org)
+* [TensorFlow Website](https://www.tensorflow.org)
 * [TensorFlow White Papers](https://www.tensorflow.org/about/bib)
 * [TensorFlow Model Zoo](https://github.com/tensorflow/models)
 * [TensorFlow MOOC on Udacity](https://www.udacity.com/course/deep-learning--ud730)
-* [TensorFlow course at Stanford](https://web.stanford.edu/class/cs20si)
+* [TensorFlow Course at Stanford](https://web.stanford.edu/class/cs20si)
 
 Learn more about the TensorFlow community at the [community page of tensorflow.org](https://www.tensorflow.org/community) for a few ways to participate.
 
diff --git a/configure.py b/configure.py
index 0d1afbfe15..26da09bd94 100644
--- a/configure.py
+++ b/configure.py
@@ -43,6 +43,7 @@ _DEFAULT_CUDA_PATH_WIN = ('C:/Program Files/NVIDIA GPU Computing '
                           'Toolkit/CUDA/v%s' % _DEFAULT_CUDA_VERSION)
 _TF_OPENCL_VERSION = '1.2'
 _DEFAULT_COMPUTECPP_TOOLKIT_PATH = '/usr/local/computecpp'
+_DEFAULT_TRISYCL_INCLUDE_DIR = '/usr/local/triSYCL/include'
 
 
 def is_windows():
@@ -636,7 +637,7 @@ def set_tf_cuda_version(environ_cp):
   write_action_env_to_bazelrc('TF_CUDA_VERSION', tf_cuda_version)
 
 
-def set_tf_cunn_version(environ_cp):
+def set_tf_cudnn_version(environ_cp):
   """Set CUDNN_INSTALL_PATH and TF_CUDNN_VERSION."""
   ask_cudnn_version = (
       'Please specify the cuDNN version you want to use. '
@@ -882,6 +883,27 @@ def set_computecpp_toolkit_path(environ_cp):
   write_action_env_to_bazelrc('COMPUTECPP_TOOLKIT_PATH',
                               computecpp_toolkit_path)
 
+def set_trisycl_include_dir(environ_cp):
+  """Set TRISYCL_INCLUDE_DIR"""
+  ask_trisycl_include_dir = ('Please specify the location of the triSYCL '
+                             'include directory. (Use --config=sycl_trisycl '
+                             'when building with Bazel) '
+                             '[Default is %s]: '
+                             ) % (_DEFAULT_TRISYCL_INCLUDE_DIR)
+  while True:
+    trisycl_include_dir = get_from_env_or_user_or_default(
+      environ_cp, 'TRISYCL_INCLUDE_DIR', ask_trisycl_include_dir,
+      _DEFAULT_TRISYCL_INCLUDE_DIR)
+    if os.path.exists(trisycl_include_dir):
+      break
+
+    print('Invalid triSYCL include directory, %s cannot be found'
+          % (trisycl_include_dir))
+
+  # Set TRISYCL_INCLUDE_DIR
+  environ_cp['TRISYCL_INCLUDE_DIR'] = trisycl_include_dir
+  write_action_env_to_bazelrc('TRISYCL_INCLUDE_DIR',
+                              trisycl_include_dir)
 
 def set_mpi_home(environ_cp):
   """Set MPI_HOME."""
@@ -997,6 +1019,8 @@ def main():
     environ_cp['TF_NEED_GCP'] = '0'
     environ_cp['TF_NEED_HDFS'] = '0'
     environ_cp['TF_NEED_JEMALLOC'] = '0'
+    environ_cp['TF_NEED_OPENCL_SYCL'] = '0'
+    environ_cp['TF_NEED_COMPUTECPP'] = '0'
     environ_cp['TF_NEED_OPENCL'] = '0'
     environ_cp['TF_CUDA_CLANG'] = '0'
 
@@ -1018,17 +1042,21 @@ def main():
   set_build_var(environ_cp, 'TF_NEED_VERBS', 'VERBS', 'with_verbs_support',
                 False, 'verbs')
 
-  set_action_env_var(environ_cp, 'TF_NEED_OPENCL', 'OpenCL', False)
-  if environ_cp.get('TF_NEED_OPENCL') == '1':
+  set_action_env_var(environ_cp, 'TF_NEED_OPENCL_SYCL', 'OpenCL SYCL', False)
+  if environ_cp.get('TF_NEED_OPENCL_SYCL') == '1':
     set_host_cxx_compiler(environ_cp)
     set_host_c_compiler(environ_cp)
-    set_computecpp_toolkit_path(environ_cp)
+    set_action_env_var(environ_cp, 'TF_NEED_COMPUTECPP', 'ComputeCPP', True)
+    if environ_cp.get('TF_NEED_COMPUTECPP') == '1':
+      set_computecpp_toolkit_path(environ_cp)
+    else:
+      set_trisycl_include_dir(environ_cp)
 
   set_action_env_var(environ_cp, 'TF_NEED_CUDA', 'CUDA', False)
   if (environ_cp.get('TF_NEED_CUDA') == '1' and
       'TF_CUDA_CONFIG_REPO' not in environ_cp):
     set_tf_cuda_version(environ_cp)
-    set_tf_cunn_version(environ_cp)
+    set_tf_cudnn_version(environ_cp)
     set_tf_cuda_compute_capabilities(environ_cp)
 
     set_tf_cuda_clang(environ_cp)
diff --git a/tensorflow/BUILD b/tensorflow/BUILD
index 49828cd4d6..c8f0b6b061 100644
--- a/tensorflow/BUILD
+++ b/tensorflow/BUILD
@@ -54,6 +54,15 @@ config_setting(
     visibility = ["//visibility:public"],
 )
 
+config_setting(
+    name = "raspberry_pi_armeabi",
+    values = {
+        "crosstool_top": "@local_config_arm_compiler//:toolchain",
+        "cpu": "armeabi",
+    },
+    visibility = ["//visibility:public"],
+)
+
 config_setting(
     name = "android_arm",
     values = {
@@ -760,6 +769,13 @@ tf_cc_shared_object(
     ],
 )
 
+exports_files(
+    [
+        "tf_version_script.lds",
+        "tf_exported_symbols.lds",
+    ],
+)
+
 py_library(
     name = "tensorflow_py",
     srcs = ["__init__.py"],
diff --git a/tensorflow/compiler/aot/tfcompile.bzl b/tensorflow/compiler/aot/tfcompile.bzl
index ee291c12d0..1e22b760b8 100644
--- a/tensorflow/compiler/aot/tfcompile.bzl
+++ b/tensorflow/compiler/aot/tfcompile.bzl
@@ -119,7 +119,7 @@ def tf_library(name, graph, config,
             out_nodes_file,
         ] + freeze_saver_srcs,
         outs=[freeze_file],
-        cmd=("$(location //tensorflow/python/tools:freeze_graph)" +
+        cmd=("$(location @org_tensorflow//tensorflow/python/tools:freeze_graph)" +
              freeze_args),
         tools=["@org_tensorflow//tensorflow/python/tools:freeze_graph"],
         tags=tags,
@@ -130,6 +130,10 @@ def tf_library(name, graph, config,
   header_file = name + ".h"
   object_file = name + ".o"
   ep = ("__" + PACKAGE_NAME + "__" + name).replace("/", "_")
+  if type(tfcompile_flags) == type(""):
+    flags = tfcompile_flags
+  else:
+    flags = " ".join(["'" + arg.replace("'", "'\\''") + "'" for arg in (tfcompile_flags or [])])
   native.genrule(
       name=("gen_" + name),
       srcs=[
@@ -148,7 +152,7 @@ def tf_library(name, graph, config,
            " --target_triple=" + target_llvm_triple() +
            " --out_header=$(@D)/" + header_file +
            " --out_object=$(@D)/" + object_file +
-           " " + (tfcompile_flags or "")),
+           flags),
       tools=[tfcompile_tool],
       visibility=visibility,
       testonly=testonly,
@@ -185,7 +189,7 @@ def tf_library(name, graph, config,
            " --cpp_class=" + cpp_class +
            " --target_triple=" + target_llvm_triple() +
            " --out_session_module=$(@D)/" + session_module_pb +
-           " " + (tfcompile_flags or "")),
+           flags),
       tools=[tfcompile_tool],
       visibility=visibility,
       testonly=testonly,
@@ -195,8 +199,7 @@ def tf_library(name, graph, config,
 
   # The cc_library rule packaging up the header and object file, and needed
   # kernel implementations.
-  need_xla_data_proto = (tfcompile_flags and
-                         tfcompile_flags.find("--gen_program_shape") != -1)
+  need_xla_data_proto = (flags and flags.find("--gen_program_shape") != -1)
   native.cc_library(
       name=name,
       srcs=[object_file],
@@ -253,7 +256,7 @@ def tf_library(name, graph, config,
         ],
         outs=[test_file],
         cmd=("sed " + sed_replace +
-             " $(location //tensorflow/compiler/aot:test.cc) " +
+             " $(location @org_tensorflow//tensorflow/compiler/aot:test.cc) " +
              "> $(OUTS)"),
         tags=tags,
     )
diff --git a/tensorflow/compiler/tests/BUILD b/tensorflow/compiler/tests/BUILD
index c372e05474..79c4befd36 100644
--- a/tensorflow/compiler/tests/BUILD
+++ b/tensorflow/compiler/tests/BUILD
@@ -672,7 +672,7 @@ tf_library(
     cpp_class = "LSTMLayerInference",
     graph = "lstm_layer_inference.pbtxt",
     tags = ["manual"],
-    tfcompile_flags = "--xla_cpu_multi_thread_eigen=false",
+    tfcompile_flags = ["--xla_cpu_multi_thread_eigen=false"],
 )
 
 # -----------------------------------------------------------------------------
diff --git a/tensorflow/compiler/tests/fused_batchnorm_test.py b/tensorflow/compiler/tests/fused_batchnorm_test.py
index 936fcf8b6b..a773b5a947 100644
--- a/tensorflow/compiler/tests/fused_batchnorm_test.py
+++ b/tensorflow/compiler/tests/fused_batchnorm_test.py
@@ -36,7 +36,7 @@ class FusedBatchNormTest(XLATestCase):
     x_square = x * x
     x_square_sum = np.sum(x_square, (0, 1, 2))
     x_sum = np.sum(x, axis=(0, 1, 2))
-    element_count = np.size(x) / int(np.shape(x)[0])
+    element_count = np.size(x) / int(np.shape(x)[-1])
     mean = x_sum / element_count
     var = x_square_sum / element_count - mean * mean
     normalized = (x - mean) / np.sqrt(var + epsilon)
@@ -64,8 +64,9 @@ class FusedBatchNormTest(XLATestCase):
     return grad_x, grad_scale, grad_offset
 
   def testInference(self):
-    x_shape = [2, 2, 6, 2]
-    scale_shape = [2]
+    channel = 3
+    x_shape = [2, 2, 6, channel]
+    scale_shape = [channel]
     x_val = np.random.random_sample(x_shape).astype(np.float32)
     scale_val = np.random.random_sample(scale_shape).astype(np.float32)
 
@@ -74,8 +75,8 @@ class FusedBatchNormTest(XLATestCase):
     with self.test_session() as sess, self.test_scope():
       # To avoid constant folding
       t_val = array_ops.placeholder(np.float32, shape=x_shape, name="x")
-      scale = array_ops.placeholder(np.float32, shape=[2], name="scale")
-      offset = array_ops.placeholder(np.float32, shape=[2], name="offset")
+      scale = array_ops.placeholder(np.float32, shape=scale_shape, name="scale")
+      offset = array_ops.placeholder(np.float32, shape=scale_shape, name="offset")
       epsilon = 0.001
       y_ref, mean_ref, var_ref = self._reference_training(
           x_val, scale_val, offset_val, epsilon, data_format)
@@ -97,8 +98,9 @@ class FusedBatchNormTest(XLATestCase):
       self.assertAllClose(y_val, y_ref, atol=1e-3)
 
   def _testLearning(self, use_gradient_checker):
-    x_shape = [2, 2, 6, 2]
-    scale_shape = [2]
+    channel = 3
+    x_shape = [2, 2, 6, channel]
+    scale_shape = [channel]
     x_val = np.random.random_sample(x_shape).astype(np.float32)
     scale_val = np.random.random_sample(scale_shape).astype(np.float32)
 
@@ -109,8 +111,8 @@ class FusedBatchNormTest(XLATestCase):
     with self.test_session() as sess, self.test_scope():
       # To avoid constant folding
       t_val = array_ops.placeholder(np.float32, shape=x_shape, name="x")
-      scale = array_ops.placeholder(np.float32, shape=[2], name="scale")
-      offset = array_ops.placeholder(np.float32, shape=[2], name="offset")
+      scale = array_ops.placeholder(np.float32, shape=scale_shape, name="scale")
+      offset = array_ops.placeholder(np.float32, shape=scale_shape, name="offset")
       epsilon = 0.001
       y, mean, var = nn.fused_batch_norm(
           t_val,
@@ -154,8 +156,9 @@ class FusedBatchNormTest(XLATestCase):
   def testGradient(self):
     # TODO(b/64270657): Use gradient_checker here in addition to comparing with
     # this reference implementation.
-    x_shape = [2, 2, 6, 2]
-    scale_shape = [2]
+    channel = 3
+    x_shape = [2, 2, 6, channel]
+    scale_shape = [channel]
     grad_val = np.random.random_sample(x_shape).astype(np.float32)
     x_val = np.random.random_sample(x_shape).astype(np.float32)
     scale_val = np.random.random_sample(scale_shape).astype(np.float32)
diff --git a/tensorflow/compiler/xla/service/BUILD b/tensorflow/compiler/xla/service/BUILD
index db265510f2..fb980e7056 100644
--- a/tensorflow/compiler/xla/service/BUILD
+++ b/tensorflow/compiler/xla/service/BUILD
@@ -90,8 +90,6 @@ cc_library(
         ":shape_inference",
         "//tensorflow/compiler/xla:literal_util",
         "//tensorflow/compiler/xla:shape_util",
-        "//tensorflow/compiler/xla:status",
-        "//tensorflow/compiler/xla:status_macros",
         "//tensorflow/compiler/xla:statusor",
         "//tensorflow/compiler/xla:types",
         "//tensorflow/compiler/xla:util",
diff --git a/tensorflow/compiler/xla/service/hlo_instruction.h b/tensorflow/compiler/xla/service/hlo_instruction.h
index 1bd0cca945..cda8b07c61 100644
--- a/tensorflow/compiler/xla/service/hlo_instruction.h
+++ b/tensorflow/compiler/xla/service/hlo_instruction.h
@@ -222,7 +222,7 @@ class HloInstruction {
       tensorflow::gtl::ArraySlice<int64> strides);
 
   // Creates a slice instruction, where the first operand is sliced by
-  // start indices specified in the second operand, and by size specfied in
+  // start indices specified in the second operand, and by size specified in
   // 'slice_sizes'.
   static std::unique_ptr<HloInstruction> CreateDynamicSlice(
       const Shape& shape, HloInstruction* operand,
diff --git a/tensorflow/compiler/xla/service/hlo_instruction_test.cc b/tensorflow/compiler/xla/service/hlo_instruction_test.cc
index 070bb4bc42..76b12fc8d3 100644
--- a/tensorflow/compiler/xla/service/hlo_instruction_test.cc
+++ b/tensorflow/compiler/xla/service/hlo_instruction_test.cc
@@ -792,8 +792,8 @@ TEST_F(HloInstructionTest, ComplexFusionOp) {
   //   sub = Sub(mul, clamp)
   //   tuple = Tuple({sub, sub, mul, C1})
   //
-  // Notable complexities are repeated operands in a same instruction, different
-  // shapes, use of value in different expressions.
+  // Notable complexities are repeated operands in the same instruction,
+  // different shapes, use of value in different expressions.
   auto c1 = builder.AddInstruction(
       HloInstruction::CreateConstant(Literal::CreateR0<float>(1.1f)));
   auto c2 = builder.AddInstruction(
diff --git a/tensorflow/contrib/batching/BUILD b/tensorflow/contrib/batching/BUILD
index 8b7df4a84c..a111cfecb3 100644
--- a/tensorflow/contrib/batching/BUILD
+++ b/tensorflow/contrib/batching/BUILD
@@ -82,6 +82,7 @@ cc_library(
 tf_cc_test(
     name = "adaptive_shared_batch_scheduler_test",
     srcs = ["adaptive_shared_batch_scheduler_test.cc"],
+    tags = ["manual"],  # b/69013768
     deps = [
         ":adaptive_shared_batch_scheduler",
         "//tensorflow/contrib/batching/test_util:fake_clock_env",
diff --git a/tensorflow/contrib/batching/kernels/batch_kernels.cc b/tensorflow/contrib/batching/kernels/batch_kernels.cc
index 3b7c538fcc..6041d8c9b2 100644
--- a/tensorflow/contrib/batching/kernels/batch_kernels.cc
+++ b/tensorflow/contrib/batching/kernels/batch_kernels.cc
@@ -461,7 +461,7 @@ class BatchResource : public ResourceBase {
     return Status::OK();
   }
 
-  // Looks up the batcher queue for 'queue_name'. If it did't previously exist,
+  // Looks up the batcher queue for 'queue_name'. If it didn't previously exist,
   // creates it.
   Status LookupOrCreateBatcherQueue(const string& queue_name,
                                     BatcherQueue** queue) {
diff --git a/tensorflow/contrib/bayesflow/python/kernel_tests/csiszar_divergence_test.py b/tensorflow/contrib/bayesflow/python/kernel_tests/csiszar_divergence_test.py
index 8c6a614beb..2e94b7206d 100644
--- a/tensorflow/contrib/bayesflow/python/kernel_tests/csiszar_divergence_test.py
+++ b/tensorflow/contrib/bayesflow/python/kernel_tests/csiszar_divergence_test.py
@@ -759,7 +759,7 @@ class CsiszarVIMCOTest(test.TestCase):
   def _csiszar_vimco_helper_grad(self, logu, delta):
     """Finite difference approximation of `grad(csiszar_vimco_helper, logu)`."""
 
-    # This code actually estimates the sum of the Jacobiab because thats what
+    # This code actually estimates the sum of the Jacobiab because that's what
     # TF's `gradients` does.
     np_log_avg_u1, np_log_sooavg_u1 = self._csiszar_vimco_helper(
         logu[..., None] + np.diag([delta]*len(logu)))
diff --git a/tensorflow/contrib/cmake/CMakeLists.txt b/tensorflow/contrib/cmake/CMakeLists.txt
index 8744fc492f..77a3fc0c83 100644
--- a/tensorflow/contrib/cmake/CMakeLists.txt
+++ b/tensorflow/contrib/cmake/CMakeLists.txt
@@ -34,13 +34,41 @@ option(tensorflow_BUILD_SHARED_LIB "Build TensorFlow as a shared library" OFF)
 option(tensorflow_OPTIMIZE_FOR_NATIVE_ARCH "Enable compiler optimizations for the native processor architecture (if available)" ON)
 option(tensorflow_WIN_CPU_SIMD_OPTIONS "Enables CPU SIMD instructions")
 option(tensorflow_ENABLE_SNAPPY_SUPPORT "Enable SNAPPY compression support" ON)
+if(HAIKU)
+	option(tensorflow_ENABLE_POSITION_INDEPENDENT_CODE "Enable PIE support" OFF)
+else()
+	option(tensorflow_ENABLE_POSITION_INDEPENDENT_CODE "Enable PIE support" ON)
+endif()
+
 
 if (NOT WIN32)
   # Threads: defines CMAKE_THREAD_LIBS_INIT and adds -pthread compile option
   # for targets that link ${CMAKE_THREAD_LIBS_INIT}.
   find_package (Threads)
+
+  option(tensorflow_PATH_STATIC_LIB "Additional library search path for libcudnn_static.a, libnccl_static.a, libculibos.a" /usr/local/cuda/lib64/)
+  option(tensorflow_CUDNN_INCLUDE "cudnn.h header install path" /usr/include/)
+  if (NOT tensorflow_CUDNN_INCLUDE)
+    # option's default value is OFF. Fill it with real default values
+    set(tensorflow_CUDNN_INCLUDE /usr/include)
+  endif (NOT tensorflow_CUDNN_INCLUDE)
+  option(tensorflow_PATH_CUDNN_STATIC_LIB "Override PATH_STATIC_LIB for libcudnn_static.a" ${tensorflow_PATH_STATIC_LIB})
+  option(tensorflow_PATH_NCCL_STATIC_LIB "Override PATH_STATIC_LIB for libnccl_static.a" ${tensorflow_PATH_STATIC_LIB})
+  option(tensorflow_CUDA_LIBRARY_PATH "Designate the default CUDA library paths" /usr/local/cuda/lib64)
+  if (NOT tensorflow_CUDA_LIBRARY_PATH)
+    # option's default value is OFF. Fill it with real default values
+    set(tensorflow_CUDA_LIBRARY_PATH /usr/local/cuda/lib64)
+  endif (NOT tensorflow_CUDA_LIBRARY_PATH)
 endif()
 
+if (WIN32)
+  set(BOOL_WIN32 ON)
+else (WIN32)
+  set(BOOL_WIN32 OFF)
+  set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fPIC")
+  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fPIC")
+endif (WIN32)
+
 # [CLEANUP] Remove when done
 # For debugging
 function(SHOW_VARIABLES)
@@ -58,7 +86,12 @@ set (DOWNLOAD_LOCATION "${CMAKE_CURRENT_BINARY_DIR}/downloads"
      CACHE PATH "Location where external projects will be downloaded.")
 mark_as_advanced(DOWNLOAD_LOCATION)
 
-set(CMAKE_POSITION_INDEPENDENT_CODE ON)
+if (tensorflow_ENABLE_POSITION_INDEPENDENT_CODE)
+	set(CMAKE_POSITION_INDEPENDENT_CODE ON)
+else()
+	set(CMAKE_POSITION_INDEPENDENT_CODE OFF)
+endif()
+
 add_definitions(-DEIGEN_AVOID_STL_ARRAY)
 if(WIN32)
   add_definitions(-DNOMINMAX -D_WIN32_WINNT=0x0A00 -DLANG_CXX11 -DCOMPILER_MSVC)
@@ -217,20 +250,35 @@ endif()
 if(UNIX)
   list(APPEND tensorflow_EXTERNAL_LIBRARIES ${CMAKE_THREAD_LIBS_INIT} ${CMAKE_DL_LIBS})
 endif()
+if(HAIKU)
+  list(APPEND tensorflow_EXTERNAL_LIBRARIES network)
+endif()
 
 if (tensorflow_ENABLE_GPU)
+  if (NOT WIN32)
+    # Default install paths for cuda libraries in Linux
+    # In some Linux distros, find_package(CUDA) seems to require CMAKE_LIBRARY_PATH to include cuda-lib paths
+    list(APPEND CMAKE_LIBRARY_PATH "${tensorflow_CUDA_LIBRARY_PATH}")
+    list(APPEND CMAKE_LIBRARY_PATH "${tensorflow_CUDA_LIBRARY_PATH}/stubs")
+  endif (NOT WIN32)
+
+  find_package(CUDA 8.0 REQUIRED)
+
+  # by default we assume compute cabability 3.5 and 5.2. If you change this change it in
+  # CUDA_NVCC_FLAGS and cuda_config.h below
+  set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-gencode arch=compute_30,code=\"sm_30,compute_30\";-gencode arch=compute_35,code=\"sm_35,compute_35\";-gencode arch=compute_52,code=\"sm_52,compute_52\")
+  set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};--include-path ${PROJECT_BINARY_DIR}/$\{build_configuration\};--expt-relaxed-constexpr)
+  set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-ftz=true)  # Flush denormals to zero
+  set(CUDA_INCLUDE ${CUDA_TOOLKIT_TARGET_DIR} ${CUDA_TOOLKIT_TARGET_DIR}/extras/CUPTI/include)
+  include_directories(${CUDA_INCLUDE})
   if (WIN32)
-    find_package(CUDA 8.0 REQUIRED)
-
-    # by default we assume compute cabability 3.5 and 5.2. If you change this change it in
-    # CUDA_NVCC_FLAGS and cuda_config.h below
-    set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-gencode arch=compute_30,code=\"sm_30,compute_30\";-gencode arch=compute_35,code=\"sm_35,compute_35\";-gencode arch=compute_52,code=\"sm_52,compute_52\")
-    set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};--include-path ${PROJECT_BINARY_DIR}/$\{build_configuration\};--expt-relaxed-constexpr)
-    set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-ftz=true)  # Flush denormals to zero
-    set(CUDA_INCLUDE ${CUDA_TOOLKIT_TARGET_DIR} ${CUDA_TOOLKIT_TARGET_DIR}/extras/CUPTI/include)
-    include_directories(${CUDA_INCLUDE})
     add_definitions(-DGOOGLE_CUDA=1 -DTF_EXTRA_CUDA_CAPABILITIES=3.0,3.5,5.2)
+  else (WIN32)
+    # Without these double quotes, cmake in Linux makes it "-DTF_EXTRA_CUDA_CAPABILITIES=3.0, -D3.5, -D5.2" for cc, which incurs build breaks
+    add_definitions(-DGOOGLE_CUDA=1 -D"TF_EXTRA_CUDA_CAPABILITIES=3.0,3.5,5.2")
+  endif (WIN32)
 
+  if (WIN32)
     # add cudnn
     if(NOT CUDNN_HOME)
       set(CUDNN_HOME ${CUDA_TOOLKIT_TARGET_DIR})
@@ -238,18 +286,48 @@ if (tensorflow_ENABLE_GPU)
     include_directories(${CUDNN_HOME})
     set(CUDA_LIBRARIES ${CUDA_LIBRARIES} ${CUDA_CUDA_LIBRARY} ${CUDA_CUBLAS_LIBRARIES} ${CUDA_CUFFT_LIBRARIES}
       ${CUDA_curand_LIBRARY} ${CUDA_cupti_LIBRARY} ${CUDA_cusolver_LIBRARY} ${CUDNN_HOME}/lib/x64/cudnn.lib)
+  else (WIN32)
+    set(CUDNN_INCLUDE "${tensorflow_CUDNN_INCLUDE}")
 
-    # create cuda_config.h
-    FILE(WRITE ${tensorflow_source_dir}/third_party/gpus/cuda/cuda_config.h
-      "#ifndef CUDA_CUDA_CONFIG_H_\n"
-      "#define CUDA_CUDA_CONFIG_H_\n"
-      "#define TF_CUDA_CAPABILITIES CudaVersion(\"3.0\"),CudaVersion(\"3.5\"),CudaVersion(\"5.2\")\n"
-      "#define TF_CUDA_VERSION \"64_80\"\n"
-      "#define TF_CUDNN_VERSION \"64_6\"\n"
-      "#define TF_CUDA_TOOLKIT_PATH \"${CUDA_TOOLKIT_ROOT_DIR}\"\n"
-      "#endif  // CUDA_CUDA_CONFIG_H_\n"
-    )
+    find_library(nccl_STATIC_LIBRARY NAMES libnccl_static.a PATHS ${tensorflow_PATH_NCCL_STATIC_LIB} ${CUDA_TOOLKIT_ROOT_DIR})
+    if (NOT nccl_STATIC_LIBRARY)
+      message(FATAL_ERROR "NCCL is required for GPU-build")
+    else (NOT nccl_STATIC_LIBRARY)
+      message("nccl-static: ${nccl_STATIC_LIBRARY}")
+      # something like /usr/lib64/libnccl_static.a
+    endif (NOT nccl_STATIC_LIBRARY)
+
+    find_library(cudnn_STATIC_LIBRARY NAMES libcudnn_static.a PATHS ${tensorflow_PATH_CUDNN_STATIC_LIB} ${CUDA_TOOLKIT_ROOT_DIR})
+    if (NOT cudnn_STATIC_LIBRARY)
+      message(FATAL_ERROR "CUDNN is required for GPU-build")
+    else (NOT cudnn_STATIC_LIBRARY)
+      message("cudnn-static: ${cudnn_STATIC_LIBRARY}")
+    endif (NOT cudnn_STATIC_LIBRARY)
+
+    find_library(culibos_STATIC_LIBRARY NAMES libculibos.a PATHS ${tensorflow_PATH_STATIC_LIB} ${CUDA_TOOLKIT_ROOT_DIR})
+    if (NOT culibos_STATIC_LIBRARY)
+      message(FATAL_ERROR "CULIBOS is required for GPU-build")
+    else (NOT culibos_STATIC_LIBRARY)
+      message("culibos-static: ${culibos_STATIC_LIBRARY}")
+    endif (NOT culibos_STATIC_LIBRARY)
+
+    include_directories(${CUDNN_INCLUDE})
+    set(CUDA_LIBRARIES ${CUDA_LIBRARIES} ${CUDA_CUDA_LIBRARY} ${CUDA_CUBLAS_LIBRARIES} ${CUDA_CUFFT_LIBRARIES}
+      ${CUDA_curand_LIBRARY} ${CUDA_cupti_LIBRARY} ${CUDA_cusolver_LIBRARY} ${cudnn_STATIC_LIBRARY} ${culibos_STATIC_LIBRARY} ${nccl_STATIC_LIBRARY})
+  endif (WIN32)
+
+  # create cuda_config.h
+  FILE(WRITE ${tensorflow_source_dir}/third_party/gpus/cuda/cuda_config.h
+    "#ifndef CUDA_CUDA_CONFIG_H_\n"
+    "#define CUDA_CUDA_CONFIG_H_\n"
+    "#define TF_CUDA_CAPABILITIES CudaVersion(\"3.0\"),CudaVersion(\"3.5\"),CudaVersion(\"5.2\")\n"
+    "#define TF_CUDA_VERSION \"64_80\"\n"
+    "#define TF_CUDNN_VERSION \"64_6\"\n"
+    "#define TF_CUDA_TOOLKIT_PATH \"${CUDA_TOOLKIT_ROOT_DIR}\"\n"
+    "#endif  // CUDA_CUDA_CONFIG_H_\n"
+  )
 
+  if (WIN32)
     # tf assumes in various places header files to be in cuda/include. On windows the cuda sdk
     # installs them under cuda/version/include and to avoid that we need to change tf we copy a
     # few files to cuda/include
@@ -261,12 +339,25 @@ if (tensorflow_ENABLE_GPU)
       ${CUDA_TOOLKIT_TARGET_DIR}/include/cusolverDn.h
       DESTINATION ${tensorflow_source_dir}/third_party/gpus/cuda/include
     )
-    include_directories(${tensorflow_source_dir}/third_party/gpus)
-    # add cuda libraries to tensorflow_EXTERNAL_LIBRARIES
-    list(APPEND tensorflow_EXTERNAL_LIBRARIES ${CUDA_LIBRARIES})
+  else(WIN32)
+    # Linux has slightly differnt install paths than Windows
+    FILE(COPY
+      ${CUDA_TOOLKIT_TARGET_DIR}/include/cuda.h ${CUDA_TOOLKIT_TARGET_DIR}/include/cuComplex.h
+      ${CUDA_TOOLKIT_TARGET_DIR}/include/cublas_v2.h ${CUDNN_INCLUDE}/cudnn.h
+      ${CUDA_TOOLKIT_TARGET_DIR}/include/cufft.h ${CUDA_TOOLKIT_TARGET_DIR}/include/curand.h
+      ${CUDA_TOOLKIT_TARGET_DIR}/include/cuda_runtime_api.h
+      ${CUDA_TOOLKIT_TARGET_DIR}/include/cusolverDn.h
+      DESTINATION ${tensorflow_source_dir}/third_party/gpus/cuda/include
+    )
+  endif(WIN32)
 
-    # NOTE(mrry): Update these flags when the version of CUDA or cuDNN used
-    # in the default build is upgraded.
+  include_directories(${tensorflow_source_dir}/third_party/gpus)
+  # add cuda libraries to tensorflow_EXTERNAL_LIBRARIES
+  list(APPEND tensorflow_EXTERNAL_LIBRARIES ${CUDA_LIBRARIES})
+
+  # NOTE(mrry): Update these flags when the version of CUDA or cuDNN used
+  # in the default build is upgraded.
+  if(WIN32)
     set(tensorflow_BUILD_INFO_FLAGS --build_config cuda --key_value
       msvcp_dll_name=msvcp140.dll
       cudart_dll_name=cudart64_80.dll
@@ -275,7 +366,9 @@ if (tensorflow_ENABLE_GPU)
       cudnn_dll_name=cudnn64_6.dll
       cudnn_version_number=6)
   else(WIN32)
-    message(FATAL_ERROR "CMake GPU build is currently only supported on Windows.")
+    set(tensorflow_BUILD_INFO_FLAGS --build_config cuda --key_value
+      cuda_version_number=8.0
+      cudnn_version_number=6)
   endif(WIN32)
 else(tensorflow_ENABLE_GPU)
   set(tensorflow_BUILD_INFO_FLAGS --build_config cpu --key_value
@@ -293,9 +386,7 @@ include(tf_core_framework.cmake)
 # NOTE: Disabled until issue #3996 is fixed.
 # include(tf_stream_executor.cmake)
 if (tensorflow_ENABLE_GPU)
-  if (WIN32)
     include(tf_stream_executor.cmake)
-  endif()
 endif()
 
 include(tf_core_cpu.cmake)
diff --git a/tensorflow/contrib/cmake/external/boringssl.cmake b/tensorflow/contrib/cmake/external/boringssl.cmake
index dc27eadaca..cca8444e2a 100644
--- a/tensorflow/contrib/cmake/external/boringssl.cmake
+++ b/tensorflow/contrib/cmake/external/boringssl.cmake
@@ -39,8 +39,12 @@ ExternalProject_Add(boringssl
     # BUILD_IN_SOURCE 1
     INSTALL_COMMAND ""
     CMAKE_CACHE_ARGS
+        if(tensorflow_ENABLE_POSITION_INDEPENDENT_CODE)
+        	-DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON
+        else()
+        	-DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=OFF
+        endif()
         -DCMAKE_BUILD_TYPE:STRING=Release
         -DCMAKE_VERBOSE_MAKEFILE:BOOL=OFF
-        -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON
 )
 
diff --git a/tensorflow/contrib/cmake/external/jsoncpp.cmake b/tensorflow/contrib/cmake/external/jsoncpp.cmake
index 5127d7e8f7..d2ae4c76e8 100644
--- a/tensorflow/contrib/cmake/external/jsoncpp.cmake
+++ b/tensorflow/contrib/cmake/external/jsoncpp.cmake
@@ -42,8 +42,12 @@ ExternalProject_Add(jsoncpp
     BUILD_IN_SOURCE 1
     INSTALL_COMMAND ""
     CMAKE_CACHE_ARGS
+  	  if(tensorflow_ENABLE_POSITION_INDEPENDENT_CODE)
+  	      -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON
+  	  else()
+   	    	-DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=OFF
+   	 endif()
         -DCMAKE_BUILD_TYPE:STRING=Release
         -DCMAKE_VERBOSE_MAKEFILE:BOOL=OFF
-	-DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON
 )
 
diff --git a/tensorflow/contrib/cmake/external/lmdb.cmake b/tensorflow/contrib/cmake/external/lmdb.cmake
index 79971b7cfc..e41384f023 100644
--- a/tensorflow/contrib/cmake/external/lmdb.cmake
+++ b/tensorflow/contrib/cmake/external/lmdb.cmake
@@ -29,10 +29,14 @@ ExternalProject_Add(lmdb
     INSTALL_DIR ${lmdb_INSTALL}
     DOWNLOAD_DIR "${DOWNLOAD_LOCATION}"
     CMAKE_CACHE_ARGS
+		if(tensorflow_ENABLE_POSITION_INDEPENDENT_CODE)
+			-DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON
+		else()
+			-DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=OFF
+		endif()
         -DCMAKE_BUILD_TYPE:STRING=Release
         -DCMAKE_VERBOSE_MAKEFILE:BOOL=OFF
         -DCMAKE_INSTALL_PREFIX:STRING=${lmdb_INSTALL}
-    -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON
 )
 
 if(WIN32)
diff --git a/tensorflow/contrib/cmake/external/png.cmake b/tensorflow/contrib/cmake/external/png.cmake
index 2b2bd47d1c..aad6618f52 100644
--- a/tensorflow/contrib/cmake/external/png.cmake
+++ b/tensorflow/contrib/cmake/external/png.cmake
@@ -41,10 +41,14 @@ ExternalProject_Add(png
     INSTALL_DIR ${png_INSTALL}
     DOWNLOAD_DIR "${DOWNLOAD_LOCATION}"
     CMAKE_CACHE_ARGS
+		if(tensorflow_ENABLE_POSITION_INDEPENDENT_CODE)
+			-DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON
+		else()
+			-DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=OFF
+		endif()
         -DCMAKE_BUILD_TYPE:STRING=Release
         -DCMAKE_VERBOSE_MAKEFILE:BOOL=OFF
         -DCMAKE_INSTALL_PREFIX:STRING=${png_INSTALL}
-	-DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON
 	-DZLIB_ROOT:STRING=${ZLIB_INSTALL}
 )
 
diff --git a/tensorflow/contrib/cmake/external/protobuf.cmake b/tensorflow/contrib/cmake/external/protobuf.cmake
index 1e300e21df..b53857a47b 100644
--- a/tensorflow/contrib/cmake/external/protobuf.cmake
+++ b/tensorflow/contrib/cmake/external/protobuf.cmake
@@ -44,8 +44,12 @@ ExternalProject_Add(protobuf
         ${PROTOBUF_ADDITIONAL_CMAKE_OPTIONS}
     INSTALL_COMMAND ""
     CMAKE_CACHE_ARGS
+		if(tensorflow_ENABLE_POSITION_INDEPENDENT_CODE)
+			-DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON
+		else()
+			-DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=OFF
+		endif()
         -DCMAKE_BUILD_TYPE:STRING=Release
         -DCMAKE_VERBOSE_MAKEFILE:BOOL=OFF
-        -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON
         -DZLIB_ROOT:STRING=${ZLIB_INSTALL}
 )
diff --git a/tensorflow/contrib/cmake/external/re2.cmake b/tensorflow/contrib/cmake/external/re2.cmake
index cb4ec9c2de..b56f4b0898 100644
--- a/tensorflow/contrib/cmake/external/re2.cmake
+++ b/tensorflow/contrib/cmake/external/re2.cmake
@@ -38,7 +38,11 @@ ExternalProject_Add(re2
     BUILD_IN_SOURCE 1
     DOWNLOAD_DIR "${DOWNLOAD_LOCATION}"
     CMAKE_CACHE_ARGS
+		if(tensorflow_ENABLE_POSITION_INDEPENDENT_CODE)
+			-DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON
+		else()
+			-DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=OFF
+		endif()
         -DCMAKE_BUILD_TYPE:STRING=Release
         -DCMAKE_INSTALL_PREFIX:STRING=${re2_INSTALL}
-        -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON
-)
\ No newline at end of file
+)
diff --git a/tensorflow/contrib/cmake/external/snappy.cmake b/tensorflow/contrib/cmake/external/snappy.cmake
index 2d2451521c..926c271fd9 100644
--- a/tensorflow/contrib/cmake/external/snappy.cmake
+++ b/tensorflow/contrib/cmake/external/snappy.cmake
@@ -40,11 +40,15 @@ ExternalProject_Add(snappy
     LOG_CONFIGURE ON
     LOG_BUILD ON
     CMAKE_CACHE_ARGS
+		if(tensorflow_ENABLE_POSITION_INDEPENDENT_CODE)
+			-DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON
+		else()
+			-DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=OFF
+		endif()
         -DCMAKE_BUILD_TYPE:STRING=Release
         -DCMAKE_VERBOSE_MAKEFILE:BOOL=OFF
         -DSNAPPY_BUILD_TESTS:BOOL=OFF
-        -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON
 )
 
 # actually enables snappy in the source code
-add_definitions(-DTF_USE_SNAPPY)
+add_definitions(-DTF_USE_SNAPPY)
\ No newline at end of file
diff --git a/tensorflow/contrib/cmake/external/sqlite.cmake b/tensorflow/contrib/cmake/external/sqlite.cmake
index 1770dcb1fd..785039a469 100644
--- a/tensorflow/contrib/cmake/external/sqlite.cmake
+++ b/tensorflow/contrib/cmake/external/sqlite.cmake
@@ -53,9 +53,13 @@ else()
         INSTALL_DIR ${sqlite_INSTALL}
         DOWNLOAD_DIR "${DOWNLOAD_LOCATION}"
         CMAKE_CACHE_ARGS
+			if(tensorflow_ENABLE_POSITION_INDEPENDENT_CODE)
+				-DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON
+			else()
+				-DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=OFF
+			endif()
             -DCMAKE_BUILD_TYPE:STRING=Release
             -DCMAKE_VERBOSE_MAKEFILE:BOOL=OFF
-            -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON
             -DCMAKE_INSTALL_PREFIX:STRING=${sqlite_INSTALL}
     )
 
diff --git a/tensorflow/contrib/cmake/external/zlib.cmake b/tensorflow/contrib/cmake/external/zlib.cmake
index c8af611e1e..f10f84336e 100644
--- a/tensorflow/contrib/cmake/external/zlib.cmake
+++ b/tensorflow/contrib/cmake/external/zlib.cmake
@@ -42,9 +42,13 @@ ExternalProject_Add(zlib
     BUILD_IN_SOURCE 1
     DOWNLOAD_DIR "${DOWNLOAD_LOCATION}"
     CMAKE_CACHE_ARGS
+		if(tensorflow_ENABLE_POSITION_INDEPENDENT_CODE)
+			-DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON
+		else()
+			-DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=OFF
+		endif()
         -DCMAKE_BUILD_TYPE:STRING=Release
         -DCMAKE_INSTALL_PREFIX:STRING=${ZLIB_INSTALL}
-	-DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON
 )
 
 # put zlib includes in the directory where they are expected
diff --git a/tensorflow/contrib/cmake/tf_cc_ops.cmake b/tensorflow/contrib/cmake/tf_cc_ops.cmake
index 45eeb11062..6e2ac203f9 100644
--- a/tensorflow/contrib/cmake/tf_cc_ops.cmake
+++ b/tensorflow/contrib/cmake/tf_cc_ops.cmake
@@ -148,7 +148,11 @@ list(REMOVE_ITEM tf_cc_srcs ${tf_cc_test_srcs})
 add_library(tf_cc OBJECT ${tf_cc_srcs})
 add_dependencies(tf_cc tf_cc_framework tf_cc_ops)
 
-set (pywrap_tensorflow_lib "${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_BUILD_TYPE}/pywrap_tensorflow_internal.lib")
+if (WIN32)
+  set (pywrap_tensorflow_lib "${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_BUILD_TYPE}/pywrap_tensorflow_internal.lib")
+else (WIN32)
+  set (pywrap_tensorflow_lib "${CMAKE_CURRENT_BINARY_DIR}/libpywrap_tensorflow_internal.so")
+endif (WIN32)
 add_custom_target(tf_extension_ops)
 
 function(AddUserOps)
@@ -164,15 +168,13 @@ function(AddUserOps)
   # create shared library from source and cuda obj
   add_library(${_AT_TARGET} SHARED ${_AT_SOURCES} ${gpu_lib})
   target_link_libraries(${_AT_TARGET} ${pywrap_tensorflow_lib})
-  if(WIN32)
-    if (tensorflow_ENABLE_GPU AND _AT_GPUSOURCES)
-        # some ops call out to cuda directly; need to link libs for the cuda dlls
-        target_link_libraries(${_AT_TARGET} ${CUDA_LIBRARIES})
-    endif()
-    if (_AT_DISTCOPY)
-        add_custom_command(TARGET ${_AT_TARGET} POST_BUILD
-            COMMAND ${CMAKE_COMMAND} -E copy $<TARGET_FILE:${_AT_TARGET}> ${_AT_DISTCOPY}/)
-    endif()
+  if (tensorflow_ENABLE_GPU AND _AT_GPUSOURCES)
+      # some ops call out to cuda directly; need to link libs for the cuda dlls
+      target_link_libraries(${_AT_TARGET} ${CUDA_LIBRARIES})
+  endif()
+  if (_AT_DISTCOPY)
+      add_custom_command(TARGET ${_AT_TARGET} POST_BUILD
+          COMMAND ${CMAKE_COMMAND} -E copy $<TARGET_FILE:${_AT_TARGET}> ${_AT_DISTCOPY}/)
   endif()
   if (_AT_DEPENDS)
     add_dependencies(${_AT_TARGET} ${_AT_DEPENDS})
@@ -180,9 +182,19 @@ function(AddUserOps)
   # make sure TF_COMPILE_LIBRARY is not defined for this target
   get_target_property(target_compile_flags  ${_AT_TARGET} COMPILE_FLAGS)
   if(target_compile_flags STREQUAL "target_compile_flags-NOTFOUND")
-    set(target_compile_flags "/UTF_COMPILE_LIBRARY")
+    if (WIN32)
+      set(target_compile_flags "/UTF_COMPILE_LIBRARY")
+    else (WIN32)
+      # gcc uses UTF as default
+      set(target_compile_flags "-finput-charset=UTF-8")
+    endif (WIN32)
   else()
-    set(target_compile_flags "${target_compile_flags} /UTF_COMPILE_LIBRARY")
+    if (WIN32)
+      set(target_compile_flags "${target_compile_flags} /UTF_COMPILE_LIBRARY")
+    else (WIN32)
+      # gcc uses UTF as default
+      set(target_compile_flags "${target_compile_flags} -finput-charset=UTF-8")
+    endif (WIN32)
   endif()
   set_target_properties(${_AT_TARGET} PROPERTIES COMPILE_FLAGS ${target_compile_flags})
   add_dependencies(tf_extension_ops ${_AT_TARGET})
diff --git a/tensorflow/contrib/cmake/tf_core_kernels.cmake b/tensorflow/contrib/cmake/tf_core_kernels.cmake
index d6b8990664..2d015908a8 100644
--- a/tensorflow/contrib/cmake/tf_core_kernels.cmake
+++ b/tensorflow/contrib/cmake/tf_core_kernels.cmake
@@ -179,6 +179,7 @@ file(GLOB_RECURSE tf_core_gpu_kernels_srcs
     "${tensorflow_source_dir}/tensorflow/contrib/image/kernels/*.cu.cc"
     "${tensorflow_source_dir}/tensorflow/contrib/rnn/kernels/*.cu.cc"
     "${tensorflow_source_dir}/tensorflow/contrib/seq2seq/kernels/*.cu.cc"
+    "${tensorflow_source_dir}/tensorflow/contrib/resampler/kernels/*.cu.cc"
 )
 
 if(WIN32 AND tensorflow_ENABLE_GPU)
@@ -202,16 +203,16 @@ endif(WIN32 AND tensorflow_ENABLE_GPU)
 add_library(tf_core_kernels OBJECT ${tf_core_kernels_srcs})
 add_dependencies(tf_core_kernels tf_core_cpu)
 
-if(WIN32)
+if (WIN32)
   target_compile_options(tf_core_kernels PRIVATE /MP)
-  if (tensorflow_ENABLE_GPU)
-    set_source_files_properties(${tf_core_gpu_kernels_srcs} PROPERTIES CUDA_SOURCE_PROPERTY_FORMAT OBJ)
-    set(tf_core_gpu_kernels_lib tf_core_gpu_kernels)
-    cuda_add_library(${tf_core_gpu_kernels_lib} ${tf_core_gpu_kernels_srcs})
-    set_target_properties(${tf_core_gpu_kernels_lib}
-                          PROPERTIES DEBUG_POSTFIX ""
-                          COMPILE_FLAGS "${TF_REGULAR_CXX_FLAGS}"
-    )
-    add_dependencies(${tf_core_gpu_kernels_lib} tf_core_cpu)
-  endif()
+endif (WIN32)
+if (tensorflow_ENABLE_GPU)
+  set_source_files_properties(${tf_core_gpu_kernels_srcs} PROPERTIES CUDA_SOURCE_PROPERTY_FORMAT OBJ)
+  set(tf_core_gpu_kernels_lib tf_core_gpu_kernels)
+  cuda_add_library(${tf_core_gpu_kernels_lib} ${tf_core_gpu_kernels_srcs})
+  set_target_properties(${tf_core_gpu_kernels_lib}
+                        PROPERTIES DEBUG_POSTFIX ""
+                        COMPILE_FLAGS "${TF_REGULAR_CXX_FLAGS}"
+  )
+  add_dependencies(${tf_core_gpu_kernels_lib} tf_core_cpu)
 endif()
diff --git a/tensorflow/contrib/cmake/tf_label_image_example.cmake b/tensorflow/contrib/cmake/tf_label_image_example.cmake
index 0d3a4699eb..7f2f60b089 100644
--- a/tensorflow/contrib/cmake/tf_label_image_example.cmake
+++ b/tensorflow/contrib/cmake/tf_label_image_example.cmake
@@ -34,3 +34,8 @@ target_link_libraries(tf_label_image_example PUBLIC
     ${tf_core_gpu_kernels_lib}
     ${tensorflow_EXTERNAL_LIBRARIES}
 )
+
+install(TARGETS tf_label_image_example
+        RUNTIME DESTINATION bin
+        LIBRARY DESTINATION lib
+        ARCHIVE DESTINATION lib)
\ No newline at end of file
diff --git a/tensorflow/contrib/cmake/tf_python.cmake b/tensorflow/contrib/cmake/tf_python.cmake
index 9b863f7bc6..61b3fd715d 100755
--- a/tensorflow/contrib/cmake/tf_python.cmake
+++ b/tensorflow/contrib/cmake/tf_python.cmake
@@ -715,6 +715,9 @@ function(GENERATE_PYTHON_OP_LIB tf_python_op_lib_name)
       set(require_shape_fn 1)
     endif()
 
+    get_filename_component(GENERATE_PYTHON_OP_LIB_MKDIRPATH ${GENERATE_PYTHON_OP_LIB_DESTINATION} PATH)
+    file(MAKE_DIRECTORY ${GENERATE_PYTHON_OP_LIB_MKDIRPATH})
+
     # Create a C++ executable that links in the appropriate op
     # registrations and generates Python wrapper code based on the
     # registered ops.
@@ -743,6 +746,7 @@ function(GENERATE_PYTHON_OP_LIB tf_python_op_lib_name)
         ${GENERATE_PYTHON_OP_LIB_DESTINATION} PARENT_SCOPE)
 endfunction()
 
+GENERATE_PYTHON_OP_LIB("audio_ops")
 GENERATE_PYTHON_OP_LIB("array_ops")
 GENERATE_PYTHON_OP_LIB("bitwise_ops")
 GENERATE_PYTHON_OP_LIB("math_ops")
@@ -987,7 +991,7 @@ add_library(pywrap_tensorflow_internal SHARED
     $<TARGET_OBJECTS:tf_tools_transform_graph_lib>
     $<$<BOOL:${tensorflow_ENABLE_GRPC_SUPPORT}>:$<TARGET_OBJECTS:tf_core_distributed_runtime>>
     $<TARGET_OBJECTS:tf_core_kernels>
-    $<$<BOOL:${tensorflow_ENABLE_GPU}>:$<TARGET_OBJECTS:tf_core_kernels_cpu_only>>
+    $<$<BOOL:${tensorflow_ENABLE_GPU}>:$<$<BOOL:${BOOL_WIN32}>:$<TARGET_OBJECTS:tf_core_kernels_cpu_only>>>
     $<$<BOOL:${tensorflow_ENABLE_GPU}>:$<TARGET_OBJECTS:tf_stream_executor>>
     ${pywrap_tensorflow_deffile}
 )
@@ -1063,25 +1067,23 @@ if(WIN32)
         DISTCOPY ${CMAKE_CURRENT_BINARY_DIR}/tf_python/tensorflow/contrib/rnn/python/ops/)
 endif(WIN32)
 
-if(WIN32)
-    # include contrib/seq2seq as .so
-    #
-    set(tf_beam_search_srcs
-        "${tensorflow_source_dir}/tensorflow/contrib/seq2seq/kernels/beam_search_ops.cc"
-        "${tensorflow_source_dir}/tensorflow/contrib/seq2seq/kernels/beam_search_ops.h"
-        "${tensorflow_source_dir}/tensorflow/contrib/seq2seq/ops/beam_search_ops.cc"
-    )
+# include contrib/seq2seq as .so
+#
+set(tf_beam_search_srcs
+    "${tensorflow_source_dir}/tensorflow/contrib/seq2seq/kernels/beam_search_ops.cc"
+    "${tensorflow_source_dir}/tensorflow/contrib/seq2seq/kernels/beam_search_ops.h"
+    "${tensorflow_source_dir}/tensorflow/contrib/seq2seq/ops/beam_search_ops.cc"
+)
 
-    set(tf_beam_search_gpu_srcs
-        "${tensorflow_source_dir}/tensorflow/contrib/seq2seq/kernels/beam_search_ops_gpu.cu.cc"
-    )
+set(tf_beam_search_gpu_srcs
+    "${tensorflow_source_dir}/tensorflow/contrib/seq2seq/kernels/beam_search_ops_gpu.cu.cc"
+)
 
-    AddUserOps(TARGET _beam_search_ops
-        SOURCES "${tf_beam_search_srcs}"
-        GPUSOURCES ${tf_beam_search_gpu_srcs}
-        DEPENDS pywrap_tensorflow_internal tf_python_ops
-        DISTCOPY ${CMAKE_CURRENT_BINARY_DIR}/tf_python/tensorflow/contrib/seq2seq/python/ops/)
-endif(WIN32)
+AddUserOps(TARGET _beam_search_ops
+    SOURCES "${tf_beam_search_srcs}"
+    GPUSOURCES ${tf_beam_search_gpu_srcs}
+    DEPENDS pywrap_tensorflow_internal tf_python_ops
+    DISTCOPY ${CMAKE_CURRENT_BINARY_DIR}/tf_python/tensorflow/contrib/seq2seq/python/ops/)
 
 ############################################################
 # Build a PIP package containing the TensorFlow runtime.
diff --git a/tensorflow/contrib/cmake/tf_shared_lib.cmake b/tensorflow/contrib/cmake/tf_shared_lib.cmake
index 9bf45bab30..3e3fe0cdfa 100644
--- a/tensorflow/contrib/cmake/tf_shared_lib.cmake
+++ b/tensorflow/contrib/cmake/tf_shared_lib.cmake
@@ -73,7 +73,7 @@ add_library(tensorflow SHARED
     $<TARGET_OBJECTS:tf_tools_transform_graph_lib>
     $<$<BOOL:${tensorflow_ENABLE_GRPC_SUPPORT}>:$<TARGET_OBJECTS:tf_core_distributed_runtime>>
     $<TARGET_OBJECTS:tf_core_kernels>
-    $<$<BOOL:${tensorflow_ENABLE_GPU}>:$<TARGET_OBJECTS:tf_core_kernels_cpu_only>>
+    $<$<BOOL:${tensorflow_ENABLE_GPU}>:$<$<BOOL:${BOOL_WIN32}>:$<TARGET_OBJECTS:tf_core_kernels_cpu_only>>>
     $<$<BOOL:${tensorflow_ENABLE_GPU}>:$<TARGET_OBJECTS:tf_stream_executor>>
     ${tensorflow_deffile}
 )
@@ -94,3 +94,46 @@ endif()
 if(WIN32)
   add_dependencies(tensorflow tensorflow_static)
 endif(WIN32)
+
+install(TARGETS tensorflow
+        RUNTIME DESTINATION bin
+        LIBRARY DESTINATION lib
+        ARCHIVE DESTINATION lib)
+
+# install necessary headers
+# tensorflow headers
+install(DIRECTORY ${tensorflow_source_dir}/tensorflow/cc/
+        DESTINATION include/tensorflow/cc
+        FILES_MATCHING PATTERN "*.h")
+install(DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/tensorflow/cc/
+        DESTINATION include/tensorflow/cc
+        FILES_MATCHING PATTERN "*.h")
+install(DIRECTORY ${tensorflow_source_dir}/tensorflow/core/
+        DESTINATION include/tensorflow/core
+        FILES_MATCHING PATTERN "*.h")
+install(DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/tensorflow/core/
+        DESTINATION include/tensorflow/core
+        FILES_MATCHING PATTERN "*.h")
+install(DIRECTORY ${tensorflow_source_dir}/tensorflow/stream_executor/
+        DESTINATION include/tensorflow/stream_executor
+        FILES_MATCHING PATTERN "*.h")
+# google protobuf headers
+install(DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/protobuf/src/protobuf/src/google/
+        DESTINATION include/google
+        FILES_MATCHING PATTERN "*.h")
+# nsync headers
+install(DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/external/nsync/
+        DESTINATION include/external/nsync
+        FILES_MATCHING PATTERN "*.h")
+# Eigen directory
+install(DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/eigen/src/eigen/Eigen/
+        DESTINATION include/Eigen)
+# external directory
+install(DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/external/eigen_archive/
+        DESTINATION include/external/eigen_archive)
+# third_party eigen directory
+install(DIRECTORY ${tensorflow_source_dir}/third_party/eigen3/
+        DESTINATION include/third_party/eigen3)
+# unsupported Eigen directory
+install(DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/eigen/src/eigen/unsupported/Eigen/
+        DESTINATION include/unsupported/Eigen)
diff --git a/tensorflow/contrib/cmake/tf_stream_executor.cmake b/tensorflow/contrib/cmake/tf_stream_executor.cmake
index 3d84f1ebb9..8d95f0d3e8 100644
--- a/tensorflow/contrib/cmake/tf_stream_executor.cmake
+++ b/tensorflow/contrib/cmake/tf_stream_executor.cmake
@@ -74,6 +74,9 @@ endif()
 #)
 #list(REMOVE_ITEM tf_stream_executor_srcs ${tf_stream_executor_test_srcs}) 
 
+if (NOT WIN32)
+  set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -lgomp")
+endif (NOT WIN32)
 add_library(tf_stream_executor OBJECT ${tf_stream_executor_srcs})
 
 add_dependencies(tf_stream_executor
diff --git a/tensorflow/contrib/cmake/tf_tools.cmake b/tensorflow/contrib/cmake/tf_tools.cmake
index 6ef9598963..cb58a2e7df 100644
--- a/tensorflow/contrib/cmake/tf_tools.cmake
+++ b/tensorflow/contrib/cmake/tf_tools.cmake
@@ -73,7 +73,7 @@ add_executable(${transform_graph}
     $<TARGET_OBJECTS:tf_core_direct_session>
     $<TARGET_OBJECTS:tf_tools_transform_graph_lib>
     $<TARGET_OBJECTS:tf_core_kernels>
-    $<$<BOOL:${tensorflow_ENABLE_GPU}>:$<TARGET_OBJECTS:tf_core_kernels_cpu_only>>
+    $<$<BOOL:${tensorflow_ENABLE_GPU}>:$<$<BOOL:${BOOL_WIN32}>:$<TARGET_OBJECTS:tf_core_kernels_cpu_only>>>
     $<$<BOOL:${tensorflow_ENABLE_GPU}>:$<TARGET_OBJECTS:tf_stream_executor>>
 )
 
@@ -95,7 +95,7 @@ add_executable(${summarize_graph}
     $<TARGET_OBJECTS:tf_core_direct_session>
     $<TARGET_OBJECTS:tf_tools_transform_graph_lib>
     $<TARGET_OBJECTS:tf_core_kernels>
-    $<$<BOOL:${tensorflow_ENABLE_GPU}>:$<TARGET_OBJECTS:tf_core_kernels_cpu_only>>
+    $<$<BOOL:${tensorflow_ENABLE_GPU}>:$<$<BOOL:${BOOL_WIN32}>:$<TARGET_OBJECTS:tf_core_kernels_cpu_only>>>
     $<$<BOOL:${tensorflow_ENABLE_GPU}>:$<TARGET_OBJECTS:tf_stream_executor>>
 )
 
@@ -117,7 +117,7 @@ add_executable(${compare_graphs}
     $<TARGET_OBJECTS:tf_core_direct_session>
     $<TARGET_OBJECTS:tf_tools_transform_graph_lib>
     $<TARGET_OBJECTS:tf_core_kernels>
-    $<$<BOOL:${tensorflow_ENABLE_GPU}>:$<TARGET_OBJECTS:tf_core_kernels_cpu_only>>
+    $<$<BOOL:${tensorflow_ENABLE_GPU}>:$<$<BOOL:${BOOL_WIN32}>:$<TARGET_OBJECTS:tf_core_kernels_cpu_only>>>
     $<$<BOOL:${tensorflow_ENABLE_GPU}>:$<TARGET_OBJECTS:tf_stream_executor>>
 )
 
@@ -138,7 +138,7 @@ add_executable(${benchmark_model}
     $<TARGET_OBJECTS:tf_core_ops>
     $<TARGET_OBJECTS:tf_core_direct_session>
     $<TARGET_OBJECTS:tf_core_kernels>
-    $<$<BOOL:${tensorflow_ENABLE_GPU}>:$<TARGET_OBJECTS:tf_core_kernels_cpu_only>>
+    $<$<BOOL:${tensorflow_ENABLE_GPU}>:$<$<BOOL:${BOOL_WIN32}>:$<TARGET_OBJECTS:tf_core_kernels_cpu_only>>>
     $<$<BOOL:${tensorflow_ENABLE_GPU}>:$<TARGET_OBJECTS:tf_stream_executor>>
 )
 
@@ -147,3 +147,8 @@ target_link_libraries(${benchmark_model} PUBLIC
   ${tf_core_gpu_kernels_lib}
   ${tensorflow_EXTERNAL_LIBRARIES}
 )
+
+install(TARGETS ${transform_graph} ${summarize_graph} ${compare_graphs} ${benchmark_model}
+        RUNTIME DESTINATION bin
+        LIBRARY DESTINATION lib
+        ARCHIVE DESTINATION lib)
diff --git a/tensorflow/contrib/cmake/tf_tutorials.cmake b/tensorflow/contrib/cmake/tf_tutorials.cmake
index 858e7dda92..e63fccc181 100644
--- a/tensorflow/contrib/cmake/tf_tutorials.cmake
+++ b/tensorflow/contrib/cmake/tf_tutorials.cmake
@@ -34,3 +34,8 @@ target_link_libraries(tf_tutorials_example_trainer PUBLIC
     ${tf_core_gpu_kernels_lib}
     ${tensorflow_EXTERNAL_LIBRARIES}
 )
+
+install(TARGETS tf_tutorials_example_trainer
+        RUNTIME DESTINATION bin
+        LIBRARY DESTINATION lib
+        ARCHIVE DESTINATION lib)
diff --git a/tensorflow/contrib/crf/python/ops/crf.py b/tensorflow/contrib/crf/python/ops/crf.py
index 4282be5ec8..1612c75179 100644
--- a/tensorflow/contrib/crf/python/ops/crf.py
+++ b/tensorflow/contrib/crf/python/ops/crf.py
@@ -363,8 +363,8 @@ class CrfDecodeForwardRnnCell(rnn_cell.RNNCell):
       scope: Unused variable scope of this cell.
 
     Returns:
-      backpointers: [batch_size, num_tags], containing backpointers.
-      new_state: [batch_size, num_tags], containing new score values.
+      backpointers: A [batch_size, num_tags] matrix of backpointers.
+      new_state: A [batch_size, num_tags] matrix of new score values.
     """
     # For simplicity, in shape comments, denote:
     # 'batch_size' by 'B', 'max_seq_len' by 'T' , 'num_tags' by 'O' (output).
@@ -404,8 +404,9 @@ class CrfDecodeBackwardRnnCell(rnn_cell.RNNCell):
     """Build the CrfDecodeBackwardRnnCell.
 
     Args:
-      inputs: [batch_size, num_tags], backpointer of next step (in time order).
-      state: [batch_size, 1], next position's tag index.
+      inputs: A [batch_size, num_tags] matrix of
+            backpointer of next step (in time order).
+      state: A [batch_size, 1] matrix of tag index of next step.
       scope: Unused variable scope of this cell.
 
     Returns:
@@ -429,16 +430,16 @@ def crf_decode(potentials, transition_params, sequence_length):
   This is a function for tensor.
 
   Args:
-    potentials: A [batch_size, max_seq_len, num_tags] tensor, matrix of
+    potentials: A [batch_size, max_seq_len, num_tags] tensor of
               unary potentials.
-    transition_params: A [num_tags, num_tags] tensor, matrix of
+    transition_params: A [num_tags, num_tags] matrix of
               binary potentials.
-    sequence_length: A [batch_size] tensor, containing sequence lengths.
+    sequence_length: A [batch_size] vector of true sequence lengths.
 
   Returns:
-    decode_tags: A [batch_size, max_seq_len] tensor, with dtype tf.int32.
+    decode_tags: A [batch_size, max_seq_len] matrix, with dtype `tf.int32`.
                 Contains the highest scoring tag indices.
-    best_score: A [batch_size] tensor, containing the score of decode_tags.
+    best_score: A [batch_size] vector, containing the score of `decode_tags`.
   """
   # For simplicity, in shape comments, denote:
   # 'batch_size' by 'B', 'max_seq_len' by 'T' , 'num_tags' by 'O' (output).
diff --git a/tensorflow/contrib/data/python/kernel_tests/BUILD b/tensorflow/contrib/data/python/kernel_tests/BUILD
index 1923c0586a..dd0457d54b 100644
--- a/tensorflow/contrib/data/python/kernel_tests/BUILD
+++ b/tensorflow/contrib/data/python/kernel_tests/BUILD
@@ -11,6 +11,7 @@ py_test(
     size = "small",
     srcs = ["batch_dataset_op_test.py"],
     srcs_version = "PY2AND3",
+    tags = ["no_pip"],
     deps = [
         ":dataset_serialization_test",
         "//tensorflow/contrib/data/python/ops:dataset_ops",
@@ -372,6 +373,7 @@ py_test(
     size = "small",
     srcs = ["sequence_dataset_op_test.py"],
     srcs_version = "PY2AND3",
+    tags = ["no_pip"],
     deps = [
         ":dataset_serialization_test",
         "//tensorflow/contrib/data/python/ops:dataset_ops",
@@ -449,6 +451,7 @@ py_test(
     size = "small",
     srcs = ["zip_dataset_op_test.py"],
     srcs_version = "PY2AND3",
+    tags = ["no_pip"],
     deps = [
         ":dataset_serialization_test",
         "//tensorflow/contrib/data/python/ops:dataset_ops",
@@ -465,7 +468,10 @@ py_test(
     size = "small",
     srcs = ["prefetching_ops_test.py"],
     srcs_version = "PY2AND3",
-    tags = ["no_oss"],  # b/68785503
+    tags = [
+        "manual",
+        "no_oss",  # b/68785503
+    ],
     deps = [
         "//tensorflow/contrib/data/python/ops:prefetching_py",
         "//tensorflow/core:protos_all_py",
diff --git a/tensorflow/contrib/distributions/BUILD b/tensorflow/contrib/distributions/BUILD
index 2dc8ad9483..145b9495ff 100644
--- a/tensorflow/contrib/distributions/BUILD
+++ b/tensorflow/contrib/distributions/BUILD
@@ -140,6 +140,23 @@ cuda_py_test(
     ],
 )
 
+cuda_py_test(
+    name = "cauchy_test",
+    size = "medium",
+    srcs = ["python/kernel_tests/cauchy_test.py"],
+    additional_deps = [
+        ":distributions_py",
+        "//third_party/py/numpy",
+        "//tensorflow/python:array_ops",
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:framework_for_generated_wrappers",
+        "//tensorflow/python:framework_test_lib",
+        "//tensorflow/python:gradients",
+        "//tensorflow/python:platform_test",
+        "//tensorflow/python:variables",
+    ],
+)
+
 cuda_py_test(
     name = "chi2_test",
     srcs = ["python/kernel_tests/chi2_test.py"],
diff --git a/tensorflow/contrib/distributions/__init__.py b/tensorflow/contrib/distributions/__init__.py
index 16f6533e57..0d12d83893 100644
--- a/tensorflow/contrib/distributions/__init__.py
+++ b/tensorflow/contrib/distributions/__init__.py
@@ -24,6 +24,7 @@ from __future__ import print_function
 
 from tensorflow.contrib.distributions.python.ops import bijectors
 from tensorflow.contrib.distributions.python.ops.binomial import *
+from tensorflow.contrib.distributions.python.ops.cauchy import *
 from tensorflow.contrib.distributions.python.ops.chi2 import *
 from tensorflow.contrib.distributions.python.ops.conditional_distribution import *
 from tensorflow.contrib.distributions.python.ops.conditional_transformed_distribution import *
@@ -83,6 +84,7 @@ from tensorflow.python.util.all_util import remove_undocumented
 
 _allowed_symbols = [
     'bijectors',
+    'Cauchy',
     'ConditionalDistribution',
     'ConditionalTransformedDistribution',
     'FULLY_REPARAMETERIZED',
diff --git a/tensorflow/contrib/distributions/python/kernel_tests/cauchy_test.py b/tensorflow/contrib/distributions/python/kernel_tests/cauchy_test.py
new file mode 100644
index 0000000000..7f7697357c
--- /dev/null
+++ b/tensorflow/contrib/distributions/python/kernel_tests/cauchy_test.py
@@ -0,0 +1,437 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for Cauchy."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import importlib
+import numpy as np
+
+from tensorflow.contrib.distributions.python.ops import cauchy as cauchy_lib
+from tensorflow.python.framework import constant_op
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import ops
+from tensorflow.python.framework import tensor_shape
+from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import gradients_impl
+from tensorflow.python.ops import variables
+from tensorflow.python.platform import test
+from tensorflow.python.platform import tf_logging
+
+
+def try_import(name):  # pylint: disable=invalid-name
+  module = None
+  try:
+    module = importlib.import_module(name)
+  except ImportError as e:
+    tf_logging.warning("Could not import %s: %s" % (name, str(e)))
+  return module
+
+stats = try_import("scipy.stats")
+
+
+class CauchyTest(test.TestCase):
+
+  def setUp(self):
+    self._rng = np.random.RandomState(123)
+
+  def assertAllFinite(self, tensor):
+    is_finite = np.isfinite(tensor.eval())
+    all_true = np.ones_like(is_finite, dtype=np.bool)
+    self.assertAllEqual(all_true, is_finite)
+
+  def _testParamShapes(self, sample_shape, expected):
+    with self.test_session():
+      param_shapes = cauchy_lib.Cauchy.param_shapes(sample_shape)
+      loc_shape, scale_shape = param_shapes["loc"], param_shapes["scale"]
+      self.assertAllEqual(expected, loc_shape.eval())
+      self.assertAllEqual(expected, scale_shape.eval())
+      loc = array_ops.zeros(loc_shape)
+      scale = array_ops.ones(scale_shape)
+      self.assertAllEqual(
+          expected,
+          array_ops.shape(cauchy_lib.Cauchy(loc, scale).sample()).eval())
+
+  def _testParamStaticShapes(self, sample_shape, expected):
+    param_shapes = cauchy_lib.Cauchy.param_static_shapes(sample_shape)
+    loc_shape, scale_shape = param_shapes["loc"], param_shapes["scale"]
+    self.assertEqual(expected, loc_shape)
+    self.assertEqual(expected, scale_shape)
+
+  def testParamShapes(self):
+    sample_shape = [10, 3, 4]
+    self._testParamShapes(sample_shape, sample_shape)
+    self._testParamShapes(constant_op.constant(sample_shape), sample_shape)
+
+  def testParamStaticShapes(self):
+    sample_shape = [10, 3, 4]
+    self._testParamStaticShapes(sample_shape, sample_shape)
+    self._testParamStaticShapes(
+        tensor_shape.TensorShape(sample_shape), sample_shape)
+
+  def testCauchyLogPDF(self):
+    with self.test_session():
+      batch_size = 6
+      loc = constant_op.constant([3.0] * batch_size)
+      scale = constant_op.constant([np.sqrt(10.0)] * batch_size)
+      x = np.array([-2.5, 2.5, 4.0, 0.0, -1.0, 2.0], dtype=np.float32)
+      cauchy = cauchy_lib.Cauchy(loc=loc, scale=scale)
+
+      log_pdf = cauchy.log_prob(x)
+      self.assertAllEqual(cauchy.batch_shape_tensor().eval(),
+                          log_pdf.shape)
+      self.assertAllEqual(cauchy.batch_shape_tensor().eval(),
+                          log_pdf.eval().shape)
+      self.assertAllEqual(cauchy.batch_shape, log_pdf.shape)
+      self.assertAllEqual(cauchy.batch_shape, log_pdf.eval().shape)
+
+      pdf = cauchy.prob(x)
+      self.assertAllEqual(cauchy.batch_shape_tensor().eval(), pdf.shape)
+      self.assertAllEqual(cauchy.batch_shape_tensor().eval(), pdf.eval().shape)
+      self.assertAllEqual(cauchy.batch_shape, pdf.shape)
+      self.assertAllEqual(cauchy.batch_shape, pdf.eval().shape)
+
+      if not stats:
+        return
+      expected_log_pdf = stats.cauchy(loc.eval(), scale.eval()).logpdf(x)
+      self.assertAllClose(expected_log_pdf, log_pdf.eval())
+      self.assertAllClose(np.exp(expected_log_pdf), pdf.eval())
+
+  def testCauchyLogPDFMultidimensional(self):
+    with self.test_session():
+      batch_size = 6
+      loc = constant_op.constant([[3.0, -3.0]] * batch_size)
+      scale = constant_op.constant([[np.sqrt(10.0), np.sqrt(15.0)]] *
+                                   batch_size)
+      x = np.array([[-2.5, 2.5, 4.0, 0.0, -1.0, 2.0]], dtype=np.float32).T
+      cauchy = cauchy_lib.Cauchy(loc=loc, scale=scale)
+
+      log_pdf = cauchy.log_prob(x)
+      log_pdf_values = log_pdf.eval()
+      self.assertEqual(log_pdf.shape, (6, 2))
+      self.assertAllEqual(cauchy.batch_shape_tensor().eval(),
+                          log_pdf.shape)
+      self.assertAllEqual(cauchy.batch_shape_tensor().eval(),
+                          log_pdf.eval().shape)
+      self.assertAllEqual(cauchy.batch_shape, log_pdf.shape)
+      self.assertAllEqual(cauchy.batch_shape, log_pdf.eval().shape)
+
+      pdf = cauchy.prob(x)
+      pdf_values = pdf.eval()
+      self.assertEqual(pdf.shape, (6, 2))
+      self.assertAllEqual(cauchy.batch_shape_tensor().eval(), pdf.shape)
+      self.assertAllEqual(cauchy.batch_shape_tensor().eval(), pdf_values.shape)
+      self.assertAllEqual(cauchy.batch_shape, pdf.shape)
+      self.assertAllEqual(cauchy.batch_shape, pdf_values.shape)
+
+      if not stats:
+        return
+      expected_log_pdf = stats.cauchy(loc.eval(), scale.eval()).logpdf(x)
+      self.assertAllClose(expected_log_pdf, log_pdf_values)
+      self.assertAllClose(np.exp(expected_log_pdf), pdf_values)
+
+  def testCauchyCDF(self):
+    with self.test_session():
+      batch_size = 50
+      loc = self._rng.randn(batch_size)
+      scale = self._rng.rand(batch_size) + 1.0
+      x = np.linspace(-8.0, 8.0, batch_size).astype(np.float64)
+
+      cauchy = cauchy_lib.Cauchy(loc=loc, scale=scale)
+      cdf = cauchy.cdf(x)
+      self.assertAllEqual(cauchy.batch_shape_tensor().eval(), cdf.shape)
+      self.assertAllEqual(cauchy.batch_shape_tensor().eval(), cdf.eval().shape)
+      self.assertAllEqual(cauchy.batch_shape, cdf.shape)
+      self.assertAllEqual(cauchy.batch_shape, cdf.eval().shape)
+      if not stats:
+        return
+      expected_cdf = stats.cauchy(loc, scale).cdf(x)
+      self.assertAllClose(expected_cdf, cdf.eval(), atol=0)
+
+  def testCauchySurvivalFunction(self):
+    with self.test_session():
+      batch_size = 50
+      loc = self._rng.randn(batch_size)
+      scale = self._rng.rand(batch_size) + 1.0
+      x = np.linspace(-8.0, 8.0, batch_size).astype(np.float64)
+
+      cauchy = cauchy_lib.Cauchy(loc=loc, scale=scale)
+
+      sf = cauchy.survival_function(x)
+      self.assertAllEqual(cauchy.batch_shape_tensor().eval(), sf.shape)
+      self.assertAllEqual(cauchy.batch_shape_tensor().eval(), sf.eval().shape)
+      self.assertAllEqual(cauchy.batch_shape, sf.shape)
+      self.assertAllEqual(cauchy.batch_shape, sf.eval().shape)
+      if not stats:
+        return
+      expected_sf = stats.cauchy(loc, scale).sf(x)
+      self.assertAllClose(expected_sf, sf.eval(), atol=0)
+
+  def testCauchyLogCDF(self):
+    with self.test_session():
+      batch_size = 50
+      loc = self._rng.randn(batch_size)
+      scale = self._rng.rand(batch_size) + 1.0
+      x = np.linspace(-100.0, 10.0, batch_size).astype(np.float64)
+
+      cauchy = cauchy_lib.Cauchy(loc=loc, scale=scale)
+
+      cdf = cauchy.log_cdf(x)
+      self.assertAllEqual(cauchy.batch_shape_tensor().eval(), cdf.shape)
+      self.assertAllEqual(cauchy.batch_shape_tensor().eval(), cdf.eval().shape)
+      self.assertAllEqual(cauchy.batch_shape, cdf.shape)
+      self.assertAllEqual(cauchy.batch_shape, cdf.eval().shape)
+
+      if not stats:
+        return
+      expected_cdf = stats.cauchy(loc, scale).logcdf(x)
+      self.assertAllClose(expected_cdf, cdf.eval(), atol=0, rtol=1e-5)
+
+  def testFiniteGradientAtDifficultPoints(self):
+    for dtype in [np.float32, np.float64]:
+      g = ops.Graph()
+      with g.as_default():
+        loc = variables.Variable(dtype(0.0))
+        scale = variables.Variable(dtype(1.0))
+        dist = cauchy_lib.Cauchy(loc=loc, scale=scale)
+        x = np.array([-100., -20., -5., 0., 5., 20., 100.]).astype(dtype)
+        for func in [
+            dist.cdf, dist.log_cdf, dist.survival_function,
+            dist.log_survival_function, dist.log_prob, dist.prob
+        ]:
+          value = func(x)
+          grads = gradients_impl.gradients(value, [loc, scale])
+          with self.test_session(graph=g):
+            variables.global_variables_initializer().run()
+            self.assertAllFinite(value)
+            self.assertAllFinite(grads[0])
+            self.assertAllFinite(grads[1])
+
+  def testCauchyLogSurvivalFunction(self):
+    with self.test_session():
+      batch_size = 50
+      loc = self._rng.randn(batch_size)
+      scale = self._rng.rand(batch_size) + 1.0
+      x = np.linspace(-10.0, 100.0, batch_size).astype(np.float64)
+
+      cauchy = cauchy_lib.Cauchy(loc=loc, scale=scale)
+
+      sf = cauchy.log_survival_function(x)
+      self.assertAllEqual(cauchy.batch_shape_tensor().eval(), sf.shape)
+      self.assertAllEqual(cauchy.batch_shape_tensor().eval(), sf.eval().shape)
+      self.assertAllEqual(cauchy.batch_shape, sf.shape)
+      self.assertAllEqual(cauchy.batch_shape, sf.eval().shape)
+
+      if not stats:
+        return
+      expected_sf = stats.cauchy(loc, scale).logsf(x)
+      self.assertAllClose(expected_sf, sf.eval(), atol=0, rtol=1e-5)
+
+  def testCauchyEntropy(self):
+    with self.test_session():
+      loc = np.array([1.0, 1.0, 1.0])
+      scale = np.array([[1.0, 2.0, 3.0]])
+      cauchy = cauchy_lib.Cauchy(loc=loc, scale=scale)
+
+      entropy = cauchy.entropy()
+      self.assertAllEqual(cauchy.batch_shape_tensor().eval(),
+                          entropy.shape)
+      self.assertAllEqual(cauchy.batch_shape_tensor().eval(),
+                          entropy.eval().shape)
+      self.assertAllEqual(cauchy.batch_shape, entropy.shape)
+      self.assertAllEqual(cauchy.batch_shape, entropy.eval().shape)
+
+      if not stats:
+        return
+      expected_entropy = stats.cauchy(loc, scale).entropy()
+      self.assertAllClose(expected_entropy, entropy.eval())
+
+  def testCauchyMode(self):
+    with self.test_session():
+      # Mu will be broadcast to [7, 7, 7].
+      loc = [7.]
+      scale = [11., 12., 13.]
+
+      cauchy = cauchy_lib.Cauchy(loc=loc, scale=scale)
+
+      self.assertAllEqual((3,), cauchy.mode().shape)
+      self.assertAllEqual([7., 7, 7], cauchy.mode().eval())
+
+  def testCauchyMean(self):
+    with self.test_session():
+      loc = [1., 2., 3.]
+      scale = [7.]
+      cauchy = cauchy_lib.Cauchy(loc=loc, scale=scale)
+
+      self.assertAllEqual((3,), cauchy.mean().shape)
+      self.assertAllEqual([np.nan] * 3, cauchy.mean().eval())
+
+  def testCauchyNanMean(self):
+    with self.test_session():
+      loc = [1., 2., 3.]
+      scale = [7.]
+      cauchy = cauchy_lib.Cauchy(loc=loc, scale=scale, allow_nan_stats=False)
+
+      with self.assertRaises(ValueError):
+        cauchy.mean().eval()
+
+  def testCauchyQuantile(self):
+    with self.test_session():
+      batch_size = 50
+      loc = self._rng.randn(batch_size)
+      scale = self._rng.rand(batch_size) + 1.0
+      p = np.linspace(0.000001, 0.999999, batch_size).astype(np.float64)
+
+      cauchy = cauchy_lib.Cauchy(loc=loc, scale=scale)
+      x = cauchy.quantile(p)
+
+      self.assertAllEqual(cauchy.batch_shape_tensor().eval(), x.shape)
+      self.assertAllEqual(cauchy.batch_shape_tensor().eval(), x.eval().shape)
+      self.assertAllEqual(cauchy.batch_shape, x.shape)
+      self.assertAllEqual(cauchy.batch_shape, x.eval().shape)
+
+      if not stats:
+        return
+      expected_x = stats.cauchy(loc, scale).ppf(p)
+      self.assertAllClose(expected_x, x.eval(), atol=0.)
+
+  def testCauchyVariance(self):
+    with self.test_session():
+      # scale will be broadcast to [7, 7, 7]
+      loc = [1., 2., 3.]
+      scale = [7.]
+      cauchy = cauchy_lib.Cauchy(loc=loc, scale=scale)
+
+      self.assertAllEqual((3,), cauchy.variance().shape)
+      self.assertAllEqual([np.nan] * 3, cauchy.variance().eval())
+
+  def testCauchyNanVariance(self):
+    with self.test_session():
+      # scale will be broadcast to [7, 7, 7]
+      loc = [1., 2., 3.]
+      scale = [7.]
+      cauchy = cauchy_lib.Cauchy(loc=loc, scale=scale, allow_nan_stats=False)
+
+      with self.assertRaises(ValueError):
+        cauchy.variance().eval()
+
+  def testCauchyStandardDeviation(self):
+    with self.test_session():
+      # scale will be broadcast to [7, 7, 7]
+      loc = [1., 2., 3.]
+      scale = [7.]
+      cauchy = cauchy_lib.Cauchy(loc=loc, scale=scale)
+
+      self.assertAllEqual((3,), cauchy.stddev().shape)
+      self.assertAllEqual([np.nan] * 3, cauchy.stddev().eval())
+
+  def testCauchyNanStandardDeviation(self):
+    with self.test_session():
+      # scale will be broadcast to [7, 7, 7]
+      loc = [1., 2., 3.]
+      scale = [7.]
+      cauchy = cauchy_lib.Cauchy(loc=loc, scale=scale, allow_nan_stats=False)
+
+      with self.assertRaises(ValueError):
+        cauchy.stddev().eval()
+
+  def testCauchySample(self):
+    with self.test_session():
+      loc = constant_op.constant(3.0)
+      scale = constant_op.constant(1.0)
+      loc_v = 3.0
+      n = constant_op.constant(100000)
+      cauchy = cauchy_lib.Cauchy(loc=loc, scale=scale)
+      samples = cauchy.sample(n)
+      sample_values = samples.eval()
+
+      self.assertEqual(sample_values.shape, (100000,))
+      self.assertAllClose(np.median(sample_values), loc_v, atol=1e-1)
+
+      expected_shape = tensor_shape.TensorShape([n.eval()]).concatenate(
+          tensor_shape.TensorShape(cauchy.batch_shape_tensor().eval()))
+
+      self.assertAllEqual(expected_shape, samples.shape)
+      self.assertAllEqual(expected_shape, sample_values.shape)
+
+      expected_shape = (tensor_shape.TensorShape(
+          [n.eval()]).concatenate(cauchy.batch_shape))
+
+      self.assertAllEqual(expected_shape, samples.shape)
+      self.assertAllEqual(expected_shape, sample_values.shape)
+
+  def testCauchySampleMultiDimensional(self):
+    with self.test_session():
+      batch_size = 2
+      loc = constant_op.constant([[3.0, -3.0]] * batch_size)
+      scale = constant_op.constant([[0.5, 1.0]] * batch_size)
+      loc_v = [3.0, -3.0]
+      n = constant_op.constant(100000)
+      cauchy = cauchy_lib.Cauchy(loc=loc, scale=scale)
+      samples = cauchy.sample(n)
+      sample_values = samples.eval()
+      self.assertEqual(samples.shape, (100000, batch_size, 2))
+      self.assertAllClose(np.median(sample_values[:, 0, 0]),
+                          loc_v[0], atol=1e-1)
+      self.assertAllClose(np.median(sample_values[:, 0, 1]),
+                          loc_v[1], atol=1e-1)
+
+      expected_shape = tensor_shape.TensorShape([n.eval()]).concatenate(
+          tensor_shape.TensorShape(cauchy.batch_shape_tensor().eval()))
+      self.assertAllEqual(expected_shape, samples.shape)
+      self.assertAllEqual(expected_shape, sample_values.shape)
+
+      expected_shape = (tensor_shape.TensorShape(
+          [n.eval()]).concatenate(cauchy.batch_shape))
+      self.assertAllEqual(expected_shape, samples.shape)
+      self.assertAllEqual(expected_shape, sample_values.shape)
+
+  def testCauchyNegativeLocFails(self):
+    with self.test_session():
+      cauchy = cauchy_lib.Cauchy(loc=[1.], scale=[-5.], validate_args=True)
+      with self.assertRaisesOpError("Condition x > 0 did not hold"):
+        cauchy.mode().eval()
+
+  def testCauchyShape(self):
+    with self.test_session():
+      loc = constant_op.constant([-3.0] * 5)
+      scale = constant_op.constant(11.0)
+      cauchy = cauchy_lib.Cauchy(loc=loc, scale=scale)
+
+      self.assertEqual(cauchy.batch_shape_tensor().eval(), [5])
+      self.assertEqual(cauchy.batch_shape, tensor_shape.TensorShape([5]))
+      self.assertAllEqual(cauchy.event_shape_tensor().eval(), [])
+      self.assertEqual(cauchy.event_shape, tensor_shape.TensorShape([]))
+
+  def testCauchyShapeWithPlaceholders(self):
+    loc = array_ops.placeholder(dtype=dtypes.float32)
+    scale = array_ops.placeholder(dtype=dtypes.float32)
+    cauchy = cauchy_lib.Cauchy(loc=loc, scale=scale)
+
+    with self.test_session() as sess:
+      # get_batch_shape should return an "<unknown>" tensor.
+      self.assertEqual(cauchy.batch_shape, tensor_shape.TensorShape(None))
+      self.assertEqual(cauchy.event_shape, ())
+      self.assertAllEqual(cauchy.event_shape_tensor().eval(), [])
+      self.assertAllEqual(
+          sess.run(cauchy.batch_shape_tensor(),
+                   feed_dict={loc: 5.0,
+                              scale: [1.0, 2.0]}), [2])
+
+
+if __name__ == "__main__":
+  test.main()
diff --git a/tensorflow/contrib/distributions/python/ops/cauchy.py b/tensorflow/contrib/distributions/python/ops/cauchy.py
new file mode 100644
index 0000000000..a17bb091f6
--- /dev/null
+++ b/tensorflow/contrib/distributions/python/ops/cauchy.py
@@ -0,0 +1,223 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""The Cauchy distribution class."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import numpy as np
+
+from tensorflow.python.framework import constant_op
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import ops
+from tensorflow.python.framework import tensor_shape
+from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import check_ops
+from tensorflow.python.ops import math_ops
+from tensorflow.python.ops import random_ops
+from tensorflow.python.ops.distributions import distribution
+
+
+__all__ = [
+    "Cauchy",
+]
+
+
+class Cauchy(distribution.Distribution):
+  """The Cauchy distribution with location `loc` and scale `scale`.
+
+  #### Mathematical details
+
+  The probability density function (pdf) is,
+
+  ```none
+  pdf(x; loc, scale) = 1 / (pi * scale * (1 + ((x - loc) / scale)**2))
+  ```
+  where `loc` is the location, and `scale` is the scale.
+
+  The Cauchy distribution is a member of the [location-scale family](
+  https://en.wikipedia.org/wiki/Location-scale_family), i.e.
+
+  ```none
+  X ~ Cauchy(loc=0, scale=1)
+  Y ~ Cauchy(loc=loc, scale=scale)
+  Y = loc + scale * X
+  ```
+
+  #### Examples
+
+  Examples of initialization of one or a batch of distributions.
+
+  ```python
+  # Define a single scalar Cauchy distribution.
+  dist = Cauchy(loc=0., scale=3.)
+
+  # Evaluate the cdf at 1, returning a scalar.
+  dist.cdf(1.)
+
+  # Define a batch of two scalar valued Cauchy distributions.
+  dist = Cauchy(loc=[1, 2.], scale=[11, 22.])
+
+  # Evaluate the pdf of the first distribution on 0, and the second on 1.5,
+  # returning a length two tensor.
+  dist.prob([0, 1.5])
+
+  # Get 3 samples, returning a 3 x 2 tensor.
+  dist.sample([3])
+  ```
+
+  Arguments are broadcast when possible.
+
+  ```python
+  # Define a batch of two scalar valued Cauchy distributions.
+  # Both have median 1, but different scales.
+  dist = tf.contrib.distributions.Cauchy(loc=1., scale=[11, 22.])
+  # Evaluate the pdf of both distributions on the same point, 3.0,
+  # returning a length 2 tensor.
+  dist.prob(3.0)
+  ```
+  """
+
+  def __init__(self,
+               loc,
+               scale,
+               validate_args=False,
+               allow_nan_stats=True,
+               name="Cauchy"):
+    """Construct Cauchy distributions with loc and and scale `loc` and `scale`.
+
+    The parameters `loc` and `scale` must be shaped in a way that supports
+    broadcasting (e.g. `loc + scale` is a valid operation).
+
+    Args:
+      loc: Floating point tensor; the modes of the distribution(s).
+      scale: Floating point tensor; the locations of the distribution(s).
+        Must contain only positive values.
+      validate_args: Python `bool`, default `False`. When `True` distribution
+        parameters are checked for validity despite possibly degrading runtime
+        performance. When `False` invalid inputs may silently render incorrect
+        outputs.
+      allow_nan_stats: Python `bool`, default `True`. When `True`,
+        statistics (e.g., mean, mode, variance) use the value "`NaN`" to
+        indicate the result is undefined. When `False`, an exception is raised
+        if one or more of the statistic's batch members are undefined.
+      name: Python `str` name prefixed to Ops created by this class.
+
+    Raises:
+      TypeError: if `loc` and `scale` have different `dtype`.
+    """
+    parameters = locals()
+    with ops.name_scope(name, values=[loc, scale]):
+      with ops.control_dependencies([check_ops.assert_positive(scale)] if
+                                    validate_args else []):
+        self._loc = array_ops.identity(loc, name="loc")
+        self._scale = array_ops.identity(scale, name="scale")
+        check_ops.assert_same_float_dtype([self._loc, self._scale])
+    super(Cauchy, self).__init__(
+        dtype=self._scale.dtype,
+        reparameterization_type=distribution.FULLY_REPARAMETERIZED,
+        validate_args=validate_args,
+        allow_nan_stats=allow_nan_stats,
+        parameters=parameters,
+        graph_parents=[self._loc, self._scale],
+        name=name)
+
+  @staticmethod
+  def _param_shapes(sample_shape):
+    return dict(
+        zip(("loc", "scale"), ([ops.convert_to_tensor(
+            sample_shape, dtype=dtypes.int32)] * 2)))
+
+  @property
+  def loc(self):
+    """Distribution parameter for the mean."""
+    return self._loc
+
+  @property
+  def scale(self):
+    """Distribution parameter for standard deviation."""
+    return self._scale
+
+  def _batch_shape_tensor(self):
+    return array_ops.broadcast_dynamic_shape(
+        array_ops.shape(self.loc),
+        array_ops.shape(self.scale))
+
+  def _batch_shape(self):
+    return array_ops.broadcast_static_shape(
+        self.loc.shape,
+        self.scale.shape)
+
+  def _event_shape_tensor(self):
+    return constant_op.constant([], dtype=dtypes.int32)
+
+  def _event_shape(self):
+    return tensor_shape.scalar()
+
+  def _sample_n(self, n, seed=None):
+    shape = array_ops.concat([[n], self.batch_shape_tensor()], 0)
+    probs = random_ops.random_uniform(
+        shape=shape, minval=0., maxval=1., dtype=self.dtype, seed=seed)
+    return self._quantile(probs)
+
+  def _log_prob(self, x):
+    return self._log_unnormalized_prob(x) - self._log_normalization()
+
+  def _cdf(self, x):
+    return math_ops.atan(self._z(x)) / np.pi + 0.5
+
+  def _log_cdf(self, x):
+    return math_ops.log1p(2 / np.pi * math_ops.atan(self._z(x))) - np.log(2)
+
+  def _log_unnormalized_prob(self, x):
+    return -math_ops.log1p(math_ops.square(self._z(x)))
+
+  def _log_normalization(self):
+    return np.log(np.pi) + math_ops.log(self.scale)
+
+  def _entropy(self):
+    h = np.log(4 * np.pi) + math_ops.log(self.scale)
+    return h * array_ops.ones_like(self.loc)
+
+  def _quantile(self, p):
+    return self.loc + self.scale * math_ops.tan(np.pi * (p - 0.5))
+
+  def _mode(self):
+    return self.loc * array_ops.ones_like(self.scale)
+
+  def _z(self, x):
+    """Standardize input `x`."""
+    with ops.name_scope("standardize", values=[x]):
+      return (x - self.loc) / self.scale
+
+  def _inv_z(self, z):
+    """Reconstruct input `x` from a its normalized version."""
+    with ops.name_scope("reconstruct", values=[z]):
+      return z * self.scale + self.loc
+
+  def _mean(self):
+    if self.allow_nan_stats:
+      return array_ops.fill(self.batch_shape_tensor(),
+                            self.dtype.as_numpy_dtype(np.nan))
+    else:
+      raise ValueError("`mean` is undefined for Cauchy distribution.")
+
+  def _stddev(self):
+    if self.allow_nan_stats:
+      return array_ops.fill(self.batch_shape_tensor(),
+                            self.dtype.as_numpy_dtype(np.nan))
+    else:
+      raise ValueError("`stddev` is undefined for Cauchy distribution.")
diff --git a/tensorflow/contrib/eager/python/examples/notebooks/1_basics.ipynb b/tensorflow/contrib/eager/python/examples/notebooks/1_basics.ipynb
index 01616f2e7d..459f2f4a7d 100644
--- a/tensorflow/contrib/eager/python/examples/notebooks/1_basics.ipynb
+++ b/tensorflow/contrib/eager/python/examples/notebooks/1_basics.ipynb
@@ -429,7 +429,9 @@
         "cpu_tensor = tf.random_normal([SIZE, SIZE])\n",
         "\n",
         "if is_gpu_available:\n",
-        "  gpu_tensor = cpu_tensor.gpu()"
+        "  gpu_tensor = cpu_tensor.gpu()\n",
+        "else:\n",
+        "  print(\"GPU not available.\")"
       ]
     },
     {
diff --git a/tensorflow/contrib/eager/python/examples/notebooks/2_gradients.ipynb b/tensorflow/contrib/eager/python/examples/notebooks/2_gradients.ipynb
index 3b7e2cd435..e6c7c11733 100644
--- a/tensorflow/contrib/eager/python/examples/notebooks/2_gradients.ipynb
+++ b/tensorflow/contrib/eager/python/examples/notebooks/2_gradients.ipynb
@@ -383,7 +383,7 @@
         "\n",
         "`implicit_value_and_gradients()` returns a function that accepts the same inputs as the function passed in, and returns a tuple consisting of:\n",
         "\n",
-        "1. the value returned by the function passed in (in this case, the loss calculated by `calculate_linear_model_loss()`), and\n",
+        "1. the value returned by the function passed in (in this case, the loss calculated by `loss_fn()`), and\n",
         "1. a list of tuples consisting of:\n",
         "  1. The value of the gradient (a `tf.Tensor`) with respect to a given variable\n",
         "  1. The corresponding variable (`tf.Variable`)\n",
@@ -698,7 +698,7 @@
       "source": [
         "## Other Ways to Compute Gradients\n",
         "\n",
-        "Using our loss function as an example (`calculate_linear_model_loss()`), there are several other ways we could compute gradients:\n",
+        "Using our loss function as an example (`loss_fn()`), there are several other ways we could compute gradients:\n",
         "\n",
         "1. `tfe.implicit_gradients()`\n",
         "1. `tfe.gradients_function()`\n",
@@ -841,7 +841,7 @@
         "# tfe.implicit_value_and_gradients() demo\n",
         "value_gradients_fn = tfe.implicit_value_and_gradients(loss_fn)\n",
         "\n",
-        "# Returns only gradients:\n",
+        "# Returns the value returned by the function passed in, gradients, and variables:\n",
         "value_gradients_fn(inputs, labels, wb)"
       ]
     }
diff --git a/tensorflow/contrib/eager/python/examples/notebooks/3_datasets.ipynb b/tensorflow/contrib/eager/python/examples/notebooks/3_datasets.ipynb
index ebcc7027c1..0088da5c4b 100644
--- a/tensorflow/contrib/eager/python/examples/notebooks/3_datasets.ipynb
+++ b/tensorflow/contrib/eager/python/examples/notebooks/3_datasets.ipynb
@@ -9,7 +9,7 @@
       "source": [
         "# Eager Execution Tutorial: Importing Data\n",
         "\n",
-        "This notebook demonstrates the use of the [`tf.contrib.data.Dataset` API](https://www.tensorflow.org/programmers_guide/datasets) to build pipelines to feed data to your program. It covers:\n",
+        "This notebook demonstrates the use of the [`tf.data.Dataset` API](https://www.tensorflow.org/programmers_guide/datasets) to build pipelines to feed data to your program. It covers:\n",
         "\n",
         "* Creating a `Dataset`.\n",
         "* Iteration over a `Dataset` with eager execution enabled.\n",
@@ -64,7 +64,7 @@
       "source": [
         "# Step 1: Create a source `Dataset`\n",
         "\n",
-        "Create a _source_ dataset using one of the factory functions like [`Dataset.from_tensors`](https://www.tensorflow.org/api_docs/python/tf/contrib/data/Dataset#from_tensors), [`Dataset.from_tensor_slices`](https://www.tensorflow.org/api_docs/python/tf/contrib/data/Dataset#from_tensor_slices) or using objects that read from files like [`TextLineDataset`](https://www.tensorflow.org/api_docs/python/tf/contrib/data/TextLineDataset) or [`TFRecordDataset`](https://www.tensorflow.org/api_docs/python/tf/contrib/data/TFRecordDataset). See the [Programmer's Guide](https://www.google.com/url?sa=D\u0026q=https%3A%2F%2Fwww.tensorflow.org%2Fprogrammers_guide%2Fdatasets%23reading_input_data) for more information."
+        "Create a _source_ dataset using one of the factory functions like [`Dataset.from_tensors`](https://www.tensorflow.org/api_docs/python/tf/data/Dataset#from_tensors), [`Dataset.from_tensor_slices`](https://www.tensorflow.org/api_docs/python/tf/data/Dataset#from_tensor_slices) or using objects that read from files like [`TextLineDataset`](https://www.tensorflow.org/api_docs/python/tf/data/TextLineDataset) or [`TFRecordDataset`](https://www.tensorflow.org/api_docs/python/tf/data/TFRecordDataset). See the [Programmer's Guide](https://www.google.com/url?sa=D\u0026q=https%3A%2F%2Fwww.tensorflow.org%2Fprogrammers_guide%2Fdatasets%23reading_input_data) for more information."
       ]
     },
     {
@@ -83,7 +83,7 @@
       },
       "outputs": [],
       "source": [
-        "ds_tensors = tf.contrib.data.Dataset.from_tensor_slices([1, 2, 3, 4, 5, 6])\n",
+        "ds_tensors = tf.data.Dataset.from_tensor_slices([1, 2, 3, 4, 5, 6])\n",
         "\n",
         "# Create a CSV file\n",
         "import tempfile\n",
@@ -93,7 +93,7 @@
         "Line 2\n",
         "Line 3\n",
         "  \"\"\")\n",
-        "ds_file = tf.contrib.data.TextLineDataset(filename)\n"
+        "ds_file = tf.data.TextLineDataset(filename)\n"
       ]
     },
     {
@@ -105,7 +105,7 @@
       "source": [
         "# Step 2: Apply transformations\n",
         "\n",
-        "Use the transformations functions like [`map`](https://www.tensorflow.org/api_docs/python/tf/contrib/data/Dataset#map), [`batch`](https://www.tensorflow.org/api_docs/python/tf/contrib/data/Dataset#batch), [`shuffle`](https://www.tensorflow.org/api_docs/python/tf/contrib/data/Dataset#shuffle) etc. to apply transformations to the records of the dataset. See the [API documentation for `tf.contrib.data.Dataset`](https://www.tensorflow.org/api_docs/python/tf/contrib/data/Dataset) for details."
+        "Use the transformations functions like [`map`](https://www.tensorflow.org/api_docs/python/tf/data/Dataset#map), [`batch`](https://www.tensorflow.org/api_docs/python/tf/data/Dataset#batch), [`shuffle`](https://www.tensorflow.org/api_docs/python/tf/data/Dataset#shuffle) etc. to apply transformations to the records of the dataset. See the [API documentation for `tf.data.Dataset`](https://www.tensorflow.org/api_docs/python/tf/data/Dataset) for details."
       ]
     },
     {
diff --git a/tensorflow/contrib/layers/python/layers/layers.py b/tensorflow/contrib/layers/python/layers/layers.py
index 46b3eeae91..9378fe8799 100644
--- a/tensorflow/contrib/layers/python/layers/layers.py
+++ b/tensorflow/contrib/layers/python/layers/layers.py
@@ -286,7 +286,6 @@ def _fused_batch_norm(inputs,
     ValueError: If the rank of `inputs` is neither 2 or 4.
     ValueError: If rank or `C` dimension of `inputs` is undefined.
   """
-  # TODO(reedwm): Add support for fp16 inputs.
   if data_format not in (DATA_FORMAT_NCHW, DATA_FORMAT_NHWC):
     raise ValueError('data_format has to be either NCHW or NHWC.')
   with variable_scope.variable_scope(
@@ -320,9 +319,10 @@ def _fused_batch_norm(inputs,
                        (inputs.name, params_shape))
 
     # Allocate parameters for the beta and gamma of the normalization.
-    trainable_beta = trainable and center
     beta_collections = utils.get_variable_collections(variables_collections,
                                                       'beta')
+    # Float32 required to avoid precision-loss when using fp16 input/output
+    variable_dtype = dtypes.float32
     if not param_initializers:
       param_initializers = {}
     if not param_regularizers:
@@ -336,13 +336,13 @@ def _fused_batch_norm(inputs,
       beta = variables.model_variable(
           'beta',
           shape=params_shape,
-          dtype=dtype,
+          dtype=variable_dtype,
           initializer=beta_initializer,
           regularizer=beta_regularizer,
           collections=beta_collections,
-          trainable=trainable_beta)
+          trainable=trainable)
     else:
-      beta = array_ops.constant(0.0, shape=params_shape)
+      beta = array_ops.constant(0.0, dtype=variable_dtype, shape=params_shape)
 
     if scale:
       gamma_collections = utils.get_variable_collections(
@@ -352,13 +352,13 @@ def _fused_batch_norm(inputs,
       gamma = variables.model_variable(
           'gamma',
           shape=params_shape,
-          dtype=dtype,
+          dtype=variable_dtype,
           initializer=gamma_initializer,
           regularizer=gamma_regularizer,
           collections=gamma_collections,
           trainable=trainable)
     else:
-      gamma = array_ops.constant(1.0, shape=params_shape)
+      gamma = array_ops.constant(1.0, dtype=variable_dtype, shape=params_shape)
 
     # Create moving_mean and moving_variance variables and add them to the
     # appropriate collections. We disable variable partitioning while creating
@@ -375,7 +375,7 @@ def _fused_batch_norm(inputs,
       moving_mean = variables.model_variable(
           'moving_mean',
           shape=params_shape,
-          dtype=dtype,
+          dtype=variable_dtype,
           initializer=moving_mean_initializer,
           trainable=False,
           collections=moving_mean_collections)
@@ -386,7 +386,7 @@ def _fused_batch_norm(inputs,
       moving_variance = variables.model_variable(
           'moving_variance',
           shape=params_shape,
-          dtype=dtype,
+          dtype=variable_dtype,
           initializer=moving_variance_initializer,
           trainable=False,
           collections=moving_variance_collections)
diff --git a/tensorflow/contrib/layers/python/layers/layers_test.py b/tensorflow/contrib/layers/python/layers/layers_test.py
index ff7f0e4462..5aa2253516 100644
--- a/tensorflow/contrib/layers/python/layers/layers_test.py
+++ b/tensorflow/contrib/layers/python/layers/layers_test.py
@@ -1774,10 +1774,12 @@ class BatchNormTest(test.TestCase):
       with self.assertRaisesRegexp(ValueError, 'undefined'):
         _layers.batch_norm(inputs, data_format='NCHW')
 
-  def _testCreateOp(self, fused):
+  def _testCreateOp(self, fused, dtype=None):
+    if dtype is None:
+      dtype = dtypes.float32
     height, width = 3, 3
     with self.test_session():
-      images = np.random.uniform(size=(5, height, width, 3)).astype('f')
+      images = np.random.uniform(size=(5, height, width, 3)).astype(dtype.as_numpy_dtype)
       output = _layers.batch_norm(images, fused=fused)
       expected_name = ('BatchNorm/FusedBatchNorm' if fused else
                        'BatchNorm/batchnorm')
@@ -1792,6 +1794,9 @@ class BatchNormTest(test.TestCase):
   def testCreateOpFused(self):
     self._testCreateOp(True)
 
+  def testCreateOpFusedFloat16(self):
+    self._testCreateOp(True, dtypes.float16)
+
   def _testCreateOpBetaRegularizer(self, fused=True):
     height, width = 3, 3
     with self.test_session():
@@ -2659,10 +2664,68 @@ class BatchNormTest(test.TestCase):
   def testBatchNormBeta(self):
     # Test case for 11673
     with self.test_session() as sess:
-      a = array_ops.placeholder(dtypes.float32, shape=(10, 10, 10, 10))
-      b = _layers.batch_norm(a, center=False, data_format='NCHW',
-                                       zero_debias_moving_mean=True)
+      a_32 = array_ops.placeholder(dtypes.float32, shape=(10, 10, 10, 10))
+      b_32 = _layers.batch_norm(a_32, center=False, data_format='NCHW',
+                                zero_debias_moving_mean=True)
+      a_16 = array_ops.placeholder(dtypes.float16, shape=(10, 10, 10, 10))
+      b_16 = _layers.batch_norm(a_16, center=False, data_format='NCHW',
+                                zero_debias_moving_mean=True)
+      sess.run(variables_lib.global_variables_initializer())
+
+  def testVariablesAreFloat32(self):
+    height, width = 3, 3
+    with self.test_session():
+      images = random_ops.random_uniform((5, height, width, 3),
+                                         seed=1, dtype=dtypes.float16)
+      _layers.batch_norm(images, scale=True)
+      beta = variables.get_variables_by_name('beta')[0]
+      gamma = variables.get_variables_by_name('gamma')[0]
+      self.assertEqual(beta.dtype, dtypes.float32_ref)
+      self.assertEqual(gamma.dtype, dtypes.float32_ref)
+      moving_mean = variables.get_variables_by_name('moving_mean')[0]
+      moving_variance = variables.get_variables_by_name('moving_variance')[0]
+      self.assertEqual(moving_mean.dtype, dtypes.float32_ref)
+      self.assertEqual(moving_variance.dtype, dtypes.float32_ref)
+
+  def _runFusedBatchNorm(self, shape, dtype):
+    channels = shape[1]
+    images = np.arange(np.product(shape), dtype=dtype).reshape(shape)
+    beta = init_ops.constant_initializer(
+        np.arange(
+            2, channels + 2, dtype=np.float32))
+    gamma = init_ops.constant_initializer(
+        np.arange(
+            10, channels + 10, dtype=np.float32) * 2.0)
+    mean = init_ops.constant_initializer(
+        np.arange(
+            3, channels + 3, dtype=np.float32) * 5.0)
+    variance = init_ops.constant_initializer(
+        np.arange(
+            1, channels + 1, dtype=np.float32) * 4.0)
+    output = _layers.batch_norm(
+        images,
+        fused=True,
+        is_training=True,
+        scale=True,
+        epsilon=0.5,
+        param_initializers={
+            'beta': beta,
+            'gamma': gamma,
+            'moving_mean': mean,
+            'moving_variance': variance,
+        },
+        data_format='NCHW')
+    with self.test_session(use_gpu=True) as sess:
       sess.run(variables_lib.global_variables_initializer())
+      return sess.run(output)
+
+  def testFusedBatchNormFloat16MatchesFloat32(self):
+    if test.is_gpu_available(cuda_only=True):
+      shape = [5, 4, 2, 3]
+      res_32 = self._runFusedBatchNorm(shape, np.float32)
+      res_16 = self._runFusedBatchNorm(shape, np.float16)
+      self.assertAllClose(res_32, res_16, rtol=1e-3)
+
 
   def testAdjustmentCreated(self):
     # Tests that the adjustment is appropriately passed to and used by the core
diff --git a/tensorflow/contrib/learn/python/learn/estimators/head.py b/tensorflow/contrib/learn/python/learn/estimators/head.py
index 468d792a0d..bc0e6fc009 100644
--- a/tensorflow/contrib/learn/python/learn/estimators/head.py
+++ b/tensorflow/contrib/learn/python/learn/estimators/head.py
@@ -119,7 +119,7 @@ class Head(object):
       update_op = tf.contrib.layers.optimize_loss(optimizer=sync,
                                                   loss=model_fn_ops.loss, ...)
       hooks = [sync.make_session_run_hook(is_chief)]
-      ... upate train_op and hooks in ModelFnOps and return
+      ... update train_op and hooks in ModelFnOps and return
     ```
   """
   __metaclass__ = abc.ABCMeta
diff --git a/tensorflow/contrib/learn/python/learn/estimators/model_fn.py b/tensorflow/contrib/learn/python/learn/estimators/model_fn.py
index 8be9c72adf..44e6c7c52d 100644
--- a/tensorflow/contrib/learn/python/learn/estimators/model_fn.py
+++ b/tensorflow/contrib/learn/python/learn/estimators/model_fn.py
@@ -23,7 +23,6 @@ import collections
 
 import six
 
-from tensorflow.contrib import framework as contrib_framework
 from tensorflow.contrib.framework import get_graph_from_inputs
 from tensorflow.contrib.learn.python.learn.estimators import constants
 from tensorflow.contrib.learn.python.learn.estimators import metric_key
@@ -32,6 +31,7 @@ from tensorflow.python.estimator import model_fn as core_model_fn_lib
 from tensorflow.python.estimator.export import export_output as core_export_lib
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
+from tensorflow.python.framework import sparse_tensor
 from tensorflow.python.framework import tensor_shape
 from tensorflow.python.ops import array_ops
 from tensorflow.python.platform import tf_logging as logging
@@ -156,11 +156,11 @@ class ModelFnOps(
     else:
       if isinstance(predictions, dict):
         predictions = {
-            k: contrib_framework.convert_to_tensor_or_sparse_tensor(v)
+            k: sparse_tensor.convert_to_tensor_or_sparse_tensor(v)
             for k, v in six.iteritems(predictions)
         }
       else:
-        predictions = contrib_framework.convert_to_tensor_or_sparse_tensor(
+        predictions = sparse_tensor.convert_to_tensor_or_sparse_tensor(
             predictions)
 
     # Validate eval_metric_ops
diff --git a/tensorflow/contrib/learn/python/learn/learn_io/data_feeder.py b/tensorflow/contrib/learn/python/learn/learn_io/data_feeder.py
index 4c50d40aaa..db18ebf05d 100644
--- a/tensorflow/contrib/learn/python/learn/learn_io/data_feeder.py
+++ b/tensorflow/contrib/learn/python/learn/learn_io/data_feeder.py
@@ -28,13 +28,14 @@ import six
 from six.moves import xrange  # pylint: disable=redefined-builtin
 
 from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import ops
+from tensorflow.python.framework import tensor_util
 from tensorflow.python.ops import array_ops
 from tensorflow.python.platform import tf_logging as logging
 
 # pylint: disable=g-multiple-import,g-bad-import-order
 from .pandas_io import HAS_PANDAS, extract_pandas_data, extract_pandas_matrix, extract_pandas_labels
 from .dask_io import HAS_DASK, extract_dask_data, extract_dask_labels
-
 # pylint: enable=g-multiple-import,g-bad-import-order
 
 
@@ -365,8 +366,13 @@ class DataFeeder(object):
     self.random_state = np.random.RandomState(
         42) if random_state is None else random_state
 
-    num_samples = list(self._x.values())[0].shape[
-        0] if x_is_dict else self._x.shape[0]
+    if x_is_dict:
+      num_samples = list(self._x.values())[0].shape[0]
+    elif tensor_util.is_tensor(self._x):
+      num_samples = self._x.shape[0].value  # shape will be a Dimension, extract an int
+    else:
+      num_samples = self._x.shape[0]
+      
     if self._shuffle:
       self.indices = self.random_state.permutation(num_samples)
     else:
diff --git a/tensorflow/contrib/linear_optimizer/python/ops/sdca_ops.py b/tensorflow/contrib/linear_optimizer/python/ops/sdca_ops.py
index 13f2f0f502..86d8484391 100644
--- a/tensorflow/contrib/linear_optimizer/python/ops/sdca_ops.py
+++ b/tensorflow/contrib/linear_optimizer/python/ops/sdca_ops.py
@@ -238,10 +238,10 @@ class SdcaModel(object):
     with name_scope('sdca/prediction'):
       sparse_variables = self._convert_n_to_tensor(self._variables[
           'sparse_features_weights'])
-      result = 0.0
+      result_sparse = 0.0
       for sfc, sv in zip(examples['sparse_features'], sparse_variables):
         # TODO(sibyl-Aix6ihai): following does not take care of missing features.
-        result += math_ops.segment_sum(
+        result_sparse += math_ops.segment_sum(
             math_ops.multiply(
                 array_ops.gather(sv, sfc.feature_indices), sfc.feature_values),
             sfc.example_indices)
@@ -249,12 +249,13 @@ class SdcaModel(object):
       dense_variables = self._convert_n_to_tensor(self._variables[
           'dense_features_weights'])
 
+      result_dense = 0.0
       for i in range(len(dense_variables)):
-        result += math_ops.matmul(dense_features[i],
-                                  array_ops.expand_dims(dense_variables[i], -1))
+        result_dense += math_ops.matmul(
+            dense_features[i], array_ops.expand_dims(dense_variables[i], -1))
 
     # Reshaping to allow shape inference at graph construction time.
-    return array_ops.reshape(result, [-1])
+    return array_ops.reshape(result_dense, [-1]) + result_sparse
 
   def predictions(self, examples):
     """Add operations to compute predictions by the model.
diff --git a/tensorflow/contrib/lite/python/BUILD b/tensorflow/contrib/lite/python/BUILD
index b4aa032ff8..89e8693490 100644
--- a/tensorflow/contrib/lite/python/BUILD
+++ b/tensorflow/contrib/lite/python/BUILD
@@ -23,6 +23,7 @@ py_library(
 py_test(
     name = "lite_test",
     srcs = ["lite_test.py"],
+    srcs_version = "PY2AND3",
     deps = [
         ":lite",
         "//tensorflow/python:array_ops",
diff --git a/tensorflow/contrib/lite/testing/generate_examples.py b/tensorflow/contrib/lite/testing/generate_examples.py
index 86540d58a6..b122818221 100644
--- a/tensorflow/contrib/lite/testing/generate_examples.py
+++ b/tensorflow/contrib/lite/testing/generate_examples.py
@@ -36,6 +36,10 @@ import traceback
 import zipfile
 import numpy as np
 from six import StringIO
+
+# TODO(aselle): Disable GPU for now
+os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
+
 import tensorflow as tf
 from google.protobuf import text_format
 # TODO(aselle): switch to TensorFlow's resource_loader
@@ -379,12 +383,13 @@ def make_zip_of_tests(zip_path,
         report["toco_log"] = ""
         tf.reset_default_graph()
 
-        try:
-          inputs, outputs = make_graph(param_dict_real)
-        except (tf.errors.UnimplementedError, tf.errors.InvalidArgumentError,
-                ValueError):
-          report["tf_log"] += traceback.format_exc()
-          return None, report
+        with tf.device('/cpu:0'):
+          try:
+            inputs, outputs = make_graph(param_dict_real)
+          except (tf.errors.UnimplementedError, tf.errors.InvalidArgumentError,
+                  ValueError):
+            report["tf_log"] += traceback.format_exc()
+            return None, report
 
         sess = tf.Session()
         try:
diff --git a/tensorflow/contrib/lite/toco/python/BUILD b/tensorflow/contrib/lite/toco/python/BUILD
index 92246a8aed..17115047d2 100644
--- a/tensorflow/contrib/lite/toco/python/BUILD
+++ b/tensorflow/contrib/lite/toco/python/BUILD
@@ -61,6 +61,7 @@ tf_py_test(
     data = [
         ":toco_from_protos",
     ],
+    tags = ["no_pip"],
 )
 
 filegroup(
diff --git a/tensorflow/contrib/makefile/Makefile b/tensorflow/contrib/makefile/Makefile
index dba1464653..e2e6c05591 100644
--- a/tensorflow/contrib/makefile/Makefile
+++ b/tensorflow/contrib/makefile/Makefile
@@ -314,7 +314,8 @@ ifeq ($(TARGET),ANDROID)
 -Wno-narrowing \
 -fomit-frame-pointer \
 $(MARCH_OPTION) \
--fPIE
+-fPIE \
+-fPIC
 	INCLUDES = \
 -I$(NDK_ROOT)/sources/android/support/include \
 -I$(NDK_ROOT)/sources/cxx-stl/gnu-libstdc++/4.9/include \
diff --git a/tensorflow/contrib/makefile/README.md b/tensorflow/contrib/makefile/README.md
index 715eb51577..65bd60c12a 100644
--- a/tensorflow/contrib/makefile/README.md
+++ b/tensorflow/contrib/makefile/README.md
@@ -174,10 +174,26 @@ tensorflow/contrib/makefile/build_all_ios.sh
 
 This process will take around twenty minutes on a modern MacBook Pro.
 
-When it completes, you will have a library for a single architecture and the
-benchmark program. Although successfully compiling the benchmark program is a
+When it completes, you will have a unified library for all architectures
+(i386sim, x86_64sim, armv7, armv7s and arm64)  and the benchmark program.
+Although successfully compiling the benchmark program is a
 sign of success, the program is not a complete iOS app.
 
+If you would only like to build only one architecture to save time:
+(iOS 11+ only supports 64bit so you can get away with arm64)
+
+```bash
+tensorflow/contrib/makefile/build_all_ios.sh -a arm64
+```
+
+After the first build if you would like to just build the tensorflow
+library you can pass the -T flag to avoid a clean & rebuild. This should
+take you just a few seconds to generate the library if you modified one file.
+
+```bash
+tensorflow/contrib/makefile/build_all_ios.sh -a arm64 -T
+```
+
 To see TensorFlow running on iOS, the example Xcode project in
 [tensorflow/examples/ios](../../examples/ios/) shows how to use the static
 library in a simple app.
@@ -193,19 +209,18 @@ If you have not already, you will need to download dependencies:
 tensorflow/contrib/makefile/download_dependencies.sh
 ```
 
-Next, you will need to compile protobufs for iOS:
+Next, you will need to compile protobufs for iOS (optionally takes the -a $ARCH flag):
 
 ```bash
-tensorflow/contrib/makefile/compile_ios_protobuf.sh 
+tensorflow/contrib/makefile/compile_ios_protobuf.sh
 ```
 
-Then, you will need to compile the nsync library for iOS:
+Then, you will need to compile the nsync library for iOS (optionally takes -a $ARCH flag):
 
 ```bash
 export HOST_NSYNC_LIB=`tensorflow/contrib/makefile/compile_nsync.sh`
 export TARGET_NSYNC_LIB=`tensorflow/contrib/makefile/compile_nsync.sh -t ios`
 ```
-
 Then, you can run the makefile specifying iOS as the target, along with the
 architecture you want to build for:
 
@@ -219,10 +234,6 @@ This creates a library in
 `tensorflow/contrib/makefile/gen/lib/libtensorflow-core.a` that you can link any
 xcode project against. 
 
-At this point, you will have a library for a single architecture and the
-benchmark program. Although successfully compiling the benchmark program is a
-sign of success, the program is not a complete iOS app. 
-
 To see TensorFlow running on iOS, the example Xcode project in
 [tensorflow/examples/ios](../../examples/ios/) shows how to use the static
 library in a simple app.
@@ -237,6 +248,14 @@ time follow it with:
 compile_ios_tensorflow.sh
 ```
 
+`compile_ios_tensorflow.sh` takes the -a flag to build only for one architecture.
+In case you run into issues with unresolved symbols with nsync you can also pass
+-h ${HOST_NSYNC_LIB} and -n {TARGET_NSYNC_LIB} so it would look like:
+
+```bash
+tensorflow/contrib/makefile/compile_ios_tensorflow.sh -f "-O3" -h tensorflow/contrib/makefile/downloads/nsync/builds/default.macos.c++11/nsync.a -n tensorflow/contrib/makefile/downloads/nsync/builds/lipo.ios.c++11/nsync.a -a arm64
+```
+
 In XCode, you will need to use -force_load in the linker flags
 section of the build settings to pull in the global constructors that are used
 to register ops and kernels. 
@@ -249,7 +268,7 @@ debug mode. If you are concerned about performance or are working on a release
 build, you would likely want a higher optimization setting, like so:
  
 ```bash
-compile_ios_tensorflow.sh "-Os"
+compile_ios_tensorflow.sh -f "-Os"
 ```
 
 For other variations of valid optimization flags, see [clang optimization levels](http://stackoverflow.com/questions/15548023/clang-optimization-levels).
diff --git a/tensorflow/contrib/makefile/build_all_ios.sh b/tensorflow/contrib/makefile/build_all_ios.sh
index a49bbe4565..988e12b482 100755
--- a/tensorflow/contrib/makefile/build_all_ios.sh
+++ b/tensorflow/contrib/makefile/build_all_ios.sh
@@ -23,14 +23,29 @@ if [[ $(uname) != "Darwin" ]]; then
     exit 1
 fi
 
+usage() {
+  echo "Usage: $(basename "$0") [-a:T]"
+  echo "-a [build_arch] build only for specified arch x86_64 [default=all]"
+  echo "-T only build tensorflow (dont download other deps etc)"
+  exit 1
+}
+
+while getopts "a:T" opt_name; do
+  case "$opt_name" in
+    a) BUILD_ARCH="${OPTARG}";;
+    T) ONLY_MAKE_TENSORFLOW="true";;
+    *) usage;;
+  esac
+done
+shift $((OPTIND - 1))
+
+
 # Make sure we're in the correct directory, at the root of the source tree.
 SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
 cd ${SCRIPT_DIR}/../../../
 
-
-# Remove any old files first.
-make -f tensorflow/contrib/makefile/Makefile clean
-rm -rf tensorflow/contrib/makefile/downloads
+source "${SCRIPT_DIR}/build_helper.subr"
+JOB_COUNT="${JOB_COUNT:-$(get_job_count)}"
 
 # Setting a deployment target is required for building with bitcode,
 # otherwise linking will fail with:
@@ -41,20 +56,37 @@ if [[ -n MACOSX_DEPLOYMENT_TARGET ]]; then
     export MACOSX_DEPLOYMENT_TARGET=$(sw_vers -productVersion)
 fi
 
-# Pull down the required versions of the frameworks we need.
-tensorflow/contrib/makefile/download_dependencies.sh
+if [[ "${ONLY_MAKE_TENSORFLOW}" != "true" ]]; then
+    # Remove any old files first.
+    make -f tensorflow/contrib/makefile/Makefile clean
+    rm -rf tensorflow/contrib/makefile/downloads
 
-# Compile protobuf for the target iOS device architectures.
-tensorflow/contrib/makefile/compile_ios_protobuf.sh
+    # Pull down the required versions of the frameworks we need.
+    tensorflow/contrib/makefile/download_dependencies.sh
+
+    # Compile protobuf for the target iOS device architectures.
+    tensorflow/contrib/makefile/compile_ios_protobuf.sh
+fi
 
 # Compile nsync for the target iOS device architectures.
 # Don't use  export var=`something` syntax; it swallows the exit status.
 HOST_NSYNC_LIB=`tensorflow/contrib/makefile/compile_nsync.sh`
-TARGET_NSYNC_LIB=`tensorflow/contrib/makefile/compile_nsync.sh -t ios`
+if [[ -z "${BUILD_ARCH}" ]]; then
+    # No arch specified so build all architectures
+    TARGET_NSYNC_LIB=`tensorflow/contrib/makefile/compile_nsync.sh -t ios`
+else
+    # arch specified so build just that
+    TARGET_NSYNC_LIB=`tensorflow/contrib/makefile/compile_nsync.sh -t ios -a ${BUILD_ARCH}`
+fi
 export HOST_NSYNC_LIB TARGET_NSYNC_LIB
 
-# Build the iOS TensorFlow libraries.
-tensorflow/contrib/makefile/compile_ios_tensorflow.sh "-O3"
+if [[ -z "${BUILD_ARCH}" ]]; then
+    # build the ios tensorflow libraries.
+    tensorflow/contrib/makefile/compile_ios_tensorflow.sh -f "-O3" -h $HOST_NSYNC_LIB -n $TARGET_NSYNC_LIB
+else
+    # arch specified so build just that
+    tensorflow/contrib/makefile/compile_ios_tensorflow.sh -f "-O3" -a "${BUILD_ARCH}" -h $HOST_NSYNC_LIB -n $TARGET_NSYNC_LIB
+fi
 
 # Creates a static universal library in
 # tensorflow/contrib/makefile/gen/lib/libtensorflow-core.a
diff --git a/tensorflow/contrib/makefile/compile_ios_protobuf.sh b/tensorflow/contrib/makefile/compile_ios_protobuf.sh
index 4056db18a7..43e5809dd2 100755
--- a/tensorflow/contrib/makefile/compile_ios_protobuf.sh
+++ b/tensorflow/contrib/makefile/compile_ios_protobuf.sh
@@ -21,10 +21,28 @@ if [[ -n MACOSX_DEPLOYMENT_TARGET ]]; then
     export MACOSX_DEPLOYMENT_TARGET=$(sw_vers -productVersion)
 fi
 
-SCRIPT_DIR=$(dirname $0)
+usage() {
+  echo "Usage: $(basename "$0") [-a]"
+  echo "-a [build_arch] build for specified arch comma separate for multiple archs (eg: x86_64,arm64)"
+  echo "default arch i386, x86_64, armv7, armv7s, arm64"
+  exit 1
+}
+
+BUILD_TARGET="i386 x86_64 armv7 armv7s arm64"
+while getopts "a:" opt_name; do
+  case "$opt_name" in
+    a) BUILD_TARGET="${OPTARG}";;
+    *) usage;;
+  esac
+done
+shift $((OPTIND - 1))
+
+IFS=' ' read -r -a build_targets <<< "${BUILD_TARGET}"
+
+SCRIPT_DIR=$(cd `dirname $0` && pwd)
 source "${SCRIPT_DIR}/build_helper.subr"
 
-cd tensorflow/contrib/makefile
+cd ${SCRIPT_DIR}
 
 HOST_GENDIR="$(pwd)/gen/protobuf-host"
 mkdir -p "${HOST_GENDIR}"
@@ -64,6 +82,10 @@ else
   echo "protoc found. Skip building host tools."
 fi
 
+# Remove old libs
+rm -f ${LIBDIR}/libprotobuf.a
+rm -f ${LIBDIR}/libprotobuf-lite.a
+
 ./autogen.sh
 if [ $? -ne 0 ]
 then
@@ -71,157 +93,192 @@ then
   exit 1
 fi
 
-make distclean
-./configure \
---host=i386-apple-${OSX_VERSION} \
---disable-shared \
---enable-cross-compile \
---with-protoc="${PROTOC_PATH}" \
---prefix=${LIBDIR}/iossim_386 \
---exec-prefix=${LIBDIR}/iossim_386 \
-"CFLAGS=${CFLAGS} \
--mios-simulator-version-min=${MIN_SDK_VERSION} \
--arch i386 \
--fembed-bitcode \
--isysroot ${IPHONESIMULATOR_SYSROOT}" \
-"CXX=${CXX}" \
-"CXXFLAGS=${CXXFLAGS} \
--mios-simulator-version-min=${MIN_SDK_VERSION} \
--arch i386 \
--fembed-bitcode \
--isysroot \
-${IPHONESIMULATOR_SYSROOT}" \
-LDFLAGS="-arch i386 \
--fembed-bitcode \
--mios-simulator-version-min=${MIN_SDK_VERSION} \
-${LDFLAGS} \
--L${IPHONESIMULATOR_SYSROOT}/usr/lib/ \
--L${IPHONESIMULATOR_SYSROOT}/usr/lib/system" \
-"LIBS=${LIBS}"
-make -j"${JOB_COUNT}"
-make install
-
-make distclean
-./configure \
---host=x86_64-apple-${OSX_VERSION} \
---disable-shared \
---enable-cross-compile \
---with-protoc="${PROTOC_PATH}" \
---prefix=${LIBDIR}/iossim_x86_64 \
---exec-prefix=${LIBDIR}/iossim_x86_64 \
-"CFLAGS=${CFLAGS} \
--mios-simulator-version-min=${MIN_SDK_VERSION} \
--arch x86_64 \
--fembed-bitcode \
--isysroot ${IPHONESIMULATOR_SYSROOT}" \
-"CXX=${CXX}" \
-"CXXFLAGS=${CXXFLAGS} \
--mios-simulator-version-min=${MIN_SDK_VERSION} \
--arch x86_64 \
--fembed-bitcode \
--isysroot \
-${IPHONESIMULATOR_SYSROOT}" \
-LDFLAGS="-arch x86_64 \
--fembed-bitcode \
--mios-simulator-version-min=${MIN_SDK_VERSION} \
-${LDFLAGS} \
--L${IPHONESIMULATOR_SYSROOT}/usr/lib/ \
--L${IPHONESIMULATOR_SYSROOT}/usr/lib/system" \
-"LIBS=${LIBS}"
-make -j"${JOB_COUNT}"
-make install
-
-make distclean
-./configure \
---host=armv7-apple-${OSX_VERSION} \
---with-protoc="${PROTOC_PATH}" \
---disable-shared \
---prefix=${LIBDIR}/ios_arm7 \
---exec-prefix=${LIBDIR}/ios_arm7 \
-"CFLAGS=${CFLAGS} \
--miphoneos-version-min=${MIN_SDK_VERSION} \
--arch armv7 \
--fembed-bitcode \
--isysroot ${IPHONEOS_SYSROOT}" \
-"CXX=${CXX}" \
-"CXXFLAGS=${CXXFLAGS} \
--miphoneos-version-min=${MIN_SDK_VERSION} \
--arch armv7 \
--fembed-bitcode \
--isysroot ${IPHONEOS_SYSROOT}" \
-LDFLAGS="-arch armv7 \
--fembed-bitcode \
--miphoneos-version-min=${MIN_SDK_VERSION} \
-${LDFLAGS}" \
-"LIBS=${LIBS}"
-make -j"${JOB_COUNT}"
-make install
-
-make distclean
-./configure \
---host=armv7s-apple-${OSX_VERSION} \
---with-protoc="${PROTOC_PATH}" \
---disable-shared \
---prefix=${LIBDIR}/ios_arm7s \
---exec-prefix=${LIBDIR}/ios_arm7s \
-"CFLAGS=${CFLAGS} \
--miphoneos-version-min=${MIN_SDK_VERSION} \
--arch armv7s \
--fembed-bitcode \
--isysroot ${IPHONEOS_SYSROOT}" \
-"CXX=${CXX}" \
-"CXXFLAGS=${CXXFLAGS} \
--miphoneos-version-min=${MIN_SDK_VERSION} \
--arch armv7s \
--fembed-bitcode \
--isysroot ${IPHONEOS_SYSROOT}" \
-LDFLAGS="-arch armv7s \
--fembed-bitcode \
--miphoneos-version-min=${MIN_SDK_VERSION} \
-${LDFLAGS}" \
-"LIBS=${LIBS}"
-make -j"${JOB_COUNT}"
-make install
-
-make distclean
-./configure \
---host=arm \
---with-protoc="${PROTOC_PATH}" \
---disable-shared \
---prefix=${LIBDIR}/ios_arm64 \
---exec-prefix=${LIBDIR}/ios_arm64 \
-"CFLAGS=${CFLAGS} \
--miphoneos-version-min=${MIN_SDK_VERSION} \
--arch arm64 \
--fembed-bitcode \
--isysroot ${IPHONEOS_SYSROOT}" \
-"CXXFLAGS=${CXXFLAGS} \
--miphoneos-version-min=${MIN_SDK_VERSION} \
--arch arm64 \
--fembed-bitcode \
--isysroot ${IPHONEOS_SYSROOT}" \
-LDFLAGS="-arch arm64 \
--fembed-bitcode \
--miphoneos-version-min=${MIN_SDK_VERSION} \
-${LDFLAGS}" \
-"LIBS=${LIBS}"
-make -j"${JOB_COUNT}"
-make install
-
-lipo \
-${LIBDIR}/iossim_386/lib/libprotobuf.a \
-${LIBDIR}/iossim_x86_64/lib/libprotobuf.a \
-${LIBDIR}/ios_arm7/lib/libprotobuf.a \
-${LIBDIR}/ios_arm7s/lib/libprotobuf.a \
-${LIBDIR}/ios_arm64/lib/libprotobuf.a \
--create \
--output ${LIBDIR}/libprotobuf.a
-
-lipo \
-${LIBDIR}/iossim_386/lib/libprotobuf-lite.a \
-${LIBDIR}/iossim_x86_64/lib/libprotobuf-lite.a \
-${LIBDIR}/ios_arm7/lib/libprotobuf-lite.a \
-${LIBDIR}/ios_arm7s/lib/libprotobuf-lite.a \
-${LIBDIR}/ios_arm64/lib/libprotobuf-lite.a \
--create \
--output ${LIBDIR}/libprotobuf-lite.a
+package_pb_library() {
+    pb_libs="${LIBDIR}/${1}/lib/libprotobuf.a"
+    if [ -f "${LIBDIR}/libprotobuf.a" ]; then
+        pb_libs="$pb_libs ${LIBDIR}/libprotobuf.a"
+    fi
+    lipo \
+    $pb_libs \
+    -create \
+    -output ${LIBDIR}/libprotobuf.a
+
+    pblite_libs="${LIBDIR}/${1}/lib/libprotobuf-lite.a"
+    if [ -f "${LIBDIR}/libprotobuf-lite.a" ]; then
+        pblite_libs="$pblite_libs ${LIBDIR}/libprotobuf-lite.a"
+    fi
+    lipo \
+    $pblite_libs \
+    -create \
+    -output ${LIBDIR}/libprotobuf-lite.a
+}
+
+build_target() {
+case "$1" in
+    i386)  make distclean
+        ./configure \
+        --host=i386-apple-${OSX_VERSION} \
+        --disable-shared \
+        --enable-cross-compile \
+        --with-protoc="${PROTOC_PATH}" \
+        --prefix=${LIBDIR}/iossim_386 \
+        --exec-prefix=${LIBDIR}/iossim_386 \
+        "CFLAGS=${CFLAGS} \
+        -mios-simulator-version-min=${MIN_SDK_VERSION} \
+        -arch i386 \
+        -fembed-bitcode \
+        -isysroot ${IPHONESIMULATOR_SYSROOT}" \
+        "CXX=${CXX}" \
+        "CXXFLAGS=${CXXFLAGS} \
+        -mios-simulator-version-min=${MIN_SDK_VERSION} \
+        -arch i386 \
+        -fembed-bitcode \
+        -isysroot \
+        ${IPHONESIMULATOR_SYSROOT}" \
+        LDFLAGS="-arch i386 \
+        -fembed-bitcode \
+        -mios-simulator-version-min=${MIN_SDK_VERSION} \
+        ${LDFLAGS} \
+        -L${IPHONESIMULATOR_SYSROOT}/usr/lib/ \
+        -L${IPHONESIMULATOR_SYSROOT}/usr/lib/system" \
+        "LIBS=${LIBS}"
+        make -j"${JOB_COUNT}"
+        make install
+
+        package_pb_library "iossim_386"
+        ;;
+
+    x86_64) make distclean
+        ./configure \
+        --host=x86_64-apple-${OSX_VERSION} \
+        --disable-shared \
+        --enable-cross-compile \
+        --with-protoc="${PROTOC_PATH}" \
+        --prefix=${LIBDIR}/iossim_x86_64 \
+        --exec-prefix=${LIBDIR}/iossim_x86_64 \
+        "CFLAGS=${CFLAGS} \
+        -mios-simulator-version-min=${MIN_SDK_VERSION} \
+        -arch x86_64 \
+        -fembed-bitcode \
+        -isysroot ${IPHONESIMULATOR_SYSROOT}" \
+        "CXX=${CXX}" \
+        "CXXFLAGS=${CXXFLAGS} \
+        -mios-simulator-version-min=${MIN_SDK_VERSION} \
+        -arch x86_64 \
+        -fembed-bitcode \
+        -isysroot \
+        ${IPHONESIMULATOR_SYSROOT}" \
+        LDFLAGS="-arch x86_64 \
+        -fembed-bitcode \
+        -mios-simulator-version-min=${MIN_SDK_VERSION} \
+        ${LDFLAGS} \
+        -L${IPHONESIMULATOR_SYSROOT}/usr/lib/ \
+        -L${IPHONESIMULATOR_SYSROOT}/usr/lib/system" \
+        "LIBS=${LIBS}"
+        make -j"${JOB_COUNT}"
+        make install
+
+        package_pb_library "iossim_x86_64"
+        ;;
+
+    armv7) make distclean
+        ./configure \
+        --host=armv7-apple-${OSX_VERSION} \
+        --with-protoc="${PROTOC_PATH}" \
+        --disable-shared \
+        --prefix=${LIBDIR}/ios_arm7 \
+        --exec-prefix=${LIBDIR}/ios_arm7 \
+        "CFLAGS=${CFLAGS} \
+        -miphoneos-version-min=${MIN_SDK_VERSION} \
+        -arch armv7 \
+        -fembed-bitcode \
+        -isysroot ${IPHONEOS_SYSROOT}" \
+        "CXX=${CXX}" \
+        "CXXFLAGS=${CXXFLAGS} \
+        -miphoneos-version-min=${MIN_SDK_VERSION} \
+        -arch armv7 \
+        -fembed-bitcode \
+        -isysroot ${IPHONEOS_SYSROOT}" \
+        LDFLAGS="-arch armv7 \
+        -fembed-bitcode \
+        -miphoneos-version-min=${MIN_SDK_VERSION} \
+        ${LDFLAGS}" \
+        "LIBS=${LIBS}"
+        make -j"${JOB_COUNT}"
+        make install
+
+        package_pb_library "ios_arm7"
+        ;;
+
+    armv7s) make distclean
+        ./configure \
+        --host=armv7s-apple-${OSX_VERSION} \
+        --with-protoc="${PROTOC_PATH}" \
+        --disable-shared \
+        --prefix=${LIBDIR}/ios_arm7s \
+        --exec-prefix=${LIBDIR}/ios_arm7s \
+        "CFLAGS=${CFLAGS} \
+        -miphoneos-version-min=${MIN_SDK_VERSION} \
+        -arch armv7s \
+        -fembed-bitcode \
+        -isysroot ${IPHONEOS_SYSROOT}" \
+        "CXX=${CXX}" \
+        "CXXFLAGS=${CXXFLAGS} \
+        -miphoneos-version-min=${MIN_SDK_VERSION} \
+        -arch armv7s \
+        -fembed-bitcode \
+        -isysroot ${IPHONEOS_SYSROOT}" \
+        LDFLAGS="-arch armv7s \
+        -fembed-bitcode \
+        -miphoneos-version-min=${MIN_SDK_VERSION} \
+        ${LDFLAGS}" \
+        "LIBS=${LIBS}"
+        make -j"${JOB_COUNT}"
+        make install
+
+        package_pb_library "ios_arm7s"
+        ;;
+
+    arm64) make distclean
+        ./configure \
+        --host=arm \
+        --with-protoc="${PROTOC_PATH}" \
+        --disable-shared \
+        --prefix=${LIBDIR}/ios_arm64 \
+        --exec-prefix=${LIBDIR}/ios_arm64 \
+        "CFLAGS=${CFLAGS} \
+        -miphoneos-version-min=${MIN_SDK_VERSION} \
+        -arch arm64 \
+        -fembed-bitcode \
+        -isysroot ${IPHONEOS_SYSROOT}" \
+        "CXXFLAGS=${CXXFLAGS} \
+        -miphoneos-version-min=${MIN_SDK_VERSION} \
+        -arch arm64 \
+        -fembed-bitcode \
+        -isysroot ${IPHONEOS_SYSROOT}" \
+        LDFLAGS="-arch arm64 \
+        -fembed-bitcode \
+        -miphoneos-version-min=${MIN_SDK_VERSION} \
+        ${LDFLAGS}" \
+        "LIBS=${LIBS}"
+        make -j"${JOB_COUNT}"
+        make install
+
+        package_pb_library "ios_arm64"
+        ;;
+    *)
+        echo "Unknown ARCH"
+        exit 1
+        ;;
+esac 
+}
+
+for build_element in "${build_targets[@]}"
+do
+    echo "$build_element"
+    build_target "$build_element"
+done
+
+file ${LIBDIR}/libprotobuf.a
+file ${LIBDIR}/libprotobuf-lite.a
+echo "Done building and packaging the libraries"
diff --git a/tensorflow/contrib/makefile/compile_ios_tensorflow.sh b/tensorflow/contrib/makefile/compile_ios_tensorflow.sh
index 5d1cc8b375..ae82163e11 100755
--- a/tensorflow/contrib/makefile/compile_ios_tensorflow.sh
+++ b/tensorflow/contrib/makefile/compile_ios_tensorflow.sh
@@ -43,55 +43,124 @@ then
     exit 1
 fi
 
+usage() {
+  echo "Usage: $(basename "$0") [-a]"
+  echo "-a [build_arch] build for specified arch comma separate for multiple archs (eg: x86_64,arm64)"
+  echo "default is [i386, x86_64, armv7, armv7s, arm64]"
+  exit 1
+}
+
+BUILD_TARGET="i386 x86_64 armv7 armv7s arm64"
+while getopts "a:f:h:n:" opt_name; do
+  case "$opt_name" in
+    a) BUILD_TARGET="${OPTARG}";;
+    f) BUILD_OPT="${OPTARG}";;
+    h) NSYNC_HOST="${OPTARG}";;
+    n) NSYNC_TARGET="${OPTARG}";;
+    *) usage;;
+  esac
+done
+shift $((OPTIND - 1))
+
+IFS=' ' read -r -a build_targets <<< "${BUILD_TARGET}"
+
+SCRIPT_DIR=$(cd `dirname $0` && pwd)
+source "${SCRIPT_DIR}/build_helper.subr"
+
+
 GENDIR=tensorflow/contrib/makefile/gen/
 LIBDIR=${GENDIR}lib
 LIB_PREFIX=libtensorflow-core
 
-make -j"${JOB_COUNT}" -f tensorflow/contrib/makefile/Makefile \
-TARGET=IOS IOS_ARCH=ARMV7 LIB_NAME=${LIB_PREFIX}-armv7.a OPTFLAGS="$1"
-if [ $? -ne 0 ]
-then
-  echo "armv7 compilation failed."
-  exit 1
-fi
+#remove any old artifacts
+rm -rf ${LIBDIR}/${LIB_PREFIX}.a
 
-make -j"${JOB_COUNT}" -f tensorflow/contrib/makefile/Makefile \
-TARGET=IOS IOS_ARCH=ARMV7S LIB_NAME=${LIB_PREFIX}-armv7s.a OPTFLAGS="$1"
-if [ $? -ne 0 ]
-then
-  echo "arm7vs compilation failed."
-  exit 1
-fi
+package_tf_library() {
+    CAP_DIR=`echo $1 | tr 'a-z' 'A-Z'`
+    tf_libs="${LIBDIR}/ios_${CAP_DIR}/${LIB_PREFIX}-${1}.a"
+    if [ -f "${LIBDIR}/${LIB_PREFIX}.a" ]; then
+        tf_libs="$tf_libs ${LIBDIR}/${LIB_PREFIX}.a"
+    fi
+    lipo \
+    $tf_libs \
+    -create \
+    -output ${LIBDIR}/${LIB_PREFIX}.a
+}
 
-make -j"${JOB_COUNT}" -f tensorflow/contrib/makefile/Makefile \
-TARGET=IOS IOS_ARCH=ARM64 LIB_NAME=${LIB_PREFIX}-arm64.a OPTFLAGS="$1"
-if [ $? -ne 0 ]
-then
-  echo "arm64 compilation failed."
-  exit 1
-fi
+build_tf_target() {
+case "$1" in
+    armv7)
+        make -j"${JOB_COUNT}" -f tensorflow/contrib/makefile/Makefile \
+        TARGET=IOS IOS_ARCH=ARMV7 LIB_NAME=${LIB_PREFIX}-armv7.a \
+        OPTFLAGS="${BUILD_OPT}" HOST_NSYNC_LIB="${NSYNC_HOST}" \
+        TARGET_NSYNC_LIB="${NSYNC_TARGET}"
+        if [ $? -ne 0 ]
+        then
+          echo "armv7 compilation failed."
+          exit 1
+        fi
+        package_tf_library "armv7"
+        ;;
+    armv7s)
+        make -j"${JOB_COUNT}" -f tensorflow/contrib/makefile/Makefile \
+        TARGET=IOS IOS_ARCH=ARMV7S LIB_NAME=${LIB_PREFIX}-armv7s.a \
+        OPTFLAGS="${BUILD_OPT}" HOST_NSYNC_LIB="${NSYNC_HOST}" \
+        TARGET_NSYNC_LIB="${NSYNC_TARGET}"
 
-make -j"${JOB_COUNT}" -f tensorflow/contrib/makefile/Makefile \
-TARGET=IOS IOS_ARCH=I386 LIB_NAME=${LIB_PREFIX}-i386.a OPTFLAGS="$1"
-if [ $? -ne 0 ]
-then
-  echo "i386 compilation failed."
-  exit 1
-fi
+        if [ $? -ne 0 ]
+        then
+          echo "arm7vs compilation failed."
+          exit 1
+        fi
+        package_tf_library "armv7s"
+        ;;
+    arm64)
+        make -j"${JOB_COUNT}" -f tensorflow/contrib/makefile/Makefile \
+        TARGET=IOS IOS_ARCH=ARM64 LIB_NAME=${LIB_PREFIX}-arm64.a \
+        OPTFLAGS="${BUILD_OPT}" HOST_NSYNC_LIB="${NSYNC_HOST}" \
+        TARGET_NSYNC_LIB="${NSYNC_TARGET}"
+        if [ $? -ne 0 ]
+        then
+          echo "arm64 compilation failed."
+          exit 1
+        fi
+        package_tf_library "arm64"
+        ;;
+    i386)
+        make -j"${JOB_COUNT}" -f tensorflow/contrib/makefile/Makefile \
+        TARGET=IOS IOS_ARCH=I386 LIB_NAME=${LIB_PREFIX}-i386.a \
+        OPTFLAGS="${BUILD_OPT}" HOST_NSYNC_LIB="${NSYNC_HOST}" \
+        TARGET_NSYNC_LIB="${NSYNC_TARGET}"
+        if [ $? -ne 0 ]
+        then
+          echo "i386 compilation failed."
+          exit 1
+        fi
+        package_tf_library "i386"
+        ;;
+    x86_64)
+        make -j"${JOB_COUNT}" -f tensorflow/contrib/makefile/Makefile \
+        TARGET=IOS IOS_ARCH=X86_64 LIB_NAME=${LIB_PREFIX}-x86_64.a \
+        OPTFLAGS="${BUILD_OPT}" HOST_NSYNC_LIB="${NSYNC_HOST}" \
+        TARGET_NSYNC_LIB="${NSYNC_TARGET}"
+        if [ $? -ne 0 ]
+        then
+          echo "x86_64 compilation failed."
+          exit 1
+        fi
+        package_tf_library "x86_64"
+        ;;
+    *)
+        echo "Unknown ARCH"
+        exit 1
+esac
+}
 
-make -j"${JOB_COUNT}" -f tensorflow/contrib/makefile/Makefile \
-TARGET=IOS IOS_ARCH=X86_64 LIB_NAME=${LIB_PREFIX}-x86_64.a OPTFLAGS="$1"
-if [ $? -ne 0 ]
-then
-  echo "x86_64 compilation failed."
-  exit 1
-fi
+for build_tf_element in "${build_targets[@]}"
+do
+    echo "$build_tf_element"
+    build_tf_target "$build_tf_element"
+done
 
-lipo \
-${LIBDIR}/ios_ARMV7/${LIB_PREFIX}-armv7.a \
-${LIBDIR}/ios_ARMV7S/${LIB_PREFIX}-armv7s.a \
-${LIBDIR}/ios_ARM64/${LIB_PREFIX}-arm64.a \
-${LIBDIR}/ios_I386/${LIB_PREFIX}-i386.a \
-${LIBDIR}/ios_X86_64/${LIB_PREFIX}-x86_64.a \
--create \
--output ${LIBDIR}/${LIB_PREFIX}.a
+echo "Done building and packaging TF"
+file ${LIBDIR}/${LIB_PREFIX}.a
diff --git a/tensorflow/contrib/makefile/compile_nsync.sh b/tensorflow/contrib/makefile/compile_nsync.sh
index ecbd9bb825..930e6b8dea 100755
--- a/tensorflow/contrib/makefile/compile_nsync.sh
+++ b/tensorflow/contrib/makefile/compile_nsync.sh
@@ -265,7 +265,7 @@ for arch in $archs; do
                                           -I$(NDK_ROOT)/sources/cxx-stl/gnu-libstdc++/4.9/libs/'"$arch"'/include \
                                           -I../../platform/c++11 -I../../platform/gcc \
                                           -I../../platform/posix -pthread
-                        PLATFORM_CFLAGS=-std=c++11 -Wno-narrowing '"$march_option"' -fPIE
+                        PLATFORM_CFLAGS=-std=c++11 -Wno-narrowing '"$march_option"' -fPIE -fPIC
                         PLATFORM_LDFLAGS=-pthread
                         MKDEP=${CC} -M -std=c++11
                         PLATFORM_C=../../platform/c++11/src/nsync_semaphore_mutex.cc \
@@ -301,6 +301,9 @@ done
 
 case "$target_platform" in
 ios)    nsync_platform_dir="$nsync_builds_dir/lipo.$target_platform.c++11"
+        if [ -d "$nsync_platform_dir" ]; then
+            rm -rf "$nsync_platform_dir"
+        fi
         mkdir "$nsync_platform_dir"
         eval lipo $platform_libs -create -output '$nsync_platform_dir/nsync.a'
         echo "$nsync_platform_dir/nsync.a"
diff --git a/tensorflow/contrib/nn/__init__.py b/tensorflow/contrib/nn/__init__.py
index 3bf795d19a..0bc133a00e 100644
--- a/tensorflow/contrib/nn/__init__.py
+++ b/tensorflow/contrib/nn/__init__.py
@@ -15,6 +15,7 @@
 """Module for variants of ops in tf.nn.
 
 @@alpha_dropout
+@@conv1d_transpose
 @@deprecated_flipped_softmax_cross_entropy_with_logits
 @@deprecated_flipped_sparse_softmax_cross_entropy_with_logits
 @@deprecated_flipped_sigmoid_cross_entropy_with_logits
@@ -32,6 +33,7 @@ from tensorflow.contrib.nn.python.ops.alpha_dropout import *
 from tensorflow.contrib.nn.python.ops.cross_entropy import *
 from tensorflow.contrib.nn.python.ops.sampling_ops import *
 from tensorflow.contrib.nn.python.ops.scaled_softplus import *
+from tensorflow.python.ops.nn_ops import conv1d_transpose
 from tensorflow.python.ops.nn_ops import nth_element
 # pylint: enable=unused-import,wildcard-import
 
diff --git a/tensorflow/contrib/opt/BUILD b/tensorflow/contrib/opt/BUILD
index 8c46becf2c..a9a63cbce0 100644
--- a/tensorflow/contrib/opt/BUILD
+++ b/tensorflow/contrib/opt/BUILD
@@ -19,6 +19,7 @@ py_library(
         "python/training/external_optimizer.py",
         "python/training/lazy_adam_optimizer.py",
         "python/training/moving_average_optimizer.py",
+        "python/training/multitask_optimizer_wrapper.py",
         "python/training/nadam_optimizer.py",
         "python/training/powersign.py",
         "python/training/sign_decay.py",
@@ -98,6 +99,23 @@ py_test(
     ],
 )
 
+py_test(
+    name = "multitask_optimizer_wrapper_test",
+    srcs = ["python/training/multitask_optimizer_wrapper_test.py"],
+    srcs_version = "PY2AND3",
+    deps = [
+        ":opt_py",
+        "//tensorflow/python:client",
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:constant_op",
+        "//tensorflow/python:dtypes",
+        "//tensorflow/python:training",
+        "//tensorflow/python:variables",
+        "//third_party/py/numpy",
+        "@six_archive//:six",
+    ],
+)
+
 py_test(
     name = "lazy_adam_optimizer_test",
     srcs = ["python/training/lazy_adam_optimizer_test.py"],
diff --git a/tensorflow/contrib/opt/__init__.py b/tensorflow/contrib/opt/__init__.py
index caf22536bb..4c60c99342 100644
--- a/tensorflow/contrib/opt/__init__.py
+++ b/tensorflow/contrib/opt/__init__.py
@@ -24,7 +24,7 @@ from tensorflow.contrib.opt.python.training.drop_stale_gradient_optimizer import
 from tensorflow.contrib.opt.python.training.external_optimizer import *
 from tensorflow.contrib.opt.python.training.lazy_adam_optimizer import *
 from tensorflow.contrib.opt.python.training.moving_average_optimizer import *
-from tensorflow.contrib.opt.python.training.nadam_optimizer import *
+from tensorflow.contrib.opt.python.training.multitask_optimizer_wrapper import *
 from tensorflow.contrib.opt.python.training.nadam_optimizer import *
 from tensorflow.contrib.opt.python.training.powersign import *
 from tensorflow.contrib.opt.python.training.variable_clipping_optimizer import *
@@ -38,7 +38,8 @@ _allowed_symbols = [
     'DelayCompensatedGradientDescentOptimizer',
     'DropStaleGradientOptimizer', 'ExternalOptimizerInterface',
     'LazyAdamOptimizer', 'NadamOptimizer', 'MovingAverageOptimizer',
-    'ScipyOptimizerInterface', 'VariableClippingOptimizer'
+    'ScipyOptimizerInterface', 'VariableClippingOptimizer',
+    'MultitaskOptimizerWrapper', 'clip_gradients_by_global_norm',
 ]
 
 remove_undocumented(__name__, _allowed_symbols)
diff --git a/tensorflow/contrib/opt/python/training/multitask_optimizer_wrapper.py b/tensorflow/contrib/opt/python/training/multitask_optimizer_wrapper.py
new file mode 100644
index 0000000000..c26037935d
--- /dev/null
+++ b/tensorflow/contrib/opt/python/training/multitask_optimizer_wrapper.py
@@ -0,0 +1,138 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""An optimizer wrapper that ensures correct behaviour
+of stateful optimizers with multitask loss."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import types
+import six
+
+from tensorflow.python.framework import dtypes
+from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import clip_ops
+from tensorflow.python.ops import control_flow_ops
+from tensorflow.python.ops import math_ops
+from tensorflow.python.training import optimizer
+
+__all__ = ["MultitaskOptimizerWrapper",
+           "clip_gradients_by_global_norm"]
+
+def _is_all_zeros(grad):
+  all_zeros = math_ops.equal(math_ops.count_nonzero(grad), 0)
+  return all_zeros
+
+def _get_wrapper(fn, opt):
+  def wrapper(self, grad, *args, **kwargs):  # pylint: disable=unused-argument
+    all_zeros = _is_all_zeros(grad)
+    return control_flow_ops.cond(
+        all_zeros,
+        control_flow_ops.no_op,
+        lambda: fn(grad, *args, **kwargs))
+  wrapper = types.MethodType(wrapper, opt)
+  return wrapper
+
+class MultitaskOptimizerWrapper(object):
+  """Optimizer wrapper that ensures that
+  all-zero gradients don't affect the optimizer state.
+
+  This might be useful when a multi-task loss is used,
+  and some components of the loss might be
+  not present (e.g. masked out) in some training batches.
+  Technically their gradient would be zero,
+  which would normally affect the optimizer state
+  (e.g. push running average to zero).
+  However this is not the desired behaviour,
+  since the missing loss component
+  should be treated as unknown rather than zero.
+
+  This wrapper filters out all-zero gradient tensors,
+  therefore preserving the optimizer state.
+
+  If gradient clipping by global norm is used,
+  the provided function clip_gradients_by_global_norm
+  should be used (and specified explicitly by the user).
+  Otherwise the global norm would be underestimated
+  because of all-zero tensors that should be ignored.
+
+  The gradient calculation and application
+  are delegated to an underlying optimizer.
+  The gradient application is altered only for all-zero tensors.
+
+  Example:
+  ```python
+  momentum_optimizer = tf.train.MomentumOptimizer(
+    learning_rate, momentum=0.9)
+  multitask_momentum_optimizer = tf.contrib.opt.MultitaskOptimizerWrapper(
+    momentum_optimizer)
+  gradvars = multitask_momentum_optimizer.compute_gradients(
+    loss)
+  gradvars_clipped, _ = tf.contrib.opt.clip_gradients_by_global_norm(
+    gradvars, 15.0)
+  train_op = multitask_momentum_optimizer.apply_gradients(
+    gradvars_clipped, global_step=batch)
+  ```
+  """
+  def __init__(self, opt):
+    """
+    Args:
+    opt: an instance of a class that implements tf.train.Optimizer.
+    """
+    if not isinstance(opt, optimizer.Optimizer):
+      raise TypeError(
+          "Supplied optimizer must be an instance of tf.train.Optimizer")
+    self._opt = opt
+    overriden_methods = ('_apply_dense',
+                         '_resource_apply_dense',
+                         '_apply_sparse',
+                         '_resource_apply_sparse')
+    for name in overriden_methods:
+      fn = getattr(self._opt, name)
+      wrapper = _get_wrapper(fn, self._opt)
+      setattr(self._opt, name, wrapper)
+
+  def __getattr__(self, name):
+    return getattr(self._opt, name)
+
+
+def clip_gradients_by_global_norm(gradients_variables, clip_norm=20.):
+  """Clips gradients of a multitask loss by their global norm.
+  Ignores all-zero tensors when computing the global norm.
+
+  Args:
+  gradients_variables: a list of pairs (gradient, variable).
+  clip_norm: a float Tensor, the global norm to clip on. Default is 20.0.
+
+  Returns:
+  list: A list of pairs of the same type as gradients_variables,.
+  fixed_global_norm: A 0-D (scalar) Tensor representing the global norm.
+  """
+  gradients, variables = six.moves.zip(*gradients_variables)
+  def _replace_nonexisting_grad(grad):
+    if grad is None:
+      return grad
+    all_zeros = _is_all_zeros(grad)
+    return control_flow_ops.cond(all_zeros,
+                                 lambda: array_ops.zeros(
+                                     [], dtype=dtypes.as_dtype(grad.dtype)),
+                                 lambda: grad)
+  nonzero_gradients = [_replace_nonexisting_grad(g) for g in gradients]
+  fixed_global_norm = clip_ops.global_norm(nonzero_gradients)
+  gradients, _ = clip_ops.clip_by_global_norm(gradients, clip_norm,
+                                              use_norm=fixed_global_norm)
+  return list(six.moves.zip(gradients, variables)), fixed_global_norm
diff --git a/tensorflow/contrib/opt/python/training/multitask_optimizer_wrapper_test.py b/tensorflow/contrib/opt/python/training/multitask_optimizer_wrapper_test.py
new file mode 100644
index 0000000000..b06213f715
--- /dev/null
+++ b/tensorflow/contrib/opt/python/training/multitask_optimizer_wrapper_test.py
@@ -0,0 +1,119 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for MultitaskOptimizerWrapper."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from tensorflow.contrib.opt.python.training import multitask_optimizer_wrapper
+from tensorflow.python.framework import constant_op
+from tensorflow.python.framework import dtypes
+from tensorflow.python.ops import variables
+from tensorflow.python.platform import test
+from tensorflow.python.training import momentum
+
+import numpy as np
+import six
+
+class MultitaskOptimizerWrapperTest(test.TestCase):
+  """
+  Tests for the multitask optimizer wrapper.
+  """
+  def testWrapper(self):
+    with self.test_session():
+      var0 = variables.Variable([1.0, 2.0], dtype=dtypes.float32)
+      var1 = variables.Variable([3.0, 4.0], dtype=dtypes.float32)
+      grads0 = constant_op.constant([0.1, 0.1], dtype=dtypes.float32)
+      grads1 = constant_op.constant([0.01, 0.01], dtype=dtypes.float32)
+      grads_allzero = constant_op.constant([0.0, 0.0], dtype=dtypes.float32)
+      mom_opt_impl = momentum.MomentumOptimizer(
+          learning_rate=2.0, momentum=0.9)
+      mom_opt = multitask_optimizer_wrapper.MultitaskOptimizerWrapper(
+          mom_opt_impl)
+      mom_update = mom_opt.apply_gradients(
+          zip([grads0, grads1], [var0, var1]))
+      mom_update_partial = mom_opt.apply_gradients(
+          zip([grads_allzero, grads1], [var0, var1]))
+      mom_update_no_action = mom_opt.apply_gradients(
+          zip([grads_allzero, grads_allzero], [var0, var1]))
+      self.evaluate(variables.global_variables_initializer())
+      # Fetch params to validate initial values
+      self.assertAllClose([1.0, 2.0], self.evaluate(var0))
+      self.assertAllClose([3.0, 4.0], self.evaluate(var1))
+
+      self.assertEqual(["momentum"], mom_opt.get_slot_names())
+      slot0 = mom_opt.get_slot(var0, "momentum")
+      self.assertEquals(slot0.get_shape(), var0.get_shape())
+      slot1 = mom_opt.get_slot(var1, "momentum")
+      self.assertEquals(slot1.get_shape(), var1.get_shape())
+
+      # Step 1: normal momentum update.
+      self.evaluate(mom_update)
+      # Check that the momentum accumulators have been updated.
+      self.assertAllCloseAccordingToType(np.array([0.1, 0.1]),
+                                         self.evaluate(slot0))
+      self.assertAllCloseAccordingToType(np.array([0.01, 0.01]),
+                                         self.evaluate(slot1))
+      # Check that the parameters have been updated.
+      self.assertAllCloseAccordingToType(
+          np.array([1.0 - (0.1 * 2.0), 2.0 - (0.1 * 2.0)]),
+          self.evaluate(var0))
+      self.assertAllCloseAccordingToType(
+          np.array([3.0 - (0.01 * 2.0), 4.0 - (0.01 * 2.0)]),
+          self.evaluate(var1))
+
+      # Step 2: momentum update that changes only slot1 but not slot0.
+      self.evaluate(mom_update_partial)
+      # Check that only the relevant momentum accumulator has been updated.
+      self.assertAllCloseAccordingToType(np.array([0.1, 0.1]),
+                                         self.evaluate(slot0))
+      self.assertAllCloseAccordingToType(
+          np.array([(0.9 * 0.01 + 0.01), (0.9 * 0.01 + 0.01)]),
+          self.evaluate(slot1))
+
+      # Step 3: momentum update that does not change anything.
+      self.evaluate(mom_update_no_action)
+      # Check that the momentum accumulators have *NOT* been updated.
+      self.assertAllCloseAccordingToType(np.array([0.1, 0.1]),
+                                         self.evaluate(slot0))
+      self.assertAllCloseAccordingToType(
+          np.array([(0.9 * 0.01 + 0.01), (0.9 * 0.01 + 0.01)]),
+          self.evaluate(slot1))
+
+  def testGradientClipping(self):
+    with self.test_session():
+      var0 = variables.Variable([1.0, 2.0], dtype=dtypes.float32)
+      var1 = variables.Variable([3.0, 4.0], dtype=dtypes.float32)
+      var2 = variables.Variable([3.0, 4.0], dtype=dtypes.float32)
+      var3 = variables.Variable([3.0, 4.0], dtype=dtypes.float32)
+      grads0 = constant_op.constant([10.0, 15.0], dtype=dtypes.float32)
+      grads1 = constant_op.constant([0.0, 5.0], dtype=dtypes.float32)
+      grads2 = constant_op.constant([0.0, 0.0], dtype=dtypes.float32)
+      grads3 = None
+      varlist = [var0, var1, var2, var3]
+      gradients = [grads0, grads1, grads2, grads3]
+      clipped_gradvars, global_norm = multitask_optimizer_wrapper.clip_gradients_by_global_norm(
+          six.moves.zip(gradients, varlist), clip_norm=1.0)
+      clipped_grads = list(six.moves.zip(*clipped_gradvars))[0]
+      reference_global_norm = np.sqrt(np.sum(np.square([10.0, 15.0, 0.0, 5.0])))
+      self.assertAllCloseAccordingToType(
+          self.evaluate(global_norm), reference_global_norm)
+      self.assertAllCloseAccordingToType(
+          self.evaluate(clipped_grads[2]), np.array([0., 0.]))
+      self.assertEqual(clipped_grads[3], None)
+
+if __name__ == "__main__":
+  test.main()
diff --git a/tensorflow/contrib/rnn/python/kernel_tests/core_rnn_cell_test.py b/tensorflow/contrib/rnn/python/kernel_tests/core_rnn_cell_test.py
index 909c6aba2b..16b6d145e3 100644
--- a/tensorflow/contrib/rnn/python/kernel_tests/core_rnn_cell_test.py
+++ b/tensorflow/contrib/rnn/python/kernel_tests/core_rnn_cell_test.py
@@ -38,6 +38,9 @@ from tensorflow.python.ops import rnn_cell_impl
 from tensorflow.python.ops import variable_scope
 from tensorflow.python.ops import variables as variables_lib
 from tensorflow.python.platform import test
+from tensorflow.python.framework import test_util
+from tensorflow.contrib.rnn.python.ops import rnn_cell as contrib_rnn_cell
+
 
 
 # pylint: enable=protected-access
@@ -358,6 +361,45 @@ class RNNCellTest(test.TestCase):
       self.assertEquals(variables[2].op.name,
                         "root/lstm_cell/projection/kernel")
 
+  def testLSTMCellLayerNorm(self):
+    with self.test_session() as sess:
+      num_units = 2
+      num_proj = 3
+      batch_size = 1
+      input_size = 4
+      with variable_scope.variable_scope(
+          "root", initializer=init_ops.constant_initializer(0.5)):
+        x = array_ops.zeros([batch_size, input_size])
+        c = array_ops.zeros([batch_size, num_units])
+        h = array_ops.zeros([batch_size, num_proj])
+        state = rnn_cell_impl.LSTMStateTuple(c, h)
+        cell = contrib_rnn_cell.LayerNormLSTMCell(
+          num_units=num_units,
+          num_proj=num_proj,
+          forget_bias=1.0,
+          layer_norm=True,
+          norm_gain=1.0,
+          norm_shift=0.0)
+        g, out_m = cell(x, state)
+        sess.run([variables_lib.global_variables_initializer()])
+        res = sess.run([g, out_m], {
+          x.name: np.ones((batch_size, input_size)),
+          c.name: 0.1 * np.ones((batch_size, num_units)),
+          h.name: 0.1 * np.ones((batch_size, num_proj))
+        })
+        self.assertEqual(len(res), 2)
+        # The numbers in results were not calculated, this is mostly just a
+        # smoke test.
+        self.assertEqual(res[0].shape, (batch_size, num_proj))
+        self.assertEqual(res[1][0].shape, (batch_size, num_units))
+        self.assertEqual(res[1][1].shape, (batch_size, num_proj))
+        # Different inputs so different outputs and states
+        for i in range(1, batch_size):
+          self.assertTrue(
+            float(np.linalg.norm((res[0][0, :] - res[0][i, :]))) < 1e-6)
+          self.assertTrue(
+            float(np.linalg.norm((res[1][0, :] - res[1][i, :]))) < 1e-6)
+
   def testOutputProjectionWrapper(self):
     with self.test_session() as sess:
       with variable_scope.variable_scope(
diff --git a/tensorflow/contrib/rnn/python/kernel_tests/rnn_cell_test.py b/tensorflow/contrib/rnn/python/kernel_tests/rnn_cell_test.py
index ebd4564f12..b4a5f2d7eb 100644
--- a/tensorflow/contrib/rnn/python/kernel_tests/rnn_cell_test.py
+++ b/tensorflow/contrib/rnn/python/kernel_tests/rnn_cell_test.py
@@ -37,6 +37,7 @@ from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import random_ops
 from tensorflow.python.ops import rnn
 from tensorflow.python.ops import rnn_cell
+from tensorflow.python.ops import rnn_cell_impl
 from tensorflow.python.ops import variable_scope
 from tensorflow.python.ops import variables
 from tensorflow.python.platform import test
@@ -1275,6 +1276,49 @@ class LayerNormBasicLSTMCellTest(test.TestCase):
         self.assertAllClose(res[2].c, expected_c1, 1e-5)
         self.assertAllClose(res[2].h, expected_h1, 1e-5)
 
+
+  def testBasicLSTMCellWithStateTupleLayerNorm(self):
+    """The results of LSTMCell and LayerNormBasicLSTMCell 
+    should be same. """
+    with self.test_session() as sess:
+      with variable_scope.variable_scope(
+          "root", initializer=init_ops.constant_initializer(0.5)):
+        x = array_ops.zeros([1, 2])
+        c0 = array_ops.zeros([1, 2])
+        h0 = array_ops.zeros([1, 2])
+        state0 = rnn_cell_impl.LSTMStateTuple(c0, h0)
+        c1 = array_ops.zeros([1, 2])
+        h1 = array_ops.zeros([1, 2])
+        state1 = rnn_cell_impl.LSTMStateTuple(c1, h1)
+        cell = rnn_cell_impl.MultiRNNCell(
+          [contrib_rnn_cell.LayerNormLSTMCell(
+              2,
+              layer_norm=True,
+              norm_gain=1.0,
+              norm_shift=0.0) for _ in range(2)])
+        h, (s0, s1) = cell(x, (state0, state1))
+        sess.run([variables.global_variables_initializer()])
+        res = sess.run([h, s0, s1], {
+          x.name: np.array([[1., 1.]]),
+          c0.name: 0.1 * np.asarray([[0, 1]]),
+          h0.name: 0.1 * np.asarray([[2, 3]]),
+          c1.name: 0.1 * np.asarray([[4, 5]]),
+          h1.name: 0.1 * np.asarray([[6, 7]]),
+        })
+
+        expected_h = np.array([[-0.38079708, 0.38079708]])
+        expected_h0 = np.array([[-0.38079708, 0.38079708]])
+        expected_c0 = np.array([[-1.0, 1.0]])
+        expected_h1 = np.array([[-0.38079708, 0.38079708]])
+        expected_c1 = np.array([[-1.0, 1.0]])
+
+        self.assertEqual(len(res), 3)
+        self.assertAllClose(res[0], expected_h, 1e-5)
+        self.assertAllClose(res[1].c, expected_c0, 1e-5)
+        self.assertAllClose(res[1].h, expected_h0, 1e-5)
+        self.assertAllClose(res[2].c, expected_c1, 1e-5)
+        self.assertAllClose(res[2].h, expected_h1, 1e-5)
+
   def testBasicLSTMCellWithDropout(self):
 
     def _is_close(x, y, digits=4):
diff --git a/tensorflow/contrib/rnn/python/ops/rnn_cell.py b/tensorflow/contrib/rnn/python/ops/rnn_cell.py
index d4691f2c27..5e85c125df 100644
--- a/tensorflow/contrib/rnn/python/ops/rnn_cell.py
+++ b/tensorflow/contrib/rnn/python/ops/rnn_cell.py
@@ -36,6 +36,7 @@ from tensorflow.python.ops import nn_ops
 from tensorflow.python.ops import random_ops
 from tensorflow.python.ops import rnn_cell_impl
 from tensorflow.python.ops import variable_scope as vs
+from tensorflow.python.ops import partitioned_variables
 from tensorflow.python.platform import tf_logging as logging
 from tensorflow.python.util import nest
 
@@ -76,6 +77,18 @@ def _get_sharded_variable(name, shape, dtype, num_shards):
   return shards
 
 
+def _norm(g, b, inp, scope):
+  shape = inp.get_shape()[-1:]
+  gamma_init = init_ops.constant_initializer(g)
+  beta_init = init_ops.constant_initializer(b)
+  with vs.variable_scope(scope):
+    # Initialize beta and gamma for use by layer_norm.
+    vs.get_variable("gamma", shape=shape, initializer=gamma_init)
+    vs.get_variable("beta", shape=shape, initializer=beta_init)
+  normalized = layers.layer_norm(inp, reuse=True, scope=scope)
+  return normalized
+
+
 class CoupledInputForgetGateLSTMCell(rnn_cell_impl.RNNCell):
   """Long short-term memory unit (LSTM) recurrent network cell.
 
@@ -102,13 +115,24 @@ class CoupledInputForgetGateLSTMCell(rnn_cell_impl.RNNCell):
 
   The class uses optional peep-hole connections, and an optional projection
   layer.
+  
+  Layer normalization implementation is based on:
+
+    https://arxiv.org/abs/1607.06450.
+
+  "Layer Normalization"
+  Jimmy Lei Ba, Jamie Ryan Kiros, Geoffrey E. Hinton
+
+  and is applied before the internal nonlinearities.
+  
   """
 
   def __init__(self, num_units, use_peepholes=False,
                initializer=None, num_proj=None, proj_clip=None,
                num_unit_shards=1, num_proj_shards=1,
                forget_bias=1.0, state_is_tuple=True,
-               activation=math_ops.tanh, reuse=None):
+               activation=math_ops.tanh, reuse=None,
+               layer_norm=False, norm_gain=1.0, norm_shift=0.0):
     """Initialize the parameters for an LSTM cell.
 
     Args:
@@ -135,6 +159,13 @@ class CoupledInputForgetGateLSTMCell(rnn_cell_impl.RNNCell):
       reuse: (optional) Python boolean describing whether to reuse variables
         in an existing scope.  If not `True`, and the existing scope already has
         the given variables, an error is raised.
+      layer_norm: If `True`, layer normalization will be applied.
+      norm_gain: float, The layer normalization gain initial value. If
+        `layer_norm` has been set to `False`, this argument will be ignored.
+      norm_shift: float, The layer normalization shift initial value. If
+        `layer_norm` has been set to `False`, this argument will be ignored.
+        
+        
     """
     super(CoupledInputForgetGateLSTMCell, self).__init__(_reuse=reuse)
     if not state_is_tuple:
@@ -152,6 +183,9 @@ class CoupledInputForgetGateLSTMCell(rnn_cell_impl.RNNCell):
     self._state_is_tuple = state_is_tuple
     self._activation = activation
     self._reuse = reuse
+    self._layer_norm = layer_norm
+    self._norm_gain = norm_gain
+    self._norm_shift = norm_shift
 
     if num_proj:
       self._state_size = (rnn_cell_impl.LSTMStateTuple(num_units, num_proj)
@@ -220,9 +254,20 @@ class CoupledInputForgetGateLSTMCell(rnn_cell_impl.RNNCell):
 
     # j = new_input, f = forget_gate, o = output_gate
     cell_inputs = array_ops.concat([inputs, m_prev], 1)
-    lstm_matrix = nn_ops.bias_add(math_ops.matmul(cell_inputs, concat_w), b)
+    lstm_matrix = math_ops.matmul(cell_inputs, concat_w)
+
+    # If layer nomalization is applied, do not add bias
+    if not self._layer_norm:
+      lstm_matrix = nn_ops.bias_add(lstm_matrix, b)
+
     j, f, o = array_ops.split(value=lstm_matrix, num_or_size_splits=3, axis=1)
 
+    # Apply layer normalization
+    if self._layer_norm:
+      j = _norm(self._norm_gain, self._norm_shift, j, "transform")
+      f = _norm(self._norm_gain, self._norm_shift, f, "forget")
+      o = _norm(self._norm_gain, self._norm_shift, o, "output")
+
     # Diagonal connections
     if self._use_peepholes:
       w_f_diag = vs.get_variable(
@@ -236,6 +281,10 @@ class CoupledInputForgetGateLSTMCell(rnn_cell_impl.RNNCell):
       f_act = sigmoid(f + self._forget_bias)
     c = (f_act * c_prev + (1 - f_act) * self._activation(j))
 
+    # Apply layer normalization
+    if self._layer_norm:
+      c = _norm(self._norm_gain, self._norm_shift, c, "state")
+
     if self._use_peepholes:
       m = sigmoid(o + w_o_diag * c) * self._activation(c)
     else:
@@ -1301,8 +1350,8 @@ class LayerNormBasicLSTMCell(rnn_cell_impl.RNNCell):
     self._keep_prob = dropout_keep_prob
     self._seed = dropout_prob_seed
     self._layer_norm = layer_norm
-    self._g = norm_gain
-    self._b = norm_shift
+    self._norm_gain = norm_gain
+    self._norm_shift = norm_shift
     self._reuse = reuse
 
   @property
@@ -1313,24 +1362,25 @@ class LayerNormBasicLSTMCell(rnn_cell_impl.RNNCell):
   def output_size(self):
     return self._num_units
 
-  def _norm(self, inp, scope):
+  def _norm(self, inp, scope, dtype=dtypes.float32):
     shape = inp.get_shape()[-1:]
-    gamma_init = init_ops.constant_initializer(self._g)
-    beta_init = init_ops.constant_initializer(self._b)
+    gamma_init = init_ops.constant_initializer(self._norm_gain)
+    beta_init = init_ops.constant_initializer(self._norm_shift)
     with vs.variable_scope(scope):
       # Initialize beta and gamma for use by layer_norm.
-      vs.get_variable("gamma", shape=shape, initializer=gamma_init)
-      vs.get_variable("beta", shape=shape, initializer=beta_init)
+      vs.get_variable("gamma", shape=shape, initializer=gamma_init, dtype=dtype)
+      vs.get_variable("beta", shape=shape, initializer=beta_init, dtype=dtype)
     normalized = layers.layer_norm(inp, reuse=True, scope=scope)
     return normalized
 
   def _linear(self, args):
     out_size = 4 * self._num_units
     proj_size = args.get_shape()[-1]
-    weights = vs.get_variable("kernel", [proj_size, out_size])
+    dtype = args.dtype
+    weights = vs.get_variable("kernel", [proj_size, out_size], dtype=dtype)
     out = math_ops.matmul(args, weights)
     if not self._layer_norm:
-      bias = vs.get_variable("bias", [out_size])
+      bias = vs.get_variable("bias", [out_size], dtype=dtype)
       out = nn_ops.bias_add(out, bias)
     return out
 
@@ -1339,13 +1389,14 @@ class LayerNormBasicLSTMCell(rnn_cell_impl.RNNCell):
     c, h = state
     args = array_ops.concat([inputs, h], 1)
     concat = self._linear(args)
+    dtype = args.dtype
 
     i, j, f, o = array_ops.split(value=concat, num_or_size_splits=4, axis=1)
     if self._layer_norm:
-      i = self._norm(i, "input")
-      j = self._norm(j, "transform")
-      f = self._norm(f, "forget")
-      o = self._norm(o, "output")
+      i = self._norm(i, "input", dtype=dtype)
+      j = self._norm(j, "transform", dtype=dtype)
+      f = self._norm(f, "forget", dtype=dtype)
+      o = self._norm(o, "output", dtype=dtype)
 
     g = self._activation(j)
     if (not isinstance(self._keep_prob, float)) or self._keep_prob < 1:
@@ -1354,7 +1405,7 @@ class LayerNormBasicLSTMCell(rnn_cell_impl.RNNCell):
     new_c = (c * math_ops.sigmoid(f + self._forget_bias)
              + math_ops.sigmoid(i) * g)
     if self._layer_norm:
-      new_c = self._norm(new_c, "state")
+      new_c = self._norm(new_c, "state", dtype=dtype)
     new_h = self._activation(new_c) * math_ops.sigmoid(o)
 
     new_state = rnn_cell_impl.LSTMStateTuple(new_c, new_h)
@@ -2306,3 +2357,264 @@ class GLSTMCell(rnn_cell_impl.RNNCell):
 
     new_state = rnn_cell_impl.LSTMStateTuple(c, m)
     return m, new_state
+
+
+class LayerNormLSTMCell(rnn_cell_impl.RNNCell):
+  """Long short-term memory unit (LSTM) recurrent network cell.
+
+  The default non-peephole implementation is based on:
+
+    http://www.bioinf.jku.at/publications/older/2604.pdf
+
+  S. Hochreiter and J. Schmidhuber.
+  "Long Short-Term Memory". Neural Computation, 9(8):1735-1780, 1997.
+
+  The peephole implementation is based on:
+
+    https://research.google.com/pubs/archive/43905.pdf
+
+  Hasim Sak, Andrew Senior, and Francoise Beaufays.
+  "Long short-term memory recurrent neural network architectures for
+   large scale acoustic modeling." INTERSPEECH, 2014.
+
+  The class uses optional peep-hole connections, optional cell clipping, and
+  an optional projection layer.
+
+  Layer normalization implementation is based on:
+
+    https://arxiv.org/abs/1607.06450.
+
+  "Layer Normalization"
+  Jimmy Lei Ba, Jamie Ryan Kiros, Geoffrey E. Hinton
+
+  and is applied before the internal nonlinearities.
+
+  """
+
+  def __init__(self, num_units,
+               use_peepholes=False, cell_clip=None,
+               initializer=None, num_proj=None, proj_clip=None,
+               forget_bias=1.0,
+               activation=None, layer_norm=False,
+               norm_gain=1.0, norm_shift=0.0, reuse=None):
+    """Initialize the parameters for an LSTM cell.
+
+    Args:
+      num_units: int, The number of units in the LSTM cell
+      use_peepholes: bool, set True to enable diagonal/peephole connections.
+      cell_clip: (optional) A float value, if provided the cell state is clipped
+        by this value prior to the cell output activation.
+      initializer: (optional) The initializer to use for the weight and
+        projection matrices.
+      num_proj: (optional) int, The output dimensionality for the projection
+        matrices.  If None, no projection is performed.
+      proj_clip: (optional) A float value.  If `num_proj > 0` and `proj_clip` is
+        provided, then the projected values are clipped elementwise to within
+        `[-proj_clip, proj_clip]`.
+      forget_bias: Biases of the forget gate are initialized by default to 1
+        in order to reduce the scale of forgetting at the beginning of
+        the training. Must set it manually to `0.0` when restoring from
+        CudnnLSTM trained checkpoints.
+      activation: Activation function of the inner states.  Default: `tanh`.
+      layer_norm: If `True`, layer normalization will be applied.
+      norm_gain: float, The layer normalization gain initial value. If
+        `layer_norm` has been set to `False`, this argument will be ignored.
+      norm_shift: float, The layer normalization shift initial value. If
+        `layer_norm` has been set to `False`, this argument will be ignored.
+      reuse: (optional) Python boolean describing whether to reuse variables
+        in an existing scope.  If not `True`, and the existing scope already has
+        the given variables, an error is raised.
+
+      When restoring from CudnnLSTM-trained checkpoints, must use
+      CudnnCompatibleLSTMCell instead.
+    """
+    super(LayerNormLSTMCell, self).__init__(_reuse=reuse)
+
+    self._num_units = num_units
+    self._use_peepholes = use_peepholes
+    self._cell_clip = cell_clip
+    self._initializer = initializer
+    self._num_proj = num_proj
+    self._proj_clip = proj_clip
+    self._forget_bias = forget_bias
+    self._activation = activation or math_ops.tanh
+    self._layer_norm = layer_norm
+    self._norm_gain = norm_gain
+    self._norm_shift = norm_shift
+
+    if num_proj:
+      self._state_size = (rnn_cell_impl.LSTMStateTuple(num_units, num_proj))
+      self._output_size = num_proj
+    else:
+      self._state_size = (rnn_cell_impl.LSTMStateTuple(num_units, num_units))
+      self._output_size = num_units
+
+  @property
+  def state_size(self):
+    return self._state_size
+
+  @property
+  def output_size(self):
+    return self._output_size
+
+
+  def _linear(self,
+              args,
+              output_size,
+              bias,
+              bias_initializer=None,
+              kernel_initializer=None,
+              layer_norm=False):
+    """Linear map: sum_i(args[i] * W[i]), where W[i] is a Variable.
+
+    Args:
+      args: a 2D Tensor or a list of 2D, batch x n, Tensors.
+      output_size: int, second dimension of W[i].
+      bias: boolean, whether to add a bias term or not.
+      bias_initializer: starting value to initialize the bias
+        (default is all zeros).
+      kernel_initializer: starting value to initialize the weight.
+      layer_norm: boolean, whether to apply layer normalization.
+
+
+    Returns:
+      A 2D Tensor with shape [batch x output_size] taking value
+      sum_i(args[i] * W[i]), where each W[i] is a newly created Variable.
+
+    Raises:
+      ValueError: if some of the arguments has unspecified or wrong shape.
+    """
+    if args is None or (nest.is_sequence(args) and not args):
+      raise ValueError("`args` must be specified")
+    if not nest.is_sequence(args):
+      args = [args]
+
+    # Calculate the total size of arguments on dimension 1.
+    total_arg_size = 0
+    shapes = [a.get_shape() for a in args]
+    for shape in shapes:
+      if shape.ndims != 2:
+        raise ValueError("linear is expecting 2D arguments: %s" % shapes)
+      if shape[1].value is None:
+        raise ValueError("linear expects shape[1] to be provided for shape %s, "
+                         "but saw %s" % (shape, shape[1]))
+      else:
+        total_arg_size += shape[1].value
+
+    dtype = [a.dtype for a in args][0]
+
+    # Now the computation.
+    scope = vs.get_variable_scope()
+    with vs.variable_scope(scope) as outer_scope:
+      weights = vs.get_variable(
+        "kernel", [total_arg_size, output_size],
+        dtype=dtype,
+        initializer=kernel_initializer)
+      if len(args) == 1:
+        res = math_ops.matmul(args[0], weights)
+      else:
+        res = math_ops.matmul(array_ops.concat(args, 1), weights)
+      if not bias:
+        return res
+      with vs.variable_scope(outer_scope) as inner_scope:
+        inner_scope.set_partitioner(None)
+        if bias_initializer is None:
+          bias_initializer = init_ops.constant_initializer(0.0, dtype=dtype)
+        biases = vs.get_variable(
+          "bias", [output_size],
+          dtype=dtype,
+          initializer=bias_initializer)
+
+    if not layer_norm:
+      res = nn_ops.bias_add(res, biases)
+
+    return res
+
+  def call(self, inputs, state):
+    """Run one step of LSTM.
+
+    Args:
+      inputs: input Tensor, 2D, batch x num_units.
+      state: this must be a tuple of state Tensors,
+       both `2-D`, with column sizes `c_state` and
+        `m_state`.
+
+    Returns:
+      A tuple containing:
+
+      - A `2-D, [batch x output_dim]`, Tensor representing the output of the
+        LSTM after reading `inputs` when previous state was `state`.
+        Here output_dim is:
+           num_proj if num_proj was set,
+           num_units otherwise.
+      - Tensor(s) representing the new state of LSTM after reading `inputs` when
+        the previous state was `state`.  Same type and shape(s) as `state`.
+
+    Raises:
+      ValueError: If input size cannot be inferred from inputs via
+        static shape inference.
+    """
+    num_proj = self._num_units if self._num_proj is None else self._num_proj
+    sigmoid = math_ops.sigmoid
+
+    (c_prev, m_prev) = state
+
+    dtype = inputs.dtype
+    input_size = inputs.get_shape().with_rank(2)[1]
+    if input_size.value is None:
+      raise ValueError("Could not infer input size from inputs.get_shape()[-1]")
+    scope = vs.get_variable_scope()
+    with vs.variable_scope(scope, initializer=self._initializer) as unit_scope:
+
+      # i = input_gate, j = new_input, f = forget_gate, o = output_gate
+      lstm_matrix = self._linear([inputs, m_prev], 4 * self._num_units, bias=True,
+                            bias_initializer=None, layer_norm=self._layer_norm)
+      i, j, f, o = array_ops.split(
+        value=lstm_matrix, num_or_size_splits=4, axis=1)
+
+      if self._layer_norm:
+        i = _norm(self._norm_gain, self._norm_shift, i, "input")
+        j = _norm(self._norm_gain, self._norm_shift, j, "transform")
+        f = _norm(self._norm_gain, self._norm_shift, f, "forget")
+        o = _norm(self._norm_gain, self._norm_shift, o, "output")
+
+      # Diagonal connections
+      if self._use_peepholes:
+        with vs.variable_scope(unit_scope) as projection_scope:
+          w_f_diag = vs.get_variable(
+            "w_f_diag", shape=[self._num_units], dtype=dtype)
+          w_i_diag = vs.get_variable(
+            "w_i_diag", shape=[self._num_units], dtype=dtype)
+          w_o_diag = vs.get_variable(
+            "w_o_diag", shape=[self._num_units], dtype=dtype)
+
+      if self._use_peepholes:
+        c = (sigmoid(f + self._forget_bias + w_f_diag * c_prev) * c_prev +
+             sigmoid(i + w_i_diag * c_prev) * self._activation(j))
+      else:
+        c = (sigmoid(f + self._forget_bias) * c_prev + sigmoid(i) *
+             self._activation(j))
+
+      if self._layer_norm:
+        c = _norm(self._norm_gain, self._norm_shift, c, "state")
+
+      if self._cell_clip is not None:
+        # pylint: disable=invalid-unary-operand-type
+        c = clip_ops.clip_by_value(c, -self._cell_clip, self._cell_clip)
+        # pylint: enable=invalid-unary-operand-type
+      if self._use_peepholes:
+        m = sigmoid(o + w_o_diag * c) * self._activation(c)
+      else:
+        m = sigmoid(o) * self._activation(c)
+
+      if self._num_proj is not None:
+        with vs.variable_scope("projection") as proj_scope:
+          m = self._linear(m, self._num_proj, bias=False)
+
+        if self._proj_clip is not None:
+          # pylint: disable=invalid-unary-operand-type
+          m = clip_ops.clip_by_value(m, -self._proj_clip, self._proj_clip)
+          # pylint: enable=invalid-unary-operand-type
+
+    new_state = (rnn_cell_impl.LSTMStateTuple(c, m))
+    return m, new_state
diff --git a/tensorflow/contrib/seq2seq/python/ops/attention_wrapper.py b/tensorflow/contrib/seq2seq/python/ops/attention_wrapper.py
index 87230e3355..c3b180d9f4 100644
--- a/tensorflow/contrib/seq2seq/python/ops/attention_wrapper.py
+++ b/tensorflow/contrib/seq2seq/python/ops/attention_wrapper.py
@@ -149,7 +149,7 @@ class _BaseAttentionMechanism(AttentionMechanism):
                memory_sequence_length=None,
                memory_layer=None,
                check_inner_dims_defined=True,
-               score_mask_value=float("-inf"),
+               score_mask_value=None,
                name=None):
     """Construct base AttentionMechanism class.
 
@@ -187,9 +187,12 @@ class _BaseAttentionMechanism(AttentionMechanism):
           "memory_layer is not a Layer: %s" % type(memory_layer).__name__)
     self._query_layer = query_layer
     self._memory_layer = memory_layer
+    self.dtype = memory_layer.dtype
     if not callable(probability_fn):
       raise TypeError("probability_fn must be callable, saw type: %s" %
                       type(probability_fn).__name__)
+    if score_mask_value is None:
+      score_mask_value = dtypes.as_dtype(self._memory_layer.dtype).as_numpy_dtype(-np.inf)
     self._probability_fn = lambda score, prev: (  # pylint:disable=g-long-lambda
         probability_fn(
             _maybe_mask_score(score, memory_sequence_length, score_mask_value),
@@ -334,7 +337,8 @@ class LuongAttention(_BaseAttentionMechanism):
                memory_sequence_length=None,
                scale=False,
                probability_fn=None,
-               score_mask_value=float("-inf"),
+               score_mask_value=None,
+               dtype=None,
                name="LuongAttention"):
     """Construct the AttentionMechanism mechanism.
 
@@ -353,17 +357,20 @@ class LuongAttention(_BaseAttentionMechanism):
       score_mask_value: (optional) The mask value for score before passing into
         `probability_fn`. The default is -inf. Only used if
         `memory_sequence_length` is not None.
+      dtype: The data type for the memory layer of the attention mechanism.
       name: Name to use when creating ops.
     """
     # For LuongAttention, we only transform the memory layer; thus
     # num_units **must** match expected the query depth.
     if probability_fn is None:
       probability_fn = nn_ops.softmax
+    if dtype is None:
+      dtype = dtypes.float32
     wrapped_probability_fn = lambda score, _: probability_fn(score)
     super(LuongAttention, self).__init__(
         query_layer=None,
         memory_layer=layers_core.Dense(
-            num_units, name="memory_layer", use_bias=False),
+            num_units, name="memory_layer", use_bias=False, dtype=dtype),
         memory=memory,
         probability_fn=wrapped_probability_fn,
         memory_sequence_length=memory_sequence_length,
@@ -475,7 +482,8 @@ class BahdanauAttention(_BaseAttentionMechanism):
                memory_sequence_length=None,
                normalize=False,
                probability_fn=None,
-               score_mask_value=float("-inf"),
+               score_mask_value=None,
+               dtype=None,
                name="BahdanauAttention"):
     """Construct the Attention mechanism.
 
@@ -494,16 +502,20 @@ class BahdanauAttention(_BaseAttentionMechanism):
       score_mask_value: (optional): The mask value for score before passing into
         `probability_fn`. The default is -inf. Only used if
         `memory_sequence_length` is not None.
+      dtype: The data type for the query and memory layers of the attention
+        mechanism.
       name: Name to use when creating ops.
     """
     if probability_fn is None:
       probability_fn = nn_ops.softmax
+    if dtype is None:
+      dtype = dtypes.float32
     wrapped_probability_fn = lambda score, _: probability_fn(score)
     super(BahdanauAttention, self).__init__(
         query_layer=layers_core.Dense(
-            num_units, name="query_layer", use_bias=False),
+            num_units, name="query_layer", use_bias=False, dtype=dtype),
         memory_layer=layers_core.Dense(
-            num_units, name="memory_layer", use_bias=False),
+            num_units, name="memory_layer", use_bias=False, dtype=dtype),
         memory=memory,
         probability_fn=wrapped_probability_fn,
         memory_sequence_length=memory_sequence_length,
@@ -738,11 +750,12 @@ class BahdanauMonotonicAttention(_BaseMonotonicAttentionMechanism):
                memory,
                memory_sequence_length=None,
                normalize=False,
-               score_mask_value=float("-inf"),
+               score_mask_value=None,
                sigmoid_noise=0.,
                sigmoid_noise_seed=None,
                score_bias_init=0.,
                mode="parallel",
+               dtype=None,
                name="BahdanauMonotonicAttention"):
     """Construct the Attention mechanism.
 
@@ -766,17 +779,21 @@ class BahdanauMonotonicAttention(_BaseMonotonicAttentionMechanism):
       mode: How to compute the attention distribution.  Must be one of
         'recursive', 'parallel', or 'hard'.  See the docstring for
         `tf.contrib.seq2seq.monotonic_attention` for more information.
+      dtype: The data type for the query and memory layers of the attention
+        mechanism.
       name: Name to use when creating ops.
     """
     # Set up the monotonic probability fn with supplied parameters
+    if dtype is None:
+      dtype = dtypes.float32
     wrapped_probability_fn = functools.partial(
         _monotonic_probability_fn, sigmoid_noise=sigmoid_noise, mode=mode,
         seed=sigmoid_noise_seed)
     super(BahdanauMonotonicAttention, self).__init__(
         query_layer=layers_core.Dense(
-            num_units, name="query_layer", use_bias=False),
+            num_units, name="query_layer", use_bias=False, dtype=dtype),
         memory_layer=layers_core.Dense(
-            num_units, name="memory_layer", use_bias=False),
+            num_units, name="memory_layer", use_bias=False, dtype=dtype),
         memory=memory,
         probability_fn=wrapped_probability_fn,
         memory_sequence_length=memory_sequence_length,
@@ -834,11 +851,12 @@ class LuongMonotonicAttention(_BaseMonotonicAttentionMechanism):
                memory,
                memory_sequence_length=None,
                scale=False,
-               score_mask_value=float("-inf"),
+               score_mask_value=None,
                sigmoid_noise=0.,
                sigmoid_noise_seed=None,
                score_bias_init=0.,
                mode="parallel",
+               dtype=None,
                name="LuongMonotonicAttention"):
     """Construct the Attention mechanism.
 
@@ -862,17 +880,21 @@ class LuongMonotonicAttention(_BaseMonotonicAttentionMechanism):
       mode: How to compute the attention distribution.  Must be one of
         'recursive', 'parallel', or 'hard'.  See the docstring for
         `tf.contrib.seq2seq.monotonic_attention` for more information.
+      dtype: The data type for the query and memory layers of the attention
+        mechanism.
       name: Name to use when creating ops.
     """
     # Set up the monotonic probability fn with supplied parameters
+    if dtype is None:
+      dtype = dtypes.float32
     wrapped_probability_fn = functools.partial(
         _monotonic_probability_fn, sigmoid_noise=sigmoid_noise, mode=mode,
         seed=sigmoid_noise_seed)
     super(LuongMonotonicAttention, self).__init__(
         query_layer=layers_core.Dense(
-            num_units, name="query_layer", use_bias=False),
+            num_units, name="query_layer", use_bias=False, dtype=dtype),
         memory_layer=layers_core.Dense(
-            num_units, name="memory_layer", use_bias=False),
+            num_units, name="memory_layer", use_bias=False, dtype=dtype),
         memory=memory,
         probability_fn=wrapped_probability_fn,
         memory_sequence_length=memory_sequence_length,
@@ -1123,8 +1145,9 @@ class AttentionWrapper(rnn_cell_impl.RNNCell):
             % (len(attention_layer_sizes), len(attention_mechanisms)))
       self._attention_layers = tuple(
           layers_core.Dense(
-              attention_layer_size, name="attention_layer", use_bias=False)
-          for attention_layer_size in attention_layer_sizes)
+              attention_layer_size, name="attention_layer", use_bias=False,
+              dtype=attention_mechanisms[i].dtype)
+          for i, attention_layer_size in enumerate(attention_layer_sizes))
       self._attention_layer_size = sum(attention_layer_sizes)
     else:
       self._attention_layers = None
diff --git a/tensorflow/contrib/slim/README.md b/tensorflow/contrib/slim/README.md
index 0bfd0801d5..f7a85557ca 100644
--- a/tensorflow/contrib/slim/README.md
+++ b/tensorflow/contrib/slim/README.md
@@ -237,7 +237,7 @@ One way to reduce this code duplication would be via a `for` loop:
 ```python
 net = ...
 for i in range(3):
-  net = slim.conv2d(net, 256, [3, 3], scope='conv3_' % (i+1))
+  net = slim.conv2d(net, 256, [3, 3], scope='conv3_%d' % (i+1))
 net = slim.max_pool2d(net, [2, 2], scope='pool2')
 ```
 
diff --git a/tensorflow/contrib/slim/python/slim/nets/resnet_v1_test.py b/tensorflow/contrib/slim/python/slim/nets/resnet_v1_test.py
index b4fd2580c2..576444214d 100644
--- a/tensorflow/contrib/slim/python/slim/nets/resnet_v1_test.py
+++ b/tensorflow/contrib/slim/python/slim/nets/resnet_v1_test.py
@@ -386,7 +386,7 @@ class ResnetCompleteNetworkTest(test.TestCase):
                 inputs, None, is_training=False, global_pool=False)
             sess.run(variables.global_variables_initializer())
             self.assertAllClose(
-                output.eval(), expected.eval(), atol=1e-4, rtol=1e-4)
+                output.eval(), expected.eval(), atol=2e-4, rtol=1e-4)
 
   def testUnknownBatchSize(self):
     batch = 2
diff --git a/tensorflow/contrib/verbs/README.md b/tensorflow/contrib/verbs/README.md
index da5f2b0223..dcb390b0a5 100644
--- a/tensorflow/contrib/verbs/README.md
+++ b/tensorflow/contrib/verbs/README.md
@@ -1,4 +1,4 @@
-## How to compile and use RDMA-enabled TensorFlow
+## How to compile, use and configure RDMA-enabled TensorFlow
 1. Follow the regular TF compilation instructions. During configure step, if you want ibverbs based RDMA support, answer yes to this question:
 
     ```Do you wish to build TensorFlow with VERBS-RDMA support [y/N]```
@@ -7,6 +7,18 @@
 
     ```server = tf.train.Server(cluster, job_name="local", task_index=0, protocol='grpc+verbs') # default protocol is 'grpc'```
 
+3. RDMA configuration is done by setting the following environment variables:
+   * **RDMA_DEVICE**: The RDMA device name to be used. If not defined by user, a default device with an active port will be set if exists.
+   * **RDMA_DEVICE_PORT**: The port within the selected device. Not relevant if RDMA_DEVICE is not defined. If not defined by user, a default active port will be set if exists.
+   * **RDMA_GID_INDEX**: The GID index of the port. If not defined by user, a default suitable GID index will be set (RoCEV2 is favourable as default).
+   * **RDMA_QP_PKEY_INDEX**: The Pkey for the QP. If not defined by user, the default value is 0.
+   * **RDMA_QP_QUEUE_DEPTH**: TX/RX queue size for the QP. If not defined by user, the default value is 1024.
+   * **RDMA_QP_TIMEOUT**: The retransmission timeout for QPs. If not defined by user, the default value is 14.
+   * **RDMA_QP_RETRY_COUNT**: Number of retransmission for QPs. If not defined by user, the default value is 7.
+   * **RDMA_QP_SL**: Service level configuration for QOS and ECN, valid values are 0-7. If not defined by user, the default value is 0.
+   * **RDMA_QP_MTU**: MTU configuration for the QPs. If not defined by user, the default value is active MTU from query_port.
+   * **RDMA_TRAFFIC_CLASS**: Traffic class configuration for QP, in case of DSCP trust level QoS configuration. If not defined by user, the default value is 0. For more info see [HowTo Configure Trust state on Mellanox Adapters](https://community.mellanox.com/docs/DOC-2866).
+
 ## Overview
 The design is based on TensorFlow r1.0. An RDMA path is added between servers for tensor transfer (weights, gradients, etc). The existing GRPC path remains and is responsible for "administrative" tasks, such as setting up the RDMA path, exchanging computation graphs, etc.
 
diff --git a/tensorflow/contrib/verbs/rdma.cc b/tensorflow/contrib/verbs/rdma.cc
index 26e18b28aa..331943a3ef 100644
--- a/tensorflow/contrib/verbs/rdma.cc
+++ b/tensorflow/contrib/verbs/rdma.cc
@@ -17,6 +17,7 @@ limitations under the License.
 
 #include "tensorflow/contrib/verbs/rdma.h"
 #include <cstdlib>
+#include <fcntl.h>
 #include "tensorflow/contrib/verbs/verbs_util.h"
 #include "tensorflow/core/common_runtime/device_mgr.h"
 #include "tensorflow/core/common_runtime/dma_helper.h"
@@ -33,6 +34,8 @@ limitations under the License.
 
 namespace tensorflow {
 
+#define RoCE_V2 "RoCE v2"
+
 namespace {
 // hash name to 32-bit integer
 uint32_t NameHash(const string& name) {
@@ -66,16 +69,337 @@ string MessageTypeToString(RdmaMessageType rmt) {
 }
 }  // namespace
 
-ibv_context* open_default_device() {
+// Function to get environment variable
+// Args:
+//    var_name - the name of the environmental variable
+// Returns:
+//    string with it's value or empty string if not set
+string get_env_var(char const* var_name) {
+  char const* var_temp = getenv(var_name);
+
+  return (var_temp == NULL) ? string() : string(var_temp);
+}
+
+// Function to open device
+// Args:
+//   ibv_dev device to open
+// Returns:
+//   context of the opened device
+ibv_context* open_device(ibv_device* ibv_dev) {
+  ibv_context* context = ibv_open_device(ibv_dev);
+
+  CHECK(context) << "Open context failed for " << ibv_get_device_name(ibv_dev);
+  return context;
+}
+
+// Function to count the number of active ports for device
+// Args:
+//   device - to check active ports
+// Returns:
+//   number of active ports of the given device
+int get_dev_active_port_count(ibv_device* device) {
+  ibv_device_attr device_att;
+  ibv_port_attr port_attr;
+  ibv_context* context = NULL;
+  int rc, port_index, active_ports = 0;
+
+  context = ibv_open_device(device);
+  CHECK(context) << "Open context failed for " << ibv_get_device_name(device);
+  rc = ibv_query_device(context, &device_att);
+  CHECK(!rc) << "Failed to query the device";
+
+  for (port_index = 1; port_index <= device_att.phys_port_cnt; port_index++) {
+    rc = ibv_query_port(context, port_index, &port_attr);
+    CHECK(!rc) << "Failed to query the port" << port_index;
+    if (port_attr.state == IBV_PORT_ACTIVE) {
+      active_ports++;
+    }
+  }
+  ibv_close_device(context);
+  return active_ports;
+}
+
+// Function to set device. If RDMA_DEVICE not set, search for device with active
+// port.
+// Fails if more than one device with active port was found.
+// Returns:
+//   device to use
+ibv_device* set_device() {
   ibv_device** dev_list;
-  ibv_device* ib_dev;
-  dev_list = ibv_get_device_list(NULL);
+  int dev_num, device_index, device_to_open = 0;
+  int num_devs_with_active_port = 0;
+  string env_p_rdma_device, str_port_num;
+
+  dev_list = ibv_get_device_list(&dev_num);
   CHECK(dev_list) << "No InfiniBand device found";
-  ib_dev = dev_list[0];
-  CHECK(ib_dev) << "No InfiniBand device found";
-  ibv_context* context = ibv_open_device(ib_dev);
-  CHECK(context) << "Open context failed for " << ibv_get_device_name(ib_dev);
-  return context;
+
+  env_p_rdma_device = get_env_var("RDMA_DEVICE");
+  if (!env_p_rdma_device.empty()) {
+    for (device_index = 0; device_index < dev_num; device_index++) {
+      if (!env_p_rdma_device.compare(
+               ibv_get_device_name(dev_list[device_index]))) {
+        CHECK(get_dev_active_port_count(dev_list[device_index]) != 0)
+            << "Device " << ibv_get_device_name(dev_list[device_index])
+            << " has no active ports";
+        return dev_list[device_index];
+      }
+    }
+    // check validity of input device
+    CHECK(false) << "The device " << env_p_rdma_device << " wasn't found";
+  } else {
+  // set default device
+    str_port_num = get_env_var("RDMA_DEVICE_PORT");
+    CHECK(str_port_num.empty())
+        << "RDMA_DEVICE should be provided if RDMA_DEVICE_PORT is set by user";
+    for (device_index = 0; device_index < dev_num; device_index++) {
+      // get port_num
+      if (get_dev_active_port_count(dev_list[device_index]) > 0) {
+        num_devs_with_active_port++;
+        CHECK(num_devs_with_active_port <= 1) << ". More than one device with "
+                                                 "active port in the system. "
+                                                 "Please enter RDMA_DEVICE";
+        // found device with at least 1 active port
+        device_to_open = device_index;
+      }
+    }
+    CHECK(num_devs_with_active_port > 0)
+        << "There is no active port in the system";
+    return dev_list[device_to_open];
+  }
+  CHECK(false) << "No device was set!";
+  return NULL;  // never happens
+}
+
+// Function to set port for device.
+// If RDMA_DEVICE_PORT not set, first active port of the device will be set.
+// Args:
+//   context of the device
+// Returns:
+//   port to use
+uint8_t set_port(ibv_context* context) {
+  uint8_t port_num = 0; //0 is illegal port number
+  string str_port_num;
+  ibv_device_attr device_att;
+  ibv_port_attr port_attr;
+  int rc, port_index;
+
+  rc = ibv_query_device(context, &device_att);
+  CHECK(!rc) << "Failed to query the device\n";
+
+  str_port_num = get_env_var("RDMA_DEVICE_PORT");
+  // user defined port
+  if (!str_port_num.empty()) {
+    port_num = stoi(str_port_num);
+    CHECK(port_num > 0) << "RDMA_DEVICE_PORT should be positive";
+    CHECK(port_num <= device_att.phys_port_cnt) << "RDMA_DEVICE_PORT should be "
+                                                   "less or equal to amount of "
+                                                   "available ports";
+    rc = ibv_query_port(context, port_num, &port_attr);
+    CHECK(!rc) << "Failed to query the port" << port_num;
+    // check if port id active
+    CHECK(port_attr.state == IBV_PORT_ACTIVE)
+        << "Selected RDMA_DEVICE_PORT is not active";
+  }
+  // set default port
+  else {
+    for (port_index = 1; port_index <= device_att.phys_port_cnt; port_index++) {
+      rc = ibv_query_port(context, port_index, &port_attr);
+      CHECK(!rc) << "Failed to query the port" << port_index;
+      if (port_attr.state == IBV_PORT_ACTIVE) {
+        port_num = port_index;
+        break;
+      }
+    }
+    CHECK_GT(port_num, 0) << "No active ports";
+  }
+  return port_num;
+}
+
+// Function read from sysfs file
+// Args:
+//   dir - directory
+//   file - file
+//   buff - buffer for the result
+//   size - buffer size
+// Returns:
+//   number of bytes were read or -1 if failed
+int read_sysfs_file(const char* dir, const char* file, char* buf, size_t size) {
+  char* path;
+  int fd;
+  int len;
+
+  if (asprintf(&path, "%s/%s", dir, file) < 0) return -1;
+
+  fd = open(path, O_RDONLY);
+  if (fd < 0) {
+    free(path);
+    return -1;
+  }
+
+  len = read(fd, buf, size);
+
+  close(fd);
+  free(path);
+
+  if (len > 0 && buf[len - 1] == '\n') buf[--len] = '\0';
+
+  return len;
+}
+
+// Function to check if GID index support RoCE V2
+// Args:
+//   context - device context
+//   port_num - port number
+//   index -  GID index
+// Returns:
+//   if GID supports RoCE V2 - true, otherwise - false.
+bool is_gid_type_roce_v2(ibv_context* context, uint8_t port_num,
+                         uint8_t index) {
+  char name[32];
+  char buff[41];
+
+  snprintf(name, sizeof(name), "ports/%d/gid_attrs/types/%d", port_num, index);
+  if (read_sysfs_file(context->device->ibdev_path, name, buff, sizeof(buff)) <=
+      0) {
+    return false;
+  }
+  return !strcmp(buff, RoCE_V2);
+}
+
+// Function to set GID index.
+// If the port link is IB, no GID index should be selected.
+// If Ethernet but RDMA_GID_INDEX not set gid index that supports
+//   RoCE V2 will be chosen(fails if more then one IP is configured)
+// Args:
+//   context - device context
+//   port_num - port number
+// Returns:
+//   GID index to use
+uint8_t set_gid(uint8_t port_num, ibv_context* context) {
+  ibv_port_attr port_attr;
+  string gid_str;
+  int rc, i, gids_num = 0, v2_ip_num = 0;
+  union ibv_gid gid;
+  uint8_t gid_index = 0;
+
+  rc = ibv_query_port(context, port_num, &port_attr);
+  CHECK(!rc) << "Failed to query the port" << port_num;
+
+  for (i = 0; i < port_attr.gid_tbl_len; i++) {
+    rc = ibv_query_gid(context, port_num, i, &gid);
+    CHECK(!rc) << "Failed to query gid to port " << (int)port_num << " index "
+               << i;
+    if (gid.global.interface_id) {
+      gids_num++;
+      if (gid.global.subnet_prefix == 0 &&
+          is_gid_type_roce_v2(context, port_num, i)) {
+        if (v2_ip_num == 0) {
+          // can be overwritten by RDMA_GID_INDEX later
+          gid_index = i;
+        }
+        v2_ip_num++;
+      }
+    }
+  }
+  switch (port_attr.link_layer) {
+    case(IBV_LINK_LAYER_ETHERNET) :
+      gid_str = get_env_var("RDMA_GID_INDEX");
+      if (!gid_str.empty()) {
+        gid_index = stoi(gid_str);
+        CHECK(gid_index < gids_num)
+            << "RDMA_GID_INDEX should be less than GIDs amount" << gids_num;
+      } else {
+        CHECK(v2_ip_num <= 1)
+            << "More than one IP is available, please specify GID_INDEX";
+      }
+      break;
+    case(IBV_LINK_LAYER_INFINIBAND) :  // no need in GID index
+      break;
+    default:
+      LOG(INFO) << "Unknown port link layer. Currently supporting Ethernet and "
+                   "InfiniBand only. ";
+  }
+  if (!is_gid_type_roce_v2(context, port_num, gid_index)) {
+    LOG(INFO) << "RoCE v2 is not configured for GID_INDEX " << (int)gid_index;
+  }
+  return gid_index;
+}
+
+// set the default or environment value to the configuration parameter.
+// Args:
+//   default_val- the default value for this parameter
+//   env_param- the environment parameter's name
+// Returns:
+//   32-bit value
+uint32_t set_param(uint32_t default_val, const char* env_param) {
+  uint32_t val = default_val;
+  string val_s;
+
+  val_s = get_env_var(env_param);
+
+  if (!val_s.empty()) {
+    val = stoi(val_s);
+  }
+  return val;
+}
+
+enum ibv_mtu set_mtu(uint8_t port_num, ibv_context* context) {
+  ibv_port_attr port_attr;
+  enum ibv_mtu mtu;
+  string mtu_s;
+  int rc, mtu_i;
+
+  rc = ibv_query_port(context, port_num, &port_attr);
+  CHECK(!rc) << "Failed to query the port" << port_num;
+
+  mtu_s = get_env_var("RDMA_MTU");
+
+  if (!mtu_s.empty()) {
+    mtu_i = stoi(mtu_s);
+    switch (mtu_i) {
+      case 256:
+        mtu = IBV_MTU_256;
+        break;
+      case 512:
+        mtu = IBV_MTU_512;
+        break;
+      case 1024:
+        mtu = IBV_MTU_1024;
+        break;
+      case 2048:
+        mtu = IBV_MTU_2048;
+        break;
+      case 4096:
+        mtu = IBV_MTU_4096;
+        break;
+      default:
+        CHECK(0) << "Error: MTU input value must be one of the following: 256, "
+                    "512, 1024, 2048, 4096. MTU " << mtu << " is invalid\n";
+        break;
+    }
+    CHECK(mtu < port_attr.active_mtu)
+        << "MTU configuration for the QPs is larger than active MTU";
+  } else {
+    mtu = port_attr.active_mtu;
+  }
+  return mtu;
+}
+
+RdmaParams params_init(ibv_context* context) {
+  RdmaParams params;
+
+  params.port_num = set_port(context);
+  params.sgid_index = set_gid(params.port_num, context);
+  params.pkey_index = (uint8_t)set_param(PKEY_DEFAULT, "RDMA_PKEY");
+  params.queue_depth = set_param(QUEUE_DEPTH_DEFAULT, "RDMA_QUEUE_DEPTH");
+  params.timeout = (uint8_t)set_param(TIMEOUT_DEFAULT, "RDMA_TIMEOUT");
+  params.retry_cnt = (uint8_t)set_param(RETRY_CNT_DEFAULT, "RDMA_RETRY_CNT");
+  params.sl = (uint8_t)set_param(SL_DEFAULT, "RDMA_SL");
+  CHECK(params.sl <= 7) << "SL value is " << (int)params.sl
+                        << ". Valid values are 0-7.";
+  params.mtu = set_mtu(params.port_num, context);
+  params.traffic_class = set_param(TRAFFIC_CLASS, "RDMA_TRAFFIC_CLASS");
+  return params;
 }
 
 ibv_pd* alloc_protection_domain(ibv_context* context) {
@@ -85,7 +409,8 @@ ibv_pd* alloc_protection_domain(ibv_context* context) {
 }
 
 RdmaAdapter::RdmaAdapter(const WorkerEnv* worker_env)
-    : context_(open_default_device()),
+    : context_(open_device(set_device())),
+      params_(params_init(context_)),
       pd_(alloc_protection_domain(context_)),
       worker_env_(worker_env) {
   event_channel_ = ibv_create_comp_channel(context_);
@@ -128,9 +453,9 @@ void RdmaAdapter::Process_CQ() {
     CHECK_GE(ne, 0);
     for (int i = 0; i < ne; ++i) {
       CHECK(wc_[i].status == IBV_WC_SUCCESS)
-          << "Failed status \n"
-          << ibv_wc_status_str(wc_[i].status) << " " << wc_[i].status << " "
-          << static_cast<int>(wc_[i].wr_id) << " " << wc_[i].vendor_err;
+          << "Failed status \n" << ibv_wc_status_str(wc_[i].status) << " "
+          << wc_[i].status << " " << static_cast<int>(wc_[i].wr_id) << " "
+          << wc_[i].vendor_err;
       if (wc_[i].opcode == IBV_WC_RECV_RDMA_WITH_IMM) {
         RdmaChannel* rc = reinterpret_cast<RdmaChannel*>(wc_[i].wr_id);
         // put back a recv wr.
@@ -242,8 +567,8 @@ RdmaChannel::RdmaChannel(const RdmaAdapter* adapter, const string local_name,
     memset(&attr, 0, sizeof(ibv_qp_init_attr));
     attr.send_cq = adapter_->cq_;
     attr.recv_cq = adapter_->cq_;
-    attr.cap.max_send_wr = RdmaAdapter::MAX_CONCURRENT_WRITES;
-    attr.cap.max_recv_wr = RdmaAdapter::MAX_CONCURRENT_WRITES;
+    attr.cap.max_send_wr = adapter_->params_.queue_depth;
+    attr.cap.max_recv_wr = adapter_->params_.queue_depth;
     attr.cap.max_send_sge = 1;
     attr.cap.max_recv_sge = 1;
     attr.qp_type = IBV_QPT_RC;
@@ -257,8 +582,8 @@ RdmaChannel::RdmaChannel(const RdmaAdapter* adapter, const string local_name,
     struct ibv_qp_attr attr;
     memset(&attr, 0, sizeof(ibv_qp_attr));
     attr.qp_state = IBV_QPS_INIT;
-    attr.pkey_index = 0;
-    attr.port_num = 1;
+    attr.pkey_index = adapter_->params_.pkey_index;
+    attr.port_num = adapter_->params_.port_num;
     attr.qp_access_flags = IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_WRITE;
 
     int mask =
@@ -269,13 +594,15 @@ RdmaChannel::RdmaChannel(const RdmaAdapter* adapter, const string local_name,
   // Local address
   {
     struct ibv_port_attr attr;
-    CHECK(!ibv_query_port(adapter_->context_, (uint8_t)1, &attr))
+    CHECK(
+        !ibv_query_port(adapter_->context_, adapter_->params_.port_num, &attr))
         << "Query port";
     self_.lid = attr.lid;
     self_.qpn = qp_->qp_num;
     self_.psn = static_cast<uint32_t>(random::New64()) & 0xffffff;
     union ibv_gid gid;
-    CHECK(!ibv_query_gid(adapter_->context_, (uint8_t)1, 0, &gid))
+    CHECK(!ibv_query_gid(adapter_->context_, adapter_->params_.port_num,
+                         adapter_->params_.sgid_index, &gid))
         << "Query gid";
     self_.snp = gid.global.subnet_prefix;
     self_.iid = gid.global.interface_id;
@@ -284,7 +611,7 @@ RdmaChannel::RdmaChannel(const RdmaAdapter* adapter, const string local_name,
   // create message and ack buffers, then initialize the tables.
   {
     const string buffer_names[] = {"tx_message_buffer", "rx_message_buffer",
-                                   "tx_ack_buffer", "rx_ack_buffer"};
+                                   "tx_ack_buffer",     "rx_ack_buffer"};
     tx_message_buffer_ = new RdmaMessageBuffer(this, buffer_names[0]);
     rx_message_buffer_ = new RdmaMessageBuffer(this, buffer_names[1]);
     tx_ack_buffer_ = new RdmaAckBuffer(this, buffer_names[2]);
@@ -345,7 +672,7 @@ void RdmaChannel::SetRemoteAddress(const RdmaAddress& ra, bool override) {
 void RdmaChannel::Recv() {
   struct ibv_recv_wr wr;
   memset(&wr, 0, sizeof(wr));
-  wr.wr_id = (uint64_t)this;
+  wr.wr_id = (uint64_t) this;
   struct ibv_recv_wr* bad_wr;
   CHECK(!ibv_post_recv(qp_, &wr, &bad_wr)) << "Failed to post recv";
 }
@@ -479,11 +806,9 @@ void RdmaChannel::Connect(const RdmaAddress& remoteAddr) {
     struct ibv_qp_attr attr;
     memset(&attr, 0, sizeof(ibv_qp_attr));
     attr.qp_state = IBV_QPS_RTR;
-    struct ibv_port_attr port_attr;
-    CHECK(!ibv_query_port(adapter_->context_, (uint8_t)1, &port_attr))
-        << "Query port failed";
+
     // This assumes both QP's ports are configured with the same MTU
-    attr.path_mtu = port_attr.active_mtu;
+    attr.path_mtu = adapter_->params_.mtu;
     attr.dest_qp_num = remoteAddr.qpn;
     attr.rq_psn = remoteAddr.psn;
     attr.max_dest_rd_atomic = 1;
@@ -494,30 +819,32 @@ void RdmaChannel::Connect(const RdmaAddress& remoteAddr) {
     attr.ah_attr.grh.flow_label = 0;
     attr.ah_attr.grh.hop_limit = 255;
     attr.ah_attr.dlid = remoteAddr.lid;
-    attr.ah_attr.sl = 0;
+    attr.ah_attr.sl = adapter_->params_.sl;
     attr.ah_attr.src_path_bits = 0;
-    attr.ah_attr.port_num = 1;
+    attr.ah_attr.port_num = adapter_->params_.port_num;
+    attr.ah_attr.grh.sgid_index = adapter_->params_.sgid_index;
+    attr.ah_attr.grh.traffic_class = adapter_->params_.traffic_class;
 
     int r;
-    CHECK(!(r = ibv_modify_qp(qp_, &attr,
-                              IBV_QP_STATE | IBV_QP_AV | IBV_QP_PATH_MTU |
-                                  IBV_QP_DEST_QPN | IBV_QP_RQ_PSN |
-                                  IBV_QP_MAX_DEST_RD_ATOMIC |
-                                  IBV_QP_MIN_RNR_TIMER)))
+    CHECK(!(r = ibv_modify_qp(qp_, &attr, IBV_QP_STATE | IBV_QP_AV |
+                                              IBV_QP_PATH_MTU |
+                                              IBV_QP_DEST_QPN | IBV_QP_RQ_PSN |
+                                              IBV_QP_MAX_DEST_RD_ATOMIC |
+                                              IBV_QP_MIN_RNR_TIMER)))
         << "QP to Ready to Receive " << r;
 
     memset(&attr, 0, sizeof(ibv_qp_attr));
     attr.qp_state = IBV_QPS_RTS;
     attr.sq_psn = self_.psn;
-    attr.timeout = 14;
-    attr.retry_cnt = 7;
+    attr.timeout = adapter_->params_.timeout;
+    attr.retry_cnt = adapter_->params_.retry_cnt;
     attr.rnr_retry = 7; /* infinite */
     attr.max_rd_atomic = 1;
 
-    CHECK(!(r = ibv_modify_qp(qp_, &attr,
-                              IBV_QP_STATE | IBV_QP_TIMEOUT | IBV_QP_RETRY_CNT |
-                                  IBV_QP_RNR_RETRY | IBV_QP_SQ_PSN |
-                                  IBV_QP_MAX_QP_RD_ATOMIC)))
+    CHECK(!(r = ibv_modify_qp(qp_, &attr, IBV_QP_STATE | IBV_QP_TIMEOUT |
+                                              IBV_QP_RETRY_CNT |
+                                              IBV_QP_RNR_RETRY | IBV_QP_SQ_PSN |
+                                              IBV_QP_MAX_QP_RD_ATOMIC)))
         << "QP to Ready to Send " << r;
 
     connected_ = true;
@@ -604,7 +931,7 @@ void RdmaBuffer::Write(uint32_t imm_data, size_t buffer_size) {
 
   struct ibv_send_wr wr;
   memset(&wr, 0, sizeof(wr));
-  wr.wr_id = (uint64_t)this;
+  wr.wr_id = (uint64_t) this;
   wr.sg_list = &list;
   wr.num_sge = 1;
   wr.opcode = IBV_WR_RDMA_WRITE_WITH_IMM;
@@ -699,9 +1026,9 @@ Rendezvous::DoneCallback RdmaTensorBuffer::getRecvTensorCallback(
     TensorProto proto;
     if (src_dev->tensorflow_gpu_device_info() &&
         (!send_args.alloc_attrs.on_host())) {
-      CHECK(send_args.device_context)
-          << "send dev name: " << src_dev->name()
-          << " gpu_info: " << src_dev->tensorflow_gpu_device_info();
+      CHECK(send_args.device_context) << "send dev name: " << src_dev->name()
+                                      << " gpu_info: "
+                                      << src_dev->tensorflow_gpu_device_info();
 
       if (can_memcpy) {
         AllocatorAttributes host_alloc_attrs;
@@ -727,8 +1054,8 @@ Rendezvous::DoneCallback RdmaTensorBuffer::getRecvTensorCallback(
         // aync instead
         GPUUtil::SetProtoFromGPU(
             in, src_dev, send_args.device_context, &proto, is_dead,
-            [this, proto, buffer_size, key, in, step_id, key_with_step_id,
-             is_dead, send_args, recv_args](const Status& s) mutable {
+	    [this, proto, buffer_size, key, in, step_id, key_with_step_id,
+            is_dead, send_args, recv_args](const Status& s) mutable {
               CHECK(s.ok()) << "copy proto from gpu sync";
               auto tensor_bytes = proto.ByteSize();
               buffer_size += tensor_bytes;
diff --git a/tensorflow/contrib/verbs/rdma.h b/tensorflow/contrib/verbs/rdma.h
index e1e07db776..52d92a7c5b 100644
--- a/tensorflow/contrib/verbs/rdma.h
+++ b/tensorflow/contrib/verbs/rdma.h
@@ -36,7 +36,24 @@ limitations under the License.
 #include "tensorflow/core/platform/mutex.h"
 
 namespace tensorflow {
-
+#define PKEY_DEFAULT 0
+#define QUEUE_DEPTH_DEFAULT 1024
+#define TIMEOUT_DEFAULT 14
+#define RETRY_CNT_DEFAULT 7
+#define SL_DEFAULT 0
+#define TRAFFIC_CLASS 0
+
+struct RdmaParams {
+  uint8_t port_num;
+  uint8_t sgid_index;
+  uint8_t pkey_index;
+  uint32_t queue_depth;
+  uint8_t timeout;
+  uint8_t retry_cnt;
+  uint8_t sl;
+  enum ibv_mtu mtu;
+  uint8_t traffic_class;
+};
 // structure to save the address of remote channels.
 struct RdmaAddress {
   uint32_t lid;
@@ -50,9 +67,20 @@ struct RemoteMR {
   uint64_t remote_addr;
   uint32_t rkey;
 };
-enum BufferStatus { none, idle, busy };
-enum Location { local, remote };
-enum BufferType { ACK, MESSAGE, TENSOR };
+enum BufferStatus {
+  none,
+  idle,
+  busy
+};
+enum Location {
+  local,
+  remote
+};
+enum BufferType {
+  ACK,
+  MESSAGE,
+  TENSOR
+};
 enum RdmaMessageType {
   RDMA_MESSAGE_ACK,
   RDMA_MESSAGE_BUFFER_IDLE,
@@ -84,6 +112,8 @@ class RdmaAdapter {
  protected:
   static const int MAX_CONCURRENT_WRITES = 1000;
   ibv_context* context_;
+  // RDMA configuration parameters
+  RdmaParams params_;
   // ibverbs protection domain
   ibv_pd* pd_;
   // Completion event channel, to wait for work completions
@@ -183,7 +213,7 @@ class RdmaBuffer {
   }
   void FreeBuffer();
   void EnqueueItem(string Item);
-  virtual void SendNextItem(){};
+  virtual void SendNextItem() {};
   void CreateCPUBuffer(size_t size, bool lock = true);
   void SetRemoteMR(RemoteMR rmi, bool override);
   uint32_t LookupBufferIndex(const string& buffer_name) {
diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD
index d71f314e11..30ff4ef358 100644
--- a/tensorflow/core/BUILD
+++ b/tensorflow/core/BUILD
@@ -2710,6 +2710,7 @@ tf_cc_test_mkl(
     srcs = [
         "graph/mkl_layout_pass_test.cc",
         "graph/mkl_tfconversion_pass_test.cc",
+        "util/mkl_util_test.cc",
     ],
     linkstatic = 1,
     deps = [
diff --git a/tensorflow/core/common_runtime/mkl_cpu_allocator.h b/tensorflow/core/common_runtime/mkl_cpu_allocator.h
index 53e80b1ee3..63b74e8dbf 100644
--- a/tensorflow/core/common_runtime/mkl_cpu_allocator.h
+++ b/tensorflow/core/common_runtime/mkl_cpu_allocator.h
@@ -81,7 +81,7 @@ class MklCPUAllocator : public Allocator {
       }
 #if defined(_SC_PHYS_PAGES) && defined(_SC_PAGESIZE)
       if (user_val > max_mem_bytes) {
-        LOG(WARNING) << "The user specifed a memory limit " << kMaxLimitStr
+        LOG(WARNING) << "The user specified a memory limit " << kMaxLimitStr
                      << "=" << user_val
                      << " greater than available physical memory: "
                      << max_mem_bytes
diff --git a/tensorflow/core/common_runtime/sycl/sycl_device.h b/tensorflow/core/common_runtime/sycl/sycl_device.h
index 9caa076c72..cc272d156e 100644
--- a/tensorflow/core/common_runtime/sycl/sycl_device.h
+++ b/tensorflow/core/common_runtime/sycl/sycl_device.h
@@ -46,8 +46,8 @@ class GSYCLInterface {
 
     if (!found_device) {
       // Currently Intel GPU is not supported
-      LOG(WARNING) << "No OpenCL GPU found that is supported by ComputeCpp, "
-                      "trying OpenCL CPU";
+      LOG(WARNING) << "No OpenCL GPU found that is supported by "
+                   << "ComputeCpp/triSYCL, trying OpenCL CPU";
     }
 
     for (const auto& device : device_list) {
@@ -58,10 +58,24 @@ class GSYCLInterface {
       }
     }
 
+    if (!found_device) {
+      LOG(WARNING) << "No OpenCL CPU found that is supported by "
+                   << "ComputeCpp/triSYCL, checking for host sycl device";
+    }
+
+    for (const auto& device : device_list) {
+      // triSYCL only supports the host device for now
+      if (device.is_host()) {
+        LOG(WARNING) << "Found SYCL host device";
+        AddDevice(device);
+        found_device = true;
+      }
+    }
+
     if (!found_device) {
       // Currently Intel GPU is not supported
-      LOG(FATAL)
-          << "No OpenCL GPU nor CPU found that is supported by ComputeCpp";
+      LOG(FATAL) << "No SYCL host and no OpenCL GPU nor CPU"
+                 << " supported by ComputeCPP/triSYCL was found";
     } else {
       LOG(INFO) << "Found following OpenCL devices:";
       for (int i = 0; i < device_list.size(); i++) {
diff --git a/tensorflow/core/graph/graph.cc b/tensorflow/core/graph/graph.cc
index 87c41186d5..fd1b5d33b9 100644
--- a/tensorflow/core/graph/graph.cc
+++ b/tensorflow/core/graph/graph.cc
@@ -453,6 +453,21 @@ const Edge* Graph::AddControlEdge(Node* source, Node* dest,
   return AddEdge(source, kControlSlot, dest, kControlSlot);
 }
 
+void Graph::RemoveControlEdge(const Edge* e) {
+  if (!e->src_->IsSource() && !e->dst_->IsSink()) {
+    e->dst_->MaybeCopyOnWrite();
+    std::string e_src_name = strings::StrCat("^", e->src_->name());
+    auto* inputs = e->dst_->props_->node_def.mutable_input();
+    for (auto it = inputs->begin(); it != inputs->end(); ++it) {
+      if (*it == e_src_name) {
+        inputs->erase(it);
+        break;
+      }
+    }
+  }
+  RemoveEdge(e);
+}
+
 Status Graph::UpdateEdge(Node* new_src, int new_src_index, Node* dst,
                          int dst_index) {
   TF_RETURN_IF_ERROR(IsValidOutputTensor(new_src, new_src_index));
diff --git a/tensorflow/core/graph/graph.h b/tensorflow/core/graph/graph.h
index c5dde722fa..d0dba6e1f0 100644
--- a/tensorflow/core/graph/graph.h
+++ b/tensorflow/core/graph/graph.h
@@ -451,6 +451,11 @@ class Graph {
   // REQUIRES: The edge must exist.
   void RemoveEdge(const Edge* edge);
 
+  // Removes control edge `edge` from the graph. Note that this also updates
+  // the corresponding NodeDef to reflect the change.
+  // REQUIRES: The control edge must exist.
+  void RemoveControlEdge(const Edge* e);
+  
   // Updates the input to a node.  The existing edge to `dst` is removed and an
   // edge from `new_src` to `dst` is created. The NodeDef associated with `dst`
   // is also updated.
diff --git a/tensorflow/core/graph/graph_partition.cc b/tensorflow/core/graph/graph_partition.cc
index b9e3cba035..1924c05d3d 100644
--- a/tensorflow/core/graph/graph_partition.cc
+++ b/tensorflow/core/graph/graph_partition.cc
@@ -117,7 +117,7 @@ DataType EdgeType(const Edge* e) {
   }
 }
 
-// Return true iff we need to add a same device send/recv for 'edge'.
+// Return true iff we need to add the same device send/recv for 'edge'.
 bool NeedSameDeviceSendRecv(const Edge* edge, const GraphInfo& info) {
   if (edge->IsControlEdge()) {
     return false;
@@ -1116,7 +1116,7 @@ Status Partition(const PartitionOptions& opts, Graph* g,
         // before the data is available.
         AddInput(real_recv, send->name(), Graph::kControlSlot);
       } else if (control_flow_edge != nullptr) {
-        // Redirect control edge to the real recv since this is not a same
+        // Redirect control edge to the real recv since this is not the same
         // device send/recv.
         --num_control_flow_edges;
         AddInput(real_recv, control_flow_edge->src()->name(),
diff --git a/tensorflow/core/graph/graph_test.cc b/tensorflow/core/graph/graph_test.cc
index 7686cef219..2aa1b31e15 100644
--- a/tensorflow/core/graph/graph_test.cc
+++ b/tensorflow/core/graph/graph_test.cc
@@ -118,6 +118,25 @@ class GraphTest : public ::testing::Test {
     LOG(FATAL) << name;
   }
 
+  bool ControlEdgeExistsInGraphOrNodeDef(const Node* src,
+                                         const Node* dst) {
+    for (const Edge *e : dst->in_edges()) {
+      if (e->IsControlEdge() &&
+          e->src() == src &&
+          e->src_output() == Graph::kControlSlot &&
+          e->dst_input() == Graph::kControlSlot) {
+        return true;
+      }
+    }
+    std::string control_edge_name = strings::StrCat("^", src->name());
+    for (int i = 0; i < dst->def().input_size(); ++i) {
+      if (dst->def().input(i) == control_edge_name) {
+        return true;
+      }
+    }
+    return false;
+  }
+
   Graph graph_;
 
  private:
@@ -458,8 +477,8 @@ TEST_F(GraphTest, AddControlEdge) {
   EXPECT_TRUE(edge == nullptr);
   EXPECT_EQ(b->def().input_size(), 2);
 
-  // Can add redundant control edge with create_duplicate.
-  edge = graph_.AddControlEdge(a, b, /*create_duplicate=*/true);
+  // Can add redundant control edge with allow_duplicates.
+  edge = graph_.AddControlEdge(a, b, /*allow_duplicates=*/true);
   EXPECT_TRUE(edge != nullptr);
   // create_duplicate causes the NodeDef not to be updated.
   ASSERT_EQ(b->def().input_size(), 2);
@@ -477,6 +496,47 @@ TEST_F(GraphTest, AddControlEdge) {
   EXPECT_EQ(b->def().input_size(), 2);
 }
 
+TEST_F(GraphTest, RemoveControlEdge) {
+  FromGraphDef(
+      "node { name: 'A' op: 'OneOutput' }"
+      "node { name: 'B' op: 'OneInputTwoOutputs' input: [ 'A:0' ] }"
+      "node { name: 'C' op: 'NoOp' } ");
+  Node* a = FindNode("A");
+  Node* b = FindNode("B");
+  Node* c = FindNode("C");
+
+  // Add a control edge.
+  const Edge* edge_1 = graph_.AddControlEdge(c, a);
+  const Edge* edge_2 = graph_.AddControlEdge(a, b);
+  ASSERT_TRUE(edge_1 != nullptr);
+  ASSERT_TRUE(edge_2 != nullptr);
+
+  ASSERT_TRUE(ControlEdgeExistsInGraphOrNodeDef(c, a));
+  ASSERT_TRUE(ControlEdgeExistsInGraphOrNodeDef(a, b));
+
+  graph_.RemoveControlEdge(edge_1);
+  ASSERT_TRUE(!ControlEdgeExistsInGraphOrNodeDef(c, a));
+  ASSERT_TRUE(ControlEdgeExistsInGraphOrNodeDef(a, b));
+
+  graph_.RemoveControlEdge(edge_2);
+  ASSERT_TRUE(!ControlEdgeExistsInGraphOrNodeDef(c, a));
+  ASSERT_TRUE(!ControlEdgeExistsInGraphOrNodeDef(a, b));
+
+  // Test removing a duplicate control edge.
+  // Note that unless allow_duplicates is true, the duplicate edge
+  // will not be added. That's why we expect edge_4 to be a null
+  // pointer. We are not testing with allow_duplicates set to true,
+  // as that is a highly unlikely use case that does not make much
+  // sense.
+  const Edge* edge_3 = graph_.AddControlEdge(c, a);
+  const Edge* edge_4 = graph_.AddControlEdge(c, a);
+  ASSERT_TRUE(edge_3 != nullptr);
+  ASSERT_TRUE(edge_4 == nullptr);
+
+  graph_.RemoveControlEdge(edge_3);
+  ASSERT_TRUE(!ControlEdgeExistsInGraphOrNodeDef(c, a));
+}
+
 TEST_F(GraphTest, UpdateEdge) {
   // Build a little graph
   Node* a = FromNodeDef("A", "OneOutput", 0);
diff --git a/tensorflow/core/graph/mkl_graph_util.h b/tensorflow/core/graph/mkl_graph_util.h
index cb32d64334..880e4e712e 100644
--- a/tensorflow/core/graph/mkl_graph_util.h
+++ b/tensorflow/core/graph/mkl_graph_util.h
@@ -21,107 +21,108 @@ limitations under the License.
 #include "tensorflow/core/framework/op_kernel.h"
 
 namespace tensorflow {
-// Since our ops are going to produce and also consume N addition tensors
-// (Mkl) for N Tensorflow tensors, we can have following different
-// orderings among these 2N tensors.
-//
-// E.g., for Tensorflow tensors A, B, and C, our ops will produce and
-// consume A_m, B_m, and C_m additionally.
-//
-// INTERLEAVED: in this case 2N tensors are interleaved. So for above
-//              example, the ordering looks like: A, A_m, B, B_m, C, C_m.
-//
-// CONTIGUOUS: in thi case N Tensorflow tensors are contiguous followed
-//             by N Mkl tensors. So for above example, the ordering looks
-//             like: A, B, C, A_m, B_m, C_m
-//
-// Following APIs map index of original Tensorflow tensors to their
-// appropriate position based on selected ordering. For contiguous ordering,
-// we need to know the total number of tensors (parameter total).
-//
-typedef enum { TENSORS_INTERLEAVED, TENSORS_CONTIGUOUS } MklTfTensorOrdering;
-// NOTE: Currently, we use contiguous ordering. If you change this, then you
-// would need to change Mkl op definitions in nn_ops.cc.
-static MklTfTensorOrdering kTensorOrdering = TENSORS_CONTIGUOUS;
+  // Since our ops are going to produce and also consume N addition tensors
+  // (Mkl) for N Tensorflow tensors, we can have following different
+  // orderings among these 2N tensors.
+  //
+  // E.g., for Tensorflow tensors A, B, and C, our ops will produce and
+  // consume A_m, B_m, and C_m additionally.
+  //
+  // INTERLEAVED: in this case 2N tensors are interleaved. So for above
+  //              example, the ordering looks like: A, A_m, B, B_m, C, C_m.
+  //
+  // CONTIGUOUS: in thi case N Tensorflow tensors are contiguous followed
+  //             by N Mkl tensors. So for above example, the ordering looks
+  //             like: A, B, C, A_m, B_m, C_m
+  //
+  // Following APIs map index of original Tensorflow tensors to their
+  // appropriate position based on selected ordering. For contiguous ordering,
+  // we need to know the total number of tensors (parameter total).
+  //
+  typedef enum { TENSORS_INTERLEAVED, TENSORS_CONTIGUOUS } MklTfTensorOrdering;
+  // NOTE: Currently, we use contiguous ordering. If you change this, then you
+  // would need to change Mkl op definitions in nn_ops.cc.
+  static MklTfTensorOrdering kTensorOrdering = TENSORS_CONTIGUOUS;
 
-// Get index of MetaData tensor from index 'n' of Data tensor.
-inline int DataIndexToMetaDataIndex(int n, int total_tensors) {
-  if (kTensorOrdering == MklTfTensorOrdering::TENSORS_INTERLEAVED) {
-    // For interleaved ordering, Mkl tensor follows immediately after
-    // Tensorflow tensor.
-    return n + 1;
-  } else {
-    CHECK_EQ(kTensorOrdering, MklTfTensorOrdering::TENSORS_CONTIGUOUS);
-    // For contiguous ordering, Mkl tensor is n+total_tensors / 2 away.
-    return n + total_tensors / 2;
+  // Get index of MetaData tensor from index 'n' of Data tensor.
+  inline int DataIndexToMetaDataIndex(int n, int total_tensors) {
+    if (kTensorOrdering == MklTfTensorOrdering::TENSORS_INTERLEAVED) {
+      // For interleaved ordering, Mkl tensor follows immediately after
+      // Tensorflow tensor.
+      return n + 1;
+    } else {
+      CHECK_EQ(kTensorOrdering, MklTfTensorOrdering::TENSORS_CONTIGUOUS);
+      // For contiguous ordering, Mkl tensor is n+total_tensors / 2 away.
+      return n + total_tensors / 2;
+    }
   }
-}
 
-int inline GetTensorDataIndex(int n, int total_tensors) {
-  if (kTensorOrdering == MklTfTensorOrdering::TENSORS_INTERLEAVED) {
-    return 2 * n;  // index corresponding to nth input/output tensor
-  } else {
-    CHECK_EQ(kTensorOrdering, MklTfTensorOrdering::TENSORS_CONTIGUOUS);
-    return n;
-  }
-}
+  int inline GetTensorDataIndex(int n, int total_tensors) {
+      if (kTensorOrdering == MklTfTensorOrdering::TENSORS_INTERLEAVED) {
+        return 2 * n;  // index corresponding to nth input/output tensor
+      } else {
+        CHECK_EQ(kTensorOrdering, MklTfTensorOrdering::TENSORS_CONTIGUOUS);
+        return n;
+      }
+    }
 
-int inline GetTensorMetaDataIndex(int n, int total_tensors) {
-  // Get index for TensorData first and then use mapping function
-  // to get TensorMetaData index from TensorData index.
-  int tidx = GetTensorDataIndex(n, total_tensors);
-  return DataIndexToMetaDataIndex(tidx, total_tensors);
-}
+  int inline GetTensorMetaDataIndex(int n, int total_tensors) {
+      // Get index for TensorData first and then use mapping function
+      // to get TensorMetaData index from TensorData index.
+      int tidx = GetTensorDataIndex(n, total_tensors);
+      return DataIndexToMetaDataIndex(tidx, total_tensors);
+    }
 
 namespace mkl_op_registry {
-static const char* kMklOpLabel = "MklOp";
-static const char* kMklOpLabelPattern = "label='MklOp'";
-
-// Get the name of Mkl op from original TensorFlow op
-// We prefix 'Mkl' to the original op to get Mkl op.
-inline string GetMklOpName(const string& name) {
-  // Prefix that we add to Tensorflow op name to construct Mkl op name.
-  const char* const kMklOpPrefix = "_Mkl";
-  return string(kMklOpPrefix) + name;
-}
+  static const char* kMklOpLabel = "MklOp";
+  static const char* kMklOpLabelPattern = "label='MklOp'";
 
-// Check whether opname with type T is registered as MKL-compliant.
-//
-// @input: name of the op
-// @input: T datatype to be used for checking op
-// @return: true if opname is registered as Mkl op; false otherwise
-static inline bool IsMklOp(const std::string& op_name, DataType T) {
-  string kernel = KernelsRegisteredForOp(op_name);
-  bool result =
-      kernel.find(kMklOpLabelPattern) != string::npos && (T == DT_FLOAT);
-  if (result) {
-    VLOG(1) << "mkl_op_registry::" << op_name << " is " << kMklOpLabel;
+  // Get the name of Mkl op from original TensorFlow op
+  // We prefix 'Mkl' to the original op to get Mkl op.
+  inline string GetMklOpName(const string& name) {
+    // Prefix that we add to Tensorflow op name to construct Mkl op name.
+    const char* const kMklOpPrefix = "_Mkl";
+    return string(kMklOpPrefix) + name;
   }
-  return result;
-}
 
-// Check whether opname with type T is registered as MKL-compliant and
-// is element-wise.
-//
-// @input: name of the op
-// @input: T datatype to be used for checking op
-// @return: true if opname is registered as element-wise Mkl op;
-// false otherwise
-static inline bool IsMklElementWiseOp(const std::string& op_name, DataType T) {
-  if (!IsMklOp(op_name, T)) {
-    return false;
+  // Check whether opname with type T is registered as MKL-compliant.
+  //
+  // @input: name of the op
+  // @input: T datatype to be used for checking op
+  // @return: true if opname is registered as Mkl op; false otherwise
+  static inline bool IsMklOp(const std::string& op_name, DataType T) {
+    string kernel = KernelsRegisteredForOp(op_name);
+    bool result =
+        kernel.find(kMklOpLabelPattern) != string::npos && (T == DT_FLOAT);
+    if (result) {
+      VLOG(1) << "mkl_op_registry::" << op_name << " is " << kMklOpLabel;
+    }
+    return result;
   }
 
-  bool result = (0 == op_name.compare(GetMklOpName("Add")) ||
-                 0 == op_name.compare(GetMklOpName("Sub")) ||
-                 0 == op_name.compare(GetMklOpName("Mul")) ||
-                 0 == op_name.compare(GetMklOpName("Maximum")) ||
-                 0 == op_name.compare(GetMklOpName("SquaredDifference")));
+  // Check whether opname with type T is registered as MKL-compliant and
+  // is element-wise.
+  //
+  // @input: name of the op
+  // @input: T datatype to be used for checking op
+  // @return: true if opname is registered as element-wise Mkl op;
+  // false otherwise
+  static inline bool IsMklElementWiseOp(const std::string& op_name,
+    DataType T) {
+    if (!IsMklOp(op_name, T)) {
+      return false;
+    }
 
-  VLOG(1) << "mkl_op_registry::" << op_name
-          << " is elementwise MKL op: " << result;
-  return result;
-}
+    bool result = (0 == op_name.compare(GetMklOpName("Add")) ||
+                    0 == op_name.compare(GetMklOpName("Sub")) ||
+                    0 == op_name.compare(GetMklOpName("Mul")) ||
+                    0 == op_name.compare(GetMklOpName("Maximum")) ||
+                    0 == op_name.compare(GetMklOpName("SquaredDifference")));
+
+    VLOG(1) << "mkl_op_registry::" << op_name
+            << " is elementwise MKL op: " << result;
+    return result;
+  }
 }  // namespace mkl_op_registry
 }  // namespace tensorflow
 #endif  // INTEL_MKL
diff --git a/tensorflow/core/graph/mkl_layout_pass.cc b/tensorflow/core/graph/mkl_layout_pass.cc
index f4c9073dee..912075aa28 100644
--- a/tensorflow/core/graph/mkl_layout_pass.cc
+++ b/tensorflow/core/graph/mkl_layout_pass.cc
@@ -37,8 +37,8 @@ limitations under the License.
 #include "tensorflow/core/platform/logging.h"
 #include "tensorflow/core/util/tensor_format.h"
 
-#include "tensorflow/core/graph/mkl_graph_util.h"
 #include "tensorflow/core/graph/mkl_layout_pass.h"
+#include "tensorflow/core/graph/mkl_graph_util.h"
 
 namespace tensorflow {
 
diff --git a/tensorflow/core/graph/mkl_tfconversion_pass.cc b/tensorflow/core/graph/mkl_tfconversion_pass.cc
index fe4588389e..599bb88f01 100644
--- a/tensorflow/core/graph/mkl_tfconversion_pass.cc
+++ b/tensorflow/core/graph/mkl_tfconversion_pass.cc
@@ -33,8 +33,8 @@ limitations under the License.
 #include "tensorflow/core/lib/hash/hash.h"
 #include "tensorflow/core/platform/logging.h"
 
-#include "tensorflow/core/graph/mkl_graph_util.h"
 #include "tensorflow/core/graph/mkl_tfconversion_pass.h"
+#include "tensorflow/core/graph/mkl_graph_util.h"
 
 namespace tensorflow {
 
@@ -68,7 +68,7 @@ namespace tensorflow {
 // take place before we hit the op. For this, we add a new op before each
 // element-wise MKL op to deal with the inputs, called _MklInputConversion.
 // This pass has been enhanced to add this capability.
-// 
+//
 // The _MklInputConversion op will check the inputs to the elementwise op and
 // make sure that either both are in MKL format or both are in TF format,
 // depending on their initial state and whether broadcast is needed or not.
diff --git a/tensorflow/core/grappler/costs/graph_properties.h b/tensorflow/core/grappler/costs/graph_properties.h
index ee279b7e0a..239b5ac244 100644
--- a/tensorflow/core/grappler/costs/graph_properties.h
+++ b/tensorflow/core/grappler/costs/graph_properties.h
@@ -58,6 +58,12 @@ class GraphProperties {
   const std::vector<OpInfo::TensorProperties>& GetOutputProperties(
       const string& node_name) const;
 
+  static void FillTensorPropertiesFromContext(
+      const shape_inference::ShapeHandle&, const DataType&,
+      shape_inference::InferenceContext*,
+      std::unordered_map<const shape_inference::Dimension*, int>* dim_ids,
+      OpInfo::TensorProperties*);
+
  private:
   // Inputs
   GrapplerItem item_;
diff --git a/tensorflow/core/grappler/utils.cc b/tensorflow/core/grappler/utils.cc
index 7fd1876371..9ab889beb5 100644
--- a/tensorflow/core/grappler/utils.cc
+++ b/tensorflow/core/grappler/utils.cc
@@ -62,7 +62,7 @@ const std::set<NodeDef*>& NodeMap::GetOutputs(const string& node_name) const {
 void NodeMap::AddNode(const string& name, NodeDef* node) {
   auto ret = nodes_.insert(std::make_pair(name, node));
   CHECK(ret.second) << "Pair (" << name << "," << node
-                    << ") is not inserted because a same key already exists.";
+                    << ") is not inserted because the same key already exists.";
 }
 
 void NodeMap::AddOutput(const string& node_name, const string& output_name) {
diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD
index d7b457eab7..f1cb9a1860 100644
--- a/tensorflow/core/kernels/BUILD
+++ b/tensorflow/core/kernels/BUILD
@@ -929,6 +929,25 @@ tf_cc_test(
     ],
 )
 
+tf_cuda_cc_test(
+    name = "bincount_op_test",
+    size = "small",
+    srcs = ["bincount_op_test.cc"],
+    deps = [
+        ":bincount_op",
+        ":ops_testutil",
+        ":ops_util",
+        "//tensorflow/core:core_cpu",
+        "//tensorflow/core:framework",
+        "//tensorflow/core:lib",
+        "//tensorflow/core:math_ops_op_lib",
+        "//tensorflow/core:protos_all_cc",
+        "//tensorflow/core:test",
+        "//tensorflow/core:test_main",
+        "//tensorflow/core:testlib",
+    ],
+)
+
 tf_cuda_cc_test(
     name = "constant_op_test",
     size = "small",
@@ -1617,7 +1636,10 @@ DYNAMIC_DEPS = [
 tf_kernel_library(
     name = "dynamic_partition_op",
     prefix = "dynamic_partition_op",
-    deps = DYNAMIC_DEPS,
+    deps = DYNAMIC_DEPS + [
+        ":fill_functor",
+        ":gather_functor",
+    ] + if_cuda(["@cub_archive//:cub"]),
 )
 
 tf_kernel_library(
@@ -1687,7 +1709,7 @@ tf_kernel_library(
     ],
 )
 
-tf_cc_tests(
+tf_cuda_cc_tests(
     name = "dynamic_op_test",
     size = "small",
     srcs = [
@@ -2572,8 +2594,9 @@ tf_kernel_library(
 
 tf_kernel_library(
     name = "bucketize_op",
+    gpu_srcs = ["cuda_device_array.h"],
     prefix = "bucketize_op",
-    deps = MATH_DEPS,
+    deps = ARRAY_DEPS,
 )
 
 tf_kernel_library(
@@ -3174,7 +3197,7 @@ tf_kernel_library(
         "//tensorflow/core:lib",
         "//tensorflow/core:lib_internal",
         "//third_party/eigen3",
-    ],
+    ] + if_cuda(["@cub_archive//:cub"]),
 )
 
 tf_kernel_library(
diff --git a/tensorflow/core/kernels/avgpooling_op.cc b/tensorflow/core/kernels/avgpooling_op.cc
index af629d0de8..f918023693 100644
--- a/tensorflow/core/kernels/avgpooling_op.cc
+++ b/tensorflow/core/kernels/avgpooling_op.cc
@@ -153,7 +153,8 @@ class AvgPoolingOp<GPUDevice, T> : public UnaryOp<T> {
     if (data_format_ == FORMAT_NCHW) {
       DnnPoolingOp<T>::Compute(
           context, perftools::gputools::dnn::PoolingMode::kAverage, ksize_,
-          stride_, padding_, data_format_, tensor_in, output_shape);
+          stride_, padding_, data_format_, tensor_in, output_shape,
+          /*propagate_nans=*/false);
     } else {
       Tensor* output = nullptr;
       OP_REQUIRES_OK(context,
@@ -408,7 +409,7 @@ class AvgPoolingGradOp<GPUDevice, T> : public OpKernel {
     DnnPoolingGradOp<T>::Compute(
         context, perftools::gputools::dnn::PoolingMode::kAverage, ksize_,
         stride_, padding_, data_format_, nullptr, nullptr, out_backprop,
-        output_shape);
+        output_shape, /*propagate_nans=*/false);
   }
 
  private:
@@ -532,7 +533,7 @@ class AvgPoolingGradOpCustomGPUKernel : public OpKernel {
       DnnPoolingGradOp<T>::Compute(
           context, perftools::gputools::dnn::PoolingMode::kAverage, ksize_,
           stride_, padding_, data_format_, nullptr, nullptr, out_backprop,
-          output_shape);
+          output_shape, /*propagate_nans=*/false);
     }
   }
 
diff --git a/tensorflow/core/kernels/bincount_op.cc b/tensorflow/core/kernels/bincount_op.cc
index 1cd5943ef3..766d63e3be 100644
--- a/tensorflow/core/kernels/bincount_op.cc
+++ b/tensorflow/core/kernels/bincount_op.cc
@@ -17,6 +17,7 @@ limitations under the License.
 
 #define EIGEN_USE_THREADS
 
+#include "tensorflow/core/kernels/bincount_op.h"
 #include "tensorflow/core/framework/op_kernel.h"
 #include "tensorflow/core/framework/register_types.h"
 #include "tensorflow/core/framework/types.h"
@@ -27,46 +28,37 @@ namespace tensorflow {
 
 using thread::ThreadPool;
 
-template <typename T>
-class BincountOp : public OpKernel {
- public:
-  explicit BincountOp(OpKernelConstruction* ctx) : OpKernel(ctx) {}
+typedef Eigen::ThreadPoolDevice CPUDevice;
+typedef Eigen::GpuDevice GPUDevice;
 
-  void Compute(OpKernelContext* ctx) override {
-    const Tensor& arr_t = ctx->input(0);
-    const Tensor& size_tensor = ctx->input(1);
-    const Tensor& weights_t = ctx->input(2);
-    int32 size = size_tensor.scalar<int32>()();
-    OP_REQUIRES(
-        ctx, size >= 0,
-        errors::InvalidArgument("size (", size, ") must be non-negative"));
-    const bool has_weights = weights_t.NumElements() > 0;
-    OP_REQUIRES(ctx, !(has_weights && arr_t.shape() != weights_t.shape()),
-                errors::InvalidArgument(
-                    "If weights are passed, they must have the same shape (" +
-                    weights_t.shape().DebugString() + ") as arr (" +
-                    arr_t.shape().DebugString() + ")"));
-    const auto arr = arr_t.flat<int32>();
-    const auto weights = weights_t.flat<T>();
+namespace functor {
+
+template <typename T>
+struct BincountFunctor<CPUDevice, T> {
+  static Status Compute(OpKernelContext* context,
+                        const typename TTypes<int32, 1>::ConstTensor& arr,
+                        const typename TTypes<T, 1>::ConstTensor& weights,
+                        typename TTypes<T, 1>::Tensor& output) {
+    int size = output.size();
 
     Tensor all_nonneg_t;
-    OP_REQUIRES_OK(ctx,
-                   ctx->allocate_temp(DT_BOOL, TensorShape({}), &all_nonneg_t,
-                                      AllocatorAttributes()));
-    all_nonneg_t.scalar<bool>().device(ctx->eigen_cpu_device()) =
+    TF_RETURN_IF_ERROR(context->allocate_temp(
+        DT_BOOL, TensorShape({}), &all_nonneg_t, AllocatorAttributes()));
+    all_nonneg_t.scalar<bool>().device(context->eigen_cpu_device()) =
         (arr >= 0).all();
-    OP_REQUIRES(ctx, all_nonneg_t.scalar<bool>()(),
-                errors::InvalidArgument("Input arr must be non-negative!"));
+    if (!all_nonneg_t.scalar<bool>()()) {
+      return errors::InvalidArgument("Input arr must be non-negative!");
+    }
 
     // Allocate partial output bin sums for each worker thread. Worker ids in
     // ParallelForWithWorkerId range from 0 to NumThreads() inclusive.
     ThreadPool* thread_pool =
-        ctx->device()->tensorflow_cpu_worker_threads()->workers;
+        context->device()->tensorflow_cpu_worker_threads()->workers;
     const int64 num_threads = thread_pool->NumThreads() + 1;
     Tensor partial_bins_t;
-    OP_REQUIRES_OK(ctx, ctx->allocate_temp(weights_t.dtype(),
-                                           TensorShape({num_threads, size}),
-                                           &partial_bins_t));
+    TF_RETURN_IF_ERROR(context->allocate_temp(DataTypeToEnum<T>::value,
+                                              TensorShape({num_threads, size}),
+                                              &partial_bins_t));
     auto partial_bins = partial_bins_t.matrix<T>();
     partial_bins.setZero();
     thread_pool->ParallelForWithWorkerId(
@@ -75,7 +67,7 @@ class BincountOp : public OpKernel {
           for (int64 i = start_ind; i < limit_ind; i++) {
             int32 value = arr(i);
             if (value < size) {
-              if (has_weights) {
+              if (weights.size()) {
                 partial_bins(worker_id, value) += weights(i);
               } else {
                 // Complex numbers don't support "++".
@@ -84,25 +76,62 @@ class BincountOp : public OpKernel {
             }
           }
         });
-    TensorShape output_shape({size});
-    Tensor* output_t;
-    OP_REQUIRES_OK(ctx, ctx->allocate_output(0, output_shape, &output_t));
+
     // Sum the partial bins along the 0th axis.
     Eigen::array<int, 1> reduce_dims({0});
-    output_t->flat<T>().device(ctx->eigen_cpu_device()) =
-        partial_bins.sum(reduce_dims);
+    output.device(context->eigen_cpu_device()) = partial_bins.sum(reduce_dims);
+    return Status::OK();
+  }
+};
+
+}  // namespace functor
+
+template <typename Device, typename T>
+class BincountOp : public OpKernel {
+ public:
+  explicit BincountOp(OpKernelConstruction* ctx) : OpKernel(ctx) {}
+
+  void Compute(OpKernelContext* ctx) override {
+    const Tensor& arr_t = ctx->input(0);
+    const Tensor& size_tensor = ctx->input(1);
+    const Tensor& weights_t = ctx->input(2);
+
+    int32 size = size_tensor.scalar<int32>()();
+    OP_REQUIRES(ctx, size >= 0, errors::InvalidArgument(
+                                    "size (", size, ") must be non-negative"));
+
+    const auto arr = arr_t.flat<int32>();
+    const auto weights = weights_t.flat<T>();
+    Tensor* output_t;
+    OP_REQUIRES_OK(ctx,
+                   ctx->allocate_output(0, TensorShape({size}), &output_t));
+    auto output = output_t->flat<T>();
+    OP_REQUIRES_OK(ctx, functor::BincountFunctor<Device, T>::Compute(
+                            ctx, arr, weights, output));
   }
 };
 
-#define REGISTER(TYPE)                                               \
+#define REGISTER_KERNELS(type)                                       \
   REGISTER_KERNEL_BUILDER(                                           \
-      Name("Bincount").Device(DEVICE_CPU).TypeConstraint<TYPE>("T"), \
-      BincountOp<TYPE>)
+      Name("Bincount").Device(DEVICE_CPU).TypeConstraint<type>("T"), \
+      BincountOp<CPUDevice, type>)
+
+TF_CALL_NUMBER_TYPES(REGISTER_KERNELS);
+#undef REGISTER_KERNELS
+
+#if GOOGLE_CUDA
+
+#define REGISTER_KERNELS(type)                            \
+  REGISTER_KERNEL_BUILDER(Name("Bincount")                \
+                              .Device(DEVICE_GPU)         \
+                              .HostMemory("size")         \
+                              .TypeConstraint<type>("T"), \
+                          BincountOp<GPUDevice, type>)
 
-TF_CALL_NUMBER_TYPES(REGISTER);
+TF_CALL_int32(REGISTER_KERNELS);
+TF_CALL_float(REGISTER_KERNELS);
+#undef REGISTER_KERNELS
 
-// TODO(ringwalt): Add a GPU implementation. We probably want to take a
-// different approach, e.g. threads in a warp each taking a pass over the same
-// data, and each thread summing a single bin.
+#endif  // GOOGLE_CUDA
 
 }  // end namespace tensorflow
diff --git a/tensorflow/core/kernels/bincount_op.h b/tensorflow/core/kernels/bincount_op.h
new file mode 100644
index 0000000000..0f8dd2b82a
--- /dev/null
+++ b/tensorflow/core/kernels/bincount_op.h
@@ -0,0 +1,41 @@
+/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_BINCOUNT_OP_H_
+#define TENSORFLOW_BINCOUNT_OP_H_
+
+#include "tensorflow/core/framework/op_kernel.h"
+#include "tensorflow/core/framework/tensor_types.h"
+#include "tensorflow/core/framework/types.h"
+#include "tensorflow/core/lib/core/errors.h"
+#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
+
+namespace tensorflow {
+
+namespace functor {
+
+template <typename Device, typename T>
+struct BincountFunctor {
+  static Status Compute(OpKernelContext* context,
+                        const typename TTypes<int32, 1>::ConstTensor& arr,
+                        const typename TTypes<T, 1>::ConstTensor& weights,
+                        typename TTypes<T, 1>::Tensor& output);
+};
+
+}  // end namespace functor
+
+}  // end namespace tensorflow
+
+#endif  // TENSORFLOW_BINCOUNT_OP_H_
diff --git a/tensorflow/core/kernels/bincount_op_gpu.cu.cc b/tensorflow/core/kernels/bincount_op_gpu.cu.cc
new file mode 100644
index 0000000000..ae9e26ffdf
--- /dev/null
+++ b/tensorflow/core/kernels/bincount_op_gpu.cu.cc
@@ -0,0 +1,114 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#if GOOGLE_CUDA
+
+#define EIGEN_USE_GPU
+
+#include "tensorflow/core/kernels/bincount_op.h"
+#include "external/cub_archive/cub/device/device_histogram.cuh"
+#include "tensorflow/core/framework/op_kernel.h"
+#include "tensorflow/core/framework/register_types.h"
+#include "tensorflow/core/framework/tensor.h"
+#include "tensorflow/core/framework/tensor_shape.h"
+#include "tensorflow/core/platform/logging.h"
+#include "tensorflow/core/platform/types.h"
+#include "tensorflow/core/util/cuda_kernel_helper.h"
+
+namespace tensorflow {
+
+typedef Eigen::GpuDevice GPUDevice;
+
+namespace functor {
+
+template <typename T>
+struct BincountFunctor<GPUDevice, T> {
+  static Status Compute(OpKernelContext* context,
+                        const typename TTypes<int32, 1>::ConstTensor& arr,
+                        const typename TTypes<T, 1>::ConstTensor& weights,
+                        typename TTypes<T, 1>::Tensor& output) {
+    if (weights.size() != 0) {
+      return errors::InvalidArgument(
+          "Weights should not be passed as it should be "
+          "handled by unsorted_segment_sum");
+    }
+    if (output.size() == 0) {
+      return Status::OK();
+    }
+    // In case weight.size() == 0, use CUB
+    size_t temp_storage_bytes = 0;
+    const int32* d_samples = arr.data();
+    T* d_histogram = output.data();
+    int num_levels = output.size() + 1;
+    int32 lower_level = 0;
+    int32 upper_level = output.size();
+    int num_samples = arr.size();
+    const cudaStream_t& stream = GetCudaStream(context);
+
+    // The first HistogramEven is to obtain the temp storage size required
+    // with d_temp_storage = NULL passed to the call.
+    auto err = cub::DeviceHistogram::HistogramEven(
+        /* d_temp_storage */ NULL,
+        /* temp_storage_bytes */ temp_storage_bytes,
+        /* d_samples */ d_samples,
+        /* d_histogram */ d_histogram,
+        /* num_levels */ num_levels,
+        /* lower_level */ lower_level,
+        /* upper_level */ upper_level,
+        /* num_samples */ num_samples,
+        /* stream */ stream);
+    if (err != cudaSuccess) {
+      return errors::Internal(
+          "Could not launch HistogramEven to get temp storage: ",
+          cudaGetErrorString(err), ".");
+    }
+    Tensor temp_storage;
+    TF_RETURN_IF_ERROR(context->allocate_temp(
+        DataTypeToEnum<int8>::value,
+        TensorShape({static_cast<int64>(temp_storage_bytes)}), &temp_storage));
+
+    void* d_temp_storage = temp_storage.flat<int8>().data();
+    // The second HistogramEven is to actual run with d_temp_storage
+    // allocated with temp_storage_bytes.
+    err = cub::DeviceHistogram::HistogramEven(
+        /* d_temp_storage */ d_temp_storage,
+        /* temp_storage_bytes */ temp_storage_bytes,
+        /* d_samples */ d_samples,
+        /* d_histogram */ d_histogram,
+        /* num_levels */ num_levels,
+        /* lower_level */ lower_level,
+        /* upper_level */ upper_level,
+        /* num_samples */ num_samples,
+        /* stream */ stream);
+    if (err != cudaSuccess) {
+      return errors::Internal("Could not launch HistogramEven: ",
+                              cudaGetErrorString(err), ".");
+    }
+    return Status::OK();
+  }
+};
+
+}  // end namespace functor
+
+#define REGISTER_GPU_SPEC(type) \
+  template struct functor::BincountFunctor<GPUDevice, type>;
+
+TF_CALL_int32(REGISTER_GPU_SPEC);
+TF_CALL_float(REGISTER_GPU_SPEC);
+#undef REGISTER_GPU_SPEC
+
+}  // namespace tensorflow
+
+#endif  // GOOGLE_CUDA
diff --git a/tensorflow/core/kernels/bincount_op_test.cc b/tensorflow/core/kernels/bincount_op_test.cc
new file mode 100644
index 0000000000..14becc87a7
--- /dev/null
+++ b/tensorflow/core/kernels/bincount_op_test.cc
@@ -0,0 +1,75 @@
+/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/core/common_runtime/kernel_benchmark_testlib.h"
+#include "tensorflow/core/framework/fake_input.h"
+#include "tensorflow/core/framework/node_def_builder.h"
+#include "tensorflow/core/framework/tensor.h"
+#include "tensorflow/core/graph/node_builder.h"
+#include "tensorflow/core/kernels/ops_testutil.h"
+#include "tensorflow/core/platform/test.h"
+#include "tensorflow/core/platform/test_benchmark.h"
+
+namespace tensorflow {
+
+static Graph* Bincount(int arr_size, int nbins) {
+  Graph* g = new Graph(OpRegistry::Global());
+
+  Tensor arr(DT_INT32, TensorShape({arr_size}));
+  arr.flat<int32>() = arr.flat<int32>().setRandom().abs();
+
+  Tensor size(DT_INT32, TensorShape({(int32)1}));
+  size.flat<int32>()(0) = (int32)nbins;
+
+  Tensor weights(DT_INT32, TensorShape({0}));
+
+  Node* node;
+  TF_CHECK_OK(NodeBuilder(g->NewName("n"), "Bincount")
+                  .Input(test::graph::Constant(g, arr))
+                  .Input(test::graph::Constant(g, size))
+                  .Input(test::graph::Constant(g, weights))
+                  .Attr("T", DT_INT32)
+                  .Finalize(g, &node));
+  return g;
+}
+
+#define BM_BincountDev(K, NBINS, type)                             \
+  static void BM_Bincount##_##type##_##K##_##NBINS(int iters) {    \
+    testing::ItemsProcessed(static_cast<int64>(iters) * K * 1024); \
+    test::Benchmark(#type, Bincount(K * 1024, NBINS)).Run(iters);  \
+  }                                                                \
+  BENCHMARK(BM_Bincount##_##type##_##K##_##NBINS);
+
+BM_BincountDev(32, 1000, cpu);
+BM_BincountDev(32, 2000, cpu);
+BM_BincountDev(32, 5000, cpu);
+BM_BincountDev(64, 1000, cpu);
+BM_BincountDev(64, 2000, cpu);
+BM_BincountDev(64, 5000, cpu);
+BM_BincountDev(128, 1000, cpu);
+BM_BincountDev(128, 2000, cpu);
+BM_BincountDev(128, 5000, cpu);
+
+BM_BincountDev(32, 1000, gpu);
+BM_BincountDev(32, 2000, gpu);
+BM_BincountDev(32, 5000, gpu);
+BM_BincountDev(64, 1000, gpu);
+BM_BincountDev(64, 2000, gpu);
+BM_BincountDev(64, 5000, gpu);
+BM_BincountDev(128, 1000, gpu);
+BM_BincountDev(128, 2000, gpu);
+BM_BincountDev(128, 5000, gpu);
+
+}  // end namespace tensorflow
diff --git a/tensorflow/core/kernels/bucketize_op.cc b/tensorflow/core/kernels/bucketize_op.cc
index 93c2d01221..c1693de538 100644
--- a/tensorflow/core/kernels/bucketize_op.cc
+++ b/tensorflow/core/kernels/bucketize_op.cc
@@ -15,15 +15,43 @@ limitations under the License.
 
 // See docs in ../ops/math_ops.cc.
 
-#include <algorithm>
-#include <vector>
-
+#include "tensorflow/core/kernels/bucketize_op.h"
 #include "tensorflow/core/framework/op_kernel.h"
-#include "tensorflow/core/lib/core/errors.h"
+#include "tensorflow/core/framework/register_types.h"
+#include "tensorflow/core/framework/tensor.h"
+#include "tensorflow/core/framework/tensor_shape.h"
+#include "tensorflow/core/platform/logging.h"
+#include "tensorflow/core/platform/types.h"
 
 namespace tensorflow {
 
+using thread::ThreadPool;
+
+typedef Eigen::ThreadPoolDevice CPUDevice;
+typedef Eigen::GpuDevice GPUDevice;
+
+namespace functor {
+
 template <typename T>
+struct BucketizeFunctor<CPUDevice, T> {
+  // PRECONDITION: boundaries_vector must be sorted.
+  static Status Compute(OpKernelContext* context,
+                        const typename TTypes<T, 1>::ConstTensor& input,
+                        const std::vector<float>& boundaries_vector,
+                        typename TTypes<int32, 1>::Tensor& output) {
+    const int N = input.size();
+    for (int i = 0; i < N; i++) {
+      auto first_bigger_it = std::upper_bound(
+          boundaries_vector.begin(), boundaries_vector.end(), input(i));
+      output(i) = first_bigger_it - boundaries_vector.begin();
+    }
+
+    return Status::OK();
+  }
+};
+}  // namespace functor
+
+template <typename Device, typename T>
 class BucketizeOp : public OpKernel {
  public:
   explicit BucketizeOp(OpKernelConstruction* context) : OpKernel(context) {
@@ -34,36 +62,42 @@ class BucketizeOp : public OpKernel {
 
   void Compute(OpKernelContext* context) override {
     const Tensor& input_tensor = context->input(0);
-    auto input = input_tensor.flat<T>();
+    const auto input = input_tensor.flat<T>();
+
     Tensor* output_tensor = nullptr;
     OP_REQUIRES_OK(context, context->allocate_output(0, input_tensor.shape(),
                                                      &output_tensor));
     auto output = output_tensor->template flat<int32>();
-
-    const int N = input.size();
-    for (int i = 0; i < N; i++) {
-      output(i) = CalculateBucketIndex(input(i));
-    }
+    OP_REQUIRES_OK(context, functor::BucketizeFunctor<Device, T>::Compute(
+                                context, input, boundaries_, output));
   }
 
  private:
-  int32 CalculateBucketIndex(const T value) {
-    auto first_bigger_it =
-        std::upper_bound(boundaries_.begin(), boundaries_.end(), value);
-    return first_bigger_it - boundaries_.begin();
-  }
   std::vector<float> boundaries_;
 };
 
 #define REGISTER_KERNEL(T)                                         \
   REGISTER_KERNEL_BUILDER(                                         \
       Name("Bucketize").Device(DEVICE_CPU).TypeConstraint<T>("T"), \
-      BucketizeOp<T>);
+      BucketizeOp<CPUDevice, T>);
+
+REGISTER_KERNEL(int32);
+REGISTER_KERNEL(int64);
+REGISTER_KERNEL(float);
+REGISTER_KERNEL(double);
+#undef REGISTER_KERNEL
+
+#if GOOGLE_CUDA
+#define REGISTER_KERNEL(T)                                         \
+  REGISTER_KERNEL_BUILDER(                                         \
+      Name("Bucketize").Device(DEVICE_GPU).TypeConstraint<T>("T"), \
+      BucketizeOp<GPUDevice, T>);
 
 REGISTER_KERNEL(int32);
 REGISTER_KERNEL(int64);
 REGISTER_KERNEL(float);
 REGISTER_KERNEL(double);
 #undef REGISTER_KERNEL
+#endif  // GOOGLE_CUDA
 
 }  // namespace tensorflow
diff --git a/tensorflow/core/kernels/bucketize_op.h b/tensorflow/core/kernels/bucketize_op.h
new file mode 100644
index 0000000000..c8e461beb9
--- /dev/null
+++ b/tensorflow/core/kernels/bucketize_op.h
@@ -0,0 +1,41 @@
+/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_BUCKETIZE_OP_H_
+#define TENSORFLOW_BUCKETIZE_OP_H_
+
+#include <vector>
+#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
+
+#include "tensorflow/core/framework/op_kernel.h"
+#include "tensorflow/core/framework/tensor_types.h"
+#include "tensorflow/core/framework/types.h"
+#include "tensorflow/core/lib/core/errors.h"
+
+namespace tensorflow {
+namespace functor {
+
+template <typename Device, typename T>
+struct BucketizeFunctor {
+  static Status Compute(OpKernelContext* context,
+                        const typename TTypes<T, 1>::ConstTensor& input,
+                        const std::vector<float>& boundaries_vector,
+                        typename TTypes<int32, 1>::Tensor& output);
+};
+
+}  // namespace functor
+}  // namespace tensorflow
+
+#endif  // TENSORFLOW_BUCKETIZE_OP_H_
diff --git a/tensorflow/core/kernels/bucketize_op_gpu.cu.cc b/tensorflow/core/kernels/bucketize_op_gpu.cu.cc
new file mode 100644
index 0000000000..aafbbe41b4
--- /dev/null
+++ b/tensorflow/core/kernels/bucketize_op_gpu.cu.cc
@@ -0,0 +1,101 @@
+/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#if GOOGLE_CUDA
+
+#define EIGEN_USE_GPU
+
+#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
+
+#include "tensorflow/core/framework/op_kernel.h"
+#include "tensorflow/core/framework/register_types.h"
+#include "tensorflow/core/framework/tensor.h"
+#include "tensorflow/core/framework/tensor_shape.h"
+#include "tensorflow/core/kernels/bucketize_op.h"
+#include "tensorflow/core/kernels/cuda_device_array.h"
+#include "tensorflow/core/platform/logging.h"
+#include "tensorflow/core/platform/types.h"
+#include "tensorflow/core/util/cuda_kernel_helper.h"
+
+namespace tensorflow {
+
+typedef Eigen::GpuDevice GPUDevice;
+
+template <typename T>
+__global__ void BucketizeCustomKernel(
+    const int32 size_in, const T* in, const int32 size_boundaries,
+    CudaDeviceArrayStruct<float> boundaries_array, int32* out) {
+  const float* boundaries = GetCudaDeviceArrayOnDevice(&boundaries_array);
+  CUDA_1D_KERNEL_LOOP(i, size_in) {
+    T value = in[i];
+    int32 bucket = 0;
+    int32 count = size_boundaries;
+    while (count > 0) {
+      int32 l = bucket;
+      int32 step = count / 2;
+      l += step;
+      if (!(value < static_cast<T>(boundaries[l]))) {
+        bucket = ++l;
+        count -= step + 1;
+      } else {
+        count = step;
+      }
+    }
+    out[i] = bucket;
+  }
+}
+
+namespace functor {
+
+template <typename T>
+struct BucketizeFunctor<GPUDevice, T> {
+  // PRECONDITION: boundaries_vector must be sorted.
+  static Status Compute(OpKernelContext* context,
+                        const typename TTypes<T, 1>::ConstTensor& input,
+                        const std::vector<float>& boundaries_vector,
+                        typename TTypes<int32, 1>::Tensor& output) {
+    const GPUDevice& d = context->eigen_device<GPUDevice>();
+
+    CudaDeviceArrayOnHost<float> boundaries_array(context,
+                                                  boundaries_vector.size());
+    TF_RETURN_IF_ERROR(boundaries_array.Init());
+    for (int i = 0; i < boundaries_vector.size(); ++i) {
+      boundaries_array.Set(i, boundaries_vector[i]);
+    }
+    TF_RETURN_IF_ERROR(boundaries_array.Finalize());
+
+    CudaLaunchConfig config = GetCudaLaunchConfig(input.size(), d);
+    BucketizeCustomKernel<
+        T><<<config.block_count, config.thread_per_block, 0, d.stream()>>>(
+        input.size(), input.data(), boundaries_vector.size(),
+        boundaries_array.data(), output.data());
+
+    return Status::OK();
+  }
+};
+}  // namespace functor
+
+#define REGISTER_GPU_SPEC(type) \
+  template struct functor::BucketizeFunctor<GPUDevice, type>;
+
+REGISTER_GPU_SPEC(int32);
+REGISTER_GPU_SPEC(int64);
+REGISTER_GPU_SPEC(float);
+REGISTER_GPU_SPEC(double);
+#undef REGISTER_GPU_SPEC
+
+}  // namespace tensorflow
+
+#endif  // GOOGLE_CUDA
diff --git a/tensorflow/core/kernels/conv_grad_ops_3d.cc b/tensorflow/core/kernels/conv_grad_ops_3d.cc
index 21f5cb1716..f819fccbfb 100644
--- a/tensorflow/core/kernels/conv_grad_ops_3d.cc
+++ b/tensorflow/core/kernels/conv_grad_ops_3d.cc
@@ -236,6 +236,7 @@ class Conv3DBackpropInputOp : public OpKernel {
   REGISTER_KERNEL_BUILDER(                                                     \
       Name("Conv3DBackpropInputV2").Device(DEVICE_CPU).TypeConstraint<T>("T"), \
       Conv3DBackpropInputOp<CPUDevice, T>);
+TF_CALL_half(REGISTER_CPU_KERNEL);
 TF_CALL_float(REGISTER_CPU_KERNEL);
 TF_CALL_double(REGISTER_CPU_KERNEL);
 #undef REGISTER_CPU_KERNEL
@@ -383,6 +384,7 @@ class Conv3DBackpropFilterOp : public OpKernel {
                               .Device(DEVICE_CPU)                             \
                               .TypeConstraint<T>("T"),                        \
                           Conv3DBackpropFilterOp<CPUDevice, T>);
+TF_CALL_half(REGISTER_CPU_KERNEL);
 TF_CALL_float(REGISTER_CPU_KERNEL);
 TF_CALL_double(REGISTER_CPU_KERNEL);
 #undef REGISTER_CPU_KERNEL
@@ -409,6 +411,7 @@ namespace functor {
       const std::array<int, 3>& padding_right,                        \
       typename TTypes<T, 5, int>::Tensor out, TensorFormat format);
 
+DECLARE_GPU_SPEC(Eigen::half);
 DECLARE_GPU_SPEC(float);
 #undef DECLARE_GPU_SPEC
 }  // namespace functor
@@ -1098,22 +1101,29 @@ class Conv3DBackpropFilterOp<GPUDevice, T> : public OpKernel {
   bool cudnn_use_autotune_;
 };
 
-REGISTER_KERNEL_BUILDER(
-    Name("Conv3DBackpropInput").Device(DEVICE_GPU).TypeConstraint<float>("T"),
-    Conv3DBackpropInputOp<GPUDevice, float>);
-REGISTER_KERNEL_BUILDER(Name("Conv3DBackpropInputV2")
-                            .Device(DEVICE_GPU)
-                            .TypeConstraint<float>("T")
-                            .HostMemory("input_sizes"),
-                        Conv3DBackpropInputOp<GPUDevice, float>);
-REGISTER_KERNEL_BUILDER(
-    Name("Conv3DBackpropFilter").Device(DEVICE_GPU).TypeConstraint<float>("T"),
-    Conv3DBackpropFilterOp<GPUDevice, float>);
-REGISTER_KERNEL_BUILDER(Name("Conv3DBackpropFilterV2")
-                            .Device(DEVICE_GPU)
-                            .TypeConstraint<float>("T")
-                            .HostMemory("filter_sizes"),
-                        Conv3DBackpropFilterOp<GPUDevice, float>);
+
+
+#define REGISTER_GPU_KERNEL(T)                                                \
+  REGISTER_KERNEL_BUILDER(                                                    \
+      Name("Conv3DBackpropInput").Device(DEVICE_GPU).TypeConstraint<T>("T"),  \
+      Conv3DBackpropInputOp<GPUDevice, T>);                                   \
+  REGISTER_KERNEL_BUILDER(Name("Conv3DBackpropInputV2")                       \
+                            .Device(DEVICE_GPU)                               \
+                            .TypeConstraint<T>("T")                           \
+                            .HostMemory("input_sizes"),                       \
+                        Conv3DBackpropInputOp<GPUDevice, T>);                 \
+  REGISTER_KERNEL_BUILDER(                                                    \
+    Name("Conv3DBackpropFilter").Device(DEVICE_GPU).TypeConstraint<T>("T"),   \
+    Conv3DBackpropFilterOp<GPUDevice, T>);                                    \
+  REGISTER_KERNEL_BUILDER(Name("Conv3DBackpropFilterV2")                      \
+                            .Device(DEVICE_GPU)                               \
+                            .TypeConstraint<T>("T")                           \
+                            .HostMemory("filter_sizes"),                      \
+                        Conv3DBackpropFilterOp<GPUDevice, T>);
+TF_CALL_half(REGISTER_GPU_KERNEL);
+TF_CALL_float(REGISTER_GPU_KERNEL);
+#undef REGISTER_GPU_KERNEL
+     
 #endif  // GOOGLE_CUDA
 
 }  // namespace tensorflow
diff --git a/tensorflow/core/kernels/conv_ops_3d.cc b/tensorflow/core/kernels/conv_ops_3d.cc
index 8a89d564de..37cb67bc51 100644
--- a/tensorflow/core/kernels/conv_ops_3d.cc
+++ b/tensorflow/core/kernels/conv_ops_3d.cc
@@ -145,6 +145,7 @@ class Conv3DOp : public BinaryOp<T> {
   REGISTER_KERNEL_BUILDER(                                      \
       Name("Conv3D").Device(DEVICE_CPU).TypeConstraint<T>("T"), \
       Conv3DOp<CPUDevice, T>);
+TF_CALL_half(REGISTER_CPU_KERNEL);
 TF_CALL_float(REGISTER_CPU_KERNEL);
 TF_CALL_double(REGISTER_CPU_KERNEL);
 #undef REGISTER_CPU_KERNEL
@@ -482,12 +483,16 @@ namespace functor {
       const std::array<int, 3>& padding_right,                        \
       typename TTypes<T, 5, int>::Tensor out, TensorFormat format);
 
+DECLARE_GPU_SPEC(Eigen::half);
 DECLARE_GPU_SPEC(float);
 #undef DECLARE_GPU_SPEC
 
 }  // namespace functor
 
 // Registration of the GPU implementations.
+REGISTER_KERNEL_BUILDER(
+    Name("Conv3D").Device(DEVICE_GPU).TypeConstraint<Eigen::half>("T"),
+    Conv3DOp<GPUDevice, Eigen::half>);
 REGISTER_KERNEL_BUILDER(
     Name("Conv3D").Device(DEVICE_GPU).TypeConstraint<float>("T"),
     Conv3DOp<GPUDevice, float>);
diff --git a/tensorflow/core/kernels/cwise_op_acosh.cc b/tensorflow/core/kernels/cwise_op_acosh.cc
index 7bdd8d22a3..39c8814073 100644
--- a/tensorflow/core/kernels/cwise_op_acosh.cc
+++ b/tensorflow/core/kernels/cwise_op_acosh.cc
@@ -20,16 +20,8 @@ namespace tensorflow {
 REGISTER4(UnaryOp, CPU, "Acosh", functor::acosh, float, double,
           complex64, complex128);
 
-#if TENSORFLOW_USE_SYCL
-#define REGISTER_SYCL_KERNEL(TYPE)                                    \
-  REGISTER_KERNEL_BUILDER(                                            \
-                          Name("Acosh")                               \
-                          .Device(DEVICE_SYCL)                        \
-                          .TypeConstraint<TYPE>("T"),                 \
-                          UnaryOp<SYCLDevice, functor::acosh<TYPE>>);
-REGISTER_SYCL_KERNEL(float);
-REGISTER_SYCL_KERNEL(double);
-#undef REGISTER_SYCL_KERNEL
+#ifdef TENSORFLOW_USE_SYCL
+REGISTER2(UnaryOp, SYCL, "Acosh", functor::acosh, float, double);
 #endif // TENSORFLOW_USE_SYCL
 
 #if GOOGLE_CUDA
diff --git a/tensorflow/core/kernels/cwise_op_asinh.cc b/tensorflow/core/kernels/cwise_op_asinh.cc
index e0644323c0..8d44208aa7 100644
--- a/tensorflow/core/kernels/cwise_op_asinh.cc
+++ b/tensorflow/core/kernels/cwise_op_asinh.cc
@@ -20,17 +20,9 @@ namespace tensorflow {
 REGISTER4(UnaryOp, CPU, "Asinh", functor::asinh, float, double,
           complex64, complex128);
 
-#if TENSORFLOW_USE_SYCL
-#define REGISTER_SYCL_KERNEL(TYPE)                                    \
-  REGISTER_KERNEL_BUILDER(                                            \
-                          Name("Asinh")                               \
-                          .Device(DEVICE_SYCL)                        \
-                          .TypeConstraint<TYPE>("T"),                 \
-                          UnaryOp<SYCLDevice, functor::asinh<TYPE>>);
-REGISTER_SYCL_KERNEL(float);
-REGISTER_SYCL_KERNEL(double);
-#undef REGISTER_SYCL_KERNEL
-#endif // TENSORFLOW_USE_SYC
+#ifdef TENSORFLOW_USE_SYCL
+REGISTER2(UnaryOp, SYCL, "Asinh", functor::asinh, float, double);
+#endif // TENSORFLOW_USE_SYCL
 
 #if GOOGLE_CUDA
 REGISTER2(UnaryOp, GPU, "Asinh", functor::asinh, float, double);
diff --git a/tensorflow/core/kernels/cwise_op_atanh.cc b/tensorflow/core/kernels/cwise_op_atanh.cc
index 058f5140c5..bbc69e45aa 100644
--- a/tensorflow/core/kernels/cwise_op_atanh.cc
+++ b/tensorflow/core/kernels/cwise_op_atanh.cc
@@ -20,17 +20,9 @@ namespace tensorflow {
 REGISTER4(UnaryOp, CPU, "Atanh", functor::atanh, float, double,
           complex64, complex128);
 
-#if TENSORFLOW_USE_SYCL
-#define REGISTER_SYCL_KERNEL(TYPE)                                    \
-  REGISTER_KERNEL_BUILDER(                                            \
-                          Name("Atanh")                               \
-                          .Device(DEVICE_SYCL)                        \
-                          .TypeConstraint<TYPE>("T"),                 \
-                          UnaryOp<SYCLDevice, functor::atanh<TYPE>>);
-REGISTER_SYCL_KERNEL(float);
-REGISTER_SYCL_KERNEL(double);
-#undef REGISTER_SYCL_KERNEL
-#endif // TENSORFLOW_USE_SYC
+#ifdef TENSORFLOW_USE_SYCL
+REGISTER2(UnaryOp, SYCL, "Atanh", functor::atanh, float, double);
+#endif // TENSORFLOW_USE_SYCL
 
 #if GOOGLE_CUDA
 REGISTER2(UnaryOp, GPU, "Atanh", functor::atanh, float, double);
diff --git a/tensorflow/core/kernels/cwise_ops.h b/tensorflow/core/kernels/cwise_ops.h
index 6c22b124de..d32185b6bf 100644
--- a/tensorflow/core/kernels/cwise_ops.h
+++ b/tensorflow/core/kernels/cwise_ops.h
@@ -49,7 +49,11 @@ template <typename T>
 struct scalar_asinh_op {
   EIGEN_EMPTY_STRUCT_CTOR(scalar_asinh_op)
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const T operator()(const T& a) const {
+#if EIGEN_HAS_CXX11_MATH
+    return numext::asinh(a);
+#else
     return std::asinh(a);
+#endif  // EIGEN_HAS_CXX11_MATH
   }
 };
 template <typename T>
@@ -61,7 +65,11 @@ template <typename T>
 struct scalar_acosh_op {
   EIGEN_EMPTY_STRUCT_CTOR(scalar_acosh_op)
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const T operator()(const T& a) const {
+#if EIGEN_HAS_CXX11_MATH
+    return numext::acosh(a);
+#else
     return std::acosh(a);
+#endif  // EIGEN_HAS_CXX11_MATH
   }
 };
 template <typename T>
@@ -73,7 +81,11 @@ template <typename T>
 struct scalar_atanh_op {
   EIGEN_EMPTY_STRUCT_CTOR(scalar_atanh_op)
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const T operator()(const T& a) const {
+#if EIGEN_HAS_CXX11_MATH
+    return numext::atanh(a);
+#else
     return std::atanh(a);
+#endif  // EIGEN_HAS_CXX11_MATH
   }
 };
 template <typename T>
diff --git a/tensorflow/core/kernels/depthwise_conv_grad_op.cc b/tensorflow/core/kernels/depthwise_conv_grad_op.cc
index 9804d7d38e..53d65a22d1 100644
--- a/tensorflow/core/kernels/depthwise_conv_grad_op.cc
+++ b/tensorflow/core/kernels/depthwise_conv_grad_op.cc
@@ -231,7 +231,7 @@ static void CopyOutputBackpropRegion(const DepthwiseArgs& args,
       }
       // Pad to vector-register width (if needed).
       for (int64 d = 0; d < pad_size; ++d) {
-        buffer[buf_base + vectorized_size + scalar_size + d] = 0;
+        buffer[buf_base + vectorized_size + scalar_size + d] = static_cast<T>(0);
       }
     }
   }
@@ -297,7 +297,7 @@ static void ComputeBackpropInput(const DepthwiseArgs& args,
 
   for (int i = 0; i < output_vectorized_size; i += kPacketSize) {
     // Reset accumulator.
-    auto vaccum = Eigen::internal::pset1<Packet>(0);
+    auto vaccum = Eigen::internal::pset1<Packet>(static_cast<T>(0));
     for (int j = 0; j < filter_spatial_size; ++j) {
       // Calculate index.
       const int64 index = i + j * padded_filter_inner_dim_size;
@@ -318,7 +318,7 @@ static void ComputeBackpropInput(const DepthwiseArgs& args,
   }
 
   if (output_scalar_size > 0) {
-    auto vaccum = Eigen::internal::pset1<Packet>(0);
+    auto vaccum = Eigen::internal::pset1<Packet>(static_cast<T>(0));
     for (int j = 0; j < filter_spatial_size; ++j) {
       const int64 index =
           output_vectorized_size + j * padded_filter_inner_dim_size;
@@ -346,7 +346,7 @@ static void ComputeBackpropInput(const DepthwiseArgs& args,
   if (depth_multiplier > 1) {
     for (int64 d = 0; d < in_depth; ++d) {
       const int64 index = d * args.depth_multiplier;
-      T accum = 0;
+      T accum = static_cast<T>(0);
       for (int64 dm = 0; dm < dm_vectorized_size; dm += kPacketSize) {
         const auto v = Eigen::internal::ploadu<Packet>(out_buffer + index + dm);
         accum += Eigen::internal::predux(v);
@@ -510,6 +510,7 @@ static void DepthwiseConvBackpropInputReference(const DepthwiseArgs& args,
 
 #if GOOGLE_CUDA
 
+extern template struct LaunchDepthwiseConvBackpropInputOp<GPUDevice, Eigen::half>;
 extern template struct LaunchDepthwiseConvBackpropInputOp<GPUDevice, float>;
 extern template struct LaunchDepthwiseConvBackpropInputOp<GPUDevice, double>;
 
@@ -884,6 +885,7 @@ static void DepthwiseConvBackpropFilterReference(const DepthwiseArgs& args,
 
 #if GOOGLE_CUDA
 
+extern template struct LaunchDepthwiseConvBackpropFilterOp<GPUDevice, Eigen::half>;
 extern template struct LaunchDepthwiseConvBackpropFilterOp<GPUDevice, float>;
 extern template struct LaunchDepthwiseConvBackpropFilterOp<GPUDevice, double>;
 
diff --git a/tensorflow/core/kernels/depthwise_conv_op.cc b/tensorflow/core/kernels/depthwise_conv_op.cc
index bbeeaf7895..2759ecb2f1 100644
--- a/tensorflow/core/kernels/depthwise_conv_op.cc
+++ b/tensorflow/core/kernels/depthwise_conv_op.cc
@@ -94,7 +94,7 @@ struct DepthwiseConv2DKernel {
 
     for (int i = 0; i < output_vectorized_size; i += kPacketSize) {
       // Reset accumulator.
-      auto vaccum = Eigen::internal::pset1<Packet>(0);
+      auto vaccum = Eigen::internal::pset1<Packet>(static_cast<T>(0));
       for (int j = 0; j < filter_spatial_size; ++j) {
         // Calculate index.
         const int64 index = i + j * padded_filter_inner_dim_size;
@@ -115,7 +115,7 @@ struct DepthwiseConv2DKernel {
     }
 
     if (output_scalar_size > 0) {
-      auto vaccum = Eigen::internal::pset1<Packet>(0);
+      auto vaccum = Eigen::internal::pset1<Packet>(static_cast<T>(0));
       for (int j = 0; j < filter_spatial_size; ++j) {
         const int64 index =
             output_vectorized_size + j * padded_filter_inner_dim_size;
@@ -246,6 +246,7 @@ extern template class LaunchConv2DOp<CPUDevice, float>;
 #if GOOGLE_CUDA
 
 // Extern template instantiated in depthwise_conv_op_gpu.cc.
+extern template struct LaunchDepthwiseConvOp<GPUDevice, Eigen::half>;
 extern template struct LaunchDepthwiseConvOp<GPUDevice, float>;
 extern template struct LaunchDepthwiseConvOp<GPUDevice, double>;
 
@@ -419,12 +420,17 @@ class DepthwiseConv2dNativeOp : public BinaryOp<T> {
       Name("DepthwiseConv2dNative").Device(DEVICE_CPU).TypeConstraint<T>("T"), \
       DepthwiseConv2dNativeOp<CPUDevice, T>);
 
+TF_CALL_half(REGISTER_CPU_KERNEL);
 TF_CALL_float(REGISTER_CPU_KERNEL);
 #if !defined(PLATFORM_WINDOWS) || !defined(_DEBUG)
 TF_CALL_double(REGISTER_CPU_KERNEL);
 #endif
 
 #if GOOGLE_CUDA
+REGISTER_KERNEL_BUILDER(
+    Name("DepthwiseConv2dNative").Device(DEVICE_GPU).TypeConstraint<Eigen::half>("T"),
+    DepthwiseConv2dNativeOp<GPUDevice, Eigen::half>);
+
 REGISTER_KERNEL_BUILDER(
     Name("DepthwiseConv2dNative").Device(DEVICE_GPU).TypeConstraint<float>("T"),
     DepthwiseConv2dNativeOp<GPUDevice, float>);
diff --git a/tensorflow/core/kernels/depthwise_conv_op.h b/tensorflow/core/kernels/depthwise_conv_op.h
index aa5b5c76f6..11aed5b415 100644
--- a/tensorflow/core/kernels/depthwise_conv_op.h
+++ b/tensorflow/core/kernels/depthwise_conv_op.h
@@ -158,7 +158,7 @@ struct DepthwiseFilterPadOp {
       }
       // Pad the remainder of output to vector-register boundary.
       for (int64 j = 0; j < pad_size; ++j) {
-        padded_filter[output_base + vectorized_size + scalar_size + j] = 0;
+        padded_filter[output_base + vectorized_size + scalar_size + j] = static_cast<T>(0);
       }
     }
   }
@@ -266,7 +266,7 @@ struct DepthwiseInputCopyOp {
 
           // Pad the remainder of the output to vector register boundary.
           for (int64 d = 0; d < output_pad_size; ++d) {
-            in_buf[d] = 0;
+            in_buf[d] = static_cast<T>(0);
           }
           in_buf += output_pad_size;
 
diff --git a/tensorflow/core/kernels/depthwise_conv_op_gpu.cu.cc b/tensorflow/core/kernels/depthwise_conv_op_gpu.cu.cc
index ecfe51d599..903aac5d68 100644
--- a/tensorflow/core/kernels/depthwise_conv_op_gpu.cu.cc
+++ b/tensorflow/core/kernels/depthwise_conv_op_gpu.cu.cc
@@ -105,7 +105,7 @@ __global__ void __launch_bounds__(1024, 2)
     const int input_row_end = input_row_start + filter_rows;
     const int input_col_end = input_col_start + filter_cols;
 
-    T sum = 0;
+    T sum = static_cast<T>(0);
 
     const int input_offset_temp = in_rows * OB;
     if (input_row_start >= 0 && input_col_start >= 0 &&
@@ -258,8 +258,8 @@ __global__ __launch_bounds__(1024, 2) void DepthwiseConv2dGPUKernelNHWCSmall(
     __syncthreads();
 
     if (depth_in_range) {
-      T sum1 = 0;
-      T sum2 = 0;
+      T sum1 = static_cast<T>(0);
+      T sum2 = static_cast<T>(0);
       int shared_offset = data_idx;
       const T* filter_ptr = filter_read_offset + shared_data;
       UNROLL for (int r = 0; r < filter_rows; ++r) {
@@ -369,7 +369,7 @@ __global__ void __launch_bounds__(1024, 2)
     const int input_row_end = input_row_start + filter_rows;
     const int input_col_end = input_col_start + filter_cols;
 
-    T sum = 0;
+    T sum = static_cast<T>(0);
     if (input_row_start >= 0 && input_col_start >= 0 &&
         input_row_end < in_rows && input_col_end < in_cols) {
       // Loop that doesn't need to check for boundary conditions.
@@ -529,8 +529,8 @@ __global__ __launch_bounds__(1024, 2) void DepthwiseConv2dGPUKernelNCHWSmall(
     __syncthreads();
 
     if (slice_in_range) {
-      T sum1 = 0;
-      T sum2 = 0;
+      T sum1 = static_cast<T>(0);
+      T sum2 = static_cast<T>(0);
       int shared_offset = data_idx;
       const T* filter_ptr = filter_read_offset + shared_data;
       UNROLL for (int r = 0; r < filter_rows; ++r) {
@@ -710,6 +710,7 @@ void LaunchDepthwiseConvOp<GPUDevice, T>::operator()(OpKernelContext* ctx,
                   "Launch of gpu kernel for DepthwiseConv2dGPULaunch failed"));
 }
 
+template struct LaunchDepthwiseConvOp<GPUDevice, Eigen::half>;
 template struct LaunchDepthwiseConvOp<GPUDevice, float>;
 template struct LaunchDepthwiseConvOp<GPUDevice, double>;
 
@@ -744,7 +745,7 @@ __global__ void __launch_bounds__(640, 2)
     const int in_r = (thread_id / in_depth / in_cols) % in_rows;
     const int b = thread_id / in_depth / in_cols / in_rows;
 
-    T sum = 0;
+    T sum = static_cast<T>(0);
 
     const int out_r_start =
         tf_max<int>(0, (in_r - filter_rows + pad_rows + stride) / stride);
@@ -810,7 +811,7 @@ __global__ void __launch_bounds__(640, 2)
     const int in_d = (thread_id / in_cols / in_rows) % in_depth;
     const int b = thread_id / in_depth / in_cols / in_rows;
 
-    T sum = 0;
+    T sum = static_cast<T>(0);
     const int out_d_start = in_d * depth_multiplier;
     const int out_d_end = out_d_start + depth_multiplier;
 
@@ -919,6 +920,7 @@ void LaunchDepthwiseConvBackpropInputOp<GPUDevice, T>::operator()(
                                "utGPULaunch failed"));
 }
 
+template struct LaunchDepthwiseConvBackpropInputOp<GPUDevice, Eigen::half>;
 template struct LaunchDepthwiseConvBackpropInputOp<GPUDevice, float>;
 template struct LaunchDepthwiseConvBackpropInputOp<GPUDevice, double>;
 
@@ -1631,6 +1633,7 @@ void LaunchDepthwiseConvBackpropFilterOp<GPUDevice, T>::operator()(
                                "terGPULaunch failed"));
 }
 
+template struct LaunchDepthwiseConvBackpropFilterOp<GPUDevice, Eigen::half>;
 template struct LaunchDepthwiseConvBackpropFilterOp<GPUDevice, float>;
 template struct LaunchDepthwiseConvBackpropFilterOp<GPUDevice, double>;
 }  // namespace tensorflow
diff --git a/tensorflow/core/kernels/dynamic_partition_op_gpu.cu.cc b/tensorflow/core/kernels/dynamic_partition_op_gpu.cu.cc
new file mode 100644
index 0000000000..7249c8c66c
--- /dev/null
+++ b/tensorflow/core/kernels/dynamic_partition_op_gpu.cu.cc
@@ -0,0 +1,376 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+// The algorithm for dynamic partition has the following steps:
+// 1. Let N be the size of partitions. We initialize a new vector indices_in
+//    with the values 0, 1, 2, ..., N-1.
+// 2. We apply cub::DeviceRadixSort::SortPairs to the key - value pairs given
+//    by partitions and indices_in. This will result in two new vectors
+//    partitions_out and indices_out, with partitions_out sorted.
+// 3. The first dimension of outputs[i] is equal to the length of the interval
+//    of i-values in partitions_out. We determine it in two steps:
+//    - compute the starting and ending point of each interval,
+//    - subtract the starting and ending points to find the length.
+//    The result is placed in partition_count.
+// 4. Because partition_count is on the GPU, we bring it asynchronously to
+//    the CPU. Then we can allocate the output tensors.
+// 5. Finally, we use indices_out and the gather functor to collect the output.
+//    This works, because for each interval of i-values, indices_out points
+//    to the slices which should form output[i].
+
+#if GOOGLE_CUDA
+
+#define EIGEN_USE_GPU
+
+#include "external/cub_archive/cub/device/device_radix_sort.cuh"
+#include "tensorflow/core/common_runtime/gpu/gpu_event_mgr.h"
+#include "tensorflow/core/framework/op_kernel.h"
+#include "tensorflow/core/framework/register_types.h"
+#include "tensorflow/core/framework/tensor.h"
+#include "tensorflow/core/framework/types.h"
+#include "tensorflow/core/kernels/bounds_check.h"
+#include "tensorflow/core/kernels/fill_functor.h"
+#include "tensorflow/core/kernels/gather_functor_gpu.cu.h"
+#include "tensorflow/core/util/cuda_kernel_helper.h"
+
+namespace tensorflow {
+
+typedef Eigen::GpuDevice GPUDevice;
+
+namespace {
+
+template <typename T>
+__global__ void RangeInitKernel(const T start, const T delta, const int32 size,
+                                T* out) {
+  CUDA_1D_KERNEL_LOOP(i, size) { out[i] = start + i * delta; }
+}
+
+__global__ void FindEndpointsKernel(const int32* partitions, int32 size,
+                                    int32 nump, int32* start, int32* end) {
+  CUDA_1D_KERNEL_LOOP(i, size) {
+    int32 current = ldg(partitions + i);
+    if (FastBoundsCheck(current, nump)) {
+      if (i == 0)
+        start[current] = i;
+      else {
+        int32 before = ldg(partitions + i - 1);
+        if (before != current) start[current] = i;
+      }
+      if (i == size - 1)
+        end[current] = i + 1;
+      else {
+        int32 after = ldg(partitions + i + 1);
+        if (after != current) end[current] = i + 1;
+      }
+    }
+  }
+}
+
+// We create a local version of subtract, because the tf.subtract kernel
+// is not defined for int32. We use it to compute the length of an interval
+// by subtracting the endpoints.
+__global__ void IntervalLengthKernel(int32* start, int32 size, int32* end) {
+  CUDA_1D_KERNEL_LOOP(i, size) {
+    int32 start_point = ldg(start + i);
+    end[i] = end[i] - start_point;
+  }
+}
+
+// Initialize out with range start, start + delta, start + 2 * delta, ...
+// This is needed because tf.range has no GPU implementation.
+template <typename T>
+void RangeInit(const GPUDevice& d, const T start, const T delta,
+               const int32 size, typename TTypes<T>::Flat out) {
+  CudaLaunchConfig config = GetCudaLaunchConfig(size, d);
+  RangeInitKernel<
+      T><<<config.block_count, config.thread_per_block, 0, d.stream()>>>(
+      start, delta, size, out.data());
+}
+
+// Partitions is a sorted vector of N non-negative integer numbers.
+// This function computes the starting and ending points of each interval
+// of values.
+void ComputeIntervals(const GPUDevice& d, Tensor* partitions, int32 N,
+                      int32 nump, int32* start_ptr, int32* end_ptr) {
+  CudaLaunchConfig config = GetCudaLaunchConfig(N, d);
+  FindEndpointsKernel<<<config.block_count, config.thread_per_block, 0,
+                        d.stream()>>>(partitions->flat<int32>().data(), N, nump,
+                                      start_ptr, end_ptr);
+}
+
+// Subtract the ending points of each interval to obtain the interval length.
+void ComputeItvLength(const GPUDevice& d, int32 num, int32* start_ptr,
+                      int32* end_ptr) {
+  CudaLaunchConfig config = GetCudaLaunchConfig(num, d);
+  IntervalLengthKernel<<<config.block_count, config.thread_per_block, 0,
+                         d.stream()>>>(start_ptr, num, end_ptr);
+}
+
+template <typename T>
+void CallGatherKernel(const GPUDevice& d, const T* params, const int32* indices,
+                      T* out, int64 gather_dim_size, int64 indices_size,
+                      int64 slice_size, int64 out_size) {
+  CudaLaunchConfig config = GetCudaLaunchConfig(out_size, d);
+  GatherOpKernel<
+      T, int32,
+      true><<<config.block_count, config.thread_per_block, 0, d.stream()>>>(
+      params, indices, out, gather_dim_size, indices_size, slice_size,
+      out_size);
+}
+
+}  // namespace
+
+// The current implementation has memory cost on GPU
+// I + P + max(3N + R, O + N), where:
+// I - the size of the input
+// N - the size of the partitions tensor
+// R - the temporary storage used by cub::RadixSort, about 2N
+// P - the number of partitions
+// O - the size of the output
+// So roughly the cost is I + P + max(5N, O + N).
+template <typename T>
+class DynamicPartitionOpGPU : public AsyncOpKernel {
+ public:
+  explicit DynamicPartitionOpGPU(OpKernelConstruction* c) : AsyncOpKernel(c) {
+    OP_REQUIRES_OK(c, c->GetAttr("num_partitions", &num_partitions_));
+    OP_REQUIRES(c, num_partitions_ >= 1,
+                errors::InvalidArgument("num_partitions must be at least 1"));
+  }
+
+  void AllocateTempSpace(OpKernelContext* c, int32 N, Tensor* indices_in,
+                         Tensor* partitions_out, Tensor* indices_out,
+                         DoneCallback done) {
+    int32 M = std::max(N, num_partitions_);
+    // indices_in will be made slightly larger to accomodate
+    // later computations.
+    OP_REQUIRES_OK_ASYNC(
+        c, c->allocate_temp(DT_INT32, TensorShape({M}), indices_in), done);
+    OP_REQUIRES_OK_ASYNC(
+        c, c->allocate_temp(DT_INT32, TensorShape({N}), partitions_out), done);
+    OP_REQUIRES_OK_ASYNC(
+        c, c->allocate_temp(DT_INT32, TensorShape({N}), indices_out), done);
+  }
+
+  void AllocateOutputs(OpKernelContext* c, const Tensor* data,
+                       const Tensor* partitions, const Tensor* partition_count,
+                       OpOutputList* Tout, DoneCallback done) {
+    auto e_part_count = partition_count->flat<int32>();
+    // Allocate output tensors of the right size
+    OP_REQUIRES_OK_ASYNC(c, c->output_list("outputs", Tout), done);
+    for (int p = 0; p < num_partitions_; p++) {
+      TensorShape shape;
+      shape.AddDim(e_part_count(p));
+      for (int i = partitions->dims(); i < data->dims(); i++) {
+        shape.AddDim(data->dim_size(i));
+      }
+      Tensor* out;
+      OP_REQUIRES_OK_ASYNC(c, Tout->allocate(p, shape, &out), done);
+    }
+  }
+
+  void ComputeAsync(OpKernelContext* c, DoneCallback done) {
+    const Tensor& data = c->input(0);
+    const Tensor& partitions = c->input(1);
+
+    OP_REQUIRES_ASYNC(
+        c, TensorShapeUtils::StartsWith(data.shape(), partitions.shape()),
+        errors::InvalidArgument("data.shape must start with partitions.shape, ",
+                                "got data.shape = ", data.shape().DebugString(),
+                                ", partitions.shape = ",
+                                partitions.shape().DebugString()),
+        done);
+
+    Tensor partition_count;
+
+    // We must handle the case of empty partitions separately,
+    // because kernels don't work with 0-sized tensors.
+    if (partitions.NumElements() == 0) {
+      AllocatorAttributes alloc_attr;
+      alloc_attr.set_on_host(true);
+      OP_REQUIRES_OK_ASYNC(
+          c, c->allocate_temp(DT_INT32, TensorShape({num_partitions_}),
+                              &partition_count, alloc_attr),
+          done);
+      auto e_part_count = partition_count.flat<int32>();
+      for (int i = 0; i < num_partitions_; i++) e_part_count(i) = 0;
+      OpOutputList outputs;
+      this->AllocateOutputs(c, &data, &partitions, &partition_count, &outputs,
+                            done);
+      if (c->status().ok()) done();
+      return;
+    }
+
+    // Prepare for counting.
+    OP_REQUIRES_OK_ASYNC(
+        c, c->allocate_temp(DT_INT32, TensorShape({num_partitions_}),
+                            &partition_count),
+        done);
+    Tensor indices_out;
+    // Count how many times each partition index occurs.
+    // Also sort the info in partitions and output it in indices_out,
+    // in preparation for the next step.
+    this->CountAndSortParts(c, &partitions, &partition_count, &indices_out,
+                            done);
+    if (!c->status().ok()) return;
+
+    // In order to allocate the output tensor we have to move partition_count
+    // to CPU.
+    auto* stream = c->op_device_context()->stream();
+    OP_REQUIRES_ASYNC(c, stream, errors::Internal("No GPU stream available."),
+                      done);
+    Tensor cpu_tensor;
+    AllocatorAttributes alloc_attr;
+    alloc_attr.set_on_host(true);
+    alloc_attr.set_gpu_compatible(true);
+    OP_REQUIRES_OK_ASYNC(
+        c, c->allocate_temp(partition_count.dtype(), partition_count.shape(),
+                            &cpu_tensor, alloc_attr),
+        done);
+    perftools::gputools::DeviceMemoryBase wrapped(
+        partition_count.flat<int32>().data(), num_partitions_ * sizeof(int32));
+    const bool status =
+        stream
+            ->ThenMemcpy(cpu_tensor.flat<int32>().data(), wrapped,
+                         num_partitions_ * sizeof(int32))
+            .ok();
+    OP_REQUIRES_ASYNC(
+        c, status,
+        errors::Internal("Failed to launch copy from device to host."), done);
+
+    // Keep a reference to partition_count so that the buffer
+    // is not deallocated at the end of the function, before
+    // memcpy is completed.
+    TensorReference partition_ref(partition_count);
+    auto wrapped_callback = [this, c, &data, &partitions, indices_out,
+                             partition_ref, cpu_tensor, done]() {
+      OpOutputList outputs;
+      this->AllocateOutputs(c, &data, &partitions, &cpu_tensor, &outputs, done);
+      if (!c->status().ok()) {
+        partition_ref.Unref();
+        return;
+      }
+      int32 N = partitions.NumElements();
+      int64 slice_size = data.NumElements() / N;
+      this->GatherSlices(c, &data, &indices_out, N, slice_size, outputs);
+      partition_ref.Unref();
+      done();
+    };
+
+    c->device()->tensorflow_gpu_device_info()->event_mgr->ThenExecute(
+        stream, wrapped_callback);
+  }
+
+ protected:
+  void RadixSort(OpKernelContext* c, const Tensor* partitions,
+                 Tensor* indices_in, Tensor* partitions_out,
+                 Tensor* indices_out, DoneCallback done) {
+    int32 N = partitions->NumElements();
+    const GPUDevice& device = c->eigen_device<GPUDevice>();
+    const cudaStream_t& cu_stream = GetCudaStream(c);
+
+    // Initialize the indices_in tensor using the Range GPU kernel.
+    RangeInit(device, 0, 1, N, indices_in->flat<int32>());
+    // Obtain the pointers to inner buffers.
+    const int32* partitions_ptr = partitions->flat<int32>().data();
+    int32* partitions_out_ptr = partitions_out->flat<int32>().data();
+    int32* indices_in_ptr = indices_in->flat<int32>().data();
+    int32* indices_out_ptr = indices_out->flat<int32>().data();
+    // Determine temporary device storage requirements.
+    Tensor cub_temp_storage;
+    size_t temp_storage_bytes = 0;
+    cub::DeviceRadixSort::SortPairs(
+        NULL, temp_storage_bytes, partitions_ptr, partitions_out_ptr,
+        indices_in_ptr, indices_out_ptr, N, 0, sizeof(int32) * 8, cu_stream);
+    // Allocate temporary storage.
+    OP_REQUIRES_OK_ASYNC(
+        c, c->allocate_temp(
+               DT_INT8, TensorShape({static_cast<int64>(temp_storage_bytes)}),
+               &cub_temp_storage),
+        done);
+    // Radix-sort the partition information.
+    cub::DeviceRadixSort::SortPairs(
+        cub_temp_storage.flat<int8>().data(), temp_storage_bytes,
+        partitions_ptr, partitions_out_ptr, indices_in_ptr, indices_out_ptr, N,
+        0, sizeof(int32) * 8, cu_stream);
+  }  // At this point cub_temp_storage will be marked for deallocation.
+
+  void CountAndSortParts(OpKernelContext* c, const Tensor* partitions,
+                         Tensor* partition_count, Tensor* indices_out,
+                         DoneCallback done) {
+    const GPUDevice& device = c->eigen_device<GPUDevice>();
+    int32 N = partitions->NumElements();
+    Tensor indices_in;
+    Tensor partitions_out;
+
+    // Allocate memory for Radix-Sort.
+    this->AllocateTempSpace(c, N, &indices_in, &partitions_out, indices_out,
+                            done);
+    if (!c->status().ok()) return;
+    this->RadixSort(c, partitions, &indices_in, &partitions_out, indices_out,
+                    done);
+    if (!c->status().ok()) return;
+    // We still need a little bit of additional memory. However,
+    // we can reuse the indices_in tensor. We could also use atomic
+    // operations and no additional memory, but this approach seems faster.
+
+    // Zero-out the allocated memory.
+    functor::SetZeroFunctor<GPUDevice, int32> zero_functor;
+    zero_functor(device, partition_count->flat<int32>());
+    zero_functor(device, indices_in.flat<int32>());
+    // Obtain the pointers to inner buffers.
+    int32* start_ptr = indices_in.flat<int32>().data();
+    int32* end_ptr = partition_count->flat<int32>().data();
+    // Obtain the starting and ending points of each interval.
+    ComputeIntervals(device, &partitions_out, N, num_partitions_, start_ptr,
+                     end_ptr);
+    // Subtract to compute the number of appearances of each id.
+    ComputeItvLength(device, num_partitions_, start_ptr, end_ptr);
+  }  // At this point indices_in and partitions_out will be marked
+     // for deallocation.
+
+  void GatherSlices(OpKernelContext* c, const Tensor* data,
+                    const Tensor* indices, int32 N, int64 slice_size,
+                    OpOutputList& outs) {
+    const GPUDevice& device = c->eigen_device<GPUDevice>();
+    const int32* ind_base = indices->flat<int32>().data();
+    const T* data_base = data->flat<T>().data();
+
+    for (int p = 0; p < num_partitions_; p++) {
+      int32 indices_size = outs[p]->dim_size(0);
+      int64 out_size = outs[p]->NumElements();
+      T* out_base = outs[p]->flat<T>().data();
+      if (out_size > 0)
+        CallGatherKernel<T>(device, data_base, ind_base, out_base, N,
+                            indices_size, slice_size, out_size);
+      ind_base += indices_size;
+    }
+  }
+
+  int num_partitions_;
+};
+
+#define REGISTER_DYNAMIC_PARTITION_GPU(T)                                 \
+  REGISTER_KERNEL_BUILDER(                                                \
+      Name("DynamicPartition").Device(DEVICE_GPU).TypeConstraint<T>("T"), \
+      DynamicPartitionOpGPU<T>)
+
+TF_CALL_GPU_NUMBER_TYPES(REGISTER_DYNAMIC_PARTITION_GPU);
+TF_CALL_complex64(REGISTER_DYNAMIC_PARTITION_GPU);
+TF_CALL_complex128(REGISTER_DYNAMIC_PARTITION_GPU);
+#undef REGISTER_DYNAMIC_PARTITION_GPU
+
+}  // namespace tensorflow
+
+#endif  // GOOGLE_CUDA
diff --git a/tensorflow/core/kernels/dynamic_partition_op_test.cc b/tensorflow/core/kernels/dynamic_partition_op_test.cc
index 0e8fbc0a67..9a7ed0af21 100644
--- a/tensorflow/core/kernels/dynamic_partition_op_test.cc
+++ b/tensorflow/core/kernels/dynamic_partition_op_test.cc
@@ -16,6 +16,7 @@ limitations under the License.
 #include <functional>
 #include <memory>
 
+#include "tensorflow/core/common_runtime/kernel_benchmark_testlib.h"
 #include "tensorflow/core/framework/allocator.h"
 #include "tensorflow/core/framework/fake_input.h"
 #include "tensorflow/core/framework/node_def_builder.h"
@@ -23,10 +24,14 @@ limitations under the License.
 #include "tensorflow/core/framework/tensor.h"
 #include "tensorflow/core/framework/types.h"
 #include "tensorflow/core/framework/types.pb.h"
+#include "tensorflow/core/graph/node_builder.h"
+#include "tensorflow/core/graph/testlib.h"
 #include "tensorflow/core/kernels/ops_testutil.h"
 #include "tensorflow/core/kernels/ops_util.h"
 #include "tensorflow/core/lib/core/status_test_util.h"
+#include "tensorflow/core/lib/random/simple_philox.h"
 #include "tensorflow/core/platform/test.h"
+#include "tensorflow/core/platform/test_benchmark.h"
 
 namespace tensorflow {
 namespace {
@@ -153,5 +158,58 @@ TEST_F(DynamicPartitionOpTest, Error_IndexOutOfRange) {
       << s;
 }
 
+Node* DynamicPartitionNode(Graph* g, Node* in0, Node* in1, int num_partitions) {
+  Node* ret;
+  TF_CHECK_OK(NodeBuilder(g->NewName("n"), "DynamicPartition")
+                  .Input(in0)
+                  .Input(in1)
+                  .Attr("num_partitions", num_partitions)
+                  .Finalize(g, &ret));
+  return ret;
+}
+
+template <typename T>
+static Graph* DynamicPartition(int num_partitions, int dim) {
+  Graph* g = new Graph(OpRegistry::Global());
+  // Always use a 128MB buffer.
+  const int kRows = ((128 << 20) / sizeof(T)) / dim;
+  Tensor data(DataTypeToEnum<T>::value, TensorShape({kRows, dim}));
+  data.flat<T>().setRandom();
+
+  random::PhiloxRandom philox(301, 17);
+  random::SimplePhilox rnd(&philox);
+  Tensor partitions(DT_INT32, TensorShape({kRows}));
+  for (int i = 0; i < kRows; i++) {
+    partitions.flat<int32>()(i) = rnd.Uniform(num_partitions);
+  }
+  DynamicPartitionNode(g, test::graph::Constant(g, data),
+                       test::graph::Constant(g, partitions), num_partitions);
+  return g;
+}
+
+#define BM_DYNAMIC_PARTITION(DEVICE, T, num)                            \
+  static void BM_##DEVICE##_dynpart_##T##_##num(int iters, int dim) {   \
+    const int64 items = ((128 << 20) / sizeof(T));                      \
+    const int64 tot = static_cast<int64>(iters) * items;                \
+    testing::ItemsProcessed(tot);                                       \
+    testing::UseRealTime();                                             \
+    test::Benchmark(#DEVICE, DynamicPartition<T>(num, dim)).Run(iters); \
+  }                                                                     \
+  BENCHMARK(BM_##DEVICE##_dynpart_##T##_##num)->Arg(1)->Arg(256)
+
+BM_DYNAMIC_PARTITION(cpu, float, 2);
+BM_DYNAMIC_PARTITION(cpu, float, 100);
+BM_DYNAMIC_PARTITION(cpu, double, 2);
+BM_DYNAMIC_PARTITION(cpu, double, 100);
+BM_DYNAMIC_PARTITION(cpu, complex64, 2);
+BM_DYNAMIC_PARTITION(cpu, complex64, 100);
+
+BM_DYNAMIC_PARTITION(gpu, float, 2);
+BM_DYNAMIC_PARTITION(gpu, float, 100);
+BM_DYNAMIC_PARTITION(gpu, double, 2);
+BM_DYNAMIC_PARTITION(gpu, double, 100);
+BM_DYNAMIC_PARTITION(gpu, complex64, 2);
+BM_DYNAMIC_PARTITION(gpu, complex64, 100);
+
 }  // namespace
 }  // namespace tensorflow
diff --git a/tensorflow/core/kernels/fused_batch_norm_op.cc b/tensorflow/core/kernels/fused_batch_norm_op.cc
index 0ecb829f34..1688674eb7 100644
--- a/tensorflow/core/kernels/fused_batch_norm_op.cc
+++ b/tensorflow/core/kernels/fused_batch_norm_op.cc
@@ -54,25 +54,20 @@ struct FusedBatchNorm<CPUDevice, T, U> {
                   Tensor* batch_var_output, Tensor* saved_mean_output,
                   Tensor* saved_var_output, TensorFormat tensor_format,
                   bool is_training) {
-    // Currently U is ignored, since we only support the case where T and U are
-    // both float32.
-    // TODO(reedwm): Add float16 support, use U, and remove these asserts.
-    static_assert(std::is_same<T, float>::value, "T currently must be float.");
-    static_assert(std::is_same<U, float>::value, "U currently must be float.");
     OP_REQUIRES(context, tensor_format == FORMAT_NHWC,
                 errors::Internal("The CPU implementation of FusedBatchNorm "
                                  "only supports NHWC tensor format for now."));
     typename TTypes<T, 4>::ConstTensor x(x_input.tensor<T, 4>());
-    typename TTypes<T>::ConstVec scale(scale_input.vec<T>());
-    typename TTypes<T>::ConstVec offset(offset_input.vec<T>());
-    typename TTypes<T>::ConstVec estimated_mean(estimated_mean_input.vec<T>());
-    typename TTypes<T>::ConstVec estimated_variance(
-        estimated_variance_input.vec<T>());
+    typename TTypes<U>::ConstVec scale(scale_input.vec<U>());
+    typename TTypes<U>::ConstVec offset(offset_input.vec<U>());
+    typename TTypes<U>::ConstVec estimated_mean(estimated_mean_input.vec<U>());
+    typename TTypes<U>::ConstVec estimated_variance(
+        estimated_variance_input.vec<U>());
     typename TTypes<T, 4>::Tensor y(y_output->tensor<T, 4>());
-    typename TTypes<T>::Vec batch_mean(batch_mean_output->vec<T>());
-    typename TTypes<T>::Vec batch_var(batch_var_output->vec<T>());
-    typename TTypes<T>::Vec saved_mean(saved_mean_output->vec<T>());
-    typename TTypes<T>::Vec saved_var(saved_var_output->vec<T>());
+    typename TTypes<U>::Vec batch_mean(batch_mean_output->vec<U>());
+    typename TTypes<U>::Vec batch_var(batch_var_output->vec<U>());
+    typename TTypes<U>::Vec saved_mean(saved_mean_output->vec<U>());
+    typename TTypes<U>::Vec saved_var(saved_var_output->vec<U>());
 
     const CPUDevice& d = context->eigen_device<CPUDevice>();
 
@@ -93,15 +88,15 @@ struct FusedBatchNorm<CPUDevice, T, U> {
     bcast_spec.set(0, rest_size);
 #endif
 
-    auto x_rest_by_depth = x.reshape(rest_by_depth);
+    auto x_rest_by_depth = x.reshape(rest_by_depth).template cast<U>();
     const int rest_size_minus_one = (rest_size > 1) ? (rest_size - 1) : 1;
-    T rest_size_inv = static_cast<T>(1.0f / static_cast<T>(rest_size));
+    U rest_size_inv = static_cast<U>(1.0f / static_cast<U>(rest_size));
     // This adjustment is for Bessel's correction
-    T rest_size_adjust =
-        static_cast<T>(rest_size) / static_cast<T>(rest_size_minus_one);
+    U rest_size_adjust =
+        static_cast<U>(rest_size) / static_cast<U>(rest_size_minus_one);
 
-    Eigen::Tensor<T, 1, Eigen::RowMajor> mean(depth);
-    Eigen::Tensor<T, 1, Eigen::RowMajor> variance(depth);
+    Eigen::Tensor<U, 1, Eigen::RowMajor> mean(depth);
+    Eigen::Tensor<U, 1, Eigen::RowMajor> variance(depth);
     if (is_training) {
       mean.device(d) = (x_rest_by_depth.sum(reduce_dims) * rest_size_inv);
       batch_mean.device(d) = mean;
@@ -129,7 +124,7 @@ struct FusedBatchNorm<CPUDevice, T, U> {
     auto x_shifted =
         x_scaled + offset.reshape(one_by_depth).broadcast(bcast_spec);
 
-    y.reshape(rest_by_depth).device(d) = x_shifted;
+    y.reshape(rest_by_depth).device(d) = x_shifted.template cast<T>();
   }
 };
 
@@ -138,7 +133,7 @@ struct FusedBatchNormGrad<CPUDevice, T, U> {
   void operator()(OpKernelContext* context, const Tensor& y_backprop_input,
                   const Tensor& x_input, const Tensor& scale_input,
                   const Tensor& mean_input, const Tensor& variance_input,
-                  T epsilon, Tensor* x_backprop_output,
+                  U epsilon, Tensor* x_backprop_output,
                   Tensor* scale_backprop_output, Tensor* offset_backprop_output,
                   TensorFormat tensor_format) {
     OP_REQUIRES(context, tensor_format == FORMAT_NHWC,
@@ -147,12 +142,12 @@ struct FusedBatchNormGrad<CPUDevice, T, U> {
     typename TTypes<T, 4>::ConstTensor y_backprop(
         y_backprop_input.tensor<T, 4>());
     typename TTypes<T, 4>::ConstTensor x(x_input.tensor<T, 4>());
-    typename TTypes<T>::ConstVec scale(scale_input.vec<T>());
-    typename TTypes<T>::ConstVec mean(mean_input.vec<T>());
-    typename TTypes<T>::ConstVec variance(variance_input.vec<T>());
+    typename TTypes<U>::ConstVec scale(scale_input.vec<U>());
+    typename TTypes<U>::ConstVec mean(mean_input.vec<U>());
+    typename TTypes<U>::ConstVec variance(variance_input.vec<U>());
     typename TTypes<T, 4>::Tensor x_backprop(x_backprop_output->tensor<T, 4>());
-    typename TTypes<T>::Vec scale_backprop(scale_backprop_output->vec<T>());
-    typename TTypes<T>::Vec offset_backprop(offset_backprop_output->vec<T>());
+    typename TTypes<U>::Vec scale_backprop(scale_backprop_output->vec<U>());
+    typename TTypes<U>::Vec offset_backprop(offset_backprop_output->vec<U>());
 
     // Note: the following formulas are used to compute the gradients for
     // back propagation.
@@ -181,8 +176,8 @@ struct FusedBatchNormGrad<CPUDevice, T, U> {
     bcast_spec.set(0, rest_size);
 #endif
 
-    auto x_rest_by_depth = x.reshape(rest_by_depth);
-    T rest_size_inv = static_cast<T>(1.0f / static_cast<T>(rest_size));
+    auto x_rest_by_depth = x.reshape(rest_by_depth).template cast<U>();
+    U rest_size_inv = static_cast<U>(1.0f / static_cast<U>(rest_size));
 
     auto x_mean_rest_by_depth =
         mean.reshape(one_by_depth).broadcast(bcast_spec);
@@ -192,7 +187,8 @@ struct FusedBatchNormGrad<CPUDevice, T, U> {
         coef0.eval().reshape(one_by_depth).broadcast(bcast_spec);
     auto x_scaled = x_centered * coef0_rest_by_depth;
 
-    auto y_backprop_rest_by_depth = y_backprop.eval().reshape(rest_by_depth);
+    auto y_backprop_rest_by_depth =
+        y_backprop.eval().reshape(rest_by_depth).template cast<U>();
     scale_backprop.device(d) =
         (y_backprop_rest_by_depth * x_scaled).sum(reduce_dims);
     auto y_backprop_sum = y_backprop_rest_by_depth.sum(reduce_dims);
@@ -214,7 +210,7 @@ struct FusedBatchNormGrad<CPUDevice, T, U> {
                      .reshape(one_by_depth)
                      .broadcast(bcast_spec);
     x_backprop.reshape(rest_by_depth).device(d) =
-        coef1 * (y_backprop_centered - x_centered * coef2);
+        (coef1 * (y_backprop_centered - x_centered * coef2)).template cast<T>();
   }
 };
 
@@ -689,6 +685,18 @@ REGISTER_KERNEL_BUILDER(Name("FusedBatchNormGradV2")
                             .TypeConstraint<float>("U"),
                         FusedBatchNormGradOp<CPUDevice, float, float>);
 
+REGISTER_KERNEL_BUILDER(Name("FusedBatchNormV2")
+                            .Device(DEVICE_CPU)
+                            .TypeConstraint<Eigen::half>("T")
+                            .TypeConstraint<float>("U"),
+                        FusedBatchNormOp<CPUDevice, Eigen::half, float>);
+
+REGISTER_KERNEL_BUILDER(Name("FusedBatchNormGradV2")
+                            .Device(DEVICE_CPU)
+                            .TypeConstraint<Eigen::half>("T")
+                            .TypeConstraint<float>("U"),
+                        FusedBatchNormGradOp<CPUDevice, Eigen::half, float>);
+
 #if GOOGLE_CUDA
 
 REGISTER_KERNEL_BUILDER(
diff --git a/tensorflow/core/kernels/fused_batch_norm_op.h b/tensorflow/core/kernels/fused_batch_norm_op.h
index 38b24d7011..3af104bf95 100644
--- a/tensorflow/core/kernels/fused_batch_norm_op.h
+++ b/tensorflow/core/kernels/fused_batch_norm_op.h
@@ -92,26 +92,28 @@ struct FusedBatchNormFreezeGrad {
     // offset_backprop  = sum(y_backprop)
     // scale_backprop = y_backprop * ((x - pop_mean) * rsqrt(pop_var + epsilon))
     // x_backprop = y_backprop * (scale * rsqrt(pop_var + epsilon))
-    offset_backprop.device(d) = y_backprop.reshape(rest_by_depth)
-                                    .template cast<U>()
-                                    .sum(reduction_axis);
+
+    auto y_backprop_rest_by_depth =
+        y_backprop.reshape(rest_by_depth).template cast<U>();
+    auto input_rest_by_depth = input.reshape(rest_by_depth).template cast<U>();
+
+    offset_backprop.device(d) = y_backprop_rest_by_depth.sum(reduction_axis);
 
     // scratch1 = rsqrt(pop_var + epsilon)
     scratch1.device(d) = (pop_var + pop_var.constant(epsilon)).rsqrt();
 
     // scratch2 = sum(y_backprop * (x - mean))
     scratch2.device(d) =
-        (y_backprop.reshape(rest_by_depth).template cast<U>() *
-         (input.reshape(rest_by_depth).template cast<U>() -
+        (y_backprop_rest_by_depth *
+         (input_rest_by_depth -
           pop_mean.reshape(one_by_depth).broadcast(rest_by_one)))
             .sum(reduction_axis);
 
     x_backprop.reshape(rest_by_depth).device(d) =
-        (y_backprop.reshape(rest_by_depth).template cast<U>() *
-         ((scratch1 * scale)
-              .eval()
-              .reshape(one_by_depth)
-              .broadcast(rest_by_one)))
+        (y_backprop_rest_by_depth * ((scratch1 * scale)
+                                         .eval()
+                                         .reshape(one_by_depth)
+                                         .broadcast(rest_by_one)))
             .template cast<T>();
     scale_backprop.device(d) = scratch2 * scratch1;
   }
diff --git a/tensorflow/core/kernels/lmdb_reader_op.cc b/tensorflow/core/kernels/lmdb_reader_op.cc
index 3bb07301b5..31a427f2c9 100755
--- a/tensorflow/core/kernels/lmdb_reader_op.cc
+++ b/tensorflow/core/kernels/lmdb_reader_op.cc
@@ -36,7 +36,7 @@ class LMDBReader : public ReaderBase {
 
   Status OnWorkStartedLocked() override {
     MDB_CHECK(mdb_env_create(&mdb_env_));
-    int flags = MDB_RDONLY | MDB_NOTLS;
+    int flags = MDB_RDONLY | MDB_NOTLS | MDB_NOLOCK;
 
     // Check if the LMDB filename is actually a file instead of a directory.
     // If so, set appropriate flags so we can open it.
@@ -57,10 +57,13 @@ class LMDBReader : public ReaderBase {
     if (mdb_env_ != nullptr) {
       if (mdb_cursor_) {
         mdb_cursor_close(mdb_cursor_);
+        mdb_cursor_ = nullptr;
       }
-      mdb_txn_abort(mdb_txn_);
       mdb_dbi_close(mdb_env_, mdb_dbi_);
+      mdb_txn_abort(mdb_txn_);
       mdb_env_close(mdb_env_);
+      mdb_txn_ = nullptr;
+      mdb_dbi_ = 0;
       mdb_env_ = nullptr;
     }
     return Status::OK();
diff --git a/tensorflow/core/kernels/maxpooling_op.cc b/tensorflow/core/kernels/maxpooling_op.cc
index e2cf605811..157ce106ce 100644
--- a/tensorflow/core/kernels/maxpooling_op.cc
+++ b/tensorflow/core/kernels/maxpooling_op.cc
@@ -20,7 +20,6 @@ limitations under the License.
 #include "tensorflow/core/kernels/maxpooling_op.h"
 
 #include <vector>
-#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
 #include "tensorflow/core/common_runtime/device.h"
 #include "tensorflow/core/framework/numeric_op.h"
 #include "tensorflow/core/framework/op_kernel.h"
@@ -34,9 +33,11 @@ limitations under the License.
 #include "tensorflow/core/kernels/pooling_ops_common.h"
 #include "tensorflow/core/lib/core/errors.h"
 #include "tensorflow/core/lib/gtl/array_slice.h"
+#include "tensorflow/core/util/env_var.h"
 #include "tensorflow/core/util/padding.h"
 #include "tensorflow/core/util/tensor_format.h"
 #include "tensorflow/core/util/use_cudnn.h"
+#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
 
 #if GOOGLE_CUDA
 #include "tensorflow/core/kernels/maxpooling_op_gpu.h"
@@ -358,6 +359,7 @@ class MaxPoolingGradOp<Eigen::GpuDevice, T> : public OpKernel {
     OP_REQUIRES_OK(context, context->GetAttr("padding", &padding_));
 
     use_dnn_ = CanUseCudnn();
+    ReadBoolFromEnvVar("TF_ENABLE_MAXPOOL_NANPROP", false, &propagate_nans_);
   }
 
   void Compute(OpKernelContext* context) override {
@@ -405,7 +407,7 @@ class MaxPoolingGradOp<Eigen::GpuDevice, T> : public OpKernel {
       DnnPoolingGradOp<T>::Compute(
           context, perftools::gputools::dnn::PoolingMode::kMaximum, ksize,
           stride, padding_, data_format_, &tensor_in, &tensor_out, out_backprop,
-          output_shape);
+          output_shape, propagate_nans_);
     } else {
       CHECK(data_format_ == FORMAT_NHWC)
           << "Non-Cudnn MaxPoolGrad only supports NHWC format";
@@ -420,6 +422,7 @@ class MaxPoolingGradOp<Eigen::GpuDevice, T> : public OpKernel {
   Padding padding_;
   TensorFormat data_format_;
   bool use_dnn_;
+  bool propagate_nans_;
 };
 
 #endif  // GOOGLE_CUDA
@@ -884,6 +887,8 @@ class MaxPoolingWithArgmaxOp : public OpKernel {
     OP_REQUIRES(context, ksize_[0] == 1 && stride_[0] == 1,
                 errors::Unimplemented(
                     "Pooling is not yet supported on the batch dimension."));
+
+    ReadBoolFromEnvVar("TF_ENABLE_MAXPOOL_NANPROP", false, &propagate_nans_);
   }
 
   void Compute(OpKernelContext* context) override {
@@ -902,14 +907,15 @@ class MaxPoolingWithArgmaxOp : public OpKernel {
     Tensor* argmax = nullptr;
     OP_REQUIRES_OK(context, context->allocate_output(1, out_shape, &argmax));
 
-    LaunchMaxPoolingWithArgmax<Device, T>::launch(context, params, tensor_in,
-                                                  output, argmax);
+    LaunchMaxPoolingWithArgmax<Device, T>::launch(
+        context, params, tensor_in, output, argmax, propagate_nans_);
   }
 
  private:
   std::vector<int32> ksize_;
   std::vector<int32> stride_;
   Padding padding_;
+  bool propagate_nans_;
 };
 
 template <typename Device, typename T>
@@ -1045,6 +1051,8 @@ class MaxPoolingNoMaskOp<GPUDevice, T> : public OpKernel {
                 errors::Unimplemented(
                     "Pooling is not yet supported on the batch dimension."));
     use_dnn_ = CanUseCudnn();
+
+    ReadBoolFromEnvVar("TF_ENABLE_MAXPOOL_NANPROP", false, &propagate_nans_);
   }
 
   void Compute(OpKernelContext* context) override {
@@ -1068,9 +1076,10 @@ class MaxPoolingNoMaskOp<GPUDevice, T> : public OpKernel {
 
     // These is_int8x4 checks avoid linker errors for missing qint8 kernels.
     if (!is_int8x4 && use_dnn_ && data_format_ == FORMAT_NCHW) {
-      DnnPoolingOp<T>::Compute(
-          context, perftools::gputools::dnn::PoolingMode::kMaximum, ksize_,
-          stride_, padding_, data_format_, tensor_in, out_shape);
+      DnnPoolingOp<T>::Compute(context,
+                               perftools::gputools::dnn::PoolingMode::kMaximum,
+                               ksize_, stride_, padding_, data_format_,
+                               tensor_in, out_shape, propagate_nans_);
     } else {
       Tensor* output = nullptr;
       OP_REQUIRES_OK(context, context->allocate_output(0, out_shape, &output));
@@ -1079,7 +1088,7 @@ class MaxPoolingNoMaskOp<GPUDevice, T> : public OpKernel {
                                                            tensor_in, output);
       } else if (data_format_ == FORMAT_NHWC) {
         LaunchMaxPoolingNoMask<Device, T>::launch(context, params, tensor_in,
-                                                  output);
+                                                  output, propagate_nans_);
       } else {
         LOG(FATAL) << "MaxPool currently only supports the following (layout, "
                       "type) combinations: (NHWC, non-qint8), "
@@ -1098,6 +1107,7 @@ class MaxPoolingNoMaskOp<GPUDevice, T> : public OpKernel {
   Padding padding_;
   TensorFormat data_format_;
   bool use_dnn_;
+  bool propagate_nans_;
 };
 
 template <typename T>
@@ -1127,6 +1137,7 @@ class MaxPoolingNoMaskV2Op<GPUDevice, T> : public OpKernel {
     }
     OP_REQUIRES_OK(context, context->GetAttr("padding", &padding_));
     use_dnn_ = CanUseCudnn();
+    ReadBoolFromEnvVar("TF_ENABLE_MAXPOOL_NANPROP", false, &propagate_nans_);
   }
 
   void Compute(OpKernelContext* context) override {
@@ -1168,16 +1179,17 @@ class MaxPoolingNoMaskV2Op<GPUDevice, T> : public OpKernel {
         ShapeFromFormat(data_format_, params.tensor_in_batch, params.out_height,
                         params.out_width, params.depth);
     if (use_dnn_ && data_format_ == FORMAT_NCHW) {
-      DnnPoolingOp<T>::Compute(
-          context, perftools::gputools::dnn::PoolingMode::kMaximum, ksize,
-          stride, padding_, data_format_, tensor_in, out_shape);
+      DnnPoolingOp<T>::Compute(context,
+                               perftools::gputools::dnn::PoolingMode::kMaximum,
+                               ksize, stride, padding_, data_format_, tensor_in,
+                               out_shape, propagate_nans_);
     } else {
       CHECK(data_format_ == FORMAT_NHWC)
           << "Non-Cudnn MaxPool only supports NHWC format";
       Tensor* output = nullptr;
       OP_REQUIRES_OK(context, context->allocate_output(0, out_shape, &output));
       LaunchMaxPoolingNoMask<Device, T>::launch(context, params, tensor_in,
-                                                output);
+                                                output, propagate_nans_);
     }
   }
 
@@ -1187,18 +1199,20 @@ class MaxPoolingNoMaskV2Op<GPUDevice, T> : public OpKernel {
   Padding padding_;
   TensorFormat data_format_;
   bool use_dnn_;
+  bool propagate_nans_;
 };
 
 template <typename T>
 struct LaunchMaxPoolingNoMask<Eigen::GpuDevice, T> {
   static void launch(OpKernelContext* context, const PoolParameters& params,
-                     const Tensor& input, Tensor* output) {
+                     const Tensor& input, Tensor* output, bool propagate_nans) {
     bool status = functor::MaxPoolForwardWithOptionalArgmax<T>()(
         input.flat<T>().data(), params.tensor_in_batch, params.tensor_in_rows,
         params.tensor_in_cols, params.depth, params.out_height,
         params.out_width, params.window_rows, params.window_cols,
         params.row_stride, params.col_stride, params.pad_rows, params.pad_cols,
-        output->flat<T>().data(), nullptr, context->eigen_gpu_device());
+        output->flat<T>().data(), nullptr, context->eigen_gpu_device(),
+        propagate_nans);
     if (!status) {
       context->SetStatus(
           errors::Internal("Failed launching MaxPoolForwardNoMask"));
@@ -1209,7 +1223,8 @@ struct LaunchMaxPoolingNoMask<Eigen::GpuDevice, T> {
 template <typename T>
 struct LaunchMaxPoolingWithArgmax<Eigen::GpuDevice, T> {
   static void launch(OpKernelContext* context, const PoolParameters& params,
-                     const Tensor& input, Tensor* output, Tensor* argmax) {
+                     const Tensor& input, Tensor* output, Tensor* argmax,
+                     bool propagate_nans) {
     bool status = functor::MaxPoolForwardWithOptionalArgmax<T>()(
         input.flat<T>().data(), params.tensor_in_batch, params.tensor_in_rows,
         params.tensor_in_cols, params.depth, params.out_height,
@@ -1217,7 +1232,7 @@ struct LaunchMaxPoolingWithArgmax<Eigen::GpuDevice, T> {
         params.row_stride, params.col_stride, params.pad_rows, params.pad_cols,
         output->flat<T>().data(),
         reinterpret_cast<int64*>(argmax->flat<int64>().data()),
-        context->eigen_gpu_device());
+        context->eigen_gpu_device(), propagate_nans);
     if (!status) {
       context->SetStatus(
           errors::Internal("Failed launching MaxPoolForwardWithArgmax"));
diff --git a/tensorflow/core/kernels/maxpooling_op_gpu.cu.cc b/tensorflow/core/kernels/maxpooling_op_gpu.cu.cc
index 26f5274804..d96b844383 100644
--- a/tensorflow/core/kernels/maxpooling_op_gpu.cu.cc
+++ b/tensorflow/core/kernels/maxpooling_op_gpu.cu.cc
@@ -29,6 +29,15 @@ limitations under the License.
 
 namespace tensorflow {
 namespace {
+template <bool propagate_nans, typename dtype>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool IsGreaterThan(dtype a, dtype b) {
+  if (propagate_nans) {
+    return !(a <= b);
+  } else {
+    return a > b;
+  }
+}
+
 // This is Yangqing's custom kernel for the maxpooling operation. There are
 // three functions: MaxPoolForwardNCHW and MaxPoolForwardNHWC are the two
 // forward functions, dealing with the forward case. MaxPoolBackward is the
@@ -51,7 +60,7 @@ namespace {
 // const int output_size = batch * channels * pooled_height * pooled_width;
 // MaxPoolForwardNCHW<<<(output_size + kThreadsPerBlock - 1) / kThreadsPerBlock,
 //                      kThreadsPerBlock, 0, cuda_stream>>>(...);
-template <typename dtype>
+template <bool propagate_nans, typename dtype>
 __global__ void MaxPoolForwardNCHW(const int nthreads, const dtype* bottom_data,
                                    const int channels, const int height,
                                    const int width, const int pooled_height,
@@ -77,7 +86,7 @@ __global__ void MaxPoolForwardNCHW(const int nthreads, const dtype* bottom_data,
     for (int h = hstart; h < hend; ++h) {
       for (int w = wstart; w < wend; ++w) {
         int idx = c * height * width + h * width + w;
-        if (bottom_data_n[idx] > maxval) {
+        if (IsGreaterThan<propagate_nans>(bottom_data_n[idx], maxval)) {
           maxidx = idx;
           maxval = bottom_data_n[idx];
         }
@@ -126,7 +135,7 @@ __global__ void MaxPoolForwardNoMaskKernel_NCHW_VECT_C(
   }
 }
 
-template <typename dtype>
+template <bool propagate_nans, typename dtype>
 __global__ void MaxPoolForwardNHWC(const int nthreads, const dtype* bottom_data,
                                    const int height, const int width,
                                    const int channels, const int pooled_height,
@@ -153,7 +162,7 @@ __global__ void MaxPoolForwardNHWC(const int nthreads, const dtype* bottom_data,
     for (int h = hstart; h < hend; ++h) {
       for (int w = wstart; w < wend; ++w) {
         int idx = (h * width + w) * channels + c;
-        if (bottom_data_n[idx] > maxval) {
+        if (IsGreaterThan<propagate_nans>(bottom_data_n[idx], maxval)) {
           maxidx = idx;
           maxval = bottom_data_n[idx];
         }
@@ -390,15 +399,24 @@ bool MaxPoolForwardWithOptionalArgmax<T>::operator()(
     const int channels, const int pooled_height, const int pooled_width,
     const int kernel_h, const int kernel_w, const int stride_h,
     const int stride_w, const int pad_t, const int pad_l, T* top_data,
-    int64* mask, const Eigen::GpuDevice& d) {
+    int64* mask, const Eigen::GpuDevice& d, bool propagate_nans) {
   const int kThreadsPerBlock = 1024;
   const int output_size = batch * channels * pooled_height * pooled_width;
-
-  MaxPoolForwardNHWC<<<(output_size + kThreadsPerBlock - 1) / kThreadsPerBlock,
-                       kThreadsPerBlock, 0, d.stream()>>>(
-      output_size, bottom_data, height, width, channels, pooled_height,
-      pooled_width, kernel_h, kernel_w, stride_h, stride_w, pad_t, pad_l,
-      top_data, mask);
+  if (propagate_nans) {
+    MaxPoolForwardNHWC<true>
+        <<<(output_size + kThreadsPerBlock - 1) / kThreadsPerBlock,
+           kThreadsPerBlock, 0, d.stream()>>>
+        (output_size, bottom_data, height, width, channels, pooled_height,
+         pooled_width, kernel_h, kernel_w, stride_h, stride_w, pad_t, pad_l,
+         top_data, mask);
+  } else {
+    MaxPoolForwardNHWC<false>
+        <<<(output_size + kThreadsPerBlock - 1) / kThreadsPerBlock,
+           kThreadsPerBlock, 0, d.stream()>>>
+        (output_size, bottom_data, height, width, channels, pooled_height,
+         pooled_width, kernel_h, kernel_w, stride_h, stride_w, pad_t, pad_l,
+         top_data, mask);
+  }
   return d.ok();
 }
 
diff --git a/tensorflow/core/kernels/maxpooling_op_gpu.h b/tensorflow/core/kernels/maxpooling_op_gpu.h
index 34203797cf..38ebb34248 100644
--- a/tensorflow/core/kernels/maxpooling_op_gpu.h
+++ b/tensorflow/core/kernels/maxpooling_op_gpu.h
@@ -39,7 +39,7 @@ struct MaxPoolForwardWithOptionalArgmax {
                   const int pooled_width, const int kernel_h,
                   const int kernel_w, const int stride_h, const int stride_w,
                   const int pad_t, const int pad_l, T* top_data, int64* mask,
-                  const Eigen::GpuDevice& d);
+                  const Eigen::GpuDevice& d, bool propagate_nans);
 };
 
 struct MaxPoolForwardNoMask_NCHW_VECT_C {
diff --git a/tensorflow/core/kernels/mkl_conv_grad_filter_ops.cc b/tensorflow/core/kernels/mkl_conv_grad_filter_ops.cc
index 9080bf7be8..f291281108 100644
--- a/tensorflow/core/kernels/mkl_conv_grad_filter_ops.cc
+++ b/tensorflow/core/kernels/mkl_conv_grad_filter_ops.cc
@@ -45,12 +45,12 @@ limitations under the License.
 #ifdef INTEL_MKL_DNN
 #include "mkldnn.hpp"
 
-using mkldnn::prop_kind;
 using mkldnn::stream;
+using mkldnn::prop_kind;
 
+using mkldnn::convolution_forward;
 using mkldnn::convolution_backward_weights;
 using mkldnn::convolution_direct;
-using mkldnn::convolution_forward;
 
 #endif
 
@@ -463,13 +463,12 @@ class MklConv2DCustomBackpropFilterOp : public OpKernel {
 
       // Generate input shapes.
       TensorShape filter_shape;
-      OP_REQUIRES(
-          context, TensorShapeUtils::IsVector(filter_tensor.shape()),
-          errors::InvalidArgument(
+      OP_REQUIRES(context, TensorShapeUtils::IsVector(filter_tensor.shape()),
+        errors::InvalidArgument(
               "Conv2DBackpropFilter: filter_sizes input must be 1-dim, not ",
               filter_tensor.dims()));
       OP_REQUIRES_OK(context, TensorShapeUtils::MakeShape(
-                                  filter_tensor.vec<int32>(), &filter_shape));
+                        filter_tensor.vec<int32>(), &filter_shape));
       TensorShape input_shape = input_tensor.shape();
       TensorShape obp_shape = obp_tensor.shape();
 
@@ -481,26 +480,27 @@ class MklConv2DCustomBackpropFilterOp : public OpKernel {
 
       // Get forward convolution parameters.
       MklDnnConvUtil conv_utl(context, strides_, padding_, data_format_);
-      conv_utl.GetConvFwdSizesInMklOrder(
-          input_shape, filter_shape, &fwd_input_dims, &fwd_filter_dims,
-          &strides, &fwd_output_dims_tf_order, &fwd_output_dims, &padding_l,
-          &padding_r);
+      conv_utl.GetConvFwdSizesInMklOrder(input_shape, filter_shape,
+                                         &fwd_input_dims, &fwd_filter_dims,
+                                         &strides,
+                                         &fwd_output_dims_tf_order,
+                                         &fwd_output_dims,
+                                         &padding_l, &padding_r);
       if (!context->status().ok()) return;
 
       // Create Convolution forward descriptor since Convolution backward
       // API needs it. For that, we first need to create input, filter
       // and output memory descriptors.
       auto mkl_data_format = TFDataFormatToMklDnnDataFormat(data_format_);
-      auto fwd_src_md =
-          memory::desc(fwd_input_dims, MklDnnType<T>(), mkl_data_format);
-      auto fwd_filter_md =
-          memory::desc(fwd_filter_dims, MklDnnType<T>(), memory::format::hwio);
-      auto fwd_out_md =
-          memory::desc(fwd_output_dims, MklDnnType<T>(), mkl_data_format);
-      auto fwd_desc = convolution_forward::desc(
-          prop_kind::forward, convolution_direct, fwd_src_md, fwd_filter_md,
-          fwd_out_md, strides, padding_l, padding_r,
-          TFPaddingToMklDnnPadding(padding_));
+      auto fwd_src_md = memory::desc(fwd_input_dims, MklDnnType<T>(),
+                                     mkl_data_format);
+      auto fwd_filter_md = memory::desc(fwd_filter_dims, MklDnnType<T>(),
+                                        memory::format::hwio);
+      auto fwd_out_md = memory::desc(fwd_output_dims, MklDnnType<T>(),
+                                     mkl_data_format);
+      auto fwd_desc = convolution_forward::desc(prop_kind::forward,
+            convolution_direct, fwd_src_md, fwd_filter_md, fwd_out_md,
+            strides, padding_l, padding_r, TFPaddingToMklDnnPadding(padding_));
       auto fwd_pd = convolution_forward::primitive_desc(fwd_desc, cpu_engine);
 
       // Allocate output tensor and shape
@@ -537,22 +537,23 @@ class MklConv2DCustomBackpropFilterOp : public OpKernel {
       output.SetOpMemDesc(bwd_output_dims, memory::format::any);
 
       // Create convolution backward weights primitive.
-      auto bwd_desc = convolution_backward_weights::desc(
-          convolution_direct, input.GetOpMemDesc(), output.GetOpMemDesc(),
-          outbackprop.GetOpMemDesc(), strides, padding_l, padding_r,
-          TFPaddingToMklDnnPadding(padding_));
+      auto bwd_desc = convolution_backward_weights::desc(convolution_direct,
+                          input.GetOpMemDesc(), output.GetOpMemDesc(),
+                          outbackprop.GetOpMemDesc(), strides, padding_l,
+                          padding_r, TFPaddingToMklDnnPadding(padding_));
 
-      auto bwd_pd = convolution_backward_weights::primitive_desc(
-          bwd_desc, cpu_engine, fwd_pd);
+      auto bwd_pd = convolution_backward_weights::primitive_desc(bwd_desc,
+                                                              cpu_engine,
+                                                              fwd_pd);
 
       PrepareAndExecutePrimitive(bwd_pd, &input, &outbackprop, &output);
-    } catch (mkldnn::error& e) {
-      string error_msg = "Status: " + std::to_string(e.status) +
-                         ", message: " + string(e.message) + ", in file " +
-                         string(__FILE__) + ":" + std::to_string(__LINE__);
-      OP_REQUIRES_OK(
-          context,
-          errors::Aborted("Operation received an exception:", error_msg));
+    } catch (mkldnn::error &e) {
+     string error_msg = "Status: " + std::to_string(e.status) +
+                       ", message: " + string(e.message) +
+                       ", in file " + string(__FILE__) + ":" +
+                       std::to_string(__LINE__);
+     OP_REQUIRES_OK(context, errors::Aborted("Operation received an exception:",
+                                            error_msg));
     }
   }
 
@@ -563,8 +564,9 @@ class MklConv2DCustomBackpropFilterOp : public OpKernel {
 
   // Prepare and execute net - checks for input and output reorders.
   void PrepareAndExecutePrimitive(
-      const convolution_backward_weights::primitive_desc& conv_pd,
-      MklDnnData<T>* input, MklDnnData<T>* obp, MklDnnData<T>* output) {
+                  const convolution_backward_weights::primitive_desc& conv_pd,
+                  MklDnnData<T>* input, MklDnnData<T>* obp,
+                  MklDnnData<T>* output) {
     // Create reorders between user layout and MKL layout if it is needed and
     // add it to the net before convolution.
     std::vector<primitive> net;
@@ -575,10 +577,10 @@ class MklConv2DCustomBackpropFilterOp : public OpKernel {
     // output side, we will prepare reorder primitive in case output
     // reorder to user memory is required.
     bool output_reorder_required = output->PrepareReorderToUserMemIfReq(
-        conv_pd.diff_weights_primitive_desc());
+                                      conv_pd.diff_weights_primitive_desc());
 
-    net.push_back(convolution_backward_weights(
-        conv_pd, input->GetOpMem(), obp->GetOpMem(), output->GetOpMem()));
+    net.push_back(convolution_backward_weights(conv_pd, input->GetOpMem(),
+                                    obp->GetOpMem(), output->GetOpMem()));
 
     // Insert reorder primitive in the net for output reorder if reorder is
     // required.
diff --git a/tensorflow/core/kernels/mkl_conv_grad_input_ops.cc b/tensorflow/core/kernels/mkl_conv_grad_input_ops.cc
index 4b6bf92e42..4a47d0463e 100644
--- a/tensorflow/core/kernels/mkl_conv_grad_input_ops.cc
+++ b/tensorflow/core/kernels/mkl_conv_grad_input_ops.cc
@@ -23,8 +23,6 @@ limitations under the License.
 #define EIGEN_USE_THREADS
 #include <algorithm>
 #include <vector>
-#include "mkl_dnn.h"
-#include "mkl_dnn_types.h"
 #include "tensorflow/core/framework/numeric_op.h"
 #include "tensorflow/core/framework/op_kernel.h"
 #include "tensorflow/core/framework/register_types.h"
@@ -43,16 +41,18 @@ limitations under the License.
 #include "tensorflow/core/util/tensor_format.h"
 #include "tensorflow/core/util/use_cudnn.h"
 #include "tensorflow/core/util/work_sharder.h"
+#include "mkl_dnn.h"
+#include "mkl_dnn_types.h"
 
 #ifdef INTEL_MKL_DNN
 #include "mkldnn.hpp"
 
-using mkldnn::prop_kind;
 using mkldnn::stream;
+using mkldnn::prop_kind;
 
-using mkldnn::convolution_backward_data;
-using mkldnn::convolution_direct;
 using mkldnn::convolution_forward;
+using mkldnn::convolution_direct;
+using mkldnn::convolution_backward_data;
 #endif
 
 namespace tensorflow {
@@ -397,13 +397,12 @@ class MklConv2DCustomBackpropInputOp : public OpKernel {
 
       // Generate input shape.
       TensorShape input_shape;
-      OP_REQUIRES(
-          context, TensorShapeUtils::IsVector(input_tensor.shape()),
-          errors::InvalidArgument(
+      OP_REQUIRES(context, TensorShapeUtils::IsVector(input_tensor.shape()),
+        errors::InvalidArgument(
               "Conv2DBackpropInput: input_sizes input must be 1-dim, not ",
               input_tensor.dims()));
       OP_REQUIRES_OK(context, TensorShapeUtils::MakeShape(
-                                  input_tensor.vec<int32>(), &input_shape));
+                        input_tensor.vec<int32>(), &input_shape));
       TensorShape filter_shape = filter_tensor.shape();
       TensorShape obp_shape = obp_tensor.shape();
 
@@ -415,26 +414,27 @@ class MklConv2DCustomBackpropInputOp : public OpKernel {
 
       // Get forward convolution parameters.
       MklDnnConvUtil conv_utl(context, strides_, padding_, data_format_);
-      conv_utl.GetConvFwdSizesInMklOrder(
-          input_shape, filter_shape, &fwd_input_dims, &fwd_filter_dims,
-          &strides, &fwd_output_dims_tf_order, &fwd_output_dims, &padding_l,
-          &padding_r);
+      conv_utl.GetConvFwdSizesInMklOrder(input_shape, filter_shape,
+                                         &fwd_input_dims, &fwd_filter_dims,
+                                         &strides,
+                                         &fwd_output_dims_tf_order,
+                                         &fwd_output_dims,
+                                         &padding_l, &padding_r);
       if (!context->status().ok()) return;
 
       // Create Convolution forward descriptor since Convolution backward
       // API needs it. For that, we first need to create input, filter
       // and output memory descriptors.
       auto mkl_data_format = TFDataFormatToMklDnnDataFormat(data_format_);
-      auto fwd_src_md =
-          memory::desc(fwd_input_dims, MklDnnType<T>(), mkl_data_format);
-      auto fwd_filter_md =
-          memory::desc(fwd_filter_dims, MklDnnType<T>(), memory::format::hwio);
-      auto fwd_out_md =
-          memory::desc(fwd_output_dims, MklDnnType<T>(), mkl_data_format);
-      auto fwd_desc = convolution_forward::desc(
-          prop_kind::forward, convolution_direct, fwd_src_md, fwd_filter_md,
-          fwd_out_md, strides, padding_l, padding_r,
-          TFPaddingToMklDnnPadding(padding_));
+      auto fwd_src_md = memory::desc(fwd_input_dims, MklDnnType<T>(),
+                                     mkl_data_format);
+      auto fwd_filter_md = memory::desc(fwd_filter_dims, MklDnnType<T>(),
+                                        memory::format::hwio);
+      auto fwd_out_md = memory::desc(fwd_output_dims, MklDnnType<T>(),
+                                     mkl_data_format);
+      auto fwd_desc = convolution_forward::desc(prop_kind::forward,
+            convolution_direct, fwd_src_md, fwd_filter_md, fwd_out_md,
+            strides, padding_l, padding_r, TFPaddingToMklDnnPadding(padding_));
       auto fwd_pd = convolution_forward::primitive_desc(fwd_desc, cpu_engine);
 
       // Allocate output tensor and shape
@@ -475,22 +475,23 @@ class MklConv2DCustomBackpropInputOp : public OpKernel {
       output.SetOpMemDesc(bwd_output_dims, memory::format::any);
 
       // Create convolution backward data primitive.
-      auto bwd_desc = convolution_backward_data::desc(
-          convolution_direct, output.GetOpMemDesc(), filter.GetOpMemDesc(),
-          outbackprop.GetOpMemDesc(), strides, padding_l, padding_r,
-          TFPaddingToMklDnnPadding(padding_));
+      auto bwd_desc = convolution_backward_data::desc(convolution_direct,
+                          output.GetOpMemDesc(), filter.GetOpMemDesc(),
+                          outbackprop.GetOpMemDesc(), strides, padding_l,
+                          padding_r, TFPaddingToMklDnnPadding(padding_));
 
-      auto bwd_pd = convolution_backward_data::primitive_desc(
-          bwd_desc, cpu_engine, fwd_pd);
+      auto bwd_pd = convolution_backward_data::primitive_desc(bwd_desc,
+                                                              cpu_engine,
+                                                              fwd_pd);
 
       PrepareAndExecutePrimitive(bwd_pd, &filter, &outbackprop, &output);
-    } catch (mkldnn::error& e) {
-      string error_msg = "Status: " + std::to_string(e.status) +
-                         ", message: " + string(e.message) + ", in file " +
-                         string(__FILE__) + ":" + std::to_string(__LINE__);
-      OP_REQUIRES_OK(
-          context,
-          errors::Aborted("Operation received an exception:", error_msg));
+    } catch (mkldnn::error &e) {
+     string error_msg = "Status: " + std::to_string(e.status) +
+                       ", message: " + string(e.message) +
+                       ", in file " + string(__FILE__) + ":" +
+                       std::to_string(__LINE__);
+     OP_REQUIRES_OK(context, errors::Aborted("Operation received an exception:",
+                                            error_msg));
     }
   }
 
@@ -501,8 +502,9 @@ class MklConv2DCustomBackpropInputOp : public OpKernel {
 
   // Prepare and execute net - checks for input and output reorders.
   void PrepareAndExecutePrimitive(
-      const convolution_backward_data::primitive_desc& conv_pd,
-      MklDnnData<T>* filter, MklDnnData<T>* obp, MklDnnData<T>* output) {
+                  const convolution_backward_data::primitive_desc& conv_pd,
+                  MklDnnData<T>* filter, MklDnnData<T>* obp,
+                  MklDnnData<T>* output) {
     // Create reorders between user layout and MKL layout if it is needed and
     // add it to the net before convolution.
     std::vector<primitive> net;
@@ -512,11 +514,11 @@ class MklConv2DCustomBackpropInputOp : public OpKernel {
     // Memory for output of convolution. Since we may need reorder on the
     // output side, we will prepare reorder primitive in case output
     // reorder to user memory is required.
-    bool output_reorder_required =
-        output->PrepareReorderToUserMemIfReq(conv_pd.diff_src_primitive_desc());
+    bool output_reorder_required = output->PrepareReorderToUserMemIfReq(
+                                      conv_pd.diff_src_primitive_desc());
 
-    net.push_back(convolution_backward_data(
-        conv_pd, obp->GetOpMem(), filter->GetOpMem(), output->GetOpMem()));
+    net.push_back(convolution_backward_data(conv_pd, obp->GetOpMem(),
+                                    filter->GetOpMem(), output->GetOpMem()));
 
     // Insert reorder primitive in the net for output reorder if reorder is
     // required.
diff --git a/tensorflow/core/kernels/mkl_conv_ops.cc b/tensorflow/core/kernels/mkl_conv_ops.cc
index 369f632fb4..a9872b8d6d 100644
--- a/tensorflow/core/kernels/mkl_conv_ops.cc
+++ b/tensorflow/core/kernels/mkl_conv_ops.cc
@@ -18,8 +18,8 @@ limitations under the License.
 
 #include <string.h>
 #include <map>
-#include <string>
 #include <vector>
+#include <string>
 
 #include "tensorflow/core/framework/numeric_op.h"
 #include "tensorflow/core/framework/op_kernel.h"
@@ -46,11 +46,11 @@ limitations under the License.
 #ifdef INTEL_MKL_DNN
 #include "mkldnn.hpp"
 
-using mkldnn::prop_kind;
 using mkldnn::stream;
+using mkldnn::prop_kind;
 
-using mkldnn::convolution_direct;
 using mkldnn::convolution_forward;
+using mkldnn::convolution_direct;
 #endif
 
 namespace tensorflow {
@@ -523,16 +523,19 @@ class MklConv2DOp : public OpKernel {
 
       // Get shapes of input tensors in MKL-DNN order
       MklDnnConvUtil conv_utl(context, strides_, padding_, data_format_);
-      conv_utl.GetConvFwdSizesInMklOrder(
-          src_tensor.shape(), filter_tensor.shape(), &src_dims, &filter_dims,
-          &strides, &output_dims_tf_order, &output_dims_mkl_order, &padding_l,
-          &padding_r);
+      conv_utl.GetConvFwdSizesInMklOrder(src_tensor.shape(),
+                                         filter_tensor.shape(),
+                                         &src_dims, &filter_dims, &strides,
+                                         &output_dims_tf_order,
+                                         &output_dims_mkl_order, &padding_l,
+                                         &padding_r);
       if (!context->status().ok()) return;
 
       // Check for corner case - if there is nothing to compute, return.
-      TensorShape tf_output_shape(
-          {output_dims_tf_order[0], output_dims_tf_order[1],
-           output_dims_tf_order[2], output_dims_tf_order[3]});
+      TensorShape tf_output_shape({output_dims_tf_order[0],
+                                output_dims_tf_order[1],
+                                output_dims_tf_order[2],
+                                output_dims_tf_order[3]});
       Tensor* output_tensor = nullptr;
       MklShape mkl_output_mkl_shape;
       mkl_output_mkl_shape.SetMklTensor(false);
@@ -569,13 +572,13 @@ class MklConv2DOp : public OpKernel {
       // the layout is Tensorflow's layout (NHWC or NCHW depending on data
       // format).
       src.SetUsrMem(src_dims, TFDataFormatToMklDnnDataFormat(data_format_),
-                    const_cast<void*>(
-                        static_cast<const void*>(src_tensor.flat<T>().data())));
+                    const_cast<void*>(static_cast<const void*>(
+                    src_tensor.flat<T>().data())));
       // Although filter shape (filter_dims) required is in MKL-DNN order,
       // the layout is Tensorflow's layout (HWIO).
       filter.SetUsrMem(filter_dims, memory::format::hwio,
                        const_cast<void*>(static_cast<const void*>(
-                           filter_tensor.flat<T>().data())));
+                       filter_tensor.flat<T>().data())));
       // Although output shape (output_dims) required is in MKL-DNN order,
       // layout is Tensorflow's layout (NHWC or NCHW depending on data format).
       output.SetUsrMem(output_dims_mkl_order,
@@ -595,36 +598,36 @@ class MklConv2DOp : public OpKernel {
         const Tensor& bias_tensor = MklGetInput(context, 2);
         bias.SetUsrMem(bias_size, memory::format::x,
                        const_cast<void*>(static_cast<const void*>(
-                           bias_tensor.flat<T>().data())));
+                       bias_tensor.flat<T>().data())));
         bias.SetOpMemDesc(bias_size, memory::format::any);
 
         // Create convolution primitive with Bias.
-        auto conv_desc = convolution_forward::desc(
-            prop_kind::forward, convolution_direct, src.GetOpMemDesc(),
-            filter.GetOpMemDesc(), bias.GetOpMemDesc(), output.GetOpMemDesc(),
-            strides, padding_l, padding_r, TFPaddingToMklDnnPadding(padding_));
+        auto conv_desc = convolution_forward::desc(prop_kind::forward,
+            convolution_direct, src.GetOpMemDesc(), filter.GetOpMemDesc(),
+            bias.GetOpMemDesc(), output.GetOpMemDesc(), strides,
+            padding_l, padding_r, TFPaddingToMklDnnPadding(padding_));
 
-        auto conv_prim_desc =
-            convolution_forward::primitive_desc(conv_desc, cpu_engine);
+        auto conv_prim_desc = convolution_forward::primitive_desc(conv_desc,
+                                                                cpu_engine);
         PrepareAndExecuteNet(conv_prim_desc, &src, &filter, &bias, &output);
       } else {
         // Create convolution primitive without Bias.
-        auto conv_desc = convolution_forward::desc(
-            prop_kind::forward, convolution_direct, src.GetOpMemDesc(),
-            filter.GetOpMemDesc(), output.GetOpMemDesc(), strides, padding_l,
-            padding_r, TFPaddingToMklDnnPadding(padding_));
+        auto conv_desc = convolution_forward::desc(prop_kind::forward,
+            convolution_direct, src.GetOpMemDesc(), filter.GetOpMemDesc(),
+            output.GetOpMemDesc(), strides, padding_l, padding_r,
+            TFPaddingToMklDnnPadding(padding_));
 
-        auto conv_prim_desc =
-            convolution_forward::primitive_desc(conv_desc, cpu_engine);
+        auto conv_prim_desc = convolution_forward::primitive_desc(conv_desc,
+                                                                cpu_engine);
         PrepareAndExecuteNet(conv_prim_desc, &src, &filter, nullptr, &output);
       }
-    } catch (mkldnn::error& e) {
+    } catch (mkldnn::error &e) {
       string error_msg = "Status: " + std::to_string(e.status) +
-                         ", message: " + std::string(e.message) + ", in file " +
-                         std::string(__FILE__) + ":" + std::to_string(__LINE__);
-      OP_REQUIRES_OK(
-          context,
-          errors::Aborted("Operation received an exception:", error_msg));
+                       ", message: " + std::string(e.message) +
+                       ", in file " + std::string(__FILE__) + ":" +
+                       std::to_string(__LINE__);
+      OP_REQUIRES_OK(context,
+        errors::Aborted("Operation received an exception:", error_msg));
     }
   }
 
@@ -635,9 +638,9 @@ class MklConv2DOp : public OpKernel {
 
   // Prepare and execute net - checks for input and output reorders.
   void PrepareAndExecuteNet(
-      const convolution_forward::primitive_desc& conv_prim_desc,
-      MklDnnData<T>* src, MklDnnData<T>* filter, MklDnnData<T>* bias,
-      MklDnnData<T>* output) {
+                  const convolution_forward::primitive_desc& conv_prim_desc,
+                  MklDnnData<T>* src, MklDnnData<T>* filter,
+                  MklDnnData<T>* bias, MklDnnData<T>* output) {
     // Create reorders between user layout and MKL layout if it is needed and
     // add it to the net before convolution.
     std::vector<primitive> net;
@@ -648,19 +651,18 @@ class MklConv2DOp : public OpKernel {
     // output side, we will prepare reorder primitive in case output
     // reorder to user memory is required.
     bool output_reorder_required = output->PrepareReorderToUserMemIfReq(
-        conv_prim_desc.dst_primitive_desc());
+                                      conv_prim_desc.dst_primitive_desc());
 
     // Create convolution primitive and add it to net.
     if (bias) {
       CHECK_EQ(biasEnabled, true);
       net.push_back(convolution_forward(conv_prim_desc, src->GetOpMem(),
-                                        filter->GetOpMem(), bias->GetOpMem(),
-                                        output->GetOpMem()));
+                                    filter->GetOpMem(), bias->GetOpMem(),
+                                    output->GetOpMem()));
     } else {
       CHECK_EQ(biasEnabled, false);
       net.push_back(convolution_forward(conv_prim_desc, src->GetOpMem(),
-                                        filter->GetOpMem(),
-                                        output->GetOpMem()));
+                                    filter->GetOpMem(), output->GetOpMem()));
     }
 
     // Insert reorder primitive in the net for output reorder if reorder is
diff --git a/tensorflow/core/kernels/mkl_conv_ops.h b/tensorflow/core/kernels/mkl_conv_ops.h
index e29af19ca9..f0cb37f8a4 100644
--- a/tensorflow/core/kernels/mkl_conv_ops.h
+++ b/tensorflow/core/kernels/mkl_conv_ops.h
@@ -16,8 +16,8 @@ limitations under the License.
 #ifndef TENSORFLOW_CORE_KERNELS_MKL_CONV_OPS_H_
 #define TENSORFLOW_CORE_KERNELS_MKL_CONV_OPS_H_
 
-#include <limits>
 #include <vector>
+#include <limits>
 
 #include "tensorflow/core/framework/numeric_op.h"
 #include "tensorflow/core/framework/op_kernel.h"
@@ -26,8 +26,8 @@ limitations under the License.
 #include "tensorflow/core/framework/tensor_shape.h"
 #include "tensorflow/core/framework/tensor_slice.h"
 #include "tensorflow/core/kernels/bounds_check.h"
-#include "tensorflow/core/kernels/conv_grad_ops.h"
 #include "tensorflow/core/kernels/ops_util.h"
+#include "tensorflow/core/kernels/conv_grad_ops.h"
 #include "tensorflow/core/lib/core/errors.h"
 #include "tensorflow/core/lib/gtl/array_slice.h"
 #include "tensorflow/core/lib/strings/numbers.h"
@@ -49,15 +49,15 @@ namespace tensorflow {
 
 class MklDnnConvUtil {
  protected:
-  OpKernelContext *context_;  // We don't own this.
+  OpKernelContext* context_;  // We don't own this.
   std::vector<int32> strides_;
   Padding padding_;
   TensorFormat data_format_;
 
  public:
-  MklDnnConvUtil(OpKernelContext *context, const std::vector<int32> &strides,
-                 Padding pad, TensorFormat fm)
-      : context_(context), strides_(strides), padding_(pad), data_format_(fm) {}
+  MklDnnConvUtil(OpKernelContext* context, const std::vector<int32>& strides,
+                 Padding pad, TensorFormat fm) : context_(context),
+    strides_(strides), padding_(pad), data_format_(fm) {}
 
   virtual ~MklDnnConvUtil() { context_ = nullptr; }
 
@@ -75,14 +75,14 @@ class MklDnnConvUtil {
   // requires input in NCHW format. Function does not return anything.
   // But errors arising from sanity checks are returned in context's
   // status.
-  virtual inline void GetInputSizeInMklOrder(const TensorShape &input_shape,
-                                             memory::dims *input_dims) {
-#define CHECK_BOUNDS(val, err_msg)                                     \
-  do {                                                                 \
-    OP_REQUIRES(context_,                                              \
-                FastBoundsCheck(val, std::numeric_limits<int>::max()), \
-                errors::InvalidArgument(err_msg));                     \
-  } while (0)
+  virtual inline void
+  GetInputSizeInMklOrder(const TensorShape& input_shape,
+                         memory::dims *input_dims) {
+  #define CHECK_BOUNDS(val, err_msg) do {                     \
+    OP_REQUIRES(context_, FastBoundsCheck(val,                \
+                            std::numeric_limits<int>::max()), \
+                errors::InvalidArgument(err_msg));            \
+  }while(0)
 
     CHECK_NOTNULL(input_dims);
 
@@ -105,7 +105,7 @@ class MklDnnConvUtil {
     CHECK_BOUNDS(input_batch_raw, "Input batch too large");
     int input_batch = static_cast<int>(input_batch_raw);
 
-#undef CHECK_BOUNDS
+  #undef CHECK_BOUNDS
 
     // MKL-DNN always requires input in NCHW format.
     *input_dims = {input_batch, input_depth, input_rows, input_cols};
@@ -125,9 +125,10 @@ class MklDnnConvUtil {
   // forward gets actual tensor as input).
   //
   // TODO(nhasabni): Add similar function for input and filter in MklShape.
-  virtual inline void GetFilterSizeInMklOrder(const TensorShape &input_shape,
-                                              const TensorShape &filter_shape,
-                                              memory::dims *filter_dims) {
+  virtual inline void
+  GetFilterSizeInMklOrder(const TensorShape& input_shape,
+                          const TensorShape& filter_shape,
+                          memory::dims *filter_dims) {
     CHECK_NOTNULL(filter_dims);
 
     OP_REQUIRES(context_, filter_shape.dims() == 4,
@@ -135,18 +136,17 @@ class MklDnnConvUtil {
                                         filter_shape.DebugString()));
 
     for (int i = 0; i < 3; i++) {
-      OP_REQUIRES(context_,
-                  FastBoundsCheck(filter_shape.dim_size(i),
-                                  std::numeric_limits<int>::max()),
-                  errors::InvalidArgument("filter too large"));
+      OP_REQUIRES(context_, FastBoundsCheck(filter_shape.dim_size(i),
+                                           std::numeric_limits<int>::max()),
+                errors::InvalidArgument("filter too large"));
     }
 
     int input_depth = GetTensorDim(input_shape, data_format_, 'C');
 
-    OP_REQUIRES(context_, input_depth == filter_shape.dim_size(2),
-                errors::InvalidArgument(
-                    "input and filter must have the same depth: ", input_depth,
-                    " vs ", filter_shape.dim_size(2)));
+    OP_REQUIRES(
+        context_, input_depth == filter_shape.dim_size(2),
+        errors::InvalidArgument("input and filter must have the same depth: ",
+                                input_depth, " vs ", filter_shape.dim_size(2)));
 
     // TF filter is always in (rows, cols, in_depth, out_depth) order.
     int filter_rows = static_cast<int>(filter_shape.dim_size(0));
@@ -163,25 +163,25 @@ class MklDnnConvUtil {
   // requires filter in OIHW format. Function does not return anything.
   // But errors arising from sanity checks are returned in context's
   // status.
-  virtual inline void GetFilterSizeInMklOrder(size_t src_index,
-                                              size_t filter_index,
-                                              memory::dims *filter_dims) {
+  virtual inline void
+  GetFilterSizeInMklOrder(size_t src_index, size_t filter_index,
+                          memory::dims *filter_dims) {
     CHECK_NOTNULL(filter_dims);
-    const Tensor &input = MklGetInput(context_, src_index);
-    const Tensor &filter = MklGetInput(context_, filter_index);
+    const Tensor& input = MklGetInput(context_, src_index);
+    const Tensor& filter = MklGetInput(context_, filter_index);
     GetFilterSizeInMklOrder(input.shape(), filter.shape(), filter_dims);
   }
 
   // Calculate Bias size for 2D Convolution. Function does not return
   // anything, but sets error in context status.
-  virtual inline void GetBiasSizeInMklOrder(size_t bias_index,
-                                            memory::dims *bias_dims) {
-    const Tensor &bias = MklGetInput(context_, bias_index);
+  virtual inline void
+  GetBiasSizeInMklOrder(size_t bias_index, memory::dims *bias_dims) {
+    const Tensor& bias = MklGetInput(context_, bias_index);
     OP_REQUIRES(context_, bias.dims() == 1,
                 errors::InvalidArgument("bias must be 1-dimensional: ",
                                         bias.shape().DebugString()));
 
-    *bias_dims = {static_cast<int>(bias.dim_size(0))};
+    *bias_dims = { static_cast<int>(bias.dim_size(0)) };
   }
 
   // Function to calculate output and padding size for 2D convolution.
@@ -193,11 +193,13 @@ class MklDnnConvUtil {
   // status is returned via context status.
   //
   // TODO(nhasabni): Add similar function for input and filter in MklShape.
-  virtual inline void GetOutputAndPadSizeInMklOrder(
-      const TensorShape &input_shape, const TensorShape &filter_shape,
-      const memory::dims &strides, memory::dims *output_dims_tf_order,
-      memory::dims *output_dims_mkl_order, memory::dims *pad_l,
-      memory::dims *pad_r) {
+  virtual inline void
+  GetOutputAndPadSizeInMklOrder(const TensorShape& input_shape,
+                                const TensorShape& filter_shape,
+                                const memory::dims& strides,
+                                memory::dims *output_dims_tf_order,
+                                memory::dims *output_dims_mkl_order,
+                                memory::dims *pad_l, memory::dims *pad_r) {
     CHECK_NOTNULL(output_dims_tf_order);
     CHECK_NOTNULL(output_dims_mkl_order);
     CHECK_NOTNULL(pad_l);
@@ -223,21 +225,21 @@ class MklDnnConvUtil {
     int64 out_rows = 0, out_cols = 0;
     int64 pad_top = 0, pad_bottom = 0, pad_left, pad_right;
 
-    OP_REQUIRES_OK(context_, GetWindowedOutputSizeVerbose(
-                                 input_rows, filter_rows, stride_rows, padding_,
-                                 &out_rows, &pad_top, &pad_bottom));
-    OP_REQUIRES_OK(context_, GetWindowedOutputSizeVerbose(
-                                 input_cols, filter_cols, stride_cols, padding_,
-                                 &out_cols, &pad_left, &pad_right));
+    OP_REQUIRES_OK(context_,
+            GetWindowedOutputSizeVerbose(input_rows, filter_rows, stride_rows,
+                                 padding_, &out_rows, &pad_top, &pad_bottom));
+    OP_REQUIRES_OK(context_,
+            GetWindowedOutputSizeVerbose(input_cols, filter_cols, stride_cols,
+                                 padding_, &out_cols, &pad_left, &pad_right));
 
     // Tensorflow output is in data_format order. (NHWC or NCHW)
-    TensorShape out_shape =
-        ShapeFromFormat(data_format_, out_batch, out_rows, out_cols, out_depth);
+    TensorShape out_shape = ShapeFromFormat(data_format_, out_batch,
+                                            out_rows, out_cols, out_depth);
     *output_dims_tf_order = TFShapeToMklDnnDims(out_shape);
 
     // MKL-DNN always needs output in NCHW format.
     *output_dims_mkl_order = {out_batch, out_depth, static_cast<int>(out_rows),
-                              static_cast<int>(out_cols)};
+                   static_cast<int>(out_cols)};
 
     // Now handle padding. MKL-DNN uses asymetric padding.
     *pad_l = {static_cast<int>(pad_top), static_cast<int>(pad_left)};
@@ -248,25 +250,27 @@ class MklDnnConvUtil {
   // See comment on GetConvOutputAndPadSizeInMklOrder for parameters.
   //
   // Function does not return anything, but sets error in context status.
-  inline void GetOutputAndPadSizeInMklOrder(
-      size_t src_index, size_t filter_index, const memory::dims &strides,
-      memory::dims *output_dims_tf_order, memory::dims *output_dims_mkl_order,
-      memory::dims *pad_l, memory::dims *pad_r) {
+  inline void
+  GetOutputAndPadSizeInMklOrder(size_t src_index, size_t filter_index,
+                                const memory::dims& strides,
+                                memory::dims *output_dims_tf_order,
+                                memory::dims *output_dims_mkl_order,
+                                memory::dims *pad_l, memory::dims *pad_r) {
     CHECK_NOTNULL(output_dims_tf_order);
     CHECK_NOTNULL(output_dims_mkl_order);
     CHECK_NOTNULL(pad_l);
     CHECK_NOTNULL(pad_r);
 
-    const Tensor &input = MklGetInput(context_, src_index);
-    const Tensor &filter = MklGetInput(context_, filter_index);
+    const Tensor& input = MklGetInput(context_, src_index);
+    const Tensor& filter = MklGetInput(context_, filter_index);
 
     OP_REQUIRES(context_, input.dims() == 4,
                 errors::InvalidArgument("input must be 4-dimensional",
-                                        input.shape().DebugString()));
+                                          input.shape().DebugString()));
 
-    GetOutputAndPadSizeInMklOrder(input.shape(), filter.shape(), strides,
-                                  output_dims_tf_order, output_dims_mkl_order,
-                                  pad_l, pad_r);
+    GetOutputAndPadSizeInMklOrder(input.shape(), filter.shape(),
+                                  strides, output_dims_tf_order,
+                                  output_dims_mkl_order, pad_l, pad_r);
   }
 
   // Wrapper function to calculate input, filter, and output sizes of
@@ -275,12 +279,15 @@ class MklDnnConvUtil {
   // also calculates strides and paddings for 2D Convolution.
   //
   // Function does not return anything, but sets error in context status.
-  inline void GetConvFwdSizesInMklOrder(
-      const TensorShape &input_shape, const TensorShape &filter_shape,
-      memory::dims *input_dims, memory::dims *filter_dims,
-      memory::dims *strides, memory::dims *output_dims_tf_order,
-      memory::dims *output_dims_mkl_order, memory::dims *pad_l,
-      memory::dims *pad_r) {
+  inline void GetConvFwdSizesInMklOrder(const TensorShape& input_shape,
+                                        const TensorShape& filter_shape,
+                                        memory::dims *input_dims,
+                                        memory::dims *filter_dims,
+                                        memory::dims *strides,
+                                        memory::dims *output_dims_tf_order,
+                                        memory::dims *output_dims_mkl_order,
+                                        memory::dims *pad_l,
+                                        memory::dims *pad_r) {
     CHECK_NOTNULL(input_dims);
     CHECK_NOTNULL(filter_dims);
     CHECK_NOTNULL(strides);
@@ -295,7 +302,8 @@ class MklDnnConvUtil {
     if (!context_->status().ok()) return;
     GetStridesInMklOrder(strides);
     GetOutputAndPadSizeInMklOrder(input_shape, filter_shape, *strides,
-                                  output_dims_tf_order, output_dims_mkl_order,
+                                  output_dims_tf_order,
+                                  output_dims_mkl_order,
                                   pad_l, pad_r);
     if (!context_->status().ok()) return;
   }
diff --git a/tensorflow/core/kernels/mkl_tfconv_op.h b/tensorflow/core/kernels/mkl_tfconv_op.h
index a240ee44fb..0a5be4fec9 100644
--- a/tensorflow/core/kernels/mkl_tfconv_op.h
+++ b/tensorflow/core/kernels/mkl_tfconv_op.h
@@ -13,11 +13,11 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#ifdef INTEL_MKL
-
 #ifndef TENSORFLOW_CORE_KERNELS_MKL_TFCONV_OP_H_
 #define TENSORFLOW_CORE_KERNELS_MKL_TFCONV_OP_H_
 
+#ifdef INTEL_MKL
+
 #include <algorithm>
 #include <vector>
 #include "tensorflow/core/framework/numeric_op.h"
@@ -35,6 +35,10 @@ limitations under the License.
 #include "mkl_dnn_types.h"
 #include "tensorflow/core/util/mkl_util.h"
 
+#ifdef INTEL_MKL_DNN
+using mkldnn::stream;
+#endif
+
 namespace tensorflow {
 typedef Eigen::ThreadPoolDevice CPUDevice;
 
@@ -57,6 +61,71 @@ class MklToTfOp : public OpKernel {
     VLOG(1) << "MKLToTFConversion complete successfully.";
   }
 
+#ifdef INTEL_MKL_DNN
+  static void ConvertMklToTf(OpKernel* op_kernel, OpKernelContext* context,
+                             string data_format_str, DataType op_data_type,
+                             bool has_avx512f, uint input_number) {
+    try {
+      // Check that input tensor is in MKL format.
+      const Tensor& input_tensor = MklGetInput(context, input_number);
+      MklDnnShape input_shape;
+      GetMklShape(context, input_number, &input_shape);
+
+      // if input is already in Tf format, then copy input tensor to output.
+      if (!input_shape.IsMklTensor()) {
+        context->set_output(input_number, input_tensor);
+        VLOG(1) << "MKLToTFConversion: No conversion needed, "
+                << "copying input to output";
+        return;
+      }
+
+      // Check that input data type is same as operator data type and that it
+      // is same as output data type.
+      DataType input_data_type = op_kernel->input_type(input_number);
+      DataType output_data_type = op_kernel->output_type(input_number);
+      CHECK_EQ(op_data_type, input_data_type);
+      CHECK_EQ(op_data_type, output_data_type);
+
+      auto cpu_engine = engine(engine::cpu, 0);
+      MklDnnData<T> input(&cpu_engine);
+
+      // Get Mkl layout of input tensor.
+      auto input_mkl_md = input_shape.GetMklLayout();
+      // Get TensorFlow layout of input tensor. Expected output of conversion
+      // has same layout as Tensorflow layout of input tensor.
+      auto output_tf_md = input_shape.GetTfLayout();
+      auto output_tf_pd = memory::primitive_desc(output_tf_md, cpu_engine);
+      // Set input Mkl layout as the user layout.
+      input.SetUsrMem(input_mkl_md, &input_tensor);
+
+      // Allocate output tensor.
+      TensorShape output_shape = input_shape.GetTfShape();
+      Tensor* output_tensor = NULL;
+      OP_REQUIRES_OK(context, context->allocate_output(input_number,
+                                  output_shape, &output_tensor));
+      CHECK_NOTNULL(output_tensor);
+
+      // Do we need to reorder Mkl layout into TensorFlow layout?
+      if (input.IsReorderNeeded(output_tf_pd)) {
+        // Insert reorder between Mkl layout and TensorFlow layout.
+        std::vector<primitive> net;
+        CHECK_EQ(input.CheckReorderToOpMem(output_tf_pd, output_tensor, &net),
+                 true);
+        stream(stream::kind::eager).submit(net).wait();
+      } else {
+        // If not, just forward input tensor to output tensor.
+        CHECK(output_tensor->CopyFrom(input_tensor, output_shape));
+      }
+    } catch (mkldnn::error &e) {
+      string error_msg = "Status: " + std::to_string(e.status) +
+                       ", message: " + std::string(e.message) +
+                       ", in file " + std::string(__FILE__) + ":" +
+                       std::to_string(__LINE__);
+      OP_REQUIRES_OK(context,
+        errors::Aborted("Operation received an exception:", error_msg));
+    }
+  }
+#else
   static void ConvertMklToTf(OpKernel* op_kernel, OpKernelContext* context,
                              string data_format_str, DataType op_data_type,
                              bool has_avx512f, uint input_number) {
@@ -91,8 +160,8 @@ class MklToTfOp : public OpKernel {
 
     // Allocate output tensor.
     Tensor* output_tensor = NULL;
-    OP_REQUIRES_OK(context,
-                   context->allocate_output(input_number, output_shape, &output_tensor));
+    OP_REQUIRES_OK(context, context->allocate_output(input_number,
+                              output_shape, &output_tensor));
 
     dnnLayout_t output_layout =
         static_cast<dnnLayout_t>(input_shape.GetTfLayout());
@@ -106,6 +175,7 @@ class MklToTfOp : public OpKernel {
                                      output_buffer);
     VLOG(1) << "MKLToTFConversion complete successfully.";
   }
+#endif
 
  private:
   /// Data format of the operation
@@ -132,5 +202,5 @@ class MklToTfOp : public OpKernel {
 TF_CALL_NUMBER_TYPES(REGISTER_CPU);
 #undef REGISTER_CPU
 }  // namespace tensorflow
-#endif  // TENSORFLOW_CORE_KERNELS_MKL_TFCONV_OP_H_
 #endif  // INTEL_MKL
+#endif  // TENSORFLOW_CORE_KERNELS_MKL_TFCONV_OP_H_
diff --git a/tensorflow/core/kernels/pooling_ops_common.cc b/tensorflow/core/kernels/pooling_ops_common.cc
index 7dee751c4f..ac90f67ce0 100644
--- a/tensorflow/core/kernels/pooling_ops_common.cc
+++ b/tensorflow/core/kernels/pooling_ops_common.cc
@@ -143,7 +143,7 @@ void DnnPoolingOp<T>::Compute(
     perftools::gputools::dnn::PoolingMode pooling_mode,
     const std::vector<int32>& size, const std::vector<int32>& stride,
     Padding padding, TensorFormat data_format, const Tensor& tensor_in,
-    const TensorShape& tensor_out_shape) {
+    const TensorShape& tensor_out_shape, bool propagate_nans) {
   Tensor* tensor_out = nullptr;
   OP_REQUIRES_OK(context,
                  context->allocate_output(0, tensor_out_shape, &tensor_out));
@@ -188,7 +188,8 @@ void DnnPoolingOp<T>::Compute(
       .set_vertical_stride(params.row_stride)
       .set_horizontal_stride(params.col_stride)
       .set_vertical_padding(params.pad_rows)
-      .set_horizontal_padding(params.pad_cols);
+      .set_horizontal_padding(params.pad_cols)
+      .set_propagate_nans(propagate_nans);
 
   perftools::gputools::dnn::BatchDescriptor input_desc;
   input_desc.set_count(params.tensor_in_batch)
@@ -237,7 +238,7 @@ void DnnPoolingGradOp<T>::Compute(
     const std::vector<int32>& size, const std::vector<int32>& stride,
     Padding padding, TensorFormat data_format, const Tensor* tensor_in,
     const Tensor* tensor_out, const Tensor& out_backprop,
-    const TensorShape& tensor_in_shape) {
+    const TensorShape& tensor_in_shape, bool propagate_nans) {
   CHECK((pooling_mode != perftools::gputools::dnn::PoolingMode::kMaximum) ||
         (tensor_in && tensor_out))
       << "For MaxPoolGrad, both tensor_in and tensor_out needs to be "
@@ -327,7 +328,8 @@ void DnnPoolingGradOp<T>::Compute(
       .set_vertical_stride(params.row_stride)
       .set_horizontal_stride(params.col_stride)
       .set_vertical_padding(params.pad_rows)
-      .set_horizontal_padding(params.pad_cols);
+      .set_horizontal_padding(params.pad_cols)
+      .set_propagate_nans(propagate_nans);
 
   perftools::gputools::dnn::BatchDescriptor orig_output_desc;
   orig_output_desc.set_count(params.tensor_in_batch)
diff --git a/tensorflow/core/kernels/pooling_ops_common_gpu.h b/tensorflow/core/kernels/pooling_ops_common_gpu.h
index b594f39fad..1458456585 100644
--- a/tensorflow/core/kernels/pooling_ops_common_gpu.h
+++ b/tensorflow/core/kernels/pooling_ops_common_gpu.h
@@ -44,7 +44,7 @@ class DnnPoolingOp {
                       const std::vector<int32>& size,
                       const std::vector<int32>& stride, Padding padding,
                       TensorFormat data_format, const Tensor& tensor_in,
-                      const TensorShape& tensor_out_shape);
+                      const TensorShape& tensor_out_shape, bool propagate_nans);
 };
 
 // A helper class that launch the cudnn pooling backward operations.
@@ -60,7 +60,7 @@ class DnnPoolingGradOp {
                       const std::vector<int32>& stride, Padding padding,
                       TensorFormat data_format, const Tensor* tensor_in,
                       const Tensor* tensor_out, const Tensor& out_backprop,
-                      const TensorShape& tensor_in_shape);
+                      const TensorShape& tensor_in_shape, bool propagate_nans);
 };
 
 }  // namespace tensorflow
diff --git a/tensorflow/core/kernels/quantized_add_op.cc b/tensorflow/core/kernels/quantized_add_op.cc
index 8be0c56798..337c8e5c17 100644
--- a/tensorflow/core/kernels/quantized_add_op.cc
+++ b/tensorflow/core/kernels/quantized_add_op.cc
@@ -489,7 +489,7 @@ class QuantizedAddOp : public OpKernel {
     // adding zero leaves the result unchanged, and to contain the largest of
     // the two input values with some room to spare.
     const float smallest_min = std::min(min_x, min_y);
-    const float largest_max = std::min(max_x, max_y);
+    const float largest_max = std::max(max_x, max_y);
     const float biggest_range =
         std::max(std::abs(smallest_min), std::abs(largest_max));
     const float output_range = (biggest_range * (1 << 14));
diff --git a/tensorflow/core/kernels/random_op.cc b/tensorflow/core/kernels/random_op.cc
index a37c757865..55a8b9c9b6 100644
--- a/tensorflow/core/kernels/random_op.cc
+++ b/tensorflow/core/kernels/random_op.cc
@@ -577,7 +577,7 @@ struct FillPhiloxRandomKernel<Distribution, false> {
     const size_t kGroupSize = Distribution::kResultElementCount;
 
     const size_t item_id = item.get_global(0);
-    const size_t total_item_count = item.get_global_range(0);
+    const size_t total_item_count = item.get_global_range();
     size_t offset = item_id * kGroupSize;
     gen_.Skip(item_id);
 
@@ -633,7 +633,7 @@ struct FillPhiloxRandomKernel<Distribution, true> {
                                                 PhiloxRandom::kResultElementCount;
 
     const size_t item_id = item.get_global(0);
-    const size_t total_item_count = item.get_global_range(0);
+    const size_t total_item_count = item.get_global_range();
     size_t group_index = item_id;
     size_t offset = group_index * kGroupSize;
 
diff --git a/tensorflow/core/kernels/segment_reduction_ops.cc b/tensorflow/core/kernels/segment_reduction_ops.cc
index 4302a68a18..2334e50f1d 100644
--- a/tensorflow/core/kernels/segment_reduction_ops.cc
+++ b/tensorflow/core/kernels/segment_reduction_ops.cc
@@ -376,6 +376,9 @@ struct UnsortedSegmentSumFunctor<CPUDevice, T, Index>
     auto data_flat = typename TTypes<T, 2>::ConstTensor(data, N, data_size / N);
     for (int64 i = 0; i < N; ++i) {
       Index j = internal::SubtleMustCopy(segment_ids(i));
+      if (j < 0) {
+        continue;
+      }
       OP_REQUIRES(ctx, FastBoundsCheck(j, output_rows),
                   errors::InvalidArgument(
                       "segment_ids", SliceDebugString(segment_ids_shape, i),
diff --git a/tensorflow/core/kernels/segment_reduction_ops.h b/tensorflow/core/kernels/segment_reduction_ops.h
index 412c1d601d..b10bea72ba 100644
--- a/tensorflow/core/kernels/segment_reduction_ops.h
+++ b/tensorflow/core/kernels/segment_reduction_ops.h
@@ -30,14 +30,14 @@ namespace functor {
 #ifdef GOOGLE_CUDA
 typedef Eigen::GpuDevice GPUDevice;
 // Functor for SegmentSumGPUOp.
-// 'output_rows': the number of output segments (unique segment ids in
+// output_rows: the number of output segments (unique segment ids in
 //                'segment_ids').
-// 'segment_ids_shape': shape of 'segment_ids' tensor.
-// 'segment_ids': unsorted map from input to output segment ids at which to
+// segment_ids_shape: shape of 'segment_ids' tensor.
+// segment_ids: unsorted map from input to output segment ids at which to
 //                perform segment sum operation.
-// 'data_size': size of input data tensor.
-// 'data': input data tensor.
-// 'output': output reshaped to {output_rows, output.size/output_rows}
+// data_size: size of input data tensor.
+// data: input data tensor.
+// output: output reshaped to {output_rows, output.size/output_rows}
 template <typename T, typename Index>
 struct SegmentSumFunctor {
   void operator()(OpKernelContext* ctx, const GPUDevice& d,
@@ -61,14 +61,14 @@ struct UnsortedSegmentBaseFunctor{
 };
 
 // Functor for UnsortedSegmentSumOp.
-// 'output_rows': the number of output segments (unique segment ids in
+// output_rows: the number of output segments (unique segment ids in
 //                'segment_ids').
-// 'segment_ids_shape': shape of 'segment_ids' tensor.
-// 'segment_ids': unsorted map from input to output segment ids at which to
+// segment_ids_shape: shape of 'segment_ids' tensor.
+// segment_ids: unsorted map from input to output segment ids at which to
 //                perform segment sum operation.
-// 'data_size': size of input data tensor.
-// 'data': input data tensor.
-// 'output': output reshaped to {output_rows, output.size/output_rows}
+// data_size: size of input data tensor.
+// data: input data tensor.
+// output: output reshaped to {output_rows, output.size/output_rows}
 template <typename Device, typename T, typename Index>
 struct UnsortedSegmentSumFunctor: public UnsortedSegmentBaseFunctor<Device, T, Index> {
   void operator()(OpKernelContext* ctx, const Device& d,
@@ -79,14 +79,14 @@ struct UnsortedSegmentSumFunctor: public UnsortedSegmentBaseFunctor<Device, T, I
 };
 
 // Functor for UnsortedSegmentMaxOp.
-// 'output_rows': the number of output segments (unique segment ids in
+// output_rows: the number of output segments (unique segment ids in
 //                'segment_ids').
-// 'segment_ids_shape': shape of 'segment_ids' tensor.
-// 'segment_ids': unsorted map from input to output segment ids at which to
+// segment_ids_shape: shape of 'segment_ids' tensor.
+// segment_ids: unsorted map from input to output segment ids at which to
 //                perform segment sum operation.
-// 'data_size': size of input data tensor.
-// 'data': input data tensor.
-// 'output': output reshaped to {output_rows, output.size/output_rows}
+// data_size: size of input data tensor.
+// data: input data tensor.
+// output: output reshaped to {output_rows, output.size/output_rows}
 template <typename Device, typename T, typename Index>
 struct UnsortedSegmentMaxFunctor: public UnsortedSegmentBaseFunctor<Device, T, Index> {
   void operator()(OpKernelContext* ctx, const Device& d,
diff --git a/tensorflow/core/kernels/shape_ops.cc b/tensorflow/core/kernels/shape_ops.cc
index 721f9b949b..28a39bae3f 100644
--- a/tensorflow/core/kernels/shape_ops.cc
+++ b/tensorflow/core/kernels/shape_ops.cc
@@ -341,7 +341,12 @@ REGISTER_KERNEL_BUILDER(Name("ExpandDims")
                             .Device(DEVICE_CPU)
                             .HostMemory("dim")
                             .TypeConstraint<int32>("Tdim"),
-                        ExpandDimsOp);
+                        ExpandDimsOp<int32>);
+REGISTER_KERNEL_BUILDER(Name("ExpandDims")
+                            .Device(DEVICE_CPU)
+                            .HostMemory("dim")
+                            .TypeConstraint<int64>("Tdim"),
+                        ExpandDimsOp<int64>);
 
 #if GOOGLE_CUDA
 #define REGISTER_GPU_KERNEL(type)                            \
@@ -350,7 +355,13 @@ REGISTER_KERNEL_BUILDER(Name("ExpandDims")
                               .TypeConstraint<type>("T")     \
                               .TypeConstraint<int32>("Tdim") \
                               .HostMemory("dim"),            \
-                          ExpandDimsOp);
+                          ExpandDimsOp<int32>);              \
+  REGISTER_KERNEL_BUILDER(Name("ExpandDims")                 \
+                              .Device(DEVICE_GPU)            \
+                              .TypeConstraint<type>("T")     \
+                              .TypeConstraint<int64>("Tdim") \
+                              .HostMemory("dim"),            \
+                          ExpandDimsOp<int64>);
 TF_CALL_NUMBER_TYPES_NO_INT32(REGISTER_GPU_KERNEL);
 TF_CALL_bool(REGISTER_GPU_KERNEL);
 #undef REGISTER_GPU_KERNEL
@@ -362,7 +373,15 @@ REGISTER_KERNEL_BUILDER(Name("ExpandDims")
                             .HostMemory("input")
                             .HostMemory("dim")
                             .HostMemory("output"),
-                        ExpandDimsOp);
+                        ExpandDimsOp<int32>);
+REGISTER_KERNEL_BUILDER(Name("ExpandDims")
+                            .Device(DEVICE_GPU)
+                            .TypeConstraint<int32>("T")
+                            .TypeConstraint<int64>("Tdim")
+                            .HostMemory("input")
+                            .HostMemory("dim")
+                            .HostMemory("output"),
+                        ExpandDimsOp<int64>);
 #endif  // GOOGLE_CUDA
 
 #ifdef TENSORFLOW_USE_SYCL
@@ -372,7 +391,13 @@ REGISTER_KERNEL_BUILDER(Name("ExpandDims")
                               .TypeConstraint<type>("T")     \
                               .TypeConstraint<int32>("Tdim") \
                               .HostMemory("dim"),            \
-                          ExpandDimsOp);
+                          ExpandDimsOp<int32>);              \
+  REGISTER_KERNEL_BUILDER(Name("ExpandDims")                 \
+                              .Device(DEVICE_SYCL)           \
+                              .TypeConstraint<type>("T")     \
+                              .TypeConstraint<int64>("Tdim") \
+                              .HostMemory("dim"),            \
+                          ExpandDimsOp<int64>);
 TF_CALL_NUMBER_TYPES_NO_INT32(REGISTER_SYCL_KERNEL);
 TF_CALL_bool(REGISTER_SYCL_KERNEL);
 #undef REGISTER_SYCL_KERNEL
@@ -384,7 +409,15 @@ REGISTER_KERNEL_BUILDER(Name("ExpandDims")
                             .HostMemory("input")
                             .HostMemory("dim")
                             .HostMemory("output"),
-                        ExpandDimsOp);
+                        ExpandDimsOp<int32>);
+REGISTER_KERNEL_BUILDER(Name("ExpandDims")
+                            .Device(DEVICE_SYCL)
+                            .TypeConstraint<int32>("T")
+                            .TypeConstraint<int64>("Tdim")
+                            .HostMemory("input")
+                            .HostMemory("dim")
+                            .HostMemory("output"),
+                        ExpandDimsOp<int64>);
 #endif  // TENSORFLOW_USE_SYCL
 
 // Squeeze ---------------------------------------
diff --git a/tensorflow/core/kernels/shape_ops.h b/tensorflow/core/kernels/shape_ops.h
index ac607f4e8b..8d9d0ea846 100644
--- a/tensorflow/core/kernels/shape_ops.h
+++ b/tensorflow/core/kernels/shape_ops.h
@@ -145,6 +145,7 @@ class SizeOp : public OpKernel {
   bool IsExpensive() override { return false; }
 };
 
+template <typename Tdim>
 class ExpandDimsOp : public OpKernel {
  public:
   explicit ExpandDimsOp(OpKernelConstruction* ctx) : OpKernel(ctx) {}
@@ -153,7 +154,7 @@ class ExpandDimsOp : public OpKernel {
     OP_REQUIRES(ctx, ctx->input(0).dtype() != DT_VARIANT,
                 errors::InvalidArgument("ExpandDims on Variant not supported"));
 
-    int32 dim = ctx->input(1).flat<int32>()(0);
+    Tdim dim = ctx->input(1).flat<Tdim>()(0);
     OP_REQUIRES(
         ctx, (dim >= -1 - ctx->input(0).dims() && dim <= ctx->input(0).dims()),
         errors::InvalidArgument("Tried to expand dim index ", dim,
@@ -175,7 +176,7 @@ class ExpandDimsOp : public OpKernel {
     }
 
     // Clamp to the end if needed.
-    dim = std::min<int32>(dim, existing_dims_size);
+    dim = std::min<Tdim>(dim, existing_dims_size);
     new_shape.emplace(new_shape.begin() + dim, 1);
     const TensorShape output_shape(new_shape);
 
@@ -234,10 +235,10 @@ class SqueezeOp : public OpKernel {
       if (!wrapped_squeeze_dims.empty()) {
         if (wrapped_squeeze_dims.count(i) > 0) {
           OP_REQUIRES(ctx, existing_dim == 1,
-                      errors::InvalidArgument(
-                          "Tried to explicitly squeeze "
-                          "dimension ",
-                          i, " but dimension was not 1: ", existing_dim));
+                      errors::InvalidArgument("Tried to explicitly squeeze "
+                                              "dimension ",
+                                              i, " but dimension was not 1: ",
+                                              existing_dim));
         } else {
           // This dimension is not being squeezed.
           new_shape.push_back(existing_dim);
diff --git a/tensorflow/core/kernels/slice_op.cc b/tensorflow/core/kernels/slice_op.cc
index d46701749b..28a379774b 100644
--- a/tensorflow/core/kernels/slice_op.cc
+++ b/tensorflow/core/kernels/slice_op.cc
@@ -190,41 +190,25 @@ class SliceOp : public OpKernel {
         }
         return;
       }
-#define HANDLE_DIM(NDIM)                            \
-  if (input_dims == NDIM) {                         \
-    HandleCase<NDIM>(context, begin, size, result); \
-    return;                                         \
+#define HANDLE_DIM(NDIM)                                              \
+  if (input_dims == NDIM) {                                           \
+    functor::Slice<Device, T, NDIM>()(                                \
+        context->eigen_device<Device>(), result, input, begin, size); \
+    return;                                                           \
   }
-
       HANDLE_DIM(1);
       HANDLE_DIM(2);
       HANDLE_DIM(3);
       HANDLE_DIM(4);
       HANDLE_DIM(5);
       HANDLE_DIM(6);
-      HANDLE_DIM(7);
 
 #undef HANDLE_DIM
 
-      OP_REQUIRES(context, false, errors::Unimplemented(
-                                      "SliceOp : Unhandled input dimensions"));
-    }
-  }
-
- private:
-  template <int NDIM>
-  void HandleCase(OpKernelContext* context, const gtl::ArraySlice<int64>& begin,
-                  const gtl::ArraySlice<int64>& size, Tensor* result) {
-    Eigen::DSizes<Eigen::DenseIndex, NDIM> indices;
-    Eigen::DSizes<Eigen::DenseIndex, NDIM> sizes;
-    for (int i = 0; i < NDIM; ++i) {
-      indices[i] = begin[i];
-      sizes[i] = size[i];
+      // handle cases which dim >= 7
+      functor::Slice<Device, T, 7>()(
+          context->eigen_device<Device>(), result, input, begin, size);
     }
-
-    functor::Slice<Device, T, NDIM>()(
-        context->eigen_device<Device>(), result->tensor<T, NDIM>(),
-        context->input(0).tensor<T, NDIM>(), indices, sizes);
   }
 };
 
@@ -264,11 +248,16 @@ class MklSliceOp : public OpKernel {
         }
         return;
       }
-#define HANDLE_DIM(NDIM)                            \
-  if (input_dims == NDIM) {                         \
-    HandleCase<NDIM>(context, begin, size, result); \
-    return;                                         \
-  }
+      // Special case for handling 4-D tensor slice.
+      if (input_dims == 4) {
+        HandleCase4D(context, begin, size, result);
+      } else {
+#define HANDLE_DIM(NDIM)                                                  \
+      if (input_dims == NDIM) {                                           \
+        functor::Slice<Device, T, NDIM>()(                                \
+            context->eigen_device<Device>(), result, input, begin, size); \
+            return;                                                       \
+      }
 
       HANDLE_DIM(1);
       HANDLE_DIM(2);
@@ -276,12 +265,13 @@ class MklSliceOp : public OpKernel {
       HANDLE_DIM(4);
       HANDLE_DIM(5);
       HANDLE_DIM(6);
-      HANDLE_DIM(7);
 
 #undef HANDLE_DIM
 
-      OP_REQUIRES(context, false, errors::Unimplemented(
-                                      "SliceOp : Unhandled input dimensions"));
+        // handle cases which dim >= 7
+        functor::Slice<Device, T, 7>()(
+          context->eigen_device<Device>(), result, input, begin, size);
+      }
     }
   }
 
@@ -328,8 +318,7 @@ class MklSliceOp : public OpKernel {
     return false;
   }
 
-  template <int NDIM>
-  void HandleCase(OpKernelContext* context,
+  void HandleCase4D(OpKernelContext* context,
                   const gtl::ArraySlice<int64>& begin,
                   const gtl::ArraySlice<int64>& size, Tensor* result) {
     int slice_dim = -1;
@@ -338,8 +327,7 @@ class MklSliceOp : public OpKernel {
     // differs from the input tensor in only 1 out of 4 dimensions.
     // This case arises in the context of Slice of 4-D tensor in NHWC or NCHW
     // format over channel dimension.
-    if (NDIM == 4 &&
-        DoesSliceShapeDifferInOnly1D(in_shape, begin, size, &slice_dim)) {
+    if (DoesSliceShapeDifferInOnly1D(in_shape, begin, size, &slice_dim)) {
         size_t in_strides[4] = { (size_t) in_shape.dim_size(1) *
                                           in_shape.dim_size(2) *
                                           in_shape.dim_size(3),
@@ -403,16 +391,8 @@ class MklSliceOp : public OpKernel {
         // slice_dim is not 1 or 3, then we fallback to Eigen implementation.
     }
 
-    Eigen::DSizes<Eigen::DenseIndex, NDIM> indices;
-    Eigen::DSizes<Eigen::DenseIndex, NDIM> sizes;
-    for (int i = 0; i < NDIM; ++i) {
-      indices[i] = begin[i];
-      sizes[i] = size[i];
-    }
-
-    functor::Slice<Device, T, NDIM>()(
-        context->eigen_device<Device>(), result->tensor<T, NDIM>(),
-        context->input(0).tensor<T, NDIM>(), indices, sizes);
+    functor::Slice<Device, T, 4>()(
+        context->eigen_device<Device>(), result, context->input(0), begin, size);
   }
 };
 #endif
@@ -420,13 +400,13 @@ class MklSliceOp : public OpKernel {
 // Forward declarations of the functor specializations for declared in the
 // sharded source files.
 namespace functor {
-#define DECLARE_CPU_SPEC(T, NDIM)                                  \
-  template <>                                                      \
-  void Slice<CPUDevice, T, NDIM>::operator()(                      \
-      const CPUDevice& d, typename TTypes<T, NDIM>::Tensor output, \
-      typename TTypes<T, NDIM>::ConstTensor input,                 \
-      const Eigen::DSizes<Eigen::DenseIndex, NDIM>& indices,       \
-      const Eigen::DSizes<Eigen::DenseIndex, NDIM>& sizes);        \
+#define DECLARE_CPU_SPEC(T, NDIM)                        \
+  template <>                                            \
+  void Slice<CPUDevice, T, NDIM>::operator()(            \
+      const CPUDevice& d, Tensor* output,                \
+      const Tensor& input,                               \
+      const gtl::ArraySlice<int64>& slice_indices,       \
+      const gtl::ArraySlice<int64>& slice_sizes);        \
   extern template struct Slice<CPUDevice, T, NDIM>;
 
 #define DECLARE_FOR_N(T)  \
@@ -476,13 +456,14 @@ REGISTER_SLICE(bfloat16);
 #if GOOGLE_CUDA
 // Forward declarations of the functor specializations for GPU.
 namespace functor {
-#define DECLARE_GPU_SPEC(T, NDIM)                                  \
-  template <>                                                      \
-  void Slice<GPUDevice, T, NDIM>::operator()(                      \
-      const GPUDevice& d, typename TTypes<T, NDIM>::Tensor output, \
-      typename TTypes<T, NDIM>::ConstTensor input,                 \
-      const Eigen::DSizes<Eigen::DenseIndex, NDIM>& indices,       \
-      const Eigen::DSizes<Eigen::DenseIndex, NDIM>& sizes);        \
+#define DECLARE_GPU_SPEC(T, NDIM)                        \
+  template <>                                            \
+  void Slice<GPUDevice, T, NDIM>::operator()(            \
+      const GPUDevice& d,                                \
+      Tensor* output,                                    \
+      const Tensor& input,                               \
+      const gtl::ArraySlice<int64>& slice_indices,       \
+      const gtl::ArraySlice<int64>& slice_sizes);        \
   extern template struct Slice<GPUDevice, T, NDIM>;
 
 #define DECLARE_FOR_N(T)  \
@@ -536,13 +517,14 @@ REGISTER_KERNEL_BUILDER(Name("Slice")
 #ifdef TENSORFLOW_USE_SYCL
 // Forward declarations of the functor specializations for SYCL.
 namespace functor {
-#define DECLARE_SYCL_SPEC(T, NDIM)                                 \
-  template <>                                                      \
-  void Slice<SYCLDevice, T, NDIM>::operator()(                     \
-      const SYCLDevice& d, typename TTypes<T, NDIM>::Tensor output,\
-      typename TTypes<T, NDIM>::ConstTensor input,                 \
-      const Eigen::DSizes<Eigen::DenseIndex, NDIM>& indices,       \
-      const Eigen::DSizes<Eigen::DenseIndex, NDIM>& sizes);        \
+#define DECLARE_SYCL_SPEC(T, NDIM)                       \
+  template <>                                            \
+  void Slice<SYCLDevice, T, NDIM>::operator()(           \
+      const SYCLDevice& d,                               \
+      Tensor* output,                                    \
+      const Tensor& input,                               \
+      const gtl::ArraySlice<int64>& slice_indices,       \
+      const gtl::ArraySlice<int64>& slice_sizes);        \
   extern template struct Slice<SYCLDevice, T, NDIM>;
 
 #define DECLARE_FOR_N(T)   \
diff --git a/tensorflow/core/kernels/slice_op.h b/tensorflow/core/kernels/slice_op.h
index db7eded745..55a4be985b 100644
--- a/tensorflow/core/kernels/slice_op.h
+++ b/tensorflow/core/kernels/slice_op.h
@@ -19,31 +19,104 @@ limitations under the License.
 // Functor definition for SliceOp, must be compilable by nvcc.
 
 #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
+#include "tensorflow/core/framework/tensor.h"
 #include "tensorflow/core/framework/tensor_types.h"
+#include "tensorflow/core/kernels/ops_util.h"
 
 namespace tensorflow {
-namespace functor {
+
+namespace internal {
+
+template <typename Device, typename T>
+void SliceSimple(const Device& d, Tensor* out, const Tensor& in,
+                 const gtl::ArraySlice<int64>& slice_indices);
+template <typename Device, typename T>
+void SliceSimpleGpu(const Device& d, Tensor* out, const Tensor& in,
+                 const gtl::ArraySlice<int64>& slice_indices);
+
+template <typename Device, typename T>
+void SliceSimple(const Device& d, Tensor* out, const Tensor& in,
+                 const gtl::ArraySlice<int64>& slice_indices) {
+  const int ndims = in.dims();
+  const int64 nelem = out->NumElements();
+  const gtl::InlinedVector<int64, 8> in_strides = ComputeStride<int64>(in.shape());
+  const gtl::InlinedVector<int64, 8> out_strides = ComputeStride<int64>(out->shape());
+  const T* p = in.flat<T>().data();
+  T* q = out->flat<T>().data();
+
+  std::vector<int64> i_idx(nelem, 0);
+  std::vector<int64> t(nelem, 0);
+
+  for (int64 o_idx = 0; o_idx < nelem; ++o_idx) {
+    t[o_idx] = o_idx;
+  }
+  for (int i = 0; i < ndims; ++i) {
+    int64 n = (nelem + 7) / 8;
+    int64 o_idx = 0;
+    switch (nelem % 8) {
+#define CALC_INPUT_IDX                                                            \
+  i_idx[o_idx] += (t[o_idx] / out_strides[i] + slice_indices[i]) * in_strides[i]; \
+  t[o_idx] %= out_strides[i];                                                     \
+  ++o_idx;
+      case 0: do { CALC_INPUT_IDX;
+      case 7:      CALC_INPUT_IDX;
+      case 6:      CALC_INPUT_IDX;
+      case 5:      CALC_INPUT_IDX;
+      case 4:      CALC_INPUT_IDX;
+      case 3:      CALC_INPUT_IDX;
+      case 2:      CALC_INPUT_IDX;
+      case 1:      CALC_INPUT_IDX;
+#undef CALC_INPUT_IDX
+              } while (--n > 0);
+    }
+  }
+  for (int64 o_idx = 0; o_idx < nelem; ++o_idx) {
+    q[o_idx] = p[i_idx[o_idx]];
+  }
+}
 
 template <typename Device, typename T, int NDIMS>
+void SliceUsingEigen(const Device& d, Tensor* out, const Tensor& in,
+                 const gtl::ArraySlice<int64>& slice_indices,
+                 const gtl::ArraySlice<int64>& slice_sizes) {
+  auto input = in.tensor<T, NDIMS>();
+  auto output = out->tensor<T, NDIMS>();
+  Eigen::DSizes<int, NDIMS> indices;
+  for (int i = 0; i < NDIMS; ++i) {
+    indices[i] = slice_indices[i];
+  }
+  Eigen::DSizes<int, NDIMS> sizes;
+  for (int i = 0; i < NDIMS; ++i) {
+    sizes[i] = slice_sizes[i];
+  }
+  const bool use_64bit = input.size() > Eigen::NumTraits<int>::highest();
+  if (!use_64bit &&
+      Eigen::internal::is_same<Device, Eigen::GpuDevice>::value) {
+    To32Bit(output).device(d) = To32Bit(input).slice(indices, sizes);
+  } else {
+    output.device(d) = input.slice(indices, sizes);
+  }
+}
+
+} // namespace internal
+
+namespace functor {
+
+// Template parameter NDIM is not neccesary here. The aim of keeping it
+// is to compile struct slice seperately which minimizes the compiling time.
+template <typename Device, typename T, int NDIM>
 struct Slice {
-  void operator()(const Device& d, typename TTypes<T, NDIMS>::Tensor output,
-                  typename TTypes<T, NDIMS>::ConstTensor input,
-                  const Eigen::DSizes<Eigen::DenseIndex, NDIMS>& slice_indices,
-                  const Eigen::DSizes<Eigen::DenseIndex, NDIMS>& slice_sizes) {
-    bool use_64bit = (input.size() > Eigen::NumTraits<int>::highest());
-    if (!use_64bit &&
-        Eigen::internal::is_same<Device, Eigen::GpuDevice>::value) {
-      Eigen::DSizes<int, NDIMS> indices;
-      for (int i = 0; i < NDIMS; ++i) {
-        indices[i] = slice_indices[i];
-      }
-      Eigen::DSizes<int, NDIMS> sizes;
-      for (int i = 0; i < NDIMS; ++i) {
-        sizes[i] = slice_sizes[i];
-      }
-      To32Bit(output).device(d) = To32Bit(input).slice(indices, sizes);
+  void operator()(const Device& d, Tensor* out, const Tensor& in,
+                  const gtl::ArraySlice<int64>& slice_indices,
+                  const gtl::ArraySlice<int64>& slice_sizes) {
+    if (in.dims() == NDIM) {
+        internal::SliceUsingEigen<Device, T, NDIM>(d, out, in, slice_indices, slice_sizes);
     } else {
-      output.device(d) = input.slice(slice_indices, slice_sizes);
+        if (Eigen::internal::is_same<Device, Eigen::GpuDevice>::value) {
+          internal::SliceSimpleGpu<Device, T>(d, out, in, slice_indices);
+        } else {
+          internal::SliceSimple<Device, T>(d, out, in, slice_indices);
+        }
     }
   }
 };
diff --git a/tensorflow/core/kernels/slice_op_gpu.cu.cc b/tensorflow/core/kernels/slice_op_gpu.cu.cc
index a301986f2f..3039b3d777 100644
--- a/tensorflow/core/kernels/slice_op_gpu.cu.cc
+++ b/tensorflow/core/kernels/slice_op_gpu.cu.cc
@@ -21,9 +21,65 @@ limitations under the License.
 
 #include "tensorflow/core/framework/register_types.h"
 #include "tensorflow/core/framework/tensor_types.h"
+#include "tensorflow/core/kernels/ops_util.h"
 #include "tensorflow/core/platform/types.h"
+#include "tensorflow/core/util/cuda_kernel_helper.h"
 
 namespace tensorflow {
+namespace internal {
+
+template <typename T>
+__global__ void SliceKernel(int nthreads, const T* src, const int32* buf,
+                            const int32 ndims, T* dst) {
+  const int32* in_strides = buf;
+  const int32* out_strides = buf + ndims;
+  const int32* slice_indices = buf + ndims * 2;
+  CUDA_1D_KERNEL_LOOP(o_idx, nthreads) {
+    int32 i_idx = 0;
+    int32 t = o_idx;
+    for (int i = 0; i < ndims; ++i) {
+      i_idx += (t / out_strides[i] + slice_indices[i]) * in_strides[i];
+      t %= out_strides[i];
+    }
+    dst[o_idx] = ldg(src + i_idx);
+  }
+}
+
+template <typename Device, typename T>
+void SliceSimpleGpu(const Device& d, Tensor* out, const Tensor& in,
+                 const gtl::ArraySlice<int64>& slice_indices) {
+  // Ensures we can use 32-bit index.
+  const int64 in_nelem = in.NumElements();
+  CHECK_LT(in_nelem, kint32max) << "Tensor too large to transpose on GPU";
+  const int64 out_nelem = out->NumElements();
+  CHECK_LT(out_nelem, kint32max) << "Tensor too large to transpose on GPU";
+  // Pack strides and slice indices sizes into one buffer.
+  const int32 ndims = in.dims();
+  gtl::InlinedVector<int32, 24> host_buf(ndims * 3);
+  gtl::InlinedVector<int32, 8> in_strides = ComputeStride<int32>(in.shape());
+  gtl::InlinedVector<int32, 8> out_strides = ComputeStride<int32>(out->shape());
+  for (int i = 0; i < ndims; ++i) {
+    host_buf[i] = in_strides[i];
+    host_buf[ndims + i] = out_strides[i];
+    host_buf[ndims * 2 + i] = slice_indices[i];
+  }
+  auto num_bytes = sizeof(int64) * host_buf.size();
+  auto dev_buf = d.allocate(num_bytes);
+  // NOTE: host_buf is not allocated by CudaHostAllocator, and
+  // therefore we are doing a sync copy effectively.
+  d.memcpyHostToDevice(dev_buf, host_buf.data(), num_bytes);
+  // Launch kernel to q[...] = p[...].
+  const T* p = in.flat<T>().data();
+  T* q = out->flat<T>().data();
+  CudaLaunchConfig cfg = GetCudaLaunchConfig(out_nelem, d);
+  SliceKernel<<<cfg.block_count, cfg.thread_per_block, 0, d.stream()>>>(
+      cfg.virtual_thread_count, p, reinterpret_cast<const int32*>(dev_buf),
+      ndims, q);
+  // Safe to deallocate immediately after the kernel launch.
+  d.deallocate(dev_buf);
+}
+
+} // namespace internal
 
 typedef Eigen::GpuDevice GPUDevice;
 
diff --git a/tensorflow/core/kernels/strided_slice_op.cc b/tensorflow/core/kernels/strided_slice_op.cc
index 73b6d4cf6a..8fc40db3cc 100644
--- a/tensorflow/core/kernels/strided_slice_op.cc
+++ b/tensorflow/core/kernels/strided_slice_op.cc
@@ -427,7 +427,6 @@ REGISTER_STRIDED_SLICE(bfloat16);
 TF_CALL_GPU_NUMBER_TYPES(REGISTER_GPU);
 TF_CALL_complex64(REGISTER_GPU);
 TF_CALL_complex128(REGISTER_GPU);
-TF_CALL_int64(REGISTER_GPU);
 
 // A special GPU kernel for int32.
 // TODO(b/25387198): Also enable int32 in device memory. This kernel
diff --git a/tensorflow/core/kernels/strided_slice_op_impl.h b/tensorflow/core/kernels/strided_slice_op_impl.h
index afe3a051e6..7d42887426 100644
--- a/tensorflow/core/kernels/strided_slice_op_impl.h
+++ b/tensorflow/core/kernels/strided_slice_op_impl.h
@@ -84,16 +84,16 @@ void HandleStridedSliceCase(OpKernelContext* context,
 
   gtl::InlinedVector<int64, 4> processing_dims = processing_shape.dim_sizes();
   if (is_simple_slice) {
-    Eigen::DSizes<Eigen::DenseIndex, NDIM> begin_di;
-    Eigen::DSizes<Eigen::DenseIndex, NDIM> sizes_di;
+    gtl::InlinedVector<int64, 4> sizes(begin.size());
     for (int i = 0; i < NDIM; ++i) {
-      begin_di[i] = begin[i];
-      sizes_di[i] = end[i] - begin[i];
+      sizes[i] = end[i] - begin[i];
     }
-    functor::Slice<Device, Proxy, NDIM>()(
-        context->eigen_device<Device>(),
-        result->bit_casted_shaped<Proxy, NDIM>(processing_dims),
-        context->input(0).bit_casted_tensor<Proxy, NDIM>(), begin_di, sizes_di);
+    const TensorShape final_shape = result->shape();
+    CHECK(result->CopyFrom(*result, processing_shape));
+    const Tensor input = context->input(0);
+    functor::Slice<Device, T, NDIM>()(
+        context->eigen_device<Device>(), result, input, begin, sizes);
+    CHECK(result->CopyFrom(*result, final_shape));
   } else {
     Eigen::DSizes<Eigen::DenseIndex, NDIM> begin_di;
     Eigen::DSizes<Eigen::DenseIndex, NDIM> end_di;
@@ -196,10 +196,9 @@ class HandleStridedSliceAssignCase<Device, T, 0> {
   extern template struct StridedSlice<GPUDevice, T, NDIM>;         \
   template <>                                                      \
   void Slice<GPUDevice, T, NDIM>::operator()(                      \
-      const GPUDevice& d, typename TTypes<T, NDIM>::Tensor output, \
-      typename TTypes<T, NDIM>::ConstTensor input,                 \
-      const Eigen::DSizes<Eigen::DenseIndex, NDIM>& indices,       \
-      const Eigen::DSizes<Eigen::DenseIndex, NDIM>& sizes);        \
+      const GPUDevice& d, Tensor* output, const Tensor& input,     \
+      const gtl::ArraySlice<int64>& slice_indices,                 \
+      const gtl::ArraySlice<int64>& slice_sizes);                  \
   extern template struct Slice<GPUDevice, T, NDIM>;                \
   template <>                                                      \
   void StridedSliceGrad<GPUDevice, T, NDIM>::operator()(           \
@@ -284,7 +283,6 @@ TF_CALL_GPU_NUMBER_TYPES(DECLARE_FOR_N_GPU);
 TF_CALL_complex64(DECLARE_FOR_N_GPU);
 TF_CALL_complex128(DECLARE_FOR_N_GPU);
 DECLARE_FOR_N_GPU(int32);
-DECLARE_FOR_N_GPU(int64);
 #endif  // END GOOGLE_CUDA
 
 TF_CALL_ALL_TYPES(DECLARE_FOR_N_CPU);
@@ -300,7 +298,6 @@ DECLARE_FOR_N_CPU(bfloat16);
 TF_CALL_SYCL_PROXY_TYPES(PREVENT_FOR_N_SYCL);
 TF_CALL_GPU_NUMBER_TYPES_NO_HALF(DECLARE_FOR_N_SYCL);
 DECLARE_FOR_N_SYCL(int32);
-DECLARE_FOR_N_SYCL(int64);
 
 #undef DECLARE_FOR_N_SYCL
 #endif // TENSORFLOW_USE_SYCL
diff --git a/tensorflow/core/kernels/strided_slice_op_test.cc b/tensorflow/core/kernels/strided_slice_op_test.cc
index 281ca0f58f..78bb15463c 100644
--- a/tensorflow/core/kernels/strided_slice_op_test.cc
+++ b/tensorflow/core/kernels/strided_slice_op_test.cc
@@ -76,20 +76,69 @@ static void SliceHelper(int iters, int size) {
   testing::UseRealTime();
 }
 
+template <typename T>
+static void Dim8SliceHelper(int iters, int size) {
+  testing::StopTiming();
+  Graph* g = new Graph(OpRegistry::Global());
+  DataType dt = DataTypeToEnum<T>::v();
+  int kDim = 100;
+  int kMaxSize = 15000;
+  CHECK_LT(size, kMaxSize);
+
+  Tensor begin(DT_INT32, TensorShape({8}));
+  begin.flat<int32>()(10) = 10;
+  for (int i = 1; i < 7; ++i) {
+    begin.flat<int32>()(i) = 0;
+  }
+  begin.flat<int32>()(7) = 10;
+
+  Tensor end(DT_INT32, TensorShape({8}));
+  end.flat<int32>()(0) = 10 + kDim;
+  for (int i = 1; i < 7; ++i) {
+    end.flat<int32>()(i) = 1;
+  }
+  end.flat<int32>()(7) = 10 + size;
+
+  Tensor strides(DT_INT32, TensorShape({8}));
+  for (int i = 0; i < 8; ++i) {
+    strides.flat<int32>()(i) = 1;
+  }
+
+  Tensor input(dt, TensorShape({2*kDim, 1, 1, 1, 1, 1, 1, kMaxSize}));
+  input.flat<T>().setRandom();
+
+  Node* node;
+  TF_CHECK_OK(NodeBuilder(g->NewName("n"), "StridedSlice")
+                  .Input(test::graph::Constant(g, input))
+                  .Input(test::graph::Constant(g, begin))
+                  .Input(test::graph::Constant(g, end))
+                  .Input(test::graph::Constant(g, strides))
+                  .Attr("T", dt)
+                  .Finalize(g, &node));
+
+  testing::BytesProcessed(static_cast<int64>(iters) * kDim * size * sizeof(T));
+  testing::StartTiming();
+  test::Benchmark("cpu", g).Run(iters);
+  testing::UseRealTime();
+}
+
 static void BM_SliceFloat(int iters, int dim2) {
   SliceHelper<float>(iters, dim2);
+  Dim8SliceHelper<float>(iters, dim2);
 }
 
 BENCHMARK(BM_SliceFloat)->Arg(100)->Arg(1000)->Arg(10000);
 
 static void BM_SliceComplex64(int iters, int dim2) {
   SliceHelper<std::complex<float>>(iters, dim2);
+  Dim8SliceHelper<std::complex<float>>(iters, dim2);
 }
 
 BENCHMARK(BM_SliceComplex64)->Arg(100)->Arg(1000)->Arg(10000);
 
 static void BM_SliceBFloat16(int iters, int dim2) {
   SliceHelper<bfloat16>(iters, dim2);
+  Dim8SliceHelper<bfloat16>(iters, dim2);
 }
 
 BENCHMARK(BM_SliceBFloat16)->Arg(100)->Arg(1000)->Arg(10000);
diff --git a/tensorflow/core/kernels/transpose_op.cc b/tensorflow/core/kernels/transpose_op.cc
index 20f0edf309..96c051c636 100644
--- a/tensorflow/core/kernels/transpose_op.cc
+++ b/tensorflow/core/kernels/transpose_op.cc
@@ -31,13 +31,14 @@ limitations under the License.
 
 namespace tensorflow {
 
-// inv = InvertPermutationOp(T<int32> p) takes a permutation of
+// inv = InvertPermutationOp(T<int32/int64> p) takes a permutation of
 // integers 0, 1, ..., n - 1 and returns the inverted
 // permutation of p. I.e., inv[p[i]] == i, for i in [0 .. n).
 //
-// REQUIRES: input is a vector of int32.
+// REQUIRES: input is a vector of int32 or int64.
 // REQUIRES: input is a permutation of 0, 1, ..., n-1.
 
+template <typename T>
 class InvertPermutationOp : public OpKernel {
  public:
   explicit InvertPermutationOp(OpKernelConstruction* context)
@@ -48,20 +49,19 @@ class InvertPermutationOp : public OpKernel {
     OP_REQUIRES(
         context, TensorShapeUtils::IsVector(input.shape()),
         errors::InvalidArgument("invert_permutation expects a 1D vector."));
-    auto Tin = input.vec<int32>();
+    auto Tin = input.vec<T>();
     OP_REQUIRES(context,
                 FastBoundsCheck(Tin.size(), std::numeric_limits<int32>::max()),
                 errors::InvalidArgument("permutation of nonnegative int32s "
                                         "must have <= int32 max elements"));
-    const int32 N =
-        static_cast<int32>(Tin.size());  // Safe: bounds-checked above.
+    const T N = static_cast<T>(Tin.size());  // Safe: bounds-checked above.
     Tensor* output = nullptr;
     OP_REQUIRES_OK(context,
                    context->allocate_output(0, input.shape(), &output));
-    auto Tout = output->vec<int32>();
+    auto Tout = output->vec<T>();
     std::fill_n(Tout.data(), N, -1);
     for (int i = 0; i < N; ++i) {
-      const int32 d = internal::SubtleMustCopy(Tin(i));
+      const T d = internal::SubtleMustCopy(Tin(i));
       OP_REQUIRES(context, FastBoundsCheck(d, N),
                   errors::InvalidArgument(d, " is not between 0 and ", N));
       OP_REQUIRES(context, Tout(d) == -1,
@@ -73,14 +73,23 @@ class InvertPermutationOp : public OpKernel {
 
 REGISTER_KERNEL_BUILDER(
     Name("InvertPermutation").Device(DEVICE_CPU).TypeConstraint<int32>("T"),
-    InvertPermutationOp);
+    InvertPermutationOp<int32>);
+REGISTER_KERNEL_BUILDER(
+    Name("InvertPermutation").Device(DEVICE_CPU).TypeConstraint<int64>("T"),
+    InvertPermutationOp<int64>);
 
 REGISTER_KERNEL_BUILDER(Name("InvertPermutation")
                             .Device(DEVICE_GPU)
                             .TypeConstraint<int32>("T")
                             .HostMemory("x")
                             .HostMemory("y"),
-                        InvertPermutationOp);
+                        InvertPermutationOp<int32>);
+REGISTER_KERNEL_BUILDER(Name("InvertPermutation")
+                            .Device(DEVICE_GPU)
+                            .TypeConstraint<int64>("T")
+                            .HostMemory("x")
+                            .HostMemory("y"),
+                        InvertPermutationOp<int64>);
 
 #ifdef TENSORFLOW_USE_SYCL
 REGISTER_KERNEL_BUILDER(Name("InvertPermutation")
@@ -88,7 +97,13 @@ REGISTER_KERNEL_BUILDER(Name("InvertPermutation")
                             .TypeConstraint<int32>("T")
                             .HostMemory("x")
                             .HostMemory("y"),
-                        InvertPermutationOp);
+                        InvertPermutationOp<int32>);
+REGISTER_KERNEL_BUILDER(Name("InvertPermutation")
+                            .Device(DEVICE_SYCL)
+                            .TypeConstraint<int64>("T")
+                            .HostMemory("x")
+                            .HostMemory("y"),
+                        InvertPermutationOp<int64>);
 #endif  // TENSORFLOW_USE_SYCL
 
 namespace {
diff --git a/tensorflow/core/kernels/unique_op.cc b/tensorflow/core/kernels/unique_op.cc
index 701c5f6d2b..d087784c8a 100644
--- a/tensorflow/core/kernels/unique_op.cc
+++ b/tensorflow/core/kernels/unique_op.cc
@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
+#include <functional>
 #include <unordered_map>
 #include <utility>
 
@@ -21,6 +22,7 @@ limitations under the License.
 #include "tensorflow/core/framework/tensor.h"
 #include "tensorflow/core/framework/tensor_shape.h"
 #include "tensorflow/core/lib/core/status.h"
+#include "tensorflow/core/lib/hash/hash.h"
 
 namespace tensorflow {
 
@@ -33,8 +35,6 @@ class UniqueOp : public OpKernel {
 
   void Compute(OpKernelContext* context) override {
     const Tensor& input = context->input(0);
-    OP_REQUIRES(context, TensorShapeUtils::IsVector(input.shape()),
-                errors::InvalidArgument("unique expects a 1D vector."));
     // TODO(dga):  Make unique polymorphic for returning int32 and int64
     // vectors to support large tensors.
     OP_REQUIRES(context,
@@ -42,31 +42,102 @@ class UniqueOp : public OpKernel {
                 errors::InvalidArgument(
                     "unique does not support input tensors larger than ",
                     std::numeric_limits<int32>::max(), " elements"));
-    auto Tin = input.vec<T>();
-    const int64 N = static_cast<int64>(Tin.size());
+
+    int64 axis = 0;
+    std::vector<int64> new_sizes{1, input.NumElements(), 1};
+    if (context->num_inputs() == 1) {
+      OP_REQUIRES(context, TensorShapeUtils::IsVector(input.shape()),
+                  errors::InvalidArgument("unique expects a 1D vector."));
+    } else {
+      // In case of UniqueV2, the axis is a 1D vector. The purpose is
+      // to allow specifying either "no axis" or "axis". The `[]` means
+      // "no axis", while `[x]` means `axis = x`.
+      const Tensor& axis_tensor = context->input(1);
+      OP_REQUIRES(context, TensorShapeUtils::IsVector(axis_tensor.shape()),
+                  errors::InvalidArgument("axis expects a 1D vector."));
+      OP_REQUIRES(
+          context, axis_tensor.NumElements() <= 1,
+          errors::InvalidArgument(
+              "axis does not support input tensors larger than 1 elements"));
+      if (axis_tensor.NumElements() == 0) {
+        OP_REQUIRES(context, TensorShapeUtils::IsVector(input.shape()),
+                    errors::InvalidArgument("unique expects a 1D vector."));
+      } else {
+        auto axis_vec = axis_tensor.vec<int64>();
+        axis = axis_vec(0);
+        axis = axis < 0 ? axis + input.dims() : axis;
+        OP_REQUIRES(context, 0 <= axis && axis < input.dims(),
+                    errors::InvalidArgument("axis has to be between [0, ",
+                                            input.dims(), ")"));
+        if (axis > 0) {
+          for (int64 i = 0; i < axis; i++) {
+            new_sizes[0] *= input.dim_size(i);
+          }
+        }
+        new_sizes[1] = input.dim_size(axis);
+        if (axis + 1 < input.dims()) {
+          for (int64 i = axis + 1; i < input.dims(); i++) {
+            new_sizes[2] *= input.dim_size(i);
+          }
+        }
+      }
+    }
+
+    auto Tin = input.shaped<T, 3>(new_sizes);
 
     Tensor* idx = nullptr;
-    OP_REQUIRES_OK(context, context->forward_input_or_allocate_output(
-                                {0}, 1, input.shape(), &idx));
+    OP_REQUIRES_OK(context, context->allocate_output(
+                                1, TensorShape({Tin.dimension(1)}), &idx));
     auto idx_vec = idx->template vec<TIndex>();
 
-    std::unordered_map<T, TIndex> uniq;
-    uniq.reserve(2 * N);
-    for (int64 i = 0, j = 0; i < N; ++i) {
-      auto it = uniq.insert(std::make_pair(Tin(i), j));
+    auto hash_fn = [&Tin](const int64& key) -> unsigned long {
+      size_t h = 0;
+      for (int64 i = 0; i < Tin.dimension(0); i++) {
+        for (int64 j = 0; j < Tin.dimension(2); j++) {
+          h = Hash64Combine(h, hash<T>{}(Tin(i, key, j)));
+        }
+      }
+      return h;
+    };
+
+    auto equal_to_fn = [&Tin](const int64& lhs, const int64& rhs) {
+      for (int64 i = 0; i < Tin.dimension(0); i++) {
+        for (int64 j = 0; j < Tin.dimension(2); j++) {
+          if (Tin(i, lhs, j) != Tin(i, rhs, j)) {
+            return false;
+          }
+        }
+      }
+      return true;
+    };
+
+    std::unordered_map<int64, int64, decltype(hash_fn), decltype(equal_to_fn)>
+        uniq(0, hash_fn, equal_to_fn);
+
+    uniq.reserve(2 * Tin.dimension(1));
+
+    for (int64 i = 0, j = 0; i < Tin.dimension(1); ++i) {
+      auto it = uniq.insert(std::make_pair(i, j));
       idx_vec(i) = it.first->second;
       if (it.second) {
         ++j;
       }
     }
+
     int64 uniq_size = static_cast<int64>(uniq.size());
+    new_sizes[1] = uniq_size;
+    TensorShape output_shape(input.shape());
+    output_shape.set_dim(axis, uniq_size);
     Tensor* output = nullptr;
-    OP_REQUIRES_OK(context, context->allocate_output(
-                                0, TensorShape({uniq_size}), &output));
-    auto output_vec = output->template vec<T>();
+    OP_REQUIRES_OK(context, context->allocate_output(0, output_shape, &output));
+    auto Tout = output->shaped<T, 3>(new_sizes);
 
     for (auto it : uniq) {
-      output_vec(it.second) = it.first;
+      for (int64 i = 0; i < Tin.dimension(0); i++) {
+        for (int64 j = 0; j < Tin.dimension(2); j++) {
+          Tout(i, it.second, j) = Tin(i, it.first, j);
+        }
+      }
     }
 
     if (num_outputs() > 2) {
@@ -74,7 +145,7 @@ class UniqueOp : public OpKernel {
                                   2, TensorShape({uniq_size}), &output));
       auto count_output_vec = output->template vec<TIndex>();
       count_output_vec.setZero();
-      for (int64 i = 0; i < N; ++i) {
+      for (int64 i = 0; i < Tin.dimension(1); ++i) {
         count_output_vec(idx_vec(i))++;
       }
     }
@@ -92,6 +163,16 @@ class UniqueOp : public OpKernel {
                               .TypeConstraint<type>("T")         \
                               .TypeConstraint<int64>("out_idx"), \
                           UniqueOp<type, int64>);                \
+  REGISTER_KERNEL_BUILDER(Name("UniqueV2")                       \
+                              .Device(DEVICE_CPU)                \
+                              .TypeConstraint<type>("T")         \
+                              .TypeConstraint<int32>("out_idx"), \
+                          UniqueOp<type, int32>);                \
+  REGISTER_KERNEL_BUILDER(Name("UniqueV2")                       \
+                              .Device(DEVICE_CPU)                \
+                              .TypeConstraint<type>("T")         \
+                              .TypeConstraint<int64>("out_idx"), \
+                          UniqueOp<type, int64>);                \
   REGISTER_KERNEL_BUILDER(Name("UniqueWithCounts")               \
                               .Device(DEVICE_CPU)                \
                               .TypeConstraint<type>("T")         \
@@ -176,5 +257,5 @@ REGISTER_KERNEL_BUILDER(Name("Unique")
                             .HostMemory("y")
                             .HostMemory("idx"),
                         UniqueOp<int64, int64>);
-#endif // TENSORFLOW_USE_SYCL
+#endif  // TENSORFLOW_USE_SYCL
 }  // namespace tensorflow
diff --git a/tensorflow/core/ops/array_ops.cc b/tensorflow/core/ops/array_ops.cc
index be2916f154..9fa6423d59 100644
--- a/tensorflow/core/ops/array_ops.cc
+++ b/tensorflow/core/ops/array_ops.cc
@@ -723,7 +723,9 @@ y: a tensor of the same shape and type as x but filled with zeros.
 REGISTER_OP("OnesLike")
     .Input("x: T")
     .Output("y: T")
-    .Attr("T: {float, double, int32, int64, complex64, complex128}")
+    .Attr(
+        "T: {float, double, int8, uint8, int16, uint16, int32, int64, "
+        "complex64, complex128, bool}")
     .SetShapeFn(shape_inference::UnchangedShape)
     .Doc(R"doc(
 Returns a tensor of ones with the same shape and type as x.
@@ -2031,6 +2033,46 @@ y: 1-D.
 idx: 1-D.
 )doc");
 
+REGISTER_OP("UniqueV2")
+    .Input("x: T")
+    .Input("axis: int64")
+    .Output("y: T")
+    .Output("idx: out_idx")
+    .Attr("T: type")
+    .Attr("out_idx: {int32, int64} = DT_INT32")
+    .SetShapeFn([](InferenceContext* c) {
+      c->set_output(0, c->Vector(InferenceContext::kUnknownDim));
+      c->set_output(1, c->input(0));
+      return Status::OK();
+    })
+    .Doc(R"doc(
+Finds unique elements in a 1-D tensor.
+
+This operation returns a tensor `y` containing all of the unique elements of `x`
+sorted in the same order that they occur in `x`. This operation also returns a
+tensor `idx` the same size as `x` that contains the index of each value of `x`
+in the unique output `y`. In other words:
+
+`y[idx[i]] = x[i] for i in [0, 1,...,rank(x) - 1]`
+
+For example:
+
+```
+# tensor 'x' is [1, 1, 2, 4, 4, 4, 7, 8, 8]
+y, idx = unique(x)
+y ==> [1, 2, 4, 7, 8]
+idx ==> [0, 0, 1, 2, 2, 2, 3, 4, 4]
+```
+
+
+x: A `Tensor`.
+axis: A `Tensor` of type `int64` (default: 0). The axis of the Tensor to
+  find the unique elements.
+y: A `Tensor`. Unique elements along the `axis` of `Tensor` x.
+idx: A 1-D Tensor. Has the same type as x that contains the index of each
+  value of x in the output y.
+)doc");
+
 // --------------------------------------------------------------------------
 REGISTER_OP("UniqueWithCounts")
     .Input("x: T")
diff --git a/tensorflow/core/ops/math_ops.cc b/tensorflow/core/ops/math_ops.cc
index 7b10af9f44..d30b847696 100644
--- a/tensorflow/core/ops/math_ops.cc
+++ b/tensorflow/core/ops/math_ops.cc
@@ -1829,6 +1829,8 @@ need not be sorted and need not cover all values in the full
 range of valid values.
 
 If the sum is empty for a given segment ID `i`, `output[i] = 0`.
+If the given segment ID `i` is negative, the value is dropped and will not be
+added to the sum of the segment.
 
 `num_segments` should equal the number of distinct segment IDs.
 
diff --git a/tensorflow/core/ops/nn_ops.cc b/tensorflow/core/ops/nn_ops.cc
index e245c8ba91..a242a13878 100644
--- a/tensorflow/core/ops/nn_ops.cc
+++ b/tensorflow/core/ops/nn_ops.cc
@@ -819,7 +819,7 @@ REGISTER_OP("DepthwiseConv2dNative")
     .Input("input: T")
     .Input("filter: T")
     .Output("output: T")
-    .Attr("T: {float, double}")
+    .Attr("T: {half, float, double}")
     .Attr("strides: list(int)")
     .Attr(GetPaddingAttrString())
     .Attr(GetConvnetDataFormatAttrString())
@@ -945,7 +945,7 @@ REGISTER_OP("Conv3D")
     .Input("input: T")
     .Input("filter: T")
     .Output("output: T")
-    .Attr("T: {float, double}")
+    .Attr("T: {half, float, double}")
     .Attr("strides: list(int) >= 5")
     .Attr(GetPaddingAttrString())
     .Attr(GetConvnet3dDataFormatAttrString())
@@ -977,7 +977,7 @@ REGISTER_OP("Conv3DBackpropInput")
     .Input("filter: T")
     .Input("out_backprop: T")
     .Output("output: T")
-    .Attr("T: {float, double}")
+    .Attr("T: {half, float, double}")
     .Attr("strides: list(int) >= 5")
     .Attr(GetPaddingAttrString())
     .Deprecated(10, "Use Conv3DBackpropInputV2")
@@ -1003,7 +1003,7 @@ REGISTER_OP("Conv3DBackpropFilter")
     .Input("filter: T")
     .Input("out_backprop: T")
     .Output("output: T")
-    .Attr("T: {float, double}")
+    .Attr("T: {half, float, double}")
     .Attr("strides: list(int) >= 5")
     .Attr(GetPaddingAttrString())
     .Deprecated(10, "Use Conv3DBackpropFilterV2")
@@ -1032,7 +1032,7 @@ REGISTER_OP("Conv3DBackpropInputV2")
     .Input("filter: T")
     .Input("out_backprop: T")
     .Output("output: T")
-    .Attr("T: {float, double}")
+    .Attr("T: {half, float, double}")
     .Attr("strides: list(int) >= 5")
     .Attr(GetPaddingAttrString())
     .Attr(GetConvnet3dDataFormatAttrString())
@@ -1069,7 +1069,7 @@ REGISTER_OP("Conv3DBackpropFilterV2")
     .Input("filter_sizes: int32")
     .Input("out_backprop: T")
     .Output("output: T")
-    .Attr("T: {float, double}")
+    .Attr("T: {half, float, double}")
     .Attr("strides: list(int) >= 5")
     .Attr(GetPaddingAttrString())
     .Attr(GetConvnet3dDataFormatAttrString())
diff --git a/tensorflow/core/ops/ops.pbtxt b/tensorflow/core/ops/ops.pbtxt
index 6ce0b70c9d..9c41957ae6 100644
--- a/tensorflow/core/ops/ops.pbtxt
+++ b/tensorflow/core/ops/ops.pbtxt
@@ -5449,6 +5449,7 @@ op {
     type: "type"
     allowed_values {
       list {
+        type: DT_HALF
         type: DT_FLOAT
         type: DT_DOUBLE
       }
@@ -5515,6 +5516,7 @@ op {
     type: "type"
     allowed_values {
       list {
+        type: DT_HALF
         type: DT_FLOAT
         type: DT_DOUBLE
       }
@@ -5570,6 +5572,7 @@ op {
     type: "type"
     allowed_values {
       list {
+        type: DT_HALF
         type: DT_FLOAT
         type: DT_DOUBLE
       }
@@ -5635,6 +5638,7 @@ op {
     type: "type"
     allowed_values {
       list {
+        type: DT_HALF
         type: DT_FLOAT
         type: DT_DOUBLE
       }
@@ -5690,6 +5694,7 @@ op {
     type: "type"
     allowed_values {
       list {
+        type: DT_HALF
         type: DT_FLOAT
         type: DT_DOUBLE
       }
diff --git a/tensorflow/core/platform/default/build_config/BUILD b/tensorflow/core/platform/default/build_config/BUILD
index f746b15fee..f2fadb4558 100644
--- a/tensorflow/core/platform/default/build_config/BUILD
+++ b/tensorflow/core/platform/default/build_config/BUILD
@@ -12,6 +12,7 @@ load("//tensorflow:tensorflow.bzl", "tf_copts")
 load("//tensorflow:tensorflow.bzl", "tf_cuda_library")
 load("//tensorflow/core:platform/default/build_config_root.bzl", "if_static")
 load("@local_config_sycl//sycl:platform.bzl", "sycl_library_path")
+load("@local_config_sycl//sycl:build_defs.bzl", "if_ccpp")
 
 cc_library(
     name = "gtest",
@@ -194,17 +195,16 @@ cc_library(
 
 cc_library(
     name = "sycl",
-    data = [
+    data = if_ccpp([
         "@local_config_sycl//sycl:{}".format(sycl_library_path("ComputeCpp")),
-    ],
-    linkopts = select({
-        "//conditions:default": [
-            "-Wl,-rpath,../local_config_sycl/sycl/lib",
-        ],
-    }),
-    deps = [
-        "@local_config_sycl//sycl:syclrt",
-    ],
+    ]),
+    linkopts = if_ccpp([
+        "-Wl,-rpath,../local_config_sycl/sycl/lib",
+    ]),
+    deps = if_ccpp(
+        ["@local_config_sycl//sycl:syclrt"],
+        ["@local_config_sycl//sycl:sycl_headers"],
+    ),
 )
 
 filegroup(
diff --git a/tensorflow/core/platform/default/notification.h b/tensorflow/core/platform/default/notification.h
index 6a214dbd0a..5c401b7477 100644
--- a/tensorflow/core/platform/default/notification.h
+++ b/tensorflow/core/platform/default/notification.h
@@ -73,7 +73,7 @@ class Notification {
   }
 
   mutex mu_;                    // protects mutations of notified_
-  condition_variable cv_;       // signalled when notified_ becomes non-zero
+  condition_variable cv_;       // signaled when notified_ becomes non-zero
   std::atomic<bool> notified_;  // mutations under mu_
 };
 
diff --git a/tensorflow/core/platform/posix/error.cc b/tensorflow/core/platform/posix/error.cc
index e9baad5422..f8b0285c50 100644
--- a/tensorflow/core/platform/posix/error.cc
+++ b/tensorflow/core/platform/posix/error.cc
@@ -72,7 +72,7 @@ error::Code ErrnoToCode(int err_number) {
     case EBUSY:       // Device or resource busy
     case ECHILD:      // No child processes
     case EISCONN:     // Socket is connected
-#if !defined(_WIN32)
+#if !defined(_WIN32) && !defined(__HAIKU__)
     case ENOTBLK:     // Block device required
 #endif
     case ENOTCONN:    // The socket is not connected
@@ -94,7 +94,7 @@ error::Code ErrnoToCode(int err_number) {
     case ENODATA:  // No message is available on the STREAM read queue
     case ENOMEM:   // Not enough space
     case ENOSR:    // No STREAM resources
-#if !defined(_WIN32)
+#if !defined(_WIN32) && !defined(__HAIKU__)
     case EUSERS:   // Too many users
 #endif
       code = error::RESOURCE_EXHAUSTED;
@@ -111,7 +111,7 @@ error::Code ErrnoToCode(int err_number) {
     case EPFNOSUPPORT:     // Protocol family not supported
 #endif
     case EPROTONOSUPPORT:  // Protocol not supported
-#if !defined(_WIN32)
+#if !defined(_WIN32) && !defined(__HAIKU__)
     case ESOCKTNOSUPPORT:  // Socket type not supported
 #endif
     case EXDEV:            // Improper link
@@ -131,7 +131,8 @@ error::Code ErrnoToCode(int err_number) {
     case ENETUNREACH:   // Network unreachable
     case ENOLCK:        // No locks available
     case ENOLINK:       // Link has been severed
-#if !(defined(__APPLE__) || defined(__FreeBSD__) || defined(_WIN32))
+#if !(defined(__APPLE__) || defined(__FreeBSD__) || defined(_WIN32) \
+	|| defined(__HAIKU__))
     case ENONET:  // Machine is not on the network
 #endif
       code = error::UNAVAILABLE;
@@ -156,7 +157,7 @@ error::Code ErrnoToCode(int err_number) {
     case ENOEXEC:      // Exec format error
     case ENOMSG:       // No message of the desired type
     case EPROTO:       // Protocol error
-#if !defined(_WIN32)
+#if !defined(_WIN32) && !defined(__HAIKU__)
     case EREMOTE:      // Object is remote
 #endif
       code = error::UNKNOWN;
diff --git a/tensorflow/core/platform/posix/port.cc b/tensorflow/core/platform/posix/port.cc
index 6cba40ccfc..09f69a95c1 100644
--- a/tensorflow/core/platform/posix/port.cc
+++ b/tensorflow/core/platform/posix/port.cc
@@ -37,7 +37,8 @@ limitations under the License.
 #ifdef TF_USE_SNAPPY
 #include "snappy.h"
 #endif
-#if (defined(__APPLE__) && defined(__MACH__)) || defined(__FreeBSD__)
+#if (defined(__APPLE__) && defined(__MACH__)) || defined(__FreeBSD__) \
+	|| defined(__HAIKU__)
 #include <thread>
 #endif
 
@@ -61,7 +62,8 @@ int NumSchedulableCPUs() {
   }
   perror("sched_getaffinity");
 #endif
-#if (defined(__APPLE__) && defined(__MACH__)) || defined(__FreeBSD__)
+#if (defined(__APPLE__) && defined(__MACH__)) || defined(__FreeBSD__) \
+	|| defined(__HAIKU__)
   unsigned int count = std::thread::hardware_concurrency();
   if (count > 0) return static_cast<int>(count);
 #endif
diff --git a/tensorflow/core/public/version.h b/tensorflow/core/public/version.h
index 1bf9c93101..ec077c4283 100644
--- a/tensorflow/core/public/version.h
+++ b/tensorflow/core/public/version.h
@@ -24,7 +24,7 @@ limitations under the License.
 
 // TF_VERSION_SUFFIX is non-empty for pre-releases (e.g. "-alpha", "-alpha.1",
 // "-beta", "-rc", "-rc.1")
-#define TF_VERSION_SUFFIX "-rc1"
+#define TF_VERSION_SUFFIX ""
 
 #define TF_STR_HELPER(x) #x
 #define TF_STR(x) TF_STR_HELPER(x)
diff --git a/tensorflow/core/util/mkl_util.h b/tensorflow/core/util/mkl_util.h
index 1bfa4f83a3..118ff0d0d6 100644
--- a/tensorflow/core/util/mkl_util.h
+++ b/tensorflow/core/util/mkl_util.h
@@ -26,18 +26,23 @@ limitations under the License.
 #include "mkl_trans.h"
 #include "tensorflow/core/framework/tensor.h"
 #include "tensorflow/core/framework/tensor_shape.h"
-
 #include "tensorflow/core/framework/op_kernel.h"
-#include "tensorflow/core/graph/mkl_graph_util.h"
 #include "tensorflow/core/lib/core/errors.h"
 #include "tensorflow/core/lib/gtl/array_slice.h"
 #include "tensorflow/core/platform/logging.h"
 #include "tensorflow/core/platform/macros.h"
 #include "tensorflow/core/util/padding.h"
 #include "tensorflow/core/util/tensor_format.h"
+#include "tensorflow/core/graph/mkl_graph_util.h"
 
 #ifdef INTEL_MKL_DNN
 #include "mkldnn.hpp"
+
+using mkldnn::memory;
+using mkldnn::reorder;
+using mkldnn::primitive;
+using mkldnn::padding_kind;
+using mkldnn::engine;
 #endif
 
 // The file contains a number of utility classes and functions used by MKL
@@ -51,6 +56,8 @@ namespace tensorflow {
 // Tensorflow tensor.
 
 typedef enum { W = 0, H = 1, C = 2, N = 3 } MklDims;
+typedef enum { Dim_N = 0, Dim_C = 1, Dim_H = 2, Dim_W = 3,
+               Dim_O = 0, Dim_I = 1 } MklDnnDims;
 
 class MklShape {
  public:
@@ -143,7 +150,9 @@ class MklShape {
   size_t GetDimension() const { return dimension_; }
   const size_t* GetSizes() const { return sizes_; }
   int64 dim_size(int index) const { return sizes_[index]; }
-  int64 tf_dim_size(int index) const { return sizes_[tf_to_mkl_dim_map_[index]]; }
+  int64 tf_dim_size(int index) const {
+    return sizes_[tf_to_mkl_dim_map_[index]];
+  }
   const size_t* GetStrides() const { return strides_; }
   const size_t* GetTfToMklDimMap() const { return tf_to_mkl_dim_map_; }
   size_t tf_dim_idx(int index) const { return tf_to_mkl_dim_map_[index]; }
@@ -227,7 +236,8 @@ class MklShape {
   (IS_MKL_TENSOR_OFFSET + sizeof(size_t))  // Location of dimension_
 // Location of sizes. Note dim is not used here, left here
 // to make macros consistent.
-#define SIZES_OFFSET(dims) (DIMS_OFFSET + sizeof(size_t))
+#define SIZES_OFFSET(dims) \
+  (DIMS_OFFSET + sizeof(size_t))
 #define STRIDES_OFFSET(dims) \
   (SIZES_OFFSET(dims) + dims * sizeof(size_t))  // Location of strides
 #define MKL_LAYOUT_OFFSET(dims) \
@@ -309,6 +319,266 @@ class MklShape {
       nullptr;  // TF dimension corresponding to this MKL dimension
 };
 
+#ifdef INTEL_MKL_DNN
+
+// Forward decl
+TensorFormat MklDnnDataFormatToTFDataFormat(memory::format format);
+
+class MklDnnShape {
+ private:
+  typedef struct {
+    /// Flag to indicate if the tensor is an  MKL tensor or not
+    bool is_mkl_tensor_ = false;
+    /// Number of dimensions in Tensorflow format
+    size_t dimension_ = 0;
+    /// Required by MKLDNN for conversions
+    mkldnn_dims_t sizes_;    // Required by MKL for conversions
+    memory::format tf_data_format_ = memory::format::format_undef;
+    memory::data_type T_ = memory::data_type::data_undef;
+    // MKL layout
+    mkldnn_memory_desc_t mkl_md_;
+    /// TF dimension corresponding to this MKL dimension
+    mkldnn_dims_t map_;
+  } MklShapeData;
+  MklShapeData data_;
+
+  typedef std::remove_extent<mkldnn_dims_t>::type mkldnn_dim_t;
+#define INVALID_DIM_SIZE -1
+
+
+ public:
+  MklDnnShape() {
+    for (size_t i = 0; i < sizeof(data_.sizes_) /
+                           sizeof(data_.sizes_[0]); ++i) {
+      data_.sizes_[i] = -1;
+    }
+    for (size_t i = 0; i < sizeof(data_.map_) /
+                           sizeof(data_.map_[0]); ++i) {
+      data_.map_[i] = -1;
+    }
+  }
+
+  ~MklDnnShape() {}
+  TF_DISALLOW_COPY_AND_ASSIGN(MklDnnShape);  // Cannot copy
+
+  inline const bool IsMklTensor() const { return data_.is_mkl_tensor_; }
+  inline void SetMklTensor(bool is_mkl_tensor) {
+    data_.is_mkl_tensor_ = is_mkl_tensor;
+  }
+
+  inline void SetDimensions(const size_t dimension) {
+    data_.dimension_ = dimension;
+  }
+  inline size_t GetDimension(char dimension)const {
+    int index = GetMklDnnTensorDimIndex(dimension);
+    CHECK(index >= 0 && index < this->GetDimension())
+        << "Invalid index from the dimension: " << index << ", " << dimension;
+    return this->DimSize(index);
+  }
+
+  inline int32 GetMklDnnTensorDimIndex(char dimension)const {
+    switch (dimension) {
+  case 'N':
+    return MklDnnDims::Dim_N;
+  case 'C':
+    return MklDnnDims::Dim_C;
+  case 'H':
+    return MklDnnDims::Dim_H;
+  case 'W':
+    return MklDnnDims::Dim_W;
+  default:
+    LOG(FATAL) << "Invalid dimension: " << dimension;
+    return -1;  // Avoid compiler warning about missing return value
+    }
+  }
+
+  inline size_t GetDimension() const { return data_.dimension_; }
+  inline const int* GetSizes() const {
+    return reinterpret_cast<const int*>(&data_.sizes_[0]);
+  }
+
+  // Returns an mkldnn::memory::dims object that contains the sizes of this
+  // MklDnnShape object.
+  inline memory::dims GetSizesAsMklDnnDims() const {
+    memory::dims retVal;
+    if (data_.is_mkl_tensor_) {
+      int dimensions = sizeof(data_.sizes_) / sizeof(data_.sizes_[0]);
+      for (size_t i = 0 ; i < dimensions; i++) {
+        if (data_.sizes_[i] != INVALID_DIM_SIZE)
+        retVal.push_back(data_.sizes_[i]);
+      }
+    } else {
+      CHECK_EQ(data_.is_mkl_tensor_, true);
+    }
+    return retVal;
+  }
+
+  inline int64 DimSize(int index) const {
+    CHECK_LT(index, sizeof(data_.sizes_)/sizeof(data_.sizes_[0]));
+    return data_.sizes_[index];
+  }
+
+  /// Return TensorShape that describes the Tensorflow shape of the tensor
+  /// represented by this MklShape.
+  inline TensorShape GetTfShape() {
+    CHECK_EQ(data_.is_mkl_tensor_, true);
+
+    std::vector<int32> shape(data_.dimension_, -1);
+    for (size_t idx = 0; idx < data_.dimension_; ++idx) {
+      shape[idx] = data_.sizes_[TfDimIdx(idx)];
+    }
+
+    TensorShape ts;
+    bool ret = TensorShapeUtils::MakeShape(shape, &ts).ok();
+    CHECK_EQ(ret, true);
+    return ts;
+  }
+
+  inline void SetElemType(memory::data_type dt) { data_.T_ = dt; }
+  inline const memory::data_type GetElemType() { return data_.T_; }
+
+  inline void SetMklLayout(memory::primitive_desc* pd) {
+    CHECK_NOTNULL(pd);
+    data_.mkl_md_ = pd->desc().data;
+  }
+  inline const memory::desc GetMklLayout() const {
+    return memory::desc(data_.mkl_md_);
+  }
+
+  inline memory::format GetTfDataFormat() const {
+    return data_.tf_data_format_;
+  }
+  /// We don't create primitive_descriptor for TensorFlow layout now.
+  /// We use lazy evaluation and create it only when needed.
+  inline void SetTfLayout(size_t dims, const memory::dims& sizes,
+                   memory::format format) {
+    CHECK_EQ(dims, sizes.size());
+    data_.dimension_ = dims;
+    for (size_t ii = 0; ii < dims; ii++) {
+      data_.sizes_[ii] = sizes[ii];
+    }
+    data_.tf_data_format_ = format;
+    SetTfDimOrder(dims, format);
+  }
+  inline const memory::desc GetTfLayout() const {
+    memory::dims dims;
+    for (size_t ii = 0; ii < data_.dimension_; ii++) {
+      dims.push_back(data_.sizes_[ii]);
+    }
+    return memory::desc(dims, data_.T_, data_.tf_data_format_);
+  }
+  inline const memory::desc GetCurLayout() const {
+    return IsMklTensor() ? GetMklLayout() : GetTfLayout();
+  }
+
+  // nhasabni - I've removed SetTfDimOrder that was setting default order in
+  // case of MKL-ML. We don't need a case of default dimension order because
+  // when an operator that does not get data_format attribute gets all inputs
+  // in Tensorflow format, it will produce output in Tensorflow format.
+  inline void SetTfDimOrder(const size_t dimension, const mkldnn_dims_t map) {
+    CHECK(dimension == data_.dimension_);
+    for (size_t ii = 0; ii < dimension; ii++) {
+      data_.map_[ii] = map[ii];
+    }
+  }
+
+  inline void SetTfDimOrder(const size_t dimension, TensorFormat data_format) {
+    // TODO(nhasabni): Why do we restrict this to 4D?
+    CHECK_EQ(dimension, 4);
+    CHECK(dimension == data_.dimension_);
+    data_.map_[GetTensorDimIndex<2>(data_format, 'W')] = MklDnnDims::Dim_W;
+    data_.map_[GetTensorDimIndex<2>(data_format, 'H')] = MklDnnDims::Dim_H;
+    data_.map_[GetTensorDimIndex<2>(data_format, 'C')] = MklDnnDims::Dim_C;
+    data_.map_[GetTensorDimIndex<2>(data_format, 'N')] = MklDnnDims::Dim_N;
+  }
+
+  inline void SetTfDimOrder(const size_t dimension, memory::format format) {
+    TensorFormat data_format = MklDnnDataFormatToTFDataFormat(format);
+    SetTfDimOrder(dimension, data_format);
+  }
+
+  inline const mkldnn_dim_t* GetTfToMklDimMap() const {
+    return &data_.map_[0];
+  }
+  inline size_t TfDimIdx(int index) const { return data_.map_[index]; }
+  inline int64 TfDimSize(int index) const {
+    return data_.sizes_[TfDimIdx(index)];
+  }
+
+  /// Query TF-MKL dimension ordering map and check if Tensorflow dimension 'd'
+  /// corresponds to MKL's Channel dimension.
+  inline bool IsMklChannelDim(int d) const {
+    return TfDimIdx(d) == MklDnnDims::Dim_C;
+  }
+  /// Query TF-MKL dimension ordering map and check if Tensorflow dimension 'd'
+  /// corresponds to MKL's Batch dimension.
+  inline bool IsMklBatchDim(int d) const {
+    return TfDimIdx(d) == MklDnnDims::Dim_N;
+  }
+  /// Query TF-MKL dimension ordering map and check if Tensorflow dimension 'd'
+  /// corresponds to MKL's Width dimension.
+  inline bool IsMklWidthDim(int d) const {
+    return TfDimIdx(d) == MklDnnDims::Dim_W;
+  }
+  /// Query TF-MKL dimension ordering map and check if Tensorflow dimension 'd'
+  /// corresponds to MKL's Height dimension.
+  inline bool IsMklHeightDim(int d) const {
+    return TfDimIdx(d) == MklDnnDims::Dim_H;
+  }
+
+  /// Check if the TF-Mkl dimension ordering map specifies if the input
+  /// tensor is in NCHW format.
+  inline bool IsTensorInNCHWFormat() const {
+    TensorFormat data_format = FORMAT_NCHW;
+    return (IsMklBatchDim(GetTensorDimIndex<2>(data_format, 'N')) &&
+            IsMklChannelDim(GetTensorDimIndex<2>(data_format, 'C')) &&
+            IsMklHeightDim(GetTensorDimIndex<2>(data_format, 'H')) &&
+            IsMklWidthDim(GetTensorDimIndex<2>(data_format, 'W')));
+  }
+
+  /// Check if the TF-Mkl dimension ordering map specifies if the input
+  /// tensor is in NHWC format.
+  inline bool IsTensorInNHWCFormat() const {
+    TensorFormat data_format = FORMAT_NHWC;
+    return (IsMklBatchDim(GetTensorDimIndex<2>(data_format, 'N')) &&
+            IsMklChannelDim(GetTensorDimIndex<2>(data_format, 'C')) &&
+            IsMklHeightDim(GetTensorDimIndex<2>(data_format, 'H')) &&
+            IsMklWidthDim(GetTensorDimIndex<2>(data_format, 'W')));
+  }
+
+  /// The following methods are used for serializing and de-serializing the
+  /// contents of the mklshape object.
+  /// The data is serialized in this order
+  /// is_mkl_tensor_ : dimension_ : sizes_ : map_: format_ : T_ : mkl_pd_;
+
+  /// Size of buffer to hold the serialized object, the size is computed by
+  /// following above mentioned order
+  inline size_t GetSerializeBufferSize() const {
+    return sizeof(MklShapeData);
+  }
+
+  void SerializeMklDnnShape(unsigned char* buf, size_t buf_size) const {
+    CHECK(buf_size >= GetSerializeBufferSize())
+        << "Buffer size is too small to SerializeMklDnnShape";
+    *reinterpret_cast<MklShapeData*>(buf) = data_;
+  }
+
+  void DeSerializeMklDnnShape(const unsigned char* buf, size_t buf_size) {
+    // Make sure buffer holds at least is_mkl_tensor_.
+    CHECK(buf_size >= sizeof(data_.is_mkl_tensor_))
+      << "Buffer size is too small in DeSerializeMklDnnShape";
+
+    const bool is_mkl_tensor = *reinterpret_cast<const bool*>(buf);
+    if (is_mkl_tensor) {  // If it is an MKL Tensor then read the rest
+      CHECK(buf_size >= GetSerializeBufferSize())
+        << "Buffer size is too small in DeSerializeMklDnnShape";
+      data_ = *reinterpret_cast<const MklShapeData*>(buf);
+    }
+  }
+};
+
+#endif
+
 // List of MklShape objects. Used in Concat/Split layers.
 typedef std::vector<MklShape> MklShapeList;
 
@@ -347,6 +617,36 @@ inline Tensor ConvertMklToTF(OpKernelContext* context, const Tensor& mkl_tensor,
   return output_tensor;
 }
 
+#ifdef INTEL_MKL_DNN
+template <typename T>
+inline Tensor ConvertMklToTF(OpKernelContext* context, const Tensor& mkl_tensor,
+                             const MklDnnShape& mkl_shape) {
+  Tensor output_tensor;
+  TensorShape output_shape;
+
+#if 0
+  // TODO(nhasabni): need to implement
+  for (size_t j = 0; j < mkl_shape.GetDimension(); j++) {
+    // Outermost to innermost dimension
+    output_shape.AddDim(mkl_shape.GetSizes()[mkl_shape.tf_dim_idx(j)]);
+  }
+
+  // Allocate output tensor.
+  context->allocate_temp(DataTypeToEnum<T>::v(), output_shape, &output_tensor);
+
+  dnnLayout_t output_layout = static_cast<dnnLayout_t>(mkl_shape.GetTfLayout());
+  void* input_buffer = const_cast<T*>(mkl_tensor.flat<T>().data());
+  void* output_buffer = const_cast<T*>(output_tensor.flat<T>().data());
+
+  if (mkl_tensor.NumElements() != 0) {
+    mkl_shape.GetConvertedFlatData(output_layout, input_buffer, output_buffer);
+  }
+#endif
+
+  return output_tensor;
+}
+#endif
+
 // Get the MKL shape from the second string tensor
 inline void GetMklShape(OpKernelContext* ctext, int n, MklShape* mklshape) {
   mklshape->DeSerializeMklShape(
@@ -359,6 +659,20 @@ inline void GetMklShape(OpKernelContext* ctext, int n, MklShape* mklshape) {
           sizeof(uint8));
 }
 
+#ifdef INTEL_MKL_DNN
+inline void GetMklShape(OpKernelContext* ctext, int n,
+                        MklDnnShape* mklshape) {
+  mklshape->DeSerializeMklDnnShape(
+      ctext->input(GetTensorMetaDataIndex(n, ctext->num_inputs()))
+          .flat<uint8>()
+          .data(),
+      ctext->input(GetTensorMetaDataIndex(n, ctext->num_inputs()))
+              .flat<uint8>()
+              .size() *
+          sizeof(uint8));
+}
+#endif
+
 // Gets the actual input
 inline const Tensor& MklGetInput(OpKernelContext* ctext, int n) {
   return ctext->input(GetTensorDataIndex(n, ctext->num_inputs()));
@@ -382,6 +696,27 @@ inline void GetMklShapeList(OpKernelContext* ctext, StringPiece name,
   }
 }
 
+#ifdef INTEL_MKL_DNN
+/// Get shape of input tensor pointed by 'input_idx' in TensorShape format.
+/// If the input tensor is in MKL layout, then obtains TensorShape from
+/// MklShape.
+inline TensorShape GetTfShape(OpKernelContext* context,
+                              size_t input_idx) {
+  // Sanity check.
+  CHECK_NOTNULL(context);
+  CHECK_LT(input_idx, context->num_inputs());
+
+  MklDnnShape input_mkl_shape;
+  GetMklShape(context, input_idx, &input_mkl_shape);
+  if (input_mkl_shape.IsMklTensor()) {
+    return input_mkl_shape.GetTfShape();
+  } else {
+    const Tensor& t = MklGetInput(context, input_idx);
+    return t.shape();
+  }
+}
+#endif
+
 // Allocate the second output tensor that will contain
 // the MKL shape serialized
 inline void AllocateOutputSetMklShape(OpKernelContext* ctext, int n,
@@ -397,6 +732,23 @@ inline void AllocateOutputSetMklShape(OpKernelContext* ctext, int n,
       second_tensor->flat<uint8>().size() * sizeof(uint8));
 }
 
+#ifdef INTEL_MKL_DNN
+// Allocate the second output tensor that will contain
+// the MKL shape serialized
+inline void AllocateOutputSetMklShape(OpKernelContext* ctext, int n,
+                                      const MklDnnShape& mkl_shape) {
+  Tensor* second_tensor = nullptr;
+  TensorShape second_shape;
+  second_shape.AddDim(mkl_shape.GetSerializeBufferSize());
+  OP_REQUIRES_OK(ctext, ctext->allocate_output(
+                            GetTensorMetaDataIndex(n, ctext->num_outputs()),
+                            second_shape, &second_tensor));
+  mkl_shape.SerializeMklDnnShape(
+      second_tensor->flat<uint8>().data(),
+      second_tensor->flat<uint8>().size() * sizeof(uint8));
+}
+#endif
+
 // Allocate the output tensor, create a second output tensor that will contain
 // the MKL shape serialized
 inline void AllocateOutputSetMklShape(OpKernelContext* ctext, int n,
@@ -417,9 +769,43 @@ inline void AllocateOutputSetMklShape(OpKernelContext* ctext, int n,
       second_tensor->flat<uint8>().size() * sizeof(uint8));
 }
 
+#ifdef INTEL_MKL_DNN
+// Allocate the output tensor, create a second output tensor that will contain
+// the MKL shape serialized
+inline void AllocateOutputSetMklShape(OpKernelContext* ctext, int n,
+                                      Tensor** output,
+                                      const TensorShape& tf_shape,
+                                      const MklDnnShape& mkl_shape) {
+  Tensor* second_tensor = nullptr;
+  TensorShape second_shape;
+  second_shape.AddDim(mkl_shape.GetSerializeBufferSize());
+  OP_REQUIRES_OK(
+      ctext, ctext->allocate_output(GetTensorDataIndex(n, ctext->num_outputs()),
+                                    tf_shape, output));
+  OP_REQUIRES_OK(ctext, ctext->allocate_output(
+                            GetTensorMetaDataIndex(n, ctext->num_outputs()),
+                            second_shape, &second_tensor));
+  mkl_shape.SerializeMklDnnShape(
+      second_tensor->flat<uint8>().data(),
+      second_tensor->flat<uint8>().size() * sizeof(uint8));
+}
+#endif
+
 // Allocates a temp tensor and returns the data buffer for temporary storage.
 // Currently
-// we only support F32, will need to templatize if other types are added
+#ifdef INTEL_MKL_DNN
+template <typename T>
+inline void AllocTmpBuffer(OpKernelContext* context, Tensor* tensor_out,
+                           const memory::primitive_desc& pd, void** buf_out) {
+  TensorShape tf_shape;
+
+  tf_shape.AddDim(pd.get_size() / sizeof(T) + 1);
+  OP_REQUIRES_OK(context, context->allocate_temp(DataTypeToEnum<T>::v(),
+                                                 tf_shape, tensor_out));
+  *buf_out = static_cast<void*>(tensor_out->flat<T>().data());
+}
+#endif
+
 inline void AllocTmpBuffer(OpKernelContext* context, Tensor* tensor_out,
                            dnnLayout_t lt_buff, void** buf_out) {
   TensorShape tf_shape;
@@ -435,7 +821,7 @@ inline void AllocTmpBuffer(OpKernelContext* context, Tensor* tensor_out,
 
 template <typename T>
 inline void AllocTmpBuffer(OpKernelContext* context, Tensor* tensor_out,
-                           TensorShape tf_shape) {
+                              TensorShape tf_shape) {
   OP_REQUIRES_OK(context, context->allocate_temp(DataTypeToEnum<T>::v(),
                                                  tf_shape, tensor_out));
 }
@@ -669,6 +1055,8 @@ inline bool MklCompareShapes(const TensorShape* input_shape_0,
   return true;
 }
 
+// These functions do not compile with MKL-DNN since mkl.h is missing.
+// We may need to remove them later.
 // TODO(intel_tf): Remove this routine when faster MKL layout conversion is
 // out.
 inline void MklNHWCToNCHW(const Tensor& input, Tensor** output) {
@@ -707,18 +1095,11 @@ inline void MklNCHWToNHWC(const Tensor& input, Tensor** output) {
 
 #ifdef INTEL_MKL_DNN
 
-using mkldnn::engine;
-using mkldnn::memory;
-using mkldnn::padding_kind;
-using mkldnn::primitive;
-using mkldnn::reorder;
-
 /// Return MKL-DNN data type (memory::data_type) for input type T
 ///
 /// @input None
 /// @return memory::data_type corresponding to type T
-template <typename T>
-static memory::data_type MklDnnType();
+template<typename T> static memory::data_type MklDnnType();
 
 /// Instantiation for float type. Add similar instantiations for other
 /// type if needed.
@@ -733,15 +1114,26 @@ memory::data_type MklDnnType<float>() {
 /// @return: memory::format corresponding to TensorFlow data format;
 ///          Fails with an error if invalid data format.
 inline memory::format TFDataFormatToMklDnnDataFormat(TensorFormat format) {
-  if (format == FORMAT_NHWC)
-    return memory::format::nhwc;
-  else if (format == FORMAT_NCHW)
-    return memory::format::nchw;
-  TF_CHECK_OK(Status(error::Code::INVALID_ARGUMENT, "Unsupported data format"));
+  if (format == FORMAT_NHWC) return memory::format::nhwc;
+  else if (format == FORMAT_NCHW) return memory::format::nchw;
+  TF_CHECK_OK(Status(error::Code::INVALID_ARGUMENT,
+                     "Unsupported data format"));
   // Return to get rid of compiler warning
   return memory::format::format_undef;
 }
 
+/// Map MKL-DNN data format to TensorFlow's data format
+///
+/// @input: memory::format
+/// @return: Tensorflow data format corresponding to memory::format
+///          Fails with an error if invalid data format.
+inline TensorFormat MklDnnDataFormatToTFDataFormat(memory::format format) {
+  if (format == memory::format::nhwc) return FORMAT_NHWC;
+  else if (format == memory::format::nchw) return FORMAT_NCHW;
+  TF_CHECK_OK(Status(error::Code::INVALID_ARGUMENT,
+                     "Unsupported data format"));
+}
+
 /// Map TensorShape object into memory::dims required by MKL-DNN
 ///
 /// This function will simply map input TensorShape into MKL-DNN dims
@@ -753,7 +1145,7 @@ inline memory::format TFDataFormatToMklDnnDataFormat(TensorFormat format) {
 /// @return memory::dims corresponding to TensorShape
 inline memory::dims TFShapeToMklDnnDims(const TensorShape& shape) {
   memory::dims dims(shape.dims());
-  for (unsigned int d = 0; d < shape.dims(); ++d) {
+  for (int d = 0; d < shape.dims(); ++d) {
     dims[d] = shape.dim_size(d);
   }
   return dims;
@@ -769,7 +1161,7 @@ inline memory::dims TFShapeToMklDnnDims(const TensorShape& shape) {
 /// @input TensorShape object in shape
 /// @return memory::dims in MKL-DNN required NCHW format
 inline memory::dims TFShapeToMklDnnDimsInNCHW(const TensorShape& shape,
-                                              TensorFormat format) {
+                                            TensorFormat format) {
   // Check validity of format.
   CHECK_NE(TFDataFormatToMklDnnDataFormat(format),
            memory::format::format_undef);
@@ -783,6 +1175,43 @@ inline memory::dims TFShapeToMklDnnDimsInNCHW(const TensorShape& shape,
   return memory::dims({n, c, h, w});
 }
 
+/// Map MklDnn memory::dims object into TensorShape object.
+///
+/// This function will simply map input shape in MKL-DNN memory::dims format
+/// in Tensorflow's TensorShape object by perserving dimension order.
+///
+/// @input MKL-DNN memory::dims object
+/// @output TensorShape corresponding to memory::dims
+inline TensorShape MklDnnDimsToTFShape(const memory::dims& dims) {
+  std::vector<int32> shape(dims.size(), -1);
+  for (int d = 0; d < dims.size(); d++) {
+    shape[d] = dims[d];
+  }
+
+  TensorShape ret;
+  CHECK_EQ(TensorShapeUtils::MakeShape(shape, &ret).ok(), true);
+  return ret;
+}
+
+/// Function to calculate strides given tensor shape in Tensorflow order
+/// E.g., if dims_tf_order is {1, 2, 3, 4}, then as per Tensorflow convention,
+/// dimesion with size 1 is outermost dimension; while dimension with size 4 is
+/// innermost dimension. So strides for this tensor would be {4 * 3 * 2,
+/// 4 * 3, 4, 1}, i.e., {24, 12, 4, 1}.
+///
+/// @input Tensorflow shape in memory::dims type
+/// @return memory::dims containing strides for the tensor.
+inline memory::dims CalculateTFStrides(const memory::dims& dims_tf_order) {
+  CHECK_GT(dims_tf_order.size(), 0);
+  memory::dims strides(dims_tf_order.size());
+  int last_dim_idx = dims_tf_order.size() - 1;
+  strides[last_dim_idx] = 1;
+  for (int d = last_dim_idx - 1; d >= 0; d--) {
+    strides[d] = strides[d + 1] * dims_tf_order[d + 1];
+  }
+  return strides;
+}
+
 inline padding_kind TFPaddingToMklDnnPadding(Padding pad) {
   // MKL-DNN only supports zero padding.
   return padding_kind::zero;
@@ -808,23 +1237,21 @@ class MklDnnData {
   const engine* cpu_engine_;
 
  public:
-  explicit MklDnnData(const engine* e)
-      : user_memory_(nullptr),
-        reorder_memory_(nullptr),
-        op_md_(nullptr),
-        cpu_engine_(e) {}
+  explicit MklDnnData(const engine* e) : user_memory_(nullptr),
+                                         reorder_memory_(nullptr),
+                                         op_md_(nullptr), cpu_engine_(e) {}
 
   ~MklDnnData() {
     cpu_engine_ = nullptr;  // We don't own this.
-    delete (user_memory_);
-    delete (reorder_memory_);
-    delete (op_md_);
+    delete(user_memory_);
+    delete(reorder_memory_);
+    delete(op_md_);
   }
 
-  void* GetTensorBuffer(const Tensor* tensor) {
+  inline void* GetTensorBuffer(const Tensor* tensor) const {
     CHECK_NOTNULL(tensor);
-    return const_cast<void*>(
-        static_cast<const void*>(tensor->flat<T>().data()));
+    return const_cast<void*>(static_cast<const void*>(
+              tensor->flat<T>().data()));
   }
 
   /// Set user memory primitive using specified dimensions, memory format and
@@ -835,35 +1262,83 @@ class MklDnnData {
   /// an operation. E.g., filter of Conv2D is of shape {1, 2, 3, 4}, and
   /// memory format HWIO, and the buffer that contains actual values is
   /// pointed by data_buffer.
-  void SetUsrMem(memory::dims dim, memory::format fm, void* data_buffer) {
-    CHECK_NOTNULL(data_buffer);
-    CHECK_NOTNULL(cpu_engine_);
-    // TODO(nhasabni): can we remove dynamic memory allocation?
-    user_memory_ =
-        new memory(memory::primitive_desc(
-                       memory::desc(dim, MklDnnType<T>(), fm), *cpu_engine_),
-                   data_buffer);
+  inline void SetUsrMem(const memory::dims& dim, memory::format fm,
+                        void* data_buffer = nullptr) {
+    auto md = memory::desc(dim, MklDnnType<T>(), fm);
+    SetUsrMem(md, data_buffer);
   }
 
-  void SetUsrMem(memory::dims dim, memory::format fm, const Tensor* tensor) {
+  inline void SetUsrMem(const memory::dims& dim, memory::format fm,
+                        const Tensor* tensor) {
     CHECK_NOTNULL(tensor);
     SetUsrMem(dim, fm, GetTensorBuffer(tensor));
   }
 
+  /// Helper function to create memory descriptor in Blocked format
+  ///
+  /// @input: Tensor dimensions
+  /// @input: strides corresponding to dimensions. One can use utility
+  ///         function such as CalculateTFStrides to compute strides
+  ///         for given dimensions.
+  /// @return: memory::desc object corresponding to blocked memory format
+  ///          for given dimensions and strides.
+  static inline memory::desc CreateBlockedMemDesc(const memory::dims& dim,
+      const memory::dims& strides) {
+    CHECK_EQ(dim.size(), strides.size());
+
+    // We have to construct memory descriptor in a C style. This is not at all
+    // ideal but MKLDNN does not offer any API to construct descriptor in
+    // blocked format except a copy constructor that accepts
+    // mkldnn_memory_desc_t.
+    mkldnn_memory_desc_t md;
+    md.primitive_kind = mkldnn_memory;
+    md.ndims = dim.size();
+    md.format = mkldnn_blocked;
+    md.data_type = memory::convert_to_c(MklDnnType<T>());
+
+    for (size_t i = 0; i < dim.size(); i++) {
+      md.layout_desc.blocking.block_dims[i] = 1;
+      md.layout_desc.blocking.strides[1][i] = 1;
+      md.layout_desc.blocking.strides[0][i] = strides[i];
+      md.layout_desc.blocking.padding_dims[i] = dim[i];
+      md.layout_desc.blocking.offset_padding_to_data[i] = 0;
+      md.dims[i] = dim[i];
+    }
+    md.layout_desc.blocking.offset_padding = 0;
+
+    return memory::desc(md);
+  }
+
+  /// A version of SetUsrMem call that allows user to create memory in blocked
+  /// format. So in addition to accepting dimensions, it also accepts strides.
+  /// This allows user to create memory for tensor in a format that is not
+  /// supported by MKLDNN. E.g., MKLDNN does not support tensor format for 6
+  /// dimensional tensor as a native format. But by using blocked format, a user
+  /// can create memory for 6D tensor.
+  inline void SetUsrMem(const memory::dims& dim, const memory::dims& strides,
+                        void* data_buffer = nullptr) {
+    CHECK_EQ(dim.size(), strides.size());
+    auto blocked_md = MklDnnData<T>::CreateBlockedMemDesc(dim, strides);
+    SetUsrMem(blocked_md, data_buffer);
+  }
+
+  inline void SetUsrMem(const memory::dims& dim, const memory::dims& strides,
+                        const Tensor* tensor) {
+    CHECK_NOTNULL(tensor);
+    SetUsrMem(dim, strides, GetTensorBuffer(tensor));
+  }
+
   /// A version of function to set user memory primitive that accepts memory
   /// descriptor directly, instead of accepting dimensions and format. This
   /// function is more generic that the one above, but the function above is
   /// sufficient in most cases.
-  void SetUsrMem(memory::desc md, void* data_buffer) {
-    CHECK_NOTNULL(data_buffer);
-    CHECK_NOTNULL(cpu_engine_);
-    // TODO(nhasabni): can we remove dynamic memory allocation?
-    user_memory_ =
-        new memory(memory::primitive_desc(md, *cpu_engine_), data_buffer);
+  inline void SetUsrMem(const memory::desc& md, void* data_buffer = nullptr) {
+    auto pd = memory::primitive_desc(md, *cpu_engine_);
+    SetUsrMem(pd, data_buffer);
   }
 
   /// A version of SetUsrMem with memory descriptor and tensor
-  void SetUsrMem(memory::desc md, const Tensor* tensor) {
+  inline void SetUsrMem(const memory::desc& md, const Tensor* tensor) {
     CHECK_NOTNULL(tensor);
     SetUsrMem(md, GetTensorBuffer(tensor));
   }
@@ -872,41 +1347,60 @@ class MklDnnData {
   /// descriptor directly, instead of accepting dimensions and format. This
   /// function is more generic that the one above, but the function above is
   /// sufficient in most cases.
-  void SetUsrMem(memory::primitive_desc pd, void* data_buffer) {
-    CHECK_NOTNULL(data_buffer);
+  inline void SetUsrMem(const memory::primitive_desc& pd,
+                        void* data_buffer = nullptr) {
     CHECK_NOTNULL(cpu_engine_);
     // TODO(nhasabni): can we remove dynamic memory allocation?
-    user_memory_ = new memory(pd, data_buffer);
+    if (data_buffer) {
+     user_memory_ = new memory(pd, data_buffer);
+    } else {
+      user_memory_ = new memory(pd);
+    }
   }
 
   /// A version of SetUsrMem with primitive descriptor and tensor
-  void SetUsrMem(memory::primitive_desc pd, const Tensor* tensor) {
+  inline void SetUsrMem(const memory::primitive_desc& pd,
+                        const Tensor* tensor) {
     CHECK_NOTNULL(tensor);
     SetUsrMem(pd, GetTensorBuffer(tensor));
   }
 
   /// Get function for user memory primitive.
-  const memory* GetUsrMem() const { return user_memory_; }
+  inline const memory* GetUsrMem() const { return user_memory_; }
 
   /// Get function for primitive descriptor of user memory primitive.
-  const memory::primitive_desc GetUsrMemPrimDesc() const {
+  inline const memory::primitive_desc GetUsrMemPrimDesc() const {
     CHECK_NOTNULL(user_memory_);
     return user_memory_->get_primitive_desc();
   }
 
   /// Get function for descriptor of user memory.
-  memory::desc GetUsrMemDesc() {
+  inline memory::desc GetUsrMemDesc() {
     // This is ugly. Why MKL-DNN does not provide desc() method of const type??
     const memory::primitive_desc pd = GetUsrMemPrimDesc();
     return const_cast<memory::primitive_desc*>(&pd)->desc();
   }
 
   /// Get function for data buffer of user memory primitive.
-  void* GetUsrMemDataHandle() const {
+  inline void* GetUsrMemDataHandle() const {
     CHECK_NOTNULL(user_memory_);
     return user_memory_->get_data_handle();
   }
 
+  /// Set function for data buffer of user memory primitive.
+  inline void* SetUsrMemDataHandle(void* data_buffer) {
+    CHECK_NOTNULL(user_memory_);
+    CHECK_NOTNULL(data_buffer);
+    return user_memory_->set_data_handle(data_buffer);
+  }
+
+  /// Set function for data buffer of user memory primitive.
+  inline void SetUsrMemDataHandle(const Tensor* tensor) {
+    CHECK_NOTNULL(user_memory_);
+    CHECK_NOTNULL(tensor);
+    user_memory_->set_data_handle(GetTensorBuffer(tensor));
+  }
+
   /// Get the memory primitive for input and output of an op. If inputs
   /// to an op require reorders, then this function returns memory primitive
   /// for reorder. Otherwise, it will return memory primitive for user memory.
@@ -915,7 +1409,7 @@ class MklDnnData {
   /// execute Conv2D, we need memory primitive for I and F. Buf if reorder is
   /// required for I and F (say I_r is reorder primitive for I; F_r is reorder
   /// primitive for F), then we need I_r and F_r to perform Conv2D.
-  const memory& GetOpMem() const {
+  inline const memory& GetOpMem() const {
     return reorder_memory_ ? *reorder_memory_ : *user_memory_;
   }
 
@@ -923,13 +1417,32 @@ class MklDnnData {
   /// format. E.g., For Conv2D, the dimensions would be same as user dimensions
   /// but memory::format would be mkldnn::any because we want MKL-DNN to choose
   /// best layout/format for given input dimensions.
-  void SetOpMemDesc(const memory::dims& dim, memory::format fm) {
+  inline void SetOpMemDesc(const memory::dims& dim, memory::format fm) {
     // TODO(nhasabni): can we remove dynamic memory allocation?
     op_md_ = new memory::desc(dim, MklDnnType<T>(), fm);
   }
 
   /// Get function for memory descriptor for an operation
-  const memory::desc& GetOpMemDesc() const { return *op_md_; }
+  inline const memory::desc& GetOpMemDesc() const { return *op_md_; }
+
+  /// Predicate that checks if we need to reorder user's memory into memory
+  /// pointed by op_pd.
+  ///
+  /// @input: op_pd - memory primitive descriptor of the given input of an
+  ///               operation
+  /// @return: true in case reorder of input is needed; false, otherwise.
+  inline bool IsReorderNeeded(const memory::primitive_desc& op_pd) const {
+    CHECK_NOTNULL(user_memory_);
+    return op_pd != user_memory_->get_primitive_desc();
+  }
+
+  /// Function to create a reorder from memory pointed by from to memory pointed
+  /// by to. Returns created primitive.
+  inline primitive CreateReorder(const memory* from, const memory* to) const {
+    CHECK_NOTNULL(from);
+    CHECK_NOTNULL(to);
+    return reorder(*from, *to);
+  }
 
   /// Function to handle input reordering
   ///
@@ -945,19 +1458,62 @@ class MklDnnData {
   ///               operation
   /// @input: net - net to which to add reorder primitive in case it is needed.
   /// @return: true in case reorder of input is needed; false, otherwise.
-  bool CheckReorderToOpMem(const memory::primitive_desc& op_pd,
-                           std::vector<primitive>* net) {
+  inline bool CheckReorderToOpMem(const memory::primitive_desc& op_pd,
+                                  std::vector<primitive>* net) {
     CHECK_NOTNULL(net);
     CHECK_NOTNULL(user_memory_);
-    if (op_pd != user_memory_->get_primitive_desc()) {
+    if (IsReorderNeeded(op_pd)) {
       // TODO(nhasabni): can we remove dynamic memory allocation?
       reorder_memory_ = new memory(op_pd);
-      net->push_back(reorder(*user_memory_, *reorder_memory_));
+      net->push_back(CreateReorder(user_memory_, reorder_memory_));
+      return true;
+    }
+    return false;
+  }
+
+  /// Overloaded version of above function that accepts memory buffer
+  /// where output of reorder needs to be stored.
+  ///
+  /// @input: op_pd - memory primitive descriptor of the given input of an
+  ///               operation
+  /// @reorder_data_handle - memory buffer where output of reorder needs to be
+  ///                        stored. Primitive does not check if buffer is
+  ///                        enough size to write.
+  /// @input: net - net to which to add reorder primitive in case it is needed.
+  /// @return: true in case reorder of input is needed; false, otherwise.
+  inline bool CheckReorderToOpMem(const memory::primitive_desc& op_pd,
+                                  void* reorder_data_handle,
+                                  std::vector<primitive>* net) {
+    CHECK_NOTNULL(net);
+    CHECK_NOTNULL(reorder_data_handle);
+    CHECK_NOTNULL(user_memory_);
+    if (IsReorderNeeded(op_pd)) {
+      // TODO(nhasabni): can we remove dynamic memory allocation?
+      reorder_memory_ = new memory(op_pd, reorder_data_handle);
+      net->push_back(CreateReorder(user_memory_, reorder_memory_));
       return true;
     }
     return false;
   }
 
+  /// Another overloaded version of CheckReorderToOpMem that accepts Tensor
+  /// where output of reorder needs to be stored.
+  ///
+  /// @input: op_pd - memory primitive descriptor of the given input of an
+  ///               operation
+  /// @reorder_tensor - Tensor whose buffer is to be used to store output of
+  ///                   reorder. Primitive does not check if buffer is
+  ///                   enough size to write.
+  /// @input: net - net to which to add reorder primitive in case it is needed.
+  /// @return: true in case reorder of input is needed; false, otherwise.
+  inline bool CheckReorderToOpMem(const memory::primitive_desc& op_pd,
+                                  Tensor* reorder_tensor,
+                                  std::vector<primitive>* net) {
+    CHECK_NOTNULL(net);
+    CHECK_NOTNULL(reorder_tensor);
+    return CheckReorderToOpMem(op_pd, GetTensorBuffer(reorder_tensor), net);
+  }
+
   /// Function to handle output reorder
   ///
   /// This function performs very similar functionality as input reordering
@@ -970,9 +1526,10 @@ class MklDnnData {
   ///
   /// @input memory primitive descriptor for the given output of an operation
   /// @return: true in case reorder of output is needed; false, otherwise.
-  bool PrepareReorderToUserMemIfReq(const memory::primitive_desc& op_pd) {
+  inline bool PrepareReorderToUserMemIfReq(
+      const memory::primitive_desc& op_pd) {
     CHECK_NOTNULL(user_memory_);
-    if (op_pd != user_memory_->get_primitive_desc()) {
+    if (IsReorderNeeded(op_pd)) {
       // TODO(nhasabni): can we remove dynamic memory allocation?
       reorder_memory_ = new memory(op_pd);
       return true;
@@ -987,11 +1544,11 @@ class MklDnnData {
   /// to the user-specified output buffer.
   ///
   /// @input: net - net to which to add reorder primitive
-  void InsertReorderToUserMem(std::vector<primitive>* net) {
+  inline void InsertReorderToUserMem(std::vector<primitive>* net) {
     CHECK_NOTNULL(net);
     CHECK_NOTNULL(user_memory_);
     CHECK_NOTNULL(reorder_memory_);
-    net->push_back(reorder(*reorder_memory_, *user_memory_));
+    net->push_back(CreateReorder(reorder_memory_, user_memory_));
   }
 };
 
diff --git a/tensorflow/core/util/mkl_util_test.cc b/tensorflow/core/util/mkl_util_test.cc
new file mode 100644
index 0000000000..6aef3d86e9
--- /dev/null
+++ b/tensorflow/core/util/mkl_util_test.cc
@@ -0,0 +1,92 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifdef INTEL_MKL
+
+#include "tensorflow/core/util/mkl_util.h"
+
+#include "tensorflow/core/platform/test.h"
+
+namespace tensorflow {
+namespace {
+
+#ifdef INTEL_MKL_DNN
+
+TEST(MklUtilTest, MklDnnTfShape) {
+  auto cpu_engine = engine(engine::cpu, 0);
+  MklDnnData<float> a(&cpu_engine);
+
+  const int N = 1, C = 2, H = 3, W = 4;
+  memory::dims a_dims = {N, C, H, W};
+  MklDnnShape a_mkldnn_shape;
+  a_mkldnn_shape.SetMklTensor(true);
+  // Create TF layout in NCHW.
+  a_mkldnn_shape.SetTfLayout(a_dims.size(), a_dims, memory::format::nchw);
+  TensorShape a_tf_shape_nchw({N, C, H, W});
+  TensorShape a_tf_shape_nhwc({N, H, W, C});
+  TensorShape a_mkldnn_tf_shape = a_mkldnn_shape.GetTfShape();
+  // Check that returned shape is in NCHW format.
+  EXPECT_EQ(a_tf_shape_nchw, a_mkldnn_tf_shape);
+  EXPECT_NE(a_tf_shape_nhwc, a_mkldnn_tf_shape);
+
+  memory::dims b_dims = {N, C, H, W};
+  MklDnnShape b_mkldnn_shape;
+  b_mkldnn_shape.SetMklTensor(true);
+  // Create TF layout in NHWC.
+  b_mkldnn_shape.SetTfLayout(b_dims.size(), b_dims, memory::format::nhwc);
+  TensorShape b_tf_shape_nhwc({N, H, W, C});
+  TensorShape b_tf_shape_nchw({N, C, H, W});
+  TensorShape b_mkldnn_tf_shape = b_mkldnn_shape.GetTfShape();
+  // Check that returned shape is in NHWC format.
+  EXPECT_EQ(b_tf_shape_nhwc, b_mkldnn_tf_shape);
+  EXPECT_NE(b_tf_shape_nchw, b_mkldnn_tf_shape);
+}
+
+
+TEST(MklUtilTest, MklDnnBlockedFormatTest) {
+  // Let's create 2D tensor of shape {3, 4} with 3 being innermost dimension
+  // first (case 1) and then it being outermost dimension (case 2).
+  auto cpu_engine = engine(engine::cpu, 0);
+
+  // Setting for case 1
+  MklDnnData<float> a(&cpu_engine);
+  memory::dims dim1 = {3, 4};
+  memory::dims strides1 = {1, 3};
+  a.SetUsrMem(dim1, strides1);
+
+  memory::desc a_md1 = a.GetUsrMemDesc();
+  EXPECT_EQ(a_md1.data.ndims, 2);
+  EXPECT_EQ(a_md1.data.dims[0], 3);
+  EXPECT_EQ(a_md1.data.dims[1], 4);
+  EXPECT_EQ(a_md1.data.format, mkldnn_blocked);
+
+  // Setting for case 2
+  MklDnnData<float> b(&cpu_engine);
+  memory::dims dim2 = {3, 4};
+  memory::dims strides2 = {4, 1};
+  b.SetUsrMem(dim2, strides2);
+
+  memory::desc b_md2 = b.GetUsrMemDesc();
+  EXPECT_EQ(b_md2.data.ndims, 2);
+  EXPECT_EQ(b_md2.data.dims[0], 3);
+  EXPECT_EQ(b_md2.data.dims[1], 4);
+  EXPECT_EQ(b_md2.data.format, mkldnn_blocked);
+}
+
+#endif  // INTEL_MKL_DNN
+}  // namespace
+}  // namespace tensorflow
+
+#endif  // INTEL_MKL
diff --git a/tensorflow/docs_src/api_guides/python/threading_and_queues.md b/tensorflow/docs_src/api_guides/python/threading_and_queues.md
index ab95ce0af9..8ad4c4c075 100644
--- a/tensorflow/docs_src/api_guides/python/threading_and_queues.md
+++ b/tensorflow/docs_src/api_guides/python/threading_and_queues.md
@@ -3,7 +3,7 @@
 Note: In versions of TensorFlow before 1.2, we recommended using multi-threaded,
 queue-based input pipelines for performance. Beginning with TensorFlow 1.4,
 however, we recommend using the `tf.data` module instead. (See
-[Datasets](datasets) for details. In TensorFlow 1.2 and 1.3, the module was
+@{$datasets$Datasets} for details. In TensorFlow 1.2 and 1.3, the module was
 called `tf.contrib.data`.) The `tf.data` module offers an easier-to-use
 interface for constructing efficient input pipelines. Furthermore, we've stopped
 developing the old multi-threaded, queue-based input pipelines.  We've retained
diff --git a/tensorflow/docs_src/get_started/get_started.md b/tensorflow/docs_src/get_started/get_started.md
index 8409962744..be14ab4026 100644
--- a/tensorflow/docs_src/get_started/get_started.md
+++ b/tensorflow/docs_src/get_started/get_started.md
@@ -272,7 +272,7 @@ train = optimizer.minimize(loss)
 ```
 
 ```python
-sess.run(init) # reset values to incorrect defaults.
+sess.run(init) # reset variables to incorrect defaults.
 for i in range(1000):
   sess.run(train, {x: [1, 2, 3, 4], y: [0, -1, -2, -3]})
 
@@ -317,7 +317,7 @@ y_train = [0, -1, -2, -3]
 # training loop
 init = tf.global_variables_initializer()
 sess = tf.Session()
-sess.run(init) # reset values to wrong
+sess.run(init) # initialize variables with incorrect defaults.
 for i in range(1000):
   sess.run(train, {x: x_train, y: y_train})
 
@@ -383,7 +383,7 @@ train_input_fn = tf.estimator.inputs.numpy_input_fn(
 eval_input_fn = tf.estimator.inputs.numpy_input_fn(
     {"x": x_eval}, y_eval, batch_size=4, num_epochs=1000, shuffle=False)
 
-# We can invoke 1000 training steps by invoking the  method and passing the
+# We can invoke 1000 training steps by invoking the method and passing the
 # training data set.
 estimator.train(input_fn=input_fn, steps=1000)
 
diff --git a/tensorflow/docs_src/get_started/input_fn.md b/tensorflow/docs_src/get_started/input_fn.md
index 9d3af5d96a..0db5c6143a 100644
--- a/tensorflow/docs_src/get_started/input_fn.md
+++ b/tensorflow/docs_src/get_started/input_fn.md
@@ -191,7 +191,7 @@ import pandas as pd
 
 def get_input_fn_from_pandas(data_set, num_epochs=None, shuffle=True):
   return tf.estimator.inputs.pandas_input_fn(
-      x=pdDataFrame(...),
+      x=pd.DataFrame(...),
       y=pd.Series(...),
       num_epochs=num_epochs,
       shuffle=shuffle)
@@ -267,8 +267,8 @@ tf.logging.set_verbosity(tf.logging.INFO)
 
 Define the column names for the data set in `COLUMNS`. To distinguish features
 from the label, also define `FEATURES` and `LABEL`. Then read the three CSVs
-(@{tf.train},
-@{tf.test}, and
+([train](http://download.tensorflow.org/data/boston_train.csv),
+[test](http://download.tensorflow.org/data/boston_test.csv), and
 [predict](http://download.tensorflow.org/data/boston_predict.csv)) into _pandas_
 `DataFrame`s:
 
diff --git a/tensorflow/docs_src/install/install_c.md b/tensorflow/docs_src/install/install_c.md
index 3a153e8114..df622c6ac5 100644
--- a/tensorflow/docs_src/install/install_c.md
+++ b/tensorflow/docs_src/install/install_c.md
@@ -38,7 +38,7 @@ enable TensorFlow for C:
          OS="linux" # Change to "darwin" for macOS
          TARGET_DIRECTORY="/usr/local"
          curl -L \
-           "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-${TF_TYPE}-${OS}-x86_64-1.4.0-rc1.tar.gz" |
+           "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-${TF_TYPE}-${OS}-x86_64-1.4.0.tar.gz" |
            sudo tar -C $TARGET_DIRECTORY -xz
 
      The `tar` command extracts the TensorFlow C library into the `lib`
diff --git a/tensorflow/docs_src/install/install_go.md b/tensorflow/docs_src/install/install_go.md
index df43255896..8b3da49a0d 100644
--- a/tensorflow/docs_src/install/install_go.md
+++ b/tensorflow/docs_src/install/install_go.md
@@ -38,7 +38,7 @@ steps to install this library and enable TensorFlow for Go:
          TF_TYPE="cpu" # Change to "gpu" for GPU support
          TARGET_DIRECTORY='/usr/local'
          curl -L \
-           "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-${TF_TYPE}-$(go env GOOS)-x86_64-1.4.0-rc1.tar.gz" |
+           "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-${TF_TYPE}-$(go env GOOS)-x86_64-1.4.0.tar.gz" |
          sudo tar -C $TARGET_DIRECTORY -xz
 
      The `tar` command extracts the TensorFlow C library into the `lib`
diff --git a/tensorflow/docs_src/install/install_java.md b/tensorflow/docs_src/install/install_java.md
index f7f2c3cdc7..6eb8158249 100644
--- a/tensorflow/docs_src/install/install_java.md
+++ b/tensorflow/docs_src/install/install_java.md
@@ -36,7 +36,7 @@ following to the project's `pom.xml` to use the TensorFlow Java APIs:
 <dependency>
   <groupId>org.tensorflow</groupId>
   <artifactId>tensorflow</artifactId>
-  <version>1.4.0-rc1</version>
+  <version>1.4.0</version>
 </dependency>
 ```
 
@@ -65,7 +65,7 @@ As an example, these steps will create a Maven project that uses TensorFlow:
                <dependency>
                  <groupId>org.tensorflow</groupId>
                  <artifactId>tensorflow</artifactId>
-                 <version>1.4.0-rc1</version>
+                 <version>1.4.0</version>
                </dependency>
              </dependencies>
          </project>
@@ -124,7 +124,7 @@ refer to the simpler instructions above instead.
 Take the following steps to install TensorFlow for Java on Linux or macOS:
 
   1. Download
-     [libtensorflow.jar](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-1.4.0-rc1.jar),
+     [libtensorflow.jar](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-1.4.0.jar),
      which is the TensorFlow Java Archive (JAR).
 
   2. Decide whether you will run TensorFlow for Java on CPU(s) only or with
@@ -143,7 +143,7 @@ Take the following steps to install TensorFlow for Java on Linux or macOS:
          OS=$(uname -s | tr '[:upper:]' '[:lower:]')
          mkdir -p ./jni
          curl -L \
-           "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow_jni-${TF_TYPE}-${OS}-x86_64-1.4.0-rc1.tar.gz" |
+           "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow_jni-${TF_TYPE}-${OS}-x86_64-1.4.0.tar.gz" |
            tar -xz -C ./jni
 
 ### Install on Windows
@@ -151,10 +151,10 @@ Take the following steps to install TensorFlow for Java on Linux or macOS:
 Take the following steps to install TensorFlow for Java on Windows:
 
   1. Download
-     [libtensorflow.jar](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-1.4.0-rc1.jar),
+     [libtensorflow.jar](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-1.4.0.jar),
      which is the TensorFlow Java Archive (JAR).
   2. Download the following Java Native Interface (JNI) file appropriate for
-     [TensorFlow for Java on Windows](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow_jni-cpu-windows-x86_64-1.4.0-rc1.zip).
+     [TensorFlow for Java on Windows](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow_jni-cpu-windows-x86_64-1.4.0.zip).
   3. Extract this .zip file.
 
 
@@ -202,7 +202,7 @@ must be part of your `classpath`. For example, you can include the
 downloaded `.jar` in your `classpath` by using the `-cp` compilation flag
 as follows:
 
-<pre><b>javac -cp libtensorflow-1.4.0-rc1.jar HelloTF.java</b></pre>
+<pre><b>javac -cp libtensorflow-1.4.0.jar HelloTF.java</b></pre>
 
 
 ### Running
@@ -216,11 +216,11 @@ two files are available to the JVM:
 For example, the following command line executes the `HelloTF` program on Linux
 and macOS X:
 
-<pre><b>java -cp libtensorflow-1.4.0-rc1.jar:. -Djava.library.path=./jni HelloTF</b></pre>
+<pre><b>java -cp libtensorflow-1.4.0.jar:. -Djava.library.path=./jni HelloTF</b></pre>
 
 And the following command line executes the `HelloTF` program on Windows:
 
-<pre><b>java -cp libtensorflow-1.4.0-rc1.jar;. -Djava.library.path=jni HelloTF</b></pre>
+<pre><b>java -cp libtensorflow-1.4.0.jar;. -Djava.library.path=jni HelloTF</b></pre>
 
 If the program prints <tt>Hello from <i>version</i></tt>, you've successfully
 installed TensorFlow for Java and are ready to use the API.  If the program
diff --git a/tensorflow/docs_src/install/install_linux.md b/tensorflow/docs_src/install/install_linux.md
index 414ab7b1f7..f7380bac8a 100644
--- a/tensorflow/docs_src/install/install_linux.md
+++ b/tensorflow/docs_src/install/install_linux.md
@@ -188,7 +188,7 @@ Take the following steps to install TensorFlow with Virtualenv:
      Virtualenv environment:
 
      <pre>(tensorflow)$ <b>pip3 install --upgrade \
-     https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.4.0rc1-cp34-cp34m-linux_x86_64.whl</b></pre>
+     https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.4.0-cp34-cp34m-linux_x86_64.whl</b></pre>
 
 If you encounter installation problems, see
 [Common Installation Problems](#common_installation_problems).
@@ -293,7 +293,7 @@ take the following steps:
 
      <pre>
      $ <b>sudo pip3 install --upgrade \
-     https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.4.0rc1-cp34-cp34m-linux_x86_64.whl</b>
+     https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.4.0-cp34-cp34m-linux_x86_64.whl</b>
      </pre>
 
      If this step fails, see
@@ -480,7 +480,7 @@ Take the following steps to install TensorFlow in an Anaconda environment:
 
      <pre>
      (tensorflow)$ <b>pip install --ignore-installed --upgrade \
-     https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.4.0rc1-cp34-cp34m-linux_x86_64.whl</b></pre>
+     https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.4.0-cp34-cp34m-linux_x86_64.whl</b></pre>
 
 
 <a name="ValidateYourInstallation"></a>
@@ -648,14 +648,14 @@ This section documents the relevant values for Linux installations.
 CPU only:
 
 <pre>
-https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.4.0rc1-cp27-none-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.4.0-cp27-none-linux_x86_64.whl
 </pre>
 
 
 GPU support:
 
 <pre>
-https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.4.0rc1-cp27-none-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.4.0-cp27-none-linux_x86_64.whl
 </pre>
 
 Note that GPU support requires the NVIDIA hardware and software described in
@@ -667,14 +667,14 @@ Note that GPU support requires the NVIDIA hardware and software described in
 CPU only:
 
 <pre>
-https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.4.0rc1-cp34-cp34m-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.4.0-cp34-cp34m-linux_x86_64.whl
 </pre>
 
 
 GPU support:
 
 <pre>
-https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.4.0rc1-cp34-cp34m-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.4.0-cp34-cp34m-linux_x86_64.whl
 </pre>
 
 Note that GPU support requires the NVIDIA hardware and software described in
@@ -686,14 +686,14 @@ Note that GPU support requires the NVIDIA hardware and software described in
 CPU only:
 
 <pre>
-https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.4.0rc1-cp35-cp35m-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.4.0-cp35-cp35m-linux_x86_64.whl
 </pre>
 
 
 GPU support:
 
 <pre>
-https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.4.0rc1-cp35-cp35m-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.4.0-cp35-cp35m-linux_x86_64.whl
 </pre>
 
 
@@ -705,14 +705,14 @@ Note that GPU support requires the NVIDIA hardware and software described in
 CPU only:
 
 <pre>
-https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.4.0rc1-cp36-cp36m-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.4.0-cp36-cp36m-linux_x86_64.whl
 </pre>
 
 
 GPU support:
 
 <pre>
-https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.4.0rc1-cp36-cp36m-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.4.0-cp36-cp36m-linux_x86_64.whl
 </pre>
 
 
diff --git a/tensorflow/docs_src/install/install_mac.md b/tensorflow/docs_src/install/install_mac.md
index 9a95710bfa..79b383817b 100644
--- a/tensorflow/docs_src/install/install_mac.md
+++ b/tensorflow/docs_src/install/install_mac.md
@@ -114,7 +114,7 @@ Take the following steps to install TensorFlow with Virtualenv:
      TensorFlow in the active Virtualenv is as follows:
 
      <pre> $ <b>pip3 install --upgrade \
-     https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.4.0rc1-py2-none-any.whl</b></pre>
+     https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.4.0-py2-none-any.whl</b></pre>
 
 If you encounter installation problems, see
 [Common Installation Problems](#common-installation-problems).
@@ -235,7 +235,7 @@ take the following steps:
      issue the following command:
 
      <pre> $ <b>sudo pip3 install --upgrade \
-     https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.4.0rc1-py2-none-any.whl</b> </pre>
+     https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.4.0-py2-none-any.whl</b> </pre>
 
      If the preceding command fails, see
      [installation problems](#common-installation-problems).
@@ -344,7 +344,7 @@ Take the following steps to install TensorFlow in an Anaconda environment:
      TensorFlow for Python 2.7:
 
      <pre> (tensorflow)$ <b>pip install --ignore-installed --upgrade \
-     https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.4.0rc1-py2-none-any.whl</b></pre>
+     https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.4.0-py2-none-any.whl</b></pre>
 
 
 <a name="ValidateYourInstallation"></a>
@@ -517,7 +517,7 @@ This section documents the relevant values for Mac OS installations.
 
 
 <pre>
-https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.4.0rc1-py2-none-any.whl
+https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.4.0-py2-none-any.whl
 </pre>
 
 
@@ -525,7 +525,7 @@ https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.4.0rc1-py2-none-a
 
 
 <pre>
-https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.4.0rc1-py3-none-any.whl
+https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.4.0-py3-none-any.whl
 </pre>
 
 
diff --git a/tensorflow/docs_src/install/install_sources.md b/tensorflow/docs_src/install/install_sources.md
index 6d0dcdcd4a..aa4ae6c876 100644
--- a/tensorflow/docs_src/install/install_sources.md
+++ b/tensorflow/docs_src/install/install_sources.md
@@ -355,10 +355,10 @@ Invoke `pip install` to install that pip package.
 The filename of the `.whl` file depends on your platform.
 For example, the following command will install the pip package
 
-for TensorFlow 1.4.0rc1 on Linux:
+for TensorFlow 1.4.0 on Linux:
 
 <pre>
-$ <b>sudo pip install /tmp/tensorflow_pkg/tensorflow-1.4.0rc1-py2-none-any.whl</b>
+$ <b>sudo pip install /tmp/tensorflow_pkg/tensorflow-1.4.0-py2-none-any.whl</b>
 </pre>
 
 ## Validate your installation
@@ -447,8 +447,10 @@ Stack Overflow and specify the `tensorflow` tag.
 **Linux**
 <table>
 <tr><th>Version:</th><th>CPU/GPU:</th><th>Python Version:</th><th>Compiler:</th><th>Build Tools:</th><th>cuDNN:</th><th>CUDA:</th></tr>
-<tr><td>tensorflow-1.4.0rc1</td><td>CPU</td><td>2.7, 3.3-3.6</td><td>GCC 4.8</td><td>Bazel 0.4.5</td><td>N/A</td><td>N/A</td></tr>
-<tr><td>tensorflow_gpu-1.4.0rc1</td><td>GPU</td><td>2.7, 3.3-3.6</td><td>GCC 4.8</td><td>Bazel 0.4.5</td><td>6</td><td>8</td></tr>
+<tr><td>tensorflow-1.4.0</td><td>CPU</td><td>2.7, 3.3-3.6</td><td>GCC 4.8</td><td>Bazel 0.5.4</td><td>N/A</td><td>N/A</td></tr>
+<tr><td>tensorflow_gpu-1.4.0</td><td>GPU</td><td>2.7, 3.3-3.6</td><td>GCC 4.8</td><td>Bazel 0.5.4</td><td>6</td><td>8</td></tr>
+ <tr><td>tensorflow-1.3.0</td><td>CPU</td><td>2.7, 3.3-3.6</td><td>GCC 4.8</td><td>Bazel 0.4.5</td><td>N/A</td><td>N/A</td></tr>
+<tr><td>tensorflow_gpu-1.3.0</td><td>GPU</td><td>2.7, 3.3-3.6</td><td>GCC 4.8</td><td>Bazel 0.4.5</td><td>6</td><td>8</td></tr>
 <tr><td>tensorflow-1.2.0</td><td>CPU</td><td>2.7, 3.3-3.6</td><td>GCC 4.8</td><td>Bazel 0.4.5</td><td>N/A</td><td>N/A</td></tr>
 <tr><td>tensorflow_gpu-1.2.0</td><td>GPU</td><td>2.7, 3.3-3.6</td><td>GCC 4.8</td><td>Bazel 0.4.5</td><td>5.1</td><td>8</td></tr>
 <tr><td>tensorflow-1.1.0</td><td>CPU</td><td>2.7, 3.3-3.6</td><td>GCC 4.8</td><td>Bazel 0.4.2</td><td>N/A</td><td>N/A</td></tr>
@@ -460,7 +462,8 @@ Stack Overflow and specify the `tensorflow` tag.
 **Mac**
 <table>
 <tr><th>Version:</th><th>CPU/GPU:</th><th>Python Version:</th><th>Compiler:</th><th>Build Tools:</th><th>cuDNN:</th><th>CUDA:</th></tr>
-<tr><td>tensorflow-1.4.0rc1</td><td>CPU</td><td>2.7, 3.3-3.6</td><td>Clang from xcode</td><td>Bazel 0.4.5</td><td>N/A</td><td>N/A</td></tr>
+<tr><td>tensorflow-1.4.0</td><td>CPU</td><td>2.7, 3.3-3.6</td><td>Clang from xcode</td><td>Bazel 0.5.4</td><td>N/A</td><td>N/A</td></tr>
+ <tr><td>tensorflow-1.3.0</td><td>CPU</td><td>2.7, 3.3-3.6</td><td>Clang from xcode</td><td>Bazel 0.4.5</td><td>N/A</td><td>N/A</td></tr>
 <tr><td>tensorflow-1.2.0</td><td>CPU</td><td>2.7, 3.3-3.6</td><td>Clang from xcode</td><td>Bazel 0.4.5</td><td>N/A</td><td>N/A</td></tr>
 <tr><td>tensorflow-1.1.0</td><td>CPU</td><td>2.7, 3.3-3.6</td><td>Clang from xcode</td><td>Bazel 0.4.2</td><td>N/A</td><td>N/A</td></tr>
 <tr><td>tensorflow_gpu-1.1.0</td><td>GPU</td><td>2.7, 3.3-3.6</td><td>Clang from xcode</td><td>Bazel 0.4.2</td><td>5.1</td><td>8</td></tr>
@@ -471,8 +474,10 @@ Stack Overflow and specify the `tensorflow` tag.
 **Windows**
 <table>
 <tr><th>Version:</th><th>CPU/GPU:</th><th>Python Version:</th><th>Compiler:</th><th>Build Tools:</th><th>cuDNN:</th><th>CUDA:</th></tr>
-<tr><td>tensorflow-1.4.0rc1</td><td>CPU</td><td>3.5-3.6</td><td>MSVC 2015 update 3</td><td>Cmake v3.6.3</td><td>N/A</td><td>N/A</td></tr>
-<tr><td>tensorflow_gpu-1.4.0rc1</td><td>GPU</td><td>3.5-3.6</td><td>MSVC 2015 update 3</td><td>Cmake v3.6.3</td><td>6</td><td>8</td></tr>
+<tr><td>tensorflow-1.4.0</td><td>CPU</td><td>3.5-3.6</td><td>MSVC 2015 update 3</td><td>Cmake v3.6.3</td><td>N/A</td><td>N/A</td></tr>
+<tr><td>tensorflow_gpu-1.4.0</td><td>GPU</td><td>3.5-3.6</td><td>MSVC 2015 update 3</td><td>Cmake v3.6.3</td><td>6</td><td>8</td></tr>
+<tr><td>tensorflow-1.3.0</td><td>CPU</td><td>3.5-3.6</td><td>MSVC 2015 update 3</td><td>Cmake v3.6.3</td><td>N/A</td><td>N/A</td></tr>
+<tr><td>tensorflow_gpu-1.3.0</td><td>GPU</td><td>3.5-3.6</td><td>MSVC 2015 update 3</td><td>Cmake v3.6.3</td><td>6</td><td>8</td></tr>
 <tr><td>tensorflow-1.2.0</td><td>CPU</td><td>3.5-3.6</td><td>MSVC 2015 update 3</td><td>Cmake v3.6.3</td><td>N/A</td><td>N/A</td></tr>
 <tr><td>tensorflow_gpu-1.2.0</td><td>GPU</td><td>3.5-3.6</td><td>MSVC 2015 update 3</td><td>Cmake v3.6.3</td><td>5.1</td><td>8</td></tr>
 <tr><td>tensorflow-1.1.0</td><td>CPU</td><td>3.5</td><td>MSVC 2015 update 3</td><td>Cmake v3.6.3</td><td>N/A</td><td>N/A</td></tr>
diff --git a/tensorflow/docs_src/mobile/prepare_models.md b/tensorflow/docs_src/mobile/prepare_models.md
index c5a560e074..8fc65be35a 100644
--- a/tensorflow/docs_src/mobile/prepare_models.md
+++ b/tensorflow/docs_src/mobile/prepare_models.md
@@ -296,6 +296,6 @@ complains about missing header files, add the .h’s that are needed into
 the
 [`android_extended_ops`](https://www.tensorflow.org/code/tensorflow/core/kernels/BUILD#L3525) target.
 
-If you’re using a makefile targetting iOS, Raspberry Pi, etc, go to
+If you’re using a makefile targeting iOS, Raspberry Pi, etc, go to
 [`tensorflow/contrib/makefile/tf_op_files.txt`](https://www.tensorflow.org/code/tensorflow/contrib/makefile/tf_op_files.txt) and
 add the right implementation files there.
diff --git a/tensorflow/docs_src/programmers_guide/debugger.md b/tensorflow/docs_src/programmers_guide/debugger.md
index 1f856bbf3f..25cb72008d 100644
--- a/tensorflow/docs_src/programmers_guide/debugger.md
+++ b/tensorflow/docs_src/programmers_guide/debugger.md
@@ -9,11 +9,19 @@ lets you view the internal structure and states of running TensorFlow graphs
 during training and inference, which is difficult to debug with general-purpose
 debuggers such as Python's `pdb` due to TensorFlow's computation-graph paradigm.
 
-> NOTE: The system requirements of tfdbg on supported external platforms include
-> the following. On Mac OS X, the `ncurses` library is required. It can be
-> installed with `brew install homebrew/dupes/ncurses`. On Windows, `pyreadline`
-> is required. If you use Anaconda3, you can install it with a command
+> NOTE: TensorFlow debugger uses a
+> [curses](https://en.wikipedia.org/wiki/Curses_\(programming_library\))-based
+> text user interface. On Mac OS X, the `ncurses` library is required and can
+> be installed with `brew install homebrew/dupes/ncurses`. On Windows, curses
+> isn't as well supported, so a
+> [readline](https://en.wikipedia.org/wiki/GNU_Readline)-based interface can
+> be used with tfdbg by installing `pyreadline` with pip.
+> If you use Anaconda3, you can install it with a command
 > such as `"C:\Program Files\Anaconda3\Scripts\pip.exe" install pyreadline`.
+> Unofficial Windows curses packages can be downloaded
+> [here](https://www.lfd.uci.edu/~gohlke/pythonlibs/#curses), then subsequently
+> installed using `pip install <your_version>.whl`, however curses on Windows
+> may not work as reliably as curses on Linux or Mac.
 
 This tutorial demonstrates how to use the **tfdbg** command-line interface
 (CLI) to debug the appearance of [`nan`s](https://en.wikipedia.org/wiki/NaN)
@@ -149,6 +157,7 @@ Try the following commands at the `tfdbg>` prompt (referencing the code at
 | | `pt <tensor>[slicing]` | Print a subarray of tensor, using [numpy](http://www.numpy.org/)-style array slicing. | `pt hidden/Relu:0[0:50,:]` |
 | | `-a` | Print the entirety of a large tensor, without using ellipses. (May take a long time for large tensors.) | `pt -a hidden/Relu:0[0:50,:]` |
 | | `-r <range>` | Highlight elements falling into specified numerical range. Multiple ranges can be used in conjunction. | `pt hidden/Relu:0 -a -r [[-inf,-1],[1,inf]]` |
+| | `-n <number>` | Print dump corresponding to specified 0-based dump number. Required for tensors with multiple dumps. | `pt -n 0 hidden/Relu:0` |
 | | `-s` | Include a summary of the numeric values of the tensor (applicable only to non-empty tensors with Boolean and numeric types such as `int*` and `float*`.) | `pt -s hidden/Relu:0[0:50,:]` |
 | **`@[coordinates]`** | | Navigate to specified element in `pt` output. | `@[10,0]` or `@10,0` |
 | **`/regex`** | |  [less](https://linux.die.net/man/1/less)-style search for given regular expression. | `/inf` |
@@ -166,10 +175,12 @@ Try the following commands at the `tfdbg>` prompt (referencing the code at
 | | `-r` | List the inputs to node, recursively (the input tree.) | `li -r hidden/Relu:0` |
 | | `-d <max_depth>` | Limit recursion depth under the `-r` mode. | `li -r -d 3 hidden/Relu:0` |
 | | `-c` | Include control inputs. | `li -c -r hidden/Relu:0` |
+| | `-t` | Show op types of input nodes. | `li -t -r hidden/Relu:0` |
 | **`lo`** | | **List output recipients of node** | |
 | | `-r` | List the output recipients of node, recursively (the output tree.) | `lo -r hidden/Relu:0` |
 | | `-d <max_depth>` | Limit recursion depth under the `-r` mode. | `lo -r -d 3 hidden/Relu:0` |
 | | `-c` | Include recipients via control edges. | `lo -c -r hidden/Relu:0` |
+| | `-t` | Show op types of recipient nodes. | `lo -t -r hidden/Relu:0` |
 | **`ls`** | | **List Python source files involved in node creation.** | |
 | | `-p <path_pattern>` | Limit output to source files matching given regular-expression path pattern. | `ls -p .*debug_mnist.*` |
 | | `-n` | Limit output to node names matching given regular-expression pattern. | `ls -n Softmax.*` |
diff --git a/tensorflow/docs_src/programmers_guide/tensors.md b/tensorflow/docs_src/programmers_guide/tensors.md
index d6f80430cd..88eb277e35 100644
--- a/tensorflow/docs_src/programmers_guide/tensors.md
+++ b/tensorflow/docs_src/programmers_guide/tensors.md
@@ -29,8 +29,8 @@ Some types of tensors are special, and these will be covered in other
 units of the Programmer's guide. The main ones are:
 
   * `tf.Variable`
-  * `tf.Constant`
-  * `tf.Placeholder`
+  * `tf.constant`
+  * `tf.placeholder`
   * `tf.SparseTensor`
 
 With the exception of `tf.Variable`, the value of a tensor is immutable, which
@@ -64,7 +64,7 @@ The following snippet demonstrates creating a few rank 0 variables:
 mammal = tf.Variable("Elephant", tf.string)
 ignition = tf.Variable(451, tf.int16)
 floating = tf.Variable(3.14159265359, tf.float64)
-its_complicated = tf.Variable((12.3, -4.85), tf.complex64)
+its_complicated = tf.Variable(12.3 - 4.85j, tf.complex64)
 ```
 
 Note: A string is treated as a single item in TensorFlow, not as a sequence of
@@ -79,7 +79,7 @@ initial value. For example:
 mystr = tf.Variable(["Hello"], tf.string)
 cool_numbers  = tf.Variable([3.14159, 2.71828], tf.float32)
 first_primes = tf.Variable([2, 3, 5, 7, 11], tf.int32)
-its_very_complicated = tf.Variable([(12.3, -4.85), (7.5, -6.23)], tf.complex64)
+its_very_complicated = tf.Variable([12.3 - 4.85j, 7.5 - 6.23j], tf.complex64)
 ```
 
 
@@ -275,8 +275,8 @@ Graphs and Sessions for more information).
 
 Sometimes it is not possible to evaluate a `tf.Tensor` with no context because
 its value might depend on dynamic information that is not available. For
-example, tensors that depend on `Placeholder`s can't be evaluated without
-providing a value for the `Placeholder`.
+example, tensors that depend on `placeholder`s can't be evaluated without
+providing a value for the `placeholder`.
 
 ``` python
 p = tf.placeholder(tf.float32)
diff --git a/tensorflow/examples/speech_commands/models.py b/tensorflow/examples/speech_commands/models.py
index 82d6a94ea1..ab611f414a 100644
--- a/tensorflow/examples/speech_commands/models.py
+++ b/tensorflow/examples/speech_commands/models.py
@@ -326,7 +326,7 @@ def create_low_latency_conv_model(fingerprint_input, model_settings,
   first_filter_height = input_time_size
   first_filter_count = 186
   first_filter_stride_x = 1
-  first_filter_stride_y = 4
+  first_filter_stride_y = 1
   first_weights = tf.Variable(
       tf.truncated_normal(
           [first_filter_height, first_filter_width, 1, first_filter_count],
diff --git a/tensorflow/go/android.go b/tensorflow/go/android.go
new file mode 100644
index 0000000000..3db3ddfec5
--- /dev/null
+++ b/tensorflow/go/android.go
@@ -0,0 +1,20 @@
+// Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//  http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// +build android
+
+package tensorflow
+
+// #cgo LDFLAGS: -landroid -llog -lm -lz -ldl
+import "C"
diff --git a/tensorflow/go/operation_test.go b/tensorflow/go/operation_test.go
index 7cba043af2..40c951ab8c 100644
--- a/tensorflow/go/operation_test.go
+++ b/tensorflow/go/operation_test.go
@@ -123,6 +123,14 @@ func TestOutputDataTypeAndShape(t *testing.T) {
 			[]int64{2, 3},
 			Double,
 		},
+		{ // Matrix of Uint64
+			[][]uint64{
+				{1, 2, 3},
+				{4, 5, 6},
+			},
+			[]int64{2, 3},
+			Uint64,
+		},
 	}
 	for idx, test := range testdata {
 		t.Run(fmt.Sprintf("#%d Value %T", idx, test.Value), func(t *testing.T) {
diff --git a/tensorflow/go/tensor.go b/tensorflow/go/tensor.go
index 36a74c0081..1326a95278 100644
--- a/tensorflow/go/tensor.go
+++ b/tensorflow/go/tensor.go
@@ -101,7 +101,7 @@ func NewTensor(value interface{}) (*Tensor, error) {
 			return nil, bug("NewTensor incorrectly calculated the size of a tensor with type %v and shape %v as %v bytes instead of %v", dataType, shape, nbytes, buf.Len())
 		}
 	} else {
-		e := stringEncoder{offsets: buf, data: raw[nflattened*8 : len(raw)], status: newStatus()}
+		e := stringEncoder{offsets: buf, data: raw[nflattened*8:], status: newStatus()}
 		if err := e.encode(reflect.ValueOf(value), shape); err != nil {
 			return nil, err
 		}
@@ -207,6 +207,9 @@ func (t *Tensor) WriteContentsTo(w io.Writer) (int64, error) {
 func tensorData(c *C.TF_Tensor) []byte {
 	// See: https://github.com/golang/go/wiki/cgo#turning-c-arrays-into-go-slices
 	cbytes := C.TF_TensorData(c)
+	if cbytes == nil {
+		return nil
+	}
 	length := int(C.TF_TensorByteSize(c))
 	slice := (*[1 << 30]byte)(unsafe.Pointer(cbytes))[:length:length]
 	return slice
@@ -310,7 +313,7 @@ func encodeTensor(w *bytes.Buffer, v reflect.Value, shape []int64) error {
 		if err := w.WriteByte(b); err != nil {
 			return err
 		}
-	case reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64, reflect.Uint8, reflect.Uint16, reflect.Float32, reflect.Float64, reflect.Complex64, reflect.Complex128:
+	case reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Float32, reflect.Float64, reflect.Complex64, reflect.Complex128:
 		if err := binary.Write(w, nativeEndian, v.Interface()); err != nil {
 			return err
 		}
@@ -349,7 +352,7 @@ func decodeTensor(r *bytes.Reader, shape []int64, typ reflect.Type, ptr reflect.
 			return err
 		}
 		ptr.Elem().SetBool(b == 1)
-	case reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64, reflect.Uint8, reflect.Uint16, reflect.Float32, reflect.Float64, reflect.Complex64, reflect.Complex128:
+	case reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Float32, reflect.Float64, reflect.Complex64, reflect.Complex128:
 		if err := binary.Read(r, nativeEndian, ptr.Interface()); err != nil {
 			return err
 		}
diff --git a/tensorflow/go/tensor_test.go b/tensorflow/go/tensor_test.go
index 35bd2fd9a5..674a8ce86f 100644
--- a/tensorflow/go/tensor_test.go
+++ b/tensorflow/go/tensor_test.go
@@ -34,11 +34,15 @@ func TestNewTensor(t *testing.T) {
 		{nil, int64(5)},
 		{nil, uint8(5)},
 		{nil, uint16(5)},
+		{nil, uint32(5)},
+		{nil, uint64(5)},
 		{nil, float32(5)},
 		{nil, float64(5)},
 		{nil, complex(float32(5), float32(6))},
 		{nil, complex(float64(5), float64(6))},
 		{nil, "a string"},
+		{[]int64{1}, []uint32{1}},
+		{[]int64{1}, []uint64{1}},
 		{[]int64{2}, []bool{true, false}},
 		{[]int64{1}, []float64{1}},
 		{[]int64{1}, [1]float64{1}},
@@ -71,11 +75,6 @@ func TestNewTensor(t *testing.T) {
 		// native ints not supported
 		int(5),
 		[]int{5},
-		// uint32 and uint64 are not supported in TensorFlow
-		uint32(5),
-		[]uint32{5},
-		uint64(5),
-		[]uint64{5},
 		// Mismatched dimensions
 		[][]float32{{1, 2, 3}, {4}},
 		// Mismatched dimensions. Should return "mismatched slice lengths" error instead of "BUG"
diff --git a/tensorflow/java/src/main/java/org/tensorflow/Shape.java b/tensorflow/java/src/main/java/org/tensorflow/Shape.java
index 9aa92be111..d533c3d480 100644
--- a/tensorflow/java/src/main/java/org/tensorflow/Shape.java
+++ b/tensorflow/java/src/main/java/org/tensorflow/Shape.java
@@ -77,6 +77,24 @@ public final class Shape {
     return shape[i];
   }
 
+  @Override
+  public int hashCode() {
+    return Arrays.hashCode(shape);
+  }
+
+  @Override
+  public boolean equals(Object obj) {
+    if (this == obj) {
+      return true;
+    }
+
+    if (obj instanceof Shape && Arrays.equals(this.shape, ((Shape) obj).shape)) {
+      return !hasUnknownDimension();
+    }
+
+    return super.equals(obj);
+  }
+
   /** Succinct description of the shape meant for debugging. */
   @Override
   public String toString() {
@@ -98,4 +116,18 @@ public final class Shape {
   }
 
   private long[] shape;
+
+  private boolean hasUnknownDimension() {
+    if (shape == null) {
+      return true;
+    }
+
+    for (long dimension : shape) {
+      if (dimension == -1) {
+        return true;
+      }
+    }
+
+    return false;
+  }
 }
diff --git a/tensorflow/java/src/test/java/org/tensorflow/ShapeTest.java b/tensorflow/java/src/test/java/org/tensorflow/ShapeTest.java
index 3b027700c5..92cc3bd60e 100644
--- a/tensorflow/java/src/test/java/org/tensorflow/ShapeTest.java
+++ b/tensorflow/java/src/test/java/org/tensorflow/ShapeTest.java
@@ -16,6 +16,7 @@ limitations under the License.
 package org.tensorflow;
 
 import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotEquals;
 
 import org.junit.Test;
 import org.junit.runner.RunWith;
@@ -77,4 +78,29 @@ public class ShapeTest {
       assertEquals(5, n.shape().size(1));
     }
   }
+
+  @Test
+  public void equalsWorksCorrectly() {
+    assertEquals(Shape.scalar(), Shape.scalar());
+    assertEquals(Shape.make(1, 2, 3), Shape.make(1, 2, 3));
+
+    assertNotEquals(Shape.make(1,2), null);
+    assertNotEquals(Shape.make(1,2), new Object());
+    assertNotEquals(Shape.make(1, 2, 3), Shape.make(1, 2, 4));
+
+
+    assertNotEquals(Shape.unknown(), Shape.unknown());
+    assertNotEquals(Shape.make(-1), Shape.make(-1));
+    assertNotEquals(Shape.make(1, -1, 3), Shape.make(1, -1, 3));
+  }
+
+  @Test
+  public void hashCodeIsAsExpected() {
+    assertEquals(Shape.make(1, 2, 3, 4).hashCode(), Shape.make(1, 2, 3, 4).hashCode());
+    assertEquals(Shape.scalar().hashCode(), Shape.scalar().hashCode());
+    assertEquals(Shape.unknown().hashCode(), Shape.unknown().hashCode());
+
+    assertNotEquals(Shape.make(1, 2).hashCode(), Shape.make(1, 3).hashCode());
+  }
 }
+
diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD
index 5ae4aace16..54c43c1337 100644
--- a/tensorflow/python/BUILD
+++ b/tensorflow/python/BUILD
@@ -5,7 +5,10 @@ package(
     default_visibility = [
         "//engedu/ml/tf_from_scratch:__pkg__",
         "//tensorflow:internal",
+        "//tensorflow/contrib/lite/toco/python:__pkg__",
         "//tensorflow_models:__subpackages__",
+        # TODO(aselle): to pass open source test.
+        "//bazel_pip/tensorflow/contrib/lite/toco/python:__pkg__",
     ],
 )
 
@@ -45,6 +48,7 @@ py_library(
         "//tensorflow/compiler/aot/tests:__pkg__",  # TODO(b/34059704): remove when fixed
         "//tensorflow/contrib/learn:__pkg__",  # TODO(b/34059704): remove when fixed
         "//tensorflow/contrib/learn/python/learn/datasets:__pkg__",  # TODO(b/34059704): remove when fixed
+        "//tensorflow/contrib/lite/toco/python:__pkg__",  # TODO(b/34059704): remove when fixed
         "//tensorflow/python/debug:__pkg__",  # TODO(b/34059704): remove when fixed
         "//tensorflow/python/tools:__pkg__",  # TODO(b/34059704): remove when fixed
         "//tensorflow/tools/api/generator:__pkg__",
diff --git a/tensorflow/python/estimator/canned/head.py b/tensorflow/python/estimator/canned/head.py
index 62fea05867..fa5d02c476 100644
--- a/tensorflow/python/estimator/canned/head.py
+++ b/tensorflow/python/estimator/canned/head.py
@@ -117,7 +117,7 @@ class _Head(object):
       update_op = tf.contrib.layers.optimize_loss(optimizer=sync,
                                                   loss=estimator_spec.loss, ...)
       hooks = [sync.make_session_run_hook(is_chief)]
-      ... upate train_op and hooks in EstimatorSpec and return
+      ... update train_op and hooks in EstimatorSpec and return
     ```
   """
   __metaclass__ = abc.ABCMeta
diff --git a/tensorflow/python/estimator/inputs/numpy_io.py b/tensorflow/python/estimator/inputs/numpy_io.py
index c9f37f06e8..3512f66284 100644
--- a/tensorflow/python/estimator/inputs/numpy_io.py
+++ b/tensorflow/python/estimator/inputs/numpy_io.py
@@ -19,6 +19,7 @@ from __future__ import division
 from __future__ import print_function
 
 import collections
+from six import string_types
 from tensorflow.python.estimator.inputs.queues import feeding_functions
 
 # Key name to pack the target into dict of `features`. See
@@ -51,8 +52,9 @@ def numpy_input_fn(x,
                    num_threads=1):
   """Returns input function that would feed dict of numpy arrays into the model.
 
-  This returns a function outputting `features` and `target` based on the dict
-  of numpy arrays. The dict `features` has the same keys as the `x`.
+  This returns a function outputting `features` and `targets` based on the dict
+  of numpy arrays. The dict `features` has the same keys as the `x`. The dict
+  `targets` has the same keys as the `y` if `y` is a dict.
 
   Example:
 
@@ -69,7 +71,7 @@ def numpy_input_fn(x,
 
   Args:
     x: dict of numpy array object.
-    y: numpy array object. `None` if absent.
+    y: numpy array object or dict of numpy array object. `None` if absent.
     batch_size: Integer, size of batches to return.
     num_epochs: Integer, number of epochs to iterate over data. If `None` will
       run forever.
@@ -81,11 +83,13 @@ def numpy_input_fn(x,
       such as in prediction and evaluation mode, `num_threads` should be 1.
 
   Returns:
-    Function, that has signature of ()->(dict of `features`, `target`)
+    Function, that has signature of ()->(dict of `features`, `targets`)
 
   Raises:
     ValueError: if the shape of `y` mismatches the shape of values in `x` (i.e.,
       values in `x` have same shape).
+    ValueError: if duplicate keys are in both `x` and `y` when `y` is a dict.
+    ValueError: if x or y is an empty dict.
     TypeError: `x` is not a dict or `shuffle` is not bool.
   """
 
@@ -97,43 +101,76 @@ def numpy_input_fn(x,
     """Numpy input function."""
     if not isinstance(x, dict):
       raise TypeError('x must be dict; got {}'.format(type(x).__name__))
+    if not x:
+      raise ValueError('x cannot be empty')
 
     # Make a shadow copy and also ensure the order of iteration is consistent.
-    ordered_dict_x = collections.OrderedDict(
+    ordered_dict_data = collections.OrderedDict(
         sorted(x.items(), key=lambda t: t[0]))
+    # Deep copy keys which is a view in python 3
+    feature_keys = list(ordered_dict_data.keys())
+
+    if y is None:
+      target_keys = None
+    elif isinstance(y, dict):
+      if not y:
+        raise ValueError('y cannot be empty dict, use None instead.')
+
+      ordered_dict_y = collections.OrderedDict(
+        sorted(y.items(), key=lambda t: t[0]))
+      target_keys = list(ordered_dict_y.keys())
+
+      duplicate_keys = set(feature_keys).intersection(set(target_keys))
+      if len(duplicate_keys):
+        raise ValueError('{} duplicate keys are found in both x and y: '
+                         '{}'.format(len(duplicate_keys), duplicate_keys))
+
+      ordered_dict_data.update(ordered_dict_y)
+    else:
+      target_keys = _get_unique_target_key(ordered_dict_data)
+      ordered_dict_data[target_keys] = y
+
+    if len(set(v.shape[0] for v in ordered_dict_data.values())) != 1:
+      shape_dict_of_x = {k: ordered_dict_data[k].shape
+                         for k in feature_keys}
+
+      if target_keys is None:
+        shape_of_y = None
+      elif isinstance(target_keys, string_types):
+        shape_of_y = y.shape
+      else:
+        shape_of_y = {k: ordered_dict_data[k].shape
+                      for k in target_keys}
 
-    unique_target_key = _get_unique_target_key(ordered_dict_x)
-    if y is not None:
-      ordered_dict_x[unique_target_key] = y
-
-    if len(set(v.shape[0] for v in ordered_dict_x.values())) != 1:
-      shape_dict_of_x = {k: ordered_dict_x[k].shape
-                         for k in ordered_dict_x.keys()}
-      shape_of_y = None if y is None else y.shape
       raise ValueError('Length of tensors in x and y is mismatched. All '
                        'elements in x and y must have the same length.\n'
                        'Shapes in x: {}\n'
-                       'Shape for y: {}\n'.format(shape_dict_of_x, shape_of_y))
+                       'Shapes in y: {}\n'.format(shape_dict_of_x, shape_of_y))
 
     queue = feeding_functions._enqueue_data(  # pylint: disable=protected-access
-        ordered_dict_x,
+        ordered_dict_data,
         queue_capacity,
         shuffle=shuffle,
         num_threads=num_threads,
         enqueue_size=batch_size,
         num_epochs=num_epochs)
 
-    features = (queue.dequeue_many(batch_size) if num_epochs is None
+    batch = (queue.dequeue_many(batch_size) if num_epochs is None
                 else queue.dequeue_up_to(batch_size))
 
-    # Remove the first `Tensor` in `features`, which is the row number.
-    if len(features) > 0:
-      features.pop(0)
+    # Remove the first `Tensor` in `batch`, which is the row number.
+    if len(batch) > 0:
+      batch.pop(0)
 
-    features = dict(zip(ordered_dict_x.keys(), features))
-    if y is not None:
-      target = features.pop(unique_target_key)
+    features = dict(zip(feature_keys, batch[:len(feature_keys)]))
+    if target_keys is None:
+      # TODO(martinwicke), return consistent result
+      return features
+    elif isinstance(target_keys, string_types):
+      target = batch[-1]
+      return features, target
+    else:
+      target = dict(zip(target_keys, batch[-len(target_keys):]))
       return features, target
-    return features
 
   return input_fn
diff --git a/tensorflow/python/estimator/inputs/numpy_io_test.py b/tensorflow/python/estimator/inputs/numpy_io_test.py
index 02df22b632..65eae7a7dc 100644
--- a/tensorflow/python/estimator/inputs/numpy_io_test.py
+++ b/tensorflow/python/estimator/inputs/numpy_io_test.py
@@ -239,6 +239,40 @@ class NumpyIoTest(test.TestCase):
             x, y, batch_size=2, shuffle=False, num_epochs=1)
         failing_input_fn()
 
+  def testNumpyInputFnWithXIsEmptyDict(self):
+    x = {}
+    y = np.arange(4)
+    with self.test_session():
+      with self.assertRaisesRegexp(ValueError, 'x cannot be empty'):
+        failing_input_fn = numpy_io.numpy_input_fn(x, y, shuffle=False)
+        failing_input_fn()
+
+  def testNumpyInputFnWithYIsNone(self):
+    a = np.arange(4) * 1.0
+    b = np.arange(32, 36)
+    x = {'a': a, 'b': b}
+    y = None
+
+    with self.test_session() as session:
+      input_fn = numpy_io.numpy_input_fn(
+        x, y, batch_size=2, shuffle=False, num_epochs=1)
+      features_tensor = input_fn()
+
+      coord = coordinator.Coordinator()
+      threads = queue_runner_impl.start_queue_runners(session, coord=coord)
+
+      feature = session.run(features_tensor)
+      self.assertEqual(len(feature), 2)
+      self.assertAllEqual(feature['a'], [0, 1])
+      self.assertAllEqual(feature['b'], [32, 33])
+
+      session.run([features_tensor])
+      with self.assertRaises(errors.OutOfRangeError):
+        session.run([features_tensor])
+
+      coord.request_stop()
+      coord.join(threads)
+
   def testNumpyInputFnWithNonBoolShuffle(self):
     x = np.arange(32, 36)
     y = np.arange(4)
@@ -285,6 +319,59 @@ class NumpyIoTest(test.TestCase):
             num_epochs=1)
         failing_input_fn()
 
+  def testNumpyInputFnWithYAsDict(self):
+    a = np.arange(4) * 1.0
+    b = np.arange(32, 36)
+    x = {'a': a, 'b': b}
+    y = {'y1': np.arange(-32, -28), 'y2': np.arange(32, 28, -1)}
+
+    with self.test_session() as session:
+      input_fn = numpy_io.numpy_input_fn(
+        x, y, batch_size=2, shuffle=False, num_epochs=1)
+      features_tensor, targets_tensor = input_fn()
+
+      coord = coordinator.Coordinator()
+      threads = queue_runner_impl.start_queue_runners(session, coord=coord)
+
+      features, targets = session.run([features_tensor, targets_tensor])
+      self.assertEqual(len(features), 2)
+      self.assertAllEqual(features['a'], [0, 1])
+      self.assertAllEqual(features['b'], [32, 33])
+      self.assertEqual(len(targets), 2)
+      self.assertAllEqual(targets['y1'], [-32, -31])
+      self.assertAllEqual(targets['y2'], [32, 31])
+
+      session.run([features_tensor, targets_tensor])
+      with self.assertRaises(errors.OutOfRangeError):
+        session.run([features_tensor, targets_tensor])
+
+      coord.request_stop()
+      coord.join(threads)
+
+  def testNumpyInputFnWithYIsEmptyDict(self):
+    a = np.arange(4) * 1.0
+    b = np.arange(32, 36)
+    x = {'a': a, 'b': b}
+    y = {}
+    with self.test_session():
+      with self.assertRaisesRegexp(ValueError, 'y cannot be empty'):
+        failing_input_fn = numpy_io.numpy_input_fn(x, y, shuffle=False)
+        failing_input_fn()
+
+  def testNumpyInputFnWithDuplicateKeysInXAndY(self):
+    a = np.arange(4) * 1.0
+    b = np.arange(32, 36)
+    x = {'a': a, 'b': b}
+    y = {'y1': np.arange(-32, -28),
+         'a': a,
+         'y2': np.arange(32, 28, -1),
+         'b': b}
+    with self.test_session():
+      with self.assertRaisesRegexp(
+              ValueError, '2 duplicate keys are found in both x and y'):
+        failing_input_fn = numpy_io.numpy_input_fn(x, y, shuffle=False)
+        failing_input_fn()
+
 
 if __name__ == '__main__':
   test.main()
diff --git a/tensorflow/python/framework/ops.py b/tensorflow/python/framework/ops.py
index 2785aed13e..dc4ffb1747 100644
--- a/tensorflow/python/framework/ops.py
+++ b/tensorflow/python/framework/ops.py
@@ -860,6 +860,10 @@ def convert_to_tensor(value, dtype=None, name=None, preferred_dtype=None):
   inputs, which allows those ops to accept numpy arrays, Python lists,
   and scalars in addition to `Tensor` objects.
 
+  Note: This function diverges from default Numpy behavior for `float` and
+    `string` types when `None` is present in a Python list or scalar. Rather
+    than silently converting `None` values, an error will be thrown.
+
   Args:
     value: An object whose type has a registered `Tensor` conversion function.
     dtype: Optional element type for the returned tensor. If missing, the
diff --git a/tensorflow/python/framework/tensor_util.py b/tensorflow/python/framework/tensor_util.py
index 7e74c19124..e283542172 100644
--- a/tensorflow/python/framework/tensor_util.py
+++ b/tensorflow/python/framework/tensor_util.py
@@ -286,6 +286,7 @@ _TF_TO_IS_OK = {
     dtypes.bool: [_FilterBool],
     dtypes.complex128: [_FilterComplex],
     dtypes.complex64: [_FilterComplex],
+    dtypes.float16: [_FilterFloat],
     dtypes.float32: [_FilterFloat],
     dtypes.float64: [_FilterFloat],
     dtypes.int16: [_FilterInt],
diff --git a/tensorflow/python/framework/test_util.py b/tensorflow/python/framework/test_util.py
index cfa5fe5e3e..1610214d54 100644
--- a/tensorflow/python/framework/test_util.py
+++ b/tensorflow/python/framework/test_util.py
@@ -986,8 +986,9 @@ class TensorFlowTestCase(googletest.TestCase):
       err: A float value.
       msg: An optional string message to append to the failure message.
     """
+    # f1 == f2 is needed here as we might have: f1, f2 = inf, inf
     self.assertTrue(
-        math.fabs(f1 - f2) <= err,
+        f1 == f2 or math.fabs(f1 - f2) <= err,
         "%f != %f +/- %f%s" % (f1, f2, err, " (%s)" % msg
                                if msg is not None else ""))
 
diff --git a/tensorflow/python/kernel_tests/array_ops_test.py b/tensorflow/python/kernel_tests/array_ops_test.py
index 6eb9c66d06..76b80e60ea 100644
--- a/tensorflow/python/kernel_tests/array_ops_test.py
+++ b/tensorflow/python/kernel_tests/array_ops_test.py
@@ -107,22 +107,41 @@ class BooleanMaskTest(test_util.TensorFlowTestCase):
   def setUp(self):
     self.rng = np.random.RandomState(42)
 
-  def CheckVersusNumpy(self, ndims_mask, arr_shape, make_mask=None):
+  def CheckVersusNumpy(self, ndims_mask, arr_shape, make_mask=None, axis=None):
     """Check equivalence between boolean_mask and numpy masking."""
     if make_mask is None:
       make_mask = lambda shape: self.rng.randint(0, 2, size=shape).astype(bool)
     arr = np.random.rand(*arr_shape)
     mask = make_mask(arr_shape[:ndims_mask])
-    masked_arr = arr[mask]
-    with self.test_session():
-      masked_tensor = array_ops.boolean_mask(arr, mask)
+    if axis is not None:
+      mask = make_mask(arr_shape[axis:ndims_mask+axis])
+    if axis is None or axis == 0:
+      masked_arr = arr[mask]
+    elif axis == 1:
+      masked_arr = arr[:,mask]
+    elif axis == 2:
+      masked_arr = arr[:,:,mask]
+    with self.test_session() as sess:
+      masked_tensor = array_ops.boolean_mask(arr, mask, axis=axis)
 
       # Leading dimension size of masked_tensor is always unknown until runtime
       # since we don't how many elements will be kept.
-      self.assertAllEqual(masked_tensor.get_shape()[1:], masked_arr.shape[1:])
+      leading = 1 if axis is None else axis + 1
+      self.assertAllEqual(masked_tensor.get_shape()[leading:],
+          masked_arr.shape[leading:])
 
       self.assertAllClose(masked_arr, masked_tensor.eval())
 
+  def testMaskDim1ArrDim2Axis1(self):
+    ndims_mask = 1
+    for arr_shape in [(1, 1), (2, 2), (2, 5)]:
+      self.CheckVersusNumpy(ndims_mask, arr_shape, axis=1)
+
+  def testMaskDim2ArrDim2Axis1(self):
+    ndims_mask = 2
+    for arr_shape in [(1, 1), (2, 2), (2, 5)]:
+      self.CheckVersusNumpy(ndims_mask, arr_shape, axis=1)
+
   def testMaskDim1ArrDim1(self):
     ndims_mask = 1
     for arr_shape in [(1,), (2,), (3,), (10,)]:
@@ -486,7 +505,7 @@ class StridedSliceTest(test_util.TensorFlowTestCase):
         _ = checker2[...]
         _ = checker2[tuple()]
 
-  def testFloatSlicedArrayAndInt64IndicesGPU(self):
+  def testInt64GPU(self):
     if not test_util.is_gpu_available():
       self.skipTest("No GPU available")
     with self.test_session(use_gpu=True, force_gpu=True):
@@ -497,17 +516,6 @@ class StridedSliceTest(test_util.TensorFlowTestCase):
       s = array_ops.strided_slice(x, begin, end, strides)
       self.assertAllEqual([3.], self.evaluate(s))
 
-  def testInt64SlicedArrayAndIndicesGPU(self):
-    if not test_util.is_gpu_available():
-      self.skipTest("No GPU available")
-    with self.test_session(use_gpu=True, force_gpu=True):
-      x = constant_op.constant([1, 2, 3], dtype=dtypes.int64)
-      begin = constant_op.constant([2], dtype=dtypes.int64)
-      end = constant_op.constant([3], dtype=dtypes.int64)
-      strides = constant_op.constant([1], dtype=dtypes.int64)
-      s = array_ops.strided_slice(x, begin, end, strides)
-      self.assertAllEqual([3], self.evaluate(s))
-
   def testDegenerateSlices(self):
     with self.test_session(use_gpu=True):
       checker = StridedSliceChecker(self, StridedSliceChecker.REF_TENSOR)
@@ -1070,6 +1078,16 @@ class PadTest(test_util.TensorFlowTestCase):
                            [0, 0, 4, 5, 6, 0, 0],
                            [0, 0, 0, 0, 0, 0, 0]])
 
+class InvertPermutationTest(test_util.TensorFlowTestCase):
+
+  def testInvertPermutation(self):
+    for dtype in [dtypes.int32, dtypes.int64]:
+      with self.test_session(use_gpu=True):
+        x = constant_op.constant([3, 4, 0, 2, 1], dtype=dtype)
+        y = array_ops.invert_permutation(x)
+        self.assertAllEqual(y.get_shape(), [5])
+        self.assertAllEqual(y.eval(), [2, 4, 3, 0, 1])
+
 
 if __name__ == "__main__":
   test_lib.main()
diff --git a/tensorflow/python/kernel_tests/bincount_op_test.py b/tensorflow/python/kernel_tests/bincount_op_test.py
index 7a610debd1..79285476b4 100644
--- a/tensorflow/python/kernel_tests/bincount_op_test.py
+++ b/tensorflow/python/kernel_tests/bincount_op_test.py
@@ -25,11 +25,10 @@ from tensorflow.python.framework import test_util
 from tensorflow.python.ops import math_ops
 from tensorflow.python.platform import googletest
 
-
 class BincountTest(test_util.TensorFlowTestCase):
 
   def test_empty(self):
-    with self.test_session():
+    with self.test_session(use_gpu=True):
       self.assertAllEqual(
           math_ops.bincount([], minlength=5).eval(), [0, 0, 0, 0, 0])
       self.assertAllEqual(math_ops.bincount([], minlength=1).eval(), [0])
@@ -42,7 +41,7 @@ class BincountTest(test_util.TensorFlowTestCase):
           np.float64)
 
   def test_values(self):
-    with self.test_session():
+    with self.test_session(use_gpu=True):
       self.assertAllEqual(
           math_ops.bincount([1, 1, 1, 2, 2, 3]).eval(), [0, 3, 2, 1])
       arr = [1, 1, 2, 1, 2, 3, 1, 2, 3, 4, 1, 2, 3, 4, 5]
@@ -57,14 +56,14 @@ class BincountTest(test_util.TensorFlowTestCase):
           math_ops.bincount(np.arange(10000)).eval(), np.ones(10000))
 
   def test_maxlength(self):
-    with self.test_session():
+    with self.test_session(use_gpu=True):
       self.assertAllEqual(math_ops.bincount([5], maxlength=3).eval(), [0, 0, 0])
       self.assertAllEqual(math_ops.bincount([1], maxlength=3).eval(), [0, 1])
       self.assertAllEqual(math_ops.bincount([], maxlength=3).eval(), [])
 
   def test_random_with_weights(self):
     num_samples = 10000
-    with self.test_session():
+    with self.test_session(use_gpu=True):
       np.random.seed(42)
       for dtype in [dtypes.int32, dtypes.int64, dtypes.float32, dtypes.float64]:
         arr = np.random.randint(0, 1000, num_samples)
@@ -72,17 +71,29 @@ class BincountTest(test_util.TensorFlowTestCase):
           weights = np.random.randint(-100, 100, num_samples)
         else:
           weights = np.random.random(num_samples)
-        self.assertAllEqual(
+        self.assertAllClose(
             math_ops.bincount(arr, weights).eval(),
             np.bincount(arr, weights))
 
+  def test_random_without_weights(self):
+    num_samples = 10000
+    with self.test_session(use_gpu=True):
+      np.random.seed(42)
+      for dtype in [np.int32, np.float32]:
+        arr = np.random.randint(0, 1000, num_samples)
+        weights = np.ones(num_samples).astype(dtype)
+        self.assertAllClose(
+            math_ops.bincount(arr, None).eval(),
+            np.bincount(arr, weights))
+
   def test_zero_weights(self):
-    with self.test_session():
+    with self.test_session(use_gpu=True):
       self.assertAllEqual(
           math_ops.bincount(np.arange(1000), np.zeros(1000)).eval(),
           np.zeros(1000))
 
   def test_negative(self):
+    # unsorted_segment_sum will only report InvalidArgumentError on CPU
     with self.test_session():
       with self.assertRaises(errors.InvalidArgumentError):
         math_ops.bincount([1, 2, 3, -1, 6, 8]).eval()
diff --git a/tensorflow/python/kernel_tests/bucketize_op_test.py b/tensorflow/python/kernel_tests/bucketize_op_test.py
index 6db3592055..e612b1c134 100644
--- a/tensorflow/python/kernel_tests/bucketize_op_test.py
+++ b/tensorflow/python/kernel_tests/bucketize_op_test.py
@@ -31,7 +31,7 @@ class BucketizationOpTest(test.TestCase):
         constant_op.constant([-5, 0, 2, 3, 5, 8, 10, 11, 12]),
         boundaries=[0, 3, 8, 11])
     expected_out = [0, 1, 1, 2, 2, 3, 3, 4, 4]
-    with self.test_session() as sess:
+    with self.test_session(use_gpu=True) as sess:
       self.assertAllEqual(expected_out, sess.run(op))
 
   def testFloat(self):
@@ -39,7 +39,7 @@ class BucketizationOpTest(test.TestCase):
         constant_op.constant([-5., 0., 2., 3., 5., 8., 10., 11., 12.]),
         boundaries=[0., 3., 8., 11.])
     expected_out = [0, 1, 1, 2, 2, 3, 3, 4, 4]
-    with self.test_session() as sess:
+    with self.test_session(use_gpu=True) as sess:
       self.assertAllEqual(expected_out, sess.run(op))
 
   def test2DInput(self):
@@ -47,13 +47,13 @@ class BucketizationOpTest(test.TestCase):
         constant_op.constant([[-5, 0, 2, 3, 5], [8, 10, 11, 12, 0]]),
         boundaries=[0, 3, 8, 11])
     expected_out = [[0, 1, 1, 2, 2], [3, 3, 4, 4, 1]]
-    with self.test_session() as sess:
+    with self.test_session(use_gpu=True) as sess:
       self.assertAllEqual(expected_out, sess.run(op))
 
   def testInvalidBoundariesOrder(self):
     op = math_ops._bucketize(
         constant_op.constant([-5, 0]), boundaries=[0, 8, 3, 11])
-    with self.test_session() as sess:
+    with self.test_session(use_gpu=True) as sess:
       with self.assertRaisesRegexp(
           errors_impl.InvalidArgumentError, "Expected sorted boundaries"):
         sess.run(op)
diff --git a/tensorflow/python/kernel_tests/constant_op_test.py b/tensorflow/python/kernel_tests/constant_op_test.py
index 6167cb9999..6cbdd4cbb3 100644
--- a/tensorflow/python/kernel_tests/constant_op_test.py
+++ b/tensorflow/python/kernel_tests/constant_op_test.py
@@ -439,9 +439,10 @@ class ZerosLikeTest(test.TestCase):
 
   def testZerosLikeCPU(self):
     for dtype in [
-        dtypes_lib.float32, dtypes_lib.float64, dtypes_lib.int32,
-        dtypes_lib.uint8, dtypes_lib.int16, dtypes_lib.int8,
-        dtypes_lib.complex64, dtypes_lib.complex128, dtypes_lib.int64,
+        dtypes_lib.float32, dtypes_lib.float64,
+        dtypes_lib.int8, dtypes_lib.uint8, dtypes_lib.int16, dtypes_lib.uint16,
+        dtypes_lib.int32, dtypes_lib.int64, dtypes_lib.bool,
+        dtypes_lib.complex64, dtypes_lib.complex128,
         dtypes_lib.string
     ]:
       self._compareZeros(dtype, fully_defined_shape=False, use_gpu=False)
@@ -573,9 +574,10 @@ class OnesLikeTest(test.TestCase):
 
   def testOnesLike(self):
     for dtype in [
-        dtypes_lib.float32, dtypes_lib.float64, dtypes_lib.int32,
-        dtypes_lib.uint8, dtypes_lib.int16, dtypes_lib.int8,
-        dtypes_lib.complex64, dtypes_lib.complex128, dtypes_lib.int64
+        dtypes_lib.float32, dtypes_lib.float64,
+        dtypes_lib.int8, dtypes_lib.uint8, dtypes_lib.int16, dtypes_lib.uint16,
+        dtypes_lib.int32, dtypes_lib.int64, dtypes_lib.bool,
+        dtypes_lib.complex64, dtypes_lib.complex128
     ]:
       numpy_dtype = dtype.as_numpy_dtype
       with self.test_session():
diff --git a/tensorflow/python/kernel_tests/conv1d_test.py b/tensorflow/python/kernel_tests/conv1d_test.py
index b67a4e3f89..a7e23ead1c 100644
--- a/tensorflow/python/kernel_tests/conv1d_test.py
+++ b/tensorflow/python/kernel_tests/conv1d_test.py
@@ -17,6 +17,9 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
+import numpy as np
+from six.moves import xrange  # pylint: disable=redefined-builtin
+
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
 from tensorflow.python.ops import array_ops
@@ -50,5 +53,45 @@ class Conv1DTest(test.TestCase):
           self.assertAllClose(output, [2 * 1 + 1 * 2, 2 * 3 + 1 * 4])
 
 
+  def testConv1DTranspose(self):
+    with self.test_session():
+      stride = 2
+
+      # Input, output: [batch, width, depth]
+      x_shape = [2, 4, 3]
+      y_shape = [2, 9, 2]
+
+      # Filter: [kernel_width, output_depth, input_depth]
+      f_shape = [3, 2, 3]
+
+      x = constant_op.constant(
+          1.0, shape=x_shape, name="x", dtype=dtypes.float32)
+      f = constant_op.constant(
+          1.0, shape=f_shape, name="filter", dtype=dtypes.float32)
+      output = nn_ops.conv1d_transpose(
+          x, f, y_shape, stride=stride, padding="VALID")
+      value = output.eval()
+
+      cache_values = np.zeros(y_shape, dtype=np.float32)
+
+      # The amount of padding added
+      pad = 1
+
+      for n in xrange(x_shape[0]):
+        for k in xrange(f_shape[1]):
+          for w in xrange(pad, y_shape[1] - pad):
+            target = 3.0
+            # We add a case for locations divisible by the stride.
+            w_in = w % stride == 0 and w > pad and w < y_shape[1] - 1 - pad
+            if w_in:
+              target += 3.0
+            cache_values[n, w, k] = target
+
+          # copy values in the border
+          cache_values[n, 0, k] = cache_values[n, 1, k]
+          cache_values[n, -1, k] = cache_values[n, -2, k]
+
+    self.assertAllClose(cache_values, value)
+
 if __name__ == "__main__":
   test.main()
diff --git a/tensorflow/python/kernel_tests/conv_ops_3d_test.py b/tensorflow/python/kernel_tests/conv_ops_3d_test.py
index 14622ab467..116681fc4c 100644
--- a/tensorflow/python/kernel_tests/conv_ops_3d_test.py
+++ b/tensorflow/python/kernel_tests/conv_ops_3d_test.py
@@ -21,6 +21,8 @@ from __future__ import print_function
 import collections
 import math
 
+import numpy as np
+
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import test_util
@@ -45,8 +47,19 @@ def GetTestConfigs():
 
 class Conv3DTest(test.TestCase):
 
+  def _DtypesToTest(self, use_gpu):
+    if use_gpu:
+      if not test_util.CudaSupportsHalfMatMulAndConv():
+        return [dtypes.float32]
+      else:
+        # It is important that float32 comes before float16 here,
+        # as we will be using its gradients as reference for fp16 gradients.
+        return [dtypes.float32, dtypes.float16]
+    else:
+      return [dtypes.float64, dtypes.float32, dtypes.float16]
+
   def _SetupValuesForDevice(self, tensor_in_sizes, filter_in_sizes, stride,
-                            padding, data_format, use_gpu):
+                            padding, data_format, dtype, use_gpu):
     total_size_1 = 1
     total_size_2 = 1
     for s in tensor_in_sizes:
@@ -54,13 +67,14 @@ class Conv3DTest(test.TestCase):
     for s in filter_in_sizes:
       total_size_2 *= s
 
-    # Initializes the input tensor with array containing incrementing
-    # numbers from 1.
-    x1 = [f * 1.0 for f in range(1, total_size_1 + 1)]
-    x2 = [f * 1.0 for f in range(1, total_size_2 + 1)]
+    # Initializes the input tensor with array containing numbers from 0 to 1.
+    # We keep the input tensor values fairly small to avoid overflowing a float16 
+    # tensor during the conv3d 
+    x1 = [f * 1.0 / total_size_1 for f in range(1, total_size_1 + 1)]
+    x2 = [f * 1.0 / total_size_2 for f in range(1, total_size_2 + 1)]
     with self.test_session(use_gpu=use_gpu):
-      t1 = constant_op.constant(x1, shape=tensor_in_sizes)
-      t2 = constant_op.constant(x2, shape=filter_in_sizes)
+      t1 = constant_op.constant(x1, shape=tensor_in_sizes, dtype=dtype)
+      t2 = constant_op.constant(x2, shape=filter_in_sizes, dtype=dtype)
 
       if isinstance(stride, collections.Iterable):
         strides = [1] + list(stride) + [1]
@@ -81,27 +95,35 @@ class Conv3DTest(test.TestCase):
                     expected):
     results = []
     for data_format, use_gpu in GetTestConfigs():
-      result = self._SetupValuesForDevice(
-          tensor_in_sizes,
-          filter_in_sizes,
-          stride,
-          padding,
-          data_format,
-          use_gpu=use_gpu)
-      results.append(result)
-      tolerance = 1e-2 if use_gpu else 1e-5
+      for dtype in self._DtypesToTest(use_gpu):
+        result = self._SetupValuesForDevice(
+            tensor_in_sizes,
+            filter_in_sizes,
+            stride,
+            padding,
+            data_format,
+            dtype,
+            use_gpu=use_gpu)
+        results.append(result)
+
       with self.test_session() as sess:
         values = sess.run(results)
         for value in values:
           print("expected = ", expected)
           print("actual = ", value)
-          self.assertAllClose(expected, value.flatten(), atol=tolerance,
-                              rtol=1e-6)
+          tol = 1e-6
+          if value.dtype == np.float16:
+            tol = 1e-3
+
+          self.assertAllClose(expected, value.flatten(), atol=tol,
+                              rtol=tol)
 
   def testConv3D1x1x1Filter(self):
     expected_output = [
-        30.0, 36.0, 42.0, 66.0, 81.0, 96.0, 102.0, 126.0, 150.0, 138.0, 171.0,
-        204.0, 174.0, 216.0, 258.0, 210.0, 261.0, 312.0
+        0.18518519,  0.22222222,  0.25925926,  0.40740741,  0.5       ,
+        0.59259259,  0.62962963,  0.77777778,  0.92592593,  0.85185185,
+        1.05555556,  1.25925926,  1.07407407,  1.33333333,  1.59259259,
+        1.2962963 ,  1.61111111,  1.92592593
     ]
 
     # These are equivalent to the Conv2D1x1 case.
@@ -127,8 +149,10 @@ class Conv3DTest(test.TestCase):
   # Expected values computed using scipy's correlate function.
   def testConv3D2x2x2Filter(self):
     expected_output = [
-        19554., 19962., 20370., 22110., 22590., 23070., 34890., 35730., 36570.,
-        37446., 38358., 39270., 50226., 51498., 52770., 52782., 54126., 55470.
+        3.77199074,   3.85069444,   3.92939815,   4.2650463 ,   4.35763889,
+        4.45023148,   6.73032407,   6.89236111,   7.05439815,   7.22337963,
+        7.39930556,   7.57523148,   9.68865741,   9.93402778,  10.17939815,
+        10.18171296,  10.44097222,  10.70023148
     ]
     # expected_shape = [1, 3, 1, 2, 5]
     self._VerifyValues(
@@ -140,69 +164,19 @@ class Conv3DTest(test.TestCase):
 
   def testConv3DStrides(self):
     expected_output = [
-        102.,
-        151.,
-        172.,
-        193.,
-        214.,
-        235.,
-        142.,
-        438.,
-        592.,
-        613.,
-        634.,
-        655.,
-        676.,
-        394.,
-        774.,
-        1033.,
-        1054.,
-        1075.,
-        1096.,
-        1117.,
-        646.,
-        1894.,
-        2503.,
-        2524.,
-        2545.,
-        2566.,
-        2587.,
-        1486.,
-        2230.,
-        2944.,
-        2965.,
-        2986.,
-        3007.,
-        3028.,
-        1738.,
-        2566.,
-        3385.,
-        3406.,
-        3427.,
-        3448.,
-        3469.,
-        1990.,
-        3686.,
-        4855.,
-        4876.,
-        4897.,
-        4918.,
-        4939.,
-        2830.,
-        4022.,
-        5296.,
-        5317.,
-        5338.,
-        5359.,
-        5380.,
-        3082.,
-        4358.,
-        5737.,
-        5758.,
-        5779.,
-        5800.,
-        5821.,
-        3334.,
+        0.06071429,  0.08988095,  0.10238095,  0.11488095,  0.12738095,
+        0.13988095,  0.08452381,  0.26071429,  0.35238095,  0.36488095,
+        0.37738095,  0.38988095,  0.40238095,  0.23452381,  0.46071429,
+        0.61488095,  0.62738095,  0.63988095,  0.65238095,  0.66488095,
+        0.38452381,  1.12738095,  1.48988095,  1.50238095,  1.51488095,
+        1.52738095,  1.53988095,  0.88452381,  1.32738095,  1.75238095,
+        1.76488095,  1.77738095,  1.78988095,  1.80238095,  1.03452381,
+        1.52738095,  2.01488095,  2.02738095,  2.03988095,  2.05238095,
+        2.06488095,  1.18452381,  2.19404762,  2.88988095,  2.90238095,
+        2.91488095,  2.92738095,  2.93988095,  1.68452381,  2.39404762,
+        3.15238095,  3.16488095,  3.17738095,  3.18988095,  3.20238095,
+        1.83452381,  2.59404762,  3.41488095,  3.42738095,  3.43988095,
+        3.45238095,  3.46488095,  1.98452381
     ]
     self._VerifyValues(
         tensor_in_sizes=[1, 5, 8, 7, 1],
@@ -212,7 +186,10 @@ class Conv3DTest(test.TestCase):
         expected=expected_output)
 
   def testConv3D2x2x2FilterStride2(self):
-    expected_output = [19554., 19962., 20370., 50226., 51498., 52770.]
+    expected_output = [
+        3.77199074,  3.85069444,  3.92939815,  9.68865741,  9.93402778,
+        10.17939815
+    ]
     self._VerifyValues(
         tensor_in_sizes=[1, 4, 2, 3, 3],
         filter_in_sizes=[2, 2, 2, 3, 3],
@@ -222,11 +199,14 @@ class Conv3DTest(test.TestCase):
 
   def testConv3DStride3(self):
     expected_output = [
-        36564., 38022., 39480., 37824., 39354., 40884., 39084., 40686., 42288.,
-        46644., 48678., 50712., 47904., 50010., 52116., 49164., 51342., 53520.,
-        107124., 112614., 118104., 108384., 113946., 119508., 109644., 115278.,
-        120912., 117204., 123270., 129336., 118464., 124602., 130740., 119724.,
-        125934., 132144.
+        1.51140873,  1.57167659,  1.63194444,  1.56349206,  1.62673611,
+        1.68998016,  1.6155754 ,  1.68179563,  1.74801587,  1.9280754 ,
+        2.01215278,  2.09623016,  1.98015873,  2.0672123 ,  2.15426587,
+        2.03224206,  2.12227183,  2.21230159,  4.4280754 ,  4.65500992,
+        4.88194444,  4.48015873,  4.71006944,  4.93998016,  4.53224206,
+        4.76512897,  4.99801587,  4.84474206,  5.09548611,  5.34623016,
+        4.8968254 ,  5.15054563,  5.40426587,  4.94890873,  5.20560516,
+        5.46230159
     ]
     self._VerifyValues(
         tensor_in_sizes=[1, 6, 7, 8, 2],
@@ -237,8 +217,9 @@ class Conv3DTest(test.TestCase):
 
   def testConv3D2x2x2FilterStride2Same(self):
     expected_output = [
-        19554., 19962., 20370., 10452., 10710., 10968., 50226., 51498., 52770.,
-        23844., 24534., 25224.
+        3.77199074,   3.85069444,   3.92939815,   2.0162037 ,   2.06597222,
+        2.11574074,   9.68865741,   9.93402778,  10.17939815,   4.59953704,
+        4.73263889,   4.86574074
     ]
     self._VerifyValues(
         tensor_in_sizes=[1, 4, 2, 3, 3],
@@ -248,7 +229,10 @@ class Conv3DTest(test.TestCase):
         expected=expected_output)
 
   def testKernelSmallerThanStride(self):
-    expected_output = [1., 3., 7., 9., 19., 21., 25., 27.]
+    expected_output = [
+        0.03703704,  0.11111111,  0.25925926,  0.33333333,  0.7037037 ,
+        0.77777778,  0.92592593,  1.
+    ]
     self._VerifyValues(
         tensor_in_sizes=[1, 3, 3, 3, 1],
         filter_in_sizes=[1, 1, 1, 1, 1],
@@ -263,9 +247,12 @@ class Conv3DTest(test.TestCase):
         expected=expected_output)
 
     expected_output = [
-        1484., 1592., 770., 2240., 2348., 1106., 1149., 1191., 539., 6776.,
-        6884., 3122., 7532., 7640., 3458., 3207., 3249., 1421., 3005., 3035.,
-        1225., 3215., 3245., 1309., 1013., 1022., 343.
+        0.54081633,  0.58017493,  0.28061224,  0.81632653,  0.85568513,
+        0.40306122,  0.41873178,  0.4340379 ,  0.19642857,  2.46938776,
+        2.50874636,  1.1377551 ,  2.74489796,  2.78425656,  1.26020408,
+        1.16873178,  1.1840379 ,  0.51785714,  1.09511662,  1.10604956,
+        0.44642857,  1.17164723,  1.18258017,  0.47704082,  0.3691691 ,
+        0.37244898,  0.125
     ]
     self._VerifyValues(
         tensor_in_sizes=[1, 7, 7, 7, 1],
@@ -274,7 +261,10 @@ class Conv3DTest(test.TestCase):
         padding="SAME",
         expected=expected_output)
 
-    expected_output = [1484., 1592., 2240., 2348., 6776., 6884., 7532., 7640.]
+    expected_output = [
+        0.540816,  0.580175,  0.816327,  0.855685,  2.469388,  2.508746,
+        2.744898,  2.784257
+    ]
     self._VerifyValues(
         tensor_in_sizes=[1, 7, 7, 7, 1],
         filter_in_sizes=[2, 2, 2, 1, 1],
@@ -288,7 +278,7 @@ class Conv3DTest(test.TestCase):
         filter_in_sizes=[2, 1, 2, 1, 2],
         stride=1,
         padding="VALID",
-        expected=[50, 60])
+        expected=[1.5625,  1.875])
 
   def _ConstructAndTestGradientForConfig(
       self, batch, input_shape, filter_shape, in_depth, out_depth, stride,
@@ -328,50 +318,63 @@ class Conv3DTest(test.TestCase):
     input_data = [x * 1.0 / input_size for x in range(0, input_size)]
     filter_data = [x * 1.0 / filter_size for x in range(0, filter_size)]
 
-    if test.is_gpu_available() and use_gpu:
-      data_type = dtypes.float32
+
+    for data_type in self._DtypesToTest(use_gpu=use_gpu):
       # TODO(mjanusz): Modify gradient_checker to also provide max relative
       # error and synchronize the tolerance levels between the tests for forward
       # and backward computations.
-      if test.is_gpu_available():
+      if data_type == dtypes.float64:
+        tolerance = 1e-8
+      elif data_type == dtypes.float32:
         tolerance = 5e-3
-      else:
-        # As of Aug 2016, higher tolerance is needed for some CPU architectures.
-        # Runs on a single machine can also generate slightly different errors
-        # because of multithreading.
-        tolerance = 8e-3
-    else:
-      data_type = dtypes.float64
-      tolerance = 1e-8
-    with self.test_session(use_gpu=use_gpu):
-      orig_input_tensor = constant_op.constant(
+      elif data_type == dtypes.float16:
+        tolerance = 1e-3
+
+
+      with self.test_session(use_gpu=use_gpu):
+        orig_input_tensor = constant_op.constant(
           input_data, shape=input_shape, dtype=data_type, name="input")
-      filter_tensor = constant_op.constant(
+        filter_tensor = constant_op.constant(
           filter_data, shape=filter_shape, dtype=data_type, name="filter")
 
-      if data_format == "NCDHW":
-        input_tensor = test_util.NHWCToNCHW(orig_input_tensor)
-        strides = test_util.NHWCToNCHW(strides)
-      else:
-        input_tensor = orig_input_tensor
+        if data_format == "NCDHW":
+          input_tensor = test_util.NHWCToNCHW(orig_input_tensor)
+          new_strides = test_util.NHWCToNCHW(strides)
+        else:
+          input_tensor = orig_input_tensor
+          new_strides = strides
 
-      conv = nn_ops.conv3d(
-          input_tensor, filter_tensor, strides, padding,
+        conv = nn_ops.conv3d(
+          input_tensor, filter_tensor, new_strides, padding,
           data_format=data_format, name="conv")
 
-      if data_format == "NCDHW":
-        conv = test_util.NCHWToNHWC(conv)
+        if data_format == "NCDHW":
+          conv = test_util.NCHWToNHWC(conv)
+
+        
+        if test_input:
+          jacob_t, jacob_n = gradient_checker.compute_gradient(orig_input_tensor,
+                                                               input_shape,
+                                                               conv,
+                                                               output_shape)
+        else:
+          jacob_t, jacob_n = gradient_checker.compute_gradient(filter_tensor,
+                                                               filter_shape,
+                                                               conv,
+                                                               output_shape)
+        
+        
+        if data_type != dtypes.float16:
+          reference_jacob_t = jacob_t
+          err = np.fabs(jacob_t - jacob_n).max()
+        else:
+          # Compare fp16 theoretical gradients to fp32 theoretical gradients,
+          # since fp16 numerical gradients are too imprecise.
+          err = np.fabs(jacob_t - reference_jacob_t).max()
+
+      print("conv3d gradient error = ", err)
+      self.assertLess(err, tolerance)
 
-      if test_input:
-        err = gradient_checker.compute_gradient_error(orig_input_tensor,
-                                                      input_shape,
-                                                      conv, output_shape)
-      else:
-        err = gradient_checker.compute_gradient_error(filter_tensor,
-                                                      filter_shape, conv,
-                                                      output_shape)
-    print("conv3d gradient error = ", err)
-    self.assertLess(err, tolerance)
 
   def ConstructAndTestGradient(self, **kwargs):
     for data_format, use_gpu in GetTestConfigs():
diff --git a/tensorflow/python/kernel_tests/depthwise_conv_op_test.py b/tensorflow/python/kernel_tests/depthwise_conv_op_test.py
index 3298092fbe..f7ae1a0f37 100644
--- a/tensorflow/python/kernel_tests/depthwise_conv_op_test.py
+++ b/tensorflow/python/kernel_tests/depthwise_conv_op_test.py
@@ -122,7 +122,9 @@ class DepthwiseConv2DTest(test.TestCase):
     x1 = [f * 1.0 for f in range(1, total_size_1 + 1)]
     x2 = [f * 1.0 for f in range(1, total_size_2 + 1)]
     with self.test_session(use_gpu=use_gpu) as sess:
-      if data_type == dtypes.float32:
+      if data_type == dtypes.float16:
+        tolerance = 1e-5
+      elif data_type == dtypes.float32:
         tolerance = 1e-5
       else:
         self.assertEqual(data_type, dtypes.float64)
@@ -169,7 +171,7 @@ class DepthwiseConv2DTest(test.TestCase):
                 padding) in enumerate(ConfigsToTest()):
       print("Testing DepthwiseConv2D,", index, "th config:", input_size, "*",
             filter_size, "stride:", stride, "padding:", padding)
-      for data_type in [dtypes.float32, dtypes.float64]:
+      for data_type in [dtypes.float16, dtypes.float32, dtypes.float64]:
         self._VerifyValues(
             input_size, filter_size, stride, padding, data_type, use_gpu=True)
 
@@ -181,7 +183,7 @@ class DepthwiseConv2DTest(test.TestCase):
                 padding) in enumerate(ConfigsToTest()):
       print("Testing DepthwiseConv2DFormat,", index, "th config:", input_size,
             "*", filter_size, "stride:", stride, "padding:", padding)
-      for data_type in [dtypes.float32, dtypes.float64]:
+      for data_type in [dtypes.float16, dtypes.float32, dtypes.float64]:
         self._VerifyValues(
             input_size,
             filter_size,
@@ -318,7 +320,9 @@ class DepthwiseConv2DTest(test.TestCase):
     input_data = [x * 1.0 / input_size for x in range(0, input_size)]
     filter_data = [x * 1.0 / filter_size for x in range(0, filter_size)]
     with self.test_session(use_gpu=use_gpu):
-      if data_type == dtypes.float32:
+      if data_type == dtypes.float16:
+        tolerance = 0.002
+      elif data_type == dtypes.float32:
         tolerance = 0.002
       else:
         self.assertEqual(data_type, dtypes.float64)
@@ -369,6 +373,8 @@ class DepthwiseConv2DTest(test.TestCase):
       print("Testing DepthwiseConv2DInputGrad,", index, "th config:",
             input_size, "*", filter_size, "stride:", stride, "padding:",
             padding)
+      # Note: float16 test for DepthwiseConv2DInputGrad is not enabled,
+      # calculations are not very precise.
       for data_type in [dtypes.float32, dtypes.float64]:
         self._ConstructAndTestGradient(
             input_size,
@@ -389,6 +395,8 @@ class DepthwiseConv2DTest(test.TestCase):
       print("Testing DepthwiseConv2DInputGradFormat,", index, "th config:",
             input_size, "*", filter_size, "stride:", stride, "padding:",
             padding)
+      # Note: float16 test for DepthwiseConv2DInputGradFormat is not enabled,
+      # calculations are not very precise.
       for data_type in [dtypes.float32, dtypes.float64]:
         self._ConstructAndTestGradient(
             input_size,
@@ -407,6 +415,8 @@ class DepthwiseConv2DTest(test.TestCase):
       print("Testing DepthwiseConv2DFilterGrad,", index, "th config:",
             input_size, "*", filter_size, "stride:", stride, "padding:",
             padding)
+      # Note: float16 test for DepthwiseConv2DFilterGrad is not enabled,
+      # calculations are not very precise.
       for data_type in [dtypes.float32, dtypes.float64]:
         self._ConstructAndTestGradient(
             input_size,
@@ -427,6 +437,8 @@ class DepthwiseConv2DTest(test.TestCase):
       print("Testing DepthwiseConv2DFilterGradFormat,", index, "th config:",
             input_size, "*", filter_size, "stride:", stride, "padding:",
             padding)
+      # Note: float16 test for DepthwiseConv2DFilterGradFormat is not enabled,
+      # calculations are not very precise.
       for data_type in [dtypes.float32, dtypes.float64]:
         self._ConstructAndTestGradient(
             input_size,
diff --git a/tensorflow/python/kernel_tests/distributions/BUILD b/tensorflow/python/kernel_tests/distributions/BUILD
index e21446c2ef..e220d05692 100644
--- a/tensorflow/python/kernel_tests/distributions/BUILD
+++ b/tensorflow/python/kernel_tests/distributions/BUILD
@@ -193,6 +193,7 @@ cuda_py_test(
         "//tensorflow/python:math_ops",
         "//tensorflow/python:platform_test",
     ],
+    tags = ["manual"],  # b/69001419
 )
 
 cuda_py_test(
diff --git a/tensorflow/python/kernel_tests/distributions/multinomial_test.py b/tensorflow/python/kernel_tests/distributions/multinomial_test.py
index ebc89f15c5..e24e8ade73 100644
--- a/tensorflow/python/kernel_tests/distributions/multinomial_test.py
+++ b/tensorflow/python/kernel_tests/distributions/multinomial_test.py
@@ -250,13 +250,11 @@ class MultinomialTest(test.TestCase):
     theta = np.array([[1., 2, 3],
                       [2.5, 4, 0.01]], dtype=np.float32)
     theta /= np.sum(theta, 1)[..., array_ops.newaxis]
-    # Ideally we'd be able to test broadcasting but, the multinomial sampler
-    # doesn't support different total counts.
-    n = np.float32(5)
+    n = np.array([[10., 9.], [8., 7.], [6., 5.]], dtype=np.float32)
     with self.test_session() as sess:
-      # batch_shape=[2], event_shape=[3]
+      # batch_shape=[3, 2], event_shape=[3]
       dist = multinomial.Multinomial(n, theta)
-      x = dist.sample(int(250e3), seed=1)
+      x = dist.sample(int(1000e3), seed=1)
       sample_mean = math_ops.reduce_mean(x, 0)
       x_centered = x - sample_mean[array_ops.newaxis, ...]
       sample_cov = math_ops.reduce_mean(math_ops.matmul(
@@ -291,9 +289,9 @@ class MultinomialTest(test.TestCase):
   def testSampleUnbiasedNonScalarBatch(self):
     with self.test_session() as sess:
       dist = multinomial.Multinomial(
-          total_count=5.,
+          total_count=[7., 6., 5.],
           logits=math_ops.log(2. * self._rng.rand(4, 3, 2).astype(np.float32)))
-      n = int(3e3)
+      n = int(3e4)
       x = dist.sample(n, seed=0)
       sample_mean = math_ops.reduce_mean(x, 0)
       # Cyclically rotate event dims left.
diff --git a/tensorflow/python/kernel_tests/dynamic_partition_op_test.py b/tensorflow/python/kernel_tests/dynamic_partition_op_test.py
index 4883095707..2460950aa9 100644
--- a/tensorflow/python/kernel_tests/dynamic_partition_op_test.py
+++ b/tensorflow/python/kernel_tests/dynamic_partition_op_test.py
@@ -33,8 +33,8 @@ from tensorflow.python.platform import test
 class DynamicPartitionTest(test.TestCase):
 
   def testSimpleOneDimensional(self):
-    with self.test_session() as sess:
-      data = constant_op.constant([0, 13, 2, 39, 4, 17])
+    with self.test_session(use_gpu=True) as sess:
+      data = constant_op.constant([0, 13, 2, 39, 4, 17], dtype=dtypes.float32)
       indices = constant_op.constant([0, 0, 2, 3, 2, 1])
       partitions = data_flow_ops.dynamic_partition(
           data, indices, num_partitions=4)
@@ -52,9 +52,10 @@ class DynamicPartitionTest(test.TestCase):
     self.assertEqual([None], partitions[3].get_shape().as_list())
 
   def testSimpleTwoDimensional(self):
-    with self.test_session() as sess:
+    with self.test_session(use_gpu=True) as sess:
       data = constant_op.constant([[0, 1, 2], [3, 4, 5], [6, 7, 8], [9, 10, 11],
-                                   [12, 13, 14], [15, 16, 17]])
+                                   [12, 13, 14], [15, 16, 17]],
+                                  dtype=dtypes.float32)
       indices = constant_op.constant([0, 0, 2, 3, 2, 1])
       partitions = data_flow_ops.dynamic_partition(
           data, indices, num_partitions=4)
@@ -71,9 +72,61 @@ class DynamicPartitionTest(test.TestCase):
     self.assertEqual([None, 3], partitions[2].get_shape().as_list())
     self.assertEqual([None, 3], partitions[3].get_shape().as_list())
 
+  def testLargeOneDimensional(self):
+    num = 100000
+    data_list = [x for x in range(num)]
+    indices_list = [x % 2 for x in range(num)]
+    part1 = [x for x in range(num) if x % 2 == 0]
+    part2 = [x for x in range(num) if x % 2 == 1]
+    with self.test_session(use_gpu=True) as sess:
+      data = constant_op.constant(data_list, dtype=dtypes.float32)
+      indices = constant_op.constant(indices_list, dtype=dtypes.int32)
+      partitions = data_flow_ops.dynamic_partition(
+          data, indices, num_partitions=2)
+      partition_vals = sess.run(partitions)
+
+    self.assertAllEqual(part1, partition_vals[0])
+    self.assertAllEqual(part2, partition_vals[1])
+
+  def testLargeTwoDimensional(self):
+    rows = 100000
+    cols = 100
+    data_list = [None] * rows
+    for i in range(rows):
+      data_list[i] = [i for _ in range(cols)]
+    num_partitions = 97
+    indices_list = [(i ** 2) % num_partitions for i in range(rows)]
+    parts = [[] for _ in range(num_partitions)]
+    for i in range(rows):
+      parts[(i ** 2) % num_partitions].append(data_list[i])
+    with self.test_session(use_gpu=True) as sess:
+      data = constant_op.constant(data_list, dtype=dtypes.float32)
+      indices = constant_op.constant(indices_list, dtype=dtypes.int32)
+      partitions = data_flow_ops.dynamic_partition(
+          data, indices, num_partitions=num_partitions)
+      partition_vals = sess.run(partitions)
+
+    for i in range(num_partitions):
+      # reshape because of empty parts
+      parts_np = np.array(parts[i], dtype=np.float).reshape(-1, cols)
+      self.assertAllEqual(parts_np, partition_vals[i])
+
+  def testSimpleComplex(self):
+    data_list = [1 + 2j, 3 + 4j, 5 + 6j, 7 + 8j]
+    indices_list = [1, 0, 1, 0]
+    with self.test_session(use_gpu=True) as sess:
+      data = constant_op.constant(data_list, dtype=dtypes.complex64)
+      indices = constant_op.constant(indices_list, dtype=dtypes.int32)
+      partitions = data_flow_ops.dynamic_partition(
+          data, indices, num_partitions=2)
+      partition_vals = sess.run(partitions)
+
+    self.assertAllEqual([3 + 4j, 7 + 8j], partition_vals[0])
+    self.assertAllEqual([1 + 2j, 5 + 6j], partition_vals[1])
+
   def testHigherRank(self):
     np.random.seed(7)
-    with self.test_session() as sess:
+    with self.test_session(use_gpu=True) as sess:
       for n in 2, 3:
         for shape in (4,), (4, 5), (4, 5, 2):
           partitions = np.random.randint(n, size=np.prod(shape)).reshape(shape)
@@ -95,6 +148,49 @@ class DynamicPartitionTest(test.TestCase):
             self.assertEqual(grads[1], None)  # Partitions has no gradients
             self.assertAllEqual(7 * data, sess.run(grads[0]))
 
+  def testEmptyParts(self):
+    data_list = [1, 2, 3, 4]
+    indices_list = [1, 3, 1, 3]
+    with self.test_session(use_gpu=True) as sess:
+      data = constant_op.constant(data_list, dtype=dtypes.float32)
+      indices = constant_op.constant(indices_list, dtype=dtypes.int32)
+      partitions = data_flow_ops.dynamic_partition(
+          data, indices, num_partitions=4)
+      partition_vals = sess.run(partitions)
+
+    self.assertAllEqual([], partition_vals[0])
+    self.assertAllEqual([1, 3], partition_vals[1])
+    self.assertAllEqual([], partition_vals[2])
+    self.assertAllEqual([2, 4], partition_vals[3])
+
+  def testEmptyDataTwoDimensional(self):
+    data_list = [[], []]
+    indices_list = [0, 1]
+    with self.test_session(use_gpu=True) as sess:
+      data = constant_op.constant(data_list, dtype=dtypes.float32)
+      indices = constant_op.constant(indices_list, dtype=dtypes.int32)
+      partitions = data_flow_ops.dynamic_partition(
+          data, indices, num_partitions=3)
+      partition_vals = sess.run(partitions)
+
+    self.assertAllEqual([[]], partition_vals[0])
+    self.assertAllEqual([[]], partition_vals[1])
+    self.assertAllEqual(np.array([], dtype=np.float).reshape(0, 0),
+                        partition_vals[2])
+
+  def testEmptyPartitions(self):
+    data_list = []
+    indices_list = []
+    with self.test_session(use_gpu=True) as sess:
+      data = constant_op.constant(data_list, dtype=dtypes.float32)
+      indices = constant_op.constant(indices_list, dtype=dtypes.int32)
+      partitions = data_flow_ops.dynamic_partition(
+          data, indices, num_partitions=2)
+      partition_vals = sess.run(partitions)
+
+    self.assertAllEqual([], partition_vals[0])
+    self.assertAllEqual([], partition_vals[1])
+
   def testErrorIndexOutOfRange(self):
     with self.test_session() as sess:
       data = constant_op.constant([[0, 1, 2], [3, 4, 5], [6, 7, 8], [9, 10, 11],
diff --git a/tensorflow/python/kernel_tests/pooling_ops_test.py b/tensorflow/python/kernel_tests/pooling_ops_test.py
index a126180414..150e2ff7f2 100644
--- a/tensorflow/python/kernel_tests/pooling_ops_test.py
+++ b/tensorflow/python/kernel_tests/pooling_ops_test.py
@@ -19,6 +19,7 @@ from __future__ import division
 from __future__ import print_function
 
 import numpy as np
+import os
 
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
@@ -1341,11 +1342,33 @@ class PoolingTest(test.TestCase):
       return
 
     # Test the GPU implementation that uses cudnn for now.
-    # It does not propagate the diff in cases of NaNs
+    saved_nanprop = os.environ.get("TF_ENABLE_MAXPOOL_NANPROP")
+    # Do not propagate the diff in cases of NaNs
+    os.environ["TF_ENABLE_MAXPOOL_NANPROP"] = "0"
     expected_input_backprop_cudnn = [
         0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
         0.0, 0.0
     ]
+
+    for v2 in [True, False]:
+      self._testMaxPoolGradDirect(
+          input_data,
+          output_backprop,
+          expected_input_backprop_cudnn,
+          input_sizes=[1, 4, 4, 1],
+          output_sizes=[1, 3, 3, 1],
+          window_rows=2,
+          window_cols=2,
+          row_stride=1,
+          col_stride=1,
+          padding="VALID",
+          use_gpu=True,
+          v2=v2)
+
+    # Propagate the diff in cases of NaNs
+    os.environ["TF_ENABLE_MAXPOOL_NANPROP"] = "1"
+    expected_input_backprop_cudnn = expected_input_backprop_tf_cpu
+
     for v2 in [True, False]:
       self._testMaxPoolGradDirect(
           input_data,
@@ -1361,6 +1384,11 @@ class PoolingTest(test.TestCase):
           use_gpu=True,
           v2=v2)
 
+    if saved_nanprop:
+      os.environ["TF_ENABLE_MAXPOOL_NANPROP"] = saved_nanprop
+    else:
+      del os.environ["TF_ENABLE_MAXPOOL_NANPROP"]
+
   def _testMaxPoolGradDirectWithNans2_2(self):
     input_data = [float("nan")] * 16
     output_backprop = [
@@ -1391,11 +1419,14 @@ class PoolingTest(test.TestCase):
       return
 
     # Test the GPU implementation that uses cudnn for now.
-    # It does not propagate the diff in cases of NaNs
+    saved_nanprop = os.environ.get("TF_ENABLE_MAXPOOL_NANPROP")
+    # Do not propagate the diff in cases of NaNs
+    os.environ["TF_ENABLE_MAXPOOL_NANPROP"] = "0"
     expected_input_backprop_cudnn = [
         0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
         0.0, 0.0
     ]
+
     for v2 in [True, False]:
       self._testMaxPoolGradDirect(
           input_data,
@@ -1411,6 +1442,31 @@ class PoolingTest(test.TestCase):
           use_gpu=True,
           v2=v2)
 
+
+    # Propagate the diff in cases of NaNs
+    os.environ["TF_ENABLE_MAXPOOL_NANPROP"] = "1"
+    expected_input_backprop_cudnn = expected_input_backprop_tf_cpu
+
+    for v2 in [True, False]:
+      self._testMaxPoolGradDirect(
+          input_data,
+          output_backprop,
+          expected_input_backprop_cudnn,
+          input_sizes=[1, 4, 4, 1],
+          output_sizes=[1, 3, 3, 1],
+          window_rows=2,
+          window_cols=2,
+          row_stride=1,
+          col_stride=1,
+          padding="VALID",
+          use_gpu=True,
+          v2=v2)
+
+    if saved_nanprop:
+      os.environ["TF_ENABLE_MAXPOOL_NANPROP"] = saved_nanprop
+    else:
+      del os.environ["TF_ENABLE_MAXPOOL_NANPROP"]
+
   def testMaxPoolGradDirect(self):
     self._testMaxPoolGradDirect1_1()
     self._testMaxPoolGradDirect1_2()
diff --git a/tensorflow/python/kernel_tests/reader_ops_test.py b/tensorflow/python/kernel_tests/reader_ops_test.py
index 5630259b7b..8e54d10f32 100644
--- a/tensorflow/python/kernel_tests/reader_ops_test.py
+++ b/tensorflow/python/kernel_tests/reader_ops_test.py
@@ -35,6 +35,9 @@ from tensorflow.python.ops import data_flow_ops
 from tensorflow.python.ops import io_ops
 from tensorflow.python.ops import variables
 from tensorflow.python.platform import test
+from tensorflow.python.training import coordinator
+from tensorflow.python.training import input as input_lib
+from tensorflow.python.training import queue_runner_impl
 from tensorflow.python.util import compat
 
 prefix_path = "tensorflow/core/lib"
@@ -1011,6 +1014,25 @@ class LMDBReaderTest(test.TestCase):
                                     "\\(requested 1, current size 0\\)"):
         k, v = sess.run([key, value])
 
+  def testReadFromSameFile(self):
+    with self.test_session() as sess:
+      reader1 = io_ops.LMDBReader(name="test_read_from_same_file1")
+      reader2 = io_ops.LMDBReader(name="test_read_from_same_file2")
+      filename_queue = input_lib.string_input_producer([self.db_path],
+                                                       num_epochs=None)
+      key1, value1 = reader1.read(filename_queue)
+      key2, value2 = reader2.read(filename_queue)
+
+      coord = coordinator.Coordinator()
+      threads = queue_runner_impl.start_queue_runners(sess, coord=coord)
+      for i in range(3):
+        for j in range(10):
+          k1, v1, k2, v2 = sess.run([key1, value1, key2, value2])
+          self.assertAllEqual(compat.as_bytes(k1), compat.as_bytes(k2))
+          self.assertAllEqual(compat.as_bytes(v1), compat.as_bytes(v2))
+      coord.request_stop()
+      coord.join(threads)
+
   def testReadFromFolder(self):
     with self.test_session() as sess:
       reader = io_ops.LMDBReader(name="test_read_from_folder")
@@ -1029,6 +1051,25 @@ class LMDBReaderTest(test.TestCase):
                                     "\\(requested 1, current size 0\\)"):
         k, v = sess.run([key, value])
 
+  def testReadFromFileRepeatedly(self):
+    with self.test_session() as sess:
+      reader = io_ops.LMDBReader(name="test_read_from_file_repeated")
+      filename_queue = input_lib.string_input_producer([self.db_path],
+                                                       num_epochs=None)
+      key, value = reader.read(filename_queue)
+
+      coord = coordinator.Coordinator()
+      threads = queue_runner_impl.start_queue_runners(sess, coord=coord)
+      # Iterate over the lmdb 3 times.
+      for i in range(3):
+        # Go over all 10 records each time.
+        for j in range(10):
+          k, v = sess.run([key, value])
+          self.assertAllEqual(compat.as_bytes(k), compat.as_bytes(str(j)))
+          self.assertAllEqual(
+              compat.as_bytes(v), compat.as_bytes(str(chr(ord("a") + j))))
+      coord.request_stop()
+      coord.join(threads)
 
 if __name__ == "__main__":
   test.main()
diff --git a/tensorflow/python/kernel_tests/segment_reduction_ops_test.py b/tensorflow/python/kernel_tests/segment_reduction_ops_test.py
index 516a9d000e..3a02f24902 100644
--- a/tensorflow/python/kernel_tests/segment_reduction_ops_test.py
+++ b/tensorflow/python/kernel_tests/segment_reduction_ops_test.py
@@ -323,8 +323,9 @@ class UnsortedSegmentSumTest(SegmentReductionHelper):
   def testBadIndices(self):
     # Note: GPU kernel does not return the out-of-range error needed for this
     # test, so this test is marked as cpu-only.
+    # Note: With PR #13055 a negative index will be ignored silently.
     with self.test_session(use_gpu=False):
-      for bad in [[-1]], [[7]]:
+      for bad in [[2]], [[7]]:
         unsorted = math_ops.unsorted_segment_sum([[17]], bad, num_segments=2)
         with self.assertRaisesOpError(
             r"segment_ids\[0,0\] = %d is out of range \[0, 2\)" % bad[0][0]):
@@ -360,6 +361,32 @@ class UnsortedSegmentSumTest(SegmentReductionHelper):
             x_init_value=np_x.astype(np.double), delta=1)
       self.assertAllClose(jacob_t, jacob_n)
 
+  def testDropNegatives(self):
+    # Note: the test is done by replacing segment_ids with 8 to -1
+    # for index  and replace values generated by numpy with 0.
+    dtypes = [
+        dtypes_lib.float32, dtypes_lib.float64, dtypes_lib.int64,
+        dtypes_lib.int32, dtypes_lib.complex64, dtypes_lib.complex128
+    ]
+    indices_flat = np.array([0, 4, 0, 8, 3, 8, 4, 7, 7, 3])
+    num_segments = 12
+    for indices in indices_flat, indices_flat.reshape(5, 2):
+      shape = indices.shape + (2,)
+      for dtype in dtypes:
+        with self.test_session(use_gpu=True):
+          tf_x, np_x = self._input(shape, dtype=dtype)
+          np_ans = self._segmentReduce(
+              indices, np_x, np.add, op2=None, num_out_rows=num_segments)
+          # Replace np_ans[8] with 0 for the value
+          np_ans[8:] = 0
+          # Replace 8 with -1 in indices
+          np.place(indices, indices==8, [-1])
+          s = math_ops.unsorted_segment_sum(
+              data=tf_x, segment_ids=indices, num_segments=num_segments)
+          tf_ans = s.eval()
+        self.assertAllClose(np_ans, tf_ans)
+        self.assertShapeEqual(np_ans, s)
+
 
 class SparseSegmentReductionHelper(SegmentReductionHelper):
 
diff --git a/tensorflow/python/kernel_tests/shape_ops_test.py b/tensorflow/python/kernel_tests/shape_ops_test.py
index a9fc699b21..7368251ab6 100644
--- a/tensorflow/python/kernel_tests/shape_ops_test.py
+++ b/tensorflow/python/kernel_tests/shape_ops_test.py
@@ -258,6 +258,16 @@ class ShapeOpsTest(test.TestCase):
       self.assertAllEqual([True], array_ops.expand_dims(inp, 0).eval())
       self.assertAllEqual([True], array_ops.expand_dims(inp, -1).eval())
 
+  def testExpandDimsDimType(self):
+    for dtype in [dtypes.int32, dtypes.int64]:
+      x = np.zeros([2])
+      np_ans = np.expand_dims(x, axis=0)
+      with self.test_session(use_gpu=True):
+        tensor = array_ops.expand_dims(x, constant_op.constant(0, dtype))
+        tf_ans = tensor.eval()
+      self.assertShapeEqual(np_ans, tensor)
+      self.assertAllEqual(np_ans, tf_ans)
+
   def _compareSqueeze(self, x, squeeze_dims, use_gpu):
     with self.test_session(use_gpu=use_gpu):
       if squeeze_dims:
diff --git a/tensorflow/python/kernel_tests/slice_op_test.py b/tensorflow/python/kernel_tests/slice_op_test.py
index 051a25080b..6cdc7872f9 100644
--- a/tensorflow/python/kernel_tests/slice_op_test.py
+++ b/tensorflow/python/kernel_tests/slice_op_test.py
@@ -217,6 +217,30 @@ class SliceTest(test.TestCase):
     self.assertEqual(expected_val.shape, slice_t.get_shape())
     self.assertEqual(expected_val.shape, slice2_t.get_shape())
 
+  def testRandomHighRank(self):
+    # Random dims of rank 8
+    input_shape = np.random.randint(0, 20, size=8)
+    inp = np.random.rand(*input_shape).astype("f")
+    with self.test_session(use_gpu=True) as sess:
+      a = constant_op.constant(
+          [float(x) for x in inp.ravel(order="C")],
+          shape=input_shape,
+          dtype=dtypes.float32)
+      indices = [0 if x == 0 else np.random.randint(x) for x in input_shape]
+      sizes = [
+          np.random.randint(0, input_shape[i] - indices[i] + 1)
+          for i in range(8)
+      ]
+      slice_t = array_ops.slice(a, indices, sizes)
+      slice_val = sess.run(slice_t)
+
+    expected_val = inp[indices[0]:indices[0] + sizes[0], indices[1]:indices[1] + sizes[
+      1], indices[2]:indices[2] + sizes[2], indices[3]:indices[3] + sizes[3], indices[
+        4]:indices[4] + sizes[4], indices[5]:indices[5] + sizes[5], indices[6]:indices[
+          6] + sizes[6], indices[7]:indices[7] + sizes[7]]
+    self.assertAllEqual(slice_val, expected_val)
+    self.assertEqual(expected_val.shape, slice_t.get_shape())
+
   def testPartialShapeInference(self):
     z = array_ops.zeros((1, 2, 3))
     self.assertAllEqual(z.get_shape().as_list(), [1, 2, 3])
@@ -227,7 +251,6 @@ class SliceTest(test.TestCase):
     m2 = array_ops.slice(z, [0, 0, 0], [constant_op.constant(1) + 0, 2, -1])
     self.assertAllEqual(m2.get_shape().as_list(), [None, 2, None])
 
-
   def _testGradientSlice(self, input_shape, slice_begin, slice_size):
     with self.test_session(use_gpu=True):
       num_inputs = np.prod(input_shape)
diff --git a/tensorflow/python/kernel_tests/unique_op_test.py b/tensorflow/python/kernel_tests/unique_op_test.py
index a50f53b3cd..04758ce45a 100644
--- a/tensorflow/python/kernel_tests/unique_op_test.py
+++ b/tensorflow/python/kernel_tests/unique_op_test.py
@@ -22,6 +22,7 @@ import numpy as np
 
 from tensorflow.python.framework import dtypes
 from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import gen_array_ops
 from tensorflow.python.platform import test
 
 
@@ -61,6 +62,31 @@ class UniqueTest(test.TestCase):
     for i in range(len(x)):
       self.assertEqual(x[i], tf_y[tf_idx[i]].decode('ascii'))
 
+  def testInt32Axis(self):
+    x = np.array([[1, 0, 0], [1, 0, 0], [2, 0, 0]])
+    with self.test_session() as sess:
+      y0, idx0 = gen_array_ops.unique_v2(x, axis=[0])
+      tf_y0, tf_idx0 = sess.run([y0, idx0])
+      y1, idx1 = gen_array_ops.unique_v2(x, axis=[1])
+      tf_y1, tf_idx1 = sess.run([y1, idx1])
+    self.assertAllEqual(tf_y0, np.array([[1, 0, 0], [2, 0, 0]]))
+    self.assertAllEqual(tf_idx0, np.array([0, 0, 1]))
+    self.assertAllEqual(tf_y1, np.array([[1, 0], [1, 0], [2, 0]]))
+    self.assertAllEqual(tf_idx1, np.array([0, 1, 1]))
+
+  def testInt32V2(self):
+    # This test is only temporary, once V2 is used
+    # by default, the axis will be wrapped to allow `axis=None`.
+    x = np.random.randint(2, high=10, size=7000)
+    with self.test_session() as sess:
+      y, idx = gen_array_ops.unique_v2(x, axis=[])
+      tf_y, tf_idx = sess.run([y, idx])
+
+    self.assertEqual(len(x), len(tf_idx))
+    self.assertEqual(len(tf_y), len(np.unique(x)))
+    for i in range(len(x)):
+      self.assertEqual(x[i], tf_y[tf_idx[i]])
+
 class UniqueWithCountsTest(test.TestCase):
 
   def testInt32(self):
diff --git a/tensorflow/python/layers/base.py b/tensorflow/python/layers/base.py
index 74b85da845..6be2bc3e76 100644
--- a/tensorflow/python/layers/base.py
+++ b/tensorflow/python/layers/base.py
@@ -221,7 +221,7 @@ class Layer(object):
 
     Weight updates (for instance, the updates of the moving mean and variance
     in a BatchNormalization layer) may be dependent on the inputs passed
-    when calling a layer. Hence, when reusing a same layer on
+    when calling a layer. Hence, when reusing the same layer on
     different inputs `a` and `b`, some entries in `layer.updates` may be
     dependent on `a` and some on `b`. This method automatically keeps track
     of dependencies.
@@ -295,9 +295,9 @@ class Layer(object):
     """Add loss tensor(s), potentially dependent on layer inputs.
 
     Some losses (for instance, activity regularization losses) may be dependent
-    on the inputs passed when calling a layer. Hence, when reusing a same layer
-    on different inputs `a` and `b`, some entries in `layer.losses` may be
-    dependent on `a` and some on `b`. This method automatically keeps track
+    on the inputs passed when calling a layer. Hence, when reusing the same
+    layer on different inputs `a` and `b`, some entries in `layer.losses` may
+    be dependent on `a` and some on `b`. This method automatically keeps track
     of dependencies.
 
     The `get_losses_for` method allows to retrieve the losses relevant to a
diff --git a/tensorflow/python/layers/convolutional.py b/tensorflow/python/layers/convolutional.py
index 0c7ce02835..8c327d7e27 100644
--- a/tensorflow/python/layers/convolutional.py
+++ b/tensorflow/python/layers/convolutional.py
@@ -813,6 +813,7 @@ def conv3d(inputs,
       bias_constraint=bias_constraint,
       trainable=trainable,
       name=name,
+      dtype=inputs.dtype.base_dtype,
       _reuse=reuse,
       _scope=name)
   return layer.apply(inputs)
@@ -1746,6 +1747,7 @@ def conv3d_transpose(inputs,
       bias_constraint=bias_constraint,
       trainable=trainable,
       name=name,
+      dtype=inputs.dtype.base_dtype,
       _reuse=reuse,
       _scope=name)
   return layer.apply(inputs)
diff --git a/tensorflow/python/layers/normalization.py b/tensorflow/python/layers/normalization.py
index 9d9b2b3941..4d5fb97845 100644
--- a/tensorflow/python/layers/normalization.py
+++ b/tensorflow/python/layers/normalization.py
@@ -26,6 +26,7 @@ import numpy as np
 
 from tensorflow.python.eager import context
 from tensorflow.python.framework import constant_op
+from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
 from tensorflow.python.framework import tensor_shape
 from tensorflow.python.layers import base
@@ -239,6 +240,12 @@ class BatchNormalization(base.Layer):
         raise ValueError('Unsupported axis, fused batch norm only supports '
                          'axis == [1] or axis == [3]')
 
+    # Raise parameters of fp16 batch norm to fp32
+    if self.dtype == dtypes.float16:
+      param_dtype = dtypes.float32
+    else:
+      param_dtype = self.dtype or dtypes.float32
+
     axis_to_dim = {x: input_shape[x].value for x in self.axis}
     for x in axis_to_dim:
       if axis_to_dim[x] is None:
@@ -262,6 +269,7 @@ class BatchNormalization(base.Layer):
     if self.scale:
       self.gamma = self.add_variable(name='gamma',
                                      shape=param_shape,
+                                     dtype=param_dtype,
                                      initializer=self.gamma_initializer,
                                      regularizer=self.gamma_regularizer,
                                      constraint=self.gamma_constraint,
@@ -269,11 +277,14 @@ class BatchNormalization(base.Layer):
     else:
       self.gamma = None
       if self.fused:
-        self._gamma_const = array_ops.constant(1.0, shape=param_shape)
+        self._gamma_const = array_ops.constant(1.0,
+                                               dtype=param_dtype,
+                                               shape=param_shape)
 
     if self.center:
       self.beta = self.add_variable(name='beta',
                                     shape=param_shape,
+                                    dtype=param_dtype,
                                     initializer=self.beta_initializer,
                                     regularizer=self.beta_regularizer,
                                     constraint=self.beta_constraint,
@@ -281,7 +292,9 @@ class BatchNormalization(base.Layer):
     else:
       self.beta = None
       if self.fused:
-        self._beta_const = array_ops.constant(0.0, shape=param_shape)
+        self._beta_const = array_ops.constant(0.0,
+                                              dtype=param_dtype,
+                                              shape=param_shape)
 
     # Disable variable partitioning when creating the moving mean and variance
     try:
@@ -293,12 +306,14 @@ class BatchNormalization(base.Layer):
       self.moving_mean = self.add_variable(
           name='moving_mean',
           shape=param_shape,
+          dtype=param_dtype,
           initializer=self.moving_mean_initializer,
           trainable=False)
 
       self.moving_variance = self.add_variable(
           name='moving_variance',
           shape=param_shape,
+          dtype=param_dtype,
           initializer=self.moving_variance_initializer,
           trainable=False)
 
@@ -314,6 +329,7 @@ class BatchNormalization(base.Layer):
         def _renorm_variable(name, shape):
           var = self.add_variable(name=name,
                                   shape=shape,
+                                  dtype=param_dtype,
                                   initializer=init_ops.zeros_initializer(),
                                   trainable=False)
           return var
@@ -356,7 +372,6 @@ class BatchNormalization(base.Layer):
 
   def _fused_batch_norm(self, inputs, training):
     """Returns the output of fused batch norm."""
-    # TODO(reedwm): Add support for fp16 inputs.
     beta = self.beta if self.center else self._beta_const
     gamma = self.gamma if self.scale else self._gamma_const
 
@@ -752,6 +767,7 @@ def batch_normalization(inputs,
       virtual_batch_size=virtual_batch_size,
       adjustment=adjustment,
       name=name,
+      dtype=inputs.dtype.base_dtype,
       _reuse=reuse,
       _scope=name)
   return layer.apply(inputs, training=training)
diff --git a/tensorflow/python/layers/normalization_test.py b/tensorflow/python/layers/normalization_test.py
index 90ebdc8c86..b2876c58c2 100644
--- a/tensorflow/python/layers/normalization_test.py
+++ b/tensorflow/python/layers/normalization_test.py
@@ -68,11 +68,12 @@ class BNTest(test.TestCase):
              use_gpu,
              is_fused,
              restore=False,
-             freeze_mode=False):
+             freeze_mode=False,
+             dtype=dtypes.float32):
     ops.reset_default_graph()
     graph = ops.get_default_graph()
     with self.test_session(graph=graph, use_gpu=use_gpu) as sess:
-      image = array_ops.placeholder(dtype='float32', shape=shape)
+      image = array_ops.placeholder(dtype=dtype, shape=shape)
       loss, train_op, saver = self._simple_model(image, is_fused, freeze_mode)
       if restore:
         saver.restore(sess, checkpoint_path)
@@ -80,7 +81,7 @@ class BNTest(test.TestCase):
         sess.run(variables.global_variables_initializer())
       np.random.seed(0)
       for _ in range(2):
-        image_val = np.random.rand(*shape).astype(np.float32)
+        image_val = np.random.rand(*shape).astype(dtype.as_numpy_dtype)
         sess.run([loss, train_op], feed_dict={image: image_val})
       if restore:
         all_vars = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)
@@ -90,15 +91,74 @@ class BNTest(test.TestCase):
         saver.save(sess, checkpoint_path)
 
   def _infer(self, checkpoint_path, image_val, shape, use_gpu, is_fused):
+    dtype = image_val.dtype
     ops.reset_default_graph()
     graph = ops.get_default_graph()
     with self.test_session(graph=graph, use_gpu=use_gpu) as sess:
-      image = array_ops.placeholder(dtype='float32', shape=shape)
+      image = array_ops.placeholder(dtype=dtype, shape=shape)
       loss, _, saver = self._simple_model(image, is_fused, True)
       saver.restore(sess, checkpoint_path)
       loss_val = sess.run(loss, feed_dict={image: image_val})
       return loss_val
 
+  def _trainEvalSequence(self,
+                         dtype,
+                         train1_use_gpu,
+                         train2_use_gpu,
+                         infer_use_gpu):
+    batch, height, width, input_channels = 2, 4, 5, 3
+    shape = [batch, height, width, input_channels]
+    checkpoint = os.path.join(self.get_temp_dir(), 'cp_%s_%s_%s_%s' %
+        (dtype, train1_use_gpu, train2_use_gpu, infer_use_gpu))
+
+    self._train(
+        checkpoint,
+        shape,
+        use_gpu=train1_use_gpu,
+        is_fused=True,
+        restore=False,
+        freeze_mode=False,
+        dtype=dtype)
+
+    train_vars = self._train(
+        checkpoint,
+        shape,
+        use_gpu=train2_use_gpu,
+        is_fused=True,
+        restore=True,
+        freeze_mode=False,
+        dtype=dtype)
+
+    np.random.seed(0)
+    image_val = np.random.rand(batch,
+                               height,
+                               width,
+                               input_channels).astype(dtype.as_numpy_dtype)
+    loss_val = self._infer(checkpoint, image_val, shape,
+                           use_gpu=infer_use_gpu, is_fused=True)
+
+    return train_vars, loss_val
+
+  def testHalfPrecision(self):
+    ref_vars, ref_loss = self._trainEvalSequence(dtype=dtypes.float32,
+                                                 train1_use_gpu=True,
+                                                 train2_use_gpu=True,
+                                                 infer_use_gpu=True)
+ 
+    self.assertEqual(len(ref_vars), 5)
+
+    for train1_use_gpu in [True, False]:
+      for train2_use_gpu in [True, False]:
+        for infer_use_gpu in [True, False]:
+          test_vars, test_loss = self._trainEvalSequence(dtypes.float16,
+                                                         train1_use_gpu,
+                                                         train2_use_gpu,
+                                                         infer_use_gpu)
+          self.assertEqual(len(test_vars), 5)
+          for test_var, ref_var in zip(test_vars, ref_vars):
+            self.assertAllClose(test_var, ref_var, rtol=1.e-3, atol=1.e-3)
+          self.assertAllClose(test_loss, ref_loss, rtol=1.e-3, atol=1.e-3)
+
   def _testCheckpoint(self, is_fused_checkpoint_a, is_fused_checkpoint_b,
                       use_gpu_checkpoint_a, use_gpu_checkpoint_b,
                       use_gpu_test_a, use_gpu_test_b, freeze_mode):
@@ -218,6 +278,36 @@ class BNTest(test.TestCase):
         ops.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES),
         bn.trainable_variables)
 
+  def testCreateFusedBNFloat16(self):
+    # Call layer.
+    bn = normalization_layers.BatchNormalization(axis=1, fused=True)
+    inputs = random_ops.random_uniform((5, 4, 3, 3),
+                                       seed=1,
+                                       dtype=dtypes.float16)
+    training = array_ops.placeholder(dtype='bool')
+    outputs = bn.apply(inputs, training=training)
+
+    # Verify shape.
+    self.assertListEqual(outputs.get_shape().as_list(), [5, 4, 3, 3])
+
+    # Verify layer attributes.
+    self.assertEqual(len(bn.updates), 2)
+    self.assertEqual(len(bn.variables), 4)
+    self.assertEqual(len(bn.trainable_variables), 2)
+    self.assertEqual(len(bn.non_trainable_variables), 2)
+    for var in bn.variables:
+      self.assertEqual(var.dtype, dtypes.float32_ref)
+
+    # Test that updates were created and added to UPDATE_OPS.
+    self.assertEqual(len(bn.updates), 2)
+    self.assertListEqual(
+        ops.get_collection(ops.GraphKeys.UPDATE_OPS), bn.updates)
+
+    # Test that weights were created and added to TRAINABLE_VARIABLES.
+    self.assertListEqual(
+        ops.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES),
+        bn.trainable_variables)
+
   def test3DInputAxis1(self):
     epsilon = 1e-3
     bn = normalization_layers.BatchNormalization(
diff --git a/tensorflow/python/ops/array_ops.py b/tensorflow/python/ops/array_ops.py
index c3c7ecd080..43238757c7 100644
--- a/tensorflow/python/ops/array_ops.py
+++ b/tensorflow/python/ops/array_ops.py
@@ -1132,7 +1132,7 @@ def concat(values, axis, name="concat"):
   return gen_array_ops._concat_v2(values=values, axis=axis, name=name)
 
 
-def boolean_mask(tensor, mask, name="boolean_mask"):
+def boolean_mask(tensor, mask, name="boolean_mask", axis=None):
   """Apply boolean mask to tensor.  Numpy equivalent is `tensor[mask]`.
 
   ```python
@@ -1146,11 +1146,17 @@ def boolean_mask(tensor, mask, name="boolean_mask"):
   the first K dimensions of `tensor`'s shape.  We then have:
     `boolean_mask(tensor, mask)[i, j1,...,jd] = tensor[i1,...,iK,j1,...,jd]`
   where `(i1,...,iK)` is the ith `True` entry of `mask` (row-major order).
+  The `axis` could be used with `mask` to indicate the axis to mask from.
+  In that case, `axis + dim(mask) <= dim(tensor)` and `mask`'s shape must match
+  the first `axis + dim(mask)` dimensions of `tensor`'s shape.
 
   Args:
     tensor:  N-D tensor.
     mask:  K-D boolean tensor, K <= N and K must be known statically.
     name:  A name for this operation (optional).
+    axis:  A 0-D int Tensor representing the axis in `tensor` to mask from.
+      By default, axis is 0 which will mask from the first dimension. Otherwise
+      K + axis <= N.
 
   Returns:
     (N-K+1)-dimensional tensor populated by entries in `tensor` corresponding
@@ -1169,10 +1175,10 @@ def boolean_mask(tensor, mask, name="boolean_mask"):
   ```
   """
 
-  def _apply_mask_1d(reshaped_tensor, mask):
+  def _apply_mask_1d(reshaped_tensor, mask, axis=None):
     """Mask tensor along dimension 0 with a 1-D mask."""
     indices = squeeze(where(mask), squeeze_dims=[1])
-    return gather(reshaped_tensor, indices)
+    return gather(reshaped_tensor, indices, axis=axis)
 
   with ops.name_scope(name, values=[tensor, mask]):
     tensor = ops.convert_to_tensor(tensor, name="tensor")
@@ -1187,19 +1193,22 @@ def boolean_mask(tensor, mask, name="boolean_mask"):
       raise ValueError(
           "Number of mask dimensions must be specified, even if some dimensions"
           " are None.  E.g. shape=[None] is ok, but shape=None is not.")
-    shape_tensor[:ndims_mask].assert_is_compatible_with(shape_mask)
+    axis = 0 if axis is None else axis
+    shape_tensor[axis:axis+ndims_mask].assert_is_compatible_with(shape_mask)
 
-    leading_size = gen_math_ops._prod(shape(tensor)[:ndims_mask], [0])
+    leading_size = gen_math_ops._prod(shape(tensor)[axis:axis+ndims_mask], [0])
     tensor = reshape(tensor,
-                     concat([[leading_size],
-                             shape(tensor)[ndims_mask:]], 0))
-    first_dim = shape_tensor[:ndims_mask].num_elements()
+                     concat([shape(tensor)[:axis],
+                             [leading_size],
+                             shape(tensor)[axis+ndims_mask:]], 0))
+    first_dim = shape_tensor[axis:axis+ndims_mask].num_elements()
     tensor.set_shape(
-        tensor_shape.as_shape([first_dim])
-        .concatenate(shape_tensor[ndims_mask:]))
+        tensor_shape.as_shape(shape_tensor[:axis])
+        .concatenate([first_dim])
+        .concatenate(shape_tensor[axis+ndims_mask:]))
 
     mask = reshape(mask, [-1])
-    return _apply_mask_1d(tensor, mask)
+    return _apply_mask_1d(tensor, mask, axis)
 
 
 def sparse_mask(a, mask_indices, name=None):
@@ -1521,7 +1530,8 @@ def zeros_like(tensor, dtype=None, name=None, optimize=True):
   Args:
     tensor: A `Tensor`.
     dtype: A type for the returned `Tensor`. Must be `float32`, `float64`,
-    `int8`, `int16`, `int32`, `int64`, `uint8`, `complex64`, or `complex128`.
+      `int8`, `uint8`, `int16`, `uint16`, int32`, `int64`,
+      `complex64`, `complex128` or `bool`.
     name: A name for the operation (optional).
     optimize: if true, attempt to statically determine the shape of 'tensor'
     and encode it as a constant.
@@ -1572,8 +1582,8 @@ def ones_like(tensor, dtype=None, name=None, optimize=True):
   Args:
     tensor: A `Tensor`.
     dtype: A type for the returned `Tensor`. Must be `float32`, `float64`,
-      `int8`, `int16`, `int32`, `int64`, `uint8`, `complex64`, `complex128` or
-      `bool`.
+      `int8`, `uint8`, `int16`, `uint16`, int32`, `int64`,
+      `complex64`, `complex128` or `bool`.
     name: A name for the operation (optional).
     optimize: if true, attempt to statically determine the shape of 'tensor'
     and encode it as a constant.
diff --git a/tensorflow/python/ops/distributions/dirichlet.py b/tensorflow/python/ops/distributions/dirichlet.py
index 923696a553..2accedf1b9 100644
--- a/tensorflow/python/ops/distributions/dirichlet.py
+++ b/tensorflow/python/ops/distributions/dirichlet.py
@@ -196,7 +196,7 @@ class Dirichlet(distribution.Distribution):
         alpha=self.concentration,
         dtype=self.dtype,
         seed=seed)
-    return gamma_sample / math_ops.reduce_sum(gamma_sample, -1, keep_dims=True)
+    return gamma_sample / math_ops.reduce_sum(gamma_sample, -1, keepdims=True)
 
   @distribution_util.AppendDocstring(_dirichlet_sample_note)
   def _log_prob(self, x):
diff --git a/tensorflow/python/ops/distributions/multinomial.py b/tensorflow/python/ops/distributions/multinomial.py
index 00b5697c83..d49fac59ca 100644
--- a/tensorflow/python/ops/distributions/multinomial.py
+++ b/tensorflow/python/ops/distributions/multinomial.py
@@ -26,6 +26,7 @@ from tensorflow.python.ops import control_flow_ops
 from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import nn_ops
 from tensorflow.python.ops import random_ops
+from tensorflow.python.ops import functional_ops
 from tensorflow.python.ops.distributions import distribution
 from tensorflow.python.ops.distributions import util as distribution_util
 
@@ -140,6 +141,8 @@ class Multinomial(distribution.Distribution):
 
   counts = [[2., 1, 1], [3, 1, 1]]
   dist.prob(counts)  # Shape [2]
+
+  dist.sample(5) # Shape [5, 2, 3]
   ```
   """
 
@@ -231,29 +234,35 @@ class Multinomial(distribution.Distribution):
 
   def _sample_n(self, n, seed=None):
     n_draws = math_ops.cast(self.total_count, dtype=dtypes.int32)
-    if self.total_count.get_shape().ndims is not None:
-      if self.total_count.get_shape().ndims != 0:
-        raise NotImplementedError(
-            "Sample only supported for scalar number of draws.")
-    elif self.validate_args:
-      is_scalar = check_ops.assert_rank(
-          n_draws, 0,
-          message="Sample only supported for scalar number of draws.")
-      n_draws = control_flow_ops.with_dependencies([is_scalar], n_draws)
     k = self.event_shape_tensor()[0]
-    # Flatten batch dims so logits has shape [B, k],
-    # where B = reduce_prod(self.batch_shape_tensor()).
-    x = random_ops.multinomial(
-        logits=array_ops.reshape(self.logits, [-1, k]),
-        num_samples=n * n_draws,
-        seed=seed)
-    x = array_ops.reshape(x, shape=[-1, n, n_draws])
-    x = math_ops.reduce_sum(array_ops.one_hot(x, depth=k),
-                            axis=-2)  # shape: [B, n, k]
+
+    # boardcast the total_count and logits to same shape
+    n_draws = array_ops.ones_like(
+        self.logits[..., 0], dtype=n_draws.dtype) * n_draws
+    logits = array_ops.ones_like(
+        n_draws[..., array_ops.newaxis], dtype=self.logits.dtype) * self.logits
+
+    # flatten the total_count and logits
+    flat_logits = array_ops.reshape(logits, [-1, k]) # [B1B2...Bm, k]
+    flat_ndraws = n * array_ops.reshape(n_draws, [-1]) # [B1B2...Bm]
+
+    # computes each total_count and logits situation by map_fn
+    def _sample_single(args):
+      logits, n_draw = args[0], args[1] # [K], []
+      x = random_ops.multinomial(logits[array_ops.newaxis, ...],
+                                 n_draw, seed) # [1, n*n_draw]
+      x = array_ops.reshape(x, shape=[n, -1]) # [n, n_draw]
+      x = math_ops.reduce_sum(array_ops.one_hot(x, depth=k), axis=-2) # [n, k]
+      return x
+    x = functional_ops.map_fn(_sample_single,
+                              [flat_logits, flat_ndraws],
+                              dtype=self.dtype) # [B1B2...Bm, n, k]
+
+    # reshape the results to proper shape
     x = array_ops.transpose(x, perm=[1, 0, 2])
     final_shape = array_ops.concat([[n], self.batch_shape_tensor(), [k]], 0)
-    x = array_ops.reshape(x, final_shape)
-    return math_ops.cast(x, self.dtype)
+    x = array_ops.reshape(x, final_shape) # [n, B1, B2,..., Bm, k]
+    return x
 
   @distribution_util.AppendDocstring(_multinomial_sample_note)
   def _log_prob(self, counts):
diff --git a/tensorflow/python/ops/image_ops_impl.py b/tensorflow/python/ops/image_ops_impl.py
index 2946dbe81e..7c23321ca5 100644
--- a/tensorflow/python/ops/image_ops_impl.py
+++ b/tensorflow/python/ops/image_ops_impl.py
@@ -1121,7 +1121,7 @@ def rgb_to_grayscale(images, name=None):
     rank_1 = array_ops.expand_dims(array_ops.rank(images) - 1, 0)
     gray_float = math_ops.reduce_sum(flt_image * rgb_weights,
                                      rank_1,
-                                     keep_dims=True)
+                                     keepdims=True)
     gray_float.set_shape(images.get_shape()[:-1].concatenate([1]))
     return convert_image_dtype(gray_float, orig_dtype, name=name)
 
@@ -1212,26 +1212,7 @@ def adjust_hue(image, delta, name=None):
     orig_dtype = image.dtype
     flt_image = convert_image_dtype(image, dtypes.float32)
 
-    # TODO(zhengxq): we will switch to the fused version after we add a GPU
-    # kernel for that.
-    fused = os.environ.get('TF_ADJUST_HUE_FUSED', '')
-    fused = fused.lower() in ('true', 't', '1')
-
-    if not fused:
-      hsv = gen_image_ops.rgb_to_hsv(flt_image)
-
-      hue = array_ops.slice(hsv, [0, 0, 0], [-1, -1, 1])
-      saturation = array_ops.slice(hsv, [0, 0, 1], [-1, -1, 1])
-      value = array_ops.slice(hsv, [0, 0, 2], [-1, -1, 1])
-
-      # Note that we add 2*pi to guarantee that the resulting hue is a positive
-      # floating point number since delta is [-0.5, 0.5].
-      hue = math_ops.mod(hue + (delta + 1.), 1.)
-
-      hsv_altered = array_ops.concat([hue, saturation, value], 2)
-      rgb_altered = gen_image_ops.hsv_to_rgb(hsv_altered)
-    else:
-      rgb_altered = gen_image_ops.adjust_hue(flt_image, delta)
+    rgb_altered = gen_image_ops.adjust_hue(flt_image, delta)
 
     return convert_image_dtype(rgb_altered, orig_dtype)
 
diff --git a/tensorflow/python/ops/linalg_ops.py b/tensorflow/python/ops/linalg_ops.py
index 2cb467c891..14a039ffd0 100644
--- a/tensorflow/python/ops/linalg_ops.py
+++ b/tensorflow/python/ops/linalg_ops.py
@@ -30,6 +30,7 @@ from tensorflow.python.ops import math_ops
 from tensorflow.python.ops.gen_linalg_ops import *
 # pylint: enable=wildcard-import
 from tensorflow.python.util import compat
+from tensorflow.python.util.deprecation import deprecated_args
 
 # Names below are lower_case.
 # pylint: disable=invalid-name
@@ -438,7 +439,10 @@ def svd(tensor, full_matrices=False, compute_uv=True, name=None):
 
 
 # pylint: disable=redefined-builtin
-def norm(tensor, ord='euclidean', axis=None, keep_dims=False, name=None):
+@deprecated_args(None, "keep_dims is deprecated, use keepdims instead",
+                 "keep_dims")
+def norm(tensor, ord='euclidean', axis=None, keepdims=None, name=None,
+         keep_dims=None):
   r"""Computes the norm of vectors, matrices, and tensors.
 
   This function can compute several different vector norms (the 1-norm, the
@@ -471,13 +475,13 @@ def norm(tensor, ord='euclidean', axis=None, keep_dims=False, name=None):
       can be either a matrix or a batch of matrices at runtime, pass
       `axis=[-2,-1]` instead of `axis=None` to make sure that matrix norms are
       computed.
-    keep_dims: If True, the axis indicated in `axis` are kept with size 1.
+    keepdims: If True, the axis indicated in `axis` are kept with size 1.
       Otherwise, the dimensions in `axis` are removed from the output shape.
     name: The name of the op.
 
   Returns:
     output: A `Tensor` of the same type as tensor, containing the vector or
-      matrix norms. If `keep_dims` is True then the rank of output is equal to
+      matrix norms. If `keepdims` is True then the rank of output is equal to
       the rank of `tensor`. Otherwise, if `axis` is none the output is a scalar,
       if `axis` is an integer, the rank of `output` is one less than the rank
       of `tensor`, if `axis` is a 2-tuple the rank of `output` is two less
@@ -497,6 +501,13 @@ def norm(tensor, ord='euclidean', axis=None, keep_dims=False, name=None):
   @end_compatibility
   """
 
+  if keep_dims is not None:
+    if keepdims is not None:
+      raise ValueError("Cannot specify both 'keep_dims' and 'keepdims'")
+    keepdims = keep_dims
+  if keepdims is None:
+    keepdims = False
+
   is_matrix_norm = ((isinstance(axis, tuple) or isinstance(axis, list)) and
                     len(axis) == 2)
   if is_matrix_norm:
@@ -528,25 +539,25 @@ def norm(tensor, ord='euclidean', axis=None, keep_dims=False, name=None):
       # matrices.
       result = math_ops.sqrt(
           math_ops.reduce_sum(
-              tensor * math_ops.conj(tensor), axis, keep_dims=True))
+              tensor * math_ops.conj(tensor), axis, keepdims=True))
     else:
       result = math_ops.abs(tensor)
       if ord == 1:
         sum_axis = None if axis is None else axis[0]
-        result = math_ops.reduce_sum(result, sum_axis, keep_dims=True)
+        result = math_ops.reduce_sum(result, sum_axis, keepdims=True)
         if is_matrix_norm:
-          result = math_ops.reduce_max(result, axis[-1], keep_dims=True)
+          result = math_ops.reduce_max(result, axis[-1], keepdims=True)
       elif ord == np.inf:
         if is_matrix_norm:
-          result = math_ops.reduce_sum(result, axis[1], keep_dims=True)
+          result = math_ops.reduce_sum(result, axis[1], keepdims=True)
         max_axis = None if axis is None else axis[0]
-        result = math_ops.reduce_max(result, max_axis, keep_dims=True)
+        result = math_ops.reduce_max(result, max_axis, keepdims=True)
       else:
         # General p-norms (positive p only)
         result = math_ops.pow(
             math_ops.reduce_sum(
-                math_ops.pow(result, ord), axis, keep_dims=True), 1.0 / ord)
-    if not keep_dims:
+                math_ops.pow(result, ord), axis, keepdims=True), 1.0 / ord)
+    if not keepdims:
       result = array_ops.squeeze(result, axis)
     return result
 
diff --git a/tensorflow/python/ops/math_grad_test.py b/tensorflow/python/ops/math_grad_test.py
index 5732c756ce..04eeb00518 100644
--- a/tensorflow/python/ops/math_grad_test.py
+++ b/tensorflow/python/ops/math_grad_test.py
@@ -113,6 +113,23 @@ class MinOrMaxGradientTest(test.TestCase):
       self.assertLess(error, 1e-4)
 
 
+class MaximumOrMinimumGradientTest(test.TestCase):
+
+  def testMaximumGradient(self):
+    inputs = constant_op.constant([1.0, 2.0, 3.0, 4.0], dtype=dtypes.float32)
+    outputs = math_ops.maximum(inputs, 3.0)
+    with self.test_session():
+      error = gradient_checker.compute_gradient_error(inputs, [4], outputs, [4])
+      self.assertLess(error, 1e-4)
+
+  def testMinimumGradient(self):
+    inputs = constant_op.constant([1.0, 2.0, 3.0, 4.0], dtype=dtypes.float32)
+    outputs = math_ops.minimum(inputs, 2.0)
+    with self.test_session():
+      error = gradient_checker.compute_gradient_error(inputs, [4], outputs, [4])
+      self.assertLess(error, 1e-4)
+
+
 class ProdGradientTest(test.TestCase):
 
   def testProdGradient(self):
diff --git a/tensorflow/python/ops/math_ops.py b/tensorflow/python/ops/math_ops.py
index 4c400423b6..e2e23dccef 100644
--- a/tensorflow/python/ops/math_ops.py
+++ b/tensorflow/python/ops/math_ops.py
@@ -170,14 +170,13 @@ from tensorflow.python.ops import state_ops
 from tensorflow.python.ops.gen_math_ops import *
 # pylint: enable=wildcard-import
 from tensorflow.python.util import compat
-from tensorflow.python.util.deprecation import deprecated
-from tensorflow.python.util.deprecation import deprecated_args
+from tensorflow.python.util import deprecation
 
 # Aliases for some automatically-generated names.
 linspace = gen_math_ops.lin_space
 
-arg_max = deprecated(None, "Use `argmax` instead")(arg_max)  # pylint: disable=used-before-assignment
-arg_min = deprecated(None, "Use `argmin` instead")(arg_min)  # pylint: disable=used-before-assignment
+arg_max = deprecation.deprecated(None, "Use `argmax` instead")(arg_max)  # pylint: disable=used-before-assignment
+arg_min = deprecation.deprecated(None, "Use `argmin` instead")(arg_min)  # pylint: disable=used-before-assignment
 
 
 def _set_doc(doc):
@@ -190,7 +189,8 @@ def _set_doc(doc):
 
 
 # pylint: disable=redefined-builtin
-@deprecated_args(None, "Use the `axis` argument instead", "dimension")
+@deprecation.deprecated_args(None, "Use the `axis` argument instead",
+                             "dimension")
 @_set_doc(
     gen_math_ops.arg_max.__doc__.replace("dimensions", "axes").replace(
         "dimension", "axis"))
@@ -208,7 +208,8 @@ def argmax(input,
   return gen_math_ops.arg_max(input, axis, name=name, output_type=output_type)
 
 
-@deprecated_args(None, "Use the `axis` argument instead", "dimension")
+@deprecation.deprecated_args(None, "Use the `axis` argument instead",
+                             "dimension")
 @_set_doc(
     gen_math_ops.arg_min.__doc__.replace("dimensions", "axes").replace(
         "dimension", "axis"))
@@ -324,7 +325,7 @@ multiply.__doc__ = gen_math_ops._mul.__doc__.replace("Mul", "`tf.multiply`")
 
 
 # TODO(aselle): put deprecation in after another round of global code changes
-@deprecated(
+@deprecation.deprecated(
     "2016-12-30",
     "`tf.mul(x, y)` is deprecated, please use `tf.multiply(x, y)` or `x * y`")
 def _mul(x, y, name=None):
@@ -343,7 +344,7 @@ subtract.__doc__ = gen_math_ops._sub.__doc__.replace("`Sub`", "`tf.subtract`")
 
 
 # TODO(aselle): put deprecation in after another round of global code changes
-@deprecated(
+@deprecation.deprecated(
     "2016-12-30",
     "`tf.sub(x, y)` is deprecated, please use `tf.subtract(x, y)` or `x - y`")
 def _sub(x, y, name=None):
@@ -381,8 +382,9 @@ def negative(x, name=None):
 
 
 # pylint: disable=g-docstring-has-escape
-@deprecated("2016-12-30",
-            "`tf.neg(x)` is deprecated, please use `tf.negative(x)` or `-x`")
+@deprecation.deprecated(
+    "2016-12-30",
+    "`tf.neg(x)` is deprecated, please use `tf.negative(x)` or `-x`")
 def _neg(x, name=None):
   """Computes numerical negative value element-wise.
 
@@ -1269,24 +1271,27 @@ def _ReductionDims(x, axis, reduction_indices):
     return range(0, array_ops.rank(x))
 
 
-def _may_reduce_to_scalar(keep_dims, axis, reduction_indices, output):
+def _may_reduce_to_scalar(keepdims, axis, reduction_indices, output):
   """Set a reduction's output's shape to be a scalar if we are certain."""
-  if (not output.shape.is_fully_defined()) and (not keep_dims) and (
+  if (not output.shape.is_fully_defined()) and (not keepdims) and (
       axis is None) and (reduction_indices is None):
     output.set_shape(())
   return output
 
 
+@deprecation.deprecated_args(
+    None, "keep_dims is deprecated, use keepdims instead", "keep_dims")
 def reduce_sum(input_tensor,
                axis=None,
-               keep_dims=False,
+               keepdims=None,
                name=None,
-               reduction_indices=None):
+               reduction_indices=None,
+               keep_dims=None):
   """Computes the sum of elements across dimensions of a tensor.
 
   Reduces `input_tensor` along the dimensions given in `axis`.
-  Unless `keep_dims` is true, the rank of the tensor is reduced by 1 for each
-  entry in `axis`. If `keep_dims` is true, the reduced dimensions
+  Unless `keepdims` is true, the rank of the tensor is reduced by 1 for each
+  entry in `axis`. If `keepdims` is true, the reduced dimensions
   are retained with length 1.
 
   If `axis` has no entries, all dimensions are reduced, and a
@@ -1299,7 +1304,7 @@ def reduce_sum(input_tensor,
   tf.reduce_sum(x)  # 6
   tf.reduce_sum(x, 0)  # [2, 2, 2]
   tf.reduce_sum(x, 1)  # [3, 3]
-  tf.reduce_sum(x, 1, keep_dims=True)  # [[3], [3]]
+  tf.reduce_sum(x, 1, keepdims=True)  # [[3], [3]]
   tf.reduce_sum(x, [0, 1])  # 6
   ```
 
@@ -1308,9 +1313,10 @@ def reduce_sum(input_tensor,
     axis: The dimensions to reduce. If `None` (the default),
       reduces all dimensions. Must be in the range
       `[-rank(input_tensor), rank(input_tensor))`.
-    keep_dims: If true, retains reduced dimensions with length 1.
+    keepdims: If true, retains reduced dimensions with length 1.
     name: A name for the operation (optional).
     reduction_indices: The old (deprecated) name for axis.
+    keep_dims: Deprecated alias for `keepdims`.
 
   Returns:
     The reduced tensor.
@@ -1319,26 +1325,34 @@ def reduce_sum(input_tensor,
   Equivalent to np.sum
   @end_compatibility
   """
-  return _may_reduce_to_scalar(keep_dims, axis, reduction_indices,
+  keepdims = deprecation.deprecated_argument_lookup("keepdims", keepdims,
+                                                    "keep_dims", keep_dims)
+  if keepdims is None:
+    keepdims = False
+
+  return _may_reduce_to_scalar(keepdims, axis, reduction_indices,
                                gen_math_ops._sum(
                                    input_tensor,
                                    _ReductionDims(input_tensor, axis,
                                                   reduction_indices),
-                                   keep_dims,
+                                   keepdims,
                                    name=name))
 
 
+@deprecation.deprecated_args(
+    None, "keep_dims is deprecated, use keepdims instead", "keep_dims")
 def count_nonzero(input_tensor,
                   axis=None,
-                  keep_dims=False,
+                  keepdims=None,
                   dtype=dtypes.int64,
                   name=None,
-                  reduction_indices=None):
+                  reduction_indices=None,
+                  keep_dims=None):
   """Computes number of nonzero elements across dimensions of a tensor.
 
   Reduces `input_tensor` along the dimensions given in `axis`.
-  Unless `keep_dims` is true, the rank of the tensor is reduced by 1 for each
-  entry in `axis`. If `keep_dims` is true, the reduced dimensions
+  Unless `keepdims` is true, the rank of the tensor is reduced by 1 for each
+  entry in `axis`. If `keepdims` is true, the reduced dimensions
   are retained with length 1.
 
   If `axis` has no entries, all dimensions are reduced, and a
@@ -1355,7 +1369,7 @@ def count_nonzero(input_tensor,
   tf.count_nonzero(x)  # 3
   tf.count_nonzero(x, 0)  # [1, 2, 0]
   tf.count_nonzero(x, 1)  # [1, 2]
-  tf.count_nonzero(x, 1, keep_dims=True)  # [[1], [2]]
+  tf.count_nonzero(x, 1, keepdims=True)  # [[1], [2]]
   tf.count_nonzero(x, [0, 1])  # 3
   ```
 
@@ -1364,14 +1378,20 @@ def count_nonzero(input_tensor,
     axis: The dimensions to reduce. If `None` (the default),
       reduces all dimensions. Must be in the range
       `[-rank(input_tensor), rank(input_tensor))`.
-    keep_dims: If true, retains reduced dimensions with length 1.
+    keepdims: If true, retains reduced dimensions with length 1.
     dtype: The output dtype; defaults to `tf.int64`.
     name: A name for the operation (optional).
     reduction_indices: The old (deprecated) name for axis.
+    keep_dims: Deprecated alias for `keepdims`.
 
   Returns:
     The reduced tensor (number of nonzero values).
   """
+  keepdims = deprecation.deprecated_argument_lookup("keepdims", keepdims,
+                                                    "keep_dims", keep_dims)
+  if keepdims is None:
+    keepdims = False
+
   with ops.name_scope(name, "count_nonzero", [input_tensor]):
     input_tensor = ops.convert_to_tensor(input_tensor, name="input_tensor")
     zero = input_tensor.dtype.as_numpy_dtype()
@@ -1380,21 +1400,24 @@ def count_nonzero(input_tensor,
             # int64 reduction happens on GPU
             to_int64(gen_math_ops.not_equal(input_tensor, zero)),
             axis=axis,
-            keep_dims=keep_dims,
+            keepdims=keepdims,
             reduction_indices=reduction_indices),
         dtype=dtype)
 
 
+@deprecation.deprecated_args(
+    None, "keep_dims is deprecated, use keepdims instead", "keep_dims")
 def reduce_mean(input_tensor,
                 axis=None,
-                keep_dims=False,
+                keepdims=None,
                 name=None,
-                reduction_indices=None):
+                reduction_indices=None,
+                keep_dims=None):
   """Computes the mean of elements across dimensions of a tensor.
 
   Reduces `input_tensor` along the dimensions given in `axis`.
-  Unless `keep_dims` is true, the rank of the tensor is reduced by 1 for each
-  entry in `axis`. If `keep_dims` is true, the reduced dimensions
+  Unless `keepdims` is true, the rank of the tensor is reduced by 1 for each
+  entry in `axis`. If `keepdims` is true, the reduced dimensions
   are retained with length 1.
 
   If `axis` has no entries, all dimensions are reduced, and a
@@ -1414,36 +1437,58 @@ def reduce_mean(input_tensor,
     axis: The dimensions to reduce. If `None` (the default),
       reduces all dimensions. Must be in the range
       `[-rank(input_tensor), rank(input_tensor))`.
-    keep_dims: If true, retains reduced dimensions with length 1.
+    keepdims: If true, retains reduced dimensions with length 1.
     name: A name for the operation (optional).
     reduction_indices: The old (deprecated) name for axis.
+    keep_dims: Deprecated alias for `keepdims`.
 
   Returns:
     The reduced tensor.
 
   @compatibility(numpy)
   Equivalent to np.mean
+
+  Please note that `np.mean` has a `dtype` parameter that could be used to
+  specify the output type. By default this is `dtype=float64`. On the other
+  hand, `tf.reduce_mean` has an aggressive type inference from `input_tensor`,
+  for example:
+
+  ```python
+  x = tf.constant([1, 0, 1, 0])
+  tf.reduce_mean(x)  # 0
+  y = tf.constant([1., 0., 1., 0.])
+  tf.reduce_mean(y)  # 0.5
+  ```
+
   @end_compatibility
   """
-  return _may_reduce_to_scalar(keep_dims, axis, reduction_indices,
+  keepdims = deprecation.deprecated_argument_lookup("keepdims", keepdims,
+                                                    "keep_dims", keep_dims)
+
+  if keepdims is None:
+    keepdims = False
+  return _may_reduce_to_scalar(keepdims, axis, reduction_indices,
                                gen_math_ops._mean(
                                    input_tensor,
                                    _ReductionDims(input_tensor, axis,
                                                   reduction_indices),
-                                   keep_dims,
+                                   keepdims,
                                    name=name))
 
 
+@deprecation.deprecated_args(
+    None, "keep_dims is deprecated, use keepdims instead", "keep_dims")
 def reduce_prod(input_tensor,
                 axis=None,
-                keep_dims=False,
+                keepdims=None,
                 name=None,
-                reduction_indices=None):
+                reduction_indices=None,
+                keep_dims=None):
   """Computes the product of elements across dimensions of a tensor.
 
   Reduces `input_tensor` along the dimensions given in `axis`.
-  Unless `keep_dims` is true, the rank of the tensor is reduced by 1 for each
-  entry in `axis`. If `keep_dims` is true, the reduced dimensions
+  Unless `keepdims` is true, the rank of the tensor is reduced by 1 for each
+  entry in `axis`. If `keepdims` is true, the reduced dimensions
   are retained with length 1.
 
   If `axis` has no entries, all dimensions are reduced, and a
@@ -1454,9 +1499,10 @@ def reduce_prod(input_tensor,
     axis: The dimensions to reduce. If `None` (the default),
       reduces all dimensions. Must be in the range
       `[-rank(input_tensor), rank(input_tensor))`.
-    keep_dims: If true, retains reduced dimensions with length 1.
+    keepdims: If true, retains reduced dimensions with length 1.
     name: A name for the operation (optional).
     reduction_indices: The old (deprecated) name for axis.
+    keep_dims: Deprecated alias for `keepdims`.
 
   Returns:
     The reduced tensor.
@@ -1465,25 +1511,33 @@ def reduce_prod(input_tensor,
   Equivalent to np.prod
   @end_compatibility
   """
-  return _may_reduce_to_scalar(keep_dims, axis, reduction_indices,
+  keepdims = deprecation.deprecated_argument_lookup("keepdims", keepdims,
+                                                    "keep_dims", keep_dims)
+
+  if keepdims is None:
+    keepdims = False
+  return _may_reduce_to_scalar(keepdims, axis, reduction_indices,
                                gen_math_ops._prod(
                                    input_tensor,
                                    _ReductionDims(input_tensor, axis,
                                                   reduction_indices),
-                                   keep_dims,
+                                   keepdims,
                                    name=name))
 
 
+@deprecation.deprecated_args(
+    None, "keep_dims is deprecated, use keepdims instead", "keep_dims")
 def reduce_min(input_tensor,
                axis=None,
-               keep_dims=False,
+               keepdims=None,
                name=None,
-               reduction_indices=None):
+               reduction_indices=None,
+               keep_dims=None):
   """Computes the minimum of elements across dimensions of a tensor.
 
   Reduces `input_tensor` along the dimensions given in `axis`.
-  Unless `keep_dims` is true, the rank of the tensor is reduced by 1 for each
-  entry in `axis`. If `keep_dims` is true, the reduced dimensions
+  Unless `keepdims` is true, the rank of the tensor is reduced by 1 for each
+  entry in `axis`. If `keepdims` is true, the reduced dimensions
   are retained with length 1.
 
   If `axis` has no entries, all dimensions are reduced, and a
@@ -1494,9 +1548,10 @@ def reduce_min(input_tensor,
     axis: The dimensions to reduce. If `None` (the default),
       reduces all dimensions. Must be in the range
       `[-rank(input_tensor), rank(input_tensor))`.
-    keep_dims: If true, retains reduced dimensions with length 1.
+    keepdims: If true, retains reduced dimensions with length 1.
     name: A name for the operation (optional).
     reduction_indices: The old (deprecated) name for axis.
+    keep_dims: Deprecated alias for `keepdims`.
 
   Returns:
     The reduced tensor.
@@ -1505,25 +1560,32 @@ def reduce_min(input_tensor,
   Equivalent to np.min
   @end_compatibility
   """
-  return _may_reduce_to_scalar(keep_dims, axis, reduction_indices,
+  keepdims = deprecation.deprecated_argument_lookup("keepdims", keepdims,
+                                                    "keep_dims", keep_dims)
+  if keepdims is None:
+    keepdims = False
+  return _may_reduce_to_scalar(keepdims, axis, reduction_indices,
                                gen_math_ops._min(
                                    input_tensor,
                                    _ReductionDims(input_tensor, axis,
                                                   reduction_indices),
-                                   keep_dims,
+                                   keepdims,
                                    name=name))
 
 
+@deprecation.deprecated_args(
+    None, "keep_dims is deprecated, use keepdims instead", "keep_dims")
 def reduce_max(input_tensor,
                axis=None,
-               keep_dims=False,
+               keepdims=None,
                name=None,
-               reduction_indices=None):
+               reduction_indices=None,
+               keep_dims=None):
   """Computes the maximum of elements across dimensions of a tensor.
 
   Reduces `input_tensor` along the dimensions given in `axis`.
-  Unless `keep_dims` is true, the rank of the tensor is reduced by 1 for each
-  entry in `axis`. If `keep_dims` is true, the reduced dimensions
+  Unless `keepdims` is true, the rank of the tensor is reduced by 1 for each
+  entry in `axis`. If `keepdims` is true, the reduced dimensions
   are retained with length 1.
 
   If `axis` has no entries, all dimensions are reduced, and a
@@ -1534,9 +1596,10 @@ def reduce_max(input_tensor,
     axis: The dimensions to reduce. If `None` (the default),
       reduces all dimensions. Must be in the range
       `[-rank(input_tensor), rank(input_tensor))`.
-    keep_dims: If true, retains reduced dimensions with length 1.
+    keepdims: If true, retains reduced dimensions with length 1.
     name: A name for the operation (optional).
     reduction_indices: The old (deprecated) name for axis.
+    keep_dims: Deprecated alias for `keepdims`.
 
   Returns:
     The reduced tensor.
@@ -1545,25 +1608,32 @@ def reduce_max(input_tensor,
   Equivalent to np.max
   @end_compatibility
   """
-  return _may_reduce_to_scalar(keep_dims, axis, reduction_indices,
+  keepdims = deprecation.deprecated_argument_lookup("keepdims", keepdims,
+                                                    "keep_dims", keep_dims)
+  if keepdims is None:
+    keepdims = False
+  return _may_reduce_to_scalar(keepdims, axis, reduction_indices,
                                gen_math_ops._max(
                                    input_tensor,
                                    _ReductionDims(input_tensor, axis,
                                                   reduction_indices),
-                                   keep_dims,
+                                   keepdims,
                                    name=name))
 
 
+@deprecation.deprecated_args(
+    None, "keep_dims is deprecated, use keepdims instead", "keep_dims")
 def reduce_all(input_tensor,
                axis=None,
-               keep_dims=False,
+               keepdims=None,
                name=None,
-               reduction_indices=None):
+               reduction_indices=None,
+               keep_dims=None):
   """Computes the "logical and" of elements across dimensions of a tensor.
 
   Reduces `input_tensor` along the dimensions given in `axis`.
-  Unless `keep_dims` is true, the rank of the tensor is reduced by 1 for each
-  entry in `axis`. If `keep_dims` is true, the reduced dimensions
+  Unless `keepdims` is true, the rank of the tensor is reduced by 1 for each
+  entry in `axis`. If `keepdims` is true, the reduced dimensions
   are retained with length 1.
 
   If `axis` has no entries, all dimensions are reduced, and a
@@ -1583,9 +1653,10 @@ def reduce_all(input_tensor,
     axis: The dimensions to reduce. If `None` (the default),
       reduces all dimensions. Must be in the range
       `[-rank(input_tensor), rank(input_tensor))`.
-    keep_dims: If true, retains reduced dimensions with length 1.
+    keepdims: If true, retains reduced dimensions with length 1.
     name: A name for the operation (optional).
     reduction_indices: The old (deprecated) name for axis.
+    keep_dims: Deprecated alias for `keepdims`.
 
   Returns:
     The reduced tensor.
@@ -1594,25 +1665,32 @@ def reduce_all(input_tensor,
   Equivalent to np.all
   @end_compatibility
   """
-  return _may_reduce_to_scalar(keep_dims, axis, reduction_indices,
+  keepdims = deprecation.deprecated_argument_lookup("keepdims", keepdims,
+                                                    "keep_dims", keep_dims)
+  if keepdims is None:
+    keepdims = False
+  return _may_reduce_to_scalar(keepdims, axis, reduction_indices,
                                gen_math_ops._all(
                                    input_tensor,
                                    _ReductionDims(input_tensor, axis,
                                                   reduction_indices),
-                                   keep_dims,
+                                   keepdims,
                                    name=name))
 
 
+@deprecation.deprecated_args(
+    None, "keep_dims is deprecated, use keepdims instead", "keep_dims")
 def reduce_any(input_tensor,
                axis=None,
-               keep_dims=False,
+               keepdims=None,
                name=None,
-               reduction_indices=None):
+               reduction_indices=None,
+               keep_dims=None):
   """Computes the "logical or" of elements across dimensions of a tensor.
 
   Reduces `input_tensor` along the dimensions given in `axis`.
-  Unless `keep_dims` is true, the rank of the tensor is reduced by 1 for each
-  entry in `axis`. If `keep_dims` is true, the reduced dimensions
+  Unless `keepdims` is true, the rank of the tensor is reduced by 1 for each
+  entry in `axis`. If `keepdims` is true, the reduced dimensions
   are retained with length 1.
 
   If `axis` has no entries, all dimensions are reduced, and a
@@ -1632,9 +1710,10 @@ def reduce_any(input_tensor,
     axis: The dimensions to reduce. If `None` (the default),
       reduces all dimensions. Must be in the range
       `[-rank(input_tensor), rank(input_tensor))`.
-    keep_dims: If true, retains reduced dimensions with length 1.
+    keepdims: If true, retains reduced dimensions with length 1.
     name: A name for the operation (optional).
     reduction_indices: The old (deprecated) name for axis.
+    keep_dims: Deprecated alias for `keepdims`.
 
   Returns:
     The reduced tensor.
@@ -1643,25 +1722,32 @@ def reduce_any(input_tensor,
   Equivalent to np.any
   @end_compatibility
   """
-  return _may_reduce_to_scalar(keep_dims, axis, reduction_indices,
+  keepdims = deprecation.deprecated_argument_lookup("keepdims", keepdims,
+                                                    "keep_dims", keep_dims)
+  if keepdims is None:
+    keepdims = False
+  return _may_reduce_to_scalar(keepdims, axis, reduction_indices,
                                gen_math_ops._any(
                                    input_tensor,
                                    _ReductionDims(input_tensor, axis,
                                                   reduction_indices),
-                                   keep_dims,
+                                   keepdims,
                                    name=name))
 
 
+@deprecation.deprecated_args(
+    None, "keep_dims is deprecated, use keepdims instead", "keep_dims")
 def reduce_logsumexp(input_tensor,
                      axis=None,
-                     keep_dims=False,
+                     keepdims=None,
                      name=None,
-                     reduction_indices=None):
+                     reduction_indices=None,
+                     keep_dims=None):
   """Computes log(sum(exp(elements across dimensions of a tensor))).
 
   Reduces `input_tensor` along the dimensions given in `axis`.
-  Unless `keep_dims` is true, the rank of the tensor is reduced by 1 for each
-  entry in `axis`. If `keep_dims` is true, the reduced dimensions
+  Unless `keepdims` is true, the rank of the tensor is reduced by 1 for each
+  entry in `axis`. If `keepdims` is true, the reduced dimensions
   are retained with length 1.
 
   If `axis` has no entries, all dimensions are reduced, and a
@@ -1678,7 +1764,7 @@ def reduce_logsumexp(input_tensor,
   tf.reduce_logsumexp(x)  # log(6)
   tf.reduce_logsumexp(x, 0)  # [log(2), log(2), log(2)]
   tf.reduce_logsumexp(x, 1)  # [log(3), log(3)]
-  tf.reduce_logsumexp(x, 1, keep_dims=True)  # [[log(3)], [log(3)]]
+  tf.reduce_logsumexp(x, 1, keepdims=True)  # [[log(3)], [log(3)]]
   tf.reduce_logsumexp(x, [0, 1])  # log(6)
   ```
 
@@ -1687,19 +1773,24 @@ def reduce_logsumexp(input_tensor,
     axis: The dimensions to reduce. If `None` (the default),
       reduces all dimensions. Must be in the range
       `[-rank(input_tensor), rank(input_tensor))`.
-    keep_dims: If true, retains reduced dimensions with length 1.
+    keepdims: If true, retains reduced dimensions with length 1.
     name: A name for the operation (optional).
     reduction_indices: The old (deprecated) name for axis.
+    keep_dims: Deprecated alias for `keepdims`.
 
   Returns:
     The reduced tensor.
   """
+  keepdims = deprecation.deprecated_argument_lookup("keepdims", keepdims,
+                                                    "keep_dims", keep_dims)
+  if keepdims is None:
+    keepdims = False
   with ops.name_scope(name, "ReduceLogSumExp", [input_tensor]) as name:
     raw_max = reduce_max(
         input_tensor,
         axis=axis,
         reduction_indices=reduction_indices,
-        keep_dims=True)
+        keepdims=True)
     my_max = array_ops.stop_gradient(
         array_ops.where(
             gen_math_ops.is_finite(raw_max), raw_max,
@@ -1708,13 +1799,13 @@ def reduce_logsumexp(input_tensor,
         reduce_sum(
             gen_math_ops.exp(input_tensor - my_max),
             axis,
-            keep_dims=True,
+            keepdims=True,
             reduction_indices=reduction_indices)) + my_max
-    if not keep_dims:
+    if not keepdims:
       if isinstance(axis, int):
         axis = [axis]
       result = array_ops.squeeze(result, axis)
-    return _may_reduce_to_scalar(keep_dims, axis, reduction_indices, result)
+    return _may_reduce_to_scalar(keepdims, axis, reduction_indices, result)
 
 
 def trace(x, name=None):
@@ -2216,9 +2307,10 @@ def bincount(arr,
     maxlength = ops.convert_to_tensor(
         maxlength, name="maxlength", dtype=dtypes.int32)
     output_size = gen_math_ops.minimum(maxlength, output_size)
-  weights = (
-      ops.convert_to_tensor(weights, name="weights")
-      if weights is not None else constant_op.constant([], dtype))
+  if weights is not None:
+    weights = ops.convert_to_tensor(weights, name="weights")
+    return gen_math_ops.unsorted_segment_sum(weights, arr, output_size)
+  weights = constant_op.constant([], dtype)
   return gen_math_ops.bincount(arr, output_size, weights)
 
 
@@ -2381,7 +2473,7 @@ def reduced_shape(input_shape, axes):
     input_shape: 1-D Tensor, the shape of the Tensor being reduced.
     axes: 1-D Tensor, the reduction axes.
   Returns:
-    A 1-D Tensor, the output shape as if keep_dims were set to True.
+    A 1-D Tensor, the output shape as if keepdims were set to True.
   """
   # Example:
   # cast needed for SparseTensor reductions
diff --git a/tensorflow/python/ops/metrics_impl.py b/tensorflow/python/ops/metrics_impl.py
index 717ee1254f..d30f6b92ad 100644
--- a/tensorflow/python/ops/metrics_impl.py
+++ b/tensorflow/python/ops/metrics_impl.py
@@ -794,7 +794,7 @@ def mean_cosine_distance(labels, predictions, dim, weights=None,
   radial_diffs = math_ops.multiply(predictions, labels)
   radial_diffs = math_ops.reduce_sum(radial_diffs,
                                      reduction_indices=[dim,],
-                                     keep_dims=True)
+                                     keepdims=True)
   mean_distance, update_op = mean(radial_diffs, weights,
                                   None,
                                   None,
diff --git a/tensorflow/python/ops/nn_fused_batchnorm_test.py b/tensorflow/python/ops/nn_fused_batchnorm_test.py
index 1fcd0384da..e72d34d1f7 100644
--- a/tensorflow/python/ops/nn_fused_batchnorm_test.py
+++ b/tensorflow/python/ops/nn_fused_batchnorm_test.py
@@ -335,22 +335,22 @@ class BatchNormalizationTest(test.TestCase):
 
   def testInference(self):
     x_shape = [1, 1, 6, 1]
-    if test.is_gpu_available(cuda_only=True):
-      for dtype in [np.float16, np.float32]:
+    for dtype in [np.float16, np.float32]:
+      if test.is_gpu_available(cuda_only=True):
         self._test_inference(
             x_shape, dtype, [1], np.float32, use_gpu=True, data_format='NHWC')
         self._test_inference(
             x_shape, dtype, [1], np.float32, use_gpu=True, data_format='NCHW')
-    self._test_inference(
-        x_shape, np.float32, [1], np.float32, use_gpu=False, data_format='NHWC')
+      self._test_inference(
+          x_shape, dtype, [1], np.float32, use_gpu=False, data_format='NHWC')
 
     x_shape = [1, 1, 6, 2]
     if test.is_gpu_available(cuda_only=True):
       for dtype in [np.float16, np.float32]:
         self._test_inference(
             x_shape, dtype, [2], np.float32, use_gpu=True, data_format='NHWC')
-    self._test_inference(
-        x_shape, np.float32, [2], np.float32, use_gpu=False, data_format='NHWC')
+        self._test_inference(
+            x_shape, dtype, [2], np.float32, use_gpu=False, data_format='NHWC')
 
     x_shape = [1, 2, 1, 6]
     if test.is_gpu_available(cuda_only=True):
@@ -359,33 +359,33 @@ class BatchNormalizationTest(test.TestCase):
             x_shape, dtype, [2], np.float32, use_gpu=True, data_format='NCHW')
 
     x_shape = [27, 131, 127, 6]
-    if test.is_gpu_available(cuda_only=True):
-      for dtype in [np.float16, np.float32]:
+    for dtype in [np.float16, np.float32]:
+      if test.is_gpu_available(cuda_only=True):
         self._test_inference(
             x_shape, dtype, [131], np.float32, use_gpu=True, data_format='NCHW')
         self._test_inference(
             x_shape, dtype, [6], np.float32, use_gpu=True, data_format='NHWC')
-    self._test_inference(
-        x_shape, np.float32, [6], np.float32, use_gpu=False, data_format='NHWC')
+      self._test_inference(
+          x_shape, dtype, [6], np.float32, use_gpu=False, data_format='NHWC')
 
   def testTraining(self):
     x_shape = [1, 1, 6, 1]
-    if test.is_gpu_available(cuda_only=True):
-      for dtype in [np.float16, np.float32]:
+    for dtype in [np.float16, np.float32]:
+      if test.is_gpu_available(cuda_only=True):
         self._test_training(
             x_shape, dtype, [1], np.float32, use_gpu=True, data_format='NHWC')
         self._test_training(
             x_shape, dtype, [1], np.float32, use_gpu=True, data_format='NCHW')
-    self._test_training(
-        x_shape, np.float32, [1], np.float32, use_gpu=False, data_format='NHWC')
+      self._test_training(
+          x_shape, dtype, [1], np.float32, use_gpu=False, data_format='NHWC')
 
     x_shape = [1, 1, 6, 2]
-    if test.is_gpu_available(cuda_only=True):
-      for dtype in [np.float16, np.float32]:
+    for dtype in [np.float16, np.float32]:
+      if test.is_gpu_available(cuda_only=True):
         self._test_training(
             x_shape, dtype, [2], np.float32, use_gpu=True, data_format='NHWC')
-    self._test_training(
-        x_shape, np.float32, [2], np.float32, use_gpu=False, data_format='NHWC')
+      self._test_training(
+          x_shape, dtype, [2], np.float32, use_gpu=False, data_format='NHWC')
 
     x_shape = [1, 2, 1, 6]
     if test.is_gpu_available(cuda_only=True):
@@ -394,20 +394,20 @@ class BatchNormalizationTest(test.TestCase):
             x_shape, dtype, [2], np.float32, use_gpu=True, data_format='NCHW')
 
     x_shape = [27, 131, 127, 6]
-    if test.is_gpu_available(cuda_only=True):
-      for dtype in [np.float16, np.float32]:
+    for dtype in [np.float16, np.float32]:
+      if test.is_gpu_available(cuda_only=True):
         self._test_training(
             x_shape, dtype, [131], np.float32, use_gpu=True, data_format='NCHW')
         self._test_training(
             x_shape, dtype, [6], np.float32, use_gpu=True, data_format='NHWC')
-    self._test_training(
-        x_shape, np.float32, [6], np.float32, use_gpu=False, data_format='NHWC')
+      self._test_training(
+          x_shape, dtype, [6], np.float32, use_gpu=False, data_format='NHWC')
 
   def testBatchNormGrad(self):
     for is_training in [True, False]:
       x_shape = [1, 1, 6, 1]
-      if test.is_gpu_available(cuda_only=True):
-        for dtype in [np.float16, np.float32]:
+      for dtype in [np.float16, np.float32]:
+        if test.is_gpu_available(cuda_only=True):
           self._test_gradient(
               x_shape,
               dtype, [1],
@@ -422,17 +422,17 @@ class BatchNormalizationTest(test.TestCase):
               use_gpu=True,
               data_format='NCHW',
               is_training=is_training)
-      self._test_gradient(
-          x_shape,
-          np.float32, [1],
-          np.float32,
-          use_gpu=False,
-          data_format='NHWC',
-          is_training=is_training)
+        self._test_gradient(
+            x_shape,
+            dtype, [1],
+            np.float32,
+            use_gpu=False,
+            data_format='NHWC',
+            is_training=is_training)
 
       x_shape = [1, 1, 6, 2]
-      if test.is_gpu_available(cuda_only=True):
-        for dtype in [np.float16, np.float32]:
+      for dtype in [np.float16, np.float32]:
+        if test.is_gpu_available(cuda_only=True):
           self._test_gradient(
               x_shape,
               dtype, [2],
@@ -440,13 +440,13 @@ class BatchNormalizationTest(test.TestCase):
               use_gpu=True,
               data_format='NHWC',
               is_training=is_training)
-      self._test_gradient(
-          x_shape,
-          np.float32, [2],
-          np.float32,
-          use_gpu=False,
-          data_format='NHWC',
-          is_training=is_training)
+        self._test_gradient(
+            x_shape,
+            dtype, [2],
+            np.float32,
+            use_gpu=False,
+            data_format='NHWC',
+            is_training=is_training)
 
       x_shape = [1, 2, 1, 6]
       if test.is_gpu_available(cuda_only=True):
@@ -460,8 +460,8 @@ class BatchNormalizationTest(test.TestCase):
               is_training=is_training)
 
       x_shape = [5, 7, 11, 4]
-      if test.is_gpu_available(cuda_only=True):
-        for dtype in [np.float16, np.float32]:
+      for dtype in [np.float16, np.float32]:
+        if test.is_gpu_available(cuda_only=True):
           self._test_gradient(
               x_shape,
               dtype, [7],
@@ -476,13 +476,13 @@ class BatchNormalizationTest(test.TestCase):
               use_gpu=True,
               data_format='NHWC',
               is_training=is_training)
-      self._test_gradient(
-          x_shape,
-          np.float32, [4],
-          np.float32,
-          use_gpu=False,
-          data_format='NHWC',
-          is_training=is_training)
+        self._test_gradient(
+            x_shape,
+            dtype, [4],
+            np.float32,
+            use_gpu=False,
+            data_format='NHWC',
+            is_training=is_training)
 
   def _testBatchNormGradGrad(self, config):
     shape = config['shape']
@@ -506,15 +506,14 @@ class BatchNormalizationTest(test.TestCase):
             data_format='NCHW',
             is_training=is_training,
             err_tolerance=err_tolerance)
-      if dtype != np.float16:
-        self._test_grad_grad(
-            shape,
-            np.float32, [shape[3]],
-            np.float32,
-            use_gpu=False,
-            data_format='NHWC',
-            is_training=is_training,
-            err_tolerance=err_tolerance)
+      self._test_grad_grad(
+          shape,
+          dtype, [shape[3]],
+          np.float32,
+          use_gpu=False,
+          data_format='NHWC',
+          is_training=is_training,
+          err_tolerance=err_tolerance)
 
   def testBatchNormGradGrad(self):
     configs = [{
@@ -525,6 +524,10 @@ class BatchNormalizationTest(test.TestCase):
         'shape': [2, 3, 2, 2],
         'err_tolerance': 1e-3,
         'dtype': np.float32,
+    }, {
+        'shape': [2, 3, 4, 5],
+        'err_tolerance': 1e-2,
+        'dtype': np.float16,
     }, {
         'shape': [2, 3, 2, 2],
         'err_tolerance': 2e-3,
diff --git a/tensorflow/python/ops/nn_impl.py b/tensorflow/python/ops/nn_impl.py
index 431ea1186a..da037a7983 100644
--- a/tensorflow/python/ops/nn_impl.py
+++ b/tensorflow/python/ops/nn_impl.py
@@ -32,6 +32,8 @@ from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import nn_ops
 from tensorflow.python.ops import sparse_ops
 from tensorflow.python.ops import variables
+from tensorflow.python.util.deprecation import deprecated_args
+from tensorflow.python.util.deprecation import deprecated_argument_lookup
 
 
 def log_poisson_loss(targets, log_input, compute_full_loss=False, name=None):
@@ -313,19 +315,20 @@ def swish(features):
   return features * math_ops.sigmoid(features)
 
 
-def l2_normalize(x, dim, epsilon=1e-12, name=None):
-  """Normalizes along dimension `dim` using an L2 norm.
+@deprecated_args(None, "dim is deprecated, use axis instead", "dim")
+def l2_normalize(x, axis=None, epsilon=1e-12, name=None, dim=None):
+  """Normalizes along dimension `axis` using an L2 norm.
 
-  For a 1-D tensor with `dim = 0`, computes
+  For a 1-D tensor with `axis = 0`, computes
 
       output = x / sqrt(max(sum(x**2), epsilon))
 
   For `x` with more dimensions, independently normalizes each 1-D slice along
-  dimension `dim`.
+  dimension `axis`.
 
   Args:
     x: A `Tensor`.
-    dim: Dimension along which to normalize.  A scalar or a vector of
+    axis: Dimension along which to normalize.  A scalar or a vector of
       integers.
     epsilon: A lower bound value for the norm. Will use `sqrt(epsilon)` as the
       divisor if `norm < sqrt(epsilon)`.
@@ -335,8 +338,9 @@ def l2_normalize(x, dim, epsilon=1e-12, name=None):
     A `Tensor` with the same shape as `x`.
   """
   with ops.name_scope(name, "l2_normalize", [x]) as name:
+    axis = deprecated_argument_lookup("axis", axis, "dim", dim)
     x = ops.convert_to_tensor(x, name="x")
-    square_sum = math_ops.reduce_sum(math_ops.square(x), dim, keep_dims=True)
+    square_sum = math_ops.reduce_sum(math_ops.square(x), axis, keep_dims=True)
     x_inv_norm = math_ops.rsqrt(math_ops.maximum(square_sum, epsilon))
     return math_ops.multiply(x, x_inv_norm, name=name)
 
diff --git a/tensorflow/python/ops/nn_ops.py b/tensorflow/python/ops/nn_ops.py
index bdaac65904..61fa462988 100644
--- a/tensorflow/python/ops/nn_ops.py
+++ b/tensorflow/python/ops/nn_ops.py
@@ -23,6 +23,7 @@ import numbers
 import numpy as np
 
 from tensorflow.python.eager import context
+from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import graph_util
 from tensorflow.python.framework import ops
@@ -37,6 +38,8 @@ from tensorflow.python.ops import random_ops
 # pylint: disable=wildcard-import
 from tensorflow.python.ops.gen_nn_ops import *
 # pylint: enable=wildcard-import
+from tensorflow.python.util.deprecation import deprecated_args
+from tensorflow.python.util.deprecation import deprecated_argument_lookup
 
 from tensorflow.python.util import deprecation
 
@@ -1645,17 +1648,18 @@ def _softmax(logits, compute_op, dim=-1, name=None):
   return output
 
 
-def softmax(logits, dim=-1, name=None):
+@deprecated_args(None, "dim is deprecated, use axis instead", "dim")
+def softmax(logits, axis=None, name=None, dim=None):
   """Computes softmax activations.
 
   This function performs the equivalent of
 
-      softmax = tf.exp(logits) / tf.reduce_sum(tf.exp(logits), dim)
+      softmax = tf.exp(logits) / tf.reduce_sum(tf.exp(logits), axis)
 
   Args:
     logits: A non-empty `Tensor`. Must be one of the following types: `half`,
       `float32`, `float64`.
-    dim: The dimension softmax would be performed on. The default is -1 which
+    axis: The dimension softmax would be performed on. The default is -1 which
       indicates the last dimension.
     name: A name for the operation (optional).
 
@@ -1663,23 +1667,27 @@ def softmax(logits, dim=-1, name=None):
     A `Tensor`. Has the same type and shape as `logits`.
 
   Raises:
-    InvalidArgumentError: if `logits` is empty or `dim` is beyond the last
+    InvalidArgumentError: if `logits` is empty or `axis` is beyond the last
       dimension of `logits`.
   """
-  return _softmax(logits, gen_nn_ops._softmax, dim, name)
+  axis = deprecated_argument_lookup("axis", axis, "dim", dim)
+  if axis is None:
+    axis = -1
+  return _softmax(logits, gen_nn_ops._softmax, axis, name)
 
 
-def log_softmax(logits, dim=-1, name=None):
+@deprecated_args(None, "dim is deprecated, use axis instead", "dim")
+def log_softmax(logits, axis=None, name=None, dim=None):
   """Computes log softmax activations.
 
   For each batch `i` and class `j` we have
 
-      logsoftmax = logits - log(reduce_sum(exp(logits), dim))
+      logsoftmax = logits - log(reduce_sum(exp(logits), axis))
 
   Args:
     logits: A non-empty `Tensor`. Must be one of the following types: `half`,
       `float32`, `float64`.
-    dim: The dimension softmax would be performed on. The default is -1 which
+    axis: The dimension softmax would be performed on. The default is -1 which
       indicates the last dimension.
     name: A name for the operation (optional).
 
@@ -1687,10 +1695,13 @@ def log_softmax(logits, dim=-1, name=None):
     A `Tensor`. Has the same type as `logits`. Same shape as `logits`.
 
   Raises:
-    InvalidArgumentError: if `logits` is empty or `dim` is beyond the last
+    InvalidArgumentError: if `logits` is empty or `axis` is beyond the last
       dimension of `logits`.
   """
-  return _softmax(logits, gen_nn_ops._log_softmax, dim, name)
+  axis = deprecated_argument_lookup("axis", axis, "dim", dim)
+  if axis is None:
+    axis = -1
+  return _softmax(logits, gen_nn_ops._log_softmax, axis, name)
 
 
 def _ensure_xent_args(name, sentinel, labels, logits):
@@ -2305,6 +2316,100 @@ def conv1d(value, filters, stride, padding,
     return array_ops.squeeze(result, [spatial_start_dim])
 
 
+def conv1d_transpose(value,
+                     filter,
+                     output_shape,
+                     stride,
+                     padding="SAME",
+                     data_format="NWC",
+                     name=None):
+  """The transpose of `conv1d`.
+
+  This operation is sometimes called "deconvolution" after [Deconvolutional
+  Networks](http://www.matthewzeiler.com/pubs/cvpr2010/cvpr2010.pdf), but is
+  actually the transpose (gradient) of `conv1d` rather than an actual
+  deconvolution.
+
+  Args:
+    value: A 3-D `Tensor` of type `float` and shape
+      `[batch, in_width, in_channels]` for `NWC` data format or
+      `[batch, in_channels, in_width]` for `NCW` data format.
+    filter: A 3-D `Tensor` with the same type as `value` and shape
+      `[filter_width, output_channels, in_channels]`.  `filter`'s
+      `in_channels` dimension must match that of `value`.
+    output_shape: A 1-D `Tensor` representing the output shape of the
+      deconvolution op.
+    stride: An `integer`.  The number of entries by which
+      the filter is moved right at each step.
+    padding: A string, either `'VALID'` or `'SAME'`. The padding algorithm.
+      See the @{tf.nn.convolution$comment here}
+    data_format: A string. 'NHWC' and 'NCHW' are supported.
+    name: Optional name for the returned tensor.
+
+  Returns:
+    A `Tensor` with the same type as `value`.
+
+  Raises:
+    ValueError: If input/output depth does not match `filter`'s shape, or if
+      padding is other than `'VALID'` or `'SAME'`.
+  """
+  with ops.name_scope(name, "conv1d_transpose",
+                      [value, filter, output_shape]) as name:
+    output_shape_ = ops.convert_to_tensor(output_shape, name="output_shape")
+    if not output_shape_.get_shape().is_compatible_with(tensor_shape.vector(3)):
+      raise ValueError("output_shape must have shape (3,), got {}"
+                       .format(output_shape_.get_shape()))
+
+    # The format could be either NWC or NCW, map to NHWC or NCHW
+    if data_format is None or data_format == "NWC":
+      data_format_2d = "NHWC"
+      axis = 2
+    elif data_format == "NCW":
+      data_format_2d = "NCHW"
+      axis = 1
+    else:
+      raise ValueError("data_format must be \"NWC\" or \"NCW\".")
+
+    if not value.get_shape()[axis].is_compatible_with(filter.get_shape()[2]):
+      raise ValueError("input channels does not match filter's input channels, "
+                       "{} != {}".format(value.get_shape()[axis],
+                                         filter.get_shape()[2]))
+
+    if isinstance(output_shape, (list, np.ndarray)):
+      # output_shape's shape should be == [3] if reached this point.
+      if not filter.get_shape()[1].is_compatible_with(output_shape[axis]):
+        raise ValueError(
+            "output_shape does not match filter's output channels, "
+            "{} != {}".format(output_shape[axis], filter.get_shape()[1]))
+
+    if padding != "VALID" and padding != "SAME":
+      raise ValueError("padding must be either VALID or SAME:"
+                       " {}".format(padding))
+
+    # Reshape the input tensor to [batch, 1, in_width, in_channels]
+    if data_format_2d == "NHWC":
+      output_shape_ = array_ops.concat([output_shape_[:1], [1],
+                                        output_shape_[1:]], axis=0)
+      spatial_start_dim = 1
+      strides = [1, 1, stride, 1]
+    else:
+      output_shape_ = array_ops.concat([output_shape_[:2], [1],
+                                        output_shape_[2:]], axis=0)
+      spatial_start_dim = 2
+      strides = [1, 1, 1, stride]
+    value = array_ops.expand_dims(value, spatial_start_dim)
+    filter = array_ops.expand_dims(filter, 0)
+
+    result = gen_nn_ops.conv2d_backprop_input(input_sizes=output_shape_,
+                                              filter=filter,
+                                              out_backprop=value,
+                                              strides=strides,
+                                              padding=padding,
+                                              data_format=data_format_2d,
+                                              name=name)
+    return array_ops.squeeze(result, [spatial_start_dim])
+
+
 @ops.RegisterStatistics("Dilation2D", "flops")
 def _calc_dilation2d_flops(graph, node):
   """Calculates the compute resources needed for Dilation2D."""
diff --git a/tensorflow/python/ops/variables.py b/tensorflow/python/ops/variables.py
index e9b1c67d16..a1e4305de1 100644
--- a/tensorflow/python/ops/variables.py
+++ b/tensorflow/python/ops/variables.py
@@ -1063,13 +1063,13 @@ class Variable(object):
 class PartitionedVariable(object):
   """A container for partitioned `Variable` objects.
 
-  @compatiblity(eager) `tf.PartitionedVariable` is not compatible with
+  @compatibility(eager) `tf.PartitionedVariable` is not compatible with
   eager execution.  Use `tfe.Variable` instead which is compatable
   with both eager execution and graph construction.  See [the
   TensorFlow Eager Execution
   guide](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/eager/python/g3doc/guide.md#variables-and-optimizers)
   for details on how variables work in eager execution.
-  @end_compatiblity
+  @end_compatibility
   """
 
   class PartitionedVariableIterator(object):
diff --git a/tensorflow/python/tools/import_pb_to_tensorboard.py b/tensorflow/python/tools/import_pb_to_tensorboard.py
old mode 100644
new mode 100755
diff --git a/tensorflow/stream_executor/cuda/cuda_dnn.cc b/tensorflow/stream_executor/cuda/cuda_dnn.cc
index 99bed86a17..d78362d4fb 100644
--- a/tensorflow/stream_executor/cuda/cuda_dnn.cc
+++ b/tensorflow/stream_executor/cuda/cuda_dnn.cc
@@ -232,7 +232,6 @@ CUDNN_DNN_ROUTINE_EACH_R3(PERFTOOLS_GPUTOOLS_CUDNN_WRAP)
   __macro(cudnnRNNBackwardData)                               \
   __macro(cudnnRNNBackwardWeights)                            \
   __macro(cudnnSetRNNDescriptor)                              \
-  __macro(cudnnSetRNNDescriptor_v6)                           \
   __macro(cudnnGetFilterNdDescriptor)
 
 // clang-format on
@@ -245,7 +244,8 @@ CUDNN_DNN_ROUTINE_EACH_R5(PERFTOOLS_GPUTOOLS_CUDNN_WRAP)
 // clang-format off
 #if CUDNN_VERSION >= 6000
 #define CUDNN_DNN_ROUTINE_EACH_R6(__macro)                    \
-  __macro(cudnnConvolutionBiasActivationForward)
+  __macro(cudnnConvolutionBiasActivationForward)              \
+  __macro(cudnnSetRNNDescriptor_v6)
 
 // clang-format on
 CUDNN_DNN_ROUTINE_EACH_R6(PERFTOOLS_GPUTOOLS_CUDNN_WRAP)
@@ -665,7 +665,6 @@ class ScopedPoolingDescriptor {
       LOG(FATAL) << "could not create cudnn pooling descriptor: "
                  << ToString(status);
     }
-
     const std::vector<int64> strides64 = pooling_descriptor.strides();
     const std::vector<int64> padding64 = pooling_descriptor.padding();
     const std::vector<int64> shape64 = pooling_descriptor.window();
@@ -680,14 +679,14 @@ class ScopedPoolingDescriptor {
                    &CheckedNarrowing<int64, int>);
     std::transform(shape64.cbegin(), shape64.cend(), shape.begin(),
                    &CheckedNarrowing<int64, int>);
+    bool propagate_nans = pooling_descriptor.propagate_nans();
     status = wrap::cudnnSetPoolingNdDescriptor(
         parent_, handle_,
         (pooling_descriptor.mode() == dnn::PoolingMode::kMaximum
              ? CUDNN_POOLING_MAX
              : CUDNN_POOLING_AVERAGE_COUNT_EXCLUDE_PADDING),
 #if CUDNN_VERSION >= 5000
-        // Always propagate nans.
-        CUDNN_PROPAGATE_NAN,
+        propagate_nans ? CUDNN_PROPAGATE_NAN : CUDNN_NOT_PROPAGATE_NAN,
 #endif
         nd, shape.data(), padding.data(), strides.data());
     if (status != CUDNN_STATUS_SUCCESS) {
diff --git a/tensorflow/stream_executor/dnn.cc b/tensorflow/stream_executor/dnn.cc
index 07fe8a85f4..29fd6d0e87 100644
--- a/tensorflow/stream_executor/dnn.cc
+++ b/tensorflow/stream_executor/dnn.cc
@@ -472,7 +472,8 @@ PoolingDescriptor::PoolingDescriptor(int ndims)
       ndims_(ndims),
       window_(ndims, 0),
       padding_(ndims, 0),
-      strides_(ndims, 1) {}
+      strides_(ndims, 1),
+      propagate_nans_(false) {}
 
 PoolingDescriptor::PoolingDescriptor() : PoolingDescriptor(/*ndims=*/2) {}
 
@@ -482,6 +483,7 @@ void PoolingDescriptor::CloneFrom(const PoolingDescriptor& other) {
   window_ = other.window_;
   padding_ = other.padding_;
   strides_ = other.strides_;
+  propagate_nans_ = other.propagate_nans_;
 }
 
 string PoolingDescriptor::ToString() const {
@@ -495,9 +497,12 @@ string PoolingDescriptor::ToString() const {
     port::Appendf(&padding, "%lld", padding_[i]);
   }
 
-  return port::Printf("{mode: %s window: %s strides: %s padding: %s}",
-                      mode_string, window.c_str(), strides.c_str(),
-                      padding.c_str());
+  const char* propagate_string = propagate_nans_ ? "Yes" : "No";
+
+  return port::Printf(
+      "{mode: %s window: %s strides: %s padding: %s propagate NaNs: %s}",
+      mode_string, window.c_str(), strides.c_str(), padding.c_str(),
+      propagate_string);
 }
 
 string PoolingDescriptor::ToShortString() const {
@@ -508,7 +513,8 @@ string PoolingDescriptor::ToShortString() const {
     port::Appendf(&padding, "_p%d:%lld", i, padding_[i]);
   }
   return port::StrCat(mode_ == dnn::PoolingMode::kMaximum ? "max" : "avg",
-                      window, strides, padding);
+                      window, strides, padding,
+                      propagate_nans_ ? "propagate_nans" : "ignore_nans");
 }
 
 // -- NormalizeDescriptor
diff --git a/tensorflow/stream_executor/dnn.h b/tensorflow/stream_executor/dnn.h
index 49235167ab..0d2cd4a9f2 100644
--- a/tensorflow/stream_executor/dnn.h
+++ b/tensorflow/stream_executor/dnn.h
@@ -661,6 +661,10 @@ class PoolingDescriptor {
     SetDim(&strides_, dim, value);
     return *this;
   }
+  PoolingDescriptor& set_propagate_nans(bool value) {
+    propagate_nans_ = value;
+    return *this;
+  }
 
   int ndims() const { return ndims_; }
   void CloneFrom(const PoolingDescriptor& other);
@@ -681,10 +685,12 @@ class PoolingDescriptor {
   std::vector<int64> window() const { return window_; }
   std::vector<int64> padding() const { return padding_; }
   std::vector<int64> strides() const { return strides_; }
+  bool propagate_nans() const { return propagate_nans_; }
 
  private:
   PoolingMode mode_;
   int ndims_;
+  bool propagate_nans_;
 
   // Stored as: ..., y, x.
   std::vector<int64> window_;
diff --git a/tensorflow/tools/api/golden/tensorflow.linalg.pbtxt b/tensorflow/tools/api/golden/tensorflow.linalg.pbtxt
index 9fd38a29b7..62e634afb8 100644
--- a/tensorflow/tools/api/golden/tensorflow.linalg.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.linalg.pbtxt
@@ -94,7 +94,7 @@ tf_module {
   }
   member_method {
     name: "norm"
-    argspec: "args=[\'tensor\', \'ord\', \'axis\', \'keep_dims\', \'name\'], varargs=None, keywords=None, defaults=[\'euclidean\', \'None\', \'False\', \'None\'], "
+    argspec: "args=[\'tensor\', \'ord\', \'axis\', \'keepdims\', \'name\', \'keep_dims\'], varargs=None, keywords=None, defaults=[\'euclidean\', \'None\', \'None\', \'None\', \'None\'], "
   }
   member_method {
     name: "qr"
diff --git a/tensorflow/tools/api/golden/tensorflow.nn.pbtxt b/tensorflow/tools/api/golden/tensorflow.nn.pbtxt
index 24c0448dea..ebd9c079b5 100644
--- a/tensorflow/tools/api/golden/tensorflow.nn.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.nn.pbtxt
@@ -170,7 +170,7 @@ tf_module {
   }
   member_method {
     name: "l2_normalize"
-    argspec: "args=[\'x\', \'dim\', \'epsilon\', \'name\'], varargs=None, keywords=None, defaults=[\'1e-12\', \'None\'], "
+    argspec: "args=[\'x\', \'axis\', \'epsilon\', \'name\', \'dim\'], varargs=None, keywords=None, defaults=[\'None\', \'1e-12\', \'None\', \'None\'], "
   }
   member_method {
     name: "leaky_relu"
@@ -190,7 +190,7 @@ tf_module {
   }
   member_method {
     name: "log_softmax"
-    argspec: "args=[\'logits\', \'dim\', \'name\'], varargs=None, keywords=None, defaults=[\'-1\', \'None\'], "
+    argspec: "args=[\'logits\', \'axis\', \'name\', \'dim\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], "
   }
   member_method {
     name: "log_uniform_candidate_sampler"
@@ -282,7 +282,7 @@ tf_module {
   }
   member_method {
     name: "softmax"
-    argspec: "args=[\'logits\', \'dim\', \'name\'], varargs=None, keywords=None, defaults=[\'-1\', \'None\'], "
+    argspec: "args=[\'logits\', \'axis\', \'name\', \'dim\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], "
   }
   member_method {
     name: "softmax_cross_entropy_with_logits"
diff --git a/tensorflow/tools/api/golden/tensorflow.pbtxt b/tensorflow/tools/api/golden/tensorflow.pbtxt
index bf7bc6a7c1..0edd4153d7 100644
--- a/tensorflow/tools/api/golden/tensorflow.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.pbtxt
@@ -750,7 +750,7 @@ tf_module {
   }
   member_method {
     name: "boolean_mask"
-    argspec: "args=[\'tensor\', \'mask\', \'name\'], varargs=None, keywords=None, defaults=[\'boolean_mask\'], "
+    argspec: "args=[\'tensor\', \'mask\', \'name\', \'axis\'], varargs=None, keywords=None, defaults=[\'boolean_mask\', \'None\'], "
   }
   member_method {
     name: "broadcast_dynamic_shape"
@@ -858,7 +858,7 @@ tf_module {
   }
   member_method {
     name: "count_nonzero"
-    argspec: "args=[\'input_tensor\', \'axis\', \'keep_dims\', \'dtype\', \'name\', \'reduction_indices\'], varargs=None, keywords=None, defaults=[\'None\', \'False\', \"<dtype: \'int64\'>\", \'None\', \'None\'], "
+    argspec: "args=[\'input_tensor\', \'axis\', \'keepdims\', \'dtype\', \'name\', \'reduction_indices\', \'keep_dims\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \"<dtype: \'int64\'>\", \'None\', \'None\', \'None\'], "
   }
   member_method {
     name: "count_up_to"
@@ -1414,7 +1414,7 @@ tf_module {
   }
   member_method {
     name: "norm"
-    argspec: "args=[\'tensor\', \'ord\', \'axis\', \'keep_dims\', \'name\'], varargs=None, keywords=None, defaults=[\'euclidean\', \'None\', \'False\', \'None\'], "
+    argspec: "args=[\'tensor\', \'ord\', \'axis\', \'keepdims\', \'name\', \'keep_dims\'], varargs=None, keywords=None, defaults=[\'euclidean\', \'None\', \'None\', \'None\', \'None\'], "
   }
   member_method {
     name: "not_equal"
@@ -1546,11 +1546,11 @@ tf_module {
   }
   member_method {
     name: "reduce_all"
-    argspec: "args=[\'input_tensor\', \'axis\', \'keep_dims\', \'name\', \'reduction_indices\'], varargs=None, keywords=None, defaults=[\'None\', \'False\', \'None\', \'None\'], "
+    argspec: "args=[\'input_tensor\', \'axis\', \'keepdims\', \'name\', \'reduction_indices\', \'keep_dims\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\'], "
   }
   member_method {
     name: "reduce_any"
-    argspec: "args=[\'input_tensor\', \'axis\', \'keep_dims\', \'name\', \'reduction_indices\'], varargs=None, keywords=None, defaults=[\'None\', \'False\', \'None\', \'None\'], "
+    argspec: "args=[\'input_tensor\', \'axis\', \'keepdims\', \'name\', \'reduction_indices\', \'keep_dims\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\'], "
   }
   member_method {
     name: "reduce_join"
@@ -1558,27 +1558,27 @@ tf_module {
   }
   member_method {
     name: "reduce_logsumexp"
-    argspec: "args=[\'input_tensor\', \'axis\', \'keep_dims\', \'name\', \'reduction_indices\'], varargs=None, keywords=None, defaults=[\'None\', \'False\', \'None\', \'None\'], "
+    argspec: "args=[\'input_tensor\', \'axis\', \'keepdims\', \'name\', \'reduction_indices\', \'keep_dims\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\'], "
   }
   member_method {
     name: "reduce_max"
-    argspec: "args=[\'input_tensor\', \'axis\', \'keep_dims\', \'name\', \'reduction_indices\'], varargs=None, keywords=None, defaults=[\'None\', \'False\', \'None\', \'None\'], "
+    argspec: "args=[\'input_tensor\', \'axis\', \'keepdims\', \'name\', \'reduction_indices\', \'keep_dims\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\'], "
   }
   member_method {
     name: "reduce_mean"
-    argspec: "args=[\'input_tensor\', \'axis\', \'keep_dims\', \'name\', \'reduction_indices\'], varargs=None, keywords=None, defaults=[\'None\', \'False\', \'None\', \'None\'], "
+    argspec: "args=[\'input_tensor\', \'axis\', \'keepdims\', \'name\', \'reduction_indices\', \'keep_dims\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\'], "
   }
   member_method {
     name: "reduce_min"
-    argspec: "args=[\'input_tensor\', \'axis\', \'keep_dims\', \'name\', \'reduction_indices\'], varargs=None, keywords=None, defaults=[\'None\', \'False\', \'None\', \'None\'], "
+    argspec: "args=[\'input_tensor\', \'axis\', \'keepdims\', \'name\', \'reduction_indices\', \'keep_dims\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\'], "
   }
   member_method {
     name: "reduce_prod"
-    argspec: "args=[\'input_tensor\', \'axis\', \'keep_dims\', \'name\', \'reduction_indices\'], varargs=None, keywords=None, defaults=[\'None\', \'False\', \'None\', \'None\'], "
+    argspec: "args=[\'input_tensor\', \'axis\', \'keepdims\', \'name\', \'reduction_indices\', \'keep_dims\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\'], "
   }
   member_method {
     name: "reduce_sum"
-    argspec: "args=[\'input_tensor\', \'axis\', \'keep_dims\', \'name\', \'reduction_indices\'], varargs=None, keywords=None, defaults=[\'None\', \'False\', \'None\', \'None\'], "
+    argspec: "args=[\'input_tensor\', \'axis\', \'keepdims\', \'name\', \'reduction_indices\', \'keep_dims\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\'], "
   }
   member_method {
     name: "register_tensor_conversion_function"
diff --git a/tensorflow/tools/ci_build/ci_parameterized_build.sh b/tensorflow/tools/ci_build/ci_parameterized_build.sh
index 5f791d7bc7..c27f4953e3 100755
--- a/tensorflow/tools/ci_build/ci_parameterized_build.sh
+++ b/tensorflow/tools/ci_build/ci_parameterized_build.sh
@@ -165,7 +165,7 @@ else
   BAZEL_TARGET="${BAZEL_TARGET} //tensorflow/contrib/lite/kernels:embedding_lookup_test"
   BAZEL_TARGET="${BAZEL_TARGET} //tensorflow/contrib/lite/kernels:embedding_lookup_sparse_test"
   BAZEL_TARGET="${BAZEL_TARGET} //tensorflow/contrib/lite/kernels:fully_connected_test"
-  BAZEL_TARGET="${BAZEL_TARGET} //tensorflow/contrib/lite/testing:generated_examples_zip_test"
+  # BAZEL_TARGET="${BAZEL_TARGET} //tensorflow/contrib/lite/testing:generated_examples_zip_test"
   BAZEL_TARGET="${BAZEL_TARGET} //tensorflow/contrib/lite/kernels:hashtable_lookup_test"
   BAZEL_TARGET="${BAZEL_TARGET} //tensorflow/contrib/lite/kernels:local_response_norm_test"
   BAZEL_TARGET="${BAZEL_TARGET} //tensorflow/contrib/lite/kernels:lsh_projection_test"
diff --git a/tensorflow/tools/ci_build/install/install_golang.sh b/tensorflow/tools/ci_build/install/install_golang.sh
index 55c1674495..e1edd62cc5 100755
--- a/tensorflow/tools/ci_build/install/install_golang.sh
+++ b/tensorflow/tools/ci_build/install/install_golang.sh
@@ -16,7 +16,7 @@
 
 set -ex
 
-GOLANG_URL="https://storage.googleapis.com/golang/go1.9.1.linux-amd64.tar.gz"
+GOLANG_URL="https://storage.googleapis.com/golang/go1.9.2.linux-amd64.tar.gz"
 
 sudo mkdir -p /usr/local
 wget -q -O - "${GOLANG_URL}" | sudo tar -C /usr/local -xz
diff --git a/tensorflow/tools/ci_build/linux/libtensorflow_docker.sh b/tensorflow/tools/ci_build/linux/libtensorflow_docker.sh
index dcda8228bc..e5d8303c6e 100755
--- a/tensorflow/tools/ci_build/linux/libtensorflow_docker.sh
+++ b/tensorflow/tools/ci_build/linux/libtensorflow_docker.sh
@@ -48,6 +48,6 @@ ${DOCKER_BINARY} run \
   -e "TF_NEED_GCP=0" \
   -e "TF_NEED_HDFS=0" \
   -e "TF_NEED_CUDA=${TF_NEED_CUDA}" \
-  -e "TF_NEED_OPENCL=0" \
+  -e "TF_NEED_OPENCL_SYCL=0" \
   "${DOCKER_IMAGE}" \
   "/workspace/tensorflow/tools/ci_build/linux/libtensorflow.sh"
diff --git a/tensorflow/tools/ci_build/osx/libtensorflow_cpu.sh b/tensorflow/tools/ci_build/osx/libtensorflow_cpu.sh
index d90a1b905d..e1b56b9a25 100755
--- a/tensorflow/tools/ci_build/osx/libtensorflow_cpu.sh
+++ b/tensorflow/tools/ci_build/osx/libtensorflow_cpu.sh
@@ -27,7 +27,7 @@ export PYTHON_BIN_PATH="/usr/bin/python"
 export TF_NEED_GCP=0
 export TF_NEED_HDFS=0
 export TF_NEED_CUDA=0
-export TF_NEED_OPENCL=0
+export TF_NEED_OPENCL_SYCL=0
 export TF_NEED_MKL=0
 export COMPUTECPP_PATH="/usr/local"
 
diff --git a/tensorflow/tools/ci_build/osx/libtensorflow_gpu.sh b/tensorflow/tools/ci_build/osx/libtensorflow_gpu.sh
index 79973647c1..5a901af3e5 100755
--- a/tensorflow/tools/ci_build/osx/libtensorflow_gpu.sh
+++ b/tensorflow/tools/ci_build/osx/libtensorflow_gpu.sh
@@ -28,7 +28,7 @@ export LD_LIBRARY_PATH="/usr/local/cuda/lib:/usr/local/cuda/extras/CUPTI/lib:${L
 export PYTHON_BIN_PATH="/usr/bin/python"
 export TF_NEED_GCP=0
 export TF_NEED_HDFS=0
-export TF_NEED_OPENCL=0
+export TF_NEED_OPENCL_SYCL=0
 export TF_NEED_MKL=0
 export COMPUTECPP_PATH="/usr/local"
 
diff --git a/tensorflow/tools/ci_build/pi/build_raspberry_pi.sh b/tensorflow/tools/ci_build/pi/build_raspberry_pi.sh
index 5244898c40..88116d9f24 100755
--- a/tensorflow/tools/ci_build/pi/build_raspberry_pi.sh
+++ b/tensorflow/tools/ci_build/pi/build_raspberry_pi.sh
@@ -75,17 +75,23 @@ if [[ $1 == "PI_ONE" ]]; then
   PI_COPTS="--copt=-march=armv6 --copt=-mfpu=vfp
   --copt=-DUSE_GEMM_FOR_CONV --copt=-DUSE_OPENBLAS
   --copt=-isystem --copt=${OPENBLAS_INSTALL_PATH}/include/
+  --copt=-std=gnu11 --copt=-DS_IREAD=S_IRUSR --copt=-DS_IWRITE=S_IWUSR
   --linkopt=-L${OPENBLAS_INSTALL_PATH}/lib/
   --linkopt=-l:libopenblas.a"
   echo "Building for the Pi One/Zero, with no NEON support"
 else
   PI_COPTS='--copt=-march=armv7-a --copt=-mfpu=neon-vfpv4
+  --copt=-std=gnu11 --copt=-DS_IREAD=S_IRUSR --copt=-DS_IWRITE=S_IWUSR
   --copt=-U__GCC_HAVE_SYNC_COMPARE_AND_SWAP_1
   --copt=-U__GCC_HAVE_SYNC_COMPARE_AND_SWAP_2
   --copt=-U__GCC_HAVE_SYNC_COMPARE_AND_SWAP_8'
   echo "Building for the Pi Two/Three, with NEON acceleration"
 fi
 
+# We need to pass down the environment variable with a possible alternate Python
+# include path for Python 3.x builds to work.
+export CROSSTOOL_PYTHON_INCLUDE_PATH
+
 cd ${WORKSPACE_PATH}
 bazel build -c opt ${PI_COPTS} \
   --config=monolithic \
diff --git a/tensorflow/tools/ci_build/windows/bazel/bazel_test_lib.sh b/tensorflow/tools/ci_build/windows/bazel/bazel_test_lib.sh
index 924ab1a4ae..44b6d52952 100644
--- a/tensorflow/tools/ci_build/windows/bazel/bazel_test_lib.sh
+++ b/tensorflow/tools/ci_build/windows/bazel/bazel_test_lib.sh
@@ -117,7 +117,7 @@ function run_configure_for_cpu_build {
   export TF_NEED_VERBS=0
   export TF_NEED_GCP=0
   export TF_NEED_HDFS=0
-  export TF_NEED_OPENCL=0
+  export TF_NEED_OPENCL_SYCL=0
   echo "" | ./configure
 }
 
@@ -141,7 +141,7 @@ function run_configure_for_gpu_build {
   export TF_NEED_MKL=0
   export TF_NEED_GCP=0
   export TF_NEED_HDFS=0
-  export TF_NEED_OPENCL=0
+  export TF_NEED_OPENCL_SYCL=0
 
   # TODO(pcloudy): Remove this after TensorFlow uses its own CRSOOTOOL
   # for GPU build on Windows
diff --git a/tensorflow/tools/docker/Dockerfile.devel-gpu-cuda9-cudnn7 b/tensorflow/tools/docker/Dockerfile.devel-gpu-cuda9-cudnn7
index 64ebc4607a..9bcc3925a8 100644
--- a/tensorflow/tools/docker/Dockerfile.devel-gpu-cuda9-cudnn7
+++ b/tensorflow/tools/docker/Dockerfile.devel-gpu-cuda9-cudnn7
@@ -101,12 +101,11 @@ RUN ln -s /usr/local/cuda/lib64/stubs/libcuda.so /usr/local/cuda/lib64/stubs/lib
                 --jobs=${TF_AVAILABLE_CPUS} \
                 tensorflow/tools/pip_package:build_pip_package && \
     mkdir /pip_pkg && \
-    bazel-bin/tensorflow/tools/pip_package/build_pip_package /pip_pkg
-
-# Clean up pip wheel and Bazel cache when done.
-RUN pip --no-cache-dir install --upgrade /pip_pkg/tensorflow-*.whl && \
+    bazel-bin/tensorflow/tools/pip_package/build_pip_package /pip_pkg && \
+    pip --no-cache-dir install --upgrade /pip_pkg/tensorflow-*.whl && \
     rm -rf /pip_pkg && \
     rm -rf /root/.cache
+# Clean up pip wheel and Bazel cache when done.
 
 WORKDIR /root
 
diff --git a/tensorflow/tools/docker/Dockerfile.gpu b/tensorflow/tools/docker/Dockerfile.gpu
index 0571dd7391..e212d10290 100644
--- a/tensorflow/tools/docker/Dockerfile.gpu
+++ b/tensorflow/tools/docker/Dockerfile.gpu
@@ -1,4 +1,4 @@
-FROM nvidia/cuda:8.0-cudnn6-devel-ubuntu16.04
+FROM nvidia/cuda:8.0-cudnn6-runtime-ubuntu16.04
 
 LABEL maintainer="Craig Citro <craigcitro@google.com>"
 
diff --git a/tensorflow/tools/docker/README.md b/tensorflow/tools/docker/README.md
index 2e5a0038ed..e35c58ff80 100644
--- a/tensorflow/tools/docker/README.md
+++ b/tensorflow/tools/docker/README.md
@@ -60,6 +60,20 @@ Building TensorFlow Docker containers should be done through the
 script. The raw Dockerfiles should not be used directly as they contain strings
 to be replaced by the script during the build.
 
+Attempting to run [parameterized_docker_build.sh](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/tools/docker/parameterized_docker_build.sh)
+from a binary docker image such as for example `tensorflow/tensorflow:latest` will
+not work. One needs to execute the script from a developer docker image since by
+contrast with a binary docker image it contains not only the compiled solution but
+also the tensorflow source code. Please select the appropriate developer docker
+image of tensorflow at `tensorflow/tensorflow:[.](https://hub.docker.com/r/tensorflow/tensorflow/tags/)`.
+
+The smallest command line to generate a docker image will then be:
+```docker run -it tensorflow/tensorflow:"right_tag"```
+
+If you would like to start a jupyter notebook on your docker container, make sure
+to map the port 8888 of your docker container by adding -p 8888:8888 to the above
+command.
+
 To use the script, specify the container type (`CPU` vs. `GPU`), the desired
 Python version (`PYTHON2` vs. `PYTHON3`) and whether the developer Docker image
 is to be built (`NO` vs. `YES`). In addition, you need to specify the central
diff --git a/tensorflow/tools/graph_transforms/BUILD b/tensorflow/tools/graph_transforms/BUILD
index 1bf7113c9e..9216008600 100644
--- a/tensorflow/tools/graph_transforms/BUILD
+++ b/tensorflow/tools/graph_transforms/BUILD
@@ -131,6 +131,8 @@ cc_library(
         "//tensorflow/core:lib",
         "//tensorflow/core:protos_all_cc",
         "//tensorflow/core:tensorflow",
+        "//tensorflow/contrib/rnn:gru_ops_op_lib",
+        "//tensorflow/contrib/rnn:lstm_ops_op_lib",
     ] + if_not_windows([
         "//tensorflow/core/kernels:quantized_ops",
         "//tensorflow/core/kernels:remote_fused_graph_rewriter_transform",
diff --git a/tensorflow/tools/graph_transforms/quantize_nodes.cc b/tensorflow/tools/graph_transforms/quantize_nodes.cc
index 2b85e7e83c..97e8f77616 100644
--- a/tensorflow/tools/graph_transforms/quantize_nodes.cc
+++ b/tensorflow/tools/graph_transforms/quantize_nodes.cc
@@ -759,6 +759,7 @@ Status QuantizeNodes(const GraphDef& input_graph_def,
           NodeDef reshape_dims;
           reshape_dims.set_op("Const");
           reshape_dims.set_name(unique_input_name + "/reshape_dims");
+          AddNodeInput("^" + input_name, &reshape_dims);
           SetNodeAttr("dtype", DT_INT32, &reshape_dims);
           Tensor reshape_dims_tensor(DT_INT32, {1});
           reshape_dims_tensor.flat<int32>()(0) = -1;
@@ -768,6 +769,7 @@ Status QuantizeNodes(const GraphDef& input_graph_def,
           NodeDef reduction_dims;
           reduction_dims.set_op("Const");
           reduction_dims.set_name(unique_input_name + "/reduction_dims");
+          AddNodeInput("^" + input_name, &reduction_dims);
           SetNodeAttr("dtype", DT_INT32, &reduction_dims);
           Tensor reduction_dims_tensor(DT_INT32, {1});
           reduction_dims_tensor.flat<int32>()(0) = 0;
diff --git a/tensorflow/tools/pip_package/setup.py b/tensorflow/tools/pip_package/setup.py
index 60282f6aa3..a493c6f2aa 100644
--- a/tensorflow/tools/pip_package/setup.py
+++ b/tensorflow/tools/pip_package/setup.py
@@ -29,7 +29,7 @@ from setuptools.dist import Distribution
 # This version string is semver compatible, but incompatible with pip.
 # For pip, we will remove all '-' characters from this string, and use the
 # result for pip.
-_VERSION = '1.4.0-rc1'
+_VERSION = '1.4.0'
 
 REQUIRED_PACKAGES = [
     'absl-py',
diff --git a/third_party/aws.BUILD b/third_party/aws.BUILD
index bc6a2fd8cc..bc9e37ffb3 100644
--- a/third_party/aws.BUILD
+++ b/third_party/aws.BUILD
@@ -21,6 +21,9 @@ cc_library(
         "@%ws%//tensorflow:linux_ppc64le": glob([
             "aws-cpp-sdk-core/source/platform/linux-shared/*.cpp",
         ]),
+        "@%ws%//tensorflow:raspberry_pi_armeabi": glob([
+            "aws-cpp-sdk-core/source/platform/linux-shared/*.cpp",
+        ]),
         "//conditions:default": [],
     }) + glob([
         "aws-cpp-sdk-core/include/**/*.h",
diff --git a/third_party/curl.BUILD b/third_party/curl.BUILD
index 882967df1c..805a30d262 100644
--- a/third_party/curl.BUILD
+++ b/third_party/curl.BUILD
@@ -477,7 +477,6 @@ genrule(
         "#  define HAVE_RAND_EGD 1",
         "#  define HAVE_RAND_STATUS 1",
         "#  define HAVE_SSL_GET_SHUTDOWN 1",
-        "#  define HAVE_STROPTS_H 1",
         "#  define HAVE_TERMIOS_H 1",
         "#  define OS \"x86_64-pc-linux-gnu\"",
         "#  define RANDOM_FILE \"/dev/urandom\"",
diff --git a/third_party/sycl/crosstool/CROSSTOOL.tpl b/third_party/sycl/crosstool/CROSSTOOL.tpl
index 32884d71e7..f8e50efcc6 100755
--- a/third_party/sycl/crosstool/CROSSTOOL.tpl
+++ b/third_party/sycl/crosstool/CROSSTOOL.tpl
@@ -35,10 +35,10 @@ toolchain {
   tool_path { name: "compat-ld" path: "/usr/bin/ld" }
   tool_path { name: "cpp" path: "/usr/bin/cpp" }
   tool_path { name: "dwp" path: "/usr/bin/dwp" }
-  tool_path { name: "gcc" path: "computecpp" }
+  tool_path { name: "gcc" path: "%{sycl_impl}" }
   # Use "-std=c++11" for nvcc. For consistency, force both the host compiler
   # and the device compiler to use "-std=c++11".
-  cxx_flag: "-std=c++11"
+  cxx_flag: "%{c++_std}"
   linker_flag: "-Wl,-no-as-needed"
   linker_flag: "-lstdc++"
   linker_flag: "-B/usr/bin/"
@@ -53,7 +53,7 @@ toolchain {
   cxx_builtin_include_directory: "/usr/local/include"
   cxx_builtin_include_directory: "/usr/include"
 
-  cxx_builtin_include_directory: "%{computecpp_toolkit_path}"
+  cxx_builtin_include_directory: "%{sycl_include_dir}"
   cxx_builtin_include_directory: "%{python_lib_path}"
 
   tool_path { name: "gcov" path: "/usr/bin/gcov" }
@@ -214,4 +214,4 @@ toolchain {
     compiler_flag: "-O2"
     compiler_flag: "-DNDEBUG"
   }
-}
+}
\ No newline at end of file
diff --git a/third_party/sycl/crosstool/trisycl.tpl b/third_party/sycl/crosstool/trisycl.tpl
new file mode 100644
index 0000000000..b470772fbf
--- /dev/null
+++ b/third_party/sycl/crosstool/trisycl.tpl
@@ -0,0 +1,73 @@
+#!/usr/bin/env python
+
+import os
+import sys
+import tempfile
+from subprocess import call
+
+CPU_CXX_COMPILER = ('%{host_cxx_compiler}')
+CPU_C_COMPILER = ('%{host_c_compiler}')
+
+CURRENT_DIR = os.path.dirname(sys.argv[0])
+TRISYCL_INCLUDE_DIR = CURRENT_DIR + '/../sycl/include'
+
+def main():
+  compiler_flags = []
+
+  remove_flags = ('-Wl,--no-undefined', '-Wno-unused-but-set-variable', '-Wignored-attributes', '-fno-exceptions')
+  # remove -fsamotoze-coverage from string with g++
+  if 'g++' in CPU_CXX_COMPILER:
+    remove_flags += ('-fsanitize-coverage',)
+    compiler_flags += ['-fopenmp']
+  else:
+    compiler_flags += ['-fopenmp=libomp']
+
+  compiler_flags += [flag for flag in sys.argv[1:] if not flag.startswith(remove_flags)]
+
+
+  output_file_index = compiler_flags.index('-o') + 1
+  output_file_name = compiler_flags[output_file_index]
+
+  if(output_file_index == 1):
+    # we are linking
+    return call([CPU_CXX_COMPILER] + compiler_flags +
+                ['-Wl,--no-undefined'])
+
+  # find what we compile
+  compiling_cpp = 0
+  if('-c' in compiler_flags):
+      compiled_file_index = compiler_flags.index('-c') + 1
+      compiled_file_name = compiler_flags[compiled_file_index]
+      if(compiled_file_name.endswith(('.cc', '.c++', '.cpp', '.CPP',
+                                      '.C', '.cxx'))):
+        compiling_cpp = 1;
+
+  debug_flags = ['-DTRISYCL_DEBUG', '-DBOOST_LOG_DYN_LINK', '-DTRISYCL_TRACE_KERNEL', '-lpthread', '-lboost_log', '-g', '-rdynamic']
+
+  opt_flags = ['-DNDEBUG', '-DBOOST_DISABLE_ASSERTS', '-O3']
+
+  compiler_flags = compiler_flags + ['-DEIGEN_USE_SYCL=1',
+                                     '-DEIGEN_HAS_C99_MATH',
+                                     '-DEIGEN_MAX_ALIGN_BYTES=16',
+                                     '-DTENSORFLOW_USE_SYCL'] + opt_flags
+
+  if(compiling_cpp == 1):
+    # create a blacklist of folders that will be skipped when compiling
+    # with triSYCL
+    skip_extensions = [".cu.cc"]
+    skip_folders = ["tensorflow/compiler", "tensorflow/docs_src", "tensorflow/tensorboard", "third_party", "external", "hexagon"]
+    skip_folders = [(folder + '/') for folder in skip_folders]
+    # if compiling external project skip triSYCL
+    if any(compiled_file_name.endswith(_ext) for _ext in skip_extensions) or any(_folder in output_file_name for _folder in skip_folders):
+      return call([CPU_CXX_COMPILER] + compiler_flags)
+
+    host_compiler_flags = ['-xc++', '-Wno-unused-variable',
+                           '-I', TRISYCL_INCLUDE_DIR] + compiler_flags
+    x = call([CPU_CXX_COMPILER] + host_compiler_flags)
+    return x
+  else:
+    # compile for C
+    return call([CPU_C_COMPILER] + compiler_flags)
+
+if __name__ == '__main__':
+  sys.exit(main())
diff --git a/third_party/sycl/sycl/BUILD.tpl b/third_party/sycl/sycl/BUILD.tpl
index 6cad190630..b6ceaadda7 100755
--- a/third_party/sycl/sycl/BUILD.tpl
+++ b/third_party/sycl/sycl/BUILD.tpl
@@ -10,16 +10,27 @@ package(default_visibility = ["//visibility:public"])
 exports_files(["LICENSE.text"])
 
 config_setting(
-    name = "using_sycl",
-    values = {
-        "define": "using_sycl=true",
+    name = "using_sycl_ccpp",
+    define_values = {
+        "using_sycl": "true",
+        "using_trisycl": "false",
     },
 )
 
+config_setting(
+    name = "using_sycl_trisycl",
+    define_values = {
+        "using_sycl": "true",
+        "using_trisycl": "false",
+    },
+)
+
+
 cc_library(
     name = "sycl_headers",
     hdrs = glob([
         "**/*.h",
+        "**/*.hpp",
     ]),
     includes = [".", "include"],
 )
diff --git a/third_party/sycl/sycl/build_defs.bzl.tpl b/third_party/sycl/sycl/build_defs.bzl.tpl
index 09bef0a661..33386f8957 100755
--- a/third_party/sycl/sycl/build_defs.bzl.tpl
+++ b/third_party/sycl/sycl/build_defs.bzl.tpl
@@ -5,9 +5,24 @@ def if_sycl(if_true, if_false = []):
 
     Returns a select statement which evaluates to if_true if we're building
     with SYCL enabled.  Otherwise, the select statement evaluates to if_false.
+    If we are building with triSYCL instead of ComputeCPP, a list with
+    the first element of if_true is returned.
+    """
+    return select({
+        "@local_config_sycl//sycl:using_sycl_ccpp": if_true,
+        "@local_config_sycl//sycl:using_sycl_trisycl": if_true[0:1],
+        "//conditions:default": if_false
+    })
+
+def if_ccpp(if_true, if_false = []):
+    """Shorthand for select()'ing if we are building with ComputeCPP.
 
+    Returns a select statement which evaluates to if_true if we're building
+    with ComputeCPP enabled. Otherwise, the select statement evaluates
+    to if_false.
     """
     return select({
-        "@local_config_sycl//sycl:using_sycl": if_true,
+        "@local_config_sycl//sycl:using_sycl_ccpp": if_true,
+        "@local_config_sycl//sycl:using_sycl_trisycl": if_false,
         "//conditions:default": if_false
     })
diff --git a/third_party/sycl/sycl_configure.bzl b/third_party/sycl/sycl_configure.bzl
index 7af063178e..a0c9e4e43a 100644
--- a/third_party/sycl/sycl_configure.bzl
+++ b/third_party/sycl/sycl_configure.bzl
@@ -5,20 +5,26 @@
   * HOST_CXX_COMPILER:  The host C++ compiler
   * HOST_C_COMPILER:    The host C compiler
   * COMPUTECPP_TOOLKIT_PATH: The path to the ComputeCpp toolkit.
+  * TRISYCL_INCLUDE_DIR: The path to the include directory of triSYCL.
+                         (if using triSYCL instead of ComputeCPP)
   * PYTHON_LIB_PATH: The path to the python lib
 """
 
 _HOST_CXX_COMPILER = "HOST_CXX_COMPILER"
 _HOST_C_COMPILER= "HOST_C_COMPILER"
 _COMPUTECPP_TOOLKIT_PATH = "COMPUTECPP_TOOLKIT_PATH"
+_TRISYCL_INCLUDE_DIR = "TRISYCL_INCLUDE_DIR"
 _PYTHON_LIB_PATH = "PYTHON_LIB_PATH"
 
 def _enable_sycl(repository_ctx):
-  if "TF_NEED_OPENCL" in repository_ctx.os.environ:
-    enable_sycl = repository_ctx.os.environ["TF_NEED_OPENCL"].strip()
+  if "TF_NEED_OPENCL_SYCL" in repository_ctx.os.environ:
+    enable_sycl = repository_ctx.os.environ["TF_NEED_OPENCL_SYCL"].strip()
     return enable_sycl == "1"
   return False
 
+def _enable_compute_cpp(repository_ctx):
+  return _COMPUTECPP_TOOLKIT_PATH in repository_ctx.os.environ
+
 def auto_configure_fail(msg):
   """Output failure message when auto configuration fails."""
   red = "\033[0;31m"
@@ -59,6 +65,15 @@ def find_computecpp_root(repository_ctx):
     return sycl_name
   fail("Cannot find SYCL compiler, please correct your path")
 
+def find_trisycl_include_dir(repository_ctx):
+  """Find triSYCL include directory. """
+  sycl_name = ""
+  if _TRISYCL_INCLUDE_DIR in repository_ctx.os.environ:
+    sycl_name = repository_ctx.os.environ[_TRISYCL_INCLUDE_DIR].strip()
+    if sycl_name.startswith("/"):
+      return sycl_name
+  fail( "Cannot find triSYCL include directory, please correct your path")
+
 def find_python_lib(repository_ctx):
   """Returns python path."""
   if _PYTHON_LIB_PATH in repository_ctx.os.environ:
@@ -171,26 +186,53 @@ def _sycl_autoconf_imp(repository_ctx):
     _tpl(repository_ctx, "sycl:platform.bzl")
     _tpl(repository_ctx, "crosstool:BUILD")
     _file(repository_ctx, "sycl:LICENSE.text")
-    _tpl(repository_ctx, "crosstool:computecpp",
-    {
-      "%{host_cxx_compiler}" : find_cc(repository_ctx),
-      "%{host_c_compiler}" : find_c(repository_ctx),
-    })
-
-    computecpp_root = find_computecpp_root(repository_ctx)
-    _check_dir(repository_ctx, computecpp_root)
-
-    _tpl(repository_ctx, "crosstool:CROSSTOOL",
-    {
-      "%{computecpp_toolkit_path}" : computecpp_root,
-      "%{python_lib_path}" : find_python_lib(repository_ctx),
-    })
-
-    # symlink libraries
-    _check_lib(repository_ctx, computecpp_root+"/lib", "libComputeCpp.so" )
-    _symlink_dir(repository_ctx, computecpp_root + "/lib", "sycl/lib")
-    _symlink_dir(repository_ctx, computecpp_root + "/include", "sycl/include")
-    _symlink_dir(repository_ctx, computecpp_root + "/bin", "sycl/bin")
+
+    if _enable_compute_cpp(repository_ctx):
+      _tpl(repository_ctx, "crosstool:computecpp",
+      {
+        "%{host_cxx_compiler}" : find_cc(repository_ctx),
+        "%{host_c_compiler}" : find_c(repository_ctx)
+      })
+
+      computecpp_root = find_computecpp_root(repository_ctx);
+      _check_dir(repository_ctx, computecpp_root)
+
+      _tpl(repository_ctx, "crosstool:CROSSTOOL",
+      {
+        "%{sycl_include_dir}" : computecpp_root,
+        "%{sycl_impl}" : "computecpp",
+        "%{c++_std}" : "-std=c++11",
+        "%{python_lib_path}" : find_python_lib(repository_ctx),
+      })
+
+      # symlink libraries
+      _check_lib(repository_ctx, computecpp_root+"/lib", "libComputeCpp.so" )
+      _symlink_dir(repository_ctx, computecpp_root + "/lib", "sycl/lib")
+      _symlink_dir(repository_ctx, computecpp_root + "/include", "sycl/include")
+      _symlink_dir(repository_ctx, computecpp_root + "/bin", "sycl/bin")
+    else:
+
+      trisycl_include_dir = find_trisycl_include_dir(repository_ctx);
+      _check_dir(repository_ctx, trisycl_include_dir)
+
+      _tpl(repository_ctx, "crosstool:trisycl",
+      {
+        "%{host_cxx_compiler}" : find_cc(repository_ctx),
+        "%{host_c_compiler}" : find_c(repository_ctx),
+        "%{trisycl_include_dir}" : trisycl_include_dir
+      })
+
+
+      _tpl(repository_ctx, "crosstool:CROSSTOOL",
+      {
+        "%{sycl_include_dir}" : trisycl_include_dir,
+        "%{sycl_impl}" : "trisycl",
+        "%{c++_std}" : "-std=c++1y",
+        "%{python_lib_path}" : find_python_lib(repository_ctx),
+      })
+
+      _symlink_dir(repository_ctx, trisycl_include_dir, "sycl/include")
+
 
 sycl_configure = repository_rule(
   implementation = _sycl_autoconf_imp,
diff --git a/third_party/zlib.BUILD b/third_party/zlib.BUILD
index 8509668891..d164ee719c 100644
--- a/third_party/zlib.BUILD
+++ b/third_party/zlib.BUILD
@@ -49,7 +49,7 @@ cc_library(
         ":windows_msvc": [],
         "//conditions:default": [
             "-Wno-shift-negative-value",
-            "-Wno-implicit-function-declaration",
+            "-DZ_HAVE_UNISTD_H",
         ],
     }),
     includes = ["."],
diff --git a/tools/bazel.rc b/tools/bazel.rc
index 2d7201ae57..04c24d7511 100644
--- a/tools/bazel.rc
+++ b/tools/bazel.rc
@@ -9,13 +9,16 @@ build:win-cuda --define=using_cuda=true --define=using_cuda_nvcc=true
 build:mkl --define=using_mkl=true
 
 build:sycl --crosstool_top=@local_config_sycl//crosstool:toolchain
-build:sycl --define=using_sycl=true
+build:sycl --define=using_sycl=true --define=using_trisycl=false
 
 build:sycl_nodouble --crosstool_top=@local_config_sycl//crosstool:toolchain
 build:sycl_nodouble --define=using_sycl=true --cxxopt -DTENSORFLOW_SYCL_NO_DOUBLE
 
 build:sycl_asan --crosstool_top=@local_config_sycl//crosstool:toolchain
-build:sycl_asan --define=using_sycl=true --copt -fno-omit-frame-pointer --copt -fsanitize-coverage=3 --copt -DGPR_NO_DIRECT_SYSCALLS --linkopt -fPIC --linkopt -fsanitize=address
+build:sycl_asan --define=using_sycl=true --define=using_trisycl=false --copt -fno-omit-frame-pointer --copt -fsanitize-coverage=3 --copt -DGPR_NO_DIRECT_SYSCALLS --linkopt -fPIC --linkopt -fsanitize=address
+
+build:sycl_trisycl --crosstool_top=@local_config_sycl//crosstool:toolchain
+build:sycl_trisycl --define=using_sycl=true --define=using_trisycl=true
 
 build --define=use_fast_cpp_protos=true
 build --define=allow_oversize_protos=true
diff --git a/util/python/BUILD b/util/python/BUILD
index 96daf9947a..f5fa0c6d29 100644
--- a/util/python/BUILD
+++ b/util/python/BUILD
@@ -1,4 +1,4 @@
-licenses(["restricted"])
+licenses(["notice"])  # New BSD, Python Software Foundation
 
 package(default_visibility = ["//visibility:public"])
 
-- 
GitLab


From d0a3b2d3983b970b750329088013dc5cb67d96f9 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 21 Nov 2017 23:55:59 -0800
Subject: [PATCH 0212/1225] Merged commit includes the following changes:
 176617057  by yifeif:

    Internal change.

--
176615737  by yifeif:

    Fix internal tests.

--

PiperOrigin-RevId: 176617057
---
 configure.py                                  |  19 +--
 tensorflow/compiler/aot/tfcompile.bzl         |   4 +-
 .../compiler/tests/fused_batchnorm_test.py    |   6 +-
 .../contrib/android/cmake/CMakeLists.txt      |   2 +-
 .../python/kernel_tests/cauchy_test.py        |  47 ++++---
 .../distributions/python/ops/cauchy.py        |  18 +--
 .../contrib/layers/python/layers/layers.py    |   1 -
 .../layers/python/layers/layers_test.py       |  28 ++--
 .../python/learn/learn_io/data_feeder.py      |   6 +-
 .../linear_optimizer/python/ops/sdca_ops.py   |   5 +-
 .../contrib/lite/testing/generate_examples.py |   3 +-
 tensorflow/contrib/opt/__init__.py            |  16 ++-
 .../training/multitask_optimizer_wrapper.py   |  60 ++++----
 .../multitask_optimizer_wrapper_test.py       |  40 +++---
 .../python/kernel_tests/core_rnn_cell_test.py |  31 ++--
 .../rnn/python/kernel_tests/rnn_cell_test.py  |  63 ++++-----
 tensorflow/contrib/rnn/python/ops/rnn_cell.py |  98 +++++++------
 .../seq2seq/python/ops/attention_wrapper.py   |   7 +-
 tensorflow/contrib/verbs/rdma.cc              |  61 ++++----
 .../api_def/base_api/api_def_UniqueV2.pbtxt   |  47 +++++++
 .../base_api/api_def_UnsortedSegmentSum.pbtxt |   2 +
 tensorflow/core/graph/graph.h                 |   2 +-
 tensorflow/core/graph/graph_test.cc           |   8 +-
 tensorflow/core/kernels/BUILD                 |   1 +
 tensorflow/core/kernels/bincount_op.cc        |   5 +-
 tensorflow/core/kernels/bincount_op.h         |   2 +-
 tensorflow/core/kernels/bincount_op_gpu.cu.cc |   6 +-
 tensorflow/core/kernels/bincount_op_test.cc   |   4 +-
 .../core/kernels/bucketize_op_gpu.cu.cc       |   8 +-
 tensorflow/core/kernels/conv_grad_ops_3d.cc   |  24 ++--
 tensorflow/core/kernels/cwise_op_asinh.cc     |   2 +-
 tensorflow/core/kernels/cwise_op_atanh.cc     |   2 +-
 .../core/kernels/depthwise_conv_grad_op.cc    |   9 +-
 tensorflow/core/kernels/depthwise_conv_op.cc  |   5 +
 tensorflow/core/kernels/depthwise_conv_op.h   |   3 +-
 tensorflow/core/kernels/maxpooling_op.cc      |  14 +-
 .../core/kernels/maxpooling_op_gpu.cu.cc      |  16 +--
 tensorflow/core/kernels/mkl_tfconv_op.h       |  20 +--
 tensorflow/core/kernels/ops_util.h            |  13 ++
 tensorflow/core/platform/posix/error.cc       |   4 +-
 tensorflow/core/platform/posix/port.cc        |   8 +-
 tensorflow/core/util/cuda_kernel_helper.h     |  12 ++
 tensorflow/core/util/mkl_util.h               | 132 +++++++++---------
 tensorflow/core/util/mkl_util_test.cc         |   1 -
 .../test/java/org/tensorflow/ShapeTest.java   |   6 +-
 .../python/estimator/inputs/numpy_io.py       |  17 ++-
 .../python/estimator/inputs/numpy_io_test.py  |  11 +-
 tensorflow/python/framework/test_util.py      |   7 +-
 .../python/kernel_tests/array_ops_test.py     |  11 +-
 .../python/kernel_tests/bincount_op_test.py   |   7 +-
 .../python/kernel_tests/constant_op_test.py   |  17 ++-
 tensorflow/python/kernel_tests/conv1d_test.py |   2 +-
 .../python/kernel_tests/conv_ops_3d_test.py   | 120 +++++++---------
 .../python/kernel_tests/pooling_ops_test.py   |   3 +-
 .../python/kernel_tests/reader_ops_test.py    |  15 +-
 .../segment_reduction_ops_test.py             |   2 +-
 .../python/kernel_tests/unique_op_test.py     |   1 +
 tensorflow/python/layers/normalization.py     |  51 +++----
 .../python/layers/normalization_test.py       |  40 +++---
 tensorflow/python/ops/array_ops.py            |  19 +--
 .../python/ops/distributions/multinomial.py   |  25 ++--
 tensorflow/python/ops/image_ops_impl.py       |   5 +-
 tensorflow/python/ops/linalg_ops.py           |  24 ++--
 tensorflow/python/ops/metrics_impl.py         |   7 +-
 tensorflow/python/ops/nn_impl.py              |   1 +
 tensorflow/python/ops/nn_ops.py               |  59 ++++----
 tensorflow/stream_executor/dnn.cc             |   1 +
 third_party/sycl/crosstool/trisycl.tpl        |  60 ++++----
 third_party/sycl/sycl_configure.bzl           |   1 -
 69 files changed, 733 insertions(+), 644 deletions(-)
 create mode 100644 tensorflow/core/api_def/base_api/api_def_UniqueV2.pbtxt

diff --git a/configure.py b/configure.py
index 26da09bd94..1f205861f1 100644
--- a/configure.py
+++ b/configure.py
@@ -883,27 +883,28 @@ def set_computecpp_toolkit_path(environ_cp):
   write_action_env_to_bazelrc('COMPUTECPP_TOOLKIT_PATH',
                               computecpp_toolkit_path)
 
+
 def set_trisycl_include_dir(environ_cp):
-  """Set TRISYCL_INCLUDE_DIR"""
+  """Set TRISYCL_INCLUDE_DIR."""
   ask_trisycl_include_dir = ('Please specify the location of the triSYCL '
                              'include directory. (Use --config=sycl_trisycl '
                              'when building with Bazel) '
-                             '[Default is %s]: '
-                             ) % (_DEFAULT_TRISYCL_INCLUDE_DIR)
+                             '[Default is %s]: ') % (
+                                 _DEFAULT_TRISYCL_INCLUDE_DIR)
   while True:
     trisycl_include_dir = get_from_env_or_user_or_default(
-      environ_cp, 'TRISYCL_INCLUDE_DIR', ask_trisycl_include_dir,
-      _DEFAULT_TRISYCL_INCLUDE_DIR)
+        environ_cp, 'TRISYCL_INCLUDE_DIR', ask_trisycl_include_dir,
+        _DEFAULT_TRISYCL_INCLUDE_DIR)
     if os.path.exists(trisycl_include_dir):
       break
 
-    print('Invalid triSYCL include directory, %s cannot be found'
-          % (trisycl_include_dir))
+    print('Invalid triSYCL include directory, %s cannot be found' %
+          (trisycl_include_dir))
 
   # Set TRISYCL_INCLUDE_DIR
   environ_cp['TRISYCL_INCLUDE_DIR'] = trisycl_include_dir
-  write_action_env_to_bazelrc('TRISYCL_INCLUDE_DIR',
-                              trisycl_include_dir)
+  write_action_env_to_bazelrc('TRISYCL_INCLUDE_DIR', trisycl_include_dir)
+
 
 def set_mpi_home(environ_cp):
   """Set MPI_HOME."""
diff --git a/tensorflow/compiler/aot/tfcompile.bzl b/tensorflow/compiler/aot/tfcompile.bzl
index 1e22b760b8..6c385af3b3 100644
--- a/tensorflow/compiler/aot/tfcompile.bzl
+++ b/tensorflow/compiler/aot/tfcompile.bzl
@@ -152,7 +152,7 @@ def tf_library(name, graph, config,
            " --target_triple=" + target_llvm_triple() +
            " --out_header=$(@D)/" + header_file +
            " --out_object=$(@D)/" + object_file +
-           flags),
+           " " + flags),
       tools=[tfcompile_tool],
       visibility=visibility,
       testonly=testonly,
@@ -189,7 +189,7 @@ def tf_library(name, graph, config,
            " --cpp_class=" + cpp_class +
            " --target_triple=" + target_llvm_triple() +
            " --out_session_module=$(@D)/" + session_module_pb +
-           flags),
+           " " + flags),
       tools=[tfcompile_tool],
       visibility=visibility,
       testonly=testonly,
diff --git a/tensorflow/compiler/tests/fused_batchnorm_test.py b/tensorflow/compiler/tests/fused_batchnorm_test.py
index a773b5a947..00a9c9a65b 100644
--- a/tensorflow/compiler/tests/fused_batchnorm_test.py
+++ b/tensorflow/compiler/tests/fused_batchnorm_test.py
@@ -76,7 +76,8 @@ class FusedBatchNormTest(XLATestCase):
       # To avoid constant folding
       t_val = array_ops.placeholder(np.float32, shape=x_shape, name="x")
       scale = array_ops.placeholder(np.float32, shape=scale_shape, name="scale")
-      offset = array_ops.placeholder(np.float32, shape=scale_shape, name="offset")
+      offset = array_ops.placeholder(
+          np.float32, shape=scale_shape, name="offset")
       epsilon = 0.001
       y_ref, mean_ref, var_ref = self._reference_training(
           x_val, scale_val, offset_val, epsilon, data_format)
@@ -112,7 +113,8 @@ class FusedBatchNormTest(XLATestCase):
       # To avoid constant folding
       t_val = array_ops.placeholder(np.float32, shape=x_shape, name="x")
       scale = array_ops.placeholder(np.float32, shape=scale_shape, name="scale")
-      offset = array_ops.placeholder(np.float32, shape=scale_shape, name="offset")
+      offset = array_ops.placeholder(
+          np.float32, shape=scale_shape, name="offset")
       epsilon = 0.001
       y, mean, var = nn.fused_batch_norm(
           t_val,
diff --git a/tensorflow/contrib/android/cmake/CMakeLists.txt b/tensorflow/contrib/android/cmake/CMakeLists.txt
index 25ada5ba27..aba356d616 100644
--- a/tensorflow/contrib/android/cmake/CMakeLists.txt
+++ b/tensorflow/contrib/android/cmake/CMakeLists.txt
@@ -37,7 +37,7 @@ set_target_properties(lib_tf PROPERTIES IMPORTED_LOCATION
 set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DIS_SLIM_BUILD \
                      -std=c++11 -fno-rtti -fno-exceptions \
                      -O2 -Wno-narrowing -fomit-frame-pointer \
-                     -mfpu=neon -mfloat-abi=softfp -fPIE \
+                     -mfpu=neon -mfloat-abi=softfp -fPIE -fPIC \
                      -ftemplate-depth=900 \
                      -DGOOGLE_PROTOBUF_NO_RTTI \
                      -DGOOGLE_PROTOBUF_NO_STATIC_INITIALIZER")
diff --git a/tensorflow/contrib/distributions/python/kernel_tests/cauchy_test.py b/tensorflow/contrib/distributions/python/kernel_tests/cauchy_test.py
index 7f7697357c..73747db31c 100644
--- a/tensorflow/contrib/distributions/python/kernel_tests/cauchy_test.py
+++ b/tensorflow/contrib/distributions/python/kernel_tests/cauchy_test.py
@@ -41,6 +41,7 @@ def try_import(name):  # pylint: disable=invalid-name
     tf_logging.warning("Could not import %s: %s" % (name, str(e)))
   return module
 
+
 stats = try_import("scipy.stats")
 
 
@@ -62,9 +63,9 @@ class CauchyTest(test.TestCase):
       self.assertAllEqual(expected, scale_shape.eval())
       loc = array_ops.zeros(loc_shape)
       scale = array_ops.ones(scale_shape)
-      self.assertAllEqual(
-          expected,
-          array_ops.shape(cauchy_lib.Cauchy(loc, scale).sample()).eval())
+      self.assertAllEqual(expected,
+                          array_ops.shape(
+                              cauchy_lib.Cauchy(loc, scale).sample()).eval())
 
   def _testParamStaticShapes(self, sample_shape, expected):
     param_shapes = cauchy_lib.Cauchy.param_static_shapes(sample_shape)
@@ -92,8 +93,7 @@ class CauchyTest(test.TestCase):
       cauchy = cauchy_lib.Cauchy(loc=loc, scale=scale)
 
       log_pdf = cauchy.log_prob(x)
-      self.assertAllEqual(cauchy.batch_shape_tensor().eval(),
-                          log_pdf.shape)
+      self.assertAllEqual(cauchy.batch_shape_tensor().eval(), log_pdf.shape)
       self.assertAllEqual(cauchy.batch_shape_tensor().eval(),
                           log_pdf.eval().shape)
       self.assertAllEqual(cauchy.batch_shape, log_pdf.shape)
@@ -115,16 +115,15 @@ class CauchyTest(test.TestCase):
     with self.test_session():
       batch_size = 6
       loc = constant_op.constant([[3.0, -3.0]] * batch_size)
-      scale = constant_op.constant([[np.sqrt(10.0), np.sqrt(15.0)]] *
-                                   batch_size)
+      scale = constant_op.constant(
+          [[np.sqrt(10.0), np.sqrt(15.0)]] * batch_size)
       x = np.array([[-2.5, 2.5, 4.0, 0.0, -1.0, 2.0]], dtype=np.float32).T
       cauchy = cauchy_lib.Cauchy(loc=loc, scale=scale)
 
       log_pdf = cauchy.log_prob(x)
       log_pdf_values = log_pdf.eval()
       self.assertEqual(log_pdf.shape, (6, 2))
-      self.assertAllEqual(cauchy.batch_shape_tensor().eval(),
-                          log_pdf.shape)
+      self.assertAllEqual(cauchy.batch_shape_tensor().eval(), log_pdf.shape)
       self.assertAllEqual(cauchy.batch_shape_tensor().eval(),
                           log_pdf.eval().shape)
       self.assertAllEqual(cauchy.batch_shape, log_pdf.shape)
@@ -248,8 +247,7 @@ class CauchyTest(test.TestCase):
       cauchy = cauchy_lib.Cauchy(loc=loc, scale=scale)
 
       entropy = cauchy.entropy()
-      self.assertAllEqual(cauchy.batch_shape_tensor().eval(),
-                          entropy.shape)
+      self.assertAllEqual(cauchy.batch_shape_tensor().eval(), entropy.shape)
       self.assertAllEqual(cauchy.batch_shape_tensor().eval(),
                           entropy.eval().shape)
       self.assertAllEqual(cauchy.batch_shape, entropy.shape)
@@ -257,7 +255,7 @@ class CauchyTest(test.TestCase):
 
       if not stats:
         return
-      expected_entropy = stats.cauchy(loc, scale).entropy()
+      expected_entropy = stats.cauchy(loc, scale[0]).entropy().reshape((1, 3))
       self.assertAllClose(expected_entropy, entropy.eval())
 
   def testCauchyMode(self):
@@ -368,8 +366,8 @@ class CauchyTest(test.TestCase):
       self.assertAllEqual(expected_shape, samples.shape)
       self.assertAllEqual(expected_shape, sample_values.shape)
 
-      expected_shape = (tensor_shape.TensorShape(
-          [n.eval()]).concatenate(cauchy.batch_shape))
+      expected_shape = (
+          tensor_shape.TensorShape([n.eval()]).concatenate(cauchy.batch_shape))
 
       self.assertAllEqual(expected_shape, samples.shape)
       self.assertAllEqual(expected_shape, sample_values.shape)
@@ -385,18 +383,18 @@ class CauchyTest(test.TestCase):
       samples = cauchy.sample(n)
       sample_values = samples.eval()
       self.assertEqual(samples.shape, (100000, batch_size, 2))
-      self.assertAllClose(np.median(sample_values[:, 0, 0]),
-                          loc_v[0], atol=1e-1)
-      self.assertAllClose(np.median(sample_values[:, 0, 1]),
-                          loc_v[1], atol=1e-1)
+      self.assertAllClose(
+          np.median(sample_values[:, 0, 0]), loc_v[0], atol=1e-1)
+      self.assertAllClose(
+          np.median(sample_values[:, 0, 1]), loc_v[1], atol=1e-1)
 
       expected_shape = tensor_shape.TensorShape([n.eval()]).concatenate(
           tensor_shape.TensorShape(cauchy.batch_shape_tensor().eval()))
       self.assertAllEqual(expected_shape, samples.shape)
       self.assertAllEqual(expected_shape, sample_values.shape)
 
-      expected_shape = (tensor_shape.TensorShape(
-          [n.eval()]).concatenate(cauchy.batch_shape))
+      expected_shape = (
+          tensor_shape.TensorShape([n.eval()]).concatenate(cauchy.batch_shape))
       self.assertAllEqual(expected_shape, samples.shape)
       self.assertAllEqual(expected_shape, sample_values.shape)
 
@@ -428,9 +426,12 @@ class CauchyTest(test.TestCase):
       self.assertEqual(cauchy.event_shape, ())
       self.assertAllEqual(cauchy.event_shape_tensor().eval(), [])
       self.assertAllEqual(
-          sess.run(cauchy.batch_shape_tensor(),
-                   feed_dict={loc: 5.0,
-                              scale: [1.0, 2.0]}), [2])
+          sess.run(
+              cauchy.batch_shape_tensor(),
+              feed_dict={
+                  loc: 5.0,
+                  scale: [1.0, 2.0]
+              }), [2])
 
 
 if __name__ == "__main__":
diff --git a/tensorflow/contrib/distributions/python/ops/cauchy.py b/tensorflow/contrib/distributions/python/ops/cauchy.py
index a17bb091f6..8d59c1abfb 100644
--- a/tensorflow/contrib/distributions/python/ops/cauchy.py
+++ b/tensorflow/contrib/distributions/python/ops/cauchy.py
@@ -30,7 +30,6 @@ from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import random_ops
 from tensorflow.python.ops.distributions import distribution
 
-
 __all__ = [
     "Cauchy",
 ]
@@ -97,7 +96,7 @@ class Cauchy(distribution.Distribution):
                validate_args=False,
                allow_nan_stats=True,
                name="Cauchy"):
-    """Construct Cauchy distributions with loc and and scale `loc` and `scale`.
+    """Construct Cauchy distributions.
 
     The parameters `loc` and `scale` must be shaped in a way that supports
     broadcasting (e.g. `loc + scale` is a valid operation).
@@ -121,8 +120,8 @@ class Cauchy(distribution.Distribution):
     """
     parameters = locals()
     with ops.name_scope(name, values=[loc, scale]):
-      with ops.control_dependencies([check_ops.assert_positive(scale)] if
-                                    validate_args else []):
+      with ops.control_dependencies([check_ops.assert_positive(scale)]
+                                    if validate_args else []):
         self._loc = array_ops.identity(loc, name="loc")
         self._scale = array_ops.identity(scale, name="scale")
         check_ops.assert_same_float_dtype([self._loc, self._scale])
@@ -138,8 +137,8 @@ class Cauchy(distribution.Distribution):
   @staticmethod
   def _param_shapes(sample_shape):
     return dict(
-        zip(("loc", "scale"), ([ops.convert_to_tensor(
-            sample_shape, dtype=dtypes.int32)] * 2)))
+        zip(("loc", "scale"),
+            ([ops.convert_to_tensor(sample_shape, dtype=dtypes.int32)] * 2)))
 
   @property
   def loc(self):
@@ -153,13 +152,10 @@ class Cauchy(distribution.Distribution):
 
   def _batch_shape_tensor(self):
     return array_ops.broadcast_dynamic_shape(
-        array_ops.shape(self.loc),
-        array_ops.shape(self.scale))
+        array_ops.shape(self.loc), array_ops.shape(self.scale))
 
   def _batch_shape(self):
-    return array_ops.broadcast_static_shape(
-        self.loc.shape,
-        self.scale.shape)
+    return array_ops.broadcast_static_shape(self.loc.shape, self.scale.shape)
 
   def _event_shape_tensor(self):
     return constant_op.constant([], dtype=dtypes.int32)
diff --git a/tensorflow/contrib/layers/python/layers/layers.py b/tensorflow/contrib/layers/python/layers/layers.py
index 9378fe8799..f1debc8590 100644
--- a/tensorflow/contrib/layers/python/layers/layers.py
+++ b/tensorflow/contrib/layers/python/layers/layers.py
@@ -309,7 +309,6 @@ def _fused_batch_norm(inputs,
         new_shape = [-1, channels, 1, 1]
       inputs = array_ops.reshape(inputs, new_shape)
     inputs_shape = inputs.get_shape()
-    dtype = inputs.dtype.base_dtype
     if data_format == DATA_FORMAT_NHWC:
       params_shape = inputs_shape[-1:]
     else:
diff --git a/tensorflow/contrib/layers/python/layers/layers_test.py b/tensorflow/contrib/layers/python/layers/layers_test.py
index 5aa2253516..27bd3172d6 100644
--- a/tensorflow/contrib/layers/python/layers/layers_test.py
+++ b/tensorflow/contrib/layers/python/layers/layers_test.py
@@ -1779,7 +1779,8 @@ class BatchNormTest(test.TestCase):
       dtype = dtypes.float32
     height, width = 3, 3
     with self.test_session():
-      images = np.random.uniform(size=(5, height, width, 3)).astype(dtype.as_numpy_dtype)
+      images = np.random.uniform(size=(5, height, width, 3)).astype(
+          dtype.as_numpy_dtype)
       output = _layers.batch_norm(images, fused=fused)
       expected_name = ('BatchNorm/FusedBatchNorm' if fused else
                        'BatchNorm/batchnorm')
@@ -2665,18 +2666,18 @@ class BatchNormTest(test.TestCase):
     # Test case for 11673
     with self.test_session() as sess:
       a_32 = array_ops.placeholder(dtypes.float32, shape=(10, 10, 10, 10))
-      b_32 = _layers.batch_norm(a_32, center=False, data_format='NCHW',
-                                zero_debias_moving_mean=True)
+      _layers.batch_norm(
+          a_32, center=False, data_format='NCHW', zero_debias_moving_mean=True)
       a_16 = array_ops.placeholder(dtypes.float16, shape=(10, 10, 10, 10))
-      b_16 = _layers.batch_norm(a_16, center=False, data_format='NCHW',
-                                zero_debias_moving_mean=True)
+      _layers.batch_norm(
+          a_16, center=False, data_format='NCHW', zero_debias_moving_mean=True)
       sess.run(variables_lib.global_variables_initializer())
 
   def testVariablesAreFloat32(self):
     height, width = 3, 3
     with self.test_session():
-      images = random_ops.random_uniform((5, height, width, 3),
-                                         seed=1, dtype=dtypes.float16)
+      images = random_ops.random_uniform(
+          (5, height, width, 3), seed=1, dtype=dtypes.float16)
       _layers.batch_norm(images, scale=True)
       beta = variables.get_variables_by_name('beta')[0]
       gamma = variables.get_variables_by_name('gamma')[0]
@@ -2691,17 +2692,13 @@ class BatchNormTest(test.TestCase):
     channels = shape[1]
     images = np.arange(np.product(shape), dtype=dtype).reshape(shape)
     beta = init_ops.constant_initializer(
-        np.arange(
-            2, channels + 2, dtype=np.float32))
+        np.arange(2, channels + 2, dtype=np.float32))
     gamma = init_ops.constant_initializer(
-        np.arange(
-            10, channels + 10, dtype=np.float32) * 2.0)
+        np.arange(10, channels + 10, dtype=np.float32) * 2.0)
     mean = init_ops.constant_initializer(
-        np.arange(
-            3, channels + 3, dtype=np.float32) * 5.0)
+        np.arange(3, channels + 3, dtype=np.float32) * 5.0)
     variance = init_ops.constant_initializer(
-        np.arange(
-            1, channels + 1, dtype=np.float32) * 4.0)
+        np.arange(1, channels + 1, dtype=np.float32) * 4.0)
     output = _layers.batch_norm(
         images,
         fused=True,
@@ -2726,7 +2723,6 @@ class BatchNormTest(test.TestCase):
       res_16 = self._runFusedBatchNorm(shape, np.float16)
       self.assertAllClose(res_32, res_16, rtol=1e-3)
 
-
   def testAdjustmentCreated(self):
     # Tests that the adjustment is appropriately passed to and used by the core
     # BN layer.
diff --git a/tensorflow/contrib/learn/python/learn/learn_io/data_feeder.py b/tensorflow/contrib/learn/python/learn/learn_io/data_feeder.py
index db18ebf05d..86fad4c553 100644
--- a/tensorflow/contrib/learn/python/learn/learn_io/data_feeder.py
+++ b/tensorflow/contrib/learn/python/learn/learn_io/data_feeder.py
@@ -28,7 +28,6 @@ import six
 from six.moves import xrange  # pylint: disable=redefined-builtin
 
 from tensorflow.python.framework import dtypes
-from tensorflow.python.framework import ops
 from tensorflow.python.framework import tensor_util
 from tensorflow.python.ops import array_ops
 from tensorflow.python.platform import tf_logging as logging
@@ -369,10 +368,11 @@ class DataFeeder(object):
     if x_is_dict:
       num_samples = list(self._x.values())[0].shape[0]
     elif tensor_util.is_tensor(self._x):
-      num_samples = self._x.shape[0].value  # shape will be a Dimension, extract an int
+      num_samples = self._x.shape[
+          0].value  # shape will be a Dimension, extract an int
     else:
       num_samples = self._x.shape[0]
-      
+
     if self._shuffle:
       self.indices = self.random_state.permutation(num_samples)
     else:
diff --git a/tensorflow/contrib/linear_optimizer/python/ops/sdca_ops.py b/tensorflow/contrib/linear_optimizer/python/ops/sdca_ops.py
index 86d8484391..7526f3ae0d 100644
--- a/tensorflow/contrib/linear_optimizer/python/ops/sdca_ops.py
+++ b/tensorflow/contrib/linear_optimizer/python/ops/sdca_ops.py
@@ -251,8 +251,9 @@ class SdcaModel(object):
 
       result_dense = 0.0
       for i in range(len(dense_variables)):
-        result_dense += math_ops.matmul(
-            dense_features[i], array_ops.expand_dims(dense_variables[i], -1))
+        result_dense += math_ops.matmul(dense_features[i],
+                                        array_ops.expand_dims(
+                                            dense_variables[i], -1))
 
     # Reshaping to allow shape inference at graph construction time.
     return array_ops.reshape(result_dense, [-1]) + result_sparse
diff --git a/tensorflow/contrib/lite/testing/generate_examples.py b/tensorflow/contrib/lite/testing/generate_examples.py
index b122818221..5bca82ded0 100644
--- a/tensorflow/contrib/lite/testing/generate_examples.py
+++ b/tensorflow/contrib/lite/testing/generate_examples.py
@@ -40,6 +40,7 @@ from six import StringIO
 # TODO(aselle): Disable GPU for now
 os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
 
+# pylint: disable=g-import-not-at-top
 import tensorflow as tf
 from google.protobuf import text_format
 # TODO(aselle): switch to TensorFlow's resource_loader
@@ -383,7 +384,7 @@ def make_zip_of_tests(zip_path,
         report["toco_log"] = ""
         tf.reset_default_graph()
 
-        with tf.device('/cpu:0'):
+        with tf.device("/cpu:0"):
           try:
             inputs, outputs = make_graph(param_dict_real)
           except (tf.errors.UnimplementedError, tf.errors.InvalidArgumentError,
diff --git a/tensorflow/contrib/opt/__init__.py b/tensorflow/contrib/opt/__init__.py
index 4c60c99342..04643a6058 100644
--- a/tensorflow/contrib/opt/__init__.py
+++ b/tensorflow/contrib/opt/__init__.py
@@ -34,12 +34,18 @@ from tensorflow.python.util.all_util import remove_undocumented
 
 
 _allowed_symbols = [
-    'PowerSignOptimizer', 'AddSignOptimizer'
+    'PowerSignOptimizer',
+    'AddSignOptimizer'
     'DelayCompensatedGradientDescentOptimizer',
-    'DropStaleGradientOptimizer', 'ExternalOptimizerInterface',
-    'LazyAdamOptimizer', 'NadamOptimizer', 'MovingAverageOptimizer',
-    'ScipyOptimizerInterface', 'VariableClippingOptimizer',
-    'MultitaskOptimizerWrapper', 'clip_gradients_by_global_norm',
+    'DropStaleGradientOptimizer',
+    'ExternalOptimizerInterface',
+    'LazyAdamOptimizer',
+    'NadamOptimizer',
+    'MovingAverageOptimizer',
+    'ScipyOptimizerInterface',
+    'VariableClippingOptimizer',
+    'MultitaskOptimizerWrapper',
+    'clip_gradients_by_global_norm',
 ]
 
 remove_undocumented(__name__, _allowed_symbols)
diff --git a/tensorflow/contrib/opt/python/training/multitask_optimizer_wrapper.py b/tensorflow/contrib/opt/python/training/multitask_optimizer_wrapper.py
index c26037935d..cb6c77a86f 100644
--- a/tensorflow/contrib/opt/python/training/multitask_optimizer_wrapper.py
+++ b/tensorflow/contrib/opt/python/training/multitask_optimizer_wrapper.py
@@ -12,9 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-
-"""An optimizer wrapper that ensures correct behaviour
-of stateful optimizers with multitask loss."""
+"""An optimizer wrapper for stateful optimizers with multitask loss."""
 
 from __future__ import absolute_import
 from __future__ import division
@@ -30,26 +28,27 @@ from tensorflow.python.ops import control_flow_ops
 from tensorflow.python.ops import math_ops
 from tensorflow.python.training import optimizer
 
-__all__ = ["MultitaskOptimizerWrapper",
-           "clip_gradients_by_global_norm"]
+__all__ = ['MultitaskOptimizerWrapper', 'clip_gradients_by_global_norm']
+
 
 def _is_all_zeros(grad):
   all_zeros = math_ops.equal(math_ops.count_nonzero(grad), 0)
   return all_zeros
 
+
 def _get_wrapper(fn, opt):
+
   def wrapper(self, grad, *args, **kwargs):  # pylint: disable=unused-argument
     all_zeros = _is_all_zeros(grad)
-    return control_flow_ops.cond(
-        all_zeros,
-        control_flow_ops.no_op,
-        lambda: fn(grad, *args, **kwargs))
+    return control_flow_ops.cond(all_zeros, control_flow_ops.no_op,
+                                 lambda: fn(grad, *args, **kwargs))
+
   wrapper = types.MethodType(wrapper, opt)
   return wrapper
 
+
 class MultitaskOptimizerWrapper(object):
-  """Optimizer wrapper that ensures that
-  all-zero gradients don't affect the optimizer state.
+  """Optimizer wrapper making all-zero gradients harmless.
 
   This might be useful when a multi-task loss is used,
   and some components of the loss might be
@@ -88,20 +87,20 @@ class MultitaskOptimizerWrapper(object):
     gradvars_clipped, global_step=batch)
   ```
   """
+
   def __init__(self, opt):
-    """
+    """Constructor.
+
     Args:
-    opt: an instance of a class that implements tf.train.Optimizer.
+      opt: an instance of a class that implements tf.train.Optimizer.
     """
     if not isinstance(opt, optimizer.Optimizer):
       raise TypeError(
-          "Supplied optimizer must be an instance of tf.train.Optimizer")
+          'Supplied optimizer must be an instance of tf.train.Optimizer')
     self._opt = opt
-    overriden_methods = ('_apply_dense',
-                         '_resource_apply_dense',
-                         '_apply_sparse',
-                         '_resource_apply_sparse')
-    for name in overriden_methods:
+    overridden_methods = ('_apply_dense', '_resource_apply_dense',
+                          '_apply_sparse', '_resource_apply_sparse')
+    for name in overridden_methods:
       fn = getattr(self._opt, name)
       wrapper = _get_wrapper(fn, self._opt)
       setattr(self._opt, name, wrapper)
@@ -112,27 +111,30 @@ class MultitaskOptimizerWrapper(object):
 
 def clip_gradients_by_global_norm(gradients_variables, clip_norm=20.):
   """Clips gradients of a multitask loss by their global norm.
+
   Ignores all-zero tensors when computing the global norm.
 
   Args:
-  gradients_variables: a list of pairs (gradient, variable).
-  clip_norm: a float Tensor, the global norm to clip on. Default is 20.0.
+    gradients_variables: a list of pairs (gradient, variable).
+    clip_norm: a float Tensor, the global norm to clip on. Default is 20.0.
 
   Returns:
-  list: A list of pairs of the same type as gradients_variables,.
-  fixed_global_norm: A 0-D (scalar) Tensor representing the global norm.
+    list: A list of pairs of the same type as gradients_variables,.
+    fixed_global_norm: A 0-D (scalar) Tensor representing the global norm.
   """
   gradients, variables = six.moves.zip(*gradients_variables)
+
   def _replace_nonexisting_grad(grad):
     if grad is None:
       return grad
     all_zeros = _is_all_zeros(grad)
-    return control_flow_ops.cond(all_zeros,
-                                 lambda: array_ops.zeros(
-                                     [], dtype=dtypes.as_dtype(grad.dtype)),
-                                 lambda: grad)
+    return control_flow_ops.cond(
+        all_zeros,
+        lambda: array_ops.zeros([], dtype=dtypes.as_dtype(grad.dtype)),
+        lambda: grad)
+
   nonzero_gradients = [_replace_nonexisting_grad(g) for g in gradients]
   fixed_global_norm = clip_ops.global_norm(nonzero_gradients)
-  gradients, _ = clip_ops.clip_by_global_norm(gradients, clip_norm,
-                                              use_norm=fixed_global_norm)
+  gradients, _ = clip_ops.clip_by_global_norm(
+      gradients, clip_norm, use_norm=fixed_global_norm)
   return list(six.moves.zip(gradients, variables)), fixed_global_norm
diff --git a/tensorflow/contrib/opt/python/training/multitask_optimizer_wrapper_test.py b/tensorflow/contrib/opt/python/training/multitask_optimizer_wrapper_test.py
index b06213f715..618d8eb18d 100644
--- a/tensorflow/contrib/opt/python/training/multitask_optimizer_wrapper_test.py
+++ b/tensorflow/contrib/opt/python/training/multitask_optimizer_wrapper_test.py
@@ -18,6 +18,9 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
+import numpy as np
+import six
+
 from tensorflow.contrib.opt.python.training import multitask_optimizer_wrapper
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
@@ -25,13 +28,11 @@ from tensorflow.python.ops import variables
 from tensorflow.python.platform import test
 from tensorflow.python.training import momentum
 
-import numpy as np
-import six
 
 class MultitaskOptimizerWrapperTest(test.TestCase):
+  """Tests for the multitask optimizer wrapper.
   """
-  Tests for the multitask optimizer wrapper.
-  """
+
   def testWrapper(self):
     with self.test_session():
       var0 = variables.Variable([1.0, 2.0], dtype=dtypes.float32)
@@ -39,12 +40,10 @@ class MultitaskOptimizerWrapperTest(test.TestCase):
       grads0 = constant_op.constant([0.1, 0.1], dtype=dtypes.float32)
       grads1 = constant_op.constant([0.01, 0.01], dtype=dtypes.float32)
       grads_allzero = constant_op.constant([0.0, 0.0], dtype=dtypes.float32)
-      mom_opt_impl = momentum.MomentumOptimizer(
-          learning_rate=2.0, momentum=0.9)
+      mom_opt_impl = momentum.MomentumOptimizer(learning_rate=2.0, momentum=0.9)
       mom_opt = multitask_optimizer_wrapper.MultitaskOptimizerWrapper(
           mom_opt_impl)
-      mom_update = mom_opt.apply_gradients(
-          zip([grads0, grads1], [var0, var1]))
+      mom_update = mom_opt.apply_gradients(zip([grads0, grads1], [var0, var1]))
       mom_update_partial = mom_opt.apply_gradients(
           zip([grads_allzero, grads1], [var0, var1]))
       mom_update_no_action = mom_opt.apply_gradients(
@@ -63,14 +62,13 @@ class MultitaskOptimizerWrapperTest(test.TestCase):
       # Step 1: normal momentum update.
       self.evaluate(mom_update)
       # Check that the momentum accumulators have been updated.
-      self.assertAllCloseAccordingToType(np.array([0.1, 0.1]),
-                                         self.evaluate(slot0))
-      self.assertAllCloseAccordingToType(np.array([0.01, 0.01]),
-                                         self.evaluate(slot1))
+      self.assertAllCloseAccordingToType(
+          np.array([0.1, 0.1]), self.evaluate(slot0))
+      self.assertAllCloseAccordingToType(
+          np.array([0.01, 0.01]), self.evaluate(slot1))
       # Check that the parameters have been updated.
       self.assertAllCloseAccordingToType(
-          np.array([1.0 - (0.1 * 2.0), 2.0 - (0.1 * 2.0)]),
-          self.evaluate(var0))
+          np.array([1.0 - (0.1 * 2.0), 2.0 - (0.1 * 2.0)]), self.evaluate(var0))
       self.assertAllCloseAccordingToType(
           np.array([3.0 - (0.01 * 2.0), 4.0 - (0.01 * 2.0)]),
           self.evaluate(var1))
@@ -78,8 +76,8 @@ class MultitaskOptimizerWrapperTest(test.TestCase):
       # Step 2: momentum update that changes only slot1 but not slot0.
       self.evaluate(mom_update_partial)
       # Check that only the relevant momentum accumulator has been updated.
-      self.assertAllCloseAccordingToType(np.array([0.1, 0.1]),
-                                         self.evaluate(slot0))
+      self.assertAllCloseAccordingToType(
+          np.array([0.1, 0.1]), self.evaluate(slot0))
       self.assertAllCloseAccordingToType(
           np.array([(0.9 * 0.01 + 0.01), (0.9 * 0.01 + 0.01)]),
           self.evaluate(slot1))
@@ -87,8 +85,8 @@ class MultitaskOptimizerWrapperTest(test.TestCase):
       # Step 3: momentum update that does not change anything.
       self.evaluate(mom_update_no_action)
       # Check that the momentum accumulators have *NOT* been updated.
-      self.assertAllCloseAccordingToType(np.array([0.1, 0.1]),
-                                         self.evaluate(slot0))
+      self.assertAllCloseAccordingToType(
+          np.array([0.1, 0.1]), self.evaluate(slot0))
       self.assertAllCloseAccordingToType(
           np.array([(0.9 * 0.01 + 0.01), (0.9 * 0.01 + 0.01)]),
           self.evaluate(slot1))
@@ -105,8 +103,9 @@ class MultitaskOptimizerWrapperTest(test.TestCase):
       grads3 = None
       varlist = [var0, var1, var2, var3]
       gradients = [grads0, grads1, grads2, grads3]
-      clipped_gradvars, global_norm = multitask_optimizer_wrapper.clip_gradients_by_global_norm(
-          six.moves.zip(gradients, varlist), clip_norm=1.0)
+      clipped_gradvars, global_norm = (
+          multitask_optimizer_wrapper.clip_gradients_by_global_norm(
+              six.moves.zip(gradients, varlist), clip_norm=1.0))
       clipped_grads = list(six.moves.zip(*clipped_gradvars))[0]
       reference_global_norm = np.sqrt(np.sum(np.square([10.0, 15.0, 0.0, 5.0])))
       self.assertAllCloseAccordingToType(
@@ -115,5 +114,6 @@ class MultitaskOptimizerWrapperTest(test.TestCase):
           self.evaluate(clipped_grads[2]), np.array([0., 0.]))
       self.assertEqual(clipped_grads[3], None)
 
+
 if __name__ == "__main__":
   test.main()
diff --git a/tensorflow/contrib/rnn/python/kernel_tests/core_rnn_cell_test.py b/tensorflow/contrib/rnn/python/kernel_tests/core_rnn_cell_test.py
index 16b6d145e3..f130a2187c 100644
--- a/tensorflow/contrib/rnn/python/kernel_tests/core_rnn_cell_test.py
+++ b/tensorflow/contrib/rnn/python/kernel_tests/core_rnn_cell_test.py
@@ -24,6 +24,7 @@ import numpy as np
 
 from tensorflow.contrib import rnn as contrib_rnn
 from tensorflow.contrib.rnn.python.ops import core_rnn_cell
+from tensorflow.contrib.rnn.python.ops import rnn_cell as contrib_rnn_cell
 from tensorflow.core.protobuf import config_pb2
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
@@ -38,9 +39,6 @@ from tensorflow.python.ops import rnn_cell_impl
 from tensorflow.python.ops import variable_scope
 from tensorflow.python.ops import variables as variables_lib
 from tensorflow.python.platform import test
-from tensorflow.python.framework import test_util
-from tensorflow.contrib.rnn.python.ops import rnn_cell as contrib_rnn_cell
-
 
 
 # pylint: enable=protected-access
@@ -374,19 +372,20 @@ class RNNCellTest(test.TestCase):
         h = array_ops.zeros([batch_size, num_proj])
         state = rnn_cell_impl.LSTMStateTuple(c, h)
         cell = contrib_rnn_cell.LayerNormLSTMCell(
-          num_units=num_units,
-          num_proj=num_proj,
-          forget_bias=1.0,
-          layer_norm=True,
-          norm_gain=1.0,
-          norm_shift=0.0)
+            num_units=num_units,
+            num_proj=num_proj,
+            forget_bias=1.0,
+            layer_norm=True,
+            norm_gain=1.0,
+            norm_shift=0.0)
         g, out_m = cell(x, state)
         sess.run([variables_lib.global_variables_initializer()])
-        res = sess.run([g, out_m], {
-          x.name: np.ones((batch_size, input_size)),
-          c.name: 0.1 * np.ones((batch_size, num_units)),
-          h.name: 0.1 * np.ones((batch_size, num_proj))
-        })
+        res = sess.run(
+            [g, out_m], {
+                x.name: np.ones((batch_size, input_size)),
+                c.name: 0.1 * np.ones((batch_size, num_units)),
+                h.name: 0.1 * np.ones((batch_size, num_proj))
+            })
         self.assertEqual(len(res), 2)
         # The numbers in results were not calculated, this is mostly just a
         # smoke test.
@@ -396,9 +395,9 @@ class RNNCellTest(test.TestCase):
         # Different inputs so different outputs and states
         for i in range(1, batch_size):
           self.assertTrue(
-            float(np.linalg.norm((res[0][0, :] - res[0][i, :]))) < 1e-6)
+              float(np.linalg.norm((res[0][0, :] - res[0][i, :]))) < 1e-6)
           self.assertTrue(
-            float(np.linalg.norm((res[1][0, :] - res[1][i, :]))) < 1e-6)
+              float(np.linalg.norm((res[1][0, :] - res[1][i, :]))) < 1e-6)
 
   def testOutputProjectionWrapper(self):
     with self.test_session() as sess:
diff --git a/tensorflow/contrib/rnn/python/kernel_tests/rnn_cell_test.py b/tensorflow/contrib/rnn/python/kernel_tests/rnn_cell_test.py
index b4a5f2d7eb..46823fa364 100644
--- a/tensorflow/contrib/rnn/python/kernel_tests/rnn_cell_test.py
+++ b/tensorflow/contrib/rnn/python/kernel_tests/rnn_cell_test.py
@@ -996,26 +996,19 @@ class RNNCellTest(test.TestCase):
         output, state = cell(x, hidden)
 
         sess.run([variables.global_variables_initializer()])
-        res = sess.run([output, state], {
-            hidden[0].name:
-                np.array([[[[[1.],[1.]], 
-                            [[1.],[1.]]],
-                           [[[1.],[1.]],
-                            [[1.],[1.]]]], 
-                          [[[[2.],[2.]],
-                            [[2.],[2.]]],
-                           [[[2.],[2.]],
-                            [[2.],[2.]]]]]),
-            x.name:
-                np.array([[[[[1.],[1.]],
-                            [[1.],[1.]]],
-                           [[[1.],[1.]],
-                            [[1.],[1.]]]],
-                          [[[[2.],[2.]],
-                            [[2.],[2.]]],
-                           [[[2.],[2.]],
-                           [[2.],[2.]]]]])
-        })
+        res = sess.run(
+            [output, state], {
+                hidden[0].name:
+                    np.array([[[[[1.], [1.]], [[1.], [1.]]], [[[1.], [1.]], [[
+                        1.
+                    ], [1.]]]], [[[[2.], [2.]], [[2.], [2.]]],
+                                 [[[2.], [2.]], [[2.], [2.]]]]]),
+                x.name:
+                    np.array([[[[[1.], [1.]], [[1.], [1.]]], [[[1.], [1.]], [[
+                        1.
+                    ], [1.]]]], [[[[2.], [2.]], [[2.], [2.]]], [[[2.], [2.]],
+                                                                [[2.], [2.]]]]])
+            })
         # This is a smoke test, making sure expected values are unchanged.
         self.assertEqual(len(res), 2)
         self.assertAllClose(res[0], res[1].h)
@@ -1276,10 +1269,8 @@ class LayerNormBasicLSTMCellTest(test.TestCase):
         self.assertAllClose(res[2].c, expected_c1, 1e-5)
         self.assertAllClose(res[2].h, expected_h1, 1e-5)
 
-
   def testBasicLSTMCellWithStateTupleLayerNorm(self):
-    """The results of LSTMCell and LayerNormBasicLSTMCell 
-    should be same. """
+    """The results of LSTMCell and LayerNormBasicLSTMCell should be the same."""
     with self.test_session() as sess:
       with variable_scope.variable_scope(
           "root", initializer=init_ops.constant_initializer(0.5)):
@@ -1290,21 +1281,21 @@ class LayerNormBasicLSTMCellTest(test.TestCase):
         c1 = array_ops.zeros([1, 2])
         h1 = array_ops.zeros([1, 2])
         state1 = rnn_cell_impl.LSTMStateTuple(c1, h1)
-        cell = rnn_cell_impl.MultiRNNCell(
-          [contrib_rnn_cell.LayerNormLSTMCell(
-              2,
-              layer_norm=True,
-              norm_gain=1.0,
-              norm_shift=0.0) for _ in range(2)])
+        cell = rnn_cell_impl.MultiRNNCell([
+            contrib_rnn_cell.LayerNormLSTMCell(
+                2, layer_norm=True, norm_gain=1.0, norm_shift=0.0)
+            for _ in range(2)
+        ])
         h, (s0, s1) = cell(x, (state0, state1))
         sess.run([variables.global_variables_initializer()])
-        res = sess.run([h, s0, s1], {
-          x.name: np.array([[1., 1.]]),
-          c0.name: 0.1 * np.asarray([[0, 1]]),
-          h0.name: 0.1 * np.asarray([[2, 3]]),
-          c1.name: 0.1 * np.asarray([[4, 5]]),
-          h1.name: 0.1 * np.asarray([[6, 7]]),
-        })
+        res = sess.run(
+            [h, s0, s1], {
+                x.name: np.array([[1., 1.]]),
+                c0.name: 0.1 * np.asarray([[0, 1]]),
+                h0.name: 0.1 * np.asarray([[2, 3]]),
+                c1.name: 0.1 * np.asarray([[4, 5]]),
+                h1.name: 0.1 * np.asarray([[6, 7]]),
+            })
 
         expected_h = np.array([[-0.38079708, 0.38079708]])
         expected_h0 = np.array([[-0.38079708, 0.38079708]])
diff --git a/tensorflow/contrib/rnn/python/ops/rnn_cell.py b/tensorflow/contrib/rnn/python/ops/rnn_cell.py
index 5e85c125df..0698d40438 100644
--- a/tensorflow/contrib/rnn/python/ops/rnn_cell.py
+++ b/tensorflow/contrib/rnn/python/ops/rnn_cell.py
@@ -36,7 +36,6 @@ from tensorflow.python.ops import nn_ops
 from tensorflow.python.ops import random_ops
 from tensorflow.python.ops import rnn_cell_impl
 from tensorflow.python.ops import variable_scope as vs
-from tensorflow.python.ops import partitioned_variables
 from tensorflow.python.platform import tf_logging as logging
 from tensorflow.python.util import nest
 
@@ -115,7 +114,7 @@ class CoupledInputForgetGateLSTMCell(rnn_cell_impl.RNNCell):
 
   The class uses optional peep-hole connections, and an optional projection
   layer.
-  
+
   Layer normalization implementation is based on:
 
     https://arxiv.org/abs/1607.06450.
@@ -124,15 +123,24 @@ class CoupledInputForgetGateLSTMCell(rnn_cell_impl.RNNCell):
   Jimmy Lei Ba, Jamie Ryan Kiros, Geoffrey E. Hinton
 
   and is applied before the internal nonlinearities.
-  
+
   """
 
-  def __init__(self, num_units, use_peepholes=False,
-               initializer=None, num_proj=None, proj_clip=None,
-               num_unit_shards=1, num_proj_shards=1,
-               forget_bias=1.0, state_is_tuple=True,
-               activation=math_ops.tanh, reuse=None,
-               layer_norm=False, norm_gain=1.0, norm_shift=0.0):
+  def __init__(self,
+               num_units,
+               use_peepholes=False,
+               initializer=None,
+               num_proj=None,
+               proj_clip=None,
+               num_unit_shards=1,
+               num_proj_shards=1,
+               forget_bias=1.0,
+               state_is_tuple=True,
+               activation=math_ops.tanh,
+               reuse=None,
+               layer_norm=False,
+               norm_gain=1.0,
+               norm_shift=0.0):
     """Initialize the parameters for an LSTM cell.
 
     Args:
@@ -164,8 +172,6 @@ class CoupledInputForgetGateLSTMCell(rnn_cell_impl.RNNCell):
         `layer_norm` has been set to `False`, this argument will be ignored.
       norm_shift: float, The layer normalization shift initial value. If
         `layer_norm` has been set to `False`, this argument will be ignored.
-        
-        
     """
     super(CoupledInputForgetGateLSTMCell, self).__init__(_reuse=reuse)
     if not state_is_tuple:
@@ -2049,8 +2055,8 @@ class ConvLSTMCell(rnn_cell_impl.RNNCell):
     if self._skip_connection:
       self._total_output_channels += self._input_shape[-1]
 
-    state_size = tensor_shape.TensorShape(self._input_shape[:-1] 
-                                          + [self._output_channels])
+    state_size = tensor_shape.TensorShape(
+        self._input_shape[:-1] + [self._output_channels])
     self._state_size = rnn_cell_impl.LSTMStateTuple(state_size, state_size)
     self._output_size = tensor_shape.TensorShape(self._input_shape[:-1]
                                                  + [self._total_output_channels])
@@ -2110,11 +2116,8 @@ class Conv3DLSTMCell(ConvLSTMCell):
     """Construct Conv3DLSTM. See `ConvLSTMCell` for more details."""
     super(Conv3DLSTMCell, self).__init__(conv_ndims=3, **kwargs)
 
-def _conv(args, 
-          filter_size,
-          num_features,
-          bias,
-          bias_start=0.0):
+
+def _conv(args, filter_size, num_features, bias, bias_start=0.0):
   """convolution:
   Args:
     args: a Tensor or a list of Tensors of dimension 3D, 4D or 5D, 
@@ -2391,12 +2394,19 @@ class LayerNormLSTMCell(rnn_cell_impl.RNNCell):
 
   """
 
-  def __init__(self, num_units,
-               use_peepholes=False, cell_clip=None,
-               initializer=None, num_proj=None, proj_clip=None,
+  def __init__(self,
+               num_units,
+               use_peepholes=False,
+               cell_clip=None,
+               initializer=None,
+               num_proj=None,
+               proj_clip=None,
                forget_bias=1.0,
-               activation=None, layer_norm=False,
-               norm_gain=1.0, norm_shift=0.0, reuse=None):
+               activation=None,
+               layer_norm=False,
+               norm_gain=1.0,
+               norm_shift=0.0,
+               reuse=None):
     """Initialize the parameters for an LSTM cell.
 
     Args:
@@ -2457,7 +2467,6 @@ class LayerNormLSTMCell(rnn_cell_impl.RNNCell):
   def output_size(self):
     return self._output_size
 
-
   def _linear(self,
               args,
               output_size,
@@ -2507,9 +2516,9 @@ class LayerNormLSTMCell(rnn_cell_impl.RNNCell):
     scope = vs.get_variable_scope()
     with vs.variable_scope(scope) as outer_scope:
       weights = vs.get_variable(
-        "kernel", [total_arg_size, output_size],
-        dtype=dtype,
-        initializer=kernel_initializer)
+          "kernel", [total_arg_size, output_size],
+          dtype=dtype,
+          initializer=kernel_initializer)
       if len(args) == 1:
         res = math_ops.matmul(args[0], weights)
       else:
@@ -2521,9 +2530,7 @@ class LayerNormLSTMCell(rnn_cell_impl.RNNCell):
         if bias_initializer is None:
           bias_initializer = init_ops.constant_initializer(0.0, dtype=dtype)
         biases = vs.get_variable(
-          "bias", [output_size],
-          dtype=dtype,
-          initializer=bias_initializer)
+            "bias", [output_size], dtype=dtype, initializer=bias_initializer)
 
     if not layer_norm:
       res = nn_ops.bias_add(res, biases)
@@ -2554,7 +2561,6 @@ class LayerNormLSTMCell(rnn_cell_impl.RNNCell):
       ValueError: If input size cannot be inferred from inputs via
         static shape inference.
     """
-    num_proj = self._num_units if self._num_proj is None else self._num_proj
     sigmoid = math_ops.sigmoid
 
     (c_prev, m_prev) = state
@@ -2567,10 +2573,14 @@ class LayerNormLSTMCell(rnn_cell_impl.RNNCell):
     with vs.variable_scope(scope, initializer=self._initializer) as unit_scope:
 
       # i = input_gate, j = new_input, f = forget_gate, o = output_gate
-      lstm_matrix = self._linear([inputs, m_prev], 4 * self._num_units, bias=True,
-                            bias_initializer=None, layer_norm=self._layer_norm)
+      lstm_matrix = self._linear(
+          [inputs, m_prev],
+          4 * self._num_units,
+          bias=True,
+          bias_initializer=None,
+          layer_norm=self._layer_norm)
       i, j, f, o = array_ops.split(
-        value=lstm_matrix, num_or_size_splits=4, axis=1)
+          value=lstm_matrix, num_or_size_splits=4, axis=1)
 
       if self._layer_norm:
         i = _norm(self._norm_gain, self._norm_shift, i, "input")
@@ -2580,20 +2590,22 @@ class LayerNormLSTMCell(rnn_cell_impl.RNNCell):
 
       # Diagonal connections
       if self._use_peepholes:
-        with vs.variable_scope(unit_scope) as projection_scope:
+        with vs.variable_scope(unit_scope):
           w_f_diag = vs.get_variable(
-            "w_f_diag", shape=[self._num_units], dtype=dtype)
+              "w_f_diag", shape=[self._num_units], dtype=dtype)
           w_i_diag = vs.get_variable(
-            "w_i_diag", shape=[self._num_units], dtype=dtype)
+              "w_i_diag", shape=[self._num_units], dtype=dtype)
           w_o_diag = vs.get_variable(
-            "w_o_diag", shape=[self._num_units], dtype=dtype)
+              "w_o_diag", shape=[self._num_units], dtype=dtype)
 
       if self._use_peepholes:
-        c = (sigmoid(f + self._forget_bias + w_f_diag * c_prev) * c_prev +
-             sigmoid(i + w_i_diag * c_prev) * self._activation(j))
+        c = (
+            sigmoid(f + self._forget_bias + w_f_diag * c_prev) * c_prev +
+            sigmoid(i + w_i_diag * c_prev) * self._activation(j))
       else:
-        c = (sigmoid(f + self._forget_bias) * c_prev + sigmoid(i) *
-             self._activation(j))
+        c = (
+            sigmoid(f + self._forget_bias) * c_prev +
+            sigmoid(i) * self._activation(j))
 
       if self._layer_norm:
         c = _norm(self._norm_gain, self._norm_shift, c, "state")
@@ -2608,7 +2620,7 @@ class LayerNormLSTMCell(rnn_cell_impl.RNNCell):
         m = sigmoid(o) * self._activation(c)
 
       if self._num_proj is not None:
-        with vs.variable_scope("projection") as proj_scope:
+        with vs.variable_scope("projection"):
           m = self._linear(m, self._num_proj, bias=False)
 
         if self._proj_clip is not None:
diff --git a/tensorflow/contrib/seq2seq/python/ops/attention_wrapper.py b/tensorflow/contrib/seq2seq/python/ops/attention_wrapper.py
index c3b180d9f4..e87ef41388 100644
--- a/tensorflow/contrib/seq2seq/python/ops/attention_wrapper.py
+++ b/tensorflow/contrib/seq2seq/python/ops/attention_wrapper.py
@@ -192,7 +192,8 @@ class _BaseAttentionMechanism(AttentionMechanism):
       raise TypeError("probability_fn must be callable, saw type: %s" %
                       type(probability_fn).__name__)
     if score_mask_value is None:
-      score_mask_value = dtypes.as_dtype(self._memory_layer.dtype).as_numpy_dtype(-np.inf)
+      score_mask_value = dtypes.as_dtype(
+          self._memory_layer.dtype).as_numpy_dtype(-np.inf)
     self._probability_fn = lambda score, prev: (  # pylint:disable=g-long-lambda
         probability_fn(
             _maybe_mask_score(score, memory_sequence_length, score_mask_value),
@@ -1145,7 +1146,9 @@ class AttentionWrapper(rnn_cell_impl.RNNCell):
             % (len(attention_layer_sizes), len(attention_mechanisms)))
       self._attention_layers = tuple(
           layers_core.Dense(
-              attention_layer_size, name="attention_layer", use_bias=False,
+              attention_layer_size,
+              name="attention_layer",
+              use_bias=False,
               dtype=attention_mechanisms[i].dtype)
           for i, attention_layer_size in enumerate(attention_layer_sizes))
       self._attention_layer_size = sum(attention_layer_sizes)
diff --git a/tensorflow/contrib/verbs/rdma.cc b/tensorflow/contrib/verbs/rdma.cc
index 331943a3ef..ac8d994502 100644
--- a/tensorflow/contrib/verbs/rdma.cc
+++ b/tensorflow/contrib/verbs/rdma.cc
@@ -16,8 +16,8 @@ limitations under the License.
 #ifdef TENSORFLOW_USE_VERBS
 
 #include "tensorflow/contrib/verbs/rdma.h"
-#include <cstdlib>
 #include <fcntl.h>
+#include <cstdlib>
 #include "tensorflow/contrib/verbs/verbs_util.h"
 #include "tensorflow/core/common_runtime/device_mgr.h"
 #include "tensorflow/core/common_runtime/dma_helper.h"
@@ -137,7 +137,7 @@ ibv_device* set_device() {
   if (!env_p_rdma_device.empty()) {
     for (device_index = 0; device_index < dev_num; device_index++) {
       if (!env_p_rdma_device.compare(
-               ibv_get_device_name(dev_list[device_index]))) {
+              ibv_get_device_name(dev_list[device_index]))) {
         CHECK(get_dev_active_port_count(dev_list[device_index]) != 0)
             << "Device " << ibv_get_device_name(dev_list[device_index])
             << " has no active ports";
@@ -147,7 +147,7 @@ ibv_device* set_device() {
     // check validity of input device
     CHECK(false) << "The device " << env_p_rdma_device << " wasn't found";
   } else {
-  // set default device
+    // set default device
     str_port_num = get_env_var("RDMA_DEVICE_PORT");
     CHECK(str_port_num.empty())
         << "RDMA_DEVICE should be provided if RDMA_DEVICE_PORT is set by user";
@@ -177,7 +177,7 @@ ibv_device* set_device() {
 // Returns:
 //   port to use
 uint8_t set_port(ibv_context* context) {
-  uint8_t port_num = 0; //0 is illegal port number
+  uint8_t port_num = 0;  // 0 is illegal port number
   string str_port_num;
   ibv_device_attr device_att;
   ibv_port_attr port_attr;
@@ -199,9 +199,7 @@ uint8_t set_port(ibv_context* context) {
     // check if port id active
     CHECK(port_attr.state == IBV_PORT_ACTIVE)
         << "Selected RDMA_DEVICE_PORT is not active";
-  }
-  // set default port
-  else {
+  } else {  // set default port
     for (port_index = 1; port_index <= device_att.phys_port_cnt; port_index++) {
       rc = ibv_query_port(context, port_index, &port_attr);
       CHECK(!rc) << "Failed to query the port" << port_index;
@@ -269,7 +267,7 @@ bool is_gid_type_roce_v2(ibv_context* context, uint8_t port_num,
 // Function to set GID index.
 // If the port link is IB, no GID index should be selected.
 // If Ethernet but RDMA_GID_INDEX not set gid index that supports
-//   RoCE V2 will be chosen(fails if more then one IP is configured)
+//   RoCE V2 will be chosen(fails if more than one IP is configured)
 // Args:
 //   context - device context
 //   port_num - port number
@@ -302,7 +300,7 @@ uint8_t set_gid(uint8_t port_num, ibv_context* context) {
     }
   }
   switch (port_attr.link_layer) {
-    case(IBV_LINK_LAYER_ETHERNET) :
+    case (IBV_LINK_LAYER_ETHERNET):
       gid_str = get_env_var("RDMA_GID_INDEX");
       if (!gid_str.empty()) {
         gid_index = stoi(gid_str);
@@ -313,7 +311,7 @@ uint8_t set_gid(uint8_t port_num, ibv_context* context) {
             << "More than one IP is available, please specify GID_INDEX";
       }
       break;
-    case(IBV_LINK_LAYER_INFINIBAND) :  // no need in GID index
+    case (IBV_LINK_LAYER_INFINIBAND):  // no need in GID index
       break;
     default:
       LOG(INFO) << "Unknown port link layer. Currently supporting Ethernet and "
@@ -374,7 +372,8 @@ enum ibv_mtu set_mtu(uint8_t port_num, ibv_context* context) {
         break;
       default:
         CHECK(0) << "Error: MTU input value must be one of the following: 256, "
-                    "512, 1024, 2048, 4096. MTU " << mtu << " is invalid\n";
+                    "512, 1024, 2048, 4096. MTU "
+                 << mtu << " is invalid\n";
         break;
     }
     CHECK(mtu < port_attr.active_mtu)
@@ -453,9 +452,9 @@ void RdmaAdapter::Process_CQ() {
     CHECK_GE(ne, 0);
     for (int i = 0; i < ne; ++i) {
       CHECK(wc_[i].status == IBV_WC_SUCCESS)
-          << "Failed status \n" << ibv_wc_status_str(wc_[i].status) << " "
-          << wc_[i].status << " " << static_cast<int>(wc_[i].wr_id) << " "
-          << wc_[i].vendor_err;
+          << "Failed status \n"
+          << ibv_wc_status_str(wc_[i].status) << " " << wc_[i].status << " "
+          << static_cast<int>(wc_[i].wr_id) << " " << wc_[i].vendor_err;
       if (wc_[i].opcode == IBV_WC_RECV_RDMA_WITH_IMM) {
         RdmaChannel* rc = reinterpret_cast<RdmaChannel*>(wc_[i].wr_id);
         // put back a recv wr.
@@ -611,7 +610,7 @@ RdmaChannel::RdmaChannel(const RdmaAdapter* adapter, const string local_name,
   // create message and ack buffers, then initialize the tables.
   {
     const string buffer_names[] = {"tx_message_buffer", "rx_message_buffer",
-                                   "tx_ack_buffer",     "rx_ack_buffer"};
+                                   "tx_ack_buffer", "rx_ack_buffer"};
     tx_message_buffer_ = new RdmaMessageBuffer(this, buffer_names[0]);
     rx_message_buffer_ = new RdmaMessageBuffer(this, buffer_names[1]);
     tx_ack_buffer_ = new RdmaAckBuffer(this, buffer_names[2]);
@@ -672,7 +671,7 @@ void RdmaChannel::SetRemoteAddress(const RdmaAddress& ra, bool override) {
 void RdmaChannel::Recv() {
   struct ibv_recv_wr wr;
   memset(&wr, 0, sizeof(wr));
-  wr.wr_id = (uint64_t) this;
+  wr.wr_id = (uint64_t)this;
   struct ibv_recv_wr* bad_wr;
   CHECK(!ibv_post_recv(qp_, &wr, &bad_wr)) << "Failed to post recv";
 }
@@ -826,11 +825,11 @@ void RdmaChannel::Connect(const RdmaAddress& remoteAddr) {
     attr.ah_attr.grh.traffic_class = adapter_->params_.traffic_class;
 
     int r;
-    CHECK(!(r = ibv_modify_qp(qp_, &attr, IBV_QP_STATE | IBV_QP_AV |
-                                              IBV_QP_PATH_MTU |
-                                              IBV_QP_DEST_QPN | IBV_QP_RQ_PSN |
-                                              IBV_QP_MAX_DEST_RD_ATOMIC |
-                                              IBV_QP_MIN_RNR_TIMER)))
+    CHECK(!(r = ibv_modify_qp(qp_, &attr,
+                              IBV_QP_STATE | IBV_QP_AV | IBV_QP_PATH_MTU |
+                                  IBV_QP_DEST_QPN | IBV_QP_RQ_PSN |
+                                  IBV_QP_MAX_DEST_RD_ATOMIC |
+                                  IBV_QP_MIN_RNR_TIMER)))
         << "QP to Ready to Receive " << r;
 
     memset(&attr, 0, sizeof(ibv_qp_attr));
@@ -841,10 +840,10 @@ void RdmaChannel::Connect(const RdmaAddress& remoteAddr) {
     attr.rnr_retry = 7; /* infinite */
     attr.max_rd_atomic = 1;
 
-    CHECK(!(r = ibv_modify_qp(qp_, &attr, IBV_QP_STATE | IBV_QP_TIMEOUT |
-                                              IBV_QP_RETRY_CNT |
-                                              IBV_QP_RNR_RETRY | IBV_QP_SQ_PSN |
-                                              IBV_QP_MAX_QP_RD_ATOMIC)))
+    CHECK(!(r = ibv_modify_qp(qp_, &attr,
+                              IBV_QP_STATE | IBV_QP_TIMEOUT | IBV_QP_RETRY_CNT |
+                                  IBV_QP_RNR_RETRY | IBV_QP_SQ_PSN |
+                                  IBV_QP_MAX_QP_RD_ATOMIC)))
         << "QP to Ready to Send " << r;
 
     connected_ = true;
@@ -931,7 +930,7 @@ void RdmaBuffer::Write(uint32_t imm_data, size_t buffer_size) {
 
   struct ibv_send_wr wr;
   memset(&wr, 0, sizeof(wr));
-  wr.wr_id = (uint64_t) this;
+  wr.wr_id = (uint64_t)this;
   wr.sg_list = &list;
   wr.num_sge = 1;
   wr.opcode = IBV_WR_RDMA_WRITE_WITH_IMM;
@@ -1026,9 +1025,9 @@ Rendezvous::DoneCallback RdmaTensorBuffer::getRecvTensorCallback(
     TensorProto proto;
     if (src_dev->tensorflow_gpu_device_info() &&
         (!send_args.alloc_attrs.on_host())) {
-      CHECK(send_args.device_context) << "send dev name: " << src_dev->name()
-                                      << " gpu_info: "
-                                      << src_dev->tensorflow_gpu_device_info();
+      CHECK(send_args.device_context)
+          << "send dev name: " << src_dev->name()
+          << " gpu_info: " << src_dev->tensorflow_gpu_device_info();
 
       if (can_memcpy) {
         AllocatorAttributes host_alloc_attrs;
@@ -1054,8 +1053,8 @@ Rendezvous::DoneCallback RdmaTensorBuffer::getRecvTensorCallback(
         // aync instead
         GPUUtil::SetProtoFromGPU(
             in, src_dev, send_args.device_context, &proto, is_dead,
-	    [this, proto, buffer_size, key, in, step_id, key_with_step_id,
-            is_dead, send_args, recv_args](const Status& s) mutable {
+            [this, proto, buffer_size, key, in, step_id, key_with_step_id,
+             is_dead, send_args, recv_args](const Status& s) mutable {
               CHECK(s.ok()) << "copy proto from gpu sync";
               auto tensor_bytes = proto.ByteSize();
               buffer_size += tensor_bytes;
diff --git a/tensorflow/core/api_def/base_api/api_def_UniqueV2.pbtxt b/tensorflow/core/api_def/base_api/api_def_UniqueV2.pbtxt
new file mode 100644
index 0000000000..cd7ec6e551
--- /dev/null
+++ b/tensorflow/core/api_def/base_api/api_def_UniqueV2.pbtxt
@@ -0,0 +1,47 @@
+op {
+  graph_op_name: "UniqueV2"
+  in_arg {
+    name: "x"
+    description: <<END
+A `Tensor`.
+END
+  }
+  in_arg {
+    name: "axis"
+    description: <<END
+A `Tensor` of type `int64` (default: 0). The axis of the Tensor to
+find the unique elements.
+END
+  }
+  out_arg {
+    name: "y"
+    description: <<END
+A `Tensor`. Unique elements along the `axis` of `Tensor` x.
+END
+  }
+  out_arg {
+    name: "idx"
+    description: <<END
+A 1-D Tensor. Has the same type as x that contains the index of each
+value of x in the output y.
+END
+  }
+  summary: "Finds unique elements in a 1-D tensor."
+  description: <<END
+This operation returns a tensor `y` containing all of the unique elements of `x`
+sorted in the same order that they occur in `x`. This operation also returns a
+tensor `idx` the same size as `x` that contains the index of each value of `x`
+in the unique output `y`. In other words:
+
+`y[idx[i]] = x[i] for i in [0, 1,...,rank(x) - 1]`
+
+For example:
+
+```
+# tensor 'x' is [1, 1, 2, 4, 4, 4, 7, 8, 8]
+y, idx = unique(x)
+y ==> [1, 2, 4, 7, 8]
+idx ==> [0, 0, 1, 2, 2, 2, 3, 4, 4]
+```
+END
+}
diff --git a/tensorflow/core/api_def/base_api/api_def_UnsortedSegmentSum.pbtxt b/tensorflow/core/api_def/base_api/api_def_UnsortedSegmentSum.pbtxt
index 0a3355cdbc..77a96d1e03 100644
--- a/tensorflow/core/api_def/base_api/api_def_UnsortedSegmentSum.pbtxt
+++ b/tensorflow/core/api_def/base_api/api_def_UnsortedSegmentSum.pbtxt
@@ -26,6 +26,8 @@ need not be sorted and need not cover all values in the full
 range of valid values.
 
 If the sum is empty for a given segment ID `i`, `output[i] = 0`.
+If the given segment ID `i` is negative, the value is dropped and will not be
+added to the sum of the segment.
 
 `num_segments` should equal the number of distinct segment IDs.
 
diff --git a/tensorflow/core/graph/graph.h b/tensorflow/core/graph/graph.h
index d0dba6e1f0..223dd12f8f 100644
--- a/tensorflow/core/graph/graph.h
+++ b/tensorflow/core/graph/graph.h
@@ -455,7 +455,7 @@ class Graph {
   // the corresponding NodeDef to reflect the change.
   // REQUIRES: The control edge must exist.
   void RemoveControlEdge(const Edge* e);
-  
+
   // Updates the input to a node.  The existing edge to `dst` is removed and an
   // edge from `new_src` to `dst` is created. The NodeDef associated with `dst`
   // is also updated.
diff --git a/tensorflow/core/graph/graph_test.cc b/tensorflow/core/graph/graph_test.cc
index 2aa1b31e15..e2ce0ba046 100644
--- a/tensorflow/core/graph/graph_test.cc
+++ b/tensorflow/core/graph/graph_test.cc
@@ -118,11 +118,9 @@ class GraphTest : public ::testing::Test {
     LOG(FATAL) << name;
   }
 
-  bool ControlEdgeExistsInGraphOrNodeDef(const Node* src,
-                                         const Node* dst) {
-    for (const Edge *e : dst->in_edges()) {
-      if (e->IsControlEdge() &&
-          e->src() == src &&
+  bool ControlEdgeExistsInGraphOrNodeDef(const Node* src, const Node* dst) {
+    for (const Edge* e : dst->in_edges()) {
+      if (e->IsControlEdge() && e->src() == src &&
           e->src_output() == Graph::kControlSlot &&
           e->dst_input() == Graph::kControlSlot) {
         return true;
diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD
index f1cb9a1860..b4a5a3c796 100644
--- a/tensorflow/core/kernels/BUILD
+++ b/tensorflow/core/kernels/BUILD
@@ -1720,6 +1720,7 @@ tf_cuda_cc_tests(
         ":data_flow",
         ":ops_testutil",
         ":ops_util",
+        "//tensorflow/core:core_cpu",
         "//tensorflow/core:framework",
         "//tensorflow/core:lib",
         "//tensorflow/core:protos_all_cc",
diff --git a/tensorflow/core/kernels/bincount_op.cc b/tensorflow/core/kernels/bincount_op.cc
index 766d63e3be..890fa3121b 100644
--- a/tensorflow/core/kernels/bincount_op.cc
+++ b/tensorflow/core/kernels/bincount_op.cc
@@ -97,8 +97,9 @@ class BincountOp : public OpKernel {
     const Tensor& weights_t = ctx->input(2);
 
     int32 size = size_tensor.scalar<int32>()();
-    OP_REQUIRES(ctx, size >= 0, errors::InvalidArgument(
-                                    "size (", size, ") must be non-negative"));
+    OP_REQUIRES(
+        ctx, size >= 0,
+        errors::InvalidArgument("size (", size, ") must be non-negative"));
 
     const auto arr = arr_t.flat<int32>();
     const auto weights = weights_t.flat<T>();
diff --git a/tensorflow/core/kernels/bincount_op.h b/tensorflow/core/kernels/bincount_op.h
index 0f8dd2b82a..cd3d560cd1 100644
--- a/tensorflow/core/kernels/bincount_op.h
+++ b/tensorflow/core/kernels/bincount_op.h
@@ -16,11 +16,11 @@ limitations under the License.
 #ifndef TENSORFLOW_BINCOUNT_OP_H_
 #define TENSORFLOW_BINCOUNT_OP_H_
 
+#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
 #include "tensorflow/core/framework/op_kernel.h"
 #include "tensorflow/core/framework/tensor_types.h"
 #include "tensorflow/core/framework/types.h"
 #include "tensorflow/core/lib/core/errors.h"
-#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
 
 namespace tensorflow {
 
diff --git a/tensorflow/core/kernels/bincount_op_gpu.cu.cc b/tensorflow/core/kernels/bincount_op_gpu.cu.cc
index ae9e26ffdf..6074b3e1f6 100644
--- a/tensorflow/core/kernels/bincount_op_gpu.cu.cc
+++ b/tensorflow/core/kernels/bincount_op_gpu.cu.cc
@@ -17,12 +17,12 @@ limitations under the License.
 
 #define EIGEN_USE_GPU
 
-#include "tensorflow/core/kernels/bincount_op.h"
 #include "external/cub_archive/cub/device/device_histogram.cuh"
 #include "tensorflow/core/framework/op_kernel.h"
 #include "tensorflow/core/framework/register_types.h"
 #include "tensorflow/core/framework/tensor.h"
 #include "tensorflow/core/framework/tensor_shape.h"
+#include "tensorflow/core/kernels/bincount_op.h"
 #include "tensorflow/core/platform/logging.h"
 #include "tensorflow/core/platform/types.h"
 #include "tensorflow/core/util/cuda_kernel_helper.h"
@@ -93,8 +93,8 @@ struct BincountFunctor<GPUDevice, T> {
         /* num_samples */ num_samples,
         /* stream */ stream);
     if (err != cudaSuccess) {
-      return errors::Internal("Could not launch HistogramEven: ",
-                              cudaGetErrorString(err), ".");
+      return errors::Internal(
+          "Could not launch HistogramEven: ", cudaGetErrorString(err), ".");
     }
     return Status::OK();
   }
diff --git a/tensorflow/core/kernels/bincount_op_test.cc b/tensorflow/core/kernels/bincount_op_test.cc
index 14becc87a7..cb04b40637 100644
--- a/tensorflow/core/kernels/bincount_op_test.cc
+++ b/tensorflow/core/kernels/bincount_op_test.cc
@@ -30,8 +30,8 @@ static Graph* Bincount(int arr_size, int nbins) {
   Tensor arr(DT_INT32, TensorShape({arr_size}));
   arr.flat<int32>() = arr.flat<int32>().setRandom().abs();
 
-  Tensor size(DT_INT32, TensorShape({(int32)1}));
-  size.flat<int32>()(0) = (int32)nbins;
+  Tensor size(DT_INT32, TensorShape({static_cast<int32>(1)}));
+  size.flat<int32>()(0) = static_cast<int32>(nbins);
 
   Tensor weights(DT_INT32, TensorShape({0}));
 
diff --git a/tensorflow/core/kernels/bucketize_op_gpu.cu.cc b/tensorflow/core/kernels/bucketize_op_gpu.cu.cc
index aafbbe41b4..325dee793b 100644
--- a/tensorflow/core/kernels/bucketize_op_gpu.cu.cc
+++ b/tensorflow/core/kernels/bucketize_op_gpu.cu.cc
@@ -77,10 +77,10 @@ struct BucketizeFunctor<GPUDevice, T> {
     TF_RETURN_IF_ERROR(boundaries_array.Finalize());
 
     CudaLaunchConfig config = GetCudaLaunchConfig(input.size(), d);
-    BucketizeCustomKernel<
-        T><<<config.block_count, config.thread_per_block, 0, d.stream()>>>(
-        input.size(), input.data(), boundaries_vector.size(),
-        boundaries_array.data(), output.data());
+    BucketizeCustomKernel<T>
+        <<<config.block_count, config.thread_per_block, 0, d.stream()>>>(
+            input.size(), input.data(), boundaries_vector.size(),
+            boundaries_array.data(), output.data());
 
     return Status::OK();
   }
diff --git a/tensorflow/core/kernels/conv_grad_ops_3d.cc b/tensorflow/core/kernels/conv_grad_ops_3d.cc
index f819fccbfb..c2d24d1f12 100644
--- a/tensorflow/core/kernels/conv_grad_ops_3d.cc
+++ b/tensorflow/core/kernels/conv_grad_ops_3d.cc
@@ -1101,29 +1101,27 @@ class Conv3DBackpropFilterOp<GPUDevice, T> : public OpKernel {
   bool cudnn_use_autotune_;
 };
 
-
-
 #define REGISTER_GPU_KERNEL(T)                                                \
   REGISTER_KERNEL_BUILDER(                                                    \
       Name("Conv3DBackpropInput").Device(DEVICE_GPU).TypeConstraint<T>("T"),  \
       Conv3DBackpropInputOp<GPUDevice, T>);                                   \
   REGISTER_KERNEL_BUILDER(Name("Conv3DBackpropInputV2")                       \
-                            .Device(DEVICE_GPU)                               \
-                            .TypeConstraint<T>("T")                           \
-                            .HostMemory("input_sizes"),                       \
-                        Conv3DBackpropInputOp<GPUDevice, T>);                 \
+                              .Device(DEVICE_GPU)                             \
+                              .TypeConstraint<T>("T")                         \
+                              .HostMemory("input_sizes"),                     \
+                          Conv3DBackpropInputOp<GPUDevice, T>);               \
   REGISTER_KERNEL_BUILDER(                                                    \
-    Name("Conv3DBackpropFilter").Device(DEVICE_GPU).TypeConstraint<T>("T"),   \
-    Conv3DBackpropFilterOp<GPUDevice, T>);                                    \
+      Name("Conv3DBackpropFilter").Device(DEVICE_GPU).TypeConstraint<T>("T"), \
+      Conv3DBackpropFilterOp<GPUDevice, T>);                                  \
   REGISTER_KERNEL_BUILDER(Name("Conv3DBackpropFilterV2")                      \
-                            .Device(DEVICE_GPU)                               \
-                            .TypeConstraint<T>("T")                           \
-                            .HostMemory("filter_sizes"),                      \
-                        Conv3DBackpropFilterOp<GPUDevice, T>);
+                              .Device(DEVICE_GPU)                             \
+                              .TypeConstraint<T>("T")                         \
+                              .HostMemory("filter_sizes"),                    \
+                          Conv3DBackpropFilterOp<GPUDevice, T>);
 TF_CALL_half(REGISTER_GPU_KERNEL);
 TF_CALL_float(REGISTER_GPU_KERNEL);
 #undef REGISTER_GPU_KERNEL
-     
+
 #endif  // GOOGLE_CUDA
 
 }  // namespace tensorflow
diff --git a/tensorflow/core/kernels/cwise_op_asinh.cc b/tensorflow/core/kernels/cwise_op_asinh.cc
index 8d44208aa7..a7673afd0b 100644
--- a/tensorflow/core/kernels/cwise_op_asinh.cc
+++ b/tensorflow/core/kernels/cwise_op_asinh.cc
@@ -22,7 +22,7 @@ REGISTER4(UnaryOp, CPU, "Asinh", functor::asinh, float, double,
 
 #ifdef TENSORFLOW_USE_SYCL
 REGISTER2(UnaryOp, SYCL, "Asinh", functor::asinh, float, double);
-#endif // TENSORFLOW_USE_SYCL
+#endif  // TENSORFLOW_USE_SYCL
 
 #if GOOGLE_CUDA
 REGISTER2(UnaryOp, GPU, "Asinh", functor::asinh, float, double);
diff --git a/tensorflow/core/kernels/cwise_op_atanh.cc b/tensorflow/core/kernels/cwise_op_atanh.cc
index bbc69e45aa..7b688db4c5 100644
--- a/tensorflow/core/kernels/cwise_op_atanh.cc
+++ b/tensorflow/core/kernels/cwise_op_atanh.cc
@@ -22,7 +22,7 @@ REGISTER4(UnaryOp, CPU, "Atanh", functor::atanh, float, double,
 
 #ifdef TENSORFLOW_USE_SYCL
 REGISTER2(UnaryOp, SYCL, "Atanh", functor::atanh, float, double);
-#endif // TENSORFLOW_USE_SYCL
+#endif  // TENSORFLOW_USE_SYCL
 
 #if GOOGLE_CUDA
 REGISTER2(UnaryOp, GPU, "Atanh", functor::atanh, float, double);
diff --git a/tensorflow/core/kernels/depthwise_conv_grad_op.cc b/tensorflow/core/kernels/depthwise_conv_grad_op.cc
index 53d65a22d1..9347978d51 100644
--- a/tensorflow/core/kernels/depthwise_conv_grad_op.cc
+++ b/tensorflow/core/kernels/depthwise_conv_grad_op.cc
@@ -231,7 +231,8 @@ static void CopyOutputBackpropRegion(const DepthwiseArgs& args,
       }
       // Pad to vector-register width (if needed).
       for (int64 d = 0; d < pad_size; ++d) {
-        buffer[buf_base + vectorized_size + scalar_size + d] = static_cast<T>(0);
+        buffer[buf_base + vectorized_size + scalar_size + d] =
+            static_cast<T>(0);
       }
     }
   }
@@ -510,7 +511,8 @@ static void DepthwiseConvBackpropInputReference(const DepthwiseArgs& args,
 
 #if GOOGLE_CUDA
 
-extern template struct LaunchDepthwiseConvBackpropInputOp<GPUDevice, Eigen::half>;
+extern template struct LaunchDepthwiseConvBackpropInputOp<GPUDevice,
+                                                          Eigen::half>;
 extern template struct LaunchDepthwiseConvBackpropInputOp<GPUDevice, float>;
 extern template struct LaunchDepthwiseConvBackpropInputOp<GPUDevice, double>;
 
@@ -885,7 +887,8 @@ static void DepthwiseConvBackpropFilterReference(const DepthwiseArgs& args,
 
 #if GOOGLE_CUDA
 
-extern template struct LaunchDepthwiseConvBackpropFilterOp<GPUDevice, Eigen::half>;
+extern template struct LaunchDepthwiseConvBackpropFilterOp<GPUDevice,
+                                                           Eigen::half>;
 extern template struct LaunchDepthwiseConvBackpropFilterOp<GPUDevice, float>;
 extern template struct LaunchDepthwiseConvBackpropFilterOp<GPUDevice, double>;
 
diff --git a/tensorflow/core/kernels/depthwise_conv_op.cc b/tensorflow/core/kernels/depthwise_conv_op.cc
index 2759ecb2f1..30ecd0c2ba 100644
--- a/tensorflow/core/kernels/depthwise_conv_op.cc
+++ b/tensorflow/core/kernels/depthwise_conv_op.cc
@@ -427,6 +427,11 @@ TF_CALL_double(REGISTER_CPU_KERNEL);
 #endif
 
 #if GOOGLE_CUDA
+REGISTER_KERNEL_BUILDER(Name("DepthwiseConv2dNative")
+                            .Device(DEVICE_GPU)
+                            .TypeConstraint<Eigen::half>("T"),
+                        DepthwiseConv2dNativeOp<GPUDevice, Eigen::half>);
+
 REGISTER_KERNEL_BUILDER(
     Name("DepthwiseConv2dNative").Device(DEVICE_GPU).TypeConstraint<Eigen::half>("T"),
     DepthwiseConv2dNativeOp<GPUDevice, Eigen::half>);
diff --git a/tensorflow/core/kernels/depthwise_conv_op.h b/tensorflow/core/kernels/depthwise_conv_op.h
index 11aed5b415..097a9f5bfa 100644
--- a/tensorflow/core/kernels/depthwise_conv_op.h
+++ b/tensorflow/core/kernels/depthwise_conv_op.h
@@ -158,7 +158,8 @@ struct DepthwiseFilterPadOp {
       }
       // Pad the remainder of output to vector-register boundary.
       for (int64 j = 0; j < pad_size; ++j) {
-        padded_filter[output_base + vectorized_size + scalar_size + j] = static_cast<T>(0);
+        padded_filter[output_base + vectorized_size + scalar_size + j] =
+            static_cast<T>(0);
       }
     }
   }
diff --git a/tensorflow/core/kernels/maxpooling_op.cc b/tensorflow/core/kernels/maxpooling_op.cc
index 157ce106ce..d8bdb700e6 100644
--- a/tensorflow/core/kernels/maxpooling_op.cc
+++ b/tensorflow/core/kernels/maxpooling_op.cc
@@ -20,6 +20,7 @@ limitations under the License.
 #include "tensorflow/core/kernels/maxpooling_op.h"
 
 #include <vector>
+#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
 #include "tensorflow/core/common_runtime/device.h"
 #include "tensorflow/core/framework/numeric_op.h"
 #include "tensorflow/core/framework/op_kernel.h"
@@ -37,7 +38,6 @@ limitations under the License.
 #include "tensorflow/core/util/padding.h"
 #include "tensorflow/core/util/tensor_format.h"
 #include "tensorflow/core/util/use_cudnn.h"
-#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
 
 #if GOOGLE_CUDA
 #include "tensorflow/core/kernels/maxpooling_op_gpu.h"
@@ -359,7 +359,8 @@ class MaxPoolingGradOp<Eigen::GpuDevice, T> : public OpKernel {
     OP_REQUIRES_OK(context, context->GetAttr("padding", &padding_));
 
     use_dnn_ = CanUseCudnn();
-    ReadBoolFromEnvVar("TF_ENABLE_MAXPOOL_NANPROP", false, &propagate_nans_);
+    TF_CHECK_OK(ReadBoolFromEnvVar("TF_ENABLE_MAXPOOL_NANPROP", false,
+                                   &propagate_nans_));
   }
 
   void Compute(OpKernelContext* context) override {
@@ -888,7 +889,8 @@ class MaxPoolingWithArgmaxOp : public OpKernel {
                 errors::Unimplemented(
                     "Pooling is not yet supported on the batch dimension."));
 
-    ReadBoolFromEnvVar("TF_ENABLE_MAXPOOL_NANPROP", false, &propagate_nans_);
+    TF_CHECK_OK(ReadBoolFromEnvVar("TF_ENABLE_MAXPOOL_NANPROP", false,
+                                   &propagate_nans_));
   }
 
   void Compute(OpKernelContext* context) override {
@@ -1052,7 +1054,8 @@ class MaxPoolingNoMaskOp<GPUDevice, T> : public OpKernel {
                     "Pooling is not yet supported on the batch dimension."));
     use_dnn_ = CanUseCudnn();
 
-    ReadBoolFromEnvVar("TF_ENABLE_MAXPOOL_NANPROP", false, &propagate_nans_);
+    TF_CHECK_OK(ReadBoolFromEnvVar("TF_ENABLE_MAXPOOL_NANPROP", false,
+                                   &propagate_nans_));
   }
 
   void Compute(OpKernelContext* context) override {
@@ -1137,7 +1140,8 @@ class MaxPoolingNoMaskV2Op<GPUDevice, T> : public OpKernel {
     }
     OP_REQUIRES_OK(context, context->GetAttr("padding", &padding_));
     use_dnn_ = CanUseCudnn();
-    ReadBoolFromEnvVar("TF_ENABLE_MAXPOOL_NANPROP", false, &propagate_nans_);
+    TF_CHECK_OK(ReadBoolFromEnvVar("TF_ENABLE_MAXPOOL_NANPROP", false,
+                                   &propagate_nans_));
   }
 
   void Compute(OpKernelContext* context) override {
diff --git a/tensorflow/core/kernels/maxpooling_op_gpu.cu.cc b/tensorflow/core/kernels/maxpooling_op_gpu.cu.cc
index d96b844383..f8daaca4c9 100644
--- a/tensorflow/core/kernels/maxpooling_op_gpu.cu.cc
+++ b/tensorflow/core/kernels/maxpooling_op_gpu.cu.cc
@@ -405,17 +405,17 @@ bool MaxPoolForwardWithOptionalArgmax<T>::operator()(
   if (propagate_nans) {
     MaxPoolForwardNHWC<true>
         <<<(output_size + kThreadsPerBlock - 1) / kThreadsPerBlock,
-           kThreadsPerBlock, 0, d.stream()>>>
-        (output_size, bottom_data, height, width, channels, pooled_height,
-         pooled_width, kernel_h, kernel_w, stride_h, stride_w, pad_t, pad_l,
-         top_data, mask);
+           kThreadsPerBlock, 0, d.stream()>>>(
+            output_size, bottom_data, height, width, channels, pooled_height,
+            pooled_width, kernel_h, kernel_w, stride_h, stride_w, pad_t, pad_l,
+            top_data, mask);
   } else {
     MaxPoolForwardNHWC<false>
         <<<(output_size + kThreadsPerBlock - 1) / kThreadsPerBlock,
-           kThreadsPerBlock, 0, d.stream()>>>
-        (output_size, bottom_data, height, width, channels, pooled_height,
-         pooled_width, kernel_h, kernel_w, stride_h, stride_w, pad_t, pad_l,
-         top_data, mask);
+           kThreadsPerBlock, 0, d.stream()>>>(
+            output_size, bottom_data, height, width, channels, pooled_height,
+            pooled_width, kernel_h, kernel_w, stride_h, stride_w, pad_t, pad_l,
+            top_data, mask);
   }
   return d.ok();
 }
diff --git a/tensorflow/core/kernels/mkl_tfconv_op.h b/tensorflow/core/kernels/mkl_tfconv_op.h
index 0a5be4fec9..c4d5a45d3c 100644
--- a/tensorflow/core/kernels/mkl_tfconv_op.h
+++ b/tensorflow/core/kernels/mkl_tfconv_op.h
@@ -101,8 +101,8 @@ class MklToTfOp : public OpKernel {
       // Allocate output tensor.
       TensorShape output_shape = input_shape.GetTfShape();
       Tensor* output_tensor = NULL;
-      OP_REQUIRES_OK(context, context->allocate_output(input_number,
-                                  output_shape, &output_tensor));
+      OP_REQUIRES_OK(context, context->allocate_output(
+                                  input_number, output_shape, &output_tensor));
       CHECK_NOTNULL(output_tensor);
 
       // Do we need to reorder Mkl layout into TensorFlow layout?
@@ -116,13 +116,13 @@ class MklToTfOp : public OpKernel {
         // If not, just forward input tensor to output tensor.
         CHECK(output_tensor->CopyFrom(input_tensor, output_shape));
       }
-    } catch (mkldnn::error &e) {
+    } catch (mkldnn::error& e) {
       string error_msg = "Status: " + std::to_string(e.status) +
-                       ", message: " + std::string(e.message) +
-                       ", in file " + std::string(__FILE__) + ":" +
-                       std::to_string(__LINE__);
-      OP_REQUIRES_OK(context,
-        errors::Aborted("Operation received an exception:", error_msg));
+                         ", message: " + std::string(e.message) + ", in file " +
+                         std::string(__FILE__) + ":" + std::to_string(__LINE__);
+      OP_REQUIRES_OK(
+          context,
+          errors::Aborted("Operation received an exception:", error_msg));
     }
   }
 #else
@@ -160,8 +160,8 @@ class MklToTfOp : public OpKernel {
 
     // Allocate output tensor.
     Tensor* output_tensor = NULL;
-    OP_REQUIRES_OK(context, context->allocate_output(input_number,
-                              output_shape, &output_tensor));
+    OP_REQUIRES_OK(context, context->allocate_output(input_number, output_shape,
+                                                     &output_tensor));
 
     dnnLayout_t output_layout =
         static_cast<dnnLayout_t>(input_shape.GetTfLayout());
diff --git a/tensorflow/core/kernels/ops_util.h b/tensorflow/core/kernels/ops_util.h
index d3d1b56c9d..93ef512778 100644
--- a/tensorflow/core/kernels/ops_util.h
+++ b/tensorflow/core/kernels/ops_util.h
@@ -98,6 +98,19 @@ gtl::InlinedVector<T, 8> ComputeStride(const TensorShape& shape) {
   return strides;
 }
 
+// Helper to compute 'strides' given an Eigen TensorDimensions
+template <typename T, typename EigenDimensions>
+gtl::InlinedVector<T, 8> ComputeEigenStrides(const EigenDimensions& shape) {
+  const int ndims = shape.rank();
+  gtl::InlinedVector<T, 8> strides(ndims);
+  T stride = 1;
+  for (int i = ndims - 1; i >= 0; --i) {
+    strides[i] = stride;
+    stride *= static_cast<T>(shape[i]);
+  }
+  return strides;
+}
+
 }  // namespace tensorflow
 
 #endif  // TENSORFLOW_KERNELS_OPS_UTIL_H_
diff --git a/tensorflow/core/platform/posix/error.cc b/tensorflow/core/platform/posix/error.cc
index f8b0285c50..cda6d7d8f9 100644
--- a/tensorflow/core/platform/posix/error.cc
+++ b/tensorflow/core/platform/posix/error.cc
@@ -131,8 +131,8 @@ error::Code ErrnoToCode(int err_number) {
     case ENETUNREACH:   // Network unreachable
     case ENOLCK:        // No locks available
     case ENOLINK:       // Link has been severed
-#if !(defined(__APPLE__) || defined(__FreeBSD__) || defined(_WIN32) \
-	|| defined(__HAIKU__))
+#if !(defined(__APPLE__) || defined(__FreeBSD__) || defined(_WIN32) || \
+      defined(__HAIKU__))
     case ENONET:  // Machine is not on the network
 #endif
       code = error::UNAVAILABLE;
diff --git a/tensorflow/core/platform/posix/port.cc b/tensorflow/core/platform/posix/port.cc
index 09f69a95c1..614ee00b01 100644
--- a/tensorflow/core/platform/posix/port.cc
+++ b/tensorflow/core/platform/posix/port.cc
@@ -37,8 +37,8 @@ limitations under the License.
 #ifdef TF_USE_SNAPPY
 #include "snappy.h"
 #endif
-#if (defined(__APPLE__) && defined(__MACH__)) || defined(__FreeBSD__) \
-	|| defined(__HAIKU__)
+#if (defined(__APPLE__) && defined(__MACH__)) || defined(__FreeBSD__) || \
+    defined(__HAIKU__)
 #include <thread>
 #endif
 
@@ -62,8 +62,8 @@ int NumSchedulableCPUs() {
   }
   perror("sched_getaffinity");
 #endif
-#if (defined(__APPLE__) && defined(__MACH__)) || defined(__FreeBSD__) \
-	|| defined(__HAIKU__)
+#if (defined(__APPLE__) && defined(__MACH__)) || defined(__FreeBSD__) || \
+    defined(__HAIKU__)
   unsigned int count = std::thread::hardware_concurrency();
   if (count > 0) return static_cast<int>(count);
 #endif
diff --git a/tensorflow/core/util/cuda_kernel_helper.h b/tensorflow/core/util/cuda_kernel_helper.h
index 8fa0dfbed9..cf11f419a4 100644
--- a/tensorflow/core/util/cuda_kernel_helper.h
+++ b/tensorflow/core/util/cuda_kernel_helper.h
@@ -752,6 +752,12 @@ __device__ EIGEN_ALWAYS_INLINE T CudaShuffleDown(unsigned mask, T value,
   return __shfl_down_sync(mask, value, delta, width);
 }
 
+__device__ EIGEN_ALWAYS_INLINE Eigen::half CudaShuffleDown(
+    unsigned mask, Eigen::half value, int delta, int width = warpSize) {
+  return Eigen::half(
+      __shfl_down_sync(mask, static_cast<uint16>(value), delta, width));
+}
+
 // Variant of the (undocumented) version from the CUDA SDK, but using unsigned
 // instead of float for lo and hi (which is incorrect with ftz, for example).
 // A bug has been filed with NVIDIA and will be fixed in the next CUDA release.
@@ -774,6 +780,12 @@ __device__ EIGEN_ALWAYS_INLINE T CudaShuffleXor(unsigned mask, T value,
   return __shfl_xor_sync(mask, value, laneMask, width);
 }
 
+__device__ EIGEN_ALWAYS_INLINE Eigen::half CudaShuffleXor(
+    unsigned mask, Eigen::half value, int laneMask, int width = warpSize) {
+  return Eigen::half(
+      __shfl_xor_sync(mask, static_cast<uint16>(value), laneMask, width));
+}
+
 // Variant of the (undocumented) version from the CUDA SDK, but using unsigned
 // instead of float for lo and hi (which is incorrect with ftz, for example).
 // A bug has been filed with NVIDIA and will be fixed in the next CUDA release.
diff --git a/tensorflow/core/util/mkl_util.h b/tensorflow/core/util/mkl_util.h
index 118ff0d0d6..148c7851bd 100644
--- a/tensorflow/core/util/mkl_util.h
+++ b/tensorflow/core/util/mkl_util.h
@@ -24,25 +24,25 @@ limitations under the License.
 #include "mkl_dnn_types.h"
 #include "mkl_service.h"
 #include "mkl_trans.h"
+#include "tensorflow/core/framework/op_kernel.h"
 #include "tensorflow/core/framework/tensor.h"
 #include "tensorflow/core/framework/tensor_shape.h"
-#include "tensorflow/core/framework/op_kernel.h"
+#include "tensorflow/core/graph/mkl_graph_util.h"
 #include "tensorflow/core/lib/core/errors.h"
 #include "tensorflow/core/lib/gtl/array_slice.h"
 #include "tensorflow/core/platform/logging.h"
 #include "tensorflow/core/platform/macros.h"
 #include "tensorflow/core/util/padding.h"
 #include "tensorflow/core/util/tensor_format.h"
-#include "tensorflow/core/graph/mkl_graph_util.h"
 
 #ifdef INTEL_MKL_DNN
 #include "mkldnn.hpp"
 
+using mkldnn::engine;
 using mkldnn::memory;
-using mkldnn::reorder;
-using mkldnn::primitive;
 using mkldnn::padding_kind;
-using mkldnn::engine;
+using mkldnn::primitive;
+using mkldnn::reorder;
 #endif
 
 // The file contains a number of utility classes and functions used by MKL
@@ -56,8 +56,14 @@ namespace tensorflow {
 // Tensorflow tensor.
 
 typedef enum { W = 0, H = 1, C = 2, N = 3 } MklDims;
-typedef enum { Dim_N = 0, Dim_C = 1, Dim_H = 2, Dim_W = 3,
-               Dim_O = 0, Dim_I = 1 } MklDnnDims;
+typedef enum {
+  Dim_N = 0,
+  Dim_C = 1,
+  Dim_H = 2,
+  Dim_W = 3,
+  Dim_O = 0,
+  Dim_I = 1
+} MklDnnDims;
 
 class MklShape {
  public:
@@ -236,8 +242,7 @@ class MklShape {
   (IS_MKL_TENSOR_OFFSET + sizeof(size_t))  // Location of dimension_
 // Location of sizes. Note dim is not used here, left here
 // to make macros consistent.
-#define SIZES_OFFSET(dims) \
-  (DIMS_OFFSET + sizeof(size_t))
+#define SIZES_OFFSET(dims) (DIMS_OFFSET + sizeof(size_t))
 #define STRIDES_OFFSET(dims) \
   (SIZES_OFFSET(dims) + dims * sizeof(size_t))  // Location of strides
 #define MKL_LAYOUT_OFFSET(dims) \
@@ -332,7 +337,7 @@ class MklDnnShape {
     /// Number of dimensions in Tensorflow format
     size_t dimension_ = 0;
     /// Required by MKLDNN for conversions
-    mkldnn_dims_t sizes_;    // Required by MKL for conversions
+    mkldnn_dims_t sizes_;  // Required by MKL for conversions
     memory::format tf_data_format_ = memory::format::format_undef;
     memory::data_type T_ = memory::data_type::data_undef;
     // MKL layout
@@ -345,15 +350,13 @@ class MklDnnShape {
   typedef std::remove_extent<mkldnn_dims_t>::type mkldnn_dim_t;
 #define INVALID_DIM_SIZE -1
 
-
  public:
   MklDnnShape() {
-    for (size_t i = 0; i < sizeof(data_.sizes_) /
-                           sizeof(data_.sizes_[0]); ++i) {
+    for (size_t i = 0; i < sizeof(data_.sizes_) / sizeof(data_.sizes_[0]);
+         ++i) {
       data_.sizes_[i] = -1;
     }
-    for (size_t i = 0; i < sizeof(data_.map_) /
-                           sizeof(data_.map_[0]); ++i) {
+    for (size_t i = 0; i < sizeof(data_.map_) / sizeof(data_.map_[0]); ++i) {
       data_.map_[i] = -1;
     }
   }
@@ -369,26 +372,26 @@ class MklDnnShape {
   inline void SetDimensions(const size_t dimension) {
     data_.dimension_ = dimension;
   }
-  inline size_t GetDimension(char dimension)const {
+  inline size_t GetDimension(char dimension) const {
     int index = GetMklDnnTensorDimIndex(dimension);
     CHECK(index >= 0 && index < this->GetDimension())
         << "Invalid index from the dimension: " << index << ", " << dimension;
     return this->DimSize(index);
   }
 
-  inline int32 GetMklDnnTensorDimIndex(char dimension)const {
+  inline int32 GetMklDnnTensorDimIndex(char dimension) const {
     switch (dimension) {
-  case 'N':
-    return MklDnnDims::Dim_N;
-  case 'C':
-    return MklDnnDims::Dim_C;
-  case 'H':
-    return MklDnnDims::Dim_H;
-  case 'W':
-    return MklDnnDims::Dim_W;
-  default:
-    LOG(FATAL) << "Invalid dimension: " << dimension;
-    return -1;  // Avoid compiler warning about missing return value
+      case 'N':
+        return MklDnnDims::Dim_N;
+      case 'C':
+        return MklDnnDims::Dim_C;
+      case 'H':
+        return MklDnnDims::Dim_H;
+      case 'W':
+        return MklDnnDims::Dim_W;
+      default:
+        LOG(FATAL) << "Invalid dimension: " << dimension;
+        return -1;  // Avoid compiler warning about missing return value
     }
   }
 
@@ -403,9 +406,9 @@ class MklDnnShape {
     memory::dims retVal;
     if (data_.is_mkl_tensor_) {
       int dimensions = sizeof(data_.sizes_) / sizeof(data_.sizes_[0]);
-      for (size_t i = 0 ; i < dimensions; i++) {
+      for (size_t i = 0; i < dimensions; i++) {
         if (data_.sizes_[i] != INVALID_DIM_SIZE)
-        retVal.push_back(data_.sizes_[i]);
+          retVal.push_back(data_.sizes_[i]);
       }
     } else {
       CHECK_EQ(data_.is_mkl_tensor_, true);
@@ -414,7 +417,7 @@ class MklDnnShape {
   }
 
   inline int64 DimSize(int index) const {
-    CHECK_LT(index, sizeof(data_.sizes_)/sizeof(data_.sizes_[0]));
+    CHECK_LT(index, sizeof(data_.sizes_) / sizeof(data_.sizes_[0]));
     return data_.sizes_[index];
   }
 
@@ -451,7 +454,7 @@ class MklDnnShape {
   /// We don't create primitive_descriptor for TensorFlow layout now.
   /// We use lazy evaluation and create it only when needed.
   inline void SetTfLayout(size_t dims, const memory::dims& sizes,
-                   memory::format format) {
+                          memory::format format) {
     CHECK_EQ(dims, sizes.size());
     data_.dimension_ = dims;
     for (size_t ii = 0; ii < dims; ii++) {
@@ -497,9 +500,7 @@ class MklDnnShape {
     SetTfDimOrder(dimension, data_format);
   }
 
-  inline const mkldnn_dim_t* GetTfToMklDimMap() const {
-    return &data_.map_[0];
-  }
+  inline const mkldnn_dim_t* GetTfToMklDimMap() const { return &data_.map_[0]; }
   inline size_t TfDimIdx(int index) const { return data_.map_[index]; }
   inline int64 TfDimSize(int index) const {
     return data_.sizes_[TfDimIdx(index)];
@@ -553,9 +554,7 @@ class MklDnnShape {
 
   /// Size of buffer to hold the serialized object, the size is computed by
   /// following above mentioned order
-  inline size_t GetSerializeBufferSize() const {
-    return sizeof(MklShapeData);
-  }
+  inline size_t GetSerializeBufferSize() const { return sizeof(MklShapeData); }
 
   void SerializeMklDnnShape(unsigned char* buf, size_t buf_size) const {
     CHECK(buf_size >= GetSerializeBufferSize())
@@ -566,12 +565,12 @@ class MklDnnShape {
   void DeSerializeMklDnnShape(const unsigned char* buf, size_t buf_size) {
     // Make sure buffer holds at least is_mkl_tensor_.
     CHECK(buf_size >= sizeof(data_.is_mkl_tensor_))
-      << "Buffer size is too small in DeSerializeMklDnnShape";
+        << "Buffer size is too small in DeSerializeMklDnnShape";
 
     const bool is_mkl_tensor = *reinterpret_cast<const bool*>(buf);
     if (is_mkl_tensor) {  // If it is an MKL Tensor then read the rest
       CHECK(buf_size >= GetSerializeBufferSize())
-        << "Buffer size is too small in DeSerializeMklDnnShape";
+          << "Buffer size is too small in DeSerializeMklDnnShape";
       data_ = *reinterpret_cast<const MklShapeData*>(buf);
     }
   }
@@ -660,8 +659,7 @@ inline void GetMklShape(OpKernelContext* ctext, int n, MklShape* mklshape) {
 }
 
 #ifdef INTEL_MKL_DNN
-inline void GetMklShape(OpKernelContext* ctext, int n,
-                        MklDnnShape* mklshape) {
+inline void GetMklShape(OpKernelContext* ctext, int n, MklDnnShape* mklshape) {
   mklshape->DeSerializeMklDnnShape(
       ctext->input(GetTensorMetaDataIndex(n, ctext->num_inputs()))
           .flat<uint8>()
@@ -700,8 +698,7 @@ inline void GetMklShapeList(OpKernelContext* ctext, StringPiece name,
 /// Get shape of input tensor pointed by 'input_idx' in TensorShape format.
 /// If the input tensor is in MKL layout, then obtains TensorShape from
 /// MklShape.
-inline TensorShape GetTfShape(OpKernelContext* context,
-                              size_t input_idx) {
+inline TensorShape GetTfShape(OpKernelContext* context, size_t input_idx) {
   // Sanity check.
   CHECK_NOTNULL(context);
   CHECK_LT(input_idx, context->num_inputs());
@@ -821,7 +818,7 @@ inline void AllocTmpBuffer(OpKernelContext* context, Tensor* tensor_out,
 
 template <typename T>
 inline void AllocTmpBuffer(OpKernelContext* context, Tensor* tensor_out,
-                              TensorShape tf_shape) {
+                           TensorShape tf_shape) {
   OP_REQUIRES_OK(context, context->allocate_temp(DataTypeToEnum<T>::v(),
                                                  tf_shape, tensor_out));
 }
@@ -1099,7 +1096,8 @@ inline void MklNCHWToNHWC(const Tensor& input, Tensor** output) {
 ///
 /// @input None
 /// @return memory::data_type corresponding to type T
-template<typename T> static memory::data_type MklDnnType();
+template <typename T>
+static memory::data_type MklDnnType();
 
 /// Instantiation for float type. Add similar instantiations for other
 /// type if needed.
@@ -1114,10 +1112,11 @@ memory::data_type MklDnnType<float>() {
 /// @return: memory::format corresponding to TensorFlow data format;
 ///          Fails with an error if invalid data format.
 inline memory::format TFDataFormatToMklDnnDataFormat(TensorFormat format) {
-  if (format == FORMAT_NHWC) return memory::format::nhwc;
-  else if (format == FORMAT_NCHW) return memory::format::nchw;
-  TF_CHECK_OK(Status(error::Code::INVALID_ARGUMENT,
-                     "Unsupported data format"));
+  if (format == FORMAT_NHWC)
+    return memory::format::nhwc;
+  else if (format == FORMAT_NCHW)
+    return memory::format::nchw;
+  TF_CHECK_OK(Status(error::Code::INVALID_ARGUMENT, "Unsupported data format"));
   // Return to get rid of compiler warning
   return memory::format::format_undef;
 }
@@ -1128,10 +1127,11 @@ inline memory::format TFDataFormatToMklDnnDataFormat(TensorFormat format) {
 /// @return: Tensorflow data format corresponding to memory::format
 ///          Fails with an error if invalid data format.
 inline TensorFormat MklDnnDataFormatToTFDataFormat(memory::format format) {
-  if (format == memory::format::nhwc) return FORMAT_NHWC;
-  else if (format == memory::format::nchw) return FORMAT_NCHW;
-  TF_CHECK_OK(Status(error::Code::INVALID_ARGUMENT,
-                     "Unsupported data format"));
+  if (format == memory::format::nhwc)
+    return FORMAT_NHWC;
+  else if (format == memory::format::nchw)
+    return FORMAT_NCHW;
+  TF_CHECK_OK(Status(error::Code::INVALID_ARGUMENT, "Unsupported data format"));
 }
 
 /// Map TensorShape object into memory::dims required by MKL-DNN
@@ -1161,7 +1161,7 @@ inline memory::dims TFShapeToMklDnnDims(const TensorShape& shape) {
 /// @input TensorShape object in shape
 /// @return memory::dims in MKL-DNN required NCHW format
 inline memory::dims TFShapeToMklDnnDimsInNCHW(const TensorShape& shape,
-                                            TensorFormat format) {
+                                              TensorFormat format) {
   // Check validity of format.
   CHECK_NE(TFDataFormatToMklDnnDataFormat(format),
            memory::format::format_undef);
@@ -1237,21 +1237,23 @@ class MklDnnData {
   const engine* cpu_engine_;
 
  public:
-  explicit MklDnnData(const engine* e) : user_memory_(nullptr),
-                                         reorder_memory_(nullptr),
-                                         op_md_(nullptr), cpu_engine_(e) {}
+  explicit MklDnnData(const engine* e)
+      : user_memory_(nullptr),
+        reorder_memory_(nullptr),
+        op_md_(nullptr),
+        cpu_engine_(e) {}
 
   ~MklDnnData() {
     cpu_engine_ = nullptr;  // We don't own this.
-    delete(user_memory_);
-    delete(reorder_memory_);
-    delete(op_md_);
+    delete (user_memory_);
+    delete (reorder_memory_);
+    delete (op_md_);
   }
 
   inline void* GetTensorBuffer(const Tensor* tensor) const {
     CHECK_NOTNULL(tensor);
-    return const_cast<void*>(static_cast<const void*>(
-              tensor->flat<T>().data()));
+    return const_cast<void*>(
+        static_cast<const void*>(tensor->flat<T>().data()));
   }
 
   /// Set user memory primitive using specified dimensions, memory format and
@@ -1283,7 +1285,7 @@ class MklDnnData {
   /// @return: memory::desc object corresponding to blocked memory format
   ///          for given dimensions and strides.
   static inline memory::desc CreateBlockedMemDesc(const memory::dims& dim,
-      const memory::dims& strides) {
+                                                  const memory::dims& strides) {
     CHECK_EQ(dim.size(), strides.size());
 
     // We have to construct memory descriptor in a C style. This is not at all
@@ -1352,7 +1354,7 @@ class MklDnnData {
     CHECK_NOTNULL(cpu_engine_);
     // TODO(nhasabni): can we remove dynamic memory allocation?
     if (data_buffer) {
-     user_memory_ = new memory(pd, data_buffer);
+      user_memory_ = new memory(pd, data_buffer);
     } else {
       user_memory_ = new memory(pd);
     }
diff --git a/tensorflow/core/util/mkl_util_test.cc b/tensorflow/core/util/mkl_util_test.cc
index 6aef3d86e9..8b73eadb40 100644
--- a/tensorflow/core/util/mkl_util_test.cc
+++ b/tensorflow/core/util/mkl_util_test.cc
@@ -54,7 +54,6 @@ TEST(MklUtilTest, MklDnnTfShape) {
   EXPECT_NE(b_tf_shape_nchw, b_mkldnn_tf_shape);
 }
 
-
 TEST(MklUtilTest, MklDnnBlockedFormatTest) {
   // Let's create 2D tensor of shape {3, 4} with 3 being innermost dimension
   // first (case 1) and then it being outermost dimension (case 2).
diff --git a/tensorflow/java/src/test/java/org/tensorflow/ShapeTest.java b/tensorflow/java/src/test/java/org/tensorflow/ShapeTest.java
index 92cc3bd60e..313c09e1e4 100644
--- a/tensorflow/java/src/test/java/org/tensorflow/ShapeTest.java
+++ b/tensorflow/java/src/test/java/org/tensorflow/ShapeTest.java
@@ -84,11 +84,10 @@ public class ShapeTest {
     assertEquals(Shape.scalar(), Shape.scalar());
     assertEquals(Shape.make(1, 2, 3), Shape.make(1, 2, 3));
 
-    assertNotEquals(Shape.make(1,2), null);
-    assertNotEquals(Shape.make(1,2), new Object());
+    assertNotEquals(Shape.make(1, 2), null);
+    assertNotEquals(Shape.make(1, 2), new Object());
     assertNotEquals(Shape.make(1, 2, 3), Shape.make(1, 2, 4));
 
-
     assertNotEquals(Shape.unknown(), Shape.unknown());
     assertNotEquals(Shape.make(-1), Shape.make(-1));
     assertNotEquals(Shape.make(1, -1, 3), Shape.make(1, -1, 3));
@@ -103,4 +102,3 @@ public class ShapeTest {
     assertNotEquals(Shape.make(1, 2).hashCode(), Shape.make(1, 3).hashCode());
   }
 }
-
diff --git a/tensorflow/python/estimator/inputs/numpy_io.py b/tensorflow/python/estimator/inputs/numpy_io.py
index 3512f66284..750af20e8a 100644
--- a/tensorflow/python/estimator/inputs/numpy_io.py
+++ b/tensorflow/python/estimator/inputs/numpy_io.py
@@ -117,11 +117,11 @@ def numpy_input_fn(x,
         raise ValueError('y cannot be empty dict, use None instead.')
 
       ordered_dict_y = collections.OrderedDict(
-        sorted(y.items(), key=lambda t: t[0]))
+          sorted(y.items(), key=lambda t: t[0]))
       target_keys = list(ordered_dict_y.keys())
 
       duplicate_keys = set(feature_keys).intersection(set(target_keys))
-      if len(duplicate_keys):
+      if duplicate_keys:
         raise ValueError('{} duplicate keys are found in both x and y: '
                          '{}'.format(len(duplicate_keys), duplicate_keys))
 
@@ -131,16 +131,14 @@ def numpy_input_fn(x,
       ordered_dict_data[target_keys] = y
 
     if len(set(v.shape[0] for v in ordered_dict_data.values())) != 1:
-      shape_dict_of_x = {k: ordered_dict_data[k].shape
-                         for k in feature_keys}
+      shape_dict_of_x = {k: ordered_dict_data[k].shape for k in feature_keys}
 
       if target_keys is None:
         shape_of_y = None
       elif isinstance(target_keys, string_types):
         shape_of_y = y.shape
       else:
-        shape_of_y = {k: ordered_dict_data[k].shape
-                      for k in target_keys}
+        shape_of_y = {k: ordered_dict_data[k].shape for k in target_keys}
 
       raise ValueError('Length of tensors in x and y is mismatched. All '
                        'elements in x and y must have the same length.\n'
@@ -155,11 +153,12 @@ def numpy_input_fn(x,
         enqueue_size=batch_size,
         num_epochs=num_epochs)
 
-    batch = (queue.dequeue_many(batch_size) if num_epochs is None
-                else queue.dequeue_up_to(batch_size))
+    batch = (
+        queue.dequeue_many(batch_size)
+        if num_epochs is None else queue.dequeue_up_to(batch_size))
 
     # Remove the first `Tensor` in `batch`, which is the row number.
-    if len(batch) > 0:
+    if batch:
       batch.pop(0)
 
     features = dict(zip(feature_keys, batch[:len(feature_keys)]))
diff --git a/tensorflow/python/estimator/inputs/numpy_io_test.py b/tensorflow/python/estimator/inputs/numpy_io_test.py
index 65eae7a7dc..1374e3f7e1 100644
--- a/tensorflow/python/estimator/inputs/numpy_io_test.py
+++ b/tensorflow/python/estimator/inputs/numpy_io_test.py
@@ -255,7 +255,7 @@ class NumpyIoTest(test.TestCase):
 
     with self.test_session() as session:
       input_fn = numpy_io.numpy_input_fn(
-        x, y, batch_size=2, shuffle=False, num_epochs=1)
+          x, y, batch_size=2, shuffle=False, num_epochs=1)
       features_tensor = input_fn()
 
       coord = coordinator.Coordinator()
@@ -327,7 +327,7 @@ class NumpyIoTest(test.TestCase):
 
     with self.test_session() as session:
       input_fn = numpy_io.numpy_input_fn(
-        x, y, batch_size=2, shuffle=False, num_epochs=1)
+          x, y, batch_size=2, shuffle=False, num_epochs=1)
       features_tensor, targets_tensor = input_fn()
 
       coord = coordinator.Coordinator()
@@ -362,13 +362,10 @@ class NumpyIoTest(test.TestCase):
     a = np.arange(4) * 1.0
     b = np.arange(32, 36)
     x = {'a': a, 'b': b}
-    y = {'y1': np.arange(-32, -28),
-         'a': a,
-         'y2': np.arange(32, 28, -1),
-         'b': b}
+    y = {'y1': np.arange(-32, -28), 'a': a, 'y2': np.arange(32, 28, -1), 'b': b}
     with self.test_session():
       with self.assertRaisesRegexp(
-              ValueError, '2 duplicate keys are found in both x and y'):
+          ValueError, '2 duplicate keys are found in both x and y'):
         failing_input_fn = numpy_io.numpy_input_fn(x, y, shuffle=False)
         failing_input_fn()
 
diff --git a/tensorflow/python/framework/test_util.py b/tensorflow/python/framework/test_util.py
index 1610214d54..4c026590c2 100644
--- a/tensorflow/python/framework/test_util.py
+++ b/tensorflow/python/framework/test_util.py
@@ -987,10 +987,9 @@ class TensorFlowTestCase(googletest.TestCase):
       msg: An optional string message to append to the failure message.
     """
     # f1 == f2 is needed here as we might have: f1, f2 = inf, inf
-    self.assertTrue(
-        f1 == f2 or math.fabs(f1 - f2) <= err,
-        "%f != %f +/- %f%s" % (f1, f2, err, " (%s)" % msg
-                               if msg is not None else ""))
+    self.assertTrue(f1 == f2 or math.fabs(f1 - f2) <= err,
+                    "%f != %f +/- %f%s" % (f1, f2, err, " (%s)" % msg
+                                           if msg is not None else ""))
 
   def assertArrayNear(self, farray1, farray2, err):
     """Asserts that two float arrays are near each other.
diff --git a/tensorflow/python/kernel_tests/array_ops_test.py b/tensorflow/python/kernel_tests/array_ops_test.py
index 76b80e60ea..1bf2b70c1b 100644
--- a/tensorflow/python/kernel_tests/array_ops_test.py
+++ b/tensorflow/python/kernel_tests/array_ops_test.py
@@ -114,21 +114,21 @@ class BooleanMaskTest(test_util.TensorFlowTestCase):
     arr = np.random.rand(*arr_shape)
     mask = make_mask(arr_shape[:ndims_mask])
     if axis is not None:
-      mask = make_mask(arr_shape[axis:ndims_mask+axis])
+      mask = make_mask(arr_shape[axis:ndims_mask + axis])
     if axis is None or axis == 0:
       masked_arr = arr[mask]
     elif axis == 1:
-      masked_arr = arr[:,mask]
+      masked_arr = arr[:, mask]
     elif axis == 2:
-      masked_arr = arr[:,:,mask]
-    with self.test_session() as sess:
+      masked_arr = arr[:, :, mask]
+    with self.test_session():
       masked_tensor = array_ops.boolean_mask(arr, mask, axis=axis)
 
       # Leading dimension size of masked_tensor is always unknown until runtime
       # since we don't how many elements will be kept.
       leading = 1 if axis is None else axis + 1
       self.assertAllEqual(masked_tensor.get_shape()[leading:],
-          masked_arr.shape[leading:])
+                          masked_arr.shape[leading:])
 
       self.assertAllClose(masked_arr, masked_tensor.eval())
 
@@ -1078,6 +1078,7 @@ class PadTest(test_util.TensorFlowTestCase):
                            [0, 0, 4, 5, 6, 0, 0],
                            [0, 0, 0, 0, 0, 0, 0]])
 
+
 class InvertPermutationTest(test_util.TensorFlowTestCase):
 
   def testInvertPermutation(self):
diff --git a/tensorflow/python/kernel_tests/bincount_op_test.py b/tensorflow/python/kernel_tests/bincount_op_test.py
index 79285476b4..2767df127e 100644
--- a/tensorflow/python/kernel_tests/bincount_op_test.py
+++ b/tensorflow/python/kernel_tests/bincount_op_test.py
@@ -25,6 +25,7 @@ from tensorflow.python.framework import test_util
 from tensorflow.python.ops import math_ops
 from tensorflow.python.platform import googletest
 
+
 class BincountTest(test_util.TensorFlowTestCase):
 
   def test_empty(self):
@@ -72,8 +73,7 @@ class BincountTest(test_util.TensorFlowTestCase):
         else:
           weights = np.random.random(num_samples)
         self.assertAllClose(
-            math_ops.bincount(arr, weights).eval(),
-            np.bincount(arr, weights))
+            math_ops.bincount(arr, weights).eval(), np.bincount(arr, weights))
 
   def test_random_without_weights(self):
     num_samples = 10000
@@ -83,8 +83,7 @@ class BincountTest(test_util.TensorFlowTestCase):
         arr = np.random.randint(0, 1000, num_samples)
         weights = np.ones(num_samples).astype(dtype)
         self.assertAllClose(
-            math_ops.bincount(arr, None).eval(),
-            np.bincount(arr, weights))
+            math_ops.bincount(arr, None).eval(), np.bincount(arr, weights))
 
   def test_zero_weights(self):
     with self.test_session(use_gpu=True):
diff --git a/tensorflow/python/kernel_tests/constant_op_test.py b/tensorflow/python/kernel_tests/constant_op_test.py
index 6cbdd4cbb3..68817cc256 100644
--- a/tensorflow/python/kernel_tests/constant_op_test.py
+++ b/tensorflow/python/kernel_tests/constant_op_test.py
@@ -439,11 +439,10 @@ class ZerosLikeTest(test.TestCase):
 
   def testZerosLikeCPU(self):
     for dtype in [
-        dtypes_lib.float32, dtypes_lib.float64,
-        dtypes_lib.int8, dtypes_lib.uint8, dtypes_lib.int16, dtypes_lib.uint16,
-        dtypes_lib.int32, dtypes_lib.int64, dtypes_lib.bool,
-        dtypes_lib.complex64, dtypes_lib.complex128,
-        dtypes_lib.string
+        dtypes_lib.float32, dtypes_lib.float64, dtypes_lib.int8,
+        dtypes_lib.uint8, dtypes_lib.int16, dtypes_lib.uint16, dtypes_lib.int32,
+        dtypes_lib.int64, dtypes_lib.bool, dtypes_lib.complex64,
+        dtypes_lib.complex128, dtypes_lib.string
     ]:
       self._compareZeros(dtype, fully_defined_shape=False, use_gpu=False)
       self._compareZeros(dtype, fully_defined_shape=True, use_gpu=False)
@@ -574,10 +573,10 @@ class OnesLikeTest(test.TestCase):
 
   def testOnesLike(self):
     for dtype in [
-        dtypes_lib.float32, dtypes_lib.float64,
-        dtypes_lib.int8, dtypes_lib.uint8, dtypes_lib.int16, dtypes_lib.uint16,
-        dtypes_lib.int32, dtypes_lib.int64, dtypes_lib.bool,
-        dtypes_lib.complex64, dtypes_lib.complex128
+        dtypes_lib.float32, dtypes_lib.float64, dtypes_lib.int8,
+        dtypes_lib.uint8, dtypes_lib.int16, dtypes_lib.uint16, dtypes_lib.int32,
+        dtypes_lib.int64, dtypes_lib.bool, dtypes_lib.complex64,
+        dtypes_lib.complex128
     ]:
       numpy_dtype = dtype.as_numpy_dtype
       with self.test_session():
diff --git a/tensorflow/python/kernel_tests/conv1d_test.py b/tensorflow/python/kernel_tests/conv1d_test.py
index a7e23ead1c..d92797a7d3 100644
--- a/tensorflow/python/kernel_tests/conv1d_test.py
+++ b/tensorflow/python/kernel_tests/conv1d_test.py
@@ -52,7 +52,6 @@ class Conv1DTest(test.TestCase):
           self.assertEqual(len(output), 2)
           self.assertAllClose(output, [2 * 1 + 1 * 2, 2 * 3 + 1 * 4])
 
-
   def testConv1DTranspose(self):
     with self.test_session():
       stride = 2
@@ -93,5 +92,6 @@ class Conv1DTest(test.TestCase):
 
     self.assertAllClose(cache_values, value)
 
+
 if __name__ == "__main__":
   test.main()
diff --git a/tensorflow/python/kernel_tests/conv_ops_3d_test.py b/tensorflow/python/kernel_tests/conv_ops_3d_test.py
index 116681fc4c..ec8ac74163 100644
--- a/tensorflow/python/kernel_tests/conv_ops_3d_test.py
+++ b/tensorflow/python/kernel_tests/conv_ops_3d_test.py
@@ -68,8 +68,8 @@ class Conv3DTest(test.TestCase):
       total_size_2 *= s
 
     # Initializes the input tensor with array containing numbers from 0 to 1.
-    # We keep the input tensor values fairly small to avoid overflowing a float16 
-    # tensor during the conv3d 
+    # We keep the input tensor values fairly small to avoid overflowing float16
+    # during the conv3d.
     x1 = [f * 1.0 / total_size_1 for f in range(1, total_size_1 + 1)]
     x2 = [f * 1.0 / total_size_2 for f in range(1, total_size_2 + 1)]
     with self.test_session(use_gpu=use_gpu):
@@ -115,15 +115,13 @@ class Conv3DTest(test.TestCase):
           if value.dtype == np.float16:
             tol = 1e-3
 
-          self.assertAllClose(expected, value.flatten(), atol=tol,
-                              rtol=tol)
+          self.assertAllClose(expected, value.flatten(), atol=tol, rtol=tol)
 
   def testConv3D1x1x1Filter(self):
     expected_output = [
-        0.18518519,  0.22222222,  0.25925926,  0.40740741,  0.5       ,
-        0.59259259,  0.62962963,  0.77777778,  0.92592593,  0.85185185,
-        1.05555556,  1.25925926,  1.07407407,  1.33333333,  1.59259259,
-        1.2962963 ,  1.61111111,  1.92592593
+        0.18518519, 0.22222222, 0.25925926, 0.40740741, 0.5, 0.59259259,
+        0.62962963, 0.77777778, 0.92592593, 0.85185185, 1.05555556, 1.25925926,
+        1.07407407, 1.33333333, 1.59259259, 1.2962963, 1.61111111, 1.92592593
     ]
 
     # These are equivalent to the Conv2D1x1 case.
@@ -149,10 +147,10 @@ class Conv3DTest(test.TestCase):
   # Expected values computed using scipy's correlate function.
   def testConv3D2x2x2Filter(self):
     expected_output = [
-        3.77199074,   3.85069444,   3.92939815,   4.2650463 ,   4.35763889,
-        4.45023148,   6.73032407,   6.89236111,   7.05439815,   7.22337963,
-        7.39930556,   7.57523148,   9.68865741,   9.93402778,  10.17939815,
-        10.18171296,  10.44097222,  10.70023148
+        3.77199074, 3.85069444, 3.92939815, 4.2650463, 4.35763889, 4.45023148,
+        6.73032407, 6.89236111, 7.05439815, 7.22337963, 7.39930556, 7.57523148,
+        9.68865741, 9.93402778, 10.17939815, 10.18171296, 10.44097222,
+        10.70023148
     ]
     # expected_shape = [1, 3, 1, 2, 5]
     self._VerifyValues(
@@ -164,19 +162,17 @@ class Conv3DTest(test.TestCase):
 
   def testConv3DStrides(self):
     expected_output = [
-        0.06071429,  0.08988095,  0.10238095,  0.11488095,  0.12738095,
-        0.13988095,  0.08452381,  0.26071429,  0.35238095,  0.36488095,
-        0.37738095,  0.38988095,  0.40238095,  0.23452381,  0.46071429,
-        0.61488095,  0.62738095,  0.63988095,  0.65238095,  0.66488095,
-        0.38452381,  1.12738095,  1.48988095,  1.50238095,  1.51488095,
-        1.52738095,  1.53988095,  0.88452381,  1.32738095,  1.75238095,
-        1.76488095,  1.77738095,  1.78988095,  1.80238095,  1.03452381,
-        1.52738095,  2.01488095,  2.02738095,  2.03988095,  2.05238095,
-        2.06488095,  1.18452381,  2.19404762,  2.88988095,  2.90238095,
-        2.91488095,  2.92738095,  2.93988095,  1.68452381,  2.39404762,
-        3.15238095,  3.16488095,  3.17738095,  3.18988095,  3.20238095,
-        1.83452381,  2.59404762,  3.41488095,  3.42738095,  3.43988095,
-        3.45238095,  3.46488095,  1.98452381
+        0.06071429, 0.08988095, 0.10238095, 0.11488095, 0.12738095, 0.13988095,
+        0.08452381, 0.26071429, 0.35238095, 0.36488095, 0.37738095, 0.38988095,
+        0.40238095, 0.23452381, 0.46071429, 0.61488095, 0.62738095, 0.63988095,
+        0.65238095, 0.66488095, 0.38452381, 1.12738095, 1.48988095, 1.50238095,
+        1.51488095, 1.52738095, 1.53988095, 0.88452381, 1.32738095, 1.75238095,
+        1.76488095, 1.77738095, 1.78988095, 1.80238095, 1.03452381, 1.52738095,
+        2.01488095, 2.02738095, 2.03988095, 2.05238095, 2.06488095, 1.18452381,
+        2.19404762, 2.88988095, 2.90238095, 2.91488095, 2.92738095, 2.93988095,
+        1.68452381, 2.39404762, 3.15238095, 3.16488095, 3.17738095, 3.18988095,
+        3.20238095, 1.83452381, 2.59404762, 3.41488095, 3.42738095, 3.43988095,
+        3.45238095, 3.46488095, 1.98452381
     ]
     self._VerifyValues(
         tensor_in_sizes=[1, 5, 8, 7, 1],
@@ -187,8 +183,7 @@ class Conv3DTest(test.TestCase):
 
   def testConv3D2x2x2FilterStride2(self):
     expected_output = [
-        3.77199074,  3.85069444,  3.92939815,  9.68865741,  9.93402778,
-        10.17939815
+        3.77199074, 3.85069444, 3.92939815, 9.68865741, 9.93402778, 10.17939815
     ]
     self._VerifyValues(
         tensor_in_sizes=[1, 4, 2, 3, 3],
@@ -199,14 +194,12 @@ class Conv3DTest(test.TestCase):
 
   def testConv3DStride3(self):
     expected_output = [
-        1.51140873,  1.57167659,  1.63194444,  1.56349206,  1.62673611,
-        1.68998016,  1.6155754 ,  1.68179563,  1.74801587,  1.9280754 ,
-        2.01215278,  2.09623016,  1.98015873,  2.0672123 ,  2.15426587,
-        2.03224206,  2.12227183,  2.21230159,  4.4280754 ,  4.65500992,
-        4.88194444,  4.48015873,  4.71006944,  4.93998016,  4.53224206,
-        4.76512897,  4.99801587,  4.84474206,  5.09548611,  5.34623016,
-        4.8968254 ,  5.15054563,  5.40426587,  4.94890873,  5.20560516,
-        5.46230159
+        1.51140873, 1.57167659, 1.63194444, 1.56349206, 1.62673611, 1.68998016,
+        1.6155754, 1.68179563, 1.74801587, 1.9280754, 2.01215278, 2.09623016,
+        1.98015873, 2.0672123, 2.15426587, 2.03224206, 2.12227183, 2.21230159,
+        4.4280754, 4.65500992, 4.88194444, 4.48015873, 4.71006944, 4.93998016,
+        4.53224206, 4.76512897, 4.99801587, 4.84474206, 5.09548611, 5.34623016,
+        4.8968254, 5.15054563, 5.40426587, 4.94890873, 5.20560516, 5.46230159
     ]
     self._VerifyValues(
         tensor_in_sizes=[1, 6, 7, 8, 2],
@@ -217,9 +210,8 @@ class Conv3DTest(test.TestCase):
 
   def testConv3D2x2x2FilterStride2Same(self):
     expected_output = [
-        3.77199074,   3.85069444,   3.92939815,   2.0162037 ,   2.06597222,
-        2.11574074,   9.68865741,   9.93402778,  10.17939815,   4.59953704,
-        4.73263889,   4.86574074
+        3.77199074, 3.85069444, 3.92939815, 2.0162037, 2.06597222, 2.11574074,
+        9.68865741, 9.93402778, 10.17939815, 4.59953704, 4.73263889, 4.86574074
     ]
     self._VerifyValues(
         tensor_in_sizes=[1, 4, 2, 3, 3],
@@ -230,8 +222,8 @@ class Conv3DTest(test.TestCase):
 
   def testKernelSmallerThanStride(self):
     expected_output = [
-        0.03703704,  0.11111111,  0.25925926,  0.33333333,  0.7037037 ,
-        0.77777778,  0.92592593,  1.
+        0.03703704, 0.11111111, 0.25925926, 0.33333333, 0.7037037, 0.77777778,
+        0.92592593, 1.
     ]
     self._VerifyValues(
         tensor_in_sizes=[1, 3, 3, 3, 1],
@@ -247,12 +239,11 @@ class Conv3DTest(test.TestCase):
         expected=expected_output)
 
     expected_output = [
-        0.54081633,  0.58017493,  0.28061224,  0.81632653,  0.85568513,
-        0.40306122,  0.41873178,  0.4340379 ,  0.19642857,  2.46938776,
-        2.50874636,  1.1377551 ,  2.74489796,  2.78425656,  1.26020408,
-        1.16873178,  1.1840379 ,  0.51785714,  1.09511662,  1.10604956,
-        0.44642857,  1.17164723,  1.18258017,  0.47704082,  0.3691691 ,
-        0.37244898,  0.125
+        0.54081633, 0.58017493, 0.28061224, 0.81632653, 0.85568513, 0.40306122,
+        0.41873178, 0.4340379, 0.19642857, 2.46938776, 2.50874636, 1.1377551,
+        2.74489796, 2.78425656, 1.26020408, 1.16873178, 1.1840379, 0.51785714,
+        1.09511662, 1.10604956, 0.44642857, 1.17164723, 1.18258017, 0.47704082,
+        0.3691691, 0.37244898, 0.125
     ]
     self._VerifyValues(
         tensor_in_sizes=[1, 7, 7, 7, 1],
@@ -262,8 +253,8 @@ class Conv3DTest(test.TestCase):
         expected=expected_output)
 
     expected_output = [
-        0.540816,  0.580175,  0.816327,  0.855685,  2.469388,  2.508746,
-        2.744898,  2.784257
+        0.540816, 0.580175, 0.816327, 0.855685, 2.469388, 2.508746, 2.744898,
+        2.784257
     ]
     self._VerifyValues(
         tensor_in_sizes=[1, 7, 7, 7, 1],
@@ -278,7 +269,7 @@ class Conv3DTest(test.TestCase):
         filter_in_sizes=[2, 1, 2, 1, 2],
         stride=1,
         padding="VALID",
-        expected=[1.5625,  1.875])
+        expected=[1.5625, 1.875])
 
   def _ConstructAndTestGradientForConfig(
       self, batch, input_shape, filter_shape, in_depth, out_depth, stride,
@@ -318,7 +309,6 @@ class Conv3DTest(test.TestCase):
     input_data = [x * 1.0 / input_size for x in range(0, input_size)]
     filter_data = [x * 1.0 / filter_size for x in range(0, filter_size)]
 
-
     for data_type in self._DtypesToTest(use_gpu=use_gpu):
       # TODO(mjanusz): Modify gradient_checker to also provide max relative
       # error and synchronize the tolerance levels between the tests for forward
@@ -330,12 +320,11 @@ class Conv3DTest(test.TestCase):
       elif data_type == dtypes.float16:
         tolerance = 1e-3
 
-
       with self.test_session(use_gpu=use_gpu):
         orig_input_tensor = constant_op.constant(
-          input_data, shape=input_shape, dtype=data_type, name="input")
+            input_data, shape=input_shape, dtype=data_type, name="input")
         filter_tensor = constant_op.constant(
-          filter_data, shape=filter_shape, dtype=data_type, name="filter")
+            filter_data, shape=filter_shape, dtype=data_type, name="filter")
 
         if data_format == "NCDHW":
           input_tensor = test_util.NHWCToNCHW(orig_input_tensor)
@@ -345,25 +334,23 @@ class Conv3DTest(test.TestCase):
           new_strides = strides
 
         conv = nn_ops.conv3d(
-          input_tensor, filter_tensor, new_strides, padding,
-          data_format=data_format, name="conv")
+            input_tensor,
+            filter_tensor,
+            new_strides,
+            padding,
+            data_format=data_format,
+            name="conv")
 
         if data_format == "NCDHW":
           conv = test_util.NCHWToNHWC(conv)
 
-        
         if test_input:
-          jacob_t, jacob_n = gradient_checker.compute_gradient(orig_input_tensor,
-                                                               input_shape,
-                                                               conv,
-                                                               output_shape)
+          jacob_t, jacob_n = gradient_checker.compute_gradient(
+              orig_input_tensor, input_shape, conv, output_shape)
         else:
-          jacob_t, jacob_n = gradient_checker.compute_gradient(filter_tensor,
-                                                               filter_shape,
-                                                               conv,
-                                                               output_shape)
-        
-        
+          jacob_t, jacob_n = gradient_checker.compute_gradient(
+              filter_tensor, filter_shape, conv, output_shape)
+
         if data_type != dtypes.float16:
           reference_jacob_t = jacob_t
           err = np.fabs(jacob_t - jacob_n).max()
@@ -375,7 +362,6 @@ class Conv3DTest(test.TestCase):
       print("conv3d gradient error = ", err)
       self.assertLess(err, tolerance)
 
-
   def ConstructAndTestGradient(self, **kwargs):
     for data_format, use_gpu in GetTestConfigs():
       self._ConstructAndTestGradientForConfig(data_format=data_format,
diff --git a/tensorflow/python/kernel_tests/pooling_ops_test.py b/tensorflow/python/kernel_tests/pooling_ops_test.py
index 150e2ff7f2..6be8997cab 100644
--- a/tensorflow/python/kernel_tests/pooling_ops_test.py
+++ b/tensorflow/python/kernel_tests/pooling_ops_test.py
@@ -18,8 +18,8 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-import numpy as np
 import os
+import numpy as np
 
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
@@ -1442,7 +1442,6 @@ class PoolingTest(test.TestCase):
           use_gpu=True,
           v2=v2)
 
-
     # Propagate the diff in cases of NaNs
     os.environ["TF_ENABLE_MAXPOOL_NANPROP"] = "1"
     expected_input_backprop_cudnn = expected_input_backprop_tf_cpu
diff --git a/tensorflow/python/kernel_tests/reader_ops_test.py b/tensorflow/python/kernel_tests/reader_ops_test.py
index 8e54d10f32..223a4b2c87 100644
--- a/tensorflow/python/kernel_tests/reader_ops_test.py
+++ b/tensorflow/python/kernel_tests/reader_ops_test.py
@@ -1018,15 +1018,15 @@ class LMDBReaderTest(test.TestCase):
     with self.test_session() as sess:
       reader1 = io_ops.LMDBReader(name="test_read_from_same_file1")
       reader2 = io_ops.LMDBReader(name="test_read_from_same_file2")
-      filename_queue = input_lib.string_input_producer([self.db_path],
-                                                       num_epochs=None)
+      filename_queue = input_lib.string_input_producer(
+          [self.db_path], num_epochs=None)
       key1, value1 = reader1.read(filename_queue)
       key2, value2 = reader2.read(filename_queue)
 
       coord = coordinator.Coordinator()
       threads = queue_runner_impl.start_queue_runners(sess, coord=coord)
-      for i in range(3):
-        for j in range(10):
+      for _ in range(3):
+        for _ in range(10):
           k1, v1, k2, v2 = sess.run([key1, value1, key2, value2])
           self.assertAllEqual(compat.as_bytes(k1), compat.as_bytes(k2))
           self.assertAllEqual(compat.as_bytes(v1), compat.as_bytes(v2))
@@ -1054,14 +1054,14 @@ class LMDBReaderTest(test.TestCase):
   def testReadFromFileRepeatedly(self):
     with self.test_session() as sess:
       reader = io_ops.LMDBReader(name="test_read_from_file_repeated")
-      filename_queue = input_lib.string_input_producer([self.db_path],
-                                                       num_epochs=None)
+      filename_queue = input_lib.string_input_producer(
+          [self.db_path], num_epochs=None)
       key, value = reader.read(filename_queue)
 
       coord = coordinator.Coordinator()
       threads = queue_runner_impl.start_queue_runners(sess, coord=coord)
       # Iterate over the lmdb 3 times.
-      for i in range(3):
+      for _ in range(3):
         # Go over all 10 records each time.
         for j in range(10):
           k, v = sess.run([key, value])
@@ -1071,5 +1071,6 @@ class LMDBReaderTest(test.TestCase):
       coord.request_stop()
       coord.join(threads)
 
+
 if __name__ == "__main__":
   test.main()
diff --git a/tensorflow/python/kernel_tests/segment_reduction_ops_test.py b/tensorflow/python/kernel_tests/segment_reduction_ops_test.py
index 3a02f24902..99f9f09690 100644
--- a/tensorflow/python/kernel_tests/segment_reduction_ops_test.py
+++ b/tensorflow/python/kernel_tests/segment_reduction_ops_test.py
@@ -380,7 +380,7 @@ class UnsortedSegmentSumTest(SegmentReductionHelper):
           # Replace np_ans[8] with 0 for the value
           np_ans[8:] = 0
           # Replace 8 with -1 in indices
-          np.place(indices, indices==8, [-1])
+          np.place(indices, indices == 8, [-1])
           s = math_ops.unsorted_segment_sum(
               data=tf_x, segment_ids=indices, num_segments=num_segments)
           tf_ans = s.eval()
diff --git a/tensorflow/python/kernel_tests/unique_op_test.py b/tensorflow/python/kernel_tests/unique_op_test.py
index 04758ce45a..6390b7c518 100644
--- a/tensorflow/python/kernel_tests/unique_op_test.py
+++ b/tensorflow/python/kernel_tests/unique_op_test.py
@@ -87,6 +87,7 @@ class UniqueTest(test.TestCase):
     for i in range(len(x)):
       self.assertEqual(x[i], tf_y[tf_idx[i]])
 
+
 class UniqueWithCountsTest(test.TestCase):
 
   def testInt32(self):
diff --git a/tensorflow/python/layers/normalization.py b/tensorflow/python/layers/normalization.py
index 4d5fb97845..83237b8733 100644
--- a/tensorflow/python/layers/normalization.py
+++ b/tensorflow/python/layers/normalization.py
@@ -267,34 +267,34 @@ class BatchNormalization(base.Layer):
           self.axis[idx] = x + 1      # Account for added dimension
 
     if self.scale:
-      self.gamma = self.add_variable(name='gamma',
-                                     shape=param_shape,
-                                     dtype=param_dtype,
-                                     initializer=self.gamma_initializer,
-                                     regularizer=self.gamma_regularizer,
-                                     constraint=self.gamma_constraint,
-                                     trainable=True)
+      self.gamma = self.add_variable(
+          name='gamma',
+          shape=param_shape,
+          dtype=param_dtype,
+          initializer=self.gamma_initializer,
+          regularizer=self.gamma_regularizer,
+          constraint=self.gamma_constraint,
+          trainable=True)
     else:
       self.gamma = None
       if self.fused:
-        self._gamma_const = array_ops.constant(1.0,
-                                               dtype=param_dtype,
-                                               shape=param_shape)
+        self._gamma_const = array_ops.constant(
+            1.0, dtype=param_dtype, shape=param_shape)
 
     if self.center:
-      self.beta = self.add_variable(name='beta',
-                                    shape=param_shape,
-                                    dtype=param_dtype,
-                                    initializer=self.beta_initializer,
-                                    regularizer=self.beta_regularizer,
-                                    constraint=self.beta_constraint,
-                                    trainable=True)
+      self.beta = self.add_variable(
+          name='beta',
+          shape=param_shape,
+          dtype=param_dtype,
+          initializer=self.beta_initializer,
+          regularizer=self.beta_regularizer,
+          constraint=self.beta_constraint,
+          trainable=True)
     else:
       self.beta = None
       if self.fused:
-        self._beta_const = array_ops.constant(0.0,
-                                              dtype=param_dtype,
-                                              shape=param_shape)
+        self._beta_const = array_ops.constant(
+            0.0, dtype=param_dtype, shape=param_shape)
 
     # Disable variable partitioning when creating the moving mean and variance
     try:
@@ -327,11 +327,12 @@ class BatchNormalization(base.Layer):
         # stack to be cleared. The nested ones use a `lambda` to set the desired
         # device and ignore any devices that may be set by the custom getter.
         def _renorm_variable(name, shape):
-          var = self.add_variable(name=name,
-                                  shape=shape,
-                                  dtype=param_dtype,
-                                  initializer=init_ops.zeros_initializer(),
-                                  trainable=False)
+          var = self.add_variable(
+              name=name,
+              shape=shape,
+              dtype=param_dtype,
+              initializer=init_ops.zeros_initializer(),
+              trainable=False)
           return var
 
         with ops.device(None):
diff --git a/tensorflow/python/layers/normalization_test.py b/tensorflow/python/layers/normalization_test.py
index b2876c58c2..7c91c3284e 100644
--- a/tensorflow/python/layers/normalization_test.py
+++ b/tensorflow/python/layers/normalization_test.py
@@ -101,15 +101,13 @@ class BNTest(test.TestCase):
       loss_val = sess.run(loss, feed_dict={image: image_val})
       return loss_val
 
-  def _trainEvalSequence(self,
-                         dtype,
-                         train1_use_gpu,
-                         train2_use_gpu,
+  def _trainEvalSequence(self, dtype, train1_use_gpu, train2_use_gpu,
                          infer_use_gpu):
     batch, height, width, input_channels = 2, 4, 5, 3
     shape = [batch, height, width, input_channels]
     checkpoint = os.path.join(self.get_temp_dir(), 'cp_%s_%s_%s_%s' %
-        (dtype, train1_use_gpu, train2_use_gpu, infer_use_gpu))
+                              (dtype, train1_use_gpu, train2_use_gpu,
+                               infer_use_gpu))
 
     self._train(
         checkpoint,
@@ -130,30 +128,27 @@ class BNTest(test.TestCase):
         dtype=dtype)
 
     np.random.seed(0)
-    image_val = np.random.rand(batch,
-                               height,
-                               width,
-                               input_channels).astype(dtype.as_numpy_dtype)
-    loss_val = self._infer(checkpoint, image_val, shape,
-                           use_gpu=infer_use_gpu, is_fused=True)
+    image_val = np.random.rand(batch, height, width, input_channels).astype(
+        dtype.as_numpy_dtype)
+    loss_val = self._infer(
+        checkpoint, image_val, shape, use_gpu=infer_use_gpu, is_fused=True)
 
     return train_vars, loss_val
 
   def testHalfPrecision(self):
-    ref_vars, ref_loss = self._trainEvalSequence(dtype=dtypes.float32,
-                                                 train1_use_gpu=True,
-                                                 train2_use_gpu=True,
-                                                 infer_use_gpu=True)
- 
+    ref_vars, ref_loss = self._trainEvalSequence(
+        dtype=dtypes.float32,
+        train1_use_gpu=True,
+        train2_use_gpu=True,
+        infer_use_gpu=True)
+
     self.assertEqual(len(ref_vars), 5)
 
     for train1_use_gpu in [True, False]:
       for train2_use_gpu in [True, False]:
         for infer_use_gpu in [True, False]:
-          test_vars, test_loss = self._trainEvalSequence(dtypes.float16,
-                                                         train1_use_gpu,
-                                                         train2_use_gpu,
-                                                         infer_use_gpu)
+          test_vars, test_loss = self._trainEvalSequence(
+              dtypes.float16, train1_use_gpu, train2_use_gpu, infer_use_gpu)
           self.assertEqual(len(test_vars), 5)
           for test_var, ref_var in zip(test_vars, ref_vars):
             self.assertAllClose(test_var, ref_var, rtol=1.e-3, atol=1.e-3)
@@ -281,9 +276,8 @@ class BNTest(test.TestCase):
   def testCreateFusedBNFloat16(self):
     # Call layer.
     bn = normalization_layers.BatchNormalization(axis=1, fused=True)
-    inputs = random_ops.random_uniform((5, 4, 3, 3),
-                                       seed=1,
-                                       dtype=dtypes.float16)
+    inputs = random_ops.random_uniform(
+        (5, 4, 3, 3), seed=1, dtype=dtypes.float16)
     training = array_ops.placeholder(dtype='bool')
     outputs = bn.apply(inputs, training=training)
 
diff --git a/tensorflow/python/ops/array_ops.py b/tensorflow/python/ops/array_ops.py
index 43238757c7..38eff54c69 100644
--- a/tensorflow/python/ops/array_ops.py
+++ b/tensorflow/python/ops/array_ops.py
@@ -1194,18 +1194,19 @@ def boolean_mask(tensor, mask, name="boolean_mask", axis=None):
           "Number of mask dimensions must be specified, even if some dimensions"
           " are None.  E.g. shape=[None] is ok, but shape=None is not.")
     axis = 0 if axis is None else axis
-    shape_tensor[axis:axis+ndims_mask].assert_is_compatible_with(shape_mask)
+    shape_tensor[axis:axis + ndims_mask].assert_is_compatible_with(shape_mask)
 
-    leading_size = gen_math_ops._prod(shape(tensor)[axis:axis+ndims_mask], [0])
+    leading_size = gen_math_ops._prod(
+        shape(tensor)[axis:axis + ndims_mask], [0])
     tensor = reshape(tensor,
-                     concat([shape(tensor)[:axis],
-                             [leading_size],
-                             shape(tensor)[axis+ndims_mask:]], 0))
-    first_dim = shape_tensor[axis:axis+ndims_mask].num_elements()
+                     concat([
+                         shape(tensor)[:axis], [leading_size],
+                         shape(tensor)[axis + ndims_mask:]
+                     ], 0))
+    first_dim = shape_tensor[axis:axis + ndims_mask].num_elements()
     tensor.set_shape(
-        tensor_shape.as_shape(shape_tensor[:axis])
-        .concatenate([first_dim])
-        .concatenate(shape_tensor[axis+ndims_mask:]))
+        tensor_shape.as_shape(shape_tensor[:axis]).concatenate([first_dim])
+        .concatenate(shape_tensor[axis + ndims_mask:]))
 
     mask = reshape(mask, [-1])
     return _apply_mask_1d(tensor, mask, axis)
diff --git a/tensorflow/python/ops/distributions/multinomial.py b/tensorflow/python/ops/distributions/multinomial.py
index d49fac59ca..04762565c2 100644
--- a/tensorflow/python/ops/distributions/multinomial.py
+++ b/tensorflow/python/ops/distributions/multinomial.py
@@ -23,10 +23,10 @@ from tensorflow.python.framework import ops
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import check_ops
 from tensorflow.python.ops import control_flow_ops
+from tensorflow.python.ops import functional_ops
 from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import nn_ops
 from tensorflow.python.ops import random_ops
-from tensorflow.python.ops import functional_ops
 from tensorflow.python.ops.distributions import distribution
 from tensorflow.python.ops.distributions import util as distribution_util
 
@@ -243,25 +243,26 @@ class Multinomial(distribution.Distribution):
         n_draws[..., array_ops.newaxis], dtype=self.logits.dtype) * self.logits
 
     # flatten the total_count and logits
-    flat_logits = array_ops.reshape(logits, [-1, k]) # [B1B2...Bm, k]
-    flat_ndraws = n * array_ops.reshape(n_draws, [-1]) # [B1B2...Bm]
+    flat_logits = array_ops.reshape(logits, [-1, k])  # [B1B2...Bm, k]
+    flat_ndraws = n * array_ops.reshape(n_draws, [-1])  # [B1B2...Bm]
 
     # computes each total_count and logits situation by map_fn
     def _sample_single(args):
-      logits, n_draw = args[0], args[1] # [K], []
-      x = random_ops.multinomial(logits[array_ops.newaxis, ...],
-                                 n_draw, seed) # [1, n*n_draw]
-      x = array_ops.reshape(x, shape=[n, -1]) # [n, n_draw]
-      x = math_ops.reduce_sum(array_ops.one_hot(x, depth=k), axis=-2) # [n, k]
+      logits, n_draw = args[0], args[1]  # [K], []
+      x = random_ops.multinomial(logits[array_ops.newaxis, ...], n_draw,
+                                 seed)  # [1, n*n_draw]
+      x = array_ops.reshape(x, shape=[n, -1])  # [n, n_draw]
+      x = math_ops.reduce_sum(array_ops.one_hot(x, depth=k), axis=-2)  # [n, k]
       return x
-    x = functional_ops.map_fn(_sample_single,
-                              [flat_logits, flat_ndraws],
-                              dtype=self.dtype) # [B1B2...Bm, n, k]
+
+    x = functional_ops.map_fn(
+        _sample_single, [flat_logits, flat_ndraws],
+        dtype=self.dtype)  # [B1B2...Bm, n, k]
 
     # reshape the results to proper shape
     x = array_ops.transpose(x, perm=[1, 0, 2])
     final_shape = array_ops.concat([[n], self.batch_shape_tensor(), [k]], 0)
-    x = array_ops.reshape(x, final_shape) # [n, B1, B2,..., Bm, k]
+    x = array_ops.reshape(x, final_shape)  # [n, B1, B2,..., Bm, k]
     return x
 
   @distribution_util.AppendDocstring(_multinomial_sample_note)
diff --git a/tensorflow/python/ops/image_ops_impl.py b/tensorflow/python/ops/image_ops_impl.py
index 7c23321ca5..b9c89d62d5 100644
--- a/tensorflow/python/ops/image_ops_impl.py
+++ b/tensorflow/python/ops/image_ops_impl.py
@@ -1119,9 +1119,8 @@ def rgb_to_grayscale(images, name=None):
     # https://en.wikipedia.org/wiki/Luma_%28video%29
     rgb_weights = [0.2989, 0.5870, 0.1140]
     rank_1 = array_ops.expand_dims(array_ops.rank(images) - 1, 0)
-    gray_float = math_ops.reduce_sum(flt_image * rgb_weights,
-                                     rank_1,
-                                     keepdims=True)
+    gray_float = math_ops.reduce_sum(
+        flt_image * rgb_weights, rank_1, keepdims=True)
     gray_float.set_shape(images.get_shape()[:-1].concatenate([1]))
     return convert_image_dtype(gray_float, orig_dtype, name=name)
 
diff --git a/tensorflow/python/ops/linalg_ops.py b/tensorflow/python/ops/linalg_ops.py
index 14a039ffd0..be9beee633 100644
--- a/tensorflow/python/ops/linalg_ops.py
+++ b/tensorflow/python/ops/linalg_ops.py
@@ -30,7 +30,7 @@ from tensorflow.python.ops import math_ops
 from tensorflow.python.ops.gen_linalg_ops import *
 # pylint: enable=wildcard-import
 from tensorflow.python.util import compat
-from tensorflow.python.util.deprecation import deprecated_args
+from tensorflow.python.util import deprecation
 
 # Names below are lower_case.
 # pylint: disable=invalid-name
@@ -439,9 +439,13 @@ def svd(tensor, full_matrices=False, compute_uv=True, name=None):
 
 
 # pylint: disable=redefined-builtin
-@deprecated_args(None, "keep_dims is deprecated, use keepdims instead",
-                 "keep_dims")
-def norm(tensor, ord='euclidean', axis=None, keepdims=None, name=None,
+@deprecation.deprecated_args(
+    None, 'keep_dims is deprecated, use keepdims instead', 'keep_dims')
+def norm(tensor,
+         ord='euclidean',
+         axis=None,
+         keepdims=None,
+         name=None,
          keep_dims=None):
   r"""Computes the norm of vectors, matrices, and tensors.
 
@@ -478,6 +482,7 @@ def norm(tensor, ord='euclidean', axis=None, keepdims=None, name=None,
     keepdims: If True, the axis indicated in `axis` are kept with size 1.
       Otherwise, the dimensions in `axis` are removed from the output shape.
     name: The name of the op.
+    keep_dims: Deprecated alias for `keepdims`.
 
   Returns:
     output: A `Tensor` of the same type as tensor, containing the vector or
@@ -500,11 +505,8 @@ def norm(tensor, ord='euclidean', axis=None, keepdims=None, name=None,
      higher order tensors.
   @end_compatibility
   """
-
-  if keep_dims is not None:
-    if keepdims is not None:
-      raise ValueError("Cannot specify both 'keep_dims' and 'keepdims'")
-    keepdims = keep_dims
+  keepdims = deprecation.deprecated_argument_lookup('keepdims', keepdims,
+                                                    'keep_dims', keep_dims)
   if keepdims is None:
     keepdims = False
 
@@ -555,8 +557,8 @@ def norm(tensor, ord='euclidean', axis=None, keepdims=None, name=None,
       else:
         # General p-norms (positive p only)
         result = math_ops.pow(
-            math_ops.reduce_sum(
-                math_ops.pow(result, ord), axis, keepdims=True), 1.0 / ord)
+            math_ops.reduce_sum(math_ops.pow(result, ord), axis, keepdims=True),
+            1.0 / ord)
     if not keepdims:
       result = array_ops.squeeze(result, axis)
     return result
diff --git a/tensorflow/python/ops/metrics_impl.py b/tensorflow/python/ops/metrics_impl.py
index d30f6b92ad..e04121ee31 100644
--- a/tensorflow/python/ops/metrics_impl.py
+++ b/tensorflow/python/ops/metrics_impl.py
@@ -792,9 +792,10 @@ def mean_cosine_distance(labels, predictions, dim, weights=None,
   predictions, labels, weights = _remove_squeezable_dimensions(
       predictions=predictions, labels=labels, weights=weights)
   radial_diffs = math_ops.multiply(predictions, labels)
-  radial_diffs = math_ops.reduce_sum(radial_diffs,
-                                     reduction_indices=[dim,],
-                                     keepdims=True)
+  radial_diffs = math_ops.reduce_sum(
+      radial_diffs, reduction_indices=[
+          dim,
+      ], keepdims=True)
   mean_distance, update_op = mean(radial_diffs, weights,
                                   None,
                                   None,
diff --git a/tensorflow/python/ops/nn_impl.py b/tensorflow/python/ops/nn_impl.py
index da037a7983..654eb1c118 100644
--- a/tensorflow/python/ops/nn_impl.py
+++ b/tensorflow/python/ops/nn_impl.py
@@ -333,6 +333,7 @@ def l2_normalize(x, axis=None, epsilon=1e-12, name=None, dim=None):
     epsilon: A lower bound value for the norm. Will use `sqrt(epsilon)` as the
       divisor if `norm < sqrt(epsilon)`.
     name: A name for this operation (optional).
+    dim: Deprecated alias for axis.
 
   Returns:
     A `Tensor` with the same shape as `x`.
diff --git a/tensorflow/python/ops/nn_ops.py b/tensorflow/python/ops/nn_ops.py
index 61fa462988..ec7b9372ca 100644
--- a/tensorflow/python/ops/nn_ops.py
+++ b/tensorflow/python/ops/nn_ops.py
@@ -23,7 +23,6 @@ import numbers
 import numpy as np
 
 from tensorflow.python.eager import context
-from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import graph_util
 from tensorflow.python.framework import ops
@@ -38,11 +37,10 @@ from tensorflow.python.ops import random_ops
 # pylint: disable=wildcard-import
 from tensorflow.python.ops.gen_nn_ops import *
 # pylint: enable=wildcard-import
-from tensorflow.python.util.deprecation import deprecated_args
-from tensorflow.python.util.deprecation import deprecated_argument_lookup
 
 from tensorflow.python.util import deprecation
 
+
 # Aliases for some automatically-generated names.
 local_response_normalization = gen_nn_ops.lrn
 
@@ -1648,7 +1646,7 @@ def _softmax(logits, compute_op, dim=-1, name=None):
   return output
 
 
-@deprecated_args(None, "dim is deprecated, use axis instead", "dim")
+@deprecation.deprecated_args(None, "dim is deprecated, use axis instead", "dim")
 def softmax(logits, axis=None, name=None, dim=None):
   """Computes softmax activations.
 
@@ -1662,6 +1660,7 @@ def softmax(logits, axis=None, name=None, dim=None):
     axis: The dimension softmax would be performed on. The default is -1 which
       indicates the last dimension.
     name: A name for the operation (optional).
+    dim: Deprecated alias for `axis`.
 
   Returns:
     A `Tensor`. Has the same type and shape as `logits`.
@@ -1670,13 +1669,13 @@ def softmax(logits, axis=None, name=None, dim=None):
     InvalidArgumentError: if `logits` is empty or `axis` is beyond the last
       dimension of `logits`.
   """
-  axis = deprecated_argument_lookup("axis", axis, "dim", dim)
+  axis = deprecation.deprecated_argument_lookup("axis", axis, "dim", dim)
   if axis is None:
     axis = -1
   return _softmax(logits, gen_nn_ops._softmax, axis, name)
 
 
-@deprecated_args(None, "dim is deprecated, use axis instead", "dim")
+@deprecation.deprecated_args(None, "dim is deprecated, use axis instead", "dim")
 def log_softmax(logits, axis=None, name=None, dim=None):
   """Computes log softmax activations.
 
@@ -1690,6 +1689,7 @@ def log_softmax(logits, axis=None, name=None, dim=None):
     axis: The dimension softmax would be performed on. The default is -1 which
       indicates the last dimension.
     name: A name for the operation (optional).
+    dim: Deprecated alias for `axis`.
 
   Returns:
     A `Tensor`. Has the same type as `logits`. Same shape as `logits`.
@@ -1698,7 +1698,7 @@ def log_softmax(logits, axis=None, name=None, dim=None):
     InvalidArgumentError: if `logits` is empty or `axis` is beyond the last
       dimension of `logits`.
   """
-  axis = deprecated_argument_lookup("axis", axis, "dim", dim)
+  axis = deprecation.deprecated_argument_lookup("axis", axis, "dim", dim)
   if axis is None:
     axis = -1
   return _softmax(logits, gen_nn_ops._log_softmax, axis, name)
@@ -2316,13 +2316,14 @@ def conv1d(value, filters, stride, padding,
     return array_ops.squeeze(result, [spatial_start_dim])
 
 
-def conv1d_transpose(value,
-                     filter,
-                     output_shape,
-                     stride,
-                     padding="SAME",
-                     data_format="NWC",
-                     name=None):
+def conv1d_transpose(
+    value,
+    filter,  # pylint: disable=redefined-builtin
+    output_shape,
+    stride,
+    padding="SAME",
+    data_format="NWC",
+    name=None):
   """The transpose of `conv1d`.
 
   This operation is sometimes called "deconvolution" after [Deconvolutional
@@ -2357,8 +2358,8 @@ def conv1d_transpose(value,
                       [value, filter, output_shape]) as name:
     output_shape_ = ops.convert_to_tensor(output_shape, name="output_shape")
     if not output_shape_.get_shape().is_compatible_with(tensor_shape.vector(3)):
-      raise ValueError("output_shape must have shape (3,), got {}"
-                       .format(output_shape_.get_shape()))
+      raise ValueError("output_shape must have shape (3,), got {}".format(
+          output_shape_.get_shape()))
 
     # The format could be either NWC or NCW, map to NHWC or NCHW
     if data_format is None or data_format == "NWC":
@@ -2380,7 +2381,8 @@ def conv1d_transpose(value,
       if not filter.get_shape()[1].is_compatible_with(output_shape[axis]):
         raise ValueError(
             "output_shape does not match filter's output channels, "
-            "{} != {}".format(output_shape[axis], filter.get_shape()[1]))
+            "{} != {}".format(output_shape[axis],
+                              filter.get_shape()[1]))
 
     if padding != "VALID" and padding != "SAME":
       raise ValueError("padding must be either VALID or SAME:"
@@ -2388,25 +2390,26 @@ def conv1d_transpose(value,
 
     # Reshape the input tensor to [batch, 1, in_width, in_channels]
     if data_format_2d == "NHWC":
-      output_shape_ = array_ops.concat([output_shape_[:1], [1],
-                                        output_shape_[1:]], axis=0)
+      output_shape_ = array_ops.concat(
+          [output_shape_[:1], [1], output_shape_[1:]], axis=0)
       spatial_start_dim = 1
       strides = [1, 1, stride, 1]
     else:
-      output_shape_ = array_ops.concat([output_shape_[:2], [1],
-                                        output_shape_[2:]], axis=0)
+      output_shape_ = array_ops.concat(
+          [output_shape_[:2], [1], output_shape_[2:]], axis=0)
       spatial_start_dim = 2
       strides = [1, 1, 1, stride]
     value = array_ops.expand_dims(value, spatial_start_dim)
     filter = array_ops.expand_dims(filter, 0)
 
-    result = gen_nn_ops.conv2d_backprop_input(input_sizes=output_shape_,
-                                              filter=filter,
-                                              out_backprop=value,
-                                              strides=strides,
-                                              padding=padding,
-                                              data_format=data_format_2d,
-                                              name=name)
+    result = gen_nn_ops.conv2d_backprop_input(
+        input_sizes=output_shape_,
+        filter=filter,
+        out_backprop=value,
+        strides=strides,
+        padding=padding,
+        data_format=data_format_2d,
+        name=name)
     return array_ops.squeeze(result, [spatial_start_dim])
 
 
diff --git a/tensorflow/stream_executor/dnn.cc b/tensorflow/stream_executor/dnn.cc
index 29fd6d0e87..6fd0e69905 100644
--- a/tensorflow/stream_executor/dnn.cc
+++ b/tensorflow/stream_executor/dnn.cc
@@ -470,6 +470,7 @@ string ConvolutionDescriptor::ToShortString() const {
 PoolingDescriptor::PoolingDescriptor(int ndims)
     : mode_(dnn::PoolingMode::kMaximum),
       ndims_(ndims),
+      propagate_nans_(false),
       window_(ndims, 0),
       padding_(ndims, 0),
       strides_(ndims, 1),
diff --git a/third_party/sycl/crosstool/trisycl.tpl b/third_party/sycl/crosstool/trisycl.tpl
index b470772fbf..87a70d8f95 100644
--- a/third_party/sycl/crosstool/trisycl.tpl
+++ b/third_party/sycl/crosstool/trisycl.tpl
@@ -11,10 +11,12 @@ CPU_C_COMPILER = ('%{host_c_compiler}')
 CURRENT_DIR = os.path.dirname(sys.argv[0])
 TRISYCL_INCLUDE_DIR = CURRENT_DIR + '/../sycl/include'
 
+
 def main():
   compiler_flags = []
 
-  remove_flags = ('-Wl,--no-undefined', '-Wno-unused-but-set-variable', '-Wignored-attributes', '-fno-exceptions')
+  remove_flags = ('-Wl,--no-undefined', '-Wno-unused-but-set-variable',
+                  '-Wignored-attributes', '-fno-exceptions')
   # remove -fsamotoze-coverage from string with g++
   if 'g++' in CPU_CXX_COMPILER:
     remove_flags += ('-fsanitize-coverage',)
@@ -22,52 +24,62 @@ def main():
   else:
     compiler_flags += ['-fopenmp=libomp']
 
-  compiler_flags += [flag for flag in sys.argv[1:] if not flag.startswith(remove_flags)]
-
+  compiler_flags += [
+      flag for flag in sys.argv[1:] if not flag.startswith(remove_flags)
+  ]
 
   output_file_index = compiler_flags.index('-o') + 1
   output_file_name = compiler_flags[output_file_index]
 
-  if(output_file_index == 1):
+  if (output_file_index == 1):
     # we are linking
-    return call([CPU_CXX_COMPILER] + compiler_flags +
-                ['-Wl,--no-undefined'])
+    return call([CPU_CXX_COMPILER] + compiler_flags + ['-Wl,--no-undefined'])
 
   # find what we compile
   compiling_cpp = 0
-  if('-c' in compiler_flags):
-      compiled_file_index = compiler_flags.index('-c') + 1
-      compiled_file_name = compiler_flags[compiled_file_index]
-      if(compiled_file_name.endswith(('.cc', '.c++', '.cpp', '.CPP',
-                                      '.C', '.cxx'))):
-        compiling_cpp = 1;
-
-  debug_flags = ['-DTRISYCL_DEBUG', '-DBOOST_LOG_DYN_LINK', '-DTRISYCL_TRACE_KERNEL', '-lpthread', '-lboost_log', '-g', '-rdynamic']
+  if ('-c' in compiler_flags):
+    compiled_file_index = compiler_flags.index('-c') + 1
+    compiled_file_name = compiler_flags[compiled_file_index]
+    if (compiled_file_name.endswith(('.cc', '.c++', '.cpp', '.CPP', '.C',
+                                     '.cxx'))):
+      compiling_cpp = 1
+
+  debug_flags = [
+      '-DTRISYCL_DEBUG', '-DBOOST_LOG_DYN_LINK', '-DTRISYCL_TRACE_KERNEL',
+      '-lpthread', '-lboost_log', '-g', '-rdynamic'
+  ]
 
   opt_flags = ['-DNDEBUG', '-DBOOST_DISABLE_ASSERTS', '-O3']
 
-  compiler_flags = compiler_flags + ['-DEIGEN_USE_SYCL=1',
-                                     '-DEIGEN_HAS_C99_MATH',
-                                     '-DEIGEN_MAX_ALIGN_BYTES=16',
-                                     '-DTENSORFLOW_USE_SYCL'] + opt_flags
+  compiler_flags = compiler_flags + [
+      '-DEIGEN_USE_SYCL=1', '-DEIGEN_HAS_C99_MATH',
+      '-DEIGEN_MAX_ALIGN_BYTES=16', '-DTENSORFLOW_USE_SYCL'
+  ] + opt_flags
 
-  if(compiling_cpp == 1):
+  if (compiling_cpp == 1):
     # create a blacklist of folders that will be skipped when compiling
     # with triSYCL
-    skip_extensions = [".cu.cc"]
-    skip_folders = ["tensorflow/compiler", "tensorflow/docs_src", "tensorflow/tensorboard", "third_party", "external", "hexagon"]
+    skip_extensions = ['.cu.cc']
+    skip_folders = [
+        'tensorflow/compiler', 'tensorflow/docs_src', 'tensorflow/tensorboard',
+        'third_party', 'external', 'hexagon'
+    ]
     skip_folders = [(folder + '/') for folder in skip_folders]
     # if compiling external project skip triSYCL
-    if any(compiled_file_name.endswith(_ext) for _ext in skip_extensions) or any(_folder in output_file_name for _folder in skip_folders):
+    if any(
+        compiled_file_name.endswith(_ext) for _ext in skip_extensions) or any(
+            _folder in output_file_name for _folder in skip_folders):
       return call([CPU_CXX_COMPILER] + compiler_flags)
 
-    host_compiler_flags = ['-xc++', '-Wno-unused-variable',
-                           '-I', TRISYCL_INCLUDE_DIR] + compiler_flags
+    host_compiler_flags = [
+        '-xc++', '-Wno-unused-variable', '-I', TRISYCL_INCLUDE_DIR
+    ] + compiler_flags
     x = call([CPU_CXX_COMPILER] + host_compiler_flags)
     return x
   else:
     # compile for C
     return call([CPU_C_COMPILER] + compiler_flags)
 
+
 if __name__ == '__main__':
   sys.exit(main())
diff --git a/third_party/sycl/sycl_configure.bzl b/third_party/sycl/sycl_configure.bzl
index a0c9e4e43a..5b9d0eb383 100644
--- a/third_party/sycl/sycl_configure.bzl
+++ b/third_party/sycl/sycl_configure.bzl
@@ -67,7 +67,6 @@ def find_computecpp_root(repository_ctx):
 
 def find_trisycl_include_dir(repository_ctx):
   """Find triSYCL include directory. """
-  sycl_name = ""
   if _TRISYCL_INCLUDE_DIR in repository_ctx.os.environ:
     sycl_name = repository_ctx.os.environ[_TRISYCL_INCLUDE_DIR].strip()
     if sycl_name.startswith("/"):
-- 
GitLab


From 0c98a7ecf88da45469cf00edc5cc4c0c82c7d49f Mon Sep 17 00:00:00 2001
From: Yifei Feng <yifeif@google.com>
Date: Wed, 22 Nov 2017 00:15:55 -0800
Subject: [PATCH 0213/1225] Remove duplicate  propagate_nans_(false).

PiperOrigin-RevId: 176619303
---
 tensorflow/stream_executor/dnn.cc | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/tensorflow/stream_executor/dnn.cc b/tensorflow/stream_executor/dnn.cc
index 6fd0e69905..44144a0613 100644
--- a/tensorflow/stream_executor/dnn.cc
+++ b/tensorflow/stream_executor/dnn.cc
@@ -473,8 +473,7 @@ PoolingDescriptor::PoolingDescriptor(int ndims)
       propagate_nans_(false),
       window_(ndims, 0),
       padding_(ndims, 0),
-      strides_(ndims, 1),
-      propagate_nans_(false) {}
+      strides_(ndims, 1) {}
 
 PoolingDescriptor::PoolingDescriptor() : PoolingDescriptor(/*ndims=*/2) {}
 
-- 
GitLab


From ad7eeec1cc06d7fdba6ee404f03a35fab9cd3e6a Mon Sep 17 00:00:00 2001
From: Yifei Feng <yifeif@google.com>
Date: Wed, 22 Nov 2017 00:33:29 -0800
Subject: [PATCH 0214/1225] Automated g4 rollback of changelist 176615737

PiperOrigin-RevId: 176621645
---
 configure.py                                  |  19 ++-
 tensorflow/compiler/aot/tfcompile.bzl         |   8 +-
 .../compiler/tests/fused_batchnorm_test.py    |   6 +-
 .../contrib/android/cmake/CMakeLists.txt      |   2 +-
 .../python/kernel_tests/cauchy_test.py        |  47 +++----
 .../distributions/python/ops/cauchy.py        |  18 ++-
 .../contrib/layers/python/layers/layers.py    |   1 +
 .../layers/python/layers/layers_test.py       |  28 ++--
 .../python/learn/learn_io/data_feeder.py      |   6 +-
 .../linear_optimizer/python/ops/sdca_ops.py   |   5 +-
 .../contrib/lite/testing/generate_examples.py |   3 +-
 tensorflow/contrib/opt/__init__.py            |  16 +--
 .../training/multitask_optimizer_wrapper.py   |  60 ++++----
 .../multitask_optimizer_wrapper_test.py       |  40 +++---
 .../python/kernel_tests/core_rnn_cell_test.py |  31 ++--
 .../rnn/python/kernel_tests/rnn_cell_test.py  |  63 +++++----
 tensorflow/contrib/rnn/python/ops/rnn_cell.py |  98 ++++++-------
 .../seq2seq/python/ops/attention_wrapper.py   |   7 +-
 tensorflow/contrib/verbs/rdma.cc              |  61 ++++----
 .../api_def/base_api/api_def_UniqueV2.pbtxt   |  47 -------
 .../base_api/api_def_UnsortedSegmentSum.pbtxt |   2 -
 tensorflow/core/graph/graph.h                 |   2 +-
 tensorflow/core/graph/graph_test.cc           |   8 +-
 tensorflow/core/kernels/BUILD                 |   1 -
 tensorflow/core/kernels/bincount_op.cc        |   5 +-
 tensorflow/core/kernels/bincount_op.h         |   2 +-
 tensorflow/core/kernels/bincount_op_gpu.cu.cc |   6 +-
 tensorflow/core/kernels/bincount_op_test.cc   |   4 +-
 .../core/kernels/bucketize_op_gpu.cu.cc       |   8 +-
 tensorflow/core/kernels/conv_grad_ops_3d.cc   |  24 ++--
 tensorflow/core/kernels/cwise_op_asinh.cc     |   2 +-
 tensorflow/core/kernels/cwise_op_atanh.cc     |   2 +-
 .../core/kernels/depthwise_conv_grad_op.cc    |   9 +-
 tensorflow/core/kernels/depthwise_conv_op.cc  |   5 -
 tensorflow/core/kernels/depthwise_conv_op.h   |   3 +-
 tensorflow/core/kernels/maxpooling_op.cc      |  14 +-
 .../core/kernels/maxpooling_op_gpu.cu.cc      |  16 +--
 tensorflow/core/kernels/mkl_tfconv_op.h       |  20 +--
 tensorflow/core/kernels/ops_util.h            |  13 --
 tensorflow/core/platform/posix/error.cc       |   4 +-
 tensorflow/core/platform/posix/port.cc        |   8 +-
 tensorflow/core/util/cuda_kernel_helper.h     |  12 --
 tensorflow/core/util/mkl_util.h               | 132 +++++++++---------
 tensorflow/core/util/mkl_util_test.cc         |   1 +
 .../test/java/org/tensorflow/ShapeTest.java   |   6 +-
 .../python/estimator/inputs/numpy_io.py       |  17 +--
 .../python/estimator/inputs/numpy_io_test.py  |  11 +-
 tensorflow/python/framework/test_util.py      |   7 +-
 .../python/kernel_tests/array_ops_test.py     |  11 +-
 .../python/kernel_tests/bincount_op_test.py   |   7 +-
 .../python/kernel_tests/constant_op_test.py   |  17 +--
 tensorflow/python/kernel_tests/conv1d_test.py |   2 +-
 .../python/kernel_tests/conv_ops_3d_test.py   | 120 +++++++++-------
 .../python/kernel_tests/pooling_ops_test.py   |   3 +-
 .../python/kernel_tests/reader_ops_test.py    |  15 +-
 .../segment_reduction_ops_test.py             |   2 +-
 .../python/kernel_tests/unique_op_test.py     |   1 -
 tensorflow/python/layers/normalization.py     |  51 ++++---
 .../python/layers/normalization_test.py       |  40 +++---
 tensorflow/python/ops/array_ops.py            |  19 ++-
 .../python/ops/distributions/multinomial.py   |  25 ++--
 tensorflow/python/ops/image_ops_impl.py       |   5 +-
 tensorflow/python/ops/linalg_ops.py           |  24 ++--
 tensorflow/python/ops/metrics_impl.py         |   7 +-
 tensorflow/python/ops/nn_impl.py              |   1 -
 tensorflow/python/ops/nn_ops.py               |  59 ++++----
 tensorflow/stream_executor/dnn.cc             |   1 -
 third_party/sycl/crosstool/trisycl.tpl        |  60 ++++----
 third_party/sycl/sycl_configure.bzl           |   1 +
 69 files changed, 646 insertions(+), 735 deletions(-)
 delete mode 100644 tensorflow/core/api_def/base_api/api_def_UniqueV2.pbtxt

diff --git a/configure.py b/configure.py
index 1f205861f1..26da09bd94 100644
--- a/configure.py
+++ b/configure.py
@@ -883,28 +883,27 @@ def set_computecpp_toolkit_path(environ_cp):
   write_action_env_to_bazelrc('COMPUTECPP_TOOLKIT_PATH',
                               computecpp_toolkit_path)
 
-
 def set_trisycl_include_dir(environ_cp):
-  """Set TRISYCL_INCLUDE_DIR."""
+  """Set TRISYCL_INCLUDE_DIR"""
   ask_trisycl_include_dir = ('Please specify the location of the triSYCL '
                              'include directory. (Use --config=sycl_trisycl '
                              'when building with Bazel) '
-                             '[Default is %s]: ') % (
-                                 _DEFAULT_TRISYCL_INCLUDE_DIR)
+                             '[Default is %s]: '
+                             ) % (_DEFAULT_TRISYCL_INCLUDE_DIR)
   while True:
     trisycl_include_dir = get_from_env_or_user_or_default(
-        environ_cp, 'TRISYCL_INCLUDE_DIR', ask_trisycl_include_dir,
-        _DEFAULT_TRISYCL_INCLUDE_DIR)
+      environ_cp, 'TRISYCL_INCLUDE_DIR', ask_trisycl_include_dir,
+      _DEFAULT_TRISYCL_INCLUDE_DIR)
     if os.path.exists(trisycl_include_dir):
       break
 
-    print('Invalid triSYCL include directory, %s cannot be found' %
-          (trisycl_include_dir))
+    print('Invalid triSYCL include directory, %s cannot be found'
+          % (trisycl_include_dir))
 
   # Set TRISYCL_INCLUDE_DIR
   environ_cp['TRISYCL_INCLUDE_DIR'] = trisycl_include_dir
-  write_action_env_to_bazelrc('TRISYCL_INCLUDE_DIR', trisycl_include_dir)
-
+  write_action_env_to_bazelrc('TRISYCL_INCLUDE_DIR',
+                              trisycl_include_dir)
 
 def set_mpi_home(environ_cp):
   """Set MPI_HOME."""
diff --git a/tensorflow/compiler/aot/tfcompile.bzl b/tensorflow/compiler/aot/tfcompile.bzl
index 6c385af3b3..b795afd5b8 100644
--- a/tensorflow/compiler/aot/tfcompile.bzl
+++ b/tensorflow/compiler/aot/tfcompile.bzl
@@ -119,7 +119,7 @@ def tf_library(name, graph, config,
             out_nodes_file,
         ] + freeze_saver_srcs,
         outs=[freeze_file],
-        cmd=("$(location @org_tensorflow//tensorflow/python/tools:freeze_graph)" +
+        cmd=("$(location //tensorflow/python/tools:freeze_graph)" +
              freeze_args),
         tools=["@org_tensorflow//tensorflow/python/tools:freeze_graph"],
         tags=tags,
@@ -152,7 +152,7 @@ def tf_library(name, graph, config,
            " --target_triple=" + target_llvm_triple() +
            " --out_header=$(@D)/" + header_file +
            " --out_object=$(@D)/" + object_file +
-           " " + flags),
+           flags),
       tools=[tfcompile_tool],
       visibility=visibility,
       testonly=testonly,
@@ -189,7 +189,7 @@ def tf_library(name, graph, config,
            " --cpp_class=" + cpp_class +
            " --target_triple=" + target_llvm_triple() +
            " --out_session_module=$(@D)/" + session_module_pb +
-           " " + flags),
+           flags),
       tools=[tfcompile_tool],
       visibility=visibility,
       testonly=testonly,
@@ -256,7 +256,7 @@ def tf_library(name, graph, config,
         ],
         outs=[test_file],
         cmd=("sed " + sed_replace +
-             " $(location @org_tensorflow//tensorflow/compiler/aot:test.cc) " +
+             " $(location //tensorflow/compiler/aot:test.cc) " +
              "> $(OUTS)"),
         tags=tags,
     )
diff --git a/tensorflow/compiler/tests/fused_batchnorm_test.py b/tensorflow/compiler/tests/fused_batchnorm_test.py
index 00a9c9a65b..a773b5a947 100644
--- a/tensorflow/compiler/tests/fused_batchnorm_test.py
+++ b/tensorflow/compiler/tests/fused_batchnorm_test.py
@@ -76,8 +76,7 @@ class FusedBatchNormTest(XLATestCase):
       # To avoid constant folding
       t_val = array_ops.placeholder(np.float32, shape=x_shape, name="x")
       scale = array_ops.placeholder(np.float32, shape=scale_shape, name="scale")
-      offset = array_ops.placeholder(
-          np.float32, shape=scale_shape, name="offset")
+      offset = array_ops.placeholder(np.float32, shape=scale_shape, name="offset")
       epsilon = 0.001
       y_ref, mean_ref, var_ref = self._reference_training(
           x_val, scale_val, offset_val, epsilon, data_format)
@@ -113,8 +112,7 @@ class FusedBatchNormTest(XLATestCase):
       # To avoid constant folding
       t_val = array_ops.placeholder(np.float32, shape=x_shape, name="x")
       scale = array_ops.placeholder(np.float32, shape=scale_shape, name="scale")
-      offset = array_ops.placeholder(
-          np.float32, shape=scale_shape, name="offset")
+      offset = array_ops.placeholder(np.float32, shape=scale_shape, name="offset")
       epsilon = 0.001
       y, mean, var = nn.fused_batch_norm(
           t_val,
diff --git a/tensorflow/contrib/android/cmake/CMakeLists.txt b/tensorflow/contrib/android/cmake/CMakeLists.txt
index aba356d616..25ada5ba27 100644
--- a/tensorflow/contrib/android/cmake/CMakeLists.txt
+++ b/tensorflow/contrib/android/cmake/CMakeLists.txt
@@ -37,7 +37,7 @@ set_target_properties(lib_tf PROPERTIES IMPORTED_LOCATION
 set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DIS_SLIM_BUILD \
                      -std=c++11 -fno-rtti -fno-exceptions \
                      -O2 -Wno-narrowing -fomit-frame-pointer \
-                     -mfpu=neon -mfloat-abi=softfp -fPIE -fPIC \
+                     -mfpu=neon -mfloat-abi=softfp -fPIE \
                      -ftemplate-depth=900 \
                      -DGOOGLE_PROTOBUF_NO_RTTI \
                      -DGOOGLE_PROTOBUF_NO_STATIC_INITIALIZER")
diff --git a/tensorflow/contrib/distributions/python/kernel_tests/cauchy_test.py b/tensorflow/contrib/distributions/python/kernel_tests/cauchy_test.py
index 73747db31c..7f7697357c 100644
--- a/tensorflow/contrib/distributions/python/kernel_tests/cauchy_test.py
+++ b/tensorflow/contrib/distributions/python/kernel_tests/cauchy_test.py
@@ -41,7 +41,6 @@ def try_import(name):  # pylint: disable=invalid-name
     tf_logging.warning("Could not import %s: %s" % (name, str(e)))
   return module
 
-
 stats = try_import("scipy.stats")
 
 
@@ -63,9 +62,9 @@ class CauchyTest(test.TestCase):
       self.assertAllEqual(expected, scale_shape.eval())
       loc = array_ops.zeros(loc_shape)
       scale = array_ops.ones(scale_shape)
-      self.assertAllEqual(expected,
-                          array_ops.shape(
-                              cauchy_lib.Cauchy(loc, scale).sample()).eval())
+      self.assertAllEqual(
+          expected,
+          array_ops.shape(cauchy_lib.Cauchy(loc, scale).sample()).eval())
 
   def _testParamStaticShapes(self, sample_shape, expected):
     param_shapes = cauchy_lib.Cauchy.param_static_shapes(sample_shape)
@@ -93,7 +92,8 @@ class CauchyTest(test.TestCase):
       cauchy = cauchy_lib.Cauchy(loc=loc, scale=scale)
 
       log_pdf = cauchy.log_prob(x)
-      self.assertAllEqual(cauchy.batch_shape_tensor().eval(), log_pdf.shape)
+      self.assertAllEqual(cauchy.batch_shape_tensor().eval(),
+                          log_pdf.shape)
       self.assertAllEqual(cauchy.batch_shape_tensor().eval(),
                           log_pdf.eval().shape)
       self.assertAllEqual(cauchy.batch_shape, log_pdf.shape)
@@ -115,15 +115,16 @@ class CauchyTest(test.TestCase):
     with self.test_session():
       batch_size = 6
       loc = constant_op.constant([[3.0, -3.0]] * batch_size)
-      scale = constant_op.constant(
-          [[np.sqrt(10.0), np.sqrt(15.0)]] * batch_size)
+      scale = constant_op.constant([[np.sqrt(10.0), np.sqrt(15.0)]] *
+                                   batch_size)
       x = np.array([[-2.5, 2.5, 4.0, 0.0, -1.0, 2.0]], dtype=np.float32).T
       cauchy = cauchy_lib.Cauchy(loc=loc, scale=scale)
 
       log_pdf = cauchy.log_prob(x)
       log_pdf_values = log_pdf.eval()
       self.assertEqual(log_pdf.shape, (6, 2))
-      self.assertAllEqual(cauchy.batch_shape_tensor().eval(), log_pdf.shape)
+      self.assertAllEqual(cauchy.batch_shape_tensor().eval(),
+                          log_pdf.shape)
       self.assertAllEqual(cauchy.batch_shape_tensor().eval(),
                           log_pdf.eval().shape)
       self.assertAllEqual(cauchy.batch_shape, log_pdf.shape)
@@ -247,7 +248,8 @@ class CauchyTest(test.TestCase):
       cauchy = cauchy_lib.Cauchy(loc=loc, scale=scale)
 
       entropy = cauchy.entropy()
-      self.assertAllEqual(cauchy.batch_shape_tensor().eval(), entropy.shape)
+      self.assertAllEqual(cauchy.batch_shape_tensor().eval(),
+                          entropy.shape)
       self.assertAllEqual(cauchy.batch_shape_tensor().eval(),
                           entropy.eval().shape)
       self.assertAllEqual(cauchy.batch_shape, entropy.shape)
@@ -255,7 +257,7 @@ class CauchyTest(test.TestCase):
 
       if not stats:
         return
-      expected_entropy = stats.cauchy(loc, scale[0]).entropy().reshape((1, 3))
+      expected_entropy = stats.cauchy(loc, scale).entropy()
       self.assertAllClose(expected_entropy, entropy.eval())
 
   def testCauchyMode(self):
@@ -366,8 +368,8 @@ class CauchyTest(test.TestCase):
       self.assertAllEqual(expected_shape, samples.shape)
       self.assertAllEqual(expected_shape, sample_values.shape)
 
-      expected_shape = (
-          tensor_shape.TensorShape([n.eval()]).concatenate(cauchy.batch_shape))
+      expected_shape = (tensor_shape.TensorShape(
+          [n.eval()]).concatenate(cauchy.batch_shape))
 
       self.assertAllEqual(expected_shape, samples.shape)
       self.assertAllEqual(expected_shape, sample_values.shape)
@@ -383,18 +385,18 @@ class CauchyTest(test.TestCase):
       samples = cauchy.sample(n)
       sample_values = samples.eval()
       self.assertEqual(samples.shape, (100000, batch_size, 2))
-      self.assertAllClose(
-          np.median(sample_values[:, 0, 0]), loc_v[0], atol=1e-1)
-      self.assertAllClose(
-          np.median(sample_values[:, 0, 1]), loc_v[1], atol=1e-1)
+      self.assertAllClose(np.median(sample_values[:, 0, 0]),
+                          loc_v[0], atol=1e-1)
+      self.assertAllClose(np.median(sample_values[:, 0, 1]),
+                          loc_v[1], atol=1e-1)
 
       expected_shape = tensor_shape.TensorShape([n.eval()]).concatenate(
           tensor_shape.TensorShape(cauchy.batch_shape_tensor().eval()))
       self.assertAllEqual(expected_shape, samples.shape)
       self.assertAllEqual(expected_shape, sample_values.shape)
 
-      expected_shape = (
-          tensor_shape.TensorShape([n.eval()]).concatenate(cauchy.batch_shape))
+      expected_shape = (tensor_shape.TensorShape(
+          [n.eval()]).concatenate(cauchy.batch_shape))
       self.assertAllEqual(expected_shape, samples.shape)
       self.assertAllEqual(expected_shape, sample_values.shape)
 
@@ -426,12 +428,9 @@ class CauchyTest(test.TestCase):
       self.assertEqual(cauchy.event_shape, ())
       self.assertAllEqual(cauchy.event_shape_tensor().eval(), [])
       self.assertAllEqual(
-          sess.run(
-              cauchy.batch_shape_tensor(),
-              feed_dict={
-                  loc: 5.0,
-                  scale: [1.0, 2.0]
-              }), [2])
+          sess.run(cauchy.batch_shape_tensor(),
+                   feed_dict={loc: 5.0,
+                              scale: [1.0, 2.0]}), [2])
 
 
 if __name__ == "__main__":
diff --git a/tensorflow/contrib/distributions/python/ops/cauchy.py b/tensorflow/contrib/distributions/python/ops/cauchy.py
index 8d59c1abfb..a17bb091f6 100644
--- a/tensorflow/contrib/distributions/python/ops/cauchy.py
+++ b/tensorflow/contrib/distributions/python/ops/cauchy.py
@@ -30,6 +30,7 @@ from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import random_ops
 from tensorflow.python.ops.distributions import distribution
 
+
 __all__ = [
     "Cauchy",
 ]
@@ -96,7 +97,7 @@ class Cauchy(distribution.Distribution):
                validate_args=False,
                allow_nan_stats=True,
                name="Cauchy"):
-    """Construct Cauchy distributions.
+    """Construct Cauchy distributions with loc and and scale `loc` and `scale`.
 
     The parameters `loc` and `scale` must be shaped in a way that supports
     broadcasting (e.g. `loc + scale` is a valid operation).
@@ -120,8 +121,8 @@ class Cauchy(distribution.Distribution):
     """
     parameters = locals()
     with ops.name_scope(name, values=[loc, scale]):
-      with ops.control_dependencies([check_ops.assert_positive(scale)]
-                                    if validate_args else []):
+      with ops.control_dependencies([check_ops.assert_positive(scale)] if
+                                    validate_args else []):
         self._loc = array_ops.identity(loc, name="loc")
         self._scale = array_ops.identity(scale, name="scale")
         check_ops.assert_same_float_dtype([self._loc, self._scale])
@@ -137,8 +138,8 @@ class Cauchy(distribution.Distribution):
   @staticmethod
   def _param_shapes(sample_shape):
     return dict(
-        zip(("loc", "scale"),
-            ([ops.convert_to_tensor(sample_shape, dtype=dtypes.int32)] * 2)))
+        zip(("loc", "scale"), ([ops.convert_to_tensor(
+            sample_shape, dtype=dtypes.int32)] * 2)))
 
   @property
   def loc(self):
@@ -152,10 +153,13 @@ class Cauchy(distribution.Distribution):
 
   def _batch_shape_tensor(self):
     return array_ops.broadcast_dynamic_shape(
-        array_ops.shape(self.loc), array_ops.shape(self.scale))
+        array_ops.shape(self.loc),
+        array_ops.shape(self.scale))
 
   def _batch_shape(self):
-    return array_ops.broadcast_static_shape(self.loc.shape, self.scale.shape)
+    return array_ops.broadcast_static_shape(
+        self.loc.shape,
+        self.scale.shape)
 
   def _event_shape_tensor(self):
     return constant_op.constant([], dtype=dtypes.int32)
diff --git a/tensorflow/contrib/layers/python/layers/layers.py b/tensorflow/contrib/layers/python/layers/layers.py
index f1debc8590..9378fe8799 100644
--- a/tensorflow/contrib/layers/python/layers/layers.py
+++ b/tensorflow/contrib/layers/python/layers/layers.py
@@ -309,6 +309,7 @@ def _fused_batch_norm(inputs,
         new_shape = [-1, channels, 1, 1]
       inputs = array_ops.reshape(inputs, new_shape)
     inputs_shape = inputs.get_shape()
+    dtype = inputs.dtype.base_dtype
     if data_format == DATA_FORMAT_NHWC:
       params_shape = inputs_shape[-1:]
     else:
diff --git a/tensorflow/contrib/layers/python/layers/layers_test.py b/tensorflow/contrib/layers/python/layers/layers_test.py
index 27bd3172d6..5aa2253516 100644
--- a/tensorflow/contrib/layers/python/layers/layers_test.py
+++ b/tensorflow/contrib/layers/python/layers/layers_test.py
@@ -1779,8 +1779,7 @@ class BatchNormTest(test.TestCase):
       dtype = dtypes.float32
     height, width = 3, 3
     with self.test_session():
-      images = np.random.uniform(size=(5, height, width, 3)).astype(
-          dtype.as_numpy_dtype)
+      images = np.random.uniform(size=(5, height, width, 3)).astype(dtype.as_numpy_dtype)
       output = _layers.batch_norm(images, fused=fused)
       expected_name = ('BatchNorm/FusedBatchNorm' if fused else
                        'BatchNorm/batchnorm')
@@ -2666,18 +2665,18 @@ class BatchNormTest(test.TestCase):
     # Test case for 11673
     with self.test_session() as sess:
       a_32 = array_ops.placeholder(dtypes.float32, shape=(10, 10, 10, 10))
-      _layers.batch_norm(
-          a_32, center=False, data_format='NCHW', zero_debias_moving_mean=True)
+      b_32 = _layers.batch_norm(a_32, center=False, data_format='NCHW',
+                                zero_debias_moving_mean=True)
       a_16 = array_ops.placeholder(dtypes.float16, shape=(10, 10, 10, 10))
-      _layers.batch_norm(
-          a_16, center=False, data_format='NCHW', zero_debias_moving_mean=True)
+      b_16 = _layers.batch_norm(a_16, center=False, data_format='NCHW',
+                                zero_debias_moving_mean=True)
       sess.run(variables_lib.global_variables_initializer())
 
   def testVariablesAreFloat32(self):
     height, width = 3, 3
     with self.test_session():
-      images = random_ops.random_uniform(
-          (5, height, width, 3), seed=1, dtype=dtypes.float16)
+      images = random_ops.random_uniform((5, height, width, 3),
+                                         seed=1, dtype=dtypes.float16)
       _layers.batch_norm(images, scale=True)
       beta = variables.get_variables_by_name('beta')[0]
       gamma = variables.get_variables_by_name('gamma')[0]
@@ -2692,13 +2691,17 @@ class BatchNormTest(test.TestCase):
     channels = shape[1]
     images = np.arange(np.product(shape), dtype=dtype).reshape(shape)
     beta = init_ops.constant_initializer(
-        np.arange(2, channels + 2, dtype=np.float32))
+        np.arange(
+            2, channels + 2, dtype=np.float32))
     gamma = init_ops.constant_initializer(
-        np.arange(10, channels + 10, dtype=np.float32) * 2.0)
+        np.arange(
+            10, channels + 10, dtype=np.float32) * 2.0)
     mean = init_ops.constant_initializer(
-        np.arange(3, channels + 3, dtype=np.float32) * 5.0)
+        np.arange(
+            3, channels + 3, dtype=np.float32) * 5.0)
     variance = init_ops.constant_initializer(
-        np.arange(1, channels + 1, dtype=np.float32) * 4.0)
+        np.arange(
+            1, channels + 1, dtype=np.float32) * 4.0)
     output = _layers.batch_norm(
         images,
         fused=True,
@@ -2723,6 +2726,7 @@ class BatchNormTest(test.TestCase):
       res_16 = self._runFusedBatchNorm(shape, np.float16)
       self.assertAllClose(res_32, res_16, rtol=1e-3)
 
+
   def testAdjustmentCreated(self):
     # Tests that the adjustment is appropriately passed to and used by the core
     # BN layer.
diff --git a/tensorflow/contrib/learn/python/learn/learn_io/data_feeder.py b/tensorflow/contrib/learn/python/learn/learn_io/data_feeder.py
index 86fad4c553..db18ebf05d 100644
--- a/tensorflow/contrib/learn/python/learn/learn_io/data_feeder.py
+++ b/tensorflow/contrib/learn/python/learn/learn_io/data_feeder.py
@@ -28,6 +28,7 @@ import six
 from six.moves import xrange  # pylint: disable=redefined-builtin
 
 from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import ops
 from tensorflow.python.framework import tensor_util
 from tensorflow.python.ops import array_ops
 from tensorflow.python.platform import tf_logging as logging
@@ -368,11 +369,10 @@ class DataFeeder(object):
     if x_is_dict:
       num_samples = list(self._x.values())[0].shape[0]
     elif tensor_util.is_tensor(self._x):
-      num_samples = self._x.shape[
-          0].value  # shape will be a Dimension, extract an int
+      num_samples = self._x.shape[0].value  # shape will be a Dimension, extract an int
     else:
       num_samples = self._x.shape[0]
-
+      
     if self._shuffle:
       self.indices = self.random_state.permutation(num_samples)
     else:
diff --git a/tensorflow/contrib/linear_optimizer/python/ops/sdca_ops.py b/tensorflow/contrib/linear_optimizer/python/ops/sdca_ops.py
index 7526f3ae0d..86d8484391 100644
--- a/tensorflow/contrib/linear_optimizer/python/ops/sdca_ops.py
+++ b/tensorflow/contrib/linear_optimizer/python/ops/sdca_ops.py
@@ -251,9 +251,8 @@ class SdcaModel(object):
 
       result_dense = 0.0
       for i in range(len(dense_variables)):
-        result_dense += math_ops.matmul(dense_features[i],
-                                        array_ops.expand_dims(
-                                            dense_variables[i], -1))
+        result_dense += math_ops.matmul(
+            dense_features[i], array_ops.expand_dims(dense_variables[i], -1))
 
     # Reshaping to allow shape inference at graph construction time.
     return array_ops.reshape(result_dense, [-1]) + result_sparse
diff --git a/tensorflow/contrib/lite/testing/generate_examples.py b/tensorflow/contrib/lite/testing/generate_examples.py
index 5bca82ded0..b122818221 100644
--- a/tensorflow/contrib/lite/testing/generate_examples.py
+++ b/tensorflow/contrib/lite/testing/generate_examples.py
@@ -40,7 +40,6 @@ from six import StringIO
 # TODO(aselle): Disable GPU for now
 os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
 
-# pylint: disable=g-import-not-at-top
 import tensorflow as tf
 from google.protobuf import text_format
 # TODO(aselle): switch to TensorFlow's resource_loader
@@ -384,7 +383,7 @@ def make_zip_of_tests(zip_path,
         report["toco_log"] = ""
         tf.reset_default_graph()
 
-        with tf.device("/cpu:0"):
+        with tf.device('/cpu:0'):
           try:
             inputs, outputs = make_graph(param_dict_real)
           except (tf.errors.UnimplementedError, tf.errors.InvalidArgumentError,
diff --git a/tensorflow/contrib/opt/__init__.py b/tensorflow/contrib/opt/__init__.py
index 04643a6058..4c60c99342 100644
--- a/tensorflow/contrib/opt/__init__.py
+++ b/tensorflow/contrib/opt/__init__.py
@@ -34,18 +34,12 @@ from tensorflow.python.util.all_util import remove_undocumented
 
 
 _allowed_symbols = [
-    'PowerSignOptimizer',
-    'AddSignOptimizer'
+    'PowerSignOptimizer', 'AddSignOptimizer'
     'DelayCompensatedGradientDescentOptimizer',
-    'DropStaleGradientOptimizer',
-    'ExternalOptimizerInterface',
-    'LazyAdamOptimizer',
-    'NadamOptimizer',
-    'MovingAverageOptimizer',
-    'ScipyOptimizerInterface',
-    'VariableClippingOptimizer',
-    'MultitaskOptimizerWrapper',
-    'clip_gradients_by_global_norm',
+    'DropStaleGradientOptimizer', 'ExternalOptimizerInterface',
+    'LazyAdamOptimizer', 'NadamOptimizer', 'MovingAverageOptimizer',
+    'ScipyOptimizerInterface', 'VariableClippingOptimizer',
+    'MultitaskOptimizerWrapper', 'clip_gradients_by_global_norm',
 ]
 
 remove_undocumented(__name__, _allowed_symbols)
diff --git a/tensorflow/contrib/opt/python/training/multitask_optimizer_wrapper.py b/tensorflow/contrib/opt/python/training/multitask_optimizer_wrapper.py
index cb6c77a86f..c26037935d 100644
--- a/tensorflow/contrib/opt/python/training/multitask_optimizer_wrapper.py
+++ b/tensorflow/contrib/opt/python/training/multitask_optimizer_wrapper.py
@@ -12,7 +12,9 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""An optimizer wrapper for stateful optimizers with multitask loss."""
+
+"""An optimizer wrapper that ensures correct behaviour
+of stateful optimizers with multitask loss."""
 
 from __future__ import absolute_import
 from __future__ import division
@@ -28,27 +30,26 @@ from tensorflow.python.ops import control_flow_ops
 from tensorflow.python.ops import math_ops
 from tensorflow.python.training import optimizer
 
-__all__ = ['MultitaskOptimizerWrapper', 'clip_gradients_by_global_norm']
-
+__all__ = ["MultitaskOptimizerWrapper",
+           "clip_gradients_by_global_norm"]
 
 def _is_all_zeros(grad):
   all_zeros = math_ops.equal(math_ops.count_nonzero(grad), 0)
   return all_zeros
 
-
 def _get_wrapper(fn, opt):
-
   def wrapper(self, grad, *args, **kwargs):  # pylint: disable=unused-argument
     all_zeros = _is_all_zeros(grad)
-    return control_flow_ops.cond(all_zeros, control_flow_ops.no_op,
-                                 lambda: fn(grad, *args, **kwargs))
-
+    return control_flow_ops.cond(
+        all_zeros,
+        control_flow_ops.no_op,
+        lambda: fn(grad, *args, **kwargs))
   wrapper = types.MethodType(wrapper, opt)
   return wrapper
 
-
 class MultitaskOptimizerWrapper(object):
-  """Optimizer wrapper making all-zero gradients harmless.
+  """Optimizer wrapper that ensures that
+  all-zero gradients don't affect the optimizer state.
 
   This might be useful when a multi-task loss is used,
   and some components of the loss might be
@@ -87,20 +88,20 @@ class MultitaskOptimizerWrapper(object):
     gradvars_clipped, global_step=batch)
   ```
   """
-
   def __init__(self, opt):
-    """Constructor.
-
+    """
     Args:
-      opt: an instance of a class that implements tf.train.Optimizer.
+    opt: an instance of a class that implements tf.train.Optimizer.
     """
     if not isinstance(opt, optimizer.Optimizer):
       raise TypeError(
-          'Supplied optimizer must be an instance of tf.train.Optimizer')
+          "Supplied optimizer must be an instance of tf.train.Optimizer")
     self._opt = opt
-    overridden_methods = ('_apply_dense', '_resource_apply_dense',
-                          '_apply_sparse', '_resource_apply_sparse')
-    for name in overridden_methods:
+    overriden_methods = ('_apply_dense',
+                         '_resource_apply_dense',
+                         '_apply_sparse',
+                         '_resource_apply_sparse')
+    for name in overriden_methods:
       fn = getattr(self._opt, name)
       wrapper = _get_wrapper(fn, self._opt)
       setattr(self._opt, name, wrapper)
@@ -111,30 +112,27 @@ class MultitaskOptimizerWrapper(object):
 
 def clip_gradients_by_global_norm(gradients_variables, clip_norm=20.):
   """Clips gradients of a multitask loss by their global norm.
-
   Ignores all-zero tensors when computing the global norm.
 
   Args:
-    gradients_variables: a list of pairs (gradient, variable).
-    clip_norm: a float Tensor, the global norm to clip on. Default is 20.0.
+  gradients_variables: a list of pairs (gradient, variable).
+  clip_norm: a float Tensor, the global norm to clip on. Default is 20.0.
 
   Returns:
-    list: A list of pairs of the same type as gradients_variables,.
-    fixed_global_norm: A 0-D (scalar) Tensor representing the global norm.
+  list: A list of pairs of the same type as gradients_variables,.
+  fixed_global_norm: A 0-D (scalar) Tensor representing the global norm.
   """
   gradients, variables = six.moves.zip(*gradients_variables)
-
   def _replace_nonexisting_grad(grad):
     if grad is None:
       return grad
     all_zeros = _is_all_zeros(grad)
-    return control_flow_ops.cond(
-        all_zeros,
-        lambda: array_ops.zeros([], dtype=dtypes.as_dtype(grad.dtype)),
-        lambda: grad)
-
+    return control_flow_ops.cond(all_zeros,
+                                 lambda: array_ops.zeros(
+                                     [], dtype=dtypes.as_dtype(grad.dtype)),
+                                 lambda: grad)
   nonzero_gradients = [_replace_nonexisting_grad(g) for g in gradients]
   fixed_global_norm = clip_ops.global_norm(nonzero_gradients)
-  gradients, _ = clip_ops.clip_by_global_norm(
-      gradients, clip_norm, use_norm=fixed_global_norm)
+  gradients, _ = clip_ops.clip_by_global_norm(gradients, clip_norm,
+                                              use_norm=fixed_global_norm)
   return list(six.moves.zip(gradients, variables)), fixed_global_norm
diff --git a/tensorflow/contrib/opt/python/training/multitask_optimizer_wrapper_test.py b/tensorflow/contrib/opt/python/training/multitask_optimizer_wrapper_test.py
index 618d8eb18d..b06213f715 100644
--- a/tensorflow/contrib/opt/python/training/multitask_optimizer_wrapper_test.py
+++ b/tensorflow/contrib/opt/python/training/multitask_optimizer_wrapper_test.py
@@ -18,9 +18,6 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-import numpy as np
-import six
-
 from tensorflow.contrib.opt.python.training import multitask_optimizer_wrapper
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
@@ -28,11 +25,13 @@ from tensorflow.python.ops import variables
 from tensorflow.python.platform import test
 from tensorflow.python.training import momentum
 
+import numpy as np
+import six
 
 class MultitaskOptimizerWrapperTest(test.TestCase):
-  """Tests for the multitask optimizer wrapper.
   """
-
+  Tests for the multitask optimizer wrapper.
+  """
   def testWrapper(self):
     with self.test_session():
       var0 = variables.Variable([1.0, 2.0], dtype=dtypes.float32)
@@ -40,10 +39,12 @@ class MultitaskOptimizerWrapperTest(test.TestCase):
       grads0 = constant_op.constant([0.1, 0.1], dtype=dtypes.float32)
       grads1 = constant_op.constant([0.01, 0.01], dtype=dtypes.float32)
       grads_allzero = constant_op.constant([0.0, 0.0], dtype=dtypes.float32)
-      mom_opt_impl = momentum.MomentumOptimizer(learning_rate=2.0, momentum=0.9)
+      mom_opt_impl = momentum.MomentumOptimizer(
+          learning_rate=2.0, momentum=0.9)
       mom_opt = multitask_optimizer_wrapper.MultitaskOptimizerWrapper(
           mom_opt_impl)
-      mom_update = mom_opt.apply_gradients(zip([grads0, grads1], [var0, var1]))
+      mom_update = mom_opt.apply_gradients(
+          zip([grads0, grads1], [var0, var1]))
       mom_update_partial = mom_opt.apply_gradients(
           zip([grads_allzero, grads1], [var0, var1]))
       mom_update_no_action = mom_opt.apply_gradients(
@@ -62,13 +63,14 @@ class MultitaskOptimizerWrapperTest(test.TestCase):
       # Step 1: normal momentum update.
       self.evaluate(mom_update)
       # Check that the momentum accumulators have been updated.
-      self.assertAllCloseAccordingToType(
-          np.array([0.1, 0.1]), self.evaluate(slot0))
-      self.assertAllCloseAccordingToType(
-          np.array([0.01, 0.01]), self.evaluate(slot1))
+      self.assertAllCloseAccordingToType(np.array([0.1, 0.1]),
+                                         self.evaluate(slot0))
+      self.assertAllCloseAccordingToType(np.array([0.01, 0.01]),
+                                         self.evaluate(slot1))
       # Check that the parameters have been updated.
       self.assertAllCloseAccordingToType(
-          np.array([1.0 - (0.1 * 2.0), 2.0 - (0.1 * 2.0)]), self.evaluate(var0))
+          np.array([1.0 - (0.1 * 2.0), 2.0 - (0.1 * 2.0)]),
+          self.evaluate(var0))
       self.assertAllCloseAccordingToType(
           np.array([3.0 - (0.01 * 2.0), 4.0 - (0.01 * 2.0)]),
           self.evaluate(var1))
@@ -76,8 +78,8 @@ class MultitaskOptimizerWrapperTest(test.TestCase):
       # Step 2: momentum update that changes only slot1 but not slot0.
       self.evaluate(mom_update_partial)
       # Check that only the relevant momentum accumulator has been updated.
-      self.assertAllCloseAccordingToType(
-          np.array([0.1, 0.1]), self.evaluate(slot0))
+      self.assertAllCloseAccordingToType(np.array([0.1, 0.1]),
+                                         self.evaluate(slot0))
       self.assertAllCloseAccordingToType(
           np.array([(0.9 * 0.01 + 0.01), (0.9 * 0.01 + 0.01)]),
           self.evaluate(slot1))
@@ -85,8 +87,8 @@ class MultitaskOptimizerWrapperTest(test.TestCase):
       # Step 3: momentum update that does not change anything.
       self.evaluate(mom_update_no_action)
       # Check that the momentum accumulators have *NOT* been updated.
-      self.assertAllCloseAccordingToType(
-          np.array([0.1, 0.1]), self.evaluate(slot0))
+      self.assertAllCloseAccordingToType(np.array([0.1, 0.1]),
+                                         self.evaluate(slot0))
       self.assertAllCloseAccordingToType(
           np.array([(0.9 * 0.01 + 0.01), (0.9 * 0.01 + 0.01)]),
           self.evaluate(slot1))
@@ -103,9 +105,8 @@ class MultitaskOptimizerWrapperTest(test.TestCase):
       grads3 = None
       varlist = [var0, var1, var2, var3]
       gradients = [grads0, grads1, grads2, grads3]
-      clipped_gradvars, global_norm = (
-          multitask_optimizer_wrapper.clip_gradients_by_global_norm(
-              six.moves.zip(gradients, varlist), clip_norm=1.0))
+      clipped_gradvars, global_norm = multitask_optimizer_wrapper.clip_gradients_by_global_norm(
+          six.moves.zip(gradients, varlist), clip_norm=1.0)
       clipped_grads = list(six.moves.zip(*clipped_gradvars))[0]
       reference_global_norm = np.sqrt(np.sum(np.square([10.0, 15.0, 0.0, 5.0])))
       self.assertAllCloseAccordingToType(
@@ -114,6 +115,5 @@ class MultitaskOptimizerWrapperTest(test.TestCase):
           self.evaluate(clipped_grads[2]), np.array([0., 0.]))
       self.assertEqual(clipped_grads[3], None)
 
-
 if __name__ == "__main__":
   test.main()
diff --git a/tensorflow/contrib/rnn/python/kernel_tests/core_rnn_cell_test.py b/tensorflow/contrib/rnn/python/kernel_tests/core_rnn_cell_test.py
index f130a2187c..16b6d145e3 100644
--- a/tensorflow/contrib/rnn/python/kernel_tests/core_rnn_cell_test.py
+++ b/tensorflow/contrib/rnn/python/kernel_tests/core_rnn_cell_test.py
@@ -24,7 +24,6 @@ import numpy as np
 
 from tensorflow.contrib import rnn as contrib_rnn
 from tensorflow.contrib.rnn.python.ops import core_rnn_cell
-from tensorflow.contrib.rnn.python.ops import rnn_cell as contrib_rnn_cell
 from tensorflow.core.protobuf import config_pb2
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
@@ -39,6 +38,9 @@ from tensorflow.python.ops import rnn_cell_impl
 from tensorflow.python.ops import variable_scope
 from tensorflow.python.ops import variables as variables_lib
 from tensorflow.python.platform import test
+from tensorflow.python.framework import test_util
+from tensorflow.contrib.rnn.python.ops import rnn_cell as contrib_rnn_cell
+
 
 
 # pylint: enable=protected-access
@@ -372,20 +374,19 @@ class RNNCellTest(test.TestCase):
         h = array_ops.zeros([batch_size, num_proj])
         state = rnn_cell_impl.LSTMStateTuple(c, h)
         cell = contrib_rnn_cell.LayerNormLSTMCell(
-            num_units=num_units,
-            num_proj=num_proj,
-            forget_bias=1.0,
-            layer_norm=True,
-            norm_gain=1.0,
-            norm_shift=0.0)
+          num_units=num_units,
+          num_proj=num_proj,
+          forget_bias=1.0,
+          layer_norm=True,
+          norm_gain=1.0,
+          norm_shift=0.0)
         g, out_m = cell(x, state)
         sess.run([variables_lib.global_variables_initializer()])
-        res = sess.run(
-            [g, out_m], {
-                x.name: np.ones((batch_size, input_size)),
-                c.name: 0.1 * np.ones((batch_size, num_units)),
-                h.name: 0.1 * np.ones((batch_size, num_proj))
-            })
+        res = sess.run([g, out_m], {
+          x.name: np.ones((batch_size, input_size)),
+          c.name: 0.1 * np.ones((batch_size, num_units)),
+          h.name: 0.1 * np.ones((batch_size, num_proj))
+        })
         self.assertEqual(len(res), 2)
         # The numbers in results were not calculated, this is mostly just a
         # smoke test.
@@ -395,9 +396,9 @@ class RNNCellTest(test.TestCase):
         # Different inputs so different outputs and states
         for i in range(1, batch_size):
           self.assertTrue(
-              float(np.linalg.norm((res[0][0, :] - res[0][i, :]))) < 1e-6)
+            float(np.linalg.norm((res[0][0, :] - res[0][i, :]))) < 1e-6)
           self.assertTrue(
-              float(np.linalg.norm((res[1][0, :] - res[1][i, :]))) < 1e-6)
+            float(np.linalg.norm((res[1][0, :] - res[1][i, :]))) < 1e-6)
 
   def testOutputProjectionWrapper(self):
     with self.test_session() as sess:
diff --git a/tensorflow/contrib/rnn/python/kernel_tests/rnn_cell_test.py b/tensorflow/contrib/rnn/python/kernel_tests/rnn_cell_test.py
index 46823fa364..b4a5f2d7eb 100644
--- a/tensorflow/contrib/rnn/python/kernel_tests/rnn_cell_test.py
+++ b/tensorflow/contrib/rnn/python/kernel_tests/rnn_cell_test.py
@@ -996,19 +996,26 @@ class RNNCellTest(test.TestCase):
         output, state = cell(x, hidden)
 
         sess.run([variables.global_variables_initializer()])
-        res = sess.run(
-            [output, state], {
-                hidden[0].name:
-                    np.array([[[[[1.], [1.]], [[1.], [1.]]], [[[1.], [1.]], [[
-                        1.
-                    ], [1.]]]], [[[[2.], [2.]], [[2.], [2.]]],
-                                 [[[2.], [2.]], [[2.], [2.]]]]]),
-                x.name:
-                    np.array([[[[[1.], [1.]], [[1.], [1.]]], [[[1.], [1.]], [[
-                        1.
-                    ], [1.]]]], [[[[2.], [2.]], [[2.], [2.]]], [[[2.], [2.]],
-                                                                [[2.], [2.]]]]])
-            })
+        res = sess.run([output, state], {
+            hidden[0].name:
+                np.array([[[[[1.],[1.]], 
+                            [[1.],[1.]]],
+                           [[[1.],[1.]],
+                            [[1.],[1.]]]], 
+                          [[[[2.],[2.]],
+                            [[2.],[2.]]],
+                           [[[2.],[2.]],
+                            [[2.],[2.]]]]]),
+            x.name:
+                np.array([[[[[1.],[1.]],
+                            [[1.],[1.]]],
+                           [[[1.],[1.]],
+                            [[1.],[1.]]]],
+                          [[[[2.],[2.]],
+                            [[2.],[2.]]],
+                           [[[2.],[2.]],
+                           [[2.],[2.]]]]])
+        })
         # This is a smoke test, making sure expected values are unchanged.
         self.assertEqual(len(res), 2)
         self.assertAllClose(res[0], res[1].h)
@@ -1269,8 +1276,10 @@ class LayerNormBasicLSTMCellTest(test.TestCase):
         self.assertAllClose(res[2].c, expected_c1, 1e-5)
         self.assertAllClose(res[2].h, expected_h1, 1e-5)
 
+
   def testBasicLSTMCellWithStateTupleLayerNorm(self):
-    """The results of LSTMCell and LayerNormBasicLSTMCell should be the same."""
+    """The results of LSTMCell and LayerNormBasicLSTMCell 
+    should be same. """
     with self.test_session() as sess:
       with variable_scope.variable_scope(
           "root", initializer=init_ops.constant_initializer(0.5)):
@@ -1281,21 +1290,21 @@ class LayerNormBasicLSTMCellTest(test.TestCase):
         c1 = array_ops.zeros([1, 2])
         h1 = array_ops.zeros([1, 2])
         state1 = rnn_cell_impl.LSTMStateTuple(c1, h1)
-        cell = rnn_cell_impl.MultiRNNCell([
-            contrib_rnn_cell.LayerNormLSTMCell(
-                2, layer_norm=True, norm_gain=1.0, norm_shift=0.0)
-            for _ in range(2)
-        ])
+        cell = rnn_cell_impl.MultiRNNCell(
+          [contrib_rnn_cell.LayerNormLSTMCell(
+              2,
+              layer_norm=True,
+              norm_gain=1.0,
+              norm_shift=0.0) for _ in range(2)])
         h, (s0, s1) = cell(x, (state0, state1))
         sess.run([variables.global_variables_initializer()])
-        res = sess.run(
-            [h, s0, s1], {
-                x.name: np.array([[1., 1.]]),
-                c0.name: 0.1 * np.asarray([[0, 1]]),
-                h0.name: 0.1 * np.asarray([[2, 3]]),
-                c1.name: 0.1 * np.asarray([[4, 5]]),
-                h1.name: 0.1 * np.asarray([[6, 7]]),
-            })
+        res = sess.run([h, s0, s1], {
+          x.name: np.array([[1., 1.]]),
+          c0.name: 0.1 * np.asarray([[0, 1]]),
+          h0.name: 0.1 * np.asarray([[2, 3]]),
+          c1.name: 0.1 * np.asarray([[4, 5]]),
+          h1.name: 0.1 * np.asarray([[6, 7]]),
+        })
 
         expected_h = np.array([[-0.38079708, 0.38079708]])
         expected_h0 = np.array([[-0.38079708, 0.38079708]])
diff --git a/tensorflow/contrib/rnn/python/ops/rnn_cell.py b/tensorflow/contrib/rnn/python/ops/rnn_cell.py
index 0698d40438..5e85c125df 100644
--- a/tensorflow/contrib/rnn/python/ops/rnn_cell.py
+++ b/tensorflow/contrib/rnn/python/ops/rnn_cell.py
@@ -36,6 +36,7 @@ from tensorflow.python.ops import nn_ops
 from tensorflow.python.ops import random_ops
 from tensorflow.python.ops import rnn_cell_impl
 from tensorflow.python.ops import variable_scope as vs
+from tensorflow.python.ops import partitioned_variables
 from tensorflow.python.platform import tf_logging as logging
 from tensorflow.python.util import nest
 
@@ -114,7 +115,7 @@ class CoupledInputForgetGateLSTMCell(rnn_cell_impl.RNNCell):
 
   The class uses optional peep-hole connections, and an optional projection
   layer.
-
+  
   Layer normalization implementation is based on:
 
     https://arxiv.org/abs/1607.06450.
@@ -123,24 +124,15 @@ class CoupledInputForgetGateLSTMCell(rnn_cell_impl.RNNCell):
   Jimmy Lei Ba, Jamie Ryan Kiros, Geoffrey E. Hinton
 
   and is applied before the internal nonlinearities.
-
+  
   """
 
-  def __init__(self,
-               num_units,
-               use_peepholes=False,
-               initializer=None,
-               num_proj=None,
-               proj_clip=None,
-               num_unit_shards=1,
-               num_proj_shards=1,
-               forget_bias=1.0,
-               state_is_tuple=True,
-               activation=math_ops.tanh,
-               reuse=None,
-               layer_norm=False,
-               norm_gain=1.0,
-               norm_shift=0.0):
+  def __init__(self, num_units, use_peepholes=False,
+               initializer=None, num_proj=None, proj_clip=None,
+               num_unit_shards=1, num_proj_shards=1,
+               forget_bias=1.0, state_is_tuple=True,
+               activation=math_ops.tanh, reuse=None,
+               layer_norm=False, norm_gain=1.0, norm_shift=0.0):
     """Initialize the parameters for an LSTM cell.
 
     Args:
@@ -172,6 +164,8 @@ class CoupledInputForgetGateLSTMCell(rnn_cell_impl.RNNCell):
         `layer_norm` has been set to `False`, this argument will be ignored.
       norm_shift: float, The layer normalization shift initial value. If
         `layer_norm` has been set to `False`, this argument will be ignored.
+        
+        
     """
     super(CoupledInputForgetGateLSTMCell, self).__init__(_reuse=reuse)
     if not state_is_tuple:
@@ -2055,8 +2049,8 @@ class ConvLSTMCell(rnn_cell_impl.RNNCell):
     if self._skip_connection:
       self._total_output_channels += self._input_shape[-1]
 
-    state_size = tensor_shape.TensorShape(
-        self._input_shape[:-1] + [self._output_channels])
+    state_size = tensor_shape.TensorShape(self._input_shape[:-1] 
+                                          + [self._output_channels])
     self._state_size = rnn_cell_impl.LSTMStateTuple(state_size, state_size)
     self._output_size = tensor_shape.TensorShape(self._input_shape[:-1]
                                                  + [self._total_output_channels])
@@ -2116,8 +2110,11 @@ class Conv3DLSTMCell(ConvLSTMCell):
     """Construct Conv3DLSTM. See `ConvLSTMCell` for more details."""
     super(Conv3DLSTMCell, self).__init__(conv_ndims=3, **kwargs)
 
-
-def _conv(args, filter_size, num_features, bias, bias_start=0.0):
+def _conv(args, 
+          filter_size,
+          num_features,
+          bias,
+          bias_start=0.0):
   """convolution:
   Args:
     args: a Tensor or a list of Tensors of dimension 3D, 4D or 5D, 
@@ -2394,19 +2391,12 @@ class LayerNormLSTMCell(rnn_cell_impl.RNNCell):
 
   """
 
-  def __init__(self,
-               num_units,
-               use_peepholes=False,
-               cell_clip=None,
-               initializer=None,
-               num_proj=None,
-               proj_clip=None,
+  def __init__(self, num_units,
+               use_peepholes=False, cell_clip=None,
+               initializer=None, num_proj=None, proj_clip=None,
                forget_bias=1.0,
-               activation=None,
-               layer_norm=False,
-               norm_gain=1.0,
-               norm_shift=0.0,
-               reuse=None):
+               activation=None, layer_norm=False,
+               norm_gain=1.0, norm_shift=0.0, reuse=None):
     """Initialize the parameters for an LSTM cell.
 
     Args:
@@ -2467,6 +2457,7 @@ class LayerNormLSTMCell(rnn_cell_impl.RNNCell):
   def output_size(self):
     return self._output_size
 
+
   def _linear(self,
               args,
               output_size,
@@ -2516,9 +2507,9 @@ class LayerNormLSTMCell(rnn_cell_impl.RNNCell):
     scope = vs.get_variable_scope()
     with vs.variable_scope(scope) as outer_scope:
       weights = vs.get_variable(
-          "kernel", [total_arg_size, output_size],
-          dtype=dtype,
-          initializer=kernel_initializer)
+        "kernel", [total_arg_size, output_size],
+        dtype=dtype,
+        initializer=kernel_initializer)
       if len(args) == 1:
         res = math_ops.matmul(args[0], weights)
       else:
@@ -2530,7 +2521,9 @@ class LayerNormLSTMCell(rnn_cell_impl.RNNCell):
         if bias_initializer is None:
           bias_initializer = init_ops.constant_initializer(0.0, dtype=dtype)
         biases = vs.get_variable(
-            "bias", [output_size], dtype=dtype, initializer=bias_initializer)
+          "bias", [output_size],
+          dtype=dtype,
+          initializer=bias_initializer)
 
     if not layer_norm:
       res = nn_ops.bias_add(res, biases)
@@ -2561,6 +2554,7 @@ class LayerNormLSTMCell(rnn_cell_impl.RNNCell):
       ValueError: If input size cannot be inferred from inputs via
         static shape inference.
     """
+    num_proj = self._num_units if self._num_proj is None else self._num_proj
     sigmoid = math_ops.sigmoid
 
     (c_prev, m_prev) = state
@@ -2573,14 +2567,10 @@ class LayerNormLSTMCell(rnn_cell_impl.RNNCell):
     with vs.variable_scope(scope, initializer=self._initializer) as unit_scope:
 
       # i = input_gate, j = new_input, f = forget_gate, o = output_gate
-      lstm_matrix = self._linear(
-          [inputs, m_prev],
-          4 * self._num_units,
-          bias=True,
-          bias_initializer=None,
-          layer_norm=self._layer_norm)
+      lstm_matrix = self._linear([inputs, m_prev], 4 * self._num_units, bias=True,
+                            bias_initializer=None, layer_norm=self._layer_norm)
       i, j, f, o = array_ops.split(
-          value=lstm_matrix, num_or_size_splits=4, axis=1)
+        value=lstm_matrix, num_or_size_splits=4, axis=1)
 
       if self._layer_norm:
         i = _norm(self._norm_gain, self._norm_shift, i, "input")
@@ -2590,22 +2580,20 @@ class LayerNormLSTMCell(rnn_cell_impl.RNNCell):
 
       # Diagonal connections
       if self._use_peepholes:
-        with vs.variable_scope(unit_scope):
+        with vs.variable_scope(unit_scope) as projection_scope:
           w_f_diag = vs.get_variable(
-              "w_f_diag", shape=[self._num_units], dtype=dtype)
+            "w_f_diag", shape=[self._num_units], dtype=dtype)
           w_i_diag = vs.get_variable(
-              "w_i_diag", shape=[self._num_units], dtype=dtype)
+            "w_i_diag", shape=[self._num_units], dtype=dtype)
           w_o_diag = vs.get_variable(
-              "w_o_diag", shape=[self._num_units], dtype=dtype)
+            "w_o_diag", shape=[self._num_units], dtype=dtype)
 
       if self._use_peepholes:
-        c = (
-            sigmoid(f + self._forget_bias + w_f_diag * c_prev) * c_prev +
-            sigmoid(i + w_i_diag * c_prev) * self._activation(j))
+        c = (sigmoid(f + self._forget_bias + w_f_diag * c_prev) * c_prev +
+             sigmoid(i + w_i_diag * c_prev) * self._activation(j))
       else:
-        c = (
-            sigmoid(f + self._forget_bias) * c_prev +
-            sigmoid(i) * self._activation(j))
+        c = (sigmoid(f + self._forget_bias) * c_prev + sigmoid(i) *
+             self._activation(j))
 
       if self._layer_norm:
         c = _norm(self._norm_gain, self._norm_shift, c, "state")
@@ -2620,7 +2608,7 @@ class LayerNormLSTMCell(rnn_cell_impl.RNNCell):
         m = sigmoid(o) * self._activation(c)
 
       if self._num_proj is not None:
-        with vs.variable_scope("projection"):
+        with vs.variable_scope("projection") as proj_scope:
           m = self._linear(m, self._num_proj, bias=False)
 
         if self._proj_clip is not None:
diff --git a/tensorflow/contrib/seq2seq/python/ops/attention_wrapper.py b/tensorflow/contrib/seq2seq/python/ops/attention_wrapper.py
index e87ef41388..c3b180d9f4 100644
--- a/tensorflow/contrib/seq2seq/python/ops/attention_wrapper.py
+++ b/tensorflow/contrib/seq2seq/python/ops/attention_wrapper.py
@@ -192,8 +192,7 @@ class _BaseAttentionMechanism(AttentionMechanism):
       raise TypeError("probability_fn must be callable, saw type: %s" %
                       type(probability_fn).__name__)
     if score_mask_value is None:
-      score_mask_value = dtypes.as_dtype(
-          self._memory_layer.dtype).as_numpy_dtype(-np.inf)
+      score_mask_value = dtypes.as_dtype(self._memory_layer.dtype).as_numpy_dtype(-np.inf)
     self._probability_fn = lambda score, prev: (  # pylint:disable=g-long-lambda
         probability_fn(
             _maybe_mask_score(score, memory_sequence_length, score_mask_value),
@@ -1146,9 +1145,7 @@ class AttentionWrapper(rnn_cell_impl.RNNCell):
             % (len(attention_layer_sizes), len(attention_mechanisms)))
       self._attention_layers = tuple(
           layers_core.Dense(
-              attention_layer_size,
-              name="attention_layer",
-              use_bias=False,
+              attention_layer_size, name="attention_layer", use_bias=False,
               dtype=attention_mechanisms[i].dtype)
           for i, attention_layer_size in enumerate(attention_layer_sizes))
       self._attention_layer_size = sum(attention_layer_sizes)
diff --git a/tensorflow/contrib/verbs/rdma.cc b/tensorflow/contrib/verbs/rdma.cc
index ac8d994502..331943a3ef 100644
--- a/tensorflow/contrib/verbs/rdma.cc
+++ b/tensorflow/contrib/verbs/rdma.cc
@@ -16,8 +16,8 @@ limitations under the License.
 #ifdef TENSORFLOW_USE_VERBS
 
 #include "tensorflow/contrib/verbs/rdma.h"
-#include <fcntl.h>
 #include <cstdlib>
+#include <fcntl.h>
 #include "tensorflow/contrib/verbs/verbs_util.h"
 #include "tensorflow/core/common_runtime/device_mgr.h"
 #include "tensorflow/core/common_runtime/dma_helper.h"
@@ -137,7 +137,7 @@ ibv_device* set_device() {
   if (!env_p_rdma_device.empty()) {
     for (device_index = 0; device_index < dev_num; device_index++) {
       if (!env_p_rdma_device.compare(
-              ibv_get_device_name(dev_list[device_index]))) {
+               ibv_get_device_name(dev_list[device_index]))) {
         CHECK(get_dev_active_port_count(dev_list[device_index]) != 0)
             << "Device " << ibv_get_device_name(dev_list[device_index])
             << " has no active ports";
@@ -147,7 +147,7 @@ ibv_device* set_device() {
     // check validity of input device
     CHECK(false) << "The device " << env_p_rdma_device << " wasn't found";
   } else {
-    // set default device
+  // set default device
     str_port_num = get_env_var("RDMA_DEVICE_PORT");
     CHECK(str_port_num.empty())
         << "RDMA_DEVICE should be provided if RDMA_DEVICE_PORT is set by user";
@@ -177,7 +177,7 @@ ibv_device* set_device() {
 // Returns:
 //   port to use
 uint8_t set_port(ibv_context* context) {
-  uint8_t port_num = 0;  // 0 is illegal port number
+  uint8_t port_num = 0; //0 is illegal port number
   string str_port_num;
   ibv_device_attr device_att;
   ibv_port_attr port_attr;
@@ -199,7 +199,9 @@ uint8_t set_port(ibv_context* context) {
     // check if port id active
     CHECK(port_attr.state == IBV_PORT_ACTIVE)
         << "Selected RDMA_DEVICE_PORT is not active";
-  } else {  // set default port
+  }
+  // set default port
+  else {
     for (port_index = 1; port_index <= device_att.phys_port_cnt; port_index++) {
       rc = ibv_query_port(context, port_index, &port_attr);
       CHECK(!rc) << "Failed to query the port" << port_index;
@@ -267,7 +269,7 @@ bool is_gid_type_roce_v2(ibv_context* context, uint8_t port_num,
 // Function to set GID index.
 // If the port link is IB, no GID index should be selected.
 // If Ethernet but RDMA_GID_INDEX not set gid index that supports
-//   RoCE V2 will be chosen(fails if more than one IP is configured)
+//   RoCE V2 will be chosen(fails if more then one IP is configured)
 // Args:
 //   context - device context
 //   port_num - port number
@@ -300,7 +302,7 @@ uint8_t set_gid(uint8_t port_num, ibv_context* context) {
     }
   }
   switch (port_attr.link_layer) {
-    case (IBV_LINK_LAYER_ETHERNET):
+    case(IBV_LINK_LAYER_ETHERNET) :
       gid_str = get_env_var("RDMA_GID_INDEX");
       if (!gid_str.empty()) {
         gid_index = stoi(gid_str);
@@ -311,7 +313,7 @@ uint8_t set_gid(uint8_t port_num, ibv_context* context) {
             << "More than one IP is available, please specify GID_INDEX";
       }
       break;
-    case (IBV_LINK_LAYER_INFINIBAND):  // no need in GID index
+    case(IBV_LINK_LAYER_INFINIBAND) :  // no need in GID index
       break;
     default:
       LOG(INFO) << "Unknown port link layer. Currently supporting Ethernet and "
@@ -372,8 +374,7 @@ enum ibv_mtu set_mtu(uint8_t port_num, ibv_context* context) {
         break;
       default:
         CHECK(0) << "Error: MTU input value must be one of the following: 256, "
-                    "512, 1024, 2048, 4096. MTU "
-                 << mtu << " is invalid\n";
+                    "512, 1024, 2048, 4096. MTU " << mtu << " is invalid\n";
         break;
     }
     CHECK(mtu < port_attr.active_mtu)
@@ -452,9 +453,9 @@ void RdmaAdapter::Process_CQ() {
     CHECK_GE(ne, 0);
     for (int i = 0; i < ne; ++i) {
       CHECK(wc_[i].status == IBV_WC_SUCCESS)
-          << "Failed status \n"
-          << ibv_wc_status_str(wc_[i].status) << " " << wc_[i].status << " "
-          << static_cast<int>(wc_[i].wr_id) << " " << wc_[i].vendor_err;
+          << "Failed status \n" << ibv_wc_status_str(wc_[i].status) << " "
+          << wc_[i].status << " " << static_cast<int>(wc_[i].wr_id) << " "
+          << wc_[i].vendor_err;
       if (wc_[i].opcode == IBV_WC_RECV_RDMA_WITH_IMM) {
         RdmaChannel* rc = reinterpret_cast<RdmaChannel*>(wc_[i].wr_id);
         // put back a recv wr.
@@ -610,7 +611,7 @@ RdmaChannel::RdmaChannel(const RdmaAdapter* adapter, const string local_name,
   // create message and ack buffers, then initialize the tables.
   {
     const string buffer_names[] = {"tx_message_buffer", "rx_message_buffer",
-                                   "tx_ack_buffer", "rx_ack_buffer"};
+                                   "tx_ack_buffer",     "rx_ack_buffer"};
     tx_message_buffer_ = new RdmaMessageBuffer(this, buffer_names[0]);
     rx_message_buffer_ = new RdmaMessageBuffer(this, buffer_names[1]);
     tx_ack_buffer_ = new RdmaAckBuffer(this, buffer_names[2]);
@@ -671,7 +672,7 @@ void RdmaChannel::SetRemoteAddress(const RdmaAddress& ra, bool override) {
 void RdmaChannel::Recv() {
   struct ibv_recv_wr wr;
   memset(&wr, 0, sizeof(wr));
-  wr.wr_id = (uint64_t)this;
+  wr.wr_id = (uint64_t) this;
   struct ibv_recv_wr* bad_wr;
   CHECK(!ibv_post_recv(qp_, &wr, &bad_wr)) << "Failed to post recv";
 }
@@ -825,11 +826,11 @@ void RdmaChannel::Connect(const RdmaAddress& remoteAddr) {
     attr.ah_attr.grh.traffic_class = adapter_->params_.traffic_class;
 
     int r;
-    CHECK(!(r = ibv_modify_qp(qp_, &attr,
-                              IBV_QP_STATE | IBV_QP_AV | IBV_QP_PATH_MTU |
-                                  IBV_QP_DEST_QPN | IBV_QP_RQ_PSN |
-                                  IBV_QP_MAX_DEST_RD_ATOMIC |
-                                  IBV_QP_MIN_RNR_TIMER)))
+    CHECK(!(r = ibv_modify_qp(qp_, &attr, IBV_QP_STATE | IBV_QP_AV |
+                                              IBV_QP_PATH_MTU |
+                                              IBV_QP_DEST_QPN | IBV_QP_RQ_PSN |
+                                              IBV_QP_MAX_DEST_RD_ATOMIC |
+                                              IBV_QP_MIN_RNR_TIMER)))
         << "QP to Ready to Receive " << r;
 
     memset(&attr, 0, sizeof(ibv_qp_attr));
@@ -840,10 +841,10 @@ void RdmaChannel::Connect(const RdmaAddress& remoteAddr) {
     attr.rnr_retry = 7; /* infinite */
     attr.max_rd_atomic = 1;
 
-    CHECK(!(r = ibv_modify_qp(qp_, &attr,
-                              IBV_QP_STATE | IBV_QP_TIMEOUT | IBV_QP_RETRY_CNT |
-                                  IBV_QP_RNR_RETRY | IBV_QP_SQ_PSN |
-                                  IBV_QP_MAX_QP_RD_ATOMIC)))
+    CHECK(!(r = ibv_modify_qp(qp_, &attr, IBV_QP_STATE | IBV_QP_TIMEOUT |
+                                              IBV_QP_RETRY_CNT |
+                                              IBV_QP_RNR_RETRY | IBV_QP_SQ_PSN |
+                                              IBV_QP_MAX_QP_RD_ATOMIC)))
         << "QP to Ready to Send " << r;
 
     connected_ = true;
@@ -930,7 +931,7 @@ void RdmaBuffer::Write(uint32_t imm_data, size_t buffer_size) {
 
   struct ibv_send_wr wr;
   memset(&wr, 0, sizeof(wr));
-  wr.wr_id = (uint64_t)this;
+  wr.wr_id = (uint64_t) this;
   wr.sg_list = &list;
   wr.num_sge = 1;
   wr.opcode = IBV_WR_RDMA_WRITE_WITH_IMM;
@@ -1025,9 +1026,9 @@ Rendezvous::DoneCallback RdmaTensorBuffer::getRecvTensorCallback(
     TensorProto proto;
     if (src_dev->tensorflow_gpu_device_info() &&
         (!send_args.alloc_attrs.on_host())) {
-      CHECK(send_args.device_context)
-          << "send dev name: " << src_dev->name()
-          << " gpu_info: " << src_dev->tensorflow_gpu_device_info();
+      CHECK(send_args.device_context) << "send dev name: " << src_dev->name()
+                                      << " gpu_info: "
+                                      << src_dev->tensorflow_gpu_device_info();
 
       if (can_memcpy) {
         AllocatorAttributes host_alloc_attrs;
@@ -1053,8 +1054,8 @@ Rendezvous::DoneCallback RdmaTensorBuffer::getRecvTensorCallback(
         // aync instead
         GPUUtil::SetProtoFromGPU(
             in, src_dev, send_args.device_context, &proto, is_dead,
-            [this, proto, buffer_size, key, in, step_id, key_with_step_id,
-             is_dead, send_args, recv_args](const Status& s) mutable {
+	    [this, proto, buffer_size, key, in, step_id, key_with_step_id,
+            is_dead, send_args, recv_args](const Status& s) mutable {
               CHECK(s.ok()) << "copy proto from gpu sync";
               auto tensor_bytes = proto.ByteSize();
               buffer_size += tensor_bytes;
diff --git a/tensorflow/core/api_def/base_api/api_def_UniqueV2.pbtxt b/tensorflow/core/api_def/base_api/api_def_UniqueV2.pbtxt
deleted file mode 100644
index cd7ec6e551..0000000000
--- a/tensorflow/core/api_def/base_api/api_def_UniqueV2.pbtxt
+++ /dev/null
@@ -1,47 +0,0 @@
-op {
-  graph_op_name: "UniqueV2"
-  in_arg {
-    name: "x"
-    description: <<END
-A `Tensor`.
-END
-  }
-  in_arg {
-    name: "axis"
-    description: <<END
-A `Tensor` of type `int64` (default: 0). The axis of the Tensor to
-find the unique elements.
-END
-  }
-  out_arg {
-    name: "y"
-    description: <<END
-A `Tensor`. Unique elements along the `axis` of `Tensor` x.
-END
-  }
-  out_arg {
-    name: "idx"
-    description: <<END
-A 1-D Tensor. Has the same type as x that contains the index of each
-value of x in the output y.
-END
-  }
-  summary: "Finds unique elements in a 1-D tensor."
-  description: <<END
-This operation returns a tensor `y` containing all of the unique elements of `x`
-sorted in the same order that they occur in `x`. This operation also returns a
-tensor `idx` the same size as `x` that contains the index of each value of `x`
-in the unique output `y`. In other words:
-
-`y[idx[i]] = x[i] for i in [0, 1,...,rank(x) - 1]`
-
-For example:
-
-```
-# tensor 'x' is [1, 1, 2, 4, 4, 4, 7, 8, 8]
-y, idx = unique(x)
-y ==> [1, 2, 4, 7, 8]
-idx ==> [0, 0, 1, 2, 2, 2, 3, 4, 4]
-```
-END
-}
diff --git a/tensorflow/core/api_def/base_api/api_def_UnsortedSegmentSum.pbtxt b/tensorflow/core/api_def/base_api/api_def_UnsortedSegmentSum.pbtxt
index 77a96d1e03..0a3355cdbc 100644
--- a/tensorflow/core/api_def/base_api/api_def_UnsortedSegmentSum.pbtxt
+++ b/tensorflow/core/api_def/base_api/api_def_UnsortedSegmentSum.pbtxt
@@ -26,8 +26,6 @@ need not be sorted and need not cover all values in the full
 range of valid values.
 
 If the sum is empty for a given segment ID `i`, `output[i] = 0`.
-If the given segment ID `i` is negative, the value is dropped and will not be
-added to the sum of the segment.
 
 `num_segments` should equal the number of distinct segment IDs.
 
diff --git a/tensorflow/core/graph/graph.h b/tensorflow/core/graph/graph.h
index 223dd12f8f..d0dba6e1f0 100644
--- a/tensorflow/core/graph/graph.h
+++ b/tensorflow/core/graph/graph.h
@@ -455,7 +455,7 @@ class Graph {
   // the corresponding NodeDef to reflect the change.
   // REQUIRES: The control edge must exist.
   void RemoveControlEdge(const Edge* e);
-
+  
   // Updates the input to a node.  The existing edge to `dst` is removed and an
   // edge from `new_src` to `dst` is created. The NodeDef associated with `dst`
   // is also updated.
diff --git a/tensorflow/core/graph/graph_test.cc b/tensorflow/core/graph/graph_test.cc
index e2ce0ba046..2aa1b31e15 100644
--- a/tensorflow/core/graph/graph_test.cc
+++ b/tensorflow/core/graph/graph_test.cc
@@ -118,9 +118,11 @@ class GraphTest : public ::testing::Test {
     LOG(FATAL) << name;
   }
 
-  bool ControlEdgeExistsInGraphOrNodeDef(const Node* src, const Node* dst) {
-    for (const Edge* e : dst->in_edges()) {
-      if (e->IsControlEdge() && e->src() == src &&
+  bool ControlEdgeExistsInGraphOrNodeDef(const Node* src,
+                                         const Node* dst) {
+    for (const Edge *e : dst->in_edges()) {
+      if (e->IsControlEdge() &&
+          e->src() == src &&
           e->src_output() == Graph::kControlSlot &&
           e->dst_input() == Graph::kControlSlot) {
         return true;
diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD
index b4a5a3c796..f1cb9a1860 100644
--- a/tensorflow/core/kernels/BUILD
+++ b/tensorflow/core/kernels/BUILD
@@ -1720,7 +1720,6 @@ tf_cuda_cc_tests(
         ":data_flow",
         ":ops_testutil",
         ":ops_util",
-        "//tensorflow/core:core_cpu",
         "//tensorflow/core:framework",
         "//tensorflow/core:lib",
         "//tensorflow/core:protos_all_cc",
diff --git a/tensorflow/core/kernels/bincount_op.cc b/tensorflow/core/kernels/bincount_op.cc
index 890fa3121b..766d63e3be 100644
--- a/tensorflow/core/kernels/bincount_op.cc
+++ b/tensorflow/core/kernels/bincount_op.cc
@@ -97,9 +97,8 @@ class BincountOp : public OpKernel {
     const Tensor& weights_t = ctx->input(2);
 
     int32 size = size_tensor.scalar<int32>()();
-    OP_REQUIRES(
-        ctx, size >= 0,
-        errors::InvalidArgument("size (", size, ") must be non-negative"));
+    OP_REQUIRES(ctx, size >= 0, errors::InvalidArgument(
+                                    "size (", size, ") must be non-negative"));
 
     const auto arr = arr_t.flat<int32>();
     const auto weights = weights_t.flat<T>();
diff --git a/tensorflow/core/kernels/bincount_op.h b/tensorflow/core/kernels/bincount_op.h
index cd3d560cd1..0f8dd2b82a 100644
--- a/tensorflow/core/kernels/bincount_op.h
+++ b/tensorflow/core/kernels/bincount_op.h
@@ -16,11 +16,11 @@ limitations under the License.
 #ifndef TENSORFLOW_BINCOUNT_OP_H_
 #define TENSORFLOW_BINCOUNT_OP_H_
 
-#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
 #include "tensorflow/core/framework/op_kernel.h"
 #include "tensorflow/core/framework/tensor_types.h"
 #include "tensorflow/core/framework/types.h"
 #include "tensorflow/core/lib/core/errors.h"
+#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
 
 namespace tensorflow {
 
diff --git a/tensorflow/core/kernels/bincount_op_gpu.cu.cc b/tensorflow/core/kernels/bincount_op_gpu.cu.cc
index 6074b3e1f6..ae9e26ffdf 100644
--- a/tensorflow/core/kernels/bincount_op_gpu.cu.cc
+++ b/tensorflow/core/kernels/bincount_op_gpu.cu.cc
@@ -17,12 +17,12 @@ limitations under the License.
 
 #define EIGEN_USE_GPU
 
+#include "tensorflow/core/kernels/bincount_op.h"
 #include "external/cub_archive/cub/device/device_histogram.cuh"
 #include "tensorflow/core/framework/op_kernel.h"
 #include "tensorflow/core/framework/register_types.h"
 #include "tensorflow/core/framework/tensor.h"
 #include "tensorflow/core/framework/tensor_shape.h"
-#include "tensorflow/core/kernels/bincount_op.h"
 #include "tensorflow/core/platform/logging.h"
 #include "tensorflow/core/platform/types.h"
 #include "tensorflow/core/util/cuda_kernel_helper.h"
@@ -93,8 +93,8 @@ struct BincountFunctor<GPUDevice, T> {
         /* num_samples */ num_samples,
         /* stream */ stream);
     if (err != cudaSuccess) {
-      return errors::Internal(
-          "Could not launch HistogramEven: ", cudaGetErrorString(err), ".");
+      return errors::Internal("Could not launch HistogramEven: ",
+                              cudaGetErrorString(err), ".");
     }
     return Status::OK();
   }
diff --git a/tensorflow/core/kernels/bincount_op_test.cc b/tensorflow/core/kernels/bincount_op_test.cc
index cb04b40637..14becc87a7 100644
--- a/tensorflow/core/kernels/bincount_op_test.cc
+++ b/tensorflow/core/kernels/bincount_op_test.cc
@@ -30,8 +30,8 @@ static Graph* Bincount(int arr_size, int nbins) {
   Tensor arr(DT_INT32, TensorShape({arr_size}));
   arr.flat<int32>() = arr.flat<int32>().setRandom().abs();
 
-  Tensor size(DT_INT32, TensorShape({static_cast<int32>(1)}));
-  size.flat<int32>()(0) = static_cast<int32>(nbins);
+  Tensor size(DT_INT32, TensorShape({(int32)1}));
+  size.flat<int32>()(0) = (int32)nbins;
 
   Tensor weights(DT_INT32, TensorShape({0}));
 
diff --git a/tensorflow/core/kernels/bucketize_op_gpu.cu.cc b/tensorflow/core/kernels/bucketize_op_gpu.cu.cc
index 325dee793b..aafbbe41b4 100644
--- a/tensorflow/core/kernels/bucketize_op_gpu.cu.cc
+++ b/tensorflow/core/kernels/bucketize_op_gpu.cu.cc
@@ -77,10 +77,10 @@ struct BucketizeFunctor<GPUDevice, T> {
     TF_RETURN_IF_ERROR(boundaries_array.Finalize());
 
     CudaLaunchConfig config = GetCudaLaunchConfig(input.size(), d);
-    BucketizeCustomKernel<T>
-        <<<config.block_count, config.thread_per_block, 0, d.stream()>>>(
-            input.size(), input.data(), boundaries_vector.size(),
-            boundaries_array.data(), output.data());
+    BucketizeCustomKernel<
+        T><<<config.block_count, config.thread_per_block, 0, d.stream()>>>(
+        input.size(), input.data(), boundaries_vector.size(),
+        boundaries_array.data(), output.data());
 
     return Status::OK();
   }
diff --git a/tensorflow/core/kernels/conv_grad_ops_3d.cc b/tensorflow/core/kernels/conv_grad_ops_3d.cc
index c2d24d1f12..f819fccbfb 100644
--- a/tensorflow/core/kernels/conv_grad_ops_3d.cc
+++ b/tensorflow/core/kernels/conv_grad_ops_3d.cc
@@ -1101,27 +1101,29 @@ class Conv3DBackpropFilterOp<GPUDevice, T> : public OpKernel {
   bool cudnn_use_autotune_;
 };
 
+
+
 #define REGISTER_GPU_KERNEL(T)                                                \
   REGISTER_KERNEL_BUILDER(                                                    \
       Name("Conv3DBackpropInput").Device(DEVICE_GPU).TypeConstraint<T>("T"),  \
       Conv3DBackpropInputOp<GPUDevice, T>);                                   \
   REGISTER_KERNEL_BUILDER(Name("Conv3DBackpropInputV2")                       \
-                              .Device(DEVICE_GPU)                             \
-                              .TypeConstraint<T>("T")                         \
-                              .HostMemory("input_sizes"),                     \
-                          Conv3DBackpropInputOp<GPUDevice, T>);               \
+                            .Device(DEVICE_GPU)                               \
+                            .TypeConstraint<T>("T")                           \
+                            .HostMemory("input_sizes"),                       \
+                        Conv3DBackpropInputOp<GPUDevice, T>);                 \
   REGISTER_KERNEL_BUILDER(                                                    \
-      Name("Conv3DBackpropFilter").Device(DEVICE_GPU).TypeConstraint<T>("T"), \
-      Conv3DBackpropFilterOp<GPUDevice, T>);                                  \
+    Name("Conv3DBackpropFilter").Device(DEVICE_GPU).TypeConstraint<T>("T"),   \
+    Conv3DBackpropFilterOp<GPUDevice, T>);                                    \
   REGISTER_KERNEL_BUILDER(Name("Conv3DBackpropFilterV2")                      \
-                              .Device(DEVICE_GPU)                             \
-                              .TypeConstraint<T>("T")                         \
-                              .HostMemory("filter_sizes"),                    \
-                          Conv3DBackpropFilterOp<GPUDevice, T>);
+                            .Device(DEVICE_GPU)                               \
+                            .TypeConstraint<T>("T")                           \
+                            .HostMemory("filter_sizes"),                      \
+                        Conv3DBackpropFilterOp<GPUDevice, T>);
 TF_CALL_half(REGISTER_GPU_KERNEL);
 TF_CALL_float(REGISTER_GPU_KERNEL);
 #undef REGISTER_GPU_KERNEL
-
+     
 #endif  // GOOGLE_CUDA
 
 }  // namespace tensorflow
diff --git a/tensorflow/core/kernels/cwise_op_asinh.cc b/tensorflow/core/kernels/cwise_op_asinh.cc
index a7673afd0b..8d44208aa7 100644
--- a/tensorflow/core/kernels/cwise_op_asinh.cc
+++ b/tensorflow/core/kernels/cwise_op_asinh.cc
@@ -22,7 +22,7 @@ REGISTER4(UnaryOp, CPU, "Asinh", functor::asinh, float, double,
 
 #ifdef TENSORFLOW_USE_SYCL
 REGISTER2(UnaryOp, SYCL, "Asinh", functor::asinh, float, double);
-#endif  // TENSORFLOW_USE_SYCL
+#endif // TENSORFLOW_USE_SYCL
 
 #if GOOGLE_CUDA
 REGISTER2(UnaryOp, GPU, "Asinh", functor::asinh, float, double);
diff --git a/tensorflow/core/kernels/cwise_op_atanh.cc b/tensorflow/core/kernels/cwise_op_atanh.cc
index 7b688db4c5..bbc69e45aa 100644
--- a/tensorflow/core/kernels/cwise_op_atanh.cc
+++ b/tensorflow/core/kernels/cwise_op_atanh.cc
@@ -22,7 +22,7 @@ REGISTER4(UnaryOp, CPU, "Atanh", functor::atanh, float, double,
 
 #ifdef TENSORFLOW_USE_SYCL
 REGISTER2(UnaryOp, SYCL, "Atanh", functor::atanh, float, double);
-#endif  // TENSORFLOW_USE_SYCL
+#endif // TENSORFLOW_USE_SYCL
 
 #if GOOGLE_CUDA
 REGISTER2(UnaryOp, GPU, "Atanh", functor::atanh, float, double);
diff --git a/tensorflow/core/kernels/depthwise_conv_grad_op.cc b/tensorflow/core/kernels/depthwise_conv_grad_op.cc
index 9347978d51..53d65a22d1 100644
--- a/tensorflow/core/kernels/depthwise_conv_grad_op.cc
+++ b/tensorflow/core/kernels/depthwise_conv_grad_op.cc
@@ -231,8 +231,7 @@ static void CopyOutputBackpropRegion(const DepthwiseArgs& args,
       }
       // Pad to vector-register width (if needed).
       for (int64 d = 0; d < pad_size; ++d) {
-        buffer[buf_base + vectorized_size + scalar_size + d] =
-            static_cast<T>(0);
+        buffer[buf_base + vectorized_size + scalar_size + d] = static_cast<T>(0);
       }
     }
   }
@@ -511,8 +510,7 @@ static void DepthwiseConvBackpropInputReference(const DepthwiseArgs& args,
 
 #if GOOGLE_CUDA
 
-extern template struct LaunchDepthwiseConvBackpropInputOp<GPUDevice,
-                                                          Eigen::half>;
+extern template struct LaunchDepthwiseConvBackpropInputOp<GPUDevice, Eigen::half>;
 extern template struct LaunchDepthwiseConvBackpropInputOp<GPUDevice, float>;
 extern template struct LaunchDepthwiseConvBackpropInputOp<GPUDevice, double>;
 
@@ -887,8 +885,7 @@ static void DepthwiseConvBackpropFilterReference(const DepthwiseArgs& args,
 
 #if GOOGLE_CUDA
 
-extern template struct LaunchDepthwiseConvBackpropFilterOp<GPUDevice,
-                                                           Eigen::half>;
+extern template struct LaunchDepthwiseConvBackpropFilterOp<GPUDevice, Eigen::half>;
 extern template struct LaunchDepthwiseConvBackpropFilterOp<GPUDevice, float>;
 extern template struct LaunchDepthwiseConvBackpropFilterOp<GPUDevice, double>;
 
diff --git a/tensorflow/core/kernels/depthwise_conv_op.cc b/tensorflow/core/kernels/depthwise_conv_op.cc
index 30ecd0c2ba..2759ecb2f1 100644
--- a/tensorflow/core/kernels/depthwise_conv_op.cc
+++ b/tensorflow/core/kernels/depthwise_conv_op.cc
@@ -427,11 +427,6 @@ TF_CALL_double(REGISTER_CPU_KERNEL);
 #endif
 
 #if GOOGLE_CUDA
-REGISTER_KERNEL_BUILDER(Name("DepthwiseConv2dNative")
-                            .Device(DEVICE_GPU)
-                            .TypeConstraint<Eigen::half>("T"),
-                        DepthwiseConv2dNativeOp<GPUDevice, Eigen::half>);
-
 REGISTER_KERNEL_BUILDER(
     Name("DepthwiseConv2dNative").Device(DEVICE_GPU).TypeConstraint<Eigen::half>("T"),
     DepthwiseConv2dNativeOp<GPUDevice, Eigen::half>);
diff --git a/tensorflow/core/kernels/depthwise_conv_op.h b/tensorflow/core/kernels/depthwise_conv_op.h
index 097a9f5bfa..11aed5b415 100644
--- a/tensorflow/core/kernels/depthwise_conv_op.h
+++ b/tensorflow/core/kernels/depthwise_conv_op.h
@@ -158,8 +158,7 @@ struct DepthwiseFilterPadOp {
       }
       // Pad the remainder of output to vector-register boundary.
       for (int64 j = 0; j < pad_size; ++j) {
-        padded_filter[output_base + vectorized_size + scalar_size + j] =
-            static_cast<T>(0);
+        padded_filter[output_base + vectorized_size + scalar_size + j] = static_cast<T>(0);
       }
     }
   }
diff --git a/tensorflow/core/kernels/maxpooling_op.cc b/tensorflow/core/kernels/maxpooling_op.cc
index d8bdb700e6..157ce106ce 100644
--- a/tensorflow/core/kernels/maxpooling_op.cc
+++ b/tensorflow/core/kernels/maxpooling_op.cc
@@ -20,7 +20,6 @@ limitations under the License.
 #include "tensorflow/core/kernels/maxpooling_op.h"
 
 #include <vector>
-#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
 #include "tensorflow/core/common_runtime/device.h"
 #include "tensorflow/core/framework/numeric_op.h"
 #include "tensorflow/core/framework/op_kernel.h"
@@ -38,6 +37,7 @@ limitations under the License.
 #include "tensorflow/core/util/padding.h"
 #include "tensorflow/core/util/tensor_format.h"
 #include "tensorflow/core/util/use_cudnn.h"
+#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
 
 #if GOOGLE_CUDA
 #include "tensorflow/core/kernels/maxpooling_op_gpu.h"
@@ -359,8 +359,7 @@ class MaxPoolingGradOp<Eigen::GpuDevice, T> : public OpKernel {
     OP_REQUIRES_OK(context, context->GetAttr("padding", &padding_));
 
     use_dnn_ = CanUseCudnn();
-    TF_CHECK_OK(ReadBoolFromEnvVar("TF_ENABLE_MAXPOOL_NANPROP", false,
-                                   &propagate_nans_));
+    ReadBoolFromEnvVar("TF_ENABLE_MAXPOOL_NANPROP", false, &propagate_nans_);
   }
 
   void Compute(OpKernelContext* context) override {
@@ -889,8 +888,7 @@ class MaxPoolingWithArgmaxOp : public OpKernel {
                 errors::Unimplemented(
                     "Pooling is not yet supported on the batch dimension."));
 
-    TF_CHECK_OK(ReadBoolFromEnvVar("TF_ENABLE_MAXPOOL_NANPROP", false,
-                                   &propagate_nans_));
+    ReadBoolFromEnvVar("TF_ENABLE_MAXPOOL_NANPROP", false, &propagate_nans_);
   }
 
   void Compute(OpKernelContext* context) override {
@@ -1054,8 +1052,7 @@ class MaxPoolingNoMaskOp<GPUDevice, T> : public OpKernel {
                     "Pooling is not yet supported on the batch dimension."));
     use_dnn_ = CanUseCudnn();
 
-    TF_CHECK_OK(ReadBoolFromEnvVar("TF_ENABLE_MAXPOOL_NANPROP", false,
-                                   &propagate_nans_));
+    ReadBoolFromEnvVar("TF_ENABLE_MAXPOOL_NANPROP", false, &propagate_nans_);
   }
 
   void Compute(OpKernelContext* context) override {
@@ -1140,8 +1137,7 @@ class MaxPoolingNoMaskV2Op<GPUDevice, T> : public OpKernel {
     }
     OP_REQUIRES_OK(context, context->GetAttr("padding", &padding_));
     use_dnn_ = CanUseCudnn();
-    TF_CHECK_OK(ReadBoolFromEnvVar("TF_ENABLE_MAXPOOL_NANPROP", false,
-                                   &propagate_nans_));
+    ReadBoolFromEnvVar("TF_ENABLE_MAXPOOL_NANPROP", false, &propagate_nans_);
   }
 
   void Compute(OpKernelContext* context) override {
diff --git a/tensorflow/core/kernels/maxpooling_op_gpu.cu.cc b/tensorflow/core/kernels/maxpooling_op_gpu.cu.cc
index f8daaca4c9..d96b844383 100644
--- a/tensorflow/core/kernels/maxpooling_op_gpu.cu.cc
+++ b/tensorflow/core/kernels/maxpooling_op_gpu.cu.cc
@@ -405,17 +405,17 @@ bool MaxPoolForwardWithOptionalArgmax<T>::operator()(
   if (propagate_nans) {
     MaxPoolForwardNHWC<true>
         <<<(output_size + kThreadsPerBlock - 1) / kThreadsPerBlock,
-           kThreadsPerBlock, 0, d.stream()>>>(
-            output_size, bottom_data, height, width, channels, pooled_height,
-            pooled_width, kernel_h, kernel_w, stride_h, stride_w, pad_t, pad_l,
-            top_data, mask);
+           kThreadsPerBlock, 0, d.stream()>>>
+        (output_size, bottom_data, height, width, channels, pooled_height,
+         pooled_width, kernel_h, kernel_w, stride_h, stride_w, pad_t, pad_l,
+         top_data, mask);
   } else {
     MaxPoolForwardNHWC<false>
         <<<(output_size + kThreadsPerBlock - 1) / kThreadsPerBlock,
-           kThreadsPerBlock, 0, d.stream()>>>(
-            output_size, bottom_data, height, width, channels, pooled_height,
-            pooled_width, kernel_h, kernel_w, stride_h, stride_w, pad_t, pad_l,
-            top_data, mask);
+           kThreadsPerBlock, 0, d.stream()>>>
+        (output_size, bottom_data, height, width, channels, pooled_height,
+         pooled_width, kernel_h, kernel_w, stride_h, stride_w, pad_t, pad_l,
+         top_data, mask);
   }
   return d.ok();
 }
diff --git a/tensorflow/core/kernels/mkl_tfconv_op.h b/tensorflow/core/kernels/mkl_tfconv_op.h
index c4d5a45d3c..0a5be4fec9 100644
--- a/tensorflow/core/kernels/mkl_tfconv_op.h
+++ b/tensorflow/core/kernels/mkl_tfconv_op.h
@@ -101,8 +101,8 @@ class MklToTfOp : public OpKernel {
       // Allocate output tensor.
       TensorShape output_shape = input_shape.GetTfShape();
       Tensor* output_tensor = NULL;
-      OP_REQUIRES_OK(context, context->allocate_output(
-                                  input_number, output_shape, &output_tensor));
+      OP_REQUIRES_OK(context, context->allocate_output(input_number,
+                                  output_shape, &output_tensor));
       CHECK_NOTNULL(output_tensor);
 
       // Do we need to reorder Mkl layout into TensorFlow layout?
@@ -116,13 +116,13 @@ class MklToTfOp : public OpKernel {
         // If not, just forward input tensor to output tensor.
         CHECK(output_tensor->CopyFrom(input_tensor, output_shape));
       }
-    } catch (mkldnn::error& e) {
+    } catch (mkldnn::error &e) {
       string error_msg = "Status: " + std::to_string(e.status) +
-                         ", message: " + std::string(e.message) + ", in file " +
-                         std::string(__FILE__) + ":" + std::to_string(__LINE__);
-      OP_REQUIRES_OK(
-          context,
-          errors::Aborted("Operation received an exception:", error_msg));
+                       ", message: " + std::string(e.message) +
+                       ", in file " + std::string(__FILE__) + ":" +
+                       std::to_string(__LINE__);
+      OP_REQUIRES_OK(context,
+        errors::Aborted("Operation received an exception:", error_msg));
     }
   }
 #else
@@ -160,8 +160,8 @@ class MklToTfOp : public OpKernel {
 
     // Allocate output tensor.
     Tensor* output_tensor = NULL;
-    OP_REQUIRES_OK(context, context->allocate_output(input_number, output_shape,
-                                                     &output_tensor));
+    OP_REQUIRES_OK(context, context->allocate_output(input_number,
+                              output_shape, &output_tensor));
 
     dnnLayout_t output_layout =
         static_cast<dnnLayout_t>(input_shape.GetTfLayout());
diff --git a/tensorflow/core/kernels/ops_util.h b/tensorflow/core/kernels/ops_util.h
index 93ef512778..d3d1b56c9d 100644
--- a/tensorflow/core/kernels/ops_util.h
+++ b/tensorflow/core/kernels/ops_util.h
@@ -98,19 +98,6 @@ gtl::InlinedVector<T, 8> ComputeStride(const TensorShape& shape) {
   return strides;
 }
 
-// Helper to compute 'strides' given an Eigen TensorDimensions
-template <typename T, typename EigenDimensions>
-gtl::InlinedVector<T, 8> ComputeEigenStrides(const EigenDimensions& shape) {
-  const int ndims = shape.rank();
-  gtl::InlinedVector<T, 8> strides(ndims);
-  T stride = 1;
-  for (int i = ndims - 1; i >= 0; --i) {
-    strides[i] = stride;
-    stride *= static_cast<T>(shape[i]);
-  }
-  return strides;
-}
-
 }  // namespace tensorflow
 
 #endif  // TENSORFLOW_KERNELS_OPS_UTIL_H_
diff --git a/tensorflow/core/platform/posix/error.cc b/tensorflow/core/platform/posix/error.cc
index cda6d7d8f9..f8b0285c50 100644
--- a/tensorflow/core/platform/posix/error.cc
+++ b/tensorflow/core/platform/posix/error.cc
@@ -131,8 +131,8 @@ error::Code ErrnoToCode(int err_number) {
     case ENETUNREACH:   // Network unreachable
     case ENOLCK:        // No locks available
     case ENOLINK:       // Link has been severed
-#if !(defined(__APPLE__) || defined(__FreeBSD__) || defined(_WIN32) || \
-      defined(__HAIKU__))
+#if !(defined(__APPLE__) || defined(__FreeBSD__) || defined(_WIN32) \
+	|| defined(__HAIKU__))
     case ENONET:  // Machine is not on the network
 #endif
       code = error::UNAVAILABLE;
diff --git a/tensorflow/core/platform/posix/port.cc b/tensorflow/core/platform/posix/port.cc
index 614ee00b01..09f69a95c1 100644
--- a/tensorflow/core/platform/posix/port.cc
+++ b/tensorflow/core/platform/posix/port.cc
@@ -37,8 +37,8 @@ limitations under the License.
 #ifdef TF_USE_SNAPPY
 #include "snappy.h"
 #endif
-#if (defined(__APPLE__) && defined(__MACH__)) || defined(__FreeBSD__) || \
-    defined(__HAIKU__)
+#if (defined(__APPLE__) && defined(__MACH__)) || defined(__FreeBSD__) \
+	|| defined(__HAIKU__)
 #include <thread>
 #endif
 
@@ -62,8 +62,8 @@ int NumSchedulableCPUs() {
   }
   perror("sched_getaffinity");
 #endif
-#if (defined(__APPLE__) && defined(__MACH__)) || defined(__FreeBSD__) || \
-    defined(__HAIKU__)
+#if (defined(__APPLE__) && defined(__MACH__)) || defined(__FreeBSD__) \
+	|| defined(__HAIKU__)
   unsigned int count = std::thread::hardware_concurrency();
   if (count > 0) return static_cast<int>(count);
 #endif
diff --git a/tensorflow/core/util/cuda_kernel_helper.h b/tensorflow/core/util/cuda_kernel_helper.h
index cf11f419a4..8fa0dfbed9 100644
--- a/tensorflow/core/util/cuda_kernel_helper.h
+++ b/tensorflow/core/util/cuda_kernel_helper.h
@@ -752,12 +752,6 @@ __device__ EIGEN_ALWAYS_INLINE T CudaShuffleDown(unsigned mask, T value,
   return __shfl_down_sync(mask, value, delta, width);
 }
 
-__device__ EIGEN_ALWAYS_INLINE Eigen::half CudaShuffleDown(
-    unsigned mask, Eigen::half value, int delta, int width = warpSize) {
-  return Eigen::half(
-      __shfl_down_sync(mask, static_cast<uint16>(value), delta, width));
-}
-
 // Variant of the (undocumented) version from the CUDA SDK, but using unsigned
 // instead of float for lo and hi (which is incorrect with ftz, for example).
 // A bug has been filed with NVIDIA and will be fixed in the next CUDA release.
@@ -780,12 +774,6 @@ __device__ EIGEN_ALWAYS_INLINE T CudaShuffleXor(unsigned mask, T value,
   return __shfl_xor_sync(mask, value, laneMask, width);
 }
 
-__device__ EIGEN_ALWAYS_INLINE Eigen::half CudaShuffleXor(
-    unsigned mask, Eigen::half value, int laneMask, int width = warpSize) {
-  return Eigen::half(
-      __shfl_xor_sync(mask, static_cast<uint16>(value), laneMask, width));
-}
-
 // Variant of the (undocumented) version from the CUDA SDK, but using unsigned
 // instead of float for lo and hi (which is incorrect with ftz, for example).
 // A bug has been filed with NVIDIA and will be fixed in the next CUDA release.
diff --git a/tensorflow/core/util/mkl_util.h b/tensorflow/core/util/mkl_util.h
index 148c7851bd..118ff0d0d6 100644
--- a/tensorflow/core/util/mkl_util.h
+++ b/tensorflow/core/util/mkl_util.h
@@ -24,25 +24,25 @@ limitations under the License.
 #include "mkl_dnn_types.h"
 #include "mkl_service.h"
 #include "mkl_trans.h"
-#include "tensorflow/core/framework/op_kernel.h"
 #include "tensorflow/core/framework/tensor.h"
 #include "tensorflow/core/framework/tensor_shape.h"
-#include "tensorflow/core/graph/mkl_graph_util.h"
+#include "tensorflow/core/framework/op_kernel.h"
 #include "tensorflow/core/lib/core/errors.h"
 #include "tensorflow/core/lib/gtl/array_slice.h"
 #include "tensorflow/core/platform/logging.h"
 #include "tensorflow/core/platform/macros.h"
 #include "tensorflow/core/util/padding.h"
 #include "tensorflow/core/util/tensor_format.h"
+#include "tensorflow/core/graph/mkl_graph_util.h"
 
 #ifdef INTEL_MKL_DNN
 #include "mkldnn.hpp"
 
-using mkldnn::engine;
 using mkldnn::memory;
-using mkldnn::padding_kind;
-using mkldnn::primitive;
 using mkldnn::reorder;
+using mkldnn::primitive;
+using mkldnn::padding_kind;
+using mkldnn::engine;
 #endif
 
 // The file contains a number of utility classes and functions used by MKL
@@ -56,14 +56,8 @@ namespace tensorflow {
 // Tensorflow tensor.
 
 typedef enum { W = 0, H = 1, C = 2, N = 3 } MklDims;
-typedef enum {
-  Dim_N = 0,
-  Dim_C = 1,
-  Dim_H = 2,
-  Dim_W = 3,
-  Dim_O = 0,
-  Dim_I = 1
-} MklDnnDims;
+typedef enum { Dim_N = 0, Dim_C = 1, Dim_H = 2, Dim_W = 3,
+               Dim_O = 0, Dim_I = 1 } MklDnnDims;
 
 class MklShape {
  public:
@@ -242,7 +236,8 @@ class MklShape {
   (IS_MKL_TENSOR_OFFSET + sizeof(size_t))  // Location of dimension_
 // Location of sizes. Note dim is not used here, left here
 // to make macros consistent.
-#define SIZES_OFFSET(dims) (DIMS_OFFSET + sizeof(size_t))
+#define SIZES_OFFSET(dims) \
+  (DIMS_OFFSET + sizeof(size_t))
 #define STRIDES_OFFSET(dims) \
   (SIZES_OFFSET(dims) + dims * sizeof(size_t))  // Location of strides
 #define MKL_LAYOUT_OFFSET(dims) \
@@ -337,7 +332,7 @@ class MklDnnShape {
     /// Number of dimensions in Tensorflow format
     size_t dimension_ = 0;
     /// Required by MKLDNN for conversions
-    mkldnn_dims_t sizes_;  // Required by MKL for conversions
+    mkldnn_dims_t sizes_;    // Required by MKL for conversions
     memory::format tf_data_format_ = memory::format::format_undef;
     memory::data_type T_ = memory::data_type::data_undef;
     // MKL layout
@@ -350,13 +345,15 @@ class MklDnnShape {
   typedef std::remove_extent<mkldnn_dims_t>::type mkldnn_dim_t;
 #define INVALID_DIM_SIZE -1
 
+
  public:
   MklDnnShape() {
-    for (size_t i = 0; i < sizeof(data_.sizes_) / sizeof(data_.sizes_[0]);
-         ++i) {
+    for (size_t i = 0; i < sizeof(data_.sizes_) /
+                           sizeof(data_.sizes_[0]); ++i) {
       data_.sizes_[i] = -1;
     }
-    for (size_t i = 0; i < sizeof(data_.map_) / sizeof(data_.map_[0]); ++i) {
+    for (size_t i = 0; i < sizeof(data_.map_) /
+                           sizeof(data_.map_[0]); ++i) {
       data_.map_[i] = -1;
     }
   }
@@ -372,26 +369,26 @@ class MklDnnShape {
   inline void SetDimensions(const size_t dimension) {
     data_.dimension_ = dimension;
   }
-  inline size_t GetDimension(char dimension) const {
+  inline size_t GetDimension(char dimension)const {
     int index = GetMklDnnTensorDimIndex(dimension);
     CHECK(index >= 0 && index < this->GetDimension())
         << "Invalid index from the dimension: " << index << ", " << dimension;
     return this->DimSize(index);
   }
 
-  inline int32 GetMklDnnTensorDimIndex(char dimension) const {
+  inline int32 GetMklDnnTensorDimIndex(char dimension)const {
     switch (dimension) {
-      case 'N':
-        return MklDnnDims::Dim_N;
-      case 'C':
-        return MklDnnDims::Dim_C;
-      case 'H':
-        return MklDnnDims::Dim_H;
-      case 'W':
-        return MklDnnDims::Dim_W;
-      default:
-        LOG(FATAL) << "Invalid dimension: " << dimension;
-        return -1;  // Avoid compiler warning about missing return value
+  case 'N':
+    return MklDnnDims::Dim_N;
+  case 'C':
+    return MklDnnDims::Dim_C;
+  case 'H':
+    return MklDnnDims::Dim_H;
+  case 'W':
+    return MklDnnDims::Dim_W;
+  default:
+    LOG(FATAL) << "Invalid dimension: " << dimension;
+    return -1;  // Avoid compiler warning about missing return value
     }
   }
 
@@ -406,9 +403,9 @@ class MklDnnShape {
     memory::dims retVal;
     if (data_.is_mkl_tensor_) {
       int dimensions = sizeof(data_.sizes_) / sizeof(data_.sizes_[0]);
-      for (size_t i = 0; i < dimensions; i++) {
+      for (size_t i = 0 ; i < dimensions; i++) {
         if (data_.sizes_[i] != INVALID_DIM_SIZE)
-          retVal.push_back(data_.sizes_[i]);
+        retVal.push_back(data_.sizes_[i]);
       }
     } else {
       CHECK_EQ(data_.is_mkl_tensor_, true);
@@ -417,7 +414,7 @@ class MklDnnShape {
   }
 
   inline int64 DimSize(int index) const {
-    CHECK_LT(index, sizeof(data_.sizes_) / sizeof(data_.sizes_[0]));
+    CHECK_LT(index, sizeof(data_.sizes_)/sizeof(data_.sizes_[0]));
     return data_.sizes_[index];
   }
 
@@ -454,7 +451,7 @@ class MklDnnShape {
   /// We don't create primitive_descriptor for TensorFlow layout now.
   /// We use lazy evaluation and create it only when needed.
   inline void SetTfLayout(size_t dims, const memory::dims& sizes,
-                          memory::format format) {
+                   memory::format format) {
     CHECK_EQ(dims, sizes.size());
     data_.dimension_ = dims;
     for (size_t ii = 0; ii < dims; ii++) {
@@ -500,7 +497,9 @@ class MklDnnShape {
     SetTfDimOrder(dimension, data_format);
   }
 
-  inline const mkldnn_dim_t* GetTfToMklDimMap() const { return &data_.map_[0]; }
+  inline const mkldnn_dim_t* GetTfToMklDimMap() const {
+    return &data_.map_[0];
+  }
   inline size_t TfDimIdx(int index) const { return data_.map_[index]; }
   inline int64 TfDimSize(int index) const {
     return data_.sizes_[TfDimIdx(index)];
@@ -554,7 +553,9 @@ class MklDnnShape {
 
   /// Size of buffer to hold the serialized object, the size is computed by
   /// following above mentioned order
-  inline size_t GetSerializeBufferSize() const { return sizeof(MklShapeData); }
+  inline size_t GetSerializeBufferSize() const {
+    return sizeof(MklShapeData);
+  }
 
   void SerializeMklDnnShape(unsigned char* buf, size_t buf_size) const {
     CHECK(buf_size >= GetSerializeBufferSize())
@@ -565,12 +566,12 @@ class MklDnnShape {
   void DeSerializeMklDnnShape(const unsigned char* buf, size_t buf_size) {
     // Make sure buffer holds at least is_mkl_tensor_.
     CHECK(buf_size >= sizeof(data_.is_mkl_tensor_))
-        << "Buffer size is too small in DeSerializeMklDnnShape";
+      << "Buffer size is too small in DeSerializeMklDnnShape";
 
     const bool is_mkl_tensor = *reinterpret_cast<const bool*>(buf);
     if (is_mkl_tensor) {  // If it is an MKL Tensor then read the rest
       CHECK(buf_size >= GetSerializeBufferSize())
-          << "Buffer size is too small in DeSerializeMklDnnShape";
+        << "Buffer size is too small in DeSerializeMklDnnShape";
       data_ = *reinterpret_cast<const MklShapeData*>(buf);
     }
   }
@@ -659,7 +660,8 @@ inline void GetMklShape(OpKernelContext* ctext, int n, MklShape* mklshape) {
 }
 
 #ifdef INTEL_MKL_DNN
-inline void GetMklShape(OpKernelContext* ctext, int n, MklDnnShape* mklshape) {
+inline void GetMklShape(OpKernelContext* ctext, int n,
+                        MklDnnShape* mklshape) {
   mklshape->DeSerializeMklDnnShape(
       ctext->input(GetTensorMetaDataIndex(n, ctext->num_inputs()))
           .flat<uint8>()
@@ -698,7 +700,8 @@ inline void GetMklShapeList(OpKernelContext* ctext, StringPiece name,
 /// Get shape of input tensor pointed by 'input_idx' in TensorShape format.
 /// If the input tensor is in MKL layout, then obtains TensorShape from
 /// MklShape.
-inline TensorShape GetTfShape(OpKernelContext* context, size_t input_idx) {
+inline TensorShape GetTfShape(OpKernelContext* context,
+                              size_t input_idx) {
   // Sanity check.
   CHECK_NOTNULL(context);
   CHECK_LT(input_idx, context->num_inputs());
@@ -818,7 +821,7 @@ inline void AllocTmpBuffer(OpKernelContext* context, Tensor* tensor_out,
 
 template <typename T>
 inline void AllocTmpBuffer(OpKernelContext* context, Tensor* tensor_out,
-                           TensorShape tf_shape) {
+                              TensorShape tf_shape) {
   OP_REQUIRES_OK(context, context->allocate_temp(DataTypeToEnum<T>::v(),
                                                  tf_shape, tensor_out));
 }
@@ -1096,8 +1099,7 @@ inline void MklNCHWToNHWC(const Tensor& input, Tensor** output) {
 ///
 /// @input None
 /// @return memory::data_type corresponding to type T
-template <typename T>
-static memory::data_type MklDnnType();
+template<typename T> static memory::data_type MklDnnType();
 
 /// Instantiation for float type. Add similar instantiations for other
 /// type if needed.
@@ -1112,11 +1114,10 @@ memory::data_type MklDnnType<float>() {
 /// @return: memory::format corresponding to TensorFlow data format;
 ///          Fails with an error if invalid data format.
 inline memory::format TFDataFormatToMklDnnDataFormat(TensorFormat format) {
-  if (format == FORMAT_NHWC)
-    return memory::format::nhwc;
-  else if (format == FORMAT_NCHW)
-    return memory::format::nchw;
-  TF_CHECK_OK(Status(error::Code::INVALID_ARGUMENT, "Unsupported data format"));
+  if (format == FORMAT_NHWC) return memory::format::nhwc;
+  else if (format == FORMAT_NCHW) return memory::format::nchw;
+  TF_CHECK_OK(Status(error::Code::INVALID_ARGUMENT,
+                     "Unsupported data format"));
   // Return to get rid of compiler warning
   return memory::format::format_undef;
 }
@@ -1127,11 +1128,10 @@ inline memory::format TFDataFormatToMklDnnDataFormat(TensorFormat format) {
 /// @return: Tensorflow data format corresponding to memory::format
 ///          Fails with an error if invalid data format.
 inline TensorFormat MklDnnDataFormatToTFDataFormat(memory::format format) {
-  if (format == memory::format::nhwc)
-    return FORMAT_NHWC;
-  else if (format == memory::format::nchw)
-    return FORMAT_NCHW;
-  TF_CHECK_OK(Status(error::Code::INVALID_ARGUMENT, "Unsupported data format"));
+  if (format == memory::format::nhwc) return FORMAT_NHWC;
+  else if (format == memory::format::nchw) return FORMAT_NCHW;
+  TF_CHECK_OK(Status(error::Code::INVALID_ARGUMENT,
+                     "Unsupported data format"));
 }
 
 /// Map TensorShape object into memory::dims required by MKL-DNN
@@ -1161,7 +1161,7 @@ inline memory::dims TFShapeToMklDnnDims(const TensorShape& shape) {
 /// @input TensorShape object in shape
 /// @return memory::dims in MKL-DNN required NCHW format
 inline memory::dims TFShapeToMklDnnDimsInNCHW(const TensorShape& shape,
-                                              TensorFormat format) {
+                                            TensorFormat format) {
   // Check validity of format.
   CHECK_NE(TFDataFormatToMklDnnDataFormat(format),
            memory::format::format_undef);
@@ -1237,23 +1237,21 @@ class MklDnnData {
   const engine* cpu_engine_;
 
  public:
-  explicit MklDnnData(const engine* e)
-      : user_memory_(nullptr),
-        reorder_memory_(nullptr),
-        op_md_(nullptr),
-        cpu_engine_(e) {}
+  explicit MklDnnData(const engine* e) : user_memory_(nullptr),
+                                         reorder_memory_(nullptr),
+                                         op_md_(nullptr), cpu_engine_(e) {}
 
   ~MklDnnData() {
     cpu_engine_ = nullptr;  // We don't own this.
-    delete (user_memory_);
-    delete (reorder_memory_);
-    delete (op_md_);
+    delete(user_memory_);
+    delete(reorder_memory_);
+    delete(op_md_);
   }
 
   inline void* GetTensorBuffer(const Tensor* tensor) const {
     CHECK_NOTNULL(tensor);
-    return const_cast<void*>(
-        static_cast<const void*>(tensor->flat<T>().data()));
+    return const_cast<void*>(static_cast<const void*>(
+              tensor->flat<T>().data()));
   }
 
   /// Set user memory primitive using specified dimensions, memory format and
@@ -1285,7 +1283,7 @@ class MklDnnData {
   /// @return: memory::desc object corresponding to blocked memory format
   ///          for given dimensions and strides.
   static inline memory::desc CreateBlockedMemDesc(const memory::dims& dim,
-                                                  const memory::dims& strides) {
+      const memory::dims& strides) {
     CHECK_EQ(dim.size(), strides.size());
 
     // We have to construct memory descriptor in a C style. This is not at all
@@ -1354,7 +1352,7 @@ class MklDnnData {
     CHECK_NOTNULL(cpu_engine_);
     // TODO(nhasabni): can we remove dynamic memory allocation?
     if (data_buffer) {
-      user_memory_ = new memory(pd, data_buffer);
+     user_memory_ = new memory(pd, data_buffer);
     } else {
       user_memory_ = new memory(pd);
     }
diff --git a/tensorflow/core/util/mkl_util_test.cc b/tensorflow/core/util/mkl_util_test.cc
index 8b73eadb40..6aef3d86e9 100644
--- a/tensorflow/core/util/mkl_util_test.cc
+++ b/tensorflow/core/util/mkl_util_test.cc
@@ -54,6 +54,7 @@ TEST(MklUtilTest, MklDnnTfShape) {
   EXPECT_NE(b_tf_shape_nchw, b_mkldnn_tf_shape);
 }
 
+
 TEST(MklUtilTest, MklDnnBlockedFormatTest) {
   // Let's create 2D tensor of shape {3, 4} with 3 being innermost dimension
   // first (case 1) and then it being outermost dimension (case 2).
diff --git a/tensorflow/java/src/test/java/org/tensorflow/ShapeTest.java b/tensorflow/java/src/test/java/org/tensorflow/ShapeTest.java
index 313c09e1e4..92cc3bd60e 100644
--- a/tensorflow/java/src/test/java/org/tensorflow/ShapeTest.java
+++ b/tensorflow/java/src/test/java/org/tensorflow/ShapeTest.java
@@ -84,10 +84,11 @@ public class ShapeTest {
     assertEquals(Shape.scalar(), Shape.scalar());
     assertEquals(Shape.make(1, 2, 3), Shape.make(1, 2, 3));
 
-    assertNotEquals(Shape.make(1, 2), null);
-    assertNotEquals(Shape.make(1, 2), new Object());
+    assertNotEquals(Shape.make(1,2), null);
+    assertNotEquals(Shape.make(1,2), new Object());
     assertNotEquals(Shape.make(1, 2, 3), Shape.make(1, 2, 4));
 
+
     assertNotEquals(Shape.unknown(), Shape.unknown());
     assertNotEquals(Shape.make(-1), Shape.make(-1));
     assertNotEquals(Shape.make(1, -1, 3), Shape.make(1, -1, 3));
@@ -102,3 +103,4 @@ public class ShapeTest {
     assertNotEquals(Shape.make(1, 2).hashCode(), Shape.make(1, 3).hashCode());
   }
 }
+
diff --git a/tensorflow/python/estimator/inputs/numpy_io.py b/tensorflow/python/estimator/inputs/numpy_io.py
index 750af20e8a..3512f66284 100644
--- a/tensorflow/python/estimator/inputs/numpy_io.py
+++ b/tensorflow/python/estimator/inputs/numpy_io.py
@@ -117,11 +117,11 @@ def numpy_input_fn(x,
         raise ValueError('y cannot be empty dict, use None instead.')
 
       ordered_dict_y = collections.OrderedDict(
-          sorted(y.items(), key=lambda t: t[0]))
+        sorted(y.items(), key=lambda t: t[0]))
       target_keys = list(ordered_dict_y.keys())
 
       duplicate_keys = set(feature_keys).intersection(set(target_keys))
-      if duplicate_keys:
+      if len(duplicate_keys):
         raise ValueError('{} duplicate keys are found in both x and y: '
                          '{}'.format(len(duplicate_keys), duplicate_keys))
 
@@ -131,14 +131,16 @@ def numpy_input_fn(x,
       ordered_dict_data[target_keys] = y
 
     if len(set(v.shape[0] for v in ordered_dict_data.values())) != 1:
-      shape_dict_of_x = {k: ordered_dict_data[k].shape for k in feature_keys}
+      shape_dict_of_x = {k: ordered_dict_data[k].shape
+                         for k in feature_keys}
 
       if target_keys is None:
         shape_of_y = None
       elif isinstance(target_keys, string_types):
         shape_of_y = y.shape
       else:
-        shape_of_y = {k: ordered_dict_data[k].shape for k in target_keys}
+        shape_of_y = {k: ordered_dict_data[k].shape
+                      for k in target_keys}
 
       raise ValueError('Length of tensors in x and y is mismatched. All '
                        'elements in x and y must have the same length.\n'
@@ -153,12 +155,11 @@ def numpy_input_fn(x,
         enqueue_size=batch_size,
         num_epochs=num_epochs)
 
-    batch = (
-        queue.dequeue_many(batch_size)
-        if num_epochs is None else queue.dequeue_up_to(batch_size))
+    batch = (queue.dequeue_many(batch_size) if num_epochs is None
+                else queue.dequeue_up_to(batch_size))
 
     # Remove the first `Tensor` in `batch`, which is the row number.
-    if batch:
+    if len(batch) > 0:
       batch.pop(0)
 
     features = dict(zip(feature_keys, batch[:len(feature_keys)]))
diff --git a/tensorflow/python/estimator/inputs/numpy_io_test.py b/tensorflow/python/estimator/inputs/numpy_io_test.py
index 1374e3f7e1..65eae7a7dc 100644
--- a/tensorflow/python/estimator/inputs/numpy_io_test.py
+++ b/tensorflow/python/estimator/inputs/numpy_io_test.py
@@ -255,7 +255,7 @@ class NumpyIoTest(test.TestCase):
 
     with self.test_session() as session:
       input_fn = numpy_io.numpy_input_fn(
-          x, y, batch_size=2, shuffle=False, num_epochs=1)
+        x, y, batch_size=2, shuffle=False, num_epochs=1)
       features_tensor = input_fn()
 
       coord = coordinator.Coordinator()
@@ -327,7 +327,7 @@ class NumpyIoTest(test.TestCase):
 
     with self.test_session() as session:
       input_fn = numpy_io.numpy_input_fn(
-          x, y, batch_size=2, shuffle=False, num_epochs=1)
+        x, y, batch_size=2, shuffle=False, num_epochs=1)
       features_tensor, targets_tensor = input_fn()
 
       coord = coordinator.Coordinator()
@@ -362,10 +362,13 @@ class NumpyIoTest(test.TestCase):
     a = np.arange(4) * 1.0
     b = np.arange(32, 36)
     x = {'a': a, 'b': b}
-    y = {'y1': np.arange(-32, -28), 'a': a, 'y2': np.arange(32, 28, -1), 'b': b}
+    y = {'y1': np.arange(-32, -28),
+         'a': a,
+         'y2': np.arange(32, 28, -1),
+         'b': b}
     with self.test_session():
       with self.assertRaisesRegexp(
-          ValueError, '2 duplicate keys are found in both x and y'):
+              ValueError, '2 duplicate keys are found in both x and y'):
         failing_input_fn = numpy_io.numpy_input_fn(x, y, shuffle=False)
         failing_input_fn()
 
diff --git a/tensorflow/python/framework/test_util.py b/tensorflow/python/framework/test_util.py
index 4c026590c2..1610214d54 100644
--- a/tensorflow/python/framework/test_util.py
+++ b/tensorflow/python/framework/test_util.py
@@ -987,9 +987,10 @@ class TensorFlowTestCase(googletest.TestCase):
       msg: An optional string message to append to the failure message.
     """
     # f1 == f2 is needed here as we might have: f1, f2 = inf, inf
-    self.assertTrue(f1 == f2 or math.fabs(f1 - f2) <= err,
-                    "%f != %f +/- %f%s" % (f1, f2, err, " (%s)" % msg
-                                           if msg is not None else ""))
+    self.assertTrue(
+        f1 == f2 or math.fabs(f1 - f2) <= err,
+        "%f != %f +/- %f%s" % (f1, f2, err, " (%s)" % msg
+                               if msg is not None else ""))
 
   def assertArrayNear(self, farray1, farray2, err):
     """Asserts that two float arrays are near each other.
diff --git a/tensorflow/python/kernel_tests/array_ops_test.py b/tensorflow/python/kernel_tests/array_ops_test.py
index 1bf2b70c1b..76b80e60ea 100644
--- a/tensorflow/python/kernel_tests/array_ops_test.py
+++ b/tensorflow/python/kernel_tests/array_ops_test.py
@@ -114,21 +114,21 @@ class BooleanMaskTest(test_util.TensorFlowTestCase):
     arr = np.random.rand(*arr_shape)
     mask = make_mask(arr_shape[:ndims_mask])
     if axis is not None:
-      mask = make_mask(arr_shape[axis:ndims_mask + axis])
+      mask = make_mask(arr_shape[axis:ndims_mask+axis])
     if axis is None or axis == 0:
       masked_arr = arr[mask]
     elif axis == 1:
-      masked_arr = arr[:, mask]
+      masked_arr = arr[:,mask]
     elif axis == 2:
-      masked_arr = arr[:, :, mask]
-    with self.test_session():
+      masked_arr = arr[:,:,mask]
+    with self.test_session() as sess:
       masked_tensor = array_ops.boolean_mask(arr, mask, axis=axis)
 
       # Leading dimension size of masked_tensor is always unknown until runtime
       # since we don't how many elements will be kept.
       leading = 1 if axis is None else axis + 1
       self.assertAllEqual(masked_tensor.get_shape()[leading:],
-                          masked_arr.shape[leading:])
+          masked_arr.shape[leading:])
 
       self.assertAllClose(masked_arr, masked_tensor.eval())
 
@@ -1078,7 +1078,6 @@ class PadTest(test_util.TensorFlowTestCase):
                            [0, 0, 4, 5, 6, 0, 0],
                            [0, 0, 0, 0, 0, 0, 0]])
 
-
 class InvertPermutationTest(test_util.TensorFlowTestCase):
 
   def testInvertPermutation(self):
diff --git a/tensorflow/python/kernel_tests/bincount_op_test.py b/tensorflow/python/kernel_tests/bincount_op_test.py
index 2767df127e..79285476b4 100644
--- a/tensorflow/python/kernel_tests/bincount_op_test.py
+++ b/tensorflow/python/kernel_tests/bincount_op_test.py
@@ -25,7 +25,6 @@ from tensorflow.python.framework import test_util
 from tensorflow.python.ops import math_ops
 from tensorflow.python.platform import googletest
 
-
 class BincountTest(test_util.TensorFlowTestCase):
 
   def test_empty(self):
@@ -73,7 +72,8 @@ class BincountTest(test_util.TensorFlowTestCase):
         else:
           weights = np.random.random(num_samples)
         self.assertAllClose(
-            math_ops.bincount(arr, weights).eval(), np.bincount(arr, weights))
+            math_ops.bincount(arr, weights).eval(),
+            np.bincount(arr, weights))
 
   def test_random_without_weights(self):
     num_samples = 10000
@@ -83,7 +83,8 @@ class BincountTest(test_util.TensorFlowTestCase):
         arr = np.random.randint(0, 1000, num_samples)
         weights = np.ones(num_samples).astype(dtype)
         self.assertAllClose(
-            math_ops.bincount(arr, None).eval(), np.bincount(arr, weights))
+            math_ops.bincount(arr, None).eval(),
+            np.bincount(arr, weights))
 
   def test_zero_weights(self):
     with self.test_session(use_gpu=True):
diff --git a/tensorflow/python/kernel_tests/constant_op_test.py b/tensorflow/python/kernel_tests/constant_op_test.py
index 68817cc256..6cbdd4cbb3 100644
--- a/tensorflow/python/kernel_tests/constant_op_test.py
+++ b/tensorflow/python/kernel_tests/constant_op_test.py
@@ -439,10 +439,11 @@ class ZerosLikeTest(test.TestCase):
 
   def testZerosLikeCPU(self):
     for dtype in [
-        dtypes_lib.float32, dtypes_lib.float64, dtypes_lib.int8,
-        dtypes_lib.uint8, dtypes_lib.int16, dtypes_lib.uint16, dtypes_lib.int32,
-        dtypes_lib.int64, dtypes_lib.bool, dtypes_lib.complex64,
-        dtypes_lib.complex128, dtypes_lib.string
+        dtypes_lib.float32, dtypes_lib.float64,
+        dtypes_lib.int8, dtypes_lib.uint8, dtypes_lib.int16, dtypes_lib.uint16,
+        dtypes_lib.int32, dtypes_lib.int64, dtypes_lib.bool,
+        dtypes_lib.complex64, dtypes_lib.complex128,
+        dtypes_lib.string
     ]:
       self._compareZeros(dtype, fully_defined_shape=False, use_gpu=False)
       self._compareZeros(dtype, fully_defined_shape=True, use_gpu=False)
@@ -573,10 +574,10 @@ class OnesLikeTest(test.TestCase):
 
   def testOnesLike(self):
     for dtype in [
-        dtypes_lib.float32, dtypes_lib.float64, dtypes_lib.int8,
-        dtypes_lib.uint8, dtypes_lib.int16, dtypes_lib.uint16, dtypes_lib.int32,
-        dtypes_lib.int64, dtypes_lib.bool, dtypes_lib.complex64,
-        dtypes_lib.complex128
+        dtypes_lib.float32, dtypes_lib.float64,
+        dtypes_lib.int8, dtypes_lib.uint8, dtypes_lib.int16, dtypes_lib.uint16,
+        dtypes_lib.int32, dtypes_lib.int64, dtypes_lib.bool,
+        dtypes_lib.complex64, dtypes_lib.complex128
     ]:
       numpy_dtype = dtype.as_numpy_dtype
       with self.test_session():
diff --git a/tensorflow/python/kernel_tests/conv1d_test.py b/tensorflow/python/kernel_tests/conv1d_test.py
index d92797a7d3..a7e23ead1c 100644
--- a/tensorflow/python/kernel_tests/conv1d_test.py
+++ b/tensorflow/python/kernel_tests/conv1d_test.py
@@ -52,6 +52,7 @@ class Conv1DTest(test.TestCase):
           self.assertEqual(len(output), 2)
           self.assertAllClose(output, [2 * 1 + 1 * 2, 2 * 3 + 1 * 4])
 
+
   def testConv1DTranspose(self):
     with self.test_session():
       stride = 2
@@ -92,6 +93,5 @@ class Conv1DTest(test.TestCase):
 
     self.assertAllClose(cache_values, value)
 
-
 if __name__ == "__main__":
   test.main()
diff --git a/tensorflow/python/kernel_tests/conv_ops_3d_test.py b/tensorflow/python/kernel_tests/conv_ops_3d_test.py
index ec8ac74163..116681fc4c 100644
--- a/tensorflow/python/kernel_tests/conv_ops_3d_test.py
+++ b/tensorflow/python/kernel_tests/conv_ops_3d_test.py
@@ -68,8 +68,8 @@ class Conv3DTest(test.TestCase):
       total_size_2 *= s
 
     # Initializes the input tensor with array containing numbers from 0 to 1.
-    # We keep the input tensor values fairly small to avoid overflowing float16
-    # during the conv3d.
+    # We keep the input tensor values fairly small to avoid overflowing a float16 
+    # tensor during the conv3d 
     x1 = [f * 1.0 / total_size_1 for f in range(1, total_size_1 + 1)]
     x2 = [f * 1.0 / total_size_2 for f in range(1, total_size_2 + 1)]
     with self.test_session(use_gpu=use_gpu):
@@ -115,13 +115,15 @@ class Conv3DTest(test.TestCase):
           if value.dtype == np.float16:
             tol = 1e-3
 
-          self.assertAllClose(expected, value.flatten(), atol=tol, rtol=tol)
+          self.assertAllClose(expected, value.flatten(), atol=tol,
+                              rtol=tol)
 
   def testConv3D1x1x1Filter(self):
     expected_output = [
-        0.18518519, 0.22222222, 0.25925926, 0.40740741, 0.5, 0.59259259,
-        0.62962963, 0.77777778, 0.92592593, 0.85185185, 1.05555556, 1.25925926,
-        1.07407407, 1.33333333, 1.59259259, 1.2962963, 1.61111111, 1.92592593
+        0.18518519,  0.22222222,  0.25925926,  0.40740741,  0.5       ,
+        0.59259259,  0.62962963,  0.77777778,  0.92592593,  0.85185185,
+        1.05555556,  1.25925926,  1.07407407,  1.33333333,  1.59259259,
+        1.2962963 ,  1.61111111,  1.92592593
     ]
 
     # These are equivalent to the Conv2D1x1 case.
@@ -147,10 +149,10 @@ class Conv3DTest(test.TestCase):
   # Expected values computed using scipy's correlate function.
   def testConv3D2x2x2Filter(self):
     expected_output = [
-        3.77199074, 3.85069444, 3.92939815, 4.2650463, 4.35763889, 4.45023148,
-        6.73032407, 6.89236111, 7.05439815, 7.22337963, 7.39930556, 7.57523148,
-        9.68865741, 9.93402778, 10.17939815, 10.18171296, 10.44097222,
-        10.70023148
+        3.77199074,   3.85069444,   3.92939815,   4.2650463 ,   4.35763889,
+        4.45023148,   6.73032407,   6.89236111,   7.05439815,   7.22337963,
+        7.39930556,   7.57523148,   9.68865741,   9.93402778,  10.17939815,
+        10.18171296,  10.44097222,  10.70023148
     ]
     # expected_shape = [1, 3, 1, 2, 5]
     self._VerifyValues(
@@ -162,17 +164,19 @@ class Conv3DTest(test.TestCase):
 
   def testConv3DStrides(self):
     expected_output = [
-        0.06071429, 0.08988095, 0.10238095, 0.11488095, 0.12738095, 0.13988095,
-        0.08452381, 0.26071429, 0.35238095, 0.36488095, 0.37738095, 0.38988095,
-        0.40238095, 0.23452381, 0.46071429, 0.61488095, 0.62738095, 0.63988095,
-        0.65238095, 0.66488095, 0.38452381, 1.12738095, 1.48988095, 1.50238095,
-        1.51488095, 1.52738095, 1.53988095, 0.88452381, 1.32738095, 1.75238095,
-        1.76488095, 1.77738095, 1.78988095, 1.80238095, 1.03452381, 1.52738095,
-        2.01488095, 2.02738095, 2.03988095, 2.05238095, 2.06488095, 1.18452381,
-        2.19404762, 2.88988095, 2.90238095, 2.91488095, 2.92738095, 2.93988095,
-        1.68452381, 2.39404762, 3.15238095, 3.16488095, 3.17738095, 3.18988095,
-        3.20238095, 1.83452381, 2.59404762, 3.41488095, 3.42738095, 3.43988095,
-        3.45238095, 3.46488095, 1.98452381
+        0.06071429,  0.08988095,  0.10238095,  0.11488095,  0.12738095,
+        0.13988095,  0.08452381,  0.26071429,  0.35238095,  0.36488095,
+        0.37738095,  0.38988095,  0.40238095,  0.23452381,  0.46071429,
+        0.61488095,  0.62738095,  0.63988095,  0.65238095,  0.66488095,
+        0.38452381,  1.12738095,  1.48988095,  1.50238095,  1.51488095,
+        1.52738095,  1.53988095,  0.88452381,  1.32738095,  1.75238095,
+        1.76488095,  1.77738095,  1.78988095,  1.80238095,  1.03452381,
+        1.52738095,  2.01488095,  2.02738095,  2.03988095,  2.05238095,
+        2.06488095,  1.18452381,  2.19404762,  2.88988095,  2.90238095,
+        2.91488095,  2.92738095,  2.93988095,  1.68452381,  2.39404762,
+        3.15238095,  3.16488095,  3.17738095,  3.18988095,  3.20238095,
+        1.83452381,  2.59404762,  3.41488095,  3.42738095,  3.43988095,
+        3.45238095,  3.46488095,  1.98452381
     ]
     self._VerifyValues(
         tensor_in_sizes=[1, 5, 8, 7, 1],
@@ -183,7 +187,8 @@ class Conv3DTest(test.TestCase):
 
   def testConv3D2x2x2FilterStride2(self):
     expected_output = [
-        3.77199074, 3.85069444, 3.92939815, 9.68865741, 9.93402778, 10.17939815
+        3.77199074,  3.85069444,  3.92939815,  9.68865741,  9.93402778,
+        10.17939815
     ]
     self._VerifyValues(
         tensor_in_sizes=[1, 4, 2, 3, 3],
@@ -194,12 +199,14 @@ class Conv3DTest(test.TestCase):
 
   def testConv3DStride3(self):
     expected_output = [
-        1.51140873, 1.57167659, 1.63194444, 1.56349206, 1.62673611, 1.68998016,
-        1.6155754, 1.68179563, 1.74801587, 1.9280754, 2.01215278, 2.09623016,
-        1.98015873, 2.0672123, 2.15426587, 2.03224206, 2.12227183, 2.21230159,
-        4.4280754, 4.65500992, 4.88194444, 4.48015873, 4.71006944, 4.93998016,
-        4.53224206, 4.76512897, 4.99801587, 4.84474206, 5.09548611, 5.34623016,
-        4.8968254, 5.15054563, 5.40426587, 4.94890873, 5.20560516, 5.46230159
+        1.51140873,  1.57167659,  1.63194444,  1.56349206,  1.62673611,
+        1.68998016,  1.6155754 ,  1.68179563,  1.74801587,  1.9280754 ,
+        2.01215278,  2.09623016,  1.98015873,  2.0672123 ,  2.15426587,
+        2.03224206,  2.12227183,  2.21230159,  4.4280754 ,  4.65500992,
+        4.88194444,  4.48015873,  4.71006944,  4.93998016,  4.53224206,
+        4.76512897,  4.99801587,  4.84474206,  5.09548611,  5.34623016,
+        4.8968254 ,  5.15054563,  5.40426587,  4.94890873,  5.20560516,
+        5.46230159
     ]
     self._VerifyValues(
         tensor_in_sizes=[1, 6, 7, 8, 2],
@@ -210,8 +217,9 @@ class Conv3DTest(test.TestCase):
 
   def testConv3D2x2x2FilterStride2Same(self):
     expected_output = [
-        3.77199074, 3.85069444, 3.92939815, 2.0162037, 2.06597222, 2.11574074,
-        9.68865741, 9.93402778, 10.17939815, 4.59953704, 4.73263889, 4.86574074
+        3.77199074,   3.85069444,   3.92939815,   2.0162037 ,   2.06597222,
+        2.11574074,   9.68865741,   9.93402778,  10.17939815,   4.59953704,
+        4.73263889,   4.86574074
     ]
     self._VerifyValues(
         tensor_in_sizes=[1, 4, 2, 3, 3],
@@ -222,8 +230,8 @@ class Conv3DTest(test.TestCase):
 
   def testKernelSmallerThanStride(self):
     expected_output = [
-        0.03703704, 0.11111111, 0.25925926, 0.33333333, 0.7037037, 0.77777778,
-        0.92592593, 1.
+        0.03703704,  0.11111111,  0.25925926,  0.33333333,  0.7037037 ,
+        0.77777778,  0.92592593,  1.
     ]
     self._VerifyValues(
         tensor_in_sizes=[1, 3, 3, 3, 1],
@@ -239,11 +247,12 @@ class Conv3DTest(test.TestCase):
         expected=expected_output)
 
     expected_output = [
-        0.54081633, 0.58017493, 0.28061224, 0.81632653, 0.85568513, 0.40306122,
-        0.41873178, 0.4340379, 0.19642857, 2.46938776, 2.50874636, 1.1377551,
-        2.74489796, 2.78425656, 1.26020408, 1.16873178, 1.1840379, 0.51785714,
-        1.09511662, 1.10604956, 0.44642857, 1.17164723, 1.18258017, 0.47704082,
-        0.3691691, 0.37244898, 0.125
+        0.54081633,  0.58017493,  0.28061224,  0.81632653,  0.85568513,
+        0.40306122,  0.41873178,  0.4340379 ,  0.19642857,  2.46938776,
+        2.50874636,  1.1377551 ,  2.74489796,  2.78425656,  1.26020408,
+        1.16873178,  1.1840379 ,  0.51785714,  1.09511662,  1.10604956,
+        0.44642857,  1.17164723,  1.18258017,  0.47704082,  0.3691691 ,
+        0.37244898,  0.125
     ]
     self._VerifyValues(
         tensor_in_sizes=[1, 7, 7, 7, 1],
@@ -253,8 +262,8 @@ class Conv3DTest(test.TestCase):
         expected=expected_output)
 
     expected_output = [
-        0.540816, 0.580175, 0.816327, 0.855685, 2.469388, 2.508746, 2.744898,
-        2.784257
+        0.540816,  0.580175,  0.816327,  0.855685,  2.469388,  2.508746,
+        2.744898,  2.784257
     ]
     self._VerifyValues(
         tensor_in_sizes=[1, 7, 7, 7, 1],
@@ -269,7 +278,7 @@ class Conv3DTest(test.TestCase):
         filter_in_sizes=[2, 1, 2, 1, 2],
         stride=1,
         padding="VALID",
-        expected=[1.5625, 1.875])
+        expected=[1.5625,  1.875])
 
   def _ConstructAndTestGradientForConfig(
       self, batch, input_shape, filter_shape, in_depth, out_depth, stride,
@@ -309,6 +318,7 @@ class Conv3DTest(test.TestCase):
     input_data = [x * 1.0 / input_size for x in range(0, input_size)]
     filter_data = [x * 1.0 / filter_size for x in range(0, filter_size)]
 
+
     for data_type in self._DtypesToTest(use_gpu=use_gpu):
       # TODO(mjanusz): Modify gradient_checker to also provide max relative
       # error and synchronize the tolerance levels between the tests for forward
@@ -320,11 +330,12 @@ class Conv3DTest(test.TestCase):
       elif data_type == dtypes.float16:
         tolerance = 1e-3
 
+
       with self.test_session(use_gpu=use_gpu):
         orig_input_tensor = constant_op.constant(
-            input_data, shape=input_shape, dtype=data_type, name="input")
+          input_data, shape=input_shape, dtype=data_type, name="input")
         filter_tensor = constant_op.constant(
-            filter_data, shape=filter_shape, dtype=data_type, name="filter")
+          filter_data, shape=filter_shape, dtype=data_type, name="filter")
 
         if data_format == "NCDHW":
           input_tensor = test_util.NHWCToNCHW(orig_input_tensor)
@@ -334,23 +345,25 @@ class Conv3DTest(test.TestCase):
           new_strides = strides
 
         conv = nn_ops.conv3d(
-            input_tensor,
-            filter_tensor,
-            new_strides,
-            padding,
-            data_format=data_format,
-            name="conv")
+          input_tensor, filter_tensor, new_strides, padding,
+          data_format=data_format, name="conv")
 
         if data_format == "NCDHW":
           conv = test_util.NCHWToNHWC(conv)
 
+        
         if test_input:
-          jacob_t, jacob_n = gradient_checker.compute_gradient(
-              orig_input_tensor, input_shape, conv, output_shape)
+          jacob_t, jacob_n = gradient_checker.compute_gradient(orig_input_tensor,
+                                                               input_shape,
+                                                               conv,
+                                                               output_shape)
         else:
-          jacob_t, jacob_n = gradient_checker.compute_gradient(
-              filter_tensor, filter_shape, conv, output_shape)
-
+          jacob_t, jacob_n = gradient_checker.compute_gradient(filter_tensor,
+                                                               filter_shape,
+                                                               conv,
+                                                               output_shape)
+        
+        
         if data_type != dtypes.float16:
           reference_jacob_t = jacob_t
           err = np.fabs(jacob_t - jacob_n).max()
@@ -362,6 +375,7 @@ class Conv3DTest(test.TestCase):
       print("conv3d gradient error = ", err)
       self.assertLess(err, tolerance)
 
+
   def ConstructAndTestGradient(self, **kwargs):
     for data_format, use_gpu in GetTestConfigs():
       self._ConstructAndTestGradientForConfig(data_format=data_format,
diff --git a/tensorflow/python/kernel_tests/pooling_ops_test.py b/tensorflow/python/kernel_tests/pooling_ops_test.py
index 6be8997cab..150e2ff7f2 100644
--- a/tensorflow/python/kernel_tests/pooling_ops_test.py
+++ b/tensorflow/python/kernel_tests/pooling_ops_test.py
@@ -18,8 +18,8 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-import os
 import numpy as np
+import os
 
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
@@ -1442,6 +1442,7 @@ class PoolingTest(test.TestCase):
           use_gpu=True,
           v2=v2)
 
+
     # Propagate the diff in cases of NaNs
     os.environ["TF_ENABLE_MAXPOOL_NANPROP"] = "1"
     expected_input_backprop_cudnn = expected_input_backprop_tf_cpu
diff --git a/tensorflow/python/kernel_tests/reader_ops_test.py b/tensorflow/python/kernel_tests/reader_ops_test.py
index 223a4b2c87..8e54d10f32 100644
--- a/tensorflow/python/kernel_tests/reader_ops_test.py
+++ b/tensorflow/python/kernel_tests/reader_ops_test.py
@@ -1018,15 +1018,15 @@ class LMDBReaderTest(test.TestCase):
     with self.test_session() as sess:
       reader1 = io_ops.LMDBReader(name="test_read_from_same_file1")
       reader2 = io_ops.LMDBReader(name="test_read_from_same_file2")
-      filename_queue = input_lib.string_input_producer(
-          [self.db_path], num_epochs=None)
+      filename_queue = input_lib.string_input_producer([self.db_path],
+                                                       num_epochs=None)
       key1, value1 = reader1.read(filename_queue)
       key2, value2 = reader2.read(filename_queue)
 
       coord = coordinator.Coordinator()
       threads = queue_runner_impl.start_queue_runners(sess, coord=coord)
-      for _ in range(3):
-        for _ in range(10):
+      for i in range(3):
+        for j in range(10):
           k1, v1, k2, v2 = sess.run([key1, value1, key2, value2])
           self.assertAllEqual(compat.as_bytes(k1), compat.as_bytes(k2))
           self.assertAllEqual(compat.as_bytes(v1), compat.as_bytes(v2))
@@ -1054,14 +1054,14 @@ class LMDBReaderTest(test.TestCase):
   def testReadFromFileRepeatedly(self):
     with self.test_session() as sess:
       reader = io_ops.LMDBReader(name="test_read_from_file_repeated")
-      filename_queue = input_lib.string_input_producer(
-          [self.db_path], num_epochs=None)
+      filename_queue = input_lib.string_input_producer([self.db_path],
+                                                       num_epochs=None)
       key, value = reader.read(filename_queue)
 
       coord = coordinator.Coordinator()
       threads = queue_runner_impl.start_queue_runners(sess, coord=coord)
       # Iterate over the lmdb 3 times.
-      for _ in range(3):
+      for i in range(3):
         # Go over all 10 records each time.
         for j in range(10):
           k, v = sess.run([key, value])
@@ -1071,6 +1071,5 @@ class LMDBReaderTest(test.TestCase):
       coord.request_stop()
       coord.join(threads)
 
-
 if __name__ == "__main__":
   test.main()
diff --git a/tensorflow/python/kernel_tests/segment_reduction_ops_test.py b/tensorflow/python/kernel_tests/segment_reduction_ops_test.py
index 99f9f09690..3a02f24902 100644
--- a/tensorflow/python/kernel_tests/segment_reduction_ops_test.py
+++ b/tensorflow/python/kernel_tests/segment_reduction_ops_test.py
@@ -380,7 +380,7 @@ class UnsortedSegmentSumTest(SegmentReductionHelper):
           # Replace np_ans[8] with 0 for the value
           np_ans[8:] = 0
           # Replace 8 with -1 in indices
-          np.place(indices, indices == 8, [-1])
+          np.place(indices, indices==8, [-1])
           s = math_ops.unsorted_segment_sum(
               data=tf_x, segment_ids=indices, num_segments=num_segments)
           tf_ans = s.eval()
diff --git a/tensorflow/python/kernel_tests/unique_op_test.py b/tensorflow/python/kernel_tests/unique_op_test.py
index 6390b7c518..04758ce45a 100644
--- a/tensorflow/python/kernel_tests/unique_op_test.py
+++ b/tensorflow/python/kernel_tests/unique_op_test.py
@@ -87,7 +87,6 @@ class UniqueTest(test.TestCase):
     for i in range(len(x)):
       self.assertEqual(x[i], tf_y[tf_idx[i]])
 
-
 class UniqueWithCountsTest(test.TestCase):
 
   def testInt32(self):
diff --git a/tensorflow/python/layers/normalization.py b/tensorflow/python/layers/normalization.py
index 83237b8733..4d5fb97845 100644
--- a/tensorflow/python/layers/normalization.py
+++ b/tensorflow/python/layers/normalization.py
@@ -267,34 +267,34 @@ class BatchNormalization(base.Layer):
           self.axis[idx] = x + 1      # Account for added dimension
 
     if self.scale:
-      self.gamma = self.add_variable(
-          name='gamma',
-          shape=param_shape,
-          dtype=param_dtype,
-          initializer=self.gamma_initializer,
-          regularizer=self.gamma_regularizer,
-          constraint=self.gamma_constraint,
-          trainable=True)
+      self.gamma = self.add_variable(name='gamma',
+                                     shape=param_shape,
+                                     dtype=param_dtype,
+                                     initializer=self.gamma_initializer,
+                                     regularizer=self.gamma_regularizer,
+                                     constraint=self.gamma_constraint,
+                                     trainable=True)
     else:
       self.gamma = None
       if self.fused:
-        self._gamma_const = array_ops.constant(
-            1.0, dtype=param_dtype, shape=param_shape)
+        self._gamma_const = array_ops.constant(1.0,
+                                               dtype=param_dtype,
+                                               shape=param_shape)
 
     if self.center:
-      self.beta = self.add_variable(
-          name='beta',
-          shape=param_shape,
-          dtype=param_dtype,
-          initializer=self.beta_initializer,
-          regularizer=self.beta_regularizer,
-          constraint=self.beta_constraint,
-          trainable=True)
+      self.beta = self.add_variable(name='beta',
+                                    shape=param_shape,
+                                    dtype=param_dtype,
+                                    initializer=self.beta_initializer,
+                                    regularizer=self.beta_regularizer,
+                                    constraint=self.beta_constraint,
+                                    trainable=True)
     else:
       self.beta = None
       if self.fused:
-        self._beta_const = array_ops.constant(
-            0.0, dtype=param_dtype, shape=param_shape)
+        self._beta_const = array_ops.constant(0.0,
+                                              dtype=param_dtype,
+                                              shape=param_shape)
 
     # Disable variable partitioning when creating the moving mean and variance
     try:
@@ -327,12 +327,11 @@ class BatchNormalization(base.Layer):
         # stack to be cleared. The nested ones use a `lambda` to set the desired
         # device and ignore any devices that may be set by the custom getter.
         def _renorm_variable(name, shape):
-          var = self.add_variable(
-              name=name,
-              shape=shape,
-              dtype=param_dtype,
-              initializer=init_ops.zeros_initializer(),
-              trainable=False)
+          var = self.add_variable(name=name,
+                                  shape=shape,
+                                  dtype=param_dtype,
+                                  initializer=init_ops.zeros_initializer(),
+                                  trainable=False)
           return var
 
         with ops.device(None):
diff --git a/tensorflow/python/layers/normalization_test.py b/tensorflow/python/layers/normalization_test.py
index 7c91c3284e..b2876c58c2 100644
--- a/tensorflow/python/layers/normalization_test.py
+++ b/tensorflow/python/layers/normalization_test.py
@@ -101,13 +101,15 @@ class BNTest(test.TestCase):
       loss_val = sess.run(loss, feed_dict={image: image_val})
       return loss_val
 
-  def _trainEvalSequence(self, dtype, train1_use_gpu, train2_use_gpu,
+  def _trainEvalSequence(self,
+                         dtype,
+                         train1_use_gpu,
+                         train2_use_gpu,
                          infer_use_gpu):
     batch, height, width, input_channels = 2, 4, 5, 3
     shape = [batch, height, width, input_channels]
     checkpoint = os.path.join(self.get_temp_dir(), 'cp_%s_%s_%s_%s' %
-                              (dtype, train1_use_gpu, train2_use_gpu,
-                               infer_use_gpu))
+        (dtype, train1_use_gpu, train2_use_gpu, infer_use_gpu))
 
     self._train(
         checkpoint,
@@ -128,27 +130,30 @@ class BNTest(test.TestCase):
         dtype=dtype)
 
     np.random.seed(0)
-    image_val = np.random.rand(batch, height, width, input_channels).astype(
-        dtype.as_numpy_dtype)
-    loss_val = self._infer(
-        checkpoint, image_val, shape, use_gpu=infer_use_gpu, is_fused=True)
+    image_val = np.random.rand(batch,
+                               height,
+                               width,
+                               input_channels).astype(dtype.as_numpy_dtype)
+    loss_val = self._infer(checkpoint, image_val, shape,
+                           use_gpu=infer_use_gpu, is_fused=True)
 
     return train_vars, loss_val
 
   def testHalfPrecision(self):
-    ref_vars, ref_loss = self._trainEvalSequence(
-        dtype=dtypes.float32,
-        train1_use_gpu=True,
-        train2_use_gpu=True,
-        infer_use_gpu=True)
-
+    ref_vars, ref_loss = self._trainEvalSequence(dtype=dtypes.float32,
+                                                 train1_use_gpu=True,
+                                                 train2_use_gpu=True,
+                                                 infer_use_gpu=True)
+ 
     self.assertEqual(len(ref_vars), 5)
 
     for train1_use_gpu in [True, False]:
       for train2_use_gpu in [True, False]:
         for infer_use_gpu in [True, False]:
-          test_vars, test_loss = self._trainEvalSequence(
-              dtypes.float16, train1_use_gpu, train2_use_gpu, infer_use_gpu)
+          test_vars, test_loss = self._trainEvalSequence(dtypes.float16,
+                                                         train1_use_gpu,
+                                                         train2_use_gpu,
+                                                         infer_use_gpu)
           self.assertEqual(len(test_vars), 5)
           for test_var, ref_var in zip(test_vars, ref_vars):
             self.assertAllClose(test_var, ref_var, rtol=1.e-3, atol=1.e-3)
@@ -276,8 +281,9 @@ class BNTest(test.TestCase):
   def testCreateFusedBNFloat16(self):
     # Call layer.
     bn = normalization_layers.BatchNormalization(axis=1, fused=True)
-    inputs = random_ops.random_uniform(
-        (5, 4, 3, 3), seed=1, dtype=dtypes.float16)
+    inputs = random_ops.random_uniform((5, 4, 3, 3),
+                                       seed=1,
+                                       dtype=dtypes.float16)
     training = array_ops.placeholder(dtype='bool')
     outputs = bn.apply(inputs, training=training)
 
diff --git a/tensorflow/python/ops/array_ops.py b/tensorflow/python/ops/array_ops.py
index 38eff54c69..43238757c7 100644
--- a/tensorflow/python/ops/array_ops.py
+++ b/tensorflow/python/ops/array_ops.py
@@ -1194,19 +1194,18 @@ def boolean_mask(tensor, mask, name="boolean_mask", axis=None):
           "Number of mask dimensions must be specified, even if some dimensions"
           " are None.  E.g. shape=[None] is ok, but shape=None is not.")
     axis = 0 if axis is None else axis
-    shape_tensor[axis:axis + ndims_mask].assert_is_compatible_with(shape_mask)
+    shape_tensor[axis:axis+ndims_mask].assert_is_compatible_with(shape_mask)
 
-    leading_size = gen_math_ops._prod(
-        shape(tensor)[axis:axis + ndims_mask], [0])
+    leading_size = gen_math_ops._prod(shape(tensor)[axis:axis+ndims_mask], [0])
     tensor = reshape(tensor,
-                     concat([
-                         shape(tensor)[:axis], [leading_size],
-                         shape(tensor)[axis + ndims_mask:]
-                     ], 0))
-    first_dim = shape_tensor[axis:axis + ndims_mask].num_elements()
+                     concat([shape(tensor)[:axis],
+                             [leading_size],
+                             shape(tensor)[axis+ndims_mask:]], 0))
+    first_dim = shape_tensor[axis:axis+ndims_mask].num_elements()
     tensor.set_shape(
-        tensor_shape.as_shape(shape_tensor[:axis]).concatenate([first_dim])
-        .concatenate(shape_tensor[axis + ndims_mask:]))
+        tensor_shape.as_shape(shape_tensor[:axis])
+        .concatenate([first_dim])
+        .concatenate(shape_tensor[axis+ndims_mask:]))
 
     mask = reshape(mask, [-1])
     return _apply_mask_1d(tensor, mask, axis)
diff --git a/tensorflow/python/ops/distributions/multinomial.py b/tensorflow/python/ops/distributions/multinomial.py
index 04762565c2..d49fac59ca 100644
--- a/tensorflow/python/ops/distributions/multinomial.py
+++ b/tensorflow/python/ops/distributions/multinomial.py
@@ -23,10 +23,10 @@ from tensorflow.python.framework import ops
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import check_ops
 from tensorflow.python.ops import control_flow_ops
-from tensorflow.python.ops import functional_ops
 from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import nn_ops
 from tensorflow.python.ops import random_ops
+from tensorflow.python.ops import functional_ops
 from tensorflow.python.ops.distributions import distribution
 from tensorflow.python.ops.distributions import util as distribution_util
 
@@ -243,26 +243,25 @@ class Multinomial(distribution.Distribution):
         n_draws[..., array_ops.newaxis], dtype=self.logits.dtype) * self.logits
 
     # flatten the total_count and logits
-    flat_logits = array_ops.reshape(logits, [-1, k])  # [B1B2...Bm, k]
-    flat_ndraws = n * array_ops.reshape(n_draws, [-1])  # [B1B2...Bm]
+    flat_logits = array_ops.reshape(logits, [-1, k]) # [B1B2...Bm, k]
+    flat_ndraws = n * array_ops.reshape(n_draws, [-1]) # [B1B2...Bm]
 
     # computes each total_count and logits situation by map_fn
     def _sample_single(args):
-      logits, n_draw = args[0], args[1]  # [K], []
-      x = random_ops.multinomial(logits[array_ops.newaxis, ...], n_draw,
-                                 seed)  # [1, n*n_draw]
-      x = array_ops.reshape(x, shape=[n, -1])  # [n, n_draw]
-      x = math_ops.reduce_sum(array_ops.one_hot(x, depth=k), axis=-2)  # [n, k]
+      logits, n_draw = args[0], args[1] # [K], []
+      x = random_ops.multinomial(logits[array_ops.newaxis, ...],
+                                 n_draw, seed) # [1, n*n_draw]
+      x = array_ops.reshape(x, shape=[n, -1]) # [n, n_draw]
+      x = math_ops.reduce_sum(array_ops.one_hot(x, depth=k), axis=-2) # [n, k]
       return x
-
-    x = functional_ops.map_fn(
-        _sample_single, [flat_logits, flat_ndraws],
-        dtype=self.dtype)  # [B1B2...Bm, n, k]
+    x = functional_ops.map_fn(_sample_single,
+                              [flat_logits, flat_ndraws],
+                              dtype=self.dtype) # [B1B2...Bm, n, k]
 
     # reshape the results to proper shape
     x = array_ops.transpose(x, perm=[1, 0, 2])
     final_shape = array_ops.concat([[n], self.batch_shape_tensor(), [k]], 0)
-    x = array_ops.reshape(x, final_shape)  # [n, B1, B2,..., Bm, k]
+    x = array_ops.reshape(x, final_shape) # [n, B1, B2,..., Bm, k]
     return x
 
   @distribution_util.AppendDocstring(_multinomial_sample_note)
diff --git a/tensorflow/python/ops/image_ops_impl.py b/tensorflow/python/ops/image_ops_impl.py
index b9c89d62d5..7c23321ca5 100644
--- a/tensorflow/python/ops/image_ops_impl.py
+++ b/tensorflow/python/ops/image_ops_impl.py
@@ -1119,8 +1119,9 @@ def rgb_to_grayscale(images, name=None):
     # https://en.wikipedia.org/wiki/Luma_%28video%29
     rgb_weights = [0.2989, 0.5870, 0.1140]
     rank_1 = array_ops.expand_dims(array_ops.rank(images) - 1, 0)
-    gray_float = math_ops.reduce_sum(
-        flt_image * rgb_weights, rank_1, keepdims=True)
+    gray_float = math_ops.reduce_sum(flt_image * rgb_weights,
+                                     rank_1,
+                                     keepdims=True)
     gray_float.set_shape(images.get_shape()[:-1].concatenate([1]))
     return convert_image_dtype(gray_float, orig_dtype, name=name)
 
diff --git a/tensorflow/python/ops/linalg_ops.py b/tensorflow/python/ops/linalg_ops.py
index be9beee633..14a039ffd0 100644
--- a/tensorflow/python/ops/linalg_ops.py
+++ b/tensorflow/python/ops/linalg_ops.py
@@ -30,7 +30,7 @@ from tensorflow.python.ops import math_ops
 from tensorflow.python.ops.gen_linalg_ops import *
 # pylint: enable=wildcard-import
 from tensorflow.python.util import compat
-from tensorflow.python.util import deprecation
+from tensorflow.python.util.deprecation import deprecated_args
 
 # Names below are lower_case.
 # pylint: disable=invalid-name
@@ -439,13 +439,9 @@ def svd(tensor, full_matrices=False, compute_uv=True, name=None):
 
 
 # pylint: disable=redefined-builtin
-@deprecation.deprecated_args(
-    None, 'keep_dims is deprecated, use keepdims instead', 'keep_dims')
-def norm(tensor,
-         ord='euclidean',
-         axis=None,
-         keepdims=None,
-         name=None,
+@deprecated_args(None, "keep_dims is deprecated, use keepdims instead",
+                 "keep_dims")
+def norm(tensor, ord='euclidean', axis=None, keepdims=None, name=None,
          keep_dims=None):
   r"""Computes the norm of vectors, matrices, and tensors.
 
@@ -482,7 +478,6 @@ def norm(tensor,
     keepdims: If True, the axis indicated in `axis` are kept with size 1.
       Otherwise, the dimensions in `axis` are removed from the output shape.
     name: The name of the op.
-    keep_dims: Deprecated alias for `keepdims`.
 
   Returns:
     output: A `Tensor` of the same type as tensor, containing the vector or
@@ -505,8 +500,11 @@ def norm(tensor,
      higher order tensors.
   @end_compatibility
   """
-  keepdims = deprecation.deprecated_argument_lookup('keepdims', keepdims,
-                                                    'keep_dims', keep_dims)
+
+  if keep_dims is not None:
+    if keepdims is not None:
+      raise ValueError("Cannot specify both 'keep_dims' and 'keepdims'")
+    keepdims = keep_dims
   if keepdims is None:
     keepdims = False
 
@@ -557,8 +555,8 @@ def norm(tensor,
       else:
         # General p-norms (positive p only)
         result = math_ops.pow(
-            math_ops.reduce_sum(math_ops.pow(result, ord), axis, keepdims=True),
-            1.0 / ord)
+            math_ops.reduce_sum(
+                math_ops.pow(result, ord), axis, keepdims=True), 1.0 / ord)
     if not keepdims:
       result = array_ops.squeeze(result, axis)
     return result
diff --git a/tensorflow/python/ops/metrics_impl.py b/tensorflow/python/ops/metrics_impl.py
index e04121ee31..d30f6b92ad 100644
--- a/tensorflow/python/ops/metrics_impl.py
+++ b/tensorflow/python/ops/metrics_impl.py
@@ -792,10 +792,9 @@ def mean_cosine_distance(labels, predictions, dim, weights=None,
   predictions, labels, weights = _remove_squeezable_dimensions(
       predictions=predictions, labels=labels, weights=weights)
   radial_diffs = math_ops.multiply(predictions, labels)
-  radial_diffs = math_ops.reduce_sum(
-      radial_diffs, reduction_indices=[
-          dim,
-      ], keepdims=True)
+  radial_diffs = math_ops.reduce_sum(radial_diffs,
+                                     reduction_indices=[dim,],
+                                     keepdims=True)
   mean_distance, update_op = mean(radial_diffs, weights,
                                   None,
                                   None,
diff --git a/tensorflow/python/ops/nn_impl.py b/tensorflow/python/ops/nn_impl.py
index 654eb1c118..da037a7983 100644
--- a/tensorflow/python/ops/nn_impl.py
+++ b/tensorflow/python/ops/nn_impl.py
@@ -333,7 +333,6 @@ def l2_normalize(x, axis=None, epsilon=1e-12, name=None, dim=None):
     epsilon: A lower bound value for the norm. Will use `sqrt(epsilon)` as the
       divisor if `norm < sqrt(epsilon)`.
     name: A name for this operation (optional).
-    dim: Deprecated alias for axis.
 
   Returns:
     A `Tensor` with the same shape as `x`.
diff --git a/tensorflow/python/ops/nn_ops.py b/tensorflow/python/ops/nn_ops.py
index ec7b9372ca..61fa462988 100644
--- a/tensorflow/python/ops/nn_ops.py
+++ b/tensorflow/python/ops/nn_ops.py
@@ -23,6 +23,7 @@ import numbers
 import numpy as np
 
 from tensorflow.python.eager import context
+from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import graph_util
 from tensorflow.python.framework import ops
@@ -37,10 +38,11 @@ from tensorflow.python.ops import random_ops
 # pylint: disable=wildcard-import
 from tensorflow.python.ops.gen_nn_ops import *
 # pylint: enable=wildcard-import
+from tensorflow.python.util.deprecation import deprecated_args
+from tensorflow.python.util.deprecation import deprecated_argument_lookup
 
 from tensorflow.python.util import deprecation
 
-
 # Aliases for some automatically-generated names.
 local_response_normalization = gen_nn_ops.lrn
 
@@ -1646,7 +1648,7 @@ def _softmax(logits, compute_op, dim=-1, name=None):
   return output
 
 
-@deprecation.deprecated_args(None, "dim is deprecated, use axis instead", "dim")
+@deprecated_args(None, "dim is deprecated, use axis instead", "dim")
 def softmax(logits, axis=None, name=None, dim=None):
   """Computes softmax activations.
 
@@ -1660,7 +1662,6 @@ def softmax(logits, axis=None, name=None, dim=None):
     axis: The dimension softmax would be performed on. The default is -1 which
       indicates the last dimension.
     name: A name for the operation (optional).
-    dim: Deprecated alias for `axis`.
 
   Returns:
     A `Tensor`. Has the same type and shape as `logits`.
@@ -1669,13 +1670,13 @@ def softmax(logits, axis=None, name=None, dim=None):
     InvalidArgumentError: if `logits` is empty or `axis` is beyond the last
       dimension of `logits`.
   """
-  axis = deprecation.deprecated_argument_lookup("axis", axis, "dim", dim)
+  axis = deprecated_argument_lookup("axis", axis, "dim", dim)
   if axis is None:
     axis = -1
   return _softmax(logits, gen_nn_ops._softmax, axis, name)
 
 
-@deprecation.deprecated_args(None, "dim is deprecated, use axis instead", "dim")
+@deprecated_args(None, "dim is deprecated, use axis instead", "dim")
 def log_softmax(logits, axis=None, name=None, dim=None):
   """Computes log softmax activations.
 
@@ -1689,7 +1690,6 @@ def log_softmax(logits, axis=None, name=None, dim=None):
     axis: The dimension softmax would be performed on. The default is -1 which
       indicates the last dimension.
     name: A name for the operation (optional).
-    dim: Deprecated alias for `axis`.
 
   Returns:
     A `Tensor`. Has the same type as `logits`. Same shape as `logits`.
@@ -1698,7 +1698,7 @@ def log_softmax(logits, axis=None, name=None, dim=None):
     InvalidArgumentError: if `logits` is empty or `axis` is beyond the last
       dimension of `logits`.
   """
-  axis = deprecation.deprecated_argument_lookup("axis", axis, "dim", dim)
+  axis = deprecated_argument_lookup("axis", axis, "dim", dim)
   if axis is None:
     axis = -1
   return _softmax(logits, gen_nn_ops._log_softmax, axis, name)
@@ -2316,14 +2316,13 @@ def conv1d(value, filters, stride, padding,
     return array_ops.squeeze(result, [spatial_start_dim])
 
 
-def conv1d_transpose(
-    value,
-    filter,  # pylint: disable=redefined-builtin
-    output_shape,
-    stride,
-    padding="SAME",
-    data_format="NWC",
-    name=None):
+def conv1d_transpose(value,
+                     filter,
+                     output_shape,
+                     stride,
+                     padding="SAME",
+                     data_format="NWC",
+                     name=None):
   """The transpose of `conv1d`.
 
   This operation is sometimes called "deconvolution" after [Deconvolutional
@@ -2358,8 +2357,8 @@ def conv1d_transpose(
                       [value, filter, output_shape]) as name:
     output_shape_ = ops.convert_to_tensor(output_shape, name="output_shape")
     if not output_shape_.get_shape().is_compatible_with(tensor_shape.vector(3)):
-      raise ValueError("output_shape must have shape (3,), got {}".format(
-          output_shape_.get_shape()))
+      raise ValueError("output_shape must have shape (3,), got {}"
+                       .format(output_shape_.get_shape()))
 
     # The format could be either NWC or NCW, map to NHWC or NCHW
     if data_format is None or data_format == "NWC":
@@ -2381,8 +2380,7 @@ def conv1d_transpose(
       if not filter.get_shape()[1].is_compatible_with(output_shape[axis]):
         raise ValueError(
             "output_shape does not match filter's output channels, "
-            "{} != {}".format(output_shape[axis],
-                              filter.get_shape()[1]))
+            "{} != {}".format(output_shape[axis], filter.get_shape()[1]))
 
     if padding != "VALID" and padding != "SAME":
       raise ValueError("padding must be either VALID or SAME:"
@@ -2390,26 +2388,25 @@ def conv1d_transpose(
 
     # Reshape the input tensor to [batch, 1, in_width, in_channels]
     if data_format_2d == "NHWC":
-      output_shape_ = array_ops.concat(
-          [output_shape_[:1], [1], output_shape_[1:]], axis=0)
+      output_shape_ = array_ops.concat([output_shape_[:1], [1],
+                                        output_shape_[1:]], axis=0)
       spatial_start_dim = 1
       strides = [1, 1, stride, 1]
     else:
-      output_shape_ = array_ops.concat(
-          [output_shape_[:2], [1], output_shape_[2:]], axis=0)
+      output_shape_ = array_ops.concat([output_shape_[:2], [1],
+                                        output_shape_[2:]], axis=0)
       spatial_start_dim = 2
       strides = [1, 1, 1, stride]
     value = array_ops.expand_dims(value, spatial_start_dim)
     filter = array_ops.expand_dims(filter, 0)
 
-    result = gen_nn_ops.conv2d_backprop_input(
-        input_sizes=output_shape_,
-        filter=filter,
-        out_backprop=value,
-        strides=strides,
-        padding=padding,
-        data_format=data_format_2d,
-        name=name)
+    result = gen_nn_ops.conv2d_backprop_input(input_sizes=output_shape_,
+                                              filter=filter,
+                                              out_backprop=value,
+                                              strides=strides,
+                                              padding=padding,
+                                              data_format=data_format_2d,
+                                              name=name)
     return array_ops.squeeze(result, [spatial_start_dim])
 
 
diff --git a/tensorflow/stream_executor/dnn.cc b/tensorflow/stream_executor/dnn.cc
index 44144a0613..43d2d3cd48 100644
--- a/tensorflow/stream_executor/dnn.cc
+++ b/tensorflow/stream_executor/dnn.cc
@@ -470,7 +470,6 @@ string ConvolutionDescriptor::ToShortString() const {
 PoolingDescriptor::PoolingDescriptor(int ndims)
     : mode_(dnn::PoolingMode::kMaximum),
       ndims_(ndims),
-      propagate_nans_(false),
       window_(ndims, 0),
       padding_(ndims, 0),
       strides_(ndims, 1) {}
diff --git a/third_party/sycl/crosstool/trisycl.tpl b/third_party/sycl/crosstool/trisycl.tpl
index 87a70d8f95..b470772fbf 100644
--- a/third_party/sycl/crosstool/trisycl.tpl
+++ b/third_party/sycl/crosstool/trisycl.tpl
@@ -11,12 +11,10 @@ CPU_C_COMPILER = ('%{host_c_compiler}')
 CURRENT_DIR = os.path.dirname(sys.argv[0])
 TRISYCL_INCLUDE_DIR = CURRENT_DIR + '/../sycl/include'
 
-
 def main():
   compiler_flags = []
 
-  remove_flags = ('-Wl,--no-undefined', '-Wno-unused-but-set-variable',
-                  '-Wignored-attributes', '-fno-exceptions')
+  remove_flags = ('-Wl,--no-undefined', '-Wno-unused-but-set-variable', '-Wignored-attributes', '-fno-exceptions')
   # remove -fsamotoze-coverage from string with g++
   if 'g++' in CPU_CXX_COMPILER:
     remove_flags += ('-fsanitize-coverage',)
@@ -24,62 +22,52 @@ def main():
   else:
     compiler_flags += ['-fopenmp=libomp']
 
-  compiler_flags += [
-      flag for flag in sys.argv[1:] if not flag.startswith(remove_flags)
-  ]
+  compiler_flags += [flag for flag in sys.argv[1:] if not flag.startswith(remove_flags)]
+
 
   output_file_index = compiler_flags.index('-o') + 1
   output_file_name = compiler_flags[output_file_index]
 
-  if (output_file_index == 1):
+  if(output_file_index == 1):
     # we are linking
-    return call([CPU_CXX_COMPILER] + compiler_flags + ['-Wl,--no-undefined'])
+    return call([CPU_CXX_COMPILER] + compiler_flags +
+                ['-Wl,--no-undefined'])
 
   # find what we compile
   compiling_cpp = 0
-  if ('-c' in compiler_flags):
-    compiled_file_index = compiler_flags.index('-c') + 1
-    compiled_file_name = compiler_flags[compiled_file_index]
-    if (compiled_file_name.endswith(('.cc', '.c++', '.cpp', '.CPP', '.C',
-                                     '.cxx'))):
-      compiling_cpp = 1
-
-  debug_flags = [
-      '-DTRISYCL_DEBUG', '-DBOOST_LOG_DYN_LINK', '-DTRISYCL_TRACE_KERNEL',
-      '-lpthread', '-lboost_log', '-g', '-rdynamic'
-  ]
+  if('-c' in compiler_flags):
+      compiled_file_index = compiler_flags.index('-c') + 1
+      compiled_file_name = compiler_flags[compiled_file_index]
+      if(compiled_file_name.endswith(('.cc', '.c++', '.cpp', '.CPP',
+                                      '.C', '.cxx'))):
+        compiling_cpp = 1;
+
+  debug_flags = ['-DTRISYCL_DEBUG', '-DBOOST_LOG_DYN_LINK', '-DTRISYCL_TRACE_KERNEL', '-lpthread', '-lboost_log', '-g', '-rdynamic']
 
   opt_flags = ['-DNDEBUG', '-DBOOST_DISABLE_ASSERTS', '-O3']
 
-  compiler_flags = compiler_flags + [
-      '-DEIGEN_USE_SYCL=1', '-DEIGEN_HAS_C99_MATH',
-      '-DEIGEN_MAX_ALIGN_BYTES=16', '-DTENSORFLOW_USE_SYCL'
-  ] + opt_flags
+  compiler_flags = compiler_flags + ['-DEIGEN_USE_SYCL=1',
+                                     '-DEIGEN_HAS_C99_MATH',
+                                     '-DEIGEN_MAX_ALIGN_BYTES=16',
+                                     '-DTENSORFLOW_USE_SYCL'] + opt_flags
 
-  if (compiling_cpp == 1):
+  if(compiling_cpp == 1):
     # create a blacklist of folders that will be skipped when compiling
     # with triSYCL
-    skip_extensions = ['.cu.cc']
-    skip_folders = [
-        'tensorflow/compiler', 'tensorflow/docs_src', 'tensorflow/tensorboard',
-        'third_party', 'external', 'hexagon'
-    ]
+    skip_extensions = [".cu.cc"]
+    skip_folders = ["tensorflow/compiler", "tensorflow/docs_src", "tensorflow/tensorboard", "third_party", "external", "hexagon"]
     skip_folders = [(folder + '/') for folder in skip_folders]
     # if compiling external project skip triSYCL
-    if any(
-        compiled_file_name.endswith(_ext) for _ext in skip_extensions) or any(
-            _folder in output_file_name for _folder in skip_folders):
+    if any(compiled_file_name.endswith(_ext) for _ext in skip_extensions) or any(_folder in output_file_name for _folder in skip_folders):
       return call([CPU_CXX_COMPILER] + compiler_flags)
 
-    host_compiler_flags = [
-        '-xc++', '-Wno-unused-variable', '-I', TRISYCL_INCLUDE_DIR
-    ] + compiler_flags
+    host_compiler_flags = ['-xc++', '-Wno-unused-variable',
+                           '-I', TRISYCL_INCLUDE_DIR] + compiler_flags
     x = call([CPU_CXX_COMPILER] + host_compiler_flags)
     return x
   else:
     # compile for C
     return call([CPU_C_COMPILER] + compiler_flags)
 
-
 if __name__ == '__main__':
   sys.exit(main())
diff --git a/third_party/sycl/sycl_configure.bzl b/third_party/sycl/sycl_configure.bzl
index 5b9d0eb383..a0c9e4e43a 100644
--- a/third_party/sycl/sycl_configure.bzl
+++ b/third_party/sycl/sycl_configure.bzl
@@ -67,6 +67,7 @@ def find_computecpp_root(repository_ctx):
 
 def find_trisycl_include_dir(repository_ctx):
   """Find triSYCL include directory. """
+  sycl_name = ""
   if _TRISYCL_INCLUDE_DIR in repository_ctx.os.environ:
     sycl_name = repository_ctx.os.environ[_TRISYCL_INCLUDE_DIR].strip()
     if sycl_name.startswith("/"):
-- 
GitLab


From e70c00950d295c519fd9c7f8b12e13a3c5aaf710 Mon Sep 17 00:00:00 2001
From: Yifei Feng <yifeif@google.com>
Date: Wed, 22 Nov 2017 00:39:22 -0800
Subject: [PATCH 0215/1225] Automated g4 rollback of changelist 176615107

PiperOrigin-RevId: 176622438
---
 CODE_OF_CONDUCT.md                            |   6 +-
 README.md                                     |   4 +-
 configure.py                                  |  38 +-
 tensorflow/BUILD                              |  16 -
 tensorflow/compiler/aot/tfcompile.bzl         |  11 +-
 tensorflow/compiler/tests/BUILD               |   2 +-
 .../compiler/tests/fused_batchnorm_test.py    |  25 +-
 tensorflow/compiler/xla/service/BUILD         |   2 +
 .../compiler/xla/service/hlo_instruction.h    |   2 +-
 .../xla/service/hlo_instruction_test.cc       |   4 +-
 tensorflow/contrib/batching/BUILD             |   1 -
 .../contrib/batching/kernels/batch_kernels.cc |   2 +-
 .../kernel_tests/csiszar_divergence_test.py   |   2 +-
 tensorflow/contrib/cmake/CMakeLists.txt       | 147 +---
 .../contrib/cmake/external/boringssl.cmake    |   6 +-
 .../contrib/cmake/external/jsoncpp.cmake      |   6 +-
 tensorflow/contrib/cmake/external/lmdb.cmake  |   6 +-
 tensorflow/contrib/cmake/external/png.cmake   |   6 +-
 .../contrib/cmake/external/protobuf.cmake     |   6 +-
 tensorflow/contrib/cmake/external/re2.cmake   |   8 +-
 .../contrib/cmake/external/snappy.cmake       |   8 +-
 .../contrib/cmake/external/sqlite.cmake       |   6 +-
 tensorflow/contrib/cmake/external/zlib.cmake  |   6 +-
 tensorflow/contrib/cmake/tf_cc_ops.cmake      |  36 +-
 .../contrib/cmake/tf_core_kernels.cmake       |  23 +-
 .../cmake/tf_label_image_example.cmake        |   5 -
 tensorflow/contrib/cmake/tf_python.cmake      |  38 +-
 tensorflow/contrib/cmake/tf_shared_lib.cmake  |  45 +-
 .../contrib/cmake/tf_stream_executor.cmake    |   3 -
 tensorflow/contrib/cmake/tf_tools.cmake       |  13 +-
 tensorflow/contrib/cmake/tf_tutorials.cmake   |   5 -
 tensorflow/contrib/crf/python/ops/crf.py      |  19 +-
 .../contrib/data/python/kernel_tests/BUILD    |   8 +-
 tensorflow/contrib/distributions/BUILD        |  17 -
 tensorflow/contrib/distributions/__init__.py  |   2 -
 .../python/kernel_tests/cauchy_test.py        | 437 -----------
 .../distributions/python/ops/cauchy.py        | 223 ------
 .../python/examples/notebooks/1_basics.ipynb  |   4 +-
 .../examples/notebooks/2_gradients.ipynb      |   6 +-
 .../examples/notebooks/3_datasets.ipynb       |  10 +-
 .../contrib/layers/python/layers/layers.py    |  18 +-
 .../layers/python/layers/layers_test.py       |  73 +-
 .../learn/python/learn/estimators/head.py     |   2 +-
 .../learn/python/learn/estimators/model_fn.py |   6 +-
 .../python/learn/learn_io/data_feeder.py      |  12 +-
 .../linear_optimizer/python/ops/sdca_ops.py   |  11 +-
 tensorflow/contrib/lite/python/BUILD          |   1 -
 .../contrib/lite/testing/generate_examples.py |  17 +-
 tensorflow/contrib/lite/toco/python/BUILD     |   1 -
 tensorflow/contrib/makefile/Makefile          |   3 +-
 tensorflow/contrib/makefile/README.md         |  41 +-
 tensorflow/contrib/makefile/build_all_ios.sh  |  54 +-
 .../contrib/makefile/compile_ios_protobuf.sh  | 369 ++++------
 .../makefile/compile_ios_tensorflow.sh        | 155 ++--
 tensorflow/contrib/makefile/compile_nsync.sh  |   5 +-
 tensorflow/contrib/nn/__init__.py             |   2 -
 tensorflow/contrib/opt/BUILD                  |  18 -
 tensorflow/contrib/opt/__init__.py            |   5 +-
 .../training/multitask_optimizer_wrapper.py   | 138 ----
 .../multitask_optimizer_wrapper_test.py       | 119 ---
 .../python/kernel_tests/core_rnn_cell_test.py |  42 --
 .../rnn/python/kernel_tests/rnn_cell_test.py  |  44 --
 tensorflow/contrib/rnn/python/ops/rnn_cell.py | 344 +--------
 .../seq2seq/python/ops/attention_wrapper.py   |  51 +-
 tensorflow/contrib/slim/README.md             |   2 +-
 .../slim/python/slim/nets/resnet_v1_test.py   |   2 +-
 tensorflow/contrib/verbs/README.md            |  14 +-
 tensorflow/contrib/verbs/rdma.cc              | 413 ++---------
 tensorflow/contrib/verbs/rdma.h               |  40 +-
 tensorflow/core/BUILD                         |   1 -
 .../core/common_runtime/mkl_cpu_allocator.h   |   2 +-
 .../core/common_runtime/sycl/sycl_device.h    |  22 +-
 tensorflow/core/graph/graph.cc                |  15 -
 tensorflow/core/graph/graph.h                 |   5 -
 tensorflow/core/graph/graph_partition.cc      |   4 +-
 tensorflow/core/graph/graph_test.cc           |  64 +-
 tensorflow/core/graph/mkl_graph_util.h        | 179 +++--
 tensorflow/core/graph/mkl_layout_pass.cc      |   2 +-
 .../core/graph/mkl_tfconversion_pass.cc       |   4 +-
 .../core/grappler/costs/graph_properties.h    |   6 -
 tensorflow/core/grappler/utils.cc             |   2 +-
 tensorflow/core/kernels/BUILD                 |  31 +-
 tensorflow/core/kernels/avgpooling_op.cc      |   7 +-
 tensorflow/core/kernels/bincount_op.cc        | 115 ++-
 tensorflow/core/kernels/bincount_op.h         |  41 --
 tensorflow/core/kernels/bincount_op_gpu.cu.cc | 114 ---
 tensorflow/core/kernels/bincount_op_test.cc   |  75 --
 tensorflow/core/kernels/bucketize_op.cc       |  66 +-
 tensorflow/core/kernels/bucketize_op.h        |  41 --
 .../core/kernels/bucketize_op_gpu.cu.cc       | 101 ---
 tensorflow/core/kernels/conv_grad_ops_3d.cc   |  42 +-
 tensorflow/core/kernels/conv_ops_3d.cc        |   5 -
 tensorflow/core/kernels/cwise_op_acosh.cc     |  12 +-
 tensorflow/core/kernels/cwise_op_asinh.cc     |  14 +-
 tensorflow/core/kernels/cwise_op_atanh.cc     |  14 +-
 tensorflow/core/kernels/cwise_ops.h           |  12 -
 .../core/kernels/depthwise_conv_grad_op.cc    |  10 +-
 tensorflow/core/kernels/depthwise_conv_op.cc  |  10 +-
 tensorflow/core/kernels/depthwise_conv_op.h   |   4 +-
 .../core/kernels/depthwise_conv_op_gpu.cu.cc  |  19 +-
 .../kernels/dynamic_partition_op_gpu.cu.cc    | 376 ----------
 .../core/kernels/dynamic_partition_op_test.cc |  58 --
 .../core/kernels/fused_batch_norm_op.cc       |  70 +-
 tensorflow/core/kernels/fused_batch_norm_op.h |  22 +-
 tensorflow/core/kernels/lmdb_reader_op.cc     |   7 +-
 tensorflow/core/kernels/maxpooling_op.cc      |  47 +-
 .../core/kernels/maxpooling_op_gpu.cu.cc      |  40 +-
 tensorflow/core/kernels/maxpooling_op_gpu.h   |   2 +-
 .../core/kernels/mkl_conv_grad_filter_ops.cc  |  78 +-
 .../core/kernels/mkl_conv_grad_input_ops.cc   |  86 ++-
 tensorflow/core/kernels/mkl_conv_ops.cc       |  82 +--
 tensorflow/core/kernels/mkl_conv_ops.h        | 140 ++--
 tensorflow/core/kernels/mkl_tfconv_op.h       |  80 +-
 tensorflow/core/kernels/pooling_ops_common.cc |  10 +-
 .../core/kernels/pooling_ops_common_gpu.h     |   4 +-
 tensorflow/core/kernels/quantized_add_op.cc   |   2 +-
 tensorflow/core/kernels/random_op.cc          |   4 +-
 .../core/kernels/segment_reduction_ops.cc     |   3 -
 .../core/kernels/segment_reduction_ops.h      |  36 +-
 tensorflow/core/kernels/shape_ops.cc          |  43 +-
 tensorflow/core/kernels/shape_ops.h           |  13 +-
 tensorflow/core/kernels/slice_op.cc           | 116 +--
 tensorflow/core/kernels/slice_op.h            | 109 +--
 tensorflow/core/kernels/slice_op_gpu.cu.cc    |  56 --
 tensorflow/core/kernels/strided_slice_op.cc   |   1 +
 .../core/kernels/strided_slice_op_impl.h      |  25 +-
 .../core/kernels/strided_slice_op_test.cc     |  49 --
 tensorflow/core/kernels/transpose_op.cc       |  35 +-
 tensorflow/core/kernels/unique_op.cc          | 113 +--
 tensorflow/core/ops/array_ops.cc              |  44 +-
 tensorflow/core/ops/math_ops.cc               |   2 -
 tensorflow/core/ops/nn_ops.cc                 |  12 +-
 tensorflow/core/ops/ops.pbtxt                 |   5 -
 .../core/platform/default/build_config/BUILD  |  20 +-
 .../core/platform/default/notification.h      |   2 +-
 tensorflow/core/platform/posix/error.cc       |  11 +-
 tensorflow/core/platform/posix/port.cc        |   6 +-
 tensorflow/core/public/version.h              |   2 +-
 tensorflow/core/util/mkl_util.h               | 691 ++----------------
 tensorflow/core/util/mkl_util_test.cc         |  92 ---
 .../api_guides/python/threading_and_queues.md |   2 +-
 .../docs_src/get_started/get_started.md       |   6 +-
 tensorflow/docs_src/get_started/input_fn.md   |   6 +-
 tensorflow/docs_src/install/install_c.md      |   2 +-
 tensorflow/docs_src/install/install_go.md     |   2 +-
 tensorflow/docs_src/install/install_java.md   |  18 +-
 tensorflow/docs_src/install/install_linux.md  |  22 +-
 tensorflow/docs_src/install/install_mac.md    |  10 +-
 .../docs_src/install/install_sources.md       |  19 +-
 tensorflow/docs_src/mobile/prepare_models.md  |   2 +-
 .../docs_src/programmers_guide/debugger.md    |  19 +-
 .../docs_src/programmers_guide/tensors.md     |  12 +-
 tensorflow/examples/speech_commands/models.py |   2 +-
 tensorflow/go/android.go                      |  20 -
 tensorflow/go/operation_test.go               |   8 -
 tensorflow/go/tensor.go                       |   9 +-
 tensorflow/go/tensor_test.go                  |   9 +-
 .../src/main/java/org/tensorflow/Shape.java   |  32 -
 .../test/java/org/tensorflow/ShapeTest.java   |  26 -
 tensorflow/python/BUILD                       |   4 -
 tensorflow/python/estimator/canned/head.py    |   2 +-
 .../python/estimator/inputs/numpy_io.py       |  83 +--
 .../python/estimator/inputs/numpy_io_test.py  |  87 ---
 tensorflow/python/framework/ops.py            |   4 -
 tensorflow/python/framework/tensor_util.py    |   1 -
 tensorflow/python/framework/test_util.py      |   3 +-
 .../python/kernel_tests/array_ops_test.py     |  52 +-
 .../python/kernel_tests/bincount_op_test.py   |  25 +-
 .../python/kernel_tests/bucketize_op_test.py  |   8 +-
 .../python/kernel_tests/constant_op_test.py   |  14 +-
 tensorflow/python/kernel_tests/conv1d_test.py |  43 --
 .../python/kernel_tests/conv_ops_3d_test.py   | 267 ++++---
 .../kernel_tests/depthwise_conv_op_test.py    |  20 +-
 .../python/kernel_tests/distributions/BUILD   |   1 -
 .../distributions/multinomial_test.py         |  12 +-
 .../kernel_tests/dynamic_partition_op_test.py | 106 +--
 .../python/kernel_tests/pooling_ops_test.py   |  60 +-
 .../python/kernel_tests/reader_ops_test.py    |  41 --
 .../segment_reduction_ops_test.py             |  29 +-
 .../python/kernel_tests/shape_ops_test.py     |  10 -
 .../python/kernel_tests/slice_op_test.py      |  25 +-
 .../python/kernel_tests/unique_op_test.py     |  26 -
 tensorflow/python/layers/base.py              |   8 +-
 tensorflow/python/layers/convolutional.py     |   2 -
 tensorflow/python/layers/normalization.py     |  22 +-
 .../python/layers/normalization_test.py       |  98 +--
 tensorflow/python/ops/array_ops.py            |  38 +-
 .../python/ops/distributions/dirichlet.py     |   2 +-
 .../python/ops/distributions/multinomial.py   |  49 +-
 tensorflow/python/ops/image_ops_impl.py       |  23 +-
 tensorflow/python/ops/linalg_ops.py           |  31 +-
 tensorflow/python/ops/math_grad_test.py       |  17 -
 tensorflow/python/ops/math_ops.py             | 258 +++----
 tensorflow/python/ops/metrics_impl.py         |   2 +-
 .../python/ops/nn_fused_batchnorm_test.py     | 119 ++-
 tensorflow/python/ops/nn_impl.py              |  16 +-
 tensorflow/python/ops/nn_ops.py               | 125 +---
 tensorflow/python/ops/variables.py            |   4 +-
 .../python/tools/import_pb_to_tensorboard.py  |   0
 tensorflow/stream_executor/cuda/cuda_dnn.cc   |   9 +-
 tensorflow/stream_executor/dnn.cc             |  13 +-
 tensorflow/stream_executor/dnn.h              |   6 -
 .../tools/api/golden/tensorflow.linalg.pbtxt  |   2 +-
 .../tools/api/golden/tensorflow.nn.pbtxt      |   6 +-
 tensorflow/tools/api/golden/tensorflow.pbtxt  |  22 +-
 .../tools/ci_build/ci_parameterized_build.sh  |   2 +-
 .../tools/ci_build/install/install_golang.sh  |   2 +-
 .../ci_build/linux/libtensorflow_docker.sh    |   2 +-
 .../tools/ci_build/osx/libtensorflow_cpu.sh   |   2 +-
 .../tools/ci_build/osx/libtensorflow_gpu.sh   |   2 +-
 .../tools/ci_build/pi/build_raspberry_pi.sh   |   6 -
 .../ci_build/windows/bazel/bazel_test_lib.sh  |   4 +-
 .../docker/Dockerfile.devel-gpu-cuda9-cudnn7  |   7 +-
 tensorflow/tools/docker/Dockerfile.gpu        |   2 +-
 tensorflow/tools/docker/README.md             |  14 -
 tensorflow/tools/graph_transforms/BUILD       |   2 -
 .../tools/graph_transforms/quantize_nodes.cc  |   2 -
 tensorflow/tools/pip_package/setup.py         |   2 +-
 third_party/aws.BUILD                         |   3 -
 third_party/curl.BUILD                        |   1 +
 third_party/sycl/crosstool/CROSSTOOL.tpl      |   8 +-
 third_party/sycl/crosstool/trisycl.tpl        |  73 --
 third_party/sycl/sycl/BUILD.tpl               |  17 +-
 third_party/sycl/sycl/build_defs.bzl.tpl      |  17 +-
 third_party/sycl/sycl_configure.bzl           |  86 +--
 third_party/zlib.BUILD                        |   2 +-
 tools/bazel.rc                                |   7 +-
 util/python/BUILD                             |   2 +-
 228 files changed, 1807 insertions(+), 7328 deletions(-)
 delete mode 100644 tensorflow/contrib/distributions/python/kernel_tests/cauchy_test.py
 delete mode 100644 tensorflow/contrib/distributions/python/ops/cauchy.py
 delete mode 100644 tensorflow/contrib/opt/python/training/multitask_optimizer_wrapper.py
 delete mode 100644 tensorflow/contrib/opt/python/training/multitask_optimizer_wrapper_test.py
 delete mode 100644 tensorflow/core/kernels/bincount_op.h
 delete mode 100644 tensorflow/core/kernels/bincount_op_gpu.cu.cc
 delete mode 100644 tensorflow/core/kernels/bincount_op_test.cc
 delete mode 100644 tensorflow/core/kernels/bucketize_op.h
 delete mode 100644 tensorflow/core/kernels/bucketize_op_gpu.cu.cc
 delete mode 100644 tensorflow/core/kernels/dynamic_partition_op_gpu.cu.cc
 delete mode 100644 tensorflow/core/util/mkl_util_test.cc
 delete mode 100644 tensorflow/go/android.go
 mode change 100755 => 100644 tensorflow/python/tools/import_pb_to_tensorboard.py
 delete mode 100644 third_party/sycl/crosstool/trisycl.tpl

diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md
index ff11d13140..10fd595fec 100644
--- a/CODE_OF_CONDUCT.md
+++ b/CODE_OF_CONDUCT.md
@@ -42,7 +42,7 @@ The Code of Conduct also applies within project spaces and in public spaces when
 
 Conflicts in an open source project can take many forms, from someone having a bad day and using harsh and hurtful language in the issue queue, to more serious instances such as sexist/racist statements or threats of violence, and everything in between.
 
-If the behavior is threatening or harassing, or for other reasons requires immediate escalation, please see below.
+If the behaviour is threatening or harassing, or for other reasons requires immediate escalation, please see below.
 
 However, for the vast majority of issues, we aim to empower individuals to first resolve conflicts themselves, asking for help when needed, and only after that fails to escalate further. This approach gives people more control over the outcome of their dispute. 
 
@@ -55,14 +55,14 @@ If you are experiencing or witnessing conflict, we ask you to use the following
 
 ## Reporting Violations
 
-Violations of the Code of Conduct can be reported to TensorFlow’s Project Stewards, Edd Wilder-James (ewj@google.com) and Sarah Novotny (sarahnovotny@google.com). The Project Steward will determine whether the Code of Conduct was violated, and will issue an appropriate sanction, possibly including a written warning or expulsion from the project, project sponsored spaces, or project forums. We ask that you make a good-faith effort to resolve your conflict via the conflict resolution policy before submitting a report.
+Violations of the Code of Conduct can be reported to TensorFlow’s Project Steward at conduct@tensorflow.org. The Project Steward will determine whether the Code of Conduct was violated, and will issue an appropriate sanction, possibly including a written warning or expulsion from the project, project sponsored spaces, or project forums. We ask that you make a good-faith effort to resolve your conflict via the conflict resolution policy before submitting a report.
 
 Violations of the Code of Conduct can occur in any setting, even those unrelated to the project. We will only consider complaints about conduct that has occurred within one year of the report.
 
 
 ## Enforcement
 
-If the Project Stewards receive a report alleging a violation of the Code of Conduct, the Project Stewards will notify the accused of the report, and provide them an opportunity to discuss the report before a sanction is issued. The Project Stewards will do their utmost to keep the reporter anonymous. If the act is ongoing (such as someone engaging in harassment), or involves a threat to anyone's safety (e.g. threats of violence), the Project Stewards may issue sanctions without notice.
+If the Project Steward receives a report alleging a violation of the Code of Conduct, the Project Steward will notify the accused of the report, and provide them an opportunity to discuss the report before a sanction is issued. The Project Steward will do their utmost to keep the reporter anonymous. If the act is ongoing (such as someone engaging in harassment), or involves a threat to anyone's safety (e.g. threats of violence), the Project Steward may issue sanctions without notice.
 
 
 ## Attribution
diff --git a/README.md b/README.md
index aff3427bdd..24bbb6cec1 100644
--- a/README.md
+++ b/README.md
@@ -73,11 +73,11 @@ $ python
 
 ## For more information
 
-* [TensorFlow Website](https://www.tensorflow.org)
+* [TensorFlow website](https://www.tensorflow.org)
 * [TensorFlow White Papers](https://www.tensorflow.org/about/bib)
 * [TensorFlow Model Zoo](https://github.com/tensorflow/models)
 * [TensorFlow MOOC on Udacity](https://www.udacity.com/course/deep-learning--ud730)
-* [TensorFlow Course at Stanford](https://web.stanford.edu/class/cs20si)
+* [TensorFlow course at Stanford](https://web.stanford.edu/class/cs20si)
 
 Learn more about the TensorFlow community at the [community page of tensorflow.org](https://www.tensorflow.org/community) for a few ways to participate.
 
diff --git a/configure.py b/configure.py
index 26da09bd94..0d1afbfe15 100644
--- a/configure.py
+++ b/configure.py
@@ -43,7 +43,6 @@ _DEFAULT_CUDA_PATH_WIN = ('C:/Program Files/NVIDIA GPU Computing '
                           'Toolkit/CUDA/v%s' % _DEFAULT_CUDA_VERSION)
 _TF_OPENCL_VERSION = '1.2'
 _DEFAULT_COMPUTECPP_TOOLKIT_PATH = '/usr/local/computecpp'
-_DEFAULT_TRISYCL_INCLUDE_DIR = '/usr/local/triSYCL/include'
 
 
 def is_windows():
@@ -637,7 +636,7 @@ def set_tf_cuda_version(environ_cp):
   write_action_env_to_bazelrc('TF_CUDA_VERSION', tf_cuda_version)
 
 
-def set_tf_cudnn_version(environ_cp):
+def set_tf_cunn_version(environ_cp):
   """Set CUDNN_INSTALL_PATH and TF_CUDNN_VERSION."""
   ask_cudnn_version = (
       'Please specify the cuDNN version you want to use. '
@@ -883,27 +882,6 @@ def set_computecpp_toolkit_path(environ_cp):
   write_action_env_to_bazelrc('COMPUTECPP_TOOLKIT_PATH',
                               computecpp_toolkit_path)
 
-def set_trisycl_include_dir(environ_cp):
-  """Set TRISYCL_INCLUDE_DIR"""
-  ask_trisycl_include_dir = ('Please specify the location of the triSYCL '
-                             'include directory. (Use --config=sycl_trisycl '
-                             'when building with Bazel) '
-                             '[Default is %s]: '
-                             ) % (_DEFAULT_TRISYCL_INCLUDE_DIR)
-  while True:
-    trisycl_include_dir = get_from_env_or_user_or_default(
-      environ_cp, 'TRISYCL_INCLUDE_DIR', ask_trisycl_include_dir,
-      _DEFAULT_TRISYCL_INCLUDE_DIR)
-    if os.path.exists(trisycl_include_dir):
-      break
-
-    print('Invalid triSYCL include directory, %s cannot be found'
-          % (trisycl_include_dir))
-
-  # Set TRISYCL_INCLUDE_DIR
-  environ_cp['TRISYCL_INCLUDE_DIR'] = trisycl_include_dir
-  write_action_env_to_bazelrc('TRISYCL_INCLUDE_DIR',
-                              trisycl_include_dir)
 
 def set_mpi_home(environ_cp):
   """Set MPI_HOME."""
@@ -1019,8 +997,6 @@ def main():
     environ_cp['TF_NEED_GCP'] = '0'
     environ_cp['TF_NEED_HDFS'] = '0'
     environ_cp['TF_NEED_JEMALLOC'] = '0'
-    environ_cp['TF_NEED_OPENCL_SYCL'] = '0'
-    environ_cp['TF_NEED_COMPUTECPP'] = '0'
     environ_cp['TF_NEED_OPENCL'] = '0'
     environ_cp['TF_CUDA_CLANG'] = '0'
 
@@ -1042,21 +1018,17 @@ def main():
   set_build_var(environ_cp, 'TF_NEED_VERBS', 'VERBS', 'with_verbs_support',
                 False, 'verbs')
 
-  set_action_env_var(environ_cp, 'TF_NEED_OPENCL_SYCL', 'OpenCL SYCL', False)
-  if environ_cp.get('TF_NEED_OPENCL_SYCL') == '1':
+  set_action_env_var(environ_cp, 'TF_NEED_OPENCL', 'OpenCL', False)
+  if environ_cp.get('TF_NEED_OPENCL') == '1':
     set_host_cxx_compiler(environ_cp)
     set_host_c_compiler(environ_cp)
-    set_action_env_var(environ_cp, 'TF_NEED_COMPUTECPP', 'ComputeCPP', True)
-    if environ_cp.get('TF_NEED_COMPUTECPP') == '1':
-      set_computecpp_toolkit_path(environ_cp)
-    else:
-      set_trisycl_include_dir(environ_cp)
+    set_computecpp_toolkit_path(environ_cp)
 
   set_action_env_var(environ_cp, 'TF_NEED_CUDA', 'CUDA', False)
   if (environ_cp.get('TF_NEED_CUDA') == '1' and
       'TF_CUDA_CONFIG_REPO' not in environ_cp):
     set_tf_cuda_version(environ_cp)
-    set_tf_cudnn_version(environ_cp)
+    set_tf_cunn_version(environ_cp)
     set_tf_cuda_compute_capabilities(environ_cp)
 
     set_tf_cuda_clang(environ_cp)
diff --git a/tensorflow/BUILD b/tensorflow/BUILD
index c8f0b6b061..49828cd4d6 100644
--- a/tensorflow/BUILD
+++ b/tensorflow/BUILD
@@ -54,15 +54,6 @@ config_setting(
     visibility = ["//visibility:public"],
 )
 
-config_setting(
-    name = "raspberry_pi_armeabi",
-    values = {
-        "crosstool_top": "@local_config_arm_compiler//:toolchain",
-        "cpu": "armeabi",
-    },
-    visibility = ["//visibility:public"],
-)
-
 config_setting(
     name = "android_arm",
     values = {
@@ -769,13 +760,6 @@ tf_cc_shared_object(
     ],
 )
 
-exports_files(
-    [
-        "tf_version_script.lds",
-        "tf_exported_symbols.lds",
-    ],
-)
-
 py_library(
     name = "tensorflow_py",
     srcs = ["__init__.py"],
diff --git a/tensorflow/compiler/aot/tfcompile.bzl b/tensorflow/compiler/aot/tfcompile.bzl
index b795afd5b8..ee291c12d0 100644
--- a/tensorflow/compiler/aot/tfcompile.bzl
+++ b/tensorflow/compiler/aot/tfcompile.bzl
@@ -130,10 +130,6 @@ def tf_library(name, graph, config,
   header_file = name + ".h"
   object_file = name + ".o"
   ep = ("__" + PACKAGE_NAME + "__" + name).replace("/", "_")
-  if type(tfcompile_flags) == type(""):
-    flags = tfcompile_flags
-  else:
-    flags = " ".join(["'" + arg.replace("'", "'\\''") + "'" for arg in (tfcompile_flags or [])])
   native.genrule(
       name=("gen_" + name),
       srcs=[
@@ -152,7 +148,7 @@ def tf_library(name, graph, config,
            " --target_triple=" + target_llvm_triple() +
            " --out_header=$(@D)/" + header_file +
            " --out_object=$(@D)/" + object_file +
-           flags),
+           " " + (tfcompile_flags or "")),
       tools=[tfcompile_tool],
       visibility=visibility,
       testonly=testonly,
@@ -189,7 +185,7 @@ def tf_library(name, graph, config,
            " --cpp_class=" + cpp_class +
            " --target_triple=" + target_llvm_triple() +
            " --out_session_module=$(@D)/" + session_module_pb +
-           flags),
+           " " + (tfcompile_flags or "")),
       tools=[tfcompile_tool],
       visibility=visibility,
       testonly=testonly,
@@ -199,7 +195,8 @@ def tf_library(name, graph, config,
 
   # The cc_library rule packaging up the header and object file, and needed
   # kernel implementations.
-  need_xla_data_proto = (flags and flags.find("--gen_program_shape") != -1)
+  need_xla_data_proto = (tfcompile_flags and
+                         tfcompile_flags.find("--gen_program_shape") != -1)
   native.cc_library(
       name=name,
       srcs=[object_file],
diff --git a/tensorflow/compiler/tests/BUILD b/tensorflow/compiler/tests/BUILD
index 79c4befd36..c372e05474 100644
--- a/tensorflow/compiler/tests/BUILD
+++ b/tensorflow/compiler/tests/BUILD
@@ -672,7 +672,7 @@ tf_library(
     cpp_class = "LSTMLayerInference",
     graph = "lstm_layer_inference.pbtxt",
     tags = ["manual"],
-    tfcompile_flags = ["--xla_cpu_multi_thread_eigen=false"],
+    tfcompile_flags = "--xla_cpu_multi_thread_eigen=false",
 )
 
 # -----------------------------------------------------------------------------
diff --git a/tensorflow/compiler/tests/fused_batchnorm_test.py b/tensorflow/compiler/tests/fused_batchnorm_test.py
index a773b5a947..936fcf8b6b 100644
--- a/tensorflow/compiler/tests/fused_batchnorm_test.py
+++ b/tensorflow/compiler/tests/fused_batchnorm_test.py
@@ -36,7 +36,7 @@ class FusedBatchNormTest(XLATestCase):
     x_square = x * x
     x_square_sum = np.sum(x_square, (0, 1, 2))
     x_sum = np.sum(x, axis=(0, 1, 2))
-    element_count = np.size(x) / int(np.shape(x)[-1])
+    element_count = np.size(x) / int(np.shape(x)[0])
     mean = x_sum / element_count
     var = x_square_sum / element_count - mean * mean
     normalized = (x - mean) / np.sqrt(var + epsilon)
@@ -64,9 +64,8 @@ class FusedBatchNormTest(XLATestCase):
     return grad_x, grad_scale, grad_offset
 
   def testInference(self):
-    channel = 3
-    x_shape = [2, 2, 6, channel]
-    scale_shape = [channel]
+    x_shape = [2, 2, 6, 2]
+    scale_shape = [2]
     x_val = np.random.random_sample(x_shape).astype(np.float32)
     scale_val = np.random.random_sample(scale_shape).astype(np.float32)
 
@@ -75,8 +74,8 @@ class FusedBatchNormTest(XLATestCase):
     with self.test_session() as sess, self.test_scope():
       # To avoid constant folding
       t_val = array_ops.placeholder(np.float32, shape=x_shape, name="x")
-      scale = array_ops.placeholder(np.float32, shape=scale_shape, name="scale")
-      offset = array_ops.placeholder(np.float32, shape=scale_shape, name="offset")
+      scale = array_ops.placeholder(np.float32, shape=[2], name="scale")
+      offset = array_ops.placeholder(np.float32, shape=[2], name="offset")
       epsilon = 0.001
       y_ref, mean_ref, var_ref = self._reference_training(
           x_val, scale_val, offset_val, epsilon, data_format)
@@ -98,9 +97,8 @@ class FusedBatchNormTest(XLATestCase):
       self.assertAllClose(y_val, y_ref, atol=1e-3)
 
   def _testLearning(self, use_gradient_checker):
-    channel = 3
-    x_shape = [2, 2, 6, channel]
-    scale_shape = [channel]
+    x_shape = [2, 2, 6, 2]
+    scale_shape = [2]
     x_val = np.random.random_sample(x_shape).astype(np.float32)
     scale_val = np.random.random_sample(scale_shape).astype(np.float32)
 
@@ -111,8 +109,8 @@ class FusedBatchNormTest(XLATestCase):
     with self.test_session() as sess, self.test_scope():
       # To avoid constant folding
       t_val = array_ops.placeholder(np.float32, shape=x_shape, name="x")
-      scale = array_ops.placeholder(np.float32, shape=scale_shape, name="scale")
-      offset = array_ops.placeholder(np.float32, shape=scale_shape, name="offset")
+      scale = array_ops.placeholder(np.float32, shape=[2], name="scale")
+      offset = array_ops.placeholder(np.float32, shape=[2], name="offset")
       epsilon = 0.001
       y, mean, var = nn.fused_batch_norm(
           t_val,
@@ -156,9 +154,8 @@ class FusedBatchNormTest(XLATestCase):
   def testGradient(self):
     # TODO(b/64270657): Use gradient_checker here in addition to comparing with
     # this reference implementation.
-    channel = 3
-    x_shape = [2, 2, 6, channel]
-    scale_shape = [channel]
+    x_shape = [2, 2, 6, 2]
+    scale_shape = [2]
     grad_val = np.random.random_sample(x_shape).astype(np.float32)
     x_val = np.random.random_sample(x_shape).astype(np.float32)
     scale_val = np.random.random_sample(scale_shape).astype(np.float32)
diff --git a/tensorflow/compiler/xla/service/BUILD b/tensorflow/compiler/xla/service/BUILD
index fb980e7056..db265510f2 100644
--- a/tensorflow/compiler/xla/service/BUILD
+++ b/tensorflow/compiler/xla/service/BUILD
@@ -90,6 +90,8 @@ cc_library(
         ":shape_inference",
         "//tensorflow/compiler/xla:literal_util",
         "//tensorflow/compiler/xla:shape_util",
+        "//tensorflow/compiler/xla:status",
+        "//tensorflow/compiler/xla:status_macros",
         "//tensorflow/compiler/xla:statusor",
         "//tensorflow/compiler/xla:types",
         "//tensorflow/compiler/xla:util",
diff --git a/tensorflow/compiler/xla/service/hlo_instruction.h b/tensorflow/compiler/xla/service/hlo_instruction.h
index cda8b07c61..1bd0cca945 100644
--- a/tensorflow/compiler/xla/service/hlo_instruction.h
+++ b/tensorflow/compiler/xla/service/hlo_instruction.h
@@ -222,7 +222,7 @@ class HloInstruction {
       tensorflow::gtl::ArraySlice<int64> strides);
 
   // Creates a slice instruction, where the first operand is sliced by
-  // start indices specified in the second operand, and by size specified in
+  // start indices specified in the second operand, and by size specfied in
   // 'slice_sizes'.
   static std::unique_ptr<HloInstruction> CreateDynamicSlice(
       const Shape& shape, HloInstruction* operand,
diff --git a/tensorflow/compiler/xla/service/hlo_instruction_test.cc b/tensorflow/compiler/xla/service/hlo_instruction_test.cc
index 76b12fc8d3..070bb4bc42 100644
--- a/tensorflow/compiler/xla/service/hlo_instruction_test.cc
+++ b/tensorflow/compiler/xla/service/hlo_instruction_test.cc
@@ -792,8 +792,8 @@ TEST_F(HloInstructionTest, ComplexFusionOp) {
   //   sub = Sub(mul, clamp)
   //   tuple = Tuple({sub, sub, mul, C1})
   //
-  // Notable complexities are repeated operands in the same instruction,
-  // different shapes, use of value in different expressions.
+  // Notable complexities are repeated operands in a same instruction, different
+  // shapes, use of value in different expressions.
   auto c1 = builder.AddInstruction(
       HloInstruction::CreateConstant(Literal::CreateR0<float>(1.1f)));
   auto c2 = builder.AddInstruction(
diff --git a/tensorflow/contrib/batching/BUILD b/tensorflow/contrib/batching/BUILD
index a111cfecb3..8b7df4a84c 100644
--- a/tensorflow/contrib/batching/BUILD
+++ b/tensorflow/contrib/batching/BUILD
@@ -82,7 +82,6 @@ cc_library(
 tf_cc_test(
     name = "adaptive_shared_batch_scheduler_test",
     srcs = ["adaptive_shared_batch_scheduler_test.cc"],
-    tags = ["manual"],  # b/69013768
     deps = [
         ":adaptive_shared_batch_scheduler",
         "//tensorflow/contrib/batching/test_util:fake_clock_env",
diff --git a/tensorflow/contrib/batching/kernels/batch_kernels.cc b/tensorflow/contrib/batching/kernels/batch_kernels.cc
index 6041d8c9b2..3b7c538fcc 100644
--- a/tensorflow/contrib/batching/kernels/batch_kernels.cc
+++ b/tensorflow/contrib/batching/kernels/batch_kernels.cc
@@ -461,7 +461,7 @@ class BatchResource : public ResourceBase {
     return Status::OK();
   }
 
-  // Looks up the batcher queue for 'queue_name'. If it didn't previously exist,
+  // Looks up the batcher queue for 'queue_name'. If it did't previously exist,
   // creates it.
   Status LookupOrCreateBatcherQueue(const string& queue_name,
                                     BatcherQueue** queue) {
diff --git a/tensorflow/contrib/bayesflow/python/kernel_tests/csiszar_divergence_test.py b/tensorflow/contrib/bayesflow/python/kernel_tests/csiszar_divergence_test.py
index 2e94b7206d..8c6a614beb 100644
--- a/tensorflow/contrib/bayesflow/python/kernel_tests/csiszar_divergence_test.py
+++ b/tensorflow/contrib/bayesflow/python/kernel_tests/csiszar_divergence_test.py
@@ -759,7 +759,7 @@ class CsiszarVIMCOTest(test.TestCase):
   def _csiszar_vimco_helper_grad(self, logu, delta):
     """Finite difference approximation of `grad(csiszar_vimco_helper, logu)`."""
 
-    # This code actually estimates the sum of the Jacobiab because that's what
+    # This code actually estimates the sum of the Jacobiab because thats what
     # TF's `gradients` does.
     np_log_avg_u1, np_log_sooavg_u1 = self._csiszar_vimco_helper(
         logu[..., None] + np.diag([delta]*len(logu)))
diff --git a/tensorflow/contrib/cmake/CMakeLists.txt b/tensorflow/contrib/cmake/CMakeLists.txt
index 77a3fc0c83..8744fc492f 100644
--- a/tensorflow/contrib/cmake/CMakeLists.txt
+++ b/tensorflow/contrib/cmake/CMakeLists.txt
@@ -34,41 +34,13 @@ option(tensorflow_BUILD_SHARED_LIB "Build TensorFlow as a shared library" OFF)
 option(tensorflow_OPTIMIZE_FOR_NATIVE_ARCH "Enable compiler optimizations for the native processor architecture (if available)" ON)
 option(tensorflow_WIN_CPU_SIMD_OPTIONS "Enables CPU SIMD instructions")
 option(tensorflow_ENABLE_SNAPPY_SUPPORT "Enable SNAPPY compression support" ON)
-if(HAIKU)
-	option(tensorflow_ENABLE_POSITION_INDEPENDENT_CODE "Enable PIE support" OFF)
-else()
-	option(tensorflow_ENABLE_POSITION_INDEPENDENT_CODE "Enable PIE support" ON)
-endif()
-
 
 if (NOT WIN32)
   # Threads: defines CMAKE_THREAD_LIBS_INIT and adds -pthread compile option
   # for targets that link ${CMAKE_THREAD_LIBS_INIT}.
   find_package (Threads)
-
-  option(tensorflow_PATH_STATIC_LIB "Additional library search path for libcudnn_static.a, libnccl_static.a, libculibos.a" /usr/local/cuda/lib64/)
-  option(tensorflow_CUDNN_INCLUDE "cudnn.h header install path" /usr/include/)
-  if (NOT tensorflow_CUDNN_INCLUDE)
-    # option's default value is OFF. Fill it with real default values
-    set(tensorflow_CUDNN_INCLUDE /usr/include)
-  endif (NOT tensorflow_CUDNN_INCLUDE)
-  option(tensorflow_PATH_CUDNN_STATIC_LIB "Override PATH_STATIC_LIB for libcudnn_static.a" ${tensorflow_PATH_STATIC_LIB})
-  option(tensorflow_PATH_NCCL_STATIC_LIB "Override PATH_STATIC_LIB for libnccl_static.a" ${tensorflow_PATH_STATIC_LIB})
-  option(tensorflow_CUDA_LIBRARY_PATH "Designate the default CUDA library paths" /usr/local/cuda/lib64)
-  if (NOT tensorflow_CUDA_LIBRARY_PATH)
-    # option's default value is OFF. Fill it with real default values
-    set(tensorflow_CUDA_LIBRARY_PATH /usr/local/cuda/lib64)
-  endif (NOT tensorflow_CUDA_LIBRARY_PATH)
 endif()
 
-if (WIN32)
-  set(BOOL_WIN32 ON)
-else (WIN32)
-  set(BOOL_WIN32 OFF)
-  set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fPIC")
-  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fPIC")
-endif (WIN32)
-
 # [CLEANUP] Remove when done
 # For debugging
 function(SHOW_VARIABLES)
@@ -86,12 +58,7 @@ set (DOWNLOAD_LOCATION "${CMAKE_CURRENT_BINARY_DIR}/downloads"
      CACHE PATH "Location where external projects will be downloaded.")
 mark_as_advanced(DOWNLOAD_LOCATION)
 
-if (tensorflow_ENABLE_POSITION_INDEPENDENT_CODE)
-	set(CMAKE_POSITION_INDEPENDENT_CODE ON)
-else()
-	set(CMAKE_POSITION_INDEPENDENT_CODE OFF)
-endif()
-
+set(CMAKE_POSITION_INDEPENDENT_CODE ON)
 add_definitions(-DEIGEN_AVOID_STL_ARRAY)
 if(WIN32)
   add_definitions(-DNOMINMAX -D_WIN32_WINNT=0x0A00 -DLANG_CXX11 -DCOMPILER_MSVC)
@@ -250,35 +217,20 @@ endif()
 if(UNIX)
   list(APPEND tensorflow_EXTERNAL_LIBRARIES ${CMAKE_THREAD_LIBS_INIT} ${CMAKE_DL_LIBS})
 endif()
-if(HAIKU)
-  list(APPEND tensorflow_EXTERNAL_LIBRARIES network)
-endif()
 
 if (tensorflow_ENABLE_GPU)
-  if (NOT WIN32)
-    # Default install paths for cuda libraries in Linux
-    # In some Linux distros, find_package(CUDA) seems to require CMAKE_LIBRARY_PATH to include cuda-lib paths
-    list(APPEND CMAKE_LIBRARY_PATH "${tensorflow_CUDA_LIBRARY_PATH}")
-    list(APPEND CMAKE_LIBRARY_PATH "${tensorflow_CUDA_LIBRARY_PATH}/stubs")
-  endif (NOT WIN32)
-
-  find_package(CUDA 8.0 REQUIRED)
-
-  # by default we assume compute cabability 3.5 and 5.2. If you change this change it in
-  # CUDA_NVCC_FLAGS and cuda_config.h below
-  set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-gencode arch=compute_30,code=\"sm_30,compute_30\";-gencode arch=compute_35,code=\"sm_35,compute_35\";-gencode arch=compute_52,code=\"sm_52,compute_52\")
-  set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};--include-path ${PROJECT_BINARY_DIR}/$\{build_configuration\};--expt-relaxed-constexpr)
-  set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-ftz=true)  # Flush denormals to zero
-  set(CUDA_INCLUDE ${CUDA_TOOLKIT_TARGET_DIR} ${CUDA_TOOLKIT_TARGET_DIR}/extras/CUPTI/include)
-  include_directories(${CUDA_INCLUDE})
   if (WIN32)
+    find_package(CUDA 8.0 REQUIRED)
+
+    # by default we assume compute cabability 3.5 and 5.2. If you change this change it in
+    # CUDA_NVCC_FLAGS and cuda_config.h below
+    set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-gencode arch=compute_30,code=\"sm_30,compute_30\";-gencode arch=compute_35,code=\"sm_35,compute_35\";-gencode arch=compute_52,code=\"sm_52,compute_52\")
+    set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};--include-path ${PROJECT_BINARY_DIR}/$\{build_configuration\};--expt-relaxed-constexpr)
+    set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-ftz=true)  # Flush denormals to zero
+    set(CUDA_INCLUDE ${CUDA_TOOLKIT_TARGET_DIR} ${CUDA_TOOLKIT_TARGET_DIR}/extras/CUPTI/include)
+    include_directories(${CUDA_INCLUDE})
     add_definitions(-DGOOGLE_CUDA=1 -DTF_EXTRA_CUDA_CAPABILITIES=3.0,3.5,5.2)
-  else (WIN32)
-    # Without these double quotes, cmake in Linux makes it "-DTF_EXTRA_CUDA_CAPABILITIES=3.0, -D3.5, -D5.2" for cc, which incurs build breaks
-    add_definitions(-DGOOGLE_CUDA=1 -D"TF_EXTRA_CUDA_CAPABILITIES=3.0,3.5,5.2")
-  endif (WIN32)
 
-  if (WIN32)
     # add cudnn
     if(NOT CUDNN_HOME)
       set(CUDNN_HOME ${CUDA_TOOLKIT_TARGET_DIR})
@@ -286,48 +238,18 @@ if (tensorflow_ENABLE_GPU)
     include_directories(${CUDNN_HOME})
     set(CUDA_LIBRARIES ${CUDA_LIBRARIES} ${CUDA_CUDA_LIBRARY} ${CUDA_CUBLAS_LIBRARIES} ${CUDA_CUFFT_LIBRARIES}
       ${CUDA_curand_LIBRARY} ${CUDA_cupti_LIBRARY} ${CUDA_cusolver_LIBRARY} ${CUDNN_HOME}/lib/x64/cudnn.lib)
-  else (WIN32)
-    set(CUDNN_INCLUDE "${tensorflow_CUDNN_INCLUDE}")
-
-    find_library(nccl_STATIC_LIBRARY NAMES libnccl_static.a PATHS ${tensorflow_PATH_NCCL_STATIC_LIB} ${CUDA_TOOLKIT_ROOT_DIR})
-    if (NOT nccl_STATIC_LIBRARY)
-      message(FATAL_ERROR "NCCL is required for GPU-build")
-    else (NOT nccl_STATIC_LIBRARY)
-      message("nccl-static: ${nccl_STATIC_LIBRARY}")
-      # something like /usr/lib64/libnccl_static.a
-    endif (NOT nccl_STATIC_LIBRARY)
-
-    find_library(cudnn_STATIC_LIBRARY NAMES libcudnn_static.a PATHS ${tensorflow_PATH_CUDNN_STATIC_LIB} ${CUDA_TOOLKIT_ROOT_DIR})
-    if (NOT cudnn_STATIC_LIBRARY)
-      message(FATAL_ERROR "CUDNN is required for GPU-build")
-    else (NOT cudnn_STATIC_LIBRARY)
-      message("cudnn-static: ${cudnn_STATIC_LIBRARY}")
-    endif (NOT cudnn_STATIC_LIBRARY)
 
-    find_library(culibos_STATIC_LIBRARY NAMES libculibos.a PATHS ${tensorflow_PATH_STATIC_LIB} ${CUDA_TOOLKIT_ROOT_DIR})
-    if (NOT culibos_STATIC_LIBRARY)
-      message(FATAL_ERROR "CULIBOS is required for GPU-build")
-    else (NOT culibos_STATIC_LIBRARY)
-      message("culibos-static: ${culibos_STATIC_LIBRARY}")
-    endif (NOT culibos_STATIC_LIBRARY)
-
-    include_directories(${CUDNN_INCLUDE})
-    set(CUDA_LIBRARIES ${CUDA_LIBRARIES} ${CUDA_CUDA_LIBRARY} ${CUDA_CUBLAS_LIBRARIES} ${CUDA_CUFFT_LIBRARIES}
-      ${CUDA_curand_LIBRARY} ${CUDA_cupti_LIBRARY} ${CUDA_cusolver_LIBRARY} ${cudnn_STATIC_LIBRARY} ${culibos_STATIC_LIBRARY} ${nccl_STATIC_LIBRARY})
-  endif (WIN32)
-
-  # create cuda_config.h
-  FILE(WRITE ${tensorflow_source_dir}/third_party/gpus/cuda/cuda_config.h
-    "#ifndef CUDA_CUDA_CONFIG_H_\n"
-    "#define CUDA_CUDA_CONFIG_H_\n"
-    "#define TF_CUDA_CAPABILITIES CudaVersion(\"3.0\"),CudaVersion(\"3.5\"),CudaVersion(\"5.2\")\n"
-    "#define TF_CUDA_VERSION \"64_80\"\n"
-    "#define TF_CUDNN_VERSION \"64_6\"\n"
-    "#define TF_CUDA_TOOLKIT_PATH \"${CUDA_TOOLKIT_ROOT_DIR}\"\n"
-    "#endif  // CUDA_CUDA_CONFIG_H_\n"
-  )
+    # create cuda_config.h
+    FILE(WRITE ${tensorflow_source_dir}/third_party/gpus/cuda/cuda_config.h
+      "#ifndef CUDA_CUDA_CONFIG_H_\n"
+      "#define CUDA_CUDA_CONFIG_H_\n"
+      "#define TF_CUDA_CAPABILITIES CudaVersion(\"3.0\"),CudaVersion(\"3.5\"),CudaVersion(\"5.2\")\n"
+      "#define TF_CUDA_VERSION \"64_80\"\n"
+      "#define TF_CUDNN_VERSION \"64_6\"\n"
+      "#define TF_CUDA_TOOLKIT_PATH \"${CUDA_TOOLKIT_ROOT_DIR}\"\n"
+      "#endif  // CUDA_CUDA_CONFIG_H_\n"
+    )
 
-  if (WIN32)
     # tf assumes in various places header files to be in cuda/include. On windows the cuda sdk
     # installs them under cuda/version/include and to avoid that we need to change tf we copy a
     # few files to cuda/include
@@ -339,25 +261,12 @@ if (tensorflow_ENABLE_GPU)
       ${CUDA_TOOLKIT_TARGET_DIR}/include/cusolverDn.h
       DESTINATION ${tensorflow_source_dir}/third_party/gpus/cuda/include
     )
-  else(WIN32)
-    # Linux has slightly differnt install paths than Windows
-    FILE(COPY
-      ${CUDA_TOOLKIT_TARGET_DIR}/include/cuda.h ${CUDA_TOOLKIT_TARGET_DIR}/include/cuComplex.h
-      ${CUDA_TOOLKIT_TARGET_DIR}/include/cublas_v2.h ${CUDNN_INCLUDE}/cudnn.h
-      ${CUDA_TOOLKIT_TARGET_DIR}/include/cufft.h ${CUDA_TOOLKIT_TARGET_DIR}/include/curand.h
-      ${CUDA_TOOLKIT_TARGET_DIR}/include/cuda_runtime_api.h
-      ${CUDA_TOOLKIT_TARGET_DIR}/include/cusolverDn.h
-      DESTINATION ${tensorflow_source_dir}/third_party/gpus/cuda/include
-    )
-  endif(WIN32)
+    include_directories(${tensorflow_source_dir}/third_party/gpus)
+    # add cuda libraries to tensorflow_EXTERNAL_LIBRARIES
+    list(APPEND tensorflow_EXTERNAL_LIBRARIES ${CUDA_LIBRARIES})
 
-  include_directories(${tensorflow_source_dir}/third_party/gpus)
-  # add cuda libraries to tensorflow_EXTERNAL_LIBRARIES
-  list(APPEND tensorflow_EXTERNAL_LIBRARIES ${CUDA_LIBRARIES})
-
-  # NOTE(mrry): Update these flags when the version of CUDA or cuDNN used
-  # in the default build is upgraded.
-  if(WIN32)
+    # NOTE(mrry): Update these flags when the version of CUDA or cuDNN used
+    # in the default build is upgraded.
     set(tensorflow_BUILD_INFO_FLAGS --build_config cuda --key_value
       msvcp_dll_name=msvcp140.dll
       cudart_dll_name=cudart64_80.dll
@@ -366,9 +275,7 @@ if (tensorflow_ENABLE_GPU)
       cudnn_dll_name=cudnn64_6.dll
       cudnn_version_number=6)
   else(WIN32)
-    set(tensorflow_BUILD_INFO_FLAGS --build_config cuda --key_value
-      cuda_version_number=8.0
-      cudnn_version_number=6)
+    message(FATAL_ERROR "CMake GPU build is currently only supported on Windows.")
   endif(WIN32)
 else(tensorflow_ENABLE_GPU)
   set(tensorflow_BUILD_INFO_FLAGS --build_config cpu --key_value
@@ -386,7 +293,9 @@ include(tf_core_framework.cmake)
 # NOTE: Disabled until issue #3996 is fixed.
 # include(tf_stream_executor.cmake)
 if (tensorflow_ENABLE_GPU)
+  if (WIN32)
     include(tf_stream_executor.cmake)
+  endif()
 endif()
 
 include(tf_core_cpu.cmake)
diff --git a/tensorflow/contrib/cmake/external/boringssl.cmake b/tensorflow/contrib/cmake/external/boringssl.cmake
index cca8444e2a..dc27eadaca 100644
--- a/tensorflow/contrib/cmake/external/boringssl.cmake
+++ b/tensorflow/contrib/cmake/external/boringssl.cmake
@@ -39,12 +39,8 @@ ExternalProject_Add(boringssl
     # BUILD_IN_SOURCE 1
     INSTALL_COMMAND ""
     CMAKE_CACHE_ARGS
-        if(tensorflow_ENABLE_POSITION_INDEPENDENT_CODE)
-        	-DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON
-        else()
-        	-DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=OFF
-        endif()
         -DCMAKE_BUILD_TYPE:STRING=Release
         -DCMAKE_VERBOSE_MAKEFILE:BOOL=OFF
+        -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON
 )
 
diff --git a/tensorflow/contrib/cmake/external/jsoncpp.cmake b/tensorflow/contrib/cmake/external/jsoncpp.cmake
index d2ae4c76e8..5127d7e8f7 100644
--- a/tensorflow/contrib/cmake/external/jsoncpp.cmake
+++ b/tensorflow/contrib/cmake/external/jsoncpp.cmake
@@ -42,12 +42,8 @@ ExternalProject_Add(jsoncpp
     BUILD_IN_SOURCE 1
     INSTALL_COMMAND ""
     CMAKE_CACHE_ARGS
-  	  if(tensorflow_ENABLE_POSITION_INDEPENDENT_CODE)
-  	      -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON
-  	  else()
-   	    	-DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=OFF
-   	 endif()
         -DCMAKE_BUILD_TYPE:STRING=Release
         -DCMAKE_VERBOSE_MAKEFILE:BOOL=OFF
+	-DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON
 )
 
diff --git a/tensorflow/contrib/cmake/external/lmdb.cmake b/tensorflow/contrib/cmake/external/lmdb.cmake
index e41384f023..79971b7cfc 100644
--- a/tensorflow/contrib/cmake/external/lmdb.cmake
+++ b/tensorflow/contrib/cmake/external/lmdb.cmake
@@ -29,14 +29,10 @@ ExternalProject_Add(lmdb
     INSTALL_DIR ${lmdb_INSTALL}
     DOWNLOAD_DIR "${DOWNLOAD_LOCATION}"
     CMAKE_CACHE_ARGS
-		if(tensorflow_ENABLE_POSITION_INDEPENDENT_CODE)
-			-DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON
-		else()
-			-DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=OFF
-		endif()
         -DCMAKE_BUILD_TYPE:STRING=Release
         -DCMAKE_VERBOSE_MAKEFILE:BOOL=OFF
         -DCMAKE_INSTALL_PREFIX:STRING=${lmdb_INSTALL}
+    -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON
 )
 
 if(WIN32)
diff --git a/tensorflow/contrib/cmake/external/png.cmake b/tensorflow/contrib/cmake/external/png.cmake
index aad6618f52..2b2bd47d1c 100644
--- a/tensorflow/contrib/cmake/external/png.cmake
+++ b/tensorflow/contrib/cmake/external/png.cmake
@@ -41,14 +41,10 @@ ExternalProject_Add(png
     INSTALL_DIR ${png_INSTALL}
     DOWNLOAD_DIR "${DOWNLOAD_LOCATION}"
     CMAKE_CACHE_ARGS
-		if(tensorflow_ENABLE_POSITION_INDEPENDENT_CODE)
-			-DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON
-		else()
-			-DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=OFF
-		endif()
         -DCMAKE_BUILD_TYPE:STRING=Release
         -DCMAKE_VERBOSE_MAKEFILE:BOOL=OFF
         -DCMAKE_INSTALL_PREFIX:STRING=${png_INSTALL}
+	-DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON
 	-DZLIB_ROOT:STRING=${ZLIB_INSTALL}
 )
 
diff --git a/tensorflow/contrib/cmake/external/protobuf.cmake b/tensorflow/contrib/cmake/external/protobuf.cmake
index b53857a47b..1e300e21df 100644
--- a/tensorflow/contrib/cmake/external/protobuf.cmake
+++ b/tensorflow/contrib/cmake/external/protobuf.cmake
@@ -44,12 +44,8 @@ ExternalProject_Add(protobuf
         ${PROTOBUF_ADDITIONAL_CMAKE_OPTIONS}
     INSTALL_COMMAND ""
     CMAKE_CACHE_ARGS
-		if(tensorflow_ENABLE_POSITION_INDEPENDENT_CODE)
-			-DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON
-		else()
-			-DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=OFF
-		endif()
         -DCMAKE_BUILD_TYPE:STRING=Release
         -DCMAKE_VERBOSE_MAKEFILE:BOOL=OFF
+        -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON
         -DZLIB_ROOT:STRING=${ZLIB_INSTALL}
 )
diff --git a/tensorflow/contrib/cmake/external/re2.cmake b/tensorflow/contrib/cmake/external/re2.cmake
index b56f4b0898..cb4ec9c2de 100644
--- a/tensorflow/contrib/cmake/external/re2.cmake
+++ b/tensorflow/contrib/cmake/external/re2.cmake
@@ -38,11 +38,7 @@ ExternalProject_Add(re2
     BUILD_IN_SOURCE 1
     DOWNLOAD_DIR "${DOWNLOAD_LOCATION}"
     CMAKE_CACHE_ARGS
-		if(tensorflow_ENABLE_POSITION_INDEPENDENT_CODE)
-			-DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON
-		else()
-			-DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=OFF
-		endif()
         -DCMAKE_BUILD_TYPE:STRING=Release
         -DCMAKE_INSTALL_PREFIX:STRING=${re2_INSTALL}
-)
+        -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON
+)
\ No newline at end of file
diff --git a/tensorflow/contrib/cmake/external/snappy.cmake b/tensorflow/contrib/cmake/external/snappy.cmake
index 926c271fd9..2d2451521c 100644
--- a/tensorflow/contrib/cmake/external/snappy.cmake
+++ b/tensorflow/contrib/cmake/external/snappy.cmake
@@ -40,15 +40,11 @@ ExternalProject_Add(snappy
     LOG_CONFIGURE ON
     LOG_BUILD ON
     CMAKE_CACHE_ARGS
-		if(tensorflow_ENABLE_POSITION_INDEPENDENT_CODE)
-			-DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON
-		else()
-			-DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=OFF
-		endif()
         -DCMAKE_BUILD_TYPE:STRING=Release
         -DCMAKE_VERBOSE_MAKEFILE:BOOL=OFF
         -DSNAPPY_BUILD_TESTS:BOOL=OFF
+        -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON
 )
 
 # actually enables snappy in the source code
-add_definitions(-DTF_USE_SNAPPY)
\ No newline at end of file
+add_definitions(-DTF_USE_SNAPPY)
diff --git a/tensorflow/contrib/cmake/external/sqlite.cmake b/tensorflow/contrib/cmake/external/sqlite.cmake
index 785039a469..1770dcb1fd 100644
--- a/tensorflow/contrib/cmake/external/sqlite.cmake
+++ b/tensorflow/contrib/cmake/external/sqlite.cmake
@@ -53,13 +53,9 @@ else()
         INSTALL_DIR ${sqlite_INSTALL}
         DOWNLOAD_DIR "${DOWNLOAD_LOCATION}"
         CMAKE_CACHE_ARGS
-			if(tensorflow_ENABLE_POSITION_INDEPENDENT_CODE)
-				-DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON
-			else()
-				-DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=OFF
-			endif()
             -DCMAKE_BUILD_TYPE:STRING=Release
             -DCMAKE_VERBOSE_MAKEFILE:BOOL=OFF
+            -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON
             -DCMAKE_INSTALL_PREFIX:STRING=${sqlite_INSTALL}
     )
 
diff --git a/tensorflow/contrib/cmake/external/zlib.cmake b/tensorflow/contrib/cmake/external/zlib.cmake
index f10f84336e..c8af611e1e 100644
--- a/tensorflow/contrib/cmake/external/zlib.cmake
+++ b/tensorflow/contrib/cmake/external/zlib.cmake
@@ -42,13 +42,9 @@ ExternalProject_Add(zlib
     BUILD_IN_SOURCE 1
     DOWNLOAD_DIR "${DOWNLOAD_LOCATION}"
     CMAKE_CACHE_ARGS
-		if(tensorflow_ENABLE_POSITION_INDEPENDENT_CODE)
-			-DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON
-		else()
-			-DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=OFF
-		endif()
         -DCMAKE_BUILD_TYPE:STRING=Release
         -DCMAKE_INSTALL_PREFIX:STRING=${ZLIB_INSTALL}
+	-DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON
 )
 
 # put zlib includes in the directory where they are expected
diff --git a/tensorflow/contrib/cmake/tf_cc_ops.cmake b/tensorflow/contrib/cmake/tf_cc_ops.cmake
index 6e2ac203f9..45eeb11062 100644
--- a/tensorflow/contrib/cmake/tf_cc_ops.cmake
+++ b/tensorflow/contrib/cmake/tf_cc_ops.cmake
@@ -148,11 +148,7 @@ list(REMOVE_ITEM tf_cc_srcs ${tf_cc_test_srcs})
 add_library(tf_cc OBJECT ${tf_cc_srcs})
 add_dependencies(tf_cc tf_cc_framework tf_cc_ops)
 
-if (WIN32)
-  set (pywrap_tensorflow_lib "${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_BUILD_TYPE}/pywrap_tensorflow_internal.lib")
-else (WIN32)
-  set (pywrap_tensorflow_lib "${CMAKE_CURRENT_BINARY_DIR}/libpywrap_tensorflow_internal.so")
-endif (WIN32)
+set (pywrap_tensorflow_lib "${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_BUILD_TYPE}/pywrap_tensorflow_internal.lib")
 add_custom_target(tf_extension_ops)
 
 function(AddUserOps)
@@ -168,13 +164,15 @@ function(AddUserOps)
   # create shared library from source and cuda obj
   add_library(${_AT_TARGET} SHARED ${_AT_SOURCES} ${gpu_lib})
   target_link_libraries(${_AT_TARGET} ${pywrap_tensorflow_lib})
-  if (tensorflow_ENABLE_GPU AND _AT_GPUSOURCES)
-      # some ops call out to cuda directly; need to link libs for the cuda dlls
-      target_link_libraries(${_AT_TARGET} ${CUDA_LIBRARIES})
-  endif()
-  if (_AT_DISTCOPY)
-      add_custom_command(TARGET ${_AT_TARGET} POST_BUILD
-          COMMAND ${CMAKE_COMMAND} -E copy $<TARGET_FILE:${_AT_TARGET}> ${_AT_DISTCOPY}/)
+  if(WIN32)
+    if (tensorflow_ENABLE_GPU AND _AT_GPUSOURCES)
+        # some ops call out to cuda directly; need to link libs for the cuda dlls
+        target_link_libraries(${_AT_TARGET} ${CUDA_LIBRARIES})
+    endif()
+    if (_AT_DISTCOPY)
+        add_custom_command(TARGET ${_AT_TARGET} POST_BUILD
+            COMMAND ${CMAKE_COMMAND} -E copy $<TARGET_FILE:${_AT_TARGET}> ${_AT_DISTCOPY}/)
+    endif()
   endif()
   if (_AT_DEPENDS)
     add_dependencies(${_AT_TARGET} ${_AT_DEPENDS})
@@ -182,19 +180,9 @@ function(AddUserOps)
   # make sure TF_COMPILE_LIBRARY is not defined for this target
   get_target_property(target_compile_flags  ${_AT_TARGET} COMPILE_FLAGS)
   if(target_compile_flags STREQUAL "target_compile_flags-NOTFOUND")
-    if (WIN32)
-      set(target_compile_flags "/UTF_COMPILE_LIBRARY")
-    else (WIN32)
-      # gcc uses UTF as default
-      set(target_compile_flags "-finput-charset=UTF-8")
-    endif (WIN32)
+    set(target_compile_flags "/UTF_COMPILE_LIBRARY")
   else()
-    if (WIN32)
-      set(target_compile_flags "${target_compile_flags} /UTF_COMPILE_LIBRARY")
-    else (WIN32)
-      # gcc uses UTF as default
-      set(target_compile_flags "${target_compile_flags} -finput-charset=UTF-8")
-    endif (WIN32)
+    set(target_compile_flags "${target_compile_flags} /UTF_COMPILE_LIBRARY")
   endif()
   set_target_properties(${_AT_TARGET} PROPERTIES COMPILE_FLAGS ${target_compile_flags})
   add_dependencies(tf_extension_ops ${_AT_TARGET})
diff --git a/tensorflow/contrib/cmake/tf_core_kernels.cmake b/tensorflow/contrib/cmake/tf_core_kernels.cmake
index 2d015908a8..d6b8990664 100644
--- a/tensorflow/contrib/cmake/tf_core_kernels.cmake
+++ b/tensorflow/contrib/cmake/tf_core_kernels.cmake
@@ -179,7 +179,6 @@ file(GLOB_RECURSE tf_core_gpu_kernels_srcs
     "${tensorflow_source_dir}/tensorflow/contrib/image/kernels/*.cu.cc"
     "${tensorflow_source_dir}/tensorflow/contrib/rnn/kernels/*.cu.cc"
     "${tensorflow_source_dir}/tensorflow/contrib/seq2seq/kernels/*.cu.cc"
-    "${tensorflow_source_dir}/tensorflow/contrib/resampler/kernels/*.cu.cc"
 )
 
 if(WIN32 AND tensorflow_ENABLE_GPU)
@@ -203,16 +202,16 @@ endif(WIN32 AND tensorflow_ENABLE_GPU)
 add_library(tf_core_kernels OBJECT ${tf_core_kernels_srcs})
 add_dependencies(tf_core_kernels tf_core_cpu)
 
-if (WIN32)
+if(WIN32)
   target_compile_options(tf_core_kernels PRIVATE /MP)
-endif (WIN32)
-if (tensorflow_ENABLE_GPU)
-  set_source_files_properties(${tf_core_gpu_kernels_srcs} PROPERTIES CUDA_SOURCE_PROPERTY_FORMAT OBJ)
-  set(tf_core_gpu_kernels_lib tf_core_gpu_kernels)
-  cuda_add_library(${tf_core_gpu_kernels_lib} ${tf_core_gpu_kernels_srcs})
-  set_target_properties(${tf_core_gpu_kernels_lib}
-                        PROPERTIES DEBUG_POSTFIX ""
-                        COMPILE_FLAGS "${TF_REGULAR_CXX_FLAGS}"
-  )
-  add_dependencies(${tf_core_gpu_kernels_lib} tf_core_cpu)
+  if (tensorflow_ENABLE_GPU)
+    set_source_files_properties(${tf_core_gpu_kernels_srcs} PROPERTIES CUDA_SOURCE_PROPERTY_FORMAT OBJ)
+    set(tf_core_gpu_kernels_lib tf_core_gpu_kernels)
+    cuda_add_library(${tf_core_gpu_kernels_lib} ${tf_core_gpu_kernels_srcs})
+    set_target_properties(${tf_core_gpu_kernels_lib}
+                          PROPERTIES DEBUG_POSTFIX ""
+                          COMPILE_FLAGS "${TF_REGULAR_CXX_FLAGS}"
+    )
+    add_dependencies(${tf_core_gpu_kernels_lib} tf_core_cpu)
+  endif()
 endif()
diff --git a/tensorflow/contrib/cmake/tf_label_image_example.cmake b/tensorflow/contrib/cmake/tf_label_image_example.cmake
index 7f2f60b089..0d3a4699eb 100644
--- a/tensorflow/contrib/cmake/tf_label_image_example.cmake
+++ b/tensorflow/contrib/cmake/tf_label_image_example.cmake
@@ -34,8 +34,3 @@ target_link_libraries(tf_label_image_example PUBLIC
     ${tf_core_gpu_kernels_lib}
     ${tensorflow_EXTERNAL_LIBRARIES}
 )
-
-install(TARGETS tf_label_image_example
-        RUNTIME DESTINATION bin
-        LIBRARY DESTINATION lib
-        ARCHIVE DESTINATION lib)
\ No newline at end of file
diff --git a/tensorflow/contrib/cmake/tf_python.cmake b/tensorflow/contrib/cmake/tf_python.cmake
index 61b3fd715d..9b863f7bc6 100755
--- a/tensorflow/contrib/cmake/tf_python.cmake
+++ b/tensorflow/contrib/cmake/tf_python.cmake
@@ -715,9 +715,6 @@ function(GENERATE_PYTHON_OP_LIB tf_python_op_lib_name)
       set(require_shape_fn 1)
     endif()
 
-    get_filename_component(GENERATE_PYTHON_OP_LIB_MKDIRPATH ${GENERATE_PYTHON_OP_LIB_DESTINATION} PATH)
-    file(MAKE_DIRECTORY ${GENERATE_PYTHON_OP_LIB_MKDIRPATH})
-
     # Create a C++ executable that links in the appropriate op
     # registrations and generates Python wrapper code based on the
     # registered ops.
@@ -746,7 +743,6 @@ function(GENERATE_PYTHON_OP_LIB tf_python_op_lib_name)
         ${GENERATE_PYTHON_OP_LIB_DESTINATION} PARENT_SCOPE)
 endfunction()
 
-GENERATE_PYTHON_OP_LIB("audio_ops")
 GENERATE_PYTHON_OP_LIB("array_ops")
 GENERATE_PYTHON_OP_LIB("bitwise_ops")
 GENERATE_PYTHON_OP_LIB("math_ops")
@@ -991,7 +987,7 @@ add_library(pywrap_tensorflow_internal SHARED
     $<TARGET_OBJECTS:tf_tools_transform_graph_lib>
     $<$<BOOL:${tensorflow_ENABLE_GRPC_SUPPORT}>:$<TARGET_OBJECTS:tf_core_distributed_runtime>>
     $<TARGET_OBJECTS:tf_core_kernels>
-    $<$<BOOL:${tensorflow_ENABLE_GPU}>:$<$<BOOL:${BOOL_WIN32}>:$<TARGET_OBJECTS:tf_core_kernels_cpu_only>>>
+    $<$<BOOL:${tensorflow_ENABLE_GPU}>:$<TARGET_OBJECTS:tf_core_kernels_cpu_only>>
     $<$<BOOL:${tensorflow_ENABLE_GPU}>:$<TARGET_OBJECTS:tf_stream_executor>>
     ${pywrap_tensorflow_deffile}
 )
@@ -1067,23 +1063,25 @@ if(WIN32)
         DISTCOPY ${CMAKE_CURRENT_BINARY_DIR}/tf_python/tensorflow/contrib/rnn/python/ops/)
 endif(WIN32)
 
-# include contrib/seq2seq as .so
-#
-set(tf_beam_search_srcs
-    "${tensorflow_source_dir}/tensorflow/contrib/seq2seq/kernels/beam_search_ops.cc"
-    "${tensorflow_source_dir}/tensorflow/contrib/seq2seq/kernels/beam_search_ops.h"
-    "${tensorflow_source_dir}/tensorflow/contrib/seq2seq/ops/beam_search_ops.cc"
-)
+if(WIN32)
+    # include contrib/seq2seq as .so
+    #
+    set(tf_beam_search_srcs
+        "${tensorflow_source_dir}/tensorflow/contrib/seq2seq/kernels/beam_search_ops.cc"
+        "${tensorflow_source_dir}/tensorflow/contrib/seq2seq/kernels/beam_search_ops.h"
+        "${tensorflow_source_dir}/tensorflow/contrib/seq2seq/ops/beam_search_ops.cc"
+    )
 
-set(tf_beam_search_gpu_srcs
-    "${tensorflow_source_dir}/tensorflow/contrib/seq2seq/kernels/beam_search_ops_gpu.cu.cc"
-)
+    set(tf_beam_search_gpu_srcs
+        "${tensorflow_source_dir}/tensorflow/contrib/seq2seq/kernels/beam_search_ops_gpu.cu.cc"
+    )
 
-AddUserOps(TARGET _beam_search_ops
-    SOURCES "${tf_beam_search_srcs}"
-    GPUSOURCES ${tf_beam_search_gpu_srcs}
-    DEPENDS pywrap_tensorflow_internal tf_python_ops
-    DISTCOPY ${CMAKE_CURRENT_BINARY_DIR}/tf_python/tensorflow/contrib/seq2seq/python/ops/)
+    AddUserOps(TARGET _beam_search_ops
+        SOURCES "${tf_beam_search_srcs}"
+        GPUSOURCES ${tf_beam_search_gpu_srcs}
+        DEPENDS pywrap_tensorflow_internal tf_python_ops
+        DISTCOPY ${CMAKE_CURRENT_BINARY_DIR}/tf_python/tensorflow/contrib/seq2seq/python/ops/)
+endif(WIN32)
 
 ############################################################
 # Build a PIP package containing the TensorFlow runtime.
diff --git a/tensorflow/contrib/cmake/tf_shared_lib.cmake b/tensorflow/contrib/cmake/tf_shared_lib.cmake
index 3e3fe0cdfa..9bf45bab30 100644
--- a/tensorflow/contrib/cmake/tf_shared_lib.cmake
+++ b/tensorflow/contrib/cmake/tf_shared_lib.cmake
@@ -73,7 +73,7 @@ add_library(tensorflow SHARED
     $<TARGET_OBJECTS:tf_tools_transform_graph_lib>
     $<$<BOOL:${tensorflow_ENABLE_GRPC_SUPPORT}>:$<TARGET_OBJECTS:tf_core_distributed_runtime>>
     $<TARGET_OBJECTS:tf_core_kernels>
-    $<$<BOOL:${tensorflow_ENABLE_GPU}>:$<$<BOOL:${BOOL_WIN32}>:$<TARGET_OBJECTS:tf_core_kernels_cpu_only>>>
+    $<$<BOOL:${tensorflow_ENABLE_GPU}>:$<TARGET_OBJECTS:tf_core_kernels_cpu_only>>
     $<$<BOOL:${tensorflow_ENABLE_GPU}>:$<TARGET_OBJECTS:tf_stream_executor>>
     ${tensorflow_deffile}
 )
@@ -94,46 +94,3 @@ endif()
 if(WIN32)
   add_dependencies(tensorflow tensorflow_static)
 endif(WIN32)
-
-install(TARGETS tensorflow
-        RUNTIME DESTINATION bin
-        LIBRARY DESTINATION lib
-        ARCHIVE DESTINATION lib)
-
-# install necessary headers
-# tensorflow headers
-install(DIRECTORY ${tensorflow_source_dir}/tensorflow/cc/
-        DESTINATION include/tensorflow/cc
-        FILES_MATCHING PATTERN "*.h")
-install(DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/tensorflow/cc/
-        DESTINATION include/tensorflow/cc
-        FILES_MATCHING PATTERN "*.h")
-install(DIRECTORY ${tensorflow_source_dir}/tensorflow/core/
-        DESTINATION include/tensorflow/core
-        FILES_MATCHING PATTERN "*.h")
-install(DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/tensorflow/core/
-        DESTINATION include/tensorflow/core
-        FILES_MATCHING PATTERN "*.h")
-install(DIRECTORY ${tensorflow_source_dir}/tensorflow/stream_executor/
-        DESTINATION include/tensorflow/stream_executor
-        FILES_MATCHING PATTERN "*.h")
-# google protobuf headers
-install(DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/protobuf/src/protobuf/src/google/
-        DESTINATION include/google
-        FILES_MATCHING PATTERN "*.h")
-# nsync headers
-install(DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/external/nsync/
-        DESTINATION include/external/nsync
-        FILES_MATCHING PATTERN "*.h")
-# Eigen directory
-install(DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/eigen/src/eigen/Eigen/
-        DESTINATION include/Eigen)
-# external directory
-install(DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/external/eigen_archive/
-        DESTINATION include/external/eigen_archive)
-# third_party eigen directory
-install(DIRECTORY ${tensorflow_source_dir}/third_party/eigen3/
-        DESTINATION include/third_party/eigen3)
-# unsupported Eigen directory
-install(DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/eigen/src/eigen/unsupported/Eigen/
-        DESTINATION include/unsupported/Eigen)
diff --git a/tensorflow/contrib/cmake/tf_stream_executor.cmake b/tensorflow/contrib/cmake/tf_stream_executor.cmake
index 8d95f0d3e8..3d84f1ebb9 100644
--- a/tensorflow/contrib/cmake/tf_stream_executor.cmake
+++ b/tensorflow/contrib/cmake/tf_stream_executor.cmake
@@ -74,9 +74,6 @@ endif()
 #)
 #list(REMOVE_ITEM tf_stream_executor_srcs ${tf_stream_executor_test_srcs}) 
 
-if (NOT WIN32)
-  set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -lgomp")
-endif (NOT WIN32)
 add_library(tf_stream_executor OBJECT ${tf_stream_executor_srcs})
 
 add_dependencies(tf_stream_executor
diff --git a/tensorflow/contrib/cmake/tf_tools.cmake b/tensorflow/contrib/cmake/tf_tools.cmake
index cb58a2e7df..6ef9598963 100644
--- a/tensorflow/contrib/cmake/tf_tools.cmake
+++ b/tensorflow/contrib/cmake/tf_tools.cmake
@@ -73,7 +73,7 @@ add_executable(${transform_graph}
     $<TARGET_OBJECTS:tf_core_direct_session>
     $<TARGET_OBJECTS:tf_tools_transform_graph_lib>
     $<TARGET_OBJECTS:tf_core_kernels>
-    $<$<BOOL:${tensorflow_ENABLE_GPU}>:$<$<BOOL:${BOOL_WIN32}>:$<TARGET_OBJECTS:tf_core_kernels_cpu_only>>>
+    $<$<BOOL:${tensorflow_ENABLE_GPU}>:$<TARGET_OBJECTS:tf_core_kernels_cpu_only>>
     $<$<BOOL:${tensorflow_ENABLE_GPU}>:$<TARGET_OBJECTS:tf_stream_executor>>
 )
 
@@ -95,7 +95,7 @@ add_executable(${summarize_graph}
     $<TARGET_OBJECTS:tf_core_direct_session>
     $<TARGET_OBJECTS:tf_tools_transform_graph_lib>
     $<TARGET_OBJECTS:tf_core_kernels>
-    $<$<BOOL:${tensorflow_ENABLE_GPU}>:$<$<BOOL:${BOOL_WIN32}>:$<TARGET_OBJECTS:tf_core_kernels_cpu_only>>>
+    $<$<BOOL:${tensorflow_ENABLE_GPU}>:$<TARGET_OBJECTS:tf_core_kernels_cpu_only>>
     $<$<BOOL:${tensorflow_ENABLE_GPU}>:$<TARGET_OBJECTS:tf_stream_executor>>
 )
 
@@ -117,7 +117,7 @@ add_executable(${compare_graphs}
     $<TARGET_OBJECTS:tf_core_direct_session>
     $<TARGET_OBJECTS:tf_tools_transform_graph_lib>
     $<TARGET_OBJECTS:tf_core_kernels>
-    $<$<BOOL:${tensorflow_ENABLE_GPU}>:$<$<BOOL:${BOOL_WIN32}>:$<TARGET_OBJECTS:tf_core_kernels_cpu_only>>>
+    $<$<BOOL:${tensorflow_ENABLE_GPU}>:$<TARGET_OBJECTS:tf_core_kernels_cpu_only>>
     $<$<BOOL:${tensorflow_ENABLE_GPU}>:$<TARGET_OBJECTS:tf_stream_executor>>
 )
 
@@ -138,7 +138,7 @@ add_executable(${benchmark_model}
     $<TARGET_OBJECTS:tf_core_ops>
     $<TARGET_OBJECTS:tf_core_direct_session>
     $<TARGET_OBJECTS:tf_core_kernels>
-    $<$<BOOL:${tensorflow_ENABLE_GPU}>:$<$<BOOL:${BOOL_WIN32}>:$<TARGET_OBJECTS:tf_core_kernels_cpu_only>>>
+    $<$<BOOL:${tensorflow_ENABLE_GPU}>:$<TARGET_OBJECTS:tf_core_kernels_cpu_only>>
     $<$<BOOL:${tensorflow_ENABLE_GPU}>:$<TARGET_OBJECTS:tf_stream_executor>>
 )
 
@@ -147,8 +147,3 @@ target_link_libraries(${benchmark_model} PUBLIC
   ${tf_core_gpu_kernels_lib}
   ${tensorflow_EXTERNAL_LIBRARIES}
 )
-
-install(TARGETS ${transform_graph} ${summarize_graph} ${compare_graphs} ${benchmark_model}
-        RUNTIME DESTINATION bin
-        LIBRARY DESTINATION lib
-        ARCHIVE DESTINATION lib)
diff --git a/tensorflow/contrib/cmake/tf_tutorials.cmake b/tensorflow/contrib/cmake/tf_tutorials.cmake
index e63fccc181..858e7dda92 100644
--- a/tensorflow/contrib/cmake/tf_tutorials.cmake
+++ b/tensorflow/contrib/cmake/tf_tutorials.cmake
@@ -34,8 +34,3 @@ target_link_libraries(tf_tutorials_example_trainer PUBLIC
     ${tf_core_gpu_kernels_lib}
     ${tensorflow_EXTERNAL_LIBRARIES}
 )
-
-install(TARGETS tf_tutorials_example_trainer
-        RUNTIME DESTINATION bin
-        LIBRARY DESTINATION lib
-        ARCHIVE DESTINATION lib)
diff --git a/tensorflow/contrib/crf/python/ops/crf.py b/tensorflow/contrib/crf/python/ops/crf.py
index 1612c75179..4282be5ec8 100644
--- a/tensorflow/contrib/crf/python/ops/crf.py
+++ b/tensorflow/contrib/crf/python/ops/crf.py
@@ -363,8 +363,8 @@ class CrfDecodeForwardRnnCell(rnn_cell.RNNCell):
       scope: Unused variable scope of this cell.
 
     Returns:
-      backpointers: A [batch_size, num_tags] matrix of backpointers.
-      new_state: A [batch_size, num_tags] matrix of new score values.
+      backpointers: [batch_size, num_tags], containing backpointers.
+      new_state: [batch_size, num_tags], containing new score values.
     """
     # For simplicity, in shape comments, denote:
     # 'batch_size' by 'B', 'max_seq_len' by 'T' , 'num_tags' by 'O' (output).
@@ -404,9 +404,8 @@ class CrfDecodeBackwardRnnCell(rnn_cell.RNNCell):
     """Build the CrfDecodeBackwardRnnCell.
 
     Args:
-      inputs: A [batch_size, num_tags] matrix of
-            backpointer of next step (in time order).
-      state: A [batch_size, 1] matrix of tag index of next step.
+      inputs: [batch_size, num_tags], backpointer of next step (in time order).
+      state: [batch_size, 1], next position's tag index.
       scope: Unused variable scope of this cell.
 
     Returns:
@@ -430,16 +429,16 @@ def crf_decode(potentials, transition_params, sequence_length):
   This is a function for tensor.
 
   Args:
-    potentials: A [batch_size, max_seq_len, num_tags] tensor of
+    potentials: A [batch_size, max_seq_len, num_tags] tensor, matrix of
               unary potentials.
-    transition_params: A [num_tags, num_tags] matrix of
+    transition_params: A [num_tags, num_tags] tensor, matrix of
               binary potentials.
-    sequence_length: A [batch_size] vector of true sequence lengths.
+    sequence_length: A [batch_size] tensor, containing sequence lengths.
 
   Returns:
-    decode_tags: A [batch_size, max_seq_len] matrix, with dtype `tf.int32`.
+    decode_tags: A [batch_size, max_seq_len] tensor, with dtype tf.int32.
                 Contains the highest scoring tag indices.
-    best_score: A [batch_size] vector, containing the score of `decode_tags`.
+    best_score: A [batch_size] tensor, containing the score of decode_tags.
   """
   # For simplicity, in shape comments, denote:
   # 'batch_size' by 'B', 'max_seq_len' by 'T' , 'num_tags' by 'O' (output).
diff --git a/tensorflow/contrib/data/python/kernel_tests/BUILD b/tensorflow/contrib/data/python/kernel_tests/BUILD
index dd0457d54b..1923c0586a 100644
--- a/tensorflow/contrib/data/python/kernel_tests/BUILD
+++ b/tensorflow/contrib/data/python/kernel_tests/BUILD
@@ -11,7 +11,6 @@ py_test(
     size = "small",
     srcs = ["batch_dataset_op_test.py"],
     srcs_version = "PY2AND3",
-    tags = ["no_pip"],
     deps = [
         ":dataset_serialization_test",
         "//tensorflow/contrib/data/python/ops:dataset_ops",
@@ -373,7 +372,6 @@ py_test(
     size = "small",
     srcs = ["sequence_dataset_op_test.py"],
     srcs_version = "PY2AND3",
-    tags = ["no_pip"],
     deps = [
         ":dataset_serialization_test",
         "//tensorflow/contrib/data/python/ops:dataset_ops",
@@ -451,7 +449,6 @@ py_test(
     size = "small",
     srcs = ["zip_dataset_op_test.py"],
     srcs_version = "PY2AND3",
-    tags = ["no_pip"],
     deps = [
         ":dataset_serialization_test",
         "//tensorflow/contrib/data/python/ops:dataset_ops",
@@ -468,10 +465,7 @@ py_test(
     size = "small",
     srcs = ["prefetching_ops_test.py"],
     srcs_version = "PY2AND3",
-    tags = [
-        "manual",
-        "no_oss",  # b/68785503
-    ],
+    tags = ["no_oss"],  # b/68785503
     deps = [
         "//tensorflow/contrib/data/python/ops:prefetching_py",
         "//tensorflow/core:protos_all_py",
diff --git a/tensorflow/contrib/distributions/BUILD b/tensorflow/contrib/distributions/BUILD
index 145b9495ff..2dc8ad9483 100644
--- a/tensorflow/contrib/distributions/BUILD
+++ b/tensorflow/contrib/distributions/BUILD
@@ -140,23 +140,6 @@ cuda_py_test(
     ],
 )
 
-cuda_py_test(
-    name = "cauchy_test",
-    size = "medium",
-    srcs = ["python/kernel_tests/cauchy_test.py"],
-    additional_deps = [
-        ":distributions_py",
-        "//third_party/py/numpy",
-        "//tensorflow/python:array_ops",
-        "//tensorflow/python:client_testlib",
-        "//tensorflow/python:framework_for_generated_wrappers",
-        "//tensorflow/python:framework_test_lib",
-        "//tensorflow/python:gradients",
-        "//tensorflow/python:platform_test",
-        "//tensorflow/python:variables",
-    ],
-)
-
 cuda_py_test(
     name = "chi2_test",
     srcs = ["python/kernel_tests/chi2_test.py"],
diff --git a/tensorflow/contrib/distributions/__init__.py b/tensorflow/contrib/distributions/__init__.py
index 0d12d83893..16f6533e57 100644
--- a/tensorflow/contrib/distributions/__init__.py
+++ b/tensorflow/contrib/distributions/__init__.py
@@ -24,7 +24,6 @@ from __future__ import print_function
 
 from tensorflow.contrib.distributions.python.ops import bijectors
 from tensorflow.contrib.distributions.python.ops.binomial import *
-from tensorflow.contrib.distributions.python.ops.cauchy import *
 from tensorflow.contrib.distributions.python.ops.chi2 import *
 from tensorflow.contrib.distributions.python.ops.conditional_distribution import *
 from tensorflow.contrib.distributions.python.ops.conditional_transformed_distribution import *
@@ -84,7 +83,6 @@ from tensorflow.python.util.all_util import remove_undocumented
 
 _allowed_symbols = [
     'bijectors',
-    'Cauchy',
     'ConditionalDistribution',
     'ConditionalTransformedDistribution',
     'FULLY_REPARAMETERIZED',
diff --git a/tensorflow/contrib/distributions/python/kernel_tests/cauchy_test.py b/tensorflow/contrib/distributions/python/kernel_tests/cauchy_test.py
deleted file mode 100644
index 7f7697357c..0000000000
--- a/tensorflow/contrib/distributions/python/kernel_tests/cauchy_test.py
+++ /dev/null
@@ -1,437 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Tests for Cauchy."""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import importlib
-import numpy as np
-
-from tensorflow.contrib.distributions.python.ops import cauchy as cauchy_lib
-from tensorflow.python.framework import constant_op
-from tensorflow.python.framework import dtypes
-from tensorflow.python.framework import ops
-from tensorflow.python.framework import tensor_shape
-from tensorflow.python.ops import array_ops
-from tensorflow.python.ops import gradients_impl
-from tensorflow.python.ops import variables
-from tensorflow.python.platform import test
-from tensorflow.python.platform import tf_logging
-
-
-def try_import(name):  # pylint: disable=invalid-name
-  module = None
-  try:
-    module = importlib.import_module(name)
-  except ImportError as e:
-    tf_logging.warning("Could not import %s: %s" % (name, str(e)))
-  return module
-
-stats = try_import("scipy.stats")
-
-
-class CauchyTest(test.TestCase):
-
-  def setUp(self):
-    self._rng = np.random.RandomState(123)
-
-  def assertAllFinite(self, tensor):
-    is_finite = np.isfinite(tensor.eval())
-    all_true = np.ones_like(is_finite, dtype=np.bool)
-    self.assertAllEqual(all_true, is_finite)
-
-  def _testParamShapes(self, sample_shape, expected):
-    with self.test_session():
-      param_shapes = cauchy_lib.Cauchy.param_shapes(sample_shape)
-      loc_shape, scale_shape = param_shapes["loc"], param_shapes["scale"]
-      self.assertAllEqual(expected, loc_shape.eval())
-      self.assertAllEqual(expected, scale_shape.eval())
-      loc = array_ops.zeros(loc_shape)
-      scale = array_ops.ones(scale_shape)
-      self.assertAllEqual(
-          expected,
-          array_ops.shape(cauchy_lib.Cauchy(loc, scale).sample()).eval())
-
-  def _testParamStaticShapes(self, sample_shape, expected):
-    param_shapes = cauchy_lib.Cauchy.param_static_shapes(sample_shape)
-    loc_shape, scale_shape = param_shapes["loc"], param_shapes["scale"]
-    self.assertEqual(expected, loc_shape)
-    self.assertEqual(expected, scale_shape)
-
-  def testParamShapes(self):
-    sample_shape = [10, 3, 4]
-    self._testParamShapes(sample_shape, sample_shape)
-    self._testParamShapes(constant_op.constant(sample_shape), sample_shape)
-
-  def testParamStaticShapes(self):
-    sample_shape = [10, 3, 4]
-    self._testParamStaticShapes(sample_shape, sample_shape)
-    self._testParamStaticShapes(
-        tensor_shape.TensorShape(sample_shape), sample_shape)
-
-  def testCauchyLogPDF(self):
-    with self.test_session():
-      batch_size = 6
-      loc = constant_op.constant([3.0] * batch_size)
-      scale = constant_op.constant([np.sqrt(10.0)] * batch_size)
-      x = np.array([-2.5, 2.5, 4.0, 0.0, -1.0, 2.0], dtype=np.float32)
-      cauchy = cauchy_lib.Cauchy(loc=loc, scale=scale)
-
-      log_pdf = cauchy.log_prob(x)
-      self.assertAllEqual(cauchy.batch_shape_tensor().eval(),
-                          log_pdf.shape)
-      self.assertAllEqual(cauchy.batch_shape_tensor().eval(),
-                          log_pdf.eval().shape)
-      self.assertAllEqual(cauchy.batch_shape, log_pdf.shape)
-      self.assertAllEqual(cauchy.batch_shape, log_pdf.eval().shape)
-
-      pdf = cauchy.prob(x)
-      self.assertAllEqual(cauchy.batch_shape_tensor().eval(), pdf.shape)
-      self.assertAllEqual(cauchy.batch_shape_tensor().eval(), pdf.eval().shape)
-      self.assertAllEqual(cauchy.batch_shape, pdf.shape)
-      self.assertAllEqual(cauchy.batch_shape, pdf.eval().shape)
-
-      if not stats:
-        return
-      expected_log_pdf = stats.cauchy(loc.eval(), scale.eval()).logpdf(x)
-      self.assertAllClose(expected_log_pdf, log_pdf.eval())
-      self.assertAllClose(np.exp(expected_log_pdf), pdf.eval())
-
-  def testCauchyLogPDFMultidimensional(self):
-    with self.test_session():
-      batch_size = 6
-      loc = constant_op.constant([[3.0, -3.0]] * batch_size)
-      scale = constant_op.constant([[np.sqrt(10.0), np.sqrt(15.0)]] *
-                                   batch_size)
-      x = np.array([[-2.5, 2.5, 4.0, 0.0, -1.0, 2.0]], dtype=np.float32).T
-      cauchy = cauchy_lib.Cauchy(loc=loc, scale=scale)
-
-      log_pdf = cauchy.log_prob(x)
-      log_pdf_values = log_pdf.eval()
-      self.assertEqual(log_pdf.shape, (6, 2))
-      self.assertAllEqual(cauchy.batch_shape_tensor().eval(),
-                          log_pdf.shape)
-      self.assertAllEqual(cauchy.batch_shape_tensor().eval(),
-                          log_pdf.eval().shape)
-      self.assertAllEqual(cauchy.batch_shape, log_pdf.shape)
-      self.assertAllEqual(cauchy.batch_shape, log_pdf.eval().shape)
-
-      pdf = cauchy.prob(x)
-      pdf_values = pdf.eval()
-      self.assertEqual(pdf.shape, (6, 2))
-      self.assertAllEqual(cauchy.batch_shape_tensor().eval(), pdf.shape)
-      self.assertAllEqual(cauchy.batch_shape_tensor().eval(), pdf_values.shape)
-      self.assertAllEqual(cauchy.batch_shape, pdf.shape)
-      self.assertAllEqual(cauchy.batch_shape, pdf_values.shape)
-
-      if not stats:
-        return
-      expected_log_pdf = stats.cauchy(loc.eval(), scale.eval()).logpdf(x)
-      self.assertAllClose(expected_log_pdf, log_pdf_values)
-      self.assertAllClose(np.exp(expected_log_pdf), pdf_values)
-
-  def testCauchyCDF(self):
-    with self.test_session():
-      batch_size = 50
-      loc = self._rng.randn(batch_size)
-      scale = self._rng.rand(batch_size) + 1.0
-      x = np.linspace(-8.0, 8.0, batch_size).astype(np.float64)
-
-      cauchy = cauchy_lib.Cauchy(loc=loc, scale=scale)
-      cdf = cauchy.cdf(x)
-      self.assertAllEqual(cauchy.batch_shape_tensor().eval(), cdf.shape)
-      self.assertAllEqual(cauchy.batch_shape_tensor().eval(), cdf.eval().shape)
-      self.assertAllEqual(cauchy.batch_shape, cdf.shape)
-      self.assertAllEqual(cauchy.batch_shape, cdf.eval().shape)
-      if not stats:
-        return
-      expected_cdf = stats.cauchy(loc, scale).cdf(x)
-      self.assertAllClose(expected_cdf, cdf.eval(), atol=0)
-
-  def testCauchySurvivalFunction(self):
-    with self.test_session():
-      batch_size = 50
-      loc = self._rng.randn(batch_size)
-      scale = self._rng.rand(batch_size) + 1.0
-      x = np.linspace(-8.0, 8.0, batch_size).astype(np.float64)
-
-      cauchy = cauchy_lib.Cauchy(loc=loc, scale=scale)
-
-      sf = cauchy.survival_function(x)
-      self.assertAllEqual(cauchy.batch_shape_tensor().eval(), sf.shape)
-      self.assertAllEqual(cauchy.batch_shape_tensor().eval(), sf.eval().shape)
-      self.assertAllEqual(cauchy.batch_shape, sf.shape)
-      self.assertAllEqual(cauchy.batch_shape, sf.eval().shape)
-      if not stats:
-        return
-      expected_sf = stats.cauchy(loc, scale).sf(x)
-      self.assertAllClose(expected_sf, sf.eval(), atol=0)
-
-  def testCauchyLogCDF(self):
-    with self.test_session():
-      batch_size = 50
-      loc = self._rng.randn(batch_size)
-      scale = self._rng.rand(batch_size) + 1.0
-      x = np.linspace(-100.0, 10.0, batch_size).astype(np.float64)
-
-      cauchy = cauchy_lib.Cauchy(loc=loc, scale=scale)
-
-      cdf = cauchy.log_cdf(x)
-      self.assertAllEqual(cauchy.batch_shape_tensor().eval(), cdf.shape)
-      self.assertAllEqual(cauchy.batch_shape_tensor().eval(), cdf.eval().shape)
-      self.assertAllEqual(cauchy.batch_shape, cdf.shape)
-      self.assertAllEqual(cauchy.batch_shape, cdf.eval().shape)
-
-      if not stats:
-        return
-      expected_cdf = stats.cauchy(loc, scale).logcdf(x)
-      self.assertAllClose(expected_cdf, cdf.eval(), atol=0, rtol=1e-5)
-
-  def testFiniteGradientAtDifficultPoints(self):
-    for dtype in [np.float32, np.float64]:
-      g = ops.Graph()
-      with g.as_default():
-        loc = variables.Variable(dtype(0.0))
-        scale = variables.Variable(dtype(1.0))
-        dist = cauchy_lib.Cauchy(loc=loc, scale=scale)
-        x = np.array([-100., -20., -5., 0., 5., 20., 100.]).astype(dtype)
-        for func in [
-            dist.cdf, dist.log_cdf, dist.survival_function,
-            dist.log_survival_function, dist.log_prob, dist.prob
-        ]:
-          value = func(x)
-          grads = gradients_impl.gradients(value, [loc, scale])
-          with self.test_session(graph=g):
-            variables.global_variables_initializer().run()
-            self.assertAllFinite(value)
-            self.assertAllFinite(grads[0])
-            self.assertAllFinite(grads[1])
-
-  def testCauchyLogSurvivalFunction(self):
-    with self.test_session():
-      batch_size = 50
-      loc = self._rng.randn(batch_size)
-      scale = self._rng.rand(batch_size) + 1.0
-      x = np.linspace(-10.0, 100.0, batch_size).astype(np.float64)
-
-      cauchy = cauchy_lib.Cauchy(loc=loc, scale=scale)
-
-      sf = cauchy.log_survival_function(x)
-      self.assertAllEqual(cauchy.batch_shape_tensor().eval(), sf.shape)
-      self.assertAllEqual(cauchy.batch_shape_tensor().eval(), sf.eval().shape)
-      self.assertAllEqual(cauchy.batch_shape, sf.shape)
-      self.assertAllEqual(cauchy.batch_shape, sf.eval().shape)
-
-      if not stats:
-        return
-      expected_sf = stats.cauchy(loc, scale).logsf(x)
-      self.assertAllClose(expected_sf, sf.eval(), atol=0, rtol=1e-5)
-
-  def testCauchyEntropy(self):
-    with self.test_session():
-      loc = np.array([1.0, 1.0, 1.0])
-      scale = np.array([[1.0, 2.0, 3.0]])
-      cauchy = cauchy_lib.Cauchy(loc=loc, scale=scale)
-
-      entropy = cauchy.entropy()
-      self.assertAllEqual(cauchy.batch_shape_tensor().eval(),
-                          entropy.shape)
-      self.assertAllEqual(cauchy.batch_shape_tensor().eval(),
-                          entropy.eval().shape)
-      self.assertAllEqual(cauchy.batch_shape, entropy.shape)
-      self.assertAllEqual(cauchy.batch_shape, entropy.eval().shape)
-
-      if not stats:
-        return
-      expected_entropy = stats.cauchy(loc, scale).entropy()
-      self.assertAllClose(expected_entropy, entropy.eval())
-
-  def testCauchyMode(self):
-    with self.test_session():
-      # Mu will be broadcast to [7, 7, 7].
-      loc = [7.]
-      scale = [11., 12., 13.]
-
-      cauchy = cauchy_lib.Cauchy(loc=loc, scale=scale)
-
-      self.assertAllEqual((3,), cauchy.mode().shape)
-      self.assertAllEqual([7., 7, 7], cauchy.mode().eval())
-
-  def testCauchyMean(self):
-    with self.test_session():
-      loc = [1., 2., 3.]
-      scale = [7.]
-      cauchy = cauchy_lib.Cauchy(loc=loc, scale=scale)
-
-      self.assertAllEqual((3,), cauchy.mean().shape)
-      self.assertAllEqual([np.nan] * 3, cauchy.mean().eval())
-
-  def testCauchyNanMean(self):
-    with self.test_session():
-      loc = [1., 2., 3.]
-      scale = [7.]
-      cauchy = cauchy_lib.Cauchy(loc=loc, scale=scale, allow_nan_stats=False)
-
-      with self.assertRaises(ValueError):
-        cauchy.mean().eval()
-
-  def testCauchyQuantile(self):
-    with self.test_session():
-      batch_size = 50
-      loc = self._rng.randn(batch_size)
-      scale = self._rng.rand(batch_size) + 1.0
-      p = np.linspace(0.000001, 0.999999, batch_size).astype(np.float64)
-
-      cauchy = cauchy_lib.Cauchy(loc=loc, scale=scale)
-      x = cauchy.quantile(p)
-
-      self.assertAllEqual(cauchy.batch_shape_tensor().eval(), x.shape)
-      self.assertAllEqual(cauchy.batch_shape_tensor().eval(), x.eval().shape)
-      self.assertAllEqual(cauchy.batch_shape, x.shape)
-      self.assertAllEqual(cauchy.batch_shape, x.eval().shape)
-
-      if not stats:
-        return
-      expected_x = stats.cauchy(loc, scale).ppf(p)
-      self.assertAllClose(expected_x, x.eval(), atol=0.)
-
-  def testCauchyVariance(self):
-    with self.test_session():
-      # scale will be broadcast to [7, 7, 7]
-      loc = [1., 2., 3.]
-      scale = [7.]
-      cauchy = cauchy_lib.Cauchy(loc=loc, scale=scale)
-
-      self.assertAllEqual((3,), cauchy.variance().shape)
-      self.assertAllEqual([np.nan] * 3, cauchy.variance().eval())
-
-  def testCauchyNanVariance(self):
-    with self.test_session():
-      # scale will be broadcast to [7, 7, 7]
-      loc = [1., 2., 3.]
-      scale = [7.]
-      cauchy = cauchy_lib.Cauchy(loc=loc, scale=scale, allow_nan_stats=False)
-
-      with self.assertRaises(ValueError):
-        cauchy.variance().eval()
-
-  def testCauchyStandardDeviation(self):
-    with self.test_session():
-      # scale will be broadcast to [7, 7, 7]
-      loc = [1., 2., 3.]
-      scale = [7.]
-      cauchy = cauchy_lib.Cauchy(loc=loc, scale=scale)
-
-      self.assertAllEqual((3,), cauchy.stddev().shape)
-      self.assertAllEqual([np.nan] * 3, cauchy.stddev().eval())
-
-  def testCauchyNanStandardDeviation(self):
-    with self.test_session():
-      # scale will be broadcast to [7, 7, 7]
-      loc = [1., 2., 3.]
-      scale = [7.]
-      cauchy = cauchy_lib.Cauchy(loc=loc, scale=scale, allow_nan_stats=False)
-
-      with self.assertRaises(ValueError):
-        cauchy.stddev().eval()
-
-  def testCauchySample(self):
-    with self.test_session():
-      loc = constant_op.constant(3.0)
-      scale = constant_op.constant(1.0)
-      loc_v = 3.0
-      n = constant_op.constant(100000)
-      cauchy = cauchy_lib.Cauchy(loc=loc, scale=scale)
-      samples = cauchy.sample(n)
-      sample_values = samples.eval()
-
-      self.assertEqual(sample_values.shape, (100000,))
-      self.assertAllClose(np.median(sample_values), loc_v, atol=1e-1)
-
-      expected_shape = tensor_shape.TensorShape([n.eval()]).concatenate(
-          tensor_shape.TensorShape(cauchy.batch_shape_tensor().eval()))
-
-      self.assertAllEqual(expected_shape, samples.shape)
-      self.assertAllEqual(expected_shape, sample_values.shape)
-
-      expected_shape = (tensor_shape.TensorShape(
-          [n.eval()]).concatenate(cauchy.batch_shape))
-
-      self.assertAllEqual(expected_shape, samples.shape)
-      self.assertAllEqual(expected_shape, sample_values.shape)
-
-  def testCauchySampleMultiDimensional(self):
-    with self.test_session():
-      batch_size = 2
-      loc = constant_op.constant([[3.0, -3.0]] * batch_size)
-      scale = constant_op.constant([[0.5, 1.0]] * batch_size)
-      loc_v = [3.0, -3.0]
-      n = constant_op.constant(100000)
-      cauchy = cauchy_lib.Cauchy(loc=loc, scale=scale)
-      samples = cauchy.sample(n)
-      sample_values = samples.eval()
-      self.assertEqual(samples.shape, (100000, batch_size, 2))
-      self.assertAllClose(np.median(sample_values[:, 0, 0]),
-                          loc_v[0], atol=1e-1)
-      self.assertAllClose(np.median(sample_values[:, 0, 1]),
-                          loc_v[1], atol=1e-1)
-
-      expected_shape = tensor_shape.TensorShape([n.eval()]).concatenate(
-          tensor_shape.TensorShape(cauchy.batch_shape_tensor().eval()))
-      self.assertAllEqual(expected_shape, samples.shape)
-      self.assertAllEqual(expected_shape, sample_values.shape)
-
-      expected_shape = (tensor_shape.TensorShape(
-          [n.eval()]).concatenate(cauchy.batch_shape))
-      self.assertAllEqual(expected_shape, samples.shape)
-      self.assertAllEqual(expected_shape, sample_values.shape)
-
-  def testCauchyNegativeLocFails(self):
-    with self.test_session():
-      cauchy = cauchy_lib.Cauchy(loc=[1.], scale=[-5.], validate_args=True)
-      with self.assertRaisesOpError("Condition x > 0 did not hold"):
-        cauchy.mode().eval()
-
-  def testCauchyShape(self):
-    with self.test_session():
-      loc = constant_op.constant([-3.0] * 5)
-      scale = constant_op.constant(11.0)
-      cauchy = cauchy_lib.Cauchy(loc=loc, scale=scale)
-
-      self.assertEqual(cauchy.batch_shape_tensor().eval(), [5])
-      self.assertEqual(cauchy.batch_shape, tensor_shape.TensorShape([5]))
-      self.assertAllEqual(cauchy.event_shape_tensor().eval(), [])
-      self.assertEqual(cauchy.event_shape, tensor_shape.TensorShape([]))
-
-  def testCauchyShapeWithPlaceholders(self):
-    loc = array_ops.placeholder(dtype=dtypes.float32)
-    scale = array_ops.placeholder(dtype=dtypes.float32)
-    cauchy = cauchy_lib.Cauchy(loc=loc, scale=scale)
-
-    with self.test_session() as sess:
-      # get_batch_shape should return an "<unknown>" tensor.
-      self.assertEqual(cauchy.batch_shape, tensor_shape.TensorShape(None))
-      self.assertEqual(cauchy.event_shape, ())
-      self.assertAllEqual(cauchy.event_shape_tensor().eval(), [])
-      self.assertAllEqual(
-          sess.run(cauchy.batch_shape_tensor(),
-                   feed_dict={loc: 5.0,
-                              scale: [1.0, 2.0]}), [2])
-
-
-if __name__ == "__main__":
-  test.main()
diff --git a/tensorflow/contrib/distributions/python/ops/cauchy.py b/tensorflow/contrib/distributions/python/ops/cauchy.py
deleted file mode 100644
index a17bb091f6..0000000000
--- a/tensorflow/contrib/distributions/python/ops/cauchy.py
+++ /dev/null
@@ -1,223 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""The Cauchy distribution class."""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import numpy as np
-
-from tensorflow.python.framework import constant_op
-from tensorflow.python.framework import dtypes
-from tensorflow.python.framework import ops
-from tensorflow.python.framework import tensor_shape
-from tensorflow.python.ops import array_ops
-from tensorflow.python.ops import check_ops
-from tensorflow.python.ops import math_ops
-from tensorflow.python.ops import random_ops
-from tensorflow.python.ops.distributions import distribution
-
-
-__all__ = [
-    "Cauchy",
-]
-
-
-class Cauchy(distribution.Distribution):
-  """The Cauchy distribution with location `loc` and scale `scale`.
-
-  #### Mathematical details
-
-  The probability density function (pdf) is,
-
-  ```none
-  pdf(x; loc, scale) = 1 / (pi * scale * (1 + ((x - loc) / scale)**2))
-  ```
-  where `loc` is the location, and `scale` is the scale.
-
-  The Cauchy distribution is a member of the [location-scale family](
-  https://en.wikipedia.org/wiki/Location-scale_family), i.e.
-
-  ```none
-  X ~ Cauchy(loc=0, scale=1)
-  Y ~ Cauchy(loc=loc, scale=scale)
-  Y = loc + scale * X
-  ```
-
-  #### Examples
-
-  Examples of initialization of one or a batch of distributions.
-
-  ```python
-  # Define a single scalar Cauchy distribution.
-  dist = Cauchy(loc=0., scale=3.)
-
-  # Evaluate the cdf at 1, returning a scalar.
-  dist.cdf(1.)
-
-  # Define a batch of two scalar valued Cauchy distributions.
-  dist = Cauchy(loc=[1, 2.], scale=[11, 22.])
-
-  # Evaluate the pdf of the first distribution on 0, and the second on 1.5,
-  # returning a length two tensor.
-  dist.prob([0, 1.5])
-
-  # Get 3 samples, returning a 3 x 2 tensor.
-  dist.sample([3])
-  ```
-
-  Arguments are broadcast when possible.
-
-  ```python
-  # Define a batch of two scalar valued Cauchy distributions.
-  # Both have median 1, but different scales.
-  dist = tf.contrib.distributions.Cauchy(loc=1., scale=[11, 22.])
-  # Evaluate the pdf of both distributions on the same point, 3.0,
-  # returning a length 2 tensor.
-  dist.prob(3.0)
-  ```
-  """
-
-  def __init__(self,
-               loc,
-               scale,
-               validate_args=False,
-               allow_nan_stats=True,
-               name="Cauchy"):
-    """Construct Cauchy distributions with loc and and scale `loc` and `scale`.
-
-    The parameters `loc` and `scale` must be shaped in a way that supports
-    broadcasting (e.g. `loc + scale` is a valid operation).
-
-    Args:
-      loc: Floating point tensor; the modes of the distribution(s).
-      scale: Floating point tensor; the locations of the distribution(s).
-        Must contain only positive values.
-      validate_args: Python `bool`, default `False`. When `True` distribution
-        parameters are checked for validity despite possibly degrading runtime
-        performance. When `False` invalid inputs may silently render incorrect
-        outputs.
-      allow_nan_stats: Python `bool`, default `True`. When `True`,
-        statistics (e.g., mean, mode, variance) use the value "`NaN`" to
-        indicate the result is undefined. When `False`, an exception is raised
-        if one or more of the statistic's batch members are undefined.
-      name: Python `str` name prefixed to Ops created by this class.
-
-    Raises:
-      TypeError: if `loc` and `scale` have different `dtype`.
-    """
-    parameters = locals()
-    with ops.name_scope(name, values=[loc, scale]):
-      with ops.control_dependencies([check_ops.assert_positive(scale)] if
-                                    validate_args else []):
-        self._loc = array_ops.identity(loc, name="loc")
-        self._scale = array_ops.identity(scale, name="scale")
-        check_ops.assert_same_float_dtype([self._loc, self._scale])
-    super(Cauchy, self).__init__(
-        dtype=self._scale.dtype,
-        reparameterization_type=distribution.FULLY_REPARAMETERIZED,
-        validate_args=validate_args,
-        allow_nan_stats=allow_nan_stats,
-        parameters=parameters,
-        graph_parents=[self._loc, self._scale],
-        name=name)
-
-  @staticmethod
-  def _param_shapes(sample_shape):
-    return dict(
-        zip(("loc", "scale"), ([ops.convert_to_tensor(
-            sample_shape, dtype=dtypes.int32)] * 2)))
-
-  @property
-  def loc(self):
-    """Distribution parameter for the mean."""
-    return self._loc
-
-  @property
-  def scale(self):
-    """Distribution parameter for standard deviation."""
-    return self._scale
-
-  def _batch_shape_tensor(self):
-    return array_ops.broadcast_dynamic_shape(
-        array_ops.shape(self.loc),
-        array_ops.shape(self.scale))
-
-  def _batch_shape(self):
-    return array_ops.broadcast_static_shape(
-        self.loc.shape,
-        self.scale.shape)
-
-  def _event_shape_tensor(self):
-    return constant_op.constant([], dtype=dtypes.int32)
-
-  def _event_shape(self):
-    return tensor_shape.scalar()
-
-  def _sample_n(self, n, seed=None):
-    shape = array_ops.concat([[n], self.batch_shape_tensor()], 0)
-    probs = random_ops.random_uniform(
-        shape=shape, minval=0., maxval=1., dtype=self.dtype, seed=seed)
-    return self._quantile(probs)
-
-  def _log_prob(self, x):
-    return self._log_unnormalized_prob(x) - self._log_normalization()
-
-  def _cdf(self, x):
-    return math_ops.atan(self._z(x)) / np.pi + 0.5
-
-  def _log_cdf(self, x):
-    return math_ops.log1p(2 / np.pi * math_ops.atan(self._z(x))) - np.log(2)
-
-  def _log_unnormalized_prob(self, x):
-    return -math_ops.log1p(math_ops.square(self._z(x)))
-
-  def _log_normalization(self):
-    return np.log(np.pi) + math_ops.log(self.scale)
-
-  def _entropy(self):
-    h = np.log(4 * np.pi) + math_ops.log(self.scale)
-    return h * array_ops.ones_like(self.loc)
-
-  def _quantile(self, p):
-    return self.loc + self.scale * math_ops.tan(np.pi * (p - 0.5))
-
-  def _mode(self):
-    return self.loc * array_ops.ones_like(self.scale)
-
-  def _z(self, x):
-    """Standardize input `x`."""
-    with ops.name_scope("standardize", values=[x]):
-      return (x - self.loc) / self.scale
-
-  def _inv_z(self, z):
-    """Reconstruct input `x` from a its normalized version."""
-    with ops.name_scope("reconstruct", values=[z]):
-      return z * self.scale + self.loc
-
-  def _mean(self):
-    if self.allow_nan_stats:
-      return array_ops.fill(self.batch_shape_tensor(),
-                            self.dtype.as_numpy_dtype(np.nan))
-    else:
-      raise ValueError("`mean` is undefined for Cauchy distribution.")
-
-  def _stddev(self):
-    if self.allow_nan_stats:
-      return array_ops.fill(self.batch_shape_tensor(),
-                            self.dtype.as_numpy_dtype(np.nan))
-    else:
-      raise ValueError("`stddev` is undefined for Cauchy distribution.")
diff --git a/tensorflow/contrib/eager/python/examples/notebooks/1_basics.ipynb b/tensorflow/contrib/eager/python/examples/notebooks/1_basics.ipynb
index 459f2f4a7d..01616f2e7d 100644
--- a/tensorflow/contrib/eager/python/examples/notebooks/1_basics.ipynb
+++ b/tensorflow/contrib/eager/python/examples/notebooks/1_basics.ipynb
@@ -429,9 +429,7 @@
         "cpu_tensor = tf.random_normal([SIZE, SIZE])\n",
         "\n",
         "if is_gpu_available:\n",
-        "  gpu_tensor = cpu_tensor.gpu()\n",
-        "else:\n",
-        "  print(\"GPU not available.\")"
+        "  gpu_tensor = cpu_tensor.gpu()"
       ]
     },
     {
diff --git a/tensorflow/contrib/eager/python/examples/notebooks/2_gradients.ipynb b/tensorflow/contrib/eager/python/examples/notebooks/2_gradients.ipynb
index e6c7c11733..3b7e2cd435 100644
--- a/tensorflow/contrib/eager/python/examples/notebooks/2_gradients.ipynb
+++ b/tensorflow/contrib/eager/python/examples/notebooks/2_gradients.ipynb
@@ -383,7 +383,7 @@
         "\n",
         "`implicit_value_and_gradients()` returns a function that accepts the same inputs as the function passed in, and returns a tuple consisting of:\n",
         "\n",
-        "1. the value returned by the function passed in (in this case, the loss calculated by `loss_fn()`), and\n",
+        "1. the value returned by the function passed in (in this case, the loss calculated by `calculate_linear_model_loss()`), and\n",
         "1. a list of tuples consisting of:\n",
         "  1. The value of the gradient (a `tf.Tensor`) with respect to a given variable\n",
         "  1. The corresponding variable (`tf.Variable`)\n",
@@ -698,7 +698,7 @@
       "source": [
         "## Other Ways to Compute Gradients\n",
         "\n",
-        "Using our loss function as an example (`loss_fn()`), there are several other ways we could compute gradients:\n",
+        "Using our loss function as an example (`calculate_linear_model_loss()`), there are several other ways we could compute gradients:\n",
         "\n",
         "1. `tfe.implicit_gradients()`\n",
         "1. `tfe.gradients_function()`\n",
@@ -841,7 +841,7 @@
         "# tfe.implicit_value_and_gradients() demo\n",
         "value_gradients_fn = tfe.implicit_value_and_gradients(loss_fn)\n",
         "\n",
-        "# Returns the value returned by the function passed in, gradients, and variables:\n",
+        "# Returns only gradients:\n",
         "value_gradients_fn(inputs, labels, wb)"
       ]
     }
diff --git a/tensorflow/contrib/eager/python/examples/notebooks/3_datasets.ipynb b/tensorflow/contrib/eager/python/examples/notebooks/3_datasets.ipynb
index 0088da5c4b..ebcc7027c1 100644
--- a/tensorflow/contrib/eager/python/examples/notebooks/3_datasets.ipynb
+++ b/tensorflow/contrib/eager/python/examples/notebooks/3_datasets.ipynb
@@ -9,7 +9,7 @@
       "source": [
         "# Eager Execution Tutorial: Importing Data\n",
         "\n",
-        "This notebook demonstrates the use of the [`tf.data.Dataset` API](https://www.tensorflow.org/programmers_guide/datasets) to build pipelines to feed data to your program. It covers:\n",
+        "This notebook demonstrates the use of the [`tf.contrib.data.Dataset` API](https://www.tensorflow.org/programmers_guide/datasets) to build pipelines to feed data to your program. It covers:\n",
         "\n",
         "* Creating a `Dataset`.\n",
         "* Iteration over a `Dataset` with eager execution enabled.\n",
@@ -64,7 +64,7 @@
       "source": [
         "# Step 1: Create a source `Dataset`\n",
         "\n",
-        "Create a _source_ dataset using one of the factory functions like [`Dataset.from_tensors`](https://www.tensorflow.org/api_docs/python/tf/data/Dataset#from_tensors), [`Dataset.from_tensor_slices`](https://www.tensorflow.org/api_docs/python/tf/data/Dataset#from_tensor_slices) or using objects that read from files like [`TextLineDataset`](https://www.tensorflow.org/api_docs/python/tf/data/TextLineDataset) or [`TFRecordDataset`](https://www.tensorflow.org/api_docs/python/tf/data/TFRecordDataset). See the [Programmer's Guide](https://www.google.com/url?sa=D\u0026q=https%3A%2F%2Fwww.tensorflow.org%2Fprogrammers_guide%2Fdatasets%23reading_input_data) for more information."
+        "Create a _source_ dataset using one of the factory functions like [`Dataset.from_tensors`](https://www.tensorflow.org/api_docs/python/tf/contrib/data/Dataset#from_tensors), [`Dataset.from_tensor_slices`](https://www.tensorflow.org/api_docs/python/tf/contrib/data/Dataset#from_tensor_slices) or using objects that read from files like [`TextLineDataset`](https://www.tensorflow.org/api_docs/python/tf/contrib/data/TextLineDataset) or [`TFRecordDataset`](https://www.tensorflow.org/api_docs/python/tf/contrib/data/TFRecordDataset). See the [Programmer's Guide](https://www.google.com/url?sa=D\u0026q=https%3A%2F%2Fwww.tensorflow.org%2Fprogrammers_guide%2Fdatasets%23reading_input_data) for more information."
       ]
     },
     {
@@ -83,7 +83,7 @@
       },
       "outputs": [],
       "source": [
-        "ds_tensors = tf.data.Dataset.from_tensor_slices([1, 2, 3, 4, 5, 6])\n",
+        "ds_tensors = tf.contrib.data.Dataset.from_tensor_slices([1, 2, 3, 4, 5, 6])\n",
         "\n",
         "# Create a CSV file\n",
         "import tempfile\n",
@@ -93,7 +93,7 @@
         "Line 2\n",
         "Line 3\n",
         "  \"\"\")\n",
-        "ds_file = tf.data.TextLineDataset(filename)\n"
+        "ds_file = tf.contrib.data.TextLineDataset(filename)\n"
       ]
     },
     {
@@ -105,7 +105,7 @@
       "source": [
         "# Step 2: Apply transformations\n",
         "\n",
-        "Use the transformations functions like [`map`](https://www.tensorflow.org/api_docs/python/tf/data/Dataset#map), [`batch`](https://www.tensorflow.org/api_docs/python/tf/data/Dataset#batch), [`shuffle`](https://www.tensorflow.org/api_docs/python/tf/data/Dataset#shuffle) etc. to apply transformations to the records of the dataset. See the [API documentation for `tf.data.Dataset`](https://www.tensorflow.org/api_docs/python/tf/data/Dataset) for details."
+        "Use the transformations functions like [`map`](https://www.tensorflow.org/api_docs/python/tf/contrib/data/Dataset#map), [`batch`](https://www.tensorflow.org/api_docs/python/tf/contrib/data/Dataset#batch), [`shuffle`](https://www.tensorflow.org/api_docs/python/tf/contrib/data/Dataset#shuffle) etc. to apply transformations to the records of the dataset. See the [API documentation for `tf.contrib.data.Dataset`](https://www.tensorflow.org/api_docs/python/tf/contrib/data/Dataset) for details."
       ]
     },
     {
diff --git a/tensorflow/contrib/layers/python/layers/layers.py b/tensorflow/contrib/layers/python/layers/layers.py
index 9378fe8799..46b3eeae91 100644
--- a/tensorflow/contrib/layers/python/layers/layers.py
+++ b/tensorflow/contrib/layers/python/layers/layers.py
@@ -286,6 +286,7 @@ def _fused_batch_norm(inputs,
     ValueError: If the rank of `inputs` is neither 2 or 4.
     ValueError: If rank or `C` dimension of `inputs` is undefined.
   """
+  # TODO(reedwm): Add support for fp16 inputs.
   if data_format not in (DATA_FORMAT_NCHW, DATA_FORMAT_NHWC):
     raise ValueError('data_format has to be either NCHW or NHWC.')
   with variable_scope.variable_scope(
@@ -319,10 +320,9 @@ def _fused_batch_norm(inputs,
                        (inputs.name, params_shape))
 
     # Allocate parameters for the beta and gamma of the normalization.
+    trainable_beta = trainable and center
     beta_collections = utils.get_variable_collections(variables_collections,
                                                       'beta')
-    # Float32 required to avoid precision-loss when using fp16 input/output
-    variable_dtype = dtypes.float32
     if not param_initializers:
       param_initializers = {}
     if not param_regularizers:
@@ -336,13 +336,13 @@ def _fused_batch_norm(inputs,
       beta = variables.model_variable(
           'beta',
           shape=params_shape,
-          dtype=variable_dtype,
+          dtype=dtype,
           initializer=beta_initializer,
           regularizer=beta_regularizer,
           collections=beta_collections,
-          trainable=trainable)
+          trainable=trainable_beta)
     else:
-      beta = array_ops.constant(0.0, dtype=variable_dtype, shape=params_shape)
+      beta = array_ops.constant(0.0, shape=params_shape)
 
     if scale:
       gamma_collections = utils.get_variable_collections(
@@ -352,13 +352,13 @@ def _fused_batch_norm(inputs,
       gamma = variables.model_variable(
           'gamma',
           shape=params_shape,
-          dtype=variable_dtype,
+          dtype=dtype,
           initializer=gamma_initializer,
           regularizer=gamma_regularizer,
           collections=gamma_collections,
           trainable=trainable)
     else:
-      gamma = array_ops.constant(1.0, dtype=variable_dtype, shape=params_shape)
+      gamma = array_ops.constant(1.0, shape=params_shape)
 
     # Create moving_mean and moving_variance variables and add them to the
     # appropriate collections. We disable variable partitioning while creating
@@ -375,7 +375,7 @@ def _fused_batch_norm(inputs,
       moving_mean = variables.model_variable(
           'moving_mean',
           shape=params_shape,
-          dtype=variable_dtype,
+          dtype=dtype,
           initializer=moving_mean_initializer,
           trainable=False,
           collections=moving_mean_collections)
@@ -386,7 +386,7 @@ def _fused_batch_norm(inputs,
       moving_variance = variables.model_variable(
           'moving_variance',
           shape=params_shape,
-          dtype=variable_dtype,
+          dtype=dtype,
           initializer=moving_variance_initializer,
           trainable=False,
           collections=moving_variance_collections)
diff --git a/tensorflow/contrib/layers/python/layers/layers_test.py b/tensorflow/contrib/layers/python/layers/layers_test.py
index 5aa2253516..ff7f0e4462 100644
--- a/tensorflow/contrib/layers/python/layers/layers_test.py
+++ b/tensorflow/contrib/layers/python/layers/layers_test.py
@@ -1774,12 +1774,10 @@ class BatchNormTest(test.TestCase):
       with self.assertRaisesRegexp(ValueError, 'undefined'):
         _layers.batch_norm(inputs, data_format='NCHW')
 
-  def _testCreateOp(self, fused, dtype=None):
-    if dtype is None:
-      dtype = dtypes.float32
+  def _testCreateOp(self, fused):
     height, width = 3, 3
     with self.test_session():
-      images = np.random.uniform(size=(5, height, width, 3)).astype(dtype.as_numpy_dtype)
+      images = np.random.uniform(size=(5, height, width, 3)).astype('f')
       output = _layers.batch_norm(images, fused=fused)
       expected_name = ('BatchNorm/FusedBatchNorm' if fused else
                        'BatchNorm/batchnorm')
@@ -1794,9 +1792,6 @@ class BatchNormTest(test.TestCase):
   def testCreateOpFused(self):
     self._testCreateOp(True)
 
-  def testCreateOpFusedFloat16(self):
-    self._testCreateOp(True, dtypes.float16)
-
   def _testCreateOpBetaRegularizer(self, fused=True):
     height, width = 3, 3
     with self.test_session():
@@ -2664,68 +2659,10 @@ class BatchNormTest(test.TestCase):
   def testBatchNormBeta(self):
     # Test case for 11673
     with self.test_session() as sess:
-      a_32 = array_ops.placeholder(dtypes.float32, shape=(10, 10, 10, 10))
-      b_32 = _layers.batch_norm(a_32, center=False, data_format='NCHW',
-                                zero_debias_moving_mean=True)
-      a_16 = array_ops.placeholder(dtypes.float16, shape=(10, 10, 10, 10))
-      b_16 = _layers.batch_norm(a_16, center=False, data_format='NCHW',
-                                zero_debias_moving_mean=True)
-      sess.run(variables_lib.global_variables_initializer())
-
-  def testVariablesAreFloat32(self):
-    height, width = 3, 3
-    with self.test_session():
-      images = random_ops.random_uniform((5, height, width, 3),
-                                         seed=1, dtype=dtypes.float16)
-      _layers.batch_norm(images, scale=True)
-      beta = variables.get_variables_by_name('beta')[0]
-      gamma = variables.get_variables_by_name('gamma')[0]
-      self.assertEqual(beta.dtype, dtypes.float32_ref)
-      self.assertEqual(gamma.dtype, dtypes.float32_ref)
-      moving_mean = variables.get_variables_by_name('moving_mean')[0]
-      moving_variance = variables.get_variables_by_name('moving_variance')[0]
-      self.assertEqual(moving_mean.dtype, dtypes.float32_ref)
-      self.assertEqual(moving_variance.dtype, dtypes.float32_ref)
-
-  def _runFusedBatchNorm(self, shape, dtype):
-    channels = shape[1]
-    images = np.arange(np.product(shape), dtype=dtype).reshape(shape)
-    beta = init_ops.constant_initializer(
-        np.arange(
-            2, channels + 2, dtype=np.float32))
-    gamma = init_ops.constant_initializer(
-        np.arange(
-            10, channels + 10, dtype=np.float32) * 2.0)
-    mean = init_ops.constant_initializer(
-        np.arange(
-            3, channels + 3, dtype=np.float32) * 5.0)
-    variance = init_ops.constant_initializer(
-        np.arange(
-            1, channels + 1, dtype=np.float32) * 4.0)
-    output = _layers.batch_norm(
-        images,
-        fused=True,
-        is_training=True,
-        scale=True,
-        epsilon=0.5,
-        param_initializers={
-            'beta': beta,
-            'gamma': gamma,
-            'moving_mean': mean,
-            'moving_variance': variance,
-        },
-        data_format='NCHW')
-    with self.test_session(use_gpu=True) as sess:
+      a = array_ops.placeholder(dtypes.float32, shape=(10, 10, 10, 10))
+      b = _layers.batch_norm(a, center=False, data_format='NCHW',
+                                       zero_debias_moving_mean=True)
       sess.run(variables_lib.global_variables_initializer())
-      return sess.run(output)
-
-  def testFusedBatchNormFloat16MatchesFloat32(self):
-    if test.is_gpu_available(cuda_only=True):
-      shape = [5, 4, 2, 3]
-      res_32 = self._runFusedBatchNorm(shape, np.float32)
-      res_16 = self._runFusedBatchNorm(shape, np.float16)
-      self.assertAllClose(res_32, res_16, rtol=1e-3)
-
 
   def testAdjustmentCreated(self):
     # Tests that the adjustment is appropriately passed to and used by the core
diff --git a/tensorflow/contrib/learn/python/learn/estimators/head.py b/tensorflow/contrib/learn/python/learn/estimators/head.py
index bc0e6fc009..468d792a0d 100644
--- a/tensorflow/contrib/learn/python/learn/estimators/head.py
+++ b/tensorflow/contrib/learn/python/learn/estimators/head.py
@@ -119,7 +119,7 @@ class Head(object):
       update_op = tf.contrib.layers.optimize_loss(optimizer=sync,
                                                   loss=model_fn_ops.loss, ...)
       hooks = [sync.make_session_run_hook(is_chief)]
-      ... update train_op and hooks in ModelFnOps and return
+      ... upate train_op and hooks in ModelFnOps and return
     ```
   """
   __metaclass__ = abc.ABCMeta
diff --git a/tensorflow/contrib/learn/python/learn/estimators/model_fn.py b/tensorflow/contrib/learn/python/learn/estimators/model_fn.py
index 44e6c7c52d..8be9c72adf 100644
--- a/tensorflow/contrib/learn/python/learn/estimators/model_fn.py
+++ b/tensorflow/contrib/learn/python/learn/estimators/model_fn.py
@@ -23,6 +23,7 @@ import collections
 
 import six
 
+from tensorflow.contrib import framework as contrib_framework
 from tensorflow.contrib.framework import get_graph_from_inputs
 from tensorflow.contrib.learn.python.learn.estimators import constants
 from tensorflow.contrib.learn.python.learn.estimators import metric_key
@@ -31,7 +32,6 @@ from tensorflow.python.estimator import model_fn as core_model_fn_lib
 from tensorflow.python.estimator.export import export_output as core_export_lib
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
-from tensorflow.python.framework import sparse_tensor
 from tensorflow.python.framework import tensor_shape
 from tensorflow.python.ops import array_ops
 from tensorflow.python.platform import tf_logging as logging
@@ -156,11 +156,11 @@ class ModelFnOps(
     else:
       if isinstance(predictions, dict):
         predictions = {
-            k: sparse_tensor.convert_to_tensor_or_sparse_tensor(v)
+            k: contrib_framework.convert_to_tensor_or_sparse_tensor(v)
             for k, v in six.iteritems(predictions)
         }
       else:
-        predictions = sparse_tensor.convert_to_tensor_or_sparse_tensor(
+        predictions = contrib_framework.convert_to_tensor_or_sparse_tensor(
             predictions)
 
     # Validate eval_metric_ops
diff --git a/tensorflow/contrib/learn/python/learn/learn_io/data_feeder.py b/tensorflow/contrib/learn/python/learn/learn_io/data_feeder.py
index db18ebf05d..4c50d40aaa 100644
--- a/tensorflow/contrib/learn/python/learn/learn_io/data_feeder.py
+++ b/tensorflow/contrib/learn/python/learn/learn_io/data_feeder.py
@@ -28,14 +28,13 @@ import six
 from six.moves import xrange  # pylint: disable=redefined-builtin
 
 from tensorflow.python.framework import dtypes
-from tensorflow.python.framework import ops
-from tensorflow.python.framework import tensor_util
 from tensorflow.python.ops import array_ops
 from tensorflow.python.platform import tf_logging as logging
 
 # pylint: disable=g-multiple-import,g-bad-import-order
 from .pandas_io import HAS_PANDAS, extract_pandas_data, extract_pandas_matrix, extract_pandas_labels
 from .dask_io import HAS_DASK, extract_dask_data, extract_dask_labels
+
 # pylint: enable=g-multiple-import,g-bad-import-order
 
 
@@ -366,13 +365,8 @@ class DataFeeder(object):
     self.random_state = np.random.RandomState(
         42) if random_state is None else random_state
 
-    if x_is_dict:
-      num_samples = list(self._x.values())[0].shape[0]
-    elif tensor_util.is_tensor(self._x):
-      num_samples = self._x.shape[0].value  # shape will be a Dimension, extract an int
-    else:
-      num_samples = self._x.shape[0]
-      
+    num_samples = list(self._x.values())[0].shape[
+        0] if x_is_dict else self._x.shape[0]
     if self._shuffle:
       self.indices = self.random_state.permutation(num_samples)
     else:
diff --git a/tensorflow/contrib/linear_optimizer/python/ops/sdca_ops.py b/tensorflow/contrib/linear_optimizer/python/ops/sdca_ops.py
index 86d8484391..13f2f0f502 100644
--- a/tensorflow/contrib/linear_optimizer/python/ops/sdca_ops.py
+++ b/tensorflow/contrib/linear_optimizer/python/ops/sdca_ops.py
@@ -238,10 +238,10 @@ class SdcaModel(object):
     with name_scope('sdca/prediction'):
       sparse_variables = self._convert_n_to_tensor(self._variables[
           'sparse_features_weights'])
-      result_sparse = 0.0
+      result = 0.0
       for sfc, sv in zip(examples['sparse_features'], sparse_variables):
         # TODO(sibyl-Aix6ihai): following does not take care of missing features.
-        result_sparse += math_ops.segment_sum(
+        result += math_ops.segment_sum(
             math_ops.multiply(
                 array_ops.gather(sv, sfc.feature_indices), sfc.feature_values),
             sfc.example_indices)
@@ -249,13 +249,12 @@ class SdcaModel(object):
       dense_variables = self._convert_n_to_tensor(self._variables[
           'dense_features_weights'])
 
-      result_dense = 0.0
       for i in range(len(dense_variables)):
-        result_dense += math_ops.matmul(
-            dense_features[i], array_ops.expand_dims(dense_variables[i], -1))
+        result += math_ops.matmul(dense_features[i],
+                                  array_ops.expand_dims(dense_variables[i], -1))
 
     # Reshaping to allow shape inference at graph construction time.
-    return array_ops.reshape(result_dense, [-1]) + result_sparse
+    return array_ops.reshape(result, [-1])
 
   def predictions(self, examples):
     """Add operations to compute predictions by the model.
diff --git a/tensorflow/contrib/lite/python/BUILD b/tensorflow/contrib/lite/python/BUILD
index 89e8693490..b4aa032ff8 100644
--- a/tensorflow/contrib/lite/python/BUILD
+++ b/tensorflow/contrib/lite/python/BUILD
@@ -23,7 +23,6 @@ py_library(
 py_test(
     name = "lite_test",
     srcs = ["lite_test.py"],
-    srcs_version = "PY2AND3",
     deps = [
         ":lite",
         "//tensorflow/python:array_ops",
diff --git a/tensorflow/contrib/lite/testing/generate_examples.py b/tensorflow/contrib/lite/testing/generate_examples.py
index b122818221..86540d58a6 100644
--- a/tensorflow/contrib/lite/testing/generate_examples.py
+++ b/tensorflow/contrib/lite/testing/generate_examples.py
@@ -36,10 +36,6 @@ import traceback
 import zipfile
 import numpy as np
 from six import StringIO
-
-# TODO(aselle): Disable GPU for now
-os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
-
 import tensorflow as tf
 from google.protobuf import text_format
 # TODO(aselle): switch to TensorFlow's resource_loader
@@ -383,13 +379,12 @@ def make_zip_of_tests(zip_path,
         report["toco_log"] = ""
         tf.reset_default_graph()
 
-        with tf.device('/cpu:0'):
-          try:
-            inputs, outputs = make_graph(param_dict_real)
-          except (tf.errors.UnimplementedError, tf.errors.InvalidArgumentError,
-                  ValueError):
-            report["tf_log"] += traceback.format_exc()
-            return None, report
+        try:
+          inputs, outputs = make_graph(param_dict_real)
+        except (tf.errors.UnimplementedError, tf.errors.InvalidArgumentError,
+                ValueError):
+          report["tf_log"] += traceback.format_exc()
+          return None, report
 
         sess = tf.Session()
         try:
diff --git a/tensorflow/contrib/lite/toco/python/BUILD b/tensorflow/contrib/lite/toco/python/BUILD
index 17115047d2..92246a8aed 100644
--- a/tensorflow/contrib/lite/toco/python/BUILD
+++ b/tensorflow/contrib/lite/toco/python/BUILD
@@ -61,7 +61,6 @@ tf_py_test(
     data = [
         ":toco_from_protos",
     ],
-    tags = ["no_pip"],
 )
 
 filegroup(
diff --git a/tensorflow/contrib/makefile/Makefile b/tensorflow/contrib/makefile/Makefile
index e2e6c05591..dba1464653 100644
--- a/tensorflow/contrib/makefile/Makefile
+++ b/tensorflow/contrib/makefile/Makefile
@@ -314,8 +314,7 @@ ifeq ($(TARGET),ANDROID)
 -Wno-narrowing \
 -fomit-frame-pointer \
 $(MARCH_OPTION) \
--fPIE \
--fPIC
+-fPIE
 	INCLUDES = \
 -I$(NDK_ROOT)/sources/android/support/include \
 -I$(NDK_ROOT)/sources/cxx-stl/gnu-libstdc++/4.9/include \
diff --git a/tensorflow/contrib/makefile/README.md b/tensorflow/contrib/makefile/README.md
index 65bd60c12a..715eb51577 100644
--- a/tensorflow/contrib/makefile/README.md
+++ b/tensorflow/contrib/makefile/README.md
@@ -174,26 +174,10 @@ tensorflow/contrib/makefile/build_all_ios.sh
 
 This process will take around twenty minutes on a modern MacBook Pro.
 
-When it completes, you will have a unified library for all architectures
-(i386sim, x86_64sim, armv7, armv7s and arm64)  and the benchmark program.
-Although successfully compiling the benchmark program is a
+When it completes, you will have a library for a single architecture and the
+benchmark program. Although successfully compiling the benchmark program is a
 sign of success, the program is not a complete iOS app.
 
-If you would only like to build only one architecture to save time:
-(iOS 11+ only supports 64bit so you can get away with arm64)
-
-```bash
-tensorflow/contrib/makefile/build_all_ios.sh -a arm64
-```
-
-After the first build if you would like to just build the tensorflow
-library you can pass the -T flag to avoid a clean & rebuild. This should
-take you just a few seconds to generate the library if you modified one file.
-
-```bash
-tensorflow/contrib/makefile/build_all_ios.sh -a arm64 -T
-```
-
 To see TensorFlow running on iOS, the example Xcode project in
 [tensorflow/examples/ios](../../examples/ios/) shows how to use the static
 library in a simple app.
@@ -209,18 +193,19 @@ If you have not already, you will need to download dependencies:
 tensorflow/contrib/makefile/download_dependencies.sh
 ```
 
-Next, you will need to compile protobufs for iOS (optionally takes the -a $ARCH flag):
+Next, you will need to compile protobufs for iOS:
 
 ```bash
-tensorflow/contrib/makefile/compile_ios_protobuf.sh
+tensorflow/contrib/makefile/compile_ios_protobuf.sh 
 ```
 
-Then, you will need to compile the nsync library for iOS (optionally takes -a $ARCH flag):
+Then, you will need to compile the nsync library for iOS:
 
 ```bash
 export HOST_NSYNC_LIB=`tensorflow/contrib/makefile/compile_nsync.sh`
 export TARGET_NSYNC_LIB=`tensorflow/contrib/makefile/compile_nsync.sh -t ios`
 ```
+
 Then, you can run the makefile specifying iOS as the target, along with the
 architecture you want to build for:
 
@@ -234,6 +219,10 @@ This creates a library in
 `tensorflow/contrib/makefile/gen/lib/libtensorflow-core.a` that you can link any
 xcode project against. 
 
+At this point, you will have a library for a single architecture and the
+benchmark program. Although successfully compiling the benchmark program is a
+sign of success, the program is not a complete iOS app. 
+
 To see TensorFlow running on iOS, the example Xcode project in
 [tensorflow/examples/ios](../../examples/ios/) shows how to use the static
 library in a simple app.
@@ -248,14 +237,6 @@ time follow it with:
 compile_ios_tensorflow.sh
 ```
 
-`compile_ios_tensorflow.sh` takes the -a flag to build only for one architecture.
-In case you run into issues with unresolved symbols with nsync you can also pass
--h ${HOST_NSYNC_LIB} and -n {TARGET_NSYNC_LIB} so it would look like:
-
-```bash
-tensorflow/contrib/makefile/compile_ios_tensorflow.sh -f "-O3" -h tensorflow/contrib/makefile/downloads/nsync/builds/default.macos.c++11/nsync.a -n tensorflow/contrib/makefile/downloads/nsync/builds/lipo.ios.c++11/nsync.a -a arm64
-```
-
 In XCode, you will need to use -force_load in the linker flags
 section of the build settings to pull in the global constructors that are used
 to register ops and kernels. 
@@ -268,7 +249,7 @@ debug mode. If you are concerned about performance or are working on a release
 build, you would likely want a higher optimization setting, like so:
  
 ```bash
-compile_ios_tensorflow.sh -f "-Os"
+compile_ios_tensorflow.sh "-Os"
 ```
 
 For other variations of valid optimization flags, see [clang optimization levels](http://stackoverflow.com/questions/15548023/clang-optimization-levels).
diff --git a/tensorflow/contrib/makefile/build_all_ios.sh b/tensorflow/contrib/makefile/build_all_ios.sh
index 988e12b482..a49bbe4565 100755
--- a/tensorflow/contrib/makefile/build_all_ios.sh
+++ b/tensorflow/contrib/makefile/build_all_ios.sh
@@ -23,29 +23,14 @@ if [[ $(uname) != "Darwin" ]]; then
     exit 1
 fi
 
-usage() {
-  echo "Usage: $(basename "$0") [-a:T]"
-  echo "-a [build_arch] build only for specified arch x86_64 [default=all]"
-  echo "-T only build tensorflow (dont download other deps etc)"
-  exit 1
-}
-
-while getopts "a:T" opt_name; do
-  case "$opt_name" in
-    a) BUILD_ARCH="${OPTARG}";;
-    T) ONLY_MAKE_TENSORFLOW="true";;
-    *) usage;;
-  esac
-done
-shift $((OPTIND - 1))
-
-
 # Make sure we're in the correct directory, at the root of the source tree.
 SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
 cd ${SCRIPT_DIR}/../../../
 
-source "${SCRIPT_DIR}/build_helper.subr"
-JOB_COUNT="${JOB_COUNT:-$(get_job_count)}"
+
+# Remove any old files first.
+make -f tensorflow/contrib/makefile/Makefile clean
+rm -rf tensorflow/contrib/makefile/downloads
 
 # Setting a deployment target is required for building with bitcode,
 # otherwise linking will fail with:
@@ -56,37 +41,20 @@ if [[ -n MACOSX_DEPLOYMENT_TARGET ]]; then
     export MACOSX_DEPLOYMENT_TARGET=$(sw_vers -productVersion)
 fi
 
-if [[ "${ONLY_MAKE_TENSORFLOW}" != "true" ]]; then
-    # Remove any old files first.
-    make -f tensorflow/contrib/makefile/Makefile clean
-    rm -rf tensorflow/contrib/makefile/downloads
+# Pull down the required versions of the frameworks we need.
+tensorflow/contrib/makefile/download_dependencies.sh
 
-    # Pull down the required versions of the frameworks we need.
-    tensorflow/contrib/makefile/download_dependencies.sh
-
-    # Compile protobuf for the target iOS device architectures.
-    tensorflow/contrib/makefile/compile_ios_protobuf.sh
-fi
+# Compile protobuf for the target iOS device architectures.
+tensorflow/contrib/makefile/compile_ios_protobuf.sh
 
 # Compile nsync for the target iOS device architectures.
 # Don't use  export var=`something` syntax; it swallows the exit status.
 HOST_NSYNC_LIB=`tensorflow/contrib/makefile/compile_nsync.sh`
-if [[ -z "${BUILD_ARCH}" ]]; then
-    # No arch specified so build all architectures
-    TARGET_NSYNC_LIB=`tensorflow/contrib/makefile/compile_nsync.sh -t ios`
-else
-    # arch specified so build just that
-    TARGET_NSYNC_LIB=`tensorflow/contrib/makefile/compile_nsync.sh -t ios -a ${BUILD_ARCH}`
-fi
+TARGET_NSYNC_LIB=`tensorflow/contrib/makefile/compile_nsync.sh -t ios`
 export HOST_NSYNC_LIB TARGET_NSYNC_LIB
 
-if [[ -z "${BUILD_ARCH}" ]]; then
-    # build the ios tensorflow libraries.
-    tensorflow/contrib/makefile/compile_ios_tensorflow.sh -f "-O3" -h $HOST_NSYNC_LIB -n $TARGET_NSYNC_LIB
-else
-    # arch specified so build just that
-    tensorflow/contrib/makefile/compile_ios_tensorflow.sh -f "-O3" -a "${BUILD_ARCH}" -h $HOST_NSYNC_LIB -n $TARGET_NSYNC_LIB
-fi
+# Build the iOS TensorFlow libraries.
+tensorflow/contrib/makefile/compile_ios_tensorflow.sh "-O3"
 
 # Creates a static universal library in
 # tensorflow/contrib/makefile/gen/lib/libtensorflow-core.a
diff --git a/tensorflow/contrib/makefile/compile_ios_protobuf.sh b/tensorflow/contrib/makefile/compile_ios_protobuf.sh
index 43e5809dd2..4056db18a7 100755
--- a/tensorflow/contrib/makefile/compile_ios_protobuf.sh
+++ b/tensorflow/contrib/makefile/compile_ios_protobuf.sh
@@ -21,28 +21,10 @@ if [[ -n MACOSX_DEPLOYMENT_TARGET ]]; then
     export MACOSX_DEPLOYMENT_TARGET=$(sw_vers -productVersion)
 fi
 
-usage() {
-  echo "Usage: $(basename "$0") [-a]"
-  echo "-a [build_arch] build for specified arch comma separate for multiple archs (eg: x86_64,arm64)"
-  echo "default arch i386, x86_64, armv7, armv7s, arm64"
-  exit 1
-}
-
-BUILD_TARGET="i386 x86_64 armv7 armv7s arm64"
-while getopts "a:" opt_name; do
-  case "$opt_name" in
-    a) BUILD_TARGET="${OPTARG}";;
-    *) usage;;
-  esac
-done
-shift $((OPTIND - 1))
-
-IFS=' ' read -r -a build_targets <<< "${BUILD_TARGET}"
-
-SCRIPT_DIR=$(cd `dirname $0` && pwd)
+SCRIPT_DIR=$(dirname $0)
 source "${SCRIPT_DIR}/build_helper.subr"
 
-cd ${SCRIPT_DIR}
+cd tensorflow/contrib/makefile
 
 HOST_GENDIR="$(pwd)/gen/protobuf-host"
 mkdir -p "${HOST_GENDIR}"
@@ -82,10 +64,6 @@ else
   echo "protoc found. Skip building host tools."
 fi
 
-# Remove old libs
-rm -f ${LIBDIR}/libprotobuf.a
-rm -f ${LIBDIR}/libprotobuf-lite.a
-
 ./autogen.sh
 if [ $? -ne 0 ]
 then
@@ -93,192 +71,157 @@ then
   exit 1
 fi
 
-package_pb_library() {
-    pb_libs="${LIBDIR}/${1}/lib/libprotobuf.a"
-    if [ -f "${LIBDIR}/libprotobuf.a" ]; then
-        pb_libs="$pb_libs ${LIBDIR}/libprotobuf.a"
-    fi
-    lipo \
-    $pb_libs \
-    -create \
-    -output ${LIBDIR}/libprotobuf.a
-
-    pblite_libs="${LIBDIR}/${1}/lib/libprotobuf-lite.a"
-    if [ -f "${LIBDIR}/libprotobuf-lite.a" ]; then
-        pblite_libs="$pblite_libs ${LIBDIR}/libprotobuf-lite.a"
-    fi
-    lipo \
-    $pblite_libs \
-    -create \
-    -output ${LIBDIR}/libprotobuf-lite.a
-}
-
-build_target() {
-case "$1" in
-    i386)  make distclean
-        ./configure \
-        --host=i386-apple-${OSX_VERSION} \
-        --disable-shared \
-        --enable-cross-compile \
-        --with-protoc="${PROTOC_PATH}" \
-        --prefix=${LIBDIR}/iossim_386 \
-        --exec-prefix=${LIBDIR}/iossim_386 \
-        "CFLAGS=${CFLAGS} \
-        -mios-simulator-version-min=${MIN_SDK_VERSION} \
-        -arch i386 \
-        -fembed-bitcode \
-        -isysroot ${IPHONESIMULATOR_SYSROOT}" \
-        "CXX=${CXX}" \
-        "CXXFLAGS=${CXXFLAGS} \
-        -mios-simulator-version-min=${MIN_SDK_VERSION} \
-        -arch i386 \
-        -fembed-bitcode \
-        -isysroot \
-        ${IPHONESIMULATOR_SYSROOT}" \
-        LDFLAGS="-arch i386 \
-        -fembed-bitcode \
-        -mios-simulator-version-min=${MIN_SDK_VERSION} \
-        ${LDFLAGS} \
-        -L${IPHONESIMULATOR_SYSROOT}/usr/lib/ \
-        -L${IPHONESIMULATOR_SYSROOT}/usr/lib/system" \
-        "LIBS=${LIBS}"
-        make -j"${JOB_COUNT}"
-        make install
-
-        package_pb_library "iossim_386"
-        ;;
-
-    x86_64) make distclean
-        ./configure \
-        --host=x86_64-apple-${OSX_VERSION} \
-        --disable-shared \
-        --enable-cross-compile \
-        --with-protoc="${PROTOC_PATH}" \
-        --prefix=${LIBDIR}/iossim_x86_64 \
-        --exec-prefix=${LIBDIR}/iossim_x86_64 \
-        "CFLAGS=${CFLAGS} \
-        -mios-simulator-version-min=${MIN_SDK_VERSION} \
-        -arch x86_64 \
-        -fembed-bitcode \
-        -isysroot ${IPHONESIMULATOR_SYSROOT}" \
-        "CXX=${CXX}" \
-        "CXXFLAGS=${CXXFLAGS} \
-        -mios-simulator-version-min=${MIN_SDK_VERSION} \
-        -arch x86_64 \
-        -fembed-bitcode \
-        -isysroot \
-        ${IPHONESIMULATOR_SYSROOT}" \
-        LDFLAGS="-arch x86_64 \
-        -fembed-bitcode \
-        -mios-simulator-version-min=${MIN_SDK_VERSION} \
-        ${LDFLAGS} \
-        -L${IPHONESIMULATOR_SYSROOT}/usr/lib/ \
-        -L${IPHONESIMULATOR_SYSROOT}/usr/lib/system" \
-        "LIBS=${LIBS}"
-        make -j"${JOB_COUNT}"
-        make install
-
-        package_pb_library "iossim_x86_64"
-        ;;
-
-    armv7) make distclean
-        ./configure \
-        --host=armv7-apple-${OSX_VERSION} \
-        --with-protoc="${PROTOC_PATH}" \
-        --disable-shared \
-        --prefix=${LIBDIR}/ios_arm7 \
-        --exec-prefix=${LIBDIR}/ios_arm7 \
-        "CFLAGS=${CFLAGS} \
-        -miphoneos-version-min=${MIN_SDK_VERSION} \
-        -arch armv7 \
-        -fembed-bitcode \
-        -isysroot ${IPHONEOS_SYSROOT}" \
-        "CXX=${CXX}" \
-        "CXXFLAGS=${CXXFLAGS} \
-        -miphoneos-version-min=${MIN_SDK_VERSION} \
-        -arch armv7 \
-        -fembed-bitcode \
-        -isysroot ${IPHONEOS_SYSROOT}" \
-        LDFLAGS="-arch armv7 \
-        -fembed-bitcode \
-        -miphoneos-version-min=${MIN_SDK_VERSION} \
-        ${LDFLAGS}" \
-        "LIBS=${LIBS}"
-        make -j"${JOB_COUNT}"
-        make install
-
-        package_pb_library "ios_arm7"
-        ;;
-
-    armv7s) make distclean
-        ./configure \
-        --host=armv7s-apple-${OSX_VERSION} \
-        --with-protoc="${PROTOC_PATH}" \
-        --disable-shared \
-        --prefix=${LIBDIR}/ios_arm7s \
-        --exec-prefix=${LIBDIR}/ios_arm7s \
-        "CFLAGS=${CFLAGS} \
-        -miphoneos-version-min=${MIN_SDK_VERSION} \
-        -arch armv7s \
-        -fembed-bitcode \
-        -isysroot ${IPHONEOS_SYSROOT}" \
-        "CXX=${CXX}" \
-        "CXXFLAGS=${CXXFLAGS} \
-        -miphoneos-version-min=${MIN_SDK_VERSION} \
-        -arch armv7s \
-        -fembed-bitcode \
-        -isysroot ${IPHONEOS_SYSROOT}" \
-        LDFLAGS="-arch armv7s \
-        -fembed-bitcode \
-        -miphoneos-version-min=${MIN_SDK_VERSION} \
-        ${LDFLAGS}" \
-        "LIBS=${LIBS}"
-        make -j"${JOB_COUNT}"
-        make install
-
-        package_pb_library "ios_arm7s"
-        ;;
-
-    arm64) make distclean
-        ./configure \
-        --host=arm \
-        --with-protoc="${PROTOC_PATH}" \
-        --disable-shared \
-        --prefix=${LIBDIR}/ios_arm64 \
-        --exec-prefix=${LIBDIR}/ios_arm64 \
-        "CFLAGS=${CFLAGS} \
-        -miphoneos-version-min=${MIN_SDK_VERSION} \
-        -arch arm64 \
-        -fembed-bitcode \
-        -isysroot ${IPHONEOS_SYSROOT}" \
-        "CXXFLAGS=${CXXFLAGS} \
-        -miphoneos-version-min=${MIN_SDK_VERSION} \
-        -arch arm64 \
-        -fembed-bitcode \
-        -isysroot ${IPHONEOS_SYSROOT}" \
-        LDFLAGS="-arch arm64 \
-        -fembed-bitcode \
-        -miphoneos-version-min=${MIN_SDK_VERSION} \
-        ${LDFLAGS}" \
-        "LIBS=${LIBS}"
-        make -j"${JOB_COUNT}"
-        make install
-
-        package_pb_library "ios_arm64"
-        ;;
-    *)
-        echo "Unknown ARCH"
-        exit 1
-        ;;
-esac 
-}
-
-for build_element in "${build_targets[@]}"
-do
-    echo "$build_element"
-    build_target "$build_element"
-done
-
-file ${LIBDIR}/libprotobuf.a
-file ${LIBDIR}/libprotobuf-lite.a
-echo "Done building and packaging the libraries"
+make distclean
+./configure \
+--host=i386-apple-${OSX_VERSION} \
+--disable-shared \
+--enable-cross-compile \
+--with-protoc="${PROTOC_PATH}" \
+--prefix=${LIBDIR}/iossim_386 \
+--exec-prefix=${LIBDIR}/iossim_386 \
+"CFLAGS=${CFLAGS} \
+-mios-simulator-version-min=${MIN_SDK_VERSION} \
+-arch i386 \
+-fembed-bitcode \
+-isysroot ${IPHONESIMULATOR_SYSROOT}" \
+"CXX=${CXX}" \
+"CXXFLAGS=${CXXFLAGS} \
+-mios-simulator-version-min=${MIN_SDK_VERSION} \
+-arch i386 \
+-fembed-bitcode \
+-isysroot \
+${IPHONESIMULATOR_SYSROOT}" \
+LDFLAGS="-arch i386 \
+-fembed-bitcode \
+-mios-simulator-version-min=${MIN_SDK_VERSION} \
+${LDFLAGS} \
+-L${IPHONESIMULATOR_SYSROOT}/usr/lib/ \
+-L${IPHONESIMULATOR_SYSROOT}/usr/lib/system" \
+"LIBS=${LIBS}"
+make -j"${JOB_COUNT}"
+make install
+
+make distclean
+./configure \
+--host=x86_64-apple-${OSX_VERSION} \
+--disable-shared \
+--enable-cross-compile \
+--with-protoc="${PROTOC_PATH}" \
+--prefix=${LIBDIR}/iossim_x86_64 \
+--exec-prefix=${LIBDIR}/iossim_x86_64 \
+"CFLAGS=${CFLAGS} \
+-mios-simulator-version-min=${MIN_SDK_VERSION} \
+-arch x86_64 \
+-fembed-bitcode \
+-isysroot ${IPHONESIMULATOR_SYSROOT}" \
+"CXX=${CXX}" \
+"CXXFLAGS=${CXXFLAGS} \
+-mios-simulator-version-min=${MIN_SDK_VERSION} \
+-arch x86_64 \
+-fembed-bitcode \
+-isysroot \
+${IPHONESIMULATOR_SYSROOT}" \
+LDFLAGS="-arch x86_64 \
+-fembed-bitcode \
+-mios-simulator-version-min=${MIN_SDK_VERSION} \
+${LDFLAGS} \
+-L${IPHONESIMULATOR_SYSROOT}/usr/lib/ \
+-L${IPHONESIMULATOR_SYSROOT}/usr/lib/system" \
+"LIBS=${LIBS}"
+make -j"${JOB_COUNT}"
+make install
+
+make distclean
+./configure \
+--host=armv7-apple-${OSX_VERSION} \
+--with-protoc="${PROTOC_PATH}" \
+--disable-shared \
+--prefix=${LIBDIR}/ios_arm7 \
+--exec-prefix=${LIBDIR}/ios_arm7 \
+"CFLAGS=${CFLAGS} \
+-miphoneos-version-min=${MIN_SDK_VERSION} \
+-arch armv7 \
+-fembed-bitcode \
+-isysroot ${IPHONEOS_SYSROOT}" \
+"CXX=${CXX}" \
+"CXXFLAGS=${CXXFLAGS} \
+-miphoneos-version-min=${MIN_SDK_VERSION} \
+-arch armv7 \
+-fembed-bitcode \
+-isysroot ${IPHONEOS_SYSROOT}" \
+LDFLAGS="-arch armv7 \
+-fembed-bitcode \
+-miphoneos-version-min=${MIN_SDK_VERSION} \
+${LDFLAGS}" \
+"LIBS=${LIBS}"
+make -j"${JOB_COUNT}"
+make install
+
+make distclean
+./configure \
+--host=armv7s-apple-${OSX_VERSION} \
+--with-protoc="${PROTOC_PATH}" \
+--disable-shared \
+--prefix=${LIBDIR}/ios_arm7s \
+--exec-prefix=${LIBDIR}/ios_arm7s \
+"CFLAGS=${CFLAGS} \
+-miphoneos-version-min=${MIN_SDK_VERSION} \
+-arch armv7s \
+-fembed-bitcode \
+-isysroot ${IPHONEOS_SYSROOT}" \
+"CXX=${CXX}" \
+"CXXFLAGS=${CXXFLAGS} \
+-miphoneos-version-min=${MIN_SDK_VERSION} \
+-arch armv7s \
+-fembed-bitcode \
+-isysroot ${IPHONEOS_SYSROOT}" \
+LDFLAGS="-arch armv7s \
+-fembed-bitcode \
+-miphoneos-version-min=${MIN_SDK_VERSION} \
+${LDFLAGS}" \
+"LIBS=${LIBS}"
+make -j"${JOB_COUNT}"
+make install
+
+make distclean
+./configure \
+--host=arm \
+--with-protoc="${PROTOC_PATH}" \
+--disable-shared \
+--prefix=${LIBDIR}/ios_arm64 \
+--exec-prefix=${LIBDIR}/ios_arm64 \
+"CFLAGS=${CFLAGS} \
+-miphoneos-version-min=${MIN_SDK_VERSION} \
+-arch arm64 \
+-fembed-bitcode \
+-isysroot ${IPHONEOS_SYSROOT}" \
+"CXXFLAGS=${CXXFLAGS} \
+-miphoneos-version-min=${MIN_SDK_VERSION} \
+-arch arm64 \
+-fembed-bitcode \
+-isysroot ${IPHONEOS_SYSROOT}" \
+LDFLAGS="-arch arm64 \
+-fembed-bitcode \
+-miphoneos-version-min=${MIN_SDK_VERSION} \
+${LDFLAGS}" \
+"LIBS=${LIBS}"
+make -j"${JOB_COUNT}"
+make install
+
+lipo \
+${LIBDIR}/iossim_386/lib/libprotobuf.a \
+${LIBDIR}/iossim_x86_64/lib/libprotobuf.a \
+${LIBDIR}/ios_arm7/lib/libprotobuf.a \
+${LIBDIR}/ios_arm7s/lib/libprotobuf.a \
+${LIBDIR}/ios_arm64/lib/libprotobuf.a \
+-create \
+-output ${LIBDIR}/libprotobuf.a
+
+lipo \
+${LIBDIR}/iossim_386/lib/libprotobuf-lite.a \
+${LIBDIR}/iossim_x86_64/lib/libprotobuf-lite.a \
+${LIBDIR}/ios_arm7/lib/libprotobuf-lite.a \
+${LIBDIR}/ios_arm7s/lib/libprotobuf-lite.a \
+${LIBDIR}/ios_arm64/lib/libprotobuf-lite.a \
+-create \
+-output ${LIBDIR}/libprotobuf-lite.a
diff --git a/tensorflow/contrib/makefile/compile_ios_tensorflow.sh b/tensorflow/contrib/makefile/compile_ios_tensorflow.sh
index ae82163e11..5d1cc8b375 100755
--- a/tensorflow/contrib/makefile/compile_ios_tensorflow.sh
+++ b/tensorflow/contrib/makefile/compile_ios_tensorflow.sh
@@ -43,124 +43,55 @@ then
     exit 1
 fi
 
-usage() {
-  echo "Usage: $(basename "$0") [-a]"
-  echo "-a [build_arch] build for specified arch comma separate for multiple archs (eg: x86_64,arm64)"
-  echo "default is [i386, x86_64, armv7, armv7s, arm64]"
-  exit 1
-}
-
-BUILD_TARGET="i386 x86_64 armv7 armv7s arm64"
-while getopts "a:f:h:n:" opt_name; do
-  case "$opt_name" in
-    a) BUILD_TARGET="${OPTARG}";;
-    f) BUILD_OPT="${OPTARG}";;
-    h) NSYNC_HOST="${OPTARG}";;
-    n) NSYNC_TARGET="${OPTARG}";;
-    *) usage;;
-  esac
-done
-shift $((OPTIND - 1))
-
-IFS=' ' read -r -a build_targets <<< "${BUILD_TARGET}"
-
-SCRIPT_DIR=$(cd `dirname $0` && pwd)
-source "${SCRIPT_DIR}/build_helper.subr"
-
-
 GENDIR=tensorflow/contrib/makefile/gen/
 LIBDIR=${GENDIR}lib
 LIB_PREFIX=libtensorflow-core
 
-#remove any old artifacts
-rm -rf ${LIBDIR}/${LIB_PREFIX}.a
+make -j"${JOB_COUNT}" -f tensorflow/contrib/makefile/Makefile \
+TARGET=IOS IOS_ARCH=ARMV7 LIB_NAME=${LIB_PREFIX}-armv7.a OPTFLAGS="$1"
+if [ $? -ne 0 ]
+then
+  echo "armv7 compilation failed."
+  exit 1
+fi
 
-package_tf_library() {
-    CAP_DIR=`echo $1 | tr 'a-z' 'A-Z'`
-    tf_libs="${LIBDIR}/ios_${CAP_DIR}/${LIB_PREFIX}-${1}.a"
-    if [ -f "${LIBDIR}/${LIB_PREFIX}.a" ]; then
-        tf_libs="$tf_libs ${LIBDIR}/${LIB_PREFIX}.a"
-    fi
-    lipo \
-    $tf_libs \
-    -create \
-    -output ${LIBDIR}/${LIB_PREFIX}.a
-}
+make -j"${JOB_COUNT}" -f tensorflow/contrib/makefile/Makefile \
+TARGET=IOS IOS_ARCH=ARMV7S LIB_NAME=${LIB_PREFIX}-armv7s.a OPTFLAGS="$1"
+if [ $? -ne 0 ]
+then
+  echo "arm7vs compilation failed."
+  exit 1
+fi
 
-build_tf_target() {
-case "$1" in
-    armv7)
-        make -j"${JOB_COUNT}" -f tensorflow/contrib/makefile/Makefile \
-        TARGET=IOS IOS_ARCH=ARMV7 LIB_NAME=${LIB_PREFIX}-armv7.a \
-        OPTFLAGS="${BUILD_OPT}" HOST_NSYNC_LIB="${NSYNC_HOST}" \
-        TARGET_NSYNC_LIB="${NSYNC_TARGET}"
-        if [ $? -ne 0 ]
-        then
-          echo "armv7 compilation failed."
-          exit 1
-        fi
-        package_tf_library "armv7"
-        ;;
-    armv7s)
-        make -j"${JOB_COUNT}" -f tensorflow/contrib/makefile/Makefile \
-        TARGET=IOS IOS_ARCH=ARMV7S LIB_NAME=${LIB_PREFIX}-armv7s.a \
-        OPTFLAGS="${BUILD_OPT}" HOST_NSYNC_LIB="${NSYNC_HOST}" \
-        TARGET_NSYNC_LIB="${NSYNC_TARGET}"
+make -j"${JOB_COUNT}" -f tensorflow/contrib/makefile/Makefile \
+TARGET=IOS IOS_ARCH=ARM64 LIB_NAME=${LIB_PREFIX}-arm64.a OPTFLAGS="$1"
+if [ $? -ne 0 ]
+then
+  echo "arm64 compilation failed."
+  exit 1
+fi
 
-        if [ $? -ne 0 ]
-        then
-          echo "arm7vs compilation failed."
-          exit 1
-        fi
-        package_tf_library "armv7s"
-        ;;
-    arm64)
-        make -j"${JOB_COUNT}" -f tensorflow/contrib/makefile/Makefile \
-        TARGET=IOS IOS_ARCH=ARM64 LIB_NAME=${LIB_PREFIX}-arm64.a \
-        OPTFLAGS="${BUILD_OPT}" HOST_NSYNC_LIB="${NSYNC_HOST}" \
-        TARGET_NSYNC_LIB="${NSYNC_TARGET}"
-        if [ $? -ne 0 ]
-        then
-          echo "arm64 compilation failed."
-          exit 1
-        fi
-        package_tf_library "arm64"
-        ;;
-    i386)
-        make -j"${JOB_COUNT}" -f tensorflow/contrib/makefile/Makefile \
-        TARGET=IOS IOS_ARCH=I386 LIB_NAME=${LIB_PREFIX}-i386.a \
-        OPTFLAGS="${BUILD_OPT}" HOST_NSYNC_LIB="${NSYNC_HOST}" \
-        TARGET_NSYNC_LIB="${NSYNC_TARGET}"
-        if [ $? -ne 0 ]
-        then
-          echo "i386 compilation failed."
-          exit 1
-        fi
-        package_tf_library "i386"
-        ;;
-    x86_64)
-        make -j"${JOB_COUNT}" -f tensorflow/contrib/makefile/Makefile \
-        TARGET=IOS IOS_ARCH=X86_64 LIB_NAME=${LIB_PREFIX}-x86_64.a \
-        OPTFLAGS="${BUILD_OPT}" HOST_NSYNC_LIB="${NSYNC_HOST}" \
-        TARGET_NSYNC_LIB="${NSYNC_TARGET}"
-        if [ $? -ne 0 ]
-        then
-          echo "x86_64 compilation failed."
-          exit 1
-        fi
-        package_tf_library "x86_64"
-        ;;
-    *)
-        echo "Unknown ARCH"
-        exit 1
-esac
-}
+make -j"${JOB_COUNT}" -f tensorflow/contrib/makefile/Makefile \
+TARGET=IOS IOS_ARCH=I386 LIB_NAME=${LIB_PREFIX}-i386.a OPTFLAGS="$1"
+if [ $? -ne 0 ]
+then
+  echo "i386 compilation failed."
+  exit 1
+fi
 
-for build_tf_element in "${build_targets[@]}"
-do
-    echo "$build_tf_element"
-    build_tf_target "$build_tf_element"
-done
+make -j"${JOB_COUNT}" -f tensorflow/contrib/makefile/Makefile \
+TARGET=IOS IOS_ARCH=X86_64 LIB_NAME=${LIB_PREFIX}-x86_64.a OPTFLAGS="$1"
+if [ $? -ne 0 ]
+then
+  echo "x86_64 compilation failed."
+  exit 1
+fi
 
-echo "Done building and packaging TF"
-file ${LIBDIR}/${LIB_PREFIX}.a
+lipo \
+${LIBDIR}/ios_ARMV7/${LIB_PREFIX}-armv7.a \
+${LIBDIR}/ios_ARMV7S/${LIB_PREFIX}-armv7s.a \
+${LIBDIR}/ios_ARM64/${LIB_PREFIX}-arm64.a \
+${LIBDIR}/ios_I386/${LIB_PREFIX}-i386.a \
+${LIBDIR}/ios_X86_64/${LIB_PREFIX}-x86_64.a \
+-create \
+-output ${LIBDIR}/${LIB_PREFIX}.a
diff --git a/tensorflow/contrib/makefile/compile_nsync.sh b/tensorflow/contrib/makefile/compile_nsync.sh
index 930e6b8dea..ecbd9bb825 100755
--- a/tensorflow/contrib/makefile/compile_nsync.sh
+++ b/tensorflow/contrib/makefile/compile_nsync.sh
@@ -265,7 +265,7 @@ for arch in $archs; do
                                           -I$(NDK_ROOT)/sources/cxx-stl/gnu-libstdc++/4.9/libs/'"$arch"'/include \
                                           -I../../platform/c++11 -I../../platform/gcc \
                                           -I../../platform/posix -pthread
-                        PLATFORM_CFLAGS=-std=c++11 -Wno-narrowing '"$march_option"' -fPIE -fPIC
+                        PLATFORM_CFLAGS=-std=c++11 -Wno-narrowing '"$march_option"' -fPIE
                         PLATFORM_LDFLAGS=-pthread
                         MKDEP=${CC} -M -std=c++11
                         PLATFORM_C=../../platform/c++11/src/nsync_semaphore_mutex.cc \
@@ -301,9 +301,6 @@ done
 
 case "$target_platform" in
 ios)    nsync_platform_dir="$nsync_builds_dir/lipo.$target_platform.c++11"
-        if [ -d "$nsync_platform_dir" ]; then
-            rm -rf "$nsync_platform_dir"
-        fi
         mkdir "$nsync_platform_dir"
         eval lipo $platform_libs -create -output '$nsync_platform_dir/nsync.a'
         echo "$nsync_platform_dir/nsync.a"
diff --git a/tensorflow/contrib/nn/__init__.py b/tensorflow/contrib/nn/__init__.py
index 0bc133a00e..3bf795d19a 100644
--- a/tensorflow/contrib/nn/__init__.py
+++ b/tensorflow/contrib/nn/__init__.py
@@ -15,7 +15,6 @@
 """Module for variants of ops in tf.nn.
 
 @@alpha_dropout
-@@conv1d_transpose
 @@deprecated_flipped_softmax_cross_entropy_with_logits
 @@deprecated_flipped_sparse_softmax_cross_entropy_with_logits
 @@deprecated_flipped_sigmoid_cross_entropy_with_logits
@@ -33,7 +32,6 @@ from tensorflow.contrib.nn.python.ops.alpha_dropout import *
 from tensorflow.contrib.nn.python.ops.cross_entropy import *
 from tensorflow.contrib.nn.python.ops.sampling_ops import *
 from tensorflow.contrib.nn.python.ops.scaled_softplus import *
-from tensorflow.python.ops.nn_ops import conv1d_transpose
 from tensorflow.python.ops.nn_ops import nth_element
 # pylint: enable=unused-import,wildcard-import
 
diff --git a/tensorflow/contrib/opt/BUILD b/tensorflow/contrib/opt/BUILD
index a9a63cbce0..8c46becf2c 100644
--- a/tensorflow/contrib/opt/BUILD
+++ b/tensorflow/contrib/opt/BUILD
@@ -19,7 +19,6 @@ py_library(
         "python/training/external_optimizer.py",
         "python/training/lazy_adam_optimizer.py",
         "python/training/moving_average_optimizer.py",
-        "python/training/multitask_optimizer_wrapper.py",
         "python/training/nadam_optimizer.py",
         "python/training/powersign.py",
         "python/training/sign_decay.py",
@@ -99,23 +98,6 @@ py_test(
     ],
 )
 
-py_test(
-    name = "multitask_optimizer_wrapper_test",
-    srcs = ["python/training/multitask_optimizer_wrapper_test.py"],
-    srcs_version = "PY2AND3",
-    deps = [
-        ":opt_py",
-        "//tensorflow/python:client",
-        "//tensorflow/python:client_testlib",
-        "//tensorflow/python:constant_op",
-        "//tensorflow/python:dtypes",
-        "//tensorflow/python:training",
-        "//tensorflow/python:variables",
-        "//third_party/py/numpy",
-        "@six_archive//:six",
-    ],
-)
-
 py_test(
     name = "lazy_adam_optimizer_test",
     srcs = ["python/training/lazy_adam_optimizer_test.py"],
diff --git a/tensorflow/contrib/opt/__init__.py b/tensorflow/contrib/opt/__init__.py
index 4c60c99342..caf22536bb 100644
--- a/tensorflow/contrib/opt/__init__.py
+++ b/tensorflow/contrib/opt/__init__.py
@@ -24,7 +24,7 @@ from tensorflow.contrib.opt.python.training.drop_stale_gradient_optimizer import
 from tensorflow.contrib.opt.python.training.external_optimizer import *
 from tensorflow.contrib.opt.python.training.lazy_adam_optimizer import *
 from tensorflow.contrib.opt.python.training.moving_average_optimizer import *
-from tensorflow.contrib.opt.python.training.multitask_optimizer_wrapper import *
+from tensorflow.contrib.opt.python.training.nadam_optimizer import *
 from tensorflow.contrib.opt.python.training.nadam_optimizer import *
 from tensorflow.contrib.opt.python.training.powersign import *
 from tensorflow.contrib.opt.python.training.variable_clipping_optimizer import *
@@ -38,8 +38,7 @@ _allowed_symbols = [
     'DelayCompensatedGradientDescentOptimizer',
     'DropStaleGradientOptimizer', 'ExternalOptimizerInterface',
     'LazyAdamOptimizer', 'NadamOptimizer', 'MovingAverageOptimizer',
-    'ScipyOptimizerInterface', 'VariableClippingOptimizer',
-    'MultitaskOptimizerWrapper', 'clip_gradients_by_global_norm',
+    'ScipyOptimizerInterface', 'VariableClippingOptimizer'
 ]
 
 remove_undocumented(__name__, _allowed_symbols)
diff --git a/tensorflow/contrib/opt/python/training/multitask_optimizer_wrapper.py b/tensorflow/contrib/opt/python/training/multitask_optimizer_wrapper.py
deleted file mode 100644
index c26037935d..0000000000
--- a/tensorflow/contrib/opt/python/training/multitask_optimizer_wrapper.py
+++ /dev/null
@@ -1,138 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""An optimizer wrapper that ensures correct behaviour
-of stateful optimizers with multitask loss."""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import types
-import six
-
-from tensorflow.python.framework import dtypes
-from tensorflow.python.ops import array_ops
-from tensorflow.python.ops import clip_ops
-from tensorflow.python.ops import control_flow_ops
-from tensorflow.python.ops import math_ops
-from tensorflow.python.training import optimizer
-
-__all__ = ["MultitaskOptimizerWrapper",
-           "clip_gradients_by_global_norm"]
-
-def _is_all_zeros(grad):
-  all_zeros = math_ops.equal(math_ops.count_nonzero(grad), 0)
-  return all_zeros
-
-def _get_wrapper(fn, opt):
-  def wrapper(self, grad, *args, **kwargs):  # pylint: disable=unused-argument
-    all_zeros = _is_all_zeros(grad)
-    return control_flow_ops.cond(
-        all_zeros,
-        control_flow_ops.no_op,
-        lambda: fn(grad, *args, **kwargs))
-  wrapper = types.MethodType(wrapper, opt)
-  return wrapper
-
-class MultitaskOptimizerWrapper(object):
-  """Optimizer wrapper that ensures that
-  all-zero gradients don't affect the optimizer state.
-
-  This might be useful when a multi-task loss is used,
-  and some components of the loss might be
-  not present (e.g. masked out) in some training batches.
-  Technically their gradient would be zero,
-  which would normally affect the optimizer state
-  (e.g. push running average to zero).
-  However this is not the desired behaviour,
-  since the missing loss component
-  should be treated as unknown rather than zero.
-
-  This wrapper filters out all-zero gradient tensors,
-  therefore preserving the optimizer state.
-
-  If gradient clipping by global norm is used,
-  the provided function clip_gradients_by_global_norm
-  should be used (and specified explicitly by the user).
-  Otherwise the global norm would be underestimated
-  because of all-zero tensors that should be ignored.
-
-  The gradient calculation and application
-  are delegated to an underlying optimizer.
-  The gradient application is altered only for all-zero tensors.
-
-  Example:
-  ```python
-  momentum_optimizer = tf.train.MomentumOptimizer(
-    learning_rate, momentum=0.9)
-  multitask_momentum_optimizer = tf.contrib.opt.MultitaskOptimizerWrapper(
-    momentum_optimizer)
-  gradvars = multitask_momentum_optimizer.compute_gradients(
-    loss)
-  gradvars_clipped, _ = tf.contrib.opt.clip_gradients_by_global_norm(
-    gradvars, 15.0)
-  train_op = multitask_momentum_optimizer.apply_gradients(
-    gradvars_clipped, global_step=batch)
-  ```
-  """
-  def __init__(self, opt):
-    """
-    Args:
-    opt: an instance of a class that implements tf.train.Optimizer.
-    """
-    if not isinstance(opt, optimizer.Optimizer):
-      raise TypeError(
-          "Supplied optimizer must be an instance of tf.train.Optimizer")
-    self._opt = opt
-    overriden_methods = ('_apply_dense',
-                         '_resource_apply_dense',
-                         '_apply_sparse',
-                         '_resource_apply_sparse')
-    for name in overriden_methods:
-      fn = getattr(self._opt, name)
-      wrapper = _get_wrapper(fn, self._opt)
-      setattr(self._opt, name, wrapper)
-
-  def __getattr__(self, name):
-    return getattr(self._opt, name)
-
-
-def clip_gradients_by_global_norm(gradients_variables, clip_norm=20.):
-  """Clips gradients of a multitask loss by their global norm.
-  Ignores all-zero tensors when computing the global norm.
-
-  Args:
-  gradients_variables: a list of pairs (gradient, variable).
-  clip_norm: a float Tensor, the global norm to clip on. Default is 20.0.
-
-  Returns:
-  list: A list of pairs of the same type as gradients_variables,.
-  fixed_global_norm: A 0-D (scalar) Tensor representing the global norm.
-  """
-  gradients, variables = six.moves.zip(*gradients_variables)
-  def _replace_nonexisting_grad(grad):
-    if grad is None:
-      return grad
-    all_zeros = _is_all_zeros(grad)
-    return control_flow_ops.cond(all_zeros,
-                                 lambda: array_ops.zeros(
-                                     [], dtype=dtypes.as_dtype(grad.dtype)),
-                                 lambda: grad)
-  nonzero_gradients = [_replace_nonexisting_grad(g) for g in gradients]
-  fixed_global_norm = clip_ops.global_norm(nonzero_gradients)
-  gradients, _ = clip_ops.clip_by_global_norm(gradients, clip_norm,
-                                              use_norm=fixed_global_norm)
-  return list(six.moves.zip(gradients, variables)), fixed_global_norm
diff --git a/tensorflow/contrib/opt/python/training/multitask_optimizer_wrapper_test.py b/tensorflow/contrib/opt/python/training/multitask_optimizer_wrapper_test.py
deleted file mode 100644
index b06213f715..0000000000
--- a/tensorflow/contrib/opt/python/training/multitask_optimizer_wrapper_test.py
+++ /dev/null
@@ -1,119 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Tests for MultitaskOptimizerWrapper."""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-from tensorflow.contrib.opt.python.training import multitask_optimizer_wrapper
-from tensorflow.python.framework import constant_op
-from tensorflow.python.framework import dtypes
-from tensorflow.python.ops import variables
-from tensorflow.python.platform import test
-from tensorflow.python.training import momentum
-
-import numpy as np
-import six
-
-class MultitaskOptimizerWrapperTest(test.TestCase):
-  """
-  Tests for the multitask optimizer wrapper.
-  """
-  def testWrapper(self):
-    with self.test_session():
-      var0 = variables.Variable([1.0, 2.0], dtype=dtypes.float32)
-      var1 = variables.Variable([3.0, 4.0], dtype=dtypes.float32)
-      grads0 = constant_op.constant([0.1, 0.1], dtype=dtypes.float32)
-      grads1 = constant_op.constant([0.01, 0.01], dtype=dtypes.float32)
-      grads_allzero = constant_op.constant([0.0, 0.0], dtype=dtypes.float32)
-      mom_opt_impl = momentum.MomentumOptimizer(
-          learning_rate=2.0, momentum=0.9)
-      mom_opt = multitask_optimizer_wrapper.MultitaskOptimizerWrapper(
-          mom_opt_impl)
-      mom_update = mom_opt.apply_gradients(
-          zip([grads0, grads1], [var0, var1]))
-      mom_update_partial = mom_opt.apply_gradients(
-          zip([grads_allzero, grads1], [var0, var1]))
-      mom_update_no_action = mom_opt.apply_gradients(
-          zip([grads_allzero, grads_allzero], [var0, var1]))
-      self.evaluate(variables.global_variables_initializer())
-      # Fetch params to validate initial values
-      self.assertAllClose([1.0, 2.0], self.evaluate(var0))
-      self.assertAllClose([3.0, 4.0], self.evaluate(var1))
-
-      self.assertEqual(["momentum"], mom_opt.get_slot_names())
-      slot0 = mom_opt.get_slot(var0, "momentum")
-      self.assertEquals(slot0.get_shape(), var0.get_shape())
-      slot1 = mom_opt.get_slot(var1, "momentum")
-      self.assertEquals(slot1.get_shape(), var1.get_shape())
-
-      # Step 1: normal momentum update.
-      self.evaluate(mom_update)
-      # Check that the momentum accumulators have been updated.
-      self.assertAllCloseAccordingToType(np.array([0.1, 0.1]),
-                                         self.evaluate(slot0))
-      self.assertAllCloseAccordingToType(np.array([0.01, 0.01]),
-                                         self.evaluate(slot1))
-      # Check that the parameters have been updated.
-      self.assertAllCloseAccordingToType(
-          np.array([1.0 - (0.1 * 2.0), 2.0 - (0.1 * 2.0)]),
-          self.evaluate(var0))
-      self.assertAllCloseAccordingToType(
-          np.array([3.0 - (0.01 * 2.0), 4.0 - (0.01 * 2.0)]),
-          self.evaluate(var1))
-
-      # Step 2: momentum update that changes only slot1 but not slot0.
-      self.evaluate(mom_update_partial)
-      # Check that only the relevant momentum accumulator has been updated.
-      self.assertAllCloseAccordingToType(np.array([0.1, 0.1]),
-                                         self.evaluate(slot0))
-      self.assertAllCloseAccordingToType(
-          np.array([(0.9 * 0.01 + 0.01), (0.9 * 0.01 + 0.01)]),
-          self.evaluate(slot1))
-
-      # Step 3: momentum update that does not change anything.
-      self.evaluate(mom_update_no_action)
-      # Check that the momentum accumulators have *NOT* been updated.
-      self.assertAllCloseAccordingToType(np.array([0.1, 0.1]),
-                                         self.evaluate(slot0))
-      self.assertAllCloseAccordingToType(
-          np.array([(0.9 * 0.01 + 0.01), (0.9 * 0.01 + 0.01)]),
-          self.evaluate(slot1))
-
-  def testGradientClipping(self):
-    with self.test_session():
-      var0 = variables.Variable([1.0, 2.0], dtype=dtypes.float32)
-      var1 = variables.Variable([3.0, 4.0], dtype=dtypes.float32)
-      var2 = variables.Variable([3.0, 4.0], dtype=dtypes.float32)
-      var3 = variables.Variable([3.0, 4.0], dtype=dtypes.float32)
-      grads0 = constant_op.constant([10.0, 15.0], dtype=dtypes.float32)
-      grads1 = constant_op.constant([0.0, 5.0], dtype=dtypes.float32)
-      grads2 = constant_op.constant([0.0, 0.0], dtype=dtypes.float32)
-      grads3 = None
-      varlist = [var0, var1, var2, var3]
-      gradients = [grads0, grads1, grads2, grads3]
-      clipped_gradvars, global_norm = multitask_optimizer_wrapper.clip_gradients_by_global_norm(
-          six.moves.zip(gradients, varlist), clip_norm=1.0)
-      clipped_grads = list(six.moves.zip(*clipped_gradvars))[0]
-      reference_global_norm = np.sqrt(np.sum(np.square([10.0, 15.0, 0.0, 5.0])))
-      self.assertAllCloseAccordingToType(
-          self.evaluate(global_norm), reference_global_norm)
-      self.assertAllCloseAccordingToType(
-          self.evaluate(clipped_grads[2]), np.array([0., 0.]))
-      self.assertEqual(clipped_grads[3], None)
-
-if __name__ == "__main__":
-  test.main()
diff --git a/tensorflow/contrib/rnn/python/kernel_tests/core_rnn_cell_test.py b/tensorflow/contrib/rnn/python/kernel_tests/core_rnn_cell_test.py
index 16b6d145e3..909c6aba2b 100644
--- a/tensorflow/contrib/rnn/python/kernel_tests/core_rnn_cell_test.py
+++ b/tensorflow/contrib/rnn/python/kernel_tests/core_rnn_cell_test.py
@@ -38,9 +38,6 @@ from tensorflow.python.ops import rnn_cell_impl
 from tensorflow.python.ops import variable_scope
 from tensorflow.python.ops import variables as variables_lib
 from tensorflow.python.platform import test
-from tensorflow.python.framework import test_util
-from tensorflow.contrib.rnn.python.ops import rnn_cell as contrib_rnn_cell
-
 
 
 # pylint: enable=protected-access
@@ -361,45 +358,6 @@ class RNNCellTest(test.TestCase):
       self.assertEquals(variables[2].op.name,
                         "root/lstm_cell/projection/kernel")
 
-  def testLSTMCellLayerNorm(self):
-    with self.test_session() as sess:
-      num_units = 2
-      num_proj = 3
-      batch_size = 1
-      input_size = 4
-      with variable_scope.variable_scope(
-          "root", initializer=init_ops.constant_initializer(0.5)):
-        x = array_ops.zeros([batch_size, input_size])
-        c = array_ops.zeros([batch_size, num_units])
-        h = array_ops.zeros([batch_size, num_proj])
-        state = rnn_cell_impl.LSTMStateTuple(c, h)
-        cell = contrib_rnn_cell.LayerNormLSTMCell(
-          num_units=num_units,
-          num_proj=num_proj,
-          forget_bias=1.0,
-          layer_norm=True,
-          norm_gain=1.0,
-          norm_shift=0.0)
-        g, out_m = cell(x, state)
-        sess.run([variables_lib.global_variables_initializer()])
-        res = sess.run([g, out_m], {
-          x.name: np.ones((batch_size, input_size)),
-          c.name: 0.1 * np.ones((batch_size, num_units)),
-          h.name: 0.1 * np.ones((batch_size, num_proj))
-        })
-        self.assertEqual(len(res), 2)
-        # The numbers in results were not calculated, this is mostly just a
-        # smoke test.
-        self.assertEqual(res[0].shape, (batch_size, num_proj))
-        self.assertEqual(res[1][0].shape, (batch_size, num_units))
-        self.assertEqual(res[1][1].shape, (batch_size, num_proj))
-        # Different inputs so different outputs and states
-        for i in range(1, batch_size):
-          self.assertTrue(
-            float(np.linalg.norm((res[0][0, :] - res[0][i, :]))) < 1e-6)
-          self.assertTrue(
-            float(np.linalg.norm((res[1][0, :] - res[1][i, :]))) < 1e-6)
-
   def testOutputProjectionWrapper(self):
     with self.test_session() as sess:
       with variable_scope.variable_scope(
diff --git a/tensorflow/contrib/rnn/python/kernel_tests/rnn_cell_test.py b/tensorflow/contrib/rnn/python/kernel_tests/rnn_cell_test.py
index b4a5f2d7eb..ebd4564f12 100644
--- a/tensorflow/contrib/rnn/python/kernel_tests/rnn_cell_test.py
+++ b/tensorflow/contrib/rnn/python/kernel_tests/rnn_cell_test.py
@@ -37,7 +37,6 @@ from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import random_ops
 from tensorflow.python.ops import rnn
 from tensorflow.python.ops import rnn_cell
-from tensorflow.python.ops import rnn_cell_impl
 from tensorflow.python.ops import variable_scope
 from tensorflow.python.ops import variables
 from tensorflow.python.platform import test
@@ -1276,49 +1275,6 @@ class LayerNormBasicLSTMCellTest(test.TestCase):
         self.assertAllClose(res[2].c, expected_c1, 1e-5)
         self.assertAllClose(res[2].h, expected_h1, 1e-5)
 
-
-  def testBasicLSTMCellWithStateTupleLayerNorm(self):
-    """The results of LSTMCell and LayerNormBasicLSTMCell 
-    should be same. """
-    with self.test_session() as sess:
-      with variable_scope.variable_scope(
-          "root", initializer=init_ops.constant_initializer(0.5)):
-        x = array_ops.zeros([1, 2])
-        c0 = array_ops.zeros([1, 2])
-        h0 = array_ops.zeros([1, 2])
-        state0 = rnn_cell_impl.LSTMStateTuple(c0, h0)
-        c1 = array_ops.zeros([1, 2])
-        h1 = array_ops.zeros([1, 2])
-        state1 = rnn_cell_impl.LSTMStateTuple(c1, h1)
-        cell = rnn_cell_impl.MultiRNNCell(
-          [contrib_rnn_cell.LayerNormLSTMCell(
-              2,
-              layer_norm=True,
-              norm_gain=1.0,
-              norm_shift=0.0) for _ in range(2)])
-        h, (s0, s1) = cell(x, (state0, state1))
-        sess.run([variables.global_variables_initializer()])
-        res = sess.run([h, s0, s1], {
-          x.name: np.array([[1., 1.]]),
-          c0.name: 0.1 * np.asarray([[0, 1]]),
-          h0.name: 0.1 * np.asarray([[2, 3]]),
-          c1.name: 0.1 * np.asarray([[4, 5]]),
-          h1.name: 0.1 * np.asarray([[6, 7]]),
-        })
-
-        expected_h = np.array([[-0.38079708, 0.38079708]])
-        expected_h0 = np.array([[-0.38079708, 0.38079708]])
-        expected_c0 = np.array([[-1.0, 1.0]])
-        expected_h1 = np.array([[-0.38079708, 0.38079708]])
-        expected_c1 = np.array([[-1.0, 1.0]])
-
-        self.assertEqual(len(res), 3)
-        self.assertAllClose(res[0], expected_h, 1e-5)
-        self.assertAllClose(res[1].c, expected_c0, 1e-5)
-        self.assertAllClose(res[1].h, expected_h0, 1e-5)
-        self.assertAllClose(res[2].c, expected_c1, 1e-5)
-        self.assertAllClose(res[2].h, expected_h1, 1e-5)
-
   def testBasicLSTMCellWithDropout(self):
 
     def _is_close(x, y, digits=4):
diff --git a/tensorflow/contrib/rnn/python/ops/rnn_cell.py b/tensorflow/contrib/rnn/python/ops/rnn_cell.py
index 5e85c125df..d4691f2c27 100644
--- a/tensorflow/contrib/rnn/python/ops/rnn_cell.py
+++ b/tensorflow/contrib/rnn/python/ops/rnn_cell.py
@@ -36,7 +36,6 @@ from tensorflow.python.ops import nn_ops
 from tensorflow.python.ops import random_ops
 from tensorflow.python.ops import rnn_cell_impl
 from tensorflow.python.ops import variable_scope as vs
-from tensorflow.python.ops import partitioned_variables
 from tensorflow.python.platform import tf_logging as logging
 from tensorflow.python.util import nest
 
@@ -77,18 +76,6 @@ def _get_sharded_variable(name, shape, dtype, num_shards):
   return shards
 
 
-def _norm(g, b, inp, scope):
-  shape = inp.get_shape()[-1:]
-  gamma_init = init_ops.constant_initializer(g)
-  beta_init = init_ops.constant_initializer(b)
-  with vs.variable_scope(scope):
-    # Initialize beta and gamma for use by layer_norm.
-    vs.get_variable("gamma", shape=shape, initializer=gamma_init)
-    vs.get_variable("beta", shape=shape, initializer=beta_init)
-  normalized = layers.layer_norm(inp, reuse=True, scope=scope)
-  return normalized
-
-
 class CoupledInputForgetGateLSTMCell(rnn_cell_impl.RNNCell):
   """Long short-term memory unit (LSTM) recurrent network cell.
 
@@ -115,24 +102,13 @@ class CoupledInputForgetGateLSTMCell(rnn_cell_impl.RNNCell):
 
   The class uses optional peep-hole connections, and an optional projection
   layer.
-  
-  Layer normalization implementation is based on:
-
-    https://arxiv.org/abs/1607.06450.
-
-  "Layer Normalization"
-  Jimmy Lei Ba, Jamie Ryan Kiros, Geoffrey E. Hinton
-
-  and is applied before the internal nonlinearities.
-  
   """
 
   def __init__(self, num_units, use_peepholes=False,
                initializer=None, num_proj=None, proj_clip=None,
                num_unit_shards=1, num_proj_shards=1,
                forget_bias=1.0, state_is_tuple=True,
-               activation=math_ops.tanh, reuse=None,
-               layer_norm=False, norm_gain=1.0, norm_shift=0.0):
+               activation=math_ops.tanh, reuse=None):
     """Initialize the parameters for an LSTM cell.
 
     Args:
@@ -159,13 +135,6 @@ class CoupledInputForgetGateLSTMCell(rnn_cell_impl.RNNCell):
       reuse: (optional) Python boolean describing whether to reuse variables
         in an existing scope.  If not `True`, and the existing scope already has
         the given variables, an error is raised.
-      layer_norm: If `True`, layer normalization will be applied.
-      norm_gain: float, The layer normalization gain initial value. If
-        `layer_norm` has been set to `False`, this argument will be ignored.
-      norm_shift: float, The layer normalization shift initial value. If
-        `layer_norm` has been set to `False`, this argument will be ignored.
-        
-        
     """
     super(CoupledInputForgetGateLSTMCell, self).__init__(_reuse=reuse)
     if not state_is_tuple:
@@ -183,9 +152,6 @@ class CoupledInputForgetGateLSTMCell(rnn_cell_impl.RNNCell):
     self._state_is_tuple = state_is_tuple
     self._activation = activation
     self._reuse = reuse
-    self._layer_norm = layer_norm
-    self._norm_gain = norm_gain
-    self._norm_shift = norm_shift
 
     if num_proj:
       self._state_size = (rnn_cell_impl.LSTMStateTuple(num_units, num_proj)
@@ -254,20 +220,9 @@ class CoupledInputForgetGateLSTMCell(rnn_cell_impl.RNNCell):
 
     # j = new_input, f = forget_gate, o = output_gate
     cell_inputs = array_ops.concat([inputs, m_prev], 1)
-    lstm_matrix = math_ops.matmul(cell_inputs, concat_w)
-
-    # If layer nomalization is applied, do not add bias
-    if not self._layer_norm:
-      lstm_matrix = nn_ops.bias_add(lstm_matrix, b)
-
+    lstm_matrix = nn_ops.bias_add(math_ops.matmul(cell_inputs, concat_w), b)
     j, f, o = array_ops.split(value=lstm_matrix, num_or_size_splits=3, axis=1)
 
-    # Apply layer normalization
-    if self._layer_norm:
-      j = _norm(self._norm_gain, self._norm_shift, j, "transform")
-      f = _norm(self._norm_gain, self._norm_shift, f, "forget")
-      o = _norm(self._norm_gain, self._norm_shift, o, "output")
-
     # Diagonal connections
     if self._use_peepholes:
       w_f_diag = vs.get_variable(
@@ -281,10 +236,6 @@ class CoupledInputForgetGateLSTMCell(rnn_cell_impl.RNNCell):
       f_act = sigmoid(f + self._forget_bias)
     c = (f_act * c_prev + (1 - f_act) * self._activation(j))
 
-    # Apply layer normalization
-    if self._layer_norm:
-      c = _norm(self._norm_gain, self._norm_shift, c, "state")
-
     if self._use_peepholes:
       m = sigmoid(o + w_o_diag * c) * self._activation(c)
     else:
@@ -1350,8 +1301,8 @@ class LayerNormBasicLSTMCell(rnn_cell_impl.RNNCell):
     self._keep_prob = dropout_keep_prob
     self._seed = dropout_prob_seed
     self._layer_norm = layer_norm
-    self._norm_gain = norm_gain
-    self._norm_shift = norm_shift
+    self._g = norm_gain
+    self._b = norm_shift
     self._reuse = reuse
 
   @property
@@ -1362,25 +1313,24 @@ class LayerNormBasicLSTMCell(rnn_cell_impl.RNNCell):
   def output_size(self):
     return self._num_units
 
-  def _norm(self, inp, scope, dtype=dtypes.float32):
+  def _norm(self, inp, scope):
     shape = inp.get_shape()[-1:]
-    gamma_init = init_ops.constant_initializer(self._norm_gain)
-    beta_init = init_ops.constant_initializer(self._norm_shift)
+    gamma_init = init_ops.constant_initializer(self._g)
+    beta_init = init_ops.constant_initializer(self._b)
     with vs.variable_scope(scope):
       # Initialize beta and gamma for use by layer_norm.
-      vs.get_variable("gamma", shape=shape, initializer=gamma_init, dtype=dtype)
-      vs.get_variable("beta", shape=shape, initializer=beta_init, dtype=dtype)
+      vs.get_variable("gamma", shape=shape, initializer=gamma_init)
+      vs.get_variable("beta", shape=shape, initializer=beta_init)
     normalized = layers.layer_norm(inp, reuse=True, scope=scope)
     return normalized
 
   def _linear(self, args):
     out_size = 4 * self._num_units
     proj_size = args.get_shape()[-1]
-    dtype = args.dtype
-    weights = vs.get_variable("kernel", [proj_size, out_size], dtype=dtype)
+    weights = vs.get_variable("kernel", [proj_size, out_size])
     out = math_ops.matmul(args, weights)
     if not self._layer_norm:
-      bias = vs.get_variable("bias", [out_size], dtype=dtype)
+      bias = vs.get_variable("bias", [out_size])
       out = nn_ops.bias_add(out, bias)
     return out
 
@@ -1389,14 +1339,13 @@ class LayerNormBasicLSTMCell(rnn_cell_impl.RNNCell):
     c, h = state
     args = array_ops.concat([inputs, h], 1)
     concat = self._linear(args)
-    dtype = args.dtype
 
     i, j, f, o = array_ops.split(value=concat, num_or_size_splits=4, axis=1)
     if self._layer_norm:
-      i = self._norm(i, "input", dtype=dtype)
-      j = self._norm(j, "transform", dtype=dtype)
-      f = self._norm(f, "forget", dtype=dtype)
-      o = self._norm(o, "output", dtype=dtype)
+      i = self._norm(i, "input")
+      j = self._norm(j, "transform")
+      f = self._norm(f, "forget")
+      o = self._norm(o, "output")
 
     g = self._activation(j)
     if (not isinstance(self._keep_prob, float)) or self._keep_prob < 1:
@@ -1405,7 +1354,7 @@ class LayerNormBasicLSTMCell(rnn_cell_impl.RNNCell):
     new_c = (c * math_ops.sigmoid(f + self._forget_bias)
              + math_ops.sigmoid(i) * g)
     if self._layer_norm:
-      new_c = self._norm(new_c, "state", dtype=dtype)
+      new_c = self._norm(new_c, "state")
     new_h = self._activation(new_c) * math_ops.sigmoid(o)
 
     new_state = rnn_cell_impl.LSTMStateTuple(new_c, new_h)
@@ -2357,264 +2306,3 @@ class GLSTMCell(rnn_cell_impl.RNNCell):
 
     new_state = rnn_cell_impl.LSTMStateTuple(c, m)
     return m, new_state
-
-
-class LayerNormLSTMCell(rnn_cell_impl.RNNCell):
-  """Long short-term memory unit (LSTM) recurrent network cell.
-
-  The default non-peephole implementation is based on:
-
-    http://www.bioinf.jku.at/publications/older/2604.pdf
-
-  S. Hochreiter and J. Schmidhuber.
-  "Long Short-Term Memory". Neural Computation, 9(8):1735-1780, 1997.
-
-  The peephole implementation is based on:
-
-    https://research.google.com/pubs/archive/43905.pdf
-
-  Hasim Sak, Andrew Senior, and Francoise Beaufays.
-  "Long short-term memory recurrent neural network architectures for
-   large scale acoustic modeling." INTERSPEECH, 2014.
-
-  The class uses optional peep-hole connections, optional cell clipping, and
-  an optional projection layer.
-
-  Layer normalization implementation is based on:
-
-    https://arxiv.org/abs/1607.06450.
-
-  "Layer Normalization"
-  Jimmy Lei Ba, Jamie Ryan Kiros, Geoffrey E. Hinton
-
-  and is applied before the internal nonlinearities.
-
-  """
-
-  def __init__(self, num_units,
-               use_peepholes=False, cell_clip=None,
-               initializer=None, num_proj=None, proj_clip=None,
-               forget_bias=1.0,
-               activation=None, layer_norm=False,
-               norm_gain=1.0, norm_shift=0.0, reuse=None):
-    """Initialize the parameters for an LSTM cell.
-
-    Args:
-      num_units: int, The number of units in the LSTM cell
-      use_peepholes: bool, set True to enable diagonal/peephole connections.
-      cell_clip: (optional) A float value, if provided the cell state is clipped
-        by this value prior to the cell output activation.
-      initializer: (optional) The initializer to use for the weight and
-        projection matrices.
-      num_proj: (optional) int, The output dimensionality for the projection
-        matrices.  If None, no projection is performed.
-      proj_clip: (optional) A float value.  If `num_proj > 0` and `proj_clip` is
-        provided, then the projected values are clipped elementwise to within
-        `[-proj_clip, proj_clip]`.
-      forget_bias: Biases of the forget gate are initialized by default to 1
-        in order to reduce the scale of forgetting at the beginning of
-        the training. Must set it manually to `0.0` when restoring from
-        CudnnLSTM trained checkpoints.
-      activation: Activation function of the inner states.  Default: `tanh`.
-      layer_norm: If `True`, layer normalization will be applied.
-      norm_gain: float, The layer normalization gain initial value. If
-        `layer_norm` has been set to `False`, this argument will be ignored.
-      norm_shift: float, The layer normalization shift initial value. If
-        `layer_norm` has been set to `False`, this argument will be ignored.
-      reuse: (optional) Python boolean describing whether to reuse variables
-        in an existing scope.  If not `True`, and the existing scope already has
-        the given variables, an error is raised.
-
-      When restoring from CudnnLSTM-trained checkpoints, must use
-      CudnnCompatibleLSTMCell instead.
-    """
-    super(LayerNormLSTMCell, self).__init__(_reuse=reuse)
-
-    self._num_units = num_units
-    self._use_peepholes = use_peepholes
-    self._cell_clip = cell_clip
-    self._initializer = initializer
-    self._num_proj = num_proj
-    self._proj_clip = proj_clip
-    self._forget_bias = forget_bias
-    self._activation = activation or math_ops.tanh
-    self._layer_norm = layer_norm
-    self._norm_gain = norm_gain
-    self._norm_shift = norm_shift
-
-    if num_proj:
-      self._state_size = (rnn_cell_impl.LSTMStateTuple(num_units, num_proj))
-      self._output_size = num_proj
-    else:
-      self._state_size = (rnn_cell_impl.LSTMStateTuple(num_units, num_units))
-      self._output_size = num_units
-
-  @property
-  def state_size(self):
-    return self._state_size
-
-  @property
-  def output_size(self):
-    return self._output_size
-
-
-  def _linear(self,
-              args,
-              output_size,
-              bias,
-              bias_initializer=None,
-              kernel_initializer=None,
-              layer_norm=False):
-    """Linear map: sum_i(args[i] * W[i]), where W[i] is a Variable.
-
-    Args:
-      args: a 2D Tensor or a list of 2D, batch x n, Tensors.
-      output_size: int, second dimension of W[i].
-      bias: boolean, whether to add a bias term or not.
-      bias_initializer: starting value to initialize the bias
-        (default is all zeros).
-      kernel_initializer: starting value to initialize the weight.
-      layer_norm: boolean, whether to apply layer normalization.
-
-
-    Returns:
-      A 2D Tensor with shape [batch x output_size] taking value
-      sum_i(args[i] * W[i]), where each W[i] is a newly created Variable.
-
-    Raises:
-      ValueError: if some of the arguments has unspecified or wrong shape.
-    """
-    if args is None or (nest.is_sequence(args) and not args):
-      raise ValueError("`args` must be specified")
-    if not nest.is_sequence(args):
-      args = [args]
-
-    # Calculate the total size of arguments on dimension 1.
-    total_arg_size = 0
-    shapes = [a.get_shape() for a in args]
-    for shape in shapes:
-      if shape.ndims != 2:
-        raise ValueError("linear is expecting 2D arguments: %s" % shapes)
-      if shape[1].value is None:
-        raise ValueError("linear expects shape[1] to be provided for shape %s, "
-                         "but saw %s" % (shape, shape[1]))
-      else:
-        total_arg_size += shape[1].value
-
-    dtype = [a.dtype for a in args][0]
-
-    # Now the computation.
-    scope = vs.get_variable_scope()
-    with vs.variable_scope(scope) as outer_scope:
-      weights = vs.get_variable(
-        "kernel", [total_arg_size, output_size],
-        dtype=dtype,
-        initializer=kernel_initializer)
-      if len(args) == 1:
-        res = math_ops.matmul(args[0], weights)
-      else:
-        res = math_ops.matmul(array_ops.concat(args, 1), weights)
-      if not bias:
-        return res
-      with vs.variable_scope(outer_scope) as inner_scope:
-        inner_scope.set_partitioner(None)
-        if bias_initializer is None:
-          bias_initializer = init_ops.constant_initializer(0.0, dtype=dtype)
-        biases = vs.get_variable(
-          "bias", [output_size],
-          dtype=dtype,
-          initializer=bias_initializer)
-
-    if not layer_norm:
-      res = nn_ops.bias_add(res, biases)
-
-    return res
-
-  def call(self, inputs, state):
-    """Run one step of LSTM.
-
-    Args:
-      inputs: input Tensor, 2D, batch x num_units.
-      state: this must be a tuple of state Tensors,
-       both `2-D`, with column sizes `c_state` and
-        `m_state`.
-
-    Returns:
-      A tuple containing:
-
-      - A `2-D, [batch x output_dim]`, Tensor representing the output of the
-        LSTM after reading `inputs` when previous state was `state`.
-        Here output_dim is:
-           num_proj if num_proj was set,
-           num_units otherwise.
-      - Tensor(s) representing the new state of LSTM after reading `inputs` when
-        the previous state was `state`.  Same type and shape(s) as `state`.
-
-    Raises:
-      ValueError: If input size cannot be inferred from inputs via
-        static shape inference.
-    """
-    num_proj = self._num_units if self._num_proj is None else self._num_proj
-    sigmoid = math_ops.sigmoid
-
-    (c_prev, m_prev) = state
-
-    dtype = inputs.dtype
-    input_size = inputs.get_shape().with_rank(2)[1]
-    if input_size.value is None:
-      raise ValueError("Could not infer input size from inputs.get_shape()[-1]")
-    scope = vs.get_variable_scope()
-    with vs.variable_scope(scope, initializer=self._initializer) as unit_scope:
-
-      # i = input_gate, j = new_input, f = forget_gate, o = output_gate
-      lstm_matrix = self._linear([inputs, m_prev], 4 * self._num_units, bias=True,
-                            bias_initializer=None, layer_norm=self._layer_norm)
-      i, j, f, o = array_ops.split(
-        value=lstm_matrix, num_or_size_splits=4, axis=1)
-
-      if self._layer_norm:
-        i = _norm(self._norm_gain, self._norm_shift, i, "input")
-        j = _norm(self._norm_gain, self._norm_shift, j, "transform")
-        f = _norm(self._norm_gain, self._norm_shift, f, "forget")
-        o = _norm(self._norm_gain, self._norm_shift, o, "output")
-
-      # Diagonal connections
-      if self._use_peepholes:
-        with vs.variable_scope(unit_scope) as projection_scope:
-          w_f_diag = vs.get_variable(
-            "w_f_diag", shape=[self._num_units], dtype=dtype)
-          w_i_diag = vs.get_variable(
-            "w_i_diag", shape=[self._num_units], dtype=dtype)
-          w_o_diag = vs.get_variable(
-            "w_o_diag", shape=[self._num_units], dtype=dtype)
-
-      if self._use_peepholes:
-        c = (sigmoid(f + self._forget_bias + w_f_diag * c_prev) * c_prev +
-             sigmoid(i + w_i_diag * c_prev) * self._activation(j))
-      else:
-        c = (sigmoid(f + self._forget_bias) * c_prev + sigmoid(i) *
-             self._activation(j))
-
-      if self._layer_norm:
-        c = _norm(self._norm_gain, self._norm_shift, c, "state")
-
-      if self._cell_clip is not None:
-        # pylint: disable=invalid-unary-operand-type
-        c = clip_ops.clip_by_value(c, -self._cell_clip, self._cell_clip)
-        # pylint: enable=invalid-unary-operand-type
-      if self._use_peepholes:
-        m = sigmoid(o + w_o_diag * c) * self._activation(c)
-      else:
-        m = sigmoid(o) * self._activation(c)
-
-      if self._num_proj is not None:
-        with vs.variable_scope("projection") as proj_scope:
-          m = self._linear(m, self._num_proj, bias=False)
-
-        if self._proj_clip is not None:
-          # pylint: disable=invalid-unary-operand-type
-          m = clip_ops.clip_by_value(m, -self._proj_clip, self._proj_clip)
-          # pylint: enable=invalid-unary-operand-type
-
-    new_state = (rnn_cell_impl.LSTMStateTuple(c, m))
-    return m, new_state
diff --git a/tensorflow/contrib/seq2seq/python/ops/attention_wrapper.py b/tensorflow/contrib/seq2seq/python/ops/attention_wrapper.py
index c3b180d9f4..87230e3355 100644
--- a/tensorflow/contrib/seq2seq/python/ops/attention_wrapper.py
+++ b/tensorflow/contrib/seq2seq/python/ops/attention_wrapper.py
@@ -149,7 +149,7 @@ class _BaseAttentionMechanism(AttentionMechanism):
                memory_sequence_length=None,
                memory_layer=None,
                check_inner_dims_defined=True,
-               score_mask_value=None,
+               score_mask_value=float("-inf"),
                name=None):
     """Construct base AttentionMechanism class.
 
@@ -187,12 +187,9 @@ class _BaseAttentionMechanism(AttentionMechanism):
           "memory_layer is not a Layer: %s" % type(memory_layer).__name__)
     self._query_layer = query_layer
     self._memory_layer = memory_layer
-    self.dtype = memory_layer.dtype
     if not callable(probability_fn):
       raise TypeError("probability_fn must be callable, saw type: %s" %
                       type(probability_fn).__name__)
-    if score_mask_value is None:
-      score_mask_value = dtypes.as_dtype(self._memory_layer.dtype).as_numpy_dtype(-np.inf)
     self._probability_fn = lambda score, prev: (  # pylint:disable=g-long-lambda
         probability_fn(
             _maybe_mask_score(score, memory_sequence_length, score_mask_value),
@@ -337,8 +334,7 @@ class LuongAttention(_BaseAttentionMechanism):
                memory_sequence_length=None,
                scale=False,
                probability_fn=None,
-               score_mask_value=None,
-               dtype=None,
+               score_mask_value=float("-inf"),
                name="LuongAttention"):
     """Construct the AttentionMechanism mechanism.
 
@@ -357,20 +353,17 @@ class LuongAttention(_BaseAttentionMechanism):
       score_mask_value: (optional) The mask value for score before passing into
         `probability_fn`. The default is -inf. Only used if
         `memory_sequence_length` is not None.
-      dtype: The data type for the memory layer of the attention mechanism.
       name: Name to use when creating ops.
     """
     # For LuongAttention, we only transform the memory layer; thus
     # num_units **must** match expected the query depth.
     if probability_fn is None:
       probability_fn = nn_ops.softmax
-    if dtype is None:
-      dtype = dtypes.float32
     wrapped_probability_fn = lambda score, _: probability_fn(score)
     super(LuongAttention, self).__init__(
         query_layer=None,
         memory_layer=layers_core.Dense(
-            num_units, name="memory_layer", use_bias=False, dtype=dtype),
+            num_units, name="memory_layer", use_bias=False),
         memory=memory,
         probability_fn=wrapped_probability_fn,
         memory_sequence_length=memory_sequence_length,
@@ -482,8 +475,7 @@ class BahdanauAttention(_BaseAttentionMechanism):
                memory_sequence_length=None,
                normalize=False,
                probability_fn=None,
-               score_mask_value=None,
-               dtype=None,
+               score_mask_value=float("-inf"),
                name="BahdanauAttention"):
     """Construct the Attention mechanism.
 
@@ -502,20 +494,16 @@ class BahdanauAttention(_BaseAttentionMechanism):
       score_mask_value: (optional): The mask value for score before passing into
         `probability_fn`. The default is -inf. Only used if
         `memory_sequence_length` is not None.
-      dtype: The data type for the query and memory layers of the attention
-        mechanism.
       name: Name to use when creating ops.
     """
     if probability_fn is None:
       probability_fn = nn_ops.softmax
-    if dtype is None:
-      dtype = dtypes.float32
     wrapped_probability_fn = lambda score, _: probability_fn(score)
     super(BahdanauAttention, self).__init__(
         query_layer=layers_core.Dense(
-            num_units, name="query_layer", use_bias=False, dtype=dtype),
+            num_units, name="query_layer", use_bias=False),
         memory_layer=layers_core.Dense(
-            num_units, name="memory_layer", use_bias=False, dtype=dtype),
+            num_units, name="memory_layer", use_bias=False),
         memory=memory,
         probability_fn=wrapped_probability_fn,
         memory_sequence_length=memory_sequence_length,
@@ -750,12 +738,11 @@ class BahdanauMonotonicAttention(_BaseMonotonicAttentionMechanism):
                memory,
                memory_sequence_length=None,
                normalize=False,
-               score_mask_value=None,
+               score_mask_value=float("-inf"),
                sigmoid_noise=0.,
                sigmoid_noise_seed=None,
                score_bias_init=0.,
                mode="parallel",
-               dtype=None,
                name="BahdanauMonotonicAttention"):
     """Construct the Attention mechanism.
 
@@ -779,21 +766,17 @@ class BahdanauMonotonicAttention(_BaseMonotonicAttentionMechanism):
       mode: How to compute the attention distribution.  Must be one of
         'recursive', 'parallel', or 'hard'.  See the docstring for
         `tf.contrib.seq2seq.monotonic_attention` for more information.
-      dtype: The data type for the query and memory layers of the attention
-        mechanism.
       name: Name to use when creating ops.
     """
     # Set up the monotonic probability fn with supplied parameters
-    if dtype is None:
-      dtype = dtypes.float32
     wrapped_probability_fn = functools.partial(
         _monotonic_probability_fn, sigmoid_noise=sigmoid_noise, mode=mode,
         seed=sigmoid_noise_seed)
     super(BahdanauMonotonicAttention, self).__init__(
         query_layer=layers_core.Dense(
-            num_units, name="query_layer", use_bias=False, dtype=dtype),
+            num_units, name="query_layer", use_bias=False),
         memory_layer=layers_core.Dense(
-            num_units, name="memory_layer", use_bias=False, dtype=dtype),
+            num_units, name="memory_layer", use_bias=False),
         memory=memory,
         probability_fn=wrapped_probability_fn,
         memory_sequence_length=memory_sequence_length,
@@ -851,12 +834,11 @@ class LuongMonotonicAttention(_BaseMonotonicAttentionMechanism):
                memory,
                memory_sequence_length=None,
                scale=False,
-               score_mask_value=None,
+               score_mask_value=float("-inf"),
                sigmoid_noise=0.,
                sigmoid_noise_seed=None,
                score_bias_init=0.,
                mode="parallel",
-               dtype=None,
                name="LuongMonotonicAttention"):
     """Construct the Attention mechanism.
 
@@ -880,21 +862,17 @@ class LuongMonotonicAttention(_BaseMonotonicAttentionMechanism):
       mode: How to compute the attention distribution.  Must be one of
         'recursive', 'parallel', or 'hard'.  See the docstring for
         `tf.contrib.seq2seq.monotonic_attention` for more information.
-      dtype: The data type for the query and memory layers of the attention
-        mechanism.
       name: Name to use when creating ops.
     """
     # Set up the monotonic probability fn with supplied parameters
-    if dtype is None:
-      dtype = dtypes.float32
     wrapped_probability_fn = functools.partial(
         _monotonic_probability_fn, sigmoid_noise=sigmoid_noise, mode=mode,
         seed=sigmoid_noise_seed)
     super(LuongMonotonicAttention, self).__init__(
         query_layer=layers_core.Dense(
-            num_units, name="query_layer", use_bias=False, dtype=dtype),
+            num_units, name="query_layer", use_bias=False),
         memory_layer=layers_core.Dense(
-            num_units, name="memory_layer", use_bias=False, dtype=dtype),
+            num_units, name="memory_layer", use_bias=False),
         memory=memory,
         probability_fn=wrapped_probability_fn,
         memory_sequence_length=memory_sequence_length,
@@ -1145,9 +1123,8 @@ class AttentionWrapper(rnn_cell_impl.RNNCell):
             % (len(attention_layer_sizes), len(attention_mechanisms)))
       self._attention_layers = tuple(
           layers_core.Dense(
-              attention_layer_size, name="attention_layer", use_bias=False,
-              dtype=attention_mechanisms[i].dtype)
-          for i, attention_layer_size in enumerate(attention_layer_sizes))
+              attention_layer_size, name="attention_layer", use_bias=False)
+          for attention_layer_size in attention_layer_sizes)
       self._attention_layer_size = sum(attention_layer_sizes)
     else:
       self._attention_layers = None
diff --git a/tensorflow/contrib/slim/README.md b/tensorflow/contrib/slim/README.md
index f7a85557ca..0bfd0801d5 100644
--- a/tensorflow/contrib/slim/README.md
+++ b/tensorflow/contrib/slim/README.md
@@ -237,7 +237,7 @@ One way to reduce this code duplication would be via a `for` loop:
 ```python
 net = ...
 for i in range(3):
-  net = slim.conv2d(net, 256, [3, 3], scope='conv3_%d' % (i+1))
+  net = slim.conv2d(net, 256, [3, 3], scope='conv3_' % (i+1))
 net = slim.max_pool2d(net, [2, 2], scope='pool2')
 ```
 
diff --git a/tensorflow/contrib/slim/python/slim/nets/resnet_v1_test.py b/tensorflow/contrib/slim/python/slim/nets/resnet_v1_test.py
index 576444214d..b4fd2580c2 100644
--- a/tensorflow/contrib/slim/python/slim/nets/resnet_v1_test.py
+++ b/tensorflow/contrib/slim/python/slim/nets/resnet_v1_test.py
@@ -386,7 +386,7 @@ class ResnetCompleteNetworkTest(test.TestCase):
                 inputs, None, is_training=False, global_pool=False)
             sess.run(variables.global_variables_initializer())
             self.assertAllClose(
-                output.eval(), expected.eval(), atol=2e-4, rtol=1e-4)
+                output.eval(), expected.eval(), atol=1e-4, rtol=1e-4)
 
   def testUnknownBatchSize(self):
     batch = 2
diff --git a/tensorflow/contrib/verbs/README.md b/tensorflow/contrib/verbs/README.md
index dcb390b0a5..da5f2b0223 100644
--- a/tensorflow/contrib/verbs/README.md
+++ b/tensorflow/contrib/verbs/README.md
@@ -1,4 +1,4 @@
-## How to compile, use and configure RDMA-enabled TensorFlow
+## How to compile and use RDMA-enabled TensorFlow
 1. Follow the regular TF compilation instructions. During configure step, if you want ibverbs based RDMA support, answer yes to this question:
 
     ```Do you wish to build TensorFlow with VERBS-RDMA support [y/N]```
@@ -7,18 +7,6 @@
 
     ```server = tf.train.Server(cluster, job_name="local", task_index=0, protocol='grpc+verbs') # default protocol is 'grpc'```
 
-3. RDMA configuration is done by setting the following environment variables:
-   * **RDMA_DEVICE**: The RDMA device name to be used. If not defined by user, a default device with an active port will be set if exists.
-   * **RDMA_DEVICE_PORT**: The port within the selected device. Not relevant if RDMA_DEVICE is not defined. If not defined by user, a default active port will be set if exists.
-   * **RDMA_GID_INDEX**: The GID index of the port. If not defined by user, a default suitable GID index will be set (RoCEV2 is favourable as default).
-   * **RDMA_QP_PKEY_INDEX**: The Pkey for the QP. If not defined by user, the default value is 0.
-   * **RDMA_QP_QUEUE_DEPTH**: TX/RX queue size for the QP. If not defined by user, the default value is 1024.
-   * **RDMA_QP_TIMEOUT**: The retransmission timeout for QPs. If not defined by user, the default value is 14.
-   * **RDMA_QP_RETRY_COUNT**: Number of retransmission for QPs. If not defined by user, the default value is 7.
-   * **RDMA_QP_SL**: Service level configuration for QOS and ECN, valid values are 0-7. If not defined by user, the default value is 0.
-   * **RDMA_QP_MTU**: MTU configuration for the QPs. If not defined by user, the default value is active MTU from query_port.
-   * **RDMA_TRAFFIC_CLASS**: Traffic class configuration for QP, in case of DSCP trust level QoS configuration. If not defined by user, the default value is 0. For more info see [HowTo Configure Trust state on Mellanox Adapters](https://community.mellanox.com/docs/DOC-2866).
-
 ## Overview
 The design is based on TensorFlow r1.0. An RDMA path is added between servers for tensor transfer (weights, gradients, etc). The existing GRPC path remains and is responsible for "administrative" tasks, such as setting up the RDMA path, exchanging computation graphs, etc.
 
diff --git a/tensorflow/contrib/verbs/rdma.cc b/tensorflow/contrib/verbs/rdma.cc
index 331943a3ef..26e18b28aa 100644
--- a/tensorflow/contrib/verbs/rdma.cc
+++ b/tensorflow/contrib/verbs/rdma.cc
@@ -17,7 +17,6 @@ limitations under the License.
 
 #include "tensorflow/contrib/verbs/rdma.h"
 #include <cstdlib>
-#include <fcntl.h>
 #include "tensorflow/contrib/verbs/verbs_util.h"
 #include "tensorflow/core/common_runtime/device_mgr.h"
 #include "tensorflow/core/common_runtime/dma_helper.h"
@@ -34,8 +33,6 @@ limitations under the License.
 
 namespace tensorflow {
 
-#define RoCE_V2 "RoCE v2"
-
 namespace {
 // hash name to 32-bit integer
 uint32_t NameHash(const string& name) {
@@ -69,337 +66,16 @@ string MessageTypeToString(RdmaMessageType rmt) {
 }
 }  // namespace
 
-// Function to get environment variable
-// Args:
-//    var_name - the name of the environmental variable
-// Returns:
-//    string with it's value or empty string if not set
-string get_env_var(char const* var_name) {
-  char const* var_temp = getenv(var_name);
-
-  return (var_temp == NULL) ? string() : string(var_temp);
-}
-
-// Function to open device
-// Args:
-//   ibv_dev device to open
-// Returns:
-//   context of the opened device
-ibv_context* open_device(ibv_device* ibv_dev) {
-  ibv_context* context = ibv_open_device(ibv_dev);
-
-  CHECK(context) << "Open context failed for " << ibv_get_device_name(ibv_dev);
-  return context;
-}
-
-// Function to count the number of active ports for device
-// Args:
-//   device - to check active ports
-// Returns:
-//   number of active ports of the given device
-int get_dev_active_port_count(ibv_device* device) {
-  ibv_device_attr device_att;
-  ibv_port_attr port_attr;
-  ibv_context* context = NULL;
-  int rc, port_index, active_ports = 0;
-
-  context = ibv_open_device(device);
-  CHECK(context) << "Open context failed for " << ibv_get_device_name(device);
-  rc = ibv_query_device(context, &device_att);
-  CHECK(!rc) << "Failed to query the device";
-
-  for (port_index = 1; port_index <= device_att.phys_port_cnt; port_index++) {
-    rc = ibv_query_port(context, port_index, &port_attr);
-    CHECK(!rc) << "Failed to query the port" << port_index;
-    if (port_attr.state == IBV_PORT_ACTIVE) {
-      active_ports++;
-    }
-  }
-  ibv_close_device(context);
-  return active_ports;
-}
-
-// Function to set device. If RDMA_DEVICE not set, search for device with active
-// port.
-// Fails if more than one device with active port was found.
-// Returns:
-//   device to use
-ibv_device* set_device() {
+ibv_context* open_default_device() {
   ibv_device** dev_list;
-  int dev_num, device_index, device_to_open = 0;
-  int num_devs_with_active_port = 0;
-  string env_p_rdma_device, str_port_num;
-
-  dev_list = ibv_get_device_list(&dev_num);
+  ibv_device* ib_dev;
+  dev_list = ibv_get_device_list(NULL);
   CHECK(dev_list) << "No InfiniBand device found";
-
-  env_p_rdma_device = get_env_var("RDMA_DEVICE");
-  if (!env_p_rdma_device.empty()) {
-    for (device_index = 0; device_index < dev_num; device_index++) {
-      if (!env_p_rdma_device.compare(
-               ibv_get_device_name(dev_list[device_index]))) {
-        CHECK(get_dev_active_port_count(dev_list[device_index]) != 0)
-            << "Device " << ibv_get_device_name(dev_list[device_index])
-            << " has no active ports";
-        return dev_list[device_index];
-      }
-    }
-    // check validity of input device
-    CHECK(false) << "The device " << env_p_rdma_device << " wasn't found";
-  } else {
-  // set default device
-    str_port_num = get_env_var("RDMA_DEVICE_PORT");
-    CHECK(str_port_num.empty())
-        << "RDMA_DEVICE should be provided if RDMA_DEVICE_PORT is set by user";
-    for (device_index = 0; device_index < dev_num; device_index++) {
-      // get port_num
-      if (get_dev_active_port_count(dev_list[device_index]) > 0) {
-        num_devs_with_active_port++;
-        CHECK(num_devs_with_active_port <= 1) << ". More than one device with "
-                                                 "active port in the system. "
-                                                 "Please enter RDMA_DEVICE";
-        // found device with at least 1 active port
-        device_to_open = device_index;
-      }
-    }
-    CHECK(num_devs_with_active_port > 0)
-        << "There is no active port in the system";
-    return dev_list[device_to_open];
-  }
-  CHECK(false) << "No device was set!";
-  return NULL;  // never happens
-}
-
-// Function to set port for device.
-// If RDMA_DEVICE_PORT not set, first active port of the device will be set.
-// Args:
-//   context of the device
-// Returns:
-//   port to use
-uint8_t set_port(ibv_context* context) {
-  uint8_t port_num = 0; //0 is illegal port number
-  string str_port_num;
-  ibv_device_attr device_att;
-  ibv_port_attr port_attr;
-  int rc, port_index;
-
-  rc = ibv_query_device(context, &device_att);
-  CHECK(!rc) << "Failed to query the device\n";
-
-  str_port_num = get_env_var("RDMA_DEVICE_PORT");
-  // user defined port
-  if (!str_port_num.empty()) {
-    port_num = stoi(str_port_num);
-    CHECK(port_num > 0) << "RDMA_DEVICE_PORT should be positive";
-    CHECK(port_num <= device_att.phys_port_cnt) << "RDMA_DEVICE_PORT should be "
-                                                   "less or equal to amount of "
-                                                   "available ports";
-    rc = ibv_query_port(context, port_num, &port_attr);
-    CHECK(!rc) << "Failed to query the port" << port_num;
-    // check if port id active
-    CHECK(port_attr.state == IBV_PORT_ACTIVE)
-        << "Selected RDMA_DEVICE_PORT is not active";
-  }
-  // set default port
-  else {
-    for (port_index = 1; port_index <= device_att.phys_port_cnt; port_index++) {
-      rc = ibv_query_port(context, port_index, &port_attr);
-      CHECK(!rc) << "Failed to query the port" << port_index;
-      if (port_attr.state == IBV_PORT_ACTIVE) {
-        port_num = port_index;
-        break;
-      }
-    }
-    CHECK_GT(port_num, 0) << "No active ports";
-  }
-  return port_num;
-}
-
-// Function read from sysfs file
-// Args:
-//   dir - directory
-//   file - file
-//   buff - buffer for the result
-//   size - buffer size
-// Returns:
-//   number of bytes were read or -1 if failed
-int read_sysfs_file(const char* dir, const char* file, char* buf, size_t size) {
-  char* path;
-  int fd;
-  int len;
-
-  if (asprintf(&path, "%s/%s", dir, file) < 0) return -1;
-
-  fd = open(path, O_RDONLY);
-  if (fd < 0) {
-    free(path);
-    return -1;
-  }
-
-  len = read(fd, buf, size);
-
-  close(fd);
-  free(path);
-
-  if (len > 0 && buf[len - 1] == '\n') buf[--len] = '\0';
-
-  return len;
-}
-
-// Function to check if GID index support RoCE V2
-// Args:
-//   context - device context
-//   port_num - port number
-//   index -  GID index
-// Returns:
-//   if GID supports RoCE V2 - true, otherwise - false.
-bool is_gid_type_roce_v2(ibv_context* context, uint8_t port_num,
-                         uint8_t index) {
-  char name[32];
-  char buff[41];
-
-  snprintf(name, sizeof(name), "ports/%d/gid_attrs/types/%d", port_num, index);
-  if (read_sysfs_file(context->device->ibdev_path, name, buff, sizeof(buff)) <=
-      0) {
-    return false;
-  }
-  return !strcmp(buff, RoCE_V2);
-}
-
-// Function to set GID index.
-// If the port link is IB, no GID index should be selected.
-// If Ethernet but RDMA_GID_INDEX not set gid index that supports
-//   RoCE V2 will be chosen(fails if more then one IP is configured)
-// Args:
-//   context - device context
-//   port_num - port number
-// Returns:
-//   GID index to use
-uint8_t set_gid(uint8_t port_num, ibv_context* context) {
-  ibv_port_attr port_attr;
-  string gid_str;
-  int rc, i, gids_num = 0, v2_ip_num = 0;
-  union ibv_gid gid;
-  uint8_t gid_index = 0;
-
-  rc = ibv_query_port(context, port_num, &port_attr);
-  CHECK(!rc) << "Failed to query the port" << port_num;
-
-  for (i = 0; i < port_attr.gid_tbl_len; i++) {
-    rc = ibv_query_gid(context, port_num, i, &gid);
-    CHECK(!rc) << "Failed to query gid to port " << (int)port_num << " index "
-               << i;
-    if (gid.global.interface_id) {
-      gids_num++;
-      if (gid.global.subnet_prefix == 0 &&
-          is_gid_type_roce_v2(context, port_num, i)) {
-        if (v2_ip_num == 0) {
-          // can be overwritten by RDMA_GID_INDEX later
-          gid_index = i;
-        }
-        v2_ip_num++;
-      }
-    }
-  }
-  switch (port_attr.link_layer) {
-    case(IBV_LINK_LAYER_ETHERNET) :
-      gid_str = get_env_var("RDMA_GID_INDEX");
-      if (!gid_str.empty()) {
-        gid_index = stoi(gid_str);
-        CHECK(gid_index < gids_num)
-            << "RDMA_GID_INDEX should be less than GIDs amount" << gids_num;
-      } else {
-        CHECK(v2_ip_num <= 1)
-            << "More than one IP is available, please specify GID_INDEX";
-      }
-      break;
-    case(IBV_LINK_LAYER_INFINIBAND) :  // no need in GID index
-      break;
-    default:
-      LOG(INFO) << "Unknown port link layer. Currently supporting Ethernet and "
-                   "InfiniBand only. ";
-  }
-  if (!is_gid_type_roce_v2(context, port_num, gid_index)) {
-    LOG(INFO) << "RoCE v2 is not configured for GID_INDEX " << (int)gid_index;
-  }
-  return gid_index;
-}
-
-// set the default or environment value to the configuration parameter.
-// Args:
-//   default_val- the default value for this parameter
-//   env_param- the environment parameter's name
-// Returns:
-//   32-bit value
-uint32_t set_param(uint32_t default_val, const char* env_param) {
-  uint32_t val = default_val;
-  string val_s;
-
-  val_s = get_env_var(env_param);
-
-  if (!val_s.empty()) {
-    val = stoi(val_s);
-  }
-  return val;
-}
-
-enum ibv_mtu set_mtu(uint8_t port_num, ibv_context* context) {
-  ibv_port_attr port_attr;
-  enum ibv_mtu mtu;
-  string mtu_s;
-  int rc, mtu_i;
-
-  rc = ibv_query_port(context, port_num, &port_attr);
-  CHECK(!rc) << "Failed to query the port" << port_num;
-
-  mtu_s = get_env_var("RDMA_MTU");
-
-  if (!mtu_s.empty()) {
-    mtu_i = stoi(mtu_s);
-    switch (mtu_i) {
-      case 256:
-        mtu = IBV_MTU_256;
-        break;
-      case 512:
-        mtu = IBV_MTU_512;
-        break;
-      case 1024:
-        mtu = IBV_MTU_1024;
-        break;
-      case 2048:
-        mtu = IBV_MTU_2048;
-        break;
-      case 4096:
-        mtu = IBV_MTU_4096;
-        break;
-      default:
-        CHECK(0) << "Error: MTU input value must be one of the following: 256, "
-                    "512, 1024, 2048, 4096. MTU " << mtu << " is invalid\n";
-        break;
-    }
-    CHECK(mtu < port_attr.active_mtu)
-        << "MTU configuration for the QPs is larger than active MTU";
-  } else {
-    mtu = port_attr.active_mtu;
-  }
-  return mtu;
-}
-
-RdmaParams params_init(ibv_context* context) {
-  RdmaParams params;
-
-  params.port_num = set_port(context);
-  params.sgid_index = set_gid(params.port_num, context);
-  params.pkey_index = (uint8_t)set_param(PKEY_DEFAULT, "RDMA_PKEY");
-  params.queue_depth = set_param(QUEUE_DEPTH_DEFAULT, "RDMA_QUEUE_DEPTH");
-  params.timeout = (uint8_t)set_param(TIMEOUT_DEFAULT, "RDMA_TIMEOUT");
-  params.retry_cnt = (uint8_t)set_param(RETRY_CNT_DEFAULT, "RDMA_RETRY_CNT");
-  params.sl = (uint8_t)set_param(SL_DEFAULT, "RDMA_SL");
-  CHECK(params.sl <= 7) << "SL value is " << (int)params.sl
-                        << ". Valid values are 0-7.";
-  params.mtu = set_mtu(params.port_num, context);
-  params.traffic_class = set_param(TRAFFIC_CLASS, "RDMA_TRAFFIC_CLASS");
-  return params;
+  ib_dev = dev_list[0];
+  CHECK(ib_dev) << "No InfiniBand device found";
+  ibv_context* context = ibv_open_device(ib_dev);
+  CHECK(context) << "Open context failed for " << ibv_get_device_name(ib_dev);
+  return context;
 }
 
 ibv_pd* alloc_protection_domain(ibv_context* context) {
@@ -409,8 +85,7 @@ ibv_pd* alloc_protection_domain(ibv_context* context) {
 }
 
 RdmaAdapter::RdmaAdapter(const WorkerEnv* worker_env)
-    : context_(open_device(set_device())),
-      params_(params_init(context_)),
+    : context_(open_default_device()),
       pd_(alloc_protection_domain(context_)),
       worker_env_(worker_env) {
   event_channel_ = ibv_create_comp_channel(context_);
@@ -453,9 +128,9 @@ void RdmaAdapter::Process_CQ() {
     CHECK_GE(ne, 0);
     for (int i = 0; i < ne; ++i) {
       CHECK(wc_[i].status == IBV_WC_SUCCESS)
-          << "Failed status \n" << ibv_wc_status_str(wc_[i].status) << " "
-          << wc_[i].status << " " << static_cast<int>(wc_[i].wr_id) << " "
-          << wc_[i].vendor_err;
+          << "Failed status \n"
+          << ibv_wc_status_str(wc_[i].status) << " " << wc_[i].status << " "
+          << static_cast<int>(wc_[i].wr_id) << " " << wc_[i].vendor_err;
       if (wc_[i].opcode == IBV_WC_RECV_RDMA_WITH_IMM) {
         RdmaChannel* rc = reinterpret_cast<RdmaChannel*>(wc_[i].wr_id);
         // put back a recv wr.
@@ -567,8 +242,8 @@ RdmaChannel::RdmaChannel(const RdmaAdapter* adapter, const string local_name,
     memset(&attr, 0, sizeof(ibv_qp_init_attr));
     attr.send_cq = adapter_->cq_;
     attr.recv_cq = adapter_->cq_;
-    attr.cap.max_send_wr = adapter_->params_.queue_depth;
-    attr.cap.max_recv_wr = adapter_->params_.queue_depth;
+    attr.cap.max_send_wr = RdmaAdapter::MAX_CONCURRENT_WRITES;
+    attr.cap.max_recv_wr = RdmaAdapter::MAX_CONCURRENT_WRITES;
     attr.cap.max_send_sge = 1;
     attr.cap.max_recv_sge = 1;
     attr.qp_type = IBV_QPT_RC;
@@ -582,8 +257,8 @@ RdmaChannel::RdmaChannel(const RdmaAdapter* adapter, const string local_name,
     struct ibv_qp_attr attr;
     memset(&attr, 0, sizeof(ibv_qp_attr));
     attr.qp_state = IBV_QPS_INIT;
-    attr.pkey_index = adapter_->params_.pkey_index;
-    attr.port_num = adapter_->params_.port_num;
+    attr.pkey_index = 0;
+    attr.port_num = 1;
     attr.qp_access_flags = IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_WRITE;
 
     int mask =
@@ -594,15 +269,13 @@ RdmaChannel::RdmaChannel(const RdmaAdapter* adapter, const string local_name,
   // Local address
   {
     struct ibv_port_attr attr;
-    CHECK(
-        !ibv_query_port(adapter_->context_, adapter_->params_.port_num, &attr))
+    CHECK(!ibv_query_port(adapter_->context_, (uint8_t)1, &attr))
         << "Query port";
     self_.lid = attr.lid;
     self_.qpn = qp_->qp_num;
     self_.psn = static_cast<uint32_t>(random::New64()) & 0xffffff;
     union ibv_gid gid;
-    CHECK(!ibv_query_gid(adapter_->context_, adapter_->params_.port_num,
-                         adapter_->params_.sgid_index, &gid))
+    CHECK(!ibv_query_gid(adapter_->context_, (uint8_t)1, 0, &gid))
         << "Query gid";
     self_.snp = gid.global.subnet_prefix;
     self_.iid = gid.global.interface_id;
@@ -611,7 +284,7 @@ RdmaChannel::RdmaChannel(const RdmaAdapter* adapter, const string local_name,
   // create message and ack buffers, then initialize the tables.
   {
     const string buffer_names[] = {"tx_message_buffer", "rx_message_buffer",
-                                   "tx_ack_buffer",     "rx_ack_buffer"};
+                                   "tx_ack_buffer", "rx_ack_buffer"};
     tx_message_buffer_ = new RdmaMessageBuffer(this, buffer_names[0]);
     rx_message_buffer_ = new RdmaMessageBuffer(this, buffer_names[1]);
     tx_ack_buffer_ = new RdmaAckBuffer(this, buffer_names[2]);
@@ -672,7 +345,7 @@ void RdmaChannel::SetRemoteAddress(const RdmaAddress& ra, bool override) {
 void RdmaChannel::Recv() {
   struct ibv_recv_wr wr;
   memset(&wr, 0, sizeof(wr));
-  wr.wr_id = (uint64_t) this;
+  wr.wr_id = (uint64_t)this;
   struct ibv_recv_wr* bad_wr;
   CHECK(!ibv_post_recv(qp_, &wr, &bad_wr)) << "Failed to post recv";
 }
@@ -806,9 +479,11 @@ void RdmaChannel::Connect(const RdmaAddress& remoteAddr) {
     struct ibv_qp_attr attr;
     memset(&attr, 0, sizeof(ibv_qp_attr));
     attr.qp_state = IBV_QPS_RTR;
-
+    struct ibv_port_attr port_attr;
+    CHECK(!ibv_query_port(adapter_->context_, (uint8_t)1, &port_attr))
+        << "Query port failed";
     // This assumes both QP's ports are configured with the same MTU
-    attr.path_mtu = adapter_->params_.mtu;
+    attr.path_mtu = port_attr.active_mtu;
     attr.dest_qp_num = remoteAddr.qpn;
     attr.rq_psn = remoteAddr.psn;
     attr.max_dest_rd_atomic = 1;
@@ -819,32 +494,30 @@ void RdmaChannel::Connect(const RdmaAddress& remoteAddr) {
     attr.ah_attr.grh.flow_label = 0;
     attr.ah_attr.grh.hop_limit = 255;
     attr.ah_attr.dlid = remoteAddr.lid;
-    attr.ah_attr.sl = adapter_->params_.sl;
+    attr.ah_attr.sl = 0;
     attr.ah_attr.src_path_bits = 0;
-    attr.ah_attr.port_num = adapter_->params_.port_num;
-    attr.ah_attr.grh.sgid_index = adapter_->params_.sgid_index;
-    attr.ah_attr.grh.traffic_class = adapter_->params_.traffic_class;
+    attr.ah_attr.port_num = 1;
 
     int r;
-    CHECK(!(r = ibv_modify_qp(qp_, &attr, IBV_QP_STATE | IBV_QP_AV |
-                                              IBV_QP_PATH_MTU |
-                                              IBV_QP_DEST_QPN | IBV_QP_RQ_PSN |
-                                              IBV_QP_MAX_DEST_RD_ATOMIC |
-                                              IBV_QP_MIN_RNR_TIMER)))
+    CHECK(!(r = ibv_modify_qp(qp_, &attr,
+                              IBV_QP_STATE | IBV_QP_AV | IBV_QP_PATH_MTU |
+                                  IBV_QP_DEST_QPN | IBV_QP_RQ_PSN |
+                                  IBV_QP_MAX_DEST_RD_ATOMIC |
+                                  IBV_QP_MIN_RNR_TIMER)))
         << "QP to Ready to Receive " << r;
 
     memset(&attr, 0, sizeof(ibv_qp_attr));
     attr.qp_state = IBV_QPS_RTS;
     attr.sq_psn = self_.psn;
-    attr.timeout = adapter_->params_.timeout;
-    attr.retry_cnt = adapter_->params_.retry_cnt;
+    attr.timeout = 14;
+    attr.retry_cnt = 7;
     attr.rnr_retry = 7; /* infinite */
     attr.max_rd_atomic = 1;
 
-    CHECK(!(r = ibv_modify_qp(qp_, &attr, IBV_QP_STATE | IBV_QP_TIMEOUT |
-                                              IBV_QP_RETRY_CNT |
-                                              IBV_QP_RNR_RETRY | IBV_QP_SQ_PSN |
-                                              IBV_QP_MAX_QP_RD_ATOMIC)))
+    CHECK(!(r = ibv_modify_qp(qp_, &attr,
+                              IBV_QP_STATE | IBV_QP_TIMEOUT | IBV_QP_RETRY_CNT |
+                                  IBV_QP_RNR_RETRY | IBV_QP_SQ_PSN |
+                                  IBV_QP_MAX_QP_RD_ATOMIC)))
         << "QP to Ready to Send " << r;
 
     connected_ = true;
@@ -931,7 +604,7 @@ void RdmaBuffer::Write(uint32_t imm_data, size_t buffer_size) {
 
   struct ibv_send_wr wr;
   memset(&wr, 0, sizeof(wr));
-  wr.wr_id = (uint64_t) this;
+  wr.wr_id = (uint64_t)this;
   wr.sg_list = &list;
   wr.num_sge = 1;
   wr.opcode = IBV_WR_RDMA_WRITE_WITH_IMM;
@@ -1026,9 +699,9 @@ Rendezvous::DoneCallback RdmaTensorBuffer::getRecvTensorCallback(
     TensorProto proto;
     if (src_dev->tensorflow_gpu_device_info() &&
         (!send_args.alloc_attrs.on_host())) {
-      CHECK(send_args.device_context) << "send dev name: " << src_dev->name()
-                                      << " gpu_info: "
-                                      << src_dev->tensorflow_gpu_device_info();
+      CHECK(send_args.device_context)
+          << "send dev name: " << src_dev->name()
+          << " gpu_info: " << src_dev->tensorflow_gpu_device_info();
 
       if (can_memcpy) {
         AllocatorAttributes host_alloc_attrs;
@@ -1054,8 +727,8 @@ Rendezvous::DoneCallback RdmaTensorBuffer::getRecvTensorCallback(
         // aync instead
         GPUUtil::SetProtoFromGPU(
             in, src_dev, send_args.device_context, &proto, is_dead,
-	    [this, proto, buffer_size, key, in, step_id, key_with_step_id,
-            is_dead, send_args, recv_args](const Status& s) mutable {
+            [this, proto, buffer_size, key, in, step_id, key_with_step_id,
+             is_dead, send_args, recv_args](const Status& s) mutable {
               CHECK(s.ok()) << "copy proto from gpu sync";
               auto tensor_bytes = proto.ByteSize();
               buffer_size += tensor_bytes;
diff --git a/tensorflow/contrib/verbs/rdma.h b/tensorflow/contrib/verbs/rdma.h
index 52d92a7c5b..e1e07db776 100644
--- a/tensorflow/contrib/verbs/rdma.h
+++ b/tensorflow/contrib/verbs/rdma.h
@@ -36,24 +36,7 @@ limitations under the License.
 #include "tensorflow/core/platform/mutex.h"
 
 namespace tensorflow {
-#define PKEY_DEFAULT 0
-#define QUEUE_DEPTH_DEFAULT 1024
-#define TIMEOUT_DEFAULT 14
-#define RETRY_CNT_DEFAULT 7
-#define SL_DEFAULT 0
-#define TRAFFIC_CLASS 0
-
-struct RdmaParams {
-  uint8_t port_num;
-  uint8_t sgid_index;
-  uint8_t pkey_index;
-  uint32_t queue_depth;
-  uint8_t timeout;
-  uint8_t retry_cnt;
-  uint8_t sl;
-  enum ibv_mtu mtu;
-  uint8_t traffic_class;
-};
+
 // structure to save the address of remote channels.
 struct RdmaAddress {
   uint32_t lid;
@@ -67,20 +50,9 @@ struct RemoteMR {
   uint64_t remote_addr;
   uint32_t rkey;
 };
-enum BufferStatus {
-  none,
-  idle,
-  busy
-};
-enum Location {
-  local,
-  remote
-};
-enum BufferType {
-  ACK,
-  MESSAGE,
-  TENSOR
-};
+enum BufferStatus { none, idle, busy };
+enum Location { local, remote };
+enum BufferType { ACK, MESSAGE, TENSOR };
 enum RdmaMessageType {
   RDMA_MESSAGE_ACK,
   RDMA_MESSAGE_BUFFER_IDLE,
@@ -112,8 +84,6 @@ class RdmaAdapter {
  protected:
   static const int MAX_CONCURRENT_WRITES = 1000;
   ibv_context* context_;
-  // RDMA configuration parameters
-  RdmaParams params_;
   // ibverbs protection domain
   ibv_pd* pd_;
   // Completion event channel, to wait for work completions
@@ -213,7 +183,7 @@ class RdmaBuffer {
   }
   void FreeBuffer();
   void EnqueueItem(string Item);
-  virtual void SendNextItem() {};
+  virtual void SendNextItem(){};
   void CreateCPUBuffer(size_t size, bool lock = true);
   void SetRemoteMR(RemoteMR rmi, bool override);
   uint32_t LookupBufferIndex(const string& buffer_name) {
diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD
index 30ff4ef358..d71f314e11 100644
--- a/tensorflow/core/BUILD
+++ b/tensorflow/core/BUILD
@@ -2710,7 +2710,6 @@ tf_cc_test_mkl(
     srcs = [
         "graph/mkl_layout_pass_test.cc",
         "graph/mkl_tfconversion_pass_test.cc",
-        "util/mkl_util_test.cc",
     ],
     linkstatic = 1,
     deps = [
diff --git a/tensorflow/core/common_runtime/mkl_cpu_allocator.h b/tensorflow/core/common_runtime/mkl_cpu_allocator.h
index 63b74e8dbf..53e80b1ee3 100644
--- a/tensorflow/core/common_runtime/mkl_cpu_allocator.h
+++ b/tensorflow/core/common_runtime/mkl_cpu_allocator.h
@@ -81,7 +81,7 @@ class MklCPUAllocator : public Allocator {
       }
 #if defined(_SC_PHYS_PAGES) && defined(_SC_PAGESIZE)
       if (user_val > max_mem_bytes) {
-        LOG(WARNING) << "The user specified a memory limit " << kMaxLimitStr
+        LOG(WARNING) << "The user specifed a memory limit " << kMaxLimitStr
                      << "=" << user_val
                      << " greater than available physical memory: "
                      << max_mem_bytes
diff --git a/tensorflow/core/common_runtime/sycl/sycl_device.h b/tensorflow/core/common_runtime/sycl/sycl_device.h
index cc272d156e..9caa076c72 100644
--- a/tensorflow/core/common_runtime/sycl/sycl_device.h
+++ b/tensorflow/core/common_runtime/sycl/sycl_device.h
@@ -46,8 +46,8 @@ class GSYCLInterface {
 
     if (!found_device) {
       // Currently Intel GPU is not supported
-      LOG(WARNING) << "No OpenCL GPU found that is supported by "
-                   << "ComputeCpp/triSYCL, trying OpenCL CPU";
+      LOG(WARNING) << "No OpenCL GPU found that is supported by ComputeCpp, "
+                      "trying OpenCL CPU";
     }
 
     for (const auto& device : device_list) {
@@ -58,24 +58,10 @@ class GSYCLInterface {
       }
     }
 
-    if (!found_device) {
-      LOG(WARNING) << "No OpenCL CPU found that is supported by "
-                   << "ComputeCpp/triSYCL, checking for host sycl device";
-    }
-
-    for (const auto& device : device_list) {
-      // triSYCL only supports the host device for now
-      if (device.is_host()) {
-        LOG(WARNING) << "Found SYCL host device";
-        AddDevice(device);
-        found_device = true;
-      }
-    }
-
     if (!found_device) {
       // Currently Intel GPU is not supported
-      LOG(FATAL) << "No SYCL host and no OpenCL GPU nor CPU"
-                 << " supported by ComputeCPP/triSYCL was found";
+      LOG(FATAL)
+          << "No OpenCL GPU nor CPU found that is supported by ComputeCpp";
     } else {
       LOG(INFO) << "Found following OpenCL devices:";
       for (int i = 0; i < device_list.size(); i++) {
diff --git a/tensorflow/core/graph/graph.cc b/tensorflow/core/graph/graph.cc
index fd1b5d33b9..87c41186d5 100644
--- a/tensorflow/core/graph/graph.cc
+++ b/tensorflow/core/graph/graph.cc
@@ -453,21 +453,6 @@ const Edge* Graph::AddControlEdge(Node* source, Node* dest,
   return AddEdge(source, kControlSlot, dest, kControlSlot);
 }
 
-void Graph::RemoveControlEdge(const Edge* e) {
-  if (!e->src_->IsSource() && !e->dst_->IsSink()) {
-    e->dst_->MaybeCopyOnWrite();
-    std::string e_src_name = strings::StrCat("^", e->src_->name());
-    auto* inputs = e->dst_->props_->node_def.mutable_input();
-    for (auto it = inputs->begin(); it != inputs->end(); ++it) {
-      if (*it == e_src_name) {
-        inputs->erase(it);
-        break;
-      }
-    }
-  }
-  RemoveEdge(e);
-}
-
 Status Graph::UpdateEdge(Node* new_src, int new_src_index, Node* dst,
                          int dst_index) {
   TF_RETURN_IF_ERROR(IsValidOutputTensor(new_src, new_src_index));
diff --git a/tensorflow/core/graph/graph.h b/tensorflow/core/graph/graph.h
index d0dba6e1f0..c5dde722fa 100644
--- a/tensorflow/core/graph/graph.h
+++ b/tensorflow/core/graph/graph.h
@@ -451,11 +451,6 @@ class Graph {
   // REQUIRES: The edge must exist.
   void RemoveEdge(const Edge* edge);
 
-  // Removes control edge `edge` from the graph. Note that this also updates
-  // the corresponding NodeDef to reflect the change.
-  // REQUIRES: The control edge must exist.
-  void RemoveControlEdge(const Edge* e);
-  
   // Updates the input to a node.  The existing edge to `dst` is removed and an
   // edge from `new_src` to `dst` is created. The NodeDef associated with `dst`
   // is also updated.
diff --git a/tensorflow/core/graph/graph_partition.cc b/tensorflow/core/graph/graph_partition.cc
index 1924c05d3d..b9e3cba035 100644
--- a/tensorflow/core/graph/graph_partition.cc
+++ b/tensorflow/core/graph/graph_partition.cc
@@ -117,7 +117,7 @@ DataType EdgeType(const Edge* e) {
   }
 }
 
-// Return true iff we need to add the same device send/recv for 'edge'.
+// Return true iff we need to add a same device send/recv for 'edge'.
 bool NeedSameDeviceSendRecv(const Edge* edge, const GraphInfo& info) {
   if (edge->IsControlEdge()) {
     return false;
@@ -1116,7 +1116,7 @@ Status Partition(const PartitionOptions& opts, Graph* g,
         // before the data is available.
         AddInput(real_recv, send->name(), Graph::kControlSlot);
       } else if (control_flow_edge != nullptr) {
-        // Redirect control edge to the real recv since this is not the same
+        // Redirect control edge to the real recv since this is not a same
         // device send/recv.
         --num_control_flow_edges;
         AddInput(real_recv, control_flow_edge->src()->name(),
diff --git a/tensorflow/core/graph/graph_test.cc b/tensorflow/core/graph/graph_test.cc
index 2aa1b31e15..7686cef219 100644
--- a/tensorflow/core/graph/graph_test.cc
+++ b/tensorflow/core/graph/graph_test.cc
@@ -118,25 +118,6 @@ class GraphTest : public ::testing::Test {
     LOG(FATAL) << name;
   }
 
-  bool ControlEdgeExistsInGraphOrNodeDef(const Node* src,
-                                         const Node* dst) {
-    for (const Edge *e : dst->in_edges()) {
-      if (e->IsControlEdge() &&
-          e->src() == src &&
-          e->src_output() == Graph::kControlSlot &&
-          e->dst_input() == Graph::kControlSlot) {
-        return true;
-      }
-    }
-    std::string control_edge_name = strings::StrCat("^", src->name());
-    for (int i = 0; i < dst->def().input_size(); ++i) {
-      if (dst->def().input(i) == control_edge_name) {
-        return true;
-      }
-    }
-    return false;
-  }
-
   Graph graph_;
 
  private:
@@ -477,8 +458,8 @@ TEST_F(GraphTest, AddControlEdge) {
   EXPECT_TRUE(edge == nullptr);
   EXPECT_EQ(b->def().input_size(), 2);
 
-  // Can add redundant control edge with allow_duplicates.
-  edge = graph_.AddControlEdge(a, b, /*allow_duplicates=*/true);
+  // Can add redundant control edge with create_duplicate.
+  edge = graph_.AddControlEdge(a, b, /*create_duplicate=*/true);
   EXPECT_TRUE(edge != nullptr);
   // create_duplicate causes the NodeDef not to be updated.
   ASSERT_EQ(b->def().input_size(), 2);
@@ -496,47 +477,6 @@ TEST_F(GraphTest, AddControlEdge) {
   EXPECT_EQ(b->def().input_size(), 2);
 }
 
-TEST_F(GraphTest, RemoveControlEdge) {
-  FromGraphDef(
-      "node { name: 'A' op: 'OneOutput' }"
-      "node { name: 'B' op: 'OneInputTwoOutputs' input: [ 'A:0' ] }"
-      "node { name: 'C' op: 'NoOp' } ");
-  Node* a = FindNode("A");
-  Node* b = FindNode("B");
-  Node* c = FindNode("C");
-
-  // Add a control edge.
-  const Edge* edge_1 = graph_.AddControlEdge(c, a);
-  const Edge* edge_2 = graph_.AddControlEdge(a, b);
-  ASSERT_TRUE(edge_1 != nullptr);
-  ASSERT_TRUE(edge_2 != nullptr);
-
-  ASSERT_TRUE(ControlEdgeExistsInGraphOrNodeDef(c, a));
-  ASSERT_TRUE(ControlEdgeExistsInGraphOrNodeDef(a, b));
-
-  graph_.RemoveControlEdge(edge_1);
-  ASSERT_TRUE(!ControlEdgeExistsInGraphOrNodeDef(c, a));
-  ASSERT_TRUE(ControlEdgeExistsInGraphOrNodeDef(a, b));
-
-  graph_.RemoveControlEdge(edge_2);
-  ASSERT_TRUE(!ControlEdgeExistsInGraphOrNodeDef(c, a));
-  ASSERT_TRUE(!ControlEdgeExistsInGraphOrNodeDef(a, b));
-
-  // Test removing a duplicate control edge.
-  // Note that unless allow_duplicates is true, the duplicate edge
-  // will not be added. That's why we expect edge_4 to be a null
-  // pointer. We are not testing with allow_duplicates set to true,
-  // as that is a highly unlikely use case that does not make much
-  // sense.
-  const Edge* edge_3 = graph_.AddControlEdge(c, a);
-  const Edge* edge_4 = graph_.AddControlEdge(c, a);
-  ASSERT_TRUE(edge_3 != nullptr);
-  ASSERT_TRUE(edge_4 == nullptr);
-
-  graph_.RemoveControlEdge(edge_3);
-  ASSERT_TRUE(!ControlEdgeExistsInGraphOrNodeDef(c, a));
-}
-
 TEST_F(GraphTest, UpdateEdge) {
   // Build a little graph
   Node* a = FromNodeDef("A", "OneOutput", 0);
diff --git a/tensorflow/core/graph/mkl_graph_util.h b/tensorflow/core/graph/mkl_graph_util.h
index 880e4e712e..cb32d64334 100644
--- a/tensorflow/core/graph/mkl_graph_util.h
+++ b/tensorflow/core/graph/mkl_graph_util.h
@@ -21,108 +21,107 @@ limitations under the License.
 #include "tensorflow/core/framework/op_kernel.h"
 
 namespace tensorflow {
-  // Since our ops are going to produce and also consume N addition tensors
-  // (Mkl) for N Tensorflow tensors, we can have following different
-  // orderings among these 2N tensors.
-  //
-  // E.g., for Tensorflow tensors A, B, and C, our ops will produce and
-  // consume A_m, B_m, and C_m additionally.
-  //
-  // INTERLEAVED: in this case 2N tensors are interleaved. So for above
-  //              example, the ordering looks like: A, A_m, B, B_m, C, C_m.
-  //
-  // CONTIGUOUS: in thi case N Tensorflow tensors are contiguous followed
-  //             by N Mkl tensors. So for above example, the ordering looks
-  //             like: A, B, C, A_m, B_m, C_m
-  //
-  // Following APIs map index of original Tensorflow tensors to their
-  // appropriate position based on selected ordering. For contiguous ordering,
-  // we need to know the total number of tensors (parameter total).
-  //
-  typedef enum { TENSORS_INTERLEAVED, TENSORS_CONTIGUOUS } MklTfTensorOrdering;
-  // NOTE: Currently, we use contiguous ordering. If you change this, then you
-  // would need to change Mkl op definitions in nn_ops.cc.
-  static MklTfTensorOrdering kTensorOrdering = TENSORS_CONTIGUOUS;
+// Since our ops are going to produce and also consume N addition tensors
+// (Mkl) for N Tensorflow tensors, we can have following different
+// orderings among these 2N tensors.
+//
+// E.g., for Tensorflow tensors A, B, and C, our ops will produce and
+// consume A_m, B_m, and C_m additionally.
+//
+// INTERLEAVED: in this case 2N tensors are interleaved. So for above
+//              example, the ordering looks like: A, A_m, B, B_m, C, C_m.
+//
+// CONTIGUOUS: in thi case N Tensorflow tensors are contiguous followed
+//             by N Mkl tensors. So for above example, the ordering looks
+//             like: A, B, C, A_m, B_m, C_m
+//
+// Following APIs map index of original Tensorflow tensors to their
+// appropriate position based on selected ordering. For contiguous ordering,
+// we need to know the total number of tensors (parameter total).
+//
+typedef enum { TENSORS_INTERLEAVED, TENSORS_CONTIGUOUS } MklTfTensorOrdering;
+// NOTE: Currently, we use contiguous ordering. If you change this, then you
+// would need to change Mkl op definitions in nn_ops.cc.
+static MklTfTensorOrdering kTensorOrdering = TENSORS_CONTIGUOUS;
 
-  // Get index of MetaData tensor from index 'n' of Data tensor.
-  inline int DataIndexToMetaDataIndex(int n, int total_tensors) {
-    if (kTensorOrdering == MklTfTensorOrdering::TENSORS_INTERLEAVED) {
-      // For interleaved ordering, Mkl tensor follows immediately after
-      // Tensorflow tensor.
-      return n + 1;
-    } else {
-      CHECK_EQ(kTensorOrdering, MklTfTensorOrdering::TENSORS_CONTIGUOUS);
-      // For contiguous ordering, Mkl tensor is n+total_tensors / 2 away.
-      return n + total_tensors / 2;
-    }
+// Get index of MetaData tensor from index 'n' of Data tensor.
+inline int DataIndexToMetaDataIndex(int n, int total_tensors) {
+  if (kTensorOrdering == MklTfTensorOrdering::TENSORS_INTERLEAVED) {
+    // For interleaved ordering, Mkl tensor follows immediately after
+    // Tensorflow tensor.
+    return n + 1;
+  } else {
+    CHECK_EQ(kTensorOrdering, MklTfTensorOrdering::TENSORS_CONTIGUOUS);
+    // For contiguous ordering, Mkl tensor is n+total_tensors / 2 away.
+    return n + total_tensors / 2;
   }
+}
 
-  int inline GetTensorDataIndex(int n, int total_tensors) {
-      if (kTensorOrdering == MklTfTensorOrdering::TENSORS_INTERLEAVED) {
-        return 2 * n;  // index corresponding to nth input/output tensor
-      } else {
-        CHECK_EQ(kTensorOrdering, MklTfTensorOrdering::TENSORS_CONTIGUOUS);
-        return n;
-      }
-    }
+int inline GetTensorDataIndex(int n, int total_tensors) {
+  if (kTensorOrdering == MklTfTensorOrdering::TENSORS_INTERLEAVED) {
+    return 2 * n;  // index corresponding to nth input/output tensor
+  } else {
+    CHECK_EQ(kTensorOrdering, MklTfTensorOrdering::TENSORS_CONTIGUOUS);
+    return n;
+  }
+}
 
-  int inline GetTensorMetaDataIndex(int n, int total_tensors) {
-      // Get index for TensorData first and then use mapping function
-      // to get TensorMetaData index from TensorData index.
-      int tidx = GetTensorDataIndex(n, total_tensors);
-      return DataIndexToMetaDataIndex(tidx, total_tensors);
-    }
+int inline GetTensorMetaDataIndex(int n, int total_tensors) {
+  // Get index for TensorData first and then use mapping function
+  // to get TensorMetaData index from TensorData index.
+  int tidx = GetTensorDataIndex(n, total_tensors);
+  return DataIndexToMetaDataIndex(tidx, total_tensors);
+}
 
 namespace mkl_op_registry {
-  static const char* kMklOpLabel = "MklOp";
-  static const char* kMklOpLabelPattern = "label='MklOp'";
+static const char* kMklOpLabel = "MklOp";
+static const char* kMklOpLabelPattern = "label='MklOp'";
 
-  // Get the name of Mkl op from original TensorFlow op
-  // We prefix 'Mkl' to the original op to get Mkl op.
-  inline string GetMklOpName(const string& name) {
-    // Prefix that we add to Tensorflow op name to construct Mkl op name.
-    const char* const kMklOpPrefix = "_Mkl";
-    return string(kMklOpPrefix) + name;
-  }
+// Get the name of Mkl op from original TensorFlow op
+// We prefix 'Mkl' to the original op to get Mkl op.
+inline string GetMklOpName(const string& name) {
+  // Prefix that we add to Tensorflow op name to construct Mkl op name.
+  const char* const kMklOpPrefix = "_Mkl";
+  return string(kMklOpPrefix) + name;
+}
 
-  // Check whether opname with type T is registered as MKL-compliant.
-  //
-  // @input: name of the op
-  // @input: T datatype to be used for checking op
-  // @return: true if opname is registered as Mkl op; false otherwise
-  static inline bool IsMklOp(const std::string& op_name, DataType T) {
-    string kernel = KernelsRegisteredForOp(op_name);
-    bool result =
-        kernel.find(kMklOpLabelPattern) != string::npos && (T == DT_FLOAT);
-    if (result) {
-      VLOG(1) << "mkl_op_registry::" << op_name << " is " << kMklOpLabel;
-    }
-    return result;
+// Check whether opname with type T is registered as MKL-compliant.
+//
+// @input: name of the op
+// @input: T datatype to be used for checking op
+// @return: true if opname is registered as Mkl op; false otherwise
+static inline bool IsMklOp(const std::string& op_name, DataType T) {
+  string kernel = KernelsRegisteredForOp(op_name);
+  bool result =
+      kernel.find(kMklOpLabelPattern) != string::npos && (T == DT_FLOAT);
+  if (result) {
+    VLOG(1) << "mkl_op_registry::" << op_name << " is " << kMklOpLabel;
   }
+  return result;
+}
 
-  // Check whether opname with type T is registered as MKL-compliant and
-  // is element-wise.
-  //
-  // @input: name of the op
-  // @input: T datatype to be used for checking op
-  // @return: true if opname is registered as element-wise Mkl op;
-  // false otherwise
-  static inline bool IsMklElementWiseOp(const std::string& op_name,
-    DataType T) {
-    if (!IsMklOp(op_name, T)) {
-      return false;
-    }
+// Check whether opname with type T is registered as MKL-compliant and
+// is element-wise.
+//
+// @input: name of the op
+// @input: T datatype to be used for checking op
+// @return: true if opname is registered as element-wise Mkl op;
+// false otherwise
+static inline bool IsMklElementWiseOp(const std::string& op_name, DataType T) {
+  if (!IsMklOp(op_name, T)) {
+    return false;
+  }
 
-    bool result = (0 == op_name.compare(GetMklOpName("Add")) ||
-                    0 == op_name.compare(GetMklOpName("Sub")) ||
-                    0 == op_name.compare(GetMklOpName("Mul")) ||
-                    0 == op_name.compare(GetMklOpName("Maximum")) ||
-                    0 == op_name.compare(GetMklOpName("SquaredDifference")));
+  bool result = (0 == op_name.compare(GetMklOpName("Add")) ||
+                 0 == op_name.compare(GetMklOpName("Sub")) ||
+                 0 == op_name.compare(GetMklOpName("Mul")) ||
+                 0 == op_name.compare(GetMklOpName("Maximum")) ||
+                 0 == op_name.compare(GetMklOpName("SquaredDifference")));
 
-    VLOG(1) << "mkl_op_registry::" << op_name
-            << " is elementwise MKL op: " << result;
-    return result;
-  }
+  VLOG(1) << "mkl_op_registry::" << op_name
+          << " is elementwise MKL op: " << result;
+  return result;
+}
 }  // namespace mkl_op_registry
 }  // namespace tensorflow
 #endif  // INTEL_MKL
diff --git a/tensorflow/core/graph/mkl_layout_pass.cc b/tensorflow/core/graph/mkl_layout_pass.cc
index 912075aa28..f4c9073dee 100644
--- a/tensorflow/core/graph/mkl_layout_pass.cc
+++ b/tensorflow/core/graph/mkl_layout_pass.cc
@@ -37,8 +37,8 @@ limitations under the License.
 #include "tensorflow/core/platform/logging.h"
 #include "tensorflow/core/util/tensor_format.h"
 
-#include "tensorflow/core/graph/mkl_layout_pass.h"
 #include "tensorflow/core/graph/mkl_graph_util.h"
+#include "tensorflow/core/graph/mkl_layout_pass.h"
 
 namespace tensorflow {
 
diff --git a/tensorflow/core/graph/mkl_tfconversion_pass.cc b/tensorflow/core/graph/mkl_tfconversion_pass.cc
index 599bb88f01..fe4588389e 100644
--- a/tensorflow/core/graph/mkl_tfconversion_pass.cc
+++ b/tensorflow/core/graph/mkl_tfconversion_pass.cc
@@ -33,8 +33,8 @@ limitations under the License.
 #include "tensorflow/core/lib/hash/hash.h"
 #include "tensorflow/core/platform/logging.h"
 
-#include "tensorflow/core/graph/mkl_tfconversion_pass.h"
 #include "tensorflow/core/graph/mkl_graph_util.h"
+#include "tensorflow/core/graph/mkl_tfconversion_pass.h"
 
 namespace tensorflow {
 
@@ -68,7 +68,7 @@ namespace tensorflow {
 // take place before we hit the op. For this, we add a new op before each
 // element-wise MKL op to deal with the inputs, called _MklInputConversion.
 // This pass has been enhanced to add this capability.
-//
+// 
 // The _MklInputConversion op will check the inputs to the elementwise op and
 // make sure that either both are in MKL format or both are in TF format,
 // depending on their initial state and whether broadcast is needed or not.
diff --git a/tensorflow/core/grappler/costs/graph_properties.h b/tensorflow/core/grappler/costs/graph_properties.h
index 239b5ac244..ee279b7e0a 100644
--- a/tensorflow/core/grappler/costs/graph_properties.h
+++ b/tensorflow/core/grappler/costs/graph_properties.h
@@ -58,12 +58,6 @@ class GraphProperties {
   const std::vector<OpInfo::TensorProperties>& GetOutputProperties(
       const string& node_name) const;
 
-  static void FillTensorPropertiesFromContext(
-      const shape_inference::ShapeHandle&, const DataType&,
-      shape_inference::InferenceContext*,
-      std::unordered_map<const shape_inference::Dimension*, int>* dim_ids,
-      OpInfo::TensorProperties*);
-
  private:
   // Inputs
   GrapplerItem item_;
diff --git a/tensorflow/core/grappler/utils.cc b/tensorflow/core/grappler/utils.cc
index 9ab889beb5..7fd1876371 100644
--- a/tensorflow/core/grappler/utils.cc
+++ b/tensorflow/core/grappler/utils.cc
@@ -62,7 +62,7 @@ const std::set<NodeDef*>& NodeMap::GetOutputs(const string& node_name) const {
 void NodeMap::AddNode(const string& name, NodeDef* node) {
   auto ret = nodes_.insert(std::make_pair(name, node));
   CHECK(ret.second) << "Pair (" << name << "," << node
-                    << ") is not inserted because the same key already exists.";
+                    << ") is not inserted because a same key already exists.";
 }
 
 void NodeMap::AddOutput(const string& node_name, const string& output_name) {
diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD
index f1cb9a1860..d7b457eab7 100644
--- a/tensorflow/core/kernels/BUILD
+++ b/tensorflow/core/kernels/BUILD
@@ -929,25 +929,6 @@ tf_cc_test(
     ],
 )
 
-tf_cuda_cc_test(
-    name = "bincount_op_test",
-    size = "small",
-    srcs = ["bincount_op_test.cc"],
-    deps = [
-        ":bincount_op",
-        ":ops_testutil",
-        ":ops_util",
-        "//tensorflow/core:core_cpu",
-        "//tensorflow/core:framework",
-        "//tensorflow/core:lib",
-        "//tensorflow/core:math_ops_op_lib",
-        "//tensorflow/core:protos_all_cc",
-        "//tensorflow/core:test",
-        "//tensorflow/core:test_main",
-        "//tensorflow/core:testlib",
-    ],
-)
-
 tf_cuda_cc_test(
     name = "constant_op_test",
     size = "small",
@@ -1636,10 +1617,7 @@ DYNAMIC_DEPS = [
 tf_kernel_library(
     name = "dynamic_partition_op",
     prefix = "dynamic_partition_op",
-    deps = DYNAMIC_DEPS + [
-        ":fill_functor",
-        ":gather_functor",
-    ] + if_cuda(["@cub_archive//:cub"]),
+    deps = DYNAMIC_DEPS,
 )
 
 tf_kernel_library(
@@ -1709,7 +1687,7 @@ tf_kernel_library(
     ],
 )
 
-tf_cuda_cc_tests(
+tf_cc_tests(
     name = "dynamic_op_test",
     size = "small",
     srcs = [
@@ -2594,9 +2572,8 @@ tf_kernel_library(
 
 tf_kernel_library(
     name = "bucketize_op",
-    gpu_srcs = ["cuda_device_array.h"],
     prefix = "bucketize_op",
-    deps = ARRAY_DEPS,
+    deps = MATH_DEPS,
 )
 
 tf_kernel_library(
@@ -3197,7 +3174,7 @@ tf_kernel_library(
         "//tensorflow/core:lib",
         "//tensorflow/core:lib_internal",
         "//third_party/eigen3",
-    ] + if_cuda(["@cub_archive//:cub"]),
+    ],
 )
 
 tf_kernel_library(
diff --git a/tensorflow/core/kernels/avgpooling_op.cc b/tensorflow/core/kernels/avgpooling_op.cc
index f918023693..af629d0de8 100644
--- a/tensorflow/core/kernels/avgpooling_op.cc
+++ b/tensorflow/core/kernels/avgpooling_op.cc
@@ -153,8 +153,7 @@ class AvgPoolingOp<GPUDevice, T> : public UnaryOp<T> {
     if (data_format_ == FORMAT_NCHW) {
       DnnPoolingOp<T>::Compute(
           context, perftools::gputools::dnn::PoolingMode::kAverage, ksize_,
-          stride_, padding_, data_format_, tensor_in, output_shape,
-          /*propagate_nans=*/false);
+          stride_, padding_, data_format_, tensor_in, output_shape);
     } else {
       Tensor* output = nullptr;
       OP_REQUIRES_OK(context,
@@ -409,7 +408,7 @@ class AvgPoolingGradOp<GPUDevice, T> : public OpKernel {
     DnnPoolingGradOp<T>::Compute(
         context, perftools::gputools::dnn::PoolingMode::kAverage, ksize_,
         stride_, padding_, data_format_, nullptr, nullptr, out_backprop,
-        output_shape, /*propagate_nans=*/false);
+        output_shape);
   }
 
  private:
@@ -533,7 +532,7 @@ class AvgPoolingGradOpCustomGPUKernel : public OpKernel {
       DnnPoolingGradOp<T>::Compute(
           context, perftools::gputools::dnn::PoolingMode::kAverage, ksize_,
           stride_, padding_, data_format_, nullptr, nullptr, out_backprop,
-          output_shape, /*propagate_nans=*/false);
+          output_shape);
     }
   }
 
diff --git a/tensorflow/core/kernels/bincount_op.cc b/tensorflow/core/kernels/bincount_op.cc
index 766d63e3be..1cd5943ef3 100644
--- a/tensorflow/core/kernels/bincount_op.cc
+++ b/tensorflow/core/kernels/bincount_op.cc
@@ -17,7 +17,6 @@ limitations under the License.
 
 #define EIGEN_USE_THREADS
 
-#include "tensorflow/core/kernels/bincount_op.h"
 #include "tensorflow/core/framework/op_kernel.h"
 #include "tensorflow/core/framework/register_types.h"
 #include "tensorflow/core/framework/types.h"
@@ -28,37 +27,46 @@ namespace tensorflow {
 
 using thread::ThreadPool;
 
-typedef Eigen::ThreadPoolDevice CPUDevice;
-typedef Eigen::GpuDevice GPUDevice;
-
-namespace functor {
-
 template <typename T>
-struct BincountFunctor<CPUDevice, T> {
-  static Status Compute(OpKernelContext* context,
-                        const typename TTypes<int32, 1>::ConstTensor& arr,
-                        const typename TTypes<T, 1>::ConstTensor& weights,
-                        typename TTypes<T, 1>::Tensor& output) {
-    int size = output.size();
+class BincountOp : public OpKernel {
+ public:
+  explicit BincountOp(OpKernelConstruction* ctx) : OpKernel(ctx) {}
+
+  void Compute(OpKernelContext* ctx) override {
+    const Tensor& arr_t = ctx->input(0);
+    const Tensor& size_tensor = ctx->input(1);
+    const Tensor& weights_t = ctx->input(2);
+    int32 size = size_tensor.scalar<int32>()();
+    OP_REQUIRES(
+        ctx, size >= 0,
+        errors::InvalidArgument("size (", size, ") must be non-negative"));
+    const bool has_weights = weights_t.NumElements() > 0;
+    OP_REQUIRES(ctx, !(has_weights && arr_t.shape() != weights_t.shape()),
+                errors::InvalidArgument(
+                    "If weights are passed, they must have the same shape (" +
+                    weights_t.shape().DebugString() + ") as arr (" +
+                    arr_t.shape().DebugString() + ")"));
+    const auto arr = arr_t.flat<int32>();
+    const auto weights = weights_t.flat<T>();
 
     Tensor all_nonneg_t;
-    TF_RETURN_IF_ERROR(context->allocate_temp(
-        DT_BOOL, TensorShape({}), &all_nonneg_t, AllocatorAttributes()));
-    all_nonneg_t.scalar<bool>().device(context->eigen_cpu_device()) =
+    OP_REQUIRES_OK(ctx,
+                   ctx->allocate_temp(DT_BOOL, TensorShape({}), &all_nonneg_t,
+                                      AllocatorAttributes()));
+    all_nonneg_t.scalar<bool>().device(ctx->eigen_cpu_device()) =
         (arr >= 0).all();
-    if (!all_nonneg_t.scalar<bool>()()) {
-      return errors::InvalidArgument("Input arr must be non-negative!");
-    }
+    OP_REQUIRES(ctx, all_nonneg_t.scalar<bool>()(),
+                errors::InvalidArgument("Input arr must be non-negative!"));
 
     // Allocate partial output bin sums for each worker thread. Worker ids in
     // ParallelForWithWorkerId range from 0 to NumThreads() inclusive.
     ThreadPool* thread_pool =
-        context->device()->tensorflow_cpu_worker_threads()->workers;
+        ctx->device()->tensorflow_cpu_worker_threads()->workers;
     const int64 num_threads = thread_pool->NumThreads() + 1;
     Tensor partial_bins_t;
-    TF_RETURN_IF_ERROR(context->allocate_temp(DataTypeToEnum<T>::value,
-                                              TensorShape({num_threads, size}),
-                                              &partial_bins_t));
+    OP_REQUIRES_OK(ctx, ctx->allocate_temp(weights_t.dtype(),
+                                           TensorShape({num_threads, size}),
+                                           &partial_bins_t));
     auto partial_bins = partial_bins_t.matrix<T>();
     partial_bins.setZero();
     thread_pool->ParallelForWithWorkerId(
@@ -67,7 +75,7 @@ struct BincountFunctor<CPUDevice, T> {
           for (int64 i = start_ind; i < limit_ind; i++) {
             int32 value = arr(i);
             if (value < size) {
-              if (weights.size()) {
+              if (has_weights) {
                 partial_bins(worker_id, value) += weights(i);
               } else {
                 // Complex numbers don't support "++".
@@ -76,62 +84,25 @@ struct BincountFunctor<CPUDevice, T> {
             }
           }
         });
-
+    TensorShape output_shape({size});
+    Tensor* output_t;
+    OP_REQUIRES_OK(ctx, ctx->allocate_output(0, output_shape, &output_t));
     // Sum the partial bins along the 0th axis.
     Eigen::array<int, 1> reduce_dims({0});
-    output.device(context->eigen_cpu_device()) = partial_bins.sum(reduce_dims);
-    return Status::OK();
-  }
-};
-
-}  // namespace functor
-
-template <typename Device, typename T>
-class BincountOp : public OpKernel {
- public:
-  explicit BincountOp(OpKernelConstruction* ctx) : OpKernel(ctx) {}
-
-  void Compute(OpKernelContext* ctx) override {
-    const Tensor& arr_t = ctx->input(0);
-    const Tensor& size_tensor = ctx->input(1);
-    const Tensor& weights_t = ctx->input(2);
-
-    int32 size = size_tensor.scalar<int32>()();
-    OP_REQUIRES(ctx, size >= 0, errors::InvalidArgument(
-                                    "size (", size, ") must be non-negative"));
-
-    const auto arr = arr_t.flat<int32>();
-    const auto weights = weights_t.flat<T>();
-    Tensor* output_t;
-    OP_REQUIRES_OK(ctx,
-                   ctx->allocate_output(0, TensorShape({size}), &output_t));
-    auto output = output_t->flat<T>();
-    OP_REQUIRES_OK(ctx, functor::BincountFunctor<Device, T>::Compute(
-                            ctx, arr, weights, output));
+    output_t->flat<T>().device(ctx->eigen_cpu_device()) =
+        partial_bins.sum(reduce_dims);
   }
 };
 
-#define REGISTER_KERNELS(type)                                       \
+#define REGISTER(TYPE)                                               \
   REGISTER_KERNEL_BUILDER(                                           \
-      Name("Bincount").Device(DEVICE_CPU).TypeConstraint<type>("T"), \
-      BincountOp<CPUDevice, type>)
-
-TF_CALL_NUMBER_TYPES(REGISTER_KERNELS);
-#undef REGISTER_KERNELS
-
-#if GOOGLE_CUDA
-
-#define REGISTER_KERNELS(type)                            \
-  REGISTER_KERNEL_BUILDER(Name("Bincount")                \
-                              .Device(DEVICE_GPU)         \
-                              .HostMemory("size")         \
-                              .TypeConstraint<type>("T"), \
-                          BincountOp<GPUDevice, type>)
+      Name("Bincount").Device(DEVICE_CPU).TypeConstraint<TYPE>("T"), \
+      BincountOp<TYPE>)
 
-TF_CALL_int32(REGISTER_KERNELS);
-TF_CALL_float(REGISTER_KERNELS);
-#undef REGISTER_KERNELS
+TF_CALL_NUMBER_TYPES(REGISTER);
 
-#endif  // GOOGLE_CUDA
+// TODO(ringwalt): Add a GPU implementation. We probably want to take a
+// different approach, e.g. threads in a warp each taking a pass over the same
+// data, and each thread summing a single bin.
 
 }  // end namespace tensorflow
diff --git a/tensorflow/core/kernels/bincount_op.h b/tensorflow/core/kernels/bincount_op.h
deleted file mode 100644
index 0f8dd2b82a..0000000000
--- a/tensorflow/core/kernels/bincount_op.h
+++ /dev/null
@@ -1,41 +0,0 @@
-/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-#ifndef TENSORFLOW_BINCOUNT_OP_H_
-#define TENSORFLOW_BINCOUNT_OP_H_
-
-#include "tensorflow/core/framework/op_kernel.h"
-#include "tensorflow/core/framework/tensor_types.h"
-#include "tensorflow/core/framework/types.h"
-#include "tensorflow/core/lib/core/errors.h"
-#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
-
-namespace tensorflow {
-
-namespace functor {
-
-template <typename Device, typename T>
-struct BincountFunctor {
-  static Status Compute(OpKernelContext* context,
-                        const typename TTypes<int32, 1>::ConstTensor& arr,
-                        const typename TTypes<T, 1>::ConstTensor& weights,
-                        typename TTypes<T, 1>::Tensor& output);
-};
-
-}  // end namespace functor
-
-}  // end namespace tensorflow
-
-#endif  // TENSORFLOW_BINCOUNT_OP_H_
diff --git a/tensorflow/core/kernels/bincount_op_gpu.cu.cc b/tensorflow/core/kernels/bincount_op_gpu.cu.cc
deleted file mode 100644
index ae9e26ffdf..0000000000
--- a/tensorflow/core/kernels/bincount_op_gpu.cu.cc
+++ /dev/null
@@ -1,114 +0,0 @@
-/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-#if GOOGLE_CUDA
-
-#define EIGEN_USE_GPU
-
-#include "tensorflow/core/kernels/bincount_op.h"
-#include "external/cub_archive/cub/device/device_histogram.cuh"
-#include "tensorflow/core/framework/op_kernel.h"
-#include "tensorflow/core/framework/register_types.h"
-#include "tensorflow/core/framework/tensor.h"
-#include "tensorflow/core/framework/tensor_shape.h"
-#include "tensorflow/core/platform/logging.h"
-#include "tensorflow/core/platform/types.h"
-#include "tensorflow/core/util/cuda_kernel_helper.h"
-
-namespace tensorflow {
-
-typedef Eigen::GpuDevice GPUDevice;
-
-namespace functor {
-
-template <typename T>
-struct BincountFunctor<GPUDevice, T> {
-  static Status Compute(OpKernelContext* context,
-                        const typename TTypes<int32, 1>::ConstTensor& arr,
-                        const typename TTypes<T, 1>::ConstTensor& weights,
-                        typename TTypes<T, 1>::Tensor& output) {
-    if (weights.size() != 0) {
-      return errors::InvalidArgument(
-          "Weights should not be passed as it should be "
-          "handled by unsorted_segment_sum");
-    }
-    if (output.size() == 0) {
-      return Status::OK();
-    }
-    // In case weight.size() == 0, use CUB
-    size_t temp_storage_bytes = 0;
-    const int32* d_samples = arr.data();
-    T* d_histogram = output.data();
-    int num_levels = output.size() + 1;
-    int32 lower_level = 0;
-    int32 upper_level = output.size();
-    int num_samples = arr.size();
-    const cudaStream_t& stream = GetCudaStream(context);
-
-    // The first HistogramEven is to obtain the temp storage size required
-    // with d_temp_storage = NULL passed to the call.
-    auto err = cub::DeviceHistogram::HistogramEven(
-        /* d_temp_storage */ NULL,
-        /* temp_storage_bytes */ temp_storage_bytes,
-        /* d_samples */ d_samples,
-        /* d_histogram */ d_histogram,
-        /* num_levels */ num_levels,
-        /* lower_level */ lower_level,
-        /* upper_level */ upper_level,
-        /* num_samples */ num_samples,
-        /* stream */ stream);
-    if (err != cudaSuccess) {
-      return errors::Internal(
-          "Could not launch HistogramEven to get temp storage: ",
-          cudaGetErrorString(err), ".");
-    }
-    Tensor temp_storage;
-    TF_RETURN_IF_ERROR(context->allocate_temp(
-        DataTypeToEnum<int8>::value,
-        TensorShape({static_cast<int64>(temp_storage_bytes)}), &temp_storage));
-
-    void* d_temp_storage = temp_storage.flat<int8>().data();
-    // The second HistogramEven is to actual run with d_temp_storage
-    // allocated with temp_storage_bytes.
-    err = cub::DeviceHistogram::HistogramEven(
-        /* d_temp_storage */ d_temp_storage,
-        /* temp_storage_bytes */ temp_storage_bytes,
-        /* d_samples */ d_samples,
-        /* d_histogram */ d_histogram,
-        /* num_levels */ num_levels,
-        /* lower_level */ lower_level,
-        /* upper_level */ upper_level,
-        /* num_samples */ num_samples,
-        /* stream */ stream);
-    if (err != cudaSuccess) {
-      return errors::Internal("Could not launch HistogramEven: ",
-                              cudaGetErrorString(err), ".");
-    }
-    return Status::OK();
-  }
-};
-
-}  // end namespace functor
-
-#define REGISTER_GPU_SPEC(type) \
-  template struct functor::BincountFunctor<GPUDevice, type>;
-
-TF_CALL_int32(REGISTER_GPU_SPEC);
-TF_CALL_float(REGISTER_GPU_SPEC);
-#undef REGISTER_GPU_SPEC
-
-}  // namespace tensorflow
-
-#endif  // GOOGLE_CUDA
diff --git a/tensorflow/core/kernels/bincount_op_test.cc b/tensorflow/core/kernels/bincount_op_test.cc
deleted file mode 100644
index 14becc87a7..0000000000
--- a/tensorflow/core/kernels/bincount_op_test.cc
+++ /dev/null
@@ -1,75 +0,0 @@
-/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-#include "tensorflow/core/common_runtime/kernel_benchmark_testlib.h"
-#include "tensorflow/core/framework/fake_input.h"
-#include "tensorflow/core/framework/node_def_builder.h"
-#include "tensorflow/core/framework/tensor.h"
-#include "tensorflow/core/graph/node_builder.h"
-#include "tensorflow/core/kernels/ops_testutil.h"
-#include "tensorflow/core/platform/test.h"
-#include "tensorflow/core/platform/test_benchmark.h"
-
-namespace tensorflow {
-
-static Graph* Bincount(int arr_size, int nbins) {
-  Graph* g = new Graph(OpRegistry::Global());
-
-  Tensor arr(DT_INT32, TensorShape({arr_size}));
-  arr.flat<int32>() = arr.flat<int32>().setRandom().abs();
-
-  Tensor size(DT_INT32, TensorShape({(int32)1}));
-  size.flat<int32>()(0) = (int32)nbins;
-
-  Tensor weights(DT_INT32, TensorShape({0}));
-
-  Node* node;
-  TF_CHECK_OK(NodeBuilder(g->NewName("n"), "Bincount")
-                  .Input(test::graph::Constant(g, arr))
-                  .Input(test::graph::Constant(g, size))
-                  .Input(test::graph::Constant(g, weights))
-                  .Attr("T", DT_INT32)
-                  .Finalize(g, &node));
-  return g;
-}
-
-#define BM_BincountDev(K, NBINS, type)                             \
-  static void BM_Bincount##_##type##_##K##_##NBINS(int iters) {    \
-    testing::ItemsProcessed(static_cast<int64>(iters) * K * 1024); \
-    test::Benchmark(#type, Bincount(K * 1024, NBINS)).Run(iters);  \
-  }                                                                \
-  BENCHMARK(BM_Bincount##_##type##_##K##_##NBINS);
-
-BM_BincountDev(32, 1000, cpu);
-BM_BincountDev(32, 2000, cpu);
-BM_BincountDev(32, 5000, cpu);
-BM_BincountDev(64, 1000, cpu);
-BM_BincountDev(64, 2000, cpu);
-BM_BincountDev(64, 5000, cpu);
-BM_BincountDev(128, 1000, cpu);
-BM_BincountDev(128, 2000, cpu);
-BM_BincountDev(128, 5000, cpu);
-
-BM_BincountDev(32, 1000, gpu);
-BM_BincountDev(32, 2000, gpu);
-BM_BincountDev(32, 5000, gpu);
-BM_BincountDev(64, 1000, gpu);
-BM_BincountDev(64, 2000, gpu);
-BM_BincountDev(64, 5000, gpu);
-BM_BincountDev(128, 1000, gpu);
-BM_BincountDev(128, 2000, gpu);
-BM_BincountDev(128, 5000, gpu);
-
-}  // end namespace tensorflow
diff --git a/tensorflow/core/kernels/bucketize_op.cc b/tensorflow/core/kernels/bucketize_op.cc
index c1693de538..93c2d01221 100644
--- a/tensorflow/core/kernels/bucketize_op.cc
+++ b/tensorflow/core/kernels/bucketize_op.cc
@@ -15,43 +15,15 @@ limitations under the License.
 
 // See docs in ../ops/math_ops.cc.
 
-#include "tensorflow/core/kernels/bucketize_op.h"
+#include <algorithm>
+#include <vector>
+
 #include "tensorflow/core/framework/op_kernel.h"
-#include "tensorflow/core/framework/register_types.h"
-#include "tensorflow/core/framework/tensor.h"
-#include "tensorflow/core/framework/tensor_shape.h"
-#include "tensorflow/core/platform/logging.h"
-#include "tensorflow/core/platform/types.h"
+#include "tensorflow/core/lib/core/errors.h"
 
 namespace tensorflow {
 
-using thread::ThreadPool;
-
-typedef Eigen::ThreadPoolDevice CPUDevice;
-typedef Eigen::GpuDevice GPUDevice;
-
-namespace functor {
-
 template <typename T>
-struct BucketizeFunctor<CPUDevice, T> {
-  // PRECONDITION: boundaries_vector must be sorted.
-  static Status Compute(OpKernelContext* context,
-                        const typename TTypes<T, 1>::ConstTensor& input,
-                        const std::vector<float>& boundaries_vector,
-                        typename TTypes<int32, 1>::Tensor& output) {
-    const int N = input.size();
-    for (int i = 0; i < N; i++) {
-      auto first_bigger_it = std::upper_bound(
-          boundaries_vector.begin(), boundaries_vector.end(), input(i));
-      output(i) = first_bigger_it - boundaries_vector.begin();
-    }
-
-    return Status::OK();
-  }
-};
-}  // namespace functor
-
-template <typename Device, typename T>
 class BucketizeOp : public OpKernel {
  public:
   explicit BucketizeOp(OpKernelConstruction* context) : OpKernel(context) {
@@ -62,42 +34,36 @@ class BucketizeOp : public OpKernel {
 
   void Compute(OpKernelContext* context) override {
     const Tensor& input_tensor = context->input(0);
-    const auto input = input_tensor.flat<T>();
-
+    auto input = input_tensor.flat<T>();
     Tensor* output_tensor = nullptr;
     OP_REQUIRES_OK(context, context->allocate_output(0, input_tensor.shape(),
                                                      &output_tensor));
     auto output = output_tensor->template flat<int32>();
-    OP_REQUIRES_OK(context, functor::BucketizeFunctor<Device, T>::Compute(
-                                context, input, boundaries_, output));
+
+    const int N = input.size();
+    for (int i = 0; i < N; i++) {
+      output(i) = CalculateBucketIndex(input(i));
+    }
   }
 
  private:
+  int32 CalculateBucketIndex(const T value) {
+    auto first_bigger_it =
+        std::upper_bound(boundaries_.begin(), boundaries_.end(), value);
+    return first_bigger_it - boundaries_.begin();
+  }
   std::vector<float> boundaries_;
 };
 
 #define REGISTER_KERNEL(T)                                         \
   REGISTER_KERNEL_BUILDER(                                         \
       Name("Bucketize").Device(DEVICE_CPU).TypeConstraint<T>("T"), \
-      BucketizeOp<CPUDevice, T>);
-
-REGISTER_KERNEL(int32);
-REGISTER_KERNEL(int64);
-REGISTER_KERNEL(float);
-REGISTER_KERNEL(double);
-#undef REGISTER_KERNEL
-
-#if GOOGLE_CUDA
-#define REGISTER_KERNEL(T)                                         \
-  REGISTER_KERNEL_BUILDER(                                         \
-      Name("Bucketize").Device(DEVICE_GPU).TypeConstraint<T>("T"), \
-      BucketizeOp<GPUDevice, T>);
+      BucketizeOp<T>);
 
 REGISTER_KERNEL(int32);
 REGISTER_KERNEL(int64);
 REGISTER_KERNEL(float);
 REGISTER_KERNEL(double);
 #undef REGISTER_KERNEL
-#endif  // GOOGLE_CUDA
 
 }  // namespace tensorflow
diff --git a/tensorflow/core/kernels/bucketize_op.h b/tensorflow/core/kernels/bucketize_op.h
deleted file mode 100644
index c8e461beb9..0000000000
--- a/tensorflow/core/kernels/bucketize_op.h
+++ /dev/null
@@ -1,41 +0,0 @@
-/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-#ifndef TENSORFLOW_BUCKETIZE_OP_H_
-#define TENSORFLOW_BUCKETIZE_OP_H_
-
-#include <vector>
-#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
-
-#include "tensorflow/core/framework/op_kernel.h"
-#include "tensorflow/core/framework/tensor_types.h"
-#include "tensorflow/core/framework/types.h"
-#include "tensorflow/core/lib/core/errors.h"
-
-namespace tensorflow {
-namespace functor {
-
-template <typename Device, typename T>
-struct BucketizeFunctor {
-  static Status Compute(OpKernelContext* context,
-                        const typename TTypes<T, 1>::ConstTensor& input,
-                        const std::vector<float>& boundaries_vector,
-                        typename TTypes<int32, 1>::Tensor& output);
-};
-
-}  // namespace functor
-}  // namespace tensorflow
-
-#endif  // TENSORFLOW_BUCKETIZE_OP_H_
diff --git a/tensorflow/core/kernels/bucketize_op_gpu.cu.cc b/tensorflow/core/kernels/bucketize_op_gpu.cu.cc
deleted file mode 100644
index aafbbe41b4..0000000000
--- a/tensorflow/core/kernels/bucketize_op_gpu.cu.cc
+++ /dev/null
@@ -1,101 +0,0 @@
-/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-#if GOOGLE_CUDA
-
-#define EIGEN_USE_GPU
-
-#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
-
-#include "tensorflow/core/framework/op_kernel.h"
-#include "tensorflow/core/framework/register_types.h"
-#include "tensorflow/core/framework/tensor.h"
-#include "tensorflow/core/framework/tensor_shape.h"
-#include "tensorflow/core/kernels/bucketize_op.h"
-#include "tensorflow/core/kernels/cuda_device_array.h"
-#include "tensorflow/core/platform/logging.h"
-#include "tensorflow/core/platform/types.h"
-#include "tensorflow/core/util/cuda_kernel_helper.h"
-
-namespace tensorflow {
-
-typedef Eigen::GpuDevice GPUDevice;
-
-template <typename T>
-__global__ void BucketizeCustomKernel(
-    const int32 size_in, const T* in, const int32 size_boundaries,
-    CudaDeviceArrayStruct<float> boundaries_array, int32* out) {
-  const float* boundaries = GetCudaDeviceArrayOnDevice(&boundaries_array);
-  CUDA_1D_KERNEL_LOOP(i, size_in) {
-    T value = in[i];
-    int32 bucket = 0;
-    int32 count = size_boundaries;
-    while (count > 0) {
-      int32 l = bucket;
-      int32 step = count / 2;
-      l += step;
-      if (!(value < static_cast<T>(boundaries[l]))) {
-        bucket = ++l;
-        count -= step + 1;
-      } else {
-        count = step;
-      }
-    }
-    out[i] = bucket;
-  }
-}
-
-namespace functor {
-
-template <typename T>
-struct BucketizeFunctor<GPUDevice, T> {
-  // PRECONDITION: boundaries_vector must be sorted.
-  static Status Compute(OpKernelContext* context,
-                        const typename TTypes<T, 1>::ConstTensor& input,
-                        const std::vector<float>& boundaries_vector,
-                        typename TTypes<int32, 1>::Tensor& output) {
-    const GPUDevice& d = context->eigen_device<GPUDevice>();
-
-    CudaDeviceArrayOnHost<float> boundaries_array(context,
-                                                  boundaries_vector.size());
-    TF_RETURN_IF_ERROR(boundaries_array.Init());
-    for (int i = 0; i < boundaries_vector.size(); ++i) {
-      boundaries_array.Set(i, boundaries_vector[i]);
-    }
-    TF_RETURN_IF_ERROR(boundaries_array.Finalize());
-
-    CudaLaunchConfig config = GetCudaLaunchConfig(input.size(), d);
-    BucketizeCustomKernel<
-        T><<<config.block_count, config.thread_per_block, 0, d.stream()>>>(
-        input.size(), input.data(), boundaries_vector.size(),
-        boundaries_array.data(), output.data());
-
-    return Status::OK();
-  }
-};
-}  // namespace functor
-
-#define REGISTER_GPU_SPEC(type) \
-  template struct functor::BucketizeFunctor<GPUDevice, type>;
-
-REGISTER_GPU_SPEC(int32);
-REGISTER_GPU_SPEC(int64);
-REGISTER_GPU_SPEC(float);
-REGISTER_GPU_SPEC(double);
-#undef REGISTER_GPU_SPEC
-
-}  // namespace tensorflow
-
-#endif  // GOOGLE_CUDA
diff --git a/tensorflow/core/kernels/conv_grad_ops_3d.cc b/tensorflow/core/kernels/conv_grad_ops_3d.cc
index f819fccbfb..21f5cb1716 100644
--- a/tensorflow/core/kernels/conv_grad_ops_3d.cc
+++ b/tensorflow/core/kernels/conv_grad_ops_3d.cc
@@ -236,7 +236,6 @@ class Conv3DBackpropInputOp : public OpKernel {
   REGISTER_KERNEL_BUILDER(                                                     \
       Name("Conv3DBackpropInputV2").Device(DEVICE_CPU).TypeConstraint<T>("T"), \
       Conv3DBackpropInputOp<CPUDevice, T>);
-TF_CALL_half(REGISTER_CPU_KERNEL);
 TF_CALL_float(REGISTER_CPU_KERNEL);
 TF_CALL_double(REGISTER_CPU_KERNEL);
 #undef REGISTER_CPU_KERNEL
@@ -384,7 +383,6 @@ class Conv3DBackpropFilterOp : public OpKernel {
                               .Device(DEVICE_CPU)                             \
                               .TypeConstraint<T>("T"),                        \
                           Conv3DBackpropFilterOp<CPUDevice, T>);
-TF_CALL_half(REGISTER_CPU_KERNEL);
 TF_CALL_float(REGISTER_CPU_KERNEL);
 TF_CALL_double(REGISTER_CPU_KERNEL);
 #undef REGISTER_CPU_KERNEL
@@ -411,7 +409,6 @@ namespace functor {
       const std::array<int, 3>& padding_right,                        \
       typename TTypes<T, 5, int>::Tensor out, TensorFormat format);
 
-DECLARE_GPU_SPEC(Eigen::half);
 DECLARE_GPU_SPEC(float);
 #undef DECLARE_GPU_SPEC
 }  // namespace functor
@@ -1101,29 +1098,22 @@ class Conv3DBackpropFilterOp<GPUDevice, T> : public OpKernel {
   bool cudnn_use_autotune_;
 };
 
-
-
-#define REGISTER_GPU_KERNEL(T)                                                \
-  REGISTER_KERNEL_BUILDER(                                                    \
-      Name("Conv3DBackpropInput").Device(DEVICE_GPU).TypeConstraint<T>("T"),  \
-      Conv3DBackpropInputOp<GPUDevice, T>);                                   \
-  REGISTER_KERNEL_BUILDER(Name("Conv3DBackpropInputV2")                       \
-                            .Device(DEVICE_GPU)                               \
-                            .TypeConstraint<T>("T")                           \
-                            .HostMemory("input_sizes"),                       \
-                        Conv3DBackpropInputOp<GPUDevice, T>);                 \
-  REGISTER_KERNEL_BUILDER(                                                    \
-    Name("Conv3DBackpropFilter").Device(DEVICE_GPU).TypeConstraint<T>("T"),   \
-    Conv3DBackpropFilterOp<GPUDevice, T>);                                    \
-  REGISTER_KERNEL_BUILDER(Name("Conv3DBackpropFilterV2")                      \
-                            .Device(DEVICE_GPU)                               \
-                            .TypeConstraint<T>("T")                           \
-                            .HostMemory("filter_sizes"),                      \
-                        Conv3DBackpropFilterOp<GPUDevice, T>);
-TF_CALL_half(REGISTER_GPU_KERNEL);
-TF_CALL_float(REGISTER_GPU_KERNEL);
-#undef REGISTER_GPU_KERNEL
-     
+REGISTER_KERNEL_BUILDER(
+    Name("Conv3DBackpropInput").Device(DEVICE_GPU).TypeConstraint<float>("T"),
+    Conv3DBackpropInputOp<GPUDevice, float>);
+REGISTER_KERNEL_BUILDER(Name("Conv3DBackpropInputV2")
+                            .Device(DEVICE_GPU)
+                            .TypeConstraint<float>("T")
+                            .HostMemory("input_sizes"),
+                        Conv3DBackpropInputOp<GPUDevice, float>);
+REGISTER_KERNEL_BUILDER(
+    Name("Conv3DBackpropFilter").Device(DEVICE_GPU).TypeConstraint<float>("T"),
+    Conv3DBackpropFilterOp<GPUDevice, float>);
+REGISTER_KERNEL_BUILDER(Name("Conv3DBackpropFilterV2")
+                            .Device(DEVICE_GPU)
+                            .TypeConstraint<float>("T")
+                            .HostMemory("filter_sizes"),
+                        Conv3DBackpropFilterOp<GPUDevice, float>);
 #endif  // GOOGLE_CUDA
 
 }  // namespace tensorflow
diff --git a/tensorflow/core/kernels/conv_ops_3d.cc b/tensorflow/core/kernels/conv_ops_3d.cc
index 37cb67bc51..8a89d564de 100644
--- a/tensorflow/core/kernels/conv_ops_3d.cc
+++ b/tensorflow/core/kernels/conv_ops_3d.cc
@@ -145,7 +145,6 @@ class Conv3DOp : public BinaryOp<T> {
   REGISTER_KERNEL_BUILDER(                                      \
       Name("Conv3D").Device(DEVICE_CPU).TypeConstraint<T>("T"), \
       Conv3DOp<CPUDevice, T>);
-TF_CALL_half(REGISTER_CPU_KERNEL);
 TF_CALL_float(REGISTER_CPU_KERNEL);
 TF_CALL_double(REGISTER_CPU_KERNEL);
 #undef REGISTER_CPU_KERNEL
@@ -483,16 +482,12 @@ namespace functor {
       const std::array<int, 3>& padding_right,                        \
       typename TTypes<T, 5, int>::Tensor out, TensorFormat format);
 
-DECLARE_GPU_SPEC(Eigen::half);
 DECLARE_GPU_SPEC(float);
 #undef DECLARE_GPU_SPEC
 
 }  // namespace functor
 
 // Registration of the GPU implementations.
-REGISTER_KERNEL_BUILDER(
-    Name("Conv3D").Device(DEVICE_GPU).TypeConstraint<Eigen::half>("T"),
-    Conv3DOp<GPUDevice, Eigen::half>);
 REGISTER_KERNEL_BUILDER(
     Name("Conv3D").Device(DEVICE_GPU).TypeConstraint<float>("T"),
     Conv3DOp<GPUDevice, float>);
diff --git a/tensorflow/core/kernels/cwise_op_acosh.cc b/tensorflow/core/kernels/cwise_op_acosh.cc
index 39c8814073..7bdd8d22a3 100644
--- a/tensorflow/core/kernels/cwise_op_acosh.cc
+++ b/tensorflow/core/kernels/cwise_op_acosh.cc
@@ -20,8 +20,16 @@ namespace tensorflow {
 REGISTER4(UnaryOp, CPU, "Acosh", functor::acosh, float, double,
           complex64, complex128);
 
-#ifdef TENSORFLOW_USE_SYCL
-REGISTER2(UnaryOp, SYCL, "Acosh", functor::acosh, float, double);
+#if TENSORFLOW_USE_SYCL
+#define REGISTER_SYCL_KERNEL(TYPE)                                    \
+  REGISTER_KERNEL_BUILDER(                                            \
+                          Name("Acosh")                               \
+                          .Device(DEVICE_SYCL)                        \
+                          .TypeConstraint<TYPE>("T"),                 \
+                          UnaryOp<SYCLDevice, functor::acosh<TYPE>>);
+REGISTER_SYCL_KERNEL(float);
+REGISTER_SYCL_KERNEL(double);
+#undef REGISTER_SYCL_KERNEL
 #endif // TENSORFLOW_USE_SYCL
 
 #if GOOGLE_CUDA
diff --git a/tensorflow/core/kernels/cwise_op_asinh.cc b/tensorflow/core/kernels/cwise_op_asinh.cc
index 8d44208aa7..e0644323c0 100644
--- a/tensorflow/core/kernels/cwise_op_asinh.cc
+++ b/tensorflow/core/kernels/cwise_op_asinh.cc
@@ -20,9 +20,17 @@ namespace tensorflow {
 REGISTER4(UnaryOp, CPU, "Asinh", functor::asinh, float, double,
           complex64, complex128);
 
-#ifdef TENSORFLOW_USE_SYCL
-REGISTER2(UnaryOp, SYCL, "Asinh", functor::asinh, float, double);
-#endif // TENSORFLOW_USE_SYCL
+#if TENSORFLOW_USE_SYCL
+#define REGISTER_SYCL_KERNEL(TYPE)                                    \
+  REGISTER_KERNEL_BUILDER(                                            \
+                          Name("Asinh")                               \
+                          .Device(DEVICE_SYCL)                        \
+                          .TypeConstraint<TYPE>("T"),                 \
+                          UnaryOp<SYCLDevice, functor::asinh<TYPE>>);
+REGISTER_SYCL_KERNEL(float);
+REGISTER_SYCL_KERNEL(double);
+#undef REGISTER_SYCL_KERNEL
+#endif // TENSORFLOW_USE_SYC
 
 #if GOOGLE_CUDA
 REGISTER2(UnaryOp, GPU, "Asinh", functor::asinh, float, double);
diff --git a/tensorflow/core/kernels/cwise_op_atanh.cc b/tensorflow/core/kernels/cwise_op_atanh.cc
index bbc69e45aa..058f5140c5 100644
--- a/tensorflow/core/kernels/cwise_op_atanh.cc
+++ b/tensorflow/core/kernels/cwise_op_atanh.cc
@@ -20,9 +20,17 @@ namespace tensorflow {
 REGISTER4(UnaryOp, CPU, "Atanh", functor::atanh, float, double,
           complex64, complex128);
 
-#ifdef TENSORFLOW_USE_SYCL
-REGISTER2(UnaryOp, SYCL, "Atanh", functor::atanh, float, double);
-#endif // TENSORFLOW_USE_SYCL
+#if TENSORFLOW_USE_SYCL
+#define REGISTER_SYCL_KERNEL(TYPE)                                    \
+  REGISTER_KERNEL_BUILDER(                                            \
+                          Name("Atanh")                               \
+                          .Device(DEVICE_SYCL)                        \
+                          .TypeConstraint<TYPE>("T"),                 \
+                          UnaryOp<SYCLDevice, functor::atanh<TYPE>>);
+REGISTER_SYCL_KERNEL(float);
+REGISTER_SYCL_KERNEL(double);
+#undef REGISTER_SYCL_KERNEL
+#endif // TENSORFLOW_USE_SYC
 
 #if GOOGLE_CUDA
 REGISTER2(UnaryOp, GPU, "Atanh", functor::atanh, float, double);
diff --git a/tensorflow/core/kernels/cwise_ops.h b/tensorflow/core/kernels/cwise_ops.h
index d32185b6bf..6c22b124de 100644
--- a/tensorflow/core/kernels/cwise_ops.h
+++ b/tensorflow/core/kernels/cwise_ops.h
@@ -49,11 +49,7 @@ template <typename T>
 struct scalar_asinh_op {
   EIGEN_EMPTY_STRUCT_CTOR(scalar_asinh_op)
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const T operator()(const T& a) const {
-#if EIGEN_HAS_CXX11_MATH
-    return numext::asinh(a);
-#else
     return std::asinh(a);
-#endif  // EIGEN_HAS_CXX11_MATH
   }
 };
 template <typename T>
@@ -65,11 +61,7 @@ template <typename T>
 struct scalar_acosh_op {
   EIGEN_EMPTY_STRUCT_CTOR(scalar_acosh_op)
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const T operator()(const T& a) const {
-#if EIGEN_HAS_CXX11_MATH
-    return numext::acosh(a);
-#else
     return std::acosh(a);
-#endif  // EIGEN_HAS_CXX11_MATH
   }
 };
 template <typename T>
@@ -81,11 +73,7 @@ template <typename T>
 struct scalar_atanh_op {
   EIGEN_EMPTY_STRUCT_CTOR(scalar_atanh_op)
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const T operator()(const T& a) const {
-#if EIGEN_HAS_CXX11_MATH
-    return numext::atanh(a);
-#else
     return std::atanh(a);
-#endif  // EIGEN_HAS_CXX11_MATH
   }
 };
 template <typename T>
diff --git a/tensorflow/core/kernels/depthwise_conv_grad_op.cc b/tensorflow/core/kernels/depthwise_conv_grad_op.cc
index 53d65a22d1..9804d7d38e 100644
--- a/tensorflow/core/kernels/depthwise_conv_grad_op.cc
+++ b/tensorflow/core/kernels/depthwise_conv_grad_op.cc
@@ -231,7 +231,7 @@ static void CopyOutputBackpropRegion(const DepthwiseArgs& args,
       }
       // Pad to vector-register width (if needed).
       for (int64 d = 0; d < pad_size; ++d) {
-        buffer[buf_base + vectorized_size + scalar_size + d] = static_cast<T>(0);
+        buffer[buf_base + vectorized_size + scalar_size + d] = 0;
       }
     }
   }
@@ -297,7 +297,7 @@ static void ComputeBackpropInput(const DepthwiseArgs& args,
 
   for (int i = 0; i < output_vectorized_size; i += kPacketSize) {
     // Reset accumulator.
-    auto vaccum = Eigen::internal::pset1<Packet>(static_cast<T>(0));
+    auto vaccum = Eigen::internal::pset1<Packet>(0);
     for (int j = 0; j < filter_spatial_size; ++j) {
       // Calculate index.
       const int64 index = i + j * padded_filter_inner_dim_size;
@@ -318,7 +318,7 @@ static void ComputeBackpropInput(const DepthwiseArgs& args,
   }
 
   if (output_scalar_size > 0) {
-    auto vaccum = Eigen::internal::pset1<Packet>(static_cast<T>(0));
+    auto vaccum = Eigen::internal::pset1<Packet>(0);
     for (int j = 0; j < filter_spatial_size; ++j) {
       const int64 index =
           output_vectorized_size + j * padded_filter_inner_dim_size;
@@ -346,7 +346,7 @@ static void ComputeBackpropInput(const DepthwiseArgs& args,
   if (depth_multiplier > 1) {
     for (int64 d = 0; d < in_depth; ++d) {
       const int64 index = d * args.depth_multiplier;
-      T accum = static_cast<T>(0);
+      T accum = 0;
       for (int64 dm = 0; dm < dm_vectorized_size; dm += kPacketSize) {
         const auto v = Eigen::internal::ploadu<Packet>(out_buffer + index + dm);
         accum += Eigen::internal::predux(v);
@@ -510,7 +510,6 @@ static void DepthwiseConvBackpropInputReference(const DepthwiseArgs& args,
 
 #if GOOGLE_CUDA
 
-extern template struct LaunchDepthwiseConvBackpropInputOp<GPUDevice, Eigen::half>;
 extern template struct LaunchDepthwiseConvBackpropInputOp<GPUDevice, float>;
 extern template struct LaunchDepthwiseConvBackpropInputOp<GPUDevice, double>;
 
@@ -885,7 +884,6 @@ static void DepthwiseConvBackpropFilterReference(const DepthwiseArgs& args,
 
 #if GOOGLE_CUDA
 
-extern template struct LaunchDepthwiseConvBackpropFilterOp<GPUDevice, Eigen::half>;
 extern template struct LaunchDepthwiseConvBackpropFilterOp<GPUDevice, float>;
 extern template struct LaunchDepthwiseConvBackpropFilterOp<GPUDevice, double>;
 
diff --git a/tensorflow/core/kernels/depthwise_conv_op.cc b/tensorflow/core/kernels/depthwise_conv_op.cc
index 2759ecb2f1..bbeeaf7895 100644
--- a/tensorflow/core/kernels/depthwise_conv_op.cc
+++ b/tensorflow/core/kernels/depthwise_conv_op.cc
@@ -94,7 +94,7 @@ struct DepthwiseConv2DKernel {
 
     for (int i = 0; i < output_vectorized_size; i += kPacketSize) {
       // Reset accumulator.
-      auto vaccum = Eigen::internal::pset1<Packet>(static_cast<T>(0));
+      auto vaccum = Eigen::internal::pset1<Packet>(0);
       for (int j = 0; j < filter_spatial_size; ++j) {
         // Calculate index.
         const int64 index = i + j * padded_filter_inner_dim_size;
@@ -115,7 +115,7 @@ struct DepthwiseConv2DKernel {
     }
 
     if (output_scalar_size > 0) {
-      auto vaccum = Eigen::internal::pset1<Packet>(static_cast<T>(0));
+      auto vaccum = Eigen::internal::pset1<Packet>(0);
       for (int j = 0; j < filter_spatial_size; ++j) {
         const int64 index =
             output_vectorized_size + j * padded_filter_inner_dim_size;
@@ -246,7 +246,6 @@ extern template class LaunchConv2DOp<CPUDevice, float>;
 #if GOOGLE_CUDA
 
 // Extern template instantiated in depthwise_conv_op_gpu.cc.
-extern template struct LaunchDepthwiseConvOp<GPUDevice, Eigen::half>;
 extern template struct LaunchDepthwiseConvOp<GPUDevice, float>;
 extern template struct LaunchDepthwiseConvOp<GPUDevice, double>;
 
@@ -420,17 +419,12 @@ class DepthwiseConv2dNativeOp : public BinaryOp<T> {
       Name("DepthwiseConv2dNative").Device(DEVICE_CPU).TypeConstraint<T>("T"), \
       DepthwiseConv2dNativeOp<CPUDevice, T>);
 
-TF_CALL_half(REGISTER_CPU_KERNEL);
 TF_CALL_float(REGISTER_CPU_KERNEL);
 #if !defined(PLATFORM_WINDOWS) || !defined(_DEBUG)
 TF_CALL_double(REGISTER_CPU_KERNEL);
 #endif
 
 #if GOOGLE_CUDA
-REGISTER_KERNEL_BUILDER(
-    Name("DepthwiseConv2dNative").Device(DEVICE_GPU).TypeConstraint<Eigen::half>("T"),
-    DepthwiseConv2dNativeOp<GPUDevice, Eigen::half>);
-
 REGISTER_KERNEL_BUILDER(
     Name("DepthwiseConv2dNative").Device(DEVICE_GPU).TypeConstraint<float>("T"),
     DepthwiseConv2dNativeOp<GPUDevice, float>);
diff --git a/tensorflow/core/kernels/depthwise_conv_op.h b/tensorflow/core/kernels/depthwise_conv_op.h
index 11aed5b415..aa5b5c76f6 100644
--- a/tensorflow/core/kernels/depthwise_conv_op.h
+++ b/tensorflow/core/kernels/depthwise_conv_op.h
@@ -158,7 +158,7 @@ struct DepthwiseFilterPadOp {
       }
       // Pad the remainder of output to vector-register boundary.
       for (int64 j = 0; j < pad_size; ++j) {
-        padded_filter[output_base + vectorized_size + scalar_size + j] = static_cast<T>(0);
+        padded_filter[output_base + vectorized_size + scalar_size + j] = 0;
       }
     }
   }
@@ -266,7 +266,7 @@ struct DepthwiseInputCopyOp {
 
           // Pad the remainder of the output to vector register boundary.
           for (int64 d = 0; d < output_pad_size; ++d) {
-            in_buf[d] = static_cast<T>(0);
+            in_buf[d] = 0;
           }
           in_buf += output_pad_size;
 
diff --git a/tensorflow/core/kernels/depthwise_conv_op_gpu.cu.cc b/tensorflow/core/kernels/depthwise_conv_op_gpu.cu.cc
index 903aac5d68..ecfe51d599 100644
--- a/tensorflow/core/kernels/depthwise_conv_op_gpu.cu.cc
+++ b/tensorflow/core/kernels/depthwise_conv_op_gpu.cu.cc
@@ -105,7 +105,7 @@ __global__ void __launch_bounds__(1024, 2)
     const int input_row_end = input_row_start + filter_rows;
     const int input_col_end = input_col_start + filter_cols;
 
-    T sum = static_cast<T>(0);
+    T sum = 0;
 
     const int input_offset_temp = in_rows * OB;
     if (input_row_start >= 0 && input_col_start >= 0 &&
@@ -258,8 +258,8 @@ __global__ __launch_bounds__(1024, 2) void DepthwiseConv2dGPUKernelNHWCSmall(
     __syncthreads();
 
     if (depth_in_range) {
-      T sum1 = static_cast<T>(0);
-      T sum2 = static_cast<T>(0);
+      T sum1 = 0;
+      T sum2 = 0;
       int shared_offset = data_idx;
       const T* filter_ptr = filter_read_offset + shared_data;
       UNROLL for (int r = 0; r < filter_rows; ++r) {
@@ -369,7 +369,7 @@ __global__ void __launch_bounds__(1024, 2)
     const int input_row_end = input_row_start + filter_rows;
     const int input_col_end = input_col_start + filter_cols;
 
-    T sum = static_cast<T>(0);
+    T sum = 0;
     if (input_row_start >= 0 && input_col_start >= 0 &&
         input_row_end < in_rows && input_col_end < in_cols) {
       // Loop that doesn't need to check for boundary conditions.
@@ -529,8 +529,8 @@ __global__ __launch_bounds__(1024, 2) void DepthwiseConv2dGPUKernelNCHWSmall(
     __syncthreads();
 
     if (slice_in_range) {
-      T sum1 = static_cast<T>(0);
-      T sum2 = static_cast<T>(0);
+      T sum1 = 0;
+      T sum2 = 0;
       int shared_offset = data_idx;
       const T* filter_ptr = filter_read_offset + shared_data;
       UNROLL for (int r = 0; r < filter_rows; ++r) {
@@ -710,7 +710,6 @@ void LaunchDepthwiseConvOp<GPUDevice, T>::operator()(OpKernelContext* ctx,
                   "Launch of gpu kernel for DepthwiseConv2dGPULaunch failed"));
 }
 
-template struct LaunchDepthwiseConvOp<GPUDevice, Eigen::half>;
 template struct LaunchDepthwiseConvOp<GPUDevice, float>;
 template struct LaunchDepthwiseConvOp<GPUDevice, double>;
 
@@ -745,7 +744,7 @@ __global__ void __launch_bounds__(640, 2)
     const int in_r = (thread_id / in_depth / in_cols) % in_rows;
     const int b = thread_id / in_depth / in_cols / in_rows;
 
-    T sum = static_cast<T>(0);
+    T sum = 0;
 
     const int out_r_start =
         tf_max<int>(0, (in_r - filter_rows + pad_rows + stride) / stride);
@@ -811,7 +810,7 @@ __global__ void __launch_bounds__(640, 2)
     const int in_d = (thread_id / in_cols / in_rows) % in_depth;
     const int b = thread_id / in_depth / in_cols / in_rows;
 
-    T sum = static_cast<T>(0);
+    T sum = 0;
     const int out_d_start = in_d * depth_multiplier;
     const int out_d_end = out_d_start + depth_multiplier;
 
@@ -920,7 +919,6 @@ void LaunchDepthwiseConvBackpropInputOp<GPUDevice, T>::operator()(
                                "utGPULaunch failed"));
 }
 
-template struct LaunchDepthwiseConvBackpropInputOp<GPUDevice, Eigen::half>;
 template struct LaunchDepthwiseConvBackpropInputOp<GPUDevice, float>;
 template struct LaunchDepthwiseConvBackpropInputOp<GPUDevice, double>;
 
@@ -1633,7 +1631,6 @@ void LaunchDepthwiseConvBackpropFilterOp<GPUDevice, T>::operator()(
                                "terGPULaunch failed"));
 }
 
-template struct LaunchDepthwiseConvBackpropFilterOp<GPUDevice, Eigen::half>;
 template struct LaunchDepthwiseConvBackpropFilterOp<GPUDevice, float>;
 template struct LaunchDepthwiseConvBackpropFilterOp<GPUDevice, double>;
 }  // namespace tensorflow
diff --git a/tensorflow/core/kernels/dynamic_partition_op_gpu.cu.cc b/tensorflow/core/kernels/dynamic_partition_op_gpu.cu.cc
deleted file mode 100644
index 7249c8c66c..0000000000
--- a/tensorflow/core/kernels/dynamic_partition_op_gpu.cu.cc
+++ /dev/null
@@ -1,376 +0,0 @@
-/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-// The algorithm for dynamic partition has the following steps:
-// 1. Let N be the size of partitions. We initialize a new vector indices_in
-//    with the values 0, 1, 2, ..., N-1.
-// 2. We apply cub::DeviceRadixSort::SortPairs to the key - value pairs given
-//    by partitions and indices_in. This will result in two new vectors
-//    partitions_out and indices_out, with partitions_out sorted.
-// 3. The first dimension of outputs[i] is equal to the length of the interval
-//    of i-values in partitions_out. We determine it in two steps:
-//    - compute the starting and ending point of each interval,
-//    - subtract the starting and ending points to find the length.
-//    The result is placed in partition_count.
-// 4. Because partition_count is on the GPU, we bring it asynchronously to
-//    the CPU. Then we can allocate the output tensors.
-// 5. Finally, we use indices_out and the gather functor to collect the output.
-//    This works, because for each interval of i-values, indices_out points
-//    to the slices which should form output[i].
-
-#if GOOGLE_CUDA
-
-#define EIGEN_USE_GPU
-
-#include "external/cub_archive/cub/device/device_radix_sort.cuh"
-#include "tensorflow/core/common_runtime/gpu/gpu_event_mgr.h"
-#include "tensorflow/core/framework/op_kernel.h"
-#include "tensorflow/core/framework/register_types.h"
-#include "tensorflow/core/framework/tensor.h"
-#include "tensorflow/core/framework/types.h"
-#include "tensorflow/core/kernels/bounds_check.h"
-#include "tensorflow/core/kernels/fill_functor.h"
-#include "tensorflow/core/kernels/gather_functor_gpu.cu.h"
-#include "tensorflow/core/util/cuda_kernel_helper.h"
-
-namespace tensorflow {
-
-typedef Eigen::GpuDevice GPUDevice;
-
-namespace {
-
-template <typename T>
-__global__ void RangeInitKernel(const T start, const T delta, const int32 size,
-                                T* out) {
-  CUDA_1D_KERNEL_LOOP(i, size) { out[i] = start + i * delta; }
-}
-
-__global__ void FindEndpointsKernel(const int32* partitions, int32 size,
-                                    int32 nump, int32* start, int32* end) {
-  CUDA_1D_KERNEL_LOOP(i, size) {
-    int32 current = ldg(partitions + i);
-    if (FastBoundsCheck(current, nump)) {
-      if (i == 0)
-        start[current] = i;
-      else {
-        int32 before = ldg(partitions + i - 1);
-        if (before != current) start[current] = i;
-      }
-      if (i == size - 1)
-        end[current] = i + 1;
-      else {
-        int32 after = ldg(partitions + i + 1);
-        if (after != current) end[current] = i + 1;
-      }
-    }
-  }
-}
-
-// We create a local version of subtract, because the tf.subtract kernel
-// is not defined for int32. We use it to compute the length of an interval
-// by subtracting the endpoints.
-__global__ void IntervalLengthKernel(int32* start, int32 size, int32* end) {
-  CUDA_1D_KERNEL_LOOP(i, size) {
-    int32 start_point = ldg(start + i);
-    end[i] = end[i] - start_point;
-  }
-}
-
-// Initialize out with range start, start + delta, start + 2 * delta, ...
-// This is needed because tf.range has no GPU implementation.
-template <typename T>
-void RangeInit(const GPUDevice& d, const T start, const T delta,
-               const int32 size, typename TTypes<T>::Flat out) {
-  CudaLaunchConfig config = GetCudaLaunchConfig(size, d);
-  RangeInitKernel<
-      T><<<config.block_count, config.thread_per_block, 0, d.stream()>>>(
-      start, delta, size, out.data());
-}
-
-// Partitions is a sorted vector of N non-negative integer numbers.
-// This function computes the starting and ending points of each interval
-// of values.
-void ComputeIntervals(const GPUDevice& d, Tensor* partitions, int32 N,
-                      int32 nump, int32* start_ptr, int32* end_ptr) {
-  CudaLaunchConfig config = GetCudaLaunchConfig(N, d);
-  FindEndpointsKernel<<<config.block_count, config.thread_per_block, 0,
-                        d.stream()>>>(partitions->flat<int32>().data(), N, nump,
-                                      start_ptr, end_ptr);
-}
-
-// Subtract the ending points of each interval to obtain the interval length.
-void ComputeItvLength(const GPUDevice& d, int32 num, int32* start_ptr,
-                      int32* end_ptr) {
-  CudaLaunchConfig config = GetCudaLaunchConfig(num, d);
-  IntervalLengthKernel<<<config.block_count, config.thread_per_block, 0,
-                         d.stream()>>>(start_ptr, num, end_ptr);
-}
-
-template <typename T>
-void CallGatherKernel(const GPUDevice& d, const T* params, const int32* indices,
-                      T* out, int64 gather_dim_size, int64 indices_size,
-                      int64 slice_size, int64 out_size) {
-  CudaLaunchConfig config = GetCudaLaunchConfig(out_size, d);
-  GatherOpKernel<
-      T, int32,
-      true><<<config.block_count, config.thread_per_block, 0, d.stream()>>>(
-      params, indices, out, gather_dim_size, indices_size, slice_size,
-      out_size);
-}
-
-}  // namespace
-
-// The current implementation has memory cost on GPU
-// I + P + max(3N + R, O + N), where:
-// I - the size of the input
-// N - the size of the partitions tensor
-// R - the temporary storage used by cub::RadixSort, about 2N
-// P - the number of partitions
-// O - the size of the output
-// So roughly the cost is I + P + max(5N, O + N).
-template <typename T>
-class DynamicPartitionOpGPU : public AsyncOpKernel {
- public:
-  explicit DynamicPartitionOpGPU(OpKernelConstruction* c) : AsyncOpKernel(c) {
-    OP_REQUIRES_OK(c, c->GetAttr("num_partitions", &num_partitions_));
-    OP_REQUIRES(c, num_partitions_ >= 1,
-                errors::InvalidArgument("num_partitions must be at least 1"));
-  }
-
-  void AllocateTempSpace(OpKernelContext* c, int32 N, Tensor* indices_in,
-                         Tensor* partitions_out, Tensor* indices_out,
-                         DoneCallback done) {
-    int32 M = std::max(N, num_partitions_);
-    // indices_in will be made slightly larger to accomodate
-    // later computations.
-    OP_REQUIRES_OK_ASYNC(
-        c, c->allocate_temp(DT_INT32, TensorShape({M}), indices_in), done);
-    OP_REQUIRES_OK_ASYNC(
-        c, c->allocate_temp(DT_INT32, TensorShape({N}), partitions_out), done);
-    OP_REQUIRES_OK_ASYNC(
-        c, c->allocate_temp(DT_INT32, TensorShape({N}), indices_out), done);
-  }
-
-  void AllocateOutputs(OpKernelContext* c, const Tensor* data,
-                       const Tensor* partitions, const Tensor* partition_count,
-                       OpOutputList* Tout, DoneCallback done) {
-    auto e_part_count = partition_count->flat<int32>();
-    // Allocate output tensors of the right size
-    OP_REQUIRES_OK_ASYNC(c, c->output_list("outputs", Tout), done);
-    for (int p = 0; p < num_partitions_; p++) {
-      TensorShape shape;
-      shape.AddDim(e_part_count(p));
-      for (int i = partitions->dims(); i < data->dims(); i++) {
-        shape.AddDim(data->dim_size(i));
-      }
-      Tensor* out;
-      OP_REQUIRES_OK_ASYNC(c, Tout->allocate(p, shape, &out), done);
-    }
-  }
-
-  void ComputeAsync(OpKernelContext* c, DoneCallback done) {
-    const Tensor& data = c->input(0);
-    const Tensor& partitions = c->input(1);
-
-    OP_REQUIRES_ASYNC(
-        c, TensorShapeUtils::StartsWith(data.shape(), partitions.shape()),
-        errors::InvalidArgument("data.shape must start with partitions.shape, ",
-                                "got data.shape = ", data.shape().DebugString(),
-                                ", partitions.shape = ",
-                                partitions.shape().DebugString()),
-        done);
-
-    Tensor partition_count;
-
-    // We must handle the case of empty partitions separately,
-    // because kernels don't work with 0-sized tensors.
-    if (partitions.NumElements() == 0) {
-      AllocatorAttributes alloc_attr;
-      alloc_attr.set_on_host(true);
-      OP_REQUIRES_OK_ASYNC(
-          c, c->allocate_temp(DT_INT32, TensorShape({num_partitions_}),
-                              &partition_count, alloc_attr),
-          done);
-      auto e_part_count = partition_count.flat<int32>();
-      for (int i = 0; i < num_partitions_; i++) e_part_count(i) = 0;
-      OpOutputList outputs;
-      this->AllocateOutputs(c, &data, &partitions, &partition_count, &outputs,
-                            done);
-      if (c->status().ok()) done();
-      return;
-    }
-
-    // Prepare for counting.
-    OP_REQUIRES_OK_ASYNC(
-        c, c->allocate_temp(DT_INT32, TensorShape({num_partitions_}),
-                            &partition_count),
-        done);
-    Tensor indices_out;
-    // Count how many times each partition index occurs.
-    // Also sort the info in partitions and output it in indices_out,
-    // in preparation for the next step.
-    this->CountAndSortParts(c, &partitions, &partition_count, &indices_out,
-                            done);
-    if (!c->status().ok()) return;
-
-    // In order to allocate the output tensor we have to move partition_count
-    // to CPU.
-    auto* stream = c->op_device_context()->stream();
-    OP_REQUIRES_ASYNC(c, stream, errors::Internal("No GPU stream available."),
-                      done);
-    Tensor cpu_tensor;
-    AllocatorAttributes alloc_attr;
-    alloc_attr.set_on_host(true);
-    alloc_attr.set_gpu_compatible(true);
-    OP_REQUIRES_OK_ASYNC(
-        c, c->allocate_temp(partition_count.dtype(), partition_count.shape(),
-                            &cpu_tensor, alloc_attr),
-        done);
-    perftools::gputools::DeviceMemoryBase wrapped(
-        partition_count.flat<int32>().data(), num_partitions_ * sizeof(int32));
-    const bool status =
-        stream
-            ->ThenMemcpy(cpu_tensor.flat<int32>().data(), wrapped,
-                         num_partitions_ * sizeof(int32))
-            .ok();
-    OP_REQUIRES_ASYNC(
-        c, status,
-        errors::Internal("Failed to launch copy from device to host."), done);
-
-    // Keep a reference to partition_count so that the buffer
-    // is not deallocated at the end of the function, before
-    // memcpy is completed.
-    TensorReference partition_ref(partition_count);
-    auto wrapped_callback = [this, c, &data, &partitions, indices_out,
-                             partition_ref, cpu_tensor, done]() {
-      OpOutputList outputs;
-      this->AllocateOutputs(c, &data, &partitions, &cpu_tensor, &outputs, done);
-      if (!c->status().ok()) {
-        partition_ref.Unref();
-        return;
-      }
-      int32 N = partitions.NumElements();
-      int64 slice_size = data.NumElements() / N;
-      this->GatherSlices(c, &data, &indices_out, N, slice_size, outputs);
-      partition_ref.Unref();
-      done();
-    };
-
-    c->device()->tensorflow_gpu_device_info()->event_mgr->ThenExecute(
-        stream, wrapped_callback);
-  }
-
- protected:
-  void RadixSort(OpKernelContext* c, const Tensor* partitions,
-                 Tensor* indices_in, Tensor* partitions_out,
-                 Tensor* indices_out, DoneCallback done) {
-    int32 N = partitions->NumElements();
-    const GPUDevice& device = c->eigen_device<GPUDevice>();
-    const cudaStream_t& cu_stream = GetCudaStream(c);
-
-    // Initialize the indices_in tensor using the Range GPU kernel.
-    RangeInit(device, 0, 1, N, indices_in->flat<int32>());
-    // Obtain the pointers to inner buffers.
-    const int32* partitions_ptr = partitions->flat<int32>().data();
-    int32* partitions_out_ptr = partitions_out->flat<int32>().data();
-    int32* indices_in_ptr = indices_in->flat<int32>().data();
-    int32* indices_out_ptr = indices_out->flat<int32>().data();
-    // Determine temporary device storage requirements.
-    Tensor cub_temp_storage;
-    size_t temp_storage_bytes = 0;
-    cub::DeviceRadixSort::SortPairs(
-        NULL, temp_storage_bytes, partitions_ptr, partitions_out_ptr,
-        indices_in_ptr, indices_out_ptr, N, 0, sizeof(int32) * 8, cu_stream);
-    // Allocate temporary storage.
-    OP_REQUIRES_OK_ASYNC(
-        c, c->allocate_temp(
-               DT_INT8, TensorShape({static_cast<int64>(temp_storage_bytes)}),
-               &cub_temp_storage),
-        done);
-    // Radix-sort the partition information.
-    cub::DeviceRadixSort::SortPairs(
-        cub_temp_storage.flat<int8>().data(), temp_storage_bytes,
-        partitions_ptr, partitions_out_ptr, indices_in_ptr, indices_out_ptr, N,
-        0, sizeof(int32) * 8, cu_stream);
-  }  // At this point cub_temp_storage will be marked for deallocation.
-
-  void CountAndSortParts(OpKernelContext* c, const Tensor* partitions,
-                         Tensor* partition_count, Tensor* indices_out,
-                         DoneCallback done) {
-    const GPUDevice& device = c->eigen_device<GPUDevice>();
-    int32 N = partitions->NumElements();
-    Tensor indices_in;
-    Tensor partitions_out;
-
-    // Allocate memory for Radix-Sort.
-    this->AllocateTempSpace(c, N, &indices_in, &partitions_out, indices_out,
-                            done);
-    if (!c->status().ok()) return;
-    this->RadixSort(c, partitions, &indices_in, &partitions_out, indices_out,
-                    done);
-    if (!c->status().ok()) return;
-    // We still need a little bit of additional memory. However,
-    // we can reuse the indices_in tensor. We could also use atomic
-    // operations and no additional memory, but this approach seems faster.
-
-    // Zero-out the allocated memory.
-    functor::SetZeroFunctor<GPUDevice, int32> zero_functor;
-    zero_functor(device, partition_count->flat<int32>());
-    zero_functor(device, indices_in.flat<int32>());
-    // Obtain the pointers to inner buffers.
-    int32* start_ptr = indices_in.flat<int32>().data();
-    int32* end_ptr = partition_count->flat<int32>().data();
-    // Obtain the starting and ending points of each interval.
-    ComputeIntervals(device, &partitions_out, N, num_partitions_, start_ptr,
-                     end_ptr);
-    // Subtract to compute the number of appearances of each id.
-    ComputeItvLength(device, num_partitions_, start_ptr, end_ptr);
-  }  // At this point indices_in and partitions_out will be marked
-     // for deallocation.
-
-  void GatherSlices(OpKernelContext* c, const Tensor* data,
-                    const Tensor* indices, int32 N, int64 slice_size,
-                    OpOutputList& outs) {
-    const GPUDevice& device = c->eigen_device<GPUDevice>();
-    const int32* ind_base = indices->flat<int32>().data();
-    const T* data_base = data->flat<T>().data();
-
-    for (int p = 0; p < num_partitions_; p++) {
-      int32 indices_size = outs[p]->dim_size(0);
-      int64 out_size = outs[p]->NumElements();
-      T* out_base = outs[p]->flat<T>().data();
-      if (out_size > 0)
-        CallGatherKernel<T>(device, data_base, ind_base, out_base, N,
-                            indices_size, slice_size, out_size);
-      ind_base += indices_size;
-    }
-  }
-
-  int num_partitions_;
-};
-
-#define REGISTER_DYNAMIC_PARTITION_GPU(T)                                 \
-  REGISTER_KERNEL_BUILDER(                                                \
-      Name("DynamicPartition").Device(DEVICE_GPU).TypeConstraint<T>("T"), \
-      DynamicPartitionOpGPU<T>)
-
-TF_CALL_GPU_NUMBER_TYPES(REGISTER_DYNAMIC_PARTITION_GPU);
-TF_CALL_complex64(REGISTER_DYNAMIC_PARTITION_GPU);
-TF_CALL_complex128(REGISTER_DYNAMIC_PARTITION_GPU);
-#undef REGISTER_DYNAMIC_PARTITION_GPU
-
-}  // namespace tensorflow
-
-#endif  // GOOGLE_CUDA
diff --git a/tensorflow/core/kernels/dynamic_partition_op_test.cc b/tensorflow/core/kernels/dynamic_partition_op_test.cc
index 9a7ed0af21..0e8fbc0a67 100644
--- a/tensorflow/core/kernels/dynamic_partition_op_test.cc
+++ b/tensorflow/core/kernels/dynamic_partition_op_test.cc
@@ -16,7 +16,6 @@ limitations under the License.
 #include <functional>
 #include <memory>
 
-#include "tensorflow/core/common_runtime/kernel_benchmark_testlib.h"
 #include "tensorflow/core/framework/allocator.h"
 #include "tensorflow/core/framework/fake_input.h"
 #include "tensorflow/core/framework/node_def_builder.h"
@@ -24,14 +23,10 @@ limitations under the License.
 #include "tensorflow/core/framework/tensor.h"
 #include "tensorflow/core/framework/types.h"
 #include "tensorflow/core/framework/types.pb.h"
-#include "tensorflow/core/graph/node_builder.h"
-#include "tensorflow/core/graph/testlib.h"
 #include "tensorflow/core/kernels/ops_testutil.h"
 #include "tensorflow/core/kernels/ops_util.h"
 #include "tensorflow/core/lib/core/status_test_util.h"
-#include "tensorflow/core/lib/random/simple_philox.h"
 #include "tensorflow/core/platform/test.h"
-#include "tensorflow/core/platform/test_benchmark.h"
 
 namespace tensorflow {
 namespace {
@@ -158,58 +153,5 @@ TEST_F(DynamicPartitionOpTest, Error_IndexOutOfRange) {
       << s;
 }
 
-Node* DynamicPartitionNode(Graph* g, Node* in0, Node* in1, int num_partitions) {
-  Node* ret;
-  TF_CHECK_OK(NodeBuilder(g->NewName("n"), "DynamicPartition")
-                  .Input(in0)
-                  .Input(in1)
-                  .Attr("num_partitions", num_partitions)
-                  .Finalize(g, &ret));
-  return ret;
-}
-
-template <typename T>
-static Graph* DynamicPartition(int num_partitions, int dim) {
-  Graph* g = new Graph(OpRegistry::Global());
-  // Always use a 128MB buffer.
-  const int kRows = ((128 << 20) / sizeof(T)) / dim;
-  Tensor data(DataTypeToEnum<T>::value, TensorShape({kRows, dim}));
-  data.flat<T>().setRandom();
-
-  random::PhiloxRandom philox(301, 17);
-  random::SimplePhilox rnd(&philox);
-  Tensor partitions(DT_INT32, TensorShape({kRows}));
-  for (int i = 0; i < kRows; i++) {
-    partitions.flat<int32>()(i) = rnd.Uniform(num_partitions);
-  }
-  DynamicPartitionNode(g, test::graph::Constant(g, data),
-                       test::graph::Constant(g, partitions), num_partitions);
-  return g;
-}
-
-#define BM_DYNAMIC_PARTITION(DEVICE, T, num)                            \
-  static void BM_##DEVICE##_dynpart_##T##_##num(int iters, int dim) {   \
-    const int64 items = ((128 << 20) / sizeof(T));                      \
-    const int64 tot = static_cast<int64>(iters) * items;                \
-    testing::ItemsProcessed(tot);                                       \
-    testing::UseRealTime();                                             \
-    test::Benchmark(#DEVICE, DynamicPartition<T>(num, dim)).Run(iters); \
-  }                                                                     \
-  BENCHMARK(BM_##DEVICE##_dynpart_##T##_##num)->Arg(1)->Arg(256)
-
-BM_DYNAMIC_PARTITION(cpu, float, 2);
-BM_DYNAMIC_PARTITION(cpu, float, 100);
-BM_DYNAMIC_PARTITION(cpu, double, 2);
-BM_DYNAMIC_PARTITION(cpu, double, 100);
-BM_DYNAMIC_PARTITION(cpu, complex64, 2);
-BM_DYNAMIC_PARTITION(cpu, complex64, 100);
-
-BM_DYNAMIC_PARTITION(gpu, float, 2);
-BM_DYNAMIC_PARTITION(gpu, float, 100);
-BM_DYNAMIC_PARTITION(gpu, double, 2);
-BM_DYNAMIC_PARTITION(gpu, double, 100);
-BM_DYNAMIC_PARTITION(gpu, complex64, 2);
-BM_DYNAMIC_PARTITION(gpu, complex64, 100);
-
 }  // namespace
 }  // namespace tensorflow
diff --git a/tensorflow/core/kernels/fused_batch_norm_op.cc b/tensorflow/core/kernels/fused_batch_norm_op.cc
index 1688674eb7..0ecb829f34 100644
--- a/tensorflow/core/kernels/fused_batch_norm_op.cc
+++ b/tensorflow/core/kernels/fused_batch_norm_op.cc
@@ -54,20 +54,25 @@ struct FusedBatchNorm<CPUDevice, T, U> {
                   Tensor* batch_var_output, Tensor* saved_mean_output,
                   Tensor* saved_var_output, TensorFormat tensor_format,
                   bool is_training) {
+    // Currently U is ignored, since we only support the case where T and U are
+    // both float32.
+    // TODO(reedwm): Add float16 support, use U, and remove these asserts.
+    static_assert(std::is_same<T, float>::value, "T currently must be float.");
+    static_assert(std::is_same<U, float>::value, "U currently must be float.");
     OP_REQUIRES(context, tensor_format == FORMAT_NHWC,
                 errors::Internal("The CPU implementation of FusedBatchNorm "
                                  "only supports NHWC tensor format for now."));
     typename TTypes<T, 4>::ConstTensor x(x_input.tensor<T, 4>());
-    typename TTypes<U>::ConstVec scale(scale_input.vec<U>());
-    typename TTypes<U>::ConstVec offset(offset_input.vec<U>());
-    typename TTypes<U>::ConstVec estimated_mean(estimated_mean_input.vec<U>());
-    typename TTypes<U>::ConstVec estimated_variance(
-        estimated_variance_input.vec<U>());
+    typename TTypes<T>::ConstVec scale(scale_input.vec<T>());
+    typename TTypes<T>::ConstVec offset(offset_input.vec<T>());
+    typename TTypes<T>::ConstVec estimated_mean(estimated_mean_input.vec<T>());
+    typename TTypes<T>::ConstVec estimated_variance(
+        estimated_variance_input.vec<T>());
     typename TTypes<T, 4>::Tensor y(y_output->tensor<T, 4>());
-    typename TTypes<U>::Vec batch_mean(batch_mean_output->vec<U>());
-    typename TTypes<U>::Vec batch_var(batch_var_output->vec<U>());
-    typename TTypes<U>::Vec saved_mean(saved_mean_output->vec<U>());
-    typename TTypes<U>::Vec saved_var(saved_var_output->vec<U>());
+    typename TTypes<T>::Vec batch_mean(batch_mean_output->vec<T>());
+    typename TTypes<T>::Vec batch_var(batch_var_output->vec<T>());
+    typename TTypes<T>::Vec saved_mean(saved_mean_output->vec<T>());
+    typename TTypes<T>::Vec saved_var(saved_var_output->vec<T>());
 
     const CPUDevice& d = context->eigen_device<CPUDevice>();
 
@@ -88,15 +93,15 @@ struct FusedBatchNorm<CPUDevice, T, U> {
     bcast_spec.set(0, rest_size);
 #endif
 
-    auto x_rest_by_depth = x.reshape(rest_by_depth).template cast<U>();
+    auto x_rest_by_depth = x.reshape(rest_by_depth);
     const int rest_size_minus_one = (rest_size > 1) ? (rest_size - 1) : 1;
-    U rest_size_inv = static_cast<U>(1.0f / static_cast<U>(rest_size));
+    T rest_size_inv = static_cast<T>(1.0f / static_cast<T>(rest_size));
     // This adjustment is for Bessel's correction
-    U rest_size_adjust =
-        static_cast<U>(rest_size) / static_cast<U>(rest_size_minus_one);
+    T rest_size_adjust =
+        static_cast<T>(rest_size) / static_cast<T>(rest_size_minus_one);
 
-    Eigen::Tensor<U, 1, Eigen::RowMajor> mean(depth);
-    Eigen::Tensor<U, 1, Eigen::RowMajor> variance(depth);
+    Eigen::Tensor<T, 1, Eigen::RowMajor> mean(depth);
+    Eigen::Tensor<T, 1, Eigen::RowMajor> variance(depth);
     if (is_training) {
       mean.device(d) = (x_rest_by_depth.sum(reduce_dims) * rest_size_inv);
       batch_mean.device(d) = mean;
@@ -124,7 +129,7 @@ struct FusedBatchNorm<CPUDevice, T, U> {
     auto x_shifted =
         x_scaled + offset.reshape(one_by_depth).broadcast(bcast_spec);
 
-    y.reshape(rest_by_depth).device(d) = x_shifted.template cast<T>();
+    y.reshape(rest_by_depth).device(d) = x_shifted;
   }
 };
 
@@ -133,7 +138,7 @@ struct FusedBatchNormGrad<CPUDevice, T, U> {
   void operator()(OpKernelContext* context, const Tensor& y_backprop_input,
                   const Tensor& x_input, const Tensor& scale_input,
                   const Tensor& mean_input, const Tensor& variance_input,
-                  U epsilon, Tensor* x_backprop_output,
+                  T epsilon, Tensor* x_backprop_output,
                   Tensor* scale_backprop_output, Tensor* offset_backprop_output,
                   TensorFormat tensor_format) {
     OP_REQUIRES(context, tensor_format == FORMAT_NHWC,
@@ -142,12 +147,12 @@ struct FusedBatchNormGrad<CPUDevice, T, U> {
     typename TTypes<T, 4>::ConstTensor y_backprop(
         y_backprop_input.tensor<T, 4>());
     typename TTypes<T, 4>::ConstTensor x(x_input.tensor<T, 4>());
-    typename TTypes<U>::ConstVec scale(scale_input.vec<U>());
-    typename TTypes<U>::ConstVec mean(mean_input.vec<U>());
-    typename TTypes<U>::ConstVec variance(variance_input.vec<U>());
+    typename TTypes<T>::ConstVec scale(scale_input.vec<T>());
+    typename TTypes<T>::ConstVec mean(mean_input.vec<T>());
+    typename TTypes<T>::ConstVec variance(variance_input.vec<T>());
     typename TTypes<T, 4>::Tensor x_backprop(x_backprop_output->tensor<T, 4>());
-    typename TTypes<U>::Vec scale_backprop(scale_backprop_output->vec<U>());
-    typename TTypes<U>::Vec offset_backprop(offset_backprop_output->vec<U>());
+    typename TTypes<T>::Vec scale_backprop(scale_backprop_output->vec<T>());
+    typename TTypes<T>::Vec offset_backprop(offset_backprop_output->vec<T>());
 
     // Note: the following formulas are used to compute the gradients for
     // back propagation.
@@ -176,8 +181,8 @@ struct FusedBatchNormGrad<CPUDevice, T, U> {
     bcast_spec.set(0, rest_size);
 #endif
 
-    auto x_rest_by_depth = x.reshape(rest_by_depth).template cast<U>();
-    U rest_size_inv = static_cast<U>(1.0f / static_cast<U>(rest_size));
+    auto x_rest_by_depth = x.reshape(rest_by_depth);
+    T rest_size_inv = static_cast<T>(1.0f / static_cast<T>(rest_size));
 
     auto x_mean_rest_by_depth =
         mean.reshape(one_by_depth).broadcast(bcast_spec);
@@ -187,8 +192,7 @@ struct FusedBatchNormGrad<CPUDevice, T, U> {
         coef0.eval().reshape(one_by_depth).broadcast(bcast_spec);
     auto x_scaled = x_centered * coef0_rest_by_depth;
 
-    auto y_backprop_rest_by_depth =
-        y_backprop.eval().reshape(rest_by_depth).template cast<U>();
+    auto y_backprop_rest_by_depth = y_backprop.eval().reshape(rest_by_depth);
     scale_backprop.device(d) =
         (y_backprop_rest_by_depth * x_scaled).sum(reduce_dims);
     auto y_backprop_sum = y_backprop_rest_by_depth.sum(reduce_dims);
@@ -210,7 +214,7 @@ struct FusedBatchNormGrad<CPUDevice, T, U> {
                      .reshape(one_by_depth)
                      .broadcast(bcast_spec);
     x_backprop.reshape(rest_by_depth).device(d) =
-        (coef1 * (y_backprop_centered - x_centered * coef2)).template cast<T>();
+        coef1 * (y_backprop_centered - x_centered * coef2);
   }
 };
 
@@ -685,18 +689,6 @@ REGISTER_KERNEL_BUILDER(Name("FusedBatchNormGradV2")
                             .TypeConstraint<float>("U"),
                         FusedBatchNormGradOp<CPUDevice, float, float>);
 
-REGISTER_KERNEL_BUILDER(Name("FusedBatchNormV2")
-                            .Device(DEVICE_CPU)
-                            .TypeConstraint<Eigen::half>("T")
-                            .TypeConstraint<float>("U"),
-                        FusedBatchNormOp<CPUDevice, Eigen::half, float>);
-
-REGISTER_KERNEL_BUILDER(Name("FusedBatchNormGradV2")
-                            .Device(DEVICE_CPU)
-                            .TypeConstraint<Eigen::half>("T")
-                            .TypeConstraint<float>("U"),
-                        FusedBatchNormGradOp<CPUDevice, Eigen::half, float>);
-
 #if GOOGLE_CUDA
 
 REGISTER_KERNEL_BUILDER(
diff --git a/tensorflow/core/kernels/fused_batch_norm_op.h b/tensorflow/core/kernels/fused_batch_norm_op.h
index 3af104bf95..38b24d7011 100644
--- a/tensorflow/core/kernels/fused_batch_norm_op.h
+++ b/tensorflow/core/kernels/fused_batch_norm_op.h
@@ -92,28 +92,26 @@ struct FusedBatchNormFreezeGrad {
     // offset_backprop  = sum(y_backprop)
     // scale_backprop = y_backprop * ((x - pop_mean) * rsqrt(pop_var + epsilon))
     // x_backprop = y_backprop * (scale * rsqrt(pop_var + epsilon))
-
-    auto y_backprop_rest_by_depth =
-        y_backprop.reshape(rest_by_depth).template cast<U>();
-    auto input_rest_by_depth = input.reshape(rest_by_depth).template cast<U>();
-
-    offset_backprop.device(d) = y_backprop_rest_by_depth.sum(reduction_axis);
+    offset_backprop.device(d) = y_backprop.reshape(rest_by_depth)
+                                    .template cast<U>()
+                                    .sum(reduction_axis);
 
     // scratch1 = rsqrt(pop_var + epsilon)
     scratch1.device(d) = (pop_var + pop_var.constant(epsilon)).rsqrt();
 
     // scratch2 = sum(y_backprop * (x - mean))
     scratch2.device(d) =
-        (y_backprop_rest_by_depth *
-         (input_rest_by_depth -
+        (y_backprop.reshape(rest_by_depth).template cast<U>() *
+         (input.reshape(rest_by_depth).template cast<U>() -
           pop_mean.reshape(one_by_depth).broadcast(rest_by_one)))
             .sum(reduction_axis);
 
     x_backprop.reshape(rest_by_depth).device(d) =
-        (y_backprop_rest_by_depth * ((scratch1 * scale)
-                                         .eval()
-                                         .reshape(one_by_depth)
-                                         .broadcast(rest_by_one)))
+        (y_backprop.reshape(rest_by_depth).template cast<U>() *
+         ((scratch1 * scale)
+              .eval()
+              .reshape(one_by_depth)
+              .broadcast(rest_by_one)))
             .template cast<T>();
     scale_backprop.device(d) = scratch2 * scratch1;
   }
diff --git a/tensorflow/core/kernels/lmdb_reader_op.cc b/tensorflow/core/kernels/lmdb_reader_op.cc
index 31a427f2c9..3bb07301b5 100755
--- a/tensorflow/core/kernels/lmdb_reader_op.cc
+++ b/tensorflow/core/kernels/lmdb_reader_op.cc
@@ -36,7 +36,7 @@ class LMDBReader : public ReaderBase {
 
   Status OnWorkStartedLocked() override {
     MDB_CHECK(mdb_env_create(&mdb_env_));
-    int flags = MDB_RDONLY | MDB_NOTLS | MDB_NOLOCK;
+    int flags = MDB_RDONLY | MDB_NOTLS;
 
     // Check if the LMDB filename is actually a file instead of a directory.
     // If so, set appropriate flags so we can open it.
@@ -57,13 +57,10 @@ class LMDBReader : public ReaderBase {
     if (mdb_env_ != nullptr) {
       if (mdb_cursor_) {
         mdb_cursor_close(mdb_cursor_);
-        mdb_cursor_ = nullptr;
       }
-      mdb_dbi_close(mdb_env_, mdb_dbi_);
       mdb_txn_abort(mdb_txn_);
+      mdb_dbi_close(mdb_env_, mdb_dbi_);
       mdb_env_close(mdb_env_);
-      mdb_txn_ = nullptr;
-      mdb_dbi_ = 0;
       mdb_env_ = nullptr;
     }
     return Status::OK();
diff --git a/tensorflow/core/kernels/maxpooling_op.cc b/tensorflow/core/kernels/maxpooling_op.cc
index 157ce106ce..e2cf605811 100644
--- a/tensorflow/core/kernels/maxpooling_op.cc
+++ b/tensorflow/core/kernels/maxpooling_op.cc
@@ -20,6 +20,7 @@ limitations under the License.
 #include "tensorflow/core/kernels/maxpooling_op.h"
 
 #include <vector>
+#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
 #include "tensorflow/core/common_runtime/device.h"
 #include "tensorflow/core/framework/numeric_op.h"
 #include "tensorflow/core/framework/op_kernel.h"
@@ -33,11 +34,9 @@ limitations under the License.
 #include "tensorflow/core/kernels/pooling_ops_common.h"
 #include "tensorflow/core/lib/core/errors.h"
 #include "tensorflow/core/lib/gtl/array_slice.h"
-#include "tensorflow/core/util/env_var.h"
 #include "tensorflow/core/util/padding.h"
 #include "tensorflow/core/util/tensor_format.h"
 #include "tensorflow/core/util/use_cudnn.h"
-#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
 
 #if GOOGLE_CUDA
 #include "tensorflow/core/kernels/maxpooling_op_gpu.h"
@@ -359,7 +358,6 @@ class MaxPoolingGradOp<Eigen::GpuDevice, T> : public OpKernel {
     OP_REQUIRES_OK(context, context->GetAttr("padding", &padding_));
 
     use_dnn_ = CanUseCudnn();
-    ReadBoolFromEnvVar("TF_ENABLE_MAXPOOL_NANPROP", false, &propagate_nans_);
   }
 
   void Compute(OpKernelContext* context) override {
@@ -407,7 +405,7 @@ class MaxPoolingGradOp<Eigen::GpuDevice, T> : public OpKernel {
       DnnPoolingGradOp<T>::Compute(
           context, perftools::gputools::dnn::PoolingMode::kMaximum, ksize,
           stride, padding_, data_format_, &tensor_in, &tensor_out, out_backprop,
-          output_shape, propagate_nans_);
+          output_shape);
     } else {
       CHECK(data_format_ == FORMAT_NHWC)
           << "Non-Cudnn MaxPoolGrad only supports NHWC format";
@@ -422,7 +420,6 @@ class MaxPoolingGradOp<Eigen::GpuDevice, T> : public OpKernel {
   Padding padding_;
   TensorFormat data_format_;
   bool use_dnn_;
-  bool propagate_nans_;
 };
 
 #endif  // GOOGLE_CUDA
@@ -887,8 +884,6 @@ class MaxPoolingWithArgmaxOp : public OpKernel {
     OP_REQUIRES(context, ksize_[0] == 1 && stride_[0] == 1,
                 errors::Unimplemented(
                     "Pooling is not yet supported on the batch dimension."));
-
-    ReadBoolFromEnvVar("TF_ENABLE_MAXPOOL_NANPROP", false, &propagate_nans_);
   }
 
   void Compute(OpKernelContext* context) override {
@@ -907,15 +902,14 @@ class MaxPoolingWithArgmaxOp : public OpKernel {
     Tensor* argmax = nullptr;
     OP_REQUIRES_OK(context, context->allocate_output(1, out_shape, &argmax));
 
-    LaunchMaxPoolingWithArgmax<Device, T>::launch(
-        context, params, tensor_in, output, argmax, propagate_nans_);
+    LaunchMaxPoolingWithArgmax<Device, T>::launch(context, params, tensor_in,
+                                                  output, argmax);
   }
 
  private:
   std::vector<int32> ksize_;
   std::vector<int32> stride_;
   Padding padding_;
-  bool propagate_nans_;
 };
 
 template <typename Device, typename T>
@@ -1051,8 +1045,6 @@ class MaxPoolingNoMaskOp<GPUDevice, T> : public OpKernel {
                 errors::Unimplemented(
                     "Pooling is not yet supported on the batch dimension."));
     use_dnn_ = CanUseCudnn();
-
-    ReadBoolFromEnvVar("TF_ENABLE_MAXPOOL_NANPROP", false, &propagate_nans_);
   }
 
   void Compute(OpKernelContext* context) override {
@@ -1076,10 +1068,9 @@ class MaxPoolingNoMaskOp<GPUDevice, T> : public OpKernel {
 
     // These is_int8x4 checks avoid linker errors for missing qint8 kernels.
     if (!is_int8x4 && use_dnn_ && data_format_ == FORMAT_NCHW) {
-      DnnPoolingOp<T>::Compute(context,
-                               perftools::gputools::dnn::PoolingMode::kMaximum,
-                               ksize_, stride_, padding_, data_format_,
-                               tensor_in, out_shape, propagate_nans_);
+      DnnPoolingOp<T>::Compute(
+          context, perftools::gputools::dnn::PoolingMode::kMaximum, ksize_,
+          stride_, padding_, data_format_, tensor_in, out_shape);
     } else {
       Tensor* output = nullptr;
       OP_REQUIRES_OK(context, context->allocate_output(0, out_shape, &output));
@@ -1088,7 +1079,7 @@ class MaxPoolingNoMaskOp<GPUDevice, T> : public OpKernel {
                                                            tensor_in, output);
       } else if (data_format_ == FORMAT_NHWC) {
         LaunchMaxPoolingNoMask<Device, T>::launch(context, params, tensor_in,
-                                                  output, propagate_nans_);
+                                                  output);
       } else {
         LOG(FATAL) << "MaxPool currently only supports the following (layout, "
                       "type) combinations: (NHWC, non-qint8), "
@@ -1107,7 +1098,6 @@ class MaxPoolingNoMaskOp<GPUDevice, T> : public OpKernel {
   Padding padding_;
   TensorFormat data_format_;
   bool use_dnn_;
-  bool propagate_nans_;
 };
 
 template <typename T>
@@ -1137,7 +1127,6 @@ class MaxPoolingNoMaskV2Op<GPUDevice, T> : public OpKernel {
     }
     OP_REQUIRES_OK(context, context->GetAttr("padding", &padding_));
     use_dnn_ = CanUseCudnn();
-    ReadBoolFromEnvVar("TF_ENABLE_MAXPOOL_NANPROP", false, &propagate_nans_);
   }
 
   void Compute(OpKernelContext* context) override {
@@ -1179,17 +1168,16 @@ class MaxPoolingNoMaskV2Op<GPUDevice, T> : public OpKernel {
         ShapeFromFormat(data_format_, params.tensor_in_batch, params.out_height,
                         params.out_width, params.depth);
     if (use_dnn_ && data_format_ == FORMAT_NCHW) {
-      DnnPoolingOp<T>::Compute(context,
-                               perftools::gputools::dnn::PoolingMode::kMaximum,
-                               ksize, stride, padding_, data_format_, tensor_in,
-                               out_shape, propagate_nans_);
+      DnnPoolingOp<T>::Compute(
+          context, perftools::gputools::dnn::PoolingMode::kMaximum, ksize,
+          stride, padding_, data_format_, tensor_in, out_shape);
     } else {
       CHECK(data_format_ == FORMAT_NHWC)
           << "Non-Cudnn MaxPool only supports NHWC format";
       Tensor* output = nullptr;
       OP_REQUIRES_OK(context, context->allocate_output(0, out_shape, &output));
       LaunchMaxPoolingNoMask<Device, T>::launch(context, params, tensor_in,
-                                                output, propagate_nans_);
+                                                output);
     }
   }
 
@@ -1199,20 +1187,18 @@ class MaxPoolingNoMaskV2Op<GPUDevice, T> : public OpKernel {
   Padding padding_;
   TensorFormat data_format_;
   bool use_dnn_;
-  bool propagate_nans_;
 };
 
 template <typename T>
 struct LaunchMaxPoolingNoMask<Eigen::GpuDevice, T> {
   static void launch(OpKernelContext* context, const PoolParameters& params,
-                     const Tensor& input, Tensor* output, bool propagate_nans) {
+                     const Tensor& input, Tensor* output) {
     bool status = functor::MaxPoolForwardWithOptionalArgmax<T>()(
         input.flat<T>().data(), params.tensor_in_batch, params.tensor_in_rows,
         params.tensor_in_cols, params.depth, params.out_height,
         params.out_width, params.window_rows, params.window_cols,
         params.row_stride, params.col_stride, params.pad_rows, params.pad_cols,
-        output->flat<T>().data(), nullptr, context->eigen_gpu_device(),
-        propagate_nans);
+        output->flat<T>().data(), nullptr, context->eigen_gpu_device());
     if (!status) {
       context->SetStatus(
           errors::Internal("Failed launching MaxPoolForwardNoMask"));
@@ -1223,8 +1209,7 @@ struct LaunchMaxPoolingNoMask<Eigen::GpuDevice, T> {
 template <typename T>
 struct LaunchMaxPoolingWithArgmax<Eigen::GpuDevice, T> {
   static void launch(OpKernelContext* context, const PoolParameters& params,
-                     const Tensor& input, Tensor* output, Tensor* argmax,
-                     bool propagate_nans) {
+                     const Tensor& input, Tensor* output, Tensor* argmax) {
     bool status = functor::MaxPoolForwardWithOptionalArgmax<T>()(
         input.flat<T>().data(), params.tensor_in_batch, params.tensor_in_rows,
         params.tensor_in_cols, params.depth, params.out_height,
@@ -1232,7 +1217,7 @@ struct LaunchMaxPoolingWithArgmax<Eigen::GpuDevice, T> {
         params.row_stride, params.col_stride, params.pad_rows, params.pad_cols,
         output->flat<T>().data(),
         reinterpret_cast<int64*>(argmax->flat<int64>().data()),
-        context->eigen_gpu_device(), propagate_nans);
+        context->eigen_gpu_device());
     if (!status) {
       context->SetStatus(
           errors::Internal("Failed launching MaxPoolForwardWithArgmax"));
diff --git a/tensorflow/core/kernels/maxpooling_op_gpu.cu.cc b/tensorflow/core/kernels/maxpooling_op_gpu.cu.cc
index d96b844383..26f5274804 100644
--- a/tensorflow/core/kernels/maxpooling_op_gpu.cu.cc
+++ b/tensorflow/core/kernels/maxpooling_op_gpu.cu.cc
@@ -29,15 +29,6 @@ limitations under the License.
 
 namespace tensorflow {
 namespace {
-template <bool propagate_nans, typename dtype>
-EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool IsGreaterThan(dtype a, dtype b) {
-  if (propagate_nans) {
-    return !(a <= b);
-  } else {
-    return a > b;
-  }
-}
-
 // This is Yangqing's custom kernel for the maxpooling operation. There are
 // three functions: MaxPoolForwardNCHW and MaxPoolForwardNHWC are the two
 // forward functions, dealing with the forward case. MaxPoolBackward is the
@@ -60,7 +51,7 @@ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool IsGreaterThan(dtype a, dtype b) {
 // const int output_size = batch * channels * pooled_height * pooled_width;
 // MaxPoolForwardNCHW<<<(output_size + kThreadsPerBlock - 1) / kThreadsPerBlock,
 //                      kThreadsPerBlock, 0, cuda_stream>>>(...);
-template <bool propagate_nans, typename dtype>
+template <typename dtype>
 __global__ void MaxPoolForwardNCHW(const int nthreads, const dtype* bottom_data,
                                    const int channels, const int height,
                                    const int width, const int pooled_height,
@@ -86,7 +77,7 @@ __global__ void MaxPoolForwardNCHW(const int nthreads, const dtype* bottom_data,
     for (int h = hstart; h < hend; ++h) {
       for (int w = wstart; w < wend; ++w) {
         int idx = c * height * width + h * width + w;
-        if (IsGreaterThan<propagate_nans>(bottom_data_n[idx], maxval)) {
+        if (bottom_data_n[idx] > maxval) {
           maxidx = idx;
           maxval = bottom_data_n[idx];
         }
@@ -135,7 +126,7 @@ __global__ void MaxPoolForwardNoMaskKernel_NCHW_VECT_C(
   }
 }
 
-template <bool propagate_nans, typename dtype>
+template <typename dtype>
 __global__ void MaxPoolForwardNHWC(const int nthreads, const dtype* bottom_data,
                                    const int height, const int width,
                                    const int channels, const int pooled_height,
@@ -162,7 +153,7 @@ __global__ void MaxPoolForwardNHWC(const int nthreads, const dtype* bottom_data,
     for (int h = hstart; h < hend; ++h) {
       for (int w = wstart; w < wend; ++w) {
         int idx = (h * width + w) * channels + c;
-        if (IsGreaterThan<propagate_nans>(bottom_data_n[idx], maxval)) {
+        if (bottom_data_n[idx] > maxval) {
           maxidx = idx;
           maxval = bottom_data_n[idx];
         }
@@ -399,24 +390,15 @@ bool MaxPoolForwardWithOptionalArgmax<T>::operator()(
     const int channels, const int pooled_height, const int pooled_width,
     const int kernel_h, const int kernel_w, const int stride_h,
     const int stride_w, const int pad_t, const int pad_l, T* top_data,
-    int64* mask, const Eigen::GpuDevice& d, bool propagate_nans) {
+    int64* mask, const Eigen::GpuDevice& d) {
   const int kThreadsPerBlock = 1024;
   const int output_size = batch * channels * pooled_height * pooled_width;
-  if (propagate_nans) {
-    MaxPoolForwardNHWC<true>
-        <<<(output_size + kThreadsPerBlock - 1) / kThreadsPerBlock,
-           kThreadsPerBlock, 0, d.stream()>>>
-        (output_size, bottom_data, height, width, channels, pooled_height,
-         pooled_width, kernel_h, kernel_w, stride_h, stride_w, pad_t, pad_l,
-         top_data, mask);
-  } else {
-    MaxPoolForwardNHWC<false>
-        <<<(output_size + kThreadsPerBlock - 1) / kThreadsPerBlock,
-           kThreadsPerBlock, 0, d.stream()>>>
-        (output_size, bottom_data, height, width, channels, pooled_height,
-         pooled_width, kernel_h, kernel_w, stride_h, stride_w, pad_t, pad_l,
-         top_data, mask);
-  }
+
+  MaxPoolForwardNHWC<<<(output_size + kThreadsPerBlock - 1) / kThreadsPerBlock,
+                       kThreadsPerBlock, 0, d.stream()>>>(
+      output_size, bottom_data, height, width, channels, pooled_height,
+      pooled_width, kernel_h, kernel_w, stride_h, stride_w, pad_t, pad_l,
+      top_data, mask);
   return d.ok();
 }
 
diff --git a/tensorflow/core/kernels/maxpooling_op_gpu.h b/tensorflow/core/kernels/maxpooling_op_gpu.h
index 38ebb34248..34203797cf 100644
--- a/tensorflow/core/kernels/maxpooling_op_gpu.h
+++ b/tensorflow/core/kernels/maxpooling_op_gpu.h
@@ -39,7 +39,7 @@ struct MaxPoolForwardWithOptionalArgmax {
                   const int pooled_width, const int kernel_h,
                   const int kernel_w, const int stride_h, const int stride_w,
                   const int pad_t, const int pad_l, T* top_data, int64* mask,
-                  const Eigen::GpuDevice& d, bool propagate_nans);
+                  const Eigen::GpuDevice& d);
 };
 
 struct MaxPoolForwardNoMask_NCHW_VECT_C {
diff --git a/tensorflow/core/kernels/mkl_conv_grad_filter_ops.cc b/tensorflow/core/kernels/mkl_conv_grad_filter_ops.cc
index f291281108..9080bf7be8 100644
--- a/tensorflow/core/kernels/mkl_conv_grad_filter_ops.cc
+++ b/tensorflow/core/kernels/mkl_conv_grad_filter_ops.cc
@@ -45,12 +45,12 @@ limitations under the License.
 #ifdef INTEL_MKL_DNN
 #include "mkldnn.hpp"
 
-using mkldnn::stream;
 using mkldnn::prop_kind;
+using mkldnn::stream;
 
-using mkldnn::convolution_forward;
 using mkldnn::convolution_backward_weights;
 using mkldnn::convolution_direct;
+using mkldnn::convolution_forward;
 
 #endif
 
@@ -463,12 +463,13 @@ class MklConv2DCustomBackpropFilterOp : public OpKernel {
 
       // Generate input shapes.
       TensorShape filter_shape;
-      OP_REQUIRES(context, TensorShapeUtils::IsVector(filter_tensor.shape()),
-        errors::InvalidArgument(
+      OP_REQUIRES(
+          context, TensorShapeUtils::IsVector(filter_tensor.shape()),
+          errors::InvalidArgument(
               "Conv2DBackpropFilter: filter_sizes input must be 1-dim, not ",
               filter_tensor.dims()));
       OP_REQUIRES_OK(context, TensorShapeUtils::MakeShape(
-                        filter_tensor.vec<int32>(), &filter_shape));
+                                  filter_tensor.vec<int32>(), &filter_shape));
       TensorShape input_shape = input_tensor.shape();
       TensorShape obp_shape = obp_tensor.shape();
 
@@ -480,27 +481,26 @@ class MklConv2DCustomBackpropFilterOp : public OpKernel {
 
       // Get forward convolution parameters.
       MklDnnConvUtil conv_utl(context, strides_, padding_, data_format_);
-      conv_utl.GetConvFwdSizesInMklOrder(input_shape, filter_shape,
-                                         &fwd_input_dims, &fwd_filter_dims,
-                                         &strides,
-                                         &fwd_output_dims_tf_order,
-                                         &fwd_output_dims,
-                                         &padding_l, &padding_r);
+      conv_utl.GetConvFwdSizesInMklOrder(
+          input_shape, filter_shape, &fwd_input_dims, &fwd_filter_dims,
+          &strides, &fwd_output_dims_tf_order, &fwd_output_dims, &padding_l,
+          &padding_r);
       if (!context->status().ok()) return;
 
       // Create Convolution forward descriptor since Convolution backward
       // API needs it. For that, we first need to create input, filter
       // and output memory descriptors.
       auto mkl_data_format = TFDataFormatToMklDnnDataFormat(data_format_);
-      auto fwd_src_md = memory::desc(fwd_input_dims, MklDnnType<T>(),
-                                     mkl_data_format);
-      auto fwd_filter_md = memory::desc(fwd_filter_dims, MklDnnType<T>(),
-                                        memory::format::hwio);
-      auto fwd_out_md = memory::desc(fwd_output_dims, MklDnnType<T>(),
-                                     mkl_data_format);
-      auto fwd_desc = convolution_forward::desc(prop_kind::forward,
-            convolution_direct, fwd_src_md, fwd_filter_md, fwd_out_md,
-            strides, padding_l, padding_r, TFPaddingToMklDnnPadding(padding_));
+      auto fwd_src_md =
+          memory::desc(fwd_input_dims, MklDnnType<T>(), mkl_data_format);
+      auto fwd_filter_md =
+          memory::desc(fwd_filter_dims, MklDnnType<T>(), memory::format::hwio);
+      auto fwd_out_md =
+          memory::desc(fwd_output_dims, MklDnnType<T>(), mkl_data_format);
+      auto fwd_desc = convolution_forward::desc(
+          prop_kind::forward, convolution_direct, fwd_src_md, fwd_filter_md,
+          fwd_out_md, strides, padding_l, padding_r,
+          TFPaddingToMklDnnPadding(padding_));
       auto fwd_pd = convolution_forward::primitive_desc(fwd_desc, cpu_engine);
 
       // Allocate output tensor and shape
@@ -537,23 +537,22 @@ class MklConv2DCustomBackpropFilterOp : public OpKernel {
       output.SetOpMemDesc(bwd_output_dims, memory::format::any);
 
       // Create convolution backward weights primitive.
-      auto bwd_desc = convolution_backward_weights::desc(convolution_direct,
-                          input.GetOpMemDesc(), output.GetOpMemDesc(),
-                          outbackprop.GetOpMemDesc(), strides, padding_l,
-                          padding_r, TFPaddingToMklDnnPadding(padding_));
+      auto bwd_desc = convolution_backward_weights::desc(
+          convolution_direct, input.GetOpMemDesc(), output.GetOpMemDesc(),
+          outbackprop.GetOpMemDesc(), strides, padding_l, padding_r,
+          TFPaddingToMklDnnPadding(padding_));
 
-      auto bwd_pd = convolution_backward_weights::primitive_desc(bwd_desc,
-                                                              cpu_engine,
-                                                              fwd_pd);
+      auto bwd_pd = convolution_backward_weights::primitive_desc(
+          bwd_desc, cpu_engine, fwd_pd);
 
       PrepareAndExecutePrimitive(bwd_pd, &input, &outbackprop, &output);
-    } catch (mkldnn::error &e) {
-     string error_msg = "Status: " + std::to_string(e.status) +
-                       ", message: " + string(e.message) +
-                       ", in file " + string(__FILE__) + ":" +
-                       std::to_string(__LINE__);
-     OP_REQUIRES_OK(context, errors::Aborted("Operation received an exception:",
-                                            error_msg));
+    } catch (mkldnn::error& e) {
+      string error_msg = "Status: " + std::to_string(e.status) +
+                         ", message: " + string(e.message) + ", in file " +
+                         string(__FILE__) + ":" + std::to_string(__LINE__);
+      OP_REQUIRES_OK(
+          context,
+          errors::Aborted("Operation received an exception:", error_msg));
     }
   }
 
@@ -564,9 +563,8 @@ class MklConv2DCustomBackpropFilterOp : public OpKernel {
 
   // Prepare and execute net - checks for input and output reorders.
   void PrepareAndExecutePrimitive(
-                  const convolution_backward_weights::primitive_desc& conv_pd,
-                  MklDnnData<T>* input, MklDnnData<T>* obp,
-                  MklDnnData<T>* output) {
+      const convolution_backward_weights::primitive_desc& conv_pd,
+      MklDnnData<T>* input, MklDnnData<T>* obp, MklDnnData<T>* output) {
     // Create reorders between user layout and MKL layout if it is needed and
     // add it to the net before convolution.
     std::vector<primitive> net;
@@ -577,10 +575,10 @@ class MklConv2DCustomBackpropFilterOp : public OpKernel {
     // output side, we will prepare reorder primitive in case output
     // reorder to user memory is required.
     bool output_reorder_required = output->PrepareReorderToUserMemIfReq(
-                                      conv_pd.diff_weights_primitive_desc());
+        conv_pd.diff_weights_primitive_desc());
 
-    net.push_back(convolution_backward_weights(conv_pd, input->GetOpMem(),
-                                    obp->GetOpMem(), output->GetOpMem()));
+    net.push_back(convolution_backward_weights(
+        conv_pd, input->GetOpMem(), obp->GetOpMem(), output->GetOpMem()));
 
     // Insert reorder primitive in the net for output reorder if reorder is
     // required.
diff --git a/tensorflow/core/kernels/mkl_conv_grad_input_ops.cc b/tensorflow/core/kernels/mkl_conv_grad_input_ops.cc
index 4a47d0463e..4b6bf92e42 100644
--- a/tensorflow/core/kernels/mkl_conv_grad_input_ops.cc
+++ b/tensorflow/core/kernels/mkl_conv_grad_input_ops.cc
@@ -23,6 +23,8 @@ limitations under the License.
 #define EIGEN_USE_THREADS
 #include <algorithm>
 #include <vector>
+#include "mkl_dnn.h"
+#include "mkl_dnn_types.h"
 #include "tensorflow/core/framework/numeric_op.h"
 #include "tensorflow/core/framework/op_kernel.h"
 #include "tensorflow/core/framework/register_types.h"
@@ -41,18 +43,16 @@ limitations under the License.
 #include "tensorflow/core/util/tensor_format.h"
 #include "tensorflow/core/util/use_cudnn.h"
 #include "tensorflow/core/util/work_sharder.h"
-#include "mkl_dnn.h"
-#include "mkl_dnn_types.h"
 
 #ifdef INTEL_MKL_DNN
 #include "mkldnn.hpp"
 
-using mkldnn::stream;
 using mkldnn::prop_kind;
+using mkldnn::stream;
 
-using mkldnn::convolution_forward;
-using mkldnn::convolution_direct;
 using mkldnn::convolution_backward_data;
+using mkldnn::convolution_direct;
+using mkldnn::convolution_forward;
 #endif
 
 namespace tensorflow {
@@ -397,12 +397,13 @@ class MklConv2DCustomBackpropInputOp : public OpKernel {
 
       // Generate input shape.
       TensorShape input_shape;
-      OP_REQUIRES(context, TensorShapeUtils::IsVector(input_tensor.shape()),
-        errors::InvalidArgument(
+      OP_REQUIRES(
+          context, TensorShapeUtils::IsVector(input_tensor.shape()),
+          errors::InvalidArgument(
               "Conv2DBackpropInput: input_sizes input must be 1-dim, not ",
               input_tensor.dims()));
       OP_REQUIRES_OK(context, TensorShapeUtils::MakeShape(
-                        input_tensor.vec<int32>(), &input_shape));
+                                  input_tensor.vec<int32>(), &input_shape));
       TensorShape filter_shape = filter_tensor.shape();
       TensorShape obp_shape = obp_tensor.shape();
 
@@ -414,27 +415,26 @@ class MklConv2DCustomBackpropInputOp : public OpKernel {
 
       // Get forward convolution parameters.
       MklDnnConvUtil conv_utl(context, strides_, padding_, data_format_);
-      conv_utl.GetConvFwdSizesInMklOrder(input_shape, filter_shape,
-                                         &fwd_input_dims, &fwd_filter_dims,
-                                         &strides,
-                                         &fwd_output_dims_tf_order,
-                                         &fwd_output_dims,
-                                         &padding_l, &padding_r);
+      conv_utl.GetConvFwdSizesInMklOrder(
+          input_shape, filter_shape, &fwd_input_dims, &fwd_filter_dims,
+          &strides, &fwd_output_dims_tf_order, &fwd_output_dims, &padding_l,
+          &padding_r);
       if (!context->status().ok()) return;
 
       // Create Convolution forward descriptor since Convolution backward
       // API needs it. For that, we first need to create input, filter
       // and output memory descriptors.
       auto mkl_data_format = TFDataFormatToMklDnnDataFormat(data_format_);
-      auto fwd_src_md = memory::desc(fwd_input_dims, MklDnnType<T>(),
-                                     mkl_data_format);
-      auto fwd_filter_md = memory::desc(fwd_filter_dims, MklDnnType<T>(),
-                                        memory::format::hwio);
-      auto fwd_out_md = memory::desc(fwd_output_dims, MklDnnType<T>(),
-                                     mkl_data_format);
-      auto fwd_desc = convolution_forward::desc(prop_kind::forward,
-            convolution_direct, fwd_src_md, fwd_filter_md, fwd_out_md,
-            strides, padding_l, padding_r, TFPaddingToMklDnnPadding(padding_));
+      auto fwd_src_md =
+          memory::desc(fwd_input_dims, MklDnnType<T>(), mkl_data_format);
+      auto fwd_filter_md =
+          memory::desc(fwd_filter_dims, MklDnnType<T>(), memory::format::hwio);
+      auto fwd_out_md =
+          memory::desc(fwd_output_dims, MklDnnType<T>(), mkl_data_format);
+      auto fwd_desc = convolution_forward::desc(
+          prop_kind::forward, convolution_direct, fwd_src_md, fwd_filter_md,
+          fwd_out_md, strides, padding_l, padding_r,
+          TFPaddingToMklDnnPadding(padding_));
       auto fwd_pd = convolution_forward::primitive_desc(fwd_desc, cpu_engine);
 
       // Allocate output tensor and shape
@@ -475,23 +475,22 @@ class MklConv2DCustomBackpropInputOp : public OpKernel {
       output.SetOpMemDesc(bwd_output_dims, memory::format::any);
 
       // Create convolution backward data primitive.
-      auto bwd_desc = convolution_backward_data::desc(convolution_direct,
-                          output.GetOpMemDesc(), filter.GetOpMemDesc(),
-                          outbackprop.GetOpMemDesc(), strides, padding_l,
-                          padding_r, TFPaddingToMklDnnPadding(padding_));
+      auto bwd_desc = convolution_backward_data::desc(
+          convolution_direct, output.GetOpMemDesc(), filter.GetOpMemDesc(),
+          outbackprop.GetOpMemDesc(), strides, padding_l, padding_r,
+          TFPaddingToMklDnnPadding(padding_));
 
-      auto bwd_pd = convolution_backward_data::primitive_desc(bwd_desc,
-                                                              cpu_engine,
-                                                              fwd_pd);
+      auto bwd_pd = convolution_backward_data::primitive_desc(
+          bwd_desc, cpu_engine, fwd_pd);
 
       PrepareAndExecutePrimitive(bwd_pd, &filter, &outbackprop, &output);
-    } catch (mkldnn::error &e) {
-     string error_msg = "Status: " + std::to_string(e.status) +
-                       ", message: " + string(e.message) +
-                       ", in file " + string(__FILE__) + ":" +
-                       std::to_string(__LINE__);
-     OP_REQUIRES_OK(context, errors::Aborted("Operation received an exception:",
-                                            error_msg));
+    } catch (mkldnn::error& e) {
+      string error_msg = "Status: " + std::to_string(e.status) +
+                         ", message: " + string(e.message) + ", in file " +
+                         string(__FILE__) + ":" + std::to_string(__LINE__);
+      OP_REQUIRES_OK(
+          context,
+          errors::Aborted("Operation received an exception:", error_msg));
     }
   }
 
@@ -502,9 +501,8 @@ class MklConv2DCustomBackpropInputOp : public OpKernel {
 
   // Prepare and execute net - checks for input and output reorders.
   void PrepareAndExecutePrimitive(
-                  const convolution_backward_data::primitive_desc& conv_pd,
-                  MklDnnData<T>* filter, MklDnnData<T>* obp,
-                  MklDnnData<T>* output) {
+      const convolution_backward_data::primitive_desc& conv_pd,
+      MklDnnData<T>* filter, MklDnnData<T>* obp, MklDnnData<T>* output) {
     // Create reorders between user layout and MKL layout if it is needed and
     // add it to the net before convolution.
     std::vector<primitive> net;
@@ -514,11 +512,11 @@ class MklConv2DCustomBackpropInputOp : public OpKernel {
     // Memory for output of convolution. Since we may need reorder on the
     // output side, we will prepare reorder primitive in case output
     // reorder to user memory is required.
-    bool output_reorder_required = output->PrepareReorderToUserMemIfReq(
-                                      conv_pd.diff_src_primitive_desc());
+    bool output_reorder_required =
+        output->PrepareReorderToUserMemIfReq(conv_pd.diff_src_primitive_desc());
 
-    net.push_back(convolution_backward_data(conv_pd, obp->GetOpMem(),
-                                    filter->GetOpMem(), output->GetOpMem()));
+    net.push_back(convolution_backward_data(
+        conv_pd, obp->GetOpMem(), filter->GetOpMem(), output->GetOpMem()));
 
     // Insert reorder primitive in the net for output reorder if reorder is
     // required.
diff --git a/tensorflow/core/kernels/mkl_conv_ops.cc b/tensorflow/core/kernels/mkl_conv_ops.cc
index a9872b8d6d..369f632fb4 100644
--- a/tensorflow/core/kernels/mkl_conv_ops.cc
+++ b/tensorflow/core/kernels/mkl_conv_ops.cc
@@ -18,8 +18,8 @@ limitations under the License.
 
 #include <string.h>
 #include <map>
-#include <vector>
 #include <string>
+#include <vector>
 
 #include "tensorflow/core/framework/numeric_op.h"
 #include "tensorflow/core/framework/op_kernel.h"
@@ -46,11 +46,11 @@ limitations under the License.
 #ifdef INTEL_MKL_DNN
 #include "mkldnn.hpp"
 
-using mkldnn::stream;
 using mkldnn::prop_kind;
+using mkldnn::stream;
 
-using mkldnn::convolution_forward;
 using mkldnn::convolution_direct;
+using mkldnn::convolution_forward;
 #endif
 
 namespace tensorflow {
@@ -523,19 +523,16 @@ class MklConv2DOp : public OpKernel {
 
       // Get shapes of input tensors in MKL-DNN order
       MklDnnConvUtil conv_utl(context, strides_, padding_, data_format_);
-      conv_utl.GetConvFwdSizesInMklOrder(src_tensor.shape(),
-                                         filter_tensor.shape(),
-                                         &src_dims, &filter_dims, &strides,
-                                         &output_dims_tf_order,
-                                         &output_dims_mkl_order, &padding_l,
-                                         &padding_r);
+      conv_utl.GetConvFwdSizesInMklOrder(
+          src_tensor.shape(), filter_tensor.shape(), &src_dims, &filter_dims,
+          &strides, &output_dims_tf_order, &output_dims_mkl_order, &padding_l,
+          &padding_r);
       if (!context->status().ok()) return;
 
       // Check for corner case - if there is nothing to compute, return.
-      TensorShape tf_output_shape({output_dims_tf_order[0],
-                                output_dims_tf_order[1],
-                                output_dims_tf_order[2],
-                                output_dims_tf_order[3]});
+      TensorShape tf_output_shape(
+          {output_dims_tf_order[0], output_dims_tf_order[1],
+           output_dims_tf_order[2], output_dims_tf_order[3]});
       Tensor* output_tensor = nullptr;
       MklShape mkl_output_mkl_shape;
       mkl_output_mkl_shape.SetMklTensor(false);
@@ -572,13 +569,13 @@ class MklConv2DOp : public OpKernel {
       // the layout is Tensorflow's layout (NHWC or NCHW depending on data
       // format).
       src.SetUsrMem(src_dims, TFDataFormatToMklDnnDataFormat(data_format_),
-                    const_cast<void*>(static_cast<const void*>(
-                    src_tensor.flat<T>().data())));
+                    const_cast<void*>(
+                        static_cast<const void*>(src_tensor.flat<T>().data())));
       // Although filter shape (filter_dims) required is in MKL-DNN order,
       // the layout is Tensorflow's layout (HWIO).
       filter.SetUsrMem(filter_dims, memory::format::hwio,
                        const_cast<void*>(static_cast<const void*>(
-                       filter_tensor.flat<T>().data())));
+                           filter_tensor.flat<T>().data())));
       // Although output shape (output_dims) required is in MKL-DNN order,
       // layout is Tensorflow's layout (NHWC or NCHW depending on data format).
       output.SetUsrMem(output_dims_mkl_order,
@@ -598,36 +595,36 @@ class MklConv2DOp : public OpKernel {
         const Tensor& bias_tensor = MklGetInput(context, 2);
         bias.SetUsrMem(bias_size, memory::format::x,
                        const_cast<void*>(static_cast<const void*>(
-                       bias_tensor.flat<T>().data())));
+                           bias_tensor.flat<T>().data())));
         bias.SetOpMemDesc(bias_size, memory::format::any);
 
         // Create convolution primitive with Bias.
-        auto conv_desc = convolution_forward::desc(prop_kind::forward,
-            convolution_direct, src.GetOpMemDesc(), filter.GetOpMemDesc(),
-            bias.GetOpMemDesc(), output.GetOpMemDesc(), strides,
-            padding_l, padding_r, TFPaddingToMklDnnPadding(padding_));
+        auto conv_desc = convolution_forward::desc(
+            prop_kind::forward, convolution_direct, src.GetOpMemDesc(),
+            filter.GetOpMemDesc(), bias.GetOpMemDesc(), output.GetOpMemDesc(),
+            strides, padding_l, padding_r, TFPaddingToMklDnnPadding(padding_));
 
-        auto conv_prim_desc = convolution_forward::primitive_desc(conv_desc,
-                                                                cpu_engine);
+        auto conv_prim_desc =
+            convolution_forward::primitive_desc(conv_desc, cpu_engine);
         PrepareAndExecuteNet(conv_prim_desc, &src, &filter, &bias, &output);
       } else {
         // Create convolution primitive without Bias.
-        auto conv_desc = convolution_forward::desc(prop_kind::forward,
-            convolution_direct, src.GetOpMemDesc(), filter.GetOpMemDesc(),
-            output.GetOpMemDesc(), strides, padding_l, padding_r,
-            TFPaddingToMklDnnPadding(padding_));
+        auto conv_desc = convolution_forward::desc(
+            prop_kind::forward, convolution_direct, src.GetOpMemDesc(),
+            filter.GetOpMemDesc(), output.GetOpMemDesc(), strides, padding_l,
+            padding_r, TFPaddingToMklDnnPadding(padding_));
 
-        auto conv_prim_desc = convolution_forward::primitive_desc(conv_desc,
-                                                                cpu_engine);
+        auto conv_prim_desc =
+            convolution_forward::primitive_desc(conv_desc, cpu_engine);
         PrepareAndExecuteNet(conv_prim_desc, &src, &filter, nullptr, &output);
       }
-    } catch (mkldnn::error &e) {
+    } catch (mkldnn::error& e) {
       string error_msg = "Status: " + std::to_string(e.status) +
-                       ", message: " + std::string(e.message) +
-                       ", in file " + std::string(__FILE__) + ":" +
-                       std::to_string(__LINE__);
-      OP_REQUIRES_OK(context,
-        errors::Aborted("Operation received an exception:", error_msg));
+                         ", message: " + std::string(e.message) + ", in file " +
+                         std::string(__FILE__) + ":" + std::to_string(__LINE__);
+      OP_REQUIRES_OK(
+          context,
+          errors::Aborted("Operation received an exception:", error_msg));
     }
   }
 
@@ -638,9 +635,9 @@ class MklConv2DOp : public OpKernel {
 
   // Prepare and execute net - checks for input and output reorders.
   void PrepareAndExecuteNet(
-                  const convolution_forward::primitive_desc& conv_prim_desc,
-                  MklDnnData<T>* src, MklDnnData<T>* filter,
-                  MklDnnData<T>* bias, MklDnnData<T>* output) {
+      const convolution_forward::primitive_desc& conv_prim_desc,
+      MklDnnData<T>* src, MklDnnData<T>* filter, MklDnnData<T>* bias,
+      MklDnnData<T>* output) {
     // Create reorders between user layout and MKL layout if it is needed and
     // add it to the net before convolution.
     std::vector<primitive> net;
@@ -651,18 +648,19 @@ class MklConv2DOp : public OpKernel {
     // output side, we will prepare reorder primitive in case output
     // reorder to user memory is required.
     bool output_reorder_required = output->PrepareReorderToUserMemIfReq(
-                                      conv_prim_desc.dst_primitive_desc());
+        conv_prim_desc.dst_primitive_desc());
 
     // Create convolution primitive and add it to net.
     if (bias) {
       CHECK_EQ(biasEnabled, true);
       net.push_back(convolution_forward(conv_prim_desc, src->GetOpMem(),
-                                    filter->GetOpMem(), bias->GetOpMem(),
-                                    output->GetOpMem()));
+                                        filter->GetOpMem(), bias->GetOpMem(),
+                                        output->GetOpMem()));
     } else {
       CHECK_EQ(biasEnabled, false);
       net.push_back(convolution_forward(conv_prim_desc, src->GetOpMem(),
-                                    filter->GetOpMem(), output->GetOpMem()));
+                                        filter->GetOpMem(),
+                                        output->GetOpMem()));
     }
 
     // Insert reorder primitive in the net for output reorder if reorder is
diff --git a/tensorflow/core/kernels/mkl_conv_ops.h b/tensorflow/core/kernels/mkl_conv_ops.h
index f0cb37f8a4..e29af19ca9 100644
--- a/tensorflow/core/kernels/mkl_conv_ops.h
+++ b/tensorflow/core/kernels/mkl_conv_ops.h
@@ -16,8 +16,8 @@ limitations under the License.
 #ifndef TENSORFLOW_CORE_KERNELS_MKL_CONV_OPS_H_
 #define TENSORFLOW_CORE_KERNELS_MKL_CONV_OPS_H_
 
-#include <vector>
 #include <limits>
+#include <vector>
 
 #include "tensorflow/core/framework/numeric_op.h"
 #include "tensorflow/core/framework/op_kernel.h"
@@ -26,8 +26,8 @@ limitations under the License.
 #include "tensorflow/core/framework/tensor_shape.h"
 #include "tensorflow/core/framework/tensor_slice.h"
 #include "tensorflow/core/kernels/bounds_check.h"
-#include "tensorflow/core/kernels/ops_util.h"
 #include "tensorflow/core/kernels/conv_grad_ops.h"
+#include "tensorflow/core/kernels/ops_util.h"
 #include "tensorflow/core/lib/core/errors.h"
 #include "tensorflow/core/lib/gtl/array_slice.h"
 #include "tensorflow/core/lib/strings/numbers.h"
@@ -49,15 +49,15 @@ namespace tensorflow {
 
 class MklDnnConvUtil {
  protected:
-  OpKernelContext* context_;  // We don't own this.
+  OpKernelContext *context_;  // We don't own this.
   std::vector<int32> strides_;
   Padding padding_;
   TensorFormat data_format_;
 
  public:
-  MklDnnConvUtil(OpKernelContext* context, const std::vector<int32>& strides,
-                 Padding pad, TensorFormat fm) : context_(context),
-    strides_(strides), padding_(pad), data_format_(fm) {}
+  MklDnnConvUtil(OpKernelContext *context, const std::vector<int32> &strides,
+                 Padding pad, TensorFormat fm)
+      : context_(context), strides_(strides), padding_(pad), data_format_(fm) {}
 
   virtual ~MklDnnConvUtil() { context_ = nullptr; }
 
@@ -75,14 +75,14 @@ class MklDnnConvUtil {
   // requires input in NCHW format. Function does not return anything.
   // But errors arising from sanity checks are returned in context's
   // status.
-  virtual inline void
-  GetInputSizeInMklOrder(const TensorShape& input_shape,
-                         memory::dims *input_dims) {
-  #define CHECK_BOUNDS(val, err_msg) do {                     \
-    OP_REQUIRES(context_, FastBoundsCheck(val,                \
-                            std::numeric_limits<int>::max()), \
-                errors::InvalidArgument(err_msg));            \
-  }while(0)
+  virtual inline void GetInputSizeInMklOrder(const TensorShape &input_shape,
+                                             memory::dims *input_dims) {
+#define CHECK_BOUNDS(val, err_msg)                                     \
+  do {                                                                 \
+    OP_REQUIRES(context_,                                              \
+                FastBoundsCheck(val, std::numeric_limits<int>::max()), \
+                errors::InvalidArgument(err_msg));                     \
+  } while (0)
 
     CHECK_NOTNULL(input_dims);
 
@@ -105,7 +105,7 @@ class MklDnnConvUtil {
     CHECK_BOUNDS(input_batch_raw, "Input batch too large");
     int input_batch = static_cast<int>(input_batch_raw);
 
-  #undef CHECK_BOUNDS
+#undef CHECK_BOUNDS
 
     // MKL-DNN always requires input in NCHW format.
     *input_dims = {input_batch, input_depth, input_rows, input_cols};
@@ -125,10 +125,9 @@ class MklDnnConvUtil {
   // forward gets actual tensor as input).
   //
   // TODO(nhasabni): Add similar function for input and filter in MklShape.
-  virtual inline void
-  GetFilterSizeInMklOrder(const TensorShape& input_shape,
-                          const TensorShape& filter_shape,
-                          memory::dims *filter_dims) {
+  virtual inline void GetFilterSizeInMklOrder(const TensorShape &input_shape,
+                                              const TensorShape &filter_shape,
+                                              memory::dims *filter_dims) {
     CHECK_NOTNULL(filter_dims);
 
     OP_REQUIRES(context_, filter_shape.dims() == 4,
@@ -136,17 +135,18 @@ class MklDnnConvUtil {
                                         filter_shape.DebugString()));
 
     for (int i = 0; i < 3; i++) {
-      OP_REQUIRES(context_, FastBoundsCheck(filter_shape.dim_size(i),
-                                           std::numeric_limits<int>::max()),
-                errors::InvalidArgument("filter too large"));
+      OP_REQUIRES(context_,
+                  FastBoundsCheck(filter_shape.dim_size(i),
+                                  std::numeric_limits<int>::max()),
+                  errors::InvalidArgument("filter too large"));
     }
 
     int input_depth = GetTensorDim(input_shape, data_format_, 'C');
 
-    OP_REQUIRES(
-        context_, input_depth == filter_shape.dim_size(2),
-        errors::InvalidArgument("input and filter must have the same depth: ",
-                                input_depth, " vs ", filter_shape.dim_size(2)));
+    OP_REQUIRES(context_, input_depth == filter_shape.dim_size(2),
+                errors::InvalidArgument(
+                    "input and filter must have the same depth: ", input_depth,
+                    " vs ", filter_shape.dim_size(2)));
 
     // TF filter is always in (rows, cols, in_depth, out_depth) order.
     int filter_rows = static_cast<int>(filter_shape.dim_size(0));
@@ -163,25 +163,25 @@ class MklDnnConvUtil {
   // requires filter in OIHW format. Function does not return anything.
   // But errors arising from sanity checks are returned in context's
   // status.
-  virtual inline void
-  GetFilterSizeInMklOrder(size_t src_index, size_t filter_index,
-                          memory::dims *filter_dims) {
+  virtual inline void GetFilterSizeInMklOrder(size_t src_index,
+                                              size_t filter_index,
+                                              memory::dims *filter_dims) {
     CHECK_NOTNULL(filter_dims);
-    const Tensor& input = MklGetInput(context_, src_index);
-    const Tensor& filter = MklGetInput(context_, filter_index);
+    const Tensor &input = MklGetInput(context_, src_index);
+    const Tensor &filter = MklGetInput(context_, filter_index);
     GetFilterSizeInMklOrder(input.shape(), filter.shape(), filter_dims);
   }
 
   // Calculate Bias size for 2D Convolution. Function does not return
   // anything, but sets error in context status.
-  virtual inline void
-  GetBiasSizeInMklOrder(size_t bias_index, memory::dims *bias_dims) {
-    const Tensor& bias = MklGetInput(context_, bias_index);
+  virtual inline void GetBiasSizeInMklOrder(size_t bias_index,
+                                            memory::dims *bias_dims) {
+    const Tensor &bias = MklGetInput(context_, bias_index);
     OP_REQUIRES(context_, bias.dims() == 1,
                 errors::InvalidArgument("bias must be 1-dimensional: ",
                                         bias.shape().DebugString()));
 
-    *bias_dims = { static_cast<int>(bias.dim_size(0)) };
+    *bias_dims = {static_cast<int>(bias.dim_size(0))};
   }
 
   // Function to calculate output and padding size for 2D convolution.
@@ -193,13 +193,11 @@ class MklDnnConvUtil {
   // status is returned via context status.
   //
   // TODO(nhasabni): Add similar function for input and filter in MklShape.
-  virtual inline void
-  GetOutputAndPadSizeInMklOrder(const TensorShape& input_shape,
-                                const TensorShape& filter_shape,
-                                const memory::dims& strides,
-                                memory::dims *output_dims_tf_order,
-                                memory::dims *output_dims_mkl_order,
-                                memory::dims *pad_l, memory::dims *pad_r) {
+  virtual inline void GetOutputAndPadSizeInMklOrder(
+      const TensorShape &input_shape, const TensorShape &filter_shape,
+      const memory::dims &strides, memory::dims *output_dims_tf_order,
+      memory::dims *output_dims_mkl_order, memory::dims *pad_l,
+      memory::dims *pad_r) {
     CHECK_NOTNULL(output_dims_tf_order);
     CHECK_NOTNULL(output_dims_mkl_order);
     CHECK_NOTNULL(pad_l);
@@ -225,21 +223,21 @@ class MklDnnConvUtil {
     int64 out_rows = 0, out_cols = 0;
     int64 pad_top = 0, pad_bottom = 0, pad_left, pad_right;
 
-    OP_REQUIRES_OK(context_,
-            GetWindowedOutputSizeVerbose(input_rows, filter_rows, stride_rows,
-                                 padding_, &out_rows, &pad_top, &pad_bottom));
-    OP_REQUIRES_OK(context_,
-            GetWindowedOutputSizeVerbose(input_cols, filter_cols, stride_cols,
-                                 padding_, &out_cols, &pad_left, &pad_right));
+    OP_REQUIRES_OK(context_, GetWindowedOutputSizeVerbose(
+                                 input_rows, filter_rows, stride_rows, padding_,
+                                 &out_rows, &pad_top, &pad_bottom));
+    OP_REQUIRES_OK(context_, GetWindowedOutputSizeVerbose(
+                                 input_cols, filter_cols, stride_cols, padding_,
+                                 &out_cols, &pad_left, &pad_right));
 
     // Tensorflow output is in data_format order. (NHWC or NCHW)
-    TensorShape out_shape = ShapeFromFormat(data_format_, out_batch,
-                                            out_rows, out_cols, out_depth);
+    TensorShape out_shape =
+        ShapeFromFormat(data_format_, out_batch, out_rows, out_cols, out_depth);
     *output_dims_tf_order = TFShapeToMklDnnDims(out_shape);
 
     // MKL-DNN always needs output in NCHW format.
     *output_dims_mkl_order = {out_batch, out_depth, static_cast<int>(out_rows),
-                   static_cast<int>(out_cols)};
+                              static_cast<int>(out_cols)};
 
     // Now handle padding. MKL-DNN uses asymetric padding.
     *pad_l = {static_cast<int>(pad_top), static_cast<int>(pad_left)};
@@ -250,27 +248,25 @@ class MklDnnConvUtil {
   // See comment on GetConvOutputAndPadSizeInMklOrder for parameters.
   //
   // Function does not return anything, but sets error in context status.
-  inline void
-  GetOutputAndPadSizeInMklOrder(size_t src_index, size_t filter_index,
-                                const memory::dims& strides,
-                                memory::dims *output_dims_tf_order,
-                                memory::dims *output_dims_mkl_order,
-                                memory::dims *pad_l, memory::dims *pad_r) {
+  inline void GetOutputAndPadSizeInMklOrder(
+      size_t src_index, size_t filter_index, const memory::dims &strides,
+      memory::dims *output_dims_tf_order, memory::dims *output_dims_mkl_order,
+      memory::dims *pad_l, memory::dims *pad_r) {
     CHECK_NOTNULL(output_dims_tf_order);
     CHECK_NOTNULL(output_dims_mkl_order);
     CHECK_NOTNULL(pad_l);
     CHECK_NOTNULL(pad_r);
 
-    const Tensor& input = MklGetInput(context_, src_index);
-    const Tensor& filter = MklGetInput(context_, filter_index);
+    const Tensor &input = MklGetInput(context_, src_index);
+    const Tensor &filter = MklGetInput(context_, filter_index);
 
     OP_REQUIRES(context_, input.dims() == 4,
                 errors::InvalidArgument("input must be 4-dimensional",
-                                          input.shape().DebugString()));
+                                        input.shape().DebugString()));
 
-    GetOutputAndPadSizeInMklOrder(input.shape(), filter.shape(),
-                                  strides, output_dims_tf_order,
-                                  output_dims_mkl_order, pad_l, pad_r);
+    GetOutputAndPadSizeInMklOrder(input.shape(), filter.shape(), strides,
+                                  output_dims_tf_order, output_dims_mkl_order,
+                                  pad_l, pad_r);
   }
 
   // Wrapper function to calculate input, filter, and output sizes of
@@ -279,15 +275,12 @@ class MklDnnConvUtil {
   // also calculates strides and paddings for 2D Convolution.
   //
   // Function does not return anything, but sets error in context status.
-  inline void GetConvFwdSizesInMklOrder(const TensorShape& input_shape,
-                                        const TensorShape& filter_shape,
-                                        memory::dims *input_dims,
-                                        memory::dims *filter_dims,
-                                        memory::dims *strides,
-                                        memory::dims *output_dims_tf_order,
-                                        memory::dims *output_dims_mkl_order,
-                                        memory::dims *pad_l,
-                                        memory::dims *pad_r) {
+  inline void GetConvFwdSizesInMklOrder(
+      const TensorShape &input_shape, const TensorShape &filter_shape,
+      memory::dims *input_dims, memory::dims *filter_dims,
+      memory::dims *strides, memory::dims *output_dims_tf_order,
+      memory::dims *output_dims_mkl_order, memory::dims *pad_l,
+      memory::dims *pad_r) {
     CHECK_NOTNULL(input_dims);
     CHECK_NOTNULL(filter_dims);
     CHECK_NOTNULL(strides);
@@ -302,8 +295,7 @@ class MklDnnConvUtil {
     if (!context_->status().ok()) return;
     GetStridesInMklOrder(strides);
     GetOutputAndPadSizeInMklOrder(input_shape, filter_shape, *strides,
-                                  output_dims_tf_order,
-                                  output_dims_mkl_order,
+                                  output_dims_tf_order, output_dims_mkl_order,
                                   pad_l, pad_r);
     if (!context_->status().ok()) return;
   }
diff --git a/tensorflow/core/kernels/mkl_tfconv_op.h b/tensorflow/core/kernels/mkl_tfconv_op.h
index 0a5be4fec9..a240ee44fb 100644
--- a/tensorflow/core/kernels/mkl_tfconv_op.h
+++ b/tensorflow/core/kernels/mkl_tfconv_op.h
@@ -13,11 +13,11 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
+#ifdef INTEL_MKL
+
 #ifndef TENSORFLOW_CORE_KERNELS_MKL_TFCONV_OP_H_
 #define TENSORFLOW_CORE_KERNELS_MKL_TFCONV_OP_H_
 
-#ifdef INTEL_MKL
-
 #include <algorithm>
 #include <vector>
 #include "tensorflow/core/framework/numeric_op.h"
@@ -35,10 +35,6 @@ limitations under the License.
 #include "mkl_dnn_types.h"
 #include "tensorflow/core/util/mkl_util.h"
 
-#ifdef INTEL_MKL_DNN
-using mkldnn::stream;
-#endif
-
 namespace tensorflow {
 typedef Eigen::ThreadPoolDevice CPUDevice;
 
@@ -61,71 +57,6 @@ class MklToTfOp : public OpKernel {
     VLOG(1) << "MKLToTFConversion complete successfully.";
   }
 
-#ifdef INTEL_MKL_DNN
-  static void ConvertMklToTf(OpKernel* op_kernel, OpKernelContext* context,
-                             string data_format_str, DataType op_data_type,
-                             bool has_avx512f, uint input_number) {
-    try {
-      // Check that input tensor is in MKL format.
-      const Tensor& input_tensor = MklGetInput(context, input_number);
-      MklDnnShape input_shape;
-      GetMklShape(context, input_number, &input_shape);
-
-      // if input is already in Tf format, then copy input tensor to output.
-      if (!input_shape.IsMklTensor()) {
-        context->set_output(input_number, input_tensor);
-        VLOG(1) << "MKLToTFConversion: No conversion needed, "
-                << "copying input to output";
-        return;
-      }
-
-      // Check that input data type is same as operator data type and that it
-      // is same as output data type.
-      DataType input_data_type = op_kernel->input_type(input_number);
-      DataType output_data_type = op_kernel->output_type(input_number);
-      CHECK_EQ(op_data_type, input_data_type);
-      CHECK_EQ(op_data_type, output_data_type);
-
-      auto cpu_engine = engine(engine::cpu, 0);
-      MklDnnData<T> input(&cpu_engine);
-
-      // Get Mkl layout of input tensor.
-      auto input_mkl_md = input_shape.GetMklLayout();
-      // Get TensorFlow layout of input tensor. Expected output of conversion
-      // has same layout as Tensorflow layout of input tensor.
-      auto output_tf_md = input_shape.GetTfLayout();
-      auto output_tf_pd = memory::primitive_desc(output_tf_md, cpu_engine);
-      // Set input Mkl layout as the user layout.
-      input.SetUsrMem(input_mkl_md, &input_tensor);
-
-      // Allocate output tensor.
-      TensorShape output_shape = input_shape.GetTfShape();
-      Tensor* output_tensor = NULL;
-      OP_REQUIRES_OK(context, context->allocate_output(input_number,
-                                  output_shape, &output_tensor));
-      CHECK_NOTNULL(output_tensor);
-
-      // Do we need to reorder Mkl layout into TensorFlow layout?
-      if (input.IsReorderNeeded(output_tf_pd)) {
-        // Insert reorder between Mkl layout and TensorFlow layout.
-        std::vector<primitive> net;
-        CHECK_EQ(input.CheckReorderToOpMem(output_tf_pd, output_tensor, &net),
-                 true);
-        stream(stream::kind::eager).submit(net).wait();
-      } else {
-        // If not, just forward input tensor to output tensor.
-        CHECK(output_tensor->CopyFrom(input_tensor, output_shape));
-      }
-    } catch (mkldnn::error &e) {
-      string error_msg = "Status: " + std::to_string(e.status) +
-                       ", message: " + std::string(e.message) +
-                       ", in file " + std::string(__FILE__) + ":" +
-                       std::to_string(__LINE__);
-      OP_REQUIRES_OK(context,
-        errors::Aborted("Operation received an exception:", error_msg));
-    }
-  }
-#else
   static void ConvertMklToTf(OpKernel* op_kernel, OpKernelContext* context,
                              string data_format_str, DataType op_data_type,
                              bool has_avx512f, uint input_number) {
@@ -160,8 +91,8 @@ class MklToTfOp : public OpKernel {
 
     // Allocate output tensor.
     Tensor* output_tensor = NULL;
-    OP_REQUIRES_OK(context, context->allocate_output(input_number,
-                              output_shape, &output_tensor));
+    OP_REQUIRES_OK(context,
+                   context->allocate_output(input_number, output_shape, &output_tensor));
 
     dnnLayout_t output_layout =
         static_cast<dnnLayout_t>(input_shape.GetTfLayout());
@@ -175,7 +106,6 @@ class MklToTfOp : public OpKernel {
                                      output_buffer);
     VLOG(1) << "MKLToTFConversion complete successfully.";
   }
-#endif
 
  private:
   /// Data format of the operation
@@ -202,5 +132,5 @@ class MklToTfOp : public OpKernel {
 TF_CALL_NUMBER_TYPES(REGISTER_CPU);
 #undef REGISTER_CPU
 }  // namespace tensorflow
-#endif  // INTEL_MKL
 #endif  // TENSORFLOW_CORE_KERNELS_MKL_TFCONV_OP_H_
+#endif  // INTEL_MKL
diff --git a/tensorflow/core/kernels/pooling_ops_common.cc b/tensorflow/core/kernels/pooling_ops_common.cc
index ac90f67ce0..7dee751c4f 100644
--- a/tensorflow/core/kernels/pooling_ops_common.cc
+++ b/tensorflow/core/kernels/pooling_ops_common.cc
@@ -143,7 +143,7 @@ void DnnPoolingOp<T>::Compute(
     perftools::gputools::dnn::PoolingMode pooling_mode,
     const std::vector<int32>& size, const std::vector<int32>& stride,
     Padding padding, TensorFormat data_format, const Tensor& tensor_in,
-    const TensorShape& tensor_out_shape, bool propagate_nans) {
+    const TensorShape& tensor_out_shape) {
   Tensor* tensor_out = nullptr;
   OP_REQUIRES_OK(context,
                  context->allocate_output(0, tensor_out_shape, &tensor_out));
@@ -188,8 +188,7 @@ void DnnPoolingOp<T>::Compute(
       .set_vertical_stride(params.row_stride)
       .set_horizontal_stride(params.col_stride)
       .set_vertical_padding(params.pad_rows)
-      .set_horizontal_padding(params.pad_cols)
-      .set_propagate_nans(propagate_nans);
+      .set_horizontal_padding(params.pad_cols);
 
   perftools::gputools::dnn::BatchDescriptor input_desc;
   input_desc.set_count(params.tensor_in_batch)
@@ -238,7 +237,7 @@ void DnnPoolingGradOp<T>::Compute(
     const std::vector<int32>& size, const std::vector<int32>& stride,
     Padding padding, TensorFormat data_format, const Tensor* tensor_in,
     const Tensor* tensor_out, const Tensor& out_backprop,
-    const TensorShape& tensor_in_shape, bool propagate_nans) {
+    const TensorShape& tensor_in_shape) {
   CHECK((pooling_mode != perftools::gputools::dnn::PoolingMode::kMaximum) ||
         (tensor_in && tensor_out))
       << "For MaxPoolGrad, both tensor_in and tensor_out needs to be "
@@ -328,8 +327,7 @@ void DnnPoolingGradOp<T>::Compute(
       .set_vertical_stride(params.row_stride)
       .set_horizontal_stride(params.col_stride)
       .set_vertical_padding(params.pad_rows)
-      .set_horizontal_padding(params.pad_cols)
-      .set_propagate_nans(propagate_nans);
+      .set_horizontal_padding(params.pad_cols);
 
   perftools::gputools::dnn::BatchDescriptor orig_output_desc;
   orig_output_desc.set_count(params.tensor_in_batch)
diff --git a/tensorflow/core/kernels/pooling_ops_common_gpu.h b/tensorflow/core/kernels/pooling_ops_common_gpu.h
index 1458456585..b594f39fad 100644
--- a/tensorflow/core/kernels/pooling_ops_common_gpu.h
+++ b/tensorflow/core/kernels/pooling_ops_common_gpu.h
@@ -44,7 +44,7 @@ class DnnPoolingOp {
                       const std::vector<int32>& size,
                       const std::vector<int32>& stride, Padding padding,
                       TensorFormat data_format, const Tensor& tensor_in,
-                      const TensorShape& tensor_out_shape, bool propagate_nans);
+                      const TensorShape& tensor_out_shape);
 };
 
 // A helper class that launch the cudnn pooling backward operations.
@@ -60,7 +60,7 @@ class DnnPoolingGradOp {
                       const std::vector<int32>& stride, Padding padding,
                       TensorFormat data_format, const Tensor* tensor_in,
                       const Tensor* tensor_out, const Tensor& out_backprop,
-                      const TensorShape& tensor_in_shape, bool propagate_nans);
+                      const TensorShape& tensor_in_shape);
 };
 
 }  // namespace tensorflow
diff --git a/tensorflow/core/kernels/quantized_add_op.cc b/tensorflow/core/kernels/quantized_add_op.cc
index 337c8e5c17..8be0c56798 100644
--- a/tensorflow/core/kernels/quantized_add_op.cc
+++ b/tensorflow/core/kernels/quantized_add_op.cc
@@ -489,7 +489,7 @@ class QuantizedAddOp : public OpKernel {
     // adding zero leaves the result unchanged, and to contain the largest of
     // the two input values with some room to spare.
     const float smallest_min = std::min(min_x, min_y);
-    const float largest_max = std::max(max_x, max_y);
+    const float largest_max = std::min(max_x, max_y);
     const float biggest_range =
         std::max(std::abs(smallest_min), std::abs(largest_max));
     const float output_range = (biggest_range * (1 << 14));
diff --git a/tensorflow/core/kernels/random_op.cc b/tensorflow/core/kernels/random_op.cc
index 55a8b9c9b6..a37c757865 100644
--- a/tensorflow/core/kernels/random_op.cc
+++ b/tensorflow/core/kernels/random_op.cc
@@ -577,7 +577,7 @@ struct FillPhiloxRandomKernel<Distribution, false> {
     const size_t kGroupSize = Distribution::kResultElementCount;
 
     const size_t item_id = item.get_global(0);
-    const size_t total_item_count = item.get_global_range();
+    const size_t total_item_count = item.get_global_range(0);
     size_t offset = item_id * kGroupSize;
     gen_.Skip(item_id);
 
@@ -633,7 +633,7 @@ struct FillPhiloxRandomKernel<Distribution, true> {
                                                 PhiloxRandom::kResultElementCount;
 
     const size_t item_id = item.get_global(0);
-    const size_t total_item_count = item.get_global_range();
+    const size_t total_item_count = item.get_global_range(0);
     size_t group_index = item_id;
     size_t offset = group_index * kGroupSize;
 
diff --git a/tensorflow/core/kernels/segment_reduction_ops.cc b/tensorflow/core/kernels/segment_reduction_ops.cc
index 2334e50f1d..4302a68a18 100644
--- a/tensorflow/core/kernels/segment_reduction_ops.cc
+++ b/tensorflow/core/kernels/segment_reduction_ops.cc
@@ -376,9 +376,6 @@ struct UnsortedSegmentSumFunctor<CPUDevice, T, Index>
     auto data_flat = typename TTypes<T, 2>::ConstTensor(data, N, data_size / N);
     for (int64 i = 0; i < N; ++i) {
       Index j = internal::SubtleMustCopy(segment_ids(i));
-      if (j < 0) {
-        continue;
-      }
       OP_REQUIRES(ctx, FastBoundsCheck(j, output_rows),
                   errors::InvalidArgument(
                       "segment_ids", SliceDebugString(segment_ids_shape, i),
diff --git a/tensorflow/core/kernels/segment_reduction_ops.h b/tensorflow/core/kernels/segment_reduction_ops.h
index b10bea72ba..412c1d601d 100644
--- a/tensorflow/core/kernels/segment_reduction_ops.h
+++ b/tensorflow/core/kernels/segment_reduction_ops.h
@@ -30,14 +30,14 @@ namespace functor {
 #ifdef GOOGLE_CUDA
 typedef Eigen::GpuDevice GPUDevice;
 // Functor for SegmentSumGPUOp.
-// output_rows: the number of output segments (unique segment ids in
+// 'output_rows': the number of output segments (unique segment ids in
 //                'segment_ids').
-// segment_ids_shape: shape of 'segment_ids' tensor.
-// segment_ids: unsorted map from input to output segment ids at which to
+// 'segment_ids_shape': shape of 'segment_ids' tensor.
+// 'segment_ids': unsorted map from input to output segment ids at which to
 //                perform segment sum operation.
-// data_size: size of input data tensor.
-// data: input data tensor.
-// output: output reshaped to {output_rows, output.size/output_rows}
+// 'data_size': size of input data tensor.
+// 'data': input data tensor.
+// 'output': output reshaped to {output_rows, output.size/output_rows}
 template <typename T, typename Index>
 struct SegmentSumFunctor {
   void operator()(OpKernelContext* ctx, const GPUDevice& d,
@@ -61,14 +61,14 @@ struct UnsortedSegmentBaseFunctor{
 };
 
 // Functor for UnsortedSegmentSumOp.
-// output_rows: the number of output segments (unique segment ids in
+// 'output_rows': the number of output segments (unique segment ids in
 //                'segment_ids').
-// segment_ids_shape: shape of 'segment_ids' tensor.
-// segment_ids: unsorted map from input to output segment ids at which to
+// 'segment_ids_shape': shape of 'segment_ids' tensor.
+// 'segment_ids': unsorted map from input to output segment ids at which to
 //                perform segment sum operation.
-// data_size: size of input data tensor.
-// data: input data tensor.
-// output: output reshaped to {output_rows, output.size/output_rows}
+// 'data_size': size of input data tensor.
+// 'data': input data tensor.
+// 'output': output reshaped to {output_rows, output.size/output_rows}
 template <typename Device, typename T, typename Index>
 struct UnsortedSegmentSumFunctor: public UnsortedSegmentBaseFunctor<Device, T, Index> {
   void operator()(OpKernelContext* ctx, const Device& d,
@@ -79,14 +79,14 @@ struct UnsortedSegmentSumFunctor: public UnsortedSegmentBaseFunctor<Device, T, I
 };
 
 // Functor for UnsortedSegmentMaxOp.
-// output_rows: the number of output segments (unique segment ids in
+// 'output_rows': the number of output segments (unique segment ids in
 //                'segment_ids').
-// segment_ids_shape: shape of 'segment_ids' tensor.
-// segment_ids: unsorted map from input to output segment ids at which to
+// 'segment_ids_shape': shape of 'segment_ids' tensor.
+// 'segment_ids': unsorted map from input to output segment ids at which to
 //                perform segment sum operation.
-// data_size: size of input data tensor.
-// data: input data tensor.
-// output: output reshaped to {output_rows, output.size/output_rows}
+// 'data_size': size of input data tensor.
+// 'data': input data tensor.
+// 'output': output reshaped to {output_rows, output.size/output_rows}
 template <typename Device, typename T, typename Index>
 struct UnsortedSegmentMaxFunctor: public UnsortedSegmentBaseFunctor<Device, T, Index> {
   void operator()(OpKernelContext* ctx, const Device& d,
diff --git a/tensorflow/core/kernels/shape_ops.cc b/tensorflow/core/kernels/shape_ops.cc
index 28a39bae3f..721f9b949b 100644
--- a/tensorflow/core/kernels/shape_ops.cc
+++ b/tensorflow/core/kernels/shape_ops.cc
@@ -341,12 +341,7 @@ REGISTER_KERNEL_BUILDER(Name("ExpandDims")
                             .Device(DEVICE_CPU)
                             .HostMemory("dim")
                             .TypeConstraint<int32>("Tdim"),
-                        ExpandDimsOp<int32>);
-REGISTER_KERNEL_BUILDER(Name("ExpandDims")
-                            .Device(DEVICE_CPU)
-                            .HostMemory("dim")
-                            .TypeConstraint<int64>("Tdim"),
-                        ExpandDimsOp<int64>);
+                        ExpandDimsOp);
 
 #if GOOGLE_CUDA
 #define REGISTER_GPU_KERNEL(type)                            \
@@ -355,13 +350,7 @@ REGISTER_KERNEL_BUILDER(Name("ExpandDims")
                               .TypeConstraint<type>("T")     \
                               .TypeConstraint<int32>("Tdim") \
                               .HostMemory("dim"),            \
-                          ExpandDimsOp<int32>);              \
-  REGISTER_KERNEL_BUILDER(Name("ExpandDims")                 \
-                              .Device(DEVICE_GPU)            \
-                              .TypeConstraint<type>("T")     \
-                              .TypeConstraint<int64>("Tdim") \
-                              .HostMemory("dim"),            \
-                          ExpandDimsOp<int64>);
+                          ExpandDimsOp);
 TF_CALL_NUMBER_TYPES_NO_INT32(REGISTER_GPU_KERNEL);
 TF_CALL_bool(REGISTER_GPU_KERNEL);
 #undef REGISTER_GPU_KERNEL
@@ -373,15 +362,7 @@ REGISTER_KERNEL_BUILDER(Name("ExpandDims")
                             .HostMemory("input")
                             .HostMemory("dim")
                             .HostMemory("output"),
-                        ExpandDimsOp<int32>);
-REGISTER_KERNEL_BUILDER(Name("ExpandDims")
-                            .Device(DEVICE_GPU)
-                            .TypeConstraint<int32>("T")
-                            .TypeConstraint<int64>("Tdim")
-                            .HostMemory("input")
-                            .HostMemory("dim")
-                            .HostMemory("output"),
-                        ExpandDimsOp<int64>);
+                        ExpandDimsOp);
 #endif  // GOOGLE_CUDA
 
 #ifdef TENSORFLOW_USE_SYCL
@@ -391,13 +372,7 @@ REGISTER_KERNEL_BUILDER(Name("ExpandDims")
                               .TypeConstraint<type>("T")     \
                               .TypeConstraint<int32>("Tdim") \
                               .HostMemory("dim"),            \
-                          ExpandDimsOp<int32>);              \
-  REGISTER_KERNEL_BUILDER(Name("ExpandDims")                 \
-                              .Device(DEVICE_SYCL)           \
-                              .TypeConstraint<type>("T")     \
-                              .TypeConstraint<int64>("Tdim") \
-                              .HostMemory("dim"),            \
-                          ExpandDimsOp<int64>);
+                          ExpandDimsOp);
 TF_CALL_NUMBER_TYPES_NO_INT32(REGISTER_SYCL_KERNEL);
 TF_CALL_bool(REGISTER_SYCL_KERNEL);
 #undef REGISTER_SYCL_KERNEL
@@ -409,15 +384,7 @@ REGISTER_KERNEL_BUILDER(Name("ExpandDims")
                             .HostMemory("input")
                             .HostMemory("dim")
                             .HostMemory("output"),
-                        ExpandDimsOp<int32>);
-REGISTER_KERNEL_BUILDER(Name("ExpandDims")
-                            .Device(DEVICE_SYCL)
-                            .TypeConstraint<int32>("T")
-                            .TypeConstraint<int64>("Tdim")
-                            .HostMemory("input")
-                            .HostMemory("dim")
-                            .HostMemory("output"),
-                        ExpandDimsOp<int64>);
+                        ExpandDimsOp);
 #endif  // TENSORFLOW_USE_SYCL
 
 // Squeeze ---------------------------------------
diff --git a/tensorflow/core/kernels/shape_ops.h b/tensorflow/core/kernels/shape_ops.h
index 8d9d0ea846..ac607f4e8b 100644
--- a/tensorflow/core/kernels/shape_ops.h
+++ b/tensorflow/core/kernels/shape_ops.h
@@ -145,7 +145,6 @@ class SizeOp : public OpKernel {
   bool IsExpensive() override { return false; }
 };
 
-template <typename Tdim>
 class ExpandDimsOp : public OpKernel {
  public:
   explicit ExpandDimsOp(OpKernelConstruction* ctx) : OpKernel(ctx) {}
@@ -154,7 +153,7 @@ class ExpandDimsOp : public OpKernel {
     OP_REQUIRES(ctx, ctx->input(0).dtype() != DT_VARIANT,
                 errors::InvalidArgument("ExpandDims on Variant not supported"));
 
-    Tdim dim = ctx->input(1).flat<Tdim>()(0);
+    int32 dim = ctx->input(1).flat<int32>()(0);
     OP_REQUIRES(
         ctx, (dim >= -1 - ctx->input(0).dims() && dim <= ctx->input(0).dims()),
         errors::InvalidArgument("Tried to expand dim index ", dim,
@@ -176,7 +175,7 @@ class ExpandDimsOp : public OpKernel {
     }
 
     // Clamp to the end if needed.
-    dim = std::min<Tdim>(dim, existing_dims_size);
+    dim = std::min<int32>(dim, existing_dims_size);
     new_shape.emplace(new_shape.begin() + dim, 1);
     const TensorShape output_shape(new_shape);
 
@@ -235,10 +234,10 @@ class SqueezeOp : public OpKernel {
       if (!wrapped_squeeze_dims.empty()) {
         if (wrapped_squeeze_dims.count(i) > 0) {
           OP_REQUIRES(ctx, existing_dim == 1,
-                      errors::InvalidArgument("Tried to explicitly squeeze "
-                                              "dimension ",
-                                              i, " but dimension was not 1: ",
-                                              existing_dim));
+                      errors::InvalidArgument(
+                          "Tried to explicitly squeeze "
+                          "dimension ",
+                          i, " but dimension was not 1: ", existing_dim));
         } else {
           // This dimension is not being squeezed.
           new_shape.push_back(existing_dim);
diff --git a/tensorflow/core/kernels/slice_op.cc b/tensorflow/core/kernels/slice_op.cc
index 28a379774b..d46701749b 100644
--- a/tensorflow/core/kernels/slice_op.cc
+++ b/tensorflow/core/kernels/slice_op.cc
@@ -190,25 +190,41 @@ class SliceOp : public OpKernel {
         }
         return;
       }
-#define HANDLE_DIM(NDIM)                                              \
-  if (input_dims == NDIM) {                                           \
-    functor::Slice<Device, T, NDIM>()(                                \
-        context->eigen_device<Device>(), result, input, begin, size); \
-    return;                                                           \
+#define HANDLE_DIM(NDIM)                            \
+  if (input_dims == NDIM) {                         \
+    HandleCase<NDIM>(context, begin, size, result); \
+    return;                                         \
   }
+
       HANDLE_DIM(1);
       HANDLE_DIM(2);
       HANDLE_DIM(3);
       HANDLE_DIM(4);
       HANDLE_DIM(5);
       HANDLE_DIM(6);
+      HANDLE_DIM(7);
 
 #undef HANDLE_DIM
 
-      // handle cases which dim >= 7
-      functor::Slice<Device, T, 7>()(
-          context->eigen_device<Device>(), result, input, begin, size);
+      OP_REQUIRES(context, false, errors::Unimplemented(
+                                      "SliceOp : Unhandled input dimensions"));
+    }
+  }
+
+ private:
+  template <int NDIM>
+  void HandleCase(OpKernelContext* context, const gtl::ArraySlice<int64>& begin,
+                  const gtl::ArraySlice<int64>& size, Tensor* result) {
+    Eigen::DSizes<Eigen::DenseIndex, NDIM> indices;
+    Eigen::DSizes<Eigen::DenseIndex, NDIM> sizes;
+    for (int i = 0; i < NDIM; ++i) {
+      indices[i] = begin[i];
+      sizes[i] = size[i];
     }
+
+    functor::Slice<Device, T, NDIM>()(
+        context->eigen_device<Device>(), result->tensor<T, NDIM>(),
+        context->input(0).tensor<T, NDIM>(), indices, sizes);
   }
 };
 
@@ -248,16 +264,11 @@ class MklSliceOp : public OpKernel {
         }
         return;
       }
-      // Special case for handling 4-D tensor slice.
-      if (input_dims == 4) {
-        HandleCase4D(context, begin, size, result);
-      } else {
-#define HANDLE_DIM(NDIM)                                                  \
-      if (input_dims == NDIM) {                                           \
-        functor::Slice<Device, T, NDIM>()(                                \
-            context->eigen_device<Device>(), result, input, begin, size); \
-            return;                                                       \
-      }
+#define HANDLE_DIM(NDIM)                            \
+  if (input_dims == NDIM) {                         \
+    HandleCase<NDIM>(context, begin, size, result); \
+    return;                                         \
+  }
 
       HANDLE_DIM(1);
       HANDLE_DIM(2);
@@ -265,13 +276,12 @@ class MklSliceOp : public OpKernel {
       HANDLE_DIM(4);
       HANDLE_DIM(5);
       HANDLE_DIM(6);
+      HANDLE_DIM(7);
 
 #undef HANDLE_DIM
 
-        // handle cases which dim >= 7
-        functor::Slice<Device, T, 7>()(
-          context->eigen_device<Device>(), result, input, begin, size);
-      }
+      OP_REQUIRES(context, false, errors::Unimplemented(
+                                      "SliceOp : Unhandled input dimensions"));
     }
   }
 
@@ -318,7 +328,8 @@ class MklSliceOp : public OpKernel {
     return false;
   }
 
-  void HandleCase4D(OpKernelContext* context,
+  template <int NDIM>
+  void HandleCase(OpKernelContext* context,
                   const gtl::ArraySlice<int64>& begin,
                   const gtl::ArraySlice<int64>& size, Tensor* result) {
     int slice_dim = -1;
@@ -327,7 +338,8 @@ class MklSliceOp : public OpKernel {
     // differs from the input tensor in only 1 out of 4 dimensions.
     // This case arises in the context of Slice of 4-D tensor in NHWC or NCHW
     // format over channel dimension.
-    if (DoesSliceShapeDifferInOnly1D(in_shape, begin, size, &slice_dim)) {
+    if (NDIM == 4 &&
+        DoesSliceShapeDifferInOnly1D(in_shape, begin, size, &slice_dim)) {
         size_t in_strides[4] = { (size_t) in_shape.dim_size(1) *
                                           in_shape.dim_size(2) *
                                           in_shape.dim_size(3),
@@ -391,8 +403,16 @@ class MklSliceOp : public OpKernel {
         // slice_dim is not 1 or 3, then we fallback to Eigen implementation.
     }
 
-    functor::Slice<Device, T, 4>()(
-        context->eigen_device<Device>(), result, context->input(0), begin, size);
+    Eigen::DSizes<Eigen::DenseIndex, NDIM> indices;
+    Eigen::DSizes<Eigen::DenseIndex, NDIM> sizes;
+    for (int i = 0; i < NDIM; ++i) {
+      indices[i] = begin[i];
+      sizes[i] = size[i];
+    }
+
+    functor::Slice<Device, T, NDIM>()(
+        context->eigen_device<Device>(), result->tensor<T, NDIM>(),
+        context->input(0).tensor<T, NDIM>(), indices, sizes);
   }
 };
 #endif
@@ -400,13 +420,13 @@ class MklSliceOp : public OpKernel {
 // Forward declarations of the functor specializations for declared in the
 // sharded source files.
 namespace functor {
-#define DECLARE_CPU_SPEC(T, NDIM)                        \
-  template <>                                            \
-  void Slice<CPUDevice, T, NDIM>::operator()(            \
-      const CPUDevice& d, Tensor* output,                \
-      const Tensor& input,                               \
-      const gtl::ArraySlice<int64>& slice_indices,       \
-      const gtl::ArraySlice<int64>& slice_sizes);        \
+#define DECLARE_CPU_SPEC(T, NDIM)                                  \
+  template <>                                                      \
+  void Slice<CPUDevice, T, NDIM>::operator()(                      \
+      const CPUDevice& d, typename TTypes<T, NDIM>::Tensor output, \
+      typename TTypes<T, NDIM>::ConstTensor input,                 \
+      const Eigen::DSizes<Eigen::DenseIndex, NDIM>& indices,       \
+      const Eigen::DSizes<Eigen::DenseIndex, NDIM>& sizes);        \
   extern template struct Slice<CPUDevice, T, NDIM>;
 
 #define DECLARE_FOR_N(T)  \
@@ -456,14 +476,13 @@ REGISTER_SLICE(bfloat16);
 #if GOOGLE_CUDA
 // Forward declarations of the functor specializations for GPU.
 namespace functor {
-#define DECLARE_GPU_SPEC(T, NDIM)                        \
-  template <>                                            \
-  void Slice<GPUDevice, T, NDIM>::operator()(            \
-      const GPUDevice& d,                                \
-      Tensor* output,                                    \
-      const Tensor& input,                               \
-      const gtl::ArraySlice<int64>& slice_indices,       \
-      const gtl::ArraySlice<int64>& slice_sizes);        \
+#define DECLARE_GPU_SPEC(T, NDIM)                                  \
+  template <>                                                      \
+  void Slice<GPUDevice, T, NDIM>::operator()(                      \
+      const GPUDevice& d, typename TTypes<T, NDIM>::Tensor output, \
+      typename TTypes<T, NDIM>::ConstTensor input,                 \
+      const Eigen::DSizes<Eigen::DenseIndex, NDIM>& indices,       \
+      const Eigen::DSizes<Eigen::DenseIndex, NDIM>& sizes);        \
   extern template struct Slice<GPUDevice, T, NDIM>;
 
 #define DECLARE_FOR_N(T)  \
@@ -517,14 +536,13 @@ REGISTER_KERNEL_BUILDER(Name("Slice")
 #ifdef TENSORFLOW_USE_SYCL
 // Forward declarations of the functor specializations for SYCL.
 namespace functor {
-#define DECLARE_SYCL_SPEC(T, NDIM)                       \
-  template <>                                            \
-  void Slice<SYCLDevice, T, NDIM>::operator()(           \
-      const SYCLDevice& d,                               \
-      Tensor* output,                                    \
-      const Tensor& input,                               \
-      const gtl::ArraySlice<int64>& slice_indices,       \
-      const gtl::ArraySlice<int64>& slice_sizes);        \
+#define DECLARE_SYCL_SPEC(T, NDIM)                                 \
+  template <>                                                      \
+  void Slice<SYCLDevice, T, NDIM>::operator()(                     \
+      const SYCLDevice& d, typename TTypes<T, NDIM>::Tensor output,\
+      typename TTypes<T, NDIM>::ConstTensor input,                 \
+      const Eigen::DSizes<Eigen::DenseIndex, NDIM>& indices,       \
+      const Eigen::DSizes<Eigen::DenseIndex, NDIM>& sizes);        \
   extern template struct Slice<SYCLDevice, T, NDIM>;
 
 #define DECLARE_FOR_N(T)   \
diff --git a/tensorflow/core/kernels/slice_op.h b/tensorflow/core/kernels/slice_op.h
index 55a4be985b..db7eded745 100644
--- a/tensorflow/core/kernels/slice_op.h
+++ b/tensorflow/core/kernels/slice_op.h
@@ -19,104 +19,31 @@ limitations under the License.
 // Functor definition for SliceOp, must be compilable by nvcc.
 
 #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
-#include "tensorflow/core/framework/tensor.h"
 #include "tensorflow/core/framework/tensor_types.h"
-#include "tensorflow/core/kernels/ops_util.h"
 
 namespace tensorflow {
-
-namespace internal {
-
-template <typename Device, typename T>
-void SliceSimple(const Device& d, Tensor* out, const Tensor& in,
-                 const gtl::ArraySlice<int64>& slice_indices);
-template <typename Device, typename T>
-void SliceSimpleGpu(const Device& d, Tensor* out, const Tensor& in,
-                 const gtl::ArraySlice<int64>& slice_indices);
-
-template <typename Device, typename T>
-void SliceSimple(const Device& d, Tensor* out, const Tensor& in,
-                 const gtl::ArraySlice<int64>& slice_indices) {
-  const int ndims = in.dims();
-  const int64 nelem = out->NumElements();
-  const gtl::InlinedVector<int64, 8> in_strides = ComputeStride<int64>(in.shape());
-  const gtl::InlinedVector<int64, 8> out_strides = ComputeStride<int64>(out->shape());
-  const T* p = in.flat<T>().data();
-  T* q = out->flat<T>().data();
-
-  std::vector<int64> i_idx(nelem, 0);
-  std::vector<int64> t(nelem, 0);
-
-  for (int64 o_idx = 0; o_idx < nelem; ++o_idx) {
-    t[o_idx] = o_idx;
-  }
-  for (int i = 0; i < ndims; ++i) {
-    int64 n = (nelem + 7) / 8;
-    int64 o_idx = 0;
-    switch (nelem % 8) {
-#define CALC_INPUT_IDX                                                            \
-  i_idx[o_idx] += (t[o_idx] / out_strides[i] + slice_indices[i]) * in_strides[i]; \
-  t[o_idx] %= out_strides[i];                                                     \
-  ++o_idx;
-      case 0: do { CALC_INPUT_IDX;
-      case 7:      CALC_INPUT_IDX;
-      case 6:      CALC_INPUT_IDX;
-      case 5:      CALC_INPUT_IDX;
-      case 4:      CALC_INPUT_IDX;
-      case 3:      CALC_INPUT_IDX;
-      case 2:      CALC_INPUT_IDX;
-      case 1:      CALC_INPUT_IDX;
-#undef CALC_INPUT_IDX
-              } while (--n > 0);
-    }
-  }
-  for (int64 o_idx = 0; o_idx < nelem; ++o_idx) {
-    q[o_idx] = p[i_idx[o_idx]];
-  }
-}
-
-template <typename Device, typename T, int NDIMS>
-void SliceUsingEigen(const Device& d, Tensor* out, const Tensor& in,
-                 const gtl::ArraySlice<int64>& slice_indices,
-                 const gtl::ArraySlice<int64>& slice_sizes) {
-  auto input = in.tensor<T, NDIMS>();
-  auto output = out->tensor<T, NDIMS>();
-  Eigen::DSizes<int, NDIMS> indices;
-  for (int i = 0; i < NDIMS; ++i) {
-    indices[i] = slice_indices[i];
-  }
-  Eigen::DSizes<int, NDIMS> sizes;
-  for (int i = 0; i < NDIMS; ++i) {
-    sizes[i] = slice_sizes[i];
-  }
-  const bool use_64bit = input.size() > Eigen::NumTraits<int>::highest();
-  if (!use_64bit &&
-      Eigen::internal::is_same<Device, Eigen::GpuDevice>::value) {
-    To32Bit(output).device(d) = To32Bit(input).slice(indices, sizes);
-  } else {
-    output.device(d) = input.slice(indices, sizes);
-  }
-}
-
-} // namespace internal
-
 namespace functor {
 
-// Template parameter NDIM is not neccesary here. The aim of keeping it
-// is to compile struct slice seperately which minimizes the compiling time.
-template <typename Device, typename T, int NDIM>
+template <typename Device, typename T, int NDIMS>
 struct Slice {
-  void operator()(const Device& d, Tensor* out, const Tensor& in,
-                  const gtl::ArraySlice<int64>& slice_indices,
-                  const gtl::ArraySlice<int64>& slice_sizes) {
-    if (in.dims() == NDIM) {
-        internal::SliceUsingEigen<Device, T, NDIM>(d, out, in, slice_indices, slice_sizes);
+  void operator()(const Device& d, typename TTypes<T, NDIMS>::Tensor output,
+                  typename TTypes<T, NDIMS>::ConstTensor input,
+                  const Eigen::DSizes<Eigen::DenseIndex, NDIMS>& slice_indices,
+                  const Eigen::DSizes<Eigen::DenseIndex, NDIMS>& slice_sizes) {
+    bool use_64bit = (input.size() > Eigen::NumTraits<int>::highest());
+    if (!use_64bit &&
+        Eigen::internal::is_same<Device, Eigen::GpuDevice>::value) {
+      Eigen::DSizes<int, NDIMS> indices;
+      for (int i = 0; i < NDIMS; ++i) {
+        indices[i] = slice_indices[i];
+      }
+      Eigen::DSizes<int, NDIMS> sizes;
+      for (int i = 0; i < NDIMS; ++i) {
+        sizes[i] = slice_sizes[i];
+      }
+      To32Bit(output).device(d) = To32Bit(input).slice(indices, sizes);
     } else {
-        if (Eigen::internal::is_same<Device, Eigen::GpuDevice>::value) {
-          internal::SliceSimpleGpu<Device, T>(d, out, in, slice_indices);
-        } else {
-          internal::SliceSimple<Device, T>(d, out, in, slice_indices);
-        }
+      output.device(d) = input.slice(slice_indices, slice_sizes);
     }
   }
 };
diff --git a/tensorflow/core/kernels/slice_op_gpu.cu.cc b/tensorflow/core/kernels/slice_op_gpu.cu.cc
index 3039b3d777..a301986f2f 100644
--- a/tensorflow/core/kernels/slice_op_gpu.cu.cc
+++ b/tensorflow/core/kernels/slice_op_gpu.cu.cc
@@ -21,65 +21,9 @@ limitations under the License.
 
 #include "tensorflow/core/framework/register_types.h"
 #include "tensorflow/core/framework/tensor_types.h"
-#include "tensorflow/core/kernels/ops_util.h"
 #include "tensorflow/core/platform/types.h"
-#include "tensorflow/core/util/cuda_kernel_helper.h"
 
 namespace tensorflow {
-namespace internal {
-
-template <typename T>
-__global__ void SliceKernel(int nthreads, const T* src, const int32* buf,
-                            const int32 ndims, T* dst) {
-  const int32* in_strides = buf;
-  const int32* out_strides = buf + ndims;
-  const int32* slice_indices = buf + ndims * 2;
-  CUDA_1D_KERNEL_LOOP(o_idx, nthreads) {
-    int32 i_idx = 0;
-    int32 t = o_idx;
-    for (int i = 0; i < ndims; ++i) {
-      i_idx += (t / out_strides[i] + slice_indices[i]) * in_strides[i];
-      t %= out_strides[i];
-    }
-    dst[o_idx] = ldg(src + i_idx);
-  }
-}
-
-template <typename Device, typename T>
-void SliceSimpleGpu(const Device& d, Tensor* out, const Tensor& in,
-                 const gtl::ArraySlice<int64>& slice_indices) {
-  // Ensures we can use 32-bit index.
-  const int64 in_nelem = in.NumElements();
-  CHECK_LT(in_nelem, kint32max) << "Tensor too large to transpose on GPU";
-  const int64 out_nelem = out->NumElements();
-  CHECK_LT(out_nelem, kint32max) << "Tensor too large to transpose on GPU";
-  // Pack strides and slice indices sizes into one buffer.
-  const int32 ndims = in.dims();
-  gtl::InlinedVector<int32, 24> host_buf(ndims * 3);
-  gtl::InlinedVector<int32, 8> in_strides = ComputeStride<int32>(in.shape());
-  gtl::InlinedVector<int32, 8> out_strides = ComputeStride<int32>(out->shape());
-  for (int i = 0; i < ndims; ++i) {
-    host_buf[i] = in_strides[i];
-    host_buf[ndims + i] = out_strides[i];
-    host_buf[ndims * 2 + i] = slice_indices[i];
-  }
-  auto num_bytes = sizeof(int64) * host_buf.size();
-  auto dev_buf = d.allocate(num_bytes);
-  // NOTE: host_buf is not allocated by CudaHostAllocator, and
-  // therefore we are doing a sync copy effectively.
-  d.memcpyHostToDevice(dev_buf, host_buf.data(), num_bytes);
-  // Launch kernel to q[...] = p[...].
-  const T* p = in.flat<T>().data();
-  T* q = out->flat<T>().data();
-  CudaLaunchConfig cfg = GetCudaLaunchConfig(out_nelem, d);
-  SliceKernel<<<cfg.block_count, cfg.thread_per_block, 0, d.stream()>>>(
-      cfg.virtual_thread_count, p, reinterpret_cast<const int32*>(dev_buf),
-      ndims, q);
-  // Safe to deallocate immediately after the kernel launch.
-  d.deallocate(dev_buf);
-}
-
-} // namespace internal
 
 typedef Eigen::GpuDevice GPUDevice;
 
diff --git a/tensorflow/core/kernels/strided_slice_op.cc b/tensorflow/core/kernels/strided_slice_op.cc
index 8fc40db3cc..73b6d4cf6a 100644
--- a/tensorflow/core/kernels/strided_slice_op.cc
+++ b/tensorflow/core/kernels/strided_slice_op.cc
@@ -427,6 +427,7 @@ REGISTER_STRIDED_SLICE(bfloat16);
 TF_CALL_GPU_NUMBER_TYPES(REGISTER_GPU);
 TF_CALL_complex64(REGISTER_GPU);
 TF_CALL_complex128(REGISTER_GPU);
+TF_CALL_int64(REGISTER_GPU);
 
 // A special GPU kernel for int32.
 // TODO(b/25387198): Also enable int32 in device memory. This kernel
diff --git a/tensorflow/core/kernels/strided_slice_op_impl.h b/tensorflow/core/kernels/strided_slice_op_impl.h
index 7d42887426..afe3a051e6 100644
--- a/tensorflow/core/kernels/strided_slice_op_impl.h
+++ b/tensorflow/core/kernels/strided_slice_op_impl.h
@@ -84,16 +84,16 @@ void HandleStridedSliceCase(OpKernelContext* context,
 
   gtl::InlinedVector<int64, 4> processing_dims = processing_shape.dim_sizes();
   if (is_simple_slice) {
-    gtl::InlinedVector<int64, 4> sizes(begin.size());
+    Eigen::DSizes<Eigen::DenseIndex, NDIM> begin_di;
+    Eigen::DSizes<Eigen::DenseIndex, NDIM> sizes_di;
     for (int i = 0; i < NDIM; ++i) {
-      sizes[i] = end[i] - begin[i];
+      begin_di[i] = begin[i];
+      sizes_di[i] = end[i] - begin[i];
     }
-    const TensorShape final_shape = result->shape();
-    CHECK(result->CopyFrom(*result, processing_shape));
-    const Tensor input = context->input(0);
-    functor::Slice<Device, T, NDIM>()(
-        context->eigen_device<Device>(), result, input, begin, sizes);
-    CHECK(result->CopyFrom(*result, final_shape));
+    functor::Slice<Device, Proxy, NDIM>()(
+        context->eigen_device<Device>(),
+        result->bit_casted_shaped<Proxy, NDIM>(processing_dims),
+        context->input(0).bit_casted_tensor<Proxy, NDIM>(), begin_di, sizes_di);
   } else {
     Eigen::DSizes<Eigen::DenseIndex, NDIM> begin_di;
     Eigen::DSizes<Eigen::DenseIndex, NDIM> end_di;
@@ -196,9 +196,10 @@ class HandleStridedSliceAssignCase<Device, T, 0> {
   extern template struct StridedSlice<GPUDevice, T, NDIM>;         \
   template <>                                                      \
   void Slice<GPUDevice, T, NDIM>::operator()(                      \
-      const GPUDevice& d, Tensor* output, const Tensor& input,     \
-      const gtl::ArraySlice<int64>& slice_indices,                 \
-      const gtl::ArraySlice<int64>& slice_sizes);                  \
+      const GPUDevice& d, typename TTypes<T, NDIM>::Tensor output, \
+      typename TTypes<T, NDIM>::ConstTensor input,                 \
+      const Eigen::DSizes<Eigen::DenseIndex, NDIM>& indices,       \
+      const Eigen::DSizes<Eigen::DenseIndex, NDIM>& sizes);        \
   extern template struct Slice<GPUDevice, T, NDIM>;                \
   template <>                                                      \
   void StridedSliceGrad<GPUDevice, T, NDIM>::operator()(           \
@@ -283,6 +284,7 @@ TF_CALL_GPU_NUMBER_TYPES(DECLARE_FOR_N_GPU);
 TF_CALL_complex64(DECLARE_FOR_N_GPU);
 TF_CALL_complex128(DECLARE_FOR_N_GPU);
 DECLARE_FOR_N_GPU(int32);
+DECLARE_FOR_N_GPU(int64);
 #endif  // END GOOGLE_CUDA
 
 TF_CALL_ALL_TYPES(DECLARE_FOR_N_CPU);
@@ -298,6 +300,7 @@ DECLARE_FOR_N_CPU(bfloat16);
 TF_CALL_SYCL_PROXY_TYPES(PREVENT_FOR_N_SYCL);
 TF_CALL_GPU_NUMBER_TYPES_NO_HALF(DECLARE_FOR_N_SYCL);
 DECLARE_FOR_N_SYCL(int32);
+DECLARE_FOR_N_SYCL(int64);
 
 #undef DECLARE_FOR_N_SYCL
 #endif // TENSORFLOW_USE_SYCL
diff --git a/tensorflow/core/kernels/strided_slice_op_test.cc b/tensorflow/core/kernels/strided_slice_op_test.cc
index 78bb15463c..281ca0f58f 100644
--- a/tensorflow/core/kernels/strided_slice_op_test.cc
+++ b/tensorflow/core/kernels/strided_slice_op_test.cc
@@ -76,69 +76,20 @@ static void SliceHelper(int iters, int size) {
   testing::UseRealTime();
 }
 
-template <typename T>
-static void Dim8SliceHelper(int iters, int size) {
-  testing::StopTiming();
-  Graph* g = new Graph(OpRegistry::Global());
-  DataType dt = DataTypeToEnum<T>::v();
-  int kDim = 100;
-  int kMaxSize = 15000;
-  CHECK_LT(size, kMaxSize);
-
-  Tensor begin(DT_INT32, TensorShape({8}));
-  begin.flat<int32>()(10) = 10;
-  for (int i = 1; i < 7; ++i) {
-    begin.flat<int32>()(i) = 0;
-  }
-  begin.flat<int32>()(7) = 10;
-
-  Tensor end(DT_INT32, TensorShape({8}));
-  end.flat<int32>()(0) = 10 + kDim;
-  for (int i = 1; i < 7; ++i) {
-    end.flat<int32>()(i) = 1;
-  }
-  end.flat<int32>()(7) = 10 + size;
-
-  Tensor strides(DT_INT32, TensorShape({8}));
-  for (int i = 0; i < 8; ++i) {
-    strides.flat<int32>()(i) = 1;
-  }
-
-  Tensor input(dt, TensorShape({2*kDim, 1, 1, 1, 1, 1, 1, kMaxSize}));
-  input.flat<T>().setRandom();
-
-  Node* node;
-  TF_CHECK_OK(NodeBuilder(g->NewName("n"), "StridedSlice")
-                  .Input(test::graph::Constant(g, input))
-                  .Input(test::graph::Constant(g, begin))
-                  .Input(test::graph::Constant(g, end))
-                  .Input(test::graph::Constant(g, strides))
-                  .Attr("T", dt)
-                  .Finalize(g, &node));
-
-  testing::BytesProcessed(static_cast<int64>(iters) * kDim * size * sizeof(T));
-  testing::StartTiming();
-  test::Benchmark("cpu", g).Run(iters);
-  testing::UseRealTime();
-}
-
 static void BM_SliceFloat(int iters, int dim2) {
   SliceHelper<float>(iters, dim2);
-  Dim8SliceHelper<float>(iters, dim2);
 }
 
 BENCHMARK(BM_SliceFloat)->Arg(100)->Arg(1000)->Arg(10000);
 
 static void BM_SliceComplex64(int iters, int dim2) {
   SliceHelper<std::complex<float>>(iters, dim2);
-  Dim8SliceHelper<std::complex<float>>(iters, dim2);
 }
 
 BENCHMARK(BM_SliceComplex64)->Arg(100)->Arg(1000)->Arg(10000);
 
 static void BM_SliceBFloat16(int iters, int dim2) {
   SliceHelper<bfloat16>(iters, dim2);
-  Dim8SliceHelper<bfloat16>(iters, dim2);
 }
 
 BENCHMARK(BM_SliceBFloat16)->Arg(100)->Arg(1000)->Arg(10000);
diff --git a/tensorflow/core/kernels/transpose_op.cc b/tensorflow/core/kernels/transpose_op.cc
index 96c051c636..20f0edf309 100644
--- a/tensorflow/core/kernels/transpose_op.cc
+++ b/tensorflow/core/kernels/transpose_op.cc
@@ -31,14 +31,13 @@ limitations under the License.
 
 namespace tensorflow {
 
-// inv = InvertPermutationOp(T<int32/int64> p) takes a permutation of
+// inv = InvertPermutationOp(T<int32> p) takes a permutation of
 // integers 0, 1, ..., n - 1 and returns the inverted
 // permutation of p. I.e., inv[p[i]] == i, for i in [0 .. n).
 //
-// REQUIRES: input is a vector of int32 or int64.
+// REQUIRES: input is a vector of int32.
 // REQUIRES: input is a permutation of 0, 1, ..., n-1.
 
-template <typename T>
 class InvertPermutationOp : public OpKernel {
  public:
   explicit InvertPermutationOp(OpKernelConstruction* context)
@@ -49,19 +48,20 @@ class InvertPermutationOp : public OpKernel {
     OP_REQUIRES(
         context, TensorShapeUtils::IsVector(input.shape()),
         errors::InvalidArgument("invert_permutation expects a 1D vector."));
-    auto Tin = input.vec<T>();
+    auto Tin = input.vec<int32>();
     OP_REQUIRES(context,
                 FastBoundsCheck(Tin.size(), std::numeric_limits<int32>::max()),
                 errors::InvalidArgument("permutation of nonnegative int32s "
                                         "must have <= int32 max elements"));
-    const T N = static_cast<T>(Tin.size());  // Safe: bounds-checked above.
+    const int32 N =
+        static_cast<int32>(Tin.size());  // Safe: bounds-checked above.
     Tensor* output = nullptr;
     OP_REQUIRES_OK(context,
                    context->allocate_output(0, input.shape(), &output));
-    auto Tout = output->vec<T>();
+    auto Tout = output->vec<int32>();
     std::fill_n(Tout.data(), N, -1);
     for (int i = 0; i < N; ++i) {
-      const T d = internal::SubtleMustCopy(Tin(i));
+      const int32 d = internal::SubtleMustCopy(Tin(i));
       OP_REQUIRES(context, FastBoundsCheck(d, N),
                   errors::InvalidArgument(d, " is not between 0 and ", N));
       OP_REQUIRES(context, Tout(d) == -1,
@@ -73,23 +73,14 @@ class InvertPermutationOp : public OpKernel {
 
 REGISTER_KERNEL_BUILDER(
     Name("InvertPermutation").Device(DEVICE_CPU).TypeConstraint<int32>("T"),
-    InvertPermutationOp<int32>);
-REGISTER_KERNEL_BUILDER(
-    Name("InvertPermutation").Device(DEVICE_CPU).TypeConstraint<int64>("T"),
-    InvertPermutationOp<int64>);
+    InvertPermutationOp);
 
 REGISTER_KERNEL_BUILDER(Name("InvertPermutation")
                             .Device(DEVICE_GPU)
                             .TypeConstraint<int32>("T")
                             .HostMemory("x")
                             .HostMemory("y"),
-                        InvertPermutationOp<int32>);
-REGISTER_KERNEL_BUILDER(Name("InvertPermutation")
-                            .Device(DEVICE_GPU)
-                            .TypeConstraint<int64>("T")
-                            .HostMemory("x")
-                            .HostMemory("y"),
-                        InvertPermutationOp<int64>);
+                        InvertPermutationOp);
 
 #ifdef TENSORFLOW_USE_SYCL
 REGISTER_KERNEL_BUILDER(Name("InvertPermutation")
@@ -97,13 +88,7 @@ REGISTER_KERNEL_BUILDER(Name("InvertPermutation")
                             .TypeConstraint<int32>("T")
                             .HostMemory("x")
                             .HostMemory("y"),
-                        InvertPermutationOp<int32>);
-REGISTER_KERNEL_BUILDER(Name("InvertPermutation")
-                            .Device(DEVICE_SYCL)
-                            .TypeConstraint<int64>("T")
-                            .HostMemory("x")
-                            .HostMemory("y"),
-                        InvertPermutationOp<int64>);
+                        InvertPermutationOp);
 #endif  // TENSORFLOW_USE_SYCL
 
 namespace {
diff --git a/tensorflow/core/kernels/unique_op.cc b/tensorflow/core/kernels/unique_op.cc
index d087784c8a..701c5f6d2b 100644
--- a/tensorflow/core/kernels/unique_op.cc
+++ b/tensorflow/core/kernels/unique_op.cc
@@ -13,7 +13,6 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#include <functional>
 #include <unordered_map>
 #include <utility>
 
@@ -22,7 +21,6 @@ limitations under the License.
 #include "tensorflow/core/framework/tensor.h"
 #include "tensorflow/core/framework/tensor_shape.h"
 #include "tensorflow/core/lib/core/status.h"
-#include "tensorflow/core/lib/hash/hash.h"
 
 namespace tensorflow {
 
@@ -35,6 +33,8 @@ class UniqueOp : public OpKernel {
 
   void Compute(OpKernelContext* context) override {
     const Tensor& input = context->input(0);
+    OP_REQUIRES(context, TensorShapeUtils::IsVector(input.shape()),
+                errors::InvalidArgument("unique expects a 1D vector."));
     // TODO(dga):  Make unique polymorphic for returning int32 and int64
     // vectors to support large tensors.
     OP_REQUIRES(context,
@@ -42,102 +42,31 @@ class UniqueOp : public OpKernel {
                 errors::InvalidArgument(
                     "unique does not support input tensors larger than ",
                     std::numeric_limits<int32>::max(), " elements"));
-
-    int64 axis = 0;
-    std::vector<int64> new_sizes{1, input.NumElements(), 1};
-    if (context->num_inputs() == 1) {
-      OP_REQUIRES(context, TensorShapeUtils::IsVector(input.shape()),
-                  errors::InvalidArgument("unique expects a 1D vector."));
-    } else {
-      // In case of UniqueV2, the axis is a 1D vector. The purpose is
-      // to allow specifying either "no axis" or "axis". The `[]` means
-      // "no axis", while `[x]` means `axis = x`.
-      const Tensor& axis_tensor = context->input(1);
-      OP_REQUIRES(context, TensorShapeUtils::IsVector(axis_tensor.shape()),
-                  errors::InvalidArgument("axis expects a 1D vector."));
-      OP_REQUIRES(
-          context, axis_tensor.NumElements() <= 1,
-          errors::InvalidArgument(
-              "axis does not support input tensors larger than 1 elements"));
-      if (axis_tensor.NumElements() == 0) {
-        OP_REQUIRES(context, TensorShapeUtils::IsVector(input.shape()),
-                    errors::InvalidArgument("unique expects a 1D vector."));
-      } else {
-        auto axis_vec = axis_tensor.vec<int64>();
-        axis = axis_vec(0);
-        axis = axis < 0 ? axis + input.dims() : axis;
-        OP_REQUIRES(context, 0 <= axis && axis < input.dims(),
-                    errors::InvalidArgument("axis has to be between [0, ",
-                                            input.dims(), ")"));
-        if (axis > 0) {
-          for (int64 i = 0; i < axis; i++) {
-            new_sizes[0] *= input.dim_size(i);
-          }
-        }
-        new_sizes[1] = input.dim_size(axis);
-        if (axis + 1 < input.dims()) {
-          for (int64 i = axis + 1; i < input.dims(); i++) {
-            new_sizes[2] *= input.dim_size(i);
-          }
-        }
-      }
-    }
-
-    auto Tin = input.shaped<T, 3>(new_sizes);
+    auto Tin = input.vec<T>();
+    const int64 N = static_cast<int64>(Tin.size());
 
     Tensor* idx = nullptr;
-    OP_REQUIRES_OK(context, context->allocate_output(
-                                1, TensorShape({Tin.dimension(1)}), &idx));
+    OP_REQUIRES_OK(context, context->forward_input_or_allocate_output(
+                                {0}, 1, input.shape(), &idx));
     auto idx_vec = idx->template vec<TIndex>();
 
-    auto hash_fn = [&Tin](const int64& key) -> unsigned long {
-      size_t h = 0;
-      for (int64 i = 0; i < Tin.dimension(0); i++) {
-        for (int64 j = 0; j < Tin.dimension(2); j++) {
-          h = Hash64Combine(h, hash<T>{}(Tin(i, key, j)));
-        }
-      }
-      return h;
-    };
-
-    auto equal_to_fn = [&Tin](const int64& lhs, const int64& rhs) {
-      for (int64 i = 0; i < Tin.dimension(0); i++) {
-        for (int64 j = 0; j < Tin.dimension(2); j++) {
-          if (Tin(i, lhs, j) != Tin(i, rhs, j)) {
-            return false;
-          }
-        }
-      }
-      return true;
-    };
-
-    std::unordered_map<int64, int64, decltype(hash_fn), decltype(equal_to_fn)>
-        uniq(0, hash_fn, equal_to_fn);
-
-    uniq.reserve(2 * Tin.dimension(1));
-
-    for (int64 i = 0, j = 0; i < Tin.dimension(1); ++i) {
-      auto it = uniq.insert(std::make_pair(i, j));
+    std::unordered_map<T, TIndex> uniq;
+    uniq.reserve(2 * N);
+    for (int64 i = 0, j = 0; i < N; ++i) {
+      auto it = uniq.insert(std::make_pair(Tin(i), j));
       idx_vec(i) = it.first->second;
       if (it.second) {
         ++j;
       }
     }
-
     int64 uniq_size = static_cast<int64>(uniq.size());
-    new_sizes[1] = uniq_size;
-    TensorShape output_shape(input.shape());
-    output_shape.set_dim(axis, uniq_size);
     Tensor* output = nullptr;
-    OP_REQUIRES_OK(context, context->allocate_output(0, output_shape, &output));
-    auto Tout = output->shaped<T, 3>(new_sizes);
+    OP_REQUIRES_OK(context, context->allocate_output(
+                                0, TensorShape({uniq_size}), &output));
+    auto output_vec = output->template vec<T>();
 
     for (auto it : uniq) {
-      for (int64 i = 0; i < Tin.dimension(0); i++) {
-        for (int64 j = 0; j < Tin.dimension(2); j++) {
-          Tout(i, it.second, j) = Tin(i, it.first, j);
-        }
-      }
+      output_vec(it.second) = it.first;
     }
 
     if (num_outputs() > 2) {
@@ -145,7 +74,7 @@ class UniqueOp : public OpKernel {
                                   2, TensorShape({uniq_size}), &output));
       auto count_output_vec = output->template vec<TIndex>();
       count_output_vec.setZero();
-      for (int64 i = 0; i < Tin.dimension(1); ++i) {
+      for (int64 i = 0; i < N; ++i) {
         count_output_vec(idx_vec(i))++;
       }
     }
@@ -163,16 +92,6 @@ class UniqueOp : public OpKernel {
                               .TypeConstraint<type>("T")         \
                               .TypeConstraint<int64>("out_idx"), \
                           UniqueOp<type, int64>);                \
-  REGISTER_KERNEL_BUILDER(Name("UniqueV2")                       \
-                              .Device(DEVICE_CPU)                \
-                              .TypeConstraint<type>("T")         \
-                              .TypeConstraint<int32>("out_idx"), \
-                          UniqueOp<type, int32>);                \
-  REGISTER_KERNEL_BUILDER(Name("UniqueV2")                       \
-                              .Device(DEVICE_CPU)                \
-                              .TypeConstraint<type>("T")         \
-                              .TypeConstraint<int64>("out_idx"), \
-                          UniqueOp<type, int64>);                \
   REGISTER_KERNEL_BUILDER(Name("UniqueWithCounts")               \
                               .Device(DEVICE_CPU)                \
                               .TypeConstraint<type>("T")         \
@@ -257,5 +176,5 @@ REGISTER_KERNEL_BUILDER(Name("Unique")
                             .HostMemory("y")
                             .HostMemory("idx"),
                         UniqueOp<int64, int64>);
-#endif  // TENSORFLOW_USE_SYCL
+#endif // TENSORFLOW_USE_SYCL
 }  // namespace tensorflow
diff --git a/tensorflow/core/ops/array_ops.cc b/tensorflow/core/ops/array_ops.cc
index 9fa6423d59..be2916f154 100644
--- a/tensorflow/core/ops/array_ops.cc
+++ b/tensorflow/core/ops/array_ops.cc
@@ -723,9 +723,7 @@ y: a tensor of the same shape and type as x but filled with zeros.
 REGISTER_OP("OnesLike")
     .Input("x: T")
     .Output("y: T")
-    .Attr(
-        "T: {float, double, int8, uint8, int16, uint16, int32, int64, "
-        "complex64, complex128, bool}")
+    .Attr("T: {float, double, int32, int64, complex64, complex128}")
     .SetShapeFn(shape_inference::UnchangedShape)
     .Doc(R"doc(
 Returns a tensor of ones with the same shape and type as x.
@@ -2033,46 +2031,6 @@ y: 1-D.
 idx: 1-D.
 )doc");
 
-REGISTER_OP("UniqueV2")
-    .Input("x: T")
-    .Input("axis: int64")
-    .Output("y: T")
-    .Output("idx: out_idx")
-    .Attr("T: type")
-    .Attr("out_idx: {int32, int64} = DT_INT32")
-    .SetShapeFn([](InferenceContext* c) {
-      c->set_output(0, c->Vector(InferenceContext::kUnknownDim));
-      c->set_output(1, c->input(0));
-      return Status::OK();
-    })
-    .Doc(R"doc(
-Finds unique elements in a 1-D tensor.
-
-This operation returns a tensor `y` containing all of the unique elements of `x`
-sorted in the same order that they occur in `x`. This operation also returns a
-tensor `idx` the same size as `x` that contains the index of each value of `x`
-in the unique output `y`. In other words:
-
-`y[idx[i]] = x[i] for i in [0, 1,...,rank(x) - 1]`
-
-For example:
-
-```
-# tensor 'x' is [1, 1, 2, 4, 4, 4, 7, 8, 8]
-y, idx = unique(x)
-y ==> [1, 2, 4, 7, 8]
-idx ==> [0, 0, 1, 2, 2, 2, 3, 4, 4]
-```
-
-
-x: A `Tensor`.
-axis: A `Tensor` of type `int64` (default: 0). The axis of the Tensor to
-  find the unique elements.
-y: A `Tensor`. Unique elements along the `axis` of `Tensor` x.
-idx: A 1-D Tensor. Has the same type as x that contains the index of each
-  value of x in the output y.
-)doc");
-
 // --------------------------------------------------------------------------
 REGISTER_OP("UniqueWithCounts")
     .Input("x: T")
diff --git a/tensorflow/core/ops/math_ops.cc b/tensorflow/core/ops/math_ops.cc
index d30b847696..7b10af9f44 100644
--- a/tensorflow/core/ops/math_ops.cc
+++ b/tensorflow/core/ops/math_ops.cc
@@ -1829,8 +1829,6 @@ need not be sorted and need not cover all values in the full
 range of valid values.
 
 If the sum is empty for a given segment ID `i`, `output[i] = 0`.
-If the given segment ID `i` is negative, the value is dropped and will not be
-added to the sum of the segment.
 
 `num_segments` should equal the number of distinct segment IDs.
 
diff --git a/tensorflow/core/ops/nn_ops.cc b/tensorflow/core/ops/nn_ops.cc
index a242a13878..e245c8ba91 100644
--- a/tensorflow/core/ops/nn_ops.cc
+++ b/tensorflow/core/ops/nn_ops.cc
@@ -819,7 +819,7 @@ REGISTER_OP("DepthwiseConv2dNative")
     .Input("input: T")
     .Input("filter: T")
     .Output("output: T")
-    .Attr("T: {half, float, double}")
+    .Attr("T: {float, double}")
     .Attr("strides: list(int)")
     .Attr(GetPaddingAttrString())
     .Attr(GetConvnetDataFormatAttrString())
@@ -945,7 +945,7 @@ REGISTER_OP("Conv3D")
     .Input("input: T")
     .Input("filter: T")
     .Output("output: T")
-    .Attr("T: {half, float, double}")
+    .Attr("T: {float, double}")
     .Attr("strides: list(int) >= 5")
     .Attr(GetPaddingAttrString())
     .Attr(GetConvnet3dDataFormatAttrString())
@@ -977,7 +977,7 @@ REGISTER_OP("Conv3DBackpropInput")
     .Input("filter: T")
     .Input("out_backprop: T")
     .Output("output: T")
-    .Attr("T: {half, float, double}")
+    .Attr("T: {float, double}")
     .Attr("strides: list(int) >= 5")
     .Attr(GetPaddingAttrString())
     .Deprecated(10, "Use Conv3DBackpropInputV2")
@@ -1003,7 +1003,7 @@ REGISTER_OP("Conv3DBackpropFilter")
     .Input("filter: T")
     .Input("out_backprop: T")
     .Output("output: T")
-    .Attr("T: {half, float, double}")
+    .Attr("T: {float, double}")
     .Attr("strides: list(int) >= 5")
     .Attr(GetPaddingAttrString())
     .Deprecated(10, "Use Conv3DBackpropFilterV2")
@@ -1032,7 +1032,7 @@ REGISTER_OP("Conv3DBackpropInputV2")
     .Input("filter: T")
     .Input("out_backprop: T")
     .Output("output: T")
-    .Attr("T: {half, float, double}")
+    .Attr("T: {float, double}")
     .Attr("strides: list(int) >= 5")
     .Attr(GetPaddingAttrString())
     .Attr(GetConvnet3dDataFormatAttrString())
@@ -1069,7 +1069,7 @@ REGISTER_OP("Conv3DBackpropFilterV2")
     .Input("filter_sizes: int32")
     .Input("out_backprop: T")
     .Output("output: T")
-    .Attr("T: {half, float, double}")
+    .Attr("T: {float, double}")
     .Attr("strides: list(int) >= 5")
     .Attr(GetPaddingAttrString())
     .Attr(GetConvnet3dDataFormatAttrString())
diff --git a/tensorflow/core/ops/ops.pbtxt b/tensorflow/core/ops/ops.pbtxt
index 9c41957ae6..6ce0b70c9d 100644
--- a/tensorflow/core/ops/ops.pbtxt
+++ b/tensorflow/core/ops/ops.pbtxt
@@ -5449,7 +5449,6 @@ op {
     type: "type"
     allowed_values {
       list {
-        type: DT_HALF
         type: DT_FLOAT
         type: DT_DOUBLE
       }
@@ -5516,7 +5515,6 @@ op {
     type: "type"
     allowed_values {
       list {
-        type: DT_HALF
         type: DT_FLOAT
         type: DT_DOUBLE
       }
@@ -5572,7 +5570,6 @@ op {
     type: "type"
     allowed_values {
       list {
-        type: DT_HALF
         type: DT_FLOAT
         type: DT_DOUBLE
       }
@@ -5638,7 +5635,6 @@ op {
     type: "type"
     allowed_values {
       list {
-        type: DT_HALF
         type: DT_FLOAT
         type: DT_DOUBLE
       }
@@ -5694,7 +5690,6 @@ op {
     type: "type"
     allowed_values {
       list {
-        type: DT_HALF
         type: DT_FLOAT
         type: DT_DOUBLE
       }
diff --git a/tensorflow/core/platform/default/build_config/BUILD b/tensorflow/core/platform/default/build_config/BUILD
index f2fadb4558..f746b15fee 100644
--- a/tensorflow/core/platform/default/build_config/BUILD
+++ b/tensorflow/core/platform/default/build_config/BUILD
@@ -12,7 +12,6 @@ load("//tensorflow:tensorflow.bzl", "tf_copts")
 load("//tensorflow:tensorflow.bzl", "tf_cuda_library")
 load("//tensorflow/core:platform/default/build_config_root.bzl", "if_static")
 load("@local_config_sycl//sycl:platform.bzl", "sycl_library_path")
-load("@local_config_sycl//sycl:build_defs.bzl", "if_ccpp")
 
 cc_library(
     name = "gtest",
@@ -195,16 +194,17 @@ cc_library(
 
 cc_library(
     name = "sycl",
-    data = if_ccpp([
+    data = [
         "@local_config_sycl//sycl:{}".format(sycl_library_path("ComputeCpp")),
-    ]),
-    linkopts = if_ccpp([
-        "-Wl,-rpath,../local_config_sycl/sycl/lib",
-    ]),
-    deps = if_ccpp(
-        ["@local_config_sycl//sycl:syclrt"],
-        ["@local_config_sycl//sycl:sycl_headers"],
-    ),
+    ],
+    linkopts = select({
+        "//conditions:default": [
+            "-Wl,-rpath,../local_config_sycl/sycl/lib",
+        ],
+    }),
+    deps = [
+        "@local_config_sycl//sycl:syclrt",
+    ],
 )
 
 filegroup(
diff --git a/tensorflow/core/platform/default/notification.h b/tensorflow/core/platform/default/notification.h
index 5c401b7477..6a214dbd0a 100644
--- a/tensorflow/core/platform/default/notification.h
+++ b/tensorflow/core/platform/default/notification.h
@@ -73,7 +73,7 @@ class Notification {
   }
 
   mutex mu_;                    // protects mutations of notified_
-  condition_variable cv_;       // signaled when notified_ becomes non-zero
+  condition_variable cv_;       // signalled when notified_ becomes non-zero
   std::atomic<bool> notified_;  // mutations under mu_
 };
 
diff --git a/tensorflow/core/platform/posix/error.cc b/tensorflow/core/platform/posix/error.cc
index f8b0285c50..e9baad5422 100644
--- a/tensorflow/core/platform/posix/error.cc
+++ b/tensorflow/core/platform/posix/error.cc
@@ -72,7 +72,7 @@ error::Code ErrnoToCode(int err_number) {
     case EBUSY:       // Device or resource busy
     case ECHILD:      // No child processes
     case EISCONN:     // Socket is connected
-#if !defined(_WIN32) && !defined(__HAIKU__)
+#if !defined(_WIN32)
     case ENOTBLK:     // Block device required
 #endif
     case ENOTCONN:    // The socket is not connected
@@ -94,7 +94,7 @@ error::Code ErrnoToCode(int err_number) {
     case ENODATA:  // No message is available on the STREAM read queue
     case ENOMEM:   // Not enough space
     case ENOSR:    // No STREAM resources
-#if !defined(_WIN32) && !defined(__HAIKU__)
+#if !defined(_WIN32)
     case EUSERS:   // Too many users
 #endif
       code = error::RESOURCE_EXHAUSTED;
@@ -111,7 +111,7 @@ error::Code ErrnoToCode(int err_number) {
     case EPFNOSUPPORT:     // Protocol family not supported
 #endif
     case EPROTONOSUPPORT:  // Protocol not supported
-#if !defined(_WIN32) && !defined(__HAIKU__)
+#if !defined(_WIN32)
     case ESOCKTNOSUPPORT:  // Socket type not supported
 #endif
     case EXDEV:            // Improper link
@@ -131,8 +131,7 @@ error::Code ErrnoToCode(int err_number) {
     case ENETUNREACH:   // Network unreachable
     case ENOLCK:        // No locks available
     case ENOLINK:       // Link has been severed
-#if !(defined(__APPLE__) || defined(__FreeBSD__) || defined(_WIN32) \
-	|| defined(__HAIKU__))
+#if !(defined(__APPLE__) || defined(__FreeBSD__) || defined(_WIN32))
     case ENONET:  // Machine is not on the network
 #endif
       code = error::UNAVAILABLE;
@@ -157,7 +156,7 @@ error::Code ErrnoToCode(int err_number) {
     case ENOEXEC:      // Exec format error
     case ENOMSG:       // No message of the desired type
     case EPROTO:       // Protocol error
-#if !defined(_WIN32) && !defined(__HAIKU__)
+#if !defined(_WIN32)
     case EREMOTE:      // Object is remote
 #endif
       code = error::UNKNOWN;
diff --git a/tensorflow/core/platform/posix/port.cc b/tensorflow/core/platform/posix/port.cc
index 09f69a95c1..6cba40ccfc 100644
--- a/tensorflow/core/platform/posix/port.cc
+++ b/tensorflow/core/platform/posix/port.cc
@@ -37,8 +37,7 @@ limitations under the License.
 #ifdef TF_USE_SNAPPY
 #include "snappy.h"
 #endif
-#if (defined(__APPLE__) && defined(__MACH__)) || defined(__FreeBSD__) \
-	|| defined(__HAIKU__)
+#if (defined(__APPLE__) && defined(__MACH__)) || defined(__FreeBSD__)
 #include <thread>
 #endif
 
@@ -62,8 +61,7 @@ int NumSchedulableCPUs() {
   }
   perror("sched_getaffinity");
 #endif
-#if (defined(__APPLE__) && defined(__MACH__)) || defined(__FreeBSD__) \
-	|| defined(__HAIKU__)
+#if (defined(__APPLE__) && defined(__MACH__)) || defined(__FreeBSD__)
   unsigned int count = std::thread::hardware_concurrency();
   if (count > 0) return static_cast<int>(count);
 #endif
diff --git a/tensorflow/core/public/version.h b/tensorflow/core/public/version.h
index ec077c4283..1bf9c93101 100644
--- a/tensorflow/core/public/version.h
+++ b/tensorflow/core/public/version.h
@@ -24,7 +24,7 @@ limitations under the License.
 
 // TF_VERSION_SUFFIX is non-empty for pre-releases (e.g. "-alpha", "-alpha.1",
 // "-beta", "-rc", "-rc.1")
-#define TF_VERSION_SUFFIX ""
+#define TF_VERSION_SUFFIX "-rc1"
 
 #define TF_STR_HELPER(x) #x
 #define TF_STR(x) TF_STR_HELPER(x)
diff --git a/tensorflow/core/util/mkl_util.h b/tensorflow/core/util/mkl_util.h
index 118ff0d0d6..1bfa4f83a3 100644
--- a/tensorflow/core/util/mkl_util.h
+++ b/tensorflow/core/util/mkl_util.h
@@ -26,23 +26,18 @@ limitations under the License.
 #include "mkl_trans.h"
 #include "tensorflow/core/framework/tensor.h"
 #include "tensorflow/core/framework/tensor_shape.h"
+
 #include "tensorflow/core/framework/op_kernel.h"
+#include "tensorflow/core/graph/mkl_graph_util.h"
 #include "tensorflow/core/lib/core/errors.h"
 #include "tensorflow/core/lib/gtl/array_slice.h"
 #include "tensorflow/core/platform/logging.h"
 #include "tensorflow/core/platform/macros.h"
 #include "tensorflow/core/util/padding.h"
 #include "tensorflow/core/util/tensor_format.h"
-#include "tensorflow/core/graph/mkl_graph_util.h"
 
 #ifdef INTEL_MKL_DNN
 #include "mkldnn.hpp"
-
-using mkldnn::memory;
-using mkldnn::reorder;
-using mkldnn::primitive;
-using mkldnn::padding_kind;
-using mkldnn::engine;
 #endif
 
 // The file contains a number of utility classes and functions used by MKL
@@ -56,8 +51,6 @@ namespace tensorflow {
 // Tensorflow tensor.
 
 typedef enum { W = 0, H = 1, C = 2, N = 3 } MklDims;
-typedef enum { Dim_N = 0, Dim_C = 1, Dim_H = 2, Dim_W = 3,
-               Dim_O = 0, Dim_I = 1 } MklDnnDims;
 
 class MklShape {
  public:
@@ -150,9 +143,7 @@ class MklShape {
   size_t GetDimension() const { return dimension_; }
   const size_t* GetSizes() const { return sizes_; }
   int64 dim_size(int index) const { return sizes_[index]; }
-  int64 tf_dim_size(int index) const {
-    return sizes_[tf_to_mkl_dim_map_[index]];
-  }
+  int64 tf_dim_size(int index) const { return sizes_[tf_to_mkl_dim_map_[index]]; }
   const size_t* GetStrides() const { return strides_; }
   const size_t* GetTfToMklDimMap() const { return tf_to_mkl_dim_map_; }
   size_t tf_dim_idx(int index) const { return tf_to_mkl_dim_map_[index]; }
@@ -236,8 +227,7 @@ class MklShape {
   (IS_MKL_TENSOR_OFFSET + sizeof(size_t))  // Location of dimension_
 // Location of sizes. Note dim is not used here, left here
 // to make macros consistent.
-#define SIZES_OFFSET(dims) \
-  (DIMS_OFFSET + sizeof(size_t))
+#define SIZES_OFFSET(dims) (DIMS_OFFSET + sizeof(size_t))
 #define STRIDES_OFFSET(dims) \
   (SIZES_OFFSET(dims) + dims * sizeof(size_t))  // Location of strides
 #define MKL_LAYOUT_OFFSET(dims) \
@@ -319,266 +309,6 @@ class MklShape {
       nullptr;  // TF dimension corresponding to this MKL dimension
 };
 
-#ifdef INTEL_MKL_DNN
-
-// Forward decl
-TensorFormat MklDnnDataFormatToTFDataFormat(memory::format format);
-
-class MklDnnShape {
- private:
-  typedef struct {
-    /// Flag to indicate if the tensor is an  MKL tensor or not
-    bool is_mkl_tensor_ = false;
-    /// Number of dimensions in Tensorflow format
-    size_t dimension_ = 0;
-    /// Required by MKLDNN for conversions
-    mkldnn_dims_t sizes_;    // Required by MKL for conversions
-    memory::format tf_data_format_ = memory::format::format_undef;
-    memory::data_type T_ = memory::data_type::data_undef;
-    // MKL layout
-    mkldnn_memory_desc_t mkl_md_;
-    /// TF dimension corresponding to this MKL dimension
-    mkldnn_dims_t map_;
-  } MklShapeData;
-  MklShapeData data_;
-
-  typedef std::remove_extent<mkldnn_dims_t>::type mkldnn_dim_t;
-#define INVALID_DIM_SIZE -1
-
-
- public:
-  MklDnnShape() {
-    for (size_t i = 0; i < sizeof(data_.sizes_) /
-                           sizeof(data_.sizes_[0]); ++i) {
-      data_.sizes_[i] = -1;
-    }
-    for (size_t i = 0; i < sizeof(data_.map_) /
-                           sizeof(data_.map_[0]); ++i) {
-      data_.map_[i] = -1;
-    }
-  }
-
-  ~MklDnnShape() {}
-  TF_DISALLOW_COPY_AND_ASSIGN(MklDnnShape);  // Cannot copy
-
-  inline const bool IsMklTensor() const { return data_.is_mkl_tensor_; }
-  inline void SetMklTensor(bool is_mkl_tensor) {
-    data_.is_mkl_tensor_ = is_mkl_tensor;
-  }
-
-  inline void SetDimensions(const size_t dimension) {
-    data_.dimension_ = dimension;
-  }
-  inline size_t GetDimension(char dimension)const {
-    int index = GetMklDnnTensorDimIndex(dimension);
-    CHECK(index >= 0 && index < this->GetDimension())
-        << "Invalid index from the dimension: " << index << ", " << dimension;
-    return this->DimSize(index);
-  }
-
-  inline int32 GetMklDnnTensorDimIndex(char dimension)const {
-    switch (dimension) {
-  case 'N':
-    return MklDnnDims::Dim_N;
-  case 'C':
-    return MklDnnDims::Dim_C;
-  case 'H':
-    return MklDnnDims::Dim_H;
-  case 'W':
-    return MklDnnDims::Dim_W;
-  default:
-    LOG(FATAL) << "Invalid dimension: " << dimension;
-    return -1;  // Avoid compiler warning about missing return value
-    }
-  }
-
-  inline size_t GetDimension() const { return data_.dimension_; }
-  inline const int* GetSizes() const {
-    return reinterpret_cast<const int*>(&data_.sizes_[0]);
-  }
-
-  // Returns an mkldnn::memory::dims object that contains the sizes of this
-  // MklDnnShape object.
-  inline memory::dims GetSizesAsMklDnnDims() const {
-    memory::dims retVal;
-    if (data_.is_mkl_tensor_) {
-      int dimensions = sizeof(data_.sizes_) / sizeof(data_.sizes_[0]);
-      for (size_t i = 0 ; i < dimensions; i++) {
-        if (data_.sizes_[i] != INVALID_DIM_SIZE)
-        retVal.push_back(data_.sizes_[i]);
-      }
-    } else {
-      CHECK_EQ(data_.is_mkl_tensor_, true);
-    }
-    return retVal;
-  }
-
-  inline int64 DimSize(int index) const {
-    CHECK_LT(index, sizeof(data_.sizes_)/sizeof(data_.sizes_[0]));
-    return data_.sizes_[index];
-  }
-
-  /// Return TensorShape that describes the Tensorflow shape of the tensor
-  /// represented by this MklShape.
-  inline TensorShape GetTfShape() {
-    CHECK_EQ(data_.is_mkl_tensor_, true);
-
-    std::vector<int32> shape(data_.dimension_, -1);
-    for (size_t idx = 0; idx < data_.dimension_; ++idx) {
-      shape[idx] = data_.sizes_[TfDimIdx(idx)];
-    }
-
-    TensorShape ts;
-    bool ret = TensorShapeUtils::MakeShape(shape, &ts).ok();
-    CHECK_EQ(ret, true);
-    return ts;
-  }
-
-  inline void SetElemType(memory::data_type dt) { data_.T_ = dt; }
-  inline const memory::data_type GetElemType() { return data_.T_; }
-
-  inline void SetMklLayout(memory::primitive_desc* pd) {
-    CHECK_NOTNULL(pd);
-    data_.mkl_md_ = pd->desc().data;
-  }
-  inline const memory::desc GetMklLayout() const {
-    return memory::desc(data_.mkl_md_);
-  }
-
-  inline memory::format GetTfDataFormat() const {
-    return data_.tf_data_format_;
-  }
-  /// We don't create primitive_descriptor for TensorFlow layout now.
-  /// We use lazy evaluation and create it only when needed.
-  inline void SetTfLayout(size_t dims, const memory::dims& sizes,
-                   memory::format format) {
-    CHECK_EQ(dims, sizes.size());
-    data_.dimension_ = dims;
-    for (size_t ii = 0; ii < dims; ii++) {
-      data_.sizes_[ii] = sizes[ii];
-    }
-    data_.tf_data_format_ = format;
-    SetTfDimOrder(dims, format);
-  }
-  inline const memory::desc GetTfLayout() const {
-    memory::dims dims;
-    for (size_t ii = 0; ii < data_.dimension_; ii++) {
-      dims.push_back(data_.sizes_[ii]);
-    }
-    return memory::desc(dims, data_.T_, data_.tf_data_format_);
-  }
-  inline const memory::desc GetCurLayout() const {
-    return IsMklTensor() ? GetMklLayout() : GetTfLayout();
-  }
-
-  // nhasabni - I've removed SetTfDimOrder that was setting default order in
-  // case of MKL-ML. We don't need a case of default dimension order because
-  // when an operator that does not get data_format attribute gets all inputs
-  // in Tensorflow format, it will produce output in Tensorflow format.
-  inline void SetTfDimOrder(const size_t dimension, const mkldnn_dims_t map) {
-    CHECK(dimension == data_.dimension_);
-    for (size_t ii = 0; ii < dimension; ii++) {
-      data_.map_[ii] = map[ii];
-    }
-  }
-
-  inline void SetTfDimOrder(const size_t dimension, TensorFormat data_format) {
-    // TODO(nhasabni): Why do we restrict this to 4D?
-    CHECK_EQ(dimension, 4);
-    CHECK(dimension == data_.dimension_);
-    data_.map_[GetTensorDimIndex<2>(data_format, 'W')] = MklDnnDims::Dim_W;
-    data_.map_[GetTensorDimIndex<2>(data_format, 'H')] = MklDnnDims::Dim_H;
-    data_.map_[GetTensorDimIndex<2>(data_format, 'C')] = MklDnnDims::Dim_C;
-    data_.map_[GetTensorDimIndex<2>(data_format, 'N')] = MklDnnDims::Dim_N;
-  }
-
-  inline void SetTfDimOrder(const size_t dimension, memory::format format) {
-    TensorFormat data_format = MklDnnDataFormatToTFDataFormat(format);
-    SetTfDimOrder(dimension, data_format);
-  }
-
-  inline const mkldnn_dim_t* GetTfToMklDimMap() const {
-    return &data_.map_[0];
-  }
-  inline size_t TfDimIdx(int index) const { return data_.map_[index]; }
-  inline int64 TfDimSize(int index) const {
-    return data_.sizes_[TfDimIdx(index)];
-  }
-
-  /// Query TF-MKL dimension ordering map and check if Tensorflow dimension 'd'
-  /// corresponds to MKL's Channel dimension.
-  inline bool IsMklChannelDim(int d) const {
-    return TfDimIdx(d) == MklDnnDims::Dim_C;
-  }
-  /// Query TF-MKL dimension ordering map and check if Tensorflow dimension 'd'
-  /// corresponds to MKL's Batch dimension.
-  inline bool IsMklBatchDim(int d) const {
-    return TfDimIdx(d) == MklDnnDims::Dim_N;
-  }
-  /// Query TF-MKL dimension ordering map and check if Tensorflow dimension 'd'
-  /// corresponds to MKL's Width dimension.
-  inline bool IsMklWidthDim(int d) const {
-    return TfDimIdx(d) == MklDnnDims::Dim_W;
-  }
-  /// Query TF-MKL dimension ordering map and check if Tensorflow dimension 'd'
-  /// corresponds to MKL's Height dimension.
-  inline bool IsMklHeightDim(int d) const {
-    return TfDimIdx(d) == MklDnnDims::Dim_H;
-  }
-
-  /// Check if the TF-Mkl dimension ordering map specifies if the input
-  /// tensor is in NCHW format.
-  inline bool IsTensorInNCHWFormat() const {
-    TensorFormat data_format = FORMAT_NCHW;
-    return (IsMklBatchDim(GetTensorDimIndex<2>(data_format, 'N')) &&
-            IsMklChannelDim(GetTensorDimIndex<2>(data_format, 'C')) &&
-            IsMklHeightDim(GetTensorDimIndex<2>(data_format, 'H')) &&
-            IsMklWidthDim(GetTensorDimIndex<2>(data_format, 'W')));
-  }
-
-  /// Check if the TF-Mkl dimension ordering map specifies if the input
-  /// tensor is in NHWC format.
-  inline bool IsTensorInNHWCFormat() const {
-    TensorFormat data_format = FORMAT_NHWC;
-    return (IsMklBatchDim(GetTensorDimIndex<2>(data_format, 'N')) &&
-            IsMklChannelDim(GetTensorDimIndex<2>(data_format, 'C')) &&
-            IsMklHeightDim(GetTensorDimIndex<2>(data_format, 'H')) &&
-            IsMklWidthDim(GetTensorDimIndex<2>(data_format, 'W')));
-  }
-
-  /// The following methods are used for serializing and de-serializing the
-  /// contents of the mklshape object.
-  /// The data is serialized in this order
-  /// is_mkl_tensor_ : dimension_ : sizes_ : map_: format_ : T_ : mkl_pd_;
-
-  /// Size of buffer to hold the serialized object, the size is computed by
-  /// following above mentioned order
-  inline size_t GetSerializeBufferSize() const {
-    return sizeof(MklShapeData);
-  }
-
-  void SerializeMklDnnShape(unsigned char* buf, size_t buf_size) const {
-    CHECK(buf_size >= GetSerializeBufferSize())
-        << "Buffer size is too small to SerializeMklDnnShape";
-    *reinterpret_cast<MklShapeData*>(buf) = data_;
-  }
-
-  void DeSerializeMklDnnShape(const unsigned char* buf, size_t buf_size) {
-    // Make sure buffer holds at least is_mkl_tensor_.
-    CHECK(buf_size >= sizeof(data_.is_mkl_tensor_))
-      << "Buffer size is too small in DeSerializeMklDnnShape";
-
-    const bool is_mkl_tensor = *reinterpret_cast<const bool*>(buf);
-    if (is_mkl_tensor) {  // If it is an MKL Tensor then read the rest
-      CHECK(buf_size >= GetSerializeBufferSize())
-        << "Buffer size is too small in DeSerializeMklDnnShape";
-      data_ = *reinterpret_cast<const MklShapeData*>(buf);
-    }
-  }
-};
-
-#endif
-
 // List of MklShape objects. Used in Concat/Split layers.
 typedef std::vector<MklShape> MklShapeList;
 
@@ -617,36 +347,6 @@ inline Tensor ConvertMklToTF(OpKernelContext* context, const Tensor& mkl_tensor,
   return output_tensor;
 }
 
-#ifdef INTEL_MKL_DNN
-template <typename T>
-inline Tensor ConvertMklToTF(OpKernelContext* context, const Tensor& mkl_tensor,
-                             const MklDnnShape& mkl_shape) {
-  Tensor output_tensor;
-  TensorShape output_shape;
-
-#if 0
-  // TODO(nhasabni): need to implement
-  for (size_t j = 0; j < mkl_shape.GetDimension(); j++) {
-    // Outermost to innermost dimension
-    output_shape.AddDim(mkl_shape.GetSizes()[mkl_shape.tf_dim_idx(j)]);
-  }
-
-  // Allocate output tensor.
-  context->allocate_temp(DataTypeToEnum<T>::v(), output_shape, &output_tensor);
-
-  dnnLayout_t output_layout = static_cast<dnnLayout_t>(mkl_shape.GetTfLayout());
-  void* input_buffer = const_cast<T*>(mkl_tensor.flat<T>().data());
-  void* output_buffer = const_cast<T*>(output_tensor.flat<T>().data());
-
-  if (mkl_tensor.NumElements() != 0) {
-    mkl_shape.GetConvertedFlatData(output_layout, input_buffer, output_buffer);
-  }
-#endif
-
-  return output_tensor;
-}
-#endif
-
 // Get the MKL shape from the second string tensor
 inline void GetMklShape(OpKernelContext* ctext, int n, MklShape* mklshape) {
   mklshape->DeSerializeMklShape(
@@ -659,20 +359,6 @@ inline void GetMklShape(OpKernelContext* ctext, int n, MklShape* mklshape) {
           sizeof(uint8));
 }
 
-#ifdef INTEL_MKL_DNN
-inline void GetMklShape(OpKernelContext* ctext, int n,
-                        MklDnnShape* mklshape) {
-  mklshape->DeSerializeMklDnnShape(
-      ctext->input(GetTensorMetaDataIndex(n, ctext->num_inputs()))
-          .flat<uint8>()
-          .data(),
-      ctext->input(GetTensorMetaDataIndex(n, ctext->num_inputs()))
-              .flat<uint8>()
-              .size() *
-          sizeof(uint8));
-}
-#endif
-
 // Gets the actual input
 inline const Tensor& MklGetInput(OpKernelContext* ctext, int n) {
   return ctext->input(GetTensorDataIndex(n, ctext->num_inputs()));
@@ -696,27 +382,6 @@ inline void GetMklShapeList(OpKernelContext* ctext, StringPiece name,
   }
 }
 
-#ifdef INTEL_MKL_DNN
-/// Get shape of input tensor pointed by 'input_idx' in TensorShape format.
-/// If the input tensor is in MKL layout, then obtains TensorShape from
-/// MklShape.
-inline TensorShape GetTfShape(OpKernelContext* context,
-                              size_t input_idx) {
-  // Sanity check.
-  CHECK_NOTNULL(context);
-  CHECK_LT(input_idx, context->num_inputs());
-
-  MklDnnShape input_mkl_shape;
-  GetMklShape(context, input_idx, &input_mkl_shape);
-  if (input_mkl_shape.IsMklTensor()) {
-    return input_mkl_shape.GetTfShape();
-  } else {
-    const Tensor& t = MklGetInput(context, input_idx);
-    return t.shape();
-  }
-}
-#endif
-
 // Allocate the second output tensor that will contain
 // the MKL shape serialized
 inline void AllocateOutputSetMklShape(OpKernelContext* ctext, int n,
@@ -732,23 +397,6 @@ inline void AllocateOutputSetMklShape(OpKernelContext* ctext, int n,
       second_tensor->flat<uint8>().size() * sizeof(uint8));
 }
 
-#ifdef INTEL_MKL_DNN
-// Allocate the second output tensor that will contain
-// the MKL shape serialized
-inline void AllocateOutputSetMklShape(OpKernelContext* ctext, int n,
-                                      const MklDnnShape& mkl_shape) {
-  Tensor* second_tensor = nullptr;
-  TensorShape second_shape;
-  second_shape.AddDim(mkl_shape.GetSerializeBufferSize());
-  OP_REQUIRES_OK(ctext, ctext->allocate_output(
-                            GetTensorMetaDataIndex(n, ctext->num_outputs()),
-                            second_shape, &second_tensor));
-  mkl_shape.SerializeMklDnnShape(
-      second_tensor->flat<uint8>().data(),
-      second_tensor->flat<uint8>().size() * sizeof(uint8));
-}
-#endif
-
 // Allocate the output tensor, create a second output tensor that will contain
 // the MKL shape serialized
 inline void AllocateOutputSetMklShape(OpKernelContext* ctext, int n,
@@ -769,43 +417,9 @@ inline void AllocateOutputSetMklShape(OpKernelContext* ctext, int n,
       second_tensor->flat<uint8>().size() * sizeof(uint8));
 }
 
-#ifdef INTEL_MKL_DNN
-// Allocate the output tensor, create a second output tensor that will contain
-// the MKL shape serialized
-inline void AllocateOutputSetMklShape(OpKernelContext* ctext, int n,
-                                      Tensor** output,
-                                      const TensorShape& tf_shape,
-                                      const MklDnnShape& mkl_shape) {
-  Tensor* second_tensor = nullptr;
-  TensorShape second_shape;
-  second_shape.AddDim(mkl_shape.GetSerializeBufferSize());
-  OP_REQUIRES_OK(
-      ctext, ctext->allocate_output(GetTensorDataIndex(n, ctext->num_outputs()),
-                                    tf_shape, output));
-  OP_REQUIRES_OK(ctext, ctext->allocate_output(
-                            GetTensorMetaDataIndex(n, ctext->num_outputs()),
-                            second_shape, &second_tensor));
-  mkl_shape.SerializeMklDnnShape(
-      second_tensor->flat<uint8>().data(),
-      second_tensor->flat<uint8>().size() * sizeof(uint8));
-}
-#endif
-
 // Allocates a temp tensor and returns the data buffer for temporary storage.
 // Currently
-#ifdef INTEL_MKL_DNN
-template <typename T>
-inline void AllocTmpBuffer(OpKernelContext* context, Tensor* tensor_out,
-                           const memory::primitive_desc& pd, void** buf_out) {
-  TensorShape tf_shape;
-
-  tf_shape.AddDim(pd.get_size() / sizeof(T) + 1);
-  OP_REQUIRES_OK(context, context->allocate_temp(DataTypeToEnum<T>::v(),
-                                                 tf_shape, tensor_out));
-  *buf_out = static_cast<void*>(tensor_out->flat<T>().data());
-}
-#endif
-
+// we only support F32, will need to templatize if other types are added
 inline void AllocTmpBuffer(OpKernelContext* context, Tensor* tensor_out,
                            dnnLayout_t lt_buff, void** buf_out) {
   TensorShape tf_shape;
@@ -821,7 +435,7 @@ inline void AllocTmpBuffer(OpKernelContext* context, Tensor* tensor_out,
 
 template <typename T>
 inline void AllocTmpBuffer(OpKernelContext* context, Tensor* tensor_out,
-                              TensorShape tf_shape) {
+                           TensorShape tf_shape) {
   OP_REQUIRES_OK(context, context->allocate_temp(DataTypeToEnum<T>::v(),
                                                  tf_shape, tensor_out));
 }
@@ -1055,8 +669,6 @@ inline bool MklCompareShapes(const TensorShape* input_shape_0,
   return true;
 }
 
-// These functions do not compile with MKL-DNN since mkl.h is missing.
-// We may need to remove them later.
 // TODO(intel_tf): Remove this routine when faster MKL layout conversion is
 // out.
 inline void MklNHWCToNCHW(const Tensor& input, Tensor** output) {
@@ -1095,11 +707,18 @@ inline void MklNCHWToNHWC(const Tensor& input, Tensor** output) {
 
 #ifdef INTEL_MKL_DNN
 
+using mkldnn::engine;
+using mkldnn::memory;
+using mkldnn::padding_kind;
+using mkldnn::primitive;
+using mkldnn::reorder;
+
 /// Return MKL-DNN data type (memory::data_type) for input type T
 ///
 /// @input None
 /// @return memory::data_type corresponding to type T
-template<typename T> static memory::data_type MklDnnType();
+template <typename T>
+static memory::data_type MklDnnType();
 
 /// Instantiation for float type. Add similar instantiations for other
 /// type if needed.
@@ -1114,26 +733,15 @@ memory::data_type MklDnnType<float>() {
 /// @return: memory::format corresponding to TensorFlow data format;
 ///          Fails with an error if invalid data format.
 inline memory::format TFDataFormatToMklDnnDataFormat(TensorFormat format) {
-  if (format == FORMAT_NHWC) return memory::format::nhwc;
-  else if (format == FORMAT_NCHW) return memory::format::nchw;
-  TF_CHECK_OK(Status(error::Code::INVALID_ARGUMENT,
-                     "Unsupported data format"));
+  if (format == FORMAT_NHWC)
+    return memory::format::nhwc;
+  else if (format == FORMAT_NCHW)
+    return memory::format::nchw;
+  TF_CHECK_OK(Status(error::Code::INVALID_ARGUMENT, "Unsupported data format"));
   // Return to get rid of compiler warning
   return memory::format::format_undef;
 }
 
-/// Map MKL-DNN data format to TensorFlow's data format
-///
-/// @input: memory::format
-/// @return: Tensorflow data format corresponding to memory::format
-///          Fails with an error if invalid data format.
-inline TensorFormat MklDnnDataFormatToTFDataFormat(memory::format format) {
-  if (format == memory::format::nhwc) return FORMAT_NHWC;
-  else if (format == memory::format::nchw) return FORMAT_NCHW;
-  TF_CHECK_OK(Status(error::Code::INVALID_ARGUMENT,
-                     "Unsupported data format"));
-}
-
 /// Map TensorShape object into memory::dims required by MKL-DNN
 ///
 /// This function will simply map input TensorShape into MKL-DNN dims
@@ -1145,7 +753,7 @@ inline TensorFormat MklDnnDataFormatToTFDataFormat(memory::format format) {
 /// @return memory::dims corresponding to TensorShape
 inline memory::dims TFShapeToMklDnnDims(const TensorShape& shape) {
   memory::dims dims(shape.dims());
-  for (int d = 0; d < shape.dims(); ++d) {
+  for (unsigned int d = 0; d < shape.dims(); ++d) {
     dims[d] = shape.dim_size(d);
   }
   return dims;
@@ -1161,7 +769,7 @@ inline memory::dims TFShapeToMklDnnDims(const TensorShape& shape) {
 /// @input TensorShape object in shape
 /// @return memory::dims in MKL-DNN required NCHW format
 inline memory::dims TFShapeToMklDnnDimsInNCHW(const TensorShape& shape,
-                                            TensorFormat format) {
+                                              TensorFormat format) {
   // Check validity of format.
   CHECK_NE(TFDataFormatToMklDnnDataFormat(format),
            memory::format::format_undef);
@@ -1175,43 +783,6 @@ inline memory::dims TFShapeToMklDnnDimsInNCHW(const TensorShape& shape,
   return memory::dims({n, c, h, w});
 }
 
-/// Map MklDnn memory::dims object into TensorShape object.
-///
-/// This function will simply map input shape in MKL-DNN memory::dims format
-/// in Tensorflow's TensorShape object by perserving dimension order.
-///
-/// @input MKL-DNN memory::dims object
-/// @output TensorShape corresponding to memory::dims
-inline TensorShape MklDnnDimsToTFShape(const memory::dims& dims) {
-  std::vector<int32> shape(dims.size(), -1);
-  for (int d = 0; d < dims.size(); d++) {
-    shape[d] = dims[d];
-  }
-
-  TensorShape ret;
-  CHECK_EQ(TensorShapeUtils::MakeShape(shape, &ret).ok(), true);
-  return ret;
-}
-
-/// Function to calculate strides given tensor shape in Tensorflow order
-/// E.g., if dims_tf_order is {1, 2, 3, 4}, then as per Tensorflow convention,
-/// dimesion with size 1 is outermost dimension; while dimension with size 4 is
-/// innermost dimension. So strides for this tensor would be {4 * 3 * 2,
-/// 4 * 3, 4, 1}, i.e., {24, 12, 4, 1}.
-///
-/// @input Tensorflow shape in memory::dims type
-/// @return memory::dims containing strides for the tensor.
-inline memory::dims CalculateTFStrides(const memory::dims& dims_tf_order) {
-  CHECK_GT(dims_tf_order.size(), 0);
-  memory::dims strides(dims_tf_order.size());
-  int last_dim_idx = dims_tf_order.size() - 1;
-  strides[last_dim_idx] = 1;
-  for (int d = last_dim_idx - 1; d >= 0; d--) {
-    strides[d] = strides[d + 1] * dims_tf_order[d + 1];
-  }
-  return strides;
-}
-
 inline padding_kind TFPaddingToMklDnnPadding(Padding pad) {
   // MKL-DNN only supports zero padding.
   return padding_kind::zero;
@@ -1237,21 +808,23 @@ class MklDnnData {
   const engine* cpu_engine_;
 
  public:
-  explicit MklDnnData(const engine* e) : user_memory_(nullptr),
-                                         reorder_memory_(nullptr),
-                                         op_md_(nullptr), cpu_engine_(e) {}
+  explicit MklDnnData(const engine* e)
+      : user_memory_(nullptr),
+        reorder_memory_(nullptr),
+        op_md_(nullptr),
+        cpu_engine_(e) {}
 
   ~MklDnnData() {
     cpu_engine_ = nullptr;  // We don't own this.
-    delete(user_memory_);
-    delete(reorder_memory_);
-    delete(op_md_);
+    delete (user_memory_);
+    delete (reorder_memory_);
+    delete (op_md_);
   }
 
-  inline void* GetTensorBuffer(const Tensor* tensor) const {
+  void* GetTensorBuffer(const Tensor* tensor) {
     CHECK_NOTNULL(tensor);
-    return const_cast<void*>(static_cast<const void*>(
-              tensor->flat<T>().data()));
+    return const_cast<void*>(
+        static_cast<const void*>(tensor->flat<T>().data()));
   }
 
   /// Set user memory primitive using specified dimensions, memory format and
@@ -1262,83 +835,35 @@ class MklDnnData {
   /// an operation. E.g., filter of Conv2D is of shape {1, 2, 3, 4}, and
   /// memory format HWIO, and the buffer that contains actual values is
   /// pointed by data_buffer.
-  inline void SetUsrMem(const memory::dims& dim, memory::format fm,
-                        void* data_buffer = nullptr) {
-    auto md = memory::desc(dim, MklDnnType<T>(), fm);
-    SetUsrMem(md, data_buffer);
+  void SetUsrMem(memory::dims dim, memory::format fm, void* data_buffer) {
+    CHECK_NOTNULL(data_buffer);
+    CHECK_NOTNULL(cpu_engine_);
+    // TODO(nhasabni): can we remove dynamic memory allocation?
+    user_memory_ =
+        new memory(memory::primitive_desc(
+                       memory::desc(dim, MklDnnType<T>(), fm), *cpu_engine_),
+                   data_buffer);
   }
 
-  inline void SetUsrMem(const memory::dims& dim, memory::format fm,
-                        const Tensor* tensor) {
+  void SetUsrMem(memory::dims dim, memory::format fm, const Tensor* tensor) {
     CHECK_NOTNULL(tensor);
     SetUsrMem(dim, fm, GetTensorBuffer(tensor));
   }
 
-  /// Helper function to create memory descriptor in Blocked format
-  ///
-  /// @input: Tensor dimensions
-  /// @input: strides corresponding to dimensions. One can use utility
-  ///         function such as CalculateTFStrides to compute strides
-  ///         for given dimensions.
-  /// @return: memory::desc object corresponding to blocked memory format
-  ///          for given dimensions and strides.
-  static inline memory::desc CreateBlockedMemDesc(const memory::dims& dim,
-      const memory::dims& strides) {
-    CHECK_EQ(dim.size(), strides.size());
-
-    // We have to construct memory descriptor in a C style. This is not at all
-    // ideal but MKLDNN does not offer any API to construct descriptor in
-    // blocked format except a copy constructor that accepts
-    // mkldnn_memory_desc_t.
-    mkldnn_memory_desc_t md;
-    md.primitive_kind = mkldnn_memory;
-    md.ndims = dim.size();
-    md.format = mkldnn_blocked;
-    md.data_type = memory::convert_to_c(MklDnnType<T>());
-
-    for (size_t i = 0; i < dim.size(); i++) {
-      md.layout_desc.blocking.block_dims[i] = 1;
-      md.layout_desc.blocking.strides[1][i] = 1;
-      md.layout_desc.blocking.strides[0][i] = strides[i];
-      md.layout_desc.blocking.padding_dims[i] = dim[i];
-      md.layout_desc.blocking.offset_padding_to_data[i] = 0;
-      md.dims[i] = dim[i];
-    }
-    md.layout_desc.blocking.offset_padding = 0;
-
-    return memory::desc(md);
-  }
-
-  /// A version of SetUsrMem call that allows user to create memory in blocked
-  /// format. So in addition to accepting dimensions, it also accepts strides.
-  /// This allows user to create memory for tensor in a format that is not
-  /// supported by MKLDNN. E.g., MKLDNN does not support tensor format for 6
-  /// dimensional tensor as a native format. But by using blocked format, a user
-  /// can create memory for 6D tensor.
-  inline void SetUsrMem(const memory::dims& dim, const memory::dims& strides,
-                        void* data_buffer = nullptr) {
-    CHECK_EQ(dim.size(), strides.size());
-    auto blocked_md = MklDnnData<T>::CreateBlockedMemDesc(dim, strides);
-    SetUsrMem(blocked_md, data_buffer);
-  }
-
-  inline void SetUsrMem(const memory::dims& dim, const memory::dims& strides,
-                        const Tensor* tensor) {
-    CHECK_NOTNULL(tensor);
-    SetUsrMem(dim, strides, GetTensorBuffer(tensor));
-  }
-
   /// A version of function to set user memory primitive that accepts memory
   /// descriptor directly, instead of accepting dimensions and format. This
   /// function is more generic that the one above, but the function above is
   /// sufficient in most cases.
-  inline void SetUsrMem(const memory::desc& md, void* data_buffer = nullptr) {
-    auto pd = memory::primitive_desc(md, *cpu_engine_);
-    SetUsrMem(pd, data_buffer);
+  void SetUsrMem(memory::desc md, void* data_buffer) {
+    CHECK_NOTNULL(data_buffer);
+    CHECK_NOTNULL(cpu_engine_);
+    // TODO(nhasabni): can we remove dynamic memory allocation?
+    user_memory_ =
+        new memory(memory::primitive_desc(md, *cpu_engine_), data_buffer);
   }
 
   /// A version of SetUsrMem with memory descriptor and tensor
-  inline void SetUsrMem(const memory::desc& md, const Tensor* tensor) {
+  void SetUsrMem(memory::desc md, const Tensor* tensor) {
     CHECK_NOTNULL(tensor);
     SetUsrMem(md, GetTensorBuffer(tensor));
   }
@@ -1347,60 +872,41 @@ class MklDnnData {
   /// descriptor directly, instead of accepting dimensions and format. This
   /// function is more generic that the one above, but the function above is
   /// sufficient in most cases.
-  inline void SetUsrMem(const memory::primitive_desc& pd,
-                        void* data_buffer = nullptr) {
+  void SetUsrMem(memory::primitive_desc pd, void* data_buffer) {
+    CHECK_NOTNULL(data_buffer);
     CHECK_NOTNULL(cpu_engine_);
     // TODO(nhasabni): can we remove dynamic memory allocation?
-    if (data_buffer) {
-     user_memory_ = new memory(pd, data_buffer);
-    } else {
-      user_memory_ = new memory(pd);
-    }
+    user_memory_ = new memory(pd, data_buffer);
   }
 
   /// A version of SetUsrMem with primitive descriptor and tensor
-  inline void SetUsrMem(const memory::primitive_desc& pd,
-                        const Tensor* tensor) {
+  void SetUsrMem(memory::primitive_desc pd, const Tensor* tensor) {
     CHECK_NOTNULL(tensor);
     SetUsrMem(pd, GetTensorBuffer(tensor));
   }
 
   /// Get function for user memory primitive.
-  inline const memory* GetUsrMem() const { return user_memory_; }
+  const memory* GetUsrMem() const { return user_memory_; }
 
   /// Get function for primitive descriptor of user memory primitive.
-  inline const memory::primitive_desc GetUsrMemPrimDesc() const {
+  const memory::primitive_desc GetUsrMemPrimDesc() const {
     CHECK_NOTNULL(user_memory_);
     return user_memory_->get_primitive_desc();
   }
 
   /// Get function for descriptor of user memory.
-  inline memory::desc GetUsrMemDesc() {
+  memory::desc GetUsrMemDesc() {
     // This is ugly. Why MKL-DNN does not provide desc() method of const type??
     const memory::primitive_desc pd = GetUsrMemPrimDesc();
     return const_cast<memory::primitive_desc*>(&pd)->desc();
   }
 
   /// Get function for data buffer of user memory primitive.
-  inline void* GetUsrMemDataHandle() const {
+  void* GetUsrMemDataHandle() const {
     CHECK_NOTNULL(user_memory_);
     return user_memory_->get_data_handle();
   }
 
-  /// Set function for data buffer of user memory primitive.
-  inline void* SetUsrMemDataHandle(void* data_buffer) {
-    CHECK_NOTNULL(user_memory_);
-    CHECK_NOTNULL(data_buffer);
-    return user_memory_->set_data_handle(data_buffer);
-  }
-
-  /// Set function for data buffer of user memory primitive.
-  inline void SetUsrMemDataHandle(const Tensor* tensor) {
-    CHECK_NOTNULL(user_memory_);
-    CHECK_NOTNULL(tensor);
-    user_memory_->set_data_handle(GetTensorBuffer(tensor));
-  }
-
   /// Get the memory primitive for input and output of an op. If inputs
   /// to an op require reorders, then this function returns memory primitive
   /// for reorder. Otherwise, it will return memory primitive for user memory.
@@ -1409,7 +915,7 @@ class MklDnnData {
   /// execute Conv2D, we need memory primitive for I and F. Buf if reorder is
   /// required for I and F (say I_r is reorder primitive for I; F_r is reorder
   /// primitive for F), then we need I_r and F_r to perform Conv2D.
-  inline const memory& GetOpMem() const {
+  const memory& GetOpMem() const {
     return reorder_memory_ ? *reorder_memory_ : *user_memory_;
   }
 
@@ -1417,32 +923,13 @@ class MklDnnData {
   /// format. E.g., For Conv2D, the dimensions would be same as user dimensions
   /// but memory::format would be mkldnn::any because we want MKL-DNN to choose
   /// best layout/format for given input dimensions.
-  inline void SetOpMemDesc(const memory::dims& dim, memory::format fm) {
+  void SetOpMemDesc(const memory::dims& dim, memory::format fm) {
     // TODO(nhasabni): can we remove dynamic memory allocation?
     op_md_ = new memory::desc(dim, MklDnnType<T>(), fm);
   }
 
   /// Get function for memory descriptor for an operation
-  inline const memory::desc& GetOpMemDesc() const { return *op_md_; }
-
-  /// Predicate that checks if we need to reorder user's memory into memory
-  /// pointed by op_pd.
-  ///
-  /// @input: op_pd - memory primitive descriptor of the given input of an
-  ///               operation
-  /// @return: true in case reorder of input is needed; false, otherwise.
-  inline bool IsReorderNeeded(const memory::primitive_desc& op_pd) const {
-    CHECK_NOTNULL(user_memory_);
-    return op_pd != user_memory_->get_primitive_desc();
-  }
-
-  /// Function to create a reorder from memory pointed by from to memory pointed
-  /// by to. Returns created primitive.
-  inline primitive CreateReorder(const memory* from, const memory* to) const {
-    CHECK_NOTNULL(from);
-    CHECK_NOTNULL(to);
-    return reorder(*from, *to);
-  }
+  const memory::desc& GetOpMemDesc() const { return *op_md_; }
 
   /// Function to handle input reordering
   ///
@@ -1458,62 +945,19 @@ class MklDnnData {
   ///               operation
   /// @input: net - net to which to add reorder primitive in case it is needed.
   /// @return: true in case reorder of input is needed; false, otherwise.
-  inline bool CheckReorderToOpMem(const memory::primitive_desc& op_pd,
-                                  std::vector<primitive>* net) {
+  bool CheckReorderToOpMem(const memory::primitive_desc& op_pd,
+                           std::vector<primitive>* net) {
     CHECK_NOTNULL(net);
     CHECK_NOTNULL(user_memory_);
-    if (IsReorderNeeded(op_pd)) {
+    if (op_pd != user_memory_->get_primitive_desc()) {
       // TODO(nhasabni): can we remove dynamic memory allocation?
       reorder_memory_ = new memory(op_pd);
-      net->push_back(CreateReorder(user_memory_, reorder_memory_));
-      return true;
-    }
-    return false;
-  }
-
-  /// Overloaded version of above function that accepts memory buffer
-  /// where output of reorder needs to be stored.
-  ///
-  /// @input: op_pd - memory primitive descriptor of the given input of an
-  ///               operation
-  /// @reorder_data_handle - memory buffer where output of reorder needs to be
-  ///                        stored. Primitive does not check if buffer is
-  ///                        enough size to write.
-  /// @input: net - net to which to add reorder primitive in case it is needed.
-  /// @return: true in case reorder of input is needed; false, otherwise.
-  inline bool CheckReorderToOpMem(const memory::primitive_desc& op_pd,
-                                  void* reorder_data_handle,
-                                  std::vector<primitive>* net) {
-    CHECK_NOTNULL(net);
-    CHECK_NOTNULL(reorder_data_handle);
-    CHECK_NOTNULL(user_memory_);
-    if (IsReorderNeeded(op_pd)) {
-      // TODO(nhasabni): can we remove dynamic memory allocation?
-      reorder_memory_ = new memory(op_pd, reorder_data_handle);
-      net->push_back(CreateReorder(user_memory_, reorder_memory_));
+      net->push_back(reorder(*user_memory_, *reorder_memory_));
       return true;
     }
     return false;
   }
 
-  /// Another overloaded version of CheckReorderToOpMem that accepts Tensor
-  /// where output of reorder needs to be stored.
-  ///
-  /// @input: op_pd - memory primitive descriptor of the given input of an
-  ///               operation
-  /// @reorder_tensor - Tensor whose buffer is to be used to store output of
-  ///                   reorder. Primitive does not check if buffer is
-  ///                   enough size to write.
-  /// @input: net - net to which to add reorder primitive in case it is needed.
-  /// @return: true in case reorder of input is needed; false, otherwise.
-  inline bool CheckReorderToOpMem(const memory::primitive_desc& op_pd,
-                                  Tensor* reorder_tensor,
-                                  std::vector<primitive>* net) {
-    CHECK_NOTNULL(net);
-    CHECK_NOTNULL(reorder_tensor);
-    return CheckReorderToOpMem(op_pd, GetTensorBuffer(reorder_tensor), net);
-  }
-
   /// Function to handle output reorder
   ///
   /// This function performs very similar functionality as input reordering
@@ -1526,10 +970,9 @@ class MklDnnData {
   ///
   /// @input memory primitive descriptor for the given output of an operation
   /// @return: true in case reorder of output is needed; false, otherwise.
-  inline bool PrepareReorderToUserMemIfReq(
-      const memory::primitive_desc& op_pd) {
+  bool PrepareReorderToUserMemIfReq(const memory::primitive_desc& op_pd) {
     CHECK_NOTNULL(user_memory_);
-    if (IsReorderNeeded(op_pd)) {
+    if (op_pd != user_memory_->get_primitive_desc()) {
       // TODO(nhasabni): can we remove dynamic memory allocation?
       reorder_memory_ = new memory(op_pd);
       return true;
@@ -1544,11 +987,11 @@ class MklDnnData {
   /// to the user-specified output buffer.
   ///
   /// @input: net - net to which to add reorder primitive
-  inline void InsertReorderToUserMem(std::vector<primitive>* net) {
+  void InsertReorderToUserMem(std::vector<primitive>* net) {
     CHECK_NOTNULL(net);
     CHECK_NOTNULL(user_memory_);
     CHECK_NOTNULL(reorder_memory_);
-    net->push_back(CreateReorder(reorder_memory_, user_memory_));
+    net->push_back(reorder(*reorder_memory_, *user_memory_));
   }
 };
 
diff --git a/tensorflow/core/util/mkl_util_test.cc b/tensorflow/core/util/mkl_util_test.cc
deleted file mode 100644
index 6aef3d86e9..0000000000
--- a/tensorflow/core/util/mkl_util_test.cc
+++ /dev/null
@@ -1,92 +0,0 @@
-/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-#ifdef INTEL_MKL
-
-#include "tensorflow/core/util/mkl_util.h"
-
-#include "tensorflow/core/platform/test.h"
-
-namespace tensorflow {
-namespace {
-
-#ifdef INTEL_MKL_DNN
-
-TEST(MklUtilTest, MklDnnTfShape) {
-  auto cpu_engine = engine(engine::cpu, 0);
-  MklDnnData<float> a(&cpu_engine);
-
-  const int N = 1, C = 2, H = 3, W = 4;
-  memory::dims a_dims = {N, C, H, W};
-  MklDnnShape a_mkldnn_shape;
-  a_mkldnn_shape.SetMklTensor(true);
-  // Create TF layout in NCHW.
-  a_mkldnn_shape.SetTfLayout(a_dims.size(), a_dims, memory::format::nchw);
-  TensorShape a_tf_shape_nchw({N, C, H, W});
-  TensorShape a_tf_shape_nhwc({N, H, W, C});
-  TensorShape a_mkldnn_tf_shape = a_mkldnn_shape.GetTfShape();
-  // Check that returned shape is in NCHW format.
-  EXPECT_EQ(a_tf_shape_nchw, a_mkldnn_tf_shape);
-  EXPECT_NE(a_tf_shape_nhwc, a_mkldnn_tf_shape);
-
-  memory::dims b_dims = {N, C, H, W};
-  MklDnnShape b_mkldnn_shape;
-  b_mkldnn_shape.SetMklTensor(true);
-  // Create TF layout in NHWC.
-  b_mkldnn_shape.SetTfLayout(b_dims.size(), b_dims, memory::format::nhwc);
-  TensorShape b_tf_shape_nhwc({N, H, W, C});
-  TensorShape b_tf_shape_nchw({N, C, H, W});
-  TensorShape b_mkldnn_tf_shape = b_mkldnn_shape.GetTfShape();
-  // Check that returned shape is in NHWC format.
-  EXPECT_EQ(b_tf_shape_nhwc, b_mkldnn_tf_shape);
-  EXPECT_NE(b_tf_shape_nchw, b_mkldnn_tf_shape);
-}
-
-
-TEST(MklUtilTest, MklDnnBlockedFormatTest) {
-  // Let's create 2D tensor of shape {3, 4} with 3 being innermost dimension
-  // first (case 1) and then it being outermost dimension (case 2).
-  auto cpu_engine = engine(engine::cpu, 0);
-
-  // Setting for case 1
-  MklDnnData<float> a(&cpu_engine);
-  memory::dims dim1 = {3, 4};
-  memory::dims strides1 = {1, 3};
-  a.SetUsrMem(dim1, strides1);
-
-  memory::desc a_md1 = a.GetUsrMemDesc();
-  EXPECT_EQ(a_md1.data.ndims, 2);
-  EXPECT_EQ(a_md1.data.dims[0], 3);
-  EXPECT_EQ(a_md1.data.dims[1], 4);
-  EXPECT_EQ(a_md1.data.format, mkldnn_blocked);
-
-  // Setting for case 2
-  MklDnnData<float> b(&cpu_engine);
-  memory::dims dim2 = {3, 4};
-  memory::dims strides2 = {4, 1};
-  b.SetUsrMem(dim2, strides2);
-
-  memory::desc b_md2 = b.GetUsrMemDesc();
-  EXPECT_EQ(b_md2.data.ndims, 2);
-  EXPECT_EQ(b_md2.data.dims[0], 3);
-  EXPECT_EQ(b_md2.data.dims[1], 4);
-  EXPECT_EQ(b_md2.data.format, mkldnn_blocked);
-}
-
-#endif  // INTEL_MKL_DNN
-}  // namespace
-}  // namespace tensorflow
-
-#endif  // INTEL_MKL
diff --git a/tensorflow/docs_src/api_guides/python/threading_and_queues.md b/tensorflow/docs_src/api_guides/python/threading_and_queues.md
index 8ad4c4c075..ab95ce0af9 100644
--- a/tensorflow/docs_src/api_guides/python/threading_and_queues.md
+++ b/tensorflow/docs_src/api_guides/python/threading_and_queues.md
@@ -3,7 +3,7 @@
 Note: In versions of TensorFlow before 1.2, we recommended using multi-threaded,
 queue-based input pipelines for performance. Beginning with TensorFlow 1.4,
 however, we recommend using the `tf.data` module instead. (See
-@{$datasets$Datasets} for details. In TensorFlow 1.2 and 1.3, the module was
+[Datasets](datasets) for details. In TensorFlow 1.2 and 1.3, the module was
 called `tf.contrib.data`.) The `tf.data` module offers an easier-to-use
 interface for constructing efficient input pipelines. Furthermore, we've stopped
 developing the old multi-threaded, queue-based input pipelines.  We've retained
diff --git a/tensorflow/docs_src/get_started/get_started.md b/tensorflow/docs_src/get_started/get_started.md
index be14ab4026..8409962744 100644
--- a/tensorflow/docs_src/get_started/get_started.md
+++ b/tensorflow/docs_src/get_started/get_started.md
@@ -272,7 +272,7 @@ train = optimizer.minimize(loss)
 ```
 
 ```python
-sess.run(init) # reset variables to incorrect defaults.
+sess.run(init) # reset values to incorrect defaults.
 for i in range(1000):
   sess.run(train, {x: [1, 2, 3, 4], y: [0, -1, -2, -3]})
 
@@ -317,7 +317,7 @@ y_train = [0, -1, -2, -3]
 # training loop
 init = tf.global_variables_initializer()
 sess = tf.Session()
-sess.run(init) # initialize variables with incorrect defaults.
+sess.run(init) # reset values to wrong
 for i in range(1000):
   sess.run(train, {x: x_train, y: y_train})
 
@@ -383,7 +383,7 @@ train_input_fn = tf.estimator.inputs.numpy_input_fn(
 eval_input_fn = tf.estimator.inputs.numpy_input_fn(
     {"x": x_eval}, y_eval, batch_size=4, num_epochs=1000, shuffle=False)
 
-# We can invoke 1000 training steps by invoking the method and passing the
+# We can invoke 1000 training steps by invoking the  method and passing the
 # training data set.
 estimator.train(input_fn=input_fn, steps=1000)
 
diff --git a/tensorflow/docs_src/get_started/input_fn.md b/tensorflow/docs_src/get_started/input_fn.md
index 0db5c6143a..9d3af5d96a 100644
--- a/tensorflow/docs_src/get_started/input_fn.md
+++ b/tensorflow/docs_src/get_started/input_fn.md
@@ -191,7 +191,7 @@ import pandas as pd
 
 def get_input_fn_from_pandas(data_set, num_epochs=None, shuffle=True):
   return tf.estimator.inputs.pandas_input_fn(
-      x=pd.DataFrame(...),
+      x=pdDataFrame(...),
       y=pd.Series(...),
       num_epochs=num_epochs,
       shuffle=shuffle)
@@ -267,8 +267,8 @@ tf.logging.set_verbosity(tf.logging.INFO)
 
 Define the column names for the data set in `COLUMNS`. To distinguish features
 from the label, also define `FEATURES` and `LABEL`. Then read the three CSVs
-([train](http://download.tensorflow.org/data/boston_train.csv),
-[test](http://download.tensorflow.org/data/boston_test.csv), and
+(@{tf.train},
+@{tf.test}, and
 [predict](http://download.tensorflow.org/data/boston_predict.csv)) into _pandas_
 `DataFrame`s:
 
diff --git a/tensorflow/docs_src/install/install_c.md b/tensorflow/docs_src/install/install_c.md
index df622c6ac5..3a153e8114 100644
--- a/tensorflow/docs_src/install/install_c.md
+++ b/tensorflow/docs_src/install/install_c.md
@@ -38,7 +38,7 @@ enable TensorFlow for C:
          OS="linux" # Change to "darwin" for macOS
          TARGET_DIRECTORY="/usr/local"
          curl -L \
-           "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-${TF_TYPE}-${OS}-x86_64-1.4.0.tar.gz" |
+           "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-${TF_TYPE}-${OS}-x86_64-1.4.0-rc1.tar.gz" |
            sudo tar -C $TARGET_DIRECTORY -xz
 
      The `tar` command extracts the TensorFlow C library into the `lib`
diff --git a/tensorflow/docs_src/install/install_go.md b/tensorflow/docs_src/install/install_go.md
index 8b3da49a0d..df43255896 100644
--- a/tensorflow/docs_src/install/install_go.md
+++ b/tensorflow/docs_src/install/install_go.md
@@ -38,7 +38,7 @@ steps to install this library and enable TensorFlow for Go:
          TF_TYPE="cpu" # Change to "gpu" for GPU support
          TARGET_DIRECTORY='/usr/local'
          curl -L \
-           "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-${TF_TYPE}-$(go env GOOS)-x86_64-1.4.0.tar.gz" |
+           "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-${TF_TYPE}-$(go env GOOS)-x86_64-1.4.0-rc1.tar.gz" |
          sudo tar -C $TARGET_DIRECTORY -xz
 
      The `tar` command extracts the TensorFlow C library into the `lib`
diff --git a/tensorflow/docs_src/install/install_java.md b/tensorflow/docs_src/install/install_java.md
index 6eb8158249..f7f2c3cdc7 100644
--- a/tensorflow/docs_src/install/install_java.md
+++ b/tensorflow/docs_src/install/install_java.md
@@ -36,7 +36,7 @@ following to the project's `pom.xml` to use the TensorFlow Java APIs:
 <dependency>
   <groupId>org.tensorflow</groupId>
   <artifactId>tensorflow</artifactId>
-  <version>1.4.0</version>
+  <version>1.4.0-rc1</version>
 </dependency>
 ```
 
@@ -65,7 +65,7 @@ As an example, these steps will create a Maven project that uses TensorFlow:
                <dependency>
                  <groupId>org.tensorflow</groupId>
                  <artifactId>tensorflow</artifactId>
-                 <version>1.4.0</version>
+                 <version>1.4.0-rc1</version>
                </dependency>
              </dependencies>
          </project>
@@ -124,7 +124,7 @@ refer to the simpler instructions above instead.
 Take the following steps to install TensorFlow for Java on Linux or macOS:
 
   1. Download
-     [libtensorflow.jar](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-1.4.0.jar),
+     [libtensorflow.jar](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-1.4.0-rc1.jar),
      which is the TensorFlow Java Archive (JAR).
 
   2. Decide whether you will run TensorFlow for Java on CPU(s) only or with
@@ -143,7 +143,7 @@ Take the following steps to install TensorFlow for Java on Linux or macOS:
          OS=$(uname -s | tr '[:upper:]' '[:lower:]')
          mkdir -p ./jni
          curl -L \
-           "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow_jni-${TF_TYPE}-${OS}-x86_64-1.4.0.tar.gz" |
+           "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow_jni-${TF_TYPE}-${OS}-x86_64-1.4.0-rc1.tar.gz" |
            tar -xz -C ./jni
 
 ### Install on Windows
@@ -151,10 +151,10 @@ Take the following steps to install TensorFlow for Java on Linux or macOS:
 Take the following steps to install TensorFlow for Java on Windows:
 
   1. Download
-     [libtensorflow.jar](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-1.4.0.jar),
+     [libtensorflow.jar](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-1.4.0-rc1.jar),
      which is the TensorFlow Java Archive (JAR).
   2. Download the following Java Native Interface (JNI) file appropriate for
-     [TensorFlow for Java on Windows](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow_jni-cpu-windows-x86_64-1.4.0.zip).
+     [TensorFlow for Java on Windows](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow_jni-cpu-windows-x86_64-1.4.0-rc1.zip).
   3. Extract this .zip file.
 
 
@@ -202,7 +202,7 @@ must be part of your `classpath`. For example, you can include the
 downloaded `.jar` in your `classpath` by using the `-cp` compilation flag
 as follows:
 
-<pre><b>javac -cp libtensorflow-1.4.0.jar HelloTF.java</b></pre>
+<pre><b>javac -cp libtensorflow-1.4.0-rc1.jar HelloTF.java</b></pre>
 
 
 ### Running
@@ -216,11 +216,11 @@ two files are available to the JVM:
 For example, the following command line executes the `HelloTF` program on Linux
 and macOS X:
 
-<pre><b>java -cp libtensorflow-1.4.0.jar:. -Djava.library.path=./jni HelloTF</b></pre>
+<pre><b>java -cp libtensorflow-1.4.0-rc1.jar:. -Djava.library.path=./jni HelloTF</b></pre>
 
 And the following command line executes the `HelloTF` program on Windows:
 
-<pre><b>java -cp libtensorflow-1.4.0.jar;. -Djava.library.path=jni HelloTF</b></pre>
+<pre><b>java -cp libtensorflow-1.4.0-rc1.jar;. -Djava.library.path=jni HelloTF</b></pre>
 
 If the program prints <tt>Hello from <i>version</i></tt>, you've successfully
 installed TensorFlow for Java and are ready to use the API.  If the program
diff --git a/tensorflow/docs_src/install/install_linux.md b/tensorflow/docs_src/install/install_linux.md
index f7380bac8a..414ab7b1f7 100644
--- a/tensorflow/docs_src/install/install_linux.md
+++ b/tensorflow/docs_src/install/install_linux.md
@@ -188,7 +188,7 @@ Take the following steps to install TensorFlow with Virtualenv:
      Virtualenv environment:
 
      <pre>(tensorflow)$ <b>pip3 install --upgrade \
-     https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.4.0-cp34-cp34m-linux_x86_64.whl</b></pre>
+     https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.4.0rc1-cp34-cp34m-linux_x86_64.whl</b></pre>
 
 If you encounter installation problems, see
 [Common Installation Problems](#common_installation_problems).
@@ -293,7 +293,7 @@ take the following steps:
 
      <pre>
      $ <b>sudo pip3 install --upgrade \
-     https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.4.0-cp34-cp34m-linux_x86_64.whl</b>
+     https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.4.0rc1-cp34-cp34m-linux_x86_64.whl</b>
      </pre>
 
      If this step fails, see
@@ -480,7 +480,7 @@ Take the following steps to install TensorFlow in an Anaconda environment:
 
      <pre>
      (tensorflow)$ <b>pip install --ignore-installed --upgrade \
-     https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.4.0-cp34-cp34m-linux_x86_64.whl</b></pre>
+     https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.4.0rc1-cp34-cp34m-linux_x86_64.whl</b></pre>
 
 
 <a name="ValidateYourInstallation"></a>
@@ -648,14 +648,14 @@ This section documents the relevant values for Linux installations.
 CPU only:
 
 <pre>
-https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.4.0-cp27-none-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.4.0rc1-cp27-none-linux_x86_64.whl
 </pre>
 
 
 GPU support:
 
 <pre>
-https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.4.0-cp27-none-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.4.0rc1-cp27-none-linux_x86_64.whl
 </pre>
 
 Note that GPU support requires the NVIDIA hardware and software described in
@@ -667,14 +667,14 @@ Note that GPU support requires the NVIDIA hardware and software described in
 CPU only:
 
 <pre>
-https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.4.0-cp34-cp34m-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.4.0rc1-cp34-cp34m-linux_x86_64.whl
 </pre>
 
 
 GPU support:
 
 <pre>
-https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.4.0-cp34-cp34m-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.4.0rc1-cp34-cp34m-linux_x86_64.whl
 </pre>
 
 Note that GPU support requires the NVIDIA hardware and software described in
@@ -686,14 +686,14 @@ Note that GPU support requires the NVIDIA hardware and software described in
 CPU only:
 
 <pre>
-https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.4.0-cp35-cp35m-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.4.0rc1-cp35-cp35m-linux_x86_64.whl
 </pre>
 
 
 GPU support:
 
 <pre>
-https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.4.0-cp35-cp35m-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.4.0rc1-cp35-cp35m-linux_x86_64.whl
 </pre>
 
 
@@ -705,14 +705,14 @@ Note that GPU support requires the NVIDIA hardware and software described in
 CPU only:
 
 <pre>
-https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.4.0-cp36-cp36m-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.4.0rc1-cp36-cp36m-linux_x86_64.whl
 </pre>
 
 
 GPU support:
 
 <pre>
-https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.4.0-cp36-cp36m-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.4.0rc1-cp36-cp36m-linux_x86_64.whl
 </pre>
 
 
diff --git a/tensorflow/docs_src/install/install_mac.md b/tensorflow/docs_src/install/install_mac.md
index 79b383817b..9a95710bfa 100644
--- a/tensorflow/docs_src/install/install_mac.md
+++ b/tensorflow/docs_src/install/install_mac.md
@@ -114,7 +114,7 @@ Take the following steps to install TensorFlow with Virtualenv:
      TensorFlow in the active Virtualenv is as follows:
 
      <pre> $ <b>pip3 install --upgrade \
-     https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.4.0-py2-none-any.whl</b></pre>
+     https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.4.0rc1-py2-none-any.whl</b></pre>
 
 If you encounter installation problems, see
 [Common Installation Problems](#common-installation-problems).
@@ -235,7 +235,7 @@ take the following steps:
      issue the following command:
 
      <pre> $ <b>sudo pip3 install --upgrade \
-     https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.4.0-py2-none-any.whl</b> </pre>
+     https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.4.0rc1-py2-none-any.whl</b> </pre>
 
      If the preceding command fails, see
      [installation problems](#common-installation-problems).
@@ -344,7 +344,7 @@ Take the following steps to install TensorFlow in an Anaconda environment:
      TensorFlow for Python 2.7:
 
      <pre> (tensorflow)$ <b>pip install --ignore-installed --upgrade \
-     https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.4.0-py2-none-any.whl</b></pre>
+     https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.4.0rc1-py2-none-any.whl</b></pre>
 
 
 <a name="ValidateYourInstallation"></a>
@@ -517,7 +517,7 @@ This section documents the relevant values for Mac OS installations.
 
 
 <pre>
-https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.4.0-py2-none-any.whl
+https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.4.0rc1-py2-none-any.whl
 </pre>
 
 
@@ -525,7 +525,7 @@ https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.4.0-py2-none-any.
 
 
 <pre>
-https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.4.0-py3-none-any.whl
+https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.4.0rc1-py3-none-any.whl
 </pre>
 
 
diff --git a/tensorflow/docs_src/install/install_sources.md b/tensorflow/docs_src/install/install_sources.md
index aa4ae6c876..6d0dcdcd4a 100644
--- a/tensorflow/docs_src/install/install_sources.md
+++ b/tensorflow/docs_src/install/install_sources.md
@@ -355,10 +355,10 @@ Invoke `pip install` to install that pip package.
 The filename of the `.whl` file depends on your platform.
 For example, the following command will install the pip package
 
-for TensorFlow 1.4.0 on Linux:
+for TensorFlow 1.4.0rc1 on Linux:
 
 <pre>
-$ <b>sudo pip install /tmp/tensorflow_pkg/tensorflow-1.4.0-py2-none-any.whl</b>
+$ <b>sudo pip install /tmp/tensorflow_pkg/tensorflow-1.4.0rc1-py2-none-any.whl</b>
 </pre>
 
 ## Validate your installation
@@ -447,10 +447,8 @@ Stack Overflow and specify the `tensorflow` tag.
 **Linux**
 <table>
 <tr><th>Version:</th><th>CPU/GPU:</th><th>Python Version:</th><th>Compiler:</th><th>Build Tools:</th><th>cuDNN:</th><th>CUDA:</th></tr>
-<tr><td>tensorflow-1.4.0</td><td>CPU</td><td>2.7, 3.3-3.6</td><td>GCC 4.8</td><td>Bazel 0.5.4</td><td>N/A</td><td>N/A</td></tr>
-<tr><td>tensorflow_gpu-1.4.0</td><td>GPU</td><td>2.7, 3.3-3.6</td><td>GCC 4.8</td><td>Bazel 0.5.4</td><td>6</td><td>8</td></tr>
- <tr><td>tensorflow-1.3.0</td><td>CPU</td><td>2.7, 3.3-3.6</td><td>GCC 4.8</td><td>Bazel 0.4.5</td><td>N/A</td><td>N/A</td></tr>
-<tr><td>tensorflow_gpu-1.3.0</td><td>GPU</td><td>2.7, 3.3-3.6</td><td>GCC 4.8</td><td>Bazel 0.4.5</td><td>6</td><td>8</td></tr>
+<tr><td>tensorflow-1.4.0rc1</td><td>CPU</td><td>2.7, 3.3-3.6</td><td>GCC 4.8</td><td>Bazel 0.4.5</td><td>N/A</td><td>N/A</td></tr>
+<tr><td>tensorflow_gpu-1.4.0rc1</td><td>GPU</td><td>2.7, 3.3-3.6</td><td>GCC 4.8</td><td>Bazel 0.4.5</td><td>6</td><td>8</td></tr>
 <tr><td>tensorflow-1.2.0</td><td>CPU</td><td>2.7, 3.3-3.6</td><td>GCC 4.8</td><td>Bazel 0.4.5</td><td>N/A</td><td>N/A</td></tr>
 <tr><td>tensorflow_gpu-1.2.0</td><td>GPU</td><td>2.7, 3.3-3.6</td><td>GCC 4.8</td><td>Bazel 0.4.5</td><td>5.1</td><td>8</td></tr>
 <tr><td>tensorflow-1.1.0</td><td>CPU</td><td>2.7, 3.3-3.6</td><td>GCC 4.8</td><td>Bazel 0.4.2</td><td>N/A</td><td>N/A</td></tr>
@@ -462,8 +460,7 @@ Stack Overflow and specify the `tensorflow` tag.
 **Mac**
 <table>
 <tr><th>Version:</th><th>CPU/GPU:</th><th>Python Version:</th><th>Compiler:</th><th>Build Tools:</th><th>cuDNN:</th><th>CUDA:</th></tr>
-<tr><td>tensorflow-1.4.0</td><td>CPU</td><td>2.7, 3.3-3.6</td><td>Clang from xcode</td><td>Bazel 0.5.4</td><td>N/A</td><td>N/A</td></tr>
- <tr><td>tensorflow-1.3.0</td><td>CPU</td><td>2.7, 3.3-3.6</td><td>Clang from xcode</td><td>Bazel 0.4.5</td><td>N/A</td><td>N/A</td></tr>
+<tr><td>tensorflow-1.4.0rc1</td><td>CPU</td><td>2.7, 3.3-3.6</td><td>Clang from xcode</td><td>Bazel 0.4.5</td><td>N/A</td><td>N/A</td></tr>
 <tr><td>tensorflow-1.2.0</td><td>CPU</td><td>2.7, 3.3-3.6</td><td>Clang from xcode</td><td>Bazel 0.4.5</td><td>N/A</td><td>N/A</td></tr>
 <tr><td>tensorflow-1.1.0</td><td>CPU</td><td>2.7, 3.3-3.6</td><td>Clang from xcode</td><td>Bazel 0.4.2</td><td>N/A</td><td>N/A</td></tr>
 <tr><td>tensorflow_gpu-1.1.0</td><td>GPU</td><td>2.7, 3.3-3.6</td><td>Clang from xcode</td><td>Bazel 0.4.2</td><td>5.1</td><td>8</td></tr>
@@ -474,10 +471,8 @@ Stack Overflow and specify the `tensorflow` tag.
 **Windows**
 <table>
 <tr><th>Version:</th><th>CPU/GPU:</th><th>Python Version:</th><th>Compiler:</th><th>Build Tools:</th><th>cuDNN:</th><th>CUDA:</th></tr>
-<tr><td>tensorflow-1.4.0</td><td>CPU</td><td>3.5-3.6</td><td>MSVC 2015 update 3</td><td>Cmake v3.6.3</td><td>N/A</td><td>N/A</td></tr>
-<tr><td>tensorflow_gpu-1.4.0</td><td>GPU</td><td>3.5-3.6</td><td>MSVC 2015 update 3</td><td>Cmake v3.6.3</td><td>6</td><td>8</td></tr>
-<tr><td>tensorflow-1.3.0</td><td>CPU</td><td>3.5-3.6</td><td>MSVC 2015 update 3</td><td>Cmake v3.6.3</td><td>N/A</td><td>N/A</td></tr>
-<tr><td>tensorflow_gpu-1.3.0</td><td>GPU</td><td>3.5-3.6</td><td>MSVC 2015 update 3</td><td>Cmake v3.6.3</td><td>6</td><td>8</td></tr>
+<tr><td>tensorflow-1.4.0rc1</td><td>CPU</td><td>3.5-3.6</td><td>MSVC 2015 update 3</td><td>Cmake v3.6.3</td><td>N/A</td><td>N/A</td></tr>
+<tr><td>tensorflow_gpu-1.4.0rc1</td><td>GPU</td><td>3.5-3.6</td><td>MSVC 2015 update 3</td><td>Cmake v3.6.3</td><td>6</td><td>8</td></tr>
 <tr><td>tensorflow-1.2.0</td><td>CPU</td><td>3.5-3.6</td><td>MSVC 2015 update 3</td><td>Cmake v3.6.3</td><td>N/A</td><td>N/A</td></tr>
 <tr><td>tensorflow_gpu-1.2.0</td><td>GPU</td><td>3.5-3.6</td><td>MSVC 2015 update 3</td><td>Cmake v3.6.3</td><td>5.1</td><td>8</td></tr>
 <tr><td>tensorflow-1.1.0</td><td>CPU</td><td>3.5</td><td>MSVC 2015 update 3</td><td>Cmake v3.6.3</td><td>N/A</td><td>N/A</td></tr>
diff --git a/tensorflow/docs_src/mobile/prepare_models.md b/tensorflow/docs_src/mobile/prepare_models.md
index 8fc65be35a..c5a560e074 100644
--- a/tensorflow/docs_src/mobile/prepare_models.md
+++ b/tensorflow/docs_src/mobile/prepare_models.md
@@ -296,6 +296,6 @@ complains about missing header files, add the .h’s that are needed into
 the
 [`android_extended_ops`](https://www.tensorflow.org/code/tensorflow/core/kernels/BUILD#L3525) target.
 
-If you’re using a makefile targeting iOS, Raspberry Pi, etc, go to
+If you’re using a makefile targetting iOS, Raspberry Pi, etc, go to
 [`tensorflow/contrib/makefile/tf_op_files.txt`](https://www.tensorflow.org/code/tensorflow/contrib/makefile/tf_op_files.txt) and
 add the right implementation files there.
diff --git a/tensorflow/docs_src/programmers_guide/debugger.md b/tensorflow/docs_src/programmers_guide/debugger.md
index 25cb72008d..1f856bbf3f 100644
--- a/tensorflow/docs_src/programmers_guide/debugger.md
+++ b/tensorflow/docs_src/programmers_guide/debugger.md
@@ -9,19 +9,11 @@ lets you view the internal structure and states of running TensorFlow graphs
 during training and inference, which is difficult to debug with general-purpose
 debuggers such as Python's `pdb` due to TensorFlow's computation-graph paradigm.
 
-> NOTE: TensorFlow debugger uses a
-> [curses](https://en.wikipedia.org/wiki/Curses_\(programming_library\))-based
-> text user interface. On Mac OS X, the `ncurses` library is required and can
-> be installed with `brew install homebrew/dupes/ncurses`. On Windows, curses
-> isn't as well supported, so a
-> [readline](https://en.wikipedia.org/wiki/GNU_Readline)-based interface can
-> be used with tfdbg by installing `pyreadline` with pip.
-> If you use Anaconda3, you can install it with a command
+> NOTE: The system requirements of tfdbg on supported external platforms include
+> the following. On Mac OS X, the `ncurses` library is required. It can be
+> installed with `brew install homebrew/dupes/ncurses`. On Windows, `pyreadline`
+> is required. If you use Anaconda3, you can install it with a command
 > such as `"C:\Program Files\Anaconda3\Scripts\pip.exe" install pyreadline`.
-> Unofficial Windows curses packages can be downloaded
-> [here](https://www.lfd.uci.edu/~gohlke/pythonlibs/#curses), then subsequently
-> installed using `pip install <your_version>.whl`, however curses on Windows
-> may not work as reliably as curses on Linux or Mac.
 
 This tutorial demonstrates how to use the **tfdbg** command-line interface
 (CLI) to debug the appearance of [`nan`s](https://en.wikipedia.org/wiki/NaN)
@@ -157,7 +149,6 @@ Try the following commands at the `tfdbg>` prompt (referencing the code at
 | | `pt <tensor>[slicing]` | Print a subarray of tensor, using [numpy](http://www.numpy.org/)-style array slicing. | `pt hidden/Relu:0[0:50,:]` |
 | | `-a` | Print the entirety of a large tensor, without using ellipses. (May take a long time for large tensors.) | `pt -a hidden/Relu:0[0:50,:]` |
 | | `-r <range>` | Highlight elements falling into specified numerical range. Multiple ranges can be used in conjunction. | `pt hidden/Relu:0 -a -r [[-inf,-1],[1,inf]]` |
-| | `-n <number>` | Print dump corresponding to specified 0-based dump number. Required for tensors with multiple dumps. | `pt -n 0 hidden/Relu:0` |
 | | `-s` | Include a summary of the numeric values of the tensor (applicable only to non-empty tensors with Boolean and numeric types such as `int*` and `float*`.) | `pt -s hidden/Relu:0[0:50,:]` |
 | **`@[coordinates]`** | | Navigate to specified element in `pt` output. | `@[10,0]` or `@10,0` |
 | **`/regex`** | |  [less](https://linux.die.net/man/1/less)-style search for given regular expression. | `/inf` |
@@ -175,12 +166,10 @@ Try the following commands at the `tfdbg>` prompt (referencing the code at
 | | `-r` | List the inputs to node, recursively (the input tree.) | `li -r hidden/Relu:0` |
 | | `-d <max_depth>` | Limit recursion depth under the `-r` mode. | `li -r -d 3 hidden/Relu:0` |
 | | `-c` | Include control inputs. | `li -c -r hidden/Relu:0` |
-| | `-t` | Show op types of input nodes. | `li -t -r hidden/Relu:0` |
 | **`lo`** | | **List output recipients of node** | |
 | | `-r` | List the output recipients of node, recursively (the output tree.) | `lo -r hidden/Relu:0` |
 | | `-d <max_depth>` | Limit recursion depth under the `-r` mode. | `lo -r -d 3 hidden/Relu:0` |
 | | `-c` | Include recipients via control edges. | `lo -c -r hidden/Relu:0` |
-| | `-t` | Show op types of recipient nodes. | `lo -t -r hidden/Relu:0` |
 | **`ls`** | | **List Python source files involved in node creation.** | |
 | | `-p <path_pattern>` | Limit output to source files matching given regular-expression path pattern. | `ls -p .*debug_mnist.*` |
 | | `-n` | Limit output to node names matching given regular-expression pattern. | `ls -n Softmax.*` |
diff --git a/tensorflow/docs_src/programmers_guide/tensors.md b/tensorflow/docs_src/programmers_guide/tensors.md
index 88eb277e35..d6f80430cd 100644
--- a/tensorflow/docs_src/programmers_guide/tensors.md
+++ b/tensorflow/docs_src/programmers_guide/tensors.md
@@ -29,8 +29,8 @@ Some types of tensors are special, and these will be covered in other
 units of the Programmer's guide. The main ones are:
 
   * `tf.Variable`
-  * `tf.constant`
-  * `tf.placeholder`
+  * `tf.Constant`
+  * `tf.Placeholder`
   * `tf.SparseTensor`
 
 With the exception of `tf.Variable`, the value of a tensor is immutable, which
@@ -64,7 +64,7 @@ The following snippet demonstrates creating a few rank 0 variables:
 mammal = tf.Variable("Elephant", tf.string)
 ignition = tf.Variable(451, tf.int16)
 floating = tf.Variable(3.14159265359, tf.float64)
-its_complicated = tf.Variable(12.3 - 4.85j, tf.complex64)
+its_complicated = tf.Variable((12.3, -4.85), tf.complex64)
 ```
 
 Note: A string is treated as a single item in TensorFlow, not as a sequence of
@@ -79,7 +79,7 @@ initial value. For example:
 mystr = tf.Variable(["Hello"], tf.string)
 cool_numbers  = tf.Variable([3.14159, 2.71828], tf.float32)
 first_primes = tf.Variable([2, 3, 5, 7, 11], tf.int32)
-its_very_complicated = tf.Variable([12.3 - 4.85j, 7.5 - 6.23j], tf.complex64)
+its_very_complicated = tf.Variable([(12.3, -4.85), (7.5, -6.23)], tf.complex64)
 ```
 
 
@@ -275,8 +275,8 @@ Graphs and Sessions for more information).
 
 Sometimes it is not possible to evaluate a `tf.Tensor` with no context because
 its value might depend on dynamic information that is not available. For
-example, tensors that depend on `placeholder`s can't be evaluated without
-providing a value for the `placeholder`.
+example, tensors that depend on `Placeholder`s can't be evaluated without
+providing a value for the `Placeholder`.
 
 ``` python
 p = tf.placeholder(tf.float32)
diff --git a/tensorflow/examples/speech_commands/models.py b/tensorflow/examples/speech_commands/models.py
index ab611f414a..82d6a94ea1 100644
--- a/tensorflow/examples/speech_commands/models.py
+++ b/tensorflow/examples/speech_commands/models.py
@@ -326,7 +326,7 @@ def create_low_latency_conv_model(fingerprint_input, model_settings,
   first_filter_height = input_time_size
   first_filter_count = 186
   first_filter_stride_x = 1
-  first_filter_stride_y = 1
+  first_filter_stride_y = 4
   first_weights = tf.Variable(
       tf.truncated_normal(
           [first_filter_height, first_filter_width, 1, first_filter_count],
diff --git a/tensorflow/go/android.go b/tensorflow/go/android.go
deleted file mode 100644
index 3db3ddfec5..0000000000
--- a/tensorflow/go/android.go
+++ /dev/null
@@ -1,20 +0,0 @@
-// Copyright 2016 The TensorFlow Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//  http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-// +build android
-
-package tensorflow
-
-// #cgo LDFLAGS: -landroid -llog -lm -lz -ldl
-import "C"
diff --git a/tensorflow/go/operation_test.go b/tensorflow/go/operation_test.go
index 40c951ab8c..7cba043af2 100644
--- a/tensorflow/go/operation_test.go
+++ b/tensorflow/go/operation_test.go
@@ -123,14 +123,6 @@ func TestOutputDataTypeAndShape(t *testing.T) {
 			[]int64{2, 3},
 			Double,
 		},
-		{ // Matrix of Uint64
-			[][]uint64{
-				{1, 2, 3},
-				{4, 5, 6},
-			},
-			[]int64{2, 3},
-			Uint64,
-		},
 	}
 	for idx, test := range testdata {
 		t.Run(fmt.Sprintf("#%d Value %T", idx, test.Value), func(t *testing.T) {
diff --git a/tensorflow/go/tensor.go b/tensorflow/go/tensor.go
index 1326a95278..36a74c0081 100644
--- a/tensorflow/go/tensor.go
+++ b/tensorflow/go/tensor.go
@@ -101,7 +101,7 @@ func NewTensor(value interface{}) (*Tensor, error) {
 			return nil, bug("NewTensor incorrectly calculated the size of a tensor with type %v and shape %v as %v bytes instead of %v", dataType, shape, nbytes, buf.Len())
 		}
 	} else {
-		e := stringEncoder{offsets: buf, data: raw[nflattened*8:], status: newStatus()}
+		e := stringEncoder{offsets: buf, data: raw[nflattened*8 : len(raw)], status: newStatus()}
 		if err := e.encode(reflect.ValueOf(value), shape); err != nil {
 			return nil, err
 		}
@@ -207,9 +207,6 @@ func (t *Tensor) WriteContentsTo(w io.Writer) (int64, error) {
 func tensorData(c *C.TF_Tensor) []byte {
 	// See: https://github.com/golang/go/wiki/cgo#turning-c-arrays-into-go-slices
 	cbytes := C.TF_TensorData(c)
-	if cbytes == nil {
-		return nil
-	}
 	length := int(C.TF_TensorByteSize(c))
 	slice := (*[1 << 30]byte)(unsafe.Pointer(cbytes))[:length:length]
 	return slice
@@ -313,7 +310,7 @@ func encodeTensor(w *bytes.Buffer, v reflect.Value, shape []int64) error {
 		if err := w.WriteByte(b); err != nil {
 			return err
 		}
-	case reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Float32, reflect.Float64, reflect.Complex64, reflect.Complex128:
+	case reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64, reflect.Uint8, reflect.Uint16, reflect.Float32, reflect.Float64, reflect.Complex64, reflect.Complex128:
 		if err := binary.Write(w, nativeEndian, v.Interface()); err != nil {
 			return err
 		}
@@ -352,7 +349,7 @@ func decodeTensor(r *bytes.Reader, shape []int64, typ reflect.Type, ptr reflect.
 			return err
 		}
 		ptr.Elem().SetBool(b == 1)
-	case reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Float32, reflect.Float64, reflect.Complex64, reflect.Complex128:
+	case reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64, reflect.Uint8, reflect.Uint16, reflect.Float32, reflect.Float64, reflect.Complex64, reflect.Complex128:
 		if err := binary.Read(r, nativeEndian, ptr.Interface()); err != nil {
 			return err
 		}
diff --git a/tensorflow/go/tensor_test.go b/tensorflow/go/tensor_test.go
index 674a8ce86f..35bd2fd9a5 100644
--- a/tensorflow/go/tensor_test.go
+++ b/tensorflow/go/tensor_test.go
@@ -34,15 +34,11 @@ func TestNewTensor(t *testing.T) {
 		{nil, int64(5)},
 		{nil, uint8(5)},
 		{nil, uint16(5)},
-		{nil, uint32(5)},
-		{nil, uint64(5)},
 		{nil, float32(5)},
 		{nil, float64(5)},
 		{nil, complex(float32(5), float32(6))},
 		{nil, complex(float64(5), float64(6))},
 		{nil, "a string"},
-		{[]int64{1}, []uint32{1}},
-		{[]int64{1}, []uint64{1}},
 		{[]int64{2}, []bool{true, false}},
 		{[]int64{1}, []float64{1}},
 		{[]int64{1}, [1]float64{1}},
@@ -75,6 +71,11 @@ func TestNewTensor(t *testing.T) {
 		// native ints not supported
 		int(5),
 		[]int{5},
+		// uint32 and uint64 are not supported in TensorFlow
+		uint32(5),
+		[]uint32{5},
+		uint64(5),
+		[]uint64{5},
 		// Mismatched dimensions
 		[][]float32{{1, 2, 3}, {4}},
 		// Mismatched dimensions. Should return "mismatched slice lengths" error instead of "BUG"
diff --git a/tensorflow/java/src/main/java/org/tensorflow/Shape.java b/tensorflow/java/src/main/java/org/tensorflow/Shape.java
index d533c3d480..9aa92be111 100644
--- a/tensorflow/java/src/main/java/org/tensorflow/Shape.java
+++ b/tensorflow/java/src/main/java/org/tensorflow/Shape.java
@@ -77,24 +77,6 @@ public final class Shape {
     return shape[i];
   }
 
-  @Override
-  public int hashCode() {
-    return Arrays.hashCode(shape);
-  }
-
-  @Override
-  public boolean equals(Object obj) {
-    if (this == obj) {
-      return true;
-    }
-
-    if (obj instanceof Shape && Arrays.equals(this.shape, ((Shape) obj).shape)) {
-      return !hasUnknownDimension();
-    }
-
-    return super.equals(obj);
-  }
-
   /** Succinct description of the shape meant for debugging. */
   @Override
   public String toString() {
@@ -116,18 +98,4 @@ public final class Shape {
   }
 
   private long[] shape;
-
-  private boolean hasUnknownDimension() {
-    if (shape == null) {
-      return true;
-    }
-
-    for (long dimension : shape) {
-      if (dimension == -1) {
-        return true;
-      }
-    }
-
-    return false;
-  }
 }
diff --git a/tensorflow/java/src/test/java/org/tensorflow/ShapeTest.java b/tensorflow/java/src/test/java/org/tensorflow/ShapeTest.java
index 92cc3bd60e..3b027700c5 100644
--- a/tensorflow/java/src/test/java/org/tensorflow/ShapeTest.java
+++ b/tensorflow/java/src/test/java/org/tensorflow/ShapeTest.java
@@ -16,7 +16,6 @@ limitations under the License.
 package org.tensorflow;
 
 import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertNotEquals;
 
 import org.junit.Test;
 import org.junit.runner.RunWith;
@@ -78,29 +77,4 @@ public class ShapeTest {
       assertEquals(5, n.shape().size(1));
     }
   }
-
-  @Test
-  public void equalsWorksCorrectly() {
-    assertEquals(Shape.scalar(), Shape.scalar());
-    assertEquals(Shape.make(1, 2, 3), Shape.make(1, 2, 3));
-
-    assertNotEquals(Shape.make(1,2), null);
-    assertNotEquals(Shape.make(1,2), new Object());
-    assertNotEquals(Shape.make(1, 2, 3), Shape.make(1, 2, 4));
-
-
-    assertNotEquals(Shape.unknown(), Shape.unknown());
-    assertNotEquals(Shape.make(-1), Shape.make(-1));
-    assertNotEquals(Shape.make(1, -1, 3), Shape.make(1, -1, 3));
-  }
-
-  @Test
-  public void hashCodeIsAsExpected() {
-    assertEquals(Shape.make(1, 2, 3, 4).hashCode(), Shape.make(1, 2, 3, 4).hashCode());
-    assertEquals(Shape.scalar().hashCode(), Shape.scalar().hashCode());
-    assertEquals(Shape.unknown().hashCode(), Shape.unknown().hashCode());
-
-    assertNotEquals(Shape.make(1, 2).hashCode(), Shape.make(1, 3).hashCode());
-  }
 }
-
diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD
index 54c43c1337..5ae4aace16 100644
--- a/tensorflow/python/BUILD
+++ b/tensorflow/python/BUILD
@@ -5,10 +5,7 @@ package(
     default_visibility = [
         "//engedu/ml/tf_from_scratch:__pkg__",
         "//tensorflow:internal",
-        "//tensorflow/contrib/lite/toco/python:__pkg__",
         "//tensorflow_models:__subpackages__",
-        # TODO(aselle): to pass open source test.
-        "//bazel_pip/tensorflow/contrib/lite/toco/python:__pkg__",
     ],
 )
 
@@ -48,7 +45,6 @@ py_library(
         "//tensorflow/compiler/aot/tests:__pkg__",  # TODO(b/34059704): remove when fixed
         "//tensorflow/contrib/learn:__pkg__",  # TODO(b/34059704): remove when fixed
         "//tensorflow/contrib/learn/python/learn/datasets:__pkg__",  # TODO(b/34059704): remove when fixed
-        "//tensorflow/contrib/lite/toco/python:__pkg__",  # TODO(b/34059704): remove when fixed
         "//tensorflow/python/debug:__pkg__",  # TODO(b/34059704): remove when fixed
         "//tensorflow/python/tools:__pkg__",  # TODO(b/34059704): remove when fixed
         "//tensorflow/tools/api/generator:__pkg__",
diff --git a/tensorflow/python/estimator/canned/head.py b/tensorflow/python/estimator/canned/head.py
index fa5d02c476..62fea05867 100644
--- a/tensorflow/python/estimator/canned/head.py
+++ b/tensorflow/python/estimator/canned/head.py
@@ -117,7 +117,7 @@ class _Head(object):
       update_op = tf.contrib.layers.optimize_loss(optimizer=sync,
                                                   loss=estimator_spec.loss, ...)
       hooks = [sync.make_session_run_hook(is_chief)]
-      ... update train_op and hooks in EstimatorSpec and return
+      ... upate train_op and hooks in EstimatorSpec and return
     ```
   """
   __metaclass__ = abc.ABCMeta
diff --git a/tensorflow/python/estimator/inputs/numpy_io.py b/tensorflow/python/estimator/inputs/numpy_io.py
index 3512f66284..c9f37f06e8 100644
--- a/tensorflow/python/estimator/inputs/numpy_io.py
+++ b/tensorflow/python/estimator/inputs/numpy_io.py
@@ -19,7 +19,6 @@ from __future__ import division
 from __future__ import print_function
 
 import collections
-from six import string_types
 from tensorflow.python.estimator.inputs.queues import feeding_functions
 
 # Key name to pack the target into dict of `features`. See
@@ -52,9 +51,8 @@ def numpy_input_fn(x,
                    num_threads=1):
   """Returns input function that would feed dict of numpy arrays into the model.
 
-  This returns a function outputting `features` and `targets` based on the dict
-  of numpy arrays. The dict `features` has the same keys as the `x`. The dict
-  `targets` has the same keys as the `y` if `y` is a dict.
+  This returns a function outputting `features` and `target` based on the dict
+  of numpy arrays. The dict `features` has the same keys as the `x`.
 
   Example:
 
@@ -71,7 +69,7 @@ def numpy_input_fn(x,
 
   Args:
     x: dict of numpy array object.
-    y: numpy array object or dict of numpy array object. `None` if absent.
+    y: numpy array object. `None` if absent.
     batch_size: Integer, size of batches to return.
     num_epochs: Integer, number of epochs to iterate over data. If `None` will
       run forever.
@@ -83,13 +81,11 @@ def numpy_input_fn(x,
       such as in prediction and evaluation mode, `num_threads` should be 1.
 
   Returns:
-    Function, that has signature of ()->(dict of `features`, `targets`)
+    Function, that has signature of ()->(dict of `features`, `target`)
 
   Raises:
     ValueError: if the shape of `y` mismatches the shape of values in `x` (i.e.,
       values in `x` have same shape).
-    ValueError: if duplicate keys are in both `x` and `y` when `y` is a dict.
-    ValueError: if x or y is an empty dict.
     TypeError: `x` is not a dict or `shuffle` is not bool.
   """
 
@@ -101,76 +97,43 @@ def numpy_input_fn(x,
     """Numpy input function."""
     if not isinstance(x, dict):
       raise TypeError('x must be dict; got {}'.format(type(x).__name__))
-    if not x:
-      raise ValueError('x cannot be empty')
 
     # Make a shadow copy and also ensure the order of iteration is consistent.
-    ordered_dict_data = collections.OrderedDict(
+    ordered_dict_x = collections.OrderedDict(
         sorted(x.items(), key=lambda t: t[0]))
-    # Deep copy keys which is a view in python 3
-    feature_keys = list(ordered_dict_data.keys())
-
-    if y is None:
-      target_keys = None
-    elif isinstance(y, dict):
-      if not y:
-        raise ValueError('y cannot be empty dict, use None instead.')
-
-      ordered_dict_y = collections.OrderedDict(
-        sorted(y.items(), key=lambda t: t[0]))
-      target_keys = list(ordered_dict_y.keys())
-
-      duplicate_keys = set(feature_keys).intersection(set(target_keys))
-      if len(duplicate_keys):
-        raise ValueError('{} duplicate keys are found in both x and y: '
-                         '{}'.format(len(duplicate_keys), duplicate_keys))
-
-      ordered_dict_data.update(ordered_dict_y)
-    else:
-      target_keys = _get_unique_target_key(ordered_dict_data)
-      ordered_dict_data[target_keys] = y
-
-    if len(set(v.shape[0] for v in ordered_dict_data.values())) != 1:
-      shape_dict_of_x = {k: ordered_dict_data[k].shape
-                         for k in feature_keys}
-
-      if target_keys is None:
-        shape_of_y = None
-      elif isinstance(target_keys, string_types):
-        shape_of_y = y.shape
-      else:
-        shape_of_y = {k: ordered_dict_data[k].shape
-                      for k in target_keys}
 
+    unique_target_key = _get_unique_target_key(ordered_dict_x)
+    if y is not None:
+      ordered_dict_x[unique_target_key] = y
+
+    if len(set(v.shape[0] for v in ordered_dict_x.values())) != 1:
+      shape_dict_of_x = {k: ordered_dict_x[k].shape
+                         for k in ordered_dict_x.keys()}
+      shape_of_y = None if y is None else y.shape
       raise ValueError('Length of tensors in x and y is mismatched. All '
                        'elements in x and y must have the same length.\n'
                        'Shapes in x: {}\n'
-                       'Shapes in y: {}\n'.format(shape_dict_of_x, shape_of_y))
+                       'Shape for y: {}\n'.format(shape_dict_of_x, shape_of_y))
 
     queue = feeding_functions._enqueue_data(  # pylint: disable=protected-access
-        ordered_dict_data,
+        ordered_dict_x,
         queue_capacity,
         shuffle=shuffle,
         num_threads=num_threads,
         enqueue_size=batch_size,
         num_epochs=num_epochs)
 
-    batch = (queue.dequeue_many(batch_size) if num_epochs is None
+    features = (queue.dequeue_many(batch_size) if num_epochs is None
                 else queue.dequeue_up_to(batch_size))
 
-    # Remove the first `Tensor` in `batch`, which is the row number.
-    if len(batch) > 0:
-      batch.pop(0)
+    # Remove the first `Tensor` in `features`, which is the row number.
+    if len(features) > 0:
+      features.pop(0)
 
-    features = dict(zip(feature_keys, batch[:len(feature_keys)]))
-    if target_keys is None:
-      # TODO(martinwicke), return consistent result
-      return features
-    elif isinstance(target_keys, string_types):
-      target = batch[-1]
-      return features, target
-    else:
-      target = dict(zip(target_keys, batch[-len(target_keys):]))
+    features = dict(zip(ordered_dict_x.keys(), features))
+    if y is not None:
+      target = features.pop(unique_target_key)
       return features, target
+    return features
 
   return input_fn
diff --git a/tensorflow/python/estimator/inputs/numpy_io_test.py b/tensorflow/python/estimator/inputs/numpy_io_test.py
index 65eae7a7dc..02df22b632 100644
--- a/tensorflow/python/estimator/inputs/numpy_io_test.py
+++ b/tensorflow/python/estimator/inputs/numpy_io_test.py
@@ -239,40 +239,6 @@ class NumpyIoTest(test.TestCase):
             x, y, batch_size=2, shuffle=False, num_epochs=1)
         failing_input_fn()
 
-  def testNumpyInputFnWithXIsEmptyDict(self):
-    x = {}
-    y = np.arange(4)
-    with self.test_session():
-      with self.assertRaisesRegexp(ValueError, 'x cannot be empty'):
-        failing_input_fn = numpy_io.numpy_input_fn(x, y, shuffle=False)
-        failing_input_fn()
-
-  def testNumpyInputFnWithYIsNone(self):
-    a = np.arange(4) * 1.0
-    b = np.arange(32, 36)
-    x = {'a': a, 'b': b}
-    y = None
-
-    with self.test_session() as session:
-      input_fn = numpy_io.numpy_input_fn(
-        x, y, batch_size=2, shuffle=False, num_epochs=1)
-      features_tensor = input_fn()
-
-      coord = coordinator.Coordinator()
-      threads = queue_runner_impl.start_queue_runners(session, coord=coord)
-
-      feature = session.run(features_tensor)
-      self.assertEqual(len(feature), 2)
-      self.assertAllEqual(feature['a'], [0, 1])
-      self.assertAllEqual(feature['b'], [32, 33])
-
-      session.run([features_tensor])
-      with self.assertRaises(errors.OutOfRangeError):
-        session.run([features_tensor])
-
-      coord.request_stop()
-      coord.join(threads)
-
   def testNumpyInputFnWithNonBoolShuffle(self):
     x = np.arange(32, 36)
     y = np.arange(4)
@@ -319,59 +285,6 @@ class NumpyIoTest(test.TestCase):
             num_epochs=1)
         failing_input_fn()
 
-  def testNumpyInputFnWithYAsDict(self):
-    a = np.arange(4) * 1.0
-    b = np.arange(32, 36)
-    x = {'a': a, 'b': b}
-    y = {'y1': np.arange(-32, -28), 'y2': np.arange(32, 28, -1)}
-
-    with self.test_session() as session:
-      input_fn = numpy_io.numpy_input_fn(
-        x, y, batch_size=2, shuffle=False, num_epochs=1)
-      features_tensor, targets_tensor = input_fn()
-
-      coord = coordinator.Coordinator()
-      threads = queue_runner_impl.start_queue_runners(session, coord=coord)
-
-      features, targets = session.run([features_tensor, targets_tensor])
-      self.assertEqual(len(features), 2)
-      self.assertAllEqual(features['a'], [0, 1])
-      self.assertAllEqual(features['b'], [32, 33])
-      self.assertEqual(len(targets), 2)
-      self.assertAllEqual(targets['y1'], [-32, -31])
-      self.assertAllEqual(targets['y2'], [32, 31])
-
-      session.run([features_tensor, targets_tensor])
-      with self.assertRaises(errors.OutOfRangeError):
-        session.run([features_tensor, targets_tensor])
-
-      coord.request_stop()
-      coord.join(threads)
-
-  def testNumpyInputFnWithYIsEmptyDict(self):
-    a = np.arange(4) * 1.0
-    b = np.arange(32, 36)
-    x = {'a': a, 'b': b}
-    y = {}
-    with self.test_session():
-      with self.assertRaisesRegexp(ValueError, 'y cannot be empty'):
-        failing_input_fn = numpy_io.numpy_input_fn(x, y, shuffle=False)
-        failing_input_fn()
-
-  def testNumpyInputFnWithDuplicateKeysInXAndY(self):
-    a = np.arange(4) * 1.0
-    b = np.arange(32, 36)
-    x = {'a': a, 'b': b}
-    y = {'y1': np.arange(-32, -28),
-         'a': a,
-         'y2': np.arange(32, 28, -1),
-         'b': b}
-    with self.test_session():
-      with self.assertRaisesRegexp(
-              ValueError, '2 duplicate keys are found in both x and y'):
-        failing_input_fn = numpy_io.numpy_input_fn(x, y, shuffle=False)
-        failing_input_fn()
-
 
 if __name__ == '__main__':
   test.main()
diff --git a/tensorflow/python/framework/ops.py b/tensorflow/python/framework/ops.py
index dc4ffb1747..2785aed13e 100644
--- a/tensorflow/python/framework/ops.py
+++ b/tensorflow/python/framework/ops.py
@@ -860,10 +860,6 @@ def convert_to_tensor(value, dtype=None, name=None, preferred_dtype=None):
   inputs, which allows those ops to accept numpy arrays, Python lists,
   and scalars in addition to `Tensor` objects.
 
-  Note: This function diverges from default Numpy behavior for `float` and
-    `string` types when `None` is present in a Python list or scalar. Rather
-    than silently converting `None` values, an error will be thrown.
-
   Args:
     value: An object whose type has a registered `Tensor` conversion function.
     dtype: Optional element type for the returned tensor. If missing, the
diff --git a/tensorflow/python/framework/tensor_util.py b/tensorflow/python/framework/tensor_util.py
index e283542172..7e74c19124 100644
--- a/tensorflow/python/framework/tensor_util.py
+++ b/tensorflow/python/framework/tensor_util.py
@@ -286,7 +286,6 @@ _TF_TO_IS_OK = {
     dtypes.bool: [_FilterBool],
     dtypes.complex128: [_FilterComplex],
     dtypes.complex64: [_FilterComplex],
-    dtypes.float16: [_FilterFloat],
     dtypes.float32: [_FilterFloat],
     dtypes.float64: [_FilterFloat],
     dtypes.int16: [_FilterInt],
diff --git a/tensorflow/python/framework/test_util.py b/tensorflow/python/framework/test_util.py
index 1610214d54..cfa5fe5e3e 100644
--- a/tensorflow/python/framework/test_util.py
+++ b/tensorflow/python/framework/test_util.py
@@ -986,9 +986,8 @@ class TensorFlowTestCase(googletest.TestCase):
       err: A float value.
       msg: An optional string message to append to the failure message.
     """
-    # f1 == f2 is needed here as we might have: f1, f2 = inf, inf
     self.assertTrue(
-        f1 == f2 or math.fabs(f1 - f2) <= err,
+        math.fabs(f1 - f2) <= err,
         "%f != %f +/- %f%s" % (f1, f2, err, " (%s)" % msg
                                if msg is not None else ""))
 
diff --git a/tensorflow/python/kernel_tests/array_ops_test.py b/tensorflow/python/kernel_tests/array_ops_test.py
index 76b80e60ea..6eb9c66d06 100644
--- a/tensorflow/python/kernel_tests/array_ops_test.py
+++ b/tensorflow/python/kernel_tests/array_ops_test.py
@@ -107,41 +107,22 @@ class BooleanMaskTest(test_util.TensorFlowTestCase):
   def setUp(self):
     self.rng = np.random.RandomState(42)
 
-  def CheckVersusNumpy(self, ndims_mask, arr_shape, make_mask=None, axis=None):
+  def CheckVersusNumpy(self, ndims_mask, arr_shape, make_mask=None):
     """Check equivalence between boolean_mask and numpy masking."""
     if make_mask is None:
       make_mask = lambda shape: self.rng.randint(0, 2, size=shape).astype(bool)
     arr = np.random.rand(*arr_shape)
     mask = make_mask(arr_shape[:ndims_mask])
-    if axis is not None:
-      mask = make_mask(arr_shape[axis:ndims_mask+axis])
-    if axis is None or axis == 0:
-      masked_arr = arr[mask]
-    elif axis == 1:
-      masked_arr = arr[:,mask]
-    elif axis == 2:
-      masked_arr = arr[:,:,mask]
-    with self.test_session() as sess:
-      masked_tensor = array_ops.boolean_mask(arr, mask, axis=axis)
+    masked_arr = arr[mask]
+    with self.test_session():
+      masked_tensor = array_ops.boolean_mask(arr, mask)
 
       # Leading dimension size of masked_tensor is always unknown until runtime
       # since we don't how many elements will be kept.
-      leading = 1 if axis is None else axis + 1
-      self.assertAllEqual(masked_tensor.get_shape()[leading:],
-          masked_arr.shape[leading:])
+      self.assertAllEqual(masked_tensor.get_shape()[1:], masked_arr.shape[1:])
 
       self.assertAllClose(masked_arr, masked_tensor.eval())
 
-  def testMaskDim1ArrDim2Axis1(self):
-    ndims_mask = 1
-    for arr_shape in [(1, 1), (2, 2), (2, 5)]:
-      self.CheckVersusNumpy(ndims_mask, arr_shape, axis=1)
-
-  def testMaskDim2ArrDim2Axis1(self):
-    ndims_mask = 2
-    for arr_shape in [(1, 1), (2, 2), (2, 5)]:
-      self.CheckVersusNumpy(ndims_mask, arr_shape, axis=1)
-
   def testMaskDim1ArrDim1(self):
     ndims_mask = 1
     for arr_shape in [(1,), (2,), (3,), (10,)]:
@@ -505,7 +486,7 @@ class StridedSliceTest(test_util.TensorFlowTestCase):
         _ = checker2[...]
         _ = checker2[tuple()]
 
-  def testInt64GPU(self):
+  def testFloatSlicedArrayAndInt64IndicesGPU(self):
     if not test_util.is_gpu_available():
       self.skipTest("No GPU available")
     with self.test_session(use_gpu=True, force_gpu=True):
@@ -516,6 +497,17 @@ class StridedSliceTest(test_util.TensorFlowTestCase):
       s = array_ops.strided_slice(x, begin, end, strides)
       self.assertAllEqual([3.], self.evaluate(s))
 
+  def testInt64SlicedArrayAndIndicesGPU(self):
+    if not test_util.is_gpu_available():
+      self.skipTest("No GPU available")
+    with self.test_session(use_gpu=True, force_gpu=True):
+      x = constant_op.constant([1, 2, 3], dtype=dtypes.int64)
+      begin = constant_op.constant([2], dtype=dtypes.int64)
+      end = constant_op.constant([3], dtype=dtypes.int64)
+      strides = constant_op.constant([1], dtype=dtypes.int64)
+      s = array_ops.strided_slice(x, begin, end, strides)
+      self.assertAllEqual([3], self.evaluate(s))
+
   def testDegenerateSlices(self):
     with self.test_session(use_gpu=True):
       checker = StridedSliceChecker(self, StridedSliceChecker.REF_TENSOR)
@@ -1078,16 +1070,6 @@ class PadTest(test_util.TensorFlowTestCase):
                            [0, 0, 4, 5, 6, 0, 0],
                            [0, 0, 0, 0, 0, 0, 0]])
 
-class InvertPermutationTest(test_util.TensorFlowTestCase):
-
-  def testInvertPermutation(self):
-    for dtype in [dtypes.int32, dtypes.int64]:
-      with self.test_session(use_gpu=True):
-        x = constant_op.constant([3, 4, 0, 2, 1], dtype=dtype)
-        y = array_ops.invert_permutation(x)
-        self.assertAllEqual(y.get_shape(), [5])
-        self.assertAllEqual(y.eval(), [2, 4, 3, 0, 1])
-
 
 if __name__ == "__main__":
   test_lib.main()
diff --git a/tensorflow/python/kernel_tests/bincount_op_test.py b/tensorflow/python/kernel_tests/bincount_op_test.py
index 79285476b4..7a610debd1 100644
--- a/tensorflow/python/kernel_tests/bincount_op_test.py
+++ b/tensorflow/python/kernel_tests/bincount_op_test.py
@@ -25,10 +25,11 @@ from tensorflow.python.framework import test_util
 from tensorflow.python.ops import math_ops
 from tensorflow.python.platform import googletest
 
+
 class BincountTest(test_util.TensorFlowTestCase):
 
   def test_empty(self):
-    with self.test_session(use_gpu=True):
+    with self.test_session():
       self.assertAllEqual(
           math_ops.bincount([], minlength=5).eval(), [0, 0, 0, 0, 0])
       self.assertAllEqual(math_ops.bincount([], minlength=1).eval(), [0])
@@ -41,7 +42,7 @@ class BincountTest(test_util.TensorFlowTestCase):
           np.float64)
 
   def test_values(self):
-    with self.test_session(use_gpu=True):
+    with self.test_session():
       self.assertAllEqual(
           math_ops.bincount([1, 1, 1, 2, 2, 3]).eval(), [0, 3, 2, 1])
       arr = [1, 1, 2, 1, 2, 3, 1, 2, 3, 4, 1, 2, 3, 4, 5]
@@ -56,14 +57,14 @@ class BincountTest(test_util.TensorFlowTestCase):
           math_ops.bincount(np.arange(10000)).eval(), np.ones(10000))
 
   def test_maxlength(self):
-    with self.test_session(use_gpu=True):
+    with self.test_session():
       self.assertAllEqual(math_ops.bincount([5], maxlength=3).eval(), [0, 0, 0])
       self.assertAllEqual(math_ops.bincount([1], maxlength=3).eval(), [0, 1])
       self.assertAllEqual(math_ops.bincount([], maxlength=3).eval(), [])
 
   def test_random_with_weights(self):
     num_samples = 10000
-    with self.test_session(use_gpu=True):
+    with self.test_session():
       np.random.seed(42)
       for dtype in [dtypes.int32, dtypes.int64, dtypes.float32, dtypes.float64]:
         arr = np.random.randint(0, 1000, num_samples)
@@ -71,29 +72,17 @@ class BincountTest(test_util.TensorFlowTestCase):
           weights = np.random.randint(-100, 100, num_samples)
         else:
           weights = np.random.random(num_samples)
-        self.assertAllClose(
+        self.assertAllEqual(
             math_ops.bincount(arr, weights).eval(),
             np.bincount(arr, weights))
 
-  def test_random_without_weights(self):
-    num_samples = 10000
-    with self.test_session(use_gpu=True):
-      np.random.seed(42)
-      for dtype in [np.int32, np.float32]:
-        arr = np.random.randint(0, 1000, num_samples)
-        weights = np.ones(num_samples).astype(dtype)
-        self.assertAllClose(
-            math_ops.bincount(arr, None).eval(),
-            np.bincount(arr, weights))
-
   def test_zero_weights(self):
-    with self.test_session(use_gpu=True):
+    with self.test_session():
       self.assertAllEqual(
           math_ops.bincount(np.arange(1000), np.zeros(1000)).eval(),
           np.zeros(1000))
 
   def test_negative(self):
-    # unsorted_segment_sum will only report InvalidArgumentError on CPU
     with self.test_session():
       with self.assertRaises(errors.InvalidArgumentError):
         math_ops.bincount([1, 2, 3, -1, 6, 8]).eval()
diff --git a/tensorflow/python/kernel_tests/bucketize_op_test.py b/tensorflow/python/kernel_tests/bucketize_op_test.py
index e612b1c134..6db3592055 100644
--- a/tensorflow/python/kernel_tests/bucketize_op_test.py
+++ b/tensorflow/python/kernel_tests/bucketize_op_test.py
@@ -31,7 +31,7 @@ class BucketizationOpTest(test.TestCase):
         constant_op.constant([-5, 0, 2, 3, 5, 8, 10, 11, 12]),
         boundaries=[0, 3, 8, 11])
     expected_out = [0, 1, 1, 2, 2, 3, 3, 4, 4]
-    with self.test_session(use_gpu=True) as sess:
+    with self.test_session() as sess:
       self.assertAllEqual(expected_out, sess.run(op))
 
   def testFloat(self):
@@ -39,7 +39,7 @@ class BucketizationOpTest(test.TestCase):
         constant_op.constant([-5., 0., 2., 3., 5., 8., 10., 11., 12.]),
         boundaries=[0., 3., 8., 11.])
     expected_out = [0, 1, 1, 2, 2, 3, 3, 4, 4]
-    with self.test_session(use_gpu=True) as sess:
+    with self.test_session() as sess:
       self.assertAllEqual(expected_out, sess.run(op))
 
   def test2DInput(self):
@@ -47,13 +47,13 @@ class BucketizationOpTest(test.TestCase):
         constant_op.constant([[-5, 0, 2, 3, 5], [8, 10, 11, 12, 0]]),
         boundaries=[0, 3, 8, 11])
     expected_out = [[0, 1, 1, 2, 2], [3, 3, 4, 4, 1]]
-    with self.test_session(use_gpu=True) as sess:
+    with self.test_session() as sess:
       self.assertAllEqual(expected_out, sess.run(op))
 
   def testInvalidBoundariesOrder(self):
     op = math_ops._bucketize(
         constant_op.constant([-5, 0]), boundaries=[0, 8, 3, 11])
-    with self.test_session(use_gpu=True) as sess:
+    with self.test_session() as sess:
       with self.assertRaisesRegexp(
           errors_impl.InvalidArgumentError, "Expected sorted boundaries"):
         sess.run(op)
diff --git a/tensorflow/python/kernel_tests/constant_op_test.py b/tensorflow/python/kernel_tests/constant_op_test.py
index 6cbdd4cbb3..6167cb9999 100644
--- a/tensorflow/python/kernel_tests/constant_op_test.py
+++ b/tensorflow/python/kernel_tests/constant_op_test.py
@@ -439,10 +439,9 @@ class ZerosLikeTest(test.TestCase):
 
   def testZerosLikeCPU(self):
     for dtype in [
-        dtypes_lib.float32, dtypes_lib.float64,
-        dtypes_lib.int8, dtypes_lib.uint8, dtypes_lib.int16, dtypes_lib.uint16,
-        dtypes_lib.int32, dtypes_lib.int64, dtypes_lib.bool,
-        dtypes_lib.complex64, dtypes_lib.complex128,
+        dtypes_lib.float32, dtypes_lib.float64, dtypes_lib.int32,
+        dtypes_lib.uint8, dtypes_lib.int16, dtypes_lib.int8,
+        dtypes_lib.complex64, dtypes_lib.complex128, dtypes_lib.int64,
         dtypes_lib.string
     ]:
       self._compareZeros(dtype, fully_defined_shape=False, use_gpu=False)
@@ -574,10 +573,9 @@ class OnesLikeTest(test.TestCase):
 
   def testOnesLike(self):
     for dtype in [
-        dtypes_lib.float32, dtypes_lib.float64,
-        dtypes_lib.int8, dtypes_lib.uint8, dtypes_lib.int16, dtypes_lib.uint16,
-        dtypes_lib.int32, dtypes_lib.int64, dtypes_lib.bool,
-        dtypes_lib.complex64, dtypes_lib.complex128
+        dtypes_lib.float32, dtypes_lib.float64, dtypes_lib.int32,
+        dtypes_lib.uint8, dtypes_lib.int16, dtypes_lib.int8,
+        dtypes_lib.complex64, dtypes_lib.complex128, dtypes_lib.int64
     ]:
       numpy_dtype = dtype.as_numpy_dtype
       with self.test_session():
diff --git a/tensorflow/python/kernel_tests/conv1d_test.py b/tensorflow/python/kernel_tests/conv1d_test.py
index a7e23ead1c..b67a4e3f89 100644
--- a/tensorflow/python/kernel_tests/conv1d_test.py
+++ b/tensorflow/python/kernel_tests/conv1d_test.py
@@ -17,9 +17,6 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-import numpy as np
-from six.moves import xrange  # pylint: disable=redefined-builtin
-
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
 from tensorflow.python.ops import array_ops
@@ -53,45 +50,5 @@ class Conv1DTest(test.TestCase):
           self.assertAllClose(output, [2 * 1 + 1 * 2, 2 * 3 + 1 * 4])
 
 
-  def testConv1DTranspose(self):
-    with self.test_session():
-      stride = 2
-
-      # Input, output: [batch, width, depth]
-      x_shape = [2, 4, 3]
-      y_shape = [2, 9, 2]
-
-      # Filter: [kernel_width, output_depth, input_depth]
-      f_shape = [3, 2, 3]
-
-      x = constant_op.constant(
-          1.0, shape=x_shape, name="x", dtype=dtypes.float32)
-      f = constant_op.constant(
-          1.0, shape=f_shape, name="filter", dtype=dtypes.float32)
-      output = nn_ops.conv1d_transpose(
-          x, f, y_shape, stride=stride, padding="VALID")
-      value = output.eval()
-
-      cache_values = np.zeros(y_shape, dtype=np.float32)
-
-      # The amount of padding added
-      pad = 1
-
-      for n in xrange(x_shape[0]):
-        for k in xrange(f_shape[1]):
-          for w in xrange(pad, y_shape[1] - pad):
-            target = 3.0
-            # We add a case for locations divisible by the stride.
-            w_in = w % stride == 0 and w > pad and w < y_shape[1] - 1 - pad
-            if w_in:
-              target += 3.0
-            cache_values[n, w, k] = target
-
-          # copy values in the border
-          cache_values[n, 0, k] = cache_values[n, 1, k]
-          cache_values[n, -1, k] = cache_values[n, -2, k]
-
-    self.assertAllClose(cache_values, value)
-
 if __name__ == "__main__":
   test.main()
diff --git a/tensorflow/python/kernel_tests/conv_ops_3d_test.py b/tensorflow/python/kernel_tests/conv_ops_3d_test.py
index 116681fc4c..14622ab467 100644
--- a/tensorflow/python/kernel_tests/conv_ops_3d_test.py
+++ b/tensorflow/python/kernel_tests/conv_ops_3d_test.py
@@ -21,8 +21,6 @@ from __future__ import print_function
 import collections
 import math
 
-import numpy as np
-
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import test_util
@@ -47,19 +45,8 @@ def GetTestConfigs():
 
 class Conv3DTest(test.TestCase):
 
-  def _DtypesToTest(self, use_gpu):
-    if use_gpu:
-      if not test_util.CudaSupportsHalfMatMulAndConv():
-        return [dtypes.float32]
-      else:
-        # It is important that float32 comes before float16 here,
-        # as we will be using its gradients as reference for fp16 gradients.
-        return [dtypes.float32, dtypes.float16]
-    else:
-      return [dtypes.float64, dtypes.float32, dtypes.float16]
-
   def _SetupValuesForDevice(self, tensor_in_sizes, filter_in_sizes, stride,
-                            padding, data_format, dtype, use_gpu):
+                            padding, data_format, use_gpu):
     total_size_1 = 1
     total_size_2 = 1
     for s in tensor_in_sizes:
@@ -67,14 +54,13 @@ class Conv3DTest(test.TestCase):
     for s in filter_in_sizes:
       total_size_2 *= s
 
-    # Initializes the input tensor with array containing numbers from 0 to 1.
-    # We keep the input tensor values fairly small to avoid overflowing a float16 
-    # tensor during the conv3d 
-    x1 = [f * 1.0 / total_size_1 for f in range(1, total_size_1 + 1)]
-    x2 = [f * 1.0 / total_size_2 for f in range(1, total_size_2 + 1)]
+    # Initializes the input tensor with array containing incrementing
+    # numbers from 1.
+    x1 = [f * 1.0 for f in range(1, total_size_1 + 1)]
+    x2 = [f * 1.0 for f in range(1, total_size_2 + 1)]
     with self.test_session(use_gpu=use_gpu):
-      t1 = constant_op.constant(x1, shape=tensor_in_sizes, dtype=dtype)
-      t2 = constant_op.constant(x2, shape=filter_in_sizes, dtype=dtype)
+      t1 = constant_op.constant(x1, shape=tensor_in_sizes)
+      t2 = constant_op.constant(x2, shape=filter_in_sizes)
 
       if isinstance(stride, collections.Iterable):
         strides = [1] + list(stride) + [1]
@@ -95,35 +81,27 @@ class Conv3DTest(test.TestCase):
                     expected):
     results = []
     for data_format, use_gpu in GetTestConfigs():
-      for dtype in self._DtypesToTest(use_gpu):
-        result = self._SetupValuesForDevice(
-            tensor_in_sizes,
-            filter_in_sizes,
-            stride,
-            padding,
-            data_format,
-            dtype,
-            use_gpu=use_gpu)
-        results.append(result)
-
+      result = self._SetupValuesForDevice(
+          tensor_in_sizes,
+          filter_in_sizes,
+          stride,
+          padding,
+          data_format,
+          use_gpu=use_gpu)
+      results.append(result)
+      tolerance = 1e-2 if use_gpu else 1e-5
       with self.test_session() as sess:
         values = sess.run(results)
         for value in values:
           print("expected = ", expected)
           print("actual = ", value)
-          tol = 1e-6
-          if value.dtype == np.float16:
-            tol = 1e-3
-
-          self.assertAllClose(expected, value.flatten(), atol=tol,
-                              rtol=tol)
+          self.assertAllClose(expected, value.flatten(), atol=tolerance,
+                              rtol=1e-6)
 
   def testConv3D1x1x1Filter(self):
     expected_output = [
-        0.18518519,  0.22222222,  0.25925926,  0.40740741,  0.5       ,
-        0.59259259,  0.62962963,  0.77777778,  0.92592593,  0.85185185,
-        1.05555556,  1.25925926,  1.07407407,  1.33333333,  1.59259259,
-        1.2962963 ,  1.61111111,  1.92592593
+        30.0, 36.0, 42.0, 66.0, 81.0, 96.0, 102.0, 126.0, 150.0, 138.0, 171.0,
+        204.0, 174.0, 216.0, 258.0, 210.0, 261.0, 312.0
     ]
 
     # These are equivalent to the Conv2D1x1 case.
@@ -149,10 +127,8 @@ class Conv3DTest(test.TestCase):
   # Expected values computed using scipy's correlate function.
   def testConv3D2x2x2Filter(self):
     expected_output = [
-        3.77199074,   3.85069444,   3.92939815,   4.2650463 ,   4.35763889,
-        4.45023148,   6.73032407,   6.89236111,   7.05439815,   7.22337963,
-        7.39930556,   7.57523148,   9.68865741,   9.93402778,  10.17939815,
-        10.18171296,  10.44097222,  10.70023148
+        19554., 19962., 20370., 22110., 22590., 23070., 34890., 35730., 36570.,
+        37446., 38358., 39270., 50226., 51498., 52770., 52782., 54126., 55470.
     ]
     # expected_shape = [1, 3, 1, 2, 5]
     self._VerifyValues(
@@ -164,19 +140,69 @@ class Conv3DTest(test.TestCase):
 
   def testConv3DStrides(self):
     expected_output = [
-        0.06071429,  0.08988095,  0.10238095,  0.11488095,  0.12738095,
-        0.13988095,  0.08452381,  0.26071429,  0.35238095,  0.36488095,
-        0.37738095,  0.38988095,  0.40238095,  0.23452381,  0.46071429,
-        0.61488095,  0.62738095,  0.63988095,  0.65238095,  0.66488095,
-        0.38452381,  1.12738095,  1.48988095,  1.50238095,  1.51488095,
-        1.52738095,  1.53988095,  0.88452381,  1.32738095,  1.75238095,
-        1.76488095,  1.77738095,  1.78988095,  1.80238095,  1.03452381,
-        1.52738095,  2.01488095,  2.02738095,  2.03988095,  2.05238095,
-        2.06488095,  1.18452381,  2.19404762,  2.88988095,  2.90238095,
-        2.91488095,  2.92738095,  2.93988095,  1.68452381,  2.39404762,
-        3.15238095,  3.16488095,  3.17738095,  3.18988095,  3.20238095,
-        1.83452381,  2.59404762,  3.41488095,  3.42738095,  3.43988095,
-        3.45238095,  3.46488095,  1.98452381
+        102.,
+        151.,
+        172.,
+        193.,
+        214.,
+        235.,
+        142.,
+        438.,
+        592.,
+        613.,
+        634.,
+        655.,
+        676.,
+        394.,
+        774.,
+        1033.,
+        1054.,
+        1075.,
+        1096.,
+        1117.,
+        646.,
+        1894.,
+        2503.,
+        2524.,
+        2545.,
+        2566.,
+        2587.,
+        1486.,
+        2230.,
+        2944.,
+        2965.,
+        2986.,
+        3007.,
+        3028.,
+        1738.,
+        2566.,
+        3385.,
+        3406.,
+        3427.,
+        3448.,
+        3469.,
+        1990.,
+        3686.,
+        4855.,
+        4876.,
+        4897.,
+        4918.,
+        4939.,
+        2830.,
+        4022.,
+        5296.,
+        5317.,
+        5338.,
+        5359.,
+        5380.,
+        3082.,
+        4358.,
+        5737.,
+        5758.,
+        5779.,
+        5800.,
+        5821.,
+        3334.,
     ]
     self._VerifyValues(
         tensor_in_sizes=[1, 5, 8, 7, 1],
@@ -186,10 +212,7 @@ class Conv3DTest(test.TestCase):
         expected=expected_output)
 
   def testConv3D2x2x2FilterStride2(self):
-    expected_output = [
-        3.77199074,  3.85069444,  3.92939815,  9.68865741,  9.93402778,
-        10.17939815
-    ]
+    expected_output = [19554., 19962., 20370., 50226., 51498., 52770.]
     self._VerifyValues(
         tensor_in_sizes=[1, 4, 2, 3, 3],
         filter_in_sizes=[2, 2, 2, 3, 3],
@@ -199,14 +222,11 @@ class Conv3DTest(test.TestCase):
 
   def testConv3DStride3(self):
     expected_output = [
-        1.51140873,  1.57167659,  1.63194444,  1.56349206,  1.62673611,
-        1.68998016,  1.6155754 ,  1.68179563,  1.74801587,  1.9280754 ,
-        2.01215278,  2.09623016,  1.98015873,  2.0672123 ,  2.15426587,
-        2.03224206,  2.12227183,  2.21230159,  4.4280754 ,  4.65500992,
-        4.88194444,  4.48015873,  4.71006944,  4.93998016,  4.53224206,
-        4.76512897,  4.99801587,  4.84474206,  5.09548611,  5.34623016,
-        4.8968254 ,  5.15054563,  5.40426587,  4.94890873,  5.20560516,
-        5.46230159
+        36564., 38022., 39480., 37824., 39354., 40884., 39084., 40686., 42288.,
+        46644., 48678., 50712., 47904., 50010., 52116., 49164., 51342., 53520.,
+        107124., 112614., 118104., 108384., 113946., 119508., 109644., 115278.,
+        120912., 117204., 123270., 129336., 118464., 124602., 130740., 119724.,
+        125934., 132144.
     ]
     self._VerifyValues(
         tensor_in_sizes=[1, 6, 7, 8, 2],
@@ -217,9 +237,8 @@ class Conv3DTest(test.TestCase):
 
   def testConv3D2x2x2FilterStride2Same(self):
     expected_output = [
-        3.77199074,   3.85069444,   3.92939815,   2.0162037 ,   2.06597222,
-        2.11574074,   9.68865741,   9.93402778,  10.17939815,   4.59953704,
-        4.73263889,   4.86574074
+        19554., 19962., 20370., 10452., 10710., 10968., 50226., 51498., 52770.,
+        23844., 24534., 25224.
     ]
     self._VerifyValues(
         tensor_in_sizes=[1, 4, 2, 3, 3],
@@ -229,10 +248,7 @@ class Conv3DTest(test.TestCase):
         expected=expected_output)
 
   def testKernelSmallerThanStride(self):
-    expected_output = [
-        0.03703704,  0.11111111,  0.25925926,  0.33333333,  0.7037037 ,
-        0.77777778,  0.92592593,  1.
-    ]
+    expected_output = [1., 3., 7., 9., 19., 21., 25., 27.]
     self._VerifyValues(
         tensor_in_sizes=[1, 3, 3, 3, 1],
         filter_in_sizes=[1, 1, 1, 1, 1],
@@ -247,12 +263,9 @@ class Conv3DTest(test.TestCase):
         expected=expected_output)
 
     expected_output = [
-        0.54081633,  0.58017493,  0.28061224,  0.81632653,  0.85568513,
-        0.40306122,  0.41873178,  0.4340379 ,  0.19642857,  2.46938776,
-        2.50874636,  1.1377551 ,  2.74489796,  2.78425656,  1.26020408,
-        1.16873178,  1.1840379 ,  0.51785714,  1.09511662,  1.10604956,
-        0.44642857,  1.17164723,  1.18258017,  0.47704082,  0.3691691 ,
-        0.37244898,  0.125
+        1484., 1592., 770., 2240., 2348., 1106., 1149., 1191., 539., 6776.,
+        6884., 3122., 7532., 7640., 3458., 3207., 3249., 1421., 3005., 3035.,
+        1225., 3215., 3245., 1309., 1013., 1022., 343.
     ]
     self._VerifyValues(
         tensor_in_sizes=[1, 7, 7, 7, 1],
@@ -261,10 +274,7 @@ class Conv3DTest(test.TestCase):
         padding="SAME",
         expected=expected_output)
 
-    expected_output = [
-        0.540816,  0.580175,  0.816327,  0.855685,  2.469388,  2.508746,
-        2.744898,  2.784257
-    ]
+    expected_output = [1484., 1592., 2240., 2348., 6776., 6884., 7532., 7640.]
     self._VerifyValues(
         tensor_in_sizes=[1, 7, 7, 7, 1],
         filter_in_sizes=[2, 2, 2, 1, 1],
@@ -278,7 +288,7 @@ class Conv3DTest(test.TestCase):
         filter_in_sizes=[2, 1, 2, 1, 2],
         stride=1,
         padding="VALID",
-        expected=[1.5625,  1.875])
+        expected=[50, 60])
 
   def _ConstructAndTestGradientForConfig(
       self, batch, input_shape, filter_shape, in_depth, out_depth, stride,
@@ -318,63 +328,50 @@ class Conv3DTest(test.TestCase):
     input_data = [x * 1.0 / input_size for x in range(0, input_size)]
     filter_data = [x * 1.0 / filter_size for x in range(0, filter_size)]
 
-
-    for data_type in self._DtypesToTest(use_gpu=use_gpu):
+    if test.is_gpu_available() and use_gpu:
+      data_type = dtypes.float32
       # TODO(mjanusz): Modify gradient_checker to also provide max relative
       # error and synchronize the tolerance levels between the tests for forward
       # and backward computations.
-      if data_type == dtypes.float64:
-        tolerance = 1e-8
-      elif data_type == dtypes.float32:
+      if test.is_gpu_available():
         tolerance = 5e-3
-      elif data_type == dtypes.float16:
-        tolerance = 1e-3
-
-
-      with self.test_session(use_gpu=use_gpu):
-        orig_input_tensor = constant_op.constant(
+      else:
+        # As of Aug 2016, higher tolerance is needed for some CPU architectures.
+        # Runs on a single machine can also generate slightly different errors
+        # because of multithreading.
+        tolerance = 8e-3
+    else:
+      data_type = dtypes.float64
+      tolerance = 1e-8
+    with self.test_session(use_gpu=use_gpu):
+      orig_input_tensor = constant_op.constant(
           input_data, shape=input_shape, dtype=data_type, name="input")
-        filter_tensor = constant_op.constant(
+      filter_tensor = constant_op.constant(
           filter_data, shape=filter_shape, dtype=data_type, name="filter")
 
-        if data_format == "NCDHW":
-          input_tensor = test_util.NHWCToNCHW(orig_input_tensor)
-          new_strides = test_util.NHWCToNCHW(strides)
-        else:
-          input_tensor = orig_input_tensor
-          new_strides = strides
+      if data_format == "NCDHW":
+        input_tensor = test_util.NHWCToNCHW(orig_input_tensor)
+        strides = test_util.NHWCToNCHW(strides)
+      else:
+        input_tensor = orig_input_tensor
 
-        conv = nn_ops.conv3d(
-          input_tensor, filter_tensor, new_strides, padding,
+      conv = nn_ops.conv3d(
+          input_tensor, filter_tensor, strides, padding,
           data_format=data_format, name="conv")
 
-        if data_format == "NCDHW":
-          conv = test_util.NCHWToNHWC(conv)
-
-        
-        if test_input:
-          jacob_t, jacob_n = gradient_checker.compute_gradient(orig_input_tensor,
-                                                               input_shape,
-                                                               conv,
-                                                               output_shape)
-        else:
-          jacob_t, jacob_n = gradient_checker.compute_gradient(filter_tensor,
-                                                               filter_shape,
-                                                               conv,
-                                                               output_shape)
-        
-        
-        if data_type != dtypes.float16:
-          reference_jacob_t = jacob_t
-          err = np.fabs(jacob_t - jacob_n).max()
-        else:
-          # Compare fp16 theoretical gradients to fp32 theoretical gradients,
-          # since fp16 numerical gradients are too imprecise.
-          err = np.fabs(jacob_t - reference_jacob_t).max()
-
-      print("conv3d gradient error = ", err)
-      self.assertLess(err, tolerance)
+      if data_format == "NCDHW":
+        conv = test_util.NCHWToNHWC(conv)
 
+      if test_input:
+        err = gradient_checker.compute_gradient_error(orig_input_tensor,
+                                                      input_shape,
+                                                      conv, output_shape)
+      else:
+        err = gradient_checker.compute_gradient_error(filter_tensor,
+                                                      filter_shape, conv,
+                                                      output_shape)
+    print("conv3d gradient error = ", err)
+    self.assertLess(err, tolerance)
 
   def ConstructAndTestGradient(self, **kwargs):
     for data_format, use_gpu in GetTestConfigs():
diff --git a/tensorflow/python/kernel_tests/depthwise_conv_op_test.py b/tensorflow/python/kernel_tests/depthwise_conv_op_test.py
index f7ae1a0f37..3298092fbe 100644
--- a/tensorflow/python/kernel_tests/depthwise_conv_op_test.py
+++ b/tensorflow/python/kernel_tests/depthwise_conv_op_test.py
@@ -122,9 +122,7 @@ class DepthwiseConv2DTest(test.TestCase):
     x1 = [f * 1.0 for f in range(1, total_size_1 + 1)]
     x2 = [f * 1.0 for f in range(1, total_size_2 + 1)]
     with self.test_session(use_gpu=use_gpu) as sess:
-      if data_type == dtypes.float16:
-        tolerance = 1e-5
-      elif data_type == dtypes.float32:
+      if data_type == dtypes.float32:
         tolerance = 1e-5
       else:
         self.assertEqual(data_type, dtypes.float64)
@@ -171,7 +169,7 @@ class DepthwiseConv2DTest(test.TestCase):
                 padding) in enumerate(ConfigsToTest()):
       print("Testing DepthwiseConv2D,", index, "th config:", input_size, "*",
             filter_size, "stride:", stride, "padding:", padding)
-      for data_type in [dtypes.float16, dtypes.float32, dtypes.float64]:
+      for data_type in [dtypes.float32, dtypes.float64]:
         self._VerifyValues(
             input_size, filter_size, stride, padding, data_type, use_gpu=True)
 
@@ -183,7 +181,7 @@ class DepthwiseConv2DTest(test.TestCase):
                 padding) in enumerate(ConfigsToTest()):
       print("Testing DepthwiseConv2DFormat,", index, "th config:", input_size,
             "*", filter_size, "stride:", stride, "padding:", padding)
-      for data_type in [dtypes.float16, dtypes.float32, dtypes.float64]:
+      for data_type in [dtypes.float32, dtypes.float64]:
         self._VerifyValues(
             input_size,
             filter_size,
@@ -320,9 +318,7 @@ class DepthwiseConv2DTest(test.TestCase):
     input_data = [x * 1.0 / input_size for x in range(0, input_size)]
     filter_data = [x * 1.0 / filter_size for x in range(0, filter_size)]
     with self.test_session(use_gpu=use_gpu):
-      if data_type == dtypes.float16:
-        tolerance = 0.002
-      elif data_type == dtypes.float32:
+      if data_type == dtypes.float32:
         tolerance = 0.002
       else:
         self.assertEqual(data_type, dtypes.float64)
@@ -373,8 +369,6 @@ class DepthwiseConv2DTest(test.TestCase):
       print("Testing DepthwiseConv2DInputGrad,", index, "th config:",
             input_size, "*", filter_size, "stride:", stride, "padding:",
             padding)
-      # Note: float16 test for DepthwiseConv2DInputGrad is not enabled,
-      # calculations are not very precise.
       for data_type in [dtypes.float32, dtypes.float64]:
         self._ConstructAndTestGradient(
             input_size,
@@ -395,8 +389,6 @@ class DepthwiseConv2DTest(test.TestCase):
       print("Testing DepthwiseConv2DInputGradFormat,", index, "th config:",
             input_size, "*", filter_size, "stride:", stride, "padding:",
             padding)
-      # Note: float16 test for DepthwiseConv2DInputGradFormat is not enabled,
-      # calculations are not very precise.
       for data_type in [dtypes.float32, dtypes.float64]:
         self._ConstructAndTestGradient(
             input_size,
@@ -415,8 +407,6 @@ class DepthwiseConv2DTest(test.TestCase):
       print("Testing DepthwiseConv2DFilterGrad,", index, "th config:",
             input_size, "*", filter_size, "stride:", stride, "padding:",
             padding)
-      # Note: float16 test for DepthwiseConv2DFilterGrad is not enabled,
-      # calculations are not very precise.
       for data_type in [dtypes.float32, dtypes.float64]:
         self._ConstructAndTestGradient(
             input_size,
@@ -437,8 +427,6 @@ class DepthwiseConv2DTest(test.TestCase):
       print("Testing DepthwiseConv2DFilterGradFormat,", index, "th config:",
             input_size, "*", filter_size, "stride:", stride, "padding:",
             padding)
-      # Note: float16 test for DepthwiseConv2DFilterGradFormat is not enabled,
-      # calculations are not very precise.
       for data_type in [dtypes.float32, dtypes.float64]:
         self._ConstructAndTestGradient(
             input_size,
diff --git a/tensorflow/python/kernel_tests/distributions/BUILD b/tensorflow/python/kernel_tests/distributions/BUILD
index e220d05692..e21446c2ef 100644
--- a/tensorflow/python/kernel_tests/distributions/BUILD
+++ b/tensorflow/python/kernel_tests/distributions/BUILD
@@ -193,7 +193,6 @@ cuda_py_test(
         "//tensorflow/python:math_ops",
         "//tensorflow/python:platform_test",
     ],
-    tags = ["manual"],  # b/69001419
 )
 
 cuda_py_test(
diff --git a/tensorflow/python/kernel_tests/distributions/multinomial_test.py b/tensorflow/python/kernel_tests/distributions/multinomial_test.py
index e24e8ade73..ebc89f15c5 100644
--- a/tensorflow/python/kernel_tests/distributions/multinomial_test.py
+++ b/tensorflow/python/kernel_tests/distributions/multinomial_test.py
@@ -250,11 +250,13 @@ class MultinomialTest(test.TestCase):
     theta = np.array([[1., 2, 3],
                       [2.5, 4, 0.01]], dtype=np.float32)
     theta /= np.sum(theta, 1)[..., array_ops.newaxis]
-    n = np.array([[10., 9.], [8., 7.], [6., 5.]], dtype=np.float32)
+    # Ideally we'd be able to test broadcasting but, the multinomial sampler
+    # doesn't support different total counts.
+    n = np.float32(5)
     with self.test_session() as sess:
-      # batch_shape=[3, 2], event_shape=[3]
+      # batch_shape=[2], event_shape=[3]
       dist = multinomial.Multinomial(n, theta)
-      x = dist.sample(int(1000e3), seed=1)
+      x = dist.sample(int(250e3), seed=1)
       sample_mean = math_ops.reduce_mean(x, 0)
       x_centered = x - sample_mean[array_ops.newaxis, ...]
       sample_cov = math_ops.reduce_mean(math_ops.matmul(
@@ -289,9 +291,9 @@ class MultinomialTest(test.TestCase):
   def testSampleUnbiasedNonScalarBatch(self):
     with self.test_session() as sess:
       dist = multinomial.Multinomial(
-          total_count=[7., 6., 5.],
+          total_count=5.,
           logits=math_ops.log(2. * self._rng.rand(4, 3, 2).astype(np.float32)))
-      n = int(3e4)
+      n = int(3e3)
       x = dist.sample(n, seed=0)
       sample_mean = math_ops.reduce_mean(x, 0)
       # Cyclically rotate event dims left.
diff --git a/tensorflow/python/kernel_tests/dynamic_partition_op_test.py b/tensorflow/python/kernel_tests/dynamic_partition_op_test.py
index 2460950aa9..4883095707 100644
--- a/tensorflow/python/kernel_tests/dynamic_partition_op_test.py
+++ b/tensorflow/python/kernel_tests/dynamic_partition_op_test.py
@@ -33,8 +33,8 @@ from tensorflow.python.platform import test
 class DynamicPartitionTest(test.TestCase):
 
   def testSimpleOneDimensional(self):
-    with self.test_session(use_gpu=True) as sess:
-      data = constant_op.constant([0, 13, 2, 39, 4, 17], dtype=dtypes.float32)
+    with self.test_session() as sess:
+      data = constant_op.constant([0, 13, 2, 39, 4, 17])
       indices = constant_op.constant([0, 0, 2, 3, 2, 1])
       partitions = data_flow_ops.dynamic_partition(
           data, indices, num_partitions=4)
@@ -52,10 +52,9 @@ class DynamicPartitionTest(test.TestCase):
     self.assertEqual([None], partitions[3].get_shape().as_list())
 
   def testSimpleTwoDimensional(self):
-    with self.test_session(use_gpu=True) as sess:
+    with self.test_session() as sess:
       data = constant_op.constant([[0, 1, 2], [3, 4, 5], [6, 7, 8], [9, 10, 11],
-                                   [12, 13, 14], [15, 16, 17]],
-                                  dtype=dtypes.float32)
+                                   [12, 13, 14], [15, 16, 17]])
       indices = constant_op.constant([0, 0, 2, 3, 2, 1])
       partitions = data_flow_ops.dynamic_partition(
           data, indices, num_partitions=4)
@@ -72,61 +71,9 @@ class DynamicPartitionTest(test.TestCase):
     self.assertEqual([None, 3], partitions[2].get_shape().as_list())
     self.assertEqual([None, 3], partitions[3].get_shape().as_list())
 
-  def testLargeOneDimensional(self):
-    num = 100000
-    data_list = [x for x in range(num)]
-    indices_list = [x % 2 for x in range(num)]
-    part1 = [x for x in range(num) if x % 2 == 0]
-    part2 = [x for x in range(num) if x % 2 == 1]
-    with self.test_session(use_gpu=True) as sess:
-      data = constant_op.constant(data_list, dtype=dtypes.float32)
-      indices = constant_op.constant(indices_list, dtype=dtypes.int32)
-      partitions = data_flow_ops.dynamic_partition(
-          data, indices, num_partitions=2)
-      partition_vals = sess.run(partitions)
-
-    self.assertAllEqual(part1, partition_vals[0])
-    self.assertAllEqual(part2, partition_vals[1])
-
-  def testLargeTwoDimensional(self):
-    rows = 100000
-    cols = 100
-    data_list = [None] * rows
-    for i in range(rows):
-      data_list[i] = [i for _ in range(cols)]
-    num_partitions = 97
-    indices_list = [(i ** 2) % num_partitions for i in range(rows)]
-    parts = [[] for _ in range(num_partitions)]
-    for i in range(rows):
-      parts[(i ** 2) % num_partitions].append(data_list[i])
-    with self.test_session(use_gpu=True) as sess:
-      data = constant_op.constant(data_list, dtype=dtypes.float32)
-      indices = constant_op.constant(indices_list, dtype=dtypes.int32)
-      partitions = data_flow_ops.dynamic_partition(
-          data, indices, num_partitions=num_partitions)
-      partition_vals = sess.run(partitions)
-
-    for i in range(num_partitions):
-      # reshape because of empty parts
-      parts_np = np.array(parts[i], dtype=np.float).reshape(-1, cols)
-      self.assertAllEqual(parts_np, partition_vals[i])
-
-  def testSimpleComplex(self):
-    data_list = [1 + 2j, 3 + 4j, 5 + 6j, 7 + 8j]
-    indices_list = [1, 0, 1, 0]
-    with self.test_session(use_gpu=True) as sess:
-      data = constant_op.constant(data_list, dtype=dtypes.complex64)
-      indices = constant_op.constant(indices_list, dtype=dtypes.int32)
-      partitions = data_flow_ops.dynamic_partition(
-          data, indices, num_partitions=2)
-      partition_vals = sess.run(partitions)
-
-    self.assertAllEqual([3 + 4j, 7 + 8j], partition_vals[0])
-    self.assertAllEqual([1 + 2j, 5 + 6j], partition_vals[1])
-
   def testHigherRank(self):
     np.random.seed(7)
-    with self.test_session(use_gpu=True) as sess:
+    with self.test_session() as sess:
       for n in 2, 3:
         for shape in (4,), (4, 5), (4, 5, 2):
           partitions = np.random.randint(n, size=np.prod(shape)).reshape(shape)
@@ -148,49 +95,6 @@ class DynamicPartitionTest(test.TestCase):
             self.assertEqual(grads[1], None)  # Partitions has no gradients
             self.assertAllEqual(7 * data, sess.run(grads[0]))
 
-  def testEmptyParts(self):
-    data_list = [1, 2, 3, 4]
-    indices_list = [1, 3, 1, 3]
-    with self.test_session(use_gpu=True) as sess:
-      data = constant_op.constant(data_list, dtype=dtypes.float32)
-      indices = constant_op.constant(indices_list, dtype=dtypes.int32)
-      partitions = data_flow_ops.dynamic_partition(
-          data, indices, num_partitions=4)
-      partition_vals = sess.run(partitions)
-
-    self.assertAllEqual([], partition_vals[0])
-    self.assertAllEqual([1, 3], partition_vals[1])
-    self.assertAllEqual([], partition_vals[2])
-    self.assertAllEqual([2, 4], partition_vals[3])
-
-  def testEmptyDataTwoDimensional(self):
-    data_list = [[], []]
-    indices_list = [0, 1]
-    with self.test_session(use_gpu=True) as sess:
-      data = constant_op.constant(data_list, dtype=dtypes.float32)
-      indices = constant_op.constant(indices_list, dtype=dtypes.int32)
-      partitions = data_flow_ops.dynamic_partition(
-          data, indices, num_partitions=3)
-      partition_vals = sess.run(partitions)
-
-    self.assertAllEqual([[]], partition_vals[0])
-    self.assertAllEqual([[]], partition_vals[1])
-    self.assertAllEqual(np.array([], dtype=np.float).reshape(0, 0),
-                        partition_vals[2])
-
-  def testEmptyPartitions(self):
-    data_list = []
-    indices_list = []
-    with self.test_session(use_gpu=True) as sess:
-      data = constant_op.constant(data_list, dtype=dtypes.float32)
-      indices = constant_op.constant(indices_list, dtype=dtypes.int32)
-      partitions = data_flow_ops.dynamic_partition(
-          data, indices, num_partitions=2)
-      partition_vals = sess.run(partitions)
-
-    self.assertAllEqual([], partition_vals[0])
-    self.assertAllEqual([], partition_vals[1])
-
   def testErrorIndexOutOfRange(self):
     with self.test_session() as sess:
       data = constant_op.constant([[0, 1, 2], [3, 4, 5], [6, 7, 8], [9, 10, 11],
diff --git a/tensorflow/python/kernel_tests/pooling_ops_test.py b/tensorflow/python/kernel_tests/pooling_ops_test.py
index 150e2ff7f2..a126180414 100644
--- a/tensorflow/python/kernel_tests/pooling_ops_test.py
+++ b/tensorflow/python/kernel_tests/pooling_ops_test.py
@@ -19,7 +19,6 @@ from __future__ import division
 from __future__ import print_function
 
 import numpy as np
-import os
 
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
@@ -1342,33 +1341,11 @@ class PoolingTest(test.TestCase):
       return
 
     # Test the GPU implementation that uses cudnn for now.
-    saved_nanprop = os.environ.get("TF_ENABLE_MAXPOOL_NANPROP")
-    # Do not propagate the diff in cases of NaNs
-    os.environ["TF_ENABLE_MAXPOOL_NANPROP"] = "0"
+    # It does not propagate the diff in cases of NaNs
     expected_input_backprop_cudnn = [
         0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
         0.0, 0.0
     ]
-
-    for v2 in [True, False]:
-      self._testMaxPoolGradDirect(
-          input_data,
-          output_backprop,
-          expected_input_backprop_cudnn,
-          input_sizes=[1, 4, 4, 1],
-          output_sizes=[1, 3, 3, 1],
-          window_rows=2,
-          window_cols=2,
-          row_stride=1,
-          col_stride=1,
-          padding="VALID",
-          use_gpu=True,
-          v2=v2)
-
-    # Propagate the diff in cases of NaNs
-    os.environ["TF_ENABLE_MAXPOOL_NANPROP"] = "1"
-    expected_input_backprop_cudnn = expected_input_backprop_tf_cpu
-
     for v2 in [True, False]:
       self._testMaxPoolGradDirect(
           input_data,
@@ -1384,11 +1361,6 @@ class PoolingTest(test.TestCase):
           use_gpu=True,
           v2=v2)
 
-    if saved_nanprop:
-      os.environ["TF_ENABLE_MAXPOOL_NANPROP"] = saved_nanprop
-    else:
-      del os.environ["TF_ENABLE_MAXPOOL_NANPROP"]
-
   def _testMaxPoolGradDirectWithNans2_2(self):
     input_data = [float("nan")] * 16
     output_backprop = [
@@ -1419,14 +1391,11 @@ class PoolingTest(test.TestCase):
       return
 
     # Test the GPU implementation that uses cudnn for now.
-    saved_nanprop = os.environ.get("TF_ENABLE_MAXPOOL_NANPROP")
-    # Do not propagate the diff in cases of NaNs
-    os.environ["TF_ENABLE_MAXPOOL_NANPROP"] = "0"
+    # It does not propagate the diff in cases of NaNs
     expected_input_backprop_cudnn = [
         0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
         0.0, 0.0
     ]
-
     for v2 in [True, False]:
       self._testMaxPoolGradDirect(
           input_data,
@@ -1442,31 +1411,6 @@ class PoolingTest(test.TestCase):
           use_gpu=True,
           v2=v2)
 
-
-    # Propagate the diff in cases of NaNs
-    os.environ["TF_ENABLE_MAXPOOL_NANPROP"] = "1"
-    expected_input_backprop_cudnn = expected_input_backprop_tf_cpu
-
-    for v2 in [True, False]:
-      self._testMaxPoolGradDirect(
-          input_data,
-          output_backprop,
-          expected_input_backprop_cudnn,
-          input_sizes=[1, 4, 4, 1],
-          output_sizes=[1, 3, 3, 1],
-          window_rows=2,
-          window_cols=2,
-          row_stride=1,
-          col_stride=1,
-          padding="VALID",
-          use_gpu=True,
-          v2=v2)
-
-    if saved_nanprop:
-      os.environ["TF_ENABLE_MAXPOOL_NANPROP"] = saved_nanprop
-    else:
-      del os.environ["TF_ENABLE_MAXPOOL_NANPROP"]
-
   def testMaxPoolGradDirect(self):
     self._testMaxPoolGradDirect1_1()
     self._testMaxPoolGradDirect1_2()
diff --git a/tensorflow/python/kernel_tests/reader_ops_test.py b/tensorflow/python/kernel_tests/reader_ops_test.py
index 8e54d10f32..5630259b7b 100644
--- a/tensorflow/python/kernel_tests/reader_ops_test.py
+++ b/tensorflow/python/kernel_tests/reader_ops_test.py
@@ -35,9 +35,6 @@ from tensorflow.python.ops import data_flow_ops
 from tensorflow.python.ops import io_ops
 from tensorflow.python.ops import variables
 from tensorflow.python.platform import test
-from tensorflow.python.training import coordinator
-from tensorflow.python.training import input as input_lib
-from tensorflow.python.training import queue_runner_impl
 from tensorflow.python.util import compat
 
 prefix_path = "tensorflow/core/lib"
@@ -1014,25 +1011,6 @@ class LMDBReaderTest(test.TestCase):
                                     "\\(requested 1, current size 0\\)"):
         k, v = sess.run([key, value])
 
-  def testReadFromSameFile(self):
-    with self.test_session() as sess:
-      reader1 = io_ops.LMDBReader(name="test_read_from_same_file1")
-      reader2 = io_ops.LMDBReader(name="test_read_from_same_file2")
-      filename_queue = input_lib.string_input_producer([self.db_path],
-                                                       num_epochs=None)
-      key1, value1 = reader1.read(filename_queue)
-      key2, value2 = reader2.read(filename_queue)
-
-      coord = coordinator.Coordinator()
-      threads = queue_runner_impl.start_queue_runners(sess, coord=coord)
-      for i in range(3):
-        for j in range(10):
-          k1, v1, k2, v2 = sess.run([key1, value1, key2, value2])
-          self.assertAllEqual(compat.as_bytes(k1), compat.as_bytes(k2))
-          self.assertAllEqual(compat.as_bytes(v1), compat.as_bytes(v2))
-      coord.request_stop()
-      coord.join(threads)
-
   def testReadFromFolder(self):
     with self.test_session() as sess:
       reader = io_ops.LMDBReader(name="test_read_from_folder")
@@ -1051,25 +1029,6 @@ class LMDBReaderTest(test.TestCase):
                                     "\\(requested 1, current size 0\\)"):
         k, v = sess.run([key, value])
 
-  def testReadFromFileRepeatedly(self):
-    with self.test_session() as sess:
-      reader = io_ops.LMDBReader(name="test_read_from_file_repeated")
-      filename_queue = input_lib.string_input_producer([self.db_path],
-                                                       num_epochs=None)
-      key, value = reader.read(filename_queue)
-
-      coord = coordinator.Coordinator()
-      threads = queue_runner_impl.start_queue_runners(sess, coord=coord)
-      # Iterate over the lmdb 3 times.
-      for i in range(3):
-        # Go over all 10 records each time.
-        for j in range(10):
-          k, v = sess.run([key, value])
-          self.assertAllEqual(compat.as_bytes(k), compat.as_bytes(str(j)))
-          self.assertAllEqual(
-              compat.as_bytes(v), compat.as_bytes(str(chr(ord("a") + j))))
-      coord.request_stop()
-      coord.join(threads)
 
 if __name__ == "__main__":
   test.main()
diff --git a/tensorflow/python/kernel_tests/segment_reduction_ops_test.py b/tensorflow/python/kernel_tests/segment_reduction_ops_test.py
index 3a02f24902..516a9d000e 100644
--- a/tensorflow/python/kernel_tests/segment_reduction_ops_test.py
+++ b/tensorflow/python/kernel_tests/segment_reduction_ops_test.py
@@ -323,9 +323,8 @@ class UnsortedSegmentSumTest(SegmentReductionHelper):
   def testBadIndices(self):
     # Note: GPU kernel does not return the out-of-range error needed for this
     # test, so this test is marked as cpu-only.
-    # Note: With PR #13055 a negative index will be ignored silently.
     with self.test_session(use_gpu=False):
-      for bad in [[2]], [[7]]:
+      for bad in [[-1]], [[7]]:
         unsorted = math_ops.unsorted_segment_sum([[17]], bad, num_segments=2)
         with self.assertRaisesOpError(
             r"segment_ids\[0,0\] = %d is out of range \[0, 2\)" % bad[0][0]):
@@ -361,32 +360,6 @@ class UnsortedSegmentSumTest(SegmentReductionHelper):
             x_init_value=np_x.astype(np.double), delta=1)
       self.assertAllClose(jacob_t, jacob_n)
 
-  def testDropNegatives(self):
-    # Note: the test is done by replacing segment_ids with 8 to -1
-    # for index  and replace values generated by numpy with 0.
-    dtypes = [
-        dtypes_lib.float32, dtypes_lib.float64, dtypes_lib.int64,
-        dtypes_lib.int32, dtypes_lib.complex64, dtypes_lib.complex128
-    ]
-    indices_flat = np.array([0, 4, 0, 8, 3, 8, 4, 7, 7, 3])
-    num_segments = 12
-    for indices in indices_flat, indices_flat.reshape(5, 2):
-      shape = indices.shape + (2,)
-      for dtype in dtypes:
-        with self.test_session(use_gpu=True):
-          tf_x, np_x = self._input(shape, dtype=dtype)
-          np_ans = self._segmentReduce(
-              indices, np_x, np.add, op2=None, num_out_rows=num_segments)
-          # Replace np_ans[8] with 0 for the value
-          np_ans[8:] = 0
-          # Replace 8 with -1 in indices
-          np.place(indices, indices==8, [-1])
-          s = math_ops.unsorted_segment_sum(
-              data=tf_x, segment_ids=indices, num_segments=num_segments)
-          tf_ans = s.eval()
-        self.assertAllClose(np_ans, tf_ans)
-        self.assertShapeEqual(np_ans, s)
-
 
 class SparseSegmentReductionHelper(SegmentReductionHelper):
 
diff --git a/tensorflow/python/kernel_tests/shape_ops_test.py b/tensorflow/python/kernel_tests/shape_ops_test.py
index 7368251ab6..a9fc699b21 100644
--- a/tensorflow/python/kernel_tests/shape_ops_test.py
+++ b/tensorflow/python/kernel_tests/shape_ops_test.py
@@ -258,16 +258,6 @@ class ShapeOpsTest(test.TestCase):
       self.assertAllEqual([True], array_ops.expand_dims(inp, 0).eval())
       self.assertAllEqual([True], array_ops.expand_dims(inp, -1).eval())
 
-  def testExpandDimsDimType(self):
-    for dtype in [dtypes.int32, dtypes.int64]:
-      x = np.zeros([2])
-      np_ans = np.expand_dims(x, axis=0)
-      with self.test_session(use_gpu=True):
-        tensor = array_ops.expand_dims(x, constant_op.constant(0, dtype))
-        tf_ans = tensor.eval()
-      self.assertShapeEqual(np_ans, tensor)
-      self.assertAllEqual(np_ans, tf_ans)
-
   def _compareSqueeze(self, x, squeeze_dims, use_gpu):
     with self.test_session(use_gpu=use_gpu):
       if squeeze_dims:
diff --git a/tensorflow/python/kernel_tests/slice_op_test.py b/tensorflow/python/kernel_tests/slice_op_test.py
index 6cdc7872f9..051a25080b 100644
--- a/tensorflow/python/kernel_tests/slice_op_test.py
+++ b/tensorflow/python/kernel_tests/slice_op_test.py
@@ -217,30 +217,6 @@ class SliceTest(test.TestCase):
     self.assertEqual(expected_val.shape, slice_t.get_shape())
     self.assertEqual(expected_val.shape, slice2_t.get_shape())
 
-  def testRandomHighRank(self):
-    # Random dims of rank 8
-    input_shape = np.random.randint(0, 20, size=8)
-    inp = np.random.rand(*input_shape).astype("f")
-    with self.test_session(use_gpu=True) as sess:
-      a = constant_op.constant(
-          [float(x) for x in inp.ravel(order="C")],
-          shape=input_shape,
-          dtype=dtypes.float32)
-      indices = [0 if x == 0 else np.random.randint(x) for x in input_shape]
-      sizes = [
-          np.random.randint(0, input_shape[i] - indices[i] + 1)
-          for i in range(8)
-      ]
-      slice_t = array_ops.slice(a, indices, sizes)
-      slice_val = sess.run(slice_t)
-
-    expected_val = inp[indices[0]:indices[0] + sizes[0], indices[1]:indices[1] + sizes[
-      1], indices[2]:indices[2] + sizes[2], indices[3]:indices[3] + sizes[3], indices[
-        4]:indices[4] + sizes[4], indices[5]:indices[5] + sizes[5], indices[6]:indices[
-          6] + sizes[6], indices[7]:indices[7] + sizes[7]]
-    self.assertAllEqual(slice_val, expected_val)
-    self.assertEqual(expected_val.shape, slice_t.get_shape())
-
   def testPartialShapeInference(self):
     z = array_ops.zeros((1, 2, 3))
     self.assertAllEqual(z.get_shape().as_list(), [1, 2, 3])
@@ -251,6 +227,7 @@ class SliceTest(test.TestCase):
     m2 = array_ops.slice(z, [0, 0, 0], [constant_op.constant(1) + 0, 2, -1])
     self.assertAllEqual(m2.get_shape().as_list(), [None, 2, None])
 
+
   def _testGradientSlice(self, input_shape, slice_begin, slice_size):
     with self.test_session(use_gpu=True):
       num_inputs = np.prod(input_shape)
diff --git a/tensorflow/python/kernel_tests/unique_op_test.py b/tensorflow/python/kernel_tests/unique_op_test.py
index 04758ce45a..a50f53b3cd 100644
--- a/tensorflow/python/kernel_tests/unique_op_test.py
+++ b/tensorflow/python/kernel_tests/unique_op_test.py
@@ -22,7 +22,6 @@ import numpy as np
 
 from tensorflow.python.framework import dtypes
 from tensorflow.python.ops import array_ops
-from tensorflow.python.ops import gen_array_ops
 from tensorflow.python.platform import test
 
 
@@ -62,31 +61,6 @@ class UniqueTest(test.TestCase):
     for i in range(len(x)):
       self.assertEqual(x[i], tf_y[tf_idx[i]].decode('ascii'))
 
-  def testInt32Axis(self):
-    x = np.array([[1, 0, 0], [1, 0, 0], [2, 0, 0]])
-    with self.test_session() as sess:
-      y0, idx0 = gen_array_ops.unique_v2(x, axis=[0])
-      tf_y0, tf_idx0 = sess.run([y0, idx0])
-      y1, idx1 = gen_array_ops.unique_v2(x, axis=[1])
-      tf_y1, tf_idx1 = sess.run([y1, idx1])
-    self.assertAllEqual(tf_y0, np.array([[1, 0, 0], [2, 0, 0]]))
-    self.assertAllEqual(tf_idx0, np.array([0, 0, 1]))
-    self.assertAllEqual(tf_y1, np.array([[1, 0], [1, 0], [2, 0]]))
-    self.assertAllEqual(tf_idx1, np.array([0, 1, 1]))
-
-  def testInt32V2(self):
-    # This test is only temporary, once V2 is used
-    # by default, the axis will be wrapped to allow `axis=None`.
-    x = np.random.randint(2, high=10, size=7000)
-    with self.test_session() as sess:
-      y, idx = gen_array_ops.unique_v2(x, axis=[])
-      tf_y, tf_idx = sess.run([y, idx])
-
-    self.assertEqual(len(x), len(tf_idx))
-    self.assertEqual(len(tf_y), len(np.unique(x)))
-    for i in range(len(x)):
-      self.assertEqual(x[i], tf_y[tf_idx[i]])
-
 class UniqueWithCountsTest(test.TestCase):
 
   def testInt32(self):
diff --git a/tensorflow/python/layers/base.py b/tensorflow/python/layers/base.py
index 6be2bc3e76..74b85da845 100644
--- a/tensorflow/python/layers/base.py
+++ b/tensorflow/python/layers/base.py
@@ -221,7 +221,7 @@ class Layer(object):
 
     Weight updates (for instance, the updates of the moving mean and variance
     in a BatchNormalization layer) may be dependent on the inputs passed
-    when calling a layer. Hence, when reusing the same layer on
+    when calling a layer. Hence, when reusing a same layer on
     different inputs `a` and `b`, some entries in `layer.updates` may be
     dependent on `a` and some on `b`. This method automatically keeps track
     of dependencies.
@@ -295,9 +295,9 @@ class Layer(object):
     """Add loss tensor(s), potentially dependent on layer inputs.
 
     Some losses (for instance, activity regularization losses) may be dependent
-    on the inputs passed when calling a layer. Hence, when reusing the same
-    layer on different inputs `a` and `b`, some entries in `layer.losses` may
-    be dependent on `a` and some on `b`. This method automatically keeps track
+    on the inputs passed when calling a layer. Hence, when reusing a same layer
+    on different inputs `a` and `b`, some entries in `layer.losses` may be
+    dependent on `a` and some on `b`. This method automatically keeps track
     of dependencies.
 
     The `get_losses_for` method allows to retrieve the losses relevant to a
diff --git a/tensorflow/python/layers/convolutional.py b/tensorflow/python/layers/convolutional.py
index 8c327d7e27..0c7ce02835 100644
--- a/tensorflow/python/layers/convolutional.py
+++ b/tensorflow/python/layers/convolutional.py
@@ -813,7 +813,6 @@ def conv3d(inputs,
       bias_constraint=bias_constraint,
       trainable=trainable,
       name=name,
-      dtype=inputs.dtype.base_dtype,
       _reuse=reuse,
       _scope=name)
   return layer.apply(inputs)
@@ -1747,7 +1746,6 @@ def conv3d_transpose(inputs,
       bias_constraint=bias_constraint,
       trainable=trainable,
       name=name,
-      dtype=inputs.dtype.base_dtype,
       _reuse=reuse,
       _scope=name)
   return layer.apply(inputs)
diff --git a/tensorflow/python/layers/normalization.py b/tensorflow/python/layers/normalization.py
index 4d5fb97845..9d9b2b3941 100644
--- a/tensorflow/python/layers/normalization.py
+++ b/tensorflow/python/layers/normalization.py
@@ -26,7 +26,6 @@ import numpy as np
 
 from tensorflow.python.eager import context
 from tensorflow.python.framework import constant_op
-from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
 from tensorflow.python.framework import tensor_shape
 from tensorflow.python.layers import base
@@ -240,12 +239,6 @@ class BatchNormalization(base.Layer):
         raise ValueError('Unsupported axis, fused batch norm only supports '
                          'axis == [1] or axis == [3]')
 
-    # Raise parameters of fp16 batch norm to fp32
-    if self.dtype == dtypes.float16:
-      param_dtype = dtypes.float32
-    else:
-      param_dtype = self.dtype or dtypes.float32
-
     axis_to_dim = {x: input_shape[x].value for x in self.axis}
     for x in axis_to_dim:
       if axis_to_dim[x] is None:
@@ -269,7 +262,6 @@ class BatchNormalization(base.Layer):
     if self.scale:
       self.gamma = self.add_variable(name='gamma',
                                      shape=param_shape,
-                                     dtype=param_dtype,
                                      initializer=self.gamma_initializer,
                                      regularizer=self.gamma_regularizer,
                                      constraint=self.gamma_constraint,
@@ -277,14 +269,11 @@ class BatchNormalization(base.Layer):
     else:
       self.gamma = None
       if self.fused:
-        self._gamma_const = array_ops.constant(1.0,
-                                               dtype=param_dtype,
-                                               shape=param_shape)
+        self._gamma_const = array_ops.constant(1.0, shape=param_shape)
 
     if self.center:
       self.beta = self.add_variable(name='beta',
                                     shape=param_shape,
-                                    dtype=param_dtype,
                                     initializer=self.beta_initializer,
                                     regularizer=self.beta_regularizer,
                                     constraint=self.beta_constraint,
@@ -292,9 +281,7 @@ class BatchNormalization(base.Layer):
     else:
       self.beta = None
       if self.fused:
-        self._beta_const = array_ops.constant(0.0,
-                                              dtype=param_dtype,
-                                              shape=param_shape)
+        self._beta_const = array_ops.constant(0.0, shape=param_shape)
 
     # Disable variable partitioning when creating the moving mean and variance
     try:
@@ -306,14 +293,12 @@ class BatchNormalization(base.Layer):
       self.moving_mean = self.add_variable(
           name='moving_mean',
           shape=param_shape,
-          dtype=param_dtype,
           initializer=self.moving_mean_initializer,
           trainable=False)
 
       self.moving_variance = self.add_variable(
           name='moving_variance',
           shape=param_shape,
-          dtype=param_dtype,
           initializer=self.moving_variance_initializer,
           trainable=False)
 
@@ -329,7 +314,6 @@ class BatchNormalization(base.Layer):
         def _renorm_variable(name, shape):
           var = self.add_variable(name=name,
                                   shape=shape,
-                                  dtype=param_dtype,
                                   initializer=init_ops.zeros_initializer(),
                                   trainable=False)
           return var
@@ -372,6 +356,7 @@ class BatchNormalization(base.Layer):
 
   def _fused_batch_norm(self, inputs, training):
     """Returns the output of fused batch norm."""
+    # TODO(reedwm): Add support for fp16 inputs.
     beta = self.beta if self.center else self._beta_const
     gamma = self.gamma if self.scale else self._gamma_const
 
@@ -767,7 +752,6 @@ def batch_normalization(inputs,
       virtual_batch_size=virtual_batch_size,
       adjustment=adjustment,
       name=name,
-      dtype=inputs.dtype.base_dtype,
       _reuse=reuse,
       _scope=name)
   return layer.apply(inputs, training=training)
diff --git a/tensorflow/python/layers/normalization_test.py b/tensorflow/python/layers/normalization_test.py
index b2876c58c2..90ebdc8c86 100644
--- a/tensorflow/python/layers/normalization_test.py
+++ b/tensorflow/python/layers/normalization_test.py
@@ -68,12 +68,11 @@ class BNTest(test.TestCase):
              use_gpu,
              is_fused,
              restore=False,
-             freeze_mode=False,
-             dtype=dtypes.float32):
+             freeze_mode=False):
     ops.reset_default_graph()
     graph = ops.get_default_graph()
     with self.test_session(graph=graph, use_gpu=use_gpu) as sess:
-      image = array_ops.placeholder(dtype=dtype, shape=shape)
+      image = array_ops.placeholder(dtype='float32', shape=shape)
       loss, train_op, saver = self._simple_model(image, is_fused, freeze_mode)
       if restore:
         saver.restore(sess, checkpoint_path)
@@ -81,7 +80,7 @@ class BNTest(test.TestCase):
         sess.run(variables.global_variables_initializer())
       np.random.seed(0)
       for _ in range(2):
-        image_val = np.random.rand(*shape).astype(dtype.as_numpy_dtype)
+        image_val = np.random.rand(*shape).astype(np.float32)
         sess.run([loss, train_op], feed_dict={image: image_val})
       if restore:
         all_vars = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)
@@ -91,74 +90,15 @@ class BNTest(test.TestCase):
         saver.save(sess, checkpoint_path)
 
   def _infer(self, checkpoint_path, image_val, shape, use_gpu, is_fused):
-    dtype = image_val.dtype
     ops.reset_default_graph()
     graph = ops.get_default_graph()
     with self.test_session(graph=graph, use_gpu=use_gpu) as sess:
-      image = array_ops.placeholder(dtype=dtype, shape=shape)
+      image = array_ops.placeholder(dtype='float32', shape=shape)
       loss, _, saver = self._simple_model(image, is_fused, True)
       saver.restore(sess, checkpoint_path)
       loss_val = sess.run(loss, feed_dict={image: image_val})
       return loss_val
 
-  def _trainEvalSequence(self,
-                         dtype,
-                         train1_use_gpu,
-                         train2_use_gpu,
-                         infer_use_gpu):
-    batch, height, width, input_channels = 2, 4, 5, 3
-    shape = [batch, height, width, input_channels]
-    checkpoint = os.path.join(self.get_temp_dir(), 'cp_%s_%s_%s_%s' %
-        (dtype, train1_use_gpu, train2_use_gpu, infer_use_gpu))
-
-    self._train(
-        checkpoint,
-        shape,
-        use_gpu=train1_use_gpu,
-        is_fused=True,
-        restore=False,
-        freeze_mode=False,
-        dtype=dtype)
-
-    train_vars = self._train(
-        checkpoint,
-        shape,
-        use_gpu=train2_use_gpu,
-        is_fused=True,
-        restore=True,
-        freeze_mode=False,
-        dtype=dtype)
-
-    np.random.seed(0)
-    image_val = np.random.rand(batch,
-                               height,
-                               width,
-                               input_channels).astype(dtype.as_numpy_dtype)
-    loss_val = self._infer(checkpoint, image_val, shape,
-                           use_gpu=infer_use_gpu, is_fused=True)
-
-    return train_vars, loss_val
-
-  def testHalfPrecision(self):
-    ref_vars, ref_loss = self._trainEvalSequence(dtype=dtypes.float32,
-                                                 train1_use_gpu=True,
-                                                 train2_use_gpu=True,
-                                                 infer_use_gpu=True)
- 
-    self.assertEqual(len(ref_vars), 5)
-
-    for train1_use_gpu in [True, False]:
-      for train2_use_gpu in [True, False]:
-        for infer_use_gpu in [True, False]:
-          test_vars, test_loss = self._trainEvalSequence(dtypes.float16,
-                                                         train1_use_gpu,
-                                                         train2_use_gpu,
-                                                         infer_use_gpu)
-          self.assertEqual(len(test_vars), 5)
-          for test_var, ref_var in zip(test_vars, ref_vars):
-            self.assertAllClose(test_var, ref_var, rtol=1.e-3, atol=1.e-3)
-          self.assertAllClose(test_loss, ref_loss, rtol=1.e-3, atol=1.e-3)
-
   def _testCheckpoint(self, is_fused_checkpoint_a, is_fused_checkpoint_b,
                       use_gpu_checkpoint_a, use_gpu_checkpoint_b,
                       use_gpu_test_a, use_gpu_test_b, freeze_mode):
@@ -278,36 +218,6 @@ class BNTest(test.TestCase):
         ops.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES),
         bn.trainable_variables)
 
-  def testCreateFusedBNFloat16(self):
-    # Call layer.
-    bn = normalization_layers.BatchNormalization(axis=1, fused=True)
-    inputs = random_ops.random_uniform((5, 4, 3, 3),
-                                       seed=1,
-                                       dtype=dtypes.float16)
-    training = array_ops.placeholder(dtype='bool')
-    outputs = bn.apply(inputs, training=training)
-
-    # Verify shape.
-    self.assertListEqual(outputs.get_shape().as_list(), [5, 4, 3, 3])
-
-    # Verify layer attributes.
-    self.assertEqual(len(bn.updates), 2)
-    self.assertEqual(len(bn.variables), 4)
-    self.assertEqual(len(bn.trainable_variables), 2)
-    self.assertEqual(len(bn.non_trainable_variables), 2)
-    for var in bn.variables:
-      self.assertEqual(var.dtype, dtypes.float32_ref)
-
-    # Test that updates were created and added to UPDATE_OPS.
-    self.assertEqual(len(bn.updates), 2)
-    self.assertListEqual(
-        ops.get_collection(ops.GraphKeys.UPDATE_OPS), bn.updates)
-
-    # Test that weights were created and added to TRAINABLE_VARIABLES.
-    self.assertListEqual(
-        ops.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES),
-        bn.trainable_variables)
-
   def test3DInputAxis1(self):
     epsilon = 1e-3
     bn = normalization_layers.BatchNormalization(
diff --git a/tensorflow/python/ops/array_ops.py b/tensorflow/python/ops/array_ops.py
index 43238757c7..c3c7ecd080 100644
--- a/tensorflow/python/ops/array_ops.py
+++ b/tensorflow/python/ops/array_ops.py
@@ -1132,7 +1132,7 @@ def concat(values, axis, name="concat"):
   return gen_array_ops._concat_v2(values=values, axis=axis, name=name)
 
 
-def boolean_mask(tensor, mask, name="boolean_mask", axis=None):
+def boolean_mask(tensor, mask, name="boolean_mask"):
   """Apply boolean mask to tensor.  Numpy equivalent is `tensor[mask]`.
 
   ```python
@@ -1146,17 +1146,11 @@ def boolean_mask(tensor, mask, name="boolean_mask", axis=None):
   the first K dimensions of `tensor`'s shape.  We then have:
     `boolean_mask(tensor, mask)[i, j1,...,jd] = tensor[i1,...,iK,j1,...,jd]`
   where `(i1,...,iK)` is the ith `True` entry of `mask` (row-major order).
-  The `axis` could be used with `mask` to indicate the axis to mask from.
-  In that case, `axis + dim(mask) <= dim(tensor)` and `mask`'s shape must match
-  the first `axis + dim(mask)` dimensions of `tensor`'s shape.
 
   Args:
     tensor:  N-D tensor.
     mask:  K-D boolean tensor, K <= N and K must be known statically.
     name:  A name for this operation (optional).
-    axis:  A 0-D int Tensor representing the axis in `tensor` to mask from.
-      By default, axis is 0 which will mask from the first dimension. Otherwise
-      K + axis <= N.
 
   Returns:
     (N-K+1)-dimensional tensor populated by entries in `tensor` corresponding
@@ -1175,10 +1169,10 @@ def boolean_mask(tensor, mask, name="boolean_mask", axis=None):
   ```
   """
 
-  def _apply_mask_1d(reshaped_tensor, mask, axis=None):
+  def _apply_mask_1d(reshaped_tensor, mask):
     """Mask tensor along dimension 0 with a 1-D mask."""
     indices = squeeze(where(mask), squeeze_dims=[1])
-    return gather(reshaped_tensor, indices, axis=axis)
+    return gather(reshaped_tensor, indices)
 
   with ops.name_scope(name, values=[tensor, mask]):
     tensor = ops.convert_to_tensor(tensor, name="tensor")
@@ -1193,22 +1187,19 @@ def boolean_mask(tensor, mask, name="boolean_mask", axis=None):
       raise ValueError(
           "Number of mask dimensions must be specified, even if some dimensions"
           " are None.  E.g. shape=[None] is ok, but shape=None is not.")
-    axis = 0 if axis is None else axis
-    shape_tensor[axis:axis+ndims_mask].assert_is_compatible_with(shape_mask)
+    shape_tensor[:ndims_mask].assert_is_compatible_with(shape_mask)
 
-    leading_size = gen_math_ops._prod(shape(tensor)[axis:axis+ndims_mask], [0])
+    leading_size = gen_math_ops._prod(shape(tensor)[:ndims_mask], [0])
     tensor = reshape(tensor,
-                     concat([shape(tensor)[:axis],
-                             [leading_size],
-                             shape(tensor)[axis+ndims_mask:]], 0))
-    first_dim = shape_tensor[axis:axis+ndims_mask].num_elements()
+                     concat([[leading_size],
+                             shape(tensor)[ndims_mask:]], 0))
+    first_dim = shape_tensor[:ndims_mask].num_elements()
     tensor.set_shape(
-        tensor_shape.as_shape(shape_tensor[:axis])
-        .concatenate([first_dim])
-        .concatenate(shape_tensor[axis+ndims_mask:]))
+        tensor_shape.as_shape([first_dim])
+        .concatenate(shape_tensor[ndims_mask:]))
 
     mask = reshape(mask, [-1])
-    return _apply_mask_1d(tensor, mask, axis)
+    return _apply_mask_1d(tensor, mask)
 
 
 def sparse_mask(a, mask_indices, name=None):
@@ -1530,8 +1521,7 @@ def zeros_like(tensor, dtype=None, name=None, optimize=True):
   Args:
     tensor: A `Tensor`.
     dtype: A type for the returned `Tensor`. Must be `float32`, `float64`,
-      `int8`, `uint8`, `int16`, `uint16`, int32`, `int64`,
-      `complex64`, `complex128` or `bool`.
+    `int8`, `int16`, `int32`, `int64`, `uint8`, `complex64`, or `complex128`.
     name: A name for the operation (optional).
     optimize: if true, attempt to statically determine the shape of 'tensor'
     and encode it as a constant.
@@ -1582,8 +1572,8 @@ def ones_like(tensor, dtype=None, name=None, optimize=True):
   Args:
     tensor: A `Tensor`.
     dtype: A type for the returned `Tensor`. Must be `float32`, `float64`,
-      `int8`, `uint8`, `int16`, `uint16`, int32`, `int64`,
-      `complex64`, `complex128` or `bool`.
+      `int8`, `int16`, `int32`, `int64`, `uint8`, `complex64`, `complex128` or
+      `bool`.
     name: A name for the operation (optional).
     optimize: if true, attempt to statically determine the shape of 'tensor'
     and encode it as a constant.
diff --git a/tensorflow/python/ops/distributions/dirichlet.py b/tensorflow/python/ops/distributions/dirichlet.py
index 2accedf1b9..923696a553 100644
--- a/tensorflow/python/ops/distributions/dirichlet.py
+++ b/tensorflow/python/ops/distributions/dirichlet.py
@@ -196,7 +196,7 @@ class Dirichlet(distribution.Distribution):
         alpha=self.concentration,
         dtype=self.dtype,
         seed=seed)
-    return gamma_sample / math_ops.reduce_sum(gamma_sample, -1, keepdims=True)
+    return gamma_sample / math_ops.reduce_sum(gamma_sample, -1, keep_dims=True)
 
   @distribution_util.AppendDocstring(_dirichlet_sample_note)
   def _log_prob(self, x):
diff --git a/tensorflow/python/ops/distributions/multinomial.py b/tensorflow/python/ops/distributions/multinomial.py
index d49fac59ca..00b5697c83 100644
--- a/tensorflow/python/ops/distributions/multinomial.py
+++ b/tensorflow/python/ops/distributions/multinomial.py
@@ -26,7 +26,6 @@ from tensorflow.python.ops import control_flow_ops
 from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import nn_ops
 from tensorflow.python.ops import random_ops
-from tensorflow.python.ops import functional_ops
 from tensorflow.python.ops.distributions import distribution
 from tensorflow.python.ops.distributions import util as distribution_util
 
@@ -141,8 +140,6 @@ class Multinomial(distribution.Distribution):
 
   counts = [[2., 1, 1], [3, 1, 1]]
   dist.prob(counts)  # Shape [2]
-
-  dist.sample(5) # Shape [5, 2, 3]
   ```
   """
 
@@ -234,35 +231,29 @@ class Multinomial(distribution.Distribution):
 
   def _sample_n(self, n, seed=None):
     n_draws = math_ops.cast(self.total_count, dtype=dtypes.int32)
+    if self.total_count.get_shape().ndims is not None:
+      if self.total_count.get_shape().ndims != 0:
+        raise NotImplementedError(
+            "Sample only supported for scalar number of draws.")
+    elif self.validate_args:
+      is_scalar = check_ops.assert_rank(
+          n_draws, 0,
+          message="Sample only supported for scalar number of draws.")
+      n_draws = control_flow_ops.with_dependencies([is_scalar], n_draws)
     k = self.event_shape_tensor()[0]
-
-    # boardcast the total_count and logits to same shape
-    n_draws = array_ops.ones_like(
-        self.logits[..., 0], dtype=n_draws.dtype) * n_draws
-    logits = array_ops.ones_like(
-        n_draws[..., array_ops.newaxis], dtype=self.logits.dtype) * self.logits
-
-    # flatten the total_count and logits
-    flat_logits = array_ops.reshape(logits, [-1, k]) # [B1B2...Bm, k]
-    flat_ndraws = n * array_ops.reshape(n_draws, [-1]) # [B1B2...Bm]
-
-    # computes each total_count and logits situation by map_fn
-    def _sample_single(args):
-      logits, n_draw = args[0], args[1] # [K], []
-      x = random_ops.multinomial(logits[array_ops.newaxis, ...],
-                                 n_draw, seed) # [1, n*n_draw]
-      x = array_ops.reshape(x, shape=[n, -1]) # [n, n_draw]
-      x = math_ops.reduce_sum(array_ops.one_hot(x, depth=k), axis=-2) # [n, k]
-      return x
-    x = functional_ops.map_fn(_sample_single,
-                              [flat_logits, flat_ndraws],
-                              dtype=self.dtype) # [B1B2...Bm, n, k]
-
-    # reshape the results to proper shape
+    # Flatten batch dims so logits has shape [B, k],
+    # where B = reduce_prod(self.batch_shape_tensor()).
+    x = random_ops.multinomial(
+        logits=array_ops.reshape(self.logits, [-1, k]),
+        num_samples=n * n_draws,
+        seed=seed)
+    x = array_ops.reshape(x, shape=[-1, n, n_draws])
+    x = math_ops.reduce_sum(array_ops.one_hot(x, depth=k),
+                            axis=-2)  # shape: [B, n, k]
     x = array_ops.transpose(x, perm=[1, 0, 2])
     final_shape = array_ops.concat([[n], self.batch_shape_tensor(), [k]], 0)
-    x = array_ops.reshape(x, final_shape) # [n, B1, B2,..., Bm, k]
-    return x
+    x = array_ops.reshape(x, final_shape)
+    return math_ops.cast(x, self.dtype)
 
   @distribution_util.AppendDocstring(_multinomial_sample_note)
   def _log_prob(self, counts):
diff --git a/tensorflow/python/ops/image_ops_impl.py b/tensorflow/python/ops/image_ops_impl.py
index 7c23321ca5..2946dbe81e 100644
--- a/tensorflow/python/ops/image_ops_impl.py
+++ b/tensorflow/python/ops/image_ops_impl.py
@@ -1121,7 +1121,7 @@ def rgb_to_grayscale(images, name=None):
     rank_1 = array_ops.expand_dims(array_ops.rank(images) - 1, 0)
     gray_float = math_ops.reduce_sum(flt_image * rgb_weights,
                                      rank_1,
-                                     keepdims=True)
+                                     keep_dims=True)
     gray_float.set_shape(images.get_shape()[:-1].concatenate([1]))
     return convert_image_dtype(gray_float, orig_dtype, name=name)
 
@@ -1212,7 +1212,26 @@ def adjust_hue(image, delta, name=None):
     orig_dtype = image.dtype
     flt_image = convert_image_dtype(image, dtypes.float32)
 
-    rgb_altered = gen_image_ops.adjust_hue(flt_image, delta)
+    # TODO(zhengxq): we will switch to the fused version after we add a GPU
+    # kernel for that.
+    fused = os.environ.get('TF_ADJUST_HUE_FUSED', '')
+    fused = fused.lower() in ('true', 't', '1')
+
+    if not fused:
+      hsv = gen_image_ops.rgb_to_hsv(flt_image)
+
+      hue = array_ops.slice(hsv, [0, 0, 0], [-1, -1, 1])
+      saturation = array_ops.slice(hsv, [0, 0, 1], [-1, -1, 1])
+      value = array_ops.slice(hsv, [0, 0, 2], [-1, -1, 1])
+
+      # Note that we add 2*pi to guarantee that the resulting hue is a positive
+      # floating point number since delta is [-0.5, 0.5].
+      hue = math_ops.mod(hue + (delta + 1.), 1.)
+
+      hsv_altered = array_ops.concat([hue, saturation, value], 2)
+      rgb_altered = gen_image_ops.hsv_to_rgb(hsv_altered)
+    else:
+      rgb_altered = gen_image_ops.adjust_hue(flt_image, delta)
 
     return convert_image_dtype(rgb_altered, orig_dtype)
 
diff --git a/tensorflow/python/ops/linalg_ops.py b/tensorflow/python/ops/linalg_ops.py
index 14a039ffd0..2cb467c891 100644
--- a/tensorflow/python/ops/linalg_ops.py
+++ b/tensorflow/python/ops/linalg_ops.py
@@ -30,7 +30,6 @@ from tensorflow.python.ops import math_ops
 from tensorflow.python.ops.gen_linalg_ops import *
 # pylint: enable=wildcard-import
 from tensorflow.python.util import compat
-from tensorflow.python.util.deprecation import deprecated_args
 
 # Names below are lower_case.
 # pylint: disable=invalid-name
@@ -439,10 +438,7 @@ def svd(tensor, full_matrices=False, compute_uv=True, name=None):
 
 
 # pylint: disable=redefined-builtin
-@deprecated_args(None, "keep_dims is deprecated, use keepdims instead",
-                 "keep_dims")
-def norm(tensor, ord='euclidean', axis=None, keepdims=None, name=None,
-         keep_dims=None):
+def norm(tensor, ord='euclidean', axis=None, keep_dims=False, name=None):
   r"""Computes the norm of vectors, matrices, and tensors.
 
   This function can compute several different vector norms (the 1-norm, the
@@ -475,13 +471,13 @@ def norm(tensor, ord='euclidean', axis=None, keepdims=None, name=None,
       can be either a matrix or a batch of matrices at runtime, pass
       `axis=[-2,-1]` instead of `axis=None` to make sure that matrix norms are
       computed.
-    keepdims: If True, the axis indicated in `axis` are kept with size 1.
+    keep_dims: If True, the axis indicated in `axis` are kept with size 1.
       Otherwise, the dimensions in `axis` are removed from the output shape.
     name: The name of the op.
 
   Returns:
     output: A `Tensor` of the same type as tensor, containing the vector or
-      matrix norms. If `keepdims` is True then the rank of output is equal to
+      matrix norms. If `keep_dims` is True then the rank of output is equal to
       the rank of `tensor`. Otherwise, if `axis` is none the output is a scalar,
       if `axis` is an integer, the rank of `output` is one less than the rank
       of `tensor`, if `axis` is a 2-tuple the rank of `output` is two less
@@ -501,13 +497,6 @@ def norm(tensor, ord='euclidean', axis=None, keepdims=None, name=None,
   @end_compatibility
   """
 
-  if keep_dims is not None:
-    if keepdims is not None:
-      raise ValueError("Cannot specify both 'keep_dims' and 'keepdims'")
-    keepdims = keep_dims
-  if keepdims is None:
-    keepdims = False
-
   is_matrix_norm = ((isinstance(axis, tuple) or isinstance(axis, list)) and
                     len(axis) == 2)
   if is_matrix_norm:
@@ -539,25 +528,25 @@ def norm(tensor, ord='euclidean', axis=None, keepdims=None, name=None,
       # matrices.
       result = math_ops.sqrt(
           math_ops.reduce_sum(
-              tensor * math_ops.conj(tensor), axis, keepdims=True))
+              tensor * math_ops.conj(tensor), axis, keep_dims=True))
     else:
       result = math_ops.abs(tensor)
       if ord == 1:
         sum_axis = None if axis is None else axis[0]
-        result = math_ops.reduce_sum(result, sum_axis, keepdims=True)
+        result = math_ops.reduce_sum(result, sum_axis, keep_dims=True)
         if is_matrix_norm:
-          result = math_ops.reduce_max(result, axis[-1], keepdims=True)
+          result = math_ops.reduce_max(result, axis[-1], keep_dims=True)
       elif ord == np.inf:
         if is_matrix_norm:
-          result = math_ops.reduce_sum(result, axis[1], keepdims=True)
+          result = math_ops.reduce_sum(result, axis[1], keep_dims=True)
         max_axis = None if axis is None else axis[0]
-        result = math_ops.reduce_max(result, max_axis, keepdims=True)
+        result = math_ops.reduce_max(result, max_axis, keep_dims=True)
       else:
         # General p-norms (positive p only)
         result = math_ops.pow(
             math_ops.reduce_sum(
-                math_ops.pow(result, ord), axis, keepdims=True), 1.0 / ord)
-    if not keepdims:
+                math_ops.pow(result, ord), axis, keep_dims=True), 1.0 / ord)
+    if not keep_dims:
       result = array_ops.squeeze(result, axis)
     return result
 
diff --git a/tensorflow/python/ops/math_grad_test.py b/tensorflow/python/ops/math_grad_test.py
index 04eeb00518..5732c756ce 100644
--- a/tensorflow/python/ops/math_grad_test.py
+++ b/tensorflow/python/ops/math_grad_test.py
@@ -113,23 +113,6 @@ class MinOrMaxGradientTest(test.TestCase):
       self.assertLess(error, 1e-4)
 
 
-class MaximumOrMinimumGradientTest(test.TestCase):
-
-  def testMaximumGradient(self):
-    inputs = constant_op.constant([1.0, 2.0, 3.0, 4.0], dtype=dtypes.float32)
-    outputs = math_ops.maximum(inputs, 3.0)
-    with self.test_session():
-      error = gradient_checker.compute_gradient_error(inputs, [4], outputs, [4])
-      self.assertLess(error, 1e-4)
-
-  def testMinimumGradient(self):
-    inputs = constant_op.constant([1.0, 2.0, 3.0, 4.0], dtype=dtypes.float32)
-    outputs = math_ops.minimum(inputs, 2.0)
-    with self.test_session():
-      error = gradient_checker.compute_gradient_error(inputs, [4], outputs, [4])
-      self.assertLess(error, 1e-4)
-
-
 class ProdGradientTest(test.TestCase):
 
   def testProdGradient(self):
diff --git a/tensorflow/python/ops/math_ops.py b/tensorflow/python/ops/math_ops.py
index e2e23dccef..4c400423b6 100644
--- a/tensorflow/python/ops/math_ops.py
+++ b/tensorflow/python/ops/math_ops.py
@@ -170,13 +170,14 @@ from tensorflow.python.ops import state_ops
 from tensorflow.python.ops.gen_math_ops import *
 # pylint: enable=wildcard-import
 from tensorflow.python.util import compat
-from tensorflow.python.util import deprecation
+from tensorflow.python.util.deprecation import deprecated
+from tensorflow.python.util.deprecation import deprecated_args
 
 # Aliases for some automatically-generated names.
 linspace = gen_math_ops.lin_space
 
-arg_max = deprecation.deprecated(None, "Use `argmax` instead")(arg_max)  # pylint: disable=used-before-assignment
-arg_min = deprecation.deprecated(None, "Use `argmin` instead")(arg_min)  # pylint: disable=used-before-assignment
+arg_max = deprecated(None, "Use `argmax` instead")(arg_max)  # pylint: disable=used-before-assignment
+arg_min = deprecated(None, "Use `argmin` instead")(arg_min)  # pylint: disable=used-before-assignment
 
 
 def _set_doc(doc):
@@ -189,8 +190,7 @@ def _set_doc(doc):
 
 
 # pylint: disable=redefined-builtin
-@deprecation.deprecated_args(None, "Use the `axis` argument instead",
-                             "dimension")
+@deprecated_args(None, "Use the `axis` argument instead", "dimension")
 @_set_doc(
     gen_math_ops.arg_max.__doc__.replace("dimensions", "axes").replace(
         "dimension", "axis"))
@@ -208,8 +208,7 @@ def argmax(input,
   return gen_math_ops.arg_max(input, axis, name=name, output_type=output_type)
 
 
-@deprecation.deprecated_args(None, "Use the `axis` argument instead",
-                             "dimension")
+@deprecated_args(None, "Use the `axis` argument instead", "dimension")
 @_set_doc(
     gen_math_ops.arg_min.__doc__.replace("dimensions", "axes").replace(
         "dimension", "axis"))
@@ -325,7 +324,7 @@ multiply.__doc__ = gen_math_ops._mul.__doc__.replace("Mul", "`tf.multiply`")
 
 
 # TODO(aselle): put deprecation in after another round of global code changes
-@deprecation.deprecated(
+@deprecated(
     "2016-12-30",
     "`tf.mul(x, y)` is deprecated, please use `tf.multiply(x, y)` or `x * y`")
 def _mul(x, y, name=None):
@@ -344,7 +343,7 @@ subtract.__doc__ = gen_math_ops._sub.__doc__.replace("`Sub`", "`tf.subtract`")
 
 
 # TODO(aselle): put deprecation in after another round of global code changes
-@deprecation.deprecated(
+@deprecated(
     "2016-12-30",
     "`tf.sub(x, y)` is deprecated, please use `tf.subtract(x, y)` or `x - y`")
 def _sub(x, y, name=None):
@@ -382,9 +381,8 @@ def negative(x, name=None):
 
 
 # pylint: disable=g-docstring-has-escape
-@deprecation.deprecated(
-    "2016-12-30",
-    "`tf.neg(x)` is deprecated, please use `tf.negative(x)` or `-x`")
+@deprecated("2016-12-30",
+            "`tf.neg(x)` is deprecated, please use `tf.negative(x)` or `-x`")
 def _neg(x, name=None):
   """Computes numerical negative value element-wise.
 
@@ -1271,27 +1269,24 @@ def _ReductionDims(x, axis, reduction_indices):
     return range(0, array_ops.rank(x))
 
 
-def _may_reduce_to_scalar(keepdims, axis, reduction_indices, output):
+def _may_reduce_to_scalar(keep_dims, axis, reduction_indices, output):
   """Set a reduction's output's shape to be a scalar if we are certain."""
-  if (not output.shape.is_fully_defined()) and (not keepdims) and (
+  if (not output.shape.is_fully_defined()) and (not keep_dims) and (
       axis is None) and (reduction_indices is None):
     output.set_shape(())
   return output
 
 
-@deprecation.deprecated_args(
-    None, "keep_dims is deprecated, use keepdims instead", "keep_dims")
 def reduce_sum(input_tensor,
                axis=None,
-               keepdims=None,
+               keep_dims=False,
                name=None,
-               reduction_indices=None,
-               keep_dims=None):
+               reduction_indices=None):
   """Computes the sum of elements across dimensions of a tensor.
 
   Reduces `input_tensor` along the dimensions given in `axis`.
-  Unless `keepdims` is true, the rank of the tensor is reduced by 1 for each
-  entry in `axis`. If `keepdims` is true, the reduced dimensions
+  Unless `keep_dims` is true, the rank of the tensor is reduced by 1 for each
+  entry in `axis`. If `keep_dims` is true, the reduced dimensions
   are retained with length 1.
 
   If `axis` has no entries, all dimensions are reduced, and a
@@ -1304,7 +1299,7 @@ def reduce_sum(input_tensor,
   tf.reduce_sum(x)  # 6
   tf.reduce_sum(x, 0)  # [2, 2, 2]
   tf.reduce_sum(x, 1)  # [3, 3]
-  tf.reduce_sum(x, 1, keepdims=True)  # [[3], [3]]
+  tf.reduce_sum(x, 1, keep_dims=True)  # [[3], [3]]
   tf.reduce_sum(x, [0, 1])  # 6
   ```
 
@@ -1313,10 +1308,9 @@ def reduce_sum(input_tensor,
     axis: The dimensions to reduce. If `None` (the default),
       reduces all dimensions. Must be in the range
       `[-rank(input_tensor), rank(input_tensor))`.
-    keepdims: If true, retains reduced dimensions with length 1.
+    keep_dims: If true, retains reduced dimensions with length 1.
     name: A name for the operation (optional).
     reduction_indices: The old (deprecated) name for axis.
-    keep_dims: Deprecated alias for `keepdims`.
 
   Returns:
     The reduced tensor.
@@ -1325,34 +1319,26 @@ def reduce_sum(input_tensor,
   Equivalent to np.sum
   @end_compatibility
   """
-  keepdims = deprecation.deprecated_argument_lookup("keepdims", keepdims,
-                                                    "keep_dims", keep_dims)
-  if keepdims is None:
-    keepdims = False
-
-  return _may_reduce_to_scalar(keepdims, axis, reduction_indices,
+  return _may_reduce_to_scalar(keep_dims, axis, reduction_indices,
                                gen_math_ops._sum(
                                    input_tensor,
                                    _ReductionDims(input_tensor, axis,
                                                   reduction_indices),
-                                   keepdims,
+                                   keep_dims,
                                    name=name))
 
 
-@deprecation.deprecated_args(
-    None, "keep_dims is deprecated, use keepdims instead", "keep_dims")
 def count_nonzero(input_tensor,
                   axis=None,
-                  keepdims=None,
+                  keep_dims=False,
                   dtype=dtypes.int64,
                   name=None,
-                  reduction_indices=None,
-                  keep_dims=None):
+                  reduction_indices=None):
   """Computes number of nonzero elements across dimensions of a tensor.
 
   Reduces `input_tensor` along the dimensions given in `axis`.
-  Unless `keepdims` is true, the rank of the tensor is reduced by 1 for each
-  entry in `axis`. If `keepdims` is true, the reduced dimensions
+  Unless `keep_dims` is true, the rank of the tensor is reduced by 1 for each
+  entry in `axis`. If `keep_dims` is true, the reduced dimensions
   are retained with length 1.
 
   If `axis` has no entries, all dimensions are reduced, and a
@@ -1369,7 +1355,7 @@ def count_nonzero(input_tensor,
   tf.count_nonzero(x)  # 3
   tf.count_nonzero(x, 0)  # [1, 2, 0]
   tf.count_nonzero(x, 1)  # [1, 2]
-  tf.count_nonzero(x, 1, keepdims=True)  # [[1], [2]]
+  tf.count_nonzero(x, 1, keep_dims=True)  # [[1], [2]]
   tf.count_nonzero(x, [0, 1])  # 3
   ```
 
@@ -1378,20 +1364,14 @@ def count_nonzero(input_tensor,
     axis: The dimensions to reduce. If `None` (the default),
       reduces all dimensions. Must be in the range
       `[-rank(input_tensor), rank(input_tensor))`.
-    keepdims: If true, retains reduced dimensions with length 1.
+    keep_dims: If true, retains reduced dimensions with length 1.
     dtype: The output dtype; defaults to `tf.int64`.
     name: A name for the operation (optional).
     reduction_indices: The old (deprecated) name for axis.
-    keep_dims: Deprecated alias for `keepdims`.
 
   Returns:
     The reduced tensor (number of nonzero values).
   """
-  keepdims = deprecation.deprecated_argument_lookup("keepdims", keepdims,
-                                                    "keep_dims", keep_dims)
-  if keepdims is None:
-    keepdims = False
-
   with ops.name_scope(name, "count_nonzero", [input_tensor]):
     input_tensor = ops.convert_to_tensor(input_tensor, name="input_tensor")
     zero = input_tensor.dtype.as_numpy_dtype()
@@ -1400,24 +1380,21 @@ def count_nonzero(input_tensor,
             # int64 reduction happens on GPU
             to_int64(gen_math_ops.not_equal(input_tensor, zero)),
             axis=axis,
-            keepdims=keepdims,
+            keep_dims=keep_dims,
             reduction_indices=reduction_indices),
         dtype=dtype)
 
 
-@deprecation.deprecated_args(
-    None, "keep_dims is deprecated, use keepdims instead", "keep_dims")
 def reduce_mean(input_tensor,
                 axis=None,
-                keepdims=None,
+                keep_dims=False,
                 name=None,
-                reduction_indices=None,
-                keep_dims=None):
+                reduction_indices=None):
   """Computes the mean of elements across dimensions of a tensor.
 
   Reduces `input_tensor` along the dimensions given in `axis`.
-  Unless `keepdims` is true, the rank of the tensor is reduced by 1 for each
-  entry in `axis`. If `keepdims` is true, the reduced dimensions
+  Unless `keep_dims` is true, the rank of the tensor is reduced by 1 for each
+  entry in `axis`. If `keep_dims` is true, the reduced dimensions
   are retained with length 1.
 
   If `axis` has no entries, all dimensions are reduced, and a
@@ -1437,58 +1414,36 @@ def reduce_mean(input_tensor,
     axis: The dimensions to reduce. If `None` (the default),
       reduces all dimensions. Must be in the range
       `[-rank(input_tensor), rank(input_tensor))`.
-    keepdims: If true, retains reduced dimensions with length 1.
+    keep_dims: If true, retains reduced dimensions with length 1.
     name: A name for the operation (optional).
     reduction_indices: The old (deprecated) name for axis.
-    keep_dims: Deprecated alias for `keepdims`.
 
   Returns:
     The reduced tensor.
 
   @compatibility(numpy)
   Equivalent to np.mean
-
-  Please note that `np.mean` has a `dtype` parameter that could be used to
-  specify the output type. By default this is `dtype=float64`. On the other
-  hand, `tf.reduce_mean` has an aggressive type inference from `input_tensor`,
-  for example:
-
-  ```python
-  x = tf.constant([1, 0, 1, 0])
-  tf.reduce_mean(x)  # 0
-  y = tf.constant([1., 0., 1., 0.])
-  tf.reduce_mean(y)  # 0.5
-  ```
-
   @end_compatibility
   """
-  keepdims = deprecation.deprecated_argument_lookup("keepdims", keepdims,
-                                                    "keep_dims", keep_dims)
-
-  if keepdims is None:
-    keepdims = False
-  return _may_reduce_to_scalar(keepdims, axis, reduction_indices,
+  return _may_reduce_to_scalar(keep_dims, axis, reduction_indices,
                                gen_math_ops._mean(
                                    input_tensor,
                                    _ReductionDims(input_tensor, axis,
                                                   reduction_indices),
-                                   keepdims,
+                                   keep_dims,
                                    name=name))
 
 
-@deprecation.deprecated_args(
-    None, "keep_dims is deprecated, use keepdims instead", "keep_dims")
 def reduce_prod(input_tensor,
                 axis=None,
-                keepdims=None,
+                keep_dims=False,
                 name=None,
-                reduction_indices=None,
-                keep_dims=None):
+                reduction_indices=None):
   """Computes the product of elements across dimensions of a tensor.
 
   Reduces `input_tensor` along the dimensions given in `axis`.
-  Unless `keepdims` is true, the rank of the tensor is reduced by 1 for each
-  entry in `axis`. If `keepdims` is true, the reduced dimensions
+  Unless `keep_dims` is true, the rank of the tensor is reduced by 1 for each
+  entry in `axis`. If `keep_dims` is true, the reduced dimensions
   are retained with length 1.
 
   If `axis` has no entries, all dimensions are reduced, and a
@@ -1499,10 +1454,9 @@ def reduce_prod(input_tensor,
     axis: The dimensions to reduce. If `None` (the default),
       reduces all dimensions. Must be in the range
       `[-rank(input_tensor), rank(input_tensor))`.
-    keepdims: If true, retains reduced dimensions with length 1.
+    keep_dims: If true, retains reduced dimensions with length 1.
     name: A name for the operation (optional).
     reduction_indices: The old (deprecated) name for axis.
-    keep_dims: Deprecated alias for `keepdims`.
 
   Returns:
     The reduced tensor.
@@ -1511,33 +1465,25 @@ def reduce_prod(input_tensor,
   Equivalent to np.prod
   @end_compatibility
   """
-  keepdims = deprecation.deprecated_argument_lookup("keepdims", keepdims,
-                                                    "keep_dims", keep_dims)
-
-  if keepdims is None:
-    keepdims = False
-  return _may_reduce_to_scalar(keepdims, axis, reduction_indices,
+  return _may_reduce_to_scalar(keep_dims, axis, reduction_indices,
                                gen_math_ops._prod(
                                    input_tensor,
                                    _ReductionDims(input_tensor, axis,
                                                   reduction_indices),
-                                   keepdims,
+                                   keep_dims,
                                    name=name))
 
 
-@deprecation.deprecated_args(
-    None, "keep_dims is deprecated, use keepdims instead", "keep_dims")
 def reduce_min(input_tensor,
                axis=None,
-               keepdims=None,
+               keep_dims=False,
                name=None,
-               reduction_indices=None,
-               keep_dims=None):
+               reduction_indices=None):
   """Computes the minimum of elements across dimensions of a tensor.
 
   Reduces `input_tensor` along the dimensions given in `axis`.
-  Unless `keepdims` is true, the rank of the tensor is reduced by 1 for each
-  entry in `axis`. If `keepdims` is true, the reduced dimensions
+  Unless `keep_dims` is true, the rank of the tensor is reduced by 1 for each
+  entry in `axis`. If `keep_dims` is true, the reduced dimensions
   are retained with length 1.
 
   If `axis` has no entries, all dimensions are reduced, and a
@@ -1548,10 +1494,9 @@ def reduce_min(input_tensor,
     axis: The dimensions to reduce. If `None` (the default),
       reduces all dimensions. Must be in the range
       `[-rank(input_tensor), rank(input_tensor))`.
-    keepdims: If true, retains reduced dimensions with length 1.
+    keep_dims: If true, retains reduced dimensions with length 1.
     name: A name for the operation (optional).
     reduction_indices: The old (deprecated) name for axis.
-    keep_dims: Deprecated alias for `keepdims`.
 
   Returns:
     The reduced tensor.
@@ -1560,32 +1505,25 @@ def reduce_min(input_tensor,
   Equivalent to np.min
   @end_compatibility
   """
-  keepdims = deprecation.deprecated_argument_lookup("keepdims", keepdims,
-                                                    "keep_dims", keep_dims)
-  if keepdims is None:
-    keepdims = False
-  return _may_reduce_to_scalar(keepdims, axis, reduction_indices,
+  return _may_reduce_to_scalar(keep_dims, axis, reduction_indices,
                                gen_math_ops._min(
                                    input_tensor,
                                    _ReductionDims(input_tensor, axis,
                                                   reduction_indices),
-                                   keepdims,
+                                   keep_dims,
                                    name=name))
 
 
-@deprecation.deprecated_args(
-    None, "keep_dims is deprecated, use keepdims instead", "keep_dims")
 def reduce_max(input_tensor,
                axis=None,
-               keepdims=None,
+               keep_dims=False,
                name=None,
-               reduction_indices=None,
-               keep_dims=None):
+               reduction_indices=None):
   """Computes the maximum of elements across dimensions of a tensor.
 
   Reduces `input_tensor` along the dimensions given in `axis`.
-  Unless `keepdims` is true, the rank of the tensor is reduced by 1 for each
-  entry in `axis`. If `keepdims` is true, the reduced dimensions
+  Unless `keep_dims` is true, the rank of the tensor is reduced by 1 for each
+  entry in `axis`. If `keep_dims` is true, the reduced dimensions
   are retained with length 1.
 
   If `axis` has no entries, all dimensions are reduced, and a
@@ -1596,10 +1534,9 @@ def reduce_max(input_tensor,
     axis: The dimensions to reduce. If `None` (the default),
       reduces all dimensions. Must be in the range
       `[-rank(input_tensor), rank(input_tensor))`.
-    keepdims: If true, retains reduced dimensions with length 1.
+    keep_dims: If true, retains reduced dimensions with length 1.
     name: A name for the operation (optional).
     reduction_indices: The old (deprecated) name for axis.
-    keep_dims: Deprecated alias for `keepdims`.
 
   Returns:
     The reduced tensor.
@@ -1608,32 +1545,25 @@ def reduce_max(input_tensor,
   Equivalent to np.max
   @end_compatibility
   """
-  keepdims = deprecation.deprecated_argument_lookup("keepdims", keepdims,
-                                                    "keep_dims", keep_dims)
-  if keepdims is None:
-    keepdims = False
-  return _may_reduce_to_scalar(keepdims, axis, reduction_indices,
+  return _may_reduce_to_scalar(keep_dims, axis, reduction_indices,
                                gen_math_ops._max(
                                    input_tensor,
                                    _ReductionDims(input_tensor, axis,
                                                   reduction_indices),
-                                   keepdims,
+                                   keep_dims,
                                    name=name))
 
 
-@deprecation.deprecated_args(
-    None, "keep_dims is deprecated, use keepdims instead", "keep_dims")
 def reduce_all(input_tensor,
                axis=None,
-               keepdims=None,
+               keep_dims=False,
                name=None,
-               reduction_indices=None,
-               keep_dims=None):
+               reduction_indices=None):
   """Computes the "logical and" of elements across dimensions of a tensor.
 
   Reduces `input_tensor` along the dimensions given in `axis`.
-  Unless `keepdims` is true, the rank of the tensor is reduced by 1 for each
-  entry in `axis`. If `keepdims` is true, the reduced dimensions
+  Unless `keep_dims` is true, the rank of the tensor is reduced by 1 for each
+  entry in `axis`. If `keep_dims` is true, the reduced dimensions
   are retained with length 1.
 
   If `axis` has no entries, all dimensions are reduced, and a
@@ -1653,10 +1583,9 @@ def reduce_all(input_tensor,
     axis: The dimensions to reduce. If `None` (the default),
       reduces all dimensions. Must be in the range
       `[-rank(input_tensor), rank(input_tensor))`.
-    keepdims: If true, retains reduced dimensions with length 1.
+    keep_dims: If true, retains reduced dimensions with length 1.
     name: A name for the operation (optional).
     reduction_indices: The old (deprecated) name for axis.
-    keep_dims: Deprecated alias for `keepdims`.
 
   Returns:
     The reduced tensor.
@@ -1665,32 +1594,25 @@ def reduce_all(input_tensor,
   Equivalent to np.all
   @end_compatibility
   """
-  keepdims = deprecation.deprecated_argument_lookup("keepdims", keepdims,
-                                                    "keep_dims", keep_dims)
-  if keepdims is None:
-    keepdims = False
-  return _may_reduce_to_scalar(keepdims, axis, reduction_indices,
+  return _may_reduce_to_scalar(keep_dims, axis, reduction_indices,
                                gen_math_ops._all(
                                    input_tensor,
                                    _ReductionDims(input_tensor, axis,
                                                   reduction_indices),
-                                   keepdims,
+                                   keep_dims,
                                    name=name))
 
 
-@deprecation.deprecated_args(
-    None, "keep_dims is deprecated, use keepdims instead", "keep_dims")
 def reduce_any(input_tensor,
                axis=None,
-               keepdims=None,
+               keep_dims=False,
                name=None,
-               reduction_indices=None,
-               keep_dims=None):
+               reduction_indices=None):
   """Computes the "logical or" of elements across dimensions of a tensor.
 
   Reduces `input_tensor` along the dimensions given in `axis`.
-  Unless `keepdims` is true, the rank of the tensor is reduced by 1 for each
-  entry in `axis`. If `keepdims` is true, the reduced dimensions
+  Unless `keep_dims` is true, the rank of the tensor is reduced by 1 for each
+  entry in `axis`. If `keep_dims` is true, the reduced dimensions
   are retained with length 1.
 
   If `axis` has no entries, all dimensions are reduced, and a
@@ -1710,10 +1632,9 @@ def reduce_any(input_tensor,
     axis: The dimensions to reduce. If `None` (the default),
       reduces all dimensions. Must be in the range
       `[-rank(input_tensor), rank(input_tensor))`.
-    keepdims: If true, retains reduced dimensions with length 1.
+    keep_dims: If true, retains reduced dimensions with length 1.
     name: A name for the operation (optional).
     reduction_indices: The old (deprecated) name for axis.
-    keep_dims: Deprecated alias for `keepdims`.
 
   Returns:
     The reduced tensor.
@@ -1722,32 +1643,25 @@ def reduce_any(input_tensor,
   Equivalent to np.any
   @end_compatibility
   """
-  keepdims = deprecation.deprecated_argument_lookup("keepdims", keepdims,
-                                                    "keep_dims", keep_dims)
-  if keepdims is None:
-    keepdims = False
-  return _may_reduce_to_scalar(keepdims, axis, reduction_indices,
+  return _may_reduce_to_scalar(keep_dims, axis, reduction_indices,
                                gen_math_ops._any(
                                    input_tensor,
                                    _ReductionDims(input_tensor, axis,
                                                   reduction_indices),
-                                   keepdims,
+                                   keep_dims,
                                    name=name))
 
 
-@deprecation.deprecated_args(
-    None, "keep_dims is deprecated, use keepdims instead", "keep_dims")
 def reduce_logsumexp(input_tensor,
                      axis=None,
-                     keepdims=None,
+                     keep_dims=False,
                      name=None,
-                     reduction_indices=None,
-                     keep_dims=None):
+                     reduction_indices=None):
   """Computes log(sum(exp(elements across dimensions of a tensor))).
 
   Reduces `input_tensor` along the dimensions given in `axis`.
-  Unless `keepdims` is true, the rank of the tensor is reduced by 1 for each
-  entry in `axis`. If `keepdims` is true, the reduced dimensions
+  Unless `keep_dims` is true, the rank of the tensor is reduced by 1 for each
+  entry in `axis`. If `keep_dims` is true, the reduced dimensions
   are retained with length 1.
 
   If `axis` has no entries, all dimensions are reduced, and a
@@ -1764,7 +1678,7 @@ def reduce_logsumexp(input_tensor,
   tf.reduce_logsumexp(x)  # log(6)
   tf.reduce_logsumexp(x, 0)  # [log(2), log(2), log(2)]
   tf.reduce_logsumexp(x, 1)  # [log(3), log(3)]
-  tf.reduce_logsumexp(x, 1, keepdims=True)  # [[log(3)], [log(3)]]
+  tf.reduce_logsumexp(x, 1, keep_dims=True)  # [[log(3)], [log(3)]]
   tf.reduce_logsumexp(x, [0, 1])  # log(6)
   ```
 
@@ -1773,24 +1687,19 @@ def reduce_logsumexp(input_tensor,
     axis: The dimensions to reduce. If `None` (the default),
       reduces all dimensions. Must be in the range
       `[-rank(input_tensor), rank(input_tensor))`.
-    keepdims: If true, retains reduced dimensions with length 1.
+    keep_dims: If true, retains reduced dimensions with length 1.
     name: A name for the operation (optional).
     reduction_indices: The old (deprecated) name for axis.
-    keep_dims: Deprecated alias for `keepdims`.
 
   Returns:
     The reduced tensor.
   """
-  keepdims = deprecation.deprecated_argument_lookup("keepdims", keepdims,
-                                                    "keep_dims", keep_dims)
-  if keepdims is None:
-    keepdims = False
   with ops.name_scope(name, "ReduceLogSumExp", [input_tensor]) as name:
     raw_max = reduce_max(
         input_tensor,
         axis=axis,
         reduction_indices=reduction_indices,
-        keepdims=True)
+        keep_dims=True)
     my_max = array_ops.stop_gradient(
         array_ops.where(
             gen_math_ops.is_finite(raw_max), raw_max,
@@ -1799,13 +1708,13 @@ def reduce_logsumexp(input_tensor,
         reduce_sum(
             gen_math_ops.exp(input_tensor - my_max),
             axis,
-            keepdims=True,
+            keep_dims=True,
             reduction_indices=reduction_indices)) + my_max
-    if not keepdims:
+    if not keep_dims:
       if isinstance(axis, int):
         axis = [axis]
       result = array_ops.squeeze(result, axis)
-    return _may_reduce_to_scalar(keepdims, axis, reduction_indices, result)
+    return _may_reduce_to_scalar(keep_dims, axis, reduction_indices, result)
 
 
 def trace(x, name=None):
@@ -2307,10 +2216,9 @@ def bincount(arr,
     maxlength = ops.convert_to_tensor(
         maxlength, name="maxlength", dtype=dtypes.int32)
     output_size = gen_math_ops.minimum(maxlength, output_size)
-  if weights is not None:
-    weights = ops.convert_to_tensor(weights, name="weights")
-    return gen_math_ops.unsorted_segment_sum(weights, arr, output_size)
-  weights = constant_op.constant([], dtype)
+  weights = (
+      ops.convert_to_tensor(weights, name="weights")
+      if weights is not None else constant_op.constant([], dtype))
   return gen_math_ops.bincount(arr, output_size, weights)
 
 
@@ -2473,7 +2381,7 @@ def reduced_shape(input_shape, axes):
     input_shape: 1-D Tensor, the shape of the Tensor being reduced.
     axes: 1-D Tensor, the reduction axes.
   Returns:
-    A 1-D Tensor, the output shape as if keepdims were set to True.
+    A 1-D Tensor, the output shape as if keep_dims were set to True.
   """
   # Example:
   # cast needed for SparseTensor reductions
diff --git a/tensorflow/python/ops/metrics_impl.py b/tensorflow/python/ops/metrics_impl.py
index d30f6b92ad..717ee1254f 100644
--- a/tensorflow/python/ops/metrics_impl.py
+++ b/tensorflow/python/ops/metrics_impl.py
@@ -794,7 +794,7 @@ def mean_cosine_distance(labels, predictions, dim, weights=None,
   radial_diffs = math_ops.multiply(predictions, labels)
   radial_diffs = math_ops.reduce_sum(radial_diffs,
                                      reduction_indices=[dim,],
-                                     keepdims=True)
+                                     keep_dims=True)
   mean_distance, update_op = mean(radial_diffs, weights,
                                   None,
                                   None,
diff --git a/tensorflow/python/ops/nn_fused_batchnorm_test.py b/tensorflow/python/ops/nn_fused_batchnorm_test.py
index e72d34d1f7..1fcd0384da 100644
--- a/tensorflow/python/ops/nn_fused_batchnorm_test.py
+++ b/tensorflow/python/ops/nn_fused_batchnorm_test.py
@@ -335,22 +335,22 @@ class BatchNormalizationTest(test.TestCase):
 
   def testInference(self):
     x_shape = [1, 1, 6, 1]
-    for dtype in [np.float16, np.float32]:
-      if test.is_gpu_available(cuda_only=True):
+    if test.is_gpu_available(cuda_only=True):
+      for dtype in [np.float16, np.float32]:
         self._test_inference(
             x_shape, dtype, [1], np.float32, use_gpu=True, data_format='NHWC')
         self._test_inference(
             x_shape, dtype, [1], np.float32, use_gpu=True, data_format='NCHW')
-      self._test_inference(
-          x_shape, dtype, [1], np.float32, use_gpu=False, data_format='NHWC')
+    self._test_inference(
+        x_shape, np.float32, [1], np.float32, use_gpu=False, data_format='NHWC')
 
     x_shape = [1, 1, 6, 2]
     if test.is_gpu_available(cuda_only=True):
       for dtype in [np.float16, np.float32]:
         self._test_inference(
             x_shape, dtype, [2], np.float32, use_gpu=True, data_format='NHWC')
-        self._test_inference(
-            x_shape, dtype, [2], np.float32, use_gpu=False, data_format='NHWC')
+    self._test_inference(
+        x_shape, np.float32, [2], np.float32, use_gpu=False, data_format='NHWC')
 
     x_shape = [1, 2, 1, 6]
     if test.is_gpu_available(cuda_only=True):
@@ -359,33 +359,33 @@ class BatchNormalizationTest(test.TestCase):
             x_shape, dtype, [2], np.float32, use_gpu=True, data_format='NCHW')
 
     x_shape = [27, 131, 127, 6]
-    for dtype in [np.float16, np.float32]:
-      if test.is_gpu_available(cuda_only=True):
+    if test.is_gpu_available(cuda_only=True):
+      for dtype in [np.float16, np.float32]:
         self._test_inference(
             x_shape, dtype, [131], np.float32, use_gpu=True, data_format='NCHW')
         self._test_inference(
             x_shape, dtype, [6], np.float32, use_gpu=True, data_format='NHWC')
-      self._test_inference(
-          x_shape, dtype, [6], np.float32, use_gpu=False, data_format='NHWC')
+    self._test_inference(
+        x_shape, np.float32, [6], np.float32, use_gpu=False, data_format='NHWC')
 
   def testTraining(self):
     x_shape = [1, 1, 6, 1]
-    for dtype in [np.float16, np.float32]:
-      if test.is_gpu_available(cuda_only=True):
+    if test.is_gpu_available(cuda_only=True):
+      for dtype in [np.float16, np.float32]:
         self._test_training(
             x_shape, dtype, [1], np.float32, use_gpu=True, data_format='NHWC')
         self._test_training(
             x_shape, dtype, [1], np.float32, use_gpu=True, data_format='NCHW')
-      self._test_training(
-          x_shape, dtype, [1], np.float32, use_gpu=False, data_format='NHWC')
+    self._test_training(
+        x_shape, np.float32, [1], np.float32, use_gpu=False, data_format='NHWC')
 
     x_shape = [1, 1, 6, 2]
-    for dtype in [np.float16, np.float32]:
-      if test.is_gpu_available(cuda_only=True):
+    if test.is_gpu_available(cuda_only=True):
+      for dtype in [np.float16, np.float32]:
         self._test_training(
             x_shape, dtype, [2], np.float32, use_gpu=True, data_format='NHWC')
-      self._test_training(
-          x_shape, dtype, [2], np.float32, use_gpu=False, data_format='NHWC')
+    self._test_training(
+        x_shape, np.float32, [2], np.float32, use_gpu=False, data_format='NHWC')
 
     x_shape = [1, 2, 1, 6]
     if test.is_gpu_available(cuda_only=True):
@@ -394,20 +394,20 @@ class BatchNormalizationTest(test.TestCase):
             x_shape, dtype, [2], np.float32, use_gpu=True, data_format='NCHW')
 
     x_shape = [27, 131, 127, 6]
-    for dtype in [np.float16, np.float32]:
-      if test.is_gpu_available(cuda_only=True):
+    if test.is_gpu_available(cuda_only=True):
+      for dtype in [np.float16, np.float32]:
         self._test_training(
             x_shape, dtype, [131], np.float32, use_gpu=True, data_format='NCHW')
         self._test_training(
             x_shape, dtype, [6], np.float32, use_gpu=True, data_format='NHWC')
-      self._test_training(
-          x_shape, dtype, [6], np.float32, use_gpu=False, data_format='NHWC')
+    self._test_training(
+        x_shape, np.float32, [6], np.float32, use_gpu=False, data_format='NHWC')
 
   def testBatchNormGrad(self):
     for is_training in [True, False]:
       x_shape = [1, 1, 6, 1]
-      for dtype in [np.float16, np.float32]:
-        if test.is_gpu_available(cuda_only=True):
+      if test.is_gpu_available(cuda_only=True):
+        for dtype in [np.float16, np.float32]:
           self._test_gradient(
               x_shape,
               dtype, [1],
@@ -422,17 +422,17 @@ class BatchNormalizationTest(test.TestCase):
               use_gpu=True,
               data_format='NCHW',
               is_training=is_training)
-        self._test_gradient(
-            x_shape,
-            dtype, [1],
-            np.float32,
-            use_gpu=False,
-            data_format='NHWC',
-            is_training=is_training)
+      self._test_gradient(
+          x_shape,
+          np.float32, [1],
+          np.float32,
+          use_gpu=False,
+          data_format='NHWC',
+          is_training=is_training)
 
       x_shape = [1, 1, 6, 2]
-      for dtype in [np.float16, np.float32]:
-        if test.is_gpu_available(cuda_only=True):
+      if test.is_gpu_available(cuda_only=True):
+        for dtype in [np.float16, np.float32]:
           self._test_gradient(
               x_shape,
               dtype, [2],
@@ -440,13 +440,13 @@ class BatchNormalizationTest(test.TestCase):
               use_gpu=True,
               data_format='NHWC',
               is_training=is_training)
-        self._test_gradient(
-            x_shape,
-            dtype, [2],
-            np.float32,
-            use_gpu=False,
-            data_format='NHWC',
-            is_training=is_training)
+      self._test_gradient(
+          x_shape,
+          np.float32, [2],
+          np.float32,
+          use_gpu=False,
+          data_format='NHWC',
+          is_training=is_training)
 
       x_shape = [1, 2, 1, 6]
       if test.is_gpu_available(cuda_only=True):
@@ -460,8 +460,8 @@ class BatchNormalizationTest(test.TestCase):
               is_training=is_training)
 
       x_shape = [5, 7, 11, 4]
-      for dtype in [np.float16, np.float32]:
-        if test.is_gpu_available(cuda_only=True):
+      if test.is_gpu_available(cuda_only=True):
+        for dtype in [np.float16, np.float32]:
           self._test_gradient(
               x_shape,
               dtype, [7],
@@ -476,13 +476,13 @@ class BatchNormalizationTest(test.TestCase):
               use_gpu=True,
               data_format='NHWC',
               is_training=is_training)
-        self._test_gradient(
-            x_shape,
-            dtype, [4],
-            np.float32,
-            use_gpu=False,
-            data_format='NHWC',
-            is_training=is_training)
+      self._test_gradient(
+          x_shape,
+          np.float32, [4],
+          np.float32,
+          use_gpu=False,
+          data_format='NHWC',
+          is_training=is_training)
 
   def _testBatchNormGradGrad(self, config):
     shape = config['shape']
@@ -506,14 +506,15 @@ class BatchNormalizationTest(test.TestCase):
             data_format='NCHW',
             is_training=is_training,
             err_tolerance=err_tolerance)
-      self._test_grad_grad(
-          shape,
-          dtype, [shape[3]],
-          np.float32,
-          use_gpu=False,
-          data_format='NHWC',
-          is_training=is_training,
-          err_tolerance=err_tolerance)
+      if dtype != np.float16:
+        self._test_grad_grad(
+            shape,
+            np.float32, [shape[3]],
+            np.float32,
+            use_gpu=False,
+            data_format='NHWC',
+            is_training=is_training,
+            err_tolerance=err_tolerance)
 
   def testBatchNormGradGrad(self):
     configs = [{
@@ -524,10 +525,6 @@ class BatchNormalizationTest(test.TestCase):
         'shape': [2, 3, 2, 2],
         'err_tolerance': 1e-3,
         'dtype': np.float32,
-    }, {
-        'shape': [2, 3, 4, 5],
-        'err_tolerance': 1e-2,
-        'dtype': np.float16,
     }, {
         'shape': [2, 3, 2, 2],
         'err_tolerance': 2e-3,
diff --git a/tensorflow/python/ops/nn_impl.py b/tensorflow/python/ops/nn_impl.py
index da037a7983..431ea1186a 100644
--- a/tensorflow/python/ops/nn_impl.py
+++ b/tensorflow/python/ops/nn_impl.py
@@ -32,8 +32,6 @@ from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import nn_ops
 from tensorflow.python.ops import sparse_ops
 from tensorflow.python.ops import variables
-from tensorflow.python.util.deprecation import deprecated_args
-from tensorflow.python.util.deprecation import deprecated_argument_lookup
 
 
 def log_poisson_loss(targets, log_input, compute_full_loss=False, name=None):
@@ -315,20 +313,19 @@ def swish(features):
   return features * math_ops.sigmoid(features)
 
 
-@deprecated_args(None, "dim is deprecated, use axis instead", "dim")
-def l2_normalize(x, axis=None, epsilon=1e-12, name=None, dim=None):
-  """Normalizes along dimension `axis` using an L2 norm.
+def l2_normalize(x, dim, epsilon=1e-12, name=None):
+  """Normalizes along dimension `dim` using an L2 norm.
 
-  For a 1-D tensor with `axis = 0`, computes
+  For a 1-D tensor with `dim = 0`, computes
 
       output = x / sqrt(max(sum(x**2), epsilon))
 
   For `x` with more dimensions, independently normalizes each 1-D slice along
-  dimension `axis`.
+  dimension `dim`.
 
   Args:
     x: A `Tensor`.
-    axis: Dimension along which to normalize.  A scalar or a vector of
+    dim: Dimension along which to normalize.  A scalar or a vector of
       integers.
     epsilon: A lower bound value for the norm. Will use `sqrt(epsilon)` as the
       divisor if `norm < sqrt(epsilon)`.
@@ -338,9 +335,8 @@ def l2_normalize(x, axis=None, epsilon=1e-12, name=None, dim=None):
     A `Tensor` with the same shape as `x`.
   """
   with ops.name_scope(name, "l2_normalize", [x]) as name:
-    axis = deprecated_argument_lookup("axis", axis, "dim", dim)
     x = ops.convert_to_tensor(x, name="x")
-    square_sum = math_ops.reduce_sum(math_ops.square(x), axis, keep_dims=True)
+    square_sum = math_ops.reduce_sum(math_ops.square(x), dim, keep_dims=True)
     x_inv_norm = math_ops.rsqrt(math_ops.maximum(square_sum, epsilon))
     return math_ops.multiply(x, x_inv_norm, name=name)
 
diff --git a/tensorflow/python/ops/nn_ops.py b/tensorflow/python/ops/nn_ops.py
index 61fa462988..bdaac65904 100644
--- a/tensorflow/python/ops/nn_ops.py
+++ b/tensorflow/python/ops/nn_ops.py
@@ -23,7 +23,6 @@ import numbers
 import numpy as np
 
 from tensorflow.python.eager import context
-from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import graph_util
 from tensorflow.python.framework import ops
@@ -38,8 +37,6 @@ from tensorflow.python.ops import random_ops
 # pylint: disable=wildcard-import
 from tensorflow.python.ops.gen_nn_ops import *
 # pylint: enable=wildcard-import
-from tensorflow.python.util.deprecation import deprecated_args
-from tensorflow.python.util.deprecation import deprecated_argument_lookup
 
 from tensorflow.python.util import deprecation
 
@@ -1648,18 +1645,17 @@ def _softmax(logits, compute_op, dim=-1, name=None):
   return output
 
 
-@deprecated_args(None, "dim is deprecated, use axis instead", "dim")
-def softmax(logits, axis=None, name=None, dim=None):
+def softmax(logits, dim=-1, name=None):
   """Computes softmax activations.
 
   This function performs the equivalent of
 
-      softmax = tf.exp(logits) / tf.reduce_sum(tf.exp(logits), axis)
+      softmax = tf.exp(logits) / tf.reduce_sum(tf.exp(logits), dim)
 
   Args:
     logits: A non-empty `Tensor`. Must be one of the following types: `half`,
       `float32`, `float64`.
-    axis: The dimension softmax would be performed on. The default is -1 which
+    dim: The dimension softmax would be performed on. The default is -1 which
       indicates the last dimension.
     name: A name for the operation (optional).
 
@@ -1667,27 +1663,23 @@ def softmax(logits, axis=None, name=None, dim=None):
     A `Tensor`. Has the same type and shape as `logits`.
 
   Raises:
-    InvalidArgumentError: if `logits` is empty or `axis` is beyond the last
+    InvalidArgumentError: if `logits` is empty or `dim` is beyond the last
       dimension of `logits`.
   """
-  axis = deprecated_argument_lookup("axis", axis, "dim", dim)
-  if axis is None:
-    axis = -1
-  return _softmax(logits, gen_nn_ops._softmax, axis, name)
+  return _softmax(logits, gen_nn_ops._softmax, dim, name)
 
 
-@deprecated_args(None, "dim is deprecated, use axis instead", "dim")
-def log_softmax(logits, axis=None, name=None, dim=None):
+def log_softmax(logits, dim=-1, name=None):
   """Computes log softmax activations.
 
   For each batch `i` and class `j` we have
 
-      logsoftmax = logits - log(reduce_sum(exp(logits), axis))
+      logsoftmax = logits - log(reduce_sum(exp(logits), dim))
 
   Args:
     logits: A non-empty `Tensor`. Must be one of the following types: `half`,
       `float32`, `float64`.
-    axis: The dimension softmax would be performed on. The default is -1 which
+    dim: The dimension softmax would be performed on. The default is -1 which
       indicates the last dimension.
     name: A name for the operation (optional).
 
@@ -1695,13 +1687,10 @@ def log_softmax(logits, axis=None, name=None, dim=None):
     A `Tensor`. Has the same type as `logits`. Same shape as `logits`.
 
   Raises:
-    InvalidArgumentError: if `logits` is empty or `axis` is beyond the last
+    InvalidArgumentError: if `logits` is empty or `dim` is beyond the last
       dimension of `logits`.
   """
-  axis = deprecated_argument_lookup("axis", axis, "dim", dim)
-  if axis is None:
-    axis = -1
-  return _softmax(logits, gen_nn_ops._log_softmax, axis, name)
+  return _softmax(logits, gen_nn_ops._log_softmax, dim, name)
 
 
 def _ensure_xent_args(name, sentinel, labels, logits):
@@ -2316,100 +2305,6 @@ def conv1d(value, filters, stride, padding,
     return array_ops.squeeze(result, [spatial_start_dim])
 
 
-def conv1d_transpose(value,
-                     filter,
-                     output_shape,
-                     stride,
-                     padding="SAME",
-                     data_format="NWC",
-                     name=None):
-  """The transpose of `conv1d`.
-
-  This operation is sometimes called "deconvolution" after [Deconvolutional
-  Networks](http://www.matthewzeiler.com/pubs/cvpr2010/cvpr2010.pdf), but is
-  actually the transpose (gradient) of `conv1d` rather than an actual
-  deconvolution.
-
-  Args:
-    value: A 3-D `Tensor` of type `float` and shape
-      `[batch, in_width, in_channels]` for `NWC` data format or
-      `[batch, in_channels, in_width]` for `NCW` data format.
-    filter: A 3-D `Tensor` with the same type as `value` and shape
-      `[filter_width, output_channels, in_channels]`.  `filter`'s
-      `in_channels` dimension must match that of `value`.
-    output_shape: A 1-D `Tensor` representing the output shape of the
-      deconvolution op.
-    stride: An `integer`.  The number of entries by which
-      the filter is moved right at each step.
-    padding: A string, either `'VALID'` or `'SAME'`. The padding algorithm.
-      See the @{tf.nn.convolution$comment here}
-    data_format: A string. 'NHWC' and 'NCHW' are supported.
-    name: Optional name for the returned tensor.
-
-  Returns:
-    A `Tensor` with the same type as `value`.
-
-  Raises:
-    ValueError: If input/output depth does not match `filter`'s shape, or if
-      padding is other than `'VALID'` or `'SAME'`.
-  """
-  with ops.name_scope(name, "conv1d_transpose",
-                      [value, filter, output_shape]) as name:
-    output_shape_ = ops.convert_to_tensor(output_shape, name="output_shape")
-    if not output_shape_.get_shape().is_compatible_with(tensor_shape.vector(3)):
-      raise ValueError("output_shape must have shape (3,), got {}"
-                       .format(output_shape_.get_shape()))
-
-    # The format could be either NWC or NCW, map to NHWC or NCHW
-    if data_format is None or data_format == "NWC":
-      data_format_2d = "NHWC"
-      axis = 2
-    elif data_format == "NCW":
-      data_format_2d = "NCHW"
-      axis = 1
-    else:
-      raise ValueError("data_format must be \"NWC\" or \"NCW\".")
-
-    if not value.get_shape()[axis].is_compatible_with(filter.get_shape()[2]):
-      raise ValueError("input channels does not match filter's input channels, "
-                       "{} != {}".format(value.get_shape()[axis],
-                                         filter.get_shape()[2]))
-
-    if isinstance(output_shape, (list, np.ndarray)):
-      # output_shape's shape should be == [3] if reached this point.
-      if not filter.get_shape()[1].is_compatible_with(output_shape[axis]):
-        raise ValueError(
-            "output_shape does not match filter's output channels, "
-            "{} != {}".format(output_shape[axis], filter.get_shape()[1]))
-
-    if padding != "VALID" and padding != "SAME":
-      raise ValueError("padding must be either VALID or SAME:"
-                       " {}".format(padding))
-
-    # Reshape the input tensor to [batch, 1, in_width, in_channels]
-    if data_format_2d == "NHWC":
-      output_shape_ = array_ops.concat([output_shape_[:1], [1],
-                                        output_shape_[1:]], axis=0)
-      spatial_start_dim = 1
-      strides = [1, 1, stride, 1]
-    else:
-      output_shape_ = array_ops.concat([output_shape_[:2], [1],
-                                        output_shape_[2:]], axis=0)
-      spatial_start_dim = 2
-      strides = [1, 1, 1, stride]
-    value = array_ops.expand_dims(value, spatial_start_dim)
-    filter = array_ops.expand_dims(filter, 0)
-
-    result = gen_nn_ops.conv2d_backprop_input(input_sizes=output_shape_,
-                                              filter=filter,
-                                              out_backprop=value,
-                                              strides=strides,
-                                              padding=padding,
-                                              data_format=data_format_2d,
-                                              name=name)
-    return array_ops.squeeze(result, [spatial_start_dim])
-
-
 @ops.RegisterStatistics("Dilation2D", "flops")
 def _calc_dilation2d_flops(graph, node):
   """Calculates the compute resources needed for Dilation2D."""
diff --git a/tensorflow/python/ops/variables.py b/tensorflow/python/ops/variables.py
index a1e4305de1..e9b1c67d16 100644
--- a/tensorflow/python/ops/variables.py
+++ b/tensorflow/python/ops/variables.py
@@ -1063,13 +1063,13 @@ class Variable(object):
 class PartitionedVariable(object):
   """A container for partitioned `Variable` objects.
 
-  @compatibility(eager) `tf.PartitionedVariable` is not compatible with
+  @compatiblity(eager) `tf.PartitionedVariable` is not compatible with
   eager execution.  Use `tfe.Variable` instead which is compatable
   with both eager execution and graph construction.  See [the
   TensorFlow Eager Execution
   guide](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/eager/python/g3doc/guide.md#variables-and-optimizers)
   for details on how variables work in eager execution.
-  @end_compatibility
+  @end_compatiblity
   """
 
   class PartitionedVariableIterator(object):
diff --git a/tensorflow/python/tools/import_pb_to_tensorboard.py b/tensorflow/python/tools/import_pb_to_tensorboard.py
old mode 100755
new mode 100644
diff --git a/tensorflow/stream_executor/cuda/cuda_dnn.cc b/tensorflow/stream_executor/cuda/cuda_dnn.cc
index d78362d4fb..99bed86a17 100644
--- a/tensorflow/stream_executor/cuda/cuda_dnn.cc
+++ b/tensorflow/stream_executor/cuda/cuda_dnn.cc
@@ -232,6 +232,7 @@ CUDNN_DNN_ROUTINE_EACH_R3(PERFTOOLS_GPUTOOLS_CUDNN_WRAP)
   __macro(cudnnRNNBackwardData)                               \
   __macro(cudnnRNNBackwardWeights)                            \
   __macro(cudnnSetRNNDescriptor)                              \
+  __macro(cudnnSetRNNDescriptor_v6)                           \
   __macro(cudnnGetFilterNdDescriptor)
 
 // clang-format on
@@ -244,8 +245,7 @@ CUDNN_DNN_ROUTINE_EACH_R5(PERFTOOLS_GPUTOOLS_CUDNN_WRAP)
 // clang-format off
 #if CUDNN_VERSION >= 6000
 #define CUDNN_DNN_ROUTINE_EACH_R6(__macro)                    \
-  __macro(cudnnConvolutionBiasActivationForward)              \
-  __macro(cudnnSetRNNDescriptor_v6)
+  __macro(cudnnConvolutionBiasActivationForward)
 
 // clang-format on
 CUDNN_DNN_ROUTINE_EACH_R6(PERFTOOLS_GPUTOOLS_CUDNN_WRAP)
@@ -665,6 +665,7 @@ class ScopedPoolingDescriptor {
       LOG(FATAL) << "could not create cudnn pooling descriptor: "
                  << ToString(status);
     }
+
     const std::vector<int64> strides64 = pooling_descriptor.strides();
     const std::vector<int64> padding64 = pooling_descriptor.padding();
     const std::vector<int64> shape64 = pooling_descriptor.window();
@@ -679,14 +680,14 @@ class ScopedPoolingDescriptor {
                    &CheckedNarrowing<int64, int>);
     std::transform(shape64.cbegin(), shape64.cend(), shape.begin(),
                    &CheckedNarrowing<int64, int>);
-    bool propagate_nans = pooling_descriptor.propagate_nans();
     status = wrap::cudnnSetPoolingNdDescriptor(
         parent_, handle_,
         (pooling_descriptor.mode() == dnn::PoolingMode::kMaximum
              ? CUDNN_POOLING_MAX
              : CUDNN_POOLING_AVERAGE_COUNT_EXCLUDE_PADDING),
 #if CUDNN_VERSION >= 5000
-        propagate_nans ? CUDNN_PROPAGATE_NAN : CUDNN_NOT_PROPAGATE_NAN,
+        // Always propagate nans.
+        CUDNN_PROPAGATE_NAN,
 #endif
         nd, shape.data(), padding.data(), strides.data());
     if (status != CUDNN_STATUS_SUCCESS) {
diff --git a/tensorflow/stream_executor/dnn.cc b/tensorflow/stream_executor/dnn.cc
index 43d2d3cd48..07fe8a85f4 100644
--- a/tensorflow/stream_executor/dnn.cc
+++ b/tensorflow/stream_executor/dnn.cc
@@ -482,7 +482,6 @@ void PoolingDescriptor::CloneFrom(const PoolingDescriptor& other) {
   window_ = other.window_;
   padding_ = other.padding_;
   strides_ = other.strides_;
-  propagate_nans_ = other.propagate_nans_;
 }
 
 string PoolingDescriptor::ToString() const {
@@ -496,12 +495,9 @@ string PoolingDescriptor::ToString() const {
     port::Appendf(&padding, "%lld", padding_[i]);
   }
 
-  const char* propagate_string = propagate_nans_ ? "Yes" : "No";
-
-  return port::Printf(
-      "{mode: %s window: %s strides: %s padding: %s propagate NaNs: %s}",
-      mode_string, window.c_str(), strides.c_str(), padding.c_str(),
-      propagate_string);
+  return port::Printf("{mode: %s window: %s strides: %s padding: %s}",
+                      mode_string, window.c_str(), strides.c_str(),
+                      padding.c_str());
 }
 
 string PoolingDescriptor::ToShortString() const {
@@ -512,8 +508,7 @@ string PoolingDescriptor::ToShortString() const {
     port::Appendf(&padding, "_p%d:%lld", i, padding_[i]);
   }
   return port::StrCat(mode_ == dnn::PoolingMode::kMaximum ? "max" : "avg",
-                      window, strides, padding,
-                      propagate_nans_ ? "propagate_nans" : "ignore_nans");
+                      window, strides, padding);
 }
 
 // -- NormalizeDescriptor
diff --git a/tensorflow/stream_executor/dnn.h b/tensorflow/stream_executor/dnn.h
index 0d2cd4a9f2..49235167ab 100644
--- a/tensorflow/stream_executor/dnn.h
+++ b/tensorflow/stream_executor/dnn.h
@@ -661,10 +661,6 @@ class PoolingDescriptor {
     SetDim(&strides_, dim, value);
     return *this;
   }
-  PoolingDescriptor& set_propagate_nans(bool value) {
-    propagate_nans_ = value;
-    return *this;
-  }
 
   int ndims() const { return ndims_; }
   void CloneFrom(const PoolingDescriptor& other);
@@ -685,12 +681,10 @@ class PoolingDescriptor {
   std::vector<int64> window() const { return window_; }
   std::vector<int64> padding() const { return padding_; }
   std::vector<int64> strides() const { return strides_; }
-  bool propagate_nans() const { return propagate_nans_; }
 
  private:
   PoolingMode mode_;
   int ndims_;
-  bool propagate_nans_;
 
   // Stored as: ..., y, x.
   std::vector<int64> window_;
diff --git a/tensorflow/tools/api/golden/tensorflow.linalg.pbtxt b/tensorflow/tools/api/golden/tensorflow.linalg.pbtxt
index 62e634afb8..9fd38a29b7 100644
--- a/tensorflow/tools/api/golden/tensorflow.linalg.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.linalg.pbtxt
@@ -94,7 +94,7 @@ tf_module {
   }
   member_method {
     name: "norm"
-    argspec: "args=[\'tensor\', \'ord\', \'axis\', \'keepdims\', \'name\', \'keep_dims\'], varargs=None, keywords=None, defaults=[\'euclidean\', \'None\', \'None\', \'None\', \'None\'], "
+    argspec: "args=[\'tensor\', \'ord\', \'axis\', \'keep_dims\', \'name\'], varargs=None, keywords=None, defaults=[\'euclidean\', \'None\', \'False\', \'None\'], "
   }
   member_method {
     name: "qr"
diff --git a/tensorflow/tools/api/golden/tensorflow.nn.pbtxt b/tensorflow/tools/api/golden/tensorflow.nn.pbtxt
index ebd9c079b5..24c0448dea 100644
--- a/tensorflow/tools/api/golden/tensorflow.nn.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.nn.pbtxt
@@ -170,7 +170,7 @@ tf_module {
   }
   member_method {
     name: "l2_normalize"
-    argspec: "args=[\'x\', \'axis\', \'epsilon\', \'name\', \'dim\'], varargs=None, keywords=None, defaults=[\'None\', \'1e-12\', \'None\', \'None\'], "
+    argspec: "args=[\'x\', \'dim\', \'epsilon\', \'name\'], varargs=None, keywords=None, defaults=[\'1e-12\', \'None\'], "
   }
   member_method {
     name: "leaky_relu"
@@ -190,7 +190,7 @@ tf_module {
   }
   member_method {
     name: "log_softmax"
-    argspec: "args=[\'logits\', \'axis\', \'name\', \'dim\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], "
+    argspec: "args=[\'logits\', \'dim\', \'name\'], varargs=None, keywords=None, defaults=[\'-1\', \'None\'], "
   }
   member_method {
     name: "log_uniform_candidate_sampler"
@@ -282,7 +282,7 @@ tf_module {
   }
   member_method {
     name: "softmax"
-    argspec: "args=[\'logits\', \'axis\', \'name\', \'dim\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], "
+    argspec: "args=[\'logits\', \'dim\', \'name\'], varargs=None, keywords=None, defaults=[\'-1\', \'None\'], "
   }
   member_method {
     name: "softmax_cross_entropy_with_logits"
diff --git a/tensorflow/tools/api/golden/tensorflow.pbtxt b/tensorflow/tools/api/golden/tensorflow.pbtxt
index 0edd4153d7..bf7bc6a7c1 100644
--- a/tensorflow/tools/api/golden/tensorflow.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.pbtxt
@@ -750,7 +750,7 @@ tf_module {
   }
   member_method {
     name: "boolean_mask"
-    argspec: "args=[\'tensor\', \'mask\', \'name\', \'axis\'], varargs=None, keywords=None, defaults=[\'boolean_mask\', \'None\'], "
+    argspec: "args=[\'tensor\', \'mask\', \'name\'], varargs=None, keywords=None, defaults=[\'boolean_mask\'], "
   }
   member_method {
     name: "broadcast_dynamic_shape"
@@ -858,7 +858,7 @@ tf_module {
   }
   member_method {
     name: "count_nonzero"
-    argspec: "args=[\'input_tensor\', \'axis\', \'keepdims\', \'dtype\', \'name\', \'reduction_indices\', \'keep_dims\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \"<dtype: \'int64\'>\", \'None\', \'None\', \'None\'], "
+    argspec: "args=[\'input_tensor\', \'axis\', \'keep_dims\', \'dtype\', \'name\', \'reduction_indices\'], varargs=None, keywords=None, defaults=[\'None\', \'False\', \"<dtype: \'int64\'>\", \'None\', \'None\'], "
   }
   member_method {
     name: "count_up_to"
@@ -1414,7 +1414,7 @@ tf_module {
   }
   member_method {
     name: "norm"
-    argspec: "args=[\'tensor\', \'ord\', \'axis\', \'keepdims\', \'name\', \'keep_dims\'], varargs=None, keywords=None, defaults=[\'euclidean\', \'None\', \'None\', \'None\', \'None\'], "
+    argspec: "args=[\'tensor\', \'ord\', \'axis\', \'keep_dims\', \'name\'], varargs=None, keywords=None, defaults=[\'euclidean\', \'None\', \'False\', \'None\'], "
   }
   member_method {
     name: "not_equal"
@@ -1546,11 +1546,11 @@ tf_module {
   }
   member_method {
     name: "reduce_all"
-    argspec: "args=[\'input_tensor\', \'axis\', \'keepdims\', \'name\', \'reduction_indices\', \'keep_dims\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\'], "
+    argspec: "args=[\'input_tensor\', \'axis\', \'keep_dims\', \'name\', \'reduction_indices\'], varargs=None, keywords=None, defaults=[\'None\', \'False\', \'None\', \'None\'], "
   }
   member_method {
     name: "reduce_any"
-    argspec: "args=[\'input_tensor\', \'axis\', \'keepdims\', \'name\', \'reduction_indices\', \'keep_dims\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\'], "
+    argspec: "args=[\'input_tensor\', \'axis\', \'keep_dims\', \'name\', \'reduction_indices\'], varargs=None, keywords=None, defaults=[\'None\', \'False\', \'None\', \'None\'], "
   }
   member_method {
     name: "reduce_join"
@@ -1558,27 +1558,27 @@ tf_module {
   }
   member_method {
     name: "reduce_logsumexp"
-    argspec: "args=[\'input_tensor\', \'axis\', \'keepdims\', \'name\', \'reduction_indices\', \'keep_dims\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\'], "
+    argspec: "args=[\'input_tensor\', \'axis\', \'keep_dims\', \'name\', \'reduction_indices\'], varargs=None, keywords=None, defaults=[\'None\', \'False\', \'None\', \'None\'], "
   }
   member_method {
     name: "reduce_max"
-    argspec: "args=[\'input_tensor\', \'axis\', \'keepdims\', \'name\', \'reduction_indices\', \'keep_dims\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\'], "
+    argspec: "args=[\'input_tensor\', \'axis\', \'keep_dims\', \'name\', \'reduction_indices\'], varargs=None, keywords=None, defaults=[\'None\', \'False\', \'None\', \'None\'], "
   }
   member_method {
     name: "reduce_mean"
-    argspec: "args=[\'input_tensor\', \'axis\', \'keepdims\', \'name\', \'reduction_indices\', \'keep_dims\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\'], "
+    argspec: "args=[\'input_tensor\', \'axis\', \'keep_dims\', \'name\', \'reduction_indices\'], varargs=None, keywords=None, defaults=[\'None\', \'False\', \'None\', \'None\'], "
   }
   member_method {
     name: "reduce_min"
-    argspec: "args=[\'input_tensor\', \'axis\', \'keepdims\', \'name\', \'reduction_indices\', \'keep_dims\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\'], "
+    argspec: "args=[\'input_tensor\', \'axis\', \'keep_dims\', \'name\', \'reduction_indices\'], varargs=None, keywords=None, defaults=[\'None\', \'False\', \'None\', \'None\'], "
   }
   member_method {
     name: "reduce_prod"
-    argspec: "args=[\'input_tensor\', \'axis\', \'keepdims\', \'name\', \'reduction_indices\', \'keep_dims\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\'], "
+    argspec: "args=[\'input_tensor\', \'axis\', \'keep_dims\', \'name\', \'reduction_indices\'], varargs=None, keywords=None, defaults=[\'None\', \'False\', \'None\', \'None\'], "
   }
   member_method {
     name: "reduce_sum"
-    argspec: "args=[\'input_tensor\', \'axis\', \'keepdims\', \'name\', \'reduction_indices\', \'keep_dims\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\'], "
+    argspec: "args=[\'input_tensor\', \'axis\', \'keep_dims\', \'name\', \'reduction_indices\'], varargs=None, keywords=None, defaults=[\'None\', \'False\', \'None\', \'None\'], "
   }
   member_method {
     name: "register_tensor_conversion_function"
diff --git a/tensorflow/tools/ci_build/ci_parameterized_build.sh b/tensorflow/tools/ci_build/ci_parameterized_build.sh
index c27f4953e3..5f791d7bc7 100755
--- a/tensorflow/tools/ci_build/ci_parameterized_build.sh
+++ b/tensorflow/tools/ci_build/ci_parameterized_build.sh
@@ -165,7 +165,7 @@ else
   BAZEL_TARGET="${BAZEL_TARGET} //tensorflow/contrib/lite/kernels:embedding_lookup_test"
   BAZEL_TARGET="${BAZEL_TARGET} //tensorflow/contrib/lite/kernels:embedding_lookup_sparse_test"
   BAZEL_TARGET="${BAZEL_TARGET} //tensorflow/contrib/lite/kernels:fully_connected_test"
-  # BAZEL_TARGET="${BAZEL_TARGET} //tensorflow/contrib/lite/testing:generated_examples_zip_test"
+  BAZEL_TARGET="${BAZEL_TARGET} //tensorflow/contrib/lite/testing:generated_examples_zip_test"
   BAZEL_TARGET="${BAZEL_TARGET} //tensorflow/contrib/lite/kernels:hashtable_lookup_test"
   BAZEL_TARGET="${BAZEL_TARGET} //tensorflow/contrib/lite/kernels:local_response_norm_test"
   BAZEL_TARGET="${BAZEL_TARGET} //tensorflow/contrib/lite/kernels:lsh_projection_test"
diff --git a/tensorflow/tools/ci_build/install/install_golang.sh b/tensorflow/tools/ci_build/install/install_golang.sh
index e1edd62cc5..55c1674495 100755
--- a/tensorflow/tools/ci_build/install/install_golang.sh
+++ b/tensorflow/tools/ci_build/install/install_golang.sh
@@ -16,7 +16,7 @@
 
 set -ex
 
-GOLANG_URL="https://storage.googleapis.com/golang/go1.9.2.linux-amd64.tar.gz"
+GOLANG_URL="https://storage.googleapis.com/golang/go1.9.1.linux-amd64.tar.gz"
 
 sudo mkdir -p /usr/local
 wget -q -O - "${GOLANG_URL}" | sudo tar -C /usr/local -xz
diff --git a/tensorflow/tools/ci_build/linux/libtensorflow_docker.sh b/tensorflow/tools/ci_build/linux/libtensorflow_docker.sh
index e5d8303c6e..dcda8228bc 100755
--- a/tensorflow/tools/ci_build/linux/libtensorflow_docker.sh
+++ b/tensorflow/tools/ci_build/linux/libtensorflow_docker.sh
@@ -48,6 +48,6 @@ ${DOCKER_BINARY} run \
   -e "TF_NEED_GCP=0" \
   -e "TF_NEED_HDFS=0" \
   -e "TF_NEED_CUDA=${TF_NEED_CUDA}" \
-  -e "TF_NEED_OPENCL_SYCL=0" \
+  -e "TF_NEED_OPENCL=0" \
   "${DOCKER_IMAGE}" \
   "/workspace/tensorflow/tools/ci_build/linux/libtensorflow.sh"
diff --git a/tensorflow/tools/ci_build/osx/libtensorflow_cpu.sh b/tensorflow/tools/ci_build/osx/libtensorflow_cpu.sh
index e1b56b9a25..d90a1b905d 100755
--- a/tensorflow/tools/ci_build/osx/libtensorflow_cpu.sh
+++ b/tensorflow/tools/ci_build/osx/libtensorflow_cpu.sh
@@ -27,7 +27,7 @@ export PYTHON_BIN_PATH="/usr/bin/python"
 export TF_NEED_GCP=0
 export TF_NEED_HDFS=0
 export TF_NEED_CUDA=0
-export TF_NEED_OPENCL_SYCL=0
+export TF_NEED_OPENCL=0
 export TF_NEED_MKL=0
 export COMPUTECPP_PATH="/usr/local"
 
diff --git a/tensorflow/tools/ci_build/osx/libtensorflow_gpu.sh b/tensorflow/tools/ci_build/osx/libtensorflow_gpu.sh
index 5a901af3e5..79973647c1 100755
--- a/tensorflow/tools/ci_build/osx/libtensorflow_gpu.sh
+++ b/tensorflow/tools/ci_build/osx/libtensorflow_gpu.sh
@@ -28,7 +28,7 @@ export LD_LIBRARY_PATH="/usr/local/cuda/lib:/usr/local/cuda/extras/CUPTI/lib:${L
 export PYTHON_BIN_PATH="/usr/bin/python"
 export TF_NEED_GCP=0
 export TF_NEED_HDFS=0
-export TF_NEED_OPENCL_SYCL=0
+export TF_NEED_OPENCL=0
 export TF_NEED_MKL=0
 export COMPUTECPP_PATH="/usr/local"
 
diff --git a/tensorflow/tools/ci_build/pi/build_raspberry_pi.sh b/tensorflow/tools/ci_build/pi/build_raspberry_pi.sh
index 88116d9f24..5244898c40 100755
--- a/tensorflow/tools/ci_build/pi/build_raspberry_pi.sh
+++ b/tensorflow/tools/ci_build/pi/build_raspberry_pi.sh
@@ -75,23 +75,17 @@ if [[ $1 == "PI_ONE" ]]; then
   PI_COPTS="--copt=-march=armv6 --copt=-mfpu=vfp
   --copt=-DUSE_GEMM_FOR_CONV --copt=-DUSE_OPENBLAS
   --copt=-isystem --copt=${OPENBLAS_INSTALL_PATH}/include/
-  --copt=-std=gnu11 --copt=-DS_IREAD=S_IRUSR --copt=-DS_IWRITE=S_IWUSR
   --linkopt=-L${OPENBLAS_INSTALL_PATH}/lib/
   --linkopt=-l:libopenblas.a"
   echo "Building for the Pi One/Zero, with no NEON support"
 else
   PI_COPTS='--copt=-march=armv7-a --copt=-mfpu=neon-vfpv4
-  --copt=-std=gnu11 --copt=-DS_IREAD=S_IRUSR --copt=-DS_IWRITE=S_IWUSR
   --copt=-U__GCC_HAVE_SYNC_COMPARE_AND_SWAP_1
   --copt=-U__GCC_HAVE_SYNC_COMPARE_AND_SWAP_2
   --copt=-U__GCC_HAVE_SYNC_COMPARE_AND_SWAP_8'
   echo "Building for the Pi Two/Three, with NEON acceleration"
 fi
 
-# We need to pass down the environment variable with a possible alternate Python
-# include path for Python 3.x builds to work.
-export CROSSTOOL_PYTHON_INCLUDE_PATH
-
 cd ${WORKSPACE_PATH}
 bazel build -c opt ${PI_COPTS} \
   --config=monolithic \
diff --git a/tensorflow/tools/ci_build/windows/bazel/bazel_test_lib.sh b/tensorflow/tools/ci_build/windows/bazel/bazel_test_lib.sh
index 44b6d52952..924ab1a4ae 100644
--- a/tensorflow/tools/ci_build/windows/bazel/bazel_test_lib.sh
+++ b/tensorflow/tools/ci_build/windows/bazel/bazel_test_lib.sh
@@ -117,7 +117,7 @@ function run_configure_for_cpu_build {
   export TF_NEED_VERBS=0
   export TF_NEED_GCP=0
   export TF_NEED_HDFS=0
-  export TF_NEED_OPENCL_SYCL=0
+  export TF_NEED_OPENCL=0
   echo "" | ./configure
 }
 
@@ -141,7 +141,7 @@ function run_configure_for_gpu_build {
   export TF_NEED_MKL=0
   export TF_NEED_GCP=0
   export TF_NEED_HDFS=0
-  export TF_NEED_OPENCL_SYCL=0
+  export TF_NEED_OPENCL=0
 
   # TODO(pcloudy): Remove this after TensorFlow uses its own CRSOOTOOL
   # for GPU build on Windows
diff --git a/tensorflow/tools/docker/Dockerfile.devel-gpu-cuda9-cudnn7 b/tensorflow/tools/docker/Dockerfile.devel-gpu-cuda9-cudnn7
index 9bcc3925a8..64ebc4607a 100644
--- a/tensorflow/tools/docker/Dockerfile.devel-gpu-cuda9-cudnn7
+++ b/tensorflow/tools/docker/Dockerfile.devel-gpu-cuda9-cudnn7
@@ -101,11 +101,12 @@ RUN ln -s /usr/local/cuda/lib64/stubs/libcuda.so /usr/local/cuda/lib64/stubs/lib
                 --jobs=${TF_AVAILABLE_CPUS} \
                 tensorflow/tools/pip_package:build_pip_package && \
     mkdir /pip_pkg && \
-    bazel-bin/tensorflow/tools/pip_package/build_pip_package /pip_pkg && \
-    pip --no-cache-dir install --upgrade /pip_pkg/tensorflow-*.whl && \
+    bazel-bin/tensorflow/tools/pip_package/build_pip_package /pip_pkg
+
+# Clean up pip wheel and Bazel cache when done.
+RUN pip --no-cache-dir install --upgrade /pip_pkg/tensorflow-*.whl && \
     rm -rf /pip_pkg && \
     rm -rf /root/.cache
-# Clean up pip wheel and Bazel cache when done.
 
 WORKDIR /root
 
diff --git a/tensorflow/tools/docker/Dockerfile.gpu b/tensorflow/tools/docker/Dockerfile.gpu
index e212d10290..0571dd7391 100644
--- a/tensorflow/tools/docker/Dockerfile.gpu
+++ b/tensorflow/tools/docker/Dockerfile.gpu
@@ -1,4 +1,4 @@
-FROM nvidia/cuda:8.0-cudnn6-runtime-ubuntu16.04
+FROM nvidia/cuda:8.0-cudnn6-devel-ubuntu16.04
 
 LABEL maintainer="Craig Citro <craigcitro@google.com>"
 
diff --git a/tensorflow/tools/docker/README.md b/tensorflow/tools/docker/README.md
index e35c58ff80..2e5a0038ed 100644
--- a/tensorflow/tools/docker/README.md
+++ b/tensorflow/tools/docker/README.md
@@ -60,20 +60,6 @@ Building TensorFlow Docker containers should be done through the
 script. The raw Dockerfiles should not be used directly as they contain strings
 to be replaced by the script during the build.
 
-Attempting to run [parameterized_docker_build.sh](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/tools/docker/parameterized_docker_build.sh)
-from a binary docker image such as for example `tensorflow/tensorflow:latest` will
-not work. One needs to execute the script from a developer docker image since by
-contrast with a binary docker image it contains not only the compiled solution but
-also the tensorflow source code. Please select the appropriate developer docker
-image of tensorflow at `tensorflow/tensorflow:[.](https://hub.docker.com/r/tensorflow/tensorflow/tags/)`.
-
-The smallest command line to generate a docker image will then be:
-```docker run -it tensorflow/tensorflow:"right_tag"```
-
-If you would like to start a jupyter notebook on your docker container, make sure
-to map the port 8888 of your docker container by adding -p 8888:8888 to the above
-command.
-
 To use the script, specify the container type (`CPU` vs. `GPU`), the desired
 Python version (`PYTHON2` vs. `PYTHON3`) and whether the developer Docker image
 is to be built (`NO` vs. `YES`). In addition, you need to specify the central
diff --git a/tensorflow/tools/graph_transforms/BUILD b/tensorflow/tools/graph_transforms/BUILD
index 9216008600..1bf7113c9e 100644
--- a/tensorflow/tools/graph_transforms/BUILD
+++ b/tensorflow/tools/graph_transforms/BUILD
@@ -131,8 +131,6 @@ cc_library(
         "//tensorflow/core:lib",
         "//tensorflow/core:protos_all_cc",
         "//tensorflow/core:tensorflow",
-        "//tensorflow/contrib/rnn:gru_ops_op_lib",
-        "//tensorflow/contrib/rnn:lstm_ops_op_lib",
     ] + if_not_windows([
         "//tensorflow/core/kernels:quantized_ops",
         "//tensorflow/core/kernels:remote_fused_graph_rewriter_transform",
diff --git a/tensorflow/tools/graph_transforms/quantize_nodes.cc b/tensorflow/tools/graph_transforms/quantize_nodes.cc
index 97e8f77616..2b85e7e83c 100644
--- a/tensorflow/tools/graph_transforms/quantize_nodes.cc
+++ b/tensorflow/tools/graph_transforms/quantize_nodes.cc
@@ -759,7 +759,6 @@ Status QuantizeNodes(const GraphDef& input_graph_def,
           NodeDef reshape_dims;
           reshape_dims.set_op("Const");
           reshape_dims.set_name(unique_input_name + "/reshape_dims");
-          AddNodeInput("^" + input_name, &reshape_dims);
           SetNodeAttr("dtype", DT_INT32, &reshape_dims);
           Tensor reshape_dims_tensor(DT_INT32, {1});
           reshape_dims_tensor.flat<int32>()(0) = -1;
@@ -769,7 +768,6 @@ Status QuantizeNodes(const GraphDef& input_graph_def,
           NodeDef reduction_dims;
           reduction_dims.set_op("Const");
           reduction_dims.set_name(unique_input_name + "/reduction_dims");
-          AddNodeInput("^" + input_name, &reduction_dims);
           SetNodeAttr("dtype", DT_INT32, &reduction_dims);
           Tensor reduction_dims_tensor(DT_INT32, {1});
           reduction_dims_tensor.flat<int32>()(0) = 0;
diff --git a/tensorflow/tools/pip_package/setup.py b/tensorflow/tools/pip_package/setup.py
index a493c6f2aa..60282f6aa3 100644
--- a/tensorflow/tools/pip_package/setup.py
+++ b/tensorflow/tools/pip_package/setup.py
@@ -29,7 +29,7 @@ from setuptools.dist import Distribution
 # This version string is semver compatible, but incompatible with pip.
 # For pip, we will remove all '-' characters from this string, and use the
 # result for pip.
-_VERSION = '1.4.0'
+_VERSION = '1.4.0-rc1'
 
 REQUIRED_PACKAGES = [
     'absl-py',
diff --git a/third_party/aws.BUILD b/third_party/aws.BUILD
index bc9e37ffb3..bc6a2fd8cc 100644
--- a/third_party/aws.BUILD
+++ b/third_party/aws.BUILD
@@ -21,9 +21,6 @@ cc_library(
         "@%ws%//tensorflow:linux_ppc64le": glob([
             "aws-cpp-sdk-core/source/platform/linux-shared/*.cpp",
         ]),
-        "@%ws%//tensorflow:raspberry_pi_armeabi": glob([
-            "aws-cpp-sdk-core/source/platform/linux-shared/*.cpp",
-        ]),
         "//conditions:default": [],
     }) + glob([
         "aws-cpp-sdk-core/include/**/*.h",
diff --git a/third_party/curl.BUILD b/third_party/curl.BUILD
index 805a30d262..882967df1c 100644
--- a/third_party/curl.BUILD
+++ b/third_party/curl.BUILD
@@ -477,6 +477,7 @@ genrule(
         "#  define HAVE_RAND_EGD 1",
         "#  define HAVE_RAND_STATUS 1",
         "#  define HAVE_SSL_GET_SHUTDOWN 1",
+        "#  define HAVE_STROPTS_H 1",
         "#  define HAVE_TERMIOS_H 1",
         "#  define OS \"x86_64-pc-linux-gnu\"",
         "#  define RANDOM_FILE \"/dev/urandom\"",
diff --git a/third_party/sycl/crosstool/CROSSTOOL.tpl b/third_party/sycl/crosstool/CROSSTOOL.tpl
index f8e50efcc6..32884d71e7 100755
--- a/third_party/sycl/crosstool/CROSSTOOL.tpl
+++ b/third_party/sycl/crosstool/CROSSTOOL.tpl
@@ -35,10 +35,10 @@ toolchain {
   tool_path { name: "compat-ld" path: "/usr/bin/ld" }
   tool_path { name: "cpp" path: "/usr/bin/cpp" }
   tool_path { name: "dwp" path: "/usr/bin/dwp" }
-  tool_path { name: "gcc" path: "%{sycl_impl}" }
+  tool_path { name: "gcc" path: "computecpp" }
   # Use "-std=c++11" for nvcc. For consistency, force both the host compiler
   # and the device compiler to use "-std=c++11".
-  cxx_flag: "%{c++_std}"
+  cxx_flag: "-std=c++11"
   linker_flag: "-Wl,-no-as-needed"
   linker_flag: "-lstdc++"
   linker_flag: "-B/usr/bin/"
@@ -53,7 +53,7 @@ toolchain {
   cxx_builtin_include_directory: "/usr/local/include"
   cxx_builtin_include_directory: "/usr/include"
 
-  cxx_builtin_include_directory: "%{sycl_include_dir}"
+  cxx_builtin_include_directory: "%{computecpp_toolkit_path}"
   cxx_builtin_include_directory: "%{python_lib_path}"
 
   tool_path { name: "gcov" path: "/usr/bin/gcov" }
@@ -214,4 +214,4 @@ toolchain {
     compiler_flag: "-O2"
     compiler_flag: "-DNDEBUG"
   }
-}
\ No newline at end of file
+}
diff --git a/third_party/sycl/crosstool/trisycl.tpl b/third_party/sycl/crosstool/trisycl.tpl
deleted file mode 100644
index b470772fbf..0000000000
--- a/third_party/sycl/crosstool/trisycl.tpl
+++ /dev/null
@@ -1,73 +0,0 @@
-#!/usr/bin/env python
-
-import os
-import sys
-import tempfile
-from subprocess import call
-
-CPU_CXX_COMPILER = ('%{host_cxx_compiler}')
-CPU_C_COMPILER = ('%{host_c_compiler}')
-
-CURRENT_DIR = os.path.dirname(sys.argv[0])
-TRISYCL_INCLUDE_DIR = CURRENT_DIR + '/../sycl/include'
-
-def main():
-  compiler_flags = []
-
-  remove_flags = ('-Wl,--no-undefined', '-Wno-unused-but-set-variable', '-Wignored-attributes', '-fno-exceptions')
-  # remove -fsamotoze-coverage from string with g++
-  if 'g++' in CPU_CXX_COMPILER:
-    remove_flags += ('-fsanitize-coverage',)
-    compiler_flags += ['-fopenmp']
-  else:
-    compiler_flags += ['-fopenmp=libomp']
-
-  compiler_flags += [flag for flag in sys.argv[1:] if not flag.startswith(remove_flags)]
-
-
-  output_file_index = compiler_flags.index('-o') + 1
-  output_file_name = compiler_flags[output_file_index]
-
-  if(output_file_index == 1):
-    # we are linking
-    return call([CPU_CXX_COMPILER] + compiler_flags +
-                ['-Wl,--no-undefined'])
-
-  # find what we compile
-  compiling_cpp = 0
-  if('-c' in compiler_flags):
-      compiled_file_index = compiler_flags.index('-c') + 1
-      compiled_file_name = compiler_flags[compiled_file_index]
-      if(compiled_file_name.endswith(('.cc', '.c++', '.cpp', '.CPP',
-                                      '.C', '.cxx'))):
-        compiling_cpp = 1;
-
-  debug_flags = ['-DTRISYCL_DEBUG', '-DBOOST_LOG_DYN_LINK', '-DTRISYCL_TRACE_KERNEL', '-lpthread', '-lboost_log', '-g', '-rdynamic']
-
-  opt_flags = ['-DNDEBUG', '-DBOOST_DISABLE_ASSERTS', '-O3']
-
-  compiler_flags = compiler_flags + ['-DEIGEN_USE_SYCL=1',
-                                     '-DEIGEN_HAS_C99_MATH',
-                                     '-DEIGEN_MAX_ALIGN_BYTES=16',
-                                     '-DTENSORFLOW_USE_SYCL'] + opt_flags
-
-  if(compiling_cpp == 1):
-    # create a blacklist of folders that will be skipped when compiling
-    # with triSYCL
-    skip_extensions = [".cu.cc"]
-    skip_folders = ["tensorflow/compiler", "tensorflow/docs_src", "tensorflow/tensorboard", "third_party", "external", "hexagon"]
-    skip_folders = [(folder + '/') for folder in skip_folders]
-    # if compiling external project skip triSYCL
-    if any(compiled_file_name.endswith(_ext) for _ext in skip_extensions) or any(_folder in output_file_name for _folder in skip_folders):
-      return call([CPU_CXX_COMPILER] + compiler_flags)
-
-    host_compiler_flags = ['-xc++', '-Wno-unused-variable',
-                           '-I', TRISYCL_INCLUDE_DIR] + compiler_flags
-    x = call([CPU_CXX_COMPILER] + host_compiler_flags)
-    return x
-  else:
-    # compile for C
-    return call([CPU_C_COMPILER] + compiler_flags)
-
-if __name__ == '__main__':
-  sys.exit(main())
diff --git a/third_party/sycl/sycl/BUILD.tpl b/third_party/sycl/sycl/BUILD.tpl
index b6ceaadda7..6cad190630 100755
--- a/third_party/sycl/sycl/BUILD.tpl
+++ b/third_party/sycl/sycl/BUILD.tpl
@@ -10,27 +10,16 @@ package(default_visibility = ["//visibility:public"])
 exports_files(["LICENSE.text"])
 
 config_setting(
-    name = "using_sycl_ccpp",
-    define_values = {
-        "using_sycl": "true",
-        "using_trisycl": "false",
+    name = "using_sycl",
+    values = {
+        "define": "using_sycl=true",
     },
 )
 
-config_setting(
-    name = "using_sycl_trisycl",
-    define_values = {
-        "using_sycl": "true",
-        "using_trisycl": "false",
-    },
-)
-
-
 cc_library(
     name = "sycl_headers",
     hdrs = glob([
         "**/*.h",
-        "**/*.hpp",
     ]),
     includes = [".", "include"],
 )
diff --git a/third_party/sycl/sycl/build_defs.bzl.tpl b/third_party/sycl/sycl/build_defs.bzl.tpl
index 33386f8957..09bef0a661 100755
--- a/third_party/sycl/sycl/build_defs.bzl.tpl
+++ b/third_party/sycl/sycl/build_defs.bzl.tpl
@@ -5,24 +5,9 @@ def if_sycl(if_true, if_false = []):
 
     Returns a select statement which evaluates to if_true if we're building
     with SYCL enabled.  Otherwise, the select statement evaluates to if_false.
-    If we are building with triSYCL instead of ComputeCPP, a list with
-    the first element of if_true is returned.
-    """
-    return select({
-        "@local_config_sycl//sycl:using_sycl_ccpp": if_true,
-        "@local_config_sycl//sycl:using_sycl_trisycl": if_true[0:1],
-        "//conditions:default": if_false
-    })
-
-def if_ccpp(if_true, if_false = []):
-    """Shorthand for select()'ing if we are building with ComputeCPP.
 
-    Returns a select statement which evaluates to if_true if we're building
-    with ComputeCPP enabled. Otherwise, the select statement evaluates
-    to if_false.
     """
     return select({
-        "@local_config_sycl//sycl:using_sycl_ccpp": if_true,
-        "@local_config_sycl//sycl:using_sycl_trisycl": if_false,
+        "@local_config_sycl//sycl:using_sycl": if_true,
         "//conditions:default": if_false
     })
diff --git a/third_party/sycl/sycl_configure.bzl b/third_party/sycl/sycl_configure.bzl
index a0c9e4e43a..7af063178e 100644
--- a/third_party/sycl/sycl_configure.bzl
+++ b/third_party/sycl/sycl_configure.bzl
@@ -5,26 +5,20 @@
   * HOST_CXX_COMPILER:  The host C++ compiler
   * HOST_C_COMPILER:    The host C compiler
   * COMPUTECPP_TOOLKIT_PATH: The path to the ComputeCpp toolkit.
-  * TRISYCL_INCLUDE_DIR: The path to the include directory of triSYCL.
-                         (if using triSYCL instead of ComputeCPP)
   * PYTHON_LIB_PATH: The path to the python lib
 """
 
 _HOST_CXX_COMPILER = "HOST_CXX_COMPILER"
 _HOST_C_COMPILER= "HOST_C_COMPILER"
 _COMPUTECPP_TOOLKIT_PATH = "COMPUTECPP_TOOLKIT_PATH"
-_TRISYCL_INCLUDE_DIR = "TRISYCL_INCLUDE_DIR"
 _PYTHON_LIB_PATH = "PYTHON_LIB_PATH"
 
 def _enable_sycl(repository_ctx):
-  if "TF_NEED_OPENCL_SYCL" in repository_ctx.os.environ:
-    enable_sycl = repository_ctx.os.environ["TF_NEED_OPENCL_SYCL"].strip()
+  if "TF_NEED_OPENCL" in repository_ctx.os.environ:
+    enable_sycl = repository_ctx.os.environ["TF_NEED_OPENCL"].strip()
     return enable_sycl == "1"
   return False
 
-def _enable_compute_cpp(repository_ctx):
-  return _COMPUTECPP_TOOLKIT_PATH in repository_ctx.os.environ
-
 def auto_configure_fail(msg):
   """Output failure message when auto configuration fails."""
   red = "\033[0;31m"
@@ -65,15 +59,6 @@ def find_computecpp_root(repository_ctx):
     return sycl_name
   fail("Cannot find SYCL compiler, please correct your path")
 
-def find_trisycl_include_dir(repository_ctx):
-  """Find triSYCL include directory. """
-  sycl_name = ""
-  if _TRISYCL_INCLUDE_DIR in repository_ctx.os.environ:
-    sycl_name = repository_ctx.os.environ[_TRISYCL_INCLUDE_DIR].strip()
-    if sycl_name.startswith("/"):
-      return sycl_name
-  fail( "Cannot find triSYCL include directory, please correct your path")
-
 def find_python_lib(repository_ctx):
   """Returns python path."""
   if _PYTHON_LIB_PATH in repository_ctx.os.environ:
@@ -186,53 +171,26 @@ def _sycl_autoconf_imp(repository_ctx):
     _tpl(repository_ctx, "sycl:platform.bzl")
     _tpl(repository_ctx, "crosstool:BUILD")
     _file(repository_ctx, "sycl:LICENSE.text")
-
-    if _enable_compute_cpp(repository_ctx):
-      _tpl(repository_ctx, "crosstool:computecpp",
-      {
-        "%{host_cxx_compiler}" : find_cc(repository_ctx),
-        "%{host_c_compiler}" : find_c(repository_ctx)
-      })
-
-      computecpp_root = find_computecpp_root(repository_ctx);
-      _check_dir(repository_ctx, computecpp_root)
-
-      _tpl(repository_ctx, "crosstool:CROSSTOOL",
-      {
-        "%{sycl_include_dir}" : computecpp_root,
-        "%{sycl_impl}" : "computecpp",
-        "%{c++_std}" : "-std=c++11",
-        "%{python_lib_path}" : find_python_lib(repository_ctx),
-      })
-
-      # symlink libraries
-      _check_lib(repository_ctx, computecpp_root+"/lib", "libComputeCpp.so" )
-      _symlink_dir(repository_ctx, computecpp_root + "/lib", "sycl/lib")
-      _symlink_dir(repository_ctx, computecpp_root + "/include", "sycl/include")
-      _symlink_dir(repository_ctx, computecpp_root + "/bin", "sycl/bin")
-    else:
-
-      trisycl_include_dir = find_trisycl_include_dir(repository_ctx);
-      _check_dir(repository_ctx, trisycl_include_dir)
-
-      _tpl(repository_ctx, "crosstool:trisycl",
-      {
-        "%{host_cxx_compiler}" : find_cc(repository_ctx),
-        "%{host_c_compiler}" : find_c(repository_ctx),
-        "%{trisycl_include_dir}" : trisycl_include_dir
-      })
-
-
-      _tpl(repository_ctx, "crosstool:CROSSTOOL",
-      {
-        "%{sycl_include_dir}" : trisycl_include_dir,
-        "%{sycl_impl}" : "trisycl",
-        "%{c++_std}" : "-std=c++1y",
-        "%{python_lib_path}" : find_python_lib(repository_ctx),
-      })
-
-      _symlink_dir(repository_ctx, trisycl_include_dir, "sycl/include")
-
+    _tpl(repository_ctx, "crosstool:computecpp",
+    {
+      "%{host_cxx_compiler}" : find_cc(repository_ctx),
+      "%{host_c_compiler}" : find_c(repository_ctx),
+    })
+
+    computecpp_root = find_computecpp_root(repository_ctx)
+    _check_dir(repository_ctx, computecpp_root)
+
+    _tpl(repository_ctx, "crosstool:CROSSTOOL",
+    {
+      "%{computecpp_toolkit_path}" : computecpp_root,
+      "%{python_lib_path}" : find_python_lib(repository_ctx),
+    })
+
+    # symlink libraries
+    _check_lib(repository_ctx, computecpp_root+"/lib", "libComputeCpp.so" )
+    _symlink_dir(repository_ctx, computecpp_root + "/lib", "sycl/lib")
+    _symlink_dir(repository_ctx, computecpp_root + "/include", "sycl/include")
+    _symlink_dir(repository_ctx, computecpp_root + "/bin", "sycl/bin")
 
 sycl_configure = repository_rule(
   implementation = _sycl_autoconf_imp,
diff --git a/third_party/zlib.BUILD b/third_party/zlib.BUILD
index d164ee719c..8509668891 100644
--- a/third_party/zlib.BUILD
+++ b/third_party/zlib.BUILD
@@ -49,7 +49,7 @@ cc_library(
         ":windows_msvc": [],
         "//conditions:default": [
             "-Wno-shift-negative-value",
-            "-DZ_HAVE_UNISTD_H",
+            "-Wno-implicit-function-declaration",
         ],
     }),
     includes = ["."],
diff --git a/tools/bazel.rc b/tools/bazel.rc
index 04c24d7511..2d7201ae57 100644
--- a/tools/bazel.rc
+++ b/tools/bazel.rc
@@ -9,16 +9,13 @@ build:win-cuda --define=using_cuda=true --define=using_cuda_nvcc=true
 build:mkl --define=using_mkl=true
 
 build:sycl --crosstool_top=@local_config_sycl//crosstool:toolchain
-build:sycl --define=using_sycl=true --define=using_trisycl=false
+build:sycl --define=using_sycl=true
 
 build:sycl_nodouble --crosstool_top=@local_config_sycl//crosstool:toolchain
 build:sycl_nodouble --define=using_sycl=true --cxxopt -DTENSORFLOW_SYCL_NO_DOUBLE
 
 build:sycl_asan --crosstool_top=@local_config_sycl//crosstool:toolchain
-build:sycl_asan --define=using_sycl=true --define=using_trisycl=false --copt -fno-omit-frame-pointer --copt -fsanitize-coverage=3 --copt -DGPR_NO_DIRECT_SYSCALLS --linkopt -fPIC --linkopt -fsanitize=address
-
-build:sycl_trisycl --crosstool_top=@local_config_sycl//crosstool:toolchain
-build:sycl_trisycl --define=using_sycl=true --define=using_trisycl=true
+build:sycl_asan --define=using_sycl=true --copt -fno-omit-frame-pointer --copt -fsanitize-coverage=3 --copt -DGPR_NO_DIRECT_SYSCALLS --linkopt -fPIC --linkopt -fsanitize=address
 
 build --define=use_fast_cpp_protos=true
 build --define=allow_oversize_protos=true
diff --git a/util/python/BUILD b/util/python/BUILD
index f5fa0c6d29..96daf9947a 100644
--- a/util/python/BUILD
+++ b/util/python/BUILD
@@ -1,4 +1,4 @@
-licenses(["notice"])  # New BSD, Python Software Foundation
+licenses(["restricted"])
 
 package(default_visibility = ["//visibility:public"])
 
-- 
GitLab


From 62c9c2065acce261f1c3ede8f54047b7af684178 Mon Sep 17 00:00:00 2001
From: Jay Young <yangjian@patsnap.com>
Date: Wed, 22 Nov 2017 16:56:52 +0800
Subject: [PATCH 0216/1225] Add dependenctyy to tensorflow/python/keras/BUILD

---
 tensorflow/python/keras/BUILD | 1 +
 1 file changed, 1 insertion(+)
 mode change 100644 => 100755 tensorflow/python/keras/BUILD

diff --git a/tensorflow/python/keras/BUILD b/tensorflow/python/keras/BUILD
old mode 100644
new mode 100755
index 4db48b45ed..ff5d7defa2
--- a/tensorflow/python/keras/BUILD
+++ b/tensorflow/python/keras/BUILD
@@ -147,6 +147,7 @@ py_library(
         "//tensorflow/python:variables",
         "//tensorflow/python/estimator",
         "//tensorflow/python/estimator:model_fn",
+        "//tensorflow/python/saved_model",
         "@six_archive//:six",
     ],
 )
-- 
GitLab


From 9f05fc47fc13c4f0b8dfb227e9687eb647dc740f Mon Sep 17 00:00:00 2001
From: Mark Daoust <markdaoust@google.com>
Date: Wed, 22 Nov 2017 06:46:36 -0800
Subject: [PATCH 0217/1225] Fix object_detection and skip_thoughts links

PiperOrigin-RevId: 176650384
---
 tensorflow/docs_src/mobile/mobile_intro.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tensorflow/docs_src/mobile/mobile_intro.md b/tensorflow/docs_src/mobile/mobile_intro.md
index 948563292a..3a002c4da2 100644
--- a/tensorflow/docs_src/mobile/mobile_intro.md
+++ b/tensorflow/docs_src/mobile/mobile_intro.md
@@ -82,7 +82,7 @@ new object enters or leaves the scene. We have some sample code for this
 available for Android [on
 Github](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/examples/android),
 and also a [more general object detection
-model](https://github.com/tensorflow/models/tree/master/object_detection/README.md)
+model](https://github.com/tensorflow/models/tree/master/research/object_detection/README.md)
 available as well.
 
 ### Gesture Recognition
@@ -134,7 +134,7 @@ that covers everything from sentiment analysis to topic discovery. You’re like
 to have your own categories or labels that you want to apply, so the best place
 to start is with an example
 like
-[Skip-Thoughts](https://github.com/tensorflow/models/tree/master/skip_thoughts/),
+[Skip-Thoughts](https://github.com/tensorflow/models/tree/master/research/skip_thoughts/),
 and then train on your own examples.
 
 ### Voice Synthesis
-- 
GitLab


From a235f23d5babcffa05b6d190c3e1a8909afb5273 Mon Sep 17 00:00:00 2001
From: Mark Heffernan <meheff@google.com>
Date: Wed, 22 Nov 2017 08:10:09 -0800
Subject: [PATCH 0218/1225] Roll forward new copy insertion pass.

Original cl: cl/174423881, rollback cl: cl/174505237.

This roll forward includes the following changes from the original to address various issues uncovered with the rollback:

(1) A fix for a problem with fusion instruction serialization was broken out and submitted separately (cl/176035108).

(2) A dataflow analysis fix was broken out and submitted separately (cl/176035108)

(3) Adding RunBenchmarks to our unit test main was broken out. Fix for uncovered segv in while_test_cpu benchmark in pending cl/176068232.

(4) Moved a cpu-specific copy-insertion pass into it's own file, and added tests.

(5) Renamed gpu/copy_insertion.* to gpu/gpu_copy_insertion.* to match cpu side.

PiperOrigin-RevId: 176658339
---
 tensorflow/compiler/xla/service/BUILD         |   10 +-
 .../compiler/xla/service/buffer_assignment.cc |    1 -
 .../xla/service/buffer_assignment_test.cc     |   78 +-
 .../compiler/xla/service/copy_insertion.cc    | 1581 +++++++++++------
 .../compiler/xla/service/copy_insertion.h     |   43 +-
 .../xla/service/copy_insertion_test.cc        |  947 ++++++++--
 tensorflow/compiler/xla/service/cpu/BUILD     |   34 +-
 .../compiler/xla/service/cpu/cpu_compiler.cc  |    7 +-
 .../xla/service/cpu/cpu_copy_insertion.cc     |   43 +
 .../xla/service/cpu/cpu_copy_insertion.h      |   42 +
 .../service/cpu/cpu_copy_insertion_test.cc    |  139 ++
 tensorflow/compiler/xla/service/gpu/BUILD     |   16 +-
 .../xla/service/gpu/copy_insertion.cc         |   71 -
 .../compiler/xla/service/gpu/gpu_compiler.cc  |    5 +-
 .../xla/service/gpu/gpu_copy_insertion.cc     |  112 ++
 ...{copy_insertion.h => gpu_copy_insertion.h} |   21 +-
 .../xla/service/gpu/while_transformer_test.cc |   61 +-
 .../xla/service/hlo_alias_analysis.cc         |   12 +-
 tensorflow/compiler/xla/service/hlo_dce.cc    |    8 +
 tensorflow/compiler/xla/tests/tuple_test.cc   |    3 +-
 20 files changed, 2370 insertions(+), 864 deletions(-)
 create mode 100644 tensorflow/compiler/xla/service/cpu/cpu_copy_insertion.cc
 create mode 100644 tensorflow/compiler/xla/service/cpu/cpu_copy_insertion.h
 create mode 100644 tensorflow/compiler/xla/service/cpu/cpu_copy_insertion_test.cc
 delete mode 100644 tensorflow/compiler/xla/service/gpu/copy_insertion.cc
 create mode 100644 tensorflow/compiler/xla/service/gpu/gpu_copy_insertion.cc
 rename tensorflow/compiler/xla/service/gpu/{copy_insertion.h => gpu_copy_insertion.h} (56%)

diff --git a/tensorflow/compiler/xla/service/BUILD b/tensorflow/compiler/xla/service/BUILD
index db265510f2..1023d3e5dc 100644
--- a/tensorflow/compiler/xla/service/BUILD
+++ b/tensorflow/compiler/xla/service/BUILD
@@ -1641,10 +1641,14 @@ cc_library(
     deps = [
         ":buffer_liveness",
         ":hlo",
+        ":hlo_alias_analysis",
+        ":hlo_dce",
+        ":hlo_graph_dumper",
+        ":hlo_ordering",
         ":hlo_pass",
         ":liveness_util",
         ":logical_buffer",
-        ":tuple_points_to_analysis",
+        ":tuple_simplifier",
         "//tensorflow/compiler/xla:status_macros",
         "//tensorflow/compiler/xla:statusor",
         "//tensorflow/compiler/xla:types",
@@ -1659,15 +1663,17 @@ tf_cc_test(
     deps = [
         ":copy_insertion",
         ":hlo",
+        ":hlo_graph_dumper",
         ":hlo_matchers",
-        ":tuple_points_to_analysis",
         "//tensorflow/compiler/xla:literal_util",
         "//tensorflow/compiler/xla:shape_util",
         "//tensorflow/compiler/xla:test",
         "//tensorflow/compiler/xla:test_helpers",
         "//tensorflow/compiler/xla:xla_data_proto",
+        "//tensorflow/compiler/xla/legacy_flags:debug_options_flags",
         "//tensorflow/compiler/xla/tests:hlo_test_base",
         "//tensorflow/compiler/xla/tests:xla_internal_test_main",
+        "//tensorflow/core:test",
     ],
 )
 
diff --git a/tensorflow/compiler/xla/service/buffer_assignment.cc b/tensorflow/compiler/xla/service/buffer_assignment.cc
index 033034b421..19a9ff04de 100644
--- a/tensorflow/compiler/xla/service/buffer_assignment.cc
+++ b/tensorflow/compiler/xla/service/buffer_assignment.cc
@@ -1265,7 +1265,6 @@ const LogicalBuffer* AddBufferToColocatedSet(
   // CopyInsertion ensures root points-to set is unambiguous and distinct.
   const auto& points_to = points_to_analysis.GetPointsToSet(instruction);
   DCHECK(!points_to.IsAmbiguous());
-  DCHECK(points_to.IsDistinct());
   colocated_set->push_back(points_to.element(index)[0]);
   return colocated_set->back();
 }
diff --git a/tensorflow/compiler/xla/service/buffer_assignment_test.cc b/tensorflow/compiler/xla/service/buffer_assignment_test.cc
index 89410f42bd..4d4c5b953e 100644
--- a/tensorflow/compiler/xla/service/buffer_assignment_test.cc
+++ b/tensorflow/compiler/xla/service/buffer_assignment_test.cc
@@ -1538,8 +1538,6 @@ TEST_F(WhileBufferAssignmentTest, OneForwardBackwardWhileLoopSet) {
       HloInstruction::CreateConstant(Literal::CreateR0<float>(0.0)));
   auto output0 = builder.AddInstruction(
       HloInstruction::CreateBroadcast(data_shape_, zero, {1}));
-  auto output1 = builder.AddInstruction(
-      HloInstruction::CreateBroadcast(data_shape_, zero, {1}));
 
   auto cond0 =
       module->AddEmbeddedComputation(BuildWhileConditionComputation("cond"));
@@ -1556,10 +1554,8 @@ TEST_F(WhileBufferAssignmentTest, OneForwardBackwardWhileLoopSet) {
   auto body1 =
       module->AddEmbeddedComputation(BuildWhileBodyComputation("body"));
 
-  auto tuple1 = builder.AddInstruction(
-      HloInstruction::CreateTuple({input0, weights0, output1}));
   auto while1 = builder.AddInstruction(
-      HloInstruction::CreateWhile(loop_state_shape_, cond1, body1, tuple1));
+      HloInstruction::CreateWhile(loop_state_shape_, cond1, body1, while0));
 
   module->AddEntryComputation(builder.Build());
   RunCopyInsertion(module.get());
@@ -1676,11 +1672,14 @@ TEST_F(WhileBufferAssignmentTest, WhileLoopsInterferingResultRange) {
   auto while1 = builder.AddInstruction(
       HloInstruction::CreateWhile(loop_state_shape_, cond, body, tuple1));
 
+  auto gte0 = builder.AddInstruction(
+      HloInstruction::CreateGetTupleElement(data_shape_, while0, 0));
+  auto gte1 = builder.AddInstruction(
+      HloInstruction::CreateGetTupleElement(data_shape_, while1, 1));
   auto root_add = builder.AddInstruction(HloInstruction::CreateBinary(
-      while0->shape(), HloOpcode::kAdd, while0, while1));
-  module->AddEntryComputation(builder.Build());
+      while0->shape(), HloOpcode::kAdd, gte0, gte1));
 
-  RunCopyInsertion(module.get());
+  module->AddEntryComputation(builder.Build());
 
   {
     FlattenCallGraph flatten;
@@ -1688,22 +1687,22 @@ TEST_F(WhileBufferAssignmentTest, WhileLoopsInterferingResultRange) {
     EXPECT_TRUE(result);
   }
 
+  RunCopyInsertion(module.get());
+
   auto sequence =
       CreateMemoryMinimizingSequence(*module, ByteSizeOf).ConsumeValueOrDie();
 
   // To trigger b/38494731, we want a specific Hlo sequence for the
   // root computation, so we overwrite that entry with a manually
   // crafted sequence.
-  std::vector<const HloInstruction*> sequence_for_buffer_assigment = {
-      input1,   weights1, one,     output1, tuple1, while1,  input0,
-      weights0, zero,     output0, tuple0,  while0, root_add};
+  sequence[module->entry_computation()] = {
+      input1, weights1, one,     output1, while1->operand(0), while1,
+      input0, weights0, zero,    output0, while0->operand(0), while0,
+      gte0,   gte1,     root_add};
 
   // If this ASSERT_TRUE fails, we constructed a bogus sequence above
   // and this test itself is buggy.
-  ASSERT_TRUE(IsPostOrderTraversal(sequence_for_buffer_assigment));
-
-  sequence[module->entry_computation()] =
-      std::move(sequence_for_buffer_assigment);
+  ASSERT_TRUE(IsPostOrderTraversal(sequence[module->entry_computation()]));
 
   auto assignment =
       BufferAssigner::Run(
@@ -1715,55 +1714,6 @@ TEST_F(WhileBufferAssignmentTest, WhileLoopsInterferingResultRange) {
   EXPECT_TRUE(BuffersDistinct({while0}, {while1}, *assignment));
 }
 
-// Test buffer assignment for while nodes with multiple uses.
-// TODO(b/37245345): Fix buffer assignment for this case.
-TEST_F(WhileBufferAssignmentTest, DISABLED_TwoWhiles) {
-  auto module = MakeUnique<HloModule>(TestName());
-  auto builder = HloComputation::Builder(TestName());
-
-  auto input0 = builder.AddInstruction(
-      HloInstruction::CreateParameter(0, data_shape_, "input0"));
-  auto weights0 = builder.AddInstruction(
-      HloInstruction::CreateParameter(1, data_shape_, "weights0"));
-
-  auto zero = builder.AddInstruction(
-      HloInstruction::CreateConstant(Literal::CreateR0<float>(0.0)));
-  auto output0 = builder.AddInstruction(
-      HloInstruction::CreateBroadcast(data_shape_, zero, {1}));
-
-  auto cond0 =
-      module->AddEmbeddedComputation(BuildWhileConditionComputation("cond"));
-  auto body0 =
-      module->AddEmbeddedComputation(BuildWhileBodyComputation("body"));
-
-  auto tuple0 = builder.AddInstruction(
-      HloInstruction::CreateTuple({input0, weights0, output0}));
-  auto while0 = builder.AddInstruction(
-      HloInstruction::CreateWhile(loop_state_shape_, cond0, body0, tuple0));
-  auto while1 = builder.AddInstruction(
-      HloInstruction::CreateWhile(loop_state_shape_, cond0, body0, while0));
-
-  auto get0 = builder.AddInstruction(
-      HloInstruction::CreateGetTupleElement(data_shape_, while0, 2));
-  auto get1 = builder.AddInstruction(
-      HloInstruction::CreateGetTupleElement(data_shape_, while1, 2));
-  builder.AddInstruction(
-      HloInstruction::CreateBinary(data_shape_, HloOpcode::kAdd, get0, get1));
-  module->AddEntryComputation(builder.Build());
-
-  RunCopyInsertion(module.get());
-
-  {
-    FlattenCallGraph flatten;
-    TF_ASSERT_OK_AND_ASSIGN(bool result, flatten.Run(module.get()));
-    EXPECT_TRUE(result);
-  }
-
-  auto assignment = RunBufferAssignment(module.get());
-
-  EXPECT_TRUE(BuffersDistinct({while0}, {while1}, *assignment));
-}
-
 TEST_F(WhileBufferAssignmentTest, WhilesDontShareEntryParamIfLiveOut) {
   auto module = MakeUnique<HloModule>(TestName());
   auto builder = HloComputation::Builder("entry");
diff --git a/tensorflow/compiler/xla/service/copy_insertion.cc b/tensorflow/compiler/xla/service/copy_insertion.cc
index 0453a698a0..cd983bc03e 100644
--- a/tensorflow/compiler/xla/service/copy_insertion.cc
+++ b/tensorflow/compiler/xla/service/copy_insertion.cc
@@ -15,15 +15,17 @@ limitations under the License.
 
 #include "tensorflow/compiler/xla/service/copy_insertion.h"
 
-#include <memory>
-
+#include "tensorflow/compiler/xla/service/hlo_alias_analysis.h"
 #include "tensorflow/compiler/xla/service/hlo_computation.h"
+#include "tensorflow/compiler/xla/service/hlo_dce.h"
+#include "tensorflow/compiler/xla/service/hlo_graph_dumper.h"
 #include "tensorflow/compiler/xla/service/hlo_instruction.h"
 #include "tensorflow/compiler/xla/service/hlo_module.h"
 #include "tensorflow/compiler/xla/service/hlo_opcode.h"
+#include "tensorflow/compiler/xla/service/hlo_ordering.h"
 #include "tensorflow/compiler/xla/service/liveness_util.h"
 #include "tensorflow/compiler/xla/service/logical_buffer.h"
-#include "tensorflow/compiler/xla/service/tuple_points_to_analysis.h"
+#include "tensorflow/compiler/xla/service/tuple_simplifier.h"
 #include "tensorflow/compiler/xla/status_macros.h"
 #include "tensorflow/compiler/xla/statusor.h"
 #include "tensorflow/compiler/xla/types.h"
@@ -31,597 +33,1174 @@ limitations under the License.
 #include "tensorflow/core/lib/gtl/flatmap.h"
 #include "tensorflow/core/lib/gtl/flatset.h"
 #include "tensorflow/core/lib/strings/str_util.h"
+#include "tensorflow/core/lib/strings/strcat.h"
 #include "tensorflow/core/platform/logging.h"
 
 namespace xla {
 
+using ::tensorflow::str_util::Join;
+using ::tensorflow::strings::StrAppend;
+using ::tensorflow::strings::StrCat;
+
 namespace {
 
-using tensorflow::gtl::FlatMap;
-using tensorflow::gtl::FlatSet;
+bool IsEntryParameterValue(const HloValue& value) {
+  const HloComputation* computation = value.defining_instruction()->parent();
+  return value.defining_instruction()->opcode() == HloOpcode::kParameter &&
+         computation == computation->parent()->entry_computation();
+}
+
+bool IsConstantValue(const HloValue& value) {
+  return value.defining_instruction()->opcode() == HloOpcode::kConstant;
+}
+
+bool ValueIsReadOnly(const HloValue& value) {
+  return IsConstantValue(value) || IsEntryParameterValue(value);
+}
 
-// InstructionCopier encapsulates indices at which to copy 'instruction'.
-// All 'instruction' users in 'copy_users' are updated to use the copy.
+// Deep copy the given instructions 'from' and 'to' at the ShapeIndexes given in
+// 'indices_to_copy'. Add control edges from the respective kCopy instructions
+// in deep copy of 'from' to the respective kCopy instruction in the deep copy
+// of 'to'.
 //
-// Instruction copies are generated in two phases:
-// 1) Recording buffer indices at which 'instruction' requires copies (i.e.
-//    setting 'indices_to_copy_[index]'=true).
-// 2) Inserting kCopy instructions based on indices recorded in phase 1).
-//   *) Array instructions are copied by inserting a single kCopy instruction.
-//   *) Tuple-shaped instructions are copied by recursively expanding tuples
-//      (and tuple-shaped elements), and inserting kCopy instructions for any
-//      tuple elements which require a copy. As the recursion unwinds, new tuple
-//      instructions are added to gather the copied (and uncopied) references
-//      into the output tuple (i.e. the copy of the tuple-shaped instruction).
+// Requirements: 'from' and 'to' must have compatible shapes.
 //
-//      Example two-element tuple with one element that needs a copy:
+// For example, suppose 'from' and 'to' are two-element tuples where index 0 is
+// the only index to copy. Prior to deep-copying we have:
 //
-//             original-instruction
-//                   /    \
-//                GTE(0)  GTE(1)
-//                  |       |
-//                 Copy     |
-//                   \     /
-//                    Tuple  // copied-instruction
 //
-//      As an optimization, if the original instruction is itself a Tuple
-//      instruction, we elide the unnecessary extra GTE and Tuple instructions,
-//      and just insert the copy into a new Tuple instruction, with control
-//      dependencies to ensure the copy occurs after any possible interference.
-class InstructionCopier {
- public:
-  InstructionCopier(HloInstruction* instruction,
-                    const std::vector<HloInstruction*>& copy_users)
-      : instruction_(instruction),
-        copy_users_(copy_users),
-        indices_to_copy_(instruction->shape()),
-        control_predecessors_(instruction->shape()) {}
-
-  // Sets indices that are read-only, and thus do not need to be copied.
-  void SetReadOnlyIndices(const ShapeTree<bool>& read_only_indices) {
-    read_only_indices_ = read_only_indices;
-  }
+//      'from'
+//         |
+//        ...
+//         |
+//       'to'
+//
+// DeepCopyAndAddControlEdges produces:
+//
+//       'from'
+//        /   \
+//      GTE   GTE
+//       |     |
+//     Copy    |
+//    /   \   /
+//   |    Tuple
+//   |      |
+//  ctrl   ...
+//  edge    |
+//   |      |
+//   |    'to'
+//   |    /   \
+//   |  GTE   GTE
+//    \  |     |
+//     Copy    |
+//        \   /
+//        Tuple
+//
+StatusOr<std::pair<HloInstruction*, HloInstruction*>>
+DeepCopyAndAddControlEdges(HloInstruction* from, HloInstruction* to,
+                           const ShapeTree<bool>& indices_to_copy) {
+  DCHECK(ShapeUtil::Compatible(from->shape(), to->shape()));
+  // to/from_copy_tree hold the kCopy instruction produces by the deep
+  // copies. Elements which are not copied (indices_to_copy.element(index) ==
+  // false) have nullptr at that index.
+  ShapeTree<HloInstruction*> from_copy_tree(from->shape(),
+                                            /*init_value=*/nullptr);
+  TF_ASSIGN_OR_RETURN(HloInstruction * from_deep_copy,
+                      from->parent()->DeepCopyInstruction(
+                          from, &indices_to_copy, &from_copy_tree));
 
-  // Sets copy overrides, which are copy instructions to use at each index. This
-  // is used to share a single copy of read-only entry parameters and constants
-  // between multiple While loops.
-  void SetCopyOverrides(const ShapeTree<HloInstruction*>& copy_overrides) {
-    copy_overrides_ = copy_overrides;
+  ShapeTree<HloInstruction*> to_copy_tree(to->shape(), /*init_value=*/nullptr);
+  TF_ASSIGN_OR_RETURN(
+      HloInstruction * to_deep_copy,
+      to->parent()->DeepCopyInstruction(to, &indices_to_copy, &to_copy_tree));
+
+  // Add control edges between the respective kCopy instructions.
+  for (const auto& pair : from_copy_tree) {
+    const ShapeIndex& index = pair.first;
+    HloInstruction* from_copy = pair.second;
+    HloInstruction* to_copy = to_copy_tree.element(index);
+    if (from_copy == nullptr) {
+      TF_RET_CHECK(to_copy == nullptr);
+      continue;
+    }
+    TF_RET_CHECK(to_copy != nullptr);
+    TF_RETURN_IF_ERROR(from_copy->AddControlDependencyTo(to_copy));
   }
 
-  // Returns true if all recorded indices are false (returns true otherwise).
-  bool HasAllIndicesFalse() const;
+  return std::make_pair(from_deep_copy, to_deep_copy);
+}
 
-  // Records instruction buffer indices which point-to a Parameter or Constant.
-  Status RecordIndicesWhichPointToParamOrConstant(
-      const TuplePointsToAnalysis& points_to_analysis);
+// Compute the indices of the loop state which need copies in order to avoid
+// live range interference. Generally, an element in the loop state does not
+// need to be copied if the element is passed through transparently through the
+// body.
+//
+// Returns whether any indices need to be copied.
+bool IndicesToCopyForWhile(const HloDataflowAnalysis& dataflow,
+                           const HloInstruction* xla_while,
+                           ShapeTree<bool>* indices_to_copy) {
+  DCHECK(ShapeUtil::Compatible(indices_to_copy->shape(), xla_while->shape()));
 
-  // Records instruction buffer indices to copy which are necessary to ensure:
-  // *) PointsToSet of 'instruction_' is unambiguous and distinct.
-  // *) No liveness interference between 'instruction_' and 'other_instruction'.
-  //
-  // If 'read_only_indices_out' is non-null, read-only indices are set to true.
-  Status RecordIndicesToCopyForColocatingBuffers(
-      const BufferLiveness& liveness, const HloInstruction* other_instruction,
-      ShapeTree<bool>* read_only_indices_out);
+  bool any_copies = false;
+  const HloInstruction* init = xla_while->operand(0);
+  for (auto& pair : *indices_to_copy) {
+    const ShapeIndex& index = pair.first;
+    bool& should_copy = pair.second;
+    // If there is any ambiguity, then loop state must be copied.
+    if (dataflow.GetValueSet(init, index).values().size() > 1 ||
+        dataflow.GetValueSet(xla_while, index).values().size() > 1) {
+      should_copy = true;
+    } else {
+      // If the output of the while instruction is not the same as the init
+      // value of the while, then this element is not passed through the body
+      // transparently and must be copied.
+      should_copy = dataflow.GetUniqueValueAt(xla_while, index) !=
+                    dataflow.GetUniqueValueAt(init, index);
+    }
+    any_copies |= should_copy;
+  }
+  return any_copies;
+}
 
-  // Records control predecessors to add for inserted copy instructions.
-  // 'parameter' must have the same shape as the instruction that will be
-  // copied, and must define all buffers in the shape. Control predecessors are
-  // only recorded for indices that have already been marked for copying.
-  Status RecordControlPredecessors(
-      const TuplePointsToAnalysis& points_to_analysis,
-      HloInstruction* parameter);
+// Add kCopy instructions around the given kWhile instruction to eliminate any
+// possible live range interference of HLO values assuming a dependency-based
+// ordering (HloDependencyOrdering). Copies are added conservatively. There
+// likely are copies which are not strictly necessary, but there are removed
+// later in the pass via CopyRemover.
+//
+//
+// Elements (each ShapeIndex) in the loop state are considered independently.  A
+// copy is added to each element of the loop state which is modified in the
+// while body. For each such element, a total of three kCopy instructions are
+// added at following locations:
+//
+//   (1) The init value is copied before the kWhile instruction. Before:
+//
+//           (Init)
+//             |
+//           kWhile
+//             |
+//            ...
+//
+//       After:
+//
+//           (Init)
+//             |
+//           kCopy
+//             |
+//           kWhile
+//             |
+//            ...
+//
+//       This copy is necessary in case the init value is simultaneously live
+//       with the kWhile.
+//
+//   (2) Copies are added to the parameter and root of the while body
+//       computation. Before:
+//
+//           kParameter
+//               |
+//              ...
+//               |
+//           (body root)
+//
+//       After:
+//
+//           kParameter
+//               |
+//             kCopy ----------+
+//               |             |
+//              ...           ctrl
+//               |            edge
+//           (body root)       |
+//               |             |
+//             kCopy <---------+
+//
+//       The root kCopy becomes the new root of the computation. Both copies are
+//       necessary to any potential interference between the parameter value and
+//       the root value. The control edge prevents potential interference
+//       between the copies themselves.
+//
+// If the loop state is a tuple then the above kCopy instructions are a deep
+// copy constructed of kCopy, KGetTupleElement, and kTuple instruction as
+// constructed by HloInstruction::DeepCopyInstruction.
+Status AddCopiesForWhile(const HloAliasAnalysis& alias_analysis,
+                         HloInstruction* xla_while) {
+  VLOG(2) << "Adding copies for kWhile instruction " << xla_while->name();
+  TF_RET_CHECK(xla_while->opcode() == HloOpcode::kWhile);
 
-  // Inserts copies of 'instruction' buffers at indices in 'indices_to_copy',
-  // and replaces all uses for instructions in 'copy_users_' with copy.
-  // Returns the instruction which is a copy 'instruction'.
-  HloInstruction* Copy();
+  ShapeTree<bool> indices_to_copy(xla_while->shape());
+  if (!IndicesToCopyForWhile(alias_analysis.dataflow_analysis(), xla_while,
+                             &indices_to_copy)) {
+    VLOG(2) << "No copies necessary for kWhile instruction "
+            << xla_while->name();
+    return Status::OK();
+  }
 
-  HloInstruction* instruction() { return instruction_; }
+  VLOG(2) << "Adding copies for " << xla_while->name() << " at indices:";
+  for (auto& pair : indices_to_copy) {
+    if (pair.second) {
+      VLOG(2) << "  " << pair.first;
+    }
+  }
 
-  const std::vector<HloInstruction*>& copy_users() const { return copy_users_; }
+  // Deep copy init.
+  HloInstruction* while_init = xla_while->mutable_operand(0);
+  TF_ASSIGN_OR_RETURN(
+      HloInstruction * while_init_copy,
+      xla_while->parent()->DeepCopyInstruction(while_init, &indices_to_copy));
+  TF_RETURN_IF_ERROR(while_init->ReplaceUseWith(xla_while, while_init_copy));
 
- private:
-  // Does the given index represent a read-only buffer?
-  bool IsReadOnlyIndex(const ShapeIndex& index) const {
-    return !ShapeUtil::IsNil(read_only_indices_.shape()) &&
-           read_only_indices_.element(index);
-  }
+  // Deep copy the parameter and the root. Extend a control edge from the copy
+  // of the parameter value to the corresponding copy value of the root.
+  HloComputation* body = xla_while->while_body();
+  HloInstruction* param = body->parameter_instruction(0);
+  HloInstruction* root = body->root_instruction();
 
-  // Returns the copy override at the given index, or nullptr.
-  HloInstruction* GetCopyOverride(const ShapeIndex& index) const {
-    return ShapeUtil::IsNil(copy_overrides_.shape())
-               ? nullptr
-               : copy_overrides_.element(index);
-  }
+  // If param is the root then all indices should have been passed through the
+  // while body and we should have returned early above.
+  TF_RET_CHECK(param != root);
 
-  // Records instruction buffer indices which have ambiguous or non-distinct
-  // points-to sets.
-  Status RecordAmbiguousOrNonDistinctIndices(
-      const TuplePointsToAnalysis& points_to_analysis);
+  // Copy users before making a deep copy of the parameter as the deep copy
+  // will create new users of the parameter (eg, the GTE instructions of the
+  // deep copy).
+  std::vector<HloInstruction*> param_users = param->users();
 
-  // Records instruction buffer indices which have interfering live ranges
-  // with 'other_instruction' buffers at same index.
-  Status RecordIndicesWhichInterfereWithOtherInstruction(
-      const BufferLiveness& liveness, const HloInstruction* other_instruction,
-      ShapeTree<bool>* read_only_indices_out);
+  ShapeIndex current_index;
+  TF_ASSIGN_OR_RETURN(auto pair,
+                      DeepCopyAndAddControlEdges(param, root, indices_to_copy));
 
-  // Recursively inserts copies of 'instruction' tuple elements at indices
-  // specified in 'indices_to_copy', and returns the copy of 'instruction'.
-  HloInstruction* CopyTuple(HloInstruction* instruction, ShapeIndex* index);
+  HloInstruction* param_copy = pair.first;
+  HloInstruction* root_copy = pair.second;
 
-  void RecordIndex(const ShapeIndex& index) {
-    *indices_to_copy_.mutable_element(index) = true;
+  for (HloInstruction* user : param_users) {
+    TF_RETURN_IF_ERROR(param->ReplaceUseWith(user, param_copy));
   }
 
-  HloInstruction* instruction_;
-  const std::vector<HloInstruction*> copy_users_;
-  ShapeTree<bool> indices_to_copy_;
-  ShapeTree<std::vector<HloInstruction*>> control_predecessors_;
-  ShapeTree<bool> read_only_indices_;
-  ShapeTree<HloInstruction*> copy_overrides_;
-};
+  body->set_root_instruction(root_copy);
 
-bool InstructionCopier::HasAllIndicesFalse() const {
-  bool all_indices_false = true;
-  indices_to_copy_.ForEachElement(
-      [&all_indices_false](const ShapeIndex& /*index*/, bool data) {
-        if (data) {
-          all_indices_false = false;
-        }
-      });
-  return all_indices_false;
+  return Status::OK();
 }
 
-Status InstructionCopier::RecordIndicesWhichPointToParamOrConstant(
-    const TuplePointsToAnalysis& points_to_analysis) {
-  const PointsToSet& points_to =
-      points_to_analysis.GetPointsToSet(instruction_);
-  // Shallow copy the instruction if the points-to set of the top-level
-  // buffer is ambiguous. This is necessary because the backends must know
-  // statically what the top-level buffer of the result is.
-  if (points_to.element(/*index=*/{}).size() > 1) {
-    RecordIndex({});
+// Removes any control dependencies to or from the given instruction.
+Status StripControlDependenciesFrom(HloInstruction* instruction) {
+  while (!instruction->control_successors().empty()) {
+    TF_RETURN_IF_ERROR(instruction->RemoveControlDependencyTo(
+        instruction->control_successors().front()));
+  }
+
+  while (!instruction->control_predecessors().empty()) {
+    TF_RETURN_IF_ERROR(
+        instruction->control_predecessors().front()->RemoveControlDependencyTo(
+            instruction));
   }
 
-  // Multiple buffers within a parameter/constant may be live out, so collect
-  // a set of indices at which to copy first.
-  points_to.ForEachElement([this](const ShapeIndex& index,
-                                  const PointsToSet::BufferList& buffers) {
-    if (IsReadOnlyIndex(index)) {
-      return;
-    }
-    for (const LogicalBuffer* buffer : buffers) {
-      // pointee is the HloInstruction producing the buffer which may be
-      // liveout.
-      HloInstruction* pointee = buffer->instruction();
-      if (pointee->opcode() == HloOpcode::kParameter ||
-          pointee->opcode() == HloOpcode::kConstant) {
-        VLOG(2) << "Parameter or constant buffer " << buffer->ToString()
-                << " index: " << tensorflow::str_util::Join(index, ",")
-                << " may be live out of computation: " << pointee->ToString();
-        RecordIndex(index);
-        break;
-      }
-    }
-  });
   return Status::OK();
 }
 
-Status InstructionCopier::RecordIndicesToCopyForColocatingBuffers(
-    const BufferLiveness& liveness, const HloInstruction* other_instruction,
-    ShapeTree<bool>* read_only_indices_out) {
-  TF_RETURN_IF_ERROR(
-      RecordAmbiguousOrNonDistinctIndices(liveness.points_to_analysis()));
-  TF_RETURN_IF_ERROR(RecordIndicesWhichInterfereWithOtherInstruction(
-      liveness, other_instruction, read_only_indices_out));
+// Add kCopy instructions to the given module to guarantee there is no
+// live-range interference. Generally interference can only occur around kWhile
+// instructions which have update-in-place semantics.
+Status AddCopiesToResolveInterference(HloModule* module) {
+  TF_ASSIGN_OR_RETURN(std::unique_ptr<HloAliasAnalysis> alias_analysis,
+                      HloAliasAnalysis::Run(module));
+
+  for (HloComputation* computation : module->computations()) {
+    for (HloInstruction* instruction : computation->instructions()) {
+      if (instruction->opcode() == HloOpcode::kWhile) {
+        TF_RETURN_IF_ERROR(AddCopiesForWhile(*alias_analysis, instruction));
+      }
+    }
+  }
   return Status::OK();
 }
 
-Status InstructionCopier::RecordAmbiguousOrNonDistinctIndices(
-    const TuplePointsToAnalysis& points_to_analysis) {
-  const PointsToSet& points_to =
-      points_to_analysis.GetPointsToSet(instruction_);
-  // Mapping from LogicalBuffer to index (used to detect non-distinct indices).
-  FlatMap<const LogicalBuffer*, std::vector<ShapeIndex>>
-      buffer_to_source_indices;
-  points_to.ForEachElement(
-      [this, &buffer_to_source_indices](
-          const ShapeIndex& index, const PointsToSet::BufferList& buffers) {
-        if (buffers.size() > 1) {
-          // Record ambiguous points-to set at 'index'.
-          if (!indices_to_copy_.element(index)) {
-            VLOG(2) << "Adding copy of buffer for instruction: "
-                    << instruction_->name()
-                    << " at index: " << tensorflow::str_util::Join(index, ",")
-                    << " with ambiguous points-to set.";
-            RecordIndex(index);
+// Class for removing unnecessary copies from the module.
+//
+// kCopy instructions are added conservatively to guarantee no live range
+// interference between HLO values. This class uses a more fine-grained analysis
+// to remove some of these added copies which are not strictly necessary.
+class CopyRemover {
+ public:
+  CopyRemover(const HloAliasAnalysis& alias_analysis,
+              const HloOrdering& ordering, HloModule* module)
+      : module_(module),
+        alias_analysis_(alias_analysis),
+        ordering_(ordering),
+        buffer_value_tracker_(*module, alias_analysis, ordering) {}
+
+  // Try to elide the given copy. The copy is elided if the instruction is not
+  // necessary to prevent live-range interference of HLO values. Returns true if
+  // copy was elided.
+  //
+  // The copy instruction is not actually removed here. Instead it is left for
+  // dead in the graph. Later calls to DCE will remove the instruction.
+  StatusOr<bool> TryElideCopy(HloInstruction* copy) {
+    if (buffer_value_tracker_.TryElideCopy(copy)) {
+      TF_RETURN_IF_ERROR(StripControlDependenciesFrom(copy));
+      TF_RETURN_IF_ERROR(copy->ReplaceAllUsesWith(copy->mutable_operand(0)));
+      return true;
+    }
+    return false;
+  }
+
+  string ToString() const {
+    string out = StrCat("CopyRemover, module ", module_->name(), "\n");
+    StrAppend(&out, "  Buffer values, in dependency order:\n");
+    for (const HloBuffer& buffer : alias_analysis_.buffers()) {
+      StrAppend(&out, "    HloBuffer ", buffer.id(), ":\n");
+    }
+    return out;
+  }
+
+ private:
+  // Class which tracks the HLO values within each HLO buffer in the module
+  // during copy removal.
+  //
+  // The values are held in a linked list where there is one list for each
+  // buffer. Removing a copy instruction merges together the values in the
+  // source buffer of the copy to the destination buffer of the copy. This class
+  // tracks these value lists as copies are removed from the graph (and value
+  // lists are merged).
+  //
+  // The BufferValueTracker object is initialized to match the state of
+  // HloAliasAnalysis. However, as copies are removed this state diverges. The
+  // values-to-buffer mapping is maintained outside of HloAliasAnalysis because
+  // a fully updatable alias analysis is very slow.
+  class BufferValueTracker {
+   public:
+    // The values held in a single HLO buffer are represented using a linked
+    // list. An element type in this list is ValueNode.
+    //
+    // This linked list is hand-rolled to enable efficient splicing of lists
+    // using only references to list elements without knowing which lists are
+    // being spliced. std::list requires a reference to the list object to
+    // splice.
+    struct ValueNode {
+      explicit ValueNode(const HloValue* v) : value(v) {}
+
+      const HloValue* value;
+
+      // The uses are maintained outside of HloValue::uses() because
+      // HloValue::uses() is not updatable (a fully updatable dataflow analysis
+      // is slow).
+      std::vector<const HloUse*> uses;
+
+      // next/prev elements in the linked list. The list is circularly linked so
+      // these values are never null for elements in the list.
+      ValueNode* prev = nullptr;
+      ValueNode* next = nullptr;
+    };
+
+    BufferValueTracker(const HloModule& module,
+                       const HloAliasAnalysis& alias_analysis,
+                       const HloOrdering& ordering)
+        : dataflow_(alias_analysis.dataflow_analysis()), ordering_(ordering) {
+      // Construct a list for each HLO buffer in the alias analysis. Maintain a
+      // map from HloValue to the respective list element representing that
+      // value. The map is used to construct the copy info map below.
+      tensorflow::gtl::FlatMap<const HloValue*, ValueNode*> value_to_node;
+      for (const HloBuffer& buffer : alias_analysis.buffers()) {
+        // Verify values contained in the buffer are strictly ordered. This
+        // should always be the case after adding copies to eliminate
+        // interference. Specifically, the addition of the control flow edges
+        // between copies added around aliased operations (kWhile) guarantees
+        // this strict order.
+        for (const HloValue* value_a : buffer.values()) {
+          for (const HloValue* value_b : buffer.values()) {
+            if (value_a != value_b) {
+              DCHECK(ordering_.LiveRangeStrictlyBefore(*value_a, *value_b,
+                                                       dataflow_) ||
+                     ordering_.LiveRangeStrictlyBefore(*value_b, *value_a,
+                                                       dataflow_))
+                  << value_a->ToShortString() << " and "
+                  << value_b->ToShortString() << " are not ordered";
+            }
           }
         }
-        // For each 'buffer': record a mapping from 'buffer' to 'index'.
-        for (const LogicalBuffer* buffer : buffers) {
-          buffer_to_source_indices[buffer].push_back(index);
-        }
-      });
 
-  // Record all non-distinct indices detected in 'buffer_to_source_indices'.
-  for (const auto& buff_to_src : buffer_to_source_indices) {
-    if (buff_to_src.second.size() == 1) {
-      continue;
+        std::vector<const HloValue*> values = buffer.values();
+        std::sort(values.begin(), values.end(),
+                  [this](const HloValue* a, const HloValue* b) {
+                    return ordering_.IsDefinedBefore(*a, *b);
+                  });
+
+        // Create a list containing all of the values in the buffer.
+        AddValueList(values, &value_to_node);
+      }
+
+      // Create copy_map_ which contains the source and destination values
+      // of all copies.
+      CreateCopyMap(module, value_to_node);
+
+      XLA_VLOG_LINES(3, ToString());
+      TF_DCHECK_OK(Verify());
     }
-    for (const ShapeIndex& src_index : buff_to_src.second) {
-      // Record non-distinct points-to set at 'src_index'.
-      if (!indices_to_copy_.element(src_index)) {
-        VLOG(2) << "Adding copy of buffer for instruction: "
-                << instruction_->name()
-                << " at index: " << tensorflow::str_util::Join(src_index, ",")
-                << " because of non-distinct points-to set.";
-        RecordIndex(src_index);
+
+    // Add a list containing the given values to BufferValueTracker. This
+    // represents the values contained in a single buffer. For each value in
+    // 'values' an entry is created in value_to_node which indicates the
+    // respective ValueNode representing that value.
+    void AddValueList(
+        tensorflow::gtl::ArraySlice<const HloValue*> values,
+        tensorflow::gtl::FlatMap<const HloValue*, ValueNode*>* value_to_node) {
+      ValueNode* tail = nullptr;
+      ValueNode* head = nullptr;
+      for (const HloValue* value : values) {
+        auto new_node = new ValueNode(value);
+        (*value_to_node)[value] = new_node;
+
+        // Copy the HLO values's uses into the ValueNode for the value. These
+        // uses in ValueNode are updated as copies are removed.
+        new_node->uses.reserve(value->uses().size());
+        for (const HloUse& use : value->uses()) {
+          new_node->uses.push_back(&use);
+        }
+
+        // Connect the new node into the linked list.
+        if (tail == nullptr) {
+          head = new_node;
+        } else {
+          tail->next = new_node;
+          new_node->prev = tail;
+        }
+        tail = new_node;
       }
+
+      // The linked list is circular so connect the head and tail.
+      tail->next = head;
+      head->prev = tail;
+      value_lists_.insert(head);
     }
-  }
-  return Status::OK();
-}
 
-Status InstructionCopier::RecordIndicesWhichInterfereWithOtherInstruction(
-    const BufferLiveness& liveness, const HloInstruction* other_instruction,
-    ShapeTree<bool>* read_only_indices_out) {
-  // Record all buffer indices for 'instruction_', which interfere with
-  // 'other_instruction' at the same index.
-  ShapeUtil::ForEachSubshape(
-      instruction_->shape(),
-      [this, &liveness, other_instruction, read_only_indices_out](
-          const Shape& /*subshape*/, const ShapeIndex& index) {
-        if (IsReadOnlyIndex(index)) {
-          return;
+    // This method also fills in copy_map_ which indicates which nodes
+    // in the value lists corresponding to the source and destination values of
+    // kCopy instructions. value_to_node should map each HloValue to its
+    // respective ValueNode.
+    void CreateCopyMap(
+        const HloModule& module,
+        const tensorflow::gtl::FlatMap<const HloValue*, ValueNode*>&
+            value_to_node) {
+      for (HloComputation* computation : module.computations()) {
+        for (HloInstruction* instruction : computation->instructions()) {
+          // Add copies with unambiguous source values to the map. Copies with
+          // ambiguous sources are not removable.
+          if (instruction->opcode() == HloOpcode::kCopy) {
+            const HloValueSet& src_value_set =
+                dataflow_.GetValueSet(instruction->operand(0));
+            if (src_value_set.values().size() == 1) {
+              CopyNodes& copy_node = copy_map_[instruction];
+              copy_node.dest =
+                  value_to_node.at(&dataflow_.GetUniqueValueAt(instruction));
+              copy_node.src = value_to_node.at(&src_value_set.GetUniqueValue());
+            }
+          }
         }
-        if (indices_to_copy_.element(index)) {
-          // Return if previous pass already set index.
-          return;
+      }
+    }
+
+    ~BufferValueTracker() {
+      for (const ValueNode* head : value_lists_) {
+        const ValueNode* p = head;
+        do {
+          const ValueNode* tmp = p->next;
+          delete p;
+          p = tmp;
+        } while (p != head);
+      }
+    }
+
+    // Verify invariants within the linked lists.
+    Status Verify() const {
+      for (const ValueNode* head : value_lists_) {
+        const ValueNode* p = head;
+        do {
+          // Verify links between elements are consistent.
+          TF_RET_CHECK(p->prev->next == p);
+          TF_RET_CHECK(p->next->prev == p);
+
+          const HloInstruction* def = p->value->defining_instruction();
+          if (def->opcode() == HloOpcode::kCopy &&
+              ContainsKey(copy_map_, def)) {
+            TF_RET_CHECK(copy_map_.at(def).dest == p);
+          }
+          for (const HloUse* use : p->uses) {
+            if (use->instruction->opcode() == HloOpcode::kCopy &&
+                ContainsKey(copy_map_, use->instruction)) {
+              TF_RET_CHECK(copy_map_.at(use->instruction).src == p);
+            }
+          }
+
+          p = p->next;
+        } while (p != head);
+      }
+      return Status::OK();
+    }
+
+    // Try to elide the given copy. Elision of a copy is possible only if no
+    // live range interference is introduced by the copy's elimination. If
+    // elision is possible, then the internal state (value lists) are updated,
+    // and true is returned. Returns false otherwise.
+    bool TryElideCopy(const HloInstruction* copy) {
+      VLOG(2) << "Trying to remove " << copy->name();
+
+      if (!ContainsKey(copy_map_, copy)) {
+        VLOG(2) << copy->name() << " is not removable";
+        return false;
+      }
+
+      const CopyNodes& copy_node = copy_map_.at(copy);
+      ValueNode* src = copy_node.src;
+      ValueNode* dest = copy_node.dest;
+      DCHECK(src != nullptr);
+      DCHECK(dest != nullptr);
+
+      auto is_live_range_before = [this](const ValueNode& a,
+                                         const ValueNode& b) {
+        if (LiveRangeBefore(a, b)) {
+          VLOG(2) << "  Live range of " << a.value->ToShortString()
+                  << " is before " << b.value->ToShortString();
+          return true;
+        } else {
+          VLOG(2) << "  Live range of " << a.value->ToShortString()
+                  << " is not before " << b.value->ToShortString();
+          return false;
         }
-        const auto& points_to_analysis = liveness.points_to_analysis();
-        // Lookup buffers for 'instruction_' and 'other_instruction'.
-        const auto instruction_buffers =
-            points_to_analysis.GetPointsToSet(instruction_).element(index);
-        // If 'instruction_' has ambiguous points-to-set  at 'index', it would
-        // have been recorded in a previous pass (and we would have returned
-        // early at the entry to this function). As a result, here we know that
-        // 'instruction_' has just one buffer in its points-to-set.
-        CHECK_EQ(1, instruction_buffers.size());
-        const LogicalBuffer* instruction_buffer = instruction_buffers[0];
-
-        const auto other_instruction_buffers =
-            points_to_analysis.GetPointsToSet(other_instruction).element(index);
-        // Do not insert a copy if both instructions point at the same buffer.
-        // This eliminates unnecessary copies of read-only tuple elements.
-        // If 'instruction_' and 'other_instruction' point to the same buffer,
-        // then that buffer is not updated on the path between the two
-        // instructions. Therefore, any other (possibly interference-causing)
-        // users of that buffer from 'other_instruction' will see the same data,
-        // irrespective of whether we insert a copy of this buffer at
-        // 'instruction_' or not.
-        if (other_instruction_buffers.size() == 1 &&
-            other_instruction_buffers[0]->id() == instruction_buffer->id()) {
-          if (read_only_indices_out != nullptr) {
-            *read_only_indices_out->mutable_element(index) = true;
+      };
+
+      VLOG(3) << copy->name() << " copies value "
+              << src->value->ToShortString();
+      VLOG(3) << "Source buffer values: " << ValueListToString(src);
+      VLOG(3) << "Dest buffer values: " << ValueListToString(src);
+
+      // A kCopy instruction copies an HLO value from a source buffer and
+      // defines an HLO value in a destination buffer. Most generally, the
+      // source and destination buffers may each hold more than one value at
+      // different points in the computation so we define the following:
+      //
+      //   Values in source buffer:      {s_0, ..., s_n}
+      //   Values in destination buffer: {d_0, ..., d_m}
+      //
+      // A kCopy instruction between these buffers copies a value s_x in the
+      // source buffer and defines a value d_y in the destination buffer. The
+      // elision of a copy merges the source and destination buffers together,
+      // so the list of values for the source and destination buffers are
+      // merged.
+      //
+      // We handle two different cases for copy elision:
+      //
+      //  (1) the kCopy defines the first value in the destination buffer (d_0).
+      //
+      //  (2) the kCopy copies the last value in the source buffer (s_n).
+      //
+      // For the remaining case where the kCopy copies a not-last value from the
+      // source buffer to a not-first value of the destination buffer, the kCopy
+      // instruction cannot be removed. This case is generated, for example, if
+      // the kCopy copies a while body parameter of the loop state at one tuple
+      // index to a different tuple index in the while body root. Removal of the
+      // copy necessarily results in live range interference of values in the
+      // loop state at the two different tuple indices.
+      //
+      //  We can only perform copy elision if the resulting merged values have
+      //  totally ordered live ranges; otherwise the merged buffer would have
+      //  live range interference.
+      if (IsHead(*dest)) {
+        // The copy copies an arbitrary value in the source buffer (call it s_x)
+        // and defines d_0, the first value in the destination buffer. After
+        // merging, the values in the combined buffer must be strictly ordered
+        // as follows** to elide the copy:
+        //
+        // {s_0, ..., s_x, d_1, ..., d_m, s_{x+1}, ..., s_n}
+        //
+        // Removing the copy eliminates d_0, and uses of d_0 become uses of
+        // s_x. In the above ordering, the live range of d_m must be ordered
+        // before the live range of s_{x+1} and the definition and all uses of
+        // s_x must be ordered before the definition of d_1. These conditions
+        // are checked below prior to elision.
+        //
+        // ** Technically it might be possible to have a non-interfering
+        //    non-trivial interleaving of the values of the source and
+        //    destination buffers in the resulting order. However, this case is
+        //    slow and complicated to check and likely not worth it. So instead
+        //    we simply check for the case where *all* values of the destination
+        //    buffer (d_1 through d_m) are spliced into the point where the copy
+        //    used to be.
+        VLOG(2) << copy->name() << " defines the first value in its buffer";
+        ValueNode* next_dest = Next(*dest);
+        if (next_dest != nullptr) {
+          // Live range of 'from' value (s_x) must be before 'next_dest' (d_1);
+          if (!is_live_range_before(*src, *next_dest)) {
+            return false;
           }
-          return;
         }
-        // We can't say anything about the ambiguity of 'other_instruction' at
-        // this point, so we need to check interference between the single
-        // buffer in the points-to set of 'instruction_' and all buffers in
-        // 'other_instruction_buffers'.
-        for (const LogicalBuffer* other_buffer : other_instruction_buffers) {
-          if (liveness.MayInterfere(*instruction_buffer, *other_buffer)) {
-            VLOG(2) << "Adding copy of buffer for instruction: "
-                    << instruction_->name()
-                    << " instruction_buffer: " << instruction_buffer->ToString()
-                    << " at index: " << tensorflow::str_util::Join(index, ",")
-                    << " because of interference with buffer: "
-                    << other_buffer->ToString();
-            RecordIndex(index);
-            break;
+        ValueNode* next_src = Next(*src);
+
+        if (next_src != nullptr) {
+          // Live range of 'last_dest' (d_m) must be before 'next_src' s_{x+1}.
+          ValueNode* last_dest = dest->prev;
+          DCHECK(IsTail(*last_dest));
+          if (!is_live_range_before(*last_dest, *next_src)) {
+            return false;
           }
         }
-      });
-  return Status::OK();
-}
 
-// This is called when 'instruction_' is a while body root, and 'parameter' is
-// the while body parameter. We record all users of all aliases of 'parameter'
-// as control predecessors, so that when we add a copy of 'instruction_', we can
-// mark the control dependencies. This is necessary because points-to and
-// liveness analysis doesn't know about the aliasing between the while body root
-// and param. Without these control dependencies, the copy might get scheduled
-// to run at a point that interferes with users of the buffer.
-Status InstructionCopier::RecordControlPredecessors(
-    const TuplePointsToAnalysis& points_to_analysis,
-    HloInstruction* parameter) {
-  return indices_to_copy_.ForEachElementWithStatus(
-      [this, &points_to_analysis, parameter](const ShapeIndex& index,
-                                             bool will_copy) {
-        if (will_copy) {
-          TF_ASSIGN_OR_RETURN(
-              const LogicalBuffer* buffer,
-              points_to_analysis.GetBufferDefinedAt(parameter, index));
-          for (const BufferAlias& alias :
-               points_to_analysis.GetBufferAliases(*buffer)) {
-            for (HloInstruction* user : alias.instruction()->users()) {
-              if (DoesNotUseOperandBuffer(alias.instruction(), alias.index(),
-                                          user, points_to_analysis)) {
-                continue;
-              }
-
-              if (user != instruction_) {
-                control_predecessors_.mutable_element(index)->push_back(user);
-              }
-            }
+        // Splice in destination buffer values list right after 'src'.
+        SpliceAfter(dest, src);
+      } else if (IsTail(*src)) {
+        // The copy copies the last value in the source buffer, s_n, and defines
+        // an arbitrary value in the destination buffer, d_y.  After
+        // merging, the values in the combined buffer must be strictly ordered
+        // as follows** to elide the copy:
+        //
+        // {d_0, ..., d_{y-1}, s_0, ..., s_n, d_{y+1}, ..., d_m}
+        //
+        // Removing the copy eliminates d_y, and uses of d_y become uses of
+        // s_n. To enforce the above order, the live range of d_{y-1} must be
+        // before the live range of s_0, and the live range of s_n must be
+        // before the live range of d_{y+1}.
+        //
+        // ** See comment above in the code handling Case (1).
+        VLOG(2) << copy->name() << " copies the last value ("
+                << src->value->ToShortString() << ") in its buffer";
+
+        ValueNode* prev_dest = Prev(*dest);
+        // nullptr condition handled above in the first 'if' case.
+        DCHECK(prev_dest != nullptr);
+        ValueNode* first_src = src->next;
+        DCHECK(IsHead(*first_src));
+        if (!is_live_range_before(*prev_dest, *first_src)) {
+          // Live range of value d_{y-1} is not before s_0.
+          return false;
+        }
+        ValueNode* next_dest = Next(*dest);
+        if (next_dest != nullptr) {
+          if (!is_live_range_before(*src, *next_dest)) {
+            // Live range of value s_n is not before d_{y+1}.
+            return false;
           }
         }
-        return Status::OK();
-      });
-}
 
-// Recursively inserts copies of 'instruction' tuple element buffers at
-// indices in 'indices_to_copy_', expanding tuples as needed.
-HloInstruction* InstructionCopier::CopyTuple(HloInstruction* instruction,
-                                             ShapeIndex* index) {
-  const int64 num_tuple_elements =
-      ShapeUtil::TupleElementCount(instruction->shape());
-  std::vector<HloInstruction*> elem_copies(num_tuple_elements);
-  for (int64 i = 0; i < num_tuple_elements; ++i) {
-    HloInstruction* elem;
-    if (instruction->opcode() == HloOpcode::kTuple) {
-      // If the instruction is already a Tuple instruction, we know that the
-      // element buffers are aliased, so we can just grab the operand directly.
-      elem = instruction->mutable_operand(i);
-    } else {
-      // Otherwise we need to add a GTE to unpack the element out of the tuple.
-      elem = instruction->parent()->AddInstruction(
-          HloInstruction::CreateGetTupleElement(
-              ShapeUtil::GetSubshape(instruction->shape(), {i}), instruction,
-              i));
-    }
-    index->push_back(i);
-    if (ShapeUtil::IsTuple(elem->shape())) {
-      elem_copies[i] = CopyTuple(elem, index);
-    } else if (!indices_to_copy_.element(*index)) {
-      elem_copies[i] = elem;
-    } else if (HloInstruction* copy_override = GetCopyOverride(*index)) {
-      elem_copies[i] = copy_override;
-    } else {
-      HloInstruction* elem_copy = elem->parent()->AddInstruction(
-          HloInstruction::CreateUnary(elem->shape(), HloOpcode::kCopy, elem));
-      for (HloInstruction* control_predecessor :
-           control_predecessors_.element(*index)) {
-        VLOG(2) << "Adding control dependency from "
-                << control_predecessor->ToString() << " to "
-                << elem_copy->ToString();
-        TF_CHECK_OK(control_predecessor->AddControlDependencyTo(elem_copy));
+        // Splice source buffer values list right after 'prev_dest'.
+        SpliceAfter(first_src, prev_dest);
+      } else {
+        VLOG(2)
+            << copy->name()
+            << " copies value in middle of source buffer to value in middle "
+               "of destination buffer";
+        return false;
       }
-      elem_copies[i] = elem_copy;
+
+      RemoveCopyValue(dest);
+
+      XLA_VLOG_LINES(4, ToString());
+      TF_DCHECK_OK(Verify());
+
+      return true;
     }
-    index->pop_back();
-  }
-  return instruction->parent()->AddInstruction(
-      HloInstruction::CreateTuple(elem_copies));
-}
 
-// Inserts copies of 'instruction_' buffers at indices in 'indices_to_copy_'.
-HloInstruction* InstructionCopier::Copy() {
-  ShapeIndex index;
-  HloInstruction* copy;
-  if (ShapeUtil::IsTuple(instruction_->shape())) {
-    copy = CopyTuple(instruction_, &index);
-  } else {
-    copy = instruction_->parent()->AddInstruction(HloInstruction::CreateUnary(
-        instruction_->shape(), HloOpcode::kCopy, instruction_));
-  }
-  for (HloInstruction* user : copy_users_) {
-    VLOG(2) << "Adding copy between instruction: " << instruction_->name()
-            << " and user: " << user->name();
-    TF_CHECK_OK(instruction_->ReplaceUseWith(user, copy));
+    // Delete the given ValueNode associated with a elided kCopy
+    // instruction. This should be called after splicing the value lists of the
+    // source and destination buffers together.
+    void RemoveCopyValue(ValueNode* copy_value_node) {
+      CHECK_EQ(copy_value_node->value->defining_instruction()->opcode(),
+               HloOpcode::kCopy);
+      ValueNode* operand_node = copy_value_node->prev;
+      CHECK(operand_node != copy_value_node);
+
+      VLOG(2) << "Removing copy " << operand_node->value->ToShortString()
+              << " => " << copy_value_node->value->ToShortString();
+
+      // Splice out the copy value node.
+      operand_node->next = copy_value_node->next;
+      copy_value_node->next->prev = operand_node;
+
+      // Patch up uses. Remove use of copy from operand_node uses.
+      auto it =
+          std::find_if(operand_node->uses.begin(), operand_node->uses.end(),
+                       [copy_value_node](const HloUse* use) {
+                         return use->instruction ==
+                                copy_value_node->value->defining_instruction();
+                       });
+      CHECK(it != operand_node->uses.end());
+      operand_node->uses.erase(it);
+
+      // If the elided copy has any uses which are themselves kCopy instructions
+      // then patch up the copy info to reflect the that this kCopy instruction
+      // has a different operand (the operand of the elided copy).
+      for (const HloUse* copy_use : copy_value_node->uses) {
+        operand_node->uses.push_back(copy_use);
+        if (copy_use->instruction->opcode() == HloOpcode::kCopy) {
+          copy_map_.at(copy_use->instruction).src = operand_node;
+        }
+      }
+
+      // Delete the copy info and the value node.
+      copy_map_.erase(copy_value_node->value->defining_instruction());
+      delete copy_value_node;
+    }
+
+    // Returns true if the live range of given value 'a' is before the live
+    // range of 'b'.
+    //
+    // We cannot use LiveRangeStrictlyBefore because HloValue::uses() is not
+    // updated as copies are removed.
+    bool LiveRangeBefore(const ValueNode& a, const ValueNode& b) {
+      if (a.uses.empty()) {
+        VLOG(2) << "Empty uses";
+        return ordering_.IsDefinedBefore(*a.value, *b.value);
+      }
+      for (const HloUse* use : a.uses) {
+        VLOG(2) << "use: " << *use;
+        VLOG(2) << "is before:" << *b.value;
+        if (!ordering_.UseIsBeforeValueDefinition(*use, *b.value, dataflow_)) {
+          VLOG(2) << "Not before";
+          return false;
+        }
+      }
+      return true;
+    }
+
+    // Returns whether 'node' is the last node in its list.
+    bool IsTail(const ValueNode& node) const {
+      return ContainsKey(value_lists_, node.next);
+    }
+
+    // Returns whether 'node' is the first node in its list.
+    bool IsHead(const ValueNode& node) const {
+      return ContainsKey(value_lists_, &node);
+    }
+
+    // Returns the next node in the list after 'node'. If 'node' is the
+    // tail, then nullptr is returned.
+    ValueNode* Next(const ValueNode& node) const {
+      if (IsTail(node)) {
+        return nullptr;
+      } else {
+        return node.next;
+      }
+    }
+
+    // Returns the previous node in the list before 'node'. If 'node'
+    // is the head, then nullptr is returned.
+    ValueNode* Prev(const ValueNode& node) const {
+      if (IsHead(node)) {
+        return nullptr;
+      } else {
+        return node.prev;
+      }
+    }
+
+    // Splices the entire linked list with 'head' as its head right after the
+    // node 'insert_after' in another linked list.
+    void SpliceAfter(ValueNode* head, ValueNode* insert_after) {
+      DCHECK(IsHead(*head));
+      value_lists_.erase(head);
+
+      ValueNode* tail = head->prev;
+      tail->next = insert_after->next;
+      insert_after->next->prev = tail;
+
+      insert_after->next = head;
+      head->prev = insert_after;
+    }
+
+    string ValueListToString(const ValueNode* element) {
+      const ValueNode* head = element;
+      while (!IsHead(*head)) {
+        head = Prev(*head);
+      }
+      std::vector<const HloValue*> values;
+      for (const ValueNode* p = head; p != nullptr; p = Next(*p)) {
+        values.push_back(p->value);
+      }
+      return StrCat("{",
+                    Join(values, ", ",
+                         [](string* s, const HloValue* value) {
+                           StrAppend(s, value->ToShortString());
+                         }),
+                    "}");
+    }
+
+    string ToString() const {
+      string out = StrCat("BufferValueTracker:\n");
+      StrAppend(&out, "  Def-use chains in each buffer:\n");
+      for (const ValueNode* head : value_lists_) {
+        StrAppend(&out, "    Buffer defined by ", head->value->ToShortString(),
+                  ":\n");
+        const ValueNode* p = head;
+        do {
+          StrAppend(&out, "      ", p->value->ToShortString(), ", uses: ",
+                    Join(p->uses, "; ",
+                         [](string* s, const HloUse* use) {
+                           StrAppend(s, use->ToString());
+                         }),
+                    "\n");
+
+          p = p->next;
+        } while (p != head);
+      }
+      StrAppend(&out, "  Potentially removable copies:\n");
+      for (const auto& pair : copy_map_) {
+        const HloInstruction* copy = pair.first;
+        const CopyNodes& copy_info = pair.second;
+
+        StrAppend(&out, "    ", copy->name(), " : ",
+                  copy_info.src->value->ToShortString(), " => ",
+                  copy_info.dest->value->ToShortString(), "\n");
+      }
+      return out;
+    }
+
+   private:
+    const HloDataflowAnalysis& dataflow_;
+    const HloOrdering& ordering_;
+
+    // The heads of all the value lists. Each value list represents the HLO
+    // values contained in a particular HLO buffer. The values in the list are
+    // in dependency order.
+    tensorflow::gtl::FlatSet<const ValueNode*> value_lists_;
+
+    // Copy removal requires fast access to the value list elements
+    // corresponding to the source and destination values of the kCopy
+    // instruction. This data structure holds pointers to these elements for
+    // each kCopy instruction in the graph.
+    struct CopyNodes {
+      // The source and destinations values of the kCopy instruction.
+      ValueNode* src = nullptr;
+      ValueNode* dest = nullptr;
+    };
+    tensorflow::gtl::FlatMap<const HloInstruction*, CopyNodes> copy_map_;
+  };
+
+  HloModule* module_;
+  const HloAliasAnalysis& alias_analysis_;
+  const HloOrdering& ordering_;
+
+  // Object tracking the HLO values contained in each HLO buffer.
+  BufferValueTracker buffer_value_tracker_;
+};
+
+// Try to remove as many copies from the module as possible without introducing
+// live range interference. Copy instructions (identified by their unique id) in
+// the set copies_to_exclude are not considered for removal.
+Status RemoveUnnecessaryCopies(
+    const HloOrdering& ordering,
+    const tensorflow::gtl::FlatSet<int>& copies_to_exclude, HloModule* module) {
+  TF_ASSIGN_OR_RETURN(std::unique_ptr<HloAliasAnalysis> alias_analysis,
+                      HloAliasAnalysis::Run(module));
+  CopyRemover copy_remover(*alias_analysis, ordering, module);
+  XLA_VLOG_LINES(3, copy_remover.ToString());
+
+  tensorflow::gtl::FlatSet<int> existing_copies;
+  for (HloComputation* computation : module->computations()) {
+    for (HloInstruction* instruction : computation->instructions()) {
+      if (instruction->opcode() == HloOpcode::kCopy &&
+          !ContainsKey(copies_to_exclude, instruction->unique_id())) {
+        TF_RETURN_IF_ERROR(copy_remover.TryElideCopy(instruction).status());
+      }
+    }
   }
-  return copy;
+
+  return Status::OK();
 }
 
-// The 'read_only_indices' are initialized based on points-to analysis on the
-// while body corresponding to 'while_hlo'. If the init buffer corresponding to
-// a read-only index aliases with a constant, it cannot be considered read-only,
-// and must be copied. This is necessary because BufferAssignment does not
-// currently assign an allocation for constants (b/32248867).
-// This function performs this fix-up of 'read_only_indices'.
+// Add copies to address special constraints on the roots of computations not
+// related to live range interference:
 //
-// Returns a ShapeTree of copy_overrides, which implements an optimization to
-// allow multiple while loops that share the same read-only constants to
-// share a single copy.
-StatusOr<ShapeTree<HloInstruction*>> RevertReadOnlyIndicesForConstants(
-    const HloInstruction* while_hlo,
-    const TuplePointsToAnalysis& points_to_analysis,
-    ShapeTree<bool>* read_only_indices,
-    FlatMap<const HloInstruction*, HloInstruction*>* shared_copies) {
-  const HloInstruction* init_hlo = while_hlo->operand(0);
-  const PointsToSet& points_to = points_to_analysis.GetPointsToSet(init_hlo);
-
-  // Mapping from LogicalBuffer to index (used to detect non-distinct indices).
-  FlatSet<const LogicalBuffer*> buffer_set;
-
-  ShapeTree<HloInstruction*> copy_overrides(init_hlo->shape());
-  points_to.ForEachElement([init_hlo, read_only_indices, shared_copies,
-                            &buffer_set, &copy_overrides](
-                               const ShapeIndex& index,
-                               const PointsToSet::BufferList& buffers) {
-    // Look for read-only entry parameters.
-    if (!read_only_indices->element(index)) {
-      return;
-    }
-    for (const LogicalBuffer* buffer : buffers) {
-      HloInstruction* pointee = buffer->instruction();
-      const bool is_constant = pointee->opcode() == HloOpcode::kConstant;
-      if (!is_constant) {
-        continue;
-      }
+//    (1) Entry computation root must be unambiguous and distinct.
+//
+//    (2) Any computation called by a kCall instruction must have an
+//        unambiguous root.
+//
+//    (3) Constants and parameters cannot be live out of the entry computation
+//
+Status AddSpecialCaseCopies(const CallGraph& call_graph, HloModule* module) {
+  TF_ASSIGN_OR_RETURN(std::unique_ptr<HloAliasAnalysis> alias_analysis,
+                      HloAliasAnalysis::Run(module));
+
+  // Identify which shape indices of which instructions need to be copied. Store
+  // these results in 'instructions_to_copy'.
+  std::unordered_map<HloInstruction*, ShapeTree<bool>> instructions_to_copy;
+  auto add_index_to_copy = [&instructions_to_copy](HloInstruction* instruction,
+                                                   const ShapeIndex& index) {
+    auto it = instructions_to_copy.find(instruction);
+    if (it == instructions_to_copy.end()) {
+      auto it_added = instructions_to_copy.emplace(
+          std::piecewise_construct, std::forward_as_tuple(instruction),
+          std::forward_as_tuple(instruction->shape(), /*init_value=*/false));
+      it = it_added.first;
+    }
+    *it->second.mutable_element(index) = true;
+  };
 
-      // We have found an constant that is read-only in
-      // the while body. These buffers are managed by the caller, and cannot
-      // be aliased with HLO buffers. Revert this read-only index,
-      // to allow it to be copied.
-      *read_only_indices->mutable_element(index) = false;
-
-      // Optimization to allow multiple while loops that share the same
-      // read-only entry constants to share a single copy.
-      // Only unambiguous and distinct array-shaped buffers are allowed, to
-      // reduce code complexity. The shape of the entry parameter must be
-      // identical to the shape of the init_hlo at this index, to ensure
-      // there were no intervening bitcast or GTE instructions, which are
-      // also hard to handle.
-      const Shape& pointee_shape = pointee->shape();
-      const Shape& init_shape =
-          ShapeUtil::GetSubshape(init_hlo->shape(), index);
-      if (buffers.size() == 1 && ShapeUtil::IsArray(pointee_shape) &&
-          ShapeUtil::Equal(pointee_shape, init_shape) &&
-          buffer_set.count(buffer) < 1) {
-        HloInstruction** copy = &(*shared_copies)[pointee];
-        if (*copy == nullptr) {
-          *copy = pointee->parent()->AddInstruction(HloInstruction::CreateUnary(
-              pointee_shape, HloOpcode::kCopy, pointee));
+  // Iterate through values of all constants and entry parameters. These values
+  // are special because they are held in read-only buffers. If any of these
+  // values share a buffer with other values (for example, the init value of a
+  // while is a constant) then copy the value at its definition and replace all
+  // its uses with the copy.
+  for (const HloValue* value : alias_analysis->dataflow_analysis().values()) {
+    if (ValueIsReadOnly(*value) &&
+        alias_analysis->GetBufferContainingValue(*value).values().size() > 1) {
+      VLOG(2) << "Value " << value->ToShortString()
+              << " is read only, but its buffer contains more than one value. "
+                 "Copying.";
+      add_index_to_copy(value->defining_instruction(), value->defining_index());
+    }
+  }
+
+  // Identify copies which must be added at root instructions
+  for (HloComputation* computation : module->computations()) {
+    const CallGraphNode& node = call_graph.GetNode(computation);
+    if (node.context() == CallContext::kParallel) {
+      continue;
+    }
+    TF_RET_CHECK(node.context() == CallContext::kSequential);
+
+    const bool is_entry = computation == module->entry_computation();
+    HloInstruction* root = computation->root_instruction();
+
+    // Mark nondistinct/ambiguous indices.
+    tensorflow::gtl::FlatSet<const HloBuffer*> seen;
+    ShapeUtil::ForEachSubshape(
+        root->shape(), [&](const Shape& /*subshape*/, const ShapeIndex& index) {
+          std::vector<const HloBuffer*> buffers_at_index =
+              alias_analysis->ComputeBuffersAt(root, index);
+          bool buffer_seen_before = false;
+          for (const HloBuffer* buffer : buffers_at_index) {
+            buffer_seen_before |= !seen.insert(buffer).second;
+          }
+          if (buffers_at_index.size() > 1 || (buffer_seen_before && is_entry)) {
+            VLOG(2) << "Index " << index << " of root of computation "
+                    << computation->name() << " (" << root->name()
+                    << ") has ambiguous or non-distinct buffer. Copying.";
+            add_index_to_copy(root, index);
+          }
+        });
+
+    // For entry instructions, mark any parameter or constant values.
+    if (is_entry) {
+      for (const auto& pair :
+           alias_analysis->dataflow_analysis().GetInstructionValueSet(root)) {
+        const ShapeIndex& index = pair.first;
+        const HloValueSet& value_set = pair.second;
+        for (const HloValue* value : value_set.values()) {
+          if (ValueIsReadOnly(*value)) {
+            VLOG(2) << "Root of entry computation (" << root->name()
+                    << ") has constant or entry parameter value at index "
+                    << index << ". Copying.";
+            add_index_to_copy(root, index);
+          }
         }
-        // Add the copy as an override.
-        *copy_overrides.mutable_element(index) = *copy;
       }
+    }
+  }
 
-      // Tracks whether this current buffer is distinct.
-      buffer_set.insert(buffer);
+  // Add copy instructions indicated in 'instructions_to_copy' to the module.
+  for (const auto& pair : instructions_to_copy) {
+    HloInstruction* instruction = pair.first;
+    const ShapeTree<bool>& indices_to_copy = pair.second;
 
-      // We've already reverted the read-only index and handled the
-      // single-copy optimization above, so there's nothing more to do.
-      break;
+    std::vector<HloInstruction*> users = instruction->users();
+    TF_ASSIGN_OR_RETURN(HloInstruction * deep_copy,
+                        instruction->parent()->DeepCopyInstruction(
+                            instruction, &indices_to_copy));
+    for (HloInstruction* user : users) {
+      TF_RETURN_IF_ERROR(instruction->ReplaceUseWith(user, deep_copy));
+    }
+    if (instruction == instruction->parent()->root_instruction()) {
+      instruction->parent()->set_root_instruction(deep_copy);
     }
-  });
-  return copy_overrides;
+  }
+
+  return Status::OK();
+}
+
+Status VerifyNoLiveRangeInterference(HloModule* module) {
+  TF_ASSIGN_OR_RETURN(std::unique_ptr<HloAliasAnalysis> alias_analysis,
+                      HloAliasAnalysis::Run(module));
+  DependencyHloOrdering ordering(module);
+  TF_RET_CHECK(!alias_analysis->HasLiveRangeInterference(ordering));
+  return Status::OK();
 }
 
-}  // anonymous namespace
-
-// NOTE: This is only called by gpu::CopyInsertion. It's not called here in the
-// base class, since the regular CopyInsertion logic above selectively copies
-// tuple elements, while this method assumes all buffers need to be deep copied.
-StatusOr<HloInstruction*> CopyInsertion::FindOrInsertCopy(HloInstruction* hlo) {
-  auto copy_it = inserted_copies_.find(hlo);
-  if (copy_it == inserted_copies_.end()) {
-    HloInstruction* copy = hlo->parent()->DeepCopyInstruction(hlo).ValueOrDie();
-    inserted_copies_.insert({hlo, copy});
-    return copy;
-  } else {
-    return copy_it->second;
+void MaybeDumpModule(const string& message, const HloModule& module) {
+  if (VLOG_IS_ON(3)) {
+    VLOG(3) << message;
+    XLA_VLOG_LINES(3, module.ToString());
+    hlo_graph_dumper::MaybeDumpHloModule(module, message);
   }
 }
 
+}  // namespace
+
 StatusOr<bool> CopyInsertion::Run(HloModule* module) {
-  bool changed = false;
-  VLOG(2) << "CopyInsertion for module " << module->name();
+  // Copy insertion is performed in three steps:
+  //
+  // (1) Add copies conservatively to guarantee that there is no live-range
+  //     interference. This is done simplistically and usually results in more
+  //     copies than is strictly necessary.
+  //
+  // (2) Using a more fine-grained analysis, remove as many copies that were
+  //     added in (1) as possible while ensuring no live-range interference.
+  //
+  // (3) Add copies to resolve issues not related to live range interference
+  //     such as parameters and constants live out of the entry computation.
+  //
+  // We add copies then remove them (step (1) then (2)) rather than simply
+  // adding only the copies that are necessary because, in general, it is
+  // difficult to figure out the minimal set of copies to add once there is
+  // interference. On the other hand, it is easy to determine if removing a copy
+  // will introduce interference.
+  //
+  // The final copy insertion in (3) is done separately to simplify the
+  // implementation of copy removal in (2) which is the most complicated part of
+  // the pass. As is, copy removal only has to reason about live range
+  // interference. If all copies were added in step (1) then copy removal would
+  // also have to reason about things like constants and parameters live out of
+  // the computation.
+  MaybeDumpModule("before copy insertion", *module);
 
-  TF_ASSIGN_OR_RETURN(
-      std::unique_ptr<BufferLiveness> liveness,
-      BufferLiveness::Run(module, MakeUnique<DependencyHloOrdering>(module)));
-  const auto& points_to_analysis = liveness->points_to_analysis();
-  XLA_VLOG_LINES(2, points_to_analysis.ToString());
-  XLA_VLOG_LINES(2, module->ToString());
-
-  // Gather all while body computations and while instructions.
-  FlatSet<const HloComputation*> while_body_computations;
-  std::vector<HloInstruction*> while_instructions;
-  for (auto* computation : module->computations()) {
+  std::unique_ptr<CallGraph> call_graph = CallGraph::Build(module);
+  if (!call_graph->IsFlattened()) {
+    return FailedPrecondition(
+        "Call graph must be flattened before copy insertion.");
+  }
+
+  // Gather Ids of existing kCopy instructions in the module. We avoid removing
+  // these copies (except via DCE in TupleSimplifier) because they may have been
+  // added for reasons not considered by copy insertion (eg, layout assignment).
+  // Instruction id is used instead of HloInstruction* because the pointer
+  // values may be recycled.
+  tensorflow::gtl::FlatSet<int> existing_copies;
+  for (HloComputation* computation : module->computations()) {
     for (HloInstruction* instruction : computation->instructions()) {
-      if (instruction->opcode() == HloOpcode::kWhile) {
-        while_body_computations.insert(instruction->while_body());
-        while_instructions.push_back(instruction);
+      if (instruction->opcode() == HloOpcode::kCopy) {
+        existing_copies.insert(instruction->unique_id());
       }
     }
   }
 
-  // Collect instruction buffer indices to copy in 'instructions_to_copy'.
-  std::vector<InstructionCopier> instructions_to_copy;
-
-  // Add copies of computation root instructions, if needed.
-  FlatMap<const HloComputation*, ShapeTree<bool>> while_body_read_only_indices;
-  for (auto* computation : module->MakeNonfusionComputations()) {
-    VLOG(2) << "computation " << computation->name();
-    InstructionCopier root_copier(computation->root_instruction(),
-                                  /*copy_users=*/{});
-    if (while_body_computations.count(computation) > 0) {
-      // Record root indices to copy for while body sub-computations. We do not
-      // need to call RecordIndicesWhichPointToParamOrConstant for the while
-      // body root instruction here, because any necessary copies needed to
-      // avoid constants or parameters in the output are handled by while.init
-      // operand copy insertion below (which will share an allocation).
-      HloInstruction* while_body_param = computation->parameter_instruction(0);
-      ShapeTree<bool> read_only_indices(while_body_param->shape());
-      TF_RETURN_IF_ERROR(root_copier.RecordIndicesToCopyForColocatingBuffers(
-          *liveness, while_body_param, &read_only_indices));
-      while_body_read_only_indices[computation] = read_only_indices;
-
-      // Mark control predecessors, based on the body param, for any copies
-      // we'll be inserting. This ensures the copy doesn't run too early.
-      TF_RETURN_IF_ERROR(root_copier.RecordControlPredecessors(
-          points_to_analysis, while_body_param));
-    } else {
-      // Record root indices to copy for general computations.
-      TF_RETURN_IF_ERROR(root_copier.RecordIndicesWhichPointToParamOrConstant(
-          points_to_analysis));
+  TF_RETURN_IF_ERROR(AddCopiesToResolveInterference(module));
+
+  // Simplify the tuple structures introduced by the deep copies. This should be
+  // done before removing copies (RemoveUnnecessaryCopies) because tuple
+  // simplification changes dependencies in the graph which changes live range
+  // interference in the graph. Also run DCE to remove the dead Tuple/GTE
+  // instructions introduced by tuple simplification.
+  TupleSimplifier tuple_simplifier;
+  HloDCE dce;
+  TF_RETURN_IF_ERROR(tuple_simplifier.Run(module).status());
+  TF_RETURN_IF_ERROR(dce.Run(module).status());
+
+  TF_DCHECK_OK(VerifyNoLiveRangeInterference(module));
+
+  MaybeDumpModule("after adding copies to resolve interference", *module);
+
+  DependencyHloOrdering ordering(module);
+  TF_RETURN_IF_ERROR(
+      RemoveUnnecessaryCopies(ordering, existing_copies, module));
+
+  MaybeDumpModule("after removing unnecessary copies", *module);
+
+  TF_RETURN_IF_ERROR(AddSpecialCaseCopies(*call_graph, module));
+
+  MaybeDumpModule("after adding special-case copies", *module);
+
+  TF_RETURN_IF_ERROR(tuple_simplifier.Run(module).status());
+  TF_RETURN_IF_ERROR(dce.Run(module).status());
+  TF_DCHECK_OK(VerifyNoLiveRangeInterference(module));
+
+  MaybeDumpModule("after copy insertion", *module);
+
+  if (VLOG_IS_ON(1)) {
+    int64 num_total_copies = 0;
+    for (HloComputation* computation : module->computations()) {
+      for (HloInstruction* instruction : computation->instructions()) {
+        if (instruction->opcode() == HloOpcode::kCopy) {
+          num_total_copies++;
+        }
+      }
     }
-    instructions_to_copy.push_back(root_copier);
+    VLOG(1) << "Num copies before copy-insertion: " << existing_copies.size();
+    VLOG(1) << "Num copies after copy-insertion: " << num_total_copies;
   }
 
-  // Add copies of while 'init' operand instructions, if needed. 'shared_copies'
-  // is used to ensure that multiple while loops can share a single copy of the
-  // same entry parameter or constant, if all loops use it read-only.
-  //
-  // TODO(b/33301720) Remove redundant while instruction copies.
-  FlatMap<const HloInstruction*, HloInstruction*> shared_copies;
-  for (HloInstruction* while_hlo : while_instructions) {
-    // Fix read_only_indices to account for entry constants. Also
-    // initialize copy_overrides, which ensures a single copy for each read-only
-    // constant that is used in multiple while loops.
-    ShapeTree<bool>* read_only_indices =
-        &while_body_read_only_indices[while_hlo->while_body()];
-    TF_ASSIGN_OR_RETURN(
-        const ShapeTree<HloInstruction*> copy_overrides,
-        RevertReadOnlyIndicesForConstants(while_hlo, points_to_analysis,
-                                          read_only_indices, &shared_copies));
-    // Create InstructionCopier for init operand of while instruction.
-    HloInstruction* init_hlo = while_hlo->mutable_operand(0);
-    InstructionCopier init_copier(init_hlo, {while_hlo});
-    init_copier.SetReadOnlyIndices(*read_only_indices);
-    init_copier.SetCopyOverrides(copy_overrides);
-    // Record 'init' buffer indices which point-to a Constant or Parameter.
-    TF_RETURN_IF_ERROR(init_copier.RecordIndicesWhichPointToParamOrConstant(
-        points_to_analysis));
-    // Record indices necessary to colocate while and init operand buffers.
-    TF_RETURN_IF_ERROR(init_copier.RecordIndicesToCopyForColocatingBuffers(
-        *liveness, while_hlo, /*read_only_indices_out=*/nullptr));
-    instructions_to_copy.push_back(init_copier);
+  return true;
+}
+
+namespace {
+
+bool IsWhileBody(const HloComputation* computation,
+                 const CallGraph& call_graph) {
+  const CallGraphNode& node = call_graph.GetNode(computation);
+
+  if (node.context() == CallContext::kSequential &&
+      !node.caller_callsites().empty()) {
+    // Callgraph should be flattened so sequential context computations can
+    // have at most one caller.
+    CHECK_EQ(node.caller_callsites().size(), 1);
+    const HloInstruction* calling_instruction =
+        node.caller_callsites()[0].instruction();
+    if (calling_instruction->opcode() == HloOpcode::kWhile &&
+        calling_instruction->while_body() == node.computation()) {
+      return true;
+    }
   }
+  return false;
+}
 
-  for (InstructionCopier& to_copy : instructions_to_copy) {
-    if (to_copy.HasAllIndicesFalse()) {
+}  // namespace
+
+/* static */ StatusOr<bool> CopyInsertion::AddCopiesForBufferAssignment(
+    HloModule* module) {
+  std::unique_ptr<CallGraph> call_graph = CallGraph::Build(module);
+  TF_ASSIGN_OR_RETURN(std::unique_ptr<HloDataflowAnalysis> dataflow,
+                      HloDataflowAnalysis::Run(module));
+
+  bool changed = false;
+
+  // If a buffer live out of a computation is a constant, a parameter, or not
+  // defined in the computation, then copy it to account for the limited
+  // computation-scoped analysis in buffer assignment. An exception to this rule
+  // is the while body which is handled properly without copies.
+  for (HloComputation* computation : module->computations()) {
+    if (computation == module->entry_computation() ||
+        IsWhileBody(computation, *call_graph)) {
       continue;
     }
-    changed = true;
 
-    // Copy instruction at recorded buffer indices.
-    HloComputation* computation = to_copy.instruction()->parent();
-    HloInstruction* copy = to_copy.Copy();
-    if (to_copy.instruction() == computation->root_instruction()) {
-      computation->set_root_instruction(copy);
+    HloInstruction* root = computation->root_instruction();
+    ShapeTree<bool> indices_to_copy(root->shape(), /*init_value=*/false);
+    bool copy_root = false;
+    for (const auto& pair : dataflow->GetInstructionValueSet(root)) {
+      const ShapeIndex& index = pair.first;
+      const HloValueSet& value_set = pair.second;
+      for (const HloValue* value : value_set.values()) {
+        HloInstruction* def = value->defining_instruction();
+        if (def->parent() != computation ||
+            def->opcode() == HloOpcode::kConstant ||
+            def->opcode() == HloOpcode::kParameter) {
+          *indices_to_copy.mutable_element(index) = true;
+          copy_root = true;
+        }
+      }
+    }
+    if (copy_root) {
+      TF_ASSIGN_OR_RETURN(
+          HloInstruction * root_copy,
+          computation->DeepCopyInstruction(root, &indices_to_copy));
+      computation->set_root_instruction(root_copy);
+      changed = true;
     }
   }
 
-  VLOG(3) << "After copy insertion for module " << module->name();
-  XLA_VLOG_LINES(3, module->ToString());
+  TupleSimplifier tuple_simplifier;
+  HloDCE dce;
+  TF_ASSIGN_OR_RETURN(bool tuple_simplifier_changed,
+                      tuple_simplifier.Run(module));
+  TF_ASSIGN_OR_RETURN(bool dce_changed, dce.Run(module));
 
-  return changed;
+  return changed || tuple_simplifier_changed || dce_changed;
 }
 
 }  // namespace xla
diff --git a/tensorflow/compiler/xla/service/copy_insertion.h b/tensorflow/compiler/xla/service/copy_insertion.h
index 28bb62e40c..65e3d31e34 100644
--- a/tensorflow/compiler/xla/service/copy_insertion.h
+++ b/tensorflow/compiler/xla/service/copy_insertion.h
@@ -25,12 +25,25 @@ limitations under the License.
 
 namespace xla {
 
-// HLO pass which inserts a copy of the root instruction (creating a new root)
-// if the root is or points-to any constant or parameter instruction.
-// If the root instruction is a Tuple, only tuple elements which point to
-// constant or parameter instructions will be copied.
-// Copy insertion is necessary because constant and parameter arrays have
-// different lifetimes than computation results.
+// Copy insertion is a legalization HLO pass which inserts copies (kCopy
+// instructions) to eliminate several kinds of problems in the HLO module.
+//
+//   (1) Entry parameter or a constant live out of the entry computation.  Entry
+//       computation arguments and constants have different lifetimes than the
+//       computation result and cannot share the same allocation. Parameters and
+//       constants live out of non-entry computations do not need copies.
+//
+//   (2) Different values which are simultaneously live and which must be held
+//       in the same buffer. This can occur in while bodies. Specifically, the
+//       while loop state (the arguments to the while instruction) is updated
+//       in-place and the update may clobber the value from the previous
+//       iteration before the previous value is dead. Computations called from
+//       kCall instructions do not need such copies because kCall has no update
+//       in-place semantics.
+//
+//   (3) The buffer set of the root instruction of the entry computation must be
+//       unambiguous and distinct. That is, InstructionAliasSet::IsAmbiguous and
+//       InstructionAliasSet::IsDistinct return true.
 class CopyInsertion : public HloPassInterface {
  public:
   tensorflow::StringPiece name() const override { return "copy-insertion"; }
@@ -39,14 +52,16 @@ class CopyInsertion : public HloPassInterface {
   // (copies were inserted).
   StatusOr<bool> Run(HloModule* module) override;
 
- protected:
-  // Returns a copy of `hlo`. Looks in inserted_copies_ first to avoid making
-  // duplicate copies.
-  StatusOr<HloInstruction*> FindOrInsertCopy(HloInstruction* hlo);
-
-  // A map containing all copies inserted during the copy insertion pass. The
-  // key is the copied instruction and the value is the copy.
-  tensorflow::gtl::FlatMap<HloInstruction*, HloInstruction*> inserted_copies_;
+  // The CPU and GPU backend need additional copies added due to deficiencies in
+  // buffer assignment. Specifically, copies are needed for constants live-out
+  // of computations, and for values which are live-in and live-out of the same
+  // computation. These copies are needed because buffer-assignment uses a
+  // computation-scoped analyis (TuplePointsToAnalysis) and has limited
+  // visibility across computation boundaries. This method adds these necessary
+  // copies. Returns whether the module was modified.
+  //
+  // TODO(b/62548313): Remove this when buffer assignment is module-scoped.
+  static StatusOr<bool> AddCopiesForBufferAssignment(HloModule* module);
 };
 
 }  // namespace xla
diff --git a/tensorflow/compiler/xla/service/copy_insertion_test.cc b/tensorflow/compiler/xla/service/copy_insertion_test.cc
index a2eacc5c7d..3278fd5f06 100644
--- a/tensorflow/compiler/xla/service/copy_insertion_test.cc
+++ b/tensorflow/compiler/xla/service/copy_insertion_test.cc
@@ -17,18 +17,19 @@ limitations under the License.
 
 #include <set>
 
+#include "tensorflow/compiler/xla/legacy_flags/debug_options_flags.h"
 #include "tensorflow/compiler/xla/literal_util.h"
 #include "tensorflow/compiler/xla/service/hlo_computation.h"
 #include "tensorflow/compiler/xla/service/hlo_instruction.h"
 #include "tensorflow/compiler/xla/service/hlo_matchers.h"
 #include "tensorflow/compiler/xla/service/hlo_module.h"
 #include "tensorflow/compiler/xla/service/hlo_opcode.h"
-#include "tensorflow/compiler/xla/service/tuple_points_to_analysis.h"
 #include "tensorflow/compiler/xla/shape_util.h"
 #include "tensorflow/compiler/xla/test.h"
 #include "tensorflow/compiler/xla/test_helpers.h"
 #include "tensorflow/compiler/xla/tests/hlo_test_base.h"
 #include "tensorflow/compiler/xla/xla_data.pb.h"
+#include "tensorflow/core/platform/test_benchmark.h"
 
 namespace op = xla::testing::opcode_matchers;
 
@@ -37,35 +38,53 @@ namespace {
 
 using ::testing::UnorderedElementsAre;
 
+int64 CountCopies(const HloComputation& computation) {
+  int64 count = 0;
+  for (const auto& instruction : computation.instructions()) {
+    if (instruction->opcode() == HloOpcode::kCopy) {
+      count++;
+    }
+  }
+  return count;
+}
+
+int64 CountCopies(const HloModule& module) {
+  int64 count = 0;
+  for (const auto& computation : module.computations()) {
+    count += CountCopies(*computation);
+  }
+  return count;
+}
+
+int64 CountControlEdges(const HloComputation& computation) {
+  int64 count = 0;
+  for (const auto& instruction : computation.instructions()) {
+    count += instruction->control_successors().size();
+  }
+  return count;
+}
+
+int64 CountControlEdges(const HloModule& module) {
+  int64 count = 0;
+  for (const auto& computation : module.computations()) {
+    count += CountControlEdges(*computation);
+  }
+  return count;
+}
+
 class CopyInsertionTest : public HloTestBase {
  protected:
   void InsertCopies(HloModule* module) {
     CopyInsertion copy_insertion;
-    EXPECT_IS_OK(copy_insertion.Run(module).status());
-
-    // Verify the points to set of the root of the computation after copy
-    // insertion contains no constants or parameters, and is distinct and
-    // non-ambiguous.
-    auto points_to_analysis =
-        TuplePointsToAnalysis::Run(module).ConsumeValueOrDie();
-    const auto& points_to = points_to_analysis->GetPointsToSet(
-        module->entry_computation()->root_instruction());
-    EXPECT_TRUE(points_to.IsDistinct());
-    EXPECT_TRUE(!points_to.IsAmbiguous());
-
-    auto maybe_live_out_buffers =
-        points_to_analysis
-            ->GetPointsToSet(module->entry_computation()->root_instruction())
-            .CreateFlattenedSet();
-
-    for (const LogicalBuffer* buffer : maybe_live_out_buffers) {
-      EXPECT_NE(buffer->instruction()->opcode(), HloOpcode::kConstant);
-      EXPECT_NE(buffer->instruction()->opcode(), HloOpcode::kParameter);
-    }
+    ASSERT_IS_OK(copy_insertion.Run(module).status());
   }
+
+  const Shape scalar_shape_ = ShapeUtil::MakeShape(F32, {});
 };
 
 TEST_F(CopyInsertionTest, SingleParameter) {
+  // Computation is a single parameter passed into a tuple. The parameter should
+  // be copied before entering the tuple.
   auto builder = HloComputation::Builder(TestName());
   HloInstruction* x = builder.AddInstruction(
       HloInstruction::CreateParameter(0, ShapeUtil::MakeShape(F32, {}), "x"));
@@ -77,14 +96,15 @@ TEST_F(CopyInsertionTest, SingleParameter) {
   auto module = CreateNewModule();
   module->AddEntryComputation(builder.Build());
 
-  HloInstruction* old_root = module->entry_computation()->root_instruction();
   InsertCopies(module.get());
 
   EXPECT_THAT(module->entry_computation()->root_instruction(),
-              op::Tuple(op::Copy(old_root->operand(0))));
+              op::Tuple(op::Copy(x)));
 }
 
 TEST_F(CopyInsertionTest, SingleConstant) {
+  // Computation is a single constant passed into a tuple. The parameter should
+  // be copied before entering the tuple.
   auto builder = HloComputation::Builder(TestName());
   HloInstruction* constant = builder.AddInstruction(
       HloInstruction::CreateConstant(Literal::CreateR0<float>(1.0)));
@@ -96,11 +116,42 @@ TEST_F(CopyInsertionTest, SingleConstant) {
   auto module = CreateNewModule();
   module->AddEntryComputation(builder.Build());
 
-  HloInstruction* old_root = module->entry_computation()->root_instruction();
   InsertCopies(module.get());
+  EXPECT_EQ(CountCopies(*module), 1);
 
   EXPECT_THAT(module->entry_computation()->root_instruction(),
-              op::Tuple(op::Copy(old_root->operand(0))));
+              op::Tuple(op::Copy(constant)));
+}
+
+TEST_F(CopyInsertionTest, ExistingCopiesNotRemoved) {
+  // Verify that an kCopy instructions which exist in the pass before
+  // copy-insertion remain in the graph after copy-insertion.
+  auto module = CreateNewModule();
+
+  auto builder = HloComputation::Builder(TestName());
+  HloInstruction* constant = builder.AddInstruction(
+      HloInstruction::CreateConstant(Literal::CreateR0<float>(1.0)));
+  HloInstruction* copy_1 = builder.AddInstruction(HloInstruction::CreateUnary(
+      constant->shape(), HloOpcode::kCopy, constant));
+  HloInstruction* copy_2 = builder.AddInstruction(HloInstruction::CreateUnary(
+      constant->shape(), HloOpcode::kCopy, constant));
+  HloInstruction* add = builder.AddInstruction(HloInstruction::CreateBinary(
+      constant->shape(), HloOpcode::kAdd, copy_1, copy_2));
+  HloInstruction* add_copy = builder.AddInstruction(
+      HloInstruction::CreateUnary(constant->shape(), HloOpcode::kCopy, add));
+
+  module->AddEntryComputation(builder.Build());
+
+  EXPECT_EQ(CountCopies(*module), 3);
+
+  InsertCopies(module.get());
+
+  EXPECT_EQ(CountCopies(*module), 3);
+
+  EXPECT_EQ(module->entry_computation()->root_instruction(), add_copy);
+  EXPECT_THAT(
+      module->entry_computation()->root_instruction(),
+      op::Copy(op::Add(op::Copy(op::Constant()), op::Copy(op::Constant()))));
 }
 
 TEST_F(CopyInsertionTest, MultipleConstantsAndParameters) {
@@ -127,12 +178,12 @@ TEST_F(CopyInsertionTest, MultipleConstantsAndParameters) {
   auto module = CreateNewModule();
   module->AddEntryComputation(builder.Build());
 
-  HloInstruction* old_root = module->entry_computation()->root_instruction();
   InsertCopies(module.get());
+  EXPECT_EQ(CountCopies(*module), 2);
 
-  EXPECT_THAT(module->entry_computation()->root_instruction(),
-              op::Tuple(op::Copy(old_root->operand(0)),
-                        op::Copy(old_root->operand(1)), old_root->operand(2)));
+  EXPECT_THAT(
+      module->entry_computation()->root_instruction(),
+      op::Tuple(op::Copy(constant2), op::Copy(x), op::Add(constant1, y)));
 }
 
 TEST_F(CopyInsertionTest, AmbiguousPointsToSet) {
@@ -165,6 +216,7 @@ TEST_F(CopyInsertionTest, AmbiguousPointsToSet) {
 
   HloInstruction* old_root = module->entry_computation()->root_instruction();
   InsertCopies(module.get());
+  EXPECT_EQ(CountCopies(*module), 2);
 
   EXPECT_THAT(module->entry_computation()->root_instruction(),
               op::Tuple(op::Copy(op::GetTupleElement(old_root)),
@@ -187,6 +239,7 @@ TEST_F(CopyInsertionTest, BitcastParameter) {
 
   HloInstruction* old_root = module->entry_computation()->root_instruction();
   InsertCopies(module.get());
+  EXPECT_EQ(CountCopies(*module), 1);
 
   EXPECT_THAT(module->entry_computation()->root_instruction(),
               op::Copy(old_root));
@@ -208,6 +261,7 @@ TEST_F(CopyInsertionTest, BitcastConstant) {
 
   HloInstruction* old_root = module->entry_computation()->root_instruction();
   InsertCopies(module.get());
+  EXPECT_EQ(CountCopies(*module), 1);
 
   EXPECT_THAT(module->entry_computation()->root_instruction(),
               op::Copy(old_root));
@@ -227,11 +281,11 @@ TEST_F(CopyInsertionTest, BitcastTupleElementParameter) {
 
   EXPECT_THAT(x->users(), UnorderedElementsAre(bitcast));
 
-  HloInstruction* old_root = module->entry_computation()->root_instruction();
   InsertCopies(module.get());
+  EXPECT_EQ(CountCopies(*module), 1);
 
   EXPECT_THAT(module->entry_computation()->root_instruction(),
-              op::Tuple(op::Copy(old_root->operand(0))));
+              op::Tuple(op::Copy(bitcast)));
 }
 
 TEST_F(CopyInsertionTest, NestedTupleParameter) {
@@ -257,6 +311,8 @@ TEST_F(CopyInsertionTest, NestedTupleParameter) {
 
   HloInstruction* old_root = module->entry_computation()->root_instruction();
   InsertCopies(module.get());
+  EXPECT_EQ(CountCopies(*module), 3);
+
   HloInstruction* new_root = module->entry_computation()->root_instruction();
   EXPECT_NE(old_root, new_root);
 
@@ -293,12 +349,13 @@ TEST_F(CopyInsertionTest, ElementOfNestedTupleParameter) {
 
   EXPECT_EQ(gte, module->entry_computation()->root_instruction());
 
-  HloInstruction* old_root = module->entry_computation()->root_instruction();
   InsertCopies(module.get());
+  EXPECT_EQ(CountCopies(*module), 2);
 
-  EXPECT_THAT(module->entry_computation()->root_instruction(),
-              op::Tuple(op::Copy(op::GetTupleElement(old_root)),
-                        op::Copy(op::GetTupleElement(old_root))));
+  EXPECT_THAT(
+      module->entry_computation()->root_instruction(),
+      op::Tuple(op::Copy(op::GetTupleElement(op::GetTupleElement(param))),
+                op::Copy(op::GetTupleElement(op::GetTupleElement(param)))));
 }
 
 TEST_F(CopyInsertionTest, AmbiguousTopLevelRoot) {
@@ -331,6 +388,7 @@ TEST_F(CopyInsertionTest, AmbiguousTopLevelRoot) {
 
   HloInstruction* old_root = module->entry_computation()->root_instruction();
   InsertCopies(module.get());
+  EXPECT_EQ(CountCopies(*module), 1);
 
   EXPECT_THAT(module->entry_computation()->root_instruction(),
               op::Copy(old_root));
@@ -346,12 +404,10 @@ class WhileCopyInsertionTest : public CopyInsertionTest {
   // The parameter 'nested' specifies the loop state shape from which to
   // read the induction variable.
   std::unique_ptr<HloComputation> BuildConditionComputation(
-      bool nested = false) {
+      const Shape& loop_state_shape) {
     auto builder = HloComputation::Builder(TestName() + ".Condition");
     auto limit_const = builder.AddInstruction(
         HloInstruction::CreateConstant(Literal::CreateR0<int32>(10)));
-    const Shape& loop_state_shape =
-        nested ? nested_loop_state_shape_ : loop_state_shape_;
     auto loop_state = builder.AddInstruction(
         HloInstruction::CreateParameter(0, loop_state_shape, "loop_state"));
     auto induction_variable =
@@ -582,7 +638,7 @@ class WhileCopyInsertionTest : public CopyInsertionTest {
       auto loop_state_init = builder.AddInstruction(
           HloInstruction::CreateTuple({induction_var_init, inner_init}));
       auto while_hlo = builder.AddInstruction(HloInstruction::CreateWhile(
-          loop_state_shape_, condition, body, loop_state_init));
+          loop_state_init->shape(), condition, body, loop_state_init));
       module_->AddEntryComputation(builder.Build());
       return while_hlo;
     }
@@ -658,11 +714,28 @@ class WhileCopyInsertionTest : public CopyInsertionTest {
     auto one_vec = builder.AddInstruction(HloInstruction::CreateConstant(
         Literal::CreateR1<float>({1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f})));
     // Take a reference to 'data_init' to make it interfere with while result.
-    builder.AddInstruction(HloInstruction::CreateBinary(
+    auto add = builder.AddInstruction(HloInstruction::CreateBinary(
         data_shape_, HloOpcode::kAdd, data_init, one_vec));
 
-    return BuildWhileInstructionWithCustomInit(loop_state_shape_, data_init,
-                                               &builder);
+    auto xla_while = BuildWhileInstructionWithCustomInit(loop_state_shape_,
+                                                         data_init, &builder);
+
+    // Add an additional binary operation operating on the while and the
+    // interfering add so that neither operation is dead.
+    auto gte = xla_while->parent()->AddInstruction(
+        HloInstruction::CreateGetTupleElement(
+            ShapeUtil::GetSubshape(xla_while->shape(), {1}), xla_while, 1));
+    auto sub = xla_while->parent()->AddInstruction(HloInstruction::CreateBinary(
+        data_shape_, HloOpcode::kSubtract, add, gte));
+    auto gte0 = xla_while->parent()->AddInstruction(
+        HloInstruction::CreateGetTupleElement(
+            ShapeUtil::GetSubshape(xla_while->shape(), {0}), xla_while, 0));
+    auto tuple = xla_while->parent()->AddInstruction(
+        HloInstruction::CreateTuple({gte0, sub}));
+
+    xla_while->parent()->set_root_instruction(tuple);
+
+    return xla_while;
   }
 
   HloInstruction* BuildWhileInstructionWithCustomInit(
@@ -672,8 +745,8 @@ class WhileCopyInsertionTest : public CopyInsertionTest {
         ShapeUtil::Equal(loop_state_shape, nested_loop_state_shape_);
     auto induction_var_init = builder->AddInstruction(
         HloInstruction::CreateConstant(Literal::CreateR0<int32>(0)));
-    auto condition =
-        module_->AddEmbeddedComputation(BuildConditionComputation(nested));
+    auto condition = module_->AddEmbeddedComputation(
+        BuildConditionComputation(loop_state_shape));
     auto body = module_->AddEmbeddedComputation(
         BuildIndependentBodyComputation(nested));
     auto loop_state_init = builder->AddInstruction(
@@ -706,23 +779,21 @@ class WhileCopyInsertionTest : public CopyInsertionTest {
 // CopyInsertion pass should not generate any copies.
 //
 TEST_F(WhileCopyInsertionTest, IndependentTupleElements) {
-  auto condition = module_->AddEmbeddedComputation(BuildConditionComputation());
+  auto condition = module_->AddEmbeddedComputation(
+      BuildConditionComputation(loop_state_shape_));
   auto body =
       module_->AddEmbeddedComputation(BuildIndependentBodyComputation());
   auto while_hlo = BuildWhileInstruction(condition, body);
 
-  const HloInstruction* old_init = while_hlo->operand(0);
-  HloInstruction* old_root = body->root_instruction();
   InsertCopies(module_.get());
-  HloInstruction* new_root = body->root_instruction();
-  const HloInstruction* new_init = while_hlo->operand(0);
 
-  // No copies should be inserted so root should not be updated.
-  EXPECT_EQ(old_root, new_root);
+  // Body should have no copies as the adds can be done inplace.
+  EXPECT_EQ(CountCopies(*body), 0);
+  EXPECT_EQ(CountControlEdges(*module_), 0);
 
-  // Both init indices need copies.
-  EXPECT_THAT(new_init, op::Tuple(op::Copy(old_init->operand(0)),
-                                  op::Copy(old_init->operand(1))));
+  // Both init indices need copies as they are constants.
+  EXPECT_THAT(while_hlo->operand(0),
+              op::Tuple(op::Copy(op::Constant()), op::Copy(op::Constant())));
 }
 
 // Tests while body computation with dependent tuple elements:
@@ -737,20 +808,33 @@ TEST_F(WhileCopyInsertionTest, IndependentTupleElements) {
 //     Tuple(Copy(out0), out1)
 //
 TEST_F(WhileCopyInsertionTest, DependentTupleElements) {
-  auto condition = module_->AddEmbeddedComputation(BuildConditionComputation());
+  auto condition = module_->AddEmbeddedComputation(
+      BuildConditionComputation(loop_state_shape_));
   auto body = module_->AddEmbeddedComputation(BuildDependentBodyComputation());
   auto while_hlo = BuildWhileInstruction(condition, body);
 
-  const HloInstruction* old_init = while_hlo->operand(0);
-  HloInstruction* old_root = body->root_instruction();
   InsertCopies(module_.get());
-  HloInstruction* new_root = body->root_instruction();
-  const HloInstruction* new_init = while_hlo->operand(0);
 
-  EXPECT_THAT(new_root,
-              op::Tuple(op::Copy(old_root->operand(0)), old_root->operand(1)));
-  EXPECT_THAT(new_init, op::Tuple(op::Copy(old_init->operand(0)),
-                                  op::Copy(old_init->operand(1))));
+  EXPECT_EQ(CountCopies(*body), 1);
+  EXPECT_EQ(CountControlEdges(*body), 0);
+
+  EXPECT_THAT(
+      body->root_instruction(),
+      op::Tuple(op::Add(), op::Add(op::GetTupleElement(), op::Broadcast())));
+
+  auto add = body->root_instruction()->operand(0);
+  auto bcast = body->root_instruction()->operand(1)->operand(1);
+  ASSERT_EQ(add->opcode(), HloOpcode::kAdd);
+  ASSERT_EQ(bcast->opcode(), HloOpcode::kBroadcast);
+
+  EXPECT_THAT(
+      while_hlo->while_body()->root_instruction(),
+      op::Tuple(op::Add(op::Copy(), op::Constant()),
+                op::Add(op::GetTupleElement(), op::Broadcast(op::Copy()))));
+
+  // Both init indices need copies as they are constants.
+  EXPECT_THAT(while_hlo->operand(0),
+              op::Tuple(op::Copy(op::Constant()), op::Copy(op::Constant())));
 }
 
 // Tests while body computation with read-only tuple element 0:
@@ -768,33 +852,26 @@ TEST_F(WhileCopyInsertionTest, DependentTupleElements) {
 //
 // CopyInsertion pass should not generate any copies for the while body.
 TEST_F(WhileCopyInsertionTest, DependentTupleElements_OneReadOnly) {
-  auto condition = module_->AddEmbeddedComputation(BuildConditionComputation());
+  auto condition = module_->AddEmbeddedComputation(
+      BuildConditionComputation(loop_state_shape_));
   auto body = module_->AddEmbeddedComputation(
       BuildDependentBodyOneReadOnlyComputation());
-  auto while_hlo = BuildWhileInstruction(condition, body);
+  BuildWhileInstruction(condition, body);
 
-  const HloInstruction* old_init = while_hlo->operand(0);
-  HloInstruction* old_root = body->root_instruction();
   InsertCopies(module_.get());
-  HloInstruction* new_root = body->root_instruction();
-  const HloInstruction* new_init = while_hlo->operand(0);
-
-  // No copies should be inserted in the body, so root should not be updated.
-  EXPECT_EQ(old_root, new_root);
 
-  // Both indices need copies, even though Index 0 is read-only, since both are
-  // constants, which must be copied.
-  EXPECT_THAT(new_init, op::Tuple(op::Copy(old_init->operand(0)),
-                                  op::Copy(old_init->operand(1))));
+  // No copies or control edges should be inserted. The body is legal as is.
+  EXPECT_EQ(CountCopies(*body), 0);
+  EXPECT_EQ(CountControlEdges(*body), 0);
 }
 
 // Same as above, but with two while loops, sharing entry parameters.
 TEST_F(WhileCopyInsertionTest,
        DependentTupleElements_OneReadOnly_TwoLoops_EntryParams) {
-  auto condition1 =
-      module_->AddEmbeddedComputation(BuildConditionComputation());
-  auto condition2 =
-      module_->AddEmbeddedComputation(BuildConditionComputation());
+  auto condition1 = module_->AddEmbeddedComputation(
+      BuildConditionComputation(loop_state_shape_));
+  auto condition2 = module_->AddEmbeddedComputation(
+      BuildConditionComputation(loop_state_shape_));
   auto body1 = module_->AddEmbeddedComputation(
       BuildDependentBodyOneReadOnlyComputation());
   auto body2 = module_->AddEmbeddedComputation(
@@ -812,30 +889,46 @@ TEST_F(WhileCopyInsertionTest,
       loop_state_shape_, condition1, body1, loop_init));
   auto while_hlo2 = builder.AddInstruction(HloInstruction::CreateWhile(
       loop_state_shape_, condition2, body2, loop_init));
-  module_->AddEntryComputation(builder.Build());
+
+  // Add a couple elements from each of the while so both whiles are live.
+  auto gte1 = builder.AddInstruction(HloInstruction::CreateGetTupleElement(
+      ShapeUtil::GetSubshape(while_hlo1->shape(), {0}), while_hlo1, 0));
+  auto gte2 = builder.AddInstruction(HloInstruction::CreateGetTupleElement(
+      ShapeUtil::GetSubshape(while_hlo2->shape(), {0}), while_hlo2, 0));
+  builder.AddInstruction(
+      HloInstruction::CreateBinary(gte1->shape(), HloOpcode::kAdd, gte1, gte2));
+
+  auto entry = module_->AddEntryComputation(builder.Build());
 
   InsertCopies(module_.get());
 
-  // Both while loops alias iter_param, since index 0 is read-only in the body.
-  EXPECT_EQ(while_hlo1->operand(0)->operand(0),
-            while_hlo2->operand(0)->operand(0));
-  EXPECT_EQ(while_hlo1->operand(0)->operand(0), iter_param);
+  // Neither body should have any copies or control edges in them.
+  EXPECT_EQ(CountCopies(*body1), 0);
+  EXPECT_EQ(CountCopies(*body2), 0);
+  EXPECT_EQ(CountControlEdges(*body1), 0);
+  EXPECT_EQ(CountControlEdges(*body2), 0);
 
-  // Each while loop gets its own copy of data_param, since index 1 is not
-  // read-only in the body.
+  // Only two copies should be necessary. Each of the whiles should have
+  // a copy of tuple element 1 (init value is a parameter, and the element is
+  // not non-read-only) so each of the while bodies gets its own buffer to write
+  // element 1 into.
+  EXPECT_EQ(CountCopies(*entry), 2);
+
+  EXPECT_EQ(while_hlo1->operand(0)->operand(1)->opcode(), HloOpcode::kCopy);
+  EXPECT_EQ(while_hlo2->operand(0)->operand(1)->opcode(), HloOpcode::kCopy);
+
+  // The two copies of element 1 should be different.
   EXPECT_NE(while_hlo1->operand(0)->operand(1),
             while_hlo2->operand(0)->operand(1));
-  EXPECT_THAT(while_hlo1->operand(0)->operand(1), op::Copy(data_param));
-  EXPECT_THAT(while_hlo2->operand(0)->operand(1), op::Copy(data_param));
 }
 
 // Same as above, but with two while loops, sharing non-parameters.
 TEST_F(WhileCopyInsertionTest,
        DependentTupleElements_OneReadOnly_TwoLoops_NonParams) {
-  auto condition1 =
-      module_->AddEmbeddedComputation(BuildConditionComputation());
-  auto condition2 =
-      module_->AddEmbeddedComputation(BuildConditionComputation());
+  auto condition1 = module_->AddEmbeddedComputation(
+      BuildConditionComputation(loop_state_shape_));
+  auto condition2 = module_->AddEmbeddedComputation(
+      BuildConditionComputation(loop_state_shape_));
   auto body1 = module_->AddEmbeddedComputation(
       BuildDependentBodyOneReadOnlyComputation());
   auto body2 = module_->AddEmbeddedComputation(
@@ -858,21 +951,28 @@ TEST_F(WhileCopyInsertionTest,
       loop_state_shape_, condition1, body1, loop_init));
   auto while_hlo2 = builder.AddInstruction(HloInstruction::CreateWhile(
       loop_state_shape_, condition2, body2, loop_init));
-  module_->AddEntryComputation(builder.Build());
+
+  // Add a couple elements from each of the while so both whiles are not dead.
+  auto gte1 = builder.AddInstruction(HloInstruction::CreateGetTupleElement(
+      ShapeUtil::GetSubshape(while_hlo1->shape(), {0}), while_hlo1, 0));
+  auto gte2 = builder.AddInstruction(HloInstruction::CreateGetTupleElement(
+      ShapeUtil::GetSubshape(while_hlo2->shape(), {0}), while_hlo2, 0));
+  builder.AddInstruction(
+      HloInstruction::CreateBinary(gte1->shape(), HloOpcode::kAdd, gte1, gte2));
+  auto entry = module_->AddEntryComputation(builder.Build());
 
   InsertCopies(module_.get());
 
-  // No copies of iter_value are necessary, since index 0 is read-only in both
-  // while bodies.
-  EXPECT_EQ(while_hlo1->operand(0)->operand(0), iter_value);
-  EXPECT_EQ(while_hlo2->operand(0)->operand(0), iter_value);
+  // Ideally only one copy should be necessary. One of the whiles should
+  // have a copy of tuple element 1 (the non-read-only element) so each of the
+  // while bodies gets its own buffer to write element 1 into. However, the
+  // analysis isn't perfect and adds an additional copy of element 0.
+  EXPECT_EQ(CountCopies(*entry), 2);
 
-  // Each while loop gets its own copy of data_value, since index 1 is not
-  // read-only in the body.
-  EXPECT_NE(while_hlo1->operand(0)->operand(1),
-            while_hlo2->operand(0)->operand(1));
-  EXPECT_THAT(while_hlo1->operand(0)->operand(1), op::Copy(data_value));
-  EXPECT_THAT(while_hlo2->operand(0)->operand(1), op::Copy(data_value));
+  EXPECT_THAT(while_hlo1->operand(0),
+              op::Tuple(op::Exp(), op::Copy(op::Exp())));
+  EXPECT_THAT(while_hlo2->operand(0),
+              op::Tuple(op::Exp(), op::Copy(op::Exp())));
 }
 
 // Tests while body computation with nested tuple elements:
@@ -905,18 +1005,34 @@ TEST_F(WhileCopyInsertionTest,
 //                     Tuple  // new root
 //
 TEST_F(WhileCopyInsertionTest, NestedTupleElements) {
-  auto condition =
-      module_->AddEmbeddedComputation(BuildConditionComputation(true));
+  auto condition = module_->AddEmbeddedComputation(
+      BuildConditionComputation(nested_loop_state_shape_));
   auto body = module_->AddEmbeddedComputation(BuildNestedBodyComputation());
   BuildWhileInstruction(condition, body, true);
 
-  HloInstruction* old_root = body->root_instruction();
+  //  HloInstruction* old_root = body->root_instruction();
   InsertCopies(module_.get());
 
-  EXPECT_THAT(body->root_instruction(),
-              op::Tuple(old_root->operand(0),
-                        op::Tuple(old_root->operand(1)->operand(0),
-                                  op::Copy(old_root->operand(1)->operand(1)))));
+  // The only copy necessary is for the kReverse as it cannot be done
+  // in-place (instruction can share buffer with operand). The other elements of
+  // the loop state are kAdd instructions which can be done in-place.
+  EXPECT_EQ(CountCopies(*body), 1);
+
+  // Each element of the init needs a copy as all are constants.
+  EXPECT_EQ(CountCopies(*module_), 4);
+
+  // Either the kReverse itself must be copied or the operand of the kReverse
+  // must be copied.
+  if (body->root_instruction()->operand(1)->operand(1)->opcode() ==
+      HloOpcode::kCopy) {
+    EXPECT_THAT(
+        body->root_instruction(),
+        op::Tuple(op::Add(), op::Tuple(op::Add(), op::Copy(op::Reverse()))));
+  } else {
+    EXPECT_THAT(
+        body->root_instruction(),
+        op::Tuple(op::Add(), op::Tuple(op::Add(), op::Reverse(op::Copy()))));
+  }
 }
 
 // Tests while init instruction which points-to a constant.
@@ -927,11 +1043,13 @@ TEST_F(WhileCopyInsertionTest, NestedTupleElements) {
 //
 TEST_F(WhileCopyInsertionTest, InitPointsToConstant) {
   auto while_hlo = BuildWhileInstruction_InitPointsToConstant();
-  auto old_init = while_hlo->operand(0);
+
   InsertCopies(module_.get());
+  EXPECT_EQ(CountCopies(*while_hlo->while_body()), 0);
+  EXPECT_EQ(CountCopies(*module_), 2);
 
-  EXPECT_THAT(while_hlo->operand(0), op::Tuple(op::Copy(old_init->operand(0)),
-                                               op::Copy(old_init->operand(1))));
+  EXPECT_THAT(while_hlo->operand(0),
+              op::Tuple(op::Copy(op::Constant()), op::Copy(op::Constant())));
 }
 
 // Tests while init instruction which points-to a parameter.
@@ -942,11 +1060,13 @@ TEST_F(WhileCopyInsertionTest, InitPointsToConstant) {
 //
 TEST_F(WhileCopyInsertionTest, InitPointsToParameter) {
   auto while_hlo = BuildWhileInstruction_InitPointsToParameter();
-  auto old_init = while_hlo->operand(0);
+
   InsertCopies(module_.get());
+  EXPECT_EQ(CountCopies(*while_hlo->while_body()), 0);
+  EXPECT_EQ(CountCopies(*module_), 2);
 
-  EXPECT_THAT(while_hlo->operand(0), op::Tuple(op::Copy(old_init->operand(0)),
-                                               op::Copy(old_init->operand(1))));
+  EXPECT_THAT(while_hlo->operand(0),
+              op::Tuple(op::Copy(op::Constant()), op::Copy(op::Parameter())));
 }
 
 // Tests while init instruction which has an ambiguous points-to set.
@@ -975,15 +1095,34 @@ TEST_F(WhileCopyInsertionTest, InitPointsToParameter) {
 //
 TEST_F(WhileCopyInsertionTest, InitPointsToAmbiguous) {
   auto while_hlo = BuildWhileInstruction_InitPointsToAmbiguous();
-  auto old_init = while_hlo->operand(0);
-  InsertCopies(module_.get());
 
-  EXPECT_THAT(
-      while_hlo->operand(0),
-      op::Tuple(
-          op::Copy(old_init->operand(0)),
-          op::Tuple(op::Copy(op::GetTupleElement(old_init->operand(1))),
-                    op::Copy(op::GetTupleElement(old_init->operand(1))))));
+  InsertCopies(module_.get());
+  EXPECT_EQ(CountCopies(*module_), 4);
+  // The entry computation requires three copies to resolve the ambiguity of two
+  // init elements and the constant passed in as one of the init elements.
+  EXPECT_EQ(CountCopies(*module_->entry_computation()), 3);
+  EXPECT_THAT(while_hlo->operand(0),
+              op::Tuple(op::Copy(op::Constant()),
+                        op::Tuple(op::Copy(op::GetTupleElement()),
+                                  op::Copy(op::GetTupleElement()))));
+
+  // The body requires one copy because the buffer set is not distinct: the
+  // result of one of the adds is written into two elements of the output of the
+  // loop body. Either element might be copied.
+  EXPECT_EQ(CountCopies(*while_hlo->while_body()), 1);
+  if (while_hlo->while_body()
+          ->root_instruction()
+          ->operand(1)
+          ->operand(0)
+          ->opcode() == HloOpcode::kCopy) {
+    EXPECT_THAT(
+        while_hlo->while_body()->root_instruction(),
+        op::Tuple(op::Add(), op::Tuple(op::Copy(op::Add()), op::Add())));
+  } else {
+    EXPECT_THAT(
+        while_hlo->while_body()->root_instruction(),
+        op::Tuple(op::Add(), op::Tuple(op::Add(), op::Copy(op::Add()))));
+  }
 }
 
 // Tests while init instruction which has a non-distinct points-to set.
@@ -1011,13 +1150,43 @@ TEST_F(WhileCopyInsertionTest, InitPointsToAmbiguous) {
 //
 TEST_F(WhileCopyInsertionTest, InitPointsToNonDistinct) {
   auto while_hlo = BuildWhileInstruction_InitPointsToNonDistinct();
-  auto old_init = while_hlo->operand(0);
+
   InsertCopies(module_.get());
 
-  EXPECT_THAT(while_hlo->operand(0),
-              op::Tuple(op::Copy(old_init->operand(0)),
-                        op::Tuple(op::Copy(old_init->operand(1)->operand(0)),
-                                  op::Copy(old_init->operand(1)->operand(0)))));
+  // The entry computation requires two copies to resolve the non-disinctness of
+  // two init elements and the constant passed in as one of the init
+  // elements. Either element can be copied for the distinctness issue.
+  EXPECT_EQ(CountCopies(*module_->entry_computation()), 2);
+  if (while_hlo->operand(0)->operand(1)->operand(0)->opcode() ==
+      HloOpcode::kCopy) {
+    EXPECT_THAT(
+        while_hlo->operand(0),
+        op::Tuple(op::Copy(op::Constant()),
+                  op::Tuple(op::Copy(op::Broadcast()), op::Broadcast())));
+  } else {
+    EXPECT_THAT(
+        while_hlo->operand(0),
+        op::Tuple(op::Copy(op::Constant()),
+                  op::Tuple(op::Broadcast(), op::Copy(op::Broadcast()))));
+  }
+
+  // The body requires one copy because the buffer set is not distinct: the
+  // result of one of the adds is written into two elements of the output of the
+  // loop body. Either element might be copied.
+  EXPECT_EQ(CountCopies(*while_hlo->while_body()), 1);
+  if (while_hlo->while_body()
+          ->root_instruction()
+          ->operand(1)
+          ->operand(0)
+          ->opcode() == HloOpcode::kCopy) {
+    EXPECT_THAT(
+        while_hlo->while_body()->root_instruction(),
+        op::Tuple(op::Add(), op::Tuple(op::Copy(op::Add()), op::Add())));
+  } else {
+    EXPECT_THAT(
+        while_hlo->while_body()->root_instruction(),
+        op::Tuple(op::Add(), op::Tuple(op::Add(), op::Copy(op::Add()))));
+  }
 }
 
 // Tests while init instruction buffer which interferes with while result
@@ -1031,11 +1200,13 @@ TEST_F(WhileCopyInsertionTest, InitPointsToNonDistinct) {
 //
 TEST_F(WhileCopyInsertionTest, InitPointsToInterfering) {
   auto while_hlo = BuildWhileInstruction_InitPointsToInterfering();
-  auto old_init = while_hlo->operand(0);
+
   InsertCopies(module_.get());
+  EXPECT_EQ(CountCopies(*module_), 2);
+  EXPECT_EQ(CountCopies(*while_hlo->while_body()), 0);
 
-  EXPECT_THAT(while_hlo->operand(0), op::Tuple(op::Copy(old_init->operand(0)),
-                                               op::Copy(old_init->operand(1))));
+  EXPECT_THAT(while_hlo->operand(0),
+              op::Tuple(op::Copy(op::Constant()), op::Copy(op::Broadcast())));
 }
 
 // Tests while init instruction buffer which has a non-distinct points-to set:
@@ -1044,18 +1215,21 @@ TEST_F(WhileCopyInsertionTest, InitPointsToInterfering) {
 //                  Parameter(F32, {8})))
 //
 // where the second and third parameters are identical *and* the tuple shared
-// by another while instruction..
+// by another while instruction.
 //
 // Verifies that the resulting point-to set is distinct in the resulting Tuple
 // (non-identical Copys). In other words, verifies that copy sharing does not
 // insert identical copies to the resulting tuple.
 TEST_F(WhileCopyInsertionTest, InitPointsToNonDistinctUsedByTwoWhileLoops) {
-  auto condition1 =
-      module_->AddEmbeddedComputation(BuildConditionComputation());
-  auto condition2 =
-      module_->AddEmbeddedComputation(BuildConditionComputation());
   // Loop body that outputs tuple comprises two elements dependent on the init
   // tuple.
+  const Shape& loop_state_shape = ShapeUtil::MakeTupleShape(
+      {induction_variable_shape_, data_shape_, data_shape_});
+
+  auto condition1 = module_->AddEmbeddedComputation(
+      BuildConditionComputation(loop_state_shape));
+  auto condition2 = module_->AddEmbeddedComputation(
+      BuildConditionComputation(loop_state_shape));
   auto body1 =
       module_->AddEmbeddedComputation(BuildDependentBodyComputation2());
   auto body2 =
@@ -1072,8 +1246,6 @@ TEST_F(WhileCopyInsertionTest, InitPointsToNonDistinctUsedByTwoWhileLoops) {
   auto loop_init = builder.AddInstruction(
       HloInstruction::CreateTuple({iter_param, data_param, data_param}));
 
-  const Shape& loop_state_shape = ShapeUtil::MakeTupleShape(
-      {induction_variable_shape_, data_shape_, data_shape_});
 
   // Two while loops shares the same loop init tuple.
   auto while_hlo1 = builder.AddInstruction(HloInstruction::CreateWhile(
@@ -1081,43 +1253,478 @@ TEST_F(WhileCopyInsertionTest, InitPointsToNonDistinctUsedByTwoWhileLoops) {
   auto while_hlo2 = builder.AddInstruction(HloInstruction::CreateWhile(
       loop_state_shape, condition2, body2, loop_init));
 
-  module_->AddEntryComputation(builder.Build());
+  // Add add instruction so neither while is dead.
+  auto gte1 = builder.AddInstruction(HloInstruction::CreateGetTupleElement(
+      ShapeUtil::GetSubshape(while_hlo1->shape(), {0}), while_hlo1, 0));
+  auto gte2 = builder.AddInstruction(HloInstruction::CreateGetTupleElement(
+      ShapeUtil::GetSubshape(while_hlo1->shape(), {0}), while_hlo2, 0));
+  builder.AddInstruction(
+      HloInstruction::CreateBinary(gte1->shape(), HloOpcode::kAdd, gte1, gte2));
 
-  auto points_to_analysis =
-      TuplePointsToAnalysis::Run(module_.get()).ConsumeValueOrDie();
+  module_->AddEntryComputation(builder.Build());
 
-  // Asserts that the init tuples before copy insertion is non-distinct.
-  ASSERT_FALSE(
-      points_to_analysis->GetPointsToSet(while_hlo1->operand(0)).IsDistinct());
-  ASSERT_FALSE(
-      points_to_analysis->GetPointsToSet(while_hlo2->operand(0)).IsDistinct());
+  InsertCopies(module_.get());
 
-  auto old_init1 = while_hlo1->operand(0);
-  auto old_init2 = while_hlo2->operand(0);
+  // None of the bodies should have copies or control flow edges.
+  EXPECT_EQ(CountCopies(*body1), 0);
+  EXPECT_EQ(CountCopies(*body2), 0);
 
-  InsertCopies(module_.get());
+  // The loop bodies pass through elements 1 and 2 in the init tuple, so ideally
+  // these should not need to be copied before either while. However, copy
+  // insertion is not able to reason about the transparency of elements through
+  // while bodies in all circumstances so extra copies are added (b/xxx).
+  EXPECT_EQ(CountCopies(*module_->entry_computation()), 2);
 
   EXPECT_THAT(while_hlo1->operand(0),
-              op::Tuple(op::Copy(old_init1->operand(0)),
-                        op::Copy(old_init1->operand(1)),
-                        op::Copy(old_init1->operand(2))));
-
+              op::Tuple(op::Copy(), op::Parameter(), op::Parameter()));
   EXPECT_THAT(while_hlo2->operand(0),
-              op::Tuple(op::Copy(old_init2->operand(0)),
-                        op::Copy(old_init2->operand(1)),
-                        op::Copy(old_init2->operand(2))));
-
-  // Verifies the init tuples after copy insertion is distinct.
-  points_to_analysis =
-      TuplePointsToAnalysis::Run(module_.get()).ConsumeValueOrDie();
-  const auto& points_to1 =
-      points_to_analysis->GetPointsToSet(while_hlo1->operand(0));
-  EXPECT_TRUE(points_to1.IsDistinct());
-
-  const auto& points_to2 =
-      points_to_analysis->GetPointsToSet(while_hlo2->operand(0));
-  EXPECT_TRUE(points_to2.IsDistinct());
+              op::Tuple(op::Copy(), op::Parameter(), op::Parameter()));
 }
 
+TEST_F(CopyInsertionTest, SwizzlingWhile) {
+  // Test a while instruction with a body which permutes its tuple parameter
+  // elements.
+  auto module = CreateNewModule();
+  const Shape loop_state_shape =
+      ShapeUtil::MakeTupleShape({scalar_shape_, scalar_shape_});
+
+  // Body simply interchanges the two tuple elements in the loop state.
+  auto body_builder = HloComputation::Builder("body");
+  auto body_param = body_builder.AddInstruction(
+      HloInstruction::CreateParameter(0, loop_state_shape, "param"));
+  auto body_element_0 = body_builder.AddInstruction(
+      HloInstruction::CreateGetTupleElement(scalar_shape_, body_param, 0));
+  auto body_element_1 = body_builder.AddInstruction(
+      HloInstruction::CreateGetTupleElement(scalar_shape_, body_param, 1));
+  body_builder.AddInstruction(
+      HloInstruction::CreateTuple({body_element_1, body_element_0}));
+  HloComputation* body = module->AddEmbeddedComputation(body_builder.Build());
+
+  auto cond_builder = HloComputation::Builder("condition");
+  cond_builder.AddInstruction(
+      HloInstruction::CreateParameter(0, loop_state_shape, "param"));
+  auto cond_constant = cond_builder.AddInstruction(
+      HloInstruction::CreateConstant(Literal::CreateR0<bool>(false)));
+  cond_builder.AddInstruction(HloInstruction::CreateUnary(
+      cond_constant->shape(), HloOpcode::kNot, cond_constant));
+  HloComputation* condition =
+      module->AddEmbeddedComputation(cond_builder.Build());
+
+  auto builder = HloComputation::Builder(TestName());
+  auto constant1 = builder.AddInstruction(
+      HloInstruction::CreateConstant(Literal::CreateR0<float>(1.0)));
+  auto constant2 = builder.AddInstruction(
+      HloInstruction::CreateConstant(Literal::CreateR0<float>(2.0)));
+  auto tuple = builder.AddInstruction(
+      HloInstruction::CreateTuple({constant1, constant2}));
+  auto xla_while = builder.AddInstruction(
+      HloInstruction::CreateWhile(loop_state_shape, condition, body, tuple));
+  module->AddEntryComputation(builder.Build());
+
+  InsertCopies(module.get());
+
+  EXPECT_EQ(CountCopies(*module), 6);
+
+  // The loop state elements should be copied at the parameter and at the root
+  // with a control edge in between (see DeepCopyAndAddControlEdges). This is
+  // technically one more copy than is strictly necessary, but in order to have
+  // only three copies the copies of different loop state elements must be
+  // ordered with a control edge.
+  EXPECT_EQ(CountCopies(*body), 4);
+  EXPECT_EQ(CountControlEdges(*body), 2);
+
+  EXPECT_THAT(body->root_instruction(),
+              op::Tuple(op::Copy(op::Copy()), op::Copy(op::Copy())));
+
+  EXPECT_EQ(CountCopies(*module->entry_computation()), 2);
+  EXPECT_THAT(xla_while->operand(0), op::Tuple(op::Copy(), op::Copy()));
+}
+
+TEST_F(CopyInsertionTest, SwizzlingWhileWithOneOp) {
+  // Test a while instruction with a body which permutes its tuple parameter
+  // elements and applies one operation to one of the elements. The addition of
+  // the operation (instruction) on the element makes the live range of the
+  // respective input and output elements different than if the instruction were
+  // not there (as in the SwizzlingWhile test above).
+  auto module = CreateNewModule();
+  const Shape loop_state_shape =
+      ShapeUtil::MakeTupleShape({scalar_shape_, scalar_shape_});
+
+  // Body interchanges the two tuple elements in the loop state and negates one
+  // of them.
+  auto body_builder = HloComputation::Builder("body");
+  auto body_param = body_builder.AddInstruction(
+      HloInstruction::CreateParameter(0, loop_state_shape, "param"));
+  auto body_element_0 = body_builder.AddInstruction(
+      HloInstruction::CreateGetTupleElement(scalar_shape_, body_param, 0));
+  auto body_element_1 = body_builder.AddInstruction(
+      HloInstruction::CreateGetTupleElement(scalar_shape_, body_param, 1));
+  auto negate = body_builder.AddInstruction(HloInstruction::CreateUnary(
+      scalar_shape_, HloOpcode::kNegate, body_element_1));
+  body_builder.AddInstruction(
+      HloInstruction::CreateTuple({negate, body_element_0}));
+  HloComputation* body = module->AddEmbeddedComputation(body_builder.Build());
+
+  auto cond_builder = HloComputation::Builder("condition");
+  cond_builder.AddInstruction(
+      HloInstruction::CreateParameter(0, loop_state_shape, "param"));
+  auto cond_constant = cond_builder.AddInstruction(
+      HloInstruction::CreateConstant(Literal::CreateR0<bool>(false)));
+  cond_builder.AddInstruction(HloInstruction::CreateUnary(
+      cond_constant->shape(), HloOpcode::kNot, cond_constant));
+  HloComputation* condition =
+      module->AddEmbeddedComputation(cond_builder.Build());
+
+  auto builder = HloComputation::Builder(TestName());
+  auto constant1 = builder.AddInstruction(
+      HloInstruction::CreateConstant(Literal::CreateR0<float>(1.0)));
+  auto constant2 = builder.AddInstruction(
+      HloInstruction::CreateConstant(Literal::CreateR0<float>(2.0)));
+  auto tuple = builder.AddInstruction(
+      HloInstruction::CreateTuple({constant1, constant2}));
+  auto xla_while = builder.AddInstruction(
+      HloInstruction::CreateWhile(loop_state_shape, condition, body, tuple));
+  module->AddEntryComputation(builder.Build());
+
+  InsertCopies(module.get());
+
+  EXPECT_EQ(CountCopies(*module), 6);
+
+  // The loop state elements should be copied at the parameter and at the root
+  // with a control edge in between (see DeepCopyAndAddControlEdges).
+  EXPECT_EQ(CountCopies(*body), 4);
+  EXPECT_EQ(CountControlEdges(*body), 2);
+
+  EXPECT_THAT(
+      body->root_instruction(),
+      op::Tuple(op::Copy(op::Negate(op::Copy())), op::Copy(op::Copy())));
+
+  EXPECT_EQ(CountCopies(*module->entry_computation()), 2);
+  EXPECT_THAT(xla_while->operand(0), op::Tuple(op::Copy(), op::Copy()));
+}
+
+TEST_F(CopyInsertionTest, SwizzlingWhileSharedInput) {
+  // Test a while instruction with a body which permutes it's tuple parameter
+  // elements similar to SwizzlinWhile above. However, in this test the input to
+  // the while body is a single constant (both loop state elements are the same
+  // constant). This means no copies are necessary because both loop state
+  // elements are the same so interchanging them is a no-op.
+  auto module = CreateNewModule();
+  const Shape loop_state_shape =
+      ShapeUtil::MakeTupleShape({scalar_shape_, scalar_shape_});
+
+  // Body simply interchanges the two tuple elements in the loop state.
+  auto body_builder = HloComputation::Builder("body");
+  auto body_param = body_builder.AddInstruction(
+      HloInstruction::CreateParameter(0, loop_state_shape, "param"));
+  auto body_element_0 = body_builder.AddInstruction(
+      HloInstruction::CreateGetTupleElement(scalar_shape_, body_param, 0));
+  auto body_element_1 = body_builder.AddInstruction(
+      HloInstruction::CreateGetTupleElement(scalar_shape_, body_param, 1));
+  body_builder.AddInstruction(
+      HloInstruction::CreateTuple({body_element_1, body_element_0}));
+  HloComputation* body = module->AddEmbeddedComputation(body_builder.Build());
+
+  auto cond_builder = HloComputation::Builder("condition");
+  cond_builder.AddInstruction(
+      HloInstruction::CreateParameter(0, loop_state_shape, "param"));
+  auto cond_constant = cond_builder.AddInstruction(
+      HloInstruction::CreateConstant(Literal::CreateR0<bool>(false)));
+  cond_builder.AddInstruction(HloInstruction::CreateUnary(
+      cond_constant->shape(), HloOpcode::kNot, cond_constant));
+  HloComputation* condition =
+      module->AddEmbeddedComputation(cond_builder.Build());
+
+  auto builder = HloComputation::Builder(TestName());
+  auto constant = builder.AddInstruction(
+      HloInstruction::CreateConstant(Literal::CreateR0<float>(1.0)));
+  auto tuple =
+      builder.AddInstruction(HloInstruction::CreateTuple({constant, constant}));
+  builder.AddInstruction(
+      HloInstruction::CreateWhile(loop_state_shape, condition, body, tuple));
+  module->AddEntryComputation(builder.Build());
+
+  InsertCopies(module.get());
+
+  EXPECT_EQ(CountCopies(*module), 2);
+  EXPECT_EQ(CountCopies(*body), 0);
+
+  EXPECT_EQ(CountCopies(*module->entry_computation()), 2);
+  EXPECT_THAT(module->entry_computation()->root_instruction(),
+              op::Tuple(op::Copy(), op::Copy()));
+}
+
+TEST_F(CopyInsertionTest, SequentialWhiles) {
+  // Construct a computation with a series of sequential while instructions
+  // containing four loop state elements:
+  //
+  //   element 0 is passed to each while directly from an entry parameter.
+  //
+  //   element 1 is passed transparently in series through all the while bodies.
+  //
+  //   element 2 is negated in each while body. (in-place possible)
+  //
+  //   element 3 is reversed in each while body. (in-place not possible)
+  //
+  const Shape element_shape = ShapeUtil::MakeShape(F32, {42});
+  const Shape loop_state_shape = ShapeUtil::MakeTupleShape(
+      {element_shape, element_shape, element_shape, element_shape});
+
+  auto module = CreateNewModule();
+  auto builder = HloComputation::Builder(TestName());
+  auto param_0 = builder.AddInstruction(
+      HloInstruction::CreateParameter(0, element_shape, "param_0"));
+  auto param_1 = builder.AddInstruction(
+      HloInstruction::CreateParameter(1, element_shape, "param_1"));
+  auto param_2 = builder.AddInstruction(
+      HloInstruction::CreateParameter(2, element_shape, "param_2"));
+  auto param_3 = builder.AddInstruction(
+      HloInstruction::CreateParameter(3, element_shape, "param_3"));
+
+  // The number of sequential kWhile instructions.
+  const int kNumWhiles = 3;
+
+  HloInstruction* prev_element_1 = param_1;
+  HloInstruction* prev_element_2 = param_2;
+  HloInstruction* prev_element_3 = param_3;
+
+  // Vector containing all of the while instructions.
+  std::vector<const HloInstruction*> whiles;
+  for (int i = 0; i < kNumWhiles; ++i) {
+    auto body_builder = HloComputation::Builder("body");
+    auto body_param = body_builder.AddInstruction(
+        HloInstruction::CreateParameter(0, loop_state_shape, "param"));
+    auto body_element_0 = body_builder.AddInstruction(
+        HloInstruction::CreateGetTupleElement(element_shape, body_param, 0));
+    auto body_element_1 = body_builder.AddInstruction(
+        HloInstruction::CreateGetTupleElement(element_shape, body_param, 1));
+    auto body_element_2 = body_builder.AddInstruction(
+        HloInstruction::CreateGetTupleElement(element_shape, body_param, 2));
+    auto body_element_3 = body_builder.AddInstruction(
+        HloInstruction::CreateGetTupleElement(element_shape, body_param, 3));
+    auto negate = body_builder.AddInstruction(HloInstruction::CreateUnary(
+        element_shape, HloOpcode::kNegate, body_element_2));
+    auto reverse = body_builder.AddInstruction(
+        HloInstruction::CreateReverse(element_shape, body_element_3, {0}));
+    body_builder.AddInstruction(HloInstruction::CreateTuple(
+        {body_element_0, body_element_1, negate, reverse}));
+    HloComputation* body = module->AddEmbeddedComputation(body_builder.Build());
+
+    auto cond_builder = HloComputation::Builder("condition");
+    cond_builder.AddInstruction(
+        HloInstruction::CreateParameter(0, loop_state_shape, "param"));
+    auto cond_constant = cond_builder.AddInstruction(
+        HloInstruction::CreateConstant(Literal::CreateR0<bool>(false)));
+    cond_builder.AddInstruction(HloInstruction::CreateUnary(
+        cond_constant->shape(), HloOpcode::kNot, cond_constant));
+    HloComputation* condition =
+        module->AddEmbeddedComputation(cond_builder.Build());
+
+    auto while_init = builder.AddInstruction(HloInstruction::CreateTuple(
+        {param_0, prev_element_1, prev_element_2, prev_element_3}));
+
+    auto xla_while = builder.AddInstruction(HloInstruction::CreateWhile(
+        loop_state_shape, condition, body, while_init));
+    whiles.push_back(xla_while);
+    if (i != kNumWhiles - 1) {
+      prev_element_1 = builder.AddInstruction(
+          HloInstruction::CreateGetTupleElement(element_shape, xla_while, 1));
+      prev_element_2 = builder.AddInstruction(
+          HloInstruction::CreateGetTupleElement(element_shape, xla_while, 2));
+      prev_element_3 = builder.AddInstruction(
+          HloInstruction::CreateGetTupleElement(element_shape, xla_while, 3));
+    }
+  }
+
+  module->AddEntryComputation(builder.Build());
+
+  InsertCopies(module.get());
+
+  // Each while body has one copy. And each loop state element is copied once in
+  // the entry computation.
+  EXPECT_EQ(CountCopies(*module), 4 + kNumWhiles);
+
+  // Each while body should have exactly one copy for element three which is an
+  // op (kReverse) which cannot be done in place.
+  for (const HloInstruction* xla_while : whiles) {
+    EXPECT_EQ(CountCopies(*xla_while->while_body()), 1);
+  }
+
+  EXPECT_THAT(whiles[0]->operand(0), op::Tuple(op::Parameter(), op::Parameter(),
+                                               op::Copy(), op::Copy()));
+  EXPECT_THAT(module->entry_computation()->root_instruction(),
+              op::Tuple(op::Copy(), op::Copy(), op::GetTupleElement(),
+                        op::GetTupleElement()));
+}
+
+TEST_F(CopyInsertionTest, WhileBodyWithConstantRoot) {
+  // Test a while body and condition which are each simply a constant (root of
+  // computation is a constant). The body constant should be copied.
+  auto module = CreateNewModule();
+  auto builder = HloComputation::Builder(TestName());
+  auto param_0 = builder.AddInstruction(
+      HloInstruction::CreateParameter(0, scalar_shape_, "param_0"));
+
+  auto body_builder = HloComputation::Builder("body");
+  body_builder.AddInstruction(
+      HloInstruction::CreateParameter(0, scalar_shape_, "param"));
+  body_builder.AddInstruction(
+      HloInstruction::CreateConstant(Literal::CreateR0<float>(123.0)));
+  HloComputation* body = module->AddEmbeddedComputation(body_builder.Build());
+
+  auto cond_builder = HloComputation::Builder("condition");
+  cond_builder.AddInstruction(
+      HloInstruction::CreateParameter(0, scalar_shape_, "param"));
+  cond_builder.AddInstruction(
+      HloInstruction::CreateConstant(Literal::CreateR0<bool>(false)));
+  HloComputation* condition =
+      module->AddEmbeddedComputation(cond_builder.Build());
+
+  auto xla_while = builder.AddInstruction(
+      HloInstruction::CreateWhile(scalar_shape_, condition, body, param_0));
+
+  module->AddEntryComputation(builder.Build());
+
+  InsertCopies(module.get());
+
+  EXPECT_EQ(CountCopies(*module), 2);
+
+  EXPECT_THAT(xla_while->operand(0), op::Copy(op::Parameter()));
+  EXPECT_THAT(body->root_instruction(), op::Copy(op::Constant()));
+  EXPECT_THAT(condition->root_instruction(), op::Constant());
+}
+
+std::unique_ptr<HloComputation> MakeTrivialCondition(const Shape& shape) {
+  auto builder = HloComputation::Builder("trivial_condition");
+  builder.AddInstruction(
+      HloInstruction::CreateParameter(0, shape, "loop_state"));
+  auto constant = builder.AddInstruction(
+      HloInstruction::CreateConstant(Literal::CreateR0<bool>(false)));
+  builder.AddInstruction(HloInstruction::CreateUnary(
+      constant->shape(), HloOpcode::kNot, constant));
+  return builder.Build();
+}
+
+std::unique_ptr<HloComputation> MakeBenchmarkWhileBody() {
+  auto builder = HloComputation::Builder("benchmark_loop_body");
+  const Shape element_shape = ShapeUtil::MakeShape(F32, {42});
+  const Shape loop_state_shape =
+      ShapeUtil::MakeTupleShape({element_shape, element_shape, element_shape});
+  HloInstruction* param = builder.AddInstruction(
+      HloInstruction::CreateParameter(0, loop_state_shape, "loop_state"));
+  HloInstruction* element_0 = builder.AddInstruction(
+      HloInstruction::CreateGetTupleElement(element_shape, param, 0));
+  HloInstruction* element_1 = builder.AddInstruction(
+      HloInstruction::CreateGetTupleElement(element_shape, param, 1));
+  HloInstruction* element_2 = builder.AddInstruction(
+      HloInstruction::CreateGetTupleElement(element_shape, param, 2));
+
+  HloInstruction* rev_1 = builder.AddInstruction(
+      HloInstruction::CreateReverse(element_shape, element_1, {0}));
+  HloInstruction* add_1_2 = builder.AddInstruction(HloInstruction::CreateBinary(
+      element_shape, HloOpcode::kAdd, element_1, element_2));
+
+  builder.AddInstruction(
+      HloInstruction::CreateTuple({element_0, rev_1, add_1_2}));
+  return builder.Build();
+}
+
+void BM_SequentialWhiles(int num_iters, int num_whiles) {
+  // This benchmark constructs a chain of sequential while instructions.
+  tensorflow::testing::StopTiming();
+  for (int i = 0; i < num_iters; ++i) {
+    HloModuleConfig config;
+    config.set_debug_options(legacy_flags::GetDebugOptionsFromFlags());
+    HloModule module("BM_SequentialWhiles", VersionedComputationHandle(),
+                     config);
+
+    auto builder = HloComputation::Builder("BM_SequentialWhiles");
+    HloInstruction* x = builder.AddInstruction(HloInstruction::CreateParameter(
+        0, ShapeUtil::MakeShape(F32, {42}), "x"));
+    HloInstruction* y = builder.AddInstruction(HloInstruction::CreateParameter(
+        1, ShapeUtil::MakeShape(F32, {42}), "y"));
+    HloInstruction* z = builder.AddInstruction(HloInstruction::CreateParameter(
+        2, ShapeUtil::MakeShape(F32, {42}), "z"));
+    HloInstruction* init =
+        builder.AddInstruction(HloInstruction::CreateTuple({x, y, z}));
+
+    HloInstruction* prev_loop_state = init;
+    for (int w = 0; w < num_whiles; ++w) {
+      HloComputation* condition =
+          module.AddEmbeddedComputation(MakeTrivialCondition(init->shape()));
+      HloComputation* body =
+          module.AddEmbeddedComputation(MakeBenchmarkWhileBody());
+      prev_loop_state = builder.AddInstruction(HloInstruction::CreateWhile(
+          init->shape(), condition, body, prev_loop_state));
+    }
+    module.AddEntryComputation(builder.Build());
+
+    CopyInsertion copy_insertion;
+
+    tensorflow::testing::StartTiming();
+    ASSERT_IS_OK(copy_insertion.Run(&module).status());
+    tensorflow::testing::StopTiming();
+
+    // The entry computation should have three copies, and each body has one.
+    ASSERT_EQ(CountCopies(module), 3 + num_whiles);
+  }
+}
+
+void BM_ParallelWhiles(int num_iters, int num_whiles) {
+  // This benchmark constructs a fan-out of parallel while instructions.
+  tensorflow::testing::StopTiming();
+  for (int i = 0; i < num_iters; ++i) {
+    HloModuleConfig config;
+    config.set_debug_options(legacy_flags::GetDebugOptionsFromFlags());
+    HloModule module("BM_SequentialWhiles", VersionedComputationHandle(),
+                     config);
+
+    auto builder = HloComputation::Builder("BM_ParallelWhiles");
+    HloInstruction* x = builder.AddInstruction(HloInstruction::CreateParameter(
+        0, ShapeUtil::MakeShape(F32, {42}), "x"));
+    HloInstruction* y = builder.AddInstruction(HloInstruction::CreateParameter(
+        1, ShapeUtil::MakeShape(F32, {42}), "y"));
+    HloInstruction* z = builder.AddInstruction(HloInstruction::CreateParameter(
+        2, ShapeUtil::MakeShape(F32, {42}), "z"));
+    HloInstruction* init =
+        builder.AddInstruction(HloInstruction::CreateTuple({x, y, z}));
+
+    HloInstruction* sum = nullptr;
+    for (int w = 0; w < num_whiles; ++w) {
+      HloComputation* condition =
+          module.AddEmbeddedComputation(MakeTrivialCondition(init->shape()));
+      HloComputation* body =
+          module.AddEmbeddedComputation(MakeBenchmarkWhileBody());
+
+      HloInstruction* xla_while = builder.AddInstruction(
+          HloInstruction::CreateWhile(init->shape(), condition, body, init));
+
+      if (sum == nullptr) {
+        sum = builder.AddInstruction(
+            HloInstruction::CreateGetTupleElement(x->shape(), xla_while, 0));
+      } else {
+        HloInstruction* element_0 = builder.AddInstruction(
+            HloInstruction::CreateGetTupleElement(x->shape(), xla_while, 0));
+        sum = builder.AddInstruction(HloInstruction::CreateBinary(
+            x->shape(), HloOpcode::kAdd, sum, element_0));
+      }
+    }
+    module.AddEntryComputation(builder.Build());
+
+    CopyInsertion copy_insertion;
+
+    tensorflow::testing::StartTiming();
+    ASSERT_IS_OK(copy_insertion.Run(&module).status());
+    tensorflow::testing::StopTiming();
+
+    // Each body receives of copy of two of the parameters (the corresponding
+    // elements in the body are modifed), and there is one copy in each body.
+    ASSERT_EQ(CountCopies(module), 3 * num_whiles);
+  }
+}
+
+BENCHMARK(BM_SequentialWhiles)->Arg(512)->Arg(1024)->Arg(2048)->Arg(4096);
+BENCHMARK(BM_ParallelWhiles)->Arg(512)->Arg(1024)->Arg(2048)->Arg(4096);
+
 }  // namespace
 }  // namespace xla
diff --git a/tensorflow/compiler/xla/service/cpu/BUILD b/tensorflow/compiler/xla/service/cpu/BUILD
index 8005cfac8c..e1eed498f6 100644
--- a/tensorflow/compiler/xla/service/cpu/BUILD
+++ b/tensorflow/compiler/xla/service/cpu/BUILD
@@ -79,6 +79,7 @@ cc_library(
     deps = [
         ":compiler_functor",
         ":conv_canonicalization",
+        ":cpu_copy_insertion",
         ":cpu_executable",
         ":cpu_instruction_fusion",
         ":cpu_options",
@@ -103,7 +104,6 @@ cc_library(
         "//tensorflow/compiler/xla/service:buffer_assignment",
         "//tensorflow/compiler/xla/service:buffer_liveness",
         "//tensorflow/compiler/xla/service:call_inliner",
-        "//tensorflow/compiler/xla/service:copy_insertion",
         "//tensorflow/compiler/xla/service:executable",
         "//tensorflow/compiler/xla/service:flatten_call_graph",
         "//tensorflow/compiler/xla/service:hlo",
@@ -751,6 +751,38 @@ cc_library(
     ],
 )
 
+cc_library(
+    name = "cpu_copy_insertion",
+    srcs = ["cpu_copy_insertion.cc"],
+    hdrs = ["cpu_copy_insertion.h"],
+    deps = [
+        "//tensorflow/compiler/xla/service:copy_insertion",
+        "//tensorflow/compiler/xla/service:hlo",
+        "//tensorflow/compiler/xla/service:hlo_pass",
+        "//tensorflow/core:lib",
+    ],
+)
+
+tf_cc_test(
+    name = "cpu_copy_insertion_test",
+    srcs = ["cpu_copy_insertion_test.cc"],
+    deps = [
+        ":cpu_copy_insertion",
+        "//tensorflow/compiler/xla:literal_util",
+        "//tensorflow/compiler/xla:shape_util",
+        "//tensorflow/compiler/xla:test",
+        "//tensorflow/compiler/xla:test_helpers",
+        "//tensorflow/compiler/xla:xla_data_proto",
+        "//tensorflow/compiler/xla/legacy_flags:debug_options_flags",
+        "//tensorflow/compiler/xla/service:hlo",
+        "//tensorflow/compiler/xla/service:hlo_graph_dumper",
+        "//tensorflow/compiler/xla/service:hlo_matchers",
+        "//tensorflow/compiler/xla/tests:hlo_test_base",
+        "//tensorflow/compiler/xla/tests:xla_internal_test_main",
+        "//tensorflow/core:test",
+    ],
+)
+
 # -----------------------------------------------------------------------------
 
 filegroup(
diff --git a/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc b/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc
index 88f7e7a93f..56940b8d63 100644
--- a/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc
+++ b/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc
@@ -46,9 +46,9 @@ limitations under the License.
 #include "tensorflow/compiler/xla/service/buffer_assignment.h"
 #include "tensorflow/compiler/xla/service/buffer_liveness.h"
 #include "tensorflow/compiler/xla/service/call_inliner.h"
-#include "tensorflow/compiler/xla/service/copy_insertion.h"
 #include "tensorflow/compiler/xla/service/cpu/compiler_functor.h"
 #include "tensorflow/compiler/xla/service/cpu/conv_canonicalization.h"
+#include "tensorflow/compiler/xla/service/cpu/cpu_copy_insertion.h"
 #include "tensorflow/compiler/xla/service/cpu/cpu_executable.h"
 #include "tensorflow/compiler/xla/service/cpu/cpu_instruction_fusion.h"
 #include "tensorflow/compiler/xla/service/cpu/cpu_options.h"
@@ -332,15 +332,16 @@ Status CpuCompiler::RunHloPasses(HloModule* module, bool is_aot_compile) {
   // (and sometime after) copy insertion, to avoid dead code from interfering
   // with the rewrites.
   pipeline.AddPass<HloDCE>();
-  pipeline.AddPass<CopyInsertion>();
+  pipeline.AddPass<FlattenCallGraph>();
+  pipeline.AddPass<CpuCopyInsertion>();
   if (options::CpuParallelBackendRequested(module->config())) {
     // Re-run the outlining, in case any copies were inserted into the entry
     // computation.
     pipeline.AddPass<ParallelizationPreparation>(max_parallelism,
                                                  ShapeSizeBytesFunction());
+    pipeline.AddPass<CpuCopyInsertion>();
   }
   pipeline.AddPass<HloDCE>();
-  pipeline.AddPass<FlattenCallGraph>();
   return pipeline.Run(module).status();
 }
 
diff --git a/tensorflow/compiler/xla/service/cpu/cpu_copy_insertion.cc b/tensorflow/compiler/xla/service/cpu/cpu_copy_insertion.cc
new file mode 100644
index 0000000000..baaacd2ecc
--- /dev/null
+++ b/tensorflow/compiler/xla/service/cpu/cpu_copy_insertion.cc
@@ -0,0 +1,43 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/compiler/xla/service/cpu/cpu_copy_insertion.h"
+
+#include <memory>
+#include <set>
+#include <vector>
+
+#include "tensorflow/compiler/xla/service/copy_insertion.h"
+#include "tensorflow/compiler/xla/service/hlo_computation.h"
+#include "tensorflow/compiler/xla/service/hlo_instruction.h"
+#include "tensorflow/core/lib/core/status.h"
+#include "tensorflow/core/platform/logging.h"
+
+namespace xla {
+
+StatusOr<bool> CpuCopyInsertion::Run(HloModule* module) {
+  CopyInsertion generic_copy_insertion;
+
+  TF_ASSIGN_OR_RETURN(bool generic_changed, generic_copy_insertion.Run(module));
+
+  // The CPU backend needs additional copies added due to deficiencies in
+  // buffer assignment.
+  TF_ASSIGN_OR_RETURN(bool buffer_assignment_changed,
+                      CopyInsertion::AddCopiesForBufferAssignment(module));
+
+  return generic_changed || buffer_assignment_changed;
+}
+
+}  // namespace xla
diff --git a/tensorflow/compiler/xla/service/cpu/cpu_copy_insertion.h b/tensorflow/compiler/xla/service/cpu/cpu_copy_insertion.h
new file mode 100644
index 0000000000..3313d1e6eb
--- /dev/null
+++ b/tensorflow/compiler/xla/service/cpu/cpu_copy_insertion.h
@@ -0,0 +1,42 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_CPU_COPY_INSERTION_H_
+#define TENSORFLOW_COMPILER_XLA_SERVICE_CPU_COPY_INSERTION_H_
+
+#include "tensorflow/compiler/xla/service/hlo_module.h"
+#include "tensorflow/compiler/xla/service/hlo_pass_interface.h"
+
+namespace xla {
+
+// Besides the modifications made by the generic xla::CopyInsertion, this
+// CPU-specific copy insertion pass also adds copies to values live out of
+// computations satisfying certain conditions (defined by constant or parameter,
+// etc). This is necessary because of deficiencies of buffer
+// assignment. Specifically, buffer assignment is computation-scoped and does
+// not recognized aliasing between arguments and outputs of computations.
+//
+// TODO(b/62548313): Remove this when buffer assignment is smarter
+// (module-scoped).
+class CpuCopyInsertion : public HloPassInterface {
+ public:
+  tensorflow::StringPiece name() const override { return "copy-insertion"; }
+
+  StatusOr<bool> Run(HloModule* module) override;
+};
+
+}  // namespace xla
+
+#endif  // TENSORFLOW_COMPILER_XLA_SERVICE_CPU_COPY_INSERTION_H_
diff --git a/tensorflow/compiler/xla/service/cpu/cpu_copy_insertion_test.cc b/tensorflow/compiler/xla/service/cpu/cpu_copy_insertion_test.cc
new file mode 100644
index 0000000000..a05a269417
--- /dev/null
+++ b/tensorflow/compiler/xla/service/cpu/cpu_copy_insertion_test.cc
@@ -0,0 +1,139 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/compiler/xla/service/cpu/cpu_copy_insertion.h"
+
+#include "tensorflow/compiler/xla/legacy_flags/debug_options_flags.h"
+#include "tensorflow/compiler/xla/literal_util.h"
+#include "tensorflow/compiler/xla/service/hlo_computation.h"
+#include "tensorflow/compiler/xla/service/hlo_instruction.h"
+#include "tensorflow/compiler/xla/service/hlo_matchers.h"
+#include "tensorflow/compiler/xla/service/hlo_module.h"
+#include "tensorflow/compiler/xla/service/hlo_opcode.h"
+#include "tensorflow/compiler/xla/shape_util.h"
+#include "tensorflow/compiler/xla/test.h"
+#include "tensorflow/compiler/xla/test_helpers.h"
+#include "tensorflow/compiler/xla/tests/hlo_test_base.h"
+#include "tensorflow/compiler/xla/xla_data.pb.h"
+#include "tensorflow/core/platform/test_benchmark.h"
+
+namespace xla {
+namespace {
+
+namespace op = xla::testing::opcode_matchers;
+
+int64 CountCopies(const HloComputation& computation) {
+  int64 count = 0;
+  for (const auto& instruction : computation.instructions()) {
+    if (instruction->opcode() == HloOpcode::kCopy) {
+      count++;
+    }
+  }
+  return count;
+}
+
+int64 CountCopies(const HloModule& module) {
+  int64 count = 0;
+  for (const auto& computation : module.computations()) {
+    count += CountCopies(*computation);
+  }
+  return count;
+}
+
+class CpuCopyInsertionTest : public HloTestBase {
+ protected:
+  void InsertCopies(HloModule* module) {
+    CpuCopyInsertion copy_insertion;
+    ASSERT_IS_OK(copy_insertion.Run(module).status());
+  }
+
+  const Shape scalar_shape_ = ShapeUtil::MakeShape(F32, {});
+};
+
+TEST_F(CpuCopyInsertionTest, WhileBodyWithConstantRoot) {
+  // Test a while body and condition which are each simply a constant (root of
+  // computation is a constant). Each constant should be copied.
+  auto module = CreateNewModule();
+  auto builder = HloComputation::Builder(TestName());
+  auto param_0 = builder.AddInstruction(
+      HloInstruction::CreateParameter(0, scalar_shape_, "param_0"));
+
+  auto body_builder = HloComputation::Builder("body");
+  body_builder.AddInstruction(
+      HloInstruction::CreateParameter(0, scalar_shape_, "param"));
+  body_builder.AddInstruction(
+      HloInstruction::CreateConstant(Literal::CreateR0<float>(123.0)));
+  HloComputation* body = module->AddEmbeddedComputation(body_builder.Build());
+
+  auto cond_builder = HloComputation::Builder("condition");
+  cond_builder.AddInstruction(
+      HloInstruction::CreateParameter(0, scalar_shape_, "param"));
+  cond_builder.AddInstruction(
+      HloInstruction::CreateConstant(Literal::CreateR0<bool>(false)));
+  HloComputation* condition =
+      module->AddEmbeddedComputation(cond_builder.Build());
+
+  auto xla_while = builder.AddInstruction(
+      HloInstruction::CreateWhile(scalar_shape_, condition, body, param_0));
+
+  module->AddEntryComputation(builder.Build());
+
+  InsertCopies(module.get());
+
+  EXPECT_EQ(CountCopies(*module), 3);
+
+  EXPECT_THAT(xla_while->operand(0), op::Copy(op::Parameter()));
+  EXPECT_THAT(body->root_instruction(), op::Copy(op::Constant()));
+  EXPECT_THAT(condition->root_instruction(), op::Copy(op::Constant()));
+}
+
+TEST_F(CpuCopyInsertionTest, TupleCall) {
+  // Test a kCall instruction which calls a computation which produces a three
+  // element tuple: one is a constant, one is a parameter, and one is produced
+  // in the computation. The constant and parameter should be copied.
+  auto module = CreateNewModule();
+  auto builder = HloComputation::Builder(TestName());
+  auto param = builder.AddInstruction(
+      HloInstruction::CreateParameter(0, scalar_shape_, "param_0"));
+  const Shape tuple_shape =
+      ShapeUtil::MakeTupleShape({scalar_shape_, scalar_shape_, scalar_shape_});
+
+  auto sub_builder = HloComputation::Builder("subcomputation");
+  auto sub_param = sub_builder.AddInstruction(
+      HloInstruction::CreateParameter(0, scalar_shape_, "param"));
+  auto constant = sub_builder.AddInstruction(
+      HloInstruction::CreateConstant(Literal::CreateR0<float>(123.0)));
+  auto add = sub_builder.AddInstruction(HloInstruction::CreateBinary(
+      scalar_shape_, HloOpcode::kAdd, sub_param, constant));
+  sub_builder.AddInstruction(
+      HloInstruction::CreateTuple({sub_param, constant, add}));
+  HloComputation* subcomputation =
+      module->AddEmbeddedComputation(sub_builder.Build());
+
+  builder.AddInstruction(
+      HloInstruction::CreateCall(tuple_shape, {param}, subcomputation));
+
+  module->AddEntryComputation(builder.Build());
+
+  InsertCopies(module.get());
+
+  EXPECT_EQ(CountCopies(*subcomputation), 2);
+  EXPECT_THAT(subcomputation->root_instruction(),
+              op::Tuple(op::Copy(op::Parameter()), op::Copy(op::Constant()),
+                        op::Add()));
+}
+
+}  // namespace
+}  // namespace xla
diff --git a/tensorflow/compiler/xla/service/gpu/BUILD b/tensorflow/compiler/xla/service/gpu/BUILD
index 364b76b93c..e57558b578 100644
--- a/tensorflow/compiler/xla/service/gpu/BUILD
+++ b/tensorflow/compiler/xla/service/gpu/BUILD
@@ -343,15 +343,16 @@ tf_cc_test(
 )
 
 cc_library(
-    name = "copy_insertion",
-    srcs = ["copy_insertion.cc"],
-    hdrs = ["copy_insertion.h"],
+    name = "gpu_copy_insertion",
+    srcs = ["gpu_copy_insertion.cc"],
+    hdrs = ["gpu_copy_insertion.h"],
     deps = [
         ":ir_emission_utils",
+        "//tensorflow/compiler/xla/service:call_graph",
         "//tensorflow/compiler/xla/service:copy_insertion",
         "//tensorflow/compiler/xla/service:hlo",
-        "//tensorflow/compiler/xla/service:logical_buffer",
-        "//tensorflow/compiler/xla/service:tuple_points_to_analysis",
+        "//tensorflow/compiler/xla/service:hlo_dataflow_analysis",
+        "//tensorflow/compiler/xla/service:hlo_pass",
         "//tensorflow/core:lib",
     ],
 )
@@ -427,8 +428,8 @@ cc_library(
     hdrs = ["gpu_compiler.h"],
     deps = [
         ":convolution_folding",
-        ":copy_insertion",
         ":fusion_merger",
+        ":gpu_copy_insertion",
         ":gpu_executable",
         ":hlo_schedule",
         ":instruction_fusion",
@@ -574,11 +575,14 @@ tf_cc_test(
     deps = [
         ":instruction_fusion",
         ":while_transformer",
+        "//tensorflow/compiler/xla:shape_util",
         "//tensorflow/compiler/xla:test",
         "//tensorflow/compiler/xla:test_helpers",
         "//tensorflow/compiler/xla/service:copy_insertion",
+        "//tensorflow/compiler/xla/service:hlo_verifier",
         "//tensorflow/compiler/xla/tests:hlo_test_base",
         "//tensorflow/compiler/xla/tests:xla_internal_test_main",
+        "//tensorflow/core:test",
     ],
 )
 
diff --git a/tensorflow/compiler/xla/service/gpu/copy_insertion.cc b/tensorflow/compiler/xla/service/gpu/copy_insertion.cc
deleted file mode 100644
index 3dc8555201..0000000000
--- a/tensorflow/compiler/xla/service/gpu/copy_insertion.cc
+++ /dev/null
@@ -1,71 +0,0 @@
-/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-#include "tensorflow/compiler/xla/service/gpu/copy_insertion.h"
-
-#include <memory>
-#include <set>
-#include <vector>
-
-#include "tensorflow/compiler/xla/service/copy_insertion.h"
-#include "tensorflow/compiler/xla/service/gpu/ir_emission_utils.h"
-#include "tensorflow/compiler/xla/service/hlo_computation.h"
-#include "tensorflow/compiler/xla/service/hlo_instruction.h"
-#include "tensorflow/compiler/xla/service/hlo_opcode.h"
-#include "tensorflow/compiler/xla/service/logical_buffer.h"
-#include "tensorflow/compiler/xla/service/tuple_points_to_analysis.h"
-#include "tensorflow/core/lib/core/status.h"
-#include "tensorflow/core/platform/logging.h"
-
-namespace xla {
-namespace gpu {
-
-StatusOr<bool> GpuCopyInsertion::Run(HloModule* module) {
-  TF_ASSIGN_OR_RETURN(bool changed, CopyInsertion::Run(module));
-
-  TF_ASSIGN_OR_RETURN(auto points_to_analysis,
-                      TuplePointsToAnalysis::Run(module));
-
-  // Make sure all operands of a library call are in memory instead of constants
-  // in IR. The top-level (index {}) of the points-to set of each operand
-  // indicates the source(s) of the array buffer. If any of these are constant,
-  // then add a copy to materialize the array.
-  HloComputation* computation = module->entry_computation();
-  for (HloInstruction* hlo : computation->MakeInstructionPostOrder()) {
-    if (ImplementedAsLibraryCall(*hlo)) {
-      for (int64 i = 0; i < hlo->operand_count(); ++i) {
-        HloInstruction* operand = hlo->mutable_operand(i);
-        const PointsToSet& points_to =
-            points_to_analysis->GetPointsToSet(operand);
-        const auto& element = points_to.element(/*index=*/{});
-        if (std::any_of(element.begin(), element.end(),
-                        [](const LogicalBuffer* buffer_source) {
-                          return buffer_source->instruction()->opcode() ==
-                                 HloOpcode::kConstant;
-                        })) {
-          TF_ASSIGN_OR_RETURN(HloInstruction * copy,
-                              CopyInsertion::FindOrInsertCopy(operand));
-          TF_RETURN_IF_ERROR(hlo->ReplaceOperandWith(i, copy));
-          changed = true;
-        }
-      }
-    }
-  }
-
-  return changed;
-}
-
-}  // namespace gpu
-}  // namespace xla
diff --git a/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc b/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc
index e84c390745..92c53265d0 100644
--- a/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc
+++ b/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc
@@ -33,8 +33,8 @@ limitations under the License.
 #include "tensorflow/compiler/xla/service/call_inliner.h"
 #include "tensorflow/compiler/xla/service/flatten_call_graph.h"
 #include "tensorflow/compiler/xla/service/gpu/convolution_folding.h"
-#include "tensorflow/compiler/xla/service/gpu/copy_insertion.h"
 #include "tensorflow/compiler/xla/service/gpu/fusion_merger.h"
+#include "tensorflow/compiler/xla/service/gpu/gpu_copy_insertion.h"
 #include "tensorflow/compiler/xla/service/gpu/gpu_executable.h"
 #include "tensorflow/compiler/xla/service/gpu/hlo_schedule.h"
 #include "tensorflow/compiler/xla/service/gpu/instruction_fusion.h"
@@ -224,9 +224,8 @@ tensorflow::Status PrepareHloModuleForIrEmitting(
   // (and sometime after) copy insertion, to avoid dead code from interfering
   // with the rewrites.
   pipeline.AddPass<HloDCE>();
-  pipeline.AddPass<GpuCopyInsertion>();
-  pipeline.AddPass<HloDCE>();
   pipeline.AddPass<FlattenCallGraph>();
+  pipeline.AddPass<GpuCopyInsertion>();
   return pipeline.Run(hlo_module).status();
 }
 
diff --git a/tensorflow/compiler/xla/service/gpu/gpu_copy_insertion.cc b/tensorflow/compiler/xla/service/gpu/gpu_copy_insertion.cc
new file mode 100644
index 0000000000..33d739b79d
--- /dev/null
+++ b/tensorflow/compiler/xla/service/gpu/gpu_copy_insertion.cc
@@ -0,0 +1,112 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/compiler/xla/service/gpu/gpu_copy_insertion.h"
+
+#include <memory>
+#include <set>
+#include <vector>
+
+#include "tensorflow/compiler/xla/service/call_graph.h"
+#include "tensorflow/compiler/xla/service/copy_insertion.h"
+#include "tensorflow/compiler/xla/service/gpu/ir_emission_utils.h"
+#include "tensorflow/compiler/xla/service/hlo_computation.h"
+#include "tensorflow/compiler/xla/service/hlo_dataflow_analysis.h"
+#include "tensorflow/compiler/xla/service/hlo_instruction.h"
+#include "tensorflow/compiler/xla/service/hlo_opcode.h"
+#include "tensorflow/core/lib/core/status.h"
+#include "tensorflow/core/lib/gtl/flatset.h"
+#include "tensorflow/core/platform/logging.h"
+
+namespace xla {
+
+namespace gpu {
+
+StatusOr<HloInstruction*> GpuCopyInsertion::FindOrInsertCopy(
+    HloInstruction* hlo) {
+  HloInstruction*& copy = inserted_copies_[hlo];
+  if (copy == nullptr) {
+    TF_ASSIGN_OR_RETURN(copy, hlo->parent()->DeepCopyInstruction(hlo));
+  }
+  return copy;
+}
+
+StatusOr<bool> GpuCopyInsertion::Run(HloModule* module) {
+  CopyInsertion generic_copy_insertion;
+
+  TF_ASSIGN_OR_RETURN(bool changed, generic_copy_insertion.Run(module));
+
+  TF_ASSIGN_OR_RETURN(std::unique_ptr<HloDataflowAnalysis> dataflow,
+                      HloDataflowAnalysis::Run(module));
+
+  // Make sure all operands of a library call are in memory instead of constants
+  // in IR.
+  for (HloInstruction* hlo :
+       module->entry_computation()->MakeInstructionPostOrder()) {
+    if (ImplementedAsLibraryCall(*hlo)) {
+      for (int64 i = 0; i < hlo->operand_count(); ++i) {
+        HloInstruction* operand = hlo->mutable_operand(i);
+        TF_RET_CHECK(ShapeUtil::IsArray(operand->shape()));
+        const auto& values = dataflow->GetValueSet(operand).values();
+        if (std::any_of(values.begin(), values.end(),
+                        [](const HloValue* value) {
+                          return value->defining_instruction()->opcode() ==
+                                 HloOpcode::kConstant;
+                        })) {
+          TF_ASSIGN_OR_RETURN(HloInstruction * copy, FindOrInsertCopy(operand));
+          TF_RETURN_IF_ERROR(hlo->ReplaceOperandWith(i, copy));
+          changed = true;
+        }
+      }
+    }
+  }
+
+  // Init values of a while node cannot be constants. Insert copies for any
+  // constants found at the operand of a while.
+  tensorflow::gtl::FlatSet<HloInstruction*> copied_constants;
+  for (HloComputation* computation : module->computations()) {
+    for (HloInstruction* instruction : computation->instructions()) {
+      if (instruction->opcode() != HloOpcode::kWhile) {
+        continue;
+      }
+      for (auto& pair :
+               dataflow->GetInstructionValueSet(instruction->operand(0))) {
+        const HloValueSet& value_set = pair.second;
+        for (const HloValue* value : value_set.values()) {
+          if (value->defining_instruction()->opcode() ==
+              HloOpcode::kConstant &&
+              !ContainsKey(copied_constants, value->defining_instruction())) {
+            HloInstruction* constant = value->defining_instruction();
+            TF_ASSIGN_OR_RETURN(HloInstruction * copy,
+                                FindOrInsertCopy(constant));
+            TF_RETURN_IF_ERROR(constant->ReplaceAllUsesWith(copy));
+            copied_constants.insert(constant);
+            changed = true;
+          }
+        }
+      }
+    }
+  }
+
+  // The GPU backend needs additional copies added due to deficiencies in
+  // buffer assignment.
+  TF_ASSIGN_OR_RETURN(bool buffer_assignment_changed,
+                      CopyInsertion::AddCopiesForBufferAssignment(module));
+
+  return changed || buffer_assignment_changed;
+}
+
+}  // namespace gpu
+}  // namespace xla
diff --git a/tensorflow/compiler/xla/service/gpu/copy_insertion.h b/tensorflow/compiler/xla/service/gpu/gpu_copy_insertion.h
similarity index 56%
rename from tensorflow/compiler/xla/service/gpu/copy_insertion.h
rename to tensorflow/compiler/xla/service/gpu/gpu_copy_insertion.h
index 11077dad2e..4d77f337e6 100644
--- a/tensorflow/compiler/xla/service/gpu/copy_insertion.h
+++ b/tensorflow/compiler/xla/service/gpu/gpu_copy_insertion.h
@@ -13,11 +13,11 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_GPU_COPY_INSERTION_H_
-#define TENSORFLOW_COMPILER_XLA_SERVICE_GPU_COPY_INSERTION_H_
+#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_GPU_GPU_COPY_INSERTION_H_
+#define TENSORFLOW_COMPILER_XLA_SERVICE_GPU_GPU_COPY_INSERTION_H_
 
-#include "tensorflow/compiler/xla/service/copy_insertion.h"
 #include "tensorflow/compiler/xla/service/hlo_module.h"
+#include "tensorflow/compiler/xla/service/hlo_pass_interface.h"
 
 namespace xla {
 namespace gpu {
@@ -25,12 +25,23 @@ namespace gpu {
 // Besides the modifications made by the generic xla::CopyInsertion, this
 // GPU-specific copy insertion also materializes operands of library calls by
 // inserting kCopy instructions.
-class GpuCopyInsertion : public CopyInsertion {
+class GpuCopyInsertion : public HloPassInterface {
  public:
+  tensorflow::StringPiece name() const override { return "copy-insertion"; }
+
   StatusOr<bool> Run(HloModule* module) override;
+
+ protected:
+  // Returns a copy of `hlo`. Looks in inserted_copies_ first to avoid making
+  // duplicate copies.
+  StatusOr<HloInstruction*> FindOrInsertCopy(HloInstruction* hlo);
+
+  // A map containing all copies inserted to materialize operands of library
+  // calls. The key is the copied instruction and the value is the copy.
+  tensorflow::gtl::FlatMap<HloInstruction*, HloInstruction*> inserted_copies_;
 };
 
 }  // namespace gpu
 }  // namespace xla
 
-#endif  // TENSORFLOW_COMPILER_XLA_SERVICE_GPU_COPY_INSERTION_H_
+#endif  // TENSORFLOW_COMPILER_XLA_SERVICE_GPU_GPU_COPY_INSERTION_H_
diff --git a/tensorflow/compiler/xla/service/gpu/while_transformer_test.cc b/tensorflow/compiler/xla/service/gpu/while_transformer_test.cc
index 44188473d3..f16daa0b54 100644
--- a/tensorflow/compiler/xla/service/gpu/while_transformer_test.cc
+++ b/tensorflow/compiler/xla/service/gpu/while_transformer_test.cc
@@ -17,9 +17,12 @@ limitations under the License.
 
 #include "tensorflow/compiler/xla/service/copy_insertion.h"
 #include "tensorflow/compiler/xla/service/gpu/instruction_fusion.h"
+#include "tensorflow/compiler/xla/service/hlo_verifier.h"
+#include "tensorflow/compiler/xla/shape_util.h"
 #include "tensorflow/compiler/xla/test.h"
 #include "tensorflow/compiler/xla/test_helpers.h"
 #include "tensorflow/compiler/xla/tests/hlo_test_base.h"
+#include "tensorflow/core/lib/core/status_test_util.h"
 
 namespace xla {
 namespace {
@@ -33,8 +36,6 @@ class WhileTransformerTest : public HloTestBase {
       : module_(CreateNewModule()),
         induction_variable_shape_(ShapeUtil::MakeShape(S32, {})),
         data_shape_(ShapeUtil::MakeShape(F32, {8})),
-        loop_state_shape_(ShapeUtil::MakeTupleShape(
-            {induction_variable_shape_, data_shape_})),
         condition_result_shape_(ShapeUtil::MakeShape(PRED, {})) {}
 
   std::unique_ptr<HloComputation> BuildConditionComputation(
@@ -42,8 +43,8 @@ class WhileTransformerTest : public HloTestBase {
     auto builder = HloComputation::Builder(TestName() + ".Condition");
     auto limit_const = builder.AddInstruction(
         HloInstruction::CreateConstant(Literal::CreateR0<int32>(limit)));
-    auto loop_state = builder.AddInstruction(
-        HloInstruction::CreateParameter(0, loop_state_shape_, "loop_state"));
+    auto loop_state = builder.AddInstruction(HloInstruction::CreateParameter(
+        0, GetLoopStateShape(tuple_index), "loop_state"));
     auto induction_variable =
         builder.AddInstruction(HloInstruction::CreateGetTupleElement(
             limit_const->shape(), loop_state, tuple_index));
@@ -58,8 +59,8 @@ class WhileTransformerTest : public HloTestBase {
       const int64 increment) {
     auto builder = HloComputation::Builder(TestName() + ".Body");
     // Create param instruction to access loop state.
-    auto loop_state = builder.AddInstruction(
-        HloInstruction::CreateParameter(0, loop_state_shape_, "loop_state"));
+    auto loop_state = builder.AddInstruction(HloInstruction::CreateParameter(
+        0, GetLoopStateShape(ind_var_tuple_index), "loop_state"));
     // Update the induction variable GTE(ind_var_tuple_index).
     auto induction_variable =
         builder.AddInstruction(HloInstruction::CreateGetTupleElement(
@@ -73,7 +74,7 @@ class WhileTransformerTest : public HloTestBase {
         data_shape_, loop_state, data_tuple_index));
     // Use 'induction_variable' in computation with no path to output tuple.
     auto update = builder.AddInstruction(
-        HloInstruction::CreateBroadcast(data_shape_, induction_variable, {8}));
+        HloInstruction::CreateBroadcast(data_shape_, induction_variable, {}));
     auto add1 = builder.AddInstruction(HloInstruction::CreateBinary(
         data_shape_, HloOpcode::kAdd, data, update));
     // Create output Tuple.
@@ -98,8 +99,9 @@ class WhileTransformerTest : public HloTestBase {
                   HloInstruction::CreateTuple({induction_var_init, data_init}))
             : builder.AddInstruction(
                   HloInstruction::CreateTuple({data_init, induction_var_init}));
-    auto while_hlo = builder.AddInstruction(HloInstruction::CreateWhile(
-        loop_state_shape_, condition, body, loop_state_init));
+    auto while_hlo = builder.AddInstruction(
+        HloInstruction::CreateWhile(GetLoopStateShape(ind_var_tuple_index),
+                                    condition, body, loop_state_init));
     module_->AddEntryComputation(builder.Build());
     return while_hlo;
   }
@@ -115,18 +117,34 @@ class WhileTransformerTest : public HloTestBase {
   }
 
   void RunCopyInsertionPass() {
+    HloVerifier verifier([](const Shape& shape) {
+      return ShapeUtil::ByteSizeOf(shape, /*pointer_size=*/sizeof(void*));
+    });
+    TF_ASSERT_OK(verifier.Run(module_.get()).status());
     CopyInsertion copy_insertion;
-    EXPECT_IS_OK(copy_insertion.Run(module_.get()).status());
+    TF_ASSERT_OK(copy_insertion.Run(module_.get()).status());
+  }
+
+  Shape GetLoopStateShape(const int64 ind_var_tuple_index) {
+    if (ind_var_tuple_index == 0) {
+      return ShapeUtil::MakeTupleShape(
+          {induction_variable_shape_, data_shape_});
+    } else {
+      return ShapeUtil::MakeTupleShape(
+          {data_shape_, induction_variable_shape_});
+    }
   }
 
   std::unique_ptr<HloModule> module_;
   Shape induction_variable_shape_;
   Shape data_shape_;
-  Shape loop_state_shape_;
   Shape condition_result_shape_;
 };
 
-TEST_F(WhileTransformerTest, InductionVariableAtTupleElement0) {
+// TODO(b/68830972): The while transformer is far too fragile. It patterns
+// matches the exact expressions of opcodes. Re-enable when transformation is
+// more general
+TEST_F(WhileTransformerTest, DISABLED_InductionVariableAtTupleElement0) {
   // Build computation with induction variable at tuple element 0.
   auto condition =
       module_->AddEmbeddedComputation(BuildConditionComputation(0, 10));
@@ -137,13 +155,16 @@ TEST_F(WhileTransformerTest, InductionVariableAtTupleElement0) {
   RunCopyInsertionPass();
   // Run WhileTransformer.
   auto result = gpu::CanTransformWhileToFor(while_hlo);
-  ASSERT_TRUE(result.ok());
+  TF_ASSERT_OK(result.status());
   // Check results.
   EXPECT_THAT(result.ConsumeValueOrDie(),
               Eq(std::tuple<int64, int64, int64>(0, 10, 1)));
 }
 
-TEST_F(WhileTransformerTest, InductionVariableAtTupleElement1) {
+// TODO(b/68830972): The while transformer is far too fragile. It patterns
+// matches the exact expressions of opcodes. Re-enable when transformation is
+// more general
+TEST_F(WhileTransformerTest, DISABLED_InductionVariableAtTupleElement1) {
   // Build computation with induction variable at tuple element 1.
   auto condition =
       module_->AddEmbeddedComputation(BuildConditionComputation(1, 10));
@@ -154,13 +175,16 @@ TEST_F(WhileTransformerTest, InductionVariableAtTupleElement1) {
   RunCopyInsertionPass();
   // Run WhileTransformer.
   auto result = gpu::CanTransformWhileToFor(while_hlo);
-  ASSERT_TRUE(result.ok());
+  TF_ASSERT_OK(result.status());
   // Check results.
   EXPECT_THAT(result.ConsumeValueOrDie(),
               Eq(std::tuple<int64, int64, int64>(0, 10, 1)));
 }
 
-TEST_F(WhileTransformerTest, InvalidLoopLimit) {
+// TODO(b/68830972): The while transformer is far too fragile. It patterns
+// matches the exact expressions of opcodes. Re-enable when transformation is
+// more general
+TEST_F(WhileTransformerTest, DISABLED_InvalidLoopLimit) {
   // Build computation with invalid loop limit.
   auto condition =
       module_->AddEmbeddedComputation(BuildConditionComputation(0, 5));
@@ -176,7 +200,10 @@ TEST_F(WhileTransformerTest, InvalidLoopLimit) {
               HasSubstr("Loop start must be less than loop limit."));
 }
 
-TEST_F(WhileTransformerTest, InvalidLoopIncrement) {
+// TODO(b/68830972): The while transformer is far too fragile. It patterns
+// matches the exact expressions of opcodes. Re-enable when transformation is
+// more general
+TEST_F(WhileTransformerTest, DISABLED_InvalidLoopIncrement) {
   // Build computation with invalid loop increment.
   auto condition =
       module_->AddEmbeddedComputation(BuildConditionComputation(0, 10));
diff --git a/tensorflow/compiler/xla/service/hlo_alias_analysis.cc b/tensorflow/compiler/xla/service/hlo_alias_analysis.cc
index 6f80994751..6d2a3aa5b5 100644
--- a/tensorflow/compiler/xla/service/hlo_alias_analysis.cc
+++ b/tensorflow/compiler/xla/service/hlo_alias_analysis.cc
@@ -144,8 +144,10 @@ class BufferValueMap {
   // Move the given value into the given buffer.
   void MoveValueToBuffer(const HloValue& value, BufferNumber buffer_number) {
     BufferNumber old_buffer_number = value_to_buffer_number_.at(&value);
-    buffers_.at(old_buffer_number).erase(&value);
-    if (buffers_.at(old_buffer_number).empty()) {
+    tensorflow::gtl::FlatSet<const HloValue*>& old_value_set =
+        buffers_.at(old_buffer_number);
+    old_value_set.erase(&value);
+    if (old_value_set.empty()) {
       buffers_.erase(old_buffer_number);
     }
 
@@ -175,7 +177,7 @@ class BufferValueMap {
     // Value is init of a while (use is while).
     std::vector<BufferNumber> aliased_buffers;
     for (const HloUse& use : value.uses()) {
-      VLOG(1) << "use of value " << value.ToShortString() << ": " << use;
+      VLOG(2) << "use of value " << value.ToShortString() << ": " << use;
       if (use.instruction->opcode() == HloOpcode::kWhile) {
         // Determine the while value that this shares a buffer with.
         const HloValue& while_value =
@@ -411,7 +413,7 @@ string HloAliasAnalysis::ToString() const {
 /* static */
 StatusOr<std::unique_ptr<HloAliasAnalysis>> HloAliasAnalysis::Run(
     HloModule* module) {
-  VLOG(1) << "HloAliasAnalysis::Run on module " << module->name();
+  VLOG(2) << "HloAliasAnalysis::Run on module " << module->name();
   XLA_VLOG_LINES(2, module->ToString());
 
   auto alias_analysis = WrapUnique(new HloAliasAnalysis(module));
@@ -444,7 +446,7 @@ StatusOr<std::unique_ptr<HloAliasAnalysis>> HloAliasAnalysis::Run(
 
   TF_DCHECK_OK(alias_analysis->Verify());
 
-  XLA_VLOG_LINES(1, alias_analysis->ToString());
+  XLA_VLOG_LINES(2, alias_analysis->ToString());
   return std::move(alias_analysis);
 }
 
diff --git a/tensorflow/compiler/xla/service/hlo_dce.cc b/tensorflow/compiler/xla/service/hlo_dce.cc
index a4921232f5..40e67c8780 100644
--- a/tensorflow/compiler/xla/service/hlo_dce.cc
+++ b/tensorflow/compiler/xla/service/hlo_dce.cc
@@ -37,6 +37,9 @@ namespace xla {
 StatusOr<bool> HloDCE::Run(HloModule* module) {
   bool changed = false;
 
+  VLOG(2) << "Before dce:";
+  XLA_VLOG_LINES(2, module->ToString());
+
   for (auto* computation : module->MakeNonfusionComputations()) {
     std::unordered_set<HloInstruction*> live_instructions;
     TF_RETURN_IF_ERROR(computation->root_instruction()->Accept(
@@ -58,6 +61,8 @@ StatusOr<bool> HloDCE::Run(HloModule* module) {
     }
 
     for (HloInstruction* dead_root : dead_roots) {
+      VLOG(1) << "Removing dead root " << dead_root->ToString()
+              << " and it's unused operands";
       TF_RETURN_IF_ERROR(
           computation->RemoveInstructionAndUnusedOperands(dead_root));
       changed = true;
@@ -87,6 +92,9 @@ StatusOr<bool> HloDCE::Run(HloModule* module) {
     }
   }
 
+  VLOG(2) << "After dce:";
+  XLA_VLOG_LINES(2, module->ToString());
+
   return changed;
 }
 
diff --git a/tensorflow/compiler/xla/tests/tuple_test.cc b/tensorflow/compiler/xla/tests/tuple_test.cc
index 4920f17a7e..5a012c93d6 100644
--- a/tensorflow/compiler/xla/tests/tuple_test.cc
+++ b/tensorflow/compiler/xla/tests/tuple_test.cc
@@ -180,7 +180,8 @@ XLA_TEST_F(TupleTest, TupleGTEToTuple) {
   ComputeAndCompareTuple(&builder, *expected, {}, error_spec_);
 }
 
-XLA_TEST_F(TupleTest, SelectBetweenPredTuples) {
+// TODO(b/68395210): GPU does not tolerate ambiguous top-level buffers.
+XLA_TEST_F(TupleTest, DISABLED_ON_GPU(SelectBetweenPredTuples)) {
   ComputationBuilder b(client_, TestName());
   ComputationDataHandle v1, v2;
 
-- 
GitLab


From 1a376869bdb5b18fcfb50ee2c392380d60f1896e Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 22 Nov 2017 09:14:17 -0800
Subject: [PATCH 0219/1225] dynamic_rnn now supports TensorArray in the cell
 state.

This enables using seq2seq.AttentionWrapper with alignment_history=True in dynamic_rnn.

Also fixed some minor documentation issues.

PiperOrigin-RevId: 176664362
---
 tensorflow/python/kernel_tests/BUILD       |  1 +
 tensorflow/python/kernel_tests/rnn_test.py | 56 +++++++++++++++++++++-
 tensorflow/python/ops/rnn.py               | 16 ++++---
 tensorflow/python/util/nest.py             |  2 +-
 4 files changed, 66 insertions(+), 9 deletions(-)

diff --git a/tensorflow/python/kernel_tests/BUILD b/tensorflow/python/kernel_tests/BUILD
index 7643cf2ddc..4522520ee4 100644
--- a/tensorflow/python/kernel_tests/BUILD
+++ b/tensorflow/python/kernel_tests/BUILD
@@ -2338,6 +2338,7 @@ cuda_py_test(
         "//tensorflow/python:rnn_cell",
         "//tensorflow/python:sparse_grad",
         "//tensorflow/python:tensor_array_grad",
+        "//tensorflow/python:tensor_array_ops",
         "//tensorflow/python:variables",
         "//tensorflow/python/eager:context",
     ],
diff --git a/tensorflow/python/kernel_tests/rnn_test.py b/tensorflow/python/kernel_tests/rnn_test.py
index d8f4b439e3..0c77d1db92 100644
--- a/tensorflow/python/kernel_tests/rnn_test.py
+++ b/tensorflow/python/kernel_tests/rnn_test.py
@@ -38,6 +38,7 @@ from tensorflow.python.ops import gradients_impl
 from tensorflow.python.ops import init_ops
 from tensorflow.python.ops import rnn
 from tensorflow.python.ops import rnn_cell_impl
+from tensorflow.python.ops import tensor_array_ops
 from tensorflow.python.ops import variables as variables_lib
 import tensorflow.python.ops.data_flow_grad  # pylint: disable=unused-import
 import tensorflow.python.ops.nn_grad  # pylint: disable=unused-import
@@ -57,7 +58,7 @@ class Plus1RNNCell(rnn_cell_impl.RNNCell):
   def state_size(self):
     return 5
 
-  def __call__(self, input_, state, scope=None):
+  def call(self, input_, state, scope=None):
     return (input_ + 1, state + 1)
 
 
@@ -75,10 +76,31 @@ class ScalarStateRNNCell(rnn_cell_impl.RNNCell):
   def zero_state(self, batch_size, dtype):
     return array_ops.zeros([], dtype=dtypes.int32)
 
-  def __call__(self, input_, state, scope=None):
+  def call(self, input_, state, scope=None):
     return (input_, state + 1)
 
 
+class TensorArrayStateRNNCell(rnn_cell_impl.RNNCell):
+  """RNN Cell its state as a TensorArray."""
+
+  @property
+  def output_size(self):
+    return 1
+
+  @property
+  def state_size(self):
+    return (tensor_shape.TensorShape([]), ())
+
+  def zero_state(self, batch_size, dtype):
+    return (array_ops.zeros([], dtype=dtypes.int32),
+            tensor_array_ops.TensorArray(
+                dtype=dtype, size=0, dynamic_size=True))
+
+  def call(self, input_, state, scope=None):
+    new_array = state[1].write(state[0], input_)
+    return (input_, (state[0] + 1, new_array))
+
+
 class RNNTest(test.TestCase):
 
   def setUp(self):
@@ -171,6 +193,36 @@ class RNNTest(test.TestCase):
       self.assertAllEqual(outputs.numpy(), np.array([[[1], [2], [3], [4]]]))
       self.assertEqual(state.numpy(), 4)
 
+  @test_util.run_in_graph_and_eager_modes()
+  def testTensorArrayStateIsAccepted(self):
+    cell = TensorArrayStateRNNCell()
+    in_graph_mode = context.in_graph_mode()
+
+    if in_graph_mode:
+      inputs = array_ops.placeholder(dtypes.float32, shape=(1, 4, 1))
+    else:
+      inputs = np.array([[[1], [2], [3], [4]]], dtype=np.float32)
+
+    with self.test_session() as sess:
+      outputs, state = rnn.dynamic_rnn(
+          cell, inputs, dtype=dtypes.float32, sequence_length=[4])
+      state = (state[0], state[1].stack())
+      if in_graph_mode:
+        outputs, state = sess.run(
+            [outputs, state], feed_dict={
+                inputs: [[[1], [2], [3], [4]]]
+            })
+
+    if in_graph_mode:
+      self.assertAllEqual(outputs, np.array([[[1], [2], [3], [4]]]))
+      self.assertEqual(state[0], 4)
+      self.assertAllEqual(state[1], np.array([[[1]], [[2]], [[3]], [[4]]]))
+    else:
+      self.assertAllEqual(outputs.numpy(), np.array([[[1], [2], [3], [4]]]))
+      self.assertEqual(state[0].numpy(), 4)
+      self.assertAllEqual(state[1].numpy(),
+                          np.array([[[1]], [[2]], [[3]], [[4]]]))
+
 
 ######### Benchmarking RNN code
 
diff --git a/tensorflow/python/ops/rnn.py b/tensorflow/python/ops/rnn.py
index df66302402..436872f044 100644
--- a/tensorflow/python/ops/rnn.py
+++ b/tensorflow/python/ops/rnn.py
@@ -148,7 +148,7 @@ def _rnn_step(
     zero_output, state, call_cell, state_size, skip_conditionals=False):
   """Calculate one step of a dynamic RNN minibatch.
 
-  Returns an (output, state) pair conditioned on the sequence_lengths.
+  Returns an (output, state) pair conditioned on `sequence_length`.
   When skip_conditionals=False, the pseudocode is something like:
 
   if t >= max_sequence_length:
@@ -157,14 +157,14 @@ def _rnn_step(
     return call_cell()
 
   # Selectively output zeros or output, old state or new state depending
-  # on if we've finished calculating each row.
+  # on whether we've finished calculating each row.
   new_output, new_state = call_cell()
   final_output = np.vstack([
-    zero_output if time >= sequence_lengths[r] else new_output_r
+    zero_output if time >= sequence_length[r] else new_output_r
     for r, new_output_r in enumerate(new_output)
   ])
   final_state = np.vstack([
-    state[r] if time >= sequence_lengths[r] else new_state_r
+    state[r] if time >= sequence_length[r] else new_state_r
     for r, new_state_r in enumerate(new_state)
   ])
   return (final_output, final_state)
@@ -202,9 +202,12 @@ def _rnn_step(
   flat_zero_output = nest.flatten(zero_output)
 
   def _copy_one_through(output, new_output):
-    # If the state contains a scalar value we simply pass it through.
+    # TensorArray and scalar get passed through.
+    if isinstance(output, tensor_array_ops.TensorArray):
+      return new_output
     if output.shape.ndims == 0:
       return new_output
+    # Otherwise propagate the old or the new value.
     copy_cond = (time >= sequence_length)
     with ops.colocate_with(new_output):
       return array_ops.where(copy_cond, output, new_output)
@@ -264,7 +267,8 @@ def _rnn_step(
   for output, flat_output in zip(final_output, flat_zero_output):
     output.set_shape(flat_output.get_shape())
   for substate, flat_substate in zip(final_state, flat_state):
-    substate.set_shape(flat_substate.get_shape())
+    if not isinstance(substate, tensor_array_ops.TensorArray):
+      substate.set_shape(flat_substate.get_shape())
 
   final_output = nest.pack_sequence_as(
       structure=zero_output, flat_sequence=final_output)
diff --git a/tensorflow/python/util/nest.py b/tensorflow/python/util/nest.py
index dd6acee3c7..75f482e5a8 100644
--- a/tensorflow/python/util/nest.py
+++ b/tensorflow/python/util/nest.py
@@ -293,7 +293,7 @@ def pack_sequence_as(structure, flat_sequence):
   If `structure` is or contains a dict instance, the keys will be sorted to
   pack the flat sequence in deterministic order. This is true also for
   `OrderedDict` instances: their sequence order is ignored, the sorting order of
-  keys is used instead. The same convention is followed in `pack_sequence_as`.
+  keys is used instead. The same convention is followed in `flatten`.
   This correctly repacks dicts and `OrderedDict`s after they have been
   flattened, and also allows flattening an `OrderedDict` and then repacking it
   back using a correponding plain dict, or vice-versa.
-- 
GitLab


From 93023ee2f88cfdc019b76f6d06c679354820d150 Mon Sep 17 00:00:00 2001
From: Yun Peng <pcloudy@google.com>
Date: Wed, 22 Nov 2017 18:46:50 +0100
Subject: [PATCH 0220/1225] Fix nccl.BUILD on Windows (#14790)

Bazel doesn't allow a random file name in `linkopts` attribute, so use `-DEFAULTLIB:` option to specify `ws2_32.lib`
---
 third_party/nccl.BUILD | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/third_party/nccl.BUILD b/third_party/nccl.BUILD
index 06b9b8ff68..8c7b9bdbe9 100644
--- a/third_party/nccl.BUILD
+++ b/third_party/nccl.BUILD
@@ -55,7 +55,7 @@ cc_library(
         ],
         "@%ws%//tensorflow:ios": [],
         "@%ws%//tensorflow:windows": [
-            "ws2_32.lib",
+            "-DEFAULTLIB:ws2_32.lib",
         ],
         "//conditions:default": [
             "-lrt",
-- 
GitLab


From 47cdf5b85df658da5a57d5eb6dd29145051ddcb4 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 22 Nov 2017 09:50:51 -0800
Subject: [PATCH 0221/1225] Internal change.

PiperOrigin-RevId: 176668209
---
 .../contrib/lite/toco/export_tensorflow.cc    |  4 +
 .../fuse_activation_functions.cc              |  3 +-
 .../propagate_fixed_sizes.cc                  | 74 +++++++++----------
 .../contrib/lite/toco/import_tensorflow.cc    | 22 ++++++
 tensorflow/contrib/lite/toco/model.h          |  4 +
 5 files changed, 66 insertions(+), 41 deletions(-)

diff --git a/tensorflow/contrib/lite/toco/export_tensorflow.cc b/tensorflow/contrib/lite/toco/export_tensorflow.cc
index 16b9fa2260..625a4dd83c 100644
--- a/tensorflow/contrib/lite/toco/export_tensorflow.cc
+++ b/tensorflow/contrib/lite/toco/export_tensorflow.cc
@@ -1283,6 +1283,10 @@ void ConvertMeanOperator(const Model& model, const MeanOperator& src_op,
   const auto params_type = GetTensorFlowDataType(model, src_op.inputs[0]);
   (*new_op->mutable_attr())["T"].set_type(params_type);
 
+  if (src_op.keep_dims) {
+    (*new_op->mutable_attr())["keep_dims"].set_b(true);
+  }
+
   // Create the params tensor.
   auto* params_op = tensorflow_graph->add_node();
   params_op->set_op("Const");
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/fuse_activation_functions.cc b/tensorflow/contrib/lite/toco/graph_transformations/fuse_activation_functions.cc
index 7a86510025..d129b5ecf2 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/fuse_activation_functions.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/fuse_activation_functions.cc
@@ -71,7 +71,8 @@ bool FuseActivationFunctions::Run(Model* model, std::size_t op_index) {
   // TODO(dkalenichenko): Great many ops don't support activation function
   // fusing. Switch to the whilelist approach instead.
   if (op->type == OperatorType::kConcatenation ||
-      op->type == OperatorType::kSlice) {
+      op->type == OperatorType::kSlice ||
+      op->type == OperatorType::kTensorFlowSplit) {
     AddMessageF(
         "Not fusing activation function because the %s op doesn't support it",
         LogName(*op));
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/propagate_fixed_sizes.cc b/tensorflow/contrib/lite/toco/graph_transformations/propagate_fixed_sizes.cc
index 82a43bc2ce..f6daad9020 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/propagate_fixed_sizes.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/propagate_fixed_sizes.cc
@@ -48,10 +48,10 @@ void ComputeConvSizes(const Shape& input_shape, int output_depth, int kwidth,
     LOG(FATAL) << "Only supporting SAME or VALID padding";
   }
 
-  fixed_padding->height =
-      ((output_height - 1) * stride_height + kheight - input_height) / 2;
-  fixed_padding->width =
-      ((output_width - 1) * stride_width + kwidth - input_width) / 2;
+  fixed_padding->height = std::max(
+      0, ((output_height - 1) * stride_height + kheight - input_height) / 2);
+  fixed_padding->width = std::max(
+      0, ((output_width - 1) * stride_width + kwidth - input_width) / 2);
 
   // Actually had to debug a situation where those were negative due to bad
   // propagation of placeholder -1 sizes in TensorFlowReshape.
@@ -367,23 +367,40 @@ void ProcessSimpleBinaryOperator(Model* model, Operator* op) {
                                   &output_array);
 }
 
+bool KeepDims(const Operator& op) {
+  switch (op.type) {
+    case OperatorType::kTensorFlowMin:
+      return static_cast<const TensorFlowMinOperator&>(op).keep_dims;
+    case OperatorType::kTensorFlowMax:
+      return static_cast<const TensorFlowMaxOperator&>(op).keep_dims;
+    case OperatorType::kTensorFlowSum:
+      return static_cast<const TensorFlowSumOperator&>(op).keep_dims;
+    case OperatorType::kMean:
+      return static_cast<const MeanOperator&>(op).keep_dims;
+    default:
+      LOG(FATAL) << "Not a reduction operator!";
+      return false;
+  }
+}
+
 void ProcessTensorFlowReductionOperator(Model* model, Operator* op) {
   CHECK_LE(op->inputs.size(), 2);
   auto& output_array = *model->arrays[op->outputs[0]];
   if (output_array.has_shape()) {
     return;
   }
+  const auto& input_array = *model->arrays[op->inputs[0]];
+  if (!input_array.has_shape()) {
+    return;
+  }
+  const auto& input_shape = input_array.shape();
+  const bool keep_dims = KeepDims(*op);
   if (op->inputs.size() == 2) {
     // There is a reduction_indices input.
-    const auto& input_array = *model->arrays[op->inputs[0]];
     const auto& reduction_array = *model->arrays[op->inputs[1]];
     if (!reduction_array.buffer) {
       return;
     }
-    if (!input_array.has_shape()) {
-      return;
-    }
-    auto& input_shape = input_array.shape();
     CHECK(reduction_array.buffer->type == ArrayDataType::kInt32);
     const auto& reduction_array_vals =
         reduction_array.GetBuffer<ArrayDataType::kInt32>().data;
@@ -398,11 +415,17 @@ void ProcessTensorFlowReductionOperator(Model* model, Operator* op) {
       }
       if (!is_reduction_dim) {
         output_dims.push_back(input_shape.dims(i));
+      } else if (keep_dims) {
+        output_dims.push_back(1);
       }
     }
   } else {
     // No reduction_indices means complete reduction to a single scalar.
-    output_array.copy_shape(Shape({}));
+    if (keep_dims) {
+      output_array.copy_shape(input_shape);
+    } else {
+      output_array.copy_shape(Shape({}));
+    }
   }
 }
 
@@ -827,33 +850,6 @@ void ProcessPadOperator(Model* model, PadOperator* op) {
   output_array.copy_shape(output_shape);
 }
 
-void ProcessMeanOperator(Model* model, MeanOperator* op) {
-  CHECK_EQ(op->inputs.size(), 2);
-  CHECK_EQ(op->outputs.size(), 1);
-
-  const auto& input_array = *model->arrays[op->inputs[0]];
-
-  // Yield until input dims have been resolved.
-  if (!input_array.has_shape()) return;
-  const std::vector<int>& indices = op->reduction_indices;
-  if (indices.empty()) return;
-
-  auto& output_array = *model->arrays[op->outputs[0]];
-  if (output_array.has_shape()) return;
-
-  const std::vector<int>& input_dims = input_array.shape().dims();
-  std::vector<int> output_dims;
-  for (int i = 0; i < input_dims.size(); ++i) {
-    if (std::find(indices.begin(), indices.end(), i) == indices.end()) {
-      output_dims.push_back(input_dims[i]);
-    }
-  }
-  CHECK(!output_dims.empty());
-  CHECK_EQ(output_dims.size(), 2);
-
-  *output_array.mutable_shape()->mutable_dims() = output_dims;
-}
-
 void ProcessStridedSliceOperator(Model* model, StridedSliceOperator* op) {
   CHECK_EQ(op->inputs.size(), 4);
   CHECK_EQ(op->outputs.size(), 1);
@@ -1024,6 +1020,7 @@ bool PropagateFixedSizes::Run(Model* model, std::size_t op_index) {
     case OperatorType::kTensorFlowMin:
     case OperatorType::kTensorFlowMax:
     case OperatorType::kTensorFlowSum:
+    case OperatorType::kMean:
       ProcessTensorFlowReductionOperator(model, op);
       break;
 
@@ -1098,9 +1095,6 @@ bool PropagateFixedSizes::Run(Model* model, std::size_t op_index) {
     case OperatorType::kPad:
       ProcessPadOperator(model, static_cast<PadOperator*>(op));
       break;
-    case OperatorType::kMean:
-      ProcessMeanOperator(model, static_cast<MeanOperator*>(op));
-      break;
     case OperatorType::kStridedSlice:
       ProcessStridedSliceOperator(model,
                                   static_cast<StridedSliceOperator*>(op));
diff --git a/tensorflow/contrib/lite/toco/import_tensorflow.cc b/tensorflow/contrib/lite/toco/import_tensorflow.cc
index b00365d5de..f2dc526a36 100644
--- a/tensorflow/contrib/lite/toco/import_tensorflow.cc
+++ b/tensorflow/contrib/lite/toco/import_tensorflow.cc
@@ -588,6 +588,9 @@ void ConvertSumOperator(const NodeDef& node, Model* model) {
   op->inputs.push_back(node.input(1));
   op->outputs.push_back(node.name());
   model->operators.emplace_back(op);
+  if (HasAttr(node, "keep_dims")) {
+    op->keep_dims = GetBoolAttr(node, "keep_dims");
+  }
 }
 
 void ConvertTileOperator(const NodeDef& node, Model* model) {
@@ -697,6 +700,11 @@ void ConvertMaxPoolOperator(const NodeDef& node, Model* model) {
   CHECK_EQ(node.op(), "MaxPool");
   CHECK_EQ(GetInputsCount(node, model->flags.drop_control_dependency()), 1);
   const auto& input_name = node.input(0);
+  // We only support NHWC, which is the default data_format.
+  // So if data_format is not defined, we're all good.
+  if (node.attr().count("data_format")) {
+    CHECK_EQ(GetStringAttr(node, "data_format"), "NHWC");
+  }
   if (HasAttr(node, "T")) {
     CHECK_EQ(GetDataTypeAttr(node, "T"), DT_FLOAT);
   } else {
@@ -732,6 +740,11 @@ void ConvertAvgPoolOperator(const NodeDef& node, Model* model) {
   CHECK_EQ(node.op(), "AvgPool");
   CHECK_EQ(GetInputsCount(node, model->flags.drop_control_dependency()), 1);
   const auto& input_name = node.input(0);
+  // We only support NHWC, which is the default data_format.
+  // So if data_format is not defined, we're all good.
+  if (node.attr().count("data_format")) {
+    CHECK_EQ(GetStringAttr(node, "data_format"), "NHWC");
+  }
   CHECK_EQ(GetDataTypeAttr(node, "T"), DT_FLOAT);
   auto* avgpool = new AveragePoolOperator;
   avgpool->inputs.push_back(input_name);
@@ -902,6 +915,9 @@ void ConvertMaxOperator(const NodeDef& node, Model* model) {
   op->inputs.push_back(node.input(1));
   op->outputs.push_back(node.name());
   model->operators.emplace_back(op);
+  if (HasAttr(node, "keep_dims")) {
+    op->keep_dims = GetBoolAttr(node, "keep_dims");
+  }
 }
 
 void ConvertMinOperator(const NodeDef& node, Model* model) {
@@ -912,6 +928,9 @@ void ConvertMinOperator(const NodeDef& node, Model* model) {
   op->inputs.push_back(node.input(1));
   op->outputs.push_back(node.name());
   model->operators.emplace_back(op);
+  if (HasAttr(node, "keep_dims")) {
+    op->keep_dims = GetBoolAttr(node, "keep_dims");
+  }
 }
 
 void ConvertMaximumOperator(const NodeDef& node, Model* model) {
@@ -1222,6 +1241,9 @@ void ConvertMeanOperator(const NodeDef& node, Model* model) {
   op->inputs.push_back(node.input(1));
   op->outputs.push_back(node.name());
   model->operators.emplace_back(op);
+  if (HasAttr(node, "keep_dims")) {
+    op->keep_dims = GetBoolAttr(node, "keep_dims");
+  }
 }
 
 void ConvertSvdfOperator(const NodeDef& node, Model* model) {
diff --git a/tensorflow/contrib/lite/toco/model.h b/tensorflow/contrib/lite/toco/model.h
index d992f8458f..f2fce2b249 100644
--- a/tensorflow/contrib/lite/toco/model.h
+++ b/tensorflow/contrib/lite/toco/model.h
@@ -819,6 +819,7 @@ struct SubOperator : Operator {
 // of global reduction across all dimensions.
 struct TensorFlowSumOperator : Operator {
   TensorFlowSumOperator() : Operator(OperatorType::kTensorFlowSum) {}
+  bool keep_dims = false;
 };
 
 // TensorFlow Tile equivalent. Refer to TensorFlow documentation for details.
@@ -971,6 +972,7 @@ struct TensorFlowGreaterEqualOperator : Operator {
 // of global reduction across all dimensions.
 struct TensorFlowMaxOperator : Operator {
   TensorFlowMaxOperator() : Operator(OperatorType::kTensorFlowMax) {}
+  bool keep_dims = false;
 };
 
 // Global min reduction: computes the min of all of entries in the input array.
@@ -983,6 +985,7 @@ struct TensorFlowMaxOperator : Operator {
 // of global reduction across all dimensions.
 struct TensorFlowMinOperator : Operator {
   TensorFlowMinOperator() : Operator(OperatorType::kTensorFlowMin) {}
+  bool keep_dims = false;
 };
 
 // Element-wise maximum operator. Currently it only supports scalar as
@@ -1121,6 +1124,7 @@ struct MeanOperator : Operator {
   MeanOperator() : Operator(OperatorType::kMean) {}
 
   std::vector<int> reduction_indices;
+  bool keep_dims = false;
 };
 
 // Svdf operator:
-- 
GitLab


From e6840e82c3082dd367f56aee4043ccfd342abce5 Mon Sep 17 00:00:00 2001
From: Benoit Steiner <bsteiner@google.com>
Date: Wed, 22 Nov 2017 09:54:19 -0800
Subject: [PATCH 0222/1225] Let python handle the deletion of the item objects
 instead of trying to take care of this ourselves Removed a debug printf

PiperOrigin-RevId: 176668566
---
 tensorflow/python/grappler/cluster.py | 1 -
 tensorflow/python/grappler/item.i     | 7 ++-----
 tensorflow/python/grappler/item.py    | 4 ----
 3 files changed, 2 insertions(+), 10 deletions(-)

diff --git a/tensorflow/python/grappler/cluster.py b/tensorflow/python/grappler/cluster.py
index 9864e86811..496f5255b9 100644
--- a/tensorflow/python/grappler/cluster.py
+++ b/tensorflow/python/grappler/cluster.py
@@ -72,7 +72,6 @@ class Cluster(object):
       devices = []
       for raw_dev in ret_from_swig:
         devices.append(device_properties_pb2.NamedDevice.FromString(raw_dev))
-    print(str(devices))
     return devices
 
   def MeasureCosts(self, item):
diff --git a/tensorflow/python/grappler/item.i b/tensorflow/python/grappler/item.i
index 632f614558..8c346b4438 100644
--- a/tensorflow/python/grappler/item.i
+++ b/tensorflow/python/grappler/item.i
@@ -30,6 +30,8 @@ limitations under the License.
   $1 = &temp;
 }
 
+%newobject TF_NewItem;
+
 %{
 #include <unordered_set>
 #include <map>
@@ -66,10 +68,6 @@ static tensorflow::grappler::GrapplerItem* TF_NewItem(
   return item.release();
 }
 
-static void TF_DeleteItem(tensorflow::grappler::GrapplerItem* item) {
-  delete item;
-}
-
 static std::vector<string> TF_IdentifyImportantOps(const tensorflow::grappler::GrapplerItem* item) {
   if (!item) {
     return {};
@@ -129,6 +127,5 @@ static PyObject* TF_GetOpProperties(const tensorflow::grappler::GrapplerItem* it
 static tensorflow::grappler::GrapplerItem* TF_NewItem(
     const tensorflow::MetaGraphDef& meta_graph, bool ignore_colocation,
     bool ignore_user_placement, TF_Status* out_status);
-static void TF_DeleteItem(tensorflow::grappler::GrapplerItem* item);
 static std::vector<string> TF_IdentifyImportantOps(const tensorflow::grappler::GrapplerItem* item);
 static PyObject* TF_GetOpProperties(const tensorflow::grappler::GrapplerItem* item);
diff --git a/tensorflow/python/grappler/item.py b/tensorflow/python/grappler/item.py
index cfbe014de5..4fc94ec968 100644
--- a/tensorflow/python/grappler/item.py
+++ b/tensorflow/python/grappler/item.py
@@ -50,10 +50,6 @@ class Item(object):
     self._tf_item = None
     self._BuildTFItem()
 
-  def __del__(self):
-    if self._tf_item:
-      tf_item.TF_DeleteItem(self._tf_item)
-
   def IdentifyImportantOps(self):
     return tf_item.TF_IdentifyImportantOps(self.tf_item)
 
-- 
GitLab


From b579996aed210f415767a7ffaed55c6828ddf07b Mon Sep 17 00:00:00 2001
From: Benoit Steiner <bsteiner@google.com>
Date: Wed, 22 Nov 2017 10:06:28 -0800
Subject: [PATCH 0223/1225] Don't infer graph properties unless they'll be
 used.

PiperOrigin-RevId: 176670211
---
 .../core/grappler/optimizers/arithmetic_optimizer.cc   | 10 +++++++---
 .../core/grappler/optimizers/constant_folding.cc       |  9 +++++++--
 2 files changed, 14 insertions(+), 5 deletions(-)

diff --git a/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc b/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc
index 33eac79c01..6c1770f0b0 100644
--- a/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc
+++ b/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc
@@ -1055,9 +1055,13 @@ Status ArithmeticOptimizer::Optimize(Cluster* /*cluster*/,
                                      GraphDef* optimized_graph) {
   *optimized_graph = item.graph;
   nodes_to_preserve_ = item.NodesToPreserve();
-  GraphProperties graph_properties(item);
-  TF_RETURN_IF_ERROR(graph_properties.InferStatically());
-  TF_RETURN_IF_ERROR(graph_properties.AnnotateOutputShapes(optimized_graph));
+
+  if (opt_level_ == RewriterConfig::AGGRESSIVE) {
+    // Shapes are only needed in aggressive mode.
+    GraphProperties graph_properties(item);
+    TF_RETURN_IF_ERROR(graph_properties.InferStatically());
+    TF_RETURN_IF_ERROR(graph_properties.AnnotateOutputShapes(optimized_graph));
+  }
 
   DedupComputations(optimized_graph);
   TF_RETURN_IF_ERROR(SimplifyArithmeticOps(optimized_graph));
diff --git a/tensorflow/core/grappler/optimizers/constant_folding.cc b/tensorflow/core/grappler/optimizers/constant_folding.cc
index 8ae0d57068..c77b2badf4 100644
--- a/tensorflow/core/grappler/optimizers/constant_folding.cc
+++ b/tensorflow/core/grappler/optimizers/constant_folding.cc
@@ -1158,8 +1158,13 @@ Status ConstantFolding::RunOptimizationPass(Cluster* cluster,
   }
 
   GraphProperties properties(item);
-  Status s = properties.InferStatically();
-  bool has_feed = !item.feed.empty();
+  const bool has_feed = !item.feed.empty();
+  bool needs_shapes = !has_feed || opt_level_ == RewriterConfig::AGGRESSIVE;
+  Status s = errors::Unknown(
+      "The graph properties are needed but were not initialized");
+  if (needs_shapes) {
+    s = properties.InferStatically();
+  }
 
   if (!has_feed && s.ok()) {
     // Only use static shape information when there is no feed in the
-- 
GitLab


From 3322d51add117acbf6df872c51a086ab1a5feb1d Mon Sep 17 00:00:00 2001
From: Mark Heffernan <meheff@google.com>
Date: Wed, 22 Nov 2017 10:29:01 -0800
Subject: [PATCH 0224/1225] Add local client methods for infeed and outfeed.

PiperOrigin-RevId: 176673166
---
 .../compiler/xla/client/local_client.cc       | 23 +++++++++++++++----
 tensorflow/compiler/xla/client/local_client.h | 14 +++++++++++
 .../xla/tests/local_client_execute_test.cc    |  8 ++++---
 3 files changed, 37 insertions(+), 8 deletions(-)

diff --git a/tensorflow/compiler/xla/client/local_client.cc b/tensorflow/compiler/xla/client/local_client.cc
index c3c664f76a..b051955f0f 100644
--- a/tensorflow/compiler/xla/client/local_client.cc
+++ b/tensorflow/compiler/xla/client/local_client.cc
@@ -275,9 +275,6 @@ StatusOr<std::unique_ptr<LocalExecutable>> LocalClient::Compile(
                                         device_ordinal, options));
 }
 
-// Copy the literal data to the device with the given ordinal and return as a
-// ScopedShapedBuffer. The given memory allocator is used for device memory
-// allocation.
 StatusOr<std::unique_ptr<ScopedShapedBuffer>>
 LocalClient::LiteralToShapedBuffer(const Literal& literal, int device_ordinal,
                                    DeviceMemoryAllocator* allocator) {
@@ -298,8 +295,6 @@ LocalClient::LiteralToShapedBuffer(const Literal& literal, int device_ordinal,
   return std::move(scoped_buffer);
 }
 
-// Copy the data from the device contained in the given ShapedBuffer and
-// return as a Literal.
 StatusOr<std::unique_ptr<Literal>> LocalClient::ShapedBufferToLiteral(
     const ShapedBuffer& shaped_buffer) {
   TF_ASSIGN_OR_RETURN(
@@ -309,4 +304,22 @@ StatusOr<std::unique_ptr<Literal>> LocalClient::ShapedBufferToLiteral(
                                                                  shaped_buffer);
 }
 
+Status LocalClient::TransferToInfeedLocal(const Literal& literal,
+                                          int device_ordinal) {
+  TF_ASSIGN_OR_RETURN(se::StreamExecutor * executor,
+                      backend().stream_executor(device_ordinal));
+  return backend().transfer_manager()->TransferLiteralToInfeed(executor,
+                                                               literal);
+}
+
+StatusOr<std::unique_ptr<Literal>> LocalClient::TransferFromOutfeedLocal(
+    const Shape& shape, int device_ordinal) {
+  TF_ASSIGN_OR_RETURN(se::StreamExecutor * executor,
+                      backend().stream_executor(device_ordinal));
+  auto literal = MakeUnique<Literal>();
+  TF_RETURN_IF_ERROR(backend().transfer_manager()->TransferLiteralFromOutfeed(
+      executor, shape, literal.get()));
+  return std::move(literal);
+}
+
 }  // namespace xla
diff --git a/tensorflow/compiler/xla/client/local_client.h b/tensorflow/compiler/xla/client/local_client.h
index 32fe0d9f84..3ca0d2ef55 100644
--- a/tensorflow/compiler/xla/client/local_client.h
+++ b/tensorflow/compiler/xla/client/local_client.h
@@ -162,6 +162,20 @@ class LocalClient : public Client {
   StatusOr<std::unique_ptr<Literal>> ShapedBufferToLiteral(
       const ShapedBuffer& shaped_buffer);
 
+  // Transfer the given literal to the infeed queue of the given device.
+  // TODO(b/69670845): Remove the 'Local' from the name when LocalClient does
+  // not inherit from Client and there is no possibility of confusion with
+  // Client::TransferToInfeed.
+  Status TransferToInfeedLocal(const Literal& literal, int device_ordinal);
+
+  // Transfer and return a value of the given shape from the outfeed of the
+  // given device.
+  // TODO(b/69670845): Remove the 'Local' from the name when LocalClient does
+  // not inherit from Client and there is no possibility of confusion with
+  // Client::TransferFromOutfeed.
+  StatusOr<std::unique_ptr<Literal>> TransferFromOutfeedLocal(
+      const Shape& shape, int device_ordinal);
+
   // Returns the platform that the underlying service targets.
   perftools::gputools::Platform* platform() const;
 
diff --git a/tensorflow/compiler/xla/tests/local_client_execute_test.cc b/tensorflow/compiler/xla/tests/local_client_execute_test.cc
index fbf9739dbc..ad71d40197 100644
--- a/tensorflow/compiler/xla/tests/local_client_execute_test.cc
+++ b/tensorflow/compiler/xla/tests/local_client_execute_test.cc
@@ -874,11 +874,13 @@ XLA_TEST_F(LocalClientExecuteTest,
           tensorflow::ThreadOptions(), "execute_thread",
           [&] { ExecuteLocallyOrDie(builder.Build().ValueOrDie(), {}); }));
 
-  ASSERT_IS_OK(local_client_->TransferToInfeed(
-      *Literal::CreateR1<float>({-5.0, 123.0, 42.0})));
+  ASSERT_IS_OK(local_client_->TransferToInfeedLocal(
+      *Literal::CreateR1<float>({-5.0, 123.0, 42.0}),
+      local_client_->default_device_ordinal()));
 
   TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Literal> result,
-                          local_client_->TransferFromOutfeed(&shape));
+                          local_client_->TransferFromOutfeedLocal(
+                              shape, local_client_->default_device_ordinal()));
 
   LiteralTestUtil::ExpectR1Equal<float>({-4.0, 125.0, 45.0}, *result);
 }
-- 
GitLab


From b8f4d5b410676659da25355e1e76ec6f70522302 Mon Sep 17 00:00:00 2001
From: Derek Murray <mrry@google.com>
Date: Wed, 22 Nov 2017 10:29:08 -0800
Subject: [PATCH 0225/1225] [tf.data] Remove unused headers from dataset.h.

This change is part of a clean-up that will reduce the dependencies of
the C++ `tensorflow::Dataset` framework, and move towards the
possibility of building custom datasets as external plugins.

PiperOrigin-RevId: 176673196
---
 tensorflow/core/kernels/dataset.h | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/tensorflow/core/kernels/dataset.h b/tensorflow/core/kernels/dataset.h
index 18b57ec97a..39c10163cf 100644
--- a/tensorflow/core/kernels/dataset.h
+++ b/tensorflow/core/kernels/dataset.h
@@ -17,22 +17,17 @@ limitations under the License.
 
 #include <memory>
 
-#include "tensorflow/core/common_runtime/graph_runner.h"
 #include "tensorflow/core/framework/attr_value.pb.h"
 #include "tensorflow/core/framework/attr_value_util.h"
 #include "tensorflow/core/framework/graph.pb.h"
+#include "tensorflow/core/framework/op_kernel.h"
 #include "tensorflow/core/framework/register_types.h"
-#include "tensorflow/core/framework/resource_mgr.h"
 #include "tensorflow/core/framework/variant_encode_decode.h"
 #include "tensorflow/core/framework/variant_tensor_data.h"
-#include "tensorflow/core/graph/graph.h"
-#include "tensorflow/core/graph/graph_constructor.h"
 #include "tensorflow/core/graph/graph_def_builder.h"
 #include "tensorflow/core/lib/strings/str_util.h"
 #include "tensorflow/core/lib/strings/strcat.h"
 #include "tensorflow/core/platform/tracing.h"
-#include "tensorflow/core/util/tensor_bundle/naming.h"
-#include "tensorflow/core/util/tensor_bundle/tensor_bundle.h"
 
 // Polymorphic datasets should support all primitive TensorFlow
 // types. Use this macro to expand `m(T)` once for each primitive type
-- 
GitLab


From 86f150908d9f8411159044f964f21faf30244183 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 22 Nov 2017 10:51:01 -0800
Subject: [PATCH 0226/1225] Changed StringPiece::Hasher to StringPieceHasher in
 stringpiece_test. This will allow the Hasher alias to be removed from
 StringPiece.

PiperOrigin-RevId: 176676125
---
 tensorflow/core/lib/core/stringpiece_test.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/core/lib/core/stringpiece_test.cc b/tensorflow/core/lib/core/stringpiece_test.cc
index a1d335c4e1..8f17b85b6d 100644
--- a/tensorflow/core/lib/core/stringpiece_test.cc
+++ b/tensorflow/core/lib/core/stringpiece_test.cc
@@ -66,7 +66,7 @@ TEST(StringPiece, Contains) {
 }
 
 TEST(StringPieceHasher, Equality) {
-  StringPiece::Hasher hasher;
+  StringPieceHasher hasher;
 
   StringPiece s1("foo");
   StringPiece s2("bar");
-- 
GitLab


From cd8ced7a2d48574908d2c9b7127960078cf41690 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 22 Nov 2017 11:23:50 -0800
Subject: [PATCH 0227/1225] Enable deduping of Assert nodes. Add unit test for
 deduping Assert and CheckNumerics.

PiperOrigin-RevId: 176680534
---
 tensorflow/core/grappler/op_types.cc          | 125 +++++-------------
 tensorflow/core/grappler/op_types.h           |   1 +
 .../optimizers/arithmetic_optimizer.cc        |   4 +
 .../optimizers/arithmetic_optimizer_test.cc   |  32 +++++
 4 files changed, 67 insertions(+), 95 deletions(-)

diff --git a/tensorflow/core/grappler/op_types.cc b/tensorflow/core/grappler/op_types.cc
index 1b23a4caba..1f18b56238 100644
--- a/tensorflow/core/grappler/op_types.cc
+++ b/tensorflow/core/grappler/op_types.cc
@@ -24,64 +24,40 @@ limitations under the License.
 namespace tensorflow {
 namespace grappler {
 
-bool IsAdd(const NodeDef& node) {
-  const auto op = node.op();
-  return op == "Add";
-}
+bool IsAdd(const NodeDef& node) { return node.op() == "Add"; }
 
-bool IsAddN(const NodeDef& node) {
-  const auto op = node.op();
-  return op == "AddN";
-}
+bool IsAddN(const NodeDef& node) { return node.op() == "AddN"; }
 
-bool IsAvgPoolGrad(const NodeDef& node) {
-  const auto op = node.op();
-  return op == "AvgPoolGrad";
-}
+bool IsAvgPoolGrad(const NodeDef& node) { return node.op() == "AvgPoolGrad"; }
 
-bool IsBiasAddGrad(const NodeDef& node) {
-  const auto op = node.op();
-  return op == "BiasAddGrad";
-}
+bool IsAssert(const NodeDef& node) { return node.op() == "Assert"; }
 
-bool IsConcatOffset(const NodeDef& node) {
-  const auto op = node.op();
-  return op == "ConcatOffset";
-}
+bool IsBiasAddGrad(const NodeDef& node) { return node.op() == "BiasAddGrad"; }
 
-bool IsConstant(const NodeDef& node) {
-  const auto op = node.op();
-  return op == "Const";
-}
+bool IsConcatOffset(const NodeDef& node) { return node.op() == "ConcatOffset"; }
 
-bool IsConv2D(const NodeDef& node) {
-  const auto op = node.op();
-  return op == "Conv2D";
-}
+bool IsConstant(const NodeDef& node) { return node.op() == "Const"; }
+
+bool IsConv2D(const NodeDef& node) { return node.op() == "Conv2D"; }
 
 bool IsConv2DBackpropFilter(const NodeDef& node) {
-  const auto op = node.op();
-  return op == "Conv2DBackpropFilter";
+  return node.op() == "Conv2DBackpropFilter";
 }
 
 bool IsConv2DBackpropInput(const NodeDef& node) {
-  const auto op = node.op();
-  return op == "Conv2DBackpropInput";
+  return node.op() == "Conv2DBackpropInput";
 }
 
 bool IsDepthwiseConv2dNative(const NodeDef& node) {
-  const auto op = node.op();
-  return op == "DepthwiseConv2dNative";
+  return node.op() == "DepthwiseConv2dNative";
 }
 
 bool IsDepthwiseConv2dNativeBackpropFilter(const NodeDef& node) {
-  const auto op = node.op();
-  return op == "DepthwiseConv2dNativeBackpropFilter";
+  return node.op() == "DepthwiseConv2dNativeBackpropFilter";
 }
 
 bool IsDepthwiseConv2dNativeBackpropInput(const NodeDef& node) {
-  const auto op = node.op();
-  return op == "DepthwiseConv2dNativeBackpropInput";
+  return node.op() == "DepthwiseConv2dNativeBackpropInput";
 }
 
 bool IsDequeueOp(const NodeDef& node) {
@@ -101,14 +77,10 @@ bool IsExit(const NodeDef& node) {
   return op == "Exit" || op == "RefExit";
 }
 
-bool IsFloorMod(const NodeDef& node) {
-  const auto& op = node.op();
-  return op == "FloorMod";
-}
+bool IsFloorMod(const NodeDef& node) { return node.op() == "FloorMod"; }
 
 bool IsFusedBatchNormGradV1(const NodeDef& node) {
-  const auto& op = node.op();
-  return op == "FusedBatchNormGrad";
+  return node.op() == "FusedBatchNormGrad";
 }
 
 bool IsIdentity(const NodeDef& node) {
@@ -121,25 +93,16 @@ bool IsMerge(const NodeDef& node) {
   return op == "Merge" || op == "RefMerge";
 }
 
-bool IsMul(const NodeDef& node) {
-  const auto op = node.op();
-  return op == "Mul";
-}
+bool IsMul(const NodeDef& node) { return node.op() == "Mul"; }
 
-bool IsNoOp(const NodeDef& node) {
-  const auto op = node.op();
-  return op == "NoOp";
-}
+bool IsNoOp(const NodeDef& node) { return node.op() == "NoOp"; }
 
 bool IsNextIteration(const NodeDef& node) {
   const auto& op = node.op();
   return op == "NextIteration" || op == "RefNextIteration";
 }
 
-bool IsPad(const NodeDef& node) {
-  const auto op = node.op();
-  return op == "Pad";
-}
+bool IsPad(const NodeDef& node) { return node.op() == "Pad"; }
 
 bool IsPlaceholder(const NodeDef& node) {
   const auto op = node.op();
@@ -147,20 +110,11 @@ bool IsPlaceholder(const NodeDef& node) {
          op == "PlaceholderWithDefault";
 }
 
-bool IsRealDiv(const NodeDef& node) {
-  const auto op = node.op();
-  return op == "RealDiv";
-}
+bool IsRealDiv(const NodeDef& node) { return node.op() == "RealDiv"; }
 
-bool IsReluGrad(const NodeDef& node) {
-  const auto op = node.op();
-  return op == "ReluGrad";
-}
+bool IsReluGrad(const NodeDef& node) { return node.op() == "ReluGrad"; }
 
-bool IsRecv(const NodeDef& node) {
-  const auto op = node.op();
-  return op == "_Recv";
-}
+bool IsRecv(const NodeDef& node) { return node.op() == "_Recv"; }
 
 bool IsReduction(const NodeDef& node) {
   const auto& op = node.op();
@@ -175,53 +129,34 @@ bool IsRestore(const NodeDef& node) {
           node.op() == "RestoreSlice");
 }
 
-bool IsSend(const NodeDef& node) {
-  const auto op = node.op();
-  return op == "_Send";
-}
+bool IsSend(const NodeDef& node) { return node.op() == "_Send"; }
 
-bool IsSlice(const NodeDef& node) {
-  const auto op = node.op();
-  return op == "Slice";
-}
+bool IsSlice(const NodeDef& node) { return node.op() == "Slice"; }
 
 bool IsSquaredDifference(const NodeDef& node) {
-  const auto op = node.op();
-  return op == "SquaredDifference";
+  return node.op() == "SquaredDifference";
 }
 
-bool IsSqueeze(const NodeDef& node) {
-  const auto op = node.op();
-  return op == "Squeeze";
-}
+bool IsSqueeze(const NodeDef& node) { return node.op() == "Squeeze"; }
 
 bool IsStopGradient(const NodeDef& node) {
   const auto& op = node.op();
   return op == "StopGradient" || op == "PreventGradient";
 }
 
-bool IsSub(const NodeDef& node) {
-  const auto op = node.op();
-  return op == "Sub";
-}
+bool IsSub(const NodeDef& node) { return node.op() == "Sub"; }
 
-bool IsSum(const NodeDef& node) {
-  const auto op = node.op();
-  return op == "Sum";
-}
+bool IsSum(const NodeDef& node) { return node.op() == "Sum"; }
 
 bool IsSwitch(const NodeDef& node) {
   const auto& op = node.op();
   return op == "Switch" || op == "RefSwitch";
 }
 
-bool IsTranspose(const NodeDef& node) {
-  const auto op = node.op();
-  return op == "Transpose";
-}
+bool IsTranspose(const NodeDef& node) { return node.op() == "Transpose"; }
 
 bool IsVariable(const NodeDef& node) {
-  const auto op = node.op();
+  const auto& op = node.op();
   return op == "Variable" || op == "VariableV2" || op == "AutoReloadVariable" ||
          op == "VarHandleOp" || op == "ReadVariableOp";
 }
diff --git a/tensorflow/core/grappler/op_types.h b/tensorflow/core/grappler/op_types.h
index 85260efa93..66ff7a88c5 100644
--- a/tensorflow/core/grappler/op_types.h
+++ b/tensorflow/core/grappler/op_types.h
@@ -25,6 +25,7 @@ namespace grappler {
 bool IsAdd(const NodeDef& node);
 bool IsAddN(const NodeDef& node);
 bool IsAvgPoolGrad(const NodeDef& node);
+bool IsAssert(const NodeDef& node);
 bool IsBiasAddGrad(const NodeDef& node);
 bool IsConcatOffset(const NodeDef& node);
 bool IsConstant(const NodeDef& node);
diff --git a/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc b/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc
index 6c1770f0b0..d9a544d21f 100644
--- a/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc
+++ b/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc
@@ -449,6 +449,10 @@ bool ArithmeticOptimizer::CanDedup(const NodeDef& node) const {
   if (node.device().find("SPU") != string::npos) {
     return false;
   }
+  // Workaround for Assert mistakenly being labeled as stateful.
+  if (IsAssert(node)) {
+    return true;
+  }
   return IsFreeOfSideEffect(node);
 }
 
diff --git a/tensorflow/core/grappler/optimizers/arithmetic_optimizer_test.cc b/tensorflow/core/grappler/optimizers/arithmetic_optimizer_test.cc
index 354a306905..8cec4e4255 100644
--- a/tensorflow/core/grappler/optimizers/arithmetic_optimizer_test.cc
+++ b/tensorflow/core/grappler/optimizers/arithmetic_optimizer_test.cc
@@ -81,6 +81,38 @@ TEST_F(ArithmeticOptimizerTest, OpDedupping) {
   EXPECT_EQ("c1", new_mul.input(1));
 }
 
+TEST_F(ArithmeticOptimizerTest, OpDeduppingAssertAndCheckNumerics) {
+  tensorflow::Scope s = tensorflow::Scope::NewRootScope();
+  Output p = ops::Placeholder(s, DT_BOOL, ops::Placeholder::Shape({}));
+  Output c = ops::Const(s.WithOpName("c"), {3.14, 2.7}, {1, 2});
+  auto check1 = ops::CheckNumerics(s.WithOpName("check1"), c, "foo");
+  auto check2 = ops::CheckNumerics(s.WithOpName("check2"), c, "foo");
+  auto assert1 = ops::Assert(s.WithOpName("assert1"), p, {c});
+  auto assert2 = ops::Assert(s.WithOpName("assert2"), p, {c});
+  Output mul = ops::Multiply(s.WithOpName("mul").WithControlDependencies(
+                                 {assert1.operation, assert2.operation}),
+                             check1, check2);
+  GrapplerItem item;
+  TF_CHECK_OK(s.ToGraphDef(&item.graph));
+
+  ArithmeticOptimizer optimizer;
+  GraphDef output;
+  Status status = optimizer.Optimize(nullptr, item, &output);
+  TF_EXPECT_OK(status);
+  // Run the optimizer twice to make sure the rewrite is idempotent.
+  item.graph.Swap(&output);
+  status = optimizer.Optimize(nullptr, item, &output);
+  TF_EXPECT_OK(status);
+
+  EXPECT_EQ(5, output.node_size());
+  const NodeDef& new_mul = output.node(3);
+  EXPECT_EQ(4, new_mul.input_size());
+  EXPECT_EQ("check1", new_mul.input(0));
+  EXPECT_EQ("check1", new_mul.input(1));
+  EXPECT_EQ("^assert1", new_mul.input(2));
+  EXPECT_EQ("^assert1", new_mul.input(3));
+}
+
 TEST_F(ArithmeticOptimizerTest, OpDedupCommutative) {
   tensorflow::Scope s = tensorflow::Scope::NewRootScope();
   Output c1 = ops::Const(s.WithOpName("c1"), {1.0f, 2.0f}, {1, 2});
-- 
GitLab


From b8406da50df94dc17114c10d472a2058ff75b2d2 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 22 Nov 2017 11:25:32 -0800
Subject: [PATCH 0228/1225] Make drop_control_dependency a TocoFlag, not a
 ModelFlag.

PiperOrigin-RevId: 176680726
---
 tensorflow/contrib/lite/python/lite.py        |   2 +-
 tensorflow/contrib/lite/toco/args.h           |   2 +-
 .../contrib/lite/toco/import_tensorflow.cc    | 508 +++++++++++-------
 .../contrib/lite/toco/import_tensorflow.h     |  12 +-
 .../contrib/lite/toco/model_cmdline_flags.cc  |   8 -
 .../contrib/lite/toco/model_flags.proto       |   4 -
 .../contrib/lite/toco/toco_cmdline_flags.cc   |   8 +
 tensorflow/contrib/lite/toco/toco_flags.proto |  10 +-
 tensorflow/contrib/lite/toco/toco_tooling.cc  |  11 +-
 tensorflow/contrib/lite/toco/tooling_util.cc  |   1 -
 10 files changed, 350 insertions(+), 216 deletions(-)

diff --git a/tensorflow/contrib/lite/python/lite.py b/tensorflow/contrib/lite/python/lite.py
index 3cfee443e5..0fd70f842b 100644
--- a/tensorflow/contrib/lite/python/lite.py
+++ b/tensorflow/contrib/lite/python/lite.py
@@ -164,8 +164,8 @@ def toco_convert(input_data,
   toco = _toco_flags_pb2.TocoFlags()
   toco.input_format = input_format
   toco.output_format = output_format
+  toco.drop_control_dependency = drop_control_dependency
   model = _model_flags_pb2.ModelFlags()
-  model.drop_control_dependency = drop_control_dependency
   toco.inference_type = inference_type
   for idx, input_tensor in enumerate(input_tensors):
     if input_tensor.dtype == _dtypes.float32:
diff --git a/tensorflow/contrib/lite/toco/args.h b/tensorflow/contrib/lite/toco/args.h
index 88e0a29350..5268902346 100644
--- a/tensorflow/contrib/lite/toco/args.h
+++ b/tensorflow/contrib/lite/toco/args.h
@@ -194,7 +194,6 @@ struct ParsedModelFlags {
   Arg<string> input_data_type;
   Arg<string> input_data_types;
   Arg<bool> variable_batch = Arg<bool>(false);
-  Arg<bool> drop_control_dependency = Arg<bool>(false);
   Arg<toco::IntList> input_shape;
   Arg<toco::StringMapList> rnn_states;
   Arg<toco::StringMapList> model_checks;
@@ -224,6 +223,7 @@ struct ParsedTocoFlags {
   // Deprecated flags
   Arg<string> input_type;
   Arg<string> input_types;
+  Arg<bool> drop_control_dependency = Arg<bool>(false);
 };
 
 }  // namespace toco
diff --git a/tensorflow/contrib/lite/toco/import_tensorflow.cc b/tensorflow/contrib/lite/toco/import_tensorflow.cc
index f2dc526a36..1f959600f3 100644
--- a/tensorflow/contrib/lite/toco/import_tensorflow.cc
+++ b/tensorflow/contrib/lite/toco/import_tensorflow.cc
@@ -12,6 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
+#include "tensorflow/contrib/lite/toco/import_tensorflow.h"
+
 #include <memory>
 #include <string>
 #include <utility>
@@ -23,6 +25,7 @@ limitations under the License.
 #include "absl/strings/numbers.h"
 #include "absl/strings/str_cat.h"
 #include "absl/strings/str_split.h"
+//#include "absl/strings/string_view_utils.h"
 #include "absl/strings/strip.h"
 #include "tensorflow/contrib/lite/toco/model.h"
 #include "tensorflow/contrib/lite/toco/model_flags.pb.h"
@@ -234,14 +237,14 @@ void ImportInt64Array(const TensorProto& input_tensor, Array* output_array) {
   }
 }
 
-// Count the number of inputs of a given node. If `drop_control_dependency` is
-// true, count the number of non-control-dependency inputs.
-size_t GetInputsCount(const NodeDef& node, bool drop_control_dependency) {
-  if (drop_control_dependency) {
+// Count the number of inputs of a given node. If
+// `tf_import_flags.drop_control_dependency` is true, count the number of
+// non-control-dependency inputs.
+int GetInputsCount(const NodeDef& node,
+                   const TensorFlowImportFlags& tf_import_flags) {
+  if (tf_import_flags.drop_control_dependency) {
     for (size_t i = 0; i < node.input_size(); ++i) {
       if (node.input(i)[0] == '^') {
-        LOG(INFO) << "Reached first control dependency input: "
-                  << node.input(i);
         return i;
       }
     }
@@ -251,7 +254,9 @@ size_t GetInputsCount(const NodeDef& node, bool drop_control_dependency) {
   }
 }
 
-void ConvertConstOperator(const NodeDef& node, Model* model) {
+void ConvertConstOperator(const NodeDef& node,
+                          const TensorFlowImportFlags& tf_import_flags,
+                          Model* model) {
   CHECK_EQ(node.op(), "Const");
   const auto& tensor = GetTensorAttr(node, "value");
   const auto dtype = GetDataTypeAttr(node, "dtype");
@@ -277,9 +282,11 @@ void ConvertConstOperator(const NodeDef& node, Model* model) {
   }
 }
 
-void ConvertConvOperator(const NodeDef& node, Model* model) {
+void ConvertConvOperator(const NodeDef& node,
+                         const TensorFlowImportFlags& tf_import_flags,
+                         Model* model) {
   CHECK_EQ(node.op(), "Conv2D");
-  CHECK_EQ(GetInputsCount(node, model->flags.drop_control_dependency()), 2);
+  CHECK_EQ(GetInputsCount(node, tf_import_flags), 2);
 
   // We only support NHWC, which is the default data_format.
   // So if data_format is not defined, we're all good.
@@ -328,9 +335,11 @@ void ConvertConvOperator(const NodeDef& node, Model* model) {
   model->operators.emplace_back(conv);
 }
 
-void ConvertDepthwiseConvOperator(const NodeDef& node, Model* model) {
+void ConvertDepthwiseConvOperator(const NodeDef& node,
+                                  const TensorFlowImportFlags& tf_import_flags,
+                                  Model* model) {
   CHECK_EQ(node.op(), "DepthwiseConv2dNative");
-  CHECK_EQ(GetInputsCount(node, model->flags.drop_control_dependency()), 2);
+  CHECK_EQ(GetInputsCount(node, tf_import_flags), 2);
 
   // We only support NHWC, which is the default data_format.
   // So if data_format is not defined, we're all good.
@@ -379,9 +388,11 @@ void ConvertDepthwiseConvOperator(const NodeDef& node, Model* model) {
   model->operators.emplace_back(conv);
 }
 
-void ConvertDepthToSpaceOperator(const NodeDef& node, Model* model) {
+void ConvertDepthToSpaceOperator(const NodeDef& node,
+                                 const TensorFlowImportFlags& tf_import_flags,
+                                 Model* model) {
   CHECK_EQ(node.op(), "DepthToSpace");
-  CHECK_EQ(GetInputsCount(node, model->flags.drop_control_dependency()), 1);
+  CHECK_EQ(GetInputsCount(node, tf_import_flags), 1);
   CHECK_EQ(GetDataTypeAttr(node, "T"), DT_FLOAT);
   auto* op = new DepthToSpaceOperator;
   op->inputs.push_back(node.input(0));
@@ -391,9 +402,11 @@ void ConvertDepthToSpaceOperator(const NodeDef& node, Model* model) {
   model->operators.emplace_back(op);
 }
 
-void ConvertSpaceToDepthOperator(const NodeDef& node, Model* model) {
+void ConvertSpaceToDepthOperator(const NodeDef& node,
+                                 const TensorFlowImportFlags& tf_import_flags,
+                                 Model* model) {
   CHECK_EQ(node.op(), "SpaceToDepth");
-  CHECK_EQ(GetInputsCount(node, model->flags.drop_control_dependency()), 1);
+  CHECK_EQ(GetInputsCount(node, tf_import_flags), 1);
   CHECK_EQ(GetDataTypeAttr(node, "T"), DT_FLOAT);
   auto* op = new SpaceToDepthOperator;
   op->inputs.push_back(node.input(0));
@@ -403,9 +416,11 @@ void ConvertSpaceToDepthOperator(const NodeDef& node, Model* model) {
   model->operators.emplace_back(op);
 }
 
-void ConvertBiasAddOperator(const NodeDef& node, Model* model) {
+void ConvertBiasAddOperator(const NodeDef& node,
+                            const TensorFlowImportFlags& tf_import_flags,
+                            Model* model) {
   CHECK_EQ(node.op(), "BiasAdd");
-  CHECK_EQ(GetInputsCount(node, model->flags.drop_control_dependency()), 2);
+  CHECK_EQ(GetInputsCount(node, tf_import_flags), 2);
   const auto& input_name = node.input(0);
   const auto& bias_name = node.input(1);
   CHECK_EQ(GetDataTypeAttr(node, "T"), DT_FLOAT);
@@ -416,9 +431,11 @@ void ConvertBiasAddOperator(const NodeDef& node, Model* model) {
   model->operators.emplace_back(biasadd);
 }
 
-void ConvertReluOperator(const NodeDef& node, Model* model) {
+void ConvertReluOperator(const NodeDef& node,
+                         const TensorFlowImportFlags& tf_import_flags,
+                         Model* model) {
   CHECK_EQ(node.op(), "Relu");
-  CHECK_EQ(GetInputsCount(node, model->flags.drop_control_dependency()), 1);
+  CHECK_EQ(GetInputsCount(node, tf_import_flags), 1);
   const auto& input_name = node.input(0);
   auto* relu = new ReluOperator;
   relu->inputs.push_back(input_name);
@@ -426,9 +443,11 @@ void ConvertReluOperator(const NodeDef& node, Model* model) {
   model->operators.emplace_back(relu);
 }
 
-void ConvertRelu6Operator(const NodeDef& node, Model* model) {
+void ConvertRelu6Operator(const NodeDef& node,
+                          const TensorFlowImportFlags& tf_import_flags,
+                          Model* model) {
   CHECK_EQ(node.op(), "Relu6");
-  CHECK_EQ(GetInputsCount(node, model->flags.drop_control_dependency()), 1);
+  CHECK_EQ(GetInputsCount(node, tf_import_flags), 1);
   const auto& input_name = node.input(0);
   auto* op = new Relu6Operator;
   op->inputs.push_back(input_name);
@@ -436,9 +455,11 @@ void ConvertRelu6Operator(const NodeDef& node, Model* model) {
   model->operators.emplace_back(op);
 }
 
-void ConvertLogisticOperator(const NodeDef& node, Model* model) {
+void ConvertLogisticOperator(const NodeDef& node,
+                             const TensorFlowImportFlags& tf_import_flags,
+                             Model* model) {
   CHECK_EQ(node.op(), "Sigmoid");
-  CHECK_EQ(GetInputsCount(node, model->flags.drop_control_dependency()), 1);
+  CHECK_EQ(GetInputsCount(node, tf_import_flags), 1);
   const auto& input_name = node.input(0);
   auto* op = new LogisticOperator;
   op->inputs.push_back(input_name);
@@ -446,9 +467,11 @@ void ConvertLogisticOperator(const NodeDef& node, Model* model) {
   model->operators.emplace_back(op);
 }
 
-void ConvertTanhOperator(const NodeDef& node, Model* model) {
+void ConvertTanhOperator(const NodeDef& node,
+                         const TensorFlowImportFlags& tf_import_flags,
+                         Model* model) {
   CHECK_EQ(node.op(), "Tanh");
-  CHECK_EQ(GetInputsCount(node, model->flags.drop_control_dependency()), 1);
+  CHECK_EQ(GetInputsCount(node, tf_import_flags), 1);
   const auto& input_name = node.input(0);
   auto* op = new TanhOperator;
   op->inputs.push_back(input_name);
@@ -456,9 +479,11 @@ void ConvertTanhOperator(const NodeDef& node, Model* model) {
   model->operators.emplace_back(op);
 }
 
-void ConvertDivOperator(const NodeDef& node, Model* model) {
+void ConvertDivOperator(const NodeDef& node,
+                        const TensorFlowImportFlags& tf_import_flags,
+                        Model* model) {
   CHECK(node.op() == "Div" || node.op() == "RealDiv");
-  CHECK_EQ(GetInputsCount(node, model->flags.drop_control_dependency()), 2);
+  CHECK_EQ(GetInputsCount(node, tf_import_flags), 2);
   auto* op = new DivOperator;
   op->inputs.push_back(node.input(0));
   op->inputs.push_back(node.input(1));
@@ -466,7 +491,9 @@ void ConvertDivOperator(const NodeDef& node, Model* model) {
   model->operators.emplace_back(op);
 }
 
-void ConvertIdentityOperator(const NodeDef& node, Model* model) {
+void ConvertIdentityOperator(const NodeDef& node,
+                             const TensorFlowImportFlags& tf_import_flags,
+                             Model* model) {
   CHECK(node.op() == "Identity" || node.op() == "CheckNumerics" ||
         node.op() == "PlaceholderWithDefault");
   auto* op = new TensorFlowIdentityOperator;
@@ -482,9 +509,11 @@ void ConvertIdentityOperator(const NodeDef& node, Model* model) {
   model->operators.emplace_back(op);
 }
 
-void ConvertFakeQuantWithMinMaxArgs(const NodeDef& node, Model* model) {
+void ConvertFakeQuantWithMinMaxArgs(
+    const NodeDef& node, const TensorFlowImportFlags& tf_import_flags,
+    Model* model) {
   CHECK_EQ(node.op(), "FakeQuantWithMinMaxArgs");
-  CHECK_EQ(GetInputsCount(node, model->flags.drop_control_dependency()), 1);
+  CHECK_EQ(GetInputsCount(node, tf_import_flags), 1);
   auto* op = new FakeQuantOperator;
   op->inputs.push_back(node.input(0));
   op->minmax.reset(new MinMax);
@@ -495,10 +524,11 @@ void ConvertFakeQuantWithMinMaxArgs(const NodeDef& node, Model* model) {
   model->operators.emplace_back(op);
 }
 
-void ConvertFakeQuantWithMinMaxVars(const NodeDef& node, Model* model) {
+void ConvertFakeQuantWithMinMaxVars(
+    const NodeDef& node, const TensorFlowImportFlags& tf_import_flags,
+    Model* model) {
   CHECK_EQ(node.op(), "FakeQuantWithMinMaxVars");
-  const int num_inputs =
-      GetInputsCount(node, model->flags.drop_control_dependency());
+  const int num_inputs = GetInputsCount(node, tf_import_flags);
   CHECK(num_inputs == 3 || num_inputs == 4);
   auto* op = new FakeQuantOperator;
   for (int i = 0; i < 3; i++) {
@@ -508,27 +538,33 @@ void ConvertFakeQuantWithMinMaxVars(const NodeDef& node, Model* model) {
   model->operators.emplace_back(op);
 }
 
-void ConvertRsqrtOperator(const NodeDef& node, Model* model) {
+void ConvertRsqrtOperator(const NodeDef& node,
+                          const TensorFlowImportFlags& tf_import_flags,
+                          Model* model) {
   CHECK_EQ(node.op(), "Rsqrt");
-  CHECK_EQ(GetInputsCount(node, model->flags.drop_control_dependency()), 1);
+  CHECK_EQ(GetInputsCount(node, tf_import_flags), 1);
   auto* op = new TensorFlowRsqrtOperator;
   op->inputs.push_back(node.input(0));
   op->outputs.push_back(node.name());
   model->operators.emplace_back(op);
 }
 
-void ConvertSqrtOperator(const NodeDef& node, Model* model) {
+void ConvertSqrtOperator(const NodeDef& node,
+                         const TensorFlowImportFlags& tf_import_flags,
+                         Model* model) {
   CHECK_EQ(node.op(), "Sqrt");
-  CHECK_EQ(GetInputsCount(node, model->flags.drop_control_dependency()), 1);
+  CHECK_EQ(GetInputsCount(node, tf_import_flags), 1);
   auto* op = new TensorFlowSqrtOperator;
   op->inputs.push_back(node.input(0));
   op->outputs.push_back(node.name());
   model->operators.emplace_back(op);
 }
 
-void ConvertSqueezeOperator(const NodeDef& node, Model* model) {
+void ConvertSqueezeOperator(const NodeDef& node,
+                            const TensorFlowImportFlags& tf_import_flags,
+                            Model* model) {
   CHECK_EQ(node.op(), "Squeeze");
-  CHECK_EQ(GetInputsCount(node, model->flags.drop_control_dependency()), 1);
+  CHECK_EQ(GetInputsCount(node, tf_import_flags), 1);
   auto* op = new SqueezeOperator;
   op->inputs.push_back(node.input(0));
   op->outputs.push_back(node.name());
@@ -541,18 +577,22 @@ void ConvertSqueezeOperator(const NodeDef& node, Model* model) {
   model->operators.emplace_back(op);
 }
 
-void ConvertSquareOperator(const NodeDef& node, Model* model) {
+void ConvertSquareOperator(const NodeDef& node,
+                           const TensorFlowImportFlags& tf_import_flags,
+                           Model* model) {
   CHECK_EQ(node.op(), "Square");
-  CHECK_EQ(GetInputsCount(node, model->flags.drop_control_dependency()), 1);
+  CHECK_EQ(GetInputsCount(node, tf_import_flags), 1);
   auto* op = new TensorFlowSquareOperator;
   op->inputs.push_back(node.input(0));
   op->outputs.push_back(node.name());
   model->operators.emplace_back(op);
 }
 
-void ConvertAddOperator(const NodeDef& node, Model* model) {
+void ConvertAddOperator(const NodeDef& node,
+                        const TensorFlowImportFlags& tf_import_flags,
+                        Model* model) {
   CHECK_EQ(node.op(), "Add");
-  CHECK_EQ(GetInputsCount(node, model->flags.drop_control_dependency()), 2);
+  CHECK_EQ(GetInputsCount(node, tf_import_flags), 2);
   auto* op = new AddOperator;
   op->inputs.push_back(node.input(0));
   op->inputs.push_back(node.input(1));
@@ -560,9 +600,11 @@ void ConvertAddOperator(const NodeDef& node, Model* model) {
   model->operators.emplace_back(op);
 }
 
-void ConvertMulOperator(const NodeDef& node, Model* model) {
+void ConvertMulOperator(const NodeDef& node,
+                        const TensorFlowImportFlags& tf_import_flags,
+                        Model* model) {
   CHECK_EQ(node.op(), "Mul");
-  CHECK_EQ(GetInputsCount(node, model->flags.drop_control_dependency()), 2);
+  CHECK_EQ(GetInputsCount(node, tf_import_flags), 2);
   auto* op = new MulOperator;
   op->inputs.push_back(node.input(0));
   op->inputs.push_back(node.input(1));
@@ -570,9 +612,11 @@ void ConvertMulOperator(const NodeDef& node, Model* model) {
   model->operators.emplace_back(op);
 }
 
-void ConvertSubOperator(const NodeDef& node, Model* model) {
+void ConvertSubOperator(const NodeDef& node,
+                        const TensorFlowImportFlags& tf_import_flags,
+                        Model* model) {
   CHECK_EQ(node.op(), "Sub");
-  CHECK_EQ(GetInputsCount(node, model->flags.drop_control_dependency()), 2);
+  CHECK_EQ(GetInputsCount(node, tf_import_flags), 2);
   auto* op = new SubOperator;
   op->inputs.push_back(node.input(0));
   op->inputs.push_back(node.input(1));
@@ -580,9 +624,11 @@ void ConvertSubOperator(const NodeDef& node, Model* model) {
   model->operators.emplace_back(op);
 }
 
-void ConvertSumOperator(const NodeDef& node, Model* model) {
+void ConvertSumOperator(const NodeDef& node,
+                        const TensorFlowImportFlags& tf_import_flags,
+                        Model* model) {
   CHECK_EQ(node.op(), "Sum");
-  CHECK_EQ(GetInputsCount(node, model->flags.drop_control_dependency()), 2);
+  CHECK_EQ(GetInputsCount(node, tf_import_flags), 2);
   auto* op = new TensorFlowSumOperator;
   op->inputs.push_back(node.input(0));
   op->inputs.push_back(node.input(1));
@@ -593,9 +639,11 @@ void ConvertSumOperator(const NodeDef& node, Model* model) {
   }
 }
 
-void ConvertTileOperator(const NodeDef& node, Model* model) {
+void ConvertTileOperator(const NodeDef& node,
+                         const TensorFlowImportFlags& tf_import_flags,
+                         Model* model) {
   CHECK_EQ(node.op(), "Tile");
-  CHECK_EQ(GetInputsCount(node, model->flags.drop_control_dependency()), 2);
+  CHECK_EQ(GetInputsCount(node, tf_import_flags), 2);
   auto* op = new TensorFlowTileOperator;
   op->inputs.push_back(node.input(0));
   op->inputs.push_back(node.input(1));
@@ -603,9 +651,11 @@ void ConvertTileOperator(const NodeDef& node, Model* model) {
   model->operators.emplace_back(op);
 }
 
-void ConvertSliceOperator(const NodeDef& node, Model* model) {
+void ConvertSliceOperator(const NodeDef& node,
+                          const TensorFlowImportFlags& tf_import_flags,
+                          Model* model) {
   CHECK_EQ(node.op(), "Slice");
-  CHECK_EQ(GetInputsCount(node, model->flags.drop_control_dependency()), 3);
+  CHECK_EQ(GetInputsCount(node, tf_import_flags), 3);
   auto* op = new SliceOperator;
   for (int i = 0; i < 3; ++i) {
     op->inputs.push_back(node.input(i));
@@ -614,9 +664,11 @@ void ConvertSliceOperator(const NodeDef& node, Model* model) {
   model->operators.emplace_back(op);
 }
 
-void ConvertPadOperator(const NodeDef& node, Model* model) {
+void ConvertPadOperator(const NodeDef& node,
+                        const TensorFlowImportFlags& tf_import_flags,
+                        Model* model) {
   CHECK_EQ(node.op(), "Pad");
-  CHECK_EQ(GetInputsCount(node, model->flags.drop_control_dependency()), 2);
+  CHECK_EQ(GetInputsCount(node, tf_import_flags), 2);
   auto* op = new PadOperator;
   op->inputs.push_back(node.input(0));
   op->inputs.push_back(node.input(1));
@@ -624,18 +676,22 @@ void ConvertPadOperator(const NodeDef& node, Model* model) {
   model->operators.emplace_back(op);
 }
 
-void ConvertShapeOperator(const NodeDef& node, Model* model) {
+void ConvertShapeOperator(const NodeDef& node,
+                          const TensorFlowImportFlags& tf_import_flags,
+                          Model* model) {
   CHECK_EQ(node.op(), "Shape");
-  CHECK_EQ(GetInputsCount(node, model->flags.drop_control_dependency()), 1);
+  CHECK_EQ(GetInputsCount(node, tf_import_flags), 1);
   auto* op = new TensorFlowShapeOperator;
   op->inputs.push_back(node.input(0));
   op->outputs.push_back(node.name());
   model->operators.emplace_back(op);
 }
 
-void ConvertSplitOperator(const NodeDef& node, Model* model) {
+void ConvertSplitOperator(const NodeDef& node,
+                          const TensorFlowImportFlags& tf_import_flags,
+                          Model* model) {
   CHECK_EQ(node.op(), "Split");
-  CHECK_EQ(GetInputsCount(node, model->flags.drop_control_dependency()), 2);
+  CHECK_EQ(GetInputsCount(node, tf_import_flags), 2);
   auto* op = new TensorFlowSplitOperator;
   op->inputs.push_back(node.input(0));
   op->inputs.push_back(node.input(1));
@@ -648,9 +704,11 @@ void ConvertSplitOperator(const NodeDef& node, Model* model) {
   model->operators.emplace_back(op);
 }
 
-void ConvertMergeOperator(const NodeDef& node, Model* model) {
+void ConvertMergeOperator(const NodeDef& node,
+                          const TensorFlowImportFlags& tf_import_flags,
+                          Model* model) {
   CHECK_EQ(node.op(), "Merge");
-  CHECK_EQ(GetInputsCount(node, model->flags.drop_control_dependency()), 2);
+  CHECK_EQ(GetInputsCount(node, tf_import_flags), 2);
   auto* op = new TensorFlowMergeOperator;
   op->inputs.push_back(node.input(0));
   op->inputs.push_back(node.input(1));
@@ -658,9 +716,11 @@ void ConvertMergeOperator(const NodeDef& node, Model* model) {
   model->operators.emplace_back(op);
 }
 
-void ConvertSwitchOperator(const NodeDef& node, Model* model) {
+void ConvertSwitchOperator(const NodeDef& node,
+                           const TensorFlowImportFlags& tf_import_flags,
+                           Model* model) {
   CHECK_EQ(node.op(), "Switch");
-  CHECK_EQ(GetInputsCount(node, model->flags.drop_control_dependency()), 2);
+  CHECK_EQ(GetInputsCount(node, tf_import_flags), 2);
   auto* op = new TensorFlowSwitchOperator;
   op->inputs.push_back(node.input(0));
   op->inputs.push_back(node.input(1));
@@ -669,9 +729,11 @@ void ConvertSwitchOperator(const NodeDef& node, Model* model) {
   op->outputs.push_back(node.name() + ":1");
   model->operators.emplace_back(op);
 }
-void ConvertSoftmaxOperator(const NodeDef& node, Model* model) {
+void ConvertSoftmaxOperator(const NodeDef& node,
+                            const TensorFlowImportFlags& tf_import_flags,
+                            Model* model) {
   CHECK_EQ(node.op(), "Softmax");
-  CHECK_EQ(GetInputsCount(node, model->flags.drop_control_dependency()), 1);
+  CHECK_EQ(GetInputsCount(node, tf_import_flags), 1);
   const auto& input_name = node.input(0);
   auto* softmax = new SoftmaxOperator;
   softmax->inputs.push_back(input_name);
@@ -682,9 +744,11 @@ void ConvertSoftmaxOperator(const NodeDef& node, Model* model) {
   model->operators.emplace_back(softmax);
 }
 
-void ConvertLRNOperator(const NodeDef& node, Model* model) {
+void ConvertLRNOperator(const NodeDef& node,
+                        const TensorFlowImportFlags& tf_import_flags,
+                        Model* model) {
   CHECK_EQ(node.op(), "LRN");
-  CHECK_EQ(GetInputsCount(node, model->flags.drop_control_dependency()), 1);
+  CHECK_EQ(GetInputsCount(node, tf_import_flags), 1);
   const auto& input_name = node.input(0);
   auto* lrn = new LocalResponseNormalizationOperator;
   lrn->inputs.push_back(input_name);
@@ -696,9 +760,11 @@ void ConvertLRNOperator(const NodeDef& node, Model* model) {
   model->operators.emplace_back(lrn);
 }
 
-void ConvertMaxPoolOperator(const NodeDef& node, Model* model) {
+void ConvertMaxPoolOperator(const NodeDef& node,
+                            const TensorFlowImportFlags& tf_import_flags,
+                            Model* model) {
   CHECK_EQ(node.op(), "MaxPool");
-  CHECK_EQ(GetInputsCount(node, model->flags.drop_control_dependency()), 1);
+  CHECK_EQ(GetInputsCount(node, tf_import_flags), 1);
   const auto& input_name = node.input(0);
   // We only support NHWC, which is the default data_format.
   // So if data_format is not defined, we're all good.
@@ -736,9 +802,11 @@ void ConvertMaxPoolOperator(const NodeDef& node, Model* model) {
   model->operators.emplace_back(maxpool);
 }
 
-void ConvertAvgPoolOperator(const NodeDef& node, Model* model) {
+void ConvertAvgPoolOperator(const NodeDef& node,
+                            const TensorFlowImportFlags& tf_import_flags,
+                            Model* model) {
   CHECK_EQ(node.op(), "AvgPool");
-  CHECK_EQ(GetInputsCount(node, model->flags.drop_control_dependency()), 1);
+  CHECK_EQ(GetInputsCount(node, tf_import_flags), 1);
   const auto& input_name = node.input(0);
   // We only support NHWC, which is the default data_format.
   // So if data_format is not defined, we're all good.
@@ -772,9 +840,11 @@ void ConvertAvgPoolOperator(const NodeDef& node, Model* model) {
   model->operators.emplace_back(avgpool);
 }
 
-void ConvertReshapeOperator(const NodeDef& node, Model* model) {
+void ConvertReshapeOperator(const NodeDef& node,
+                            const TensorFlowImportFlags& tf_import_flags,
+                            Model* model) {
   CHECK_EQ(node.op(), "Reshape");
-  CHECK_EQ(GetInputsCount(node, model->flags.drop_control_dependency()), 2);
+  CHECK_EQ(GetInputsCount(node, tf_import_flags), 2);
   auto* op = new TensorFlowReshapeOperator;
   op->inputs.push_back(node.input(0));
   op->inputs.push_back(node.input(1));
@@ -782,9 +852,11 @@ void ConvertReshapeOperator(const NodeDef& node, Model* model) {
   model->operators.emplace_back(op);
 }
 
-void ConvertMatMulOperator(const NodeDef& node, Model* model) {
+void ConvertMatMulOperator(const NodeDef& node,
+                           const TensorFlowImportFlags& tf_import_flags,
+                           Model* model) {
   CHECK_EQ(node.op(), "MatMul");
-  CHECK_EQ(GetInputsCount(node, model->flags.drop_control_dependency()), 2);
+  CHECK_EQ(GetInputsCount(node, tf_import_flags), 2);
   // Transpose flags should be easy to support, but we don't have a
   // GraphDef with them to test on at the moment.
   CHECK_EQ(GetBoolAttr(node, "transpose_a"), false);
@@ -815,7 +887,9 @@ void ConvertMatMulOperator(const NodeDef& node, Model* model) {
   model->operators.emplace_back(matmul);
 }
 
-void ConvertConcatOperator(const NodeDef& node, Model* model) {
+void ConvertConcatOperator(const NodeDef& node,
+                           const TensorFlowImportFlags& tf_import_flags,
+                           Model* model) {
   Operator* op = nullptr;
   if (node.op() == "Concat") {
     op = new TensorFlowConcatOperator;
@@ -824,8 +898,7 @@ void ConvertConcatOperator(const NodeDef& node, Model* model) {
   } else {
     LOG(FATAL) << "Expected Concat or ConcatV2";
   }
-  const int num_inputs =
-      GetInputsCount(node, model->flags.drop_control_dependency());
+  const int num_inputs = GetInputsCount(node, tf_import_flags);
   CHECK_GE(num_inputs, 2);
   CHECK_EQ(num_inputs, 1 + GetIntAttr(node, "N"));
   for (int i = 0; i < num_inputs; ++i) {
@@ -835,11 +908,12 @@ void ConvertConcatOperator(const NodeDef& node, Model* model) {
   model->operators.emplace_back(op);
 }
 
-void ConvertAllOperator(const NodeDef& node, Model* model) {
+void ConvertAllOperator(const NodeDef& node,
+                        const TensorFlowImportFlags& tf_import_flags,
+                        Model* model) {
   CHECK_EQ(node.op(), "All");
   auto* op = new TensorFlowAllOperator;
-  const int num_inputs =
-      GetInputsCount(node, model->flags.drop_control_dependency());
+  const int num_inputs = GetInputsCount(node, tf_import_flags);
   for (int i = 0; i < num_inputs; ++i) {
     op->inputs.push_back(node.input(i));
   }
@@ -847,11 +921,12 @@ void ConvertAllOperator(const NodeDef& node, Model* model) {
   model->operators.emplace_back(op);
 }
 
-void ConvertAssertOperator(const NodeDef& node, Model* model) {
+void ConvertAssertOperator(const NodeDef& node,
+                           const TensorFlowImportFlags& tf_import_flags,
+                           Model* model) {
   CHECK_EQ(node.op(), "Assert");
   auto* op = new TensorFlowAssertOperator;
-  const int num_inputs =
-      GetInputsCount(node, model->flags.drop_control_dependency());
+  const int num_inputs = GetInputsCount(node, tf_import_flags);
   for (int i = 0; i < num_inputs; ++i) {
     op->inputs.push_back(node.input(i));
   }
@@ -859,11 +934,12 @@ void ConvertAssertOperator(const NodeDef& node, Model* model) {
   model->operators.emplace_back(op);
 }
 
-void ConvertLessOperator(const NodeDef& node, Model* model) {
+void ConvertLessOperator(const NodeDef& node,
+                         const TensorFlowImportFlags& tf_import_flags,
+                         Model* model) {
   CHECK_EQ(node.op(), "Less");
   auto* op = new TensorFlowLessOperator;
-  const int num_inputs =
-      GetInputsCount(node, model->flags.drop_control_dependency());
+  const int num_inputs = GetInputsCount(node, tf_import_flags);
   for (int i = 0; i < num_inputs; ++i) {
     op->inputs.push_back(node.input(i));
   }
@@ -871,11 +947,12 @@ void ConvertLessOperator(const NodeDef& node, Model* model) {
   model->operators.emplace_back(op);
 }
 
-void ConvertLessEqualOperator(const NodeDef& node, Model* model) {
+void ConvertLessEqualOperator(const NodeDef& node,
+                              const TensorFlowImportFlags& tf_import_flags,
+                              Model* model) {
   CHECK_EQ(node.op(), "LessEqual");
   auto* op = new TensorFlowLessEqualOperator;
-  const int num_inputs =
-      GetInputsCount(node, model->flags.drop_control_dependency());
+  const int num_inputs = GetInputsCount(node, tf_import_flags);
   for (int i = 0; i < num_inputs; ++i) {
     op->inputs.push_back(node.input(i));
   }
@@ -883,11 +960,12 @@ void ConvertLessEqualOperator(const NodeDef& node, Model* model) {
   model->operators.emplace_back(op);
 }
 
-void ConvertGreaterOperator(const NodeDef& node, Model* model) {
+void ConvertGreaterOperator(const NodeDef& node,
+                            const TensorFlowImportFlags& tf_import_flags,
+                            Model* model) {
   CHECK_EQ(node.op(), "Greater");
   auto* op = new TensorFlowGreaterOperator;
-  const int num_inputs =
-      GetInputsCount(node, model->flags.drop_control_dependency());
+  const int num_inputs = GetInputsCount(node, tf_import_flags);
   for (int i = 0; i < num_inputs; ++i) {
     op->inputs.push_back(node.input(i));
   }
@@ -895,11 +973,12 @@ void ConvertGreaterOperator(const NodeDef& node, Model* model) {
   model->operators.emplace_back(op);
 }
 
-void ConvertGreaterEqualOperator(const NodeDef& node, Model* model) {
+void ConvertGreaterEqualOperator(const NodeDef& node,
+                                 const TensorFlowImportFlags& tf_import_flags,
+                                 Model* model) {
   CHECK_EQ(node.op(), "GreaterEqual");
   auto* op = new TensorFlowGreaterEqualOperator;
-  const int num_inputs =
-      GetInputsCount(node, model->flags.drop_control_dependency());
+  const int num_inputs = GetInputsCount(node, tf_import_flags);
   for (int i = 0; i < num_inputs; ++i) {
     op->inputs.push_back(node.input(i));
   }
@@ -907,9 +986,11 @@ void ConvertGreaterEqualOperator(const NodeDef& node, Model* model) {
   model->operators.emplace_back(op);
 }
 
-void ConvertMaxOperator(const NodeDef& node, Model* model) {
+void ConvertMaxOperator(const NodeDef& node,
+                        const TensorFlowImportFlags& tf_import_flags,
+                        Model* model) {
   CHECK_EQ(node.op(), "Max");
-  CHECK_EQ(GetInputsCount(node, model->flags.drop_control_dependency()), 2);
+  CHECK_EQ(GetInputsCount(node, tf_import_flags), 2);
   auto* op = new TensorFlowMaxOperator;
   op->inputs.push_back(node.input(0));
   op->inputs.push_back(node.input(1));
@@ -920,9 +1001,11 @@ void ConvertMaxOperator(const NodeDef& node, Model* model) {
   }
 }
 
-void ConvertMinOperator(const NodeDef& node, Model* model) {
+void ConvertMinOperator(const NodeDef& node,
+                        const TensorFlowImportFlags& tf_import_flags,
+                        Model* model) {
   CHECK_EQ(node.op(), "Min");
-  CHECK_EQ(GetInputsCount(node, model->flags.drop_control_dependency()), 2);
+  CHECK_EQ(GetInputsCount(node, tf_import_flags), 2);
   auto* op = new TensorFlowMinOperator;
   op->inputs.push_back(node.input(0));
   op->inputs.push_back(node.input(1));
@@ -933,9 +1016,11 @@ void ConvertMinOperator(const NodeDef& node, Model* model) {
   }
 }
 
-void ConvertMaximumOperator(const NodeDef& node, Model* model) {
+void ConvertMaximumOperator(const NodeDef& node,
+                            const TensorFlowImportFlags& tf_import_flags,
+                            Model* model) {
   CHECK_EQ(node.op(), "Maximum");
-  CHECK_EQ(GetInputsCount(node, model->flags.drop_control_dependency()), 2);
+  CHECK_EQ(GetInputsCount(node, tf_import_flags), 2);
   auto* op = new TensorFlowMaximumOperator;
   op->inputs.push_back(node.input(0));
   op->inputs.push_back(node.input(1));
@@ -943,9 +1028,11 @@ void ConvertMaximumOperator(const NodeDef& node, Model* model) {
   model->operators.emplace_back(op);
 }
 
-void ConvertMinimumOperator(const NodeDef& node, Model* model) {
+void ConvertMinimumOperator(const NodeDef& node,
+                            const TensorFlowImportFlags& tf_import_flags,
+                            Model* model) {
   CHECK_EQ(node.op(), "Minimum");
-  CHECK_EQ(GetInputsCount(node, model->flags.drop_control_dependency()), 2);
+  CHECK_EQ(GetInputsCount(node, tf_import_flags), 2);
   auto* op = new TensorFlowMinimumOperator;
   op->inputs.push_back(node.input(0));
   op->inputs.push_back(node.input(1));
@@ -953,11 +1040,12 @@ void ConvertMinimumOperator(const NodeDef& node, Model* model) {
   model->operators.emplace_back(op);
 }
 
-void ConvertUnsupportedOperator(const NodeDef& node, Model* model) {
+void ConvertUnsupportedOperator(const NodeDef& node,
+                                const TensorFlowImportFlags& tf_import_flags,
+                                Model* model) {
   LOG(INFO) << "Converting unsupported operation: " << node.op();
   auto* op = new TensorFlowUnsupportedOperator;
-  const int num_inputs =
-      GetInputsCount(node, model->flags.drop_control_dependency());
+  const int num_inputs = GetInputsCount(node, tf_import_flags);
   for (int i = 0; i < num_inputs; ++i) {
     op->inputs.push_back(node.input(i));
   }
@@ -976,7 +1064,9 @@ void ConvertUnsupportedOperator(const NodeDef& node, Model* model) {
   }
 }
 
-void ConvertStridedSliceOperator(const NodeDef& node, Model* model) {
+void ConvertStridedSliceOperator(const NodeDef& node,
+                                 const TensorFlowImportFlags& tf_import_flags,
+                                 Model* model) {
   CHECK_EQ(node.op(), "StridedSlice");
   CHECK_EQ(node.input_size(), 4);
 
@@ -991,7 +1081,7 @@ void ConvertStridedSliceOperator(const NodeDef& node, Model* model) {
       // Only 4D tensors are supported.
       GetIntAttr(node, "begin_mask") > 15 ||
       GetIntAttr(node, "end_mask") > 15) {
-    ConvertUnsupportedOperator(node, model);
+    ConvertUnsupportedOperator(node, tf_import_flags, model);
     return;
   }
 
@@ -1009,10 +1099,12 @@ void ConvertStridedSliceOperator(const NodeDef& node, Model* model) {
   model->operators.emplace_back(op);
 }
 
-void ConvertPlaceholderOperator(const NodeDef& node, Model* model) {
+void ConvertPlaceholderOperator(const NodeDef& node,
+                                const TensorFlowImportFlags& tf_import_flags,
+                                Model* model) {
   CHECK(node.op() == "Placeholder" || node.op() == "LegacyFedInput");
   if (node.op() == "Placeholder") {
-    CHECK_EQ(GetInputsCount(node, model->flags.drop_control_dependency()), 0);
+    CHECK_EQ(GetInputsCount(node, tf_import_flags), 0);
   }
   auto& array = model->GetOrCreateArray(node.name());
   if (node.attr().count("dtype")) {
@@ -1039,7 +1131,9 @@ void ConvertPlaceholderOperator(const NodeDef& node, Model* model) {
   }
 }
 
-void ConvertNoOpOperator(const NodeDef& node, Model* model) {}
+void ConvertNoOpOperator(const NodeDef& node,
+                         const TensorFlowImportFlags& tf_import_flags,
+                         Model* model) {}
 
 ArrayDataType GetArrayDataType(tensorflow::DataType tf_data_type) {
   if (tf_data_type == DT_UINT8) {
@@ -1053,9 +1147,11 @@ ArrayDataType GetArrayDataType(tensorflow::DataType tf_data_type) {
   }
 }
 
-void ConvertCastOperator(const NodeDef& node, Model* model) {
+void ConvertCastOperator(const NodeDef& node,
+                         const TensorFlowImportFlags& tf_import_flags,
+                         Model* model) {
   CHECK_EQ(node.op(), "Cast");
-  CHECK_EQ(GetInputsCount(node, model->flags.drop_control_dependency()), 1);
+  CHECK_EQ(GetInputsCount(node, tf_import_flags), 1);
   const auto tf_src_dtype = GetDataTypeAttr(node, "SrcT");
   const auto tf_dst_dtype = GetDataTypeAttr(node, "DstT");
   CHECK(tf_src_dtype == DT_UINT8 || tf_src_dtype == DT_INT32 ||
@@ -1072,9 +1168,11 @@ void ConvertCastOperator(const NodeDef& node, Model* model) {
   model->operators.emplace_back(op);
 }
 
-void ConvertFloorOperator(const NodeDef& node, Model* model) {
+void ConvertFloorOperator(const NodeDef& node,
+                          const TensorFlowImportFlags& tf_import_flags,
+                          Model* model) {
   CHECK_EQ(node.op(), "Floor");
-  CHECK_EQ(GetInputsCount(node, model->flags.drop_control_dependency()), 1);
+  CHECK_EQ(GetInputsCount(node, tf_import_flags), 1);
   const auto data_type = GetDataTypeAttr(node, "T");
   CHECK(data_type == DT_FLOAT);
   auto* op = new FloorOperator;
@@ -1083,9 +1181,11 @@ void ConvertFloorOperator(const NodeDef& node, Model* model) {
   model->operators.emplace_back(op);
 }
 
-void ConvertGatherOperator(const NodeDef& node, Model* model) {
+void ConvertGatherOperator(const NodeDef& node,
+                           const TensorFlowImportFlags& tf_import_flags,
+                           Model* model) {
   CHECK_EQ(node.op(), "Gather");
-  CHECK_EQ(GetInputsCount(node, model->flags.drop_control_dependency()), 2);
+  CHECK_EQ(GetInputsCount(node, tf_import_flags), 2);
   const auto indices_data_type = GetDataTypeAttr(node, "Tindices");
   CHECK(indices_data_type == DT_INT32);
   auto* op = new GatherOperator;
@@ -1095,9 +1195,11 @@ void ConvertGatherOperator(const NodeDef& node, Model* model) {
   model->operators.emplace_back(op);
 }
 
-void ConvertResizeBilinearOperator(const NodeDef& node, Model* model) {
+void ConvertResizeBilinearOperator(const NodeDef& node,
+                                   const TensorFlowImportFlags& tf_import_flags,
+                                   Model* model) {
   CHECK_EQ(node.op(), "ResizeBilinear");
-  CHECK_EQ(GetInputsCount(node, model->flags.drop_control_dependency()), 2);
+  CHECK_EQ(GetInputsCount(node, tf_import_flags), 2);
   auto* op = new ResizeBilinearOperator;
   op->inputs.push_back(node.input(0));
   op->inputs.push_back(node.input(1));
@@ -1105,10 +1207,11 @@ void ConvertResizeBilinearOperator(const NodeDef& node, Model* model) {
   model->operators.emplace_back(op);
 }
 
-void ConvertBatchNormWithGlobalNormalizationOperator(const NodeDef& node,
-                                                     Model* model) {
+void ConvertBatchNormWithGlobalNormalizationOperator(
+    const NodeDef& node, const TensorFlowImportFlags& tf_import_flags,
+    Model* model) {
   CHECK_EQ(node.op(), "BatchNormWithGlobalNormalization");
-  CHECK_EQ(GetInputsCount(node, model->flags.drop_control_dependency()), 5);
+  CHECK_EQ(GetInputsCount(node, tf_import_flags), 5);
 
   // TODO(ahentz): to really match tensorflow we need to add variance_epsilon
   // to the input, before feeding it into TensorFlowRsqrtOperator.
@@ -1153,7 +1256,9 @@ void ConvertBatchNormWithGlobalNormalizationOperator(const NodeDef& node,
   model->operators.emplace_back(op);
 }
 
-void ConvertFusedBatchNormOperator(const NodeDef& node, Model* model) {
+void ConvertFusedBatchNormOperator(const NodeDef& node,
+                                   const TensorFlowImportFlags& tf_import_flags,
+                                   Model* model) {
   CHECK_EQ(node.op(), "FusedBatchNorm");
   CHECK_EQ(node.input_size(), 5);
 
@@ -1207,9 +1312,11 @@ void ConvertFusedBatchNormOperator(const NodeDef& node, Model* model) {
   model->operators.emplace_back(op);
 }
 
-void ConvertSpaceToBatchNDOperator(const NodeDef& node, Model* model) {
+void ConvertSpaceToBatchNDOperator(const NodeDef& node,
+                                   const TensorFlowImportFlags& tf_import_flags,
+                                   Model* model) {
   CHECK_EQ(node.op(), "SpaceToBatchND");
-  CHECK_EQ(GetInputsCount(node, model->flags.drop_control_dependency()), 3);
+  CHECK_EQ(GetInputsCount(node, tf_import_flags), 3);
   CHECK_EQ(GetDataTypeAttr(node, "Tblock_shape"), DT_INT32);
   CHECK_EQ(GetDataTypeAttr(node, "Tpaddings"), DT_INT32);
   auto* op = new SpaceToBatchNDOperator;
@@ -1220,9 +1327,11 @@ void ConvertSpaceToBatchNDOperator(const NodeDef& node, Model* model) {
   model->operators.emplace_back(op);
 }
 
-void ConvertBatchToSpaceNDOperator(const NodeDef& node, Model* model) {
+void ConvertBatchToSpaceNDOperator(const NodeDef& node,
+                                   const TensorFlowImportFlags& tf_import_flags,
+                                   Model* model) {
   CHECK_EQ(node.op(), "BatchToSpaceND");
-  CHECK_EQ(GetInputsCount(node, model->flags.drop_control_dependency()), 3);
+  CHECK_EQ(GetInputsCount(node, tf_import_flags), 3);
   CHECK_EQ(GetDataTypeAttr(node, "Tblock_shape"), DT_INT32);
   CHECK_EQ(GetDataTypeAttr(node, "Tcrops"), DT_INT32);
   auto* op = new BatchToSpaceNDOperator;
@@ -1233,7 +1342,9 @@ void ConvertBatchToSpaceNDOperator(const NodeDef& node, Model* model) {
   model->operators.emplace_back(op);
 }
 
-void ConvertMeanOperator(const NodeDef& node, Model* model) {
+void ConvertMeanOperator(const NodeDef& node,
+                         const TensorFlowImportFlags& tf_import_flags,
+                         Model* model) {
   CHECK_EQ(node.op(), "Mean");
   CHECK_EQ(node.input_size(), 2);
   auto* op = new MeanOperator;
@@ -1246,7 +1357,9 @@ void ConvertMeanOperator(const NodeDef& node, Model* model) {
   }
 }
 
-void ConvertSvdfOperator(const NodeDef& node, Model* model) {
+void ConvertSvdfOperator(const NodeDef& node,
+                         const TensorFlowImportFlags& tf_import_flags,
+                         Model* model) {
   CHECK_EQ(node.op(), "Svdf");
   bool has_bias = (node.input_size() == 4);
   auto* op = new SvdfOperator;
@@ -1367,8 +1480,9 @@ bool InlineAllFunctions(GraphDef* graphdef) {
 }
 }  // namespace
 
-std::unique_ptr<Model> ImportTensorFlowGraphDef(const ModelFlags& model_flags,
-                                                const GraphDef& tf_graph) {
+std::unique_ptr<Model> ImportTensorFlowGraphDef(
+    const ModelFlags& model_flags, const TensorFlowImportFlags& tf_import_flags,
+    const GraphDef& tf_graph) {
   LogDumpGraphDef(kLogLevelModelChanged, "AT IMPORT", tf_graph);
 
   GraphDef inlined_graph(tf_graph);
@@ -1393,129 +1507,130 @@ std::unique_ptr<Model> ImportTensorFlowGraphDef(const ModelFlags& model_flags,
   for (auto node : inlined_graph.node()) {
     StripZeroOutputIndexFromInputs(&node);
     if (node.op() == "Const") {
-      ConvertConstOperator(node, model);
+      ConvertConstOperator(node, tf_import_flags, model);
     } else if (node.op() == "Conv2D") {
-      ConvertConvOperator(node, model);
+      ConvertConvOperator(node, tf_import_flags, model);
     } else if (node.op() == "DepthwiseConv2dNative") {
-      ConvertDepthwiseConvOperator(node, model);
+      ConvertDepthwiseConvOperator(node, tf_import_flags, model);
     } else if (node.op() == "DepthToSpace") {
-      ConvertDepthToSpaceOperator(node, model);
+      ConvertDepthToSpaceOperator(node, tf_import_flags, model);
     } else if (node.op() == "SpaceToDepth") {
-      ConvertSpaceToDepthOperator(node, model);
+      ConvertSpaceToDepthOperator(node, tf_import_flags, model);
     } else if (node.op() == "BiasAdd") {
-      ConvertBiasAddOperator(node, model);
+      ConvertBiasAddOperator(node, tf_import_flags, model);
     } else if (node.op() == "Relu") {
-      ConvertReluOperator(node, model);
+      ConvertReluOperator(node, tf_import_flags, model);
     } else if (node.op() == "Relu6") {
-      ConvertRelu6Operator(node, model);
+      ConvertRelu6Operator(node, tf_import_flags, model);
     } else if (node.op() == "Sigmoid") {
-      ConvertLogisticOperator(node, model);
+      ConvertLogisticOperator(node, tf_import_flags, model);
     } else if (node.op() == "Tanh") {
-      ConvertTanhOperator(node, model);
+      ConvertTanhOperator(node, tf_import_flags, model);
     } else if (node.op() == "MaxPool") {
-      ConvertMaxPoolOperator(node, model);
+      ConvertMaxPoolOperator(node, tf_import_flags, model);
     } else if (node.op() == "AvgPool") {
-      ConvertAvgPoolOperator(node, model);
+      ConvertAvgPoolOperator(node, tf_import_flags, model);
     } else if (node.op() == "Reshape") {
-      ConvertReshapeOperator(node, model);
+      ConvertReshapeOperator(node, tf_import_flags, model);
     } else if (node.op() == "MatMul") {
-      ConvertMatMulOperator(node, model);
+      ConvertMatMulOperator(node, tf_import_flags, model);
     } else if (node.op() == "Div" || node.op() == "RealDiv") {
-      ConvertDivOperator(node, model);
+      ConvertDivOperator(node, tf_import_flags, model);
     } else if (node.op() == "Identity" || node.op() == "CheckNumerics") {
-      ConvertIdentityOperator(node, model);
+      ConvertIdentityOperator(node, tf_import_flags, model);
     } else if (node.op() == "FakeQuantWithMinMaxVars") {
-      ConvertFakeQuantWithMinMaxVars(node, model);
+      ConvertFakeQuantWithMinMaxVars(node, tf_import_flags, model);
     } else if (node.op() == "FakeQuantWithMinMaxArgs") {
-      ConvertFakeQuantWithMinMaxArgs(node, model);
+      ConvertFakeQuantWithMinMaxArgs(node, tf_import_flags, model);
     } else if (node.op() == "Rsqrt") {
-      ConvertRsqrtOperator(node, model);
+      ConvertRsqrtOperator(node, tf_import_flags, model);
     } else if (node.op() == "Squeeze") {
-      ConvertSqueezeOperator(node, model);
+      ConvertSqueezeOperator(node, tf_import_flags, model);
     } else if (node.op() == "Sqrt") {
-      ConvertSqrtOperator(node, model);
+      ConvertSqrtOperator(node, tf_import_flags, model);
     } else if (node.op() == "Square") {
-      ConvertSquareOperator(node, model);
+      ConvertSquareOperator(node, tf_import_flags, model);
     } else if (node.op() == "Add") {
-      ConvertAddOperator(node, model);
+      ConvertAddOperator(node, tf_import_flags, model);
     } else if (node.op() == "Mul") {
-      ConvertMulOperator(node, model);
+      ConvertMulOperator(node, tf_import_flags, model);
     } else if (node.op() == "Sub") {
-      ConvertSubOperator(node, model);
+      ConvertSubOperator(node, tf_import_flags, model);
     } else if (node.op() == "Sum") {
-      ConvertSumOperator(node, model);
+      ConvertSumOperator(node, tf_import_flags, model);
     } else if (node.op() == "Tile") {
-      ConvertTileOperator(node, model);
+      ConvertTileOperator(node, tf_import_flags, model);
     } else if (node.op() == "Concat" || node.op() == "ConcatV2") {
-      ConvertConcatOperator(node, model);
+      ConvertConcatOperator(node, tf_import_flags, model);
     } else if (node.op() == "LRN") {
-      ConvertLRNOperator(node, model);
+      ConvertLRNOperator(node, tf_import_flags, model);
     } else if (node.op() == "Softmax") {
-      ConvertSoftmaxOperator(node, model);
+      ConvertSoftmaxOperator(node, tf_import_flags, model);
     } else if (node.op() == "All") {
-      ConvertAllOperator(node, model);
+      ConvertAllOperator(node, tf_import_flags, model);
     } else if (node.op() == "Assert") {
-      ConvertAssertOperator(node, model);
+      ConvertAssertOperator(node, tf_import_flags, model);
     } else if (node.op() == "Less") {
-      ConvertLessOperator(node, model);
+      ConvertLessOperator(node, tf_import_flags, model);
     } else if (node.op() == "LessEqual") {
-      ConvertLessEqualOperator(node, model);
+      ConvertLessEqualOperator(node, tf_import_flags, model);
     } else if (node.op() == "Greater") {
-      ConvertGreaterOperator(node, model);
+      ConvertGreaterOperator(node, tf_import_flags, model);
     } else if (node.op() == "GreaterEqual") {
-      ConvertGreaterEqualOperator(node, model);
+      ConvertGreaterEqualOperator(node, tf_import_flags, model);
     } else if (node.op() == "Max") {
-      ConvertMaxOperator(node, model);
+      ConvertMaxOperator(node, tf_import_flags, model);
     } else if (node.op() == "Min") {
-      ConvertMinOperator(node, model);
+      ConvertMinOperator(node, tf_import_flags, model);
     } else if (node.op() == "Maximum") {
-      ConvertMaximumOperator(node, model);
+      ConvertMaximumOperator(node, tf_import_flags, model);
     } else if (node.op() == "Minimum") {
-      ConvertMinimumOperator(node, model);
+      ConvertMinimumOperator(node, tf_import_flags, model);
     } else if (node.op() == "Merge") {
-      ConvertMergeOperator(node, model);
+      ConvertMergeOperator(node, tf_import_flags, model);
     } else if (node.op() == "Pad") {
-      ConvertPadOperator(node, model);
+      ConvertPadOperator(node, tf_import_flags, model);
     } else if (node.op() == "StridedSlice") {
-      ConvertStridedSliceOperator(node, model);
+      ConvertStridedSliceOperator(node, tf_import_flags, model);
     } else if (node.op() == "Shape") {
-      ConvertShapeOperator(node, model);
+      ConvertShapeOperator(node, tf_import_flags, model);
     } else if (node.op() == "Slice") {
-      ConvertSliceOperator(node, model);
+      ConvertSliceOperator(node, tf_import_flags, model);
     } else if (node.op() == "Split") {
-      ConvertSplitOperator(node, model);
+      ConvertSplitOperator(node, tf_import_flags, model);
     } else if (node.op() == "Switch") {
-      ConvertSwitchOperator(node, model);
+      ConvertSwitchOperator(node, tf_import_flags, model);
     } else if (node.op() == "Placeholder") {
-      ConvertPlaceholderOperator(node, model);
+      ConvertPlaceholderOperator(node, tf_import_flags, model);
     } else if (node.op() == "PlaceholderWithDefault") {
-      ConvertIdentityOperator(node, model);
+      ConvertIdentityOperator(node, tf_import_flags, model);
     } else if (node.op() == "LegacyFedInput") {
-      ConvertPlaceholderOperator(node, model);
+      ConvertPlaceholderOperator(node, tf_import_flags, model);
     } else if (node.op() == "NoOp") {
-      ConvertNoOpOperator(node, model);
+      ConvertNoOpOperator(node, tf_import_flags, model);
     } else if (node.op() == "Cast") {
-      ConvertCastOperator(node, model);
+      ConvertCastOperator(node, tf_import_flags, model);
     } else if (node.op() == "Floor") {
-      ConvertFloorOperator(node, model);
+      ConvertFloorOperator(node, tf_import_flags, model);
     } else if (node.op() == "Gather") {
-      ConvertGatherOperator(node, model);
+      ConvertGatherOperator(node, tf_import_flags, model);
     } else if (node.op() == "ResizeBilinear") {
-      ConvertResizeBilinearOperator(node, model);
+      ConvertResizeBilinearOperator(node, tf_import_flags, model);
     } else if (node.op() == "BatchNormWithGlobalNormalization") {
-      ConvertBatchNormWithGlobalNormalizationOperator(node, model);
+      ConvertBatchNormWithGlobalNormalizationOperator(node, tf_import_flags,
+                                                      model);
     } else if (node.op() == "FusedBatchNorm") {
-      ConvertFusedBatchNormOperator(node, model);
+      ConvertFusedBatchNormOperator(node, tf_import_flags, model);
     } else if (node.op() == "SpaceToBatchND") {
-      ConvertSpaceToBatchNDOperator(node, model);
+      ConvertSpaceToBatchNDOperator(node, tf_import_flags, model);
     } else if (node.op() == "BatchToSpaceND") {
-      ConvertBatchToSpaceNDOperator(node, model);
+      ConvertBatchToSpaceNDOperator(node, tf_import_flags, model);
     } else if (node.op() == "Mean") {
-      ConvertMeanOperator(node, model);
+      ConvertMeanOperator(node, tf_import_flags, model);
     } else if (node.op() == "Svdf") {
-      ConvertSvdfOperator(node, model);
+      ConvertSvdfOperator(node, tf_import_flags, model);
     } else {
-      ConvertUnsupportedOperator(node, model);
+      ConvertUnsupportedOperator(node, tf_import_flags, model);
     }
   }
 
@@ -1535,7 +1650,8 @@ std::unique_ptr<Model> ImportTensorFlowGraphDef(const ModelFlags& model_flags,
 }
 
 std::unique_ptr<Model> ImportTensorFlowGraphDef(
-    const ModelFlags& model_flags, const string& input_file_contents) {
+    const ModelFlags& model_flags, const TensorFlowImportFlags& tf_import_flags,
+    const string& input_file_contents) {
   std::unique_ptr<GraphDef> tf_graph(new GraphDef);
   CHECK(ParseFromStringEitherTextOrBinary(input_file_contents, tf_graph.get()));
 
@@ -1544,6 +1660,6 @@ std::unique_ptr<Model> ImportTensorFlowGraphDef(
   if (pruned_graph) {
     tf_graph = std::move(pruned_graph);
   }
-  return ImportTensorFlowGraphDef(model_flags, *tf_graph);
+  return ImportTensorFlowGraphDef(model_flags, tf_import_flags, *tf_graph);
 }
 }  // namespace toco
diff --git a/tensorflow/contrib/lite/toco/import_tensorflow.h b/tensorflow/contrib/lite/toco/import_tensorflow.h
index d2eb423ca4..312e3b8f17 100644
--- a/tensorflow/contrib/lite/toco/import_tensorflow.h
+++ b/tensorflow/contrib/lite/toco/import_tensorflow.h
@@ -23,11 +23,19 @@ limitations under the License.
 
 namespace toco {
 
+struct TensorFlowImportFlags {
+  // If true, control dependencies will be dropped immediately
+  // during the import of the TensorFlow GraphDef.
+  bool drop_control_dependency = false;
+};
+
 std::unique_ptr<Model> ImportTensorFlowGraphDef(
-    const ModelFlags& model_flags, const tensorflow::GraphDef& graph_def);
+    const ModelFlags& model_flags, const TensorFlowImportFlags& tf_import_flags,
+    const tensorflow::GraphDef& graph_def);
 
 std::unique_ptr<Model> ImportTensorFlowGraphDef(
-    const ModelFlags& model_flags, const string& input_file_contents);
+    const ModelFlags& model_flags, const TensorFlowImportFlags& tf_import_flags,
+    const string& input_file_contents);
 
 }  // namespace toco
 
diff --git a/tensorflow/contrib/lite/toco/model_cmdline_flags.cc b/tensorflow/contrib/lite/toco/model_cmdline_flags.cc
index 287a5d563d..dde602e186 100644
--- a/tensorflow/contrib/lite/toco/model_cmdline_flags.cc
+++ b/tensorflow/contrib/lite/toco/model_cmdline_flags.cc
@@ -112,13 +112,6 @@ bool ParseModelFlagsFromCommandLineFlags(
            "exclusive "
            "with the 'batch' field: at most one of these two fields can be "
            "set."),
-      Flag(
-          "drop_control_dependency",
-          parsed_flags.drop_control_dependency.bind(),
-          parsed_flags.drop_control_dependency.default_value(),
-          "If true, ignore control dependency requirements in input TensorFlow "
-          "GraphDef. Otherwise an error will be raised upon control dependency "
-          "inputs."),
       Flag("rnn_states", parsed_flags.rnn_states.bind(),
            parsed_flags.rnn_states.default_value(), ""),
       Flag("model_checks", parsed_flags.model_checks.bind(),
@@ -316,7 +309,6 @@ void ReadModelFlagsFromCommandLineFlags(
   } while (false)
 
   READ_MODEL_FLAG(variable_batch);
-  READ_MODEL_FLAG(drop_control_dependency);
 
 #undef READ_MODEL_FLAG
 
diff --git a/tensorflow/contrib/lite/toco/model_flags.proto b/tensorflow/contrib/lite/toco/model_flags.proto
index bd6e374e8c..5b30904696 100644
--- a/tensorflow/contrib/lite/toco/model_flags.proto
+++ b/tensorflow/contrib/lite/toco/model_flags.proto
@@ -138,8 +138,4 @@ message ModelFlags {
     optional int32 count_max = 3 [default = -1];
   }
   repeated ModelCheck model_checks = 14;
-
-  // If true, ignore control dependency requirements in input TensorFlow
-  // GraphDef. Otherwise an error will be raised upon control dependency inputs.
-  optional bool drop_control_dependency = 15;
 }
diff --git a/tensorflow/contrib/lite/toco/toco_cmdline_flags.cc b/tensorflow/contrib/lite/toco/toco_cmdline_flags.cc
index e97f59eb3f..83947d6b28 100644
--- a/tensorflow/contrib/lite/toco/toco_cmdline_flags.cc
+++ b/tensorflow/contrib/lite/toco/toco_cmdline_flags.cc
@@ -103,6 +103,13 @@ bool ParseTocoFlagsFromCommandLineFlags(
            parsed_flags.allow_custom_ops.default_value(),
            "If true, allow TOCO to create TF Lite Custom operators for all the"
            "unsupported Tensorflow ops."),
+      Flag(
+          "drop_control_dependency",
+          parsed_flags.drop_control_dependency.bind(),
+          parsed_flags.drop_control_dependency.default_value(),
+          "If true, ignore control dependency requirements in input TensorFlow "
+          "GraphDef. Otherwise an error will be raised upon control dependency "
+          "inputs."),
   };
   bool asked_for_help =
       *argc == 2 && (!strcmp(argv[1], "--help") || !strcmp(argv[1], "-help"));
@@ -163,6 +170,7 @@ void ReadTocoFlagsFromCommandLineFlags(const ParsedTocoFlags& parsed_toco_flags,
   READ_TOCO_FLAG(drop_fake_quant, FlagRequirement::kNone);
   READ_TOCO_FLAG(reorder_across_fake_quant, FlagRequirement::kNone);
   READ_TOCO_FLAG(allow_custom_ops, FlagRequirement::kNone);
+  READ_TOCO_FLAG(drop_control_dependency, FlagRequirement::kNone);
 
   // Deprecated flag handling.
   if (parsed_toco_flags.input_type.specified()) {
diff --git a/tensorflow/contrib/lite/toco/toco_flags.proto b/tensorflow/contrib/lite/toco/toco_flags.proto
index 7bddce5b03..3b9d7e2257 100644
--- a/tensorflow/contrib/lite/toco/toco_flags.proto
+++ b/tensorflow/contrib/lite/toco/toco_flags.proto
@@ -36,7 +36,7 @@ enum FileFormat {
 // are not normally encoded in model files and in general may not be thought
 // of as properties of models, instead describing how models are to be
 // processed in the context of the present tooling job.
-// Next Id: 12
+// Next Id: 13
 message TocoFlags {
   // Input file format
   optional FileFormat input_format = 1;
@@ -128,4 +128,12 @@ message TocoFlags {
   // If true, allow TOCO to create TF Lite Custom operators for all the
   // unsupported Tensorflow ops.
   optional bool allow_custom_ops = 10;
+
+  // Applies only to the case when the input format is TENSORFLOW_GRAPHDEF.
+  // If true, then control dependencies will be immediately dropped during
+  // import.
+  // If not set, the default behavior is as follows:
+  //    - Default to false if the output format is TENSORFLOW_GRAPHDEF.
+  //    - Default to true in all other cases.
+  optional bool drop_control_dependency = 12;
 }
diff --git a/tensorflow/contrib/lite/toco/toco_tooling.cc b/tensorflow/contrib/lite/toco/toco_tooling.cc
index 1408f7cd7b..eabc145ad4 100644
--- a/tensorflow/contrib/lite/toco/toco_tooling.cc
+++ b/tensorflow/contrib/lite/toco/toco_tooling.cc
@@ -127,9 +127,16 @@ std::unique_ptr<Model> Import(const TocoFlags& toco_flags,
                               const string& input_file_contents) {
   std::unique_ptr<Model> model;
   switch (toco_flags.input_format()) {
-    case TENSORFLOW_GRAPHDEF:
-      model = ImportTensorFlowGraphDef(model_flags, input_file_contents);
+    case TENSORFLOW_GRAPHDEF: {
+      TensorFlowImportFlags tf_import_flags;
+      tf_import_flags.drop_control_dependency =
+          toco_flags.has_drop_control_dependency()
+              ? toco_flags.drop_control_dependency()
+              : (toco_flags.output_format() != TENSORFLOW_GRAPHDEF);
+      model = ImportTensorFlowGraphDef(model_flags, tf_import_flags,
+                                       input_file_contents);
       break;
+    }
     case TFLITE:
       model = toco::tflite::Import(model_flags, input_file_contents);
       ResolveModelFlags(model_flags, model.get());
diff --git a/tensorflow/contrib/lite/toco/tooling_util.cc b/tensorflow/contrib/lite/toco/tooling_util.cc
index ec24f76dc8..3ee060f9b9 100644
--- a/tensorflow/contrib/lite/toco/tooling_util.cc
+++ b/tensorflow/contrib/lite/toco/tooling_util.cc
@@ -1016,7 +1016,6 @@ void ResolveModelFlags(const ModelFlags& model_flags, Model* model) {
   }
 
   RESOLVE_MODEL_FLAG(variable_batch)
-  RESOLVE_MODEL_FLAG(drop_control_dependency)
 
 #undef RESOLVE_MODEL_FLAG
 
-- 
GitLab


From cf245240ca90e6b552415f720342ae1acd326590 Mon Sep 17 00:00:00 2001
From: Sanjoy Das <sanjoy@google.com>
Date: Wed, 22 Nov 2017 11:26:09 -0800
Subject: [PATCH 0229/1225] [XLA:CPU] Add a basic implementation for
 ExecuteAsyncOnStream

PiperOrigin-RevId: 176680801
---
 tensorflow/compiler/xla/client/client.h       |  9 ++
 .../xla/service/cpu/cpu_executable.cc         | 96 ++++++++++++++-----
 tensorflow/compiler/xla/tests/BUILD           |  1 +
 tensorflow/compiler/xla/tests/client_test.cc  | 67 ++++++++++---
 4 files changed, 135 insertions(+), 38 deletions(-)

diff --git a/tensorflow/compiler/xla/client/client.h b/tensorflow/compiler/xla/client/client.h
index a716159f9e..cf6878dd8e 100644
--- a/tensorflow/compiler/xla/client/client.h
+++ b/tensorflow/compiler/xla/client/client.h
@@ -67,6 +67,15 @@ class Client {
     std::vector<GlobalData*> arguments;
     ExecutionOptions execution_options;
     ExecutionProfile* execution_profile;
+
+    ComputationInstance(const Computation& computation,
+                        std::vector<GlobalData*> arguments,
+                        ExecutionOptions execution_options,
+                        ExecutionProfile* execution_profile)
+        : computation(computation),
+          arguments(std::move(arguments)),
+          execution_options(execution_options),
+          execution_profile(execution_profile) {}
   };
 
   // Executes a list ComputationInstances and returns global data produced from
diff --git a/tensorflow/compiler/xla/service/cpu/cpu_executable.cc b/tensorflow/compiler/xla/service/cpu/cpu_executable.cc
index ddbe7ab341..e6ef9d6314 100644
--- a/tensorflow/compiler/xla/service/cpu/cpu_executable.cc
+++ b/tensorflow/compiler/xla/service/cpu/cpu_executable.cc
@@ -43,6 +43,7 @@ limitations under the License.
 #include "tensorflow/core/platform/mem.h"
 #include "tensorflow/core/platform/mutex.h"
 #include "tensorflow/core/platform/types.h"
+#include "tensorflow/stream_executor/host/host_stream.h"
 
 namespace se = ::perftools::gputools;
 
@@ -241,6 +242,37 @@ Status CpuExecutable::ExecuteComputeFunction(
   return Status::OK();
 }
 
+static void LogLiveAddresses(
+    const std::unordered_set<const void*>& marked_addresses) {
+  VLOG(3) << "Live addresses in output marking found "
+          << marked_addresses.size() << " addresses:\n"
+          << tensorflow::str_util::Join(
+                 marked_addresses, ", ", [](string* out, const void* address) {
+                   tensorflow::strings::StrAppend(
+                       out, tensorflow::strings::Printf("%p", address));
+                 });
+}
+
+static Status DeallocateTempBuffers(
+    DeviceMemoryAllocator* allocator, se::Stream* stream,
+    tensorflow::gtl::ArraySlice<se::DeviceMemoryBase> buffers,
+    const std::unordered_set<const void*>& marked_addresses) {
+  // Keep those marked live because they are referenced by the output of the
+  // computation and are needed by the service. They will be deallocated by the
+  // service.
+  for (size_t i = 0; i < buffers.size(); ++i) {
+    se::DeviceMemoryBase alloc = buffers[i];
+    if (marked_addresses.count(alloc.opaque()) == 0 && !alloc.is_null()) {
+      VLOG(3) << "CpuExecutable deallocating buffer #" << i << " ["
+              << alloc.opaque() << "]";
+      TF_RETURN_IF_ERROR(
+          allocator->Deallocate(stream->parent()->device_ordinal(), &alloc));
+    }
+  }
+
+  return Status::OK();
+}
+
 StatusOr<perftools::gputools::DeviceMemoryBase> CpuExecutable::ExecuteOnStream(
     const ServiceExecutableRunOptions* run_options,
     tensorflow::gtl::ArraySlice<se::DeviceMemoryBase> arguments,
@@ -263,26 +295,9 @@ StatusOr<perftools::gputools::DeviceMemoryBase> CpuExecutable::ExecuteOnStream(
   MarkLiveAddressesInOutput(top_level_output.opaque(), result_shape(),
                             &marked_addresses);
 
-  VLOG(3) << "Live addresses in output marking found "
-          << marked_addresses.size() << " addresses:\n"
-          << tensorflow::str_util::Join(
-                 marked_addresses, ", ", [](string* out, const void* address) {
-                   tensorflow::strings::StrAppend(
-                       out, tensorflow::strings::Printf("%p", address));
-                 });
-
-  // Computation is done - deallocate temp buffers. Keep those marked live
-  // because they are referenced by the output of the computation and are needed
-  // by the service. They will be deallocated by the service.
-  for (size_t i = 0; i < buffers.size(); ++i) {
-    se::DeviceMemoryBase alloc = buffers[i];
-    if (marked_addresses.count(alloc.opaque()) == 0 && !alloc.is_null()) {
-      VLOG(3) << "CpuExecutable deallocating buffer #" << i << " ["
-              << alloc.opaque() << "]";
-      TF_RETURN_IF_ERROR(memory_allocator->Deallocate(
-          stream->parent()->device_ordinal(), &alloc));
-    }
-  }
+  LogLiveAddresses(marked_addresses);
+  TF_RETURN_IF_ERROR(DeallocateTempBuffers(memory_allocator, stream, buffers,
+                                           marked_addresses));
 
   return top_level_output;
 }
@@ -360,9 +375,44 @@ StatusOr<perftools::gputools::DeviceMemoryBase>
 CpuExecutable::ExecuteAsyncOnStream(
     const ServiceExecutableRunOptions* run_options,
     tensorflow::gtl::ArraySlice<se::DeviceMemoryBase> arguments) {
-  // TODO(b/30671675): Implement asynchronous execution mode.
-  return Unimplemented(
-      "Asynchronous execution on stream is not yet supported on CPU.");
+  if (hlo_profiling_enabled()) {
+    return Unimplemented(
+        "Asynchronous execution on stream with hlo profiling is not yet "
+        "supported on CPU.");
+  }
+
+  auto* host_stream = dynamic_cast<perftools::gputools::host::HostStream*>(
+      run_options->stream()->implementation());
+  se::Stream* stream = run_options->stream();
+  DeviceMemoryAllocator* memory_allocator = run_options->allocator();
+  std::vector<se::DeviceMemoryBase> buffers(assignment_->Allocations().size());
+
+  TF_RETURN_IF_ERROR(AllocateBuffers(
+      memory_allocator, stream->parent()->device_ordinal(), &buffers));
+
+  // Mark the buffers that are actually live (used in the output) when the
+  // computation finishes executing.
+  std::unordered_set<const void*> marked_addresses;
+  TF_ASSIGN_OR_RETURN(const BufferAllocation::Slice result_slice,
+                      assignment_->GetUniqueTopLevelOutputSlice());
+  se::DeviceMemoryBase top_level_output = buffers[result_slice.index()];
+  MarkLiveAddressesInOutput(top_level_output.opaque(), result_shape(),
+                            &marked_addresses);
+
+  LogLiveAddresses(marked_addresses);
+
+  host_stream->EnqueueTask([this, run_options, arguments, buffers,
+                            marked_addresses, memory_allocator, stream]() {
+    // Failing a CHECK here is not great, but I don't see an obvious way to
+    // return a failed Status asynchronously.
+    TF_CHECK_OK(ExecuteComputeFunction(&run_options->run_options(), arguments,
+                                       buffers,
+                                       /*hlo_execution_profile=*/nullptr));
+    TF_CHECK_OK(DeallocateTempBuffers(memory_allocator, stream, buffers,
+                                      marked_addresses));
+  });
+
+  return top_level_output;
 }
 
 /*static*/ int64 CpuExecutable::ShapeSizeBytes(const Shape& shape) {
diff --git a/tensorflow/compiler/xla/tests/BUILD b/tensorflow/compiler/xla/tests/BUILD
index 6811dbb39f..13d651ea6f 100644
--- a/tensorflow/compiler/xla/tests/BUILD
+++ b/tensorflow/compiler/xla/tests/BUILD
@@ -1343,6 +1343,7 @@ xla_test(
     srcs = ["client_test.cc"],
     deps = [
         "//tensorflow/compiler/xla:shape_util",
+        "//tensorflow/compiler/xla:status_macros",
         "//tensorflow/compiler/xla:statusor",
         "//tensorflow/compiler/xla:test_helpers",
         "//tensorflow/compiler/xla:xla_data_proto",
diff --git a/tensorflow/compiler/xla/tests/client_test.cc b/tensorflow/compiler/xla/tests/client_test.cc
index 183bcf1dd3..8853ed9e57 100644
--- a/tensorflow/compiler/xla/tests/client_test.cc
+++ b/tensorflow/compiler/xla/tests/client_test.cc
@@ -20,10 +20,12 @@ limitations under the License.
 #include "tensorflow/compiler/xla/client/global_data.h"
 #include "tensorflow/compiler/xla/client/local_client.h"
 #include "tensorflow/compiler/xla/shape_util.h"
+#include "tensorflow/compiler/xla/status_macros.h"
 #include "tensorflow/compiler/xla/statusor.h"
 #include "tensorflow/compiler/xla/test_helpers.h"
 #include "tensorflow/compiler/xla/tests/client_library_test_base.h"
 #include "tensorflow/compiler/xla/tests/literal_test_util.h"
+#include "tensorflow/compiler/xla/tests/test_macros.h"
 #include "tensorflow/compiler/xla/tests/test_utils.h"
 #include "tensorflow/compiler/xla/xla_data.pb.h"
 #include "tensorflow/core/platform/test.h"
@@ -42,26 +44,26 @@ TEST_F(ClientTest, ExecuteWithLayout) {
     for (const std::vector<int64>& transfer_layout : layouts) {
       b.Add(b.ConstantR2<int32>({{1, 2}, {3, 4}}),
             b.ConstantR2<int32>({{10, 20}, {30, 40}}));
-      auto computation = b.Build();
-      ASSERT_TRUE(computation.ok()) << computation.status();
+      TF_ASSERT_OK_AND_ASSIGN(auto computation, b.Build());
 
       ExecutionOptions execution_options = execution_options_;
       *execution_options.mutable_shape_with_output_layout() =
           ShapeUtil::MakeShapeWithLayout(S32, /*dimensions=*/{2, 2},
                                          execute_layout);
-      std::unique_ptr<GlobalData> data =
-          client_->Execute(computation.ValueOrDie(), {}, &execution_options)
-              .ConsumeValueOrDie();
+      TF_ASSERT_OK_AND_ASSIGN(
+          std::unique_ptr<GlobalData> data,
+          client_->Execute(computation, {}, &execution_options));
 
       std::unique_ptr<Literal> expected_literal =
           Literal::CreateR2WithLayout<int32>(
               {{11, 22}, {33, 44}}, LayoutUtil::MakeLayout(transfer_layout));
 
-      auto computed = client_->Transfer(*data, &expected_literal->shape());
+      TF_ASSERT_OK_AND_ASSIGN(
+          auto computed, client_->Transfer(*data, &expected_literal->shape()));
 
-      LiteralTestUtil::AssertEqualShapesAndLayouts(
-          expected_literal->shape(), computed.ValueOrDie()->shape());
-      LiteralTestUtil::ExpectEqual(*expected_literal, *computed.ValueOrDie());
+      LiteralTestUtil::AssertEqualShapesAndLayouts(expected_literal->shape(),
+                                                   computed->shape());
+      LiteralTestUtil::ExpectEqual(*expected_literal, *computed);
     }
   }
 }
@@ -72,8 +74,7 @@ TEST_F(ClientTest, ExecuteWithTupleLayout) {
   b.Tuple({b.ConstantR2<int32>({{1, 2}, {3, 4}}),
            b.ConstantR2<int32>({{10, 20}, {30, 40}})});
 
-  auto computation = b.Build();
-  ASSERT_TRUE(computation.ok()) << computation.status();
+  TF_ASSERT_OK_AND_ASSIGN(auto computation, b.Build());
 
   ExecutionOptions execution_options = execution_options_;
   // Create a result shape with one element column major and the other row
@@ -85,10 +86,9 @@ TEST_F(ClientTest, ExecuteWithTupleLayout) {
            ShapeUtil::MakeShapeWithLayout(S32, /*dimensions=*/{2, 2},
                                           /*minor_to_major=*/{1, 0})});
 
-  auto result =
-      client_
-          ->ExecuteAndTransfer(computation.ValueOrDie(), {}, &execution_options)
-          .ConsumeValueOrDie();
+  TF_ASSERT_OK_AND_ASSIGN(
+      auto result,
+      client_->ExecuteAndTransfer(computation, {}, &execution_options));
   LiteralTestUtil::ExpectR2Equal<int32>({{1, 2}, {3, 4}},
                                         result->tuple_literals(0));
   LiteralTestUtil::ExpectR2Equal<int32>({{10, 20}, {30, 40}},
@@ -107,5 +107,42 @@ TEST_F(ClientTest, ExecuteWithTupleLayout) {
                                      /*minor_to_major=*/{1, 0})));
 }
 
+TEST_F(ClientTest, DISABLED_ON_CPU_PARALLEL(DISABLED_ON_GPU(ExecuteParallel))) {
+  Computation add_with_one_arg, mul_with_two_args, dot_with_one_arg;
+  Shape shape = ShapeUtil::MakeShape(S32, {2, 2});
+
+  TF_ASSERT_OK_AND_ASSIGN(
+      std::unique_ptr<GlobalData> const_arg,
+      client_->TransferToServer(*Literal::CreateR2<int32>({{5, 6}, {7, 8}})));
+
+  ComputationBuilder b(client_, TestName() + ".add");
+  b.Add(b.Parameter(0, shape, "param_0"),
+        b.ConstantR2<int32>({{1, 2}, {3, 4}}));
+  TF_ASSERT_OK_AND_ASSIGN(add_with_one_arg, b.Build());
+
+  // We can't really test parallel execution on CPU since all of the cores in a
+  // CPU are presented as a single device.  So for now we test "parallel"
+  // execution on a single device.
+  std::vector<Client::ComputationInstance> computation_instances;
+  TF_ASSERT_OK_AND_ASSIGN(std::vector<xla::DeviceHandle> devices,
+                          client_->GetDeviceHandles(1));
+  ASSERT_EQ(devices.size(), 1);
+
+  ExecutionOptions options = execution_options_;
+  *options.add_device_handles() = devices[0];
+  computation_instances.push_back(Client::ComputationInstance(
+      add_with_one_arg, {const_arg.get()}, options, nullptr));
+
+  TF_ASSERT_OK_AND_ASSIGN(auto results,
+                          client_->ExecuteParallel(computation_instances));
+  auto expected_result = Literal::CreateR2<int32>({{6, 8}, {10, 12}});
+
+  TF_ASSERT_OK_AND_ASSIGN(
+      auto result_literal,
+      client_->Transfer(*results[0], &expected_result->shape()));
+
+  LiteralTestUtil::ExpectEqual(*expected_result, *result_literal);
+}
+
 }  // namespace
 }  // namespace xla
-- 
GitLab


From b0dbca111c80bdd8f1a2c28afc24d597cb1eac89 Mon Sep 17 00:00:00 2001
From: Gunhan Gulsoy <gunan@google.com>
Date: Wed, 22 Nov 2017 11:36:29 -0800
Subject: [PATCH 0230/1225] Fixes to windows builds. (#14803)

* Fixes to windows builds.

-Disable failing data_utils_test in cmake and bazel builds.
-Disable session_partial_run_test in bazel build. It is already not
running under cmake build.
-Increase cmake build log verbosity, as we still canot see the root
cause of failures.

* Use unix style path dividers in cmake files.
---
 tensorflow/contrib/cmake/tf_tests.cmake                   | 1 +
 tensorflow/python/BUILD                                   | 1 +
 tensorflow/python/keras/BUILD                             | 1 +
 tensorflow/tools/ci_build/windows/cpu/cmake/run_build.bat | 2 +-
 tensorflow/tools/ci_build/windows/gpu/cmake/run_build.bat | 2 +-
 5 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/tensorflow/contrib/cmake/tf_tests.cmake b/tensorflow/contrib/cmake/tf_tests.cmake
index ba9e307835..18b71d1f9a 100644
--- a/tensorflow/contrib/cmake/tf_tests.cmake
+++ b/tensorflow/contrib/cmake/tf_tests.cmake
@@ -224,6 +224,7 @@ if (tensorflow_BUILD_PYTHON_TESTS)
       # Numerical issues, calculations off.
       "${tensorflow_source_dir}/tensorflow/python/kernel_tests/concat_op_test.py"
       "${tensorflow_source_dir}/tensorflow/contrib/factorization/python/ops/wals_test.py"
+      "${tensorflow_source_dir}/tensorflow/python/keras/_impl/keras/utils/data_utils_test.py"
       # Float division by zero
       "${tensorflow_source_dir}/tensorflow/python/kernel_tests/benchmark_test.py"
       # Flaky, for unknown reasons. Cannot reproduce in terminal. Revisit once we can get stack traces.
diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD
index a438768809..5eb9b79ee6 100644
--- a/tensorflow/python/BUILD
+++ b/tensorflow/python/BUILD
@@ -3372,6 +3372,7 @@ py_test(
     srcs_version = "PY2AND3",
     tags = [
         "no_gpu",
+        "no_windows",
     ],
     deps = [
         ":array_ops",
diff --git a/tensorflow/python/keras/BUILD b/tensorflow/python/keras/BUILD
index e4992afbca..d9391dd6c5 100644
--- a/tensorflow/python/keras/BUILD
+++ b/tensorflow/python/keras/BUILD
@@ -556,6 +556,7 @@ py_test(
     srcs = ["_impl/keras/utils/data_utils_test.py"],
     srcs_version = "PY2AND3",
     tags = [
+        "no_windows",
         "noasan",  # times out
         "notsan",
     ],
diff --git a/tensorflow/tools/ci_build/windows/cpu/cmake/run_build.bat b/tensorflow/tools/ci_build/windows/cpu/cmake/run_build.bat
index 6e600e2dcf..56bff07774 100644
--- a/tensorflow/tools/ci_build/windows/cpu/cmake/run_build.bat
+++ b/tensorflow/tools/ci_build/windows/cpu/cmake/run_build.bat
@@ -37,4 +37,4 @@ SET MSBUILD_EXE="C:\Program Files (x86)\MSBuild\14.0\Bin\msbuild.exe"
 %CMAKE_EXE% %CMAKE_DIR% -A x64 -DSWIG_EXECUTABLE=%SWIG_EXE% -DPYTHON_EXECUTABLE=%PY_EXE% -DCMAKE_BUILD_TYPE=Release -DPYTHON_LIBRARIES=%PY_LIB% -Dtensorflow_BUILD_PYTHON_TESTS=%BUILD_PYTHON_TESTS% -Dtensorflow_BUILD_CC_TESTS=%BUILD_CC_TESTS% -Dtensorflow_TF_NIGHTLY=%TF_NIGHTLY%
 
 :: Run msbuild in the resulting VS project files to build a pip package.
-%MSBUILD_EXE% /p:Configuration=Release /maxcpucount:32 /verbosity:minimal tf_python_build_pip_package.vcxproj
\ No newline at end of file
+%MSBUILD_EXE% /p:Configuration=Release /maxcpucount:32 tf_python_build_pip_package.vcxproj
diff --git a/tensorflow/tools/ci_build/windows/gpu/cmake/run_build.bat b/tensorflow/tools/ci_build/windows/gpu/cmake/run_build.bat
index 44d8252a7a..832943ad6c 100644
--- a/tensorflow/tools/ci_build/windows/gpu/cmake/run_build.bat
+++ b/tensorflow/tools/ci_build/windows/gpu/cmake/run_build.bat
@@ -38,4 +38,4 @@ SET MSBUILD_EXE="C:\Program Files (x86)\MSBuild\14.0\Bin\msbuild.exe"
 %CMAKE_EXE% %CMAKE_DIR% -A x64 -DSWIG_EXECUTABLE=%SWIG_EXE% -DPYTHON_EXECUTABLE=%PY_EXE% -DCMAKE_BUILD_TYPE=Release -DPYTHON_LIBRARIES=%PY_LIB% -Dtensorflow_BUILD_PYTHON_TESTS=%BUILD_PYTHON_TESTS% -Dtensorflow_BUILD_CC_TESTS=%BUILD_CC_TESTS% -Dtensorflow_ENABLE_GPU=ON -DCUDNN_HOME=%CUDNN_HOME% -Dtensorflow_TF_NIGHTLY=%TF_NIGHTLY%
 
 :: Run msbuild in the resulting VS project files to build a pip package.
-%MSBUILD_EXE% /p:Configuration=Release /maxcpucount:32 /verbosity:minimal tf_python_build_pip_package.vcxproj
+%MSBUILD_EXE% /p:Configuration=Release /maxcpucount:32 tf_python_build_pip_package.vcxproj
-- 
GitLab


From 791ef8383d165c116f4c5fc3fda12ebc7eb07edf Mon Sep 17 00:00:00 2001
From: Olivia Nordquist <nolivia@google.com>
Date: Wed, 22 Nov 2017 11:40:46 -0800
Subject: [PATCH 0231/1225] python testing for  is_feedable and is_fetchable
 nodes in the graph

PiperOrigin-RevId: 176682768
---
 tensorflow/python/framework/ops_test.py          | 16 +++++++++++++++-
 .../kernel_tests/control_flow_ops_py_test.py     | 14 ++++++++++----
 2 files changed, 25 insertions(+), 5 deletions(-)

diff --git a/tensorflow/python/framework/ops_test.py b/tensorflow/python/framework/ops_test.py
index a4780fdc05..7ae7b5cb7f 100644
--- a/tensorflow/python/framework/ops_test.py
+++ b/tensorflow/python/framework/ops_test.py
@@ -691,7 +691,7 @@ class CreateOpFromTFOperationTest(test_util.TensorFlowTestCase):
                                   [])
           ops.get_default_graph()._create_op_from_tf_operation(c_op)
         else:
-        # Test pure-Python version to make sure C API has same behavior.
+          # Test pure-Python version to make sure C API has same behavior.
           test_ops.int_input(x, name="myop")
         return x
 
@@ -1741,6 +1741,20 @@ class GraphTest(test_util.TensorFlowTestCase):
       self._AssertDefault(g0)
     self._AssertDefault(orig)
 
+  def testPreventFeeding(self):
+    g = ops.Graph()
+    a = constant_op.constant(2.0)
+    self.assertTrue(g.is_feedable(a))
+    g.prevent_feeding(a)
+    self.assertFalse(g.is_feedable(a))
+
+  def testPreventFetching(self):
+    g = ops.Graph()
+    a = constant_op.constant(2.0)
+    self.assertTrue(g.is_fetchable(a))
+    g.prevent_fetching(a.op)
+    self.assertFalse(g.is_fetchable(a))
+
   def testAsGraphElementConversions(self):
 
     class ConvertibleObj(object):
diff --git a/tensorflow/python/kernel_tests/control_flow_ops_py_test.py b/tensorflow/python/kernel_tests/control_flow_ops_py_test.py
index fc125daf38..1b7f9b110c 100644
--- a/tensorflow/python/kernel_tests/control_flow_ops_py_test.py
+++ b/tensorflow/python/kernel_tests/control_flow_ops_py_test.py
@@ -352,14 +352,20 @@ class ControlFlowTest(test.TestCase):
     grad = gradients_impl.gradients(y, [v])
     self.assertAllEqual([None], grad)
 
-  def testFetchables(self):
+  def testFetchable(self):
     with self.test_session() as sess:
       x = array_ops.placeholder(dtypes.float32)
       control_flow_ops.cond(
           constant_op.constant(True), lambda: x + 2, lambda: x + 0)
-      tensor_names = all_fetchables()
-      for name in tensor_names:
-        sess.run(name, feed_dict={x: 3})
+      graph = ops.get_default_graph()
+      for op in graph.get_operations():
+        for t in op.inputs:
+          if graph.is_fetchable(t.op):
+            sess.run(t, feed_dict={x: 3})
+          else:
+            with self.assertRaisesRegexp(ValueError,
+                                         "has been marked as not fetchable"):
+              sess.run(t, feed_dict={x: 3})
 
   def testFeedable(self):
     with self.test_session() as sess:
-- 
GitLab


From e2c652ea018b9d2a4cc8453ea92b10d208ba4265 Mon Sep 17 00:00:00 2001
From: Saurabh Saxena <srbs@google.com>
Date: Wed, 22 Nov 2017 11:59:46 -0800
Subject: [PATCH 0232/1225] Make PaddedBatchDataset saveable.

PiperOrigin-RevId: 176685098
---
 .../kernel_tests/batch_dataset_op_test.py     | 36 +++++++
 .../core/kernels/padded_batch_dataset_op.cc   | 99 ++++++++++++++++---
 2 files changed, 122 insertions(+), 13 deletions(-)

diff --git a/tensorflow/contrib/data/python/kernel_tests/batch_dataset_op_test.py b/tensorflow/contrib/data/python/kernel_tests/batch_dataset_op_test.py
index d7437cba73..b0064f8ae7 100644
--- a/tensorflow/contrib/data/python/kernel_tests/batch_dataset_op_test.py
+++ b/tensorflow/contrib/data/python/kernel_tests/batch_dataset_op_test.py
@@ -723,5 +723,41 @@ class BatchDatasetSerializationTest(
         num_outputs)
 
 
+class PaddedBatchDatasetSerializationTest(
+    dataset_serialization_test_base.DatasetSerializationTestBase):
+
+  def testPaddedBatch(self):
+
+    def build_dataset(seq_lens):
+      return dataset_ops.Dataset.from_tensor_slices(seq_lens).map(
+          lambda x: array_ops.fill([x], x)).padded_batch(
+              4, padded_shapes=[-1])
+
+    seq_lens1 = np.random.randint(1, 20, size=(32,)).astype(np.int32)
+    seq_lens2 = np.random.randint(21, 40, size=(32,)).astype(np.int32)
+    self.run_core_tests(lambda: build_dataset(seq_lens1),
+                        lambda: build_dataset(seq_lens2), 8)
+
+  def testPaddedBatchNonDefaultPadding(self):
+
+    def build_dataset(seq_lens):
+
+      def fill_tuple(x):
+        filled = array_ops.fill([x], x)
+        return (filled, string_ops.as_string(filled))
+
+      padded_shape = [-1]
+      return dataset_ops.Dataset.from_tensor_slices(seq_lens).map(
+          fill_tuple).padded_batch(
+              4,
+              padded_shapes=(padded_shape, padded_shape),
+              padding_values=(-1, "<end>"))
+
+    seq_lens1 = np.random.randint(1, 20, size=(32,)).astype(np.int32)
+    seq_lens2 = np.random.randint(21, 40, size=(32,)).astype(np.int32)
+    self.run_core_tests(lambda: build_dataset(seq_lens1),
+                        lambda: build_dataset(seq_lens2), 8)
+
+
 if __name__ == "__main__":
   test.main()
diff --git a/tensorflow/core/kernels/padded_batch_dataset_op.cc b/tensorflow/core/kernels/padded_batch_dataset_op.cc
index cfc77690b5..7c28d955e1 100644
--- a/tensorflow/core/kernels/padded_batch_dataset_op.cc
+++ b/tensorflow/core/kernels/padded_batch_dataset_op.cc
@@ -181,16 +181,18 @@ class PaddedBatchDatasetOp : public UnaryDatasetOpKernel {
       padding_values.push_back(tensor::DeepCopy(padding_value_t));
     }
 
-    *output = new Dataset(batch_size, std::move(padded_shapes),
+    *output = new Dataset(ctx, batch_size, std::move(padded_shapes),
                           std::move(padding_values), input);
   }
 
  private:
-  class Dataset : public DatasetBase {
+  class Dataset : public GraphDatasetBase {
    public:
-    Dataset(int64 batch_size, std::vector<PartialTensorShape> padded_shapes,
+    Dataset(OpKernelContext* ctx, int64 batch_size,
+            std::vector<PartialTensorShape> padded_shapes,
             std::vector<Tensor> padding_values, const DatasetBase* input)
-        : batch_size_(batch_size),
+        : GraphDatasetBase(ctx),
+          batch_size_(batch_size),
           padded_shapes_(std::move(padded_shapes)),
           padding_values_(std::move(padding_values)),
           input_(input) {
@@ -232,6 +234,47 @@ class PaddedBatchDatasetOp : public UnaryDatasetOpKernel {
                              ")::Dataset");
     }
 
+   protected:
+    Status AsGraphDefInternal(OpKernelContext* ctx, DatasetGraphDefBuilder* b,
+                              Node** output) const override {
+      Node* input_graph_node = nullptr;
+      TF_RETURN_IF_ERROR(b->AddParentDataset(ctx, input_, &input_graph_node));
+      Node* batch_size = nullptr;
+      TF_RETURN_IF_ERROR(b->AddScalar(batch_size_, &batch_size));
+
+      std::vector<NodeBuilder::NodeOut> padded_shapes;
+      padded_shapes.reserve(padded_shapes_.size());
+      for (int i = 0; i < padded_shapes_.size(); i++) {
+        Node* node;
+        Tensor t(DT_INT64, TensorShape({padded_shapes_[i].dims()}));
+        for (int j = 0; j < padded_shapes_[i].dims(); j++) {
+          t.vec<int64>()(j) = padded_shapes_[i].dim_size(j);
+        }
+        TF_RETURN_IF_ERROR(b->AddTensor(t, &node));
+        padded_shapes.emplace_back(node);
+      }
+
+      std::vector<NodeBuilder::NodeOut> padding_values;
+      padding_values.reserve(padding_values_.size());
+      for (const Tensor& t : padding_values_) {
+        Node* node;
+        TF_RETURN_IF_ERROR(b->AddTensor(t, &node));
+        padding_values.emplace_back(node);
+      }
+
+      AttrValue output_types;
+      b->BuildAttrValue(output_dtypes(), &output_types);
+
+      AttrValue N;
+      b->BuildAttrValue<int64>(padded_shapes_.size(), &N);
+
+      TF_RETURN_IF_ERROR(
+          b->AddDataset(this, {{0, input_graph_node}, {1, batch_size}},
+                        {{2, padded_shapes}, {3, padding_values}},
+                        {{"Toutput_types", output_types}, {"N", N}}, output));
+      return Status::OK();
+    }
+
    private:
     // Copies element into the index^th slice of parent (in the 0th dimension).
     //
@@ -248,17 +291,25 @@ class PaddedBatchDatasetOp : public UnaryDatasetOpKernel {
         // Each row of `batch_elements` is a tuple of tensors from the
         // input iterator.
         std::vector<std::vector<Tensor>> batch_elements;
-        batch_elements.reserve(dataset()->batch_size_);
         {
           mutex_lock l(mu_);
-          *end_of_sequence = false;
-          for (int i = 0; i < dataset()->batch_size_ && !*end_of_sequence;
-               ++i) {
-            std::vector<Tensor> batch_element_tuple;
-            TF_RETURN_IF_ERROR(input_impl_->GetNext(ctx, &batch_element_tuple,
-                                                    end_of_sequence));
-            if (!*end_of_sequence) {
-              batch_elements.push_back(std::move(batch_element_tuple));
+          if (!input_impl_) {
+            *end_of_sequence = true;
+            return Status::OK();
+          } else {
+            *end_of_sequence = false;
+            batch_elements.reserve(dataset()->batch_size_);
+            for (int i = 0; i < dataset()->batch_size_ && !*end_of_sequence;
+                 ++i) {
+              std::vector<Tensor> batch_element_tuple;
+              TF_RETURN_IF_ERROR(input_impl_->GetNext(ctx, &batch_element_tuple,
+                                                      end_of_sequence));
+              if (!*end_of_sequence) {
+                batch_elements.push_back(std::move(batch_element_tuple));
+              }
+            }
+            if (*end_of_sequence) {
+              input_impl_.reset();
             }
           }
         }
@@ -347,6 +398,28 @@ class PaddedBatchDatasetOp : public UnaryDatasetOpKernel {
         return Status::OK();
       }
 
+     protected:
+      Status SaveInternal(IteratorStateWriter* writer) override {
+        mutex_lock l(mu_);
+        if (input_impl_)
+          TF_RETURN_IF_ERROR(SaveParent(writer, input_impl_));
+        else
+          TF_RETURN_IF_ERROR(writer->WriteScalar(full_name("exhausted"), ""));
+        return Status::OK();
+      }
+
+      Status RestoreInternal(OpKernelContext* ctx,
+                             IteratorStateReader* reader) override {
+        mutex_lock l(mu_);
+        if (reader->Contains(full_name("exhausted"))) {
+          input_impl_.reset();
+        } else {
+          input_impl_ = dataset()->input_->MakeIterator(prefix());
+          TF_RETURN_IF_ERROR(RestoreParent(ctx, reader, input_impl_));
+        }
+        return Status::OK();
+      }
+
      private:
       mutex mu_;
       std::unique_ptr<IteratorBase> input_impl_ GUARDED_BY(mu_);
-- 
GitLab


From 8200bee9e1433a311d7b9e820c09110a88eb6c58 Mon Sep 17 00:00:00 2001
From: Shivani Agrawal <shivaniagrawal@google.com>
Date: Wed, 22 Nov 2017 12:03:10 -0800
Subject: [PATCH 0233/1225] [tf.data] Saveable iteartor for InterleaveDataset.

PiperOrigin-RevId: 176685601
---
 .../contrib/data/python/kernel_tests/BUILD    |   1 +
 .../interleave_dataset_op_test.py             |  42 +++++
 .../core/kernels/interleave_dataset_op.cc     | 151 ++++++++++++++++--
 3 files changed, 183 insertions(+), 11 deletions(-)

diff --git a/tensorflow/contrib/data/python/kernel_tests/BUILD b/tensorflow/contrib/data/python/kernel_tests/BUILD
index 1923c0586a..0697fbdec1 100644
--- a/tensorflow/contrib/data/python/kernel_tests/BUILD
+++ b/tensorflow/contrib/data/python/kernel_tests/BUILD
@@ -185,6 +185,7 @@ py_test(
         "manual",  # b/67958761
     ],
     deps = [
+        ":dataset_serialization_test",
         "//tensorflow/contrib/data/python/ops:dataset_ops",
         "//tensorflow/contrib/data/python/ops:transformation_ops",
         "//tensorflow/python:array_ops",
diff --git a/tensorflow/contrib/data/python/kernel_tests/interleave_dataset_op_test.py b/tensorflow/contrib/data/python/kernel_tests/interleave_dataset_op_test.py
index 0299e3a1b7..c6e8ed5bdc 100644
--- a/tensorflow/contrib/data/python/kernel_tests/interleave_dataset_op_test.py
+++ b/tensorflow/contrib/data/python/kernel_tests/interleave_dataset_op_test.py
@@ -22,8 +22,10 @@ import math
 import threading
 import time
 
+import numpy as np
 from six.moves import zip_longest
 
+from tensorflow.contrib.data.python.kernel_tests import dataset_serialization_test_base
 from tensorflow.contrib.data.python.ops import dataset_ops
 from tensorflow.contrib.data.python.ops import interleave_ops
 from tensorflow.python.framework import dtypes
@@ -209,6 +211,46 @@ class InterleaveDatasetTest(test.TestCase):
         sess.run(get_next)
 
 
+class InterleaveDatasetSeriazationTest(
+    dataset_serialization_test_base.DatasetSerializationTestBase):
+
+  def _build_iterator_graph(self, input_values, cycle_length, block_length):
+    repeat_count = 2
+    return dataset_ops.Dataset.from_tensor_slices(input_values).repeat(
+        repeat_count).interleave(
+            lambda x: dataset_ops.Dataset.from_tensors(x).repeat(x),
+            cycle_length, block_length)
+
+  def testSerializationCore(self):
+    input_values = np.array([4, 5, 6], dtype=np.int64)
+    num_outputs = np.sum(input_values) * 2
+    # cycle_length > 1, block_length > 1
+    cycle_length = 2
+    block_length = 3
+    # pylint: disable=g-long-lambda
+    self.run_core_tests(
+        lambda: self._build_iterator_graph(
+            input_values, cycle_length, block_length),
+        lambda: self._build_iterator_graph(
+            input_values, cycle_length * 2, block_length * 1),
+        num_outputs)
+    # cycle_length = 1
+    cycle_length = 1
+    block_length = 3
+    self.run_core_tests(
+        lambda: self._build_iterator_graph(
+            input_values, cycle_length, block_length),
+        None, num_outputs)
+    # block_length = 1
+    cycle_length = 2
+    block_length = 1
+    self.run_core_tests(
+        lambda: self._build_iterator_graph(
+            input_values, cycle_length, block_length),
+        None, num_outputs)
+    # pylint: enable=g-long-lambda
+
+
 class ParallelInterleaveDatasetTest(test.TestCase):
 
   def setUp(self):
diff --git a/tensorflow/core/kernels/interleave_dataset_op.cc b/tensorflow/core/kernels/interleave_dataset_op.cc
index c01d1c7cbb..cbee68b2db 100644
--- a/tensorflow/core/kernels/interleave_dataset_op.cc
+++ b/tensorflow/core/kernels/interleave_dataset_op.cc
@@ -73,18 +73,22 @@ class InterleaveDatasetOp : public UnaryDatasetOpKernel {
                                                  std::move(other_arguments),
                                                  &captured_func));
 
-    *output = new Dataset(input, std::move(captured_func), cycle_length,
-                          block_length, output_types_, output_shapes_);
+    *output =
+        new Dataset(ctx, input, func_, std::move(captured_func), cycle_length,
+                    block_length, output_types_, output_shapes_);
   }
 
  private:
-  class Dataset : public DatasetBase {
+  class Dataset : public GraphDatasetBase {
    public:
-    Dataset(const DatasetBase* input,
+    Dataset(OpKernelContext* ctx, const DatasetBase* input,
+            const NameAttrList& func,
             std::unique_ptr<CapturedFunction> captured_func, int64 cycle_length,
             int64 block_length, const DataTypeVector& output_types,
             const std::vector<PartialTensorShape>& output_shapes)
-        : input_(input),
+        : GraphDatasetBase(ctx),
+          input_(input),
+          func_(func),
           captured_func_(std::move(captured_func)),
           cycle_length_(cycle_length),
           block_length_(block_length),
@@ -110,13 +114,47 @@ class InterleaveDatasetOp : public UnaryDatasetOpKernel {
 
     string DebugString() override { return "InterleaveDatasetOp::Dataset"; }
 
+   protected:
+    Status AsGraphDefInternal(OpKernelContext* ctx, DatasetGraphDefBuilder* b,
+                              Node** output) const override {
+      TF_RETURN_IF_ERROR(b->AddFunction(ctx, func_.name()));
+      Node* input_node;
+      TF_RETURN_IF_ERROR(b->AddParentDataset(ctx, input_, &input_node));
+      Node* cycle_length_node;
+      TF_RETURN_IF_ERROR(b->AddScalar(cycle_length_, &cycle_length_node));
+      Node* block_length_node;
+      TF_RETURN_IF_ERROR(b->AddScalar(block_length_, &block_length_node));
+      DataTypeVector other_arguments_types;
+      other_arguments_types.reserve(captured_func_->captured_inputs().size());
+      std::vector<NodeBuilder::NodeOut> other_arguments;
+      other_arguments.reserve(captured_func_->captured_inputs().size());
+      for (const Tensor& t : captured_func_->captured_inputs()) {
+        Node* node;
+        TF_RETURN_IF_ERROR(b->AddTensor(t, &node));
+        other_arguments.emplace_back(node);
+        other_arguments_types.emplace_back(t.dtype());
+      }
+      AttrValue f;
+      b->BuildAttrValue(func_, &f);
+      AttrValue other_arguments_types_attr;
+      b->BuildAttrValue(other_arguments_types, &other_arguments_types_attr);
+
+      TF_RETURN_IF_ERROR(b->AddDataset(
+          this,
+          {{0, input_node}, {2, cycle_length_node}, {3, block_length_node}},
+          {{1, other_arguments}},
+          {{"f", f}, {"Targuments", other_arguments_types_attr}}, output));
+      return Status::OK();
+    }
+
    private:
     class Iterator : public DatasetIterator<Dataset> {
      public:
       explicit Iterator(const Params& params)
           : DatasetIterator<Dataset>(params),
             input_impl_(params.dataset->input_->MakeIterator(params.prefix)),
-            current_elements_(params.dataset->cycle_length_) {}
+            current_elements_(params.dataset->cycle_length_),
+            args_list_(params.dataset->cycle_length_) {}
 
       void AdvanceToNextInCycle() EXCLUSIVE_LOCKS_REQUIRED(mu_) {
         block_index_ = 0;
@@ -150,18 +188,19 @@ class InterleaveDatasetOp : public UnaryDatasetOpKernel {
             // We have reached the end of the current element, so move
             // on to the next element in the cycle.
             current_elements_[cycle_index_].reset();
+            args_list_[cycle_index_].clear();
             --num_open_;
             AdvanceToNextInCycle();
           } else if (!end_of_input_) {
             // Get the next element from the input dataset, and create
             // an iterator from it.
-            std::vector<Tensor> args;
-            TF_RETURN_IF_ERROR(
-                input_impl_->GetNext(ctx, &args, &end_of_input_));
+            TF_RETURN_IF_ERROR(input_impl_->GetNext(
+                ctx, &args_list_[cycle_index_], &end_of_input_));
             if (!end_of_input_) {
               TF_RETURN_IF_ERROR(dataset::MakeIteratorFromInputElement(
-                  ctx, args, cycle_index_, dataset()->captured_func_.get(),
-                  prefix(), &current_elements_[cycle_index_]));
+                  ctx, args_list_[cycle_index_], cycle_index_,
+                  dataset()->captured_func_.get(), prefix(),
+                  &current_elements_[cycle_index_]));
               ++num_open_;
             }
           } else {
@@ -173,11 +212,100 @@ class InterleaveDatasetOp : public UnaryDatasetOpKernel {
         return Status::OK();
       }
 
+     protected:
+      Status SaveInternal(IteratorStateWriter* writer) override {
+        mutex_lock l(mu_);
+        TF_RETURN_IF_ERROR(SaveParent(writer, input_impl_));
+        TF_RETURN_IF_ERROR(
+            writer->WriteScalar(full_name("cycle_index"), cycle_index_));
+        TF_RETURN_IF_ERROR(
+            writer->WriteScalar(full_name("block_index"), block_index_));
+        if (end_of_input_) {
+          TF_RETURN_IF_ERROR(
+              writer->WriteScalar(full_name("end_of_input"), ""));
+        }
+        TF_RETURN_IF_ERROR(
+            writer->WriteScalar(full_name("num_open"), num_open_));
+        TF_RETURN_IF_ERROR(SaveCurrentElements(writer));
+        return Status::OK();
+      }
+
+      Status RestoreInternal(OpKernelContext* ctx,
+                             IteratorStateReader* reader) override {
+        mutex_lock l(mu_);
+        TF_RETURN_IF_ERROR(RestoreParent(ctx, reader, input_impl_));
+        int64 cycle_index;
+        TF_RETURN_IF_ERROR(
+            reader->ReadScalar(full_name("cycle_index"), &cycle_index));
+        cycle_index_ = size_t(cycle_index);
+        TF_RETURN_IF_ERROR(
+            reader->ReadScalar(full_name("block_index"), &block_index_));
+        if (reader->Contains(full_name("end_of_input"))) end_of_input_ = true;
+        int64 num_open;
+        TF_RETURN_IF_ERROR(
+            reader->ReadScalar(full_name("num_open"), &num_open));
+        num_open_ = size_t(num_open);
+        TF_RETURN_IF_ERROR(RestoreCurrentElements(ctx, reader));
+        return Status::OK();
+      }
+
      private:
+      Status SaveCurrentElements(IteratorStateWriter* writer)
+          EXCLUSIVE_LOCKS_REQUIRED(mu_) {
+        for (int idx = 0; idx < current_elements_.size(); idx++) {
+          if (current_elements_[idx]) {
+            TF_RETURN_IF_ERROR(SaveParent(writer, current_elements_[idx]));
+            TF_RETURN_IF_ERROR(writer->WriteScalar(
+                full_name(strings::StrCat("args_size[", idx, "]")),
+                args_list_[idx].size()));
+            for (int i = 0; i < args_list_[idx].size(); i++) {
+              TF_RETURN_IF_ERROR(writer->WriteTensor(
+                  full_name(strings::StrCat("args_list_[", idx, "][", i, "]")),
+                  args_list_[idx][i]));
+            }
+          }
+        }
+        return Status::OK();
+      }
+
+      Status RestoreCurrentElements(OpKernelContext* ctx,
+                                    IteratorStateReader* reader)
+          EXCLUSIVE_LOCKS_REQUIRED(mu_) {
+        IteratorContext::Params params;
+        params.env = ctx->env();
+        params.runner = *(ctx->runner());
+        IteratorContext iter_ctx(std::move(params));
+        for (int idx = 0; idx < current_elements_.size(); idx++) {
+          if (reader->Contains(
+                  full_name(strings::StrCat("args_size[", idx, "]")))) {
+            int64 args_size;
+            TF_RETURN_IF_ERROR(reader->ReadScalar(
+                full_name(strings::StrCat("args_size[", idx, "]")),
+                &args_size));
+            args_list_[idx].resize(args_size);
+            for (int i = 0; i < args_size; i++) {
+              TF_RETURN_IF_ERROR(reader->ReadTensor(
+                  full_name(strings::StrCat("args_list_[", idx, "][", i, "]")),
+                  &args_list_[idx][i]));
+            }
+            TF_RETURN_IF_ERROR(dataset::MakeIteratorFromInputElement(
+                &iter_ctx, args_list_[idx], idx,
+                dataset()->captured_func_.get(), prefix(),
+                &current_elements_[idx]));
+            TF_RETURN_IF_ERROR(
+                RestoreParent(ctx, reader, current_elements_[idx]));
+          } else {
+            current_elements_[idx].reset();
+          }
+        }
+        return Status::OK();
+      }
+
       mutex mu_;
       const std::unique_ptr<IteratorBase> input_impl_ GUARDED_BY(mu_);
       std::vector<std::unique_ptr<IteratorBase>> current_elements_
           GUARDED_BY(mu_);
+      std::vector<std::vector<Tensor>> args_list_ GUARDED_BY(mu_);
       size_t cycle_index_ GUARDED_BY(mu_) = 0;
       int64 block_index_ GUARDED_BY(mu_) = 0;
       bool end_of_input_ GUARDED_BY(mu_) = false;
@@ -185,6 +313,7 @@ class InterleaveDatasetOp : public UnaryDatasetOpKernel {
     };
 
     const DatasetBase* const input_;
+    const NameAttrList func_;
     const std::unique_ptr<CapturedFunction> captured_func_;
     const int64 cycle_length_;
     const int64 block_length_;
-- 
GitLab


From c133aff68cf9020b1eea41b6f0432d309d6b0955 Mon Sep 17 00:00:00 2001
From: Benoit Steiner <bsteiner@google.com>
Date: Wed, 22 Nov 2017 12:13:39 -0800
Subject: [PATCH 0234/1225] Count the number of loops instead of approximating
 it. Also added a few minor cleanups

PiperOrigin-RevId: 176686819
---
 tensorflow/core/framework/shape_inference.cc       |  4 ++--
 tensorflow/core/grappler/costs/graph_properties.cc | 13 ++++++++++---
 tensorflow/core/grappler/costs/graph_properties.h  |  3 ++-
 3 files changed, 14 insertions(+), 6 deletions(-)

diff --git a/tensorflow/core/framework/shape_inference.cc b/tensorflow/core/framework/shape_inference.cc
index f30272e250..ee9192d4a1 100644
--- a/tensorflow/core/framework/shape_inference.cc
+++ b/tensorflow/core/framework/shape_inference.cc
@@ -921,7 +921,7 @@ Status InferenceContext::Add(DimensionHandle first, DimensionOrConstant second,
   if (first_value == 0) {
     *out = MakeDim(second);
   } else if (second_value == 0) {
-    *out = MakeDim(first);
+    *out = first;
   } else if (first_value == kUnknownDim || second_value == kUnknownDim) {
     *out = UnknownDim();
   } else {
@@ -946,7 +946,7 @@ Status InferenceContext::Subtract(DimensionHandle first,
   const int64 second_value = Value(second);
   // Special cases.
   if (second_value == 0) {
-    *out = MakeDim(first);
+    *out = first;
   } else if (first_value == kUnknownDim || second_value == kUnknownDim) {
     *out = UnknownDim();
   } else {
diff --git a/tensorflow/core/grappler/costs/graph_properties.cc b/tensorflow/core/grappler/costs/graph_properties.cc
index bf49d78a1a..abcd83a01e 100644
--- a/tensorflow/core/grappler/costs/graph_properties.cc
+++ b/tensorflow/core/grappler/costs/graph_properties.cc
@@ -702,12 +702,16 @@ Status GraphProperties::UpdateShapes(SymbolicShapeRefiner* shape_refiner,
 Status GraphProperties::PropagateShapes(
     SymbolicShapeRefiner* shape_refiner, bool relax, TopoQueue* new_shapes,
     const std::unordered_map<const Node*, std::unordered_set<const Node*>>&
-        resources) const {
+        resources,
+    int num_loops) const {
   // Limit the number of iterations to prevent infinite loops in the presence of
   // incorrect shape functions. The algoritm should converge in at most
   // num_nested_loops^2 * max_rank. We approximate max_rank with the constant 4.
   // The same applies to resources.
-  const int64 num_loops = new_shapes->size();
+  VLOG(1) << "Propagating (relax=" << relax << ") " << new_shapes->size()
+          << " new shapes through " << num_loops << " loops and "
+          << resources.size() << " resources" << std::endl;
+
   const int64 max_loop_length = item_.graph.node_size();
   const int64 max_rank = 4;
   const int64 max_loop_iterations =
@@ -818,6 +822,7 @@ Status GraphProperties::InferStatically() {
   std::unordered_map<const Node*, std::unordered_set<const Node*>> resources;
   std::unordered_set<const Node*> enter_nodes;
   std::unordered_set<const Node*> merge_nodes;
+  int num_loops = 0;
   for (const Node* const node : graph.nodes()) {
     for (int i = 0; i < node->num_inputs(); ++i) {
       if (node->input_type(i) == DataType::DT_RESOURCE) {
@@ -830,6 +835,8 @@ Status GraphProperties::InferStatically() {
       enter_nodes.insert(node);
     } else if (node->IsMerge()) {
       merge_nodes.insert(node);
+    } else if (node->IsNextIteration()) {
+      ++num_loops;
     }
   }
 
@@ -853,7 +860,7 @@ Status GraphProperties::InferStatically() {
     }
     // Propagate shapes normally.
     TF_RETURN_IF_ERROR(
-        PropagateShapes(&refiner, relax, &new_shapes, resources));
+        PropagateShapes(&refiner, relax, &new_shapes, resources, num_loops));
   }
 
   // Track shapes globally across the graph.
diff --git a/tensorflow/core/grappler/costs/graph_properties.h b/tensorflow/core/grappler/costs/graph_properties.h
index ee279b7e0a..5df190ba01 100644
--- a/tensorflow/core/grappler/costs/graph_properties.h
+++ b/tensorflow/core/grappler/costs/graph_properties.h
@@ -102,7 +102,8 @@ class GraphProperties {
   Status PropagateShapes(
       SymbolicShapeRefiner* shape_refiner, bool relax, TopoQueue* new_shapes,
       const std::unordered_map<const Node*, std::unordered_set<const Node*>>&
-          resources) const;
+          resources,
+      int num_loops) const;
 };
 
 }  // end namespace grappler
-- 
GitLab


From c5c642e051f1a7876d099bfcd9f8a2ecaf7227b8 Mon Sep 17 00:00:00 2001
From: Felix Abecassis <felix.abecassis@gmail.com>
Date: Wed, 22 Nov 2017 12:22:45 -0800
Subject: [PATCH 0235/1225] Remove useless statements in Dockerfiles (#14808)

'CMD ["/bin/bash"]' is not useful since it's already provided by the base ubuntu image.
'RUN ["/bin/bash"]' looks like a typo and just creates an extra empty layer.

Signed-off-by: Felix Abecassis <fabecassis@nvidia.com>
---
 tensorflow/tools/docker/Dockerfile.devel                  | 1 -
 tensorflow/tools/docker/Dockerfile.devel-gpu              | 2 --
 tensorflow/tools/docker/Dockerfile.devel-gpu-cuda9-cudnn7 | 2 --
 3 files changed, 5 deletions(-)

diff --git a/tensorflow/tools/docker/Dockerfile.devel b/tensorflow/tools/docker/Dockerfile.devel
index 1a0145b078..3525c7524f 100644
--- a/tensorflow/tools/docker/Dockerfile.devel
+++ b/tensorflow/tools/docker/Dockerfile.devel
@@ -101,4 +101,3 @@ EXPOSE 6006
 EXPOSE 8888
 
 WORKDIR /root
-CMD ["/bin/bash"]
diff --git a/tensorflow/tools/docker/Dockerfile.devel-gpu b/tensorflow/tools/docker/Dockerfile.devel-gpu
index 21a44ee404..041f45971b 100644
--- a/tensorflow/tools/docker/Dockerfile.devel-gpu
+++ b/tensorflow/tools/docker/Dockerfile.devel-gpu
@@ -102,5 +102,3 @@ WORKDIR /root
 EXPOSE 6006
 # IPython
 EXPOSE 8888
-
-RUN ["/bin/bash"]
diff --git a/tensorflow/tools/docker/Dockerfile.devel-gpu-cuda9-cudnn7 b/tensorflow/tools/docker/Dockerfile.devel-gpu-cuda9-cudnn7
index 9bcc3925a8..3bedc8cf34 100644
--- a/tensorflow/tools/docker/Dockerfile.devel-gpu-cuda9-cudnn7
+++ b/tensorflow/tools/docker/Dockerfile.devel-gpu-cuda9-cudnn7
@@ -113,5 +113,3 @@ WORKDIR /root
 EXPOSE 6006
 # IPython
 EXPOSE 8888
-
-RUN ["/bin/bash"]
-- 
GitLab


From d9b3ed25816f98e8ad11d3ecb20c1fc0ed0f4166 Mon Sep 17 00:00:00 2001
From: Yunxing Dai <yunxing@google.com>
Date: Wed, 22 Nov 2017 12:28:47 -0800
Subject: [PATCH 0236/1225] BF16 tests for Batchnorm.

This is a good end to end test which touches many parts of the system.

PiperOrigin-RevId: 176688376
---
 .../xla/service/batchnorm_rewriter.cc         | 63 +++++++++-----
 tensorflow/compiler/xla/tests/BUILD           |  5 ++
 .../compiler/xla/tests/bfloat16_test.cc       | 84 ++++++++++++++++++-
 tensorflow/compiler/xla/tests/build_defs.bzl  | 12 ++-
 4 files changed, 139 insertions(+), 25 deletions(-)

diff --git a/tensorflow/compiler/xla/service/batchnorm_rewriter.cc b/tensorflow/compiler/xla/service/batchnorm_rewriter.cc
index abe881cd1a..c6193b3fbb 100644
--- a/tensorflow/compiler/xla/service/batchnorm_rewriter.cc
+++ b/tensorflow/compiler/xla/service/batchnorm_rewriter.cc
@@ -85,9 +85,9 @@ class BatchNormRewriterVisitor : public DfsHloVisitorWithDefault {
                                              HloOpcode opcode) {
     HloComputation::Builder b("scalar_computation");
     auto scalar_lhs = b.AddInstruction(HloInstruction::CreateParameter(
-        0, ShapeUtil::MakeShape(F32, {}), "scalar_lhs"));
+        0, ShapeUtil::MakeShape(primitive_type, {}), "scalar_lhs"));
     auto scalar_rhs = b.AddInstruction(HloInstruction::CreateParameter(
-        1, ShapeUtil::MakeShape(F32, {}), "scalar_rhs"));
+        1, ShapeUtil::MakeShape(primitive_type, {}), "scalar_rhs"));
     auto scalar_op = b.AddInstruction(
         HloInstruction::CreateBinary(ShapeUtil::MakeShape(primitive_type, {}),
                                      opcode, scalar_lhs, scalar_rhs));
@@ -152,22 +152,30 @@ Status BatchNormRewriterVisitor::HandleBatchNormTraining(
   // Expand batch norm training into smaller HLO ops.
   HloInstruction* operand = batch_norm->mutable_operand(0);
   const Shape operand_shape = operand->shape();
+  PrimitiveType ptype = operand_shape.element_type();
   int64 feature_index = batch_norm->feature_index();
   const int64 feature_count = operand_shape.dimensions(feature_index);
   const int64 size_in_elements = ShapeUtil::ElementsIn(operand_shape);
-  auto elements_per_feature =
-      computation_->AddInstruction(HloInstruction::CreateConstant(
-          Literal::CreateR0<float>(size_in_elements / feature_count)));
+  auto elements_per_feature_literal =
+      Literal::CreateR0<float>(size_in_elements / feature_count);
+  TF_ASSIGN_OR_RETURN(elements_per_feature_literal,
+                      elements_per_feature_literal->Convert(ptype));
+  auto elements_per_feature = computation_->AddInstruction(
+      HloInstruction::CreateConstant(std::move(elements_per_feature_literal)));
 
   HloInstruction* scale = batch_norm->mutable_operand(1);
   HloInstruction* offset = batch_norm->mutable_operand(2);
   const Shape feature_shape = scale->shape();
 
+  auto zero_literal = Literal::CreateR0(0.0f);
+  TF_ASSIGN_OR_RETURN(zero_literal, zero_literal->Convert(ptype));
   auto zero = computation_->AddInstruction(
-      HloInstruction::CreateConstant(Literal::CreateR0(0.0f)));
+      HloInstruction::CreateConstant(std::move(zero_literal)));
 
+  auto epsilon_literal = Literal::CreateR0(batch_norm->epsilon());
+  TF_ASSIGN_OR_RETURN(epsilon_literal, epsilon_literal->Convert(ptype));
   auto epsilon = computation_->AddInstruction(
-      HloInstruction::CreateConstant(Literal::CreateR0(batch_norm->epsilon())));
+      HloInstruction::CreateConstant(std::move(epsilon_literal)));
 
   std::vector<int64> dimensions_without_feature;
 
@@ -184,7 +192,7 @@ Status BatchNormRewriterVisitor::HandleBatchNormTraining(
       HloInstruction::CreateBroadcast(operand_shape, offset, {feature_index}));
 
   HloComputation* add_reduce_computation =
-      GetScalarBinaryComputation(F32, HloOpcode::kAdd);
+      GetScalarBinaryComputation(ptype, HloOpcode::kAdd);
 
   // X^2.
   auto operand_squared =
@@ -243,8 +251,10 @@ Status BatchNormRewriterVisitor::HandleBatchNormTraining(
       computation_->AddInstruction(HloInstruction::CreateBinary(
           operand_shape, HloOpcode::kAdd, var_broadcasted, epsilon));
 
+  auto neg_half_literal = Literal::CreateR0(-0.5f);
+  TF_ASSIGN_OR_RETURN(neg_half_literal, neg_half_literal->Convert(ptype));
   auto neg_half = computation_->AddInstruction(
-      HloInstruction::CreateConstant(Literal::CreateR0(-0.5f)));
+      HloInstruction::CreateConstant(std::move(neg_half_literal)));
 
   // 1 / Sqrt[Var[X] + epsilon].
   auto rsqrt_var_add_epsilon =
@@ -286,6 +296,7 @@ Status BatchNormRewriterVisitor::HandleBatchNormInference(
   HloInstruction* operand = batch_norm->mutable_operand(0);
   const Shape operand_shape = operand->shape();
   int64 feature_index = batch_norm->feature_index();
+  PrimitiveType ptype = operand_shape.element_type();
 
   HloInstruction* scale = batch_norm->mutable_operand(1);
   HloInstruction* offset = batch_norm->mutable_operand(2);
@@ -293,8 +304,10 @@ Status BatchNormRewriterVisitor::HandleBatchNormInference(
   HloInstruction* var = batch_norm->mutable_operand(4);
   const Shape feature_shape = scale->shape();
 
+  auto epsilon_literal = Literal::CreateR0(batch_norm->epsilon());
+  TF_ASSIGN_OR_RETURN(epsilon_literal, epsilon_literal->Convert(ptype));
   auto epsilon = computation_->AddInstruction(
-      HloInstruction::CreateConstant(Literal::CreateR0(batch_norm->epsilon())));
+      HloInstruction::CreateConstant(std::move(epsilon_literal)));
 
   std::vector<int64> dimensions_without_feature;
 
@@ -321,8 +334,10 @@ Status BatchNormRewriterVisitor::HandleBatchNormInference(
       computation_->AddInstruction(HloInstruction::CreateBinary(
           operand_shape, HloOpcode::kAdd, var_broadcasted, epsilon));
 
+  auto neg_half_literal = Literal::CreateR0(-0.5f);
+  TF_ASSIGN_OR_RETURN(neg_half_literal, neg_half_literal->Convert(ptype));
   auto neg_half = computation_->AddInstruction(
-      HloInstruction::CreateConstant(Literal::CreateR0(-0.5f)));
+      HloInstruction::CreateConstant(std::move(neg_half_literal)));
 
   // 1 / Sqrt[Var[X] + epsilon].
   auto rsqrt_var_add_epsilon =
@@ -373,6 +388,7 @@ Status BatchNormRewriterVisitor::HandleBatchNormGrad(
 
   HloInstruction* activation = batch_norm->mutable_operand(0);
   const Shape activation_shape = activation->shape();
+  PrimitiveType ptype = activation_shape.element_type();
   HloInstruction* scale = batch_norm->mutable_operand(1);
   const Shape feature_shape = scale->shape();
   HloInstruction* mean = batch_norm->mutable_operand(2);
@@ -383,18 +399,27 @@ Status BatchNormRewriterVisitor::HandleBatchNormGrad(
 
   const int64 size_in_elements = ShapeUtil::ElementsIn(activation_shape);
   const int64 feature_count = activation_shape.dimensions(feature_index);
-  auto elements_per_feature =
-      computation_->AddInstruction(HloInstruction::CreateConstant(
-          Literal::CreateR0<float>(size_in_elements / feature_count)));
-
+  auto elements_per_feature_literal =
+      Literal::CreateR0<float>(size_in_elements / feature_count);
+  TF_ASSIGN_OR_RETURN(elements_per_feature_literal,
+                      elements_per_feature_literal->Convert(ptype));
+  auto elements_per_feature = computation_->AddInstruction(
+      HloInstruction::CreateConstant(std::move(elements_per_feature_literal)));
+
+  auto zero_literal = Literal::CreateR0(0.0f);
+  TF_ASSIGN_OR_RETURN(zero_literal, zero_literal->Convert(ptype));
   auto zero = computation_->AddInstruction(
-      HloInstruction::CreateConstant(Literal::CreateR0(0.0f)));
+      HloInstruction::CreateConstant(std::move(zero_literal)));
 
+  auto neg_half_literal = Literal::CreateR0(-0.5f);
+  TF_ASSIGN_OR_RETURN(neg_half_literal, neg_half_literal->Convert(ptype));
   auto neg_half = computation_->AddInstruction(
-      HloInstruction::CreateConstant(Literal::CreateR0(-0.5f)));
+      HloInstruction::CreateConstant(std::move(neg_half_literal)));
 
+  auto epsilon_literal = Literal::CreateR0(batch_norm->epsilon());
+  TF_ASSIGN_OR_RETURN(epsilon_literal, epsilon_literal->Convert(ptype));
   auto epsilon = computation_->AddInstruction(
-      HloInstruction::CreateConstant(Literal::CreateR0(batch_norm->epsilon())));
+      HloInstruction::CreateConstant(std::move(epsilon_literal)));
 
   std::vector<int64> dimensions_without_feature;
 
@@ -442,7 +467,7 @@ Status BatchNormRewriterVisitor::HandleBatchNormGrad(
                                    grad_output, activation_minus_mean));
 
   HloComputation* add_reduce_computation =
-      GetScalarBinaryComputation(F32, HloOpcode::kAdd);
+      GetScalarBinaryComputation(ptype, HloOpcode::kAdd);
 
   // sum(Grad[Y] * (X - E[X])).
   auto sum_grad_output_times_activiation_minus_mean =
diff --git a/tensorflow/compiler/xla/tests/BUILD b/tensorflow/compiler/xla/tests/BUILD
index 13d651ea6f..addce9019b 100644
--- a/tensorflow/compiler/xla/tests/BUILD
+++ b/tensorflow/compiler/xla/tests/BUILD
@@ -773,6 +773,11 @@ xla_test(
 xla_test(
     name = "bfloat16_test",
     srcs = ["bfloat16_test.cc"],
+    blacklisted_backends = [
+        "cpu",
+        "cpu_parallel",
+        "gpu",
+    ],
     shard_count = 40,
     deps = [
         ":test_utils",
diff --git a/tensorflow/compiler/xla/tests/bfloat16_test.cc b/tensorflow/compiler/xla/tests/bfloat16_test.cc
index 26e2b1a95b..a1c53ef2aa 100644
--- a/tensorflow/compiler/xla/tests/bfloat16_test.cc
+++ b/tensorflow/compiler/xla/tests/bfloat16_test.cc
@@ -51,8 +51,7 @@ class Bfloat16Test : public ClientLibraryTestBase {
   const ErrorSpec error_spec_{0.001, 0.001};
 };
 
-XLA_TEST_F(Bfloat16Test, DISABLED_ON_GPU(DISABLED_ON_CPU_PARALLEL(
-                             DISABLED_ON_CPU(ScalarOperation)))) {
+XLA_TEST_F(Bfloat16Test, ScalarOperation) {
   ComputationBuilder builder(client_, TestName());
   auto x = builder.ConstantR0<bfloat16>(static_cast<bfloat16>(2.0f));
   auto y = builder.ConstantR0<bfloat16>(static_cast<bfloat16>(1.0f));
@@ -62,8 +61,7 @@ XLA_TEST_F(Bfloat16Test, DISABLED_ON_GPU(DISABLED_ON_CPU_PARALLEL(
                                 error_spec_);
 }
 
-XLA_TEST_F(Bfloat16Test, DISABLED_ON_GPU(DISABLED_ON_CPU_PARALLEL(
-                             DISABLED_ON_CPU(NegateScalarF16)))) {
+XLA_TEST_F(Bfloat16Test, NegateScalarF16) {
   ComputationBuilder builder(client_, TestName());
   builder.Neg(builder.ConstantR0<bfloat16>(static_cast<bfloat16>(2.1f)));
 
@@ -71,5 +69,83 @@ XLA_TEST_F(Bfloat16Test, DISABLED_ON_GPU(DISABLED_ON_CPU_PARALLEL(
                                 error_spec_);
 }
 
+XLA_TEST_F(Bfloat16Test, BatchNormTraining) {
+  const int kFeatureIndex = 2;
+  ComputationBuilder builder(client_, TestName());
+
+  auto operand = builder.ConstantR4FromArray4D<bfloat16>(
+      {{{{static_cast<bfloat16>(1.f)}, {static_cast<bfloat16>(2.f)}},
+        {{static_cast<bfloat16>(3.f)}, {static_cast<bfloat16>(4.f)}}},
+       {{{static_cast<bfloat16>(5.f)}, {static_cast<bfloat16>(6.f)}},
+        {{static_cast<bfloat16>(7.f)}, {static_cast<bfloat16>(8.f)}}}});
+
+  auto scale = builder.ConstantR1<bfloat16>(
+      {static_cast<bfloat16>(2.0f), static_cast<bfloat16>(3.0f)});
+
+  auto offset = builder.ConstantR1<bfloat16>(
+      {static_cast<bfloat16>(1.0f), static_cast<bfloat16>(2.0f)});
+
+  auto tuple = builder.BatchNormTraining(operand, scale, offset,
+                                         /*epsilon=*/0.001, kFeatureIndex);
+
+  auto expected = *Literal::MakeTuple(
+      {Literal::CreateR4<bfloat16>(
+           {{{{static_cast<bfloat16>(-1.7f)}, {static_cast<bfloat16>(-2.04f)}},
+             {{static_cast<bfloat16>(0.105f)}, {static_cast<bfloat16>(0.65f)}}},
+            {{{static_cast<bfloat16>(1.89f)}, {static_cast<bfloat16>(3.35f)}},
+             {{static_cast<bfloat16>(3.7f)}, {static_cast<bfloat16>(6.04f)}}}})
+           .get(),
+       Literal::CreateR1<bfloat16>(
+           {static_cast<bfloat16>(4), static_cast<bfloat16>(5)})
+           .get(),
+       Literal::CreateR1<bfloat16>(
+           {static_cast<bfloat16>(5), static_cast<bfloat16>(5)})
+           .get()});
+
+  ComputeAndCompareTuple(&builder, expected, {}, ErrorSpec(0.01));
+}
+
+XLA_TEST_F(Bfloat16Test, BatchNormGrad) {
+  const int kFeatureIndex = 2;
+  ComputationBuilder builder(client_, TestName());
+
+  auto operand = builder.ConstantR4FromArray4D<bfloat16>(
+      Array4D<bfloat16>(2, 2, 2, 1, static_cast<bfloat16>(0.0f)));
+
+  auto scale = builder.ConstantR1<bfloat16>(
+      {static_cast<bfloat16>(1.0f), static_cast<bfloat16>(1.0f)});
+
+  auto mean = builder.ConstantR1<bfloat16>(
+      {static_cast<bfloat16>(0.0f), static_cast<bfloat16>(0.0f)});
+
+  auto var = builder.ConstantR1<bfloat16>(
+      {static_cast<bfloat16>(1.0f), static_cast<bfloat16>(1.0f)});
+
+  auto grad_output = builder.ConstantR4FromArray4D<bfloat16>(
+      {{{{static_cast<bfloat16>(1.f)}, {static_cast<bfloat16>(2.f)}},
+        {{static_cast<bfloat16>(3.f)}, {static_cast<bfloat16>(4.f)}}},
+       {{{static_cast<bfloat16>(5.f)}, {static_cast<bfloat16>(6.f)}},
+        {{static_cast<bfloat16>(7.f)}, {static_cast<bfloat16>(8.f)}}}});
+
+  builder.BatchNormGrad(operand, scale, mean, var, grad_output,
+                        /*epsilon=*/0.0, kFeatureIndex);
+
+  auto expected = *Literal::MakeTuple(
+      {Literal::CreateR4<bfloat16>(
+           {{{{static_cast<bfloat16>(-3.f)}, {static_cast<bfloat16>(-3.f)}},
+             {{static_cast<bfloat16>(-1.f)}, {static_cast<bfloat16>(-1.f)}}},
+            {{{static_cast<bfloat16>(1.f)}, {static_cast<bfloat16>(1.f)}},
+             {{static_cast<bfloat16>(3.f)}, {static_cast<bfloat16>(3.f)}}}})
+           .get(),
+       Literal::CreateR1<bfloat16>(
+           {static_cast<bfloat16>(0), static_cast<bfloat16>(0)})
+           .get(),
+       Literal::CreateR1<bfloat16>(
+           {static_cast<bfloat16>(16), static_cast<bfloat16>(20)})
+           .get()});
+
+  ComputeAndCompareTuple(&builder, expected, {}, ErrorSpec(0.01));
+}
+
 }  // namespace
 }  // namespace xla
diff --git a/tensorflow/compiler/xla/tests/build_defs.bzl b/tensorflow/compiler/xla/tests/build_defs.bzl
index f594c609db..610302ac12 100644
--- a/tensorflow/compiler/xla/tests/build_defs.bzl
+++ b/tensorflow/compiler/xla/tests/build_defs.bzl
@@ -29,6 +29,7 @@ def xla_test(name,
              deps,
              xla_test_library_deps=[],
              backends=[],
+             blacklisted_backends=[],
              args=[],
              tags=[],
              copts=[],
@@ -92,17 +93,24 @@ def xla_test(name,
     backends: A list of backends to generate tests for. Supported
       values: "cpu", "cpu_parallel", "gpu". If this list is empty, the test will
       be generated for all supported backends.
+    blacklisted_backends: A list of backends to NOT generate tests for.
     args: Test arguments for the target.
     tags: Tags for the target.
-    backend_args: A dict mapping backend name to list of additional args to
-      use for that target.
+    copts: Additional copts to pass to the build.
+    data: Additional data to pass to the build.
     backend_tags: A dict mapping backend name to list of additional tags to
       use for that target.
+    backend_args: A dict mapping backend name to list of additional args to
+      use for that target.
+    **kwargs: Additional keyword arguments to pass to native.cc_test.
   """
   test_names = []
   if not backends:
     backends = all_backends
 
+  backends = [backend for backend in backends
+              if backend not in blacklisted_backends]
+
   native.cc_library(
       name="%s_lib" % name,
       srcs=srcs,
-- 
GitLab


From 8752c973150df64374f96d516aafa664de410dce Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 22 Nov 2017 12:30:20 -0800
Subject: [PATCH 0237/1225] Fix functionality in crf_sequence_score(),
 crf_log_norm(), and crf_decode() for when input has max_seq_len = 1.  This
 can happen in single-example inference.

PiperOrigin-RevId: 176688502
---
 .../crf/python/kernel_tests/crf_test.py       | 224 +++++++++++-------
 tensorflow/contrib/crf/python/ops/crf.py      | 163 ++++++++-----
 2 files changed, 242 insertions(+), 145 deletions(-)

diff --git a/tensorflow/contrib/crf/python/kernel_tests/crf_test.py b/tensorflow/contrib/crf/python/kernel_tests/crf_test.py
index 964ec75441..b47fb426a1 100644
--- a/tensorflow/contrib/crf/python/kernel_tests/crf_test.py
+++ b/tensorflow/contrib/crf/python/kernel_tests/crf_test.py
@@ -32,27 +32,41 @@ from tensorflow.python.platform import test
 class CrfTest(test.TestCase):
 
   def testCrfSequenceScore(self):
-    inputs = np.array(
-        [[4, 5, -3], [3, -1, 3], [-1, 2, 1], [0, 0, 0]], dtype=np.float32)
-    tag_indices = np.array([1, 2, 1, 0], dtype=np.int32)
     transition_params = np.array(
         [[-3, 5, -2], [3, 4, 1], [1, 2, 1]], dtype=np.float32)
-    sequence_lengths = np.array(3, dtype=np.int32)
-    with self.test_session() as sess:
-      sequence_score = crf.crf_sequence_score(
-          inputs=array_ops.expand_dims(inputs, 0),
-          tag_indices=array_ops.expand_dims(tag_indices, 0),
-          sequence_lengths=array_ops.expand_dims(sequence_lengths, 0),
-          transition_params=constant_op.constant(transition_params))
-      sequence_score = array_ops.squeeze(sequence_score, [0])
-      tf_sequence_score = sess.run(sequence_score)
-      expected_unary_score = sum(inputs[i][tag_indices[i]]
-                                 for i in range(sequence_lengths))
-      expected_binary_score = sum(
-          transition_params[tag_indices[i], tag_indices[i + 1]]
-          for i in range(sequence_lengths - 1))
-      expected_sequence_score = expected_unary_score + expected_binary_score
-      self.assertAllClose(tf_sequence_score, expected_sequence_score)
+    # Test both the length-1 and regular cases.
+    sequence_lengths_list = [
+        np.array(3, dtype=np.int32),
+        np.array(1, dtype=np.int32)
+    ]
+    inputs_list = [
+        np.array([[4, 5, -3], [3, -1, 3], [-1, 2, 1], [0, 0, 0]],
+                 dtype=np.float32),
+        np.array([[4, 5, -3]],
+                 dtype=np.float32),
+    ]
+    tag_indices_list = [
+        np.array([1, 2, 1, 0], dtype=np.int32),
+        np.array([1], dtype=np.int32)
+    ]
+    for sequence_lengths, inputs, tag_indices in zip(sequence_lengths_list,
+                                                     inputs_list,
+                                                     tag_indices_list):
+      with self.test_session() as sess:
+        sequence_score = crf.crf_sequence_score(
+            inputs=array_ops.expand_dims(inputs, 0),
+            tag_indices=array_ops.expand_dims(tag_indices, 0),
+            sequence_lengths=array_ops.expand_dims(sequence_lengths, 0),
+            transition_params=constant_op.constant(transition_params))
+        sequence_score = array_ops.squeeze(sequence_score, [0])
+        tf_sequence_score = sess.run(sequence_score)
+        expected_unary_score = sum(inputs[i][tag_indices[i]]
+                                   for i in range(sequence_lengths))
+        expected_binary_score = sum(
+            transition_params[tag_indices[i], tag_indices[i + 1]]
+            for i in range(sequence_lengths - 1))
+        expected_sequence_score = expected_unary_score + expected_binary_score
+        self.assertAllClose(tf_sequence_score, expected_sequence_score)
 
   def testCrfUnaryScore(self):
     inputs = np.array(
@@ -89,38 +103,54 @@ class CrfTest(test.TestCase):
       self.assertAllClose(tf_binary_score, expected_binary_score)
 
   def testCrfLogNorm(self):
-    inputs = np.array(
-        [[4, 5, -3], [3, -1, 3], [-1, 2, 1], [0, 0, 0]], dtype=np.float32)
     transition_params = np.array(
         [[-3, 5, -2], [3, 4, 1], [1, 2, 1]], dtype=np.float32)
-    num_words = inputs.shape[0]
-    num_tags = inputs.shape[1]
-    sequence_lengths = np.array(3, dtype=np.int32)
-    with self.test_session() as sess:
-      all_sequence_scores = []
-
-      # Compare the dynamic program with brute force computation.
-      for tag_indices in itertools.product(
-          range(num_tags), repeat=sequence_lengths):
-        tag_indices = list(tag_indices)
-        tag_indices.extend([0] * (num_words - sequence_lengths))
-        all_sequence_scores.append(
-            crf.crf_sequence_score(
-                inputs=array_ops.expand_dims(inputs, 0),
-                tag_indices=array_ops.expand_dims(tag_indices, 0),
-                sequence_lengths=array_ops.expand_dims(sequence_lengths, 0),
-                transition_params=constant_op.constant(transition_params)))
-
-      brute_force_log_norm = math_ops.reduce_logsumexp(all_sequence_scores)
-      log_norm = crf.crf_log_norm(
-          inputs=array_ops.expand_dims(inputs, 0),
-          sequence_lengths=array_ops.expand_dims(sequence_lengths, 0),
-          transition_params=constant_op.constant(transition_params))
-      log_norm = array_ops.squeeze(log_norm, [0])
-      tf_brute_force_log_norm, tf_log_norm = sess.run(
-          [brute_force_log_norm, log_norm])
+    # Test both the length-1 and regular cases.
+    sequence_lengths_list = [
+        np.array(3, dtype=np.int32),
+        np.array(1, dtype=np.int32)
+    ]
+    inputs_list = [
+        np.array([[4, 5, -3], [3, -1, 3], [-1, 2, 1], [0, 0, 0]],
+                 dtype=np.float32),
+        np.array([[3, -1, 3]],
+                 dtype=np.float32),
+    ]
+    tag_indices_list = [
+        np.array([1, 2, 1, 0], dtype=np.int32),
+        np.array([2], dtype=np.int32)
+    ]
+
+    for sequence_lengths, inputs, tag_indices in zip(sequence_lengths_list,
+                                                     inputs_list,
+                                                     tag_indices_list):
+      num_words = inputs.shape[0]
+      num_tags = inputs.shape[1]
+      with self.test_session() as sess:
+        all_sequence_scores = []
+
+        # Compare the dynamic program with brute force computation.
+        for tag_indices in itertools.product(
+            range(num_tags), repeat=sequence_lengths):
+          tag_indices = list(tag_indices)
+          tag_indices.extend([0] * (num_words - sequence_lengths))
+          all_sequence_scores.append(
+              crf.crf_sequence_score(
+                  inputs=array_ops.expand_dims(inputs, 0),
+                  tag_indices=array_ops.expand_dims(tag_indices, 0),
+                  sequence_lengths=array_ops.expand_dims(sequence_lengths, 0),
+                  transition_params=constant_op.constant(transition_params)))
+
+        brute_force_log_norm = math_ops.reduce_logsumexp(all_sequence_scores)
+        log_norm = crf.crf_log_norm(
+            inputs=array_ops.expand_dims(inputs, 0),
+            sequence_lengths=array_ops.expand_dims(sequence_lengths, 0),
+            transition_params=constant_op.constant(transition_params))
+        log_norm = array_ops.squeeze(log_norm, [0])
+        tf_brute_force_log_norm, tf_log_norm = sess.run(
+            [brute_force_log_norm, log_norm])
 
-      self.assertAllClose(tf_log_norm, tf_brute_force_log_norm)
+        self.assertAllClose(tf_log_norm, tf_brute_force_log_norm)
 
   def testCrfLogLikelihood(self):
     inputs = np.array(
@@ -201,50 +231,66 @@ class CrfTest(test.TestCase):
                        expected_max_sequence[:sequence_lengths])
 
   def testCrfDecode(self):
-    inputs = np.array(
-        [[4, 5, -3], [3, -1, 3], [-1, 2, 1], [0, 0, 0]], dtype=np.float32)
     transition_params = np.array(
         [[-3, 5, -2], [3, 4, 1], [1, 2, 1]], dtype=np.float32)
-    sequence_lengths = np.array(3, dtype=np.int32)
-    num_words = inputs.shape[0]
-    num_tags = inputs.shape[1]
+    # Test both the length-1 and regular cases.
+    sequence_lengths_list = [
+        np.array(3, dtype=np.int32),
+        np.array(1, dtype=np.int32)
+    ]
+    inputs_list = [
+        np.array([[4, 5, -3], [3, -1, 3], [-1, 2, 1], [0, 0, 0]],
+                 dtype=np.float32),
+        np.array([[-1, 2, 1]],
+                 dtype=np.float32),
+    ]
+    tag_indices_list = [
+        np.array([1, 2, 1, 0], dtype=np.int32),
+        np.array([2], dtype=np.int32)
+    ]
+
+    for sequence_lengths, inputs, tag_indices in zip(sequence_lengths_list,
+                                                     inputs_list,
+                                                     tag_indices_list):
+      num_words = inputs.shape[0]
+      num_tags = inputs.shape[1]
 
-    with self.test_session() as sess:
-      all_sequence_scores = []
-      all_sequences = []
-
-      # Compare the dynamic program with brute force computation.
-      for tag_indices in itertools.product(
-          range(num_tags), repeat=sequence_lengths):
-        tag_indices = list(tag_indices)
-        tag_indices.extend([0] * (num_words - sequence_lengths))
-        all_sequences.append(tag_indices)
-        sequence_score = crf.crf_sequence_score(
-            inputs=array_ops.expand_dims(inputs, 0),
-            tag_indices=array_ops.expand_dims(tag_indices, 0),
-            sequence_lengths=array_ops.expand_dims(sequence_lengths, 0),
-            transition_params=constant_op.constant(transition_params))
-        sequence_score = array_ops.squeeze(sequence_score, [0])
-        all_sequence_scores.append(sequence_score)
-
-      tf_all_sequence_scores = sess.run(all_sequence_scores)
-
-      expected_max_sequence_index = np.argmax(tf_all_sequence_scores)
-      expected_max_sequence = all_sequences[expected_max_sequence_index]
-      expected_max_score = tf_all_sequence_scores[expected_max_sequence_index]
-
-      actual_max_sequence, actual_max_score = crf.crf_decode(
-          array_ops.expand_dims(inputs, 0),
-          constant_op.constant(transition_params),
-          array_ops.expand_dims(sequence_lengths, 0))
-      actual_max_sequence = array_ops.squeeze(actual_max_sequence, [0])
-      actual_max_score = array_ops.squeeze(actual_max_score, [0])
-      tf_actual_max_sequence, tf_actual_max_score = sess.run(
-          [actual_max_sequence, actual_max_score])
-
-      self.assertAllClose(tf_actual_max_score, expected_max_score)
-      self.assertEqual(list(tf_actual_max_sequence[:sequence_lengths]),
-                       expected_max_sequence[:sequence_lengths])
+      with self.test_session() as sess:
+        all_sequence_scores = []
+        all_sequences = []
+
+        # Compare the dynamic program with brute force computation.
+        for tag_indices in itertools.product(
+            range(num_tags), repeat=sequence_lengths):
+          tag_indices = list(tag_indices)
+          tag_indices.extend([0] * (num_words - sequence_lengths))
+          all_sequences.append(tag_indices)
+          sequence_score = crf.crf_sequence_score(
+              inputs=array_ops.expand_dims(inputs, 0),
+              tag_indices=array_ops.expand_dims(tag_indices, 0),
+              sequence_lengths=array_ops.expand_dims(sequence_lengths, 0),
+              transition_params=constant_op.constant(transition_params))
+          sequence_score = array_ops.squeeze(sequence_score, [0])
+          all_sequence_scores.append(sequence_score)
+
+        tf_all_sequence_scores = sess.run(all_sequence_scores)
+
+        expected_max_sequence_index = np.argmax(tf_all_sequence_scores)
+        expected_max_sequence = all_sequences[expected_max_sequence_index]
+        expected_max_score = tf_all_sequence_scores[expected_max_sequence_index]
+
+        actual_max_sequence, actual_max_score = crf.crf_decode(
+            array_ops.expand_dims(inputs, 0),
+            constant_op.constant(transition_params),
+            array_ops.expand_dims(sequence_lengths, 0))
+        actual_max_sequence = array_ops.squeeze(actual_max_sequence, [0])
+        actual_max_score = array_ops.squeeze(actual_max_score, [0])
+        tf_actual_max_sequence, tf_actual_max_score = sess.run(
+            [actual_max_sequence, actual_max_score])
+
+        self.assertAllClose(tf_actual_max_score, expected_max_score)
+        self.assertEqual(list(tf_actual_max_sequence[:sequence_lengths]),
+                         expected_max_sequence[:sequence_lengths])
 
 
 if __name__ == "__main__":
diff --git a/tensorflow/contrib/crf/python/ops/crf.py b/tensorflow/contrib/crf/python/ops/crf.py
index 4282be5ec8..ca384226d4 100644
--- a/tensorflow/contrib/crf/python/ops/crf.py
+++ b/tensorflow/contrib/crf/python/ops/crf.py
@@ -53,7 +53,9 @@ from __future__ import print_function
 import numpy as np
 
 from tensorflow.python.framework import dtypes
+from tensorflow.python.layers import utils
 from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import control_flow_ops
 from tensorflow.python.ops import gen_array_ops
 from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import rnn
@@ -101,12 +103,29 @@ def crf_sequence_score(inputs, tag_indices, sequence_lengths,
   Returns:
     sequence_scores: A [batch_size] vector of unnormalized sequence scores.
   """
-  # Compute the scores of the given tag sequence.
-  unary_scores = crf_unary_score(tag_indices, sequence_lengths, inputs)
-  binary_scores = crf_binary_score(tag_indices, sequence_lengths,
-                                   transition_params)
-  sequence_scores = unary_scores + binary_scores
-  return sequence_scores
+  # If max_seq_len is 1, we skip the score calculation and simply gather the
+  # unary potentials of the single tag.
+  def _single_seq_fn():
+    batch_size = array_ops.shape(inputs, out_type=tag_indices.dtype)[0]
+    example_inds = array_ops.reshape(
+        math_ops.range(batch_size, dtype=tag_indices.dtype), [-1, 1])
+    return array_ops.gather_nd(
+        array_ops.squeeze(inputs, [1]),
+        array_ops.concat([example_inds, tag_indices], axis=1))
+
+  def _multi_seq_fn():
+    # Compute the scores of the given tag sequence.
+    unary_scores = crf_unary_score(tag_indices, sequence_lengths, inputs)
+    binary_scores = crf_binary_score(tag_indices, sequence_lengths,
+                                     transition_params)
+    sequence_scores = unary_scores + binary_scores
+    return sequence_scores
+
+  return utils.smart_cond(
+      pred=math_ops.equal(inputs.shape[1].value or array_ops.shape(inputs)[1],
+                          1),
+      fn1=_single_seq_fn,
+      fn2=_multi_seq_fn)
 
 
 def crf_log_norm(inputs, sequence_lengths, transition_params):
@@ -124,19 +143,32 @@ def crf_log_norm(inputs, sequence_lengths, transition_params):
   # algorithm.
   first_input = array_ops.slice(inputs, [0, 0, 0], [-1, 1, -1])
   first_input = array_ops.squeeze(first_input, [1])
-  rest_of_input = array_ops.slice(inputs, [0, 1, 0], [-1, -1, -1])
 
-  # Compute the alpha values in the forward algorithm in order to get the
-  # partition function.
-  forward_cell = CrfForwardRnnCell(transition_params)
-  _, alphas = rnn.dynamic_rnn(
-      cell=forward_cell,
-      inputs=rest_of_input,
-      sequence_length=sequence_lengths - 1,
-      initial_state=first_input,
-      dtype=dtypes.float32)
-  log_norm = math_ops.reduce_logsumexp(alphas, [1])
-  return log_norm
+  # If max_seq_len is 1, we skip the algorithm and simply reduce_logsumexp over
+  # the "initial state" (the unary potentials).
+  def _single_seq_fn():
+    return math_ops.reduce_logsumexp(first_input, [1])
+
+  def _multi_seq_fn():
+    """Forward computation of alpha values."""
+    rest_of_input = array_ops.slice(inputs, [0, 1, 0], [-1, -1, -1])
+
+    # Compute the alpha values in the forward algorithm in order to get the
+    # partition function.
+    forward_cell = CrfForwardRnnCell(transition_params)
+    _, alphas = rnn.dynamic_rnn(
+        cell=forward_cell,
+        inputs=rest_of_input,
+        sequence_length=sequence_lengths - 1,
+        initial_state=first_input,
+        dtype=dtypes.float32)
+    log_norm = math_ops.reduce_logsumexp(alphas, [1])
+    return log_norm
+
+  max_seq_len = array_ops.shape(inputs)[1]
+  return control_flow_ops.cond(pred=math_ops.equal(max_seq_len, 1),
+                               true_fn=_single_seq_fn,
+                               false_fn=_multi_seq_fn)
 
 
 def crf_log_likelihood(inputs,
@@ -440,41 +472,60 @@ def crf_decode(potentials, transition_params, sequence_length):
                 Contains the highest scoring tag indices.
     best_score: A [batch_size] tensor, containing the score of decode_tags.
   """
-  # For simplicity, in shape comments, denote:
-  # 'batch_size' by 'B', 'max_seq_len' by 'T' , 'num_tags' by 'O' (output).
-  num_tags = potentials.get_shape()[2].value
-
-  # Computes forward decoding. Get last score and backpointers.
-  crf_fwd_cell = CrfDecodeForwardRnnCell(transition_params)
-  initial_state = array_ops.slice(potentials, [0, 0, 0], [-1, 1, -1])
-  initial_state = array_ops.squeeze(initial_state, axis=[1])      # [B, O]
-  inputs = array_ops.slice(potentials, [0, 1, 0], [-1, -1, -1])   # [B, T-1, O]
-  backpointers, last_score = rnn.dynamic_rnn(
-      crf_fwd_cell,
-      inputs=inputs,
-      sequence_length=sequence_length - 1,
-      initial_state=initial_state,
-      time_major=False,
-      dtype=dtypes.int32)             # [B, T - 1, O], [B, O]
-  backpointers = gen_array_ops.reverse_sequence(
-      backpointers, sequence_length - 1, seq_dim=1)               # [B, T-1, O]
-
-  # Computes backward decoding. Extract tag indices from backpointers.
-  crf_bwd_cell = CrfDecodeBackwardRnnCell(num_tags)
-  initial_state = math_ops.cast(math_ops.argmax(last_score, axis=1),
-                                dtype=dtypes.int32)               # [B]
-  initial_state = array_ops.expand_dims(initial_state, axis=-1)   # [B, 1]
-  decode_tags, _ = rnn.dynamic_rnn(
-      crf_bwd_cell,
-      inputs=backpointers,
-      sequence_length=sequence_length - 1,
-      initial_state=initial_state,
-      time_major=False,
-      dtype=dtypes.int32)           # [B, T - 1, 1]
-  decode_tags = array_ops.squeeze(decode_tags, axis=[2])           # [B, T - 1]
-  decode_tags = array_ops.concat([initial_state, decode_tags], axis=1)  # [B, T]
-  decode_tags = gen_array_ops.reverse_sequence(
-      decode_tags, sequence_length, seq_dim=1)                     # [B, T]
-
-  best_score = math_ops.reduce_max(last_score, axis=1)             # [B]
-  return decode_tags, best_score
+  # If max_seq_len is 1, we skip the algorithm and simply return the argmax tag
+  # and the max activation.
+  def _single_seq_fn():
+    squeezed_potentials = array_ops.squeeze(potentials, [1])
+    decode_tags = array_ops.expand_dims(
+        math_ops.argmax(squeezed_potentials, axis=1), 1)
+    best_score = math_ops.reduce_max(squeezed_potentials, axis=1)
+    return math_ops.cast(decode_tags, dtype=dtypes.int32), best_score
+
+  def _multi_seq_fn():
+    """Decoding of highest scoring sequence."""
+
+    # For simplicity, in shape comments, denote:
+    # 'batch_size' by 'B', 'max_seq_len' by 'T' , 'num_tags' by 'O' (output).
+    num_tags = potentials.get_shape()[2].value
+
+    # Computes forward decoding. Get last score and backpointers.
+    crf_fwd_cell = CrfDecodeForwardRnnCell(transition_params)
+    initial_state = array_ops.slice(potentials, [0, 0, 0], [-1, 1, -1])
+    initial_state = array_ops.squeeze(initial_state, axis=[1])  # [B, O]
+    inputs = array_ops.slice(potentials, [0, 1, 0], [-1, -1, -1])  # [B, T-1, O]
+    backpointers, last_score = rnn.dynamic_rnn(  # [B, T - 1, O], [B, O]
+        crf_fwd_cell,
+        inputs=inputs,
+        sequence_length=sequence_length - 1,
+        initial_state=initial_state,
+        time_major=False,
+        dtype=dtypes.int32)
+    backpointers = gen_array_ops.reverse_sequence(  # [B, T - 1, O]
+        backpointers, sequence_length - 1, seq_dim=1)
+
+    # Computes backward decoding. Extract tag indices from backpointers.
+    crf_bwd_cell = CrfDecodeBackwardRnnCell(num_tags)
+    initial_state = math_ops.cast(math_ops.argmax(last_score, axis=1),  # [B]
+                                  dtype=dtypes.int32)
+    initial_state = array_ops.expand_dims(initial_state, axis=-1)  # [B, 1]
+    decode_tags, _ = rnn.dynamic_rnn(  # [B, T - 1, 1]
+        crf_bwd_cell,
+        inputs=backpointers,
+        sequence_length=sequence_length - 1,
+        initial_state=initial_state,
+        time_major=False,
+        dtype=dtypes.int32)
+    decode_tags = array_ops.squeeze(decode_tags, axis=[2])  # [B, T - 1]
+    decode_tags = array_ops.concat([initial_state, decode_tags],   # [B, T]
+                                   axis=1)
+    decode_tags = gen_array_ops.reverse_sequence(  # [B, T]
+        decode_tags, sequence_length, seq_dim=1)
+
+    best_score = math_ops.reduce_max(last_score, axis=1)  # [B]
+    return decode_tags, best_score
+
+  return utils.smart_cond(
+      pred=math_ops.equal(
+          potentials.shape[1].value or array_ops.shape(potentials)[1], 1),
+      fn1=_single_seq_fn,
+      fn2=_multi_seq_fn)
-- 
GitLab


From 9f63f6f4613f6fc556c245bd8b69052778f28dc2 Mon Sep 17 00:00:00 2001
From: Yifei Feng <yifeif@google.com>
Date: Wed, 22 Nov 2017 12:38:18 -0800
Subject: [PATCH 0238/1225] Remove non-existing reference.

---
 tensorflow/contrib/summary/BUILD                     | 1 -
 tensorflow/contrib/summary/summary_ops_graph_test.py | 1 -
 2 files changed, 2 deletions(-)

diff --git a/tensorflow/contrib/summary/BUILD b/tensorflow/contrib/summary/BUILD
index 237339e81e..f34291c203 100644
--- a/tensorflow/contrib/summary/BUILD
+++ b/tensorflow/contrib/summary/BUILD
@@ -45,7 +45,6 @@ py_test(
     srcs_version = "PY2AND3",
     deps = [
         ":summary_ops",
-        ":summary_test_internal",
         ":summary_test_util",
         "//tensorflow/core:protos_all_py",
         "//tensorflow/python:array_ops",
diff --git a/tensorflow/contrib/summary/summary_ops_graph_test.py b/tensorflow/contrib/summary/summary_ops_graph_test.py
index 8fa361de84..703adb7b46 100644
--- a/tensorflow/contrib/summary/summary_ops_graph_test.py
+++ b/tensorflow/contrib/summary/summary_ops_graph_test.py
@@ -21,7 +21,6 @@ import tempfile
 import six
 
 from tensorflow.contrib.summary import summary_ops
-from tensorflow.contrib.summary import summary_test_internal
 from tensorflow.contrib.summary import summary_test_util
 from tensorflow.core.framework import graph_pb2
 from tensorflow.core.framework import node_def_pb2
-- 
GitLab


From d0324067625d56e75984ef235a1b8fe6f6e15a6f Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 22 Nov 2017 12:33:13 -0800
Subject: [PATCH 0239/1225] Minor refactor: move stats files from stochastic to
 common, remove stochastic

PiperOrigin-RevId: 176688846
---
 .../contrib/boosted_trees/kernels/split_handler_ops.cc |  2 +-
 tensorflow/contrib/boosted_trees/lib/BUILD             | 10 +++++-----
 .../stats/feature-split-candidate.h                    |  8 ++++----
 .../{stochastic => common}/stats/gradient-stats.h      |  6 +++---
 .../learner/{stochastic => common}/stats/node-stats.h  |  8 ++++----
 .../{stochastic => common}/stats/node-stats_test.cc    |  2 +-
 .../learner/{stochastic => common}/stats/split-stats.h |  8 ++++----
 7 files changed, 22 insertions(+), 22 deletions(-)
 rename tensorflow/contrib/boosted_trees/lib/learner/{stochastic => common}/stats/feature-split-candidate.h (90%)
 rename tensorflow/contrib/boosted_trees/lib/learner/{stochastic => common}/stats/gradient-stats.h (98%)
 rename tensorflow/contrib/boosted_trees/lib/learner/{stochastic => common}/stats/node-stats.h (98%)
 rename tensorflow/contrib/boosted_trees/lib/learner/{stochastic => common}/stats/node-stats_test.cc (99%)
 rename tensorflow/contrib/boosted_trees/lib/learner/{stochastic => common}/stats/split-stats.h (94%)

diff --git a/tensorflow/contrib/boosted_trees/kernels/split_handler_ops.cc b/tensorflow/contrib/boosted_trees/kernels/split_handler_ops.cc
index a5de1340b9..18b4abd654 100644
--- a/tensorflow/contrib/boosted_trees/kernels/split_handler_ops.cc
+++ b/tensorflow/contrib/boosted_trees/kernels/split_handler_ops.cc
@@ -16,7 +16,7 @@
 #include <string>
 #include <vector>
 
-#include "tensorflow/contrib/boosted_trees/lib/learner/stochastic/stats/node-stats.h"
+#include "tensorflow/contrib/boosted_trees/lib/learner/common/stats/node-stats.h"
 #include "tensorflow/contrib/boosted_trees/proto/split_info.pb.h"
 #include "tensorflow/contrib/boosted_trees/proto/tree_config.pb.h"
 #include "tensorflow/core/framework/device_base.h"
diff --git a/tensorflow/contrib/boosted_trees/lib/BUILD b/tensorflow/contrib/boosted_trees/lib/BUILD
index af389849b4..131bd48562 100644
--- a/tensorflow/contrib/boosted_trees/lib/BUILD
+++ b/tensorflow/contrib/boosted_trees/lib/BUILD
@@ -408,7 +408,7 @@ tf_cc_test(
 # Learner/stochastic
 cc_library(
     name = "gradient-stats",
-    hdrs = ["learner/stochastic/stats/gradient-stats.h"],
+    hdrs = ["learner/common/stats/gradient-stats.h"],
     deps = [
         "//tensorflow/core:framework_headers_lib",
         "//third_party/eigen3",
@@ -417,7 +417,7 @@ cc_library(
 
 cc_library(
     name = "node-stats",
-    hdrs = ["learner/stochastic/stats/node-stats.h"],
+    hdrs = ["learner/common/stats/node-stats.h"],
     deps = [
         ":gradient-stats",
         "//tensorflow/contrib/boosted_trees/proto:learner_proto_cc",
@@ -429,7 +429,7 @@ cc_library(
 
 cc_library(
     name = "split-stats",
-    hdrs = ["learner/stochastic/stats/split-stats.h"],
+    hdrs = ["learner/common/stats/split-stats.h"],
     deps = [
         ":node-stats",
     ],
@@ -437,7 +437,7 @@ cc_library(
 
 cc_library(
     name = "feature-split-candidate",
-    hdrs = ["learner/stochastic/stats/feature-split-candidate.h"],
+    hdrs = ["learner/common/stats/feature-split-candidate.h"],
     deps = [
         ":split-stats",
         "//tensorflow/contrib/boosted_trees/proto:tree_config_proto_cc",
@@ -447,7 +447,7 @@ cc_library(
 tf_cc_test(
     name = "node-stats_test",
     size = "small",
-    srcs = ["learner/stochastic/stats/node-stats_test.cc"],
+    srcs = ["learner/common/stats/node-stats_test.cc"],
     deps = [
         ":node-stats",
         "//tensorflow/core:tensor_testutil",
diff --git a/tensorflow/contrib/boosted_trees/lib/learner/stochastic/stats/feature-split-candidate.h b/tensorflow/contrib/boosted_trees/lib/learner/common/stats/feature-split-candidate.h
similarity index 90%
rename from tensorflow/contrib/boosted_trees/lib/learner/stochastic/stats/feature-split-candidate.h
rename to tensorflow/contrib/boosted_trees/lib/learner/common/stats/feature-split-candidate.h
index fe22691178..339c2e0fde 100644
--- a/tensorflow/contrib/boosted_trees/lib/learner/stochastic/stats/feature-split-candidate.h
+++ b/tensorflow/contrib/boosted_trees/lib/learner/common/stats/feature-split-candidate.h
@@ -13,10 +13,10 @@
 // limitations under the License.
 //
 // =============================================================================
-#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_BOOSTED_TREES_LIB_LEARNER_STOCHASTIC_STATS_FEATURE_SPLIT_CANDIDATE_H_
-#define THIRD_PARTY_TENSORFLOW_CONTRIB_BOOSTED_TREES_LIB_LEARNER_STOCHASTIC_STATS_FEATURE_SPLIT_CANDIDATE_H_
+#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_BOOSTED_TREES_LIB_LEARNER_COMMON_STATS_FEATURE_SPLIT_CANDIDATE_H_
+#define THIRD_PARTY_TENSORFLOW_CONTRIB_BOOSTED_TREES_LIB_LEARNER_COMMON_STATS_FEATURE_SPLIT_CANDIDATE_H_
 
-#include "tensorflow/contrib/boosted_trees/lib/learner/stochastic/stats/split-stats.h"
+#include "tensorflow/contrib/boosted_trees/lib/learner/common/stats/split-stats.h"
 #include "tensorflow/contrib/boosted_trees/proto/tree_config.pb.h"
 
 namespace tensorflow {
@@ -58,4 +58,4 @@ struct FeatureSplitCandidate {
 }  // namespace boosted_trees
 }  // namespace tensorflow
 
-#endif  // THIRD_PARTY_TENSORFLOW_CONTRIB_BOOSTED_TREES_LIB_LEARNER_STOCHASTIC_STATS_FEATURE_SPLIT_CANDIDATE_H_
+#endif  // THIRD_PARTY_TENSORFLOW_CONTRIB_BOOSTED_TREES_LIB_LEARNER_COMMON_STATS_FEATURE_SPLIT_CANDIDATE_H_
diff --git a/tensorflow/contrib/boosted_trees/lib/learner/stochastic/stats/gradient-stats.h b/tensorflow/contrib/boosted_trees/lib/learner/common/stats/gradient-stats.h
similarity index 98%
rename from tensorflow/contrib/boosted_trees/lib/learner/stochastic/stats/gradient-stats.h
rename to tensorflow/contrib/boosted_trees/lib/learner/common/stats/gradient-stats.h
index dad64bf165..34e3ddb777 100644
--- a/tensorflow/contrib/boosted_trees/lib/learner/stochastic/stats/gradient-stats.h
+++ b/tensorflow/contrib/boosted_trees/lib/learner/common/stats/gradient-stats.h
@@ -12,8 +12,8 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 // =============================================================================
-#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_BOOSTED_TREES_LIB_LEARNER_STOCHASTIC_STATS_GRADIENT_STATS_H_
-#define THIRD_PARTY_TENSORFLOW_CONTRIB_BOOSTED_TREES_LIB_LEARNER_STOCHASTIC_STATS_GRADIENT_STATS_H_
+#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_BOOSTED_TREES_LIB_LEARNER_COMMON_STATS_GRADIENT_STATS_H_
+#define THIRD_PARTY_TENSORFLOW_CONTRIB_BOOSTED_TREES_LIB_LEARNER_COMMON_STATS_GRADIENT_STATS_H_
 
 #include <math.h>
 
@@ -190,4 +190,4 @@ inline GradientStats operator-(const GradientStats& a, const GradientStats& b) {
 }  // namespace boosted_trees
 }  // namespace tensorflow
 
-#endif  // THIRD_PARTY_TENSORFLOW_CONTRIB_BOOSTED_TREES_LIB_LEARNER_STOCHASTIC_STATS_GRADIENT_STATS_H_
+#endif  // THIRD_PARTY_TENSORFLOW_CONTRIB_BOOSTED_TREES_LIB_LEARNER_COMMON_STATS_GRADIENT_STATS_H_
diff --git a/tensorflow/contrib/boosted_trees/lib/learner/stochastic/stats/node-stats.h b/tensorflow/contrib/boosted_trees/lib/learner/common/stats/node-stats.h
similarity index 98%
rename from tensorflow/contrib/boosted_trees/lib/learner/stochastic/stats/node-stats.h
rename to tensorflow/contrib/boosted_trees/lib/learner/common/stats/node-stats.h
index 4e5f53874d..642a183aec 100644
--- a/tensorflow/contrib/boosted_trees/lib/learner/stochastic/stats/node-stats.h
+++ b/tensorflow/contrib/boosted_trees/lib/learner/common/stats/node-stats.h
@@ -12,12 +12,12 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 // =============================================================================
-#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_BOOSTED_TREES_LIB_LEARNER_STOCHASTIC_STATS_NODE_STATS_H_
-#define THIRD_PARTY_TENSORFLOW_CONTRIB_BOOSTED_TREES_LIB_LEARNER_STOCHASTIC_STATS_NODE_STATS_H_
+#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_BOOSTED_TREES_LIB_LEARNER_COMMON_STATS_NODE_STATS_H_
+#define THIRD_PARTY_TENSORFLOW_CONTRIB_BOOSTED_TREES_LIB_LEARNER_COMMON_STATS_NODE_STATS_H_
 
 #include "third_party/eigen3/Eigen/Core"
 #include "third_party/eigen3/Eigen/Eigenvalues"
-#include "tensorflow/contrib/boosted_trees/lib/learner/stochastic/stats/gradient-stats.h"
+#include "tensorflow/contrib/boosted_trees/lib/learner/common/stats/gradient-stats.h"
 #include "tensorflow/contrib/boosted_trees/proto/learner.pb.h"
 #include "tensorflow/contrib/boosted_trees/proto/tree_config.pb.h"
 #include "tensorflow/core/framework/shape_inference.h"
@@ -298,4 +298,4 @@ struct NodeStats {
 }  // namespace boosted_trees
 }  // namespace tensorflow
 
-#endif  // THIRD_PARTY_TENSORFLOW_CONTRIB_BOOSTED_TREES_LIB_LEARNER_STOCHASTIC_STATS_NODE_STATS_H_
+#endif  // THIRD_PARTY_TENSORFLOW_CONTRIB_BOOSTED_TREES_LIB_LEARNER_COMMON_STATS_NODE_STATS_H_
diff --git a/tensorflow/contrib/boosted_trees/lib/learner/stochastic/stats/node-stats_test.cc b/tensorflow/contrib/boosted_trees/lib/learner/common/stats/node-stats_test.cc
similarity index 99%
rename from tensorflow/contrib/boosted_trees/lib/learner/stochastic/stats/node-stats_test.cc
rename to tensorflow/contrib/boosted_trees/lib/learner/common/stats/node-stats_test.cc
index ecb7a04efb..f867e77d3e 100644
--- a/tensorflow/contrib/boosted_trees/lib/learner/stochastic/stats/node-stats_test.cc
+++ b/tensorflow/contrib/boosted_trees/lib/learner/common/stats/node-stats_test.cc
@@ -12,7 +12,7 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 // =============================================================================
-#include "tensorflow/contrib/boosted_trees/lib/learner/stochastic/stats/node-stats.h"
+#include "tensorflow/contrib/boosted_trees/lib/learner/common/stats/node-stats.h"
 
 #include "tensorflow/core/framework/tensor_testutil.h"
 #include "tensorflow/core/platform/test.h"
diff --git a/tensorflow/contrib/boosted_trees/lib/learner/stochastic/stats/split-stats.h b/tensorflow/contrib/boosted_trees/lib/learner/common/stats/split-stats.h
similarity index 94%
rename from tensorflow/contrib/boosted_trees/lib/learner/stochastic/stats/split-stats.h
rename to tensorflow/contrib/boosted_trees/lib/learner/common/stats/split-stats.h
index f700cbced8..054ccd9a8c 100644
--- a/tensorflow/contrib/boosted_trees/lib/learner/stochastic/stats/split-stats.h
+++ b/tensorflow/contrib/boosted_trees/lib/learner/common/stats/split-stats.h
@@ -12,12 +12,12 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 // =============================================================================
-#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_BOOSTED_TREES_LIB_LEARNER_STOCHASTIC_STATS_SPLIT_STATS_H_
-#define THIRD_PARTY_TENSORFLOW_CONTRIB_BOOSTED_TREES_LIB_LEARNER_STOCHASTIC_STATS_SPLIT_STATS_H_
+#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_BOOSTED_TREES_LIB_LEARNER_COMMON_STATS_SPLIT_STATS_H_
+#define THIRD_PARTY_TENSORFLOW_CONTRIB_BOOSTED_TREES_LIB_LEARNER_COMMON_STATS_SPLIT_STATS_H_
 
 #include <string>
 
-#include "tensorflow/contrib/boosted_trees/lib/learner/stochastic/stats/node-stats.h"
+#include "tensorflow/contrib/boosted_trees/lib/learner/common/stats/node-stats.h"
 
 namespace tensorflow {
 namespace boosted_trees {
@@ -81,4 +81,4 @@ struct SplitStats {
 }  // namespace boosted_trees
 }  // namespace tensorflow
 
-#endif  // THIRD_PARTY_TENSORFLOW_CONTRIB_BOOSTED_TREES_LIB_LEARNER_STOCHASTIC_STATS_SPLIT_STATS_H_
+#endif  // THIRD_PARTY_TENSORFLOW_CONTRIB_BOOSTED_TREES_LIB_LEARNER_COMMON_STATS_SPLIT_STATS_H_
-- 
GitLab


From f7f8de28504e13a82385d49c0a75baaf82f190bf Mon Sep 17 00:00:00 2001
From: Yifei Feng <fengyifei2026@gmail.com>
Date: Wed, 22 Nov 2017 12:41:58 -0800
Subject: [PATCH 0240/1225] Remove non-existing reference.

---
 tensorflow/contrib/summary/BUILD | 1 -
 1 file changed, 1 deletion(-)

diff --git a/tensorflow/contrib/summary/BUILD b/tensorflow/contrib/summary/BUILD
index 237339e81e..f34291c203 100644
--- a/tensorflow/contrib/summary/BUILD
+++ b/tensorflow/contrib/summary/BUILD
@@ -45,7 +45,6 @@ py_test(
     srcs_version = "PY2AND3",
     deps = [
         ":summary_ops",
-        ":summary_test_internal",
         ":summary_test_util",
         "//tensorflow/core:protos_all_py",
         "//tensorflow/python:array_ops",
-- 
GitLab


From 9998d927ab9a2c915326130e9cdf773276ce9db0 Mon Sep 17 00:00:00 2001
From: Yifei Feng <fengyifei2026@gmail.com>
Date: Wed, 22 Nov 2017 12:42:23 -0800
Subject: [PATCH 0241/1225] Update summary_ops_graph_test.py

---
 tensorflow/contrib/summary/summary_ops_graph_test.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/tensorflow/contrib/summary/summary_ops_graph_test.py b/tensorflow/contrib/summary/summary_ops_graph_test.py
index 8fa361de84..703adb7b46 100644
--- a/tensorflow/contrib/summary/summary_ops_graph_test.py
+++ b/tensorflow/contrib/summary/summary_ops_graph_test.py
@@ -21,7 +21,6 @@ import tempfile
 import six
 
 from tensorflow.contrib.summary import summary_ops
-from tensorflow.contrib.summary import summary_test_internal
 from tensorflow.contrib.summary import summary_test_util
 from tensorflow.core.framework import graph_pb2
 from tensorflow.core.framework import node_def_pb2
-- 
GitLab


From 467131ff039bb37af36b5fc907960896a20c6f65 Mon Sep 17 00:00:00 2001
From: Eli Bendersky <eliben@google.com>
Date: Wed, 22 Nov 2017 12:33:18 -0800
Subject: [PATCH 0242/1225] Make a parameter name in a declaration consistent
 with a name in the definition.

PiperOrigin-RevId: 176688856
---
 tensorflow/compiler/xla/client/client.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/compiler/xla/client/client.h b/tensorflow/compiler/xla/client/client.h
index cf6878dd8e..c28380b689 100644
--- a/tensorflow/compiler/xla/client/client.h
+++ b/tensorflow/compiler/xla/client/client.h
@@ -142,7 +142,7 @@ class Client {
 
   // Returns a vector of global data handles that point to the tuple elements.
   StatusOr<std::vector<std::unique_ptr<GlobalData>>> DeconstructTuple(
-      const GlobalData& computation);
+      const GlobalData& data);
 
   // Retrieves the statistics of the given computation.
   StatusOr<ComputationStats> GetComputationStats(
-- 
GitLab


From 02877b2ff172415845f5305a7a534ef4f7174cf4 Mon Sep 17 00:00:00 2001
From: Gunhan Gulsoy <gunan@google.com>
Date: Wed, 22 Nov 2017 12:45:38 -0800
Subject: [PATCH 0243/1225] Also ignore no_oss tags in windows builds. (#14810)

---
 tensorflow/python/BUILD                                       | 1 +
 tensorflow/tools/ci_build/windows/cpu/pip/build_tf_windows.sh | 4 ++--
 tensorflow/tools/ci_build/windows/gpu/pip/build_tf_windows.sh | 4 ++--
 3 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD
index 5eb9b79ee6..4583b69bbf 100644
--- a/tensorflow/python/BUILD
+++ b/tensorflow/python/BUILD
@@ -3326,6 +3326,7 @@ py_test(
     tags = [
         "no_gpu",
         "no_oss",
+        "no_pip",
         "no_pip_gpu",
         "notap",
     ],
diff --git a/tensorflow/tools/ci_build/windows/cpu/pip/build_tf_windows.sh b/tensorflow/tools/ci_build/windows/cpu/pip/build_tf_windows.sh
index f6e3d2e6c7..8520ca898f 100644
--- a/tensorflow/tools/ci_build/windows/cpu/pip/build_tf_windows.sh
+++ b/tensorflow/tools/ci_build/windows/cpu/pip/build_tf_windows.sh
@@ -64,7 +64,7 @@ reinstall_tensorflow_pip ${PIP_NAME}
 # https://github.com/tensorflow/tensorflow/issues/12844 is fixed.
 bazel test -c opt $BUILD_OPTS -k --test_output=errors \
   --define=no_tensorflow_py_deps=true --test_lang_filters=py \
-  --test_tag_filters=-no_pip,-no_windows \
-  --build_tag_filters=-no_pip,-no_windows --build_tests_only \
+  --test_tag_filters=-no_pip,-no_windows,-no_oss \
+  --build_tag_filters=-no_pip,-no_windows,-no_oss --build_tests_only \
   --test_env=TF_SAVER_LENIENT_NAMES=True \
   //${PY_TEST_DIR}/tensorflow/python/...
diff --git a/tensorflow/tools/ci_build/windows/gpu/pip/build_tf_windows.sh b/tensorflow/tools/ci_build/windows/gpu/pip/build_tf_windows.sh
index 25d327c818..47ca42d642 100644
--- a/tensorflow/tools/ci_build/windows/gpu/pip/build_tf_windows.sh
+++ b/tensorflow/tools/ci_build/windows/gpu/pip/build_tf_windows.sh
@@ -65,7 +65,7 @@ reinstall_tensorflow_pip ${PIP_NAME}
 # https://github.com/tensorflow/tensorflow/issues/12844 is fixed.
 bazel test -c opt $BUILD_OPTS -k --test_output=errors \
   --define=no_tensorflow_py_deps=true --test_lang_filters=py \
-  --test_tag_filters=-no_pip,-no_windows,-no_windows_gpu,-no_gpu,-no_pip_gpu \
-  --build_tag_filters=-no_pip,-no_windows,-no_windows_gpu,-no_gpu,-no_pip_gpu \
+  --test_tag_filters=-no_pip,-no_windows,-no_windows_gpu,-no_gpu,-no_pip_gpu,no_oss \
+  --build_tag_filters=-no_pip,-no_windows,-no_windows_gpu,-no_gpu,-no_pip_gpu,no_oss \
   --test_env=TF_SAVER_LENIENT_NAMES=True \
   --local_test_jobs=1 --build_tests_only //${PY_TEST_DIR}/tensorflow/python/...
-- 
GitLab


From 780d35918354a5a06e0499c457ae7c9c0e45d172 Mon Sep 17 00:00:00 2001
From: Keven Wang <keven425@gmail.com>
Date: Wed, 22 Nov 2017 12:46:40 -0800
Subject: [PATCH 0244/1225] execute command properly in bash.exe on windows
 (#14745)

---
 tensorflow/workspace.bzl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl
index 8e62228c1b..9562f7e922 100644
--- a/tensorflow/workspace.bzl
+++ b/tensorflow/workspace.bzl
@@ -107,7 +107,7 @@ def _apply_patch(repo_ctx, patch_file):
     bazel_sh = _get_env_var(repo_ctx, "BAZEL_SH")
     if not bazel_sh:
       fail("BAZEL_SH environment variable is not set")
-    cmd = [bazel_sh, "-c", " ".join(cmd)]
+    cmd = [bazel_sh, "-l", "-c", " ".join(cmd)]
   _execute_and_check_ret_code(repo_ctx, cmd)
 
 # Download the repository and apply a patch to its root
-- 
GitLab


From 8af1600d49ff4cc16063ab1aafbde52be9347c62 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 22 Nov 2017 12:35:30 -0800
Subject: [PATCH 0245/1225] Allow to continue when function-inlining fails.
 This is useful if either the function-inlining fails erroneously, or if one
 would like to continue at least long enough to get a graph-visualization.
 Also, in case of multiple inlining passes, only log once.

PiperOrigin-RevId: 176689106
---
 tensorflow/contrib/lite/toco/import_tensorflow.cc | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/tensorflow/contrib/lite/toco/import_tensorflow.cc b/tensorflow/contrib/lite/toco/import_tensorflow.cc
index 1f959600f3..691b4ff2a9 100644
--- a/tensorflow/contrib/lite/toco/import_tensorflow.cc
+++ b/tensorflow/contrib/lite/toco/import_tensorflow.cc
@@ -1462,18 +1462,23 @@ bool InlineAllFunctions(GraphDef* graphdef) {
 
   tensorflow::Graph graph(fld);
   tensorflow::GraphConstructorOptions gc_opts;
-  TF_CHECK_OK(
-      tensorflow::ConvertGraphDefToGraph(gc_opts, graphdef_copy, &graph));
+  const auto& tf_convert_status =
+      tensorflow::ConvertGraphDefToGraph(gc_opts, graphdef_copy, &graph);
+  if (!tf_convert_status.ok()) {
+    LOG(ERROR) << "tensorflow::ConvertGraphDefToGraph failed with status: "
+               << tf_convert_status.ToString();
+    return false;
+  }
 
   // Iterate over the graph until there are no more nodes to be inlined.
   bool graph_modified = false;
   while (tensorflow::ExpandInlineFunctions(flr, &graph)) {
     graph_modified = true;
-    LOG(INFO) << "Found functions that were inlined.";
   }
 
   // Output inlined graph
   if (graph_modified) {
+    LOG(INFO) << "Found and inlined TensorFlow functions.";
     graph.ToGraphDef(graphdef);
   }
   return graph_modified;
-- 
GitLab


From 4b636957604faa3361a799dd9d8749a6b85afff7 Mon Sep 17 00:00:00 2001
From: Sanjoy Das <sanjoy@google.com>
Date: Wed, 22 Nov 2017 12:39:54 -0800
Subject: [PATCH 0246/1225] Place HloProfilePrinter and HloProfileIndexMap in
 Executable

This refactoring will later allow XlaCompiledCpuFunction to pull out the
HloProfilePrinter from Executable and use that to display the hlo execution
profile.  A de/serialized HloProfilePrinter will let AOT compiled binaries
display their Hlo execution profile.

PiperOrigin-RevId: 176689528
---
 .../compiler/xla/service/cpu/cpu_compiler.cc  | 75 +++++++++++++++----
 .../xla/service/cpu/cpu_executable.cc         | 51 +++++++------
 .../compiler/xla/service/cpu/cpu_executable.h | 19 ++---
 .../service/cpu/parallel_cpu_executable.cc    | 46 ++++--------
 .../xla/service/cpu/parallel_cpu_executable.h | 10 +--
 tensorflow/compiler/xla/service/executable.h  | 35 ++++++---
 .../compiler/xla/service/gpu/gpu_compiler.cc  | 18 ++++-
 .../xla/service/gpu/gpu_executable.cc         | 13 ++--
 .../compiler/xla/service/gpu/gpu_executable.h |  8 +-
 .../xla/service/hlo_execution_profile.cc      | 22 ++----
 .../xla/service/hlo_execution_profile.h       | 24 +++---
 .../xla/service/hlo_execution_profile_test.cc |  6 +-
 .../xla/service/interpreter/executable.cc     |  8 +-
 .../xla/service/interpreter/executable.h      |  2 -
 tensorflow/compiler/xla/service/service.cc    | 11 +--
 15 files changed, 193 insertions(+), 155 deletions(-)

diff --git a/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc b/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc
index 56940b8d63..ff6042ae19 100644
--- a/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc
+++ b/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc
@@ -197,28 +197,35 @@ void InitializeLLVMCommandLineOptions(const HloModuleConfig& config) {
 class CollectProfileCandidates : public DfsHloVisitorWithDefault {
  public:
   static StatusOr<std::unordered_map<const HloInstruction*, size_t>>
-  GetCandidatesForComputation(HloComputation* computation) {
+  GetCandidatesForComputation(
+      HloComputation* computation,
+      const std::unordered_map<const HloInstruction*, int64>&
+          assigned_indices) {
     std::unordered_map<const HloInstruction*, size_t> hlo_to_profile_idx;
     CollectProfileCandidates profile_candidates_for_computation(
-        &hlo_to_profile_idx);
+        &hlo_to_profile_idx, assigned_indices);
     TF_RETURN_IF_ERROR(
         computation->Accept(&profile_candidates_for_computation));
     return hlo_to_profile_idx;
   }
 
  private:
-  explicit CollectProfileCandidates(
-      std::unordered_map<const HloInstruction*, size_t>* hlo_to_profile_idx)
-      : hlo_to_profile_idx_(hlo_to_profile_idx) {}
+  CollectProfileCandidates(
+      std::unordered_map<const HloInstruction*, size_t>* hlo_to_profile_idx,
+      const std::unordered_map<const HloInstruction*, int64>& assigned_indices)
+      : hlo_to_profile_idx_(hlo_to_profile_idx),
+        assigned_indices_(assigned_indices) {}
 
   Status DefaultAction(HloInstruction* hlo_instruction) override {
-    hlo_to_profile_idx_->insert({hlo_instruction, hlo_to_profile_idx_->size()});
+    hlo_to_profile_idx_->insert(
+        {hlo_instruction, FindOrDie(assigned_indices_, hlo_instruction)});
     return Status::OK();
   }
 
   Status HandleCall(HloInstruction* call) override {
     TF_RETURN_IF_ERROR(DefaultAction(call));
-    CollectProfileCandidates candidates_for_call(hlo_to_profile_idx_);
+    CollectProfileCandidates candidates_for_call(hlo_to_profile_idx_,
+                                                 assigned_indices_);
     TF_RETURN_IF_ERROR(call->to_apply()->Accept(&candidates_for_call));
     return Status::OK();
   }
@@ -232,17 +239,20 @@ class CollectProfileCandidates : public DfsHloVisitorWithDefault {
   Status HandleWhile(HloInstruction* xla_while) override {
     TF_RETURN_IF_ERROR(DefaultAction(xla_while));
 
-    CollectProfileCandidates candidates_for_condition(hlo_to_profile_idx_);
+    CollectProfileCandidates candidates_for_condition(hlo_to_profile_idx_,
+                                                      assigned_indices_);
     TF_RETURN_IF_ERROR(
         xla_while->while_condition()->Accept(&candidates_for_condition));
 
-    CollectProfileCandidates candidates_for_body(hlo_to_profile_idx_);
+    CollectProfileCandidates candidates_for_body(hlo_to_profile_idx_,
+                                                 assigned_indices_);
     TF_RETURN_IF_ERROR(xla_while->while_body()->Accept(&candidates_for_body));
 
     return Status::OK();
   }
 
   std::unordered_map<const HloInstruction*, size_t>* hlo_to_profile_idx_;
+  const std::unordered_map<const HloInstruction*, int64>& assigned_indices_;
 };
 }  // namespace
 
@@ -475,10 +485,27 @@ StatusOr<std::unique_ptr<Executable>> CpuCompiler::RunBackend(
 
   HloComputation* computation = module->entry_computation();
   std::unordered_map<const HloInstruction*, size_t> hlo_to_profile_idx;
+  std::unique_ptr<HloProfileIndexMap> hlo_profile_index_map;
+  std::unique_ptr<HloProfilePrinter> hlo_profile_printer;
   if (module->config().hlo_profiling_enabled()) {
+    hlo_profile_index_map = MakeUnique<HloProfileIndexMap>(*module);
+
     TF_ASSIGN_OR_RETURN(
         hlo_to_profile_idx,
-        CollectProfileCandidates::GetCandidatesForComputation(computation));
+        CollectProfileCandidates::GetCandidatesForComputation(
+            computation, hlo_profile_index_map->instruction_to_profile_idx()));
+
+    auto shape_size_bytes = [](const Shape& shape) {
+      // On the cpu, opaques are pointers.
+      if (ShapeUtil::IsOpaque(shape)) {
+        return static_cast<int64>(sizeof(void*));
+      }
+      return ShapeUtil::ByteSizeOf(shape, sizeof(void*));
+    };
+
+    HloCostAnalysis cost_analysis(shape_size_bytes);
+    hlo_profile_printer =
+        CreateHloProfilePrinter(*hlo_profile_index_map, cost_analysis);
   }
 
   std::unique_ptr<Executable> cpu_executable;
@@ -544,8 +571,16 @@ StatusOr<std::unique_ptr<Executable>> CpuCompiler::RunBackend(
       parallel_computations.emplace(to_apply, instruction);
     }
 
+    // We always profile the entire computation as a whole, even if hlo
+    // profiling is disabled.  When hlo profiling is diabled, we pass in a
+    // profile counter array of just one element, which corresponds to the whole
+    // computation.
+    size_t entry_computation_profile_idx =
+        hlo_profile_index_map ? hlo_profile_index_map->GetProfileIndexFor(
+                                    *module->entry_computation())
+                              : 0;
     IrEmitter ir_emitter(*module, *assignment, llvm_module.get(),
-                         hlo_to_profile_idx, hlo_to_profile_idx.size(),
+                         hlo_to_profile_idx, entry_computation_profile_idx,
                          jit->target_machine(), jit->external_constant_pool());
 
     std::unique_ptr<HloInstructionMap<string>> function_names(
@@ -586,8 +621,8 @@ StatusOr<std::unique_ptr<Executable>> CpuCompiler::RunBackend(
     jit->AddModule(std::move(llvm_module));
     cpu_executable.reset(new ParallelCpuExecutable(
         std::move(jit), std::move(assignment), std::move(module),
-        std::move(function_names), std::move(hlo_to_profile_idx),
-        std::move(aligned_constants)));
+        std::move(function_names), std::move(aligned_constants),
+        std::move(hlo_profile_printer), std::move(hlo_profile_index_map)));
 
     if (embed_ir_in_executable) {
       static_cast<CpuExecutable&>(*cpu_executable)
@@ -620,12 +655,22 @@ StatusOr<std::unique_ptr<Executable>> CpuCompiler::RunBackend(
       TF_RETURN_IF_ERROR(protobuf_util::DumpProtoToDirectory(
           proto, xla_dump_hlo_proto_to, module->name()));
     }
+    // We always profile the entire computation as a whole, even if hlo
+    // profiling is disabled.  When hlo profiling is diabled, we pass in a
+    // profile counter array of just one element, which corresponds to the whole
+    // computation.
+    size_t entry_computation_profile_idx =
+        hlo_profile_index_map ? hlo_profile_index_map->GetProfileIndexFor(
+                                    *module->entry_computation())
+                              : 0;
+
     // Each computation is a single function.  Emit all embedded computations
     // before the entry computation. The order of computations returned from
     // GetEmbeddedComputations guarantees that a called computation occurs
     // before a caller computation.
+
     IrEmitter ir_emitter(*module, *assignment, llvm_module.get(),
-                         hlo_to_profile_idx, hlo_to_profile_idx.size(),
+                         hlo_to_profile_idx, entry_computation_profile_idx,
                          jit->target_machine(), jit->external_constant_pool());
 
     for (auto embedded_computation :
@@ -659,7 +704,7 @@ StatusOr<std::unique_ptr<Executable>> CpuCompiler::RunBackend(
     jit->AddModule(std::move(llvm_module));
     cpu_executable.reset(new CpuExecutable(
         std::move(jit), std::move(assignment), std::move(module), function_name,
-        std::move(hlo_to_profile_idx)));
+        std::move(hlo_profile_printer), std::move(hlo_profile_index_map)));
 
     if (embed_ir_in_executable) {
       static_cast<CpuExecutable&>(*cpu_executable)
diff --git a/tensorflow/compiler/xla/service/cpu/cpu_executable.cc b/tensorflow/compiler/xla/service/cpu/cpu_executable.cc
index e6ef9d6314..e956f478b8 100644
--- a/tensorflow/compiler/xla/service/cpu/cpu_executable.cc
+++ b/tensorflow/compiler/xla/service/cpu/cpu_executable.cc
@@ -55,11 +55,12 @@ CpuExecutable::CpuExecutable(
     std::unique_ptr<const BufferAssignment> assignment,
     std::unique_ptr<const HloModule> hlo_module,
     const string& entry_function_name,
-    std::unordered_map<const HloInstruction*, size_t> hlo_to_profile_idx)
-    : Executable(std::move(hlo_module)),
+    std::unique_ptr<HloProfilePrinter> hlo_profile_printer,
+    std::unique_ptr<HloProfileIndexMap> hlo_profile_index_map)
+    : Executable(std::move(hlo_module), std::move(hlo_profile_printer),
+                 std::move(hlo_profile_index_map)),
       jit_(std::move(jit)),
-      assignment_(std::move(assignment)),
-      hlo_to_profile_idx_(std::move(hlo_to_profile_idx)) {
+      assignment_(std::move(assignment)) {
   // Resolve symbols in the constructor rather than at execution time to avoid
   // races because FindSymbol is not thread safe.
   llvm::JITSymbol sym = jit_->FindSymbol(entry_function_name);
@@ -183,9 +184,16 @@ Status CpuExecutable::ExecuteComputeFunction(
   uint64 start_micros = tensorflow::Env::Default()->NowMicros();
 
   // Allocate profiling counters for each hlo instruction that we would like to
-  // profile.  Allocate an additional profile counter for the entire
-  // computation.
-  std::vector<uint64> profile_counters(hlo_to_profile_idx_.size() + 1);
+  // profile.  Even when not Hlo profiling, we allocate a counter for the entire
+  // computation, which we use to update ExecutionProfile below.
+  std::vector<int64>* profile_counters = nullptr;
+  std::vector<int64> profile_counter_for_entry_computation;
+  if (hlo_execution_profile) {
+    profile_counters = hlo_execution_profile->mutable_profile_counters();
+  } else {
+    profile_counters = &profile_counter_for_entry_computation;
+    profile_counter_for_entry_computation.push_back(0);
+  }
 
   // Call the computation function following the calling convention.
   std::vector<void*> buffer_pointers;
@@ -200,7 +208,7 @@ Status CpuExecutable::ExecuteComputeFunction(
     VLOG(3) << tensorflow::strings::Printf(
         "  func(void* result, void* params[%zu], void* temps[%zu], "
         "uint64 profile_counters[%zu])",
-        args_array.size(), buffer_pointers.size(), profile_counters.size());
+        args_array.size(), buffer_pointers.size(), profile_counters->size());
     VLOG(3) << tensorflow::strings::Printf("    result = %p", result_buffer);
     auto ptr_printer = [](string* out, const void* p) {
       tensorflow::strings::StrAppend(out, tensorflow::strings::Printf("%p", p));
@@ -212,11 +220,11 @@ Status CpuExecutable::ExecuteComputeFunction(
         "    temps = [%s]",
         tensorflow::str_util::Join(buffer_pointers, ", ", ptr_printer).c_str());
     VLOG(3) << tensorflow::strings::Printf("    profile_counters = %p",
-                                           profile_counters.data());
+                                           profile_counters->data());
   }
 
   compute_function_(result_buffer, run_options, args_array.data(),
-                    buffer_pointers.data(), profile_counters.data());
+                    buffer_pointers.data(), profile_counters->data());
 
   uint64 end_micros = tensorflow::Env::Default()->NowMicros();
 
@@ -225,20 +233,15 @@ Status CpuExecutable::ExecuteComputeFunction(
     const double nanoseconds = (end_micros - start_micros) * 1000.0;
     execution_profile_.set_compute_time_ns(std::max(nanoseconds, 1.0));
 
-    // The last profile counter is used for the computation as a whole.
-    execution_profile_.set_compute_cycle_count(profile_counters.back());
-  }
-
-  if (hlo_execution_profile != nullptr) {
-    hlo_execution_profile->set_total_cycles_executed(
-        *module().entry_computation(), profile_counters.back());
-
-    for (auto hlo_prof_idx : hlo_to_profile_idx_) {
-      const HloInstruction* hlo = hlo_prof_idx.first;
-      uint64 cycles_taken = profile_counters[hlo_prof_idx.second];
-      hlo_execution_profile->SetCyclesTakenBy(hlo, cycles_taken);
+    if (hlo_execution_profile) {
+      execution_profile_.set_compute_cycle_count(
+          hlo_execution_profile->total_cycles_executed(
+              *module().entry_computation()));
+    } else {
+      execution_profile_.set_compute_cycle_count(profile_counters->back());
     }
   }
+
   return Status::OK();
 }
 
@@ -428,9 +431,5 @@ const PointsToSet& CpuExecutable::GetRootPointsToSet() const {
       module().entry_computation()->root_instruction());
 }
 
-std::unique_ptr<HloCostAnalysis> CpuExecutable::CreateCostAnalysis() const {
-  return MakeUnique<HloCostAnalysis>(ShapeSizeBytes);
-}
-
 }  // namespace cpu
 }  // namespace xla
diff --git a/tensorflow/compiler/xla/service/cpu/cpu_executable.h b/tensorflow/compiler/xla/service/cpu/cpu_executable.h
index 238bc9b46a..17ee2d673e 100644
--- a/tensorflow/compiler/xla/service/cpu/cpu_executable.h
+++ b/tensorflow/compiler/xla/service/cpu/cpu_executable.h
@@ -47,12 +47,12 @@ namespace cpu {
 // architecture, so JIT-ed code and host code share the same ABI.
 class CpuExecutable : public Executable {
  public:
-  CpuExecutable(
-      std::unique_ptr<SimpleOrcJIT> jit,
-      std::unique_ptr<const BufferAssignment> assignment,
-      std::unique_ptr<const HloModule> hlo_module,
-      const string& entry_function_name,
-      std::unordered_map<const HloInstruction*, size_t> hlo_to_profile_idx);
+  CpuExecutable(std::unique_ptr<SimpleOrcJIT> jit,
+                std::unique_ptr<const BufferAssignment> assignment,
+                std::unique_ptr<const HloModule> hlo_module,
+                const string& entry_function_name,
+                std::unique_ptr<HloProfilePrinter> hlo_profile_printer,
+                std::unique_ptr<HloProfileIndexMap> hlo_profile_index_map);
   ~CpuExecutable() override {}
 
   StatusOr<perftools::gputools::DeviceMemoryBase> ExecuteOnStream(
@@ -85,12 +85,10 @@ class CpuExecutable : public Executable {
 
   static int64 ShapeSizeBytes(const Shape& shape);
 
-  std::unique_ptr<HloCostAnalysis> CreateCostAnalysis() const override;
-
   // Type of the computation function we expect in the JIT.
   using ComputeFunctionType = void (*)(
       void* /*result*/, const ExecutableRunOptions* /*run_options*/,
-      const void** /*args*/, void** /*temps*/, uint64* /*profile_counters*/);
+      const void** /*args*/, void** /*temps*/, int64* /*profile_counters*/);
 
   const ComputeFunctionType& compute_function() const {
     return compute_function_;
@@ -145,9 +143,6 @@ class CpuExecutable : public Executable {
   // Entry function name for the computation.
   const string entry_function_name_;
 
-  // Maps HLOs to their index into the profile counter array.
-  const std::unordered_map<const HloInstruction*, size_t> hlo_to_profile_idx_;
-
   TF_DISALLOW_COPY_AND_ASSIGN(CpuExecutable);
 };
 
diff --git a/tensorflow/compiler/xla/service/cpu/parallel_cpu_executable.cc b/tensorflow/compiler/xla/service/cpu/parallel_cpu_executable.cc
index aff61296ce..0077e344e2 100644
--- a/tensorflow/compiler/xla/service/cpu/parallel_cpu_executable.cc
+++ b/tensorflow/compiler/xla/service/cpu/parallel_cpu_executable.cc
@@ -59,19 +59,20 @@ ParallelCpuExecutable::ParallelCpuExecutable(
     std::unique_ptr<const BufferAssignment> assignment,
     std::unique_ptr<const HloModule> hlo_module,
     std::unique_ptr<const HloInstructionMap<string>> function_names,
-    std::unordered_map<const HloInstruction*, size_t> hlo_to_profile_idx,
     std::unordered_map<const HloInstruction*, std::unique_ptr<unsigned char[]>>
-        aligned_constants)
-    : Executable(std::move(hlo_module)),
+        aligned_constants,
+    std::unique_ptr<HloProfilePrinter> hlo_profile_printer,
+    std::unique_ptr<HloProfileIndexMap> hlo_profile_index_map)
+    : Executable(std::move(hlo_module), std::move(hlo_profile_printer),
+                 std::move(hlo_profile_index_map)),
       jit_(std::move(jit)),
       assignment_(std::move(assignment)),
       function_names_(std::move(function_names)),
-      hlo_to_profile_idx_(std::move(hlo_to_profile_idx)),
       aligned_constants_(std::move(aligned_constants)) {}
 
 // Type of the computation function we expect in the JIT.
 using ComputeFunctionType = void (*)(void*, const void*, const void**, void**,
-                                     int64*, uint64*);
+                                     int64*, int64*);
 
 // Given a pointer to an output buffer (following the CPU JIT calling
 // conventions), mark addresses that are "live". The initial pointer itself is
@@ -106,7 +107,7 @@ class Executor {
            const ServiceExecutableRunOptions* run_options,
            std::list<HloInstruction*>* pending,
            HloInstructionMap<const void*>* results, void** temps_array,
-           uint64* profile_counters_array, const BufferAssignment* assignment)
+           int64* profile_counters_array, const BufferAssignment* assignment)
       : functions_(functions),
         run_options_(run_options),
         pending_(pending),
@@ -147,7 +148,7 @@ class Executor {
   std::list<HloInstruction*>* pending_;
   HloInstructionMap<const void*>* results_;
   void** temps_array_;
-  uint64* profile_counters_array_;
+  int64* profile_counters_array_;
   tensorflow::thread::ThreadPool* thread_pool_;
   const BufferAssignment* assignment_;
 
@@ -389,9 +390,11 @@ Status ParallelCpuExecutable::ExecuteComputeFunctions(
     tensorflow::gtl::ArraySlice<se::DeviceMemoryBase> buffers,
     HloExecutionProfile* hlo_execution_profile) {
   // Allocate profiling counters for each hlo instruction that we would like to
-  // profile.  Allocate an additional profile counter for the entire
-  // computation.
-  std::vector<uint64> profile_counters(hlo_to_profile_idx_.size() + 1);
+  // profile.
+  std::vector<int64>* profile_counters = nullptr;
+  if (hlo_execution_profile) {
+    profile_counters = hlo_execution_profile->mutable_profile_counters();
+  }
 
   std::vector<void*> buffer_pointers;
   buffer_pointers.reserve(buffers.size());
@@ -441,9 +444,9 @@ Status ParallelCpuExecutable::ExecuteComputeFunctions(
   // For example, if we expect a library conv/matmul call to run at max
   // concurrency, we should not dispatch runnable instructions until the
   // library call is finished (to avoid expensive cache invalidation).
-  Executor executor(functions, run_options, &pending, &results,
-                    buffer_pointers.data(), profile_counters.data(),
-                    assignment_.get());
+  Executor executor(
+      functions, run_options, &pending, &results, buffer_pointers.data(),
+      profile_counters ? profile_counters->data() : nullptr, assignment_.get());
 
   TF_RETURN_IF_ERROR(executor.Run());
 
@@ -453,18 +456,6 @@ Status ParallelCpuExecutable::ExecuteComputeFunctions(
     tensorflow::mutex_lock lock(mutex_);
     double nanoseconds = (end_micros - start_micros) * 1000.0;
     execution_profile_.set_compute_time_ns(std::max(nanoseconds, 1.0));
-    // The last profile counter is used for the computation as a whole.
-    execution_profile_.set_compute_cycle_count(profile_counters.back());
-  }
-  if (hlo_execution_profile != nullptr) {
-    hlo_execution_profile->set_total_cycles_executed(entry_computation,
-                                                     profile_counters.back());
-
-    for (auto hlo_prof_idx : hlo_to_profile_idx_) {
-      const HloInstruction* hlo = hlo_prof_idx.first;
-      uint64 cycles_taken = profile_counters[hlo_prof_idx.second];
-      hlo_execution_profile->SetCyclesTakenBy(hlo, cycles_taken);
-    }
   }
 
   return Status::OK();
@@ -618,10 +609,5 @@ const PointsToSet& ParallelCpuExecutable::GetRootPointsToSet() const {
       module().entry_computation()->root_instruction());
 }
 
-std::unique_ptr<HloCostAnalysis> ParallelCpuExecutable::CreateCostAnalysis()
-    const {
-  return MakeUnique<HloCostAnalysis>(ShapeSizeBytes);
-}
-
 }  // namespace cpu
 }  // namespace xla
diff --git a/tensorflow/compiler/xla/service/cpu/parallel_cpu_executable.h b/tensorflow/compiler/xla/service/cpu/parallel_cpu_executable.h
index db16aaf48b..d65e3f42f3 100644
--- a/tensorflow/compiler/xla/service/cpu/parallel_cpu_executable.h
+++ b/tensorflow/compiler/xla/service/cpu/parallel_cpu_executable.h
@@ -52,10 +52,11 @@ class ParallelCpuExecutable : public Executable {
       std::unique_ptr<const BufferAssignment> assignment,
       std::unique_ptr<const HloModule> hlo_module,
       std::unique_ptr<const HloInstructionMap<string>> function_names,
-      std::unordered_map<const HloInstruction*, size_t> hlo_to_profile_idx,
       std::unordered_map<const HloInstruction*,
                          std::unique_ptr<unsigned char[]>>
-          aligned_constants);
+          aligned_constants,
+      std::unique_ptr<HloProfilePrinter> hlo_profile_printer,
+      std::unique_ptr<HloProfileIndexMap> hlo_profile_index_map);
   ~ParallelCpuExecutable() override {}
 
   StatusOr<perftools::gputools::DeviceMemoryBase> ExecuteOnStream(
@@ -95,8 +96,6 @@ class ParallelCpuExecutable : public Executable {
         "Equality test on CPU parallel executable is not implemented.");
   }
 
-  std::unique_ptr<HloCostAnalysis> CreateCostAnalysis() const override;
-
  private:
   // Allocate buffers required for execution and assign them to the elements of
   // "buffers". "buffers" should be sized to the number of buffers in buffer
@@ -143,9 +142,6 @@ class ParallelCpuExecutable : public Executable {
   // Map containing the JITted function names for each HLO instruction.
   const std::unique_ptr<const HloInstructionMap<string>> function_names_;
 
-  // Maps HLOs to their index into the profile counter array.
-  const std::unordered_map<const HloInstruction*, size_t> hlo_to_profile_idx_;
-
   // Map from HLO Constant instructions to a pointer to their literal data.
   // The data stored in the protocol buffer might be insufficiently aligned,
   // we create a sufficiently aligned copy and store it in this map.
diff --git a/tensorflow/compiler/xla/service/executable.h b/tensorflow/compiler/xla/service/executable.h
index 2135707371..08862308c9 100644
--- a/tensorflow/compiler/xla/service/executable.h
+++ b/tensorflow/compiler/xla/service/executable.h
@@ -44,8 +44,15 @@ namespace xla {
 // interface that is used for launching compiled programs across platforms.
 class Executable {
  public:
-  explicit Executable(std::unique_ptr<const HloModule> hlo_module)
-      : hlo_module_(std::move(hlo_module)) {}
+  explicit Executable(std::unique_ptr<const HloModule> hlo_module,
+                      std::unique_ptr<HloProfilePrinter> hlo_profile_printer,
+                      std::unique_ptr<HloProfileIndexMap> hlo_profile_index_map)
+      : hlo_module_(std::move(hlo_module)),
+        hlo_profile_printer_(std::move(hlo_profile_printer)),
+        hlo_profile_index_map_(std::move(hlo_profile_index_map)) {
+    CHECK_EQ(hlo_profile_printer_.get() == nullptr,
+             hlo_profile_index_map_.get() == nullptr);
+  }
   virtual ~Executable() {}
 
   // Enqueues the compilation result on the provided stream, passing the given
@@ -123,12 +130,20 @@ class Executable {
         "Equality test on this executable is not implemented.");
   }
 
+  const HloProfilePrinter& hlo_profile_printer() const {
+    CHECK(hlo_profiling_enabled());
+    return *hlo_profile_printer_;
+  }
+
+  const HloProfileIndexMap& hlo_profile_index_map() const {
+    CHECK(hlo_profiling_enabled());
+    return *hlo_profile_index_map_;
+  }
+
   // Returns whether this executable was compiled with HLO profilings support
   // enabled. If not, the caller should not expect an hlo_execution_profile
   // passed to ExecuteOnStream above to be populated during execution.
-  bool hlo_profiling_enabled() const {
-    return hlo_module_->config().hlo_profiling_enabled();
-  }
+  bool hlo_profiling_enabled() const { return hlo_profile_printer_ != nullptr; }
 
   const HloModule& module() const { return *hlo_module_; }
 
@@ -160,10 +175,6 @@ class Executable {
   static Status DumpToDirectory(const string& directory_path, string filename,
                                 const SessionModule& session_module);
 
-  // Returns a cost analysis object appropriate for the platform on which this
-  // executable can run.
-  virtual std::unique_ptr<HloCostAnalysis> CreateCostAnalysis() const = 0;
-
  protected:
   mutable tensorflow::mutex mutex_;
 
@@ -181,6 +192,9 @@ class Executable {
   // Execution count, used to generate a unique filename for each dumped
   // execution.
   int64 execution_count_ = 0;
+
+  std::unique_ptr<HloProfilePrinter> hlo_profile_printer_;
+  std::unique_ptr<HloProfileIndexMap> hlo_profile_index_map_;
 };
 
 template <typename ReturnT, typename ArgT>
@@ -200,7 +214,8 @@ StatusOr<ReturnT> Executable::ExecuteOnStreamWrapper(
   std::unique_ptr<HloExecutionProfile> profile_ptr =
       module_config().debug_options().xla_hlo_profile() &&
               hlo_profiling_enabled()
-          ? MakeUnique<HloExecutionProfile>(module(), *CreateCostAnalysis())
+          ? MakeUnique<HloExecutionProfile>(&hlo_profile_printer(),
+                                            &hlo_profile_index_map())
           : nullptr;
 
   auto return_value =
diff --git a/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc b/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc
index 92c53265d0..fcd73fd37a 100644
--- a/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc
+++ b/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc
@@ -465,10 +465,20 @@ StatusOr<std::unique_ptr<Executable>> GpuCompiler::RunBackend(
   VLOG(2) << "Printing the thunk schedule...";
   XLA_VLOG_LINES(2, thunk_schedule->ToString());
 
-  auto* gpu_executable =
-      new GpuExecutable(ptx, cubin, {cc_major, cc_minor},
-                        std::move(thunk_schedule), std::move(module),
-                        std::move(buffer_assignment), ShapeSizeBytesFunction());
+  std::unique_ptr<HloProfileIndexMap> profile_index_map;
+  std::unique_ptr<HloProfilePrinter> profile_printer;
+
+  if (module->config().hlo_profiling_enabled()) {
+    HloCostAnalysis cost_analysis(ShapeSizeBytesFunction());
+    profile_index_map = MakeUnique<HloProfileIndexMap>(*module);
+    profile_printer =
+        CreateHloProfilePrinter(*profile_index_map, cost_analysis);
+  }
+
+  auto* gpu_executable = new GpuExecutable(
+      ptx, cubin, {cc_major, cc_minor}, std::move(thunk_schedule),
+      std::move(module), std::move(buffer_assignment),
+      std::move(profile_printer), std::move(profile_index_map));
   if (embed_ir_in_executable) {
     DCHECK_NE("", ir_module_string_before_opt);
     gpu_executable->set_ir_module_string(ir_module_string_before_opt);
diff --git a/tensorflow/compiler/xla/service/gpu/gpu_executable.cc b/tensorflow/compiler/xla/service/gpu/gpu_executable.cc
index c6f23f9b05..0fd85e4fb0 100644
--- a/tensorflow/compiler/xla/service/gpu/gpu_executable.cc
+++ b/tensorflow/compiler/xla/service/gpu/gpu_executable.cc
@@ -113,14 +113,15 @@ GpuExecutable::GpuExecutable(
     std::unique_ptr<const ThunkSchedule> thunk_schedule,
     std::unique_ptr<const HloModule> hlo_module,
     std::unique_ptr<const BufferAssignment> assignment,
-    HloCostAnalysis::ShapeSizeFunction shape_size_function)
-    : Executable(std::move(hlo_module)),
+    std::unique_ptr<HloProfilePrinter> hlo_profile_printer,
+    std::unique_ptr<HloProfileIndexMap> hlo_profile_index_map)
+    : Executable(std::move(hlo_module), std::move(hlo_profile_printer),
+                 std::move(hlo_profile_index_map)),
       ptx_(ptx),
       cubin_(cubin),
       compute_capability_(compute_capability),
       thunk_schedule_(std::move(thunk_schedule)),
-      assignment_(std::move(assignment)),
-      shape_size_function_(std::move(shape_size_function)) {}
+      assignment_(std::move(assignment)) {}
 
 Status GpuExecutable::ExecuteThunks(
     const ServiceExecutableRunOptions* run_options,
@@ -358,9 +359,5 @@ const PointsToSet& GpuExecutable::GetRootPointsToSet() const {
       module().entry_computation()->root_instruction());
 }
 
-std::unique_ptr<HloCostAnalysis> GpuExecutable::CreateCostAnalysis() const {
-  return MakeUnique<HloCostAnalysis>(shape_size_function_);
-}
-
 }  // namespace gpu
 }  // namespace xla
diff --git a/tensorflow/compiler/xla/service/gpu/gpu_executable.h b/tensorflow/compiler/xla/service/gpu/gpu_executable.h
index a3815370c1..e7307e07c0 100644
--- a/tensorflow/compiler/xla/service/gpu/gpu_executable.h
+++ b/tensorflow/compiler/xla/service/gpu/gpu_executable.h
@@ -54,7 +54,8 @@ class GpuExecutable : public Executable {
                 std::unique_ptr<const ThunkSchedule> thunk_schedule,
                 std::unique_ptr<const HloModule> hlo_module,
                 std::unique_ptr<const BufferAssignment> assignment,
-                HloCostAnalysis::ShapeSizeFunction shape_size_function);
+                std::unique_ptr<HloProfilePrinter> hlo_profile_printer,
+                std::unique_ptr<HloProfileIndexMap> hlo_profile_index_map);
 
   // This should be called after set_ir_module_string.
   const string& ir_module_string() const { return ir_module_string_; }
@@ -95,8 +96,6 @@ class GpuExecutable : public Executable {
     return Unimplemented("Equality test on GPU executable is not implemented.");
   }
 
-  std::unique_ptr<HloCostAnalysis> CreateCostAnalysis() const override;
-
  private:
   // If `block_host_until_done` is false, execution will not block the host
   // until the kernels have completed. This is used as an optimization for
@@ -140,9 +139,6 @@ class GpuExecutable : public Executable {
   // memory for every output/temp buffers.
   const std::unique_ptr<const BufferAssignment> assignment_;
 
-  // Function to compute the size of a given Shape, in bytes.
-  const HloCostAnalysis::ShapeSizeFunction shape_size_function_;
-
   TF_DISALLOW_COPY_AND_ASSIGN(GpuExecutable);
 };
 
diff --git a/tensorflow/compiler/xla/service/hlo_execution_profile.cc b/tensorflow/compiler/xla/service/hlo_execution_profile.cc
index 9e256b9b37..ba75e2ef1b 100644
--- a/tensorflow/compiler/xla/service/hlo_execution_profile.cc
+++ b/tensorflow/compiler/xla/service/hlo_execution_profile.cc
@@ -40,7 +40,7 @@ HloProfileIndexMap::HloProfileIndexMap(const HloModule& module) {
   }
 }
 
-static HloProfilePrinter CreateOwnedHloProfilePrinter(
+std::unique_ptr<HloProfilePrinter> CreateHloProfilePrinter(
     const HloProfileIndexMap& hlo_profile_index_map,
     const HloCostAnalysis& cost_analysis) {
   using HloComputationInfo = HloProfilePrinter::HloComputationInfo;
@@ -108,15 +108,15 @@ static HloProfilePrinter CreateOwnedHloProfilePrinter(
     delete[] computation_infos;
   };
 
-  return HloProfilePrinter(computation_infos,
-                           hlo_profile_index_map.computation_count(), deleter);
+  return MakeUnique<HloProfilePrinter>(
+      computation_infos, hlo_profile_index_map.computation_count(), deleter);
 }
 
-HloExecutionProfile::HloExecutionProfile(const HloModule& module,
-                                         const HloCostAnalysis& cost_analysis)
-    : hlo_profile_index_map_(module),
-      hlo_profile_printer_(
-          CreateOwnedHloProfilePrinter(hlo_profile_index_map_, cost_analysis)),
+HloExecutionProfile::HloExecutionProfile(
+    const HloProfilePrinter* hlo_profile_printer,
+    const HloProfileIndexMap* hlo_profile_index_map)
+    : hlo_profile_printer_(*hlo_profile_printer),
+      hlo_profile_index_map_(*hlo_profile_index_map),
       profile_counters_(
           /*count*/ hlo_profile_index_map_.total_count(),
           /*value*/ 0) {}
@@ -131,10 +131,4 @@ uint64 HloExecutionProfile::GetCyclesTakenBy(const HloInstruction& hlo) const {
   return profile_counters_[hlo_profile_index_map_.GetProfileIndexFor(hlo)];
 }
 
-string HloExecutionProfile::ToString(
-    const DeviceDescription& device_description) const {
-  return hlo_profile_printer_.ToString(profile_counters_.data(),
-                                       device_description.clock_rate_ghz());
-}
-
 }  // namespace xla
diff --git a/tensorflow/compiler/xla/service/hlo_execution_profile.h b/tensorflow/compiler/xla/service/hlo_execution_profile.h
index 84702680c0..470fd4ce3c 100644
--- a/tensorflow/compiler/xla/service/hlo_execution_profile.h
+++ b/tensorflow/compiler/xla/service/hlo_execution_profile.h
@@ -77,6 +77,11 @@ class HloProfileIndexMap {
   std::unordered_map<const HloComputation*, int64> computation_to_profile_idx_;
 };
 
+// Create an instance of `HloProfilePrinter` that owns its memory.
+std::unique_ptr<HloProfilePrinter> CreateHloProfilePrinter(
+    const HloProfileIndexMap& hlo_profile_index_map,
+    const HloCostAnalysis& cost_analysis);
+
 // Describes how much time each HLO operation took.
 //
 // Each HloComputation takes a certain number of cycles.  This class helps break
@@ -85,8 +90,8 @@ class HloExecutionProfile {
  public:
   using DeviceDescription = perftools::gputools::DeviceDescription;
 
-  HloExecutionProfile(const HloModule& module,
-                      const HloCostAnalysis& cost_analysis);
+  HloExecutionProfile(const HloProfilePrinter* hlo_profile_printer,
+                      const HloProfileIndexMap* hlo_profile_index_map);
 
   // Record how many cycles this HLO took to execute.
   void SetCyclesTakenBy(const HloInstruction* hlo, uint64 cycles_taken);
@@ -114,15 +119,16 @@ class HloExecutionProfile {
   // for the operations in a given computation. Returns an empty string if it
   // wasn't possible to generate a printable version. cost_analysis should be a
   // clean analysis that can be used to visit the computation.
-  string ToString(const DeviceDescription& device_description) const;
+  string ToString(const DeviceDescription& device_description) const {
+    return hlo_profile_printer_.ToString(profile_counters_.data(),
+                                         device_description.clock_rate_ghz());
+  }
 
- private:
-  // hlo_profile_index_map_ maps an Hlo entity (computation or instruction) to
-  // an index in profile_counters_.
-  HloProfileIndexMap hlo_profile_index_map_;
+  std::vector<int64>* mutable_profile_counters() { return &profile_counters_; }
 
-  // Used to print profile_counters_ in a human readable form.
-  HloProfilePrinter hlo_profile_printer_;
+ private:
+  const HloProfilePrinter& hlo_profile_printer_;
+  const HloProfileIndexMap& hlo_profile_index_map_;
 
   // Stores per-Hlo profile counters.  This is the only thing that changes when
   // we execute an XLA computation.
diff --git a/tensorflow/compiler/xla/service/hlo_execution_profile_test.cc b/tensorflow/compiler/xla/service/hlo_execution_profile_test.cc
index 5ba31296ea..b1e6729e2b 100644
--- a/tensorflow/compiler/xla/service/hlo_execution_profile_test.cc
+++ b/tensorflow/compiler/xla/service/hlo_execution_profile_test.cc
@@ -72,7 +72,11 @@ TEST_F(HloExecutionProfileTest, Basic) {
   };
 
   HloCostAnalysis cost_analysis(shape_size_function);
-  HloExecutionProfile execution_profile(*hlo_module, cost_analysis);
+  HloProfileIndexMap profile_index_map(*hlo_module);
+  std::unique_ptr<HloProfilePrinter> profile_printer =
+      CreateHloProfilePrinter(profile_index_map, cost_analysis);
+  HloExecutionProfile execution_profile(profile_printer.get(),
+                                        &profile_index_map);
 
   const int64 add_cycles = 1000;
   const int64 dot_cycles = 4000;
diff --git a/tensorflow/compiler/xla/service/interpreter/executable.cc b/tensorflow/compiler/xla/service/interpreter/executable.cc
index 96f937caf9..9183a1d1bf 100644
--- a/tensorflow/compiler/xla/service/interpreter/executable.cc
+++ b/tensorflow/compiler/xla/service/interpreter/executable.cc
@@ -42,7 +42,8 @@ namespace sep = ::perftools::gputools::interpreter;
 
 InterpreterExecutable::InterpreterExecutable(
     std::unique_ptr<const HloModule> hlo_module)
-    : Executable(std::move(hlo_module)) {}
+    : Executable(std::move(hlo_module), /*hlo_profile_printer=*/nullptr,
+                 /*hlo_profile_index_map=*/nullptr) {}
 
 InterpreterExecutable::~InterpreterExecutable() {}
 
@@ -156,10 +157,5 @@ StatusOr<se::DeviceMemoryBase> InterpreterExecutable::ExecuteAsyncOnStream(
   return ShapeUtil::ByteSizeOf(shape, sizeof(void*));
 }
 
-std::unique_ptr<HloCostAnalysis> InterpreterExecutable::CreateCostAnalysis()
-    const {
-  return MakeUnique<HloCostAnalysis>(ShapeSizeBytes);
-}
-
 }  // namespace interpreter
 }  // namespace xla
diff --git a/tensorflow/compiler/xla/service/interpreter/executable.h b/tensorflow/compiler/xla/service/interpreter/executable.h
index c69b0d036d..0e87eb90bf 100644
--- a/tensorflow/compiler/xla/service/interpreter/executable.h
+++ b/tensorflow/compiler/xla/service/interpreter/executable.h
@@ -61,8 +61,6 @@ class InterpreterExecutable : public Executable {
 
   static int64 ShapeSizeBytes(const Shape& shape);
 
-  std::unique_ptr<HloCostAnalysis> CreateCostAnalysis() const override;
-
  private:
   TF_DISALLOW_COPY_AND_ASSIGN(InterpreterExecutable);
 };
diff --git a/tensorflow/compiler/xla/service/service.cc b/tensorflow/compiler/xla/service/service.cc
index 902a1afb45..d997cab83f 100644
--- a/tensorflow/compiler/xla/service/service.cc
+++ b/tensorflow/compiler/xla/service/service.cc
@@ -575,12 +575,13 @@ Service::ExecuteParallelAndRegisterResult(
   // profile.
   for (auto& index_to_profiled_stream : index_to_profiled_streams) {
     int64 device = index_to_profiled_stream.first;
-    auto& module = executables[device]->module();
     se::Stream* stream = index_to_profiled_stream.second;
-    HloExecutionProfile hlo_profile(module,
-                                    *executables[device]->CreateCostAnalysis());
-    TF_RETURN_IF_ERROR(executables[device]->PopulateExecutionProfile(
-        &hlo_profile, stream->parent()));
+    Executable* executable = executables[device];
+    const HloModule& module = executable->module();
+    HloExecutionProfile hlo_profile(&executable->hlo_profile_printer(),
+                                    &executable->hlo_profile_index_map());
+    TF_RETURN_IF_ERROR(
+        executable->PopulateExecutionProfile(&hlo_profile, stream->parent()));
     XLA_LOG_LINES(
         tensorflow::INFO,
         hlo_profile.ToString(streams[0]->parent()->GetDeviceDescription()));
-- 
GitLab


From bb287e33f725ed65a0aeb198cb55e6d5d470145b Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 22 Nov 2017 12:47:09 -0800
Subject: [PATCH 0247/1225] Small changes to optimizer and utils.

Adds a `var_list` arg to optimizer.

Removes call to `convert_to_tensor` from _momentum to support non-default
dtypes.

Implements `posdef_inv_eig` for matrix inversion.

PiperOrigin-RevId: 176690230
---
 .../contrib/kfac/python/ops/optimizer.py      | 31 ++++++++++++-------
 tensorflow/contrib/kfac/python/ops/utils.py   | 17 ++++++++--
 2 files changed, 33 insertions(+), 15 deletions(-)

diff --git a/tensorflow/contrib/kfac/python/ops/optimizer.py b/tensorflow/contrib/kfac/python/ops/optimizer.py
index a0e2fedc5c..98f8e7b230 100644
--- a/tensorflow/contrib/kfac/python/ops/optimizer.py
+++ b/tensorflow/contrib/kfac/python/ops/optimizer.py
@@ -40,6 +40,7 @@ class KfacOptimizer(gradient_descent.GradientDescentOptimizer):
                cov_ema_decay,
                damping,
                layer_collection,
+               var_list=None,
                momentum=0.,
                momentum_type="regular",
                norm_constraint=None,
@@ -66,6 +67,9 @@ class KfacOptimizer(gradient_descent.GradientDescentOptimizer):
           blocks, kronecker factors, and losses associated with the
           graph.  The layer_collection cannot be modified after KfacOptimizer's
           initialization.
+      var_list: Optional list or tuple of variables to train. Defaults to the
+          list of variables collected in the graph under the key
+          `GraphKeys.TRAINABLE_VARIABLES`.
       momentum: The momentum value for this optimizer. Only applies when
           momentum_type is 'regular' or 'adam'. (Default: 0)
       momentum_type: The type of momentum to use in this optimizer, one of
@@ -96,9 +100,9 @@ class KfacOptimizer(gradient_descent.GradientDescentOptimizer):
           or 'adam'.
     """
 
-    # We may consider determining the set of variables some other way, but for
-    # now it's just all the trainable variables.
-    variables = tf_variables.trainable_variables()
+    variables = var_list
+    if variables is None:
+      variables = tf_variables.trainable_variables()
 
     self._fisher_est = est.FisherEstimator(
         variables,
@@ -123,7 +127,7 @@ class KfacOptimizer(gradient_descent.GradientDescentOptimizer):
       raise ValueError("Momentum must be unspecified if using a momentum_type "
                        "other than 'regular' or 'adam'.")
 
-    self._momentum = ops.convert_to_tensor(momentum, name="momentum")
+    self._momentum = momentum
     self._momentum_type = momentum_type
     self._norm_constraint = norm_constraint
 
@@ -313,14 +317,17 @@ class KfacOptimizer(gradient_descent.GradientDescentOptimizer):
         self._batch_size, dtype=fft_precon_grads[0].dtype)
 
     # compute the entries of the 2x2 matrix
-    m_11 = (_inner_product_list(fft_precon_grads, fft_precon_grads) / batch_size
-            + self.damping * _inner_product_list(precon_grads, precon_grads))
+    m_11 = (
+        _inner_product_list(fft_precon_grads, fft_precon_grads) / batch_size +
+        self.damping * _inner_product_list(precon_grads, precon_grads))
 
-    m_21 = (_inner_product_list(fft_prev_updates, fft_precon_grads) / batch_size
-            + self.damping * _inner_product_list(prev_updates, precon_grads))
+    m_21 = (
+        _inner_product_list(fft_prev_updates, fft_precon_grads) / batch_size +
+        self.damping * _inner_product_list(prev_updates, precon_grads))
 
-    m_22 = (_inner_product_list(fft_prev_updates, fft_prev_updates) / batch_size
-            + self.damping * _inner_product_list(prev_updates, prev_updates))
+    m_22 = (
+        _inner_product_list(fft_prev_updates, fft_prev_updates) / batch_size +
+        self.damping * _inner_product_list(prev_updates, prev_updates))
 
     def non_zero_prevupd_case():
       r"""Computes optimal (alpha, mu) given non-zero previous update.
@@ -406,8 +413,8 @@ class KfacOptimizer(gradient_descent.GradientDescentOptimizer):
       grads = list(grad for (grad, _) in grads_and_vars)
       variables = list(var for (_, var) in grads_and_vars)
       # previous updates are the negative velocities (up to scaling by LR)
-      prev_updates = list(-self._zeros_slot(var, "velocity", self._name)
-                          for var in variables)
+      prev_updates = list(
+          -self._zeros_slot(var, "velocity", self._name) for var in variables)
 
       # Compute optimal velocity update parameters according to quadratic model
       alpha, mu, _ = self._compute_qmodel_hyperparams(
diff --git a/tensorflow/contrib/kfac/python/ops/utils.py b/tensorflow/contrib/kfac/python/ops/utils.py
index ca6fb655b4..d5461c9f2e 100644
--- a/tensorflow/contrib/kfac/python/ops/utils.py
+++ b/tensorflow/contrib/kfac/python/ops/utils.py
@@ -28,7 +28,6 @@ from tensorflow.python.ops import linalg_ops
 from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import random_ops
 
-
 # Method used for inverting matrices.
 POSDEF_INV_METHOD = "cholesky"
 
@@ -202,9 +201,18 @@ def posdef_inv_cholesky(tensor, identity, damping):
   return linalg_ops.cholesky_solve(chol, identity)
 
 
+def posdef_inv_eig(tensor, identity, damping):
+  """Computes inverse(tensor + damping * identity) with eigendecomposition."""
+  eigenvalues, eigenvectors = linalg_ops.self_adjoint_eig(
+      tensor + damping * identity)
+  return math_ops.matmul(
+      eigenvectors / eigenvalues, eigenvectors, transpose_b=True)
+
+
 posdef_inv_funcs = {
     "matrix_inverse": posdef_inv_matrix_inverse,
     "cholesky": posdef_inv_cholesky,
+    "eig": posdef_inv_eig,
 }
 
 
@@ -261,8 +269,8 @@ def fwd_gradients(ys, xs, grad_xs=None, stop_gradients=None):
   # generated by the first gradients_impl.gradients call.
 
   us = [array_ops.zeros_like(y) + float("nan") for y in ys]
-  dydxs = gradients_impl.gradients(ys, xs, grad_ys=us,
-                                   stop_gradients=stop_gradients)
+  dydxs = gradients_impl.gradients(
+      ys, xs, grad_ys=us, stop_gradients=stop_gradients)
 
   # Deal with strange types that gradients_impl.gradients returns but can't
   # deal with.
@@ -278,3 +286,6 @@ def fwd_gradients(ys, xs, grad_xs=None, stop_gradients=None):
   dysdx = gradients_impl.gradients(dydxs, us, grad_ys=grad_xs)
 
   return dysdx
+
+# TODO(b/69623235): Add a function for finding tensors that share gradients
+# to eliminate redundant fisher factor computations.
-- 
GitLab


From 21040b72a38f9a7ff0c3fd6cafebbacc5286b596 Mon Sep 17 00:00:00 2001
From: Alex Sergeev <alexander.sergeev@live.com>
Date: Wed, 22 Nov 2017 12:58:13 -0800
Subject: [PATCH 0248/1225] Update Custom Op instructions to use tf.sysconfg
 flags (#14307)

* Update test_user_ops.sh to use new tf.sysconfig APIs

* Fix the test

* Modify the test to not use 'eval'

* Update docs on the website

* Update log message
---
 tensorflow/docs_src/extend/adding_an_op.md    | 10 +++----
 .../tools/ci_build/builds/test_user_ops.sh    | 28 +++++++++----------
 2 files changed, 19 insertions(+), 19 deletions(-)

diff --git a/tensorflow/docs_src/extend/adding_an_op.md b/tensorflow/docs_src/extend/adding_an_op.md
index a3a0272059..c52279b212 100644
--- a/tensorflow/docs_src/extend/adding_an_op.md
+++ b/tensorflow/docs_src/extend/adding_an_op.md
@@ -341,9 +341,9 @@ Assuming you have `g++` installed, here is the sequence of commands you can use
 to compile your op into a dynamic library.
 
 ```bash
-TF_INC=$(python -c 'import tensorflow as tf; print(tf.sysconfig.get_include())')
-TF_LIB=$(python -c 'import tensorflow as tf; print(tf.sysconfig.get_lib())')
-g++ -std=c++11 -shared zero_out.cc -o zero_out.so -fPIC -I$TF_INC -I$TF_INC/external/nsync/public -L$TF_LIB -ltensorflow_framework -O2
+TF_CFLAGS=( $(python -c 'import tensorflow as tf; print(" ".join(tf.sysconfig.get_compile_flags()))') )
+TF_LFLAGS=( $(python -c 'import tensorflow as tf; print(" ".join(tf.sysconfig.get_link_flags()))') )
+g++ -std=c++11 -shared zero_out.cc -o zero_out.so -fPIC ${TF_CFLAGS[@]} ${TF_LFLAGS[@]} -O2
 ```
 
 On Mac OS X, the additional flag "-undefined dynamic_lookup" is required when
@@ -1228,10 +1228,10 @@ into a single dynamically loadable library:
 
 ```bash
 nvcc -std=c++11 -c -o cuda_op_kernel.cu.o cuda_op_kernel.cu.cc \
--I $TF_INC -I$TF_INC/external/nsync/public -D GOOGLE_CUDA=1 -x cu -Xcompiler -fPIC
+  ${TF_CFLAGS[@]} -D GOOGLE_CUDA=1 -x cu -Xcompiler -fPIC
 
 g++ -std=c++11 -shared -o cuda_op_kernel.so cuda_op_kernel.cc \
-cuda_op_kernel.cu.o -I $TF_INC -I$TF_INC/external/nsync/public -fPIC -lcudart -L$TF_LIB -ltensorflow_framework
+  cuda_op_kernel.cu.o ${TF_CFLAGS[@]} -fPIC -lcudart ${TF_LFLAGS[@]}
 ```
 
 `cuda_op_kernel.so` produced above can be loaded as usual in Python, using the
diff --git a/tensorflow/tools/ci_build/builds/test_user_ops.sh b/tensorflow/tools/ci_build/builds/test_user_ops.sh
index 4f1c61b8e9..358f82ac5d 100755
--- a/tensorflow/tools/ci_build/builds/test_user_ops.sh
+++ b/tensorflow/tools/ci_build/builds/test_user_ops.sh
@@ -76,17 +76,17 @@ echo "PYTHON_BIN_PATH: ${PYTHON_BIN_PATH}"
 
 pushd "${TMP_DIR}"
 
-# Obtain paths include and lib paths to the TensorFlow installation
-TF_INC=$("${PYTHON_BIN_PATH}" \
-         -c 'import tensorflow as tf; print(tf.sysconfig.get_include())')
-TF_LIB=$("${PYTHON_BIN_PATH}" \
-         -c 'import tensorflow as tf; print(tf.sysconfig.get_lib())')
-
-if [[ -z "${TF_INC}" ]]; then
-  die "FAILED to determine TensorFlow include path"
+# Obtain compilation and linking flags
+TF_CFLAGS=( $("${PYTHON_BIN_PATH}" \
+	      -c 'import tensorflow as tf; print(" ".join(tf.sysconfig.get_compile_flags()))') )
+TF_LFLAGS=( $("${PYTHON_BIN_PATH}" \
+	      -c 'import tensorflow as tf; print(" ".join(tf.sysconfig.get_link_flags()))') )
+
+if [[ -z "${TF_CFLAGS}" || -z "${TF_LFLAGS}" ]]; then
+  die "FAILED to determine TensorFlow compilation or linking flags"
 else
-  echo "TensorFlow include path: ${TF_INC}"
-  TF_INCLUDE_PATH="-I${TF_INC} -I${TF_INC}/external/nsync/public"
+  echo "TensorFlow compile flags: ${TF_CFLAGS[@]}"
+  echo "TensorFlow link flags: ${TF_LFLAGS[@]}"
 fi
 
 # Check g++ availability
@@ -145,7 +145,7 @@ if [[ ${IS_GPU} == "0" ]]; then
 
   "${GPP_BIN}" -std=c++11 ${EXTRA_GPP_FLAGS} \
     -shared "${SRC_FILE}" -o "${USER_OP_SO}" \
-    -fPIC ${TF_INCLUDE_PATH} -L "${TF_LIB}" -ltensorflow_framework  || \
+    -fPIC ${TF_CFLAGS[@]} ${TF_LFLAGS[@]}  || \
     die "g++ compilation of ${SRC_FILE} FAILED"
 
 else
@@ -184,7 +184,7 @@ else
   OP_KERNEL_O=$(echo "${OP_KERNEL_CC}" | sed -e 's/\.cc/\.o/')
   "${NVCC_BIN}" -std=c++11 \
       -c -o "${OP_KERNEL_O}" "${OP_KERNEL_CU}" \
-      ${TF_INCLUDE_PATH} -D GOOGLE_CUDA=1 -x cu -Xcompiler -fPIC || \
+      ${TF_CFLAGS[@]} -D GOOGLE_CUDA=1 -x cu -Xcompiler -fPIC || \
       die "nvcc compilation of ${OP_KERNEL_CC} FAILED"
 
   CUDA_LIB_DIR="/usr/local/cuda/lib64"
@@ -203,8 +203,8 @@ else
   USER_OP_SO="add_one.so"
   "${GPP_BIN}" -std=c++11 ${EXTRA_GPP_FLAGS} \
       -shared -o "${USER_OP_SO}" "${OP_KERNEL_CC}" \
-      "${OP_KERNEL_O}" ${TF_INCLUDE_PATH} -L "${CUDA_LIB_DIR}" -L "${TF_LIB}" \
-      -fPIC -lcudart -ltensorflow_framework || \
+      "${OP_KERNEL_O}" ${TF_CFLAGS[@]} -L "${CUDA_LIB_DIR}" ${TF_LFLAGS[@]} \
+      -fPIC -lcudart || \
       die "g++ compilation of ${OP_KERNEL_CC}" FAILED
 fi
 
-- 
GitLab


From 1913d96f20e6d9650f4841a3a17daf83fa3a45bb Mon Sep 17 00:00:00 2001
From: Yifei Feng <fengyifei2026@gmail.com>
Date: Wed, 22 Nov 2017 12:59:58 -0800
Subject: [PATCH 0249/1225] Fix math_ops.py

---
 tensorflow/python/ops/math_ops.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/python/ops/math_ops.py b/tensorflow/python/ops/math_ops.py
index 939d45a022..03e9e92e31 100644
--- a/tensorflow/python/ops/math_ops.py
+++ b/tensorflow/python/ops/math_ops.py
@@ -1277,7 +1277,7 @@ def _may_reduce_to_scalar(keepdims, axis, reduction_indices, output):
   return output
 
 
-@deprecation.deprecated_args(
+@deprecated_args(
     None, "keep_dims is deprecated, use keepdims instead", "keep_dims")
 def reduce_sum(input_tensor,
                axis=None,
-- 
GitLab


From 8067aa0862b7cd708f3a31accc2d232bafaf9442 Mon Sep 17 00:00:00 2001
From: Skye Wanderman-Milne <skyewm@google.com>
Date: Wed, 22 Nov 2017 13:26:25 -0800
Subject: [PATCH 0250/1225] Create Python Operations for the TF_Operations
 created by import_graph_def.

This change also introduces Python functionality for iterating through
every TF_Operation in the graph and every newly-added TF_Operation via
TF_GraphNextOperation.

PiperOrigin-RevId: 176694180
---
 tensorflow/python/client/tf_session.i        | 18 ++++++
 tensorflow/python/framework/c_api_util.py    | 38 ++++++++++++
 tensorflow/python/framework/importer.py      | 14 ++++-
 tensorflow/python/framework/importer_test.py | 61 ++++++++++++++++----
 4 files changed, 118 insertions(+), 13 deletions(-)

diff --git a/tensorflow/python/client/tf_session.i b/tensorflow/python/client/tf_session.i
index 41c707ae63..ef6f28ce07 100644
--- a/tensorflow/python/client/tf_session.i
+++ b/tensorflow/python/client/tf_session.i
@@ -315,6 +315,24 @@ tensorflow::ImportNumpy();
   $2 = inputs.size();
 }
 
+// Typemaps for TF_GraphNextOperation().
+%typemap(in) size_t* pos (size_t pos) {
+  pos = PyLong_AsUnsignedLong($input);
+  $1 = &pos;
+}
+
+// Returns a (TF_Operation*, int pos) tuple.
+%typemap(argout) size_t* pos {
+  PyObject* new_result = PyTuple_New(2);
+  if (!new_result) {
+    SWIG_exception_fail(SWIG_MemoryError, "$symname: couldn't create tuple");
+  }
+  // Steals $result reference
+  PyTuple_SET_ITEM(new_result, 0, $result);
+  PyTuple_SET_ITEM(new_result, 1, PyLong_FromSize_t(*$1));
+  $result = new_result;
+}
+
 // TODO(skyewm): SWIG emits a warning for the const char* in TF_WhileParams,
 // skip for now
 %ignore TF_WhileParams;
diff --git a/tensorflow/python/framework/c_api_util.py b/tensorflow/python/framework/c_api_util.py
index 814436fc7a..6c522de452 100644
--- a/tensorflow/python/framework/c_api_util.py
+++ b/tensorflow/python/framework/c_api_util.py
@@ -110,3 +110,41 @@ def tf_output(c_op, index):
   ret.oper = c_op
   ret.index = index
   return ret
+
+
+def tf_operations(graph):
+  """Generator that yields every TF_Operation in `graph`.
+
+  Args:
+    graph: Graph
+
+  Yields:
+    wrapped TF_Operation
+  """
+  # pylint: disable=protected-access
+  pos = 0
+  c_op, pos = c_api.TF_GraphNextOperation(graph._c_graph, pos)
+  while c_op is not None:
+    yield c_op
+    c_op, pos = c_api.TF_GraphNextOperation(graph._c_graph, pos)
+  # pylint: enable=protected-access
+
+
+def new_tf_operations(graph):
+  """Generator that yields newly-added TF_Operations in `graph`.
+
+  Specifically, yields TF_Operations that don't have associated Operations in
+  `graph`. This is useful for processing nodes added by the C API.
+
+  Args:
+    graph: Graph
+
+  Yields:
+    wrapped TF_Operation
+  """
+  # TODO(b/69679162): do this more efficiently
+  for c_op in tf_operations(graph):
+    try:
+      graph._get_operation_by_tf_operation(c_op)  # pylint: disable=protected-access
+    except KeyError:
+      yield c_op
diff --git a/tensorflow/python/framework/importer.py b/tensorflow/python/framework/importer.py
index e4b94e1a34..c00b9da0df 100644
--- a/tensorflow/python/framework/importer.py
+++ b/tensorflow/python/framework/importer.py
@@ -194,6 +194,14 @@ def _FindAttrInOpDef(attr_name, op_def):
   return None
 
 
+def _ProcessNewOps(graph):
+  """Processes the newly-added TF_Operations in `graph`."""
+  for c_op in c_api_util.new_tf_operations(graph):
+    graph._create_op_from_tf_operation(c_op)  # pylint: disable=protected-access
+
+  # TODO(skyewm): colocation logic
+
+
 @deprecated_args(None, 'Please file an issue at '
                  'https://github.com/tensorflow/tensorflow/issues if you depend'
                  ' on this feature.',
@@ -257,11 +265,13 @@ def import_graph_def(graph_def, input_map=None, return_elements=None,
   if graph._c_graph:  # pylint: disable=protected-access
     scoped_options = c_api_util.ScopedTFImportGraphDefOptions()
 
-    with errors.raise_exception_on_not_ok_status() as status:
-      with c_api_util.tf_buffer(graph_def.SerializeToString()) as serialized:
+    with c_api_util.tf_buffer(graph_def.SerializeToString()) as serialized:
+      with errors.raise_exception_on_not_ok_status() as status:
         c_api.TF_GraphImportGraphDefWithResults(
             graph._c_graph, serialized, scoped_options.options, status)  # pylint: disable=protected-access
 
+    _ProcessNewOps(graph)
+
     if return_elements is not None:
       raise ValueError('return_elements not yet implemented with C API')
     return None
diff --git a/tensorflow/python/framework/importer_test.py b/tensorflow/python/framework/importer_test.py
index d27ec1e30c..8984282c68 100644
--- a/tensorflow/python/framework/importer_test.py
+++ b/tensorflow/python/framework/importer_test.py
@@ -65,19 +65,58 @@ class ImportGraphDefTest(test.TestCase):
     importer.import_graph_def(
         self._MakeGraphDef("""
         node { name: 'A' op: 'IntOutputFloatOutput' }
-          node { name: 'B' op: 'ListOutput'
-                 attr { key: 'T'
-                        value { list { type: DT_INT32 type: DT_FLOAT } } } }
-          node { name: 'C' op: 'ListInput'
-                 attr { key: 'N' value { i: 2 } }
-                 attr { key: 'T' value { type: DT_INT32 } }
-                 input: 'A:0' input: 'B:0' }
-          node { name: 'D' op: 'ListInput'
-                 attr { key: 'N' value { i: 2 } }
-                 attr { key: 'T' value { type: DT_FLOAT } }
-                 input: 'A:1' input: 'B:1' }
+        node { name: 'B' op: 'ListOutput'
+               attr { key: 'T'
+                      value { list { type: DT_INT32 type: DT_FLOAT } } } }
+        node { name: 'C' op: 'ListInput'
+               attr { key: 'N' value { i: 2 } }
+               attr { key: 'T' value { type: DT_INT32 } }
+               input: 'A:0' input: 'B:0' }
+        node { name: 'D' op: 'ListInput'
+               attr { key: 'N' value { i: 2 } }
+               attr { key: 'T' value { type: DT_FLOAT } }
+               input: 'A:1' input: 'B:1' }
           """))
 
+    graph = ops.get_default_graph()
+    a = graph.get_operation_by_name("A")
+    b = graph.get_operation_by_name("B")
+    c = graph.get_operation_by_name("C")
+    d = graph.get_operation_by_name("D")
+
+    # Assert that the import process creates distinct tensors.
+    self.assertNotEqual(a.outputs[0].name, a.outputs[1].name)
+    self.assertNotEqual(b.outputs[0].name, b.outputs[1].name)
+    self.assertNotEqual(a.outputs[0].name, b.outputs[0].name)
+    self.assertNotEqual(a.outputs[0].name, b.outputs[1].name)
+    self.assertNotEqual(a.outputs[1].name, b.outputs[0].name)
+    self.assertNotEqual(a.outputs[1].name, b.outputs[1].name)
+
+    # Assert that the ops are connected according to the GraphDef topology.
+    self.assertEqual(c.inputs[0], a.outputs[0])
+    self.assertEqual(c.inputs[1], b.outputs[0])
+    self.assertEqual(d.inputs[0], a.outputs[1])
+    self.assertEqual(d.inputs[1], b.outputs[1])
+
+    # Check the types of the returned ops and tensors.
+    self.assertEqual(a.type, "IntOutputFloatOutput")
+    self.assertEqual(b.type, "ListOutput")
+    self.assertEqual(c.type, "ListInput")
+    self.assertEqual(d.type, "ListInput")
+    self.assertEqual(a.outputs[0].dtype, dtypes.int32)
+    self.assertEqual(a.outputs[1].dtype, dtypes.float32)
+    self.assertEqual(b.outputs[0].dtype, dtypes.int32)
+    self.assertEqual(b.outputs[1].dtype, dtypes.float32)
+
+    # Check the names of the returned ops.
+    self.assertEqual(a.name, "A")
+    self.assertEqual(b.name, "B")
+    self.assertEqual(c.name, "C")
+    self.assertEqual(d.name, "D")
+
+    # Check that the op_def is still available.
+    self.assertNotEqual(None, a.op_def)
+
   def testBasic(self):
     with ops.Graph().as_default():
       a, b, c, d = importer.import_graph_def(
-- 
GitLab


From 07c81f4f3699d255faa88ddbe50b731223090ba1 Mon Sep 17 00:00:00 2001
From: Yifei Feng <yifeif@google.com>
Date: Wed, 22 Nov 2017 13:33:01 -0800
Subject: [PATCH 0251/1225] Fix core:api test

---
 .../api_def/base_api/api_def_UniqueV2.pbtxt   | 47 +++++++++++++++++++
 .../base_api/api_def_UnsortedSegmentSum.pbtxt |  2 +
 2 files changed, 49 insertions(+)
 create mode 100644 tensorflow/core/api_def/base_api/api_def_UniqueV2.pbtxt

diff --git a/tensorflow/core/api_def/base_api/api_def_UniqueV2.pbtxt b/tensorflow/core/api_def/base_api/api_def_UniqueV2.pbtxt
new file mode 100644
index 0000000000..cd7ec6e551
--- /dev/null
+++ b/tensorflow/core/api_def/base_api/api_def_UniqueV2.pbtxt
@@ -0,0 +1,47 @@
+op {
+  graph_op_name: "UniqueV2"
+  in_arg {
+    name: "x"
+    description: <<END
+A `Tensor`.
+END
+  }
+  in_arg {
+    name: "axis"
+    description: <<END
+A `Tensor` of type `int64` (default: 0). The axis of the Tensor to
+find the unique elements.
+END
+  }
+  out_arg {
+    name: "y"
+    description: <<END
+A `Tensor`. Unique elements along the `axis` of `Tensor` x.
+END
+  }
+  out_arg {
+    name: "idx"
+    description: <<END
+A 1-D Tensor. Has the same type as x that contains the index of each
+value of x in the output y.
+END
+  }
+  summary: "Finds unique elements in a 1-D tensor."
+  description: <<END
+This operation returns a tensor `y` containing all of the unique elements of `x`
+sorted in the same order that they occur in `x`. This operation also returns a
+tensor `idx` the same size as `x` that contains the index of each value of `x`
+in the unique output `y`. In other words:
+
+`y[idx[i]] = x[i] for i in [0, 1,...,rank(x) - 1]`
+
+For example:
+
+```
+# tensor 'x' is [1, 1, 2, 4, 4, 4, 7, 8, 8]
+y, idx = unique(x)
+y ==> [1, 2, 4, 7, 8]
+idx ==> [0, 0, 1, 2, 2, 2, 3, 4, 4]
+```
+END
+}
diff --git a/tensorflow/core/api_def/base_api/api_def_UnsortedSegmentSum.pbtxt b/tensorflow/core/api_def/base_api/api_def_UnsortedSegmentSum.pbtxt
index 0a3355cdbc..77a96d1e03 100644
--- a/tensorflow/core/api_def/base_api/api_def_UnsortedSegmentSum.pbtxt
+++ b/tensorflow/core/api_def/base_api/api_def_UnsortedSegmentSum.pbtxt
@@ -26,6 +26,8 @@ need not be sorted and need not cover all values in the full
 range of valid values.
 
 If the sum is empty for a given segment ID `i`, `output[i] = 0`.
+If the given segment ID `i` is negative, the value is dropped and will not be
+added to the sum of the segment.
 
 `num_segments` should equal the number of distinct segment IDs.
 
-- 
GitLab


From c5b8a5ed86e133ccee62b630108ea53213df2c86 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 22 Nov 2017 13:29:47 -0800
Subject: [PATCH 0252/1225] Minor toco changes to support new features in
 tfmini.

PiperOrigin-RevId: 176694498
---
 .../contrib/lite/toco/export_tensorflow.cc    | 30 ++++++-
 tensorflow/contrib/lite/toco/toco_tooling.cc  | 82 +++++++++++--------
 tensorflow/contrib/lite/toco/tooling_util.cc  | 25 +++++-
 3 files changed, 100 insertions(+), 37 deletions(-)

diff --git a/tensorflow/contrib/lite/toco/export_tensorflow.cc b/tensorflow/contrib/lite/toco/export_tensorflow.cc
index 625a4dd83c..e18cf46c69 100644
--- a/tensorflow/contrib/lite/toco/export_tensorflow.cc
+++ b/tensorflow/contrib/lite/toco/export_tensorflow.cc
@@ -35,8 +35,11 @@ limitations under the License.
 #include "tensorflow/core/framework/types.pb.h"
 #include "tensorflow/core/platform/logging.h"
 
+using tensorflow::DT_BOOL;
 using tensorflow::DT_FLOAT;
 using tensorflow::DT_INT32;
+using tensorflow::DT_INT64;
+using tensorflow::DT_UINT8;
 using tensorflow::GraphDef;
 using tensorflow::TensorProto;
 
@@ -1500,10 +1503,29 @@ void ConvertOperator(const Model& model, const Operator& src_op,
   }
 }
 
-void AddPlaceholder(const string& name, GraphDef* tensorflow_graph) {
+void AddPlaceholder(const string& name, ArrayDataType type,
+                    GraphDef* tensorflow_graph) {
   auto* placeholder = tensorflow_graph->add_node();
   placeholder->set_op("Placeholder");
-  (*placeholder->mutable_attr())["dtype"].set_type(DT_FLOAT);
+  switch (type) {
+    case ArrayDataType::kBool:
+      (*placeholder->mutable_attr())["dtype"].set_type(DT_BOOL);
+      break;
+    case ArrayDataType::kFloat:
+      (*placeholder->mutable_attr())["dtype"].set_type(DT_FLOAT);
+      break;
+    case ArrayDataType::kUint8:
+      (*placeholder->mutable_attr())["dtype"].set_type(DT_UINT8);
+      break;
+    case ArrayDataType::kInt32:
+      (*placeholder->mutable_attr())["dtype"].set_type(DT_INT32);
+      break;
+    case ArrayDataType::kInt64:
+      (*placeholder->mutable_attr())["dtype"].set_type(DT_INT64);
+      break;
+    default:
+      LOG(FATAL) << "Unexpected data type in array \"" << name << "\"";
+  }
   placeholder->set_name(name);
 }
 
@@ -1531,7 +1553,9 @@ void AddPlaceholderForRNNState(const Model& model, const string& name, int size,
 void ExportTensorFlowGraphDefImplementation(const Model& model,
                                             GraphDef* tensorflow_graph) {
   for (const auto& input_array : model.flags.input_arrays()) {
-    AddPlaceholder(input_array.name(), tensorflow_graph);
+    AddPlaceholder(input_array.name(),
+                   model.arrays.at(input_array.name())->data_type,
+                   tensorflow_graph);
   }
   for (const auto& rnn_state : model.flags.rnn_states()) {
     AddPlaceholderForRNNState(model, rnn_state.state_array(), rnn_state.size(),
diff --git a/tensorflow/contrib/lite/toco/toco_tooling.cc b/tensorflow/contrib/lite/toco/toco_tooling.cc
index eabc145ad4..ca092b2d72 100644
--- a/tensorflow/contrib/lite/toco/toco_tooling.cc
+++ b/tensorflow/contrib/lite/toco/toco_tooling.cc
@@ -85,38 +85,57 @@ void MakeGeneralGraphTransformationsSet(
   transformations->Add(new MakeInitialDequantizeOperator);
 }
 
-void SetArrayFinalDataTypes(const TocoFlags& toco_flags, Model* model) {
-  const bool output_supports_only_float =
-      toco_flags.output_format() == TENSORFLOW_GRAPHDEF;
+bool SupportsQuantization(FileFormat format) {
+  return (format == GRAPHVIZ_DOT || format == TFLITE);
+  ;
+}
+
+bool SupportsFusedActivationFunction(FileFormat format) {
+  return (format == GRAPHVIZ_DOT || format == TFLITE);
+}
+
+bool SupportsLstmCell(FileFormat format) {
+  return (format == TENSORFLOW_GRAPHDEF || format == GRAPHVIZ_DOT);
+}
+
+bool SupportsPreallocatedWorkspace(FileFormat format) {
+  return (format == GRAPHVIZ_DOT || format == TFLITE);
+}
 
-  ArrayDataType specified_final_data_type = ArrayDataType::kNone;
+bool IsRealValued(toco::ArrayDataType type) {
+  return static_cast<bool>(type == toco::ArrayDataType::kFloat ||
+                           type == toco::ArrayDataType::kUint8);
+}
+
+void SetFinalDataTypeOnInputs(const TocoFlags& toco_flags, Model* model) {
+  const FileFormat output_format = toco_flags.output_format();
+  ArrayDataType type;
   if (toco_flags.has_inference_input_type()) {
-    specified_final_data_type =
-        ConvertIODataTypeToArrayDataType(toco_flags.inference_input_type());
+    type = ConvertIODataTypeToArrayDataType(toco_flags.inference_input_type());
   } else if (toco_flags.has_inference_type()) {
-    specified_final_data_type =
-        ConvertIODataTypeToArrayDataType(toco_flags.inference_type());
-  }
-  ArrayDataType final_data_type = ArrayDataType::kNone;
-  if (output_supports_only_float) {
-    QCHECK(specified_final_data_type == ArrayDataType::kNone ||
-           specified_final_data_type == ArrayDataType::kFloat);
-    final_data_type = ArrayDataType::kFloat;
+    type = ConvertIODataTypeToArrayDataType(toco_flags.inference_type());
+  } else if (!SupportsQuantization(output_format)) {
+    // Data type is implicitly float for non-quantized formats
+    type = ArrayDataType::kFloat;
   } else {
-    final_data_type = specified_final_data_type;
+    // Nothing to do. Data types stay as-is.
+    return;
   }
+
   for (int i = 0; i < model->flags.input_arrays_size(); i++) {
-    auto* array = model->arrays[model->flags.input_arrays(i).name()].get();
+    string const& array_name = model->flags.input_arrays(i).name();
+    auto* array = model->arrays[array_name].get();
     // Note that the notion of changing data types only applies to real-numbers
     // arrays (see the documentation for inference_input_type).
     // TODO(benoitjacob) this is assuming that uint8 arrays are quantized,
     // i.e. represent real numbers by means of quantization parameters,
     // and not plain integer uint8 input arrays.
-    const bool is_real_numbers = array->data_type == ArrayDataType::kFloat ||
-                                 array->data_type == ArrayDataType::kUint8;
-    if (is_real_numbers) {
-      array->final_data_type = final_data_type;
+    if (!IsRealValued(array->data_type)) {
+      // Ignore non-real data types.
+      continue;
     }
+
+    array->final_data_type = type;
   }
 }
 
@@ -155,23 +174,21 @@ void Transform(const TocoFlags& toco_flags, Model* model) {
   const FileFormat output_format = toco_flags.output_format();
   const IODataType inference_type = toco_flags.inference_type();
 
-  const bool output_is_tflite = output_format == TFLITE;
-
-  const bool output_is_tflite_quantized =
-      output_is_tflite && inference_type == QUANTIZED_UINT8;
+  const bool quantize_output =
+      SupportsQuantization(output_format) && inference_type == QUANTIZED_UINT8;
 
-  if (output_is_tflite_quantized) {
+  if (quantize_output) {
     QCHECK_NE(toco_flags.inference_input_type(), FLOAT)
         << "Quantized inference is not allowed with float inputs.";
   }
 
-  SetArrayFinalDataTypes(toco_flags, model);
+  SetFinalDataTypeOnInputs(toco_flags, model);
 
   GraphTransformationsSet transformations;
   MakeGeneralGraphTransformationsSet(&transformations);
   auto* remove_trivial_reshape = new RemoveTrivialReshape;
   transformations.Add(remove_trivial_reshape);
-  if (output_format == TFLITE) {
+  if (SupportsFusedActivationFunction(output_format)) {
     transformations.Add(new FuseActivationFunctions);
   } else {
     transformations.Add(new UnfuseActivationFunctions);
@@ -190,25 +207,24 @@ void Transform(const TocoFlags& toco_flags, Model* model) {
     // easy to pass a new toco flag. Once that is resolved on the DarwiNN
     // tests side, the special-casing of DarwiNN here can go away.
     // TODO(benoitjacob): so drop it when we can.
-    if ((output_is_tflite_quantized &&
-         toco_flags.reorder_across_fake_quant())) {
+    if ((quantize_output && toco_flags.reorder_across_fake_quant())) {
       transformations.Add(new DropFakeQuant);
     }
   }
   transformations.Add(new ConvertPureConvToDepthwise);
   // TFLite export does not yet support fused LSTM cell.
-  if (output_format == TENSORFLOW_GRAPHDEF) {
+  if (SupportsLstmCell(output_format)) {
     transformations.Add(new IdentifyLstmCell);
   }
   transformations.Add(new ResolveConstantConcatenation);
   RunGraphTransformations(model, "general graph transformations",
                           transformations);
-  if (output_is_tflite_quantized) {
+  if (quantize_output) {
     RunGraphTransformations(model, "pre-quantization graph transformations",
                             {new HardcodeMinMax, new DropFakeQuant});
   }
 
-  if (output_is_tflite_quantized) {
+  if (quantize_output) {
     if (toco_flags.has_default_ranges_min() &&
         toco_flags.has_default_ranges_max()) {
       UseDefaultMinMaxRangeValues(model, toco_flags.default_ranges_min(),
@@ -239,7 +255,7 @@ void Transform(const TocoFlags& toco_flags, Model* model) {
     CheckUnsupportedOperations(*model);
   }
 
-  if (output_is_tflite) {
+  if (SupportsPreallocatedWorkspace(output_format)) {
     AllocateTransientArrays(model, kDefaultTransientDataAlignment);
     LogDump(kLogLevelModelChanged, "AFTER ALLOCATION", *model);
   }
diff --git a/tensorflow/contrib/lite/toco/tooling_util.cc b/tensorflow/contrib/lite/toco/tooling_util.cc
index 3ee060f9b9..639b5f193c 100644
--- a/tensorflow/contrib/lite/toco/tooling_util.cc
+++ b/tensorflow/contrib/lite/toco/tooling_util.cc
@@ -294,6 +294,7 @@ void LogArray(int log_level, const Model& model, const string& name) {
   VLOG(log_level) << "Array: " << name;
   switch (array.data_type) {
     case ArrayDataType::kNone:
+      VLOG(log_level) << "  Data type:";
       break;
     case ArrayDataType::kFloat:
       VLOG(log_level) << "  Data type: kFloat";
@@ -309,6 +310,24 @@ void LogArray(int log_level, const Model& model, const string& name) {
                       << static_cast<int>(array.data_type) << ")";
       break;
   }
+  switch (array.final_data_type) {
+    case ArrayDataType::kNone:
+      VLOG(log_level) << "  Final type:";
+      break;
+    case ArrayDataType::kFloat:
+      VLOG(log_level) << "  Final type: kFloat";
+      break;
+    case ArrayDataType::kInt32:
+      VLOG(log_level) << "  Final type: kInt32";
+      break;
+    case ArrayDataType::kUint8:
+      VLOG(log_level) << "  Final type: kUint8";
+      break;
+    default:
+      VLOG(log_level) << "  Final type: other (numerical value: "
+                      << static_cast<int>(array.data_type) << ")";
+      break;
+  }
   if (array.buffer) {
     VLOG(log_level) << "  Constant Buffer";
   }
@@ -1562,7 +1581,11 @@ void CheckFinalDataTypesSatisfied(const Model& model) {
   for (const auto& array_entry : model.arrays) {
     const auto& array = *array_entry.second;
     if (array.final_data_type != ArrayDataType::kNone) {
-      CHECK(array.final_data_type == array.data_type);
+      CHECK(array.final_data_type == array.data_type)
+          << "Array \"" << array_entry.first
+          << "\" has mis-matching actual and final data types ("
+          << static_cast<int>(array.data_type) << ","
+          << static_cast<int>(array.final_data_type) << ").";
     }
   }
 }
-- 
GitLab


From e219aeb542779d90a582ffe16f8602cd1b275b22 Mon Sep 17 00:00:00 2001
From: Brennan Saeta <saeta@google.com>
Date: Wed, 22 Nov 2017 13:41:58 -0800
Subject: [PATCH 0253/1225] GCS: Perform additional file integrity checks

In order to guard against interrupted reads or other network
problems, we perform additional sanity checks to ensure we
correctly load file blocks from GCS.

PiperOrigin-RevId: 176695887
---
 .../core/platform/cloud/gcs_file_system.cc       | 16 +++++++++++++++-
 1 file changed, 15 insertions(+), 1 deletion(-)

diff --git a/tensorflow/core/platform/cloud/gcs_file_system.cc b/tensorflow/core/platform/cloud/gcs_file_system.cc
index d5e2a518e9..54d38fe962 100644
--- a/tensorflow/core/platform/cloud/gcs_file_system.cc
+++ b/tensorflow/core/platform/cloud/gcs_file_system.cc
@@ -29,6 +29,7 @@ limitations under the License.
 #include "tensorflow/core/lib/io/path.h"
 #include "tensorflow/core/lib/strings/numbers.h"
 #include "tensorflow/core/lib/strings/str_util.h"
+#include "tensorflow/core/lib/strings/stringprintf.h"
 #include "tensorflow/core/platform/cloud/curl_http_request.h"
 #include "tensorflow/core/platform/cloud/file_block_cache.h"
 #include "tensorflow/core/platform/cloud/google_auth_provider.h"
@@ -696,6 +697,18 @@ Status GcsFileSystem::LoadBufferFromGCS(const string& filename, size_t offset,
   TF_RETURN_WITH_CONTEXT_IF_ERROR(request->Send(), " when reading gs://",
                                   bucket, "/", object);
 
+  if (out->size() < block_size()) {
+    // Check stat cache to see if we encountered an interrupted read.
+    FileStatistics stat;
+    if (stat_cache_->Lookup(filename, &stat)) {
+      if (offset + out->size() < stat.length) {
+        return errors::Internal(strings::Printf(
+            "File contents are inconsistent for file: %s @ %lu.",
+            filename.c_str(), offset));
+      }
+    }
+  }
+
   return Status::OK();
 }
 
@@ -816,7 +829,8 @@ Status GcsFileSystem::StatForObject(const string& fname, const string& bucket,
     return errors::Internal("'stat' cannot be nullptr.");
   }
   if (object.empty()) {
-    return errors::InvalidArgument("'object' must be a non-empty string.");
+    return errors::InvalidArgument(strings::Printf(
+        "'object' must be a non-empty string. (File: %s)", fname.c_str()));
   }
 
   StatCache::ComputeFunc compute_func =
-- 
GitLab


From b1d8c59e9b014b527fb2fbef9ce9afc14dbc4938 Mon Sep 17 00:00:00 2001
From: Yifei Feng <yifeif@google.com>
Date: Wed, 22 Nov 2017 13:42:21 -0800
Subject: [PATCH 0254/1225] Merge changes from github.

PiperOrigin-RevId: 176695926
---
 CODE_OF_CONDUCT.md                            |   6 +-
 README.md                                     |   4 +-
 configure.py                                  |  39 +-
 tensorflow/BUILD                              |  16 +
 tensorflow/compiler/aot/tfcompile.bzl         |  15 +-
 tensorflow/compiler/tests/BUILD               |   2 +-
 .../compiler/tests/fused_batchnorm_test.py    |  27 +-
 .../compiler/xla/service/hlo_instruction.h    |   2 +-
 .../xla/service/hlo_instruction_test.cc       |   4 +-
 .../contrib/android/cmake/CMakeLists.txt      |   2 +-
 tensorflow/contrib/batching/BUILD             |   1 +
 .../contrib/batching/kernels/batch_kernels.cc |   2 +-
 .../kernel_tests/csiszar_divergence_test.py   |   2 +-
 tensorflow/contrib/cmake/CMakeLists.txt       | 147 +++-
 .../contrib/cmake/external/boringssl.cmake    |   6 +-
 .../contrib/cmake/external/jsoncpp.cmake      |   6 +-
 tensorflow/contrib/cmake/external/lmdb.cmake  |   6 +-
 tensorflow/contrib/cmake/external/png.cmake   |   6 +-
 .../contrib/cmake/external/protobuf.cmake     |   6 +-
 tensorflow/contrib/cmake/external/re2.cmake   |   8 +-
 .../contrib/cmake/external/snappy.cmake       |   8 +-
 .../contrib/cmake/external/sqlite.cmake       |   6 +-
 tensorflow/contrib/cmake/external/zlib.cmake  |   6 +-
 tensorflow/contrib/cmake/tf_cc_ops.cmake      |  36 +-
 .../contrib/cmake/tf_core_kernels.cmake       |  23 +-
 .../cmake/tf_label_image_example.cmake        |   5 +
 tensorflow/contrib/cmake/tf_python.cmake      |  38 +-
 tensorflow/contrib/cmake/tf_shared_lib.cmake  |  45 +-
 .../contrib/cmake/tf_stream_executor.cmake    |   3 +
 tensorflow/contrib/cmake/tf_tools.cmake       |  13 +-
 tensorflow/contrib/cmake/tf_tutorials.cmake   |   5 +
 tensorflow/contrib/crf/python/ops/crf.py      |  19 +-
 .../contrib/data/python/kernel_tests/BUILD    |   8 +-
 tensorflow/contrib/distributions/BUILD        |  17 +
 tensorflow/contrib/distributions/__init__.py  |   2 +
 .../python/kernel_tests/cauchy_test.py        | 438 ++++++++++++
 .../distributions/python/ops/cauchy.py        | 219 ++++++
 .../python/examples/notebooks/1_basics.ipynb  |   4 +-
 .../examples/notebooks/2_gradients.ipynb      |   6 +-
 .../examples/notebooks/3_datasets.ipynb       |  10 +-
 .../contrib/layers/python/layers/layers.py    |  19 +-
 .../layers/python/layers/layers_test.py       |  69 +-
 .../learn/python/learn/estimators/head.py     |   2 +-
 .../learn/python/learn/estimators/model_fn.py |   6 +-
 .../python/learn/learn_io/data_feeder.py      |  12 +-
 .../linear_optimizer/python/ops/sdca_ops.py   |  12 +-
 tensorflow/contrib/lite/python/BUILD          |   1 +
 .../contrib/lite/testing/generate_examples.py |  18 +-
 tensorflow/contrib/lite/toco/python/BUILD     |   1 +
 tensorflow/contrib/makefile/Makefile          |   3 +-
 tensorflow/contrib/makefile/README.md         |  41 +-
 tensorflow/contrib/makefile/build_all_ios.sh  |  54 +-
 .../contrib/makefile/compile_ios_protobuf.sh  | 369 +++++-----
 .../makefile/compile_ios_tensorflow.sh        | 155 +++--
 tensorflow/contrib/makefile/compile_nsync.sh  |   5 +-
 tensorflow/contrib/nn/__init__.py             |   2 +
 tensorflow/contrib/opt/BUILD                  |  18 +
 tensorflow/contrib/opt/__init__.py            |  17 +-
 .../training/multitask_optimizer_wrapper.py   | 140 ++++
 .../multitask_optimizer_wrapper_test.py       | 119 ++++
 .../python/kernel_tests/core_rnn_cell_test.py |  41 ++
 .../rnn/python/kernel_tests/rnn_cell_test.py  |  75 +-
 tensorflow/contrib/rnn/python/ops/rnn_cell.py | 378 +++++++++-
 .../seq2seq/python/ops/attention_wrapper.py   |  54 +-
 tensorflow/contrib/slim/README.md             |   2 +-
 .../slim/python/slim/nets/resnet_v1_test.py   |   2 +-
 tensorflow/contrib/verbs/README.md            |  14 +-
 tensorflow/contrib/verbs/rdma.cc              | 372 +++++++++-
 tensorflow/contrib/verbs/rdma.h               |  21 +-
 tensorflow/core/BUILD                         |   1 +
 .../api_def/base_api/api_def_UniqueV2.pbtxt   |  47 ++
 .../base_api/api_def_UnsortedSegmentSum.pbtxt |   2 +
 .../core/common_runtime/mkl_cpu_allocator.h   |   2 +-
 .../core/common_runtime/sycl/sycl_device.h    |  22 +-
 tensorflow/core/graph/graph.cc                |  15 +
 tensorflow/core/graph/graph.h                 |   5 +
 tensorflow/core/graph/graph_partition.cc      |   4 +-
 tensorflow/core/graph/graph_test.cc           |  62 +-
 .../core/graph/mkl_tfconversion_pass.cc       |   2 +-
 .../core/grappler/costs/graph_properties.h    |   6 +
 tensorflow/core/grappler/utils.cc             |   2 +-
 tensorflow/core/kernels/BUILD                 |  32 +-
 tensorflow/core/kernels/avgpooling_op.cc      |   7 +-
 tensorflow/core/kernels/bincount_op.cc        | 116 ++--
 tensorflow/core/kernels/bincount_op.h         |  41 ++
 tensorflow/core/kernels/bincount_op_gpu.cu.cc | 114 +++
 tensorflow/core/kernels/bincount_op_test.cc   |  75 ++
 tensorflow/core/kernels/bucketize_op.cc       |  66 +-
 tensorflow/core/kernels/bucketize_op.h        |  41 ++
 .../core/kernels/bucketize_op_gpu.cu.cc       | 101 +++
 tensorflow/core/kernels/conv_grad_ops_3d.cc   |  40 +-
 tensorflow/core/kernels/conv_ops_3d.cc        |   5 +
 tensorflow/core/kernels/cwise_op_acosh.cc     |  12 +-
 tensorflow/core/kernels/cwise_op_asinh.cc     |  14 +-
 tensorflow/core/kernels/cwise_op_atanh.cc     |  14 +-
 tensorflow/core/kernels/cwise_ops.h           |  12 +
 .../core/kernels/depthwise_conv_grad_op.cc    |  13 +-
 tensorflow/core/kernels/depthwise_conv_op.cc  |  11 +-
 tensorflow/core/kernels/depthwise_conv_op.h   |   5 +-
 .../core/kernels/depthwise_conv_op_gpu.cu.cc  |  19 +-
 .../core/kernels/fused_batch_norm_op.cc       |  70 +-
 tensorflow/core/kernels/fused_batch_norm_op.h |  22 +-
 tensorflow/core/kernels/lmdb_reader_op.cc     |   7 +-
 tensorflow/core/kernels/maxpooling_op.cc      |  49 +-
 .../core/kernels/maxpooling_op_gpu.cu.cc      |  40 +-
 tensorflow/core/kernels/maxpooling_op_gpu.h   |   2 +-
 tensorflow/core/kernels/mkl_tfconv_op.h       |  80 ++-
 tensorflow/core/kernels/ops_util.h            |  13 +
 tensorflow/core/kernels/pooling_ops_common.cc |  10 +-
 .../core/kernels/pooling_ops_common_gpu.h     |   4 +-
 tensorflow/core/kernels/quantized_add_op.cc   |   2 +-
 tensorflow/core/kernels/random_op.cc          |   4 +-
 .../core/kernels/segment_reduction_ops.cc     |   3 +
 .../core/kernels/segment_reduction_ops.h      |  36 +-
 tensorflow/core/kernels/shape_ops.cc          |  43 +-
 tensorflow/core/kernels/shape_ops.h           |   5 +-
 tensorflow/core/kernels/strided_slice_op.cc   |   1 -
 tensorflow/core/kernels/transpose_op.cc       |  35 +-
 tensorflow/core/kernels/unique_op.cc          | 113 ++-
 tensorflow/core/ops/array_ops.cc              |  44 +-
 tensorflow/core/ops/math_ops.cc               |   2 +
 tensorflow/core/ops/nn_ops.cc                 |  12 +-
 tensorflow/core/ops/ops.pbtxt                 |   5 +
 .../core/platform/default/build_config/BUILD  |  20 +-
 .../core/platform/default/notification.h      |   2 +-
 tensorflow/core/platform/posix/error.cc       |  11 +-
 tensorflow/core/platform/posix/port.cc        |   6 +-
 tensorflow/core/public/version.h              |   2 +-
 tensorflow/core/util/cuda_kernel_helper.h     |  12 +
 tensorflow/core/util/mkl_util.h               | 653 ++++++++++++++++--
 tensorflow/core/util/mkl_util_test.cc         |  91 +++
 .../api_guides/python/threading_and_queues.md |   2 +-
 .../docs_src/get_started/get_started.md       |   6 +-
 tensorflow/docs_src/get_started/input_fn.md   |   6 +-
 tensorflow/docs_src/install/install_c.md      |   2 +-
 tensorflow/docs_src/install/install_go.md     |   2 +-
 tensorflow/docs_src/install/install_java.md   |  18 +-
 tensorflow/docs_src/install/install_linux.md  |  22 +-
 tensorflow/docs_src/install/install_mac.md    |  10 +-
 .../docs_src/install/install_sources.md       |  19 +-
 tensorflow/docs_src/mobile/prepare_models.md  |   2 +-
 .../docs_src/programmers_guide/debugger.md    |  19 +-
 .../docs_src/programmers_guide/tensors.md     |  12 +-
 tensorflow/examples/speech_commands/models.py |   2 +-
 tensorflow/go/android.go                      |  20 +
 tensorflow/go/operation_test.go               |   8 +
 tensorflow/go/tensor.go                       |   9 +-
 tensorflow/go/tensor_test.go                  |   9 +-
 .../src/main/java/org/tensorflow/Shape.java   |  32 +
 .../test/java/org/tensorflow/ShapeTest.java   |  24 +
 tensorflow/python/BUILD                       |   4 +
 tensorflow/python/estimator/canned/head.py    |   2 +-
 .../python/estimator/inputs/numpy_io.py       |  84 ++-
 .../python/estimator/inputs/numpy_io_test.py  |  84 +++
 tensorflow/python/framework/ops.py            |   4 +
 tensorflow/python/framework/tensor_util.py    |   1 +
 tensorflow/python/framework/test_util.py      |   8 +-
 .../python/kernel_tests/array_ops_test.py     |  51 +-
 .../python/kernel_tests/bincount_op_test.py   |  26 +-
 .../python/kernel_tests/bucketize_op_test.py  |   8 +-
 .../python/kernel_tests/constant_op_test.py   |  15 +-
 tensorflow/python/kernel_tests/conv1d_test.py |  43 ++
 .../python/kernel_tests/conv_ops_3d_test.py   | 267 ++++---
 .../kernel_tests/depthwise_conv_op_test.py    |  20 +-
 .../python/kernel_tests/distributions/BUILD   |   1 +
 .../distributions/multinomial_test.py         |  12 +-
 .../python/kernel_tests/pooling_ops_test.py   |  59 +-
 .../python/kernel_tests/reader_ops_test.py    |  42 ++
 .../segment_reduction_ops_test.py             |  29 +-
 .../python/kernel_tests/shape_ops_test.py     |  10 +
 .../python/kernel_tests/unique_op_test.py     |  27 +
 tensorflow/python/layers/base.py              |   8 +-
 tensorflow/python/layers/convolutional.py     |   2 +
 tensorflow/python/layers/normalization.py     |  55 +-
 .../python/layers/normalization_test.py       |  92 ++-
 tensorflow/python/ops/array_ops.py            |  39 +-
 .../python/ops/distributions/dirichlet.py     |   2 +-
 .../python/ops/distributions/multinomial.py   |  50 +-
 tensorflow/python/ops/image_ops_impl.py       |  26 +-
 tensorflow/python/ops/linalg_ops.py           |  35 +-
 tensorflow/python/ops/math_grad_test.py       |  17 +
 tensorflow/python/ops/math_ops.py             | 258 ++++---
 tensorflow/python/ops/metrics_impl.py         |   7 +-
 .../python/ops/nn_fused_batchnorm_test.py     | 119 ++--
 tensorflow/python/ops/nn_impl.py              |  17 +-
 tensorflow/python/ops/nn_ops.py               | 128 +++-
 tensorflow/python/ops/variables.py            |   4 +-
 .../python/tools/import_pb_to_tensorboard.py  |   0
 tensorflow/stream_executor/cuda/cuda_dnn.cc   |   9 +-
 tensorflow/stream_executor/dnn.cc             |  14 +-
 tensorflow/stream_executor/dnn.h              |   6 +
 .../tools/api/golden/tensorflow.linalg.pbtxt  |   2 +-
 .../tools/api/golden/tensorflow.nn.pbtxt      |   6 +-
 tensorflow/tools/api/golden/tensorflow.pbtxt  |  22 +-
 .../tools/ci_build/ci_parameterized_build.sh  |   2 +-
 .../tools/ci_build/install/install_golang.sh  |   2 +-
 .../ci_build/linux/libtensorflow_docker.sh    |   2 +-
 .../tools/ci_build/osx/libtensorflow_cpu.sh   |   2 +-
 .../tools/ci_build/osx/libtensorflow_gpu.sh   |   2 +-
 .../tools/ci_build/pi/build_raspberry_pi.sh   |   6 +
 .../ci_build/windows/bazel/bazel_test_lib.sh  |   4 +-
 .../docker/Dockerfile.devel-gpu-cuda9-cudnn7  |   7 +-
 tensorflow/tools/docker/Dockerfile.gpu        |   2 +-
 tensorflow/tools/docker/README.md             |  14 +
 tensorflow/tools/graph_transforms/BUILD       |   2 +
 .../tools/graph_transforms/quantize_nodes.cc  |   2 +
 tensorflow/tools/pip_package/setup.py         |   2 +-
 third_party/aws.BUILD                         |   3 +
 third_party/curl.BUILD                        |   1 -
 third_party/sycl/crosstool/CROSSTOOL.tpl      |   8 +-
 third_party/sycl/crosstool/trisycl.tpl        |  85 +++
 third_party/sycl/sycl/BUILD.tpl               |  17 +-
 third_party/sycl/sycl/build_defs.bzl.tpl      |  17 +-
 third_party/sycl/sycl_configure.bzl           |  85 ++-
 third_party/zlib.BUILD                        |   2 +-
 tools/bazel.rc                                |   7 +-
 util/python/BUILD                             |   2 +-
 217 files changed, 6317 insertions(+), 1445 deletions(-)
 create mode 100644 tensorflow/contrib/distributions/python/kernel_tests/cauchy_test.py
 create mode 100644 tensorflow/contrib/distributions/python/ops/cauchy.py
 create mode 100644 tensorflow/contrib/opt/python/training/multitask_optimizer_wrapper.py
 create mode 100644 tensorflow/contrib/opt/python/training/multitask_optimizer_wrapper_test.py
 create mode 100644 tensorflow/core/api_def/base_api/api_def_UniqueV2.pbtxt
 create mode 100644 tensorflow/core/kernels/bincount_op.h
 create mode 100644 tensorflow/core/kernels/bincount_op_gpu.cu.cc
 create mode 100644 tensorflow/core/kernels/bincount_op_test.cc
 create mode 100644 tensorflow/core/kernels/bucketize_op.h
 create mode 100644 tensorflow/core/kernels/bucketize_op_gpu.cu.cc
 create mode 100644 tensorflow/core/util/mkl_util_test.cc
 create mode 100644 tensorflow/go/android.go
 mode change 100644 => 100755 tensorflow/python/tools/import_pb_to_tensorboard.py
 create mode 100644 third_party/sycl/crosstool/trisycl.tpl

diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md
index 10fd595fec..ff11d13140 100644
--- a/CODE_OF_CONDUCT.md
+++ b/CODE_OF_CONDUCT.md
@@ -42,7 +42,7 @@ The Code of Conduct also applies within project spaces and in public spaces when
 
 Conflicts in an open source project can take many forms, from someone having a bad day and using harsh and hurtful language in the issue queue, to more serious instances such as sexist/racist statements or threats of violence, and everything in between.
 
-If the behaviour is threatening or harassing, or for other reasons requires immediate escalation, please see below.
+If the behavior is threatening or harassing, or for other reasons requires immediate escalation, please see below.
 
 However, for the vast majority of issues, we aim to empower individuals to first resolve conflicts themselves, asking for help when needed, and only after that fails to escalate further. This approach gives people more control over the outcome of their dispute. 
 
@@ -55,14 +55,14 @@ If you are experiencing or witnessing conflict, we ask you to use the following
 
 ## Reporting Violations
 
-Violations of the Code of Conduct can be reported to TensorFlow’s Project Steward at conduct@tensorflow.org. The Project Steward will determine whether the Code of Conduct was violated, and will issue an appropriate sanction, possibly including a written warning or expulsion from the project, project sponsored spaces, or project forums. We ask that you make a good-faith effort to resolve your conflict via the conflict resolution policy before submitting a report.
+Violations of the Code of Conduct can be reported to TensorFlow’s Project Stewards, Edd Wilder-James (ewj@google.com) and Sarah Novotny (sarahnovotny@google.com). The Project Steward will determine whether the Code of Conduct was violated, and will issue an appropriate sanction, possibly including a written warning or expulsion from the project, project sponsored spaces, or project forums. We ask that you make a good-faith effort to resolve your conflict via the conflict resolution policy before submitting a report.
 
 Violations of the Code of Conduct can occur in any setting, even those unrelated to the project. We will only consider complaints about conduct that has occurred within one year of the report.
 
 
 ## Enforcement
 
-If the Project Steward receives a report alleging a violation of the Code of Conduct, the Project Steward will notify the accused of the report, and provide them an opportunity to discuss the report before a sanction is issued. The Project Steward will do their utmost to keep the reporter anonymous. If the act is ongoing (such as someone engaging in harassment), or involves a threat to anyone's safety (e.g. threats of violence), the Project Steward may issue sanctions without notice.
+If the Project Stewards receive a report alleging a violation of the Code of Conduct, the Project Stewards will notify the accused of the report, and provide them an opportunity to discuss the report before a sanction is issued. The Project Stewards will do their utmost to keep the reporter anonymous. If the act is ongoing (such as someone engaging in harassment), or involves a threat to anyone's safety (e.g. threats of violence), the Project Stewards may issue sanctions without notice.
 
 
 ## Attribution
diff --git a/README.md b/README.md
index 24bbb6cec1..aff3427bdd 100644
--- a/README.md
+++ b/README.md
@@ -73,11 +73,11 @@ $ python
 
 ## For more information
 
-* [TensorFlow website](https://www.tensorflow.org)
+* [TensorFlow Website](https://www.tensorflow.org)
 * [TensorFlow White Papers](https://www.tensorflow.org/about/bib)
 * [TensorFlow Model Zoo](https://github.com/tensorflow/models)
 * [TensorFlow MOOC on Udacity](https://www.udacity.com/course/deep-learning--ud730)
-* [TensorFlow course at Stanford](https://web.stanford.edu/class/cs20si)
+* [TensorFlow Course at Stanford](https://web.stanford.edu/class/cs20si)
 
 Learn more about the TensorFlow community at the [community page of tensorflow.org](https://www.tensorflow.org/community) for a few ways to participate.
 
diff --git a/configure.py b/configure.py
index 0d1afbfe15..1f205861f1 100644
--- a/configure.py
+++ b/configure.py
@@ -43,6 +43,7 @@ _DEFAULT_CUDA_PATH_WIN = ('C:/Program Files/NVIDIA GPU Computing '
                           'Toolkit/CUDA/v%s' % _DEFAULT_CUDA_VERSION)
 _TF_OPENCL_VERSION = '1.2'
 _DEFAULT_COMPUTECPP_TOOLKIT_PATH = '/usr/local/computecpp'
+_DEFAULT_TRISYCL_INCLUDE_DIR = '/usr/local/triSYCL/include'
 
 
 def is_windows():
@@ -636,7 +637,7 @@ def set_tf_cuda_version(environ_cp):
   write_action_env_to_bazelrc('TF_CUDA_VERSION', tf_cuda_version)
 
 
-def set_tf_cunn_version(environ_cp):
+def set_tf_cudnn_version(environ_cp):
   """Set CUDNN_INSTALL_PATH and TF_CUDNN_VERSION."""
   ask_cudnn_version = (
       'Please specify the cuDNN version you want to use. '
@@ -883,6 +884,28 @@ def set_computecpp_toolkit_path(environ_cp):
                               computecpp_toolkit_path)
 
 
+def set_trisycl_include_dir(environ_cp):
+  """Set TRISYCL_INCLUDE_DIR."""
+  ask_trisycl_include_dir = ('Please specify the location of the triSYCL '
+                             'include directory. (Use --config=sycl_trisycl '
+                             'when building with Bazel) '
+                             '[Default is %s]: ') % (
+                                 _DEFAULT_TRISYCL_INCLUDE_DIR)
+  while True:
+    trisycl_include_dir = get_from_env_or_user_or_default(
+        environ_cp, 'TRISYCL_INCLUDE_DIR', ask_trisycl_include_dir,
+        _DEFAULT_TRISYCL_INCLUDE_DIR)
+    if os.path.exists(trisycl_include_dir):
+      break
+
+    print('Invalid triSYCL include directory, %s cannot be found' %
+          (trisycl_include_dir))
+
+  # Set TRISYCL_INCLUDE_DIR
+  environ_cp['TRISYCL_INCLUDE_DIR'] = trisycl_include_dir
+  write_action_env_to_bazelrc('TRISYCL_INCLUDE_DIR', trisycl_include_dir)
+
+
 def set_mpi_home(environ_cp):
   """Set MPI_HOME."""
   default_mpi_home = which('mpirun') or which('mpiexec') or ''
@@ -997,6 +1020,8 @@ def main():
     environ_cp['TF_NEED_GCP'] = '0'
     environ_cp['TF_NEED_HDFS'] = '0'
     environ_cp['TF_NEED_JEMALLOC'] = '0'
+    environ_cp['TF_NEED_OPENCL_SYCL'] = '0'
+    environ_cp['TF_NEED_COMPUTECPP'] = '0'
     environ_cp['TF_NEED_OPENCL'] = '0'
     environ_cp['TF_CUDA_CLANG'] = '0'
 
@@ -1018,17 +1043,21 @@ def main():
   set_build_var(environ_cp, 'TF_NEED_VERBS', 'VERBS', 'with_verbs_support',
                 False, 'verbs')
 
-  set_action_env_var(environ_cp, 'TF_NEED_OPENCL', 'OpenCL', False)
-  if environ_cp.get('TF_NEED_OPENCL') == '1':
+  set_action_env_var(environ_cp, 'TF_NEED_OPENCL_SYCL', 'OpenCL SYCL', False)
+  if environ_cp.get('TF_NEED_OPENCL_SYCL') == '1':
     set_host_cxx_compiler(environ_cp)
     set_host_c_compiler(environ_cp)
-    set_computecpp_toolkit_path(environ_cp)
+    set_action_env_var(environ_cp, 'TF_NEED_COMPUTECPP', 'ComputeCPP', True)
+    if environ_cp.get('TF_NEED_COMPUTECPP') == '1':
+      set_computecpp_toolkit_path(environ_cp)
+    else:
+      set_trisycl_include_dir(environ_cp)
 
   set_action_env_var(environ_cp, 'TF_NEED_CUDA', 'CUDA', False)
   if (environ_cp.get('TF_NEED_CUDA') == '1' and
       'TF_CUDA_CONFIG_REPO' not in environ_cp):
     set_tf_cuda_version(environ_cp)
-    set_tf_cunn_version(environ_cp)
+    set_tf_cudnn_version(environ_cp)
     set_tf_cuda_compute_capabilities(environ_cp)
 
     set_tf_cuda_clang(environ_cp)
diff --git a/tensorflow/BUILD b/tensorflow/BUILD
index 49828cd4d6..c8f0b6b061 100644
--- a/tensorflow/BUILD
+++ b/tensorflow/BUILD
@@ -54,6 +54,15 @@ config_setting(
     visibility = ["//visibility:public"],
 )
 
+config_setting(
+    name = "raspberry_pi_armeabi",
+    values = {
+        "crosstool_top": "@local_config_arm_compiler//:toolchain",
+        "cpu": "armeabi",
+    },
+    visibility = ["//visibility:public"],
+)
+
 config_setting(
     name = "android_arm",
     values = {
@@ -760,6 +769,13 @@ tf_cc_shared_object(
     ],
 )
 
+exports_files(
+    [
+        "tf_version_script.lds",
+        "tf_exported_symbols.lds",
+    ],
+)
+
 py_library(
     name = "tensorflow_py",
     srcs = ["__init__.py"],
diff --git a/tensorflow/compiler/aot/tfcompile.bzl b/tensorflow/compiler/aot/tfcompile.bzl
index ee291c12d0..6c385af3b3 100644
--- a/tensorflow/compiler/aot/tfcompile.bzl
+++ b/tensorflow/compiler/aot/tfcompile.bzl
@@ -119,7 +119,7 @@ def tf_library(name, graph, config,
             out_nodes_file,
         ] + freeze_saver_srcs,
         outs=[freeze_file],
-        cmd=("$(location //tensorflow/python/tools:freeze_graph)" +
+        cmd=("$(location @org_tensorflow//tensorflow/python/tools:freeze_graph)" +
              freeze_args),
         tools=["@org_tensorflow//tensorflow/python/tools:freeze_graph"],
         tags=tags,
@@ -130,6 +130,10 @@ def tf_library(name, graph, config,
   header_file = name + ".h"
   object_file = name + ".o"
   ep = ("__" + PACKAGE_NAME + "__" + name).replace("/", "_")
+  if type(tfcompile_flags) == type(""):
+    flags = tfcompile_flags
+  else:
+    flags = " ".join(["'" + arg.replace("'", "'\\''") + "'" for arg in (tfcompile_flags or [])])
   native.genrule(
       name=("gen_" + name),
       srcs=[
@@ -148,7 +152,7 @@ def tf_library(name, graph, config,
            " --target_triple=" + target_llvm_triple() +
            " --out_header=$(@D)/" + header_file +
            " --out_object=$(@D)/" + object_file +
-           " " + (tfcompile_flags or "")),
+           " " + flags),
       tools=[tfcompile_tool],
       visibility=visibility,
       testonly=testonly,
@@ -185,7 +189,7 @@ def tf_library(name, graph, config,
            " --cpp_class=" + cpp_class +
            " --target_triple=" + target_llvm_triple() +
            " --out_session_module=$(@D)/" + session_module_pb +
-           " " + (tfcompile_flags or "")),
+           " " + flags),
       tools=[tfcompile_tool],
       visibility=visibility,
       testonly=testonly,
@@ -195,8 +199,7 @@ def tf_library(name, graph, config,
 
   # The cc_library rule packaging up the header and object file, and needed
   # kernel implementations.
-  need_xla_data_proto = (tfcompile_flags and
-                         tfcompile_flags.find("--gen_program_shape") != -1)
+  need_xla_data_proto = (flags and flags.find("--gen_program_shape") != -1)
   native.cc_library(
       name=name,
       srcs=[object_file],
@@ -253,7 +256,7 @@ def tf_library(name, graph, config,
         ],
         outs=[test_file],
         cmd=("sed " + sed_replace +
-             " $(location //tensorflow/compiler/aot:test.cc) " +
+             " $(location @org_tensorflow//tensorflow/compiler/aot:test.cc) " +
              "> $(OUTS)"),
         tags=tags,
     )
diff --git a/tensorflow/compiler/tests/BUILD b/tensorflow/compiler/tests/BUILD
index c372e05474..79c4befd36 100644
--- a/tensorflow/compiler/tests/BUILD
+++ b/tensorflow/compiler/tests/BUILD
@@ -672,7 +672,7 @@ tf_library(
     cpp_class = "LSTMLayerInference",
     graph = "lstm_layer_inference.pbtxt",
     tags = ["manual"],
-    tfcompile_flags = "--xla_cpu_multi_thread_eigen=false",
+    tfcompile_flags = ["--xla_cpu_multi_thread_eigen=false"],
 )
 
 # -----------------------------------------------------------------------------
diff --git a/tensorflow/compiler/tests/fused_batchnorm_test.py b/tensorflow/compiler/tests/fused_batchnorm_test.py
index 936fcf8b6b..00a9c9a65b 100644
--- a/tensorflow/compiler/tests/fused_batchnorm_test.py
+++ b/tensorflow/compiler/tests/fused_batchnorm_test.py
@@ -36,7 +36,7 @@ class FusedBatchNormTest(XLATestCase):
     x_square = x * x
     x_square_sum = np.sum(x_square, (0, 1, 2))
     x_sum = np.sum(x, axis=(0, 1, 2))
-    element_count = np.size(x) / int(np.shape(x)[0])
+    element_count = np.size(x) / int(np.shape(x)[-1])
     mean = x_sum / element_count
     var = x_square_sum / element_count - mean * mean
     normalized = (x - mean) / np.sqrt(var + epsilon)
@@ -64,8 +64,9 @@ class FusedBatchNormTest(XLATestCase):
     return grad_x, grad_scale, grad_offset
 
   def testInference(self):
-    x_shape = [2, 2, 6, 2]
-    scale_shape = [2]
+    channel = 3
+    x_shape = [2, 2, 6, channel]
+    scale_shape = [channel]
     x_val = np.random.random_sample(x_shape).astype(np.float32)
     scale_val = np.random.random_sample(scale_shape).astype(np.float32)
 
@@ -74,8 +75,9 @@ class FusedBatchNormTest(XLATestCase):
     with self.test_session() as sess, self.test_scope():
       # To avoid constant folding
       t_val = array_ops.placeholder(np.float32, shape=x_shape, name="x")
-      scale = array_ops.placeholder(np.float32, shape=[2], name="scale")
-      offset = array_ops.placeholder(np.float32, shape=[2], name="offset")
+      scale = array_ops.placeholder(np.float32, shape=scale_shape, name="scale")
+      offset = array_ops.placeholder(
+          np.float32, shape=scale_shape, name="offset")
       epsilon = 0.001
       y_ref, mean_ref, var_ref = self._reference_training(
           x_val, scale_val, offset_val, epsilon, data_format)
@@ -97,8 +99,9 @@ class FusedBatchNormTest(XLATestCase):
       self.assertAllClose(y_val, y_ref, atol=1e-3)
 
   def _testLearning(self, use_gradient_checker):
-    x_shape = [2, 2, 6, 2]
-    scale_shape = [2]
+    channel = 3
+    x_shape = [2, 2, 6, channel]
+    scale_shape = [channel]
     x_val = np.random.random_sample(x_shape).astype(np.float32)
     scale_val = np.random.random_sample(scale_shape).astype(np.float32)
 
@@ -109,8 +112,9 @@ class FusedBatchNormTest(XLATestCase):
     with self.test_session() as sess, self.test_scope():
       # To avoid constant folding
       t_val = array_ops.placeholder(np.float32, shape=x_shape, name="x")
-      scale = array_ops.placeholder(np.float32, shape=[2], name="scale")
-      offset = array_ops.placeholder(np.float32, shape=[2], name="offset")
+      scale = array_ops.placeholder(np.float32, shape=scale_shape, name="scale")
+      offset = array_ops.placeholder(
+          np.float32, shape=scale_shape, name="offset")
       epsilon = 0.001
       y, mean, var = nn.fused_batch_norm(
           t_val,
@@ -154,8 +158,9 @@ class FusedBatchNormTest(XLATestCase):
   def testGradient(self):
     # TODO(b/64270657): Use gradient_checker here in addition to comparing with
     # this reference implementation.
-    x_shape = [2, 2, 6, 2]
-    scale_shape = [2]
+    channel = 3
+    x_shape = [2, 2, 6, channel]
+    scale_shape = [channel]
     grad_val = np.random.random_sample(x_shape).astype(np.float32)
     x_val = np.random.random_sample(x_shape).astype(np.float32)
     scale_val = np.random.random_sample(scale_shape).astype(np.float32)
diff --git a/tensorflow/compiler/xla/service/hlo_instruction.h b/tensorflow/compiler/xla/service/hlo_instruction.h
index 1bd0cca945..cda8b07c61 100644
--- a/tensorflow/compiler/xla/service/hlo_instruction.h
+++ b/tensorflow/compiler/xla/service/hlo_instruction.h
@@ -222,7 +222,7 @@ class HloInstruction {
       tensorflow::gtl::ArraySlice<int64> strides);
 
   // Creates a slice instruction, where the first operand is sliced by
-  // start indices specified in the second operand, and by size specfied in
+  // start indices specified in the second operand, and by size specified in
   // 'slice_sizes'.
   static std::unique_ptr<HloInstruction> CreateDynamicSlice(
       const Shape& shape, HloInstruction* operand,
diff --git a/tensorflow/compiler/xla/service/hlo_instruction_test.cc b/tensorflow/compiler/xla/service/hlo_instruction_test.cc
index 070bb4bc42..76b12fc8d3 100644
--- a/tensorflow/compiler/xla/service/hlo_instruction_test.cc
+++ b/tensorflow/compiler/xla/service/hlo_instruction_test.cc
@@ -792,8 +792,8 @@ TEST_F(HloInstructionTest, ComplexFusionOp) {
   //   sub = Sub(mul, clamp)
   //   tuple = Tuple({sub, sub, mul, C1})
   //
-  // Notable complexities are repeated operands in a same instruction, different
-  // shapes, use of value in different expressions.
+  // Notable complexities are repeated operands in the same instruction,
+  // different shapes, use of value in different expressions.
   auto c1 = builder.AddInstruction(
       HloInstruction::CreateConstant(Literal::CreateR0<float>(1.1f)));
   auto c2 = builder.AddInstruction(
diff --git a/tensorflow/contrib/android/cmake/CMakeLists.txt b/tensorflow/contrib/android/cmake/CMakeLists.txt
index 25ada5ba27..aba356d616 100644
--- a/tensorflow/contrib/android/cmake/CMakeLists.txt
+++ b/tensorflow/contrib/android/cmake/CMakeLists.txt
@@ -37,7 +37,7 @@ set_target_properties(lib_tf PROPERTIES IMPORTED_LOCATION
 set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DIS_SLIM_BUILD \
                      -std=c++11 -fno-rtti -fno-exceptions \
                      -O2 -Wno-narrowing -fomit-frame-pointer \
-                     -mfpu=neon -mfloat-abi=softfp -fPIE \
+                     -mfpu=neon -mfloat-abi=softfp -fPIE -fPIC \
                      -ftemplate-depth=900 \
                      -DGOOGLE_PROTOBUF_NO_RTTI \
                      -DGOOGLE_PROTOBUF_NO_STATIC_INITIALIZER")
diff --git a/tensorflow/contrib/batching/BUILD b/tensorflow/contrib/batching/BUILD
index 8b7df4a84c..a111cfecb3 100644
--- a/tensorflow/contrib/batching/BUILD
+++ b/tensorflow/contrib/batching/BUILD
@@ -82,6 +82,7 @@ cc_library(
 tf_cc_test(
     name = "adaptive_shared_batch_scheduler_test",
     srcs = ["adaptive_shared_batch_scheduler_test.cc"],
+    tags = ["manual"],  # b/69013768
     deps = [
         ":adaptive_shared_batch_scheduler",
         "//tensorflow/contrib/batching/test_util:fake_clock_env",
diff --git a/tensorflow/contrib/batching/kernels/batch_kernels.cc b/tensorflow/contrib/batching/kernels/batch_kernels.cc
index 3b7c538fcc..6041d8c9b2 100644
--- a/tensorflow/contrib/batching/kernels/batch_kernels.cc
+++ b/tensorflow/contrib/batching/kernels/batch_kernels.cc
@@ -461,7 +461,7 @@ class BatchResource : public ResourceBase {
     return Status::OK();
   }
 
-  // Looks up the batcher queue for 'queue_name'. If it did't previously exist,
+  // Looks up the batcher queue for 'queue_name'. If it didn't previously exist,
   // creates it.
   Status LookupOrCreateBatcherQueue(const string& queue_name,
                                     BatcherQueue** queue) {
diff --git a/tensorflow/contrib/bayesflow/python/kernel_tests/csiszar_divergence_test.py b/tensorflow/contrib/bayesflow/python/kernel_tests/csiszar_divergence_test.py
index 8c6a614beb..2e94b7206d 100644
--- a/tensorflow/contrib/bayesflow/python/kernel_tests/csiszar_divergence_test.py
+++ b/tensorflow/contrib/bayesflow/python/kernel_tests/csiszar_divergence_test.py
@@ -759,7 +759,7 @@ class CsiszarVIMCOTest(test.TestCase):
   def _csiszar_vimco_helper_grad(self, logu, delta):
     """Finite difference approximation of `grad(csiszar_vimco_helper, logu)`."""
 
-    # This code actually estimates the sum of the Jacobiab because thats what
+    # This code actually estimates the sum of the Jacobiab because that's what
     # TF's `gradients` does.
     np_log_avg_u1, np_log_sooavg_u1 = self._csiszar_vimco_helper(
         logu[..., None] + np.diag([delta]*len(logu)))
diff --git a/tensorflow/contrib/cmake/CMakeLists.txt b/tensorflow/contrib/cmake/CMakeLists.txt
index 8744fc492f..77a3fc0c83 100644
--- a/tensorflow/contrib/cmake/CMakeLists.txt
+++ b/tensorflow/contrib/cmake/CMakeLists.txt
@@ -34,13 +34,41 @@ option(tensorflow_BUILD_SHARED_LIB "Build TensorFlow as a shared library" OFF)
 option(tensorflow_OPTIMIZE_FOR_NATIVE_ARCH "Enable compiler optimizations for the native processor architecture (if available)" ON)
 option(tensorflow_WIN_CPU_SIMD_OPTIONS "Enables CPU SIMD instructions")
 option(tensorflow_ENABLE_SNAPPY_SUPPORT "Enable SNAPPY compression support" ON)
+if(HAIKU)
+	option(tensorflow_ENABLE_POSITION_INDEPENDENT_CODE "Enable PIE support" OFF)
+else()
+	option(tensorflow_ENABLE_POSITION_INDEPENDENT_CODE "Enable PIE support" ON)
+endif()
+
 
 if (NOT WIN32)
   # Threads: defines CMAKE_THREAD_LIBS_INIT and adds -pthread compile option
   # for targets that link ${CMAKE_THREAD_LIBS_INIT}.
   find_package (Threads)
+
+  option(tensorflow_PATH_STATIC_LIB "Additional library search path for libcudnn_static.a, libnccl_static.a, libculibos.a" /usr/local/cuda/lib64/)
+  option(tensorflow_CUDNN_INCLUDE "cudnn.h header install path" /usr/include/)
+  if (NOT tensorflow_CUDNN_INCLUDE)
+    # option's default value is OFF. Fill it with real default values
+    set(tensorflow_CUDNN_INCLUDE /usr/include)
+  endif (NOT tensorflow_CUDNN_INCLUDE)
+  option(tensorflow_PATH_CUDNN_STATIC_LIB "Override PATH_STATIC_LIB for libcudnn_static.a" ${tensorflow_PATH_STATIC_LIB})
+  option(tensorflow_PATH_NCCL_STATIC_LIB "Override PATH_STATIC_LIB for libnccl_static.a" ${tensorflow_PATH_STATIC_LIB})
+  option(tensorflow_CUDA_LIBRARY_PATH "Designate the default CUDA library paths" /usr/local/cuda/lib64)
+  if (NOT tensorflow_CUDA_LIBRARY_PATH)
+    # option's default value is OFF. Fill it with real default values
+    set(tensorflow_CUDA_LIBRARY_PATH /usr/local/cuda/lib64)
+  endif (NOT tensorflow_CUDA_LIBRARY_PATH)
 endif()
 
+if (WIN32)
+  set(BOOL_WIN32 ON)
+else (WIN32)
+  set(BOOL_WIN32 OFF)
+  set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fPIC")
+  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fPIC")
+endif (WIN32)
+
 # [CLEANUP] Remove when done
 # For debugging
 function(SHOW_VARIABLES)
@@ -58,7 +86,12 @@ set (DOWNLOAD_LOCATION "${CMAKE_CURRENT_BINARY_DIR}/downloads"
      CACHE PATH "Location where external projects will be downloaded.")
 mark_as_advanced(DOWNLOAD_LOCATION)
 
-set(CMAKE_POSITION_INDEPENDENT_CODE ON)
+if (tensorflow_ENABLE_POSITION_INDEPENDENT_CODE)
+	set(CMAKE_POSITION_INDEPENDENT_CODE ON)
+else()
+	set(CMAKE_POSITION_INDEPENDENT_CODE OFF)
+endif()
+
 add_definitions(-DEIGEN_AVOID_STL_ARRAY)
 if(WIN32)
   add_definitions(-DNOMINMAX -D_WIN32_WINNT=0x0A00 -DLANG_CXX11 -DCOMPILER_MSVC)
@@ -217,20 +250,35 @@ endif()
 if(UNIX)
   list(APPEND tensorflow_EXTERNAL_LIBRARIES ${CMAKE_THREAD_LIBS_INIT} ${CMAKE_DL_LIBS})
 endif()
+if(HAIKU)
+  list(APPEND tensorflow_EXTERNAL_LIBRARIES network)
+endif()
 
 if (tensorflow_ENABLE_GPU)
+  if (NOT WIN32)
+    # Default install paths for cuda libraries in Linux
+    # In some Linux distros, find_package(CUDA) seems to require CMAKE_LIBRARY_PATH to include cuda-lib paths
+    list(APPEND CMAKE_LIBRARY_PATH "${tensorflow_CUDA_LIBRARY_PATH}")
+    list(APPEND CMAKE_LIBRARY_PATH "${tensorflow_CUDA_LIBRARY_PATH}/stubs")
+  endif (NOT WIN32)
+
+  find_package(CUDA 8.0 REQUIRED)
+
+  # by default we assume compute cabability 3.5 and 5.2. If you change this change it in
+  # CUDA_NVCC_FLAGS and cuda_config.h below
+  set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-gencode arch=compute_30,code=\"sm_30,compute_30\";-gencode arch=compute_35,code=\"sm_35,compute_35\";-gencode arch=compute_52,code=\"sm_52,compute_52\")
+  set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};--include-path ${PROJECT_BINARY_DIR}/$\{build_configuration\};--expt-relaxed-constexpr)
+  set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-ftz=true)  # Flush denormals to zero
+  set(CUDA_INCLUDE ${CUDA_TOOLKIT_TARGET_DIR} ${CUDA_TOOLKIT_TARGET_DIR}/extras/CUPTI/include)
+  include_directories(${CUDA_INCLUDE})
   if (WIN32)
-    find_package(CUDA 8.0 REQUIRED)
-
-    # by default we assume compute cabability 3.5 and 5.2. If you change this change it in
-    # CUDA_NVCC_FLAGS and cuda_config.h below
-    set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-gencode arch=compute_30,code=\"sm_30,compute_30\";-gencode arch=compute_35,code=\"sm_35,compute_35\";-gencode arch=compute_52,code=\"sm_52,compute_52\")
-    set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};--include-path ${PROJECT_BINARY_DIR}/$\{build_configuration\};--expt-relaxed-constexpr)
-    set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-ftz=true)  # Flush denormals to zero
-    set(CUDA_INCLUDE ${CUDA_TOOLKIT_TARGET_DIR} ${CUDA_TOOLKIT_TARGET_DIR}/extras/CUPTI/include)
-    include_directories(${CUDA_INCLUDE})
     add_definitions(-DGOOGLE_CUDA=1 -DTF_EXTRA_CUDA_CAPABILITIES=3.0,3.5,5.2)
+  else (WIN32)
+    # Without these double quotes, cmake in Linux makes it "-DTF_EXTRA_CUDA_CAPABILITIES=3.0, -D3.5, -D5.2" for cc, which incurs build breaks
+    add_definitions(-DGOOGLE_CUDA=1 -D"TF_EXTRA_CUDA_CAPABILITIES=3.0,3.5,5.2")
+  endif (WIN32)
 
+  if (WIN32)
     # add cudnn
     if(NOT CUDNN_HOME)
       set(CUDNN_HOME ${CUDA_TOOLKIT_TARGET_DIR})
@@ -238,18 +286,48 @@ if (tensorflow_ENABLE_GPU)
     include_directories(${CUDNN_HOME})
     set(CUDA_LIBRARIES ${CUDA_LIBRARIES} ${CUDA_CUDA_LIBRARY} ${CUDA_CUBLAS_LIBRARIES} ${CUDA_CUFFT_LIBRARIES}
       ${CUDA_curand_LIBRARY} ${CUDA_cupti_LIBRARY} ${CUDA_cusolver_LIBRARY} ${CUDNN_HOME}/lib/x64/cudnn.lib)
+  else (WIN32)
+    set(CUDNN_INCLUDE "${tensorflow_CUDNN_INCLUDE}")
 
-    # create cuda_config.h
-    FILE(WRITE ${tensorflow_source_dir}/third_party/gpus/cuda/cuda_config.h
-      "#ifndef CUDA_CUDA_CONFIG_H_\n"
-      "#define CUDA_CUDA_CONFIG_H_\n"
-      "#define TF_CUDA_CAPABILITIES CudaVersion(\"3.0\"),CudaVersion(\"3.5\"),CudaVersion(\"5.2\")\n"
-      "#define TF_CUDA_VERSION \"64_80\"\n"
-      "#define TF_CUDNN_VERSION \"64_6\"\n"
-      "#define TF_CUDA_TOOLKIT_PATH \"${CUDA_TOOLKIT_ROOT_DIR}\"\n"
-      "#endif  // CUDA_CUDA_CONFIG_H_\n"
-    )
+    find_library(nccl_STATIC_LIBRARY NAMES libnccl_static.a PATHS ${tensorflow_PATH_NCCL_STATIC_LIB} ${CUDA_TOOLKIT_ROOT_DIR})
+    if (NOT nccl_STATIC_LIBRARY)
+      message(FATAL_ERROR "NCCL is required for GPU-build")
+    else (NOT nccl_STATIC_LIBRARY)
+      message("nccl-static: ${nccl_STATIC_LIBRARY}")
+      # something like /usr/lib64/libnccl_static.a
+    endif (NOT nccl_STATIC_LIBRARY)
+
+    find_library(cudnn_STATIC_LIBRARY NAMES libcudnn_static.a PATHS ${tensorflow_PATH_CUDNN_STATIC_LIB} ${CUDA_TOOLKIT_ROOT_DIR})
+    if (NOT cudnn_STATIC_LIBRARY)
+      message(FATAL_ERROR "CUDNN is required for GPU-build")
+    else (NOT cudnn_STATIC_LIBRARY)
+      message("cudnn-static: ${cudnn_STATIC_LIBRARY}")
+    endif (NOT cudnn_STATIC_LIBRARY)
+
+    find_library(culibos_STATIC_LIBRARY NAMES libculibos.a PATHS ${tensorflow_PATH_STATIC_LIB} ${CUDA_TOOLKIT_ROOT_DIR})
+    if (NOT culibos_STATIC_LIBRARY)
+      message(FATAL_ERROR "CULIBOS is required for GPU-build")
+    else (NOT culibos_STATIC_LIBRARY)
+      message("culibos-static: ${culibos_STATIC_LIBRARY}")
+    endif (NOT culibos_STATIC_LIBRARY)
+
+    include_directories(${CUDNN_INCLUDE})
+    set(CUDA_LIBRARIES ${CUDA_LIBRARIES} ${CUDA_CUDA_LIBRARY} ${CUDA_CUBLAS_LIBRARIES} ${CUDA_CUFFT_LIBRARIES}
+      ${CUDA_curand_LIBRARY} ${CUDA_cupti_LIBRARY} ${CUDA_cusolver_LIBRARY} ${cudnn_STATIC_LIBRARY} ${culibos_STATIC_LIBRARY} ${nccl_STATIC_LIBRARY})
+  endif (WIN32)
+
+  # create cuda_config.h
+  FILE(WRITE ${tensorflow_source_dir}/third_party/gpus/cuda/cuda_config.h
+    "#ifndef CUDA_CUDA_CONFIG_H_\n"
+    "#define CUDA_CUDA_CONFIG_H_\n"
+    "#define TF_CUDA_CAPABILITIES CudaVersion(\"3.0\"),CudaVersion(\"3.5\"),CudaVersion(\"5.2\")\n"
+    "#define TF_CUDA_VERSION \"64_80\"\n"
+    "#define TF_CUDNN_VERSION \"64_6\"\n"
+    "#define TF_CUDA_TOOLKIT_PATH \"${CUDA_TOOLKIT_ROOT_DIR}\"\n"
+    "#endif  // CUDA_CUDA_CONFIG_H_\n"
+  )
 
+  if (WIN32)
     # tf assumes in various places header files to be in cuda/include. On windows the cuda sdk
     # installs them under cuda/version/include and to avoid that we need to change tf we copy a
     # few files to cuda/include
@@ -261,12 +339,25 @@ if (tensorflow_ENABLE_GPU)
       ${CUDA_TOOLKIT_TARGET_DIR}/include/cusolverDn.h
       DESTINATION ${tensorflow_source_dir}/third_party/gpus/cuda/include
     )
-    include_directories(${tensorflow_source_dir}/third_party/gpus)
-    # add cuda libraries to tensorflow_EXTERNAL_LIBRARIES
-    list(APPEND tensorflow_EXTERNAL_LIBRARIES ${CUDA_LIBRARIES})
+  else(WIN32)
+    # Linux has slightly differnt install paths than Windows
+    FILE(COPY
+      ${CUDA_TOOLKIT_TARGET_DIR}/include/cuda.h ${CUDA_TOOLKIT_TARGET_DIR}/include/cuComplex.h
+      ${CUDA_TOOLKIT_TARGET_DIR}/include/cublas_v2.h ${CUDNN_INCLUDE}/cudnn.h
+      ${CUDA_TOOLKIT_TARGET_DIR}/include/cufft.h ${CUDA_TOOLKIT_TARGET_DIR}/include/curand.h
+      ${CUDA_TOOLKIT_TARGET_DIR}/include/cuda_runtime_api.h
+      ${CUDA_TOOLKIT_TARGET_DIR}/include/cusolverDn.h
+      DESTINATION ${tensorflow_source_dir}/third_party/gpus/cuda/include
+    )
+  endif(WIN32)
 
-    # NOTE(mrry): Update these flags when the version of CUDA or cuDNN used
-    # in the default build is upgraded.
+  include_directories(${tensorflow_source_dir}/third_party/gpus)
+  # add cuda libraries to tensorflow_EXTERNAL_LIBRARIES
+  list(APPEND tensorflow_EXTERNAL_LIBRARIES ${CUDA_LIBRARIES})
+
+  # NOTE(mrry): Update these flags when the version of CUDA or cuDNN used
+  # in the default build is upgraded.
+  if(WIN32)
     set(tensorflow_BUILD_INFO_FLAGS --build_config cuda --key_value
       msvcp_dll_name=msvcp140.dll
       cudart_dll_name=cudart64_80.dll
@@ -275,7 +366,9 @@ if (tensorflow_ENABLE_GPU)
       cudnn_dll_name=cudnn64_6.dll
       cudnn_version_number=6)
   else(WIN32)
-    message(FATAL_ERROR "CMake GPU build is currently only supported on Windows.")
+    set(tensorflow_BUILD_INFO_FLAGS --build_config cuda --key_value
+      cuda_version_number=8.0
+      cudnn_version_number=6)
   endif(WIN32)
 else(tensorflow_ENABLE_GPU)
   set(tensorflow_BUILD_INFO_FLAGS --build_config cpu --key_value
@@ -293,9 +386,7 @@ include(tf_core_framework.cmake)
 # NOTE: Disabled until issue #3996 is fixed.
 # include(tf_stream_executor.cmake)
 if (tensorflow_ENABLE_GPU)
-  if (WIN32)
     include(tf_stream_executor.cmake)
-  endif()
 endif()
 
 include(tf_core_cpu.cmake)
diff --git a/tensorflow/contrib/cmake/external/boringssl.cmake b/tensorflow/contrib/cmake/external/boringssl.cmake
index dc27eadaca..cca8444e2a 100644
--- a/tensorflow/contrib/cmake/external/boringssl.cmake
+++ b/tensorflow/contrib/cmake/external/boringssl.cmake
@@ -39,8 +39,12 @@ ExternalProject_Add(boringssl
     # BUILD_IN_SOURCE 1
     INSTALL_COMMAND ""
     CMAKE_CACHE_ARGS
+        if(tensorflow_ENABLE_POSITION_INDEPENDENT_CODE)
+        	-DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON
+        else()
+        	-DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=OFF
+        endif()
         -DCMAKE_BUILD_TYPE:STRING=Release
         -DCMAKE_VERBOSE_MAKEFILE:BOOL=OFF
-        -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON
 )
 
diff --git a/tensorflow/contrib/cmake/external/jsoncpp.cmake b/tensorflow/contrib/cmake/external/jsoncpp.cmake
index 5127d7e8f7..d2ae4c76e8 100644
--- a/tensorflow/contrib/cmake/external/jsoncpp.cmake
+++ b/tensorflow/contrib/cmake/external/jsoncpp.cmake
@@ -42,8 +42,12 @@ ExternalProject_Add(jsoncpp
     BUILD_IN_SOURCE 1
     INSTALL_COMMAND ""
     CMAKE_CACHE_ARGS
+  	  if(tensorflow_ENABLE_POSITION_INDEPENDENT_CODE)
+  	      -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON
+  	  else()
+   	    	-DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=OFF
+   	 endif()
         -DCMAKE_BUILD_TYPE:STRING=Release
         -DCMAKE_VERBOSE_MAKEFILE:BOOL=OFF
-	-DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON
 )
 
diff --git a/tensorflow/contrib/cmake/external/lmdb.cmake b/tensorflow/contrib/cmake/external/lmdb.cmake
index 79971b7cfc..e41384f023 100644
--- a/tensorflow/contrib/cmake/external/lmdb.cmake
+++ b/tensorflow/contrib/cmake/external/lmdb.cmake
@@ -29,10 +29,14 @@ ExternalProject_Add(lmdb
     INSTALL_DIR ${lmdb_INSTALL}
     DOWNLOAD_DIR "${DOWNLOAD_LOCATION}"
     CMAKE_CACHE_ARGS
+		if(tensorflow_ENABLE_POSITION_INDEPENDENT_CODE)
+			-DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON
+		else()
+			-DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=OFF
+		endif()
         -DCMAKE_BUILD_TYPE:STRING=Release
         -DCMAKE_VERBOSE_MAKEFILE:BOOL=OFF
         -DCMAKE_INSTALL_PREFIX:STRING=${lmdb_INSTALL}
-    -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON
 )
 
 if(WIN32)
diff --git a/tensorflow/contrib/cmake/external/png.cmake b/tensorflow/contrib/cmake/external/png.cmake
index 2b2bd47d1c..aad6618f52 100644
--- a/tensorflow/contrib/cmake/external/png.cmake
+++ b/tensorflow/contrib/cmake/external/png.cmake
@@ -41,10 +41,14 @@ ExternalProject_Add(png
     INSTALL_DIR ${png_INSTALL}
     DOWNLOAD_DIR "${DOWNLOAD_LOCATION}"
     CMAKE_CACHE_ARGS
+		if(tensorflow_ENABLE_POSITION_INDEPENDENT_CODE)
+			-DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON
+		else()
+			-DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=OFF
+		endif()
         -DCMAKE_BUILD_TYPE:STRING=Release
         -DCMAKE_VERBOSE_MAKEFILE:BOOL=OFF
         -DCMAKE_INSTALL_PREFIX:STRING=${png_INSTALL}
-	-DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON
 	-DZLIB_ROOT:STRING=${ZLIB_INSTALL}
 )
 
diff --git a/tensorflow/contrib/cmake/external/protobuf.cmake b/tensorflow/contrib/cmake/external/protobuf.cmake
index 1e300e21df..b53857a47b 100644
--- a/tensorflow/contrib/cmake/external/protobuf.cmake
+++ b/tensorflow/contrib/cmake/external/protobuf.cmake
@@ -44,8 +44,12 @@ ExternalProject_Add(protobuf
         ${PROTOBUF_ADDITIONAL_CMAKE_OPTIONS}
     INSTALL_COMMAND ""
     CMAKE_CACHE_ARGS
+		if(tensorflow_ENABLE_POSITION_INDEPENDENT_CODE)
+			-DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON
+		else()
+			-DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=OFF
+		endif()
         -DCMAKE_BUILD_TYPE:STRING=Release
         -DCMAKE_VERBOSE_MAKEFILE:BOOL=OFF
-        -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON
         -DZLIB_ROOT:STRING=${ZLIB_INSTALL}
 )
diff --git a/tensorflow/contrib/cmake/external/re2.cmake b/tensorflow/contrib/cmake/external/re2.cmake
index cb4ec9c2de..b56f4b0898 100644
--- a/tensorflow/contrib/cmake/external/re2.cmake
+++ b/tensorflow/contrib/cmake/external/re2.cmake
@@ -38,7 +38,11 @@ ExternalProject_Add(re2
     BUILD_IN_SOURCE 1
     DOWNLOAD_DIR "${DOWNLOAD_LOCATION}"
     CMAKE_CACHE_ARGS
+		if(tensorflow_ENABLE_POSITION_INDEPENDENT_CODE)
+			-DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON
+		else()
+			-DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=OFF
+		endif()
         -DCMAKE_BUILD_TYPE:STRING=Release
         -DCMAKE_INSTALL_PREFIX:STRING=${re2_INSTALL}
-        -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON
-)
\ No newline at end of file
+)
diff --git a/tensorflow/contrib/cmake/external/snappy.cmake b/tensorflow/contrib/cmake/external/snappy.cmake
index 2d2451521c..926c271fd9 100644
--- a/tensorflow/contrib/cmake/external/snappy.cmake
+++ b/tensorflow/contrib/cmake/external/snappy.cmake
@@ -40,11 +40,15 @@ ExternalProject_Add(snappy
     LOG_CONFIGURE ON
     LOG_BUILD ON
     CMAKE_CACHE_ARGS
+		if(tensorflow_ENABLE_POSITION_INDEPENDENT_CODE)
+			-DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON
+		else()
+			-DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=OFF
+		endif()
         -DCMAKE_BUILD_TYPE:STRING=Release
         -DCMAKE_VERBOSE_MAKEFILE:BOOL=OFF
         -DSNAPPY_BUILD_TESTS:BOOL=OFF
-        -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON
 )
 
 # actually enables snappy in the source code
-add_definitions(-DTF_USE_SNAPPY)
+add_definitions(-DTF_USE_SNAPPY)
\ No newline at end of file
diff --git a/tensorflow/contrib/cmake/external/sqlite.cmake b/tensorflow/contrib/cmake/external/sqlite.cmake
index 1770dcb1fd..785039a469 100644
--- a/tensorflow/contrib/cmake/external/sqlite.cmake
+++ b/tensorflow/contrib/cmake/external/sqlite.cmake
@@ -53,9 +53,13 @@ else()
         INSTALL_DIR ${sqlite_INSTALL}
         DOWNLOAD_DIR "${DOWNLOAD_LOCATION}"
         CMAKE_CACHE_ARGS
+			if(tensorflow_ENABLE_POSITION_INDEPENDENT_CODE)
+				-DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON
+			else()
+				-DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=OFF
+			endif()
             -DCMAKE_BUILD_TYPE:STRING=Release
             -DCMAKE_VERBOSE_MAKEFILE:BOOL=OFF
-            -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON
             -DCMAKE_INSTALL_PREFIX:STRING=${sqlite_INSTALL}
     )
 
diff --git a/tensorflow/contrib/cmake/external/zlib.cmake b/tensorflow/contrib/cmake/external/zlib.cmake
index c8af611e1e..f10f84336e 100644
--- a/tensorflow/contrib/cmake/external/zlib.cmake
+++ b/tensorflow/contrib/cmake/external/zlib.cmake
@@ -42,9 +42,13 @@ ExternalProject_Add(zlib
     BUILD_IN_SOURCE 1
     DOWNLOAD_DIR "${DOWNLOAD_LOCATION}"
     CMAKE_CACHE_ARGS
+		if(tensorflow_ENABLE_POSITION_INDEPENDENT_CODE)
+			-DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON
+		else()
+			-DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=OFF
+		endif()
         -DCMAKE_BUILD_TYPE:STRING=Release
         -DCMAKE_INSTALL_PREFIX:STRING=${ZLIB_INSTALL}
-	-DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON
 )
 
 # put zlib includes in the directory where they are expected
diff --git a/tensorflow/contrib/cmake/tf_cc_ops.cmake b/tensorflow/contrib/cmake/tf_cc_ops.cmake
index 45eeb11062..6e2ac203f9 100644
--- a/tensorflow/contrib/cmake/tf_cc_ops.cmake
+++ b/tensorflow/contrib/cmake/tf_cc_ops.cmake
@@ -148,7 +148,11 @@ list(REMOVE_ITEM tf_cc_srcs ${tf_cc_test_srcs})
 add_library(tf_cc OBJECT ${tf_cc_srcs})
 add_dependencies(tf_cc tf_cc_framework tf_cc_ops)
 
-set (pywrap_tensorflow_lib "${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_BUILD_TYPE}/pywrap_tensorflow_internal.lib")
+if (WIN32)
+  set (pywrap_tensorflow_lib "${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_BUILD_TYPE}/pywrap_tensorflow_internal.lib")
+else (WIN32)
+  set (pywrap_tensorflow_lib "${CMAKE_CURRENT_BINARY_DIR}/libpywrap_tensorflow_internal.so")
+endif (WIN32)
 add_custom_target(tf_extension_ops)
 
 function(AddUserOps)
@@ -164,15 +168,13 @@ function(AddUserOps)
   # create shared library from source and cuda obj
   add_library(${_AT_TARGET} SHARED ${_AT_SOURCES} ${gpu_lib})
   target_link_libraries(${_AT_TARGET} ${pywrap_tensorflow_lib})
-  if(WIN32)
-    if (tensorflow_ENABLE_GPU AND _AT_GPUSOURCES)
-        # some ops call out to cuda directly; need to link libs for the cuda dlls
-        target_link_libraries(${_AT_TARGET} ${CUDA_LIBRARIES})
-    endif()
-    if (_AT_DISTCOPY)
-        add_custom_command(TARGET ${_AT_TARGET} POST_BUILD
-            COMMAND ${CMAKE_COMMAND} -E copy $<TARGET_FILE:${_AT_TARGET}> ${_AT_DISTCOPY}/)
-    endif()
+  if (tensorflow_ENABLE_GPU AND _AT_GPUSOURCES)
+      # some ops call out to cuda directly; need to link libs for the cuda dlls
+      target_link_libraries(${_AT_TARGET} ${CUDA_LIBRARIES})
+  endif()
+  if (_AT_DISTCOPY)
+      add_custom_command(TARGET ${_AT_TARGET} POST_BUILD
+          COMMAND ${CMAKE_COMMAND} -E copy $<TARGET_FILE:${_AT_TARGET}> ${_AT_DISTCOPY}/)
   endif()
   if (_AT_DEPENDS)
     add_dependencies(${_AT_TARGET} ${_AT_DEPENDS})
@@ -180,9 +182,19 @@ function(AddUserOps)
   # make sure TF_COMPILE_LIBRARY is not defined for this target
   get_target_property(target_compile_flags  ${_AT_TARGET} COMPILE_FLAGS)
   if(target_compile_flags STREQUAL "target_compile_flags-NOTFOUND")
-    set(target_compile_flags "/UTF_COMPILE_LIBRARY")
+    if (WIN32)
+      set(target_compile_flags "/UTF_COMPILE_LIBRARY")
+    else (WIN32)
+      # gcc uses UTF as default
+      set(target_compile_flags "-finput-charset=UTF-8")
+    endif (WIN32)
   else()
-    set(target_compile_flags "${target_compile_flags} /UTF_COMPILE_LIBRARY")
+    if (WIN32)
+      set(target_compile_flags "${target_compile_flags} /UTF_COMPILE_LIBRARY")
+    else (WIN32)
+      # gcc uses UTF as default
+      set(target_compile_flags "${target_compile_flags} -finput-charset=UTF-8")
+    endif (WIN32)
   endif()
   set_target_properties(${_AT_TARGET} PROPERTIES COMPILE_FLAGS ${target_compile_flags})
   add_dependencies(tf_extension_ops ${_AT_TARGET})
diff --git a/tensorflow/contrib/cmake/tf_core_kernels.cmake b/tensorflow/contrib/cmake/tf_core_kernels.cmake
index d6b8990664..2d015908a8 100644
--- a/tensorflow/contrib/cmake/tf_core_kernels.cmake
+++ b/tensorflow/contrib/cmake/tf_core_kernels.cmake
@@ -179,6 +179,7 @@ file(GLOB_RECURSE tf_core_gpu_kernels_srcs
     "${tensorflow_source_dir}/tensorflow/contrib/image/kernels/*.cu.cc"
     "${tensorflow_source_dir}/tensorflow/contrib/rnn/kernels/*.cu.cc"
     "${tensorflow_source_dir}/tensorflow/contrib/seq2seq/kernels/*.cu.cc"
+    "${tensorflow_source_dir}/tensorflow/contrib/resampler/kernels/*.cu.cc"
 )
 
 if(WIN32 AND tensorflow_ENABLE_GPU)
@@ -202,16 +203,16 @@ endif(WIN32 AND tensorflow_ENABLE_GPU)
 add_library(tf_core_kernels OBJECT ${tf_core_kernels_srcs})
 add_dependencies(tf_core_kernels tf_core_cpu)
 
-if(WIN32)
+if (WIN32)
   target_compile_options(tf_core_kernels PRIVATE /MP)
-  if (tensorflow_ENABLE_GPU)
-    set_source_files_properties(${tf_core_gpu_kernels_srcs} PROPERTIES CUDA_SOURCE_PROPERTY_FORMAT OBJ)
-    set(tf_core_gpu_kernels_lib tf_core_gpu_kernels)
-    cuda_add_library(${tf_core_gpu_kernels_lib} ${tf_core_gpu_kernels_srcs})
-    set_target_properties(${tf_core_gpu_kernels_lib}
-                          PROPERTIES DEBUG_POSTFIX ""
-                          COMPILE_FLAGS "${TF_REGULAR_CXX_FLAGS}"
-    )
-    add_dependencies(${tf_core_gpu_kernels_lib} tf_core_cpu)
-  endif()
+endif (WIN32)
+if (tensorflow_ENABLE_GPU)
+  set_source_files_properties(${tf_core_gpu_kernels_srcs} PROPERTIES CUDA_SOURCE_PROPERTY_FORMAT OBJ)
+  set(tf_core_gpu_kernels_lib tf_core_gpu_kernels)
+  cuda_add_library(${tf_core_gpu_kernels_lib} ${tf_core_gpu_kernels_srcs})
+  set_target_properties(${tf_core_gpu_kernels_lib}
+                        PROPERTIES DEBUG_POSTFIX ""
+                        COMPILE_FLAGS "${TF_REGULAR_CXX_FLAGS}"
+  )
+  add_dependencies(${tf_core_gpu_kernels_lib} tf_core_cpu)
 endif()
diff --git a/tensorflow/contrib/cmake/tf_label_image_example.cmake b/tensorflow/contrib/cmake/tf_label_image_example.cmake
index 0d3a4699eb..7f2f60b089 100644
--- a/tensorflow/contrib/cmake/tf_label_image_example.cmake
+++ b/tensorflow/contrib/cmake/tf_label_image_example.cmake
@@ -34,3 +34,8 @@ target_link_libraries(tf_label_image_example PUBLIC
     ${tf_core_gpu_kernels_lib}
     ${tensorflow_EXTERNAL_LIBRARIES}
 )
+
+install(TARGETS tf_label_image_example
+        RUNTIME DESTINATION bin
+        LIBRARY DESTINATION lib
+        ARCHIVE DESTINATION lib)
\ No newline at end of file
diff --git a/tensorflow/contrib/cmake/tf_python.cmake b/tensorflow/contrib/cmake/tf_python.cmake
index 9b863f7bc6..61b3fd715d 100755
--- a/tensorflow/contrib/cmake/tf_python.cmake
+++ b/tensorflow/contrib/cmake/tf_python.cmake
@@ -715,6 +715,9 @@ function(GENERATE_PYTHON_OP_LIB tf_python_op_lib_name)
       set(require_shape_fn 1)
     endif()
 
+    get_filename_component(GENERATE_PYTHON_OP_LIB_MKDIRPATH ${GENERATE_PYTHON_OP_LIB_DESTINATION} PATH)
+    file(MAKE_DIRECTORY ${GENERATE_PYTHON_OP_LIB_MKDIRPATH})
+
     # Create a C++ executable that links in the appropriate op
     # registrations and generates Python wrapper code based on the
     # registered ops.
@@ -743,6 +746,7 @@ function(GENERATE_PYTHON_OP_LIB tf_python_op_lib_name)
         ${GENERATE_PYTHON_OP_LIB_DESTINATION} PARENT_SCOPE)
 endfunction()
 
+GENERATE_PYTHON_OP_LIB("audio_ops")
 GENERATE_PYTHON_OP_LIB("array_ops")
 GENERATE_PYTHON_OP_LIB("bitwise_ops")
 GENERATE_PYTHON_OP_LIB("math_ops")
@@ -987,7 +991,7 @@ add_library(pywrap_tensorflow_internal SHARED
     $<TARGET_OBJECTS:tf_tools_transform_graph_lib>
     $<$<BOOL:${tensorflow_ENABLE_GRPC_SUPPORT}>:$<TARGET_OBJECTS:tf_core_distributed_runtime>>
     $<TARGET_OBJECTS:tf_core_kernels>
-    $<$<BOOL:${tensorflow_ENABLE_GPU}>:$<TARGET_OBJECTS:tf_core_kernels_cpu_only>>
+    $<$<BOOL:${tensorflow_ENABLE_GPU}>:$<$<BOOL:${BOOL_WIN32}>:$<TARGET_OBJECTS:tf_core_kernels_cpu_only>>>
     $<$<BOOL:${tensorflow_ENABLE_GPU}>:$<TARGET_OBJECTS:tf_stream_executor>>
     ${pywrap_tensorflow_deffile}
 )
@@ -1063,25 +1067,23 @@ if(WIN32)
         DISTCOPY ${CMAKE_CURRENT_BINARY_DIR}/tf_python/tensorflow/contrib/rnn/python/ops/)
 endif(WIN32)
 
-if(WIN32)
-    # include contrib/seq2seq as .so
-    #
-    set(tf_beam_search_srcs
-        "${tensorflow_source_dir}/tensorflow/contrib/seq2seq/kernels/beam_search_ops.cc"
-        "${tensorflow_source_dir}/tensorflow/contrib/seq2seq/kernels/beam_search_ops.h"
-        "${tensorflow_source_dir}/tensorflow/contrib/seq2seq/ops/beam_search_ops.cc"
-    )
+# include contrib/seq2seq as .so
+#
+set(tf_beam_search_srcs
+    "${tensorflow_source_dir}/tensorflow/contrib/seq2seq/kernels/beam_search_ops.cc"
+    "${tensorflow_source_dir}/tensorflow/contrib/seq2seq/kernels/beam_search_ops.h"
+    "${tensorflow_source_dir}/tensorflow/contrib/seq2seq/ops/beam_search_ops.cc"
+)
 
-    set(tf_beam_search_gpu_srcs
-        "${tensorflow_source_dir}/tensorflow/contrib/seq2seq/kernels/beam_search_ops_gpu.cu.cc"
-    )
+set(tf_beam_search_gpu_srcs
+    "${tensorflow_source_dir}/tensorflow/contrib/seq2seq/kernels/beam_search_ops_gpu.cu.cc"
+)
 
-    AddUserOps(TARGET _beam_search_ops
-        SOURCES "${tf_beam_search_srcs}"
-        GPUSOURCES ${tf_beam_search_gpu_srcs}
-        DEPENDS pywrap_tensorflow_internal tf_python_ops
-        DISTCOPY ${CMAKE_CURRENT_BINARY_DIR}/tf_python/tensorflow/contrib/seq2seq/python/ops/)
-endif(WIN32)
+AddUserOps(TARGET _beam_search_ops
+    SOURCES "${tf_beam_search_srcs}"
+    GPUSOURCES ${tf_beam_search_gpu_srcs}
+    DEPENDS pywrap_tensorflow_internal tf_python_ops
+    DISTCOPY ${CMAKE_CURRENT_BINARY_DIR}/tf_python/tensorflow/contrib/seq2seq/python/ops/)
 
 ############################################################
 # Build a PIP package containing the TensorFlow runtime.
diff --git a/tensorflow/contrib/cmake/tf_shared_lib.cmake b/tensorflow/contrib/cmake/tf_shared_lib.cmake
index 9bf45bab30..3e3fe0cdfa 100644
--- a/tensorflow/contrib/cmake/tf_shared_lib.cmake
+++ b/tensorflow/contrib/cmake/tf_shared_lib.cmake
@@ -73,7 +73,7 @@ add_library(tensorflow SHARED
     $<TARGET_OBJECTS:tf_tools_transform_graph_lib>
     $<$<BOOL:${tensorflow_ENABLE_GRPC_SUPPORT}>:$<TARGET_OBJECTS:tf_core_distributed_runtime>>
     $<TARGET_OBJECTS:tf_core_kernels>
-    $<$<BOOL:${tensorflow_ENABLE_GPU}>:$<TARGET_OBJECTS:tf_core_kernels_cpu_only>>
+    $<$<BOOL:${tensorflow_ENABLE_GPU}>:$<$<BOOL:${BOOL_WIN32}>:$<TARGET_OBJECTS:tf_core_kernels_cpu_only>>>
     $<$<BOOL:${tensorflow_ENABLE_GPU}>:$<TARGET_OBJECTS:tf_stream_executor>>
     ${tensorflow_deffile}
 )
@@ -94,3 +94,46 @@ endif()
 if(WIN32)
   add_dependencies(tensorflow tensorflow_static)
 endif(WIN32)
+
+install(TARGETS tensorflow
+        RUNTIME DESTINATION bin
+        LIBRARY DESTINATION lib
+        ARCHIVE DESTINATION lib)
+
+# install necessary headers
+# tensorflow headers
+install(DIRECTORY ${tensorflow_source_dir}/tensorflow/cc/
+        DESTINATION include/tensorflow/cc
+        FILES_MATCHING PATTERN "*.h")
+install(DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/tensorflow/cc/
+        DESTINATION include/tensorflow/cc
+        FILES_MATCHING PATTERN "*.h")
+install(DIRECTORY ${tensorflow_source_dir}/tensorflow/core/
+        DESTINATION include/tensorflow/core
+        FILES_MATCHING PATTERN "*.h")
+install(DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/tensorflow/core/
+        DESTINATION include/tensorflow/core
+        FILES_MATCHING PATTERN "*.h")
+install(DIRECTORY ${tensorflow_source_dir}/tensorflow/stream_executor/
+        DESTINATION include/tensorflow/stream_executor
+        FILES_MATCHING PATTERN "*.h")
+# google protobuf headers
+install(DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/protobuf/src/protobuf/src/google/
+        DESTINATION include/google
+        FILES_MATCHING PATTERN "*.h")
+# nsync headers
+install(DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/external/nsync/
+        DESTINATION include/external/nsync
+        FILES_MATCHING PATTERN "*.h")
+# Eigen directory
+install(DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/eigen/src/eigen/Eigen/
+        DESTINATION include/Eigen)
+# external directory
+install(DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/external/eigen_archive/
+        DESTINATION include/external/eigen_archive)
+# third_party eigen directory
+install(DIRECTORY ${tensorflow_source_dir}/third_party/eigen3/
+        DESTINATION include/third_party/eigen3)
+# unsupported Eigen directory
+install(DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/eigen/src/eigen/unsupported/Eigen/
+        DESTINATION include/unsupported/Eigen)
diff --git a/tensorflow/contrib/cmake/tf_stream_executor.cmake b/tensorflow/contrib/cmake/tf_stream_executor.cmake
index 3d84f1ebb9..8d95f0d3e8 100644
--- a/tensorflow/contrib/cmake/tf_stream_executor.cmake
+++ b/tensorflow/contrib/cmake/tf_stream_executor.cmake
@@ -74,6 +74,9 @@ endif()
 #)
 #list(REMOVE_ITEM tf_stream_executor_srcs ${tf_stream_executor_test_srcs}) 
 
+if (NOT WIN32)
+  set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -lgomp")
+endif (NOT WIN32)
 add_library(tf_stream_executor OBJECT ${tf_stream_executor_srcs})
 
 add_dependencies(tf_stream_executor
diff --git a/tensorflow/contrib/cmake/tf_tools.cmake b/tensorflow/contrib/cmake/tf_tools.cmake
index 6ef9598963..cb58a2e7df 100644
--- a/tensorflow/contrib/cmake/tf_tools.cmake
+++ b/tensorflow/contrib/cmake/tf_tools.cmake
@@ -73,7 +73,7 @@ add_executable(${transform_graph}
     $<TARGET_OBJECTS:tf_core_direct_session>
     $<TARGET_OBJECTS:tf_tools_transform_graph_lib>
     $<TARGET_OBJECTS:tf_core_kernels>
-    $<$<BOOL:${tensorflow_ENABLE_GPU}>:$<TARGET_OBJECTS:tf_core_kernels_cpu_only>>
+    $<$<BOOL:${tensorflow_ENABLE_GPU}>:$<$<BOOL:${BOOL_WIN32}>:$<TARGET_OBJECTS:tf_core_kernels_cpu_only>>>
     $<$<BOOL:${tensorflow_ENABLE_GPU}>:$<TARGET_OBJECTS:tf_stream_executor>>
 )
 
@@ -95,7 +95,7 @@ add_executable(${summarize_graph}
     $<TARGET_OBJECTS:tf_core_direct_session>
     $<TARGET_OBJECTS:tf_tools_transform_graph_lib>
     $<TARGET_OBJECTS:tf_core_kernels>
-    $<$<BOOL:${tensorflow_ENABLE_GPU}>:$<TARGET_OBJECTS:tf_core_kernels_cpu_only>>
+    $<$<BOOL:${tensorflow_ENABLE_GPU}>:$<$<BOOL:${BOOL_WIN32}>:$<TARGET_OBJECTS:tf_core_kernels_cpu_only>>>
     $<$<BOOL:${tensorflow_ENABLE_GPU}>:$<TARGET_OBJECTS:tf_stream_executor>>
 )
 
@@ -117,7 +117,7 @@ add_executable(${compare_graphs}
     $<TARGET_OBJECTS:tf_core_direct_session>
     $<TARGET_OBJECTS:tf_tools_transform_graph_lib>
     $<TARGET_OBJECTS:tf_core_kernels>
-    $<$<BOOL:${tensorflow_ENABLE_GPU}>:$<TARGET_OBJECTS:tf_core_kernels_cpu_only>>
+    $<$<BOOL:${tensorflow_ENABLE_GPU}>:$<$<BOOL:${BOOL_WIN32}>:$<TARGET_OBJECTS:tf_core_kernels_cpu_only>>>
     $<$<BOOL:${tensorflow_ENABLE_GPU}>:$<TARGET_OBJECTS:tf_stream_executor>>
 )
 
@@ -138,7 +138,7 @@ add_executable(${benchmark_model}
     $<TARGET_OBJECTS:tf_core_ops>
     $<TARGET_OBJECTS:tf_core_direct_session>
     $<TARGET_OBJECTS:tf_core_kernels>
-    $<$<BOOL:${tensorflow_ENABLE_GPU}>:$<TARGET_OBJECTS:tf_core_kernels_cpu_only>>
+    $<$<BOOL:${tensorflow_ENABLE_GPU}>:$<$<BOOL:${BOOL_WIN32}>:$<TARGET_OBJECTS:tf_core_kernels_cpu_only>>>
     $<$<BOOL:${tensorflow_ENABLE_GPU}>:$<TARGET_OBJECTS:tf_stream_executor>>
 )
 
@@ -147,3 +147,8 @@ target_link_libraries(${benchmark_model} PUBLIC
   ${tf_core_gpu_kernels_lib}
   ${tensorflow_EXTERNAL_LIBRARIES}
 )
+
+install(TARGETS ${transform_graph} ${summarize_graph} ${compare_graphs} ${benchmark_model}
+        RUNTIME DESTINATION bin
+        LIBRARY DESTINATION lib
+        ARCHIVE DESTINATION lib)
diff --git a/tensorflow/contrib/cmake/tf_tutorials.cmake b/tensorflow/contrib/cmake/tf_tutorials.cmake
index 858e7dda92..e63fccc181 100644
--- a/tensorflow/contrib/cmake/tf_tutorials.cmake
+++ b/tensorflow/contrib/cmake/tf_tutorials.cmake
@@ -34,3 +34,8 @@ target_link_libraries(tf_tutorials_example_trainer PUBLIC
     ${tf_core_gpu_kernels_lib}
     ${tensorflow_EXTERNAL_LIBRARIES}
 )
+
+install(TARGETS tf_tutorials_example_trainer
+        RUNTIME DESTINATION bin
+        LIBRARY DESTINATION lib
+        ARCHIVE DESTINATION lib)
diff --git a/tensorflow/contrib/crf/python/ops/crf.py b/tensorflow/contrib/crf/python/ops/crf.py
index ca384226d4..ec395e41d0 100644
--- a/tensorflow/contrib/crf/python/ops/crf.py
+++ b/tensorflow/contrib/crf/python/ops/crf.py
@@ -395,8 +395,8 @@ class CrfDecodeForwardRnnCell(rnn_cell.RNNCell):
       scope: Unused variable scope of this cell.
 
     Returns:
-      backpointers: [batch_size, num_tags], containing backpointers.
-      new_state: [batch_size, num_tags], containing new score values.
+      backpointers: A [batch_size, num_tags] matrix of backpointers.
+      new_state: A [batch_size, num_tags] matrix of new score values.
     """
     # For simplicity, in shape comments, denote:
     # 'batch_size' by 'B', 'max_seq_len' by 'T' , 'num_tags' by 'O' (output).
@@ -436,8 +436,9 @@ class CrfDecodeBackwardRnnCell(rnn_cell.RNNCell):
     """Build the CrfDecodeBackwardRnnCell.
 
     Args:
-      inputs: [batch_size, num_tags], backpointer of next step (in time order).
-      state: [batch_size, 1], next position's tag index.
+      inputs: A [batch_size, num_tags] matrix of
+            backpointer of next step (in time order).
+      state: A [batch_size, 1] matrix of tag index of next step.
       scope: Unused variable scope of this cell.
 
     Returns:
@@ -461,16 +462,16 @@ def crf_decode(potentials, transition_params, sequence_length):
   This is a function for tensor.
 
   Args:
-    potentials: A [batch_size, max_seq_len, num_tags] tensor, matrix of
+    potentials: A [batch_size, max_seq_len, num_tags] tensor of
               unary potentials.
-    transition_params: A [num_tags, num_tags] tensor, matrix of
+    transition_params: A [num_tags, num_tags] matrix of
               binary potentials.
-    sequence_length: A [batch_size] tensor, containing sequence lengths.
+    sequence_length: A [batch_size] vector of true sequence lengths.
 
   Returns:
-    decode_tags: A [batch_size, max_seq_len] tensor, with dtype tf.int32.
+    decode_tags: A [batch_size, max_seq_len] matrix, with dtype `tf.int32`.
                 Contains the highest scoring tag indices.
-    best_score: A [batch_size] tensor, containing the score of decode_tags.
+    best_score: A [batch_size] vector, containing the score of `decode_tags`.
   """
   # If max_seq_len is 1, we skip the algorithm and simply return the argmax tag
   # and the max activation.
diff --git a/tensorflow/contrib/data/python/kernel_tests/BUILD b/tensorflow/contrib/data/python/kernel_tests/BUILD
index 0697fbdec1..995ce6d654 100644
--- a/tensorflow/contrib/data/python/kernel_tests/BUILD
+++ b/tensorflow/contrib/data/python/kernel_tests/BUILD
@@ -11,6 +11,7 @@ py_test(
     size = "small",
     srcs = ["batch_dataset_op_test.py"],
     srcs_version = "PY2AND3",
+    tags = ["no_pip"],
     deps = [
         ":dataset_serialization_test",
         "//tensorflow/contrib/data/python/ops:dataset_ops",
@@ -373,6 +374,7 @@ py_test(
     size = "small",
     srcs = ["sequence_dataset_op_test.py"],
     srcs_version = "PY2AND3",
+    tags = ["no_pip"],
     deps = [
         ":dataset_serialization_test",
         "//tensorflow/contrib/data/python/ops:dataset_ops",
@@ -450,6 +452,7 @@ py_test(
     size = "small",
     srcs = ["zip_dataset_op_test.py"],
     srcs_version = "PY2AND3",
+    tags = ["no_pip"],
     deps = [
         ":dataset_serialization_test",
         "//tensorflow/contrib/data/python/ops:dataset_ops",
@@ -466,7 +469,10 @@ py_test(
     size = "small",
     srcs = ["prefetching_ops_test.py"],
     srcs_version = "PY2AND3",
-    tags = ["no_oss"],  # b/68785503
+    tags = [
+        "manual",
+        "no_oss",  # b/68785503
+    ],
     deps = [
         "//tensorflow/contrib/data/python/ops:prefetching_py",
         "//tensorflow/core:protos_all_py",
diff --git a/tensorflow/contrib/distributions/BUILD b/tensorflow/contrib/distributions/BUILD
index 2dc8ad9483..145b9495ff 100644
--- a/tensorflow/contrib/distributions/BUILD
+++ b/tensorflow/contrib/distributions/BUILD
@@ -140,6 +140,23 @@ cuda_py_test(
     ],
 )
 
+cuda_py_test(
+    name = "cauchy_test",
+    size = "medium",
+    srcs = ["python/kernel_tests/cauchy_test.py"],
+    additional_deps = [
+        ":distributions_py",
+        "//third_party/py/numpy",
+        "//tensorflow/python:array_ops",
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:framework_for_generated_wrappers",
+        "//tensorflow/python:framework_test_lib",
+        "//tensorflow/python:gradients",
+        "//tensorflow/python:platform_test",
+        "//tensorflow/python:variables",
+    ],
+)
+
 cuda_py_test(
     name = "chi2_test",
     srcs = ["python/kernel_tests/chi2_test.py"],
diff --git a/tensorflow/contrib/distributions/__init__.py b/tensorflow/contrib/distributions/__init__.py
index 16f6533e57..0d12d83893 100644
--- a/tensorflow/contrib/distributions/__init__.py
+++ b/tensorflow/contrib/distributions/__init__.py
@@ -24,6 +24,7 @@ from __future__ import print_function
 
 from tensorflow.contrib.distributions.python.ops import bijectors
 from tensorflow.contrib.distributions.python.ops.binomial import *
+from tensorflow.contrib.distributions.python.ops.cauchy import *
 from tensorflow.contrib.distributions.python.ops.chi2 import *
 from tensorflow.contrib.distributions.python.ops.conditional_distribution import *
 from tensorflow.contrib.distributions.python.ops.conditional_transformed_distribution import *
@@ -83,6 +84,7 @@ from tensorflow.python.util.all_util import remove_undocumented
 
 _allowed_symbols = [
     'bijectors',
+    'Cauchy',
     'ConditionalDistribution',
     'ConditionalTransformedDistribution',
     'FULLY_REPARAMETERIZED',
diff --git a/tensorflow/contrib/distributions/python/kernel_tests/cauchy_test.py b/tensorflow/contrib/distributions/python/kernel_tests/cauchy_test.py
new file mode 100644
index 0000000000..73747db31c
--- /dev/null
+++ b/tensorflow/contrib/distributions/python/kernel_tests/cauchy_test.py
@@ -0,0 +1,438 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for Cauchy."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import importlib
+import numpy as np
+
+from tensorflow.contrib.distributions.python.ops import cauchy as cauchy_lib
+from tensorflow.python.framework import constant_op
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import ops
+from tensorflow.python.framework import tensor_shape
+from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import gradients_impl
+from tensorflow.python.ops import variables
+from tensorflow.python.platform import test
+from tensorflow.python.platform import tf_logging
+
+
+def try_import(name):  # pylint: disable=invalid-name
+  module = None
+  try:
+    module = importlib.import_module(name)
+  except ImportError as e:
+    tf_logging.warning("Could not import %s: %s" % (name, str(e)))
+  return module
+
+
+stats = try_import("scipy.stats")
+
+
+class CauchyTest(test.TestCase):
+
+  def setUp(self):
+    self._rng = np.random.RandomState(123)
+
+  def assertAllFinite(self, tensor):
+    is_finite = np.isfinite(tensor.eval())
+    all_true = np.ones_like(is_finite, dtype=np.bool)
+    self.assertAllEqual(all_true, is_finite)
+
+  def _testParamShapes(self, sample_shape, expected):
+    with self.test_session():
+      param_shapes = cauchy_lib.Cauchy.param_shapes(sample_shape)
+      loc_shape, scale_shape = param_shapes["loc"], param_shapes["scale"]
+      self.assertAllEqual(expected, loc_shape.eval())
+      self.assertAllEqual(expected, scale_shape.eval())
+      loc = array_ops.zeros(loc_shape)
+      scale = array_ops.ones(scale_shape)
+      self.assertAllEqual(expected,
+                          array_ops.shape(
+                              cauchy_lib.Cauchy(loc, scale).sample()).eval())
+
+  def _testParamStaticShapes(self, sample_shape, expected):
+    param_shapes = cauchy_lib.Cauchy.param_static_shapes(sample_shape)
+    loc_shape, scale_shape = param_shapes["loc"], param_shapes["scale"]
+    self.assertEqual(expected, loc_shape)
+    self.assertEqual(expected, scale_shape)
+
+  def testParamShapes(self):
+    sample_shape = [10, 3, 4]
+    self._testParamShapes(sample_shape, sample_shape)
+    self._testParamShapes(constant_op.constant(sample_shape), sample_shape)
+
+  def testParamStaticShapes(self):
+    sample_shape = [10, 3, 4]
+    self._testParamStaticShapes(sample_shape, sample_shape)
+    self._testParamStaticShapes(
+        tensor_shape.TensorShape(sample_shape), sample_shape)
+
+  def testCauchyLogPDF(self):
+    with self.test_session():
+      batch_size = 6
+      loc = constant_op.constant([3.0] * batch_size)
+      scale = constant_op.constant([np.sqrt(10.0)] * batch_size)
+      x = np.array([-2.5, 2.5, 4.0, 0.0, -1.0, 2.0], dtype=np.float32)
+      cauchy = cauchy_lib.Cauchy(loc=loc, scale=scale)
+
+      log_pdf = cauchy.log_prob(x)
+      self.assertAllEqual(cauchy.batch_shape_tensor().eval(), log_pdf.shape)
+      self.assertAllEqual(cauchy.batch_shape_tensor().eval(),
+                          log_pdf.eval().shape)
+      self.assertAllEqual(cauchy.batch_shape, log_pdf.shape)
+      self.assertAllEqual(cauchy.batch_shape, log_pdf.eval().shape)
+
+      pdf = cauchy.prob(x)
+      self.assertAllEqual(cauchy.batch_shape_tensor().eval(), pdf.shape)
+      self.assertAllEqual(cauchy.batch_shape_tensor().eval(), pdf.eval().shape)
+      self.assertAllEqual(cauchy.batch_shape, pdf.shape)
+      self.assertAllEqual(cauchy.batch_shape, pdf.eval().shape)
+
+      if not stats:
+        return
+      expected_log_pdf = stats.cauchy(loc.eval(), scale.eval()).logpdf(x)
+      self.assertAllClose(expected_log_pdf, log_pdf.eval())
+      self.assertAllClose(np.exp(expected_log_pdf), pdf.eval())
+
+  def testCauchyLogPDFMultidimensional(self):
+    with self.test_session():
+      batch_size = 6
+      loc = constant_op.constant([[3.0, -3.0]] * batch_size)
+      scale = constant_op.constant(
+          [[np.sqrt(10.0), np.sqrt(15.0)]] * batch_size)
+      x = np.array([[-2.5, 2.5, 4.0, 0.0, -1.0, 2.0]], dtype=np.float32).T
+      cauchy = cauchy_lib.Cauchy(loc=loc, scale=scale)
+
+      log_pdf = cauchy.log_prob(x)
+      log_pdf_values = log_pdf.eval()
+      self.assertEqual(log_pdf.shape, (6, 2))
+      self.assertAllEqual(cauchy.batch_shape_tensor().eval(), log_pdf.shape)
+      self.assertAllEqual(cauchy.batch_shape_tensor().eval(),
+                          log_pdf.eval().shape)
+      self.assertAllEqual(cauchy.batch_shape, log_pdf.shape)
+      self.assertAllEqual(cauchy.batch_shape, log_pdf.eval().shape)
+
+      pdf = cauchy.prob(x)
+      pdf_values = pdf.eval()
+      self.assertEqual(pdf.shape, (6, 2))
+      self.assertAllEqual(cauchy.batch_shape_tensor().eval(), pdf.shape)
+      self.assertAllEqual(cauchy.batch_shape_tensor().eval(), pdf_values.shape)
+      self.assertAllEqual(cauchy.batch_shape, pdf.shape)
+      self.assertAllEqual(cauchy.batch_shape, pdf_values.shape)
+
+      if not stats:
+        return
+      expected_log_pdf = stats.cauchy(loc.eval(), scale.eval()).logpdf(x)
+      self.assertAllClose(expected_log_pdf, log_pdf_values)
+      self.assertAllClose(np.exp(expected_log_pdf), pdf_values)
+
+  def testCauchyCDF(self):
+    with self.test_session():
+      batch_size = 50
+      loc = self._rng.randn(batch_size)
+      scale = self._rng.rand(batch_size) + 1.0
+      x = np.linspace(-8.0, 8.0, batch_size).astype(np.float64)
+
+      cauchy = cauchy_lib.Cauchy(loc=loc, scale=scale)
+      cdf = cauchy.cdf(x)
+      self.assertAllEqual(cauchy.batch_shape_tensor().eval(), cdf.shape)
+      self.assertAllEqual(cauchy.batch_shape_tensor().eval(), cdf.eval().shape)
+      self.assertAllEqual(cauchy.batch_shape, cdf.shape)
+      self.assertAllEqual(cauchy.batch_shape, cdf.eval().shape)
+      if not stats:
+        return
+      expected_cdf = stats.cauchy(loc, scale).cdf(x)
+      self.assertAllClose(expected_cdf, cdf.eval(), atol=0)
+
+  def testCauchySurvivalFunction(self):
+    with self.test_session():
+      batch_size = 50
+      loc = self._rng.randn(batch_size)
+      scale = self._rng.rand(batch_size) + 1.0
+      x = np.linspace(-8.0, 8.0, batch_size).astype(np.float64)
+
+      cauchy = cauchy_lib.Cauchy(loc=loc, scale=scale)
+
+      sf = cauchy.survival_function(x)
+      self.assertAllEqual(cauchy.batch_shape_tensor().eval(), sf.shape)
+      self.assertAllEqual(cauchy.batch_shape_tensor().eval(), sf.eval().shape)
+      self.assertAllEqual(cauchy.batch_shape, sf.shape)
+      self.assertAllEqual(cauchy.batch_shape, sf.eval().shape)
+      if not stats:
+        return
+      expected_sf = stats.cauchy(loc, scale).sf(x)
+      self.assertAllClose(expected_sf, sf.eval(), atol=0)
+
+  def testCauchyLogCDF(self):
+    with self.test_session():
+      batch_size = 50
+      loc = self._rng.randn(batch_size)
+      scale = self._rng.rand(batch_size) + 1.0
+      x = np.linspace(-100.0, 10.0, batch_size).astype(np.float64)
+
+      cauchy = cauchy_lib.Cauchy(loc=loc, scale=scale)
+
+      cdf = cauchy.log_cdf(x)
+      self.assertAllEqual(cauchy.batch_shape_tensor().eval(), cdf.shape)
+      self.assertAllEqual(cauchy.batch_shape_tensor().eval(), cdf.eval().shape)
+      self.assertAllEqual(cauchy.batch_shape, cdf.shape)
+      self.assertAllEqual(cauchy.batch_shape, cdf.eval().shape)
+
+      if not stats:
+        return
+      expected_cdf = stats.cauchy(loc, scale).logcdf(x)
+      self.assertAllClose(expected_cdf, cdf.eval(), atol=0, rtol=1e-5)
+
+  def testFiniteGradientAtDifficultPoints(self):
+    for dtype in [np.float32, np.float64]:
+      g = ops.Graph()
+      with g.as_default():
+        loc = variables.Variable(dtype(0.0))
+        scale = variables.Variable(dtype(1.0))
+        dist = cauchy_lib.Cauchy(loc=loc, scale=scale)
+        x = np.array([-100., -20., -5., 0., 5., 20., 100.]).astype(dtype)
+        for func in [
+            dist.cdf, dist.log_cdf, dist.survival_function,
+            dist.log_survival_function, dist.log_prob, dist.prob
+        ]:
+          value = func(x)
+          grads = gradients_impl.gradients(value, [loc, scale])
+          with self.test_session(graph=g):
+            variables.global_variables_initializer().run()
+            self.assertAllFinite(value)
+            self.assertAllFinite(grads[0])
+            self.assertAllFinite(grads[1])
+
+  def testCauchyLogSurvivalFunction(self):
+    with self.test_session():
+      batch_size = 50
+      loc = self._rng.randn(batch_size)
+      scale = self._rng.rand(batch_size) + 1.0
+      x = np.linspace(-10.0, 100.0, batch_size).astype(np.float64)
+
+      cauchy = cauchy_lib.Cauchy(loc=loc, scale=scale)
+
+      sf = cauchy.log_survival_function(x)
+      self.assertAllEqual(cauchy.batch_shape_tensor().eval(), sf.shape)
+      self.assertAllEqual(cauchy.batch_shape_tensor().eval(), sf.eval().shape)
+      self.assertAllEqual(cauchy.batch_shape, sf.shape)
+      self.assertAllEqual(cauchy.batch_shape, sf.eval().shape)
+
+      if not stats:
+        return
+      expected_sf = stats.cauchy(loc, scale).logsf(x)
+      self.assertAllClose(expected_sf, sf.eval(), atol=0, rtol=1e-5)
+
+  def testCauchyEntropy(self):
+    with self.test_session():
+      loc = np.array([1.0, 1.0, 1.0])
+      scale = np.array([[1.0, 2.0, 3.0]])
+      cauchy = cauchy_lib.Cauchy(loc=loc, scale=scale)
+
+      entropy = cauchy.entropy()
+      self.assertAllEqual(cauchy.batch_shape_tensor().eval(), entropy.shape)
+      self.assertAllEqual(cauchy.batch_shape_tensor().eval(),
+                          entropy.eval().shape)
+      self.assertAllEqual(cauchy.batch_shape, entropy.shape)
+      self.assertAllEqual(cauchy.batch_shape, entropy.eval().shape)
+
+      if not stats:
+        return
+      expected_entropy = stats.cauchy(loc, scale[0]).entropy().reshape((1, 3))
+      self.assertAllClose(expected_entropy, entropy.eval())
+
+  def testCauchyMode(self):
+    with self.test_session():
+      # Mu will be broadcast to [7, 7, 7].
+      loc = [7.]
+      scale = [11., 12., 13.]
+
+      cauchy = cauchy_lib.Cauchy(loc=loc, scale=scale)
+
+      self.assertAllEqual((3,), cauchy.mode().shape)
+      self.assertAllEqual([7., 7, 7], cauchy.mode().eval())
+
+  def testCauchyMean(self):
+    with self.test_session():
+      loc = [1., 2., 3.]
+      scale = [7.]
+      cauchy = cauchy_lib.Cauchy(loc=loc, scale=scale)
+
+      self.assertAllEqual((3,), cauchy.mean().shape)
+      self.assertAllEqual([np.nan] * 3, cauchy.mean().eval())
+
+  def testCauchyNanMean(self):
+    with self.test_session():
+      loc = [1., 2., 3.]
+      scale = [7.]
+      cauchy = cauchy_lib.Cauchy(loc=loc, scale=scale, allow_nan_stats=False)
+
+      with self.assertRaises(ValueError):
+        cauchy.mean().eval()
+
+  def testCauchyQuantile(self):
+    with self.test_session():
+      batch_size = 50
+      loc = self._rng.randn(batch_size)
+      scale = self._rng.rand(batch_size) + 1.0
+      p = np.linspace(0.000001, 0.999999, batch_size).astype(np.float64)
+
+      cauchy = cauchy_lib.Cauchy(loc=loc, scale=scale)
+      x = cauchy.quantile(p)
+
+      self.assertAllEqual(cauchy.batch_shape_tensor().eval(), x.shape)
+      self.assertAllEqual(cauchy.batch_shape_tensor().eval(), x.eval().shape)
+      self.assertAllEqual(cauchy.batch_shape, x.shape)
+      self.assertAllEqual(cauchy.batch_shape, x.eval().shape)
+
+      if not stats:
+        return
+      expected_x = stats.cauchy(loc, scale).ppf(p)
+      self.assertAllClose(expected_x, x.eval(), atol=0.)
+
+  def testCauchyVariance(self):
+    with self.test_session():
+      # scale will be broadcast to [7, 7, 7]
+      loc = [1., 2., 3.]
+      scale = [7.]
+      cauchy = cauchy_lib.Cauchy(loc=loc, scale=scale)
+
+      self.assertAllEqual((3,), cauchy.variance().shape)
+      self.assertAllEqual([np.nan] * 3, cauchy.variance().eval())
+
+  def testCauchyNanVariance(self):
+    with self.test_session():
+      # scale will be broadcast to [7, 7, 7]
+      loc = [1., 2., 3.]
+      scale = [7.]
+      cauchy = cauchy_lib.Cauchy(loc=loc, scale=scale, allow_nan_stats=False)
+
+      with self.assertRaises(ValueError):
+        cauchy.variance().eval()
+
+  def testCauchyStandardDeviation(self):
+    with self.test_session():
+      # scale will be broadcast to [7, 7, 7]
+      loc = [1., 2., 3.]
+      scale = [7.]
+      cauchy = cauchy_lib.Cauchy(loc=loc, scale=scale)
+
+      self.assertAllEqual((3,), cauchy.stddev().shape)
+      self.assertAllEqual([np.nan] * 3, cauchy.stddev().eval())
+
+  def testCauchyNanStandardDeviation(self):
+    with self.test_session():
+      # scale will be broadcast to [7, 7, 7]
+      loc = [1., 2., 3.]
+      scale = [7.]
+      cauchy = cauchy_lib.Cauchy(loc=loc, scale=scale, allow_nan_stats=False)
+
+      with self.assertRaises(ValueError):
+        cauchy.stddev().eval()
+
+  def testCauchySample(self):
+    with self.test_session():
+      loc = constant_op.constant(3.0)
+      scale = constant_op.constant(1.0)
+      loc_v = 3.0
+      n = constant_op.constant(100000)
+      cauchy = cauchy_lib.Cauchy(loc=loc, scale=scale)
+      samples = cauchy.sample(n)
+      sample_values = samples.eval()
+
+      self.assertEqual(sample_values.shape, (100000,))
+      self.assertAllClose(np.median(sample_values), loc_v, atol=1e-1)
+
+      expected_shape = tensor_shape.TensorShape([n.eval()]).concatenate(
+          tensor_shape.TensorShape(cauchy.batch_shape_tensor().eval()))
+
+      self.assertAllEqual(expected_shape, samples.shape)
+      self.assertAllEqual(expected_shape, sample_values.shape)
+
+      expected_shape = (
+          tensor_shape.TensorShape([n.eval()]).concatenate(cauchy.batch_shape))
+
+      self.assertAllEqual(expected_shape, samples.shape)
+      self.assertAllEqual(expected_shape, sample_values.shape)
+
+  def testCauchySampleMultiDimensional(self):
+    with self.test_session():
+      batch_size = 2
+      loc = constant_op.constant([[3.0, -3.0]] * batch_size)
+      scale = constant_op.constant([[0.5, 1.0]] * batch_size)
+      loc_v = [3.0, -3.0]
+      n = constant_op.constant(100000)
+      cauchy = cauchy_lib.Cauchy(loc=loc, scale=scale)
+      samples = cauchy.sample(n)
+      sample_values = samples.eval()
+      self.assertEqual(samples.shape, (100000, batch_size, 2))
+      self.assertAllClose(
+          np.median(sample_values[:, 0, 0]), loc_v[0], atol=1e-1)
+      self.assertAllClose(
+          np.median(sample_values[:, 0, 1]), loc_v[1], atol=1e-1)
+
+      expected_shape = tensor_shape.TensorShape([n.eval()]).concatenate(
+          tensor_shape.TensorShape(cauchy.batch_shape_tensor().eval()))
+      self.assertAllEqual(expected_shape, samples.shape)
+      self.assertAllEqual(expected_shape, sample_values.shape)
+
+      expected_shape = (
+          tensor_shape.TensorShape([n.eval()]).concatenate(cauchy.batch_shape))
+      self.assertAllEqual(expected_shape, samples.shape)
+      self.assertAllEqual(expected_shape, sample_values.shape)
+
+  def testCauchyNegativeLocFails(self):
+    with self.test_session():
+      cauchy = cauchy_lib.Cauchy(loc=[1.], scale=[-5.], validate_args=True)
+      with self.assertRaisesOpError("Condition x > 0 did not hold"):
+        cauchy.mode().eval()
+
+  def testCauchyShape(self):
+    with self.test_session():
+      loc = constant_op.constant([-3.0] * 5)
+      scale = constant_op.constant(11.0)
+      cauchy = cauchy_lib.Cauchy(loc=loc, scale=scale)
+
+      self.assertEqual(cauchy.batch_shape_tensor().eval(), [5])
+      self.assertEqual(cauchy.batch_shape, tensor_shape.TensorShape([5]))
+      self.assertAllEqual(cauchy.event_shape_tensor().eval(), [])
+      self.assertEqual(cauchy.event_shape, tensor_shape.TensorShape([]))
+
+  def testCauchyShapeWithPlaceholders(self):
+    loc = array_ops.placeholder(dtype=dtypes.float32)
+    scale = array_ops.placeholder(dtype=dtypes.float32)
+    cauchy = cauchy_lib.Cauchy(loc=loc, scale=scale)
+
+    with self.test_session() as sess:
+      # get_batch_shape should return an "<unknown>" tensor.
+      self.assertEqual(cauchy.batch_shape, tensor_shape.TensorShape(None))
+      self.assertEqual(cauchy.event_shape, ())
+      self.assertAllEqual(cauchy.event_shape_tensor().eval(), [])
+      self.assertAllEqual(
+          sess.run(
+              cauchy.batch_shape_tensor(),
+              feed_dict={
+                  loc: 5.0,
+                  scale: [1.0, 2.0]
+              }), [2])
+
+
+if __name__ == "__main__":
+  test.main()
diff --git a/tensorflow/contrib/distributions/python/ops/cauchy.py b/tensorflow/contrib/distributions/python/ops/cauchy.py
new file mode 100644
index 0000000000..8d59c1abfb
--- /dev/null
+++ b/tensorflow/contrib/distributions/python/ops/cauchy.py
@@ -0,0 +1,219 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""The Cauchy distribution class."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import numpy as np
+
+from tensorflow.python.framework import constant_op
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import ops
+from tensorflow.python.framework import tensor_shape
+from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import check_ops
+from tensorflow.python.ops import math_ops
+from tensorflow.python.ops import random_ops
+from tensorflow.python.ops.distributions import distribution
+
+__all__ = [
+    "Cauchy",
+]
+
+
+class Cauchy(distribution.Distribution):
+  """The Cauchy distribution with location `loc` and scale `scale`.
+
+  #### Mathematical details
+
+  The probability density function (pdf) is,
+
+  ```none
+  pdf(x; loc, scale) = 1 / (pi * scale * (1 + ((x - loc) / scale)**2))
+  ```
+  where `loc` is the location, and `scale` is the scale.
+
+  The Cauchy distribution is a member of the [location-scale family](
+  https://en.wikipedia.org/wiki/Location-scale_family), i.e.
+
+  ```none
+  X ~ Cauchy(loc=0, scale=1)
+  Y ~ Cauchy(loc=loc, scale=scale)
+  Y = loc + scale * X
+  ```
+
+  #### Examples
+
+  Examples of initialization of one or a batch of distributions.
+
+  ```python
+  # Define a single scalar Cauchy distribution.
+  dist = Cauchy(loc=0., scale=3.)
+
+  # Evaluate the cdf at 1, returning a scalar.
+  dist.cdf(1.)
+
+  # Define a batch of two scalar valued Cauchy distributions.
+  dist = Cauchy(loc=[1, 2.], scale=[11, 22.])
+
+  # Evaluate the pdf of the first distribution on 0, and the second on 1.5,
+  # returning a length two tensor.
+  dist.prob([0, 1.5])
+
+  # Get 3 samples, returning a 3 x 2 tensor.
+  dist.sample([3])
+  ```
+
+  Arguments are broadcast when possible.
+
+  ```python
+  # Define a batch of two scalar valued Cauchy distributions.
+  # Both have median 1, but different scales.
+  dist = tf.contrib.distributions.Cauchy(loc=1., scale=[11, 22.])
+  # Evaluate the pdf of both distributions on the same point, 3.0,
+  # returning a length 2 tensor.
+  dist.prob(3.0)
+  ```
+  """
+
+  def __init__(self,
+               loc,
+               scale,
+               validate_args=False,
+               allow_nan_stats=True,
+               name="Cauchy"):
+    """Construct Cauchy distributions.
+
+    The parameters `loc` and `scale` must be shaped in a way that supports
+    broadcasting (e.g. `loc + scale` is a valid operation).
+
+    Args:
+      loc: Floating point tensor; the modes of the distribution(s).
+      scale: Floating point tensor; the locations of the distribution(s).
+        Must contain only positive values.
+      validate_args: Python `bool`, default `False`. When `True` distribution
+        parameters are checked for validity despite possibly degrading runtime
+        performance. When `False` invalid inputs may silently render incorrect
+        outputs.
+      allow_nan_stats: Python `bool`, default `True`. When `True`,
+        statistics (e.g., mean, mode, variance) use the value "`NaN`" to
+        indicate the result is undefined. When `False`, an exception is raised
+        if one or more of the statistic's batch members are undefined.
+      name: Python `str` name prefixed to Ops created by this class.
+
+    Raises:
+      TypeError: if `loc` and `scale` have different `dtype`.
+    """
+    parameters = locals()
+    with ops.name_scope(name, values=[loc, scale]):
+      with ops.control_dependencies([check_ops.assert_positive(scale)]
+                                    if validate_args else []):
+        self._loc = array_ops.identity(loc, name="loc")
+        self._scale = array_ops.identity(scale, name="scale")
+        check_ops.assert_same_float_dtype([self._loc, self._scale])
+    super(Cauchy, self).__init__(
+        dtype=self._scale.dtype,
+        reparameterization_type=distribution.FULLY_REPARAMETERIZED,
+        validate_args=validate_args,
+        allow_nan_stats=allow_nan_stats,
+        parameters=parameters,
+        graph_parents=[self._loc, self._scale],
+        name=name)
+
+  @staticmethod
+  def _param_shapes(sample_shape):
+    return dict(
+        zip(("loc", "scale"),
+            ([ops.convert_to_tensor(sample_shape, dtype=dtypes.int32)] * 2)))
+
+  @property
+  def loc(self):
+    """Distribution parameter for the mean."""
+    return self._loc
+
+  @property
+  def scale(self):
+    """Distribution parameter for standard deviation."""
+    return self._scale
+
+  def _batch_shape_tensor(self):
+    return array_ops.broadcast_dynamic_shape(
+        array_ops.shape(self.loc), array_ops.shape(self.scale))
+
+  def _batch_shape(self):
+    return array_ops.broadcast_static_shape(self.loc.shape, self.scale.shape)
+
+  def _event_shape_tensor(self):
+    return constant_op.constant([], dtype=dtypes.int32)
+
+  def _event_shape(self):
+    return tensor_shape.scalar()
+
+  def _sample_n(self, n, seed=None):
+    shape = array_ops.concat([[n], self.batch_shape_tensor()], 0)
+    probs = random_ops.random_uniform(
+        shape=shape, minval=0., maxval=1., dtype=self.dtype, seed=seed)
+    return self._quantile(probs)
+
+  def _log_prob(self, x):
+    return self._log_unnormalized_prob(x) - self._log_normalization()
+
+  def _cdf(self, x):
+    return math_ops.atan(self._z(x)) / np.pi + 0.5
+
+  def _log_cdf(self, x):
+    return math_ops.log1p(2 / np.pi * math_ops.atan(self._z(x))) - np.log(2)
+
+  def _log_unnormalized_prob(self, x):
+    return -math_ops.log1p(math_ops.square(self._z(x)))
+
+  def _log_normalization(self):
+    return np.log(np.pi) + math_ops.log(self.scale)
+
+  def _entropy(self):
+    h = np.log(4 * np.pi) + math_ops.log(self.scale)
+    return h * array_ops.ones_like(self.loc)
+
+  def _quantile(self, p):
+    return self.loc + self.scale * math_ops.tan(np.pi * (p - 0.5))
+
+  def _mode(self):
+    return self.loc * array_ops.ones_like(self.scale)
+
+  def _z(self, x):
+    """Standardize input `x`."""
+    with ops.name_scope("standardize", values=[x]):
+      return (x - self.loc) / self.scale
+
+  def _inv_z(self, z):
+    """Reconstruct input `x` from a its normalized version."""
+    with ops.name_scope("reconstruct", values=[z]):
+      return z * self.scale + self.loc
+
+  def _mean(self):
+    if self.allow_nan_stats:
+      return array_ops.fill(self.batch_shape_tensor(),
+                            self.dtype.as_numpy_dtype(np.nan))
+    else:
+      raise ValueError("`mean` is undefined for Cauchy distribution.")
+
+  def _stddev(self):
+    if self.allow_nan_stats:
+      return array_ops.fill(self.batch_shape_tensor(),
+                            self.dtype.as_numpy_dtype(np.nan))
+    else:
+      raise ValueError("`stddev` is undefined for Cauchy distribution.")
diff --git a/tensorflow/contrib/eager/python/examples/notebooks/1_basics.ipynb b/tensorflow/contrib/eager/python/examples/notebooks/1_basics.ipynb
index 01616f2e7d..459f2f4a7d 100644
--- a/tensorflow/contrib/eager/python/examples/notebooks/1_basics.ipynb
+++ b/tensorflow/contrib/eager/python/examples/notebooks/1_basics.ipynb
@@ -429,7 +429,9 @@
         "cpu_tensor = tf.random_normal([SIZE, SIZE])\n",
         "\n",
         "if is_gpu_available:\n",
-        "  gpu_tensor = cpu_tensor.gpu()"
+        "  gpu_tensor = cpu_tensor.gpu()\n",
+        "else:\n",
+        "  print(\"GPU not available.\")"
       ]
     },
     {
diff --git a/tensorflow/contrib/eager/python/examples/notebooks/2_gradients.ipynb b/tensorflow/contrib/eager/python/examples/notebooks/2_gradients.ipynb
index 3b7e2cd435..e6c7c11733 100644
--- a/tensorflow/contrib/eager/python/examples/notebooks/2_gradients.ipynb
+++ b/tensorflow/contrib/eager/python/examples/notebooks/2_gradients.ipynb
@@ -383,7 +383,7 @@
         "\n",
         "`implicit_value_and_gradients()` returns a function that accepts the same inputs as the function passed in, and returns a tuple consisting of:\n",
         "\n",
-        "1. the value returned by the function passed in (in this case, the loss calculated by `calculate_linear_model_loss()`), and\n",
+        "1. the value returned by the function passed in (in this case, the loss calculated by `loss_fn()`), and\n",
         "1. a list of tuples consisting of:\n",
         "  1. The value of the gradient (a `tf.Tensor`) with respect to a given variable\n",
         "  1. The corresponding variable (`tf.Variable`)\n",
@@ -698,7 +698,7 @@
       "source": [
         "## Other Ways to Compute Gradients\n",
         "\n",
-        "Using our loss function as an example (`calculate_linear_model_loss()`), there are several other ways we could compute gradients:\n",
+        "Using our loss function as an example (`loss_fn()`), there are several other ways we could compute gradients:\n",
         "\n",
         "1. `tfe.implicit_gradients()`\n",
         "1. `tfe.gradients_function()`\n",
@@ -841,7 +841,7 @@
         "# tfe.implicit_value_and_gradients() demo\n",
         "value_gradients_fn = tfe.implicit_value_and_gradients(loss_fn)\n",
         "\n",
-        "# Returns only gradients:\n",
+        "# Returns the value returned by the function passed in, gradients, and variables:\n",
         "value_gradients_fn(inputs, labels, wb)"
       ]
     }
diff --git a/tensorflow/contrib/eager/python/examples/notebooks/3_datasets.ipynb b/tensorflow/contrib/eager/python/examples/notebooks/3_datasets.ipynb
index ebcc7027c1..0088da5c4b 100644
--- a/tensorflow/contrib/eager/python/examples/notebooks/3_datasets.ipynb
+++ b/tensorflow/contrib/eager/python/examples/notebooks/3_datasets.ipynb
@@ -9,7 +9,7 @@
       "source": [
         "# Eager Execution Tutorial: Importing Data\n",
         "\n",
-        "This notebook demonstrates the use of the [`tf.contrib.data.Dataset` API](https://www.tensorflow.org/programmers_guide/datasets) to build pipelines to feed data to your program. It covers:\n",
+        "This notebook demonstrates the use of the [`tf.data.Dataset` API](https://www.tensorflow.org/programmers_guide/datasets) to build pipelines to feed data to your program. It covers:\n",
         "\n",
         "* Creating a `Dataset`.\n",
         "* Iteration over a `Dataset` with eager execution enabled.\n",
@@ -64,7 +64,7 @@
       "source": [
         "# Step 1: Create a source `Dataset`\n",
         "\n",
-        "Create a _source_ dataset using one of the factory functions like [`Dataset.from_tensors`](https://www.tensorflow.org/api_docs/python/tf/contrib/data/Dataset#from_tensors), [`Dataset.from_tensor_slices`](https://www.tensorflow.org/api_docs/python/tf/contrib/data/Dataset#from_tensor_slices) or using objects that read from files like [`TextLineDataset`](https://www.tensorflow.org/api_docs/python/tf/contrib/data/TextLineDataset) or [`TFRecordDataset`](https://www.tensorflow.org/api_docs/python/tf/contrib/data/TFRecordDataset). See the [Programmer's Guide](https://www.google.com/url?sa=D\u0026q=https%3A%2F%2Fwww.tensorflow.org%2Fprogrammers_guide%2Fdatasets%23reading_input_data) for more information."
+        "Create a _source_ dataset using one of the factory functions like [`Dataset.from_tensors`](https://www.tensorflow.org/api_docs/python/tf/data/Dataset#from_tensors), [`Dataset.from_tensor_slices`](https://www.tensorflow.org/api_docs/python/tf/data/Dataset#from_tensor_slices) or using objects that read from files like [`TextLineDataset`](https://www.tensorflow.org/api_docs/python/tf/data/TextLineDataset) or [`TFRecordDataset`](https://www.tensorflow.org/api_docs/python/tf/data/TFRecordDataset). See the [Programmer's Guide](https://www.google.com/url?sa=D\u0026q=https%3A%2F%2Fwww.tensorflow.org%2Fprogrammers_guide%2Fdatasets%23reading_input_data) for more information."
       ]
     },
     {
@@ -83,7 +83,7 @@
       },
       "outputs": [],
       "source": [
-        "ds_tensors = tf.contrib.data.Dataset.from_tensor_slices([1, 2, 3, 4, 5, 6])\n",
+        "ds_tensors = tf.data.Dataset.from_tensor_slices([1, 2, 3, 4, 5, 6])\n",
         "\n",
         "# Create a CSV file\n",
         "import tempfile\n",
@@ -93,7 +93,7 @@
         "Line 2\n",
         "Line 3\n",
         "  \"\"\")\n",
-        "ds_file = tf.contrib.data.TextLineDataset(filename)\n"
+        "ds_file = tf.data.TextLineDataset(filename)\n"
       ]
     },
     {
@@ -105,7 +105,7 @@
       "source": [
         "# Step 2: Apply transformations\n",
         "\n",
-        "Use the transformations functions like [`map`](https://www.tensorflow.org/api_docs/python/tf/contrib/data/Dataset#map), [`batch`](https://www.tensorflow.org/api_docs/python/tf/contrib/data/Dataset#batch), [`shuffle`](https://www.tensorflow.org/api_docs/python/tf/contrib/data/Dataset#shuffle) etc. to apply transformations to the records of the dataset. See the [API documentation for `tf.contrib.data.Dataset`](https://www.tensorflow.org/api_docs/python/tf/contrib/data/Dataset) for details."
+        "Use the transformations functions like [`map`](https://www.tensorflow.org/api_docs/python/tf/data/Dataset#map), [`batch`](https://www.tensorflow.org/api_docs/python/tf/data/Dataset#batch), [`shuffle`](https://www.tensorflow.org/api_docs/python/tf/data/Dataset#shuffle) etc. to apply transformations to the records of the dataset. See the [API documentation for `tf.data.Dataset`](https://www.tensorflow.org/api_docs/python/tf/data/Dataset) for details."
       ]
     },
     {
diff --git a/tensorflow/contrib/layers/python/layers/layers.py b/tensorflow/contrib/layers/python/layers/layers.py
index 46b3eeae91..f1debc8590 100644
--- a/tensorflow/contrib/layers/python/layers/layers.py
+++ b/tensorflow/contrib/layers/python/layers/layers.py
@@ -286,7 +286,6 @@ def _fused_batch_norm(inputs,
     ValueError: If the rank of `inputs` is neither 2 or 4.
     ValueError: If rank or `C` dimension of `inputs` is undefined.
   """
-  # TODO(reedwm): Add support for fp16 inputs.
   if data_format not in (DATA_FORMAT_NCHW, DATA_FORMAT_NHWC):
     raise ValueError('data_format has to be either NCHW or NHWC.')
   with variable_scope.variable_scope(
@@ -310,7 +309,6 @@ def _fused_batch_norm(inputs,
         new_shape = [-1, channels, 1, 1]
       inputs = array_ops.reshape(inputs, new_shape)
     inputs_shape = inputs.get_shape()
-    dtype = inputs.dtype.base_dtype
     if data_format == DATA_FORMAT_NHWC:
       params_shape = inputs_shape[-1:]
     else:
@@ -320,9 +318,10 @@ def _fused_batch_norm(inputs,
                        (inputs.name, params_shape))
 
     # Allocate parameters for the beta and gamma of the normalization.
-    trainable_beta = trainable and center
     beta_collections = utils.get_variable_collections(variables_collections,
                                                       'beta')
+    # Float32 required to avoid precision-loss when using fp16 input/output
+    variable_dtype = dtypes.float32
     if not param_initializers:
       param_initializers = {}
     if not param_regularizers:
@@ -336,13 +335,13 @@ def _fused_batch_norm(inputs,
       beta = variables.model_variable(
           'beta',
           shape=params_shape,
-          dtype=dtype,
+          dtype=variable_dtype,
           initializer=beta_initializer,
           regularizer=beta_regularizer,
           collections=beta_collections,
-          trainable=trainable_beta)
+          trainable=trainable)
     else:
-      beta = array_ops.constant(0.0, shape=params_shape)
+      beta = array_ops.constant(0.0, dtype=variable_dtype, shape=params_shape)
 
     if scale:
       gamma_collections = utils.get_variable_collections(
@@ -352,13 +351,13 @@ def _fused_batch_norm(inputs,
       gamma = variables.model_variable(
           'gamma',
           shape=params_shape,
-          dtype=dtype,
+          dtype=variable_dtype,
           initializer=gamma_initializer,
           regularizer=gamma_regularizer,
           collections=gamma_collections,
           trainable=trainable)
     else:
-      gamma = array_ops.constant(1.0, shape=params_shape)
+      gamma = array_ops.constant(1.0, dtype=variable_dtype, shape=params_shape)
 
     # Create moving_mean and moving_variance variables and add them to the
     # appropriate collections. We disable variable partitioning while creating
@@ -375,7 +374,7 @@ def _fused_batch_norm(inputs,
       moving_mean = variables.model_variable(
           'moving_mean',
           shape=params_shape,
-          dtype=dtype,
+          dtype=variable_dtype,
           initializer=moving_mean_initializer,
           trainable=False,
           collections=moving_mean_collections)
@@ -386,7 +385,7 @@ def _fused_batch_norm(inputs,
       moving_variance = variables.model_variable(
           'moving_variance',
           shape=params_shape,
-          dtype=dtype,
+          dtype=variable_dtype,
           initializer=moving_variance_initializer,
           trainable=False,
           collections=moving_variance_collections)
diff --git a/tensorflow/contrib/layers/python/layers/layers_test.py b/tensorflow/contrib/layers/python/layers/layers_test.py
index ff7f0e4462..27bd3172d6 100644
--- a/tensorflow/contrib/layers/python/layers/layers_test.py
+++ b/tensorflow/contrib/layers/python/layers/layers_test.py
@@ -1774,10 +1774,13 @@ class BatchNormTest(test.TestCase):
       with self.assertRaisesRegexp(ValueError, 'undefined'):
         _layers.batch_norm(inputs, data_format='NCHW')
 
-  def _testCreateOp(self, fused):
+  def _testCreateOp(self, fused, dtype=None):
+    if dtype is None:
+      dtype = dtypes.float32
     height, width = 3, 3
     with self.test_session():
-      images = np.random.uniform(size=(5, height, width, 3)).astype('f')
+      images = np.random.uniform(size=(5, height, width, 3)).astype(
+          dtype.as_numpy_dtype)
       output = _layers.batch_norm(images, fused=fused)
       expected_name = ('BatchNorm/FusedBatchNorm' if fused else
                        'BatchNorm/batchnorm')
@@ -1792,6 +1795,9 @@ class BatchNormTest(test.TestCase):
   def testCreateOpFused(self):
     self._testCreateOp(True)
 
+  def testCreateOpFusedFloat16(self):
+    self._testCreateOp(True, dtypes.float16)
+
   def _testCreateOpBetaRegularizer(self, fused=True):
     height, width = 3, 3
     with self.test_session():
@@ -2659,10 +2665,63 @@ class BatchNormTest(test.TestCase):
   def testBatchNormBeta(self):
     # Test case for 11673
     with self.test_session() as sess:
-      a = array_ops.placeholder(dtypes.float32, shape=(10, 10, 10, 10))
-      b = _layers.batch_norm(a, center=False, data_format='NCHW',
-                                       zero_debias_moving_mean=True)
+      a_32 = array_ops.placeholder(dtypes.float32, shape=(10, 10, 10, 10))
+      _layers.batch_norm(
+          a_32, center=False, data_format='NCHW', zero_debias_moving_mean=True)
+      a_16 = array_ops.placeholder(dtypes.float16, shape=(10, 10, 10, 10))
+      _layers.batch_norm(
+          a_16, center=False, data_format='NCHW', zero_debias_moving_mean=True)
+      sess.run(variables_lib.global_variables_initializer())
+
+  def testVariablesAreFloat32(self):
+    height, width = 3, 3
+    with self.test_session():
+      images = random_ops.random_uniform(
+          (5, height, width, 3), seed=1, dtype=dtypes.float16)
+      _layers.batch_norm(images, scale=True)
+      beta = variables.get_variables_by_name('beta')[0]
+      gamma = variables.get_variables_by_name('gamma')[0]
+      self.assertEqual(beta.dtype, dtypes.float32_ref)
+      self.assertEqual(gamma.dtype, dtypes.float32_ref)
+      moving_mean = variables.get_variables_by_name('moving_mean')[0]
+      moving_variance = variables.get_variables_by_name('moving_variance')[0]
+      self.assertEqual(moving_mean.dtype, dtypes.float32_ref)
+      self.assertEqual(moving_variance.dtype, dtypes.float32_ref)
+
+  def _runFusedBatchNorm(self, shape, dtype):
+    channels = shape[1]
+    images = np.arange(np.product(shape), dtype=dtype).reshape(shape)
+    beta = init_ops.constant_initializer(
+        np.arange(2, channels + 2, dtype=np.float32))
+    gamma = init_ops.constant_initializer(
+        np.arange(10, channels + 10, dtype=np.float32) * 2.0)
+    mean = init_ops.constant_initializer(
+        np.arange(3, channels + 3, dtype=np.float32) * 5.0)
+    variance = init_ops.constant_initializer(
+        np.arange(1, channels + 1, dtype=np.float32) * 4.0)
+    output = _layers.batch_norm(
+        images,
+        fused=True,
+        is_training=True,
+        scale=True,
+        epsilon=0.5,
+        param_initializers={
+            'beta': beta,
+            'gamma': gamma,
+            'moving_mean': mean,
+            'moving_variance': variance,
+        },
+        data_format='NCHW')
+    with self.test_session(use_gpu=True) as sess:
       sess.run(variables_lib.global_variables_initializer())
+      return sess.run(output)
+
+  def testFusedBatchNormFloat16MatchesFloat32(self):
+    if test.is_gpu_available(cuda_only=True):
+      shape = [5, 4, 2, 3]
+      res_32 = self._runFusedBatchNorm(shape, np.float32)
+      res_16 = self._runFusedBatchNorm(shape, np.float16)
+      self.assertAllClose(res_32, res_16, rtol=1e-3)
 
   def testAdjustmentCreated(self):
     # Tests that the adjustment is appropriately passed to and used by the core
diff --git a/tensorflow/contrib/learn/python/learn/estimators/head.py b/tensorflow/contrib/learn/python/learn/estimators/head.py
index 468d792a0d..bc0e6fc009 100644
--- a/tensorflow/contrib/learn/python/learn/estimators/head.py
+++ b/tensorflow/contrib/learn/python/learn/estimators/head.py
@@ -119,7 +119,7 @@ class Head(object):
       update_op = tf.contrib.layers.optimize_loss(optimizer=sync,
                                                   loss=model_fn_ops.loss, ...)
       hooks = [sync.make_session_run_hook(is_chief)]
-      ... upate train_op and hooks in ModelFnOps and return
+      ... update train_op and hooks in ModelFnOps and return
     ```
   """
   __metaclass__ = abc.ABCMeta
diff --git a/tensorflow/contrib/learn/python/learn/estimators/model_fn.py b/tensorflow/contrib/learn/python/learn/estimators/model_fn.py
index 8be9c72adf..44e6c7c52d 100644
--- a/tensorflow/contrib/learn/python/learn/estimators/model_fn.py
+++ b/tensorflow/contrib/learn/python/learn/estimators/model_fn.py
@@ -23,7 +23,6 @@ import collections
 
 import six
 
-from tensorflow.contrib import framework as contrib_framework
 from tensorflow.contrib.framework import get_graph_from_inputs
 from tensorflow.contrib.learn.python.learn.estimators import constants
 from tensorflow.contrib.learn.python.learn.estimators import metric_key
@@ -32,6 +31,7 @@ from tensorflow.python.estimator import model_fn as core_model_fn_lib
 from tensorflow.python.estimator.export import export_output as core_export_lib
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
+from tensorflow.python.framework import sparse_tensor
 from tensorflow.python.framework import tensor_shape
 from tensorflow.python.ops import array_ops
 from tensorflow.python.platform import tf_logging as logging
@@ -156,11 +156,11 @@ class ModelFnOps(
     else:
       if isinstance(predictions, dict):
         predictions = {
-            k: contrib_framework.convert_to_tensor_or_sparse_tensor(v)
+            k: sparse_tensor.convert_to_tensor_or_sparse_tensor(v)
             for k, v in six.iteritems(predictions)
         }
       else:
-        predictions = contrib_framework.convert_to_tensor_or_sparse_tensor(
+        predictions = sparse_tensor.convert_to_tensor_or_sparse_tensor(
             predictions)
 
     # Validate eval_metric_ops
diff --git a/tensorflow/contrib/learn/python/learn/learn_io/data_feeder.py b/tensorflow/contrib/learn/python/learn/learn_io/data_feeder.py
index 4c50d40aaa..86fad4c553 100644
--- a/tensorflow/contrib/learn/python/learn/learn_io/data_feeder.py
+++ b/tensorflow/contrib/learn/python/learn/learn_io/data_feeder.py
@@ -28,13 +28,13 @@ import six
 from six.moves import xrange  # pylint: disable=redefined-builtin
 
 from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import tensor_util
 from tensorflow.python.ops import array_ops
 from tensorflow.python.platform import tf_logging as logging
 
 # pylint: disable=g-multiple-import,g-bad-import-order
 from .pandas_io import HAS_PANDAS, extract_pandas_data, extract_pandas_matrix, extract_pandas_labels
 from .dask_io import HAS_DASK, extract_dask_data, extract_dask_labels
-
 # pylint: enable=g-multiple-import,g-bad-import-order
 
 
@@ -365,8 +365,14 @@ class DataFeeder(object):
     self.random_state = np.random.RandomState(
         42) if random_state is None else random_state
 
-    num_samples = list(self._x.values())[0].shape[
-        0] if x_is_dict else self._x.shape[0]
+    if x_is_dict:
+      num_samples = list(self._x.values())[0].shape[0]
+    elif tensor_util.is_tensor(self._x):
+      num_samples = self._x.shape[
+          0].value  # shape will be a Dimension, extract an int
+    else:
+      num_samples = self._x.shape[0]
+
     if self._shuffle:
       self.indices = self.random_state.permutation(num_samples)
     else:
diff --git a/tensorflow/contrib/linear_optimizer/python/ops/sdca_ops.py b/tensorflow/contrib/linear_optimizer/python/ops/sdca_ops.py
index 13f2f0f502..7526f3ae0d 100644
--- a/tensorflow/contrib/linear_optimizer/python/ops/sdca_ops.py
+++ b/tensorflow/contrib/linear_optimizer/python/ops/sdca_ops.py
@@ -238,10 +238,10 @@ class SdcaModel(object):
     with name_scope('sdca/prediction'):
       sparse_variables = self._convert_n_to_tensor(self._variables[
           'sparse_features_weights'])
-      result = 0.0
+      result_sparse = 0.0
       for sfc, sv in zip(examples['sparse_features'], sparse_variables):
         # TODO(sibyl-Aix6ihai): following does not take care of missing features.
-        result += math_ops.segment_sum(
+        result_sparse += math_ops.segment_sum(
             math_ops.multiply(
                 array_ops.gather(sv, sfc.feature_indices), sfc.feature_values),
             sfc.example_indices)
@@ -249,12 +249,14 @@ class SdcaModel(object):
       dense_variables = self._convert_n_to_tensor(self._variables[
           'dense_features_weights'])
 
+      result_dense = 0.0
       for i in range(len(dense_variables)):
-        result += math_ops.matmul(dense_features[i],
-                                  array_ops.expand_dims(dense_variables[i], -1))
+        result_dense += math_ops.matmul(dense_features[i],
+                                        array_ops.expand_dims(
+                                            dense_variables[i], -1))
 
     # Reshaping to allow shape inference at graph construction time.
-    return array_ops.reshape(result, [-1])
+    return array_ops.reshape(result_dense, [-1]) + result_sparse
 
   def predictions(self, examples):
     """Add operations to compute predictions by the model.
diff --git a/tensorflow/contrib/lite/python/BUILD b/tensorflow/contrib/lite/python/BUILD
index b4aa032ff8..89e8693490 100644
--- a/tensorflow/contrib/lite/python/BUILD
+++ b/tensorflow/contrib/lite/python/BUILD
@@ -23,6 +23,7 @@ py_library(
 py_test(
     name = "lite_test",
     srcs = ["lite_test.py"],
+    srcs_version = "PY2AND3",
     deps = [
         ":lite",
         "//tensorflow/python:array_ops",
diff --git a/tensorflow/contrib/lite/testing/generate_examples.py b/tensorflow/contrib/lite/testing/generate_examples.py
index 86540d58a6..5bca82ded0 100644
--- a/tensorflow/contrib/lite/testing/generate_examples.py
+++ b/tensorflow/contrib/lite/testing/generate_examples.py
@@ -36,6 +36,11 @@ import traceback
 import zipfile
 import numpy as np
 from six import StringIO
+
+# TODO(aselle): Disable GPU for now
+os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
+
+# pylint: disable=g-import-not-at-top
 import tensorflow as tf
 from google.protobuf import text_format
 # TODO(aselle): switch to TensorFlow's resource_loader
@@ -379,12 +384,13 @@ def make_zip_of_tests(zip_path,
         report["toco_log"] = ""
         tf.reset_default_graph()
 
-        try:
-          inputs, outputs = make_graph(param_dict_real)
-        except (tf.errors.UnimplementedError, tf.errors.InvalidArgumentError,
-                ValueError):
-          report["tf_log"] += traceback.format_exc()
-          return None, report
+        with tf.device("/cpu:0"):
+          try:
+            inputs, outputs = make_graph(param_dict_real)
+          except (tf.errors.UnimplementedError, tf.errors.InvalidArgumentError,
+                  ValueError):
+            report["tf_log"] += traceback.format_exc()
+            return None, report
 
         sess = tf.Session()
         try:
diff --git a/tensorflow/contrib/lite/toco/python/BUILD b/tensorflow/contrib/lite/toco/python/BUILD
index 92246a8aed..17115047d2 100644
--- a/tensorflow/contrib/lite/toco/python/BUILD
+++ b/tensorflow/contrib/lite/toco/python/BUILD
@@ -61,6 +61,7 @@ tf_py_test(
     data = [
         ":toco_from_protos",
     ],
+    tags = ["no_pip"],
 )
 
 filegroup(
diff --git a/tensorflow/contrib/makefile/Makefile b/tensorflow/contrib/makefile/Makefile
index dba1464653..e2e6c05591 100644
--- a/tensorflow/contrib/makefile/Makefile
+++ b/tensorflow/contrib/makefile/Makefile
@@ -314,7 +314,8 @@ ifeq ($(TARGET),ANDROID)
 -Wno-narrowing \
 -fomit-frame-pointer \
 $(MARCH_OPTION) \
--fPIE
+-fPIE \
+-fPIC
 	INCLUDES = \
 -I$(NDK_ROOT)/sources/android/support/include \
 -I$(NDK_ROOT)/sources/cxx-stl/gnu-libstdc++/4.9/include \
diff --git a/tensorflow/contrib/makefile/README.md b/tensorflow/contrib/makefile/README.md
index 715eb51577..65bd60c12a 100644
--- a/tensorflow/contrib/makefile/README.md
+++ b/tensorflow/contrib/makefile/README.md
@@ -174,10 +174,26 @@ tensorflow/contrib/makefile/build_all_ios.sh
 
 This process will take around twenty minutes on a modern MacBook Pro.
 
-When it completes, you will have a library for a single architecture and the
-benchmark program. Although successfully compiling the benchmark program is a
+When it completes, you will have a unified library for all architectures
+(i386sim, x86_64sim, armv7, armv7s and arm64)  and the benchmark program.
+Although successfully compiling the benchmark program is a
 sign of success, the program is not a complete iOS app.
 
+If you would only like to build only one architecture to save time:
+(iOS 11+ only supports 64bit so you can get away with arm64)
+
+```bash
+tensorflow/contrib/makefile/build_all_ios.sh -a arm64
+```
+
+After the first build if you would like to just build the tensorflow
+library you can pass the -T flag to avoid a clean & rebuild. This should
+take you just a few seconds to generate the library if you modified one file.
+
+```bash
+tensorflow/contrib/makefile/build_all_ios.sh -a arm64 -T
+```
+
 To see TensorFlow running on iOS, the example Xcode project in
 [tensorflow/examples/ios](../../examples/ios/) shows how to use the static
 library in a simple app.
@@ -193,19 +209,18 @@ If you have not already, you will need to download dependencies:
 tensorflow/contrib/makefile/download_dependencies.sh
 ```
 
-Next, you will need to compile protobufs for iOS:
+Next, you will need to compile protobufs for iOS (optionally takes the -a $ARCH flag):
 
 ```bash
-tensorflow/contrib/makefile/compile_ios_protobuf.sh 
+tensorflow/contrib/makefile/compile_ios_protobuf.sh
 ```
 
-Then, you will need to compile the nsync library for iOS:
+Then, you will need to compile the nsync library for iOS (optionally takes -a $ARCH flag):
 
 ```bash
 export HOST_NSYNC_LIB=`tensorflow/contrib/makefile/compile_nsync.sh`
 export TARGET_NSYNC_LIB=`tensorflow/contrib/makefile/compile_nsync.sh -t ios`
 ```
-
 Then, you can run the makefile specifying iOS as the target, along with the
 architecture you want to build for:
 
@@ -219,10 +234,6 @@ This creates a library in
 `tensorflow/contrib/makefile/gen/lib/libtensorflow-core.a` that you can link any
 xcode project against. 
 
-At this point, you will have a library for a single architecture and the
-benchmark program. Although successfully compiling the benchmark program is a
-sign of success, the program is not a complete iOS app. 
-
 To see TensorFlow running on iOS, the example Xcode project in
 [tensorflow/examples/ios](../../examples/ios/) shows how to use the static
 library in a simple app.
@@ -237,6 +248,14 @@ time follow it with:
 compile_ios_tensorflow.sh
 ```
 
+`compile_ios_tensorflow.sh` takes the -a flag to build only for one architecture.
+In case you run into issues with unresolved symbols with nsync you can also pass
+-h ${HOST_NSYNC_LIB} and -n {TARGET_NSYNC_LIB} so it would look like:
+
+```bash
+tensorflow/contrib/makefile/compile_ios_tensorflow.sh -f "-O3" -h tensorflow/contrib/makefile/downloads/nsync/builds/default.macos.c++11/nsync.a -n tensorflow/contrib/makefile/downloads/nsync/builds/lipo.ios.c++11/nsync.a -a arm64
+```
+
 In XCode, you will need to use -force_load in the linker flags
 section of the build settings to pull in the global constructors that are used
 to register ops and kernels. 
@@ -249,7 +268,7 @@ debug mode. If you are concerned about performance or are working on a release
 build, you would likely want a higher optimization setting, like so:
  
 ```bash
-compile_ios_tensorflow.sh "-Os"
+compile_ios_tensorflow.sh -f "-Os"
 ```
 
 For other variations of valid optimization flags, see [clang optimization levels](http://stackoverflow.com/questions/15548023/clang-optimization-levels).
diff --git a/tensorflow/contrib/makefile/build_all_ios.sh b/tensorflow/contrib/makefile/build_all_ios.sh
index a49bbe4565..988e12b482 100755
--- a/tensorflow/contrib/makefile/build_all_ios.sh
+++ b/tensorflow/contrib/makefile/build_all_ios.sh
@@ -23,14 +23,29 @@ if [[ $(uname) != "Darwin" ]]; then
     exit 1
 fi
 
+usage() {
+  echo "Usage: $(basename "$0") [-a:T]"
+  echo "-a [build_arch] build only for specified arch x86_64 [default=all]"
+  echo "-T only build tensorflow (dont download other deps etc)"
+  exit 1
+}
+
+while getopts "a:T" opt_name; do
+  case "$opt_name" in
+    a) BUILD_ARCH="${OPTARG}";;
+    T) ONLY_MAKE_TENSORFLOW="true";;
+    *) usage;;
+  esac
+done
+shift $((OPTIND - 1))
+
+
 # Make sure we're in the correct directory, at the root of the source tree.
 SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
 cd ${SCRIPT_DIR}/../../../
 
-
-# Remove any old files first.
-make -f tensorflow/contrib/makefile/Makefile clean
-rm -rf tensorflow/contrib/makefile/downloads
+source "${SCRIPT_DIR}/build_helper.subr"
+JOB_COUNT="${JOB_COUNT:-$(get_job_count)}"
 
 # Setting a deployment target is required for building with bitcode,
 # otherwise linking will fail with:
@@ -41,20 +56,37 @@ if [[ -n MACOSX_DEPLOYMENT_TARGET ]]; then
     export MACOSX_DEPLOYMENT_TARGET=$(sw_vers -productVersion)
 fi
 
-# Pull down the required versions of the frameworks we need.
-tensorflow/contrib/makefile/download_dependencies.sh
+if [[ "${ONLY_MAKE_TENSORFLOW}" != "true" ]]; then
+    # Remove any old files first.
+    make -f tensorflow/contrib/makefile/Makefile clean
+    rm -rf tensorflow/contrib/makefile/downloads
 
-# Compile protobuf for the target iOS device architectures.
-tensorflow/contrib/makefile/compile_ios_protobuf.sh
+    # Pull down the required versions of the frameworks we need.
+    tensorflow/contrib/makefile/download_dependencies.sh
+
+    # Compile protobuf for the target iOS device architectures.
+    tensorflow/contrib/makefile/compile_ios_protobuf.sh
+fi
 
 # Compile nsync for the target iOS device architectures.
 # Don't use  export var=`something` syntax; it swallows the exit status.
 HOST_NSYNC_LIB=`tensorflow/contrib/makefile/compile_nsync.sh`
-TARGET_NSYNC_LIB=`tensorflow/contrib/makefile/compile_nsync.sh -t ios`
+if [[ -z "${BUILD_ARCH}" ]]; then
+    # No arch specified so build all architectures
+    TARGET_NSYNC_LIB=`tensorflow/contrib/makefile/compile_nsync.sh -t ios`
+else
+    # arch specified so build just that
+    TARGET_NSYNC_LIB=`tensorflow/contrib/makefile/compile_nsync.sh -t ios -a ${BUILD_ARCH}`
+fi
 export HOST_NSYNC_LIB TARGET_NSYNC_LIB
 
-# Build the iOS TensorFlow libraries.
-tensorflow/contrib/makefile/compile_ios_tensorflow.sh "-O3"
+if [[ -z "${BUILD_ARCH}" ]]; then
+    # build the ios tensorflow libraries.
+    tensorflow/contrib/makefile/compile_ios_tensorflow.sh -f "-O3" -h $HOST_NSYNC_LIB -n $TARGET_NSYNC_LIB
+else
+    # arch specified so build just that
+    tensorflow/contrib/makefile/compile_ios_tensorflow.sh -f "-O3" -a "${BUILD_ARCH}" -h $HOST_NSYNC_LIB -n $TARGET_NSYNC_LIB
+fi
 
 # Creates a static universal library in
 # tensorflow/contrib/makefile/gen/lib/libtensorflow-core.a
diff --git a/tensorflow/contrib/makefile/compile_ios_protobuf.sh b/tensorflow/contrib/makefile/compile_ios_protobuf.sh
index 4056db18a7..43e5809dd2 100755
--- a/tensorflow/contrib/makefile/compile_ios_protobuf.sh
+++ b/tensorflow/contrib/makefile/compile_ios_protobuf.sh
@@ -21,10 +21,28 @@ if [[ -n MACOSX_DEPLOYMENT_TARGET ]]; then
     export MACOSX_DEPLOYMENT_TARGET=$(sw_vers -productVersion)
 fi
 
-SCRIPT_DIR=$(dirname $0)
+usage() {
+  echo "Usage: $(basename "$0") [-a]"
+  echo "-a [build_arch] build for specified arch comma separate for multiple archs (eg: x86_64,arm64)"
+  echo "default arch i386, x86_64, armv7, armv7s, arm64"
+  exit 1
+}
+
+BUILD_TARGET="i386 x86_64 armv7 armv7s arm64"
+while getopts "a:" opt_name; do
+  case "$opt_name" in
+    a) BUILD_TARGET="${OPTARG}";;
+    *) usage;;
+  esac
+done
+shift $((OPTIND - 1))
+
+IFS=' ' read -r -a build_targets <<< "${BUILD_TARGET}"
+
+SCRIPT_DIR=$(cd `dirname $0` && pwd)
 source "${SCRIPT_DIR}/build_helper.subr"
 
-cd tensorflow/contrib/makefile
+cd ${SCRIPT_DIR}
 
 HOST_GENDIR="$(pwd)/gen/protobuf-host"
 mkdir -p "${HOST_GENDIR}"
@@ -64,6 +82,10 @@ else
   echo "protoc found. Skip building host tools."
 fi
 
+# Remove old libs
+rm -f ${LIBDIR}/libprotobuf.a
+rm -f ${LIBDIR}/libprotobuf-lite.a
+
 ./autogen.sh
 if [ $? -ne 0 ]
 then
@@ -71,157 +93,192 @@ then
   exit 1
 fi
 
-make distclean
-./configure \
---host=i386-apple-${OSX_VERSION} \
---disable-shared \
---enable-cross-compile \
---with-protoc="${PROTOC_PATH}" \
---prefix=${LIBDIR}/iossim_386 \
---exec-prefix=${LIBDIR}/iossim_386 \
-"CFLAGS=${CFLAGS} \
--mios-simulator-version-min=${MIN_SDK_VERSION} \
--arch i386 \
--fembed-bitcode \
--isysroot ${IPHONESIMULATOR_SYSROOT}" \
-"CXX=${CXX}" \
-"CXXFLAGS=${CXXFLAGS} \
--mios-simulator-version-min=${MIN_SDK_VERSION} \
--arch i386 \
--fembed-bitcode \
--isysroot \
-${IPHONESIMULATOR_SYSROOT}" \
-LDFLAGS="-arch i386 \
--fembed-bitcode \
--mios-simulator-version-min=${MIN_SDK_VERSION} \
-${LDFLAGS} \
--L${IPHONESIMULATOR_SYSROOT}/usr/lib/ \
--L${IPHONESIMULATOR_SYSROOT}/usr/lib/system" \
-"LIBS=${LIBS}"
-make -j"${JOB_COUNT}"
-make install
-
-make distclean
-./configure \
---host=x86_64-apple-${OSX_VERSION} \
---disable-shared \
---enable-cross-compile \
---with-protoc="${PROTOC_PATH}" \
---prefix=${LIBDIR}/iossim_x86_64 \
---exec-prefix=${LIBDIR}/iossim_x86_64 \
-"CFLAGS=${CFLAGS} \
--mios-simulator-version-min=${MIN_SDK_VERSION} \
--arch x86_64 \
--fembed-bitcode \
--isysroot ${IPHONESIMULATOR_SYSROOT}" \
-"CXX=${CXX}" \
-"CXXFLAGS=${CXXFLAGS} \
--mios-simulator-version-min=${MIN_SDK_VERSION} \
--arch x86_64 \
--fembed-bitcode \
--isysroot \
-${IPHONESIMULATOR_SYSROOT}" \
-LDFLAGS="-arch x86_64 \
--fembed-bitcode \
--mios-simulator-version-min=${MIN_SDK_VERSION} \
-${LDFLAGS} \
--L${IPHONESIMULATOR_SYSROOT}/usr/lib/ \
--L${IPHONESIMULATOR_SYSROOT}/usr/lib/system" \
-"LIBS=${LIBS}"
-make -j"${JOB_COUNT}"
-make install
-
-make distclean
-./configure \
---host=armv7-apple-${OSX_VERSION} \
---with-protoc="${PROTOC_PATH}" \
---disable-shared \
---prefix=${LIBDIR}/ios_arm7 \
---exec-prefix=${LIBDIR}/ios_arm7 \
-"CFLAGS=${CFLAGS} \
--miphoneos-version-min=${MIN_SDK_VERSION} \
--arch armv7 \
--fembed-bitcode \
--isysroot ${IPHONEOS_SYSROOT}" \
-"CXX=${CXX}" \
-"CXXFLAGS=${CXXFLAGS} \
--miphoneos-version-min=${MIN_SDK_VERSION} \
--arch armv7 \
--fembed-bitcode \
--isysroot ${IPHONEOS_SYSROOT}" \
-LDFLAGS="-arch armv7 \
--fembed-bitcode \
--miphoneos-version-min=${MIN_SDK_VERSION} \
-${LDFLAGS}" \
-"LIBS=${LIBS}"
-make -j"${JOB_COUNT}"
-make install
-
-make distclean
-./configure \
---host=armv7s-apple-${OSX_VERSION} \
---with-protoc="${PROTOC_PATH}" \
---disable-shared \
---prefix=${LIBDIR}/ios_arm7s \
---exec-prefix=${LIBDIR}/ios_arm7s \
-"CFLAGS=${CFLAGS} \
--miphoneos-version-min=${MIN_SDK_VERSION} \
--arch armv7s \
--fembed-bitcode \
--isysroot ${IPHONEOS_SYSROOT}" \
-"CXX=${CXX}" \
-"CXXFLAGS=${CXXFLAGS} \
--miphoneos-version-min=${MIN_SDK_VERSION} \
--arch armv7s \
--fembed-bitcode \
--isysroot ${IPHONEOS_SYSROOT}" \
-LDFLAGS="-arch armv7s \
--fembed-bitcode \
--miphoneos-version-min=${MIN_SDK_VERSION} \
-${LDFLAGS}" \
-"LIBS=${LIBS}"
-make -j"${JOB_COUNT}"
-make install
-
-make distclean
-./configure \
---host=arm \
---with-protoc="${PROTOC_PATH}" \
---disable-shared \
---prefix=${LIBDIR}/ios_arm64 \
---exec-prefix=${LIBDIR}/ios_arm64 \
-"CFLAGS=${CFLAGS} \
--miphoneos-version-min=${MIN_SDK_VERSION} \
--arch arm64 \
--fembed-bitcode \
--isysroot ${IPHONEOS_SYSROOT}" \
-"CXXFLAGS=${CXXFLAGS} \
--miphoneos-version-min=${MIN_SDK_VERSION} \
--arch arm64 \
--fembed-bitcode \
--isysroot ${IPHONEOS_SYSROOT}" \
-LDFLAGS="-arch arm64 \
--fembed-bitcode \
--miphoneos-version-min=${MIN_SDK_VERSION} \
-${LDFLAGS}" \
-"LIBS=${LIBS}"
-make -j"${JOB_COUNT}"
-make install
-
-lipo \
-${LIBDIR}/iossim_386/lib/libprotobuf.a \
-${LIBDIR}/iossim_x86_64/lib/libprotobuf.a \
-${LIBDIR}/ios_arm7/lib/libprotobuf.a \
-${LIBDIR}/ios_arm7s/lib/libprotobuf.a \
-${LIBDIR}/ios_arm64/lib/libprotobuf.a \
--create \
--output ${LIBDIR}/libprotobuf.a
-
-lipo \
-${LIBDIR}/iossim_386/lib/libprotobuf-lite.a \
-${LIBDIR}/iossim_x86_64/lib/libprotobuf-lite.a \
-${LIBDIR}/ios_arm7/lib/libprotobuf-lite.a \
-${LIBDIR}/ios_arm7s/lib/libprotobuf-lite.a \
-${LIBDIR}/ios_arm64/lib/libprotobuf-lite.a \
--create \
--output ${LIBDIR}/libprotobuf-lite.a
+package_pb_library() {
+    pb_libs="${LIBDIR}/${1}/lib/libprotobuf.a"
+    if [ -f "${LIBDIR}/libprotobuf.a" ]; then
+        pb_libs="$pb_libs ${LIBDIR}/libprotobuf.a"
+    fi
+    lipo \
+    $pb_libs \
+    -create \
+    -output ${LIBDIR}/libprotobuf.a
+
+    pblite_libs="${LIBDIR}/${1}/lib/libprotobuf-lite.a"
+    if [ -f "${LIBDIR}/libprotobuf-lite.a" ]; then
+        pblite_libs="$pblite_libs ${LIBDIR}/libprotobuf-lite.a"
+    fi
+    lipo \
+    $pblite_libs \
+    -create \
+    -output ${LIBDIR}/libprotobuf-lite.a
+}
+
+build_target() {
+case "$1" in
+    i386)  make distclean
+        ./configure \
+        --host=i386-apple-${OSX_VERSION} \
+        --disable-shared \
+        --enable-cross-compile \
+        --with-protoc="${PROTOC_PATH}" \
+        --prefix=${LIBDIR}/iossim_386 \
+        --exec-prefix=${LIBDIR}/iossim_386 \
+        "CFLAGS=${CFLAGS} \
+        -mios-simulator-version-min=${MIN_SDK_VERSION} \
+        -arch i386 \
+        -fembed-bitcode \
+        -isysroot ${IPHONESIMULATOR_SYSROOT}" \
+        "CXX=${CXX}" \
+        "CXXFLAGS=${CXXFLAGS} \
+        -mios-simulator-version-min=${MIN_SDK_VERSION} \
+        -arch i386 \
+        -fembed-bitcode \
+        -isysroot \
+        ${IPHONESIMULATOR_SYSROOT}" \
+        LDFLAGS="-arch i386 \
+        -fembed-bitcode \
+        -mios-simulator-version-min=${MIN_SDK_VERSION} \
+        ${LDFLAGS} \
+        -L${IPHONESIMULATOR_SYSROOT}/usr/lib/ \
+        -L${IPHONESIMULATOR_SYSROOT}/usr/lib/system" \
+        "LIBS=${LIBS}"
+        make -j"${JOB_COUNT}"
+        make install
+
+        package_pb_library "iossim_386"
+        ;;
+
+    x86_64) make distclean
+        ./configure \
+        --host=x86_64-apple-${OSX_VERSION} \
+        --disable-shared \
+        --enable-cross-compile \
+        --with-protoc="${PROTOC_PATH}" \
+        --prefix=${LIBDIR}/iossim_x86_64 \
+        --exec-prefix=${LIBDIR}/iossim_x86_64 \
+        "CFLAGS=${CFLAGS} \
+        -mios-simulator-version-min=${MIN_SDK_VERSION} \
+        -arch x86_64 \
+        -fembed-bitcode \
+        -isysroot ${IPHONESIMULATOR_SYSROOT}" \
+        "CXX=${CXX}" \
+        "CXXFLAGS=${CXXFLAGS} \
+        -mios-simulator-version-min=${MIN_SDK_VERSION} \
+        -arch x86_64 \
+        -fembed-bitcode \
+        -isysroot \
+        ${IPHONESIMULATOR_SYSROOT}" \
+        LDFLAGS="-arch x86_64 \
+        -fembed-bitcode \
+        -mios-simulator-version-min=${MIN_SDK_VERSION} \
+        ${LDFLAGS} \
+        -L${IPHONESIMULATOR_SYSROOT}/usr/lib/ \
+        -L${IPHONESIMULATOR_SYSROOT}/usr/lib/system" \
+        "LIBS=${LIBS}"
+        make -j"${JOB_COUNT}"
+        make install
+
+        package_pb_library "iossim_x86_64"
+        ;;
+
+    armv7) make distclean
+        ./configure \
+        --host=armv7-apple-${OSX_VERSION} \
+        --with-protoc="${PROTOC_PATH}" \
+        --disable-shared \
+        --prefix=${LIBDIR}/ios_arm7 \
+        --exec-prefix=${LIBDIR}/ios_arm7 \
+        "CFLAGS=${CFLAGS} \
+        -miphoneos-version-min=${MIN_SDK_VERSION} \
+        -arch armv7 \
+        -fembed-bitcode \
+        -isysroot ${IPHONEOS_SYSROOT}" \
+        "CXX=${CXX}" \
+        "CXXFLAGS=${CXXFLAGS} \
+        -miphoneos-version-min=${MIN_SDK_VERSION} \
+        -arch armv7 \
+        -fembed-bitcode \
+        -isysroot ${IPHONEOS_SYSROOT}" \
+        LDFLAGS="-arch armv7 \
+        -fembed-bitcode \
+        -miphoneos-version-min=${MIN_SDK_VERSION} \
+        ${LDFLAGS}" \
+        "LIBS=${LIBS}"
+        make -j"${JOB_COUNT}"
+        make install
+
+        package_pb_library "ios_arm7"
+        ;;
+
+    armv7s) make distclean
+        ./configure \
+        --host=armv7s-apple-${OSX_VERSION} \
+        --with-protoc="${PROTOC_PATH}" \
+        --disable-shared \
+        --prefix=${LIBDIR}/ios_arm7s \
+        --exec-prefix=${LIBDIR}/ios_arm7s \
+        "CFLAGS=${CFLAGS} \
+        -miphoneos-version-min=${MIN_SDK_VERSION} \
+        -arch armv7s \
+        -fembed-bitcode \
+        -isysroot ${IPHONEOS_SYSROOT}" \
+        "CXX=${CXX}" \
+        "CXXFLAGS=${CXXFLAGS} \
+        -miphoneos-version-min=${MIN_SDK_VERSION} \
+        -arch armv7s \
+        -fembed-bitcode \
+        -isysroot ${IPHONEOS_SYSROOT}" \
+        LDFLAGS="-arch armv7s \
+        -fembed-bitcode \
+        -miphoneos-version-min=${MIN_SDK_VERSION} \
+        ${LDFLAGS}" \
+        "LIBS=${LIBS}"
+        make -j"${JOB_COUNT}"
+        make install
+
+        package_pb_library "ios_arm7s"
+        ;;
+
+    arm64) make distclean
+        ./configure \
+        --host=arm \
+        --with-protoc="${PROTOC_PATH}" \
+        --disable-shared \
+        --prefix=${LIBDIR}/ios_arm64 \
+        --exec-prefix=${LIBDIR}/ios_arm64 \
+        "CFLAGS=${CFLAGS} \
+        -miphoneos-version-min=${MIN_SDK_VERSION} \
+        -arch arm64 \
+        -fembed-bitcode \
+        -isysroot ${IPHONEOS_SYSROOT}" \
+        "CXXFLAGS=${CXXFLAGS} \
+        -miphoneos-version-min=${MIN_SDK_VERSION} \
+        -arch arm64 \
+        -fembed-bitcode \
+        -isysroot ${IPHONEOS_SYSROOT}" \
+        LDFLAGS="-arch arm64 \
+        -fembed-bitcode \
+        -miphoneos-version-min=${MIN_SDK_VERSION} \
+        ${LDFLAGS}" \
+        "LIBS=${LIBS}"
+        make -j"${JOB_COUNT}"
+        make install
+
+        package_pb_library "ios_arm64"
+        ;;
+    *)
+        echo "Unknown ARCH"
+        exit 1
+        ;;
+esac 
+}
+
+for build_element in "${build_targets[@]}"
+do
+    echo "$build_element"
+    build_target "$build_element"
+done
+
+file ${LIBDIR}/libprotobuf.a
+file ${LIBDIR}/libprotobuf-lite.a
+echo "Done building and packaging the libraries"
diff --git a/tensorflow/contrib/makefile/compile_ios_tensorflow.sh b/tensorflow/contrib/makefile/compile_ios_tensorflow.sh
index 5d1cc8b375..ae82163e11 100755
--- a/tensorflow/contrib/makefile/compile_ios_tensorflow.sh
+++ b/tensorflow/contrib/makefile/compile_ios_tensorflow.sh
@@ -43,55 +43,124 @@ then
     exit 1
 fi
 
+usage() {
+  echo "Usage: $(basename "$0") [-a]"
+  echo "-a [build_arch] build for specified arch comma separate for multiple archs (eg: x86_64,arm64)"
+  echo "default is [i386, x86_64, armv7, armv7s, arm64]"
+  exit 1
+}
+
+BUILD_TARGET="i386 x86_64 armv7 armv7s arm64"
+while getopts "a:f:h:n:" opt_name; do
+  case "$opt_name" in
+    a) BUILD_TARGET="${OPTARG}";;
+    f) BUILD_OPT="${OPTARG}";;
+    h) NSYNC_HOST="${OPTARG}";;
+    n) NSYNC_TARGET="${OPTARG}";;
+    *) usage;;
+  esac
+done
+shift $((OPTIND - 1))
+
+IFS=' ' read -r -a build_targets <<< "${BUILD_TARGET}"
+
+SCRIPT_DIR=$(cd `dirname $0` && pwd)
+source "${SCRIPT_DIR}/build_helper.subr"
+
+
 GENDIR=tensorflow/contrib/makefile/gen/
 LIBDIR=${GENDIR}lib
 LIB_PREFIX=libtensorflow-core
 
-make -j"${JOB_COUNT}" -f tensorflow/contrib/makefile/Makefile \
-TARGET=IOS IOS_ARCH=ARMV7 LIB_NAME=${LIB_PREFIX}-armv7.a OPTFLAGS="$1"
-if [ $? -ne 0 ]
-then
-  echo "armv7 compilation failed."
-  exit 1
-fi
+#remove any old artifacts
+rm -rf ${LIBDIR}/${LIB_PREFIX}.a
 
-make -j"${JOB_COUNT}" -f tensorflow/contrib/makefile/Makefile \
-TARGET=IOS IOS_ARCH=ARMV7S LIB_NAME=${LIB_PREFIX}-armv7s.a OPTFLAGS="$1"
-if [ $? -ne 0 ]
-then
-  echo "arm7vs compilation failed."
-  exit 1
-fi
+package_tf_library() {
+    CAP_DIR=`echo $1 | tr 'a-z' 'A-Z'`
+    tf_libs="${LIBDIR}/ios_${CAP_DIR}/${LIB_PREFIX}-${1}.a"
+    if [ -f "${LIBDIR}/${LIB_PREFIX}.a" ]; then
+        tf_libs="$tf_libs ${LIBDIR}/${LIB_PREFIX}.a"
+    fi
+    lipo \
+    $tf_libs \
+    -create \
+    -output ${LIBDIR}/${LIB_PREFIX}.a
+}
 
-make -j"${JOB_COUNT}" -f tensorflow/contrib/makefile/Makefile \
-TARGET=IOS IOS_ARCH=ARM64 LIB_NAME=${LIB_PREFIX}-arm64.a OPTFLAGS="$1"
-if [ $? -ne 0 ]
-then
-  echo "arm64 compilation failed."
-  exit 1
-fi
+build_tf_target() {
+case "$1" in
+    armv7)
+        make -j"${JOB_COUNT}" -f tensorflow/contrib/makefile/Makefile \
+        TARGET=IOS IOS_ARCH=ARMV7 LIB_NAME=${LIB_PREFIX}-armv7.a \
+        OPTFLAGS="${BUILD_OPT}" HOST_NSYNC_LIB="${NSYNC_HOST}" \
+        TARGET_NSYNC_LIB="${NSYNC_TARGET}"
+        if [ $? -ne 0 ]
+        then
+          echo "armv7 compilation failed."
+          exit 1
+        fi
+        package_tf_library "armv7"
+        ;;
+    armv7s)
+        make -j"${JOB_COUNT}" -f tensorflow/contrib/makefile/Makefile \
+        TARGET=IOS IOS_ARCH=ARMV7S LIB_NAME=${LIB_PREFIX}-armv7s.a \
+        OPTFLAGS="${BUILD_OPT}" HOST_NSYNC_LIB="${NSYNC_HOST}" \
+        TARGET_NSYNC_LIB="${NSYNC_TARGET}"
 
-make -j"${JOB_COUNT}" -f tensorflow/contrib/makefile/Makefile \
-TARGET=IOS IOS_ARCH=I386 LIB_NAME=${LIB_PREFIX}-i386.a OPTFLAGS="$1"
-if [ $? -ne 0 ]
-then
-  echo "i386 compilation failed."
-  exit 1
-fi
+        if [ $? -ne 0 ]
+        then
+          echo "arm7vs compilation failed."
+          exit 1
+        fi
+        package_tf_library "armv7s"
+        ;;
+    arm64)
+        make -j"${JOB_COUNT}" -f tensorflow/contrib/makefile/Makefile \
+        TARGET=IOS IOS_ARCH=ARM64 LIB_NAME=${LIB_PREFIX}-arm64.a \
+        OPTFLAGS="${BUILD_OPT}" HOST_NSYNC_LIB="${NSYNC_HOST}" \
+        TARGET_NSYNC_LIB="${NSYNC_TARGET}"
+        if [ $? -ne 0 ]
+        then
+          echo "arm64 compilation failed."
+          exit 1
+        fi
+        package_tf_library "arm64"
+        ;;
+    i386)
+        make -j"${JOB_COUNT}" -f tensorflow/contrib/makefile/Makefile \
+        TARGET=IOS IOS_ARCH=I386 LIB_NAME=${LIB_PREFIX}-i386.a \
+        OPTFLAGS="${BUILD_OPT}" HOST_NSYNC_LIB="${NSYNC_HOST}" \
+        TARGET_NSYNC_LIB="${NSYNC_TARGET}"
+        if [ $? -ne 0 ]
+        then
+          echo "i386 compilation failed."
+          exit 1
+        fi
+        package_tf_library "i386"
+        ;;
+    x86_64)
+        make -j"${JOB_COUNT}" -f tensorflow/contrib/makefile/Makefile \
+        TARGET=IOS IOS_ARCH=X86_64 LIB_NAME=${LIB_PREFIX}-x86_64.a \
+        OPTFLAGS="${BUILD_OPT}" HOST_NSYNC_LIB="${NSYNC_HOST}" \
+        TARGET_NSYNC_LIB="${NSYNC_TARGET}"
+        if [ $? -ne 0 ]
+        then
+          echo "x86_64 compilation failed."
+          exit 1
+        fi
+        package_tf_library "x86_64"
+        ;;
+    *)
+        echo "Unknown ARCH"
+        exit 1
+esac
+}
 
-make -j"${JOB_COUNT}" -f tensorflow/contrib/makefile/Makefile \
-TARGET=IOS IOS_ARCH=X86_64 LIB_NAME=${LIB_PREFIX}-x86_64.a OPTFLAGS="$1"
-if [ $? -ne 0 ]
-then
-  echo "x86_64 compilation failed."
-  exit 1
-fi
+for build_tf_element in "${build_targets[@]}"
+do
+    echo "$build_tf_element"
+    build_tf_target "$build_tf_element"
+done
 
-lipo \
-${LIBDIR}/ios_ARMV7/${LIB_PREFIX}-armv7.a \
-${LIBDIR}/ios_ARMV7S/${LIB_PREFIX}-armv7s.a \
-${LIBDIR}/ios_ARM64/${LIB_PREFIX}-arm64.a \
-${LIBDIR}/ios_I386/${LIB_PREFIX}-i386.a \
-${LIBDIR}/ios_X86_64/${LIB_PREFIX}-x86_64.a \
--create \
--output ${LIBDIR}/${LIB_PREFIX}.a
+echo "Done building and packaging TF"
+file ${LIBDIR}/${LIB_PREFIX}.a
diff --git a/tensorflow/contrib/makefile/compile_nsync.sh b/tensorflow/contrib/makefile/compile_nsync.sh
index ecbd9bb825..930e6b8dea 100755
--- a/tensorflow/contrib/makefile/compile_nsync.sh
+++ b/tensorflow/contrib/makefile/compile_nsync.sh
@@ -265,7 +265,7 @@ for arch in $archs; do
                                           -I$(NDK_ROOT)/sources/cxx-stl/gnu-libstdc++/4.9/libs/'"$arch"'/include \
                                           -I../../platform/c++11 -I../../platform/gcc \
                                           -I../../platform/posix -pthread
-                        PLATFORM_CFLAGS=-std=c++11 -Wno-narrowing '"$march_option"' -fPIE
+                        PLATFORM_CFLAGS=-std=c++11 -Wno-narrowing '"$march_option"' -fPIE -fPIC
                         PLATFORM_LDFLAGS=-pthread
                         MKDEP=${CC} -M -std=c++11
                         PLATFORM_C=../../platform/c++11/src/nsync_semaphore_mutex.cc \
@@ -301,6 +301,9 @@ done
 
 case "$target_platform" in
 ios)    nsync_platform_dir="$nsync_builds_dir/lipo.$target_platform.c++11"
+        if [ -d "$nsync_platform_dir" ]; then
+            rm -rf "$nsync_platform_dir"
+        fi
         mkdir "$nsync_platform_dir"
         eval lipo $platform_libs -create -output '$nsync_platform_dir/nsync.a'
         echo "$nsync_platform_dir/nsync.a"
diff --git a/tensorflow/contrib/nn/__init__.py b/tensorflow/contrib/nn/__init__.py
index 3bf795d19a..0bc133a00e 100644
--- a/tensorflow/contrib/nn/__init__.py
+++ b/tensorflow/contrib/nn/__init__.py
@@ -15,6 +15,7 @@
 """Module for variants of ops in tf.nn.
 
 @@alpha_dropout
+@@conv1d_transpose
 @@deprecated_flipped_softmax_cross_entropy_with_logits
 @@deprecated_flipped_sparse_softmax_cross_entropy_with_logits
 @@deprecated_flipped_sigmoid_cross_entropy_with_logits
@@ -32,6 +33,7 @@ from tensorflow.contrib.nn.python.ops.alpha_dropout import *
 from tensorflow.contrib.nn.python.ops.cross_entropy import *
 from tensorflow.contrib.nn.python.ops.sampling_ops import *
 from tensorflow.contrib.nn.python.ops.scaled_softplus import *
+from tensorflow.python.ops.nn_ops import conv1d_transpose
 from tensorflow.python.ops.nn_ops import nth_element
 # pylint: enable=unused-import,wildcard-import
 
diff --git a/tensorflow/contrib/opt/BUILD b/tensorflow/contrib/opt/BUILD
index 8c46becf2c..a9a63cbce0 100644
--- a/tensorflow/contrib/opt/BUILD
+++ b/tensorflow/contrib/opt/BUILD
@@ -19,6 +19,7 @@ py_library(
         "python/training/external_optimizer.py",
         "python/training/lazy_adam_optimizer.py",
         "python/training/moving_average_optimizer.py",
+        "python/training/multitask_optimizer_wrapper.py",
         "python/training/nadam_optimizer.py",
         "python/training/powersign.py",
         "python/training/sign_decay.py",
@@ -98,6 +99,23 @@ py_test(
     ],
 )
 
+py_test(
+    name = "multitask_optimizer_wrapper_test",
+    srcs = ["python/training/multitask_optimizer_wrapper_test.py"],
+    srcs_version = "PY2AND3",
+    deps = [
+        ":opt_py",
+        "//tensorflow/python:client",
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:constant_op",
+        "//tensorflow/python:dtypes",
+        "//tensorflow/python:training",
+        "//tensorflow/python:variables",
+        "//third_party/py/numpy",
+        "@six_archive//:six",
+    ],
+)
+
 py_test(
     name = "lazy_adam_optimizer_test",
     srcs = ["python/training/lazy_adam_optimizer_test.py"],
diff --git a/tensorflow/contrib/opt/__init__.py b/tensorflow/contrib/opt/__init__.py
index caf22536bb..04643a6058 100644
--- a/tensorflow/contrib/opt/__init__.py
+++ b/tensorflow/contrib/opt/__init__.py
@@ -24,7 +24,7 @@ from tensorflow.contrib.opt.python.training.drop_stale_gradient_optimizer import
 from tensorflow.contrib.opt.python.training.external_optimizer import *
 from tensorflow.contrib.opt.python.training.lazy_adam_optimizer import *
 from tensorflow.contrib.opt.python.training.moving_average_optimizer import *
-from tensorflow.contrib.opt.python.training.nadam_optimizer import *
+from tensorflow.contrib.opt.python.training.multitask_optimizer_wrapper import *
 from tensorflow.contrib.opt.python.training.nadam_optimizer import *
 from tensorflow.contrib.opt.python.training.powersign import *
 from tensorflow.contrib.opt.python.training.variable_clipping_optimizer import *
@@ -34,11 +34,18 @@ from tensorflow.python.util.all_util import remove_undocumented
 
 
 _allowed_symbols = [
-    'PowerSignOptimizer', 'AddSignOptimizer'
+    'PowerSignOptimizer',
+    'AddSignOptimizer'
     'DelayCompensatedGradientDescentOptimizer',
-    'DropStaleGradientOptimizer', 'ExternalOptimizerInterface',
-    'LazyAdamOptimizer', 'NadamOptimizer', 'MovingAverageOptimizer',
-    'ScipyOptimizerInterface', 'VariableClippingOptimizer'
+    'DropStaleGradientOptimizer',
+    'ExternalOptimizerInterface',
+    'LazyAdamOptimizer',
+    'NadamOptimizer',
+    'MovingAverageOptimizer',
+    'ScipyOptimizerInterface',
+    'VariableClippingOptimizer',
+    'MultitaskOptimizerWrapper',
+    'clip_gradients_by_global_norm',
 ]
 
 remove_undocumented(__name__, _allowed_symbols)
diff --git a/tensorflow/contrib/opt/python/training/multitask_optimizer_wrapper.py b/tensorflow/contrib/opt/python/training/multitask_optimizer_wrapper.py
new file mode 100644
index 0000000000..cb6c77a86f
--- /dev/null
+++ b/tensorflow/contrib/opt/python/training/multitask_optimizer_wrapper.py
@@ -0,0 +1,140 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""An optimizer wrapper for stateful optimizers with multitask loss."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import types
+import six
+
+from tensorflow.python.framework import dtypes
+from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import clip_ops
+from tensorflow.python.ops import control_flow_ops
+from tensorflow.python.ops import math_ops
+from tensorflow.python.training import optimizer
+
+__all__ = ['MultitaskOptimizerWrapper', 'clip_gradients_by_global_norm']
+
+
+def _is_all_zeros(grad):
+  all_zeros = math_ops.equal(math_ops.count_nonzero(grad), 0)
+  return all_zeros
+
+
+def _get_wrapper(fn, opt):
+
+  def wrapper(self, grad, *args, **kwargs):  # pylint: disable=unused-argument
+    all_zeros = _is_all_zeros(grad)
+    return control_flow_ops.cond(all_zeros, control_flow_ops.no_op,
+                                 lambda: fn(grad, *args, **kwargs))
+
+  wrapper = types.MethodType(wrapper, opt)
+  return wrapper
+
+
+class MultitaskOptimizerWrapper(object):
+  """Optimizer wrapper making all-zero gradients harmless.
+
+  This might be useful when a multi-task loss is used,
+  and some components of the loss might be
+  not present (e.g. masked out) in some training batches.
+  Technically their gradient would be zero,
+  which would normally affect the optimizer state
+  (e.g. push running average to zero).
+  However this is not the desired behaviour,
+  since the missing loss component
+  should be treated as unknown rather than zero.
+
+  This wrapper filters out all-zero gradient tensors,
+  therefore preserving the optimizer state.
+
+  If gradient clipping by global norm is used,
+  the provided function clip_gradients_by_global_norm
+  should be used (and specified explicitly by the user).
+  Otherwise the global norm would be underestimated
+  because of all-zero tensors that should be ignored.
+
+  The gradient calculation and application
+  are delegated to an underlying optimizer.
+  The gradient application is altered only for all-zero tensors.
+
+  Example:
+  ```python
+  momentum_optimizer = tf.train.MomentumOptimizer(
+    learning_rate, momentum=0.9)
+  multitask_momentum_optimizer = tf.contrib.opt.MultitaskOptimizerWrapper(
+    momentum_optimizer)
+  gradvars = multitask_momentum_optimizer.compute_gradients(
+    loss)
+  gradvars_clipped, _ = tf.contrib.opt.clip_gradients_by_global_norm(
+    gradvars, 15.0)
+  train_op = multitask_momentum_optimizer.apply_gradients(
+    gradvars_clipped, global_step=batch)
+  ```
+  """
+
+  def __init__(self, opt):
+    """Constructor.
+
+    Args:
+      opt: an instance of a class that implements tf.train.Optimizer.
+    """
+    if not isinstance(opt, optimizer.Optimizer):
+      raise TypeError(
+          'Supplied optimizer must be an instance of tf.train.Optimizer')
+    self._opt = opt
+    overridden_methods = ('_apply_dense', '_resource_apply_dense',
+                          '_apply_sparse', '_resource_apply_sparse')
+    for name in overridden_methods:
+      fn = getattr(self._opt, name)
+      wrapper = _get_wrapper(fn, self._opt)
+      setattr(self._opt, name, wrapper)
+
+  def __getattr__(self, name):
+    return getattr(self._opt, name)
+
+
+def clip_gradients_by_global_norm(gradients_variables, clip_norm=20.):
+  """Clips gradients of a multitask loss by their global norm.
+
+  Ignores all-zero tensors when computing the global norm.
+
+  Args:
+    gradients_variables: a list of pairs (gradient, variable).
+    clip_norm: a float Tensor, the global norm to clip on. Default is 20.0.
+
+  Returns:
+    list: A list of pairs of the same type as gradients_variables,.
+    fixed_global_norm: A 0-D (scalar) Tensor representing the global norm.
+  """
+  gradients, variables = six.moves.zip(*gradients_variables)
+
+  def _replace_nonexisting_grad(grad):
+    if grad is None:
+      return grad
+    all_zeros = _is_all_zeros(grad)
+    return control_flow_ops.cond(
+        all_zeros,
+        lambda: array_ops.zeros([], dtype=dtypes.as_dtype(grad.dtype)),
+        lambda: grad)
+
+  nonzero_gradients = [_replace_nonexisting_grad(g) for g in gradients]
+  fixed_global_norm = clip_ops.global_norm(nonzero_gradients)
+  gradients, _ = clip_ops.clip_by_global_norm(
+      gradients, clip_norm, use_norm=fixed_global_norm)
+  return list(six.moves.zip(gradients, variables)), fixed_global_norm
diff --git a/tensorflow/contrib/opt/python/training/multitask_optimizer_wrapper_test.py b/tensorflow/contrib/opt/python/training/multitask_optimizer_wrapper_test.py
new file mode 100644
index 0000000000..618d8eb18d
--- /dev/null
+++ b/tensorflow/contrib/opt/python/training/multitask_optimizer_wrapper_test.py
@@ -0,0 +1,119 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for MultitaskOptimizerWrapper."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import numpy as np
+import six
+
+from tensorflow.contrib.opt.python.training import multitask_optimizer_wrapper
+from tensorflow.python.framework import constant_op
+from tensorflow.python.framework import dtypes
+from tensorflow.python.ops import variables
+from tensorflow.python.platform import test
+from tensorflow.python.training import momentum
+
+
+class MultitaskOptimizerWrapperTest(test.TestCase):
+  """Tests for the multitask optimizer wrapper.
+  """
+
+  def testWrapper(self):
+    with self.test_session():
+      var0 = variables.Variable([1.0, 2.0], dtype=dtypes.float32)
+      var1 = variables.Variable([3.0, 4.0], dtype=dtypes.float32)
+      grads0 = constant_op.constant([0.1, 0.1], dtype=dtypes.float32)
+      grads1 = constant_op.constant([0.01, 0.01], dtype=dtypes.float32)
+      grads_allzero = constant_op.constant([0.0, 0.0], dtype=dtypes.float32)
+      mom_opt_impl = momentum.MomentumOptimizer(learning_rate=2.0, momentum=0.9)
+      mom_opt = multitask_optimizer_wrapper.MultitaskOptimizerWrapper(
+          mom_opt_impl)
+      mom_update = mom_opt.apply_gradients(zip([grads0, grads1], [var0, var1]))
+      mom_update_partial = mom_opt.apply_gradients(
+          zip([grads_allzero, grads1], [var0, var1]))
+      mom_update_no_action = mom_opt.apply_gradients(
+          zip([grads_allzero, grads_allzero], [var0, var1]))
+      self.evaluate(variables.global_variables_initializer())
+      # Fetch params to validate initial values
+      self.assertAllClose([1.0, 2.0], self.evaluate(var0))
+      self.assertAllClose([3.0, 4.0], self.evaluate(var1))
+
+      self.assertEqual(["momentum"], mom_opt.get_slot_names())
+      slot0 = mom_opt.get_slot(var0, "momentum")
+      self.assertEquals(slot0.get_shape(), var0.get_shape())
+      slot1 = mom_opt.get_slot(var1, "momentum")
+      self.assertEquals(slot1.get_shape(), var1.get_shape())
+
+      # Step 1: normal momentum update.
+      self.evaluate(mom_update)
+      # Check that the momentum accumulators have been updated.
+      self.assertAllCloseAccordingToType(
+          np.array([0.1, 0.1]), self.evaluate(slot0))
+      self.assertAllCloseAccordingToType(
+          np.array([0.01, 0.01]), self.evaluate(slot1))
+      # Check that the parameters have been updated.
+      self.assertAllCloseAccordingToType(
+          np.array([1.0 - (0.1 * 2.0), 2.0 - (0.1 * 2.0)]), self.evaluate(var0))
+      self.assertAllCloseAccordingToType(
+          np.array([3.0 - (0.01 * 2.0), 4.0 - (0.01 * 2.0)]),
+          self.evaluate(var1))
+
+      # Step 2: momentum update that changes only slot1 but not slot0.
+      self.evaluate(mom_update_partial)
+      # Check that only the relevant momentum accumulator has been updated.
+      self.assertAllCloseAccordingToType(
+          np.array([0.1, 0.1]), self.evaluate(slot0))
+      self.assertAllCloseAccordingToType(
+          np.array([(0.9 * 0.01 + 0.01), (0.9 * 0.01 + 0.01)]),
+          self.evaluate(slot1))
+
+      # Step 3: momentum update that does not change anything.
+      self.evaluate(mom_update_no_action)
+      # Check that the momentum accumulators have *NOT* been updated.
+      self.assertAllCloseAccordingToType(
+          np.array([0.1, 0.1]), self.evaluate(slot0))
+      self.assertAllCloseAccordingToType(
+          np.array([(0.9 * 0.01 + 0.01), (0.9 * 0.01 + 0.01)]),
+          self.evaluate(slot1))
+
+  def testGradientClipping(self):
+    with self.test_session():
+      var0 = variables.Variable([1.0, 2.0], dtype=dtypes.float32)
+      var1 = variables.Variable([3.0, 4.0], dtype=dtypes.float32)
+      var2 = variables.Variable([3.0, 4.0], dtype=dtypes.float32)
+      var3 = variables.Variable([3.0, 4.0], dtype=dtypes.float32)
+      grads0 = constant_op.constant([10.0, 15.0], dtype=dtypes.float32)
+      grads1 = constant_op.constant([0.0, 5.0], dtype=dtypes.float32)
+      grads2 = constant_op.constant([0.0, 0.0], dtype=dtypes.float32)
+      grads3 = None
+      varlist = [var0, var1, var2, var3]
+      gradients = [grads0, grads1, grads2, grads3]
+      clipped_gradvars, global_norm = (
+          multitask_optimizer_wrapper.clip_gradients_by_global_norm(
+              six.moves.zip(gradients, varlist), clip_norm=1.0))
+      clipped_grads = list(six.moves.zip(*clipped_gradvars))[0]
+      reference_global_norm = np.sqrt(np.sum(np.square([10.0, 15.0, 0.0, 5.0])))
+      self.assertAllCloseAccordingToType(
+          self.evaluate(global_norm), reference_global_norm)
+      self.assertAllCloseAccordingToType(
+          self.evaluate(clipped_grads[2]), np.array([0., 0.]))
+      self.assertEqual(clipped_grads[3], None)
+
+
+if __name__ == "__main__":
+  test.main()
diff --git a/tensorflow/contrib/rnn/python/kernel_tests/core_rnn_cell_test.py b/tensorflow/contrib/rnn/python/kernel_tests/core_rnn_cell_test.py
index 909c6aba2b..f130a2187c 100644
--- a/tensorflow/contrib/rnn/python/kernel_tests/core_rnn_cell_test.py
+++ b/tensorflow/contrib/rnn/python/kernel_tests/core_rnn_cell_test.py
@@ -24,6 +24,7 @@ import numpy as np
 
 from tensorflow.contrib import rnn as contrib_rnn
 from tensorflow.contrib.rnn.python.ops import core_rnn_cell
+from tensorflow.contrib.rnn.python.ops import rnn_cell as contrib_rnn_cell
 from tensorflow.core.protobuf import config_pb2
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
@@ -358,6 +359,46 @@ class RNNCellTest(test.TestCase):
       self.assertEquals(variables[2].op.name,
                         "root/lstm_cell/projection/kernel")
 
+  def testLSTMCellLayerNorm(self):
+    with self.test_session() as sess:
+      num_units = 2
+      num_proj = 3
+      batch_size = 1
+      input_size = 4
+      with variable_scope.variable_scope(
+          "root", initializer=init_ops.constant_initializer(0.5)):
+        x = array_ops.zeros([batch_size, input_size])
+        c = array_ops.zeros([batch_size, num_units])
+        h = array_ops.zeros([batch_size, num_proj])
+        state = rnn_cell_impl.LSTMStateTuple(c, h)
+        cell = contrib_rnn_cell.LayerNormLSTMCell(
+            num_units=num_units,
+            num_proj=num_proj,
+            forget_bias=1.0,
+            layer_norm=True,
+            norm_gain=1.0,
+            norm_shift=0.0)
+        g, out_m = cell(x, state)
+        sess.run([variables_lib.global_variables_initializer()])
+        res = sess.run(
+            [g, out_m], {
+                x.name: np.ones((batch_size, input_size)),
+                c.name: 0.1 * np.ones((batch_size, num_units)),
+                h.name: 0.1 * np.ones((batch_size, num_proj))
+            })
+        self.assertEqual(len(res), 2)
+        # The numbers in results were not calculated, this is mostly just a
+        # smoke test.
+        self.assertEqual(res[0].shape, (batch_size, num_proj))
+        self.assertEqual(res[1][0].shape, (batch_size, num_units))
+        self.assertEqual(res[1][1].shape, (batch_size, num_proj))
+        # Different inputs so different outputs and states
+        for i in range(1, batch_size):
+          self.assertTrue(
+              float(np.linalg.norm((res[0][0, :] - res[0][i, :]))) < 1e-6)
+          self.assertTrue(
+              float(np.linalg.norm((res[1][0, :] - res[1][i, :]))) < 1e-6)
+
   def testOutputProjectionWrapper(self):
     with self.test_session() as sess:
       with variable_scope.variable_scope(
diff --git a/tensorflow/contrib/rnn/python/kernel_tests/rnn_cell_test.py b/tensorflow/contrib/rnn/python/kernel_tests/rnn_cell_test.py
index ebd4564f12..46823fa364 100644
--- a/tensorflow/contrib/rnn/python/kernel_tests/rnn_cell_test.py
+++ b/tensorflow/contrib/rnn/python/kernel_tests/rnn_cell_test.py
@@ -37,6 +37,7 @@ from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import random_ops
 from tensorflow.python.ops import rnn
 from tensorflow.python.ops import rnn_cell
+from tensorflow.python.ops import rnn_cell_impl
 from tensorflow.python.ops import variable_scope
 from tensorflow.python.ops import variables
 from tensorflow.python.platform import test
@@ -995,26 +996,19 @@ class RNNCellTest(test.TestCase):
         output, state = cell(x, hidden)
 
         sess.run([variables.global_variables_initializer()])
-        res = sess.run([output, state], {
-            hidden[0].name:
-                np.array([[[[[1.],[1.]], 
-                            [[1.],[1.]]],
-                           [[[1.],[1.]],
-                            [[1.],[1.]]]], 
-                          [[[[2.],[2.]],
-                            [[2.],[2.]]],
-                           [[[2.],[2.]],
-                            [[2.],[2.]]]]]),
-            x.name:
-                np.array([[[[[1.],[1.]],
-                            [[1.],[1.]]],
-                           [[[1.],[1.]],
-                            [[1.],[1.]]]],
-                          [[[[2.],[2.]],
-                            [[2.],[2.]]],
-                           [[[2.],[2.]],
-                           [[2.],[2.]]]]])
-        })
+        res = sess.run(
+            [output, state], {
+                hidden[0].name:
+                    np.array([[[[[1.], [1.]], [[1.], [1.]]], [[[1.], [1.]], [[
+                        1.
+                    ], [1.]]]], [[[[2.], [2.]], [[2.], [2.]]],
+                                 [[[2.], [2.]], [[2.], [2.]]]]]),
+                x.name:
+                    np.array([[[[[1.], [1.]], [[1.], [1.]]], [[[1.], [1.]], [[
+                        1.
+                    ], [1.]]]], [[[[2.], [2.]], [[2.], [2.]]], [[[2.], [2.]],
+                                                                [[2.], [2.]]]]])
+            })
         # This is a smoke test, making sure expected values are unchanged.
         self.assertEqual(len(res), 2)
         self.assertAllClose(res[0], res[1].h)
@@ -1275,6 +1269,47 @@ class LayerNormBasicLSTMCellTest(test.TestCase):
         self.assertAllClose(res[2].c, expected_c1, 1e-5)
         self.assertAllClose(res[2].h, expected_h1, 1e-5)
 
+  def testBasicLSTMCellWithStateTupleLayerNorm(self):
+    """The results of LSTMCell and LayerNormBasicLSTMCell should be the same."""
+    with self.test_session() as sess:
+      with variable_scope.variable_scope(
+          "root", initializer=init_ops.constant_initializer(0.5)):
+        x = array_ops.zeros([1, 2])
+        c0 = array_ops.zeros([1, 2])
+        h0 = array_ops.zeros([1, 2])
+        state0 = rnn_cell_impl.LSTMStateTuple(c0, h0)
+        c1 = array_ops.zeros([1, 2])
+        h1 = array_ops.zeros([1, 2])
+        state1 = rnn_cell_impl.LSTMStateTuple(c1, h1)
+        cell = rnn_cell_impl.MultiRNNCell([
+            contrib_rnn_cell.LayerNormLSTMCell(
+                2, layer_norm=True, norm_gain=1.0, norm_shift=0.0)
+            for _ in range(2)
+        ])
+        h, (s0, s1) = cell(x, (state0, state1))
+        sess.run([variables.global_variables_initializer()])
+        res = sess.run(
+            [h, s0, s1], {
+                x.name: np.array([[1., 1.]]),
+                c0.name: 0.1 * np.asarray([[0, 1]]),
+                h0.name: 0.1 * np.asarray([[2, 3]]),
+                c1.name: 0.1 * np.asarray([[4, 5]]),
+                h1.name: 0.1 * np.asarray([[6, 7]]),
+            })
+
+        expected_h = np.array([[-0.38079708, 0.38079708]])
+        expected_h0 = np.array([[-0.38079708, 0.38079708]])
+        expected_c0 = np.array([[-1.0, 1.0]])
+        expected_h1 = np.array([[-0.38079708, 0.38079708]])
+        expected_c1 = np.array([[-1.0, 1.0]])
+
+        self.assertEqual(len(res), 3)
+        self.assertAllClose(res[0], expected_h, 1e-5)
+        self.assertAllClose(res[1].c, expected_c0, 1e-5)
+        self.assertAllClose(res[1].h, expected_h0, 1e-5)
+        self.assertAllClose(res[2].c, expected_c1, 1e-5)
+        self.assertAllClose(res[2].h, expected_h1, 1e-5)
+
   def testBasicLSTMCellWithDropout(self):
 
     def _is_close(x, y, digits=4):
diff --git a/tensorflow/contrib/rnn/python/ops/rnn_cell.py b/tensorflow/contrib/rnn/python/ops/rnn_cell.py
index d4691f2c27..0698d40438 100644
--- a/tensorflow/contrib/rnn/python/ops/rnn_cell.py
+++ b/tensorflow/contrib/rnn/python/ops/rnn_cell.py
@@ -76,6 +76,18 @@ def _get_sharded_variable(name, shape, dtype, num_shards):
   return shards
 
 
+def _norm(g, b, inp, scope):
+  shape = inp.get_shape()[-1:]
+  gamma_init = init_ops.constant_initializer(g)
+  beta_init = init_ops.constant_initializer(b)
+  with vs.variable_scope(scope):
+    # Initialize beta and gamma for use by layer_norm.
+    vs.get_variable("gamma", shape=shape, initializer=gamma_init)
+    vs.get_variable("beta", shape=shape, initializer=beta_init)
+  normalized = layers.layer_norm(inp, reuse=True, scope=scope)
+  return normalized
+
+
 class CoupledInputForgetGateLSTMCell(rnn_cell_impl.RNNCell):
   """Long short-term memory unit (LSTM) recurrent network cell.
 
@@ -102,13 +114,33 @@ class CoupledInputForgetGateLSTMCell(rnn_cell_impl.RNNCell):
 
   The class uses optional peep-hole connections, and an optional projection
   layer.
+
+  Layer normalization implementation is based on:
+
+    https://arxiv.org/abs/1607.06450.
+
+  "Layer Normalization"
+  Jimmy Lei Ba, Jamie Ryan Kiros, Geoffrey E. Hinton
+
+  and is applied before the internal nonlinearities.
+
   """
 
-  def __init__(self, num_units, use_peepholes=False,
-               initializer=None, num_proj=None, proj_clip=None,
-               num_unit_shards=1, num_proj_shards=1,
-               forget_bias=1.0, state_is_tuple=True,
-               activation=math_ops.tanh, reuse=None):
+  def __init__(self,
+               num_units,
+               use_peepholes=False,
+               initializer=None,
+               num_proj=None,
+               proj_clip=None,
+               num_unit_shards=1,
+               num_proj_shards=1,
+               forget_bias=1.0,
+               state_is_tuple=True,
+               activation=math_ops.tanh,
+               reuse=None,
+               layer_norm=False,
+               norm_gain=1.0,
+               norm_shift=0.0):
     """Initialize the parameters for an LSTM cell.
 
     Args:
@@ -135,6 +167,11 @@ class CoupledInputForgetGateLSTMCell(rnn_cell_impl.RNNCell):
       reuse: (optional) Python boolean describing whether to reuse variables
         in an existing scope.  If not `True`, and the existing scope already has
         the given variables, an error is raised.
+      layer_norm: If `True`, layer normalization will be applied.
+      norm_gain: float, The layer normalization gain initial value. If
+        `layer_norm` has been set to `False`, this argument will be ignored.
+      norm_shift: float, The layer normalization shift initial value. If
+        `layer_norm` has been set to `False`, this argument will be ignored.
     """
     super(CoupledInputForgetGateLSTMCell, self).__init__(_reuse=reuse)
     if not state_is_tuple:
@@ -152,6 +189,9 @@ class CoupledInputForgetGateLSTMCell(rnn_cell_impl.RNNCell):
     self._state_is_tuple = state_is_tuple
     self._activation = activation
     self._reuse = reuse
+    self._layer_norm = layer_norm
+    self._norm_gain = norm_gain
+    self._norm_shift = norm_shift
 
     if num_proj:
       self._state_size = (rnn_cell_impl.LSTMStateTuple(num_units, num_proj)
@@ -220,9 +260,20 @@ class CoupledInputForgetGateLSTMCell(rnn_cell_impl.RNNCell):
 
     # j = new_input, f = forget_gate, o = output_gate
     cell_inputs = array_ops.concat([inputs, m_prev], 1)
-    lstm_matrix = nn_ops.bias_add(math_ops.matmul(cell_inputs, concat_w), b)
+    lstm_matrix = math_ops.matmul(cell_inputs, concat_w)
+
+    # If layer nomalization is applied, do not add bias
+    if not self._layer_norm:
+      lstm_matrix = nn_ops.bias_add(lstm_matrix, b)
+
     j, f, o = array_ops.split(value=lstm_matrix, num_or_size_splits=3, axis=1)
 
+    # Apply layer normalization
+    if self._layer_norm:
+      j = _norm(self._norm_gain, self._norm_shift, j, "transform")
+      f = _norm(self._norm_gain, self._norm_shift, f, "forget")
+      o = _norm(self._norm_gain, self._norm_shift, o, "output")
+
     # Diagonal connections
     if self._use_peepholes:
       w_f_diag = vs.get_variable(
@@ -236,6 +287,10 @@ class CoupledInputForgetGateLSTMCell(rnn_cell_impl.RNNCell):
       f_act = sigmoid(f + self._forget_bias)
     c = (f_act * c_prev + (1 - f_act) * self._activation(j))
 
+    # Apply layer normalization
+    if self._layer_norm:
+      c = _norm(self._norm_gain, self._norm_shift, c, "state")
+
     if self._use_peepholes:
       m = sigmoid(o + w_o_diag * c) * self._activation(c)
     else:
@@ -1301,8 +1356,8 @@ class LayerNormBasicLSTMCell(rnn_cell_impl.RNNCell):
     self._keep_prob = dropout_keep_prob
     self._seed = dropout_prob_seed
     self._layer_norm = layer_norm
-    self._g = norm_gain
-    self._b = norm_shift
+    self._norm_gain = norm_gain
+    self._norm_shift = norm_shift
     self._reuse = reuse
 
   @property
@@ -1313,24 +1368,25 @@ class LayerNormBasicLSTMCell(rnn_cell_impl.RNNCell):
   def output_size(self):
     return self._num_units
 
-  def _norm(self, inp, scope):
+  def _norm(self, inp, scope, dtype=dtypes.float32):
     shape = inp.get_shape()[-1:]
-    gamma_init = init_ops.constant_initializer(self._g)
-    beta_init = init_ops.constant_initializer(self._b)
+    gamma_init = init_ops.constant_initializer(self._norm_gain)
+    beta_init = init_ops.constant_initializer(self._norm_shift)
     with vs.variable_scope(scope):
       # Initialize beta and gamma for use by layer_norm.
-      vs.get_variable("gamma", shape=shape, initializer=gamma_init)
-      vs.get_variable("beta", shape=shape, initializer=beta_init)
+      vs.get_variable("gamma", shape=shape, initializer=gamma_init, dtype=dtype)
+      vs.get_variable("beta", shape=shape, initializer=beta_init, dtype=dtype)
     normalized = layers.layer_norm(inp, reuse=True, scope=scope)
     return normalized
 
   def _linear(self, args):
     out_size = 4 * self._num_units
     proj_size = args.get_shape()[-1]
-    weights = vs.get_variable("kernel", [proj_size, out_size])
+    dtype = args.dtype
+    weights = vs.get_variable("kernel", [proj_size, out_size], dtype=dtype)
     out = math_ops.matmul(args, weights)
     if not self._layer_norm:
-      bias = vs.get_variable("bias", [out_size])
+      bias = vs.get_variable("bias", [out_size], dtype=dtype)
       out = nn_ops.bias_add(out, bias)
     return out
 
@@ -1339,13 +1395,14 @@ class LayerNormBasicLSTMCell(rnn_cell_impl.RNNCell):
     c, h = state
     args = array_ops.concat([inputs, h], 1)
     concat = self._linear(args)
+    dtype = args.dtype
 
     i, j, f, o = array_ops.split(value=concat, num_or_size_splits=4, axis=1)
     if self._layer_norm:
-      i = self._norm(i, "input")
-      j = self._norm(j, "transform")
-      f = self._norm(f, "forget")
-      o = self._norm(o, "output")
+      i = self._norm(i, "input", dtype=dtype)
+      j = self._norm(j, "transform", dtype=dtype)
+      f = self._norm(f, "forget", dtype=dtype)
+      o = self._norm(o, "output", dtype=dtype)
 
     g = self._activation(j)
     if (not isinstance(self._keep_prob, float)) or self._keep_prob < 1:
@@ -1354,7 +1411,7 @@ class LayerNormBasicLSTMCell(rnn_cell_impl.RNNCell):
     new_c = (c * math_ops.sigmoid(f + self._forget_bias)
              + math_ops.sigmoid(i) * g)
     if self._layer_norm:
-      new_c = self._norm(new_c, "state")
+      new_c = self._norm(new_c, "state", dtype=dtype)
     new_h = self._activation(new_c) * math_ops.sigmoid(o)
 
     new_state = rnn_cell_impl.LSTMStateTuple(new_c, new_h)
@@ -1998,8 +2055,8 @@ class ConvLSTMCell(rnn_cell_impl.RNNCell):
     if self._skip_connection:
       self._total_output_channels += self._input_shape[-1]
 
-    state_size = tensor_shape.TensorShape(self._input_shape[:-1] 
-                                          + [self._output_channels])
+    state_size = tensor_shape.TensorShape(
+        self._input_shape[:-1] + [self._output_channels])
     self._state_size = rnn_cell_impl.LSTMStateTuple(state_size, state_size)
     self._output_size = tensor_shape.TensorShape(self._input_shape[:-1]
                                                  + [self._total_output_channels])
@@ -2059,11 +2116,8 @@ class Conv3DLSTMCell(ConvLSTMCell):
     """Construct Conv3DLSTM. See `ConvLSTMCell` for more details."""
     super(Conv3DLSTMCell, self).__init__(conv_ndims=3, **kwargs)
 
-def _conv(args, 
-          filter_size,
-          num_features,
-          bias,
-          bias_start=0.0):
+
+def _conv(args, filter_size, num_features, bias, bias_start=0.0):
   """convolution:
   Args:
     args: a Tensor or a list of Tensors of dimension 3D, 4D or 5D, 
@@ -2306,3 +2360,273 @@ class GLSTMCell(rnn_cell_impl.RNNCell):
 
     new_state = rnn_cell_impl.LSTMStateTuple(c, m)
     return m, new_state
+
+
+class LayerNormLSTMCell(rnn_cell_impl.RNNCell):
+  """Long short-term memory unit (LSTM) recurrent network cell.
+
+  The default non-peephole implementation is based on:
+
+    http://www.bioinf.jku.at/publications/older/2604.pdf
+
+  S. Hochreiter and J. Schmidhuber.
+  "Long Short-Term Memory". Neural Computation, 9(8):1735-1780, 1997.
+
+  The peephole implementation is based on:
+
+    https://research.google.com/pubs/archive/43905.pdf
+
+  Hasim Sak, Andrew Senior, and Francoise Beaufays.
+  "Long short-term memory recurrent neural network architectures for
+   large scale acoustic modeling." INTERSPEECH, 2014.
+
+  The class uses optional peep-hole connections, optional cell clipping, and
+  an optional projection layer.
+
+  Layer normalization implementation is based on:
+
+    https://arxiv.org/abs/1607.06450.
+
+  "Layer Normalization"
+  Jimmy Lei Ba, Jamie Ryan Kiros, Geoffrey E. Hinton
+
+  and is applied before the internal nonlinearities.
+
+  """
+
+  def __init__(self,
+               num_units,
+               use_peepholes=False,
+               cell_clip=None,
+               initializer=None,
+               num_proj=None,
+               proj_clip=None,
+               forget_bias=1.0,
+               activation=None,
+               layer_norm=False,
+               norm_gain=1.0,
+               norm_shift=0.0,
+               reuse=None):
+    """Initialize the parameters for an LSTM cell.
+
+    Args:
+      num_units: int, The number of units in the LSTM cell
+      use_peepholes: bool, set True to enable diagonal/peephole connections.
+      cell_clip: (optional) A float value, if provided the cell state is clipped
+        by this value prior to the cell output activation.
+      initializer: (optional) The initializer to use for the weight and
+        projection matrices.
+      num_proj: (optional) int, The output dimensionality for the projection
+        matrices.  If None, no projection is performed.
+      proj_clip: (optional) A float value.  If `num_proj > 0` and `proj_clip` is
+        provided, then the projected values are clipped elementwise to within
+        `[-proj_clip, proj_clip]`.
+      forget_bias: Biases of the forget gate are initialized by default to 1
+        in order to reduce the scale of forgetting at the beginning of
+        the training. Must set it manually to `0.0` when restoring from
+        CudnnLSTM trained checkpoints.
+      activation: Activation function of the inner states.  Default: `tanh`.
+      layer_norm: If `True`, layer normalization will be applied.
+      norm_gain: float, The layer normalization gain initial value. If
+        `layer_norm` has been set to `False`, this argument will be ignored.
+      norm_shift: float, The layer normalization shift initial value. If
+        `layer_norm` has been set to `False`, this argument will be ignored.
+      reuse: (optional) Python boolean describing whether to reuse variables
+        in an existing scope.  If not `True`, and the existing scope already has
+        the given variables, an error is raised.
+
+      When restoring from CudnnLSTM-trained checkpoints, must use
+      CudnnCompatibleLSTMCell instead.
+    """
+    super(LayerNormLSTMCell, self).__init__(_reuse=reuse)
+
+    self._num_units = num_units
+    self._use_peepholes = use_peepholes
+    self._cell_clip = cell_clip
+    self._initializer = initializer
+    self._num_proj = num_proj
+    self._proj_clip = proj_clip
+    self._forget_bias = forget_bias
+    self._activation = activation or math_ops.tanh
+    self._layer_norm = layer_norm
+    self._norm_gain = norm_gain
+    self._norm_shift = norm_shift
+
+    if num_proj:
+      self._state_size = (rnn_cell_impl.LSTMStateTuple(num_units, num_proj))
+      self._output_size = num_proj
+    else:
+      self._state_size = (rnn_cell_impl.LSTMStateTuple(num_units, num_units))
+      self._output_size = num_units
+
+  @property
+  def state_size(self):
+    return self._state_size
+
+  @property
+  def output_size(self):
+    return self._output_size
+
+  def _linear(self,
+              args,
+              output_size,
+              bias,
+              bias_initializer=None,
+              kernel_initializer=None,
+              layer_norm=False):
+    """Linear map: sum_i(args[i] * W[i]), where W[i] is a Variable.
+
+    Args:
+      args: a 2D Tensor or a list of 2D, batch x n, Tensors.
+      output_size: int, second dimension of W[i].
+      bias: boolean, whether to add a bias term or not.
+      bias_initializer: starting value to initialize the bias
+        (default is all zeros).
+      kernel_initializer: starting value to initialize the weight.
+      layer_norm: boolean, whether to apply layer normalization.
+
+
+    Returns:
+      A 2D Tensor with shape [batch x output_size] taking value
+      sum_i(args[i] * W[i]), where each W[i] is a newly created Variable.
+
+    Raises:
+      ValueError: if some of the arguments has unspecified or wrong shape.
+    """
+    if args is None or (nest.is_sequence(args) and not args):
+      raise ValueError("`args` must be specified")
+    if not nest.is_sequence(args):
+      args = [args]
+
+    # Calculate the total size of arguments on dimension 1.
+    total_arg_size = 0
+    shapes = [a.get_shape() for a in args]
+    for shape in shapes:
+      if shape.ndims != 2:
+        raise ValueError("linear is expecting 2D arguments: %s" % shapes)
+      if shape[1].value is None:
+        raise ValueError("linear expects shape[1] to be provided for shape %s, "
+                         "but saw %s" % (shape, shape[1]))
+      else:
+        total_arg_size += shape[1].value
+
+    dtype = [a.dtype for a in args][0]
+
+    # Now the computation.
+    scope = vs.get_variable_scope()
+    with vs.variable_scope(scope) as outer_scope:
+      weights = vs.get_variable(
+          "kernel", [total_arg_size, output_size],
+          dtype=dtype,
+          initializer=kernel_initializer)
+      if len(args) == 1:
+        res = math_ops.matmul(args[0], weights)
+      else:
+        res = math_ops.matmul(array_ops.concat(args, 1), weights)
+      if not bias:
+        return res
+      with vs.variable_scope(outer_scope) as inner_scope:
+        inner_scope.set_partitioner(None)
+        if bias_initializer is None:
+          bias_initializer = init_ops.constant_initializer(0.0, dtype=dtype)
+        biases = vs.get_variable(
+            "bias", [output_size], dtype=dtype, initializer=bias_initializer)
+
+    if not layer_norm:
+      res = nn_ops.bias_add(res, biases)
+
+    return res
+
+  def call(self, inputs, state):
+    """Run one step of LSTM.
+
+    Args:
+      inputs: input Tensor, 2D, batch x num_units.
+      state: this must be a tuple of state Tensors,
+       both `2-D`, with column sizes `c_state` and
+        `m_state`.
+
+    Returns:
+      A tuple containing:
+
+      - A `2-D, [batch x output_dim]`, Tensor representing the output of the
+        LSTM after reading `inputs` when previous state was `state`.
+        Here output_dim is:
+           num_proj if num_proj was set,
+           num_units otherwise.
+      - Tensor(s) representing the new state of LSTM after reading `inputs` when
+        the previous state was `state`.  Same type and shape(s) as `state`.
+
+    Raises:
+      ValueError: If input size cannot be inferred from inputs via
+        static shape inference.
+    """
+    sigmoid = math_ops.sigmoid
+
+    (c_prev, m_prev) = state
+
+    dtype = inputs.dtype
+    input_size = inputs.get_shape().with_rank(2)[1]
+    if input_size.value is None:
+      raise ValueError("Could not infer input size from inputs.get_shape()[-1]")
+    scope = vs.get_variable_scope()
+    with vs.variable_scope(scope, initializer=self._initializer) as unit_scope:
+
+      # i = input_gate, j = new_input, f = forget_gate, o = output_gate
+      lstm_matrix = self._linear(
+          [inputs, m_prev],
+          4 * self._num_units,
+          bias=True,
+          bias_initializer=None,
+          layer_norm=self._layer_norm)
+      i, j, f, o = array_ops.split(
+          value=lstm_matrix, num_or_size_splits=4, axis=1)
+
+      if self._layer_norm:
+        i = _norm(self._norm_gain, self._norm_shift, i, "input")
+        j = _norm(self._norm_gain, self._norm_shift, j, "transform")
+        f = _norm(self._norm_gain, self._norm_shift, f, "forget")
+        o = _norm(self._norm_gain, self._norm_shift, o, "output")
+
+      # Diagonal connections
+      if self._use_peepholes:
+        with vs.variable_scope(unit_scope):
+          w_f_diag = vs.get_variable(
+              "w_f_diag", shape=[self._num_units], dtype=dtype)
+          w_i_diag = vs.get_variable(
+              "w_i_diag", shape=[self._num_units], dtype=dtype)
+          w_o_diag = vs.get_variable(
+              "w_o_diag", shape=[self._num_units], dtype=dtype)
+
+      if self._use_peepholes:
+        c = (
+            sigmoid(f + self._forget_bias + w_f_diag * c_prev) * c_prev +
+            sigmoid(i + w_i_diag * c_prev) * self._activation(j))
+      else:
+        c = (
+            sigmoid(f + self._forget_bias) * c_prev +
+            sigmoid(i) * self._activation(j))
+
+      if self._layer_norm:
+        c = _norm(self._norm_gain, self._norm_shift, c, "state")
+
+      if self._cell_clip is not None:
+        # pylint: disable=invalid-unary-operand-type
+        c = clip_ops.clip_by_value(c, -self._cell_clip, self._cell_clip)
+        # pylint: enable=invalid-unary-operand-type
+      if self._use_peepholes:
+        m = sigmoid(o + w_o_diag * c) * self._activation(c)
+      else:
+        m = sigmoid(o) * self._activation(c)
+
+      if self._num_proj is not None:
+        with vs.variable_scope("projection"):
+          m = self._linear(m, self._num_proj, bias=False)
+
+        if self._proj_clip is not None:
+          # pylint: disable=invalid-unary-operand-type
+          m = clip_ops.clip_by_value(m, -self._proj_clip, self._proj_clip)
+          # pylint: enable=invalid-unary-operand-type
+
+    new_state = (rnn_cell_impl.LSTMStateTuple(c, m))
+    return m, new_state
diff --git a/tensorflow/contrib/seq2seq/python/ops/attention_wrapper.py b/tensorflow/contrib/seq2seq/python/ops/attention_wrapper.py
index 87230e3355..e87ef41388 100644
--- a/tensorflow/contrib/seq2seq/python/ops/attention_wrapper.py
+++ b/tensorflow/contrib/seq2seq/python/ops/attention_wrapper.py
@@ -149,7 +149,7 @@ class _BaseAttentionMechanism(AttentionMechanism):
                memory_sequence_length=None,
                memory_layer=None,
                check_inner_dims_defined=True,
-               score_mask_value=float("-inf"),
+               score_mask_value=None,
                name=None):
     """Construct base AttentionMechanism class.
 
@@ -187,9 +187,13 @@ class _BaseAttentionMechanism(AttentionMechanism):
           "memory_layer is not a Layer: %s" % type(memory_layer).__name__)
     self._query_layer = query_layer
     self._memory_layer = memory_layer
+    self.dtype = memory_layer.dtype
     if not callable(probability_fn):
       raise TypeError("probability_fn must be callable, saw type: %s" %
                       type(probability_fn).__name__)
+    if score_mask_value is None:
+      score_mask_value = dtypes.as_dtype(
+          self._memory_layer.dtype).as_numpy_dtype(-np.inf)
     self._probability_fn = lambda score, prev: (  # pylint:disable=g-long-lambda
         probability_fn(
             _maybe_mask_score(score, memory_sequence_length, score_mask_value),
@@ -334,7 +338,8 @@ class LuongAttention(_BaseAttentionMechanism):
                memory_sequence_length=None,
                scale=False,
                probability_fn=None,
-               score_mask_value=float("-inf"),
+               score_mask_value=None,
+               dtype=None,
                name="LuongAttention"):
     """Construct the AttentionMechanism mechanism.
 
@@ -353,17 +358,20 @@ class LuongAttention(_BaseAttentionMechanism):
       score_mask_value: (optional) The mask value for score before passing into
         `probability_fn`. The default is -inf. Only used if
         `memory_sequence_length` is not None.
+      dtype: The data type for the memory layer of the attention mechanism.
       name: Name to use when creating ops.
     """
     # For LuongAttention, we only transform the memory layer; thus
     # num_units **must** match expected the query depth.
     if probability_fn is None:
       probability_fn = nn_ops.softmax
+    if dtype is None:
+      dtype = dtypes.float32
     wrapped_probability_fn = lambda score, _: probability_fn(score)
     super(LuongAttention, self).__init__(
         query_layer=None,
         memory_layer=layers_core.Dense(
-            num_units, name="memory_layer", use_bias=False),
+            num_units, name="memory_layer", use_bias=False, dtype=dtype),
         memory=memory,
         probability_fn=wrapped_probability_fn,
         memory_sequence_length=memory_sequence_length,
@@ -475,7 +483,8 @@ class BahdanauAttention(_BaseAttentionMechanism):
                memory_sequence_length=None,
                normalize=False,
                probability_fn=None,
-               score_mask_value=float("-inf"),
+               score_mask_value=None,
+               dtype=None,
                name="BahdanauAttention"):
     """Construct the Attention mechanism.
 
@@ -494,16 +503,20 @@ class BahdanauAttention(_BaseAttentionMechanism):
       score_mask_value: (optional): The mask value for score before passing into
         `probability_fn`. The default is -inf. Only used if
         `memory_sequence_length` is not None.
+      dtype: The data type for the query and memory layers of the attention
+        mechanism.
       name: Name to use when creating ops.
     """
     if probability_fn is None:
       probability_fn = nn_ops.softmax
+    if dtype is None:
+      dtype = dtypes.float32
     wrapped_probability_fn = lambda score, _: probability_fn(score)
     super(BahdanauAttention, self).__init__(
         query_layer=layers_core.Dense(
-            num_units, name="query_layer", use_bias=False),
+            num_units, name="query_layer", use_bias=False, dtype=dtype),
         memory_layer=layers_core.Dense(
-            num_units, name="memory_layer", use_bias=False),
+            num_units, name="memory_layer", use_bias=False, dtype=dtype),
         memory=memory,
         probability_fn=wrapped_probability_fn,
         memory_sequence_length=memory_sequence_length,
@@ -738,11 +751,12 @@ class BahdanauMonotonicAttention(_BaseMonotonicAttentionMechanism):
                memory,
                memory_sequence_length=None,
                normalize=False,
-               score_mask_value=float("-inf"),
+               score_mask_value=None,
                sigmoid_noise=0.,
                sigmoid_noise_seed=None,
                score_bias_init=0.,
                mode="parallel",
+               dtype=None,
                name="BahdanauMonotonicAttention"):
     """Construct the Attention mechanism.
 
@@ -766,17 +780,21 @@ class BahdanauMonotonicAttention(_BaseMonotonicAttentionMechanism):
       mode: How to compute the attention distribution.  Must be one of
         'recursive', 'parallel', or 'hard'.  See the docstring for
         `tf.contrib.seq2seq.monotonic_attention` for more information.
+      dtype: The data type for the query and memory layers of the attention
+        mechanism.
       name: Name to use when creating ops.
     """
     # Set up the monotonic probability fn with supplied parameters
+    if dtype is None:
+      dtype = dtypes.float32
     wrapped_probability_fn = functools.partial(
         _monotonic_probability_fn, sigmoid_noise=sigmoid_noise, mode=mode,
         seed=sigmoid_noise_seed)
     super(BahdanauMonotonicAttention, self).__init__(
         query_layer=layers_core.Dense(
-            num_units, name="query_layer", use_bias=False),
+            num_units, name="query_layer", use_bias=False, dtype=dtype),
         memory_layer=layers_core.Dense(
-            num_units, name="memory_layer", use_bias=False),
+            num_units, name="memory_layer", use_bias=False, dtype=dtype),
         memory=memory,
         probability_fn=wrapped_probability_fn,
         memory_sequence_length=memory_sequence_length,
@@ -834,11 +852,12 @@ class LuongMonotonicAttention(_BaseMonotonicAttentionMechanism):
                memory,
                memory_sequence_length=None,
                scale=False,
-               score_mask_value=float("-inf"),
+               score_mask_value=None,
                sigmoid_noise=0.,
                sigmoid_noise_seed=None,
                score_bias_init=0.,
                mode="parallel",
+               dtype=None,
                name="LuongMonotonicAttention"):
     """Construct the Attention mechanism.
 
@@ -862,17 +881,21 @@ class LuongMonotonicAttention(_BaseMonotonicAttentionMechanism):
       mode: How to compute the attention distribution.  Must be one of
         'recursive', 'parallel', or 'hard'.  See the docstring for
         `tf.contrib.seq2seq.monotonic_attention` for more information.
+      dtype: The data type for the query and memory layers of the attention
+        mechanism.
       name: Name to use when creating ops.
     """
     # Set up the monotonic probability fn with supplied parameters
+    if dtype is None:
+      dtype = dtypes.float32
     wrapped_probability_fn = functools.partial(
         _monotonic_probability_fn, sigmoid_noise=sigmoid_noise, mode=mode,
         seed=sigmoid_noise_seed)
     super(LuongMonotonicAttention, self).__init__(
         query_layer=layers_core.Dense(
-            num_units, name="query_layer", use_bias=False),
+            num_units, name="query_layer", use_bias=False, dtype=dtype),
         memory_layer=layers_core.Dense(
-            num_units, name="memory_layer", use_bias=False),
+            num_units, name="memory_layer", use_bias=False, dtype=dtype),
         memory=memory,
         probability_fn=wrapped_probability_fn,
         memory_sequence_length=memory_sequence_length,
@@ -1123,8 +1146,11 @@ class AttentionWrapper(rnn_cell_impl.RNNCell):
             % (len(attention_layer_sizes), len(attention_mechanisms)))
       self._attention_layers = tuple(
           layers_core.Dense(
-              attention_layer_size, name="attention_layer", use_bias=False)
-          for attention_layer_size in attention_layer_sizes)
+              attention_layer_size,
+              name="attention_layer",
+              use_bias=False,
+              dtype=attention_mechanisms[i].dtype)
+          for i, attention_layer_size in enumerate(attention_layer_sizes))
       self._attention_layer_size = sum(attention_layer_sizes)
     else:
       self._attention_layers = None
diff --git a/tensorflow/contrib/slim/README.md b/tensorflow/contrib/slim/README.md
index 0bfd0801d5..f7a85557ca 100644
--- a/tensorflow/contrib/slim/README.md
+++ b/tensorflow/contrib/slim/README.md
@@ -237,7 +237,7 @@ One way to reduce this code duplication would be via a `for` loop:
 ```python
 net = ...
 for i in range(3):
-  net = slim.conv2d(net, 256, [3, 3], scope='conv3_' % (i+1))
+  net = slim.conv2d(net, 256, [3, 3], scope='conv3_%d' % (i+1))
 net = slim.max_pool2d(net, [2, 2], scope='pool2')
 ```
 
diff --git a/tensorflow/contrib/slim/python/slim/nets/resnet_v1_test.py b/tensorflow/contrib/slim/python/slim/nets/resnet_v1_test.py
index b4fd2580c2..576444214d 100644
--- a/tensorflow/contrib/slim/python/slim/nets/resnet_v1_test.py
+++ b/tensorflow/contrib/slim/python/slim/nets/resnet_v1_test.py
@@ -386,7 +386,7 @@ class ResnetCompleteNetworkTest(test.TestCase):
                 inputs, None, is_training=False, global_pool=False)
             sess.run(variables.global_variables_initializer())
             self.assertAllClose(
-                output.eval(), expected.eval(), atol=1e-4, rtol=1e-4)
+                output.eval(), expected.eval(), atol=2e-4, rtol=1e-4)
 
   def testUnknownBatchSize(self):
     batch = 2
diff --git a/tensorflow/contrib/verbs/README.md b/tensorflow/contrib/verbs/README.md
index da5f2b0223..dcb390b0a5 100644
--- a/tensorflow/contrib/verbs/README.md
+++ b/tensorflow/contrib/verbs/README.md
@@ -1,4 +1,4 @@
-## How to compile and use RDMA-enabled TensorFlow
+## How to compile, use and configure RDMA-enabled TensorFlow
 1. Follow the regular TF compilation instructions. During configure step, if you want ibverbs based RDMA support, answer yes to this question:
 
     ```Do you wish to build TensorFlow with VERBS-RDMA support [y/N]```
@@ -7,6 +7,18 @@
 
     ```server = tf.train.Server(cluster, job_name="local", task_index=0, protocol='grpc+verbs') # default protocol is 'grpc'```
 
+3. RDMA configuration is done by setting the following environment variables:
+   * **RDMA_DEVICE**: The RDMA device name to be used. If not defined by user, a default device with an active port will be set if exists.
+   * **RDMA_DEVICE_PORT**: The port within the selected device. Not relevant if RDMA_DEVICE is not defined. If not defined by user, a default active port will be set if exists.
+   * **RDMA_GID_INDEX**: The GID index of the port. If not defined by user, a default suitable GID index will be set (RoCEV2 is favourable as default).
+   * **RDMA_QP_PKEY_INDEX**: The Pkey for the QP. If not defined by user, the default value is 0.
+   * **RDMA_QP_QUEUE_DEPTH**: TX/RX queue size for the QP. If not defined by user, the default value is 1024.
+   * **RDMA_QP_TIMEOUT**: The retransmission timeout for QPs. If not defined by user, the default value is 14.
+   * **RDMA_QP_RETRY_COUNT**: Number of retransmission for QPs. If not defined by user, the default value is 7.
+   * **RDMA_QP_SL**: Service level configuration for QOS and ECN, valid values are 0-7. If not defined by user, the default value is 0.
+   * **RDMA_QP_MTU**: MTU configuration for the QPs. If not defined by user, the default value is active MTU from query_port.
+   * **RDMA_TRAFFIC_CLASS**: Traffic class configuration for QP, in case of DSCP trust level QoS configuration. If not defined by user, the default value is 0. For more info see [HowTo Configure Trust state on Mellanox Adapters](https://community.mellanox.com/docs/DOC-2866).
+
 ## Overview
 The design is based on TensorFlow r1.0. An RDMA path is added between servers for tensor transfer (weights, gradients, etc). The existing GRPC path remains and is responsible for "administrative" tasks, such as setting up the RDMA path, exchanging computation graphs, etc.
 
diff --git a/tensorflow/contrib/verbs/rdma.cc b/tensorflow/contrib/verbs/rdma.cc
index 26e18b28aa..ac8d994502 100644
--- a/tensorflow/contrib/verbs/rdma.cc
+++ b/tensorflow/contrib/verbs/rdma.cc
@@ -16,6 +16,7 @@ limitations under the License.
 #ifdef TENSORFLOW_USE_VERBS
 
 #include "tensorflow/contrib/verbs/rdma.h"
+#include <fcntl.h>
 #include <cstdlib>
 #include "tensorflow/contrib/verbs/verbs_util.h"
 #include "tensorflow/core/common_runtime/device_mgr.h"
@@ -33,6 +34,8 @@ limitations under the License.
 
 namespace tensorflow {
 
+#define RoCE_V2 "RoCE v2"
+
 namespace {
 // hash name to 32-bit integer
 uint32_t NameHash(const string& name) {
@@ -66,16 +69,336 @@ string MessageTypeToString(RdmaMessageType rmt) {
 }
 }  // namespace
 
-ibv_context* open_default_device() {
+// Function to get environment variable
+// Args:
+//    var_name - the name of the environmental variable
+// Returns:
+//    string with it's value or empty string if not set
+string get_env_var(char const* var_name) {
+  char const* var_temp = getenv(var_name);
+
+  return (var_temp == NULL) ? string() : string(var_temp);
+}
+
+// Function to open device
+// Args:
+//   ibv_dev device to open
+// Returns:
+//   context of the opened device
+ibv_context* open_device(ibv_device* ibv_dev) {
+  ibv_context* context = ibv_open_device(ibv_dev);
+
+  CHECK(context) << "Open context failed for " << ibv_get_device_name(ibv_dev);
+  return context;
+}
+
+// Function to count the number of active ports for device
+// Args:
+//   device - to check active ports
+// Returns:
+//   number of active ports of the given device
+int get_dev_active_port_count(ibv_device* device) {
+  ibv_device_attr device_att;
+  ibv_port_attr port_attr;
+  ibv_context* context = NULL;
+  int rc, port_index, active_ports = 0;
+
+  context = ibv_open_device(device);
+  CHECK(context) << "Open context failed for " << ibv_get_device_name(device);
+  rc = ibv_query_device(context, &device_att);
+  CHECK(!rc) << "Failed to query the device";
+
+  for (port_index = 1; port_index <= device_att.phys_port_cnt; port_index++) {
+    rc = ibv_query_port(context, port_index, &port_attr);
+    CHECK(!rc) << "Failed to query the port" << port_index;
+    if (port_attr.state == IBV_PORT_ACTIVE) {
+      active_ports++;
+    }
+  }
+  ibv_close_device(context);
+  return active_ports;
+}
+
+// Function to set device. If RDMA_DEVICE not set, search for device with active
+// port.
+// Fails if more than one device with active port was found.
+// Returns:
+//   device to use
+ibv_device* set_device() {
   ibv_device** dev_list;
-  ibv_device* ib_dev;
-  dev_list = ibv_get_device_list(NULL);
+  int dev_num, device_index, device_to_open = 0;
+  int num_devs_with_active_port = 0;
+  string env_p_rdma_device, str_port_num;
+
+  dev_list = ibv_get_device_list(&dev_num);
   CHECK(dev_list) << "No InfiniBand device found";
-  ib_dev = dev_list[0];
-  CHECK(ib_dev) << "No InfiniBand device found";
-  ibv_context* context = ibv_open_device(ib_dev);
-  CHECK(context) << "Open context failed for " << ibv_get_device_name(ib_dev);
-  return context;
+
+  env_p_rdma_device = get_env_var("RDMA_DEVICE");
+  if (!env_p_rdma_device.empty()) {
+    for (device_index = 0; device_index < dev_num; device_index++) {
+      if (!env_p_rdma_device.compare(
+              ibv_get_device_name(dev_list[device_index]))) {
+        CHECK(get_dev_active_port_count(dev_list[device_index]) != 0)
+            << "Device " << ibv_get_device_name(dev_list[device_index])
+            << " has no active ports";
+        return dev_list[device_index];
+      }
+    }
+    // check validity of input device
+    CHECK(false) << "The device " << env_p_rdma_device << " wasn't found";
+  } else {
+    // set default device
+    str_port_num = get_env_var("RDMA_DEVICE_PORT");
+    CHECK(str_port_num.empty())
+        << "RDMA_DEVICE should be provided if RDMA_DEVICE_PORT is set by user";
+    for (device_index = 0; device_index < dev_num; device_index++) {
+      // get port_num
+      if (get_dev_active_port_count(dev_list[device_index]) > 0) {
+        num_devs_with_active_port++;
+        CHECK(num_devs_with_active_port <= 1) << ". More than one device with "
+                                                 "active port in the system. "
+                                                 "Please enter RDMA_DEVICE";
+        // found device with at least 1 active port
+        device_to_open = device_index;
+      }
+    }
+    CHECK(num_devs_with_active_port > 0)
+        << "There is no active port in the system";
+    return dev_list[device_to_open];
+  }
+  CHECK(false) << "No device was set!";
+  return NULL;  // never happens
+}
+
+// Function to set port for device.
+// If RDMA_DEVICE_PORT not set, first active port of the device will be set.
+// Args:
+//   context of the device
+// Returns:
+//   port to use
+uint8_t set_port(ibv_context* context) {
+  uint8_t port_num = 0;  // 0 is illegal port number
+  string str_port_num;
+  ibv_device_attr device_att;
+  ibv_port_attr port_attr;
+  int rc, port_index;
+
+  rc = ibv_query_device(context, &device_att);
+  CHECK(!rc) << "Failed to query the device\n";
+
+  str_port_num = get_env_var("RDMA_DEVICE_PORT");
+  // user defined port
+  if (!str_port_num.empty()) {
+    port_num = stoi(str_port_num);
+    CHECK(port_num > 0) << "RDMA_DEVICE_PORT should be positive";
+    CHECK(port_num <= device_att.phys_port_cnt) << "RDMA_DEVICE_PORT should be "
+                                                   "less or equal to amount of "
+                                                   "available ports";
+    rc = ibv_query_port(context, port_num, &port_attr);
+    CHECK(!rc) << "Failed to query the port" << port_num;
+    // check if port id active
+    CHECK(port_attr.state == IBV_PORT_ACTIVE)
+        << "Selected RDMA_DEVICE_PORT is not active";
+  } else {  // set default port
+    for (port_index = 1; port_index <= device_att.phys_port_cnt; port_index++) {
+      rc = ibv_query_port(context, port_index, &port_attr);
+      CHECK(!rc) << "Failed to query the port" << port_index;
+      if (port_attr.state == IBV_PORT_ACTIVE) {
+        port_num = port_index;
+        break;
+      }
+    }
+    CHECK_GT(port_num, 0) << "No active ports";
+  }
+  return port_num;
+}
+
+// Function read from sysfs file
+// Args:
+//   dir - directory
+//   file - file
+//   buff - buffer for the result
+//   size - buffer size
+// Returns:
+//   number of bytes were read or -1 if failed
+int read_sysfs_file(const char* dir, const char* file, char* buf, size_t size) {
+  char* path;
+  int fd;
+  int len;
+
+  if (asprintf(&path, "%s/%s", dir, file) < 0) return -1;
+
+  fd = open(path, O_RDONLY);
+  if (fd < 0) {
+    free(path);
+    return -1;
+  }
+
+  len = read(fd, buf, size);
+
+  close(fd);
+  free(path);
+
+  if (len > 0 && buf[len - 1] == '\n') buf[--len] = '\0';
+
+  return len;
+}
+
+// Function to check if GID index support RoCE V2
+// Args:
+//   context - device context
+//   port_num - port number
+//   index -  GID index
+// Returns:
+//   if GID supports RoCE V2 - true, otherwise - false.
+bool is_gid_type_roce_v2(ibv_context* context, uint8_t port_num,
+                         uint8_t index) {
+  char name[32];
+  char buff[41];
+
+  snprintf(name, sizeof(name), "ports/%d/gid_attrs/types/%d", port_num, index);
+  if (read_sysfs_file(context->device->ibdev_path, name, buff, sizeof(buff)) <=
+      0) {
+    return false;
+  }
+  return !strcmp(buff, RoCE_V2);
+}
+
+// Function to set GID index.
+// If the port link is IB, no GID index should be selected.
+// If Ethernet but RDMA_GID_INDEX not set gid index that supports
+//   RoCE V2 will be chosen(fails if more than one IP is configured)
+// Args:
+//   context - device context
+//   port_num - port number
+// Returns:
+//   GID index to use
+uint8_t set_gid(uint8_t port_num, ibv_context* context) {
+  ibv_port_attr port_attr;
+  string gid_str;
+  int rc, i, gids_num = 0, v2_ip_num = 0;
+  union ibv_gid gid;
+  uint8_t gid_index = 0;
+
+  rc = ibv_query_port(context, port_num, &port_attr);
+  CHECK(!rc) << "Failed to query the port" << port_num;
+
+  for (i = 0; i < port_attr.gid_tbl_len; i++) {
+    rc = ibv_query_gid(context, port_num, i, &gid);
+    CHECK(!rc) << "Failed to query gid to port " << (int)port_num << " index "
+               << i;
+    if (gid.global.interface_id) {
+      gids_num++;
+      if (gid.global.subnet_prefix == 0 &&
+          is_gid_type_roce_v2(context, port_num, i)) {
+        if (v2_ip_num == 0) {
+          // can be overwritten by RDMA_GID_INDEX later
+          gid_index = i;
+        }
+        v2_ip_num++;
+      }
+    }
+  }
+  switch (port_attr.link_layer) {
+    case (IBV_LINK_LAYER_ETHERNET):
+      gid_str = get_env_var("RDMA_GID_INDEX");
+      if (!gid_str.empty()) {
+        gid_index = stoi(gid_str);
+        CHECK(gid_index < gids_num)
+            << "RDMA_GID_INDEX should be less than GIDs amount" << gids_num;
+      } else {
+        CHECK(v2_ip_num <= 1)
+            << "More than one IP is available, please specify GID_INDEX";
+      }
+      break;
+    case (IBV_LINK_LAYER_INFINIBAND):  // no need in GID index
+      break;
+    default:
+      LOG(INFO) << "Unknown port link layer. Currently supporting Ethernet and "
+                   "InfiniBand only. ";
+  }
+  if (!is_gid_type_roce_v2(context, port_num, gid_index)) {
+    LOG(INFO) << "RoCE v2 is not configured for GID_INDEX " << (int)gid_index;
+  }
+  return gid_index;
+}
+
+// set the default or environment value to the configuration parameter.
+// Args:
+//   default_val- the default value for this parameter
+//   env_param- the environment parameter's name
+// Returns:
+//   32-bit value
+uint32_t set_param(uint32_t default_val, const char* env_param) {
+  uint32_t val = default_val;
+  string val_s;
+
+  val_s = get_env_var(env_param);
+
+  if (!val_s.empty()) {
+    val = stoi(val_s);
+  }
+  return val;
+}
+
+enum ibv_mtu set_mtu(uint8_t port_num, ibv_context* context) {
+  ibv_port_attr port_attr;
+  enum ibv_mtu mtu;
+  string mtu_s;
+  int rc, mtu_i;
+
+  rc = ibv_query_port(context, port_num, &port_attr);
+  CHECK(!rc) << "Failed to query the port" << port_num;
+
+  mtu_s = get_env_var("RDMA_MTU");
+
+  if (!mtu_s.empty()) {
+    mtu_i = stoi(mtu_s);
+    switch (mtu_i) {
+      case 256:
+        mtu = IBV_MTU_256;
+        break;
+      case 512:
+        mtu = IBV_MTU_512;
+        break;
+      case 1024:
+        mtu = IBV_MTU_1024;
+        break;
+      case 2048:
+        mtu = IBV_MTU_2048;
+        break;
+      case 4096:
+        mtu = IBV_MTU_4096;
+        break;
+      default:
+        CHECK(0) << "Error: MTU input value must be one of the following: 256, "
+                    "512, 1024, 2048, 4096. MTU "
+                 << mtu << " is invalid\n";
+        break;
+    }
+    CHECK(mtu < port_attr.active_mtu)
+        << "MTU configuration for the QPs is larger than active MTU";
+  } else {
+    mtu = port_attr.active_mtu;
+  }
+  return mtu;
+}
+
+RdmaParams params_init(ibv_context* context) {
+  RdmaParams params;
+
+  params.port_num = set_port(context);
+  params.sgid_index = set_gid(params.port_num, context);
+  params.pkey_index = (uint8_t)set_param(PKEY_DEFAULT, "RDMA_PKEY");
+  params.queue_depth = set_param(QUEUE_DEPTH_DEFAULT, "RDMA_QUEUE_DEPTH");
+  params.timeout = (uint8_t)set_param(TIMEOUT_DEFAULT, "RDMA_TIMEOUT");
+  params.retry_cnt = (uint8_t)set_param(RETRY_CNT_DEFAULT, "RDMA_RETRY_CNT");
+  params.sl = (uint8_t)set_param(SL_DEFAULT, "RDMA_SL");
+  CHECK(params.sl <= 7) << "SL value is " << (int)params.sl
+                        << ". Valid values are 0-7.";
+  params.mtu = set_mtu(params.port_num, context);
+  params.traffic_class = set_param(TRAFFIC_CLASS, "RDMA_TRAFFIC_CLASS");
+  return params;
 }
 
 ibv_pd* alloc_protection_domain(ibv_context* context) {
@@ -85,7 +408,8 @@ ibv_pd* alloc_protection_domain(ibv_context* context) {
 }
 
 RdmaAdapter::RdmaAdapter(const WorkerEnv* worker_env)
-    : context_(open_default_device()),
+    : context_(open_device(set_device())),
+      params_(params_init(context_)),
       pd_(alloc_protection_domain(context_)),
       worker_env_(worker_env) {
   event_channel_ = ibv_create_comp_channel(context_);
@@ -242,8 +566,8 @@ RdmaChannel::RdmaChannel(const RdmaAdapter* adapter, const string local_name,
     memset(&attr, 0, sizeof(ibv_qp_init_attr));
     attr.send_cq = adapter_->cq_;
     attr.recv_cq = adapter_->cq_;
-    attr.cap.max_send_wr = RdmaAdapter::MAX_CONCURRENT_WRITES;
-    attr.cap.max_recv_wr = RdmaAdapter::MAX_CONCURRENT_WRITES;
+    attr.cap.max_send_wr = adapter_->params_.queue_depth;
+    attr.cap.max_recv_wr = adapter_->params_.queue_depth;
     attr.cap.max_send_sge = 1;
     attr.cap.max_recv_sge = 1;
     attr.qp_type = IBV_QPT_RC;
@@ -257,8 +581,8 @@ RdmaChannel::RdmaChannel(const RdmaAdapter* adapter, const string local_name,
     struct ibv_qp_attr attr;
     memset(&attr, 0, sizeof(ibv_qp_attr));
     attr.qp_state = IBV_QPS_INIT;
-    attr.pkey_index = 0;
-    attr.port_num = 1;
+    attr.pkey_index = adapter_->params_.pkey_index;
+    attr.port_num = adapter_->params_.port_num;
     attr.qp_access_flags = IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_WRITE;
 
     int mask =
@@ -269,13 +593,15 @@ RdmaChannel::RdmaChannel(const RdmaAdapter* adapter, const string local_name,
   // Local address
   {
     struct ibv_port_attr attr;
-    CHECK(!ibv_query_port(adapter_->context_, (uint8_t)1, &attr))
+    CHECK(
+        !ibv_query_port(adapter_->context_, adapter_->params_.port_num, &attr))
         << "Query port";
     self_.lid = attr.lid;
     self_.qpn = qp_->qp_num;
     self_.psn = static_cast<uint32_t>(random::New64()) & 0xffffff;
     union ibv_gid gid;
-    CHECK(!ibv_query_gid(adapter_->context_, (uint8_t)1, 0, &gid))
+    CHECK(!ibv_query_gid(adapter_->context_, adapter_->params_.port_num,
+                         adapter_->params_.sgid_index, &gid))
         << "Query gid";
     self_.snp = gid.global.subnet_prefix;
     self_.iid = gid.global.interface_id;
@@ -479,11 +805,9 @@ void RdmaChannel::Connect(const RdmaAddress& remoteAddr) {
     struct ibv_qp_attr attr;
     memset(&attr, 0, sizeof(ibv_qp_attr));
     attr.qp_state = IBV_QPS_RTR;
-    struct ibv_port_attr port_attr;
-    CHECK(!ibv_query_port(adapter_->context_, (uint8_t)1, &port_attr))
-        << "Query port failed";
+
     // This assumes both QP's ports are configured with the same MTU
-    attr.path_mtu = port_attr.active_mtu;
+    attr.path_mtu = adapter_->params_.mtu;
     attr.dest_qp_num = remoteAddr.qpn;
     attr.rq_psn = remoteAddr.psn;
     attr.max_dest_rd_atomic = 1;
@@ -494,9 +818,11 @@ void RdmaChannel::Connect(const RdmaAddress& remoteAddr) {
     attr.ah_attr.grh.flow_label = 0;
     attr.ah_attr.grh.hop_limit = 255;
     attr.ah_attr.dlid = remoteAddr.lid;
-    attr.ah_attr.sl = 0;
+    attr.ah_attr.sl = adapter_->params_.sl;
     attr.ah_attr.src_path_bits = 0;
-    attr.ah_attr.port_num = 1;
+    attr.ah_attr.port_num = adapter_->params_.port_num;
+    attr.ah_attr.grh.sgid_index = adapter_->params_.sgid_index;
+    attr.ah_attr.grh.traffic_class = adapter_->params_.traffic_class;
 
     int r;
     CHECK(!(r = ibv_modify_qp(qp_, &attr,
@@ -509,8 +835,8 @@ void RdmaChannel::Connect(const RdmaAddress& remoteAddr) {
     memset(&attr, 0, sizeof(ibv_qp_attr));
     attr.qp_state = IBV_QPS_RTS;
     attr.sq_psn = self_.psn;
-    attr.timeout = 14;
-    attr.retry_cnt = 7;
+    attr.timeout = adapter_->params_.timeout;
+    attr.retry_cnt = adapter_->params_.retry_cnt;
     attr.rnr_retry = 7; /* infinite */
     attr.max_rd_atomic = 1;
 
diff --git a/tensorflow/contrib/verbs/rdma.h b/tensorflow/contrib/verbs/rdma.h
index e1e07db776..00217c81d4 100644
--- a/tensorflow/contrib/verbs/rdma.h
+++ b/tensorflow/contrib/verbs/rdma.h
@@ -36,7 +36,24 @@ limitations under the License.
 #include "tensorflow/core/platform/mutex.h"
 
 namespace tensorflow {
-
+#define PKEY_DEFAULT 0
+#define QUEUE_DEPTH_DEFAULT 1024
+#define TIMEOUT_DEFAULT 14
+#define RETRY_CNT_DEFAULT 7
+#define SL_DEFAULT 0
+#define TRAFFIC_CLASS 0
+
+struct RdmaParams {
+  uint8_t port_num;
+  uint8_t sgid_index;
+  uint8_t pkey_index;
+  uint32_t queue_depth;
+  uint8_t timeout;
+  uint8_t retry_cnt;
+  uint8_t sl;
+  enum ibv_mtu mtu;
+  uint8_t traffic_class;
+};
 // structure to save the address of remote channels.
 struct RdmaAddress {
   uint32_t lid;
@@ -84,6 +101,8 @@ class RdmaAdapter {
  protected:
   static const int MAX_CONCURRENT_WRITES = 1000;
   ibv_context* context_;
+  // RDMA configuration parameters
+  RdmaParams params_;
   // ibverbs protection domain
   ibv_pd* pd_;
   // Completion event channel, to wait for work completions
diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD
index d71f314e11..30ff4ef358 100644
--- a/tensorflow/core/BUILD
+++ b/tensorflow/core/BUILD
@@ -2710,6 +2710,7 @@ tf_cc_test_mkl(
     srcs = [
         "graph/mkl_layout_pass_test.cc",
         "graph/mkl_tfconversion_pass_test.cc",
+        "util/mkl_util_test.cc",
     ],
     linkstatic = 1,
     deps = [
diff --git a/tensorflow/core/api_def/base_api/api_def_UniqueV2.pbtxt b/tensorflow/core/api_def/base_api/api_def_UniqueV2.pbtxt
new file mode 100644
index 0000000000..cd7ec6e551
--- /dev/null
+++ b/tensorflow/core/api_def/base_api/api_def_UniqueV2.pbtxt
@@ -0,0 +1,47 @@
+op {
+  graph_op_name: "UniqueV2"
+  in_arg {
+    name: "x"
+    description: <<END
+A `Tensor`.
+END
+  }
+  in_arg {
+    name: "axis"
+    description: <<END
+A `Tensor` of type `int64` (default: 0). The axis of the Tensor to
+find the unique elements.
+END
+  }
+  out_arg {
+    name: "y"
+    description: <<END
+A `Tensor`. Unique elements along the `axis` of `Tensor` x.
+END
+  }
+  out_arg {
+    name: "idx"
+    description: <<END
+A 1-D Tensor. Has the same type as x that contains the index of each
+value of x in the output y.
+END
+  }
+  summary: "Finds unique elements in a 1-D tensor."
+  description: <<END
+This operation returns a tensor `y` containing all of the unique elements of `x`
+sorted in the same order that they occur in `x`. This operation also returns a
+tensor `idx` the same size as `x` that contains the index of each value of `x`
+in the unique output `y`. In other words:
+
+`y[idx[i]] = x[i] for i in [0, 1,...,rank(x) - 1]`
+
+For example:
+
+```
+# tensor 'x' is [1, 1, 2, 4, 4, 4, 7, 8, 8]
+y, idx = unique(x)
+y ==> [1, 2, 4, 7, 8]
+idx ==> [0, 0, 1, 2, 2, 2, 3, 4, 4]
+```
+END
+}
diff --git a/tensorflow/core/api_def/base_api/api_def_UnsortedSegmentSum.pbtxt b/tensorflow/core/api_def/base_api/api_def_UnsortedSegmentSum.pbtxt
index 0a3355cdbc..77a96d1e03 100644
--- a/tensorflow/core/api_def/base_api/api_def_UnsortedSegmentSum.pbtxt
+++ b/tensorflow/core/api_def/base_api/api_def_UnsortedSegmentSum.pbtxt
@@ -26,6 +26,8 @@ need not be sorted and need not cover all values in the full
 range of valid values.
 
 If the sum is empty for a given segment ID `i`, `output[i] = 0`.
+If the given segment ID `i` is negative, the value is dropped and will not be
+added to the sum of the segment.
 
 `num_segments` should equal the number of distinct segment IDs.
 
diff --git a/tensorflow/core/common_runtime/mkl_cpu_allocator.h b/tensorflow/core/common_runtime/mkl_cpu_allocator.h
index 53e80b1ee3..63b74e8dbf 100644
--- a/tensorflow/core/common_runtime/mkl_cpu_allocator.h
+++ b/tensorflow/core/common_runtime/mkl_cpu_allocator.h
@@ -81,7 +81,7 @@ class MklCPUAllocator : public Allocator {
       }
 #if defined(_SC_PHYS_PAGES) && defined(_SC_PAGESIZE)
       if (user_val > max_mem_bytes) {
-        LOG(WARNING) << "The user specifed a memory limit " << kMaxLimitStr
+        LOG(WARNING) << "The user specified a memory limit " << kMaxLimitStr
                      << "=" << user_val
                      << " greater than available physical memory: "
                      << max_mem_bytes
diff --git a/tensorflow/core/common_runtime/sycl/sycl_device.h b/tensorflow/core/common_runtime/sycl/sycl_device.h
index 9caa076c72..cc272d156e 100644
--- a/tensorflow/core/common_runtime/sycl/sycl_device.h
+++ b/tensorflow/core/common_runtime/sycl/sycl_device.h
@@ -46,8 +46,8 @@ class GSYCLInterface {
 
     if (!found_device) {
       // Currently Intel GPU is not supported
-      LOG(WARNING) << "No OpenCL GPU found that is supported by ComputeCpp, "
-                      "trying OpenCL CPU";
+      LOG(WARNING) << "No OpenCL GPU found that is supported by "
+                   << "ComputeCpp/triSYCL, trying OpenCL CPU";
     }
 
     for (const auto& device : device_list) {
@@ -58,10 +58,24 @@ class GSYCLInterface {
       }
     }
 
+    if (!found_device) {
+      LOG(WARNING) << "No OpenCL CPU found that is supported by "
+                   << "ComputeCpp/triSYCL, checking for host sycl device";
+    }
+
+    for (const auto& device : device_list) {
+      // triSYCL only supports the host device for now
+      if (device.is_host()) {
+        LOG(WARNING) << "Found SYCL host device";
+        AddDevice(device);
+        found_device = true;
+      }
+    }
+
     if (!found_device) {
       // Currently Intel GPU is not supported
-      LOG(FATAL)
-          << "No OpenCL GPU nor CPU found that is supported by ComputeCpp";
+      LOG(FATAL) << "No SYCL host and no OpenCL GPU nor CPU"
+                 << " supported by ComputeCPP/triSYCL was found";
     } else {
       LOG(INFO) << "Found following OpenCL devices:";
       for (int i = 0; i < device_list.size(); i++) {
diff --git a/tensorflow/core/graph/graph.cc b/tensorflow/core/graph/graph.cc
index 87c41186d5..fd1b5d33b9 100644
--- a/tensorflow/core/graph/graph.cc
+++ b/tensorflow/core/graph/graph.cc
@@ -453,6 +453,21 @@ const Edge* Graph::AddControlEdge(Node* source, Node* dest,
   return AddEdge(source, kControlSlot, dest, kControlSlot);
 }
 
+void Graph::RemoveControlEdge(const Edge* e) {
+  if (!e->src_->IsSource() && !e->dst_->IsSink()) {
+    e->dst_->MaybeCopyOnWrite();
+    std::string e_src_name = strings::StrCat("^", e->src_->name());
+    auto* inputs = e->dst_->props_->node_def.mutable_input();
+    for (auto it = inputs->begin(); it != inputs->end(); ++it) {
+      if (*it == e_src_name) {
+        inputs->erase(it);
+        break;
+      }
+    }
+  }
+  RemoveEdge(e);
+}
+
 Status Graph::UpdateEdge(Node* new_src, int new_src_index, Node* dst,
                          int dst_index) {
   TF_RETURN_IF_ERROR(IsValidOutputTensor(new_src, new_src_index));
diff --git a/tensorflow/core/graph/graph.h b/tensorflow/core/graph/graph.h
index c5dde722fa..223dd12f8f 100644
--- a/tensorflow/core/graph/graph.h
+++ b/tensorflow/core/graph/graph.h
@@ -451,6 +451,11 @@ class Graph {
   // REQUIRES: The edge must exist.
   void RemoveEdge(const Edge* edge);
 
+  // Removes control edge `edge` from the graph. Note that this also updates
+  // the corresponding NodeDef to reflect the change.
+  // REQUIRES: The control edge must exist.
+  void RemoveControlEdge(const Edge* e);
+
   // Updates the input to a node.  The existing edge to `dst` is removed and an
   // edge from `new_src` to `dst` is created. The NodeDef associated with `dst`
   // is also updated.
diff --git a/tensorflow/core/graph/graph_partition.cc b/tensorflow/core/graph/graph_partition.cc
index b9e3cba035..1924c05d3d 100644
--- a/tensorflow/core/graph/graph_partition.cc
+++ b/tensorflow/core/graph/graph_partition.cc
@@ -117,7 +117,7 @@ DataType EdgeType(const Edge* e) {
   }
 }
 
-// Return true iff we need to add a same device send/recv for 'edge'.
+// Return true iff we need to add the same device send/recv for 'edge'.
 bool NeedSameDeviceSendRecv(const Edge* edge, const GraphInfo& info) {
   if (edge->IsControlEdge()) {
     return false;
@@ -1116,7 +1116,7 @@ Status Partition(const PartitionOptions& opts, Graph* g,
         // before the data is available.
         AddInput(real_recv, send->name(), Graph::kControlSlot);
       } else if (control_flow_edge != nullptr) {
-        // Redirect control edge to the real recv since this is not a same
+        // Redirect control edge to the real recv since this is not the same
         // device send/recv.
         --num_control_flow_edges;
         AddInput(real_recv, control_flow_edge->src()->name(),
diff --git a/tensorflow/core/graph/graph_test.cc b/tensorflow/core/graph/graph_test.cc
index 7686cef219..e2ce0ba046 100644
--- a/tensorflow/core/graph/graph_test.cc
+++ b/tensorflow/core/graph/graph_test.cc
@@ -118,6 +118,23 @@ class GraphTest : public ::testing::Test {
     LOG(FATAL) << name;
   }
 
+  bool ControlEdgeExistsInGraphOrNodeDef(const Node* src, const Node* dst) {
+    for (const Edge* e : dst->in_edges()) {
+      if (e->IsControlEdge() && e->src() == src &&
+          e->src_output() == Graph::kControlSlot &&
+          e->dst_input() == Graph::kControlSlot) {
+        return true;
+      }
+    }
+    std::string control_edge_name = strings::StrCat("^", src->name());
+    for (int i = 0; i < dst->def().input_size(); ++i) {
+      if (dst->def().input(i) == control_edge_name) {
+        return true;
+      }
+    }
+    return false;
+  }
+
   Graph graph_;
 
  private:
@@ -458,8 +475,8 @@ TEST_F(GraphTest, AddControlEdge) {
   EXPECT_TRUE(edge == nullptr);
   EXPECT_EQ(b->def().input_size(), 2);
 
-  // Can add redundant control edge with create_duplicate.
-  edge = graph_.AddControlEdge(a, b, /*create_duplicate=*/true);
+  // Can add redundant control edge with allow_duplicates.
+  edge = graph_.AddControlEdge(a, b, /*allow_duplicates=*/true);
   EXPECT_TRUE(edge != nullptr);
   // create_duplicate causes the NodeDef not to be updated.
   ASSERT_EQ(b->def().input_size(), 2);
@@ -477,6 +494,47 @@ TEST_F(GraphTest, AddControlEdge) {
   EXPECT_EQ(b->def().input_size(), 2);
 }
 
+TEST_F(GraphTest, RemoveControlEdge) {
+  FromGraphDef(
+      "node { name: 'A' op: 'OneOutput' }"
+      "node { name: 'B' op: 'OneInputTwoOutputs' input: [ 'A:0' ] }"
+      "node { name: 'C' op: 'NoOp' } ");
+  Node* a = FindNode("A");
+  Node* b = FindNode("B");
+  Node* c = FindNode("C");
+
+  // Add a control edge.
+  const Edge* edge_1 = graph_.AddControlEdge(c, a);
+  const Edge* edge_2 = graph_.AddControlEdge(a, b);
+  ASSERT_TRUE(edge_1 != nullptr);
+  ASSERT_TRUE(edge_2 != nullptr);
+
+  ASSERT_TRUE(ControlEdgeExistsInGraphOrNodeDef(c, a));
+  ASSERT_TRUE(ControlEdgeExistsInGraphOrNodeDef(a, b));
+
+  graph_.RemoveControlEdge(edge_1);
+  ASSERT_TRUE(!ControlEdgeExistsInGraphOrNodeDef(c, a));
+  ASSERT_TRUE(ControlEdgeExistsInGraphOrNodeDef(a, b));
+
+  graph_.RemoveControlEdge(edge_2);
+  ASSERT_TRUE(!ControlEdgeExistsInGraphOrNodeDef(c, a));
+  ASSERT_TRUE(!ControlEdgeExistsInGraphOrNodeDef(a, b));
+
+  // Test removing a duplicate control edge.
+  // Note that unless allow_duplicates is true, the duplicate edge
+  // will not be added. That's why we expect edge_4 to be a null
+  // pointer. We are not testing with allow_duplicates set to true,
+  // as that is a highly unlikely use case that does not make much
+  // sense.
+  const Edge* edge_3 = graph_.AddControlEdge(c, a);
+  const Edge* edge_4 = graph_.AddControlEdge(c, a);
+  ASSERT_TRUE(edge_3 != nullptr);
+  ASSERT_TRUE(edge_4 == nullptr);
+
+  graph_.RemoveControlEdge(edge_3);
+  ASSERT_TRUE(!ControlEdgeExistsInGraphOrNodeDef(c, a));
+}
+
 TEST_F(GraphTest, UpdateEdge) {
   // Build a little graph
   Node* a = FromNodeDef("A", "OneOutput", 0);
diff --git a/tensorflow/core/graph/mkl_tfconversion_pass.cc b/tensorflow/core/graph/mkl_tfconversion_pass.cc
index fe4588389e..3fd89e2b66 100644
--- a/tensorflow/core/graph/mkl_tfconversion_pass.cc
+++ b/tensorflow/core/graph/mkl_tfconversion_pass.cc
@@ -68,7 +68,7 @@ namespace tensorflow {
 // take place before we hit the op. For this, we add a new op before each
 // element-wise MKL op to deal with the inputs, called _MklInputConversion.
 // This pass has been enhanced to add this capability.
-// 
+//
 // The _MklInputConversion op will check the inputs to the elementwise op and
 // make sure that either both are in MKL format or both are in TF format,
 // depending on their initial state and whether broadcast is needed or not.
diff --git a/tensorflow/core/grappler/costs/graph_properties.h b/tensorflow/core/grappler/costs/graph_properties.h
index 5df190ba01..95bc5044d0 100644
--- a/tensorflow/core/grappler/costs/graph_properties.h
+++ b/tensorflow/core/grappler/costs/graph_properties.h
@@ -58,6 +58,12 @@ class GraphProperties {
   const std::vector<OpInfo::TensorProperties>& GetOutputProperties(
       const string& node_name) const;
 
+  static void FillTensorPropertiesFromContext(
+      const shape_inference::ShapeHandle&, const DataType&,
+      shape_inference::InferenceContext*,
+      std::unordered_map<const shape_inference::Dimension*, int>* dim_ids,
+      OpInfo::TensorProperties*);
+
  private:
   // Inputs
   GrapplerItem item_;
diff --git a/tensorflow/core/grappler/utils.cc b/tensorflow/core/grappler/utils.cc
index 7fd1876371..9ab889beb5 100644
--- a/tensorflow/core/grappler/utils.cc
+++ b/tensorflow/core/grappler/utils.cc
@@ -62,7 +62,7 @@ const std::set<NodeDef*>& NodeMap::GetOutputs(const string& node_name) const {
 void NodeMap::AddNode(const string& name, NodeDef* node) {
   auto ret = nodes_.insert(std::make_pair(name, node));
   CHECK(ret.second) << "Pair (" << name << "," << node
-                    << ") is not inserted because a same key already exists.";
+                    << ") is not inserted because the same key already exists.";
 }
 
 void NodeMap::AddOutput(const string& node_name, const string& output_name) {
diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD
index d7b457eab7..b4a5a3c796 100644
--- a/tensorflow/core/kernels/BUILD
+++ b/tensorflow/core/kernels/BUILD
@@ -929,6 +929,25 @@ tf_cc_test(
     ],
 )
 
+tf_cuda_cc_test(
+    name = "bincount_op_test",
+    size = "small",
+    srcs = ["bincount_op_test.cc"],
+    deps = [
+        ":bincount_op",
+        ":ops_testutil",
+        ":ops_util",
+        "//tensorflow/core:core_cpu",
+        "//tensorflow/core:framework",
+        "//tensorflow/core:lib",
+        "//tensorflow/core:math_ops_op_lib",
+        "//tensorflow/core:protos_all_cc",
+        "//tensorflow/core:test",
+        "//tensorflow/core:test_main",
+        "//tensorflow/core:testlib",
+    ],
+)
+
 tf_cuda_cc_test(
     name = "constant_op_test",
     size = "small",
@@ -1617,7 +1636,10 @@ DYNAMIC_DEPS = [
 tf_kernel_library(
     name = "dynamic_partition_op",
     prefix = "dynamic_partition_op",
-    deps = DYNAMIC_DEPS,
+    deps = DYNAMIC_DEPS + [
+        ":fill_functor",
+        ":gather_functor",
+    ] + if_cuda(["@cub_archive//:cub"]),
 )
 
 tf_kernel_library(
@@ -1687,7 +1709,7 @@ tf_kernel_library(
     ],
 )
 
-tf_cc_tests(
+tf_cuda_cc_tests(
     name = "dynamic_op_test",
     size = "small",
     srcs = [
@@ -1698,6 +1720,7 @@ tf_cc_tests(
         ":data_flow",
         ":ops_testutil",
         ":ops_util",
+        "//tensorflow/core:core_cpu",
         "//tensorflow/core:framework",
         "//tensorflow/core:lib",
         "//tensorflow/core:protos_all_cc",
@@ -2572,8 +2595,9 @@ tf_kernel_library(
 
 tf_kernel_library(
     name = "bucketize_op",
+    gpu_srcs = ["cuda_device_array.h"],
     prefix = "bucketize_op",
-    deps = MATH_DEPS,
+    deps = ARRAY_DEPS,
 )
 
 tf_kernel_library(
@@ -3174,7 +3198,7 @@ tf_kernel_library(
         "//tensorflow/core:lib",
         "//tensorflow/core:lib_internal",
         "//third_party/eigen3",
-    ],
+    ] + if_cuda(["@cub_archive//:cub"]),
 )
 
 tf_kernel_library(
diff --git a/tensorflow/core/kernels/avgpooling_op.cc b/tensorflow/core/kernels/avgpooling_op.cc
index af629d0de8..f918023693 100644
--- a/tensorflow/core/kernels/avgpooling_op.cc
+++ b/tensorflow/core/kernels/avgpooling_op.cc
@@ -153,7 +153,8 @@ class AvgPoolingOp<GPUDevice, T> : public UnaryOp<T> {
     if (data_format_ == FORMAT_NCHW) {
       DnnPoolingOp<T>::Compute(
           context, perftools::gputools::dnn::PoolingMode::kAverage, ksize_,
-          stride_, padding_, data_format_, tensor_in, output_shape);
+          stride_, padding_, data_format_, tensor_in, output_shape,
+          /*propagate_nans=*/false);
     } else {
       Tensor* output = nullptr;
       OP_REQUIRES_OK(context,
@@ -408,7 +409,7 @@ class AvgPoolingGradOp<GPUDevice, T> : public OpKernel {
     DnnPoolingGradOp<T>::Compute(
         context, perftools::gputools::dnn::PoolingMode::kAverage, ksize_,
         stride_, padding_, data_format_, nullptr, nullptr, out_backprop,
-        output_shape);
+        output_shape, /*propagate_nans=*/false);
   }
 
  private:
@@ -532,7 +533,7 @@ class AvgPoolingGradOpCustomGPUKernel : public OpKernel {
       DnnPoolingGradOp<T>::Compute(
           context, perftools::gputools::dnn::PoolingMode::kAverage, ksize_,
           stride_, padding_, data_format_, nullptr, nullptr, out_backprop,
-          output_shape);
+          output_shape, /*propagate_nans=*/false);
     }
   }
 
diff --git a/tensorflow/core/kernels/bincount_op.cc b/tensorflow/core/kernels/bincount_op.cc
index 1cd5943ef3..890fa3121b 100644
--- a/tensorflow/core/kernels/bincount_op.cc
+++ b/tensorflow/core/kernels/bincount_op.cc
@@ -17,6 +17,7 @@ limitations under the License.
 
 #define EIGEN_USE_THREADS
 
+#include "tensorflow/core/kernels/bincount_op.h"
 #include "tensorflow/core/framework/op_kernel.h"
 #include "tensorflow/core/framework/register_types.h"
 #include "tensorflow/core/framework/types.h"
@@ -27,46 +28,37 @@ namespace tensorflow {
 
 using thread::ThreadPool;
 
-template <typename T>
-class BincountOp : public OpKernel {
- public:
-  explicit BincountOp(OpKernelConstruction* ctx) : OpKernel(ctx) {}
+typedef Eigen::ThreadPoolDevice CPUDevice;
+typedef Eigen::GpuDevice GPUDevice;
 
-  void Compute(OpKernelContext* ctx) override {
-    const Tensor& arr_t = ctx->input(0);
-    const Tensor& size_tensor = ctx->input(1);
-    const Tensor& weights_t = ctx->input(2);
-    int32 size = size_tensor.scalar<int32>()();
-    OP_REQUIRES(
-        ctx, size >= 0,
-        errors::InvalidArgument("size (", size, ") must be non-negative"));
-    const bool has_weights = weights_t.NumElements() > 0;
-    OP_REQUIRES(ctx, !(has_weights && arr_t.shape() != weights_t.shape()),
-                errors::InvalidArgument(
-                    "If weights are passed, they must have the same shape (" +
-                    weights_t.shape().DebugString() + ") as arr (" +
-                    arr_t.shape().DebugString() + ")"));
-    const auto arr = arr_t.flat<int32>();
-    const auto weights = weights_t.flat<T>();
+namespace functor {
+
+template <typename T>
+struct BincountFunctor<CPUDevice, T> {
+  static Status Compute(OpKernelContext* context,
+                        const typename TTypes<int32, 1>::ConstTensor& arr,
+                        const typename TTypes<T, 1>::ConstTensor& weights,
+                        typename TTypes<T, 1>::Tensor& output) {
+    int size = output.size();
 
     Tensor all_nonneg_t;
-    OP_REQUIRES_OK(ctx,
-                   ctx->allocate_temp(DT_BOOL, TensorShape({}), &all_nonneg_t,
-                                      AllocatorAttributes()));
-    all_nonneg_t.scalar<bool>().device(ctx->eigen_cpu_device()) =
+    TF_RETURN_IF_ERROR(context->allocate_temp(
+        DT_BOOL, TensorShape({}), &all_nonneg_t, AllocatorAttributes()));
+    all_nonneg_t.scalar<bool>().device(context->eigen_cpu_device()) =
         (arr >= 0).all();
-    OP_REQUIRES(ctx, all_nonneg_t.scalar<bool>()(),
-                errors::InvalidArgument("Input arr must be non-negative!"));
+    if (!all_nonneg_t.scalar<bool>()()) {
+      return errors::InvalidArgument("Input arr must be non-negative!");
+    }
 
     // Allocate partial output bin sums for each worker thread. Worker ids in
     // ParallelForWithWorkerId range from 0 to NumThreads() inclusive.
     ThreadPool* thread_pool =
-        ctx->device()->tensorflow_cpu_worker_threads()->workers;
+        context->device()->tensorflow_cpu_worker_threads()->workers;
     const int64 num_threads = thread_pool->NumThreads() + 1;
     Tensor partial_bins_t;
-    OP_REQUIRES_OK(ctx, ctx->allocate_temp(weights_t.dtype(),
-                                           TensorShape({num_threads, size}),
-                                           &partial_bins_t));
+    TF_RETURN_IF_ERROR(context->allocate_temp(DataTypeToEnum<T>::value,
+                                              TensorShape({num_threads, size}),
+                                              &partial_bins_t));
     auto partial_bins = partial_bins_t.matrix<T>();
     partial_bins.setZero();
     thread_pool->ParallelForWithWorkerId(
@@ -75,7 +67,7 @@ class BincountOp : public OpKernel {
           for (int64 i = start_ind; i < limit_ind; i++) {
             int32 value = arr(i);
             if (value < size) {
-              if (has_weights) {
+              if (weights.size()) {
                 partial_bins(worker_id, value) += weights(i);
               } else {
                 // Complex numbers don't support "++".
@@ -84,25 +76,63 @@ class BincountOp : public OpKernel {
             }
           }
         });
-    TensorShape output_shape({size});
-    Tensor* output_t;
-    OP_REQUIRES_OK(ctx, ctx->allocate_output(0, output_shape, &output_t));
+
     // Sum the partial bins along the 0th axis.
     Eigen::array<int, 1> reduce_dims({0});
-    output_t->flat<T>().device(ctx->eigen_cpu_device()) =
-        partial_bins.sum(reduce_dims);
+    output.device(context->eigen_cpu_device()) = partial_bins.sum(reduce_dims);
+    return Status::OK();
+  }
+};
+
+}  // namespace functor
+
+template <typename Device, typename T>
+class BincountOp : public OpKernel {
+ public:
+  explicit BincountOp(OpKernelConstruction* ctx) : OpKernel(ctx) {}
+
+  void Compute(OpKernelContext* ctx) override {
+    const Tensor& arr_t = ctx->input(0);
+    const Tensor& size_tensor = ctx->input(1);
+    const Tensor& weights_t = ctx->input(2);
+
+    int32 size = size_tensor.scalar<int32>()();
+    OP_REQUIRES(
+        ctx, size >= 0,
+        errors::InvalidArgument("size (", size, ") must be non-negative"));
+
+    const auto arr = arr_t.flat<int32>();
+    const auto weights = weights_t.flat<T>();
+    Tensor* output_t;
+    OP_REQUIRES_OK(ctx,
+                   ctx->allocate_output(0, TensorShape({size}), &output_t));
+    auto output = output_t->flat<T>();
+    OP_REQUIRES_OK(ctx, functor::BincountFunctor<Device, T>::Compute(
+                            ctx, arr, weights, output));
   }
 };
 
-#define REGISTER(TYPE)                                               \
+#define REGISTER_KERNELS(type)                                       \
   REGISTER_KERNEL_BUILDER(                                           \
-      Name("Bincount").Device(DEVICE_CPU).TypeConstraint<TYPE>("T"), \
-      BincountOp<TYPE>)
+      Name("Bincount").Device(DEVICE_CPU).TypeConstraint<type>("T"), \
+      BincountOp<CPUDevice, type>)
+
+TF_CALL_NUMBER_TYPES(REGISTER_KERNELS);
+#undef REGISTER_KERNELS
+
+#if GOOGLE_CUDA
+
+#define REGISTER_KERNELS(type)                            \
+  REGISTER_KERNEL_BUILDER(Name("Bincount")                \
+                              .Device(DEVICE_GPU)         \
+                              .HostMemory("size")         \
+                              .TypeConstraint<type>("T"), \
+                          BincountOp<GPUDevice, type>)
 
-TF_CALL_NUMBER_TYPES(REGISTER);
+TF_CALL_int32(REGISTER_KERNELS);
+TF_CALL_float(REGISTER_KERNELS);
+#undef REGISTER_KERNELS
 
-// TODO(ringwalt): Add a GPU implementation. We probably want to take a
-// different approach, e.g. threads in a warp each taking a pass over the same
-// data, and each thread summing a single bin.
+#endif  // GOOGLE_CUDA
 
 }  // end namespace tensorflow
diff --git a/tensorflow/core/kernels/bincount_op.h b/tensorflow/core/kernels/bincount_op.h
new file mode 100644
index 0000000000..cd3d560cd1
--- /dev/null
+++ b/tensorflow/core/kernels/bincount_op.h
@@ -0,0 +1,41 @@
+/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_BINCOUNT_OP_H_
+#define TENSORFLOW_BINCOUNT_OP_H_
+
+#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
+#include "tensorflow/core/framework/op_kernel.h"
+#include "tensorflow/core/framework/tensor_types.h"
+#include "tensorflow/core/framework/types.h"
+#include "tensorflow/core/lib/core/errors.h"
+
+namespace tensorflow {
+
+namespace functor {
+
+template <typename Device, typename T>
+struct BincountFunctor {
+  static Status Compute(OpKernelContext* context,
+                        const typename TTypes<int32, 1>::ConstTensor& arr,
+                        const typename TTypes<T, 1>::ConstTensor& weights,
+                        typename TTypes<T, 1>::Tensor& output);
+};
+
+}  // end namespace functor
+
+}  // end namespace tensorflow
+
+#endif  // TENSORFLOW_BINCOUNT_OP_H_
diff --git a/tensorflow/core/kernels/bincount_op_gpu.cu.cc b/tensorflow/core/kernels/bincount_op_gpu.cu.cc
new file mode 100644
index 0000000000..6074b3e1f6
--- /dev/null
+++ b/tensorflow/core/kernels/bincount_op_gpu.cu.cc
@@ -0,0 +1,114 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#if GOOGLE_CUDA
+
+#define EIGEN_USE_GPU
+
+#include "external/cub_archive/cub/device/device_histogram.cuh"
+#include "tensorflow/core/framework/op_kernel.h"
+#include "tensorflow/core/framework/register_types.h"
+#include "tensorflow/core/framework/tensor.h"
+#include "tensorflow/core/framework/tensor_shape.h"
+#include "tensorflow/core/kernels/bincount_op.h"
+#include "tensorflow/core/platform/logging.h"
+#include "tensorflow/core/platform/types.h"
+#include "tensorflow/core/util/cuda_kernel_helper.h"
+
+namespace tensorflow {
+
+typedef Eigen::GpuDevice GPUDevice;
+
+namespace functor {
+
+template <typename T>
+struct BincountFunctor<GPUDevice, T> {
+  static Status Compute(OpKernelContext* context,
+                        const typename TTypes<int32, 1>::ConstTensor& arr,
+                        const typename TTypes<T, 1>::ConstTensor& weights,
+                        typename TTypes<T, 1>::Tensor& output) {
+    if (weights.size() != 0) {
+      return errors::InvalidArgument(
+          "Weights should not be passed as it should be "
+          "handled by unsorted_segment_sum");
+    }
+    if (output.size() == 0) {
+      return Status::OK();
+    }
+    // In case weight.size() == 0, use CUB
+    size_t temp_storage_bytes = 0;
+    const int32* d_samples = arr.data();
+    T* d_histogram = output.data();
+    int num_levels = output.size() + 1;
+    int32 lower_level = 0;
+    int32 upper_level = output.size();
+    int num_samples = arr.size();
+    const cudaStream_t& stream = GetCudaStream(context);
+
+    // The first HistogramEven is to obtain the temp storage size required
+    // with d_temp_storage = NULL passed to the call.
+    auto err = cub::DeviceHistogram::HistogramEven(
+        /* d_temp_storage */ NULL,
+        /* temp_storage_bytes */ temp_storage_bytes,
+        /* d_samples */ d_samples,
+        /* d_histogram */ d_histogram,
+        /* num_levels */ num_levels,
+        /* lower_level */ lower_level,
+        /* upper_level */ upper_level,
+        /* num_samples */ num_samples,
+        /* stream */ stream);
+    if (err != cudaSuccess) {
+      return errors::Internal(
+          "Could not launch HistogramEven to get temp storage: ",
+          cudaGetErrorString(err), ".");
+    }
+    Tensor temp_storage;
+    TF_RETURN_IF_ERROR(context->allocate_temp(
+        DataTypeToEnum<int8>::value,
+        TensorShape({static_cast<int64>(temp_storage_bytes)}), &temp_storage));
+
+    void* d_temp_storage = temp_storage.flat<int8>().data();
+    // The second HistogramEven is to actual run with d_temp_storage
+    // allocated with temp_storage_bytes.
+    err = cub::DeviceHistogram::HistogramEven(
+        /* d_temp_storage */ d_temp_storage,
+        /* temp_storage_bytes */ temp_storage_bytes,
+        /* d_samples */ d_samples,
+        /* d_histogram */ d_histogram,
+        /* num_levels */ num_levels,
+        /* lower_level */ lower_level,
+        /* upper_level */ upper_level,
+        /* num_samples */ num_samples,
+        /* stream */ stream);
+    if (err != cudaSuccess) {
+      return errors::Internal(
+          "Could not launch HistogramEven: ", cudaGetErrorString(err), ".");
+    }
+    return Status::OK();
+  }
+};
+
+}  // end namespace functor
+
+#define REGISTER_GPU_SPEC(type) \
+  template struct functor::BincountFunctor<GPUDevice, type>;
+
+TF_CALL_int32(REGISTER_GPU_SPEC);
+TF_CALL_float(REGISTER_GPU_SPEC);
+#undef REGISTER_GPU_SPEC
+
+}  // namespace tensorflow
+
+#endif  // GOOGLE_CUDA
diff --git a/tensorflow/core/kernels/bincount_op_test.cc b/tensorflow/core/kernels/bincount_op_test.cc
new file mode 100644
index 0000000000..cb04b40637
--- /dev/null
+++ b/tensorflow/core/kernels/bincount_op_test.cc
@@ -0,0 +1,75 @@
+/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/core/common_runtime/kernel_benchmark_testlib.h"
+#include "tensorflow/core/framework/fake_input.h"
+#include "tensorflow/core/framework/node_def_builder.h"
+#include "tensorflow/core/framework/tensor.h"
+#include "tensorflow/core/graph/node_builder.h"
+#include "tensorflow/core/kernels/ops_testutil.h"
+#include "tensorflow/core/platform/test.h"
+#include "tensorflow/core/platform/test_benchmark.h"
+
+namespace tensorflow {
+
+static Graph* Bincount(int arr_size, int nbins) {
+  Graph* g = new Graph(OpRegistry::Global());
+
+  Tensor arr(DT_INT32, TensorShape({arr_size}));
+  arr.flat<int32>() = arr.flat<int32>().setRandom().abs();
+
+  Tensor size(DT_INT32, TensorShape({static_cast<int32>(1)}));
+  size.flat<int32>()(0) = static_cast<int32>(nbins);
+
+  Tensor weights(DT_INT32, TensorShape({0}));
+
+  Node* node;
+  TF_CHECK_OK(NodeBuilder(g->NewName("n"), "Bincount")
+                  .Input(test::graph::Constant(g, arr))
+                  .Input(test::graph::Constant(g, size))
+                  .Input(test::graph::Constant(g, weights))
+                  .Attr("T", DT_INT32)
+                  .Finalize(g, &node));
+  return g;
+}
+
+#define BM_BincountDev(K, NBINS, type)                             \
+  static void BM_Bincount##_##type##_##K##_##NBINS(int iters) {    \
+    testing::ItemsProcessed(static_cast<int64>(iters) * K * 1024); \
+    test::Benchmark(#type, Bincount(K * 1024, NBINS)).Run(iters);  \
+  }                                                                \
+  BENCHMARK(BM_Bincount##_##type##_##K##_##NBINS);
+
+BM_BincountDev(32, 1000, cpu);
+BM_BincountDev(32, 2000, cpu);
+BM_BincountDev(32, 5000, cpu);
+BM_BincountDev(64, 1000, cpu);
+BM_BincountDev(64, 2000, cpu);
+BM_BincountDev(64, 5000, cpu);
+BM_BincountDev(128, 1000, cpu);
+BM_BincountDev(128, 2000, cpu);
+BM_BincountDev(128, 5000, cpu);
+
+BM_BincountDev(32, 1000, gpu);
+BM_BincountDev(32, 2000, gpu);
+BM_BincountDev(32, 5000, gpu);
+BM_BincountDev(64, 1000, gpu);
+BM_BincountDev(64, 2000, gpu);
+BM_BincountDev(64, 5000, gpu);
+BM_BincountDev(128, 1000, gpu);
+BM_BincountDev(128, 2000, gpu);
+BM_BincountDev(128, 5000, gpu);
+
+}  // end namespace tensorflow
diff --git a/tensorflow/core/kernels/bucketize_op.cc b/tensorflow/core/kernels/bucketize_op.cc
index 93c2d01221..c1693de538 100644
--- a/tensorflow/core/kernels/bucketize_op.cc
+++ b/tensorflow/core/kernels/bucketize_op.cc
@@ -15,15 +15,43 @@ limitations under the License.
 
 // See docs in ../ops/math_ops.cc.
 
-#include <algorithm>
-#include <vector>
-
+#include "tensorflow/core/kernels/bucketize_op.h"
 #include "tensorflow/core/framework/op_kernel.h"
-#include "tensorflow/core/lib/core/errors.h"
+#include "tensorflow/core/framework/register_types.h"
+#include "tensorflow/core/framework/tensor.h"
+#include "tensorflow/core/framework/tensor_shape.h"
+#include "tensorflow/core/platform/logging.h"
+#include "tensorflow/core/platform/types.h"
 
 namespace tensorflow {
 
+using thread::ThreadPool;
+
+typedef Eigen::ThreadPoolDevice CPUDevice;
+typedef Eigen::GpuDevice GPUDevice;
+
+namespace functor {
+
 template <typename T>
+struct BucketizeFunctor<CPUDevice, T> {
+  // PRECONDITION: boundaries_vector must be sorted.
+  static Status Compute(OpKernelContext* context,
+                        const typename TTypes<T, 1>::ConstTensor& input,
+                        const std::vector<float>& boundaries_vector,
+                        typename TTypes<int32, 1>::Tensor& output) {
+    const int N = input.size();
+    for (int i = 0; i < N; i++) {
+      auto first_bigger_it = std::upper_bound(
+          boundaries_vector.begin(), boundaries_vector.end(), input(i));
+      output(i) = first_bigger_it - boundaries_vector.begin();
+    }
+
+    return Status::OK();
+  }
+};
+}  // namespace functor
+
+template <typename Device, typename T>
 class BucketizeOp : public OpKernel {
  public:
   explicit BucketizeOp(OpKernelConstruction* context) : OpKernel(context) {
@@ -34,36 +62,42 @@ class BucketizeOp : public OpKernel {
 
   void Compute(OpKernelContext* context) override {
     const Tensor& input_tensor = context->input(0);
-    auto input = input_tensor.flat<T>();
+    const auto input = input_tensor.flat<T>();
+
     Tensor* output_tensor = nullptr;
     OP_REQUIRES_OK(context, context->allocate_output(0, input_tensor.shape(),
                                                      &output_tensor));
     auto output = output_tensor->template flat<int32>();
-
-    const int N = input.size();
-    for (int i = 0; i < N; i++) {
-      output(i) = CalculateBucketIndex(input(i));
-    }
+    OP_REQUIRES_OK(context, functor::BucketizeFunctor<Device, T>::Compute(
+                                context, input, boundaries_, output));
   }
 
  private:
-  int32 CalculateBucketIndex(const T value) {
-    auto first_bigger_it =
-        std::upper_bound(boundaries_.begin(), boundaries_.end(), value);
-    return first_bigger_it - boundaries_.begin();
-  }
   std::vector<float> boundaries_;
 };
 
 #define REGISTER_KERNEL(T)                                         \
   REGISTER_KERNEL_BUILDER(                                         \
       Name("Bucketize").Device(DEVICE_CPU).TypeConstraint<T>("T"), \
-      BucketizeOp<T>);
+      BucketizeOp<CPUDevice, T>);
+
+REGISTER_KERNEL(int32);
+REGISTER_KERNEL(int64);
+REGISTER_KERNEL(float);
+REGISTER_KERNEL(double);
+#undef REGISTER_KERNEL
+
+#if GOOGLE_CUDA
+#define REGISTER_KERNEL(T)                                         \
+  REGISTER_KERNEL_BUILDER(                                         \
+      Name("Bucketize").Device(DEVICE_GPU).TypeConstraint<T>("T"), \
+      BucketizeOp<GPUDevice, T>);
 
 REGISTER_KERNEL(int32);
 REGISTER_KERNEL(int64);
 REGISTER_KERNEL(float);
 REGISTER_KERNEL(double);
 #undef REGISTER_KERNEL
+#endif  // GOOGLE_CUDA
 
 }  // namespace tensorflow
diff --git a/tensorflow/core/kernels/bucketize_op.h b/tensorflow/core/kernels/bucketize_op.h
new file mode 100644
index 0000000000..c8e461beb9
--- /dev/null
+++ b/tensorflow/core/kernels/bucketize_op.h
@@ -0,0 +1,41 @@
+/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_BUCKETIZE_OP_H_
+#define TENSORFLOW_BUCKETIZE_OP_H_
+
+#include <vector>
+#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
+
+#include "tensorflow/core/framework/op_kernel.h"
+#include "tensorflow/core/framework/tensor_types.h"
+#include "tensorflow/core/framework/types.h"
+#include "tensorflow/core/lib/core/errors.h"
+
+namespace tensorflow {
+namespace functor {
+
+template <typename Device, typename T>
+struct BucketizeFunctor {
+  static Status Compute(OpKernelContext* context,
+                        const typename TTypes<T, 1>::ConstTensor& input,
+                        const std::vector<float>& boundaries_vector,
+                        typename TTypes<int32, 1>::Tensor& output);
+};
+
+}  // namespace functor
+}  // namespace tensorflow
+
+#endif  // TENSORFLOW_BUCKETIZE_OP_H_
diff --git a/tensorflow/core/kernels/bucketize_op_gpu.cu.cc b/tensorflow/core/kernels/bucketize_op_gpu.cu.cc
new file mode 100644
index 0000000000..325dee793b
--- /dev/null
+++ b/tensorflow/core/kernels/bucketize_op_gpu.cu.cc
@@ -0,0 +1,101 @@
+/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#if GOOGLE_CUDA
+
+#define EIGEN_USE_GPU
+
+#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
+
+#include "tensorflow/core/framework/op_kernel.h"
+#include "tensorflow/core/framework/register_types.h"
+#include "tensorflow/core/framework/tensor.h"
+#include "tensorflow/core/framework/tensor_shape.h"
+#include "tensorflow/core/kernels/bucketize_op.h"
+#include "tensorflow/core/kernels/cuda_device_array.h"
+#include "tensorflow/core/platform/logging.h"
+#include "tensorflow/core/platform/types.h"
+#include "tensorflow/core/util/cuda_kernel_helper.h"
+
+namespace tensorflow {
+
+typedef Eigen::GpuDevice GPUDevice;
+
+template <typename T>
+__global__ void BucketizeCustomKernel(
+    const int32 size_in, const T* in, const int32 size_boundaries,
+    CudaDeviceArrayStruct<float> boundaries_array, int32* out) {
+  const float* boundaries = GetCudaDeviceArrayOnDevice(&boundaries_array);
+  CUDA_1D_KERNEL_LOOP(i, size_in) {
+    T value = in[i];
+    int32 bucket = 0;
+    int32 count = size_boundaries;
+    while (count > 0) {
+      int32 l = bucket;
+      int32 step = count / 2;
+      l += step;
+      if (!(value < static_cast<T>(boundaries[l]))) {
+        bucket = ++l;
+        count -= step + 1;
+      } else {
+        count = step;
+      }
+    }
+    out[i] = bucket;
+  }
+}
+
+namespace functor {
+
+template <typename T>
+struct BucketizeFunctor<GPUDevice, T> {
+  // PRECONDITION: boundaries_vector must be sorted.
+  static Status Compute(OpKernelContext* context,
+                        const typename TTypes<T, 1>::ConstTensor& input,
+                        const std::vector<float>& boundaries_vector,
+                        typename TTypes<int32, 1>::Tensor& output) {
+    const GPUDevice& d = context->eigen_device<GPUDevice>();
+
+    CudaDeviceArrayOnHost<float> boundaries_array(context,
+                                                  boundaries_vector.size());
+    TF_RETURN_IF_ERROR(boundaries_array.Init());
+    for (int i = 0; i < boundaries_vector.size(); ++i) {
+      boundaries_array.Set(i, boundaries_vector[i]);
+    }
+    TF_RETURN_IF_ERROR(boundaries_array.Finalize());
+
+    CudaLaunchConfig config = GetCudaLaunchConfig(input.size(), d);
+    BucketizeCustomKernel<T>
+        <<<config.block_count, config.thread_per_block, 0, d.stream()>>>(
+            input.size(), input.data(), boundaries_vector.size(),
+            boundaries_array.data(), output.data());
+
+    return Status::OK();
+  }
+};
+}  // namespace functor
+
+#define REGISTER_GPU_SPEC(type) \
+  template struct functor::BucketizeFunctor<GPUDevice, type>;
+
+REGISTER_GPU_SPEC(int32);
+REGISTER_GPU_SPEC(int64);
+REGISTER_GPU_SPEC(float);
+REGISTER_GPU_SPEC(double);
+#undef REGISTER_GPU_SPEC
+
+}  // namespace tensorflow
+
+#endif  // GOOGLE_CUDA
diff --git a/tensorflow/core/kernels/conv_grad_ops_3d.cc b/tensorflow/core/kernels/conv_grad_ops_3d.cc
index 21f5cb1716..c2d24d1f12 100644
--- a/tensorflow/core/kernels/conv_grad_ops_3d.cc
+++ b/tensorflow/core/kernels/conv_grad_ops_3d.cc
@@ -236,6 +236,7 @@ class Conv3DBackpropInputOp : public OpKernel {
   REGISTER_KERNEL_BUILDER(                                                     \
       Name("Conv3DBackpropInputV2").Device(DEVICE_CPU).TypeConstraint<T>("T"), \
       Conv3DBackpropInputOp<CPUDevice, T>);
+TF_CALL_half(REGISTER_CPU_KERNEL);
 TF_CALL_float(REGISTER_CPU_KERNEL);
 TF_CALL_double(REGISTER_CPU_KERNEL);
 #undef REGISTER_CPU_KERNEL
@@ -383,6 +384,7 @@ class Conv3DBackpropFilterOp : public OpKernel {
                               .Device(DEVICE_CPU)                             \
                               .TypeConstraint<T>("T"),                        \
                           Conv3DBackpropFilterOp<CPUDevice, T>);
+TF_CALL_half(REGISTER_CPU_KERNEL);
 TF_CALL_float(REGISTER_CPU_KERNEL);
 TF_CALL_double(REGISTER_CPU_KERNEL);
 #undef REGISTER_CPU_KERNEL
@@ -409,6 +411,7 @@ namespace functor {
       const std::array<int, 3>& padding_right,                        \
       typename TTypes<T, 5, int>::Tensor out, TensorFormat format);
 
+DECLARE_GPU_SPEC(Eigen::half);
 DECLARE_GPU_SPEC(float);
 #undef DECLARE_GPU_SPEC
 }  // namespace functor
@@ -1098,22 +1101,27 @@ class Conv3DBackpropFilterOp<GPUDevice, T> : public OpKernel {
   bool cudnn_use_autotune_;
 };
 
-REGISTER_KERNEL_BUILDER(
-    Name("Conv3DBackpropInput").Device(DEVICE_GPU).TypeConstraint<float>("T"),
-    Conv3DBackpropInputOp<GPUDevice, float>);
-REGISTER_KERNEL_BUILDER(Name("Conv3DBackpropInputV2")
-                            .Device(DEVICE_GPU)
-                            .TypeConstraint<float>("T")
-                            .HostMemory("input_sizes"),
-                        Conv3DBackpropInputOp<GPUDevice, float>);
-REGISTER_KERNEL_BUILDER(
-    Name("Conv3DBackpropFilter").Device(DEVICE_GPU).TypeConstraint<float>("T"),
-    Conv3DBackpropFilterOp<GPUDevice, float>);
-REGISTER_KERNEL_BUILDER(Name("Conv3DBackpropFilterV2")
-                            .Device(DEVICE_GPU)
-                            .TypeConstraint<float>("T")
-                            .HostMemory("filter_sizes"),
-                        Conv3DBackpropFilterOp<GPUDevice, float>);
+#define REGISTER_GPU_KERNEL(T)                                                \
+  REGISTER_KERNEL_BUILDER(                                                    \
+      Name("Conv3DBackpropInput").Device(DEVICE_GPU).TypeConstraint<T>("T"),  \
+      Conv3DBackpropInputOp<GPUDevice, T>);                                   \
+  REGISTER_KERNEL_BUILDER(Name("Conv3DBackpropInputV2")                       \
+                              .Device(DEVICE_GPU)                             \
+                              .TypeConstraint<T>("T")                         \
+                              .HostMemory("input_sizes"),                     \
+                          Conv3DBackpropInputOp<GPUDevice, T>);               \
+  REGISTER_KERNEL_BUILDER(                                                    \
+      Name("Conv3DBackpropFilter").Device(DEVICE_GPU).TypeConstraint<T>("T"), \
+      Conv3DBackpropFilterOp<GPUDevice, T>);                                  \
+  REGISTER_KERNEL_BUILDER(Name("Conv3DBackpropFilterV2")                      \
+                              .Device(DEVICE_GPU)                             \
+                              .TypeConstraint<T>("T")                         \
+                              .HostMemory("filter_sizes"),                    \
+                          Conv3DBackpropFilterOp<GPUDevice, T>);
+TF_CALL_half(REGISTER_GPU_KERNEL);
+TF_CALL_float(REGISTER_GPU_KERNEL);
+#undef REGISTER_GPU_KERNEL
+
 #endif  // GOOGLE_CUDA
 
 }  // namespace tensorflow
diff --git a/tensorflow/core/kernels/conv_ops_3d.cc b/tensorflow/core/kernels/conv_ops_3d.cc
index 8a89d564de..37cb67bc51 100644
--- a/tensorflow/core/kernels/conv_ops_3d.cc
+++ b/tensorflow/core/kernels/conv_ops_3d.cc
@@ -145,6 +145,7 @@ class Conv3DOp : public BinaryOp<T> {
   REGISTER_KERNEL_BUILDER(                                      \
       Name("Conv3D").Device(DEVICE_CPU).TypeConstraint<T>("T"), \
       Conv3DOp<CPUDevice, T>);
+TF_CALL_half(REGISTER_CPU_KERNEL);
 TF_CALL_float(REGISTER_CPU_KERNEL);
 TF_CALL_double(REGISTER_CPU_KERNEL);
 #undef REGISTER_CPU_KERNEL
@@ -482,12 +483,16 @@ namespace functor {
       const std::array<int, 3>& padding_right,                        \
       typename TTypes<T, 5, int>::Tensor out, TensorFormat format);
 
+DECLARE_GPU_SPEC(Eigen::half);
 DECLARE_GPU_SPEC(float);
 #undef DECLARE_GPU_SPEC
 
 }  // namespace functor
 
 // Registration of the GPU implementations.
+REGISTER_KERNEL_BUILDER(
+    Name("Conv3D").Device(DEVICE_GPU).TypeConstraint<Eigen::half>("T"),
+    Conv3DOp<GPUDevice, Eigen::half>);
 REGISTER_KERNEL_BUILDER(
     Name("Conv3D").Device(DEVICE_GPU).TypeConstraint<float>("T"),
     Conv3DOp<GPUDevice, float>);
diff --git a/tensorflow/core/kernels/cwise_op_acosh.cc b/tensorflow/core/kernels/cwise_op_acosh.cc
index 7bdd8d22a3..39c8814073 100644
--- a/tensorflow/core/kernels/cwise_op_acosh.cc
+++ b/tensorflow/core/kernels/cwise_op_acosh.cc
@@ -20,16 +20,8 @@ namespace tensorflow {
 REGISTER4(UnaryOp, CPU, "Acosh", functor::acosh, float, double,
           complex64, complex128);
 
-#if TENSORFLOW_USE_SYCL
-#define REGISTER_SYCL_KERNEL(TYPE)                                    \
-  REGISTER_KERNEL_BUILDER(                                            \
-                          Name("Acosh")                               \
-                          .Device(DEVICE_SYCL)                        \
-                          .TypeConstraint<TYPE>("T"),                 \
-                          UnaryOp<SYCLDevice, functor::acosh<TYPE>>);
-REGISTER_SYCL_KERNEL(float);
-REGISTER_SYCL_KERNEL(double);
-#undef REGISTER_SYCL_KERNEL
+#ifdef TENSORFLOW_USE_SYCL
+REGISTER2(UnaryOp, SYCL, "Acosh", functor::acosh, float, double);
 #endif // TENSORFLOW_USE_SYCL
 
 #if GOOGLE_CUDA
diff --git a/tensorflow/core/kernels/cwise_op_asinh.cc b/tensorflow/core/kernels/cwise_op_asinh.cc
index e0644323c0..a7673afd0b 100644
--- a/tensorflow/core/kernels/cwise_op_asinh.cc
+++ b/tensorflow/core/kernels/cwise_op_asinh.cc
@@ -20,17 +20,9 @@ namespace tensorflow {
 REGISTER4(UnaryOp, CPU, "Asinh", functor::asinh, float, double,
           complex64, complex128);
 
-#if TENSORFLOW_USE_SYCL
-#define REGISTER_SYCL_KERNEL(TYPE)                                    \
-  REGISTER_KERNEL_BUILDER(                                            \
-                          Name("Asinh")                               \
-                          .Device(DEVICE_SYCL)                        \
-                          .TypeConstraint<TYPE>("T"),                 \
-                          UnaryOp<SYCLDevice, functor::asinh<TYPE>>);
-REGISTER_SYCL_KERNEL(float);
-REGISTER_SYCL_KERNEL(double);
-#undef REGISTER_SYCL_KERNEL
-#endif // TENSORFLOW_USE_SYC
+#ifdef TENSORFLOW_USE_SYCL
+REGISTER2(UnaryOp, SYCL, "Asinh", functor::asinh, float, double);
+#endif  // TENSORFLOW_USE_SYCL
 
 #if GOOGLE_CUDA
 REGISTER2(UnaryOp, GPU, "Asinh", functor::asinh, float, double);
diff --git a/tensorflow/core/kernels/cwise_op_atanh.cc b/tensorflow/core/kernels/cwise_op_atanh.cc
index 058f5140c5..7b688db4c5 100644
--- a/tensorflow/core/kernels/cwise_op_atanh.cc
+++ b/tensorflow/core/kernels/cwise_op_atanh.cc
@@ -20,17 +20,9 @@ namespace tensorflow {
 REGISTER4(UnaryOp, CPU, "Atanh", functor::atanh, float, double,
           complex64, complex128);
 
-#if TENSORFLOW_USE_SYCL
-#define REGISTER_SYCL_KERNEL(TYPE)                                    \
-  REGISTER_KERNEL_BUILDER(                                            \
-                          Name("Atanh")                               \
-                          .Device(DEVICE_SYCL)                        \
-                          .TypeConstraint<TYPE>("T"),                 \
-                          UnaryOp<SYCLDevice, functor::atanh<TYPE>>);
-REGISTER_SYCL_KERNEL(float);
-REGISTER_SYCL_KERNEL(double);
-#undef REGISTER_SYCL_KERNEL
-#endif // TENSORFLOW_USE_SYC
+#ifdef TENSORFLOW_USE_SYCL
+REGISTER2(UnaryOp, SYCL, "Atanh", functor::atanh, float, double);
+#endif  // TENSORFLOW_USE_SYCL
 
 #if GOOGLE_CUDA
 REGISTER2(UnaryOp, GPU, "Atanh", functor::atanh, float, double);
diff --git a/tensorflow/core/kernels/cwise_ops.h b/tensorflow/core/kernels/cwise_ops.h
index 6c22b124de..d32185b6bf 100644
--- a/tensorflow/core/kernels/cwise_ops.h
+++ b/tensorflow/core/kernels/cwise_ops.h
@@ -49,7 +49,11 @@ template <typename T>
 struct scalar_asinh_op {
   EIGEN_EMPTY_STRUCT_CTOR(scalar_asinh_op)
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const T operator()(const T& a) const {
+#if EIGEN_HAS_CXX11_MATH
+    return numext::asinh(a);
+#else
     return std::asinh(a);
+#endif  // EIGEN_HAS_CXX11_MATH
   }
 };
 template <typename T>
@@ -61,7 +65,11 @@ template <typename T>
 struct scalar_acosh_op {
   EIGEN_EMPTY_STRUCT_CTOR(scalar_acosh_op)
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const T operator()(const T& a) const {
+#if EIGEN_HAS_CXX11_MATH
+    return numext::acosh(a);
+#else
     return std::acosh(a);
+#endif  // EIGEN_HAS_CXX11_MATH
   }
 };
 template <typename T>
@@ -73,7 +81,11 @@ template <typename T>
 struct scalar_atanh_op {
   EIGEN_EMPTY_STRUCT_CTOR(scalar_atanh_op)
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const T operator()(const T& a) const {
+#if EIGEN_HAS_CXX11_MATH
+    return numext::atanh(a);
+#else
     return std::atanh(a);
+#endif  // EIGEN_HAS_CXX11_MATH
   }
 };
 template <typename T>
diff --git a/tensorflow/core/kernels/depthwise_conv_grad_op.cc b/tensorflow/core/kernels/depthwise_conv_grad_op.cc
index 9804d7d38e..9347978d51 100644
--- a/tensorflow/core/kernels/depthwise_conv_grad_op.cc
+++ b/tensorflow/core/kernels/depthwise_conv_grad_op.cc
@@ -231,7 +231,8 @@ static void CopyOutputBackpropRegion(const DepthwiseArgs& args,
       }
       // Pad to vector-register width (if needed).
       for (int64 d = 0; d < pad_size; ++d) {
-        buffer[buf_base + vectorized_size + scalar_size + d] = 0;
+        buffer[buf_base + vectorized_size + scalar_size + d] =
+            static_cast<T>(0);
       }
     }
   }
@@ -297,7 +298,7 @@ static void ComputeBackpropInput(const DepthwiseArgs& args,
 
   for (int i = 0; i < output_vectorized_size; i += kPacketSize) {
     // Reset accumulator.
-    auto vaccum = Eigen::internal::pset1<Packet>(0);
+    auto vaccum = Eigen::internal::pset1<Packet>(static_cast<T>(0));
     for (int j = 0; j < filter_spatial_size; ++j) {
       // Calculate index.
       const int64 index = i + j * padded_filter_inner_dim_size;
@@ -318,7 +319,7 @@ static void ComputeBackpropInput(const DepthwiseArgs& args,
   }
 
   if (output_scalar_size > 0) {
-    auto vaccum = Eigen::internal::pset1<Packet>(0);
+    auto vaccum = Eigen::internal::pset1<Packet>(static_cast<T>(0));
     for (int j = 0; j < filter_spatial_size; ++j) {
       const int64 index =
           output_vectorized_size + j * padded_filter_inner_dim_size;
@@ -346,7 +347,7 @@ static void ComputeBackpropInput(const DepthwiseArgs& args,
   if (depth_multiplier > 1) {
     for (int64 d = 0; d < in_depth; ++d) {
       const int64 index = d * args.depth_multiplier;
-      T accum = 0;
+      T accum = static_cast<T>(0);
       for (int64 dm = 0; dm < dm_vectorized_size; dm += kPacketSize) {
         const auto v = Eigen::internal::ploadu<Packet>(out_buffer + index + dm);
         accum += Eigen::internal::predux(v);
@@ -510,6 +511,8 @@ static void DepthwiseConvBackpropInputReference(const DepthwiseArgs& args,
 
 #if GOOGLE_CUDA
 
+extern template struct LaunchDepthwiseConvBackpropInputOp<GPUDevice,
+                                                          Eigen::half>;
 extern template struct LaunchDepthwiseConvBackpropInputOp<GPUDevice, float>;
 extern template struct LaunchDepthwiseConvBackpropInputOp<GPUDevice, double>;
 
@@ -884,6 +887,8 @@ static void DepthwiseConvBackpropFilterReference(const DepthwiseArgs& args,
 
 #if GOOGLE_CUDA
 
+extern template struct LaunchDepthwiseConvBackpropFilterOp<GPUDevice,
+                                                           Eigen::half>;
 extern template struct LaunchDepthwiseConvBackpropFilterOp<GPUDevice, float>;
 extern template struct LaunchDepthwiseConvBackpropFilterOp<GPUDevice, double>;
 
diff --git a/tensorflow/core/kernels/depthwise_conv_op.cc b/tensorflow/core/kernels/depthwise_conv_op.cc
index bbeeaf7895..7c43dcb670 100644
--- a/tensorflow/core/kernels/depthwise_conv_op.cc
+++ b/tensorflow/core/kernels/depthwise_conv_op.cc
@@ -94,7 +94,7 @@ struct DepthwiseConv2DKernel {
 
     for (int i = 0; i < output_vectorized_size; i += kPacketSize) {
       // Reset accumulator.
-      auto vaccum = Eigen::internal::pset1<Packet>(0);
+      auto vaccum = Eigen::internal::pset1<Packet>(static_cast<T>(0));
       for (int j = 0; j < filter_spatial_size; ++j) {
         // Calculate index.
         const int64 index = i + j * padded_filter_inner_dim_size;
@@ -115,7 +115,7 @@ struct DepthwiseConv2DKernel {
     }
 
     if (output_scalar_size > 0) {
-      auto vaccum = Eigen::internal::pset1<Packet>(0);
+      auto vaccum = Eigen::internal::pset1<Packet>(static_cast<T>(0));
       for (int j = 0; j < filter_spatial_size; ++j) {
         const int64 index =
             output_vectorized_size + j * padded_filter_inner_dim_size;
@@ -246,6 +246,7 @@ extern template class LaunchConv2DOp<CPUDevice, float>;
 #if GOOGLE_CUDA
 
 // Extern template instantiated in depthwise_conv_op_gpu.cc.
+extern template struct LaunchDepthwiseConvOp<GPUDevice, Eigen::half>;
 extern template struct LaunchDepthwiseConvOp<GPUDevice, float>;
 extern template struct LaunchDepthwiseConvOp<GPUDevice, double>;
 
@@ -419,12 +420,18 @@ class DepthwiseConv2dNativeOp : public BinaryOp<T> {
       Name("DepthwiseConv2dNative").Device(DEVICE_CPU).TypeConstraint<T>("T"), \
       DepthwiseConv2dNativeOp<CPUDevice, T>);
 
+TF_CALL_half(REGISTER_CPU_KERNEL);
 TF_CALL_float(REGISTER_CPU_KERNEL);
 #if !defined(PLATFORM_WINDOWS) || !defined(_DEBUG)
 TF_CALL_double(REGISTER_CPU_KERNEL);
 #endif
 
 #if GOOGLE_CUDA
+REGISTER_KERNEL_BUILDER(Name("DepthwiseConv2dNative")
+                            .Device(DEVICE_GPU)
+                            .TypeConstraint<Eigen::half>("T"),
+                        DepthwiseConv2dNativeOp<GPUDevice, Eigen::half>);
+
 REGISTER_KERNEL_BUILDER(
     Name("DepthwiseConv2dNative").Device(DEVICE_GPU).TypeConstraint<float>("T"),
     DepthwiseConv2dNativeOp<GPUDevice, float>);
diff --git a/tensorflow/core/kernels/depthwise_conv_op.h b/tensorflow/core/kernels/depthwise_conv_op.h
index aa5b5c76f6..097a9f5bfa 100644
--- a/tensorflow/core/kernels/depthwise_conv_op.h
+++ b/tensorflow/core/kernels/depthwise_conv_op.h
@@ -158,7 +158,8 @@ struct DepthwiseFilterPadOp {
       }
       // Pad the remainder of output to vector-register boundary.
       for (int64 j = 0; j < pad_size; ++j) {
-        padded_filter[output_base + vectorized_size + scalar_size + j] = 0;
+        padded_filter[output_base + vectorized_size + scalar_size + j] =
+            static_cast<T>(0);
       }
     }
   }
@@ -266,7 +267,7 @@ struct DepthwiseInputCopyOp {
 
           // Pad the remainder of the output to vector register boundary.
           for (int64 d = 0; d < output_pad_size; ++d) {
-            in_buf[d] = 0;
+            in_buf[d] = static_cast<T>(0);
           }
           in_buf += output_pad_size;
 
diff --git a/tensorflow/core/kernels/depthwise_conv_op_gpu.cu.cc b/tensorflow/core/kernels/depthwise_conv_op_gpu.cu.cc
index ecfe51d599..903aac5d68 100644
--- a/tensorflow/core/kernels/depthwise_conv_op_gpu.cu.cc
+++ b/tensorflow/core/kernels/depthwise_conv_op_gpu.cu.cc
@@ -105,7 +105,7 @@ __global__ void __launch_bounds__(1024, 2)
     const int input_row_end = input_row_start + filter_rows;
     const int input_col_end = input_col_start + filter_cols;
 
-    T sum = 0;
+    T sum = static_cast<T>(0);
 
     const int input_offset_temp = in_rows * OB;
     if (input_row_start >= 0 && input_col_start >= 0 &&
@@ -258,8 +258,8 @@ __global__ __launch_bounds__(1024, 2) void DepthwiseConv2dGPUKernelNHWCSmall(
     __syncthreads();
 
     if (depth_in_range) {
-      T sum1 = 0;
-      T sum2 = 0;
+      T sum1 = static_cast<T>(0);
+      T sum2 = static_cast<T>(0);
       int shared_offset = data_idx;
       const T* filter_ptr = filter_read_offset + shared_data;
       UNROLL for (int r = 0; r < filter_rows; ++r) {
@@ -369,7 +369,7 @@ __global__ void __launch_bounds__(1024, 2)
     const int input_row_end = input_row_start + filter_rows;
     const int input_col_end = input_col_start + filter_cols;
 
-    T sum = 0;
+    T sum = static_cast<T>(0);
     if (input_row_start >= 0 && input_col_start >= 0 &&
         input_row_end < in_rows && input_col_end < in_cols) {
       // Loop that doesn't need to check for boundary conditions.
@@ -529,8 +529,8 @@ __global__ __launch_bounds__(1024, 2) void DepthwiseConv2dGPUKernelNCHWSmall(
     __syncthreads();
 
     if (slice_in_range) {
-      T sum1 = 0;
-      T sum2 = 0;
+      T sum1 = static_cast<T>(0);
+      T sum2 = static_cast<T>(0);
       int shared_offset = data_idx;
       const T* filter_ptr = filter_read_offset + shared_data;
       UNROLL for (int r = 0; r < filter_rows; ++r) {
@@ -710,6 +710,7 @@ void LaunchDepthwiseConvOp<GPUDevice, T>::operator()(OpKernelContext* ctx,
                   "Launch of gpu kernel for DepthwiseConv2dGPULaunch failed"));
 }
 
+template struct LaunchDepthwiseConvOp<GPUDevice, Eigen::half>;
 template struct LaunchDepthwiseConvOp<GPUDevice, float>;
 template struct LaunchDepthwiseConvOp<GPUDevice, double>;
 
@@ -744,7 +745,7 @@ __global__ void __launch_bounds__(640, 2)
     const int in_r = (thread_id / in_depth / in_cols) % in_rows;
     const int b = thread_id / in_depth / in_cols / in_rows;
 
-    T sum = 0;
+    T sum = static_cast<T>(0);
 
     const int out_r_start =
         tf_max<int>(0, (in_r - filter_rows + pad_rows + stride) / stride);
@@ -810,7 +811,7 @@ __global__ void __launch_bounds__(640, 2)
     const int in_d = (thread_id / in_cols / in_rows) % in_depth;
     const int b = thread_id / in_depth / in_cols / in_rows;
 
-    T sum = 0;
+    T sum = static_cast<T>(0);
     const int out_d_start = in_d * depth_multiplier;
     const int out_d_end = out_d_start + depth_multiplier;
 
@@ -919,6 +920,7 @@ void LaunchDepthwiseConvBackpropInputOp<GPUDevice, T>::operator()(
                                "utGPULaunch failed"));
 }
 
+template struct LaunchDepthwiseConvBackpropInputOp<GPUDevice, Eigen::half>;
 template struct LaunchDepthwiseConvBackpropInputOp<GPUDevice, float>;
 template struct LaunchDepthwiseConvBackpropInputOp<GPUDevice, double>;
 
@@ -1631,6 +1633,7 @@ void LaunchDepthwiseConvBackpropFilterOp<GPUDevice, T>::operator()(
                                "terGPULaunch failed"));
 }
 
+template struct LaunchDepthwiseConvBackpropFilterOp<GPUDevice, Eigen::half>;
 template struct LaunchDepthwiseConvBackpropFilterOp<GPUDevice, float>;
 template struct LaunchDepthwiseConvBackpropFilterOp<GPUDevice, double>;
 }  // namespace tensorflow
diff --git a/tensorflow/core/kernels/fused_batch_norm_op.cc b/tensorflow/core/kernels/fused_batch_norm_op.cc
index 0ecb829f34..1688674eb7 100644
--- a/tensorflow/core/kernels/fused_batch_norm_op.cc
+++ b/tensorflow/core/kernels/fused_batch_norm_op.cc
@@ -54,25 +54,20 @@ struct FusedBatchNorm<CPUDevice, T, U> {
                   Tensor* batch_var_output, Tensor* saved_mean_output,
                   Tensor* saved_var_output, TensorFormat tensor_format,
                   bool is_training) {
-    // Currently U is ignored, since we only support the case where T and U are
-    // both float32.
-    // TODO(reedwm): Add float16 support, use U, and remove these asserts.
-    static_assert(std::is_same<T, float>::value, "T currently must be float.");
-    static_assert(std::is_same<U, float>::value, "U currently must be float.");
     OP_REQUIRES(context, tensor_format == FORMAT_NHWC,
                 errors::Internal("The CPU implementation of FusedBatchNorm "
                                  "only supports NHWC tensor format for now."));
     typename TTypes<T, 4>::ConstTensor x(x_input.tensor<T, 4>());
-    typename TTypes<T>::ConstVec scale(scale_input.vec<T>());
-    typename TTypes<T>::ConstVec offset(offset_input.vec<T>());
-    typename TTypes<T>::ConstVec estimated_mean(estimated_mean_input.vec<T>());
-    typename TTypes<T>::ConstVec estimated_variance(
-        estimated_variance_input.vec<T>());
+    typename TTypes<U>::ConstVec scale(scale_input.vec<U>());
+    typename TTypes<U>::ConstVec offset(offset_input.vec<U>());
+    typename TTypes<U>::ConstVec estimated_mean(estimated_mean_input.vec<U>());
+    typename TTypes<U>::ConstVec estimated_variance(
+        estimated_variance_input.vec<U>());
     typename TTypes<T, 4>::Tensor y(y_output->tensor<T, 4>());
-    typename TTypes<T>::Vec batch_mean(batch_mean_output->vec<T>());
-    typename TTypes<T>::Vec batch_var(batch_var_output->vec<T>());
-    typename TTypes<T>::Vec saved_mean(saved_mean_output->vec<T>());
-    typename TTypes<T>::Vec saved_var(saved_var_output->vec<T>());
+    typename TTypes<U>::Vec batch_mean(batch_mean_output->vec<U>());
+    typename TTypes<U>::Vec batch_var(batch_var_output->vec<U>());
+    typename TTypes<U>::Vec saved_mean(saved_mean_output->vec<U>());
+    typename TTypes<U>::Vec saved_var(saved_var_output->vec<U>());
 
     const CPUDevice& d = context->eigen_device<CPUDevice>();
 
@@ -93,15 +88,15 @@ struct FusedBatchNorm<CPUDevice, T, U> {
     bcast_spec.set(0, rest_size);
 #endif
 
-    auto x_rest_by_depth = x.reshape(rest_by_depth);
+    auto x_rest_by_depth = x.reshape(rest_by_depth).template cast<U>();
     const int rest_size_minus_one = (rest_size > 1) ? (rest_size - 1) : 1;
-    T rest_size_inv = static_cast<T>(1.0f / static_cast<T>(rest_size));
+    U rest_size_inv = static_cast<U>(1.0f / static_cast<U>(rest_size));
     // This adjustment is for Bessel's correction
-    T rest_size_adjust =
-        static_cast<T>(rest_size) / static_cast<T>(rest_size_minus_one);
+    U rest_size_adjust =
+        static_cast<U>(rest_size) / static_cast<U>(rest_size_minus_one);
 
-    Eigen::Tensor<T, 1, Eigen::RowMajor> mean(depth);
-    Eigen::Tensor<T, 1, Eigen::RowMajor> variance(depth);
+    Eigen::Tensor<U, 1, Eigen::RowMajor> mean(depth);
+    Eigen::Tensor<U, 1, Eigen::RowMajor> variance(depth);
     if (is_training) {
       mean.device(d) = (x_rest_by_depth.sum(reduce_dims) * rest_size_inv);
       batch_mean.device(d) = mean;
@@ -129,7 +124,7 @@ struct FusedBatchNorm<CPUDevice, T, U> {
     auto x_shifted =
         x_scaled + offset.reshape(one_by_depth).broadcast(bcast_spec);
 
-    y.reshape(rest_by_depth).device(d) = x_shifted;
+    y.reshape(rest_by_depth).device(d) = x_shifted.template cast<T>();
   }
 };
 
@@ -138,7 +133,7 @@ struct FusedBatchNormGrad<CPUDevice, T, U> {
   void operator()(OpKernelContext* context, const Tensor& y_backprop_input,
                   const Tensor& x_input, const Tensor& scale_input,
                   const Tensor& mean_input, const Tensor& variance_input,
-                  T epsilon, Tensor* x_backprop_output,
+                  U epsilon, Tensor* x_backprop_output,
                   Tensor* scale_backprop_output, Tensor* offset_backprop_output,
                   TensorFormat tensor_format) {
     OP_REQUIRES(context, tensor_format == FORMAT_NHWC,
@@ -147,12 +142,12 @@ struct FusedBatchNormGrad<CPUDevice, T, U> {
     typename TTypes<T, 4>::ConstTensor y_backprop(
         y_backprop_input.tensor<T, 4>());
     typename TTypes<T, 4>::ConstTensor x(x_input.tensor<T, 4>());
-    typename TTypes<T>::ConstVec scale(scale_input.vec<T>());
-    typename TTypes<T>::ConstVec mean(mean_input.vec<T>());
-    typename TTypes<T>::ConstVec variance(variance_input.vec<T>());
+    typename TTypes<U>::ConstVec scale(scale_input.vec<U>());
+    typename TTypes<U>::ConstVec mean(mean_input.vec<U>());
+    typename TTypes<U>::ConstVec variance(variance_input.vec<U>());
     typename TTypes<T, 4>::Tensor x_backprop(x_backprop_output->tensor<T, 4>());
-    typename TTypes<T>::Vec scale_backprop(scale_backprop_output->vec<T>());
-    typename TTypes<T>::Vec offset_backprop(offset_backprop_output->vec<T>());
+    typename TTypes<U>::Vec scale_backprop(scale_backprop_output->vec<U>());
+    typename TTypes<U>::Vec offset_backprop(offset_backprop_output->vec<U>());
 
     // Note: the following formulas are used to compute the gradients for
     // back propagation.
@@ -181,8 +176,8 @@ struct FusedBatchNormGrad<CPUDevice, T, U> {
     bcast_spec.set(0, rest_size);
 #endif
 
-    auto x_rest_by_depth = x.reshape(rest_by_depth);
-    T rest_size_inv = static_cast<T>(1.0f / static_cast<T>(rest_size));
+    auto x_rest_by_depth = x.reshape(rest_by_depth).template cast<U>();
+    U rest_size_inv = static_cast<U>(1.0f / static_cast<U>(rest_size));
 
     auto x_mean_rest_by_depth =
         mean.reshape(one_by_depth).broadcast(bcast_spec);
@@ -192,7 +187,8 @@ struct FusedBatchNormGrad<CPUDevice, T, U> {
         coef0.eval().reshape(one_by_depth).broadcast(bcast_spec);
     auto x_scaled = x_centered * coef0_rest_by_depth;
 
-    auto y_backprop_rest_by_depth = y_backprop.eval().reshape(rest_by_depth);
+    auto y_backprop_rest_by_depth =
+        y_backprop.eval().reshape(rest_by_depth).template cast<U>();
     scale_backprop.device(d) =
         (y_backprop_rest_by_depth * x_scaled).sum(reduce_dims);
     auto y_backprop_sum = y_backprop_rest_by_depth.sum(reduce_dims);
@@ -214,7 +210,7 @@ struct FusedBatchNormGrad<CPUDevice, T, U> {
                      .reshape(one_by_depth)
                      .broadcast(bcast_spec);
     x_backprop.reshape(rest_by_depth).device(d) =
-        coef1 * (y_backprop_centered - x_centered * coef2);
+        (coef1 * (y_backprop_centered - x_centered * coef2)).template cast<T>();
   }
 };
 
@@ -689,6 +685,18 @@ REGISTER_KERNEL_BUILDER(Name("FusedBatchNormGradV2")
                             .TypeConstraint<float>("U"),
                         FusedBatchNormGradOp<CPUDevice, float, float>);
 
+REGISTER_KERNEL_BUILDER(Name("FusedBatchNormV2")
+                            .Device(DEVICE_CPU)
+                            .TypeConstraint<Eigen::half>("T")
+                            .TypeConstraint<float>("U"),
+                        FusedBatchNormOp<CPUDevice, Eigen::half, float>);
+
+REGISTER_KERNEL_BUILDER(Name("FusedBatchNormGradV2")
+                            .Device(DEVICE_CPU)
+                            .TypeConstraint<Eigen::half>("T")
+                            .TypeConstraint<float>("U"),
+                        FusedBatchNormGradOp<CPUDevice, Eigen::half, float>);
+
 #if GOOGLE_CUDA
 
 REGISTER_KERNEL_BUILDER(
diff --git a/tensorflow/core/kernels/fused_batch_norm_op.h b/tensorflow/core/kernels/fused_batch_norm_op.h
index 38b24d7011..3af104bf95 100644
--- a/tensorflow/core/kernels/fused_batch_norm_op.h
+++ b/tensorflow/core/kernels/fused_batch_norm_op.h
@@ -92,26 +92,28 @@ struct FusedBatchNormFreezeGrad {
     // offset_backprop  = sum(y_backprop)
     // scale_backprop = y_backprop * ((x - pop_mean) * rsqrt(pop_var + epsilon))
     // x_backprop = y_backprop * (scale * rsqrt(pop_var + epsilon))
-    offset_backprop.device(d) = y_backprop.reshape(rest_by_depth)
-                                    .template cast<U>()
-                                    .sum(reduction_axis);
+
+    auto y_backprop_rest_by_depth =
+        y_backprop.reshape(rest_by_depth).template cast<U>();
+    auto input_rest_by_depth = input.reshape(rest_by_depth).template cast<U>();
+
+    offset_backprop.device(d) = y_backprop_rest_by_depth.sum(reduction_axis);
 
     // scratch1 = rsqrt(pop_var + epsilon)
     scratch1.device(d) = (pop_var + pop_var.constant(epsilon)).rsqrt();
 
     // scratch2 = sum(y_backprop * (x - mean))
     scratch2.device(d) =
-        (y_backprop.reshape(rest_by_depth).template cast<U>() *
-         (input.reshape(rest_by_depth).template cast<U>() -
+        (y_backprop_rest_by_depth *
+         (input_rest_by_depth -
           pop_mean.reshape(one_by_depth).broadcast(rest_by_one)))
             .sum(reduction_axis);
 
     x_backprop.reshape(rest_by_depth).device(d) =
-        (y_backprop.reshape(rest_by_depth).template cast<U>() *
-         ((scratch1 * scale)
-              .eval()
-              .reshape(one_by_depth)
-              .broadcast(rest_by_one)))
+        (y_backprop_rest_by_depth * ((scratch1 * scale)
+                                         .eval()
+                                         .reshape(one_by_depth)
+                                         .broadcast(rest_by_one)))
             .template cast<T>();
     scale_backprop.device(d) = scratch2 * scratch1;
   }
diff --git a/tensorflow/core/kernels/lmdb_reader_op.cc b/tensorflow/core/kernels/lmdb_reader_op.cc
index 3bb07301b5..31a427f2c9 100755
--- a/tensorflow/core/kernels/lmdb_reader_op.cc
+++ b/tensorflow/core/kernels/lmdb_reader_op.cc
@@ -36,7 +36,7 @@ class LMDBReader : public ReaderBase {
 
   Status OnWorkStartedLocked() override {
     MDB_CHECK(mdb_env_create(&mdb_env_));
-    int flags = MDB_RDONLY | MDB_NOTLS;
+    int flags = MDB_RDONLY | MDB_NOTLS | MDB_NOLOCK;
 
     // Check if the LMDB filename is actually a file instead of a directory.
     // If so, set appropriate flags so we can open it.
@@ -57,10 +57,13 @@ class LMDBReader : public ReaderBase {
     if (mdb_env_ != nullptr) {
       if (mdb_cursor_) {
         mdb_cursor_close(mdb_cursor_);
+        mdb_cursor_ = nullptr;
       }
-      mdb_txn_abort(mdb_txn_);
       mdb_dbi_close(mdb_env_, mdb_dbi_);
+      mdb_txn_abort(mdb_txn_);
       mdb_env_close(mdb_env_);
+      mdb_txn_ = nullptr;
+      mdb_dbi_ = 0;
       mdb_env_ = nullptr;
     }
     return Status::OK();
diff --git a/tensorflow/core/kernels/maxpooling_op.cc b/tensorflow/core/kernels/maxpooling_op.cc
index e2cf605811..d8bdb700e6 100644
--- a/tensorflow/core/kernels/maxpooling_op.cc
+++ b/tensorflow/core/kernels/maxpooling_op.cc
@@ -34,6 +34,7 @@ limitations under the License.
 #include "tensorflow/core/kernels/pooling_ops_common.h"
 #include "tensorflow/core/lib/core/errors.h"
 #include "tensorflow/core/lib/gtl/array_slice.h"
+#include "tensorflow/core/util/env_var.h"
 #include "tensorflow/core/util/padding.h"
 #include "tensorflow/core/util/tensor_format.h"
 #include "tensorflow/core/util/use_cudnn.h"
@@ -358,6 +359,8 @@ class MaxPoolingGradOp<Eigen::GpuDevice, T> : public OpKernel {
     OP_REQUIRES_OK(context, context->GetAttr("padding", &padding_));
 
     use_dnn_ = CanUseCudnn();
+    TF_CHECK_OK(ReadBoolFromEnvVar("TF_ENABLE_MAXPOOL_NANPROP", false,
+                                   &propagate_nans_));
   }
 
   void Compute(OpKernelContext* context) override {
@@ -405,7 +408,7 @@ class MaxPoolingGradOp<Eigen::GpuDevice, T> : public OpKernel {
       DnnPoolingGradOp<T>::Compute(
           context, perftools::gputools::dnn::PoolingMode::kMaximum, ksize,
           stride, padding_, data_format_, &tensor_in, &tensor_out, out_backprop,
-          output_shape);
+          output_shape, propagate_nans_);
     } else {
       CHECK(data_format_ == FORMAT_NHWC)
           << "Non-Cudnn MaxPoolGrad only supports NHWC format";
@@ -420,6 +423,7 @@ class MaxPoolingGradOp<Eigen::GpuDevice, T> : public OpKernel {
   Padding padding_;
   TensorFormat data_format_;
   bool use_dnn_;
+  bool propagate_nans_;
 };
 
 #endif  // GOOGLE_CUDA
@@ -884,6 +888,9 @@ class MaxPoolingWithArgmaxOp : public OpKernel {
     OP_REQUIRES(context, ksize_[0] == 1 && stride_[0] == 1,
                 errors::Unimplemented(
                     "Pooling is not yet supported on the batch dimension."));
+
+    TF_CHECK_OK(ReadBoolFromEnvVar("TF_ENABLE_MAXPOOL_NANPROP", false,
+                                   &propagate_nans_));
   }
 
   void Compute(OpKernelContext* context) override {
@@ -902,14 +909,15 @@ class MaxPoolingWithArgmaxOp : public OpKernel {
     Tensor* argmax = nullptr;
     OP_REQUIRES_OK(context, context->allocate_output(1, out_shape, &argmax));
 
-    LaunchMaxPoolingWithArgmax<Device, T>::launch(context, params, tensor_in,
-                                                  output, argmax);
+    LaunchMaxPoolingWithArgmax<Device, T>::launch(
+        context, params, tensor_in, output, argmax, propagate_nans_);
   }
 
  private:
   std::vector<int32> ksize_;
   std::vector<int32> stride_;
   Padding padding_;
+  bool propagate_nans_;
 };
 
 template <typename Device, typename T>
@@ -1045,6 +1053,9 @@ class MaxPoolingNoMaskOp<GPUDevice, T> : public OpKernel {
                 errors::Unimplemented(
                     "Pooling is not yet supported on the batch dimension."));
     use_dnn_ = CanUseCudnn();
+
+    TF_CHECK_OK(ReadBoolFromEnvVar("TF_ENABLE_MAXPOOL_NANPROP", false,
+                                   &propagate_nans_));
   }
 
   void Compute(OpKernelContext* context) override {
@@ -1068,9 +1079,10 @@ class MaxPoolingNoMaskOp<GPUDevice, T> : public OpKernel {
 
     // These is_int8x4 checks avoid linker errors for missing qint8 kernels.
     if (!is_int8x4 && use_dnn_ && data_format_ == FORMAT_NCHW) {
-      DnnPoolingOp<T>::Compute(
-          context, perftools::gputools::dnn::PoolingMode::kMaximum, ksize_,
-          stride_, padding_, data_format_, tensor_in, out_shape);
+      DnnPoolingOp<T>::Compute(context,
+                               perftools::gputools::dnn::PoolingMode::kMaximum,
+                               ksize_, stride_, padding_, data_format_,
+                               tensor_in, out_shape, propagate_nans_);
     } else {
       Tensor* output = nullptr;
       OP_REQUIRES_OK(context, context->allocate_output(0, out_shape, &output));
@@ -1079,7 +1091,7 @@ class MaxPoolingNoMaskOp<GPUDevice, T> : public OpKernel {
                                                            tensor_in, output);
       } else if (data_format_ == FORMAT_NHWC) {
         LaunchMaxPoolingNoMask<Device, T>::launch(context, params, tensor_in,
-                                                  output);
+                                                  output, propagate_nans_);
       } else {
         LOG(FATAL) << "MaxPool currently only supports the following (layout, "
                       "type) combinations: (NHWC, non-qint8), "
@@ -1098,6 +1110,7 @@ class MaxPoolingNoMaskOp<GPUDevice, T> : public OpKernel {
   Padding padding_;
   TensorFormat data_format_;
   bool use_dnn_;
+  bool propagate_nans_;
 };
 
 template <typename T>
@@ -1127,6 +1140,8 @@ class MaxPoolingNoMaskV2Op<GPUDevice, T> : public OpKernel {
     }
     OP_REQUIRES_OK(context, context->GetAttr("padding", &padding_));
     use_dnn_ = CanUseCudnn();
+    TF_CHECK_OK(ReadBoolFromEnvVar("TF_ENABLE_MAXPOOL_NANPROP", false,
+                                   &propagate_nans_));
   }
 
   void Compute(OpKernelContext* context) override {
@@ -1168,16 +1183,17 @@ class MaxPoolingNoMaskV2Op<GPUDevice, T> : public OpKernel {
         ShapeFromFormat(data_format_, params.tensor_in_batch, params.out_height,
                         params.out_width, params.depth);
     if (use_dnn_ && data_format_ == FORMAT_NCHW) {
-      DnnPoolingOp<T>::Compute(
-          context, perftools::gputools::dnn::PoolingMode::kMaximum, ksize,
-          stride, padding_, data_format_, tensor_in, out_shape);
+      DnnPoolingOp<T>::Compute(context,
+                               perftools::gputools::dnn::PoolingMode::kMaximum,
+                               ksize, stride, padding_, data_format_, tensor_in,
+                               out_shape, propagate_nans_);
     } else {
       CHECK(data_format_ == FORMAT_NHWC)
           << "Non-Cudnn MaxPool only supports NHWC format";
       Tensor* output = nullptr;
       OP_REQUIRES_OK(context, context->allocate_output(0, out_shape, &output));
       LaunchMaxPoolingNoMask<Device, T>::launch(context, params, tensor_in,
-                                                output);
+                                                output, propagate_nans_);
     }
   }
 
@@ -1187,18 +1203,20 @@ class MaxPoolingNoMaskV2Op<GPUDevice, T> : public OpKernel {
   Padding padding_;
   TensorFormat data_format_;
   bool use_dnn_;
+  bool propagate_nans_;
 };
 
 template <typename T>
 struct LaunchMaxPoolingNoMask<Eigen::GpuDevice, T> {
   static void launch(OpKernelContext* context, const PoolParameters& params,
-                     const Tensor& input, Tensor* output) {
+                     const Tensor& input, Tensor* output, bool propagate_nans) {
     bool status = functor::MaxPoolForwardWithOptionalArgmax<T>()(
         input.flat<T>().data(), params.tensor_in_batch, params.tensor_in_rows,
         params.tensor_in_cols, params.depth, params.out_height,
         params.out_width, params.window_rows, params.window_cols,
         params.row_stride, params.col_stride, params.pad_rows, params.pad_cols,
-        output->flat<T>().data(), nullptr, context->eigen_gpu_device());
+        output->flat<T>().data(), nullptr, context->eigen_gpu_device(),
+        propagate_nans);
     if (!status) {
       context->SetStatus(
           errors::Internal("Failed launching MaxPoolForwardNoMask"));
@@ -1209,7 +1227,8 @@ struct LaunchMaxPoolingNoMask<Eigen::GpuDevice, T> {
 template <typename T>
 struct LaunchMaxPoolingWithArgmax<Eigen::GpuDevice, T> {
   static void launch(OpKernelContext* context, const PoolParameters& params,
-                     const Tensor& input, Tensor* output, Tensor* argmax) {
+                     const Tensor& input, Tensor* output, Tensor* argmax,
+                     bool propagate_nans) {
     bool status = functor::MaxPoolForwardWithOptionalArgmax<T>()(
         input.flat<T>().data(), params.tensor_in_batch, params.tensor_in_rows,
         params.tensor_in_cols, params.depth, params.out_height,
@@ -1217,7 +1236,7 @@ struct LaunchMaxPoolingWithArgmax<Eigen::GpuDevice, T> {
         params.row_stride, params.col_stride, params.pad_rows, params.pad_cols,
         output->flat<T>().data(),
         reinterpret_cast<int64*>(argmax->flat<int64>().data()),
-        context->eigen_gpu_device());
+        context->eigen_gpu_device(), propagate_nans);
     if (!status) {
       context->SetStatus(
           errors::Internal("Failed launching MaxPoolForwardWithArgmax"));
diff --git a/tensorflow/core/kernels/maxpooling_op_gpu.cu.cc b/tensorflow/core/kernels/maxpooling_op_gpu.cu.cc
index 26f5274804..f8daaca4c9 100644
--- a/tensorflow/core/kernels/maxpooling_op_gpu.cu.cc
+++ b/tensorflow/core/kernels/maxpooling_op_gpu.cu.cc
@@ -29,6 +29,15 @@ limitations under the License.
 
 namespace tensorflow {
 namespace {
+template <bool propagate_nans, typename dtype>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool IsGreaterThan(dtype a, dtype b) {
+  if (propagate_nans) {
+    return !(a <= b);
+  } else {
+    return a > b;
+  }
+}
+
 // This is Yangqing's custom kernel for the maxpooling operation. There are
 // three functions: MaxPoolForwardNCHW and MaxPoolForwardNHWC are the two
 // forward functions, dealing with the forward case. MaxPoolBackward is the
@@ -51,7 +60,7 @@ namespace {
 // const int output_size = batch * channels * pooled_height * pooled_width;
 // MaxPoolForwardNCHW<<<(output_size + kThreadsPerBlock - 1) / kThreadsPerBlock,
 //                      kThreadsPerBlock, 0, cuda_stream>>>(...);
-template <typename dtype>
+template <bool propagate_nans, typename dtype>
 __global__ void MaxPoolForwardNCHW(const int nthreads, const dtype* bottom_data,
                                    const int channels, const int height,
                                    const int width, const int pooled_height,
@@ -77,7 +86,7 @@ __global__ void MaxPoolForwardNCHW(const int nthreads, const dtype* bottom_data,
     for (int h = hstart; h < hend; ++h) {
       for (int w = wstart; w < wend; ++w) {
         int idx = c * height * width + h * width + w;
-        if (bottom_data_n[idx] > maxval) {
+        if (IsGreaterThan<propagate_nans>(bottom_data_n[idx], maxval)) {
           maxidx = idx;
           maxval = bottom_data_n[idx];
         }
@@ -126,7 +135,7 @@ __global__ void MaxPoolForwardNoMaskKernel_NCHW_VECT_C(
   }
 }
 
-template <typename dtype>
+template <bool propagate_nans, typename dtype>
 __global__ void MaxPoolForwardNHWC(const int nthreads, const dtype* bottom_data,
                                    const int height, const int width,
                                    const int channels, const int pooled_height,
@@ -153,7 +162,7 @@ __global__ void MaxPoolForwardNHWC(const int nthreads, const dtype* bottom_data,
     for (int h = hstart; h < hend; ++h) {
       for (int w = wstart; w < wend; ++w) {
         int idx = (h * width + w) * channels + c;
-        if (bottom_data_n[idx] > maxval) {
+        if (IsGreaterThan<propagate_nans>(bottom_data_n[idx], maxval)) {
           maxidx = idx;
           maxval = bottom_data_n[idx];
         }
@@ -390,15 +399,24 @@ bool MaxPoolForwardWithOptionalArgmax<T>::operator()(
     const int channels, const int pooled_height, const int pooled_width,
     const int kernel_h, const int kernel_w, const int stride_h,
     const int stride_w, const int pad_t, const int pad_l, T* top_data,
-    int64* mask, const Eigen::GpuDevice& d) {
+    int64* mask, const Eigen::GpuDevice& d, bool propagate_nans) {
   const int kThreadsPerBlock = 1024;
   const int output_size = batch * channels * pooled_height * pooled_width;
-
-  MaxPoolForwardNHWC<<<(output_size + kThreadsPerBlock - 1) / kThreadsPerBlock,
-                       kThreadsPerBlock, 0, d.stream()>>>(
-      output_size, bottom_data, height, width, channels, pooled_height,
-      pooled_width, kernel_h, kernel_w, stride_h, stride_w, pad_t, pad_l,
-      top_data, mask);
+  if (propagate_nans) {
+    MaxPoolForwardNHWC<true>
+        <<<(output_size + kThreadsPerBlock - 1) / kThreadsPerBlock,
+           kThreadsPerBlock, 0, d.stream()>>>(
+            output_size, bottom_data, height, width, channels, pooled_height,
+            pooled_width, kernel_h, kernel_w, stride_h, stride_w, pad_t, pad_l,
+            top_data, mask);
+  } else {
+    MaxPoolForwardNHWC<false>
+        <<<(output_size + kThreadsPerBlock - 1) / kThreadsPerBlock,
+           kThreadsPerBlock, 0, d.stream()>>>(
+            output_size, bottom_data, height, width, channels, pooled_height,
+            pooled_width, kernel_h, kernel_w, stride_h, stride_w, pad_t, pad_l,
+            top_data, mask);
+  }
   return d.ok();
 }
 
diff --git a/tensorflow/core/kernels/maxpooling_op_gpu.h b/tensorflow/core/kernels/maxpooling_op_gpu.h
index 34203797cf..38ebb34248 100644
--- a/tensorflow/core/kernels/maxpooling_op_gpu.h
+++ b/tensorflow/core/kernels/maxpooling_op_gpu.h
@@ -39,7 +39,7 @@ struct MaxPoolForwardWithOptionalArgmax {
                   const int pooled_width, const int kernel_h,
                   const int kernel_w, const int stride_h, const int stride_w,
                   const int pad_t, const int pad_l, T* top_data, int64* mask,
-                  const Eigen::GpuDevice& d);
+                  const Eigen::GpuDevice& d, bool propagate_nans);
 };
 
 struct MaxPoolForwardNoMask_NCHW_VECT_C {
diff --git a/tensorflow/core/kernels/mkl_tfconv_op.h b/tensorflow/core/kernels/mkl_tfconv_op.h
index a240ee44fb..c4d5a45d3c 100644
--- a/tensorflow/core/kernels/mkl_tfconv_op.h
+++ b/tensorflow/core/kernels/mkl_tfconv_op.h
@@ -13,11 +13,11 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#ifdef INTEL_MKL
-
 #ifndef TENSORFLOW_CORE_KERNELS_MKL_TFCONV_OP_H_
 #define TENSORFLOW_CORE_KERNELS_MKL_TFCONV_OP_H_
 
+#ifdef INTEL_MKL
+
 #include <algorithm>
 #include <vector>
 #include "tensorflow/core/framework/numeric_op.h"
@@ -35,6 +35,10 @@ limitations under the License.
 #include "mkl_dnn_types.h"
 #include "tensorflow/core/util/mkl_util.h"
 
+#ifdef INTEL_MKL_DNN
+using mkldnn::stream;
+#endif
+
 namespace tensorflow {
 typedef Eigen::ThreadPoolDevice CPUDevice;
 
@@ -57,6 +61,71 @@ class MklToTfOp : public OpKernel {
     VLOG(1) << "MKLToTFConversion complete successfully.";
   }
 
+#ifdef INTEL_MKL_DNN
+  static void ConvertMklToTf(OpKernel* op_kernel, OpKernelContext* context,
+                             string data_format_str, DataType op_data_type,
+                             bool has_avx512f, uint input_number) {
+    try {
+      // Check that input tensor is in MKL format.
+      const Tensor& input_tensor = MklGetInput(context, input_number);
+      MklDnnShape input_shape;
+      GetMklShape(context, input_number, &input_shape);
+
+      // if input is already in Tf format, then copy input tensor to output.
+      if (!input_shape.IsMklTensor()) {
+        context->set_output(input_number, input_tensor);
+        VLOG(1) << "MKLToTFConversion: No conversion needed, "
+                << "copying input to output";
+        return;
+      }
+
+      // Check that input data type is same as operator data type and that it
+      // is same as output data type.
+      DataType input_data_type = op_kernel->input_type(input_number);
+      DataType output_data_type = op_kernel->output_type(input_number);
+      CHECK_EQ(op_data_type, input_data_type);
+      CHECK_EQ(op_data_type, output_data_type);
+
+      auto cpu_engine = engine(engine::cpu, 0);
+      MklDnnData<T> input(&cpu_engine);
+
+      // Get Mkl layout of input tensor.
+      auto input_mkl_md = input_shape.GetMklLayout();
+      // Get TensorFlow layout of input tensor. Expected output of conversion
+      // has same layout as Tensorflow layout of input tensor.
+      auto output_tf_md = input_shape.GetTfLayout();
+      auto output_tf_pd = memory::primitive_desc(output_tf_md, cpu_engine);
+      // Set input Mkl layout as the user layout.
+      input.SetUsrMem(input_mkl_md, &input_tensor);
+
+      // Allocate output tensor.
+      TensorShape output_shape = input_shape.GetTfShape();
+      Tensor* output_tensor = NULL;
+      OP_REQUIRES_OK(context, context->allocate_output(
+                                  input_number, output_shape, &output_tensor));
+      CHECK_NOTNULL(output_tensor);
+
+      // Do we need to reorder Mkl layout into TensorFlow layout?
+      if (input.IsReorderNeeded(output_tf_pd)) {
+        // Insert reorder between Mkl layout and TensorFlow layout.
+        std::vector<primitive> net;
+        CHECK_EQ(input.CheckReorderToOpMem(output_tf_pd, output_tensor, &net),
+                 true);
+        stream(stream::kind::eager).submit(net).wait();
+      } else {
+        // If not, just forward input tensor to output tensor.
+        CHECK(output_tensor->CopyFrom(input_tensor, output_shape));
+      }
+    } catch (mkldnn::error& e) {
+      string error_msg = "Status: " + std::to_string(e.status) +
+                         ", message: " + std::string(e.message) + ", in file " +
+                         std::string(__FILE__) + ":" + std::to_string(__LINE__);
+      OP_REQUIRES_OK(
+          context,
+          errors::Aborted("Operation received an exception:", error_msg));
+    }
+  }
+#else
   static void ConvertMklToTf(OpKernel* op_kernel, OpKernelContext* context,
                              string data_format_str, DataType op_data_type,
                              bool has_avx512f, uint input_number) {
@@ -91,8 +160,8 @@ class MklToTfOp : public OpKernel {
 
     // Allocate output tensor.
     Tensor* output_tensor = NULL;
-    OP_REQUIRES_OK(context,
-                   context->allocate_output(input_number, output_shape, &output_tensor));
+    OP_REQUIRES_OK(context, context->allocate_output(input_number, output_shape,
+                                                     &output_tensor));
 
     dnnLayout_t output_layout =
         static_cast<dnnLayout_t>(input_shape.GetTfLayout());
@@ -106,6 +175,7 @@ class MklToTfOp : public OpKernel {
                                      output_buffer);
     VLOG(1) << "MKLToTFConversion complete successfully.";
   }
+#endif
 
  private:
   /// Data format of the operation
@@ -132,5 +202,5 @@ class MklToTfOp : public OpKernel {
 TF_CALL_NUMBER_TYPES(REGISTER_CPU);
 #undef REGISTER_CPU
 }  // namespace tensorflow
-#endif  // TENSORFLOW_CORE_KERNELS_MKL_TFCONV_OP_H_
 #endif  // INTEL_MKL
+#endif  // TENSORFLOW_CORE_KERNELS_MKL_TFCONV_OP_H_
diff --git a/tensorflow/core/kernels/ops_util.h b/tensorflow/core/kernels/ops_util.h
index d3d1b56c9d..93ef512778 100644
--- a/tensorflow/core/kernels/ops_util.h
+++ b/tensorflow/core/kernels/ops_util.h
@@ -98,6 +98,19 @@ gtl::InlinedVector<T, 8> ComputeStride(const TensorShape& shape) {
   return strides;
 }
 
+// Helper to compute 'strides' given an Eigen TensorDimensions
+template <typename T, typename EigenDimensions>
+gtl::InlinedVector<T, 8> ComputeEigenStrides(const EigenDimensions& shape) {
+  const int ndims = shape.rank();
+  gtl::InlinedVector<T, 8> strides(ndims);
+  T stride = 1;
+  for (int i = ndims - 1; i >= 0; --i) {
+    strides[i] = stride;
+    stride *= static_cast<T>(shape[i]);
+  }
+  return strides;
+}
+
 }  // namespace tensorflow
 
 #endif  // TENSORFLOW_KERNELS_OPS_UTIL_H_
diff --git a/tensorflow/core/kernels/pooling_ops_common.cc b/tensorflow/core/kernels/pooling_ops_common.cc
index 7dee751c4f..ac90f67ce0 100644
--- a/tensorflow/core/kernels/pooling_ops_common.cc
+++ b/tensorflow/core/kernels/pooling_ops_common.cc
@@ -143,7 +143,7 @@ void DnnPoolingOp<T>::Compute(
     perftools::gputools::dnn::PoolingMode pooling_mode,
     const std::vector<int32>& size, const std::vector<int32>& stride,
     Padding padding, TensorFormat data_format, const Tensor& tensor_in,
-    const TensorShape& tensor_out_shape) {
+    const TensorShape& tensor_out_shape, bool propagate_nans) {
   Tensor* tensor_out = nullptr;
   OP_REQUIRES_OK(context,
                  context->allocate_output(0, tensor_out_shape, &tensor_out));
@@ -188,7 +188,8 @@ void DnnPoolingOp<T>::Compute(
       .set_vertical_stride(params.row_stride)
       .set_horizontal_stride(params.col_stride)
       .set_vertical_padding(params.pad_rows)
-      .set_horizontal_padding(params.pad_cols);
+      .set_horizontal_padding(params.pad_cols)
+      .set_propagate_nans(propagate_nans);
 
   perftools::gputools::dnn::BatchDescriptor input_desc;
   input_desc.set_count(params.tensor_in_batch)
@@ -237,7 +238,7 @@ void DnnPoolingGradOp<T>::Compute(
     const std::vector<int32>& size, const std::vector<int32>& stride,
     Padding padding, TensorFormat data_format, const Tensor* tensor_in,
     const Tensor* tensor_out, const Tensor& out_backprop,
-    const TensorShape& tensor_in_shape) {
+    const TensorShape& tensor_in_shape, bool propagate_nans) {
   CHECK((pooling_mode != perftools::gputools::dnn::PoolingMode::kMaximum) ||
         (tensor_in && tensor_out))
       << "For MaxPoolGrad, both tensor_in and tensor_out needs to be "
@@ -327,7 +328,8 @@ void DnnPoolingGradOp<T>::Compute(
       .set_vertical_stride(params.row_stride)
       .set_horizontal_stride(params.col_stride)
       .set_vertical_padding(params.pad_rows)
-      .set_horizontal_padding(params.pad_cols);
+      .set_horizontal_padding(params.pad_cols)
+      .set_propagate_nans(propagate_nans);
 
   perftools::gputools::dnn::BatchDescriptor orig_output_desc;
   orig_output_desc.set_count(params.tensor_in_batch)
diff --git a/tensorflow/core/kernels/pooling_ops_common_gpu.h b/tensorflow/core/kernels/pooling_ops_common_gpu.h
index b594f39fad..1458456585 100644
--- a/tensorflow/core/kernels/pooling_ops_common_gpu.h
+++ b/tensorflow/core/kernels/pooling_ops_common_gpu.h
@@ -44,7 +44,7 @@ class DnnPoolingOp {
                       const std::vector<int32>& size,
                       const std::vector<int32>& stride, Padding padding,
                       TensorFormat data_format, const Tensor& tensor_in,
-                      const TensorShape& tensor_out_shape);
+                      const TensorShape& tensor_out_shape, bool propagate_nans);
 };
 
 // A helper class that launch the cudnn pooling backward operations.
@@ -60,7 +60,7 @@ class DnnPoolingGradOp {
                       const std::vector<int32>& stride, Padding padding,
                       TensorFormat data_format, const Tensor* tensor_in,
                       const Tensor* tensor_out, const Tensor& out_backprop,
-                      const TensorShape& tensor_in_shape);
+                      const TensorShape& tensor_in_shape, bool propagate_nans);
 };
 
 }  // namespace tensorflow
diff --git a/tensorflow/core/kernels/quantized_add_op.cc b/tensorflow/core/kernels/quantized_add_op.cc
index 8be0c56798..337c8e5c17 100644
--- a/tensorflow/core/kernels/quantized_add_op.cc
+++ b/tensorflow/core/kernels/quantized_add_op.cc
@@ -489,7 +489,7 @@ class QuantizedAddOp : public OpKernel {
     // adding zero leaves the result unchanged, and to contain the largest of
     // the two input values with some room to spare.
     const float smallest_min = std::min(min_x, min_y);
-    const float largest_max = std::min(max_x, max_y);
+    const float largest_max = std::max(max_x, max_y);
     const float biggest_range =
         std::max(std::abs(smallest_min), std::abs(largest_max));
     const float output_range = (biggest_range * (1 << 14));
diff --git a/tensorflow/core/kernels/random_op.cc b/tensorflow/core/kernels/random_op.cc
index a37c757865..55a8b9c9b6 100644
--- a/tensorflow/core/kernels/random_op.cc
+++ b/tensorflow/core/kernels/random_op.cc
@@ -577,7 +577,7 @@ struct FillPhiloxRandomKernel<Distribution, false> {
     const size_t kGroupSize = Distribution::kResultElementCount;
 
     const size_t item_id = item.get_global(0);
-    const size_t total_item_count = item.get_global_range(0);
+    const size_t total_item_count = item.get_global_range();
     size_t offset = item_id * kGroupSize;
     gen_.Skip(item_id);
 
@@ -633,7 +633,7 @@ struct FillPhiloxRandomKernel<Distribution, true> {
                                                 PhiloxRandom::kResultElementCount;
 
     const size_t item_id = item.get_global(0);
-    const size_t total_item_count = item.get_global_range(0);
+    const size_t total_item_count = item.get_global_range();
     size_t group_index = item_id;
     size_t offset = group_index * kGroupSize;
 
diff --git a/tensorflow/core/kernels/segment_reduction_ops.cc b/tensorflow/core/kernels/segment_reduction_ops.cc
index 4302a68a18..2334e50f1d 100644
--- a/tensorflow/core/kernels/segment_reduction_ops.cc
+++ b/tensorflow/core/kernels/segment_reduction_ops.cc
@@ -376,6 +376,9 @@ struct UnsortedSegmentSumFunctor<CPUDevice, T, Index>
     auto data_flat = typename TTypes<T, 2>::ConstTensor(data, N, data_size / N);
     for (int64 i = 0; i < N; ++i) {
       Index j = internal::SubtleMustCopy(segment_ids(i));
+      if (j < 0) {
+        continue;
+      }
       OP_REQUIRES(ctx, FastBoundsCheck(j, output_rows),
                   errors::InvalidArgument(
                       "segment_ids", SliceDebugString(segment_ids_shape, i),
diff --git a/tensorflow/core/kernels/segment_reduction_ops.h b/tensorflow/core/kernels/segment_reduction_ops.h
index 412c1d601d..b10bea72ba 100644
--- a/tensorflow/core/kernels/segment_reduction_ops.h
+++ b/tensorflow/core/kernels/segment_reduction_ops.h
@@ -30,14 +30,14 @@ namespace functor {
 #ifdef GOOGLE_CUDA
 typedef Eigen::GpuDevice GPUDevice;
 // Functor for SegmentSumGPUOp.
-// 'output_rows': the number of output segments (unique segment ids in
+// output_rows: the number of output segments (unique segment ids in
 //                'segment_ids').
-// 'segment_ids_shape': shape of 'segment_ids' tensor.
-// 'segment_ids': unsorted map from input to output segment ids at which to
+// segment_ids_shape: shape of 'segment_ids' tensor.
+// segment_ids: unsorted map from input to output segment ids at which to
 //                perform segment sum operation.
-// 'data_size': size of input data tensor.
-// 'data': input data tensor.
-// 'output': output reshaped to {output_rows, output.size/output_rows}
+// data_size: size of input data tensor.
+// data: input data tensor.
+// output: output reshaped to {output_rows, output.size/output_rows}
 template <typename T, typename Index>
 struct SegmentSumFunctor {
   void operator()(OpKernelContext* ctx, const GPUDevice& d,
@@ -61,14 +61,14 @@ struct UnsortedSegmentBaseFunctor{
 };
 
 // Functor for UnsortedSegmentSumOp.
-// 'output_rows': the number of output segments (unique segment ids in
+// output_rows: the number of output segments (unique segment ids in
 //                'segment_ids').
-// 'segment_ids_shape': shape of 'segment_ids' tensor.
-// 'segment_ids': unsorted map from input to output segment ids at which to
+// segment_ids_shape: shape of 'segment_ids' tensor.
+// segment_ids: unsorted map from input to output segment ids at which to
 //                perform segment sum operation.
-// 'data_size': size of input data tensor.
-// 'data': input data tensor.
-// 'output': output reshaped to {output_rows, output.size/output_rows}
+// data_size: size of input data tensor.
+// data: input data tensor.
+// output: output reshaped to {output_rows, output.size/output_rows}
 template <typename Device, typename T, typename Index>
 struct UnsortedSegmentSumFunctor: public UnsortedSegmentBaseFunctor<Device, T, Index> {
   void operator()(OpKernelContext* ctx, const Device& d,
@@ -79,14 +79,14 @@ struct UnsortedSegmentSumFunctor: public UnsortedSegmentBaseFunctor<Device, T, I
 };
 
 // Functor for UnsortedSegmentMaxOp.
-// 'output_rows': the number of output segments (unique segment ids in
+// output_rows: the number of output segments (unique segment ids in
 //                'segment_ids').
-// 'segment_ids_shape': shape of 'segment_ids' tensor.
-// 'segment_ids': unsorted map from input to output segment ids at which to
+// segment_ids_shape: shape of 'segment_ids' tensor.
+// segment_ids: unsorted map from input to output segment ids at which to
 //                perform segment sum operation.
-// 'data_size': size of input data tensor.
-// 'data': input data tensor.
-// 'output': output reshaped to {output_rows, output.size/output_rows}
+// data_size: size of input data tensor.
+// data: input data tensor.
+// output: output reshaped to {output_rows, output.size/output_rows}
 template <typename Device, typename T, typename Index>
 struct UnsortedSegmentMaxFunctor: public UnsortedSegmentBaseFunctor<Device, T, Index> {
   void operator()(OpKernelContext* ctx, const Device& d,
diff --git a/tensorflow/core/kernels/shape_ops.cc b/tensorflow/core/kernels/shape_ops.cc
index 721f9b949b..28a39bae3f 100644
--- a/tensorflow/core/kernels/shape_ops.cc
+++ b/tensorflow/core/kernels/shape_ops.cc
@@ -341,7 +341,12 @@ REGISTER_KERNEL_BUILDER(Name("ExpandDims")
                             .Device(DEVICE_CPU)
                             .HostMemory("dim")
                             .TypeConstraint<int32>("Tdim"),
-                        ExpandDimsOp);
+                        ExpandDimsOp<int32>);
+REGISTER_KERNEL_BUILDER(Name("ExpandDims")
+                            .Device(DEVICE_CPU)
+                            .HostMemory("dim")
+                            .TypeConstraint<int64>("Tdim"),
+                        ExpandDimsOp<int64>);
 
 #if GOOGLE_CUDA
 #define REGISTER_GPU_KERNEL(type)                            \
@@ -350,7 +355,13 @@ REGISTER_KERNEL_BUILDER(Name("ExpandDims")
                               .TypeConstraint<type>("T")     \
                               .TypeConstraint<int32>("Tdim") \
                               .HostMemory("dim"),            \
-                          ExpandDimsOp);
+                          ExpandDimsOp<int32>);              \
+  REGISTER_KERNEL_BUILDER(Name("ExpandDims")                 \
+                              .Device(DEVICE_GPU)            \
+                              .TypeConstraint<type>("T")     \
+                              .TypeConstraint<int64>("Tdim") \
+                              .HostMemory("dim"),            \
+                          ExpandDimsOp<int64>);
 TF_CALL_NUMBER_TYPES_NO_INT32(REGISTER_GPU_KERNEL);
 TF_CALL_bool(REGISTER_GPU_KERNEL);
 #undef REGISTER_GPU_KERNEL
@@ -362,7 +373,15 @@ REGISTER_KERNEL_BUILDER(Name("ExpandDims")
                             .HostMemory("input")
                             .HostMemory("dim")
                             .HostMemory("output"),
-                        ExpandDimsOp);
+                        ExpandDimsOp<int32>);
+REGISTER_KERNEL_BUILDER(Name("ExpandDims")
+                            .Device(DEVICE_GPU)
+                            .TypeConstraint<int32>("T")
+                            .TypeConstraint<int64>("Tdim")
+                            .HostMemory("input")
+                            .HostMemory("dim")
+                            .HostMemory("output"),
+                        ExpandDimsOp<int64>);
 #endif  // GOOGLE_CUDA
 
 #ifdef TENSORFLOW_USE_SYCL
@@ -372,7 +391,13 @@ REGISTER_KERNEL_BUILDER(Name("ExpandDims")
                               .TypeConstraint<type>("T")     \
                               .TypeConstraint<int32>("Tdim") \
                               .HostMemory("dim"),            \
-                          ExpandDimsOp);
+                          ExpandDimsOp<int32>);              \
+  REGISTER_KERNEL_BUILDER(Name("ExpandDims")                 \
+                              .Device(DEVICE_SYCL)           \
+                              .TypeConstraint<type>("T")     \
+                              .TypeConstraint<int64>("Tdim") \
+                              .HostMemory("dim"),            \
+                          ExpandDimsOp<int64>);
 TF_CALL_NUMBER_TYPES_NO_INT32(REGISTER_SYCL_KERNEL);
 TF_CALL_bool(REGISTER_SYCL_KERNEL);
 #undef REGISTER_SYCL_KERNEL
@@ -384,7 +409,15 @@ REGISTER_KERNEL_BUILDER(Name("ExpandDims")
                             .HostMemory("input")
                             .HostMemory("dim")
                             .HostMemory("output"),
-                        ExpandDimsOp);
+                        ExpandDimsOp<int32>);
+REGISTER_KERNEL_BUILDER(Name("ExpandDims")
+                            .Device(DEVICE_SYCL)
+                            .TypeConstraint<int32>("T")
+                            .TypeConstraint<int64>("Tdim")
+                            .HostMemory("input")
+                            .HostMemory("dim")
+                            .HostMemory("output"),
+                        ExpandDimsOp<int64>);
 #endif  // TENSORFLOW_USE_SYCL
 
 // Squeeze ---------------------------------------
diff --git a/tensorflow/core/kernels/shape_ops.h b/tensorflow/core/kernels/shape_ops.h
index ac607f4e8b..55be308901 100644
--- a/tensorflow/core/kernels/shape_ops.h
+++ b/tensorflow/core/kernels/shape_ops.h
@@ -145,6 +145,7 @@ class SizeOp : public OpKernel {
   bool IsExpensive() override { return false; }
 };
 
+template <typename Tdim>
 class ExpandDimsOp : public OpKernel {
  public:
   explicit ExpandDimsOp(OpKernelConstruction* ctx) : OpKernel(ctx) {}
@@ -153,7 +154,7 @@ class ExpandDimsOp : public OpKernel {
     OP_REQUIRES(ctx, ctx->input(0).dtype() != DT_VARIANT,
                 errors::InvalidArgument("ExpandDims on Variant not supported"));
 
-    int32 dim = ctx->input(1).flat<int32>()(0);
+    Tdim dim = ctx->input(1).flat<Tdim>()(0);
     OP_REQUIRES(
         ctx, (dim >= -1 - ctx->input(0).dims() && dim <= ctx->input(0).dims()),
         errors::InvalidArgument("Tried to expand dim index ", dim,
@@ -175,7 +176,7 @@ class ExpandDimsOp : public OpKernel {
     }
 
     // Clamp to the end if needed.
-    dim = std::min<int32>(dim, existing_dims_size);
+    dim = std::min<Tdim>(dim, existing_dims_size);
     new_shape.emplace(new_shape.begin() + dim, 1);
     const TensorShape output_shape(new_shape);
 
diff --git a/tensorflow/core/kernels/strided_slice_op.cc b/tensorflow/core/kernels/strided_slice_op.cc
index 73b6d4cf6a..8fc40db3cc 100644
--- a/tensorflow/core/kernels/strided_slice_op.cc
+++ b/tensorflow/core/kernels/strided_slice_op.cc
@@ -427,7 +427,6 @@ REGISTER_STRIDED_SLICE(bfloat16);
 TF_CALL_GPU_NUMBER_TYPES(REGISTER_GPU);
 TF_CALL_complex64(REGISTER_GPU);
 TF_CALL_complex128(REGISTER_GPU);
-TF_CALL_int64(REGISTER_GPU);
 
 // A special GPU kernel for int32.
 // TODO(b/25387198): Also enable int32 in device memory. This kernel
diff --git a/tensorflow/core/kernels/transpose_op.cc b/tensorflow/core/kernels/transpose_op.cc
index 20f0edf309..96c051c636 100644
--- a/tensorflow/core/kernels/transpose_op.cc
+++ b/tensorflow/core/kernels/transpose_op.cc
@@ -31,13 +31,14 @@ limitations under the License.
 
 namespace tensorflow {
 
-// inv = InvertPermutationOp(T<int32> p) takes a permutation of
+// inv = InvertPermutationOp(T<int32/int64> p) takes a permutation of
 // integers 0, 1, ..., n - 1 and returns the inverted
 // permutation of p. I.e., inv[p[i]] == i, for i in [0 .. n).
 //
-// REQUIRES: input is a vector of int32.
+// REQUIRES: input is a vector of int32 or int64.
 // REQUIRES: input is a permutation of 0, 1, ..., n-1.
 
+template <typename T>
 class InvertPermutationOp : public OpKernel {
  public:
   explicit InvertPermutationOp(OpKernelConstruction* context)
@@ -48,20 +49,19 @@ class InvertPermutationOp : public OpKernel {
     OP_REQUIRES(
         context, TensorShapeUtils::IsVector(input.shape()),
         errors::InvalidArgument("invert_permutation expects a 1D vector."));
-    auto Tin = input.vec<int32>();
+    auto Tin = input.vec<T>();
     OP_REQUIRES(context,
                 FastBoundsCheck(Tin.size(), std::numeric_limits<int32>::max()),
                 errors::InvalidArgument("permutation of nonnegative int32s "
                                         "must have <= int32 max elements"));
-    const int32 N =
-        static_cast<int32>(Tin.size());  // Safe: bounds-checked above.
+    const T N = static_cast<T>(Tin.size());  // Safe: bounds-checked above.
     Tensor* output = nullptr;
     OP_REQUIRES_OK(context,
                    context->allocate_output(0, input.shape(), &output));
-    auto Tout = output->vec<int32>();
+    auto Tout = output->vec<T>();
     std::fill_n(Tout.data(), N, -1);
     for (int i = 0; i < N; ++i) {
-      const int32 d = internal::SubtleMustCopy(Tin(i));
+      const T d = internal::SubtleMustCopy(Tin(i));
       OP_REQUIRES(context, FastBoundsCheck(d, N),
                   errors::InvalidArgument(d, " is not between 0 and ", N));
       OP_REQUIRES(context, Tout(d) == -1,
@@ -73,14 +73,23 @@ class InvertPermutationOp : public OpKernel {
 
 REGISTER_KERNEL_BUILDER(
     Name("InvertPermutation").Device(DEVICE_CPU).TypeConstraint<int32>("T"),
-    InvertPermutationOp);
+    InvertPermutationOp<int32>);
+REGISTER_KERNEL_BUILDER(
+    Name("InvertPermutation").Device(DEVICE_CPU).TypeConstraint<int64>("T"),
+    InvertPermutationOp<int64>);
 
 REGISTER_KERNEL_BUILDER(Name("InvertPermutation")
                             .Device(DEVICE_GPU)
                             .TypeConstraint<int32>("T")
                             .HostMemory("x")
                             .HostMemory("y"),
-                        InvertPermutationOp);
+                        InvertPermutationOp<int32>);
+REGISTER_KERNEL_BUILDER(Name("InvertPermutation")
+                            .Device(DEVICE_GPU)
+                            .TypeConstraint<int64>("T")
+                            .HostMemory("x")
+                            .HostMemory("y"),
+                        InvertPermutationOp<int64>);
 
 #ifdef TENSORFLOW_USE_SYCL
 REGISTER_KERNEL_BUILDER(Name("InvertPermutation")
@@ -88,7 +97,13 @@ REGISTER_KERNEL_BUILDER(Name("InvertPermutation")
                             .TypeConstraint<int32>("T")
                             .HostMemory("x")
                             .HostMemory("y"),
-                        InvertPermutationOp);
+                        InvertPermutationOp<int32>);
+REGISTER_KERNEL_BUILDER(Name("InvertPermutation")
+                            .Device(DEVICE_SYCL)
+                            .TypeConstraint<int64>("T")
+                            .HostMemory("x")
+                            .HostMemory("y"),
+                        InvertPermutationOp<int64>);
 #endif  // TENSORFLOW_USE_SYCL
 
 namespace {
diff --git a/tensorflow/core/kernels/unique_op.cc b/tensorflow/core/kernels/unique_op.cc
index 701c5f6d2b..d087784c8a 100644
--- a/tensorflow/core/kernels/unique_op.cc
+++ b/tensorflow/core/kernels/unique_op.cc
@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
+#include <functional>
 #include <unordered_map>
 #include <utility>
 
@@ -21,6 +22,7 @@ limitations under the License.
 #include "tensorflow/core/framework/tensor.h"
 #include "tensorflow/core/framework/tensor_shape.h"
 #include "tensorflow/core/lib/core/status.h"
+#include "tensorflow/core/lib/hash/hash.h"
 
 namespace tensorflow {
 
@@ -33,8 +35,6 @@ class UniqueOp : public OpKernel {
 
   void Compute(OpKernelContext* context) override {
     const Tensor& input = context->input(0);
-    OP_REQUIRES(context, TensorShapeUtils::IsVector(input.shape()),
-                errors::InvalidArgument("unique expects a 1D vector."));
     // TODO(dga):  Make unique polymorphic for returning int32 and int64
     // vectors to support large tensors.
     OP_REQUIRES(context,
@@ -42,31 +42,102 @@ class UniqueOp : public OpKernel {
                 errors::InvalidArgument(
                     "unique does not support input tensors larger than ",
                     std::numeric_limits<int32>::max(), " elements"));
-    auto Tin = input.vec<T>();
-    const int64 N = static_cast<int64>(Tin.size());
+
+    int64 axis = 0;
+    std::vector<int64> new_sizes{1, input.NumElements(), 1};
+    if (context->num_inputs() == 1) {
+      OP_REQUIRES(context, TensorShapeUtils::IsVector(input.shape()),
+                  errors::InvalidArgument("unique expects a 1D vector."));
+    } else {
+      // In case of UniqueV2, the axis is a 1D vector. The purpose is
+      // to allow specifying either "no axis" or "axis". The `[]` means
+      // "no axis", while `[x]` means `axis = x`.
+      const Tensor& axis_tensor = context->input(1);
+      OP_REQUIRES(context, TensorShapeUtils::IsVector(axis_tensor.shape()),
+                  errors::InvalidArgument("axis expects a 1D vector."));
+      OP_REQUIRES(
+          context, axis_tensor.NumElements() <= 1,
+          errors::InvalidArgument(
+              "axis does not support input tensors larger than 1 elements"));
+      if (axis_tensor.NumElements() == 0) {
+        OP_REQUIRES(context, TensorShapeUtils::IsVector(input.shape()),
+                    errors::InvalidArgument("unique expects a 1D vector."));
+      } else {
+        auto axis_vec = axis_tensor.vec<int64>();
+        axis = axis_vec(0);
+        axis = axis < 0 ? axis + input.dims() : axis;
+        OP_REQUIRES(context, 0 <= axis && axis < input.dims(),
+                    errors::InvalidArgument("axis has to be between [0, ",
+                                            input.dims(), ")"));
+        if (axis > 0) {
+          for (int64 i = 0; i < axis; i++) {
+            new_sizes[0] *= input.dim_size(i);
+          }
+        }
+        new_sizes[1] = input.dim_size(axis);
+        if (axis + 1 < input.dims()) {
+          for (int64 i = axis + 1; i < input.dims(); i++) {
+            new_sizes[2] *= input.dim_size(i);
+          }
+        }
+      }
+    }
+
+    auto Tin = input.shaped<T, 3>(new_sizes);
 
     Tensor* idx = nullptr;
-    OP_REQUIRES_OK(context, context->forward_input_or_allocate_output(
-                                {0}, 1, input.shape(), &idx));
+    OP_REQUIRES_OK(context, context->allocate_output(
+                                1, TensorShape({Tin.dimension(1)}), &idx));
     auto idx_vec = idx->template vec<TIndex>();
 
-    std::unordered_map<T, TIndex> uniq;
-    uniq.reserve(2 * N);
-    for (int64 i = 0, j = 0; i < N; ++i) {
-      auto it = uniq.insert(std::make_pair(Tin(i), j));
+    auto hash_fn = [&Tin](const int64& key) -> unsigned long {
+      size_t h = 0;
+      for (int64 i = 0; i < Tin.dimension(0); i++) {
+        for (int64 j = 0; j < Tin.dimension(2); j++) {
+          h = Hash64Combine(h, hash<T>{}(Tin(i, key, j)));
+        }
+      }
+      return h;
+    };
+
+    auto equal_to_fn = [&Tin](const int64& lhs, const int64& rhs) {
+      for (int64 i = 0; i < Tin.dimension(0); i++) {
+        for (int64 j = 0; j < Tin.dimension(2); j++) {
+          if (Tin(i, lhs, j) != Tin(i, rhs, j)) {
+            return false;
+          }
+        }
+      }
+      return true;
+    };
+
+    std::unordered_map<int64, int64, decltype(hash_fn), decltype(equal_to_fn)>
+        uniq(0, hash_fn, equal_to_fn);
+
+    uniq.reserve(2 * Tin.dimension(1));
+
+    for (int64 i = 0, j = 0; i < Tin.dimension(1); ++i) {
+      auto it = uniq.insert(std::make_pair(i, j));
       idx_vec(i) = it.first->second;
       if (it.second) {
         ++j;
       }
     }
+
     int64 uniq_size = static_cast<int64>(uniq.size());
+    new_sizes[1] = uniq_size;
+    TensorShape output_shape(input.shape());
+    output_shape.set_dim(axis, uniq_size);
     Tensor* output = nullptr;
-    OP_REQUIRES_OK(context, context->allocate_output(
-                                0, TensorShape({uniq_size}), &output));
-    auto output_vec = output->template vec<T>();
+    OP_REQUIRES_OK(context, context->allocate_output(0, output_shape, &output));
+    auto Tout = output->shaped<T, 3>(new_sizes);
 
     for (auto it : uniq) {
-      output_vec(it.second) = it.first;
+      for (int64 i = 0; i < Tin.dimension(0); i++) {
+        for (int64 j = 0; j < Tin.dimension(2); j++) {
+          Tout(i, it.second, j) = Tin(i, it.first, j);
+        }
+      }
     }
 
     if (num_outputs() > 2) {
@@ -74,7 +145,7 @@ class UniqueOp : public OpKernel {
                                   2, TensorShape({uniq_size}), &output));
       auto count_output_vec = output->template vec<TIndex>();
       count_output_vec.setZero();
-      for (int64 i = 0; i < N; ++i) {
+      for (int64 i = 0; i < Tin.dimension(1); ++i) {
         count_output_vec(idx_vec(i))++;
       }
     }
@@ -92,6 +163,16 @@ class UniqueOp : public OpKernel {
                               .TypeConstraint<type>("T")         \
                               .TypeConstraint<int64>("out_idx"), \
                           UniqueOp<type, int64>);                \
+  REGISTER_KERNEL_BUILDER(Name("UniqueV2")                       \
+                              .Device(DEVICE_CPU)                \
+                              .TypeConstraint<type>("T")         \
+                              .TypeConstraint<int32>("out_idx"), \
+                          UniqueOp<type, int32>);                \
+  REGISTER_KERNEL_BUILDER(Name("UniqueV2")                       \
+                              .Device(DEVICE_CPU)                \
+                              .TypeConstraint<type>("T")         \
+                              .TypeConstraint<int64>("out_idx"), \
+                          UniqueOp<type, int64>);                \
   REGISTER_KERNEL_BUILDER(Name("UniqueWithCounts")               \
                               .Device(DEVICE_CPU)                \
                               .TypeConstraint<type>("T")         \
@@ -176,5 +257,5 @@ REGISTER_KERNEL_BUILDER(Name("Unique")
                             .HostMemory("y")
                             .HostMemory("idx"),
                         UniqueOp<int64, int64>);
-#endif // TENSORFLOW_USE_SYCL
+#endif  // TENSORFLOW_USE_SYCL
 }  // namespace tensorflow
diff --git a/tensorflow/core/ops/array_ops.cc b/tensorflow/core/ops/array_ops.cc
index be2916f154..9fa6423d59 100644
--- a/tensorflow/core/ops/array_ops.cc
+++ b/tensorflow/core/ops/array_ops.cc
@@ -723,7 +723,9 @@ y: a tensor of the same shape and type as x but filled with zeros.
 REGISTER_OP("OnesLike")
     .Input("x: T")
     .Output("y: T")
-    .Attr("T: {float, double, int32, int64, complex64, complex128}")
+    .Attr(
+        "T: {float, double, int8, uint8, int16, uint16, int32, int64, "
+        "complex64, complex128, bool}")
     .SetShapeFn(shape_inference::UnchangedShape)
     .Doc(R"doc(
 Returns a tensor of ones with the same shape and type as x.
@@ -2031,6 +2033,46 @@ y: 1-D.
 idx: 1-D.
 )doc");
 
+REGISTER_OP("UniqueV2")
+    .Input("x: T")
+    .Input("axis: int64")
+    .Output("y: T")
+    .Output("idx: out_idx")
+    .Attr("T: type")
+    .Attr("out_idx: {int32, int64} = DT_INT32")
+    .SetShapeFn([](InferenceContext* c) {
+      c->set_output(0, c->Vector(InferenceContext::kUnknownDim));
+      c->set_output(1, c->input(0));
+      return Status::OK();
+    })
+    .Doc(R"doc(
+Finds unique elements in a 1-D tensor.
+
+This operation returns a tensor `y` containing all of the unique elements of `x`
+sorted in the same order that they occur in `x`. This operation also returns a
+tensor `idx` the same size as `x` that contains the index of each value of `x`
+in the unique output `y`. In other words:
+
+`y[idx[i]] = x[i] for i in [0, 1,...,rank(x) - 1]`
+
+For example:
+
+```
+# tensor 'x' is [1, 1, 2, 4, 4, 4, 7, 8, 8]
+y, idx = unique(x)
+y ==> [1, 2, 4, 7, 8]
+idx ==> [0, 0, 1, 2, 2, 2, 3, 4, 4]
+```
+
+
+x: A `Tensor`.
+axis: A `Tensor` of type `int64` (default: 0). The axis of the Tensor to
+  find the unique elements.
+y: A `Tensor`. Unique elements along the `axis` of `Tensor` x.
+idx: A 1-D Tensor. Has the same type as x that contains the index of each
+  value of x in the output y.
+)doc");
+
 // --------------------------------------------------------------------------
 REGISTER_OP("UniqueWithCounts")
     .Input("x: T")
diff --git a/tensorflow/core/ops/math_ops.cc b/tensorflow/core/ops/math_ops.cc
index 7b10af9f44..d30b847696 100644
--- a/tensorflow/core/ops/math_ops.cc
+++ b/tensorflow/core/ops/math_ops.cc
@@ -1829,6 +1829,8 @@ need not be sorted and need not cover all values in the full
 range of valid values.
 
 If the sum is empty for a given segment ID `i`, `output[i] = 0`.
+If the given segment ID `i` is negative, the value is dropped and will not be
+added to the sum of the segment.
 
 `num_segments` should equal the number of distinct segment IDs.
 
diff --git a/tensorflow/core/ops/nn_ops.cc b/tensorflow/core/ops/nn_ops.cc
index e245c8ba91..a242a13878 100644
--- a/tensorflow/core/ops/nn_ops.cc
+++ b/tensorflow/core/ops/nn_ops.cc
@@ -819,7 +819,7 @@ REGISTER_OP("DepthwiseConv2dNative")
     .Input("input: T")
     .Input("filter: T")
     .Output("output: T")
-    .Attr("T: {float, double}")
+    .Attr("T: {half, float, double}")
     .Attr("strides: list(int)")
     .Attr(GetPaddingAttrString())
     .Attr(GetConvnetDataFormatAttrString())
@@ -945,7 +945,7 @@ REGISTER_OP("Conv3D")
     .Input("input: T")
     .Input("filter: T")
     .Output("output: T")
-    .Attr("T: {float, double}")
+    .Attr("T: {half, float, double}")
     .Attr("strides: list(int) >= 5")
     .Attr(GetPaddingAttrString())
     .Attr(GetConvnet3dDataFormatAttrString())
@@ -977,7 +977,7 @@ REGISTER_OP("Conv3DBackpropInput")
     .Input("filter: T")
     .Input("out_backprop: T")
     .Output("output: T")
-    .Attr("T: {float, double}")
+    .Attr("T: {half, float, double}")
     .Attr("strides: list(int) >= 5")
     .Attr(GetPaddingAttrString())
     .Deprecated(10, "Use Conv3DBackpropInputV2")
@@ -1003,7 +1003,7 @@ REGISTER_OP("Conv3DBackpropFilter")
     .Input("filter: T")
     .Input("out_backprop: T")
     .Output("output: T")
-    .Attr("T: {float, double}")
+    .Attr("T: {half, float, double}")
     .Attr("strides: list(int) >= 5")
     .Attr(GetPaddingAttrString())
     .Deprecated(10, "Use Conv3DBackpropFilterV2")
@@ -1032,7 +1032,7 @@ REGISTER_OP("Conv3DBackpropInputV2")
     .Input("filter: T")
     .Input("out_backprop: T")
     .Output("output: T")
-    .Attr("T: {float, double}")
+    .Attr("T: {half, float, double}")
     .Attr("strides: list(int) >= 5")
     .Attr(GetPaddingAttrString())
     .Attr(GetConvnet3dDataFormatAttrString())
@@ -1069,7 +1069,7 @@ REGISTER_OP("Conv3DBackpropFilterV2")
     .Input("filter_sizes: int32")
     .Input("out_backprop: T")
     .Output("output: T")
-    .Attr("T: {float, double}")
+    .Attr("T: {half, float, double}")
     .Attr("strides: list(int) >= 5")
     .Attr(GetPaddingAttrString())
     .Attr(GetConvnet3dDataFormatAttrString())
diff --git a/tensorflow/core/ops/ops.pbtxt b/tensorflow/core/ops/ops.pbtxt
index 6ce0b70c9d..9c41957ae6 100644
--- a/tensorflow/core/ops/ops.pbtxt
+++ b/tensorflow/core/ops/ops.pbtxt
@@ -5449,6 +5449,7 @@ op {
     type: "type"
     allowed_values {
       list {
+        type: DT_HALF
         type: DT_FLOAT
         type: DT_DOUBLE
       }
@@ -5515,6 +5516,7 @@ op {
     type: "type"
     allowed_values {
       list {
+        type: DT_HALF
         type: DT_FLOAT
         type: DT_DOUBLE
       }
@@ -5570,6 +5572,7 @@ op {
     type: "type"
     allowed_values {
       list {
+        type: DT_HALF
         type: DT_FLOAT
         type: DT_DOUBLE
       }
@@ -5635,6 +5638,7 @@ op {
     type: "type"
     allowed_values {
       list {
+        type: DT_HALF
         type: DT_FLOAT
         type: DT_DOUBLE
       }
@@ -5690,6 +5694,7 @@ op {
     type: "type"
     allowed_values {
       list {
+        type: DT_HALF
         type: DT_FLOAT
         type: DT_DOUBLE
       }
diff --git a/tensorflow/core/platform/default/build_config/BUILD b/tensorflow/core/platform/default/build_config/BUILD
index f746b15fee..f2fadb4558 100644
--- a/tensorflow/core/platform/default/build_config/BUILD
+++ b/tensorflow/core/platform/default/build_config/BUILD
@@ -12,6 +12,7 @@ load("//tensorflow:tensorflow.bzl", "tf_copts")
 load("//tensorflow:tensorflow.bzl", "tf_cuda_library")
 load("//tensorflow/core:platform/default/build_config_root.bzl", "if_static")
 load("@local_config_sycl//sycl:platform.bzl", "sycl_library_path")
+load("@local_config_sycl//sycl:build_defs.bzl", "if_ccpp")
 
 cc_library(
     name = "gtest",
@@ -194,17 +195,16 @@ cc_library(
 
 cc_library(
     name = "sycl",
-    data = [
+    data = if_ccpp([
         "@local_config_sycl//sycl:{}".format(sycl_library_path("ComputeCpp")),
-    ],
-    linkopts = select({
-        "//conditions:default": [
-            "-Wl,-rpath,../local_config_sycl/sycl/lib",
-        ],
-    }),
-    deps = [
-        "@local_config_sycl//sycl:syclrt",
-    ],
+    ]),
+    linkopts = if_ccpp([
+        "-Wl,-rpath,../local_config_sycl/sycl/lib",
+    ]),
+    deps = if_ccpp(
+        ["@local_config_sycl//sycl:syclrt"],
+        ["@local_config_sycl//sycl:sycl_headers"],
+    ),
 )
 
 filegroup(
diff --git a/tensorflow/core/platform/default/notification.h b/tensorflow/core/platform/default/notification.h
index 6a214dbd0a..5c401b7477 100644
--- a/tensorflow/core/platform/default/notification.h
+++ b/tensorflow/core/platform/default/notification.h
@@ -73,7 +73,7 @@ class Notification {
   }
 
   mutex mu_;                    // protects mutations of notified_
-  condition_variable cv_;       // signalled when notified_ becomes non-zero
+  condition_variable cv_;       // signaled when notified_ becomes non-zero
   std::atomic<bool> notified_;  // mutations under mu_
 };
 
diff --git a/tensorflow/core/platform/posix/error.cc b/tensorflow/core/platform/posix/error.cc
index e9baad5422..cda6d7d8f9 100644
--- a/tensorflow/core/platform/posix/error.cc
+++ b/tensorflow/core/platform/posix/error.cc
@@ -72,7 +72,7 @@ error::Code ErrnoToCode(int err_number) {
     case EBUSY:       // Device or resource busy
     case ECHILD:      // No child processes
     case EISCONN:     // Socket is connected
-#if !defined(_WIN32)
+#if !defined(_WIN32) && !defined(__HAIKU__)
     case ENOTBLK:     // Block device required
 #endif
     case ENOTCONN:    // The socket is not connected
@@ -94,7 +94,7 @@ error::Code ErrnoToCode(int err_number) {
     case ENODATA:  // No message is available on the STREAM read queue
     case ENOMEM:   // Not enough space
     case ENOSR:    // No STREAM resources
-#if !defined(_WIN32)
+#if !defined(_WIN32) && !defined(__HAIKU__)
     case EUSERS:   // Too many users
 #endif
       code = error::RESOURCE_EXHAUSTED;
@@ -111,7 +111,7 @@ error::Code ErrnoToCode(int err_number) {
     case EPFNOSUPPORT:     // Protocol family not supported
 #endif
     case EPROTONOSUPPORT:  // Protocol not supported
-#if !defined(_WIN32)
+#if !defined(_WIN32) && !defined(__HAIKU__)
     case ESOCKTNOSUPPORT:  // Socket type not supported
 #endif
     case EXDEV:            // Improper link
@@ -131,7 +131,8 @@ error::Code ErrnoToCode(int err_number) {
     case ENETUNREACH:   // Network unreachable
     case ENOLCK:        // No locks available
     case ENOLINK:       // Link has been severed
-#if !(defined(__APPLE__) || defined(__FreeBSD__) || defined(_WIN32))
+#if !(defined(__APPLE__) || defined(__FreeBSD__) || defined(_WIN32) || \
+      defined(__HAIKU__))
     case ENONET:  // Machine is not on the network
 #endif
       code = error::UNAVAILABLE;
@@ -156,7 +157,7 @@ error::Code ErrnoToCode(int err_number) {
     case ENOEXEC:      // Exec format error
     case ENOMSG:       // No message of the desired type
     case EPROTO:       // Protocol error
-#if !defined(_WIN32)
+#if !defined(_WIN32) && !defined(__HAIKU__)
     case EREMOTE:      // Object is remote
 #endif
       code = error::UNKNOWN;
diff --git a/tensorflow/core/platform/posix/port.cc b/tensorflow/core/platform/posix/port.cc
index 6cba40ccfc..614ee00b01 100644
--- a/tensorflow/core/platform/posix/port.cc
+++ b/tensorflow/core/platform/posix/port.cc
@@ -37,7 +37,8 @@ limitations under the License.
 #ifdef TF_USE_SNAPPY
 #include "snappy.h"
 #endif
-#if (defined(__APPLE__) && defined(__MACH__)) || defined(__FreeBSD__)
+#if (defined(__APPLE__) && defined(__MACH__)) || defined(__FreeBSD__) || \
+    defined(__HAIKU__)
 #include <thread>
 #endif
 
@@ -61,7 +62,8 @@ int NumSchedulableCPUs() {
   }
   perror("sched_getaffinity");
 #endif
-#if (defined(__APPLE__) && defined(__MACH__)) || defined(__FreeBSD__)
+#if (defined(__APPLE__) && defined(__MACH__)) || defined(__FreeBSD__) || \
+    defined(__HAIKU__)
   unsigned int count = std::thread::hardware_concurrency();
   if (count > 0) return static_cast<int>(count);
 #endif
diff --git a/tensorflow/core/public/version.h b/tensorflow/core/public/version.h
index 1bf9c93101..ec077c4283 100644
--- a/tensorflow/core/public/version.h
+++ b/tensorflow/core/public/version.h
@@ -24,7 +24,7 @@ limitations under the License.
 
 // TF_VERSION_SUFFIX is non-empty for pre-releases (e.g. "-alpha", "-alpha.1",
 // "-beta", "-rc", "-rc.1")
-#define TF_VERSION_SUFFIX "-rc1"
+#define TF_VERSION_SUFFIX ""
 
 #define TF_STR_HELPER(x) #x
 #define TF_STR(x) TF_STR_HELPER(x)
diff --git a/tensorflow/core/util/cuda_kernel_helper.h b/tensorflow/core/util/cuda_kernel_helper.h
index 8fa0dfbed9..cf11f419a4 100644
--- a/tensorflow/core/util/cuda_kernel_helper.h
+++ b/tensorflow/core/util/cuda_kernel_helper.h
@@ -752,6 +752,12 @@ __device__ EIGEN_ALWAYS_INLINE T CudaShuffleDown(unsigned mask, T value,
   return __shfl_down_sync(mask, value, delta, width);
 }
 
+__device__ EIGEN_ALWAYS_INLINE Eigen::half CudaShuffleDown(
+    unsigned mask, Eigen::half value, int delta, int width = warpSize) {
+  return Eigen::half(
+      __shfl_down_sync(mask, static_cast<uint16>(value), delta, width));
+}
+
 // Variant of the (undocumented) version from the CUDA SDK, but using unsigned
 // instead of float for lo and hi (which is incorrect with ftz, for example).
 // A bug has been filed with NVIDIA and will be fixed in the next CUDA release.
@@ -774,6 +780,12 @@ __device__ EIGEN_ALWAYS_INLINE T CudaShuffleXor(unsigned mask, T value,
   return __shfl_xor_sync(mask, value, laneMask, width);
 }
 
+__device__ EIGEN_ALWAYS_INLINE Eigen::half CudaShuffleXor(
+    unsigned mask, Eigen::half value, int laneMask, int width = warpSize) {
+  return Eigen::half(
+      __shfl_xor_sync(mask, static_cast<uint16>(value), laneMask, width));
+}
+
 // Variant of the (undocumented) version from the CUDA SDK, but using unsigned
 // instead of float for lo and hi (which is incorrect with ftz, for example).
 // A bug has been filed with NVIDIA and will be fixed in the next CUDA release.
diff --git a/tensorflow/core/util/mkl_util.h b/tensorflow/core/util/mkl_util.h
index 1bfa4f83a3..148c7851bd 100644
--- a/tensorflow/core/util/mkl_util.h
+++ b/tensorflow/core/util/mkl_util.h
@@ -24,10 +24,9 @@ limitations under the License.
 #include "mkl_dnn_types.h"
 #include "mkl_service.h"
 #include "mkl_trans.h"
+#include "tensorflow/core/framework/op_kernel.h"
 #include "tensorflow/core/framework/tensor.h"
 #include "tensorflow/core/framework/tensor_shape.h"
-
-#include "tensorflow/core/framework/op_kernel.h"
 #include "tensorflow/core/graph/mkl_graph_util.h"
 #include "tensorflow/core/lib/core/errors.h"
 #include "tensorflow/core/lib/gtl/array_slice.h"
@@ -38,6 +37,12 @@ limitations under the License.
 
 #ifdef INTEL_MKL_DNN
 #include "mkldnn.hpp"
+
+using mkldnn::engine;
+using mkldnn::memory;
+using mkldnn::padding_kind;
+using mkldnn::primitive;
+using mkldnn::reorder;
 #endif
 
 // The file contains a number of utility classes and functions used by MKL
@@ -51,6 +56,14 @@ namespace tensorflow {
 // Tensorflow tensor.
 
 typedef enum { W = 0, H = 1, C = 2, N = 3 } MklDims;
+typedef enum {
+  Dim_N = 0,
+  Dim_C = 1,
+  Dim_H = 2,
+  Dim_W = 3,
+  Dim_O = 0,
+  Dim_I = 1
+} MklDnnDims;
 
 class MklShape {
  public:
@@ -143,7 +156,9 @@ class MklShape {
   size_t GetDimension() const { return dimension_; }
   const size_t* GetSizes() const { return sizes_; }
   int64 dim_size(int index) const { return sizes_[index]; }
-  int64 tf_dim_size(int index) const { return sizes_[tf_to_mkl_dim_map_[index]]; }
+  int64 tf_dim_size(int index) const {
+    return sizes_[tf_to_mkl_dim_map_[index]];
+  }
   const size_t* GetStrides() const { return strides_; }
   const size_t* GetTfToMklDimMap() const { return tf_to_mkl_dim_map_; }
   size_t tf_dim_idx(int index) const { return tf_to_mkl_dim_map_[index]; }
@@ -309,6 +324,260 @@ class MklShape {
       nullptr;  // TF dimension corresponding to this MKL dimension
 };
 
+#ifdef INTEL_MKL_DNN
+
+// Forward decl
+TensorFormat MklDnnDataFormatToTFDataFormat(memory::format format);
+
+class MklDnnShape {
+ private:
+  typedef struct {
+    /// Flag to indicate if the tensor is an  MKL tensor or not
+    bool is_mkl_tensor_ = false;
+    /// Number of dimensions in Tensorflow format
+    size_t dimension_ = 0;
+    /// Required by MKLDNN for conversions
+    mkldnn_dims_t sizes_;  // Required by MKL for conversions
+    memory::format tf_data_format_ = memory::format::format_undef;
+    memory::data_type T_ = memory::data_type::data_undef;
+    // MKL layout
+    mkldnn_memory_desc_t mkl_md_;
+    /// TF dimension corresponding to this MKL dimension
+    mkldnn_dims_t map_;
+  } MklShapeData;
+  MklShapeData data_;
+
+  typedef std::remove_extent<mkldnn_dims_t>::type mkldnn_dim_t;
+#define INVALID_DIM_SIZE -1
+
+ public:
+  MklDnnShape() {
+    for (size_t i = 0; i < sizeof(data_.sizes_) / sizeof(data_.sizes_[0]);
+         ++i) {
+      data_.sizes_[i] = -1;
+    }
+    for (size_t i = 0; i < sizeof(data_.map_) / sizeof(data_.map_[0]); ++i) {
+      data_.map_[i] = -1;
+    }
+  }
+
+  ~MklDnnShape() {}
+  TF_DISALLOW_COPY_AND_ASSIGN(MklDnnShape);  // Cannot copy
+
+  inline const bool IsMklTensor() const { return data_.is_mkl_tensor_; }
+  inline void SetMklTensor(bool is_mkl_tensor) {
+    data_.is_mkl_tensor_ = is_mkl_tensor;
+  }
+
+  inline void SetDimensions(const size_t dimension) {
+    data_.dimension_ = dimension;
+  }
+  inline size_t GetDimension(char dimension) const {
+    int index = GetMklDnnTensorDimIndex(dimension);
+    CHECK(index >= 0 && index < this->GetDimension())
+        << "Invalid index from the dimension: " << index << ", " << dimension;
+    return this->DimSize(index);
+  }
+
+  inline int32 GetMklDnnTensorDimIndex(char dimension) const {
+    switch (dimension) {
+      case 'N':
+        return MklDnnDims::Dim_N;
+      case 'C':
+        return MklDnnDims::Dim_C;
+      case 'H':
+        return MklDnnDims::Dim_H;
+      case 'W':
+        return MklDnnDims::Dim_W;
+      default:
+        LOG(FATAL) << "Invalid dimension: " << dimension;
+        return -1;  // Avoid compiler warning about missing return value
+    }
+  }
+
+  inline size_t GetDimension() const { return data_.dimension_; }
+  inline const int* GetSizes() const {
+    return reinterpret_cast<const int*>(&data_.sizes_[0]);
+  }
+
+  // Returns an mkldnn::memory::dims object that contains the sizes of this
+  // MklDnnShape object.
+  inline memory::dims GetSizesAsMklDnnDims() const {
+    memory::dims retVal;
+    if (data_.is_mkl_tensor_) {
+      int dimensions = sizeof(data_.sizes_) / sizeof(data_.sizes_[0]);
+      for (size_t i = 0; i < dimensions; i++) {
+        if (data_.sizes_[i] != INVALID_DIM_SIZE)
+          retVal.push_back(data_.sizes_[i]);
+      }
+    } else {
+      CHECK_EQ(data_.is_mkl_tensor_, true);
+    }
+    return retVal;
+  }
+
+  inline int64 DimSize(int index) const {
+    CHECK_LT(index, sizeof(data_.sizes_) / sizeof(data_.sizes_[0]));
+    return data_.sizes_[index];
+  }
+
+  /// Return TensorShape that describes the Tensorflow shape of the tensor
+  /// represented by this MklShape.
+  inline TensorShape GetTfShape() {
+    CHECK_EQ(data_.is_mkl_tensor_, true);
+
+    std::vector<int32> shape(data_.dimension_, -1);
+    for (size_t idx = 0; idx < data_.dimension_; ++idx) {
+      shape[idx] = data_.sizes_[TfDimIdx(idx)];
+    }
+
+    TensorShape ts;
+    bool ret = TensorShapeUtils::MakeShape(shape, &ts).ok();
+    CHECK_EQ(ret, true);
+    return ts;
+  }
+
+  inline void SetElemType(memory::data_type dt) { data_.T_ = dt; }
+  inline const memory::data_type GetElemType() { return data_.T_; }
+
+  inline void SetMklLayout(memory::primitive_desc* pd) {
+    CHECK_NOTNULL(pd);
+    data_.mkl_md_ = pd->desc().data;
+  }
+  inline const memory::desc GetMklLayout() const {
+    return memory::desc(data_.mkl_md_);
+  }
+
+  inline memory::format GetTfDataFormat() const {
+    return data_.tf_data_format_;
+  }
+  /// We don't create primitive_descriptor for TensorFlow layout now.
+  /// We use lazy evaluation and create it only when needed.
+  inline void SetTfLayout(size_t dims, const memory::dims& sizes,
+                          memory::format format) {
+    CHECK_EQ(dims, sizes.size());
+    data_.dimension_ = dims;
+    for (size_t ii = 0; ii < dims; ii++) {
+      data_.sizes_[ii] = sizes[ii];
+    }
+    data_.tf_data_format_ = format;
+    SetTfDimOrder(dims, format);
+  }
+  inline const memory::desc GetTfLayout() const {
+    memory::dims dims;
+    for (size_t ii = 0; ii < data_.dimension_; ii++) {
+      dims.push_back(data_.sizes_[ii]);
+    }
+    return memory::desc(dims, data_.T_, data_.tf_data_format_);
+  }
+  inline const memory::desc GetCurLayout() const {
+    return IsMklTensor() ? GetMklLayout() : GetTfLayout();
+  }
+
+  // nhasabni - I've removed SetTfDimOrder that was setting default order in
+  // case of MKL-ML. We don't need a case of default dimension order because
+  // when an operator that does not get data_format attribute gets all inputs
+  // in Tensorflow format, it will produce output in Tensorflow format.
+  inline void SetTfDimOrder(const size_t dimension, const mkldnn_dims_t map) {
+    CHECK(dimension == data_.dimension_);
+    for (size_t ii = 0; ii < dimension; ii++) {
+      data_.map_[ii] = map[ii];
+    }
+  }
+
+  inline void SetTfDimOrder(const size_t dimension, TensorFormat data_format) {
+    // TODO(nhasabni): Why do we restrict this to 4D?
+    CHECK_EQ(dimension, 4);
+    CHECK(dimension == data_.dimension_);
+    data_.map_[GetTensorDimIndex<2>(data_format, 'W')] = MklDnnDims::Dim_W;
+    data_.map_[GetTensorDimIndex<2>(data_format, 'H')] = MklDnnDims::Dim_H;
+    data_.map_[GetTensorDimIndex<2>(data_format, 'C')] = MklDnnDims::Dim_C;
+    data_.map_[GetTensorDimIndex<2>(data_format, 'N')] = MklDnnDims::Dim_N;
+  }
+
+  inline void SetTfDimOrder(const size_t dimension, memory::format format) {
+    TensorFormat data_format = MklDnnDataFormatToTFDataFormat(format);
+    SetTfDimOrder(dimension, data_format);
+  }
+
+  inline const mkldnn_dim_t* GetTfToMklDimMap() const { return &data_.map_[0]; }
+  inline size_t TfDimIdx(int index) const { return data_.map_[index]; }
+  inline int64 TfDimSize(int index) const {
+    return data_.sizes_[TfDimIdx(index)];
+  }
+
+  /// Query TF-MKL dimension ordering map and check if Tensorflow dimension 'd'
+  /// corresponds to MKL's Channel dimension.
+  inline bool IsMklChannelDim(int d) const {
+    return TfDimIdx(d) == MklDnnDims::Dim_C;
+  }
+  /// Query TF-MKL dimension ordering map and check if Tensorflow dimension 'd'
+  /// corresponds to MKL's Batch dimension.
+  inline bool IsMklBatchDim(int d) const {
+    return TfDimIdx(d) == MklDnnDims::Dim_N;
+  }
+  /// Query TF-MKL dimension ordering map and check if Tensorflow dimension 'd'
+  /// corresponds to MKL's Width dimension.
+  inline bool IsMklWidthDim(int d) const {
+    return TfDimIdx(d) == MklDnnDims::Dim_W;
+  }
+  /// Query TF-MKL dimension ordering map and check if Tensorflow dimension 'd'
+  /// corresponds to MKL's Height dimension.
+  inline bool IsMklHeightDim(int d) const {
+    return TfDimIdx(d) == MklDnnDims::Dim_H;
+  }
+
+  /// Check if the TF-Mkl dimension ordering map specifies if the input
+  /// tensor is in NCHW format.
+  inline bool IsTensorInNCHWFormat() const {
+    TensorFormat data_format = FORMAT_NCHW;
+    return (IsMklBatchDim(GetTensorDimIndex<2>(data_format, 'N')) &&
+            IsMklChannelDim(GetTensorDimIndex<2>(data_format, 'C')) &&
+            IsMklHeightDim(GetTensorDimIndex<2>(data_format, 'H')) &&
+            IsMklWidthDim(GetTensorDimIndex<2>(data_format, 'W')));
+  }
+
+  /// Check if the TF-Mkl dimension ordering map specifies if the input
+  /// tensor is in NHWC format.
+  inline bool IsTensorInNHWCFormat() const {
+    TensorFormat data_format = FORMAT_NHWC;
+    return (IsMklBatchDim(GetTensorDimIndex<2>(data_format, 'N')) &&
+            IsMklChannelDim(GetTensorDimIndex<2>(data_format, 'C')) &&
+            IsMklHeightDim(GetTensorDimIndex<2>(data_format, 'H')) &&
+            IsMklWidthDim(GetTensorDimIndex<2>(data_format, 'W')));
+  }
+
+  /// The following methods are used for serializing and de-serializing the
+  /// contents of the mklshape object.
+  /// The data is serialized in this order
+  /// is_mkl_tensor_ : dimension_ : sizes_ : map_: format_ : T_ : mkl_pd_;
+
+  /// Size of buffer to hold the serialized object, the size is computed by
+  /// following above mentioned order
+  inline size_t GetSerializeBufferSize() const { return sizeof(MklShapeData); }
+
+  void SerializeMklDnnShape(unsigned char* buf, size_t buf_size) const {
+    CHECK(buf_size >= GetSerializeBufferSize())
+        << "Buffer size is too small to SerializeMklDnnShape";
+    *reinterpret_cast<MklShapeData*>(buf) = data_;
+  }
+
+  void DeSerializeMklDnnShape(const unsigned char* buf, size_t buf_size) {
+    // Make sure buffer holds at least is_mkl_tensor_.
+    CHECK(buf_size >= sizeof(data_.is_mkl_tensor_))
+        << "Buffer size is too small in DeSerializeMklDnnShape";
+
+    const bool is_mkl_tensor = *reinterpret_cast<const bool*>(buf);
+    if (is_mkl_tensor) {  // If it is an MKL Tensor then read the rest
+      CHECK(buf_size >= GetSerializeBufferSize())
+          << "Buffer size is too small in DeSerializeMklDnnShape";
+      data_ = *reinterpret_cast<const MklShapeData*>(buf);
+    }
+  }
+};
+
+#endif
+
 // List of MklShape objects. Used in Concat/Split layers.
 typedef std::vector<MklShape> MklShapeList;
 
@@ -347,6 +616,36 @@ inline Tensor ConvertMklToTF(OpKernelContext* context, const Tensor& mkl_tensor,
   return output_tensor;
 }
 
+#ifdef INTEL_MKL_DNN
+template <typename T>
+inline Tensor ConvertMklToTF(OpKernelContext* context, const Tensor& mkl_tensor,
+                             const MklDnnShape& mkl_shape) {
+  Tensor output_tensor;
+  TensorShape output_shape;
+
+#if 0
+  // TODO(nhasabni): need to implement
+  for (size_t j = 0; j < mkl_shape.GetDimension(); j++) {
+    // Outermost to innermost dimension
+    output_shape.AddDim(mkl_shape.GetSizes()[mkl_shape.tf_dim_idx(j)]);
+  }
+
+  // Allocate output tensor.
+  context->allocate_temp(DataTypeToEnum<T>::v(), output_shape, &output_tensor);
+
+  dnnLayout_t output_layout = static_cast<dnnLayout_t>(mkl_shape.GetTfLayout());
+  void* input_buffer = const_cast<T*>(mkl_tensor.flat<T>().data());
+  void* output_buffer = const_cast<T*>(output_tensor.flat<T>().data());
+
+  if (mkl_tensor.NumElements() != 0) {
+    mkl_shape.GetConvertedFlatData(output_layout, input_buffer, output_buffer);
+  }
+#endif
+
+  return output_tensor;
+}
+#endif
+
 // Get the MKL shape from the second string tensor
 inline void GetMklShape(OpKernelContext* ctext, int n, MklShape* mklshape) {
   mklshape->DeSerializeMklShape(
@@ -359,6 +658,19 @@ inline void GetMklShape(OpKernelContext* ctext, int n, MklShape* mklshape) {
           sizeof(uint8));
 }
 
+#ifdef INTEL_MKL_DNN
+inline void GetMklShape(OpKernelContext* ctext, int n, MklDnnShape* mklshape) {
+  mklshape->DeSerializeMklDnnShape(
+      ctext->input(GetTensorMetaDataIndex(n, ctext->num_inputs()))
+          .flat<uint8>()
+          .data(),
+      ctext->input(GetTensorMetaDataIndex(n, ctext->num_inputs()))
+              .flat<uint8>()
+              .size() *
+          sizeof(uint8));
+}
+#endif
+
 // Gets the actual input
 inline const Tensor& MklGetInput(OpKernelContext* ctext, int n) {
   return ctext->input(GetTensorDataIndex(n, ctext->num_inputs()));
@@ -382,6 +694,26 @@ inline void GetMklShapeList(OpKernelContext* ctext, StringPiece name,
   }
 }
 
+#ifdef INTEL_MKL_DNN
+/// Get shape of input tensor pointed by 'input_idx' in TensorShape format.
+/// If the input tensor is in MKL layout, then obtains TensorShape from
+/// MklShape.
+inline TensorShape GetTfShape(OpKernelContext* context, size_t input_idx) {
+  // Sanity check.
+  CHECK_NOTNULL(context);
+  CHECK_LT(input_idx, context->num_inputs());
+
+  MklDnnShape input_mkl_shape;
+  GetMklShape(context, input_idx, &input_mkl_shape);
+  if (input_mkl_shape.IsMklTensor()) {
+    return input_mkl_shape.GetTfShape();
+  } else {
+    const Tensor& t = MklGetInput(context, input_idx);
+    return t.shape();
+  }
+}
+#endif
+
 // Allocate the second output tensor that will contain
 // the MKL shape serialized
 inline void AllocateOutputSetMklShape(OpKernelContext* ctext, int n,
@@ -397,6 +729,23 @@ inline void AllocateOutputSetMklShape(OpKernelContext* ctext, int n,
       second_tensor->flat<uint8>().size() * sizeof(uint8));
 }
 
+#ifdef INTEL_MKL_DNN
+// Allocate the second output tensor that will contain
+// the MKL shape serialized
+inline void AllocateOutputSetMklShape(OpKernelContext* ctext, int n,
+                                      const MklDnnShape& mkl_shape) {
+  Tensor* second_tensor = nullptr;
+  TensorShape second_shape;
+  second_shape.AddDim(mkl_shape.GetSerializeBufferSize());
+  OP_REQUIRES_OK(ctext, ctext->allocate_output(
+                            GetTensorMetaDataIndex(n, ctext->num_outputs()),
+                            second_shape, &second_tensor));
+  mkl_shape.SerializeMklDnnShape(
+      second_tensor->flat<uint8>().data(),
+      second_tensor->flat<uint8>().size() * sizeof(uint8));
+}
+#endif
+
 // Allocate the output tensor, create a second output tensor that will contain
 // the MKL shape serialized
 inline void AllocateOutputSetMklShape(OpKernelContext* ctext, int n,
@@ -417,9 +766,43 @@ inline void AllocateOutputSetMklShape(OpKernelContext* ctext, int n,
       second_tensor->flat<uint8>().size() * sizeof(uint8));
 }
 
+#ifdef INTEL_MKL_DNN
+// Allocate the output tensor, create a second output tensor that will contain
+// the MKL shape serialized
+inline void AllocateOutputSetMklShape(OpKernelContext* ctext, int n,
+                                      Tensor** output,
+                                      const TensorShape& tf_shape,
+                                      const MklDnnShape& mkl_shape) {
+  Tensor* second_tensor = nullptr;
+  TensorShape second_shape;
+  second_shape.AddDim(mkl_shape.GetSerializeBufferSize());
+  OP_REQUIRES_OK(
+      ctext, ctext->allocate_output(GetTensorDataIndex(n, ctext->num_outputs()),
+                                    tf_shape, output));
+  OP_REQUIRES_OK(ctext, ctext->allocate_output(
+                            GetTensorMetaDataIndex(n, ctext->num_outputs()),
+                            second_shape, &second_tensor));
+  mkl_shape.SerializeMklDnnShape(
+      second_tensor->flat<uint8>().data(),
+      second_tensor->flat<uint8>().size() * sizeof(uint8));
+}
+#endif
+
 // Allocates a temp tensor and returns the data buffer for temporary storage.
 // Currently
-// we only support F32, will need to templatize if other types are added
+#ifdef INTEL_MKL_DNN
+template <typename T>
+inline void AllocTmpBuffer(OpKernelContext* context, Tensor* tensor_out,
+                           const memory::primitive_desc& pd, void** buf_out) {
+  TensorShape tf_shape;
+
+  tf_shape.AddDim(pd.get_size() / sizeof(T) + 1);
+  OP_REQUIRES_OK(context, context->allocate_temp(DataTypeToEnum<T>::v(),
+                                                 tf_shape, tensor_out));
+  *buf_out = static_cast<void*>(tensor_out->flat<T>().data());
+}
+#endif
+
 inline void AllocTmpBuffer(OpKernelContext* context, Tensor* tensor_out,
                            dnnLayout_t lt_buff, void** buf_out) {
   TensorShape tf_shape;
@@ -669,6 +1052,8 @@ inline bool MklCompareShapes(const TensorShape* input_shape_0,
   return true;
 }
 
+// These functions do not compile with MKL-DNN since mkl.h is missing.
+// We may need to remove them later.
 // TODO(intel_tf): Remove this routine when faster MKL layout conversion is
 // out.
 inline void MklNHWCToNCHW(const Tensor& input, Tensor** output) {
@@ -707,12 +1092,6 @@ inline void MklNCHWToNHWC(const Tensor& input, Tensor** output) {
 
 #ifdef INTEL_MKL_DNN
 
-using mkldnn::engine;
-using mkldnn::memory;
-using mkldnn::padding_kind;
-using mkldnn::primitive;
-using mkldnn::reorder;
-
 /// Return MKL-DNN data type (memory::data_type) for input type T
 ///
 /// @input None
@@ -742,6 +1121,19 @@ inline memory::format TFDataFormatToMklDnnDataFormat(TensorFormat format) {
   return memory::format::format_undef;
 }
 
+/// Map MKL-DNN data format to TensorFlow's data format
+///
+/// @input: memory::format
+/// @return: Tensorflow data format corresponding to memory::format
+///          Fails with an error if invalid data format.
+inline TensorFormat MklDnnDataFormatToTFDataFormat(memory::format format) {
+  if (format == memory::format::nhwc)
+    return FORMAT_NHWC;
+  else if (format == memory::format::nchw)
+    return FORMAT_NCHW;
+  TF_CHECK_OK(Status(error::Code::INVALID_ARGUMENT, "Unsupported data format"));
+}
+
 /// Map TensorShape object into memory::dims required by MKL-DNN
 ///
 /// This function will simply map input TensorShape into MKL-DNN dims
@@ -753,7 +1145,7 @@ inline memory::format TFDataFormatToMklDnnDataFormat(TensorFormat format) {
 /// @return memory::dims corresponding to TensorShape
 inline memory::dims TFShapeToMklDnnDims(const TensorShape& shape) {
   memory::dims dims(shape.dims());
-  for (unsigned int d = 0; d < shape.dims(); ++d) {
+  for (int d = 0; d < shape.dims(); ++d) {
     dims[d] = shape.dim_size(d);
   }
   return dims;
@@ -783,6 +1175,43 @@ inline memory::dims TFShapeToMklDnnDimsInNCHW(const TensorShape& shape,
   return memory::dims({n, c, h, w});
 }
 
+/// Map MklDnn memory::dims object into TensorShape object.
+///
+/// This function will simply map input shape in MKL-DNN memory::dims format
+/// in Tensorflow's TensorShape object by perserving dimension order.
+///
+/// @input MKL-DNN memory::dims object
+/// @output TensorShape corresponding to memory::dims
+inline TensorShape MklDnnDimsToTFShape(const memory::dims& dims) {
+  std::vector<int32> shape(dims.size(), -1);
+  for (int d = 0; d < dims.size(); d++) {
+    shape[d] = dims[d];
+  }
+
+  TensorShape ret;
+  CHECK_EQ(TensorShapeUtils::MakeShape(shape, &ret).ok(), true);
+  return ret;
+}
+
+/// Function to calculate strides given tensor shape in Tensorflow order
+/// E.g., if dims_tf_order is {1, 2, 3, 4}, then as per Tensorflow convention,
+/// dimesion with size 1 is outermost dimension; while dimension with size 4 is
+/// innermost dimension. So strides for this tensor would be {4 * 3 * 2,
+/// 4 * 3, 4, 1}, i.e., {24, 12, 4, 1}.
+///
+/// @input Tensorflow shape in memory::dims type
+/// @return memory::dims containing strides for the tensor.
+inline memory::dims CalculateTFStrides(const memory::dims& dims_tf_order) {
+  CHECK_GT(dims_tf_order.size(), 0);
+  memory::dims strides(dims_tf_order.size());
+  int last_dim_idx = dims_tf_order.size() - 1;
+  strides[last_dim_idx] = 1;
+  for (int d = last_dim_idx - 1; d >= 0; d--) {
+    strides[d] = strides[d + 1] * dims_tf_order[d + 1];
+  }
+  return strides;
+}
+
 inline padding_kind TFPaddingToMklDnnPadding(Padding pad) {
   // MKL-DNN only supports zero padding.
   return padding_kind::zero;
@@ -821,7 +1250,7 @@ class MklDnnData {
     delete (op_md_);
   }
 
-  void* GetTensorBuffer(const Tensor* tensor) {
+  inline void* GetTensorBuffer(const Tensor* tensor) const {
     CHECK_NOTNULL(tensor);
     return const_cast<void*>(
         static_cast<const void*>(tensor->flat<T>().data()));
@@ -835,35 +1264,83 @@ class MklDnnData {
   /// an operation. E.g., filter of Conv2D is of shape {1, 2, 3, 4}, and
   /// memory format HWIO, and the buffer that contains actual values is
   /// pointed by data_buffer.
-  void SetUsrMem(memory::dims dim, memory::format fm, void* data_buffer) {
-    CHECK_NOTNULL(data_buffer);
-    CHECK_NOTNULL(cpu_engine_);
-    // TODO(nhasabni): can we remove dynamic memory allocation?
-    user_memory_ =
-        new memory(memory::primitive_desc(
-                       memory::desc(dim, MklDnnType<T>(), fm), *cpu_engine_),
-                   data_buffer);
+  inline void SetUsrMem(const memory::dims& dim, memory::format fm,
+                        void* data_buffer = nullptr) {
+    auto md = memory::desc(dim, MklDnnType<T>(), fm);
+    SetUsrMem(md, data_buffer);
   }
 
-  void SetUsrMem(memory::dims dim, memory::format fm, const Tensor* tensor) {
+  inline void SetUsrMem(const memory::dims& dim, memory::format fm,
+                        const Tensor* tensor) {
     CHECK_NOTNULL(tensor);
     SetUsrMem(dim, fm, GetTensorBuffer(tensor));
   }
 
+  /// Helper function to create memory descriptor in Blocked format
+  ///
+  /// @input: Tensor dimensions
+  /// @input: strides corresponding to dimensions. One can use utility
+  ///         function such as CalculateTFStrides to compute strides
+  ///         for given dimensions.
+  /// @return: memory::desc object corresponding to blocked memory format
+  ///          for given dimensions and strides.
+  static inline memory::desc CreateBlockedMemDesc(const memory::dims& dim,
+                                                  const memory::dims& strides) {
+    CHECK_EQ(dim.size(), strides.size());
+
+    // We have to construct memory descriptor in a C style. This is not at all
+    // ideal but MKLDNN does not offer any API to construct descriptor in
+    // blocked format except a copy constructor that accepts
+    // mkldnn_memory_desc_t.
+    mkldnn_memory_desc_t md;
+    md.primitive_kind = mkldnn_memory;
+    md.ndims = dim.size();
+    md.format = mkldnn_blocked;
+    md.data_type = memory::convert_to_c(MklDnnType<T>());
+
+    for (size_t i = 0; i < dim.size(); i++) {
+      md.layout_desc.blocking.block_dims[i] = 1;
+      md.layout_desc.blocking.strides[1][i] = 1;
+      md.layout_desc.blocking.strides[0][i] = strides[i];
+      md.layout_desc.blocking.padding_dims[i] = dim[i];
+      md.layout_desc.blocking.offset_padding_to_data[i] = 0;
+      md.dims[i] = dim[i];
+    }
+    md.layout_desc.blocking.offset_padding = 0;
+
+    return memory::desc(md);
+  }
+
+  /// A version of SetUsrMem call that allows user to create memory in blocked
+  /// format. So in addition to accepting dimensions, it also accepts strides.
+  /// This allows user to create memory for tensor in a format that is not
+  /// supported by MKLDNN. E.g., MKLDNN does not support tensor format for 6
+  /// dimensional tensor as a native format. But by using blocked format, a user
+  /// can create memory for 6D tensor.
+  inline void SetUsrMem(const memory::dims& dim, const memory::dims& strides,
+                        void* data_buffer = nullptr) {
+    CHECK_EQ(dim.size(), strides.size());
+    auto blocked_md = MklDnnData<T>::CreateBlockedMemDesc(dim, strides);
+    SetUsrMem(blocked_md, data_buffer);
+  }
+
+  inline void SetUsrMem(const memory::dims& dim, const memory::dims& strides,
+                        const Tensor* tensor) {
+    CHECK_NOTNULL(tensor);
+    SetUsrMem(dim, strides, GetTensorBuffer(tensor));
+  }
+
   /// A version of function to set user memory primitive that accepts memory
   /// descriptor directly, instead of accepting dimensions and format. This
   /// function is more generic that the one above, but the function above is
   /// sufficient in most cases.
-  void SetUsrMem(memory::desc md, void* data_buffer) {
-    CHECK_NOTNULL(data_buffer);
-    CHECK_NOTNULL(cpu_engine_);
-    // TODO(nhasabni): can we remove dynamic memory allocation?
-    user_memory_ =
-        new memory(memory::primitive_desc(md, *cpu_engine_), data_buffer);
+  inline void SetUsrMem(const memory::desc& md, void* data_buffer = nullptr) {
+    auto pd = memory::primitive_desc(md, *cpu_engine_);
+    SetUsrMem(pd, data_buffer);
   }
 
   /// A version of SetUsrMem with memory descriptor and tensor
-  void SetUsrMem(memory::desc md, const Tensor* tensor) {
+  inline void SetUsrMem(const memory::desc& md, const Tensor* tensor) {
     CHECK_NOTNULL(tensor);
     SetUsrMem(md, GetTensorBuffer(tensor));
   }
@@ -872,41 +1349,60 @@ class MklDnnData {
   /// descriptor directly, instead of accepting dimensions and format. This
   /// function is more generic that the one above, but the function above is
   /// sufficient in most cases.
-  void SetUsrMem(memory::primitive_desc pd, void* data_buffer) {
-    CHECK_NOTNULL(data_buffer);
+  inline void SetUsrMem(const memory::primitive_desc& pd,
+                        void* data_buffer = nullptr) {
     CHECK_NOTNULL(cpu_engine_);
     // TODO(nhasabni): can we remove dynamic memory allocation?
-    user_memory_ = new memory(pd, data_buffer);
+    if (data_buffer) {
+      user_memory_ = new memory(pd, data_buffer);
+    } else {
+      user_memory_ = new memory(pd);
+    }
   }
 
   /// A version of SetUsrMem with primitive descriptor and tensor
-  void SetUsrMem(memory::primitive_desc pd, const Tensor* tensor) {
+  inline void SetUsrMem(const memory::primitive_desc& pd,
+                        const Tensor* tensor) {
     CHECK_NOTNULL(tensor);
     SetUsrMem(pd, GetTensorBuffer(tensor));
   }
 
   /// Get function for user memory primitive.
-  const memory* GetUsrMem() const { return user_memory_; }
+  inline const memory* GetUsrMem() const { return user_memory_; }
 
   /// Get function for primitive descriptor of user memory primitive.
-  const memory::primitive_desc GetUsrMemPrimDesc() const {
+  inline const memory::primitive_desc GetUsrMemPrimDesc() const {
     CHECK_NOTNULL(user_memory_);
     return user_memory_->get_primitive_desc();
   }
 
   /// Get function for descriptor of user memory.
-  memory::desc GetUsrMemDesc() {
+  inline memory::desc GetUsrMemDesc() {
     // This is ugly. Why MKL-DNN does not provide desc() method of const type??
     const memory::primitive_desc pd = GetUsrMemPrimDesc();
     return const_cast<memory::primitive_desc*>(&pd)->desc();
   }
 
   /// Get function for data buffer of user memory primitive.
-  void* GetUsrMemDataHandle() const {
+  inline void* GetUsrMemDataHandle() const {
     CHECK_NOTNULL(user_memory_);
     return user_memory_->get_data_handle();
   }
 
+  /// Set function for data buffer of user memory primitive.
+  inline void* SetUsrMemDataHandle(void* data_buffer) {
+    CHECK_NOTNULL(user_memory_);
+    CHECK_NOTNULL(data_buffer);
+    return user_memory_->set_data_handle(data_buffer);
+  }
+
+  /// Set function for data buffer of user memory primitive.
+  inline void SetUsrMemDataHandle(const Tensor* tensor) {
+    CHECK_NOTNULL(user_memory_);
+    CHECK_NOTNULL(tensor);
+    user_memory_->set_data_handle(GetTensorBuffer(tensor));
+  }
+
   /// Get the memory primitive for input and output of an op. If inputs
   /// to an op require reorders, then this function returns memory primitive
   /// for reorder. Otherwise, it will return memory primitive for user memory.
@@ -915,7 +1411,7 @@ class MklDnnData {
   /// execute Conv2D, we need memory primitive for I and F. Buf if reorder is
   /// required for I and F (say I_r is reorder primitive for I; F_r is reorder
   /// primitive for F), then we need I_r and F_r to perform Conv2D.
-  const memory& GetOpMem() const {
+  inline const memory& GetOpMem() const {
     return reorder_memory_ ? *reorder_memory_ : *user_memory_;
   }
 
@@ -923,13 +1419,32 @@ class MklDnnData {
   /// format. E.g., For Conv2D, the dimensions would be same as user dimensions
   /// but memory::format would be mkldnn::any because we want MKL-DNN to choose
   /// best layout/format for given input dimensions.
-  void SetOpMemDesc(const memory::dims& dim, memory::format fm) {
+  inline void SetOpMemDesc(const memory::dims& dim, memory::format fm) {
     // TODO(nhasabni): can we remove dynamic memory allocation?
     op_md_ = new memory::desc(dim, MklDnnType<T>(), fm);
   }
 
   /// Get function for memory descriptor for an operation
-  const memory::desc& GetOpMemDesc() const { return *op_md_; }
+  inline const memory::desc& GetOpMemDesc() const { return *op_md_; }
+
+  /// Predicate that checks if we need to reorder user's memory into memory
+  /// pointed by op_pd.
+  ///
+  /// @input: op_pd - memory primitive descriptor of the given input of an
+  ///               operation
+  /// @return: true in case reorder of input is needed; false, otherwise.
+  inline bool IsReorderNeeded(const memory::primitive_desc& op_pd) const {
+    CHECK_NOTNULL(user_memory_);
+    return op_pd != user_memory_->get_primitive_desc();
+  }
+
+  /// Function to create a reorder from memory pointed by from to memory pointed
+  /// by to. Returns created primitive.
+  inline primitive CreateReorder(const memory* from, const memory* to) const {
+    CHECK_NOTNULL(from);
+    CHECK_NOTNULL(to);
+    return reorder(*from, *to);
+  }
 
   /// Function to handle input reordering
   ///
@@ -945,19 +1460,62 @@ class MklDnnData {
   ///               operation
   /// @input: net - net to which to add reorder primitive in case it is needed.
   /// @return: true in case reorder of input is needed; false, otherwise.
-  bool CheckReorderToOpMem(const memory::primitive_desc& op_pd,
-                           std::vector<primitive>* net) {
+  inline bool CheckReorderToOpMem(const memory::primitive_desc& op_pd,
+                                  std::vector<primitive>* net) {
     CHECK_NOTNULL(net);
     CHECK_NOTNULL(user_memory_);
-    if (op_pd != user_memory_->get_primitive_desc()) {
+    if (IsReorderNeeded(op_pd)) {
       // TODO(nhasabni): can we remove dynamic memory allocation?
       reorder_memory_ = new memory(op_pd);
-      net->push_back(reorder(*user_memory_, *reorder_memory_));
+      net->push_back(CreateReorder(user_memory_, reorder_memory_));
+      return true;
+    }
+    return false;
+  }
+
+  /// Overloaded version of above function that accepts memory buffer
+  /// where output of reorder needs to be stored.
+  ///
+  /// @input: op_pd - memory primitive descriptor of the given input of an
+  ///               operation
+  /// @reorder_data_handle - memory buffer where output of reorder needs to be
+  ///                        stored. Primitive does not check if buffer is
+  ///                        enough size to write.
+  /// @input: net - net to which to add reorder primitive in case it is needed.
+  /// @return: true in case reorder of input is needed; false, otherwise.
+  inline bool CheckReorderToOpMem(const memory::primitive_desc& op_pd,
+                                  void* reorder_data_handle,
+                                  std::vector<primitive>* net) {
+    CHECK_NOTNULL(net);
+    CHECK_NOTNULL(reorder_data_handle);
+    CHECK_NOTNULL(user_memory_);
+    if (IsReorderNeeded(op_pd)) {
+      // TODO(nhasabni): can we remove dynamic memory allocation?
+      reorder_memory_ = new memory(op_pd, reorder_data_handle);
+      net->push_back(CreateReorder(user_memory_, reorder_memory_));
       return true;
     }
     return false;
   }
 
+  /// Another overloaded version of CheckReorderToOpMem that accepts Tensor
+  /// where output of reorder needs to be stored.
+  ///
+  /// @input: op_pd - memory primitive descriptor of the given input of an
+  ///               operation
+  /// @reorder_tensor - Tensor whose buffer is to be used to store output of
+  ///                   reorder. Primitive does not check if buffer is
+  ///                   enough size to write.
+  /// @input: net - net to which to add reorder primitive in case it is needed.
+  /// @return: true in case reorder of input is needed; false, otherwise.
+  inline bool CheckReorderToOpMem(const memory::primitive_desc& op_pd,
+                                  Tensor* reorder_tensor,
+                                  std::vector<primitive>* net) {
+    CHECK_NOTNULL(net);
+    CHECK_NOTNULL(reorder_tensor);
+    return CheckReorderToOpMem(op_pd, GetTensorBuffer(reorder_tensor), net);
+  }
+
   /// Function to handle output reorder
   ///
   /// This function performs very similar functionality as input reordering
@@ -970,9 +1528,10 @@ class MklDnnData {
   ///
   /// @input memory primitive descriptor for the given output of an operation
   /// @return: true in case reorder of output is needed; false, otherwise.
-  bool PrepareReorderToUserMemIfReq(const memory::primitive_desc& op_pd) {
+  inline bool PrepareReorderToUserMemIfReq(
+      const memory::primitive_desc& op_pd) {
     CHECK_NOTNULL(user_memory_);
-    if (op_pd != user_memory_->get_primitive_desc()) {
+    if (IsReorderNeeded(op_pd)) {
       // TODO(nhasabni): can we remove dynamic memory allocation?
       reorder_memory_ = new memory(op_pd);
       return true;
@@ -987,11 +1546,11 @@ class MklDnnData {
   /// to the user-specified output buffer.
   ///
   /// @input: net - net to which to add reorder primitive
-  void InsertReorderToUserMem(std::vector<primitive>* net) {
+  inline void InsertReorderToUserMem(std::vector<primitive>* net) {
     CHECK_NOTNULL(net);
     CHECK_NOTNULL(user_memory_);
     CHECK_NOTNULL(reorder_memory_);
-    net->push_back(reorder(*reorder_memory_, *user_memory_));
+    net->push_back(CreateReorder(reorder_memory_, user_memory_));
   }
 };
 
diff --git a/tensorflow/core/util/mkl_util_test.cc b/tensorflow/core/util/mkl_util_test.cc
new file mode 100644
index 0000000000..8b73eadb40
--- /dev/null
+++ b/tensorflow/core/util/mkl_util_test.cc
@@ -0,0 +1,91 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifdef INTEL_MKL
+
+#include "tensorflow/core/util/mkl_util.h"
+
+#include "tensorflow/core/platform/test.h"
+
+namespace tensorflow {
+namespace {
+
+#ifdef INTEL_MKL_DNN
+
+TEST(MklUtilTest, MklDnnTfShape) {
+  auto cpu_engine = engine(engine::cpu, 0);
+  MklDnnData<float> a(&cpu_engine);
+
+  const int N = 1, C = 2, H = 3, W = 4;
+  memory::dims a_dims = {N, C, H, W};
+  MklDnnShape a_mkldnn_shape;
+  a_mkldnn_shape.SetMklTensor(true);
+  // Create TF layout in NCHW.
+  a_mkldnn_shape.SetTfLayout(a_dims.size(), a_dims, memory::format::nchw);
+  TensorShape a_tf_shape_nchw({N, C, H, W});
+  TensorShape a_tf_shape_nhwc({N, H, W, C});
+  TensorShape a_mkldnn_tf_shape = a_mkldnn_shape.GetTfShape();
+  // Check that returned shape is in NCHW format.
+  EXPECT_EQ(a_tf_shape_nchw, a_mkldnn_tf_shape);
+  EXPECT_NE(a_tf_shape_nhwc, a_mkldnn_tf_shape);
+
+  memory::dims b_dims = {N, C, H, W};
+  MklDnnShape b_mkldnn_shape;
+  b_mkldnn_shape.SetMklTensor(true);
+  // Create TF layout in NHWC.
+  b_mkldnn_shape.SetTfLayout(b_dims.size(), b_dims, memory::format::nhwc);
+  TensorShape b_tf_shape_nhwc({N, H, W, C});
+  TensorShape b_tf_shape_nchw({N, C, H, W});
+  TensorShape b_mkldnn_tf_shape = b_mkldnn_shape.GetTfShape();
+  // Check that returned shape is in NHWC format.
+  EXPECT_EQ(b_tf_shape_nhwc, b_mkldnn_tf_shape);
+  EXPECT_NE(b_tf_shape_nchw, b_mkldnn_tf_shape);
+}
+
+TEST(MklUtilTest, MklDnnBlockedFormatTest) {
+  // Let's create 2D tensor of shape {3, 4} with 3 being innermost dimension
+  // first (case 1) and then it being outermost dimension (case 2).
+  auto cpu_engine = engine(engine::cpu, 0);
+
+  // Setting for case 1
+  MklDnnData<float> a(&cpu_engine);
+  memory::dims dim1 = {3, 4};
+  memory::dims strides1 = {1, 3};
+  a.SetUsrMem(dim1, strides1);
+
+  memory::desc a_md1 = a.GetUsrMemDesc();
+  EXPECT_EQ(a_md1.data.ndims, 2);
+  EXPECT_EQ(a_md1.data.dims[0], 3);
+  EXPECT_EQ(a_md1.data.dims[1], 4);
+  EXPECT_EQ(a_md1.data.format, mkldnn_blocked);
+
+  // Setting for case 2
+  MklDnnData<float> b(&cpu_engine);
+  memory::dims dim2 = {3, 4};
+  memory::dims strides2 = {4, 1};
+  b.SetUsrMem(dim2, strides2);
+
+  memory::desc b_md2 = b.GetUsrMemDesc();
+  EXPECT_EQ(b_md2.data.ndims, 2);
+  EXPECT_EQ(b_md2.data.dims[0], 3);
+  EXPECT_EQ(b_md2.data.dims[1], 4);
+  EXPECT_EQ(b_md2.data.format, mkldnn_blocked);
+}
+
+#endif  // INTEL_MKL_DNN
+}  // namespace
+}  // namespace tensorflow
+
+#endif  // INTEL_MKL
diff --git a/tensorflow/docs_src/api_guides/python/threading_and_queues.md b/tensorflow/docs_src/api_guides/python/threading_and_queues.md
index ab95ce0af9..8ad4c4c075 100644
--- a/tensorflow/docs_src/api_guides/python/threading_and_queues.md
+++ b/tensorflow/docs_src/api_guides/python/threading_and_queues.md
@@ -3,7 +3,7 @@
 Note: In versions of TensorFlow before 1.2, we recommended using multi-threaded,
 queue-based input pipelines for performance. Beginning with TensorFlow 1.4,
 however, we recommend using the `tf.data` module instead. (See
-[Datasets](datasets) for details. In TensorFlow 1.2 and 1.3, the module was
+@{$datasets$Datasets} for details. In TensorFlow 1.2 and 1.3, the module was
 called `tf.contrib.data`.) The `tf.data` module offers an easier-to-use
 interface for constructing efficient input pipelines. Furthermore, we've stopped
 developing the old multi-threaded, queue-based input pipelines.  We've retained
diff --git a/tensorflow/docs_src/get_started/get_started.md b/tensorflow/docs_src/get_started/get_started.md
index 8409962744..be14ab4026 100644
--- a/tensorflow/docs_src/get_started/get_started.md
+++ b/tensorflow/docs_src/get_started/get_started.md
@@ -272,7 +272,7 @@ train = optimizer.minimize(loss)
 ```
 
 ```python
-sess.run(init) # reset values to incorrect defaults.
+sess.run(init) # reset variables to incorrect defaults.
 for i in range(1000):
   sess.run(train, {x: [1, 2, 3, 4], y: [0, -1, -2, -3]})
 
@@ -317,7 +317,7 @@ y_train = [0, -1, -2, -3]
 # training loop
 init = tf.global_variables_initializer()
 sess = tf.Session()
-sess.run(init) # reset values to wrong
+sess.run(init) # initialize variables with incorrect defaults.
 for i in range(1000):
   sess.run(train, {x: x_train, y: y_train})
 
@@ -383,7 +383,7 @@ train_input_fn = tf.estimator.inputs.numpy_input_fn(
 eval_input_fn = tf.estimator.inputs.numpy_input_fn(
     {"x": x_eval}, y_eval, batch_size=4, num_epochs=1000, shuffle=False)
 
-# We can invoke 1000 training steps by invoking the  method and passing the
+# We can invoke 1000 training steps by invoking the method and passing the
 # training data set.
 estimator.train(input_fn=input_fn, steps=1000)
 
diff --git a/tensorflow/docs_src/get_started/input_fn.md b/tensorflow/docs_src/get_started/input_fn.md
index 9d3af5d96a..0db5c6143a 100644
--- a/tensorflow/docs_src/get_started/input_fn.md
+++ b/tensorflow/docs_src/get_started/input_fn.md
@@ -191,7 +191,7 @@ import pandas as pd
 
 def get_input_fn_from_pandas(data_set, num_epochs=None, shuffle=True):
   return tf.estimator.inputs.pandas_input_fn(
-      x=pdDataFrame(...),
+      x=pd.DataFrame(...),
       y=pd.Series(...),
       num_epochs=num_epochs,
       shuffle=shuffle)
@@ -267,8 +267,8 @@ tf.logging.set_verbosity(tf.logging.INFO)
 
 Define the column names for the data set in `COLUMNS`. To distinguish features
 from the label, also define `FEATURES` and `LABEL`. Then read the three CSVs
-(@{tf.train},
-@{tf.test}, and
+([train](http://download.tensorflow.org/data/boston_train.csv),
+[test](http://download.tensorflow.org/data/boston_test.csv), and
 [predict](http://download.tensorflow.org/data/boston_predict.csv)) into _pandas_
 `DataFrame`s:
 
diff --git a/tensorflow/docs_src/install/install_c.md b/tensorflow/docs_src/install/install_c.md
index 3a153e8114..df622c6ac5 100644
--- a/tensorflow/docs_src/install/install_c.md
+++ b/tensorflow/docs_src/install/install_c.md
@@ -38,7 +38,7 @@ enable TensorFlow for C:
          OS="linux" # Change to "darwin" for macOS
          TARGET_DIRECTORY="/usr/local"
          curl -L \
-           "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-${TF_TYPE}-${OS}-x86_64-1.4.0-rc1.tar.gz" |
+           "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-${TF_TYPE}-${OS}-x86_64-1.4.0.tar.gz" |
            sudo tar -C $TARGET_DIRECTORY -xz
 
      The `tar` command extracts the TensorFlow C library into the `lib`
diff --git a/tensorflow/docs_src/install/install_go.md b/tensorflow/docs_src/install/install_go.md
index df43255896..8b3da49a0d 100644
--- a/tensorflow/docs_src/install/install_go.md
+++ b/tensorflow/docs_src/install/install_go.md
@@ -38,7 +38,7 @@ steps to install this library and enable TensorFlow for Go:
          TF_TYPE="cpu" # Change to "gpu" for GPU support
          TARGET_DIRECTORY='/usr/local'
          curl -L \
-           "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-${TF_TYPE}-$(go env GOOS)-x86_64-1.4.0-rc1.tar.gz" |
+           "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-${TF_TYPE}-$(go env GOOS)-x86_64-1.4.0.tar.gz" |
          sudo tar -C $TARGET_DIRECTORY -xz
 
      The `tar` command extracts the TensorFlow C library into the `lib`
diff --git a/tensorflow/docs_src/install/install_java.md b/tensorflow/docs_src/install/install_java.md
index f7f2c3cdc7..6eb8158249 100644
--- a/tensorflow/docs_src/install/install_java.md
+++ b/tensorflow/docs_src/install/install_java.md
@@ -36,7 +36,7 @@ following to the project's `pom.xml` to use the TensorFlow Java APIs:
 <dependency>
   <groupId>org.tensorflow</groupId>
   <artifactId>tensorflow</artifactId>
-  <version>1.4.0-rc1</version>
+  <version>1.4.0</version>
 </dependency>
 ```
 
@@ -65,7 +65,7 @@ As an example, these steps will create a Maven project that uses TensorFlow:
                <dependency>
                  <groupId>org.tensorflow</groupId>
                  <artifactId>tensorflow</artifactId>
-                 <version>1.4.0-rc1</version>
+                 <version>1.4.0</version>
                </dependency>
              </dependencies>
          </project>
@@ -124,7 +124,7 @@ refer to the simpler instructions above instead.
 Take the following steps to install TensorFlow for Java on Linux or macOS:
 
   1. Download
-     [libtensorflow.jar](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-1.4.0-rc1.jar),
+     [libtensorflow.jar](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-1.4.0.jar),
      which is the TensorFlow Java Archive (JAR).
 
   2. Decide whether you will run TensorFlow for Java on CPU(s) only or with
@@ -143,7 +143,7 @@ Take the following steps to install TensorFlow for Java on Linux or macOS:
          OS=$(uname -s | tr '[:upper:]' '[:lower:]')
          mkdir -p ./jni
          curl -L \
-           "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow_jni-${TF_TYPE}-${OS}-x86_64-1.4.0-rc1.tar.gz" |
+           "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow_jni-${TF_TYPE}-${OS}-x86_64-1.4.0.tar.gz" |
            tar -xz -C ./jni
 
 ### Install on Windows
@@ -151,10 +151,10 @@ Take the following steps to install TensorFlow for Java on Linux or macOS:
 Take the following steps to install TensorFlow for Java on Windows:
 
   1. Download
-     [libtensorflow.jar](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-1.4.0-rc1.jar),
+     [libtensorflow.jar](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-1.4.0.jar),
      which is the TensorFlow Java Archive (JAR).
   2. Download the following Java Native Interface (JNI) file appropriate for
-     [TensorFlow for Java on Windows](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow_jni-cpu-windows-x86_64-1.4.0-rc1.zip).
+     [TensorFlow for Java on Windows](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow_jni-cpu-windows-x86_64-1.4.0.zip).
   3. Extract this .zip file.
 
 
@@ -202,7 +202,7 @@ must be part of your `classpath`. For example, you can include the
 downloaded `.jar` in your `classpath` by using the `-cp` compilation flag
 as follows:
 
-<pre><b>javac -cp libtensorflow-1.4.0-rc1.jar HelloTF.java</b></pre>
+<pre><b>javac -cp libtensorflow-1.4.0.jar HelloTF.java</b></pre>
 
 
 ### Running
@@ -216,11 +216,11 @@ two files are available to the JVM:
 For example, the following command line executes the `HelloTF` program on Linux
 and macOS X:
 
-<pre><b>java -cp libtensorflow-1.4.0-rc1.jar:. -Djava.library.path=./jni HelloTF</b></pre>
+<pre><b>java -cp libtensorflow-1.4.0.jar:. -Djava.library.path=./jni HelloTF</b></pre>
 
 And the following command line executes the `HelloTF` program on Windows:
 
-<pre><b>java -cp libtensorflow-1.4.0-rc1.jar;. -Djava.library.path=jni HelloTF</b></pre>
+<pre><b>java -cp libtensorflow-1.4.0.jar;. -Djava.library.path=jni HelloTF</b></pre>
 
 If the program prints <tt>Hello from <i>version</i></tt>, you've successfully
 installed TensorFlow for Java and are ready to use the API.  If the program
diff --git a/tensorflow/docs_src/install/install_linux.md b/tensorflow/docs_src/install/install_linux.md
index 414ab7b1f7..f7380bac8a 100644
--- a/tensorflow/docs_src/install/install_linux.md
+++ b/tensorflow/docs_src/install/install_linux.md
@@ -188,7 +188,7 @@ Take the following steps to install TensorFlow with Virtualenv:
      Virtualenv environment:
 
      <pre>(tensorflow)$ <b>pip3 install --upgrade \
-     https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.4.0rc1-cp34-cp34m-linux_x86_64.whl</b></pre>
+     https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.4.0-cp34-cp34m-linux_x86_64.whl</b></pre>
 
 If you encounter installation problems, see
 [Common Installation Problems](#common_installation_problems).
@@ -293,7 +293,7 @@ take the following steps:
 
      <pre>
      $ <b>sudo pip3 install --upgrade \
-     https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.4.0rc1-cp34-cp34m-linux_x86_64.whl</b>
+     https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.4.0-cp34-cp34m-linux_x86_64.whl</b>
      </pre>
 
      If this step fails, see
@@ -480,7 +480,7 @@ Take the following steps to install TensorFlow in an Anaconda environment:
 
      <pre>
      (tensorflow)$ <b>pip install --ignore-installed --upgrade \
-     https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.4.0rc1-cp34-cp34m-linux_x86_64.whl</b></pre>
+     https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.4.0-cp34-cp34m-linux_x86_64.whl</b></pre>
 
 
 <a name="ValidateYourInstallation"></a>
@@ -648,14 +648,14 @@ This section documents the relevant values for Linux installations.
 CPU only:
 
 <pre>
-https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.4.0rc1-cp27-none-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.4.0-cp27-none-linux_x86_64.whl
 </pre>
 
 
 GPU support:
 
 <pre>
-https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.4.0rc1-cp27-none-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.4.0-cp27-none-linux_x86_64.whl
 </pre>
 
 Note that GPU support requires the NVIDIA hardware and software described in
@@ -667,14 +667,14 @@ Note that GPU support requires the NVIDIA hardware and software described in
 CPU only:
 
 <pre>
-https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.4.0rc1-cp34-cp34m-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.4.0-cp34-cp34m-linux_x86_64.whl
 </pre>
 
 
 GPU support:
 
 <pre>
-https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.4.0rc1-cp34-cp34m-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.4.0-cp34-cp34m-linux_x86_64.whl
 </pre>
 
 Note that GPU support requires the NVIDIA hardware and software described in
@@ -686,14 +686,14 @@ Note that GPU support requires the NVIDIA hardware and software described in
 CPU only:
 
 <pre>
-https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.4.0rc1-cp35-cp35m-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.4.0-cp35-cp35m-linux_x86_64.whl
 </pre>
 
 
 GPU support:
 
 <pre>
-https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.4.0rc1-cp35-cp35m-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.4.0-cp35-cp35m-linux_x86_64.whl
 </pre>
 
 
@@ -705,14 +705,14 @@ Note that GPU support requires the NVIDIA hardware and software described in
 CPU only:
 
 <pre>
-https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.4.0rc1-cp36-cp36m-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.4.0-cp36-cp36m-linux_x86_64.whl
 </pre>
 
 
 GPU support:
 
 <pre>
-https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.4.0rc1-cp36-cp36m-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.4.0-cp36-cp36m-linux_x86_64.whl
 </pre>
 
 
diff --git a/tensorflow/docs_src/install/install_mac.md b/tensorflow/docs_src/install/install_mac.md
index 9a95710bfa..79b383817b 100644
--- a/tensorflow/docs_src/install/install_mac.md
+++ b/tensorflow/docs_src/install/install_mac.md
@@ -114,7 +114,7 @@ Take the following steps to install TensorFlow with Virtualenv:
      TensorFlow in the active Virtualenv is as follows:
 
      <pre> $ <b>pip3 install --upgrade \
-     https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.4.0rc1-py2-none-any.whl</b></pre>
+     https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.4.0-py2-none-any.whl</b></pre>
 
 If you encounter installation problems, see
 [Common Installation Problems](#common-installation-problems).
@@ -235,7 +235,7 @@ take the following steps:
      issue the following command:
 
      <pre> $ <b>sudo pip3 install --upgrade \
-     https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.4.0rc1-py2-none-any.whl</b> </pre>
+     https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.4.0-py2-none-any.whl</b> </pre>
 
      If the preceding command fails, see
      [installation problems](#common-installation-problems).
@@ -344,7 +344,7 @@ Take the following steps to install TensorFlow in an Anaconda environment:
      TensorFlow for Python 2.7:
 
      <pre> (tensorflow)$ <b>pip install --ignore-installed --upgrade \
-     https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.4.0rc1-py2-none-any.whl</b></pre>
+     https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.4.0-py2-none-any.whl</b></pre>
 
 
 <a name="ValidateYourInstallation"></a>
@@ -517,7 +517,7 @@ This section documents the relevant values for Mac OS installations.
 
 
 <pre>
-https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.4.0rc1-py2-none-any.whl
+https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.4.0-py2-none-any.whl
 </pre>
 
 
@@ -525,7 +525,7 @@ https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.4.0rc1-py2-none-a
 
 
 <pre>
-https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.4.0rc1-py3-none-any.whl
+https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.4.0-py3-none-any.whl
 </pre>
 
 
diff --git a/tensorflow/docs_src/install/install_sources.md b/tensorflow/docs_src/install/install_sources.md
index 6d0dcdcd4a..aa4ae6c876 100644
--- a/tensorflow/docs_src/install/install_sources.md
+++ b/tensorflow/docs_src/install/install_sources.md
@@ -355,10 +355,10 @@ Invoke `pip install` to install that pip package.
 The filename of the `.whl` file depends on your platform.
 For example, the following command will install the pip package
 
-for TensorFlow 1.4.0rc1 on Linux:
+for TensorFlow 1.4.0 on Linux:
 
 <pre>
-$ <b>sudo pip install /tmp/tensorflow_pkg/tensorflow-1.4.0rc1-py2-none-any.whl</b>
+$ <b>sudo pip install /tmp/tensorflow_pkg/tensorflow-1.4.0-py2-none-any.whl</b>
 </pre>
 
 ## Validate your installation
@@ -447,8 +447,10 @@ Stack Overflow and specify the `tensorflow` tag.
 **Linux**
 <table>
 <tr><th>Version:</th><th>CPU/GPU:</th><th>Python Version:</th><th>Compiler:</th><th>Build Tools:</th><th>cuDNN:</th><th>CUDA:</th></tr>
-<tr><td>tensorflow-1.4.0rc1</td><td>CPU</td><td>2.7, 3.3-3.6</td><td>GCC 4.8</td><td>Bazel 0.4.5</td><td>N/A</td><td>N/A</td></tr>
-<tr><td>tensorflow_gpu-1.4.0rc1</td><td>GPU</td><td>2.7, 3.3-3.6</td><td>GCC 4.8</td><td>Bazel 0.4.5</td><td>6</td><td>8</td></tr>
+<tr><td>tensorflow-1.4.0</td><td>CPU</td><td>2.7, 3.3-3.6</td><td>GCC 4.8</td><td>Bazel 0.5.4</td><td>N/A</td><td>N/A</td></tr>
+<tr><td>tensorflow_gpu-1.4.0</td><td>GPU</td><td>2.7, 3.3-3.6</td><td>GCC 4.8</td><td>Bazel 0.5.4</td><td>6</td><td>8</td></tr>
+ <tr><td>tensorflow-1.3.0</td><td>CPU</td><td>2.7, 3.3-3.6</td><td>GCC 4.8</td><td>Bazel 0.4.5</td><td>N/A</td><td>N/A</td></tr>
+<tr><td>tensorflow_gpu-1.3.0</td><td>GPU</td><td>2.7, 3.3-3.6</td><td>GCC 4.8</td><td>Bazel 0.4.5</td><td>6</td><td>8</td></tr>
 <tr><td>tensorflow-1.2.0</td><td>CPU</td><td>2.7, 3.3-3.6</td><td>GCC 4.8</td><td>Bazel 0.4.5</td><td>N/A</td><td>N/A</td></tr>
 <tr><td>tensorflow_gpu-1.2.0</td><td>GPU</td><td>2.7, 3.3-3.6</td><td>GCC 4.8</td><td>Bazel 0.4.5</td><td>5.1</td><td>8</td></tr>
 <tr><td>tensorflow-1.1.0</td><td>CPU</td><td>2.7, 3.3-3.6</td><td>GCC 4.8</td><td>Bazel 0.4.2</td><td>N/A</td><td>N/A</td></tr>
@@ -460,7 +462,8 @@ Stack Overflow and specify the `tensorflow` tag.
 **Mac**
 <table>
 <tr><th>Version:</th><th>CPU/GPU:</th><th>Python Version:</th><th>Compiler:</th><th>Build Tools:</th><th>cuDNN:</th><th>CUDA:</th></tr>
-<tr><td>tensorflow-1.4.0rc1</td><td>CPU</td><td>2.7, 3.3-3.6</td><td>Clang from xcode</td><td>Bazel 0.4.5</td><td>N/A</td><td>N/A</td></tr>
+<tr><td>tensorflow-1.4.0</td><td>CPU</td><td>2.7, 3.3-3.6</td><td>Clang from xcode</td><td>Bazel 0.5.4</td><td>N/A</td><td>N/A</td></tr>
+ <tr><td>tensorflow-1.3.0</td><td>CPU</td><td>2.7, 3.3-3.6</td><td>Clang from xcode</td><td>Bazel 0.4.5</td><td>N/A</td><td>N/A</td></tr>
 <tr><td>tensorflow-1.2.0</td><td>CPU</td><td>2.7, 3.3-3.6</td><td>Clang from xcode</td><td>Bazel 0.4.5</td><td>N/A</td><td>N/A</td></tr>
 <tr><td>tensorflow-1.1.0</td><td>CPU</td><td>2.7, 3.3-3.6</td><td>Clang from xcode</td><td>Bazel 0.4.2</td><td>N/A</td><td>N/A</td></tr>
 <tr><td>tensorflow_gpu-1.1.0</td><td>GPU</td><td>2.7, 3.3-3.6</td><td>Clang from xcode</td><td>Bazel 0.4.2</td><td>5.1</td><td>8</td></tr>
@@ -471,8 +474,10 @@ Stack Overflow and specify the `tensorflow` tag.
 **Windows**
 <table>
 <tr><th>Version:</th><th>CPU/GPU:</th><th>Python Version:</th><th>Compiler:</th><th>Build Tools:</th><th>cuDNN:</th><th>CUDA:</th></tr>
-<tr><td>tensorflow-1.4.0rc1</td><td>CPU</td><td>3.5-3.6</td><td>MSVC 2015 update 3</td><td>Cmake v3.6.3</td><td>N/A</td><td>N/A</td></tr>
-<tr><td>tensorflow_gpu-1.4.0rc1</td><td>GPU</td><td>3.5-3.6</td><td>MSVC 2015 update 3</td><td>Cmake v3.6.3</td><td>6</td><td>8</td></tr>
+<tr><td>tensorflow-1.4.0</td><td>CPU</td><td>3.5-3.6</td><td>MSVC 2015 update 3</td><td>Cmake v3.6.3</td><td>N/A</td><td>N/A</td></tr>
+<tr><td>tensorflow_gpu-1.4.0</td><td>GPU</td><td>3.5-3.6</td><td>MSVC 2015 update 3</td><td>Cmake v3.6.3</td><td>6</td><td>8</td></tr>
+<tr><td>tensorflow-1.3.0</td><td>CPU</td><td>3.5-3.6</td><td>MSVC 2015 update 3</td><td>Cmake v3.6.3</td><td>N/A</td><td>N/A</td></tr>
+<tr><td>tensorflow_gpu-1.3.0</td><td>GPU</td><td>3.5-3.6</td><td>MSVC 2015 update 3</td><td>Cmake v3.6.3</td><td>6</td><td>8</td></tr>
 <tr><td>tensorflow-1.2.0</td><td>CPU</td><td>3.5-3.6</td><td>MSVC 2015 update 3</td><td>Cmake v3.6.3</td><td>N/A</td><td>N/A</td></tr>
 <tr><td>tensorflow_gpu-1.2.0</td><td>GPU</td><td>3.5-3.6</td><td>MSVC 2015 update 3</td><td>Cmake v3.6.3</td><td>5.1</td><td>8</td></tr>
 <tr><td>tensorflow-1.1.0</td><td>CPU</td><td>3.5</td><td>MSVC 2015 update 3</td><td>Cmake v3.6.3</td><td>N/A</td><td>N/A</td></tr>
diff --git a/tensorflow/docs_src/mobile/prepare_models.md b/tensorflow/docs_src/mobile/prepare_models.md
index c5a560e074..8fc65be35a 100644
--- a/tensorflow/docs_src/mobile/prepare_models.md
+++ b/tensorflow/docs_src/mobile/prepare_models.md
@@ -296,6 +296,6 @@ complains about missing header files, add the .h’s that are needed into
 the
 [`android_extended_ops`](https://www.tensorflow.org/code/tensorflow/core/kernels/BUILD#L3525) target.
 
-If you’re using a makefile targetting iOS, Raspberry Pi, etc, go to
+If you’re using a makefile targeting iOS, Raspberry Pi, etc, go to
 [`tensorflow/contrib/makefile/tf_op_files.txt`](https://www.tensorflow.org/code/tensorflow/contrib/makefile/tf_op_files.txt) and
 add the right implementation files there.
diff --git a/tensorflow/docs_src/programmers_guide/debugger.md b/tensorflow/docs_src/programmers_guide/debugger.md
index 1f856bbf3f..25cb72008d 100644
--- a/tensorflow/docs_src/programmers_guide/debugger.md
+++ b/tensorflow/docs_src/programmers_guide/debugger.md
@@ -9,11 +9,19 @@ lets you view the internal structure and states of running TensorFlow graphs
 during training and inference, which is difficult to debug with general-purpose
 debuggers such as Python's `pdb` due to TensorFlow's computation-graph paradigm.
 
-> NOTE: The system requirements of tfdbg on supported external platforms include
-> the following. On Mac OS X, the `ncurses` library is required. It can be
-> installed with `brew install homebrew/dupes/ncurses`. On Windows, `pyreadline`
-> is required. If you use Anaconda3, you can install it with a command
+> NOTE: TensorFlow debugger uses a
+> [curses](https://en.wikipedia.org/wiki/Curses_\(programming_library\))-based
+> text user interface. On Mac OS X, the `ncurses` library is required and can
+> be installed with `brew install homebrew/dupes/ncurses`. On Windows, curses
+> isn't as well supported, so a
+> [readline](https://en.wikipedia.org/wiki/GNU_Readline)-based interface can
+> be used with tfdbg by installing `pyreadline` with pip.
+> If you use Anaconda3, you can install it with a command
 > such as `"C:\Program Files\Anaconda3\Scripts\pip.exe" install pyreadline`.
+> Unofficial Windows curses packages can be downloaded
+> [here](https://www.lfd.uci.edu/~gohlke/pythonlibs/#curses), then subsequently
+> installed using `pip install <your_version>.whl`, however curses on Windows
+> may not work as reliably as curses on Linux or Mac.
 
 This tutorial demonstrates how to use the **tfdbg** command-line interface
 (CLI) to debug the appearance of [`nan`s](https://en.wikipedia.org/wiki/NaN)
@@ -149,6 +157,7 @@ Try the following commands at the `tfdbg>` prompt (referencing the code at
 | | `pt <tensor>[slicing]` | Print a subarray of tensor, using [numpy](http://www.numpy.org/)-style array slicing. | `pt hidden/Relu:0[0:50,:]` |
 | | `-a` | Print the entirety of a large tensor, without using ellipses. (May take a long time for large tensors.) | `pt -a hidden/Relu:0[0:50,:]` |
 | | `-r <range>` | Highlight elements falling into specified numerical range. Multiple ranges can be used in conjunction. | `pt hidden/Relu:0 -a -r [[-inf,-1],[1,inf]]` |
+| | `-n <number>` | Print dump corresponding to specified 0-based dump number. Required for tensors with multiple dumps. | `pt -n 0 hidden/Relu:0` |
 | | `-s` | Include a summary of the numeric values of the tensor (applicable only to non-empty tensors with Boolean and numeric types such as `int*` and `float*`.) | `pt -s hidden/Relu:0[0:50,:]` |
 | **`@[coordinates]`** | | Navigate to specified element in `pt` output. | `@[10,0]` or `@10,0` |
 | **`/regex`** | |  [less](https://linux.die.net/man/1/less)-style search for given regular expression. | `/inf` |
@@ -166,10 +175,12 @@ Try the following commands at the `tfdbg>` prompt (referencing the code at
 | | `-r` | List the inputs to node, recursively (the input tree.) | `li -r hidden/Relu:0` |
 | | `-d <max_depth>` | Limit recursion depth under the `-r` mode. | `li -r -d 3 hidden/Relu:0` |
 | | `-c` | Include control inputs. | `li -c -r hidden/Relu:0` |
+| | `-t` | Show op types of input nodes. | `li -t -r hidden/Relu:0` |
 | **`lo`** | | **List output recipients of node** | |
 | | `-r` | List the output recipients of node, recursively (the output tree.) | `lo -r hidden/Relu:0` |
 | | `-d <max_depth>` | Limit recursion depth under the `-r` mode. | `lo -r -d 3 hidden/Relu:0` |
 | | `-c` | Include recipients via control edges. | `lo -c -r hidden/Relu:0` |
+| | `-t` | Show op types of recipient nodes. | `lo -t -r hidden/Relu:0` |
 | **`ls`** | | **List Python source files involved in node creation.** | |
 | | `-p <path_pattern>` | Limit output to source files matching given regular-expression path pattern. | `ls -p .*debug_mnist.*` |
 | | `-n` | Limit output to node names matching given regular-expression pattern. | `ls -n Softmax.*` |
diff --git a/tensorflow/docs_src/programmers_guide/tensors.md b/tensorflow/docs_src/programmers_guide/tensors.md
index d6f80430cd..88eb277e35 100644
--- a/tensorflow/docs_src/programmers_guide/tensors.md
+++ b/tensorflow/docs_src/programmers_guide/tensors.md
@@ -29,8 +29,8 @@ Some types of tensors are special, and these will be covered in other
 units of the Programmer's guide. The main ones are:
 
   * `tf.Variable`
-  * `tf.Constant`
-  * `tf.Placeholder`
+  * `tf.constant`
+  * `tf.placeholder`
   * `tf.SparseTensor`
 
 With the exception of `tf.Variable`, the value of a tensor is immutable, which
@@ -64,7 +64,7 @@ The following snippet demonstrates creating a few rank 0 variables:
 mammal = tf.Variable("Elephant", tf.string)
 ignition = tf.Variable(451, tf.int16)
 floating = tf.Variable(3.14159265359, tf.float64)
-its_complicated = tf.Variable((12.3, -4.85), tf.complex64)
+its_complicated = tf.Variable(12.3 - 4.85j, tf.complex64)
 ```
 
 Note: A string is treated as a single item in TensorFlow, not as a sequence of
@@ -79,7 +79,7 @@ initial value. For example:
 mystr = tf.Variable(["Hello"], tf.string)
 cool_numbers  = tf.Variable([3.14159, 2.71828], tf.float32)
 first_primes = tf.Variable([2, 3, 5, 7, 11], tf.int32)
-its_very_complicated = tf.Variable([(12.3, -4.85), (7.5, -6.23)], tf.complex64)
+its_very_complicated = tf.Variable([12.3 - 4.85j, 7.5 - 6.23j], tf.complex64)
 ```
 
 
@@ -275,8 +275,8 @@ Graphs and Sessions for more information).
 
 Sometimes it is not possible to evaluate a `tf.Tensor` with no context because
 its value might depend on dynamic information that is not available. For
-example, tensors that depend on `Placeholder`s can't be evaluated without
-providing a value for the `Placeholder`.
+example, tensors that depend on `placeholder`s can't be evaluated without
+providing a value for the `placeholder`.
 
 ``` python
 p = tf.placeholder(tf.float32)
diff --git a/tensorflow/examples/speech_commands/models.py b/tensorflow/examples/speech_commands/models.py
index 82d6a94ea1..ab611f414a 100644
--- a/tensorflow/examples/speech_commands/models.py
+++ b/tensorflow/examples/speech_commands/models.py
@@ -326,7 +326,7 @@ def create_low_latency_conv_model(fingerprint_input, model_settings,
   first_filter_height = input_time_size
   first_filter_count = 186
   first_filter_stride_x = 1
-  first_filter_stride_y = 4
+  first_filter_stride_y = 1
   first_weights = tf.Variable(
       tf.truncated_normal(
           [first_filter_height, first_filter_width, 1, first_filter_count],
diff --git a/tensorflow/go/android.go b/tensorflow/go/android.go
new file mode 100644
index 0000000000..3db3ddfec5
--- /dev/null
+++ b/tensorflow/go/android.go
@@ -0,0 +1,20 @@
+// Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//  http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// +build android
+
+package tensorflow
+
+// #cgo LDFLAGS: -landroid -llog -lm -lz -ldl
+import "C"
diff --git a/tensorflow/go/operation_test.go b/tensorflow/go/operation_test.go
index 7cba043af2..40c951ab8c 100644
--- a/tensorflow/go/operation_test.go
+++ b/tensorflow/go/operation_test.go
@@ -123,6 +123,14 @@ func TestOutputDataTypeAndShape(t *testing.T) {
 			[]int64{2, 3},
 			Double,
 		},
+		{ // Matrix of Uint64
+			[][]uint64{
+				{1, 2, 3},
+				{4, 5, 6},
+			},
+			[]int64{2, 3},
+			Uint64,
+		},
 	}
 	for idx, test := range testdata {
 		t.Run(fmt.Sprintf("#%d Value %T", idx, test.Value), func(t *testing.T) {
diff --git a/tensorflow/go/tensor.go b/tensorflow/go/tensor.go
index 36a74c0081..1326a95278 100644
--- a/tensorflow/go/tensor.go
+++ b/tensorflow/go/tensor.go
@@ -101,7 +101,7 @@ func NewTensor(value interface{}) (*Tensor, error) {
 			return nil, bug("NewTensor incorrectly calculated the size of a tensor with type %v and shape %v as %v bytes instead of %v", dataType, shape, nbytes, buf.Len())
 		}
 	} else {
-		e := stringEncoder{offsets: buf, data: raw[nflattened*8 : len(raw)], status: newStatus()}
+		e := stringEncoder{offsets: buf, data: raw[nflattened*8:], status: newStatus()}
 		if err := e.encode(reflect.ValueOf(value), shape); err != nil {
 			return nil, err
 		}
@@ -207,6 +207,9 @@ func (t *Tensor) WriteContentsTo(w io.Writer) (int64, error) {
 func tensorData(c *C.TF_Tensor) []byte {
 	// See: https://github.com/golang/go/wiki/cgo#turning-c-arrays-into-go-slices
 	cbytes := C.TF_TensorData(c)
+	if cbytes == nil {
+		return nil
+	}
 	length := int(C.TF_TensorByteSize(c))
 	slice := (*[1 << 30]byte)(unsafe.Pointer(cbytes))[:length:length]
 	return slice
@@ -310,7 +313,7 @@ func encodeTensor(w *bytes.Buffer, v reflect.Value, shape []int64) error {
 		if err := w.WriteByte(b); err != nil {
 			return err
 		}
-	case reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64, reflect.Uint8, reflect.Uint16, reflect.Float32, reflect.Float64, reflect.Complex64, reflect.Complex128:
+	case reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Float32, reflect.Float64, reflect.Complex64, reflect.Complex128:
 		if err := binary.Write(w, nativeEndian, v.Interface()); err != nil {
 			return err
 		}
@@ -349,7 +352,7 @@ func decodeTensor(r *bytes.Reader, shape []int64, typ reflect.Type, ptr reflect.
 			return err
 		}
 		ptr.Elem().SetBool(b == 1)
-	case reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64, reflect.Uint8, reflect.Uint16, reflect.Float32, reflect.Float64, reflect.Complex64, reflect.Complex128:
+	case reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Float32, reflect.Float64, reflect.Complex64, reflect.Complex128:
 		if err := binary.Read(r, nativeEndian, ptr.Interface()); err != nil {
 			return err
 		}
diff --git a/tensorflow/go/tensor_test.go b/tensorflow/go/tensor_test.go
index 35bd2fd9a5..674a8ce86f 100644
--- a/tensorflow/go/tensor_test.go
+++ b/tensorflow/go/tensor_test.go
@@ -34,11 +34,15 @@ func TestNewTensor(t *testing.T) {
 		{nil, int64(5)},
 		{nil, uint8(5)},
 		{nil, uint16(5)},
+		{nil, uint32(5)},
+		{nil, uint64(5)},
 		{nil, float32(5)},
 		{nil, float64(5)},
 		{nil, complex(float32(5), float32(6))},
 		{nil, complex(float64(5), float64(6))},
 		{nil, "a string"},
+		{[]int64{1}, []uint32{1}},
+		{[]int64{1}, []uint64{1}},
 		{[]int64{2}, []bool{true, false}},
 		{[]int64{1}, []float64{1}},
 		{[]int64{1}, [1]float64{1}},
@@ -71,11 +75,6 @@ func TestNewTensor(t *testing.T) {
 		// native ints not supported
 		int(5),
 		[]int{5},
-		// uint32 and uint64 are not supported in TensorFlow
-		uint32(5),
-		[]uint32{5},
-		uint64(5),
-		[]uint64{5},
 		// Mismatched dimensions
 		[][]float32{{1, 2, 3}, {4}},
 		// Mismatched dimensions. Should return "mismatched slice lengths" error instead of "BUG"
diff --git a/tensorflow/java/src/main/java/org/tensorflow/Shape.java b/tensorflow/java/src/main/java/org/tensorflow/Shape.java
index 9aa92be111..d533c3d480 100644
--- a/tensorflow/java/src/main/java/org/tensorflow/Shape.java
+++ b/tensorflow/java/src/main/java/org/tensorflow/Shape.java
@@ -77,6 +77,24 @@ public final class Shape {
     return shape[i];
   }
 
+  @Override
+  public int hashCode() {
+    return Arrays.hashCode(shape);
+  }
+
+  @Override
+  public boolean equals(Object obj) {
+    if (this == obj) {
+      return true;
+    }
+
+    if (obj instanceof Shape && Arrays.equals(this.shape, ((Shape) obj).shape)) {
+      return !hasUnknownDimension();
+    }
+
+    return super.equals(obj);
+  }
+
   /** Succinct description of the shape meant for debugging. */
   @Override
   public String toString() {
@@ -98,4 +116,18 @@ public final class Shape {
   }
 
   private long[] shape;
+
+  private boolean hasUnknownDimension() {
+    if (shape == null) {
+      return true;
+    }
+
+    for (long dimension : shape) {
+      if (dimension == -1) {
+        return true;
+      }
+    }
+
+    return false;
+  }
 }
diff --git a/tensorflow/java/src/test/java/org/tensorflow/ShapeTest.java b/tensorflow/java/src/test/java/org/tensorflow/ShapeTest.java
index 3b027700c5..313c09e1e4 100644
--- a/tensorflow/java/src/test/java/org/tensorflow/ShapeTest.java
+++ b/tensorflow/java/src/test/java/org/tensorflow/ShapeTest.java
@@ -16,6 +16,7 @@ limitations under the License.
 package org.tensorflow;
 
 import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotEquals;
 
 import org.junit.Test;
 import org.junit.runner.RunWith;
@@ -77,4 +78,27 @@ public class ShapeTest {
       assertEquals(5, n.shape().size(1));
     }
   }
+
+  @Test
+  public void equalsWorksCorrectly() {
+    assertEquals(Shape.scalar(), Shape.scalar());
+    assertEquals(Shape.make(1, 2, 3), Shape.make(1, 2, 3));
+
+    assertNotEquals(Shape.make(1, 2), null);
+    assertNotEquals(Shape.make(1, 2), new Object());
+    assertNotEquals(Shape.make(1, 2, 3), Shape.make(1, 2, 4));
+
+    assertNotEquals(Shape.unknown(), Shape.unknown());
+    assertNotEquals(Shape.make(-1), Shape.make(-1));
+    assertNotEquals(Shape.make(1, -1, 3), Shape.make(1, -1, 3));
+  }
+
+  @Test
+  public void hashCodeIsAsExpected() {
+    assertEquals(Shape.make(1, 2, 3, 4).hashCode(), Shape.make(1, 2, 3, 4).hashCode());
+    assertEquals(Shape.scalar().hashCode(), Shape.scalar().hashCode());
+    assertEquals(Shape.unknown().hashCode(), Shape.unknown().hashCode());
+
+    assertNotEquals(Shape.make(1, 2).hashCode(), Shape.make(1, 3).hashCode());
+  }
 }
diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD
index 5ae4aace16..54c43c1337 100644
--- a/tensorflow/python/BUILD
+++ b/tensorflow/python/BUILD
@@ -5,7 +5,10 @@ package(
     default_visibility = [
         "//engedu/ml/tf_from_scratch:__pkg__",
         "//tensorflow:internal",
+        "//tensorflow/contrib/lite/toco/python:__pkg__",
         "//tensorflow_models:__subpackages__",
+        # TODO(aselle): to pass open source test.
+        "//bazel_pip/tensorflow/contrib/lite/toco/python:__pkg__",
     ],
 )
 
@@ -45,6 +48,7 @@ py_library(
         "//tensorflow/compiler/aot/tests:__pkg__",  # TODO(b/34059704): remove when fixed
         "//tensorflow/contrib/learn:__pkg__",  # TODO(b/34059704): remove when fixed
         "//tensorflow/contrib/learn/python/learn/datasets:__pkg__",  # TODO(b/34059704): remove when fixed
+        "//tensorflow/contrib/lite/toco/python:__pkg__",  # TODO(b/34059704): remove when fixed
         "//tensorflow/python/debug:__pkg__",  # TODO(b/34059704): remove when fixed
         "//tensorflow/python/tools:__pkg__",  # TODO(b/34059704): remove when fixed
         "//tensorflow/tools/api/generator:__pkg__",
diff --git a/tensorflow/python/estimator/canned/head.py b/tensorflow/python/estimator/canned/head.py
index 62fea05867..fa5d02c476 100644
--- a/tensorflow/python/estimator/canned/head.py
+++ b/tensorflow/python/estimator/canned/head.py
@@ -117,7 +117,7 @@ class _Head(object):
       update_op = tf.contrib.layers.optimize_loss(optimizer=sync,
                                                   loss=estimator_spec.loss, ...)
       hooks = [sync.make_session_run_hook(is_chief)]
-      ... upate train_op and hooks in EstimatorSpec and return
+      ... update train_op and hooks in EstimatorSpec and return
     ```
   """
   __metaclass__ = abc.ABCMeta
diff --git a/tensorflow/python/estimator/inputs/numpy_io.py b/tensorflow/python/estimator/inputs/numpy_io.py
index c9f37f06e8..750af20e8a 100644
--- a/tensorflow/python/estimator/inputs/numpy_io.py
+++ b/tensorflow/python/estimator/inputs/numpy_io.py
@@ -19,6 +19,7 @@ from __future__ import division
 from __future__ import print_function
 
 import collections
+from six import string_types
 from tensorflow.python.estimator.inputs.queues import feeding_functions
 
 # Key name to pack the target into dict of `features`. See
@@ -51,8 +52,9 @@ def numpy_input_fn(x,
                    num_threads=1):
   """Returns input function that would feed dict of numpy arrays into the model.
 
-  This returns a function outputting `features` and `target` based on the dict
-  of numpy arrays. The dict `features` has the same keys as the `x`.
+  This returns a function outputting `features` and `targets` based on the dict
+  of numpy arrays. The dict `features` has the same keys as the `x`. The dict
+  `targets` has the same keys as the `y` if `y` is a dict.
 
   Example:
 
@@ -69,7 +71,7 @@ def numpy_input_fn(x,
 
   Args:
     x: dict of numpy array object.
-    y: numpy array object. `None` if absent.
+    y: numpy array object or dict of numpy array object. `None` if absent.
     batch_size: Integer, size of batches to return.
     num_epochs: Integer, number of epochs to iterate over data. If `None` will
       run forever.
@@ -81,11 +83,13 @@ def numpy_input_fn(x,
       such as in prediction and evaluation mode, `num_threads` should be 1.
 
   Returns:
-    Function, that has signature of ()->(dict of `features`, `target`)
+    Function, that has signature of ()->(dict of `features`, `targets`)
 
   Raises:
     ValueError: if the shape of `y` mismatches the shape of values in `x` (i.e.,
       values in `x` have same shape).
+    ValueError: if duplicate keys are in both `x` and `y` when `y` is a dict.
+    ValueError: if x or y is an empty dict.
     TypeError: `x` is not a dict or `shuffle` is not bool.
   """
 
@@ -97,43 +101,75 @@ def numpy_input_fn(x,
     """Numpy input function."""
     if not isinstance(x, dict):
       raise TypeError('x must be dict; got {}'.format(type(x).__name__))
+    if not x:
+      raise ValueError('x cannot be empty')
 
     # Make a shadow copy and also ensure the order of iteration is consistent.
-    ordered_dict_x = collections.OrderedDict(
+    ordered_dict_data = collections.OrderedDict(
         sorted(x.items(), key=lambda t: t[0]))
+    # Deep copy keys which is a view in python 3
+    feature_keys = list(ordered_dict_data.keys())
+
+    if y is None:
+      target_keys = None
+    elif isinstance(y, dict):
+      if not y:
+        raise ValueError('y cannot be empty dict, use None instead.')
+
+      ordered_dict_y = collections.OrderedDict(
+          sorted(y.items(), key=lambda t: t[0]))
+      target_keys = list(ordered_dict_y.keys())
+
+      duplicate_keys = set(feature_keys).intersection(set(target_keys))
+      if duplicate_keys:
+        raise ValueError('{} duplicate keys are found in both x and y: '
+                         '{}'.format(len(duplicate_keys), duplicate_keys))
+
+      ordered_dict_data.update(ordered_dict_y)
+    else:
+      target_keys = _get_unique_target_key(ordered_dict_data)
+      ordered_dict_data[target_keys] = y
+
+    if len(set(v.shape[0] for v in ordered_dict_data.values())) != 1:
+      shape_dict_of_x = {k: ordered_dict_data[k].shape for k in feature_keys}
+
+      if target_keys is None:
+        shape_of_y = None
+      elif isinstance(target_keys, string_types):
+        shape_of_y = y.shape
+      else:
+        shape_of_y = {k: ordered_dict_data[k].shape for k in target_keys}
 
-    unique_target_key = _get_unique_target_key(ordered_dict_x)
-    if y is not None:
-      ordered_dict_x[unique_target_key] = y
-
-    if len(set(v.shape[0] for v in ordered_dict_x.values())) != 1:
-      shape_dict_of_x = {k: ordered_dict_x[k].shape
-                         for k in ordered_dict_x.keys()}
-      shape_of_y = None if y is None else y.shape
       raise ValueError('Length of tensors in x and y is mismatched. All '
                        'elements in x and y must have the same length.\n'
                        'Shapes in x: {}\n'
-                       'Shape for y: {}\n'.format(shape_dict_of_x, shape_of_y))
+                       'Shapes in y: {}\n'.format(shape_dict_of_x, shape_of_y))
 
     queue = feeding_functions._enqueue_data(  # pylint: disable=protected-access
-        ordered_dict_x,
+        ordered_dict_data,
         queue_capacity,
         shuffle=shuffle,
         num_threads=num_threads,
         enqueue_size=batch_size,
         num_epochs=num_epochs)
 
-    features = (queue.dequeue_many(batch_size) if num_epochs is None
-                else queue.dequeue_up_to(batch_size))
+    batch = (
+        queue.dequeue_many(batch_size)
+        if num_epochs is None else queue.dequeue_up_to(batch_size))
 
-    # Remove the first `Tensor` in `features`, which is the row number.
-    if len(features) > 0:
-      features.pop(0)
+    # Remove the first `Tensor` in `batch`, which is the row number.
+    if batch:
+      batch.pop(0)
 
-    features = dict(zip(ordered_dict_x.keys(), features))
-    if y is not None:
-      target = features.pop(unique_target_key)
+    features = dict(zip(feature_keys, batch[:len(feature_keys)]))
+    if target_keys is None:
+      # TODO(martinwicke), return consistent result
+      return features
+    elif isinstance(target_keys, string_types):
+      target = batch[-1]
+      return features, target
+    else:
+      target = dict(zip(target_keys, batch[-len(target_keys):]))
       return features, target
-    return features
 
   return input_fn
diff --git a/tensorflow/python/estimator/inputs/numpy_io_test.py b/tensorflow/python/estimator/inputs/numpy_io_test.py
index 02df22b632..1374e3f7e1 100644
--- a/tensorflow/python/estimator/inputs/numpy_io_test.py
+++ b/tensorflow/python/estimator/inputs/numpy_io_test.py
@@ -239,6 +239,40 @@ class NumpyIoTest(test.TestCase):
             x, y, batch_size=2, shuffle=False, num_epochs=1)
         failing_input_fn()
 
+  def testNumpyInputFnWithXIsEmptyDict(self):
+    x = {}
+    y = np.arange(4)
+    with self.test_session():
+      with self.assertRaisesRegexp(ValueError, 'x cannot be empty'):
+        failing_input_fn = numpy_io.numpy_input_fn(x, y, shuffle=False)
+        failing_input_fn()
+
+  def testNumpyInputFnWithYIsNone(self):
+    a = np.arange(4) * 1.0
+    b = np.arange(32, 36)
+    x = {'a': a, 'b': b}
+    y = None
+
+    with self.test_session() as session:
+      input_fn = numpy_io.numpy_input_fn(
+          x, y, batch_size=2, shuffle=False, num_epochs=1)
+      features_tensor = input_fn()
+
+      coord = coordinator.Coordinator()
+      threads = queue_runner_impl.start_queue_runners(session, coord=coord)
+
+      feature = session.run(features_tensor)
+      self.assertEqual(len(feature), 2)
+      self.assertAllEqual(feature['a'], [0, 1])
+      self.assertAllEqual(feature['b'], [32, 33])
+
+      session.run([features_tensor])
+      with self.assertRaises(errors.OutOfRangeError):
+        session.run([features_tensor])
+
+      coord.request_stop()
+      coord.join(threads)
+
   def testNumpyInputFnWithNonBoolShuffle(self):
     x = np.arange(32, 36)
     y = np.arange(4)
@@ -285,6 +319,56 @@ class NumpyIoTest(test.TestCase):
             num_epochs=1)
         failing_input_fn()
 
+  def testNumpyInputFnWithYAsDict(self):
+    a = np.arange(4) * 1.0
+    b = np.arange(32, 36)
+    x = {'a': a, 'b': b}
+    y = {'y1': np.arange(-32, -28), 'y2': np.arange(32, 28, -1)}
+
+    with self.test_session() as session:
+      input_fn = numpy_io.numpy_input_fn(
+          x, y, batch_size=2, shuffle=False, num_epochs=1)
+      features_tensor, targets_tensor = input_fn()
+
+      coord = coordinator.Coordinator()
+      threads = queue_runner_impl.start_queue_runners(session, coord=coord)
+
+      features, targets = session.run([features_tensor, targets_tensor])
+      self.assertEqual(len(features), 2)
+      self.assertAllEqual(features['a'], [0, 1])
+      self.assertAllEqual(features['b'], [32, 33])
+      self.assertEqual(len(targets), 2)
+      self.assertAllEqual(targets['y1'], [-32, -31])
+      self.assertAllEqual(targets['y2'], [32, 31])
+
+      session.run([features_tensor, targets_tensor])
+      with self.assertRaises(errors.OutOfRangeError):
+        session.run([features_tensor, targets_tensor])
+
+      coord.request_stop()
+      coord.join(threads)
+
+  def testNumpyInputFnWithYIsEmptyDict(self):
+    a = np.arange(4) * 1.0
+    b = np.arange(32, 36)
+    x = {'a': a, 'b': b}
+    y = {}
+    with self.test_session():
+      with self.assertRaisesRegexp(ValueError, 'y cannot be empty'):
+        failing_input_fn = numpy_io.numpy_input_fn(x, y, shuffle=False)
+        failing_input_fn()
+
+  def testNumpyInputFnWithDuplicateKeysInXAndY(self):
+    a = np.arange(4) * 1.0
+    b = np.arange(32, 36)
+    x = {'a': a, 'b': b}
+    y = {'y1': np.arange(-32, -28), 'a': a, 'y2': np.arange(32, 28, -1), 'b': b}
+    with self.test_session():
+      with self.assertRaisesRegexp(
+          ValueError, '2 duplicate keys are found in both x and y'):
+        failing_input_fn = numpy_io.numpy_input_fn(x, y, shuffle=False)
+        failing_input_fn()
+
 
 if __name__ == '__main__':
   test.main()
diff --git a/tensorflow/python/framework/ops.py b/tensorflow/python/framework/ops.py
index 2785aed13e..dc4ffb1747 100644
--- a/tensorflow/python/framework/ops.py
+++ b/tensorflow/python/framework/ops.py
@@ -860,6 +860,10 @@ def convert_to_tensor(value, dtype=None, name=None, preferred_dtype=None):
   inputs, which allows those ops to accept numpy arrays, Python lists,
   and scalars in addition to `Tensor` objects.
 
+  Note: This function diverges from default Numpy behavior for `float` and
+    `string` types when `None` is present in a Python list or scalar. Rather
+    than silently converting `None` values, an error will be thrown.
+
   Args:
     value: An object whose type has a registered `Tensor` conversion function.
     dtype: Optional element type for the returned tensor. If missing, the
diff --git a/tensorflow/python/framework/tensor_util.py b/tensorflow/python/framework/tensor_util.py
index 7e74c19124..e283542172 100644
--- a/tensorflow/python/framework/tensor_util.py
+++ b/tensorflow/python/framework/tensor_util.py
@@ -286,6 +286,7 @@ _TF_TO_IS_OK = {
     dtypes.bool: [_FilterBool],
     dtypes.complex128: [_FilterComplex],
     dtypes.complex64: [_FilterComplex],
+    dtypes.float16: [_FilterFloat],
     dtypes.float32: [_FilterFloat],
     dtypes.float64: [_FilterFloat],
     dtypes.int16: [_FilterInt],
diff --git a/tensorflow/python/framework/test_util.py b/tensorflow/python/framework/test_util.py
index cfa5fe5e3e..4c026590c2 100644
--- a/tensorflow/python/framework/test_util.py
+++ b/tensorflow/python/framework/test_util.py
@@ -986,10 +986,10 @@ class TensorFlowTestCase(googletest.TestCase):
       err: A float value.
       msg: An optional string message to append to the failure message.
     """
-    self.assertTrue(
-        math.fabs(f1 - f2) <= err,
-        "%f != %f +/- %f%s" % (f1, f2, err, " (%s)" % msg
-                               if msg is not None else ""))
+    # f1 == f2 is needed here as we might have: f1, f2 = inf, inf
+    self.assertTrue(f1 == f2 or math.fabs(f1 - f2) <= err,
+                    "%f != %f +/- %f%s" % (f1, f2, err, " (%s)" % msg
+                                           if msg is not None else ""))
 
   def assertArrayNear(self, farray1, farray2, err):
     """Asserts that two float arrays are near each other.
diff --git a/tensorflow/python/kernel_tests/array_ops_test.py b/tensorflow/python/kernel_tests/array_ops_test.py
index 6eb9c66d06..1bf2b70c1b 100644
--- a/tensorflow/python/kernel_tests/array_ops_test.py
+++ b/tensorflow/python/kernel_tests/array_ops_test.py
@@ -107,22 +107,41 @@ class BooleanMaskTest(test_util.TensorFlowTestCase):
   def setUp(self):
     self.rng = np.random.RandomState(42)
 
-  def CheckVersusNumpy(self, ndims_mask, arr_shape, make_mask=None):
+  def CheckVersusNumpy(self, ndims_mask, arr_shape, make_mask=None, axis=None):
     """Check equivalence between boolean_mask and numpy masking."""
     if make_mask is None:
       make_mask = lambda shape: self.rng.randint(0, 2, size=shape).astype(bool)
     arr = np.random.rand(*arr_shape)
     mask = make_mask(arr_shape[:ndims_mask])
-    masked_arr = arr[mask]
+    if axis is not None:
+      mask = make_mask(arr_shape[axis:ndims_mask + axis])
+    if axis is None or axis == 0:
+      masked_arr = arr[mask]
+    elif axis == 1:
+      masked_arr = arr[:, mask]
+    elif axis == 2:
+      masked_arr = arr[:, :, mask]
     with self.test_session():
-      masked_tensor = array_ops.boolean_mask(arr, mask)
+      masked_tensor = array_ops.boolean_mask(arr, mask, axis=axis)
 
       # Leading dimension size of masked_tensor is always unknown until runtime
       # since we don't how many elements will be kept.
-      self.assertAllEqual(masked_tensor.get_shape()[1:], masked_arr.shape[1:])
+      leading = 1 if axis is None else axis + 1
+      self.assertAllEqual(masked_tensor.get_shape()[leading:],
+                          masked_arr.shape[leading:])
 
       self.assertAllClose(masked_arr, masked_tensor.eval())
 
+  def testMaskDim1ArrDim2Axis1(self):
+    ndims_mask = 1
+    for arr_shape in [(1, 1), (2, 2), (2, 5)]:
+      self.CheckVersusNumpy(ndims_mask, arr_shape, axis=1)
+
+  def testMaskDim2ArrDim2Axis1(self):
+    ndims_mask = 2
+    for arr_shape in [(1, 1), (2, 2), (2, 5)]:
+      self.CheckVersusNumpy(ndims_mask, arr_shape, axis=1)
+
   def testMaskDim1ArrDim1(self):
     ndims_mask = 1
     for arr_shape in [(1,), (2,), (3,), (10,)]:
@@ -486,7 +505,7 @@ class StridedSliceTest(test_util.TensorFlowTestCase):
         _ = checker2[...]
         _ = checker2[tuple()]
 
-  def testFloatSlicedArrayAndInt64IndicesGPU(self):
+  def testInt64GPU(self):
     if not test_util.is_gpu_available():
       self.skipTest("No GPU available")
     with self.test_session(use_gpu=True, force_gpu=True):
@@ -497,17 +516,6 @@ class StridedSliceTest(test_util.TensorFlowTestCase):
       s = array_ops.strided_slice(x, begin, end, strides)
       self.assertAllEqual([3.], self.evaluate(s))
 
-  def testInt64SlicedArrayAndIndicesGPU(self):
-    if not test_util.is_gpu_available():
-      self.skipTest("No GPU available")
-    with self.test_session(use_gpu=True, force_gpu=True):
-      x = constant_op.constant([1, 2, 3], dtype=dtypes.int64)
-      begin = constant_op.constant([2], dtype=dtypes.int64)
-      end = constant_op.constant([3], dtype=dtypes.int64)
-      strides = constant_op.constant([1], dtype=dtypes.int64)
-      s = array_ops.strided_slice(x, begin, end, strides)
-      self.assertAllEqual([3], self.evaluate(s))
-
   def testDegenerateSlices(self):
     with self.test_session(use_gpu=True):
       checker = StridedSliceChecker(self, StridedSliceChecker.REF_TENSOR)
@@ -1071,5 +1079,16 @@ class PadTest(test_util.TensorFlowTestCase):
                            [0, 0, 0, 0, 0, 0, 0]])
 
 
+class InvertPermutationTest(test_util.TensorFlowTestCase):
+
+  def testInvertPermutation(self):
+    for dtype in [dtypes.int32, dtypes.int64]:
+      with self.test_session(use_gpu=True):
+        x = constant_op.constant([3, 4, 0, 2, 1], dtype=dtype)
+        y = array_ops.invert_permutation(x)
+        self.assertAllEqual(y.get_shape(), [5])
+        self.assertAllEqual(y.eval(), [2, 4, 3, 0, 1])
+
+
 if __name__ == "__main__":
   test_lib.main()
diff --git a/tensorflow/python/kernel_tests/bincount_op_test.py b/tensorflow/python/kernel_tests/bincount_op_test.py
index 7a610debd1..2767df127e 100644
--- a/tensorflow/python/kernel_tests/bincount_op_test.py
+++ b/tensorflow/python/kernel_tests/bincount_op_test.py
@@ -29,7 +29,7 @@ from tensorflow.python.platform import googletest
 class BincountTest(test_util.TensorFlowTestCase):
 
   def test_empty(self):
-    with self.test_session():
+    with self.test_session(use_gpu=True):
       self.assertAllEqual(
           math_ops.bincount([], minlength=5).eval(), [0, 0, 0, 0, 0])
       self.assertAllEqual(math_ops.bincount([], minlength=1).eval(), [0])
@@ -42,7 +42,7 @@ class BincountTest(test_util.TensorFlowTestCase):
           np.float64)
 
   def test_values(self):
-    with self.test_session():
+    with self.test_session(use_gpu=True):
       self.assertAllEqual(
           math_ops.bincount([1, 1, 1, 2, 2, 3]).eval(), [0, 3, 2, 1])
       arr = [1, 1, 2, 1, 2, 3, 1, 2, 3, 4, 1, 2, 3, 4, 5]
@@ -57,14 +57,14 @@ class BincountTest(test_util.TensorFlowTestCase):
           math_ops.bincount(np.arange(10000)).eval(), np.ones(10000))
 
   def test_maxlength(self):
-    with self.test_session():
+    with self.test_session(use_gpu=True):
       self.assertAllEqual(math_ops.bincount([5], maxlength=3).eval(), [0, 0, 0])
       self.assertAllEqual(math_ops.bincount([1], maxlength=3).eval(), [0, 1])
       self.assertAllEqual(math_ops.bincount([], maxlength=3).eval(), [])
 
   def test_random_with_weights(self):
     num_samples = 10000
-    with self.test_session():
+    with self.test_session(use_gpu=True):
       np.random.seed(42)
       for dtype in [dtypes.int32, dtypes.int64, dtypes.float32, dtypes.float64]:
         arr = np.random.randint(0, 1000, num_samples)
@@ -72,17 +72,27 @@ class BincountTest(test_util.TensorFlowTestCase):
           weights = np.random.randint(-100, 100, num_samples)
         else:
           weights = np.random.random(num_samples)
-        self.assertAllEqual(
-            math_ops.bincount(arr, weights).eval(),
-            np.bincount(arr, weights))
+        self.assertAllClose(
+            math_ops.bincount(arr, weights).eval(), np.bincount(arr, weights))
+
+  def test_random_without_weights(self):
+    num_samples = 10000
+    with self.test_session(use_gpu=True):
+      np.random.seed(42)
+      for dtype in [np.int32, np.float32]:
+        arr = np.random.randint(0, 1000, num_samples)
+        weights = np.ones(num_samples).astype(dtype)
+        self.assertAllClose(
+            math_ops.bincount(arr, None).eval(), np.bincount(arr, weights))
 
   def test_zero_weights(self):
-    with self.test_session():
+    with self.test_session(use_gpu=True):
       self.assertAllEqual(
           math_ops.bincount(np.arange(1000), np.zeros(1000)).eval(),
           np.zeros(1000))
 
   def test_negative(self):
+    # unsorted_segment_sum will only report InvalidArgumentError on CPU
     with self.test_session():
       with self.assertRaises(errors.InvalidArgumentError):
         math_ops.bincount([1, 2, 3, -1, 6, 8]).eval()
diff --git a/tensorflow/python/kernel_tests/bucketize_op_test.py b/tensorflow/python/kernel_tests/bucketize_op_test.py
index 6db3592055..e612b1c134 100644
--- a/tensorflow/python/kernel_tests/bucketize_op_test.py
+++ b/tensorflow/python/kernel_tests/bucketize_op_test.py
@@ -31,7 +31,7 @@ class BucketizationOpTest(test.TestCase):
         constant_op.constant([-5, 0, 2, 3, 5, 8, 10, 11, 12]),
         boundaries=[0, 3, 8, 11])
     expected_out = [0, 1, 1, 2, 2, 3, 3, 4, 4]
-    with self.test_session() as sess:
+    with self.test_session(use_gpu=True) as sess:
       self.assertAllEqual(expected_out, sess.run(op))
 
   def testFloat(self):
@@ -39,7 +39,7 @@ class BucketizationOpTest(test.TestCase):
         constant_op.constant([-5., 0., 2., 3., 5., 8., 10., 11., 12.]),
         boundaries=[0., 3., 8., 11.])
     expected_out = [0, 1, 1, 2, 2, 3, 3, 4, 4]
-    with self.test_session() as sess:
+    with self.test_session(use_gpu=True) as sess:
       self.assertAllEqual(expected_out, sess.run(op))
 
   def test2DInput(self):
@@ -47,13 +47,13 @@ class BucketizationOpTest(test.TestCase):
         constant_op.constant([[-5, 0, 2, 3, 5], [8, 10, 11, 12, 0]]),
         boundaries=[0, 3, 8, 11])
     expected_out = [[0, 1, 1, 2, 2], [3, 3, 4, 4, 1]]
-    with self.test_session() as sess:
+    with self.test_session(use_gpu=True) as sess:
       self.assertAllEqual(expected_out, sess.run(op))
 
   def testInvalidBoundariesOrder(self):
     op = math_ops._bucketize(
         constant_op.constant([-5, 0]), boundaries=[0, 8, 3, 11])
-    with self.test_session() as sess:
+    with self.test_session(use_gpu=True) as sess:
       with self.assertRaisesRegexp(
           errors_impl.InvalidArgumentError, "Expected sorted boundaries"):
         sess.run(op)
diff --git a/tensorflow/python/kernel_tests/constant_op_test.py b/tensorflow/python/kernel_tests/constant_op_test.py
index 6167cb9999..68817cc256 100644
--- a/tensorflow/python/kernel_tests/constant_op_test.py
+++ b/tensorflow/python/kernel_tests/constant_op_test.py
@@ -439,10 +439,10 @@ class ZerosLikeTest(test.TestCase):
 
   def testZerosLikeCPU(self):
     for dtype in [
-        dtypes_lib.float32, dtypes_lib.float64, dtypes_lib.int32,
-        dtypes_lib.uint8, dtypes_lib.int16, dtypes_lib.int8,
-        dtypes_lib.complex64, dtypes_lib.complex128, dtypes_lib.int64,
-        dtypes_lib.string
+        dtypes_lib.float32, dtypes_lib.float64, dtypes_lib.int8,
+        dtypes_lib.uint8, dtypes_lib.int16, dtypes_lib.uint16, dtypes_lib.int32,
+        dtypes_lib.int64, dtypes_lib.bool, dtypes_lib.complex64,
+        dtypes_lib.complex128, dtypes_lib.string
     ]:
       self._compareZeros(dtype, fully_defined_shape=False, use_gpu=False)
       self._compareZeros(dtype, fully_defined_shape=True, use_gpu=False)
@@ -573,9 +573,10 @@ class OnesLikeTest(test.TestCase):
 
   def testOnesLike(self):
     for dtype in [
-        dtypes_lib.float32, dtypes_lib.float64, dtypes_lib.int32,
-        dtypes_lib.uint8, dtypes_lib.int16, dtypes_lib.int8,
-        dtypes_lib.complex64, dtypes_lib.complex128, dtypes_lib.int64
+        dtypes_lib.float32, dtypes_lib.float64, dtypes_lib.int8,
+        dtypes_lib.uint8, dtypes_lib.int16, dtypes_lib.uint16, dtypes_lib.int32,
+        dtypes_lib.int64, dtypes_lib.bool, dtypes_lib.complex64,
+        dtypes_lib.complex128
     ]:
       numpy_dtype = dtype.as_numpy_dtype
       with self.test_session():
diff --git a/tensorflow/python/kernel_tests/conv1d_test.py b/tensorflow/python/kernel_tests/conv1d_test.py
index b67a4e3f89..d92797a7d3 100644
--- a/tensorflow/python/kernel_tests/conv1d_test.py
+++ b/tensorflow/python/kernel_tests/conv1d_test.py
@@ -17,6 +17,9 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
+import numpy as np
+from six.moves import xrange  # pylint: disable=redefined-builtin
+
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
 from tensorflow.python.ops import array_ops
@@ -49,6 +52,46 @@ class Conv1DTest(test.TestCase):
           self.assertEqual(len(output), 2)
           self.assertAllClose(output, [2 * 1 + 1 * 2, 2 * 3 + 1 * 4])
 
+  def testConv1DTranspose(self):
+    with self.test_session():
+      stride = 2
+
+      # Input, output: [batch, width, depth]
+      x_shape = [2, 4, 3]
+      y_shape = [2, 9, 2]
+
+      # Filter: [kernel_width, output_depth, input_depth]
+      f_shape = [3, 2, 3]
+
+      x = constant_op.constant(
+          1.0, shape=x_shape, name="x", dtype=dtypes.float32)
+      f = constant_op.constant(
+          1.0, shape=f_shape, name="filter", dtype=dtypes.float32)
+      output = nn_ops.conv1d_transpose(
+          x, f, y_shape, stride=stride, padding="VALID")
+      value = output.eval()
+
+      cache_values = np.zeros(y_shape, dtype=np.float32)
+
+      # The amount of padding added
+      pad = 1
+
+      for n in xrange(x_shape[0]):
+        for k in xrange(f_shape[1]):
+          for w in xrange(pad, y_shape[1] - pad):
+            target = 3.0
+            # We add a case for locations divisible by the stride.
+            w_in = w % stride == 0 and w > pad and w < y_shape[1] - 1 - pad
+            if w_in:
+              target += 3.0
+            cache_values[n, w, k] = target
+
+          # copy values in the border
+          cache_values[n, 0, k] = cache_values[n, 1, k]
+          cache_values[n, -1, k] = cache_values[n, -2, k]
+
+    self.assertAllClose(cache_values, value)
+
 
 if __name__ == "__main__":
   test.main()
diff --git a/tensorflow/python/kernel_tests/conv_ops_3d_test.py b/tensorflow/python/kernel_tests/conv_ops_3d_test.py
index 14622ab467..ec8ac74163 100644
--- a/tensorflow/python/kernel_tests/conv_ops_3d_test.py
+++ b/tensorflow/python/kernel_tests/conv_ops_3d_test.py
@@ -21,6 +21,8 @@ from __future__ import print_function
 import collections
 import math
 
+import numpy as np
+
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import test_util
@@ -45,8 +47,19 @@ def GetTestConfigs():
 
 class Conv3DTest(test.TestCase):
 
+  def _DtypesToTest(self, use_gpu):
+    if use_gpu:
+      if not test_util.CudaSupportsHalfMatMulAndConv():
+        return [dtypes.float32]
+      else:
+        # It is important that float32 comes before float16 here,
+        # as we will be using its gradients as reference for fp16 gradients.
+        return [dtypes.float32, dtypes.float16]
+    else:
+      return [dtypes.float64, dtypes.float32, dtypes.float16]
+
   def _SetupValuesForDevice(self, tensor_in_sizes, filter_in_sizes, stride,
-                            padding, data_format, use_gpu):
+                            padding, data_format, dtype, use_gpu):
     total_size_1 = 1
     total_size_2 = 1
     for s in tensor_in_sizes:
@@ -54,13 +67,14 @@ class Conv3DTest(test.TestCase):
     for s in filter_in_sizes:
       total_size_2 *= s
 
-    # Initializes the input tensor with array containing incrementing
-    # numbers from 1.
-    x1 = [f * 1.0 for f in range(1, total_size_1 + 1)]
-    x2 = [f * 1.0 for f in range(1, total_size_2 + 1)]
+    # Initializes the input tensor with array containing numbers from 0 to 1.
+    # We keep the input tensor values fairly small to avoid overflowing float16
+    # during the conv3d.
+    x1 = [f * 1.0 / total_size_1 for f in range(1, total_size_1 + 1)]
+    x2 = [f * 1.0 / total_size_2 for f in range(1, total_size_2 + 1)]
     with self.test_session(use_gpu=use_gpu):
-      t1 = constant_op.constant(x1, shape=tensor_in_sizes)
-      t2 = constant_op.constant(x2, shape=filter_in_sizes)
+      t1 = constant_op.constant(x1, shape=tensor_in_sizes, dtype=dtype)
+      t2 = constant_op.constant(x2, shape=filter_in_sizes, dtype=dtype)
 
       if isinstance(stride, collections.Iterable):
         strides = [1] + list(stride) + [1]
@@ -81,27 +95,33 @@ class Conv3DTest(test.TestCase):
                     expected):
     results = []
     for data_format, use_gpu in GetTestConfigs():
-      result = self._SetupValuesForDevice(
-          tensor_in_sizes,
-          filter_in_sizes,
-          stride,
-          padding,
-          data_format,
-          use_gpu=use_gpu)
-      results.append(result)
-      tolerance = 1e-2 if use_gpu else 1e-5
+      for dtype in self._DtypesToTest(use_gpu):
+        result = self._SetupValuesForDevice(
+            tensor_in_sizes,
+            filter_in_sizes,
+            stride,
+            padding,
+            data_format,
+            dtype,
+            use_gpu=use_gpu)
+        results.append(result)
+
       with self.test_session() as sess:
         values = sess.run(results)
         for value in values:
           print("expected = ", expected)
           print("actual = ", value)
-          self.assertAllClose(expected, value.flatten(), atol=tolerance,
-                              rtol=1e-6)
+          tol = 1e-6
+          if value.dtype == np.float16:
+            tol = 1e-3
+
+          self.assertAllClose(expected, value.flatten(), atol=tol, rtol=tol)
 
   def testConv3D1x1x1Filter(self):
     expected_output = [
-        30.0, 36.0, 42.0, 66.0, 81.0, 96.0, 102.0, 126.0, 150.0, 138.0, 171.0,
-        204.0, 174.0, 216.0, 258.0, 210.0, 261.0, 312.0
+        0.18518519, 0.22222222, 0.25925926, 0.40740741, 0.5, 0.59259259,
+        0.62962963, 0.77777778, 0.92592593, 0.85185185, 1.05555556, 1.25925926,
+        1.07407407, 1.33333333, 1.59259259, 1.2962963, 1.61111111, 1.92592593
     ]
 
     # These are equivalent to the Conv2D1x1 case.
@@ -127,8 +147,10 @@ class Conv3DTest(test.TestCase):
   # Expected values computed using scipy's correlate function.
   def testConv3D2x2x2Filter(self):
     expected_output = [
-        19554., 19962., 20370., 22110., 22590., 23070., 34890., 35730., 36570.,
-        37446., 38358., 39270., 50226., 51498., 52770., 52782., 54126., 55470.
+        3.77199074, 3.85069444, 3.92939815, 4.2650463, 4.35763889, 4.45023148,
+        6.73032407, 6.89236111, 7.05439815, 7.22337963, 7.39930556, 7.57523148,
+        9.68865741, 9.93402778, 10.17939815, 10.18171296, 10.44097222,
+        10.70023148
     ]
     # expected_shape = [1, 3, 1, 2, 5]
     self._VerifyValues(
@@ -140,69 +162,17 @@ class Conv3DTest(test.TestCase):
 
   def testConv3DStrides(self):
     expected_output = [
-        102.,
-        151.,
-        172.,
-        193.,
-        214.,
-        235.,
-        142.,
-        438.,
-        592.,
-        613.,
-        634.,
-        655.,
-        676.,
-        394.,
-        774.,
-        1033.,
-        1054.,
-        1075.,
-        1096.,
-        1117.,
-        646.,
-        1894.,
-        2503.,
-        2524.,
-        2545.,
-        2566.,
-        2587.,
-        1486.,
-        2230.,
-        2944.,
-        2965.,
-        2986.,
-        3007.,
-        3028.,
-        1738.,
-        2566.,
-        3385.,
-        3406.,
-        3427.,
-        3448.,
-        3469.,
-        1990.,
-        3686.,
-        4855.,
-        4876.,
-        4897.,
-        4918.,
-        4939.,
-        2830.,
-        4022.,
-        5296.,
-        5317.,
-        5338.,
-        5359.,
-        5380.,
-        3082.,
-        4358.,
-        5737.,
-        5758.,
-        5779.,
-        5800.,
-        5821.,
-        3334.,
+        0.06071429, 0.08988095, 0.10238095, 0.11488095, 0.12738095, 0.13988095,
+        0.08452381, 0.26071429, 0.35238095, 0.36488095, 0.37738095, 0.38988095,
+        0.40238095, 0.23452381, 0.46071429, 0.61488095, 0.62738095, 0.63988095,
+        0.65238095, 0.66488095, 0.38452381, 1.12738095, 1.48988095, 1.50238095,
+        1.51488095, 1.52738095, 1.53988095, 0.88452381, 1.32738095, 1.75238095,
+        1.76488095, 1.77738095, 1.78988095, 1.80238095, 1.03452381, 1.52738095,
+        2.01488095, 2.02738095, 2.03988095, 2.05238095, 2.06488095, 1.18452381,
+        2.19404762, 2.88988095, 2.90238095, 2.91488095, 2.92738095, 2.93988095,
+        1.68452381, 2.39404762, 3.15238095, 3.16488095, 3.17738095, 3.18988095,
+        3.20238095, 1.83452381, 2.59404762, 3.41488095, 3.42738095, 3.43988095,
+        3.45238095, 3.46488095, 1.98452381
     ]
     self._VerifyValues(
         tensor_in_sizes=[1, 5, 8, 7, 1],
@@ -212,7 +182,9 @@ class Conv3DTest(test.TestCase):
         expected=expected_output)
 
   def testConv3D2x2x2FilterStride2(self):
-    expected_output = [19554., 19962., 20370., 50226., 51498., 52770.]
+    expected_output = [
+        3.77199074, 3.85069444, 3.92939815, 9.68865741, 9.93402778, 10.17939815
+    ]
     self._VerifyValues(
         tensor_in_sizes=[1, 4, 2, 3, 3],
         filter_in_sizes=[2, 2, 2, 3, 3],
@@ -222,11 +194,12 @@ class Conv3DTest(test.TestCase):
 
   def testConv3DStride3(self):
     expected_output = [
-        36564., 38022., 39480., 37824., 39354., 40884., 39084., 40686., 42288.,
-        46644., 48678., 50712., 47904., 50010., 52116., 49164., 51342., 53520.,
-        107124., 112614., 118104., 108384., 113946., 119508., 109644., 115278.,
-        120912., 117204., 123270., 129336., 118464., 124602., 130740., 119724.,
-        125934., 132144.
+        1.51140873, 1.57167659, 1.63194444, 1.56349206, 1.62673611, 1.68998016,
+        1.6155754, 1.68179563, 1.74801587, 1.9280754, 2.01215278, 2.09623016,
+        1.98015873, 2.0672123, 2.15426587, 2.03224206, 2.12227183, 2.21230159,
+        4.4280754, 4.65500992, 4.88194444, 4.48015873, 4.71006944, 4.93998016,
+        4.53224206, 4.76512897, 4.99801587, 4.84474206, 5.09548611, 5.34623016,
+        4.8968254, 5.15054563, 5.40426587, 4.94890873, 5.20560516, 5.46230159
     ]
     self._VerifyValues(
         tensor_in_sizes=[1, 6, 7, 8, 2],
@@ -237,8 +210,8 @@ class Conv3DTest(test.TestCase):
 
   def testConv3D2x2x2FilterStride2Same(self):
     expected_output = [
-        19554., 19962., 20370., 10452., 10710., 10968., 50226., 51498., 52770.,
-        23844., 24534., 25224.
+        3.77199074, 3.85069444, 3.92939815, 2.0162037, 2.06597222, 2.11574074,
+        9.68865741, 9.93402778, 10.17939815, 4.59953704, 4.73263889, 4.86574074
     ]
     self._VerifyValues(
         tensor_in_sizes=[1, 4, 2, 3, 3],
@@ -248,7 +221,10 @@ class Conv3DTest(test.TestCase):
         expected=expected_output)
 
   def testKernelSmallerThanStride(self):
-    expected_output = [1., 3., 7., 9., 19., 21., 25., 27.]
+    expected_output = [
+        0.03703704, 0.11111111, 0.25925926, 0.33333333, 0.7037037, 0.77777778,
+        0.92592593, 1.
+    ]
     self._VerifyValues(
         tensor_in_sizes=[1, 3, 3, 3, 1],
         filter_in_sizes=[1, 1, 1, 1, 1],
@@ -263,9 +239,11 @@ class Conv3DTest(test.TestCase):
         expected=expected_output)
 
     expected_output = [
-        1484., 1592., 770., 2240., 2348., 1106., 1149., 1191., 539., 6776.,
-        6884., 3122., 7532., 7640., 3458., 3207., 3249., 1421., 3005., 3035.,
-        1225., 3215., 3245., 1309., 1013., 1022., 343.
+        0.54081633, 0.58017493, 0.28061224, 0.81632653, 0.85568513, 0.40306122,
+        0.41873178, 0.4340379, 0.19642857, 2.46938776, 2.50874636, 1.1377551,
+        2.74489796, 2.78425656, 1.26020408, 1.16873178, 1.1840379, 0.51785714,
+        1.09511662, 1.10604956, 0.44642857, 1.17164723, 1.18258017, 0.47704082,
+        0.3691691, 0.37244898, 0.125
     ]
     self._VerifyValues(
         tensor_in_sizes=[1, 7, 7, 7, 1],
@@ -274,7 +252,10 @@ class Conv3DTest(test.TestCase):
         padding="SAME",
         expected=expected_output)
 
-    expected_output = [1484., 1592., 2240., 2348., 6776., 6884., 7532., 7640.]
+    expected_output = [
+        0.540816, 0.580175, 0.816327, 0.855685, 2.469388, 2.508746, 2.744898,
+        2.784257
+    ]
     self._VerifyValues(
         tensor_in_sizes=[1, 7, 7, 7, 1],
         filter_in_sizes=[2, 2, 2, 1, 1],
@@ -288,7 +269,7 @@ class Conv3DTest(test.TestCase):
         filter_in_sizes=[2, 1, 2, 1, 2],
         stride=1,
         padding="VALID",
-        expected=[50, 60])
+        expected=[1.5625, 1.875])
 
   def _ConstructAndTestGradientForConfig(
       self, batch, input_shape, filter_shape, in_depth, out_depth, stride,
@@ -328,50 +309,58 @@ class Conv3DTest(test.TestCase):
     input_data = [x * 1.0 / input_size for x in range(0, input_size)]
     filter_data = [x * 1.0 / filter_size for x in range(0, filter_size)]
 
-    if test.is_gpu_available() and use_gpu:
-      data_type = dtypes.float32
+    for data_type in self._DtypesToTest(use_gpu=use_gpu):
       # TODO(mjanusz): Modify gradient_checker to also provide max relative
       # error and synchronize the tolerance levels between the tests for forward
       # and backward computations.
-      if test.is_gpu_available():
+      if data_type == dtypes.float64:
+        tolerance = 1e-8
+      elif data_type == dtypes.float32:
         tolerance = 5e-3
-      else:
-        # As of Aug 2016, higher tolerance is needed for some CPU architectures.
-        # Runs on a single machine can also generate slightly different errors
-        # because of multithreading.
-        tolerance = 8e-3
-    else:
-      data_type = dtypes.float64
-      tolerance = 1e-8
-    with self.test_session(use_gpu=use_gpu):
-      orig_input_tensor = constant_op.constant(
-          input_data, shape=input_shape, dtype=data_type, name="input")
-      filter_tensor = constant_op.constant(
-          filter_data, shape=filter_shape, dtype=data_type, name="filter")
-
-      if data_format == "NCDHW":
-        input_tensor = test_util.NHWCToNCHW(orig_input_tensor)
-        strides = test_util.NHWCToNCHW(strides)
-      else:
-        input_tensor = orig_input_tensor
-
-      conv = nn_ops.conv3d(
-          input_tensor, filter_tensor, strides, padding,
-          data_format=data_format, name="conv")
-
-      if data_format == "NCDHW":
-        conv = test_util.NCHWToNHWC(conv)
-
-      if test_input:
-        err = gradient_checker.compute_gradient_error(orig_input_tensor,
-                                                      input_shape,
-                                                      conv, output_shape)
-      else:
-        err = gradient_checker.compute_gradient_error(filter_tensor,
-                                                      filter_shape, conv,
-                                                      output_shape)
-    print("conv3d gradient error = ", err)
-    self.assertLess(err, tolerance)
+      elif data_type == dtypes.float16:
+        tolerance = 1e-3
+
+      with self.test_session(use_gpu=use_gpu):
+        orig_input_tensor = constant_op.constant(
+            input_data, shape=input_shape, dtype=data_type, name="input")
+        filter_tensor = constant_op.constant(
+            filter_data, shape=filter_shape, dtype=data_type, name="filter")
+
+        if data_format == "NCDHW":
+          input_tensor = test_util.NHWCToNCHW(orig_input_tensor)
+          new_strides = test_util.NHWCToNCHW(strides)
+        else:
+          input_tensor = orig_input_tensor
+          new_strides = strides
+
+        conv = nn_ops.conv3d(
+            input_tensor,
+            filter_tensor,
+            new_strides,
+            padding,
+            data_format=data_format,
+            name="conv")
+
+        if data_format == "NCDHW":
+          conv = test_util.NCHWToNHWC(conv)
+
+        if test_input:
+          jacob_t, jacob_n = gradient_checker.compute_gradient(
+              orig_input_tensor, input_shape, conv, output_shape)
+        else:
+          jacob_t, jacob_n = gradient_checker.compute_gradient(
+              filter_tensor, filter_shape, conv, output_shape)
+
+        if data_type != dtypes.float16:
+          reference_jacob_t = jacob_t
+          err = np.fabs(jacob_t - jacob_n).max()
+        else:
+          # Compare fp16 theoretical gradients to fp32 theoretical gradients,
+          # since fp16 numerical gradients are too imprecise.
+          err = np.fabs(jacob_t - reference_jacob_t).max()
+
+      print("conv3d gradient error = ", err)
+      self.assertLess(err, tolerance)
 
   def ConstructAndTestGradient(self, **kwargs):
     for data_format, use_gpu in GetTestConfigs():
diff --git a/tensorflow/python/kernel_tests/depthwise_conv_op_test.py b/tensorflow/python/kernel_tests/depthwise_conv_op_test.py
index 3298092fbe..f7ae1a0f37 100644
--- a/tensorflow/python/kernel_tests/depthwise_conv_op_test.py
+++ b/tensorflow/python/kernel_tests/depthwise_conv_op_test.py
@@ -122,7 +122,9 @@ class DepthwiseConv2DTest(test.TestCase):
     x1 = [f * 1.0 for f in range(1, total_size_1 + 1)]
     x2 = [f * 1.0 for f in range(1, total_size_2 + 1)]
     with self.test_session(use_gpu=use_gpu) as sess:
-      if data_type == dtypes.float32:
+      if data_type == dtypes.float16:
+        tolerance = 1e-5
+      elif data_type == dtypes.float32:
         tolerance = 1e-5
       else:
         self.assertEqual(data_type, dtypes.float64)
@@ -169,7 +171,7 @@ class DepthwiseConv2DTest(test.TestCase):
                 padding) in enumerate(ConfigsToTest()):
       print("Testing DepthwiseConv2D,", index, "th config:", input_size, "*",
             filter_size, "stride:", stride, "padding:", padding)
-      for data_type in [dtypes.float32, dtypes.float64]:
+      for data_type in [dtypes.float16, dtypes.float32, dtypes.float64]:
         self._VerifyValues(
             input_size, filter_size, stride, padding, data_type, use_gpu=True)
 
@@ -181,7 +183,7 @@ class DepthwiseConv2DTest(test.TestCase):
                 padding) in enumerate(ConfigsToTest()):
       print("Testing DepthwiseConv2DFormat,", index, "th config:", input_size,
             "*", filter_size, "stride:", stride, "padding:", padding)
-      for data_type in [dtypes.float32, dtypes.float64]:
+      for data_type in [dtypes.float16, dtypes.float32, dtypes.float64]:
         self._VerifyValues(
             input_size,
             filter_size,
@@ -318,7 +320,9 @@ class DepthwiseConv2DTest(test.TestCase):
     input_data = [x * 1.0 / input_size for x in range(0, input_size)]
     filter_data = [x * 1.0 / filter_size for x in range(0, filter_size)]
     with self.test_session(use_gpu=use_gpu):
-      if data_type == dtypes.float32:
+      if data_type == dtypes.float16:
+        tolerance = 0.002
+      elif data_type == dtypes.float32:
         tolerance = 0.002
       else:
         self.assertEqual(data_type, dtypes.float64)
@@ -369,6 +373,8 @@ class DepthwiseConv2DTest(test.TestCase):
       print("Testing DepthwiseConv2DInputGrad,", index, "th config:",
             input_size, "*", filter_size, "stride:", stride, "padding:",
             padding)
+      # Note: float16 test for DepthwiseConv2DInputGrad is not enabled,
+      # calculations are not very precise.
       for data_type in [dtypes.float32, dtypes.float64]:
         self._ConstructAndTestGradient(
             input_size,
@@ -389,6 +395,8 @@ class DepthwiseConv2DTest(test.TestCase):
       print("Testing DepthwiseConv2DInputGradFormat,", index, "th config:",
             input_size, "*", filter_size, "stride:", stride, "padding:",
             padding)
+      # Note: float16 test for DepthwiseConv2DInputGradFormat is not enabled,
+      # calculations are not very precise.
       for data_type in [dtypes.float32, dtypes.float64]:
         self._ConstructAndTestGradient(
             input_size,
@@ -407,6 +415,8 @@ class DepthwiseConv2DTest(test.TestCase):
       print("Testing DepthwiseConv2DFilterGrad,", index, "th config:",
             input_size, "*", filter_size, "stride:", stride, "padding:",
             padding)
+      # Note: float16 test for DepthwiseConv2DFilterGrad is not enabled,
+      # calculations are not very precise.
       for data_type in [dtypes.float32, dtypes.float64]:
         self._ConstructAndTestGradient(
             input_size,
@@ -427,6 +437,8 @@ class DepthwiseConv2DTest(test.TestCase):
       print("Testing DepthwiseConv2DFilterGradFormat,", index, "th config:",
             input_size, "*", filter_size, "stride:", stride, "padding:",
             padding)
+      # Note: float16 test for DepthwiseConv2DFilterGradFormat is not enabled,
+      # calculations are not very precise.
       for data_type in [dtypes.float32, dtypes.float64]:
         self._ConstructAndTestGradient(
             input_size,
diff --git a/tensorflow/python/kernel_tests/distributions/BUILD b/tensorflow/python/kernel_tests/distributions/BUILD
index e21446c2ef..e220d05692 100644
--- a/tensorflow/python/kernel_tests/distributions/BUILD
+++ b/tensorflow/python/kernel_tests/distributions/BUILD
@@ -193,6 +193,7 @@ cuda_py_test(
         "//tensorflow/python:math_ops",
         "//tensorflow/python:platform_test",
     ],
+    tags = ["manual"],  # b/69001419
 )
 
 cuda_py_test(
diff --git a/tensorflow/python/kernel_tests/distributions/multinomial_test.py b/tensorflow/python/kernel_tests/distributions/multinomial_test.py
index ebc89f15c5..e24e8ade73 100644
--- a/tensorflow/python/kernel_tests/distributions/multinomial_test.py
+++ b/tensorflow/python/kernel_tests/distributions/multinomial_test.py
@@ -250,13 +250,11 @@ class MultinomialTest(test.TestCase):
     theta = np.array([[1., 2, 3],
                       [2.5, 4, 0.01]], dtype=np.float32)
     theta /= np.sum(theta, 1)[..., array_ops.newaxis]
-    # Ideally we'd be able to test broadcasting but, the multinomial sampler
-    # doesn't support different total counts.
-    n = np.float32(5)
+    n = np.array([[10., 9.], [8., 7.], [6., 5.]], dtype=np.float32)
     with self.test_session() as sess:
-      # batch_shape=[2], event_shape=[3]
+      # batch_shape=[3, 2], event_shape=[3]
       dist = multinomial.Multinomial(n, theta)
-      x = dist.sample(int(250e3), seed=1)
+      x = dist.sample(int(1000e3), seed=1)
       sample_mean = math_ops.reduce_mean(x, 0)
       x_centered = x - sample_mean[array_ops.newaxis, ...]
       sample_cov = math_ops.reduce_mean(math_ops.matmul(
@@ -291,9 +289,9 @@ class MultinomialTest(test.TestCase):
   def testSampleUnbiasedNonScalarBatch(self):
     with self.test_session() as sess:
       dist = multinomial.Multinomial(
-          total_count=5.,
+          total_count=[7., 6., 5.],
           logits=math_ops.log(2. * self._rng.rand(4, 3, 2).astype(np.float32)))
-      n = int(3e3)
+      n = int(3e4)
       x = dist.sample(n, seed=0)
       sample_mean = math_ops.reduce_mean(x, 0)
       # Cyclically rotate event dims left.
diff --git a/tensorflow/python/kernel_tests/pooling_ops_test.py b/tensorflow/python/kernel_tests/pooling_ops_test.py
index a126180414..6be8997cab 100644
--- a/tensorflow/python/kernel_tests/pooling_ops_test.py
+++ b/tensorflow/python/kernel_tests/pooling_ops_test.py
@@ -18,6 +18,7 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
+import os
 import numpy as np
 
 from tensorflow.python.framework import constant_op
@@ -1341,11 +1342,14 @@ class PoolingTest(test.TestCase):
       return
 
     # Test the GPU implementation that uses cudnn for now.
-    # It does not propagate the diff in cases of NaNs
+    saved_nanprop = os.environ.get("TF_ENABLE_MAXPOOL_NANPROP")
+    # Do not propagate the diff in cases of NaNs
+    os.environ["TF_ENABLE_MAXPOOL_NANPROP"] = "0"
     expected_input_backprop_cudnn = [
         0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
         0.0, 0.0
     ]
+
     for v2 in [True, False]:
       self._testMaxPoolGradDirect(
           input_data,
@@ -1361,6 +1365,30 @@ class PoolingTest(test.TestCase):
           use_gpu=True,
           v2=v2)
 
+    # Propagate the diff in cases of NaNs
+    os.environ["TF_ENABLE_MAXPOOL_NANPROP"] = "1"
+    expected_input_backprop_cudnn = expected_input_backprop_tf_cpu
+
+    for v2 in [True, False]:
+      self._testMaxPoolGradDirect(
+          input_data,
+          output_backprop,
+          expected_input_backprop_cudnn,
+          input_sizes=[1, 4, 4, 1],
+          output_sizes=[1, 3, 3, 1],
+          window_rows=2,
+          window_cols=2,
+          row_stride=1,
+          col_stride=1,
+          padding="VALID",
+          use_gpu=True,
+          v2=v2)
+
+    if saved_nanprop:
+      os.environ["TF_ENABLE_MAXPOOL_NANPROP"] = saved_nanprop
+    else:
+      del os.environ["TF_ENABLE_MAXPOOL_NANPROP"]
+
   def _testMaxPoolGradDirectWithNans2_2(self):
     input_data = [float("nan")] * 16
     output_backprop = [
@@ -1391,11 +1419,14 @@ class PoolingTest(test.TestCase):
       return
 
     # Test the GPU implementation that uses cudnn for now.
-    # It does not propagate the diff in cases of NaNs
+    saved_nanprop = os.environ.get("TF_ENABLE_MAXPOOL_NANPROP")
+    # Do not propagate the diff in cases of NaNs
+    os.environ["TF_ENABLE_MAXPOOL_NANPROP"] = "0"
     expected_input_backprop_cudnn = [
         0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
         0.0, 0.0
     ]
+
     for v2 in [True, False]:
       self._testMaxPoolGradDirect(
           input_data,
@@ -1411,6 +1442,30 @@ class PoolingTest(test.TestCase):
           use_gpu=True,
           v2=v2)
 
+    # Propagate the diff in cases of NaNs
+    os.environ["TF_ENABLE_MAXPOOL_NANPROP"] = "1"
+    expected_input_backprop_cudnn = expected_input_backprop_tf_cpu
+
+    for v2 in [True, False]:
+      self._testMaxPoolGradDirect(
+          input_data,
+          output_backprop,
+          expected_input_backprop_cudnn,
+          input_sizes=[1, 4, 4, 1],
+          output_sizes=[1, 3, 3, 1],
+          window_rows=2,
+          window_cols=2,
+          row_stride=1,
+          col_stride=1,
+          padding="VALID",
+          use_gpu=True,
+          v2=v2)
+
+    if saved_nanprop:
+      os.environ["TF_ENABLE_MAXPOOL_NANPROP"] = saved_nanprop
+    else:
+      del os.environ["TF_ENABLE_MAXPOOL_NANPROP"]
+
   def testMaxPoolGradDirect(self):
     self._testMaxPoolGradDirect1_1()
     self._testMaxPoolGradDirect1_2()
diff --git a/tensorflow/python/kernel_tests/reader_ops_test.py b/tensorflow/python/kernel_tests/reader_ops_test.py
index 5630259b7b..223a4b2c87 100644
--- a/tensorflow/python/kernel_tests/reader_ops_test.py
+++ b/tensorflow/python/kernel_tests/reader_ops_test.py
@@ -35,6 +35,9 @@ from tensorflow.python.ops import data_flow_ops
 from tensorflow.python.ops import io_ops
 from tensorflow.python.ops import variables
 from tensorflow.python.platform import test
+from tensorflow.python.training import coordinator
+from tensorflow.python.training import input as input_lib
+from tensorflow.python.training import queue_runner_impl
 from tensorflow.python.util import compat
 
 prefix_path = "tensorflow/core/lib"
@@ -1011,6 +1014,25 @@ class LMDBReaderTest(test.TestCase):
                                     "\\(requested 1, current size 0\\)"):
         k, v = sess.run([key, value])
 
+  def testReadFromSameFile(self):
+    with self.test_session() as sess:
+      reader1 = io_ops.LMDBReader(name="test_read_from_same_file1")
+      reader2 = io_ops.LMDBReader(name="test_read_from_same_file2")
+      filename_queue = input_lib.string_input_producer(
+          [self.db_path], num_epochs=None)
+      key1, value1 = reader1.read(filename_queue)
+      key2, value2 = reader2.read(filename_queue)
+
+      coord = coordinator.Coordinator()
+      threads = queue_runner_impl.start_queue_runners(sess, coord=coord)
+      for _ in range(3):
+        for _ in range(10):
+          k1, v1, k2, v2 = sess.run([key1, value1, key2, value2])
+          self.assertAllEqual(compat.as_bytes(k1), compat.as_bytes(k2))
+          self.assertAllEqual(compat.as_bytes(v1), compat.as_bytes(v2))
+      coord.request_stop()
+      coord.join(threads)
+
   def testReadFromFolder(self):
     with self.test_session() as sess:
       reader = io_ops.LMDBReader(name="test_read_from_folder")
@@ -1029,6 +1051,26 @@ class LMDBReaderTest(test.TestCase):
                                     "\\(requested 1, current size 0\\)"):
         k, v = sess.run([key, value])
 
+  def testReadFromFileRepeatedly(self):
+    with self.test_session() as sess:
+      reader = io_ops.LMDBReader(name="test_read_from_file_repeated")
+      filename_queue = input_lib.string_input_producer(
+          [self.db_path], num_epochs=None)
+      key, value = reader.read(filename_queue)
+
+      coord = coordinator.Coordinator()
+      threads = queue_runner_impl.start_queue_runners(sess, coord=coord)
+      # Iterate over the lmdb 3 times.
+      for _ in range(3):
+        # Go over all 10 records each time.
+        for j in range(10):
+          k, v = sess.run([key, value])
+          self.assertAllEqual(compat.as_bytes(k), compat.as_bytes(str(j)))
+          self.assertAllEqual(
+              compat.as_bytes(v), compat.as_bytes(str(chr(ord("a") + j))))
+      coord.request_stop()
+      coord.join(threads)
+
 
 if __name__ == "__main__":
   test.main()
diff --git a/tensorflow/python/kernel_tests/segment_reduction_ops_test.py b/tensorflow/python/kernel_tests/segment_reduction_ops_test.py
index 516a9d000e..99f9f09690 100644
--- a/tensorflow/python/kernel_tests/segment_reduction_ops_test.py
+++ b/tensorflow/python/kernel_tests/segment_reduction_ops_test.py
@@ -323,8 +323,9 @@ class UnsortedSegmentSumTest(SegmentReductionHelper):
   def testBadIndices(self):
     # Note: GPU kernel does not return the out-of-range error needed for this
     # test, so this test is marked as cpu-only.
+    # Note: With PR #13055 a negative index will be ignored silently.
     with self.test_session(use_gpu=False):
-      for bad in [[-1]], [[7]]:
+      for bad in [[2]], [[7]]:
         unsorted = math_ops.unsorted_segment_sum([[17]], bad, num_segments=2)
         with self.assertRaisesOpError(
             r"segment_ids\[0,0\] = %d is out of range \[0, 2\)" % bad[0][0]):
@@ -360,6 +361,32 @@ class UnsortedSegmentSumTest(SegmentReductionHelper):
             x_init_value=np_x.astype(np.double), delta=1)
       self.assertAllClose(jacob_t, jacob_n)
 
+  def testDropNegatives(self):
+    # Note: the test is done by replacing segment_ids with 8 to -1
+    # for index  and replace values generated by numpy with 0.
+    dtypes = [
+        dtypes_lib.float32, dtypes_lib.float64, dtypes_lib.int64,
+        dtypes_lib.int32, dtypes_lib.complex64, dtypes_lib.complex128
+    ]
+    indices_flat = np.array([0, 4, 0, 8, 3, 8, 4, 7, 7, 3])
+    num_segments = 12
+    for indices in indices_flat, indices_flat.reshape(5, 2):
+      shape = indices.shape + (2,)
+      for dtype in dtypes:
+        with self.test_session(use_gpu=True):
+          tf_x, np_x = self._input(shape, dtype=dtype)
+          np_ans = self._segmentReduce(
+              indices, np_x, np.add, op2=None, num_out_rows=num_segments)
+          # Replace np_ans[8] with 0 for the value
+          np_ans[8:] = 0
+          # Replace 8 with -1 in indices
+          np.place(indices, indices == 8, [-1])
+          s = math_ops.unsorted_segment_sum(
+              data=tf_x, segment_ids=indices, num_segments=num_segments)
+          tf_ans = s.eval()
+        self.assertAllClose(np_ans, tf_ans)
+        self.assertShapeEqual(np_ans, s)
+
 
 class SparseSegmentReductionHelper(SegmentReductionHelper):
 
diff --git a/tensorflow/python/kernel_tests/shape_ops_test.py b/tensorflow/python/kernel_tests/shape_ops_test.py
index a9fc699b21..7368251ab6 100644
--- a/tensorflow/python/kernel_tests/shape_ops_test.py
+++ b/tensorflow/python/kernel_tests/shape_ops_test.py
@@ -258,6 +258,16 @@ class ShapeOpsTest(test.TestCase):
       self.assertAllEqual([True], array_ops.expand_dims(inp, 0).eval())
       self.assertAllEqual([True], array_ops.expand_dims(inp, -1).eval())
 
+  def testExpandDimsDimType(self):
+    for dtype in [dtypes.int32, dtypes.int64]:
+      x = np.zeros([2])
+      np_ans = np.expand_dims(x, axis=0)
+      with self.test_session(use_gpu=True):
+        tensor = array_ops.expand_dims(x, constant_op.constant(0, dtype))
+        tf_ans = tensor.eval()
+      self.assertShapeEqual(np_ans, tensor)
+      self.assertAllEqual(np_ans, tf_ans)
+
   def _compareSqueeze(self, x, squeeze_dims, use_gpu):
     with self.test_session(use_gpu=use_gpu):
       if squeeze_dims:
diff --git a/tensorflow/python/kernel_tests/unique_op_test.py b/tensorflow/python/kernel_tests/unique_op_test.py
index a50f53b3cd..6390b7c518 100644
--- a/tensorflow/python/kernel_tests/unique_op_test.py
+++ b/tensorflow/python/kernel_tests/unique_op_test.py
@@ -22,6 +22,7 @@ import numpy as np
 
 from tensorflow.python.framework import dtypes
 from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import gen_array_ops
 from tensorflow.python.platform import test
 
 
@@ -61,6 +62,32 @@ class UniqueTest(test.TestCase):
     for i in range(len(x)):
       self.assertEqual(x[i], tf_y[tf_idx[i]].decode('ascii'))
 
+  def testInt32Axis(self):
+    x = np.array([[1, 0, 0], [1, 0, 0], [2, 0, 0]])
+    with self.test_session() as sess:
+      y0, idx0 = gen_array_ops.unique_v2(x, axis=[0])
+      tf_y0, tf_idx0 = sess.run([y0, idx0])
+      y1, idx1 = gen_array_ops.unique_v2(x, axis=[1])
+      tf_y1, tf_idx1 = sess.run([y1, idx1])
+    self.assertAllEqual(tf_y0, np.array([[1, 0, 0], [2, 0, 0]]))
+    self.assertAllEqual(tf_idx0, np.array([0, 0, 1]))
+    self.assertAllEqual(tf_y1, np.array([[1, 0], [1, 0], [2, 0]]))
+    self.assertAllEqual(tf_idx1, np.array([0, 1, 1]))
+
+  def testInt32V2(self):
+    # This test is only temporary, once V2 is used
+    # by default, the axis will be wrapped to allow `axis=None`.
+    x = np.random.randint(2, high=10, size=7000)
+    with self.test_session() as sess:
+      y, idx = gen_array_ops.unique_v2(x, axis=[])
+      tf_y, tf_idx = sess.run([y, idx])
+
+    self.assertEqual(len(x), len(tf_idx))
+    self.assertEqual(len(tf_y), len(np.unique(x)))
+    for i in range(len(x)):
+      self.assertEqual(x[i], tf_y[tf_idx[i]])
+
+
 class UniqueWithCountsTest(test.TestCase):
 
   def testInt32(self):
diff --git a/tensorflow/python/layers/base.py b/tensorflow/python/layers/base.py
index 74b85da845..6be2bc3e76 100644
--- a/tensorflow/python/layers/base.py
+++ b/tensorflow/python/layers/base.py
@@ -221,7 +221,7 @@ class Layer(object):
 
     Weight updates (for instance, the updates of the moving mean and variance
     in a BatchNormalization layer) may be dependent on the inputs passed
-    when calling a layer. Hence, when reusing a same layer on
+    when calling a layer. Hence, when reusing the same layer on
     different inputs `a` and `b`, some entries in `layer.updates` may be
     dependent on `a` and some on `b`. This method automatically keeps track
     of dependencies.
@@ -295,9 +295,9 @@ class Layer(object):
     """Add loss tensor(s), potentially dependent on layer inputs.
 
     Some losses (for instance, activity regularization losses) may be dependent
-    on the inputs passed when calling a layer. Hence, when reusing a same layer
-    on different inputs `a` and `b`, some entries in `layer.losses` may be
-    dependent on `a` and some on `b`. This method automatically keeps track
+    on the inputs passed when calling a layer. Hence, when reusing the same
+    layer on different inputs `a` and `b`, some entries in `layer.losses` may
+    be dependent on `a` and some on `b`. This method automatically keeps track
     of dependencies.
 
     The `get_losses_for` method allows to retrieve the losses relevant to a
diff --git a/tensorflow/python/layers/convolutional.py b/tensorflow/python/layers/convolutional.py
index 0c7ce02835..8c327d7e27 100644
--- a/tensorflow/python/layers/convolutional.py
+++ b/tensorflow/python/layers/convolutional.py
@@ -813,6 +813,7 @@ def conv3d(inputs,
       bias_constraint=bias_constraint,
       trainable=trainable,
       name=name,
+      dtype=inputs.dtype.base_dtype,
       _reuse=reuse,
       _scope=name)
   return layer.apply(inputs)
@@ -1746,6 +1747,7 @@ def conv3d_transpose(inputs,
       bias_constraint=bias_constraint,
       trainable=trainable,
       name=name,
+      dtype=inputs.dtype.base_dtype,
       _reuse=reuse,
       _scope=name)
   return layer.apply(inputs)
diff --git a/tensorflow/python/layers/normalization.py b/tensorflow/python/layers/normalization.py
index 9d9b2b3941..83237b8733 100644
--- a/tensorflow/python/layers/normalization.py
+++ b/tensorflow/python/layers/normalization.py
@@ -26,6 +26,7 @@ import numpy as np
 
 from tensorflow.python.eager import context
 from tensorflow.python.framework import constant_op
+from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
 from tensorflow.python.framework import tensor_shape
 from tensorflow.python.layers import base
@@ -239,6 +240,12 @@ class BatchNormalization(base.Layer):
         raise ValueError('Unsupported axis, fused batch norm only supports '
                          'axis == [1] or axis == [3]')
 
+    # Raise parameters of fp16 batch norm to fp32
+    if self.dtype == dtypes.float16:
+      param_dtype = dtypes.float32
+    else:
+      param_dtype = self.dtype or dtypes.float32
+
     axis_to_dim = {x: input_shape[x].value for x in self.axis}
     for x in axis_to_dim:
       if axis_to_dim[x] is None:
@@ -260,28 +267,34 @@ class BatchNormalization(base.Layer):
           self.axis[idx] = x + 1      # Account for added dimension
 
     if self.scale:
-      self.gamma = self.add_variable(name='gamma',
-                                     shape=param_shape,
-                                     initializer=self.gamma_initializer,
-                                     regularizer=self.gamma_regularizer,
-                                     constraint=self.gamma_constraint,
-                                     trainable=True)
+      self.gamma = self.add_variable(
+          name='gamma',
+          shape=param_shape,
+          dtype=param_dtype,
+          initializer=self.gamma_initializer,
+          regularizer=self.gamma_regularizer,
+          constraint=self.gamma_constraint,
+          trainable=True)
     else:
       self.gamma = None
       if self.fused:
-        self._gamma_const = array_ops.constant(1.0, shape=param_shape)
+        self._gamma_const = array_ops.constant(
+            1.0, dtype=param_dtype, shape=param_shape)
 
     if self.center:
-      self.beta = self.add_variable(name='beta',
-                                    shape=param_shape,
-                                    initializer=self.beta_initializer,
-                                    regularizer=self.beta_regularizer,
-                                    constraint=self.beta_constraint,
-                                    trainable=True)
+      self.beta = self.add_variable(
+          name='beta',
+          shape=param_shape,
+          dtype=param_dtype,
+          initializer=self.beta_initializer,
+          regularizer=self.beta_regularizer,
+          constraint=self.beta_constraint,
+          trainable=True)
     else:
       self.beta = None
       if self.fused:
-        self._beta_const = array_ops.constant(0.0, shape=param_shape)
+        self._beta_const = array_ops.constant(
+            0.0, dtype=param_dtype, shape=param_shape)
 
     # Disable variable partitioning when creating the moving mean and variance
     try:
@@ -293,12 +306,14 @@ class BatchNormalization(base.Layer):
       self.moving_mean = self.add_variable(
           name='moving_mean',
           shape=param_shape,
+          dtype=param_dtype,
           initializer=self.moving_mean_initializer,
           trainable=False)
 
       self.moving_variance = self.add_variable(
           name='moving_variance',
           shape=param_shape,
+          dtype=param_dtype,
           initializer=self.moving_variance_initializer,
           trainable=False)
 
@@ -312,10 +327,12 @@ class BatchNormalization(base.Layer):
         # stack to be cleared. The nested ones use a `lambda` to set the desired
         # device and ignore any devices that may be set by the custom getter.
         def _renorm_variable(name, shape):
-          var = self.add_variable(name=name,
-                                  shape=shape,
-                                  initializer=init_ops.zeros_initializer(),
-                                  trainable=False)
+          var = self.add_variable(
+              name=name,
+              shape=shape,
+              dtype=param_dtype,
+              initializer=init_ops.zeros_initializer(),
+              trainable=False)
           return var
 
         with ops.device(None):
@@ -356,7 +373,6 @@ class BatchNormalization(base.Layer):
 
   def _fused_batch_norm(self, inputs, training):
     """Returns the output of fused batch norm."""
-    # TODO(reedwm): Add support for fp16 inputs.
     beta = self.beta if self.center else self._beta_const
     gamma = self.gamma if self.scale else self._gamma_const
 
@@ -752,6 +768,7 @@ def batch_normalization(inputs,
       virtual_batch_size=virtual_batch_size,
       adjustment=adjustment,
       name=name,
+      dtype=inputs.dtype.base_dtype,
       _reuse=reuse,
       _scope=name)
   return layer.apply(inputs, training=training)
diff --git a/tensorflow/python/layers/normalization_test.py b/tensorflow/python/layers/normalization_test.py
index 90ebdc8c86..7c91c3284e 100644
--- a/tensorflow/python/layers/normalization_test.py
+++ b/tensorflow/python/layers/normalization_test.py
@@ -68,11 +68,12 @@ class BNTest(test.TestCase):
              use_gpu,
              is_fused,
              restore=False,
-             freeze_mode=False):
+             freeze_mode=False,
+             dtype=dtypes.float32):
     ops.reset_default_graph()
     graph = ops.get_default_graph()
     with self.test_session(graph=graph, use_gpu=use_gpu) as sess:
-      image = array_ops.placeholder(dtype='float32', shape=shape)
+      image = array_ops.placeholder(dtype=dtype, shape=shape)
       loss, train_op, saver = self._simple_model(image, is_fused, freeze_mode)
       if restore:
         saver.restore(sess, checkpoint_path)
@@ -80,7 +81,7 @@ class BNTest(test.TestCase):
         sess.run(variables.global_variables_initializer())
       np.random.seed(0)
       for _ in range(2):
-        image_val = np.random.rand(*shape).astype(np.float32)
+        image_val = np.random.rand(*shape).astype(dtype.as_numpy_dtype)
         sess.run([loss, train_op], feed_dict={image: image_val})
       if restore:
         all_vars = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)
@@ -90,15 +91,69 @@ class BNTest(test.TestCase):
         saver.save(sess, checkpoint_path)
 
   def _infer(self, checkpoint_path, image_val, shape, use_gpu, is_fused):
+    dtype = image_val.dtype
     ops.reset_default_graph()
     graph = ops.get_default_graph()
     with self.test_session(graph=graph, use_gpu=use_gpu) as sess:
-      image = array_ops.placeholder(dtype='float32', shape=shape)
+      image = array_ops.placeholder(dtype=dtype, shape=shape)
       loss, _, saver = self._simple_model(image, is_fused, True)
       saver.restore(sess, checkpoint_path)
       loss_val = sess.run(loss, feed_dict={image: image_val})
       return loss_val
 
+  def _trainEvalSequence(self, dtype, train1_use_gpu, train2_use_gpu,
+                         infer_use_gpu):
+    batch, height, width, input_channels = 2, 4, 5, 3
+    shape = [batch, height, width, input_channels]
+    checkpoint = os.path.join(self.get_temp_dir(), 'cp_%s_%s_%s_%s' %
+                              (dtype, train1_use_gpu, train2_use_gpu,
+                               infer_use_gpu))
+
+    self._train(
+        checkpoint,
+        shape,
+        use_gpu=train1_use_gpu,
+        is_fused=True,
+        restore=False,
+        freeze_mode=False,
+        dtype=dtype)
+
+    train_vars = self._train(
+        checkpoint,
+        shape,
+        use_gpu=train2_use_gpu,
+        is_fused=True,
+        restore=True,
+        freeze_mode=False,
+        dtype=dtype)
+
+    np.random.seed(0)
+    image_val = np.random.rand(batch, height, width, input_channels).astype(
+        dtype.as_numpy_dtype)
+    loss_val = self._infer(
+        checkpoint, image_val, shape, use_gpu=infer_use_gpu, is_fused=True)
+
+    return train_vars, loss_val
+
+  def testHalfPrecision(self):
+    ref_vars, ref_loss = self._trainEvalSequence(
+        dtype=dtypes.float32,
+        train1_use_gpu=True,
+        train2_use_gpu=True,
+        infer_use_gpu=True)
+
+    self.assertEqual(len(ref_vars), 5)
+
+    for train1_use_gpu in [True, False]:
+      for train2_use_gpu in [True, False]:
+        for infer_use_gpu in [True, False]:
+          test_vars, test_loss = self._trainEvalSequence(
+              dtypes.float16, train1_use_gpu, train2_use_gpu, infer_use_gpu)
+          self.assertEqual(len(test_vars), 5)
+          for test_var, ref_var in zip(test_vars, ref_vars):
+            self.assertAllClose(test_var, ref_var, rtol=1.e-3, atol=1.e-3)
+          self.assertAllClose(test_loss, ref_loss, rtol=1.e-3, atol=1.e-3)
+
   def _testCheckpoint(self, is_fused_checkpoint_a, is_fused_checkpoint_b,
                       use_gpu_checkpoint_a, use_gpu_checkpoint_b,
                       use_gpu_test_a, use_gpu_test_b, freeze_mode):
@@ -218,6 +273,35 @@ class BNTest(test.TestCase):
         ops.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES),
         bn.trainable_variables)
 
+  def testCreateFusedBNFloat16(self):
+    # Call layer.
+    bn = normalization_layers.BatchNormalization(axis=1, fused=True)
+    inputs = random_ops.random_uniform(
+        (5, 4, 3, 3), seed=1, dtype=dtypes.float16)
+    training = array_ops.placeholder(dtype='bool')
+    outputs = bn.apply(inputs, training=training)
+
+    # Verify shape.
+    self.assertListEqual(outputs.get_shape().as_list(), [5, 4, 3, 3])
+
+    # Verify layer attributes.
+    self.assertEqual(len(bn.updates), 2)
+    self.assertEqual(len(bn.variables), 4)
+    self.assertEqual(len(bn.trainable_variables), 2)
+    self.assertEqual(len(bn.non_trainable_variables), 2)
+    for var in bn.variables:
+      self.assertEqual(var.dtype, dtypes.float32_ref)
+
+    # Test that updates were created and added to UPDATE_OPS.
+    self.assertEqual(len(bn.updates), 2)
+    self.assertListEqual(
+        ops.get_collection(ops.GraphKeys.UPDATE_OPS), bn.updates)
+
+    # Test that weights were created and added to TRAINABLE_VARIABLES.
+    self.assertListEqual(
+        ops.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES),
+        bn.trainable_variables)
+
   def test3DInputAxis1(self):
     epsilon = 1e-3
     bn = normalization_layers.BatchNormalization(
diff --git a/tensorflow/python/ops/array_ops.py b/tensorflow/python/ops/array_ops.py
index c3c7ecd080..38eff54c69 100644
--- a/tensorflow/python/ops/array_ops.py
+++ b/tensorflow/python/ops/array_ops.py
@@ -1132,7 +1132,7 @@ def concat(values, axis, name="concat"):
   return gen_array_ops._concat_v2(values=values, axis=axis, name=name)
 
 
-def boolean_mask(tensor, mask, name="boolean_mask"):
+def boolean_mask(tensor, mask, name="boolean_mask", axis=None):
   """Apply boolean mask to tensor.  Numpy equivalent is `tensor[mask]`.
 
   ```python
@@ -1146,11 +1146,17 @@ def boolean_mask(tensor, mask, name="boolean_mask"):
   the first K dimensions of `tensor`'s shape.  We then have:
     `boolean_mask(tensor, mask)[i, j1,...,jd] = tensor[i1,...,iK,j1,...,jd]`
   where `(i1,...,iK)` is the ith `True` entry of `mask` (row-major order).
+  The `axis` could be used with `mask` to indicate the axis to mask from.
+  In that case, `axis + dim(mask) <= dim(tensor)` and `mask`'s shape must match
+  the first `axis + dim(mask)` dimensions of `tensor`'s shape.
 
   Args:
     tensor:  N-D tensor.
     mask:  K-D boolean tensor, K <= N and K must be known statically.
     name:  A name for this operation (optional).
+    axis:  A 0-D int Tensor representing the axis in `tensor` to mask from.
+      By default, axis is 0 which will mask from the first dimension. Otherwise
+      K + axis <= N.
 
   Returns:
     (N-K+1)-dimensional tensor populated by entries in `tensor` corresponding
@@ -1169,10 +1175,10 @@ def boolean_mask(tensor, mask, name="boolean_mask"):
   ```
   """
 
-  def _apply_mask_1d(reshaped_tensor, mask):
+  def _apply_mask_1d(reshaped_tensor, mask, axis=None):
     """Mask tensor along dimension 0 with a 1-D mask."""
     indices = squeeze(where(mask), squeeze_dims=[1])
-    return gather(reshaped_tensor, indices)
+    return gather(reshaped_tensor, indices, axis=axis)
 
   with ops.name_scope(name, values=[tensor, mask]):
     tensor = ops.convert_to_tensor(tensor, name="tensor")
@@ -1187,19 +1193,23 @@ def boolean_mask(tensor, mask, name="boolean_mask"):
       raise ValueError(
           "Number of mask dimensions must be specified, even if some dimensions"
           " are None.  E.g. shape=[None] is ok, but shape=None is not.")
-    shape_tensor[:ndims_mask].assert_is_compatible_with(shape_mask)
+    axis = 0 if axis is None else axis
+    shape_tensor[axis:axis + ndims_mask].assert_is_compatible_with(shape_mask)
 
-    leading_size = gen_math_ops._prod(shape(tensor)[:ndims_mask], [0])
+    leading_size = gen_math_ops._prod(
+        shape(tensor)[axis:axis + ndims_mask], [0])
     tensor = reshape(tensor,
-                     concat([[leading_size],
-                             shape(tensor)[ndims_mask:]], 0))
-    first_dim = shape_tensor[:ndims_mask].num_elements()
+                     concat([
+                         shape(tensor)[:axis], [leading_size],
+                         shape(tensor)[axis + ndims_mask:]
+                     ], 0))
+    first_dim = shape_tensor[axis:axis + ndims_mask].num_elements()
     tensor.set_shape(
-        tensor_shape.as_shape([first_dim])
-        .concatenate(shape_tensor[ndims_mask:]))
+        tensor_shape.as_shape(shape_tensor[:axis]).concatenate([first_dim])
+        .concatenate(shape_tensor[axis + ndims_mask:]))
 
     mask = reshape(mask, [-1])
-    return _apply_mask_1d(tensor, mask)
+    return _apply_mask_1d(tensor, mask, axis)
 
 
 def sparse_mask(a, mask_indices, name=None):
@@ -1521,7 +1531,8 @@ def zeros_like(tensor, dtype=None, name=None, optimize=True):
   Args:
     tensor: A `Tensor`.
     dtype: A type for the returned `Tensor`. Must be `float32`, `float64`,
-    `int8`, `int16`, `int32`, `int64`, `uint8`, `complex64`, or `complex128`.
+      `int8`, `uint8`, `int16`, `uint16`, int32`, `int64`,
+      `complex64`, `complex128` or `bool`.
     name: A name for the operation (optional).
     optimize: if true, attempt to statically determine the shape of 'tensor'
     and encode it as a constant.
@@ -1572,8 +1583,8 @@ def ones_like(tensor, dtype=None, name=None, optimize=True):
   Args:
     tensor: A `Tensor`.
     dtype: A type for the returned `Tensor`. Must be `float32`, `float64`,
-      `int8`, `int16`, `int32`, `int64`, `uint8`, `complex64`, `complex128` or
-      `bool`.
+      `int8`, `uint8`, `int16`, `uint16`, int32`, `int64`,
+      `complex64`, `complex128` or `bool`.
     name: A name for the operation (optional).
     optimize: if true, attempt to statically determine the shape of 'tensor'
     and encode it as a constant.
diff --git a/tensorflow/python/ops/distributions/dirichlet.py b/tensorflow/python/ops/distributions/dirichlet.py
index 923696a553..2accedf1b9 100644
--- a/tensorflow/python/ops/distributions/dirichlet.py
+++ b/tensorflow/python/ops/distributions/dirichlet.py
@@ -196,7 +196,7 @@ class Dirichlet(distribution.Distribution):
         alpha=self.concentration,
         dtype=self.dtype,
         seed=seed)
-    return gamma_sample / math_ops.reduce_sum(gamma_sample, -1, keep_dims=True)
+    return gamma_sample / math_ops.reduce_sum(gamma_sample, -1, keepdims=True)
 
   @distribution_util.AppendDocstring(_dirichlet_sample_note)
   def _log_prob(self, x):
diff --git a/tensorflow/python/ops/distributions/multinomial.py b/tensorflow/python/ops/distributions/multinomial.py
index 00b5697c83..04762565c2 100644
--- a/tensorflow/python/ops/distributions/multinomial.py
+++ b/tensorflow/python/ops/distributions/multinomial.py
@@ -23,6 +23,7 @@ from tensorflow.python.framework import ops
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import check_ops
 from tensorflow.python.ops import control_flow_ops
+from tensorflow.python.ops import functional_ops
 from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import nn_ops
 from tensorflow.python.ops import random_ops
@@ -140,6 +141,8 @@ class Multinomial(distribution.Distribution):
 
   counts = [[2., 1, 1], [3, 1, 1]]
   dist.prob(counts)  # Shape [2]
+
+  dist.sample(5) # Shape [5, 2, 3]
   ```
   """
 
@@ -231,29 +234,36 @@ class Multinomial(distribution.Distribution):
 
   def _sample_n(self, n, seed=None):
     n_draws = math_ops.cast(self.total_count, dtype=dtypes.int32)
-    if self.total_count.get_shape().ndims is not None:
-      if self.total_count.get_shape().ndims != 0:
-        raise NotImplementedError(
-            "Sample only supported for scalar number of draws.")
-    elif self.validate_args:
-      is_scalar = check_ops.assert_rank(
-          n_draws, 0,
-          message="Sample only supported for scalar number of draws.")
-      n_draws = control_flow_ops.with_dependencies([is_scalar], n_draws)
     k = self.event_shape_tensor()[0]
-    # Flatten batch dims so logits has shape [B, k],
-    # where B = reduce_prod(self.batch_shape_tensor()).
-    x = random_ops.multinomial(
-        logits=array_ops.reshape(self.logits, [-1, k]),
-        num_samples=n * n_draws,
-        seed=seed)
-    x = array_ops.reshape(x, shape=[-1, n, n_draws])
-    x = math_ops.reduce_sum(array_ops.one_hot(x, depth=k),
-                            axis=-2)  # shape: [B, n, k]
+
+    # boardcast the total_count and logits to same shape
+    n_draws = array_ops.ones_like(
+        self.logits[..., 0], dtype=n_draws.dtype) * n_draws
+    logits = array_ops.ones_like(
+        n_draws[..., array_ops.newaxis], dtype=self.logits.dtype) * self.logits
+
+    # flatten the total_count and logits
+    flat_logits = array_ops.reshape(logits, [-1, k])  # [B1B2...Bm, k]
+    flat_ndraws = n * array_ops.reshape(n_draws, [-1])  # [B1B2...Bm]
+
+    # computes each total_count and logits situation by map_fn
+    def _sample_single(args):
+      logits, n_draw = args[0], args[1]  # [K], []
+      x = random_ops.multinomial(logits[array_ops.newaxis, ...], n_draw,
+                                 seed)  # [1, n*n_draw]
+      x = array_ops.reshape(x, shape=[n, -1])  # [n, n_draw]
+      x = math_ops.reduce_sum(array_ops.one_hot(x, depth=k), axis=-2)  # [n, k]
+      return x
+
+    x = functional_ops.map_fn(
+        _sample_single, [flat_logits, flat_ndraws],
+        dtype=self.dtype)  # [B1B2...Bm, n, k]
+
+    # reshape the results to proper shape
     x = array_ops.transpose(x, perm=[1, 0, 2])
     final_shape = array_ops.concat([[n], self.batch_shape_tensor(), [k]], 0)
-    x = array_ops.reshape(x, final_shape)
-    return math_ops.cast(x, self.dtype)
+    x = array_ops.reshape(x, final_shape)  # [n, B1, B2,..., Bm, k]
+    return x
 
   @distribution_util.AppendDocstring(_multinomial_sample_note)
   def _log_prob(self, counts):
diff --git a/tensorflow/python/ops/image_ops_impl.py b/tensorflow/python/ops/image_ops_impl.py
index 2946dbe81e..b9c89d62d5 100644
--- a/tensorflow/python/ops/image_ops_impl.py
+++ b/tensorflow/python/ops/image_ops_impl.py
@@ -1119,9 +1119,8 @@ def rgb_to_grayscale(images, name=None):
     # https://en.wikipedia.org/wiki/Luma_%28video%29
     rgb_weights = [0.2989, 0.5870, 0.1140]
     rank_1 = array_ops.expand_dims(array_ops.rank(images) - 1, 0)
-    gray_float = math_ops.reduce_sum(flt_image * rgb_weights,
-                                     rank_1,
-                                     keep_dims=True)
+    gray_float = math_ops.reduce_sum(
+        flt_image * rgb_weights, rank_1, keepdims=True)
     gray_float.set_shape(images.get_shape()[:-1].concatenate([1]))
     return convert_image_dtype(gray_float, orig_dtype, name=name)
 
@@ -1212,26 +1211,7 @@ def adjust_hue(image, delta, name=None):
     orig_dtype = image.dtype
     flt_image = convert_image_dtype(image, dtypes.float32)
 
-    # TODO(zhengxq): we will switch to the fused version after we add a GPU
-    # kernel for that.
-    fused = os.environ.get('TF_ADJUST_HUE_FUSED', '')
-    fused = fused.lower() in ('true', 't', '1')
-
-    if not fused:
-      hsv = gen_image_ops.rgb_to_hsv(flt_image)
-
-      hue = array_ops.slice(hsv, [0, 0, 0], [-1, -1, 1])
-      saturation = array_ops.slice(hsv, [0, 0, 1], [-1, -1, 1])
-      value = array_ops.slice(hsv, [0, 0, 2], [-1, -1, 1])
-
-      # Note that we add 2*pi to guarantee that the resulting hue is a positive
-      # floating point number since delta is [-0.5, 0.5].
-      hue = math_ops.mod(hue + (delta + 1.), 1.)
-
-      hsv_altered = array_ops.concat([hue, saturation, value], 2)
-      rgb_altered = gen_image_ops.hsv_to_rgb(hsv_altered)
-    else:
-      rgb_altered = gen_image_ops.adjust_hue(flt_image, delta)
+    rgb_altered = gen_image_ops.adjust_hue(flt_image, delta)
 
     return convert_image_dtype(rgb_altered, orig_dtype)
 
diff --git a/tensorflow/python/ops/linalg_ops.py b/tensorflow/python/ops/linalg_ops.py
index 2cb467c891..be9beee633 100644
--- a/tensorflow/python/ops/linalg_ops.py
+++ b/tensorflow/python/ops/linalg_ops.py
@@ -30,6 +30,7 @@ from tensorflow.python.ops import math_ops
 from tensorflow.python.ops.gen_linalg_ops import *
 # pylint: enable=wildcard-import
 from tensorflow.python.util import compat
+from tensorflow.python.util import deprecation
 
 # Names below are lower_case.
 # pylint: disable=invalid-name
@@ -438,7 +439,14 @@ def svd(tensor, full_matrices=False, compute_uv=True, name=None):
 
 
 # pylint: disable=redefined-builtin
-def norm(tensor, ord='euclidean', axis=None, keep_dims=False, name=None):
+@deprecation.deprecated_args(
+    None, 'keep_dims is deprecated, use keepdims instead', 'keep_dims')
+def norm(tensor,
+         ord='euclidean',
+         axis=None,
+         keepdims=None,
+         name=None,
+         keep_dims=None):
   r"""Computes the norm of vectors, matrices, and tensors.
 
   This function can compute several different vector norms (the 1-norm, the
@@ -471,13 +479,14 @@ def norm(tensor, ord='euclidean', axis=None, keep_dims=False, name=None):
       can be either a matrix or a batch of matrices at runtime, pass
       `axis=[-2,-1]` instead of `axis=None` to make sure that matrix norms are
       computed.
-    keep_dims: If True, the axis indicated in `axis` are kept with size 1.
+    keepdims: If True, the axis indicated in `axis` are kept with size 1.
       Otherwise, the dimensions in `axis` are removed from the output shape.
     name: The name of the op.
+    keep_dims: Deprecated alias for `keepdims`.
 
   Returns:
     output: A `Tensor` of the same type as tensor, containing the vector or
-      matrix norms. If `keep_dims` is True then the rank of output is equal to
+      matrix norms. If `keepdims` is True then the rank of output is equal to
       the rank of `tensor`. Otherwise, if `axis` is none the output is a scalar,
       if `axis` is an integer, the rank of `output` is one less than the rank
       of `tensor`, if `axis` is a 2-tuple the rank of `output` is two less
@@ -496,6 +505,10 @@ def norm(tensor, ord='euclidean', axis=None, keep_dims=False, name=None):
      higher order tensors.
   @end_compatibility
   """
+  keepdims = deprecation.deprecated_argument_lookup('keepdims', keepdims,
+                                                    'keep_dims', keep_dims)
+  if keepdims is None:
+    keepdims = False
 
   is_matrix_norm = ((isinstance(axis, tuple) or isinstance(axis, list)) and
                     len(axis) == 2)
@@ -528,25 +541,25 @@ def norm(tensor, ord='euclidean', axis=None, keep_dims=False, name=None):
       # matrices.
       result = math_ops.sqrt(
           math_ops.reduce_sum(
-              tensor * math_ops.conj(tensor), axis, keep_dims=True))
+              tensor * math_ops.conj(tensor), axis, keepdims=True))
     else:
       result = math_ops.abs(tensor)
       if ord == 1:
         sum_axis = None if axis is None else axis[0]
-        result = math_ops.reduce_sum(result, sum_axis, keep_dims=True)
+        result = math_ops.reduce_sum(result, sum_axis, keepdims=True)
         if is_matrix_norm:
-          result = math_ops.reduce_max(result, axis[-1], keep_dims=True)
+          result = math_ops.reduce_max(result, axis[-1], keepdims=True)
       elif ord == np.inf:
         if is_matrix_norm:
-          result = math_ops.reduce_sum(result, axis[1], keep_dims=True)
+          result = math_ops.reduce_sum(result, axis[1], keepdims=True)
         max_axis = None if axis is None else axis[0]
-        result = math_ops.reduce_max(result, max_axis, keep_dims=True)
+        result = math_ops.reduce_max(result, max_axis, keepdims=True)
       else:
         # General p-norms (positive p only)
         result = math_ops.pow(
-            math_ops.reduce_sum(
-                math_ops.pow(result, ord), axis, keep_dims=True), 1.0 / ord)
-    if not keep_dims:
+            math_ops.reduce_sum(math_ops.pow(result, ord), axis, keepdims=True),
+            1.0 / ord)
+    if not keepdims:
       result = array_ops.squeeze(result, axis)
     return result
 
diff --git a/tensorflow/python/ops/math_grad_test.py b/tensorflow/python/ops/math_grad_test.py
index 5732c756ce..04eeb00518 100644
--- a/tensorflow/python/ops/math_grad_test.py
+++ b/tensorflow/python/ops/math_grad_test.py
@@ -113,6 +113,23 @@ class MinOrMaxGradientTest(test.TestCase):
       self.assertLess(error, 1e-4)
 
 
+class MaximumOrMinimumGradientTest(test.TestCase):
+
+  def testMaximumGradient(self):
+    inputs = constant_op.constant([1.0, 2.0, 3.0, 4.0], dtype=dtypes.float32)
+    outputs = math_ops.maximum(inputs, 3.0)
+    with self.test_session():
+      error = gradient_checker.compute_gradient_error(inputs, [4], outputs, [4])
+      self.assertLess(error, 1e-4)
+
+  def testMinimumGradient(self):
+    inputs = constant_op.constant([1.0, 2.0, 3.0, 4.0], dtype=dtypes.float32)
+    outputs = math_ops.minimum(inputs, 2.0)
+    with self.test_session():
+      error = gradient_checker.compute_gradient_error(inputs, [4], outputs, [4])
+      self.assertLess(error, 1e-4)
+
+
 class ProdGradientTest(test.TestCase):
 
   def testProdGradient(self):
diff --git a/tensorflow/python/ops/math_ops.py b/tensorflow/python/ops/math_ops.py
index 4c400423b6..e2e23dccef 100644
--- a/tensorflow/python/ops/math_ops.py
+++ b/tensorflow/python/ops/math_ops.py
@@ -170,14 +170,13 @@ from tensorflow.python.ops import state_ops
 from tensorflow.python.ops.gen_math_ops import *
 # pylint: enable=wildcard-import
 from tensorflow.python.util import compat
-from tensorflow.python.util.deprecation import deprecated
-from tensorflow.python.util.deprecation import deprecated_args
+from tensorflow.python.util import deprecation
 
 # Aliases for some automatically-generated names.
 linspace = gen_math_ops.lin_space
 
-arg_max = deprecated(None, "Use `argmax` instead")(arg_max)  # pylint: disable=used-before-assignment
-arg_min = deprecated(None, "Use `argmin` instead")(arg_min)  # pylint: disable=used-before-assignment
+arg_max = deprecation.deprecated(None, "Use `argmax` instead")(arg_max)  # pylint: disable=used-before-assignment
+arg_min = deprecation.deprecated(None, "Use `argmin` instead")(arg_min)  # pylint: disable=used-before-assignment
 
 
 def _set_doc(doc):
@@ -190,7 +189,8 @@ def _set_doc(doc):
 
 
 # pylint: disable=redefined-builtin
-@deprecated_args(None, "Use the `axis` argument instead", "dimension")
+@deprecation.deprecated_args(None, "Use the `axis` argument instead",
+                             "dimension")
 @_set_doc(
     gen_math_ops.arg_max.__doc__.replace("dimensions", "axes").replace(
         "dimension", "axis"))
@@ -208,7 +208,8 @@ def argmax(input,
   return gen_math_ops.arg_max(input, axis, name=name, output_type=output_type)
 
 
-@deprecated_args(None, "Use the `axis` argument instead", "dimension")
+@deprecation.deprecated_args(None, "Use the `axis` argument instead",
+                             "dimension")
 @_set_doc(
     gen_math_ops.arg_min.__doc__.replace("dimensions", "axes").replace(
         "dimension", "axis"))
@@ -324,7 +325,7 @@ multiply.__doc__ = gen_math_ops._mul.__doc__.replace("Mul", "`tf.multiply`")
 
 
 # TODO(aselle): put deprecation in after another round of global code changes
-@deprecated(
+@deprecation.deprecated(
     "2016-12-30",
     "`tf.mul(x, y)` is deprecated, please use `tf.multiply(x, y)` or `x * y`")
 def _mul(x, y, name=None):
@@ -343,7 +344,7 @@ subtract.__doc__ = gen_math_ops._sub.__doc__.replace("`Sub`", "`tf.subtract`")
 
 
 # TODO(aselle): put deprecation in after another round of global code changes
-@deprecated(
+@deprecation.deprecated(
     "2016-12-30",
     "`tf.sub(x, y)` is deprecated, please use `tf.subtract(x, y)` or `x - y`")
 def _sub(x, y, name=None):
@@ -381,8 +382,9 @@ def negative(x, name=None):
 
 
 # pylint: disable=g-docstring-has-escape
-@deprecated("2016-12-30",
-            "`tf.neg(x)` is deprecated, please use `tf.negative(x)` or `-x`")
+@deprecation.deprecated(
+    "2016-12-30",
+    "`tf.neg(x)` is deprecated, please use `tf.negative(x)` or `-x`")
 def _neg(x, name=None):
   """Computes numerical negative value element-wise.
 
@@ -1269,24 +1271,27 @@ def _ReductionDims(x, axis, reduction_indices):
     return range(0, array_ops.rank(x))
 
 
-def _may_reduce_to_scalar(keep_dims, axis, reduction_indices, output):
+def _may_reduce_to_scalar(keepdims, axis, reduction_indices, output):
   """Set a reduction's output's shape to be a scalar if we are certain."""
-  if (not output.shape.is_fully_defined()) and (not keep_dims) and (
+  if (not output.shape.is_fully_defined()) and (not keepdims) and (
       axis is None) and (reduction_indices is None):
     output.set_shape(())
   return output
 
 
+@deprecation.deprecated_args(
+    None, "keep_dims is deprecated, use keepdims instead", "keep_dims")
 def reduce_sum(input_tensor,
                axis=None,
-               keep_dims=False,
+               keepdims=None,
                name=None,
-               reduction_indices=None):
+               reduction_indices=None,
+               keep_dims=None):
   """Computes the sum of elements across dimensions of a tensor.
 
   Reduces `input_tensor` along the dimensions given in `axis`.
-  Unless `keep_dims` is true, the rank of the tensor is reduced by 1 for each
-  entry in `axis`. If `keep_dims` is true, the reduced dimensions
+  Unless `keepdims` is true, the rank of the tensor is reduced by 1 for each
+  entry in `axis`. If `keepdims` is true, the reduced dimensions
   are retained with length 1.
 
   If `axis` has no entries, all dimensions are reduced, and a
@@ -1299,7 +1304,7 @@ def reduce_sum(input_tensor,
   tf.reduce_sum(x)  # 6
   tf.reduce_sum(x, 0)  # [2, 2, 2]
   tf.reduce_sum(x, 1)  # [3, 3]
-  tf.reduce_sum(x, 1, keep_dims=True)  # [[3], [3]]
+  tf.reduce_sum(x, 1, keepdims=True)  # [[3], [3]]
   tf.reduce_sum(x, [0, 1])  # 6
   ```
 
@@ -1308,9 +1313,10 @@ def reduce_sum(input_tensor,
     axis: The dimensions to reduce. If `None` (the default),
       reduces all dimensions. Must be in the range
       `[-rank(input_tensor), rank(input_tensor))`.
-    keep_dims: If true, retains reduced dimensions with length 1.
+    keepdims: If true, retains reduced dimensions with length 1.
     name: A name for the operation (optional).
     reduction_indices: The old (deprecated) name for axis.
+    keep_dims: Deprecated alias for `keepdims`.
 
   Returns:
     The reduced tensor.
@@ -1319,26 +1325,34 @@ def reduce_sum(input_tensor,
   Equivalent to np.sum
   @end_compatibility
   """
-  return _may_reduce_to_scalar(keep_dims, axis, reduction_indices,
+  keepdims = deprecation.deprecated_argument_lookup("keepdims", keepdims,
+                                                    "keep_dims", keep_dims)
+  if keepdims is None:
+    keepdims = False
+
+  return _may_reduce_to_scalar(keepdims, axis, reduction_indices,
                                gen_math_ops._sum(
                                    input_tensor,
                                    _ReductionDims(input_tensor, axis,
                                                   reduction_indices),
-                                   keep_dims,
+                                   keepdims,
                                    name=name))
 
 
+@deprecation.deprecated_args(
+    None, "keep_dims is deprecated, use keepdims instead", "keep_dims")
 def count_nonzero(input_tensor,
                   axis=None,
-                  keep_dims=False,
+                  keepdims=None,
                   dtype=dtypes.int64,
                   name=None,
-                  reduction_indices=None):
+                  reduction_indices=None,
+                  keep_dims=None):
   """Computes number of nonzero elements across dimensions of a tensor.
 
   Reduces `input_tensor` along the dimensions given in `axis`.
-  Unless `keep_dims` is true, the rank of the tensor is reduced by 1 for each
-  entry in `axis`. If `keep_dims` is true, the reduced dimensions
+  Unless `keepdims` is true, the rank of the tensor is reduced by 1 for each
+  entry in `axis`. If `keepdims` is true, the reduced dimensions
   are retained with length 1.
 
   If `axis` has no entries, all dimensions are reduced, and a
@@ -1355,7 +1369,7 @@ def count_nonzero(input_tensor,
   tf.count_nonzero(x)  # 3
   tf.count_nonzero(x, 0)  # [1, 2, 0]
   tf.count_nonzero(x, 1)  # [1, 2]
-  tf.count_nonzero(x, 1, keep_dims=True)  # [[1], [2]]
+  tf.count_nonzero(x, 1, keepdims=True)  # [[1], [2]]
   tf.count_nonzero(x, [0, 1])  # 3
   ```
 
@@ -1364,14 +1378,20 @@ def count_nonzero(input_tensor,
     axis: The dimensions to reduce. If `None` (the default),
       reduces all dimensions. Must be in the range
       `[-rank(input_tensor), rank(input_tensor))`.
-    keep_dims: If true, retains reduced dimensions with length 1.
+    keepdims: If true, retains reduced dimensions with length 1.
     dtype: The output dtype; defaults to `tf.int64`.
     name: A name for the operation (optional).
     reduction_indices: The old (deprecated) name for axis.
+    keep_dims: Deprecated alias for `keepdims`.
 
   Returns:
     The reduced tensor (number of nonzero values).
   """
+  keepdims = deprecation.deprecated_argument_lookup("keepdims", keepdims,
+                                                    "keep_dims", keep_dims)
+  if keepdims is None:
+    keepdims = False
+
   with ops.name_scope(name, "count_nonzero", [input_tensor]):
     input_tensor = ops.convert_to_tensor(input_tensor, name="input_tensor")
     zero = input_tensor.dtype.as_numpy_dtype()
@@ -1380,21 +1400,24 @@ def count_nonzero(input_tensor,
             # int64 reduction happens on GPU
             to_int64(gen_math_ops.not_equal(input_tensor, zero)),
             axis=axis,
-            keep_dims=keep_dims,
+            keepdims=keepdims,
             reduction_indices=reduction_indices),
         dtype=dtype)
 
 
+@deprecation.deprecated_args(
+    None, "keep_dims is deprecated, use keepdims instead", "keep_dims")
 def reduce_mean(input_tensor,
                 axis=None,
-                keep_dims=False,
+                keepdims=None,
                 name=None,
-                reduction_indices=None):
+                reduction_indices=None,
+                keep_dims=None):
   """Computes the mean of elements across dimensions of a tensor.
 
   Reduces `input_tensor` along the dimensions given in `axis`.
-  Unless `keep_dims` is true, the rank of the tensor is reduced by 1 for each
-  entry in `axis`. If `keep_dims` is true, the reduced dimensions
+  Unless `keepdims` is true, the rank of the tensor is reduced by 1 for each
+  entry in `axis`. If `keepdims` is true, the reduced dimensions
   are retained with length 1.
 
   If `axis` has no entries, all dimensions are reduced, and a
@@ -1414,36 +1437,58 @@ def reduce_mean(input_tensor,
     axis: The dimensions to reduce. If `None` (the default),
       reduces all dimensions. Must be in the range
       `[-rank(input_tensor), rank(input_tensor))`.
-    keep_dims: If true, retains reduced dimensions with length 1.
+    keepdims: If true, retains reduced dimensions with length 1.
     name: A name for the operation (optional).
     reduction_indices: The old (deprecated) name for axis.
+    keep_dims: Deprecated alias for `keepdims`.
 
   Returns:
     The reduced tensor.
 
   @compatibility(numpy)
   Equivalent to np.mean
+
+  Please note that `np.mean` has a `dtype` parameter that could be used to
+  specify the output type. By default this is `dtype=float64`. On the other
+  hand, `tf.reduce_mean` has an aggressive type inference from `input_tensor`,
+  for example:
+
+  ```python
+  x = tf.constant([1, 0, 1, 0])
+  tf.reduce_mean(x)  # 0
+  y = tf.constant([1., 0., 1., 0.])
+  tf.reduce_mean(y)  # 0.5
+  ```
+
   @end_compatibility
   """
-  return _may_reduce_to_scalar(keep_dims, axis, reduction_indices,
+  keepdims = deprecation.deprecated_argument_lookup("keepdims", keepdims,
+                                                    "keep_dims", keep_dims)
+
+  if keepdims is None:
+    keepdims = False
+  return _may_reduce_to_scalar(keepdims, axis, reduction_indices,
                                gen_math_ops._mean(
                                    input_tensor,
                                    _ReductionDims(input_tensor, axis,
                                                   reduction_indices),
-                                   keep_dims,
+                                   keepdims,
                                    name=name))
 
 
+@deprecation.deprecated_args(
+    None, "keep_dims is deprecated, use keepdims instead", "keep_dims")
 def reduce_prod(input_tensor,
                 axis=None,
-                keep_dims=False,
+                keepdims=None,
                 name=None,
-                reduction_indices=None):
+                reduction_indices=None,
+                keep_dims=None):
   """Computes the product of elements across dimensions of a tensor.
 
   Reduces `input_tensor` along the dimensions given in `axis`.
-  Unless `keep_dims` is true, the rank of the tensor is reduced by 1 for each
-  entry in `axis`. If `keep_dims` is true, the reduced dimensions
+  Unless `keepdims` is true, the rank of the tensor is reduced by 1 for each
+  entry in `axis`. If `keepdims` is true, the reduced dimensions
   are retained with length 1.
 
   If `axis` has no entries, all dimensions are reduced, and a
@@ -1454,9 +1499,10 @@ def reduce_prod(input_tensor,
     axis: The dimensions to reduce. If `None` (the default),
       reduces all dimensions. Must be in the range
       `[-rank(input_tensor), rank(input_tensor))`.
-    keep_dims: If true, retains reduced dimensions with length 1.
+    keepdims: If true, retains reduced dimensions with length 1.
     name: A name for the operation (optional).
     reduction_indices: The old (deprecated) name for axis.
+    keep_dims: Deprecated alias for `keepdims`.
 
   Returns:
     The reduced tensor.
@@ -1465,25 +1511,33 @@ def reduce_prod(input_tensor,
   Equivalent to np.prod
   @end_compatibility
   """
-  return _may_reduce_to_scalar(keep_dims, axis, reduction_indices,
+  keepdims = deprecation.deprecated_argument_lookup("keepdims", keepdims,
+                                                    "keep_dims", keep_dims)
+
+  if keepdims is None:
+    keepdims = False
+  return _may_reduce_to_scalar(keepdims, axis, reduction_indices,
                                gen_math_ops._prod(
                                    input_tensor,
                                    _ReductionDims(input_tensor, axis,
                                                   reduction_indices),
-                                   keep_dims,
+                                   keepdims,
                                    name=name))
 
 
+@deprecation.deprecated_args(
+    None, "keep_dims is deprecated, use keepdims instead", "keep_dims")
 def reduce_min(input_tensor,
                axis=None,
-               keep_dims=False,
+               keepdims=None,
                name=None,
-               reduction_indices=None):
+               reduction_indices=None,
+               keep_dims=None):
   """Computes the minimum of elements across dimensions of a tensor.
 
   Reduces `input_tensor` along the dimensions given in `axis`.
-  Unless `keep_dims` is true, the rank of the tensor is reduced by 1 for each
-  entry in `axis`. If `keep_dims` is true, the reduced dimensions
+  Unless `keepdims` is true, the rank of the tensor is reduced by 1 for each
+  entry in `axis`. If `keepdims` is true, the reduced dimensions
   are retained with length 1.
 
   If `axis` has no entries, all dimensions are reduced, and a
@@ -1494,9 +1548,10 @@ def reduce_min(input_tensor,
     axis: The dimensions to reduce. If `None` (the default),
       reduces all dimensions. Must be in the range
       `[-rank(input_tensor), rank(input_tensor))`.
-    keep_dims: If true, retains reduced dimensions with length 1.
+    keepdims: If true, retains reduced dimensions with length 1.
     name: A name for the operation (optional).
     reduction_indices: The old (deprecated) name for axis.
+    keep_dims: Deprecated alias for `keepdims`.
 
   Returns:
     The reduced tensor.
@@ -1505,25 +1560,32 @@ def reduce_min(input_tensor,
   Equivalent to np.min
   @end_compatibility
   """
-  return _may_reduce_to_scalar(keep_dims, axis, reduction_indices,
+  keepdims = deprecation.deprecated_argument_lookup("keepdims", keepdims,
+                                                    "keep_dims", keep_dims)
+  if keepdims is None:
+    keepdims = False
+  return _may_reduce_to_scalar(keepdims, axis, reduction_indices,
                                gen_math_ops._min(
                                    input_tensor,
                                    _ReductionDims(input_tensor, axis,
                                                   reduction_indices),
-                                   keep_dims,
+                                   keepdims,
                                    name=name))
 
 
+@deprecation.deprecated_args(
+    None, "keep_dims is deprecated, use keepdims instead", "keep_dims")
 def reduce_max(input_tensor,
                axis=None,
-               keep_dims=False,
+               keepdims=None,
                name=None,
-               reduction_indices=None):
+               reduction_indices=None,
+               keep_dims=None):
   """Computes the maximum of elements across dimensions of a tensor.
 
   Reduces `input_tensor` along the dimensions given in `axis`.
-  Unless `keep_dims` is true, the rank of the tensor is reduced by 1 for each
-  entry in `axis`. If `keep_dims` is true, the reduced dimensions
+  Unless `keepdims` is true, the rank of the tensor is reduced by 1 for each
+  entry in `axis`. If `keepdims` is true, the reduced dimensions
   are retained with length 1.
 
   If `axis` has no entries, all dimensions are reduced, and a
@@ -1534,9 +1596,10 @@ def reduce_max(input_tensor,
     axis: The dimensions to reduce. If `None` (the default),
       reduces all dimensions. Must be in the range
       `[-rank(input_tensor), rank(input_tensor))`.
-    keep_dims: If true, retains reduced dimensions with length 1.
+    keepdims: If true, retains reduced dimensions with length 1.
     name: A name for the operation (optional).
     reduction_indices: The old (deprecated) name for axis.
+    keep_dims: Deprecated alias for `keepdims`.
 
   Returns:
     The reduced tensor.
@@ -1545,25 +1608,32 @@ def reduce_max(input_tensor,
   Equivalent to np.max
   @end_compatibility
   """
-  return _may_reduce_to_scalar(keep_dims, axis, reduction_indices,
+  keepdims = deprecation.deprecated_argument_lookup("keepdims", keepdims,
+                                                    "keep_dims", keep_dims)
+  if keepdims is None:
+    keepdims = False
+  return _may_reduce_to_scalar(keepdims, axis, reduction_indices,
                                gen_math_ops._max(
                                    input_tensor,
                                    _ReductionDims(input_tensor, axis,
                                                   reduction_indices),
-                                   keep_dims,
+                                   keepdims,
                                    name=name))
 
 
+@deprecation.deprecated_args(
+    None, "keep_dims is deprecated, use keepdims instead", "keep_dims")
 def reduce_all(input_tensor,
                axis=None,
-               keep_dims=False,
+               keepdims=None,
                name=None,
-               reduction_indices=None):
+               reduction_indices=None,
+               keep_dims=None):
   """Computes the "logical and" of elements across dimensions of a tensor.
 
   Reduces `input_tensor` along the dimensions given in `axis`.
-  Unless `keep_dims` is true, the rank of the tensor is reduced by 1 for each
-  entry in `axis`. If `keep_dims` is true, the reduced dimensions
+  Unless `keepdims` is true, the rank of the tensor is reduced by 1 for each
+  entry in `axis`. If `keepdims` is true, the reduced dimensions
   are retained with length 1.
 
   If `axis` has no entries, all dimensions are reduced, and a
@@ -1583,9 +1653,10 @@ def reduce_all(input_tensor,
     axis: The dimensions to reduce. If `None` (the default),
       reduces all dimensions. Must be in the range
       `[-rank(input_tensor), rank(input_tensor))`.
-    keep_dims: If true, retains reduced dimensions with length 1.
+    keepdims: If true, retains reduced dimensions with length 1.
     name: A name for the operation (optional).
     reduction_indices: The old (deprecated) name for axis.
+    keep_dims: Deprecated alias for `keepdims`.
 
   Returns:
     The reduced tensor.
@@ -1594,25 +1665,32 @@ def reduce_all(input_tensor,
   Equivalent to np.all
   @end_compatibility
   """
-  return _may_reduce_to_scalar(keep_dims, axis, reduction_indices,
+  keepdims = deprecation.deprecated_argument_lookup("keepdims", keepdims,
+                                                    "keep_dims", keep_dims)
+  if keepdims is None:
+    keepdims = False
+  return _may_reduce_to_scalar(keepdims, axis, reduction_indices,
                                gen_math_ops._all(
                                    input_tensor,
                                    _ReductionDims(input_tensor, axis,
                                                   reduction_indices),
-                                   keep_dims,
+                                   keepdims,
                                    name=name))
 
 
+@deprecation.deprecated_args(
+    None, "keep_dims is deprecated, use keepdims instead", "keep_dims")
 def reduce_any(input_tensor,
                axis=None,
-               keep_dims=False,
+               keepdims=None,
                name=None,
-               reduction_indices=None):
+               reduction_indices=None,
+               keep_dims=None):
   """Computes the "logical or" of elements across dimensions of a tensor.
 
   Reduces `input_tensor` along the dimensions given in `axis`.
-  Unless `keep_dims` is true, the rank of the tensor is reduced by 1 for each
-  entry in `axis`. If `keep_dims` is true, the reduced dimensions
+  Unless `keepdims` is true, the rank of the tensor is reduced by 1 for each
+  entry in `axis`. If `keepdims` is true, the reduced dimensions
   are retained with length 1.
 
   If `axis` has no entries, all dimensions are reduced, and a
@@ -1632,9 +1710,10 @@ def reduce_any(input_tensor,
     axis: The dimensions to reduce. If `None` (the default),
       reduces all dimensions. Must be in the range
       `[-rank(input_tensor), rank(input_tensor))`.
-    keep_dims: If true, retains reduced dimensions with length 1.
+    keepdims: If true, retains reduced dimensions with length 1.
     name: A name for the operation (optional).
     reduction_indices: The old (deprecated) name for axis.
+    keep_dims: Deprecated alias for `keepdims`.
 
   Returns:
     The reduced tensor.
@@ -1643,25 +1722,32 @@ def reduce_any(input_tensor,
   Equivalent to np.any
   @end_compatibility
   """
-  return _may_reduce_to_scalar(keep_dims, axis, reduction_indices,
+  keepdims = deprecation.deprecated_argument_lookup("keepdims", keepdims,
+                                                    "keep_dims", keep_dims)
+  if keepdims is None:
+    keepdims = False
+  return _may_reduce_to_scalar(keepdims, axis, reduction_indices,
                                gen_math_ops._any(
                                    input_tensor,
                                    _ReductionDims(input_tensor, axis,
                                                   reduction_indices),
-                                   keep_dims,
+                                   keepdims,
                                    name=name))
 
 
+@deprecation.deprecated_args(
+    None, "keep_dims is deprecated, use keepdims instead", "keep_dims")
 def reduce_logsumexp(input_tensor,
                      axis=None,
-                     keep_dims=False,
+                     keepdims=None,
                      name=None,
-                     reduction_indices=None):
+                     reduction_indices=None,
+                     keep_dims=None):
   """Computes log(sum(exp(elements across dimensions of a tensor))).
 
   Reduces `input_tensor` along the dimensions given in `axis`.
-  Unless `keep_dims` is true, the rank of the tensor is reduced by 1 for each
-  entry in `axis`. If `keep_dims` is true, the reduced dimensions
+  Unless `keepdims` is true, the rank of the tensor is reduced by 1 for each
+  entry in `axis`. If `keepdims` is true, the reduced dimensions
   are retained with length 1.
 
   If `axis` has no entries, all dimensions are reduced, and a
@@ -1678,7 +1764,7 @@ def reduce_logsumexp(input_tensor,
   tf.reduce_logsumexp(x)  # log(6)
   tf.reduce_logsumexp(x, 0)  # [log(2), log(2), log(2)]
   tf.reduce_logsumexp(x, 1)  # [log(3), log(3)]
-  tf.reduce_logsumexp(x, 1, keep_dims=True)  # [[log(3)], [log(3)]]
+  tf.reduce_logsumexp(x, 1, keepdims=True)  # [[log(3)], [log(3)]]
   tf.reduce_logsumexp(x, [0, 1])  # log(6)
   ```
 
@@ -1687,19 +1773,24 @@ def reduce_logsumexp(input_tensor,
     axis: The dimensions to reduce. If `None` (the default),
       reduces all dimensions. Must be in the range
       `[-rank(input_tensor), rank(input_tensor))`.
-    keep_dims: If true, retains reduced dimensions with length 1.
+    keepdims: If true, retains reduced dimensions with length 1.
     name: A name for the operation (optional).
     reduction_indices: The old (deprecated) name for axis.
+    keep_dims: Deprecated alias for `keepdims`.
 
   Returns:
     The reduced tensor.
   """
+  keepdims = deprecation.deprecated_argument_lookup("keepdims", keepdims,
+                                                    "keep_dims", keep_dims)
+  if keepdims is None:
+    keepdims = False
   with ops.name_scope(name, "ReduceLogSumExp", [input_tensor]) as name:
     raw_max = reduce_max(
         input_tensor,
         axis=axis,
         reduction_indices=reduction_indices,
-        keep_dims=True)
+        keepdims=True)
     my_max = array_ops.stop_gradient(
         array_ops.where(
             gen_math_ops.is_finite(raw_max), raw_max,
@@ -1708,13 +1799,13 @@ def reduce_logsumexp(input_tensor,
         reduce_sum(
             gen_math_ops.exp(input_tensor - my_max),
             axis,
-            keep_dims=True,
+            keepdims=True,
             reduction_indices=reduction_indices)) + my_max
-    if not keep_dims:
+    if not keepdims:
       if isinstance(axis, int):
         axis = [axis]
       result = array_ops.squeeze(result, axis)
-    return _may_reduce_to_scalar(keep_dims, axis, reduction_indices, result)
+    return _may_reduce_to_scalar(keepdims, axis, reduction_indices, result)
 
 
 def trace(x, name=None):
@@ -2216,9 +2307,10 @@ def bincount(arr,
     maxlength = ops.convert_to_tensor(
         maxlength, name="maxlength", dtype=dtypes.int32)
     output_size = gen_math_ops.minimum(maxlength, output_size)
-  weights = (
-      ops.convert_to_tensor(weights, name="weights")
-      if weights is not None else constant_op.constant([], dtype))
+  if weights is not None:
+    weights = ops.convert_to_tensor(weights, name="weights")
+    return gen_math_ops.unsorted_segment_sum(weights, arr, output_size)
+  weights = constant_op.constant([], dtype)
   return gen_math_ops.bincount(arr, output_size, weights)
 
 
@@ -2381,7 +2473,7 @@ def reduced_shape(input_shape, axes):
     input_shape: 1-D Tensor, the shape of the Tensor being reduced.
     axes: 1-D Tensor, the reduction axes.
   Returns:
-    A 1-D Tensor, the output shape as if keep_dims were set to True.
+    A 1-D Tensor, the output shape as if keepdims were set to True.
   """
   # Example:
   # cast needed for SparseTensor reductions
diff --git a/tensorflow/python/ops/metrics_impl.py b/tensorflow/python/ops/metrics_impl.py
index 717ee1254f..e04121ee31 100644
--- a/tensorflow/python/ops/metrics_impl.py
+++ b/tensorflow/python/ops/metrics_impl.py
@@ -792,9 +792,10 @@ def mean_cosine_distance(labels, predictions, dim, weights=None,
   predictions, labels, weights = _remove_squeezable_dimensions(
       predictions=predictions, labels=labels, weights=weights)
   radial_diffs = math_ops.multiply(predictions, labels)
-  radial_diffs = math_ops.reduce_sum(radial_diffs,
-                                     reduction_indices=[dim,],
-                                     keep_dims=True)
+  radial_diffs = math_ops.reduce_sum(
+      radial_diffs, reduction_indices=[
+          dim,
+      ], keepdims=True)
   mean_distance, update_op = mean(radial_diffs, weights,
                                   None,
                                   None,
diff --git a/tensorflow/python/ops/nn_fused_batchnorm_test.py b/tensorflow/python/ops/nn_fused_batchnorm_test.py
index 1fcd0384da..e72d34d1f7 100644
--- a/tensorflow/python/ops/nn_fused_batchnorm_test.py
+++ b/tensorflow/python/ops/nn_fused_batchnorm_test.py
@@ -335,22 +335,22 @@ class BatchNormalizationTest(test.TestCase):
 
   def testInference(self):
     x_shape = [1, 1, 6, 1]
-    if test.is_gpu_available(cuda_only=True):
-      for dtype in [np.float16, np.float32]:
+    for dtype in [np.float16, np.float32]:
+      if test.is_gpu_available(cuda_only=True):
         self._test_inference(
             x_shape, dtype, [1], np.float32, use_gpu=True, data_format='NHWC')
         self._test_inference(
             x_shape, dtype, [1], np.float32, use_gpu=True, data_format='NCHW')
-    self._test_inference(
-        x_shape, np.float32, [1], np.float32, use_gpu=False, data_format='NHWC')
+      self._test_inference(
+          x_shape, dtype, [1], np.float32, use_gpu=False, data_format='NHWC')
 
     x_shape = [1, 1, 6, 2]
     if test.is_gpu_available(cuda_only=True):
       for dtype in [np.float16, np.float32]:
         self._test_inference(
             x_shape, dtype, [2], np.float32, use_gpu=True, data_format='NHWC')
-    self._test_inference(
-        x_shape, np.float32, [2], np.float32, use_gpu=False, data_format='NHWC')
+        self._test_inference(
+            x_shape, dtype, [2], np.float32, use_gpu=False, data_format='NHWC')
 
     x_shape = [1, 2, 1, 6]
     if test.is_gpu_available(cuda_only=True):
@@ -359,33 +359,33 @@ class BatchNormalizationTest(test.TestCase):
             x_shape, dtype, [2], np.float32, use_gpu=True, data_format='NCHW')
 
     x_shape = [27, 131, 127, 6]
-    if test.is_gpu_available(cuda_only=True):
-      for dtype in [np.float16, np.float32]:
+    for dtype in [np.float16, np.float32]:
+      if test.is_gpu_available(cuda_only=True):
         self._test_inference(
             x_shape, dtype, [131], np.float32, use_gpu=True, data_format='NCHW')
         self._test_inference(
             x_shape, dtype, [6], np.float32, use_gpu=True, data_format='NHWC')
-    self._test_inference(
-        x_shape, np.float32, [6], np.float32, use_gpu=False, data_format='NHWC')
+      self._test_inference(
+          x_shape, dtype, [6], np.float32, use_gpu=False, data_format='NHWC')
 
   def testTraining(self):
     x_shape = [1, 1, 6, 1]
-    if test.is_gpu_available(cuda_only=True):
-      for dtype in [np.float16, np.float32]:
+    for dtype in [np.float16, np.float32]:
+      if test.is_gpu_available(cuda_only=True):
         self._test_training(
             x_shape, dtype, [1], np.float32, use_gpu=True, data_format='NHWC')
         self._test_training(
             x_shape, dtype, [1], np.float32, use_gpu=True, data_format='NCHW')
-    self._test_training(
-        x_shape, np.float32, [1], np.float32, use_gpu=False, data_format='NHWC')
+      self._test_training(
+          x_shape, dtype, [1], np.float32, use_gpu=False, data_format='NHWC')
 
     x_shape = [1, 1, 6, 2]
-    if test.is_gpu_available(cuda_only=True):
-      for dtype in [np.float16, np.float32]:
+    for dtype in [np.float16, np.float32]:
+      if test.is_gpu_available(cuda_only=True):
         self._test_training(
             x_shape, dtype, [2], np.float32, use_gpu=True, data_format='NHWC')
-    self._test_training(
-        x_shape, np.float32, [2], np.float32, use_gpu=False, data_format='NHWC')
+      self._test_training(
+          x_shape, dtype, [2], np.float32, use_gpu=False, data_format='NHWC')
 
     x_shape = [1, 2, 1, 6]
     if test.is_gpu_available(cuda_only=True):
@@ -394,20 +394,20 @@ class BatchNormalizationTest(test.TestCase):
             x_shape, dtype, [2], np.float32, use_gpu=True, data_format='NCHW')
 
     x_shape = [27, 131, 127, 6]
-    if test.is_gpu_available(cuda_only=True):
-      for dtype in [np.float16, np.float32]:
+    for dtype in [np.float16, np.float32]:
+      if test.is_gpu_available(cuda_only=True):
         self._test_training(
             x_shape, dtype, [131], np.float32, use_gpu=True, data_format='NCHW')
         self._test_training(
             x_shape, dtype, [6], np.float32, use_gpu=True, data_format='NHWC')
-    self._test_training(
-        x_shape, np.float32, [6], np.float32, use_gpu=False, data_format='NHWC')
+      self._test_training(
+          x_shape, dtype, [6], np.float32, use_gpu=False, data_format='NHWC')
 
   def testBatchNormGrad(self):
     for is_training in [True, False]:
       x_shape = [1, 1, 6, 1]
-      if test.is_gpu_available(cuda_only=True):
-        for dtype in [np.float16, np.float32]:
+      for dtype in [np.float16, np.float32]:
+        if test.is_gpu_available(cuda_only=True):
           self._test_gradient(
               x_shape,
               dtype, [1],
@@ -422,17 +422,17 @@ class BatchNormalizationTest(test.TestCase):
               use_gpu=True,
               data_format='NCHW',
               is_training=is_training)
-      self._test_gradient(
-          x_shape,
-          np.float32, [1],
-          np.float32,
-          use_gpu=False,
-          data_format='NHWC',
-          is_training=is_training)
+        self._test_gradient(
+            x_shape,
+            dtype, [1],
+            np.float32,
+            use_gpu=False,
+            data_format='NHWC',
+            is_training=is_training)
 
       x_shape = [1, 1, 6, 2]
-      if test.is_gpu_available(cuda_only=True):
-        for dtype in [np.float16, np.float32]:
+      for dtype in [np.float16, np.float32]:
+        if test.is_gpu_available(cuda_only=True):
           self._test_gradient(
               x_shape,
               dtype, [2],
@@ -440,13 +440,13 @@ class BatchNormalizationTest(test.TestCase):
               use_gpu=True,
               data_format='NHWC',
               is_training=is_training)
-      self._test_gradient(
-          x_shape,
-          np.float32, [2],
-          np.float32,
-          use_gpu=False,
-          data_format='NHWC',
-          is_training=is_training)
+        self._test_gradient(
+            x_shape,
+            dtype, [2],
+            np.float32,
+            use_gpu=False,
+            data_format='NHWC',
+            is_training=is_training)
 
       x_shape = [1, 2, 1, 6]
       if test.is_gpu_available(cuda_only=True):
@@ -460,8 +460,8 @@ class BatchNormalizationTest(test.TestCase):
               is_training=is_training)
 
       x_shape = [5, 7, 11, 4]
-      if test.is_gpu_available(cuda_only=True):
-        for dtype in [np.float16, np.float32]:
+      for dtype in [np.float16, np.float32]:
+        if test.is_gpu_available(cuda_only=True):
           self._test_gradient(
               x_shape,
               dtype, [7],
@@ -476,13 +476,13 @@ class BatchNormalizationTest(test.TestCase):
               use_gpu=True,
               data_format='NHWC',
               is_training=is_training)
-      self._test_gradient(
-          x_shape,
-          np.float32, [4],
-          np.float32,
-          use_gpu=False,
-          data_format='NHWC',
-          is_training=is_training)
+        self._test_gradient(
+            x_shape,
+            dtype, [4],
+            np.float32,
+            use_gpu=False,
+            data_format='NHWC',
+            is_training=is_training)
 
   def _testBatchNormGradGrad(self, config):
     shape = config['shape']
@@ -506,15 +506,14 @@ class BatchNormalizationTest(test.TestCase):
             data_format='NCHW',
             is_training=is_training,
             err_tolerance=err_tolerance)
-      if dtype != np.float16:
-        self._test_grad_grad(
-            shape,
-            np.float32, [shape[3]],
-            np.float32,
-            use_gpu=False,
-            data_format='NHWC',
-            is_training=is_training,
-            err_tolerance=err_tolerance)
+      self._test_grad_grad(
+          shape,
+          dtype, [shape[3]],
+          np.float32,
+          use_gpu=False,
+          data_format='NHWC',
+          is_training=is_training,
+          err_tolerance=err_tolerance)
 
   def testBatchNormGradGrad(self):
     configs = [{
@@ -525,6 +524,10 @@ class BatchNormalizationTest(test.TestCase):
         'shape': [2, 3, 2, 2],
         'err_tolerance': 1e-3,
         'dtype': np.float32,
+    }, {
+        'shape': [2, 3, 4, 5],
+        'err_tolerance': 1e-2,
+        'dtype': np.float16,
     }, {
         'shape': [2, 3, 2, 2],
         'err_tolerance': 2e-3,
diff --git a/tensorflow/python/ops/nn_impl.py b/tensorflow/python/ops/nn_impl.py
index 431ea1186a..654eb1c118 100644
--- a/tensorflow/python/ops/nn_impl.py
+++ b/tensorflow/python/ops/nn_impl.py
@@ -32,6 +32,8 @@ from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import nn_ops
 from tensorflow.python.ops import sparse_ops
 from tensorflow.python.ops import variables
+from tensorflow.python.util.deprecation import deprecated_args
+from tensorflow.python.util.deprecation import deprecated_argument_lookup
 
 
 def log_poisson_loss(targets, log_input, compute_full_loss=False, name=None):
@@ -313,30 +315,33 @@ def swish(features):
   return features * math_ops.sigmoid(features)
 
 
-def l2_normalize(x, dim, epsilon=1e-12, name=None):
-  """Normalizes along dimension `dim` using an L2 norm.
+@deprecated_args(None, "dim is deprecated, use axis instead", "dim")
+def l2_normalize(x, axis=None, epsilon=1e-12, name=None, dim=None):
+  """Normalizes along dimension `axis` using an L2 norm.
 
-  For a 1-D tensor with `dim = 0`, computes
+  For a 1-D tensor with `axis = 0`, computes
 
       output = x / sqrt(max(sum(x**2), epsilon))
 
   For `x` with more dimensions, independently normalizes each 1-D slice along
-  dimension `dim`.
+  dimension `axis`.
 
   Args:
     x: A `Tensor`.
-    dim: Dimension along which to normalize.  A scalar or a vector of
+    axis: Dimension along which to normalize.  A scalar or a vector of
       integers.
     epsilon: A lower bound value for the norm. Will use `sqrt(epsilon)` as the
       divisor if `norm < sqrt(epsilon)`.
     name: A name for this operation (optional).
+    dim: Deprecated alias for axis.
 
   Returns:
     A `Tensor` with the same shape as `x`.
   """
   with ops.name_scope(name, "l2_normalize", [x]) as name:
+    axis = deprecated_argument_lookup("axis", axis, "dim", dim)
     x = ops.convert_to_tensor(x, name="x")
-    square_sum = math_ops.reduce_sum(math_ops.square(x), dim, keep_dims=True)
+    square_sum = math_ops.reduce_sum(math_ops.square(x), axis, keep_dims=True)
     x_inv_norm = math_ops.rsqrt(math_ops.maximum(square_sum, epsilon))
     return math_ops.multiply(x, x_inv_norm, name=name)
 
diff --git a/tensorflow/python/ops/nn_ops.py b/tensorflow/python/ops/nn_ops.py
index bdaac65904..ec7b9372ca 100644
--- a/tensorflow/python/ops/nn_ops.py
+++ b/tensorflow/python/ops/nn_ops.py
@@ -40,6 +40,7 @@ from tensorflow.python.ops.gen_nn_ops import *
 
 from tensorflow.python.util import deprecation
 
+
 # Aliases for some automatically-generated names.
 local_response_normalization = gen_nn_ops.lrn
 
@@ -1645,52 +1646,62 @@ def _softmax(logits, compute_op, dim=-1, name=None):
   return output
 
 
-def softmax(logits, dim=-1, name=None):
+@deprecation.deprecated_args(None, "dim is deprecated, use axis instead", "dim")
+def softmax(logits, axis=None, name=None, dim=None):
   """Computes softmax activations.
 
   This function performs the equivalent of
 
-      softmax = tf.exp(logits) / tf.reduce_sum(tf.exp(logits), dim)
+      softmax = tf.exp(logits) / tf.reduce_sum(tf.exp(logits), axis)
 
   Args:
     logits: A non-empty `Tensor`. Must be one of the following types: `half`,
       `float32`, `float64`.
-    dim: The dimension softmax would be performed on. The default is -1 which
+    axis: The dimension softmax would be performed on. The default is -1 which
       indicates the last dimension.
     name: A name for the operation (optional).
+    dim: Deprecated alias for `axis`.
 
   Returns:
     A `Tensor`. Has the same type and shape as `logits`.
 
   Raises:
-    InvalidArgumentError: if `logits` is empty or `dim` is beyond the last
+    InvalidArgumentError: if `logits` is empty or `axis` is beyond the last
       dimension of `logits`.
   """
-  return _softmax(logits, gen_nn_ops._softmax, dim, name)
+  axis = deprecation.deprecated_argument_lookup("axis", axis, "dim", dim)
+  if axis is None:
+    axis = -1
+  return _softmax(logits, gen_nn_ops._softmax, axis, name)
 
 
-def log_softmax(logits, dim=-1, name=None):
+@deprecation.deprecated_args(None, "dim is deprecated, use axis instead", "dim")
+def log_softmax(logits, axis=None, name=None, dim=None):
   """Computes log softmax activations.
 
   For each batch `i` and class `j` we have
 
-      logsoftmax = logits - log(reduce_sum(exp(logits), dim))
+      logsoftmax = logits - log(reduce_sum(exp(logits), axis))
 
   Args:
     logits: A non-empty `Tensor`. Must be one of the following types: `half`,
       `float32`, `float64`.
-    dim: The dimension softmax would be performed on. The default is -1 which
+    axis: The dimension softmax would be performed on. The default is -1 which
       indicates the last dimension.
     name: A name for the operation (optional).
+    dim: Deprecated alias for `axis`.
 
   Returns:
     A `Tensor`. Has the same type as `logits`. Same shape as `logits`.
 
   Raises:
-    InvalidArgumentError: if `logits` is empty or `dim` is beyond the last
+    InvalidArgumentError: if `logits` is empty or `axis` is beyond the last
       dimension of `logits`.
   """
-  return _softmax(logits, gen_nn_ops._log_softmax, dim, name)
+  axis = deprecation.deprecated_argument_lookup("axis", axis, "dim", dim)
+  if axis is None:
+    axis = -1
+  return _softmax(logits, gen_nn_ops._log_softmax, axis, name)
 
 
 def _ensure_xent_args(name, sentinel, labels, logits):
@@ -2305,6 +2316,103 @@ def conv1d(value, filters, stride, padding,
     return array_ops.squeeze(result, [spatial_start_dim])
 
 
+def conv1d_transpose(
+    value,
+    filter,  # pylint: disable=redefined-builtin
+    output_shape,
+    stride,
+    padding="SAME",
+    data_format="NWC",
+    name=None):
+  """The transpose of `conv1d`.
+
+  This operation is sometimes called "deconvolution" after [Deconvolutional
+  Networks](http://www.matthewzeiler.com/pubs/cvpr2010/cvpr2010.pdf), but is
+  actually the transpose (gradient) of `conv1d` rather than an actual
+  deconvolution.
+
+  Args:
+    value: A 3-D `Tensor` of type `float` and shape
+      `[batch, in_width, in_channels]` for `NWC` data format or
+      `[batch, in_channels, in_width]` for `NCW` data format.
+    filter: A 3-D `Tensor` with the same type as `value` and shape
+      `[filter_width, output_channels, in_channels]`.  `filter`'s
+      `in_channels` dimension must match that of `value`.
+    output_shape: A 1-D `Tensor` representing the output shape of the
+      deconvolution op.
+    stride: An `integer`.  The number of entries by which
+      the filter is moved right at each step.
+    padding: A string, either `'VALID'` or `'SAME'`. The padding algorithm.
+      See the @{tf.nn.convolution$comment here}
+    data_format: A string. 'NHWC' and 'NCHW' are supported.
+    name: Optional name for the returned tensor.
+
+  Returns:
+    A `Tensor` with the same type as `value`.
+
+  Raises:
+    ValueError: If input/output depth does not match `filter`'s shape, or if
+      padding is other than `'VALID'` or `'SAME'`.
+  """
+  with ops.name_scope(name, "conv1d_transpose",
+                      [value, filter, output_shape]) as name:
+    output_shape_ = ops.convert_to_tensor(output_shape, name="output_shape")
+    if not output_shape_.get_shape().is_compatible_with(tensor_shape.vector(3)):
+      raise ValueError("output_shape must have shape (3,), got {}".format(
+          output_shape_.get_shape()))
+
+    # The format could be either NWC or NCW, map to NHWC or NCHW
+    if data_format is None or data_format == "NWC":
+      data_format_2d = "NHWC"
+      axis = 2
+    elif data_format == "NCW":
+      data_format_2d = "NCHW"
+      axis = 1
+    else:
+      raise ValueError("data_format must be \"NWC\" or \"NCW\".")
+
+    if not value.get_shape()[axis].is_compatible_with(filter.get_shape()[2]):
+      raise ValueError("input channels does not match filter's input channels, "
+                       "{} != {}".format(value.get_shape()[axis],
+                                         filter.get_shape()[2]))
+
+    if isinstance(output_shape, (list, np.ndarray)):
+      # output_shape's shape should be == [3] if reached this point.
+      if not filter.get_shape()[1].is_compatible_with(output_shape[axis]):
+        raise ValueError(
+            "output_shape does not match filter's output channels, "
+            "{} != {}".format(output_shape[axis],
+                              filter.get_shape()[1]))
+
+    if padding != "VALID" and padding != "SAME":
+      raise ValueError("padding must be either VALID or SAME:"
+                       " {}".format(padding))
+
+    # Reshape the input tensor to [batch, 1, in_width, in_channels]
+    if data_format_2d == "NHWC":
+      output_shape_ = array_ops.concat(
+          [output_shape_[:1], [1], output_shape_[1:]], axis=0)
+      spatial_start_dim = 1
+      strides = [1, 1, stride, 1]
+    else:
+      output_shape_ = array_ops.concat(
+          [output_shape_[:2], [1], output_shape_[2:]], axis=0)
+      spatial_start_dim = 2
+      strides = [1, 1, 1, stride]
+    value = array_ops.expand_dims(value, spatial_start_dim)
+    filter = array_ops.expand_dims(filter, 0)
+
+    result = gen_nn_ops.conv2d_backprop_input(
+        input_sizes=output_shape_,
+        filter=filter,
+        out_backprop=value,
+        strides=strides,
+        padding=padding,
+        data_format=data_format_2d,
+        name=name)
+    return array_ops.squeeze(result, [spatial_start_dim])
+
+
 @ops.RegisterStatistics("Dilation2D", "flops")
 def _calc_dilation2d_flops(graph, node):
   """Calculates the compute resources needed for Dilation2D."""
diff --git a/tensorflow/python/ops/variables.py b/tensorflow/python/ops/variables.py
index e9b1c67d16..a1e4305de1 100644
--- a/tensorflow/python/ops/variables.py
+++ b/tensorflow/python/ops/variables.py
@@ -1063,13 +1063,13 @@ class Variable(object):
 class PartitionedVariable(object):
   """A container for partitioned `Variable` objects.
 
-  @compatiblity(eager) `tf.PartitionedVariable` is not compatible with
+  @compatibility(eager) `tf.PartitionedVariable` is not compatible with
   eager execution.  Use `tfe.Variable` instead which is compatable
   with both eager execution and graph construction.  See [the
   TensorFlow Eager Execution
   guide](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/eager/python/g3doc/guide.md#variables-and-optimizers)
   for details on how variables work in eager execution.
-  @end_compatiblity
+  @end_compatibility
   """
 
   class PartitionedVariableIterator(object):
diff --git a/tensorflow/python/tools/import_pb_to_tensorboard.py b/tensorflow/python/tools/import_pb_to_tensorboard.py
old mode 100644
new mode 100755
diff --git a/tensorflow/stream_executor/cuda/cuda_dnn.cc b/tensorflow/stream_executor/cuda/cuda_dnn.cc
index 99bed86a17..d78362d4fb 100644
--- a/tensorflow/stream_executor/cuda/cuda_dnn.cc
+++ b/tensorflow/stream_executor/cuda/cuda_dnn.cc
@@ -232,7 +232,6 @@ CUDNN_DNN_ROUTINE_EACH_R3(PERFTOOLS_GPUTOOLS_CUDNN_WRAP)
   __macro(cudnnRNNBackwardData)                               \
   __macro(cudnnRNNBackwardWeights)                            \
   __macro(cudnnSetRNNDescriptor)                              \
-  __macro(cudnnSetRNNDescriptor_v6)                           \
   __macro(cudnnGetFilterNdDescriptor)
 
 // clang-format on
@@ -245,7 +244,8 @@ CUDNN_DNN_ROUTINE_EACH_R5(PERFTOOLS_GPUTOOLS_CUDNN_WRAP)
 // clang-format off
 #if CUDNN_VERSION >= 6000
 #define CUDNN_DNN_ROUTINE_EACH_R6(__macro)                    \
-  __macro(cudnnConvolutionBiasActivationForward)
+  __macro(cudnnConvolutionBiasActivationForward)              \
+  __macro(cudnnSetRNNDescriptor_v6)
 
 // clang-format on
 CUDNN_DNN_ROUTINE_EACH_R6(PERFTOOLS_GPUTOOLS_CUDNN_WRAP)
@@ -665,7 +665,6 @@ class ScopedPoolingDescriptor {
       LOG(FATAL) << "could not create cudnn pooling descriptor: "
                  << ToString(status);
     }
-
     const std::vector<int64> strides64 = pooling_descriptor.strides();
     const std::vector<int64> padding64 = pooling_descriptor.padding();
     const std::vector<int64> shape64 = pooling_descriptor.window();
@@ -680,14 +679,14 @@ class ScopedPoolingDescriptor {
                    &CheckedNarrowing<int64, int>);
     std::transform(shape64.cbegin(), shape64.cend(), shape.begin(),
                    &CheckedNarrowing<int64, int>);
+    bool propagate_nans = pooling_descriptor.propagate_nans();
     status = wrap::cudnnSetPoolingNdDescriptor(
         parent_, handle_,
         (pooling_descriptor.mode() == dnn::PoolingMode::kMaximum
              ? CUDNN_POOLING_MAX
              : CUDNN_POOLING_AVERAGE_COUNT_EXCLUDE_PADDING),
 #if CUDNN_VERSION >= 5000
-        // Always propagate nans.
-        CUDNN_PROPAGATE_NAN,
+        propagate_nans ? CUDNN_PROPAGATE_NAN : CUDNN_NOT_PROPAGATE_NAN,
 #endif
         nd, shape.data(), padding.data(), strides.data());
     if (status != CUDNN_STATUS_SUCCESS) {
diff --git a/tensorflow/stream_executor/dnn.cc b/tensorflow/stream_executor/dnn.cc
index 07fe8a85f4..44144a0613 100644
--- a/tensorflow/stream_executor/dnn.cc
+++ b/tensorflow/stream_executor/dnn.cc
@@ -470,6 +470,7 @@ string ConvolutionDescriptor::ToShortString() const {
 PoolingDescriptor::PoolingDescriptor(int ndims)
     : mode_(dnn::PoolingMode::kMaximum),
       ndims_(ndims),
+      propagate_nans_(false),
       window_(ndims, 0),
       padding_(ndims, 0),
       strides_(ndims, 1) {}
@@ -482,6 +483,7 @@ void PoolingDescriptor::CloneFrom(const PoolingDescriptor& other) {
   window_ = other.window_;
   padding_ = other.padding_;
   strides_ = other.strides_;
+  propagate_nans_ = other.propagate_nans_;
 }
 
 string PoolingDescriptor::ToString() const {
@@ -495,9 +497,12 @@ string PoolingDescriptor::ToString() const {
     port::Appendf(&padding, "%lld", padding_[i]);
   }
 
-  return port::Printf("{mode: %s window: %s strides: %s padding: %s}",
-                      mode_string, window.c_str(), strides.c_str(),
-                      padding.c_str());
+  const char* propagate_string = propagate_nans_ ? "Yes" : "No";
+
+  return port::Printf(
+      "{mode: %s window: %s strides: %s padding: %s propagate NaNs: %s}",
+      mode_string, window.c_str(), strides.c_str(), padding.c_str(),
+      propagate_string);
 }
 
 string PoolingDescriptor::ToShortString() const {
@@ -508,7 +513,8 @@ string PoolingDescriptor::ToShortString() const {
     port::Appendf(&padding, "_p%d:%lld", i, padding_[i]);
   }
   return port::StrCat(mode_ == dnn::PoolingMode::kMaximum ? "max" : "avg",
-                      window, strides, padding);
+                      window, strides, padding,
+                      propagate_nans_ ? "propagate_nans" : "ignore_nans");
 }
 
 // -- NormalizeDescriptor
diff --git a/tensorflow/stream_executor/dnn.h b/tensorflow/stream_executor/dnn.h
index 49235167ab..0d2cd4a9f2 100644
--- a/tensorflow/stream_executor/dnn.h
+++ b/tensorflow/stream_executor/dnn.h
@@ -661,6 +661,10 @@ class PoolingDescriptor {
     SetDim(&strides_, dim, value);
     return *this;
   }
+  PoolingDescriptor& set_propagate_nans(bool value) {
+    propagate_nans_ = value;
+    return *this;
+  }
 
   int ndims() const { return ndims_; }
   void CloneFrom(const PoolingDescriptor& other);
@@ -681,10 +685,12 @@ class PoolingDescriptor {
   std::vector<int64> window() const { return window_; }
   std::vector<int64> padding() const { return padding_; }
   std::vector<int64> strides() const { return strides_; }
+  bool propagate_nans() const { return propagate_nans_; }
 
  private:
   PoolingMode mode_;
   int ndims_;
+  bool propagate_nans_;
 
   // Stored as: ..., y, x.
   std::vector<int64> window_;
diff --git a/tensorflow/tools/api/golden/tensorflow.linalg.pbtxt b/tensorflow/tools/api/golden/tensorflow.linalg.pbtxt
index 9fd38a29b7..62e634afb8 100644
--- a/tensorflow/tools/api/golden/tensorflow.linalg.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.linalg.pbtxt
@@ -94,7 +94,7 @@ tf_module {
   }
   member_method {
     name: "norm"
-    argspec: "args=[\'tensor\', \'ord\', \'axis\', \'keep_dims\', \'name\'], varargs=None, keywords=None, defaults=[\'euclidean\', \'None\', \'False\', \'None\'], "
+    argspec: "args=[\'tensor\', \'ord\', \'axis\', \'keepdims\', \'name\', \'keep_dims\'], varargs=None, keywords=None, defaults=[\'euclidean\', \'None\', \'None\', \'None\', \'None\'], "
   }
   member_method {
     name: "qr"
diff --git a/tensorflow/tools/api/golden/tensorflow.nn.pbtxt b/tensorflow/tools/api/golden/tensorflow.nn.pbtxt
index 24c0448dea..ebd9c079b5 100644
--- a/tensorflow/tools/api/golden/tensorflow.nn.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.nn.pbtxt
@@ -170,7 +170,7 @@ tf_module {
   }
   member_method {
     name: "l2_normalize"
-    argspec: "args=[\'x\', \'dim\', \'epsilon\', \'name\'], varargs=None, keywords=None, defaults=[\'1e-12\', \'None\'], "
+    argspec: "args=[\'x\', \'axis\', \'epsilon\', \'name\', \'dim\'], varargs=None, keywords=None, defaults=[\'None\', \'1e-12\', \'None\', \'None\'], "
   }
   member_method {
     name: "leaky_relu"
@@ -190,7 +190,7 @@ tf_module {
   }
   member_method {
     name: "log_softmax"
-    argspec: "args=[\'logits\', \'dim\', \'name\'], varargs=None, keywords=None, defaults=[\'-1\', \'None\'], "
+    argspec: "args=[\'logits\', \'axis\', \'name\', \'dim\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], "
   }
   member_method {
     name: "log_uniform_candidate_sampler"
@@ -282,7 +282,7 @@ tf_module {
   }
   member_method {
     name: "softmax"
-    argspec: "args=[\'logits\', \'dim\', \'name\'], varargs=None, keywords=None, defaults=[\'-1\', \'None\'], "
+    argspec: "args=[\'logits\', \'axis\', \'name\', \'dim\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], "
   }
   member_method {
     name: "softmax_cross_entropy_with_logits"
diff --git a/tensorflow/tools/api/golden/tensorflow.pbtxt b/tensorflow/tools/api/golden/tensorflow.pbtxt
index bf7bc6a7c1..0edd4153d7 100644
--- a/tensorflow/tools/api/golden/tensorflow.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.pbtxt
@@ -750,7 +750,7 @@ tf_module {
   }
   member_method {
     name: "boolean_mask"
-    argspec: "args=[\'tensor\', \'mask\', \'name\'], varargs=None, keywords=None, defaults=[\'boolean_mask\'], "
+    argspec: "args=[\'tensor\', \'mask\', \'name\', \'axis\'], varargs=None, keywords=None, defaults=[\'boolean_mask\', \'None\'], "
   }
   member_method {
     name: "broadcast_dynamic_shape"
@@ -858,7 +858,7 @@ tf_module {
   }
   member_method {
     name: "count_nonzero"
-    argspec: "args=[\'input_tensor\', \'axis\', \'keep_dims\', \'dtype\', \'name\', \'reduction_indices\'], varargs=None, keywords=None, defaults=[\'None\', \'False\', \"<dtype: \'int64\'>\", \'None\', \'None\'], "
+    argspec: "args=[\'input_tensor\', \'axis\', \'keepdims\', \'dtype\', \'name\', \'reduction_indices\', \'keep_dims\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \"<dtype: \'int64\'>\", \'None\', \'None\', \'None\'], "
   }
   member_method {
     name: "count_up_to"
@@ -1414,7 +1414,7 @@ tf_module {
   }
   member_method {
     name: "norm"
-    argspec: "args=[\'tensor\', \'ord\', \'axis\', \'keep_dims\', \'name\'], varargs=None, keywords=None, defaults=[\'euclidean\', \'None\', \'False\', \'None\'], "
+    argspec: "args=[\'tensor\', \'ord\', \'axis\', \'keepdims\', \'name\', \'keep_dims\'], varargs=None, keywords=None, defaults=[\'euclidean\', \'None\', \'None\', \'None\', \'None\'], "
   }
   member_method {
     name: "not_equal"
@@ -1546,11 +1546,11 @@ tf_module {
   }
   member_method {
     name: "reduce_all"
-    argspec: "args=[\'input_tensor\', \'axis\', \'keep_dims\', \'name\', \'reduction_indices\'], varargs=None, keywords=None, defaults=[\'None\', \'False\', \'None\', \'None\'], "
+    argspec: "args=[\'input_tensor\', \'axis\', \'keepdims\', \'name\', \'reduction_indices\', \'keep_dims\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\'], "
   }
   member_method {
     name: "reduce_any"
-    argspec: "args=[\'input_tensor\', \'axis\', \'keep_dims\', \'name\', \'reduction_indices\'], varargs=None, keywords=None, defaults=[\'None\', \'False\', \'None\', \'None\'], "
+    argspec: "args=[\'input_tensor\', \'axis\', \'keepdims\', \'name\', \'reduction_indices\', \'keep_dims\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\'], "
   }
   member_method {
     name: "reduce_join"
@@ -1558,27 +1558,27 @@ tf_module {
   }
   member_method {
     name: "reduce_logsumexp"
-    argspec: "args=[\'input_tensor\', \'axis\', \'keep_dims\', \'name\', \'reduction_indices\'], varargs=None, keywords=None, defaults=[\'None\', \'False\', \'None\', \'None\'], "
+    argspec: "args=[\'input_tensor\', \'axis\', \'keepdims\', \'name\', \'reduction_indices\', \'keep_dims\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\'], "
   }
   member_method {
     name: "reduce_max"
-    argspec: "args=[\'input_tensor\', \'axis\', \'keep_dims\', \'name\', \'reduction_indices\'], varargs=None, keywords=None, defaults=[\'None\', \'False\', \'None\', \'None\'], "
+    argspec: "args=[\'input_tensor\', \'axis\', \'keepdims\', \'name\', \'reduction_indices\', \'keep_dims\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\'], "
   }
   member_method {
     name: "reduce_mean"
-    argspec: "args=[\'input_tensor\', \'axis\', \'keep_dims\', \'name\', \'reduction_indices\'], varargs=None, keywords=None, defaults=[\'None\', \'False\', \'None\', \'None\'], "
+    argspec: "args=[\'input_tensor\', \'axis\', \'keepdims\', \'name\', \'reduction_indices\', \'keep_dims\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\'], "
   }
   member_method {
     name: "reduce_min"
-    argspec: "args=[\'input_tensor\', \'axis\', \'keep_dims\', \'name\', \'reduction_indices\'], varargs=None, keywords=None, defaults=[\'None\', \'False\', \'None\', \'None\'], "
+    argspec: "args=[\'input_tensor\', \'axis\', \'keepdims\', \'name\', \'reduction_indices\', \'keep_dims\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\'], "
   }
   member_method {
     name: "reduce_prod"
-    argspec: "args=[\'input_tensor\', \'axis\', \'keep_dims\', \'name\', \'reduction_indices\'], varargs=None, keywords=None, defaults=[\'None\', \'False\', \'None\', \'None\'], "
+    argspec: "args=[\'input_tensor\', \'axis\', \'keepdims\', \'name\', \'reduction_indices\', \'keep_dims\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\'], "
   }
   member_method {
     name: "reduce_sum"
-    argspec: "args=[\'input_tensor\', \'axis\', \'keep_dims\', \'name\', \'reduction_indices\'], varargs=None, keywords=None, defaults=[\'None\', \'False\', \'None\', \'None\'], "
+    argspec: "args=[\'input_tensor\', \'axis\', \'keepdims\', \'name\', \'reduction_indices\', \'keep_dims\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\'], "
   }
   member_method {
     name: "register_tensor_conversion_function"
diff --git a/tensorflow/tools/ci_build/ci_parameterized_build.sh b/tensorflow/tools/ci_build/ci_parameterized_build.sh
index 5f791d7bc7..c27f4953e3 100755
--- a/tensorflow/tools/ci_build/ci_parameterized_build.sh
+++ b/tensorflow/tools/ci_build/ci_parameterized_build.sh
@@ -165,7 +165,7 @@ else
   BAZEL_TARGET="${BAZEL_TARGET} //tensorflow/contrib/lite/kernels:embedding_lookup_test"
   BAZEL_TARGET="${BAZEL_TARGET} //tensorflow/contrib/lite/kernels:embedding_lookup_sparse_test"
   BAZEL_TARGET="${BAZEL_TARGET} //tensorflow/contrib/lite/kernels:fully_connected_test"
-  BAZEL_TARGET="${BAZEL_TARGET} //tensorflow/contrib/lite/testing:generated_examples_zip_test"
+  # BAZEL_TARGET="${BAZEL_TARGET} //tensorflow/contrib/lite/testing:generated_examples_zip_test"
   BAZEL_TARGET="${BAZEL_TARGET} //tensorflow/contrib/lite/kernels:hashtable_lookup_test"
   BAZEL_TARGET="${BAZEL_TARGET} //tensorflow/contrib/lite/kernels:local_response_norm_test"
   BAZEL_TARGET="${BAZEL_TARGET} //tensorflow/contrib/lite/kernels:lsh_projection_test"
diff --git a/tensorflow/tools/ci_build/install/install_golang.sh b/tensorflow/tools/ci_build/install/install_golang.sh
index 55c1674495..e1edd62cc5 100755
--- a/tensorflow/tools/ci_build/install/install_golang.sh
+++ b/tensorflow/tools/ci_build/install/install_golang.sh
@@ -16,7 +16,7 @@
 
 set -ex
 
-GOLANG_URL="https://storage.googleapis.com/golang/go1.9.1.linux-amd64.tar.gz"
+GOLANG_URL="https://storage.googleapis.com/golang/go1.9.2.linux-amd64.tar.gz"
 
 sudo mkdir -p /usr/local
 wget -q -O - "${GOLANG_URL}" | sudo tar -C /usr/local -xz
diff --git a/tensorflow/tools/ci_build/linux/libtensorflow_docker.sh b/tensorflow/tools/ci_build/linux/libtensorflow_docker.sh
index dcda8228bc..e5d8303c6e 100755
--- a/tensorflow/tools/ci_build/linux/libtensorflow_docker.sh
+++ b/tensorflow/tools/ci_build/linux/libtensorflow_docker.sh
@@ -48,6 +48,6 @@ ${DOCKER_BINARY} run \
   -e "TF_NEED_GCP=0" \
   -e "TF_NEED_HDFS=0" \
   -e "TF_NEED_CUDA=${TF_NEED_CUDA}" \
-  -e "TF_NEED_OPENCL=0" \
+  -e "TF_NEED_OPENCL_SYCL=0" \
   "${DOCKER_IMAGE}" \
   "/workspace/tensorflow/tools/ci_build/linux/libtensorflow.sh"
diff --git a/tensorflow/tools/ci_build/osx/libtensorflow_cpu.sh b/tensorflow/tools/ci_build/osx/libtensorflow_cpu.sh
index d90a1b905d..e1b56b9a25 100755
--- a/tensorflow/tools/ci_build/osx/libtensorflow_cpu.sh
+++ b/tensorflow/tools/ci_build/osx/libtensorflow_cpu.sh
@@ -27,7 +27,7 @@ export PYTHON_BIN_PATH="/usr/bin/python"
 export TF_NEED_GCP=0
 export TF_NEED_HDFS=0
 export TF_NEED_CUDA=0
-export TF_NEED_OPENCL=0
+export TF_NEED_OPENCL_SYCL=0
 export TF_NEED_MKL=0
 export COMPUTECPP_PATH="/usr/local"
 
diff --git a/tensorflow/tools/ci_build/osx/libtensorflow_gpu.sh b/tensorflow/tools/ci_build/osx/libtensorflow_gpu.sh
index 79973647c1..5a901af3e5 100755
--- a/tensorflow/tools/ci_build/osx/libtensorflow_gpu.sh
+++ b/tensorflow/tools/ci_build/osx/libtensorflow_gpu.sh
@@ -28,7 +28,7 @@ export LD_LIBRARY_PATH="/usr/local/cuda/lib:/usr/local/cuda/extras/CUPTI/lib:${L
 export PYTHON_BIN_PATH="/usr/bin/python"
 export TF_NEED_GCP=0
 export TF_NEED_HDFS=0
-export TF_NEED_OPENCL=0
+export TF_NEED_OPENCL_SYCL=0
 export TF_NEED_MKL=0
 export COMPUTECPP_PATH="/usr/local"
 
diff --git a/tensorflow/tools/ci_build/pi/build_raspberry_pi.sh b/tensorflow/tools/ci_build/pi/build_raspberry_pi.sh
index 5244898c40..88116d9f24 100755
--- a/tensorflow/tools/ci_build/pi/build_raspberry_pi.sh
+++ b/tensorflow/tools/ci_build/pi/build_raspberry_pi.sh
@@ -75,17 +75,23 @@ if [[ $1 == "PI_ONE" ]]; then
   PI_COPTS="--copt=-march=armv6 --copt=-mfpu=vfp
   --copt=-DUSE_GEMM_FOR_CONV --copt=-DUSE_OPENBLAS
   --copt=-isystem --copt=${OPENBLAS_INSTALL_PATH}/include/
+  --copt=-std=gnu11 --copt=-DS_IREAD=S_IRUSR --copt=-DS_IWRITE=S_IWUSR
   --linkopt=-L${OPENBLAS_INSTALL_PATH}/lib/
   --linkopt=-l:libopenblas.a"
   echo "Building for the Pi One/Zero, with no NEON support"
 else
   PI_COPTS='--copt=-march=armv7-a --copt=-mfpu=neon-vfpv4
+  --copt=-std=gnu11 --copt=-DS_IREAD=S_IRUSR --copt=-DS_IWRITE=S_IWUSR
   --copt=-U__GCC_HAVE_SYNC_COMPARE_AND_SWAP_1
   --copt=-U__GCC_HAVE_SYNC_COMPARE_AND_SWAP_2
   --copt=-U__GCC_HAVE_SYNC_COMPARE_AND_SWAP_8'
   echo "Building for the Pi Two/Three, with NEON acceleration"
 fi
 
+# We need to pass down the environment variable with a possible alternate Python
+# include path for Python 3.x builds to work.
+export CROSSTOOL_PYTHON_INCLUDE_PATH
+
 cd ${WORKSPACE_PATH}
 bazel build -c opt ${PI_COPTS} \
   --config=monolithic \
diff --git a/tensorflow/tools/ci_build/windows/bazel/bazel_test_lib.sh b/tensorflow/tools/ci_build/windows/bazel/bazel_test_lib.sh
index 924ab1a4ae..44b6d52952 100644
--- a/tensorflow/tools/ci_build/windows/bazel/bazel_test_lib.sh
+++ b/tensorflow/tools/ci_build/windows/bazel/bazel_test_lib.sh
@@ -117,7 +117,7 @@ function run_configure_for_cpu_build {
   export TF_NEED_VERBS=0
   export TF_NEED_GCP=0
   export TF_NEED_HDFS=0
-  export TF_NEED_OPENCL=0
+  export TF_NEED_OPENCL_SYCL=0
   echo "" | ./configure
 }
 
@@ -141,7 +141,7 @@ function run_configure_for_gpu_build {
   export TF_NEED_MKL=0
   export TF_NEED_GCP=0
   export TF_NEED_HDFS=0
-  export TF_NEED_OPENCL=0
+  export TF_NEED_OPENCL_SYCL=0
 
   # TODO(pcloudy): Remove this after TensorFlow uses its own CRSOOTOOL
   # for GPU build on Windows
diff --git a/tensorflow/tools/docker/Dockerfile.devel-gpu-cuda9-cudnn7 b/tensorflow/tools/docker/Dockerfile.devel-gpu-cuda9-cudnn7
index 64ebc4607a..9bcc3925a8 100644
--- a/tensorflow/tools/docker/Dockerfile.devel-gpu-cuda9-cudnn7
+++ b/tensorflow/tools/docker/Dockerfile.devel-gpu-cuda9-cudnn7
@@ -101,12 +101,11 @@ RUN ln -s /usr/local/cuda/lib64/stubs/libcuda.so /usr/local/cuda/lib64/stubs/lib
                 --jobs=${TF_AVAILABLE_CPUS} \
                 tensorflow/tools/pip_package:build_pip_package && \
     mkdir /pip_pkg && \
-    bazel-bin/tensorflow/tools/pip_package/build_pip_package /pip_pkg
-
-# Clean up pip wheel and Bazel cache when done.
-RUN pip --no-cache-dir install --upgrade /pip_pkg/tensorflow-*.whl && \
+    bazel-bin/tensorflow/tools/pip_package/build_pip_package /pip_pkg && \
+    pip --no-cache-dir install --upgrade /pip_pkg/tensorflow-*.whl && \
     rm -rf /pip_pkg && \
     rm -rf /root/.cache
+# Clean up pip wheel and Bazel cache when done.
 
 WORKDIR /root
 
diff --git a/tensorflow/tools/docker/Dockerfile.gpu b/tensorflow/tools/docker/Dockerfile.gpu
index 0571dd7391..e212d10290 100644
--- a/tensorflow/tools/docker/Dockerfile.gpu
+++ b/tensorflow/tools/docker/Dockerfile.gpu
@@ -1,4 +1,4 @@
-FROM nvidia/cuda:8.0-cudnn6-devel-ubuntu16.04
+FROM nvidia/cuda:8.0-cudnn6-runtime-ubuntu16.04
 
 LABEL maintainer="Craig Citro <craigcitro@google.com>"
 
diff --git a/tensorflow/tools/docker/README.md b/tensorflow/tools/docker/README.md
index 2e5a0038ed..e35c58ff80 100644
--- a/tensorflow/tools/docker/README.md
+++ b/tensorflow/tools/docker/README.md
@@ -60,6 +60,20 @@ Building TensorFlow Docker containers should be done through the
 script. The raw Dockerfiles should not be used directly as they contain strings
 to be replaced by the script during the build.
 
+Attempting to run [parameterized_docker_build.sh](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/tools/docker/parameterized_docker_build.sh)
+from a binary docker image such as for example `tensorflow/tensorflow:latest` will
+not work. One needs to execute the script from a developer docker image since by
+contrast with a binary docker image it contains not only the compiled solution but
+also the tensorflow source code. Please select the appropriate developer docker
+image of tensorflow at `tensorflow/tensorflow:[.](https://hub.docker.com/r/tensorflow/tensorflow/tags/)`.
+
+The smallest command line to generate a docker image will then be:
+```docker run -it tensorflow/tensorflow:"right_tag"```
+
+If you would like to start a jupyter notebook on your docker container, make sure
+to map the port 8888 of your docker container by adding -p 8888:8888 to the above
+command.
+
 To use the script, specify the container type (`CPU` vs. `GPU`), the desired
 Python version (`PYTHON2` vs. `PYTHON3`) and whether the developer Docker image
 is to be built (`NO` vs. `YES`). In addition, you need to specify the central
diff --git a/tensorflow/tools/graph_transforms/BUILD b/tensorflow/tools/graph_transforms/BUILD
index 1bf7113c9e..9216008600 100644
--- a/tensorflow/tools/graph_transforms/BUILD
+++ b/tensorflow/tools/graph_transforms/BUILD
@@ -131,6 +131,8 @@ cc_library(
         "//tensorflow/core:lib",
         "//tensorflow/core:protos_all_cc",
         "//tensorflow/core:tensorflow",
+        "//tensorflow/contrib/rnn:gru_ops_op_lib",
+        "//tensorflow/contrib/rnn:lstm_ops_op_lib",
     ] + if_not_windows([
         "//tensorflow/core/kernels:quantized_ops",
         "//tensorflow/core/kernels:remote_fused_graph_rewriter_transform",
diff --git a/tensorflow/tools/graph_transforms/quantize_nodes.cc b/tensorflow/tools/graph_transforms/quantize_nodes.cc
index 2b85e7e83c..97e8f77616 100644
--- a/tensorflow/tools/graph_transforms/quantize_nodes.cc
+++ b/tensorflow/tools/graph_transforms/quantize_nodes.cc
@@ -759,6 +759,7 @@ Status QuantizeNodes(const GraphDef& input_graph_def,
           NodeDef reshape_dims;
           reshape_dims.set_op("Const");
           reshape_dims.set_name(unique_input_name + "/reshape_dims");
+          AddNodeInput("^" + input_name, &reshape_dims);
           SetNodeAttr("dtype", DT_INT32, &reshape_dims);
           Tensor reshape_dims_tensor(DT_INT32, {1});
           reshape_dims_tensor.flat<int32>()(0) = -1;
@@ -768,6 +769,7 @@ Status QuantizeNodes(const GraphDef& input_graph_def,
           NodeDef reduction_dims;
           reduction_dims.set_op("Const");
           reduction_dims.set_name(unique_input_name + "/reduction_dims");
+          AddNodeInput("^" + input_name, &reduction_dims);
           SetNodeAttr("dtype", DT_INT32, &reduction_dims);
           Tensor reduction_dims_tensor(DT_INT32, {1});
           reduction_dims_tensor.flat<int32>()(0) = 0;
diff --git a/tensorflow/tools/pip_package/setup.py b/tensorflow/tools/pip_package/setup.py
index 60282f6aa3..a493c6f2aa 100644
--- a/tensorflow/tools/pip_package/setup.py
+++ b/tensorflow/tools/pip_package/setup.py
@@ -29,7 +29,7 @@ from setuptools.dist import Distribution
 # This version string is semver compatible, but incompatible with pip.
 # For pip, we will remove all '-' characters from this string, and use the
 # result for pip.
-_VERSION = '1.4.0-rc1'
+_VERSION = '1.4.0'
 
 REQUIRED_PACKAGES = [
     'absl-py',
diff --git a/third_party/aws.BUILD b/third_party/aws.BUILD
index bc6a2fd8cc..bc9e37ffb3 100644
--- a/third_party/aws.BUILD
+++ b/third_party/aws.BUILD
@@ -21,6 +21,9 @@ cc_library(
         "@%ws%//tensorflow:linux_ppc64le": glob([
             "aws-cpp-sdk-core/source/platform/linux-shared/*.cpp",
         ]),
+        "@%ws%//tensorflow:raspberry_pi_armeabi": glob([
+            "aws-cpp-sdk-core/source/platform/linux-shared/*.cpp",
+        ]),
         "//conditions:default": [],
     }) + glob([
         "aws-cpp-sdk-core/include/**/*.h",
diff --git a/third_party/curl.BUILD b/third_party/curl.BUILD
index 882967df1c..805a30d262 100644
--- a/third_party/curl.BUILD
+++ b/third_party/curl.BUILD
@@ -477,7 +477,6 @@ genrule(
         "#  define HAVE_RAND_EGD 1",
         "#  define HAVE_RAND_STATUS 1",
         "#  define HAVE_SSL_GET_SHUTDOWN 1",
-        "#  define HAVE_STROPTS_H 1",
         "#  define HAVE_TERMIOS_H 1",
         "#  define OS \"x86_64-pc-linux-gnu\"",
         "#  define RANDOM_FILE \"/dev/urandom\"",
diff --git a/third_party/sycl/crosstool/CROSSTOOL.tpl b/third_party/sycl/crosstool/CROSSTOOL.tpl
index 32884d71e7..f8e50efcc6 100755
--- a/third_party/sycl/crosstool/CROSSTOOL.tpl
+++ b/third_party/sycl/crosstool/CROSSTOOL.tpl
@@ -35,10 +35,10 @@ toolchain {
   tool_path { name: "compat-ld" path: "/usr/bin/ld" }
   tool_path { name: "cpp" path: "/usr/bin/cpp" }
   tool_path { name: "dwp" path: "/usr/bin/dwp" }
-  tool_path { name: "gcc" path: "computecpp" }
+  tool_path { name: "gcc" path: "%{sycl_impl}" }
   # Use "-std=c++11" for nvcc. For consistency, force both the host compiler
   # and the device compiler to use "-std=c++11".
-  cxx_flag: "-std=c++11"
+  cxx_flag: "%{c++_std}"
   linker_flag: "-Wl,-no-as-needed"
   linker_flag: "-lstdc++"
   linker_flag: "-B/usr/bin/"
@@ -53,7 +53,7 @@ toolchain {
   cxx_builtin_include_directory: "/usr/local/include"
   cxx_builtin_include_directory: "/usr/include"
 
-  cxx_builtin_include_directory: "%{computecpp_toolkit_path}"
+  cxx_builtin_include_directory: "%{sycl_include_dir}"
   cxx_builtin_include_directory: "%{python_lib_path}"
 
   tool_path { name: "gcov" path: "/usr/bin/gcov" }
@@ -214,4 +214,4 @@ toolchain {
     compiler_flag: "-O2"
     compiler_flag: "-DNDEBUG"
   }
-}
+}
\ No newline at end of file
diff --git a/third_party/sycl/crosstool/trisycl.tpl b/third_party/sycl/crosstool/trisycl.tpl
new file mode 100644
index 0000000000..87a70d8f95
--- /dev/null
+++ b/third_party/sycl/crosstool/trisycl.tpl
@@ -0,0 +1,85 @@
+#!/usr/bin/env python
+
+import os
+import sys
+import tempfile
+from subprocess import call
+
+CPU_CXX_COMPILER = ('%{host_cxx_compiler}')
+CPU_C_COMPILER = ('%{host_c_compiler}')
+
+CURRENT_DIR = os.path.dirname(sys.argv[0])
+TRISYCL_INCLUDE_DIR = CURRENT_DIR + '/../sycl/include'
+
+
+def main():
+  compiler_flags = []
+
+  remove_flags = ('-Wl,--no-undefined', '-Wno-unused-but-set-variable',
+                  '-Wignored-attributes', '-fno-exceptions')
+  # remove -fsamotoze-coverage from string with g++
+  if 'g++' in CPU_CXX_COMPILER:
+    remove_flags += ('-fsanitize-coverage',)
+    compiler_flags += ['-fopenmp']
+  else:
+    compiler_flags += ['-fopenmp=libomp']
+
+  compiler_flags += [
+      flag for flag in sys.argv[1:] if not flag.startswith(remove_flags)
+  ]
+
+  output_file_index = compiler_flags.index('-o') + 1
+  output_file_name = compiler_flags[output_file_index]
+
+  if (output_file_index == 1):
+    # we are linking
+    return call([CPU_CXX_COMPILER] + compiler_flags + ['-Wl,--no-undefined'])
+
+  # find what we compile
+  compiling_cpp = 0
+  if ('-c' in compiler_flags):
+    compiled_file_index = compiler_flags.index('-c') + 1
+    compiled_file_name = compiler_flags[compiled_file_index]
+    if (compiled_file_name.endswith(('.cc', '.c++', '.cpp', '.CPP', '.C',
+                                     '.cxx'))):
+      compiling_cpp = 1
+
+  debug_flags = [
+      '-DTRISYCL_DEBUG', '-DBOOST_LOG_DYN_LINK', '-DTRISYCL_TRACE_KERNEL',
+      '-lpthread', '-lboost_log', '-g', '-rdynamic'
+  ]
+
+  opt_flags = ['-DNDEBUG', '-DBOOST_DISABLE_ASSERTS', '-O3']
+
+  compiler_flags = compiler_flags + [
+      '-DEIGEN_USE_SYCL=1', '-DEIGEN_HAS_C99_MATH',
+      '-DEIGEN_MAX_ALIGN_BYTES=16', '-DTENSORFLOW_USE_SYCL'
+  ] + opt_flags
+
+  if (compiling_cpp == 1):
+    # create a blacklist of folders that will be skipped when compiling
+    # with triSYCL
+    skip_extensions = ['.cu.cc']
+    skip_folders = [
+        'tensorflow/compiler', 'tensorflow/docs_src', 'tensorflow/tensorboard',
+        'third_party', 'external', 'hexagon'
+    ]
+    skip_folders = [(folder + '/') for folder in skip_folders]
+    # if compiling external project skip triSYCL
+    if any(
+        compiled_file_name.endswith(_ext) for _ext in skip_extensions) or any(
+            _folder in output_file_name for _folder in skip_folders):
+      return call([CPU_CXX_COMPILER] + compiler_flags)
+
+    host_compiler_flags = [
+        '-xc++', '-Wno-unused-variable', '-I', TRISYCL_INCLUDE_DIR
+    ] + compiler_flags
+    x = call([CPU_CXX_COMPILER] + host_compiler_flags)
+    return x
+  else:
+    # compile for C
+    return call([CPU_C_COMPILER] + compiler_flags)
+
+
+if __name__ == '__main__':
+  sys.exit(main())
diff --git a/third_party/sycl/sycl/BUILD.tpl b/third_party/sycl/sycl/BUILD.tpl
index 6cad190630..b6ceaadda7 100755
--- a/third_party/sycl/sycl/BUILD.tpl
+++ b/third_party/sycl/sycl/BUILD.tpl
@@ -10,16 +10,27 @@ package(default_visibility = ["//visibility:public"])
 exports_files(["LICENSE.text"])
 
 config_setting(
-    name = "using_sycl",
-    values = {
-        "define": "using_sycl=true",
+    name = "using_sycl_ccpp",
+    define_values = {
+        "using_sycl": "true",
+        "using_trisycl": "false",
     },
 )
 
+config_setting(
+    name = "using_sycl_trisycl",
+    define_values = {
+        "using_sycl": "true",
+        "using_trisycl": "false",
+    },
+)
+
+
 cc_library(
     name = "sycl_headers",
     hdrs = glob([
         "**/*.h",
+        "**/*.hpp",
     ]),
     includes = [".", "include"],
 )
diff --git a/third_party/sycl/sycl/build_defs.bzl.tpl b/third_party/sycl/sycl/build_defs.bzl.tpl
index 09bef0a661..33386f8957 100755
--- a/third_party/sycl/sycl/build_defs.bzl.tpl
+++ b/third_party/sycl/sycl/build_defs.bzl.tpl
@@ -5,9 +5,24 @@ def if_sycl(if_true, if_false = []):
 
     Returns a select statement which evaluates to if_true if we're building
     with SYCL enabled.  Otherwise, the select statement evaluates to if_false.
+    If we are building with triSYCL instead of ComputeCPP, a list with
+    the first element of if_true is returned.
+    """
+    return select({
+        "@local_config_sycl//sycl:using_sycl_ccpp": if_true,
+        "@local_config_sycl//sycl:using_sycl_trisycl": if_true[0:1],
+        "//conditions:default": if_false
+    })
+
+def if_ccpp(if_true, if_false = []):
+    """Shorthand for select()'ing if we are building with ComputeCPP.
 
+    Returns a select statement which evaluates to if_true if we're building
+    with ComputeCPP enabled. Otherwise, the select statement evaluates
+    to if_false.
     """
     return select({
-        "@local_config_sycl//sycl:using_sycl": if_true,
+        "@local_config_sycl//sycl:using_sycl_ccpp": if_true,
+        "@local_config_sycl//sycl:using_sycl_trisycl": if_false,
         "//conditions:default": if_false
     })
diff --git a/third_party/sycl/sycl_configure.bzl b/third_party/sycl/sycl_configure.bzl
index 7af063178e..5b9d0eb383 100644
--- a/third_party/sycl/sycl_configure.bzl
+++ b/third_party/sycl/sycl_configure.bzl
@@ -5,20 +5,26 @@
   * HOST_CXX_COMPILER:  The host C++ compiler
   * HOST_C_COMPILER:    The host C compiler
   * COMPUTECPP_TOOLKIT_PATH: The path to the ComputeCpp toolkit.
+  * TRISYCL_INCLUDE_DIR: The path to the include directory of triSYCL.
+                         (if using triSYCL instead of ComputeCPP)
   * PYTHON_LIB_PATH: The path to the python lib
 """
 
 _HOST_CXX_COMPILER = "HOST_CXX_COMPILER"
 _HOST_C_COMPILER= "HOST_C_COMPILER"
 _COMPUTECPP_TOOLKIT_PATH = "COMPUTECPP_TOOLKIT_PATH"
+_TRISYCL_INCLUDE_DIR = "TRISYCL_INCLUDE_DIR"
 _PYTHON_LIB_PATH = "PYTHON_LIB_PATH"
 
 def _enable_sycl(repository_ctx):
-  if "TF_NEED_OPENCL" in repository_ctx.os.environ:
-    enable_sycl = repository_ctx.os.environ["TF_NEED_OPENCL"].strip()
+  if "TF_NEED_OPENCL_SYCL" in repository_ctx.os.environ:
+    enable_sycl = repository_ctx.os.environ["TF_NEED_OPENCL_SYCL"].strip()
     return enable_sycl == "1"
   return False
 
+def _enable_compute_cpp(repository_ctx):
+  return _COMPUTECPP_TOOLKIT_PATH in repository_ctx.os.environ
+
 def auto_configure_fail(msg):
   """Output failure message when auto configuration fails."""
   red = "\033[0;31m"
@@ -59,6 +65,14 @@ def find_computecpp_root(repository_ctx):
     return sycl_name
   fail("Cannot find SYCL compiler, please correct your path")
 
+def find_trisycl_include_dir(repository_ctx):
+  """Find triSYCL include directory. """
+  if _TRISYCL_INCLUDE_DIR in repository_ctx.os.environ:
+    sycl_name = repository_ctx.os.environ[_TRISYCL_INCLUDE_DIR].strip()
+    if sycl_name.startswith("/"):
+      return sycl_name
+  fail( "Cannot find triSYCL include directory, please correct your path")
+
 def find_python_lib(repository_ctx):
   """Returns python path."""
   if _PYTHON_LIB_PATH in repository_ctx.os.environ:
@@ -171,26 +185,53 @@ def _sycl_autoconf_imp(repository_ctx):
     _tpl(repository_ctx, "sycl:platform.bzl")
     _tpl(repository_ctx, "crosstool:BUILD")
     _file(repository_ctx, "sycl:LICENSE.text")
-    _tpl(repository_ctx, "crosstool:computecpp",
-    {
-      "%{host_cxx_compiler}" : find_cc(repository_ctx),
-      "%{host_c_compiler}" : find_c(repository_ctx),
-    })
-
-    computecpp_root = find_computecpp_root(repository_ctx)
-    _check_dir(repository_ctx, computecpp_root)
-
-    _tpl(repository_ctx, "crosstool:CROSSTOOL",
-    {
-      "%{computecpp_toolkit_path}" : computecpp_root,
-      "%{python_lib_path}" : find_python_lib(repository_ctx),
-    })
-
-    # symlink libraries
-    _check_lib(repository_ctx, computecpp_root+"/lib", "libComputeCpp.so" )
-    _symlink_dir(repository_ctx, computecpp_root + "/lib", "sycl/lib")
-    _symlink_dir(repository_ctx, computecpp_root + "/include", "sycl/include")
-    _symlink_dir(repository_ctx, computecpp_root + "/bin", "sycl/bin")
+
+    if _enable_compute_cpp(repository_ctx):
+      _tpl(repository_ctx, "crosstool:computecpp",
+      {
+        "%{host_cxx_compiler}" : find_cc(repository_ctx),
+        "%{host_c_compiler}" : find_c(repository_ctx)
+      })
+
+      computecpp_root = find_computecpp_root(repository_ctx);
+      _check_dir(repository_ctx, computecpp_root)
+
+      _tpl(repository_ctx, "crosstool:CROSSTOOL",
+      {
+        "%{sycl_include_dir}" : computecpp_root,
+        "%{sycl_impl}" : "computecpp",
+        "%{c++_std}" : "-std=c++11",
+        "%{python_lib_path}" : find_python_lib(repository_ctx),
+      })
+
+      # symlink libraries
+      _check_lib(repository_ctx, computecpp_root+"/lib", "libComputeCpp.so" )
+      _symlink_dir(repository_ctx, computecpp_root + "/lib", "sycl/lib")
+      _symlink_dir(repository_ctx, computecpp_root + "/include", "sycl/include")
+      _symlink_dir(repository_ctx, computecpp_root + "/bin", "sycl/bin")
+    else:
+
+      trisycl_include_dir = find_trisycl_include_dir(repository_ctx);
+      _check_dir(repository_ctx, trisycl_include_dir)
+
+      _tpl(repository_ctx, "crosstool:trisycl",
+      {
+        "%{host_cxx_compiler}" : find_cc(repository_ctx),
+        "%{host_c_compiler}" : find_c(repository_ctx),
+        "%{trisycl_include_dir}" : trisycl_include_dir
+      })
+
+
+      _tpl(repository_ctx, "crosstool:CROSSTOOL",
+      {
+        "%{sycl_include_dir}" : trisycl_include_dir,
+        "%{sycl_impl}" : "trisycl",
+        "%{c++_std}" : "-std=c++1y",
+        "%{python_lib_path}" : find_python_lib(repository_ctx),
+      })
+
+      _symlink_dir(repository_ctx, trisycl_include_dir, "sycl/include")
+
 
 sycl_configure = repository_rule(
   implementation = _sycl_autoconf_imp,
diff --git a/third_party/zlib.BUILD b/third_party/zlib.BUILD
index 8509668891..d164ee719c 100644
--- a/third_party/zlib.BUILD
+++ b/third_party/zlib.BUILD
@@ -49,7 +49,7 @@ cc_library(
         ":windows_msvc": [],
         "//conditions:default": [
             "-Wno-shift-negative-value",
-            "-Wno-implicit-function-declaration",
+            "-DZ_HAVE_UNISTD_H",
         ],
     }),
     includes = ["."],
diff --git a/tools/bazel.rc b/tools/bazel.rc
index 2d7201ae57..04c24d7511 100644
--- a/tools/bazel.rc
+++ b/tools/bazel.rc
@@ -9,13 +9,16 @@ build:win-cuda --define=using_cuda=true --define=using_cuda_nvcc=true
 build:mkl --define=using_mkl=true
 
 build:sycl --crosstool_top=@local_config_sycl//crosstool:toolchain
-build:sycl --define=using_sycl=true
+build:sycl --define=using_sycl=true --define=using_trisycl=false
 
 build:sycl_nodouble --crosstool_top=@local_config_sycl//crosstool:toolchain
 build:sycl_nodouble --define=using_sycl=true --cxxopt -DTENSORFLOW_SYCL_NO_DOUBLE
 
 build:sycl_asan --crosstool_top=@local_config_sycl//crosstool:toolchain
-build:sycl_asan --define=using_sycl=true --copt -fno-omit-frame-pointer --copt -fsanitize-coverage=3 --copt -DGPR_NO_DIRECT_SYSCALLS --linkopt -fPIC --linkopt -fsanitize=address
+build:sycl_asan --define=using_sycl=true --define=using_trisycl=false --copt -fno-omit-frame-pointer --copt -fsanitize-coverage=3 --copt -DGPR_NO_DIRECT_SYSCALLS --linkopt -fPIC --linkopt -fsanitize=address
+
+build:sycl_trisycl --crosstool_top=@local_config_sycl//crosstool:toolchain
+build:sycl_trisycl --define=using_sycl=true --define=using_trisycl=true
 
 build --define=use_fast_cpp_protos=true
 build --define=allow_oversize_protos=true
diff --git a/util/python/BUILD b/util/python/BUILD
index 96daf9947a..f5fa0c6d29 100644
--- a/util/python/BUILD
+++ b/util/python/BUILD
@@ -1,4 +1,4 @@
-licenses(["restricted"])
+licenses(["notice"])  # New BSD, Python Software Foundation
 
 package(default_visibility = ["//visibility:public"])
 
-- 
GitLab


From 0927a5da0ab74b7ba23e3d575e5570666c3ef5ef Mon Sep 17 00:00:00 2001
From: Yifei Feng <yifeif@google.com>
Date: Wed, 22 Nov 2017 13:53:18 -0800
Subject: [PATCH 0255/1225] Add no_pip tests to new
 contrib/data/python/kernel_tests targets that uses test only dep.

---
 tensorflow/contrib/data/python/kernel_tests/BUILD | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tensorflow/contrib/data/python/kernel_tests/BUILD b/tensorflow/contrib/data/python/kernel_tests/BUILD
index c443c7f61a..9d49750c80 100644
--- a/tensorflow/contrib/data/python/kernel_tests/BUILD
+++ b/tensorflow/contrib/data/python/kernel_tests/BUILD
@@ -159,6 +159,7 @@ py_test(
     size = "small",
     srcs = ["flat_map_dataset_op_test.py"],
     srcs_version = "PY2AND3",
+    tags = ["no_pip"],
     deps = [
         ":dataset_serialization_test",
         "//tensorflow/contrib/data/python/ops:dataset_ops",
@@ -275,6 +276,7 @@ py_test(
     size = "small",
     srcs = ["map_dataset_op_test.py"],
     srcs_version = "PY2AND3",
+    tags = ["no_pip"],
     deps = [
         ":dataset_serialization_test",
         "//tensorflow/contrib/data/python/ops:dataset_ops",
-- 
GitLab


From 9d56475bac690b7a5746e2443a3c157779342d03 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 22 Nov 2017 13:58:11 -0800
Subject: [PATCH 0256/1225] Add tensor pool feature to tf.contrib.gan

PiperOrigin-RevId: 176697680
---
 tensorflow/contrib/gan/BUILD                  |  32 +++++
 .../gan/python/features/python/tensor_pool.py |  35 ++++++
 .../features/python/tensor_pool_impl.py       | 118 ++++++++++++++++++
 .../features/python/tensor_pool_test.py       |  94 ++++++++++++++
 4 files changed, 279 insertions(+)
 create mode 100644 tensorflow/contrib/gan/python/features/python/tensor_pool.py
 create mode 100644 tensorflow/contrib/gan/python/features/python/tensor_pool_impl.py
 create mode 100644 tensorflow/contrib/gan/python/features/python/tensor_pool_test.py

diff --git a/tensorflow/contrib/gan/BUILD b/tensorflow/contrib/gan/BUILD
index 1418c87023..abe4665caa 100644
--- a/tensorflow/contrib/gan/BUILD
+++ b/tensorflow/contrib/gan/BUILD
@@ -116,6 +116,7 @@ py_library(
     deps = [
         ":clip_weights",
         ":conditioning_utils",
+        ":tensor_pool",
         ":virtual_batchnorm",
         "//tensorflow/python:util",
     ],
@@ -219,6 +220,37 @@ py_test(
     ],
 )
 
+py_library(
+    name = "tensor_pool",
+    srcs = [
+        "python/features/python/tensor_pool.py",
+        "python/features/python/tensor_pool_impl.py",
+    ],
+    srcs_version = "PY2AND3",
+    deps = [
+        "//tensorflow/python:array_ops",
+        "//tensorflow/python:control_flow_ops",
+        "//tensorflow/python:data_flow_ops",
+        "//tensorflow/python:dtypes",
+        "//tensorflow/python:framework_ops",
+        "//tensorflow/python:random_ops",
+        "//tensorflow/python:util",
+    ],
+)
+
+py_test(
+    name = "tensor_pool_test",
+    srcs = ["python/features/python/tensor_pool_test.py"],
+    srcs_version = "PY2AND3",
+    deps = [
+        ":tensor_pool",
+        "//tensorflow/python:array_ops",
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:dtypes",
+        "//third_party/py/numpy",
+    ],
+)
+
 py_library(
     name = "virtual_batchnorm",
     srcs = [
diff --git a/tensorflow/contrib/gan/python/features/python/tensor_pool.py b/tensorflow/contrib/gan/python/features/python/tensor_pool.py
new file mode 100644
index 0000000000..0bd2fa3db9
--- /dev/null
+++ b/tensorflow/contrib/gan/python/features/python/tensor_pool.py
@@ -0,0 +1,35 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""A tensor pool stores values from an input tensor and returns a stored one.
+
+See the following papers for more details.
+1) `Learning from simulated and unsupervised images through adversarial
+    training` (https://arxiv.org/abs/1612.07828).
+2) `Unpaired Image-to-Image Translation using Cycle-Consistent Adversarial
+    Networks` (https://arxiv.org/abs/1703.10593).
+"""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from tensorflow.contrib.gan.python.features.python import tensor_pool_impl
+# pylint: disable=wildcard-import
+from tensorflow.contrib.gan.python.features.python.tensor_pool_impl import *
+# pylint: enable=wildcard-import
+from tensorflow.python.util.all_util import remove_undocumented
+
+__all__ = tensor_pool_impl.__all__
+remove_undocumented(__name__, __all__)
diff --git a/tensorflow/contrib/gan/python/features/python/tensor_pool_impl.py b/tensorflow/contrib/gan/python/features/python/tensor_pool_impl.py
new file mode 100644
index 0000000000..79318a69d2
--- /dev/null
+++ b/tensorflow/contrib/gan/python/features/python/tensor_pool_impl.py
@@ -0,0 +1,118 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""A tensor pool stores values from an input tensor and returns a stored one.
+
+We use this to keep a history of values created by a generator, such that
+a discriminator can randomly be trained on some older samples, not just the
+current one. This can help to not let the discriminator get too far ahead of the
+generator and also to keep the system from oscilating, if the discriminator
+forgets too fast what past samples from the generator looked like.
+
+See the following papers for more details.
+1) `Learning from simulated and unsupervised images through adversarial
+    training` (https://arxiv.org/abs/1612.07828).
+2) `Unpaired Image-to-Image Translation using Cycle-Consistent Adversarial
+    Networks` (https://arxiv.org/abs/1703.10593).
+"""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import ops
+from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import control_flow_ops
+from tensorflow.python.ops import data_flow_ops
+from tensorflow.python.ops import random_ops
+
+__all__ = [
+    'tensor_pool',
+]
+
+
+def tensor_pool(input_value,
+                pool_size,
+                pooling_probability=0.5,
+                name='tensor_pool'):
+  """Queue storing input values and returning random previously stored ones.
+
+  Every time the returned `output_value` is evaluated, `input_value` is
+  evaluated and its value either directly returned (with
+  `1-pooling_probability`) or stored in the pool and a random one of the samples
+  currently in the pool is popped and returned. As long as the pool in not fully
+  filled, the input_value is always directly returned, as well as stored in the
+  pool. Note during inference / testing, it may be appropriate to set
+  `pool_size` = 0 or `pooling_probability` = 0.
+
+  Args:
+    input_value: A `Tensor` from which to read values to be pooled.
+    pool_size: An integer specifying the maximum size of the pool.
+    pooling_probability: A float `Tensor` specifying the probability of getting
+      a value from the pool, as opposed to just the current input.
+    name: A string prefix for the name scope for all tensorflow ops.
+
+  Returns:
+    A `Tensor` which is with given probability either the `input_value` or a
+    randomly chosen sample that was previously inserted in the pool.
+
+  Raises:
+    ValueError: If `pool_size` is negative.
+  """
+  pool_size = int(pool_size)
+  if pool_size < 0:
+    raise ValueError('`pool_size` is negative.')
+  elif pool_size == 0:
+    return input_value
+
+  with ops.name_scope('{}_pool_queue'.format(name),
+                      values=[input_value, pooling_probability]):
+    pool_queue = data_flow_ops.RandomShuffleQueue(
+        capacity=pool_size,
+        min_after_dequeue=0,
+        dtypes=[input_value.dtype],
+        shapes=None)
+
+    # In pseudeo code this code does the following:
+    # if not pool_full:
+    #   enqueue(input_value)
+    #   return input_value
+    # else
+    #   dequeue_value = dequeue_random_sample()
+    #   enqueue(input_value)
+    #   if rand() < pooling_probability:
+    #     return dequeue_value
+    #   else
+    #     return input_value
+
+    def _get_input_value_pooled():
+      enqueue_op = pool_queue.enqueue(input_value)
+      with ops.control_dependencies([enqueue_op]):
+        return array_ops.identity(input_value)
+
+    def _get_random_pool_value_and_enqueue_input():
+      dequeue_value = pool_queue.dequeue()
+      with ops.control_dependencies([dequeue_value]):
+        enqueue_op = pool_queue.enqueue(input_value)
+        with ops.control_dependencies([enqueue_op]):
+          prob = random_ops.random_uniform(
+              (), dtype=dtypes.float32) < pooling_probability
+          return control_flow_ops.cond(prob, lambda: dequeue_value,
+                                       lambda: input_value)
+
+    output_value = control_flow_ops.cond(
+        pool_queue.size() < pool_size, _get_input_value_pooled,
+        _get_random_pool_value_and_enqueue_input)
+
+  return output_value
diff --git a/tensorflow/contrib/gan/python/features/python/tensor_pool_test.py b/tensorflow/contrib/gan/python/features/python/tensor_pool_test.py
new file mode 100644
index 0000000000..49b77bb3fc
--- /dev/null
+++ b/tensorflow/contrib/gan/python/features/python/tensor_pool_test.py
@@ -0,0 +1,94 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for tf.contrib.gan.python.features.tensor_pool."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import numpy as np
+
+from tensorflow.contrib.gan.python.features.python import tensor_pool_impl as tensor_pool
+from tensorflow.python.framework import dtypes
+from tensorflow.python.ops import array_ops
+from tensorflow.python.platform import test
+
+
+class TensorPoolTest(test.TestCase):
+
+  def test_pool_unknown_input_shape(self):
+    """Checks that `input_value` can have unknown shape."""
+    input_value = array_ops.placeholder(
+        dtype=dtypes.int32, shape=[None, None, 3])
+    output_value = tensor_pool.tensor_pool(input_value, pool_size=10)
+
+    with self.test_session(use_gpu=True) as session:
+      for i in range(10):
+        session.run(output_value, {input_value: [[[i] * 3]]})
+        session.run(output_value, {input_value: [[[i] * 3] * 2]})
+        session.run(output_value, {input_value: [[[i] * 3] * 5] * 2})
+
+  def test_pool_sequence(self):
+    """Checks that values are pooled and returned maximally twice."""
+    input_value = array_ops.placeholder(dtype=dtypes.int32, shape=[])
+    output_value = tensor_pool.tensor_pool(input_value, pool_size=10)
+
+    with self.test_session(use_gpu=True) as session:
+      outs = []
+      for i in range(50):
+        out = session.run(output_value, {input_value: i})
+        outs.append(out)
+        self.assertLessEqual(out, i)
+
+      _, counts = np.unique(outs, return_counts=True)
+      # Check that each value is returned maximally twice.
+      self.assertTrue((counts <= 2).all())
+
+  def test_never_pool(self):
+    """Checks that setting `pooling_probability` to zero works."""
+    input_value = array_ops.placeholder(dtype=dtypes.int32, shape=[])
+    output_value = tensor_pool.tensor_pool(
+        input_value, pool_size=10, pooling_probability=0.0)
+
+    with self.test_session(use_gpu=True) as session:
+      for i in range(50):
+        out = session.run(output_value, {input_value: i})
+        self.assertEqual(out, i)
+
+  def test_pooling_probability(self):
+    """Checks that `pooling_probability` works."""
+    input_value = array_ops.placeholder(dtype=dtypes.int32, shape=[])
+    pool_size = 10
+    pooling_probability = 0.2
+    output_value = tensor_pool.tensor_pool(
+        input_value,
+        pool_size=pool_size,
+        pooling_probability=pooling_probability)
+
+    with self.test_session(use_gpu=True) as session:
+      not_pooled = 0
+      total = 1000
+      for i in range(total):
+        out = session.run(output_value, {input_value: i})
+        if out == i:
+          not_pooled += 1
+      self.assertAllClose(
+          (not_pooled - pool_size) / (total - pool_size),
+          1 - pooling_probability,
+          atol=0.03)
+
+
+if __name__ == '__main__':
+  test.main()
-- 
GitLab


From 87f7aa9b1da2614b225e3e457aed10485f05297a Mon Sep 17 00:00:00 2001
From: Derek Murray <mrry@google.com>
Date: Wed, 22 Nov 2017 13:58:31 -0800
Subject: [PATCH 0257/1225] [tf.data] Allow the DeserializeSparse op to accept
 inconsistent dense shapes.

This changes DeserializeSparse to match the behavior of DeserializeSparseMany
and TakeManySparseFromTensorsMap, and thus makes `Dataset.batch()` on sparse
tensors match the existing behavior of `tf.train.batch()` and family.

The rationale for this change is that the source of many `tf.SparseTensor`
objects is `tf.parse[_single]_example()`, and that operation does not try
to ensure that consecutive `SparseTensor` objects parsed from the same
feature specification have the same `dense_shape`. As a result, the behavior
of existing ops that batch `SparseTensor` objects has been to silently pad
those objects to the bounding dense_shape, by taking the maximum over each
dimension size. While this does reduce our ability to make consistency checks
in the `SparseTensor`-handling code, pragmatically we never get consistently
shaped `SparseTensor`s in real programs, so this seems like a reasonable path
for usability.

PiperOrigin-RevId: 176697720
---
 .../core/kernels/serialize_sparse_op.cc       | 16 +++++----
 .../kernel_tests/batch_dataset_op_test.py     | 33 +++++++++++++++++++
 .../sparse_serialization_ops_test.py          | 14 +++++---
 3 files changed, 52 insertions(+), 11 deletions(-)

diff --git a/tensorflow/core/kernels/serialize_sparse_op.cc b/tensorflow/core/kernels/serialize_sparse_op.cc
index 161c505e84..cfb86904d5 100644
--- a/tensorflow/core/kernels/serialize_sparse_op.cc
+++ b/tensorflow/core/kernels/serialize_sparse_op.cc
@@ -352,13 +352,15 @@ class DeserializeSparseOp : public OpKernel {
                 i, "] was: ", shape.dims() - 1, " but rank of SparseTensor[", i,
                 "] is: ", expanded_tensor_shape.dims() - 1));
         for (int j = 1; j < shape.dims(); ++j) {
-          OP_REQUIRES(
-              context, shape.dim_size(j) == expanded_tensor_shape.dim_size(j),
-              errors::InvalidArgument(
-                  "Inconsistent shape across SparseTensors: dimension ", j - 1,
-                  " prior to SparseTensor[", i, "] was: ", shape.dim_size(j),
-                  " but rank of SparseTensor[", i,
-                  "] is: ", expanded_tensor_shape.dim_size(j)));
+          // NOTE(mrry): For compatibility with the implementations of
+          // DeserializeManySparse, and many ops that generate
+          // SparseTensors to batch that do not have a fixed
+          // dense_shape (e.g. `tf.parse_single_example()`), we
+          // compute the maximum in each dimension to find the
+          // smallest dense_shape that bounds all of the input
+          // SparseTensors.
+          shape.set_dim(j, std::max(shape.dim_size(j),
+                                    expanded_tensor_shape.dim_size(j)));
         }
       }
     }
diff --git a/tensorflow/python/kernel_tests/batch_dataset_op_test.py b/tensorflow/python/kernel_tests/batch_dataset_op_test.py
index 513dfb1ec3..660cbef302 100644
--- a/tensorflow/python/kernel_tests/batch_dataset_op_test.py
+++ b/tensorflow/python/kernel_tests/batch_dataset_op_test.py
@@ -130,6 +130,39 @@ class BatchDatasetTest(test.TestCase):
       with self.assertRaises(errors.OutOfRangeError):
         sess.run(get_next)
 
+  def testBatchSparseWithDifferentDenseShapes(self):
+
+    def _sparse(i):
+      return sparse_tensor.SparseTensor(
+          indices=array_ops.expand_dims(
+              math_ops.range(i, dtype=dtypes.int64), 1),
+          values=array_ops.fill([math_ops.to_int32(i)], i),
+          dense_shape=[i])
+
+    iterator = dataset_ops.Dataset.range(10).map(_sparse).batch(
+        5).make_initializable_iterator()
+    init_op = iterator.initializer
+    get_next = iterator.get_next()
+
+    with self.test_session() as sess:
+      sess.run(init_op)
+      for i in range(2):
+        actual = sess.run(get_next)
+        expected_indices = []
+        expected_values = []
+        for j in range(5):
+          for k in range(i * 5 + j):
+            expected_indices.append([j, k])
+            expected_values.append(i * 5 + j)
+        expected = sparse_tensor.SparseTensor(
+            indices=expected_indices,
+            values=expected_values,
+            dense_shape=[5, (i + 1) * 5 - 1])
+        self.assertTrue(isinstance(actual, sparse_tensor.SparseTensorValue))
+        self.assertSparseValuesEqual(actual, expected.eval())
+      with self.assertRaises(errors.OutOfRangeError):
+        sess.run(get_next)
+
   def testNestedBatchSparse(self):
 
     def _sparse(i):
diff --git a/tensorflow/python/kernel_tests/sparse_serialization_ops_test.py b/tensorflow/python/kernel_tests/sparse_serialization_ops_test.py
index d0d6cc4c0f..78c113f514 100644
--- a/tensorflow/python/kernel_tests/sparse_serialization_ops_test.py
+++ b/tensorflow/python/kernel_tests/sparse_serialization_ops_test.py
@@ -108,10 +108,16 @@ class SerializeSparseTest(test.TestCase):
       sp_deserialized = sparse_ops.deserialize_sparse(
           serialized, dtype=dtypes.int32)
 
-      with self.assertRaisesOpError(
-          r"Inconsistent shape across SparseTensors: dimension 0 prior to "
-          r"SparseTensor\[1\] was: 5 but rank of SparseTensor\[1\] is: 3"):
-        sess.run(sp_deserialized)
+      combined_indices, combined_values, combined_shape = sess.run(
+          sp_deserialized)
+
+      self.assertAllEqual(combined_indices[:6, 0], [0] * 6)  # minibatch 0
+      self.assertAllEqual(combined_indices[:6, 1:], sp_input0[0])
+      self.assertAllEqual(combined_indices[6:, 0], [1] * 6)  # minibatch 1
+      self.assertAllEqual(combined_indices[6:, 1:], sp_input1[0])
+      self.assertAllEqual(combined_values[:6], sp_input0[1])
+      self.assertAllEqual(combined_values[6:], sp_input1[1])
+      self.assertAllEqual(combined_shape, [2, 5, 6])
 
   def testSerializeDeserializeNestedBatch(self):
     with self.test_session(use_gpu=False) as sess:
-- 
GitLab


From a4a3816a867ecd25600b30d3ebc9b79d6c8a9e4e Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 22 Nov 2017 14:08:18 -0800
Subject: [PATCH 0258/1225] Allow proceeding without --input_shape flags when
 the model already has that information (e.g. when the inputs are Placeholders
 with shape attribs). This required moving the call to ResolveModelFlags back
 until after operators are imported (where it was until it was moved recently
 for drop_control_dependency handling).

PiperOrigin-RevId: 176699182
---
 tensorflow/contrib/lite/toco/import_tensorflow.cc |  3 ++-
 tensorflow/contrib/lite/toco/tooling_util.cc      | 14 ++++++++------
 2 files changed, 10 insertions(+), 7 deletions(-)

diff --git a/tensorflow/contrib/lite/toco/import_tensorflow.cc b/tensorflow/contrib/lite/toco/import_tensorflow.cc
index 691b4ff2a9..cde5a936af 100644
--- a/tensorflow/contrib/lite/toco/import_tensorflow.cc
+++ b/tensorflow/contrib/lite/toco/import_tensorflow.cc
@@ -1507,7 +1507,6 @@ std::unique_ptr<Model> ImportTensorFlowGraphDef(
   }
 
   Model* model = new Model;
-  ResolveModelFlags(model_flags, model);
 
   for (auto node : inlined_graph.node()) {
     StripZeroOutputIndexFromInputs(&node);
@@ -1639,6 +1638,8 @@ std::unique_ptr<Model> ImportTensorFlowGraphDef(
     }
   }
 
+  ResolveModelFlags(model_flags, model);
+
   StripCaretFromArrayNames(model);
   AddExtraOutputsFedIntoOtherOps(model);
   FixNoMissingArray(model);
diff --git a/tensorflow/contrib/lite/toco/tooling_util.cc b/tensorflow/contrib/lite/toco/tooling_util.cc
index 639b5f193c..e8fa7a3423 100644
--- a/tensorflow/contrib/lite/toco/tooling_util.cc
+++ b/tensorflow/contrib/lite/toco/tooling_util.cc
@@ -1062,12 +1062,6 @@ void ResolveModelFlags(const ModelFlags& model_flags, Model* model) {
          "--output_arrays flag must be given on the command-line.";
 
   for (const auto& input_array_proto : model->flags.input_arrays()) {
-    QCHECK(!input_array_proto.shape().empty())
-        << "This model does not have shape defined for input array "
-        << input_array_proto.name()
-        << ", so one must be specified by a non-empty --input_shape "
-           "command-line flag.";
-
     auto& input_array = model->GetOrCreateArray(input_array_proto.name());
     if (input_array_proto.has_data_type()) {
       const ArrayDataType specified_type =
@@ -1090,6 +1084,14 @@ void ResolveModelFlags(const ModelFlags& model_flags, Model* model) {
       input_array.data_type = ArrayDataType::kFloat;
     }
 
+    if (!input_array.has_shape()) {
+      QCHECK(!input_array_proto.shape().empty())
+          << "This model does not have shape defined for input array "
+          << input_array_proto.name()
+          << ", so one must be specified by a non-empty --input_shape "
+             "command-line flag.";
+    }
+
     // Compare/merge the model->flags describing the input_shape with
     // the actual input array's shape.
     auto& input_array_dims = *input_array.mutable_shape()->mutable_dims();
-- 
GitLab


From d4e5b6e8a8985e6648fec1939adab8f51eed0ffe Mon Sep 17 00:00:00 2001
From: Alexandre Passos <apassos@google.com>
Date: Wed, 22 Nov 2017 14:16:41 -0800
Subject: [PATCH 0259/1225] Proper casting in resource scatter_update

PiperOrigin-RevId: 176700288
---
 .../python/kernel_tests/resource_variable_ops_test.py       | 6 ++++++
 tensorflow/python/ops/state_ops.py                          | 2 +-
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/tensorflow/python/kernel_tests/resource_variable_ops_test.py b/tensorflow/python/kernel_tests/resource_variable_ops_test.py
index 8f328cea63..4c7a9cb0f9 100644
--- a/tensorflow/python/kernel_tests/resource_variable_ops_test.py
+++ b/tensorflow/python/kernel_tests/resource_variable_ops_test.py
@@ -498,6 +498,12 @@ class ResourceVariableOpsTest(test_util.TensorFlowTestCase):
       state_ops.scatter_update(v, [1], [3.0])
       self.assertAllEqual([1.0, 3.0], v.numpy())
 
+  def testScatterUpdateCast(self):
+    with context.eager_mode():
+      v = resource_variable_ops.ResourceVariable([1.0, 2.0], name="update")
+      state_ops.scatter_update(v, [1], [3])
+      self.assertAllEqual([1.0, 3.0], v.numpy())
+
 
 if __name__ == "__main__":
   test.main()
diff --git a/tensorflow/python/ops/state_ops.py b/tensorflow/python/ops/state_ops.py
index dbab07da42..dfc657893c 100644
--- a/tensorflow/python/ops/state_ops.py
+++ b/tensorflow/python/ops/state_ops.py
@@ -348,4 +348,4 @@ def scatter_update(ref, indices, updates, use_locking=True, name=None):
     return gen_state_ops.scatter_update(ref, indices, updates,
                                         use_locking=use_locking, name=name)
   return gen_resource_variable_ops.resource_scatter_update(
-      ref.handle, indices, updates, name=name)
+      ref.handle, indices, ops.convert_to_tensor(updates, ref.dtype), name=name)
-- 
GitLab


From 01a266a59783349b4d118eeb0ab1eb669aeef1db Mon Sep 17 00:00:00 2001
From: Benoit Steiner <bsteiner@google.com>
Date: Wed, 22 Nov 2017 14:45:36 -0800
Subject: [PATCH 0260/1225] Skip control edges since they propagate neither
 shapes nor values.

PiperOrigin-RevId: 176703800
---
 tensorflow/core/grappler/costs/graph_properties.cc | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/tensorflow/core/grappler/costs/graph_properties.cc b/tensorflow/core/grappler/costs/graph_properties.cc
index abcd83a01e..c254fbef7a 100644
--- a/tensorflow/core/grappler/costs/graph_properties.cc
+++ b/tensorflow/core/grappler/costs/graph_properties.cc
@@ -725,9 +725,12 @@ Status GraphProperties::PropagateShapes(
     while (!new_shapes->empty() &&
            num_loop_iterations++ < max_loop_iterations) {
       const Node* n = new_shapes->pop();
-      for (const Node* fanout : n->out_nodes()) {
-        TF_RETURN_IF_ERROR(
-            UpdateShapes(shape_refiner, relax, fanout, new_shapes));
+      for (const Edge* e : n->out_edges()) {
+        if (!e->IsControlEdge()) {
+          const Node* fanout = e->dst();
+          TF_RETURN_IF_ERROR(
+              UpdateShapes(shape_refiner, relax, fanout, new_shapes));
+        }
       }
     }
 
@@ -913,6 +916,9 @@ Status GraphProperties::InferStatically() {
                                          &input_properties[i]);
       }
       for (const auto& edge : node->in_edges()) {
+        if (edge->IsControlEdge()) {
+          continue;
+        }
         if (!edge->src()->IsConstant()) {
           continue;
         }
-- 
GitLab


From d390d2774fa3b480e723ca0d1539356c8f7b37d3 Mon Sep 17 00:00:00 2001
From: Yao Zhang <yaozhang@google.com>
Date: Wed, 22 Nov 2017 14:53:42 -0800
Subject: [PATCH 0261/1225] Turn off layout optimizer (as with all other
 optimizers) in the memory optimizer test, because the test assumes no
 modification of the graph.

PiperOrigin-RevId: 176704808
---
 tensorflow/python/grappler/memory_optimizer_test.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tensorflow/python/grappler/memory_optimizer_test.py b/tensorflow/python/grappler/memory_optimizer_test.py
index 09cf5f2270..9fbadeceb3 100644
--- a/tensorflow/python/grappler/memory_optimizer_test.py
+++ b/tensorflow/python/grappler/memory_optimizer_test.py
@@ -128,6 +128,7 @@ class MemoryOptimizerRecomputeTest(test.TestCase):
         rewriter_config_pb2.RewriterConfig(
             disable_model_pruning=True,
             constant_folding=rewriter_config_pb2.RewriterConfig.OFF,
+            layout_optimizer=rewriter_config_pb2.RewriterConfig.OFF,
             arithmetic_optimization=rewriter_config_pb2.RewriterConfig.OFF,
             memory_optimization=rewriter_config_pb2.RewriterConfig.
             RECOMPUTATION_HEURISTICS), original_metagraph)
@@ -151,6 +152,7 @@ class MemoryOptimizerRecomputeTest(test.TestCase):
         rewriter_config_pb2.RewriterConfig(
             disable_model_pruning=True,
             constant_folding=rewriter_config_pb2.RewriterConfig.OFF,
+            layout_optimizer=rewriter_config_pb2.RewriterConfig.OFF,
             arithmetic_optimization=rewriter_config_pb2.RewriterConfig.OFF,
             memory_optimization=rewriter_config_pb2.RewriterConfig.
             RECOMPUTATION_HEURISTICS,
-- 
GitLab


From feda969d974d02016736faab328419929428ab10 Mon Sep 17 00:00:00 2001
From: Skye Wanderman-Milne <skyewm@google.com>
Date: Wed, 22 Nov 2017 15:08:37 -0800
Subject: [PATCH 0262/1225] Make 'name' and 'return_elements' arguments to
 import_graph_def work with C API.

This enables some of the tests in import_test.py to be run using the C API.

PiperOrigin-RevId: 176706698
---
 tensorflow/python/client/tf_session.i        |  38 ++++
 tensorflow/python/framework/importer.py      |  62 ++++++-
 tensorflow/python/framework/importer_test.py | 182 +++++++++++--------
 3 files changed, 205 insertions(+), 77 deletions(-)

diff --git a/tensorflow/python/client/tf_session.i b/tensorflow/python/client/tf_session.i
index ef6f28ce07..099a35202c 100644
--- a/tensorflow/python/client/tf_session.i
+++ b/tensorflow/python/client/tf_session.i
@@ -315,6 +315,44 @@ tensorflow::ImportNumpy();
   $2 = inputs.size();
 }
 
+// Typemaps for TF_ImportGraphDefResultsReturnOutputs
+%typemap(in, numinputs=0) (int* num_outputs, TF_Output** outputs)
+     (int num_outputs, TF_Output* outputs) {
+  $1 = &num_outputs;
+  $2 = &outputs;
+}
+
+%typemap(argout) (int* num_outputs, TF_Output** outputs) {
+  $result = PyList_New(*$1);
+  if (!$result) {
+    SWIG_exception_fail(SWIG_MemoryError, "$symname: couldn't create list");
+  }
+  int num_outputs = *$1;
+  TF_Output* outputs = *$2;
+  for (int i = 0; i < num_outputs; ++i) {
+    PyList_SET_ITEM($result, i, CreateWrappedTFOutput(outputs[i]));
+  }
+}
+
+// Typemaps for TF_ImportGraphDefResultsReturnOperations
+%typemap(in, numinputs=0) (int* num_opers, TF_Operation*** opers)
+     (int num_opers, TF_Operation** opers) {
+  $1 = &num_opers;
+  $2 = &opers;
+}
+
+%typemap(argout) (int* num_opers, TF_Operation*** opers) {
+  $result = PyList_New(*$1);
+  if (!$result) {
+    SWIG_exception_fail(SWIG_MemoryError, "$symname: couldn't create list");
+  }
+  int num_opers = *$1;
+  TF_Operation** opers = *$2;
+  for (int i = 0; i < num_opers; ++i) {
+    PyList_SET_ITEM($result, i, CreateWrappedTFOperation(opers[i]));
+  }
+}
+
 // Typemaps for TF_GraphNextOperation().
 %typemap(in) size_t* pos (size_t pos) {
   pos = PyLong_AsUnsignedLong($input);
diff --git a/tensorflow/python/framework/importer.py b/tensorflow/python/framework/importer.py
index c00b9da0df..434cbda7ad 100644
--- a/tensorflow/python/framework/importer.py
+++ b/tensorflow/python/framework/importer.py
@@ -194,6 +194,18 @@ def _FindAttrInOpDef(attr_name, op_def):
   return None
 
 
+def _PopulateTFImportGraphDefOptions(options, prefix, return_elements):
+  """Populates the TF_ImportGraphDefOptions `options`."""
+  c_api.TF_ImportGraphDefOptionsSetPrefix(options, prefix)
+
+  for name in return_elements or []:
+    if ':' in name:
+      op_name, index = _ParseTensorName(name)
+      c_api.TF_ImportGraphDefOptionsAddReturnOutput(options, op_name, index)
+    else:
+      c_api.TF_ImportGraphDefOptionsAddReturnOperation(options, name)
+
+
 def _ProcessNewOps(graph):
   """Processes the newly-added TF_Operations in `graph`."""
   for c_op in c_api_util.new_tf_operations(graph):
@@ -202,6 +214,35 @@ def _ProcessNewOps(graph):
   # TODO(skyewm): colocation logic
 
 
+def _GatherReturnElements(requested_return_elements, graph, results):
+  """Returns the requested return elements from results.
+
+  Args:
+    requested_return_elements: list of strings of operation and tensor names
+    graph: Graph
+    results: wrapped TF_ImportGraphDefResults
+
+  Returns:
+    list of `Operation` and/or `Tensor` objects
+  """
+  return_outputs = c_api.TF_ImportGraphDefResultsReturnOutputs(results)
+  return_opers = c_api.TF_ImportGraphDefResultsReturnOperations(results)
+
+  combined_return_elements = []
+  outputs_idx = 0
+  opers_idx = 0
+  for name in requested_return_elements:
+    if ':' in name:
+      combined_return_elements.append(
+          graph._get_tensor_by_tf_output(return_outputs[outputs_idx]))  # pylint: disable=protected-access
+      outputs_idx += 1
+    else:
+      combined_return_elements.append(
+          graph._get_operation_by_tf_operation(return_opers[opers_idx]))  # pylint: disable=protected-access
+      opers_idx += 1
+  return combined_return_elements
+
+
 @deprecated_args(None, 'Please file an issue at '
                  'https://github.com/tensorflow/tensorflow/issues if you depend'
                  ' on this feature.',
@@ -263,18 +304,29 @@ def import_graph_def(graph_def, input_map=None, return_elements=None,
   graph = ops.get_default_graph()
 
   if graph._c_graph:  # pylint: disable=protected-access
+    with ops.name_scope(name, 'import', input_map.values()) as scope:
+      # Save unique prefix generated by name_scope
+      if scope:
+        assert scope.endswith('/')
+        prefix = scope[:-1]
+      else:
+        prefix = ''
+
     scoped_options = c_api_util.ScopedTFImportGraphDefOptions()
+    options = scoped_options.options
+    _PopulateTFImportGraphDefOptions(options, prefix, return_elements)
 
     with c_api_util.tf_buffer(graph_def.SerializeToString()) as serialized:
       with errors.raise_exception_on_not_ok_status() as status:
-        c_api.TF_GraphImportGraphDefWithResults(
-            graph._c_graph, serialized, scoped_options.options, status)  # pylint: disable=protected-access
+        results = c_api.TF_GraphImportGraphDefWithResults(
+            graph._c_graph, serialized, options, status)  # pylint: disable=protected-access
 
     _ProcessNewOps(graph)
 
-    if return_elements is not None:
-      raise ValueError('return_elements not yet implemented with C API')
-    return None
+    if return_elements is None:
+      return None
+    else:
+      return _GatherReturnElements(return_elements, graph, results)
 
   else:
     g = graph
diff --git a/tensorflow/python/framework/importer_test.py b/tensorflow/python/framework/importer_test.py
index 8984282c68..5a6187c8a6 100644
--- a/tensorflow/python/framework/importer_test.py
+++ b/tensorflow/python/framework/importer_test.py
@@ -43,6 +43,7 @@ import tensorflow.python.ops.nn_grad  # pylint: disable=unused-import
 from tensorflow.python.platform import test
 
 
+@test_util.with_c_api
 class ImportGraphDefTest(test.TestCase):
 
   def _MakeGraphDef(self,
@@ -56,67 +57,6 @@ class ImportGraphDefTest(test.TestCase):
     text_format.Merge(text, ret)
     return ret
 
-  # The C API doesn't currently support return elements (or anything else beyond
-  # the most basic import). This test only checks that the import can run
-  # without error, and will be removed once more functionality is implemented
-  # and we can get coverage from the other tests.
-  @test_util.enable_c_api
-  def testCApi(self):
-    importer.import_graph_def(
-        self._MakeGraphDef("""
-        node { name: 'A' op: 'IntOutputFloatOutput' }
-        node { name: 'B' op: 'ListOutput'
-               attr { key: 'T'
-                      value { list { type: DT_INT32 type: DT_FLOAT } } } }
-        node { name: 'C' op: 'ListInput'
-               attr { key: 'N' value { i: 2 } }
-               attr { key: 'T' value { type: DT_INT32 } }
-               input: 'A:0' input: 'B:0' }
-        node { name: 'D' op: 'ListInput'
-               attr { key: 'N' value { i: 2 } }
-               attr { key: 'T' value { type: DT_FLOAT } }
-               input: 'A:1' input: 'B:1' }
-          """))
-
-    graph = ops.get_default_graph()
-    a = graph.get_operation_by_name("A")
-    b = graph.get_operation_by_name("B")
-    c = graph.get_operation_by_name("C")
-    d = graph.get_operation_by_name("D")
-
-    # Assert that the import process creates distinct tensors.
-    self.assertNotEqual(a.outputs[0].name, a.outputs[1].name)
-    self.assertNotEqual(b.outputs[0].name, b.outputs[1].name)
-    self.assertNotEqual(a.outputs[0].name, b.outputs[0].name)
-    self.assertNotEqual(a.outputs[0].name, b.outputs[1].name)
-    self.assertNotEqual(a.outputs[1].name, b.outputs[0].name)
-    self.assertNotEqual(a.outputs[1].name, b.outputs[1].name)
-
-    # Assert that the ops are connected according to the GraphDef topology.
-    self.assertEqual(c.inputs[0], a.outputs[0])
-    self.assertEqual(c.inputs[1], b.outputs[0])
-    self.assertEqual(d.inputs[0], a.outputs[1])
-    self.assertEqual(d.inputs[1], b.outputs[1])
-
-    # Check the types of the returned ops and tensors.
-    self.assertEqual(a.type, "IntOutputFloatOutput")
-    self.assertEqual(b.type, "ListOutput")
-    self.assertEqual(c.type, "ListInput")
-    self.assertEqual(d.type, "ListInput")
-    self.assertEqual(a.outputs[0].dtype, dtypes.int32)
-    self.assertEqual(a.outputs[1].dtype, dtypes.float32)
-    self.assertEqual(b.outputs[0].dtype, dtypes.int32)
-    self.assertEqual(b.outputs[1].dtype, dtypes.float32)
-
-    # Check the names of the returned ops.
-    self.assertEqual(a.name, "A")
-    self.assertEqual(b.name, "B")
-    self.assertEqual(c.name, "C")
-    self.assertEqual(d.name, "D")
-
-    # Check that the op_def is still available.
-    self.assertNotEqual(None, a.op_def)
-
   def testBasic(self):
     with ops.Graph().as_default():
       a, b, c, d = importer.import_graph_def(
@@ -171,6 +111,8 @@ class ImportGraphDefTest(test.TestCase):
       self.assertNotEqual(None, a.op_def)
 
   def testMultipleImport(self):
+    if ops._USE_C_API: return  # TODO(skyewm): set uniquify_names
+
     graph_def = self._MakeGraphDef("""
     node { name: 'A' op: 'IntOutput' }
     node { name: 'B' op: 'IntInput' input: 'A:0' }
@@ -259,6 +201,8 @@ class ImportGraphDefTest(test.TestCase):
       self.assertEqual(outer_inner_c.name, "outer/inner/c_1")
 
   def testInputMap(self):
+    if ops._USE_C_API: return  # TODO(skyewm): make this work with C API
+
     with ops.Graph().as_default():
       feed_a_0 = constant_op.constant(0, dtype=dtypes.int32)
       feed_b_1 = constant_op.constant(1, dtype=dtypes.int32)
@@ -286,6 +230,8 @@ class ImportGraphDefTest(test.TestCase):
       self.assertEqual(d.inputs[1], feed_b_1)
 
   def testInputMapBytes(self):
+    if ops._USE_C_API: return  # TODO(skyewm): make this work with C API
+
     with ops.Graph().as_default():
       feed_a_0 = constant_op.constant(0, dtype=dtypes.int32)
       feed_b_1 = constant_op.constant(1, dtype=dtypes.int32)
@@ -313,6 +259,8 @@ class ImportGraphDefTest(test.TestCase):
       self.assertEqual(d.inputs[1], feed_b_1)
 
   def testInputMapUnicode(self):
+    if ops._USE_C_API: return  # TODO(skyewm): make this work with C API
+
     with ops.Graph().as_default():
       feed_a_0 = constant_op.constant(0, dtype=dtypes.int32)
       feed_b_1 = constant_op.constant(1, dtype=dtypes.int32)
@@ -351,6 +299,8 @@ class ImportGraphDefTest(test.TestCase):
       self.assertEqual(b.inputs[0], a.outputs[0])
 
   def testInputMapImplicitZerothOutput(self):
+    if ops._USE_C_API: return  # TODO(skyewm): make this work with C API
+
     with ops.Graph().as_default():
       feed_a_0 = constant_op.constant(0, dtype=dtypes.int32)
       b, = importer.import_graph_def(
@@ -397,6 +347,11 @@ class ImportGraphDefTest(test.TestCase):
       self.assertEqual(d.outputs, [])
 
   def testCyclic(self):
+    # Importing cycles not supported with C API enabled (this test will
+    # eventually be deleted).
+    # TODO(skyewm): write while loop test
+    if ops._USE_C_API: return
+
     with ops.Graph().as_default():
       a, b = importer.import_graph_def(
           self._MakeGraphDef("""
@@ -411,6 +366,8 @@ class ImportGraphDefTest(test.TestCase):
       self.assertEqual(b.inputs[0], a.outputs[0])
 
   def testTypeMismatchInGraphDef(self):
+    if ops._USE_C_API: return  # TODO(skyewm): make this work with C API
+
     with ops.Graph().as_default():
       with self.assertRaises(ValueError) as e:
         importer.import_graph_def(
@@ -423,6 +380,8 @@ class ImportGraphDefTest(test.TestCase):
           str(e.exception))
 
   def testShapeWhitelist(self):
+    if ops._USE_C_API: return  # TODO(skyewm): make this work with C API
+
     # Barrier's shape is an output vector of 2, but the
     # graph says it's a scalar.  This is currently whitelisted.
     with ops.Graph().as_default():
@@ -436,6 +395,8 @@ class ImportGraphDefTest(test.TestCase):
           name="import")
 
   def testShapeWhitelistViolation(self):
+    if ops._USE_C_API: return  # TODO(skyewm): make this work with C API
+
     # L2 loss produces a scalar shape, but the graph
     # has the wrong shape, so raise an error.
     with ops.Graph().as_default():
@@ -455,6 +416,8 @@ class ImportGraphDefTest(test.TestCase):
             "Shapes () and (43,) are not compatible" in str(e.exception))
 
   def testInvalidSignatureTooManyInputsInGraphDef(self):
+    if ops._USE_C_API: return  # TODO(skyewm): make this work with C API
+
     with ops.Graph().as_default():
       with self.assertRaises(ValueError) as e:
         importer.import_graph_def(
@@ -466,6 +429,8 @@ class ImportGraphDefTest(test.TestCase):
                       str(e.exception))
 
   def testInvalidSignatureNotEnoughInputsInGraphDef(self):
+    if ops._USE_C_API: return  # TODO(skyewm): make this work with C API
+
     with ops.Graph().as_default():
       with self.assertRaises(ValueError) as e:
         importer.import_graph_def(
@@ -477,6 +442,8 @@ class ImportGraphDefTest(test.TestCase):
                       "got 'int32')" in str(e.exception))
 
   def testMissingInputOpInGraphDef(self):
+    if ops._USE_C_API: return  # TODO(skyewm): make this work with C API
+
     with ops.Graph().as_default():
       with self.assertRaises(ValueError) as e:
         importer.import_graph_def(
@@ -486,6 +453,8 @@ class ImportGraphDefTest(test.TestCase):
       self.assertTrue("Input tensor 'A:0' not found" in str(e.exception))
 
   def testMissingInputOpInGraphDefButAppearsInInputMap(self):
+    if ops._USE_C_API: return  # TODO(skyewm): make this work with C API
+
     with ops.Graph().as_default():
       feed_a_0 = constant_op.constant(5.0)
       b, = importer.import_graph_def(
@@ -497,6 +466,8 @@ class ImportGraphDefTest(test.TestCase):
       self.assertEqual(b.inputs[0], feed_a_0)
 
   def testMissingInputTensorInGraphDef(self):
+    if ops._USE_C_API: return  # TODO(skyewm): make this work with C API
+
     with ops.Graph().as_default():
       with self.assertRaises(ValueError) as e:
         importer.import_graph_def(
@@ -507,6 +478,8 @@ class ImportGraphDefTest(test.TestCase):
       self.assertTrue("Input tensor 'A:1' not found" in str(e.exception))
 
   def testMissingControlInputInGraphDef(self):
+    if ops._USE_C_API: return  # TODO(skyewm): make this work with C API
+
     with ops.Graph().as_default():
       with self.assertRaises(ValueError) as e:
         importer.import_graph_def(
@@ -516,6 +489,8 @@ class ImportGraphDefTest(test.TestCase):
       self.assertTrue("Control input '^A' not found" in str(e.exception))
 
   def testInvalidTensorNameOutputIndexInGraphDef(self):
+    if ops._USE_C_API: return  # TODO(skyewm): make this work with C API
+
     with ops.Graph().as_default():
       with self.assertRaises(ValueError) as e:
         importer.import_graph_def(
@@ -526,6 +501,8 @@ class ImportGraphDefTest(test.TestCase):
                        str(e.exception))
 
   def testInvalidTensorNameInGraphDef(self):
+    if ops._USE_C_API: return  # TODO(skyewm): make this work with C API
+
     with ops.Graph().as_default():
       with self.assertRaises(ValueError) as e:
         importer.import_graph_def(
@@ -536,6 +513,8 @@ class ImportGraphDefTest(test.TestCase):
                        str(e.exception))
 
   def testMissingReturnOperation(self):
+    if ops._USE_C_API: return  # TODO(skyewm): make this work with C API
+
     with ops.Graph().as_default():
       with self.assertRaises(ValueError) as e:
         importer.import_graph_def(
@@ -547,6 +526,8 @@ class ImportGraphDefTest(test.TestCase):
           "return_element 'B' not found in graph_def." in str(e.exception))
 
   def testMissingReturnTensor(self):
+    if ops._USE_C_API: return  # TODO(skyewm): make this work with C API
+
     with ops.Graph().as_default():
       with self.assertRaises(ValueError) as e:
         importer.import_graph_def(
@@ -576,6 +557,8 @@ class ImportGraphDefTest(test.TestCase):
           "return_element 'A:B:0' not found in graph_def." in str(e.exception))
 
   def testMissingInputMap(self):
+    if ops._USE_C_API: return  # TODO(skyewm): make this work with C API
+
     with ops.Graph().as_default():
       with self.assertRaises(ValueError) as e:
         importer.import_graph_def(
@@ -586,6 +569,8 @@ class ImportGraphDefTest(test.TestCase):
       self.assertTrue("not found in graph_def: [B:0]" in str(e.exception))
 
   def testInputMapUnusedAsInput(self):
+    if ops._USE_C_API: return  # TODO(skyewm): make this work with C API
+
     with ops.Graph().as_default():
       # Mapping an unused node output should succeed.
       importer.import_graph_def(
@@ -604,6 +589,8 @@ class ImportGraphDefTest(test.TestCase):
       self.assertTrue("not found in graph_def: [A:2]" in str(e.exception))
 
   def testInputMapTypeMismatch(self):
+    if ops._USE_C_API: return  # TODO(skyewm): make this work with C API
+
     with ops.Graph().as_default():
       with self.assertRaises(ValueError) as e:
         importer.import_graph_def(
@@ -637,6 +624,16 @@ class ImportGraphDefTest(test.TestCase):
           name="imported_graph")
       self.assertEqual(a.name, "imported_graph/A")
 
+  def testDefaultNamePrefix(self):
+    with ops.Graph().as_default():
+      a, = importer.import_graph_def(
+          self._MakeGraphDef("""
+          node { name: 'A' op: 'None' }
+          """),
+          return_elements=["A"],
+          name=None)
+      self.assertEqual(a.name, "import/A")
+
   def testNamePrefixColocationAttrs(self):
     original_graph_def = self._MakeGraphDef("""
           node { name: 'A' op: 'None' }
@@ -648,14 +645,14 @@ class ImportGraphDefTest(test.TestCase):
     with ops.Graph().as_default():
       b, = importer.import_graph_def(
           original_graph_def, return_elements=["B"], name="imported_graph")
-      self.assertProtoEqualsVersion("""
-          node { name: 'imported_graph/A' op: 'None' }
-          node { name: 'imported_graph/B' op: 'None'  attr {
-            key: '_class'
-            value { list { s: 'loc:@imported_graph/A' } }
-          } }""", b.graph.as_graph_def())
+      self.assertTrue("_class" in b.node_def.attr)
+      self.assertProtoEquals(
+          "list { s: 'loc:@imported_graph/A' }",
+          b.node_def.attr["_class"])
 
   def testColocationWithDeviceFn(self):
+    if ops._USE_C_API: return  # TODO(skyewm): make this work with C API
+
     original_graph_def = self._MakeGraphDef("""
           node { name: 'A' op: 'None' attr {
             key: '_class'
@@ -738,6 +735,8 @@ class ImportGraphDefTest(test.TestCase):
           } }""", b.graph.as_graph_def())
 
   def testMultipleColocationWithDeviceFn(self):
+    if ops._USE_C_API: return  # TODO(skyewm): make this work with C API
+
     original_graph_def = self._MakeGraphDef("""
           node { name: 'A' op: 'None'}
           node { name: 'B' op: 'None'}
@@ -774,6 +773,8 @@ class ImportGraphDefTest(test.TestCase):
                }""", c.graph.as_graph_def())
 
   def testNamePrefixColocationAttrsMultipleImport(self):
+    if ops._USE_C_API: return  # TODO(skyewm): make this work with C API
+
     original_graph_def = self._MakeGraphDef("""
           node { name: 'A' op: 'None' }
           node { name: 'B' op: 'None'  attr {
@@ -799,6 +800,8 @@ class ImportGraphDefTest(test.TestCase):
           } }""", b.graph.as_graph_def())
 
   def testNamePrefixColocationAttrsNotFound(self):
+    if ops._USE_C_API: return  # TODO(skyewm): make this work with C API
+
     original_graph_def = self._MakeGraphDef("""
           node { name: 'B' op: 'None'  attr {
             key: '_class'
@@ -822,6 +825,8 @@ class ImportGraphDefTest(test.TestCase):
       self.assertEqual("graph_def must be a GraphDef proto.", str(e.exception))
 
   def testInvalidInputForInputMap(self):
+    if ops._USE_C_API: return  # TODO(skyewm): make this work with C API
+
     with ops.Graph().as_default():
       with self.assertRaises(TypeError) as e:
         importer.import_graph_def(
@@ -858,7 +863,17 @@ class ImportGraphDefTest(test.TestCase):
       self.assertEqual("return_elements must be a list of strings.",
                        str(e.exception))
 
+      if ops._USE_C_API:
+        error_msg = "Cannot convert 'a:b:c' to a tensor name."
+      else:
+        error_msg = "Requested return_element 'a:b:c' not found in graph_def."
+      with self.assertRaisesRegexp(ValueError, error_msg):
+        importer.import_graph_def(self._MakeGraphDef(""),
+                                  return_elements=["a:b:c"])
+
   def testDuplicateOperationNames(self):
+    if ops._USE_C_API: return  # TODO(skyewm): make this work with C API
+
     with ops.Graph().as_default():
       with self.assertRaises(ValueError) as e:
         importer.import_graph_def(
@@ -880,6 +895,8 @@ class ImportGraphDefTest(test.TestCase):
       self.assertAllEqual(pack.outputs[0].eval(), [5.0, 5.0])
 
   def testWithDevice(self):
+    if ops._USE_C_API: return  # TODO(skyewm): make this work with C API
+
     with ops.Graph().as_default() as g:
       # No device.
       a = constant_op.constant(3.0, name="a")
@@ -923,6 +940,8 @@ class ImportGraphDefTest(test.TestCase):
         self.assertEqual(c.device + "/device:GPU:0", c5.device)
 
   def testWithDeviceFunctionDependingOnInputs(self):
+    if ops._USE_C_API: return  # TODO(skyewm): make this work with C API
+
     with ops.Graph().as_default() as g:
       with ops.device("/job:ps"):
         v1 = constant_op.constant(1.0)
@@ -948,6 +967,8 @@ class ImportGraphDefTest(test.TestCase):
     self.assertEqual(2, len(ops_with_two_inputs))
 
   def testGradient(self):
+    if ops._USE_C_API: return  # TODO(skyewm): make this work with C API
+
     with ops.Graph().as_default() as g:
       inputs = array_ops.placeholder(
           dtypes.float32, shape=[None, 100], name="input")
@@ -1012,14 +1033,21 @@ class ImportGraphDefTest(test.TestCase):
       pat = (r"GraphDef producer version -1 below min producer %d supported "
              r"by TensorFlow \S+\.  Please regenerate your graph.$" %
              versions.GRAPH_DEF_VERSION_MIN_PRODUCER)
-      importer.import_graph_def(self._MakeGraphDef("", producer=-1))
-      x = constant_op.constant(
-          7)  # Need at least one op to get a C++ graph generated
-      with self.test_session(graph=g) as sess:
+      # C API throws error during import, Python-only throws error during run
+      if ops._USE_C_API:
         with self.assertRaisesRegexp(Exception, pat):
-          sess.run(x)
+          importer.import_graph_def(self._MakeGraphDef("", producer=-1))
+      else:
+        importer.import_graph_def(self._MakeGraphDef("", producer=-1))
+        x = constant_op.constant(
+            7)  # Need at least one op to get a C++ graph generated
+        with self.test_session(graph=g) as sess:
+          with self.assertRaisesRegexp(Exception, pat):
+            sess.run(x)
 
   def testVersionHigh(self):
+    if ops._USE_C_API: return  # TODO(skyewm): make this work with C API
+
     with ops.Graph().as_default() as g:
       pat = (r"GraphDef min consumer version %d above current version %d "
              r"for TensorFlow \S+\.  Please upgrade TensorFlow\.$" %
@@ -1033,6 +1061,8 @@ class ImportGraphDefTest(test.TestCase):
 
   def testVersionAppliesToOpConstruction(self):
     """These tests rely on shape fns in test_ops.cc."""
+    if ops._USE_C_API: return  # TODO(skyewm): make this work with C API
+
     with ops.Graph().as_default():
       importer.import_graph_def(
           self._MakeGraphDef(
@@ -1059,6 +1089,8 @@ class ImportGraphDefTest(test.TestCase):
       self.assertEqual(123.0, a[0].get_attr("default_float"))
 
   def testDefaultAttrsRemoved(self):
+    if ops._USE_C_API: return  # TODO(skyewm): make this work with C API
+
     producer_op_list = op_def_pb2.OpList()
     text_format.Merge("""
       op {
@@ -1090,6 +1122,8 @@ class ImportGraphDefTest(test.TestCase):
       self.assertEqual(987, a[0].get_attr("default_int"))
 
   def testFunctions(self):
+    if ops._USE_C_API: return  # TODO(skyewm): make this work with C API
+
     dtype = dtypes.float32
     @function.Defun(dtype, dtype, dtype, dtype)
     def Grad(x, y, dout1, dout2):  # pylint: disable=unused-argument
@@ -1167,6 +1201,8 @@ class ImportGraphDefTest(test.TestCase):
         self.assertEqual(sess.run("outer:0"), 21)
 
   def testImportInsideDefun(self):
+    if ops._USE_C_API: return  # TODO(skyewm): make this work with C API
+
     g = ops.Graph()
     with g.as_default():
       @function.Defun()
@@ -1190,6 +1226,8 @@ class ImportGraphDefTest(test.TestCase):
       self.assertEqual(z_val, -2.0)
 
   def testImportGraphWithFunctionTwice(self):
+    if ops._USE_C_API: return  # TODO(skyewm): make this work with C API
+
     g = ops.Graph()
     with g.as_default():
       @function.Defun()
-- 
GitLab


From c0b8a07776a7f64fc45113159230a0a10273ec4a Mon Sep 17 00:00:00 2001
From: Derek Murray <mrry@google.com>
Date: Wed, 22 Nov 2017 15:38:46 -0800
Subject: [PATCH 0263/1225] [tf.contrib.data.map_and_batch()] Parallelize the
 destruction of old invocation results.

For map functions with a large number of return values, resetting each InvocationResult to an initialized state led to running `Tensor::~Tensor()` a large number of times in series. This change moves that destruction to each invocation's callback, which effectively parallelizes it. A future optimization could involve moving the tensor (and its content) into the batch.

PiperOrigin-RevId: 176709725
---
 .../core/kernels/map_and_batch_dataset_op.cc      | 15 +++++++++++++--
 1 file changed, 13 insertions(+), 2 deletions(-)

diff --git a/tensorflow/core/kernels/map_and_batch_dataset_op.cc b/tensorflow/core/kernels/map_and_batch_dataset_op.cc
index 620efdb778..ad1e356dbd 100644
--- a/tensorflow/core/kernels/map_and_batch_dataset_op.cc
+++ b/tensorflow/core/kernels/map_and_batch_dataset_op.cc
@@ -258,7 +258,7 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel {
                 EnsureOutputAllocated(batch_result, result->return_values);
                 const size_t num_components = result->return_values.size();
                 for (size_t i = 0; i < num_components; ++i) {
-                  Tensor tensor = result->return_values[i];
+                  const Tensor& tensor = result->return_values[i];
                   Tensor* batch = &(batch_result->output)[i];
                   if (tensor.NumElements() !=
                       (batch->NumElements() / batch->dim_size(0))) {
@@ -271,6 +271,9 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel {
                         ", [batch]: ", batch_shape.DebugString()));
                     break;
                   }
+                  // TODO(mrry): Add a version of DoParallelConcat that allows
+                  // us to move `tensor` where possible, to speed up string
+                  // tensor batching.
                   Status copy_status = ::tensorflow::functor::DoParallelConcat(
                       *dataset()->device_, tensor, offset, batch);
                   if (!copy_status.ok()) {
@@ -279,6 +282,11 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel {
                   }
                 }
               }
+              // NOTE(mrry): We clear the return values here to release any
+              // memory associated with them and to paralellize the destruction
+              // of the tensors (which can be surprisingly expensive for
+              // map functions with large numbers of return values).
+              result->return_values.clear();
               batch_result->counter->DecrementCount();
             });
       }
@@ -297,7 +305,10 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel {
         for (size_t i = 0; i < dataset()->batch_size_; ++i) {
           size_t index = ComputeInvocationIndex(batch_index, i);
           InvocationResult* result = &invocation_results_[index];
-          *result = InvocationResult();
+          // Reset the state of `result`.
+          // NOTE(mrry): `result->return_values` were cleared when the previous
+          // invocation completed.
+          result->status = Status::OK();
         }
         // Start individual invocations.
         for (size_t i = 0; i < dataset()->batch_size_; ++i) {
-- 
GitLab


From 8d9eda26be345ace2e110feb0cf9a2500990eb82 Mon Sep 17 00:00:00 2001
From: Frank Chen <frankchn@google.com>
Date: Wed, 22 Nov 2017 15:51:33 -0800
Subject: [PATCH 0264/1225] Remove hardcoded discovery document now that the
 TPU alpha API definitions are public
 (https://www.googleapis.com/discovery/v1/apis/tpu/v1alpha1/rest).

PiperOrigin-RevId: 176710985
---
 .../python/training/tpu_cluster_resolver.py               | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/tensorflow/contrib/cluster_resolver/python/training/tpu_cluster_resolver.py b/tensorflow/contrib/cluster_resolver/python/training/tpu_cluster_resolver.py
index f0144e9faa..c74da9cabd 100644
--- a/tensorflow/contrib/cluster_resolver/python/training/tpu_cluster_resolver.py
+++ b/tensorflow/contrib/cluster_resolver/python/training/tpu_cluster_resolver.py
@@ -80,13 +80,9 @@ class TPUClusterResolver(ClusterResolver):
         raise ImportError('googleapiclient must be installed before using the '
                           'TPU cluster resolver')
 
-      # TODO(b/67375680): Remove custom URL once TPU APIs are finalized
       self._service = discovery.build(
-          'tpu',
-          'v1',
-          credentials=self._credentials,
-          discoveryServiceUrl='https://storage.googleapis.com'
-                              '/tpu-api-definition/v1alpha1.json')
+          'tpu', 'v1alpha1',
+          credentials=self._credentials)
     else:
       self._service = service
 
-- 
GitLab


From 0aa09c2de25ddb321405656ae33031773690bd5e Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 22 Nov 2017 15:54:27 -0800
Subject: [PATCH 0265/1225] dynamic_rnn: put all ops in the same scope

This clarifies the graph visualization a bit.

PiperOrigin-RevId: 176711260
---
 tensorflow/python/ops/rnn.py | 47 ++++++++++++++++++------------------
 1 file changed, 24 insertions(+), 23 deletions(-)

diff --git a/tensorflow/python/ops/rnn.py b/tensorflow/python/ops/rnn.py
index 436872f044..e30b19842f 100644
--- a/tensorflow/python/ops/rnn.py
+++ b/tensorflow/python/ops/rnn.py
@@ -565,33 +565,34 @@ def dynamic_rnn(cell, inputs, sequence_length=None, initial_state=None,
   if not _like_rnncell(cell):
     raise TypeError("cell must be an instance of RNNCell")
 
-  # By default, time_major==False and inputs are batch-major: shaped
-  #   [batch, time, depth]
-  # For internal calculations, we transpose to [time, batch, depth]
-  flat_input = nest.flatten(inputs)
-
-  if not time_major:
-    # (B,T,D) => (T,B,D)
-    flat_input = [ops.convert_to_tensor(input_) for input_ in flat_input]
-    flat_input = tuple(_transpose_batch_time(input_) for input_ in flat_input)
-
-  parallel_iterations = parallel_iterations or 32
-  if sequence_length is not None:
-    sequence_length = math_ops.to_int32(sequence_length)
-    if sequence_length.get_shape().ndims not in (None, 1):
-      raise ValueError(
-          "sequence_length must be a vector of length batch_size, "
-          "but saw shape: %s" % sequence_length.get_shape())
-    sequence_length = array_ops.identity(  # Just to find it in the graph.
-        sequence_length, name="sequence_length")
-
-  # Create a new scope in which the caching device is either
-  # determined by the parent scope, or is set to place the cached
-  # Variable using the same placement as for the rest of the RNN.
   with vs.variable_scope(scope or "rnn") as varscope:
+    # Create a new scope in which the caching device is either
+    # determined by the parent scope, or is set to place the cached
+    # Variable using the same placement as for the rest of the RNN.
     if context.in_graph_mode():
       if varscope.caching_device is None:
         varscope.set_caching_device(lambda op: op.device)
+
+    # By default, time_major==False and inputs are batch-major: shaped
+    #   [batch, time, depth]
+    # For internal calculations, we transpose to [time, batch, depth]
+    flat_input = nest.flatten(inputs)
+
+    if not time_major:
+      # (B,T,D) => (T,B,D)
+      flat_input = [ops.convert_to_tensor(input_) for input_ in flat_input]
+      flat_input = tuple(_transpose_batch_time(input_) for input_ in flat_input)
+
+    parallel_iterations = parallel_iterations or 32
+    if sequence_length is not None:
+      sequence_length = math_ops.to_int32(sequence_length)
+      if sequence_length.get_shape().ndims not in (None, 1):
+        raise ValueError(
+            "sequence_length must be a vector of length batch_size, "
+            "but saw shape: %s" % sequence_length.get_shape())
+      sequence_length = array_ops.identity(  # Just to find it in the graph.
+          sequence_length, name="sequence_length")
+
     batch_size = _best_effort_input_batch_size(flat_input)
 
     if initial_state is not None:
-- 
GitLab


From 806754888188e40430bc96ad33c5f51282c2d338 Mon Sep 17 00:00:00 2001
From: Benoit Steiner <bsteiner@google.com>
Date: Wed, 22 Nov 2017 16:30:44 -0800
Subject: [PATCH 0266/1225] Acquire the GIL before working with PyLists and
 PyDict.

PiperOrigin-RevId: 176714705
---
 tensorflow/python/grappler/cluster.i | 9 ++++++++-
 tensorflow/python/grappler/item.i    | 5 +++--
 2 files changed, 11 insertions(+), 3 deletions(-)

diff --git a/tensorflow/python/grappler/cluster.i b/tensorflow/python/grappler/cluster.i
index 1838c40e46..5a7cdf26f8 100644
--- a/tensorflow/python/grappler/cluster.i
+++ b/tensorflow/python/grappler/cluster.i
@@ -138,6 +138,7 @@ tensorflow::Status _GetOpPerformanceDataAndRunTime(
 
 static PyObject* TF_ListDevices(tensorflow::grappler::Cluster* cluster) {
   const std::unordered_map<string, tensorflow::DeviceProperties>& devices = cluster->GetDevices();
+  PyGILState_STATE gstate = PyGILState_Ensure();
   PyObject* result = PyList_New(devices.size());
   int i = 0;
   for (auto& dev : devices) {
@@ -150,6 +151,7 @@ static PyObject* TF_ListDevices(tensorflow::grappler::Cluster* cluster) {
     PyList_SetItem(result, i, dev_obj);
     ++i;
   }
+  PyGILState_Release(gstate);
   return result;
 }
 
@@ -184,6 +186,7 @@ static PyObject* TF_MeasureCosts(
   if (!status.ok()) {
     Py_RETURN_NONE;
   }
+  PyGILState_STATE gstate = PyGILState_Ensure();
   PyObject* op_perf_objs = PyList_New(
       op_performance_data.op_performance_size());
   for (int i = 0; i < op_performance_data.op_performance_size(); i++) {
@@ -211,8 +214,10 @@ static PyObject* TF_MeasureCosts(
     status = tensorflow::Status(tensorflow::error::Code::INTERNAL,
                                 "Error setting return tuples.");
     tensorflow::Set_TF_Status_from_Status(out_status, status);
-    Py_RETURN_NONE;
+    Py_INCREF(Py_None);
+    ret = Py_None;
   }
+  PyGILState_Release(gstate);
   return ret;
 }
 
@@ -240,6 +245,7 @@ static PyObject* TF_DeterminePeakMemoryUsage(
     Py_RETURN_NONE;
   }
 
+  PyGILState_STATE gstate = PyGILState_Ensure();
   PyObject* result = PyDict_New();
   for (const auto& device : cluster->GetDevices()) {
     const tensorflow::grappler::GraphMemory::MemoryUsage& usage =
@@ -261,6 +267,7 @@ static PyObject* TF_DeterminePeakMemoryUsage(
     PyTuple_SetItem(ret, 1, per_device);
     PyDict_SetItem(result, PyString_FromString(device.first.c_str()), ret);
   }
+  PyGILState_Release(gstate);
   return result;
 }
 
diff --git a/tensorflow/python/grappler/item.i b/tensorflow/python/grappler/item.i
index 8c346b4438..2fa502b81d 100644
--- a/tensorflow/python/grappler/item.i
+++ b/tensorflow/python/grappler/item.i
@@ -101,6 +101,7 @@ static PyObject* TF_GetOpProperties(const tensorflow::grappler::GrapplerItem* it
     Py_RETURN_NONE;
   }
 
+  PyGILState_STATE gstate = PyGILState_Ensure();
   PyObject* props = PyDict_New();
   for (const auto& node : item->graph.node()) {
     const string& node_name = node.name();
@@ -115,8 +116,8 @@ static PyObject* TF_GetOpProperties(const tensorflow::grappler::GrapplerItem* it
       PyList_SetItem(prop, i, output_prop);
     }
     CHECK_EQ(0, PyDict_SetItem(props, PyString_FromString(node_name.c_str()), prop));
-   }
-
+  }
+  PyGILState_Release(gstate);
   return props;
 }
 
-- 
GitLab


From 85fa6bdfe40f24259b3cec19637567ed3cff7370 Mon Sep 17 00:00:00 2001
From: Shivani Agrawal <shivaniagrawal@google.com>
Date: Wed, 22 Nov 2017 16:35:13 -0800
Subject: [PATCH 0267/1225] [tf.data] Patch for thread safe IgnoreErrorDataset.

PiperOrigin-RevId: 176715082
---
 .../core/kernels/ignore_errors_dataset_op.cc  | 25 ++++++++++++-------
 1 file changed, 16 insertions(+), 9 deletions(-)

diff --git a/tensorflow/core/kernels/ignore_errors_dataset_op.cc b/tensorflow/core/kernels/ignore_errors_dataset_op.cc
index 43ba5ab7dd..8cf263d87f 100644
--- a/tensorflow/core/kernels/ignore_errors_dataset_op.cc
+++ b/tensorflow/core/kernels/ignore_errors_dataset_op.cc
@@ -79,16 +79,20 @@ class IgnoreErrorsDatasetOp : public UnaryDatasetOpKernel {
       Status GetNextInternal(IteratorContext* ctx,
                              std::vector<Tensor>* out_tensors,
                              bool* end_of_sequence) override {
-        if (!input_impl_) {
-          *end_of_sequence = true;
-          return Status::OK();
-        }
-        Status s = input_impl_->GetNext(ctx, out_tensors, end_of_sequence);
-        while (!s.ok()) {
-          out_tensors->clear();
-          s = input_impl_->GetNext(ctx, out_tensors, end_of_sequence);
+        {
+          tf_shared_lock l(mu_);
+          if (!input_impl_) {
+            *end_of_sequence = true;
+            return Status::OK();
+          }
+          Status s = input_impl_->GetNext(ctx, out_tensors, end_of_sequence);
+          while (!s.ok()) {
+            out_tensors->clear();
+            s = input_impl_->GetNext(ctx, out_tensors, end_of_sequence);
+          }
         }
         if (*end_of_sequence) {
+          mutex_lock l(mu_);
           input_impl_.reset();
         }
         return Status::OK();
@@ -96,6 +100,7 @@ class IgnoreErrorsDatasetOp : public UnaryDatasetOpKernel {
 
      protected:
       Status SaveInternal(IteratorStateWriter* writer) override {
+        mutex_lock l(mu_);
         if (input_impl_)
           TF_RETURN_IF_ERROR(SaveParent(writer, input_impl_));
         else
@@ -106,6 +111,7 @@ class IgnoreErrorsDatasetOp : public UnaryDatasetOpKernel {
 
       Status RestoreInternal(OpKernelContext* ctx,
                              IteratorStateReader* reader) override {
+        mutex_lock l(mu_);
         if (reader->Contains(full_name("input_impls_empty")))
           input_impl_.reset();
         else
@@ -114,7 +120,8 @@ class IgnoreErrorsDatasetOp : public UnaryDatasetOpKernel {
       }
 
      private:
-      std::unique_ptr<IteratorBase> input_impl_;
+      mutex mu_;
+      std::unique_ptr<IteratorBase> input_impl_ GUARDED_BY(mu_);
     };
 
     const DatasetBase* const input_;
-- 
GitLab


From 8d84926f525dfd0728325e43cd39dbcb28fd3601 Mon Sep 17 00:00:00 2001
From: Yifei Feng <yifeif@google.com>
Date: Wed, 22 Nov 2017 16:42:07 -0800
Subject: [PATCH 0268/1225] Remove duplicated testConv1DTranspose

---
 tensorflow/python/kernel_tests/conv1d_test.py | 40 -------------------
 1 file changed, 40 deletions(-)

diff --git a/tensorflow/python/kernel_tests/conv1d_test.py b/tensorflow/python/kernel_tests/conv1d_test.py
index a86e411e2f..d92797a7d3 100644
--- a/tensorflow/python/kernel_tests/conv1d_test.py
+++ b/tensorflow/python/kernel_tests/conv1d_test.py
@@ -93,45 +93,5 @@ class Conv1DTest(test.TestCase):
     self.assertAllClose(cache_values, value)
 
 
-  def testConv1DTranspose(self):
-    with self.test_session():
-      stride = 2
-
-      # Input, output: [batch, width, depth]
-      x_shape = [2, 4, 3]
-      y_shape = [2, 9, 2]
-
-      # Filter: [kernel_width, output_depth, input_depth]
-      f_shape = [3, 2, 3]
-
-      x = constant_op.constant(
-          1.0, shape=x_shape, name="x", dtype=dtypes.float32)
-      f = constant_op.constant(
-          1.0, shape=f_shape, name="filter", dtype=dtypes.float32)
-      output = nn_ops.conv1d_transpose(
-          x, f, y_shape, stride=stride, padding="VALID")
-      value = output.eval()
-
-      cache_values = np.zeros(y_shape, dtype=np.float32)
-
-      # The amount of padding added
-      pad = 1
-
-      for n in xrange(x_shape[0]):
-        for k in xrange(f_shape[1]):
-          for w in xrange(pad, y_shape[1] - pad):
-            target = 3.0
-            # We add a case for locations divisible by the stride.
-            w_in = w % stride == 0 and w > pad and w < y_shape[1] - 1 - pad
-            if w_in:
-              target += 3.0
-            cache_values[n, w, k] = target
-
-          # copy values in the border
-          cache_values[n, 0, k] = cache_values[n, 1, k]
-          cache_values[n, -1, k] = cache_values[n, -2, k]
-
-    self.assertAllClose(cache_values, value)
-
 if __name__ == "__main__":
   test.main()
-- 
GitLab


From 1885db7ffa6cea7bacfb7ef1507f3103cd1829f0 Mon Sep 17 00:00:00 2001
From: Yao Zhang <yaozhang@google.com>
Date: Wed, 22 Nov 2017 16:36:48 -0800
Subject: [PATCH 0269/1225] Only convert the layout if the node is placed on
 GPU.

PiperOrigin-RevId: 176715219
---
 tensorflow/core/grappler/optimizers/BUILD     |   2 +
 .../grappler/optimizers/layout_optimizer.cc   | 146 +++++++++++-------
 .../grappler/optimizers/layout_optimizer.h    |   5 +-
 .../optimizers/layout_optimizer_test.cc       |  67 +++++++-
 4 files changed, 163 insertions(+), 57 deletions(-)

diff --git a/tensorflow/core/grappler/optimizers/BUILD b/tensorflow/core/grappler/optimizers/BUILD
index e127556054..5d9eb8e0b1 100644
--- a/tensorflow/core/grappler/optimizers/BUILD
+++ b/tensorflow/core/grappler/optimizers/BUILD
@@ -312,6 +312,7 @@ cc_library(
     visibility = ["//visibility:public"],
     deps = [
         ":graph_optimizer",
+        "//tensorflow/core:framework",
         "//tensorflow/core:lib",
         "//tensorflow/core:protos_all_cc",
         "//tensorflow/core/grappler:devices",
@@ -320,6 +321,7 @@ cc_library(
         "//tensorflow/core/grappler:utils",
         "//tensorflow/core/grappler/clusters:cluster",
         "//tensorflow/core/grappler/costs:graph_properties",
+        "//tensorflow/core/grappler/costs:virtual_placer",
         "//tensorflow/core/grappler/utils:frame",
     ],
 )
diff --git a/tensorflow/core/grappler/optimizers/layout_optimizer.cc b/tensorflow/core/grappler/optimizers/layout_optimizer.cc
index 89ebd8e98f..31c3ba6863 100644
--- a/tensorflow/core/grappler/optimizers/layout_optimizer.cc
+++ b/tensorflow/core/grappler/optimizers/layout_optimizer.cc
@@ -27,7 +27,9 @@ limitations under the License.
 #include "tensorflow/core/grappler/utils.h"
 #include "tensorflow/core/grappler/utils/frame.h"
 #include "tensorflow/core/lib/strings/numbers.h"
+#include "tensorflow/core/lib/strings/str_util.h"
 #include "tensorflow/core/lib/strings/strcat.h"
+#include "tensorflow/core/util/device_name_utils.h"
 
 namespace tensorflow {
 namespace grappler {
@@ -109,11 +111,13 @@ bool IsMaxPoolGradV1(const NodeDef& node) {
 
 class GraphProcessor {
  public:
-  GraphProcessor(GraphDef* graph, NodeMap* node_map,
-                 const std::unordered_set<string>& nodes_to_preserve)
-      : graph_(graph),
-        node_map_(node_map),
-        nodes_to_preserve_(nodes_to_preserve) {}
+  GraphProcessor(const VirtualPlacer& virtual_placer,
+                 const std::unordered_set<string>& nodes_to_preserve,
+                 GraphDef* graph, NodeMap* node_map)
+      : virtual_placer_(virtual_placer),
+        nodes_to_preserve_(nodes_to_preserve),
+        graph_(graph),
+        node_map_(node_map) {}
 
  protected:
   NodeDef* AddNodePermConst(const string& name, const string& device,
@@ -122,7 +126,6 @@ class GraphProcessor {
     node_map_->AddNode(name, node);
     node->set_name(name);
     node->set_op("Const");
-    node->set_device(device);
     AttrValue attr_data_type;
     attr_data_type.set_type(DT_INT32);
     node->mutable_attr()->insert({"dtype", attr_data_type});
@@ -133,6 +136,13 @@ class GraphProcessor {
     }
     tensor.AsProtoTensorContent(attr_tensor.mutable_tensor());
     node->mutable_attr()->insert({"value", attr_tensor});
+    string device_name;
+    if (device.empty()) {
+      device_name = virtual_placer_.get_canonical_device_name(*node);
+    } else {
+      device_name = device;
+    }
+    node->set_device(device_name);
     return node;
   }
 
@@ -142,7 +152,6 @@ class GraphProcessor {
     node_map_->AddNode(name, node);
     node->set_name(name);
     node->set_op("Const");
-    node->set_device(device);
     AttrValue attr_data_type;
     attr_data_type.set_type(dtype);
     node->mutable_attr()->insert({"dtype", attr_data_type});
@@ -151,6 +160,13 @@ class GraphProcessor {
     tensor.scalar<int>()() = value;
     tensor.AsProtoTensorContent(attr_tensor.mutable_tensor());
     node->mutable_attr()->insert({"value", attr_tensor});
+    string device_name;
+    if (device.empty()) {
+      device_name = virtual_placer_.get_canonical_device_name(*node);
+    } else {
+      device_name = device;
+    }
+    node->set_device(device_name);
     return node;
   }
 
@@ -159,7 +175,6 @@ class GraphProcessor {
     node_map_->AddNode(name, node);
     node->set_name(name);
     node->set_op("Const");
-    node->set_device(device);
     AttrValue attr_data_type;
     attr_data_type.set_type(DT_INT32);
     node->mutable_attr()->insert({"dtype", attr_data_type});
@@ -172,26 +187,37 @@ class GraphProcessor {
     }
     tensor.AsProtoTensorContent(attr_tensor.mutable_tensor());
     node->mutable_attr()->insert({"value", attr_tensor});
+    string device_name;
+    if (device.empty()) {
+      device_name = virtual_placer_.get_canonical_device_name(*node);
+    } else {
+      device_name = device;
+    }
+    node->set_device(device_name);
     return node;
   }
 
+  const VirtualPlacer& virtual_placer_;
+  const std::unordered_set<string>& nodes_to_preserve_;
   GraphDef* graph_;
   NodeMap* node_map_;
-  const std::unordered_set<string>& nodes_to_preserve_;
 };
 
 struct OptimizeContext {
   OptimizeContext(GraphDef* graph, NodeDef* node, NodeMap* node_map,
+                  const VirtualPlacer& virtual_placer,
                   const std::unordered_set<string>& nodes_to_preserve,
                   bool is_in_frame)
       : graph(graph),
         node(node),
         node_map(node_map),
+        virtual_placer(virtual_placer),
         nodes_to_preserve(nodes_to_preserve),
         is_in_frame(is_in_frame) {}
   GraphDef* graph;
   NodeDef* node;
   NodeMap* node_map;
+  const VirtualPlacer& virtual_placer;
   const std::unordered_set<string>& nodes_to_preserve;
   bool is_in_frame;
 };
@@ -199,8 +225,8 @@ struct OptimizeContext {
 class NodeProcessor : public GraphProcessor {
  public:
   explicit NodeProcessor(const OptimizeContext& opt_cxt)
-      : GraphProcessor(opt_cxt.graph, opt_cxt.node_map,
-                       opt_cxt.nodes_to_preserve),
+      : GraphProcessor(opt_cxt.virtual_placer, opt_cxt.nodes_to_preserve,
+                       opt_cxt.graph, opt_cxt.node_map),
         node_(opt_cxt.node),
         is_in_frame_(opt_cxt.is_in_frame) {}
   virtual ~NodeProcessor() {}
@@ -257,7 +283,25 @@ class NodeProcessor : public GraphProcessor {
   }
 
   virtual bool ShouldProcess() const {
-    return !MustPreserve() && IsNHWC() && IsDimsFour(*node_) && HasOutputs();
+    return !MustPreserve() && IsNHWC() && IsDimsFour(*node_) && HasOutputs() &&
+           IsOnGPU();
+  }
+
+  virtual bool IsOnGPU() const {
+    string device_name;
+    if (node_->device().empty()) {
+      device_name = virtual_placer_.get_canonical_device_name(*node_);
+    } else {
+      device_name = node_->device();
+    }
+    string device;
+    string not_used;
+    if (DeviceNameUtils::SplitDeviceName(device_name, &not_used, &device) &&
+        (StringPiece(str_util::Lowercase(device)))
+            .contains(str_util::Lowercase(DEVICE_GPU))) {
+      return true;
+    }
+    return false;
   }
 
   void UpdateAttrDataFormat() {
@@ -536,6 +580,9 @@ class BiasAddGradProcessor : public NodeProcessor {
     if (MustPreserve()) {
       return false;
     }
+    if (!IsOnGPU()) {
+      return false;
+    }
     auto input = node_map_->GetNode(node_->input(0));
     if (input) {
       if ((IsNHWC() && IsDimsFour(*input)) || IsNodeNCHWToNHWC(input->name())) {
@@ -556,7 +603,7 @@ class Conv2DProcessor : public NodeProcessor {
  protected:
   bool ShouldProcess() const override {
     return !MustPreserve() && IsNHWC() && IsDimsFour(*node_) && HasOutputs() &&
-           (!IsGemmUsed() || no_gemm_);
+           (!IsGemmUsed() || no_gemm_) && IsOnGPU();
   }
 
   TensorShapeProto GetShape(const string& input_name) const {
@@ -693,7 +740,7 @@ class AgnosticNodeProcessor : public NodeProcessor {
  protected:
   bool ShouldProcess() const override {
     return !MustPreserve() && IsDimsFour(*node_) && HasOutputs() &&
-           IsNodeAfterNCHWToNHWC();
+           IsNodeAfterNCHWToNHWC() && IsOnGPU();
   }
 
   bool IsNodeAfterNCHWToNHWC() const {
@@ -746,7 +793,8 @@ class BinaryOpProcessor : public AgnosticNodeProcessor {
     return !MustPreserve() && IsDimsFour(*node_) && HasOutputs() &&
            IsNodeAfterNCHWToNHWC() &&
            (Is4DOperateWithND(4) || Is4DOperateWithScalar() ||
-            Is4DOperateWithVector());
+            Is4DOperateWithVector()) &&
+           IsOnGPU();
   }
 
   std::vector<int> GetInputPos() const override {
@@ -855,7 +903,7 @@ class ConcatProcessor : public AgnosticNodeProcessor {
  protected:
   bool ShouldProcess() const override {
     return !MustPreserve() && IsDimsFour(*node_) && HasOutputs() &&
-           IsNodeAfterNCHWToNHWC() && IsAlongDimC();
+           IsNodeAfterNCHWToNHWC() && IsAlongDimC() && IsOnGPU();
   }
 
   std::vector<int> GetInputPos() const override {
@@ -920,7 +968,7 @@ class PadProcessor : public AgnosticNodeProcessor {
  protected:
   bool ShouldProcess() const override {
     return !MustPreserve() && IsDimsFour(*node_) && HasOutputs() &&
-           IsNodeAfterNCHWToNHWC() && PaddingSupported();
+           IsNodeAfterNCHWToNHWC() && PaddingSupported() && IsOnGPU();
   }
   Status CustomizedProcessing() override { return UpdateAttrValueOfInput(1); }
 
@@ -1132,7 +1180,8 @@ class SqueezeProcessor : public AgnosticNodeProcessor {
  protected:
   bool ShouldProcess() const override {
     return !MustPreserve() && IsDimsN(*node_, 2) && HasOutputs() &&
-           IsNodeAfterNCHWToNHWC() && IsInputConvertible() && IsAlongDimHW();
+           IsNodeAfterNCHWToNHWC() && IsInputConvertible() && IsAlongDimHW() &&
+           IsOnGPU();
   }
 
   Status AddLayoutTransposeToOutputs() override { return Status::OK(); }
@@ -1183,7 +1232,7 @@ class SumProcessor : public AgnosticNodeProcessor {
     auto input0 = node_map_->GetNode(node_->input(0));
     return !MustPreserve() && HasOutputs() && IsNodeAfterNCHWToNHWC() &&
            (IsDimsFour(*input0) || IsNodeNCHWToNHWC(input0->name())) &&
-           IsAlongDimNHW();
+           IsAlongDimNHW() && IsOnGPU();
   }
 
   Status AddLayoutTransposeToOutputs() override { return Status::OK(); }
@@ -1243,42 +1292,41 @@ class SumProcessor : public AgnosticNodeProcessor {
 class DataLayoutOptimizer : GraphProcessor {
  public:
   explicit DataLayoutOptimizer(
-      LayoutOptimizer::TuningConfig config,
-      const std::unordered_set<string>& nodes_to_preserve,
-      const string& default_device, GraphDef* graph, NodeMap* node_map)
-      : GraphProcessor(graph, node_map, nodes_to_preserve),
-        config_(config),
-        default_device_(default_device) {}
+      const VirtualPlacer& virtual_placer,
+      const LayoutOptimizer::TuningConfig& config,
+      const std::unordered_set<string>& nodes_to_preserve, GraphDef* graph,
+      NodeMap* node_map)
+      : GraphProcessor(virtual_placer, nodes_to_preserve, graph, node_map),
+        config_(config) {}
 
   Status Optimize() {
-    LOG(INFO) << "Number of nodes for original graph: " << graph_->node_size();
+    VLOG(1) << "Number of nodes for original graph: " << graph_->node_size();
     TF_RETURN_IF_ERROR(Expand());
-    LOG(INFO) << "Number of nodes after Expand: " << graph_->node_size();
+    VLOG(1) << "Number of nodes after Expand: " << graph_->node_size();
     TF_RETURN_IF_ERROR(Collapse());
-    LOG(INFO) << "Number of nodes after Collapse: " << graph_->node_size();
+    VLOG(1) << "Number of nodes after Collapse: " << graph_->node_size();
     return Status::OK();
   }
 
  private:
   NodeDef* AddNodePermNHWCToNCHW() {
-    return AddNodePermConst(kPermNHWCToNCHW, default_device_, {0, 3, 1, 2});
+    return AddNodePermConst(kPermNHWCToNCHW, "", {0, 3, 1, 2});
   }
 
   NodeDef* AddNodePermNCHWToNHWC() {
-    return AddNodePermConst(kPermNCHWToNHWC, default_device_, {0, 2, 3, 1});
+    return AddNodePermConst(kPermNCHWToNHWC, "", {0, 2, 3, 1});
   }
 
   NodeDef* AddNodeConcatConst() {
-    return AddNodeConstScalar(kConcatConst, default_device_, DT_INT32, 1);
+    return AddNodeConstScalar(kConcatConst, "", DT_INT32, 1);
   }
 
   NodeDef* AddNodeGatherAxisConst() {
-    return AddNodeConstScalar(kGatherAxisConst, default_device_, DT_INT32, 0);
+    return AddNodeConstScalar(kGatherAxisConst, "", DT_INT32, 0);
   }
 
   NodeDef* AddNodeReductionConst() {
-    return GraphProcessor::AddNodeReductionConst(kReductionConst,
-                                                 default_device_);
+    return GraphProcessor::AddNodeReductionConst(kReductionConst, "");
   }
 
   // Expand all nodes which is in NHWC, but supports NCHW or is layout agnostic.
@@ -1295,8 +1343,8 @@ class DataLayoutOptimizer : GraphProcessor {
           ops_format_supported.end()) {
         auto node = graph_->mutable_node(i);
         bool is_in_frame = !frames[node].empty();
-        OptimizeContext opt_cxt(graph_, node, node_map_, nodes_to_preserve_,
-                                is_in_frame);
+        OptimizeContext opt_cxt(graph_, node, node_map_, virtual_placer_,
+                                nodes_to_preserve_, is_in_frame);
         std::unique_ptr<NodeProcessor> node_processor;
         if (IsAvgPoolGrad(*node)) {
           node_processor.reset(new AvgPoolGradProcessor(opt_cxt));
@@ -1343,8 +1391,8 @@ class DataLayoutOptimizer : GraphProcessor {
             ops_format_agnostic.end()) {
           auto node = graph_->mutable_node(i);
           bool is_in_frame = !frames[node].empty();
-          OptimizeContext opt_cxt(graph_, node, node_map_, nodes_to_preserve_,
-                                  is_in_frame);
+          OptimizeContext opt_cxt(graph_, node, node_map_, virtual_placer_,
+                                  nodes_to_preserve_, is_in_frame);
           std::unique_ptr<NodeProcessor> node_processor;
           if (IsAddN(*node)) {
             node_processor.reset(new AddNProcessor(opt_cxt));
@@ -1419,8 +1467,7 @@ class DataLayoutOptimizer : GraphProcessor {
     return Status::OK();
   }
 
-  LayoutOptimizer::TuningConfig config_;
-  string default_device_;
+  const LayoutOptimizer::TuningConfig& config_;
 };
 
 int GetNumTranspose(const GraphDef& graph) {
@@ -1430,7 +1477,7 @@ int GetNumTranspose(const GraphDef& graph) {
       number++;
     }
   }
-  LOG(INFO) << "Number of Transpose nodes: " << number;
+  VLOG(1) << "Number of Transpose nodes: " << number;
   return number;
 }
 
@@ -1455,7 +1502,6 @@ int GetNumGPUs(const Cluster& cluster) {
 
 Status LayoutOptimizer::Tune(const GrapplerItem& item,
                              const GraphProperties& graph_properties,
-                             const string& default_device,
                              const TuningConfig& config, GraphDef* output) {
   auto status = graph_properties.AnnotateOutputShapes(output);
   if (!status.ok()) {
@@ -1463,8 +1509,8 @@ Status LayoutOptimizer::Tune(const GrapplerItem& item,
     return status;
   }
   NodeMap node_map(output);
-  DataLayoutOptimizer layout_optimizer(config, nodes_to_preserve_,
-                                       default_device, output, &node_map);
+  DataLayoutOptimizer layout_optimizer(*virtual_placer_, config,
+                                       nodes_to_preserve_, output, &node_map);
   status = layout_optimizer.Optimize();
   return status;
 }
@@ -1477,6 +1523,7 @@ Status LayoutOptimizer::Optimize(Cluster* cluster, const GrapplerItem& item,
     return Status::OK();
   }
 
+  virtual_placer_.reset(new VirtualPlacer(cluster));
   nodes_to_preserve_ = item.NodesToPreserve();
   GraphProperties graph_properties(item);
   auto status = graph_properties.InferStatically();
@@ -1487,20 +1534,13 @@ Status LayoutOptimizer::Optimize(Cluster* cluster, const GrapplerItem& item,
 
   TuningConfig config;
   config.no_gemm = false;
-  string default_device = "/job:localhost/replica:0/task:0/cpu:0";
-  if (cluster) {
-    if (!cluster->GetDevices().empty()) {
-      default_device = cluster->GetDevices().begin()->first;
-    }
-  }
-
-  status = Tune(item, graph_properties, default_device, config, output);
+  status = Tune(item, graph_properties, config, output);
   // This is based on an empirical observation that if the introduced Transpose
   // nodes is more than 30, not using GEMM implementation would result in better
   // performance.
   if (status.ok() && GetNumTranspose(*output) > 30) {
     config.no_gemm = true;
-    status = Tune(item, graph_properties, default_device, config, output);
+    status = Tune(item, graph_properties, config, output);
   }
 
   if (!status.ok()) {
diff --git a/tensorflow/core/grappler/optimizers/layout_optimizer.h b/tensorflow/core/grappler/optimizers/layout_optimizer.h
index f5dd70356a..357205828d 100644
--- a/tensorflow/core/grappler/optimizers/layout_optimizer.h
+++ b/tensorflow/core/grappler/optimizers/layout_optimizer.h
@@ -17,6 +17,7 @@ limitations under the License.
 #define TENSORFLOW_GRAPPLER_OPTIMIZERS_LAYOUT_OPTIMIZER_H_
 
 #include "tensorflow/core/grappler/costs/graph_properties.h"
+#include "tensorflow/core/grappler/costs/virtual_placer.h"
 #include "tensorflow/core/grappler/optimizers/graph_optimizer.h"
 
 namespace tensorflow {
@@ -47,10 +48,10 @@ class LayoutOptimizer : public GraphOptimizer {
                 const GraphDef& optimize_output, double result) override;
 
  private:
+  std::unique_ptr<VirtualPlacer> virtual_placer_;
   std::unordered_set<string> nodes_to_preserve_;
   Status Tune(const GrapplerItem& item, const GraphProperties& graph_properties,
-              const string& default_device, const TuningConfig& config,
-              GraphDef* output);
+              const TuningConfig& config, GraphDef* output);
 };
 
 }  // end namespace grappler
diff --git a/tensorflow/core/grappler/optimizers/layout_optimizer_test.cc b/tensorflow/core/grappler/optimizers/layout_optimizer_test.cc
index 5d2d90b193..d4ab42ad60 100644
--- a/tensorflow/core/grappler/optimizers/layout_optimizer_test.cc
+++ b/tensorflow/core/grappler/optimizers/layout_optimizer_test.cc
@@ -39,6 +39,11 @@ class LayoutOptimizerTest : public ::testing::Test {
 
   Output SimpleConv2D(tensorflow::Scope* s, int input_size, int filter_size,
                       const string& padding) {
+    return SimpleConv2D(s, input_size, filter_size, padding, "");
+  }
+
+  Output SimpleConv2D(tensorflow::Scope* s, int input_size, int filter_size,
+                      const string& padding, const string& device) {
     int batch_size = 128;
     int input_height = input_size;
     int input_width = input_size;
@@ -59,8 +64,8 @@ class LayoutOptimizerTest : public ::testing::Test {
     Output filter =
         ops::Const(s->WithOpName("Filter"), Input::Initializer(filter_data));
 
-    Output conv = ops::Conv2D(s->WithOpName("Conv2D"), input, filter,
-                              {1, stride, stride, 1}, padding);
+    Output conv = ops::Conv2D(s->WithOpName("Conv2D").WithDevice(device), input,
+                              filter, {1, stride, stride, 1}, padding);
     return conv;
   }
 
@@ -278,6 +283,64 @@ TEST_F(LayoutOptimizerTest, PreserveFetch) {
   EXPECT_EQ(conv_node->attr().at({"data_format"}).s(), "NHWC");
 }
 
+TEST_F(LayoutOptimizerTest, EmptyDevice) {
+  tensorflow::Scope s = tensorflow::Scope::NewRootScope();
+  auto conv = SimpleConv2D(&s, 3, 2, "VALID");
+  Output fetch = ops::Identity(s.WithOpName("Fetch"), {conv});
+  GrapplerItem item;
+  TF_CHECK_OK(s.ToGraphDef(&item.graph));
+  LayoutOptimizer optimizer;
+  GraphDef output;
+  Status status = optimizer.Optimize(virtual_cluster_.get(), item, &output);
+  NodeMap node_map(&output);
+  auto conv_node = node_map.GetNode("Conv2D");
+  EXPECT_EQ(conv_node->attr().at({"data_format"}).s(), "NCHW");
+}
+
+TEST_F(LayoutOptimizerTest, GPUDevice) {
+  tensorflow::Scope s = tensorflow::Scope::NewRootScope();
+  auto conv =
+      SimpleConv2D(&s, 3, 2, "VALID", "/job:w/replica:0/task:0/device:gpu:0");
+  Output fetch = ops::Identity(s.WithOpName("Fetch"), {conv});
+  GrapplerItem item;
+  TF_CHECK_OK(s.ToGraphDef(&item.graph));
+  LayoutOptimizer optimizer;
+  GraphDef output;
+  Status status = optimizer.Optimize(virtual_cluster_.get(), item, &output);
+  NodeMap node_map(&output);
+  auto conv_node = node_map.GetNode("Conv2D");
+  EXPECT_EQ(conv_node->attr().at({"data_format"}).s(), "NCHW");
+}
+
+TEST_F(LayoutOptimizerTest, CPUDeviceLowercase) {
+  tensorflow::Scope s = tensorflow::Scope::NewRootScope();
+  auto conv =
+      SimpleConv2D(&s, 3, 2, "VALID", "/job:w/replica:0/task:0/device:cpu:0");
+  Output fetch = ops::Identity(s.WithOpName("Fetch"), {conv});
+  GrapplerItem item;
+  TF_CHECK_OK(s.ToGraphDef(&item.graph));
+  LayoutOptimizer optimizer;
+  GraphDef output;
+  Status status = optimizer.Optimize(virtual_cluster_.get(), item, &output);
+  NodeMap node_map(&output);
+  auto conv_node = node_map.GetNode("Conv2D");
+  EXPECT_EQ(conv_node->attr().at({"data_format"}).s(), "NHWC");
+}
+
+TEST_F(LayoutOptimizerTest, CPUDeviceUppercase) {
+  tensorflow::Scope s = tensorflow::Scope::NewRootScope();
+  auto conv = SimpleConv2D(&s, 3, 2, "VALID", "/CPU:0");
+  Output fetch = ops::Identity(s.WithOpName("Fetch"), {conv});
+  GrapplerItem item;
+  TF_CHECK_OK(s.ToGraphDef(&item.graph));
+  LayoutOptimizer optimizer;
+  GraphDef output;
+  Status status = optimizer.Optimize(virtual_cluster_.get(), item, &output);
+  NodeMap node_map(&output);
+  auto conv_node = node_map.GetNode("Conv2D");
+  EXPECT_EQ(conv_node->attr().at({"data_format"}).s(), "NHWC");
+}
+
 }  // namespace
 }  // namespace grappler
 }  // namespace tensorflow
-- 
GitLab


From 51f5eb77f089f17f17653b5655f8207d3ff5d36b Mon Sep 17 00:00:00 2001
From: Yifei Feng <yifeif@google.com>
Date: Wed, 22 Nov 2017 16:47:31 -0800
Subject: [PATCH 0270/1225] Remove extra propagate_nans from
 tensorflow/stream_executor/dnn.cc

---
 tensorflow/stream_executor/dnn.cc | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/tensorflow/stream_executor/dnn.cc b/tensorflow/stream_executor/dnn.cc
index 6fd0e69905..44144a0613 100644
--- a/tensorflow/stream_executor/dnn.cc
+++ b/tensorflow/stream_executor/dnn.cc
@@ -473,8 +473,7 @@ PoolingDescriptor::PoolingDescriptor(int ndims)
       propagate_nans_(false),
       window_(ndims, 0),
       padding_(ndims, 0),
-      strides_(ndims, 1),
-      propagate_nans_(false) {}
+      strides_(ndims, 1) {}
 
 PoolingDescriptor::PoolingDescriptor() : PoolingDescriptor(/*ndims=*/2) {}
 
-- 
GitLab


From 5548cfc2eda3614a318e04cd922512be99aefefe Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 22 Nov 2017 16:45:54 -0800
Subject: [PATCH 0271/1225] [XLA] Add a convenience function that returns a
 platform with the given name.

PiperOrigin-RevId: 176715886
---
 .../compiler/xla/service/platform_util.cc     | 22 +++++++++++++++++++
 .../compiler/xla/service/platform_util.h      |  8 +++++++
 2 files changed, 30 insertions(+)

diff --git a/tensorflow/compiler/xla/service/platform_util.cc b/tensorflow/compiler/xla/service/platform_util.cc
index 3a1818de82..63f3bfb36c 100644
--- a/tensorflow/compiler/xla/service/platform_util.cc
+++ b/tensorflow/compiler/xla/service/platform_util.cc
@@ -94,6 +94,28 @@ PlatformUtil::GetSupportedPlatforms() {
       platforms_string.c_str());
 }
 
+/*static*/ StatusOr<se::Platform*> PlatformUtil::GetPlatform(
+    const string& platform_name) {
+  using tensorflow::str_util::Lowercase;
+  string platform_str = Lowercase(platform_name);
+  // "cpu" and "host" mean the same thing.
+  if (platform_str == "cpu") {
+    platform_str = "host";
+  }
+  // "gpu" and "cuda" mean the same thing.
+  if (platform_str == "gpu") {
+    platform_str = "cuda";
+  }
+
+  TF_ASSIGN_OR_RETURN(auto platforms, PlatformUtil::GetSupportedPlatforms());
+  for (se::Platform* platform : platforms) {
+    if (Lowercase(platform->Name()) == platform_str) {
+      return platform;
+    }
+  }
+  return InvalidArgument("platform %s not found", platform_name.c_str());
+}
+
 // Returns whether the device underlying the given StreamExecutor is supported
 // by XLA.
 static bool IsDeviceSupported(se::StreamExecutor* executor) {
diff --git a/tensorflow/compiler/xla/service/platform_util.h b/tensorflow/compiler/xla/service/platform_util.h
index eac5737030..a59d4ffe87 100644
--- a/tensorflow/compiler/xla/service/platform_util.h
+++ b/tensorflow/compiler/xla/service/platform_util.h
@@ -16,11 +16,14 @@ limitations under the License.
 #ifndef TENSORFLOW_COMPILER_XLA_SERVICE_PLATFORM_UTIL_H_
 #define TENSORFLOW_COMPILER_XLA_SERVICE_PLATFORM_UTIL_H_
 
+#include <string>
 #include <vector>
 
 #include "tensorflow/compiler/xla/statusor.h"
+#include "tensorflow/compiler/xla/types.h"
 #include "tensorflow/core/platform/macros.h"
 #include "tensorflow/core/platform/stream_executor_no_cuda.h"
+#include "tensorflow/core/platform/types.h"
 
 namespace xla {
 
@@ -39,6 +42,11 @@ class PlatformUtil {
   // default platform. Otherwise returns an error.
   static StatusOr<perftools::gputools::Platform*> GetDefaultPlatform();
 
+  // Returns the platform according to the given name. Returns error if there is
+  // no such platform.
+  static StatusOr<perftools::gputools::Platform*> GetPlatform(
+      const string& platform_name);
+
   // Returns a vector of StreamExecutors for the given platform. The vector is
   // indexed by device ordinal (device numbering used by StreamExecutor). If an
   // element is nullptr, then the device is present by not supported by XLA.
-- 
GitLab


From 58d0e5b6c3cf48acb68c87d2deca0e304b075b1a Mon Sep 17 00:00:00 2001
From: Igor Ganichev <iga@google.com>
Date: Wed, 22 Nov 2017 17:18:52 -0800
Subject: [PATCH 0272/1225] Add persistent GradientTape support

Added two simple tests for persistent tapes and did a manual test
that calling "del" on gradient tape releases all tensors.

Also:
 - Add missing Py_DECREF to error case in MakeTensorIDList
 - Make a couple error messages more descriptive
PiperOrigin-RevId: 176718477
---
 tensorflow/c/eager/tape.h                 | 66 +++++++++++++++++------
 tensorflow/python/eager/backprop.py       | 38 +++++++++++--
 tensorflow/python/eager/backprop_test.py  | 31 +++++++++++
 tensorflow/python/eager/pywrap_tfe.h      |  3 +-
 tensorflow/python/eager/pywrap_tfe_src.cc | 13 +++--
 tensorflow/python/eager/tape.py           |  4 +-
 6 files changed, 128 insertions(+), 27 deletions(-)

diff --git a/tensorflow/c/eager/tape.h b/tensorflow/c/eager/tape.h
index 84b40a1819..f52248e7d5 100644
--- a/tensorflow/c/eager/tape.h
+++ b/tensorflow/c/eager/tape.h
@@ -106,6 +106,12 @@ class VSpace {
 
   // Deletes the input tensor.
   virtual void DeleteGradient(Gradient* gradient) const = 0;
+
+  // Lets this VSpace know that it can release resources held by the
+  // `backward_function`, It will not be called again.
+  // `backward_function` must not be null.
+  virtual void ReleaseBackwardFunction(
+      BackwardFunction* backward_function) const = 0;
 };
 
 // Traces the execution of operations, doing eager garbage collection, and
@@ -113,7 +119,11 @@ class VSpace {
 template <typename Gradient, typename BackwardFunction>
 class GradientTape {
  public:
-  GradientTape() {}
+  // If `persistent` is true, GradientTape will not eagerly delete backward
+  // functions (and hence the tensors they keep alive). Instead, everything
+  // is deleted in ~GradientTape. Persistent GradientTapes are useful when
+  // users want to compute multiple gradients over the same tape.
+  GradientTape(bool persistent) : persistent_(persistent) {}
   ~GradientTape() {
     for (const auto& pair : op_tape_) {
       pair.second.backward_function_deleter();
@@ -150,6 +160,10 @@ class GradientTape {
   // Map from tensor id to number of remaining usages (i.e. how many entries in
   // the tape refer to it); to aid in tape garbage collection.
   std::unordered_map<int64, int64> tensor_usage_;
+
+  // If true, all activations are deleted in the first call to ComputeGradient.
+  // Else, only when this is destructed.
+  bool persistent_;
 };
 
 // Template instantiations here
@@ -279,11 +293,16 @@ struct BackpropInitialState {
   std::unordered_map<int64, int64> op_missing_tensor;
 };
 
+// If `persistent_tape` is true, op_tape is not changed and none of the
+// backwards functions are deleted.
+// If `persistent_tape` is false, op_tape is cleared and backwards functions
+// not needed for gradient computation are deleted. Backwards functions that
+// are needed, are copied and returned in BackpropInitialState.
 template <typename BackwardFunction>
 BackpropInitialState<BackwardFunction> PrepareBackprop(
     gtl::ArraySlice<int64> target, const TensorTape& tensor_tape,
-    OpTape<BackwardFunction> op_tape,
-    const std::unordered_set<int64>& sources_set) {
+    OpTape<BackwardFunction>* op_tape,
+    const std::unordered_set<int64>& sources_set, bool persistent_tape) {
   std::vector<int64> tensor_stack;
   tensor_stack.reserve(target.size());
   for (auto t : target) {
@@ -298,9 +317,9 @@ BackpropInitialState<BackwardFunction> PrepareBackprop(
       continue;
     }
     int64 op_id = op_id_it->second;
-    auto op_it = op_tape.find(op_id);
+    auto op_it = op_tape->find(op_id);
     auto result_op_it = result.op_tape.find(op_id);
-    if (op_id == -1 || op_it == op_tape.end() ||
+    if (op_id == -1 || op_it == op_tape->end() ||
         result_op_it != result.op_tape.end()) {
       continue;
     }
@@ -317,7 +336,9 @@ BackpropInitialState<BackwardFunction> PrepareBackprop(
         }
       }
     }
-    op_tape.erase(op_it);
+    if (!persistent_tape) {
+      op_tape->erase(op_it);
+    }
   }
   for (auto& pair : result.tensor_usage_counts) {
     auto it = tensor_tape.find(pair.first);
@@ -325,9 +346,15 @@ BackpropInitialState<BackwardFunction> PrepareBackprop(
       result.op_missing_tensor[it->second] += 1;
     }
   }
-  // Call destructors for all unneeded gradient functions.
-  for (const auto& op_pair : op_tape) {
-    op_pair.second.backward_function_deleter();
+  if (!persistent_tape) {
+    // Call destructors for all unneeded gradient functions and
+    // clear the op_tape. We can clear the tape because ownership of
+    // backward functions that will be used for gradient computation
+    // has been transfered to `result`.
+    for (const auto& op_pair : *op_tape) {
+      op_pair.second.backward_function_deleter();
+    }
+    op_tape->clear();
   }
   return result;
 }
@@ -369,7 +396,8 @@ Status InitialGradients(
           auto op_it = op_tape.find(tensor_it->second);
           if (op_it == op_tape.end()) {
             return errors::Internal(
-                "Internal state of the gradient tape is invalid.");
+                "Internal state of the gradient tape is invalid: "
+                "failed to find operation producing a tensor");
           }
           bool found = false;
           for (int j = 0; j < op_it->second.output_tensor_info.size(); ++j) {
@@ -383,7 +411,8 @@ Status InitialGradients(
           }
           if (!found) {
             return errors::Internal(
-                "Internal state of the gradient tape is invalid.");
+                "Internal state of the gradient tape is invalid: "
+                "none of operations outputs match expected tensor");
           }
         } else {
           // No record of the target tensor found on the tape, so no gradient
@@ -415,17 +444,19 @@ Status GradientTape<Gradient, BackwardFunction>::ComputeGradient(
   std::unordered_set<int64> sources_set(source_tensor_ids.begin(),
                                         source_tensor_ids.end());
   BackpropInitialState<BackwardFunction> state = PrepareBackprop(
-      target_tensor_ids, tensor_tape_, std::move(op_tape_), sources_set);
+      target_tensor_ids, tensor_tape_, &op_tape_, sources_set, persistent_);
   std::vector<int64> op_stack =
       InitialStack(state.op_tape, state.op_missing_tensor);
   std::unordered_map<int64, std::vector<Gradient*>> gradients;
   Status s = InitialGradients(vspace, target_tensor_ids, output_gradients,
                               tensor_tape_, state.op_tape,
                               state.tensor_usage_counts, &gradients);
-  auto cleanup = [&state]() {
-    // Release all backprop functions
-    for (const auto& pair : state.op_tape) {
-      pair.second.backward_function_deleter();
+  auto cleanup = [this, &state]() {
+    if (!persistent_) {
+      // Release all backprop functions
+      for (const auto& pair : state.op_tape) {
+        pair.second.backward_function_deleter();
+      }
     }
   };
   if (!s.ok()) {
@@ -484,6 +515,9 @@ Status GradientTape<Gradient, BackwardFunction>::ComputeGradient(
     std::vector<Gradient*> in_gradients;
     Status s = vspace.CallBackwardFunction(trace.backward_function,
                                            out_gradients, &in_gradients);
+    if (!persistent_) {
+      vspace.ReleaseBackwardFunction(trace.backward_function);
+    }
     if (!s.ok()) {
       cleanup();
       return s;
diff --git a/tensorflow/python/eager/backprop.py b/tensorflow/python/eager/backprop.py
index 25f7ae785e..0144f3b1e5 100644
--- a/tensorflow/python/eager/backprop.py
+++ b/tensorflow/python/eager/backprop.py
@@ -798,13 +798,41 @@ class GradientTape(object):
   grad = g.gradient(y, [x])[0]
   assert grad.numpy() == 6.0
   ```
+
+  By default, the resources held by a GradientTape are released as soon as
+  GradientTape.gradient() method is called. However, if one need to compute
+  multiple gradients over the same computation, she can create a persistent
+  GradientTape. Persistent tapes allow multiple calls to the gradient() method
+  and release resources when the tape object is destructed.
+
+  Example usage:
+
+  ```python
+  with tfe.GradientTape(persistent=True) as g:
+    x = tf.constant(3.0)
+    g.watch(x)
+    y = x * x
+    z = y * y
+  dz_dx = g.gradient(z, [x])[0]
+  assert dz_dx.numpy() == 108.0   # 4*x^3 at x = 3
+  dy_dx = g.gradient(y, [x])[0]
+  assert dy_dx.numpy() == 6.0
+  del g  # Drop the reference to the tape
   """
 
-  def __init__(self):
+  def __init__(self, persistent=False):
+    """Creates a new GradientTape.
+
+    Args:
+      persistent: Boolean controlling whether a persistent gradient tape
+        is created. Must be True or False.
+
+    """
     self._tape = None
+    self._persistent = persistent
 
   def __enter__(self):
-    tape.push_new_tape()
+    tape.push_new_tape(persistent=self._persistent)
     return self
 
   def __exit__(self, typ, value, traceback):
@@ -838,12 +866,14 @@ class GradientTape(object):
        than once.
     """
     if self._tape is None:
-      raise RuntimeError("GradientTape.gradient can only be called once, and "
+      raise RuntimeError("GradientTape.gradient can only be called once "
+                         "on non-persistent tapes, and "
                          "only when the context manager has exited.")
     sources = [x.handle if isinstance(x, resource_variable_ops.ResourceVariable)
                else x
                for x in sources]
     grad = imperative_grad.imperative_grad(
         _default_vspace, self._tape, [target], sources)
-    self._tape = None
+    if not self._persistent:
+      self._tape = None
     return grad
diff --git a/tensorflow/python/eager/backprop_test.py b/tensorflow/python/eager/backprop_test.py
index e18ebba785..9816dd022e 100644
--- a/tensorflow/python/eager/backprop_test.py
+++ b/tensorflow/python/eager/backprop_test.py
@@ -314,6 +314,37 @@ class BackpropTest(test.TestCase):
         RuntimeError, 'GradientTape.gradient can only be called once'):
       g.gradient(y, [x])
 
+  def testPersistentTape(self):
+    with backprop.GradientTape(persistent=True) as g:
+      x = constant_op.constant(3.0)
+      g.watch(x)
+      y = x * x
+      z = y * y
+    dz_dx = g.gradient(z, [x])[0]
+    self.assertEqual(dz_dx.numpy(), 4*3*3*3)
+    dy_dx = g.gradient(y, [x])[0]
+    self.assertEqual(dy_dx.numpy(), 2*3)
+    del g
+
+  def testPersistentNestedTape(self):
+    with backprop.GradientTape(persistent=True) as g:
+      x = constant_op.constant(3.0)
+      g.watch(x)
+      y = x * x
+      with backprop.GradientTape(persistent=True) as gg:
+        gg.watch(y)
+        z = 2 * y
+      for _ in range(2):
+        inner_grad = gg.gradient(z, [y])[0]
+        self.assertEqual(inner_grad.numpy(), 2.0)
+      y += inner_grad
+      del gg
+    grad = g.gradient(y, [x])[0]
+    self.assertEqual(grad.numpy(), 6.0)
+    grad = g.gradient(z, [x])[0]
+    self.assertEqual(grad.numpy(), 12.0)
+    del g
+
   def testGradientTapeVariable(self):
     v = resource_variable_ops.ResourceVariable(1.0, name='v')
     with backprop.GradientTape() as g:
diff --git a/tensorflow/python/eager/pywrap_tfe.h b/tensorflow/python/eager/pywrap_tfe.h
index f96245f7a5..a33b17ada6 100644
--- a/tensorflow/python/eager/pywrap_tfe.h
+++ b/tensorflow/python/eager/pywrap_tfe.h
@@ -88,7 +88,8 @@ TFE_TensorHandle* EagerTensor_Handle(const PyObject* o);
 PyObject* TFE_Py_InitEagerTensor(PyObject* base_class);
 
 // Pushes a new tape into the thread-local stack.
-void TFE_Py_TapeStackPushNew();
+// `persistent` must be a PyBool_Type, i.e either Py_True or Py_False
+void TFE_Py_TapeStackPushNew(PyObject* persistent);
 
 // Pops the tape from the top of the stack and returns it.
 PyObject* TFE_Py_TapeStackPop();
diff --git a/tensorflow/python/eager/pywrap_tfe_src.cc b/tensorflow/python/eager/pywrap_tfe_src.cc
index 0a0749fd4b..ce823cb567 100644
--- a/tensorflow/python/eager/pywrap_tfe_src.cc
+++ b/tensorflow/python/eager/pywrap_tfe_src.cc
@@ -469,7 +469,8 @@ static tensorflow::int64 FastTensorId(PyObject* tensor) {
 class GradientTape
     : public tensorflow::eager::GradientTape<PyObject, PyObject> {
  public:
-  GradientTape() {}
+  explicit GradientTape(bool persistent)
+      : tensorflow::eager::GradientTape<PyObject, PyObject>(persistent) {}
 
   void WatchVariable(PyObject* v) {
     watched_variables_.insert(v);
@@ -557,11 +558,11 @@ std::vector<TFE_Py_Tape*>* GetTapeStack() {
 }
 #endif
 
-void TFE_Py_TapeStackPushNew() {
+void TFE_Py_TapeStackPushNew(PyObject* persistent) {
   TFE_Py_Tape_Type.tp_new = PyType_GenericNew;
   if (PyType_Ready(&TFE_Py_Tape_Type) < 0) return;
   TFE_Py_Tape* tape = PyObject_NEW(TFE_Py_Tape, &TFE_Py_Tape_Type);
-  tape->tape = new GradientTape();
+  tape->tape = new GradientTape(persistent == Py_True);
   GetTapeStack()->push_back(tape);
 }
 
@@ -704,6 +705,7 @@ std::vector<tensorflow::int64> MakeTensorIDList(PyObject* tensors) {
     PyObject* tensor = PySequence_Fast_GET_ITEM(seq, i);
     list.push_back(FastTensorId(tensor));
     if (PyErr_Occurred()) {
+      Py_DECREF(seq);
       return list;
     }
   }
@@ -889,7 +891,6 @@ class PyVSpace : public tensorflow::eager::VSpace<PyObject, PyObject> {
     PyObject* py_result = PyEval_CallObject(
         reinterpret_cast<PyObject*>(backward_function), grads);
     Py_DECREF(grads);
-    Py_DECREF(backward_function);
     if (py_result == nullptr) {
       return tensorflow::errors::Internal("gradient function threw exceptions");
     }
@@ -917,6 +918,10 @@ class PyVSpace : public tensorflow::eager::VSpace<PyObject, PyObject> {
     return tensorflow::Status::OK();
   }
 
+  void ReleaseBackwardFunction(PyObject* backward_function) const final {
+    Py_DECREF(backward_function);
+  }
+
   void DeleteGradient(PyObject* tensor) const final { Py_XDECREF(tensor); }
 
  private:
diff --git a/tensorflow/python/eager/tape.py b/tensorflow/python/eager/tape.py
index 440c84b7ea..14b5238f74 100644
--- a/tensorflow/python/eager/tape.py
+++ b/tensorflow/python/eager/tape.py
@@ -33,9 +33,9 @@ class Tape(object):
     return pywrap_tensorflow.TFE_Py_TapeWatchedVariables(self._tape)
 
 
-def push_new_tape():
+def push_new_tape(persistent=False):
   """Pushes a new tape onto the tape stack."""
-  pywrap_tensorflow.TFE_Py_TapeStackPushNew()
+  pywrap_tensorflow.TFE_Py_TapeStackPushNew(persistent)
 
 
 def watch(tensor):
-- 
GitLab


From f25abbfb25441bec198ca7517485fbab63f07be1 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 22 Nov 2017 17:40:42 -0800
Subject: [PATCH 0273/1225] Minor cleanup: remove unnecessary GetCudaContext.

Note that there is no protection against a caller of CUDAExecutor::Launch from
accidentally passing a Stream associated with the wrong CUDAExecutor.

This is no different from any other CUDAExecutor methods that take a Stream
argument, where we similarly have no such protection.

The main caller is Stream::ThenLaunch, which necessarily calls Launch on the
correct corresponding CUDAExecutor. Other callers use a similar pattern.

PiperOrigin-RevId: 176719918
---
 .../stream_executor/cuda/cuda_gpu_executor.cc     | 15 +++++----------
 1 file changed, 5 insertions(+), 10 deletions(-)

diff --git a/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc b/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc
index 6c522264e1..64d14f29df 100644
--- a/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc
+++ b/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc
@@ -108,11 +108,6 @@ static CUdeviceptr AsCudaDevicePtr(DeviceMemoryBase *gpu_mem) {
   return AsCudaDevicePtr(*gpu_mem);
 }
 
-static CudaContext* GetCudaContext(Stream *stream) {
-  return static_cast<CUDAExecutor *>(stream->parent()->implementation())
-      ->cuda_context();
-}
-
 CudaContext* ExtractCudaContext(CUDAExecutor *cuda_exec) {
   CHECK(cuda_exec != nullptr);
   return cuda_exec->cuda_context();
@@ -380,11 +375,11 @@ bool CUDAExecutor::Launch(Stream *stream, const ThreadDim &thread_dims,
 
   void **kernel_params = const_cast<void **>(args.argument_addresses().data());
 
-  if (!CUDADriver::LaunchKernel(GetCudaContext(stream), cufunc, block_dims.x,
-                                block_dims.y, block_dims.z, thread_dims.x,
-                                thread_dims.y, thread_dims.z,
-                                args.number_of_shared_bytes(), custream,
-                                kernel_params, nullptr /* = extra */)) {
+  if (!CUDADriver::LaunchKernel(context_, cufunc, block_dims.x, block_dims.y,
+                                block_dims.z, thread_dims.x, thread_dims.y,
+                                thread_dims.z, args.number_of_shared_bytes(),
+                                custream, kernel_params,
+                                nullptr /* = extra */)) {
     LOG(ERROR) << "failed to launch CUDA kernel with args: "
                << args.number_of_arguments()
                << "; thread dim: " << thread_dims.ToString()
-- 
GitLab


From 4e5534d3d35a72b87902212e2847ca2871cc7b75 Mon Sep 17 00:00:00 2001
From: Benoit Steiner <bsteiner@google.com>
Date: Wed, 22 Nov 2017 17:53:40 -0800
Subject: [PATCH 0274/1225] Prevented a couple of memory leaks in the code
 generated by swig

PiperOrigin-RevId: 176720721
---
 tensorflow/python/BUILD                    |  5 +-
 tensorflow/python/grappler/cluster.i       | 87 ++++++++++++++--------
 tensorflow/python/grappler/cluster.py      |  4 +-
 tensorflow/python/grappler/cost_analyzer.i | 11 +--
 tensorflow/python/grappler/item.i          | 44 ++++++++---
 tensorflow/python/grappler/tf_optimizer.i  | 14 +---
 tensorflow/python/grappler/tf_optimizer.py |  6 +-
 7 files changed, 110 insertions(+), 61 deletions(-)

diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD
index 54c43c1337..9d3974b98e 100644
--- a/tensorflow/python/BUILD
+++ b/tensorflow/python/BUILD
@@ -4350,7 +4350,10 @@ py_library(
     ],
     srcs_version = "PY2AND3",
     visibility = ["//visibility:public"],
-    deps = [":pywrap_tensorflow_internal"],
+    deps = [
+        ":pywrap_tensorflow_internal",
+        ":tf_cluster",
+    ],
 )
 
 py_test(
diff --git a/tensorflow/python/grappler/cluster.i b/tensorflow/python/grappler/cluster.i
index 5a7cdf26f8..18fda345e6 100644
--- a/tensorflow/python/grappler/cluster.i
+++ b/tensorflow/python/grappler/cluster.i
@@ -14,6 +14,14 @@ limitations under the License.
 ==============================================================================*/
 
 %include "tensorflow/python/platform/base.i"
+%include <std_shared_ptr.i>
+%include "item.i"
+
+// Wrap the cluster into an object that swig can manipulate. This ensures it will call the object
+// destructor upon garbage collection instead of leaking memory.
+struct GCluster {
+  std::shared_ptr<tensorflow::grappler::Cluster> cluster_;
+};
 
 %{
 #include "tensorflow/core/protobuf/device_properties.pb.h"
@@ -72,6 +80,7 @@ bool _PyObjAs(PyObject *input, tensorflow::NamedDevice *out) {
 }
 
 %{
+#include <memory>
 #include <vector>
 #include "tensorflow/core/grappler/devices.h"
 #include "tensorflow/core/grappler/clusters/single_machine.h"
@@ -82,39 +91,56 @@ bool _PyObjAs(PyObject *input, tensorflow::NamedDevice *out) {
 #include "tensorflow/core/grappler/costs/utils.h"
 #include "tensorflow/core/protobuf/device_properties.pb.h"
 
-static tensorflow::grappler::Cluster* TF_NewCluster(
-    bool allow_soft_placement,
-    bool disable_detailed_stats, TF_Status* out_status) {
-  int num_cpu_cores = tensorflow::grappler::GetNumAvailableLogicalCPUCores();
-  int num_gpus = tensorflow::grappler::GetNumAvailableGPUs();;
+// Provide the implementation of the GCluster struct here.
+struct GCluster {
+  GCluster() {}
+  GCluster(tensorflow::grappler::Cluster* cluster) : cluster_(cluster) {}
+
+  tensorflow::grappler::Cluster* operator->() const {
+    return cluster_.get();
+  }
+  tensorflow::grappler::Cluster* get() const {
+    return cluster_.get();
+  }
+  bool is_none() const {
+    return cluster_.get() == nullptr;
+  }
+
+  std::shared_ptr<tensorflow::grappler::Cluster> cluster_;
+};
+
+
+static GCluster TF_NewCluster(bool allow_soft_placement,
+                   bool disable_detailed_stats, TF_Status* out_status) {
+    int num_cpu_cores = tensorflow::grappler::GetNumAvailableLogicalCPUCores();
+  int num_gpus = tensorflow::grappler::GetNumAvailableGPUs();
   int timeout_s = 60 * 10;
-  tensorflow::grappler::Cluster* cluster =
+  tensorflow::grappler::Cluster* cluster_ =
       new tensorflow::grappler::SingleMachine(
           timeout_s, num_cpu_cores, num_gpus);
-  cluster->DisableDetailedStats(disable_detailed_stats);
-  cluster->AllowSoftPlacement(allow_soft_placement);
-  tensorflow::Status status = cluster->Provision();
+  cluster_->DisableDetailedStats(disable_detailed_stats);
+  cluster_->AllowSoftPlacement(allow_soft_placement);
+  tensorflow::Status status = cluster_->Provision();
   tensorflow::Set_TF_Status_from_Status(out_status, status);
-  return cluster;
+  return GCluster(cluster_);
 }
 
-static tensorflow::grappler::Cluster* TF_NewVirtualCluster(
+static GCluster TF_NewVirtualCluster(
     const std::vector<tensorflow::NamedDevice>& named_devices,
     TF_Status* out_status) {
   std::unordered_map<string, tensorflow::DeviceProperties> devices;
   for (const auto& named_device : named_devices) {
     devices[named_device.name()]= named_device.properties();
   }
-  tensorflow::grappler::Cluster* cluster =
+  tensorflow::grappler::Cluster*cluster_ =
       new tensorflow::grappler::VirtualCluster(devices);
-  tensorflow::Status status = cluster->Provision();
+  tensorflow::Status status = cluster_->Provision();
   tensorflow::Set_TF_Status_from_Status(out_status, status);
-  return cluster;
+  return GCluster(cluster_);
 }
 
-static void TF_DeleteCluster(tensorflow::grappler::Cluster* cluster) {
+static void TF_ShutdownCluster(GCluster cluster) {
   cluster->Shutdown();
-  delete cluster;
 }
 
 tensorflow::Status _GetOpPerformanceDataAndRunTime(
@@ -136,7 +162,7 @@ tensorflow::Status _GetOpPerformanceDataAndRunTime(
   return tensorflow::Status::OK();
 }
 
-static PyObject* TF_ListDevices(tensorflow::grappler::Cluster* cluster) {
+static PyObject* TF_ListDevices(GCluster cluster) {
   const std::unordered_map<string, tensorflow::DeviceProperties>& devices = cluster->GetDevices();
   PyGILState_STATE gstate = PyGILState_Ensure();
   PyObject* result = PyList_New(devices.size());
@@ -156,13 +182,13 @@ static PyObject* TF_ListDevices(tensorflow::grappler::Cluster* cluster) {
 }
 
 static PyObject* TF_MeasureCosts(
-    const tensorflow::grappler::GrapplerItem* item,
-    tensorflow::grappler::Cluster* cluster,
+    GItem item,
+    GCluster cluster,
     bool generate_timeline, TF_Status* out_status) {
   tensorflow::OpPerformanceList op_performance_data;
   tensorflow::StepStats step_stats;
 
-  tensorflow::grappler::MeasuringCostEstimator cost_measure(cluster, 10, 0);
+  tensorflow::grappler::MeasuringCostEstimator cost_measure(cluster.get(), 10, 0);
 
   tensorflow::grappler::Costs costs;
   tensorflow::Status status = _GetOpPerformanceDataAndRunTime(
@@ -223,10 +249,10 @@ static PyObject* TF_MeasureCosts(
 
 
 static PyObject* TF_DeterminePeakMemoryUsage(
-    const tensorflow::grappler::GrapplerItem* item,
-    tensorflow::grappler::Cluster* cluster,
+    GItem item,
+    GCluster cluster,
     TF_Status* out_status) {
-  if (!item || !cluster) {
+  if (item.is_none() || cluster.is_none()) {
     tensorflow::Status status(tensorflow::error::Code::INTERNAL,
                               "You need both a cluster and an item to determine peak memory usage");
     tensorflow::Set_TF_Status_from_Status(out_status, status);
@@ -236,7 +262,7 @@ static PyObject* TF_DeterminePeakMemoryUsage(
 
   tensorflow::Status status;
   if (cluster->DetailedStatsEnabled()) {
-    status = memory.InferDynamically(cluster);
+    status = memory.InferDynamically(cluster.get());
   } else {
     status = memory.InferStatically(cluster->GetDevices());
   }
@@ -274,18 +300,17 @@ static PyObject* TF_DeterminePeakMemoryUsage(
 %}
 
 // Wrap these functions.
-
-static tensorflow::grappler::Cluster* TF_NewCluster(
+static GCluster TF_NewCluster(
     bool allow_soft_placement, bool disable_detailed_stats, TF_Status* out_status);
-static tensorflow::grappler::Cluster* TF_NewVirtualCluster(
+static GCluster TF_NewVirtualCluster(
     const std::vector<tensorflow::NamedDevice>& named_devices,
     TF_Status* out_status);
-static void TF_DeleteCluster(tensorflow::grappler::Cluster* cluster);
-static PyObject* TF_ListDevices(tensorflow::grappler::Cluster* cluster);
+static void TF_ShutdownCluster(GCluster cluster);
+static PyObject* TF_ListDevices(GCluster cluster);
 static PyObject* TF_MeasureCosts(
-    const tensorflow::grappler::GrapplerItem* item, tensorflow::grappler::Cluster* cluster,
+    GItem item, GCluster cluster,
     bool generate_timeline, TF_Status* out_status);
 static PyObject* TF_DeterminePeakMemoryUsage(
-    const tensorflow::grappler::GrapplerItem* item, tensorflow::grappler::Cluster* cluster,
+    GItem item, GCluster cluster,
     TF_Status* out_status);
 
diff --git a/tensorflow/python/grappler/cluster.py b/tensorflow/python/grappler/cluster.py
index 496f5255b9..cf795fddb7 100644
--- a/tensorflow/python/grappler/cluster.py
+++ b/tensorflow/python/grappler/cluster.py
@@ -46,6 +46,7 @@ class Cluster(object):
         the local machine.
     """
     self._tf_cluster = None
+    self._generate_timeline = not disable_timeline
     with errors.raise_exception_on_not_ok_status() as status:
       if devices is None:
         self._tf_cluster = tf_cluster.TF_NewCluster(
@@ -54,11 +55,10 @@ class Cluster(object):
         devices_serialized = [device.SerializeToString() for device in devices]
         self._tf_cluster = tf_cluster.TF_NewVirtualCluster(
             devices_serialized, status)
-    self._generate_timeline = not disable_timeline
 
   def __del__(self):
     if self._tf_cluster is not None:
-      tf_cluster.TF_DeleteCluster(self._tf_cluster)
+      tf_cluster.TF_ShutdownCluster(self._tf_cluster)
 
   @property
   def tf_cluster(self):
diff --git a/tensorflow/python/grappler/cost_analyzer.i b/tensorflow/python/grappler/cost_analyzer.i
index 0318ff762c..4c0953435b 100644
--- a/tensorflow/python/grappler/cost_analyzer.i
+++ b/tensorflow/python/grappler/cost_analyzer.i
@@ -15,6 +15,7 @@ limitations under the License.
 
 %include "tensorflow/python/lib/core/strings.i"
 %include "tensorflow/python/platform/base.i"
+%include "cluster.i"
 
 %typemap(in) const tensorflow::MetaGraphDef& (tensorflow::MetaGraphDef temp) {
   char* c_string;
@@ -42,8 +43,8 @@ limitations under the License.
 %}
 
 %{
-string GenerateCostReport(const tensorflow::MetaGraphDef& metagraph, bool
-per_node_report, tensorflow::grappler::Cluster* cluster) {
+string GenerateCostReport(const tensorflow::MetaGraphDef& metagraph, bool per_node_report,
+                          GCluster cluster) {
   tensorflow::grappler::ItemConfig cfg;
   cfg.apply_optimizations = false;
   std::unique_ptr<tensorflow::grappler::GrapplerItem> item =
@@ -53,7 +54,7 @@ per_node_report, tensorflow::grappler::Cluster* cluster) {
   }
 
   string suffix;
-  tensorflow::grappler::CostAnalyzer analyzer(*item, cluster, suffix);
+  tensorflow::grappler::CostAnalyzer analyzer(*item, cluster.get(), suffix);
 
   std::stringstream os;
   analyzer.GenerateReport(os, per_node_report);
@@ -62,5 +63,5 @@ per_node_report, tensorflow::grappler::Cluster* cluster) {
 
 %}
 
-string GenerateCostReport(const tensorflow::MetaGraphDef& metagraph, bool
-                          per_node_report, tensorflow::grappler::Cluster* cluster);
+string GenerateCostReport(const tensorflow::MetaGraphDef& metagraph, bool per_node_report,
+                          GCluster cluster);
diff --git a/tensorflow/python/grappler/item.i b/tensorflow/python/grappler/item.i
index 2fa502b81d..7dd79f7c82 100644
--- a/tensorflow/python/grappler/item.i
+++ b/tensorflow/python/grappler/item.i
@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
+%include <std_shared_ptr.i>
 %typemap(in) const tensorflow::MetaGraphDef& (tensorflow::MetaGraphDef temp) {
   char* c_string;
   Py_ssize_t py_size;
@@ -30,7 +31,12 @@ limitations under the License.
   $1 = &temp;
 }
 
-%newobject TF_NewItem;
+// Wrap the item into an object that swig can manipulate. This ensures it will call the object
+// destructor upon garbage collection instead of leaking memory.
+struct GItem {
+  std::shared_ptr<tensorflow::grappler::GrapplerItem> item_;
+};
+
 
 %{
 #include <unordered_set>
@@ -42,8 +48,26 @@ limitations under the License.
 #include "tensorflow/core/lib/core/error_codes.pb.h"
 #include "tensorflow/core/lib/core/status.h"
 #include "tensorflow/core/protobuf/meta_graph.pb.h"
+#include "tensorflow/core/lib/strings/strcat.h"
+
+// Provide the implementation fo the GItem struct here.
+struct GItem {
+  GItem() {}
+  GItem(tensorflow::grappler::GrapplerItem* item) : item_(item) {}
+
+  tensorflow::grappler::GrapplerItem* operator->() const {
+    return item_.get();
+  }
+  const tensorflow::grappler::GrapplerItem& operator*() const {
+    return *item_.get();
+  }
+  bool is_none() const {
+    return item_.get() == nullptr;
+  }
+  std::shared_ptr<tensorflow::grappler::GrapplerItem> item_;
+};
 
-static tensorflow::grappler::GrapplerItem* TF_NewItem(
+static GItem TF_NewItem(
     const tensorflow::MetaGraphDef& meta_graph, bool ignore_colocation,
     bool ignore_user_placement, TF_Status* out_status) {
   if (meta_graph.collection_def().count("train_op") == 0) {
@@ -65,11 +89,11 @@ static tensorflow::grappler::GrapplerItem* TF_NewItem(
     return nullptr;
   }
   tensorflow::Set_TF_Status_from_Status(out_status, tensorflow::Status::OK());
-  return item.release();
+  return GItem(item.release());
 }
 
-static std::vector<string> TF_IdentifyImportantOps(const tensorflow::grappler::GrapplerItem* item) {
-  if (!item) {
+static std::vector<string> TF_IdentifyImportantOps(GItem item) {
+  if (item.is_none()) {
     return {};
   }
 
@@ -91,8 +115,8 @@ static std::vector<string> TF_IdentifyImportantOps(const tensorflow::grappler::G
   return ops;
 }
 
-static PyObject* TF_GetOpProperties(const tensorflow::grappler::GrapplerItem* item) {
-  if (!item) {
+static PyObject* TF_GetOpProperties(GItem item) {
+  if (item.is_none()) {
     Py_RETURN_NONE;
   }
   tensorflow::grappler::GraphProperties properties(*item);
@@ -125,8 +149,8 @@ static PyObject* TF_GetOpProperties(const tensorflow::grappler::GrapplerItem* it
 
 
 // Wrap these functions.
-static tensorflow::grappler::GrapplerItem* TF_NewItem(
+static GItem TF_NewItem(
     const tensorflow::MetaGraphDef& meta_graph, bool ignore_colocation,
     bool ignore_user_placement, TF_Status* out_status);
-static std::vector<string> TF_IdentifyImportantOps(const tensorflow::grappler::GrapplerItem* item);
-static PyObject* TF_GetOpProperties(const tensorflow::grappler::GrapplerItem* item);
+static std::vector<string> TF_IdentifyImportantOps(GItem item);
+static PyObject* TF_GetOpProperties(GItem item);
diff --git a/tensorflow/python/grappler/tf_optimizer.i b/tensorflow/python/grappler/tf_optimizer.i
index 3965c65bb9..f0dd4483a6 100644
--- a/tensorflow/python/grappler/tf_optimizer.i
+++ b/tensorflow/python/grappler/tf_optimizer.i
@@ -15,6 +15,7 @@ limitations under the License.
 
 
 %include "tensorflow/python/platform/base.i"
+%include "cluster.i"
 
 %typemap(in) const tensorflow::MetaGraphDef& (tensorflow::MetaGraphDef temp) {
   char* c_string;
@@ -92,7 +93,7 @@ void DetectDevices(std::unordered_map<string, tensorflow::DeviceProperties>* dev
 }
 
 PyObject* TF_OptimizeGraph(
-      tensorflow::grappler::Cluster* cluster,
+      GCluster cluster,
       const tensorflow::RewriterConfig& rewriter_config,
       const tensorflow::MetaGraphDef& metagraph,
       bool verbose, const string& graph_id, TF_Status* out_status) {
@@ -102,17 +103,10 @@ PyObject* TF_OptimizeGraph(
     std::unique_ptr<tensorflow::grappler::GrapplerItem> grappler_item =
         tensorflow::grappler::GrapplerItemFromMetaGraphDef(graph_id, metagraph, item_config);
 
-    std::unique_ptr<tensorflow::grappler::VirtualCluster> virtual_cluster;
-    if (cluster == nullptr) {
-      std::unordered_map<string, tensorflow::DeviceProperties> device_map;
-      DetectDevices(&device_map);
-      virtual_cluster.reset(new tensorflow::grappler::VirtualCluster(device_map));
-      cluster = virtual_cluster.get();
-    }
     tensorflow::DeviceBase* cpu_device = nullptr;
     tensorflow::GraphDef out_graph;
     tensorflow::grappler::MetaOptimizer optimizer(cpu_device, rewriter_config);
-    tensorflow::Status status = optimizer.Optimize(cluster, *grappler_item, &out_graph);
+    tensorflow::Status status = optimizer.Optimize(cluster.get(), *grappler_item, &out_graph);
     if (verbose) {
       optimizer.PrintResult();
     }
@@ -127,7 +121,7 @@ PyObject* TF_OptimizeGraph(
 
 // Wrap this function
 PyObject* TF_OptimizeGraph(
-    tensorflow::grappler::Cluster* cluster,
+    GCluster cluster,
     const tensorflow::RewriterConfig& rewriter_config,
     const tensorflow::MetaGraphDef& metagraph, bool verbose,
     const string& graph_id, TF_Status* out_status);
diff --git a/tensorflow/python/grappler/tf_optimizer.py b/tensorflow/python/grappler/tf_optimizer.py
index d430dd9e2f..a73a4a98fc 100644
--- a/tensorflow/python/grappler/tf_optimizer.py
+++ b/tensorflow/python/grappler/tf_optimizer.py
@@ -21,6 +21,7 @@ from __future__ import print_function
 from tensorflow.core.framework import graph_pb2
 from tensorflow.python import pywrap_tensorflow as tf_opt
 from tensorflow.python.framework import errors
+from tensorflow.python.grappler import cluster as gcluster
 
 
 def OptimizeGraph(rewriter_config,
@@ -30,8 +31,9 @@ def OptimizeGraph(rewriter_config,
                   cluster=None):
   """Optimize the provided metagraph."""
   with errors.raise_exception_on_not_ok_status() as status:
-    ret_from_swig = tf_opt.TF_OptimizeGraph(None if cluster is None else
-                                            cluster.tf_cluster,
+    if cluster is None:
+      cluster = gcluster.Cluster()
+    ret_from_swig = tf_opt.TF_OptimizeGraph(cluster.tf_cluster,
                                             rewriter_config.SerializeToString(),
                                             metagraph.SerializeToString(),
                                             verbose, graph_id, status)
-- 
GitLab


From b76620aed0c02d01a823df57e06a67bc4c1424c0 Mon Sep 17 00:00:00 2001
From: James Keeling <jtkeeling@google.com>
Date: Wed, 22 Nov 2017 17:58:34 -0800
Subject: [PATCH 0275/1225] Default to previously specified variables when
 minimizing with KfacOptimizer

If no variables are specified to minimize or compute_gradients, the default was previously to use all trainable variables. However, KfacOptimizer has a list of variables it is able to train, so we should use that instead.

PiperOrigin-RevId: 176720954
---
 .../contrib/kfac/python/ops/optimizer.py       | 18 +++++++++++++-----
 1 file changed, 13 insertions(+), 5 deletions(-)

diff --git a/tensorflow/contrib/kfac/python/ops/optimizer.py b/tensorflow/contrib/kfac/python/ops/optimizer.py
index 98f8e7b230..ecf7f3e4e5 100644
--- a/tensorflow/contrib/kfac/python/ops/optimizer.py
+++ b/tensorflow/contrib/kfac/python/ops/optimizer.py
@@ -151,16 +151,24 @@ class KfacOptimizer(gradient_descent.GradientDescentOptimizer):
     return self._fisher_est.damping
 
   def minimize(self, *args, **kwargs):
-
-    if "var_list" not in kwargs:
-      kwargs["var_list"] = tf_variables.trainable_variables()
-
+    kwargs["var_list"] = kwargs.get("var_list") or self.variables
     if set(kwargs["var_list"]) != set(self.variables):
       raise ValueError("var_list doesn't match with set of Fisher-estimating "
                        "variables.")
-
     return super(KfacOptimizer, self).minimize(*args, **kwargs)
 
+  def compute_gradients(self, *args, **kwargs):
+    # args[1] could be our var_list
+    if len(args) > 1:
+      var_list = args[1]
+    else:
+      kwargs["var_list"] = kwargs.get("var_list") or self.variables
+      var_list = kwargs["var_list"]
+    if set(var_list) != set(self.variables):
+      raise ValueError("var_list doesn't match with set of Fisher-estimating "
+                       "variables.")
+    return super(KfacOptimizer, self).compute_gradients(*args, **kwargs)
+
   def apply_gradients(self, grads_and_vars, *args, **kwargs):
     """Applies gradients to variables.
 
-- 
GitLab


From c714df4c87466e632be4c78f8f55a2ffe47fef62 Mon Sep 17 00:00:00 2001
From: Yifei Feng <yifeif@google.com>
Date: Wed, 22 Nov 2017 18:03:05 -0800
Subject: [PATCH 0276/1225] Remove duplicate op registration.

---
 tensorflow/core/kernels/depthwise_conv_op.cc | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/tensorflow/core/kernels/depthwise_conv_op.cc b/tensorflow/core/kernels/depthwise_conv_op.cc
index 30ecd0c2ba..2759ecb2f1 100644
--- a/tensorflow/core/kernels/depthwise_conv_op.cc
+++ b/tensorflow/core/kernels/depthwise_conv_op.cc
@@ -427,11 +427,6 @@ TF_CALL_double(REGISTER_CPU_KERNEL);
 #endif
 
 #if GOOGLE_CUDA
-REGISTER_KERNEL_BUILDER(Name("DepthwiseConv2dNative")
-                            .Device(DEVICE_GPU)
-                            .TypeConstraint<Eigen::half>("T"),
-                        DepthwiseConv2dNativeOp<GPUDevice, Eigen::half>);
-
 REGISTER_KERNEL_BUILDER(
     Name("DepthwiseConv2dNative").Device(DEVICE_GPU).TypeConstraint<Eigen::half>("T"),
     DepthwiseConv2dNativeOp<GPUDevice, Eigen::half>);
-- 
GitLab


From ebd26397ab708242d22880f789b168eb16897691 Mon Sep 17 00:00:00 2001
From: Yao Zhang <yaozhang@google.com>
Date: Wed, 22 Nov 2017 18:22:07 -0800
Subject: [PATCH 0277/1225] Do not convert layout for FusedBatchNormGrad if
 is_training is false (freeze mode), since NCHW is not supported on GPU in
 this case.

PiperOrigin-RevId: 176722850
---
 .../grappler/optimizers/layout_optimizer.cc   | 14 +++++
 .../optimizers/layout_optimizer_test.cc       | 58 +++++++++++++++++++
 2 files changed, 72 insertions(+)

diff --git a/tensorflow/core/grappler/optimizers/layout_optimizer.cc b/tensorflow/core/grappler/optimizers/layout_optimizer.cc
index 31c3ba6863..d25d9d99c5 100644
--- a/tensorflow/core/grappler/optimizers/layout_optimizer.cc
+++ b/tensorflow/core/grappler/optimizers/layout_optimizer.cc
@@ -714,10 +714,24 @@ class FusedBatchNormGradProcessor : public NodeProcessor {
       : NodeProcessor(opt_cxt) {}
 
  protected:
+  bool ShouldProcess() const override {
+    return NodeProcessor::ShouldProcess() && IsTraining();
+  }
+
   std::vector<int> GetInputPos() const override {
     std::vector<int> input_pos = {0, 1};
     return input_pos;
   }
+
+ private:
+  bool IsTraining() const {
+    if (node_->attr().find("is_training") != node_->attr().end()) {
+      if (node_->attr().at("is_training").b()) {
+        return true;
+      }
+    }
+    return false;
+  }
 };
 
 class MaxPoolGradProcessor : public NodeProcessor {
diff --git a/tensorflow/core/grappler/optimizers/layout_optimizer_test.cc b/tensorflow/core/grappler/optimizers/layout_optimizer_test.cc
index d4ab42ad60..20a971629c 100644
--- a/tensorflow/core/grappler/optimizers/layout_optimizer_test.cc
+++ b/tensorflow/core/grappler/optimizers/layout_optimizer_test.cc
@@ -114,6 +114,36 @@ class LayoutOptimizerTest : public ::testing::Test {
     return tensor;
   }
 
+  Output SimpleFusedBatchNormGrad(tensorflow::Scope* s, bool is_training) {
+    int batch_size = 16;
+    int input_height = 8;
+    int input_width = 8;
+    int input_channels = 3;
+    TensorShape shape({batch_size, input_height, input_width, input_channels});
+    Tensor data(DT_FLOAT, shape);
+    test::FillIota<float>(&data, 1.0f);
+    Output x = ops::Const(s->WithOpName("Input"), Input::Initializer(data));
+    Output y_backprop =
+        ops::Const(s->WithOpName("YBackprop"), Input::Initializer(data));
+
+    TensorShape shape_vector({input_channels});
+    Tensor data_vector(DT_FLOAT, shape_vector);
+    test::FillIota<float>(&data_vector, 2.0f);
+    Output scale =
+        ops::Const(s->WithOpName("Scale"), Input::Initializer(data_vector));
+    Output reserve1 =
+        ops::Const(s->WithOpName("Reserve1"), Input::Initializer(data_vector));
+    Output reserve2 =
+        ops::Const(s->WithOpName("Reserve2"), Input::Initializer(data_vector));
+
+    ops::FusedBatchNormGrad::Attrs attrs;
+    attrs.is_training_ = is_training;
+    auto output =
+        ops::FusedBatchNormGrad(s->WithOpName("FusedBatchNormGrad"), y_backprop,
+                                x, scale, reserve1, reserve2, attrs);
+    return output.x_backprop;
+  }
+
   std::unique_ptr<VirtualCluster> virtual_cluster_;
 };
 
@@ -341,6 +371,34 @@ TEST_F(LayoutOptimizerTest, CPUDeviceUppercase) {
   EXPECT_EQ(conv_node->attr().at({"data_format"}).s(), "NHWC");
 }
 
+TEST_F(LayoutOptimizerTest, FusedBatchNormGradTrainingTrue) {
+  tensorflow::Scope s = tensorflow::Scope::NewRootScope();
+  auto x_backprop = SimpleFusedBatchNormGrad(&s, true);
+  Output fetch = ops::Identity(s.WithOpName("Fetch"), {x_backprop});
+  GrapplerItem item;
+  TF_CHECK_OK(s.ToGraphDef(&item.graph));
+  LayoutOptimizer optimizer;
+  GraphDef output;
+  Status status = optimizer.Optimize(virtual_cluster_.get(), item, &output);
+  NodeMap node_map(&output);
+  auto conv_node = node_map.GetNode("FusedBatchNormGrad");
+  EXPECT_EQ(conv_node->attr().at({"data_format"}).s(), "NCHW");
+}
+
+TEST_F(LayoutOptimizerTest, FusedBatchNormGradTrainingFalse) {
+  tensorflow::Scope s = tensorflow::Scope::NewRootScope();
+  auto x_backprop = SimpleFusedBatchNormGrad(&s, false);
+  Output fetch = ops::Identity(s.WithOpName("Fetch"), {x_backprop});
+  GrapplerItem item;
+  TF_CHECK_OK(s.ToGraphDef(&item.graph));
+  LayoutOptimizer optimizer;
+  GraphDef output;
+  Status status = optimizer.Optimize(virtual_cluster_.get(), item, &output);
+  NodeMap node_map(&output);
+  auto conv_node = node_map.GetNode("FusedBatchNormGrad");
+  EXPECT_EQ(conv_node->attr().at({"data_format"}).s(), "NHWC");
+}
+
 }  // namespace
 }  // namespace grappler
 }  // namespace tensorflow
-- 
GitLab


From 622a6ec6dc79c458aac03dafffe0f0fef48e9c01 Mon Sep 17 00:00:00 2001
From: Shanqing Cai <cais@google.com>
Date: Wed, 22 Nov 2017 18:30:37 -0800
Subject: [PATCH 0278/1225] Replace assertAlmostEqual with assertAllClose in
 boosted_trees losses_test.py

Calling assertAlmostEqual() with the places kwarg on numpy.ndarray leads to calling __round__ on numpy.ndarray, which is no consistently defined for all relevant platforms and numpy versions.

PiperOrigin-RevId: 176723366
---
 .../boosted_trees/python/utils/losses_test.py | 35 +++++++------------
 1 file changed, 12 insertions(+), 23 deletions(-)

diff --git a/tensorflow/contrib/boosted_trees/python/utils/losses_test.py b/tensorflow/contrib/boosted_trees/python/utils/losses_test.py
index dde1642686..ccb8509c03 100644
--- a/tensorflow/contrib/boosted_trees/python/utils/losses_test.py
+++ b/tensorflow/contrib/boosted_trees/python/utils/losses_test.py
@@ -18,8 +18,6 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-import math
-
 import numpy as np
 
 from tensorflow.contrib.boosted_trees.python.utils import losses
@@ -60,35 +58,27 @@ class LossesTest(test_util.TensorFlowTestCase):
       neg_loss = loss_for_negatives.eval()
       # For positive labels, points <= 0.3 get max loss of e.
       # For negative labels, these points have minimum loss of 1/e.
-      for i in range(2):
-        self.assertAlmostEqual(math.exp(1), pos_loss[i], places=4)
-        self.assertAlmostEqual(math.exp(-1), neg_loss[i], places=4)
+      self.assertAllClose(np.exp(np.ones([2, 1])), pos_loss[:2], atol=1e-4)
+      self.assertAllClose(np.exp(-np.ones([2, 1])), neg_loss[:2], atol=1e-4)
 
       # For positive lables, p oints with predictions 0.7 and larger get minimum
       # loss value of 1/e. For negative labels, these points are wrongly
       # classified and get loss e.
-      for i in range(6, 10):
-        self.assertAlmostEqual(math.exp(-1), pos_loss[i], places=4)
-        self.assertAlmostEqual(math.exp(1), neg_loss[i], places=4)
+      self.assertAllClose(np.exp(-np.ones([4, 1])), pos_loss[6:10], atol=1e-4)
+      self.assertAllClose(np.exp(np.ones([4, 1])), neg_loss[6:10], atol=1e-4)
 
       # Points in between 0.5-eps, 0..5+eps get loss exp(-label_m*y), where
       # y = 1/eps *x -1/(2eps), where x is the probability and label_m is either
       # 1 or -1 (for label of 0).
-      for i in range(2, 6):
-        self.assertAlmostEqual(
-            math.exp(-1.0 * (predictions_probs[i] * 1.0 / eps - 0.5 / eps)),
-            pos_loss[i],
-            places=4)
-        self.assertAlmostEqual(
-            math.exp(1.0 * (predictions_probs[i] * 1.0 / eps - 0.5 / eps)),
-            neg_loss[i],
-            places=4)
+      self.assertAllClose(
+          np.exp(-(predictions_probs[2:6] * 1.0 / eps - 0.5 / eps)),
+          pos_loss[2:6], atol=1e-4)
+      self.assertAllClose(
+          np.exp(predictions_probs[2:6] * 1.0 / eps - 0.5 / eps),
+          neg_loss[2:6], atol=1e-4)
 
   def test_per_example_squared_loss(self):
 
-    def _squared_loss(p, y):
-      return np.mean(1.0 * (p - y) * (p - y))
-
     labels = np.array([[0.123], [224.2], [-3], [2], [.3]], dtype=np.float32)
     weights = array_ops.ones([5, 1], dtypes.float32)
     predictions = np.array(
@@ -99,9 +89,8 @@ class LossesTest(test_util.TensorFlowTestCase):
                                                        predictions)
 
       loss = loss_tensor.eval()
-      for i in range(5):
-        self.assertAlmostEqual(
-            _squared_loss(labels[i], predictions[i]), loss[i], places=4)
+      self.assertAllClose(
+          np.square(labels[:5] - predictions[:5]), loss[:5], atol=1e-4)
 
 
 if __name__ == "__main__":
-- 
GitLab


From 34a69568752ef8badbe6aab5d1f568821c19e19c Mon Sep 17 00:00:00 2001
From: Saurabh Saxena <srbs@google.com>
Date: Wed, 22 Nov 2017 19:13:54 -0800
Subject: [PATCH 0279/1225] Fix flaky test.

PiperOrigin-RevId: 176725659
---
 tensorflow/contrib/data/python/kernel_tests/BUILD | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/contrib/data/python/kernel_tests/BUILD b/tensorflow/contrib/data/python/kernel_tests/BUILD
index 995ce6d654..c017cd9c77 100644
--- a/tensorflow/contrib/data/python/kernel_tests/BUILD
+++ b/tensorflow/contrib/data/python/kernel_tests/BUILD
@@ -331,7 +331,7 @@ py_test(
 
 py_test(
     name = "reader_dataset_ops_test",
-    size = "small",
+    size = "medium",
     srcs = ["reader_dataset_ops_test.py"],
     srcs_version = "PY2AND3",
     deps = [
-- 
GitLab


From 059e35acc985e99e522ffe89df12cd357871309b Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 22 Nov 2017 19:28:47 -0800
Subject: [PATCH 0280/1225] Minor cleanup - replace users()[0] with
 users->front().

PiperOrigin-RevId: 176726299
---
 tensorflow/compiler/xla/service/hlo_verifier.cc          | 4 ++--
 tensorflow/compiler/xla/service/while_loop_simplifier.cc | 5 +++--
 2 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/tensorflow/compiler/xla/service/hlo_verifier.cc b/tensorflow/compiler/xla/service/hlo_verifier.cc
index f2a739c1e2..15188c4057 100644
--- a/tensorflow/compiler/xla/service/hlo_verifier.cc
+++ b/tensorflow/compiler/xla/service/hlo_verifier.cc
@@ -283,7 +283,7 @@ class ShapeVerifier : public DfsHloVisitor {
 
   Status HandleSend(HloInstruction* send) override {
     TF_RET_CHECK(send->users().size() == 1);
-    const HloInstruction* send_done = send->users()[0];
+    const HloInstruction* send_done = send->users().front();
     TF_RET_CHECK(send_done->opcode() == HloOpcode::kSendDone);
     TF_RETURN_IF_ERROR(CheckSameChannel(send, send_done));
     return CheckShape(
@@ -301,7 +301,7 @@ class ShapeVerifier : public DfsHloVisitor {
 
   Status HandleRecv(HloInstruction* recv) override {
     TF_RET_CHECK(recv->users().size() == 1);
-    const HloInstruction* recv_done = recv->users()[0];
+    const HloInstruction* recv_done = recv->users().front();
     TF_RET_CHECK(recv_done->opcode() == HloOpcode::kRecvDone);
     TF_RETURN_IF_ERROR(CheckSameChannel(recv, recv_done));
     return CheckShape(recv,
diff --git a/tensorflow/compiler/xla/service/while_loop_simplifier.cc b/tensorflow/compiler/xla/service/while_loop_simplifier.cc
index 8f335be794..b38ee907d7 100644
--- a/tensorflow/compiler/xla/service/while_loop_simplifier.cc
+++ b/tensorflow/compiler/xla/service/while_loop_simplifier.cc
@@ -342,7 +342,7 @@ static StatusOr<bool> TryRemoveDeadWhileParams(HloInstruction* while_op) {
       //
       // Careful: HloInstruction::operand_index returns the first index the
       // operand appears in, but it may appear more than once!
-      if (user->user_count() == 1 && user->users()[0] == while_body_root &&
+      if (user->user_count() == 1 && user->users().front() == while_body_root &&
           while_body_root->operand_index(user) == user->tuple_index() &&
           std::count(while_body_root->operands().begin(),
                      while_body_root->operands().end(), user) == 1) {
@@ -444,7 +444,8 @@ static StatusOr<bool> TryRemoveDeadWhileParams(HloInstruction* while_op) {
         // This is a GTE of an index that we've removed.  Remove it from the
         // cloned computation.
         CHECK(user->user_count() == 0 ||
-              user->user_count() == 1 && user->users()[0] == while_body_root)
+              user->user_count() == 1 &&
+                  user->users().front() == while_body_root)
             << "Instruction " << user->ToStringNoMetadata()
             << " should be unused (except by root of while body), but has "
                "users: {"
-- 
GitLab


From d73e8b36d1332723f5819d07f8c44e88c49c7cec Mon Sep 17 00:00:00 2001
From: Saurabh Saxena <srbs@google.com>
Date: Wed, 22 Nov 2017 21:20:41 -0800
Subject: [PATCH 0281/1225] Make PrefetchDataset saveable.

PiperOrigin-RevId: 176732156
---
 .../contrib/data/python/kernel_tests/BUILD    |  12 ++
 .../kernel_tests/prefetch_dataset_op_test.py  |  39 +++++
 tensorflow/core/kernels/BUILD                 |   1 +
 .../core/kernels/prefetch_dataset_op.cc       | 141 +++++++++++++++++-
 4 files changed, 186 insertions(+), 7 deletions(-)
 create mode 100644 tensorflow/contrib/data/python/kernel_tests/prefetch_dataset_op_test.py

diff --git a/tensorflow/contrib/data/python/kernel_tests/BUILD b/tensorflow/contrib/data/python/kernel_tests/BUILD
index c017cd9c77..3280f1fc35 100644
--- a/tensorflow/contrib/data/python/kernel_tests/BUILD
+++ b/tensorflow/contrib/data/python/kernel_tests/BUILD
@@ -303,6 +303,18 @@ py_test(
     ],
 )
 
+py_test(
+    name = "prefetch_dataset_op_test",
+    size = "small",
+    srcs = ["prefetch_dataset_op_test.py"],
+    srcs_version = "PY2AND3",
+    deps = [
+        ":dataset_serialization_test",
+        "//tensorflow/python:platform",
+        "//tensorflow/python/data/ops:dataset_ops",
+    ],
+)
+
 py_test(
     name = "range_dataset_op_test",
     size = "small",
diff --git a/tensorflow/contrib/data/python/kernel_tests/prefetch_dataset_op_test.py b/tensorflow/contrib/data/python/kernel_tests/prefetch_dataset_op_test.py
new file mode 100644
index 0000000000..3d120a3071
--- /dev/null
+++ b/tensorflow/contrib/data/python/kernel_tests/prefetch_dataset_op_test.py
@@ -0,0 +1,39 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for the experimental input pipeline ops."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from tensorflow.contrib.data.python.kernel_tests import dataset_serialization_test_base
+from tensorflow.python.data.ops import dataset_ops
+from tensorflow.python.platform import test
+
+
+class PrefetchDatasetSerializationTest(
+    dataset_serialization_test_base.DatasetSerializationTestBase):
+
+  def build_dataset(self, seed):
+    return dataset_ops.Dataset.range(100).prefetch(10).shuffle(
+        buffer_size=10, seed=seed, reshuffle_each_iteration=False)
+
+  def testCore(self):
+    num_outputs = 100
+    self.run_core_tests(lambda: self.build_dataset(10),
+                        lambda: self.build_dataset(20), num_outputs)
+
+
+if __name__ == "__main__":
+  test.main()
diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD
index b4a5a3c796..b86739eea7 100644
--- a/tensorflow/core/kernels/BUILD
+++ b/tensorflow/core/kernels/BUILD
@@ -6054,6 +6054,7 @@ tf_kernel_library(
         "//tensorflow/core:framework",
         "//tensorflow/core:lib",
         "//tensorflow/core:lib_internal",
+        "//tensorflow/core:protos_all_cc",
     ],
 )
 
diff --git a/tensorflow/core/kernels/prefetch_dataset_op.cc b/tensorflow/core/kernels/prefetch_dataset_op.cc
index 80592aa353..1a6b7e078e 100644
--- a/tensorflow/core/kernels/prefetch_dataset_op.cc
+++ b/tensorflow/core/kernels/prefetch_dataset_op.cc
@@ -14,9 +14,10 @@ limitations under the License.
 ==============================================================================*/
 #include <deque>
 
-#include "tensorflow/core/kernels/dataset.h"
 #include "tensorflow/core/framework/partial_tensor_shape.h"
 #include "tensorflow/core/framework/tensor.h"
+#include "tensorflow/core/kernels/dataset.h"
+#include "tensorflow/core/lib/core/error_codes.pb.h"
 
 namespace tensorflow {
 
@@ -37,14 +38,14 @@ class PrefetchDatasetOp : public UnaryDatasetOpKernel {
     OP_REQUIRES_OK(
         ctx, ParseScalarArgument<int64>(ctx, "buffer_size", &buffer_size));
 
-    *output = new Dataset(input, buffer_size);
+    *output = new Dataset(ctx, input, buffer_size);
   }
 
  private:
-  class Dataset : public DatasetBase {
+  class Dataset : public GraphDatasetBase {
    public:
-    Dataset(const DatasetBase* input, int64 buffer_size)
-        : input_(input), buffer_size_(buffer_size) {
+    Dataset(OpKernelContext* ctx, const DatasetBase* input, int64 buffer_size)
+        : GraphDatasetBase(ctx), input_(input), buffer_size_(buffer_size) {
       input_->Ref();
     }
 
@@ -65,6 +66,18 @@ class PrefetchDatasetOp : public UnaryDatasetOpKernel {
 
     string DebugString() override { return "PrefetchDatasetOp::Dataset"; }
 
+   protected:
+    Status AsGraphDefInternal(OpKernelContext* ctx, DatasetGraphDefBuilder* b,
+                              Node** output) const override {
+      Node* input_graph_node = nullptr;
+      TF_RETURN_IF_ERROR(b->AddParentDataset(ctx, input_, &input_graph_node));
+      Node* buffer_size = nullptr;
+      TF_RETURN_IF_ERROR(b->AddScalar(buffer_size_, &buffer_size));
+      TF_RETURN_IF_ERROR(
+          b->AddDataset(this, {input_graph_node, buffer_size}, output));
+      return Status::OK();
+    }
+
    private:
     class Iterator : public DatasetIterator<Dataset> {
      public:
@@ -119,7 +132,10 @@ class PrefetchDatasetOp : public UnaryDatasetOpKernel {
 
             // Wake the prefetch thread, in case it has been waiting
             // for space in the buffer.
-            cond_var_.notify_one();
+            // Also wake up threads from other calls to GetNext.
+            // TODO(mrry): Consider using different condition variables
+            // for GetNext and Prefetch.
+            cond_var_.notify_all();
             return s;
           } else if (prefetch_thread_finished_) {
             *end_of_sequence = true;
@@ -128,6 +144,69 @@ class PrefetchDatasetOp : public UnaryDatasetOpKernel {
         }
       }
 
+     protected:
+      Status SaveInternal(IteratorStateWriter* writer) override {
+        // Acquire both locks to ensure that the prefetch thread and
+        // all GetNext threads are blocked.
+        mutex_lock parent_l(parent_mu_);
+        mutex_lock l(mu_);
+        TF_RETURN_IF_ERROR(SaveParent(writer, input_impl_));
+        TF_RETURN_IF_ERROR(
+            writer->WriteScalar(full_name("buffer_size"), buffer_.size()));
+        for (size_t i = 0; i < buffer_.size(); i++) {
+          auto& buffer_element = buffer_[i];
+          TF_RETURN_IF_ERROR(WriteStatus(writer, i, buffer_element.status));
+          if (buffer_element.status.ok()) {
+            TF_RETURN_IF_ERROR(writer->WriteScalar(
+                full_name(strings::StrCat("buffer[", i, "].size")),
+                buffer_element.value.size()));
+            for (size_t j = 0; j < buffer_element.value.size(); j++) {
+              TF_RETURN_IF_ERROR(writer->WriteTensor(
+                  strings::StrCat("buffer[", i, "][", j, "]"),
+                  buffer_element.value[j]));
+            }
+          }
+        }
+        return Status::OK();
+      }
+
+      Status RestoreInternal(OpKernelContext* ctx,
+                             IteratorStateReader* reader) override {
+        mutex_lock parent_l(parent_mu_);
+        mutex_lock l(mu_);
+        buffer_.clear();
+        TF_RETURN_IF_ERROR(RestoreParent(ctx, reader, input_impl_));
+        size_t buffer_size;
+        {
+          int64 temp;
+          TF_RETURN_IF_ERROR(
+              reader->ReadScalar(full_name("buffer_size"), &temp));
+          buffer_size = static_cast<size_t>(temp);
+        }
+        for (size_t i = 0; i < buffer_size; i++) {
+          buffer_.emplace_back();
+          auto& buffer_element = buffer_.back();
+          TF_RETURN_IF_ERROR(ReadStatus(reader, i, &buffer_element.status));
+          if (buffer_element.status.ok()) {
+            size_t value_size;
+            {
+              int64 temp;
+              TF_RETURN_IF_ERROR(reader->ReadScalar(
+                  full_name(strings::StrCat("buffer[", i, "].size")), &temp));
+              value_size = static_cast<size_t>(temp);
+            }
+            buffer_element.value.reserve(value_size);
+            for (size_t j = 0; j < value_size; j++) {
+              buffer_element.value.emplace_back();
+              TF_RETURN_IF_ERROR(reader->ReadTensor(
+                  strings::StrCat("buffer[", i, "][", j, "]"),
+                  &buffer_element.value.back()));
+            }
+          }
+        }
+        return Status::OK();
+      }
+
      private:
       // A buffer element comprises a status and (if that status is
       // OK) a vector of tensors, representing an element of the input dataset.
@@ -171,6 +250,12 @@ class PrefetchDatasetOp : public UnaryDatasetOpKernel {
           }
 
           // 2. Read the next element.
+          // Acquire the parent lock since we will be reading an element
+          // from the input iterator. Note that we do not wish to release
+          // this lock till we have added the fetched element to the
+          // `buffer_` else there will be local state that may be missed
+          // by SaveInternal.
+          mutex_lock parent_l(parent_mu_);
           bool end_of_sequence;
           BufferElement buffer_element;
           buffer_element.status = input_impl_->GetNext(
@@ -191,8 +276,50 @@ class PrefetchDatasetOp : public UnaryDatasetOpKernel {
         }
       }
 
+      Status WriteStatus(IteratorStateWriter* writer, size_t index,
+                         const Status& status) EXCLUSIVE_LOCKS_REQUIRED(mu_) {
+        TF_RETURN_IF_ERROR(writer->WriteScalar(
+            CodeKey(index), static_cast<int64>(status.code())));
+        if (!status.ok()) {
+          TF_RETURN_IF_ERROR(writer->WriteScalar(ErrorMessageKey(index),
+                                                 status.error_message()));
+        }
+        return Status::OK();
+      }
+
+      Status ReadStatus(IteratorStateReader* reader, size_t index,
+                        Status* status) EXCLUSIVE_LOCKS_REQUIRED(mu_) {
+        int64 code_int;
+        TF_RETURN_IF_ERROR(reader->ReadScalar(CodeKey(index), &code_int));
+        error::Code code = static_cast<error::Code>(code_int);
+
+        if (code != error::Code::OK) {
+          string error_message;
+          TF_RETURN_IF_ERROR(
+              reader->ReadScalar(ErrorMessageKey(index), &error_message));
+          *status = Status(code, error_message);
+        } else {
+          *status = Status::OK();
+        }
+        return Status::OK();
+      }
+
+      string CodeKey(size_t index) {
+        return full_name(strings::StrCat("status[", index, "].code"));
+      }
+
+      string ErrorMessageKey(size_t index) {
+        return full_name(strings::StrCat("status[", index, "].error_message"));
+      }
+
+      // This mutex is used to ensure exclusivity between multiple threads
+      // reading/writing this iterator's local state.
       mutex mu_;
-      const std::unique_ptr<IteratorBase> input_impl_;
+      // This mutex is used to ensure exclusivity between multiple threads
+      // accessing the parent iterator. We keep this separate from `mu_` to
+      // allow prefetching to run in parallel with GetNext calls.
+      mutex parent_mu_ ACQUIRED_BEFORE(mu_);
+      const std::unique_ptr<IteratorBase> input_impl_ GUARDED_BY(parent_mu_);
       condition_variable cond_var_;
       std::deque<BufferElement> buffer_ GUARDED_BY(mu_);
       std::unique_ptr<Thread> prefetch_thread_ GUARDED_BY(mu_);
-- 
GitLab


From 3dbcc9485e11d5a0643baf1cfc9ccd2e6d407c06 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 22 Nov 2017 23:19:07 -0800
Subject: [PATCH 0282/1225] Registers StridedSlice for int64 tensor on GPUs

PiperOrigin-RevId: 176737730
---
 tensorflow/core/kernels/strided_slice_op.cc        | 1 +
 tensorflow/core/kernels/strided_slice_op_gpu.cu.cc | 1 +
 2 files changed, 2 insertions(+)

diff --git a/tensorflow/core/kernels/strided_slice_op.cc b/tensorflow/core/kernels/strided_slice_op.cc
index 8fc40db3cc..73b6d4cf6a 100644
--- a/tensorflow/core/kernels/strided_slice_op.cc
+++ b/tensorflow/core/kernels/strided_slice_op.cc
@@ -427,6 +427,7 @@ REGISTER_STRIDED_SLICE(bfloat16);
 TF_CALL_GPU_NUMBER_TYPES(REGISTER_GPU);
 TF_CALL_complex64(REGISTER_GPU);
 TF_CALL_complex128(REGISTER_GPU);
+TF_CALL_int64(REGISTER_GPU);
 
 // A special GPU kernel for int32.
 // TODO(b/25387198): Also enable int32 in device memory. This kernel
diff --git a/tensorflow/core/kernels/strided_slice_op_gpu.cu.cc b/tensorflow/core/kernels/strided_slice_op_gpu.cu.cc
index a8487f49f4..8ca27e3b92 100644
--- a/tensorflow/core/kernels/strided_slice_op_gpu.cu.cc
+++ b/tensorflow/core/kernels/strided_slice_op_gpu.cu.cc
@@ -53,6 +53,7 @@ typedef Eigen::GpuDevice GPUDevice;
 TF_CALL_GPU_NUMBER_TYPES(DEFINE_GPU_KERNELS);
 TF_CALL_complex64(DEFINE_GPU_KERNELS);
 TF_CALL_complex128(DEFINE_GPU_KERNELS);
+TF_CALL_int64(DEFINE_GPU_KERNELS);
 DEFINE_GPU_KERNELS(int32);
 
 #undef DEFINE_GPU_KERNELS
-- 
GitLab


From 689bfee5f781d9645f1a415c4c2341119b865a66 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 23 Nov 2017 09:51:34 -0800
Subject: [PATCH 0283/1225] Update TFGAN Readme to link to examples.

PiperOrigin-RevId: 176779769
---
 tensorflow/contrib/gan/README.md | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/tensorflow/contrib/gan/README.md b/tensorflow/contrib/gan/README.md
index 3ab8478070..4bca0a1d62 100644
--- a/tensorflow/contrib/gan/README.md
+++ b/tensorflow/contrib/gan/README.md
@@ -8,7 +8,8 @@ explicitly model the distribution and without writing an explicit loss. For
 example, the generator could learn to draw samples from the distribution of
 natural images. For more details on this technique, see
 ['Generative Adversarial Networks'](https://arxiv.org/abs/1406.2661) by
-Goodfellow et al.
+Goodfellow et al. See [tensorflow/models](https://github.com/tensorflow/models/tree/master/research/gan/) for examples, and [this tutorial](https://github.com/tensorflow/models/tree/master/research/gan/tutorial.ipynb) for an
+introduction.
 
 #### Usage
 ```python
@@ -23,8 +24,8 @@ mix TFGAN, native TF, and other custom frameworks
 * Use already implemented [GAN losses and penalties](https://www.tensorflow.org/code/tensorflow/contrib/gan/python/losses/python/losses_impl.py) (ex Wasserstein loss, gradient penalty, mutual information penalty, etc)
 * [Monitor and visualize](https://www.tensorflow.org/code/tensorflow/contrib/gan/python/eval/python/summaries_impl.py) GAN progress during training, and [evaluate](https://www.tensorflow.org/code/tensorflow/contrib/gan/python/eval/python/classifier_metrics_impl.py) them
 * Use already-implemented [tricks](https://www.tensorflow.org/code/tensorflow/contrib/gan/python/features/python/) to stabilize and improve training
-* Develop based on examples of common GAN setups
-* Use the TFGAN-backed tf.Learn Estimator to easily train a GAN model
+* Develop based on examples of [common GAN setups](https://github.com/tensorflow/models/tree/master/research/gan/)
+* Use the TFGAN-backed [GANEstimator](https://www.tensorflow.org/code/tensorflow/contrib/gan/python/estimator/python/gan_estimator_impl.py) to easily train a GAN model
 * Improvements in TFGAN infrastructure will automatically benefit your TFGAN project
 * Stay up-to-date with research as we add more algorithms
 
@@ -51,7 +52,7 @@ network to evaluate your unconditional generative model. You can also use
 your own pretrained classifier for more specific performance numbers, or use
 other methods for evaluating conditional generative models.
 
-* examples (coming soon):
+* [examples](https://github.com/tensorflow/models/tree/master/research/gan/) and [tutorial](https://github.com/tensorflow/models/tree/master/research/gan/tutorial.ipynb):
 See examples of how to use TFGAN to make GAN training easier, or use the more complicated examples to jumpstart your
 own project. These include unconditional and conditional GANs, InfoGANs,
 adversarial losses on existing networks, and image-to-image translation.
-- 
GitLab


From fc0b63edc0116f2df9847e3083247a4613bc0f26 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 23 Nov 2017 11:20:22 -0800
Subject: [PATCH 0284/1225] Clean up RemoveTrivialPassthroughOp and fix an
 issue in an edge case where we were not erasing the correct arrays.

PiperOrigin-RevId: 176784020
---
 .../remove_trivial_passthrough.cc             | 38 ++++++++++++++++---
 .../remove_trivial_passthrough.h              | 10 +++--
 2 files changed, 39 insertions(+), 9 deletions(-)

diff --git a/tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_passthrough.cc b/tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_passthrough.cc
index d998dcd9f3..047389f69a 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_passthrough.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_passthrough.cc
@@ -63,19 +63,28 @@ bool RemoveTrivialPassthroughOp(GraphTransformation* transformation,
       main_input_array_index = i;
     }
   }
-  CHECK_LE(count_nonconstant_input_arrays, 1);
 
   const string main_input_name = passthru_op->inputs[main_input_array_index];
   const string output_name = passthru_op->outputs[0];
+
+  // Build the list of all input and output arrays of the passthrough node
+  // that we are considering removing. Any of these arrays is a candidate
+  // for being removed as well, if nothing else references it. Doing that
+  // arrays-removal together with the passthrough-node-removal proved too
+  // error-prone.
+  std::vector<string> removal_candidates;
+  for (const string& input : passthru_op->inputs) {
+    removal_candidates.push_back(input);
+  }
+  removal_candidates.push_back(output_name);
+
   if (IsDiscardableArray(*model, output_name)) {
     transformation->AddMessageF(
         "Removing %s, keeping its non-constant input array",
         LogName(*passthru_op));
-    model->arrays.erase(output_name);
     for (const string& input : passthru_op->inputs) {
       if (IsDiscardableArray(*model, input) && input != main_input_name &&
           CountOpsWithInput(*model, input) == 1) {
-        model->arrays.erase(input);
       }
     }
     RerouteEdges(output_name, main_input_name, model);
@@ -85,13 +94,12 @@ bool RemoveTrivialPassthroughOp(GraphTransformation* transformation,
     for (const string& input : passthru_op->inputs) {
       if (IsDiscardableArray(*model, input) &&
           (input == main_input_name || CountOpsWithInput(*model, input) == 1)) {
-        model->arrays.erase(input);
       }
     }
     RerouteEdges(main_input_name, output_name, model);
   } else {
     transformation->AddMessageF(
-        "Cannot remove %s, neither its nonconstant input nor its output may be "
+        "Cannot remove %s, neither its main input nor its output may be "
         "discarded",
         LogName(*passthru_op));
     return false;
@@ -100,6 +108,26 @@ bool RemoveTrivialPassthroughOp(GraphTransformation* transformation,
   // Remove the pass-through node.
   model->operators.erase(passthru_it);
 
+  // Remove any array that is no longer used.
+  for (const string& removal_candidate : removal_candidates) {
+    bool is_referenced = false;
+    for (const auto& op : model->operators) {
+      for (const string& input : op->inputs) {
+        if (input == removal_candidate) {
+          is_referenced = true;
+        }
+      }
+      for (const string& output : op->outputs) {
+        if (output == removal_candidate) {
+          is_referenced = true;
+        }
+      }
+    }
+    if (!is_referenced) {
+      model->arrays.erase(removal_candidate);
+    }
+  }
+
   return true;
 }
 
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_passthrough.h b/tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_passthrough.h
index b72c85c0e5..a06181ca0b 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_passthrough.h
+++ b/tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_passthrough.h
@@ -21,10 +21,12 @@ limitations under the License.
 namespace toco {
 
 // A "passthrough op" is an op that satisfies the following conditions:
-//   1. It has at most one non-constant input (it may have other constant
-//   inputs).
+//   1. One of its inputs is (per the semantics of that op) its "main input"
+//      for some notion of "main input" that is operator-specific; for example,
+//      for a Reshape op, the main input is the array being reshaped, not the
+//      other input which gives the new shape.
 //   2. It has exactly one output.
-//   3. It forwards exactly its single non-constant input to its single output.
+//   3. It forwards exactly its main input to its single output.
 //
 // Examples include:
 //   1. TensorFlow Identity ops. (Have one input).
@@ -34,7 +36,7 @@ namespace toco {
 //      where one of its inputs is a constant array filled with zeros.
 //
 // A passthrough op is "trivial" and can be removed when it is possible to
-// discard either its single non-constant input or output array, rerouting any
+// discard either its main input or output array, rerouting any
 // edge involving it to the other of these two arrays.
 //
 // It is only possible to discard such an array if it is not explicitly
-- 
GitLab


From 1d12f5d1d5aa282e503b58b41de253d1aa50fa25 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 23 Nov 2017 11:22:10 -0800
Subject: [PATCH 0285/1225] An input that is used only by a RNN state should be
 counted as used.

PiperOrigin-RevId: 176784134
---
 tensorflow/contrib/lite/toco/tooling_util.cc | 20 +++++++++++---------
 1 file changed, 11 insertions(+), 9 deletions(-)

diff --git a/tensorflow/contrib/lite/toco/tooling_util.cc b/tensorflow/contrib/lite/toco/tooling_util.cc
index e8fa7a3423..3f289817e0 100644
--- a/tensorflow/contrib/lite/toco/tooling_util.cc
+++ b/tensorflow/contrib/lite/toco/tooling_util.cc
@@ -792,15 +792,19 @@ void FixOperatorOrdering(Model* model) {
 }
 
 // Checks that the --input_arrays of the Model are actually used by at least
-// one of the --output_arrays i.e. that the graph contains a path from each one
-// of the inputs to at least one of the outputs. This catches cases where the
-// user passed the wrong --input_arrays or --output_arrays, which otherwise may
-// result in cryptic error messages.
-void CheckInputUsedByOutputs(const Model& model) {
+// one of the --output_arrays or --rnn_states i.e. that the graph contains a
+// path from each one of the inputs to at least one of the outputs or RNN
+// states. This catches cases where the user passed the wrong --input_arrays or
+// --output_arrays or --rnn_states, which otherwise may result in cryptic error
+// messages.
+void CheckInputsActuallyUsed(const Model& model) {
   std::set<string> used_arrays;
   for (const string& output : model.flags.output_arrays()) {
     used_arrays.insert(output);
   }
+  for (const auto& rnn_state : model.flags.rnn_states()) {
+    used_arrays.insert(rnn_state.back_edge_source_array());
+  }
   for (int i = model.operators.size() - 1; i >= 0; i--) {
     bool is_op_used = false;
     for (const string& op_output : model.operators[i]->outputs) {
@@ -832,7 +836,7 @@ void CheckInvariants(const Model& model) {
   CheckNoOrphanedArray(model);
   CheckArrayFieldsConsistent(model);
   CheckOperatorOrdering(model);
-  CheckInputUsedByOutputs(model);
+  CheckInputsActuallyUsed(model);
 }
 
 void CheckCountInRange(const ::toco::ModelFlags::ModelCheck& model_check,
@@ -1087,9 +1091,7 @@ void ResolveModelFlags(const ModelFlags& model_flags, Model* model) {
     if (!input_array.has_shape()) {
       QCHECK(!input_array_proto.shape().empty())
           << "This model does not have shape defined for input array "
-          << input_array_proto.name()
-          << ", so one must be specified by a non-empty --input_shape "
-             "command-line flag.";
+          << input_array_proto.name();
     }
 
     // Compare/merge the model->flags describing the input_shape with
-- 
GitLab


From 22b011885def468471d5f9e1d544a187e672a76b Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 23 Nov 2017 11:24:05 -0800
Subject: [PATCH 0286/1225] Correctly use IsDiscardableArray in RemoveUnusedOp
 (fixes edge cases where an op is only affecting e.g. a RNN state and should
 not be treated as unused)

PiperOrigin-RevId: 176784216
---
 .../lite/toco/graph_transformations/remove_unused_op.cc      | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/tensorflow/contrib/lite/toco/graph_transformations/remove_unused_op.cc b/tensorflow/contrib/lite/toco/graph_transformations/remove_unused_op.cc
index 1f1f1f6948..0ab301552f 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/remove_unused_op.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/remove_unused_op.cc
@@ -88,7 +88,8 @@ bool RemoveUnusedOp::Run(Model* model, std::size_t op_index) {
   // Remove any input array that is not used by anything else,
   // and that is not the output of some other operator.
   for (const auto& input : op->inputs) {
-    if (CountOpsWithInput(*model, input) == 1 &&
+    if (IsDiscardableArray(*model, input) &&
+        CountOpsWithInput(*model, input) == 1 &&
         !GetOpWithOutput(*model, input)) {
       model->arrays.erase(input);
     }
@@ -98,7 +99,7 @@ bool RemoveUnusedOp::Run(Model* model, std::size_t op_index) {
   for (const auto& output : op->outputs) {
     // If the output array is the model's input array, don't remove that.
     // That's the case when cropping a model at a given --input_array.
-    if (IsInputArray(*model, output)) {
+    if (!IsDiscardableArray(*model, output)) {
       continue;
     }
     // Likewise, if the output array is a RNN state array, don't remove that.
-- 
GitLab


From b20ec5c461031f9375274cf026a7dfff0f903acc Mon Sep 17 00:00:00 2001
From: Gunhan Gulsoy <gunan@google.com>
Date: Thu, 23 Nov 2017 11:40:22 -0800
Subject: [PATCH 0287/1225] Revert "Only install enum34 on Python <3.4
 versions"

---
 tensorflow/tools/pip_package/setup.py | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/tensorflow/tools/pip_package/setup.py b/tensorflow/tools/pip_package/setup.py
index 3852b251d9..c18f20910a 100644
--- a/tensorflow/tools/pip_package/setup.py
+++ b/tensorflow/tools/pip_package/setup.py
@@ -33,12 +33,7 @@ _VERSION = '1.4.0'
 
 REQUIRED_PACKAGES = [
     'absl-py',
-    # weakref.finalize introduced in Python 3.4
-    'backports.weakref >= 1.0rc1; python_version < "3.4"',
-    # enum module introduced in Python 3.4
-    'enum34 >= 1.1.6; python_version < "3.4"',
-    # Needed for unittest.mock in Python 2
-    'mock >= 2.0.0; python_version < "3.0"',
+    'enum34 >= 1.1.6',
     'numpy >= 1.12.1',
     'six >= 1.10.0',
     'protobuf >= 3.4.0',
@@ -57,6 +52,8 @@ if sys.version_info.major == 3:
   REQUIRED_PACKAGES.append('wheel >= 0.26')
 else:
   REQUIRED_PACKAGES.append('wheel')
+  # mock comes with unittest.mock for python3, need to install for python2
+  REQUIRED_PACKAGES.append('mock >= 2.0.0')
 
 # tf-nightly should depend on tb-nightly
 if 'tf_nightly' in project_name:
@@ -65,6 +62,10 @@ if 'tf_nightly' in project_name:
       REQUIRED_PACKAGES[i] = 'tb-nightly >= 1.5.0a0, < 1.6.0a0'
       break
 
+# weakref.finalize was introduced in Python 3.4
+if sys.version_info < (3, 4):
+  REQUIRED_PACKAGES.append('backports.weakref >= 1.0rc1')
+
 # pylint: disable=line-too-long
 CONSOLE_SCRIPTS = [
     'freeze_graph = tensorflow.python.tools.freeze_graph:main',
-- 
GitLab


From 742ca716334886b78b7ccc4cbecb33ad13c27cd5 Mon Sep 17 00:00:00 2001
From: Shivani Agrawal <shivaniagrawal@google.com>
Date: Thu, 23 Nov 2017 14:38:46 -0800
Subject: [PATCH 0288/1225] [tf.data] Saveable iterator for FilterDataset.

PiperOrigin-RevId: 176791620
---
 .../contrib/data/python/kernel_tests/BUILD    |  1 +
 .../kernel_tests/filter_dataset_op_test.py    | 38 +++++++++
 tensorflow/core/graph/graph_def_builder.h     |  4 +
 tensorflow/core/kernels/dataset.h             |  5 +-
 tensorflow/core/kernels/filter_dataset_op.cc  | 79 +++++++++++++++++--
 5 files changed, 118 insertions(+), 9 deletions(-)

diff --git a/tensorflow/contrib/data/python/kernel_tests/BUILD b/tensorflow/contrib/data/python/kernel_tests/BUILD
index 3280f1fc35..3efe5274f4 100644
--- a/tensorflow/contrib/data/python/kernel_tests/BUILD
+++ b/tensorflow/contrib/data/python/kernel_tests/BUILD
@@ -143,6 +143,7 @@ py_test(
     srcs = ["filter_dataset_op_test.py"],
     srcs_version = "PY2AND3",
     deps = [
+        ":dataset_serialization_test",
         "//tensorflow/contrib/data/python/ops:dataset_ops",
         "//tensorflow/python:array_ops",
         "//tensorflow/python:client_testlib",
diff --git a/tensorflow/contrib/data/python/kernel_tests/filter_dataset_op_test.py b/tensorflow/contrib/data/python/kernel_tests/filter_dataset_op_test.py
index 67c49d77e2..95724241ef 100644
--- a/tensorflow/contrib/data/python/kernel_tests/filter_dataset_op_test.py
+++ b/tensorflow/contrib/data/python/kernel_tests/filter_dataset_op_test.py
@@ -19,6 +19,7 @@ from __future__ import print_function
 
 import numpy as np
 
+from tensorflow.contrib.data.python.kernel_tests import dataset_serialization_test_base
 from tensorflow.contrib.data.python.ops import dataset_ops
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import errors
@@ -156,5 +157,42 @@ class FilterDatasetTest(test.TestCase):
         sess.run(get_next)
 
 
+class FilterDatasetSerializationTest(
+    dataset_serialization_test_base.DatasetSerializationTestBase):
+
+  def _build_filter_range_graph(self, div):
+    return dataset_ops.Dataset.range(100).filter(
+        lambda x: math_ops.not_equal(math_ops.mod(x, div), 2))
+
+  def testFilterCore(self):
+    div = 3
+    num_outputs = np.sum([x % 3 is not 2 for x in range(100)])
+    self.run_core_tests(lambda: self._build_filter_range_graph(div),
+                        lambda: self._build_filter_range_graph(div * 2),
+                        num_outputs)
+
+  def _build_filter_dict_graph(self):
+    return dataset_ops.Dataset.range(10).map(
+        lambda x: {"foo": x * 2, "bar": x ** 2}).filter(
+            lambda d: math_ops.equal(d["bar"] % 2, 0)).map(
+                lambda d: d["foo"] + d["bar"])
+
+  def testFilterDictCore(self):
+    num_outputs = np.sum([(x**2) % 2 == 0 for x in range(10)])
+    self.run_core_tests(self._build_filter_dict_graph, None, num_outputs)
+
+  def _build_sparse_filter(self):
+
+    def _map_fn(i):
+      return sparse_tensor.SparseTensor(
+          indices=[[0, 0]], values=(i * [1]), dense_shape=[1, 1]), i
+
+    def _filter_fn(_, i):
+      return math_ops.equal(i % 2, 0)
+
+    return dataset_ops.Dataset.range(10).map(_map_fn).filter(_filter_fn).map(
+        lambda x, i: x)
+
+
 if __name__ == "__main__":
   test.main()
diff --git a/tensorflow/core/graph/graph_def_builder.h b/tensorflow/core/graph/graph_def_builder.h
index b389cd8053..a2c0c4d553 100644
--- a/tensorflow/core/graph/graph_def_builder.h
+++ b/tensorflow/core/graph/graph_def_builder.h
@@ -99,6 +99,10 @@ class GraphDefBuilder {
     // Use this to skip processing that may depend on prior results.
     bool HaveError() const { return status_ != nullptr && !status_->ok(); }
 
+    // Returns a string representation of the status associated with *this.
+    // Returns the string `"OK"` if the status doesn't have any error.
+    string StatusToString() const { return status_->ToString(); }
+
     // Given the Op type name, return a name for a node of that type.
     // Uses the value set in WithName() if that has been called.  Otherwise,
     // returns a name built out of the Op type name.
diff --git a/tensorflow/core/kernels/dataset.h b/tensorflow/core/kernels/dataset.h
index 39c10163cf..afbebb0692 100644
--- a/tensorflow/core/kernels/dataset.h
+++ b/tensorflow/core/kernels/dataset.h
@@ -167,7 +167,8 @@ class GraphDefBuilderWrapper {
           opts->WithAttr(attr.first, attr.second)));
     }
     if (opts->HaveError()) {
-      return errors::Internal("AddDataset: Error building Options.");
+      return errors::Internal("AddDataset: Failed to build Options with error ",
+                              opts->StatusToString());
     }
     NodeBuilder node_builder(opts->GetNameForOp(op_type_name), op_type_name,
                              opts->op_registry());
@@ -191,7 +192,7 @@ class GraphDefBuilderWrapper {
     *output = opts->FinalizeBuilder(&node_builder);
     if (*output == nullptr) {
       return errors::Internal("AddDataset: Failed to build ", op_type_name,
-                              " op.");
+                              " op with error ", opts->StatusToString());
     }
     return Status::OK();
   }
diff --git a/tensorflow/core/kernels/filter_dataset_op.cc b/tensorflow/core/kernels/filter_dataset_op.cc
index a69040b3bb..e4d80e4ce3 100644
--- a/tensorflow/core/kernels/filter_dataset_op.cc
+++ b/tensorflow/core/kernels/filter_dataset_op.cc
@@ -51,17 +51,21 @@ class FilterDatasetOp : public UnaryDatasetOpKernel {
                                                  std::move(other_arguments),
                                                  &captured_func));
 
-    *output = new Dataset(input, std::move(captured_func));
+    *output = new Dataset(ctx, input, func_, std::move(captured_func));
   }
 
  private:
   const int graph_def_version_;
 
-  class Dataset : public DatasetBase {
+  class Dataset : public GraphDatasetBase {
    public:
-    Dataset(const DatasetBase* input,
+    Dataset(OpKernelContext* ctx, const DatasetBase* input,
+            const NameAttrList& func,
             std::unique_ptr<CapturedFunction> captured_func)
-        : input_(input), captured_func_(std::move(captured_func)) {
+        : GraphDatasetBase(ctx),
+          input_(input),
+          func_(func),
+          captured_func_(std::move(captured_func)) {
       input_->Ref();
     }
 
@@ -82,6 +86,35 @@ class FilterDatasetOp : public UnaryDatasetOpKernel {
 
     string DebugString() override { return "FilterDatasetOp::Dataset"; }
 
+   protected:
+    Status AsGraphDefInternal(OpKernelContext* ctx, DatasetGraphDefBuilder* b,
+                              Node** output) const override {
+      TF_RETURN_IF_ERROR(b->AddFunction(ctx, func_.name()));
+      Node* input_graph_node;
+      TF_RETURN_IF_ERROR(b->AddParentDataset(ctx, input_, &input_graph_node));
+
+      DataTypeVector other_arguments_types;
+      other_arguments_types.reserve(captured_func_->captured_inputs().size());
+      std::vector<NodeBuilder::NodeOut> other_arguments;
+      other_arguments.reserve(captured_func_->captured_inputs().size());
+      for (const Tensor& t : captured_func_->captured_inputs()) {
+        Node* node;
+        TF_RETURN_IF_ERROR(b->AddTensor(t, &node));
+        other_arguments.emplace_back(node);
+        other_arguments_types.emplace_back(t.dtype());
+      }
+      AttrValue f;
+      b->BuildAttrValue(func_, &f);
+      AttrValue other_arguments_types_attr;
+      b->BuildAttrValue(other_arguments_types, &other_arguments_types_attr);
+
+      TF_RETURN_IF_ERROR(b->AddDataset(
+          this, {{0, input_graph_node}}, {{1, other_arguments}},
+          {{"predicate", f}, {"Targuments", other_arguments_types_attr}},
+          output));
+      return Status::OK();
+    }
+
    private:
     class Iterator : public DatasetIterator<Dataset> {
      public:
@@ -98,9 +131,18 @@ class FilterDatasetOp : public UnaryDatasetOpKernel {
         // non-deterministic order.
         bool matched;
         do {
-          TF_RETURN_IF_ERROR(
-              input_impl_->GetNext(ctx, out_tensors, end_of_sequence));
+          {
+            tf_shared_lock l(mu_);
+            if (!input_impl_) {
+              *end_of_sequence = true;
+              return Status::OK();
+            }
+            TF_RETURN_IF_ERROR(
+                input_impl_->GetNext(ctx, out_tensors, end_of_sequence));
+          }
           if (*end_of_sequence) {
+            mutex_lock l(mu_);
+            input_impl_.reset();
             return Status::OK();
           }
 
@@ -139,11 +181,34 @@ class FilterDatasetOp : public UnaryDatasetOpKernel {
         return Status::OK();
       }
 
+     protected:
+      Status SaveInternal(IteratorStateWriter* writer) override {
+        mutex_lock l(mu_);
+        if (input_impl_)
+          TF_RETURN_IF_ERROR(SaveParent(writer, input_impl_));
+        else
+          TF_RETURN_IF_ERROR(
+              writer->WriteScalar(full_name("input_impls_empty"), ""));
+        return Status::OK();
+      }
+
+      Status RestoreInternal(OpKernelContext* ctx,
+                             IteratorStateReader* reader) override {
+        mutex_lock l(mu_);
+        if (reader->Contains(full_name("input_impls_empty")))
+          input_impl_.reset();
+        else
+          TF_RETURN_IF_ERROR(RestoreParent(ctx, reader, input_impl_));
+        return Status::OK();
+      }
+
      private:
-      const std::unique_ptr<IteratorBase> input_impl_;
+      mutex mu_;
+      std::unique_ptr<IteratorBase> input_impl_ GUARDED_BY(mu_);
     };
 
     const DatasetBase* const input_;
+    const NameAttrList func_;
     const std::unique_ptr<CapturedFunction> captured_func_;
   };
 
-- 
GitLab


From fed333479868935d24ec104b2ce9e9ac5dadf565 Mon Sep 17 00:00:00 2001
From: Max Galkin <maxgalkin@google.com>
Date: Thu, 23 Nov 2017 16:48:38 -0800
Subject: [PATCH 0289/1225] Some extra checks and warnings in grappler to
 provide more helpful error messages when inputs are imperfect (cost_graph is
 empty and it causes graph_properties to be empty).

PiperOrigin-RevId: 176796142
---
 tensorflow/core/grappler/costs/graph_properties.cc        | 3 +++
 tensorflow/core/grappler/costs/op_level_cost_estimator.cc | 7 ++++++-
 2 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/tensorflow/core/grappler/costs/graph_properties.cc b/tensorflow/core/grappler/costs/graph_properties.cc
index c254fbef7a..dd389de636 100644
--- a/tensorflow/core/grappler/costs/graph_properties.cc
+++ b/tensorflow/core/grappler/costs/graph_properties.cc
@@ -977,6 +977,9 @@ Status GraphProperties::AnnotateOutputShapes(GraphDef* output_graph_def) const {
 }
 
 Status GraphProperties::InferFromCostGraph(const CostGraphDef& cost_graph) {
+  if (cost_graph.node_size() == 0) {
+    LOG(WARNING) << "cost_graph is empty: nothing can be inferred!";
+  }
   std::unordered_map<string, const CostGraphDef::Node*> name_to_cost;
   std::unordered_map<string, const NodeDef*> name_to_node;  // Empty
   for (auto& node : cost_graph.node()) {
diff --git a/tensorflow/core/grappler/costs/op_level_cost_estimator.cc b/tensorflow/core/grappler/costs/op_level_cost_estimator.cc
index f7905d7798..b1e04ceec8 100644
--- a/tensorflow/core/grappler/costs/op_level_cost_estimator.cc
+++ b/tensorflow/core/grappler/costs/op_level_cost_estimator.cc
@@ -510,7 +510,12 @@ int64 OpLevelCostEstimator::CountMatMulOperations(
     bool* found_unknown_shapes) const {
   double ops = 0;
 
-  // first matrix
+  if (op_features.inputs_size() < 2) {
+    LOG(ERROR) << "Need 2 inputs but got " << op_features.inputs_size();
+    *found_unknown_shapes = true;
+    return 0;
+  }
+
   auto& a_matrix = op_features.inputs(0);
   auto& b_matrix = op_features.inputs(1);
 
-- 
GitLab


From 080e432f2bd5566946887ef383acf0b5d34d150a Mon Sep 17 00:00:00 2001
From: Sergio Guadarrama <sguada@google.com>
Date: Fri, 24 Nov 2017 10:37:32 -0800
Subject: [PATCH 0290/1225] Make sure that control_dependencies execute any
 pending dependency in Eager mode.

PiperOrigin-RevId: 176855214
---
 tensorflow/python/framework/ops.py      | 11 ++++++++++-
 tensorflow/python/framework/ops_test.py | 23 +++++++++++++++++++++++
 2 files changed, 33 insertions(+), 1 deletion(-)

diff --git a/tensorflow/python/framework/ops.py b/tensorflow/python/framework/ops.py
index dc4ffb1747..bcc794b9a9 100644
--- a/tensorflow/python/framework/ops.py
+++ b/tensorflow/python/framework/ops.py
@@ -4475,11 +4475,15 @@ def control_dependencies(control_inputs):
   See @{tf.Graph.control_dependencies}
   for more details.
 
+  When eager execution is enabled, any callable object in the `control_inputs`
+  list will be called.
+
   Args:
     control_inputs: A list of `Operation` or `Tensor` objects which
       must be executed or computed before running the operations
       defined in the context.  Can also be `None` to clear the control
-      dependencies.
+      dependencies. If eager execution is enabled, any callable object in the
+      `control_inputs` list will be called.
 
   Returns:
    A context manager that specifies control dependencies for all
@@ -4488,6 +4492,11 @@ def control_dependencies(control_inputs):
   if context.in_graph_mode():
     return get_default_graph().control_dependencies(control_inputs)
   else:
+    if control_inputs:
+      # Excute any pending callables.
+      for control in control_inputs:
+        if callable(control):
+          control()
     return _NullContextmanager()
 
 
diff --git a/tensorflow/python/framework/ops_test.py b/tensorflow/python/framework/ops_test.py
index 7ae7b5cb7f..ac35f6f4f5 100644
--- a/tensorflow/python/framework/ops_test.py
+++ b/tensorflow/python/framework/ops_test.py
@@ -1464,6 +1464,29 @@ class ControlDependenciesTest(test_util.TensorFlowTestCase):
     # e should be dominated by c.
     self.assertEqual(e.op.control_inputs, [])
 
+  @test_util.run_in_graph_and_eager_modes()
+  def testEager(self):
+    def future():
+      future.calls += 1
+      return constant_op.constant(2.0)
+    future.calls = 0
+
+    if context.in_graph_mode():
+      g = ops.Graph()
+      with g.as_default():
+        a = constant_op.constant(1.0)
+        b = future()
+        with g.control_dependencies([a, b]):
+          c = constant_op.constant(3.0)
+      self.assertEqual(c.op.control_inputs, [a.op, b.op])
+      self.assertEqual(future.calls, 1)
+    else:
+      a = constant_op.constant(1.0)
+      b = future
+      with ops.control_dependencies([a, b]):
+        c = constant_op.constant(3.0)
+      self.assertEqual(future.calls, 1)
+
   def testBasicWithConversion(self):
     g = ops.Graph()
     a = _apply_op(g, "FloatOutput", [], [dtypes.float32])
-- 
GitLab


From 93bce00552ac70cc2c9b72e5742f9de87d72985a Mon Sep 17 00:00:00 2001
From: Makoto Uchida <muchida@google.com>
Date: Fri, 24 Nov 2017 22:18:53 -0800
Subject: [PATCH 0291/1225] Accept None vocabulary_size to
 categorical_column_with_vocabulary_file()

Defaults to the length of the given vocabulary file.

PiperOrigin-RevId: 176881510
---
 .../python/feature_column/feature_column.py   | 27 ++++++++++++++-----
 .../feature_column/feature_column_test.py     | 22 ++++++++++++---
 .../golden/tensorflow.feature_column.pbtxt    |  2 +-
 3 files changed, 40 insertions(+), 11 deletions(-)

diff --git a/tensorflow/python/feature_column/feature_column.py b/tensorflow/python/feature_column/feature_column.py
index 452f84192c..0686480ca4 100644
--- a/tensorflow/python/feature_column/feature_column.py
+++ b/tensorflow/python/feature_column/feature_column.py
@@ -152,6 +152,7 @@ from tensorflow.python.ops import sparse_ops
 from tensorflow.python.ops import string_ops
 from tensorflow.python.ops import variable_scope
 from tensorflow.python.ops import variables
+from tensorflow.python.platform import gfile
 from tensorflow.python.platform import tf_logging as logging
 from tensorflow.python.training import checkpoint_utils
 from tensorflow.python.util import nest
@@ -980,9 +981,12 @@ def categorical_column_with_hash_bucket(key,
   return _HashedCategoricalColumn(key, hash_bucket_size, dtype)
 
 
-def categorical_column_with_vocabulary_file(
-    key, vocabulary_file, vocabulary_size, num_oov_buckets=0,
-    default_value=None, dtype=dtypes.string):
+def categorical_column_with_vocabulary_file(key,
+                                            vocabulary_file,
+                                            vocabulary_size=None,
+                                            num_oov_buckets=0,
+                                            default_value=None,
+                                            dtype=dtypes.string):
   """A `_CategoricalColumn` with a vocabulary file.
 
   Use this when your inputs are in string or integer format, and you have a
@@ -1041,7 +1045,7 @@ def categorical_column_with_vocabulary_file(
     vocabulary_file: The vocabulary file name.
     vocabulary_size: Number of the elements in the vocabulary. This must be no
       greater than length of `vocabulary_file`, if less than length, later
-      values are ignored.
+      values are ignored. If None, it is set to the length of `vocabulary_file`.
     num_oov_buckets: Non-negative integer, the number of out-of-vocabulary
       buckets. All out-of-vocabulary inputs will be assigned IDs in the range
       `[vocabulary_size, vocabulary_size+num_oov_buckets)` based on a hash of
@@ -1056,7 +1060,7 @@ def categorical_column_with_vocabulary_file(
     A `_CategoricalColumn` with a vocabulary file.
 
   Raises:
-    ValueError: `vocabulary_file` is missing.
+    ValueError: `vocabulary_file` is missing or cannot be opened.
     ValueError: `vocabulary_size` is missing or < 1.
     ValueError: `num_oov_buckets` is a negative integer.
     ValueError: `num_oov_buckets` and `default_value` are both specified.
@@ -1064,8 +1068,19 @@ def categorical_column_with_vocabulary_file(
   """
   if not vocabulary_file:
     raise ValueError('Missing vocabulary_file in {}.'.format(key))
+
+  if vocabulary_size is None:
+    if not gfile.Exists(vocabulary_file):
+      raise ValueError('vocabulary_file in {} does not exist.'.format(key))
+
+    with gfile.GFile(vocabulary_file) as f:
+      vocabulary_size = sum(1 for _ in f)
+    logging.info(
+        'vocabulary_size = %d in %s is inferred from the number of elements '
+        'in the vocabulary_file %s.', vocabulary_size, key, vocabulary_file)
+
   # `vocabulary_size` isn't required for lookup, but it is for `_num_buckets`.
-  if (vocabulary_size is None) or (vocabulary_size < 1):
+  if vocabulary_size < 1:
     raise ValueError('Invalid vocabulary_size in {}.'.format(key))
   if num_oov_buckets:
     if default_value is not None:
diff --git a/tensorflow/python/feature_column/feature_column_test.py b/tensorflow/python/feature_column/feature_column_test.py
index 6ac5ce8757..d974f14b8a 100644
--- a/tensorflow/python/feature_column/feature_column_test.py
+++ b/tensorflow/python/feature_column/feature_column_test.py
@@ -2255,10 +2255,6 @@ class VocabularyFileCategoricalColumnTest(test.TestCase):
         lookup_ops.tables_initializer().run()
 
   def test_invalid_vocabulary_size(self):
-    with self.assertRaisesRegexp(ValueError, 'Invalid vocabulary_size'):
-      fc.categorical_column_with_vocabulary_file(
-          key='aaa', vocabulary_file=self._wire_vocabulary_file_name,
-          vocabulary_size=None)
     with self.assertRaisesRegexp(ValueError, 'Invalid vocabulary_size'):
       fc.categorical_column_with_vocabulary_file(
           key='aaa', vocabulary_file=self._wire_vocabulary_file_name,
@@ -2372,6 +2368,24 @@ class VocabularyFileCategoricalColumnTest(test.TestCase):
               dense_shape=inputs.dense_shape),
           id_weight_pair.id_tensor.eval())
 
+  def test_get_sparse_tensors_none_vocabulary_size(self):
+    column = fc.categorical_column_with_vocabulary_file(
+        key='aaa', vocabulary_file=self._wire_vocabulary_file_name)
+    inputs = sparse_tensor.SparseTensorValue(
+        indices=((0, 0), (1, 0), (1, 1)),
+        values=('marlo', 'skywalker', 'omar'),
+        dense_shape=(2, 2))
+    id_weight_pair = column._get_sparse_tensors(_LazyBuilder({'aaa': inputs}))
+    self.assertIsNone(id_weight_pair.weight_tensor)
+    with _initialized_session():
+      _assert_sparse_tensor_value(self,
+                                  sparse_tensor.SparseTensorValue(
+                                      indices=inputs.indices,
+                                      values=np.array(
+                                          (2, -1, 0), dtype=np.int64),
+                                      dense_shape=inputs.dense_shape),
+                                  id_weight_pair.id_tensor.eval())
+
   def test_transform_feature(self):
     column = fc.categorical_column_with_vocabulary_file(
         key='aaa',
diff --git a/tensorflow/tools/api/golden/tensorflow.feature_column.pbtxt b/tensorflow/tools/api/golden/tensorflow.feature_column.pbtxt
index 9eb4cb8ce9..018e8c909a 100644
--- a/tensorflow/tools/api/golden/tensorflow.feature_column.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.feature_column.pbtxt
@@ -14,7 +14,7 @@ tf_module {
   }
   member_method {
     name: "categorical_column_with_vocabulary_file"
-    argspec: "args=[\'key\', \'vocabulary_file\', \'vocabulary_size\', \'num_oov_buckets\', \'default_value\', \'dtype\'], varargs=None, keywords=None, defaults=[\'0\', \'None\', \"<dtype: \'string\'>\"], "
+    argspec: "args=[\'key\', \'vocabulary_file\', \'vocabulary_size\', \'num_oov_buckets\', \'default_value\', \'dtype\'], varargs=None, keywords=None, defaults=[\'None\', \'0\', \'None\', \"<dtype: \'string\'>\"], "
   }
   member_method {
     name: "categorical_column_with_vocabulary_list"
-- 
GitLab


From 758ac9cb907fdd7d9c295ea076e985c9f545667f Mon Sep 17 00:00:00 2001
From: FredZhang <654496915@qq.com>
Date: Sat, 25 Nov 2017 22:13:08 +0800
Subject: [PATCH 0292/1225] Some PATH typo : no `train_dir` in tutorial

This file uses `train_dir`. But in code file, there is no `train_dir`  anymore, it should be replaced with `log_dir` `input_data_dir` and `checkpoint_file` respectively
---
 tensorflow/docs_src/get_started/mnist/mechanics.md | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/tensorflow/docs_src/get_started/mnist/mechanics.md b/tensorflow/docs_src/get_started/mnist/mechanics.md
index 27fae45b5b..a5c784b30d 100644
--- a/tensorflow/docs_src/get_started/mnist/mechanics.md
+++ b/tensorflow/docs_src/get_started/mnist/mechanics.md
@@ -47,7 +47,7 @@ training folder and then unpack that data to return a dictionary of `DataSet`
 instances.
 
 ```python
-data_sets = input_data.read_data_sets(FLAGS.train_dir, FLAGS.fake_data)
+data_sets = input_data.read_data_sets(FLAGS.input_data_dir, FLAGS.fake_data)
 ```
 
 **NOTE**: The `fake_data` flag is used for unit-testing purposes and may be
@@ -369,7 +369,7 @@ may be instantiated to write the events files, which
 contain both the graph itself and the values of the summaries.
 
 ```python
-summary_writer = tf.summary.FileWriter(FLAGS.train_dir, sess.graph)
+summary_writer = tf.summary.FileWriter(FLAGS.log_dir, sess.graph)
 ```
 
 Lastly, the events file will be updated with new summary values every time the
@@ -403,7 +403,7 @@ method will periodically be called to write a checkpoint file to the training
 directory with the current values of all the trainable variables.
 
 ```python
-saver.save(sess, FLAGS.train_dir, global_step=step)
+saver.save(sess, checkpoint_file, global_step=step)
 ```
 
 At some later point in the future, training might be resumed by using the
@@ -411,7 +411,7 @@ At some later point in the future, training might be resumed by using the
 method to reload the model parameters.
 
 ```python
-saver.restore(sess, FLAGS.train_dir)
+saver.restore(sess, checkpoint_file)
 ```
 
 ## Evaluate the Model
-- 
GitLab


From 70bf5374524bc2f19b9196eeb066b883ee504db5 Mon Sep 17 00:00:00 2001
From: concerttttt <yuxin-li@outlook.com>
Date: Mon, 27 Nov 2017 11:58:38 +0800
Subject: [PATCH 0293/1225] Update tf_core_framework.cmake

gpu_tracer.cc replaced by device_tracer.cc
---
 tensorflow/contrib/cmake/tf_core_framework.cmake | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/contrib/cmake/tf_core_framework.cmake b/tensorflow/contrib/cmake/tf_core_framework.cmake
index c607546f4a..5ec1a8d04f 100644
--- a/tensorflow/contrib/cmake/tf_core_framework.cmake
+++ b/tensorflow/contrib/cmake/tf_core_framework.cmake
@@ -211,7 +211,7 @@ if (NOT tensorflow_ENABLE_GPU)
   list(REMOVE_ITEM tf_core_platform_srcs ${tf_core_platform_gpu_srcs})
 else()
   file(GLOB tf_core_platform_srcs_exclude
-      "${tensorflow_source_dir}/tensorflow/core/platform/default/gpu_tracer.cc")
+      "${tensorflow_source_dir}/tensorflow/core/platform/default/device_tracer.cc")
   list(REMOVE_ITEM tf_core_platform_srcs ${tf_core_platform_srcs_exclude})
 endif()
 
-- 
GitLab


From a264269f523467ac018708a647eab02c1f1010fe Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 27 Nov 2017 04:26:18 -0800
Subject: [PATCH 0294/1225] Fixed a minor typo in FisherEstimator docstring.

PiperOrigin-RevId: 176999852
---
 tensorflow/contrib/kfac/python/ops/estimator.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/contrib/kfac/python/ops/estimator.py b/tensorflow/contrib/kfac/python/ops/estimator.py
index c353f3592f..27ff951f16 100644
--- a/tensorflow/contrib/kfac/python/ops/estimator.py
+++ b/tensorflow/contrib/kfac/python/ops/estimator.py
@@ -95,7 +95,7 @@ class FisherEstimator(object):
           blocks, kronecker factors, and losses associated with the
           graph.
       estimation_mode: The type of estimator to use for the Fishers.  Can be
-          'gradients', 'empirical', 'curvature_propagation', or 'exact'.
+          'gradients', 'empirical', 'curvature_prop', or 'exact'.
           (Default: 'gradients').  'gradients' is the basic estimation approach
           from the original K-FAC paper.  'empirical' computes the 'empirical'
           Fisher information matrix (which uses the data's distribution for the
-- 
GitLab


From 58970731ba6d899c827ab1ce5c853d9ac8ae1414 Mon Sep 17 00:00:00 2001
From: lanhin <lanhin1@gmail.com>
Date: Mon, 27 Nov 2017 20:43:30 +0800
Subject: [PATCH 0295/1225] Comment typo fix.

---
 tensorflow/core/common_runtime/function.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/core/common_runtime/function.cc b/tensorflow/core/common_runtime/function.cc
index 93bd3a6adb..6fb0dc252e 100644
--- a/tensorflow/core/common_runtime/function.cc
+++ b/tensorflow/core/common_runtime/function.cc
@@ -318,7 +318,7 @@ Status FunctionLibraryRuntimeImpl::CreateKernel(const NodeDef& ndef,
                                  kernel);
   }
 
-  // Try to instantiate this function for the func/attr. Maybe its
+  // Try to instantiate this function for the func/attr. Maybe it's
   // cached already.
   Handle handle;
   TF_RETURN_IF_ERROR(Instantiate(ndef.op(), AttrSlice(&ndef.attr()), &handle));
-- 
GitLab


From 191825e63f341a4e7777b85254f616e541000d5c Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 27 Nov 2017 06:29:45 -0800
Subject: [PATCH 0296/1225] Delete trailing whitespace

PiperOrigin-RevId: 177008504
---
 RELEASE.md                                    |  2 +-
 .../gpu/llvm_gpu_backend/gpu_backend_lib.cc   |  2 +-
 tensorflow/contrib/android/cmake/README.md    |  2 +-
 .../android/TensorFlowInferenceInterface.java |  6 +--
 .../kernels/bigquery_table_accessor_test.cc   |  2 +-
 tensorflow/contrib/cmake/tf_grappler.cmake    |  2 +-
 tensorflow/contrib/cmake/tf_shared_lib.cmake  |  2 +-
 .../contrib/cmake/tf_stream_executor.cmake    |  6 +--
 ...single_image_random_dot_stereograms_ops.cc |  6 +--
 tensorflow/contrib/lite/g3doc/apis.md         |  2 +-
 .../app/src/main/res/values/base-strings.xml  |  8 +--
 tensorflow/contrib/makefile/README.md         | 32 ++++++------
 .../contrib/makefile/compile_ios_protobuf.sh  |  2 +-
 tensorflow/contrib/makefile/compile_nsync.sh  |  2 +-
 .../contrib/makefile/rename_protobuf.sh       |  4 +-
 tensorflow/contrib/metrics/README.md          |  2 +-
 tensorflow/contrib/mpi/README.md              | 10 ++--
 tensorflow/contrib/pi_examples/README.md      |  2 +-
 .../contrib/pi_examples/camera/Makefile       |  2 +-
 .../contrib/pi_examples/label_image/Makefile  |  2 +-
 .../pi_examples/label_image/label_image.cc    | 14 +++---
 tensorflow/contrib/quantize/README.md         |  2 +-
 .../stochastic_hard_routing_function_op.cc    |  2 +-
 .../g3doc/periodic_multires_derivation.md     |  2 +-
 tensorflow/contrib/tpu/ops/outfeed_ops.cc     |  2 +-
 tensorflow/contrib/verbs/README.md            |  2 +-
 .../common_runtime/accumulate_n_optimizer.cc  |  2 +-
 tensorflow/core/framework/bfloat16.cc         | 28 +++++------
 tensorflow/core/framework/bfloat16.h          |  6 +--
 tensorflow/core/kernels/cast_op.h             |  8 +--
 tensorflow/core/kernels/diag_op.cc            |  4 +-
 tensorflow/core/kernels/diag_op_gpu.cu.cc     |  2 +-
 tensorflow/core/kernels/queue_ops.cc          |  2 +-
 .../core/kernels/sparse_matmul_op_test.cc     | 10 ++--
 tensorflow/core/kernels/xsmm_conv2d_test.cc   | 50 +++++++++----------
 tensorflow/core/ops/image_ops.cc              |  4 +-
 tensorflow/core/ops/nn_ops.cc                 |  4 +-
 .../platform/default/build_config_root.bzl    |  2 +-
 tensorflow/core/profiler/README.md            |  2 +-
 tensorflow/docs_src/about/uses.md             |  2 +-
 tensorflow/docs_src/api_guides/python/nn.md   |  4 +-
 .../docs_src/community/documentation.md       |  6 +--
 tensorflow/docs_src/community/style_guide.md  |  2 +-
 tensorflow/docs_src/community/welcome.md      |  2 +-
 tensorflow/docs_src/deploy/hadoop.md          |  4 +-
 tensorflow/docs_src/extend/add_filesys.md     |  2 +-
 tensorflow/docs_src/extend/index.md           |  2 +-
 .../docs_src/get_started/get_started.md       |  4 +-
 tensorflow/docs_src/install/install_linux.md  |  8 +--
 .../docs_src/install/install_sources.md       |  2 +-
 tensorflow/docs_src/mobile/android_build.md   |  4 +-
 tensorflow/docs_src/mobile/index.md           |  2 +-
 tensorflow/docs_src/mobile/ios_build.md       |  2 +-
 tensorflow/docs_src/mobile/optimizing.md      |  4 +-
 tensorflow/docs_src/mobile/prepare_models.md  | 14 +++---
 tensorflow/docs_src/mobile/tflite/index.md    |  4 +-
 .../docs_src/programmers_guide/saved_model.md |  2 +-
 .../docs_src/programmers_guide/tensors.md     |  8 +--
 .../docs_src/programmers_guide/variables.md   | 16 +++---
 .../docs_src/tutorials/image_recognition.md   |  2 +-
 .../src/org/tensorflow/demo/Classifier.java   |  2 +-
 tensorflow/examples/ios/README.md             |  8 +--
 .../examples/tutorials/deepdream/README.md    |  8 +--
 tensorflow/examples/udacity/README.md         |  6 +--
 tensorflow/g3doc/README.txt                   |  2 +-
 .../java/src/gen/perl/tftypes-runall.pl       |  6 +--
 tensorflow/java/src/gen/perl/tftypes.pl       |  8 +--
 .../java/src/gen/resources/Tensors.java.tmpl  |  4 +-
 tensorflow/python/grappler/model_analyzer.i   |  2 +-
 .../stream_executor/cuda/cuda_platform.cc     |  2 +-
 .../stream_executor/lib/static_threadlocal.h  |  2 +-
 tensorflow/tools/ci_build/README.md           |  4 +-
 .../dist_test/scripts/dist_mnist_test.sh      |  2 +-
 tensorflow/tools/docker/README.md             |  2 +-
 tensorflow/tools/graph_transforms/README.md   |  6 +--
 75 files changed, 203 insertions(+), 203 deletions(-)

diff --git a/RELEASE.md b/RELEASE.md
index d8db1f7200..e04bd3fc50 100644
--- a/RELEASE.md
+++ b/RELEASE.md
@@ -494,7 +494,7 @@ answered questions, and were part of inspiring discussions.
 This release contains contributions from many people at Google, as well as:
 
 A. Besir Kurtulmus, Adal Chiriliuc, @akash, Alec-Desouza, Alex Rothberg, Alex
-Sergeev, Alexander Heinecke, Allen Guo, Andreas Madsen, Ankesh Anand, Anton 
+Sergeev, Alexander Heinecke, Allen Guo, Andreas Madsen, Ankesh Anand, Anton
 Loss, @Aravind, @Arie, Ashutosh Das, AuréLien Geron, Bairen Yi, @bakunyo, Ben
 Visser, Brady Zhou, Calpa Liu, Changming Sun, Chih Cheng Liang, Christopher
 Berner, Clark Zinzow, @Conchylicultor, Dan Ellis, Dan J, Dan Jarvis, Daniel
diff --git a/tensorflow/compiler/xla/service/gpu/llvm_gpu_backend/gpu_backend_lib.cc b/tensorflow/compiler/xla/service/gpu/llvm_gpu_backend/gpu_backend_lib.cc
index a574123d6b..96981534d5 100644
--- a/tensorflow/compiler/xla/service/gpu/llvm_gpu_backend/gpu_backend_lib.cc
+++ b/tensorflow/compiler/xla/service/gpu/llvm_gpu_backend/gpu_backend_lib.cc
@@ -77,7 +77,7 @@ static string GetLibdeviceFilename(const string& libdevice_dir_path,
   // Since CUDA 9.0, all GPU versions are included in a single file
   const char* unified_libdevice_filename = "libdevice.10.bc";
   std::vector<string> unified_libdevice_files;
-  const tensorflow::Status status = 
+  const tensorflow::Status status =
     tensorflow::Env::Default()->GetMatchingPaths(
       tensorflow::io::JoinPath(libdevice_dir_path, unified_libdevice_filename),
       &unified_libdevice_files);
diff --git a/tensorflow/contrib/android/cmake/README.md b/tensorflow/contrib/android/cmake/README.md
index 6f19b657fe..934b58c724 100644
--- a/tensorflow/contrib/android/cmake/README.md
+++ b/tensorflow/contrib/android/cmake/README.md
@@ -14,7 +14,7 @@ Add TensorFlow-Android-Inference as a dependency of your Android application
 
 ```
 include ':TensorFlow-Android-Inference'
-findProject(":TensorFlow-Android-Inference").projectDir = 
+findProject(":TensorFlow-Android-Inference").projectDir =
             new File("${/path/to/tensorflow_repo}/contrib/android/cmake")
 ```
 
diff --git a/tensorflow/contrib/android/java/org/tensorflow/contrib/android/TensorFlowInferenceInterface.java b/tensorflow/contrib/android/java/org/tensorflow/contrib/android/TensorFlowInferenceInterface.java
index 1f423a7a5b..dc5b9fb887 100644
--- a/tensorflow/contrib/android/java/org/tensorflow/contrib/android/TensorFlowInferenceInterface.java
+++ b/tensorflow/contrib/android/java/org/tensorflow/contrib/android/TensorFlowInferenceInterface.java
@@ -160,7 +160,7 @@ public class TensorFlowInferenceInterface {
       throw new RuntimeException("Failed to load model from the input stream", e);
     }
   }
-  
+
   /*
    * Construct a TensorFlowInferenceInterface with provided Graph
    *
@@ -168,7 +168,7 @@ public class TensorFlowInferenceInterface {
    */
   public TensorFlowInferenceInterface(Graph g) {
     prepareNativeRuntime();
-      
+
     // modelName is redundant here, here is for
     // avoiding error in initialization as modelName is marked final.
     this.modelName = "";
@@ -290,7 +290,7 @@ public class TensorFlowInferenceInterface {
    */
   public void feed(String inputName, boolean[] src, long... dims) {
     byte[] b = new byte[src.length];
-    
+
     for (int i = 0; i < src.length; i++) {
       b[i] = src[i] ? (byte) 1 : (byte) 0;
     }
diff --git a/tensorflow/contrib/cloud/kernels/bigquery_table_accessor_test.cc b/tensorflow/contrib/cloud/kernels/bigquery_table_accessor_test.cc
index b31b882fa1..e9b79a066d 100644
--- a/tensorflow/contrib/cloud/kernels/bigquery_table_accessor_test.cc
+++ b/tensorflow/contrib/cloud/kernels/bigquery_table_accessor_test.cc
@@ -421,7 +421,7 @@ TEST_F(BigQueryTableAccessorTest, MultiplePagesTest) {
   TF_EXPECT_OK(accessor_->ReadRow(&row_id, &example));
   EXPECT_EQ(3, row_id);
   EXPECT_TRUE(accessor_->Done());
-  
+
   Example expected_example;
   ASSERT_TRUE(protobuf::TextFormat::ParseFromString(kTestExampleProtoWithNulls,
                                                     &expected_example));
diff --git a/tensorflow/contrib/cmake/tf_grappler.cmake b/tensorflow/contrib/cmake/tf_grappler.cmake
index a7841c98e8..410490531a 100644
--- a/tensorflow/contrib/cmake/tf_grappler.cmake
+++ b/tensorflow/contrib/cmake/tf_grappler.cmake
@@ -23,7 +23,7 @@ file(GLOB tf_grappler_srcs
    "${tensorflow_source_dir}/tensorflow/python/grappler/model_analyzer.cc"
    "${tensorflow_source_dir}/tensorflow/python/grappler/model_analyzer.h"
  )
- 
+
 add_library(tf_grappler OBJECT ${tf_grappler_srcs})
 
 add_dependencies(tf_grappler tf_core_cpu)
\ No newline at end of file
diff --git a/tensorflow/contrib/cmake/tf_shared_lib.cmake b/tensorflow/contrib/cmake/tf_shared_lib.cmake
index 3e3fe0cdfa..dcedabb333 100644
--- a/tensorflow/contrib/cmake/tf_shared_lib.cmake
+++ b/tensorflow/contrib/cmake/tf_shared_lib.cmake
@@ -45,7 +45,7 @@ if(WIN32)
       $<TARGET_FILE:tensorflow_static>
       $<TARGET_FILE:tf_protos_cc>
   )
-    
+
   set(tensorflow_deffile "${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_BUILD_TYPE}/tensorflow.def")
   set_source_files_properties(${tensorflow_deffile} PROPERTIES GENERATED TRUE)
 
diff --git a/tensorflow/contrib/cmake/tf_stream_executor.cmake b/tensorflow/contrib/cmake/tf_stream_executor.cmake
index 8d95f0d3e8..91ca33f4c4 100644
--- a/tensorflow/contrib/cmake/tf_stream_executor.cmake
+++ b/tensorflow/contrib/cmake/tf_stream_executor.cmake
@@ -61,18 +61,18 @@ file(GLOB tf_stream_executor_srcs
     "${tensorflow_source_dir}/tensorflow/stream_executor/platform/default/*.h"
 )
 
-if (tensorflow_ENABLE_GPU)    
+if (tensorflow_ENABLE_GPU)
     file(GLOB tf_stream_executor_gpu_srcs
         "${tensorflow_source_dir}/tensorflow/stream_executor/cuda/*.cc"
     )
     list(APPEND tf_stream_executor_srcs ${tf_stream_executor_gpu_srcs})
-endif()    
+endif()
 
 #file(GLOB_RECURSE tf_stream_executor_test_srcs
 #    "${tensorflow_source_dir}/tensorflow/stream_executor/*_test.cc"
 #    "${tensorflow_source_dir}/tensorflow/stream_executor/*_test.h"
 #)
-#list(REMOVE_ITEM tf_stream_executor_srcs ${tf_stream_executor_test_srcs}) 
+#list(REMOVE_ITEM tf_stream_executor_srcs ${tf_stream_executor_test_srcs})
 
 if (NOT WIN32)
   set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -lgomp")
diff --git a/tensorflow/contrib/image/ops/single_image_random_dot_stereograms_ops.cc b/tensorflow/contrib/image/ops/single_image_random_dot_stereograms_ops.cc
index 2b67992138..f8b56ab1c5 100755
--- a/tensorflow/contrib/image/ops/single_image_random_dot_stereograms_ops.cc
+++ b/tensorflow/contrib/image/ops/single_image_random_dot_stereograms_ops.cc
@@ -40,7 +40,7 @@ REGISTER_OP("SingleImageRandomDotStereograms")
     .Doc(R"doc(
 Outputs a single image random dot stereogram for export via encode_PNG/JPG OP.
 
-Given the 2-D tensor 'depth_values' with encoded Z values, this operation will 
+Given the 2-D tensor 'depth_values' with encoded Z values, this operation will
 encode 3-D data into a 2-D image.  The output of this Op is suitable for the
 encode_PNG/JPG ops.  Be careful with image compression as this may corrupt the
 encode 3-D data witin the image.
@@ -68,14 +68,14 @@ with open('picture_out.png', 'wb') as f:
     f.write(png)
 ```
 
-depth_values: Z values of data to encode into 'output_data_window' window, 
+depth_values: Z values of data to encode into 'output_data_window' window,
   lower values are further away {0.0 floor(far), 1.0 ceiling(near) after normalization}, must be 2-D tensor
 hidden_surface_removal: Activate hidden surface removal
 convergence_dots_size: Black dot size in pixels to help view converge image, drawn on bottom of image
 dots_per_inch: Output device in dots/inch
 eye_separation: Separation between eyes in inches
 mu: Depth of field, Fraction of viewing distance (eg. 1/3 = .3333)
-normalize: Normalize input data to [0.0, 1.0] 
+normalize: Normalize input data to [0.0, 1.0]
 normalize_max: Fix MAX value for Normalization - if < MIN, autoscale
 normalize_min: Fix MIN value for Normalization - if > MAX, autoscale
 border_level: Value of border depth 0.0 {far} to 1.0 {near}
diff --git a/tensorflow/contrib/lite/g3doc/apis.md b/tensorflow/contrib/lite/g3doc/apis.md
index 311fc69696..e8f5566f11 100644
--- a/tensorflow/contrib/lite/g3doc/apis.md
+++ b/tensorflow/contrib/lite/g3doc/apis.md
@@ -52,7 +52,7 @@ typedef enum {
 Failures can be easily verified with:
 ```c++
 if (status != kTfLiteOk) {
-  // ... error handling here ... 
+  // ... error handling here ...
 }
 ```
 
diff --git a/tensorflow/contrib/lite/java/demo/app/src/main/res/values/base-strings.xml b/tensorflow/contrib/lite/java/demo/app/src/main/res/values/base-strings.xml
index ab7d3fd496..0a71dbd0e8 100644
--- a/tensorflow/contrib/lite/java/demo/app/src/main/res/values/base-strings.xml
+++ b/tensorflow/contrib/lite/java/demo/app/src/main/res/values/base-strings.xml
@@ -19,12 +19,12 @@
     <string name="app_name">TfLiteCameraDemo</string>
     <string name="intro_message">
         <![CDATA[
-        
-            
+
+
             This sample demonstrates the basic use of TfLite API. Check the source code to see how
             you can use TfLite for efficient, on-device inference with trained TensorFlow models.
-            
-        
+
+
         ]]>
     </string>
 </resources>
diff --git a/tensorflow/contrib/makefile/README.md b/tensorflow/contrib/makefile/README.md
index 65bd60c12a..9345303ff1 100644
--- a/tensorflow/contrib/makefile/README.md
+++ b/tensorflow/contrib/makefile/README.md
@@ -16,17 +16,17 @@ This static library will not contain:
 
  - Python or other language bindings
  - GPU support
- 
+
 You can target:
 - iOS
 - OS X (macOS)
 - Android
 - Raspberry-PI
- 
+
 You will compile tensorflow and protobuf libraries that you can link into other
 applications.  You will also compile the [benchmark](../../tools/benchmark/)
 application that will let you check your application.
- 
+
 ## Before you start (all platforms)
 
 First, clone this TensorFlow repository.
@@ -58,9 +58,9 @@ You should then be able to run the `build_all_linux.sh` script to compile:
 tensorflow/contrib/makefile/build_all_linux.sh
 ```
 
-This should compile a static library in 
-`tensorflow/contrib/makefile/gen/lib/libtensorflow-core.a`, 
-and create an example executable at `tensorflow/contrib/makefile/gen/bin/benchmark`. 
+This should compile a static library in
+`tensorflow/contrib/makefile/gen/lib/libtensorflow-core.a`,
+and create an example executable at `tensorflow/contrib/makefile/gen/bin/benchmark`.
 
 Get the graph file, if you have not already:
 
@@ -201,7 +201,7 @@ library in a simple app.
 ### Building by hand
 
 This section covers each step of building.  For all the code in one place, see
-[build_all_ios.sh](build_all_ios.sh). 
+[build_all_ios.sh](build_all_ios.sh).
 
 If you have not already, you will need to download dependencies:
 
@@ -232,7 +232,7 @@ make -f tensorflow/contrib/makefile/Makefile \
 
 This creates a library in
 `tensorflow/contrib/makefile/gen/lib/libtensorflow-core.a` that you can link any
-xcode project against. 
+xcode project against.
 
 To see TensorFlow running on iOS, the example Xcode project in
 [tensorflow/examples/ios](../../examples/ios/) shows how to use the static
@@ -258,15 +258,15 @@ tensorflow/contrib/makefile/compile_ios_tensorflow.sh -f "-O3" -h tensorflow/con
 
 In XCode, you will need to use -force_load in the linker flags
 section of the build settings to pull in the global constructors that are used
-to register ops and kernels. 
+to register ops and kernels.
 
 #### Optimization
- 
+
 The `compile_ios_tensorflow.sh` script can take optional command-line arguments.
 The first argument will be passed as a C++ optimization flag and defaults to
 debug mode. If you are concerned about performance or are working on a release
 build, you would likely want a higher optimization setting, like so:
- 
+
 ```bash
 compile_ios_tensorflow.sh -f "-Os"
 ```
@@ -330,7 +330,7 @@ what you need for your desired system.
 ## Dependency Management
 
 The Makefile loads in a list of dependencies stored in text files. These files
-are generated from the main Bazel build by running 
+are generated from the main Bazel build by running
 `tensorflow/contrib/makefile/gen_file_lists.sh`. You'll need to re-run this i
 you make changes to the files that are included in the build.
 
@@ -361,10 +361,10 @@ codebase can sometimes break the makefile build process. If you find that tests
 relying on this makefile are failing with a change you're involved in, here are
 some trouble-shooting steps:
 
- - Try to reproduce the issue on your platform. If you're on Linux, running 
+ - Try to reproduce the issue on your platform. If you're on Linux, running
  `make -f tensorflow/contrib/makefile/Makefile` should be enough to recreate
   most issues. For other platforms, see the sections earlier in this document.
-  
+
  - The most common cause of breakages are files that have been added to the
   Bazel build scripts, but that the makefile isn't aware of. Typical symptoms
   of this include linker errors mentioning missing symbols or protobuf headers
@@ -377,11 +377,11 @@ some trouble-shooting steps:
   `tensorflow/core/BUILD`, so if you change the wildcards there to include new
   files you'll need to also update `CORE_CC_ALL_SRCS` and `CORE_CC_EXCLUDE_SRCS`
   in the makefile.
-  
+
  - Some of the supported platforms use clang instead of gcc as their compiler,
   so if you're hitting compile errors you may need to tweak your code to be more
   friendly to different compilers by avoiding gcc extensions or idioms.
-  
+
 These are the most common reasons for makefile breakages, but it's also
 possible you may hit something unusual, like a platform incompatibility. For
 those, you'll need to see if you can reproduce the issue on that particular
diff --git a/tensorflow/contrib/makefile/compile_ios_protobuf.sh b/tensorflow/contrib/makefile/compile_ios_protobuf.sh
index 43e5809dd2..8fa2021363 100755
--- a/tensorflow/contrib/makefile/compile_ios_protobuf.sh
+++ b/tensorflow/contrib/makefile/compile_ios_protobuf.sh
@@ -270,7 +270,7 @@ case "$1" in
         echo "Unknown ARCH"
         exit 1
         ;;
-esac 
+esac
 }
 
 for build_element in "${build_targets[@]}"
diff --git a/tensorflow/contrib/makefile/compile_nsync.sh b/tensorflow/contrib/makefile/compile_nsync.sh
index 930e6b8dea..7927997678 100755
--- a/tensorflow/contrib/makefile/compile_nsync.sh
+++ b/tensorflow/contrib/makefile/compile_nsync.sh
@@ -28,7 +28,7 @@ usage="usage: $prog [-t linux|ios|android|macos|native]
         [-a architecture] [-v android_api_version]
 
 A script to build nsync for tensorflow.
-This script can be run on Linux or MacOS host platforms, and can target 
+This script can be run on Linux or MacOS host platforms, and can target
 Linux, MacOS, iOS, or Android.
 
 Options:
diff --git a/tensorflow/contrib/makefile/rename_protobuf.sh b/tensorflow/contrib/makefile/rename_protobuf.sh
index b3bff2d503..8d52c1a169 100755
--- a/tensorflow/contrib/makefile/rename_protobuf.sh
+++ b/tensorflow/contrib/makefile/rename_protobuf.sh
@@ -38,7 +38,7 @@
 #
 # Note that this script modifies the source code in-place, so once it's been run
 # it's no longer suitable for further manual modifications, since the difference
-# with the top of tree will already be large. 
+# with the top of tree will already be large.
 
 mv tensorflow/contrib/makefile/downloads/protobuf/src/google/protobuf \
  tensorflow/contrib/makefile/downloads/protobuf//src/google/protobuf3
@@ -71,7 +71,7 @@ sed -i '' 's%::google::protobuf;%google::protobuf3;%' \
 
 # Fix up a couple of special build scripts that look for particular files.
 sed -i '' 's%src/google/protobuf/message.cc%src/google/protobuf3/message.cc%' \
- tensorflow/contrib/makefile/downloads/protobuf/configure.ac 
+ tensorflow/contrib/makefile/downloads/protobuf/configure.ac
 sed -i '' 's%src/google/protobuf/stubs/common.h%src/google/protobuf3/stubs/common.h%' \
  tensorflow/contrib/makefile/downloads/protobuf/autogen.sh
 
diff --git a/tensorflow/contrib/metrics/README.md b/tensorflow/contrib/metrics/README.md
index 247ebac5bb..e0f2d74fa3 100644
--- a/tensorflow/contrib/metrics/README.md
+++ b/tensorflow/contrib/metrics/README.md
@@ -4,7 +4,7 @@
 
 Metrics are used in evaluation to assess the quality of a model. Most are
 "streaming" ops, meaning they create variables to accumulate a running total,
-and return an update tensor to update these variables, and a value tensor to 
+and return an update tensor to update these variables, and a value tensor to
 read the accumulated value. Example:
 
 value, update_op = metrics.streaming_mean_squared_error(
diff --git a/tensorflow/contrib/mpi/README.md b/tensorflow/contrib/mpi/README.md
index b0d03d05a2..75cb823048 100644
--- a/tensorflow/contrib/mpi/README.md
+++ b/tensorflow/contrib/mpi/README.md
@@ -23,7 +23,7 @@ The following environment variables can be set to modify the behavior at runtime
 
 **MPI_DISABLED=[0,1]**
 
-This environment variable allows you to disable the MPI path before launch (e.g. for performance or correctness testing). 
+This environment variable allows you to disable the MPI path before launch (e.g. for performance or correctness testing).
 
 **MPI_OPTIMAL_PATH=[0,1]**
 
@@ -34,10 +34,10 @@ This path is disabled by default as it requires that the MPI library can directl
 
 ## Known problems
 
-For certain complex neural nets the implementation sometimes crashes inside the MPI libraries. This seems to be related to memory allocations/routines that register the memory for the Infiniband transfers. (The crashes do not happen when all MPI processes are within the same physical machine). 
+For certain complex neural nets the implementation sometimes crashes inside the MPI libraries. This seems to be related to memory allocations/routines that register the memory for the Infiniband transfers. (The crashes do not happen when all MPI processes are within the same physical machine).
 
 **MVAPICH**
-- The problem manifests itself with a segmentation fault inside a memory copy routine and during startup you will get the following warning: "WARNING: Error in initializing MVAPICH2 ptmalloc library. Continuing without InfiniBand registration cache support." 
+- The problem manifests itself with a segmentation fault inside a memory copy routine and during startup you will get the following warning: "WARNING: Error in initializing MVAPICH2 ptmalloc library. Continuing without InfiniBand registration cache support."
 
 **OpenMPI**
 - With OpenMPI corrupt data will be received resulting in an assertion or the MPI library will print an error and exit. The error is "Attempt to free memory that is still in use by an ongoing MPI communication.  MPI job will now abort."
@@ -58,11 +58,11 @@ Once a request has arrived from a remote process the request is forwarded to the
 * Receive tensor request
 The MPI thread will check if there are any incoming tensor request messages on the communication lines using MPI_Iprobe. Once a request has been received it will be passed on to the standard TensorFlow code and eventually will be placed on the sendQueue.
 
-* Receive tensor 
+* Receive tensor
 At some point after a request has been sent the remote process will transmit the tensor. This tensor will be received and we look-up the callback that is associated with this tensor in our request table and execute the callback on the received data.
 
 
-In the implementation all send operations are non-blocking, all probe operations are non-blocking and all receive-operations are blocking. The receive-operations are only executed after the probe has determined that there is something to receive. 
+In the implementation all send operations are non-blocking, all probe operations are non-blocking and all receive-operations are blocking. The receive-operations are only executed after the probe has determined that there is something to receive.
 The MPI processes identify each other using an MPI process ID. The TensorFlow gRPC processes identify each other using a name. During launch we create a mapping between the TensorFlow process name and the MPI process ID to allow the processes to communicate with the correct destinations when using MPI operations.
 
 
diff --git a/tensorflow/contrib/pi_examples/README.md b/tensorflow/contrib/pi_examples/README.md
index f550228083..177357bca6 100644
--- a/tensorflow/contrib/pi_examples/README.md
+++ b/tensorflow/contrib/pi_examples/README.md
@@ -13,7 +13,7 @@ sudo apt-get install -y libjpeg-dev
 ```
 
  - To download the example model you'll need, run these commands:
- 
+
 ```bash
 curl https://storage.googleapis.com/download.tensorflow.org/models/inception_dec_2015_stripped.zip \
 -o /tmp/inception_dec_2015_stripped.zip
diff --git a/tensorflow/contrib/pi_examples/camera/Makefile b/tensorflow/contrib/pi_examples/camera/Makefile
index 578f1336f3..b354c03b6e 100644
--- a/tensorflow/contrib/pi_examples/camera/Makefile
+++ b/tensorflow/contrib/pi_examples/camera/Makefile
@@ -76,7 +76,7 @@ $(EXECUTABLE_NAME): $(EXECUTABLE_OBJS) $(TFLIBS)
 	$(LIBFLAGS) $(LIB_PATH) $(LDFLAGS) $(LIBS)
 
 # Matches on C++ source files.
-$(OBJDIR)%.o: %.cc 
+$(OBJDIR)%.o: %.cc
 	@mkdir -p $(dir $@)
 	$(CXX) $(CXXFLAGS) $(INCLUDES) -c $< -o $@
 
diff --git a/tensorflow/contrib/pi_examples/label_image/Makefile b/tensorflow/contrib/pi_examples/label_image/Makefile
index 19652e581d..9d054a3133 100644
--- a/tensorflow/contrib/pi_examples/label_image/Makefile
+++ b/tensorflow/contrib/pi_examples/label_image/Makefile
@@ -75,7 +75,7 @@ $(EXECUTABLE_NAME): $(EXECUTABLE_OBJS) $(TFLIBS)
 	$(LIBFLAGS) $(LIB_PATH) $(LDFLAGS) $(LIBS)
 
 # Matches on C++ source files.
-$(OBJDIR)%.o: %.cc 
+$(OBJDIR)%.o: %.cc
 	@mkdir -p $(dir $@)
 	$(CXX) $(CXXFLAGS) $(INCLUDES) -c $< -o $@
 
diff --git a/tensorflow/contrib/pi_examples/label_image/label_image.cc b/tensorflow/contrib/pi_examples/label_image/label_image.cc
index 7817cd0c64..0b18045789 100644
--- a/tensorflow/contrib/pi_examples/label_image/label_image.cc
+++ b/tensorflow/contrib/pi_examples/label_image/label_image.cc
@@ -89,7 +89,7 @@ Status LoadJpegFile(string file_name, std::vector<tensorflow::uint8>* data,
   FILE * infile;
   JSAMPARRAY buffer;
   int row_stride;
-  
+
   if ((infile = fopen(file_name.c_str(), "rb")) == NULL) {
     LOG(ERROR) << "Can't open " << file_name;
     return tensorflow::errors::NotFound("JPEG file ", file_name,
@@ -105,7 +105,7 @@ Status LoadJpegFile(string file_name, std::vector<tensorflow::uint8>* data,
     fclose(infile);
     return tensorflow::errors::Unknown("JPEG decoding failed");
   }
-  
+
   jpeg_create_decompress(&cinfo);
   jpeg_stdio_src(&cinfo, infile);
   jpeg_read_header(&cinfo, TRUE);
@@ -119,14 +119,14 @@ Status LoadJpegFile(string file_name, std::vector<tensorflow::uint8>* data,
   buffer = (*cinfo.mem->alloc_sarray)
     ((j_common_ptr) &cinfo, JPOOL_IMAGE, row_stride, 1);
   while (cinfo.output_scanline < cinfo.output_height) {
-    tensorflow::uint8* row_address = &((*data)[cinfo.output_scanline * row_stride]); 
+    tensorflow::uint8* row_address = &((*data)[cinfo.output_scanline * row_stride]);
     jpeg_read_scanlines(&cinfo, buffer, 1);
     memcpy(row_address, buffer[0], row_stride);
   }
 
   jpeg_finish_decompress(&cinfo);
   jpeg_destroy_decompress(&cinfo);
-  fclose(infile);  
+  fclose(infile);
   return Status::OK();
 }
 
@@ -167,7 +167,7 @@ Status ReadTensorFromImageFile(string file_name, const int wanted_height,
     const int top_y_index = static_cast<int>(floorf(in_y));
     const int bottom_y_index =
       std::min(static_cast<int>(ceilf(in_y)), (image_height - 1));
-    const float y_lerp = in_y - top_y_index; 
+    const float y_lerp = in_y - top_y_index;
     tensorflow::uint8* in_top_row = in + (top_y_index * image_rowlen);
     tensorflow::uint8* in_bottom_row = in + (bottom_y_index * image_rowlen);
     float *out_row = out + (y * wanted_width * wanted_channels);
@@ -186,7 +186,7 @@ Status ReadTensorFromImageFile(string file_name, const int wanted_height,
 	in_bottom_row + (right_x_index * wanted_channels);
       const float x_lerp = in_x - left_x_index;
       float *out_pixel = out_row + (x * wanted_channels);
-      for (int c = 0; c < wanted_channels; ++c) {	
+      for (int c = 0; c < wanted_channels; ++c) {
 	const float top_left((in_top_left_pixel[c] - input_mean) / input_std);
 	const float top_right((in_top_right_pixel[c] - input_mean) / input_std);
 	const float bottom_left((in_bottom_left_pixel[c] - input_mean) / input_std);
@@ -198,7 +198,7 @@ Status ReadTensorFromImageFile(string file_name, const int wanted_height,
       }
     }
   }
-  
+
   out_tensors->push_back(image_tensor);
   return Status::OK();
 }
diff --git a/tensorflow/contrib/quantize/README.md b/tensorflow/contrib/quantize/README.md
index 782232e85f..40541729da 100644
--- a/tensorflow/contrib/quantize/README.md
+++ b/tensorflow/contrib/quantize/README.md
@@ -13,7 +13,7 @@ through estimator [2]. Note that during back propagation, the parameters are
 updated at high precision as this is needed to ensure sufficient precision in
 accumulating tiny adjustments to the parameters. However, for the forward pass,
 the parameters and activations are quantized to the desired lower precision.
- 
+
 ![drawing](g3doc/drawings/Fake_Quantization.jpg)
 
 ###Forward pass
diff --git a/tensorflow/contrib/tensor_forest/hybrid/core/ops/stochastic_hard_routing_function_op.cc b/tensorflow/contrib/tensor_forest/hybrid/core/ops/stochastic_hard_routing_function_op.cc
index 09b83e2af1..66aa293dc1 100644
--- a/tensorflow/contrib/tensor_forest/hybrid/core/ops/stochastic_hard_routing_function_op.cc
+++ b/tensorflow/contrib/tensor_forest/hybrid/core/ops/stochastic_hard_routing_function_op.cc
@@ -70,7 +70,7 @@ REGISTER_OP("StochasticHardRoutingFunction")
       return Status::OK();
     })
     .Doc(R"doc(
-  Samples a path for each instance in `input_data` and returns the 
+  Samples a path for each instance in `input_data` and returns the
   probability of the path and the path taken.
 
   tree_depth: The depth of the decision tree.
diff --git a/tensorflow/contrib/timeseries/python/timeseries/state_space_models/g3doc/periodic_multires_derivation.md b/tensorflow/contrib/timeseries/python/timeseries/state_space_models/g3doc/periodic_multires_derivation.md
index b174bb6af3..872474aee1 100644
--- a/tensorflow/contrib/timeseries/python/timeseries/state_space_models/g3doc/periodic_multires_derivation.md
+++ b/tensorflow/contrib/timeseries/python/timeseries/state_space_models/g3doc/periodic_multires_derivation.md
@@ -66,7 +66,7 @@ def make_eigval_mat_fn(to_power=1):
         if i == j:
             number = j // 2 + 1
             powersign = ((j + 1) % 2) * 2 - 1
-            return root_of_unity(matsize + 1, number=number, 
+            return root_of_unity(matsize + 1, number=number,
                                  to_power=powersign*to_power)
         else:
             return 0
diff --git a/tensorflow/contrib/tpu/ops/outfeed_ops.cc b/tensorflow/contrib/tpu/ops/outfeed_ops.cc
index ed5756cc54..5900c61a38 100644
--- a/tensorflow/contrib/tpu/ops/outfeed_ops.cc
+++ b/tensorflow/contrib/tpu/ops/outfeed_ops.cc
@@ -39,7 +39,7 @@ REGISTER_OP("OutfeedEnqueueTuple")
     .Doc(R"doc(
 An op which emits multiple Tensor values from an XLA computation.
 
-inputs: A list of tensors that will be inserted into the outfeed queue as an 
+inputs: A list of tensors that will be inserted into the outfeed queue as an
 XLA tuple.
 )doc");
 
diff --git a/tensorflow/contrib/verbs/README.md b/tensorflow/contrib/verbs/README.md
index dcb390b0a5..7c1c8ea459 100644
--- a/tensorflow/contrib/verbs/README.md
+++ b/tensorflow/contrib/verbs/README.md
@@ -38,7 +38,7 @@ The following improvements can be made in the future. First, conversion to Tenso
 * **RDMA channel:** Responsible for RDMA connection to a particular node. It manages multiple buffers. A channel has a callback table which stores all the callbacks for the requested tensors.
 * **RDMA buffer:** Responsible for sending or receiving data. It has a fixed size memory to store the data. It has a queue to store the pending jobs. There are three types of buffers, message buffer, ACK buffer and tensor buffer. A channel has two message buffers, two ack buffers and many tensor buffers.
 * **RDMA manager:** Manages the adapter and channels, including channel creation, channel setup via GRPC service, channel lookup, etc.
-* **RDMA rendezvous manager:** manages multiple rdma rendezvous. 
+* **RDMA rendezvous manager:** manages multiple rdma rendezvous.
 * **RDMA rendezvous:** a derived class of BaseRemoteRendezvous. This class is the back end for "send" and "recv" ops. When the sendrecv_op wants to send or receive a tensor, it calls the rendezvous' "send" and "recv" functions respectively. Rendezvous are identified by "step_id", a random number, so that tensors for different iterations don't get mixed up.
 
 ### The SEND operation
diff --git a/tensorflow/core/common_runtime/accumulate_n_optimizer.cc b/tensorflow/core/common_runtime/accumulate_n_optimizer.cc
index 81cd44870e..a1e3b21e4f 100644
--- a/tensorflow/core/common_runtime/accumulate_n_optimizer.cc
+++ b/tensorflow/core/common_runtime/accumulate_n_optimizer.cc
@@ -35,7 +35,7 @@ Tensor make_zeros(const DataType& dtype, const TensorShapeProto& shape) {
 // Replaces occurrences of the "AccumulateNV2" stub operator with a graph of
 // lower-level ops. The graph is equivalent (modulo certain corner cases)
 // to the semantics of the original accumulate_n() Python op in math_ops.py.
-// Implementing the op with a rewrite allows this new variant of accumulate_n 
+// Implementing the op with a rewrite allows this new variant of accumulate_n
 // to be differentiable.
 //
 // The binary code that generates AccumulateNV2 stub ops is located in a
diff --git a/tensorflow/core/framework/bfloat16.cc b/tensorflow/core/framework/bfloat16.cc
index a5ac0e1a8d..0efe43fde2 100644
--- a/tensorflow/core/framework/bfloat16.cc
+++ b/tensorflow/core/framework/bfloat16.cc
@@ -21,13 +21,13 @@ void FloatToBFloat16(const float* src, bfloat16* dst, int64 size) {
   const uint16_t* p = reinterpret_cast<const uint16_t*>(src);
   uint16_t* q = reinterpret_cast<uint16_t*>(dst);
 #if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
-    for (; size != 0; p += 2, q++, size--) {  
-      *q = p[0];  
-    }  
+    for (; size != 0; p += 2, q++, size--) {
+      *q = p[0];
+    }
 #else
-    for (; size != 0; p += 2, q++, size--) {  
-     *q = p[1];  
-    }  
+    for (; size != 0; p += 2, q++, size--) {
+     *q = p[1];
+    }
 #endif
 }
 
@@ -35,15 +35,15 @@ void BFloat16ToFloat(const bfloat16* src, float* dst, int64 size) {
   const uint16_t* p = reinterpret_cast<const uint16_t*>(src);
   uint16_t* q = reinterpret_cast<uint16_t*>(dst);
 #if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
-    for (; size != 0; p++, q += 2, size--) {  
-      q[0] = *p;  
-      q[1] = 0;  
+    for (; size != 0; p++, q += 2, size--) {
+      q[0] = *p;
+      q[1] = 0;
+    }
+#else
+    for (; size != 0; p++, q += 2, size--) {
+      q[0] = 0;
+      q[1] = *p;
     }
-#else  
-    for (; size != 0; p++, q += 2, size--) {  
-      q[0] = 0;  
-      q[1] = *p;  
-    } 
 #endif
 }
 
diff --git a/tensorflow/core/framework/bfloat16.h b/tensorflow/core/framework/bfloat16.h
index b936e899d4..968c18bdd2 100644
--- a/tensorflow/core/framework/bfloat16.h
+++ b/tensorflow/core/framework/bfloat16.h
@@ -19,9 +19,9 @@ limitations under the License.
 #include "tensorflow/core/framework/numeric_types.h"
 #include "tensorflow/core/platform/types.h"
 
-#if defined(PLATFORM_WINDOWS)  
-#include "tensorflow/core/platform/windows/cpu_info.h"  
-#endif  
+#if defined(PLATFORM_WINDOWS)
+#include "tensorflow/core/platform/windows/cpu_info.h"
+#endif
 
 // Compact 16-bit encoding of floating point numbers. This representation uses
 // 1 bit for the sign, 8 bits for the exponent and 7 bits for the mantissa.  It
diff --git a/tensorflow/core/kernels/cast_op.h b/tensorflow/core/kernels/cast_op.h
index 7d3e0cbe3d..8fedf2c271 100644
--- a/tensorflow/core/kernels/cast_op.h
+++ b/tensorflow/core/kernels/cast_op.h
@@ -128,10 +128,10 @@ struct scalar_cast_op<::tensorflow::bfloat16, float> {
     float ret;
     uint16_t* p = reinterpret_cast<uint16_t*>(&ret);
 #if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
-    p[0] = a.value;  
-    p[1] = 0;  
-#else  
-    static_assert(::tensorflow::port::kLittleEndian, "Not a little endian system!");  
+    p[0] = a.value;
+    p[1] = 0;
+#else
+    static_assert(::tensorflow::port::kLittleEndian, "Not a little endian system!");
     p[0] = 0;
     p[1] = a.value;
 #endif
diff --git a/tensorflow/core/kernels/diag_op.cc b/tensorflow/core/kernels/diag_op.cc
index be862b82f1..86fa7dce36 100644
--- a/tensorflow/core/kernels/diag_op.cc
+++ b/tensorflow/core/kernels/diag_op.cc
@@ -108,7 +108,7 @@ class DiagPartOp : public OpKernel {
 };
 
 // Implementation of the functor specialization for CPU.
-// 
+//
 // According to the diagonal definition,
 // `output[i1,..., ik, i1,..., ik] = input[i1,..., ik]`,
 //
@@ -116,7 +116,7 @@ class DiagPartOp : public OpKernel {
 // pointer can be represent by coordinate [i1,..., ik],
 // where `index = i1*(s2*...*sk) + i2*(s3*...*sk) +... + ik`
 //
-// Let new_index is the offset of output's pointer with coordinate 
+// Let new_index is the offset of output's pointer with coordinate
 // [i1,..., ik, i1,..., ik], then we have
 // `new_index = i1*(s2*...sk*s1*...*sk) + i2*(s3*...*sk*s1*...*sk) +... + \
 //              ik*(s1*...*sk) + i1*(s2*...*sk) + i2*(s3*...*sk) +... + ik
diff --git a/tensorflow/core/kernels/diag_op_gpu.cu.cc b/tensorflow/core/kernels/diag_op_gpu.cu.cc
index 684f00ea61..d3c529d784 100644
--- a/tensorflow/core/kernels/diag_op_gpu.cu.cc
+++ b/tensorflow/core/kernels/diag_op_gpu.cu.cc
@@ -33,7 +33,7 @@ __global__ void DiagCudaKernel(const int num_threads,
                                const T* in,
                                T* out) {
   CUDA_1D_KERNEL_LOOP(index, num_threads) {
-    // Fill the diagonal elements or set to zero in other place. 
+    // Fill the diagonal elements or set to zero in other place.
     if (index % (1 + size) == 0) {
       out[index] = in[index / (1 + size)];
     } else {
diff --git a/tensorflow/core/kernels/queue_ops.cc b/tensorflow/core/kernels/queue_ops.cc
index d51dc4ecb0..17831b7437 100644
--- a/tensorflow/core/kernels/queue_ops.cc
+++ b/tensorflow/core/kernels/queue_ops.cc
@@ -429,7 +429,7 @@ class QueueIsClosedOp : public QueueOpKernel {
  public:
   explicit QueueIsClosedOp(OpKernelConstruction* context)
      : QueueOpKernel(context) {}
- 
+
  protected:
   void ComputeAsync(OpKernelContext* ctx, QueueInterface* queue,
                     DoneCallback callback) override {
diff --git a/tensorflow/core/kernels/sparse_matmul_op_test.cc b/tensorflow/core/kernels/sparse_matmul_op_test.cc
index a0c54805e2..f815ca9e34 100644
--- a/tensorflow/core/kernels/sparse_matmul_op_test.cc
+++ b/tensorflow/core/kernels/sparse_matmul_op_test.cc
@@ -284,12 +284,12 @@ class SparseMatmulOpTest : public ::testing::Test {
       uint16_t* data3_bfloat16_p =
           reinterpret_cast<uint16_t*>(data3_bfloat16) + i;
 #if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
-            data3_p[1] = 0;  
-            data3_bfloat16_p[0] = data3_p[0];  
+            data3_p[1] = 0;
+            data3_bfloat16_p[0] = data3_p[0];
 #else
-            data3_p[0] = 0;  
-            data3_bfloat16_p[0] = data3_p[1];  
-#endif  
+            data3_p[0] = 0;
+            data3_bfloat16_p[0] = data3_p[1];
+#endif
     }
   }
 
diff --git a/tensorflow/core/kernels/xsmm_conv2d_test.cc b/tensorflow/core/kernels/xsmm_conv2d_test.cc
index 381ea39b77..e294701246 100644
--- a/tensorflow/core/kernels/xsmm_conv2d_test.cc
+++ b/tensorflow/core/kernels/xsmm_conv2d_test.cc
@@ -73,7 +73,7 @@ LIBXSMM_INLINE void naive_copy_KCRS_to_RSCK(const float* kcrs, Tensor  &rsck, in
   LIBXSMM_VLA_DECL(4, const float,  input, kcrs, C, R, S);
   int r, s, c, k;
   auto output =  rsck.flat<float>();
- 
+
   for ( r = 0; r < R; r++ ) {
     for ( s = 0; s < S; s++ ) {
       for ( c = 0; c < C; c++ ) {
@@ -94,14 +94,14 @@ LIBXSMM_INLINE void zero_buf(float* buf, long size) {
     buf[i] = 0.0f;
   }
 }
- 
+
 LIBXSMM_INLINE void copy_buf(Tensor &dst,float *src,long size) {
   long  i;
   auto output =  dst.flat<float>();
-  for (i = 0; i < size; ++i) 
+  for (i = 0; i < size; ++i)
           output(i) = src[i];
 }
- 
+
 LIBXSMM_INLINE void init_buf(float* buf, long size, int initPos, int initOne)
 {
   int i;
@@ -110,7 +110,7 @@ LIBXSMM_INLINE void init_buf(float* buf, long size, int initPos, int initOne)
     buf[i] = (float)((initOne != 0) ? 1.0 : ((initPos != 0) ? drand48() : (0.05 - drand48()/10.0)));
   }
 }
- 
+
 
 
 LIBXSMM_INLINE void naive_conv_fp(naive_conv_t* param, const float* input, float* output, const float* filter)
@@ -138,11 +138,11 @@ LIBXSMM_INLINE void naive_conv_fp(naive_conv_t* param, const float* input, float
   int stride_w  = param->stride_w;
   /* loop counters */
   int img, ofm, ifm, oj, oi, ij, ii, kj, ki;
- 
+
   LIBXSMM_VLA_DECL(4,       float, output_t, output + (pad_w_out * ofwp + pad_h_out), nOfm, ofhp, ofwp);
   LIBXSMM_VLA_DECL(4, const float,  input_t,  input + (pad_w_in * ifwp + pad_h_in), nIfm, ifhp, ifwp);
   LIBXSMM_VLA_DECL(4, const float, filter_t, filter, nIfm, kh, kw);
- 
+
   for (img = 0; img < nImg; ++img) {
     for (ofm = 0; ofm < nOfm; ++ofm) {
       for (ifm = 0; ifm < nIfm; ++ifm) {
@@ -172,7 +172,7 @@ void RunXsmmVsGeneric() {}
 class XsmmConv2DTest : public OpsTestBase {
  protected:
   void MakeOp(int stride) {
-  
+
     TF_CHECK_OK(NodeDefBuilder("xsmm", "Conv2D")
                       .Input(FakeInput(DT_FLOAT))
                       .Input(FakeInput(DT_FLOAT))
@@ -184,7 +184,7 @@ class XsmmConv2DTest : public OpsTestBase {
     TF_ASSERT_OK(InitOp());
   }
 };
- 
+
 TEST_F(XsmmConv2DTest, Basic) {
      MakeOp(1);
 
@@ -206,13 +206,13 @@ TEST_F(XsmmConv2DTest, Basic) {
      int stride_h = stride;
      int pad_h = pad;
      int pad_w = pad;
- 
+
      int pad_h_in = pad_h;
      int pad_w_in = pad_w;
- 
+
      int pad_h_out = 0;
      int pad_w_out = 0;
- 
+
   /* deriving some values for naive code */
      int ofh = (ifh + 2 * pad_h - kh) / stride_h + 1;
      int ofw = (ifw + 2 * pad_w - kw) / stride_w + 1;
@@ -223,7 +223,7 @@ TEST_F(XsmmConv2DTest, Basic) {
 
 
     //Initialization of Filter and Image
-    
+
     /* allocate data */
      float *naive_input           = (float*)libxsmm_aligned_scratch( nImg*nIfm*ifhp*ifwp*sizeof(float), 2097152);
      float *naive_output          = (float*)libxsmm_aligned_scratch( nImg*nOfm*ofhp*ofwp*sizeof(float), 2097152);
@@ -232,21 +232,21 @@ TEST_F(XsmmConv2DTest, Basic) {
      init_buf(naive_input,          nImg*nIfm*ifhp*ifwp, 0, 0);
      zero_buf(naive_output,         nImg*nOfm*ofhp*ofwp);
      init_buf(naive_filter,         nOfm*nIfm*kh*kw, 0, 0);
-        
+
 
      Tensor image(DT_FLOAT,
                  {nImg, ifhp, ifwp, nIfm});
- 
- 
+
+
      Tensor filter(DT_FLOAT, {kh,kw,nIfm,nOfm});
- 
+
 
      naive_copy_NCHW_to_NHWC(naive_input, image, nImg, ifhp, ifwp, nIfm);
-     naive_copy_KCRS_to_RSCK(naive_filter, filter, kh, kw, nIfm, nOfm); 
+     naive_copy_KCRS_to_RSCK(naive_filter, filter, kh, kw, nIfm, nOfm);
 
 
     //Run naive convolution
-    
+
      naive_conv_t naive_param;
 
      naive_param.nImg = nImg;
@@ -274,8 +274,8 @@ TEST_F(XsmmConv2DTest, Basic) {
 
      naive_conv_fp(&naive_param, naive_input, naive_output, naive_filter);
 
- 
- 
+
+
      AddInputFromArray<float>(image.shape(), image.flat<float>());
      AddInputFromArray<float>(filter.shape(), filter.flat<float>());
 
@@ -283,7 +283,7 @@ TEST_F(XsmmConv2DTest, Basic) {
 
      //Run Op (TF)
      TF_ASSERT_OK(RunOpKernel());
- 
+
      // Check the output.
      Tensor expected(DT_FLOAT, {nImg,ofhp,ofwp, nOfm});
      naive_copy_NCHW_to_NHWC(naive_output, expected, nImg, ofhp, ofwp, nOfm);
@@ -329,15 +329,15 @@ TEST(XsmmConv2DTest, Basic) {
     desc.fuse_ops = LIBXSMM_DNN_CONV_FUSE_NONE;
     desc.options = LIBXSMM_DNN_CONV_OPTION_NONE;
     desc.datatype = LIBXSMM_DNN_DATATYPE_F32;
- 
+
     if (!CanUseXsmmConv2D(desc, data_format)) {
       return false;
     }
- 
+
     auto input_ptr = input.template flat<float>().data();
     auto filter_ptr = filter.template flat<float>().data();
     auto output_ptr = output->template flat<float>().data();
- 
+
     bool success = functor::XsmmFwdConv2D<CPUDevice, float>()(
         ctx, desc, input_ptr, filter_ptr, output_ptr);
     return success;
diff --git a/tensorflow/core/ops/image_ops.cc b/tensorflow/core/ops/image_ops.cc
index c3f8006415..13fbd2fa51 100644
--- a/tensorflow/core/ops/image_ops.cc
+++ b/tensorflow/core/ops/image_ops.cc
@@ -818,8 +818,8 @@ bounding box in `boxes` are encoded as `[y_min, x_min, y_max, x_max]`. The
 bounding box coordinates are floats in `[0.0, 1.0]` relative to the width and
 height of the underlying image.
 
-For example, if an image is 100 x 200 pixels (height x width) and the bounding 
-box is `[0.1, 0.2, 0.5, 0.9]`, the upper-left and bottom-right coordinates of 
+For example, if an image is 100 x 200 pixels (height x width) and the bounding
+box is `[0.1, 0.2, 0.5, 0.9]`, the upper-left and bottom-right coordinates of
 the bounding box will be `(40, 10)` to `(100, 50)` (in (x,y) coordinates).
 
 Parts of the bounding box may fall outside the image.
diff --git a/tensorflow/core/ops/nn_ops.cc b/tensorflow/core/ops/nn_ops.cc
index a242a13878..654e890b57 100644
--- a/tensorflow/core/ops/nn_ops.cc
+++ b/tensorflow/core/ops/nn_ops.cc
@@ -359,7 +359,7 @@ The size of 1D Tensors matches the dimension C of the 4D Tensors.
 y_backprop: A 4D Tensor for the gradient with respect to y.
 x: A 4D Tensor for input data.
 scale: A 1D Tensor for scaling factor, to scale the normalized x.
-reserve_space_1: When is_training is True, a 1D Tensor for the computed batch 
+reserve_space_1: When is_training is True, a 1D Tensor for the computed batch
                  mean to be reused in gradient computation. When is_training is
                  False, a 1D Tensor for the population mean to be reused in both
                  1st and 2nd order gradient computation.
@@ -407,7 +407,7 @@ The size of 1D Tensors matches the dimension C of the 4D Tensors.
 y_backprop: A 4D Tensor for the gradient with respect to y.
 x: A 4D Tensor for input data.
 scale: A 1D Tensor for scaling factor, to scale the normalized x.
-reserve_space_1: When is_training is True, a 1D Tensor for the computed batch 
+reserve_space_1: When is_training is True, a 1D Tensor for the computed batch
                  mean to be reused in gradient computation. When is_training is
                  False, a 1D Tensor for the population mean to be reused in both
                  1st and 2nd order gradient computation.
diff --git a/tensorflow/core/platform/default/build_config_root.bzl b/tensorflow/core/platform/default/build_config_root.bzl
index caeed0aa4a..c63fb28ff9 100644
--- a/tensorflow/core/platform/default/build_config_root.bzl
+++ b/tensorflow/core/platform/default/build_config_root.bzl
@@ -28,7 +28,7 @@ def tf_additional_verbs_deps():
       "//tensorflow:with_verbs_support": [
           "//tensorflow/contrib/verbs:verbs_server_lib",
           "//tensorflow/contrib/verbs:grpc_verbs_client",
-      ], 
+      ],
       "//conditions:default": [],
   })
 
diff --git a/tensorflow/core/profiler/README.md b/tensorflow/core/profiler/README.md
index 8ca26fa5dc..9e628b1065 100644
--- a/tensorflow/core/profiler/README.md
+++ b/tensorflow/core/profiler/README.md
@@ -48,7 +48,7 @@ bazel-bin/tensorflow/python/profiler/profiler_ui \
 # Create options to profile the time and memory information.
 builder = tf.profiler.ProfileOptionBuilder
 opts = builder(builder.time_and_memory()).order_by('micros').build()
-# Create a profiling context, set constructor argument `trace_steps`, 
+# Create a profiling context, set constructor argument `trace_steps`,
 # `dump_steps` to empty for explicit control.
 with tf.contrib.tfprof.ProfileContext('/tmp/train_dir',
                                       trace_steps=[],
diff --git a/tensorflow/docs_src/about/uses.md b/tensorflow/docs_src/about/uses.md
index d41818e10c..8818177a28 100644
--- a/tensorflow/docs_src/about/uses.md
+++ b/tensorflow/docs_src/about/uses.md
@@ -5,7 +5,7 @@ This page highlights TensorFlow models in real world use.
 
 ## Model zoo
 
-Please visit our collection of TensorFlow models in the 
+Please visit our collection of TensorFlow models in the
 [TensorFlow Zoo](https://github.com/tensorflow/models).
 
 If you have built a model with TensorFlow, please consider publishing it in
diff --git a/tensorflow/docs_src/api_guides/python/nn.md b/tensorflow/docs_src/api_guides/python/nn.md
index 75dbb04e7d..eb3b251099 100644
--- a/tensorflow/docs_src/api_guides/python/nn.md
+++ b/tensorflow/docs_src/api_guides/python/nn.md
@@ -73,7 +73,7 @@ The total padding applied along the height and width is computed as:
       pad_along_width = max(filter_width - strides[2], 0)
     else:
       pad_along_width = max(filter_width - (in_width % strides[2]), 0)
-    
+
 Finally, the padding on the top, bottom, left and right are:
 
     pad_top = pad_along_height // 2
@@ -351,7 +351,7 @@ p_i = max(s\cdot (n_o - 1) + k - n_i, 0)
 \end{equation}
 
 Remember that, for `'SAME'` padding,
-\\(n_o = \left \lceil{\frac{n_i}{s}}\right \rceil\\), as mentioned above. 
+\\(n_o = \left \lceil{\frac{n_i}{s}}\right \rceil\\), as mentioned above.
 We need to analyze in detail two cases:
 
 - \\(n_i \text{ mod } s = 0\\)
diff --git a/tensorflow/docs_src/community/documentation.md b/tensorflow/docs_src/community/documentation.md
index 77d4e0caec..003e0a25ec 100644
--- a/tensorflow/docs_src/community/documentation.md
+++ b/tensorflow/docs_src/community/documentation.md
@@ -10,10 +10,10 @@ particular, this document explains the following:
 
 You can view TensorFlow documentation on https://www.tensorflow.org, and you
 can view and edit the raw files on
-[GitHub](https://www.tensorflow.org/code/tensorflow/docs_src/). 
+[GitHub](https://www.tensorflow.org/code/tensorflow/docs_src/).
 We're publishing our docs on GitHub so everybody can contribute. Whatever gets
 checked in to `tensorflow/docs_src` will be published soon after on
-https://www.tensorflow.org. 
+https://www.tensorflow.org.
 
 Republishing TensorFlow documentation in different forms is absolutely allowed,
 but we are unlikely to accept other documentation formats (or the tooling to
@@ -237,7 +237,7 @@ If a module is accidentally imported, it typically breaks the doc generator
 even if the doc generator succeeds, unwanted symbols may show up in the
 docs. Check the generated docs to make sure that all symbols that are documented
 are expected. If there are symbols that shouldn’t be there, you have the
-following options for dealing with them: 
+following options for dealing with them:
 
 - Private symbols and imports
 - The `remove_undocumented` filter
diff --git a/tensorflow/docs_src/community/style_guide.md b/tensorflow/docs_src/community/style_guide.md
index 40a75a4736..a4c4e2674e 100644
--- a/tensorflow/docs_src/community/style_guide.md
+++ b/tensorflow/docs_src/community/style_guide.md
@@ -162,7 +162,7 @@ operation.
              it's present in the scope.
 
 * Layers that behave differently during training should take:
-  - `is_training`: `bool` indicator to conditionally choose different 
+  - `is_training`: `bool` indicator to conditionally choose different
                    computation paths (e.g. using `tf.cond`) during execution.
 
 Example:
diff --git a/tensorflow/docs_src/community/welcome.md b/tensorflow/docs_src/community/welcome.md
index 33740de5d5..a3abf25507 100644
--- a/tensorflow/docs_src/community/welcome.md
+++ b/tensorflow/docs_src/community/welcome.md
@@ -65,5 +65,5 @@ please read the following list carefully:
     [TensorFlow issues tracker](https://github.com/tensorflow/tensorflow/issues)
     on GitHub.  For example, use the issue tracker to request a
     new operation in TensorFlow.
-    
+
 
diff --git a/tensorflow/docs_src/deploy/hadoop.md b/tensorflow/docs_src/deploy/hadoop.md
index 7592cf828b..c4471562b9 100644
--- a/tensorflow/docs_src/deploy/hadoop.md
+++ b/tensorflow/docs_src/deploy/hadoop.md
@@ -32,8 +32,8 @@ be set:
     source ${HADOOP_HOME}/libexec/hadoop-config.sh
     ```
 
-*   **LD_LIBRARY_PATH**: To include the path to libjvm.so, and optionally the path 
-    to libhdfs.so if your Hadoop distribution does not install libhdfs.so in 
+*   **LD_LIBRARY_PATH**: To include the path to libjvm.so, and optionally the path
+    to libhdfs.so if your Hadoop distribution does not install libhdfs.so in
     `$HADOOP_HDFS_HOME/lib/native`. On Linux:
 
     ```shell
diff --git a/tensorflow/docs_src/extend/add_filesys.md b/tensorflow/docs_src/extend/add_filesys.md
index ea3a6fe53a..44ba198998 100644
--- a/tensorflow/docs_src/extend/add_filesys.md
+++ b/tensorflow/docs_src/extend/add_filesys.md
@@ -32,7 +32,7 @@ Note that TensorFlow already includes many filesystem implementations, such as:
 
     Note: NFS filesystems often mount as a POSIX interface, and so standard
     TensorFlow can work on top of NFS-mounted remote filesystems.
-    
+
 *   HDFS - the Hadoop File System
 *   GCS - Google Cloud Storage filesystem
 *   A "memory-mapped-file" filesystem
diff --git a/tensorflow/docs_src/extend/index.md b/tensorflow/docs_src/extend/index.md
index 3f30b9a8c2..00b168c6be 100644
--- a/tensorflow/docs_src/extend/index.md
+++ b/tensorflow/docs_src/extend/index.md
@@ -20,7 +20,7 @@ TensorFlow:
 
 Python is currently the only language supported by TensorFlow's API stability
 promises.  However, TensorFlow also provides functionality in C++, Java, and Go,
-plus community support for [Haskell](https://github.com/tensorflow/haskell) and 
+plus community support for [Haskell](https://github.com/tensorflow/haskell) and
 [Rust](https://github.com/tensorflow/rust).  If you'd like to create or
 develop TensorFlow features in a language other than these languages, read the
 following guide:
diff --git a/tensorflow/docs_src/get_started/get_started.md b/tensorflow/docs_src/get_started/get_started.md
index be14ab4026..231108215a 100644
--- a/tensorflow/docs_src/get_started/get_started.md
+++ b/tensorflow/docs_src/get_started/get_started.md
@@ -330,8 +330,8 @@ When run, it produces
 W: [-0.9999969] b: [ 0.99999082] loss: 5.69997e-11
 ```
 
-Notice that the loss is a very small number (very close to zero). If you run 
-this program, your loss may not be exactly the same as the aforementioned loss 
+Notice that the loss is a very small number (very close to zero). If you run
+this program, your loss may not be exactly the same as the aforementioned loss
 because the model is initialized with pseudorandom values.
 
 This more complicated program can still be visualized in TensorBoard
diff --git a/tensorflow/docs_src/install/install_linux.md b/tensorflow/docs_src/install/install_linux.md
index f7380bac8a..28b04bab95 100644
--- a/tensorflow/docs_src/install/install_linux.md
+++ b/tensorflow/docs_src/install/install_linux.md
@@ -51,15 +51,15 @@ must be installed on your system:
     <pre>
     $ <b>sudo apt-get install cuda-command-line-tools</b>
     </pre>
-    
+
     and add its path to your `LD_LIBRARY_PATH` environment variable:
 
-    <pre> 
-    $ <b>export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/cuda/extras/CUPTI/lib64</b> 
+    <pre>
+    $ <b>export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/cuda/extras/CUPTI/lib64</b>
     </pre>
 
     For CUDA Toolkit <= 7.5 do:
-    
+
     <pre>
     $ <b>sudo apt-get install libcupti-dev</b>
     </pre>
diff --git a/tensorflow/docs_src/install/install_sources.md b/tensorflow/docs_src/install/install_sources.md
index aa4ae6c876..dbc90e8112 100644
--- a/tensorflow/docs_src/install/install_sources.md
+++ b/tensorflow/docs_src/install/install_sources.md
@@ -143,7 +143,7 @@ The following NVIDIA <i>software</i> must be installed on your system:
     particularly the description of appending the appropriate pathname
     to your `LD_LIBRARY_PATH` environment variable.
 
-Finally, you must also install `libcupti` which for Cuda Toolkit >= 8.0 you do via 
+Finally, you must also install `libcupti` which for Cuda Toolkit >= 8.0 you do via
 
 <pre> $ <b>sudo apt-get install cuda-command-line-tools</b> </pre>
 
diff --git a/tensorflow/docs_src/mobile/android_build.md b/tensorflow/docs_src/mobile/android_build.md
index 030cd0d051..b5a1d5d7d1 100644
--- a/tensorflow/docs_src/mobile/android_build.md
+++ b/tensorflow/docs_src/mobile/android_build.md
@@ -66,7 +66,7 @@ them.
 
 ## Adding TensorFlow to your apps using Android Studio
 
-To add TensorFlow to your own apps on Android, the simplest way is to add the 
+To add TensorFlow to your own apps on Android, the simplest way is to add the
 following lines to your Gradle build file:
 
     allprojects {
@@ -74,7 +74,7 @@ following lines to your Gradle build file:
             jcenter()
         }
 	}
-											
+
     dependencies {
         compile 'org.tensorflow:tensorflow-android:+'
     }
diff --git a/tensorflow/docs_src/mobile/index.md b/tensorflow/docs_src/mobile/index.md
index 6bcd7d09d9..419ae7094a 100644
--- a/tensorflow/docs_src/mobile/index.md
+++ b/tensorflow/docs_src/mobile/index.md
@@ -2,7 +2,7 @@
 
 TensorFlow was designed to be a good deep learning solution for mobile
 platforms. Currently we have two solutions for deploying machine learning
-applications on mobile and embedded devices: 
+applications on mobile and embedded devices:
 @{$mobile/mobile_intro$TensorFlow for Mobile} and @{$mobile/tflite$TensorFlow Lite}.
 
 ## TensorFlow Lite versus TensorFlow Mobile
diff --git a/tensorflow/docs_src/mobile/ios_build.md b/tensorflow/docs_src/mobile/ios_build.md
index 2e6d3bf90e..a04655052f 100644
--- a/tensorflow/docs_src/mobile/ios_build.md
+++ b/tensorflow/docs_src/mobile/ios_build.md
@@ -24,7 +24,7 @@ If you'd like to add TensorFlow capabilities to your own app, do the following:
 
 - Open `YourProjectName.xcworkspace` and add your code.
 
-- In your app's **Build Settings**, make sure to add `$(inherited)` to the 
+- In your app's **Build Settings**, make sure to add `$(inherited)` to the
   **Other Linker Flags**, and **Header Search Paths** sections.
 
 ## Running the Samples
diff --git a/tensorflow/docs_src/mobile/optimizing.md b/tensorflow/docs_src/mobile/optimizing.md
index 1da8be5689..d9e8875c38 100644
--- a/tensorflow/docs_src/mobile/optimizing.md
+++ b/tensorflow/docs_src/mobile/optimizing.md
@@ -57,7 +57,7 @@ get one inference every two seconds.
 
 Having this estimate helps you plan for what you’ll be able to realistically
 achieve on a device. If the model is using too many ops, then there are a lot of
-opportunities to optimize the architecture to reduce that number. 
+opportunities to optimize the architecture to reduce that number.
 
 Advanced techniques include [SqueezeNet](https://arxiv.org/abs/1602.07360)
 and [MobileNet](https://arxiv.org/abs/1704.04861), which are architectures
@@ -278,7 +278,7 @@ The run above was on your desktop, but the tool also works on Android, which is
 where it’s most useful for mobile development. Here’s an example command line to
 run it on a 64-bit ARM device:
 
-    bazel build -c opt --config=android_arm64 \ 
+    bazel build -c opt --config=android_arm64 \
     tensorflow/tools/benchmark:benchmark_model
     adb push bazel-bin/tensorflow/tools/benchmark/benchmark_model /data/local/tmp
     adb push /tmp/tensorflow_inception_graph.pb /data/local/tmp/
diff --git a/tensorflow/docs_src/mobile/prepare_models.md b/tensorflow/docs_src/mobile/prepare_models.md
index 8fc65be35a..360ee302aa 100644
--- a/tensorflow/docs_src/mobile/prepare_models.md
+++ b/tensorflow/docs_src/mobile/prepare_models.md
@@ -131,9 +131,9 @@ needs to understand which parts of the graph are actually needed, and which are
 artifacts of the training process, like summarization ops. Only ops that
 contribute to calculating the given output nodes will be kept. If you know how
 your graph is going to be used, these should just be the names of the nodes you
-pass into `Session::Run()` as your fetch targets. The easiest way to find the 
+pass into `Session::Run()` as your fetch targets. The easiest way to find the
 node names is to inspect the Node objects while building your graph in python.
-Inspecting your graph in TensorBoard is another simple way.  You can get some 
+Inspecting your graph in TensorBoard is another simple way.  You can get some
 suggestions on likely outputs by running the [`summarize_graph` tool](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/tools/graph_transforms/README.md#inspecting-graphs).
 
 Because the output format for TensorFlow has changed over time, there are a
@@ -164,7 +164,7 @@ The trickiest part of this process is figuring out the names of the nodes you
 want to use as inputs and outputs during inference.  You'll need these anyway
 once you start to run inference, but you also need them here so that the
 transform can calculate which nodes are not needed on the inference-only
-path. These may not be obvious from the training code. The easiest way to 
+path. These may not be obvious from the training code. The easiest way to
 determine the node name is to explore the graph with TensorBoard.
 
 Remember that mobile applications typically gather their data from sensors and
@@ -187,9 +187,9 @@ output nodes.
 If you’ve just been given a frozen `GraphDef` file, and are not sure about the
 contents, try using the `summarize_graph` tool to print out information
 about the inputs and outputs it finds from the graph structure. Here’s an
-example with the original Inception v3 file: 
+example with the original Inception v3 file:
 
-    bazel run tensorflow/tools/graph_transforms:summarize_graph -- 
+    bazel run tensorflow/tools/graph_transforms:summarize_graph --
     --in_graph=tensorflow_inception_graph.pb
 
 Once you have an idea of what the input and output nodes are, you can feed them
@@ -259,7 +259,7 @@ on how to do this, and also see @{$mobile/optimizing#binary_size$Optimizing} for
 more on reducing your binary size.
 
 ### Locate the implementation
-   
+
 Operations are broken into two parts. The first is the op definition, which
 declares the signature of the operation, which inputs, outputs, and attributes
 it has. These take up very little space, and so all are included by default. The
@@ -267,7 +267,7 @@ implementations of the op computations are done in kernels, which live in the
 `tensorflow/core/kernels` folder. You need to compile the C++ file containing
 the kernel implementation of the op you need into the library. To figure out
 which file that is, you can search for the operation name in the source
-files. 
+files.
 
 [Here’s an example search in github](https://github.com/search?utf8=%E2%9C%93&q=repo%3Atensorflow%2Ftensorflow+extension%3Acc+path%3Atensorflow%2Fcore%2Fkernels+REGISTER+Mul&type=Code&ref=searchresults).
 
diff --git a/tensorflow/docs_src/mobile/tflite/index.md b/tensorflow/docs_src/mobile/tflite/index.md
index 59daa2fe25..49d93669a2 100644
--- a/tensorflow/docs_src/mobile/tflite/index.md
+++ b/tensorflow/docs_src/mobile/tflite/index.md
@@ -40,7 +40,7 @@ TensorFlow Lite provides an interface to leverage hardware acceleration, if
 available on the device. It does so via the Android Neural Networks library,
 released as part of Android O-MR1.
 
-## Why do we need a new mobile-specific library? 
+## Why do we need a new mobile-specific library?
 
 Machine Learning is changing the computing paradigm, and we see an emerging
 trend of new use cases on mobile and embedded devices. Consumer expectations are
@@ -67,7 +67,7 @@ There are several factors which are fueling interest in this domain:
   connected to a network.
 
 We believe the next wave of machine learning applications will have significant
-processing on mobile and embedded devices. 
+processing on mobile and embedded devices.
 
 ## TensorFlow Lite developer preview highlights
 
diff --git a/tensorflow/docs_src/programmers_guide/saved_model.md b/tensorflow/docs_src/programmers_guide/saved_model.md
index 8731cae0d7..34e8e5faf5 100644
--- a/tensorflow/docs_src/programmers_guide/saved_model.md
+++ b/tensorflow/docs_src/programmers_guide/saved_model.md
@@ -160,7 +160,7 @@ Notes:
 
 ### Inspect variables in a checkpoint
 
-We can quickly inspect variables in a checkpoint with the 
+We can quickly inspect variables in a checkpoint with the
 [`inspect_checkpoint`](https://www.tensorflow.org/code/tensorflow/python/tools/inspect_checkpoint.py) library.
 
 Continuing from the save/restore examples shown earlier:
diff --git a/tensorflow/docs_src/programmers_guide/tensors.md b/tensorflow/docs_src/programmers_guide/tensors.md
index 88eb277e35..47d4db2a56 100644
--- a/tensorflow/docs_src/programmers_guide/tensors.md
+++ b/tensorflow/docs_src/programmers_guide/tensors.md
@@ -43,8 +43,8 @@ generating a random number.
 
 The **rank** of a `tf.Tensor` object is its number of dimensions. Synonyms for
 rank include **order** or **degree** or **n-dimension**.
-Note that rank in TensorFlow is not the same as matrix rank in mathematics. 
-As the following table shows, each rank in TensorFlow corresponds to a 
+Note that rank in TensorFlow is not the same as matrix rank in mathematics.
+As the following table shows, each rank in TensorFlow corresponds to a
 different mathematical entity:
 
 Rank | Math entity
@@ -56,7 +56,7 @@ Rank | Math entity
 n | n-Tensor (you get the idea)
 
 
-### Rank 0 
+### Rank 0
 
 The following snippet demonstrates creating a few rank 0 variables:
 
@@ -108,7 +108,7 @@ my_image = tf.zeros([10, 299, 299, 3])  # batch x height x width x color
 ### Getting a `tf.Tensor` object's rank
 
 To determine the rank of a `tf.Tensor` object, call the `tf.rank` method.
-For example, the following method programmatically determines the rank 
+For example, the following method programmatically determines the rank
 of the `tf.Tensor` defined in the previous section:
 
 ```python
diff --git a/tensorflow/docs_src/programmers_guide/variables.md b/tensorflow/docs_src/programmers_guide/variables.md
index f310b89380..16753c931f 100644
--- a/tensorflow/docs_src/programmers_guide/variables.md
+++ b/tensorflow/docs_src/programmers_guide/variables.md
@@ -37,7 +37,7 @@ You may optionally specify the `dtype` and initializer to `tf.get_variable`. For
 example:
 
 ``` python
-my_int_variable = tf.get_variable("my_int_variable", [1, 2, 3], dtype=tf.int32, 
+my_int_variable = tf.get_variable("my_int_variable", [1, 2, 3], dtype=tf.int32,
   initializer=tf.zeros_initializer)
 ```
 
@@ -45,7 +45,7 @@ TensorFlow provides many convenient initializers. Alternatively, you may
 initialize a `tf.Variable` to have the value of a `tf.Tensor`. For example:
 
 ``` python
-other_variable = tf.get_variable("other_variable", dtype=tf.int32, 
+other_variable = tf.get_variable("other_variable", dtype=tf.int32,
   initializer=tf.constant([23, 42]))
 ```
 
@@ -66,13 +66,13 @@ By default every `tf.Variable` gets placed in the following two collections:
 multiple devices,
  * `tf.GraphKeys.TRAINABLE_VARIABLES`--- variables for which TensorFlow will
    calculate gradients.
- 
+
 If you don't want a variable to be trainable, add it to the
 `tf.GraphKeys.LOCAL_VARIABLES` collection instead. For example, the following
 snippet demonstrates how to add a variable named `my_local` to this collection:
 
 ``` python
-my_local = tf.get_variable("my_local", shape=(), 
+my_local = tf.get_variable("my_local", shape=(),
 collections=[tf.GraphKeys.LOCAL_VARIABLES])
 ```
 
@@ -80,8 +80,8 @@ Alternatively, you can specify `trainable=False` as an argument to
 `tf.get_variable`:
 
 ``` python
-my_non_trainable = tf.get_variable("my_non_trainable", 
-                                   shape=(), 
+my_non_trainable = tf.get_variable("my_non_trainable",
+                                   shape=(),
                                    trainable=False)
 ```
 
@@ -126,7 +126,7 @@ cluster_spec = {
     "ps": ["ps0:2222", "ps1:2222"],
     "worker": ["worker0:2222", "worker1:2222", "worker2:2222"]}
 with tf.device(tf.train.replica_device_setter(cluster=cluster_spec)):
-  v = tf.get_variable("v", shape=[20, 20])  # this variable is placed 
+  v = tf.get_variable("v", shape=[20, 20])  # this variable is placed
                                             # in the parameter server
                                             # by the replica_device_setter
 ```
@@ -142,7 +142,7 @@ high-level frameworks such as `tf.contrib.slim`, `tf.estimator.Estimator` and
 Explicit initialization is otherwise useful because it allows you not to rerun
 potentially expensive initializers when reloading a model from a checkpoint as
 well as allowing determinism when randomly-initialized variables are shared in a
-distributed setting. 
+distributed setting.
 
 To initialize all trainable variables in one go, before training starts, call
 `tf.global_variables_initializer()`. This function returns a single operation
diff --git a/tensorflow/docs_src/tutorials/image_recognition.md b/tensorflow/docs_src/tutorials/image_recognition.md
index ddb771700a..df13eabead 100644
--- a/tensorflow/docs_src/tutorials/image_recognition.md
+++ b/tensorflow/docs_src/tutorials/image_recognition.md
@@ -42,7 +42,7 @@ For example, here are the results from [AlexNet] classifying some images:
 To compare models, we examine how often the model fails to predict the
 correct answer as one of their top 5 guesses -- termed "top-5 error rate".
 [AlexNet] achieved by setting a top-5 error rate of 15.3% on the 2012
-validation data set; [Inception (GoogLeNet)] achieved 6.67%; 
+validation data set; [Inception (GoogLeNet)] achieved 6.67%;
 [BN-Inception-v2] achieved 4.9%; [Inception-v3] reaches 3.46%.
 
 > How well do humans do on ImageNet Challenge? There's a [blog post] by
diff --git a/tensorflow/examples/android/src/org/tensorflow/demo/Classifier.java b/tensorflow/examples/android/src/org/tensorflow/demo/Classifier.java
index eabc724f7f..07995febaf 100644
--- a/tensorflow/examples/android/src/org/tensorflow/demo/Classifier.java
+++ b/tensorflow/examples/android/src/org/tensorflow/demo/Classifier.java
@@ -100,7 +100,7 @@ public interface Classifier {
   List<Recognition> recognizeImage(Bitmap bitmap);
 
   void enableStatLogging(final boolean debug);
-  
+
   String getStatString();
 
   void close();
diff --git a/tensorflow/examples/ios/README.md b/tensorflow/examples/ios/README.md
index 7d2eb870be..5bdaeb43ce 100644
--- a/tensorflow/examples/ios/README.md
+++ b/tensorflow/examples/ios/README.md
@@ -6,7 +6,7 @@ This folder contains examples of how to build applications for iOS devices using
  - You'll need Xcode 7.3 or later.
 
  - There are currently three examples: simple, benchmark, and camera. For now,
-   you can download the sample code by cloning the main tensorflow repository 
+   you can download the sample code by cloning the main tensorflow repository
    (we are planning to make the samples available as a separate repository
    later).
 
@@ -48,8 +48,8 @@ open tf_simple_example.xcworkspace # obs, not the .xcodeproj directory
 ### Troubleshooting
 
  - Make sure you use the TensorFlow-experimental pod (and not TensorFlow).
-  
- - The TensorFlow-experimental pod is current about ~450MB. The reason it is 
+
+ - The TensorFlow-experimental pod is current about ~450MB. The reason it is
    so big is because we are bundling multiple platforms, and the pod includes
    all TensorFlow functionality (e.g. operations). The final app size after
    build is substantially smaller though (~25MB). Working with the complete
@@ -91,7 +91,7 @@ target 'YourProjectName'
    open up the Xcode project in the `camera` subfolder. Once you build and run
    that, you should get a live camera view that you can point at objects to get
    real-time recognition results.
-   
+
 ### Troubleshooting
 
 If you're hitting problems, here's a checklist of common things to investigate:
diff --git a/tensorflow/examples/tutorials/deepdream/README.md b/tensorflow/examples/tutorials/deepdream/README.md
index 3a715f6224..403e4b34f9 100644
--- a/tensorflow/examples/tutorials/deepdream/README.md
+++ b/tensorflow/examples/tutorials/deepdream/README.md
@@ -2,7 +2,7 @@
 
 by [Alexander Mordvintsev](mailto:moralex@google.com)
 
-This directory contains Jupyter notebook that demonstrates a number of Convolutional Neural Network 
+This directory contains Jupyter notebook that demonstrates a number of Convolutional Neural Network
 image generation techniques implemented with TensorFlow:
 
 - visualizing individual feature channels and their combinations to explore the space of patterns learned by the neural network (see [GoogLeNet](http://storage.googleapis.com/deepdream/visualz/tensorflow_inception/index.html) and [VGG16](http://storage.googleapis.com/deepdream/visualz/vgg16/index.html) galleries)
@@ -11,8 +11,8 @@ image generation techniques implemented with TensorFlow:
 - using Laplacian Pyramid Gradient Normalization to produce smooth and colorful visuals at low cost
 - generating DeepDream-like images with TensorFlow
 
-You can view "deepdream.ipynb" directly on GitHub. Note that GitHub Jupyter notebook preview removes 
-embedded graph visualizations. You can still see them online 
+You can view "deepdream.ipynb" directly on GitHub. Note that GitHub Jupyter notebook preview removes
+embedded graph visualizations. You can still see them online
 [using nbviewer](http://nbviewer.jupyter.org/github/tensorflow/tensorflow/blob/master/tensorflow/examples/tutorials/deepdream/deepdream.ipynb)
 service.
 
@@ -23,5 +23,5 @@ In order to run the notebook locally, the following dependencies must be install
 - NumPy
 - Jupyter Notebook
 
-To open the notebook, run `ipython notebook` command in this directory, and 
+To open the notebook, run `ipython notebook` command in this directory, and
 select 'deepdream.ipynb' in the opened browser window.
diff --git a/tensorflow/examples/udacity/README.md b/tensorflow/examples/udacity/README.md
index 6faad294c2..f80c56d1c1 100644
--- a/tensorflow/examples/udacity/README.md
+++ b/tensorflow/examples/udacity/README.md
@@ -43,15 +43,15 @@ In addition, you may need to pass `--memory=8g` as an extra argument to
 `docker-machine` is a tool to provision and manage docker hosts, it supports multiple platform (ex. aws, gce, azure, virtualbox, ...). To create a new virtual machine locally with built-in docker engine, you can use
 
     docker-machine create -d virtualbox --virtualbox-memory 8196 tensorflow
-    
+
 `-d` means the driver for the cloud platform, supported drivers listed [here](https://docs.docker.com/machine/drivers/). Here we use virtualbox to create a new virtual machine locally. `tensorflow` means the name of the virtual machine, feel free to use whatever you like. You can use
 
     docker-machine ip tensorflow
-    
+
 to get the ip of the new virtual machine. To switch from default virtual machine to a new one (here we use tensorflow), type
 
     eval $(docker-machine env tensorflow)
-    
+
 Note that `docker-machine env tensorflow` outputs some environment variables such like `DOCKER_HOST`. Then your docker client is now connected to the docker host in virtual machine `tensorflow`
 
 * **I'm getting a TLS connection error.**
diff --git a/tensorflow/g3doc/README.txt b/tensorflow/g3doc/README.txt
index 6eaf1e1bda..ed648f8b6b 100644
--- a/tensorflow/g3doc/README.txt
+++ b/tensorflow/g3doc/README.txt
@@ -7,7 +7,7 @@ Documentation (on Github, tensorflow.org, and anywhere else we decide to
 serve it from) is now generated from the files in
 tensorflow/docs_src/ (for tutorials and other guides) and
 TensorFlow source code (for the API reference pages). If you see a problem with
-API reference, edit the code comments in the appropriate language. If you see a 
+API reference, edit the code comments in the appropriate language. If you see a
 problem with our other docs, edit the files in docs_src.
 
 To preview the results of your changes, or generate an offline copy of
diff --git a/tensorflow/java/src/gen/perl/tftypes-runall.pl b/tensorflow/java/src/gen/perl/tftypes-runall.pl
index a451ce92aa..65fe3b1506 100644
--- a/tensorflow/java/src/gen/perl/tftypes-runall.pl
+++ b/tensorflow/java/src/gen/perl/tftypes-runall.pl
@@ -1,13 +1,13 @@
 #!/usr/bin/perl
 #
 # Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-# 
+#
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
-# 
+#
 #     http://www.apache.org/licenses/LICENSE-2.0
-# 
+#
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
diff --git a/tensorflow/java/src/gen/perl/tftypes.pl b/tensorflow/java/src/gen/perl/tftypes.pl
index 115723ac8a..c7c62e916f 100644
--- a/tensorflow/java/src/gen/perl/tftypes.pl
+++ b/tensorflow/java/src/gen/perl/tftypes.pl
@@ -1,13 +1,13 @@
 #!/usr/bin/perl
 #
 # Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-# 
+#
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
-# 
+#
 #     http://www.apache.org/licenses/LICENSE-2.0
-# 
+#
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@@ -156,7 +156,7 @@ for (my $i = 1; $i <= $#info; $i++) {
                            ."   *  String elements are sequences of bytes from the last array dimension.\n";
             }
 
-    
+
             my $intro = ($trank > 0)
                 ?  "Creates a rank-$trank tensor of {\@code $jtype} elements."
                 :  "Creates a scalar tensor containing a single {\@code $jtype} element.";
diff --git a/tensorflow/java/src/gen/resources/Tensors.java.tmpl b/tensorflow/java/src/gen/resources/Tensors.java.tmpl
index 98e1588559..e615524c8e 100644
--- a/tensorflow/java/src/gen/resources/Tensors.java.tmpl
+++ b/tensorflow/java/src/gen/resources/Tensors.java.tmpl
@@ -11,7 +11,7 @@ public final class Tensors {
   private Tensors() {}
 
   /** Creates a scalar String tensor using the default, UTF-8 encoding.
-   * 
+   *
    *  @param data  The string to put into the new scalar tensor.
    */
   public static Tensor<String> create(String data) {
@@ -19,7 +19,7 @@ public final class Tensors {
   }
 
   /** Creates a scalar String tensor using a specified encoding.
-   * 
+   *
    *  @param charset The encoding from String to bytes.
    *  @param data    The string to put into the new scalar tensor.
    */
diff --git a/tensorflow/python/grappler/model_analyzer.i b/tensorflow/python/grappler/model_analyzer.i
index d74bd37c63..726143a0bb 100644
--- a/tensorflow/python/grappler/model_analyzer.i
+++ b/tensorflow/python/grappler/model_analyzer.i
@@ -48,7 +48,7 @@ string GenerateModelReport(const tensorflow::MetaGraphDef& metagraph) {
   if (!item) {
     return "Error: failed to preprocess metagraph: check your log file for errors";
   }
-  
+
   string suffix;
   tensorflow::grappler::ModelAnalyzer analyzer(*item);
 
diff --git a/tensorflow/stream_executor/cuda/cuda_platform.cc b/tensorflow/stream_executor/cuda/cuda_platform.cc
index 874ac1ab65..3a73846148 100644
--- a/tensorflow/stream_executor/cuda/cuda_platform.cc
+++ b/tensorflow/stream_executor/cuda/cuda_platform.cc
@@ -197,7 +197,7 @@ void CudaPlatform::UnregisterTraceListener(TraceListener* listener) {
 static void InitializeCudaPlatform() {
   // Disabling leak checking, MultiPlatformManager does not destroy its
   // registered platforms.
-  
+
   std::unique_ptr<cuda::CudaPlatform> platform(new cuda::CudaPlatform);
   SE_CHECK_OK(MultiPlatformManager::RegisterPlatform(std::move(platform)));
 }
diff --git a/tensorflow/stream_executor/lib/static_threadlocal.h b/tensorflow/stream_executor/lib/static_threadlocal.h
index 6e2bd0d455..02720cbd26 100644
--- a/tensorflow/stream_executor/lib/static_threadlocal.h
+++ b/tensorflow/stream_executor/lib/static_threadlocal.h
@@ -17,7 +17,7 @@ limitations under the License.
 #define TENSORFLOW_STREAM_EXECUTOR_LIB_STATIC_THREADLOCAL_H_
 
 #ifdef _MSC_VER
-#define __thread __declspec(thread) 
+#define __thread __declspec(thread)
 #endif
 
 // For POD types in TLS mode, s_obj_VAR is the thread-local variable.
diff --git a/tensorflow/tools/ci_build/README.md b/tensorflow/tools/ci_build/README.md
index 202fcb9101..f2161b700a 100644
--- a/tensorflow/tools/ci_build/README.md
+++ b/tensorflow/tools/ci_build/README.md
@@ -67,10 +67,10 @@ this UI, to see the logs for a failed build:
     the build tool divided the target into multiple shards or ran the test
     multiple times. Each test log is specific to the shard, run, and attempt.
     To see a specific log:
-    
+
     1.  Click on the log icon that is on the right next to the shard, run,
         and attempt number.
-        
+
     2.  In the grid that appears on the right, click on the specific shard,
         run, and attempt to view its log. You can also type the desired shard,
         run, or attempt number in the field above its grid.
diff --git a/tensorflow/tools/dist_test/scripts/dist_mnist_test.sh b/tensorflow/tools/dist_test/scripts/dist_mnist_test.sh
index ea4906588d..e703e78531 100755
--- a/tensorflow/tools/dist_test/scripts/dist_mnist_test.sh
+++ b/tensorflow/tools/dist_test/scripts/dist_mnist_test.sh
@@ -43,7 +43,7 @@
 # NOTES:
 # If you have the error "$'\r': command not found"
 # Please run the command below to remove trailing '\r' character that causes the error:
-#   sed -i 's/\r$//' dist_mnist_test.sh 
+#   sed -i 's/\r$//' dist_mnist_test.sh
 
 
 # Configurations
diff --git a/tensorflow/tools/docker/README.md b/tensorflow/tools/docker/README.md
index e35c58ff80..f46c56e11a 100644
--- a/tensorflow/tools/docker/README.md
+++ b/tensorflow/tools/docker/README.md
@@ -41,7 +41,7 @@ Note: If you would have a problem running nvidia-docker you may try the old meth
 we have used. But it is not recommended. If you find a bug in nvidia-docker, please report
 it there and try using nvidia-docker as described above.
 
-    $ # The old, not recommended way to run docker with gpu support: 
+    $ # The old, not recommended way to run docker with gpu support:
     $ export CUDA_SO=$(\ls /usr/lib/x86_64-linux-gnu/libcuda.* | xargs -I{} echo '-v {}:{}')
     $ export DEVICES=$(\ls /dev/nvidia* | xargs -I{} echo '--device {}:{}')
     $ docker run -it -p 8888:8888 $CUDA_SO $DEVICES gcr.io/tensorflow/tensorflow:latest-gpu
diff --git a/tensorflow/tools/graph_transforms/README.md b/tensorflow/tools/graph_transforms/README.md
index c7f7eca257..345d9eadb8 100644
--- a/tensorflow/tools/graph_transforms/README.md
+++ b/tensorflow/tools/graph_transforms/README.md
@@ -95,9 +95,9 @@ transforms to modify the graph with. The transforms are given as a list of
 names, and can each have arguments themselves. These transforms define the
 pipeline of modifications that are applied in order to produce the output.
 Sometimes you need some transforms to happen before others, and the ordering
-within the list lets you specify which happen first. 
-Note that the optimization 
-`remove_nodes(op=Identity, op=CheckNumerics)` will break the model with control 
+within the list lets you specify which happen first.
+Note that the optimization
+`remove_nodes(op=Identity, op=CheckNumerics)` will break the model with control
 flow operations, such as `tf.cond`, `tf.map_fn`, and `tf.while`.
 
 ## Inspecting Graphs
-- 
GitLab


From b924e9f4c380dc85df433106df5f3c6a875318ac Mon Sep 17 00:00:00 2001
From: scott <scotthuang1989@163.com>
Date: Mon, 27 Nov 2017 23:14:19 +0800
Subject: [PATCH 0297/1225] add extra document to parameter:num_epochs

---
 .../contrib/slim/python/slim/data/dataset_data_provider.py    | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/tensorflow/contrib/slim/python/slim/data/dataset_data_provider.py b/tensorflow/contrib/slim/python/slim/data/dataset_data_provider.py
index 82c6b5a619..41426a6508 100644
--- a/tensorflow/contrib/slim/python/slim/data/dataset_data_provider.py
+++ b/tensorflow/contrib/slim/python/slim/data/dataset_data_provider.py
@@ -62,7 +62,9 @@ class DatasetDataProvider(data_provider.DataProvider):
                seed=None,
                scope=None):
     """Creates a DatasetDataProvider.
-
+    Note: if `num_epochs` is not `None`,  local counter `epochs` will be created
+    by relevant function. Use `local_variables_initializer()` to initialize
+    local variables.
     Args:
       dataset: An instance of the Dataset class.
       num_readers: The number of parallel readers to use.
-- 
GitLab


From a7c11adad8fa445be1083467ceb76b2d7c98b005 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 27 Nov 2017 07:46:52 -0800
Subject: [PATCH 0298/1225] Update metric library to allow non-literal strings.

PiperOrigin-RevId: 177015295
---
 .../core/lib/monitoring/collection_registry.h |  4 +-
 tensorflow/core/lib/monitoring/metric_def.h   | 55 +++++++------------
 .../core/lib/monitoring/metric_def_test.cc    | 18 ++++++
 3 files changed, 39 insertions(+), 38 deletions(-)

diff --git a/tensorflow/core/lib/monitoring/collection_registry.h b/tensorflow/core/lib/monitoring/collection_registry.h
index 030f8e360a..113d37e07d 100644
--- a/tensorflow/core/lib/monitoring/collection_registry.h
+++ b/tensorflow/core/lib/monitoring/collection_registry.h
@@ -321,13 +321,13 @@ void MetricCollector<metric_kind, Value, NumLabels>::CollectValue(
     const std::array<string, NumLabels>& labels, const Value& value) {
   point_set_->points.emplace_back(new Point());
   auto* const point = point_set_->points.back().get();
-  const std::vector<StringPiece> label_descriptions =
+  const std::vector<string> label_descriptions =
       metric_def_->label_descriptions();
   point->labels.reserve(NumLabels);
   for (int i = 0; i < NumLabels; ++i) {
     point->labels.push_back({});
     auto* const label = &point->labels.back();
-    label->name = label_descriptions[i].ToString();
+    label->name = label_descriptions[i];
     label->value = labels[i];
   }
   internal::CollectValue(value, point);
diff --git a/tensorflow/core/lib/monitoring/metric_def.h b/tensorflow/core/lib/monitoring/metric_def.h
index 3459c2ab82..a7f14f9c94 100644
--- a/tensorflow/core/lib/monitoring/metric_def.h
+++ b/tensorflow/core/lib/monitoring/metric_def.h
@@ -43,24 +43,6 @@ enum class ValueType : int { kInt64 = 0, kHistogram, kString };
 // on this.
 namespace internal {
 
-// Ensures that the string is a compile-time string literal.
-class StringLiteral {
- public:
-  // We allow implicit conversions here on purpose.
-  template <int N>
-  StringLiteral(const char (&data)[N]) : literal_(data, N - 1) {}
-
-  // This ctor will be called for non-literals, causing compile-time failure.
-  template <typename NotStringLiteral>
-  StringLiteral(const NotStringLiteral& not_string_literal) = delete;
-
-  // Implicit conversion to StringPiece.
-  operator StringPiece() const { return literal_; }
-
- private:
-  const StringPiece literal_;
-};
-
 template <typename Value>
 ValueType GetValueType();
 
@@ -98,7 +80,7 @@ class AbstractMetricDef {
 
   StringPiece description() const { return description_; }
 
-  const std::vector<StringPiece> label_descriptions() const {
+  const std::vector<string>& label_descriptions() const {
     return label_descriptions_;
   }
 
@@ -106,23 +88,21 @@ class AbstractMetricDef {
   template <MetricKind kind, typename Value, int NumLabels>
   friend class MetricDef;
 
-  AbstractMetricDef(
-      const MetricKind kind, const ValueType value_type,
-      const internal::StringLiteral name,
-      const internal::StringLiteral description,
-      const std::vector<internal::StringLiteral>& label_descriptions)
+  AbstractMetricDef(const MetricKind kind, const ValueType value_type,
+                    const StringPiece name, const StringPiece description,
+                    const std::vector<string>& label_descriptions)
       : kind_(kind),
         value_type_(value_type),
-        name_(name),
-        description_(description),
-        label_descriptions_(std::vector<StringPiece>(
-            label_descriptions.begin(), label_descriptions.end())) {}
+        name_(name.ToString()),
+        description_(description.ToString()),
+        label_descriptions_(std::vector<string>(label_descriptions.begin(),
+                                                label_descriptions.end())) {}
 
   const MetricKind kind_;
   const ValueType value_type_;
-  const StringPiece name_;
-  const StringPiece description_;
-  const std::vector<StringPiece> label_descriptions_;
+  const string name_;
+  const string description_;
+  const std::vector<string> label_descriptions_;
 };
 
 // Metric definition.
@@ -130,15 +110,18 @@ class AbstractMetricDef {
 // A metric is defined by its kind, value-type, name, description and the
 // description of its labels.
 //
-// NOTE: We allow only string literals for the name, description and label
-// descriptions because these should be fixed at compile-time and shouldn't be
-// dynamic.
+// NOTE: Name, description, and label descriptions should be logically static,
+// but do not have to live for the lifetime of the MetricDef.
+//
+// By "logically static", we mean that they should never contain dynamic
+// information, but is static for the lifetime of the MetricDef, and
+// in-turn the metric; they do not need to be compile-time constants.
+// This allows for e.g. prefixed metrics in a CLIF wrapped environment.
 template <MetricKind metric_kind, typename Value, int NumLabels>
 class MetricDef : public AbstractMetricDef {
  public:
   template <typename... LabelDesc>
-  MetricDef(const internal::StringLiteral name,
-            const internal::StringLiteral description,
+  MetricDef(const StringPiece name, const StringPiece description,
             const LabelDesc&... label_descriptions)
       : AbstractMetricDef(metric_kind, internal::GetValueType<Value>(), name,
                           description, {label_descriptions...}) {
diff --git a/tensorflow/core/lib/monitoring/metric_def_test.cc b/tensorflow/core/lib/monitoring/metric_def_test.cc
index dc07a08e4f..66973b6b5f 100644
--- a/tensorflow/core/lib/monitoring/metric_def_test.cc
+++ b/tensorflow/core/lib/monitoring/metric_def_test.cc
@@ -41,6 +41,24 @@ TEST(MetricDefTest, Simple) {
   EXPECT_EQ("LabelName", metric_def1.label_descriptions()[0]);
 }
 
+TEST(MetricDefTest, StringsPersist) {
+  // Ensure string attributes of the metric are copied into the metric
+  string name = "/tensorflow/metric0";
+  string description = "test description";
+  string label_description = "test label description";
+  const MetricDef<MetricKind::kCumulative, int64, 1> metric_def(
+      name, description, label_description);
+
+  // Mutate the strings
+  name[4] = 'A';
+  description[4] = 'B';
+  label_description[4] = 'C';
+
+  EXPECT_NE(name, metric_def.name());
+  EXPECT_NE(description, metric_def.description());
+  EXPECT_NE(label_description, metric_def.label_descriptions()[0]);
+}
+
 }  // namespace
 }  // namespace monitoring
 }  // namespace tensorflow
-- 
GitLab


From 26f43e6a8e1c234060096f21f1fd57d3cf57cfbc Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 27 Nov 2017 08:18:45 -0800
Subject: [PATCH 0299/1225] Delete trailing whitespace in Python code

PiperOrigin-RevId: 177018504
---
 .../framework/python/ops/accumulate_n_v2.py   |  6 +-
 .../python/ops/accumulate_n_v2_eager_test.py  |  8 +-
 .../python/ops/accumulate_n_v2_test.py        |  8 +-
 .../single_image_random_dot_stereograms.py    |  8 +-
 tensorflow/contrib/rnn/python/ops/rnn_cell.py |  4 +-
 .../training/sgdr_learning_rate_decay.py      |  6 +-
 .../python/client/session_partial_run_test.py |  4 +-
 .../inputs/queues/feeding_functions.py        | 12 +--
 .../python/framework/tensor_util_test.py      | 78 +++++++++----------
 .../python/kernel_tests/cast_op_test.py       |  8 +-
 .../tools/optimize_for_inference_test.py      |  2 +-
 11 files changed, 72 insertions(+), 72 deletions(-)

diff --git a/tensorflow/contrib/framework/python/ops/accumulate_n_v2.py b/tensorflow/contrib/framework/python/ops/accumulate_n_v2.py
index a0667bd489..2375ee4f55 100644
--- a/tensorflow/contrib/framework/python/ops/accumulate_n_v2.py
+++ b/tensorflow/contrib/framework/python/ops/accumulate_n_v2.py
@@ -48,7 +48,7 @@ def accumulate_n_v2(inputs, shape=None, tensor_dtype=None, name=None):
   tf.accumulate_n_v2([a, b, a])  # [[7, 4], [6, 14]]
 
   # Explicitly pass shape and type
-  tf.accumulate_n_v2([a, b, a], shape=[2, 2], tensor_dtype=tf.int32)  
+  tf.accumulate_n_v2([a, b, a], shape=[2, 2], tensor_dtype=tf.int32)
                                                                    # [[7,  4],
                                                                    #  [6, 14]]
   ```
@@ -93,7 +93,7 @@ def accumulate_n_v2(inputs, shape=None, tensor_dtype=None, name=None):
   elif len(inputs) == 1 and name is not None:
     return array_ops.identity(inputs[0], name=name)
   elif context.in_eager_mode():
-    # TemporaryVariable not currently supported in eager mode; fall back 
+    # TemporaryVariable not currently supported in eager mode; fall back
     # onto AddN for now.
     # TODO(frreiss) remove this once the lifetime of eager variables gets
     # addressed
@@ -101,7 +101,7 @@ def accumulate_n_v2(inputs, shape=None, tensor_dtype=None, name=None):
   else:
     return gen_math_ops._accumulate_nv2(inputs, name=name, shape=shape)
 
-# The following code should eventually be merged into 
+# The following code should eventually be merged into
 # tensorflow/python/ops/math_grad.py
 @ops.RegisterGradient("AccumulateNV2")
 def _AddNGrad(op, grad):
diff --git a/tensorflow/contrib/framework/python/ops/accumulate_n_v2_eager_test.py b/tensorflow/contrib/framework/python/ops/accumulate_n_v2_eager_test.py
index c2229bb8ad..8f44698da8 100644
--- a/tensorflow/contrib/framework/python/ops/accumulate_n_v2_eager_test.py
+++ b/tensorflow/contrib/framework/python/ops/accumulate_n_v2_eager_test.py
@@ -12,10 +12,10 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""Tests for new version of accumulate_n op that will eventually go into 
+"""Tests for new version of accumulate_n op that will eventually go into
 `ops.math_ops`.
 
-These test cases spefically exercise the `eager` APIs. They need to be in a 
+These test cases spefically exercise the `eager` APIs. They need to be in a
 separate file from the remaining tests because eager mode is currently something
 you can turn on but can't turn off for the lifetime of the current process."""
 from __future__ import absolute_import
@@ -64,7 +64,7 @@ class AccumulateNV2EagerTest(test_util.TensorFlowTestCase):
     np.random.seed(42)
     num_inputs = 3
     input_vars = [
-        resource_variable_ops.ResourceVariable(10.0 * np.random.random(), 
+        resource_variable_ops.ResourceVariable(10.0 * np.random.random(),
                                                name="t%d" % i)
         for i in range(0, num_inputs)
     ]
@@ -72,7 +72,7 @@ class AccumulateNV2EagerTest(test_util.TensorFlowTestCase):
     def fn(first, second, third):
       return av2.accumulate_n_v2([first, second, third])
 
-    grad_fn = backprop.gradients_function(fn)      
+    grad_fn = backprop.gradients_function(fn)
     grad = grad_fn(input_vars[0], input_vars[1], input_vars[2])
     self.assertAllEqual(np.repeat(1.0, num_inputs), # d/dx (x + y + ...) = 1
                         [elem.numpy() for elem in grad])
diff --git a/tensorflow/contrib/framework/python/ops/accumulate_n_v2_test.py b/tensorflow/contrib/framework/python/ops/accumulate_n_v2_test.py
index 3386e849d5..b5e9f8df79 100644
--- a/tensorflow/contrib/framework/python/ops/accumulate_n_v2_test.py
+++ b/tensorflow/contrib/framework/python/ops/accumulate_n_v2_test.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""Tests for new version of accumulate_n op that will eventually go into 
+"""Tests for new version of accumulate_n op that will eventually go into
 `ops.math_ops`."""
 from __future__ import absolute_import
 from __future__ import division
@@ -102,21 +102,21 @@ class AccumulateNV2Test(test_util.TensorFlowTestCase):
       with self.assertRaises(ValueError):
         a = variables.Variable(np.array([0.1,0.2]))
         b = variables.Variable(np.array([[0.3],[0.4]]))
-        tf_val = av2.accumulate_n_v2([a,b]) 
+        tf_val = av2.accumulate_n_v2([a,b])
 
   def testWrongType(self):
     with self.test_session():
       with self.assertRaises(TypeError):
         a = variables.Variable(0.2, dtype=np.float32)
         b = variables.Variable(0.1, dtype=np.float32)
-        tf_val = av2.accumulate_n_v2([a,b], tensor_dtype=np.int32) 
+        tf_val = av2.accumulate_n_v2([a,b], tensor_dtype=np.int32)
 
   def testWrongTypeOneInput(self):
     # Scenario that used to trigger a bug, even when testWrongType() worked
     with self.test_session():
       with self.assertRaises(TypeError):
         a = variables.Variable(0.2, dtype=np.float32)
-        tf_val = av2.accumulate_n_v2([a], tensor_dtype=np.int32) 
+        tf_val = av2.accumulate_n_v2([a], tensor_dtype=np.int32)
 
 
 if __name__ == "__main__":
diff --git a/tensorflow/contrib/image/python/ops/single_image_random_dot_stereograms.py b/tensorflow/contrib/image/python/ops/single_image_random_dot_stereograms.py
index 5cccf26028..bb766e59d2 100755
--- a/tensorflow/contrib/image/python/ops/single_image_random_dot_stereograms.py
+++ b/tensorflow/contrib/image/python/ops/single_image_random_dot_stereograms.py
@@ -68,7 +68,7 @@ def single_image_random_dot_stereograms(
   ```
 
   Args:
-    depth_values: A `Tensor`. Must be one of the following types: 
+    depth_values: A `Tensor`. Must be one of the following types:
       `float64`, `float32`, `int64`, `int32`.  Z values of data to encode
       into 'output_data_window' window, lower further away {0.0 floor(far),
       1.0 ceiling(near) after norm}, must be 2-D tensor
@@ -84,17 +84,17 @@ def single_image_random_dot_stereograms(
     mu: An optional `float`. Defaults to `0.3333`.
       Depth of field, Fraction of viewing distance (eg. 1/3 = 0.3333)
     normalize: An optional `bool`. Defaults to `True`.
-      Normalize input data to [0.0, 1.0] 
+      Normalize input data to [0.0, 1.0]
     normalize_max: An optional `float`. Defaults to `-100`.
       Fix MAX value for Normalization (0.0) - if < MIN, autoscale
     normalize_min: An optional `float`. Defaults to `100`.
       Fix MIN value for Normalization (0.0) - if > MAX, autoscale
     border_level: An optional `float`. Defaults to `0`.
-      Value of bord in depth 0.0 {far} to 1.0 {near} 
+      Value of bord in depth 0.0 {far} to 1.0 {near}
     number_colors: An optional `int`. Defaults to `256`. 2 (Black &
       White), 256 (grayscale), and Numbers > 256 (Full Color) are
       supported
-    output_image_shape: An optional `tf.TensorShape` or list of `ints`. 
+    output_image_shape: An optional `tf.TensorShape` or list of `ints`.
       Defaults to shape `[1024, 768, 1]`. Defines output shape of returned
       image in '[X,Y, Channels]' 1-grayscale, 3 color; channels will be
       updated to 3 if number_colors > 256
diff --git a/tensorflow/contrib/rnn/python/ops/rnn_cell.py b/tensorflow/contrib/rnn/python/ops/rnn_cell.py
index 0698d40438..289359e5ec 100644
--- a/tensorflow/contrib/rnn/python/ops/rnn_cell.py
+++ b/tensorflow/contrib/rnn/python/ops/rnn_cell.py
@@ -2120,7 +2120,7 @@ class Conv3DLSTMCell(ConvLSTMCell):
 def _conv(args, filter_size, num_features, bias, bias_start=0.0):
   """convolution:
   Args:
-    args: a Tensor or a list of Tensors of dimension 3D, 4D or 5D, 
+    args: a Tensor or a list of Tensors of dimension 3D, 4D or 5D,
     batch x n, Tensors.
     filter_size: int tuple of filter height and width.
     num_features: int, number of features.
@@ -2214,7 +2214,7 @@ class GLSTMCell(rnn_cell_impl.RNNCell):
         has the given variables, an error is raised.
 
     Raises:
-      ValueError: If `num_units` or `num_proj` is not divisible by 
+      ValueError: If `num_units` or `num_proj` is not divisible by
         `number_of_groups`.
     """
     super(GLSTMCell, self).__init__(_reuse=reuse)
diff --git a/tensorflow/contrib/training/python/training/sgdr_learning_rate_decay.py b/tensorflow/contrib/training/python/training/sgdr_learning_rate_decay.py
index 0ef5f111b2..ed0f398e30 100644
--- a/tensorflow/contrib/training/python/training/sgdr_learning_rate_decay.py
+++ b/tensorflow/contrib/training/python/training/sgdr_learning_rate_decay.py
@@ -28,7 +28,7 @@ from tensorflow.python.ops import math_ops, control_flow_ops
 def sgdr_decay(learning_rate, global_step, initial_period_steps,
                t_mul=2.0, m_mul=1.0, name=None):
   """Implements Stochastic Gradient Descent with Warm Restarts (SGDR).
-  
+
   As described in "SGDR: Stochastic Gradient Descent
   with Warm Restarts" by Ilya Loshchilov & Frank Hutter, Proceedings of
   ICLR'2017, available at https://arxiv.org/pdf/1608.03983.pdf
@@ -48,7 +48,7 @@ def sgdr_decay(learning_rate, global_step, initial_period_steps,
   where `t_0` = `initial_period_steps` is the user-defined number of batch
   iterations (not epochs as in the paper) to be performed before the first
   restart is launched.
-  
+
   Then, we perform the first restart (i=1) by setting the learning rate to
   `learning_rate*(m_mul^i)`, where `m_mul in [0,1]` (set to 1 by default).
   The i-th restart runs for `t_i=t_0*(t_mul^i)` steps, i.e., every new
@@ -73,7 +73,7 @@ def sgdr_decay(learning_rate, global_step, initial_period_steps,
       Training dataset size: 10000
       If the user wants the first decay period to span across 5 epochs, then
       `initial_period_steps` = 5 * 10000/100 = 500
-  
+
       Train for 10000 batch iterations with the initial learning rate set to
       0.1, then restart to run 2 times longer, i.e, for 20000 batch iterations
       and with the initial learning rate 0.05, then restart again and again,
diff --git a/tensorflow/python/client/session_partial_run_test.py b/tensorflow/python/client/session_partial_run_test.py
index 6ecf0fc6c7..6a389b078a 100644
--- a/tensorflow/python/client/session_partial_run_test.py
+++ b/tensorflow/python/client/session_partial_run_test.py
@@ -199,11 +199,11 @@ class PartialRunTestMethods(object):
   def testPartialRunSetupNoFeedsPassed(self):
     sess = session.Session()
     r1 = constant_op.constant([6.0])
-   
+
     h = sess.partial_run_setup([r1])
     result1 = sess.partial_run(h, r1)
     self.assertEqual([6.0], result1)
-      
+
   def testPartialRunDirect(self):
     self.RunTestPartialRun(session.Session())
 
diff --git a/tensorflow/python/estimator/inputs/queues/feeding_functions.py b/tensorflow/python/estimator/inputs/queues/feeding_functions.py
index c0a287e922..75c0e61d47 100644
--- a/tensorflow/python/estimator/inputs/queues/feeding_functions.py
+++ b/tensorflow/python/estimator/inputs/queues/feeding_functions.py
@@ -47,13 +47,13 @@ except ImportError:
 
 
 def _fill_array(arr, seq, fillvalue=0):
-  """ 
-  Recursively fills padded arr with elements from seq. 
+  """
+  Recursively fills padded arr with elements from seq.
   If length of seq is less than arr padded length, fillvalue used.
 
   Args:
     arr: Padded tensor of shape [batch_size, ..., max_padded_dim_len].
-    seq: Non-padded list of data sampels of shape 
+    seq: Non-padded list of data sampels of shape
       [batch_size, ..., padded_dim(None)]
     fillvalue: Default fillvalue to use.
   """
@@ -73,12 +73,12 @@ def _pad_if_needed(batch_key_item, fillvalue=0):
   """ Returns padded batch.
 
   Args:
-    batch_key_item: List of data samples of any type with shape 
+    batch_key_item: List of data samples of any type with shape
       [batch_size, ..., padded_dim(None)].
     fillvalue: Default fillvalue to use.
 
   Returns:
-    Padded with zeros tensor of same type and shape 
+    Padded with zeros tensor of same type and shape
       [batch_size, ..., max_padded_dim_len].
 
   Raises:
@@ -375,7 +375,7 @@ def _enqueue_data(data,
       arrays, a numpy `ndarray`, or a generator producing these.
     NotImplementedError: padding and shuffling data at the same time.
     NotImplementedError: padding usage with non generator data type.
-  """ 
+  """
   with ops.name_scope(name):
     if isinstance(data, np.ndarray):
       types = [dtypes.int64, dtypes.as_dtype(data.dtype)]
diff --git a/tensorflow/python/framework/tensor_util_test.py b/tensorflow/python/framework/tensor_util_test.py
index b4f28cfce0..f2de69e159 100644
--- a/tensorflow/python/framework/tensor_util_test.py
+++ b/tensorflow/python/framework/tensor_util_test.py
@@ -51,9 +51,9 @@ class TensorUtilTest(test.TestCase):
     t = tensor_util.make_tensor_proto([10.0, 20.0, 30.0])
     if sys.byteorder == "big":
       self.assertProtoEquals("""
-        dtype: DT_FLOAT  
-        tensor_shape { dim { size: 3 } }  
-        tensor_content: "A \000\000A\240\000\000A\360\000\000"  
+        dtype: DT_FLOAT
+        tensor_shape { dim { size: 3 } }
+        tensor_content: "A \000\000A\240\000\000A\360\000\000"
         """, t)
     else:
       self.assertProtoEquals("""
@@ -69,9 +69,9 @@ class TensorUtilTest(test.TestCase):
     t = tensor_util.make_tensor_proto([10.0, 20.0, 30.0], dtype=dtypes.float32)
     if sys.byteorder == "big":
       self.assertProtoEquals("""
-        dtype: DT_FLOAT  
-        tensor_shape { dim { size: 3 } }  
-        tensor_content: "A \000\000A\240\000\000A\360\000\000"  
+        dtype: DT_FLOAT
+        tensor_shape { dim { size: 3 } }
+        tensor_content: "A \000\000A\240\000\000A\360\000\000"
         """, t)
     else:
       self.assertProtoEquals("""
@@ -87,9 +87,9 @@ class TensorUtilTest(test.TestCase):
     t = tensor_util.make_tensor_proto([10, 20, 30], dtype=dtypes.float32)
     if sys.byteorder == "big":
       self.assertProtoEquals("""
-        dtype: DT_FLOAT  
-        tensor_shape { dim { size: 3 } }  
-        tensor_content: "A \000\000A\240\000\000A\360\000\000"  
+        dtype: DT_FLOAT
+        tensor_shape { dim { size: 3 } }
+        tensor_content: "A \000\000A\240\000\000A\360\000\000"
         """, t)
     else:
       self.assertProtoEquals("""
@@ -106,9 +106,9 @@ class TensorUtilTest(test.TestCase):
     t = tensor_util.make_tensor_proto(arr, dtype=dtypes.float32)
     if sys.byteorder == "big":
       self.assertProtoEquals("""
-        dtype: DT_FLOAT  
-        tensor_shape { dim { size: 3 } }  
-        tensor_content: "A \000\000A\240\000\000A\360\000\000"  
+        dtype: DT_FLOAT
+        tensor_shape { dim { size: 3 } }
+        tensor_content: "A \000\000A\240\000\000A\360\000\000"
         """, t)
     else:
       self.assertProtoEquals("""
@@ -124,9 +124,9 @@ class TensorUtilTest(test.TestCase):
     t = tensor_util.make_tensor_proto([10.0, 20.0, 30.0], shape=[1, 3])
     if sys.byteorder == "big":
       self.assertProtoEquals("""
-        dtype: DT_FLOAT  
-        tensor_shape { dim { size: 1 } dim { size: 3 } }  
-        tensor_content: "A \000\000A\240\000\000A\360\000\000"  
+        dtype: DT_FLOAT
+        tensor_shape { dim { size: 1 } dim { size: 3 } }
+        tensor_content: "A \000\000A\240\000\000A\360\000\000"
         """, t)
     else:
       self.assertProtoEquals("""
@@ -142,9 +142,9 @@ class TensorUtilTest(test.TestCase):
     t = tensor_util.make_tensor_proto([10.0, 20.0, 30.0], shape=[3, 1])
     if sys.byteorder == "big":
       self.assertProtoEquals("""
-        dtype: DT_FLOAT  
-        tensor_shape { dim { size: 3 } dim { size: 1 } }  
-        tensor_content: "A \000\000A\240\000\000A\360\000\000"  
+        dtype: DT_FLOAT
+        tensor_shape { dim { size: 3 } dim { size: 1 } }
+        tensor_content: "A \000\000A\240\000\000A\360\000\000"
         """, t)
     else:
       self.assertProtoEquals("""
@@ -170,9 +170,9 @@ class TensorUtilTest(test.TestCase):
         np.array([[10.0, 20.0, 30.0]], dtype=np.float64))
     if sys.byteorder == "big":
       self.assertProtoEquals("""
-        dtype: DT_DOUBLE  
-        tensor_shape { dim { size: 1 } dim { size: 3 } }  
-        tensor_content: "@$\000\000\000\000\000\000@4\000\000\000\000\000\000@>\000\000\000\000\000\000"  
+        dtype: DT_DOUBLE
+        tensor_shape { dim { size: 1 } dim { size: 3 } }
+        tensor_content: "@$\000\000\000\000\000\000@4\000\000\000\000\000\000@>\000\000\000\000\000\000"
         """, t)
     else:
       self.assertProtoEquals("""
@@ -261,9 +261,9 @@ class TensorUtilTest(test.TestCase):
     t = tensor_util.make_tensor_proto([10, 20, 30, 40], shape=[2, 2])
     if sys.byteorder == "big":
       self.assertProtoEquals("""
-        dtype: DT_INT32  
-        tensor_shape { dim { size: 2 } dim { size: 2 } }  
-        tensor_content: "\000\000\000\\n\000\000\000\024\000\000\000\036\000\000\000("  
+        dtype: DT_INT32
+        tensor_shape { dim { size: 2 } dim { size: 2 } }
+        tensor_content: "\000\000\000\\n\000\000\000\024\000\000\000\036\000\000\000("
         """, t)
     else:
       self.assertProtoEquals("""
@@ -342,9 +342,9 @@ class TensorUtilTest(test.TestCase):
         [10, 20, 30], shape=[1, 3], dtype=dtypes.int64)
     if sys.byteorder == "big":
       self.assertProtoEquals("""
-        dtype: DT_INT64  
-        tensor_shape { dim { size: 1 } dim { size: 3 } }  
-        tensor_content: "\000\000\000\000\000\000\000\\n\000\000\000\000\000\000\000\024\000\000\000\000\000\000\000\036"  
+        dtype: DT_INT64
+        tensor_shape { dim { size: 1 } dim { size: 3 } }
+        tensor_content: "\000\000\000\000\000\000\000\\n\000\000\000\000\000\000\000\024\000\000\000\000\000\000\000\036"
         """, t)
     else:
       self.assertProtoEquals("""
@@ -360,9 +360,9 @@ class TensorUtilTest(test.TestCase):
     t = tensor_util.make_tensor_proto(np.array([10, 20, 30]))
     if sys.byteorder == "big":
       self.assertProtoEquals("""
-        dtype: DT_INT64  
-        tensor_shape { dim { size: 3 } }  
-        tensor_content: "\000\000\000\000\000\000\000\\n\000\000\000\000\000\000\000\024\000\000\000\000\000\000\000\036"  
+        dtype: DT_INT64
+        tensor_shape { dim { size: 3 } }
+        tensor_content: "\000\000\000\000\000\000\000\\n\000\000\000\000\000\000\000\024\000\000\000\000\000\000\000\036"
         """, t)
     else:
       self.assertProtoEquals("""
@@ -381,9 +381,9 @@ class TensorUtilTest(test.TestCase):
     t = tensor_util.make_tensor_proto(data, dtype=dtypes.qint32)
     if sys.byteorder == "big":
       self.assertProtoEquals("""
-        dtype: DT_QINT32  
-        tensor_shape { dim { size: 3 } }  
-        tensor_content: "\000\000\000\025\000\000\000\026\000\000\000\027"  
+        dtype: DT_QINT32
+        tensor_shape { dim { size: 3 } }
+        tensor_content: "\000\000\000\025\000\000\000\026\000\000\000\027"
         """, t)
     else:
       self.assertProtoEquals("""
@@ -418,9 +418,9 @@ class TensorUtilTest(test.TestCase):
     t = tensor_util.make_tensor_proto(data, dtype=dtypes.quint16)
     if sys.byteorder == "big":
       self.assertProtoEquals("""
-        dtype: DT_QUINT16  
-        tensor_shape { dim { size: 3 } }  
-        tensor_content: "\000\025\000\026\000\027"  
+        dtype: DT_QUINT16
+        tensor_shape { dim { size: 3 } }
+        tensor_content: "\000\025\000\026\000\027"
         """, t)
     else:
       self.assertProtoEquals("""
@@ -435,9 +435,9 @@ class TensorUtilTest(test.TestCase):
     t = tensor_util.make_tensor_proto(data, dtype=dtypes.qint16)
     if sys.byteorder == "big":
       self.assertProtoEquals("""
-        dtype: DT_QINT16  
-        tensor_shape { dim { size: 3 } }  
-        tensor_content: "\000\025\000\026\000\027"  
+        dtype: DT_QINT16
+        tensor_shape { dim { size: 3 } }
+        tensor_content: "\000\025\000\026\000\027"
         """, t)
     else:
       self.assertProtoEquals("""
diff --git a/tensorflow/python/kernel_tests/cast_op_test.py b/tensorflow/python/kernel_tests/cast_op_test.py
index c785f2358d..214d5cb3c0 100644
--- a/tensorflow/python/kernel_tests/cast_op_test.py
+++ b/tensorflow/python/kernel_tests/cast_op_test.py
@@ -144,9 +144,9 @@ class CastOpTest(test.TestCase):
 
     self._compare(np.inf, np.float32, np.inf, False)
     self._compare(np.inf, np.float64, np.inf, False)
-    if sys.byteorder == "big":  
-      self._compare(np.inf, np.int32, i4.max, False)  
-      self._compare(np.inf, np.int64, i8.max, False)  
+    if sys.byteorder == "big":
+      self._compare(np.inf, np.int32, i4.max, False)
+      self._compare(np.inf, np.int64, i8.max, False)
     else:
       # np.float64("np.inf").astype(np.int32) is negative on x86 but positive on ppc64le
       # Numpy link to relevant discussion - https://github.com/numpy/numpy/issues/9040
@@ -156,7 +156,7 @@ class CastOpTest(test.TestCase):
         self._compare(-np.inf, np.int64, i8.min, False)
       else:
         self._compare(np.inf, np.int32, i4.min, False)
-        self._compare(np.inf, np.int64, i8.min, False)  
+        self._compare(np.inf, np.int64, i8.min, False)
     self._compare(-np.inf, np.float32, -np.inf, False)
     self._compare(-np.inf, np.float64, -np.inf, False)
     self._compare(-np.inf, np.int32, i4.min, False)
diff --git a/tensorflow/python/tools/optimize_for_inference_test.py b/tensorflow/python/tools/optimize_for_inference_test.py
index 447057cfe9..6dd24c0dca 100644
--- a/tensorflow/python/tools/optimize_for_inference_test.py
+++ b/tensorflow/python/tools/optimize_for_inference_test.py
@@ -272,7 +272,7 @@ class OptimizeForInferenceTest(test.TestCase):
     for node in optimized_graph_def.node:
       self.assertNotEqual("Conv2D", node.op)
       self.assertNotEqual("MirrorPad", node.op)
-      
+
 
   def testFusePadAndConv(self):
     with self.test_session() as sess:
-- 
GitLab


From 8a27a24e959d3711a3974cf2eb963cecffc9e17d Mon Sep 17 00:00:00 2001
From: Mark Daoust <markdaoust@google.com>
Date: Mon, 27 Nov 2017 09:20:27 -0800
Subject: [PATCH 0300/1225] update premade_estimators.md

PiperOrigin-RevId: 177026849
---
 .../get_started/premade_estimators.md         | 425 ++++++++++++++++++
 1 file changed, 425 insertions(+)
 create mode 100644 tensorflow/docs_src/get_started/premade_estimators.md

diff --git a/tensorflow/docs_src/get_started/premade_estimators.md b/tensorflow/docs_src/get_started/premade_estimators.md
new file mode 100644
index 0000000000..ff839fd040
--- /dev/null
+++ b/tensorflow/docs_src/get_started/premade_estimators.md
@@ -0,0 +1,425 @@
+
+# Getting Started with TensorFlow
+
+This document introduces the TensorFlow programming environment and shows you
+how to write the Iris classification problem in TensorFlow.
+
+Prior to reading this document, do the following:
+
+* [Install TensorFlow](install/index.md).
+* If you installed TensorFlow with virtualenv or Anaconda, activate your
+  TensorFlow environment.
+* To keep the data import simple, our Iris example uses Pandas. You can
+  install Pandas with:
+
+      `pip install pandas`
+
+## Getting the sample code
+
+Take the following steps to get the sample code for this program:
+
+1. Clone the TensorFlow Models repository from github by entering the following
+   command:
+
+       `git clone https://github.com/tensorflow/models`
+
+1. Change directory within that branch to the location containing the examples
+   used in this document:
+
+       `cd models/samples/core/get_started/`
+
+The program described in this document is called `premade_estimator.py`.
+
+### Running the program
+
+You run TensorFlow programs as you would run any Python program. For example:
+
+``` bsh
+python premade_estimator.py
+```
+
+The program should output training logs and some predictions against a test
+set. For example, the first line in the following output shows that the model
+thinks there is a 99.6% chance that the first example in the test set is a
+Sentosa. Since the test set `expected "Setosa"`, this appears to be a good
+prediction.
+
+``` None
+...
+Prediction is "Sentosa" (99.6%), expected "Setosa"
+
+Prediction is "Versicolor" (99.8%), expected "Versicolor"
+
+Prediction is "Virginica" (97.9%), expected "Virginica"
+```
+
+If the program generates errors instead of answers, ask yourself the following
+questions:
+
+* Did you install TensorFlow properly?
+* Are you using the correct version of tensorflow?
+* Did you activate the environment you installed TensorFlow in? (This is
+  only relevant in certain installation environments.)
+
+## The programming stack
+
+Before getting into the details of the program itself, let's investigate the
+programming environment. As the following illustration shows, TensorFlow
+provides a programming stack consisting of multiple API layers:
+
+<div style="width:80%; margin:auto; margin-bottom:10px; margin-top:20px;">
+<img style="width:100%" src="../images/tensorflow_programming_environment.png">
+</div>
+<div style="text-align: center">
+The TensorFlow Programming Environment
+</div>
+
+We strongly recommend writing TensorFlow programs with the following APIs:
+
+* Estimators, which represent a complete model. The Estimator API provides
+  methods to train the model, to judge the model's accuracy, and to generate
+  predictions.
+* Datasets, which build a data input pipeline. The Dataset API has methods to
+  load and manipulate data, and feed it into your model. The Datasets API meshes
+  well with the Estimators API.
+
+## Classifying irises: an overview
+
+The sample program in this document builds and tests a model that
+classifies Iris flowers into three different species based on the size of their
+[sepals](https://en.wikipedia.org/wiki/Sepal) and
+[petals](https://en.wikipedia.org/wiki/Petal).
+
+<div style="width:80%; margin:auto; margin-bottom:10px; margin-top:20px;">
+<img style="width:100%"
+  alt="Petal geometry compared for three iris species: Iris setosa, Iris virginica, and Iris versicolor"
+  src="../images/iris_three_species.jpg">
+</div>
+**From left to right,
+[*Iris setosa*](https://commons.wikimedia.org/w/index.php?curid=170298) (by
+[Radomil](https://commons.wikimedia.org/wiki/User:Radomil), CC BY-SA 3.0),
+[*Iris versicolor*](https://commons.wikimedia.org/w/index.php?curid=248095) (by
+[Dlanglois](https://commons.wikimedia.org/wiki/User:Dlanglois), CC BY-SA 3.0),
+and [*Iris virginica*](https://www.flickr.com/photos/33397993@N05/3352169862)
+(by [Frank Mayfield](https://www.flickr.com/photos/33397993@N05), CC BY-SA
+2.0).**
+
+### The data set
+
+The Iris data set contains four features and one label.  The four features
+identify the following botanical characteristics of individual Iris flowers:
+
+* sepal length
+* sepal width
+* petal length
+* petal width
+
+Our model will represent these features as float32 numerical data.
+
+The label identifies the Iris species, which must be one of the following:
+
+* Iris setosa (0)
+* Iris versicolor (1)
+* Iris virginica (2)
+
+Our model will represent the label as `int32` categorical data.
+
+The following table shows three examples in the data set:
+
+|sepal length | sepal width | petal length | petal width| species (label) |
+|------------:|------------:|-------------:|-----------:|:---------------:|
+|         5.1 |         3.3 |          1.7 |        0.5 |   0 (Sentosa)   |
+|         5.0 |         2.3 |          3.3 |        1.0 |   1 (versicolor)|
+|         6.4 |         2.8 |          5.6 |        2.2 |   2 (virginica) |
+
+### The algorithm
+
+The program trains a Deep Neural Network classifier model having the following
+topology:
+
+* 2 hidden layers.
+* Each hidden layer contains 10 nodes.
+
+The following figure illustrates the features, hidden layers, and predictions
+(not all of the nodes in the hidden layers are shown):
+
+
+<div style="width:80%; margin:auto; margin-bottom:10px; margin-top:20px;">
+<img style="width:100%"
+  alt="A diagram of the network architecture: Inputs, 2 hidden layers, and outputs"
+  src="../images/iris_model.png">
+</div>
+<div style="text-align: center">
+The Model.
+</div>
+
+### Inference
+
+Running the trained model on an unlabeled example yields three predictions,
+namely, the likelihood that this flower is the given Iris species. The sum of
+those output predictions will be 1.0. For example, the prediction on an
+unlabeled example might be something like the following:
+
+* 0.03 for Iris Setosa
+* 0.95 for Iris Versicolor
+* 0.02 for Iris Virginica
+
+The preceding prediction indicates a 95% probability that the given unlabeled
+example is an Iris Versicolor.
+
+## Overview of programming with Estimators
+
+An Estimator is TensorFlow's high level representation of a complete model. It
+handles the details of initialization, logging, saving and restoring, and many
+other features so you can concentrate on your model. For more details see
+@{$programmers_guide/estimators}.
+
+An "Estimator" is any class derived from @{tf.estimator.Estimator}. TensorFlow
+provides a collection of
+[pre-made Estimators](https://developers.google.com/machine-learning/glossary/#pre-made_Estimator)
+(for example, `LinearRegressor`) to implement common ML algorithms. Beyond
+those, you may write your own
+[custom Estimators](https://developers.google.com/machine-learning/glossary/#custom_Estimator).
+We recommend using pre-made Estimators when just getting started with
+TensorFlow. After gaining expertise with the pre-made Estimators, we recommend
+optimizing your model by creating your own custom Estimators.
+
+To write a TensorFlow program based on pre-made Estimators, you must perform the
+following tasks:
+
+* Create one or more input functions.
+* Define the model's feature columns.
+* Instantiate an Estimator, specifying the feature columns and various
+  hyperparameters.
+* Call one or more methods on the Estimator object, passing the appropriate
+  input function as the source of the data.
+
+Let's see how those tasks are implemented in Iris.
+
+## Create input functions
+
+You must create input functions to supply data for training,
+evaluating, and prediction.
+
+An **input function** is a function that returns the following two-element
+tuple:
+
+* "features" - A Python dictionary in which:
+    * Each key is the name of a feature.
+    * Each value is an array containing all of that feature's values.
+* "label" - An array containing the values of the label for every example.
+
+Just to demonstrate the format of the input function here's a simple
+implementation:
+
+```python
+def input_evaluation_set():
+    features = {'SepalLength': np.array([6.4, 5.0]),
+                'SepalWidth':  np.array([2.8, 2.3]),
+                'PetalLength': np.array([5.6, 3.3]),
+                'PetalWidth':  np.array([2.2, 1.0])}
+    labels = np.array([2, 1])
+    return features, labels
+```
+
+Your input function may generate the "features" dictionary and "label" list any
+way you like. However, we recommend using TensorFlow's Dataset API, which can
+deftly parse all sorts of data. At a high-level, the Datasets API consists of
+the following classes:
+
+<div style="width:80%; margin:auto; margin-bottom:10px; margin-top:20px;">
+<img style="width:100%"
+  alt="A diagram showing subclasses of the Dataset class"
+  src="../images/dataset_classes.png">
+</div>
+
+
+Where:
+
+* Dataset: Base class containing methods to create and transform datasets. Also
+  allows you to initialize a dataset from data in memory, or from a Python
+  generator.
+* TextLineDataset: Reads lines from text files.
+* TFRecordDataset: Reads records from TFRecord files.
+* FixedLengthRecordDataset: Reads fixed size records from binary files.
+* Iterator: Provides a way to access one data set element at a time.
+
+The Dataset API can handle a lot of common cases for you. For example,
+using the Dataset API, you can easily read in records from a large collection
+of files in parallel and join them into a single stream.
+
+To keep things simple in this example we are going to load the data with pandas, and build our input pipeline from this in-memory data.
+
+Here is the input function used for training in this program:
+
+``` python
+def train_input_fn(features, labels, batch_size):
+    """An input function for training"""
+    # Convert the inputs to a Dataset.
+    dataset = tf.data.Dataset.from_tensor_slices((features, labels))
+
+    # Shuffle, repeat, and batch the examples.
+    dataset = dataset.shuffle(1000).repeat().batch(batch_size)
+
+    # Build the Iterator, and return the read end of the pipeline.
+    return dataset.make_one_shot_iterator().get_next()
+```
+
+## Define the Feature Columns
+
+A [**Feature Column**](https://developers.google.com/machine-learning/glossary/#feature_columns)
+is an object describing how the model should use raw input features from the
+features dictionary. When you build an Estimator model, you pass it a list of
+feature columns that describes each of the features you want the model to use.
+
+These objects are created by functions in the @{tf.feature_column} module. `tf.feature_column` methods provide many different ways to represent data.
+
+For Iris, the 4 raw features are numeric values, so we'll build a list of
+feature columns, to tell the Estimator model to represent each of the four
+features as 32-bit floating-point values. Therefore, the code to create the
+Feature Column is simply:
+
+```python
+# Feature columns describe how to use the input.
+my_feature_columns = []
+for key in train_x.keys():
+    my_feature_columns.append(tf.feature_column.numeric_column(key=key))
+```
+
+Feature Columns can be far more sophisticated than those we're showing here.
+<!--TODO(markdaoust) add link to feature_columns doc when it exists.-->
+
+Now that we have the description of how we want the model to represent the raw
+features, we can build the estimator.
+
+
+## Instantiate an Estimator
+
+The Iris problem is a classic classifier problem. Fortunately, TensorFlow
+provides several pre-made classifier Estimators, including:
+
+* @{tf.estimator.DNNClassifier}—for deep models that perform multi-class
+  classification.
+* @{tf.estimator.DNNLinearCombinedClassifier}—for wide-n-deep models.
+* @{tf.estimator.LinearClassifier}—for linear models that feed results into
+  binary classifiers.
+
+For the Iris problem, `tf.estimator.DNNClassifier` seems like the best choice.
+Here's how we instantiated this Estimator:
+
+```python
+# Build 2 hidden layer DNN with 10, 10 units respectively.
+classifier = tf.estimator.DNNClassifier(
+    feature_columns=my_feature_columns,
+    # Two hidden layers of 10 nodes each.
+    hidden_units=[10, 10],
+    # The model must choose between 3 classes.
+    n_classes=3)
+```
+
+## Train, Evaluate, and Predict
+
+Now that we have an Estimator object, we can call methods to do the following:
+
+* Train the model.
+* Evaluate the trained model.
+* Use the trained model to make predictions.
+
+### Train the model
+
+Train the model by calling the Estimator's `train` method as follows:
+
+```python
+# Train the Model.
+classifier.train(
+    input_fn=lambda:train_input_fn(train_x, train_y, args.batch_size),
+    steps=args.train_steps)
+```
+
+Here we wrap up our `input_fn` call in a [`lambda`](https://docs.python.org/3/tutorial/controlflow.html)
+to allow the Estimator to call it, at the correct time, with no arguments.
+The `steps` argument tells the method to stop training after a number of
+training steps.
+
+### Evaluate the trained model
+
+Now that the model has been trained, we can get some statistics on its
+performance. The following code block evaluates the accuracy of the trained
+model on the test data:
+
+```python
+# Evaluate the model.
+eval_result = classifier.evaluate(
+    input_fn=lambda:eval_input_fn(test_x, test_y, args.batch_size))
+
+print('\nTest set accuracy: {accuracy:0.3f}\n'.format(**eval_result))
+```
+
+Note how unlike our call to the `train` method, we did not pass the `steps`
+argument to evaluate. Our `eval_input_fn` doesn't use the `repeat` method on
+the dataset, so evaluation just runs to the end of the data.
+
+Running this code yields the following output (or something similar):
+
+```none
+Test set accuracy: 0.967
+```
+
+### Making predictions (inferring) from the trained model
+
+We now have a trained model that produces good evaluation results.
+We can now use the trained model to predict the species of an Iris flower
+based on some unlabeled measurments. As with training and evaluation, we make
+predictions using a single function call:
+
+```python
+# Generate predictions from the model
+expected = ['Setosa', 'Versicolor', 'Virginica']
+predict_x = {
+    'SepalLength': [5.1, 5.9, 6.9],
+    'SepalWidth': [3.3, 3.0, 3.1],
+    'PetalLength': [1.7, 4.2, 5.4],
+    'PetalWidth': [0.5, 1.5, 2.1],
+}
+
+predictions = classifier.predict(
+    input_fn=lambda:eval_input_fn(predict_x, batch_size=args.batch_size))
+```
+
+The `predict` method returns a Python iterable, yielding a dictionary of
+prediction results for each example. The following code prints a few
+predictions and their probabilities:
+
+
+``` python
+for pred_dict, expec in zip(predictions, expected):
+    template = ('\nPrediction is "{}" ({:.1f}%), expected "{}"')
+
+    class_id = pred_dict['class_ids'][0]
+    probability = pred_dict['probabilities'][class_id]
+    print(template.format(SPECIES[class_id], 100 * probability, expec))
+```
+
+Running the preceding code yields the following output:
+
+``` None
+...
+Prediction is "Sentosa" (99.6%), expected "Setosa"
+
+Prediction is "Versicolor" (99.8%), expected "Versicolor"
+
+Prediction is "Virginica" (97.9%), expected "Virginica"
+```
+
+## Next
+
+Now that you've gotten started writing TensorFlow programs.
+
+* For more on Datasets, see the
+  @{$programmers_guide/datasets$Programmer's guide} and
+  @{tf.data$reference documentation}.
+* For more on Estimators, see the
+  @{$programmers_guide/estimators$Programmer's guide} and
+  @{tf.estimator$reference documentation}.
+<!--TODO(markdaoust) add links to next get_started section when it exists.-->
+
-- 
GitLab


From e424ba4a6d6e2c10f78f7f899de3c5d8dfb2e8c9 Mon Sep 17 00:00:00 2001
From: Benoit Steiner <bsteiner@google.com>
Date: Mon, 27 Nov 2017 09:47:00 -0800
Subject: [PATCH 0301/1225] Track symbolic shapes through shapeN operations

PiperOrigin-RevId: 177029912
---
 .../core/common_runtime/shape_refiner.cc      |  2 ++
 .../grappler/costs/graph_properties_test.cc   | 26 +++++++++++++++++++
 2 files changed, 28 insertions(+)

diff --git a/tensorflow/core/common_runtime/shape_refiner.cc b/tensorflow/core/common_runtime/shape_refiner.cc
index d66865e45b..c82d57694a 100644
--- a/tensorflow/core/common_runtime/shape_refiner.cc
+++ b/tensorflow/core/common_runtime/shape_refiner.cc
@@ -707,6 +707,8 @@ Status ShapeRefiner::ConstantPartialShape(InferenceContext* target_context,
     *result = target_context->Scalar();
   } else if (src_op == "Shape") {
     *result = src_context->input(0);
+  } else if (src_op == "ShapeN") {
+    *result = src_context->input(input_edge->src_output());
   } else if (src_op == "Pack") {
     std::vector<DimensionHandle> dims;
     // Pack is concatenating its input scalars to form the shape tensor vector.
diff --git a/tensorflow/core/grappler/costs/graph_properties_test.cc b/tensorflow/core/grappler/costs/graph_properties_test.cc
index 74d48158a9..c11af5777a 100644
--- a/tensorflow/core/grappler/costs/graph_properties_test.cc
+++ b/tensorflow/core/grappler/costs/graph_properties_test.cc
@@ -825,6 +825,32 @@ TEST_F(GraphPropertiesTest, DoNotValidateColocationConstraints) {
   TF_EXPECT_OK(properties.InferStatically());
 }
 
+TEST_F(GraphPropertiesTest, ShapeTracking) {
+  tensorflow::Scope s = tensorflow::Scope::NewRootScope();
+  Output a =
+      ops::Placeholder(s.WithOpName("a"), DT_FLOAT,
+                       ops::Placeholder::Shape(PartialTensorShape({-1, -1})));
+  Output b =
+      ops::Placeholder(s.WithOpName("b"), DT_FLOAT,
+                       ops::Placeholder::Shape(PartialTensorShape({-1})));
+  Output zero = ops::Const(s.WithOpName("zero"), 0.0f, {});
+  auto shp = ops::ShapeN(s.WithOpName("shapes"), {a, b});
+  Output o1 = ops::Fill(s.WithOpName("o1"), shp[0], zero);
+  Output o2 = ops::Fill(s.WithOpName("o2"), shp[1], zero);
+
+  GrapplerItem item;
+  TF_CHECK_OK(s.ToGraphDef(&item.graph));
+
+  GraphProperties properties(item);
+  TF_CHECK_OK(properties.InferStatically());
+  const auto shape_a = properties.GetOutputProperties("a").at(0).shape();
+  const auto shape_b = properties.GetOutputProperties("b").at(0).shape();
+  const auto shape_o1 = properties.GetOutputProperties("o1").at(0).shape();
+  const auto shape_o2 = properties.GetOutputProperties("o2").at(0).shape();
+  EXPECT_EQ(shape_a.DebugString(), shape_o1.DebugString());
+  EXPECT_EQ(shape_b.DebugString(), shape_o2.DebugString());
+}
+
 }  // namespace
 }  // namespace grappler
 }  // namespace tensorflow
-- 
GitLab


From 4fbf63a8ba991113a6e35cbb3e4d14f2343dfbe4 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 27 Nov 2017 10:05:11 -0800
Subject: [PATCH 0302/1225] Removed deprecated Hasher alias from StringPiece.
 This will allow StringPiece to be replaced by an alias to absl::string_view.

PiperOrigin-RevId: 177033313
---
 tensorflow/core/lib/core/stringpiece.h | 2 --
 1 file changed, 2 deletions(-)

diff --git a/tensorflow/core/lib/core/stringpiece.h b/tensorflow/core/lib/core/stringpiece.h
index b2c6842151..89a1e26b81 100644
--- a/tensorflow/core/lib/core/stringpiece.h
+++ b/tensorflow/core/lib/core/stringpiece.h
@@ -105,8 +105,6 @@ class StringPiece {
 
   StringPiece substr(size_t pos, size_t n = npos) const;
 
-  using Hasher = ::tensorflow::StringPieceHasher;
-
   // Return a string that contains the copy of the referenced data.
   std::string ToString() const { return std::string(data_, size_); }
 
-- 
GitLab


From b115a9fc735d08c55235f99a1cdb194e0f7c5d0c Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 27 Nov 2017 10:25:14 -0800
Subject: [PATCH 0303/1225] [XLA] Enhancement to source tensor indexing. Change
 ElementalIrEmitter::ElementwiseSourceIndex to use the target index as a
 source index for the case where the two tensors have the same shape but
 different element types. This improves the implementation of fusion kernels
 by avoiding the calculation of the dimensional indices from the linear index
 for the source tensors.

PiperOrigin-RevId: 177036769
---
 .../compiler/xla/service/elemental_ir_emitter.cc       |  2 +-
 tensorflow/compiler/xla/shape_util.cc                  | 10 ++++++++++
 tensorflow/compiler/xla/shape_util.h                   |  5 +++++
 tensorflow/compiler/xla/shape_util_test.cc             |  2 ++
 4 files changed, 18 insertions(+), 1 deletion(-)

diff --git a/tensorflow/compiler/xla/service/elemental_ir_emitter.cc b/tensorflow/compiler/xla/service/elemental_ir_emitter.cc
index 97ced5dfdc..b9407818cd 100644
--- a/tensorflow/compiler/xla/service/elemental_ir_emitter.cc
+++ b/tensorflow/compiler/xla/service/elemental_ir_emitter.cc
@@ -905,7 +905,7 @@ llvm_ir::IrArray::Index ElementalIrEmitter::ElementwiseSourceIndex(
 
   // If no implicit broadcast is needed for this operand, returns the target
   // index as the source index.
-  if (ShapeUtil::Compatible(operand_shape, hlo.shape())) {
+  if (ShapeUtil::CompatibleIgnoringElementType(operand_shape, hlo.shape())) {
     return target_index;
   }
 
diff --git a/tensorflow/compiler/xla/shape_util.cc b/tensorflow/compiler/xla/shape_util.cc
index c0a0e13f07..74fa0b2f2e 100644
--- a/tensorflow/compiler/xla/shape_util.cc
+++ b/tensorflow/compiler/xla/shape_util.cc
@@ -553,6 +553,16 @@ StatusOr<Shape> ParseShapeStringInternal(tensorflow::StringPiece* s) {
   return SameDimensions(lhs, rhs) && SameElementType(lhs, rhs);
 }
 
+/* static */ bool ShapeUtil::CompatibleIgnoringElementType(const Shape& lhs,
+                                                           const Shape& rhs) {
+  if (lhs.element_type() == TUPLE) {
+    return rhs.element_type() == TUPLE &&
+           ContainersEqual(lhs.tuple_shapes(), rhs.tuple_shapes(),
+                           CompatibleIgnoringElementType);
+  }
+  return SameDimensions(lhs, rhs);
+}
+
 /* static */ int64 ShapeUtil::GetDimension(const Shape& shape,
                                            int64 dimension_number) {
   return shape.dimensions(GetDimensionNumber(shape, dimension_number));
diff --git a/tensorflow/compiler/xla/shape_util.h b/tensorflow/compiler/xla/shape_util.h
index 82a513a65a..2ea1bd95cb 100644
--- a/tensorflow/compiler/xla/shape_util.h
+++ b/tensorflow/compiler/xla/shape_util.h
@@ -190,6 +190,11 @@ class ShapeUtil {
   // compatibility.
   static bool Compatible(const Shape& lhs, const Shape& rhs);
 
+  // Returns true if the rank and dimension sizes are identical. Element type
+  // and layout are ignored. Tuple elements are compared recursively for
+  // compatibility.
+  static bool CompatibleIgnoringElementType(const Shape& lhs, const Shape& rhs);
+
   // Returns whether the lhs and rhs shapes are identical protobufs.
   static bool Equal(const Shape& lhs, const Shape& rhs);
 
diff --git a/tensorflow/compiler/xla/shape_util_test.cc b/tensorflow/compiler/xla/shape_util_test.cc
index 0ba542ad1b..4bce7ca51d 100644
--- a/tensorflow/compiler/xla/shape_util_test.cc
+++ b/tensorflow/compiler/xla/shape_util_test.cc
@@ -145,6 +145,7 @@ TEST(ShapeUtilTest, IncompatibleTuplesWithSwappedElements) {
   Shape tuple2 = ShapeUtil::MakeTupleShape(
       {ShapeUtil::MakeShape(F32, {3, 2}), ShapeUtil::MakeShape(PRED, {4, 5})});
   EXPECT_FALSE(ShapeUtil::Compatible(tuple1, tuple2));
+  EXPECT_FALSE(ShapeUtil::CompatibleIgnoringElementType(tuple1, tuple2));
 }
 
 TEST(ShapeUtilTest, IncompatibleTuplesWithDifferentPrimitiveType) {
@@ -153,6 +154,7 @@ TEST(ShapeUtilTest, IncompatibleTuplesWithDifferentPrimitiveType) {
   Shape tuple2 = ShapeUtil::MakeTupleShape(
       {ShapeUtil::MakeShape(PRED, {4, 5}), ShapeUtil::MakeShape(S32, {3, 2})});
   EXPECT_FALSE(ShapeUtil::Compatible(tuple1, tuple2));
+  EXPECT_TRUE(ShapeUtil::CompatibleIgnoringElementType(tuple1, tuple2));
 }
 
 TEST(ShapeUtilTest, IncompatibleTuplesWithDifferentDimensions) {
-- 
GitLab


From bf001790e84e6dc433ac39e7eaba7896b70fa9ef Mon Sep 17 00:00:00 2001
From: Peter Hawkins <phawkins@google.com>
Date: Mon, 27 Nov 2017 10:38:30 -0800
Subject: [PATCH 0304/1225] [TPU] Change in preparation for supporting
 model-parallel TPU computations.

PiperOrigin-RevId: 177038993
---
 tensorflow/BUILD                              |   1 +
 tensorflow/compiler/xla/array3d.h             |   2 +
 .../xla/service/computation_placer.cc         |   6 +
 tensorflow/contrib/cmake/tf_python.cmake      |   1 +
 tensorflow/contrib/tpu/BUILD                  |   3 +
 tensorflow/contrib/tpu/__init__.py            |   6 +
 tensorflow/contrib/tpu/ops/replication_ops.cc |  17 +-
 .../contrib/tpu/ops/tpu_configuration_ops.cc  |  27 +-
 tensorflow/contrib/tpu/proto/BUILD            |  25 ++
 tensorflow/contrib/tpu/proto/topology.proto   |  27 ++
 .../tpu/python/tpu/device_assignment.py       | 299 ++++++++++++++++++
 tensorflow/contrib/tpu/python/tpu/topology.py | 137 ++++++++
 tensorflow/contrib/tpu/python/tpu/tpu.py      | 177 ++++++-----
 tensorflow/contrib/tpu/python/tpu/tpu_feed.py |  26 +-
 14 files changed, 636 insertions(+), 118 deletions(-)
 create mode 100644 tensorflow/contrib/tpu/proto/BUILD
 create mode 100644 tensorflow/contrib/tpu/proto/topology.proto
 create mode 100644 tensorflow/contrib/tpu/python/tpu/device_assignment.py
 create mode 100644 tensorflow/contrib/tpu/python/tpu/topology.py

diff --git a/tensorflow/BUILD b/tensorflow/BUILD
index c8f0b6b061..e6dc15a701 100644
--- a/tensorflow/BUILD
+++ b/tensorflow/BUILD
@@ -554,6 +554,7 @@ filegroup(
         "//tensorflow/contrib/timeseries/python/timeseries/state_space_models:all_files",
         "//tensorflow/contrib/tpu:all_files",
         "//tensorflow/contrib/tpu/profiler:all_files",
+        "//tensorflow/contrib/tpu/proto:all_files",
         "//tensorflow/contrib/training:all_files",
         "//tensorflow/contrib/util:all_files",
         "//tensorflow/contrib/verbs:all_files",
diff --git a/tensorflow/compiler/xla/array3d.h b/tensorflow/compiler/xla/array3d.h
index e9449f01ad..a1c5840a5f 100644
--- a/tensorflow/compiler/xla/array3d.h
+++ b/tensorflow/compiler/xla/array3d.h
@@ -36,6 +36,8 @@ namespace xla {
 template <typename T>
 class Array3D : public Array<T> {
  public:
+  Array3D() : Array<T>(std::vector<int64>{0, 0, 0}) {}
+
   // Creates an array of dimensions n1 x n2 x n3, uninitialized values.
   Array3D(const int64 n1, const int64 n2, const int64 n3)
       : Array<T>(std::vector<int64>{n1, n2, n3}) {}
diff --git a/tensorflow/compiler/xla/service/computation_placer.cc b/tensorflow/compiler/xla/service/computation_placer.cc
index 6b7b0d25e8..657fba6b62 100644
--- a/tensorflow/compiler/xla/service/computation_placer.cc
+++ b/tensorflow/compiler/xla/service/computation_placer.cc
@@ -52,6 +52,12 @@ Status DeviceAssignment::Serialize(DeviceAssignmentProto* proto) const {
 /* static */ StatusOr<std::unique_ptr<DeviceAssignment>>
 DeviceAssignment::Deserialize(const DeviceAssignmentProto& proto) {
   TF_RET_CHECK(proto.computation_devices_size() == proto.computation_count());
+  if (proto.replica_count() <= 0 || proto.computation_count() <= 0) {
+    return InvalidArgument(
+        "Invalid device assignment topology: replica_count=%d, "
+        "computation_count=%d",
+        proto.replica_count(), proto.computation_count());
+  }
   auto assignment = MakeUnique<DeviceAssignment>(proto.replica_count(),
                                                  proto.computation_count());
   for (int computation = 0; computation < proto.computation_count();
diff --git a/tensorflow/contrib/cmake/tf_python.cmake b/tensorflow/contrib/cmake/tf_python.cmake
index 61b3fd715d..0128946e45 100755
--- a/tensorflow/contrib/cmake/tf_python.cmake
+++ b/tensorflow/contrib/cmake/tf_python.cmake
@@ -129,6 +129,7 @@ file(GLOB_RECURSE tf_protos_python_srcs RELATIVE ${tensorflow_source_dir}
     "${tensorflow_source_dir}/tensorflow/contrib/session_bundle/*.proto"
     "${tensorflow_source_dir}/tensorflow/contrib/tensor_forest/proto/*.proto"
     "${tensorflow_source_dir}/tensorflow/contrib/tensorboard/*.proto"
+    "${tensorflow_source_dir}/tensorflow/contrib/tpu/proto/*.proto"
     "${tensorflow_source_dir}/tensorflow/contrib/tpu/profiler/*.proto"
     "${tensorflow_source_dir}/tensorflow/contrib/training/*.proto"
 )
diff --git a/tensorflow/contrib/tpu/BUILD b/tensorflow/contrib/tpu/BUILD
index 64e9d0e765..f542d94139 100644
--- a/tensorflow/contrib/tpu/BUILD
+++ b/tensorflow/contrib/tpu/BUILD
@@ -155,6 +155,8 @@ py_library(
     name = "tpu_lib",
     srcs = [
         "python/tpu/__init__.py",
+        "python/tpu/device_assignment.py",
+        "python/tpu/topology.py",
         "python/tpu/tpu.py",
         "python/tpu/tpu_feed.py",
         "python/tpu/tpu_function.py",
@@ -166,6 +168,7 @@ py_library(
     deps = [
         ":profiler",
         ":tpu_py",
+        "//tensorflow/contrib/tpu/proto:topology_proto_py",
         "//tensorflow/core:protos_all_py",
         "//tensorflow/python:array_ops",
         "//tensorflow/python:control_flow_ops",
diff --git a/tensorflow/contrib/tpu/__init__.py b/tensorflow/contrib/tpu/__init__.py
index ec4c4e1be6..ea6e874f2d 100644
--- a/tensorflow/contrib/tpu/__init__.py
+++ b/tensorflow/contrib/tpu/__init__.py
@@ -23,6 +23,7 @@
 
 @@initialize_system
 @@shutdown_system
+@@device_assignment
 @@core
 @@replicate
 @@shard
@@ -33,6 +34,9 @@
 
 @@InfeedQueue
 
+@@DeviceAssignment
+@@Topology
+
 @@while_loop
 @@repeat
 
@@ -49,6 +53,8 @@ from __future__ import print_function
 # pylint: disable=wildcard-import,unused-import
 from tensorflow.contrib.tpu.python import profiler
 from tensorflow.contrib.tpu.python.ops.tpu_ops import *
+from tensorflow.contrib.tpu.python.tpu.device_assignment import *
+from tensorflow.contrib.tpu.python.tpu.topology import *
 from tensorflow.contrib.tpu.python.tpu.tpu import *
 from tensorflow.contrib.tpu.python.tpu.tpu_config import *
 from tensorflow.contrib.tpu.python.tpu.tpu_estimator import *
diff --git a/tensorflow/contrib/tpu/ops/replication_ops.cc b/tensorflow/contrib/tpu/ops/replication_ops.cc
index b40dac4717..36e865bf3c 100644
--- a/tensorflow/contrib/tpu/ops/replication_ops.cc
+++ b/tensorflow/contrib/tpu/ops/replication_ops.cc
@@ -24,7 +24,9 @@ using shape_inference::ShapeHandle;
 
 REGISTER_OP("TPUReplicateMetadata")
     .Attr("num_replicas: int >= 0")
-    .Attr("global_tpu_id: list(int) = []")
+    .Attr("topology: string = \"\"")
+    .Attr("device_assignment: list(int) = []")
+    .Attr("computation_shape: list(int) = []")
     .SetShapeFn(shape_inference::UnknownShape);
 
 REGISTER_OP("TPUReplicatedInput")
@@ -64,7 +66,9 @@ REGISTER_OP("TPUReplicatedOutput")
 REGISTER_OP("TPUReplicate")
     .Attr("computation: func")
     .Attr("num_replicas: int >= 1")
-    .Attr("global_tpu_id: list(int) = []")
+    .Attr("topology: string = \"\"")
+    .Attr("device_assignment: list(int) = []")
+    .Attr("computation_shape: list(int) = []")
     .Attr("Tinputs: list(type) >= 0")
     .Attr("Tbroadcast_inputs: list(type) >= 0")
     .Attr("NumVariables: int >= 0")
@@ -79,7 +83,14 @@ Runs replicated computations on a distributed TPU system.
 
 computation: a function containing the computation to run.
 num_replicas: the number of replicas of the computation to run.
-global_tpu_id: map from device to global tpu id.
+topology: A serialized tensorflow.tpu.TopologyProto that describes the TPU
+topology.
+computation_shape: a [mesh_dimension] array describing the shape of each
+  computation replica in numbers of cores in the TPU mesh.
+device_assignment: a flattened array with shape
+  [replica] + computation_shape + [mesh_dimension] that maps the coordinates of
+  logical cores in each replica of a computation to physical coordinates in
+  the TPU topology.
 Tinputs: the types of the arguments to 'computation'.
 inputs: the inputs to 'computation', flattened, in replica-major order.
 Tbroadcast_inputs: the types of the additional arguments to broadcast to all
diff --git a/tensorflow/contrib/tpu/ops/tpu_configuration_ops.cc b/tensorflow/contrib/tpu/ops/tpu_configuration_ops.cc
index 8a87a91056..8c4fe5538d 100644
--- a/tensorflow/contrib/tpu/ops/tpu_configuration_ops.cc
+++ b/tensorflow/contrib/tpu/ops/tpu_configuration_ops.cc
@@ -107,7 +107,7 @@ in a host.
 
 REGISTER_OP("_WaitForDistributedTPU")
     .Input("inputs: N * int32")
-    .Output("global_tpu_array: int32")
+    .Output("topology: string")
     .Attr("host_specs: list(string)")
     .Attr("startup_timeout_sec: int = 20")
     .Attr("N: int")
@@ -118,7 +118,7 @@ REGISTER_OP("_WaitForDistributedTPU")
       for (int i = 0; i < c->num_inputs(); ++i) {
         TF_RETURN_IF_ERROR(c->WithRank(c->input(i), 1, &input));
       }
-      c->set_output(0, c->UnknownShapeOfRank(2));
+      c->set_output(0, c->Scalar());
       return ::tensorflow::Status::OK();
     })
     .Doc(R"doc(
@@ -129,30 +129,26 @@ _InitializeHostForDistributedTPU Ops.
 
 inputs: For each initialized host, a vector giving the global TPU id
 of each TPU on the host.
-global_tpu_array: A two-dimensional array. For each host (the outer
-dimension) the array lists the global ids of the TPUs on that host.
-host_specs: For each initialized host, the partial device specification
-indicating job, replica, and task. Combining this spec with
-'/device:TPU:k' gives the full device name of the k'th TPU on the
-host.
+topology: A serialized tensorflow.tpu.TopologyProto that describes the TPU
+topology.
 startup_timeout_sec: The number of seconds to wait for the TPU system
 to stabilize.
 )doc");
 
 REGISTER_OP("_SetGlobalTPUArray")
-    .Input("global_tpu_array: int32")
+    .Input("topology: string")
     .SetIsStateful()
     .SetShapeFn([](InferenceContext* c) {
       ShapeHandle input;
-      TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 2, &input));
+      TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 0, &input));
       return ::tensorflow::Status::OK();
     })
     .Doc(R"doc(
 An op that informs a host of the global ids of all the of TPUs in the
 system.
 
-global_tpu_array: A two-dimensional array. For each host (the outer
-dimension) the array lists the global ids of the TPUs on that host.
+topology: A serialized tensorflow.tpu.TopologyProto that describes the TPU
+topology.
 )doc");
 
 REGISTER_OP("_ShutdownDistributedTPU")
@@ -198,7 +194,7 @@ chips on the host.
 )doc");
 
 REGISTER_OP("ConfigureDistributedTPU")
-    .Output("global_tpu_array: int32")
+    .Output("topology: string")
     .Attr("embedding_config: string = ''")
     .SetIsStateful()
     .SetShapeFn(shape_inference::UnknownShape)
@@ -206,9 +202,8 @@ REGISTER_OP("ConfigureDistributedTPU")
 An op that sets up the centralized structures for a distributed TPU
 system.
 
-global_tpu_array: A two-dimensional array. For each host (the outer
-dimension) the array lists the global ids of the TPUs on that host.
-embedding_config: Internal use.
+topology: A serialized tensorflow.tpu.TopologyProto that describes the TPU
+topology.
 )doc");
 
 REGISTER_OP("ShutdownDistributedTPU")
diff --git a/tensorflow/contrib/tpu/proto/BUILD b/tensorflow/contrib/tpu/proto/BUILD
new file mode 100644
index 0000000000..79a79efb6b
--- /dev/null
+++ b/tensorflow/contrib/tpu/proto/BUILD
@@ -0,0 +1,25 @@
+licenses(["notice"])  # Apache 2.0
+
+exports_files(["LICENSE"])
+
+load("//tensorflow/core:platform/default/build_config.bzl", "tf_proto_library")
+
+filegroup(
+    name = "all_files",
+    srcs = glob(
+        ["**/*"],
+        exclude = [
+            "**/OWNERS",
+        ],
+    ),
+    visibility = ["//tensorflow:__subpackages__"],
+)
+
+tf_proto_library(
+    name = "topology_proto",
+    srcs = [
+        "topology.proto",
+    ],
+    cc_api_version = 2,
+    visibility = ["//visibility:public"],
+)
diff --git a/tensorflow/contrib/tpu/proto/topology.proto b/tensorflow/contrib/tpu/proto/topology.proto
new file mode 100644
index 0000000000..17064ee5a2
--- /dev/null
+++ b/tensorflow/contrib/tpu/proto/topology.proto
@@ -0,0 +1,27 @@
+syntax = "proto3";
+
+option cc_enable_arenas = true;
+
+package tensorflow.tpu;
+
+// Describes the geometry of a TPU mesh.
+message TopologyProto {
+  // The dimensions of the TPU topology, in cores. Typically, this is a 3D
+  // topology [x, y, core], where the major dimensions correspond to TPU chips,
+  // and the minor dimension describes the number of cores on a multicore chip.
+  repeated int32 mesh_shape = 1;
+
+  // Number of TensorFlow tasks in the cluster.
+  int32 num_tasks = 2;
+
+  // Number of TPU devices per task.
+  int32 num_tpu_devices_per_task = 3;
+
+  // A flattened rank 3 int32 array with shape
+  // [num_tasks, num_tpu_devices_per_task, len(mesh_shape)].
+  // `tasks` is the number of tasks in the TPU cluster, `devices` is the number
+  // of TPU devices per task, and the minor dimension corresponds to a position
+  // in the TPU mesh topology. Each entry [task, device, axis] gives the
+  // `axis`-th coordinate in the topology of a task/device pair.
+  repeated int32 device_coordinates = 4;
+}
diff --git a/tensorflow/contrib/tpu/python/tpu/device_assignment.py b/tensorflow/contrib/tpu/python/tpu/device_assignment.py
new file mode 100644
index 0000000000..ee202610a8
--- /dev/null
+++ b/tensorflow/contrib/tpu/python/tpu/device_assignment.py
@@ -0,0 +1,299 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ======================================
+"""Library of TPU helper functions."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import math
+import numpy as np
+from six.moves import xrange  # pylint: disable=redefined-builtin
+
+from tensorflow.contrib.tpu.python.tpu.topology import Topology
+
+
+def _tpu_device_name(job, task, device):
+  """Returns the device name for the TPU `device` on `task` of `job`."""
+  if job is None:
+    return "/task:%d/device:TPU:%d" % (task, device)
+  else:
+    return "/job:%s/task:%d/device:TPU:%d" % (job, task, device)
+
+
+def _tpu_host_device_name(job, task):
+  """Returns the device name for the CPU device on `task` of `job`."""
+  if job is None:
+    return "/task:%d/device:CPU:0" % task
+  else:
+    return "/job:%s/task:%d/device:CPU:0" % (job, task)
+
+
+class DeviceAssignment(object):
+  """Mapping from logical cores in a computation to the physical TPU topology.
+
+  Prefer to use the `device_assignment()` helper to construct a
+  `DeviceAssignment`; it is easier if less flexible than constructing a
+  `DeviceAssignment` directly.
+  """
+
+  def __init__(self, topology, core_assignment):
+    """Constructs a `DeviceAssignment` object.
+
+    Args:
+      topology: A `Topology` object that describes the physical TPU topology.
+      core_assignment: A logical to physical core mapping, represented as a
+        rank 3 numpy array. See the description of the `core_assignment`
+        property for more details.
+
+    Raises:
+      ValueError: If `topology` is not `Topology` object.
+      ValueError: If `core_assignment` is not a rank 3 numpy array.
+    """
+    if not isinstance(topology, Topology):
+      raise ValueError("topology must be a Topology object, got {}".format(
+          type(topology)))
+    core_assignment = np.asarray(core_assignment, dtype=np.int32)
+
+    self._topology = topology
+    self._topology_tasks, self._topology_devices = (
+        self._invert_topology(topology))
+
+    topology_rank = self._topology_tasks.ndim
+    if core_assignment.ndim != topology_rank + 2:
+      raise ValueError("core_assignment must be a rank {} numpy array".format(
+          topology_rank + 2))
+
+    self._num_replicas = core_assignment.shape[0]
+    self._computation_shape = np.array(
+        core_assignment.shape[1:-1], dtype=np.int32)
+
+    if core_assignment.shape[-1] != topology_rank:
+      raise ValueError(
+          "minor dimension of core_assignment must have size equal to topology "
+          "rank ({}), got shape {}".format(topology_rank,
+                                           core_assignment.shape))
+
+    self._core_assignment = core_assignment
+
+  def _invert_topology(self, topology):
+    """Inverts a [task,device,axis] topology to [x,y,z] -> task/device maps."""
+    mesh_shape = topology.mesh_shape
+    tasks = np.full(list(mesh_shape), -1, dtype=np.int32)
+    devices = np.full(list(mesh_shape), -1, dtype=np.int32)
+    for task in xrange(topology.device_coordinates.shape[0]):
+      for device in xrange(topology.device_coordinates.shape[1]):
+        x, y, z = topology.device_coordinates[task, device, :]
+        tasks[x, y, z] = task
+        devices[x, y, z] = device
+    return tasks, devices
+
+  @property
+  def topology(self):
+    """A `Topology` that describes the TPU topology."""
+    return self._topology
+
+  @property
+  def computation_shape(self):
+    """The computation shape.
+
+    Returns:
+      A rank-1 int32 numpy array with size equal to the TPU topology rank.
+      Describes the logical shape in numbers of core of each replica of the
+      computation in the TPU topology.
+
+    Returns:
+      The computation shape.
+    """
+    return self._computation_shape
+
+  @property
+  def num_replicas(self):
+    """The number of replicas of the computation."""
+    return self._num_replicas
+
+  @property
+  def core_assignment(self):
+    """The logical to physical core mapping.
+
+    Returns:
+      A numpy array of rank `topology_rank + 2`, with shape
+      `[num_replicas] + computation_shape + [topology_rank]`. Maps
+      (replica, logical core coordinates) pairs to physical topology
+      coordinates.
+    """
+    return self._core_assignment
+
+  def _coordinates(self, replica, logical_core):
+    """Returns the physical topology coordinates of a logical core."""
+    if logical_core is None:
+      logical_core = np.array([0, 0, 0], np.int32)
+
+    if any(logical_core < 0) or any(logical_core >= self.computation_shape):
+      raise ValueError("Invalid core {}; computation shape is {}".format(
+          logical_core, self.computation_shape))
+
+    logical_offset = tuple([replica] + logical_core.tolist() + [slice(3)])
+    return tuple(self.core_assignment[logical_offset])
+
+  def tpu_ordinal(self, replica=0, logical_core=None):
+    """Returns the ordinal of the TPU device assigned to a logical core."""
+    coordinates = self._coordinates(replica, logical_core)
+    return self._topology_devices[coordinates]
+
+  def host_device(self, replica=0, logical_core=None, job=None):
+    """Returns the CPU device attached to a logical core."""
+    coordinates = self._coordinates(replica, logical_core)
+    return _tpu_host_device_name(job, self._topology_tasks[coordinates])
+
+  def tpu_device(self, replica=0, logical_core=None, job=None):
+    """Returns the name of the TPU device assigned to a logical core."""
+    coordinates = self._coordinates(replica, logical_core)
+    return _tpu_device_name(job, self._topology_tasks[coordinates],
+                            self._topology_devices[coordinates])
+
+
+def device_assignment(topology,
+                      computation_shape=None,
+                      computation_stride=None,
+                      num_replicas=1):
+  """Computes a device_assignment of a computation across a TPU topology.
+
+  Returns a `DeviceAssignment` that describes the cores in the topology assigned
+  to each core of each replica.
+
+  `computation_shape` and `computation_stride` values should be powers of 2 for
+  optimal packing.
+
+  Args:
+    topology: A `Topology` object that describes the TPU cluster topology.
+      To obtain a TPU topology, evaluate the `Tensor` returned by
+      `initialize_system` using `Session.run`. Either a serialized
+      `TopologyProto` or a `Topology` object may be passed. Note: you must
+      evaluate the `Tensor` first; you cannot pass an unevaluated `Tensor` here.
+    computation_shape: A rank 1 int32 numpy array of size 3, describing the
+      shape of the computation's block of cores. If None, the
+      `computation_shape` is `[1, 1, 1]`.
+    computation_stride: A rank 1 int32 numpy array of size 3, describing the
+      inter-core spacing of the `computation_shape` cores in the TPU topology.
+      If None, the `computation_stride` is `[1, 1, 1]`.
+    num_replicas: The number of computation replicas to run. The replicas will
+      be packed into the free spaces of the topology.
+
+  Returns:
+    A DeviceAssignment object, which describes the mapping between the logical
+    cores in each computation replica and the physical cores in the TPU
+    topology.
+
+  Raises:
+    ValueError: If `topology` is not a valid `Topology` object.
+    ValueError: If `computation_shape` or `computation_stride` are not 1D int32
+      numpy arrays with shape [3] where all values are positive.
+    ValueError: If computation's replicas cannot fit into the TPU topology.
+  """
+  # Deserialize the Topology proto, if it is a string.
+  if isinstance(topology, bytes):
+    topology = Topology(serialized=topology)
+
+  if not isinstance(topology, Topology):
+    raise ValueError("`topology` is not a Topology object; got {}".format(
+        type(topology)))
+
+  topology_rank = len(topology.mesh_shape)
+  mesh_shape = topology.mesh_shape
+  if computation_shape is None:
+    computation_shape = np.array([1, 1, 1], dtype=np.int32)
+  else:
+    computation_shape = np.asarray(computation_shape, dtype=np.int32)
+
+  if computation_stride is None:
+    computation_stride = np.array([1, 1, 1], dtype=np.int32)
+  else:
+    computation_stride = np.asarray(computation_stride, dtype=np.int32)
+
+  if computation_shape.shape != (3,):
+    raise ValueError("computation_shape must have shape [3]; got {}".format(
+        computation_shape.shape))
+  if computation_stride.shape != (3,):
+    raise ValueError("computation_stride must have shape [3]; got {}".format(
+        computation_stride.shape))
+
+  if any(computation_shape < 1):
+    raise ValueError(
+        "computation_shape must be positive; got computation_shape={}".format(
+            computation_shape))
+  if any(computation_stride < 1):
+    raise ValueError(
+        "computation_stride must be positive; got computation_stride={}".format(
+            computation_stride))
+
+  # Computes the physical size of one computation instance.
+  computation_footprint = computation_shape * computation_stride
+  if any(computation_footprint > mesh_shape):
+    raise ValueError(
+        "computation footprint {} does not fit in TPU topology shape {}".format(
+            computation_footprint, mesh_shape))
+
+  # Computes how many copies of the computation footprint fit in the mesh.
+  block_counts = mesh_shape // computation_footprint
+
+  replica_counts = block_counts * computation_stride
+  max_replicas = np.prod(replica_counts)
+  if num_replicas > max_replicas:
+    raise ValueError(
+        "requested {} replicas but only {} replicas with shape {} and "
+        "computation_stride {} fit in a TPU mesh of shape {}".format(
+            num_replicas, max_replicas, computation_shape, computation_stride,
+            mesh_shape))
+
+  # Choose a compact layout for the cores. Choose the smaller dimension in the
+  # topology to be close to the square root of the number of replicas.
+  num_chips = int(math.ceil(num_replicas / replica_counts[2]))
+  target_size = int(math.ceil(math.sqrt(num_chips)))
+
+  # Prefer an even size, if possible. Odd numbered rows head back towards the
+  # first column, so it's best if the last row has an odd index.
+  if target_size % 2 != 0:
+    target_size -= 1
+  y_size = min(replica_counts[1], target_size)
+  if y_size * replica_counts[0] < num_chips:
+    y_size = replica_counts[1]
+
+  # Assigns an offset to each replica such that no two replicas overlap.
+  replica_offsets = np.full([num_replicas, 3], -1, dtype=np.int32)
+  for replica in xrange(num_replicas):
+    # Chooses a replica number in X/Y/Z axes.
+    z = replica % replica_counts[2]
+    t = replica // replica_counts[2]
+    y = t % y_size
+    x = t // y_size
+    replica_pos = np.array([x, y, z], dtype=np.int32)
+
+    # Determines where that replica starts in each axis.
+    outer = replica_pos // computation_stride
+    inner = replica_pos % computation_stride
+    replica_offsets[replica, :] = outer * computation_footprint + inner
+
+  # Computes a complete logical core -> physical core mapping for each replica.
+  indices = [
+      np.arange(0, computation_shape[i] * computation_stride[i],
+                computation_stride[i]) for i in xrange(topology_rank)
+  ]
+  indices = np.concatenate(
+      [i[..., np.newaxis] for i in np.meshgrid(*indices, indexing="ij")],
+      axis=-1)
+  assignment = (
+      indices + replica_offsets[:, np.newaxis, np.newaxis, np.newaxis, :])
+  return DeviceAssignment(topology, core_assignment=assignment)
diff --git a/tensorflow/contrib/tpu/python/tpu/topology.py b/tensorflow/contrib/tpu/python/tpu/topology.py
new file mode 100644
index 0000000000..cda9a63f20
--- /dev/null
+++ b/tensorflow/contrib/tpu/python/tpu/topology.py
@@ -0,0 +1,137 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ======================================
+"""Defines the `Topology` class, that describes a TPU fabric topology."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import numpy as np
+
+from tensorflow.contrib.tpu.proto import topology_pb2
+
+
+class Topology(object):
+  """Describes a set of TPU devices.
+
+  Represents both the shape of the physical mesh, and the mapping between
+  TensorFlow TPU devices to physical mesh coordinates.
+  """
+
+  def __init__(self, serialized=None, mesh_shape=None, device_coordinates=None):
+    """Builds a Topology object.
+
+    If `serialized` is not `None`, the topology is parsed from `serialized` and
+    the other arguments are ignored. Otherwise, the topology is computed from
+    `mesh_shape` and `device_coordinates`.
+
+    Args:
+      serialized: A serialized `TopologyProto`, or `None`. If not `None`, the
+        serialized proto is parsed to discover the topology.
+      mesh_shape: A sequence of 3 positive integers, or `None`. If not `None`,
+        the shape of the TPU topology, in number of cores. Ignored if
+        `serialized` is not `None`.
+      device_coordinates: A rank 3 numpy array that describes the mapping from
+        TensorFlow TPU devices to TPU fabric coordinates, or `None`. Ignored
+        if `serialized is not `None`.
+
+    Raises:
+      ValueError: If `serialized` does not describe a well-formed topology.
+      ValueError: If `serialized` is `None` and `mesh_shape` is not a sequence
+        of 3 positive integers.
+      ValueError: If `serialized` is `None` and `device_coordinates` is not a
+        rank 3 numpy int32 array that describes a valid coordinate mapping.
+    """
+
+    if serialized:
+      self._serialized = serialized
+      self._parse_topology(serialized)
+    else:
+      self._mesh_shape = np.asarray(mesh_shape, dtype=np.int32)
+      self._device_coordinates = np.asarray(device_coordinates, np.int32)
+      if len(self._mesh_shape) != 3 or any(self._mesh_shape < 1):
+        raise ValueError("`mesh_shape` must be a sequence of 3 positive "
+                         "entries; got {}".format(self._mesh_shape))
+
+      if (len(self._device_coordinates.shape) != 3 or
+          self._device_coordinates.shape[2] != len(self._mesh_shape)):
+        raise ValueError("`device_coordinates` must be a rank 3 int32 array "
+                         "with minor dimension equal to the mesh shape rank")
+
+  def _parse_topology(self, serialized):
+    """Parses a serialized `TopologyProto` into `self`."""
+    proto = topology_pb2.TopologyProto()
+    proto.ParseFromString(serialized)
+
+    self._mesh_shape = np.array(proto.mesh_shape, dtype=np.int32)
+    if len(self._mesh_shape) != 3 or any(self._mesh_shape < 1):
+      raise ValueError("`mesh_shape` must be a vector of size 3 with positive "
+                       "entries; got {}".format(self._mesh_shape))
+
+    if proto.num_tasks < 0:
+      raise ValueError("`num_tasks` must be >= 0; got {}".format(
+          proto.num_tasks))
+    if proto.num_tpu_devices_per_task < 0:
+      raise ValueError("`num_tpu_devices_per_task` must be >= 0; got {}".format(
+          proto.num_tpu_devices_per_task))
+
+    expected_coordinates_size = (
+        proto.num_tasks * proto.num_tpu_devices_per_task * len(
+            proto.mesh_shape))
+    if len(proto.device_coordinates) != expected_coordinates_size:
+      raise ValueError("`device_coordinates` must have shape num_tasks ({}) * "
+                       "num_tpu_devices_per_task ({}) * len(mesh_shape) ({}); "
+                       "got shape {}".format(proto.num_tasks,
+                                             proto.num_tpu_devices_per_task,
+                                             proto.mesh_shape,
+                                             len(proto.device_coordinates)))
+
+    coords = np.array(proto.device_coordinates, dtype=np.int32)
+    if any(coords < 0):
+      raise ValueError("`device_coordinates` must be >= 0")
+    coords = coords.reshape((proto.num_tasks, proto.num_tpu_devices_per_task,
+                             len(proto.mesh_shape)))
+    self._device_coordinates = coords
+
+  @property
+  def mesh_shape(self):
+    """A rank 1 int32 array describing the shape of the TPU topology."""
+    return self._mesh_shape
+
+  @property
+  def device_coordinates(self):
+    """Describes the mapping from TPU devices to topology coordinates.
+
+    Returns:
+      A rank 3 int32 array with shape `[tasks, devices, axis]`.
+      `tasks` is the number of tasks in the TPU cluster, `devices` is the number
+      of TPU devices per task, and `axis` is the number of axes in the TPU
+      cluster topology. Each entry gives the `axis`-th coordinate in the
+      topology of a task/device pair. TPU topologies are 3-dimensional, with
+      dimensions `(x, y, core number)`.
+    """
+    return self._device_coordinates
+
+  def serialized(self):
+    """Returns the serialized form of the topology."""
+    if self._serialized is None:
+      proto = topology_pb2.TopologyProto()
+      proto.mesh_shape[:] = list(self._mesh_shape)
+      proto.num_tasks = self._device_coordinates.shape[0]
+      proto.num_tpu_devices_per_task = self._device_coordinates.shape[1]
+      proto.device_coordinates = list(self._device_coordinates.flatten())
+      self._serialized = proto.SerializeToString()
+
+    return self._serialized
diff --git a/tensorflow/contrib/tpu/python/tpu/tpu.py b/tensorflow/contrib/tpu/python/tpu/tpu.py
index 77977b3c94..7fb8a33698 100644
--- a/tensorflow/contrib/tpu/python/tpu/tpu.py
+++ b/tensorflow/contrib/tpu/python/tpu/tpu.py
@@ -53,11 +53,19 @@ _NOT_IMPLEMENTED_OPS = set([
     ])
 
 
+def _tpu_system_device_name(job):
+  """Returns the device name for the TPU_SYSTEM device of `job`."""
+  if job is None:
+    return "/device:TPU_SYSTEM:0"
+  else:
+    return "/job:%s/device:TPU_SYSTEM:0" % job
+
+
 def initialize_system(embedding_config=None, job=None):
   """Initializes a distributed TPU system for use with TensorFlow.
 
   Args:
-    embedding_config: If not None, an EmbeddingLayerConfiguration proto
+    embedding_config: If not None, an `EmbeddingLayerConfiguration` proto
       describing the desired configuration of the hardware embedding lookup
       tables. If embedding_config is None, no hardware embeddings can be used.
     job: The job (the XXX in TensorFlow device specification /job:XXX)
@@ -65,27 +73,18 @@ def initialize_system(embedding_config=None, job=None):
       it is assumed there is only one job in the TensorFlow flock, and an
       error will be returned if this assumption does not hold.
   Returns:
-    Op which, when executed, will initialize the system.
+    A serialized `TopologyProto` that describes the TPU system. Note:
+      the topology must be evaluated using `Session.run` before it can be used.
   """
-  if job is None:
-    device_name = "/device:TPU_SYSTEM:0"
-  else:
-    device_name = "/job:%s/device:TPU_SYSTEM:0" % job
   config_string = ("" if embedding_config is None else
                    embedding_config.SerializeToString())
-  with ops.device(device_name):
-    init_distributed_tpu = tpu_ops.configure_distributed_tpu(
-        embedding_config=config_string)
-  return init_distributed_tpu
+  with ops.device(_tpu_system_device_name(job)):
+    return tpu_ops.configure_distributed_tpu(embedding_config=config_string)
 
 
 def shutdown_system(job=None):
   """Shuts down a running a distributed TPU system."""
-  if job is None:
-    device_name = "/device:TPU_SYSTEM:0"
-  else:
-    device_name = "/job:%s/device:TPU_SYSTEM:0" % job
-  with ops.device(device_name):
+  with ops.device(_tpu_system_device_name(job)):
     shutdown_distributed_tpu = tpu_ops.shutdown_distributed_tpu()
   return shutdown_distributed_tpu
 
@@ -97,23 +96,24 @@ def core(num):
     num: the virtual core number within each replica to which operators should
     be assigned.
   Returns:
-    A device name, suitable for passing to tf.device().
+    A device name, suitable for passing to `tf.device()`.
   """
   return "device:TPU_REPLICATED_CORE:{}".format(num)
 
 
 class TPUReplicateContext(control_flow_ops.ControlFlowContext):
-  """A ControlFlowContext for nodes inside a TPU computation.
+  """A `ControlFlowContext` for nodes inside a TPU computation.
 
-  The primary role of TPUReplicateContext is to mark operators inside a
+  The primary role of `TPUReplicateContext` is to mark operators inside a
   tpu.replicate() computation with the attribute "_tpu_replicate=XYZ", where XYZ
   is a unique name.
 
-  We use a ControlFlowContext to perform the annotation since it
+  We use a `ControlFlowContext` to perform the annotation since it
   integrates with Tensorflow constructs like ResourceVariables. For example,
-  if a ResourceVariable is constructed inside a tpu.replicate() block, the
-  ResourceVariable implementation can use "with ops.control_dependencies(None)"
-  to build the variable's definition outside the replicated computation.
+  if a `ResourceVariable` is constructed inside a tpu.replicate() block, the
+  `ResourceVariable` implementation can use
+  `with ops.control_dependencies(None)` to build the variable's definition
+  outside the replicated computation.
   """
 
   def __init__(self, name):
@@ -167,37 +167,47 @@ class TPUReplicateContext(control_flow_ops.ControlFlowContext):
 def replicate(computation,
               inputs=None,
               infeed_queue=None,
-              global_tpu_id=None,
+              device_assignment=None,
               name=None):
   """Builds a graph operator that runs a replicated TPU computation.
 
   Args:
-    computation: a Python function that builds the computation to replicate.
-    inputs: a list of lists of input tensors or None (equivalent to
-      [[]]), indexed by [replica_num][input_num]. All replicas must
+    computation: A Python function that builds the computation to replicate.
+    inputs: A list of lists of input tensors or `None` (equivalent to
+      `[[]]`), indexed by `[replica_num][input_num]`. All replicas must
       have the same number of inputs.
-    infeed_queue: if not None, the InfeedQueue from which to append a tuple
+    infeed_queue: If not `None`, the `InfeedQueue` from which to append a tuple
       of arguments as inputs to computation.
-    global_tpu_id: if not None, a Numpy 2D array indicating the global
-      id of each TPU device in the system. The outer dimension of the
-      array is host task id, and the inner dimension is device ordinal,
-      so e.g., global_tpu_id[x][y] indicates the global id of device
-      /task:x/device:TPU_NODE:y.
-    name: name of the operator.
+    device_assignment: If not `None`, a `DeviceAssignment` describing the
+      mapping between logical cores in the computation with physical cores in
+      the TPU topology. Uses a default device assignment if `None`. The
+      `DeviceAssignment` may be omitted if each replica of the computation uses
+      only one core, and there is either only one replica, or the number of
+      replicas is equal to the number of cores in the TPU system.
+    name: The name of the operator.
   Returns:
-    A list of lists of output tensors, indexed by [replica_num][output_num].
+    A list of lists of output tensors, indexed by `[replica_num][output_num]`.
   Raises:
-    ValueError: if all replicas do not have equal numbers of input tensors.
-    ValueError: if the number of inputs per replica does not match
+    ValueError: If all replicas do not have equal numbers of input tensors.
+    ValueError: If the number of inputs per replica does not match
       the number of formal parameters to `computation`.
   """
   if name is None:
     name = "TPUReplicate"
   inputs = [[]] if inputs is None else inputs
 
-  if global_tpu_id is not None:
-    # Turn the Numpy array into a flattened list.
-    global_tpu_id = global_tpu_id.flatten().tolist()
+  metadata_kwargs = {}
+  if device_assignment is not None:
+    # Turn the Numpy array into a flattened list so we can pass it as an
+    # operator attribute.
+    metadata_kwargs = {
+        "topology":
+            device_assignment.topology.serialized(),
+        "device_assignment":
+            device_assignment.core_assignment.flatten().tolist(),
+        "computation_shape":
+            device_assignment.computation_shape.tolist()
+    }
 
   if ((not isinstance(inputs, list)) or
       any(not isinstance(inp, (list, tuple)) for inp in inputs)):
@@ -260,7 +270,7 @@ def replicate(computation,
       context.Enter()
 
       metadata = tpu_ops.tpu_replicate_metadata(
-          num_replicas=num_replicas, global_tpu_id=global_tpu_id)
+          num_replicas=num_replicas, **metadata_kwargs)
 
       with tpu_function.tpu_shard_context(
           num_replicas), ops.control_dependencies([metadata]):
@@ -367,7 +377,7 @@ def shard(computation,
           outputs_from_all_shards=True,
           output_shard_axes=None,
           infeed_queue=None,
-          global_tpu_id=None,
+          device_assignment=None,
           name=None):
   """Shards `computation` for parallel execution.
 
@@ -395,39 +405,40 @@ def shard(computation,
   Inputs and outputs of the computation must be at least rank-1 Tensors.
 
   Args:
-    computation: a Python function that builds a computation to apply to each
+    computation: A Python function that builds a computation to apply to each
       shard of the input.
-    inputs: a list of input tensors or None (equivalent to an empty
+    inputs: A list of input tensors or None (equivalent to an empty
       list). Each input tensor has a corresponding shard axes, given
       by `input_shard_axes`, which must have size divisible by
       `num_shards`.
-    num_shards: the number of shards.
-    input_shard_axes: a list of dimensions along which to shard `inputs`, or
+    num_shards: The number of shards.
+    input_shard_axes: A list of dimensions along which to shard `inputs`, or
       `None`. `None` means "shard all inputs along dimension 0". If not `None`,
       there must be one dimension per input.
-    outputs_from_all_shards: boolean or list of boolean. For each output, if
+    outputs_from_all_shards: Boolean or list of boolean. For each output, if
       `True`, outputs from all shards are concatenated along the corresponding
       `output_shard_axes` entry. Otherwise, each output is taken
       from an arbitrary shard. If the argument is a boolean, the argument's
       value is used for each output.
-    output_shard_axes: a list of dimensions along which to concatenate the
+    output_shard_axes: A list of dimensions along which to concatenate the
       outputs of `computation`, or `None`. `None` means "concatenate all outputs
       along dimension 0". If not `None`, there must be one dimension per output.
       Ignored if `outputs_from_all_shards` is False.
-    infeed_queue: if not None, the InfeedQueue to use to augment the inputs of
-      `computation`.
-    global_tpu_id: if not None, a Numpy 2D array indicating the global
-      id of each TPU device in the system. The outer dimension of the
-      array is host task id, and the inner dimension is device ordinal,
-      so e.g., global_tpu_id[x][y] indicates the global id of device
-      /task:x/device:TPU_NODE:y.
-    name: name of the operator.
+    infeed_queue: If not `None`, the `InfeedQueue` to use to augment the inputs
+      of `computation`.
+    device_assignment: If not `None`, a `DeviceAssignment` describing the
+      mapping between logical cores in the computation with physical cores in
+      the TPU topology. Uses a default device assignment if `None`. The
+      `DeviceAssignment` may be omitted if each shard of the computation uses
+      only one core, and there is either only one shard, or the number of shards
+      is equal to the number of cores in the TPU system.
+    name: The name of the operator.
   Returns:
     A list of output tensors.
   Raises:
-    ValueError: if num_shards <= 0
-    ValueError: if len(input_shard_axes) != len(inputs)
-    ValueError: if len(output_shard_axes) != len(outputs from `computation`)
+    ValueError: If num_shards <= 0
+    ValueError: If len(input_shard_axes) != len(inputs)
+    ValueError: If len(output_shard_axes) != len(outputs from `computation`)
   """
 
   if num_shards <= 0:
@@ -458,7 +469,7 @@ def shard(computation,
       computation,
       transposed_inputs,
       infeed_queue=infeed_queue,
-      global_tpu_id=global_tpu_id,
+      device_assignment=device_assignment,
       name=name)
 
   # There must be at least one shard since num_shards > 0.
@@ -512,7 +523,7 @@ def batch_parallel(computation,
                    inputs=None,
                    num_shards=1,
                    infeed_queue=None,
-                   global_tpu_id=None,
+                   device_assignment=None,
                    name=None):
   """Shards `computation` along the batch dimension for parallel execution.
 
@@ -536,55 +547,55 @@ def batch_parallel(computation,
   Inputs and outputs of the computation must be at least rank-1 Tensors.
 
   Args:
-    computation: a Python function that builds a computation to apply to each
+    computation: A Python function that builds a computation to apply to each
       shard of the input.
-    inputs: a list of input tensors or None (equivalent to an empty
+    inputs: A list of input tensors or None (equivalent to an empty
       list). The 0-th dimension of each Tensor must have size
       divisible by `num_shards`.
-    num_shards: the number of shards.
-    infeed_queue: if not None, the InfeedQueue from which to append a tuple
+    num_shards: The number of shards.
+    infeed_queue: If not `None`, the `InfeedQueue` from which to append a tuple
       of arguments as inputs to `computation`.
-    global_tpu_id: if not None, a Numpy 2D array indicating the global
-      id of each TPU device in the system. The outer dimension of the
-      array is host task id, and the inner dimension is device ordinal,
-      so e.g., global_tpu_id[x][y] indicates the global id of device
-      /task:x/device:TPU_NODE:y.
-    name: name of the operator.
+    device_assignment: If not `None`, a `DeviceAssignment` describing the
+      mapping between logical cores in the computation with physical cores in
+      the TPU topology. Uses a default device assignment if `None`. The
+      `DeviceAssignment` may be omitted if each shard of the computation uses
+      only one core, and there is either only one shard, or the number of shards
+      is equal to the number of cores in the TPU system.
+    name: The name of the operator.
   Returns:
     A list of output tensors.
   Raises:
-    ValueError: if num_shards <= 0
+    ValueError: If `num_shards <= 0`
   """
   return shard(
       computation,
       inputs,
       num_shards=num_shards,
       infeed_queue=infeed_queue,
-      global_tpu_id=global_tpu_id,
+      device_assignment=device_assignment,
       name=name)
 
 
 def rewrite(computation,
             inputs=None,
             infeed_queue=None,
-            global_tpu_id=None,
+            device_assignment=None,
             name=None):
   """Rewrites `computation` for execution on a TPU system.
 
   Args:
-    computation: a Python function that builds a computation to apply
+    computation: A Python function that builds a computation to apply
       to the input. If the function takes n inputs, 'inputs' should be
       a list of n tensors. If the function returns m outputs, rewrite
       will return a list of m tensors.
-    inputs: a list of input tensors or None (equivalent to an empty list).
-    infeed_queue: if not None, the InfeedQueue from which to append a tuple
+    inputs: A list of input tensors or `None` (equivalent to an empty list).
+    infeed_queue: If not `None`, the `InfeedQueue` from which to append a tuple
       of arguments as inputs to `computation`.
-    global_tpu_id: if not None, a Numpy 2D array indicating the global
-      id of each TPU device in the system. The outer dimension of the
-      array is host task id, and the inner dimension is device ordinal,
-      so e.g., global_tpu_id[x][y] indicates the global id of device
-      /task:x/device:TPU_NODE:y.
-    name: name of the operator.
+    device_assignment: if not `None`, a `DeviceAssignment` describing the
+      mapping between logical cores in the computation with physical cores in
+      the TPU topology. May be omitted for a single-core computation, in which
+      case the core attached to task 0, TPU device 0 is used.
+    name: The name of the operator.
   Returns:
     A list of output tensors.
   """
@@ -597,6 +608,6 @@ def rewrite(computation,
       computation,
       None if inputs is None else [inputs],
       infeed_queue=infeed_queue,
-      global_tpu_id=global_tpu_id,
+      device_assignment=device_assignment,
       name=name)[0]
   # pylint: enable=indexing-exception
diff --git a/tensorflow/contrib/tpu/python/tpu/tpu_feed.py b/tensorflow/contrib/tpu/python/tpu/tpu_feed.py
index 1c8ea63f00..42ac6eb680 100644
--- a/tensorflow/contrib/tpu/python/tpu/tpu_feed.py
+++ b/tensorflow/contrib/tpu/python/tpu/tpu_feed.py
@@ -513,7 +513,7 @@ class InfeedQueue(object):
   # for automatic placement of input pipelines.
   def split_inputs_and_generate_enqueue_ops(self,
                                             inputs,
-                                            global_tpu_id=None,
+                                            device_assignment=None,
                                             placement_function=None,
                                             tpu_ordinal_function=None):
     """POORLY-PERFORMING ON MULTI-HOST SYSTEMS.
@@ -536,14 +536,12 @@ class InfeedQueue(object):
     Args:
       inputs: a list of Tensors which indicates the types and shapes of the
         queue tuple.
-     global_tpu_id: if not None, a Numpy 2D array indicating the global
-        id of each TPU device in the system. The outer dimension of the
-        array is host task id, and the inner dimension is device ordinal,
-        so e.g., global_tpu_id[x][y] indicates the global id of device
-        /task:x/device:TPU_NODE:y. If global_tpu_id is not None, but
-        placement_function and ordinal_function are None, then global_tpu_id
-        will be used to place infeed on the TPUs with the first k global ids,
-        where k is the number of shards in the queue.
+     device_assignment: if not `None`, a TPU `DeviceAssignment`. If
+        device_assignment is not `None`, but `placement_function` and
+        `ordinal_function` are None, then `device_assignment` will be used to
+        place infeeds on the first k TPU shards, where k is the number of shards
+        in the queue. If all three are `None`, then default placement and
+        ordinal functions are used.
       placement_function: if not None, a function that takes the shard
         index as input and returns a device string indicating which
         device the shard's infeed should be placed on. If placement_function
@@ -567,22 +565,18 @@ class InfeedQueue(object):
         types of the elements of inputs are not compatible with the frozen
         configuration.
     """
-    if global_tpu_id is None:
+    if device_assignment is None:
       if placement_function is None:
         placement_function = self._default_placement_function
       if tpu_ordinal_function is None:
         tpu_ordinal_function = self._default_ordinal_function
     else:
-      global_id_map = {}
-      for host, devices in enumerate(global_tpu_id):
-        for ordinal, global_id in enumerate(devices):
-          global_id_map[global_id] = (host, ordinal)
 
       def _placement_function_from_map(index):
-        return "/task:%d/device:CPU:0" % global_id_map[index][0]
+        return device_assignment.host_device(replica=index)
 
       def _ordinal_function_from_map(index):
-        return global_id_map[index][1]
+        return device_assignment.tpu_ordinal(replica=index)
 
       if placement_function is None:
         placement_function = _placement_function_from_map
-- 
GitLab


From 72d72194c1d06e66f7893915a804932b56bef5db Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 27 Nov 2017 10:58:51 -0800
Subject: [PATCH 0305/1225] Simplify Mul(x, x) to Square(x) to cut the number
 of loads in half.

PiperOrigin-RevId: 177042256
---
 .../optimizers/arithmetic_optimizer.cc        | 21 +++++-
 .../optimizers/arithmetic_optimizer_test.cc   | 66 +++++++++++++------
 2 files changed, 64 insertions(+), 23 deletions(-)

diff --git a/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc b/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc
index d9a544d21f..ec5d2abd7a 100644
--- a/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc
+++ b/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc
@@ -81,7 +81,6 @@ Status SetTensorValue(DataType dtype, int value, Tensor* tensor) {
   return Status::OK();
 }
 
-
 template <typename T>
 bool AreInversePermutations(const std::vector<T>& a, const std::vector<T>& b) {
   if (a.size() != b.size()) {
@@ -170,7 +169,6 @@ bool IsInnerMatrixTransposeNode(const NodeDef& transpose_node,
   return false;
 }
 
-
 bool MaybeAddControlInput(const string& new_input, NodeDef* node,
                           GraphDef* graph, NodeMap* node_map) {
   bool already_exists = false;
@@ -785,6 +783,25 @@ string ArithmeticOptimizer::TrySimplifyAndReplaceUses(
     }
   }
 
+  if (node->op() == "Mul" && node->input(0) == node->input(1) &&
+      node_map->GetNode(node->name() + "_square") == nullptr) {
+    NodeDef* factor = node_map->GetNode(node->input(0));
+    VLOG(2) << "Found square : " << node->DebugString();
+    if (factor != nullptr) {
+      NodeDef* new_mul_node = graph_def->add_node();
+      *new_mul_node = *node;
+      new_mul_node->set_op("Square");
+      new_mul_node->set_name(strings::StrCat(node->name(), "_square"));
+      new_nodes->push_back(new_mul_node);
+      node_map->AddNode(new_mul_node->name(), new_mul_node);
+      for (int i = 1; i < new_mul_node->input_size(); ++i) {
+        new_mul_node->set_input(i - 1, new_mul_node->input(i));
+      }
+      new_mul_node->mutable_input()->RemoveLast();
+      return new_mul_node->name();
+    }
+  }
+
   if (node->input_size() > 0 && IsAggregate(*node)) {
     // Discard aggregate nodes with a single input.
     if (node->input_size() == 1) {
diff --git a/tensorflow/core/grappler/optimizers/arithmetic_optimizer_test.cc b/tensorflow/core/grappler/optimizers/arithmetic_optimizer_test.cc
index 8cec4e4255..6bbc64c7a4 100644
--- a/tensorflow/core/grappler/optimizers/arithmetic_optimizer_test.cc
+++ b/tensorflow/core/grappler/optimizers/arithmetic_optimizer_test.cc
@@ -58,7 +58,7 @@ TEST_F(ArithmeticOptimizerTest, OpDedupping) {
   tensorflow::Scope s = tensorflow::Scope::NewRootScope();
   Output c1 = ops::Const(s.WithOpName("c1"), {3.14, 2.7}, {1, 2});
   Output c2 = ops::Const(s.WithOpName("c2"), {3.14, 2.7}, {1, 2});
-  Output mul = ops::Mul(s.WithOpName("mul"), c1, c2);
+  Output div = ops::Div(s.WithOpName("div"), c1, c2);
   GrapplerItem item;
   TF_CHECK_OK(s.ToGraphDef(&item.graph));
 
@@ -74,11 +74,11 @@ TEST_F(ArithmeticOptimizerTest, OpDedupping) {
   EXPECT_EQ(2, output.node_size());
   const NodeDef& new_c1 = output.node(0);
   EXPECT_EQ("c1", new_c1.name());
-  const NodeDef& new_mul = output.node(1);
-  EXPECT_EQ("mul", new_mul.name());
-  EXPECT_EQ(2, new_mul.input_size());
-  EXPECT_EQ("c1", new_mul.input(0));
-  EXPECT_EQ("c1", new_mul.input(1));
+  const NodeDef& new_div = output.node(1);
+  EXPECT_EQ("div", new_div.name());
+  EXPECT_EQ(2, new_div.input_size());
+  EXPECT_EQ("c1", new_div.input(0));
+  EXPECT_EQ("c1", new_div.input(1));
 }
 
 TEST_F(ArithmeticOptimizerTest, OpDeduppingAssertAndCheckNumerics) {
@@ -89,9 +89,9 @@ TEST_F(ArithmeticOptimizerTest, OpDeduppingAssertAndCheckNumerics) {
   auto check2 = ops::CheckNumerics(s.WithOpName("check2"), c, "foo");
   auto assert1 = ops::Assert(s.WithOpName("assert1"), p, {c});
   auto assert2 = ops::Assert(s.WithOpName("assert2"), p, {c});
-  Output mul = ops::Multiply(s.WithOpName("mul").WithControlDependencies(
-                                 {assert1.operation, assert2.operation}),
-                             check1, check2);
+  Output div = ops::Div(s.WithOpName("div").WithControlDependencies(
+                            {assert1.operation, assert2.operation}),
+                        check1, check2);
   GrapplerItem item;
   TF_CHECK_OK(s.ToGraphDef(&item.graph));
 
@@ -105,12 +105,12 @@ TEST_F(ArithmeticOptimizerTest, OpDeduppingAssertAndCheckNumerics) {
   TF_EXPECT_OK(status);
 
   EXPECT_EQ(5, output.node_size());
-  const NodeDef& new_mul = output.node(3);
-  EXPECT_EQ(4, new_mul.input_size());
-  EXPECT_EQ("check1", new_mul.input(0));
-  EXPECT_EQ("check1", new_mul.input(1));
-  EXPECT_EQ("^assert1", new_mul.input(2));
-  EXPECT_EQ("^assert1", new_mul.input(3));
+  const NodeDef& new_div = output.node(3);
+  EXPECT_EQ(4, new_div.input_size());
+  EXPECT_EQ("check1", new_div.input(0));
+  EXPECT_EQ("check1", new_div.input(1));
+  EXPECT_EQ("^assert1", new_div.input(2));
+  EXPECT_EQ("^assert1", new_div.input(3));
 }
 
 TEST_F(ArithmeticOptimizerTest, OpDedupCommutative) {
@@ -119,7 +119,7 @@ TEST_F(ArithmeticOptimizerTest, OpDedupCommutative) {
   Output c2 = ops::Const(s.WithOpName("c2"), {3.0f, 4.0f}, {1, 2});
   Output mul1 = ops::Mul(s.WithOpName("mul1"), c1, c2);
   Output mul2 = ops::Mul(s.WithOpName("mul2"), c2, c1);
-  Output mul3 = ops::Mul(s.WithOpName("mul3"), mul1, mul2);
+  Output div1 = ops::Div(s.WithOpName("div1"), mul1, mul2);
   GrapplerItem item;
   TF_CHECK_OK(s.ToGraphDef(&item.graph));
 
@@ -142,11 +142,35 @@ TEST_F(ArithmeticOptimizerTest, OpDedupCommutative) {
   EXPECT_EQ(2, new_mul1.input_size());
   EXPECT_EQ("c1", new_mul1.input(0));
   EXPECT_EQ("c2", new_mul1.input(1));
-  const NodeDef& new_mul3 = output.node(3);
-  EXPECT_EQ("mul3", new_mul3.name());
-  EXPECT_EQ(2, new_mul3.input_size());
-  EXPECT_EQ("mul1", new_mul3.input(0));
-  EXPECT_EQ("mul1", new_mul3.input(1));
+  const NodeDef& new_div1 = output.node(3);
+  EXPECT_EQ("div1", new_div1.name());
+  EXPECT_EQ(2, new_div1.input_size());
+  EXPECT_EQ("mul1", new_div1.input(0));
+  EXPECT_EQ("mul1", new_div1.input(1));
+}
+
+TEST_F(ArithmeticOptimizerTest, MulToSquare) {
+  tensorflow::Scope s = tensorflow::Scope::NewRootScope();
+  Output c = ops::Const(s.WithOpName("c"), {1.0f, 2.0f}, {1, 2});
+  Output d = ops::Const(s.WithOpName("d"), {3.0f, 4.0f}, {1, 2});
+  Output mul = ops::Mul(s.WithControlDependencies(d).WithOpName("mul"), c, c);
+  Output id = ops::Identity(s.WithOpName("id"), mul);
+  GrapplerItem item;
+  TF_CHECK_OK(s.ToGraphDef(&item.graph));
+
+  ArithmeticOptimizer optimizer;
+  GraphDef output;
+  Status status = optimizer.Optimize(nullptr, item, &output);
+  TF_EXPECT_OK(status);
+
+  EXPECT_EQ(5, output.node_size());
+  EXPECT_EQ("Square", output.node(4).op());
+  EXPECT_EQ("mul_square", output.node(4).name());
+  EXPECT_EQ(2, output.node(4).input_size());
+  EXPECT_EQ("c", output.node(4).input(0));
+  EXPECT_EQ("^d", output.node(4).input(1));
+  EXPECT_EQ("id", output.node(3).name());
+  EXPECT_EQ("mul_square", output.node(3).input(0));
 }
 
 TEST_F(ArithmeticOptimizerTest, SimplifyInvolutionsReal) {
-- 
GitLab


From 00e566cdfb5145d88414dffb847fd303950d18bf Mon Sep 17 00:00:00 2001
From: Sanjoy Das <sanjoy@google.com>
Date: Mon, 27 Nov 2017 11:19:40 -0800
Subject: [PATCH 0306/1225] [XLA:CPU] Enable some dot simplifications

Rename enable_dot_simplification to enable_dot_strength_reduction and make some
dot simplifications unconditional.

PiperOrigin-RevId: 177045451
---
 .../xla/service/algebraic_simplifier.cc       | 24 ++++++++++---------
 .../xla/service/algebraic_simplifier.h        |  6 ++---
 .../compiler/xla/service/cpu/cpu_compiler.cc  |  4 ++--
 3 files changed, 18 insertions(+), 16 deletions(-)

diff --git a/tensorflow/compiler/xla/service/algebraic_simplifier.cc b/tensorflow/compiler/xla/service/algebraic_simplifier.cc
index 5dcc1318c9..71491218aa 100644
--- a/tensorflow/compiler/xla/service/algebraic_simplifier.cc
+++ b/tensorflow/compiler/xla/service/algebraic_simplifier.cc
@@ -180,17 +180,17 @@ class AlgebraicSimplifierVisitor : public DfsHloVisitorWithDefault {
   static bool Run(
       HloComputation* computation, bool is_layout_sensitive,
       AlgebraicSimplifier::ValidBitcastCallback valid_bitcast_callback,
-      bool enable_dot_simplification, bool enable_conv_simplification);
+      bool enable_dot_strength_reduction, bool enable_conv_simplification);
 
  private:
   explicit AlgebraicSimplifierVisitor(
       HloComputation* computation, bool is_layout_sensitive,
       AlgebraicSimplifier::ValidBitcastCallback valid_bitcast_callback,
-      bool enable_dot_simplification, bool enable_conv_simplification)
+      bool enable_dot_strength_reduction, bool enable_conv_simplification)
       : computation_(computation),
         is_layout_sensitive_(is_layout_sensitive),
         valid_bitcast_callback_(std::move(valid_bitcast_callback)),
-        enable_dot_simplification_(enable_dot_simplification),
+        enable_dot_strength_reduction_(enable_dot_strength_reduction),
         enable_conv_simplification_(enable_conv_simplification) {}
 
   // Convenience method for replacing an instruction with a bitcast.
@@ -265,8 +265,8 @@ class AlgebraicSimplifierVisitor : public DfsHloVisitorWithDefault {
   // Callback used to determine if a bitcast is possible.
   AlgebraicSimplifier::ValidBitcastCallback valid_bitcast_callback_;
 
-  // Disable dot simplication on platforms where it causes a slowdown.
-  bool enable_dot_simplification_;
+  // Disable dot strength reduction on platforms where it causes a slowdown.
+  bool enable_dot_strength_reduction_;
 
   // Disable convolution simplication on platforms where it causes a slowdown.
   bool enable_conv_simplification_;
@@ -275,10 +275,10 @@ class AlgebraicSimplifierVisitor : public DfsHloVisitorWithDefault {
 bool AlgebraicSimplifierVisitor::Run(
     HloComputation* computation, bool is_layout_sensitive,
     AlgebraicSimplifier::ValidBitcastCallback valid_bitcast_callback,
-    bool enable_dot_simplification, bool enable_conv_simplification) {
+    bool enable_dot_strength_reduction, bool enable_conv_simplification) {
   AlgebraicSimplifierVisitor visitor(
       computation, is_layout_sensitive, std::move(valid_bitcast_callback),
-      enable_dot_simplification, enable_conv_simplification);
+      enable_dot_strength_reduction, enable_conv_simplification);
   TF_CHECK_OK(computation->Accept(&visitor));
   return visitor.changed_;
 }
@@ -577,9 +577,7 @@ Status AlgebraicSimplifierVisitor::HandleDivide(HloInstruction* divide) {
 Status AlgebraicSimplifierVisitor::HandleDot(HloInstruction* dot) {
   auto lhs = dot->mutable_operand(0);
   auto rhs = dot->mutable_operand(1);
-  if (!enable_dot_simplification_) {
-    return Status::OK();
-  }
+
   // Only optimize F32 dot operations where the dot, rhs and lhs are rank 2 or
   // below.
   if (dot->shape().element_type() != F32 || ShapeUtil::Rank(lhs->shape()) > 2 ||
@@ -606,6 +604,10 @@ Status AlgebraicSimplifierVisitor::HandleDot(HloInstruction* dot) {
         dot, HloInstruction::CreateTranspose(dot->shape(), new_dot, {1, 0}));
   }
 
+  if (!enable_dot_strength_reduction_) {
+    return Status::OK();
+  }
+
   // Simplify outer product into multiply with implicit broadcasting.
   //
   // A dot(a[M, 1], b[1, N]) = multiply(a [M,1], b [1, N])
@@ -1703,7 +1705,7 @@ StatusOr<bool> AlgebraicSimplifier::Run(HloModule* module) {
   for (auto* comp : module->MakeNonfusionComputations()) {
     if (AlgebraicSimplifierVisitor::Run(
             comp, is_layout_sensitive_, valid_bitcast_callback_,
-            enable_dot_simplification_, enable_conv_simplification_)) {
+            enable_dot_strength_reduction_, enable_conv_simplification_)) {
       changed = true;
     }
   }
diff --git a/tensorflow/compiler/xla/service/algebraic_simplifier.h b/tensorflow/compiler/xla/service/algebraic_simplifier.h
index a9f476178c..43315f5cdc 100644
--- a/tensorflow/compiler/xla/service/algebraic_simplifier.h
+++ b/tensorflow/compiler/xla/service/algebraic_simplifier.h
@@ -40,11 +40,11 @@ class AlgebraicSimplifier : public HloPassInterface {
   // bitcasts.
   AlgebraicSimplifier(bool is_layout_sensitive,
                       ValidBitcastCallback valid_bitcast_callback,
-                      bool enable_dot_simplification = true,
+                      bool enable_dot_strength_reduction = true,
                       bool enable_conv_simplification = true)
       : is_layout_sensitive_(is_layout_sensitive),
         valid_bitcast_callback_(std::move(valid_bitcast_callback)),
-        enable_dot_simplification_(enable_dot_simplification),
+        enable_dot_strength_reduction_(enable_dot_strength_reduction),
         enable_conv_simplification_(enable_conv_simplification) {}
   ~AlgebraicSimplifier() override = default;
   tensorflow::StringPiece name() const override { return "algsimp"; }
@@ -58,7 +58,7 @@ class AlgebraicSimplifier : public HloPassInterface {
   ValidBitcastCallback valid_bitcast_callback_;
 
   // Enable dot simplication on platforms where it is profitable.
-  bool enable_dot_simplification_;
+  bool enable_dot_strength_reduction_;
 
   // Enable convolution simplication on platforms where it is profitable.
   bool enable_conv_simplification_;
diff --git a/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc b/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc
index ff6042ae19..99dae793ab 100644
--- a/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc
+++ b/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc
@@ -287,7 +287,7 @@ Status CpuCompiler::RunHloPasses(HloModule* module, bool is_aot_compile) {
     pass.AddPass<AlgebraicSimplifier>(
         /*is_layout_sensitive=*/false,
         [](const Shape&, const Shape&) { return false; },
-        /*enable_dot_simplification=*/false);
+        /*enable_dot_strength_reduction=*/false);
     pass.AddPass<TupleSimplifier>();
     pass.AddPass<WhileLoopSimplifier>();
     pass.AddPass<HloDCE>();
@@ -316,7 +316,7 @@ Status CpuCompiler::RunHloPasses(HloModule* module, bool is_aot_compile) {
   pipeline.AddPass<HloPassFix<AlgebraicSimplifier>>(
       /*is_layout_sensitive=*/true,
       [](const Shape&, const Shape&) { return true; },
-      /*enable_dot_simplification=*/false);
+      /*enable_dot_strength_reduction=*/false);
   pipeline.AddPass<HloCSE>(/*is_layout_sensitive=*/true);
   // Outline ops in the entry computation into calls to subcomputations.
   const int max_parallelism =
-- 
GitLab


From 5b7f4f122a16decfa2d64ffdb2882966981562eb Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 27 Nov 2017 12:03:24 -0800
Subject: [PATCH 0307/1225] Adds SpaceToDepth and DepthToSpace to list of
 supported NCHW ops in Grappler layout optimizer.

PiperOrigin-RevId: 177051825
---
 tensorflow/core/grappler/optimizers/layout_optimizer.cc | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/tensorflow/core/grappler/optimizers/layout_optimizer.cc b/tensorflow/core/grappler/optimizers/layout_optimizer.cc
index d25d9d99c5..c760efac70 100644
--- a/tensorflow/core/grappler/optimizers/layout_optimizer.cc
+++ b/tensorflow/core/grappler/optimizers/layout_optimizer.cc
@@ -62,7 +62,9 @@ std::set<string> GetOpsFormatSupported() {
       "FusedBatchNormGrad",
       "FusedConv2DBiasActivation",
       "MaxPool",
-      "MaxPoolGrad"};
+      "MaxPoolGrad",
+      "SpaceToDepth",
+      "DepthToSpace"};
   return ops_format_supported;
 }
 
-- 
GitLab


From 75350d385533b5aae9c33fc52ca90d359db6cc9d Mon Sep 17 00:00:00 2001
From: Yao Zhang <yaozhang@google.com>
Date: Mon, 27 Nov 2017 12:11:33 -0800
Subject: [PATCH 0308/1225] Add the missing GPU configs to avoid the division
 by zero (gflops) error.

PiperOrigin-RevId: 177053156
---
 tensorflow/python/grappler/cluster_test.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/tensorflow/python/grappler/cluster_test.py b/tensorflow/python/grappler/cluster_test.py
index a71a860a59..f1f02963de 100644
--- a/tensorflow/python/grappler/cluster_test.py
+++ b/tensorflow/python/grappler/cluster_test.py
@@ -93,7 +93,10 @@ class ClusterTest(test.TestCase):
       mg = meta_graph.create_meta_graph_def(graph=g)
       grappler_item = item.Item(mg)
       device_properties = device_properties_pb2.DeviceProperties(
-          type='GPU', environment={
+          type='GPU',
+          frequency=1000,
+          num_cores=60,
+          environment={
               'architecture': '7'
           })
       named_device = device_properties_pb2.NamedDevice(
-- 
GitLab


From d589382b1c17fbaae09e8877d3b8fa784100be70 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 27 Nov 2017 12:33:38 -0800
Subject: [PATCH 0309/1225] Changed default approximation interface in
 LayerCollection to use setter functions to avoid silent failures when user
 misspells the property names.

PiperOrigin-RevId: 177056030
---
 tensorflow/contrib/kfac/python/ops/layer_collection.py | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/tensorflow/contrib/kfac/python/ops/layer_collection.py b/tensorflow/contrib/kfac/python/ops/layer_collection.py
index d8781231ed..3a005ee39d 100644
--- a/tensorflow/contrib/kfac/python/ops/layer_collection.py
+++ b/tensorflow/contrib/kfac/python/ops/layer_collection.py
@@ -184,8 +184,7 @@ class LayerCollection(object):
   def default_generic_approximation(self):
     return self._default_generic_approximation
 
-  @default_generic_approximation.setter
-  def default_generic_approximation(self, value):
+  def set_default_generic_approximation(self, value):
     if value not in _GENERIC_APPROX_TO_BLOCK_TYPES:
       raise ValueError(
           "{} is not a valid approximation for generic variables.".format(
@@ -196,8 +195,7 @@ class LayerCollection(object):
   def default_fully_connected_approximation(self):
     return self._default_fully_connected_approximation
 
-  @default_fully_connected_approximation.setter
-  def default_fully_connected_approximation(self, value):
+  def set_default_fully_connected_approximation(self, value):
     if value not in _FULLY_CONNECTED_APPROX_TO_BLOCK_TYPES:
       raise ValueError(
           "{} is not a valid approximation for fully connected layers.".format(
@@ -208,8 +206,7 @@ class LayerCollection(object):
   def default_conv2d_approximation(self):
     return self._default_convolution_2d_approximation
 
-  @default_conv2d_approximation.setter
-  def default_conv2d_approximation(self, value):
+  def set_default_conv2d_approximation(self, value):
     if value not in _CONV2D_APPROX_TO_BLOCK_TYPES:
       raise ValueError(
           "{} is not a valid approximation for 2d convolutional layers.".format(
-- 
GitLab


From f327ae08506cddae3bf15c8ead901269ce2c4bf9 Mon Sep 17 00:00:00 2001
From: Benoit Steiner <bsteiner@google.com>
Date: Mon, 27 Nov 2017 12:59:02 -0800
Subject: [PATCH 0310/1225] Improved the item_tst.py regression test.

PiperOrigin-RevId: 177059258
---
 tensorflow/python/grappler/item_test.py | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/tensorflow/python/grappler/item_test.py b/tensorflow/python/grappler/item_test.py
index 69835761bc..71c68d25cd 100644
--- a/tensorflow/python/grappler/item_test.py
+++ b/tensorflow/python/grappler/item_test.py
@@ -25,6 +25,7 @@ from tensorflow.python.framework import meta_graph
 from tensorflow.python.framework import ops
 from tensorflow.python.framework import tensor_shape
 from tensorflow.python.grappler import item
+from tensorflow.python.ops import control_flow_ops
 from tensorflow.python.platform import test
 
 
@@ -59,6 +60,7 @@ class ItemTest(test.TestCase):
       a = constant_op.constant(10)
       b = constant_op.constant(20)
       c = a + b
+      z = control_flow_ops.no_op()
       train_op = ops.get_collection_ref(ops.GraphKeys.TRAIN_OP)
       train_op.append(c)
       mg = meta_graph.create_meta_graph_def(graph=g)
@@ -69,9 +71,12 @@ class ItemTest(test.TestCase):
       for node in grappler_item.metagraph.graph_def.node:
         node_prop = op_properties[node.name]
 
-        self.assertEqual(1, len(node_prop))
-        self.assertEqual(dtypes.int32, node_prop[0].dtype)
-        self.assertEqual(tensor_shape.scalar(), node_prop[0].shape)
+        if node.name == z.name:
+          self.assertEqual(0, len(node_prop))
+        else:
+          self.assertEqual(1, len(node_prop))
+          self.assertEqual(dtypes.int32, node_prop[0].dtype)
+          self.assertEqual(tensor_shape.scalar(), node_prop[0].shape)
 
   def testUpdates(self):
     with ops.Graph().as_default() as g:
-- 
GitLab


From c67a98530d525d87cf7f7f20114d22593b930763 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 27 Nov 2017 13:04:12 -0800
Subject: [PATCH 0311/1225] Exposes the table_ref in IdTableWithHashBuckets.

PiperOrigin-RevId: 177060029
---
 .../python/kernel_tests/lookup_ops_test.py    | 18 ++++++++++++++++++
 tensorflow/python/ops/lookup_ops.py           | 19 +++++++++++++++++++
 2 files changed, 37 insertions(+)

diff --git a/tensorflow/python/kernel_tests/lookup_ops_test.py b/tensorflow/python/kernel_tests/lookup_ops_test.py
index d4bc71f1c8..e4c799cb1c 100644
--- a/tensorflow/python/kernel_tests/lookup_ops_test.py
+++ b/tensorflow/python/kernel_tests/lookup_ops_test.py
@@ -488,6 +488,20 @@ class IndexTableFromFile(test.TestCase):
       self.assertRaises(ValueError, table.lookup,
                         constant_op.constant(["salad", "surgery", "tarkus"]))
 
+  def test_index_table_from_file_table_ref_with_oov_buckets(self):
+    vocabulary_file = self._createVocabFile("f2i_vocab9.txt")
+    with self.test_session():
+      table = lookup_ops.index_table_from_file(
+          vocabulary_file=vocabulary_file, num_oov_buckets=1)
+      self.assertIsNotNone(table.table_ref)
+
+  def test_index_table_from_file_table_ref_without_oov_buckets(self):
+    vocabulary_file = self._createVocabFile("f2i_vocab10.txt")
+    with self.test_session():
+      table = lookup_ops.index_table_from_file(
+          vocabulary_file=vocabulary_file, num_oov_buckets=0)
+      self.assertIsNotNone(table.table_ref)
+
 
 class KeyValueTensorInitializerTest(test.TestCase):
 
@@ -1431,6 +1445,10 @@ class IdTableWithHashBucketsTest(test.TestCase):
             oov_buckets,
             hasher_spec=lookup_ops.StrongHashSpec([None, 2]))
 
+  def testIdTableWithHashBucketsNoInnerTable(self):
+    with self.test_session():
+      table = lookup_ops.IdTableWithHashBuckets(None, num_oov_buckets=1)
+      self.assertIsNone(table.table_ref)
 
 if __name__ == "__main__":
   test.main()
diff --git a/tensorflow/python/ops/lookup_ops.py b/tensorflow/python/ops/lookup_ops.py
index 156e415735..8bc0bc7d06 100644
--- a/tensorflow/python/ops/lookup_ops.py
+++ b/tensorflow/python/ops/lookup_ops.py
@@ -789,6 +789,25 @@ class IdTableWithHashBuckets(LookupInterface):
     with ops.name_scope(None, "init"):
       return control_flow_ops.no_op()
 
+  @property
+  def table_ref(self):
+    """Returns the table_ref of the underlying table, if one exists.
+
+    Only use the table_ref directly if you know what you are doing. The
+    table_ref does not have the "hash bucket" functionality, as that is provided
+    by this class.
+
+    One possible use of the table_ref is subtokenization, i.e. ops which
+    dynamically decompose tokens into subtokens based on the contents of the
+    table_ref.
+
+    Returns:
+      the underlying table_ref, or None if there is no underlying table
+    """
+    if self._table is not None:
+      return self._table.table_ref
+    return None
+
   def size(self, name=None):
     """Compute the number of elements in this table."""
     with ops.name_scope(name, "%s_Size" % self.name) as scope:
-- 
GitLab


From 7726333292b9e3d97a033617ee53099f6c4fedd5 Mon Sep 17 00:00:00 2001
From: Clayne Robison <clayne.b.robison@intel.com>
Date: Mon, 27 Nov 2017 14:08:17 -0700
Subject: [PATCH 0312/1225] =?UTF-8?q?Added=20Ubuntu=2016.04=20Dockerfile?=
 =?UTF-8?q?=20with=20TF=201.4=20optimized=20for=20CPU=20with=20Inte?=
 =?UTF-8?q?=E2=80=A6=20(#14468)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* Added Ubuntu 16.04 Dockerfile with TF 1.4 optimized for CPU with Intel(R) MKL

* Edits per comments/code review. Now pulls FROM tensorflow/tensorflow. Removed superfluous build options. Now installs emacs in addition to vim.

* More edits per reviewer feedback. Removed dependency on Bazel 0.7.0.
---
 .../tools/docker/Dockerfile.devel-cpu-mkl     | 85 +++++++++++++++++++
 1 file changed, 85 insertions(+)
 create mode 100644 tensorflow/tools/docker/Dockerfile.devel-cpu-mkl

diff --git a/tensorflow/tools/docker/Dockerfile.devel-cpu-mkl b/tensorflow/tools/docker/Dockerfile.devel-cpu-mkl
new file mode 100644
index 0000000000..8180e5e7fb
--- /dev/null
+++ b/tensorflow/tools/docker/Dockerfile.devel-cpu-mkl
@@ -0,0 +1,85 @@
+FROM tensorflow/tensorflow:latest-devel
+
+LABEL maintainer="Clayne Robison<clayne.b.robison@intel.com>"
+
+# These arguments are parameterized. Use --build-args to override.
+ARG TF_BRANCH=r1.4
+ARG WHL_DIR=/whl
+
+RUN apt-get update && apt-get install -y --no-install-recommends \
+        golang \
+        vim \
+        emacs \
+        && \
+    apt-get clean && \
+    rm -rf /var/lib/apt/lists/*
+
+RUN pip --no-cache-dir install --upgrade \
+        pip setuptools
+
+RUN pip --no-cache-dir install wheel 
+
+# Download and build TensorFlow.
+WORKDIR /
+RUN rm -rf tensorflow && \
+    git clone https://github.com/tensorflow/tensorflow.git && \
+    cd tensorflow && \
+    git checkout ${TF_BRANCH}
+WORKDIR /tensorflow
+
+# Configure the build for CPU with MKL by accepting default build options and
+# setting library locations
+ENV CI_BUILD_PYTHON=python \
+   LD_LIBRARY_PATH=${LD_LIBRARY_PATH} \
+    PYTHON_BIN_PATH=/usr/bin/python \
+    PYTHON_LIB_PATH=/usr/local/lib/python2.7/dist-packages \
+    CC_OPT_FLAGS='-march=native' \
+    TF_NEED_JEMALLOC=0 \
+    TF_NEED_GCP=0 \
+    TF_NEED_CUDA=0 \
+    TF_NEED_HDFS=0 \
+    TF_NEED_S3=0 \
+    TF_NEED_OPENCL=0 \
+    TF_NEED_GDR=0 \
+    TF_ENABLE_XLA=0 \
+    TF_NEED_VERBS=0 \
+    TF_NEED_MPI=0
+RUN ./configure
+
+# Build and Install TensorFlow.
+# The 'mkl' option builds with Intel(R) Math Kernel Library (MKL), which detects
+# the platform it is currently running on and takes appropriately optimized 
+# paths. The -march=native option is for code that is not in MKL, and assumes
+# this container will be run on the same architecture on which it is built.
+RUN LD_LIBRARY_PATH=${LD_LIBRARY_PATH} \
+    bazel build --config=mkl \
+                --config="opt" \
+                --copt="-march=native" \
+                --copt="-O3" \
+                //tensorflow/tools/pip_package:build_pip_package && \
+    mkdir ${WHL_DIR} && \
+    bazel-bin/tensorflow/tools/pip_package/build_pip_package ${WHL_DIR}
+
+# Clean up Bazel cache when done, but leave the whl.
+# This will upgrade the default Tensorflow version with the Intel MKL version
+RUN pip --no-cache-dir install --upgrade ${WHL_DIR}/tensorflow-*.whl && \
+    rm -rf /root/.cache
+
+WORKDIR /root
+
+#add welcome message with instructions
+
+RUN echo '[ ! -z "$TERM" -a -r /etc/motd ] && cat /etc/issue && cat /etc/motd' \
+	>> /etc/bash.bashrc \
+	; echo "\
+||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||\n\
+|								\n\
+| Docker container running Ubuntu				\n\
+| with TensorFlow ${TF_BRANCH} optimized for CPU		\n\
+| with Intel(R) MKL						\n\
+|								\n\
+||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||\n\
+\n "\
+	> /etc/motd
+
+CMD ["/bin/bash"]
-- 
GitLab


From 2110de0059a139ed5f4dd15c3e79102cceecad74 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 27 Nov 2017 13:38:09 -0800
Subject: [PATCH 0313/1225] Fix ASAN failure in tests

RELNOTES: None
PiperOrigin-RevId: 177064374
---
 tensorflow/c/eager/c_api_test.cc | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tensorflow/c/eager/c_api_test.cc b/tensorflow/c/eager/c_api_test.cc
index 03843fa913..3fe0b7efa1 100644
--- a/tensorflow/c/eager/c_api_test.cc
+++ b/tensorflow/c/eager/c_api_test.cc
@@ -325,7 +325,8 @@ TEST(CAPI, Function) {
   ASSERT_TRUE(TF_GetCode(status) == TF_OK) << TF_Message(status);
   TF_DeleteFunction(fn);
 
-  TF_Tensor* t = TF_AllocateTensor(TF_INT32, nullptr, 0, 1);
+  TF_Tensor* t =
+      TF_AllocateTensor(TF_INT32, nullptr, 0, 1 * sizeof(tensorflow::int32));
   *reinterpret_cast<tensorflow::int32*>(TF_TensorData(t)) = 42;
   TFE_TensorHandle* h = TFE_NewTensorHandle(t, status);
   ASSERT_TRUE(TF_GetCode(status) == TF_OK) << TF_Message(status);
-- 
GitLab


From db9533e4f5fa940f704996cd6d38f40b13d40dff Mon Sep 17 00:00:00 2001
From: Igor Ganichev <iga@google.com>
Date: Mon, 27 Nov 2017 13:42:53 -0800
Subject: [PATCH 0314/1225] Enable a Session tests using function and
 set_device

These tests were disabled for C API because those features
were not implemented. They are now.

PiperOrigin-RevId: 177064985
---
 tensorflow/python/client/session_test.py | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/tensorflow/python/client/session_test.py b/tensorflow/python/client/session_test.py
index 6b45a5f313..3e85410a97 100644
--- a/tensorflow/python/client/session_test.py
+++ b/tensorflow/python/client/session_test.py
@@ -1583,7 +1583,6 @@ class SessionTest(test_util.TensorFlowTestCase):
         sess.run(enqueue_op)
       self.assertEqual(sess.run(q.size()), num_epochs * 2)
 
-  @test_util.disable_c_api  # set_device does not work with C API
   def testRegisterFetchAndFeedConversionFunctions(self):
     class SquaredTensor(object):
       def __init__(self, tensor):
@@ -1733,11 +1732,9 @@ class SessionTest(test_util.TensorFlowTestCase):
       result = sess.run(f)
       self.assertEqual(result, 2.0)
 
-  @test_util.disable_c_api  # functions don't work with C API
   def testAddFunctionToSession(self):
     self.runTestAddFunctionToSession()
 
-  @test_util.disable_c_api  # functions don't work with C API
   def testAddFunctionToGrpcSession(self):
     server = server_lib.Server.create_local_server()
     self.runTestAddFunctionToSession(server.target)
-- 
GitLab


From 6cc7e387fc1b642d363b6a18877a411382a82fa5 Mon Sep 17 00:00:00 2001
From: Peter Hawkins <phawkins@google.com>
Date: Mon, 27 Nov 2017 14:06:23 -0800
Subject: [PATCH 0315/1225] [TF:XLA] Implement StatelessRandomUniform and
 StatelessRandomNormal using the ThreeFry counter-based PRNG.

Extend stateless ops to allow 32-bit integer seeds, with a 64-bit default.

PiperOrigin-RevId: 177068747
---
 tensorflow/compiler/tests/BUILD               |  13 +
 .../tests/stateless_random_ops_test.py        | 118 ++++++++
 tensorflow/compiler/tf2xla/kernels/BUILD      |   2 +
 .../tf2xla/kernels/stateless_random_ops.cc    | 279 ++++++++++++++++++
 .../compiler/xla/service/user_computation.cc  |   8 +
 .../kernel_tests/stateless_random_ops_test.py |  21 +-
 .../core/kernels/stateless_random_ops.cc      |  15 +-
 tensorflow/core/ops/stateless_random_ops.cc   |   3 +-
 8 files changed, 445 insertions(+), 14 deletions(-)
 create mode 100644 tensorflow/compiler/tests/stateless_random_ops_test.py
 create mode 100644 tensorflow/compiler/tf2xla/kernels/stateless_random_ops.cc

diff --git a/tensorflow/compiler/tests/BUILD b/tensorflow/compiler/tests/BUILD
index 79c4befd36..6cad2b0824 100644
--- a/tensorflow/compiler/tests/BUILD
+++ b/tensorflow/compiler/tests/BUILD
@@ -457,6 +457,19 @@ tf_xla_py_test(
     ],
 )
 
+tf_xla_py_test(
+    name = "stateless_random_ops_test",
+    size = "small",
+    srcs = ["stateless_random_ops_test.py"],
+    tags = ["optonly"],
+    deps = [
+        ":xla_test",
+        "//tensorflow/contrib/stateless",
+        "//tensorflow/python:framework_for_generated_wrappers",
+        "//tensorflow/python:platform_test",
+    ],
+)
+
 tf_xla_py_test(
     name = "tensor_array_ops_test",
     size = "small",
diff --git a/tensorflow/compiler/tests/stateless_random_ops_test.py b/tensorflow/compiler/tests/stateless_random_ops_test.py
new file mode 100644
index 0000000000..4336ebdbd1
--- /dev/null
+++ b/tensorflow/compiler/tests/stateless_random_ops_test.py
@@ -0,0 +1,118 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for stateless random-number generation ops."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import math
+
+import numpy as np
+
+from tensorflow.compiler.tests.xla_test import XLATestCase
+from tensorflow.contrib import stateless
+from tensorflow.python.framework import dtypes
+from tensorflow.python.ops import array_ops
+from tensorflow.python.platform import test
+
+
+class StatelessRandomOpsTest(XLATestCase):
+  """Test cases for stateless random-number generator operators."""
+
+  def _random_types(self):
+    return [dtypes.float32]
+
+  def testDeterminism(self):
+    # Stateless values should be equal iff the seeds are equal (roughly)
+    with self.test_session(), self.test_scope():
+      seed_t = array_ops.placeholder(dtypes.int32, shape=[2])
+      seeds = [(x, y) for x in range(5) for y in range(5)] * 3
+      for stateless_op in [
+          stateless.stateless_random_uniform, stateless.stateless_random_normal
+      ]:
+        for shape in (), (3,), (2, 5):
+          for dtype in self._random_types():
+            pure = stateless_op(shape, seed=seed_t, dtype=dtype)
+            values = [(seed, pure.eval(feed_dict={
+                seed_t: seed
+            })) for seed in seeds]
+            for s0, v0 in values:
+              for s1, v1 in values:
+                self.assertEqual(s0 == s1, np.all(v0 == v1))
+
+  def testRandomUniformIsInRange(self):
+    with self.test_session() as sess, self.test_scope():
+      for dtype in self._random_types():
+        seed_t = array_ops.placeholder(dtypes.int32, shape=[2])
+        x = stateless.stateless_random_uniform(
+            shape=[1000], seed=seed_t, dtype=dtype)
+        y = sess.run(x, {seed_t: [0x12345678, 0xabcdef12]})
+        self.assertTrue(np.all(y >= 0))
+        self.assertTrue(np.all(y < 1))
+
+  def _chi_squared(self, x, bins):
+    """Pearson's Chi-squared test."""
+    x = np.ravel(x)
+    n = len(x)
+    histogram, _ = np.histogram(x, bins=bins, range=(0, 1))
+    expected = n / float(bins)
+    return np.sum(np.square(histogram - expected) / expected)
+
+  def testDistributionOfStatelessRandomUniform(self):
+    """Use Pearson's Chi-squared test to test for uniformity."""
+    with self.test_session() as sess, self.test_scope():
+      for dtype in self._random_types():
+        seed_t = array_ops.placeholder(dtypes.int32, shape=[2])
+        n = 1000
+        x = stateless.stateless_random_uniform(
+            shape=[n], seed=seed_t, dtype=dtype)
+        y = sess.run(x, {seed_t: [565656, 121212]})
+        # Tests that the values are distributed amongst 10 bins with equal
+        # probability. 16.92 is the Chi^2 value for 9 degrees of freedom with
+        # p=0.05. This test is probabilistic and would be flaky if the random
+        # seed were not fixed.
+        self.assertTrue(self._chi_squared(y, 10) < 16.92)
+
+  def _normal_cdf(self, x):
+    """Cumulative distribution function for a standard normal distribution."""
+    return 0.5 + 0.5 * np.vectorize(math.erf)(x / math.sqrt(2))
+
+  def _anderson_darling(self, x):
+    """Anderson-Darling test for a standard normal distribution."""
+    x = np.sort(np.ravel(x))
+    n = len(x)
+    i = np.linspace(1, n, n)
+    z = np.sum((2 * i - 1) * np.log(self._normal_cdf(x)) +
+               (2 * (n - i) + 1) * np.log(1 - self._normal_cdf(x)))
+    return -n - z / n
+
+  def testDistributionOfStatelessRandomNormal(self):
+    """Use Anderson-Darling test to test distribution appears normal."""
+    with self.test_session() as sess, self.test_scope():
+      for dtype in self._random_types():
+        seed_t = array_ops.placeholder(dtypes.int32, shape=[2])
+        n = 1000
+        x = stateless.stateless_random_normal(
+            shape=[n], seed=seed_t, dtype=dtype)
+        y = sess.run(x, {seed_t: [25252, 314159]})
+        # The constant 2.492 is the 5% critical value for the Anderson-Darling
+        # test where the mean and variance are known. This test is probabilistic
+        # so to avoid flakiness the seed is fixed.
+        self.assertTrue(self._anderson_darling(y) < 2.492)
+
+
+if __name__ == '__main__':
+  test.main()
diff --git a/tensorflow/compiler/tf2xla/kernels/BUILD b/tensorflow/compiler/tf2xla/kernels/BUILD
index 948d7f0b40..6302fece1f 100644
--- a/tensorflow/compiler/tf2xla/kernels/BUILD
+++ b/tensorflow/compiler/tf2xla/kernels/BUILD
@@ -65,6 +65,7 @@ tf_kernel_library(
         "spacetodepth_op.cc",
         "split_op.cc",
         "stack_ops.cc",
+        "stateless_random_ops.cc",
         "strided_slice_op.cc",
         "tensor_array_ops.cc",
         "tile_ops.cc",
@@ -96,6 +97,7 @@ tf_kernel_library(
         "//tensorflow/core:lib",
         "//tensorflow/core:linalg_ops_op_lib",
         "//tensorflow/core:protos_all_cc",
+        "//tensorflow/core:stateless_random_ops_op_lib",
         "//tensorflow/core/kernels:bounds_check",
         "//tensorflow/core/kernels:concat_lib",
         "//tensorflow/core/kernels:constant_op",
diff --git a/tensorflow/compiler/tf2xla/kernels/stateless_random_ops.cc b/tensorflow/compiler/tf2xla/kernels/stateless_random_ops.cc
new file mode 100644
index 0000000000..b10880de77
--- /dev/null
+++ b/tensorflow/compiler/tf2xla/kernels/stateless_random_ops.cc
@@ -0,0 +1,279 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include <cmath>
+
+#include "tensorflow/compiler/tf2xla/shape_util.h"
+#include "tensorflow/compiler/tf2xla/xla_helpers.h"
+#include "tensorflow/compiler/tf2xla/xla_op_kernel.h"
+#include "tensorflow/compiler/tf2xla/xla_op_registry.h"
+#include "tensorflow/compiler/xla/client/lib/arithmetic.h"
+#include "tensorflow/core/framework/op_kernel.h"
+#include "tensorflow/core/framework/tensor.h"
+#include "tensorflow/core/framework/tensor_shape.h"
+#include "tensorflow/core/lib/core/casts.h"
+#include "tensorflow/core/lib/math/math_util.h"
+
+namespace tensorflow {
+namespace {
+
+// Rotates a 32-bit integer 'v' left by 'distance' bits.
+xla::ComputationDataHandle RotateLeftS32(xla::ComputationBuilder* builder,
+                                         const xla::ComputationDataHandle& v,
+                                         int distance) {
+  return builder->Or(
+      builder->ShiftLeft(v, builder->ConstantR0<int>(distance)),
+      builder->ShiftRightLogical(v, builder->ConstantR0<int>(32 - distance)));
+}
+
+// TODO(b/65209188): add a primitive XOR to XLA and call it here, rather than
+// building XOR out of other bitwise operators.
+xla::ComputationDataHandle BitwiseXor(xla::ComputationBuilder* builder,
+                                      const xla::ComputationDataHandle& x,
+                                      const xla::ComputationDataHandle& y) {
+  return builder->Or(builder->And(x, builder->Not(y)),
+                     builder->And(builder->Not(x), y));
+}
+
+using ThreeFry2x32State = std::array<xla::ComputationDataHandle, 2>;
+
+// Implements the ThreeFry counter-based PRNG algorithm.
+// Salmon et al. SC 2011. Parallel random numbers: as easy as 1, 2, 3.
+// http://www.thesalmons.org/john/random123/papers/random123sc11.pdf
+ThreeFry2x32State ThreeFry2x32(xla::ComputationBuilder* builder,
+                               ThreeFry2x32State input, ThreeFry2x32State key) {
+  // Rotation distances specified by the Threefry2x32 algorithm.
+  constexpr std::array<int, 8> rotations = {13, 15, 26, 6, 17, 29, 16, 24};
+  ThreeFry2x32State x;
+
+  std::array<xla::ComputationDataHandle, 3> ks;
+  // 0x1BD11BDA is a parity constant specified by the ThreeFry2x32 algorithm.
+  ks[2] = builder->ConstantR0<int32>(0x1BD11BDA);
+  for (int i = 0; i < 2; ++i) {
+    ks[i] = key[i];
+    x[i] = input[i];
+    ks[2] = BitwiseXor(builder, ks[2], key[i]);
+  }
+
+  x[0] = builder->Add(x[0], ks[0]);
+  x[1] = builder->Add(x[1], ks[1]);
+
+  // Performs a single round of the Threefry2x32 algorithm, with a rotation
+  // amount 'rotation'.
+  auto round = [builder](ThreeFry2x32State v, int rotation) {
+    v[0] = builder->Add(v[0], v[1]);
+    v[1] = RotateLeftS32(builder, v[1], rotation);
+    v[1] = BitwiseXor(builder, v[0], v[1]);
+    return v;
+  };
+
+  // There are no known statistical flaws with 13 rounds of Threefry2x32.
+  // We are conservative and use 20 rounds.
+  x = round(x, rotations[0]);
+  x = round(x, rotations[1]);
+  x = round(x, rotations[2]);
+  x = round(x, rotations[3]);
+  x[0] = builder->Add(x[0], ks[1]);
+  x[1] = builder->Add(builder->Add(x[1], ks[2]), builder->ConstantR0<int32>(1));
+
+  x = round(x, rotations[4]);
+  x = round(x, rotations[5]);
+  x = round(x, rotations[6]);
+  x = round(x, rotations[7]);
+  x[0] = builder->Add(x[0], ks[2]);
+  x[1] = builder->Add(builder->Add(x[1], ks[0]), builder->ConstantR0<int32>(2));
+
+  x = round(x, rotations[0]);
+  x = round(x, rotations[1]);
+  x = round(x, rotations[2]);
+  x = round(x, rotations[3]);
+  x[0] = builder->Add(x[0], ks[0]);
+  x[1] = builder->Add(builder->Add(x[1], ks[1]), builder->ConstantR0<int32>(3));
+
+  x = round(x, rotations[4]);
+  x = round(x, rotations[5]);
+  x = round(x, rotations[6]);
+  x = round(x, rotations[7]);
+  x[0] = builder->Add(x[0], ks[1]);
+  x[1] = builder->Add(builder->Add(x[1], ks[2]), builder->ConstantR0<int32>(4));
+
+  x = round(x, rotations[0]);
+  x = round(x, rotations[1]);
+  x = round(x, rotations[2]);
+  x = round(x, rotations[3]);
+  x[0] = builder->Add(x[0], ks[2]);
+  x[1] = builder->Add(builder->Add(x[1], ks[0]), builder->ConstantR0<int32>(5));
+
+  return x;
+}
+
+// Returns a tensor of 'shape' random values uniformly distributed in the range
+// [minval, maxval)
+xla::ComputationDataHandle RandomUniform(xla::ComputationBuilder* builder,
+                                         const xla::ComputationDataHandle& seed,
+                                         const TensorShape& shape,
+                                         double minval, double maxval) {
+  // Split the seed into two 32-bit scalars to form a key.
+  auto seed0 = builder->Reshape(builder->Slice(seed, {0}, {1}, {1}), {});
+  auto seed1 = builder->Reshape(builder->Slice(seed, {1}, {2}, {1}), {});
+  ThreeFry2x32State key = {seed0, seed1};
+  const int64 size = shape.num_elements();
+
+  const int64 half_size = MathUtil::CeilOfRatio<int64>(size, 2);
+  const bool size_is_odd = (half_size * 2 != size);
+
+  // Fill the generator inputs with unique counter values.
+  ThreeFry2x32State inputs;
+  TF_CHECK_OK(XlaHelpers::Iota(builder, DT_INT32, half_size, &inputs[0]));
+  inputs[1] = builder->Add(inputs[0], builder->ConstantR0<int32>(half_size));
+  ThreeFry2x32State outputs = ThreeFry2x32(builder, inputs, key);
+
+  if (size_is_odd) {
+    outputs[1] = builder->Slice(outputs[1], {0}, {half_size - 1}, {1});
+  }
+
+  auto bits =
+      builder->Reshape(builder->ConcatInDim(outputs, 0), shape.dim_sizes());
+
+  // Form 22 random mantissa bits, with a leading 1 bit. The leading 1 bit
+  // forces the random bits into the mantissa.
+  constexpr int kFloatBits = 32;
+  constexpr int kMantissaBits = 23;
+  bits = builder->Or(
+      builder->ShiftRightLogical(
+          bits, builder->ConstantR0<int32>(kFloatBits - kMantissaBits)),
+      builder->ConstantR0<int32>(bit_cast<int32>(1.0f)));
+  auto floats = builder->BitcastConvertType(bits, xla::F32);
+
+  // We have a floating point number in the range [1.0, 2.0).
+  // Subtract 1.0f to shift to the range [0.0, 1.0)
+  floats = builder->Sub(floats, builder->ConstantR0<float>(1.0f));
+  // Multiply and add to shift to the range [minval, maxval).
+  floats = builder->Mul(floats, builder->ConstantR0<float>(maxval - minval));
+  floats = builder->Add(floats, builder->ConstantR0<float>(minval));
+  return floats;
+}
+
+// Approximation for the inverse error function from
+//   Giles, M., "Approximating the erfinv function".
+// The approximation has the form:
+//   w = -log((1 - x) * (1 + x))
+//   if ( w < 5 ) {
+//     w = w - 2.5
+//     p = sum_{i=1}^n lq[i]*w^i
+//   } else {
+//     w = sqrt(w) - 3
+//     p = sum_{i=1}^n gq[i]*w^i
+//   }
+//   return p*x
+xla::ComputationDataHandle ErfInvF32(xla::ComputationBuilder* b,
+                                     const xla::ComputationDataHandle& x,
+                                     const TensorShape& shape) {
+  constexpr int kDegree = 9;
+  constexpr std::array<float, 9> w_less_than_5_constants = {
+      2.81022636e-08f,  3.43273939e-07f, -3.5233877e-06f,
+      -4.39150654e-06f, 0.00021858087f,  -0.00125372503f,
+      -0.00417768164f,  0.246640727f,    1.50140941f};
+  constexpr std::array<float, 9> w_greater_than_5_constants = {
+      -0.000200214257f, 0.000100950558f, 0.00134934322f,
+      -0.00367342844f,  0.00573950773f,  -0.0076224613f,
+      0.00943887047f,   1.00167406f,     2.83297682f};
+
+  auto one = b->ConstantR0<float>(1.0);
+  auto w = b->Neg(b->Log(b->Mul(b->Sub(one, x), b->Add(one, x))));
+
+  auto lt = b->Lt(w, b->ConstantR0<float>(5.0));
+  auto coefficient = [&](int i) {
+    return b->Select(
+        lt,
+        b->Broadcast(b->ConstantR0<float>(w_less_than_5_constants[i]),
+                     shape.dim_sizes()),
+        b->Broadcast(b->ConstantR0<float>(w_greater_than_5_constants[i]),
+                     shape.dim_sizes()));
+  };
+  w = b->Select(lt, b->Sub(w, b->ConstantR0<float>(2.5f)),
+                b->Sub(b->SqrtF32(w), b->ConstantR0<float>(3.0f)));
+  auto p = coefficient(0);
+  for (int i = 1; i < kDegree; ++i) {
+    p = b->Add(coefficient(i), b->Mul(p, w));
+  }
+  return b->Mul(p, x);
+}
+
+}  // namespace
+
+class StatelessRandomUniformOp : public XlaOpKernel {
+ public:
+  explicit StatelessRandomUniformOp(OpKernelConstruction* ctx)
+      : XlaOpKernel(ctx) {}
+
+  void Compile(XlaOpKernelContext* ctx) override {
+    xla::ComputationBuilder* builder = ctx->builder();
+
+    TensorShape shape;
+    OP_REQUIRES_OK(ctx, ctx->ConstantInputAsShape(0, &shape));
+
+    TensorShape seed_shape = ctx->InputShape(1);
+    OP_REQUIRES(ctx, seed_shape.dims() == 1 && seed_shape.dim_size(0) == 2,
+                errors::InvalidArgument("seed must have shape [2], not ",
+                                        seed_shape.DebugString()));
+    xla::ComputationDataHandle seed = ctx->Input(1);
+    ctx->SetOutput(0, RandomUniform(builder, seed, shape, 0.0, 1.0));
+  }
+
+ private:
+  TF_DISALLOW_COPY_AND_ASSIGN(StatelessRandomUniformOp);
+};
+
+// TODO(phawkins): generalize to non-float, non-int32 seed types.
+REGISTER_XLA_OP(Name("StatelessRandomUniform")
+                    .TypeConstraint("dtype", DT_FLOAT)
+                    .TypeConstraint("Tseed", DT_INT32),
+                StatelessRandomUniformOp);
+
+class StatelessRandomNormalOp : public XlaOpKernel {
+ public:
+  explicit StatelessRandomNormalOp(OpKernelConstruction* ctx)
+      : XlaOpKernel(ctx) {}
+
+  void Compile(XlaOpKernelContext* ctx) override {
+    TensorShape shape;
+    OP_REQUIRES_OK(ctx, ctx->ConstantInputAsShape(0, &shape));
+
+    TensorShape seed_shape = ctx->InputShape(1);
+    OP_REQUIRES(ctx, seed_shape == TensorShape({2}),
+                errors::InvalidArgument("seed must have shape [2], not ",
+                                        seed_shape.DebugString()));
+    xla::ComputationDataHandle seed = ctx->Input(1);
+    xla::ComputationBuilder* builder = ctx->builder();
+    auto uniform = RandomUniform(builder, seed, shape, -1.0, 1.0);
+    // Convert uniform distribution to normal distribution by computing
+    // sqrt(2) * erfinv(x)
+    auto normal = builder->Mul(builder->ConstantR0<float>(std::sqrt(2.0)),
+                               ErfInvF32(builder, uniform, shape));
+    ctx->SetOutput(0, normal);
+  }
+
+ private:
+  TF_DISALLOW_COPY_AND_ASSIGN(StatelessRandomNormalOp);
+};
+
+// TODO(phawkins): generalize to non-float, non-int32 seed types.
+REGISTER_XLA_OP(Name("StatelessRandomNormal")
+                    .TypeConstraint("dtype", DT_FLOAT)
+                    .TypeConstraint("Tseed", DT_INT32),
+                StatelessRandomNormalOp);
+
+}  // namespace tensorflow
diff --git a/tensorflow/compiler/xla/service/user_computation.cc b/tensorflow/compiler/xla/service/user_computation.cc
index b0b15bb571..4e90491b55 100644
--- a/tensorflow/compiler/xla/service/user_computation.cc
+++ b/tensorflow/compiler/xla/service/user_computation.cc
@@ -1739,6 +1739,14 @@ void PureFunctionalVisitor(const SessionComputation& session_computation,
       break;
     }
 
+    case OpRequest::kBitcastConvertRequest: {
+      const ConvertRequest& convert_request =
+          request.request().bitcast_convert_request();
+      PureFunctionalVisitor(session_computation, convert_request.operand(),
+                            num_parameters, visited, is_functional);
+      break;
+    }
+
     case OpRequest::kWhileRequest: {
       const WhileRequest& while_request = request.request().while_request();
       PureFunctionalVisitor(session_computation, while_request.init(),
diff --git a/tensorflow/contrib/stateless/python/kernel_tests/stateless_random_ops_test.py b/tensorflow/contrib/stateless/python/kernel_tests/stateless_random_ops_test.py
index cd4d46aa07..bea6341cfd 100644
--- a/tensorflow/contrib/stateless/python/kernel_tests/stateless_random_ops_test.py
+++ b/tensorflow/contrib/stateless/python/kernel_tests/stateless_random_ops_test.py
@@ -69,16 +69,17 @@ class StatelessOpsTest(test.TestCase):
   def testDeterminism(self):
     # Stateless values should be equal iff the seeds are equal (roughly)
     with self.test_session(use_gpu=True):
-      seed_t = array_ops.placeholder(dtypes.int64, shape=[2])
-      seeds = [(x, y) for x in range(5) for y in range(5)] * 3
-      for stateless_op, _ in CASES:
-        for shape in (), (3,), (2, 5):
-          pure = stateless_op(shape, seed=seed_t)
-          values = [(seed, pure.eval(feed_dict={seed_t: seed}))
-                    for seed in seeds]
-          for s0, v0 in values:
-            for s1, v1 in values:
-              self.assertEqual(s0 == s1, np.all(v0 == v1))
+      for seed_type in [dtypes.int32, dtypes.int64]:
+        seed_t = array_ops.placeholder(seed_type, shape=[2])
+        seeds = [(x, y) for x in range(5) for y in range(5)] * 3
+        for stateless_op, _ in CASES:
+          for shape in (), (3,), (2, 5):
+            pure = stateless_op(shape, seed=seed_t)
+            values = [(seed, pure.eval(feed_dict={seed_t: seed}))
+                      for seed in seeds]
+            for s0, v0 in values:
+              for s1, v1 in values:
+                self.assertEqual(s0 == s1, np.all(v0 == v1))
 
   def testShapeType(self):
     with self.test_session(use_gpu=True):
diff --git a/tensorflow/core/kernels/stateless_random_ops.cc b/tensorflow/core/kernels/stateless_random_ops.cc
index f6fb0a121d..88fcf542fb 100644
--- a/tensorflow/core/kernels/stateless_random_ops.cc
+++ b/tensorflow/core/kernels/stateless_random_ops.cc
@@ -50,9 +50,18 @@ class StatelessRandomOpBase : public OpKernel {
     if (shape.num_elements() == 0) return;
 
     // Grab the two seeds
-    const auto seed = seed_t.flat<int64>();
-    const uint64 seed0 = internal::SubtleMustCopy(seed(0));
-    const uint64 seed1 = internal::SubtleMustCopy(seed(1));
+    uint64 seed0;
+    uint64 seed1;
+    if (context->input_dtype(1) == DT_INT32) {
+      const auto seed = seed_t.flat<int32>();
+      seed0 = internal::SubtleMustCopy(seed(0));
+      seed1 = internal::SubtleMustCopy(seed(1));
+    } else {
+      CHECK_EQ(DT_INT64, context->input_dtype(1));
+      const auto seed = seed_t.flat<int64>();
+      seed0 = internal::SubtleMustCopy(seed(0));
+      seed1 = internal::SubtleMustCopy(seed(1));
+    }
 
     // Scramble the seeds so that the user doesn't need to worry about which
     // part of the seed needs to be strong.
diff --git a/tensorflow/core/ops/stateless_random_ops.cc b/tensorflow/core/ops/stateless_random_ops.cc
index 7c00fdb99f..3e1f8781fc 100644
--- a/tensorflow/core/ops/stateless_random_ops.cc
+++ b/tensorflow/core/ops/stateless_random_ops.cc
@@ -38,10 +38,11 @@ static Status StatelessShape(shape_inference::InferenceContext* context) {
 #define REGISTER_STATELESS_OP(name)                  \
   REGISTER_OP(name)                                  \
       .Input("shape: T")                             \
-      .Input("seed: int64")                          \
+      .Input("seed: Tseed")                          \
       .Output("output: dtype")                       \
       .Attr("dtype: {half,float,double} = DT_FLOAT") \
       .Attr("T: {int32, int64} = DT_INT32")          \
+      .Attr("Tseed: {int32, int64} = DT_INT64")      \
       .SetShapeFn(StatelessShape)
 
 // This op is exposed through contrib/stateless only.  The interface may change.
-- 
GitLab


From c159fbe82abb7817b9e556c3607af2efe30206da Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C3=A1szl=C3=B3=20Csomor?=
 <laszlocsomor@users.noreply.github.com>
Date: Mon, 27 Nov 2017 23:40:30 +0100
Subject: [PATCH 0316/1225] TFLite: get closer to build with Bazel on Windows
 (#14791)

Bazel cannot yet build TensorFlow Lite on Windows,
but this commit gets us closer.

In this commit:
- make the -Wno-implicit-fallthrough compiler flag
  in flatbuffers' BUILD file be conditional to
  non-Windows builds, because MSVC doesn't know
  this flag
- fix the Bazel build command in README.md by
  removing single quotes around --cxxflags,
  because it's not needed on Bash and is harmful
  on Windows (because cmd.exe doesn't remove the
  single quotes)
- fix non-ASCII quotes and apostrophes, as well as
  some formatting issues in README.md

See https://github.com/bazelbuild/bazel/issues/4148
---
 tensorflow/contrib/lite/README.md         | 47 +++++++++++++----------
 third_party/flatbuffers/flatbuffers.BUILD |  7 +++-
 2 files changed, 31 insertions(+), 23 deletions(-)

diff --git a/tensorflow/contrib/lite/README.md b/tensorflow/contrib/lite/README.md
index 3665a63097..c7464bcc9d 100644
--- a/tensorflow/contrib/lite/README.md
+++ b/tensorflow/contrib/lite/README.md
@@ -1,5 +1,5 @@
 # TensorFlow Lite
-TensorFlow Lite is TensorFlow’s lightweight solution for mobile and embedded devices. It enables low-latency inference of on-device machine learning models with a small binary size and fast performance supporting hardware acceleration.
+TensorFlow Lite is TensorFlow's lightweight solution for mobile and embedded devices. It enables low-latency inference of on-device machine learning models with a small binary size and fast performance supporting hardware acceleration.
 
 TensorFlow Lite uses many techniques for achieving low latency like optimizing the kernels for specific mobile apps, pre-fused activations, quantized kernels that allow smaller and faster (fixed-point math) models, and in the future, leverage specialized machine learning hardware to get the best possible performance for a particular model on a particular device.
 
@@ -20,18 +20,18 @@ In the demo app, inference is done using the TensorFlow Lite Java API. The demo
 The  fastest path to trying the demo, is to download the pre-built binary
 [TfLiteCameraDemo.apk](https://storage.googleapis.com/download.tensorflow.org/deps/tflite/TfLiteCameraDemo.apk)
 
-Once the apk is installed, click the app icon to start the app. The first-time the app is opened, the app asks for runtime permissions to access the device camera. The demo app opens the back-camera of the device and recognizes the objects in the camera’s field of view. At the bottom of the image (or at the left of the image if the device is in landscape mode), it shows the latency of classification and the top three objects classified.
+Once the apk is installed, click the app icon to start the app. The first-time the app is opened, the app asks for runtime permissions to access the device camera. The demo app opens the back-camera of the device and recognizes the objects in the camera's field of view. At the bottom of the image (or at the left of the image if the device is in landscape mode), it shows the latency of classification and the top three objects classified.
 
 ## Building in Android Studio using TensorFlow Lite AAR from JCenter
 The simplest way to compile the demo app, and try out changes to the project code is to use AndroidStudio.
 
  - Install the latest version of Android Studio 3 as specified [here](https://developer.android.com/studio/index.html).
  - Make sure the Android SDK version is greater than 26 and NDK version is greater than 14 (in the Android Studio Settings).
- - Import the tensorflow/contrib/lite/java/demo directory as a new Android Studio project.
+ - Import the `tensorflow/contrib/lite/java/demo` directory as a new Android Studio project.
  - Click through installing all the Gradle extensions it requests.
  - Download the quantized Mobilenet TensorFlow Lite model from [here](https://storage.googleapis.com/download.tensorflow.org/models/tflite/mobilenet_v1_224_android_quant_2017_11_08.zip)
      - unzip and copy mobilenet_quant_v1_224.tflite to the assets directory:
-       tensorflow/contrib/lite/java/demo/app/src/main/assets/
+       `tensorflow/contrib/lite/java/demo/app/src/main/assets/`
  - Build and run the demo app
 
 ## Building TensorFlow Lite and the demo app from source
@@ -43,7 +43,7 @@ The simplest way to compile the demo app, and try out changes to the project cod
 ### Install Bazel
 If bazel is not installed on your system, install it now by following [these directions](https://bazel.build/versions/master/docs/install.html)
 
-NOTE: Bazel does not currently support building for Android on Windows. Full support for gradle/cmake builds is coming soon, but in the meantime Windows users should download the [prebuilt binary](https://storage.googleapis.com/download.tensorflow.org/deps/tflite/TfLiteCameraDemo.apk) instead.
+NOTE: Bazel does not fully support building Android on Windows yet. Full support for Gradle/CMake builds is coming soon, but in the meantime Windows users should download the [prebuilt binary](https://storage.googleapis.com/download.tensorflow.org/deps/tflite/TfLiteCameraDemo.apk) instead.
 
 ### Install Android NDK and SDK
 Bazel is the primary build system for TensorFlow. Bazel and the Android NDK and SDK must be installed on your system.
@@ -53,25 +53,30 @@ Bazel is the primary build system for TensorFlow. Bazel and the Android NDK and
  - In the root of the TensorFlow repository update the `WORKSPACE` file with the `api_level` and location of the SDK and NDK. If you installed it with AndroidStudio the SDK path can be found in the SDK manager, and the default NDK path is:`{SDK path}/ndk-bundle.`
 
 ```
- Android_sdk_repository (
-   name = "androidsdk",
-   api_level = 23,
-   build_tools_version = "23.0.2",
-   path = "/home/xxxx/android-sdk-linux/", )
+android_sdk_repository (
+    name = "androidsdk",
+    api_level = 23,
+    build_tools_version = "23.0.2",
+    path = "/home/xxxx/android-sdk-linux/",
+)
 
 android_ndk_repository(
-  name="androidndk",
-  path="/home/xxxx/android-ndk-r10e/",
-  api_level=19)
-
+    name = "androidndk",
+    path = "/home/xxxx/android-ndk-r10e/",
+    api_level = 19,
+)
 ```
+
 Additional details on building with Android can be found [here](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/lite/java/demo/README.md).
 
 ### Build the  source code
 Run bazel with the following command to build the demo.
 
 Build the demo app:
-bazel build --cxxopt='--std=c++11' //tensorflow/contrib/lite/java/demo/app/src/main:TfLiteCameraDemo
+
+```
+bazel build --cxxopt=--std=c++11 //tensorflow/contrib/lite/java/demo/app/src/main:TfLiteCameraDemo
+```
 
 ### Note
 
@@ -105,7 +110,7 @@ The [TensorFlow for Poets](https://codelabs.developers.google.com/codelabs/tenso
 
 
 ### Train a custom model
-A developer may choose to train a custom model using Tensorflow. TensorFlow documentation has [several tutorials](https://www.tensorflow.org/tutorials/) for building and training models. If the user has written a model using TensorFlow’s Slim Framework the first step is to export this to a GraphDef file. This is necessary because Slim does not store the model structure outside the code, so to communicate with other parts of the framework it needs to be exported. Documentation for the export can be found [here](https://github.com/tensorflow/models/tree/master/research/slim#Export). The output of this step will be a .pb file for the custom model.
+A developer may choose to train a custom model using Tensorflow. TensorFlow documentation has [several tutorials](https://www.tensorflow.org/tutorials/) for building and training models. If the user has written a model using TensorFlow's Slim Framework the first step is to export this to a GraphDef file. This is necessary because Slim does not store the model structure outside the code, so to communicate with other parts of the framework it needs to be exported. Documentation for the export can be found [here](https://github.com/tensorflow/models/tree/master/research/slim#Export). The output of this step will be a .pb file for the custom model.
 
 TensorFlow Lite currently supports a subset of TensorFlow operators. Please refer to [this document](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/lite/g3doc/tf_ops_compatibility.md) for details of supported operators and their usage. This
 set will continue to expand in future releases of Tensorflow Lite.
@@ -129,7 +134,7 @@ Since we employ several formats, the following definitions may be useful:
  - TensorFlow lite model (.lite) - a serialized flatbuffer, containing TensorFlow lite operators and Tensors for the TensorFlow lite interpreter. This is most analogous to TensorFlow frozen GraphDefs.
 
 ### Freeze Graph
-To use this .pb GraphDef file within TensorFlow Lite, the application developer will need checkpoints containing trained weight parameters. The .pb contains only the structure of the graph. The process of merging the checkpoint values with the graph structure is known as “freezing” the graph.
+To use this .pb GraphDef file within TensorFlow Lite, the application developer will need checkpoints containing trained weight parameters. The .pb contains only the structure of the graph. The process of merging the checkpoint values with the graph structure is known as "freezing" the graph.
 
 The developer should know where the checkpoints folder is present or checkpoints can also be downloaded for a pre-trained model (Example: Here is a link to the [MobileNets](https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet_v1.md)).
 
@@ -156,7 +161,7 @@ Here is a sample command line to convert the frozen Graphdef to '.lite' format f
 bazel build tensorflow/contrib/lite/toco:toco
 
 bazel-bin/tensorflow/contrib/lite/toco/toco -- \
-  --input_file=(pwd)/mobilenet_v1_1.0_224/frozen_graph.pb \
+  --input_file=$(pwd)/mobilenet_v1_1.0_224/frozen_graph.pb \
   --input_format=TENSORFLOW_GRAPHDEF  --output_format=TFLITE \
   --output_file=/tmp/mobilenet_v1_1.0_224.lite --inference_type=FLOAT \
   --input_type=FLOAT --input_arrays=input \
@@ -184,7 +189,7 @@ with tf.Session() as sess:
 ```
 For detailed instructions on how to use the Tensorflow Optimizing Converter, please see [here](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/lite/toco/g3doc/cmdline_examples.md).
 
-You may refer to the [Ops compatibility guide](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/lite/g3doc/tf_ops_compatibility.md) for troubleshooting help. If that doesn’t help, please file an [issue](https://github.com/tensorflow/tensorflow/issues).
+You may refer to the [Ops compatibility guide](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/lite/g3doc/tf_ops_compatibility.md) for troubleshooting help. If that doesn't help, please file an [issue](https://github.com/tensorflow/tensorflow/issues).
 
 ## Step 3. Use the TensorFlow Lite model for inference in a mobile app
 
@@ -193,9 +198,9 @@ After completion of Step 2 the developer should have a .lite model.
 ### For Android
 Because Android apps need to be written in Java, and core TensorFlow is in C++, a JNI library is provided to interface between the two. Its interface is aimed only at inference, so it provides the ability to load a graph, set up inputs, and run the model to calculate particular outputs. The full documentation for the set of methods can be seen [here](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/contrib/lite/g3doc/). The demo app is also open sourced on [github](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/lite/java/demo/app).
 
-The [demo app](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/lite/java/demo/app)  uses this interface, so it’s a good place to look for example usage. You can also download the prebuilt binary [here](http://download.tensorflow.org/deps/tflite/TfLiteCameraDemo.apk).
+The [demo app](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/lite/java/demo/app)  uses this interface, so it's a good place to look for example usage. You can also download the prebuilt binary [here](http://download.tensorflow.org/deps/tflite/TfLiteCameraDemo.apk).
 
-Note that you’d need to follow instructions for installing TensorFlow on Android, setting up bazel and Android Studio outlined [here](https://www.tensorflow.org/mobile/android_build).
+Note that you'd need to follow instructions for installing TensorFlow on Android, setting up bazel and Android Studio outlined [here](https://www.tensorflow.org/mobile/android_build).
 
 ### For iOS
 Follow the documentation [here](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/lite/g3doc/ios.md) to get integrate a TFLite model into your app.
diff --git a/third_party/flatbuffers/flatbuffers.BUILD b/third_party/flatbuffers/flatbuffers.BUILD
index e1563103c8..0a76adcf91 100644
--- a/third_party/flatbuffers/flatbuffers.BUILD
+++ b/third_party/flatbuffers/flatbuffers.BUILD
@@ -6,8 +6,11 @@ licenses(["notice"])  # Apache 2.0
 
 FLATBUFFERS_COPTS = [
     "-fexceptions",
-    "-Wno-implicit-fallthrough",
-]
+] + select({
+    "@bazel_tools//src:windows": [],
+    "@bazel_tools//src:windows_msvc": [],
+    "//conditions:default": ["-Wno-implicit-fallthrough"],
+})
 
 # Public flatc library to compile flatbuffer files at runtime.
 cc_library(
-- 
GitLab


From 701faa76614021c50975c6e24d0e63dbcb769935 Mon Sep 17 00:00:00 2001
From: David Majnemer <majnemer@google.com>
Date: Mon, 27 Nov 2017 15:01:16 -0800
Subject: [PATCH 0317/1225] [XLA] Canonicalize convolutions which are
 potentially lowered to Eigen calls

The canonicalization rules needs to map to what is supported by the Eigen runtime helpers.

PiperOrigin-RevId: 177076955
---
 .../xla/service/cpu/conv_canonicalization.cc  |  6 ++---
 .../xla/service/cpu/ir_emission_utils.cc      | 23 ++++++++++---------
 2 files changed, 14 insertions(+), 15 deletions(-)

diff --git a/tensorflow/compiler/xla/service/cpu/conv_canonicalization.cc b/tensorflow/compiler/xla/service/cpu/conv_canonicalization.cc
index 80760356e3..a3dd13811c 100644
--- a/tensorflow/compiler/xla/service/cpu/conv_canonicalization.cc
+++ b/tensorflow/compiler/xla/service/cpu/conv_canonicalization.cc
@@ -47,10 +47,8 @@ StatusOr<bool> ConvCanonicalization::Run(HloModule* module) {
       // A canonical convolution's dimension numbers need to satisfy the
       // following conditions (see cs/PotentiallyImplementedAsEigenConvolution).
       //
-      // - the input is in NHWC or NWHC order.
-      // - the kernel is in HWIO or WHIO order.
-      // - the spatial dimensions are in the same relative order in the input,
-      //   kernel and output.
+      // - the input is in NHWC order.
+      // - the kernel is in HWIO order.
       //
       // For simplicity, as a first step, we reshape the input and filter to
       // NHWC and HWIO order, respectively. This may lose precision but won't
diff --git a/tensorflow/compiler/xla/service/cpu/ir_emission_utils.cc b/tensorflow/compiler/xla/service/cpu/ir_emission_utils.cc
index cb5cb8a6dd..d2e7f830d1 100644
--- a/tensorflow/compiler/xla/service/cpu/ir_emission_utils.cc
+++ b/tensorflow/compiler/xla/service/cpu/ir_emission_utils.cc
@@ -29,10 +29,8 @@ bool PotentiallyImplementedAsEigenConvolution(
   // The following conditions are necessary (but not sufficient) for
   // implementing `convolution` with Eigen convolution:
   // - the input and kernel have a non-zero number of elements.
-  // - the input is in NHWC or NWHC order.
-  // - the kernel is in HWIO or WHIO order.
-  // - the spatial dimensions are in the same relative order in the input,
-  //   kernel and output.
+  // - the input is in NHWC order.
+  // - the kernel is in HWIO order.
   //
   // To be sufficient, certain layout constraints need to be satisfied as well.
   const Shape& input_shape = convolution.operand(0)->shape();
@@ -51,15 +49,19 @@ bool PotentiallyImplementedAsEigenConvolution(
       convolution.convolution_dimension_numbers();
   // Only 1D and 2D convolutions are supported at the moment.
   // TODO(b/32897908): add an optimized implementation for 3D convolution.
-  if (dnums.spatial_dimensions_size() > 2) {
+  const int64 num_spatial_dims = dnums.spatial_dimensions_size();
+  if (num_spatial_dims > 2) {
     return false;
   }
 
-  bool input_spatial_dims_ascending = std::is_sorted(
-      dnums.spatial_dimensions().begin(), dnums.spatial_dimensions().end());
-  bool kernel_spatial_dims_ascending =
-      std::is_sorted(dnums.kernel_spatial_dimensions().begin(),
-                     dnums.kernel_spatial_dimensions().end());
+  for (int64 i = 0; i < num_spatial_dims; ++i) {
+    if (dnums.spatial_dimensions(i) != i + 1) {
+      return false;
+    }
+    if (dnums.kernel_spatial_dimensions(i) != i) {
+      return false;
+    }
+  }
 
   const Shape& output_shape = convolution.shape();
   return dnums.input_batch_dimension() == 0 &&
@@ -67,7 +69,6 @@ bool PotentiallyImplementedAsEigenConvolution(
          dnums.output_batch_dimension() == 0 &&
          dnums.output_feature_dimension() ==
              output_shape.dimensions_size() - 1 &&
-         input_spatial_dims_ascending == kernel_spatial_dims_ascending &&
          dnums.kernel_input_feature_dimension() ==
              kernel_shape.dimensions_size() - 2 &&
          dnums.kernel_output_feature_dimension() ==
-- 
GitLab


From 20895ffc3d7049cb80e188d78402d13ca5591996 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 27 Nov 2017 15:13:00 -0800
Subject: [PATCH 0318/1225] Modify static bool variable in OpRegistry::Lookup()
 while mutex is locked.

PiperOrigin-RevId: 177078725
---
 tensorflow/core/framework/op.cc | 16 +++++++++++-----
 1 file changed, 11 insertions(+), 5 deletions(-)

diff --git a/tensorflow/core/framework/op.cc b/tensorflow/core/framework/op.cc
index 4f5a1f80a0..fadb60d744 100644
--- a/tensorflow/core/framework/op.cc
+++ b/tensorflow/core/framework/op.cc
@@ -63,26 +63,32 @@ Status OpRegistry::LookUp(const string& op_type_name,
   const OpRegistrationData* res = nullptr;
 
   bool first_call = false;
+  bool first_unregistered = false;
   {  // Scope for lock.
     mutex_lock lock(mu_);
     first_call = MustCallDeferred();
     res = gtl::FindWithDefault(registry_, op_type_name, nullptr);
+
+    static bool unregistered_before = false;
+    first_unregistered = !unregistered_before && (res == nullptr);
+    if (first_unregistered) {
+      unregistered_before = true;
+    }
     // Note: Can't hold mu_ while calling Export() below.
   }
   if (first_call) {
     TF_QCHECK_OK(ValidateKernelRegistrations(*this));
   }
   if (res == nullptr) {
-    static bool first_unregistered = true;
     if (first_unregistered) {
       OpList op_list;
       Export(true, &op_list);
       if (VLOG_IS_ON(3)) {
-         LOG(INFO) << "All registered Ops:";
-         for (const auto& op : op_list.op())
-            LOG(INFO) << SummarizeOpDef(op);
+        LOG(INFO) << "All registered Ops:";
+        for (const auto& op : op_list.op()) {
+          LOG(INFO) << SummarizeOpDef(op);
+        }
       }
-      first_unregistered = false;
     }
     Status status =
         errors::NotFound("Op type not registered '", op_type_name,
-- 
GitLab


From 78d3ece27c08e0cf74ad02965960fed461a2951d Mon Sep 17 00:00:00 2001
From: Benoit Steiner <bsteiner@google.com>
Date: Mon, 27 Nov 2017 15:16:38 -0800
Subject: [PATCH 0319/1225] Don't try to feed placeholder with default. Instead
 we rely on the default value.

PiperOrigin-RevId: 177079220
---
 tensorflow/core/grappler/grappler_item_builder.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/core/grappler/grappler_item_builder.cc b/tensorflow/core/grappler/grappler_item_builder.cc
index 3f6183b6f1..36c7f92c49 100644
--- a/tensorflow/core/grappler/grappler_item_builder.cc
+++ b/tensorflow/core/grappler/grappler_item_builder.cc
@@ -297,7 +297,7 @@ std::unique_ptr<GrapplerItem> GrapplerItemFromMetaGraphDef(
   }
 
   for (auto& node : *new_item->graph.mutable_node()) {
-    if (IsPlaceholder(node)) {
+    if (IsPlaceholder(node) && node.op() != "PlaceholderWithDefault") {
       if (node.attr().count("dtype") == 0) {
         LOG(ERROR) << "Unknown type for placeholder " << node.name()
                    << ", skipping this input";
-- 
GitLab


From 2be93d0d543591ebee31bcddfa4b9c6c53e5c793 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 27 Nov 2017 16:03:16 -0800
Subject: [PATCH 0320/1225] Fixes punctuation in tf.nn.moments comment

PiperOrigin-RevId: 177085447
---
 tensorflow/python/ops/nn_impl.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/python/ops/nn_impl.py b/tensorflow/python/ops/nn_impl.py
index 654eb1c118..00e3c7dc0f 100644
--- a/tensorflow/python/ops/nn_impl.py
+++ b/tensorflow/python/ops/nn_impl.py
@@ -638,7 +638,7 @@ def moments(x, axes,
   across `axes`.  If `x` is 1-D and `axes = [0]` this is just the mean
   and variance of a vector.
 
-  Note: shift is currently not used, the true mean is computed and used.
+  Note: shift is currently not used; the true mean is computed and used.
 
   When using these moments for batch normalization (see
   `tf.nn.batch_normalization`):
-- 
GitLab


From c17459a0acb5044fa415d11221a45bea619aa349 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 27 Nov 2017 16:12:26 -0800
Subject: [PATCH 0321/1225] [tfgan] Add option to pass MODE to generator_fn,
 for the purpose of things like prediction.

PiperOrigin-RevId: 177086828
---
 .../estimator/python/gan_estimator_impl.py    | 33 +++++++++++++++----
 .../estimator/python/gan_estimator_test.py    |  4 +--
 2 files changed, 29 insertions(+), 8 deletions(-)

diff --git a/tensorflow/contrib/gan/python/estimator/python/gan_estimator_impl.py b/tensorflow/contrib/gan/python/estimator/python/gan_estimator_impl.py
index 0824ecf616..058dc1d1f8 100644
--- a/tensorflow/contrib/gan/python/estimator/python/gan_estimator_impl.py
+++ b/tensorflow/contrib/gan/python/estimator/python/gan_estimator_impl.py
@@ -18,6 +18,7 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
+import functools
 import enum
 
 from tensorflow.contrib.framework.python.ops import variables as variable_lib
@@ -29,6 +30,7 @@ from tensorflow.python.estimator import estimator
 from tensorflow.python.estimator import model_fn as model_fn_lib
 from tensorflow.python.framework import ops
 from tensorflow.python.ops import variable_scope
+from tensorflow.python.util import tf_inspect as inspect
 
 
 __all__ = [
@@ -116,7 +118,10 @@ class GANEstimator(estimator.Estimator):
         to continue training a previously saved model.
       generator_fn: A python function that takes a Tensor, Tensor list, or
         Tensor dictionary as inputs and returns the outputs of the GAN
-        generator. See `TFGAN` for more details and examples.
+        generator. See `TFGAN` for more details and examples. Additionally, if
+        it has an argument called `mode`, the Estimator's `mode` will be passed
+        in (ex TRAIN, EVAL, PREDICT). This is useful for things like batch
+        normalization.
       discriminator_fn: A python function that takes the output of
         `generator_fn` or real data in the GAN setup, and `generator_inputs`.
         Outputs a Tensor in the range [-inf, inf]. See `TFGAN` for more details
@@ -225,9 +230,12 @@ def _gan_model_fn(
       labels=None)
 
 
-def _make_train_gan_model(generator_fn, discriminator_fn, real_data,
-                          generator_inputs, generator_scope, add_summaries):
-  """Make a `GANModel` for training."""
+def _make_gan_model(generator_fn, discriminator_fn, real_data,
+                    generator_inputs, generator_scope, add_summaries, mode):
+  """Make a `GANModel`, and optionally pass in `mode`."""
+  # If `generator_fn` has an argument `mode`, pass mode to it.
+  if 'mode' in inspect.getargspec(generator_fn).args:
+    generator_fn = functools.partial(generator_fn, mode=mode)
   gan_model = tfgan_train.gan_model(
       generator_fn,
       discriminator_fn,
@@ -245,15 +253,28 @@ def _make_train_gan_model(generator_fn, discriminator_fn, real_data,
   return gan_model
 
 
+def _make_train_gan_model(generator_fn, discriminator_fn, real_data,
+                          generator_inputs, generator_scope, add_summaries):
+  """Make a `GANModel` for training."""
+  return _make_gan_model(generator_fn, discriminator_fn, real_data,
+                         generator_inputs, generator_scope, add_summaries,
+                         model_fn_lib.ModeKeys.TRAIN)
+
+
 def _make_eval_gan_model(generator_fn, discriminator_fn, real_data,
                          generator_inputs, generator_scope, add_summaries):
   """Make a `GANModel` for evaluation."""
-  return _make_train_gan_model(generator_fn, discriminator_fn, real_data,
-                               generator_inputs, generator_scope, add_summaries)
+  return _make_gan_model(generator_fn, discriminator_fn, real_data,
+                         generator_inputs, generator_scope, add_summaries,
+                         model_fn_lib.ModeKeys.EVAL)
 
 
 def _make_prediction_gan_model(generator_inputs, generator_fn, generator_scope):
   """Make a `GANModel` from just the generator."""
+  # If `generator_fn` has an argument `mode`, pass mode to it.
+  if 'mode' in inspect.getargspec(generator_fn).args:
+    generator_fn = functools.partial(generator_fn,
+                                     mode=model_fn_lib.ModeKeys.PREDICT)
   with variable_scope.variable_scope(generator_scope) as gen_scope:
     generator_inputs = tfgan_train._convert_tensor_or_l_or_d(generator_inputs)  # pylint:disable=protected-access
     generated_data = generator_fn(generator_inputs)
diff --git a/tensorflow/contrib/gan/python/estimator/python/gan_estimator_test.py b/tensorflow/contrib/gan/python/estimator/python/gan_estimator_test.py
index 1bfdce9ee9..e752f0bccc 100644
--- a/tensorflow/contrib/gan/python/estimator/python/gan_estimator_test.py
+++ b/tensorflow/contrib/gan/python/estimator/python/gan_estimator_test.py
@@ -48,7 +48,8 @@ from tensorflow.python.training import training
 from tensorflow.python.training import training_util
 
 
-def generator_fn(noise_dict):
+def generator_fn(noise_dict, mode):
+  del mode
   noise = noise_dict['x']
   return layers.fully_connected(noise, noise.shape[1].value)
 
@@ -90,7 +91,6 @@ def mock_head(testcase, expected_generator_inputs, expected_real_data,
         generator_var_names,
         set([x.name for x in gan_model.generator_variables]))
     testcase.assertEqual(generator_scope_name, gan_model.generator_scope.name)
-    testcase.assertEqual(generator_fn, gan_model.generator_fn)
     testcase.assertEqual(_or_none(expected_real_data), gan_model.real_data)
     # TODO(joelshor): Add check on `discriminator_real_outputs`.
     # TODO(joelshor): Add check on `discriminator_gen_outputs`.
-- 
GitLab


From c2aa66115d6c2bad6752474acd3bbf9a616b487a Mon Sep 17 00:00:00 2001
From: Skye Wanderman-Milne <skyewm@google.com>
Date: Mon, 27 Nov 2017 16:28:09 -0800
Subject: [PATCH 0322/1225] Fetch shape information from the C API when
 enabled.

This change makes set_shapes_for_outputs fetch the already-computed tensor shapes from the C API, rather than calling the C++ shape function. It also moves the set_shapes_for_outputs call so it works with Operations created from TF_Operations.

PiperOrigin-RevId: 177088786
---
 tensorflow/python/client/session_test.py      |  2 +
 tensorflow/python/client/tf_session.i         | 35 +++++++++++
 tensorflow/python/client/tf_session_helper.cc | 27 +++++++++
 tensorflow/python/client/tf_session_helper.h  | 10 ++++
 tensorflow/python/framework/ops.py            | 58 ++++++++++++++++---
 tensorflow/python/framework/ops_test.py       | 57 +++++++++++++++++-
 tensorflow/python/ops/math_ops_test.py        | 15 +++--
 7 files changed, 191 insertions(+), 13 deletions(-)

diff --git a/tensorflow/python/client/session_test.py b/tensorflow/python/client/session_test.py
index 3e85410a97..f4b0271195 100644
--- a/tensorflow/python/client/session_test.py
+++ b/tensorflow/python/client/session_test.py
@@ -1458,6 +1458,8 @@ class SessionTest(test_util.TensorFlowTestCase):
         self.assertTrue(run_metadata.HasField('step_stats'))
         self.assertEquals(len(run_metadata.step_stats.dev_stats), 1)
 
+  # TODO(nolivia): C API doesn't yet handle marking nodes as not feedable.
+  @test_util.disable_c_api
   def testFeedShapeCompatibility(self):
     with session.Session() as sess:
       some_tensor = constant_op.constant([2.0, 2.0, 2.0, 2.0])
diff --git a/tensorflow/python/client/tf_session.i b/tensorflow/python/client/tf_session.i
index 099a35202c..5fa1a7e8fc 100644
--- a/tensorflow/python/client/tf_session.i
+++ b/tensorflow/python/client/tf_session.i
@@ -497,6 +497,41 @@ def TF_Reset(target, containers=None, config=None):
   }
 }
 
+// Typemaps for TF_GraphGetTensorShapeHelper.
+
+// Convert from C++ integer vector to Python list of ints.
+%typemap(out) tensorflow::gtl::InlinedVector<int64_t, 6>
+     tensorflow::TF_GraphGetTensorShapeHelper {
+  $result = PyList_New($1.size());
+  if (!$result) {
+    SWIG_exception_fail(SWIG_MemoryError, "$symname: couldn't create list");
+  }
+
+  for (size_t i = 0; i < $1.size(); ++i) {
+    PyList_SET_ITEM($result, i, PyInt_FromLong($1[i]));
+  }
+}
+
+%typemap(in, numinputs=0) bool* unknown_shape (bool temp) {
+  $1=&temp;
+}
+
+// Returns a (list(int), bool) tuple.
+%typemap(argout) bool* unknown_shape {
+  PyObject* new_result = PyTuple_New(2);
+  if (!new_result) {
+    SWIG_exception_fail(SWIG_MemoryError, "$symname: couldn't create tuple");
+  }
+  // Steals $result reference
+  PyTuple_SET_ITEM(new_result, 0, $result);
+  PyTuple_SET_ITEM(new_result, 1, PyBool_FromLong(*$1));
+  $result = new_result;
+}
+
+%unignore tensorflow;
+%unignore TF_GraphGetTensorShapeHelper;
+%ignore TF_GraphGetTensorShape;
+
 %include "tensorflow/python/client/tf_session_helper.h"
 
 %unignoreall
diff --git a/tensorflow/python/client/tf_session_helper.cc b/tensorflow/python/client/tf_session_helper.cc
index f5472f316d..ad982e5dd8 100644
--- a/tensorflow/python/client/tf_session_helper.cc
+++ b/tensorflow/python/client/tf_session_helper.cc
@@ -299,6 +299,33 @@ string EqualGraphDefWrapper(const string& actual, const string& expected) {
   return EqualGraphDef(actual_def, expected_def, &diff) ? "" : diff;
 }
 
+// Return value set to 6 inlined elements so it fits in a 64-byte cache line.
+tensorflow::gtl::InlinedVector<int64_t, 6> TF_GraphGetTensorShapeHelper(
+    TF_Graph* graph, TF_Output output, TF_Status* out_status,
+    bool* unknown_shape) {
+  // Allocate a single variable for holding the result for RVO.
+  tensorflow::gtl::InlinedVector<int64_t, 6> result;
+  *unknown_shape = false;
+  int num_dims = TF_GraphGetTensorNumDims(graph, output, out_status);
+  if (TF_GetCode(out_status) != TF_OK) {
+    return result;
+  }
+  // If shape is unknown, set boolean and return.
+  if (num_dims == -1) {
+    *unknown_shape = true;
+    return result;
+  }
+
+  // If shape is a scalar, avoid another C call and just return {}.
+  if (num_dims == 0) {
+    return result;
+  }
+
+  result.resize(num_dims);
+  TF_GraphGetTensorShape(graph, output, result.data(), num_dims, out_status);
+  return result;
+}
+
 void TF_SessionPRunSetup_wrapper(TF_Session* session,
                                  const std::vector<TF_Output>& inputs,
                                  const std::vector<TF_Output>& outputs,
diff --git a/tensorflow/python/client/tf_session_helper.h b/tensorflow/python/client/tf_session_helper.h
index 0aca61a2b6..6ed08d3a58 100644
--- a/tensorflow/python/client/tf_session_helper.h
+++ b/tensorflow/python/client/tf_session_helper.h
@@ -97,6 +97,16 @@ void TF_Reset_wrapper(const TF_SessionOptions* opt,
 // for no difference.
 string EqualGraphDefWrapper(const string& actual, const string& expected);
 
+// Gets shape from C API Graph object.
+//
+// If shape is known, returns shape vector where -1 means "unknown
+// dimension".  Sets unknown_shape to false.
+//
+// If shape is unknown, sets unknown_shape to true.
+tensorflow::gtl::InlinedVector<int64_t, 6> TF_GraphGetTensorShapeHelper(
+    TF_Graph* graph, TF_Output output, TF_Status* out_status,
+    bool* unknown_shape);
+
 // Runs the graph associated with the session starting with the supplied inputs.
 // On success, `py_outputs` is populated with a numpy ndarray for each output
 // (the caller must decref these ndarrays, although this will likely be handled
diff --git a/tensorflow/python/framework/ops.py b/tensorflow/python/framework/ops.py
index bcc794b9a9..60df8f82f0 100644
--- a/tensorflow/python/framework/ops.py
+++ b/tensorflow/python/framework/ops.py
@@ -1439,8 +1439,12 @@ def _create_c_op(graph, node_def, inputs, control_inputs):
       c_api.TF_SetAttrValueProto(op_desc,
                                  compat.as_str(name), serialized, status)
 
-  with errors.raise_exception_on_not_ok_status() as status:
-    c_op = c_api.TF_FinishOperation(op_desc, status)
+  try:
+    with errors.raise_exception_on_not_ok_status() as status:
+      c_op = c_api.TF_FinishOperation(op_desc, status)
+  except errors.InvalidArgumentError as e:
+    # Convert to ValueError for backwards compatibility.
+    raise ValueError(str(e))
 
   return c_op
 
@@ -2318,8 +2322,28 @@ class RegisterShape(object):
     return f
 
 
-def set_shapes_for_outputs(op):
-  """Uses the registered shape functions to set the shapes for op's outputs."""
+def _set_shapes_for_outputs_c_api(op):
+  """set_shapes_for_outputs implementation when C API is enabled."""
+  # The C API computes the shapes when the TF_Operation is created. Fetch the
+  # output shapes from the C object.
+  for output in op.outputs:
+    with errors.raise_exception_on_not_ok_status() as status:
+      # pylint: disable=protected-access
+      shape_vector, unknown_shape = c_api.TF_GraphGetTensorShapeHelper(
+          op._graph._c_graph, output._as_tf_output(), status)
+      # pylint: enable=protected-access
+    if unknown_shape:
+      output.set_shape(tensor_shape.unknown_shape())
+    elif not shape_vector:
+      output.set_shape(tensor_shape.scalar())
+    else:
+      shape_vector = [None if d == -1 else d for d in shape_vector]
+      output.set_shape(tensor_shape.TensorShape(shape_vector))
+
+
+# TODO(skyewm): remove this when _USE_C_API flag is removed.
+def _set_shapes_for_outputs(op):
+  """set_shapes_for_outputs implementation when C API is disabled."""
   try:
     shape_func = _shape_registry.lookup(op.type)
   except LookupError:
@@ -2350,6 +2374,14 @@ def set_shapes_for_outputs(op):
     output.set_shape(s)
 
 
+def set_shapes_for_outputs(op):
+  """Set the shapes for op's outputs."""
+  if op._c_op:  # pylint: disable=protected-access
+    return _set_shapes_for_outputs_c_api(op)
+  else:
+    return _set_shapes_for_outputs(op)
+
+
 class OpStats(object):
   """A holder for statistics about an operator.
 
@@ -3067,9 +3099,9 @@ class Graph(object):
         input_types=input_types,
         original_op=self._default_original_op,
         op_def=op_def)
-    if compute_shapes:
-      set_shapes_for_outputs(ret)
-    self._create_op_helper(ret, compute_device=compute_device)
+
+    self._create_op_helper(ret, compute_shapes=compute_shapes,
+                           compute_device=compute_device)
     return ret
 
   def _create_op_from_tf_operation(self, c_op):
@@ -3095,8 +3127,18 @@ class Graph(object):
     self._create_op_helper(ret)
     return ret
 
-  def _create_op_helper(self, op, compute_device=True):
+  def _create_op_helper(self, op, compute_shapes=True, compute_device=True):
     """Common logic for creating an op in this graph."""
+    # TODO(vrv): Instead of eagerly filling in shape property for every op, only
+    # populate the shape when requested.
+    #
+    # TODO(skyewm): unlike in the original Python implementation, the C API
+    # always computes shape information (even for function calls, which the
+    # original Python shape inference code doesn't handle). Deprecate the
+    # compute_shapes argument.
+    if op._c_op or compute_shapes:  # pylint: disable=protected-access
+      set_shapes_for_outputs(op)
+
     # Apply any additional attributes requested. Do not overwrite any existing
     # attributes.
     for key, value in self._attr_scope_map.items():
diff --git a/tensorflow/python/framework/ops_test.py b/tensorflow/python/framework/ops_test.py
index ac35f6f4f5..cd296ccdc5 100644
--- a/tensorflow/python/framework/ops_test.py
+++ b/tensorflow/python/framework/ops_test.py
@@ -80,7 +80,7 @@ class ResourceTest(test_util.TensorFlowTestCase):
 
 
 @test_util.with_c_api
-class TensorTest(test_util.TensorFlowTestCase):
+class TensorAndShapeTest(test_util.TensorFlowTestCase):
 
   def testShape(self):
     op = ops.Operation(
@@ -99,6 +99,44 @@ class TensorTest(test_util.TensorFlowTestCase):
       for _ in t:
         pass
 
+  def testAddShape(self):
+    with self.test_session():
+      a = array_ops.zeros([2, 3])
+      b = array_ops.ones([1, 3])
+      c = a + b
+      self.assertEqual([2, 3], c.shape)
+
+  def testUnknownDim(self):
+    with self.test_session():
+      a = array_ops.placeholder(dtype=dtypes.float32, shape=[2, None, 3])
+      b = array_ops.placeholder(dtype=dtypes.float32, shape=[2, None, 3])
+      c = a + b
+      self.assertEqual([2, None, 3], c.shape.as_list())
+
+  def testUnknownShape(self):
+    with self.test_session():
+      a = array_ops.placeholder(dtype=dtypes.float32, shape=None)
+      b = array_ops.ones([1, 3])
+      c = a + b
+      self.assertEqual(tensor_shape.unknown_shape(), c.shape)
+
+  def testScalarShape(self):
+    with self.test_session():
+      a = array_ops.placeholder(dtype=dtypes.float32, shape=[])
+      b = array_ops.ones([])
+      c = a + b
+      self.assertEqual(tensor_shape.scalar(), c.shape)
+
+  def testShapeFunctionError(self):
+    with self.test_session():
+      a = array_ops.ones([1, 2, 3])
+      b = array_ops.ones([4, 5, 6])
+      with self.assertRaisesRegexp(
+          ValueError,
+          r"Dimensions must be equal, but are 2 and 5 for 'add' \(op: 'Add'\) "
+          r"with input shapes: \[1,2,3\], \[4,5,6\]."):
+        _ = a + b
+
 
 @test_util.with_c_api
 class IndexedSlicesTest(test_util.TensorFlowTestCase):
@@ -671,6 +709,7 @@ class CreateOpFromTFOperationTest(test_util.TensorFlowTestCase):
     self.assertEqual(op.name, "myop")
     self.assertEqual(op.type, "IntInputIntOutput")
     self.assertEqual(len(op.outputs), 1)
+    self.assertEqual(op.outputs[0].shape, tensor_shape.unknown_shape())
     self.assertEqual(list(op.inputs), [x])
     self.assertEqual(op.control_inputs, [])
     self.assertEqual(op.graph, g)
@@ -679,6 +718,22 @@ class CreateOpFromTFOperationTest(test_util.TensorFlowTestCase):
     self.assertEqual(g.get_operation_by_name("myop"), op)
     self.assertEqual(g.get_tensor_by_name("myop:0"), op.outputs[0])
 
+  def testShape(self):
+    g = ops.Graph()
+    with g.as_default():
+      x = constant_op.constant([[1, 2, 3], [4, 5, 6]])
+      if ops._USE_C_API:
+        c_op = ops._create_c_op(g, ops._NodeDef("Identity", "myop"), [x], [])
+        op = g._create_op_from_tf_operation(c_op)
+      else:
+        # Test pure-Python version to make sure C API has same behavior.
+        op = array_ops.identity(x, name="myop").op
+
+    self.assertEqual(op.name, "myop")
+    self.assertEqual(op.type, "Identity")
+    self.assertEqual(len(op.outputs), 1)
+    self.assertEqual(op.outputs[0].shape, tensor_shape.matrix(2, 3))
+
   def testCond(self):
     g = ops.Graph()
     with g.as_default():
diff --git a/tensorflow/python/ops/math_ops_test.py b/tensorflow/python/ops/math_ops_test.py
index 4642f4c580..81a7cf28bb 100644
--- a/tensorflow/python/ops/math_ops_test.py
+++ b/tensorflow/python/ops/math_ops_test.py
@@ -21,7 +21,6 @@ import numpy as np
 
 from tensorflow.python.eager import context
 from tensorflow.python.framework import constant_op
-from tensorflow.python.framework import errors
 from tensorflow.python.framework import ops
 from tensorflow.python.framework import test_util
 from tensorflow.python.ops import array_ops
@@ -31,12 +30,12 @@ from tensorflow.python.ops import resource_variable_ops
 from tensorflow.python.ops import variables
 from tensorflow.python.platform import googletest
 
-ops._USE_C_API = True
 
 exp = np.exp
 log = np.log
 
 
+@test_util.with_c_api
 class ReduceTest(test_util.TensorFlowTestCase):
 
   @test_util.run_in_graph_and_eager_modes()
@@ -67,11 +66,11 @@ class ReduceTest(test_util.TensorFlowTestCase):
       return
     x = np.array([[1, 2, 3], [4, 5, 6]], dtype=np.int32)
     axis = np.array([[0], [1]])
-    with self.assertRaisesRegexp(errors.InvalidArgumentError,
-                                 "must be at most rank 1"):
+    with self.assertRaisesRegexp(ValueError, "must be at most rank 1"):
       math_ops.reduce_sum(x, axis)
 
 
+@test_util.with_c_api
 class LogSumExpTest(test_util.TensorFlowTestCase):
 
   def testReduceLogSumExp(self):
@@ -151,6 +150,7 @@ class LogSumExpTest(test_util.TensorFlowTestCase):
       self.assertEqual(-np.inf, res)
 
 
+@test_util.with_c_api
 class RoundTest(test_util.TensorFlowTestCase):
 
   @test_util.run_in_graph_and_eager_modes()
@@ -168,6 +168,7 @@ class RoundTest(test_util.TensorFlowTestCase):
         self.assertAllClose(y_tf_np, y_np, atol=1e-2)
 
 
+@test_util.with_c_api
 class ModTest(test_util.TensorFlowTestCase):
 
   def testFloat(self):
@@ -197,6 +198,7 @@ class ModTest(test_util.TensorFlowTestCase):
         self.assertAllClose(y_tf_np, y_np)
 
 
+@test_util.with_c_api
 class SquaredDifferenceTest(test_util.TensorFlowTestCase):
 
   @test_util.run_in_graph_and_eager_modes()
@@ -210,6 +212,7 @@ class SquaredDifferenceTest(test_util.TensorFlowTestCase):
         self.assertAllClose(z, z_tf)
 
 
+@test_util.with_c_api
 class ApproximateEqualTest(test_util.TensorFlowTestCase):
 
   @test_util.run_in_graph_and_eager_modes()
@@ -241,6 +244,7 @@ class ApproximateEqualTest(test_util.TensorFlowTestCase):
         self.assertAllEqual(z, z_tf)
 
 
+@test_util.with_c_api
 class ScalarMulTest(test_util.TensorFlowTestCase):
 
   @test_util.run_in_graph_and_eager_modes()
@@ -282,6 +286,7 @@ class ScalarMulTest(test_util.TensorFlowTestCase):
       self.assertAllEqual(self.evaluate(x.indices), [0, 2, 5])
 
 
+@test_util.with_c_api
 class AccumulateNTest(test_util.TensorFlowTestCase):
 
   def testFloat(self):
@@ -301,6 +306,7 @@ class AccumulateNTest(test_util.TensorFlowTestCase):
       self.assertAllEqual(x[0] * 6, math_ops.accumulate_n([tf_x[0]] * 6).eval())
 
 
+@test_util.with_c_api
 class AddNTest(test_util.TensorFlowTestCase):
 
   def testPartials(self):
@@ -354,6 +360,7 @@ class AddNTest(test_util.TensorFlowTestCase):
                             [g.eval() for g in add_n_grad])
 
 
+@test_util.with_c_api
 class DivAndModTest(test_util.TensorFlowTestCase):
   # TODO(aselle): Test more types before exposing new division operators.
 
-- 
GitLab


From 8c81bde08cf757645f5937b84e9e97f4bfc97374 Mon Sep 17 00:00:00 2001
From: Benoit Steiner <bsteiner@google.com>
Date: Mon, 27 Nov 2017 16:33:11 -0800
Subject: [PATCH 0323/1225] Small code cleanup.

PiperOrigin-RevId: 177089408
---
 tensorflow/core/grappler/costs/virtual_scheduler.cc | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/tensorflow/core/grappler/costs/virtual_scheduler.cc b/tensorflow/core/grappler/costs/virtual_scheduler.cc
index 0bb98d3793..e5e1ee3292 100644
--- a/tensorflow/core/grappler/costs/virtual_scheduler.cc
+++ b/tensorflow/core/grappler/costs/virtual_scheduler.cc
@@ -752,8 +752,7 @@ Costs VirtualScheduler::Summary(RunMetadata* metadata) {
   if (metadata != nullptr) {
     StepStats* stepstats = metadata->mutable_step_stats();
     for (const auto& device : device_) {
-      GraphDef* device_partition_graph =
-          metadata->mutable_partition_graphs()->Add();
+      GraphDef* device_partition_graph = metadata->add_partition_graphs();
       DeviceStepStats* device_stepstats = stepstats->add_dev_stats();
       device_stepstats->set_device(device.first);
       for (const auto& node_def : device.second.nodes_executed) {
@@ -804,7 +803,7 @@ Costs VirtualScheduler::Summary(RunMetadata* metadata) {
         mem_stats->set_host_persistent_memory_size(host_persistent_memory_size);
         mem_stats->set_device_persistent_memory_size(
             device_persistent_memory_size);
-        *device_partition_graph->mutable_node()->Add() = *node_def;
+        *device_partition_graph->add_node() = *node_def;
       }
     }
   }
-- 
GitLab


From 6c5ccadd434f2e6ed9634031bac2682c3dfe5216 Mon Sep 17 00:00:00 2001
From: Yao Zhang <yaozhang@google.com>
Date: Mon, 27 Nov 2017 17:00:17 -0800
Subject: [PATCH 0324/1225] Support op split.

PiperOrigin-RevId: 177092757
---
 tensorflow/core/grappler/op_types.cc          |  2 +
 tensorflow/core/grappler/op_types.h           |  1 +
 .../grappler/optimizers/layout_optimizer.cc   | 92 +++++++++++++++--
 .../optimizers/layout_optimizer_test.cc       | 99 +++++++++++++++++++
 4 files changed, 187 insertions(+), 7 deletions(-)

diff --git a/tensorflow/core/grappler/op_types.cc b/tensorflow/core/grappler/op_types.cc
index 1f18b56238..83188ffc0d 100644
--- a/tensorflow/core/grappler/op_types.cc
+++ b/tensorflow/core/grappler/op_types.cc
@@ -133,6 +133,8 @@ bool IsSend(const NodeDef& node) { return node.op() == "_Send"; }
 
 bool IsSlice(const NodeDef& node) { return node.op() == "Slice"; }
 
+bool IsSplit(const NodeDef& node) { return node.op() == "Split"; }
+
 bool IsSquaredDifference(const NodeDef& node) {
   return node.op() == "SquaredDifference";
 }
diff --git a/tensorflow/core/grappler/op_types.h b/tensorflow/core/grappler/op_types.h
index 66ff7a88c5..b1d81448af 100644
--- a/tensorflow/core/grappler/op_types.h
+++ b/tensorflow/core/grappler/op_types.h
@@ -55,6 +55,7 @@ bool IsReshape(const NodeDef& node);
 bool IsRestore(const NodeDef& node);
 bool IsSend(const NodeDef& node);
 bool IsSlice(const NodeDef& node);
+bool IsSplit(const NodeDef& node);
 bool IsSquaredDifference(const NodeDef& node);
 bool IsSqueeze(const NodeDef& node);
 bool IsStopGradient(const NodeDef& node);
diff --git a/tensorflow/core/grappler/optimizers/layout_optimizer.cc b/tensorflow/core/grappler/optimizers/layout_optimizer.cc
index c760efac70..d5563e9d4c 100644
--- a/tensorflow/core/grappler/optimizers/layout_optimizer.cc
+++ b/tensorflow/core/grappler/optimizers/layout_optimizer.cc
@@ -36,6 +36,7 @@ namespace grappler {
 namespace {
 
 const char kConcatConst[] = "LayoutOptimizerConcatConst";
+const char kSplitConst[] = "LayoutOptimizerSplitConst";
 const char kPermNHWCToNCHW[] = "LayoutOptimizerPermConstNHWCToNCHW";
 const char kPermNCHWToNHWC[] = "LayoutOptimizerPermConstNCHWToNHWC";
 const char kGatherAxisConst[] = "LayoutOptimizerGatherAxisConst";
@@ -69,12 +70,25 @@ std::set<string> GetOpsFormatSupported() {
 }
 
 std::set<string> GetOpsFormatAgnostic() {
-  std::set<string> ops_format_agnostic = {
-      "Add",      "AddN",     "Concat", "ConcatV2",
-      "Floor",    "Identity", "Mul",    "Neg",
-      "Pad",      "RealDiv",  "Relu",   "Relu6",
-      "ReluGrad", "Sigmoid",  "Slice",  "SquaredDifference",
-      "Squeeze",  "Sub"};
+  std::set<string> ops_format_agnostic = {"Add",
+                                          "AddN",
+                                          "Concat",
+                                          "ConcatV2",
+                                          "Floor",
+                                          "Identity",
+                                          "Mul",
+                                          "Neg",
+                                          "Pad",
+                                          "RealDiv",
+                                          "Relu",
+                                          "Relu6",
+                                          "ReluGrad",
+                                          "Sigmoid",
+                                          "Slice",
+                                          "Split",
+                                          "SquaredDifference",
+                                          "Squeeze",
+                                          "Sub"};
   return ops_format_agnostic;
 }
 
@@ -764,7 +778,7 @@ class AgnosticNodeProcessor : public NodeProcessor {
     auto node = node_map_->GetNode(node_->name());
     while (node->input_size() > 0) {
       int data_input_pos = 0;
-      if (IsConcatV1(*node)) {
+      if (IsConcatV1(*node) || IsSplit(*node)) {
         data_input_pos = 1;
       }
       node = node_map_->GetNode(node->input(data_input_pos));
@@ -1007,6 +1021,68 @@ class PadProcessor : public AgnosticNodeProcessor {
   }
 };
 
+class SplitProcessor : public AgnosticNodeProcessor {
+ public:
+  explicit SplitProcessor(const OptimizeContext& opt_cxt)
+      : AgnosticNodeProcessor(opt_cxt) {}
+
+ protected:
+  bool ShouldProcess() const override {
+    return AgnosticNodeProcessor::ShouldProcess() && SplitSupported();
+  }
+
+  std::vector<int> GetInputPos() const override {
+    std::vector<int> input_pos = {1};
+    return input_pos;
+  }
+
+  Status CustomizedProcessing() override {
+    string split_const_name = AddNodeSplitConst()->name();
+    node_map_->AddOutput(split_const_name, node_->name());
+    *node_->mutable_input(0) = split_const_name;
+    return Status::OK();
+  }
+
+ private:
+  bool SplitSupported() const {
+    auto dim_node = node_map_->GetNode(node_->input(0));
+    if (!IsConstant(*dim_node)) {
+      return false;
+    }
+    if (HasAttribute(*dim_node, "value").ok()) {
+      auto tensor = dim_node->attr().at({"value"}).tensor();
+      if (tensor.tensor_shape().dim_size() == 0 && tensor.int_val_size() == 1) {
+        if (tensor.int_val(0) < 4 && tensor.int_val(0) >= -4) {
+          return true;
+        }
+      }
+    }
+    return false;
+  }
+
+  NodeDef* AddNodeSplitConst() {
+    auto dim_node = node_map_->GetNode(node_->input(0));
+    auto tensor = dim_node->attr().at({"value"}).tensor();
+    int value = tensor.int_val(0);
+    value = (value >= 0) ? value : value + 4;
+    if (value == 1 || value == 2) {
+      value = value + 1;
+    } else if (value == 3) {
+      value = 1;
+    }
+    // We created a copy of the node, so that we don't modify the original node,
+    // which might be used elsewhere. Note that this copy also copies the
+    // control dependency input in the case this node is inside a loop,
+    // to ensure added_node is in the same frame with the Split node.
+    NodeDef* added_node = graph_->add_node();
+    *added_node = *dim_node;
+    added_node->set_name(strings::StrCat(kSplitConst, "-", node_->name()));
+    added_node->mutable_attr()->at({"value"}).mutable_tensor()->set_int_val(
+        0, value);
+    return added_node;
+  }
+};
+
 class ReluGradProcessor : public AgnosticNodeProcessor {
  public:
   explicit ReluGradProcessor(const OptimizeContext& opt_cxt)
@@ -1431,6 +1507,8 @@ class DataLayoutOptimizer : GraphProcessor {
             } else {
               node_processor.reset(new SliceProcessor(opt_cxt));
             }
+          } else if (IsSplit(*node)) {
+            node_processor.reset(new SplitProcessor(opt_cxt));
           } else if (IsSqueeze(*node)) {
             node_processor.reset(new SqueezeProcessor(opt_cxt));
           } else if (IsSum(*node)) {
diff --git a/tensorflow/core/grappler/optimizers/layout_optimizer_test.cc b/tensorflow/core/grappler/optimizers/layout_optimizer_test.cc
index 20a971629c..8c89f6744b 100644
--- a/tensorflow/core/grappler/optimizers/layout_optimizer_test.cc
+++ b/tensorflow/core/grappler/optimizers/layout_optimizer_test.cc
@@ -399,6 +399,105 @@ TEST_F(LayoutOptimizerTest, FusedBatchNormGradTrainingFalse) {
   EXPECT_EQ(conv_node->attr().at({"data_format"}).s(), "NHWC");
 }
 
+TEST_F(LayoutOptimizerTest, SplitDimC) {
+  tensorflow::Scope s = tensorflow::Scope::NewRootScope();
+  auto conv = SimpleConv2D(&s, 3, 2, "VALID");
+  auto c = ops::Const(s.WithOpName("c"), 3, {});
+  auto split = ops::Split(s.WithOpName("split"), c, conv, 2);
+  auto i = ops::Identity(s.WithOpName("i"), split[0]);
+  GrapplerItem item;
+  TF_CHECK_OK(s.ToGraphDef(&item.graph));
+  LayoutOptimizer optimizer;
+  GraphDef output;
+  Status status = optimizer.Optimize(virtual_cluster_.get(), item, &output);
+  NodeMap node_map(&output);
+  auto split_node = node_map.GetNode("split");
+  EXPECT_EQ(split_node->input(0), "LayoutOptimizerSplitConst-split");
+  EXPECT_EQ(split_node->input(1), "Conv2D");
+  auto split_const = node_map.GetNode("LayoutOptimizerSplitConst-split");
+  EXPECT_EQ(split_const->op(), "Const");
+  EXPECT_EQ(split_const->attr().at({"value"}).tensor().int_val(0), 1);
+}
+
+TEST_F(LayoutOptimizerTest, SplitDimH) {
+  tensorflow::Scope s = tensorflow::Scope::NewRootScope();
+  auto conv = SimpleConv2D(&s, 3, 2, "VALID");
+  auto c = ops::Const(s.WithOpName("c"), 1, {});
+  auto split = ops::Split(s.WithOpName("split"), c, conv, 2);
+  auto i = ops::Identity(s.WithOpName("i"), split[0]);
+  GrapplerItem item;
+  TF_CHECK_OK(s.ToGraphDef(&item.graph));
+  LayoutOptimizer optimizer;
+  GraphDef output;
+  Status status = optimizer.Optimize(virtual_cluster_.get(), item, &output);
+  NodeMap node_map(&output);
+  auto split_node = node_map.GetNode("split");
+  EXPECT_EQ(split_node->input(0), "LayoutOptimizerSplitConst-split");
+  EXPECT_EQ(split_node->input(1), "Conv2D");
+  auto split_const = node_map.GetNode("LayoutOptimizerSplitConst-split");
+  EXPECT_EQ(split_const->op(), "Const");
+  EXPECT_EQ(split_const->attr().at({"value"}).tensor().int_val(0), 2);
+}
+
+TEST_F(LayoutOptimizerTest, SplitDimW) {
+  tensorflow::Scope s = tensorflow::Scope::NewRootScope();
+  auto conv = SimpleConv2D(&s, 3, 2, "VALID");
+  auto c = ops::Const(s.WithOpName("c"), 2, {});
+  auto split = ops::Split(s.WithOpName("split"), c, conv, 2);
+  auto i = ops::Identity(s.WithOpName("i"), split[0]);
+  GrapplerItem item;
+  TF_CHECK_OK(s.ToGraphDef(&item.graph));
+  LayoutOptimizer optimizer;
+  GraphDef output;
+  Status status = optimizer.Optimize(virtual_cluster_.get(), item, &output);
+  NodeMap node_map(&output);
+  auto split_node = node_map.GetNode("split");
+  EXPECT_EQ(split_node->input(0), "LayoutOptimizerSplitConst-split");
+  EXPECT_EQ(split_node->input(1), "Conv2D");
+  auto split_const = node_map.GetNode("LayoutOptimizerSplitConst-split");
+  EXPECT_EQ(split_const->op(), "Const");
+  EXPECT_EQ(split_const->attr().at({"value"}).tensor().int_val(0), 3);
+}
+
+TEST_F(LayoutOptimizerTest, SplitDimN) {
+  tensorflow::Scope s = tensorflow::Scope::NewRootScope();
+  auto conv = SimpleConv2D(&s, 3, 2, "VALID");
+  auto c = ops::Const(s.WithOpName("c"), 0, {});
+  auto split = ops::Split(s.WithOpName("split"), c, conv, 2);
+  auto i = ops::Identity(s.WithOpName("i"), split[0]);
+  GrapplerItem item;
+  TF_CHECK_OK(s.ToGraphDef(&item.graph));
+  LayoutOptimizer optimizer;
+  GraphDef output;
+  Status status = optimizer.Optimize(virtual_cluster_.get(), item, &output);
+  NodeMap node_map(&output);
+  auto split_node = node_map.GetNode("split");
+  EXPECT_EQ(split_node->input(0), "LayoutOptimizerSplitConst-split");
+  EXPECT_EQ(split_node->input(1), "Conv2D");
+  auto split_const = node_map.GetNode("LayoutOptimizerSplitConst-split");
+  EXPECT_EQ(split_const->op(), "Const");
+  EXPECT_EQ(split_const->attr().at({"value"}).tensor().int_val(0), 0);
+}
+
+TEST_F(LayoutOptimizerTest, SplitNonConstDim) {
+  tensorflow::Scope s = tensorflow::Scope::NewRootScope();
+  auto conv = SimpleConv2D(&s, 3, 2, "VALID");
+  auto c = ops::Const(s.WithOpName("c"), 0, {});
+  auto i1 = ops::Identity(s.WithOpName("i1"), c);
+  auto split = ops::Split(s.WithOpName("split"), i1, conv, 2);
+  auto i2 = ops::Identity(s.WithOpName("i"), split[0]);
+  GrapplerItem item;
+  TF_CHECK_OK(s.ToGraphDef(&item.graph));
+  LayoutOptimizer optimizer;
+  GraphDef output;
+  Status status = optimizer.Optimize(virtual_cluster_.get(), item, &output);
+  NodeMap node_map(&output);
+  auto split_node = node_map.GetNode("split");
+  EXPECT_EQ(split_node->input(0), "i1");
+  EXPECT_EQ(split_node->input(1),
+            "LayoutOptimizerTransposeNCHWToNHWC-Conv2D-split");
+}
+
 }  // namespace
 }  // namespace grappler
 }  // namespace tensorflow
-- 
GitLab


From 98ac3f5b7b3942eb0ede7cae1b1afab717b3090a Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 27 Nov 2017 17:02:58 -0800
Subject: [PATCH 0325/1225] Refactor code in arithmetic and dependency
 optimizers   - get rid of duplicated code for node creation,   - make the
 optimized graph, the NodeMap and FrameMap data members   - misc. minor
 simplifications. Fix a few bugs in NodeMap: Make sure we strip port numbers
 off inputs before using them as keys to outputs_ or nodes_.

PiperOrigin-RevId: 177093144
---
 .../optimizers/arithmetic_optimizer.cc        | 408 +++++++++---------
 .../optimizers/arithmetic_optimizer.h         |  42 +-
 .../optimizers/arithmetic_optimizer_test.cc   |  84 ++--
 .../optimizers/dependency_optimizer.cc        |  47 +-
 .../optimizers/dependency_optimizer.h         |  11 +-
 tensorflow/core/grappler/utils.cc             |  62 ++-
 tensorflow/core/grappler/utils.h              |   7 +-
 7 files changed, 350 insertions(+), 311 deletions(-)

diff --git a/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc b/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc
index ec5d2abd7a..eaf5f1f5cf 100644
--- a/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc
+++ b/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc
@@ -30,6 +30,7 @@ limitations under the License.
 #include "tensorflow/core/grappler/grappler_item.h"
 #include "tensorflow/core/grappler/op_types.h"
 #include "tensorflow/core/grappler/optimizers/constant_folding.h"
+#include "tensorflow/core/grappler/utils.h"
 #include "tensorflow/core/grappler/utils/frame.h"
 #include "tensorflow/core/lib/core/errors.h"
 #include "tensorflow/core/lib/core/stringpiece.h"
@@ -38,6 +39,8 @@ limitations under the License.
 #include "tensorflow/core/util/device_name_utils.h"
 #include "tensorflow/core/util/saved_tensor_slice_util.h"
 
+using tensorflow::strings::StrCat;
+
 namespace tensorflow {
 namespace grappler {
 namespace {
@@ -296,30 +299,6 @@ bool ReshapeIsIdentity(const NodeDef& reshape, const NodeDef& input,
   return true;
 }
 
-// Fix frame dependencies by adding control dependencies from old_input to nodes
-// in new_nodes_for_control_dep, and update frame_map for all nodes in
-// new_nodes.
-void AddFrameControlDeps(const NodeDef* old_node,
-                         const std::vector<NodeDef*>& new_nodes,
-                         const string& source_for_ctrl_dep,
-                         const std::vector<NodeDef*>& sinks_for_control_dep,
-                         GraphDef* graph, NodeMap* node_map,
-                         FrameMap* frame_map) {
-  const auto frame_it = frame_map->find(old_node);
-  if (frame_it != frame_map->end()) {
-    for (auto node : new_nodes) {
-      frame_map->emplace(node, frame_it->second);
-    }
-    if (!source_for_ctrl_dep.empty() && !sinks_for_control_dep.empty()) {
-      const string ctrl_dep = ConstantFolding::AddControlDependency(
-          source_for_ctrl_dep, graph, node_map);
-      for (auto node : sinks_for_control_dep) {
-        node->add_input(ctrl_dep);
-      }
-    }
-  }
-}
-
 NodeDef* GetTailOfValuePreservingChain(
     const NodeDef& node, const NodeMap& node_map,
     const std::unordered_set<string>& nodes_to_preserve) {
@@ -437,6 +416,25 @@ bool UniqueNodes::SameNode(const NodeDef& node1, const NodeDef& node2) const {
   return true;
 }
 
+NodeDef* ArithmeticOptimizer::AddNode(const string& name,
+                                      const NodeDef* node_to_copy) {
+  NodeDef* new_node = optimized_graph_->add_node();
+  const string name_with_prefix =
+      AddPrefixToNodeName(name, kArithmeticOptimizer);
+  node_map_->AddNode(NodeName(name_with_prefix), new_node);
+  if (node_to_copy != nullptr) {
+    new_node->CopyFrom(*node_to_copy);
+  }
+  new_node->set_name(name_with_prefix);
+  return new_node;
+}
+
+bool ArithmeticOptimizer::OptimizedNodeExists(const string& name) {
+  const string name_with_prefix =
+      AddPrefixToNodeName(name, kArithmeticOptimizer);
+  return node_map_->NodeExists(name_with_prefix);
+}
+
 bool ArithmeticOptimizer::CanDedup(const NodeDef& node) const {
   if (nodes_to_preserve_.find(node.name()) != nodes_to_preserve_.end()) {
     return false;
@@ -454,18 +452,17 @@ bool ArithmeticOptimizer::CanDedup(const NodeDef& node) const {
   return IsFreeOfSideEffect(node);
 }
 
-void ArithmeticOptimizer::DedupComputations(GraphDef* optimized_graph) const {
-  NodeMap map(optimized_graph);
+void ArithmeticOptimizer::DedupComputations() {
   bool stop = true;
   std::set<int> duplicates;
   do {
     stop = true;
     UniqueNodes nodes;
-    for (int i = 0; i < optimized_graph->node_size(); ++i) {
+    for (int i = 0; i < optimized_graph_->node_size(); ++i) {
       if (duplicates.find(i) != duplicates.end()) {
         continue;
       }
-      NodeDef* node = optimized_graph->mutable_node(i);
+      NodeDef* node = optimized_graph_->mutable_node(i);
       if (!CanDedup(*node)) {
         continue;
       }
@@ -473,20 +470,21 @@ void ArithmeticOptimizer::DedupComputations(GraphDef* optimized_graph) const {
       if (rep == node) {
         continue;
       }
-      const std::set<NodeDef*>& fanouts = map.GetOutputs(node->name());
+      const std::set<NodeDef*>& fanouts = node_map_->GetOutputs(node->name());
       for (NodeDef* fanout : fanouts) {
         for (string& name : *fanout->mutable_input()) {
           int position;
-          string nodename = ParseNodeName(name, &position);
+          const string nodename = ParseNodeName(name, &position);
           if (nodename == node->name()) {
+            // Update name in-place.
             if (position > 0) {
-              name = strings::StrCat(rep->name(), ":", position);
+              name = StrCat(rep->name(), ":", position);
             } else if (position == 0) {
               name = rep->name();
             } else {
-              name = strings::StrCat("^", rep->name());
+              name = StrCat("^", rep->name());
             }
-            map.AddOutput(rep->name(), fanout->name());
+            node_map_->AddOutput(rep->name(), fanout->name());
           }
         }
       }
@@ -497,20 +495,40 @@ void ArithmeticOptimizer::DedupComputations(GraphDef* optimized_graph) const {
 
   // Delete duplicates
   if (!duplicates.empty()) {
-    int last = optimized_graph->node_size() - 1;
+    int last = optimized_graph_->node_size() - 1;
     for (auto it = duplicates.rbegin(); it != duplicates.rend(); ++it) {
       int index = *it;
-      optimized_graph->mutable_node()->SwapElements(index, last);
+      optimized_graph_->mutable_node()->SwapElements(index, last);
       last--;
     }
-    optimized_graph->mutable_node()->DeleteSubrange(last + 1,
-                                                    duplicates.size());
+    optimized_graph_->mutable_node()->DeleteSubrange(last + 1,
+                                                     duplicates.size());
+    // Rebuild the NodeMap which was invalidated by the node  swapping above.
+    node_map_.reset(new NodeMap(optimized_graph_));
+  }
+}
+
+void ArithmeticOptimizer::AddFrameControlDeps(
+    const NodeDef* old_node, const std::vector<NodeDef*>& new_nodes,
+    const string& source_for_ctrl_dep,
+    const std::vector<NodeDef*>& sinks_for_control_dep) {
+  const auto frame_it = frame_map_.find(old_node);
+  if (frame_it != frame_map_.end()) {
+    for (auto node : new_nodes) {
+      frame_map_.emplace(node, frame_it->second);
+    }
+    if (!source_for_ctrl_dep.empty() && !sinks_for_control_dep.empty()) {
+      const string ctrl_dep = ConstantFolding::AddControlDependency(
+          source_for_ctrl_dep, optimized_graph_, node_map_.get());
+      for (auto node : sinks_for_control_dep) {
+        MaybeAddControlInput(ctrl_dep, node, optimized_graph_, node_map_.get());
+      }
+    }
   }
 }
 
 string ArithmeticOptimizer::TrySimplifyAndReplaceUses(
-    const NodeDef* node, GraphDef* graph_def, NodeMap* node_map,
-    std::vector<const NodeDef*>* new_nodes, FrameMap* frame_map) const {
+    const NodeDef* node, SetVector<NodeDef*>* nodes_to_simplify) {
   // Remove involutions applied twice.
   if (IsInvolution(*node)) {
     // An involution is an element-wise function f(x) that is its own inverse,
@@ -520,8 +538,8 @@ string ArithmeticOptimizer::TrySimplifyAndReplaceUses(
     // the two instances of the involution from the graph, since they cancel
     // each other.
     NodeDef* tail =
-        GetTailOfValuePreservingChain(*node, *node_map, nodes_to_preserve_);
-    NodeDef* involution = node_map->GetNode(tail->input(0));
+        GetTailOfValuePreservingChain(*node, *node_map_, nodes_to_preserve_);
+    NodeDef* involution = node_map_->GetNode(tail->input(0));
     if (involution->op() == node->op()) {
       // Skip both *node and *involution since they cancel each other.
       if (tail == node) {
@@ -529,8 +547,8 @@ string ArithmeticOptimizer::TrySimplifyAndReplaceUses(
         return involution->input(0);
       } else {
         tail->set_input(0, involution->input(0));
-        node_map->UpdateInput(tail->name(), involution->name(),
-                              involution->input(0));
+        node_map_->UpdateInput(tail->name(), involution->name(),
+                               involution->input(0));
         return node->input(0);
       }
     }
@@ -538,10 +556,10 @@ string ArithmeticOptimizer::TrySimplifyAndReplaceUses(
 
   // Remove inverse transposes.
   if (node->op() == "Transpose" || node->op() == "ConjugateTranspose") {
-    NodeDef* input = node_map->GetNode(node->input(0));
+    NodeDef* input = node_map_->GetNode(node->input(0));
     if (input->op() == node->op()) {
-      const NodeDef* node_perm = node_map->GetNode(node->input(1));
-      const NodeDef* input_perm = node_map->GetNode(input->input(1));
+      const NodeDef* node_perm = node_map_->GetNode(node->input(1));
+      const NodeDef* input_perm = node_map_->GetNode(input->input(1));
       // Try 32-bit indices.
       std::vector<int> node_perm_values;
       std::vector<int> input_perm_values;
@@ -578,14 +596,14 @@ string ArithmeticOptimizer::TrySimplifyAndReplaceUses(
     //      ^      |
     //      |      |
     //    input ---+
-    NodeDef* reshape = node_map->GetNode(node->name());
+    NodeDef* reshape = node_map_->GetNode(node->name());
     int output_pos = 0;
     string input_node_name = ParseNodeName(node->input(0), &output_pos);
-    const NodeDef* input = node_map->GetNode(input_node_name);
+    const NodeDef* input = node_map_->GetNode(input_node_name);
     if (input->op() == "Reshape") {
       reshape->set_input(0, input->input(0));
-      node_map->UpdateInput(reshape->name(), input->name(), input->input(0));
-      new_nodes->push_back(reshape);
+      node_map_->UpdateInput(reshape->name(), input->name(), input->input(0));
+      nodes_to_simplify->PushBack(reshape);
       return reshape->name();
     }
 
@@ -625,38 +643,30 @@ string ArithmeticOptimizer::TrySimplifyAndReplaceUses(
                                          &device) &&
         (StringPiece(device).contains(DEVICE_CPU) ||
          StringPiece(device).contains(DEVICE_GPU))) {
-      const NodeDef* cast = node_map->GetNode(transpose->input(0));
+      const NodeDef* cast = node_map_->GetNode(transpose->input(0));
       if (cast->op() == "Cast") {
-        const NodeDef* input = node_map->GetNode(cast->input(0));
+        const NodeDef* input = node_map_->GetNode(cast->input(0));
         const DataType src_type = GetSourceDataType(*cast);
         const DataType dst_type = GetDestinationDataType(*cast);
         if (IsNumberType(src_type) && IsNumberType(dst_type) &&
             DataTypeSize(src_type) < DataTypeSize(dst_type)) {
-          NodeDef* new_transpose = graph_def->add_node();
-          *new_transpose = *transpose;
-          new_transpose->set_name(transpose->name() + "_" +
-                                  DataTypeString(src_type));
+          NodeDef* new_transpose =
+              AddNode(StrCat(transpose->name(), "_", DataTypeString(src_type)),
+                      transpose);
           (*new_transpose->mutable_attr())["T"].set_type(src_type);
-          node_map->AddNode(new_transpose->name(), new_transpose);
-
           new_transpose->set_input(0, cast->input(0));
-          node_map->AddOutput(input->name(), new_transpose->name());
-          node_map->AddOutput(NodeName(new_transpose->input(1)),
-                              new_transpose->name());
-
-          NodeDef* new_cast = graph_def->add_node();
-          *new_cast = *cast;
-          new_cast->set_name(cast->name() + "_new");
-          node_map->AddNode(new_cast->name(), new_cast);
+          node_map_->AddOutput(input->name(), new_transpose->name());
+          node_map_->AddOutput(NodeName(new_transpose->input(1)),
+                               new_transpose->name());
 
+          NodeDef* new_cast = AddNode(StrCat(cast->name(), "_new"), cast);
           new_cast->set_input(0, new_transpose->name());
-          node_map->AddOutput(new_transpose->name(), new_cast->name());
+          node_map_->AddOutput(new_transpose->name(), new_cast->name());
 
-          new_nodes->push_back(new_transpose);
+          nodes_to_simplify->PushBack(new_transpose);
           //  Add frame dependencies that the original node might have had.
           AddFrameControlDeps(node, {new_transpose, new_cast},
-                              new_transpose->input(0), {new_transpose},
-                              graph_def, node_map, frame_map);
+                              new_transpose->input(0), {new_transpose});
 
           return new_cast->name();
         }
@@ -665,20 +675,20 @@ string ArithmeticOptimizer::TrySimplifyAndReplaceUses(
   }
 
   if (node->op() == "Bitcast") {
-    NodeDef* bitcast = node_map->GetNode(node->name());
+    NodeDef* bitcast = node_map_->GetNode(node->name());
     // Bypass bitcasts whose source type and destination type are equal.
     if (GetSourceDataType(*bitcast) == GetDestinationDataType(*bitcast)) {
       return bitcast->input(0);
     }
 
-    const NodeDef* operand = node_map->GetNode(bitcast->input(0));
+    const NodeDef* operand = node_map_->GetNode(bitcast->input(0));
     if (operand->op() == bitcast->op()) {
       // Bitcast(Bitcast(x, type1), type2) => Bitcast(x, type2)
       bitcast->set_input(0, operand->input(0));
       SetSourceDataType(GetSourceDataType(*operand), bitcast);
-      node_map->UpdateInput(bitcast->name(), bitcast->input(0),
-                            operand->input(0));
-      new_nodes->push_back(bitcast);
+      node_map_->UpdateInput(bitcast->name(), bitcast->input(0),
+                             operand->input(0));
+      nodes_to_simplify->PushBack(bitcast);
       return bitcast->name();
     }
   }
@@ -720,22 +730,22 @@ string ArithmeticOptimizer::TrySimplifyAndReplaceUses(
   // Conv?DBackpropInput.
   if (node->op() == "Conv2D" || node->op() == "Conv3D") {
     NodeDef* conv = const_cast<NodeDef*>(node);
-    const NodeDef* weights = node_map->GetNode(NodeName(conv->input(1)));
+    const NodeDef* weights = node_map_->GetNode(NodeName(conv->input(1)));
     // Fold the multiply to conv only when the weights are constant, so the
     // multiply can be constant-folded. TODO(jingyue): When the weights aren't
     // constant, this should also help performance a bit and memory usage a lot,
     // since the weights tend to be smaller than the activations.
     if (weights->op() == "Const") {
-      const NodeDef* source = node_map->GetNode(
-          GetTailOfValuePreservingChain(*node, *node_map, nodes_to_preserve_)
+      const NodeDef* source = node_map_->GetNode(
+          GetTailOfValuePreservingChain(*node, *node_map_, nodes_to_preserve_)
               ->input(0));
       if (source->op() == "Mul" &&
-          node_map->GetOutputs(source->name()).size() == 1) {
+          node_map_->GetOutputs(source->name()).size() == 1) {
         const NodeDef* mul = source;
         // `scale` is the scalar multiplier, and `other` is the other operand.
         // TODO(jingyue): handle the case where `scale` is 0-th operand.
-        const NodeDef* scale = node_map->GetNode(mul->input(1));
-        const NodeDef* other = node_map->GetNode(mul->input(0));
+        const NodeDef* scale = node_map_->GetNode(mul->input(1));
+        const NodeDef* other = node_map_->GetNode(mul->input(0));
         if (scale->op() == "Const" && scale->attr().at("dtype").type() ==
                                           weights->attr().at("dtype").type()) {
           const TensorProto& scale_tensor = scale->attr().at("value").tensor();
@@ -743,39 +753,36 @@ string ArithmeticOptimizer::TrySimplifyAndReplaceUses(
           if (scale_tensor.has_tensor_shape() &&
               scale_tensor.tensor_shape().dim_size() == 0) {
             // Create new node `scaled_weights`.
-            NodeDef* scaled_weights = graph_def->add_node();
-            scaled_weights->set_name(weights->name() + "_scaled_" +
-                                     conv->name());
+            NodeDef* scaled_weights = AddNode(
+                StrCat(weights->name(), "_scaled_", conv->name()), nullptr);
             scaled_weights->set_op("Mul");
             scaled_weights->set_device(weights->device());
             (*scaled_weights->mutable_attr())["T"] =
                 weights->attr().at("dtype");
-            node_map->AddNode(scaled_weights->name(), scaled_weights);
-            new_nodes->push_back(scaled_weights);
+            nodes_to_simplify->PushBack(scaled_weights);
 
             // Link in its inputs.
             scaled_weights->add_input(conv->input(1));
-            node_map->AddOutput(weights->name(), scaled_weights->name());
+            node_map_->AddOutput(weights->name(), scaled_weights->name());
             scaled_weights->add_input(mul->input(1));
-            node_map->AddOutput(scale->name(), scaled_weights->name());
-            AddFrameControlDeps(node, {scaled_weights}, "", {}, graph_def,
-                                node_map, frame_map);
+            node_map_->AddOutput(scale->name(), scaled_weights->name());
+            AddFrameControlDeps(node, {scaled_weights}, "", {});
 
             // Update `conv`'s weights to `scaled_weights`.
             conv->set_input(1, scaled_weights->name());
-            node_map->UpdateInput(conv->name(), weights->name(),
-                                  scaled_weights->name());
-            new_nodes->push_back(conv);
+            node_map_->UpdateInput(conv->name(), weights->name(),
+                                   scaled_weights->name());
+            nodes_to_simplify->PushBack(conv);
 
             // Update `mul`'s consumer to bypass `mul` because it's folded to
             // the weights.
-            CHECK_EQ(node_map->GetOutputs(mul->name()).size(), 1);
+            CHECK_EQ(node_map_->GetOutputs(mul->name()).size(), 1);
             NodeDef* consumer_of_mul =
-                *node_map->GetOutputs(mul->name()).begin();
+                *node_map_->GetOutputs(mul->name()).begin();
             consumer_of_mul->set_input(0, mul->input(0));
-            node_map->UpdateInput(consumer_of_mul->name(), mul->name(),
-                                  other->name());
-            new_nodes->push_back(consumer_of_mul);
+            node_map_->UpdateInput(consumer_of_mul->name(), mul->name(),
+                                   other->name());
+            nodes_to_simplify->PushBack(consumer_of_mul);
             return conv->name();
           }
         }
@@ -784,25 +791,18 @@ string ArithmeticOptimizer::TrySimplifyAndReplaceUses(
   }
 
   if (node->op() == "Mul" && node->input(0) == node->input(1) &&
-      node_map->GetNode(node->name() + "_square") == nullptr) {
-    NodeDef* factor = node_map->GetNode(node->input(0));
-    VLOG(2) << "Found square : " << node->DebugString();
-    if (factor != nullptr) {
-      NodeDef* new_mul_node = graph_def->add_node();
-      *new_mul_node = *node;
-      new_mul_node->set_op("Square");
-      new_mul_node->set_name(strings::StrCat(node->name(), "_square"));
-      new_nodes->push_back(new_mul_node);
-      node_map->AddNode(new_mul_node->name(), new_mul_node);
-      for (int i = 1; i < new_mul_node->input_size(); ++i) {
-        new_mul_node->set_input(i - 1, new_mul_node->input(i));
-      }
-      new_mul_node->mutable_input()->RemoveLast();
-      return new_mul_node->name();
+      !OptimizedNodeExists(StrCat(node->name(), "_square"))) {
+    NodeDef* new_square_node =
+        AddNode(strings::StrCat(node->name(), "_square"), node);
+    new_square_node->set_op("Square");
+    for (int i = 1; i < new_square_node->input_size(); ++i) {
+      new_square_node->set_input(i - 1, new_square_node->input(i));
     }
+    new_square_node->mutable_input()->RemoveLast();
+    return new_square_node->name();
   }
 
-  if (node->input_size() > 0 && IsAggregate(*node)) {
+  if (IsAggregate(*node) && NumNonControlInputs(*node) > 0) {
     // Discard aggregate nodes with a single input.
     if (node->input_size() == 1) {
       return node->input(0);
@@ -828,7 +828,8 @@ string ArithmeticOptimizer::TrySimplifyAndReplaceUses(
         break;
       }
     }
-    if (all_equal && node_map->GetNode(node->name() + "_const") == nullptr) {
+    const string mul_node_name = StrCat(node->name(), "_mul");
+    if (all_equal && !OptimizedNodeExists(mul_node_name)) {
       // 1. Create constant node with value N.
       const auto type = GetDataTypeFromAttr(*node, "T");
       Tensor t(type, TensorShape({}));
@@ -839,28 +840,26 @@ string ArithmeticOptimizer::TrySimplifyAndReplaceUses(
         return "";
       }
       TensorValue value(&t);
-      NodeDef* new_const_node = graph_def->add_node();
+      NodeDef* new_const_node =
+          AddNode(StrCat(node->name(), "_const"), nullptr);
       *new_const_node =
-          ConstantFolding::CreateNodeDef(node->name() + "_const", value);
+          ConstantFolding::CreateNodeDef(new_const_node->name(), value);
       new_const_node->set_device(node->device());
-      node_map->AddNode(new_const_node->name(), new_const_node);
-      new_nodes->push_back(new_const_node);
+      nodes_to_simplify->PushBack(new_const_node);
 
       // 2. Replace the aggregate node with Mul(Const(N), x).
-      NodeDef* new_mul_node = graph_def->add_node();
-      new_mul_node->set_name(node->name() + "_mul");
+      NodeDef* new_mul_node = AddNode(mul_node_name, nullptr);
       new_mul_node->set_op("Mul");
       new_mul_node->set_device(node->device());
       SetDataTypeToAttr(type, "T", new_mul_node);
-      node_map->AddNode(new_mul_node->name(), new_mul_node);
       new_mul_node->add_input(new_const_node->name());
-      node_map->AddOutput(new_const_node->name(), new_mul_node->name());
+      node_map_->AddOutput(new_const_node->name(), new_mul_node->name());
       new_mul_node->add_input(node->input(0));
-      node_map->AddOutput(node->input(0), new_mul_node->name());
+      node_map_->AddOutput(node->input(0), new_mul_node->name());
 
-      CopyControlInputs(*node, new_mul_node, graph_def, node_map);
+      CopyControlInputs(*node, new_mul_node, optimized_graph_, node_map_.get());
       AddFrameControlDeps(node, {new_const_node, new_mul_node}, node->input(0),
-                          {new_const_node}, graph_def, node_map, frame_map);
+                          {new_const_node});
       return new_mul_node->name();
     }
   }
@@ -869,14 +868,18 @@ string ArithmeticOptimizer::TrySimplifyAndReplaceUses(
   // multiplication over addition to hoist common factors out of aggregate nodes
   // where all the inputs are Mul nodes. This pattern occurs frequently in
   // regularization terms for the gradients during training.
-  if (node->input_size() > 1 && IsAggregate(*node) &&
-      node_map->GetNode(node->name() + "_hoist_add") == nullptr) {
+  if (IsAggregate(*node) && NumNonControlInputs(*node) > 1 &&
+      !OptimizedNodeExists(StrCat(node->name(), "_hoist_add"))) {
     // Determine the set of common factors if the input nodes are all Mul nodes.
     std::set<string> common_factors;
-    int i = 0;
-    while (i < node->input_size() && (i == 0 || !common_factors.empty()) &&
-           !IsControlInput(node->input(i))) {
-      const NodeDef* input = node_map->GetNode(node->input(i));
+    for (int i = 0; i < node->input_size(); ++i) {
+      if (i > 0 && common_factors.empty()) {
+        break;
+      }
+      if (IsControlInput(node->input(i))) {
+        break;
+      }
+      const NodeDef* input = node_map_->GetNode(node->input(i));
       if (input->op() == "Mul") {
         std::set<string> factors_i{input->input(0), input->input(1)};
         if (i == 0) {
@@ -891,47 +894,42 @@ string ArithmeticOptimizer::TrySimplifyAndReplaceUses(
         }
       } else {
         common_factors.clear();
-        break;
       }
-      ++i;
     }
     if (common_factors.size() == 1) {
+      const string& common_factor = *common_factors.begin();
       // In this case we have an expression of the form
       //   AddN(Mul(x, y1), Mul(y2, x), Mul(x, y3), ... Mul(x, yn))
       // that can be rewritten as
       //   Mul(x, AddN(y1, y2, y3, ... yn))
-      // 1. Hoist non-shared factors up into AddN node.
-      const string& common_factor = *common_factors.begin();
-      NodeDef* new_mul_node = graph_def->add_node();
-      NodeDef* new_add_node = graph_def->add_node();
-      *new_add_node = *node;
-      new_add_node->set_name(node->name() + "_hoist_add");
-      new_nodes->push_back(new_add_node);
-      node_map->AddNode(new_add_node->name(), new_add_node);
+
+      // 1. Use a copy of the first Mul node for the outer multiplication.
+      NodeDef* new_mul_node = AddNode(StrCat(node->name(), "_hoist_mul"),
+                                      node_map_->GetNode(node->input(0)));
+      NodeDef* new_add_node = AddNode(StrCat(node->name(), "_hoist_add"), node);
+      new_mul_node->set_device(node->device());
+      new_mul_node->set_input(0, common_factor);
+      node_map_->AddOutput(common_factor, new_mul_node->name());
+      new_mul_node->set_input(1, new_add_node->name());
+      node_map_->AddOutput(new_add_node->name(), new_mul_node->name());
+
+      // 2. Hoist non-shared factors up into the new AddN node.
+      nodes_to_simplify->PushBack(new_add_node);
       for (int i = 0; i < node->input_size(); ++i) {
         const string& input = node->input(i);
         if (IsControlInput(input)) {
-          MaybeAddControlInput(input, new_add_node, graph_def, node_map);
-          continue;
+          break;
         }
-        NodeDef* mul_node = node_map->GetNode(input);
-        int unique_factor_index = mul_node->input(0) == common_factor ? 1 : 0;
+        const NodeDef* mul_node = node_map_->GetNode(input);
+        const int unique_factor_index =
+            mul_node->input(0) == common_factor ? 1 : 0;
         const string unique_factor = mul_node->input(unique_factor_index);
         new_add_node->set_input(i, unique_factor);
-        // 2. Use a copy of the first Mul node for the outer multiplication.
-        if (i == 0) {
-          *new_mul_node = *mul_node;
-          new_mul_node->set_device(node->device());
-          new_mul_node->set_name(node->name() + "_hoist_mul");
-          new_mul_node->set_input(0, common_factor);
-          new_mul_node->set_input(1, new_add_node->name());
-          node_map->AddNode(new_mul_node->name(), new_mul_node);
-        }
       }
 
-      // 3. Add frame dependencies that the original node might have had.
+      // 4. Add frame dependencies that the original node might have had.
       AddFrameControlDeps(node, {new_add_node, new_mul_node}, common_factor,
-                          {new_add_node}, graph_def, node_map, frame_map);
+                          {new_add_node});
 
       return new_mul_node->name();
     }
@@ -940,9 +938,9 @@ string ArithmeticOptimizer::TrySimplifyAndReplaceUses(
   // Fold Transpose into matrix multiplication.
   if ((node->op() == "MatMul" || node->op() == "SparseMatMul" ||
        node->op() == "BatchMatMul") &&
-      node_map->GetNode(node->name() + "_fused") == nullptr) {
-    const NodeDef* a = node_map->GetNode(node->input(0));
-    const NodeDef* b = node_map->GetNode(node->input(1));
+      !OptimizedNodeExists(StrCat(node->name(), "_fused"))) {
+    const NodeDef* a = node_map_->GetNode(node->input(0));
+    const NodeDef* b = node_map_->GetNode(node->input(1));
     bool is_complex = false;
     if (node->op() != "SparseMatMul") {
       const DataType type = GetDataTypeFromAttr(*node, "T");
@@ -954,32 +952,27 @@ string ArithmeticOptimizer::TrySimplifyAndReplaceUses(
                            ? std::set<string>{"ConjugateTranspose"}
                            : std::set<string>{"Transpose"});
     const bool a_is_foldable = foldable_transpose_ops.count(a->op()) > 0 &&
-                               IsInnerMatrixTransposeNode(*a, node_map);
+                               IsInnerMatrixTransposeNode(*a, node_map_.get());
     const bool b_is_foldable = foldable_transpose_ops.count(b->op()) > 0 &&
-                               IsInnerMatrixTransposeNode(*b, node_map);
+                               IsInnerMatrixTransposeNode(*b, node_map_.get());
     if (a_is_foldable || b_is_foldable) {
-      NodeDef* new_op = graph_def->add_node();
-      *new_op = *node;
-      new_op->set_name(node->name() + "_fused");
-      node_map->AddNode(new_op->name(), new_op);
+      NodeDef* new_op = AddNode(StrCat(node->name(), "_fused"), node);
       if (a_is_foldable) {
         const string attr_a =
             node->op() == "BatchMatMul" ? "adj_x" : "transpose_a";
         FlipBooleanAttr(attr_a, new_op);
         new_op->set_input(0, a->input(0));
-        node_map->UpdateInput(new_op->name(), a->name(), a->input(0));
-        AddFrameControlDeps(node, {new_op}, a->input(0), {new_op}, graph_def,
-                            node_map, frame_map);
+        node_map_->UpdateInput(new_op->name(), a->name(), a->input(0));
+        AddFrameControlDeps(node, {new_op}, a->input(0), {new_op});
       }
       if (b_is_foldable) {
         const string attr_b =
             node->op() == "BatchMatMul" ? "adj_y" : "transpose_b";
         FlipBooleanAttr(attr_b, new_op);
         new_op->set_input(1, b->input(0));
-        node_map->UpdateInput(new_op->name(), b->name(), b->input(0));
+        node_map_->UpdateInput(new_op->name(), b->name(), b->input(0));
         if (!a_is_foldable) {
-          AddFrameControlDeps(node, {new_op}, b->input(0), {new_op}, graph_def,
-                              node_map, frame_map);
+          AddFrameControlDeps(node, {new_op}, b->input(0), {new_op});
         }
       }
     }
@@ -988,25 +981,21 @@ string ArithmeticOptimizer::TrySimplifyAndReplaceUses(
   // Fold Conj into Transpose or ConjugateTranspose.
   if ((node->op() == "Conj" || node->op() == "Transpose" ||
        node->op() == "ConjugateTranspose") &&
-      node_map->GetNode(node->name() + "_fused") == nullptr) {
-    const NodeDef* input = node_map->GetNode(node->input(0));
+      !OptimizedNodeExists(StrCat(node->name(), "_fused"))) {
+    const NodeDef* input = node_map_->GetNode(node->input(0));
     const NodeDef* transpose_op = node->op() == "Conj" ? input : node;
     const NodeDef* conj_op = node->op() == "Conj" ? node : input;
 
     if ((transpose_op->op() == "Transpose" ||
          transpose_op->op() == "ConjugateTranspose") &&
         conj_op->op() == "Conj") {
-      NodeDef* new_op = graph_def->add_node();
-      *new_op = *transpose_op;
-      new_op->set_name(node->name() + "_fused");
+      NodeDef* new_op = AddNode(StrCat(node->name(), "_fused"), transpose_op);
       // Flip the type of transpose op to absorb the conjugation.
       new_op->set_op(transpose_op->op() == "Transpose" ? "ConjugateTranspose"
                                                        : "Transpose");
       new_op->set_input(0, input->input(0));
-      node_map->AddNode(new_op->name(), new_op);
-      node_map->UpdateInput(new_op->name(), node->name(), input->input(0));
-      AddFrameControlDeps(node, {new_op}, "", {}, graph_def, node_map,
-                          frame_map);
+      node_map_->UpdateInput(new_op->name(), node->name(), input->input(0));
+      AddFrameControlDeps(node, {new_op}, "", {});
       return new_op->name();
     }
   }
@@ -1014,29 +1003,23 @@ string ArithmeticOptimizer::TrySimplifyAndReplaceUses(
   return "";
 }
 
-Status ArithmeticOptimizer::SimplifyArithmeticOps(
-    GraphDef* optimized_graph) const {
-  NodeMap node_map(optimized_graph);
-  FrameMap frame_map;
-  int num_frames;
-  TF_RETURN_IF_ERROR(IdentifyFramesWithNodeMap(*optimized_graph, node_map,
-                                               &frame_map, &num_frames));
-  SetVector<const NodeDef*> nodes_to_simplify;
-  for (int i = 0; i < optimized_graph->node_size(); ++i) {
-    nodes_to_simplify.PushBack(optimized_graph->mutable_node()->Mutable(i));
+Status ArithmeticOptimizer::SimplifyArithmeticOps() {
+  SetVector<NodeDef*> nodes_to_simplify;
+  nodes_to_simplify.Reserve(optimized_graph_->node_size());
+  for (int i = 0; i < optimized_graph_->node_size(); ++i) {
+    nodes_to_simplify.PushBack(optimized_graph_->mutable_node(i));
   }
   while (!nodes_to_simplify.Empty()) {
     const NodeDef* node = nodes_to_simplify.PopBack();
-    std::vector<const NodeDef*> new_nodes;
-    const string simplified_tensor = TrySimplifyAndReplaceUses(
-        node, optimized_graph, &node_map, &new_nodes, &frame_map);
+    const string simplified_tensor =
+        TrySimplifyAndReplaceUses(node, &nodes_to_simplify);
     if (simplified_tensor.empty()) {
       continue;
     }
 
     if (NodeName(simplified_tensor) != node->name()) {
       // Always consider simplified_tensor for further optimizations.
-      const NodeDef* simplified_node = node_map.GetNode(simplified_tensor);
+      NodeDef* simplified_node = node_map_->GetNode(simplified_tensor);
       if (simplified_node != nullptr) {
         nodes_to_simplify.PushBack(simplified_node);
       }
@@ -1044,7 +1027,7 @@ Status ArithmeticOptimizer::SimplifyArithmeticOps(
       // consumers of `node` are already redirected to `simplified_tensor`.
       // Re-push the consumers into `nodes_to_simplify` for further
       // optimizations.
-      std::set<NodeDef*> consumers = node_map.GetOutputs(node->name());
+      std::set<NodeDef*> consumers = node_map_->GetOutputs(node->name());
       for (NodeDef* consumer : consumers) {
         // Update `consumer`'s use of `node` to `input`'s operand.
         for (int i = 0; i < consumer->input_size(); ++i) {
@@ -1057,16 +1040,12 @@ Status ArithmeticOptimizer::SimplifyArithmeticOps(
                      ? AsControlDependency(NodeName(simplified_tensor))
                      : simplified_tensor);
           }
-          VLOG(2) << "Update input " << consumer->input(i) << " of "
-                  << consumer->name() << " to " << simplified_tensor;
         }
-        node_map.UpdateInput(consumer->name(), node->name(), simplified_tensor);
+        node_map_->UpdateInput(consumer->name(), node->name(),
+                               simplified_tensor);
         nodes_to_simplify.PushBack(consumer);
       }
     }
-    for (const NodeDef* new_node : new_nodes) {
-      nodes_to_simplify.PushBack(new_node);
-    }
   }
   return Status::OK();
 }
@@ -1074,22 +1053,31 @@ Status ArithmeticOptimizer::SimplifyArithmeticOps(
 Status ArithmeticOptimizer::Optimize(Cluster* /*cluster*/,
                                      const GrapplerItem& item,
                                      GraphDef* optimized_graph) {
-  *optimized_graph = item.graph;
-  nodes_to_preserve_ = item.NodesToPreserve();
+  optimized_graph_ = optimized_graph;
+  *optimized_graph_ = item.graph;
 
+  // Set up helper data structures.
+  nodes_to_preserve_ = item.NodesToPreserve();
+  fetch_nodes_known_ = !item.fetch.empty();
+  node_map_.reset(new NodeMap(optimized_graph_));
+  int num_frames;
+  TF_RETURN_IF_ERROR(IdentifyFramesWithNodeMap(*optimized_graph_, *node_map_,
+                                               &frame_map_, &num_frames));
   if (opt_level_ == RewriterConfig::AGGRESSIVE) {
+    graph_properties_.reset(new GraphProperties(item));
     // Shapes are only needed in aggressive mode.
-    GraphProperties graph_properties(item);
-    TF_RETURN_IF_ERROR(graph_properties.InferStatically());
-    TF_RETURN_IF_ERROR(graph_properties.AnnotateOutputShapes(optimized_graph));
+    TF_RETURN_IF_ERROR(graph_properties_->InferStatically());
+    TF_RETURN_IF_ERROR(
+        graph_properties_->AnnotateOutputShapes(optimized_graph_));
   }
 
-  DedupComputations(optimized_graph);
-  TF_RETURN_IF_ERROR(SimplifyArithmeticOps(optimized_graph));
+  // Perform the optimizations.
+  DedupComputations();
+  TF_RETURN_IF_ERROR(SimplifyArithmeticOps());
 
   // Clear output shapes.
   for (int i = 0; i < optimized_graph->node_size(); ++i) {
-    optimized_graph->mutable_node(i)->mutable_attr()->erase(kOutputShapesAttr);
+    optimized_graph_->mutable_node(i)->mutable_attr()->erase(kOutputShapesAttr);
   }
 
   return Status::OK();
diff --git a/tensorflow/core/grappler/optimizers/arithmetic_optimizer.h b/tensorflow/core/grappler/optimizers/arithmetic_optimizer.h
index c22e2d5363..ec26979238 100644
--- a/tensorflow/core/grappler/optimizers/arithmetic_optimizer.h
+++ b/tensorflow/core/grappler/optimizers/arithmetic_optimizer.h
@@ -17,13 +17,17 @@ limitations under the License.
 #define TENSORFLOW_GRAPPLER_OPTIMIZERS_ARITHMETIC_OPTIMIZER_H_
 
 #include <unordered_set>
+#include "tensorflow/core/grappler/costs/graph_properties.h"
 #include "tensorflow/core/grappler/optimizers/graph_optimizer.h"
 #include "tensorflow/core/grappler/utils.h"
+#include "tensorflow/core/grappler/utils/frame.h"
 #include "tensorflow/core/protobuf/rewriter_config.pb.h"
 
 namespace tensorflow {
 namespace grappler {
 
+constexpr char kArithmeticOptimizer[] = "ArithmeticOptimizer";
+
 // Optimize TF computations by reducing the arithmetic complexity required to
 // run a model.
 class ArithmeticOptimizer : public GraphOptimizer {
@@ -42,13 +46,32 @@ class ArithmeticOptimizer : public GraphOptimizer {
                 const GraphDef& optimized_graph, double result) override;
 
  private:
+  // Returns true is a node with given name and the optimizer prefix already
+  // exists.
+  bool OptimizedNodeExists(const string& name);
+
+  // Creates a new node in the graph, prefixed with "ArithmeticOptimizer/",
+  // updates node_map_, and optionally copies *node_to_copy into the new
+  // node, if node_to_copy is not nullptr.
+  NodeDef* AddNode(const string& name, const NodeDef* node_to_copy);
+
   // Returns true if it is safe to dedup node from the graph.
   bool CanDedup(const NodeDef& node) const;
 
-  void DedupComputations(GraphDef* optimized_graph) const;
+  // Dedup redundant nodes in the graph.
+  void DedupComputations();
+
+  // Fix frame dependencies by adding control dependencies from old_input to
+  // nodes in new_nodes_for_control_dep, and update frame_map for all nodes in
+  // new_nodes.
+  void AddFrameControlDeps(const NodeDef* old_node,
+                           const std::vector<NodeDef*>& new_nodes,
+                           const string& source_for_ctrl_dep,
+                           const std::vector<NodeDef*>& sinks_for_control_dep);
+
   // Runs peep-hole optimizations on `optimized_graph`, e.g., removing inverse
   // transposes.
-  Status SimplifyArithmeticOps(GraphDef* optimized_graph) const;
+  Status SimplifyArithmeticOps();
   // Tries to simplify the expression that roots at `node` and replaces the uses
   // of `node` to the simplified expression. Returns the name of the simplified
   // tensor (e.g. "split:1") or an emtpy string if no simplification is
@@ -64,14 +87,17 @@ class ArithmeticOptimizer : public GraphOptimizer {
   // TODO(jingyue): This interface is not suitable for optimizing nodes with
   // multiple output tensors. We should pass in a tensor name instead of a
   // NodeDef.
-  string TrySimplifyAndReplaceUses(
-      const NodeDef* node, GraphDef* graph_def, NodeMap* node_map,
-      std::vector<const NodeDef*>* new_nodes,
-      std::unordered_map<const NodeDef*, std::vector<int>>* frame_map) const;
-
-  std::unordered_set<string> nodes_to_preserve_;
+  string TrySimplifyAndReplaceUses(const NodeDef* node,
+                                   SetVector<NodeDef*>* nodes_to_simplify);
 
   RewriterConfig::Toggle opt_level_;
+
+  bool fetch_nodes_known_;
+  std::unordered_set<string> nodes_to_preserve_;
+  std::unique_ptr<NodeMap> node_map_;
+  FrameMap frame_map_;
+  std::unique_ptr<GraphProperties> graph_properties_;
+  GraphDef* optimized_graph_;  // Not owned.
 };
 
 }  // end namespace grappler
diff --git a/tensorflow/core/grappler/optimizers/arithmetic_optimizer_test.cc b/tensorflow/core/grappler/optimizers/arithmetic_optimizer_test.cc
index 6bbc64c7a4..e8a18ff9d9 100644
--- a/tensorflow/core/grappler/optimizers/arithmetic_optimizer_test.cc
+++ b/tensorflow/core/grappler/optimizers/arithmetic_optimizer_test.cc
@@ -28,6 +28,10 @@ namespace tensorflow {
 namespace grappler {
 namespace {
 
+string OptimizedName(const string& name) {
+  return AddPrefixToNodeName(name, kArithmeticOptimizer);
+}
+
 class ArithmeticOptimizerTest : public ::testing::Test {};
 
 TEST_F(ArithmeticOptimizerTest, NoOp) {
@@ -164,13 +168,13 @@ TEST_F(ArithmeticOptimizerTest, MulToSquare) {
   TF_EXPECT_OK(status);
 
   EXPECT_EQ(5, output.node_size());
+  EXPECT_EQ("id", output.node(3).name());
+  EXPECT_EQ(OptimizedName("mul_square"), output.node(3).input(0));
   EXPECT_EQ("Square", output.node(4).op());
-  EXPECT_EQ("mul_square", output.node(4).name());
+  EXPECT_EQ(OptimizedName("mul_square"), output.node(4).name());
   EXPECT_EQ(2, output.node(4).input_size());
   EXPECT_EQ("c", output.node(4).input(0));
   EXPECT_EQ("^d", output.node(4).input(1));
-  EXPECT_EQ("id", output.node(3).name());
-  EXPECT_EQ("mul_square", output.node(3).input(0));
 }
 
 TEST_F(ArithmeticOptimizerTest, SimplifyInvolutionsReal) {
@@ -271,17 +275,17 @@ TEST_F(ArithmeticOptimizerTest, TrivialSumsSimple) {
 
   EXPECT_EQ(5, output.node_size());
   const NodeDef& new_const = output.node(3);
-  EXPECT_EQ("add_const", new_const.name());
+  EXPECT_EQ(OptimizedName("add_const"), new_const.name());
   EXPECT_EQ("^x", new_const.input(0));
   EXPECT_EQ(std::string("\0\0\0@", 4),
             new_const.attr().at("value").tensor().tensor_content());
   const NodeDef& new_mul = output.node(4);
-  EXPECT_EQ("add_mul", new_mul.name());
-  EXPECT_EQ("add_const", new_mul.input(0));
+  EXPECT_EQ(OptimizedName("add_mul"), new_mul.name());
+  EXPECT_EQ(OptimizedName("add_const"), new_mul.input(0));
   EXPECT_EQ("x", new_mul.input(1));
   const NodeDef& new_id = output.node(2);
   EXPECT_EQ("id", new_id.name());
-  EXPECT_EQ("add_mul", new_id.input(0));
+  EXPECT_EQ(OptimizedName("add_mul"), new_id.input(0));
 }
 
 TEST_F(ArithmeticOptimizerTest, TrivialSumsSimpleWithControlDep) {
@@ -305,18 +309,18 @@ TEST_F(ArithmeticOptimizerTest, TrivialSumsSimpleWithControlDep) {
 
   EXPECT_EQ(6, output.node_size());
   const NodeDef& new_const = output.node(4);
-  EXPECT_EQ("add_const", new_const.name());
+  EXPECT_EQ(OptimizedName("add_const"), new_const.name());
   EXPECT_EQ("^x", new_const.input(0));
   EXPECT_EQ(std::string("\0\0\0@", 4),
             new_const.attr().at("value").tensor().tensor_content());
   const NodeDef& new_mul = output.node(5);
-  EXPECT_EQ("add_mul", new_mul.name());
-  EXPECT_EQ("add_const", new_mul.input(0));
+  EXPECT_EQ(OptimizedName("add_mul"), new_mul.name());
+  EXPECT_EQ(OptimizedName("add_const"), new_mul.input(0));
   EXPECT_EQ("x", new_mul.input(1));
   EXPECT_EQ("^y", new_mul.input(2));
   const NodeDef& new_id = output.node(3);
   EXPECT_EQ("id", new_id.name());
-  EXPECT_EQ("add_mul", new_id.input(0));
+  EXPECT_EQ(OptimizedName("add_mul"), new_id.input(0));
 }
 
 TEST_F(ArithmeticOptimizerTest, TrivialSumsRepeatedAdd) {
@@ -353,38 +357,39 @@ TEST_F(ArithmeticOptimizerTest, TrivialSumsRepeatedAdd) {
   // Mul(p,
   //     Add(Add(Const(2), Const(2)),
   //         Add(Const(2), Const(2))))
+  EXPECT_EQ(17, output.node_size());
   for (const auto& node : output.node()) {
     if ("id" == node.name()) {
       EXPECT_EQ(1, node.input_size());
-      EXPECT_EQ("Add_6_hoist_mul", node.input(0));
-    } else if ("Add_6_hoist_mul" == node.name()) {
+      EXPECT_EQ(OptimizedName("Add_6_hoist_mul"), node.input(0));
+    } else if (OptimizedName("Add_6_hoist_mul") == node.name()) {
       EXPECT_EQ("Mul", node.op());
       EXPECT_EQ(2, node.input_size());
       EXPECT_EQ("Placeholder", node.input(0));
-      EXPECT_EQ("Add_6_hoist_add", node.input(1));
-    } else if ("Add_6_hoist_add" == node.name()) {
+      EXPECT_EQ(OptimizedName("Add_6_hoist_add"), node.input(1));
+    } else if (OptimizedName("Add_6_hoist_add") == node.name()) {
       EXPECT_EQ("Add", node.op());
       EXPECT_EQ(3, node.input_size());
-      EXPECT_EQ("Add_4_hoist_add", node.input(0));
-      EXPECT_EQ("Add_5_hoist_add", node.input(1));
+      EXPECT_EQ(OptimizedName("Add_4_hoist_add"), node.input(0));
+      EXPECT_EQ(OptimizedName("Add_5_hoist_add"), node.input(1));
       EXPECT_EQ("^Placeholder", node.input(2));
-    } else if ("Add_4_hoist_add" == node.name()) {
+    } else if (OptimizedName("Add_4_hoist_add") == node.name()) {
       EXPECT_EQ("Add", node.op());
       EXPECT_EQ(3, node.input_size());
-      EXPECT_EQ("Add_const", node.input(0));
-      EXPECT_EQ("Add_1_const", node.input(1));
+      EXPECT_EQ(OptimizedName("Add_const"), node.input(0));
+      EXPECT_EQ(OptimizedName("Add_1_const"), node.input(1));
       EXPECT_EQ("^Placeholder", node.input(2));
-    } else if ("Add_5_hoist_add" == node.name()) {
+    } else if (OptimizedName("Add_5_hoist_add") == node.name()) {
       EXPECT_EQ("Add", node.op());
       EXPECT_EQ(3, node.input_size());
-      EXPECT_EQ("Add_const", node.input(0));
-      EXPECT_EQ("Add_1_const", node.input(1));
+      EXPECT_EQ(OptimizedName("Add_const"), node.input(0));
+      EXPECT_EQ(OptimizedName("Add_1_const"), node.input(1));
       EXPECT_EQ("^Placeholder", node.input(2));
-    } else if ("Add_const" == node.name()) {
+    } else if (OptimizedName("Add_const") == node.name()) {
       EXPECT_EQ("Const", node.op());
       EXPECT_EQ(1, node.input_size());
       EXPECT_EQ("^Placeholder", node.input(0));
-    } else if ("Add_1_const" == node.name()) {
+    } else if (OptimizedName("Add_1_const") == node.name()) {
       EXPECT_EQ("Const", node.op());
       EXPECT_EQ(1, node.input_size());
       EXPECT_EQ("^Placeholder", node.input(0));
@@ -416,16 +421,16 @@ TEST_F(ArithmeticOptimizerTest, HoistFactor) {
 
   EXPECT_EQ(9, output.node_size());
   const NodeDef& new_add = output.node(8);
-  EXPECT_EQ("add_hoist_add", new_add.name());
+  EXPECT_EQ(OptimizedName("add_hoist_add"), new_add.name());
   EXPECT_EQ("y1", new_add.input(0));
   EXPECT_EQ("y2", new_add.input(1));
   const NodeDef& new_mul = output.node(7);
-  EXPECT_EQ("add_hoist_mul", new_mul.name());
+  EXPECT_EQ(OptimizedName("add_hoist_mul"), new_mul.name());
   EXPECT_EQ("x", new_mul.input(0));
-  EXPECT_EQ("add_hoist_add", new_mul.input(1));
+  EXPECT_EQ(OptimizedName("add_hoist_add"), new_mul.input(1));
   const NodeDef& new_id = output.node(6);
   EXPECT_EQ("id", new_id.name());
-  EXPECT_EQ("add_hoist_mul", new_id.input(0));
+  EXPECT_EQ(OptimizedName("add_hoist_mul"), new_id.input(0));
 }
 
 TEST_F(ArithmeticOptimizerTest, FuseConjAndTranspose) {
@@ -449,7 +454,7 @@ TEST_F(ArithmeticOptimizerTest, FuseConjAndTranspose) {
   TF_EXPECT_OK(status);
 
   EXPECT_EQ(7, output.node_size());
-  EXPECT_EQ("trans_fused", output.node(6).name());
+  EXPECT_EQ(OptimizedName("trans_fused"), output.node(6).name());
   EXPECT_EQ("ConjugateTranspose", output.node(6).op());
   EXPECT_EQ("z", output.node(6).input(0));
   EXPECT_EQ("perm", output.node(6).input(1));
@@ -473,7 +478,7 @@ TEST_F(ArithmeticOptimizerTest, FuseConjAndConjugateTranspose) {
   TF_EXPECT_OK(status);
 
   EXPECT_EQ(7, output.node_size());
-  EXPECT_EQ("conjugate_trans_fused", output.node(6).name());
+  EXPECT_EQ(OptimizedName("conjugate_trans_fused"), output.node(6).name());
   EXPECT_EQ("Transpose", output.node(6).op());
   EXPECT_EQ("z", output.node(6).input(0));
   EXPECT_EQ("perm", output.node(6).input(1));
@@ -500,7 +505,7 @@ TEST_F(ArithmeticOptimizerTest, FuseTransposeAndConj) {
   TF_EXPECT_OK(status);
 
   EXPECT_EQ(7, output.node_size());
-  EXPECT_EQ("conj_fused", output.node(6).name());
+  EXPECT_EQ(OptimizedName("conj_fused"), output.node(6).name());
   EXPECT_EQ("ConjugateTranspose", output.node(6).op());
   EXPECT_EQ("z", output.node(6).input(0));
   EXPECT_EQ("perm", output.node(6).input(1));
@@ -536,7 +541,7 @@ TEST_F(ArithmeticOptimizerTest, FoldTransposeIntoMatMul) {
     TF_EXPECT_OK(status);
 
     EXPECT_EQ(7, output.node_size());
-    EXPECT_EQ("matmul_fused", output.node(6).name());
+    EXPECT_EQ(OptimizedName("matmul_fused"), output.node(6).name());
     EXPECT_EQ("a", output.node(6).input(0));
     EXPECT_EQ("b", output.node(6).input(1));
     if (matmul_type == "BatchMatMul") {
@@ -574,7 +579,7 @@ TEST_F(ArithmeticOptimizerTest, FoldConjugateTransposeIntoBatchMatMul) {
   TF_EXPECT_OK(status);
 
   EXPECT_EQ(11, output.node_size());
-  EXPECT_EQ("matmul_fused", output.node(10).name());
+  EXPECT_EQ(OptimizedName("matmul_fused"), output.node(10).name());
   EXPECT_EQ("a", output.node(10).input(0));
   EXPECT_EQ("b", output.node(10).input(1));
   EXPECT_TRUE(output.node(10).attr().at("adj_x").b());
@@ -1020,10 +1025,11 @@ TEST_F(ArithmeticOptimizerTest, OptimizeCastMulTransposeConv) {
   NodeMap node_map(&output);
   const NodeDef* inputs_node = CHECK_NOTNULL(node_map.GetNode("Placeholder"));
   const NodeDef* transpose_node =
-      CHECK_NOTNULL(node_map.GetNode("Transpose_uint8"));
-  const NodeDef* cast_node = CHECK_NOTNULL(node_map.GetNode("Cast_new"));
+      CHECK_NOTNULL(node_map.GetNode(OptimizedName("Transpose_uint8")));
+  const NodeDef* cast_node =
+      CHECK_NOTNULL(node_map.GetNode(OptimizedName("Cast_new")));
   const NodeDef* weights_node =
-      CHECK_NOTNULL(node_map.GetNode("weights_scaled_Conv2D"));
+      CHECK_NOTNULL(node_map.GetNode(OptimizedName("weights_scaled_Conv2D")));
   const NodeDef* conv_node = CHECK_NOTNULL(node_map.GetNode("Conv2D"));
 
   EXPECT_EQ(output.node_size(), 7);
@@ -1067,11 +1073,11 @@ TEST_F(ArithmeticOptimizerTest, OptimizeMultipleMulTransposeConv) {
 
   NodeMap node_map(&output);
   const NodeDef* weights_node =
-      CHECK_NOTNULL(node_map.GetNode("weights_scaled_Conv2D"));
+      CHECK_NOTNULL(node_map.GetNode(OptimizedName("weights_scaled_Conv2D")));
   const NodeDef* conv_node = CHECK_NOTNULL(node_map.GetNode("Conv2D"));
 
   const NodeDef* weights_node_1 =
-      CHECK_NOTNULL(node_map.GetNode("weights_scaled_Conv2D_1"));
+      CHECK_NOTNULL(node_map.GetNode(OptimizedName("weights_scaled_Conv2D_1")));
   const NodeDef* conv_node_1 = CHECK_NOTNULL(node_map.GetNode("Conv2D_1"));
   EXPECT_EQ(conv_node->input(1), weights_node->name());
   EXPECT_EQ(conv_node_1->input(1), weights_node_1->name());
diff --git a/tensorflow/core/grappler/optimizers/dependency_optimizer.cc b/tensorflow/core/grappler/optimizers/dependency_optimizer.cc
index 7a9db9bebb..bd8a58d814 100644
--- a/tensorflow/core/grappler/optimizers/dependency_optimizer.cc
+++ b/tensorflow/core/grappler/optimizers/dependency_optimizer.cc
@@ -107,7 +107,7 @@ bool DependencyOptimizer::SafeToConvertToNoOp(const NodeDef& node) {
 }
 
 string DependencyOptimizer::TryOptimizeDependencies(
-    NodeDef* node, GraphDef* graph, std::vector<NodeDef*>* new_nodes) {
+    NodeDef* node, SetVector<NodeDef*>* nodes_to_simplify) {
   // Change ops that only have control dependencies as outputs to NoOps.
   if (node->op() != "NoOp" && SafeToConvertToNoOp(*node)) {
     VLOG(1) << "***** Replacing  " << node->name() << " (" << node->op()
@@ -129,18 +129,18 @@ string DependencyOptimizer::TryOptimizeDependencies(
         continue;
       }
       const string ctrl_input = ConstantFolding::AddControlDependency(
-          old_input, graph, node_map_.get());
+          old_input, optimized_graph_, node_map_.get());
       if (ctrl_inputs.insert(ctrl_input).second) {
         node->set_input(pos, ctrl_input);
         node_map_->UpdateInput(node->name(), old_input, ctrl_input);
         auto old_input_node = node_map_->GetNode(old_input);
-        new_nodes->push_back(old_input_node);
+        nodes_to_simplify->PushBack(old_input_node);
       }
       ++pos;
     }
     node->set_op("NoOp");
     node->clear_attr();
-    new_nodes->push_back(node);
+    nodes_to_simplify->PushBack(node);
     return "";
   }
 
@@ -186,7 +186,7 @@ string DependencyOptimizer::TryOptimizeDependencies(
           consumer->add_input(input);
           updated_consumer = true;
           node_map_->AddOutput(NodeName(input), consumer->name());
-          new_nodes->push_back(input_nodes[i]);
+          nodes_to_simplify->PushBack(input_nodes[i]);
         }
       }
       // Remove dependency on node from consumer.
@@ -195,11 +195,11 @@ string DependencyOptimizer::TryOptimizeDependencies(
       if (updated_consumer) {
         VLOG(1) << "***** Updated consumer  " << consumer->name() << " ("
                 << consumer->op() << ")";
-        new_nodes->push_back(consumer);
+        nodes_to_simplify->PushBack(consumer);
       }
     }
 
-    // Clear all control inputs to node.
+    // Clear all (control) inputs to this NoOp node.
     if (fetch_nodes_known_) {
       node_map_->RemoveInputs(node->name());
       node->clear_input();
@@ -209,12 +209,12 @@ string DependencyOptimizer::TryOptimizeDependencies(
   return "";
 }
 
-Status DependencyOptimizer::OptimizeDependencies(GraphDef* optimized_graph) {
-  // TODO(rmlarsen,bsteiner): The folloing code is similar to the control loop
+Status DependencyOptimizer::OptimizeDependencies() {
+  // TODO(rmlarsen,bsteiner): The following code is similar to the control loop
   // in the ArithmeticOptimizer. Dedup this.
   SetVector<NodeDef*> nodes_to_simplify;
-  for (int i = 0; i < optimized_graph->node_size(); ++i) {
-    NodeDef* node = optimized_graph->mutable_node(i);
+  for (int i = 0; i < optimized_graph_->node_size(); ++i) {
+    NodeDef* node = optimized_graph_->mutable_node(i);
     if (node->op() == "NoOp" || SafeToConvertToNoOp(*node)) {
       PruneControlInputs(node);
       nodes_to_simplify.PushBack(node);
@@ -222,13 +222,10 @@ Status DependencyOptimizer::OptimizeDependencies(GraphDef* optimized_graph) {
   }
   while (!nodes_to_simplify.Empty()) {
     NodeDef* node = nodes_to_simplify.PopBack();
-    std::vector<NodeDef*> new_nodes;
     const string simplified_tensor =
-        TryOptimizeDependencies(node, optimized_graph, &new_nodes);
-    if (simplified_tensor.empty()) {
-      continue;
-    }
-    if (NodeName(simplified_tensor) != node->name()) {
+        TryOptimizeDependencies(node, &nodes_to_simplify);
+    if (!simplified_tensor.empty() &&
+        NodeName(simplified_tensor) != node->name()) {
       // Always consider simplified_tensor for further optimizations.
       NodeDef* simplified_node = node_map_->GetNode(simplified_tensor);
       if (simplified_node != nullptr) {
@@ -257,12 +254,9 @@ Status DependencyOptimizer::OptimizeDependencies(GraphDef* optimized_graph) {
         nodes_to_simplify.PushBack(consumer);
       }
     }
-    for (auto new_node : new_nodes) {
-      nodes_to_simplify.PushBack(new_node);
-    }
   }
-  for (int i = 0; i < optimized_graph->node_size(); ++i) {
-    NodeDef* node = optimized_graph->mutable_node(i);
+  for (int i = 0; i < optimized_graph_->node_size(); ++i) {
+    NodeDef* node = optimized_graph_->mutable_node(i);
     PruneControlInputs(node);
   }
   return Status::OK();
@@ -270,13 +264,14 @@ Status DependencyOptimizer::OptimizeDependencies(GraphDef* optimized_graph) {
 
 Status DependencyOptimizer::Optimize(Cluster* cluster, const GrapplerItem& item,
                                      GraphDef* optimized_graph) {
-  *optimized_graph = item.graph;
+  optimized_graph_ = optimized_graph;
+  *optimized_graph_ = item.graph;
   nodes_to_preserve_ = item.NodesToPreserve();
   node_map_.reset(new NodeMap(optimized_graph));
   fetch_nodes_known_ = !item.fetch.empty();
-  VLOG(1) << "Graph before optimization:\n" << optimized_graph->DebugString();
-  TF_RETURN_IF_ERROR(OptimizeDependencies(optimized_graph));
-  VLOG(1) << "Graph after optimization:\n" << optimized_graph->DebugString();
+  VLOG(1) << "Graph before optimization:\n" << optimized_graph_->DebugString();
+  TF_RETURN_IF_ERROR(OptimizeDependencies());
+  VLOG(1) << "Graph after optimization:\n" << optimized_graph_->DebugString();
 
   return Status::OK();
 }
diff --git a/tensorflow/core/grappler/optimizers/dependency_optimizer.h b/tensorflow/core/grappler/optimizers/dependency_optimizer.h
index cab9383b94..a9d3322744 100644
--- a/tensorflow/core/grappler/optimizers/dependency_optimizer.h
+++ b/tensorflow/core/grappler/optimizers/dependency_optimizer.h
@@ -43,23 +43,26 @@ class DependencyOptimizer : public GraphOptimizer {
                 const GraphDef& optimized_graph, double result) override;
 
  private:
+  Status OptimizeDependencies();
+
   // Returns true if it is safe to convert node to NoOp.
   bool SafeToConvertToNoOp(const NodeDef& node);
 
-  Status OptimizeDependencies(GraphDef* optimized_graph);
   // Tries to simplify the expression that roots at `node` and replaces the uses
   // of `node` to the simplified expression. Returns the name of the simplified
   // tensor (e.g. "split:1") or an empty string if no simplification is
   // performed.
-  string TryOptimizeDependencies(NodeDef* node, GraphDef* graph,
-                                 std::vector<NodeDef*>* new_nodes);
+  string TryOptimizeDependencies(NodeDef* node,
+                                 SetVector<NodeDef*>* nodes_to_simplify);
 
   bool HasOnlyControlOutputs(const NodeDef* node);
 
-  bool fetch_nodes_known_;
   RewriterConfig::Toggle opt_level_;
+
+  bool fetch_nodes_known_;
   std::unordered_set<string> nodes_to_preserve_;
   std::unique_ptr<NodeMap> node_map_;
+  GraphDef* optimized_graph_;  // Not owned.
 };
 
 }  // end namespace grappler
diff --git a/tensorflow/core/grappler/utils.cc b/tensorflow/core/grappler/utils.cc
index 9ab889beb5..07cf2cfc05 100644
--- a/tensorflow/core/grappler/utils.cc
+++ b/tensorflow/core/grappler/utils.cc
@@ -28,22 +28,29 @@ limitations under the License.
 namespace tensorflow {
 namespace grappler {
 
-NodeMap::NodeMap(GraphDef* graph) : graph_(graph) {
-  for (int i = 0; i < graph_->node_size(); i++) {
-    auto node = graph_->mutable_node(i);
-    auto rslt = nodes_.insert(std::make_pair(node->name(), node));
+NodeMap::NodeMap(GraphDef* graph) {
+  CHECK(graph != nullptr);
+  for (int i = 0; i < graph->node_size(); i++) {
+    NodeDef* node = graph->mutable_node(i);
+    const string& node_name = node->name();
+    auto rslt = nodes_.emplace(node_name, node);
     // Check that the graph doesn't contain multiple nodes with the same name.
     if (!rslt.second) {
-      LOG(WARNING) << "Duplicated node in the graph: " << node->name();
+      LOG(WARNING) << "Duplicated node in the graph: " << node_name;
     }
     for (const auto& input : node->input()) {
-      outputs_[NodeName(input)].insert(nodes_[node->name()]);
+      outputs_[NodeName(input)].insert(nodes_[node_name]);
     }
   }
 }
 
+void NodeMap::RemoveNode(const string& name) {
+  nodes_.erase(NodeName(name));
+  outputs_.erase(NodeName(name));
+}
+
 NodeDef* NodeMap::GetNode(const string& name) const {
-  string node_name = NodeName(name);
+  const string node_name = NodeName(name);
   auto it = nodes_.find(node_name);
   if (it == nodes_.end()) {
     return nullptr;
@@ -51,6 +58,11 @@ NodeDef* NodeMap::GetNode(const string& name) const {
   return it->second;
 }
 
+bool NodeMap::NodeExists(const string& name) const {
+  const string node_name = NodeName(name);
+  return nodes_.find(node_name) != nodes_.end();
+}
+
 const std::set<NodeDef*>& NodeMap::GetOutputs(const string& node_name) const {
   auto it = outputs_.find(node_name);
   if (it == outputs_.end()) {
@@ -59,27 +71,27 @@ const std::set<NodeDef*>& NodeMap::GetOutputs(const string& node_name) const {
   return it->second;
 }
 
-void NodeMap::AddNode(const string& name, NodeDef* node) {
-  auto ret = nodes_.insert(std::make_pair(name, node));
-  CHECK(ret.second) << "Pair (" << name << "," << node
+void NodeMap::AddNode(const string& node_name, NodeDef* node) {
+  auto ret = nodes_.emplace(node_name, CHECK_NOTNULL(node));
+  CHECK(ret.second) << "Pair (" << node_name << "," << node
                     << ") is not inserted because the same key already exists.";
 }
 
 void NodeMap::AddOutput(const string& node_name, const string& output_name) {
-  auto output_node = nodes_[output_name];
+  auto output_node = nodes_[NodeName(output_name)];
   CHECK(output_node) << "Output node " << output_name
                      << " is missing in NodeMap.";
   outputs_[node_name].insert(output_node);
 }
 
 void NodeMap::RemoveOutput(const string& node_name, const string& output_name) {
-  outputs_[node_name].erase(nodes_[output_name]);
+  outputs_[node_name].erase(nodes_[NodeName(output_name)]);
 }
 
 void NodeMap::UpdateInput(const string& node_name, const string& old_input_name,
                           const string& new_input_name) {
-  RemoveOutput(old_input_name, node_name);
-  AddOutput(new_input_name, node_name);
+  RemoveOutput(NodeName(old_input_name), node_name);
+  AddOutput(NodeName(new_input_name), node_name);
 }
 
 void NodeMap::RemoveInputs(const string& node_name) {
@@ -97,14 +109,14 @@ void NodeMap::UpdateOutput(const string& node_name,
                            const string& old_output_name,
                            const string& new_output_name) {
   std::set<NodeDef*>& outputs = outputs_[node_name];
-  outputs.erase(nodes_[old_output_name]);
-  outputs.insert(nodes_[new_output_name]);
+  outputs.erase(nodes_[NodeName(old_output_name)]);
+  outputs.insert(nodes_[NodeName(new_output_name)]);
 }
 
 OutputMap::OutputMap(GraphDef* graph) : graph_(graph) {
   for (int i = 0; i < graph_->node_size(); i++) {
     auto node = graph_->mutable_node(i);
-    auto rslt = nodes_.insert(std::make_pair(node->name(), node));
+    auto rslt = nodes_.emplace(node->name(), node);
     // Check that the graph doesn't contain multiple nodes with the same name.
     CHECK(rslt.second);
     for (const auto& input : node->input()) {
@@ -250,8 +262,8 @@ int NumOutputs(const NodeDef& node) {
 
 int NumNonControlInputs(const NodeDef& node) {
   int num_inputs = node.input_size();
-  for (int i = 0; i < node.input_size(); ++i) {
-    if (IsControlInput(node.input(i))) {
+  for (const string& input : node.input()) {
+    if (IsControlInput(input)) {
       --num_inputs;
     }
   }
@@ -261,8 +273,11 @@ int NumNonControlInputs(const NodeDef& node) {
 int NumNonControlOutputs(const NodeDef& node, const NodeMap& node_map) {
   int num_outputs = 0;
   for (const NodeDef* output : node_map.GetOutputs(node.name())) {
-    for (const string& input : output->input()) {
-      if (input == node.name()) {
+    for (const string& node_as_input : output->input()) {
+      if (IsControlInput(node_as_input)) {
+        break;
+      }
+      if (NodeName(node_as_input) == node.name()) {
         ++num_outputs;
       }
     }
@@ -288,13 +303,16 @@ NodeDef* GetTailOfChain(const NodeDef& source, const NodeMap& node_map,
                         const std::function<bool(const NodeDef&)>& pred_fn) {
   const NodeDef* current = &source;
   const NodeDef* next = current;
-  while (next == &source || pred_fn(*next)) {
+  while (next == &source || (next != nullptr && pred_fn(*next))) {
     current = next;
     if (current->input_size() == 0 ||
         (!follow_control_input && IsControlInput(current->input(0)))) {
       break;
     }
     next = node_map.GetNode(current->input(0));
+    if (next == nullptr) {
+      LOG(ERROR) << "Node not found: " << current->input(0);
+    }
   }
   return const_cast<NodeDef*>(current);
 }
diff --git a/tensorflow/core/grappler/utils.h b/tensorflow/core/grappler/utils.h
index b98b8656e2..411e44d487 100644
--- a/tensorflow/core/grappler/utils.h
+++ b/tensorflow/core/grappler/utils.h
@@ -33,12 +33,16 @@ namespace grappler {
 // A utility class to lookup a node and its outputs by node name.
 class NodeMap {
  public:
+  // Note: The NodeMap will store pointers to nodes in graph, which may become
+  // invalid if graph is changed.
   explicit NodeMap(GraphDef* graph);
   NodeDef* GetNode(const string& name) const;
+  bool NodeExists(const string& name) const;
   const std::set<NodeDef*>& GetOutputs(const string& node_name) const;
   // This method doesn't record the outputs of the added node; the outputs need
   // to be explicitly added by the AddOutput method.
   void AddNode(const string& name, NodeDef* node);
+  void RemoveNode(const string& name);
   void UpdateInput(const string& node_name, const string& old_input_name,
                    const string& new_input_name);
   void AddOutput(const string& node_name, const string& output_name);
@@ -49,8 +53,7 @@ class NodeMap {
                     const string& new_output_name);
 
  private:
-  GraphDef* graph_;
-  std::set<NodeDef*> empty_set_;
+  const std::set<NodeDef*> empty_set_;
   std::unordered_map<string, NodeDef*> nodes_;
   std::unordered_map<string, std::set<NodeDef*>> outputs_;
 };
-- 
GitLab


From 148f157bb89d33db123c9519e94c2781ca3488c9 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 27 Nov 2017 17:41:24 -0800
Subject: [PATCH 0326/1225] Fix docs to recommend cuDNN 6.0, rather than the
 old 5.1 or non-existent 6.1.

Also see #14805

PiperOrigin-RevId: 177097162
---
 tensorflow/docs_src/install/install_sources.md | 2 +-
 tensorflow/docs_src/install/install_windows.md | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/tensorflow/docs_src/install/install_sources.md b/tensorflow/docs_src/install/install_sources.md
index dbc90e8112..c01aa907a3 100644
--- a/tensorflow/docs_src/install/install_sources.md
+++ b/tensorflow/docs_src/install/install_sources.md
@@ -138,7 +138,7 @@ The following NVIDIA <i>software</i> must be installed on your system:
     `LD_LIBRARY_PATH` environment variable as described in the
     NVIDIA documentation.
   * The NVIDIA drivers associated with NVIDIA's Cuda Toolkit.
-  * cuDNN (>= v3). We recommend version 5.1. For details, see
+  * cuDNN (>= v3). We recommend version 6.0. For details, see
     [NVIDIA's documentation](https://developer.nvidia.com/cudnn),
     particularly the description of appending the appropriate pathname
     to your `LD_LIBRARY_PATH` environment variable.
diff --git a/tensorflow/docs_src/install/install_windows.md b/tensorflow/docs_src/install/install_windows.md
index 4098ee5b2e..63742828b0 100644
--- a/tensorflow/docs_src/install/install_windows.md
+++ b/tensorflow/docs_src/install/install_windows.md
@@ -36,7 +36,7 @@ installed on your system:
     Ensure that you append the relevant Cuda pathnames to the `%PATH%`
     environment variable as described in the NVIDIA documentation.
   * The NVIDIA drivers associated with CUDA Toolkit 8.0.
-  * cuDNN v6.1. For details, see
+  * cuDNN v6.0. For details, see
     [NVIDIA's documentation](https://developer.nvidia.com/cudnn).
     Note that cuDNN is typically installed in a different location from the
     other CUDA DLLs. Ensure that you add the directory where you installed
-- 
GitLab


From 446f8fd6e93199838c087b6320cbb9aa7834fd53 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 27 Nov 2017 18:26:14 -0800
Subject: [PATCH 0327/1225] Fixed code for Adadelta to match correct algorithm
 and tightened tolerances in test to catch this problem in the future.  The
 previous code was incorrect because it did not account for the lazy
 evaluation caused by "update" being declared as "const auto" (and thus using
 the current value of "accum_update_" at each point where it was used in the
 code).

PiperOrigin-RevId: 177101858
---
 tensorflow/core/kernels/training_ops.cc        |  6 +++---
 tensorflow/core/kernels/training_ops_gpu.cu.cc |  2 +-
 tensorflow/python/training/adadelta_test.py    | 11 +++++------
 3 files changed, 9 insertions(+), 10 deletions(-)

diff --git a/tensorflow/core/kernels/training_ops.cc b/tensorflow/core/kernels/training_ops.cc
index 76c30c5a46..b8d601389b 100644
--- a/tensorflow/core/kernels/training_ops.cc
+++ b/tensorflow/core/kernels/training_ops.cc
@@ -76,9 +76,9 @@ struct ApplyAdadelta<CPUDevice, T> {
         accum * rho() + grad.square() * (static_cast<T>(1) - rho());
     const auto update =
         (accum_update + epsilon()).sqrt() * (accum + epsilon()).rsqrt() * grad;
+    var.device(d) -= update * lr();
     accum_update.device(d) =
         accum_update * rho() + update.square() * (static_cast<T>(1) - rho());
-    var.device(d) -= update * lr();
   }
 };
 
@@ -784,11 +784,11 @@ class SparseApplyAdadeltaOp : public OpKernel {
         const auto update =
             (accum_update_ + accum_update_.constant(epsilon_scalar)).sqrt() *
             (accum_ + accum_.constant(epsilon_scalar)).rsqrt() * grad_;
+        auto v = var_flat.template chip<0>(index);
+        v -= update * update.constant(lr_scalar);
         accum_update_ =
             accum_update_ * accum_update_.constant(rho_scalar) +
             update.square() * update.constant(static_cast<T>(1) - rho_scalar);
-        auto v = var_flat.template chip<0>(index);
-        v -= update * update.constant(lr_scalar);
       }
     }
     if (use_exclusive_lock_) {
diff --git a/tensorflow/core/kernels/training_ops_gpu.cu.cc b/tensorflow/core/kernels/training_ops_gpu.cu.cc
index f501161095..d443a6b3c1 100644
--- a/tensorflow/core/kernels/training_ops_gpu.cu.cc
+++ b/tensorflow/core/kernels/training_ops_gpu.cu.cc
@@ -70,11 +70,11 @@ struct ApplyAdadelta<GPUDevice, T> {
     const auto update =
         (accum_update + epsilon.reshape(single).broadcast(bcast)).sqrt() *
         (accum + epsilon.reshape(single).broadcast(bcast)).rsqrt() * grad;
+    var.device(d) -= update * lr.reshape(single).broadcast(bcast);
     accum_update.device(d) =
         accum_update * rho.reshape(single).broadcast(bcast) +
         update.square() *
             (grad.constant(T(1)) - rho.reshape(single).broadcast(bcast));
-    var.device(d) -= update * lr.reshape(single).broadcast(bcast);
   }
 };
 
diff --git a/tensorflow/python/training/adadelta_test.py b/tensorflow/python/training/adadelta_test.py
index de59768d0b..50f435236b 100644
--- a/tensorflow/python/training/adadelta_test.py
+++ b/tensorflow/python/training/adadelta_test.py
@@ -112,17 +112,16 @@ class AdadeltaOptimizerTest(test.TestCase):
               # Check that the accumulators have been updated
               for slot_idx in range(2):
                 self.assertAllCloseAccordingToType(
-                    np.array(
-                        [accum, accum], dtype=dtype.as_numpy_dtype()),
+                    np.array([accum, accum], dtype=dtype.as_numpy_dtype()),
                     slot[slot_idx].eval(),
-                    rtol=1e-3)
+                    rtol=1e-5)
 
                 self.assertAllCloseAccordingToType(
                     np.array(
                         [accum_update, accum_update],
                         dtype=dtype.as_numpy_dtype()),
                     slot_update[slot_idx].eval(),
-                    rtol=1e-3)
+                    rtol=1e-5)
 
               # Check that the parameters have been updated
               self.assertAllCloseAccordingToType(
@@ -130,14 +129,14 @@ class AdadeltaOptimizerTest(test.TestCase):
                       [var0_init[0] - tot_update, var0_init[1] - tot_update],
                       dtype=dtype.as_numpy_dtype()),
                   var0.eval(),
-                  rtol=1e-3)
+                  rtol=1e-5)
 
               self.assertAllCloseAccordingToType(
                   np.array(
                       [var1_init[0] - tot_update, var1_init[1] - tot_update],
                       dtype=dtype.as_numpy_dtype()),
                   var1.eval(),
-                  rtol=1e-3)
+                  rtol=1e-5)
 
   def testBasic(self):
     self.doTestBasic(use_resource=False)
-- 
GitLab


From f4a33d1c142475da42ad9812c5f0cab7704cb275 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 27 Nov 2017 18:54:57 -0800
Subject: [PATCH 0328/1225] Remove old pre-C++11 non-portable helper code.

With C++11, vector::data is guaranteed to return a valid pointer p regardless of the range size such that [p, p + size) is a valid range, and basic_string always contains a null terminator.

Current consuming code never checks for the nullness (which already isn't guaranteed for vector by the current implementation) and passes the size separately to the eventual consumer.

PiperOrigin-RevId: 177104032
---
 tensorflow/core/lib/gtl/stl_util.h | 49 ++++++++----------------------
 1 file changed, 12 insertions(+), 37 deletions(-)

diff --git a/tensorflow/core/lib/gtl/stl_util.h b/tensorflow/core/lib/gtl/stl_util.h
index cda72a579d..ffeca4e88a 100644
--- a/tensorflow/core/lib/gtl/stl_util.h
+++ b/tensorflow/core/lib/gtl/stl_util.h
@@ -29,48 +29,23 @@ limitations under the License.
 namespace tensorflow {
 namespace gtl {
 
-// Returns a mutable char* pointing to a string's internal buffer, which may not
-// be null-terminated. Returns NULL for an empty string. If not non-null,
-// writing through this pointer will modify the string.
-//
-// string_as_array(&str)[i] is valid for 0 <= i < str.size() until the
-// next call to a string method that invalidates iterators.
-//
-// In C++11 you may simply use &str[0] to get a mutable char*.
-//
-// Prior to C++11, there was no standard-blessed way of getting a mutable
-// reference to a string's internal buffer. The requirement that string be
-// contiguous is officially part of the C++11 standard [string.require]/5.
-// According to Matt Austern, this should already work on all current C++98
-// implementations.
-inline char* string_as_array(string* str) {
-  return str->empty() ? NULL : &*str->begin();
-}
-
-// Returns the T* array for the given vector, or NULL if the vector was empty.
-//
-// Note: If you know the array will never be empty, you can use &*v.begin()
-// directly, but that is may dump core if v is empty. This function is the most
-// efficient code that will work, taking into account how our STL is actually
-// implemented. THIS IS NON-PORTABLE CODE, so use this function instead of
-// repeating the nonportable code everywhere. If our STL implementation changes,
-// we will need to change this as well.
+// Returns a char* pointing to the beginning of a string's internal buffer.
+// The result is a valid "null-terminated byte string", even if *str is empty.
+// Up to C++14 it is not valid to *write* to the null terminator; as of C++17,
+// it is valid to write zero to the null terminator (but not any other value).
+inline char* string_as_array(string* str) { return &*str->begin(); }
+
+// The following vector_as_array functions return raw pointers to the underlying
+// data buffer. The return value is unspecified (but valid) if the input range
+// is empty.
 template <typename T, typename Allocator>
 inline T* vector_as_array(std::vector<T, Allocator>* v) {
-#if defined NDEBUG && !defined _GLIBCXX_DEBUG
-  return &*v->begin();
-#else
-  return v->empty() ? NULL : &*v->begin();
-#endif
+  return v->data();
 }
-// vector_as_array overload for const std::vector<>.
+
 template <typename T, typename Allocator>
 inline const T* vector_as_array(const std::vector<T, Allocator>* v) {
-#if defined NDEBUG && !defined _GLIBCXX_DEBUG
-  return &*v->begin();
-#else
-  return v->empty() ? NULL : &*v->begin();
-#endif
+  return v->data();
 }
 
 // Like str->resize(new_size), except any new characters added to "*str" as a
-- 
GitLab


From 28ee7877a9df4e66f81d31d48b067091726c71ab Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 27 Nov 2017 19:10:43 -0800
Subject: [PATCH 0329/1225] Disable Mul hoisting out of AddN where broadcasting
 might be needed.

PiperOrigin-RevId: 177105161
---
 tensorflow/core/grappler/op_types.cc                        | 4 +++-
 tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc | 3 ++-
 2 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/tensorflow/core/grappler/op_types.cc b/tensorflow/core/grappler/op_types.cc
index 83188ffc0d..15fcaa857e 100644
--- a/tensorflow/core/grappler/op_types.cc
+++ b/tensorflow/core/grappler/op_types.cc
@@ -24,7 +24,9 @@ limitations under the License.
 namespace tensorflow {
 namespace grappler {
 
-bool IsAdd(const NodeDef& node) { return node.op() == "Add"; }
+bool IsAdd(const NodeDef& node) {
+  return node.op() == "Add" || node.op() == "AddV2";
+}
 
 bool IsAddN(const NodeDef& node) { return node.op() == "AddN"; }
 
diff --git a/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc b/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc
index eaf5f1f5cf..1e39c610a4 100644
--- a/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc
+++ b/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc
@@ -868,7 +868,8 @@ string ArithmeticOptimizer::TrySimplifyAndReplaceUses(
   // multiplication over addition to hoist common factors out of aggregate nodes
   // where all the inputs are Mul nodes. This pattern occurs frequently in
   // regularization terms for the gradients during training.
-  if (IsAggregate(*node) && NumNonControlInputs(*node) > 1 &&
+  // TODO(rmlarsen): Check shapes and enable for AddN.
+  if (IsAdd(*node) && NumNonControlInputs(*node) > 1 &&
       !OptimizedNodeExists(StrCat(node->name(), "_hoist_add"))) {
     // Determine the set of common factors if the input nodes are all Mul nodes.
     std::set<string> common_factors;
-- 
GitLab


From e02be0161393d255d8e1ade54c2a885b9695beab Mon Sep 17 00:00:00 2001
From: Jonathan Hseu <jhseu@google.com>
Date: Mon, 27 Nov 2017 19:40:45 -0800
Subject: [PATCH 0330/1225] Fix the flaky file_block_cache_test.

PiperOrigin-RevId: 177106823
---
 tensorflow/core/platform/cloud/file_block_cache_test.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/core/platform/cloud/file_block_cache_test.cc b/tensorflow/core/platform/cloud/file_block_cache_test.cc
index 2a9eb7d524..081b32af64 100644
--- a/tensorflow/core/platform/cloud/file_block_cache_test.cc
+++ b/tensorflow/core/platform/cloud/file_block_cache_test.cc
@@ -461,7 +461,7 @@ TEST(FileBlockCacheTest, CoalesceConcurrentReads) {
         TF_EXPECT_OK(cache.Read("", 0, block_size / 2, &out));
         EXPECT_EQ(out.size(), block_size / 2);
       }));
-  EXPECT_TRUE(WaitForNotificationWithTimeout(&notification, 1000))
+  EXPECT_TRUE(WaitForNotificationWithTimeout(&notification, 10000))
       << "Timeout waiting for concurrent thread to start.";
   std::vector<char> out;
   TF_EXPECT_OK(cache.Read("", block_size / 2, block_size / 2, &out));
-- 
GitLab


From 587dbb404318039c37d7587b6ac5d044504d0ad1 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 27 Nov 2017 19:48:53 -0800
Subject: [PATCH 0331/1225] [XLA] Remove the extra RunHloOptimization in
 InterpreterCompiler::RunBackend.

PiperOrigin-RevId: 177107209
---
 tensorflow/compiler/xla/service/interpreter/compiler.cc | 2 --
 1 file changed, 2 deletions(-)

diff --git a/tensorflow/compiler/xla/service/interpreter/compiler.cc b/tensorflow/compiler/xla/service/interpreter/compiler.cc
index c9a5285a4f..dc63a2224d 100644
--- a/tensorflow/compiler/xla/service/interpreter/compiler.cc
+++ b/tensorflow/compiler/xla/service/interpreter/compiler.cc
@@ -83,8 +83,6 @@ StatusOr<std::unique_ptr<Executable>> InterpreterCompiler::RunBackend(
 
   VLOG(1) << "Run backend " << hlo_module->name();
 
-  TF_RETURN_IF_ERROR(RunHloOptimization(hlo_module.get()));
-
   // Typically you would visit the HLO graph, building up a compiled equivalent
   // In this case we are using an HloEvaluator at execution time, so we don't
   // need to compile anything
-- 
GitLab


From a8a923b3be645bad6cd08c7d80a148ebbaf47445 Mon Sep 17 00:00:00 2001
From: Sergio Guadarrama <sguada@google.com>
Date: Mon, 27 Nov 2017 20:06:25 -0800
Subject: [PATCH 0332/1225] Add non_trainable_variables property to
 EagerVariableStore.

PiperOrigin-RevId: 177108237
---
 tensorflow/python/kernel_tests/variable_scope_test.py | 2 ++
 tensorflow/python/ops/variable_scope.py               | 6 ++++++
 2 files changed, 8 insertions(+)

diff --git a/tensorflow/python/kernel_tests/variable_scope_test.py b/tensorflow/python/kernel_tests/variable_scope_test.py
index 5396214956..70fe0a4785 100644
--- a/tensorflow/python/kernel_tests/variable_scope_test.py
+++ b/tensorflow/python/kernel_tests/variable_scope_test.py
@@ -128,6 +128,8 @@ class VariableScopeTest(test.TestCase):
       self.assertTrue(w in store.variables())
       self.assertTrue(v in store.trainable_variables())
       self.assertFalse(w in store.trainable_variables())
+      self.assertFalse(v in store.non_trainable_variables())
+      self.assertTrue(w in store.non_trainable_variables())
 
   @test_util.run_in_graph_and_eager_modes()
   def testInitFromNonTensorValue(self):
diff --git a/tensorflow/python/ops/variable_scope.py b/tensorflow/python/ops/variable_scope.py
index dd435249f4..ae2d46a2b7 100644
--- a/tensorflow/python/ops/variable_scope.py
+++ b/tensorflow/python/ops/variable_scope.py
@@ -1233,6 +1233,12 @@ class EagerVariableStore(object):
                   key=lambda x: x.name)
     # pylint: enable=protected-access
 
+  def non_trainable_variables(self):
+    # pylint: disable=protected-access
+    return sorted([x for x in self._store._vars.values() if not x._trainable],
+                  key=lambda x: x.name)
+    # pylint: enable=protected-access
+
 
 def get_variable(name,
                  shape=None,
-- 
GitLab


From 119e3a18ce480b7f808638a2821de1d935f2df8f Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 27 Nov 2017 20:28:58 -0800
Subject: [PATCH 0333/1225] Make ClientLibraryTestBase automatic choose float
 precision based on a flag.

PiperOrigin-RevId: 177109696
---
 tensorflow/compiler/xla/reference_util.cc     | 133 ----------------
 tensorflow/compiler/xla/reference_util.h      | 146 ++++++++++++++++--
 .../xla/tests/client_library_test_base.cc     |  87 ++++++++---
 .../xla/tests/client_library_test_base.h      |  49 +++++-
 .../compiler/xla/tests/literal_test_util.cc   |  32 ++++
 .../compiler/xla/tests/literal_test_util.h    |   6 +
 6 files changed, 289 insertions(+), 164 deletions(-)

diff --git a/tensorflow/compiler/xla/reference_util.cc b/tensorflow/compiler/xla/reference_util.cc
index 90aa9720a1..5a899d550b 100644
--- a/tensorflow/compiler/xla/reference_util.cc
+++ b/tensorflow/compiler/xla/reference_util.cc
@@ -703,137 +703,4 @@ ReferenceUtil::ReduceToRowArray2D(
   return result;
 }
 
-/* static */ std::unique_ptr<Array2D<float>> ReferenceUtil::PadArray2D(
-    const Array2D<float>& operand, const PaddingConfig& padding,
-    const float pad) {
-  int64 in0 = operand.n1();
-  int64 high_padding0 = padding.dimensions(0).edge_padding_high();
-  int64 low_padding0 = padding.dimensions(0).edge_padding_low();
-  int64 interior_padding0 = padding.dimensions(0).interior_padding();
-  int64 out0 =
-      in0 + low_padding0 + high_padding0 + (in0 - 1) * interior_padding0;
-
-  int64 in1 = operand.n2();
-  int64 high_padding1 = padding.dimensions(1).edge_padding_high();
-  int64 low_padding1 = padding.dimensions(1).edge_padding_low();
-  int64 interior_padding1 = padding.dimensions(1).interior_padding();
-  int64 out1 =
-      in1 + low_padding1 + high_padding1 + (in1 - 1) * interior_padding1;
-
-  auto result = MakeUnique<Array2D<float>>(out0, out1);
-  result->Fill(pad);
-  int64 o0 = low_padding0;
-  for (int64 i0 = 0; i0 < in0; ++i0) {
-    int64 o1 = low_padding1;
-    for (int64 i1 = 0; i1 < in1; ++i1) {
-      if (o0 >= 0 && o1 >= 0 && o0 < out0 && o1 < out1) {
-        (*result)(o0, o1) = operand(i0, i1);
-      }
-      o1 += interior_padding1 + 1;
-    }
-    o0 += interior_padding0 + 1;
-  }
-  return result;
-}
-
-/* static */ Array3D<float> ReferenceUtil::PadArray3D(
-    const Array3D<float>& operand, const PaddingConfig& padding,
-    const float pad) {
-  CHECK_EQ(padding.dimensions_size(), 3);
-
-  const std::vector<int64> input_bounds = {operand.n1(), operand.n2(),
-                                           operand.n3()};
-  std::vector<int64> pad_low(3);
-  std::vector<int64> pad_high(3);
-  std::vector<int64> pad_interior(3);
-  std::vector<int64> output_bounds(3);
-  for (int64 i = 0; i < 3; ++i) {
-    pad_low[i] = padding.dimensions(i).edge_padding_low();
-    pad_high[i] = padding.dimensions(i).edge_padding_high();
-    CHECK_LE(0, pad_low[i]);
-    CHECK_LE(0, pad_high[i]);
-    CHECK_LE(0, padding.dimensions(i).interior_padding()) << "not implemented";
-    pad_interior[i] = padding.dimensions(i).interior_padding();
-
-    output_bounds[i] = pad_low[i] + input_bounds[i] + pad_high[i] +
-                       (input_bounds[i] - 1) * pad_interior[i];
-  }
-
-  Array3D<float> result(output_bounds[0], output_bounds[1], output_bounds[2]);
-  std::vector<int> indices = {0, 0, 0};
-  for (indices[0] = 0; indices[0] < output_bounds[0]; ++indices[0]) {
-    for (indices[1] = 0; indices[1] < output_bounds[1]; ++indices[1]) {
-      for (indices[2] = 0; indices[2] < output_bounds[2]; ++indices[2]) {
-        float* value = &result(indices[0], indices[1], indices[2]);
-        bool value_padded = false;
-        for (int i = 0; i < 3; ++i) {
-          bool in_low_padding = indices[i] < pad_low[i];
-          bool in_high_padding = indices[i] >= output_bounds[i] - pad_high[i];
-          if (in_low_padding || in_high_padding) {
-            *value = pad;
-            value_padded = true;
-          }
-          if (pad_interior[i] &&
-              (indices[i] - pad_low[i]) % (pad_interior[i] + 1)) {
-            *value = pad;
-            value_padded = true;
-          }
-        }
-        if (value_padded) {
-          continue;
-        }
-        *value = operand((indices[0] - pad_low[0]) / (pad_interior[0] + 1),
-                         (indices[1] - pad_low[1]) / (pad_interior[1] + 1),
-                         (indices[2] - pad_low[2]) / (pad_interior[2] + 1));
-      }
-    }
-  }
-  return result;
-}
-
-/* static */ Array4D<float> ReferenceUtil::PadArray4D(
-    const Array4D<float>& operand, const PaddingConfig& padding,
-    const float pad) {
-  CHECK_EQ(padding.dimensions_size(), 4);
-
-  const std::vector<int64> input_bounds = {operand.n1(), operand.n2(),
-                                           operand.n3(), operand.n4()};
-  std::vector<int64> pad_low(4);
-  std::vector<int64> pad_high(4);
-  std::vector<int64> pad_interior(4);
-  std::vector<int64> output_bounds(4);
-  for (int64 i = 0; i < 4; ++i) {
-    pad_low[i] = padding.dimensions(i).edge_padding_low();
-    pad_high[i] = padding.dimensions(i).edge_padding_high();
-    CHECK_LE(0, padding.dimensions(i).interior_padding()) << "not implemented";
-    pad_interior[i] = padding.dimensions(i).interior_padding();
-
-    output_bounds[i] = pad_low[i] + input_bounds[i] + pad_high[i] +
-                       (input_bounds[i] - 1) * pad_interior[i];
-  }
-
-  Array4D<float> result(output_bounds[0], output_bounds[1], output_bounds[2],
-                        output_bounds[3]);
-  result.Each([&](tensorflow::gtl::ArraySlice<int64> indices, float* value) {
-    for (int i = 0; i < 4; ++i) {
-      bool in_low_padding = indices[i] < pad_low[i];
-      bool in_high_padding = indices[i] >= output_bounds[i] - pad_high[i];
-      if (in_low_padding || in_high_padding) {
-        *value = pad;
-        return;
-      }
-      if (pad_interior[i] &&
-          (indices[i] - pad_low[i]) % (pad_interior[i] + 1)) {
-        *value = pad;
-        return;
-      }
-    }
-    *value = operand((indices[0] - pad_low[0]) / (pad_interior[0] + 1),
-                     (indices[1] - pad_low[1]) / (pad_interior[1] + 1),
-                     (indices[2] - pad_low[2]) / (pad_interior[2] + 1),
-                     (indices[3] - pad_low[3]) / (pad_interior[3] + 1));
-  });
-  return result;
-}
-
 }  // namespace xla
diff --git a/tensorflow/compiler/xla/reference_util.h b/tensorflow/compiler/xla/reference_util.h
index 2da1730781..62d455d71a 100644
--- a/tensorflow/compiler/xla/reference_util.h
+++ b/tensorflow/compiler/xla/reference_util.h
@@ -486,19 +486,147 @@ class ReferenceUtil {
   }
 
   // Returns the result of a 2D pad on an input matrix.
-  static std::unique_ptr<Array2D<float>> PadArray2D(
-      const Array2D<float>& operand, const PaddingConfig& padding,
-      const float pad);
+  template <typename NativeT>
+  static std::unique_ptr<Array2D<NativeT>> PadArray2D(
+      const Array2D<NativeT>& operand, const PaddingConfig& padding,
+      const NativeT pad) {
+    int64 in0 = operand.n1();
+    int64 high_padding0 = padding.dimensions(0).edge_padding_high();
+    int64 low_padding0 = padding.dimensions(0).edge_padding_low();
+    int64 interior_padding0 = padding.dimensions(0).interior_padding();
+    int64 out0 =
+        in0 + low_padding0 + high_padding0 + (in0 - 1) * interior_padding0;
+
+    int64 in1 = operand.n2();
+    int64 high_padding1 = padding.dimensions(1).edge_padding_high();
+    int64 low_padding1 = padding.dimensions(1).edge_padding_low();
+    int64 interior_padding1 = padding.dimensions(1).interior_padding();
+    int64 out1 =
+        in1 + low_padding1 + high_padding1 + (in1 - 1) * interior_padding1;
+
+    auto result = MakeUnique<Array2D<NativeT>>(out0, out1);
+    result->Fill(pad);
+    int64 o0 = low_padding0;
+    for (int64 i0 = 0; i0 < in0; ++i0) {
+      int64 o1 = low_padding1;
+      for (int64 i1 = 0; i1 < in1; ++i1) {
+        if (o0 >= 0 && o1 >= 0 && o0 < out0 && o1 < out1) {
+          (*result)(o0, o1) = operand(i0, i1);
+        }
+        o1 += interior_padding1 + 1;
+      }
+      o0 += interior_padding0 + 1;
+    }
+    return result;
+  }
 
   // Returns the result of a 3D pad on an input matrix.
-  static Array3D<float> PadArray3D(const Array3D<float>& operand,
-                                   const PaddingConfig& padding,
-                                   const float pad);
+  template <typename NativeT>
+  static Array3D<NativeT> PadArray3D(const Array3D<NativeT>& operand,
+                                     const PaddingConfig& padding,
+                                     const NativeT pad) {
+    CHECK_EQ(padding.dimensions_size(), 3);
+
+    const std::vector<int64> input_bounds = {operand.n1(), operand.n2(),
+                                             operand.n3()};
+    std::vector<int64> pad_low(3);
+    std::vector<int64> pad_high(3);
+    std::vector<int64> pad_interior(3);
+    std::vector<int64> output_bounds(3);
+    for (int64 i = 0; i < 3; ++i) {
+      pad_low[i] = padding.dimensions(i).edge_padding_low();
+      pad_high[i] = padding.dimensions(i).edge_padding_high();
+      CHECK_LE(0, pad_low[i]);
+      CHECK_LE(0, pad_high[i]);
+      CHECK_LE(0, padding.dimensions(i).interior_padding())
+          << "not implemented";
+      pad_interior[i] = padding.dimensions(i).interior_padding();
+
+      output_bounds[i] = pad_low[i] + input_bounds[i] + pad_high[i] +
+                         (input_bounds[i] - 1) * pad_interior[i];
+    }
+
+    Array3D<NativeT> result(output_bounds[0], output_bounds[1],
+                            output_bounds[2]);
+    std::vector<int> indices = {0, 0, 0};
+    for (indices[0] = 0; indices[0] < output_bounds[0]; ++indices[0]) {
+      for (indices[1] = 0; indices[1] < output_bounds[1]; ++indices[1]) {
+        for (indices[2] = 0; indices[2] < output_bounds[2]; ++indices[2]) {
+          NativeT* value = &result(indices[0], indices[1], indices[2]);
+          bool value_padded = false;
+          for (int i = 0; i < 3; ++i) {
+            bool in_low_padding = indices[i] < pad_low[i];
+            bool in_high_padding = indices[i] >= output_bounds[i] - pad_high[i];
+            if (in_low_padding || in_high_padding) {
+              *value = pad;
+              value_padded = true;
+            }
+            if (pad_interior[i] &&
+                (indices[i] - pad_low[i]) % (pad_interior[i] + 1)) {
+              *value = pad;
+              value_padded = true;
+            }
+          }
+          if (value_padded) {
+            continue;
+          }
+          *value = operand((indices[0] - pad_low[0]) / (pad_interior[0] + 1),
+                           (indices[1] - pad_low[1]) / (pad_interior[1] + 1),
+                           (indices[2] - pad_low[2]) / (pad_interior[2] + 1));
+        }
+      }
+    }
+    return result;
+  }
 
   // Returns the result of a 4D pad on an input array.
-  static Array4D<float> PadArray4D(const Array4D<float>& operand,
-                                   const PaddingConfig& padding,
-                                   const float pad);
+  template <typename NativeT>
+  static Array4D<NativeT> PadArray4D(const Array4D<NativeT>& operand,
+                                     const PaddingConfig& padding,
+                                     const NativeT pad) {
+    CHECK_EQ(padding.dimensions_size(), 4);
+
+    const std::vector<int64> input_bounds = {operand.n1(), operand.n2(),
+                                             operand.n3(), operand.n4()};
+    std::vector<int64> pad_low(4);
+    std::vector<int64> pad_high(4);
+    std::vector<int64> pad_interior(4);
+    std::vector<int64> output_bounds(4);
+    for (int64 i = 0; i < 4; ++i) {
+      pad_low[i] = padding.dimensions(i).edge_padding_low();
+      pad_high[i] = padding.dimensions(i).edge_padding_high();
+      CHECK_LE(0, padding.dimensions(i).interior_padding())
+          << "not implemented";
+      pad_interior[i] = padding.dimensions(i).interior_padding();
+
+      output_bounds[i] = pad_low[i] + input_bounds[i] + pad_high[i] +
+                         (input_bounds[i] - 1) * pad_interior[i];
+    }
+
+    Array4D<NativeT> result(output_bounds[0], output_bounds[1],
+                            output_bounds[2], output_bounds[3]);
+    result.Each(
+        [&](tensorflow::gtl::ArraySlice<int64> indices, NativeT* value) {
+          for (int i = 0; i < 4; ++i) {
+            bool in_low_padding = indices[i] < pad_low[i];
+            bool in_high_padding = indices[i] >= output_bounds[i] - pad_high[i];
+            if (in_low_padding || in_high_padding) {
+              *value = pad;
+              return;
+            }
+            if (pad_interior[i] &&
+                (indices[i] - pad_low[i]) % (pad_interior[i] + 1)) {
+              *value = pad;
+              return;
+            }
+          }
+          *value = operand((indices[0] - pad_low[0]) / (pad_interior[0] + 1),
+                           (indices[1] - pad_low[1]) / (pad_interior[1] + 1),
+                           (indices[2] - pad_low[2]) / (pad_interior[2] + 1),
+                           (indices[3] - pad_low[3]) / (pad_interior[3] + 1));
+        });
+    return result;
+  }
 
   // ApplyElementwise2D(f, x, y, ...) returns the Array2D formed by running
   // f(x[i], y[i], ...) for each array element in the Array2Ds x, y, ....
diff --git a/tensorflow/compiler/xla/tests/client_library_test_base.cc b/tensorflow/compiler/xla/tests/client_library_test_base.cc
index ef54714e46..15bd273e9b 100644
--- a/tensorflow/compiler/xla/tests/client_library_test_base.cc
+++ b/tensorflow/compiler/xla/tests/client_library_test_base.cc
@@ -262,20 +262,34 @@ tensorflow::Status ClientLibraryTestBase::ComputeAndCompareLiteralWithStatus(
                  expected.shape().element_type() == PRED)
         << ShapeUtil::HumanString(expected.shape());
   }
+  // We allow using a float expected literal for a bfloat16 output. In this
+  // case, we need to convert the expected literal to bfloat16.
+  const Literal* expected_ptr = &expected;
+  std::unique_ptr<Literal> converted_expected;
+  Shape layout_shape;
+  if (expected.shape().element_type() == F32 && use_bfloat16_) {
+    converted_expected = LiteralTestUtil::ConvertF32ToBF16(expected);
+    expected_ptr = converted_expected.get();
+    if (shape_with_layout != nullptr) {
+      layout_shape = *shape_with_layout;
+      layout_shape.set_element_type(BF16);
+      shape_with_layout = &layout_shape;
+    }
+  }
   auto expect_equal = [&](const Literal& actual, const string& error_message) {
-    LiteralTestUtil::ExpectEqual(expected, actual, error_message);
+    LiteralTestUtil::ExpectEqual(*expected_ptr, actual, error_message);
   };
   if (execution_options_.debug_options().xla_test_all_output_layouts()) {
     return ComputeAndCompareLiteralWithAllOutputLayouts(
-        computation, expected, arguments, expect_equal);
+        computation, *expected_ptr, arguments, expect_equal);
   }
   if (execution_options_.debug_options().xla_test_all_input_layouts()) {
     return ComputeAndCompareLiteralWithAllInputLayouts(
-        computation, expected, arguments, expect_equal, shape_with_layout);
+        computation, *expected_ptr, arguments, expect_equal, shape_with_layout);
   }
   TF_ASSIGN_OR_RETURN(auto actual, ExecuteAndTransfer(computation, arguments,
                                                       shape_with_layout));
-  LiteralTestUtil::ExpectEqual(expected, *actual);
+  LiteralTestUtil::ExpectEqual(*expected_ptr, *actual);
   return tensorflow::Status::OK();
 }
 
@@ -286,20 +300,35 @@ tensorflow::Status ClientLibraryTestBase::ComputeAndCompareLiteralWithStatus(
   TF_RET_CHECK(ShapeUtil::ElementIsFloating(expected.shape()) ||
                ShapeUtil::ElementIsComplex(expected.shape()));
   TF_ASSIGN_OR_RETURN(auto computation, builder->Build());
+  // We allow using a float expected literal for a bfloat16 output. In this
+  // case, we need to convert the expected literal to bfloat16.
+  const Literal* expected_ptr = &expected;
+  std::unique_ptr<Literal> converted_expected;
+  Shape layout_shape;
+  if (expected.shape().element_type() == F32 && use_bfloat16_) {
+    converted_expected = LiteralTestUtil::ConvertF32ToBF16(expected);
+    expected_ptr = converted_expected.get();
+    layout_shape.set_element_type(BF16);
+    if (shape_with_layout != nullptr) {
+      layout_shape = *shape_with_layout;
+      layout_shape.set_element_type(BF16);
+      shape_with_layout = &layout_shape;
+    }
+  }
   auto expect_near = [&](const Literal& actual, const string& error_message) {
-    LiteralTestUtil::ExpectNear(expected, actual, error, error_message);
+    LiteralTestUtil::ExpectNear(*expected_ptr, actual, error, error_message);
   };
   if (execution_options_.debug_options().xla_test_all_output_layouts()) {
-    return ComputeAndCompareLiteralWithAllOutputLayouts(computation, expected,
-                                                        arguments, expect_near);
+    return ComputeAndCompareLiteralWithAllOutputLayouts(
+        computation, *expected_ptr, arguments, expect_near);
   }
   if (execution_options_.debug_options().xla_test_all_input_layouts()) {
     return ComputeAndCompareLiteralWithAllInputLayouts(
-        computation, expected, arguments, expect_near, shape_with_layout);
+        computation, *expected_ptr, arguments, expect_near, shape_with_layout);
   }
   TF_ASSIGN_OR_RETURN(auto actual, ExecuteAndTransfer(computation, arguments,
                                                       shape_with_layout));
-  LiteralTestUtil::ExpectNear(expected, *actual, error);
+  LiteralTestUtil::ExpectNear(*expected_ptr, *actual, error);
   return tensorflow::Status::OK();
 }
 
@@ -402,8 +431,11 @@ ClientLibraryTestBase::ComputeValueAndReference(
 
 Computation ClientLibraryTestBase::CreateScalarRelu() {
   ComputationBuilder builder(client_, "relu");
-  auto z_value = builder.Parameter(0, ShapeUtil::MakeShape(F32, {}), "z_value");
-  auto zero = builder.ConstantR0<float>(0.0);
+  auto shape = ShapeUtil::MakeShape(use_bfloat16_ ? BF16 : F32, {});
+  auto z_value = builder.Parameter(0, shape, "z_value");
+  auto zero = use_bfloat16_
+                  ? builder.ConstantR0<bfloat16>(static_cast<bfloat16>(0.0f))
+                  : builder.ConstantR0<float>(0.0f);
   builder.Max(z_value, zero);
   auto computation_status = builder.Build();
   TF_CHECK_OK(computation_status.status());
@@ -412,8 +444,9 @@ Computation ClientLibraryTestBase::CreateScalarRelu() {
 
 Computation ClientLibraryTestBase::CreateScalarMax() {
   ComputationBuilder builder(client_, "max");
-  auto x = builder.Parameter(0, ShapeUtil::MakeShape(F32, {}), "x");
-  auto y = builder.Parameter(1, ShapeUtil::MakeShape(F32, {}), "y");
+  auto shape = ShapeUtil::MakeShape(use_bfloat16_ ? BF16 : F32, {});
+  auto x = builder.Parameter(0, shape, "x");
+  auto y = builder.Parameter(1, shape, "y");
   builder.Max(x, y);
   auto computation_status = builder.Build();
   TF_CHECK_OK(computation_status.status());
@@ -422,11 +455,12 @@ Computation ClientLibraryTestBase::CreateScalarMax() {
 
 Computation ClientLibraryTestBase::CreateScalarReluSensitivity() {
   ComputationBuilder builder(client_, "relu_sensitivity");
-  auto activation =
-      builder.Parameter(0, ShapeUtil::MakeShape(F32, {}), "activation");
-  auto backprop =
-      builder.Parameter(1, ShapeUtil::MakeShape(F32, {}), "backprop");
-  auto zero = builder.ConstantR0<float>(0.0);
+  auto shape = ShapeUtil::MakeShape(use_bfloat16_ ? BF16 : F32, {});
+  auto activation = builder.Parameter(0, shape, "activation");
+  auto backprop = builder.Parameter(1, shape, "backprop");
+  auto zero = use_bfloat16_
+                  ? builder.ConstantR0<bfloat16>(static_cast<bfloat16>(0.0f))
+                  : builder.ConstantR0<float>(0.0f);
   auto activation_gtz = builder.Gt(activation, zero);
   builder.Select(activation_gtz, /*on_true=*/backprop, /*on_false=*/zero);
 
@@ -461,4 +495,21 @@ ClientLibraryTestBase::CreatePatternedMatrixWithZeroPadding(int rows, int cols,
   return array;
 }
 
+std::unique_ptr<GlobalData>
+ClientLibraryTestBase::CreateParameterAndTransferLiteral(
+    int64 parameter_number, const Literal& literal, const string& name,
+    ComputationBuilder* builder, ComputationDataHandle* data_handle) {
+  const Literal* param_literal = &literal;
+  std::unique_ptr<Literal> converted_literal;
+  if (use_bfloat16_ && literal.shape().element_type() == F32) {
+    converted_literal = LiteralTestUtil::ConvertF32ToBF16(literal);
+    param_literal = converted_literal.get();
+  }
+  std::unique_ptr<GlobalData> data =
+      client_->TransferToServer(*param_literal).ConsumeValueOrDie();
+  *data_handle =
+      builder->Parameter(parameter_number, param_literal->shape(), name);
+  return data;
+}
+
 }  // namespace xla
diff --git a/tensorflow/compiler/xla/tests/client_library_test_base.h b/tensorflow/compiler/xla/tests/client_library_test_base.h
index af22c12684..e8599a5cd3 100644
--- a/tensorflow/compiler/xla/tests/client_library_test_base.h
+++ b/tensorflow/compiler/xla/tests/client_library_test_base.h
@@ -245,51 +245,76 @@ class ClientLibraryTestBase : public ::testing::Test {
       const int rows, const int cols, const int rows_padded,
       const int cols_padded);
 
-  // Create a parameter instruction that wraps a given value and then stores
+  // Creates a parameter instruction, transfers the literal for the parameter to
+  // server, then stores into "data_handle" the global handle for that
+  // parameter. When the use_bfloat16 flag is set but the literal has F32
+  // elements, the literal will be converted to BF16 before being transferred.
+  std::unique_ptr<GlobalData> CreateParameterAndTransferLiteral(
+      int64 parameter_number, const Literal& literal, const string& name,
+      ComputationBuilder* builder, ComputationDataHandle* data_handle);
+
+  // Creates a parameter instruction that wraps a given value and then stores
   // into "data_handle" the global handle for that parameter.
   //
   // "parameter_number" is the parameter number.
   // "name" is the name of the parameter instruction.
+  //
+  // When the use_bfloat16 flag is set but NativeT is float, the data will be
+  // converted to bfloat16.
   template <typename NativeT>
   std::unique_ptr<GlobalData> CreateR0Parameter(
       NativeT value, int64 parameter_number, const string& name,
       ComputationBuilder* builder, ComputationDataHandle* data_handle);
 
-  // Create a parameter instruction that wraps the given values and then stores
+  // Creates a parameter instruction that wraps the given values and then stores
   // into "data_handle" the global handle for that parameter.
   //
   // "parameter_number" is the parameter number.
   // "name" is the name of the parameter instruction.
+  //
+  // When the use_bfloat16 flag is set but NativeT is float, the data will be
+  // converted to bfloat16.
   template <typename NativeT>
   std::unique_ptr<GlobalData> CreateR1Parameter(
       tensorflow::gtl::ArraySlice<NativeT> values, int64 parameter_number,
       const string& name, ComputationBuilder* builder,
       ComputationDataHandle* data_handle);
 
-  // Create a parameter instruction that wraps the given constant array
+  // Creates a parameter instruction that wraps the given constant array
   // "array_2d" and then stores to "data_handle" the global handle for that
   // parameter.
   //
   // "parameter_number" is the parameter number.
   // "name" is the name of the parameter instruction.
+  //
+  // When the use_bfloat16 flag is set but NativeT is float, the data will be
+  // converted to bfloat16.
   template <typename NativeT>
   std::unique_ptr<GlobalData> CreateR2Parameter(
       const Array2D<NativeT>& array_2d, int64 parameter_number,
       const string& name, ComputationBuilder* builder,
       ComputationDataHandle* data_handle);
 
-  // Create a parameter instruction that wraps the given constant array
+  // Creates a parameter instruction that wraps the given constant array
   // "array_3d" and then stores to "data_handle" the global handle for that
   // parameter.
   //
   // "parameter_number" is the parameter number.
   // "name" is the name of the parameter instruction.
+  //
+  // When the use_bfloat16 flag is set but NativeT is float, the data will be
+  // converted to bfloat16.
   template <typename NativeT>
   std::unique_ptr<GlobalData> CreateR3Parameter(
       const Array3D<NativeT>& array_3d, int64 parameter_number,
       const string& name, ComputationBuilder* builder,
       ComputationDataHandle* data_handle);
 
+  // Getter and setter for the use_bfloat16 flag, which indicates whether to run
+  // tests with all float-type input/output converted to bfloat16.
+  bool use_bfloat16() const { return use_bfloat16_; }
+  void set_use_bfloat16(bool value) { use_bfloat16_ = value; }
+
   Client* client_;
   ExecutionOptions execution_options_;
 
@@ -315,6 +340,10 @@ class ClientLibraryTestBase : public ::testing::Test {
   ComputeValueAndReference(ComputationBuilder* builder,
                            const ComputationDataHandle& operand,
                            tensorflow::gtl::ArraySlice<Literal> arguments);
+
+  // Whether to run tests with all float-type input/output converted to
+  // bfloat16.
+  bool use_bfloat16_ = false;
 };
 
 template <typename NativeT>
@@ -443,6 +472,9 @@ std::unique_ptr<GlobalData> ClientLibraryTestBase::CreateR0Parameter(
     NativeT value, int64 parameter_number, const string& name,
     ComputationBuilder* builder, ComputationDataHandle* data_handle) {
   std::unique_ptr<Literal> literal = Literal::CreateR0(value);
+  if (use_bfloat16_ && literal->shape().element_type() == F32) {
+    literal = LiteralTestUtil::ConvertF32ToBF16(*literal);
+  }
   std::unique_ptr<GlobalData> data =
       client_->TransferToServer(*literal).ConsumeValueOrDie();
   *data_handle = builder->Parameter(parameter_number, literal->shape(), name);
@@ -455,6 +487,9 @@ std::unique_ptr<GlobalData> ClientLibraryTestBase::CreateR1Parameter(
     const string& name, ComputationBuilder* builder,
     ComputationDataHandle* data_handle) {
   std::unique_ptr<Literal> literal = Literal::CreateR1(values);
+  if (use_bfloat16_ && literal->shape().element_type() == F32) {
+    literal = LiteralTestUtil::ConvertF32ToBF16(*literal);
+  }
   std::unique_ptr<GlobalData> data =
       client_->TransferToServer(*literal).ConsumeValueOrDie();
   *data_handle = builder->Parameter(parameter_number, literal->shape(), name);
@@ -467,6 +502,9 @@ std::unique_ptr<GlobalData> ClientLibraryTestBase::CreateR2Parameter(
     const string& name, ComputationBuilder* builder,
     ComputationDataHandle* data_handle) {
   std::unique_ptr<Literal> literal = Literal::CreateR2FromArray2D(array_2d);
+  if (use_bfloat16_ && literal->shape().element_type() == F32) {
+    literal = LiteralTestUtil::ConvertF32ToBF16(*literal);
+  }
   std::unique_ptr<GlobalData> data =
       client_->TransferToServer(*literal).ConsumeValueOrDie();
   *data_handle = builder->Parameter(parameter_number, literal->shape(), name);
@@ -479,6 +517,9 @@ std::unique_ptr<GlobalData> ClientLibraryTestBase::CreateR3Parameter(
     const string& name, ComputationBuilder* builder,
     ComputationDataHandle* data_handle) {
   std::unique_ptr<Literal> literal = Literal::CreateR3FromArray3D(array_3d);
+  if (use_bfloat16_ && literal->shape().element_type() == F32) {
+    literal = LiteralTestUtil::ConvertF32ToBF16(*literal);
+  }
   std::unique_ptr<GlobalData> data =
       client_->TransferToServer(*literal).ConsumeValueOrDie();
   *data_handle = builder->Parameter(parameter_number, literal->shape(), name);
diff --git a/tensorflow/compiler/xla/tests/literal_test_util.cc b/tensorflow/compiler/xla/tests/literal_test_util.cc
index 9ae5c7b6f0..6aa27e5470 100644
--- a/tensorflow/compiler/xla/tests/literal_test_util.cc
+++ b/tensorflow/compiler/xla/tests/literal_test_util.cc
@@ -100,6 +100,38 @@ namespace xla {
   ASSERT_EQ(expected.ShortDebugString(), actual.ShortDebugString());
 }
 
+/* static */ std::unique_ptr<Literal> LiteralTestUtil::ConvertBF16ToF32(
+    const Literal& bf16_literal) {
+  CHECK_EQ(bf16_literal.shape().element_type(), BF16);
+  Shape converted_shape = bf16_literal.shape();
+  converted_shape.set_element_type(F32);
+  auto converted = Literal::CreateFromShape(converted_shape);
+  if (!ShapeUtil::HasZeroElements(converted_shape)) {
+    std::vector<int64> index(converted_shape.dimensions_size(), 0);
+    do {
+      converted->Set<float>(
+          index, static_cast<float>(bf16_literal.Get<bfloat16>(index)));
+    } while (IndexUtil::BumpIndices(converted_shape, &index));
+  }
+  return converted;
+}
+
+/* static */ std::unique_ptr<Literal> LiteralTestUtil::ConvertF32ToBF16(
+    const Literal& f32_literal) {
+  CHECK_EQ(f32_literal.shape().element_type(), F32);
+  Shape converted_shape = f32_literal.shape();
+  converted_shape.set_element_type(BF16);
+  auto converted = Literal::CreateFromShape(converted_shape);
+  if (!ShapeUtil::HasZeroElements(converted_shape)) {
+    std::vector<int64> index(converted_shape.dimensions_size(), 0);
+    do {
+      converted->Set<bfloat16>(
+          index, static_cast<bfloat16>(f32_literal.Get<float>(index)));
+    } while (IndexUtil::BumpIndices(converted_shape, &index));
+  }
+  return converted;
+}
+
 namespace {
 
 string Hostname() {
diff --git a/tensorflow/compiler/xla/tests/literal_test_util.h b/tensorflow/compiler/xla/tests/literal_test_util.h
index 467d44b857..6e4add2690 100644
--- a/tensorflow/compiler/xla/tests/literal_test_util.h
+++ b/tensorflow/compiler/xla/tests/literal_test_util.h
@@ -59,6 +59,12 @@ class LiteralTestUtil {
   static void AssertEqualShapesAndLayouts(const Shape& expected,
                                           const Shape& actual);
 
+  // Converts a bfloat16 literal to a float literal.
+  static std::unique_ptr<Literal> ConvertBF16ToF32(const Literal& bf16_literal);
+
+  // Converts a float literal to a bfloat16 literal.
+  static std::unique_ptr<Literal> ConvertF32ToBF16(const Literal& f32_literal);
+
   // Asserts that the expected and actual literals are (bitwise) equal for all
   // elements in the literal. Also, asserts that the rank, dimensions sizes, and
   // primitive type are equal.
-- 
GitLab


From 8781d69b2e619e64555cb00b13783a7eee524b81 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 27 Nov 2017 21:48:38 -0800
Subject: [PATCH 0334/1225] Allow BF16 to use error spec.

PiperOrigin-RevId: 177114689
---
 tensorflow/compiler/xla/tests/client_library_test_base.h | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/tensorflow/compiler/xla/tests/client_library_test_base.h b/tensorflow/compiler/xla/tests/client_library_test_base.h
index e8599a5cd3..1d27880fb1 100644
--- a/tensorflow/compiler/xla/tests/client_library_test_base.h
+++ b/tensorflow/compiler/xla/tests/client_library_test_base.h
@@ -387,6 +387,7 @@ void ClientLibraryTestBase::ComputeAndCompareR1(
     tensorflow::gtl::ArraySlice<GlobalData*> arguments, ErrorSpec error) {
   static_assert(std::is_same<NativeT, float>::value ||
                     std::is_same<NativeT, double>::value ||
+                    std::is_same<NativeT, bfloat16>::value ||
                     std::is_same<NativeT, complex64>::value,
                 "Float or complex type required when specifying an ErrorSpec");
   std::unique_ptr<Literal> expected_literal =
@@ -411,6 +412,7 @@ void ClientLibraryTestBase::ComputeAndCompareR2(
     tensorflow::gtl::ArraySlice<GlobalData*> arguments, ErrorSpec error) {
   static_assert(std::is_same<NativeT, float>::value ||
                     std::is_same<NativeT, double>::value ||
+                    std::is_same<NativeT, bfloat16>::value ||
                     std::is_same<NativeT, complex64>::value,
                 "Float or complex type required when specifying an ErrorSpec");
   std::unique_ptr<Literal> expected_literal =
@@ -435,6 +437,7 @@ void ClientLibraryTestBase::ComputeAndCompareR3(
     tensorflow::gtl::ArraySlice<GlobalData*> arguments, ErrorSpec error) {
   static_assert(std::is_same<NativeT, float>::value ||
                     std::is_same<NativeT, double>::value ||
+                    std::is_same<NativeT, bfloat16>::value ||
                     std::is_same<NativeT, complex64>::value,
                 "Float or complex type required when specifying an ErrorSpec");
   std::unique_ptr<Literal> expected_literal =
@@ -459,6 +462,7 @@ void ClientLibraryTestBase::ComputeAndCompareR4(
     tensorflow::gtl::ArraySlice<GlobalData*> arguments, ErrorSpec error) {
   static_assert(std::is_same<NativeT, float>::value ||
                     std::is_same<NativeT, double>::value ||
+                    std::is_same<NativeT, bfloat16>::value ||
                     std::is_same<NativeT, complex64>::value,
                 "Float or complex type required when specifying an ErrorSpec");
   std::unique_ptr<Literal> expected_literal =
-- 
GitLab


From 102bfdfd830f4dab6e00371e63a82561e1246518 Mon Sep 17 00:00:00 2001
From: David Majnemer <majnemer@google.com>
Date: Mon, 27 Nov 2017 22:31:25 -0800
Subject: [PATCH 0335/1225] [XLA] Separate input and output spatial dimensions
 for convolution

This lets us reason about input spatial dimensions as distinct from output spatial dimensions. By doing this, it opens up more opportunities for assigning more interesting, different, layouts for the activations and the output.

PiperOrigin-RevId: 177117140
---
 .../compiler/tf2xla/kernels/conv_ops.cc       | 19 ++---
 .../xla/client/computation_builder.cc         | 45 +++++++-----
 .../compiler/xla/client/computation_builder.h |  5 +-
 tensorflow/compiler/xla/reference_util.cc     | 11 +--
 .../compiler/xla/reference_util_test.cc       | 16 +++--
 .../xla/service/algebraic_simplifier_test.cc  | 13 ++--
 .../xla/service/cpu/conv_canonicalization.cc  | 23 ++++---
 .../service/cpu/conv_canonicalization_test.cc | 12 ++--
 .../xla/service/cpu/ir_emission_utils.cc      |  7 +-
 .../compiler/xla/service/cpu/ir_emitter.cc    | 69 ++++++++++---------
 .../xla/service/gpu/convolution_folding.cc    | 34 +++++----
 .../service/gpu/convolution_folding_test.cc   | 18 +++--
 .../xla/service/gpu/convolution_thunk.cc      |  9 +--
 .../service/gpu/instruction_fusion_test.cc    |  6 +-
 .../xla/service/gpu/ir_emission_utils.cc      |  2 +-
 .../xla/service/gpu/layout_assignment.cc      | 12 ++--
 .../compiler/xla/service/gpu/pad_insertion.cc | 19 +++--
 .../compiler/xla/service/hlo_evaluator.cc     | 16 +++--
 .../xla/service/hlo_evaluator_test.cc         | 15 ++--
 .../compiler/xla/service/hlo_instruction.cc   | 14 ++--
 .../compiler/xla/service/shape_inference.cc   | 29 ++++++--
 .../xla/service/shape_inference_test.cc       | 24 ++++---
 .../compiler/xla/service/transpose_folding.cc | 18 +----
 .../xla/service/transpose_folding_test.cc     | 16 +++--
 .../convolution_dimension_numbers_test.cc     | 18 +++--
 .../compiler/xla/tests/convolution_test.cc    | 18 +++--
 .../xla/tests/convolution_variants_test.cc    | 65 ++++++++++-------
 .../compiler/xla/tools/parser/hlo_parser.cc   | 13 ++--
 .../xla/tools/parser/hlo_parser_test.cc       |  6 --
 tensorflow/compiler/xla/xla_data.proto        | 22 +++---
 30 files changed, 353 insertions(+), 241 deletions(-)

diff --git a/tensorflow/compiler/tf2xla/kernels/conv_ops.cc b/tensorflow/compiler/tf2xla/kernels/conv_ops.cc
index 885f716afa..c5017704e2 100644
--- a/tensorflow/compiler/tf2xla/kernels/conv_ops.cc
+++ b/tensorflow/compiler/tf2xla/kernels/conv_ops.cc
@@ -184,10 +184,11 @@ class ConvOp : public XlaOpKernel {
     dims.set_input_feature_dimension(feature_dim);
     dims.set_output_feature_dimension(feature_dim);
     for (int i = 0; i < num_spatial_dims_; ++i) {
-      int input_dim = GetTensorSpatialDimIndex(num_dims(), data_format_, i);
-      dims.add_spatial_dimensions(input_dim);
+      int64 dim = GetTensorSpatialDimIndex(num_dims(), data_format_, i);
+      dims.add_input_spatial_dimensions(dim);
       dims.add_kernel_spatial_dimensions(i);
-      window_strides.push_back(strides_.at(input_dim));
+      dims.add_output_spatial_dimensions(dim);
+      window_strides.push_back(strides_.at(dim));
     }
     dims.set_kernel_input_feature_dimension(num_spatial_dims_);
     dims.set_kernel_output_feature_dimension(num_spatial_dims_ + 1);
@@ -302,9 +303,10 @@ class ConvBackpropInputOp : public XlaOpKernel {
     std::vector<int64> lhs_dilation(num_spatial_dims_);
     std::vector<int64> ones(num_spatial_dims_, 1);
     for (int i = 0; i < num_spatial_dims_; ++i) {
-      dnums.add_spatial_dimensions(
-          GetTensorSpatialDimIndex(num_dims(), data_format_, i));
+      int64 dim = GetTensorSpatialDimIndex(num_dims(), data_format_, i);
+      dnums.add_input_spatial_dimensions(dim);
       dnums.add_kernel_spatial_dimensions(i);
+      dnums.add_output_spatial_dimensions(dim);
 
       kernel_spatial_dims[i] = i;
       padding[i] = {dims.spatial_dims[i].pad_before,
@@ -439,9 +441,10 @@ class ConvBackpropFilterOp : public XlaOpKernel {
     std::vector<int64> ones(num_spatial_dims_, 1);
 
     for (int i = 0; i < num_spatial_dims_; ++i) {
-      int dim = GetTensorSpatialDimIndex(num_dims(), data_format_, i);
-      dnums.add_spatial_dimensions(dim);
+      int64 dim = GetTensorSpatialDimIndex(num_dims(), data_format_, i);
+      dnums.add_input_spatial_dimensions(dim);
       dnums.add_kernel_spatial_dimensions(dim);
+      dnums.add_output_spatial_dimensions(dim);
 
       // We will also need to pad the input with zeros such that after the
       // convolution, we get the right size for the filter.
@@ -506,7 +509,7 @@ class ConvBackpropFilterOp : public XlaOpKernel {
     std::vector<int64> transpose_dims;
     transpose_dims.reserve(num_dims());
     for (int i = 0; i < num_spatial_dims_; ++i) {
-      transpose_dims.push_back(dnums.spatial_dimensions(i));
+      transpose_dims.push_back(dnums.output_spatial_dimensions(i));
     }
     transpose_dims.push_back(c_dim);
     transpose_dims.push_back(n_dim);
diff --git a/tensorflow/compiler/xla/client/computation_builder.cc b/tensorflow/compiler/xla/client/computation_builder.cc
index b17d221ef5..cce9310003 100644
--- a/tensorflow/compiler/xla/client/computation_builder.cc
+++ b/tensorflow/compiler/xla/client/computation_builder.cc
@@ -694,11 +694,15 @@ bool ComputationBuilder::VerifyConvolution(
         }
         return true;
       };
-  return check_spatial_dimensions("spatial_dimensions",
-                                  dimension_numbers.spatial_dimensions()) &&
+  return check_spatial_dimensions(
+             "input_spatial_dimensions",
+             dimension_numbers.input_spatial_dimensions()) &&
          check_spatial_dimensions(
              "kernel_spatial_dimensions",
-             dimension_numbers.kernel_spatial_dimensions());
+             dimension_numbers.kernel_spatial_dimensions()) &&
+         check_spatial_dimensions(
+             "output_spatial_dimensions",
+             dimension_numbers.output_spatial_dimensions());
 }
 
 ComputationDataHandle ComputationBuilder::ConvWithGeneralDimensions(
@@ -730,11 +734,11 @@ ComputationDataHandle ComputationBuilder::ConvWithGeneralDimensions(
   }
 
   std::vector<int64> base_area_dimensions(
-      dimension_numbers.spatial_dimensions_size());
+      dimension_numbers.input_spatial_dimensions_size());
   for (std::vector<int64>::size_type i = 0; i < base_area_dimensions.size();
        ++i) {
     base_area_dimensions[i] =
-        lhs_shape->dimensions(dimension_numbers.spatial_dimensions(i));
+        lhs_shape->dimensions(dimension_numbers.input_spatial_dimensions(i));
   }
 
   std::vector<int64> window_dimensions(
@@ -1845,25 +1849,27 @@ ComputationBuilder::CreateDefaultConvDimensionNumbers(int num_spatial_dims) {
   dimension_numbers.set_kernel_input_feature_dimension(
       kConvKernelInputDimension);
   for (int i = 0; i < num_spatial_dims; ++i) {
-    dimension_numbers.add_spatial_dimensions(i + 2);
+    dimension_numbers.add_input_spatial_dimensions(i + 2);
     dimension_numbers.add_kernel_spatial_dimensions(i + 2);
+    dimension_numbers.add_output_spatial_dimensions(i + 2);
   }
   return dimension_numbers;
 }
 
 /* static */ StatusOr<ConvolutionDimensionNumbers>
 ComputationBuilder::CreateConvDimensionNumbers(
-    int64 input_batch, int64 input_feature, int64 output_batch,
-    int64 output_feature, int64 first_spatial, int64 second_spatial,
+    int64 input_batch, int64 input_feature, int64 input_first_spatial,
+    int64 input_second_spatial, int64 output_batch, int64 output_feature,
+    int64 output_first_spatial, int64 output_second_spatial,
     int64 kernel_output_feature, int64 kernel_input_feature,
     int64 kernel_first_spatial, int64 kernel_second_spatial) {
-  if (std::set<int64>(
-          {input_batch, input_feature, first_spatial, second_spatial})
+  if (std::set<int64>({input_batch, input_feature, input_first_spatial,
+                       input_second_spatial})
           .size() != 4) {
     return FailedPrecondition(
         "dimension numbers for the input are not unique: (%lld, %lld, %lld, "
         "%lld)",
-        input_batch, input_feature, first_spatial, second_spatial);
+        input_batch, input_feature, input_first_spatial, input_second_spatial);
   }
   if (std::set<int64>({kernel_output_feature, kernel_input_feature,
                        kernel_first_spatial, kernel_second_spatial})
@@ -1874,25 +1880,28 @@ ComputationBuilder::CreateConvDimensionNumbers(
         kernel_output_feature, kernel_input_feature, kernel_first_spatial,
         kernel_second_spatial);
   }
-  if (std::set<int64>(
-          {output_batch, output_feature, first_spatial, second_spatial})
+  if (std::set<int64>({output_batch, output_feature, output_first_spatial,
+                       output_second_spatial})
           .size() != 4) {
     return FailedPrecondition(
         "dimension numbers for the output are not unique: (%lld, %lld, %lld, "
         "%lld)",
-        output_batch, output_feature, first_spatial, second_spatial);
+        output_batch, output_feature, output_first_spatial,
+        output_second_spatial);
   }
   ConvolutionDimensionNumbers dimension_numbers;
   dimension_numbers.set_input_batch_dimension(input_batch);
   dimension_numbers.set_input_feature_dimension(input_feature);
-  dimension_numbers.set_output_batch_dimension(output_batch);
-  dimension_numbers.set_output_feature_dimension(output_feature);
-  dimension_numbers.add_spatial_dimensions(first_spatial);
-  dimension_numbers.add_spatial_dimensions(second_spatial);
+  dimension_numbers.add_input_spatial_dimensions(input_first_spatial);
+  dimension_numbers.add_input_spatial_dimensions(input_second_spatial);
   dimension_numbers.set_kernel_output_feature_dimension(kernel_output_feature);
   dimension_numbers.set_kernel_input_feature_dimension(kernel_input_feature);
   dimension_numbers.add_kernel_spatial_dimensions(kernel_first_spatial);
   dimension_numbers.add_kernel_spatial_dimensions(kernel_second_spatial);
+  dimension_numbers.set_output_batch_dimension(output_batch);
+  dimension_numbers.set_output_feature_dimension(output_feature);
+  dimension_numbers.add_output_spatial_dimensions(output_first_spatial);
+  dimension_numbers.add_output_spatial_dimensions(output_second_spatial);
   return dimension_numbers;
 }
 
diff --git a/tensorflow/compiler/xla/client/computation_builder.h b/tensorflow/compiler/xla/client/computation_builder.h
index 3a34010e6a..d2dbbbbebb 100644
--- a/tensorflow/compiler/xla/client/computation_builder.h
+++ b/tensorflow/compiler/xla/client/computation_builder.h
@@ -413,8 +413,9 @@ class ComputationBuilder {
   // Creates a ConvolutionDimensionNumbers with the given arguments. Returns an
   // error if either the input or the weight dimension numbers have conflicts.
   static StatusOr<ConvolutionDimensionNumbers> CreateConvDimensionNumbers(
-      int64 input_batch, int64 input_feature, int64 output_batch,
-      int64 output_feature, int64 first_spatial, int64 second_spatial,
+      int64 input_batch, int64 input_feature, int64 input_first_spatial,
+      int64 input_second_spatial, int64 output_batch, int64 output_feature,
+      int64 output_first_spatial, int64 output_second_spatial,
       int64 kernel_output_feature, int64 kernel_input_feature,
       int64 kernel_first_spatial, int64 kernel_second_spatial);
 
diff --git a/tensorflow/compiler/xla/reference_util.cc b/tensorflow/compiler/xla/reference_util.cc
index 5a899d550b..5bb81b80dd 100644
--- a/tensorflow/compiler/xla/reference_util.cc
+++ b/tensorflow/compiler/xla/reference_util.cc
@@ -102,7 +102,9 @@ ReferenceUtil::ConvArray3DGeneralDimensionsDilated(
     const Array3D<float>& lhs, const Array3D<float>& rhs, int64 kernel_stride,
     Padding padding, int64 lhs_dilation, int64 rhs_dilation,
     const ConvolutionDimensionNumbers& dnums) {
-  CHECK_EQ(dnums.spatial_dimensions_size(), 1);
+  CHECK_EQ(dnums.input_spatial_dimensions_size(), 1);
+  CHECK_EQ(dnums.kernel_spatial_dimensions_size(), 1);
+  CHECK_EQ(dnums.output_spatial_dimensions_size(), 1);
   // Reuse the code for Array4D-convolution by extending the 3D input into a 4D
   // array by adding a fourth dummy dimension of size 1 without stride, padding
   // and dilation.
@@ -120,8 +122,9 @@ ReferenceUtil::ConvArray3DGeneralDimensionsDilated(
       });
   // Add a second dummy spatial dimensions.
   ConvolutionDimensionNumbers dnums2d = dnums;
-  dnums2d.add_spatial_dimensions(3);
+  dnums2d.add_input_spatial_dimensions(3);
   dnums2d.add_kernel_spatial_dimensions(3);
+  dnums2d.add_output_spatial_dimensions(3);
   std::unique_ptr<Array4D<float>> convr4 = ConvArray4DGeneralDimensionsDilated(
       a4dlhs, a4drhs, {kernel_stride, 1}, padding, {lhs_dilation, 1},
       {rhs_dilation, 1}, dnums2d);
@@ -465,9 +468,9 @@ ReferenceUtil::ConvArray4DGeneralDimensionsDilated(
   }
 
   ordered_input_dimensions[0] =
-      lhs_literal->shape().dimensions(dnums.spatial_dimensions(0));
+      lhs_literal->shape().dimensions(dnums.input_spatial_dimensions(0));
   ordered_input_dimensions[1] =
-      lhs_literal->shape().dimensions(dnums.spatial_dimensions(1));
+      lhs_literal->shape().dimensions(dnums.input_spatial_dimensions(1));
   ordered_kernel_dimensions[0] =
       rhs_literal->shape().dimensions(dnums.kernel_spatial_dimensions(0));
   ordered_kernel_dimensions[1] =
diff --git a/tensorflow/compiler/xla/reference_util_test.cc b/tensorflow/compiler/xla/reference_util_test.cc
index eb6a71242f..846ccdc83d 100644
--- a/tensorflow/compiler/xla/reference_util_test.cc
+++ b/tensorflow/compiler/xla/reference_util_test.cc
@@ -60,7 +60,9 @@ TEST_F(ReferenceUtilTest, TransposeArray2D) {
 
 TEST_F(ReferenceUtilTest, MatmulArray2D) {
   Array2D<float> rhs({
-      {7.f, 8.f}, {9.f, 10.f}, {11.f, 12.f},
+      {7.f, 8.f},
+      {9.f, 10.f},
+      {11.f, 12.f},
   });
   auto result = ReferenceUtil::MatmulArray2D(*matrix_, rhs);
   auto actual_literal = Literal::CreateR2FromArray2D(*result);
@@ -326,8 +328,10 @@ TEST_F(ReferenceUtilTest, ConvGeneralDimensionsWithSamePadding) {
   dimension_numbers.set_input_feature_dimension(0);
   dimension_numbers.set_output_batch_dimension(2);
   dimension_numbers.set_output_feature_dimension(0);
-  dimension_numbers.add_spatial_dimensions(1);
-  dimension_numbers.add_spatial_dimensions(3);
+  dimension_numbers.add_input_spatial_dimensions(1);
+  dimension_numbers.add_output_spatial_dimensions(1);
+  dimension_numbers.add_input_spatial_dimensions(3);
+  dimension_numbers.add_output_spatial_dimensions(3);
   dimension_numbers.set_kernel_output_feature_dimension(0);
   dimension_numbers.set_kernel_input_feature_dimension(2);
   dimension_numbers.add_kernel_spatial_dimensions(1);
@@ -380,8 +384,10 @@ TEST_F(ReferenceUtilTest, ConvGeneralDimensionsWithValidPadding) {
   dimension_numbers.set_input_feature_dimension(0);
   dimension_numbers.set_output_batch_dimension(2);
   dimension_numbers.set_output_feature_dimension(0);
-  dimension_numbers.add_spatial_dimensions(1);
-  dimension_numbers.add_spatial_dimensions(3);
+  dimension_numbers.add_input_spatial_dimensions(1);
+  dimension_numbers.add_output_spatial_dimensions(1);
+  dimension_numbers.add_input_spatial_dimensions(3);
+  dimension_numbers.add_output_spatial_dimensions(3);
 
   dimension_numbers.set_kernel_output_feature_dimension(0);
   dimension_numbers.set_kernel_input_feature_dimension(2);
diff --git a/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc b/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc
index 097f30be32..56dfb1cf0b 100644
--- a/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc
+++ b/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc
@@ -1624,8 +1624,11 @@ TEST_F(AlgebraicSimplifierTest, ConvertConvToMatmul) {
     ConvolutionDimensionNumbers dnums;
     std::vector<int64> in_dims;
     int in_channel_idx = -1;
-    dnums.add_spatial_dimensions(-1);  // filled in later
-    dnums.add_spatial_dimensions(-1);  // filled in later
+    // filled in later
+    dnums.add_input_spatial_dimensions(-1);
+    dnums.add_output_spatial_dimensions(-1);
+    dnums.add_input_spatial_dimensions(-1);
+    dnums.add_output_spatial_dimensions(-1);
     for (int i = 0; i < strlen(options.dim_order); ++i) {
       char ch = options.dim_order[i];
       if (ch == 'N') {
@@ -1633,10 +1636,12 @@ TEST_F(AlgebraicSimplifierTest, ConvertConvToMatmul) {
         dnums.set_output_batch_dimension(i);
         in_dims.push_back(options.in_batch);
       } else if (ch == 'H') {
-        dnums.set_spatial_dimensions(0, i);
+        dnums.set_input_spatial_dimensions(0, i);
+        dnums.set_output_spatial_dimensions(0, i);
         in_dims.push_back(options.in_height);
       } else if (ch == 'W') {
-        dnums.set_spatial_dimensions(1, i);
+        dnums.set_input_spatial_dimensions(1, i);
+        dnums.set_output_spatial_dimensions(1, i);
         in_dims.push_back(options.in_width);
       } else if (ch == 'C') {
         dnums.set_input_feature_dimension(i);
diff --git a/tensorflow/compiler/xla/service/cpu/conv_canonicalization.cc b/tensorflow/compiler/xla/service/cpu/conv_canonicalization.cc
index a3dd13811c..2136aeb387 100644
--- a/tensorflow/compiler/xla/service/cpu/conv_canonicalization.cc
+++ b/tensorflow/compiler/xla/service/cpu/conv_canonicalization.cc
@@ -41,8 +41,8 @@ StatusOr<bool> ConvCanonicalization::Run(HloModule* module) {
       auto kernel_input_feature_dim = dnums.kernel_input_feature_dimension();
       auto kernel_output_feature_dim = dnums.kernel_output_feature_dimension();
 
-      int num_spatial_dims = dnums.spatial_dimensions_size();
-      int num_dims = num_spatial_dims + 2;
+      const int64 num_spatial_dims = dnums.output_spatial_dimensions_size();
+      const int64 num_dims = num_spatial_dims + 2;
 
       // A canonical convolution's dimension numbers need to satisfy the
       // following conditions (see cs/PotentiallyImplementedAsEigenConvolution).
@@ -59,10 +59,10 @@ StatusOr<bool> ConvCanonicalization::Run(HloModule* module) {
       std::vector<int64> new_input_dims(num_dims);
       new_input_dim_order[0] = input_batch_dim;
       new_input_dims[0] = input->shape().dimensions(input_batch_dim);
-      for (int i = 0; i < num_spatial_dims; ++i) {
-        new_input_dim_order[i + 1] = dnums.spatial_dimensions(i);
+      for (int64 i = 0; i < num_spatial_dims; ++i) {
+        new_input_dim_order[i + 1] = dnums.input_spatial_dimensions(i);
         new_input_dims[i + 1] =
-            input->shape().dimensions(dnums.spatial_dimensions(i));
+            input->shape().dimensions(dnums.input_spatial_dimensions(i));
       }
       new_input_dim_order[num_dims - 1] = input_feature_dim;
       new_input_dims[num_dims - 1] =
@@ -78,7 +78,7 @@ StatusOr<bool> ConvCanonicalization::Run(HloModule* module) {
 
       std::vector<int64> new_kernel_dim_order(num_dims);
       std::vector<int64> new_kernel_dims(num_dims);
-      for (int i = 0; i < num_spatial_dims; ++i) {
+      for (int64 i = 0; i < num_spatial_dims; ++i) {
         new_kernel_dim_order[i] = dnums.kernel_spatial_dimensions(i);
         new_kernel_dims[i] =
             kernel->shape().dimensions(dnums.kernel_spatial_dimensions(i));
@@ -102,10 +102,10 @@ StatusOr<bool> ConvCanonicalization::Run(HloModule* module) {
       auto output_feature_dim = dnums.output_feature_dimension();
       new_output_dim_order[0] = output_batch_dim;
       new_conv_dims[0] = hlo->shape().dimensions(output_batch_dim);
-      for (int i = 0; i < num_spatial_dims; ++i) {
-        new_output_dim_order[i + 1] = dnums.spatial_dimensions(i);
+      for (int64 i = 0; i < num_spatial_dims; ++i) {
+        new_output_dim_order[i + 1] = dnums.output_spatial_dimensions(i);
         new_conv_dims[i + 1] =
-            hlo->shape().dimensions(dnums.spatial_dimensions(i));
+            hlo->shape().dimensions(dnums.output_spatial_dimensions(i));
       }
       new_output_dim_order[num_dims - 1] = output_feature_dim;
       new_conv_dims[num_dims - 1] = hlo->shape().dimensions(output_feature_dim);
@@ -115,9 +115,10 @@ StatusOr<bool> ConvCanonicalization::Run(HloModule* module) {
       ConvolutionDimensionNumbers new_dnums;
       new_dnums.set_input_batch_dimension(0);
       new_dnums.set_output_batch_dimension(0);
-      for (int i = 0; i < num_spatial_dims; ++i) {
-        new_dnums.add_spatial_dimensions(i + 1);
+      for (int64 i = 0; i < num_spatial_dims; ++i) {
+        new_dnums.add_input_spatial_dimensions(i + 1);
         new_dnums.add_kernel_spatial_dimensions(i);
+        new_dnums.add_output_spatial_dimensions(i + 1);
       }
       new_dnums.set_input_feature_dimension(num_dims - 1);
       new_dnums.set_output_feature_dimension(num_dims - 1);
diff --git a/tensorflow/compiler/xla/service/cpu/conv_canonicalization_test.cc b/tensorflow/compiler/xla/service/cpu/conv_canonicalization_test.cc
index d593ba26b6..968f53d5c7 100644
--- a/tensorflow/compiler/xla/service/cpu/conv_canonicalization_test.cc
+++ b/tensorflow/compiler/xla/service/cpu/conv_canonicalization_test.cc
@@ -69,8 +69,10 @@ TEST_F(ConvCanonicalizationTest, NonCanonicalToCanonical) {
   ConvolutionDimensionNumbers dnums;
   dnums.set_input_batch_dimension(1);
   dnums.set_output_batch_dimension(1);
-  dnums.add_spatial_dimensions(2);
-  dnums.add_spatial_dimensions(3);
+  dnums.add_input_spatial_dimensions(2);
+  dnums.add_output_spatial_dimensions(2);
+  dnums.add_input_spatial_dimensions(3);
+  dnums.add_output_spatial_dimensions(3);
   dnums.set_input_feature_dimension(0);
   dnums.set_output_feature_dimension(0);
   dnums.add_kernel_spatial_dimensions(2);
@@ -125,8 +127,10 @@ TEST_F(ConvCanonicalizationTest, CanonicalStaysTheSame) {
   ConvolutionDimensionNumbers dnums;
   dnums.set_input_batch_dimension(0);
   dnums.set_output_batch_dimension(0);
-  dnums.add_spatial_dimensions(1);
-  dnums.add_spatial_dimensions(2);
+  dnums.add_input_spatial_dimensions(1);
+  dnums.add_output_spatial_dimensions(1);
+  dnums.add_input_spatial_dimensions(2);
+  dnums.add_output_spatial_dimensions(2);
   dnums.set_input_feature_dimension(3);
   dnums.set_output_feature_dimension(3);
   dnums.add_kernel_spatial_dimensions(0);
diff --git a/tensorflow/compiler/xla/service/cpu/ir_emission_utils.cc b/tensorflow/compiler/xla/service/cpu/ir_emission_utils.cc
index d2e7f830d1..3993779da6 100644
--- a/tensorflow/compiler/xla/service/cpu/ir_emission_utils.cc
+++ b/tensorflow/compiler/xla/service/cpu/ir_emission_utils.cc
@@ -49,18 +49,21 @@ bool PotentiallyImplementedAsEigenConvolution(
       convolution.convolution_dimension_numbers();
   // Only 1D and 2D convolutions are supported at the moment.
   // TODO(b/32897908): add an optimized implementation for 3D convolution.
-  const int64 num_spatial_dims = dnums.spatial_dimensions_size();
+  const int64 num_spatial_dims = dnums.output_spatial_dimensions_size();
   if (num_spatial_dims > 2) {
     return false;
   }
 
   for (int64 i = 0; i < num_spatial_dims; ++i) {
-    if (dnums.spatial_dimensions(i) != i + 1) {
+    if (dnums.input_spatial_dimensions(i) != i + 1) {
       return false;
     }
     if (dnums.kernel_spatial_dimensions(i) != i) {
       return false;
     }
+    if (dnums.output_spatial_dimensions(i) != i + 1) {
+      return false;
+    }
   }
 
   const Shape& output_shape = convolution.shape();
diff --git a/tensorflow/compiler/xla/service/cpu/ir_emitter.cc b/tensorflow/compiler/xla/service/cpu/ir_emitter.cc
index 49f4782693..502dd2e738 100644
--- a/tensorflow/compiler/xla/service/cpu/ir_emitter.cc
+++ b/tensorflow/compiler/xla/service/cpu/ir_emitter.cc
@@ -822,14 +822,16 @@ Status IrEmitter::HandleSelectAndScatter(HloInstruction* select_and_scatter) {
   // If the initialized_flag is false, initialize the selected value and index
   // with the currently visiting operand.
   SetToFirstInsertPoint(if_initialized.false_block, &ir_builder_);
-  const auto save_operand_index = [&](
-      const llvm_ir::IrArray::Index& operand_index) {
-    for (int64 i = 0; i < rank; ++i) {
-      llvm::Value* selected_index_address_slot = ir_builder_.CreateInBoundsGEP(
-          selected_index_address, {ir_builder_.getInt32(i)});
-      ir_builder_.CreateStore(operand_index[i], selected_index_address_slot);
-    }
-  };
+  const auto save_operand_index =
+      [&](const llvm_ir::IrArray::Index& operand_index) {
+        for (int64 i = 0; i < rank; ++i) {
+          llvm::Value* selected_index_address_slot =
+              ir_builder_.CreateInBoundsGEP(selected_index_address,
+                                            {ir_builder_.getInt32(i)});
+          ir_builder_.CreateStore(operand_index[i],
+                                  selected_index_address_slot);
+        }
+      };
   llvm_ir::IrArray operand_array(GetIrArrayFor(operand));
   llvm::Value* operand_data =
       operand_array.EmitReadArrayElement(operand_index, &ir_builder_);
@@ -952,11 +954,12 @@ Status IrEmitter::HandleConvolution(HloInstruction* convolution) {
       // Input tensor.
       const Shape& input_shape = convolution->operand(0)->shape();
       int64 input_batch = input_shape.dimensions(dnums.input_batch_dimension());
-      int64 input_rows = input_shape.dimensions(dnums.spatial_dimensions(0));
+      int64 input_rows =
+          input_shape.dimensions(dnums.input_spatial_dimensions(0));
       int64 input_cols =
           one_dim_convolution
               ? 1
-              : input_shape.dimensions(dnums.spatial_dimensions(1));
+              : input_shape.dimensions(dnums.input_spatial_dimensions(1));
       int64 input_channels =
           input_shape.dimensions(dnums.input_feature_dimension());
 
@@ -976,11 +979,11 @@ Status IrEmitter::HandleConvolution(HloInstruction* convolution) {
       // Output tensor.
       const Shape& convolution_shape = convolution->shape();
       int64 output_rows =
-          convolution_shape.dimensions(dnums.spatial_dimensions(0));
-      int64 output_cols =
-          one_dim_convolution
-              ? 1
-              : convolution_shape.dimensions(dnums.spatial_dimensions(1));
+          convolution_shape.dimensions(dnums.output_spatial_dimensions(0));
+      int64 output_cols = one_dim_convolution
+                              ? 1
+                              : convolution_shape.dimensions(
+                                    dnums.output_spatial_dimensions(1));
 
       // Extract the window stride for the convolution.
       const Window& window = convolution->window();
@@ -1068,10 +1071,10 @@ Status IrEmitter::HandleConvolution(HloInstruction* convolution) {
   return EmitTargetElementLoop(
       convolution, [this, convolution, lhs, rhs, window,
                     dnums](const llvm_ir::IrArray::Index& index) {
-        int num_spatial_dims = dnums.spatial_dimensions_size();
+        int num_spatial_dims = dnums.output_spatial_dimensions_size();
         std::vector<llvm::Value*> output_spatial(num_spatial_dims);
         for (int i = 0; i < num_spatial_dims; ++i) {
-          output_spatial[i] = index[dnums.spatial_dimensions(i)];
+          output_spatial[i] = index[dnums.output_spatial_dimensions(i)];
         }
         llvm::Value* output_feature = index[dnums.output_feature_dimension()];
         llvm::Value* batch = index[dnums.output_batch_dimension()];
@@ -1091,8 +1094,9 @@ Status IrEmitter::HandleConvolution(HloInstruction* convolution) {
         for (int i = 0; i < num_spatial_dims; ++i) {
           kernel_spatial[i] =
               loops
-                  .AddLoop(0, rhs->shape().dimensions(
-                                  dnums.kernel_spatial_dimensions(i)),
+                  .AddLoop(0,
+                           rhs->shape().dimensions(
+                               dnums.kernel_spatial_dimensions(i)),
                            tensorflow::strings::StrCat("k", i))
                   ->GetIndVarValue();
         }
@@ -1108,17 +1112,18 @@ Status IrEmitter::HandleConvolution(HloInstruction* convolution) {
         // Calculate the spatial index in the input array, taking striding,
         // dilation and padding into account. An index in the padding will be
         // out of the bounds of the array.
-        const auto calculate_input_index = [this](
-            llvm::Value* output_index, llvm::Value* kernel_index,
-            const WindowDimension& window_dim) {
-          llvm::Value* strided_index = ir_builder_.CreateNSWMul(
-              output_index, ir_builder_.getInt64(window_dim.stride()));
-          llvm::Value* dilated_kernel_index = ir_builder_.CreateNSWMul(
-              kernel_index, ir_builder_.getInt64(window_dim.window_dilation()));
-          return ir_builder_.CreateNSWSub(
-              ir_builder_.CreateNSWAdd(strided_index, dilated_kernel_index),
-              ir_builder_.getInt64(window_dim.padding_low()));
-        };
+        const auto calculate_input_index =
+            [this](llvm::Value* output_index, llvm::Value* kernel_index,
+                   const WindowDimension& window_dim) {
+              llvm::Value* strided_index = ir_builder_.CreateNSWMul(
+                  output_index, ir_builder_.getInt64(window_dim.stride()));
+              llvm::Value* dilated_kernel_index = ir_builder_.CreateNSWMul(
+                  kernel_index,
+                  ir_builder_.getInt64(window_dim.window_dilation()));
+              return ir_builder_.CreateNSWSub(
+                  ir_builder_.CreateNSWAdd(strided_index, dilated_kernel_index),
+                  ir_builder_.getInt64(window_dim.padding_low()));
+            };
         std::vector<llvm::Value*> input_spatial(num_spatial_dims);
         for (int i = 0; i < num_spatial_dims; ++i) {
           input_spatial[i] = calculate_input_index(
@@ -1144,7 +1149,7 @@ Status IrEmitter::HandleConvolution(HloInstruction* convolution) {
         for (int i = 0; i < num_spatial_dims; ++i) {
           llvm::ConstantInt* input_bound =
               ir_builder_.getInt64(window_util::DilatedBound(
-                  lhs->shape().dimensions(dnums.spatial_dimensions(i)),
+                  lhs->shape().dimensions(dnums.input_spatial_dimensions(i)),
                   window.dimensions(i).base_dilation()));
           llvm::Value* dim_in_bound =
               ir_builder_.CreateICmpULT(input_spatial[i], input_bound);
@@ -1176,7 +1181,7 @@ Status IrEmitter::HandleConvolution(HloInstruction* convolution) {
         int num_dims = num_spatial_dims + 2;
         llvm_ir::IrArray::Index input_index(num_dims);
         for (int i = 0; i < num_spatial_dims; ++i) {
-          input_index[dnums.spatial_dimensions(i)] = input_spatial[i];
+          input_index[dnums.input_spatial_dimensions(i)] = input_spatial[i];
         }
         input_index[dnums.input_feature_dimension()] = input_feature;
         input_index[dnums.input_batch_dimension()] = batch;
diff --git a/tensorflow/compiler/xla/service/gpu/convolution_folding.cc b/tensorflow/compiler/xla/service/gpu/convolution_folding.cc
index 5aaf072f9d..828ae675d7 100644
--- a/tensorflow/compiler/xla/service/gpu/convolution_folding.cc
+++ b/tensorflow/compiler/xla/service/gpu/convolution_folding.cc
@@ -74,9 +74,10 @@ MatchBackwardFilter(HloInstruction* conv) {
       conv->convolution_dimension_numbers();
   auto input_batch_dim = conv_dnums.input_batch_dimension();
   auto input_feature_dim = conv_dnums.input_feature_dimension();
+  auto input_spatial_dims = conv_dnums.input_spatial_dimensions();
   auto output_batch_dim = conv_dnums.output_batch_dimension();
   auto output_feature_dim = conv_dnums.output_feature_dimension();
-  auto spatial_dims = conv_dnums.spatial_dimensions();
+  auto output_spatial_dims = conv_dnums.output_spatial_dimensions();
 
   for (const WindowDimension& window_dim : conv->window().dimensions()) {
     if (window_dim.stride() != 1) {
@@ -108,11 +109,11 @@ MatchBackwardFilter(HloInstruction* conv) {
   //
   // Compute the window of the backward convolution.
   Window backward_conv_window;
-  for (int i = 0; i < spatial_dims.size(); ++i) {
+  for (int i = 0; i < input_spatial_dims.size(); ++i) {
     WindowDimension* dim = backward_conv_window.add_dimensions();
     // The window size of the backward convolution equals the output size of the
     // forward convolution.
-    int64 filter_size = conv->shape().dimensions(spatial_dims[i]);
+    int64 filter_size = conv->shape().dimensions(output_spatial_dims[i]);
     dim->set_size(filter_size);
     // The window stride equals the window dilation of the forward convolution.
     dim->set_stride(conv->window().dimensions(i).window_dilation());
@@ -120,7 +121,8 @@ MatchBackwardFilter(HloInstruction* conv) {
     // activations.
     dim->set_padding_low(conv->window().dimensions(i).padding_low());
 
-    int64 input_size = conv->operand(0)->shape().dimensions(spatial_dims[i]);
+    int64 input_size =
+        conv->operand(0)->shape().dimensions(input_spatial_dims[i]);
     int64 output_size = conv->window().dimensions(i).size();
     // Compute the range of the amount of valid high padding. We first compute
     // min_padding_high, the amount of padding on the right/bottom to ensure the
@@ -189,8 +191,11 @@ MatchBackwardFilter(HloInstruction* conv) {
   backward_conv_dnums.set_input_feature_dimension(input_batch_dim);
   backward_conv_dnums.set_output_batch_dimension(output_feature_dim);
   backward_conv_dnums.set_output_feature_dimension(output_batch_dim);
-  for (int i = 0; i < spatial_dims.size(); ++i) {
-    backward_conv_dnums.add_spatial_dimensions(spatial_dims[i]);
+  for (int i = 0; i < input_spatial_dims.size(); ++i) {
+    backward_conv_dnums.add_input_spatial_dimensions(input_spatial_dims[i]);
+  }
+  for (int i = 0; i < output_spatial_dims.size(); ++i) {
+    backward_conv_dnums.add_output_spatial_dimensions(output_spatial_dims[i]);
   }
   // The dimension numbering of the output of the forward convolution (before
   // transposition) is the same as that of the activations (according to the
@@ -205,9 +210,9 @@ MatchBackwardFilter(HloInstruction* conv) {
       PositionInContainer(transpose->dimensions(), output_batch_dim));
   backward_conv_dnums.set_kernel_output_feature_dimension(
       PositionInContainer(transpose->dimensions(), output_feature_dim));
-  for (int i = 0; i < spatial_dims.size(); ++i) {
+  for (int i = 0; i < output_spatial_dims.size(); ++i) {
     backward_conv_dnums.add_kernel_spatial_dimensions(
-        PositionInContainer(transpose->dimensions(), spatial_dims[i]));
+        PositionInContainer(transpose->dimensions(), output_spatial_dims[i]));
   }
 
   return std::make_tuple(true, std::vector<HloInstruction*>({transpose, conv}),
@@ -272,12 +277,14 @@ MatchBackwardInput(HloInstruction* conv) {
     }
   }
 
-  const auto& spatial_dims = dnums.spatial_dimensions();
-  CHECK_EQ(conv->window().dimensions().size(), spatial_dims.size());
+  const auto& input_spatial_dims = dnums.input_spatial_dimensions();
+  const auto& output_spatial_dims = dnums.output_spatial_dimensions();
+  CHECK_EQ(conv->window().dimensions().size(), input_spatial_dims.size());
+  CHECK_EQ(output_spatial_dims.size(), input_spatial_dims.size());
 
   const Window& old_window = conv->window();
   Window new_window = old_window;
-  for (size_t i = 0; i < spatial_dims.size(); ++i) {
+  for (size_t i = 0; i < input_spatial_dims.size(); ++i) {
     // Restore backward convolution's padding config from the matched pattern.
     // See the comment in tensorflow/core/kernels/conv_grad_tuple_ops.cc
     // for how we convert backward input convolution to a variant of forward
@@ -310,8 +317,9 @@ MatchBackwardInput(HloInstruction* conv) {
     // end at the border. The maximum amount (max_padding_high) equals
     // min_padding_high+stride-1 -- max_padding_high+1 would cause the output
     // size to change.
-    auto unpadded_input_size = conv->shape().dimensions(spatial_dims[i]);
-    auto output_size = conv->operand(0)->shape().dimensions(spatial_dims[i]);
+    auto unpadded_input_size = conv->shape().dimensions(output_spatial_dims[i]);
+    auto output_size =
+        conv->operand(0)->shape().dimensions(input_spatial_dims[i]);
     auto padded_input_size = kernel_size + dim->stride() * (output_size - 1);
     auto total_pad_size = padded_input_size - unpadded_input_size;
     auto min_padding_high = total_pad_size - backward_padding_low;
diff --git a/tensorflow/compiler/xla/service/gpu/convolution_folding_test.cc b/tensorflow/compiler/xla/service/gpu/convolution_folding_test.cc
index 19b122ba06..112c496e1f 100644
--- a/tensorflow/compiler/xla/service/gpu/convolution_folding_test.cc
+++ b/tensorflow/compiler/xla/service/gpu/convolution_folding_test.cc
@@ -49,8 +49,10 @@ class ConvolutionFoldingTest : public HloTestBase {
     tf_default_dnums_for_backward_filter_.set_output_batch_dimension(3);
     tf_default_dnums_for_backward_filter_.set_input_feature_dimension(0);
     tf_default_dnums_for_backward_filter_.set_output_feature_dimension(0);
-    tf_default_dnums_for_backward_filter_.add_spatial_dimensions(1);
-    tf_default_dnums_for_backward_filter_.add_spatial_dimensions(2);
+    tf_default_dnums_for_backward_filter_.add_input_spatial_dimensions(1);
+    tf_default_dnums_for_backward_filter_.add_output_spatial_dimensions(1);
+    tf_default_dnums_for_backward_filter_.add_input_spatial_dimensions(2);
+    tf_default_dnums_for_backward_filter_.add_output_spatial_dimensions(2);
     tf_default_dnums_for_backward_filter_.set_kernel_input_feature_dimension(0);
     tf_default_dnums_for_backward_filter_.set_kernel_output_feature_dimension(
         3);
@@ -61,8 +63,10 @@ class ConvolutionFoldingTest : public HloTestBase {
     tf_default_dnums_for_backward_input_.set_output_batch_dimension(0);
     tf_default_dnums_for_backward_input_.set_input_feature_dimension(3);
     tf_default_dnums_for_backward_input_.set_output_feature_dimension(3);
-    tf_default_dnums_for_backward_input_.add_spatial_dimensions(1);
-    tf_default_dnums_for_backward_input_.add_spatial_dimensions(2);
+    tf_default_dnums_for_backward_input_.add_input_spatial_dimensions(1);
+    tf_default_dnums_for_backward_input_.add_output_spatial_dimensions(1);
+    tf_default_dnums_for_backward_input_.add_input_spatial_dimensions(2);
+    tf_default_dnums_for_backward_input_.add_output_spatial_dimensions(2);
     tf_default_dnums_for_backward_input_.set_kernel_input_feature_dimension(3);
     tf_default_dnums_for_backward_input_.set_kernel_output_feature_dimension(2);
     tf_default_dnums_for_backward_input_.add_kernel_spatial_dimensions(0);
@@ -258,8 +262,10 @@ TEST_F(ConvolutionFoldingTest, BackwardInputConvolveEvenPadding) {
   conv_dnums.set_output_batch_dimension(0);
   conv_dnums.set_input_feature_dimension(1);
   conv_dnums.set_output_feature_dimension(1);
-  conv_dnums.add_spatial_dimensions(2);
-  conv_dnums.add_spatial_dimensions(3);
+  conv_dnums.add_input_spatial_dimensions(2);
+  conv_dnums.add_output_spatial_dimensions(2);
+  conv_dnums.add_input_spatial_dimensions(3);
+  conv_dnums.add_output_spatial_dimensions(3);
   conv_dnums.set_kernel_input_feature_dimension(0);
   conv_dnums.set_kernel_output_feature_dimension(1);
   conv_dnums.add_kernel_spatial_dimensions(2);
diff --git a/tensorflow/compiler/xla/service/gpu/convolution_thunk.cc b/tensorflow/compiler/xla/service/gpu/convolution_thunk.cc
index 5fe5f55857..037eec8ef5 100644
--- a/tensorflow/compiler/xla/service/gpu/convolution_thunk.cc
+++ b/tensorflow/compiler/xla/service/gpu/convolution_thunk.cc
@@ -29,12 +29,12 @@ namespace se = ::perftools::gputools;
 namespace xla {
 namespace gpu {
 
+using se::dnn::AlgorithmDesc;
 using se::dnn::BatchDescriptor;
 using se::dnn::ConvolutionDescriptor;
 using se::dnn::DataLayout;
 using se::dnn::FilterDescriptor;
 using se::dnn::FilterLayout;
-using se::dnn::AlgorithmDesc;
 
 ConvolveScratchAllocator::ConvolveScratchAllocator(
     int device_ordinal, DeviceMemoryAllocator* memory_allocator)
@@ -131,8 +131,9 @@ tensorflow::Status ConvolutionThunk::ExecuteOnStream(
   const int effective_num_dimensions = std::max(2, num_dimensions);
 
   CHECK_EQ(F32, output_shape_.element_type());
-  CHECK_EQ(num_dimensions, dim_nums_.spatial_dimensions_size());
+  CHECK_EQ(num_dimensions, dim_nums_.input_spatial_dimensions_size());
   CHECK_EQ(num_dimensions, dim_nums_.kernel_spatial_dimensions_size());
+  CHECK_EQ(num_dimensions, dim_nums_.output_spatial_dimensions_size());
   for (const WindowDimension& dim : window_.dimensions()) {
     CHECK_EQ(dim.padding_low(), dim.padding_high());
   }
@@ -148,7 +149,7 @@ tensorflow::Status ConvolutionThunk::ExecuteOnStream(
     // Note that the dimensions are reversed. The same holds below.
     input_descriptor.set_spatial_dim(
         static_cast<se::dnn::DimIndex>(effective_num_dimensions - dim - 1),
-        input_shape_.dimensions(dim_nums_.spatial_dimensions(dim)));
+        input_shape_.dimensions(dim_nums_.input_spatial_dimensions(dim)));
   }
 
   FilterDescriptor filter_descriptor(effective_num_dimensions);
@@ -182,7 +183,7 @@ tensorflow::Status ConvolutionThunk::ExecuteOnStream(
   for (int dim = 0; dim < num_dimensions; ++dim) {
     output_descriptor.set_spatial_dim(
         static_cast<se::dnn::DimIndex>(effective_num_dimensions - dim - 1),
-        output_shape_.dimensions(dim_nums_.spatial_dimensions(dim)));
+        output_shape_.dimensions(dim_nums_.output_spatial_dimensions(dim)));
   }
 
   // Add a singleton dimension in the 1D convolution case.
diff --git a/tensorflow/compiler/xla/service/gpu/instruction_fusion_test.cc b/tensorflow/compiler/xla/service/gpu/instruction_fusion_test.cc
index 9a4bfd0905..1d47ffde43 100644
--- a/tensorflow/compiler/xla/service/gpu/instruction_fusion_test.cc
+++ b/tensorflow/compiler/xla/service/gpu/instruction_fusion_test.cc
@@ -156,8 +156,10 @@ TEST_F(InstructionFusionTest, PotentialBitcastTransposeOfConvolutionUnfused) {
   conv_dnums.set_output_batch_dimension(0);
   conv_dnums.set_input_feature_dimension(1);
   conv_dnums.set_output_feature_dimension(1);
-  conv_dnums.add_spatial_dimensions(2);
-  conv_dnums.add_spatial_dimensions(3);
+  conv_dnums.add_input_spatial_dimensions(2);
+  conv_dnums.add_output_spatial_dimensions(2);
+  conv_dnums.add_input_spatial_dimensions(3);
+  conv_dnums.add_output_spatial_dimensions(3);
   conv_dnums.set_kernel_output_feature_dimension(0);
   conv_dnums.set_kernel_input_feature_dimension(1);
   conv_dnums.add_kernel_spatial_dimensions(2);
diff --git a/tensorflow/compiler/xla/service/gpu/ir_emission_utils.cc b/tensorflow/compiler/xla/service/gpu/ir_emission_utils.cc
index 8fb7a6adda..658fd05cd4 100644
--- a/tensorflow/compiler/xla/service/gpu/ir_emission_utils.cc
+++ b/tensorflow/compiler/xla/service/gpu/ir_emission_utils.cc
@@ -100,7 +100,7 @@ bool ImplementedAsDnnConvolution(const HloInstruction& hlo) {
   if (hlo.opcode() == HloOpcode::kConvolution) {
     const ConvolutionDimensionNumbers& dnums =
         hlo.convolution_dimension_numbers();
-    if (dnums.spatial_dimensions_size() > 3) {
+    if (dnums.input_spatial_dimensions_size() > 3) {
       return false;
     }
 
diff --git a/tensorflow/compiler/xla/service/gpu/layout_assignment.cc b/tensorflow/compiler/xla/service/gpu/layout_assignment.cc
index 0bbd63fb7b..d475c4171b 100644
--- a/tensorflow/compiler/xla/service/gpu/layout_assignment.cc
+++ b/tensorflow/compiler/xla/service/gpu/layout_assignment.cc
@@ -80,9 +80,9 @@ Status GpuLayoutAssignment::AddBackendConstraints(
       const ConvolutionDimensionNumbers& dimension_numbers =
           instruction->convolution_dimension_numbers();
       std::vector<int64> input_layout;
-      for (int i = dimension_numbers.spatial_dimensions_size() - 1; i >= 0;
-           --i) {
-        input_layout.push_back(dimension_numbers.spatial_dimensions(i));
+      for (int i = dimension_numbers.input_spatial_dimensions_size() - 1;
+           i >= 0; --i) {
+        input_layout.push_back(dimension_numbers.input_spatial_dimensions(i));
       }
       input_layout.push_back(dimension_numbers.input_feature_dimension());
       input_layout.push_back(dimension_numbers.input_batch_dimension());
@@ -102,9 +102,9 @@ Status GpuLayoutAssignment::AddBackendConstraints(
       *filter_shape.mutable_layout() = LayoutUtil::MakeLayout(filter_layout);
 
       std::vector<int64> output_layout;
-      for (int i = dimension_numbers.spatial_dimensions_size() - 1; i >= 0;
-           --i) {
-        output_layout.push_back(dimension_numbers.spatial_dimensions(i));
+      for (int i = dimension_numbers.output_spatial_dimensions_size() - 1;
+           i >= 0; --i) {
+        output_layout.push_back(dimension_numbers.output_spatial_dimensions(i));
       }
       output_layout.push_back(dimension_numbers.output_feature_dimension());
       output_layout.push_back(dimension_numbers.output_batch_dimension());
diff --git a/tensorflow/compiler/xla/service/gpu/pad_insertion.cc b/tensorflow/compiler/xla/service/gpu/pad_insertion.cc
index 9274e16a45..11290eda4f 100644
--- a/tensorflow/compiler/xla/service/gpu/pad_insertion.cc
+++ b/tensorflow/compiler/xla/service/gpu/pad_insertion.cc
@@ -49,8 +49,8 @@ HloInstruction* MaybePaddedAndSlicedInput(
     // applies positive padding and dilation.
     PaddingConfig padding_config =
         MakeNoPaddingConfig(input->shape().dimensions_size());
-    for (size_t i = 0; i < conv_dnums.spatial_dimensions().size(); ++i) {
-      int64 dim = conv_dnums.spatial_dimensions(i);
+    for (size_t i = 0; i < conv_dnums.input_spatial_dimensions().size(); ++i) {
+      int64 dim = conv_dnums.input_spatial_dimensions(i);
       padding_config.mutable_dimensions(dim)->set_edge_padding_low(
           std::max<int64>(0LL, conv_window.dimensions(i).padding_low()));
       padding_config.mutable_dimensions(dim)->set_edge_padding_high(
@@ -81,8 +81,8 @@ HloInstruction* MaybePaddedAndSlicedInput(
     std::vector<int64> limit_indices(input->shape().dimensions().begin(),
                                      input->shape().dimensions().end());
     std::vector<int64> strides(input->shape().dimensions_size(), 1);
-    for (size_t i = 0; i < conv_dnums.spatial_dimensions().size(); ++i) {
-      int64 dim = conv_dnums.spatial_dimensions(i);
+    for (size_t i = 0; i < conv_dnums.input_spatial_dimensions().size(); ++i) {
+      int64 dim = conv_dnums.input_spatial_dimensions(i);
       // If dimension "dim" has negative padding, increase the start index or
       // decrement the limit index by the amount of negative padding.
       start_indices[dim] +=
@@ -117,8 +117,8 @@ HloInstruction* MaybePaddedKernel(const Window& conv_window,
   for (size_t i = 0; i < kernel->shape().dimensions_size(); ++i) {
     padding_config.add_dimensions();
   }
-  for (size_t i = 0; i < conv_dnums.spatial_dimensions().size(); ++i) {
-    int64 dim = conv_dnums.spatial_dimensions(i);
+  for (size_t i = 0; i < conv_dnums.kernel_spatial_dimensions().size(); ++i) {
+    int64 dim = conv_dnums.kernel_spatial_dimensions(i);
     padding_config.mutable_dimensions(dim)->set_interior_padding(
         conv_window.dimensions(i).window_dilation() - 1);
   }
@@ -229,7 +229,7 @@ bool PadInsertion::CanonicalizeBackwardFilterConvolution(
     // later. Therefore, the amount of new padding (low or high) is the minimum
     // of the amount of old padding low and old padding high.
     int64 new_conv_padding = std::min(padding_low, padding_high);
-    int64 dim = backward_conv_dnums.spatial_dimensions(i);
+    int64 dim = backward_conv_dnums.input_spatial_dimensions(i);
     input_padding_config.mutable_dimensions(dim)->set_edge_padding_low(
         padding_low - new_conv_padding);
     input_padding_config.mutable_dimensions(dim)->set_edge_padding_high(
@@ -369,12 +369,11 @@ bool PadInsertion::CanonicalizeBackwardInputConvolution(
   std::vector<int64> limit_indices(
       new_backward_conv->shape().dimensions().begin(),
       new_backward_conv->shape().dimensions().end());
-  std::vector<int64> strides(new_backward_conv->shape().dimensions_size(),
-                             1LL);
+  std::vector<int64> strides(new_backward_conv->shape().dimensions_size(), 1LL);
   for (size_t i = 0; i < backward_conv->window().dimensions_size(); ++i) {
     int64 padding_low = backward_conv->window().dimensions(i).padding_low();
     int64 padding_high = backward_conv->window().dimensions(i).padding_high();
-    int64 dim = backward_conv_dnums.spatial_dimensions(i);
+    int64 dim = backward_conv_dnums.output_spatial_dimensions(i);
     if (padding_low > padding_high) {
       // If the amount of low padding (of the old backward convolution) is
       // larger, we internally pad the low end of the activations and slice
diff --git a/tensorflow/compiler/xla/service/hlo_evaluator.cc b/tensorflow/compiler/xla/service/hlo_evaluator.cc
index 0a1ebe3416..e693d167a1 100644
--- a/tensorflow/compiler/xla/service/hlo_evaluator.cc
+++ b/tensorflow/compiler/xla/service/hlo_evaluator.cc
@@ -812,7 +812,8 @@ class HloEvaluator::TypedVisitor : public DfsHloVisitorWithDefault {
     CHECK(ShapeUtil::SameElementType(lhs_shape, result_shape));
 
     const auto& dnums = conv->convolution_dimension_numbers();
-    const int64 num_spatial_dims = dnums.spatial_dimensions_size();
+    const int64 num_spatial_dims = dnums.output_spatial_dimensions_size();
+    CHECK_EQ(num_spatial_dims, dnums.input_spatial_dimensions_size());
     CHECK_EQ(num_spatial_dims, dnums.kernel_spatial_dimensions_size());
     CHECK_GE(num_spatial_dims, 0);
     CHECK_EQ(window.dimensions_size(), num_spatial_dims);
@@ -877,13 +878,15 @@ class HloEvaluator::TypedVisitor : public DfsHloVisitorWithDefault {
           // Find corresponding spatial dimension index for input (lhs).
           for (int64 ki = 0; ki < rhs_spatial_index.size(); ++ki) {
             // Spatial dimension number for input (lhs) and output.
-            const int64 spatial_dim = dnums.spatial_dimensions(ki);
+            const int64 input_spatial_dim = dnums.input_spatial_dimensions(ki);
+            const int64 output_spatial_dim =
+                dnums.output_spatial_dimensions(ki);
 
             // Calculate lhs (input) index without taking base dilation into
             // account.
             const auto& window_dim = window.dimensions(ki);
             const int64 undilated_index =
-                out_index[spatial_dim] * window_dim.stride() -
+                out_index[output_spatial_dim] * window_dim.stride() -
                 window_dim.padding_low() +
                 rhs_spatial_index[ki] * window_dim.window_dilation();
             // Skip if the lhs (input) index is to be dilated.
@@ -892,12 +895,13 @@ class HloEvaluator::TypedVisitor : public DfsHloVisitorWithDefault {
             }
 
             // Calculate the actual lhs (input) index after dilation.
-            lhs_index[spatial_dim] =
+            lhs_index[input_spatial_dim] =
                 undilated_index / window_dim.base_dilation();
 
             // Skip if input index is not in bound.
-            if (!(lhs_index[spatial_dim] >= 0 &&
-                  lhs_index[spatial_dim] < lhs_shape.dimensions(spatial_dim))) {
+            if (!(lhs_index[input_spatial_dim] >= 0 &&
+                  lhs_index[input_spatial_dim] <
+                      lhs_shape.dimensions(input_spatial_dim))) {
               goto cnt;
             }
 
diff --git a/tensorflow/compiler/xla/service/hlo_evaluator_test.cc b/tensorflow/compiler/xla/service/hlo_evaluator_test.cc
index d0d6029d5f..b2c4351896 100644
--- a/tensorflow/compiler/xla/service/hlo_evaluator_test.cc
+++ b/tensorflow/compiler/xla/service/hlo_evaluator_test.cc
@@ -751,7 +751,8 @@ TEST_F(HloEvaluatorTest, SimpleConv1D) {
   dnums.set_output_batch_dimension(0);
   dnums.set_input_feature_dimension(1);
   dnums.set_output_feature_dimension(1);
-  dnums.add_spatial_dimensions(2);
+  dnums.add_input_spatial_dimensions(2);
+  dnums.add_output_spatial_dimensions(2);
 
   dnums.set_kernel_output_feature_dimension(0);
   dnums.set_kernel_input_feature_dimension(1);
@@ -886,8 +887,10 @@ TEST_F(HloEvaluatorTest, Conv2DGeneralDimensionsReversed) {
   dnums.set_output_batch_dimension(2);
   dnums.set_input_feature_dimension(0);
   dnums.set_output_feature_dimension(0);
-  dnums.add_spatial_dimensions(1);
-  dnums.add_spatial_dimensions(3);
+  dnums.add_input_spatial_dimensions(1);
+  dnums.add_output_spatial_dimensions(1);
+  dnums.add_input_spatial_dimensions(3);
+  dnums.add_output_spatial_dimensions(3);
 
   dnums.set_kernel_output_feature_dimension(0);
   dnums.set_kernel_input_feature_dimension(2);
@@ -960,8 +963,10 @@ TEST_F(HloEvaluatorTest, Conv2DGeneralDimensions) {
   dnums.set_output_batch_dimension(2);
   dnums.set_input_feature_dimension(0);
   dnums.set_output_feature_dimension(0);
-  dnums.add_spatial_dimensions(1);
-  dnums.add_spatial_dimensions(3);
+  dnums.add_input_spatial_dimensions(1);
+  dnums.add_output_spatial_dimensions(1);
+  dnums.add_input_spatial_dimensions(3);
+  dnums.add_output_spatial_dimensions(3);
 
   dnums.set_kernel_output_feature_dimension(0);
   dnums.set_kernel_input_feature_dimension(2);
diff --git a/tensorflow/compiler/xla/service/hlo_instruction.cc b/tensorflow/compiler/xla/service/hlo_instruction.cc
index 854185af56..c30c432654 100644
--- a/tensorflow/compiler/xla/service/hlo_instruction.cc
+++ b/tensorflow/compiler/xla/service/hlo_instruction.cc
@@ -3021,25 +3021,25 @@ string HloInstruction::ConvolutionDimensionNumbersToString() const {
 
   // lhs_dims[i] is the symbol of the logical dimension i for the lhs
   // operand. E.g. if batch has dimension number 2, then lhs_dims[2] == "b".
-  std::vector<string> lhs_dims(2 + dnums.spatial_dimensions().size());
+  std::vector<string> lhs_dims(2 + dnums.input_spatial_dimensions().size());
   lhs_dims[dnums.input_batch_dimension()] = 'b';
   lhs_dims[dnums.input_feature_dimension()] = 'f';
-  for (int64 i = 0; i < dnums.spatial_dimensions().size(); ++i) {
-    lhs_dims[dnums.spatial_dimensions(i)] = StrCat(i);
+  for (int64 i = 0; i < dnums.input_spatial_dimensions().size(); ++i) {
+    lhs_dims[dnums.input_spatial_dimensions(i)] = StrCat(i);
   }
 
   std::vector<string> rhs_dims(2 + dnums.kernel_spatial_dimensions().size());
   rhs_dims[dnums.kernel_input_feature_dimension()] = "i";
   rhs_dims[dnums.kernel_output_feature_dimension()] = "o";
-  for (int64 i = 0; i < dnums.spatial_dimensions().size(); ++i) {
+  for (int64 i = 0; i < dnums.kernel_spatial_dimensions().size(); ++i) {
     rhs_dims[dnums.kernel_spatial_dimensions(i)] = StrCat(i);
   }
 
-  std::vector<string> output_dims(2 + dnums.spatial_dimensions().size());
+  std::vector<string> output_dims(2 + dnums.output_spatial_dimensions().size());
   output_dims[dnums.output_batch_dimension()] = 'b';
   output_dims[dnums.output_feature_dimension()] = 'f';
-  for (int64 i = 0; i < dnums.spatial_dimensions().size(); ++i) {
-    output_dims[dnums.spatial_dimensions(i)] = StrCat(i);
+  for (int64 i = 0; i < dnums.output_spatial_dimensions().size(); ++i) {
+    output_dims[dnums.output_spatial_dimensions(i)] = StrCat(i);
   }
 
   result += "dim_labels=";
diff --git a/tensorflow/compiler/xla/service/shape_inference.cc b/tensorflow/compiler/xla/service/shape_inference.cc
index 0a2bf939c1..3df1911d07 100644
--- a/tensorflow/compiler/xla/service/shape_inference.cc
+++ b/tensorflow/compiler/xla/service/shape_inference.cc
@@ -1445,7 +1445,7 @@ ShapeInference::InferDegenerateDimensionBroadcastShape(
         ShapeUtil::HumanString(lhs).c_str(),
         ShapeUtil::HumanString(rhs).c_str());
   }
-  if (dnums.spatial_dimensions_size() !=
+  if (dnums.input_spatial_dimensions_size() !=
       dnums.kernel_spatial_dimensions_size()) {
     return InvalidArgument(
         "Both arguments to convolution must have same number of dimensions.\n"
@@ -1453,7 +1453,7 @@ ShapeInference::InferDegenerateDimensionBroadcastShape(
         window.DebugString().c_str());
   }
 
-  const int num_spatial_dims = dnums.spatial_dimensions_size();
+  const int num_spatial_dims = dnums.input_spatial_dimensions_size();
   if (window.dimensions_size() != num_spatial_dims) {
     return InvalidArgument(
         "Window must have same number of dimensions as dimension numbers.\n"
@@ -1482,8 +1482,8 @@ ShapeInference::InferDegenerateDimensionBroadcastShape(
   std::vector<int64> input_dnums(num_dims);
   input_dnums[0] = dnums.input_batch_dimension();
   input_dnums[1] = dnums.input_feature_dimension();
-  std::copy(dnums.spatial_dimensions().begin(),
-            dnums.spatial_dimensions().end(), input_dnums.begin() + 2);
+  std::copy(dnums.input_spatial_dimensions().begin(),
+            dnums.input_spatial_dimensions().end(), input_dnums.begin() + 2);
   std::sort(input_dnums.begin(), input_dnums.end());
 
   std::vector<int64> window_dnums(num_dims);
@@ -1493,12 +1493,20 @@ ShapeInference::InferDegenerateDimensionBroadcastShape(
             dnums.kernel_spatial_dimensions().end(), window_dnums.begin() + 2);
   std::sort(window_dnums.begin(), window_dnums.end());
 
+  std::vector<int64> output_dnums(num_dims);
+  output_dnums[0] = dnums.output_batch_dimension();
+  output_dnums[1] = dnums.output_feature_dimension();
+  std::copy(dnums.output_spatial_dimensions().begin(),
+            dnums.output_spatial_dimensions().end(), output_dnums.begin() + 2);
+  std::sort(output_dnums.begin(), output_dnums.end());
+
   std::vector<int64> expected_dnums(num_dims);
   std::iota(expected_dnums.begin(), expected_dnums.end(), 0);
 
   const auto in_range = [num_dims](int64 i) { return 0 <= i && i < num_dims; };
   if (!std::all_of(input_dnums.begin(), input_dnums.end(), in_range) ||
-      !std::all_of(window_dnums.begin(), window_dnums.end(), in_range)) {
+      !std::all_of(window_dnums.begin(), window_dnums.end(), in_range) ||
+      !std::all_of(output_dnums.begin(), output_dnums.end(), in_range)) {
     return InvalidArgument(
         "A dimension number is out of range in convolution: %s",
         dnums.DebugString().c_str());
@@ -1516,10 +1524,16 @@ ShapeInference::InferDegenerateDimensionBroadcastShape(
         "once: %s",
         dnums.DebugString().c_str());
   }
+  if (output_dnums != expected_dnums) {
+    return InvalidArgument(
+        "Output dimensions of convolution must contain each dimension exactly "
+        "once: %s",
+        dnums.DebugString().c_str());
+  }
 
   std::vector<int64> input_spatial_dims(num_spatial_dims);
   for (int i = 0; i < num_spatial_dims; ++i) {
-    input_spatial_dims[i] = lhs.dimensions(dnums.spatial_dimensions(i));
+    input_spatial_dims[i] = lhs.dimensions(dnums.input_spatial_dimensions(i));
   }
   const int64 input_features = lhs.dimensions(dnums.input_feature_dimension());
   const int64 input_batch = lhs.dimensions(dnums.input_batch_dimension());
@@ -1567,7 +1581,8 @@ ShapeInference::InferDegenerateDimensionBroadcastShape(
   dimensions[dnums.output_batch_dimension()] = input_batch;
   dimensions[dnums.output_feature_dimension()] = kernel_output_features;
   for (int i = 0; i < num_spatial_dims; ++i) {
-    dimensions[dnums.spatial_dimensions(i)] = window_output_shape.dimensions(i);
+    dimensions[dnums.output_spatial_dimensions(i)] =
+        window_output_shape.dimensions(i);
   }
 
   return ShapeUtil::MakeShape(lhs.element_type(), dimensions);
diff --git a/tensorflow/compiler/xla/service/shape_inference_test.cc b/tensorflow/compiler/xla/service/shape_inference_test.cc
index d12f7bd145..be93c879c0 100644
--- a/tensorflow/compiler/xla/service/shape_inference_test.cc
+++ b/tensorflow/compiler/xla/service/shape_inference_test.cc
@@ -395,8 +395,10 @@ TEST_F(ShapeInferenceTest, Convolve) {
   dnums.set_output_batch_dimension(0);
   dnums.set_input_feature_dimension(1);
   dnums.set_output_feature_dimension(1);
-  dnums.add_spatial_dimensions(2);
-  dnums.add_spatial_dimensions(3);
+  dnums.add_input_spatial_dimensions(2);
+  dnums.add_output_spatial_dimensions(2);
+  dnums.add_input_spatial_dimensions(3);
+  dnums.add_output_spatial_dimensions(3);
 
   // Dimension order: x1, batch, feature, x0
   Shape rhs_shape = ShapeUtil::MakeShape(F32, {2, 12, 11, 3});
@@ -437,8 +439,10 @@ TEST_F(ShapeInferenceTest, ConvolveWithWindowDilation) {
   dnums.set_output_batch_dimension(0);
   dnums.set_input_feature_dimension(1);
   dnums.set_output_feature_dimension(1);
-  dnums.add_spatial_dimensions(2);
-  dnums.add_spatial_dimensions(3);
+  dnums.add_input_spatial_dimensions(2);
+  dnums.add_output_spatial_dimensions(2);
+  dnums.add_input_spatial_dimensions(3);
+  dnums.add_output_spatial_dimensions(3);
 
   // Dimension order: x1, batch, feature, x0
   Shape rhs_shape = ShapeUtil::MakeShape(F32, {2, 12, 11, 3});
@@ -480,8 +484,10 @@ TEST_F(ShapeInferenceTest, ConvolveWithBaseDilation) {
   dnums.set_output_batch_dimension(0);
   dnums.set_input_feature_dimension(1);
   dnums.set_output_feature_dimension(1);
-  dnums.add_spatial_dimensions(2);
-  dnums.add_spatial_dimensions(3);
+  dnums.add_input_spatial_dimensions(2);
+  dnums.add_output_spatial_dimensions(2);
+  dnums.add_input_spatial_dimensions(3);
+  dnums.add_output_spatial_dimensions(3);
 
   // Dimension order: x1, batch, feature, x0
   Shape rhs_shape = ShapeUtil::MakeShape(F32, {2, 12, 11, 4});
@@ -524,8 +530,10 @@ TEST_F(ShapeInferenceTest, ConvolveDimensionNumbersOverlapError) {
   dnums.set_output_batch_dimension(3);
   dnums.set_input_feature_dimension(2);
   dnums.set_output_feature_dimension(2);
-  dnums.add_spatial_dimensions(0);
-  dnums.add_spatial_dimensions(1);
+  dnums.add_input_spatial_dimensions(0);
+  dnums.add_output_spatial_dimensions(0);
+  dnums.add_input_spatial_dimensions(1);
+  dnums.add_output_spatial_dimensions(1);
   dnums.set_kernel_input_feature_dimension(0);  // duplicated with kernel_x0
   dnums.set_kernel_output_feature_dimension(3);
   dnums.add_kernel_spatial_dimensions(0);
diff --git a/tensorflow/compiler/xla/service/transpose_folding.cc b/tensorflow/compiler/xla/service/transpose_folding.cc
index 8c2640adf5..fb55d4e543 100644
--- a/tensorflow/compiler/xla/service/transpose_folding.cc
+++ b/tensorflow/compiler/xla/service/transpose_folding.cc
@@ -58,27 +58,11 @@ TransposeFolding::OperandIndices CanFoldOperandsIntoConvolution(
     return {};
   }
 
-  const ConvolutionDimensionNumbers& dnums =
-      convolution.convolution_dimension_numbers();
-
   TransposeFolding::OperandIndices operand_set;
   for (int64 i = 0; i < convolution.operand_count(); ++i) {
     auto& operand = *convolution.operand(i);
     if (operand.opcode() == HloOpcode::kTranspose &&
         operand.user_count() == 1) {
-      const auto& transpose_dimensions = operand.dimensions();
-      // We can transpose the LHS so long as it doesn't move around spatial
-      // dimensions because ConvolutionDimensionNumbers doesn't have different
-      // fields for input and output spatial dimensions.
-      if (i == 0 &&
-          std::any_of(dnums.spatial_dimensions().begin(),
-                      dnums.spatial_dimensions().end(),
-                      [&](const int64 spatial_dimension) {
-                        return transpose_dimensions[spatial_dimension] !=
-                               spatial_dimension;
-                      })) {
-        continue;
-      }
       operand_set.push_back(i);
     }
   }
@@ -137,7 +121,7 @@ bool FoldTransposeIntoConvolution(InstructionOperandsPair pair) {
         transpose_dimensions[dnums.input_batch_dimension()]);
     new_dnums.set_input_feature_dimension(
         transpose_dimensions[dnums.input_feature_dimension()]);
-    for (const auto& spatial_dimension : dnums.spatial_dimensions()) {
+    for (const auto& spatial_dimension : dnums.input_spatial_dimensions()) {
       CHECK_EQ(spatial_dimension, transpose_dimensions[spatial_dimension]);
     }
     new_lhs = &transpose_operand;
diff --git a/tensorflow/compiler/xla/service/transpose_folding_test.cc b/tensorflow/compiler/xla/service/transpose_folding_test.cc
index 00462f9be1..6ac32e88f1 100644
--- a/tensorflow/compiler/xla/service/transpose_folding_test.cc
+++ b/tensorflow/compiler/xla/service/transpose_folding_test.cc
@@ -362,10 +362,18 @@ TEST_F(TransposeFoldingTest, FoldConvTransposeLhs) {
   EXPECT_EQ(
       dnums.input_batch_dimension(),
       new_conv->convolution_dimension_numbers().input_feature_dimension());
-  EXPECT_EQ(dnums.spatial_dimensions(0),
-            new_conv->convolution_dimension_numbers().spatial_dimensions(0));
-  EXPECT_EQ(dnums.spatial_dimensions(1),
-            new_conv->convolution_dimension_numbers().spatial_dimensions(1));
+  EXPECT_EQ(
+      dnums.input_spatial_dimensions(0),
+      new_conv->convolution_dimension_numbers().input_spatial_dimensions(0));
+  EXPECT_EQ(
+      dnums.input_spatial_dimensions(1),
+      new_conv->convolution_dimension_numbers().input_spatial_dimensions(1));
+  EXPECT_EQ(
+      dnums.output_spatial_dimensions(0),
+      new_conv->convolution_dimension_numbers().output_spatial_dimensions(0));
+  EXPECT_EQ(
+      dnums.output_spatial_dimensions(1),
+      new_conv->convolution_dimension_numbers().output_spatial_dimensions(1));
 }
 
 }  // namespace
diff --git a/tensorflow/compiler/xla/tests/convolution_dimension_numbers_test.cc b/tensorflow/compiler/xla/tests/convolution_dimension_numbers_test.cc
index b0a63bccbb..896b34fb6e 100644
--- a/tensorflow/compiler/xla/tests/convolution_dimension_numbers_test.cc
+++ b/tensorflow/compiler/xla/tests/convolution_dimension_numbers_test.cc
@@ -39,8 +39,8 @@ class ConvolutionDimensionNumbersTest : public ClientLibraryTestBase {};
 // Tests the convolution operation with invalid input dimension numbers.
 TEST_F(ConvolutionDimensionNumbersTest, InvalidInputDimensionNumbers) {
   auto dimension_numbers_status =
-      ComputationBuilder::CreateConvDimensionNumbers(0, 2, 0, 2, 2, 3, 0, 1, 2,
-                                                     3);
+      ComputationBuilder::CreateConvDimensionNumbers(0, 2, 2, 3, 0, 1, 2, 3, 0,
+                                                     1, 2, 3);
   ASSERT_FALSE(dimension_numbers_status.ok());
   ASSERT_THAT(dimension_numbers_status.status().error_message(),
               ::testing::HasSubstr("input are not unique"));
@@ -49,13 +49,23 @@ TEST_F(ConvolutionDimensionNumbersTest, InvalidInputDimensionNumbers) {
 // Tests the convolution operation with invalid weight dimension numbers.
 TEST_F(ConvolutionDimensionNumbersTest, InvalidWeightDimensionNumbers) {
   auto dimension_numbers_status =
-      ComputationBuilder::CreateConvDimensionNumbers(0, 1, 0, 1, 2, 3, 2, 3, 2,
-                                                     3);
+      ComputationBuilder::CreateConvDimensionNumbers(0, 1, 2, 3, 0, 1, 2, 3, 0,
+                                                     2, 2, 3);
   ASSERT_FALSE(dimension_numbers_status.ok());
   ASSERT_THAT(dimension_numbers_status.status().error_message(),
               ::testing::HasSubstr("weight are not unique"));
 }
 
+// Tests the convolution operation with invalid output dimension numbers.
+TEST_F(ConvolutionDimensionNumbersTest, InvalidOutputDimensionNumbers) {
+  auto dimension_numbers_status =
+      ComputationBuilder::CreateConvDimensionNumbers(0, 1, 2, 3, 0, 2, 2, 3, 0,
+                                                     1, 2, 3);
+  ASSERT_FALSE(dimension_numbers_status.ok());
+  ASSERT_THAT(dimension_numbers_status.status().error_message(),
+              ::testing::HasSubstr("output are not unique"));
+}
+
 XLA_TEST_F(ConvolutionDimensionNumbersTest,
            TwoConvsWithDifferentDimensionNumbers) {
   auto input_array = MakeUnique<Array4D<float>>(2, 3, 5, 5);
diff --git a/tensorflow/compiler/xla/tests/convolution_test.cc b/tensorflow/compiler/xla/tests/convolution_test.cc
index 8de7c9ffdc..2924c08615 100644
--- a/tensorflow/compiler/xla/tests/convolution_test.cc
+++ b/tensorflow/compiler/xla/tests/convolution_test.cc
@@ -370,9 +370,12 @@ XLA_TEST_F(ConvolutionTest, Convolve3D_1x4x2x3x3_2x2x2x3x3_Valid) {
     ConvolutionDimensionNumbers dnums;
     dnums.set_input_batch_dimension(0);
     dnums.set_output_batch_dimension(0);
-    dnums.add_spatial_dimensions(1);
-    dnums.add_spatial_dimensions(2);
-    dnums.add_spatial_dimensions(3);
+    dnums.add_input_spatial_dimensions(1);
+    dnums.add_output_spatial_dimensions(1);
+    dnums.add_input_spatial_dimensions(2);
+    dnums.add_output_spatial_dimensions(2);
+    dnums.add_input_spatial_dimensions(3);
+    dnums.add_output_spatial_dimensions(3);
     dnums.set_input_feature_dimension(4);
     dnums.set_output_feature_dimension(4);
     dnums.add_kernel_spatial_dimensions(0);
@@ -423,8 +426,10 @@ XLA_TEST_F(ConvolutionTest, Convolve2D_1x3x3x5_3x3x5x5_Valid) {
     ConvolutionDimensionNumbers dnums;
     dnums.set_input_batch_dimension(0);
     dnums.set_output_batch_dimension(0);
-    dnums.add_spatial_dimensions(1);
-    dnums.add_spatial_dimensions(2);
+    dnums.add_input_spatial_dimensions(1);
+    dnums.add_output_spatial_dimensions(1);
+    dnums.add_input_spatial_dimensions(2);
+    dnums.add_output_spatial_dimensions(2);
     dnums.set_input_feature_dimension(3);
     dnums.set_output_feature_dimension(3);
     dnums.add_kernel_spatial_dimensions(0);
@@ -538,7 +543,8 @@ XLA_TEST_P(Convolve1D1WindowTest, Convolve1D1Window) {
     ConvolutionDimensionNumbers dnums;
     dnums.set_input_batch_dimension(0);
     dnums.set_output_batch_dimension(0);
-    dnums.add_spatial_dimensions(1);
+    dnums.add_input_spatial_dimensions(1);
+    dnums.add_output_spatial_dimensions(1);
     dnums.set_input_feature_dimension(2);
     dnums.set_output_feature_dimension(2);
     dnums.add_kernel_spatial_dimensions(0);
diff --git a/tensorflow/compiler/xla/tests/convolution_variants_test.cc b/tensorflow/compiler/xla/tests/convolution_variants_test.cc
index 9b36e3722b..9c1145def8 100644
--- a/tensorflow/compiler/xla/tests/convolution_variants_test.cc
+++ b/tensorflow/compiler/xla/tests/convolution_variants_test.cc
@@ -320,9 +320,10 @@ XLA_TEST_F(ConvolutionVariantsTest, Filter3x3in2x2Padded) {
   Array4D<float> input_array(1, 1, 2, 2, {1, 2, 3, 4});
   auto input = builder.ConstantR4FromArray4D<float>(input_array);
 
-  const Array4D<float> filter_array(1, 1, 3, 3, {10000, 0, 1000,  // row 0
-                                                 0, 100, 0,       // row 1
-                                                 10, 0, 1});      // row 2
+  const Array4D<float> filter_array(1, 1, 3, 3,
+                                    {10000, 0, 1000,  // row 0
+                                     0, 100, 0,       // row 1
+                                     10, 0, 1});      // row 2
   auto filter = builder.ConstantR4FromArray4D<float>(filter_array);
 
   builder.Conv(input, filter, {1, 1}, Padding::kSame);
@@ -472,7 +473,9 @@ XLA_TEST_F(ConvolutionVariantsTest, Filter1x1x2x2Input3x1x2x2) {
   builder.Conv(input, filter, {1, 1}, Padding::kValid);
 
   std::vector<float> expected_data = {
-      23, 33, 43,
+      23,
+      33,
+      43,
   };
   Array4D<float> expected(bs, 1, 1, 1, expected_data);
   ComputeAndCompareR4<float>(&builder, expected, {}, error_spec_);
@@ -669,10 +672,11 @@ XLA_TEST_F(ConvolutionVariantsTest, FlatLhsDilation) {
   std::iota(input_data.begin(), input_data.end(), 1.0);
   Array4D<float> input_array(1, 1, 3, 4, input_data);
 
-  Array4D<float> filter_array(1, 1, 4, 3, {100, 10, 1,  //
-                                           200, 20, 2,  //
-                                           300, 30, 3,  //
-                                           400, 40, 4});
+  Array4D<float> filter_array(1, 1, 4, 3,
+                              {100, 10, 1,  //
+                               200, 20, 2,  //
+                               300, 30, 3,  //
+                               400, 40, 4});
   auto input = builder.ConstantR4FromArray4D<float>(input_array);
   auto filter = builder.ConstantR4FromArray4D<float>(filter_array);
   builder.ConvGeneralDilated(
@@ -681,9 +685,10 @@ XLA_TEST_F(ConvolutionVariantsTest, FlatLhsDilation) {
       /*rhs_dilation=*/{},
       ComputationBuilder::CreateDefaultConvDimensionNumbers());
 
-  Array4D<float> expected(1, 1, 3, 5, {204, 40, 406, 60, 608,       //
-                                       1518, 180, 1821, 210, 2124,  //
-                                       4146, 460, 4651, 510, 5156});
+  Array4D<float> expected(1, 1, 3, 5,
+                          {204, 40, 406, 60, 608,       //
+                           1518, 180, 1821, 210, 2124,  //
+                           4146, 460, 4651, 510, 5156});
   ComputeAndCompareR4<float>(&builder, expected, {}, error_spec_);
 }
 
@@ -926,7 +931,8 @@ XLA_TEST_F(ConvolutionVariantsTest, RandomData_Input16x16x1x1_Filter16x16x1x1) {
   ComputeAndCompareR4<float>(&builder, *expected, {}, error_spec_);
 }
 
-XLA_TEST_F(ConvolutionVariantsTest, RandomData_Input16x16x16x16_Filter16x16x16x16) {
+XLA_TEST_F(ConvolutionVariantsTest,
+           RandomData_Input16x16x16x16_Filter16x16x16x16) {
   constexpr int bs = 16;
   constexpr int iz = 16;
   constexpr int oz = 16;
@@ -976,8 +982,10 @@ XLA_TEST_F(ConvolutionVariantsTest, Filter1x2x1x1Input1x2x3x1GeneralPadding) {
   // NHWC input format.
   dnums.set_input_batch_dimension(0);
   dnums.set_output_batch_dimension(0);
-  dnums.add_spatial_dimensions(1);
-  dnums.add_spatial_dimensions(2);
+  dnums.add_input_spatial_dimensions(1);
+  dnums.add_output_spatial_dimensions(1);
+  dnums.add_input_spatial_dimensions(2);
+  dnums.add_output_spatial_dimensions(2);
   dnums.set_input_feature_dimension(3);
   dnums.set_output_feature_dimension(3);
 
@@ -1018,8 +1026,10 @@ XLA_TEST_F(ConvolutionVariantsTest, Filter1x1x1x1Input1x2x3x1GeneralPadding) {
   // NHWC input format.
   dnums.set_input_batch_dimension(0);
   dnums.set_output_batch_dimension(0);
-  dnums.add_spatial_dimensions(1);
-  dnums.add_spatial_dimensions(2);
+  dnums.add_input_spatial_dimensions(1);
+  dnums.add_output_spatial_dimensions(1);
+  dnums.add_input_spatial_dimensions(2);
+  dnums.add_output_spatial_dimensions(2);
   dnums.set_input_feature_dimension(3);
   dnums.set_output_feature_dimension(3);
 
@@ -1060,8 +1070,10 @@ XLA_TEST_F(ConvolutionVariantsTest, Filter1x1x1x1Input1x2x3x1NoPadding) {
   // NHWC input format.
   dnums.set_input_batch_dimension(0);
   dnums.set_output_batch_dimension(0);
-  dnums.add_spatial_dimensions(1);
-  dnums.add_spatial_dimensions(2);
+  dnums.add_input_spatial_dimensions(1);
+  dnums.add_output_spatial_dimensions(1);
+  dnums.add_input_spatial_dimensions(2);
+  dnums.add_output_spatial_dimensions(2);
   dnums.set_input_feature_dimension(3);
   dnums.set_output_feature_dimension(3);
 
@@ -1099,8 +1111,10 @@ XLA_TEST_F(ConvolutionVariantsTest, Filter1x1x2x3Input1x2x3x2NoPadding) {
   // NHWC input format.
   dnums.set_input_batch_dimension(0);
   dnums.set_output_batch_dimension(0);
-  dnums.add_spatial_dimensions(1);
-  dnums.add_spatial_dimensions(2);
+  dnums.add_input_spatial_dimensions(1);
+  dnums.add_output_spatial_dimensions(1);
+  dnums.add_input_spatial_dimensions(2);
+  dnums.add_output_spatial_dimensions(2);
   dnums.set_input_feature_dimension(3);
   dnums.set_output_feature_dimension(3);
 
@@ -1131,7 +1145,8 @@ XLA_TEST_F(ConvolutionVariantsTest, Filter1x1x2x3Input1x2x3x2NoPadding) {
 //   Conv([1,2,3], Reverse([5,6]), padding_low=1)
 // into
 //   BackwardInputConv([1,2,3], [5,6], padding_low=0, padding_high=1)
-XLA_TEST_F(ConvolutionVariantsTest, BackwardInputLowPaddingLessThanHighPadding) {
+XLA_TEST_F(ConvolutionVariantsTest,
+           BackwardInputLowPaddingLessThanHighPadding) {
   ComputationBuilder builder(client_, TestName());
 
   auto gradients = builder.ConstantR4FromArray4D<float>(
@@ -1149,7 +1164,8 @@ XLA_TEST_F(ConvolutionVariantsTest, BackwardInputLowPaddingLessThanHighPadding)
 //   Conv([1], Reverse([1,10,100]), padding_high=3, base_dilation=3)
 // into
 //   BackwardInputConv([1], [1,10,100], stride=3, padding=(2,1))
-XLA_TEST_F(ConvolutionVariantsTest, BackwardInputLowPaddingGreaterThanHighPadding) {
+XLA_TEST_F(ConvolutionVariantsTest,
+           BackwardInputLowPaddingGreaterThanHighPadding) {
   ComputationBuilder builder(client_, TestName());
 
   auto gradients = builder.ConstantR4FromArray4D<float>(
@@ -1206,7 +1222,8 @@ XLA_TEST_F(ConvolutionVariantsTest, BackwardInputWithNegativePaddingHigh) {
   ComputeAndCompareR4<float>(&builder, {{{{12, 23, 30, 0}}}}, {}, error_spec_);
 }
 
-XLA_TEST_F(ConvolutionVariantsTest, BackwardFilterLowPaddingLessThanHighPadding) {
+XLA_TEST_F(ConvolutionVariantsTest,
+           BackwardFilterLowPaddingLessThanHighPadding) {
   ComputationBuilder builder(client_, TestName());
 
   // activations:      1,2,3,4  ---pad--> 0,1,2,3,4,0,0
@@ -1230,7 +1247,7 @@ XLA_TEST_F(ConvolutionVariantsTest, BackwardFilterLowPaddingLessThanHighPadding)
 }
 
 XLA_TEST_F(ConvolutionVariantsTest,
-       BackwardFilterLowPaddingGreaterThanHighPadding) {
+           BackwardFilterLowPaddingGreaterThanHighPadding) {
   ComputationBuilder builder(client_, TestName());
 
   // activations:      1,2,3,4  ---pad--> 0,0,1,2,3,4
diff --git a/tensorflow/compiler/xla/tools/parser/hlo_parser.cc b/tensorflow/compiler/xla/tools/parser/hlo_parser.cc
index a10497665a..47979ec6f3 100644
--- a/tensorflow/compiler/xla/tools/parser/hlo_parser.cc
+++ b/tensorflow/compiler/xla/tools/parser/hlo_parser.cc
@@ -1685,7 +1685,7 @@ bool HloParser::ParseConvolutionDimensionNumbers(
           StrCat("expects unique lhs dimension numbers, but sees ", lhs));
     }
     for (int i = 0; i < rank - 2; i++) {
-      dnums->add_spatial_dimensions(-1);
+      dnums->add_input_spatial_dimensions(-1);
     }
     for (int i = 0; i < rank; i++) {
       char c = lhs[i];
@@ -1694,7 +1694,7 @@ bool HloParser::ParseConvolutionDimensionNumbers(
       } else if (c == 'f') {
         dnums->set_input_feature_dimension(i);
       } else if (c < '0' + rank && c >= '0') {
-        dnums->set_spatial_dimensions(c - '0', i);
+        dnums->set_input_spatial_dimensions(c - '0', i);
       } else {
         return TokenError(
             Printf("expects [0-%lldbf] in lhs dimension numbers", rank - 1));
@@ -1732,6 +1732,9 @@ bool HloParser::ParseConvolutionDimensionNumbers(
       return TokenError(
           StrCat("expects unique output dimension numbers, but sees ", out));
     }
+    for (int i = 0; i < rank - 2; i++) {
+      dnums->add_output_spatial_dimensions(-1);
+    }
     for (int i = 0; i < rank; i++) {
       char c = out[i];
       if (c == 'b') {
@@ -1739,11 +1742,7 @@ bool HloParser::ParseConvolutionDimensionNumbers(
       } else if (c == 'f') {
         dnums->set_output_feature_dimension(i);
       } else if (c < '0' + rank && c >= '0') {
-        if (dnums->spatial_dimensions(c - '0') != i) {
-          return TokenError(
-              "output spatial dimensions should be the same as input spatial "
-              "dimensions");
-        }
+        dnums->set_output_spatial_dimensions(c - '0', i);
       } else {
         return TokenError(
             Printf("expects [0-%lldbf] in output dimension numbers", rank - 1));
diff --git a/tensorflow/compiler/xla/tools/parser/hlo_parser_test.cc b/tensorflow/compiler/xla/tools/parser/hlo_parser_test.cc
index e56f120def..90cdb87a1e 100644
--- a/tensorflow/compiler/xla/tools/parser/hlo_parser_test.cc
+++ b/tensorflow/compiler/xla/tools/parser/hlo_parser_test.cc
@@ -873,12 +873,6 @@ ENTRY %Convolve1D1Window_0.v3 (input: f32[1,2,1], filter: f32[1,1,1]) -> f32[1,2
                       .status()
                       .error_message(),
                   "must have the same rank");
-
-  ExpectHasSubstr(Parse(StrCat(prefix, ",dim_labels=0bf_io0->b0f", suffix))
-                      .status()
-                      .error_message(),
-                  "output spatial dimensions should be the same as input "
-                  "spatial dimensions");
 }
 
 TEST_F(HloParserTest, UnexpectedAttribute) {
diff --git a/tensorflow/compiler/xla/xla_data.proto b/tensorflow/compiler/xla/xla_data.proto
index d3c5a88807..b560354050 100644
--- a/tensorflow/compiler/xla/xla_data.proto
+++ b/tensorflow/compiler/xla/xla_data.proto
@@ -417,15 +417,9 @@ message ConvolutionDimensionNumbers {
   // The number of the dimension that represents features in the input.
   int64 input_feature_dimension = 8;
 
-  // The number of the dimension that represents batch in the output.
-  int64 output_batch_dimension = 9;
-
-  // The number of the dimension that represents features in the output.
-  int64 output_feature_dimension = 10;
-
   // The dimension numbers for the spatial dimensions that the window
-  // moves through in the input (lhs) and output.
-  repeated int64 spatial_dimensions = 5;
+  // moves through in the input.
+  repeated int64 input_spatial_dimensions = 11;
 
   // The number of the dimension that represents input features in the
   // convolutional kernel (rhs).
@@ -439,6 +433,18 @@ message ConvolutionDimensionNumbers {
   // moves through in the kernel (rhs). window.strides(0) is the
   // stride in the kernel_spatial_dimensions(0) dimension.
   repeated int64 kernel_spatial_dimensions = 6;
+
+  // The number of the dimension that represents batch in the output.
+  int64 output_batch_dimension = 9;
+
+  // The number of the dimension that represents features in the output.
+  int64 output_feature_dimension = 10;
+
+  // The dimension numbers for the spatial dimensions that the window
+  // moves through in the output.
+  repeated int64 output_spatial_dimensions = 12;
+
+  // Next = 13
 };
 
 message ConvolveRequest {
-- 
GitLab


From cf0717bfd701d3a11143e00545ced4019b067a51 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20=C5=A0uppa?= <mrshu@users.noreply.github.com>
Date: Tue, 28 Nov 2017 08:39:52 +0100
Subject: [PATCH 0336/1225] softmax_cross_entropy: Improve docstring

Improve docstring of `softmax_cross_entropy`.
---
 tensorflow/python/ops/losses/losses_impl.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/python/ops/losses/losses_impl.py b/tensorflow/python/ops/losses/losses_impl.py
index 55a18d28ca..b74971f654 100644
--- a/tensorflow/python/ops/losses/losses_impl.py
+++ b/tensorflow/python/ops/losses/losses_impl.py
@@ -652,7 +652,7 @@ def softmax_cross_entropy(
 
   Args:
     onehot_labels: `[batch_size, num_classes]` target one-hot-encoded labels.
-    logits: [batch_size, num_classes] logits outputs of the network .
+    logits: `[batch_size, num_classes]` logits outputs of the network .
     weights: Optional `Tensor` whose rank is either 0, or rank 1 and is
       broadcastable to the loss which is a `Tensor` of shape `[batch_size]`.
     label_smoothing: If greater than 0 then smooth the labels.
-- 
GitLab


From 64e1459ef218263046fe7afd71b02548fc01383a Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 28 Nov 2017 03:46:56 -0800
Subject: [PATCH 0337/1225] Add DeviceFactory to list of exported headers.

PiperOrigin-RevId: 177140363
---
 tensorflow/core/BUILD | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD
index 30ff4ef358..4ca6fb1631 100644
--- a/tensorflow/core/BUILD
+++ b/tensorflow/core/BUILD
@@ -746,6 +746,7 @@ tf_cuda_library(
     name = "core_cpu",
     hdrs = [
         "common_runtime/device.h",
+        "common_runtime/device_factory.h",
         "common_runtime/optimization_registry.h",
         "common_runtime/shape_refiner.h",
         "graph/algorithm.h",
-- 
GitLab


From b262375fa67d82d84e8cf9304c4c4d63411a0bc3 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 28 Nov 2017 04:40:54 -0800
Subject: [PATCH 0338/1225] Fixed two bugs when importing MetaGraphDefs that
 contain ResourceVariables.

1) In the ResourceVariable implementation, pass import_scope when creating the SaveSliceInfo. This is present in the implementation of plain variables, and was likely a copy-and-paste omission.
2) When importing a MetaGraphDef, restoring the GLOBAL_VARIABLES and TRAINABLE_VARIABLES collections will add ops to the graph for ResourceVariables. Made graph construction deterministic by fixing the order in which collections are restored.

PiperOrigin-RevId: 177144138
---
 tensorflow/python/framework/meta_graph.py     |  2 +-
 .../python/framework/meta_graph_test.py       | 48 ++++++++++++++++---
 .../python/ops/resource_variable_ops.py       |  3 +-
 3 files changed, 45 insertions(+), 8 deletions(-)

diff --git a/tensorflow/python/framework/meta_graph.py b/tensorflow/python/framework/meta_graph.py
index a8bc2d2e3f..44ddc013b2 100644
--- a/tensorflow/python/framework/meta_graph.py
+++ b/tensorflow/python/framework/meta_graph.py
@@ -663,7 +663,7 @@ def import_scoped_meta_graph(meta_graph_or_file,
         [part for part in [graph.get_name_scope(), import_scope] if part])
 
     # Restores all the other collections.
-    for key, col_def in meta_graph_def.collection_def.items():
+    for key, col_def in sorted(meta_graph_def.collection_def.items()):
       # Don't add unbound_inputs to the new graph.
       if key == unbound_inputs_col_name:
         continue
diff --git a/tensorflow/python/framework/meta_graph_test.py b/tensorflow/python/framework/meta_graph_test.py
index 06cee46bf6..4c22c913b8 100644
--- a/tensorflow/python/framework/meta_graph_test.py
+++ b/tensorflow/python/framework/meta_graph_test.py
@@ -662,22 +662,36 @@ class MetaGraphWithVariableScopeTest(test.TestCase):
 class ExportImportAcrossScopesTest(test.TestCase):
 
   def testPartionedVariables(self):
-    def make_graph_with_partitioned_variables():
+
+    def make_graph_with_partitioned_variables(use_resource):
       variable_scope.get_variable(
           name="weights",
           partitioner=partitioned_variables.fixed_size_partitioner(3, axis=0),
-          initializer=random_ops.truncated_normal([100, 10]))
-    self._testExportImportAcrossScopes(make_graph_with_partitioned_variables)
+          initializer=random_ops.truncated_normal([100, 10]),
+          use_resource=use_resource)
+      # The next variable illustrates the necessity of restoring collections
+      # in a deterministic fashion when using ResourceVariables.
+      variable_scope.get_variable(
+          name="another",
+          shape=[],
+          collections=["a", "b", "z", "f", "e", "d", "g"],
+          use_resource=use_resource)
+
+    self._testExportImportAcrossScopes(
+        make_graph_with_partitioned_variables, use_resource=False)
+    self._testExportImportAcrossScopes(
+        make_graph_with_partitioned_variables, use_resource=True)
 
-  def _testExportImportAcrossScopes(self, graph_fn):
+  def _testExportImportAcrossScopes(self, graph_fn, use_resource):
     """Tests export and importing a graph across scopes.
 
     Args:
       graph_fn: A closure that creates a graph on the current scope.
+      use_resource: A bool indicating whether or not to use ResourceVariables.
     """
     with ops.Graph().as_default() as original_graph:
       with variable_scope.variable_scope("dropA/dropB/keepA"):
-        graph_fn()
+        graph_fn(use_resource=use_resource)
     exported_meta_graph_def = meta_graph.export_scoped_meta_graph(
         graph=original_graph,
         export_scope="dropA/dropB")[0]
@@ -689,10 +703,32 @@ class ExportImportAcrossScopesTest(test.TestCase):
 
     with ops.Graph().as_default() as expected_graph:
       with variable_scope.variable_scope("importA/keepA"):
-        graph_fn()
+        graph_fn(use_resource=use_resource)
+
+      if use_resource:
+        # Bringing in a collection that contains ResourceVariables adds ops
+        # to the graph, so mimic the same behavior.
+        for collection_key in sorted([
+            ops.GraphKeys.GLOBAL_VARIABLES,
+            ops.GraphKeys.TRAINABLE_VARIABLES,
+        ]):
+          for var in expected_graph.get_collection(collection_key):
+            var._read_variable_op()
 
     result = meta_graph.export_scoped_meta_graph(graph=imported_graph)[0]
     expected = meta_graph.export_scoped_meta_graph(graph=expected_graph)[0]
+
+    if use_resource:
+      # Clear all shared_name attributes before comparing, since they are
+      # supposed to be orthogonal to scopes.
+      for meta_graph_def in [result, expected]:
+        for node in meta_graph_def.graph_def.node:
+          shared_name_attr = "shared_name"
+          shared_name_value = node.attr.get(shared_name_attr, None)
+          if shared_name_value and shared_name_value.HasField("s"):
+            if shared_name_value.s:
+              node.attr[shared_name_attr].s = b""
+
     self.assertProtoEquals(expected, result)
 
 
diff --git a/tensorflow/python/ops/resource_variable_ops.py b/tensorflow/python/ops/resource_variable_ops.py
index a746735f58..343e38f960 100644
--- a/tensorflow/python/ops/resource_variable_ops.py
+++ b/tensorflow/python/ops/resource_variable_ops.py
@@ -513,7 +513,8 @@ class ResourceVariable(variables.Variable):
       self._cached_value = None
     if variable_def.HasField("save_slice_info_def"):
       self._save_slice_info = variables.Variable.SaveSliceInfo(
-          save_slice_info_def=variable_def.save_slice_info_def)
+          save_slice_info_def=variable_def.save_slice_info_def,
+          import_scope=import_scope)
     else:
       self._save_slice_info = None
     self._caching_device = None
-- 
GitLab


From 6ec7e7680a8a1c5eaf1054a9eb81c8f608aadb90 Mon Sep 17 00:00:00 2001
From: Daniel Ylitalo <daniel.ylitalo@mytaste.com>
Date: Tue, 28 Nov 2017 15:30:47 +0100
Subject: [PATCH 0339/1225] Add FreeBSD compatibility

---
 tensorflow/contrib/lite/kernels/internal/BUILD |  9 +++++++++
 tensorflow/core/platform/env.cc                | 14 ++++++++++++++
 third_party/flatbuffers/flatbuffers.BUILD      | 14 ++++++++++++--
 3 files changed, 35 insertions(+), 2 deletions(-)
 mode change 100644 => 100755 tensorflow/contrib/lite/kernels/internal/BUILD
 mode change 100644 => 100755 tensorflow/core/platform/env.cc
 mode change 100644 => 100755 third_party/flatbuffers/flatbuffers.BUILD

diff --git a/tensorflow/contrib/lite/kernels/internal/BUILD b/tensorflow/contrib/lite/kernels/internal/BUILD
old mode 100644
new mode 100755
index 288534099b..a3ecb2ebf6
--- a/tensorflow/contrib/lite/kernels/internal/BUILD
+++ b/tensorflow/contrib/lite/kernels/internal/BUILD
@@ -124,6 +124,13 @@ config_setting(
     },
 )
 
+config_setting(
+    name = "freebsd",
+    values = {
+        "cpu": "freebsd",
+    },
+)
+
 cc_library(
     name = "optimized_base",
     srcs = [],
@@ -147,6 +154,7 @@ cc_library(
         ":x86": tflite_deps_intel,
         ":x86_64": tflite_deps_intel,
         ":darwin": tflite_deps_intel,
+        ":freebsd": tflite_deps_intel,
         "//conditions:default": [],
     }),
 )
@@ -224,6 +232,7 @@ cc_library(
         ":x86": tflite_deps_intel,
         ":x86_64": tflite_deps_intel,
         ":darwin": tflite_deps_intel,
+        ":freebsd": tflite_deps_intel,
         "//conditions:default": [],
     }),
 )
diff --git a/tensorflow/core/platform/env.cc b/tensorflow/core/platform/env.cc
old mode 100644
new mode 100755
index 12ef55ec26..5118c4cb59
--- a/tensorflow/core/platform/env.cc
+++ b/tensorflow/core/platform/env.cc
@@ -20,6 +20,10 @@ limitations under the License.
 #if defined(__APPLE__)
 #include <mach-o/dyld.h>
 #endif
+#if defined(__FreeBSD__)
+#include <sys/types.h>
+#include <sys/sysctl.h>
+#endif
 #if defined(PLATFORM_WINDOWS)
 #include <windows.h>
 #include "tensorflow/core/platform/windows/windows_file_system.h"
@@ -266,6 +270,13 @@ string Env::GetExecutablePath() {
   char unresolved_path[buffer_size];
   _NSGetExecutablePath(unresolved_path, &buffer_size);
   CHECK(realpath(unresolved_path, exe_path));
+#elif defined(__FreeBSD__)
+  int mib[4] = {CTL_KERN, KERN_PROC, KERN_PROC_PATHNAME, -1};
+  size_t exe_path_size = PATH_MAX;
+
+  if (sysctl(mib, 4, exe_path, &exe_path_size, NULL, 0) != 0) {
+    // Not sure what to do if it fails?
+  }
 #elif defined(PLATFORM_WINDOWS)
   HMODULE hModule = GetModuleHandleW(NULL);
   WCHAR wc_file_path[MAX_PATH] = {0};
@@ -293,6 +304,9 @@ bool Env::LocalTempFilename(string* filename) {
     pthread_threadid_np(nullptr, &tid64);
     int32 tid = static_cast<int32>(tid64);
     int32 pid = static_cast<int32>(getpid());
+#elif defined(__FreeBSD__)
+    int32 tid = static_cast<int32>((long) pthread_self());
+    int32 pid = static_cast<int32>(getpid());
 #elif defined(PLATFORM_WINDOWS)
     int32 tid = static_cast<int32>(GetCurrentThreadId());
     int32 pid = static_cast<int32>(GetCurrentProcessId());
diff --git a/third_party/flatbuffers/flatbuffers.BUILD b/third_party/flatbuffers/flatbuffers.BUILD
old mode 100644
new mode 100755
index 0a76adcf91..c06c269bb2
--- a/third_party/flatbuffers/flatbuffers.BUILD
+++ b/third_party/flatbuffers/flatbuffers.BUILD
@@ -4,6 +4,12 @@ package(
 
 licenses(["notice"])  # Apache 2.0
 
+config_setting(
+    name = "freebsd",
+    values = {"cpu": "freebsd"},
+    visibility = ["//visibility:public"],
+)
+
 FLATBUFFERS_COPTS = [
     "-fexceptions",
 ] + select({
@@ -107,10 +113,14 @@ cc_binary(
         "grpc/",
         "include/",
     ],
-    linkopts = [
+    linkopts = select({
+    ":freebsd": [
         "-lm",
-        "-ldl",
     ],
+    "//conditions:default": [
+        "-lm",
+        "-ldl",
+    ]}),
     deps = [
         ":flatc_library",
     ],
-- 
GitLab


From 1a53e4a82f1d077859214dab4d4fb84479ae70e6 Mon Sep 17 00:00:00 2001
From: Daniel Ylitalo <daniel.ylitalo@mytaste.com>
Date: Tue, 28 Nov 2017 15:33:34 +0100
Subject: [PATCH 0340/1225] change back file permissions

---
 tensorflow/contrib/lite/kernels/internal/BUILD | 0
 tensorflow/core/platform/env.cc                | 0
 third_party/flatbuffers/flatbuffers.BUILD      | 0
 3 files changed, 0 insertions(+), 0 deletions(-)
 mode change 100755 => 100644 tensorflow/contrib/lite/kernels/internal/BUILD
 mode change 100755 => 100644 tensorflow/core/platform/env.cc
 mode change 100755 => 100644 third_party/flatbuffers/flatbuffers.BUILD

diff --git a/tensorflow/contrib/lite/kernels/internal/BUILD b/tensorflow/contrib/lite/kernels/internal/BUILD
old mode 100755
new mode 100644
diff --git a/tensorflow/core/platform/env.cc b/tensorflow/core/platform/env.cc
old mode 100755
new mode 100644
diff --git a/third_party/flatbuffers/flatbuffers.BUILD b/third_party/flatbuffers/flatbuffers.BUILD
old mode 100755
new mode 100644
-- 
GitLab


From 92d65fe6d71b5b80c130f9d9fb4474c4587f2855 Mon Sep 17 00:00:00 2001
From: Derek Murray <mrry@google.com>
Date: Tue, 28 Nov 2017 09:42:43 -0800
Subject: [PATCH 0341/1225] Add `ConfigProto.isolate_session_state` option for
 the distributed runtime.

Setting this option to true when creating a session ensures that no
stateful resources (variables, queues, iterators, etc.) will be
visible to any other session running on the same server, and those
resources will be deleted when the session is closed.

The default behavior, namely that all `tf.Variable` objects are shared by
default and most other resources are shared when their `shared_name` attr is
non-empty, is preserved.

This change augments the semantics of the WorkerService.CreateWorkerSession
RPC. Now, if the server_def in the request is empty, it implies that
the worker should use its default ClusterSpec. Note that clusters created
using ClusterSpec propagation always have isolated session state, and are
unaffected by this change.

PiperOrigin-RevId: 177173545
---
 tensorflow/core/common_runtime/device.h       |  2 +-
 .../core/common_runtime/renamed_device.cc     | 11 ++-
 .../core/common_runtime/renamed_device.h      | 16 +++-
 tensorflow/core/distributed_runtime/BUILD     | 12 +++
 .../distributed_runtime/master_session.cc     | 32 ++++---
 .../core/distributed_runtime/session_mgr.cc   | 23 +++--
 .../core/distributed_runtime/session_mgr.h    |  4 +-
 .../distributed_runtime/session_mgr_test.cc   | 66 ++++++++++++--
 tensorflow/core/distributed_runtime/worker.cc |  3 +-
 .../worker_cache_wrapper.h                    | 90 +++++++++++++++++++
 tensorflow/core/protobuf/config.proto         |  6 +-
 tensorflow/core/protobuf/worker.proto         |  4 +
 .../client/session_clusterspec_prop_test.py   | 43 +++++++++
 tensorflow/python/training/server_lib_test.py | 89 ++++++++++++++++++
 .../api/golden/tensorflow.-config-proto.pbtxt |  4 +
 15 files changed, 374 insertions(+), 31 deletions(-)
 create mode 100644 tensorflow/core/distributed_runtime/worker_cache_wrapper.h

diff --git a/tensorflow/core/common_runtime/device.h b/tensorflow/core/common_runtime/device.h
index 3912cd177b..d5a452a796 100644
--- a/tensorflow/core/common_runtime/device.h
+++ b/tensorflow/core/common_runtime/device.h
@@ -131,7 +131,7 @@ class Device : public DeviceBase {
   OpSegment* op_segment() { return &op_seg_; }
 
   // Returns the resource manager associated w/ this device.
-  ResourceMgr* resource_manager() { return rmgr_; }
+  virtual ResourceMgr* resource_manager() { return rmgr_; }
 
   // Summarizes the status of this Device, for debugging.
   string DebugString() const { return ProtoDebugString(device_attributes_); }
diff --git a/tensorflow/core/common_runtime/renamed_device.cc b/tensorflow/core/common_runtime/renamed_device.cc
index fa9713735e..56766a8df4 100644
--- a/tensorflow/core/common_runtime/renamed_device.cc
+++ b/tensorflow/core/common_runtime/renamed_device.cc
@@ -21,7 +21,8 @@ namespace tensorflow {
 /* static */
 Device* RenamedDevice::NewRenamedDevice(const string& new_base,
                                         Device* underlying,
-                                        bool owns_underlying) {
+                                        bool owns_underlying,
+                                        bool isolate_session_state) {
   DeviceNameUtils::ParsedName parsed_name;
   CHECK(DeviceNameUtils::ParseFullName(new_base, &parsed_name));
   DeviceNameUtils::ParsedName underlying_parsed_name =
@@ -35,15 +36,17 @@ Device* RenamedDevice::NewRenamedDevice(const string& new_base,
                                           parsed_name.id);
   DeviceAttributes attributes(underlying->attributes());
   attributes.set_name(name);
-  return new RenamedDevice(underlying, attributes, owns_underlying);
+  return new RenamedDevice(underlying, attributes, owns_underlying,
+                           isolate_session_state);
 }
 
 RenamedDevice::RenamedDevice(Device* underlying,
                              const DeviceAttributes& attributes,
-                             bool owns_underlying)
+                             bool owns_underlying, bool isolate_session_state)
     : Device(underlying->env(), attributes),
       underlying_(underlying),
-      owns_underlying_(owns_underlying) {}
+      owns_underlying_(owns_underlying),
+      isolate_session_state_(isolate_session_state) {}
 
 RenamedDevice::~RenamedDevice() {
   if (owns_underlying_) {
diff --git a/tensorflow/core/common_runtime/renamed_device.h b/tensorflow/core/common_runtime/renamed_device.h
index 3103ca0751..c5c204d4fa 100644
--- a/tensorflow/core/common_runtime/renamed_device.h
+++ b/tensorflow/core/common_runtime/renamed_device.h
@@ -29,7 +29,9 @@ namespace tensorflow {
 class RenamedDevice : public Device {
  public:
   static Device* NewRenamedDevice(const string& new_base, Device* underlying,
-                                  bool owns_underlying);
+                                  bool owns_underlying,
+                                  bool isolate_session_state);
+
   ~RenamedDevice() override;
 
   // Below are virtual methods defined on DeviceBase
@@ -113,11 +115,21 @@ class RenamedDevice : public Device {
     return underlying_->FillContextMap(graph, device_context_map);
   }
 
+  // Returns the resource manager associated w/ this device.
+  ResourceMgr* resource_manager() override {
+    if (isolate_session_state_) {
+      return Device::resource_manager();
+    } else {
+      return underlying_->resource_manager();
+    }
+  }
+
  private:
   RenamedDevice(Device* underlying, const DeviceAttributes& attributes,
-                bool owns_underlying);
+                bool owns_underlying, bool isolate_session_state);
   Device* const underlying_;
   const bool owns_underlying_;
+  const bool isolate_session_state_;
 };
 
 }  // namespace tensorflow
diff --git a/tensorflow/core/distributed_runtime/BUILD b/tensorflow/core/distributed_runtime/BUILD
index 93adc7ef4f..29164bbffe 100644
--- a/tensorflow/core/distributed_runtime/BUILD
+++ b/tensorflow/core/distributed_runtime/BUILD
@@ -140,6 +140,7 @@ cc_library(
     hdrs = ["session_mgr.h"],
     deps = [
         ":graph_mgr",
+        ":worker_cache_wrapper",
         ":worker_session",
         "//tensorflow/core:core_cpu_internal",
         "//tensorflow/core:lib",
@@ -263,6 +264,17 @@ cc_library(
     ],
 )
 
+cc_library(
+    name = "worker_cache_wrapper",
+    hdrs = ["worker_cache_wrapper.h"],
+    deps = [
+        ":worker_cache",
+        ":worker_interface",
+        "//tensorflow/core:lib",
+        "//tensorflow/core:protos_all_cc",
+    ],
+)
+
 cc_library(
     name = "remote_device",
     srcs = ["remote_device.cc"],
diff --git a/tensorflow/core/distributed_runtime/master_session.cc b/tensorflow/core/distributed_runtime/master_session.cc
index 3379302b9b..03b65d8cba 100644
--- a/tensorflow/core/distributed_runtime/master_session.cc
+++ b/tensorflow/core/distributed_runtime/master_session.cc
@@ -1049,7 +1049,10 @@ Status MasterSession::Create(GraphDef* graph_def,
     TF_RETURN_IF_ERROR(GraphExecutionState::MakeForBaseGraph(
         graph_def, execution_options, &execution_state_));
   }
-  if (options.cluster_def != nullptr) {
+  // TODO(b/36574172): Remove these conditions when ClusterSpec
+  // propagation is supported in all servers.
+  if (options.cluster_def != nullptr ||
+      session_opts_.config.isolate_session_state()) {
     should_delete_worker_sessions_ = true;
     return CreateWorkerSessions(options);
   }
@@ -1058,10 +1061,9 @@ Status MasterSession::Create(GraphDef* graph_def,
 
 Status MasterSession::CreateWorkerSessions(
     const WorkerCacheFactoryOptions& options) {
-  CHECK(worker_cache_) << "CreateWorkerSessions should be called only with "
-                       << "dynamic cluster membership.";
   std::vector<string> worker_names;
-  worker_cache_->ListWorkers(&worker_names);
+  WorkerCacheInterface* worker_cache = get_worker_cache();
+  worker_cache->ListWorkers(&worker_names);
 
   struct WorkerGroup {
     // The worker name. (Not owned.)
@@ -1079,10 +1081,10 @@ Status MasterSession::CreateWorkerSessions(
   std::vector<WorkerGroup> workers(worker_names.size());
 
   // Release the workers.
-  auto cleanup = gtl::MakeCleanup([this, &workers] {
+  auto cleanup = gtl::MakeCleanup([this, &workers, worker_cache] {
     for (auto&& worker_group : workers) {
       if (worker_group.worker != nullptr) {
-        worker_cache_->ReleaseWorker(*worker_group.name, worker_group.worker);
+        worker_cache->ReleaseWorker(*worker_group.name, worker_group.worker);
       }
     }
   });
@@ -1091,11 +1093,19 @@ Status MasterSession::CreateWorkerSessions(
   // Create all the workers & kick off the computations.
   for (size_t i = 0; i < worker_names.size(); ++i) {
     workers[i].name = &worker_names[i];
-    workers[i].worker = worker_cache_->CreateWorker(worker_names[i]);
+    workers[i].worker = worker_cache->CreateWorker(worker_names[i]);
     workers[i].request.set_session_handle(handle_);
-    *workers[i].request.mutable_server_def()->mutable_cluster() =
-        *options.cluster_def;
-    workers[i].request.mutable_server_def()->set_protocol(*options.protocol);
+    if (options.cluster_def) {
+      *workers[i].request.mutable_server_def()->mutable_cluster() =
+          *options.cluster_def;
+      workers[i].request.mutable_server_def()->set_protocol(*options.protocol);
+      // Session state is always isolated when ClusterSpec propagation
+      // is in use.
+      workers[i].request.set_isolate_session_state(true);
+    } else {
+      workers[i].request.set_isolate_session_state(
+          session_opts_.config.isolate_session_state());
+    }
 
     DeviceNameUtils::ParsedName name;
     if (!DeviceNameUtils::ParseFullName(worker_names[i], &name)) {
@@ -1162,7 +1172,7 @@ Status MasterSession::DeleteWorkerSessions() {
   // Create all the workers & kick off the computations.
   for (size_t i = 0; i < worker_names.size(); ++i) {
     workers[i].name = &worker_names[i];
-    workers[i].worker = worker_cache_->CreateWorker(worker_names[i]);
+    workers[i].worker = worker_cache->CreateWorker(worker_names[i]);
     workers[i].request.set_session_handle(handle_);
   }
 
diff --git a/tensorflow/core/distributed_runtime/session_mgr.cc b/tensorflow/core/distributed_runtime/session_mgr.cc
index b97749dc41..fabcbd00f5 100644
--- a/tensorflow/core/distributed_runtime/session_mgr.cc
+++ b/tensorflow/core/distributed_runtime/session_mgr.cc
@@ -20,7 +20,10 @@ limitations under the License.
 #include "tensorflow/core/common_runtime/device_mgr.h"
 #include "tensorflow/core/common_runtime/renamed_device.h"
 #include "tensorflow/core/distributed_runtime/graph_mgr.h"
+#include "tensorflow/core/distributed_runtime/worker_cache_wrapper.h"
 #include "tensorflow/core/lib/strings/strcat.h"
+#include "tensorflow/core/protobuf/cluster.pb.h"
+#include "tensorflow/core/protobuf/tensorflow_server.pb.h"
 
 namespace tensorflow {
 
@@ -29,7 +32,10 @@ SessionMgr::SessionMgr(
     std::unique_ptr<WorkerCacheInterface> default_worker_cache,
     WorkerCacheFactory worker_cache_factory)
     : worker_env_(worker_env),
-      legacy_session_("", default_worker_name, std::move(default_worker_cache),
+      default_worker_cache_(std::move(default_worker_cache)),
+      legacy_session_("", default_worker_name,
+                      std::unique_ptr<WorkerCacheInterface>(
+                          new WorkerCacheWrapper(default_worker_cache_.get())),
                       std::unique_ptr<DeviceMgr>(worker_env->device_mgr),
                       std::unique_ptr<GraphMgr>(
                           new GraphMgr(worker_env, worker_env->device_mgr))),
@@ -41,7 +47,8 @@ string SessionMgr::WorkerNameFromServerDef(const ServerDef& server_def) {
 }
 
 Status SessionMgr::CreateSession(const string& session,
-                                 const ServerDef& server_def) {
+                                 const ServerDef& server_def,
+                                 bool isolate_session_state) {
   mutex_lock l(mu_);
   if (session.empty()) {
     return errors::InvalidArgument("Session must be non-empty.");
@@ -50,12 +57,18 @@ Status SessionMgr::CreateSession(const string& session,
   const string worker_name = WorkerNameFromServerDef(server_def);
 
   WorkerCacheInterface* worker_cache = nullptr;
-  TF_RETURN_IF_ERROR(worker_cache_factory_(server_def, &worker_cache));
+  if (server_def.cluster().job().empty()) {
+    worker_cache = new WorkerCacheWrapper(default_worker_cache_.get());
+  } else {
+    TF_RETURN_IF_ERROR(worker_cache_factory_(server_def, &worker_cache));
+  }
 
+  CHECK(!worker_env_->local_devices.empty())
+      << "The WorkerEnv must have at least one device in `local_devices`.";
   std::vector<Device*> renamed_devices;
   for (Device* d : worker_env_->local_devices) {
-    renamed_devices.push_back(
-        RenamedDevice::NewRenamedDevice(worker_name, d, false));
+    renamed_devices.push_back(RenamedDevice::NewRenamedDevice(
+        worker_name, d, false, isolate_session_state));
   }
   std::unique_ptr<DeviceMgr> device_mgr(new DeviceMgr(renamed_devices));
 
diff --git a/tensorflow/core/distributed_runtime/session_mgr.h b/tensorflow/core/distributed_runtime/session_mgr.h
index c44bca7b7a..d85b6c3059 100644
--- a/tensorflow/core/distributed_runtime/session_mgr.h
+++ b/tensorflow/core/distributed_runtime/session_mgr.h
@@ -45,7 +45,8 @@ class SessionMgr {
   ~SessionMgr() {}
 
   // Allocates state for a new session.
-  Status CreateSession(const string& session, const ServerDef& server_def);
+  Status CreateSession(const string& session, const ServerDef& server_def,
+                       bool isolate_session_state);
 
   // Locates the worker session for a given session handle
   WorkerSession* WorkerSessionForSession(const string& session);
@@ -71,6 +72,7 @@ class SessionMgr {
   // legacy_session_ is deleted. Further, we must ensure that WorkerSession's
   // device_mgr is deleted after WorkerSession's graph_mgr.
 
+  std::unique_ptr<WorkerCacheInterface> default_worker_cache_;
   WorkerSession legacy_session_;
 
   const WorkerCacheFactory worker_cache_factory_;
diff --git a/tensorflow/core/distributed_runtime/session_mgr_test.cc b/tensorflow/core/distributed_runtime/session_mgr_test.cc
index 7132f123a5..ffe4809f2b 100644
--- a/tensorflow/core/distributed_runtime/session_mgr_test.cc
+++ b/tensorflow/core/distributed_runtime/session_mgr_test.cc
@@ -22,14 +22,36 @@ limitations under the License.
 
 namespace tensorflow {
 
+class FakeDevice : public Device {
+ private:
+  explicit FakeDevice(const DeviceAttributes& device_attributes)
+      : Device(nullptr, device_attributes) {}
+
+ public:
+  Status Sync() override { return errors::Unimplemented("FakeDevice::Sync()"); }
+
+  Allocator* GetAllocator(AllocatorAttributes attr) override { return nullptr; }
+
+  static std::unique_ptr<Device> MakeCPU(const string& name) {
+    DeviceAttributes device_attributes;
+    device_attributes.set_name(name);
+    device_attributes.set_device_type(DeviceType("FakeCPU").type());
+    return std::unique_ptr<Device>(new FakeDevice(device_attributes));
+  }
+};
+
 class SessionMgrTest : public ::testing::Test {
  protected:
   SessionMgrTest()
-      : mgr_(&env_, "/job:mnist/replica:0/task:0",
-             std::unique_ptr<WorkerCacheInterface>(),
-             factory_),
-        legacy_session_(mgr_.WorkerSessionForSession("novel_session_id")) {}
+      : device_(FakeDevice::MakeCPU(
+            "/job:mnist/replica:0/task:0/device:fakecpu:0")),
+        mgr_(&env_, "/job:mnist/replica:0/task:0",
+             std::unique_ptr<WorkerCacheInterface>(), factory_),
+        legacy_session_(mgr_.WorkerSessionForSession("novel_session_id")) {
+    env_.local_devices = {device_.get()};
+  }
 
+  std::unique_ptr<Device> device_;
   WorkerEnv env_;
   SessionMgr::WorkerCacheFactory factory_ =
       [](const ServerDef& server_def, WorkerCacheInterface** worker_cache) {
@@ -42,14 +64,48 @@ class SessionMgrTest : public ::testing::Test {
 
 TEST_F(SessionMgrTest, CreateSessionSimple) {
   ServerDef server_def;
+  server_def.set_job_name("worker");
+  server_def.set_task_index(3);
+
   string session_handle = "test_session_handle";
-  TF_EXPECT_OK(mgr_.CreateSession(session_handle, server_def));
+  TF_EXPECT_OK(mgr_.CreateSession(session_handle, server_def, true));
   WorkerSession* session = mgr_.WorkerSessionForSession(session_handle);
   EXPECT_NE(nullptr, session) << "Session for " << session_handle << "was null";
   EXPECT_NE(mgr_.LegacySession(), session);
   TF_EXPECT_OK(mgr_.DeleteSession(session_handle));
 }
 
+TEST_F(SessionMgrTest, CreateSessionIsolateSessionState) {
+  ServerDef server_def;
+  server_def.set_job_name("worker");
+  server_def.set_task_index(3);
+
+  TF_EXPECT_OK(mgr_.CreateSession("handle_1", server_def, false));
+  WorkerSession* session_1 = mgr_.WorkerSessionForSession("handle_1");
+  std::vector<Device*> devices_1 = session_1->device_mgr->ListDevices();
+  EXPECT_EQ(1, devices_1.size());
+
+  TF_EXPECT_OK(mgr_.CreateSession("handle_2", server_def, false));
+  WorkerSession* session_2 = mgr_.WorkerSessionForSession("handle_2");
+  std::vector<Device*> devices_2 = session_2->device_mgr->ListDevices();
+  EXPECT_EQ(1, devices_2.size());
+
+  TF_EXPECT_OK(mgr_.CreateSession("handle_3", server_def, true));
+  WorkerSession* session_3 = mgr_.WorkerSessionForSession("handle_3");
+  std::vector<Device*> devices_3 = session_3->device_mgr->ListDevices();
+  EXPECT_EQ(1, devices_3.size());
+
+  TF_EXPECT_OK(mgr_.CreateSession("handle_4", server_def, true));
+  WorkerSession* session_4 = mgr_.WorkerSessionForSession("handle_4");
+  std::vector<Device*> devices_4 = session_4->device_mgr->ListDevices();
+  EXPECT_EQ(1, devices_4.size());
+
+  EXPECT_EQ(devices_1[0]->resource_manager(), devices_2[0]->resource_manager());
+  EXPECT_NE(devices_1[0]->resource_manager(), devices_3[0]->resource_manager());
+  EXPECT_NE(devices_1[0]->resource_manager(), devices_4[0]->resource_manager());
+  EXPECT_NE(devices_3[0]->resource_manager(), devices_4[0]->resource_manager());
+}
+
 TEST_F(SessionMgrTest, LegacySession) {
   ServerDef server_def;
   string session_handle = "";
diff --git a/tensorflow/core/distributed_runtime/worker.cc b/tensorflow/core/distributed_runtime/worker.cc
index 8bf87923ed..6cd92f5fe7 100644
--- a/tensorflow/core/distributed_runtime/worker.cc
+++ b/tensorflow/core/distributed_runtime/worker.cc
@@ -44,7 +44,8 @@ void Worker::CreateWorkerSessionAsync(const CreateWorkerSessionRequest* request,
                                       CreateWorkerSessionResponse* response,
                                       StatusCallback done) {
   Status s = env_->session_mgr->CreateSession(request->session_handle(),
-                                              request->server_def());
+                                              request->server_def(),
+                                              request->isolate_session_state());
   done(s);
 }
 
diff --git a/tensorflow/core/distributed_runtime/worker_cache_wrapper.h b/tensorflow/core/distributed_runtime/worker_cache_wrapper.h
new file mode 100644
index 0000000000..43c3b6285b
--- /dev/null
+++ b/tensorflow/core/distributed_runtime/worker_cache_wrapper.h
@@ -0,0 +1,90 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_CORE_DISTRIBUTED_RUNTIME_WORKER_CACHE_WRAPPER_H_
+#define TENSORFLOW_CORE_DISTRIBUTED_RUNTIME_WORKER_CACHE_WRAPPER_H_
+
+#include <string>
+#include <vector>
+
+#include "tensorflow/core/distributed_runtime/worker_cache.h"
+
+namespace tensorflow {
+
+class WorkerCacheWrapper : public WorkerCacheInterface {
+ public:
+  WorkerCacheWrapper(WorkerCacheInterface* wrapped) : wrapped_(wrapped) {}
+
+  // Updates *workers with strings naming the remote worker tasks to
+  // which open channels have been established.
+  virtual void ListWorkers(std::vector<string>* workers) const {
+    return wrapped_->ListWorkers(workers);
+  }
+
+  // If "target" names a remote task for which an RPC channel exists
+  // or can be constructed, returns a pointer to a WorkerInterface object
+  // wrapping that channel. The returned value must be destroyed by
+  // calling `this->ReleaseWorker(target, ret)`
+  // TODO(mrry): rename this to GetOrCreateWorker() or something that
+  // makes it more obvious that this method returns a potentially
+  // shared object.
+  virtual WorkerInterface* CreateWorker(const string& target) {
+    return wrapped_->CreateWorker(target);
+  }
+
+  // Release a worker previously returned by this->CreateWorker(target).
+  //
+  // TODO(jeff,sanjay): Consider moving target into WorkerInterface.
+  // TODO(jeff,sanjay): Unify all worker-cache impls and factor out a
+  //                    per-rpc-subsystem WorkerInterface creator.
+  virtual void ReleaseWorker(const string& target, WorkerInterface* worker) {
+    return wrapped_->ReleaseWorker(target, worker);
+  }
+
+  // Set *locality with the DeviceLocality of the specified remote device
+  // within its local environment.  Returns true if *locality
+  // was set, using only locally cached data.  Returns false
+  // if status data for that device was not available.  Never blocks.
+  virtual bool GetDeviceLocalityNonBlocking(const string& device,
+                                            DeviceLocality* locality) {
+    return wrapped_->GetDeviceLocalityNonBlocking(device, locality);
+  }
+
+  // Set *locality with the DeviceLocality of the specified remote device
+  // within its local environment.  Callback gets Status::OK if *locality
+  // was set.
+  virtual void GetDeviceLocalityAsync(const string& device,
+                                      DeviceLocality* locality,
+                                      StatusCallback done) {
+    return wrapped_->GetDeviceLocalityAsync(device, locality, std::move(done));
+  }
+
+  // Start/stop logging activity.
+  virtual void SetLogging(bool active) { wrapped_->SetLogging(active); }
+
+  // Discard any saved log data.
+  virtual void ClearLogs() { wrapped_->ClearLogs(); }
+
+  // Return logs for the identified step in *ss.  Any returned data will no
+  // longer be stored.
+  virtual bool RetrieveLogs(int64 step_id, StepStats* ss) {
+    return wrapped_->RetrieveLogs(step_id, ss);
+  }
+
+ private:
+  WorkerCacheInterface* wrapped_;  // Not owned.
+};
+}  // namespace tensorflow
+#endif  // TENSORFLOW_CORE_DISTRIBUTED_RUNTIME_WORKER_CACHE_WRAPPER_H_
diff --git a/tensorflow/core/protobuf/config.proto b/tensorflow/core/protobuf/config.proto
index a956aab3dc..1916316245 100644
--- a/tensorflow/core/protobuf/config.proto
+++ b/tensorflow/core/protobuf/config.proto
@@ -303,7 +303,11 @@ message ConfigProto {
   // Optional list of all workers to use in this session.
   ClusterDef cluster_def = 14;
 
-  // Next: 15
+  // If true, any resources such as Variables used in the session will not be
+  // shared with other sessions.
+  bool isolate_session_state = 15;
+
+  // Next: 16
 };
 
 // Options for a single Run() call.
diff --git a/tensorflow/core/protobuf/worker.proto b/tensorflow/core/protobuf/worker.proto
index e7b3f36fcc..385e2dd163 100644
--- a/tensorflow/core/protobuf/worker.proto
+++ b/tensorflow/core/protobuf/worker.proto
@@ -59,6 +59,10 @@ message CreateWorkerSessionRequest {
 
   // Defines the configuration of a TensorFlow worker.
   ServerDef server_def = 2;
+
+  // If true, any resources such as Variables used in the session will not be
+  // shared with other sessions.
+  bool isolate_session_state = 3;
 }
 
 message CreateWorkerSessionResponse {
diff --git a/tensorflow/python/client/session_clusterspec_prop_test.py b/tensorflow/python/client/session_clusterspec_prop_test.py
index 28a4dd27a7..c85b22eb15 100644
--- a/tensorflow/python/client/session_clusterspec_prop_test.py
+++ b/tensorflow/python/client/session_clusterspec_prop_test.py
@@ -29,6 +29,7 @@ from tensorflow.python.client import session
 from tensorflow.python.framework import common_shapes
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import errors
 from tensorflow.python.framework import ops
 from tensorflow.python.framework import test_util
 from tensorflow.python.ops import array_ops
@@ -415,6 +416,48 @@ class SessionClusterSpecPropagationTest(test_util.TensorFlowTestCase):
               node_stats.node_name.startswith('Const')
           ]), run_metadata)
 
+  def testClusterSpecPropagationIsolation(self):
+    """Test that two sessions using ClusterSpec propagation are isolated."""
+    server = server_lib.Server.create_local_server()
+    init_value = array_ops.placeholder(dtypes.int32, shape=[])
+    v = variables.Variable(init_value)
+
+    cluster_def = cluster_pb2.ClusterDef()
+    job = cluster_def.job.add()
+    job.name = 'worker'
+    job.tasks[0] = server.target[len('grpc://'):]
+    config = config_pb2.ConfigProto(cluster_def=cluster_def)
+
+    sess1 = session.Session(server.target, config=config)
+    sess2 = session.Session(server.target, config=config)
+
+    # Initially, the variable is uninitialized in both sessions.
+    with self.assertRaises(errors.FailedPreconditionError):
+      sess1.run(v)
+    with self.assertRaises(errors.FailedPreconditionError):
+      sess2.run(v)
+
+    # An update in sess1 should be visible in sess1 only.
+    sess1.run(v.initializer, feed_dict={init_value: 37})
+    self.assertEqual(37, sess1.run(v))
+    with self.assertRaises(errors.FailedPreconditionError):
+      sess2.run(v)
+
+    # An update in sess2 should be visible in sess2 only.
+    sess2.run(v.initializer, feed_dict={init_value: 86})
+    self.assertEqual(37, sess1.run(v))
+    self.assertEqual(86, sess2.run(v))
+
+    # Closing sess2 has no effect on the state of sess1.
+    sess2.close()
+    self.assertEqual(37, sess1.run(v))
+
+    # Subsequent sessions will not see the state of existing sessions.
+    sess3 = session.Session(server.target, config=config)
+    self.assertEqual(37, sess1.run(v))
+    with self.assertRaises(errors.FailedPreconditionError):
+      sess3.run(v)
+
   @test_util.disable_c_api  # Partial runs don't work with C API
   def testClusterSpecPropagationPartialRun(self):
     """Test successful partial run with ClusterSpec propagation."""
diff --git a/tensorflow/python/training/server_lib_test.py b/tensorflow/python/training/server_lib_test.py
index 0a8ec4901c..26aac787ed 100644
--- a/tensorflow/python/training/server_lib_test.py
+++ b/tensorflow/python/training/server_lib_test.py
@@ -241,6 +241,95 @@ class GrpcServerTest(test.TestCase):
       queue_runner_impl.start_queue_runners(sess)
       sess.run(var.assign(3.0))
 
+  def testIsolateSessionState(self):
+    server = self._cached_server
+
+    init_value = array_ops.placeholder(dtypes.int32)
+    v = variables.Variable(init_value, validate_shape=False, name="v")
+
+    sharing_config = config_pb2.ConfigProto(isolate_session_state=False)
+    sharing_sess_0 = session.Session(server.target, config=sharing_config)
+    sharing_sess_1 = session.Session(server.target, config=sharing_config)
+
+    isolate_config = config_pb2.ConfigProto(isolate_session_state=True)
+    isolate_sess_0 = session.Session(server.target, config=isolate_config)
+    isolate_sess_1 = session.Session(server.target, config=isolate_config)
+
+    # Initially all variables are initialized.
+    for sess in [sharing_sess_0, sharing_sess_1,
+                 isolate_sess_0, isolate_sess_1]:
+      with self.assertRaises(errors_impl.FailedPreconditionError):
+        sess.run(v)
+
+    # Shared sessions will see each other's updates, but isolated sessions
+    # will not.
+    sharing_sess_0.run(v.initializer, feed_dict={init_value: 86})
+    self.assertAllEqual(86, sharing_sess_0.run(v))
+    self.assertAllEqual(86, sharing_sess_1.run(v))
+    with self.assertRaises(errors_impl.FailedPreconditionError):
+      isolate_sess_0.run(v)
+    with self.assertRaises(errors_impl.FailedPreconditionError):
+      isolate_sess_1.run(v)
+
+    # Changing the shape works because `validate_shape` is False.
+    sharing_sess_1.run(v.initializer, feed_dict={init_value: [86, 99]})
+    self.assertAllEqual([86, 99], sharing_sess_0.run(v))
+    self.assertAllEqual([86, 99], sharing_sess_1.run(v))
+    with self.assertRaises(errors_impl.FailedPreconditionError):
+      isolate_sess_0.run(v)
+    with self.assertRaises(errors_impl.FailedPreconditionError):
+      isolate_sess_1.run(v)
+
+    # Initializing in an isolated session will only affect the state in that
+    # session.
+    isolate_sess_0.run(v.initializer, feed_dict={init_value: 37})
+    self.assertAllEqual([86, 99], sharing_sess_0.run(v))
+    self.assertAllEqual([86, 99], sharing_sess_1.run(v))
+    self.assertAllEqual(37, isolate_sess_0.run(v))
+    with self.assertRaises(errors_impl.FailedPreconditionError):
+      isolate_sess_1.run(v)
+
+    # Isolated sessions can have different shapes for the same variable.
+    isolate_sess_1.run(v.initializer, feed_dict={init_value: [19, 86]})
+    self.assertAllEqual([86, 99], sharing_sess_0.run(v))
+    self.assertAllEqual([86, 99], sharing_sess_1.run(v))
+    self.assertAllEqual(37, isolate_sess_0.run(v))
+    self.assertAllEqual([19, 86], isolate_sess_1.run(v))
+
+  def testShapeChangingIsolateState(self):
+    server = self._cached_server
+    sharing_config = config_pb2.ConfigProto(isolate_session_state=False)
+    isolate_config = config_pb2.ConfigProto(isolate_session_state=True)
+
+    with ops.Graph().as_default():
+      w_vector = variables.Variable([1, 2, 3], name="w")
+      with session.Session(server.target, config=sharing_config) as sess:
+        with self.assertRaises(errors_impl.FailedPreconditionError):
+          sess.run(w_vector)
+        sess.run(w_vector.initializer)
+        self.assertAllEqual([1, 2, 3], sess.run(w_vector))
+
+    with ops.Graph().as_default():
+      w_vector = variables.Variable([4, 5, 6], name="w")
+      with session.Session(server.target, config=sharing_config) as sess:
+        self.assertAllEqual([1, 2, 3], sess.run(w_vector))
+        sess.run(w_vector.initializer)
+        self.assertAllEqual([4, 5, 6], sess.run(w_vector))
+
+    with ops.Graph().as_default():
+      w_scalar = variables.Variable(86, name="w")
+      with session.Session(server.target, config=sharing_config) as sess:
+        with self.assertRaises(errors_impl.InvalidArgumentError):
+          sess.run(w_scalar.initializer)
+
+    with ops.Graph().as_default():
+      w_scalar = variables.Variable(37, name="w")
+      with session.Session(server.target, config=isolate_config) as sess:
+        with self.assertRaises(errors_impl.FailedPreconditionError):
+          sess.run(w_scalar)
+        sess.run(w_scalar.initializer)
+        self.assertAllEqual(37, sess.run(w_scalar))
+
 
 class ServerDefTest(test.TestCase):
 
diff --git a/tensorflow/tools/api/golden/tensorflow.-config-proto.pbtxt b/tensorflow/tools/api/golden/tensorflow.-config-proto.pbtxt
index da6af3919e..009d64aed0 100644
--- a/tensorflow/tools/api/golden/tensorflow.-config-proto.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.-config-proto.pbtxt
@@ -46,6 +46,10 @@ tf_class {
     name: "INTRA_OP_PARALLELISM_THREADS_FIELD_NUMBER"
     mtype: "<type \'int\'>"
   }
+  member {
+    name: "ISOLATE_SESSION_STATE_FIELD_NUMBER"
+    mtype: "<type \'int\'>"
+  }
   member {
     name: "LOG_DEVICE_PLACEMENT_FIELD_NUMBER"
     mtype: "<type \'int\'>"
-- 
GitLab


From 4e9fa6dcce4912a4797c48f4cb55d3564961bfca Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 28 Nov 2017 09:44:25 -0800
Subject: [PATCH 0342/1225] Adapt upstream API change from r319082.

PiperOrigin-RevId: 177173806
---
 .../xla/service/gpu/llvm_gpu_backend/gpu_backend_lib.cc         | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/compiler/xla/service/gpu/llvm_gpu_backend/gpu_backend_lib.cc b/tensorflow/compiler/xla/service/gpu/llvm_gpu_backend/gpu_backend_lib.cc
index 96981534d5..059943d48c 100644
--- a/tensorflow/compiler/xla/service/gpu/llvm_gpu_backend/gpu_backend_lib.cc
+++ b/tensorflow/compiler/xla/service/gpu/llvm_gpu_backend/gpu_backend_lib.cc
@@ -34,7 +34,7 @@ limitations under the License.
 #include "llvm/Analysis/TargetTransformInfo.h"
 #include "llvm/Bitcode/BitcodeReader.h"
 #include "llvm/Bitcode/BitcodeWriter.h"
-#include "llvm/CodeGen/CommandFlags.h"
+#include "llvm/CodeGen/CommandFlags.def"
 #include "llvm/IR/LLVMContext.h"
 #include "llvm/IR/LegacyPassManager.h"
 #include "llvm/IR/Module.h"
-- 
GitLab


From 82fa1e1ae5b2f8af642979fafb1cab455db1882f Mon Sep 17 00:00:00 2001
From: Jiri Simsa <jsimsa@google.com>
Date: Tue, 28 Nov 2017 09:53:51 -0800
Subject: [PATCH 0343/1225] Adding support for `tf.SparseTensorValue` and
 `tf.SparseTensor` as inputs and outputs for various `tf.data` operations. In
 particular: - adding support for `tf.SparseTensorValue` as output type of
 `tf.data.Dataset.map()`. - adding support for both `tf.SparseTensor` and
 `tf.SparseTensorValue` as inputs to `tf.data.from_tensors()`. - adding
 support for both `tf.SparseTensor` and `tf.SparseTensorValue` as inputs to
 `tf.data.from_tensor_slices()`.

PiperOrigin-RevId: 177175439
---
 .../contrib/data/python/kernel_tests/BUILD    |   1 +
 .../kernel_tests/batch_dataset_op_test.py     |  36 ++--
 .../dataset_constructor_op_test.py            | 197 +++++++++++++++++-
 .../kernel_tests/filter_dataset_op_test.py    |  11 +-
 .../kernel_tests/flat_map_dataset_op_test.py  |   2 +-
 .../interleave_dataset_op_test.py             |   3 +-
 .../kernel_tests/map_dataset_op_test.py       |  32 +--
 .../contrib/data/python/ops/dataset_ops.py    |   1 -
 tensorflow/contrib/data/python/ops/readers.py |   2 +-
 tensorflow/python/data/ops/dataset_ops.py     |  60 ++++--
 tensorflow/python/data/util/nest.py           |  17 +-
 tensorflow/python/data/util/nest_test.py      |  16 +-
 tensorflow/python/framework/sparse_tensor.py  |  17 +-
 .../python/framework/sparse_tensor_test.py    |  12 ++
 tensorflow/python/framework/tensor_util.py    |   2 +-
 .../kernel_tests/batch_dataset_op_test.py     |  26 +--
 .../dataset_constructor_op_test.py            | 197 +++++++++++++++++-
 .../kernel_tests/filter_dataset_op_test.py    |  11 +-
 .../kernel_tests/flat_map_dataset_op_test.py  |   2 +-
 .../interleave_dataset_op_test.py             |   3 +-
 .../kernel_tests/map_dataset_op_test.py       |  37 ++--
 21 files changed, 571 insertions(+), 114 deletions(-)

diff --git a/tensorflow/contrib/data/python/kernel_tests/BUILD b/tensorflow/contrib/data/python/kernel_tests/BUILD
index 3efe5274f4..0790a4a737 100644
--- a/tensorflow/contrib/data/python/kernel_tests/BUILD
+++ b/tensorflow/contrib/data/python/kernel_tests/BUILD
@@ -110,6 +110,7 @@ py_test(
         "//tensorflow/python:resource_variable_ops",
         "//tensorflow/python:session",
         "//tensorflow/python:sparse_tensor",
+        "//tensorflow/python:tensor_shape",
         "//tensorflow/python/data/util:nest",
         "//third_party/py/numpy",
     ],
diff --git a/tensorflow/contrib/data/python/kernel_tests/batch_dataset_op_test.py b/tensorflow/contrib/data/python/kernel_tests/batch_dataset_op_test.py
index b0064f8ae7..a939b3c841 100644
--- a/tensorflow/contrib/data/python/kernel_tests/batch_dataset_op_test.py
+++ b/tensorflow/contrib/data/python/kernel_tests/batch_dataset_op_test.py
@@ -112,7 +112,7 @@ class BatchDatasetTest(test.TestCase):
   def testBatchSparse(self):
 
     def _sparse(i):
-      return sparse_tensor.SparseTensor(
+      return sparse_tensor.SparseTensorValue(
           indices=[[0]], values=(i * [1]), dense_shape=[1])
 
     iterator = dataset_ops.Dataset.range(10).map(_sparse).batch(
@@ -124,19 +124,19 @@ class BatchDatasetTest(test.TestCase):
       sess.run(init_op)
       for i in range(2):
         actual = sess.run(get_next)
-        expected = sparse_tensor.SparseTensor(
+        expected = sparse_tensor.SparseTensorValue(
             indices=[[0, 0], [1, 0], [2, 0], [3, 0], [4, 0]],
             values=[i * 5, i * 5 + 1, i * 5 + 2, i * 5 + 3, i * 5 + 4],
             dense_shape=[5, 1])
-        self.assertTrue(isinstance(actual, sparse_tensor.SparseTensorValue))
-        self.assertSparseValuesEqual(actual, expected.eval())
+        self.assertTrue(sparse_tensor.is_sparse(actual))
+        self.assertSparseValuesEqual(actual, expected)
       with self.assertRaises(errors.OutOfRangeError):
         sess.run(get_next)
 
   def testNestedBatchSparse(self):
 
     def _sparse(i):
-      return sparse_tensor.SparseTensor(
+      return sparse_tensor.SparseTensorValue(
           indices=[[0]], values=(i * [1]), dense_shape=[1])
 
     iterator = dataset_ops.Dataset.range(10).map(_sparse).batch(5).batch(
@@ -147,13 +147,13 @@ class BatchDatasetTest(test.TestCase):
     with self.test_session() as sess:
       sess.run(init_op)
       actual = sess.run(get_next)
-      expected = sparse_tensor.SparseTensor(
+      expected = sparse_tensor.SparseTensorValue(
           indices=[[0, 0, 0], [0, 1, 0], [0, 2, 0], [0, 3, 0], [0, 4, 0],
                    [1, 0, 0], [1, 1, 0], [1, 2, 0], [1, 3, 0], [1, 4, 0]],
           values=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9],
           dense_shape=[2, 5, 1])
-      self.assertTrue(isinstance(actual, sparse_tensor.SparseTensorValue))
-      self.assertSparseValuesEqual(actual, expected.eval())
+      self.assertTrue(sparse_tensor.is_sparse(actual))
+      self.assertSparseValuesEqual(actual, expected)
       with self.assertRaises(errors.OutOfRangeError):
         sess.run(get_next)
 
@@ -294,7 +294,7 @@ class BatchDatasetTest(test.TestCase):
   def testPaddedBatchSparseError(self):
 
     def _map_fn(i):
-      return sparse_tensor.SparseTensor(
+      return sparse_tensor.SparseTensorValue(
           indices=[[0, 0]], values=(i * [1]), dense_shape=[1, 1]), i
 
     with self.assertRaises(TypeError):
@@ -485,7 +485,7 @@ class BatchDatasetTest(test.TestCase):
   def testBatchAndDropRemainderSparse(self):
 
     def _sparse(i):
-      return sparse_tensor.SparseTensor(
+      return sparse_tensor.SparseTensorValue(
           indices=[[0]], values=(i * [1]), dense_shape=[1])
 
     iterator = dataset_ops.Dataset.range(12).map(_sparse).apply(
@@ -497,12 +497,12 @@ class BatchDatasetTest(test.TestCase):
       sess.run(init_op)
       for i in range(2):
         actual = sess.run(get_next)
-        expected = sparse_tensor.SparseTensor(
+        expected = sparse_tensor.SparseTensorValue(
             indices=[[0, 0], [1, 0], [2, 0], [3, 0], [4, 0]],
             values=[i * 5, i * 5 + 1, i * 5 + 2, i * 5 + 3, i * 5 + 4],
             dense_shape=[5, 1])
-        self.assertTrue(isinstance(actual, sparse_tensor.SparseTensorValue))
-        self.assertSparseValuesEqual(actual, expected.eval())
+        self.assertTrue(sparse_tensor.is_sparse(actual))
+        self.assertSparseValuesEqual(actual, expected)
       with self.assertRaises(errors.OutOfRangeError):
         sess.run(get_next)
 
@@ -545,7 +545,7 @@ class BatchDatasetTest(test.TestCase):
   def testPaddedBatchAndDropRemainderSparseError(self):
 
     def _map_fn(i):
-      return sparse_tensor.SparseTensor(
+      return sparse_tensor.SparseTensorValue(
           indices=[[0, 0]], values=(i * [1]), dense_shape=[1, 1]), i
 
     with self.assertRaises(TypeError):
@@ -643,7 +643,7 @@ class BatchDatasetTest(test.TestCase):
   def testMapAndBatchSparse(self):
 
     def _sparse(i):
-      return sparse_tensor.SparseTensor(
+      return sparse_tensor.SparseTensorValue(
           indices=[[0]], values=(i * [1]), dense_shape=[1])
 
     iterator = dataset_ops.Dataset.range(10).apply(
@@ -655,12 +655,12 @@ class BatchDatasetTest(test.TestCase):
       sess.run(init_op)
       for i in range(2):
         actual = sess.run(get_next)
-        expected = sparse_tensor.SparseTensor(
+        expected = sparse_tensor.SparseTensorValue(
             indices=[[0, 0], [1, 0], [2, 0], [3, 0], [4, 0]],
             values=[i * 5, i * 5 + 1, i * 5 + 2, i * 5 + 3, i * 5 + 4],
             dense_shape=[5, 1])
-        self.assertTrue(isinstance(actual, sparse_tensor.SparseTensorValue))
-        self.assertSparseValuesEqual(actual, expected.eval())
+        self.assertTrue(sparse_tensor.is_sparse(actual))
+        self.assertSparseValuesEqual(actual, expected)
       with self.assertRaises(errors.OutOfRangeError):
         sess.run(get_next)
 
diff --git a/tensorflow/contrib/data/python/kernel_tests/dataset_constructor_op_test.py b/tensorflow/contrib/data/python/kernel_tests/dataset_constructor_op_test.py
index 0f1c8838ca..55a1d3b95b 100644
--- a/tensorflow/contrib/data/python/kernel_tests/dataset_constructor_op_test.py
+++ b/tensorflow/contrib/data/python/kernel_tests/dataset_constructor_op_test.py
@@ -31,6 +31,7 @@ from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import errors
 from tensorflow.python.framework import ops
 from tensorflow.python.framework import sparse_tensor
+from tensorflow.python.framework import tensor_shape
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import resource_variable_ops
@@ -39,7 +40,7 @@ from tensorflow.python.platform import test
 
 class DatasetConstructorTest(test.TestCase):
 
-  def testTensorDataset(self):
+  def testFromTensors(self):
     """Test an dataset that represents a single tuple of tensors."""
     components = (np.array(1), np.array([1, 2, 3]), np.array(37.0))
 
@@ -59,7 +60,75 @@ class DatasetConstructorTest(test.TestCase):
       with self.assertRaises(errors.OutOfRangeError):
         sess.run(get_next)
 
-  def testTensorSliceDataset(self):
+  def assertSparseValuesEqual(self, a, b):
+    self.assertAllEqual(a.indices, b.indices)
+    self.assertAllEqual(a.values, b.values)
+    self.assertAllEqual(a.dense_shape, b.dense_shape)
+
+  def testFromTensorsSparse(self):
+    """Test an dataset that represents a single tuple of tensors."""
+    components = (sparse_tensor.SparseTensorValue(
+        indices=np.array([[0]]),
+        values=np.array([0]),
+        dense_shape=np.array([1])),
+                  sparse_tensor.SparseTensorValue(
+                      indices=np.array([[0, 0], [1, 1]]),
+                      values=np.array([-1, 1]),
+                      dense_shape=np.array([2, 2])))
+
+    iterator = (
+        dataset_ops.Dataset.from_tensors(components)
+        .make_initializable_iterator())
+    init_op = iterator.initializer
+    get_next = iterator.get_next()
+
+    self.assertEqual(
+        [tensor_shape.TensorShape(c.dense_shape) for c in components],
+        [shape for shape in iterator.output_shapes])
+
+    with self.test_session() as sess:
+      sess.run(init_op)
+      results = sess.run(get_next)
+      for component, result_component in zip(components, results):
+        self.assertSparseValuesEqual(component, result_component)
+      with self.assertRaises(errors.OutOfRangeError):
+        sess.run(get_next)
+
+  def testFromTensorsMixed(self):
+    """Test an dataset that represents a single tuple of tensors."""
+    components = (np.array(1), np.array([1, 2, 3]), np.array(37.0),
+                  sparse_tensor.SparseTensorValue(
+                      indices=np.array([[0]]),
+                      values=np.array([0]),
+                      dense_shape=np.array([1])),
+                  sparse_tensor.SparseTensorValue(
+                      indices=np.array([[0, 0], [1, 1]]),
+                      values=np.array([-1, 1]),
+                      dense_shape=np.array([2, 2])))
+
+    iterator = (
+        dataset_ops.Dataset.from_tensors(components)
+        .make_initializable_iterator())
+    init_op = iterator.initializer
+    get_next = iterator.get_next()
+
+    self.assertEqual([
+        tensor_shape.TensorShape(c.dense_shape)
+        if sparse_tensor.is_sparse(c) else c.shape for c in components
+    ], [shape for shape in iterator.output_shapes])
+
+    with self.test_session() as sess:
+      sess.run(init_op)
+      results = sess.run(get_next)
+      for component, result_component in zip(components, results):
+        if sparse_tensor.is_sparse(component):
+          self.assertSparseValuesEqual(component, result_component)
+        else:
+          self.assertAllEqual(component, result_component)
+      with self.assertRaises(errors.OutOfRangeError):
+        sess.run(get_next)
+
+  def testFromTensorSlices(self):
     """Test an dataset that represents the slices from a tuple of tensors."""
     components = (
         np.tile(np.array([[1], [2], [3], [4]]), 20), np.tile(
@@ -84,7 +153,127 @@ class DatasetConstructorTest(test.TestCase):
       with self.assertRaises(errors.OutOfRangeError):
         sess.run(get_next)
 
-  def testTensorSliceDatasetWithDict(self):
+  def testFromTensorSlicesSparse(self):
+    """Test an dataset that represents the slices from a tuple of tensors."""
+    components = (sparse_tensor.SparseTensorValue(
+        indices=np.array([[0, 0], [1, 0], [2, 0]]),
+        values=np.array([0, 0, 0]),
+        dense_shape=np.array([3, 1])),
+                  sparse_tensor.SparseTensorValue(
+                      indices=np.array([[0, 0], [1, 1], [2, 2]]),
+                      values=np.array([1, 2, 3]),
+                      dense_shape=np.array([3, 3])))
+
+    iterator = (
+        dataset_ops.Dataset.from_tensor_slices(components)
+        .make_initializable_iterator())
+    init_op = iterator.initializer
+    get_next = iterator.get_next()
+
+    self.assertEqual(
+        [tensor_shape.TensorShape(c.dense_shape[1:]) for c in components],
+        [shape for shape in iterator.output_shapes])
+
+    with self.test_session() as sess:
+      sess.run(init_op)
+      expected = [
+          (sparse_tensor.SparseTensorValue(
+              indices=np.array([[0]]),
+              values=np.array([0]),
+              dense_shape=np.array([1])),
+           sparse_tensor.SparseTensorValue(
+               indices=np.array([[0]]),
+               values=np.array([1]),
+               dense_shape=np.array([3]))),
+          (sparse_tensor.SparseTensorValue(
+              indices=np.array([[0]]),
+              values=np.array([0]),
+              dense_shape=np.array([1])),
+           sparse_tensor.SparseTensorValue(
+               indices=np.array([[1]]),
+               values=np.array([2]),
+               dense_shape=np.array([3]))),
+          (sparse_tensor.SparseTensorValue(
+              indices=np.array([[0]]),
+              values=np.array([0]),
+              dense_shape=np.array([1])),
+           sparse_tensor.SparseTensorValue(
+               indices=np.array([[2]]),
+               values=np.array([3]),
+               dense_shape=np.array([3]))),
+      ]
+      for i in range(3):
+        results = sess.run(get_next)
+        for component, result_component in zip(expected[i], results):
+          self.assertSparseValuesEqual(component, result_component)
+      with self.assertRaises(errors.OutOfRangeError):
+        sess.run(get_next)
+
+  def testFromTensorSlicesMixed(self):
+    """Test an dataset that represents the slices from a tuple of tensors."""
+    components = (np.tile(np.array([[1], [2], [3]]), 20),
+                  np.tile(np.array([[12], [13], [14]]), 22),
+                  np.array([37.0, 38.0, 39.0]),
+                  sparse_tensor.SparseTensorValue(
+                      indices=np.array([[0, 0], [1, 0], [2, 0]]),
+                      values=np.array([0, 0, 0]),
+                      dense_shape=np.array([3, 1])),
+                  sparse_tensor.SparseTensorValue(
+                      indices=np.array([[0, 0], [1, 1], [2, 2]]),
+                      values=np.array([1, 2, 3]),
+                      dense_shape=np.array([3, 3])))
+
+    iterator = (
+        dataset_ops.Dataset.from_tensor_slices(components)
+        .make_initializable_iterator())
+    init_op = iterator.initializer
+    get_next = iterator.get_next()
+
+    self.assertEqual([
+        tensor_shape.TensorShape(c.dense_shape[1:])
+        if sparse_tensor.is_sparse(c) else c.shape[1:] for c in components
+    ], [shape for shape in iterator.output_shapes])
+
+    with self.test_session() as sess:
+      sess.run(init_op)
+      expected = [
+          (sparse_tensor.SparseTensorValue(
+              indices=np.array([[0]]),
+              values=np.array([0]),
+              dense_shape=np.array([1])),
+           sparse_tensor.SparseTensorValue(
+               indices=np.array([[0]]),
+               values=np.array([1]),
+               dense_shape=np.array([3]))),
+          (sparse_tensor.SparseTensorValue(
+              indices=np.array([[0]]),
+              values=np.array([0]),
+              dense_shape=np.array([1])),
+           sparse_tensor.SparseTensorValue(
+               indices=np.array([[1]]),
+               values=np.array([2]),
+               dense_shape=np.array([3]))),
+          (sparse_tensor.SparseTensorValue(
+              indices=np.array([[0]]),
+              values=np.array([0]),
+              dense_shape=np.array([1])),
+           sparse_tensor.SparseTensorValue(
+               indices=np.array([[2]]),
+               values=np.array([3]),
+               dense_shape=np.array([3]))),
+      ]
+      for i in range(3):
+        results = sess.run(get_next)
+        for component, result_component in zip(
+            (zip(*components[:3])[i] + expected[i]), results):
+          if sparse_tensor.is_sparse(component):
+            self.assertSparseValuesEqual(component, result_component)
+          else:
+            self.assertAllEqual(component, result_component)
+      with self.assertRaises(errors.OutOfRangeError):
+        sess.run(get_next)
+
+  def testFromTensorSlicesWithDict(self):
     components = {"foo": [1, 2, 3], "bar": [[4.0], [5.0], [6.0]]}
     iterator = (dataset_ops.Dataset.from_tensor_slices(components)
                 .make_initializable_iterator())
@@ -105,7 +294,7 @@ class DatasetConstructorTest(test.TestCase):
       with self.assertRaises(errors.OutOfRangeError):
         sess.run(get_next)
 
-  def testSparseTensorSliceDataset(self):
+  def testFromSparseTensorSlices(self):
     """Test a dataset based on slices of a `tf.SparseTensor`."""
     st = array_ops.sparse_placeholder(dtypes.float64)
     iterator = (dataset_ops.Dataset.from_sparse_tensor_slices(st)
diff --git a/tensorflow/contrib/data/python/kernel_tests/filter_dataset_op_test.py b/tensorflow/contrib/data/python/kernel_tests/filter_dataset_op_test.py
index 95724241ef..5921be2ae8 100644
--- a/tensorflow/contrib/data/python/kernel_tests/filter_dataset_op_test.py
+++ b/tensorflow/contrib/data/python/kernel_tests/filter_dataset_op_test.py
@@ -132,9 +132,12 @@ class FilterDatasetTest(test.TestCase):
     self.assertAllEqual(a.dense_shape, b.dense_shape)
 
   def testSparse(self):
+
     def _map_fn(i):
-      return sparse_tensor.SparseTensor(
-          indices=[[0, 0]], values=(i * [1]), dense_shape=[1, 1]), i
+      return sparse_tensor.SparseTensorValue(
+          indices=np.array([[0, 0]]),
+          values=(i * np.array([1])),
+          dense_shape=np.array([1, 1])), i
 
     def _filter_fn(_, i):
       return math_ops.equal(i % 2, 0)
@@ -149,10 +152,8 @@ class FilterDatasetTest(test.TestCase):
       sess.run(init_op)
       for i in range(5):
         actual = sess.run(get_next)
-        expected = sparse_tensor.SparseTensor(
-            indices=[[0, 0]], values=[i*2], dense_shape=[1, 1])
         self.assertTrue(isinstance(actual, sparse_tensor.SparseTensorValue))
-        self.assertSparseValuesEqual(actual, expected.eval())
+        self.assertSparseValuesEqual(actual, _map_fn(i * 2)[0])
       with self.assertRaises(errors.OutOfRangeError):
         sess.run(get_next)
 
diff --git a/tensorflow/contrib/data/python/kernel_tests/flat_map_dataset_op_test.py b/tensorflow/contrib/data/python/kernel_tests/flat_map_dataset_op_test.py
index ddb4bc34f3..d4fbaa5cdc 100644
--- a/tensorflow/contrib/data/python/kernel_tests/flat_map_dataset_op_test.py
+++ b/tensorflow/contrib/data/python/kernel_tests/flat_map_dataset_op_test.py
@@ -131,7 +131,7 @@ class FlatMapDatasetTest(test.TestCase):
 
   def testSparse(self):
     def _map_fn(i):
-      return sparse_tensor.SparseTensor(
+      return sparse_tensor.SparseTensorValue(
           indices=[[0, 0], [1, 1]], values=(i * [1, -1]), dense_shape=[2, 2])
 
     def _flat_map_fn(x):
diff --git a/tensorflow/contrib/data/python/kernel_tests/interleave_dataset_op_test.py b/tensorflow/contrib/data/python/kernel_tests/interleave_dataset_op_test.py
index c6e8ed5bdc..e66ed3f7aa 100644
--- a/tensorflow/contrib/data/python/kernel_tests/interleave_dataset_op_test.py
+++ b/tensorflow/contrib/data/python/kernel_tests/interleave_dataset_op_test.py
@@ -187,8 +187,9 @@ class InterleaveDatasetTest(test.TestCase):
         sess.run(next_element)
 
   def testSparse(self):
+
     def _map_fn(i):
-      return sparse_tensor.SparseTensor(
+      return sparse_tensor.SparseTensorValue(
           indices=[[0, 0], [1, 1]], values=(i * [1, -1]), dense_shape=[2, 2])
 
     def _interleave_fn(x):
diff --git a/tensorflow/contrib/data/python/kernel_tests/map_dataset_op_test.py b/tensorflow/contrib/data/python/kernel_tests/map_dataset_op_test.py
index 514b08b874..e9a07da84a 100644
--- a/tensorflow/contrib/data/python/kernel_tests/map_dataset_op_test.py
+++ b/tensorflow/contrib/data/python/kernel_tests/map_dataset_op_test.py
@@ -626,9 +626,13 @@ class MapDatasetTest(test.TestCase):
     self.assertAllEqual(a.dense_shape, b.dense_shape)
 
   def testSparse(self):
+
     def _sparse(i):
-      return sparse_tensor.SparseTensor(
-          indices=[[0, 0]], values=(i * [1]), dense_shape=[1, 1])
+      return sparse_tensor.SparseTensorValue(
+          indices=np.array([[0, 0]]),
+          values=(i * np.array([1])),
+          dense_shape=np.array([1, 1]))
+
     iterator = (dataset_ops.Dataset.range(10)
                 .map(_sparse)
                 .make_initializable_iterator())
@@ -639,24 +643,26 @@ class MapDatasetTest(test.TestCase):
       sess.run(init_op)
       for i in range(10):
         actual = sess.run(get_next)
-        expected = sparse_tensor.SparseTensor(
-            indices=[[0, 0]], values=[i], dense_shape=[1, 1])
         self.assertTrue(isinstance(actual, sparse_tensor.SparseTensorValue))
-        self.assertSparseValuesEqual(actual, expected.eval())
+        self.assertSparseValuesEqual(actual, _sparse(i))
       with self.assertRaises(errors.OutOfRangeError):
         sess.run(get_next)
 
   def testSparseChain(self):
+
     def _sparse(i):
-      return sparse_tensor.SparseTensor(
-          indices=[[0, 0]], values=(i * [1]), dense_shape=[1, 1])
+      return sparse_tensor.SparseTensorValue(
+          indices=np.array([[0, 0]]),
+          values=(i * np.array([1])),
+          dense_shape=np.array([1, 1]))
+
     def _check(i):
-      self.assertTrue(isinstance(i, sparse_tensor.SparseTensor))
+      self.assertTrue(sparse_tensor.is_sparse(i))
       return sparse_ops.sparse_concat(0, [i, i])
 
-    iterator = (dataset_ops.Dataset.range(10)
-                .map(_sparse).map(_check)
-                .make_initializable_iterator())
+    iterator = (
+        dataset_ops.Dataset.range(10).map(_sparse).map(_check)
+        .make_initializable_iterator())
     init_op = iterator.initializer
     get_next = iterator.get_next()
 
@@ -664,10 +670,8 @@ class MapDatasetTest(test.TestCase):
       sess.run(init_op)
       for i in range(10):
         actual = sess.run(get_next)
-        expected = sparse_tensor.SparseTensor(
-            indices=[[0, 0], [1, 0]], values=[i, i], dense_shape=[2, 1])
         self.assertTrue(isinstance(actual, sparse_tensor.SparseTensorValue))
-        self.assertSparseValuesEqual(actual, expected.eval())
+        self.assertSparseValuesEqual(actual, _check(_sparse(i)).eval())
       with self.assertRaises(errors.OutOfRangeError):
         sess.run(get_next)
 
diff --git a/tensorflow/contrib/data/python/ops/dataset_ops.py b/tensorflow/contrib/data/python/ops/dataset_ops.py
index 863c94ef9f..626a9e0edc 100644
--- a/tensorflow/contrib/data/python/ops/dataset_ops.py
+++ b/tensorflow/contrib/data/python/ops/dataset_ops.py
@@ -21,7 +21,6 @@ from tensorflow.contrib.data.python.ops import batching
 from tensorflow.contrib.data.python.ops import enumerate_ops
 from tensorflow.contrib.data.python.ops import error_ops
 from tensorflow.contrib.data.python.ops import grouping
-
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.data.util import nest
 from tensorflow.python.ops import gen_dataset_ops
diff --git a/tensorflow/contrib/data/python/ops/readers.py b/tensorflow/contrib/data/python/ops/readers.py
index bb47832fe9..acb7a43211 100644
--- a/tensorflow/contrib/data/python/ops/readers.py
+++ b/tensorflow/contrib/data/python/ops/readers.py
@@ -164,7 +164,7 @@ def read_batch_features(file_pattern,
       shuffling but would increase memory usage and startup time.
 
   Returns:
-    A dict from keys in features to Tensor or SparseTensor objects.
+    A dict from keys in features to `Tensor` or `SparseTensor` objects.
   """
   filenames = _get_file_names(file_pattern, randomize_input)
   if reader_args:
diff --git a/tensorflow/python/data/ops/dataset_ops.py b/tensorflow/python/data/ops/dataset_ops.py
index 00ac3334b0..dbe29c087a 100644
--- a/tensorflow/python/data/ops/dataset_ops.py
+++ b/tensorflow/python/data/ops/dataset_ops.py
@@ -40,6 +40,7 @@ from tensorflow.python.ops import gen_dataset_ops
 from tensorflow.python.ops import gen_io_ops
 from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import script_ops
+from tensorflow.python.ops import sparse_ops
 
 
 class Dataset(object):
@@ -892,11 +893,20 @@ class TensorDataset(Dataset):
     """See `Dataset.from_tensors()` for details."""
     super(TensorDataset, self).__init__()
     with ops.name_scope("tensors"):
-      self._tensors = nest.pack_sequence_as(tensors, [
-          ops.convert_to_tensor(t, name="component_%d" % i)
+      tensors = nest.pack_sequence_as(tensors, [
+          sparse_tensor_lib.SparseTensor.from_value(t)
+          if sparse_tensor_lib.is_sparse(t) else ops.convert_to_tensor(
+              t, name="component_%d" % i)
           for i, t in enumerate(nest.flatten(tensors))
       ])
 
+    self._tensors = sparse.serialize_sparse_tensors(tensors)
+    self._output_classes = sparse.get_classes(tensors)
+    self._output_shapes = nest.pack_sequence_as(
+        tensors, [t.get_shape() for t in nest.flatten(tensors)])
+    self._output_types = nest.pack_sequence_as(
+        tensors, [t.dtype for t in nest.flatten(tensors)])
+
   def _as_variant_tensor(self):
     return gen_dataset_ops.tensor_dataset(
         nest.flatten(self._tensors),
@@ -905,18 +915,15 @@ class TensorDataset(Dataset):
 
   @property
   def output_classes(self):
-    return nest.pack_sequence_as(
-        self._tensors, [ops.Tensor for _ in nest.flatten(self._tensors)])
+    return self._output_classes
 
   @property
   def output_shapes(self):
-    return nest.pack_sequence_as(self._tensors,
-                                 [t.shape for t in nest.flatten(self._tensors)])
+    return self._output_shapes
 
   @property
   def output_types(self):
-    return nest.pack_sequence_as(self._tensors,
-                                 [t.dtype for t in nest.flatten(self._tensors)])
+    return self._output_types
 
 
 class TensorSliceDataset(Dataset):
@@ -926,15 +933,27 @@ class TensorSliceDataset(Dataset):
     """See `Dataset.from_tensor_slices()` for details."""
     super(TensorSliceDataset, self).__init__()
     with ops.name_scope("tensors"):
-      flat_tensors = [
-          ops.convert_to_tensor(t, name="component_%d" % i)
+      tensors = nest.pack_sequence_as(tensors, [
+          sparse_tensor_lib.SparseTensor.from_value(t)
+          if sparse_tensor_lib.is_sparse(t) else ops.convert_to_tensor(
+              t, name="component_%d" % i)
           for i, t in enumerate(nest.flatten(tensors))
-      ]
+      ])
+      flat_tensors = nest.flatten(tensors)
 
-    self._tensors = nest.pack_sequence_as(tensors, flat_tensors)
     batch_dim = flat_tensors[0].get_shape()[0]
     for t in flat_tensors[1:]:
       batch_dim.assert_is_compatible_with(t.get_shape()[0])
+    self._tensors = nest.pack_sequence_as(tensors, [
+        sparse_ops.serialize_many_sparse(tensor)
+        if sparse_tensor_lib.is_sparse(tensor) else tensor
+        for tensor in nest.flatten(tensors)
+    ])
+    self._output_classes = sparse.get_classes(tensors)
+    self._output_shapes = nest.pack_sequence_as(
+        tensors, [t.get_shape()[1:] for t in nest.flatten(tensors)])
+    self._output_types = nest.pack_sequence_as(
+        tensors, [t.dtype for t in nest.flatten(tensors)])
 
   def _as_variant_tensor(self):
     return gen_dataset_ops.tensor_slice_dataset(
@@ -944,20 +963,15 @@ class TensorSliceDataset(Dataset):
 
   @property
   def output_classes(self):
-    return nest.pack_sequence_as(
-        self._tensors, [ops.Tensor for _ in nest.flatten(self._tensors)])
+    return self._output_classes
 
   @property
   def output_shapes(self):
-    return nest.pack_sequence_as(self._tensors, [
-        tensor_shape.TensorShape(t.shape[1:])
-        for t in nest.flatten(self._tensors)
-    ])
+    return self._output_shapes
 
   @property
   def output_types(self):
-    return nest.pack_sequence_as(self._tensors,
-                                 [t.dtype for t in nest.flatten(self._tensors)])
+    return self._output_types
 
 
 class SparseTensorSliceDataset(Dataset):
@@ -1513,6 +1527,12 @@ class MapDataset(Dataset):
       if isinstance(ret, list):
         ret = tuple(ret)
 
+      # Convert any `SparseTensorValue`s to `SparseTensor`s.
+      ret = nest.pack_sequence_as(ret, [
+          sparse_tensor_lib.SparseTensor.from_value(t)
+          if sparse_tensor_lib.is_sparse(t) else t for t in nest.flatten(ret)
+      ])
+
       self._output_classes = sparse.get_classes(ret)
       self._output_shapes = nest.pack_sequence_as(
           ret, [t.get_shape() for t in nest.flatten(ret)])
diff --git a/tensorflow/python/data/util/nest.py b/tensorflow/python/data/util/nest.py
index 421513cafc..2f89c006d2 100644
--- a/tensorflow/python/data/util/nest.py
+++ b/tensorflow/python/data/util/nest.py
@@ -17,17 +17,22 @@
 """## Functions for working with arbitrarily nested sequences of elements.
 
 NOTE(mrry): This fork of the `tensorflow.python.util.nest` module
-makes two changes:
+makes three changes:
 
 1. It adds support for dictionaries as a level of nesting in nested structures.
 2. It removes support for lists as a level of nesting in nested structures.
+3. It adds support for `SparseTensorValue` as an atomic element.
 
-The motivation for this change is twofold:
+The motivation for this change is threefold:
 
 1. Many input-processing functions (e.g. `tf.parse_example()`) return
    dictionaries, and we would like to support them natively in datasets.
 2. It seems more natural for lists to be treated (e.g. in Dataset constructors)
    as tensors, rather than lists of (lists of...) tensors.
+3. This is needed because `SparseTensorValue` is implemented as a `namedtuple`
+   that would normally be flattened and we want to be able to create sparse
+   tensor from `SparseTensorValue's similarly to creating tensors from numpy
+   arrays.
 """
 
 from __future__ import absolute_import
@@ -38,6 +43,7 @@ import collections as _collections
 
 import six as _six
 
+from tensorflow.python.framework import sparse_tensor as _sparse_tensor
 from tensorflow.python.util.all_util import remove_undocumented
 
 
@@ -87,6 +93,8 @@ def _yield_value(iterable):
     # corresponding `OrderedDict` to pack it back).
     for key in _sorted(iterable):
       yield iterable[key]
+  elif isinstance(iterable, _sparse_tensor.SparseTensorValue):
+    yield iterable
   else:
     for value in iterable:
       yield value
@@ -116,8 +124,9 @@ def is_sequence(seq):
     True if the sequence is a not a string or list and is a
     collections.Sequence.
   """
-  return (isinstance(seq, (_collections.Sequence, dict))
-          and not isinstance(seq, (list, _six.string_types)))
+  return (isinstance(seq, (_collections.Sequence, dict)) and
+          not isinstance(seq, _sparse_tensor.SparseTensorValue) and
+          not isinstance(seq, (list, _six.string_types)))
 
 
 def flatten(nest):
diff --git a/tensorflow/python/data/util/nest_test.py b/tensorflow/python/data/util/nest_test.py
index 6416e2850d..0bd0a5f443 100644
--- a/tensorflow/python/data/util/nest_test.py
+++ b/tensorflow/python/data/util/nest_test.py
@@ -24,6 +24,7 @@ import numpy as np
 
 from tensorflow.python.data.util import nest
 from tensorflow.python.framework import constant_op
+from tensorflow.python.framework import sparse_tensor
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import math_ops
 from tensorflow.python.platform import test
@@ -86,7 +87,7 @@ class NestTest(test.TestCase):
         ordered_reconstruction)
     self.assertEqual({"d": 3, "b": 1, "a": 0, "c": 2}, plain_reconstruction)
 
-  def testFlattenAndPack_withDicts(self):
+  def testFlattenAndPackWithDicts(self):
     # A nice messy mix of tuples, lists, dicts, and `OrderedDict`s.
     named_tuple = collections.namedtuple("A", ("b", "c"))
     mess = (
@@ -132,6 +133,17 @@ class NestTest(test.TestCase):
     self.assertIsInstance(unflattened_ordered_dict, collections.OrderedDict)
     self.assertEqual(list(unflattened_ordered_dict.keys()), ["b", "a"])
 
+  def testFlattenSparseValue(self):
+    st = sparse_tensor.SparseTensorValue([[0]], [0], [1])
+    single_value = st
+    list_of_values = [st, st, st]
+    nest_of_values = ((st), ((st), (st)))
+    dict_of_values = {"foo": st, "bar": st, "baz": st}
+    self.assertEqual([st], nest.flatten(single_value))
+    self.assertEqual([[st, st, st]], nest.flatten(list_of_values))
+    self.assertEqual([st, st, st], nest.flatten(nest_of_values))
+    self.assertEqual([st, st, st], nest.flatten(dict_of_values))
+
   def testIsSequence(self):
     self.assertFalse(nest.is_sequence("1234"))
     self.assertFalse(nest.is_sequence([1, 3, [4, 5]]))
@@ -143,6 +155,8 @@ class NestTest(test.TestCase):
     self.assertFalse(nest.is_sequence(math_ops.tanh(ones)))
     self.assertFalse(nest.is_sequence(np.ones((4, 5))))
     self.assertTrue(nest.is_sequence({"foo": 1, "bar": 2}))
+    self.assertFalse(
+        nest.is_sequence(sparse_tensor.SparseTensorValue([[0]], [0], [1])))
 
   def testAssertSameStructure(self):
     structure1 = (((1, 2), 3), 4, (5, 6))
diff --git a/tensorflow/python/framework/sparse_tensor.py b/tensorflow/python/framework/sparse_tensor.py
index 10f5579ae5..6218cc34ca 100644
--- a/tensorflow/python/framework/sparse_tensor.py
+++ b/tensorflow/python/framework/sparse_tensor.py
@@ -93,8 +93,7 @@ class SparseTensor(_TensorLike):
 
   @classmethod
   def from_value(cls, sparse_tensor_value):
-    if not (isinstance(sparse_tensor_value, SparseTensor) or
-            isinstance(sparse_tensor_value, SparseTensorValue)):
+    if not is_sparse(sparse_tensor_value):
       raise TypeError("Neither a SparseTensor nor SparseTensorValue: %s." %
                       sparse_tensor_value)
     return SparseTensor(
@@ -253,3 +252,17 @@ def convert_to_tensor_or_sparse_tensor(value, dtype=None, name=None):
     return value
   return ops.internal_convert_to_tensor(
       value, dtype=dtype, name=name)
+
+
+def is_sparse(x):
+  """Check whether `x` is sparse.
+
+  Check whether an object is a `tf.SparseTensor` or `tf.SparseTensorValue`.
+
+  Args:
+    x: A python object to check.
+
+  Returns:
+    `True` iff `x` is a `tf.SparseTensor` or `tf.SparseTensorValue`.
+  """
+  return isinstance(x, (SparseTensor, SparseTensorValue))
diff --git a/tensorflow/python/framework/sparse_tensor_test.py b/tensorflow/python/framework/sparse_tensor_test.py
index e709eaeda1..c001fed3b0 100644
--- a/tensorflow/python/framework/sparse_tensor_test.py
+++ b/tensorflow/python/framework/sparse_tensor_test.py
@@ -18,6 +18,8 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
+import numpy as np
+
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import sparse_tensor
 from tensorflow.python.framework import test_util
@@ -51,6 +53,16 @@ class SparseTensorTest(test_util.TensorFlowTestCase):
         self.assertAllEqual(sess_run_value.values, value.values)
         self.assertAllEqual(sess_run_value.dense_shape, value.dense_shape)
 
+  def testIsSparse(self):
+    self.assertFalse(sparse_tensor.is_sparse(3))
+    self.assertFalse(sparse_tensor.is_sparse("foo"))
+    self.assertFalse(sparse_tensor.is_sparse(np.array(3)))
+    self.assertTrue(
+        sparse_tensor.is_sparse(sparse_tensor.SparseTensor([[0]], [0], [1])))
+    self.assertTrue(
+        sparse_tensor.is_sparse(
+            sparse_tensor.SparseTensorValue([[0]], [0], [1])))
+
 
 class ConvertToTensorOrSparseTensorTest(test_util.TensorFlowTestCase):
 
diff --git a/tensorflow/python/framework/tensor_util.py b/tensorflow/python/framework/tensor_util.py
index e283542172..9fc0e49463 100644
--- a/tensorflow/python/framework/tensor_util.py
+++ b/tensorflow/python/framework/tensor_util.py
@@ -874,7 +874,7 @@ def is_tensor(x):  # pylint: disable=invalid-name
   `isinstance(x, [tf.Tensor, tf.SparseTensor, tf.Variable])`.
 
   Args:
-    x: An python object to check.
+    x: A python object to check.
 
   Returns:
     `True` if `x` is a tensor, `False` if not.
diff --git a/tensorflow/python/kernel_tests/batch_dataset_op_test.py b/tensorflow/python/kernel_tests/batch_dataset_op_test.py
index 660cbef302..0546218601 100644
--- a/tensorflow/python/kernel_tests/batch_dataset_op_test.py
+++ b/tensorflow/python/kernel_tests/batch_dataset_op_test.py
@@ -109,7 +109,7 @@ class BatchDatasetTest(test.TestCase):
   def testBatchSparse(self):
 
     def _sparse(i):
-      return sparse_tensor.SparseTensor(
+      return sparse_tensor.SparseTensorValue(
           indices=[[0]], values=(i * [1]), dense_shape=[1])
 
     iterator = dataset_ops.Dataset.range(10).map(_sparse).batch(
@@ -121,19 +121,19 @@ class BatchDatasetTest(test.TestCase):
       sess.run(init_op)
       for i in range(2):
         actual = sess.run(get_next)
-        expected = sparse_tensor.SparseTensor(
+        expected = sparse_tensor.SparseTensorValue(
             indices=[[0, 0], [1, 0], [2, 0], [3, 0], [4, 0]],
             values=[i * 5, i * 5 + 1, i * 5 + 2, i * 5 + 3, i * 5 + 4],
             dense_shape=[5, 1])
-        self.assertTrue(isinstance(actual, sparse_tensor.SparseTensorValue))
-        self.assertSparseValuesEqual(actual, expected.eval())
+        self.assertTrue(sparse_tensor.is_sparse(actual))
+        self.assertSparseValuesEqual(actual, expected)
       with self.assertRaises(errors.OutOfRangeError):
         sess.run(get_next)
 
   def testBatchSparseWithDifferentDenseShapes(self):
 
     def _sparse(i):
-      return sparse_tensor.SparseTensor(
+      return sparse_tensor.SparseTensorValue(
           indices=array_ops.expand_dims(
               math_ops.range(i, dtype=dtypes.int64), 1),
           values=array_ops.fill([math_ops.to_int32(i)], i),
@@ -154,19 +154,19 @@ class BatchDatasetTest(test.TestCase):
           for k in range(i * 5 + j):
             expected_indices.append([j, k])
             expected_values.append(i * 5 + j)
-        expected = sparse_tensor.SparseTensor(
+        expected = sparse_tensor.SparseTensorValue(
             indices=expected_indices,
             values=expected_values,
             dense_shape=[5, (i + 1) * 5 - 1])
-        self.assertTrue(isinstance(actual, sparse_tensor.SparseTensorValue))
-        self.assertSparseValuesEqual(actual, expected.eval())
+        self.assertTrue(sparse_tensor.is_sparse(actual))
+        self.assertSparseValuesEqual(actual, expected)
       with self.assertRaises(errors.OutOfRangeError):
         sess.run(get_next)
 
   def testNestedBatchSparse(self):
 
     def _sparse(i):
-      return sparse_tensor.SparseTensor(
+      return sparse_tensor.SparseTensorValue(
           indices=[[0]], values=(i * [1]), dense_shape=[1])
 
     iterator = dataset_ops.Dataset.range(10).map(_sparse).batch(5).batch(
@@ -177,13 +177,13 @@ class BatchDatasetTest(test.TestCase):
     with self.test_session() as sess:
       sess.run(init_op)
       actual = sess.run(get_next)
-      expected = sparse_tensor.SparseTensor(
+      expected = sparse_tensor.SparseTensorValue(
           indices=[[0, 0, 0], [0, 1, 0], [0, 2, 0], [0, 3, 0], [0, 4, 0],
                    [1, 0, 0], [1, 1, 0], [1, 2, 0], [1, 3, 0], [1, 4, 0]],
           values=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9],
           dense_shape=[2, 5, 1])
-      self.assertTrue(isinstance(actual, sparse_tensor.SparseTensorValue))
-      self.assertSparseValuesEqual(actual, expected.eval())
+      self.assertTrue(sparse_tensor.is_sparse(actual))
+      self.assertSparseValuesEqual(actual, expected)
       with self.assertRaises(errors.OutOfRangeError):
         sess.run(get_next)
 
@@ -314,7 +314,7 @@ class BatchDatasetTest(test.TestCase):
 
   def testPaddedBatchSparseError(self):
     def _map_fn(i):
-      return sparse_tensor.SparseTensor(
+      return sparse_tensor.SparseTensorValue(
           indices=[[0, 0]], values=(i * [1]), dense_shape=[1, 1]), i
 
     with self.assertRaises(TypeError):
diff --git a/tensorflow/python/kernel_tests/dataset_constructor_op_test.py b/tensorflow/python/kernel_tests/dataset_constructor_op_test.py
index b51d483b5b..9e2a620550 100644
--- a/tensorflow/python/kernel_tests/dataset_constructor_op_test.py
+++ b/tensorflow/python/kernel_tests/dataset_constructor_op_test.py
@@ -27,6 +27,7 @@ from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import errors
 from tensorflow.python.framework import ops
 from tensorflow.python.framework import sparse_tensor
+from tensorflow.python.framework import tensor_shape
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import resource_variable_ops
@@ -35,7 +36,7 @@ from tensorflow.python.platform import test
 
 class DatasetConstructorTest(test.TestCase):
 
-  def testTensorDataset(self):
+  def testFromTensors(self):
     """Test an dataset that represents a single tuple of tensors."""
     components = (np.array(1), np.array([1, 2, 3]), np.array(37.0))
 
@@ -55,7 +56,75 @@ class DatasetConstructorTest(test.TestCase):
       with self.assertRaises(errors.OutOfRangeError):
         sess.run(get_next)
 
-  def testTensorSliceDataset(self):
+  def assertSparseValuesEqual(self, a, b):
+    self.assertAllEqual(a.indices, b.indices)
+    self.assertAllEqual(a.values, b.values)
+    self.assertAllEqual(a.dense_shape, b.dense_shape)
+
+  def testFromTensorsSparse(self):
+    """Test an dataset that represents a single tuple of tensors."""
+    components = (sparse_tensor.SparseTensorValue(
+        indices=np.array([[0]]),
+        values=np.array([0]),
+        dense_shape=np.array([1])),
+                  sparse_tensor.SparseTensorValue(
+                      indices=np.array([[0, 0], [1, 1]]),
+                      values=np.array([-1, 1]),
+                      dense_shape=np.array([2, 2])))
+
+    iterator = (
+        dataset_ops.Dataset.from_tensors(components)
+        .make_initializable_iterator())
+    init_op = iterator.initializer
+    get_next = iterator.get_next()
+
+    self.assertEqual(
+        [tensor_shape.TensorShape(c.dense_shape) for c in components],
+        [shape for shape in iterator.output_shapes])
+
+    with self.test_session() as sess:
+      sess.run(init_op)
+      results = sess.run(get_next)
+      for component, result_component in zip(components, results):
+        self.assertSparseValuesEqual(component, result_component)
+      with self.assertRaises(errors.OutOfRangeError):
+        sess.run(get_next)
+
+  def testFromTensorsMixed(self):
+    """Test an dataset that represents a single tuple of tensors."""
+    components = (np.array(1), np.array([1, 2, 3]), np.array(37.0),
+                  sparse_tensor.SparseTensorValue(
+                      indices=np.array([[0]]),
+                      values=np.array([0]),
+                      dense_shape=np.array([1])),
+                  sparse_tensor.SparseTensorValue(
+                      indices=np.array([[0, 0], [1, 1]]),
+                      values=np.array([-1, 1]),
+                      dense_shape=np.array([2, 2])))
+
+    iterator = (
+        dataset_ops.Dataset.from_tensors(components)
+        .make_initializable_iterator())
+    init_op = iterator.initializer
+    get_next = iterator.get_next()
+
+    self.assertEqual([
+        tensor_shape.TensorShape(c.dense_shape)
+        if sparse_tensor.is_sparse(c) else c.shape for c in components
+    ], [shape for shape in iterator.output_shapes])
+
+    with self.test_session() as sess:
+      sess.run(init_op)
+      results = sess.run(get_next)
+      for component, result_component in zip(components, results):
+        if sparse_tensor.is_sparse(component):
+          self.assertSparseValuesEqual(component, result_component)
+        else:
+          self.assertAllEqual(component, result_component)
+      with self.assertRaises(errors.OutOfRangeError):
+        sess.run(get_next)
+
+  def testFromTensorSlices(self):
     """Test an dataset that represents the slices from a tuple of tensors."""
     components = (
         np.tile(np.array([[1], [2], [3], [4]]), 20), np.tile(
@@ -80,7 +149,127 @@ class DatasetConstructorTest(test.TestCase):
       with self.assertRaises(errors.OutOfRangeError):
         sess.run(get_next)
 
-  def testTensorSliceDatasetWithDict(self):
+  def testFromTensorSlicesSparse(self):
+    """Test an dataset that represents the slices from a tuple of tensors."""
+    components = (sparse_tensor.SparseTensorValue(
+        indices=np.array([[0, 0], [1, 0], [2, 0]]),
+        values=np.array([0, 0, 0]),
+        dense_shape=np.array([3, 1])),
+                  sparse_tensor.SparseTensorValue(
+                      indices=np.array([[0, 0], [1, 1], [2, 2]]),
+                      values=np.array([1, 2, 3]),
+                      dense_shape=np.array([3, 3])))
+
+    iterator = (
+        dataset_ops.Dataset.from_tensor_slices(components)
+        .make_initializable_iterator())
+    init_op = iterator.initializer
+    get_next = iterator.get_next()
+
+    self.assertEqual(
+        [tensor_shape.TensorShape(c.dense_shape[1:]) for c in components],
+        [shape for shape in iterator.output_shapes])
+
+    with self.test_session() as sess:
+      sess.run(init_op)
+      expected = [
+          (sparse_tensor.SparseTensorValue(
+              indices=np.array([[0]]),
+              values=np.array([0]),
+              dense_shape=np.array([1])),
+           sparse_tensor.SparseTensorValue(
+               indices=np.array([[0]]),
+               values=np.array([1]),
+               dense_shape=np.array([3]))),
+          (sparse_tensor.SparseTensorValue(
+              indices=np.array([[0]]),
+              values=np.array([0]),
+              dense_shape=np.array([1])),
+           sparse_tensor.SparseTensorValue(
+               indices=np.array([[1]]),
+               values=np.array([2]),
+               dense_shape=np.array([3]))),
+          (sparse_tensor.SparseTensorValue(
+              indices=np.array([[0]]),
+              values=np.array([0]),
+              dense_shape=np.array([1])),
+           sparse_tensor.SparseTensorValue(
+               indices=np.array([[2]]),
+               values=np.array([3]),
+               dense_shape=np.array([3]))),
+      ]
+      for i in range(3):
+        results = sess.run(get_next)
+        for component, result_component in zip(expected[i], results):
+          self.assertSparseValuesEqual(component, result_component)
+      with self.assertRaises(errors.OutOfRangeError):
+        sess.run(get_next)
+
+  def testFromTensorSlicesMixed(self):
+    """Test an dataset that represents the slices from a tuple of tensors."""
+    components = (np.tile(np.array([[1], [2], [3]]), 20),
+                  np.tile(np.array([[12], [13], [14]]), 22),
+                  np.array([37.0, 38.0, 39.0]),
+                  sparse_tensor.SparseTensorValue(
+                      indices=np.array([[0, 0], [1, 0], [2, 0]]),
+                      values=np.array([0, 0, 0]),
+                      dense_shape=np.array([3, 1])),
+                  sparse_tensor.SparseTensorValue(
+                      indices=np.array([[0, 0], [1, 1], [2, 2]]),
+                      values=np.array([1, 2, 3]),
+                      dense_shape=np.array([3, 3])))
+
+    iterator = (
+        dataset_ops.Dataset.from_tensor_slices(components)
+        .make_initializable_iterator())
+    init_op = iterator.initializer
+    get_next = iterator.get_next()
+
+    self.assertEqual([
+        tensor_shape.TensorShape(c.dense_shape[1:])
+        if sparse_tensor.is_sparse(c) else c.shape[1:] for c in components
+    ], [shape for shape in iterator.output_shapes])
+
+    with self.test_session() as sess:
+      sess.run(init_op)
+      expected = [
+          (sparse_tensor.SparseTensorValue(
+              indices=np.array([[0]]),
+              values=np.array([0]),
+              dense_shape=np.array([1])),
+           sparse_tensor.SparseTensorValue(
+               indices=np.array([[0]]),
+               values=np.array([1]),
+               dense_shape=np.array([3]))),
+          (sparse_tensor.SparseTensorValue(
+              indices=np.array([[0]]),
+              values=np.array([0]),
+              dense_shape=np.array([1])),
+           sparse_tensor.SparseTensorValue(
+               indices=np.array([[1]]),
+               values=np.array([2]),
+               dense_shape=np.array([3]))),
+          (sparse_tensor.SparseTensorValue(
+              indices=np.array([[0]]),
+              values=np.array([0]),
+              dense_shape=np.array([1])),
+           sparse_tensor.SparseTensorValue(
+               indices=np.array([[2]]),
+               values=np.array([3]),
+               dense_shape=np.array([3]))),
+      ]
+      for i in range(3):
+        results = sess.run(get_next)
+        for component, result_component in zip(
+            (zip(*components[:3])[i] + expected[i]), results):
+          if sparse_tensor.is_sparse(component):
+            self.assertSparseValuesEqual(component, result_component)
+          else:
+            self.assertAllEqual(component, result_component)
+      with self.assertRaises(errors.OutOfRangeError):
+        sess.run(get_next)
+
+  def testFromTensorSlicesWithDict(self):
     components = {"foo": [1, 2, 3], "bar": [[4.0], [5.0], [6.0]]}
     iterator = (dataset_ops.Dataset.from_tensor_slices(components)
                 .make_initializable_iterator())
@@ -101,7 +290,7 @@ class DatasetConstructorTest(test.TestCase):
       with self.assertRaises(errors.OutOfRangeError):
         sess.run(get_next)
 
-  def testSparseTensorSliceDataset(self):
+  def testFromSparseTensorSlices(self):
     """Test a dataset based on slices of a `tf.SparseTensor`."""
     st = array_ops.sparse_placeholder(dtypes.float64)
     iterator = (dataset_ops.Dataset.from_sparse_tensor_slices(st)
diff --git a/tensorflow/python/kernel_tests/filter_dataset_op_test.py b/tensorflow/python/kernel_tests/filter_dataset_op_test.py
index 6eb445445f..b9258b720e 100644
--- a/tensorflow/python/kernel_tests/filter_dataset_op_test.py
+++ b/tensorflow/python/kernel_tests/filter_dataset_op_test.py
@@ -131,9 +131,12 @@ class FilterDatasetTest(test.TestCase):
     self.assertAllEqual(a.dense_shape, b.dense_shape)
 
   def testSparse(self):
+
     def _map_fn(i):
-      return sparse_tensor.SparseTensor(
-          indices=[[0, 0]], values=(i * [1]), dense_shape=[1, 1]), i
+      return sparse_tensor.SparseTensorValue(
+          indices=np.array([[0, 0]]),
+          values=(i * np.array([1])),
+          dense_shape=np.array([1, 1])), i
 
     def _filter_fn(_, i):
       return math_ops.equal(i % 2, 0)
@@ -148,10 +151,8 @@ class FilterDatasetTest(test.TestCase):
       sess.run(init_op)
       for i in range(5):
         actual = sess.run(get_next)
-        expected = sparse_tensor.SparseTensor(
-            indices=[[0, 0]], values=[i*2], dense_shape=[1, 1])
         self.assertTrue(isinstance(actual, sparse_tensor.SparseTensorValue))
-        self.assertSparseValuesEqual(actual, expected.eval())
+        self.assertSparseValuesEqual(actual, _map_fn(i * 2)[0])
       with self.assertRaises(errors.OutOfRangeError):
         sess.run(get_next)
 
diff --git a/tensorflow/python/kernel_tests/flat_map_dataset_op_test.py b/tensorflow/python/kernel_tests/flat_map_dataset_op_test.py
index 895f36382a..350234a839 100644
--- a/tensorflow/python/kernel_tests/flat_map_dataset_op_test.py
+++ b/tensorflow/python/kernel_tests/flat_map_dataset_op_test.py
@@ -124,7 +124,7 @@ class FlatMapDatasetTest(test.TestCase):
 
   def testSparse(self):
     def _map_fn(i):
-      return sparse_tensor.SparseTensor(
+      return sparse_tensor.SparseTensorValue(
           indices=[[0, 0], [1, 1]], values=(i * [1, -1]), dense_shape=[2, 2])
 
     def _flat_map_fn(x):
diff --git a/tensorflow/python/kernel_tests/interleave_dataset_op_test.py b/tensorflow/python/kernel_tests/interleave_dataset_op_test.py
index 0a3c4af9e0..28cb50c002 100644
--- a/tensorflow/python/kernel_tests/interleave_dataset_op_test.py
+++ b/tensorflow/python/kernel_tests/interleave_dataset_op_test.py
@@ -177,8 +177,9 @@ class InterleaveDatasetTest(test.TestCase):
         sess.run(next_element)
 
   def testSparse(self):
+
     def _map_fn(i):
-      return sparse_tensor.SparseTensor(
+      return sparse_tensor.SparseTensorValue(
           indices=[[0, 0], [1, 1]], values=(i * [1, -1]), dense_shape=[2, 2])
 
     def _interleave_fn(x):
diff --git a/tensorflow/python/kernel_tests/map_dataset_op_test.py b/tensorflow/python/kernel_tests/map_dataset_op_test.py
index 51f43bfd89..ad6bbc043d 100644
--- a/tensorflow/python/kernel_tests/map_dataset_op_test.py
+++ b/tensorflow/python/kernel_tests/map_dataset_op_test.py
@@ -284,9 +284,8 @@ class MapDatasetTest(test.TestCase):
     with self.test_session() as sess:
       sess.run(table.init)
       sess.run(init_op)
-
-      print(sess.run(get_next))
-      print(sess.run(get_next))
+      sess.run(get_next)
+      sess.run(get_next)
       with self.assertRaises(errors.OutOfRangeError):
         sess.run(get_next)
 
@@ -553,9 +552,13 @@ class MapDatasetTest(test.TestCase):
     self.assertAllEqual(a.dense_shape, b.dense_shape)
 
   def testSparse(self):
+
     def _sparse(i):
-      return sparse_tensor.SparseTensor(
-          indices=[[0, 0]], values=(i * [1]), dense_shape=[1, 1])
+      return sparse_tensor.SparseTensorValue(
+          indices=np.array([[0, 0]]),
+          values=(i * np.array([1])),
+          dense_shape=np.array([1, 1]))
+
     iterator = (dataset_ops.Dataset.range(10)
                 .map(_sparse)
                 .make_initializable_iterator())
@@ -566,24 +569,26 @@ class MapDatasetTest(test.TestCase):
       sess.run(init_op)
       for i in range(10):
         actual = sess.run(get_next)
-        expected = sparse_tensor.SparseTensor(
-            indices=[[0, 0]], values=[i], dense_shape=[1, 1])
         self.assertTrue(isinstance(actual, sparse_tensor.SparseTensorValue))
-        self.assertSparseValuesEqual(actual, expected.eval())
+        self.assertSparseValuesEqual(actual, _sparse(i))
       with self.assertRaises(errors.OutOfRangeError):
         sess.run(get_next)
 
   def testSparseChain(self):
+
     def _sparse(i):
-      return sparse_tensor.SparseTensor(
-          indices=[[0, 0]], values=(i * [1]), dense_shape=[1, 1])
+      return sparse_tensor.SparseTensorValue(
+          indices=np.array([[0, 0]]),
+          values=(i * np.array([1])),
+          dense_shape=np.array([1, 1]))
+
     def _check(i):
-      self.assertTrue(isinstance(i, sparse_tensor.SparseTensor))
+      self.assertTrue(sparse_tensor.is_sparse(i))
       return sparse_ops.sparse_concat(0, [i, i])
 
-    iterator = (dataset_ops.Dataset.range(10)
-                .map(_sparse).map(_check)
-                .make_initializable_iterator())
+    iterator = (
+        dataset_ops.Dataset.range(10).map(_sparse).map(_check)
+        .make_initializable_iterator())
     init_op = iterator.initializer
     get_next = iterator.get_next()
 
@@ -591,10 +596,8 @@ class MapDatasetTest(test.TestCase):
       sess.run(init_op)
       for i in range(10):
         actual = sess.run(get_next)
-        expected = sparse_tensor.SparseTensor(
-            indices=[[0, 0], [1, 0]], values=[i, i], dense_shape=[2, 1])
         self.assertTrue(isinstance(actual, sparse_tensor.SparseTensorValue))
-        self.assertSparseValuesEqual(actual, expected.eval())
+        self.assertSparseValuesEqual(actual, _check(_sparse(i)).eval())
       with self.assertRaises(errors.OutOfRangeError):
         sess.run(get_next)
 
-- 
GitLab


From 7ea0fd6ccadc2922560ee66e3b11ae45324cc946 Mon Sep 17 00:00:00 2001
From: Guenther Schmuelling <guschmue@microsoft.com>
Date: Tue, 28 Nov 2017 10:46:48 -0800
Subject: [PATCH 0344/1225] add support for quantized ops on windows

---
 tensorflow/contrib/cmake/README.md            | 17 ------
 .../contrib/cmake/external/gemmlowp.cmake     |  4 +-
 .../contrib/cmake/tf_core_kernels.cmake       |  3 -
 tensorflow/contrib/cmake/tf_tests.cmake       |  2 +
 tensorflow/core/kernels/quantized_conv_ops.cc |  7 +++
 .../python/ops/quantized_conv_ops_test.py     |  2 +-
 tensorflow/python/ops/quantized_ops_test.py   | 57 +++++++++++++++++++
 7 files changed, 69 insertions(+), 23 deletions(-)
 create mode 100644 tensorflow/python/ops/quantized_ops_test.py

diff --git a/tensorflow/contrib/cmake/README.md b/tensorflow/contrib/cmake/README.md
index 4ddfec5960..4be733a280 100644
--- a/tensorflow/contrib/cmake/README.md
+++ b/tensorflow/contrib/cmake/README.md
@@ -19,23 +19,6 @@ for instructions on how to install a pre-built TensorFlow package on Windows.
 ### Current known limitations
 * It is not possible to load a custom Op library.
 * GCS file system is not supported.
-* The following Ops are not currently implemented:
- - Dequantize
- - QuantizeAndDequantize
- - QuantizedAvgPool
- - QuantizedBatchNomWithGlobalNormalization
- - QuantizedBiasAdd
- - QuantizedConcat
- - QuantizedConv2D
- - QuantizedMatmul
- - QuantizedMaxPoo
- - QuantizeDownAndShrinkRange
- - QuantizedRelu
- - QuantizedRelu6
- - QuantizedReshape
- - QuantizeV2
- - RequantizationRange
- - Requantize
 
 ## Building with CMake
 
diff --git a/tensorflow/contrib/cmake/external/gemmlowp.cmake b/tensorflow/contrib/cmake/external/gemmlowp.cmake
index 3b146657bf..a235442dc5 100644
--- a/tensorflow/contrib/cmake/external/gemmlowp.cmake
+++ b/tensorflow/contrib/cmake/external/gemmlowp.cmake
@@ -14,8 +14,8 @@
 # ==============================================================================
 include (ExternalProject)
 
-set(gemmlowp_URL https://mirror.bazel.build/github.com/google/gemmlowp/archive/010bb3e71a26ca1d0884a167081d092b43563996.zip)
-set(gemmlowp_HASH SHA256=dd2557072bde12141419cb8320a9c25e6ec41a8ae53c2ac78c076a347bb46d9d)
+set(gemmlowp_URL https://github.com/google/gemmlowp/archive/6a2a90822e8546fc2bfa7044de0faf1c1cb4862f.zip)
+set(gemmlowp_HASH SHA256=3447948d219f3270383766bbe08942888c0eb4e0ca6663c0e0548502ec5bb77d)
 set(gemmlowp_BUILD ${CMAKE_CURRENT_BINARY_DIR}/gemmlowp/src/gemmlowp)
 set(gemmlowp_INCLUDE_DIR ${CMAKE_CURRENT_BINARY_DIR}/gemmlowp/src/gemmlowp)
 
diff --git a/tensorflow/contrib/cmake/tf_core_kernels.cmake b/tensorflow/contrib/cmake/tf_core_kernels.cmake
index 2d015908a8..eb6bf567aa 100644
--- a/tensorflow/contrib/cmake/tf_core_kernels.cmake
+++ b/tensorflow/contrib/cmake/tf_core_kernels.cmake
@@ -150,9 +150,6 @@ list(REMOVE_ITEM tf_core_kernels_srcs ${tf_core_kernels_exclude_srcs})
 if(WIN32)
   file(GLOB_RECURSE tf_core_kernels_windows_exclude_srcs
       # not working on windows yet
-      "${tensorflow_source_dir}/tensorflow/core/kernels/meta_support.*"
-      "${tensorflow_source_dir}/tensorflow/core/kernels/*quantiz*.h"
-      "${tensorflow_source_dir}/tensorflow/core/kernels/*quantiz*.cc"
       "${tensorflow_source_dir}/tensorflow/core/kernels/neon/*"
       # not in core - those are loaded dynamically as dll
       "${tensorflow_source_dir}/tensorflow/contrib/nearest_neighbor/kernels/hyperplane_lsh_probes.cc"
diff --git a/tensorflow/contrib/cmake/tf_tests.cmake b/tensorflow/contrib/cmake/tf_tests.cmake
index 18b71d1f9a..46134f4455 100644
--- a/tensorflow/contrib/cmake/tf_tests.cmake
+++ b/tensorflow/contrib/cmake/tf_tests.cmake
@@ -145,6 +145,8 @@ if (tensorflow_BUILD_PYTHON_TESTS)
     "${tensorflow_source_dir}/tensorflow/contrib/estimator/python/estimator/*_test.py"
     "${tensorflow_source_dir}/tensorflow/python/kernel_tests/*.py"
     "${tensorflow_source_dir}/tensorflow/python/meta_graph_transform/*_test.py"
+    "${tensorflow_source_dir}/tensorflow/python/ops/quantized_conv_ops_test.py"
+    "${tensorflow_source_dir}/tensorflow/python/ops/quantized_ops_test.py"
     "${tensorflow_source_dir}/tensorflow/python/platform/build_info_test.py"
     "${tensorflow_source_dir}/tensorflow/python/profiler/*_test.py"
     "${tensorflow_source_dir}/tensorflow/python/profiler/internal/*_test.py"
diff --git a/tensorflow/core/kernels/quantized_conv_ops.cc b/tensorflow/core/kernels/quantized_conv_ops.cc
index 3b0764bb9b..54090bac7e 100644
--- a/tensorflow/core/kernels/quantized_conv_ops.cc
+++ b/tensorflow/core/kernels/quantized_conv_ops.cc
@@ -268,6 +268,13 @@ class Im2ColConvFunctor {
     Im2ColBufferResource<T1, chunk_value_count>* im2col_buffer_resource;
     std::function<Status(Im2ColBufferResource<T1, chunk_value_count>**)>
         creator = [](Im2ColBufferResource<T1, chunk_value_count>** resource) {
+#ifdef _MSC_VER
+          // MSVC complains about the capture of chunk_value_count which oddly
+          // works fine in conv_ops_using_gemm.cc for example.
+          // Define chunk_value_count inside the lambda for now.
+          const int64 chunk_value_count =
+              (kMaxChunkSize + (sizeof(T1) - 1)) / sizeof(T1);
+#endif
           *resource = new Im2ColBufferResource<T1, chunk_value_count>();
           return Status::OK();
         };
diff --git a/tensorflow/python/ops/quantized_conv_ops_test.py b/tensorflow/python/ops/quantized_conv_ops_test.py
index 5ea47ea40e..5e9e710027 100644
--- a/tensorflow/python/ops/quantized_conv_ops_test.py
+++ b/tensorflow/python/ops/quantized_conv_ops_test.py
@@ -93,7 +93,7 @@ class Conv2DTest(test.TestCase):
     quantized_range = ((quantized_max - quantized_min) * range_adjust)
     range_scale = (quantized_range / number_of_steps)
     lowest_quantized = -(1 << (number_of_bits - 1))
-    result = np.array([(quantized_min + ((x - lowest_quantized) * range_scale))
+    result = np.array([(quantized_min + ((float(x) - lowest_quantized) * range_scale))
                        for x in quantized.flatten()])
     return result
 
diff --git a/tensorflow/python/ops/quantized_ops_test.py b/tensorflow/python/ops/quantized_ops_test.py
new file mode 100644
index 0000000000..4bf3b35e13
--- /dev/null
+++ b/tensorflow/python/ops/quantized_ops_test.py
@@ -0,0 +1,57 @@
+# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Functional tests for quantized operations."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import numpy as np
+
+from tensorflow.python.framework import constant_op
+from tensorflow.python.framework import dtypes
+from tensorflow.python.ops import array_ops
+from tensorflow.python.platform import test
+
+
+class QuantizedOpsTest(test.TestCase):
+
+  def __init__(self, method_name="runTest"):
+    super(QuantizedOpsTest, self).__init__(method_name)
+
+  def testQuantizeOp(self):
+    expected_output = [1, 1, 2, 127, 255, 255]
+    with self.test_session(use_gpu=False) as sess:
+      x = constant_op.constant([1.0, 1.25, 1.75, 127.0, 255.0, 500.0], shape=[6], dtype=dtypes.float32)
+      x_min = 0.0
+      x_max = 255.0
+      op = array_ops.quantize(x, x_min, x_max, dtypes.quint8, mode="MIN_FIRST")
+      value = sess.run(op)
+      self.assertArrayNear(expected_output, value.output, 0.1)
+
+  def testDequantizeOp(self):
+    expected_output = [1.0, 2.0, 4.0, 8.0, 16.0, 255.0]
+    inp = np.array([1, 2, 4, 8, 16, 255]).astype(np.uint8)
+    with self.test_session(use_gpu=False) as sess:
+      x = constant_op.constant(inp, shape=[6], dtype=dtypes.quint8)
+      x_min = 0.0
+      x_max = 255.0
+      op = array_ops.dequantize(x, x_min, x_max, mode="MIN_FIRST")
+      value = sess.run(op)
+      self.assertArrayNear(expected_output, value, 0.1)
+
+
+if __name__ == "__main__":
+  test.main()
-- 
GitLab


From 4cb754e0513262e6d89eacc90eb3673f2b405234 Mon Sep 17 00:00:00 2001
From: Michael Case <mikecase@google.com>
Date: Tue, 28 Nov 2017 10:48:47 -0800
Subject: [PATCH 0345/1225] Add empty placeholder git/gen files.

Projects that depend on TensorFlow as a Bazel external dependency will
almost certainly not be modifying any of the TensorFlow source and therefore
don't need these git symlinked files (which exist for Bazel to keep track
of changes to HEAD).

Adding empty files for the files generated by configure so that TensorFlow
can be built without running configure for these projects.

These placeholders will get overridden for TF checkouts the first time you
run configure.py.

PiperOrigin-RevId: 177184829
---
 tensorflow/tools/git/gen/branch_ref | 1 +
 tensorflow/tools/git/gen/head       | 1 +
 tensorflow/tools/git/gen/spec.json  | 3 +++
 3 files changed, 5 insertions(+)
 create mode 100644 tensorflow/tools/git/gen/branch_ref
 create mode 100644 tensorflow/tools/git/gen/head
 create mode 100644 tensorflow/tools/git/gen/spec.json

diff --git a/tensorflow/tools/git/gen/branch_ref b/tensorflow/tools/git/gen/branch_ref
new file mode 100644
index 0000000000..8b13789179
--- /dev/null
+++ b/tensorflow/tools/git/gen/branch_ref
@@ -0,0 +1 @@
+
diff --git a/tensorflow/tools/git/gen/head b/tensorflow/tools/git/gen/head
new file mode 100644
index 0000000000..8b13789179
--- /dev/null
+++ b/tensorflow/tools/git/gen/head
@@ -0,0 +1 @@
+
diff --git a/tensorflow/tools/git/gen/spec.json b/tensorflow/tools/git/gen/spec.json
new file mode 100644
index 0000000000..176bbc21cc
--- /dev/null
+++ b/tensorflow/tools/git/gen/spec.json
@@ -0,0 +1,3 @@
+{
+  "git": false
+}
-- 
GitLab


From c81a8ae591cf43b6d10b887dfb22a780af3beec0 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 28 Nov 2017 10:51:26 -0800
Subject: [PATCH 0346/1225] Make sure that additional ops added by Savers to
 read ResourceVariables are added to the graph in a deterministic way.

For ResourceVariables (op "VarHandleOp"), ops.internal_convert_to_tensor will add new ops such as "Read_8/ReadVariableOp". If op_list is cast to a set, as before this change, then adding these new ops made graph construction non-deterministic.

PiperOrigin-RevId: 177185279
---
 tensorflow/python/training/saver.py      |  5 ++++-
 tensorflow/python/training/saver_test.py | 12 ++++++++++++
 2 files changed, 16 insertions(+), 1 deletion(-)

diff --git a/tensorflow/python/training/saver.py b/tensorflow/python/training/saver.py
index 5bddde1698..bd47736d4b 100644
--- a/tensorflow/python/training/saver.py
+++ b/tensorflow/python/training/saver.py
@@ -523,7 +523,10 @@ class BaseSaverBuilder(object):
     if not isinstance(op_list, (list, tuple, set)):
       raise TypeError("Variables to save should be passed in a dict or a "
                       "list: %s" % op_list)
-    op_list = set(op_list)
+    # When ResourceVariables are converted to Tensors, read ops are added to the
+    # graph. Sorting the op_list ensures that the resulting graph is always
+    # constructed in a deterministic way:
+    op_list = sorted(op_list, key=lambda x: x.name)
     names_to_saveables = {}
     # pylint: disable=protected-access
     for var in op_list:
diff --git a/tensorflow/python/training/saver_test.py b/tensorflow/python/training/saver_test.py
index 744b17dd22..98ac197204 100644
--- a/tensorflow/python/training/saver_test.py
+++ b/tensorflow/python/training/saver_test.py
@@ -164,6 +164,18 @@ class SaverTest(test.TestCase):
   def testResourceBasic(self):
     self.basicSaveRestore(resource_variable_ops.ResourceVariable)
 
+  def testResourceVariableReadOpsAddedDeterministically(self):
+    graph_defs = []
+    num_graphs = 10
+    for _ in range(num_graphs):
+      with ops_lib.Graph().as_default() as g:
+        for i in range(20):
+          resource_variable_ops.ResourceVariable(i, name="var%s" % i)
+        saver_module.Saver()
+        graph_defs.append(g.as_graph_def())
+    for i in range(num_graphs - 1):
+      self.assertEqual(graph_defs[i], graph_defs[i + 1])
+
   def testEagerBasic(self):
     with context.eager_mode():
       ckpt_prefix = os.path.join(self.get_temp_dir(), "ckpt")
-- 
GitLab


From 723f285b64ace381e4180b342e31a8117b483058 Mon Sep 17 00:00:00 2001
From: Justin Lebar <jlebar@google.com>
Date: Tue, 28 Nov 2017 10:53:48 -0800
Subject: [PATCH 0347/1225] [XLA] Improvements to replay_computation tool.

 * Reduce threshold at which we run fake-data generation on the device
   from 1gb to 1mb.  At the old threshold, I observed cases where
   we'd spend many seconds, and >50% of our runtime, in logf(), used for
   computing random numbers.

 * Don't retrieve or print the result when running with fake data.
   Presumably this is uninteresting, because garbage in, garbage out.
   Retrieving this data can take as long as running the whole
   computation, and printing it can take many times longer.

 * Add a LOG(INFO) indicating how long execution took.

 * Add a --num_runs flag.  This is particularly important on GPUs, where
   the first run does autotuning, and so isn't interesting from a
   performance perspective.

PiperOrigin-RevId: 177185636
---
 tensorflow/compiler/xla/client/lib/testing.cc |  2 +-
 .../compiler/xla/tools/replay_computation.cc  | 57 ++++++++++++++-----
 2 files changed, 44 insertions(+), 15 deletions(-)

diff --git a/tensorflow/compiler/xla/client/lib/testing.cc b/tensorflow/compiler/xla/client/lib/testing.cc
index d936bd870b..5f2b55713e 100644
--- a/tensorflow/compiler/xla/client/lib/testing.cc
+++ b/tensorflow/compiler/xla/client/lib/testing.cc
@@ -51,7 +51,7 @@ std::unique_ptr<GlobalData> MakeFakeDataViaDeviceOrDie(const Shape& shape,
 
 std::unique_ptr<GlobalData> MakeFakeDataOrDie(const Shape& shape,
                                               Client* client) {
-  if (ShapeUtil::ByteSizeOf(shape) < (1LL << 30)) {
+  if (ShapeUtil::ByteSizeOf(shape) < (1LL << 20)) {
     StatusOr<std::unique_ptr<Literal>> literal_status = MakeFakeLiteral(shape);
     if (!literal_status.ok()) {
       // If we got an Unimplemented error, fall back to making the fake data via
diff --git a/tensorflow/compiler/xla/tools/replay_computation.cc b/tensorflow/compiler/xla/tools/replay_computation.cc
index 503e7d456e..ec3f6a0471 100644
--- a/tensorflow/compiler/xla/tools/replay_computation.cc
+++ b/tensorflow/compiler/xla/tools/replay_computation.cc
@@ -65,8 +65,9 @@ namespace {
 // Similarly, infeeds fake data of shape fake_infeed_shape if it is provided;
 // otherwise, no infeed is performed.
 StatusOr<std::unique_ptr<Literal>> ReplayComputation(
-    const SessionModule& module, tensorflow::StringPiece fake_infeed_shape,
-    bool use_fake_data, Client* client) {
+    const SessionModule& module, int num_runs,
+    tensorflow::StringPiece fake_infeed_shape, bool use_fake_data,
+    Client* client) {
   TF_ASSIGN_OR_RETURN(Computation computation, client->LoadSnapshot(module));
 
   std::vector<std::unique_ptr<GlobalData>> arguments;
@@ -107,10 +108,32 @@ StatusOr<std::unique_ptr<Literal>> ReplayComputation(
   for (auto& argument : arguments) {
     execute_arguments.push_back(argument.get());
   }
-  return client->ExecuteAndTransfer(computation, execute_arguments);
+
+  // Run the computation num_runs times, and return the result from the last
+  // execution.
+  std::unique_ptr<Literal> result;
+  for (int i = 0; i < num_runs; ++i) {
+    ExecutionProfile profile;
+    if (use_fake_data) {
+      // If using fake data, execute the computation but don't bother retrieving
+      // the result -- presumably it's uninteresting, since our data is fake.
+      TF_RETURN_IF_ERROR(client
+                             ->Execute(computation, execute_arguments,
+                                       /*execution_options=*/nullptr, &profile)
+                             .status());
+    } else {
+      TF_ASSIGN_OR_RETURN(result, client->ExecuteAndTransfer(
+                                      computation, execute_arguments,
+                                      /*execution_options=*/nullptr, &profile));
+    }
+    LOG(INFO) << "Execution took "
+              << static_cast<double>(profile.compute_time_ns()) / 1e9 << "s";
+  }
+
+  return std::move(result);
 }
 
-int RealMain(tensorflow::gtl::ArraySlice<char*> args,
+int RealMain(tensorflow::gtl::ArraySlice<char*> args, int num_runs,
              tensorflow::StringPiece fake_infeed_shape, bool use_fake_data) {
   Client* client = ClientLibrary::LocalClientOrDie();
   tensorflow::Env* env = tensorflow::Env::Default();
@@ -118,22 +141,25 @@ int RealMain(tensorflow::gtl::ArraySlice<char*> args,
   for (char* arg : args) {
     SessionModule module;
     TF_CHECK_OK(tensorflow::ReadBinaryProto(env, arg, &module));
-    StatusOr<std::unique_ptr<Literal>> result_status =
-        ReplayComputation(module, fake_infeed_shape, use_fake_data, client);
+    StatusOr<std::unique_ptr<Literal>> result_status = ReplayComputation(
+        module, num_runs, fake_infeed_shape, use_fake_data, client);
     if (!result_status.ok()) {
       fprintf(stderr, "%s: error: %s\n", arg,
               result_status.status().ToString().c_str());
       exit_status = EXIT_FAILURE;
       continue;
     }
+
     std::unique_ptr<Literal> result = result_status.ConsumeValueOrDie();
-    fprintf(stdout, "%s: %s :: %s:%s\n", arg, module.entry().name().c_str(),
-            ShapeUtil::HumanString(result->shape()).c_str(),
-            result->ToString().c_str());
-    if (module.has_result()) {
-      fprintf(stdout, "was %s:%s\n",
-              ShapeUtil::HumanString(module.result().shape()).c_str(),
-              Literal(module.result()).ToString().c_str());
+    if (result != nullptr) {
+      fprintf(stdout, "%s: %s :: %s:%s\n", arg, module.entry().name().c_str(),
+              ShapeUtil::HumanString(result->shape()).c_str(),
+              result->ToString().c_str());
+      if (module.has_result()) {
+        fprintf(stdout, "was %s:%s\n",
+                ShapeUtil::HumanString(module.result().shape()).c_str(),
+                Literal(module.result()).ToString().c_str());
+      }
     }
   }
   return exit_status;
@@ -147,9 +173,12 @@ int main(int argc, char** argv) {
   // Flags
   xla::string fake_infeed_shape;
   bool use_fake_data = false;
+  int num_runs = 1;
   const std::vector<tensorflow::Flag> flag_list = {
       tensorflow::Flag("use_fake_data", &use_fake_data,
                        "Replay computation using fake data"),
+      tensorflow::Flag("num_runs", &num_runs,
+                       "Number of times to run each computation"),
       tensorflow::Flag("fake_infeed_shape", &fake_infeed_shape,
                        "Shape of fake data to construct for (infinite) infeed"),
   };
@@ -162,5 +191,5 @@ int main(int argc, char** argv) {
 
   tensorflow::gtl::ArraySlice<char*> args(argv, argc);
   args.pop_front();  // Pop off the binary name, argv[0]
-  return xla::tools::RealMain(args, fake_infeed_shape, use_fake_data);
+  return xla::tools::RealMain(args, num_runs, fake_infeed_shape, use_fake_data);
 }
-- 
GitLab


From 5d3d7fa81b87aa3c1137366f062c4f4ab9681a09 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 28 Nov 2017 10:55:32 -0800
Subject: [PATCH 0348/1225] RevBlock: memory-efficient implementation of a
 series of reversible residual layers.

PiperOrigin-RevId: 177185950
---
 tensorflow/contrib/layers/BUILD               |  15 +
 tensorflow/contrib/layers/__init__.py         |   3 +
 .../contrib/layers/python/layers/__init__.py  |   1 +
 .../layers/python/layers/rev_block_lib.py     | 538 ++++++++++++++++++
 .../python/layers/rev_block_lib_test.py       | 331 +++++++++++
 5 files changed, 888 insertions(+)
 create mode 100644 tensorflow/contrib/layers/python/layers/rev_block_lib.py
 create mode 100644 tensorflow/contrib/layers/python/layers/rev_block_lib_test.py

diff --git a/tensorflow/contrib/layers/BUILD b/tensorflow/contrib/layers/BUILD
index 2f1f283811..852d06e1e3 100644
--- a/tensorflow/contrib/layers/BUILD
+++ b/tensorflow/contrib/layers/BUILD
@@ -61,6 +61,7 @@ tf_custom_op_py_library(
         "python/layers/normalization.py",
         "python/layers/optimizers.py",
         "python/layers/regularizers.py",
+        "python/layers/rev_block_lib.py",
         "python/layers/summaries.py",
         "python/layers/target_column.py",
         "python/layers/utils.py",
@@ -376,6 +377,20 @@ py_test(
     ],
 )
 
+py_test(
+    name = "rev_block_lib_test",
+    size = "small",
+    srcs = ["python/layers/rev_block_lib_test.py"],
+    srcs_version = "PY2AND3",
+    deps = [
+        ":layers_py",
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:init_ops",
+        "//tensorflow/python:variable_scope",
+        "//tensorflow/python:variables",
+    ],
+)
+
 filegroup(
     name = "all_files",
     srcs = glob(
diff --git a/tensorflow/contrib/layers/__init__.py b/tensorflow/contrib/layers/__init__.py
index d309ba958d..6c624929f2 100644
--- a/tensorflow/contrib/layers/__init__.py
+++ b/tensorflow/contrib/layers/__init__.py
@@ -42,6 +42,9 @@ See the @{$python/contrib.layers} guide.
 @@relu
 @@relu6
 @@repeat
+@@recompute_grad
+@@RevBlock
+@@rev_block
 @@safe_embedding_lookup_sparse
 @@scale_gradient
 @@separable_conv2d
diff --git a/tensorflow/contrib/layers/python/layers/__init__.py b/tensorflow/contrib/layers/python/layers/__init__.py
index 03337f9a5d..f1ae2de68b 100644
--- a/tensorflow/contrib/layers/python/layers/__init__.py
+++ b/tensorflow/contrib/layers/python/layers/__init__.py
@@ -28,6 +28,7 @@ from tensorflow.contrib.layers.python.layers.layers import *
 from tensorflow.contrib.layers.python.layers.normalization import *
 from tensorflow.contrib.layers.python.layers.optimizers import *
 from tensorflow.contrib.layers.python.layers.regularizers import *
+from tensorflow.contrib.layers.python.layers.rev_block_lib import *
 from tensorflow.contrib.layers.python.layers.summaries import *
 from tensorflow.contrib.layers.python.layers.target_column import *
 from tensorflow.contrib.layers.python.ops.bucketization_op import *
diff --git a/tensorflow/contrib/layers/python/layers/rev_block_lib.py b/tensorflow/contrib/layers/python/layers/rev_block_lib.py
new file mode 100644
index 0000000000..31a1b38bd4
--- /dev/null
+++ b/tensorflow/contrib/layers/python/layers/rev_block_lib.py
@@ -0,0 +1,538 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Reversible Residual Block.
+
+From
+[The Reversible Residual Network: Backpropagation Without Storing
+Activations](https://arxiv.org/abs/1707.04585).
+
+Also contains the @recompute_grad decorator, which recomputes the forward
+function on the backwards pass.
+"""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import functools
+import re
+
+from six.moves import xrange  # pylint: disable=redefined-builtin
+
+from tensorflow.contrib.framework.python import ops as contrib_framework_ops
+from tensorflow.python.framework import function
+from tensorflow.python.framework import ops as framework_ops
+from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import control_flow_ops
+from tensorflow.python.ops import gradients_impl
+from tensorflow.python.ops import math_ops
+from tensorflow.python.ops import template
+from tensorflow.python.ops import variable_scope
+from tensorflow.python.util import nest
+
+__all__ = ["rev_block", "RevBlock", "recompute_grad"]
+
+LAYER_RE = re.compile(".*revlayer_([0-9]*)/([fg])/.*")
+
+
+def _acc_grads(*lists_of_grads):
+  """Accumulates lists of gradients."""
+  acc_grads = []
+  for grads in zip(*lists_of_grads):
+    grads = [g for g in grads if g is not None]
+    if grads:
+      acc_grads.append(math_ops.add_n(grads))
+    else:
+      acc_grads.append(None)
+  return acc_grads
+
+
+def _rev_layer_forward(xs, f, g, f_side_input, g_side_input,
+                       gate_outputs=False):
+  """Forward for 1 reversible layer."""
+  x1, x2 = xs
+  y1 = x1 + (f(x2, f_side_input) if f_side_input else f(x2))
+  y2 = x2 + (g(y1, g_side_input) if g_side_input else g(y1))
+  if gate_outputs:
+    return control_flow_ops.tuple([y1, y2])
+  else:
+    return (y1, y2)
+
+
+def _rev_layer_backward(ys, grad_ys, f, g, f_vars, f_side_input, g_vars,
+                        g_side_input):
+  """Backprop for 1 layer."""
+  y1, y2 = ys
+  grad_y1, grad_y2 = grad_ys
+
+  # Reconstruct intermediates and inputs (x1, x2)
+  # stop_gradients required on fn inputs to prevent infinite recursion into this
+  # grad function on the calls to gradients.
+  y1_stop = array_ops.stop_gradient(y1)
+  g_side_input = [array_ops.stop_gradient(t) for t in g_side_input]
+  gy1 = g(y1_stop, g_side_input) if g_side_input else g(y1_stop)
+
+  x2 = y2 - gy1
+  x2_stop = array_ops.stop_gradient(x2)
+  f_side_input = [array_ops.stop_gradient(t) for t in f_side_input]
+  fx2 = f(x2_stop, f_side_input) if f_side_input else f(x2_stop)
+
+  x1 = y1 - fx2
+
+  # Compute gradients wrt to inputs
+  # dL/dy2 * dG(y1)/y1
+  grad_gy1_y2 = gradients_impl.gradients(gy1, y1_stop, grad_y2)[0]
+  grad_x1 = grad_y1 + grad_gy1_y2
+  grad_x2 = (
+      gradients_impl.gradients(fx2, x2_stop, grad_y1)[0] + grad_y2 +
+      gradients_impl.gradients(fx2, x2_stop, grad_gy1_y2)[0])
+
+  # Compute gradients wrt to vars and side inputs in f and g
+  grads1 = gradients_impl.gradients(gy1, g_vars + g_side_input, grad_y2)
+  grad_g_vars, grad_g_side = grads1[:len(g_vars)], grads1[len(g_vars):]
+  grads2 = gradients_impl.gradients(fx2, f_vars + f_side_input, grad_y1)
+  grad_f_y1, grad_f_side1 = grads2[:len(f_vars)], grads2[len(f_vars):]
+  grads3 = gradients_impl.gradients(fx2, f_vars + f_side_input, grad_gy1_y2)
+  grad_f_y2, grad_f_side2 = grads3[:len(f_vars)], grads3[len(f_vars):]
+  grad_f_vars = _acc_grads(grad_f_y1, grad_f_y2)
+
+  grad_f_side = _acc_grads(grad_f_side1, grad_f_side2)
+
+  # Put returns in a tuple to ensure a constant memory budget (i.e. don't want
+  # the subsequent layer to start computing and consuming memory based on a
+  # subset of these values).
+  outputs = ((x1, x2), (grad_x1, grad_x2), (grad_f_vars, grad_f_side),
+             (grad_g_vars, grad_g_side))
+  tupled = control_flow_ops.tuple(nest.flatten(outputs))
+  return nest.pack_sequence_as(outputs, tupled)
+
+
+def _rev_block_forward(x1,
+                       x2,
+                       f,
+                       g,
+                       num_layers=1,
+                       f_side_input=None,
+                       g_side_input=None,
+                       gate_outputs=False):
+  """Forward for a series of reversible layers."""
+  out = (x1, x2)
+  for i in xrange(num_layers):
+    out = _rev_layer_forward(
+        out, f[i], g[i], f_side_input, g_side_input, gate_outputs=gate_outputs)
+
+  y1, y2 = out
+  return y1, y2
+
+
+class RevBlock(object):
+  """Block of reversible layers. See rev_block."""
+
+  def __init__(self,
+               f,
+               g,
+               num_layers=1,
+               f_side_input=None,
+               g_side_input=None,
+               use_efficient_backprop=True):
+
+    if isinstance(f, list):
+      assert len(f) == num_layers
+    else:
+      f = [f] * num_layers
+
+    if isinstance(g, list):
+      assert len(g) == num_layers
+    else:
+      g = [g] * num_layers
+
+    scope_prefix = "revblock/revlayer_%d/"
+    f_scope = scope_prefix + "f"
+    g_scope = scope_prefix + "g"
+
+    f = [
+        template.make_template(f_scope % i, fn, create_scope_now_=True)
+        for i, fn in enumerate(f)
+    ]
+    g = [
+        template.make_template(g_scope % i, fn, create_scope_now_=True)
+        for i, fn in enumerate(g)
+    ]
+
+    self.f = f
+    self.g = g
+
+    self.num_layers = num_layers
+    self.f_side_input = f_side_input or []
+    self.g_side_input = g_side_input or []
+
+    self._use_efficient_backprop = use_efficient_backprop
+
+  def _efficient_grad_fn(self, inputs, variables, ys, grad_ys):
+    """Custom gradient fn for a block of reversible residual layers."""
+    side_inputs = inputs[2:]
+    f_side_idxs = [None] * len(self.f_side_input)
+    g_side_idxs = [None] * len(self.g_side_input)
+    assert len(side_inputs) == len(self.f_side_input) + len(self.g_side_input)
+
+    for i, t in enumerate(side_inputs):
+      if t in self.f_side_input:
+        f_side_idxs[self.f_side_input.index(t)] = i
+      elif t in self.g_side_input:
+        g_side_idxs[self.g_side_input.index(t)] = i
+      else:
+        assert False
+
+    f_vars = [[] for _ in range(self.num_layers)]
+    g_vars = [[] for _ in range(self.num_layers)]
+    f_vars_idxs = [[] for _ in range(self.num_layers)]
+    g_vars_idxs = [[] for _ in range(self.num_layers)]
+
+    for i, t in enumerate(variables):
+      ref = _underlying_variable_ref(t)
+
+      # Use the name to identify the layer number and function (f or g)
+      regex = LAYER_RE.match(ref.name)
+      layer_no = int(regex.group(1))
+      fn_name = regex.group(2)
+      if fn_name == "f":
+        f_vars[layer_no].append(ref)
+        f_vars_idxs[layer_no].append(i)
+      else:
+        assert fn_name == "g"
+        g_vars[layer_no].append(ref)
+        g_vars_idxs[layer_no].append(i)
+
+    f_var_grads = []
+    g_var_grads = []
+    f_side_grads = []
+    g_side_grads = []
+
+    # Reverse variable containers to go backward
+    f_vars.reverse()
+    g_vars.reverse()
+    f = list(self.f)
+    g = list(self.g)
+    f.reverse()
+    g.reverse()
+
+    for i in xrange(self.num_layers):
+      ys, grad_ys, f_ret, g_ret = _rev_layer_backward(
+          ys, grad_ys, f[i], g[i], f_vars[i], self.f_side_input, g_vars[i],
+          self.g_side_input)
+
+      grad_f_vars, grad_f_side = f_ret
+      grad_g_vars, grad_g_side = g_ret
+      f_var_grads.append(grad_f_vars)
+      g_var_grads.append(grad_g_vars)
+      f_side_grads.append(grad_f_side)
+      g_side_grads.append(grad_g_side)
+
+    # Accumulate layer gradients for f_side_input and g_side_input
+    acc_f_side_grads = _acc_grads(*f_side_grads)
+    acc_g_side_grads = _acc_grads(*g_side_grads)
+
+    # Use the stored idxs to put gradients in the passed-in order.
+    side_input_grads = [None] * len(side_inputs)
+    variable_grads = [None] * len(variables)
+
+    # Variable gradients were collected in reverse layer order. Reverse to match
+    # idxs.
+    f_var_grads.reverse()
+    g_var_grads.reverse()
+    for idxs, grads in list(zip(f_vars_idxs, f_var_grads)) + list(
+        zip(g_vars_idxs, g_var_grads)):
+      for i, grad in zip(idxs, grads):
+        variable_grads[i] = grad
+
+    for i, grad in zip(f_side_idxs, acc_f_side_grads):
+      side_input_grads[i] = grad
+    for i, grad in zip(g_side_idxs, acc_g_side_grads):
+      side_input_grads[i] = grad
+
+    grad_x1, grad_x2 = grad_ys
+    return [grad_x1, grad_x2] + side_input_grads, variable_grads
+
+  def forward(self, x1, x2):
+    """Run forward through the reversible layers."""
+
+    side_inputs = [self.f_side_input, self.g_side_input]
+    flat_side_inputs = nest.flatten(side_inputs)
+
+    custom_grad_fn = (
+        self._efficient_grad_fn if self._use_efficient_backprop else None)
+
+    @_fn_with_custom_grad(custom_grad_fn)
+    def _forward(x1_, x2_, *flat_side_inputs):
+      f_side, g_side = nest.pack_sequence_as(side_inputs, flat_side_inputs)
+      return _rev_block_forward(
+          x1_,
+          x2_,
+          self.f,
+          self.g,
+          num_layers=self.num_layers,
+          f_side_input=f_side,
+          g_side_input=g_side,
+          gate_outputs=self._use_efficient_backprop)
+
+    return _forward(x1, x2, *flat_side_inputs)
+
+  def backward(self, y1, y2):
+    """Run backward through the reversible layers."""
+
+    f = list(self.f)
+    g = list(self.g)
+    f.reverse()
+    g.reverse()
+
+    for i in xrange(self.num_layers):
+      gy1 = g[i](y1, self.g_side_input) if self.g_side_input else g[i](y1)
+      x2 = y2 - gy1
+      fx2 = f[i](x2, self.f_side_input) if self.f_side_input else f[i](x2)
+      x1 = y1 - fx2
+
+      y1, y2 = x1, x2
+
+    return x1, x2
+
+
+def rev_block(x1,
+              x2,
+              f,
+              g,
+              num_layers=1,
+              f_side_input=None,
+              g_side_input=None,
+              is_training=True):
+  """A block of reversible residual layers.
+
+  A reversible residual layer is defined as:
+
+  ```
+  y1 = x1 + f(x2, f_side_input)
+  y2 = x2 + g(y1, g_side_input)
+  ```
+
+  A reversible residual block, defined here, is a series of reversible residual
+  layers.
+
+  Limitations:
+  * f and g must not close over any Tensors; all side inputs to f and g should
+    be passed in with f_side_input and g_side_input which will be forwarded to
+    f and g.
+  * f and g must not change the dimensionality of their inputs in order for the
+    addition in the equations above to work.
+
+  Args:
+    x1: a float Tensor.
+    x2: a float Tensor.
+    f: a function, (Tensor) -> (Tensor) (or list of such of length num_layers).
+      Should not change the shape of the Tensor. Can make calls to get_variable.
+      See f_side_input if there are side inputs.
+    g: a function, (Tensor) -> (Tensor) (or list of such of length num_layers).
+      Should not change the shape of the Tensor. Can make calls to get_variable.
+      See g_side_input if there are side inputs.
+    num_layers: int, number of reversible residual layers. Each layer will
+      apply f and g according to the equations above, with new variables in each
+      layer.
+    f_side_input: list of Tensors, side input to f. If not None, signature of f
+      should be (Tensor, list<Tensor>) -> (Tensor).
+    g_side_input: list of Tensors, side input to g. If not None, signature of g
+      should be (Tensor, list<Tensor>) -> (Tensor).
+    is_training: bool, whether to actually use the efficient backprop codepath.
+
+  Returns:
+    y1, y2: tuple of float Tensors.
+  """
+  block = RevBlock(f, g, num_layers, f_side_input, g_side_input, is_training)
+  return block.forward(x1, x2)
+
+
+def recompute_grad(fn):
+  """Decorator that recomputes the function on the backwards pass.
+
+  Args:
+    fn: a function that takes Tensors (all as positional arguments) and returns
+      a tuple of Tensors.
+
+  Returns:
+    A wrapped fn that is identical to fn when called, but its activations will
+    be discarded and recomputed on the backwards pass (i.e. on a call to
+    tf.gradients).
+  """
+
+  @functools.wraps(fn)
+  def wrapped(*args):
+    return _recompute_grad(fn, args)
+
+  return wrapped
+
+
+def _recompute_grad(fn, args):
+  """See recompute_grad."""
+
+  cached_vs = []
+  cached_arg_scope = []
+
+  def grad_fn(inputs, variables, outputs, output_grads):
+    """Recompute outputs for gradient computation."""
+    del outputs
+    # Recompute outputs
+    with framework_ops.control_dependencies(output_grads):
+      with contrib_framework_ops.arg_scope(cached_arg_scope[0]):
+        with variable_scope.variable_scope(cached_vs[0], reuse=True):
+          outputs = fn(*inputs)
+
+    if not (isinstance(outputs, list) or isinstance(outputs, tuple)):
+      outputs = [outputs]
+    outputs = list(outputs)
+    grads = gradients_impl.gradients(outputs, inputs + variables, output_grads)
+    grad_inputs = grads[:len(inputs)]
+    grad_vars = grads[len(inputs):]
+    return grad_inputs, grad_vars
+
+  @_fn_with_custom_grad(grad_fn)
+  def fn_with_recompute(*args):
+    cached_vs.append(variable_scope.get_variable_scope())
+    # TODO(rsepassi): Rm conditional in TF 1.4
+    if hasattr(contrib_framework_ops, "current_arg_scope"):
+      cached_arg_scope.append(contrib_framework_ops.current_arg_scope())
+    else:
+      cached_arg_scope.append({})
+    return fn(*args)
+
+  return fn_with_recompute(*args)
+
+
+def _underlying_variable_ref(t):
+  """Find the underlying variable ref.
+
+  Traverses through Identity, ReadVariableOp, and Enter ops.
+  Stops when op type has Variable or VarHandle in name.
+
+  Args:
+    t: a Tensor
+
+  Returns:
+    a Tensor that is a variable ref, or None on error.
+  """
+  while t.op.type in ["Identity", "ReadVariableOp", "Enter"]:
+    t = t.op.inputs[0]
+
+  op_type = t.op.type
+  if "Variable" in op_type or "VarHandle" in op_type:
+    return t
+  else:
+    return None
+
+
+def _fn_with_custom_grad(grad_fn, use_global_vars=False):
+  """Decorator to create a subgraph with a custom gradient function.
+
+  The subgraph created by the decorated function is NOT put in a Defun and so
+  does not suffer from the limitations of the Defun (all subgraph ops on the
+  same device, no summaries).
+
+  Args:
+    grad_fn: function with signature
+      (inputs, variables, outputs, output_grads) -> (grad_inputs, grad_vars),
+      all of which are lists of Tensors.
+    use_global_vars: if True, variables will be the global variables created.
+      If False, will be the trainable variables.
+
+  Returns:
+    Decorator for function such that the gradient is defined by grad_fn.
+  """
+
+  def dec(fn):
+
+    @functools.wraps(fn)
+    def wrapped(*args):
+      return _fn_with_custom_grad_internal(
+          fn, args, grad_fn, use_global_vars=use_global_vars)
+
+    return wrapped
+
+  return dec
+
+
+def _fn_with_custom_grad_internal(fn, inputs, grad_fn, use_global_vars=False):
+  """Create a subgraph with a custom gradient.
+
+  Args:
+    fn: function that takes inputs as arguments and produces 1 or more Tensors.
+    inputs: list<Tensor>, will be passed as fn(*inputs).
+    grad_fn: function with signature
+      (inputs, vars, outputs, output_grads) -> (grad_inputs, grad_vars),
+      all of which are lists of Tensors.
+    use_global_vars: if True, variables will be the global variables created.
+      If False, will be the trainable variables.
+
+  Returns:
+    fn(*inputs)
+  """
+  vs = variable_scope.get_variable_scope()
+  get_vars_fn = (
+      vs.global_variables if use_global_vars else vs.trainable_variables)
+  len_before_vars = len(get_vars_fn())
+  inputs = list(inputs)
+  outputs = fn(*inputs)
+  train_vars = get_vars_fn()[len_before_vars:]
+
+  if grad_fn is None:
+    return outputs
+
+  if not (isinstance(outputs, tuple) or isinstance(outputs, list)):
+    outputs = [outputs]
+  outputs = list(outputs)
+
+  defun_inputs = [inputs, train_vars, outputs]
+
+  def custom_grad_fn(op, *dys):
+    """Custom grad fn applying grad_fn for identity Defun."""
+    fn_inputs, fn_vars, fn_outputs = nest.pack_sequence_as(
+        defun_inputs, list(op.inputs))
+    dys = list(dys)
+    assert len(fn_outputs) == len(outputs)
+    assert len(fn_outputs) == len(dys)
+
+    grad_inputs, grad_vars = grad_fn(fn_inputs, fn_vars, fn_outputs, dys)
+    grad_outputs = [None] * len(fn_outputs)
+    return tuple(grad_inputs + grad_vars + grad_outputs)
+
+  # The Defun takes as input the original inputs, the trainable variables
+  # created in fn, and the outputs. In the forward it passes through the
+  # outputs. In the backwards, it produces gradients for the original inputs
+  # and the trainable variables.
+  in_types = [t.dtype for t in inputs]
+  out_types = [t.dtype for t in outputs]
+  var_types = [t.dtype for t in train_vars]
+
+  # Get a unique name for the Defun
+  with framework_ops.name_scope("identity_custom_grad") as ns:
+    defun_name = ns
+
+  @function.Defun(
+      *(in_types + var_types + out_types),
+      func_name=defun_name,
+      python_grad_func=custom_grad_fn,
+      shape_func=lambda _: [t.get_shape() for t in outputs])
+  def identity(*args):
+    _, _, outs = nest.pack_sequence_as(defun_inputs, args)
+    return tuple([array_ops.identity(t) for t in outs])
+
+  flat_inputs = nest.flatten(defun_inputs)
+  id_out = identity(*flat_inputs)
+  return id_out
diff --git a/tensorflow/contrib/layers/python/layers/rev_block_lib_test.py b/tensorflow/contrib/layers/python/layers/rev_block_lib_test.py
new file mode 100644
index 0000000000..a420753fd5
--- /dev/null
+++ b/tensorflow/contrib/layers/python/layers/rev_block_lib_test.py
@@ -0,0 +1,331 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for RevBlock."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from tensorflow.contrib.layers.python.layers import layers
+from tensorflow.contrib.layers.python.layers import rev_block_lib
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import random_seed
+from tensorflow.python.layers import convolutional
+from tensorflow.python.layers import core as core_layers
+from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import gradients_impl
+from tensorflow.python.ops import init_ops
+from tensorflow.python.ops import math_ops
+from tensorflow.python.ops import nn_ops
+from tensorflow.python.ops import random_ops
+from tensorflow.python.ops import variable_scope
+from tensorflow.python.ops import variables
+from tensorflow.python.platform import test
+
+
+class RevBlockTest(test.TestCase):
+  CHANNELS = 8
+  NUM_LAYERS = 4
+  BATCH_SIZE = 16
+
+  def testForwardBackward(self):
+
+    def f(x):
+      return core_layers.dense(x, self.CHANNELS // 2, use_bias=True)
+
+    def g(x):
+      return core_layers.dense(x, self.CHANNELS // 2, use_bias=True)
+
+    x = random_ops.random_uniform(
+        [self.BATCH_SIZE, self.CHANNELS], dtype=dtypes.float32)
+    x1, x2 = array_ops.split(x, 2, axis=-1)
+
+    block = rev_block_lib.RevBlock(f, g, num_layers=3)
+    y1, y2 = block.forward(x1, x2)
+    x1_inv, x2_inv = block.backward(y1, y2)
+
+    with self.test_session() as sess:
+      sess.run(variables.global_variables_initializer())
+      x1, x2, x1_inv, x2_inv = sess.run([x1, x2, x1_inv, x2_inv])
+
+      self.assertAllClose(x1, x1_inv)
+      self.assertAllClose(x2, x2_inv)
+
+  def testBackwardForward(self):
+
+    def f(x):
+      return core_layers.dense(x, self.CHANNELS // 2, use_bias=True)
+
+    def g(x):
+      return core_layers.dense(x, self.CHANNELS // 2, use_bias=True)
+
+    y = random_ops.random_uniform(
+        [self.BATCH_SIZE, self.CHANNELS], dtype=dtypes.float32)
+    y1, y2 = array_ops.split(y, 2, axis=-1)
+
+    block = rev_block_lib.RevBlock(f, g, num_layers=3)
+    x1, x2 = block.backward(y1, y2)
+    y1_inv, y2_inv = block.forward(x1, x2)
+
+    with self.test_session() as sess:
+      sess.run(variables.global_variables_initializer())
+      y1, y2, y1_inv, y2_inv = sess.run([y1, y2, y1_inv, y2_inv])
+
+      self.assertAllClose(y1, y1_inv)
+      self.assertAllClose(y2, y2_inv)
+
+  def _testRevBlock(self,
+                    x=None,
+                    f=None,
+                    g=None,
+                    f_side_input=None,
+                    g_side_input=None):
+    random_seed.set_random_seed(1234)
+
+    if f is None:
+
+      def f(x):  # pylint: disable=function-redefined
+        return core_layers.dense(x, self.CHANNELS // 2, use_bias=True)
+
+    if g is None:
+
+      def g(x):  # pylint: disable=function-redefined
+        return core_layers.dense(x, self.CHANNELS // 2, use_bias=True)
+
+    if f_side_input is None:
+      f_side_input = []
+
+    if g_side_input is None:
+      g_side_input = []
+
+    if x is None:
+      x = random_ops.random_uniform(
+          [self.BATCH_SIZE, self.CHANNELS], dtype=dtypes.float32)
+    x1, x2 = array_ops.split(x, 2, axis=-1)
+
+    with variable_scope.variable_scope("rev_test") as vs:
+      y1_rev, y2_rev = rev_block_lib.rev_block(
+          x1,
+          x2,
+          f,
+          g,
+          f_side_input=f_side_input,
+          g_side_input=g_side_input,
+          num_layers=self.NUM_LAYERS)
+      y_rev = array_ops.concat([y1_rev, y2_rev], axis=1)
+      fg_vars = vs.trainable_variables()
+
+    num_vars = len(variables.global_variables())
+    with variable_scope.variable_scope(vs, reuse=True):
+      y1, y2 = rev_block_lib.rev_block(
+          x1,
+          x2,
+          f,
+          g,
+          f_side_input=f_side_input,
+          g_side_input=g_side_input,
+          num_layers=self.NUM_LAYERS,
+          is_training=False)
+      y = array_ops.concat([y1, y2], axis=1)
+    # Ensure no new vars were created - full reuse
+    assert len(variables.global_variables()) == num_vars
+
+    loss_rev = math_ops.reduce_mean(y_rev + 10.)
+    loss = math_ops.reduce_mean(y + 10.)
+
+    wrt = [x] + f_side_input + g_side_input + fg_vars
+    grads_rev = gradients_impl.gradients(loss_rev, wrt)
+    grads = gradients_impl.gradients(loss, wrt)
+
+    with self.test_session() as sess:
+      sess.run(variables.global_variables_initializer())
+      y_val, yd_val, gd_val, g_val = sess.run([y, y_rev, grads_rev, grads])
+      self.assertAllClose(y_val, yd_val)
+      for g1, g2 in zip(gd_val, g_val):
+        self.assertAllClose(g1, g2)
+
+  def testRevBlock(self):
+    self._testRevBlock()
+
+  def testSideInput(self):
+    f_side_input = random_ops.random_uniform(
+        [self.BATCH_SIZE, self.CHANNELS // 2])
+
+    def f(x, side_input):
+      return core_layers.dense(
+          x, self.CHANNELS // 2, use_bias=True) + side_input[0]
+
+    self._testRevBlock(f=f, f_side_input=[f_side_input])
+
+  def testMultipleFns(self):
+
+    def f1(x):
+      return core_layers.dense(x, self.CHANNELS // 2)
+
+    def f2(x):
+      return core_layers.dense(x, self.CHANNELS // 2, activation=nn_ops.relu)
+
+    self._testRevBlock(f=[f1, f2, f1, f2])
+
+  # TODO(rsepassi): Recent change to conv seems to have broken this test. Find
+  # out why.
+  def _testConvAndBatchNorm(self):
+
+    x = random_ops.random_uniform(
+        [self.BATCH_SIZE, 10, self.CHANNELS], dtype=dtypes.float32)
+
+    def f(x):
+      x = convolutional.conv1d(x, self.CHANNELS // 2, 3, padding="same")
+      x = core_layers.batch_normalization(x, training=True)
+      x = convolutional.conv1d(x, self.CHANNELS // 2, 3, padding="same")
+      x = core_layers.batch_normalization(x, training=True)
+      return x
+
+    self._testRevBlock(x=x, f=f)
+
+
+class RecomputeTest(test.TestCase):
+
+  def testRecompute(self):
+
+    def layer(x, name=None):
+      with variable_scope.variable_scope(name, default_name="layer"):
+        x = layers.layer_norm(x)
+        x = convolutional.conv1d(
+            x,
+            10,
+            1,
+            use_bias=False,
+            kernel_initializer=init_ops.constant_initializer(42.42))
+        x = nn_ops.relu(x)
+        return x
+
+    def fn(x):
+      out = x
+      for _ in range(3):
+        out = layer(out)
+      return out
+
+    @rev_block_lib.recompute_grad
+    def fn_recompute(x):
+      return fn(x)
+
+    x = random_ops.random_uniform((3, 1, 3))
+    recompute_vars = None
+    with variable_scope.variable_scope("recompute") as vs:
+      out1 = math_ops.reduce_sum(fn_recompute(x))
+      recompute_vars = vs.trainable_variables()
+    reg_vars = None
+    with variable_scope.variable_scope("regular") as vs:
+      out2 = math_ops.reduce_sum(fn(x))
+      reg_vars = vs.trainable_variables()
+
+    grad1 = gradients_impl.gradients(out1, recompute_vars)
+    grad2 = gradients_impl.gradients(out2, reg_vars)
+
+    with self.test_session() as sess:
+      sess.run(variables.global_variables_initializer())
+      outs = sess.run([out1, out2, grad1, grad2])
+      self.assertAllClose(outs[0], outs[1])
+      for g1, g2 in zip(outs[2], outs[3]):
+        self.assertAllClose(g1, g2)
+
+
+class FnWithCustomGradTest(test.TestCase):
+
+  def testCorrectness(self):
+
+    w = random_ops.random_uniform([6, 10])
+
+    def fn(a, b, c):
+      return core_layers.dense(
+          a,
+          10,
+          use_bias=False,
+          kernel_initializer=lambda shape, dtype, partition_info: w
+      ) + math_ops.matmul(b, c)
+
+    def grad_fn(inputs, trainable_variables, outputs, grad_outputs):
+      outputs = outputs[0]
+      grad_outputs = grad_outputs[0]
+      grad_inputs = gradients_impl.gradients(
+          outputs, inputs, grad_ys=grad_outputs)
+      grad_vars = gradients_impl.gradients(
+          outputs, trainable_variables, grad_ys=grad_outputs)
+      return grad_inputs, grad_vars
+
+    custom_fn = rev_block_lib._fn_with_custom_grad(grad_fn)(fn)
+
+    a = random_ops.random_uniform([11, 6])
+    b = random_ops.random_uniform([11, 7])
+    c = random_ops.random_uniform([7, 10])
+
+    out = fn(a, b, c)
+    custom_out = custom_fn(a, b, c)
+    self.assertEqual(out.get_shape().as_list(),
+                     custom_out.get_shape().as_list())
+
+    loss = math_ops.reduce_mean(out)
+    custom_loss = math_ops.reduce_mean(custom_out)
+
+    grads = gradients_impl.gradients(
+        loss, [a, b, c] + [variables.trainable_variables()[0]])
+    custom_grads = gradients_impl.gradients(
+        custom_loss, [a, b, c] + [variables.trainable_variables()[1]])
+
+    with self.test_session() as sess:
+      sess.run(variables.global_variables_initializer())
+      out_val, custom_out_val, grads_val, custom_grads_val = sess.run(
+          [out, custom_out, grads, custom_grads])
+      self.assertAllClose(out_val, custom_out_val)
+      for g1, g2 in zip(grads_val, custom_grads_val):
+        self.assertAllClose(g1, g2)
+
+  def testCustomGrad(self):
+
+    def fn(a, b, c):
+      return core_layers.dense(a, 10, use_bias=False) + math_ops.matmul(b, c)
+
+    def grad_fn(inputs, trainable_variables, unused_outputs,
+                unused_grad_outputs):
+      grad_inputs = [
+          array_ops.ones_like(t) * (i + 1.) for i, t in enumerate(inputs)
+      ]
+      grad_vars = [
+          array_ops.ones_like(t) * (i + len(inputs) + 1.)
+          for i, t in enumerate(trainable_variables)
+      ]
+      return grad_inputs, grad_vars
+
+    a = random_ops.random_uniform([11, 6])
+    b = random_ops.random_uniform([11, 7])
+    c = random_ops.random_uniform([7, 10])
+    w = random_ops.random_uniform([6, 10])
+    out = rev_block_lib._fn_with_custom_grad(grad_fn)(fn)(a, b, c)
+    loss = math_ops.reduce_mean(out)
+    grads = gradients_impl.gradients(
+        loss, [a, b, c, variables.trainable_variables()[0]])
+    expected_grads = [
+        array_ops.ones_like(t) * (i + 1.) for i, t in enumerate([a, b, c, w])
+    ]
+    with self.test_session() as sess:
+      sess.run(variables.global_variables_initializer())
+      g_val, eg_val = sess.run([grads, expected_grads])
+      for g1, g2 in zip(g_val, eg_val):
+        self.assertAllClose(g1, g2)
+
+
+if __name__ == "__main__":
+  test.main()
-- 
GitLab


From 10d3ba2cf54710d0fd43a8c5723101a06f25f915 Mon Sep 17 00:00:00 2001
From: Igor Saprykin <isaprykin@google.com>
Date: Tue, 28 Nov 2017 11:05:24 -0800
Subject: [PATCH 0349/1225] Run replicate_model_fn_test on the multi-gpu
 testing cluster.

PiperOrigin-RevId: 177187767
---
 tensorflow/contrib/estimator/BUILD | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tensorflow/contrib/estimator/BUILD b/tensorflow/contrib/estimator/BUILD
index 197cf7e56f..8395e2db5e 100644
--- a/tensorflow/contrib/estimator/BUILD
+++ b/tensorflow/contrib/estimator/BUILD
@@ -374,4 +374,5 @@ cuda_py_test(
         "//tensorflow/python:variables",
         ":replicate_model_fn",
     ],
+    tags = ["multi_gpu"],
 )
-- 
GitLab


From 3527d477d6af99bc664785fd81d896605ebf4d48 Mon Sep 17 00:00:00 2001
From: Sourabh Bajaj <sourabhbajaj@google.com>
Date: Tue, 28 Nov 2017 11:28:54 -0800
Subject: [PATCH 0350/1225] Automated g4 rollback of changelist 176737730

PiperOrigin-RevId: 177191521
---
 tensorflow/core/kernels/strided_slice_op.cc        | 1 -
 tensorflow/core/kernels/strided_slice_op_gpu.cu.cc | 1 -
 2 files changed, 2 deletions(-)

diff --git a/tensorflow/core/kernels/strided_slice_op.cc b/tensorflow/core/kernels/strided_slice_op.cc
index 73b6d4cf6a..8fc40db3cc 100644
--- a/tensorflow/core/kernels/strided_slice_op.cc
+++ b/tensorflow/core/kernels/strided_slice_op.cc
@@ -427,7 +427,6 @@ REGISTER_STRIDED_SLICE(bfloat16);
 TF_CALL_GPU_NUMBER_TYPES(REGISTER_GPU);
 TF_CALL_complex64(REGISTER_GPU);
 TF_CALL_complex128(REGISTER_GPU);
-TF_CALL_int64(REGISTER_GPU);
 
 // A special GPU kernel for int32.
 // TODO(b/25387198): Also enable int32 in device memory. This kernel
diff --git a/tensorflow/core/kernels/strided_slice_op_gpu.cu.cc b/tensorflow/core/kernels/strided_slice_op_gpu.cu.cc
index 8ca27e3b92..a8487f49f4 100644
--- a/tensorflow/core/kernels/strided_slice_op_gpu.cu.cc
+++ b/tensorflow/core/kernels/strided_slice_op_gpu.cu.cc
@@ -53,7 +53,6 @@ typedef Eigen::GpuDevice GPUDevice;
 TF_CALL_GPU_NUMBER_TYPES(DEFINE_GPU_KERNELS);
 TF_CALL_complex64(DEFINE_GPU_KERNELS);
 TF_CALL_complex128(DEFINE_GPU_KERNELS);
-TF_CALL_int64(DEFINE_GPU_KERNELS);
 DEFINE_GPU_KERNELS(int32);
 
 #undef DEFINE_GPU_KERNELS
-- 
GitLab


From bc8718b090c565c6562dce098d16cdadffc6a213 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 28 Nov 2017 11:30:15 -0800
Subject: [PATCH 0351/1225] Removed unused variables from
 curl_http_request_test.

PiperOrigin-RevId: 177191730
---
 tensorflow/core/platform/cloud/curl_http_request_test.cc | 2 --
 1 file changed, 2 deletions(-)

diff --git a/tensorflow/core/platform/cloud/curl_http_request_test.cc b/tensorflow/core/platform/cloud/curl_http_request_test.cc
index 6c0f081852..d476a1a4db 100644
--- a/tensorflow/core/platform/cloud/curl_http_request_test.cc
+++ b/tensorflow/core/platform/cloud/curl_http_request_test.cc
@@ -263,7 +263,6 @@ TEST(CurlHttpRequestTest, GetRequest) {
 
   std::vector<char> scratch;
   scratch.insert(scratch.begin(), kTestContent.begin(), kTestContent.end());
-  StringPiece result;
   scratch.reserve(100);
 
   TF_EXPECT_OK(http_request.SetUri("http://www.testuri.com"));
@@ -594,7 +593,6 @@ TEST(CurlHttpRequestTest, ErrorReturnsNoResponse) {
 
   std::vector<char> scratch;
   scratch.insert(scratch.begin(), kTestContent.begin(), kTestContent.end());
-  StringPiece result;
   scratch.reserve(100);
 
   TF_EXPECT_OK(http_request.SetUri("http://www.testuri.com"));
-- 
GitLab


From cbd31dd4d30663344d0d15d8897d8ce652cf6294 Mon Sep 17 00:00:00 2001
From: Alexandre Passos <apassos@google.com>
Date: Tue, 28 Nov 2017 11:47:47 -0800
Subject: [PATCH 0352/1225] Eager function definition no longer adds control
 dependencies after constructing nodes.

PiperOrigin-RevId: 177194441
---
 tensorflow/python/eager/function.py       | 59 ++++++++++++++---------
 tensorflow/python/eager/graph_callable.py |  1 +
 2 files changed, 36 insertions(+), 24 deletions(-)

diff --git a/tensorflow/python/eager/function.py b/tensorflow/python/eager/function.py
index 9bcd9c23c7..2f4b59e938 100644
--- a/tensorflow/python/eager/function.py
+++ b/tensorflow/python/eager/function.py
@@ -30,7 +30,7 @@ from tensorflow.python.eager import execute
 from tensorflow.python.eager import tape
 from tensorflow.python.eager.graph_only_ops import graph_placeholder
 from tensorflow.python.framework import constant_op
-from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import dtypes as dtypes_module
 from tensorflow.python.framework import graph_to_function_def
 from tensorflow.python.framework import ops
 from tensorflow.python.ops import gradients_impl
@@ -48,23 +48,7 @@ _scoped_captures.tensors = None
 
 
 def make_function_def(graph, operations, inputs, outputs):
-  """Makes function def where accesses to resources are serialized."""
-  last_op_using_resource_tensor = {}
-
-  # TODO(apassos) probably control flow has to be handled delicately here as in
-  # if a resource is accessed inside a control flow context we need the control
-  # dependency to point to something outside the context which is guaranteed to
-  # happen after the access.
-  #
-  # TODO(apassos) this should do some form of alias analysis as ops which
-  # forward the resources such as Identity and Switch can cause serialization to
-  # fail.
-  for op in operations:
-    for t in op.inputs:
-      if t.dtype == dtypes.resource:
-        if t.name in last_op_using_resource_tensor:
-          op._add_control_input(last_op_using_resource_tensor[t.name])  # pylint: disable=protected-access
-        last_op_using_resource_tensor[t.name] = op
+  """Makes function def from the given graph with the operations."""
   return graph_to_function_def.graph_to_function_def(
       graph, operations, inputs, outputs)
 
@@ -85,7 +69,7 @@ def capture_value(tensor_map, value, dtype, name):
   if captured_value is None:
     captured_value = graph_placeholder(
         dtype=dtype or value.dtype, shape=value.shape, name=name)
-    if captured_value.dtype == dtypes.resource:
+    if captured_value.dtype == dtypes_module.resource:
       captured_value._handle_data = value._handle_data  # pylint: disable=protected-access
     tensor_map[ops.tensor_id(value)] = (value, captured_value)
   else:
@@ -120,11 +104,19 @@ def _convert_to_graph_tensor(value, dtype=None, name=None, as_ref=False):
 
 
 class CapturingGraph(ops.Graph):
+  """Graph used when constructing eager functions."""
 
   def __init__(self, captures):
     super(CapturingGraph, self).__init__()
     self._building_function = True
     self.captures = captures
+    # Map from resource tensor name to last op (in program order) which uses
+    # this tensor. Used to enforce that execution order matches program order
+    # for resource tensors.
+    self._last_op_using_resource_tensor = {}
+
+  def clear_resource_control_flow_state(self):
+    self._last_op_using_resource_tensor = {}
 
   def create_op(
       self,
@@ -137,12 +129,31 @@ class CapturingGraph(ops.Graph):
       op_def=None,
       compute_shapes=True,
       compute_device=True):
+    # TODO(apassos) probably control flow has to be handled delicately here as
+    # in if a resource is accessed inside a control flow context we need the
+    # control dependency to point to something outside the context which is
+    # guaranteed to happen after the access.
+    #
+    # TODO(apassos) this should do some form of alias analysis as ops which
+    # forward the resources such as Identity and Switch can cause serialization
+    # to fail.
+    resource_inputs = set()
+    control_inputs = set()
     for i, inp in enumerate(inputs):
       if inp.graph is not self:
         inputs[i] = capture_value(self.captures, inp, inp.dtype, inp.op.name)
-    return super(CapturingGraph, self).create_op(
-        op_type, inputs, dtypes, input_types, name, attrs, op_def,
-        compute_shapes, compute_device)
+      inp = inputs[i]
+      if inp.dtype == dtypes_module.resource:
+        if inp.name in self._last_op_using_resource_tensor:
+          control_inputs.add(self._last_op_using_resource_tensor[inp.name])
+        resource_inputs.add(inp.name)
+    with self.control_dependencies(list(control_inputs)):
+      op = super(CapturingGraph, self).create_op(
+          op_type, inputs, dtypes, input_types, name, attrs, op_def,
+          compute_shapes, compute_device)
+    for name in resource_inputs:
+      self._last_op_using_resource_tensor[name] = op
+    return op
 
 
 # TODO(apassos): it'd be really nice if we could scope this registration.
@@ -314,7 +325,7 @@ class GraphModeFunction(object):
         return ops.internal_convert_to_tensor(x, ctx=ctx)
       op = g.create_op(
           signature.name, [make_tensor(x) for x in all_args],
-          [dtypes.DType(x.type) for x in signature.output_arg],
+          [dtypes_module.DType(x.type) for x in signature.output_arg],
           op_def=signature,
           name="FunctionCall",
           compute_shapes=False)
@@ -373,7 +384,7 @@ class GraphModeFunction(object):
       args = list(tensor_inputs) + self._extra_inputs
       op = g.create_op(
           signature.name, [ops.convert_to_tensor(x) for x in args],
-          [dtypes.DType(x.type) for x in signature.output_arg],
+          [dtypes_module.DType(x.type) for x in signature.output_arg],
           op_def=signature,
           name="FunctionCall",
           compute_shapes=False)
diff --git a/tensorflow/python/eager/graph_callable.py b/tensorflow/python/eager/graph_callable.py
index 837a75c808..faf0ac88bc 100644
--- a/tensorflow/python/eager/graph_callable.py
+++ b/tensorflow/python/eager/graph_callable.py
@@ -296,6 +296,7 @@ def _graph_callable_internal(func, shape_and_dtypes):
       # Call the function again, now replacing usages of variables with
       # placeholders. This assumes the variable capturing scope created above
       # knows about all variables.
+      tmp_graph.clear_resource_control_flow_state()
       with variable_captures.capturing_scope(), function.capture_tensors(
           captures):
         captured_outputs = func(*func_inputs)
-- 
GitLab


From 570d2772796bd642dc4808bd869e293f74553620 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 28 Nov 2017 12:09:49 -0800
Subject: [PATCH 0353/1225] Bump LLVM snapshot to r319150.

PiperOrigin-RevId: 177197719
---
 tensorflow/workspace.bzl | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl
index dd5dc37a87..cb77f96be5 100644
--- a/tensorflow/workspace.bzl
+++ b/tensorflow/workspace.bzl
@@ -578,11 +578,11 @@ def tf_workspace(path_prefix="", tf_repo_name=""):
   temp_workaround_http_archive(
       name = "llvm",
       urls = [
-          "https://mirror.bazel.build/github.com/llvm-mirror/llvm/archive/8d26b8bee4d8e7230870a600bc968c7ee8cf6f67.tar.gz",
-          "https://github.com/llvm-mirror/llvm/archive/8d26b8bee4d8e7230870a600bc968c7ee8cf6f67.tar.gz",
+          "https://mirror.bazel.build/github.com/llvm-mirror/llvm/archive/9ab4c272cb604a7f947865428c4ef2169fee2100.tar.gz",
+          "https://github.com/llvm-mirror/llvm/archive/9ab4c272cb604a7f947865428c4ef2169fee2100.tar.gz",
       ],
-      sha256 = "ff5ddbe5af5e264426c8d489e7fddfc5ad7e0975f19cefe9db8c0a5d0faeb23e",
-      strip_prefix = "llvm-8d26b8bee4d8e7230870a600bc968c7ee8cf6f67",
+      sha256 = "1b1b7d3800a94ca2302e3dd670dbe84238749583027883784b55297059d83da8",
+      strip_prefix = "llvm-9ab4c272cb604a7f947865428c4ef2169fee2100",
       build_file = str(Label("//third_party/llvm:llvm.BUILD")),
       repository = tf_repo_name,
   )
-- 
GitLab


From ba87a8030aa30f24c354cf705e79734658bb0a8b Mon Sep 17 00:00:00 2001
From: Asim Shankar <ashankar@google.com>
Date: Tue, 28 Nov 2017 12:17:52 -0800
Subject: [PATCH 0354/1225] Eager: Better errors for invalid options to
 enable_eager_execution.

Fixes #14739

PiperOrigin-RevId: 177198861
---
 tensorflow/python/framework/ops.py      | 11 +++++++++++
 tensorflow/python/framework/ops_test.py |  7 +++++++
 2 files changed, 18 insertions(+)

diff --git a/tensorflow/python/framework/ops.py b/tensorflow/python/framework/ops.py
index 60df8f82f0..cfef5e35f4 100644
--- a/tensorflow/python/framework/ops.py
+++ b/tensorflow/python/framework/ops.py
@@ -35,6 +35,7 @@ from tensorflow.core.framework import graph_pb2
 from tensorflow.core.framework import node_def_pb2
 from tensorflow.core.framework import op_def_pb2
 from tensorflow.core.framework import versions_pb2
+from tensorflow.core.protobuf import config_pb2
 from tensorflow.python import pywrap_tensorflow as c_api
 from tensorflow.python.eager import context
 from tensorflow.python.eager import core
@@ -4794,6 +4795,16 @@ def enable_eager_execution(config=None, device_policy=None):
      or if trying to create a context with nontrivial options which differ
      from those of the existing context.
   """
+  if config is not None and not isinstance(config, config_pb2.ConfigProto):
+    raise TypeError(
+        "config must be a tf.ConfigProto, but got %s" % type(config))
+  if device_policy not in (None, context.DEVICE_PLACEMENT_EXPLICIT,
+                           context.DEVICE_PLACEMENT_WARN,
+                           context.DEVICE_PLACEMENT_SILENT):
+    raise ValueError(
+        "device_policy must be one of None, tfe.DEVICE_PLACEMENT_EXPLICIT, "
+        "tfe.DEVICE_PLACEMENT_WARN, tfe.DEVICE_PLACEMENT_SILENT"
+    )
   # pylint: disable=protected-access
   if context._default_mode == context.GRAPH_MODE:
     graph_mode_has_been_used = (
diff --git a/tensorflow/python/framework/ops_test.py b/tensorflow/python/framework/ops_test.py
index cd296ccdc5..e929cc8abf 100644
--- a/tensorflow/python/framework/ops_test.py
+++ b/tensorflow/python/framework/ops_test.py
@@ -2395,6 +2395,13 @@ class InputTypesTest(test_util.TensorFlowTestCase):
       self.assertEqual([dtypes.double, dtypes.double], z.op._input_dtypes)
       # pylint: enable=protected-access
 
+  def testBadArgumentsToEnableEagerExecution(self):
+    with self.assertRaisesRegexp(TypeError, "config must be a tf.ConfigProto"):
+      ops.enable_eager_execution(context.DEVICE_PLACEMENT_SILENT)
+    with self.assertRaisesRegexp(ValueError, "device_policy must be one of"):
+      c = config_pb2.ConfigProto()
+      ops.enable_eager_execution(c, c)
+
 
 if __name__ == "__main__":
   googletest.main()
-- 
GitLab


From b911049edfbb4a4eb07b3b46ed144da6cd33f9c1 Mon Sep 17 00:00:00 2001
From: Yilei Yang <yileiyang@google.com>
Date: Tue, 28 Nov 2017 12:26:12 -0800
Subject: [PATCH 0355/1225] Continue to allow old argument names specified in
 tf.flags.DEFINE functions.

There are more DEFINE functions in absl.flags, they only accept the absl names.

PiperOrigin-RevId: 177199982
---
 tensorflow/python/platform/flags.py      | 48 ++++++++++++++++++++++++
 tensorflow/python/platform/flags_test.py | 41 +++++++++++++++++++-
 2 files changed, 88 insertions(+), 1 deletion(-)

diff --git a/tensorflow/python/platform/flags.py b/tensorflow/python/platform/flags.py
index e9a36ae75d..abd6f3d855 100644
--- a/tensorflow/python/platform/flags.py
+++ b/tensorflow/python/platform/flags.py
@@ -18,5 +18,53 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
+import logging as _logging
+
 # go/tf-wildcard-import
 from absl.flags import *  # pylint: disable=wildcard-import
+import six as _six
+
+from tensorflow.python.util import tf_decorator
+
+
+# Since we wrap absl.flags DEFINE functions, we need to declare this module
+# does not affect key flags.
+disclaim_key_flags()  # pylint: disable=undefined-variable
+
+
+_RENAMED_ARGUMENTS = {
+    'flag_name': 'name',
+    'default_value': 'default',
+    'docstring': 'help',
+}
+
+
+def _wrap_define_function(original_function):
+  """Wraps absl.flags's define functions so tf.flags accepts old names."""
+
+  def wrapper(*args, **kwargs):
+    """Wrapper function that turns old keyword names to new ones."""
+    has_old_names = False
+    for old_name, new_name in _six.iteritems(_RENAMED_ARGUMENTS):
+      if old_name in kwargs:
+        has_old_names = True
+        value = kwargs.pop(old_name)
+        kwargs[new_name] = value
+    if has_old_names:
+      _logging.warning(
+          'Use of the keyword argument names (flag_name, default_value, '
+          'docstring) is deprecated, please use (name, default, help) instead.')
+    return original_function(*args, **kwargs)
+
+  return tf_decorator.make_decorator(original_function, wrapper)
+
+
+# pylint: disable=invalid-name,used-before-assignment
+# absl.flags APIs use `default` as the name of the default value argument.
+# Allow the following functions continue to accept `default_value`.
+DEFINE_string = _wrap_define_function(DEFINE_string)
+DEFINE_boolean = _wrap_define_function(DEFINE_boolean)
+DEFINE_bool = DEFINE_boolean
+DEFINE_float = _wrap_define_function(DEFINE_float)
+DEFINE_integer = _wrap_define_function(DEFINE_integer)
+# pylint: enable=invalid-name,used-before-assignment
diff --git a/tensorflow/python/platform/flags_test.py b/tensorflow/python/platform/flags_test.py
index 23060e17d2..e8200142dd 100644
--- a/tensorflow/python/platform/flags_test.py
+++ b/tensorflow/python/platform/flags_test.py
@@ -24,11 +24,50 @@ from absl import flags as absl_flags
 from tensorflow.python.platform import flags
 
 
+flags.DEFINE_string(
+    flag_name='old_string', default_value='default', docstring='docstring')
+flags.DEFINE_string(
+    name='new_string', default='default', help='docstring')
+flags.DEFINE_integer(
+    flag_name='old_integer', default_value=1, docstring='docstring')
+flags.DEFINE_integer(
+    name='new_integer', default=1, help='docstring')
+flags.DEFINE_float(
+    flag_name='old_float', default_value=1.5, docstring='docstring')
+flags.DEFINE_float(
+    name='new_float', default=1.5, help='docstring')
+flags.DEFINE_bool(
+    flag_name='old_bool', default_value=True, docstring='docstring')
+flags.DEFINE_bool(
+    name='new_bool', default=True, help='docstring')
+flags.DEFINE_boolean(
+    flag_name='old_boolean', default_value=False, docstring='docstring')
+flags.DEFINE_boolean(
+    name='new_boolean', default=False, help='docstring')
+
+
 class FlagsTest(unittest.TestCase):
 
   def test_global_flags_object(self):
     self.assertIs(flags.FLAGS, absl_flags.FLAGS)
 
+  def test_keyword_arguments(self):
+    test_cases = (
+        ('old_string', 'default'),
+        ('new_string', 'default'),
+        ('old_integer', 1),
+        ('new_integer', 1),
+        ('old_float', 1.5),
+        ('new_float', 1.5),
+        ('old_bool', True),
+        ('new_bool', True),
+        ('old_boolean', False),
+        ('new_boolean', False),
+    )
+    for flag_name, default_value in test_cases:
+      self.assertEqual(default_value, absl_flags.FLAGS[flag_name].default)
+      self.assertEqual('docstring', absl_flags.FLAGS[flag_name].help)
+
 
-if __name__ == "__main__":
+if __name__ == '__main__':
   unittest.main()
-- 
GitLab


From a86f2d2c1af7ac0e5c36eedb18d74c022737fc25 Mon Sep 17 00:00:00 2001
From: Benoit Steiner <bsteiner@google.com>
Date: Tue, 28 Nov 2017 12:49:38 -0800
Subject: [PATCH 0356/1225] Added an option to assume that the shape of fed
 nodes in unknown since any shape can be actually used.

PiperOrigin-RevId: 177203023
---
 tensorflow/core/grappler/costs/BUILD          |  1 +
 .../core/grappler/costs/graph_properties.cc   | 74 ++++++++++++++---
 .../core/grappler/costs/graph_properties.h    | 37 ++++++---
 .../grappler/costs/graph_properties_test.cc   | 82 +++++++++++++++----
 .../core/grappler/costs/virtual_scheduler.cc  |  2 +-
 .../optimizers/arithmetic_optimizer.cc        |  2 +-
 .../grappler/optimizers/constant_folding.cc   |  2 +-
 .../grappler/optimizers/layout_optimizer.cc   |  2 +-
 .../grappler/optimizers/memory_optimizer.cc   |  2 +-
 .../grappler/optimizers/static_schedule.cc    |  4 +-
 tensorflow/python/grappler/item.i             |  2 +-
 tensorflow/python/grappler/model_analyzer.cc  |  2 +-
 12 files changed, 164 insertions(+), 48 deletions(-)

diff --git a/tensorflow/core/grappler/costs/BUILD b/tensorflow/core/grappler/costs/BUILD
index f02cb51038..f1edbbb602 100644
--- a/tensorflow/core/grappler/costs/BUILD
+++ b/tensorflow/core/grappler/costs/BUILD
@@ -50,6 +50,7 @@ cc_library(
         "//tensorflow/core:framework",
         "//tensorflow/core:protos_all_cc",
         "//tensorflow/core/grappler:grappler_item",
+        "//tensorflow/core/grappler:utils",
         "//tensorflow/core/grappler/clusters:cluster",
     ],
 )
diff --git a/tensorflow/core/grappler/costs/graph_properties.cc b/tensorflow/core/grappler/costs/graph_properties.cc
index dd389de636..fb7e20fca0 100644
--- a/tensorflow/core/grappler/costs/graph_properties.cc
+++ b/tensorflow/core/grappler/costs/graph_properties.cc
@@ -22,6 +22,7 @@ limitations under the License.
 #include "tensorflow/core/framework/tensor_shape.pb.h"
 #include "tensorflow/core/graph/graph_constructor.h"
 #include "tensorflow/core/grappler/costs/utils.h"
+#include "tensorflow/core/grappler/utils.h"
 
 namespace tensorflow {
 namespace grappler {
@@ -316,7 +317,11 @@ class SymbolicShapeRefiner {
                   shape_inference::ShapeHandle shape) {
     return shape_refiner_->SetShape(node, output_port, shape);
   }
-
+  Status SetUnknownShape(const Node* node, int output_port) {
+    shape_inference::ShapeHandle shape =
+        GetUnknownOutputShape(node, output_port);
+    return shape_refiner_->SetShape(node, output_port, shape);
+  }
   struct ShapeId {
     const Node* node;
     int port_id;
@@ -646,6 +651,23 @@ Status GraphProperties::UpdateMergeNode(SymbolicShapeRefiner* shape_refiner,
   return Status::OK();
 }
 
+Status GraphProperties::OverwriteFedPorts(
+    SymbolicShapeRefiner* shape_refiner,
+    const std::unordered_map<string, std::unordered_set<int>>& fed_ports,
+    const Node* node, TopoQueue* new_shapes) const {
+  auto it = fed_ports.find(node->name());
+  Status status;
+  if (it != fed_ports.end()) {
+    // It is possible to feed node output ports with tensors of any shape: as a
+    // result, the shape of a fed port is completely unknown.
+    for (const int output_port : it->second) {
+      status.Update(shape_refiner->SetUnknownShape(node, output_port));
+    }
+    new_shapes->push(node);
+  }
+  return status;
+}
+
 // Manually propagate the input shape for Enter nodes and update any Merge node
 // outputs.
 Status GraphProperties::UpdateEnter(SymbolicShapeRefiner* shape_refiner,
@@ -673,9 +695,10 @@ Status GraphProperties::UpdateEnter(SymbolicShapeRefiner* shape_refiner,
   return Status::OK();
 }
 
-Status GraphProperties::UpdateShapes(SymbolicShapeRefiner* shape_refiner,
-                                     bool relax, const Node* n,
-                                     TopoQueue* new_shapes) {
+Status GraphProperties::UpdateShapes(
+    SymbolicShapeRefiner* shape_refiner, bool relax,
+    const std::unordered_map<string, std::unordered_set<int>>& fed_ports,
+    const Node* n, TopoQueue* new_shapes) const {
   if (n->IsEnter()) {
     // The Enter shape function always forwards an UnknownShape, so do the right
     // thing here.
@@ -695,7 +718,9 @@ Status GraphProperties::UpdateShapes(SymbolicShapeRefiner* shape_refiner,
       }
     }
   }
-  return Status::OK();
+  // Nodes can be fed with any shape. The TensorFlow shape inference code can't
+  // handle this properly, so overwrite its behavior here.
+  return OverwriteFedPorts(shape_refiner, fed_ports, n, new_shapes);
 }
 
 // Propagates the shapes in the transitive fan-out of <new_shapes>.
@@ -703,6 +728,7 @@ Status GraphProperties::PropagateShapes(
     SymbolicShapeRefiner* shape_refiner, bool relax, TopoQueue* new_shapes,
     const std::unordered_map<const Node*, std::unordered_set<const Node*>>&
         resources,
+    const std::unordered_map<string, std::unordered_set<int>>& fed_ports,
     int num_loops) const {
   // Limit the number of iterations to prevent infinite loops in the presence of
   // incorrect shape functions. The algoritm should converge in at most
@@ -728,8 +754,8 @@ Status GraphProperties::PropagateShapes(
       for (const Edge* e : n->out_edges()) {
         if (!e->IsControlEdge()) {
           const Node* fanout = e->dst();
-          TF_RETURN_IF_ERROR(
-              UpdateShapes(shape_refiner, relax, fanout, new_shapes));
+          TF_RETURN_IF_ERROR(UpdateShapes(shape_refiner, relax, fed_ports,
+                                          fanout, new_shapes));
         }
       }
     }
@@ -803,7 +829,7 @@ Status GraphProperties::UpdateResource(
   return Status::OK();
 }
 
-Status GraphProperties::InferStatically() {
+Status GraphProperties::InferStatically(bool assume_valid_feeds) {
   Graph graph(OpRegistry::Global());
   FunctionLibraryDefinition function_library(graph.op_registry(),
                                              item_.graph.library());
@@ -820,11 +846,21 @@ Status GraphProperties::InferStatically() {
   Status s = ImportGraphDef(options, item_.graph, &graph, &shape_refiner);
   TF_RETURN_IF_ERROR(s);
 
+  std::unordered_map<string, std::unordered_set<int>> fed_ports;
+  if (!assume_valid_feeds) {
+    for (const auto& feed : item_.feed) {
+      int port_index = 0;
+      string node_name = ParseNodeName(feed.first, &port_index);
+      fed_ports[node_name].insert(port_index);
+    }
+  }
+
   // List the resources and the nodes using them. Also collect the Enter and
   // Merge nodes.
   std::unordered_map<const Node*, std::unordered_set<const Node*>> resources;
   std::unordered_set<const Node*> enter_nodes;
   std::unordered_set<const Node*> merge_nodes;
+  std::unordered_set<const Node*> fed_nodes;
   int num_loops = 0;
   for (const Node* const node : graph.nodes()) {
     for (int i = 0; i < node->num_inputs(); ++i) {
@@ -841,6 +877,9 @@ Status GraphProperties::InferStatically() {
     } else if (node->IsNextIteration()) {
       ++num_loops;
     }
+    if (fed_ports.find(node->name()) != fed_ports.end()) {
+      fed_nodes.insert(node);
+    }
   }
 
   SymbolicShapeRefiner refiner(&shape_refiner);
@@ -855,15 +894,22 @@ Status GraphProperties::InferStatically() {
     // Force the propagation of shapes of Enter nodes manually (the Enter shape
     // function always forwards an UnknownShape).
     for (const Node* node : enter_nodes) {
-      TF_RETURN_IF_ERROR(UpdateShapes(&refiner, relax, node, &new_shapes));
+      TF_RETURN_IF_ERROR(
+          UpdateShapes(&refiner, relax, fed_ports, node, &new_shapes));
     }
     // Seed the propagation of shapes through merge nodes.
     for (const Node* node : merge_nodes) {
-      TF_RETURN_IF_ERROR(UpdateShapes(&refiner, relax, node, &new_shapes));
+      TF_RETURN_IF_ERROR(
+          UpdateShapes(&refiner, relax, fed_ports, node, &new_shapes));
+    }
+    // Also seed the propagation of shapes in the fanout of fed nodes.
+    for (const Node* node : fed_nodes) {
+      TF_RETURN_IF_ERROR(
+          OverwriteFedPorts(&refiner, fed_ports, node, &new_shapes));
     }
     // Propagate shapes normally.
-    TF_RETURN_IF_ERROR(
-        PropagateShapes(&refiner, relax, &new_shapes, resources, num_loops));
+    TF_RETURN_IF_ERROR(PropagateShapes(&refiner, relax, &new_shapes, resources,
+                                       fed_ports, num_loops));
   }
 
   // Track shapes globally across the graph.
@@ -874,6 +920,10 @@ Status GraphProperties::InferStatically() {
     if (!node_ctx) {
       continue;
     }
+    // Skip any information that comes from fed nodes.
+    if (fed_ports.find(node->name()) != fed_ports.end()) {
+      continue;
+    }
     for (const auto& merged_shapes : node_ctx->MergedShapes()) {
       if (!shape_manager.Merge(merged_shapes.first, merged_shapes.second)
                .ok()) {
diff --git a/tensorflow/core/grappler/costs/graph_properties.h b/tensorflow/core/grappler/costs/graph_properties.h
index 95bc5044d0..6fc53a7f2e 100644
--- a/tensorflow/core/grappler/costs/graph_properties.h
+++ b/tensorflow/core/grappler/costs/graph_properties.h
@@ -34,12 +34,19 @@ class TopoQueue;
 // nodes, and potentially a set of nodes to feed.
 class GraphProperties {
  public:
-  // Factory method for creating a GrapplerShapes from a MetaGraphDef.
-  // Returns nullptr if the given meta_graph cannot be converted.
   explicit GraphProperties(const GrapplerItem& item) : item_(item) {}
 
-  Status InferStatically();
+  // Infer the shapes through abstract interpretation. Feed information can be
+  // incorrect so it should be discarded to ensure correctness of the analysis.
+  // However, it can help infer shapes in the fanout of fed nodes (even though
+  // the correctness of these shapes can't be guaranteed), so in some cases
+  // (such as simulation or scheduling) it makes sense of keep these shapes.
+  Status InferStatically(bool assume_valid_feeds);
+  // Infer the shape by running the graph on the specified cluster and recording
+  // the shapes of the processed tensors.
   Status InferDynamically(Cluster* cluster);
+  // Extract the properties from a cost graph. For testing only since there is
+  // no way to ensure that the cost graph match the item.
   Status InferFromCostGraph(const CostGraphDef& cost_graph);
 
   // Stores `item_.graph` with the inferred output shapes to `output_graph_def`.
@@ -65,12 +72,6 @@ class GraphProperties {
       OpInfo::TensorProperties*);
 
  private:
-  // Inputs
-  GrapplerItem item_;
-  std::map<string, std::vector<OpInfo::TensorProperties>> input_properties_;
-  std::map<string, std::vector<OpInfo::TensorProperties>> output_properties_;
-  const std::vector<OpInfo::TensorProperties> missing_properties_;
-
   // Merges shapes <shapes_and_types>, determined from an EnqueueV2 node, into
   // <*queue_shapes_and_types>.
   static Status MergeEnqueueShapesAndTypes(
@@ -99,17 +100,31 @@ class GraphProperties {
   static Status UpdateEnter(SymbolicShapeRefiner* shape_refiner,
                             const Node* node, bool relax,
                             TopoQueue* new_shapes);
+  // Process a node that is used to feed the model.
+  Status OverwriteFedPorts(
+      SymbolicShapeRefiner* shape_refiner,
+      const std::unordered_map<string, std::unordered_set<int>>& fed_ports,
+      const Node* node, TopoQueue* new_shapes) const;
   // Update the shapes for node 'n'. If output shapes for n have changed,
   // enqueue its fanout in 'new_shapes'.
-  static Status UpdateShapes(SymbolicShapeRefiner* shape_refiner, bool relax,
-                             const Node* n, TopoQueue* new_shapes);
+  Status UpdateShapes(
+      SymbolicShapeRefiner* shape_refiner, bool relax,
+      const std::unordered_map<string, std::unordered_set<int>>& fed_ports,
+      const Node* n, TopoQueue* new_shapes) const;
   // Propagate the shapes for the nodes enqueued in new_shapes and their
   // transitive fanout until a fixed point is reached.
   Status PropagateShapes(
       SymbolicShapeRefiner* shape_refiner, bool relax, TopoQueue* new_shapes,
       const std::unordered_map<const Node*, std::unordered_set<const Node*>>&
           resources,
+      const std::unordered_map<string, std::unordered_set<int>>& fed_ports,
       int num_loops) const;
+
+  // Data members
+  GrapplerItem item_;
+  std::map<string, std::vector<OpInfo::TensorProperties>> input_properties_;
+  std::map<string, std::vector<OpInfo::TensorProperties>> output_properties_;
+  const std::vector<OpInfo::TensorProperties> missing_properties_;
 };
 
 }  // end namespace grappler
diff --git a/tensorflow/core/grappler/costs/graph_properties_test.cc b/tensorflow/core/grappler/costs/graph_properties_test.cc
index c11af5777a..ad8e768f1f 100644
--- a/tensorflow/core/grappler/costs/graph_properties_test.cc
+++ b/tensorflow/core/grappler/costs/graph_properties_test.cc
@@ -73,7 +73,7 @@ TEST_F(GraphPropertiesTest, StaticProperties) {
   CHECK(fake_input.NextItem(&item));
 
   GraphProperties properties(item);
-  Status s = properties.InferStatically();
+  Status s = properties.InferStatically(true);
   TF_CHECK_OK(s);
 
   for (const auto& node : item.graph.node()) {
@@ -179,7 +179,7 @@ TEST_F(GraphPropertiesTest, Variables) {
 
   {
     GraphProperties static_properties(item);
-    TF_CHECK_OK(static_properties.InferStatically());
+    TF_CHECK_OK(static_properties.InferStatically(false));
 
     const auto props = static_properties.GetOutputProperties("Var");
     EXPECT_EQ(1, props.size());
@@ -219,7 +219,7 @@ TEST_F(GraphPropertiesTest, VarHandles) {
                   .Finalize(item.graph.add_node()));
 
   GraphProperties properties(item);
-  TF_CHECK_OK(properties.InferStatically());
+  TF_CHECK_OK(properties.InferStatically(false));
 
   const auto props = properties.GetOutputProperties("VarRead");
   EXPECT_EQ(1, props.size());
@@ -286,7 +286,7 @@ TEST_F(GraphPropertiesTest, Queues) {
   TF_CHECK_OK(root.ToGraphDef(&item.graph));
 
   GraphProperties properties(item);
-  TF_CHECK_OK(properties.InferStatically());
+  TF_CHECK_OK(properties.InferStatically(false));
 
   const auto props1 = properties.GetOutputProperties("Dequeue1");
   ASSERT_EQ(1, props1.size());
@@ -335,7 +335,7 @@ TEST_F(GraphPropertiesTest, MergeWithoutLoops) {
                                  "merge_without_loops.pbtxt");
   TF_CHECK_OK(ReadGraphDefFromFile(filename, &item.graph));
   GraphProperties properties(item);
-  TF_CHECK_OK(properties.InferStatically());
+  TF_CHECK_OK(properties.InferStatically(false));
 
   std::vector<string> nodes{"cond/Merge", "cond/concat", "cond/concat_1"};
   std::vector<string> expected_outputs{"float: [-1,-1,1]", "float: [2,1,1]",
@@ -377,7 +377,7 @@ TEST_F(GraphPropertiesTest, WhileLoop) {
                                  "while_loop.pbtxt");
   TF_CHECK_OK(ReadGraphDefFromFile(filename, &item.graph));
   GraphProperties properties(item);
-  TF_CHECK_OK(properties.InferStatically());
+  TF_CHECK_OK(properties.InferStatically(false));
 
   std::vector<string> nodes{"while/Merge_1", "while/NextIteration_1",
                             "while/Exit_1"};
@@ -435,7 +435,7 @@ TEST_F(GraphPropertiesTest, NestedLoop) {
                                  "nested_loop.pbtxt");
   TF_CHECK_OK(ReadGraphDefFromFile(filename, &item.graph));
   GraphProperties properties(item);
-  TF_CHECK_OK(properties.InferStatically());
+  TF_CHECK_OK(properties.InferStatically(false));
 
   std::vector<string> outer_nodes{"while/Merge_1", "while/NextIteration_1",
                                   "while/Exit_1"};
@@ -498,7 +498,7 @@ TEST_F(GraphPropertiesTest, LoopsAndQueues) {
                                  "loops_and_queues.pbtxt");
   TF_CHECK_OK(ReadGraphDefFromFile(filename, &item.graph));
   GraphProperties properties(item);
-  TF_CHECK_OK(properties.InferStatically());
+  TF_CHECK_OK(properties.InferStatically(false));
 
   std::vector<string> outer_nodes{"while/Merge_1", "while/NextIteration_1",
                                   "while/Exit_1"};
@@ -556,7 +556,7 @@ TEST_F(GraphPropertiesTest, LoopsAndResourceVars) {
                                  "loops_and_resource_vars.pbtxt");
   TF_CHECK_OK(ReadGraphDefFromFile(filename, &item.graph));
   GraphProperties properties(item);
-  TF_CHECK_OK(properties.InferStatically());
+  TF_CHECK_OK(properties.InferStatically(false));
 
   std::vector<string> outer_nodes{"while/Merge_1", "while/NextIteration_1",
                                   "while/Exit_1"};
@@ -608,7 +608,7 @@ TEST_F(GraphPropertiesTest, QueuesAndLoops) {
                                  "queues_and_loops.pbtxt");
   TF_CHECK_OK(ReadGraphDefFromFile(filename, &item.graph));
   GraphProperties properties(item);
-  TF_CHECK_OK(properties.InferStatically());
+  TF_CHECK_OK(properties.InferStatically(false));
 
   std::vector<string> nodes{"while/Merge_1", "while/NextIteration_1",
                             "while/Exit_1"};
@@ -657,7 +657,7 @@ TEST_F(GraphPropertiesTest, InferRestoreOpShape) {
   item.fetch.push_back("init_restore");
 
   GraphProperties properties(item);
-  TF_CHECK_OK(properties.InferStatically());
+  TF_CHECK_OK(properties.InferStatically(false));
 
   const auto restore_props = properties.GetOutputProperties("restore");
   const OpInfo::TensorProperties& restore_prop = restore_props[0];
@@ -704,7 +704,7 @@ TEST_F(GraphPropertiesTest, InferRestoreOpShape_WithTwoNodesShareSameOutput) {
   item.fetch.push_back("init2");
 
   GraphProperties properties(item);
-  TF_CHECK_OK(properties.InferStatically());
+  TF_CHECK_OK(properties.InferStatically(false));
 
   const auto props = properties.GetOutputProperties("restore");
   const OpInfo::TensorProperties& prop = props[0];
@@ -732,7 +732,7 @@ TEST_F(GraphPropertiesTest, FunctionStaticShapeInference) {
                                  "simple_function.pbtxt");
   TF_CHECK_OK(ReadGraphDefFromFile(filename, &item.graph));
   GraphProperties properties(item);
-  TF_CHECK_OK(properties.InferStatically());
+  TF_CHECK_OK(properties.InferStatically(false));
   const auto props = properties.GetOutputProperties("MyAdd_55e046a8_1");
   const OpInfo::TensorProperties& prop = props[0];
   EXPECT_EQ(DT_FLOAT, prop.dtype());
@@ -766,7 +766,7 @@ TEST_F(GraphPropertiesTest, SymbolicShapes) {
   TF_CHECK_OK(s.ToGraphDef(&item.graph));
 
   GraphProperties properties(item);
-  TF_CHECK_OK(properties.InferStatically());
+  TF_CHECK_OK(properties.InferStatically(false));
   const auto shape_a = properties.GetOutputProperties("a").at(0).shape();
   const auto shape_c = properties.GetOutputProperties("c").at(0).shape();
   EXPECT_EQ(2, shape_a.dim_size());
@@ -822,7 +822,7 @@ TEST_F(GraphPropertiesTest, DoNotValidateColocationConstraints) {
   GraphProperties properties(item);
   // This function should return OK, since it doesn't validate the colocation
   // constraints internally.
-  TF_EXPECT_OK(properties.InferStatically());
+  TF_EXPECT_OK(properties.InferStatically(false));
 }
 
 TEST_F(GraphPropertiesTest, ShapeTracking) {
@@ -842,7 +842,7 @@ TEST_F(GraphPropertiesTest, ShapeTracking) {
   TF_CHECK_OK(s.ToGraphDef(&item.graph));
 
   GraphProperties properties(item);
-  TF_CHECK_OK(properties.InferStatically());
+  TF_CHECK_OK(properties.InferStatically(false));
   const auto shape_a = properties.GetOutputProperties("a").at(0).shape();
   const auto shape_b = properties.GetOutputProperties("b").at(0).shape();
   const auto shape_o1 = properties.GetOutputProperties("o1").at(0).shape();
@@ -851,6 +851,56 @@ TEST_F(GraphPropertiesTest, ShapeTracking) {
   EXPECT_EQ(shape_b.DebugString(), shape_o2.DebugString());
 }
 
+TEST_F(GraphPropertiesTest, FedNodes) {
+  TrivialTestGraphInputYielder fake_input(4, 1, 10, false,
+                                          cluster_->GetDeviceNames());
+  GrapplerItem item;
+  CHECK(fake_input.NextItem(&item));
+  item.feed.emplace_back("AddN", Tensor());
+
+  {
+    // Conservative shape analysis: the shape of fed ports should be unknown
+    GraphProperties properties(item);
+    Status s = properties.InferStatically(false);
+    TF_CHECK_OK(s);
+    for (const auto& node : item.graph.node()) {
+      if (node.name() == "AddN") {
+        const auto in_props = properties.GetInputProperties(node.name());
+        EXPECT_EQ(1, in_props.size());
+        const OpInfo::TensorProperties& in_prop = in_props[0];
+        EXPECT_EQ(DT_FLOAT, in_prop.dtype());
+        EXPECT_FALSE(in_prop.shape().unknown_rank());
+        EXPECT_EQ(2, in_prop.shape().dim_size());
+        const auto out_props = properties.GetOutputProperties(node.name());
+        EXPECT_EQ(1, out_props.size());
+        EXPECT_EQ(DT_FLOAT, in_prop.dtype());
+        EXPECT_TRUE(in_prop.shape().unknown_rank());
+      }
+    }
+  }
+  {
+    // Optimistic shape analysis: the shape of fed ports should be derived from
+    // the shape of the fanin.
+    GraphProperties properties(item);
+    Status s = properties.InferStatically(true);
+    TF_CHECK_OK(s);
+    for (const auto& node : item.graph.node()) {
+      if (node.name() == "AddN") {
+        const auto in_props = properties.GetInputProperties(node.name());
+        EXPECT_EQ(1, in_props.size());
+        const OpInfo::TensorProperties& in_prop = in_props[0];
+        EXPECT_EQ(DT_FLOAT, in_prop.dtype());
+        EXPECT_FALSE(in_prop.shape().unknown_rank());
+        EXPECT_EQ(2, in_prop.shape().dim_size());
+        const auto out_props = properties.GetOutputProperties(node.name());
+        EXPECT_EQ(1, out_props.size());
+        const OpInfo::TensorProperties& out_prop = out_props[0];
+        EXPECT_EQ(in_prop.DebugString(), out_prop.DebugString());
+      }
+    }
+  }
+}
+
 }  // namespace
 }  // namespace grappler
 }  // namespace tensorflow
diff --git a/tensorflow/core/grappler/costs/virtual_scheduler.cc b/tensorflow/core/grappler/costs/virtual_scheduler.cc
index e5e1ee3292..6640de668d 100644
--- a/tensorflow/core/grappler/costs/virtual_scheduler.cc
+++ b/tensorflow/core/grappler/costs/virtual_scheduler.cc
@@ -122,7 +122,7 @@ Status VirtualScheduler::Init() {
   // Construct graph properties.
   Status status;
   if (use_static_shapes_) {
-    status = graph_properties_.InferStatically();
+    status = graph_properties_.InferStatically(true);
   } else {
     status = graph_properties_.InferDynamically(cluster_);
   }
diff --git a/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc b/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc
index 1e39c610a4..930d122234 100644
--- a/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc
+++ b/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc
@@ -1067,7 +1067,7 @@ Status ArithmeticOptimizer::Optimize(Cluster* /*cluster*/,
   if (opt_level_ == RewriterConfig::AGGRESSIVE) {
     graph_properties_.reset(new GraphProperties(item));
     // Shapes are only needed in aggressive mode.
-    TF_RETURN_IF_ERROR(graph_properties_->InferStatically());
+    TF_RETURN_IF_ERROR(graph_properties_->InferStatically(false));
     TF_RETURN_IF_ERROR(
         graph_properties_->AnnotateOutputShapes(optimized_graph_));
   }
diff --git a/tensorflow/core/grappler/optimizers/constant_folding.cc b/tensorflow/core/grappler/optimizers/constant_folding.cc
index c77b2badf4..33a9dddba7 100644
--- a/tensorflow/core/grappler/optimizers/constant_folding.cc
+++ b/tensorflow/core/grappler/optimizers/constant_folding.cc
@@ -1163,7 +1163,7 @@ Status ConstantFolding::RunOptimizationPass(Cluster* cluster,
   Status s = errors::Unknown(
       "The graph properties are needed but were not initialized");
   if (needs_shapes) {
-    s = properties.InferStatically();
+    s = properties.InferStatically(false);
   }
 
   if (!has_feed && s.ok()) {
diff --git a/tensorflow/core/grappler/optimizers/layout_optimizer.cc b/tensorflow/core/grappler/optimizers/layout_optimizer.cc
index d5563e9d4c..1b8046b787 100644
--- a/tensorflow/core/grappler/optimizers/layout_optimizer.cc
+++ b/tensorflow/core/grappler/optimizers/layout_optimizer.cc
@@ -1620,7 +1620,7 @@ Status LayoutOptimizer::Optimize(Cluster* cluster, const GrapplerItem& item,
   virtual_placer_.reset(new VirtualPlacer(cluster));
   nodes_to_preserve_ = item.NodesToPreserve();
   GraphProperties graph_properties(item);
-  auto status = graph_properties.InferStatically();
+  auto status = graph_properties.InferStatically(false);
   if (!status.ok()) {
     *output = item.graph;
     return status;
diff --git a/tensorflow/core/grappler/optimizers/memory_optimizer.cc b/tensorflow/core/grappler/optimizers/memory_optimizer.cc
index 7c44ce15c6..a2a2680c4f 100644
--- a/tensorflow/core/grappler/optimizers/memory_optimizer.cc
+++ b/tensorflow/core/grappler/optimizers/memory_optimizer.cc
@@ -716,7 +716,7 @@ Status MemoryOptimizer::Optimize(Cluster* cluster, const GrapplerItem& item,
   {
     // Estimate the size of the data to swap for each node.
     GraphProperties properties(item);
-    TF_RETURN_IF_ERROR(properties.InferStatically());
+    TF_RETURN_IF_ERROR(properties.InferStatically(true));
     for (auto& swap : nodes_to_swap) {
       const NodeDef* node = swap.first;
       std::vector<OpInfo::TensorProperties> props =
diff --git a/tensorflow/core/grappler/optimizers/static_schedule.cc b/tensorflow/core/grappler/optimizers/static_schedule.cc
index 6ce6deef2c..450e853407 100644
--- a/tensorflow/core/grappler/optimizers/static_schedule.cc
+++ b/tensorflow/core/grappler/optimizers/static_schedule.cc
@@ -86,7 +86,7 @@ Status EstimateEarliestExecutionTimes(
   name_map.clear();
 
   GraphProperties properties(item);
-  TF_RETURN_IF_ERROR(properties.InferStatically());
+  TF_RETURN_IF_ERROR(properties.InferStatically(true));
   OpLevelCostEstimator estimator;
   VirtualPlacer placer(cluster);
 
@@ -154,7 +154,7 @@ Status EstimateRequiredTimes(
     }
   }
   GraphProperties properties(item);
-  TF_RETURN_IF_ERROR(properties.InferStatically());
+  TF_RETURN_IF_ERROR(properties.InferStatically(true));
   OpLevelCostEstimator estimator;
   VirtualPlacer placer(cluster);
 
diff --git a/tensorflow/python/grappler/item.i b/tensorflow/python/grappler/item.i
index 7dd79f7c82..8f72a425c3 100644
--- a/tensorflow/python/grappler/item.i
+++ b/tensorflow/python/grappler/item.i
@@ -120,7 +120,7 @@ static PyObject* TF_GetOpProperties(GItem item) {
     Py_RETURN_NONE;
   }
   tensorflow::grappler::GraphProperties properties(*item);
-  tensorflow::Status status = properties.InferStatically();
+  tensorflow::Status status = properties.InferStatically(false);
   if (!status.ok()) {
     Py_RETURN_NONE;
   }
diff --git a/tensorflow/python/grappler/model_analyzer.cc b/tensorflow/python/grappler/model_analyzer.cc
index 7d365c3be9..da5b03234e 100644
--- a/tensorflow/python/grappler/model_analyzer.cc
+++ b/tensorflow/python/grappler/model_analyzer.cc
@@ -27,7 +27,7 @@ ModelAnalyzer::ModelAnalyzer(const GrapplerItem& item) : item_(item) {}
 
 Status ModelAnalyzer::GenerateReport(std::ostream& os) {
   GraphProperties properties(item_);
-  TF_RETURN_IF_ERROR(properties.InferStatically());
+  TF_RETURN_IF_ERROR(properties.InferStatically(false));
 
   for (const auto& node : item_.MainOpsFanin()) {
     PrintNodeInfo(node, properties, os);
-- 
GitLab


From d96e936fffb8ccd5761c4bf59a8f8ce185f4d50c Mon Sep 17 00:00:00 2001
From: Mark Daoust <markdaoust@google.com>
Date: Tue, 28 Nov 2017 12:50:21 -0800
Subject: [PATCH 0357/1225] Add custom_estimators

PiperOrigin-RevId: 177203120
---
 .../docs_src/get_started/custom_estimators.md | 576 ++++++++++++++++++
 1 file changed, 576 insertions(+)
 create mode 100644 tensorflow/docs_src/get_started/custom_estimators.md

diff --git a/tensorflow/docs_src/get_started/custom_estimators.md b/tensorflow/docs_src/get_started/custom_estimators.md
new file mode 100644
index 0000000000..e347aa6bd0
--- /dev/null
+++ b/tensorflow/docs_src/get_started/custom_estimators.md
@@ -0,0 +1,576 @@
+
+# Creating Custom Estimators
+This document introduces custom Estimators. In particular, this document
+demonstrates how to create a custom @{tf.estimator.Estimator$Estimator} that
+mimics the behavior of the pre-made Estimator
+@{tf.estimator.DNNClassifier$`DNNClassifier`} in solving the Iris problem. See
+the @{$get_started/estimator$Pre-Made Estimators chapter} for details.
+
+If you are feeling impatient, feel free to compare and contrast the following
+full programs:
+
+* Iris implemented with the [pre-made DNNClassifier Estimator](https://github.com/tensorflow/models/blob/master/samples/core/get_started/premade_estimator.py).
+* Iris implemented with a [custom Estimator](https://github.com/tensorflow/models/blob/master/samples/core/get_started/custom_estimator.py).
+
+## Pre-made vs. custom
+
+As the following figure shows, pre-made Estimators are subclasses of the
+@{tf.estimator.Estimator} base class, while custom Estimators are an instance
+of tf.estimator.Estimator:
+
+<div style="width:100%; margin:auto; margin-bottom:10px; margin-top:20px;">
+<img style="width:100%"
+  alt="Premade estimators are sub-classes of `Estimator`. Custom Estimators are usually (direct) instances of `Estimator`"
+  src="../images/custom_estimators/estimator_types.png">
+</div>
+<div style="text-align: center">
+Pre-made and custom Estimators are all Estimators.
+</div>
+
+Pre-made Estimators are fully baked. Sometimes though, you need more control
+over an Estimator's behavior.  That's where custom Estimators come in. You can
+create a custom Estimator to do just about anything. If you want hidden layers
+connected in some unusual fashion, write a custom Estimator. If you want to
+calculate a unique
+[metric](https://developers.google.com/machine-learning/glossary/#metric)
+for your model, write a custom Estimator.  Basically, if you want an Estimator
+optimized for your specific problem, write a custom Estimator.
+
+A model function (or `model_fn`) implements the ML algorithm. The
+only difference between working with pre-made Estimators and custom Estimators
+is:
+
+* With pre-made Estimators, someone already wrote the model function for you.
+* With custom Estimators, you must write the model function.
+
+Your model function could implement a wide range of algorithms, defining all
+sorts of hidden layers and metrics.  Like input functions, all model functions
+must accept a standard group of input parameters and return a standard group of
+output values. Just as input functions can leverage the Dataset API, model
+functions can leverage the Layers API and the Metrics API.
+
+Let's see how to solve the Iris problem with a custom Estimator. A quick
+reminder--here's the organization of the Iris model that we're trying to mimic:
+
+<div style="width:100%; margin:auto; margin-bottom:10px; margin-top:20px;">
+<img style="height:260px"
+  alt="A diagram of the network architecture: Inputs, 2 hidden layers, and outputs"
+  src="../images/custom_estimators/full_network.png">
+</div>
+<div style="text-align: center">
+Our implementation of Iris contains four features, two hidden layers,
+and a logits output layer.
+</div>
+
+## Write an Input function
+
+In our custom Estimator implementation, we'll reuse the input function we used
+in the pre-made Estimator implementation. Namely:
+
+```python
+def train_input_fn(features, labels, batch_size):
+    """An input function for training"""
+    # Convert the inputs to a Dataset.
+    dataset = tf.data.Dataset.from_tensor_slices((features, labels))
+
+    # Shuffle, repeat, and batch the examples.
+    dataset = dataset.shuffle(1000).repeat().batch(batch_size)
+
+    # Return the read end of the pipeline.
+    return dataset.make_one_shot_iterator().get_next()
+```
+
+This input function builds an input pipeline that yields batches of
+`(features, labels)` pairs, where `features` is a dictionary features.
+
+## Create feature columns
+
+<!-- TODO(markdaoust): link to feature_columns when it exists-->
+As detailed in @{$get_started/estimator$Premade Estimators}, you must define
+your model's feature columns to specify how the model should use each feature.
+Whether working with pre-made Estimators or custom Estimators, you define
+feature columns in the same fashion.
+
+The following code creates a simple `numeric_column` for each input feature,
+indicating that the value of the input feature should be used directly as an
+input to the model:
+
+```python
+# Feature columns describe how to use the input.
+my_feature_columns = []
+for key in train_x.keys():
+    my_feature_columns.append(tf.feature_column.numeric_column(key=key))
+```
+
+## Write a model function
+
+The model function we'll use has the following call signature:
+
+```python
+def my_model_fn(
+   features, # This is batch_features from input_fn
+   labels,   # This is batch_labels from input_fn
+   mode,     # An instance of tf.estimator.ModeKeys
+   params):  # Additional configuration
+```
+
+The first two arguments are the batches of features and labels returned from
+the input function; that is, `features` and `labels` are the handles to the
+data your model will use. The `mode` argument indicates whether the caller is
+requesting training, predicting, or evaluation.
+
+The caller may pass `params` to an Estimator's constructor. The `params` passed
+to the constructor become the `params` passed to `model_fn`.
+
+```python
+    # Build 2 hidden layer DNN with 10, 10 units respectively.
+    classifier = tf.estimator.Estimator(
+        model_fn=my_model,
+        params={
+            'feature_columns': my_feature_columns,
+            # Two hidden layers of 10 nodes each.
+            'hidden_units': [10, 10],
+            # The model must choose between 3 classes.
+            'n_classes': 3,
+        })
+```
+
+To implement a typical model function, you must do the following:
+
+* (Define the model)[#define_the_model].
+* Specify additional calculations for each of
+  the [three different modes](#modes):
+  * [Predict](#predict)
+  * [Evaluate](#evaluate)
+  * [Train](#train)
+
+## Define the model
+
+The basic deep neural network model must define the following three sections:
+
+* An [input layer](https://developers.google.com/machine-learning/glossary/#input_layer)
+* One or more [hidden layers](https://developers.google.com/machine-learning/glossary/#hidden_layer)
+* An [output layer](https://developers.google.com/machine-learning/glossary/#output_layer)
+
+### Define the input layer
+
+Call @{tf.feature_column.input_layer} to convert your feature dictionary and
+feature columns into input for your model. For example:
+
+```python
+    # Use `input_layer` to apply the feature columns.
+    net = tf.feature_column.input_layer(features, params['feature_columns'])
+```
+
+The preceding line applies the transformations defined by your feature columns,
+creating the input layer of our model.
+
+<div style="width:100%; margin:auto; margin-bottom:10px; margin-top:20px;">
+<img style="height:260px"
+  alt="A diagram of the input layer, in this case a 1:1 mapping from raw-inputs to features."
+  src="../images/custom_estimators/input_layer.png">
+</div>
+
+
+### Hidden Layers
+
+If you are creating a deep neural network, you must define one or more hidden
+layers. The Layers API provides a rich set of functions to define all types of
+hidden layers, including convolutional, pooling, and dropout layers. For Iris,
+we're simply going to call @{tf.layers.dense} to create hidden layers, with
+dimensions defined by `params['hidden_layers']`. In a `dense` layer each node
+is connected to every node in the preceding layer.  Here's the relevant code:
+
+``` python
+    # Build the hidden layers, sized according to the 'hidden_units' param.
+    for units in params['hidden_units']:
+        net = tf.layers.dense(net, units=units, activation=tf.nn.relu)
+```
+* The `units` parameter defines the number of output neurons in a given layer.
+* The `activation` parameter defines the [activation function](https://developers.google.com/machine-learning/glossary/#a) —
+  [Relu](https://developers.google.com/machine-learning/glossary/#ReLU) in this
+  case.
+
+The variable `net` here signifies the current top layer of the network. During
+the first iteration, `net` signifies the input layer. On each loop iteration
+`tf.layers.dense` creates a new layer, which takes the previous layer as its
+input. So, the loop uses `net` to pass the previously created layer as input
+to the layer being created.
+
+After creating two hidden layers, our network looks as follows. For
+simplicity, the figure only shows four hidden units in each layer.
+
+<div style="width:100%; margin:auto; margin-bottom:10px; margin-top:20px;">
+<img style="height:260px"
+  alt="The input layer with two hidden layers added."
+  src="../images/custom_estimators/add_hidden_layer.png">
+</div>
+
+Note that @{tf.layers.dense} provides many additional capabilities, including
+the ability to set a multitude of regularization parameters. For the sake of
+simplicity, though, we're going to simply accept the default values of the
+other parameters.
+
+### Output Layer
+
+We'll define the output layer by calling @{tf.layers.dense} yet again, this
+time without an activation function:
+
+```python
+    # Compute logits (1 per class).
+    logits = tf.layers.dense(net, params['n_classes'], activation=None)
+```
+
+Here, `net` signifies the final hidden layer. Therefore, the full set of layers
+is now connected as follows:
+
+<div style="width:100%; margin:auto; margin-bottom:10px; margin-top:20px;">
+<img style="height:260px"
+  alt="A logit output layer connected to the top hidden layer"
+  src="../images/custom_estimators/add_logits.png">
+</div>
+<div style="text-align: center">
+The final hidden layer feeds into the output layer.
+</div>
+
+When defining an output layer, the `units` parameter specifies the number of
+outputs. So, by setting `units` to `params['n_classes']`, the model produces
+one output value per class. Each element of the output vector will contains the
+score, or "logit", calculated to the associated class of Iris: Setosa,
+Versicolor, or Virginica, respectively.
+
+Later on, these logits will be transformed into probabilities by the
+@{tf.nn.softmax} function.
+
+## Implement training, evaluation, and prediction {modes}
+
+The final step in creating a model function is to write branching code that
+implements prediction, evaluation, and training.
+
+The model function gets invoked whenever someone calls the Estimator's `train`,
+`evaluate`, or `predict` methods. Recall that the signature for the model
+function looks like this:
+
+``` python
+def my_model_fn(
+   features, # This is batch_features from input_fn
+   labels,   # This is batch_labels from input_fn
+   mode):    # An instance of tf.estimator.ModeKeys, see below
+```
+
+Focus on that third argument, mode. As the following table shows, when someone
+calls train, evaluate, or predict, the Estimator framework invokes your model
+function with the mode parameter set as follows:
+
+| Estimator method                 |    Estimator Mode |
+|:---------------------------------|:------------------|
+|@{tf.estimator.Estimator.train$`train()`} |@{tf.estimator.ModeKeys.TRAIN$`ModeKeys.TRAIN`} |
+|@{tf.estimator.Estimator.evaluate$`evaluate()`}  |@{tf.estimator.ModeKeys.EVAL$`ModeKeys.EVAL`}      |
+|@{tf.estimator.Estimator.predict$`predict()`}|@{tf.estimator.ModeKeys.PREDICT$`ModeKeys.PREDICT`} |
+
+For example, suppose you instantiate a custom Estimator to generate an object
+named `classifier`. Then, you make the following call:
+
+``` python
+classifier = tf.estimator.Estimator(...)
+classifier.train(input_fn=lambda: my_input_fn(FILE_TRAIN, True, 500))
+```
+The Estimator framework then calls your model function with mode set to
+`ModeKeys.TRAIN`.
+
+Your model function must provide code to handle all three of the mode values.
+For each mode value, your code must return an instance of
+`tf.estimator.EstimatorSpec`, which contains the information the caller
+requires. Let's examine each mode.
+
+### Predict
+
+When the Estimator's `predict` method is called, the `model_fn` receives
+`mode = ModeKeys.PREDICT`. In this case, the model function must return a
+`tf.estimator.EstimatorSpec` containing the prediction.
+
+The model must have been trained prior to making a prediction. The trained model
+is stored on disk in the `model_dir` directory established when you
+instantiated the Estimator.
+
+The code to generate the prediction for this model looks as follows:
+
+```python
+# Compute predictions.
+predicted_classes = tf.argmax(logits, 1)
+if mode == tf.estimator.ModeKeys.PREDICT:
+    predictions = {
+        'class_ids': predicted_classes[:, tf.newaxis],
+        'probabilities': tf.nn.softmax(logits),
+        'logits': logits,
+    }
+    return tf.estimator.EstimatorSpec(mode, predictions=predictions)
+```
+The prediction dictionary contains everything that your model returns when run
+in prediction mode.
+
+<div style="width:100%; margin:auto; margin-bottom:10px; margin-top:20px;">
+<img style="height:260px"
+  alt="Additional outputs added to the output layer."
+  src="../images/custom_estimators/full_network.png">
+</div>
+
+The `predictions` holds the following three key/value pairs:
+
+*   `class_ids` holds the class id (0, 1, or 2) representing the model's
+    prediction of the most likely species for this example.
+*   `probabilities` holds the three probabilities (in this example, 0.02, 0.95,
+    and 0.03)
+*   `logit` holds the raw logit values (in this example, -1.3, 2.6, and -0.9)
+
+We return that dictionary to the caller via the `predictions` parameter of the
+@{tf.estimator.EstimatorSpec}. The Estimator's
+@{tf.estimator.Estimator.predict$`predict`} method will yield these
+dictionaries.
+
+### Calculate the loss
+
+For both [training](#train) and [evaluation](#evaluate) we need to calculate the
+model's loss. This is the
+[objective](https://developers.google.com/machine-learning/glossary/#objective)
+that will be optimized.
+
+Before we calculate loss, we we must first convert the labels from a list of
+indexes `(0, 1, 2)` to a
+[one-hot representation](https://developers.google.com/machine-learning/glossary/#one-hot_encoding)
+by calling @{tf.one_hot}. Then, we can calculate the loss by calling
+@{tf.losses.softmax_cross_entropy}. Here's the complete code:
+
+
+```python
+    # Convert the labels to a one-hot tensor of shape (length of features, 3)
+    # and with a on-value of 1 for each one-hot vector of length 3.
+    onehot_labels = tf.one_hot(labels, 3, 1, 0)
+
+    # Compute loss.
+    loss = tf.losses.softmax_cross_entropy(
+        onehot_labels=onehot_labels, logits=logits)
+```
+
+### Evaluate
+
+When the Estimator's `evaluate` method is called, the `model_fn` receives
+`mode = ModeKeys.EVAL`. In this case, the model function must return a
+`tf.estimator.EstimatorSpec` containing the model's loss and optionally one
+or more metrics.
+
+Although returning metrics is optional, most custom Estimators do return at
+least one metric. TensorFlow provides a Metrics module @{tf.metrics} to
+calculate common metrics.  For brevity's sake, we'll only return accuracy. The
+@{tf.metrics.accuracy} function compares our predictions against the
+true values, that is, against the labels provided by the input function. The
+@{tf.metrics.accuracy} function requires the labels and predictions to have the
+same shape. Here's the call to @{tf.metrics.accuracy}:
+
+``` python
+    # Compute evaluation metrics.
+    accuracy = tf.metrics.accuracy(labels=labels,
+                                   predictions=predicted_classes,
+                                   name='acc_op')
+```
+
+The @{tf.estimator.EstimatorSpec$`EstimatorSpec`} returned for evaluation
+typically contains the following information:
+
+* `loss`, which is the model's loss
+* `eval_metric_ops`, which is an optional dictionary of metrics.
+
+So, we'll create a dictionary containing our sole metric. If we had calculated
+other metrics, we would have added them as additional key/value pairs to that
+same dictionary.  Then, we'll pass that dictionary in the `eval_metric_ops`
+argument of `tf.estimator.EstimatorSpec`. Here's the code:
+
+```python
+    metrics = {'accuracy': accuracy}
+    tf.summary.scalar('accuracy', accuracy[1])
+
+    if mode == tf.estimator.ModeKeys.EVAL:
+        return tf.estimator.EstimatorSpec(
+            mode, loss=loss, eval_metric_ops=metrics)
+```
+
+The @{tf.summary.scalar} will make accuracy available to TensorBoard (more on
+this later).
+
+### Train
+
+When the Estimator's `train` method is called, the `model_fn` is called
+with `mode = ModeKeys.TRAIN`. In this case, the model function must return an
+`EstimatorSpec` that contains the loss and a training operation.
+
+Building the training operation will require an optimizer. We will use
+@{tf.train.AdagradOptimizer} because we're mimicking the `DNNClassifier`, which
+also uses `Adagrad` by default. The `tf.train` package provides many other
+optimizers—feel free to experiment with them.
+
+Here is the code that builds the optimizer:
+
+``` python
+  # Instantiate an optimizer.
+  optimizer = tf.train.AdagradOptimizer(learning_rate=0.1)
+```
+
+Next, we train the model using the optimizer's
+@{tf.train.Optimizer.minimize$`minimize`} method on the loss we calculated
+earlier.
+
+The `minimize` method also takes a `global_step` parameter. TensorFlow uses this
+parameter to count the number of training steps that have been processed
+(to know when to end a training run). Furthermore, the `global_step` is
+essential for TensorBoard graphs to work correctly. Simply call
+@{tf.train.get_global_step} and pass the result to the `global_step`
+argument of `minimize`.
+
+Here's the code to train the model:
+
+``` python
+  # Train the model by establishing an objective, which is to
+  # minimize loss using that optimizer.
+  train_op = optimizer.minimize(loss, global_step=tf.train.get_global_step())
+```
+
+The @{tf.estimator.EstimatorSpec$`EstimatorSpec`} returned for training
+must have the following fields set:
+
+* `loss`, which contains the value of the loss function.
+* `train_op`, which executes a training step.
+
+Here's our code to call `EstimatorSpec`:
+
+```python
+    # Return training information.
+    return tf.estimator.EstimatorSpec(
+        mode=tf.estimator.ModeKeys.TRAIN,
+        loss=loss,
+        train_op=train_op)
+```
+
+The model function is now complete.
+
+## The custom Estimator
+
+Instantiate the custom Estimator through the Estimator base class as follows:
+
+```python
+    # Build 2 hidden layer DNN with 10, 10 units respectively.
+    classifier = tf.estimator.Estimator(
+        model_fn=my_model,
+        params={
+            'feature_columns': my_feature_columns,
+            # Two hidden layers of 10 nodes each.
+            'hidden_units': [10, 10],
+            # The model must choose between 3 classes.
+            'n_classes': 3,
+        })
+```
+Here the `params` dictionary serves the same purpose as the key-word
+arguments of `DNNClassifier`; that is, the `params` dictionary lets you
+configure your Estimator without modifying the code in the `model_fn`.
+
+The rest of the code to train, evaluate, and generate predictions using our
+Estimator is the same as for the pre-made `DNNClassifier`. For example, the
+following line will train the model:
+
+```python
+    # Train the Model.
+    classifier.train(
+        input_fn=lambda:train_input_fn(train_x, train_y, args.batch_size),
+        steps=args.train_steps)
+```
+
+## TensorBoard
+
+You can view training results for your custom Estimator in TensorBoard. To see
+this reporting, start TensorBoard from your command line as follows:
+
+```bsh
+# Replace PATH with the actual path passed as model_dir
+tensorboard --logdir=PATH
+```
+
+Then, open TensorBoard by browsing to: [http://localhost:6006](http://localhost:6006)
+
+All the pre-made Estimators automatically log a lot of information to
+TensorBoard. With custom Estimators, however, TensorBoard only provides one
+default log (a graph of the loss) plus the information you explicitly tell
+TensorBoard to log. For the custom Estimator you just created, TensorBoard
+generates the following:
+
+<div style="width:100%; margin:auto; margin-bottom:10px; margin-top:20px;">
+<img style="height:260px"
+  alt="Accuracy, steps/second, and loss 'scalar' graphs from tensorboard"
+  src="../images/custom_estimators/tensorboard.png">
+</div>
+<div style="text-align: center">
+TensorBoard displays three graphs.
+</div>
+
+In brief, here's what the three graphs tell you:
+
+* global_step/sec: A performance indicator showing how many batches (gradient
+  updates) we processed per second as the model trains.
+
+* loss: The loss reported.
+
+* accuracy: The accuracy is recorded by the following two lines:
+
+  * `eval_metric_ops={'my_accuracy': accuracy})`, during evaluation.
+  * `tf.summary.scalar('accuracy', accuracy[1])`, during training.
+
+These tensorboard graphs are one of the main reasons it's important to pass a
+`global_step` to your optimizer's `minimize` method. The model can't record
+the x-coordinate for these graphs without it.
+
+Note the following in the `my_accuracy` and `loss` graphs:
+
+* The orange line represents training.
+* The blue dot represents evaluation.
+
+During training, summaries (the orange line) are recorded periodically as
+batches are processed, which is why it becomes a graph spanning x-axis range.
+
+By contrast, evaluation produces only a single point on the graph for each call
+to `evaluate`. This point contains the average over the entire evaluation call.
+This has no width on the graph as it is evaluated entirely from the model state
+at a particular training step (from a single checkpoint).
+
+As suggested in the following figure, you may see and also selectively
+disable/enable the reporting using the controls on the left side.
+
+<div style="width:100%; margin:auto; margin-bottom:10px; margin-top:20px;">
+<img style="margin:auto;display:block;"
+  alt="Check-boxes allowing the user to select which runs are shown."
+  src="../images/custom_estimators/select_run.jpg">
+</div>
+<div style="text-align: center">
+Enable or disable reporting.
+</div>
+
+
+## Summary
+
+Although pre-made Estimators can be an effective way to quickly create new
+models, you will often need the additional flexibility that custom Estimators
+provide. Fortunately, pre-made and custom Estimators follow the same
+programming model. The only practical difference is that you must write a model
+function for custom Estimators; everything else is the same.
+
+For more details, be sure to check out:
+
+* The
+[official TensorFlow implementation of MNIST](https://github.com/tensorflow/models/tree/master/official/mnist),
+which uses a custom estimator.
+
+* The TensorFlow
+[official models repository](https://github.com/tensorflow/models/tree/master/official),
+which contains more curated examples using custom estimators.
+
+* This [TensorBoard video](https://youtu.be/eBbEDRsCmv4), which introduces
+TensorBoard.
+
+
-- 
GitLab


From c68d35becf59396f86b5d90d236405eafef3349e Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 28 Nov 2017 12:57:42 -0800
Subject: [PATCH 0358/1225] Change HLO verifier semantics for bitcasts to:
 Bitcasts that are not the root of a computation can be any shape byte size.
 Bitcasts that are the root of a computation must have the same shape byte
 size as their operand.

PiperOrigin-RevId: 177204171
---
 tensorflow/compiler/xla/service/hlo_verifier.cc | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/tensorflow/compiler/xla/service/hlo_verifier.cc b/tensorflow/compiler/xla/service/hlo_verifier.cc
index 15188c4057..2c09d2defb 100644
--- a/tensorflow/compiler/xla/service/hlo_verifier.cc
+++ b/tensorflow/compiler/xla/service/hlo_verifier.cc
@@ -143,9 +143,13 @@ class ShapeVerifier : public DfsHloVisitor {
   }
 
   Status HandleBitcast(HloInstruction* bitcast) override {
-    // Bitcasts can be any shape, as long as the size matches the operand size.
-    TF_RET_CHECK(shape_size_fn_(bitcast->shape()) ==
-                 shape_size_fn_(bitcast->operand(0)->shape()));
+    // Bitcasts that are not the root of a computation can be any shape.
+    // Bitcasts that are the root of a computation must have the same shape
+    // byte size as their operand.
+    if (bitcast->parent()->root_instruction() == bitcast) {
+      TF_RET_CHECK(shape_size_fn_(bitcast->shape()) ==
+                   shape_size_fn_(bitcast->operand(0)->shape()));
+    }
     return tensorflow::Status::OK();
   }
 
-- 
GitLab


From 9c7fd28542b37e7980f2b0a155996cc1703bd0d7 Mon Sep 17 00:00:00 2001
From: James Keeling <jtkeeling@google.com>
Date: Tue, 28 Nov 2017 12:59:35 -0800
Subject: [PATCH 0359/1225] Allow unsorted_segment_sum and unsorted_segment_max
 to take int64 num_segments

Previously this argument could only be an int32. It can now be int32 or int64, defaulting to int32.

This fixes bugs that can occur when calling _IndexedSlicesToTensor with int64 arguments.

PiperOrigin-RevId: 177204464
---
 tensorflow/core/ops/math_ops.cc               |  6 ++++--
 .../segment_reduction_ops_test.py             | 21 +++++++++++++++++++
 2 files changed, 25 insertions(+), 2 deletions(-)

diff --git a/tensorflow/core/ops/math_ops.cc b/tensorflow/core/ops/math_ops.cc
index d30b847696..d7afd02df6 100644
--- a/tensorflow/core/ops/math_ops.cc
+++ b/tensorflow/core/ops/math_ops.cc
@@ -1811,10 +1811,11 @@ output: Has same shape as data, except for dimension 0 which
 REGISTER_OP("UnsortedSegmentSum")
     .Input("data: T")
     .Input("segment_ids: Tindices")
-    .Input("num_segments: int32")
+    .Input("num_segments: Tnumsegments")
     .Output("output: T")
     .Attr("T: numbertype")
     .Attr("Tindices: {int32,int64}")
+    .Attr("Tnumsegments: {int32,int64} = DT_INT32")
     .SetShapeFn(UnsortedSegmentReductionShapeFn)
     .Doc(R"doc(
 Computes the sum along segments of a tensor.
@@ -1849,10 +1850,11 @@ output: Has same shape as data, except for the first `segment_ids.rank`
 REGISTER_OP("UnsortedSegmentMax")
     .Input("data: T")
     .Input("segment_ids: Tindices")
-    .Input("num_segments: int32")
+    .Input("num_segments: Tnumsegments")
     .Output("output: T")
     .Attr("T: realnumbertype")
     .Attr("Tindices: {int32,int64}")
+    .Attr("Tnumsegments: {int32,int64} = DT_INT32")
     .SetShapeFn(UnsortedSegmentReductionShapeFn)
     .Doc(R"doc(
 Computes the Max along segments of a tensor.
diff --git a/tensorflow/python/kernel_tests/segment_reduction_ops_test.py b/tensorflow/python/kernel_tests/segment_reduction_ops_test.py
index 99f9f09690..fd58cdb170 100644
--- a/tensorflow/python/kernel_tests/segment_reduction_ops_test.py
+++ b/tensorflow/python/kernel_tests/segment_reduction_ops_test.py
@@ -266,6 +266,27 @@ class UnsortedSegmentSumTest(SegmentReductionHelper):
         self.assertAllClose(np_ans, tf_ans)
         self.assertShapeEqual(np_ans, s)
 
+  def testNumSegmentsTypes(self):
+    dtypes = [dtypes_lib.int32, dtypes_lib.int64]
+    indices_flat = np.array([0, 4, 0, 8, 3, 8, 4, 7, 7, 3])
+    num_segments = 12
+    for indices in indices_flat, indices_flat.reshape(5, 2):
+      shape = indices.shape + (2,)
+      for dtype in dtypes:
+        with self.test_session(use_gpu=True):
+          tf_x, np_x = self._input(shape)
+          num_segments_constant = constant_op.constant(
+              num_segments, dtype=dtype)
+          np_ans = self._segmentReduce(
+              indices, np_x, np.add, op2=None, num_out_rows=num_segments)
+          s = math_ops.unsorted_segment_sum(
+              data=tf_x,
+              segment_ids=indices,
+              num_segments=num_segments_constant)
+          tf_ans = s.eval()
+        self.assertAllClose(np_ans, tf_ans)
+        self.assertShapeEqual(np_ans, s)
+
   def testGradientSegmentSum(self):
     num_cols = 2
     indices_flat = np.array([0, 4, 0, 8, 3, 8, 4, 7, 7, 3])
-- 
GitLab


From f93c8a72154fd22fe1578bf448df156acd54fddf Mon Sep 17 00:00:00 2001
From: Sanjoy Das <sanjoy@google.com>
Date: Tue, 28 Nov 2017 13:40:05 -0800
Subject: [PATCH 0360/1225] [XLA:CPU] Avoid using the untiled lowering for dot
 when possible

We still need the "make rhs column major" layout assignment optimization since
we see significant regressions without it.

I did not port the logic around single_threaded_eigen from
ProfitableToImplementDotInUntiledLlvmIr.  That logic became stale after we
changed dot-matrix products to always be single threaded, even when calling into
the Eigen implementation.

PiperOrigin-RevId: 177210146
---
 .../xla/service/cpu/dot_op_emitter.cc         | 53 +++----------------
 .../compiler/xla/service/cpu/dot_op_emitter.h | 16 ++----
 .../xla/service/cpu/layout_assignment.cc      |  6 +--
 3 files changed, 12 insertions(+), 63 deletions(-)

diff --git a/tensorflow/compiler/xla/service/cpu/dot_op_emitter.cc b/tensorflow/compiler/xla/service/cpu/dot_op_emitter.cc
index 4c40dae512..8f7b478cee 100644
--- a/tensorflow/compiler/xla/service/cpu/dot_op_emitter.cc
+++ b/tensorflow/compiler/xla/service/cpu/dot_op_emitter.cc
@@ -518,9 +518,7 @@ DotOpEmitter::DotOpEmitter(const HloInstruction& dot, bool transpose_lhs,
 bool DotOpEmitter::ShapesAreLegalForRuntimeDot() const { return true; }
 
 bool DotOpEmitter::EmitLlvmIrDotIfProfitable() {
-  if (dot_.shape().dimensions_size() != 2 ||
-      ProfitableToImplementDotInUntiledLlvmIr(dot_) ==
-          DotInLlvmIrProfitable::kYes) {
+  if (dot_.shape().dimensions_size() != 2) {
     return false;
   }
 
@@ -977,9 +975,7 @@ bool PotentiallyImplementedAsEigenDot(const HloInstruction& hlo) {
       return false;
     }
 
-    if (ProfitableToImplementDotInUntiledLlvmIr(hlo) ==
-            DotInLlvmIrProfitable::kYes ||
-        ProfitableToImplementDotInTiledLlvmIr(hlo)) {
+    if (ProfitableToImplementDotInTiledLlvmIr(hlo)) {
       return false;
     }
 
@@ -1010,46 +1006,11 @@ bool PotentiallyImplementedAsEigenDot(const HloInstruction& hlo) {
   return false;
 }
 
-DotInLlvmIrProfitable ProfitableToImplementDotInUntiledLlvmIr(
-    const HloInstruction& dot) {
-  if (dot.opcode() == HloOpcode::kDot && dot.shape().dimensions_size() == 2) {
-    const Shape& result_shape = dot.shape();
-    // kReductionDimensionThresholdBytes was chosen to be 1/4 of a typical L1
-    // cache line size, so that we can have the reduction dimension of both the
-    // LHS and RHS matrices and still have some space "left over".  This needs
-    // to be tuned further.
-    const int64 kReductionDimensionThresholdBytes = 8 * 1024;
-    const bool single_threaded_eigen =
-        !dot.GetModule()->config().debug_options().xla_cpu_multi_thread_eigen();
-
-    // This is the point at which it is better to call into Eigen and shard the
-    // dot across multiple worker threads.  This is a rough estimate by running
-    // a matmult benchmark on my local machine, and it can be tuned further.
-    const int64 kMaxSingleThreadedFlops = 16 * 1024;
-
-    const int64 M = result_shape.dimensions(0);
-    const int64 N = result_shape.dimensions(1);
-    const int64 K = dot.operand(1)->shape().dimensions(0);
-    const int64 primitive_type_size =
-        ShapeUtil::ByteSizeOfPrimitiveType(result_shape.element_type());
-    if (M == 1 &&
-        K * primitive_type_size <= kReductionDimensionThresholdBytes &&
-        (single_threaded_eigen || M * K * N <= kMaxSingleThreadedFlops)) {
-      // Heuristics:
-      //
-      //  - Look for a configuration where we will likely be able to keep LHS in
-      //    L1 and do a cache-optimal traversal of RHS.
-      //
-      //  - Bail out on matrices that are large enough that Eigen can profitably
-      //    shard the computation across multiple cores.  This only applies when
-      //    multi-threading is enabled.
-      return LayoutUtil::IsMonotonicWithDim0Major(
-                 dot.operand(1)->shape().layout())
-                 ? DotInLlvmIrProfitable::kWithColumnMajorRhs
-                 : DotInLlvmIrProfitable::kYes;
-    }
-  }
-  return DotInLlvmIrProfitable::kNo;
+// For vector-matrix dot products, it is always profitable to make the Rhs
+// column major.
+bool ProfitableToMakeDotRhsColumnMajor(const HloInstruction& hlo) {
+  return hlo.opcode() == HloOpcode::kDot &&
+         hlo.shape().dimensions_size() == 2 && hlo.shape().dimensions(0) == 1;
 }
 
 bool ProfitableToImplementDotInTiledLlvmIr(const HloInstruction& dot) {
diff --git a/tensorflow/compiler/xla/service/cpu/dot_op_emitter.h b/tensorflow/compiler/xla/service/cpu/dot_op_emitter.h
index c9168ccc0f..2badb26f90 100644
--- a/tensorflow/compiler/xla/service/cpu/dot_op_emitter.h
+++ b/tensorflow/compiler/xla/service/cpu/dot_op_emitter.h
@@ -32,19 +32,9 @@ namespace cpu {
 
 bool PotentiallyImplementedAsEigenDot(const HloInstruction& hlo);
 
-enum class DotInLlvmIrProfitable { kYes, kNo, kWithColumnMajorRhs };
-
-// Returns a value to indicate if (and under what conditions) will lowering
-// |dot| as a untiled LLVM IR dot operation be profitable over calling into
-// Eigen or emitting a tiled LLVM IR implementation.  Possible return values
-// are:
-//
-//  * DotInLlvmIrProfitable::kYes - always profitable.
-//  * DotInLlvmIrProfitable::kNo - never profitable.
-//  * DotInLlvmIrProfitable::kWithColumnMajorRhs - only if we can manage to make
-//    the Rhs layout column major.
-DotInLlvmIrProfitable ProfitableToImplementDotInUntiledLlvmIr(
-    const HloInstruction& dot);
+// Returns true to indicate that |hlo| is a dot, and that it is profitable to
+// switch the layout of the |hlo|'s RHS operand to column major.
+bool ProfitableToMakeDotRhsColumnMajor(const HloInstruction& hlo);
 
 // Returns true to indicate that we can generate a tiled LLVM IR implementation
 // for |dot|.
diff --git a/tensorflow/compiler/xla/service/cpu/layout_assignment.cc b/tensorflow/compiler/xla/service/cpu/layout_assignment.cc
index 3f2d101959..69466fd32e 100644
--- a/tensorflow/compiler/xla/service/cpu/layout_assignment.cc
+++ b/tensorflow/compiler/xla/service/cpu/layout_assignment.cc
@@ -52,8 +52,7 @@ Status CpuLayoutAssignment::AddBackendConstraints(
   tensorflow::gtl::FlatMap<const HloInstruction*, bool>
       should_make_rhs_col_major_cache;
   auto should_make_rhs_col_major = [&](const HloInstruction& instruction) {
-    if (ProfitableToImplementDotInUntiledLlvmIr(instruction) !=
-        DotInLlvmIrProfitable::kWithColumnMajorRhs) {
+    if (!ProfitableToMakeDotRhsColumnMajor(instruction)) {
       return false;
     }
 
@@ -69,8 +68,7 @@ Status CpuLayoutAssignment::AddBackendConstraints(
 
     bool result = std::all_of(
         rhs->users().begin(), rhs->users().end(), [&](HloInstruction* user) {
-          return ProfitableToImplementDotInUntiledLlvmIr(*user) ==
-                     DotInLlvmIrProfitable::kWithColumnMajorRhs &&
+          return ProfitableToMakeDotRhsColumnMajor(*user) &&
                  user->operand(0) != rhs;
         });
 
-- 
GitLab


From c294fcfd85c03a801d3aad83cfd08055dadbad1a Mon Sep 17 00:00:00 2001
From: Mustafa Ispir <ispir@google.com>
Date: Tue, 28 Nov 2017 14:10:24 -0800
Subject: [PATCH 0361/1225] Dataset support within Estimator. With this cl
 Input_fn can return a Dataset.

PiperOrigin-RevId: 177215252
---
 tensorflow/python/estimator/BUILD             |  1 +
 tensorflow/python/estimator/estimator.py      | 49 +++++++++---
 tensorflow/python/estimator/estimator_test.py | 74 +++++++++++++++++++
 3 files changed, 114 insertions(+), 10 deletions(-)

diff --git a/tensorflow/python/estimator/BUILD b/tensorflow/python/estimator/BUILD
index 03f386e9cf..8e6945b0f3 100644
--- a/tensorflow/python/estimator/BUILD
+++ b/tensorflow/python/estimator/BUILD
@@ -433,6 +433,7 @@ py_library(
         "//tensorflow/python:summary",
         "//tensorflow/python:training",
         "//tensorflow/python:util",
+        "//tensorflow/python/data",
         "//tensorflow/python/saved_model:builder",
         "//tensorflow/python/saved_model:tag_constants",
         "//third_party/py/numpy",
diff --git a/tensorflow/python/estimator/estimator.py b/tensorflow/python/estimator/estimator.py
index f267f4a54e..63103ef4c1 100644
--- a/tensorflow/python/estimator/estimator.py
+++ b/tensorflow/python/estimator/estimator.py
@@ -30,6 +30,7 @@ from google.protobuf import message
 from tensorflow.core.framework import summary_pb2
 from tensorflow.core.protobuf import config_pb2
 from tensorflow.python.client import session as tf_session
+from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.eager import context
 from tensorflow.python.estimator import model_fn as model_fn_lib
 from tensorflow.python.estimator import run_config
@@ -416,7 +417,7 @@ class Estimator(object):
     with ops.Graph().as_default() as g:
       random_seed.set_random_seed(self._config.tf_random_seed)
       self._create_and_assert_global_step(g)
-      features = self._get_features_from_input_fn(
+      features, input_hooks = self._get_features_from_input_fn(
           input_fn, model_fn_lib.ModeKeys.PREDICT)
       estimator_spec = self._call_model_fn(
           features, None, model_fn_lib.ModeKeys.PREDICT, self.config)
@@ -426,7 +427,7 @@ class Estimator(object):
               checkpoint_filename_with_path=checkpoint_path,
               scaffold=estimator_spec.scaffold,
               config=self._session_config),
-          hooks=hooks) as mon_sess:
+          hooks=input_hooks + hooks) as mon_sess:
         while not mon_sess.should_stop():
           preds_evaluated = mon_sess.run(predictions)
           if not isinstance(predictions, dict):
@@ -582,6 +583,11 @@ class Estimator(object):
   def _get_features_from_input_fn(self, input_fn, mode):
     """Extracts the `features` from return values of `input_fn`."""
     result = self._call_input_fn(input_fn, mode)
+    input_hooks = []
+    if isinstance(result, dataset_ops.Dataset):
+      iterator = result.make_initializable_iterator()
+      input_hooks.append(_DatasetInitializerHook(iterator))
+      result = iterator.get_next()
     if isinstance(result, (list, tuple)):
       # Unconditionally drop the label (the second element of result).
       result = result[0]
@@ -590,16 +596,22 @@ class Estimator(object):
       logging.warning('Input graph does not use tf.data.Dataset or contain a '
                       'QueueRunner. That means predict yields forever. '
                       'This is probably a mistake.')
-    return result
+    return result, input_hooks
 
   def _get_features_and_labels_from_input_fn(self, input_fn, mode):
+    """Extracts the `features` and labels from return values of `input_fn`."""
     result = self._call_input_fn(input_fn, mode)
+    input_hooks = []
+    if isinstance(result, dataset_ops.Dataset):
+      iterator = result.make_initializable_iterator()
+      input_hooks.append(_DatasetInitializerHook(iterator))
+      result = iterator.get_next()
     if isinstance(result, (list, tuple)):
       if len(result) != 2:
         raise ValueError(
             'input_fn should return (feautures, labels) as a len 2 tuple.')
-      return result
-    return result, None
+      return result[0], result[1], input_hooks
+    return result, None, input_hooks
 
   def _extract_batch_length(self, preds_evaluated):
     """Extracts batch length of predictions."""
@@ -723,8 +735,10 @@ class Estimator(object):
       random_seed.set_random_seed(self._config.tf_random_seed)
       global_step_tensor = self._create_and_assert_global_step(g)
       training_util._get_or_create_global_step_read()  # pylint: disable=protected-access
-      features, labels = self._get_features_and_labels_from_input_fn(
-          input_fn, model_fn_lib.ModeKeys.TRAIN)
+      features, labels, input_hooks = (
+          self._get_features_and_labels_from_input_fn(
+              input_fn, model_fn_lib.ModeKeys.TRAIN))
+      worker_hooks.extend(input_hooks)
       estimator_spec = self._call_model_fn(
           features, labels, model_fn_lib.ModeKeys.TRAIN, self.config)
       # Check if the user created a loss summary, and add one if they didn't.
@@ -822,8 +836,9 @@ class Estimator(object):
     with ops.Graph().as_default() as g:
       random_seed.set_random_seed(self._config.tf_random_seed)
       global_step_tensor = self._create_and_assert_global_step(g)
-      features, labels = self._get_features_and_labels_from_input_fn(
-          input_fn, model_fn_lib.ModeKeys.EVAL)
+      features, labels, input_hooks = (
+          self._get_features_and_labels_from_input_fn(
+              input_fn, model_fn_lib.ModeKeys.EVAL))
       estimator_spec = self._call_model_fn(
           features, labels, model_fn_lib.ModeKeys.EVAL, self.config)
 
@@ -844,7 +859,8 @@ class Estimator(object):
             'already defines a default metric with the same name.')
       eval_dict[ops.GraphKeys.GLOBAL_STEP] = global_step_tensor
 
-      all_hooks = list(hooks or [])
+      all_hooks = list(input_hooks)
+      all_hooks.extend(hooks)
       all_hooks.extend(list(estimator_spec.evaluation_hooks or []))
 
       eval_results = evaluation._evaluate_once(  # pylint: disable=protected-access
@@ -1039,3 +1055,16 @@ def _has_dataset_or_queue_runner(maybe_tensor):
 
   # Now, check queue.
   return ops.get_default_graph().get_collection(ops.GraphKeys.QUEUE_RUNNERS)
+
+
+class _DatasetInitializerHook(training.SessionRunHook):
+
+  def __init__(self, iterator):
+    self._iterator = iterator
+
+  def begin(self):
+    self._initializer = self._iterator.initializer
+
+  def after_create_session(self, session, coord):
+    del coord
+    session.run(self._initializer)
diff --git a/tensorflow/python/estimator/estimator_test.py b/tensorflow/python/estimator/estimator_test.py
index c1b773b8c4..db64fbc9cc 100644
--- a/tensorflow/python/estimator/estimator_test.py
+++ b/tensorflow/python/estimator/estimator_test.py
@@ -913,6 +913,80 @@ class EstimatorGetVariablesTest(test.TestCase):
     self.assertEqual(3., est.get_variable_value('three'))
 
 
+class EstimatorDatasetIntegrationTest(test.TestCase):
+  """Tests dataset integration."""
+
+  def test_returned_by_input_fn(self):
+
+    def _input_fn():
+      return dataset_ops.Dataset.from_tensors(([1.], [2.]))
+
+    def _model_fn(features, labels, mode):
+      return model_fn_lib.EstimatorSpec(
+          mode,
+          loss=features + labels,  # 1 + 2
+          train_op=state_ops.assign_add(training.get_global_step(), 1))
+
+    est = estimator.Estimator(model_fn=_model_fn)
+    est.train(_input_fn, steps=1)
+    scores = est.evaluate(_input_fn, steps=1)
+    self.assertEqual(3., scores[model_fn_lib.LOSS_METRIC_KEY])
+
+  def test_with_none_labels(self):
+
+    def _input_fn():
+      return dataset_ops.Dataset.from_tensors([7.])
+
+    def _model_fn(features, labels, mode):
+      self.assertIsNone(labels)
+      return model_fn_lib.EstimatorSpec(
+          mode,
+          loss=features,  # 7
+          train_op=state_ops.assign_add(training.get_global_step(), 1))
+
+    est = estimator.Estimator(model_fn=_model_fn)
+    est.train(_input_fn, steps=1)
+    scores = est.evaluate(_input_fn, steps=1)
+    self.assertEqual(7., scores[model_fn_lib.LOSS_METRIC_KEY])
+
+  def test_with_predict(self):
+
+    def _input_fn():
+      return dataset_ops.Dataset.from_tensors([10.])
+
+    def _model_fn(features, labels, mode):
+      _ = labels
+      return model_fn_lib.EstimatorSpec(
+          mode,
+          predictions=features,  # 10
+          loss=features,  # 10
+          train_op=state_ops.assign_add(training.get_global_step(), 1))
+
+    est = estimator.Estimator(model_fn=_model_fn)
+    est.train(_input_fn, steps=1)
+    self.assertEqual([10.], next(est.predict(input_fn=_input_fn)))
+
+  def test_batching(self):
+
+    def _input_fn():
+      return dataset_ops.Dataset.from_tensor_slices(([[1.], [2.]],
+                                                     [[10.], [20.]])).batch(1)
+
+    def _model_fn(features, labels, mode):
+      return model_fn_lib.EstimatorSpec(
+          mode,
+          predictions=features,
+          loss=features + (0 if labels is None else labels),  # 11, 22
+          train_op=state_ops.assign_add(training.get_global_step(), 1))
+
+    est = estimator.Estimator(model_fn=_model_fn)
+    est.train(_input_fn)
+    scores = est.evaluate(_input_fn)
+    # (11 + 22)/2 = 16.5
+    self.assertEqual(16.5, scores[model_fn_lib.LOSS_METRIC_KEY])
+    self.assertEqual([1., 2.], list(est.predict(_input_fn)))
+
+
 class EstimatorEvaluateTest(test.TestCase):
 
   def test_input_fn_args(self):
-- 
GitLab


From 49bb801e65caf6afeb7cc7f67a168c9a19582ad1 Mon Sep 17 00:00:00 2001
From: HyoukJoong Lee <hyouklee@google.com>
Date: Tue, 28 Nov 2017 14:11:13 -0800
Subject: [PATCH 0362/1225] Changed to allow removing side-effect instructions
 from an HLO computation and moved the condition to the hlo_dce pass.

PiperOrigin-RevId: 177215395
---
 .../compiler/xla/service/hlo_computation.cc   |  7 ++-----
 .../compiler/xla/service/hlo_computation.h    | 16 ++++++++++-----
 tensorflow/compiler/xla/service/hlo_dce.cc    |  3 ++-
 .../compiler/xla/service/hlo_dce_test.cc      | 20 +++++++++++++++++++
 .../xla/service/while_loop_simplifier.cc      |  4 ++--
 5 files changed, 37 insertions(+), 13 deletions(-)

diff --git a/tensorflow/compiler/xla/service/hlo_computation.cc b/tensorflow/compiler/xla/service/hlo_computation.cc
index c215cc48d6..014a851c96 100644
--- a/tensorflow/compiler/xla/service/hlo_computation.cc
+++ b/tensorflow/compiler/xla/service/hlo_computation.cc
@@ -176,10 +176,6 @@ bool HloComputation::IsRemovable(const HloInstruction* instruction) {
     return false;
   }
 
-  if (instruction->HasSideEffect()) {
-    return false;
-  }
-
   return true;
 }
 
@@ -207,7 +203,8 @@ Status HloComputation::RemoveInstructionAndUnusedOperands(
     worklist.pop();
 
     if (removed.count(item) != 0 || item->user_count() != 0 ||
-        item == root_instruction() || !IsRemovable(item)) {
+        item == root_instruction() || !IsRemovable(item) ||
+        item->HasSideEffect()) {
       continue;
     }
     for (int i = 0; i < item->operand_count(); ++i) {
diff --git a/tensorflow/compiler/xla/service/hlo_computation.h b/tensorflow/compiler/xla/service/hlo_computation.h
index 353b30bc69..ccedda2a03 100644
--- a/tensorflow/compiler/xla/service/hlo_computation.h
+++ b/tensorflow/compiler/xla/service/hlo_computation.h
@@ -313,11 +313,17 @@ class HloComputation {
           replacements,
       HloModule* module = nullptr, const string& suffix = "clone");
 
-  // Returns true if the given instruction can be removed from the
-  // computation. Instructions such as parameters and send/receive instructions
-  // cannot be removed without violating invariants of the HLO computation or
-  // module with the exception of fusion computation.  A parameter instruction
-  // is removable for a fusion computation.
+  // Returns true if the given instruction can be removed from the computation.
+  // Parameter instructions cannot be removed without violating invariants of
+  // the HLO computation with the exception of fusion computation. A parameter
+  // instruction is removable for a fusion computation.
+  //
+  // Note that IsRemovable() is a necessariy condition to remove an instruction
+  // rather than a sufficient condition. For example, instructions with
+  // side-effect (e.g., Send, Infeed) may be removed from a computation, but the
+  // transformation must guarantee the invariants relevant to the instructions
+  // still hold (e.g., Send and Recv must be removed together to make each
+  // channel complete).
   bool IsRemovable(const HloInstruction* instruction);
 
   // Returns true if this computation has a side effect. A computation has a
diff --git a/tensorflow/compiler/xla/service/hlo_dce.cc b/tensorflow/compiler/xla/service/hlo_dce.cc
index 40e67c8780..1e5f0f797a 100644
--- a/tensorflow/compiler/xla/service/hlo_dce.cc
+++ b/tensorflow/compiler/xla/service/hlo_dce.cc
@@ -55,7 +55,8 @@ StatusOr<bool> HloDCE::Run(HloModule* module) {
     for (auto* instruction : computation->instructions()) {
       if (instruction->user_count() == 0 &&
           live_instructions.count(instruction) == 0 &&
-          computation->IsRemovable(instruction)) {
+          computation->IsRemovable(instruction) &&
+          !instruction->HasSideEffect()) {
         dead_roots.push_back(instruction);
       }
     }
diff --git a/tensorflow/compiler/xla/service/hlo_dce_test.cc b/tensorflow/compiler/xla/service/hlo_dce_test.cc
index d54b9a2708..5a56607a66 100644
--- a/tensorflow/compiler/xla/service/hlo_dce_test.cc
+++ b/tensorflow/compiler/xla/service/hlo_dce_test.cc
@@ -70,6 +70,26 @@ TEST_F(HloDceTest, NoDeadCode) {
   EXPECT_EQ(3, computation->instruction_count());
 }
 
+TEST_F(HloDceTest, InstructionsWithSideEffect) {
+  // Verify that side-effect instructions (Send in this test) are not removed.
+  auto builder = HloComputation::Builder(TestName());
+  auto constant = builder.AddInstruction(
+      HloInstruction::CreateConstant(Literal::CreateR0<float>(42.0f)));
+  builder.AddInstruction(
+      HloInstruction::CreateSend(constant, /*channel_id=*/0));
+  builder.AddInstruction(HloInstruction::CreateTuple({}));
+
+  auto module = CreateNewModule();
+  auto computation = module->AddEntryComputation(builder.Build());
+
+  EXPECT_EQ(3, computation->instruction_count());
+
+  HloDCE dce;
+  EXPECT_FALSE(dce.Run(module.get()).ValueOrDie());
+
+  EXPECT_EQ(3, computation->instruction_count());
+}
+
 TEST_F(HloDceTest, DeadParameters) {
   // Verify that dead parameters are not removed, but use of the dead parameters
   // are.
diff --git a/tensorflow/compiler/xla/service/while_loop_simplifier.cc b/tensorflow/compiler/xla/service/while_loop_simplifier.cc
index b38ee907d7..b2fd64a4d9 100644
--- a/tensorflow/compiler/xla/service/while_loop_simplifier.cc
+++ b/tensorflow/compiler/xla/service/while_loop_simplifier.cc
@@ -289,7 +289,7 @@ static StatusOr<bool> TryRemoveDeadWhileParams(HloInstruction* while_op) {
   // Don't try this transformation if the while loop isn't removable, since if
   // it succeeds ultimately we're going to have to replace the old while loop
   // with a new one.
-  if (!while_op->parent()->IsRemovable(while_op)) {
+  if (!while_op->parent()->IsRemovable(while_op) || while_op->HasSideEffect()) {
     VLOG(2) << "Can't remove dead parameters from non-removable while op.";
     return false;
   }
@@ -558,7 +558,7 @@ static StatusOr<bool> TryRemoveWhileLoop(HloInstruction* while_op) {
   // the loop aren't removed, just cloned and added back to the loop.
   // Nevertheless our infrastructure sees loop simplification as removal of
   // these nodes and currently doesn't allow it.
-  if (!while_op->parent()->IsRemovable(while_op)) {
+  if (!while_op->parent()->IsRemovable(while_op) || while_op->HasSideEffect()) {
     VLOG(2) << "Not attempting to remove while loop it is not removable: "
             << while_op->ToShortString();
     return false;
-- 
GitLab


From b5683d210834fd314410ea4b9c1a756b473fdece Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 28 Nov 2017 14:12:19 -0800
Subject: [PATCH 0363/1225] Change description of
 sparse_column_with_integerized_feature to make consistent with the
 _SparseColumn that it creates.

Documentation here says that the bucket_size must be an int that is greater than 1. The check performed when creating a _SparseColumn only requires that the bucket_size be at least 1.  Hence, bucket_size==1 should be ok.

PiperOrigin-RevId: 177215556
---
 tensorflow/contrib/layers/python/layers/feature_column.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tensorflow/contrib/layers/python/layers/feature_column.py b/tensorflow/contrib/layers/python/layers/feature_column.py
index 226d933d85..092d418c3f 100644
--- a/tensorflow/contrib/layers/python/layers/feature_column.py
+++ b/tensorflow/contrib/layers/python/layers/feature_column.py
@@ -521,7 +521,7 @@ def sparse_column_with_integerized_feature(column_name,
 
   Args:
     column_name: A string defining sparse column name.
-    bucket_size: An int that is > 1. The number of buckets. It should be bigger
+    bucket_size: An int that is >= 1. The number of buckets. It should be bigger
       than maximum feature. In other words features in this column should be an
       int64 in range [0, bucket_size)
     combiner: A string specifying how to reduce if the sparse column is
@@ -539,7 +539,7 @@ def sparse_column_with_integerized_feature(column_name,
     An integerized _SparseColumn definition.
 
   Raises:
-    ValueError: bucket_size is not greater than 1.
+    ValueError: bucket_size is less than 1.
     ValueError: dtype is not integer.
   """
   return _SparseColumnIntegerized(
-- 
GitLab


From bb33903b4b34e4ac096908c1a08cf5ffa33b6ccf Mon Sep 17 00:00:00 2001
From: Olivia Nordquist <nolivia@google.com>
Date: Tue, 28 Nov 2017 14:17:25 -0800
Subject: [PATCH 0364/1225] add parentheses because this test is failing in my
 current CL

PiperOrigin-RevId: 177216384
---
 tensorflow/python/framework/ops_test.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/python/framework/ops_test.py b/tensorflow/python/framework/ops_test.py
index e929cc8abf..371eadcd13 100644
--- a/tensorflow/python/framework/ops_test.py
+++ b/tensorflow/python/framework/ops_test.py
@@ -1537,7 +1537,7 @@ class ControlDependenciesTest(test_util.TensorFlowTestCase):
       self.assertEqual(future.calls, 1)
     else:
       a = constant_op.constant(1.0)
-      b = future
+      b = future()
       with ops.control_dependencies([a, b]):
         c = constant_op.constant(3.0)
       self.assertEqual(future.calls, 1)
-- 
GitLab


From e917bf7131b3216f7d09c0251d27a9aafd5b8373 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 28 Nov 2017 14:36:12 -0800
Subject: [PATCH 0365/1225] [TF] Improve LogSoftMax performance

In Eigen, eval should be called immediately before a
broadcast. Otherwise, broadcast's lazy evaluation causes the
broadcasted expression to be evaluated many times. Moving the eval
changes the number of calls to log from batch_size * num_classes to
batch_size.

PiperOrigin-RevId: 177219486
---
 tensorflow/core/kernels/softmax_op_functor.h | 33 +++++++++-----------
 1 file changed, 15 insertions(+), 18 deletions(-)

diff --git a/tensorflow/core/kernels/softmax_op_functor.h b/tensorflow/core/kernels/softmax_op_functor.h
index 1f38bdce8c..d3a267ed87 100644
--- a/tensorflow/core/kernels/softmax_op_functor.h
+++ b/tensorflow/core/kernels/softmax_op_functor.h
@@ -64,23 +64,21 @@ struct SoftmaxEigenImpl {
     one_by_class.set(1, num_classes);
 #endif
     // shifted_logits = logits - max(logits along classes);
-    auto shifted_logits = (logits -
-                           logits.maximum(along_class)
-                               .eval()
-                               .reshape(batch_by_one)
-                               .broadcast(one_by_class));
+    auto shifted_logits = (logits - logits.maximum(along_class)
+                                        .eval()
+                                        .reshape(batch_by_one)
+                                        .broadcast(one_by_class));
     if (log) {
       // Calculate the log of the softmax
       // softmax = logits - max(logits along classes);
       softmax.device(d) = shifted_logits;
       // softmax = softmax - log(sum(exp(softmax along classes)));
-      softmax.device(d) = (softmax -
-                           softmax.exp()
-                               .sum(along_class)
-                               .eval()
-                               .reshape(batch_by_one)
-                               .log()
-                               .broadcast(one_by_class));
+      softmax.device(d) = (softmax - softmax.exp()
+                                         .sum(along_class)
+                                         .log()
+                                         .eval()
+                                         .reshape(batch_by_one)
+                                         .broadcast(one_by_class));
     } else {
       // NOTE(touts): If you modify this implementation please run
       // the BM_ImageNetSoftmaxFwd benchmark in nn_ops_test.cc.
@@ -88,12 +86,11 @@ struct SoftmaxEigenImpl {
       // softmax = exp(logits - max(logits along classes));
       softmax.device(d) = shifted_logits.exp();
       // softmax = softmax * (1 / sum(softmax along classes));
-      softmax.device(d) = (softmax *
-                           softmax.sum(along_class)
-                               .inverse()
-                               .eval()
-                               .reshape(batch_by_one)
-                               .broadcast(one_by_class));
+      softmax.device(d) = (softmax * softmax.sum(along_class)
+                                         .inverse()
+                                         .eval()
+                                         .reshape(batch_by_one)
+                                         .broadcast(one_by_class));
     }
   }
 };
-- 
GitLab


From f252ea2d8ac13dd5c558e3862b3885585d3bccfe Mon Sep 17 00:00:00 2001
From: Jiri Simsa <jsimsa@google.com>
Date: Tue, 28 Nov 2017 14:48:22 -0800
Subject: [PATCH 0366/1225] Deprecating
 `tf.data.Dataset.from_sparse_tensor_slices`.

PiperOrigin-RevId: 177221417
---
 tensorflow/python/data/ops/BUILD          | 1 +
 tensorflow/python/data/ops/dataset_ops.py | 2 ++
 2 files changed, 3 insertions(+)

diff --git a/tensorflow/python/data/ops/BUILD b/tensorflow/python/data/ops/BUILD
index 05acfe4de7..695d3ef790 100644
--- a/tensorflow/python/data/ops/BUILD
+++ b/tensorflow/python/data/ops/BUILD
@@ -21,6 +21,7 @@ py_library(
         "//tensorflow/python:sparse_tensor",
         "//tensorflow/python:tensor_shape",
         "//tensorflow/python:tensor_util",
+        "//tensorflow/python:util",
         "//tensorflow/python/data/util:nest",
         "//tensorflow/python/data/util:sparse",
         "//third_party/py/numpy",
diff --git a/tensorflow/python/data/ops/dataset_ops.py b/tensorflow/python/data/ops/dataset_ops.py
index dbe29c087a..b5a8622306 100644
--- a/tensorflow/python/data/ops/dataset_ops.py
+++ b/tensorflow/python/data/ops/dataset_ops.py
@@ -41,6 +41,7 @@ from tensorflow.python.ops import gen_io_ops
 from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import script_ops
 from tensorflow.python.ops import sparse_ops
+from tensorflow.python.util import deprecation
 
 
 class Dataset(object):
@@ -219,6 +220,7 @@ class Dataset(object):
     return TensorSliceDataset(tensors)
 
   @staticmethod
+  @deprecation.deprecated(None, "Use `tf.data.Dataset.from_tensor_slices()`.")
   def from_sparse_tensor_slices(sparse_tensor):
     """Splits each rank-N `tf.SparseTensor` in this dataset row-wise.
 
-- 
GitLab


From efe5658aaa6f1666d4967880311430a70bdb23b9 Mon Sep 17 00:00:00 2001
From: Skye Wanderman-Milne <skyewm@google.com>
Date: Tue, 28 Nov 2017 14:50:43 -0800
Subject: [PATCH 0367/1225] Make 'input_map' argument to import_graph_def work
 with C API.

PiperOrigin-RevId: 177221757
---
 tensorflow/python/framework/importer.py      | 88 +++++++++++++++-----
 tensorflow/python/framework/importer_test.py | 29 ++-----
 2 files changed, 74 insertions(+), 43 deletions(-)

diff --git a/tensorflow/python/framework/importer.py b/tensorflow/python/framework/importer.py
index 434cbda7ad..73c35de578 100644
--- a/tensorflow/python/framework/importer.py
+++ b/tensorflow/python/framework/importer.py
@@ -179,12 +179,11 @@ def _ProcessInputMapParam(input_map):
 
 def _ProcessReturnElementsParam(return_elements):
   """Type-checks and possibly canonicalizes `return_elements`."""
-  if return_elements is not None:
-    return_elements = tuple(return_elements)
-    if not all(isinstance(x, compat.bytes_or_text_types)
-               for x in return_elements):
-      raise TypeError('return_elements must be a list of strings.')
-  return return_elements
+  if return_elements is None: return None
+  if not all(isinstance(x, compat.bytes_or_text_types)
+             for x in return_elements):
+    raise TypeError('return_elements must be a list of strings.')
+  return tuple(compat.as_str(x) for x in return_elements)
 
 
 def _FindAttrInOpDef(attr_name, op_def):
@@ -194,16 +193,60 @@ def _FindAttrInOpDef(attr_name, op_def):
   return None
 
 
-def _PopulateTFImportGraphDefOptions(options, prefix, return_elements):
+def _ConvertInputMapValues(name, input_map):
+  """Ensures all input map values are tensors.
+
+  This should be called from inside the import name scope.
+
+  Args:
+    name: the `name` argument passed to import_graph_def
+    input_map: the `input_map` argument passed to import_graph_def.
+
+  Returns:
+    An possibly-updated version of `input_map`.
+
+  Raises:
+    ValueError: if input map values cannot be converted due to empty name scope.
+  """
+  if not all(isinstance(v, ops.Tensor) for v in input_map.values()):
+    if name == '':  # pylint: disable=g-explicit-bool-comparison
+      raise ValueError(
+          'tf.import_graph_def() requires a non-empty `name` if `input_map` '
+          'contains non-Tensor values. Try calling tf.convert_to_tensor() on '
+          '`input_map` values before calling tf.import_graph_def().')
+    with ops.name_scope('_inputs'):
+      input_map = {k: ops.convert_to_tensor(v) for k, v in input_map.items()}
+  return input_map
+
+
+def _PopulateTFImportGraphDefOptions(options, prefix, input_map,
+                                     return_elements):
   """Populates the TF_ImportGraphDefOptions `options`."""
   c_api.TF_ImportGraphDefOptionsSetPrefix(options, prefix)
 
+  for input_src, input_dst in input_map.items():
+    input_src = compat.as_str(input_src)
+    if input_src.startswith('^'):
+      src_name = compat.as_bytes(input_src[1:])
+      dst_op = input_dst._as_tf_output().oper  # pylint: disable=protected-access
+      c_api.TF_ImportGraphDefOptionsRemapControlDependency(options, src_name,
+                                                           dst_op)
+    else:
+      src_name, src_idx = _ParseTensorName(input_src)
+      src_name = compat.as_str(src_name)
+      dst_output = input_dst._as_tf_output()  # pylint: disable=protected-access
+      c_api.TF_ImportGraphDefOptionsAddInputMapping(options, src_name,
+                                                    src_idx, dst_output)
   for name in return_elements or []:
     if ':' in name:
       op_name, index = _ParseTensorName(name)
+      op_name = compat.as_str(op_name)
       c_api.TF_ImportGraphDefOptionsAddReturnOutput(options, op_name, index)
     else:
-      c_api.TF_ImportGraphDefOptionsAddReturnOperation(options, name)
+      c_api.TF_ImportGraphDefOptionsAddReturnOperation(options,
+                                                       compat.as_str(name))
+
+  # TODO(skyewm): control dependencies
 
 
 def _ProcessNewOps(graph):
@@ -312,17 +355,27 @@ def import_graph_def(graph_def, input_map=None, return_elements=None,
       else:
         prefix = ''
 
+      # Generate any input map tensors inside name scope
+      input_map = _ConvertInputMapValues(name, input_map)
+
     scoped_options = c_api_util.ScopedTFImportGraphDefOptions()
     options = scoped_options.options
-    _PopulateTFImportGraphDefOptions(options, prefix, return_elements)
+    _PopulateTFImportGraphDefOptions(options, prefix, input_map,
+                                     return_elements)
 
     with c_api_util.tf_buffer(graph_def.SerializeToString()) as serialized:
-      with errors.raise_exception_on_not_ok_status() as status:
-        results = c_api.TF_GraphImportGraphDefWithResults(
-            graph._c_graph, serialized, options, status)  # pylint: disable=protected-access
+      try:
+        with errors.raise_exception_on_not_ok_status() as status:
+          results = c_api.TF_GraphImportGraphDefWithResults(
+              graph._c_graph, serialized, options, status)  # pylint: disable=protected-access
+      except errors.InvalidArgumentError as e:
+        # Convert to ValueError for backwards compatibility.
+        raise ValueError(str(e))
 
     _ProcessNewOps(graph)
 
+    # TODO(skyewm): error if unused input map key
+
     if return_elements is None:
       return None
     else:
@@ -359,16 +412,7 @@ def import_graph_def(graph_def, input_map=None, return_elements=None,
       # more nuanced.
       g.graph_def_versions.CopyFrom(graph_def.versions)
 
-      if not all(isinstance(v, ops.Tensor) for v in input_map.values()):
-        if not scope:
-          # The caller must have passed `name=''`.
-          raise ValueError(
-              'tf.import_graph_def() requires a non-empty `name` if `input_map`'
-              ' contains non-Tensor values. Try calling tf.convert_to_tensor() '
-              'on `input_map` values before calling tf.import_graph_def().')
-        with ops.name_scope('_inputs'):
-          input_map = {k: ops.convert_to_tensor(v)
-                       for k, v in input_map.items()}
+      input_map = _ConvertInputMapValues(name, input_map)
 
       # NOTE(mrry): We do this in two passes, because there may be a cycle in
       # `graph_def`.
diff --git a/tensorflow/python/framework/importer_test.py b/tensorflow/python/framework/importer_test.py
index 5a6187c8a6..000a88bc09 100644
--- a/tensorflow/python/framework/importer_test.py
+++ b/tensorflow/python/framework/importer_test.py
@@ -201,8 +201,6 @@ class ImportGraphDefTest(test.TestCase):
       self.assertEqual(outer_inner_c.name, "outer/inner/c_1")
 
   def testInputMap(self):
-    if ops._USE_C_API: return  # TODO(skyewm): make this work with C API
-
     with ops.Graph().as_default():
       feed_a_0 = constant_op.constant(0, dtype=dtypes.int32)
       feed_b_1 = constant_op.constant(1, dtype=dtypes.int32)
@@ -230,8 +228,6 @@ class ImportGraphDefTest(test.TestCase):
       self.assertEqual(d.inputs[1], feed_b_1)
 
   def testInputMapBytes(self):
-    if ops._USE_C_API: return  # TODO(skyewm): make this work with C API
-
     with ops.Graph().as_default():
       feed_a_0 = constant_op.constant(0, dtype=dtypes.int32)
       feed_b_1 = constant_op.constant(1, dtype=dtypes.int32)
@@ -259,8 +255,6 @@ class ImportGraphDefTest(test.TestCase):
       self.assertEqual(d.inputs[1], feed_b_1)
 
   def testInputMapUnicode(self):
-    if ops._USE_C_API: return  # TODO(skyewm): make this work with C API
-
     with ops.Graph().as_default():
       feed_a_0 = constant_op.constant(0, dtype=dtypes.int32)
       feed_b_1 = constant_op.constant(1, dtype=dtypes.int32)
@@ -299,8 +293,6 @@ class ImportGraphDefTest(test.TestCase):
       self.assertEqual(b.inputs[0], a.outputs[0])
 
   def testInputMapImplicitZerothOutput(self):
-    if ops._USE_C_API: return  # TODO(skyewm): make this work with C API
-
     with ops.Graph().as_default():
       feed_a_0 = constant_op.constant(0, dtype=dtypes.int32)
       b, = importer.import_graph_def(
@@ -453,8 +445,6 @@ class ImportGraphDefTest(test.TestCase):
       self.assertTrue("Input tensor 'A:0' not found" in str(e.exception))
 
   def testMissingInputOpInGraphDefButAppearsInInputMap(self):
-    if ops._USE_C_API: return  # TODO(skyewm): make this work with C API
-
     with ops.Graph().as_default():
       feed_a_0 = constant_op.constant(5.0)
       b, = importer.import_graph_def(
@@ -589,19 +579,20 @@ class ImportGraphDefTest(test.TestCase):
       self.assertTrue("not found in graph_def: [A:2]" in str(e.exception))
 
   def testInputMapTypeMismatch(self):
-    if ops._USE_C_API: return  # TODO(skyewm): make this work with C API
-
+    if ops._USE_C_API:
+      error_msg = ("Input 0 of node import/B was passed float from Const:0 "
+                   "incompatible with expected int32.")
+    else:
+      error_msg = ("Cannot convert a tensor of type float32 to an input of "
+                   "type int32.")
     with ops.Graph().as_default():
-      with self.assertRaises(ValueError) as e:
+      with self.assertRaisesRegexp(ValueError, error_msg):
         importer.import_graph_def(
             self._MakeGraphDef("""
             node { name: 'A' op: 'IntOutput' }
             node { name: 'B' op: 'IntInput' input: 'A:0' }
             """),
             input_map={"A:0": constant_op.constant(5.0)})
-      self.assertTrue(
-          "Cannot convert a tensor of type float32 to an input of type int32."
-          in str(e.exception))
 
   def testNoReturns(self):
     with ops.Graph().as_default() as g:
@@ -825,8 +816,6 @@ class ImportGraphDefTest(test.TestCase):
       self.assertEqual("graph_def must be a GraphDef proto.", str(e.exception))
 
   def testInvalidInputForInputMap(self):
-    if ops._USE_C_API: return  # TODO(skyewm): make this work with C API
-
     with ops.Graph().as_default():
       with self.assertRaises(TypeError) as e:
         importer.import_graph_def(
@@ -967,7 +956,7 @@ class ImportGraphDefTest(test.TestCase):
     self.assertEqual(2, len(ops_with_two_inputs))
 
   def testGradient(self):
-    if ops._USE_C_API: return  # TODO(skyewm): make this work with C API
+    if ops._USE_C_API: return  # TODO(skyewm): get_shape() doesn't work
 
     with ops.Graph().as_default() as g:
       inputs = array_ops.placeholder(
@@ -1226,8 +1215,6 @@ class ImportGraphDefTest(test.TestCase):
       self.assertEqual(z_val, -2.0)
 
   def testImportGraphWithFunctionTwice(self):
-    if ops._USE_C_API: return  # TODO(skyewm): make this work with C API
-
     g = ops.Graph()
     with g.as_default():
       @function.Defun()
-- 
GitLab


From 9306dd922fde7b739c5a4230fdc6d9bd646fb71c Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 28 Nov 2017 15:02:11 -0800
Subject: [PATCH 0368/1225] Add bool value type support for gauge metrics.

PiperOrigin-RevId: 177223509
---
 .../core/lib/monitoring/collected_metrics.h   |  1 +
 .../core/lib/monitoring/collection_registry.h |  6 ++++
 tensorflow/core/lib/monitoring/gauge.h        | 33 +++++++++++++++++--
 tensorflow/core/lib/monitoring/gauge_test.cc  | 22 +++++++++++++
 tensorflow/core/lib/monitoring/metric_def.h   | 13 +++++---
 5 files changed, 69 insertions(+), 6 deletions(-)

diff --git a/tensorflow/core/lib/monitoring/collected_metrics.h b/tensorflow/core/lib/monitoring/collected_metrics.h
index fbef25619f..acdb0d86ed 100644
--- a/tensorflow/core/lib/monitoring/collected_metrics.h
+++ b/tensorflow/core/lib/monitoring/collected_metrics.h
@@ -88,6 +88,7 @@ struct Point {
   ValueType value_type;
   int64 int64_value;
   string string_value;
+  bool bool_value;
   HistogramProto histogram_value;
 
   // start_timestamp and end_timestamp indicate the time period over which this
diff --git a/tensorflow/core/lib/monitoring/collection_registry.h b/tensorflow/core/lib/monitoring/collection_registry.h
index 113d37e07d..2c8e250c56 100644
--- a/tensorflow/core/lib/monitoring/collection_registry.h
+++ b/tensorflow/core/lib/monitoring/collection_registry.h
@@ -224,6 +224,12 @@ inline void CollectValue(const string& value, Point* const point) {
   point->string_value = value;
 }
 
+template <>
+inline void CollectValue(const bool& value, Point* const point) {
+  point->value_type = ValueType::kBool;
+  point->bool_value = value;
+}
+
 template <>
 inline void CollectValue(const HistogramProto& value, Point* const point) {
   point->value_type = ValueType::kHistogram;
diff --git a/tensorflow/core/lib/monitoring/gauge.h b/tensorflow/core/lib/monitoring/gauge.h
index 75471cfb22..ec978a9193 100644
--- a/tensorflow/core/lib/monitoring/gauge.h
+++ b/tensorflow/core/lib/monitoring/gauge.h
@@ -86,8 +86,29 @@ class GaugeCell<int64> {
   TF_DISALLOW_COPY_AND_ASSIGN(GaugeCell);
 };
 
+// Explicit specialization of GaugeCell<bool>. Compared to the primary
+// template, it uses atomic values as opposed to mutex. This class is
+// thread-safe.
+template <>
+class GaugeCell<bool> {
+ public:
+  explicit GaugeCell(bool value) : value_(value) {}
+  ~GaugeCell() {}
+
+  // Atomically sets the value.
+  void Set(bool value);
+
+  // Retrieves the current value.
+  bool value() const;
+
+ private:
+  std::atomic<bool> value_;
+
+  TF_DISALLOW_COPY_AND_ASSIGN(GaugeCell);
+};
+
 // A stateful class for updating a gauge-like metric. Allowed ValueType are
-// int64 and string.
+// int64, string and bool.
 //
 // This class encapsulates a set of values (or a single value for a label-less
 // metric). Each value is identified by a tuple of labels. The class allows the
@@ -117,6 +138,9 @@ class Gauge {
   //
   // auto* integer_gauge = Gauge<int64, 0>::New("/tensorflow/integer_gauge",
   //   "Integer gauge")
+  //
+  // auto* bool_gauge = Gauge<bool, 0>::New("/tensorflow/bool_gauge",
+  //   "Bool gauge")
   template <typename... MetricDefArgs>
   static Gauge* New(MetricDefArgs&&... metric_def_args);
 
@@ -172,12 +196,17 @@ inline void GaugeCell<int64>::Set(int64 value) { value_ = value; }
 
 inline int64 GaugeCell<int64>::value() const { return value_; }
 
+inline void GaugeCell<bool>::Set(bool value) { value_ = value; }
+
+inline bool GaugeCell<bool>::value() const { return value_; }
+
 template <typename ValueType, int NumLabels>
 template <typename... MetricDefArgs>
 Gauge<ValueType, NumLabels>* Gauge<ValueType, NumLabels>::New(
     MetricDefArgs&&... metric_def_args) {
   static_assert(std::is_same<ValueType, int64>::value ||
-                    std::is_same<ValueType, string>::value,
+                    std::is_same<ValueType, string>::value ||
+                    std::is_same<ValueType, bool>::value,
                 "Gauge only allows int64 and string types.");
   return new Gauge<ValueType, NumLabels>(
       MetricDef<MetricKind::kGauge, ValueType, NumLabels>(
diff --git a/tensorflow/core/lib/monitoring/gauge_test.cc b/tensorflow/core/lib/monitoring/gauge_test.cc
index f98cfe2a3b..c8f673db38 100644
--- a/tensorflow/core/lib/monitoring/gauge_test.cc
+++ b/tensorflow/core/lib/monitoring/gauge_test.cc
@@ -87,6 +87,28 @@ TEST(GaugeOfStringValue, GetCell) {
   EXPECT_EQ("bar", same_cell->value());
 }
 
+auto* bool_gauge =
+    Gauge<bool, 0>::New("/tensorflow/test/bool_gauge", "Gauge of bool value.");
+
+TEST(GaugeOfBoolValue, InitializedWithFalseValue) {
+  EXPECT_EQ(false, bool_gauge->GetCell()->value());
+}
+
+TEST(GaugeOfBoolValue, GetCell) {
+  auto* cell = bool_gauge->GetCell();
+  EXPECT_EQ(false, cell->value());
+
+  cell->Set(true);
+  EXPECT_EQ(true, cell->value());
+
+  auto* same_cell = bool_gauge->GetCell();
+  EXPECT_EQ(true, cell->value());
+
+  same_cell->Set(false);
+  EXPECT_EQ(false, cell->value());
+  EXPECT_EQ(false, same_cell->value());
+}
+
 }  // namespace
 }  // namespace monitoring
 }  // namespace tensorflow
diff --git a/tensorflow/core/lib/monitoring/metric_def.h b/tensorflow/core/lib/monitoring/metric_def.h
index a7f14f9c94..f046842618 100644
--- a/tensorflow/core/lib/monitoring/metric_def.h
+++ b/tensorflow/core/lib/monitoring/metric_def.h
@@ -28,16 +28,16 @@ namespace monitoring {
 // The different metric kinds available.
 //
 // Gauge indicates that the metric's values are instantaneous measurements of a
-// (typically) continuously varying quantity or a string value. Examples: a
-// process's current heap size, a queue's current length, the name of the binary
-// used by a process.
+// (typically) continuously varying value. Examples: a process's current heap
+// size, a queue's current length, the name of the binary used by a process,
+// whether a task is complete.
 //
 // Cumulative indicates that the metric's values represent non-negative changes
 // over specified time periods. Example: the number of rpc calls to a service.
 enum class MetricKind : int { kGauge = 0, kCumulative };
 
 // The type of the metric values.
-enum class ValueType : int { kInt64 = 0, kHistogram, kString };
+enum class ValueType : int { kInt64 = 0, kHistogram, kString, kBool };
 
 // Everything in the internal namespace is implementation details. Do not depend
 // on this.
@@ -61,6 +61,11 @@ inline ValueType GetValueType<string>() {
   return ValueType::kString;
 }
 
+template <>
+inline ValueType GetValueType<bool>() {
+  return ValueType::kBool;
+}
+
 }  // namespace internal
 
 // Abstract base class for a metric definition.
-- 
GitLab


From 8966a794411bd5d17e5ef024a96140f85a9ab500 Mon Sep 17 00:00:00 2001
From: Yunxing Dai <yunxing@google.com>
Date: Tue, 28 Nov 2017 15:14:30 -0800
Subject: [PATCH 0369/1225] Add a log test for bfloat16.

PiperOrigin-RevId: 177225564
---
 tensorflow/compiler/xla/tests/bfloat16_test.cc | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/tensorflow/compiler/xla/tests/bfloat16_test.cc b/tensorflow/compiler/xla/tests/bfloat16_test.cc
index a1c53ef2aa..ac3f3f4c9d 100644
--- a/tensorflow/compiler/xla/tests/bfloat16_test.cc
+++ b/tensorflow/compiler/xla/tests/bfloat16_test.cc
@@ -61,6 +61,15 @@ XLA_TEST_F(Bfloat16Test, ScalarOperation) {
                                 error_spec_);
 }
 
+XLA_TEST_F(Bfloat16Test, LogOperation) {
+  ComputationBuilder builder(client_, TestName());
+  auto x = builder.ConstantR0<bfloat16>(static_cast<bfloat16>(4.0f));
+  builder.Log(x);
+
+  ComputeAndCompareR0<bfloat16>(&builder, static_cast<bfloat16>(1.387f), {},
+                                error_spec_);
+}
+
 XLA_TEST_F(Bfloat16Test, NegateScalarF16) {
   ComputationBuilder builder(client_, TestName());
   builder.Neg(builder.ConstantR0<bfloat16>(static_cast<bfloat16>(2.1f)));
-- 
GitLab


From d72e2a318c6b15d800aa1468dc2af658ea40dffd Mon Sep 17 00:00:00 2001
From: Alexandre Passos <apassos@google.com>
Date: Tue, 28 Nov 2017 15:16:16 -0800
Subject: [PATCH 0370/1225] scatter_nd_update for resource variables

PiperOrigin-RevId: 177225812
---
 .../api_def_ResourceScatterNdUpdate.pbtxt     | 69 ++++++++++++++++++
 tensorflow/core/framework/common_shape_fns.cc |  7 +-
 tensorflow/core/kernels/BUILD                 |  6 +-
 tensorflow/core/kernels/scatter_nd_op.cc      | 63 ++++++++++++++---
 tensorflow/core/ops/state_ops.cc              | 56 +++++++++++++++
 tensorflow/python/kernel_tests/BUILD          |  1 +
 .../kernel_tests/scatter_nd_ops_test.py       | 15 ++++
 tensorflow/python/ops/state_ops.py            | 70 ++++++++++++++++++-
 8 files changed, 274 insertions(+), 13 deletions(-)
 create mode 100644 tensorflow/core/api_def/base_api/api_def_ResourceScatterNdUpdate.pbtxt

diff --git a/tensorflow/core/api_def/base_api/api_def_ResourceScatterNdUpdate.pbtxt b/tensorflow/core/api_def/base_api/api_def_ResourceScatterNdUpdate.pbtxt
new file mode 100644
index 0000000000..b07ee9fda9
--- /dev/null
+++ b/tensorflow/core/api_def/base_api/api_def_ResourceScatterNdUpdate.pbtxt
@@ -0,0 +1,69 @@
+op {
+  graph_op_name: "ResourceScatterNdUpdate"
+  in_arg {
+    name: "ref"
+    description: <<END
+A resource handle. Must be from a VarHandleOp.
+END
+  }
+  in_arg {
+    name: "indices"
+    description: <<END
+A Tensor. Must be one of the following types: int32, int64.
+A tensor of indices into ref.
+END
+  }
+  in_arg {
+    name: "updates"
+    description: <<END
+A Tensor. Must have the same type as ref. A tensor of updated
+values to add to ref.
+END
+  }
+  attr {
+    name: "use_locking"
+    description: <<END
+An optional bool. Defaults to True. If True, the assignment will
+be protected by a lock; otherwise the behavior is undefined,
+but may exhibit less contention.
+END
+  }
+  summary: "Applies sparse `updates` to individual values or slices within a given"
+  description: <<END
+variable according to `indices`.
+
+`ref` is a `Tensor` with rank `P` and `indices` is a `Tensor` of rank `Q`.
+
+`indices` must be integer tensor, containing indices into `ref`.
+It must be shape `[d_0, ..., d_{Q-2}, K]` where `0 < K <= P`.
+
+The innermost dimension of `indices` (with length `K`) corresponds to
+indices into elements (if `K = P`) or slices (if `K < P`) along the `K`th
+dimension of `ref`.
+
+`updates` is `Tensor` of rank `Q-1+P-K` with shape:
+
+```
+[d_0, ..., d_{Q-2}, ref.shape[K], ..., ref.shape[P-1]].
+```
+
+For example, say we want to update 4 scattered elements to a rank-1 tensor to
+8 elements. In Python, that update would look like this:
+
+```python
+    ref = tfe.Variable([1, 2, 3, 4, 5, 6, 7, 8])
+    indices = tf.constant([[4], [3], [1] ,[7]])
+    updates = tf.constant([9, 10, 11, 12])
+    update = tf.scatter_nd_update(ref, indices, updates)
+    with tf.Session() as sess:
+      print sess.run(update)
+```
+
+The resulting update to ref would look like this:
+
+    [1, 11, 3, 10, 9, 6, 7, 12]
+
+See @{tf.scatter_nd} for more details about how to make updates to
+slices.
+END
+}
diff --git a/tensorflow/core/framework/common_shape_fns.cc b/tensorflow/core/framework/common_shape_fns.cc
index ea66863bed..be7f2e2808 100644
--- a/tensorflow/core/framework/common_shape_fns.cc
+++ b/tensorflow/core/framework/common_shape_fns.cc
@@ -1307,6 +1307,9 @@ Status ValidateSparseTensor(InferenceContext* c, ShapeHandle indices_shape,
 
 Status ScatterNdUpdateShape(InferenceContext* c) {
   ShapeHandle input_shape = c->input(0);
+  if (c->input_handle_shapes_and_types(0) != nullptr) {
+    input_shape = (*c->input_handle_shapes_and_types(0))[0].shape;
+  }
   ShapeHandle indices_shape;
   TF_RETURN_IF_ERROR(c->WithRankAtLeast(c->input(1), 1, &indices_shape));
   ShapeHandle updates_shape;
@@ -1361,7 +1364,9 @@ Status ScatterNdUpdateShape(InferenceContext* c) {
     }
   }
 
-  c->set_output(0, input_shape);
+  if (c->input_handle_shapes_and_types(0) == nullptr) {
+    c->set_output(0, input_shape);
+  }
   return Status::OK();
 }
 
diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD
index b86739eea7..eff15e809a 100644
--- a/tensorflow/core/kernels/BUILD
+++ b/tensorflow/core/kernels/BUILD
@@ -3918,7 +3918,11 @@ tf_kernel_library(
         "scatter_nd_op.h",
         "scatter_nd_op_gpu.cu.cc",
     ],
-    deps = STATE_DEPS + [":dense_update_functor"],
+    deps = STATE_DEPS + [
+        ":dense_update_functor",
+        ":training_op_helpers",
+        ":variable_ops",
+    ],
 )
 
 tf_kernel_library(
diff --git a/tensorflow/core/kernels/scatter_nd_op.cc b/tensorflow/core/kernels/scatter_nd_op.cc
index 484932ab01..98c0181afb 100644
--- a/tensorflow/core/kernels/scatter_nd_op.cc
+++ b/tensorflow/core/kernels/scatter_nd_op.cc
@@ -21,6 +21,7 @@ limitations under the License.
 #endif  // GOOGLE_CUDA
 
 #include "tensorflow/core/kernels/scatter_nd_op.h"
+
 #include "tensorflow/core/framework/op_kernel.h"
 #include "tensorflow/core/framework/register_types.h"
 #include "tensorflow/core/framework/tensor.h"
@@ -28,6 +29,8 @@ limitations under the License.
 #include "tensorflow/core/kernels/bounds_check.h"
 #include "tensorflow/core/kernels/dense_update_functor.h"
 #include "tensorflow/core/kernels/fill_functor.h"
+#include "tensorflow/core/kernels/training_op_helpers.h"
+#include "tensorflow/core/kernels/variable_ops.h"
 #include "tensorflow/core/lib/strings/str_util.h"
 #include "tensorflow/core/platform/mutex.h"
 #include "tensorflow/core/platform/types.h"
@@ -83,7 +86,10 @@ class ScatterNdUpdateOp : public OpKernel {
     const DataType dt = DataTypeToEnum<T>::v();
     const DataType dt_ref = DataTypeToEnum<T>::ref();
     const DataType index_t = DataTypeToEnum<Index>::v();
-    if (IsRefType(c->input_type(0))) {
+    dtype_ = c->input_type(0);
+    if (c->input_type(0) == DT_RESOURCE) {
+      // TODO(apassos): what to validate here?
+    } else if (IsRefType(c->input_type(0))) {
       OP_REQUIRES_OK(c, c->MatchSignature({dt_ref, index_t, dt}, {dt_ref}));
       OP_REQUIRES_OK(c, c->GetAttr("use_locking", &use_exclusive_lock_));
     } else {
@@ -93,7 +99,16 @@ class ScatterNdUpdateOp : public OpKernel {
   }
 
   void Compute(OpKernelContext* c) override {
-    if (use_exclusive_lock_) {
+    if (dtype_ == DT_RESOURCE) {
+      if (use_exclusive_lock_) {
+        Var* v;
+        OP_REQUIRES_OK(c, LookupResource(c, HandleFromInput(c, 0), &v));
+        mutex_lock m(*v->mu());
+        DoCompute(c);
+      } else {
+        DoCompute(c);
+      }
+    } else if (use_exclusive_lock_) {
       // If we're here, it means the input type is a ref.
       DCHECK(IsRefType(c->input_dtype(0)));
       // Hold mutex while we apply updates
@@ -105,6 +120,7 @@ class ScatterNdUpdateOp : public OpKernel {
   }
 
  private:
+  DataType dtype_;
   bool use_exclusive_lock_;
 
   void DoCompute(OpKernelContext* c) {
@@ -113,7 +129,20 @@ class ScatterNdUpdateOp : public OpKernel {
     Tensor params;
     TensorShape params_shape;
 
-    if (IsRefType(c->input_dtype(0))) {
+    if (dtype_ == DT_RESOURCE) {
+      Var* v;
+      OP_REQUIRES_OK(c, LookupResource(c, HandleFromInput(c, 0), &v));
+      Tensor* t = v->tensor();
+      if (!use_exclusive_lock_) {
+        // We're not holding the lock in the outer scope so need it here.
+        mutex_lock m(*v->mu());
+        OP_REQUIRES_OK(c, PrepareToUpdateVariable<Device, T>(c, t));
+      } else {
+        OP_REQUIRES_OK(c, PrepareToUpdateVariable<Device, T>(c, t));
+      }
+      params = *t;
+      params_shape = params.shape();
+    } else if (IsRefType(c->input_dtype(0))) {
       params = c->mutable_input(0, use_exclusive_lock_);
       params_shape = params.shape();
       c->forward_ref_input_to_ref_output(0, 0);
@@ -159,6 +188,16 @@ class ScatterNdUpdateOp : public OpKernel {
           .TypeConstraint<index_type>("Tindices"),                           \
       ScatterNdUpdateOp<dev##Device, type, index_type, op>)
 
+#define REGISTER_RESOURCE_SCATTER_ND_UPDATE_KERNEL_INDEX(type, index_type, \
+                                                         dev, name, op)    \
+  REGISTER_KERNEL_BUILDER(                                                 \
+      Name(name)                                                           \
+          .Device(DEVICE_##dev)                                            \
+          .TypeConstraint<type>("T")                                       \
+          .TypeConstraint<index_type>("Tindices")                          \
+          .HostMemory("ref"),                                              \
+      ScatterNdUpdateOp<dev##Device, type, index_type, op>)
+
 #define REGISTER_SCATTER_ND_KERNEL(type, dev, name)         \
   REGISTER_SCATTER_ND_KERNEL_INDEX(type, int32, dev, name); \
   REGISTER_SCATTER_ND_KERNEL_INDEX(type, int64, dev, name)
@@ -167,6 +206,11 @@ class ScatterNdUpdateOp : public OpKernel {
   REGISTER_SCATTER_ND_UPDATE_KERNEL_INDEX(type, int32, dev, name, op); \
   REGISTER_SCATTER_ND_UPDATE_KERNEL_INDEX(type, int64, dev, name, op)
 
+#define REGISTER_RESOURCE_SCATTER_ND_UPDATE_KERNEL(type, dev, name, op)    \
+  REGISTER_RESOURCE_SCATTER_ND_UPDATE_KERNEL_INDEX(type, int32, dev, name, \
+                                                   op);                    \
+  REGISTER_RESOURCE_SCATTER_ND_UPDATE_KERNEL_INDEX(type, int64, dev, name, op)
+
 #define REGISTER_SCATTER_ND_ADD_SUB(type, dev)                            \
   REGISTER_SCATTER_ND_UPDATE_KERNEL(type, dev, "ScatterNdAdd",            \
                                     scatter_nd_op::UpdateOp::ADD);        \
@@ -178,9 +222,11 @@ class ScatterNdUpdateOp : public OpKernel {
 #define REGISTER_SCATTER_ND(type, dev) \
   REGISTER_SCATTER_ND_KERNEL(type, dev, "ScatterNd");
 
-#define REGISTER_SCATTER_ND_UPDATE(type, dev)                     \
-  REGISTER_SCATTER_ND_UPDATE_KERNEL(type, dev, "ScatterNdUpdate", \
-                                    scatter_nd_op::UpdateOp::ASSIGN);
+#define REGISTER_SCATTER_ND_UPDATE(type, dev)                         \
+  REGISTER_SCATTER_ND_UPDATE_KERNEL(type, dev, "ScatterNdUpdate",     \
+                                    scatter_nd_op::UpdateOp::ASSIGN); \
+  REGISTER_RESOURCE_SCATTER_ND_UPDATE_KERNEL(                         \
+      type, dev, "ResourceScatterNdUpdate", scatter_nd_op::UpdateOp::ASSIGN);
 
 // Registers CPU kernels.
 #define REGISTER_SCATTER_ND_ADD_SUB_CPU(type) \
@@ -281,8 +327,7 @@ Status ValidateUpdateShape(const TensorShape& params_shape,
 }
 
 template <typename Index>
-Status PrepareAndValidateInputs(OpKernelContext* c,
-                                const TensorShape& params_shape,
+Status PrepareAndValidateInputs(const TensorShape& params_shape,
                                 const Tensor& indices, const Tensor& updates,
                                 int64* slice_dim, Index* num_updates,
                                 Index* slice_size) {
@@ -396,7 +441,7 @@ Status DoScatterNd(OpKernelContext* c, const Tensor& indices,
   Index num_updates;
   Index slice_size;
   TF_RETURN_IF_ERROR(PrepareAndValidateInputs<Index>(
-      c, shape, indices, updates, &slice_dim, &num_updates, &slice_size));
+      shape, indices, updates, &slice_dim, &num_updates, &slice_size));
 
   IndexFlattener<Device, Index> index_flattener;
   auto indices_flat = index_flattener(c, indices);
diff --git a/tensorflow/core/ops/state_ops.cc b/tensorflow/core/ops/state_ops.cc
index da5f091e9f..5b1f5d2477 100644
--- a/tensorflow/core/ops/state_ops.cc
+++ b/tensorflow/core/ops/state_ops.cc
@@ -513,6 +513,62 @@ output_ref: Same as ref. Returned as a convenience for operations that want to
   use the updated values after the update is done.
 )doc");
 
+REGISTER_OP("ResourceScatterNdUpdate")
+    .Input("ref: resource")
+    .Input("indices: Tindices")
+    .Input("updates: T")
+    .Attr("T: type")
+    .Attr("Tindices: {int32, int64}")
+    .Attr("use_locking: bool = true")
+    .SetShapeFn(shape_inference::ScatterNdUpdateShape)
+    .Doc(R"doc(
+Applies sparse `updates` to individual values or slices within a given
+variable according to `indices`.
+
+`ref` is a `Tensor` with rank `P` and `indices` is a `Tensor` of rank `Q`.
+
+`indices` must be integer tensor, containing indices into `ref`.
+It must be shape `[d_0, ..., d_{Q-2}, K]` where `0 < K <= P`.
+
+The innermost dimension of `indices` (with length `K`) corresponds to
+indices into elements (if `K = P`) or slices (if `K < P`) along the `K`th
+dimension of `ref`.
+
+`updates` is `Tensor` of rank `Q-1+P-K` with shape:
+
+```
+[d_0, ..., d_{Q-2}, ref.shape[K], ..., ref.shape[P-1]].
+```
+
+For example, say we want to update 4 scattered elements to a rank-1 tensor to
+8 elements. In Python, that update would look like this:
+
+```python
+    ref = tfe.Variable([1, 2, 3, 4, 5, 6, 7, 8])
+    indices = tf.constant([[4], [3], [1] ,[7]])
+    updates = tf.constant([9, 10, 11, 12])
+    update = tf.scatter_nd_update(ref, indices, updates)
+    with tf.Session() as sess:
+      print sess.run(update)
+```
+
+The resulting update to ref would look like this:
+
+    [1, 11, 3, 10, 9, 6, 7, 12]
+
+See @{tf.scatter_nd} for more details about how to make updates to
+slices.
+
+ref: A resource handle. Must be from a VarHandleOp.
+indices: A Tensor. Must be one of the following types: int32, int64.
+  A tensor of indices into ref.
+updates: A Tensor. Must have the same type as ref. A tensor of updated
+  values to add to ref.
+use_locking: An optional bool. Defaults to True. If True, the assignment will
+  be protected by a lock; otherwise the behavior is undefined,
+  but may exhibit less contention.
+)doc");
+
 REGISTER_OP("ScatterNdAdd")
     .Input("ref: Ref(T)")
     .Input("indices: Tindices")
diff --git a/tensorflow/python/kernel_tests/BUILD b/tensorflow/python/kernel_tests/BUILD
index 4522520ee4..f15b3baabe 100644
--- a/tensorflow/python/kernel_tests/BUILD
+++ b/tensorflow/python/kernel_tests/BUILD
@@ -676,6 +676,7 @@ cuda_py_test(
         "//tensorflow/python:gradients",
         "//tensorflow/python:state_ops",
         "//tensorflow/python:variables",
+        "//tensorflow/python:resource_variable_ops",
     ],
     tags = ["noasan"],  # http://b/32635055
 )
diff --git a/tensorflow/python/kernel_tests/scatter_nd_ops_test.py b/tensorflow/python/kernel_tests/scatter_nd_ops_test.py
index a79d66e988..d7bde04230 100644
--- a/tensorflow/python/kernel_tests/scatter_nd_ops_test.py
+++ b/tensorflow/python/kernel_tests/scatter_nd_ops_test.py
@@ -27,6 +27,7 @@ from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import gradients_impl
+from tensorflow.python.ops import resource_variable_ops
 from tensorflow.python.ops import state_ops
 from tensorflow.python.ops import variables
 from tensorflow.python.platform import test
@@ -157,6 +158,20 @@ class StatefulScatterNdTest(test.TestCase):
       result = sess.run(scatter)
       self.assertAllClose(result, expected)
 
+  def testSimpleResource(self):
+    indices = constant_op.constant([[4], [3], [1], [7]], dtype=dtypes.int32)
+    updates = constant_op.constant([9, 10, 11, 12], dtype=dtypes.float32)
+    ref = resource_variable_ops.ResourceVariable(
+        [0, 0, 0, 0, 0, 0, 0, 0], dtype=dtypes.float32)
+    expected = np.array([0, 11, 0, 10, 9, 0, 0, 12])
+    scatter = state_ops.scatter_nd_update(ref, indices, updates)
+    init = variables.global_variables_initializer()
+
+    with self.test_session(use_gpu=True) as sess:
+      sess.run(init)
+      sess.run(scatter)
+      self.assertAllClose(ref.eval(), expected)
+
   def testSimple2(self):
     indices = constant_op.constant([[1, 0], [1, 1]], dtype=dtypes.int32)
     updates = constant_op.constant([11., 12.], dtype=dtypes.float32)
diff --git a/tensorflow/python/ops/state_ops.py b/tensorflow/python/ops/state_ops.py
index dfc657893c..dee495f78f 100644
--- a/tensorflow/python/ops/state_ops.py
+++ b/tensorflow/python/ops/state_ops.py
@@ -347,5 +347,71 @@ def scatter_update(ref, indices, updates, use_locking=True, name=None):
   if ref.dtype._is_ref_dtype:
     return gen_state_ops.scatter_update(ref, indices, updates,
                                         use_locking=use_locking, name=name)
-  return gen_resource_variable_ops.resource_scatter_update(
-      ref.handle, indices, ops.convert_to_tensor(updates, ref.dtype), name=name)
+  with ops.control_dependencies(
+      [gen_resource_variable_ops.resource_scatter_update(
+          ref.handle, indices, ops.convert_to_tensor(updates, ref.dtype),
+          name=name)]):
+    return ref.read_value()
+
+
+def scatter_nd_update(ref, indices, updates, use_locking=True, name=None):
+  r"""Applies sparse `updates` to individual values or slices in a Variable.
+
+  `ref` is a `Tensor` with rank `P` and `indices` is a `Tensor` of rank `Q`.
+
+  `indices` must be integer tensor, containing indices into `ref`.
+  It must be shape `[d_0, ..., d_{Q-2}, K]` where `0 < K <= P`.
+
+  The innermost dimension of `indices` (with length `K`) corresponds to
+  indices into elements (if `K = P`) or slices (if `K < P`) along the `K`th
+  dimension of `ref`.
+
+  `updates` is `Tensor` of rank `Q-1+P-K` with shape:
+
+  ```
+  [d_0, ..., d_{Q-2}, ref.shape[K], ..., ref.shape[P-1]].
+  ```
+
+  For example, say we want to update 4 scattered elements to a rank-1 tensor to
+  8 elements. In Python, that update would look like this:
+
+  ```python
+      ref = tf.Variable([1, 2, 3, 4, 5, 6, 7, 8])
+      indices = tf.constant([[4], [3], [1] ,[7]])
+      updates = tf.constant([9, 10, 11, 12])
+      update = tf.scatter_nd_update(ref, indices, updates)
+      with tf.Session() as sess:
+        print sess.run(update)
+  ```
+
+  The resulting update to ref would look like this:
+
+      [1, 11, 3, 10, 9, 6, 7, 12]
+
+  See @{tf.scatter_nd} for more details about how to make updates to
+  slices.
+
+  Args:
+    ref: A Variable.
+    indices: A `Tensor`. Must be one of the following types: `int32`, `int64`.
+      A Tensor. Must be one of the following types: int32, int64.
+      A tensor of indices into ref.
+    updates: A `Tensor`. Must have the same type as `ref`.
+      A Tensor. Must have the same type as ref. A tensor of updated
+      values to add to ref.
+    use_locking: An optional `bool`. Defaults to `True`.
+      An optional bool. Defaults to True. If True, the assignment will
+      be protected by a lock; otherwise the behavior is undefined,
+      but may exhibit less contention.
+    name: A name for the operation (optional).
+
+  Returns:
+    The value of the variable after the update.
+  """
+  if ref.dtype._is_ref_dtype:
+    return gen_state_ops.scatter_nd_update(
+        ref, indices, updates, use_locking, name)
+  with ops.control_dependencies([gen_state_ops.resource_scatter_nd_update(
+      ref.handle, indices, ops.convert_to_tensor(updates, dtype=ref.dtype),
+      use_locking, name)]):
+    return ref.read_value()
-- 
GitLab


From a99e9a2c56a4922e76c367b8d3a9c43ea0a4ef61 Mon Sep 17 00:00:00 2001
From: Allen Lavoie <allenl@google.com>
Date: Tue, 28 Nov 2017 15:27:57 -0800
Subject: [PATCH 0371/1225] Support tfe.Network.losses

Supports only variable regularization losses when executing eagerly. They are
stored as zero-argument lambdas and executed when the property is requested.

PiperOrigin-RevId: 177227550
---
 tensorflow/contrib/eager/python/BUILD         |  1 +
 tensorflow/contrib/eager/python/network.py    | 24 +++++-
 .../contrib/eager/python/network_test.py      | 29 +++++++
 tensorflow/python/layers/base.py              | 85 ++++++++++++-------
 tensorflow/python/layers/base_test.py         |  5 ++
 5 files changed, 114 insertions(+), 30 deletions(-)

diff --git a/tensorflow/contrib/eager/python/BUILD b/tensorflow/contrib/eager/python/BUILD
index bf2e883bc5..55d768044b 100644
--- a/tensorflow/contrib/eager/python/BUILD
+++ b/tensorflow/contrib/eager/python/BUILD
@@ -232,6 +232,7 @@ py_test(
     srcs_version = "PY2AND3",
     deps = [
         ":network",
+        "//tensorflow/contrib/layers:layers_py",
         "//tensorflow/python:constant_op",
         "//tensorflow/python:errors",
         "//tensorflow/python:framework_test_lib",
diff --git a/tensorflow/contrib/eager/python/network.py b/tensorflow/contrib/eager/python/network.py
index 0388aaa849..e3c13cbd2e 100644
--- a/tensorflow/contrib/eager/python/network.py
+++ b/tensorflow/contrib/eager/python/network.py
@@ -451,8 +451,30 @@ class Network(base.Layer):
         "at https://github.com/tensorflow/tensorflow/issues/new if this is "
         "important to you")
 
+  def add_loss(self, losses, inputs=None):
+    raise RuntimeError(
+        "add_loss is not supported in Network class yet. Please file an issue "
+        "at https://github.com/tensorflow/tensorflow/issues/new if this is "
+        "important to you")
+
+  @property
+  def losses(self):
+    """Gather losses from `Layer`s in the `Network`.
+
+    Note that when executing eagerly, `Layer.losses` evaluates
+    regularizers. When using graph execution, variable regularization ops have
+    already been created and are simply returned here.
+
+    Returns:
+      A list of tensors.
+    """
+    layer_losses = []
+    for layer in self.layers:
+      layer_losses.extend(layer.losses)
+    return layer_losses
+
   # TODO(allenl): Support other Layer methods needed for graph mode, such as for
-  # losses and updates
+  # updates
 
 
 class Sequential(Network):
diff --git a/tensorflow/contrib/eager/python/network_test.py b/tensorflow/contrib/eager/python/network_test.py
index e7835a63e6..3eb4f5f8b3 100644
--- a/tensorflow/contrib/eager/python/network_test.py
+++ b/tensorflow/contrib/eager/python/network_test.py
@@ -19,6 +19,7 @@ from __future__ import print_function
 import gc
 
 from tensorflow.contrib.eager.python import network
+from tensorflow.contrib.layers.python.layers import regularizers
 from tensorflow.python.eager import context
 from tensorflow.python.eager import function
 from tensorflow.python.eager import test
@@ -45,6 +46,22 @@ class MyNetwork(network.Network):
     return self.l1(x)
 
 
+class RegularizedNetwork(network.Network):
+
+  def __init__(self):
+    super(RegularizedNetwork, self).__init__()
+    self.l1 = self.track_layer(core.Dense(
+        1,
+        bias_regularizer=regularizers.l1_regularizer(2.0),
+        kernel_regularizer=regularizers.l1_regularizer(2.0)))
+    self.l2 = self.track_layer(core.Dense(
+        1,
+        bias_regularizer=regularizers.l1_regularizer(2.0)))
+
+  def call(self, values):
+    return self.l2(self.l1(values))
+
+
 class NetworkTest(test.TestCase):
 
   def _save_modify_load_network_built(self, net, global_step=None):
@@ -484,6 +501,18 @@ class NetworkTest(test.TestCase):
       _check_op_prefixes(expected_prefix="my_network_1/dense/",
                          checked_ops=checked_ops)
 
+  @test_util.run_in_graph_and_eager_modes(assert_no_eager_garbage=True)
+  def testVariableRegularizers(self):
+    net = RegularizedNetwork()
+    net(constant_op.constant([[1.]]))
+    self.evaluate(net.variables[0].assign([[2.]]))
+    self.evaluate(net.variables[1].assign([3.]))
+    self.evaluate(net.variables[2].assign([[-2.]]))
+    self.evaluate(net.variables[3].assign([4.]))
+    self.assertAllEqual([4., 6., 8.], self.evaluate(net.losses))
+    self.evaluate(net.variables[3].assign([5.]))
+    self.assertAllEqual([4., 6., 10.], self.evaluate(net.losses))
+
   @test_util.run_in_graph_and_eager_modes(assert_no_eager_garbage=True)
   def testDuplicateNameError(self):
     one = constant_op.constant([[1.]])
diff --git a/tensorflow/python/layers/base.py b/tensorflow/python/layers/base.py
index 6be2bc3e76..c083f8a5d2 100644
--- a/tensorflow/python/layers/base.py
+++ b/tensorflow/python/layers/base.py
@@ -103,10 +103,16 @@ class Layer(object):
     self.built = False
     self.input_spec = None
 
+    if activity_regularizer and context.in_eager_mode():
+      raise ValueError(
+          ('Activity regularization is not supported when executing eagerly. '
+           'Got activity_regularizer=%s') % (activity_regularizer,))
     self._activity_regularizer = activity_regularizer
     self._trainable_weights = []
     self._non_trainable_weights = []
     self._updates = []
+    # When executing eagerly, _losses is a list of zero-argument lambdas which
+    # return tensors. When using graph execution, _losses is a list of ops.
     self._losses = []
     self._reuse = kwargs.get('_reuse')
     self._graph = ops.get_default_graph()
@@ -287,9 +293,22 @@ class Layer(object):
 
   @property
   def losses(self):
+    """Losses which are associated with this `Layer`.
+
+    Note that when executing eagerly, getting this property evaluates
+    regularizers. When using graph execution, variable regularization ops have
+    already been created and are simply returned here.
+
+    Returns:
+      A list of tensors.
+    """
     if context.in_eager_mode():
-      raise RuntimeError('Layer.losses not supported in Eager mode.')
-    return self._losses
+      # _losses may only contain variable regularization losses when executing
+      # eagerly, and they have been saved as lambdas to be executed when
+      # requested.
+      return [regularizer() for regularizer in self._losses]
+    else:
+      return self._losses
 
   def add_loss(self, losses, inputs=None):
     """Add loss tensor(s), potentially dependent on layer inputs.
@@ -303,6 +322,11 @@ class Layer(object):
     The `get_losses_for` method allows to retrieve the losses relevant to a
     specific set of inputs.
 
+    Note that `add_loss` is not supported when executing eagerly. Instead,
+    variable regularizers may be added through `add_variable`. Activity
+    regularization is not supported directly (but such losses may be returned
+    from `Layer.call()`).
+
     Arguments:
       losses: Loss tensor, or list/tuple of tensors.
       inputs: Optional input tensor(s) that the loss(es) depend on. Must
@@ -462,16 +486,8 @@ class Layer(object):
     Raises:
       RuntimeError: If called in Eager mode with regularizers.
     """
-    # Note that we currently don't support variable regularization in Eager
-    # mode. An alternative is for users to directly compute these losses before
-    # performing a backward pass.
     if context.in_graph_mode():
       existing_variables = set(tf_variables.global_variables())
-    else:
-      existing_variables = []
-      if regularizer is not None:
-        raise RuntimeError('Variable regularization not supported in Eager '
-                           'mode.')
     if dtype is None:
       dtype = self.dtype or dtypes.float32
 
@@ -486,28 +502,39 @@ class Layer(object):
                                    constraint=constraint,
                                    trainable=trainable and self.trainable,
                                    partitioner=partitioner)
-        if (context.in_graph_mode() and trainable and self.trainable
-            and variable not in tf_variables.trainable_variables()):
-          # A custom getter / variable scope overrode the trainable flag.
-          trainable = False
-        if variable in existing_variables:
-          return variable
-        if regularizer:
-          # To match the behavior of tf.get_variable(), we only
-          # apply regularization if the variable is newly created.
-          if isinstance(variable, tf_variables.PartitionedVariable):
-            for v in variable:
-              with ops.colocate_with(v.op):
+        if context.in_graph_mode():
+          if (trainable and self.trainable
+              and variable not in tf_variables.trainable_variables()):
+            # A custom getter / variable scope overrode the trainable flag.
+            trainable = False
+          if variable in existing_variables:
+            return variable
+          if regularizer:
+            # To match the behavior of tf.get_variable(), we only
+            # apply regularization if the variable is newly created.
+            if isinstance(variable, tf_variables.PartitionedVariable):
+              for v in variable:
+                with ops.colocate_with(v.op):
+                  with ops.name_scope(name + '/Regularizer'):
+                    regularization = regularizer(v)
+                if regularization is not None:
+                  self.add_loss(regularization)
+            else:
+              with ops.colocate_with(variable.op):
                 with ops.name_scope(name + '/Regularizer'):
-                  regularization = regularizer(v)
+                  regularization = regularizer(variable)
               if regularization is not None:
                 self.add_loss(regularization)
-          else:
-            with ops.colocate_with(variable.op):
-              with ops.name_scope(name + '/Regularizer'):
-                regularization = regularizer(variable)
-            if regularization is not None:
-              self.add_loss(regularization)
+        elif regularizer:
+          if isinstance(variable, tf_variables.PartitionedVariable):
+            raise RuntimeError(
+                'Partitioned variable regularization is not yet supported when '
+                'executing eagerly. File a feature request is this is '
+                'important to you.')
+          # Save a zero-argument lambda which runs the regularizer on the
+          # variable, to be executed when `Layer.losses` is requested. This
+          # makes losses responsive to variable updates when executing eagerly.
+          self._losses.append(lambda: regularizer(variable))
     if trainable:
       self._trainable_weights.append(variable)
     else:
diff --git a/tensorflow/python/layers/base_test.py b/tensorflow/python/layers/base_test.py
index 1eea20deef..3e5a51eb62 100644
--- a/tensorflow/python/layers/base_test.py
+++ b/tensorflow/python/layers/base_test.py
@@ -88,6 +88,11 @@ class BaseLayerTest(test.TestCase):
           regularizer=regularizer)
       self.assertEqual(len(layer.losses), 1)
 
+  def testNoEagerActivityRegularizer(self):
+    with context.eager_mode():
+      with self.assertRaisesRegexp(ValueError, 'activity_regularizer'):
+        core_layers.Dense(1, activity_regularizer=lambda *args, **kwargs: 0.)
+
   def testGetVariable(self):
     with self.test_session():
 
-- 
GitLab


From a6ee905de83834c35e7cf01182270309ec2425f3 Mon Sep 17 00:00:00 2001
From: Sergio Guadarrama <sguada@google.com>
Date: Tue, 28 Nov 2017 15:31:17 -0800
Subject: [PATCH 0372/1225] Add non_trainable_variables to templates. Add
 aliases for weights, trainable_weights and non_trainable_weights.

PiperOrigin-RevId: 177228107
---
 .../python/kernel_tests/template_test.py      | 37 +++++++++++++++++--
 tensorflow/python/ops/template.py             | 37 ++++++++++++++++++-
 2 files changed, 70 insertions(+), 4 deletions(-)

diff --git a/tensorflow/python/kernel_tests/template_test.py b/tensorflow/python/kernel_tests/template_test.py
index 40c0ade62a..f0354374ac 100644
--- a/tensorflow/python/kernel_tests/template_test.py
+++ b/tensorflow/python/kernel_tests/template_test.py
@@ -34,9 +34,10 @@ from tensorflow.python.platform import test
 from tensorflow.python.training import gradient_descent
 
 
-def variable_scoped_function():
+def variable_scoped_function(trainable=True):
   return variable_scope.get_variable(
-      "dummy", shape=[1], initializer=init_ops.zeros_initializer())
+      "dummy", shape=[1], trainable=trainable,
+      initializer=init_ops.zeros_initializer())
 
 
 def internally_variable_scoped_function(scope_name):
@@ -413,7 +414,7 @@ class TemplateTest(test.TestCase):
     self.assertEqual(custom_getter_count[0], 2)
 
     # Test that custom getter is called when the variable scope is created
-  # during construction
+    # during construction
     custom_getter_count[0] = 0
     tmpl2 = template.make_template(
         "s2",
@@ -539,6 +540,36 @@ class TemplateTest(test.TestCase):
     # Ensure we can get the scopes before either template is actually called.
     self.assertEqual(1, len(ta.trainable_variables))
     self.assertEqual(1, len(tb.trainable_variables))
+    # None non-trainable variable was created.
+    self.assertEqual([], list(ta.non_trainable_variables))
+    self.assertEqual([], list(tb.non_trainable_variables))
+    # Ensure variables returns all the variables.
+    self.assertEqual(1, len(ta.variables))
+    self.assertEqual(1, len(tb.variables))
+
+  @test_util.run_in_graph_and_eager_modes()
+  def test_non_trainable_variables(self):
+    # Make sure non_trainable_variables are created.
+    with variable_scope.variable_scope("foo2"):
+      ta = template.make_template("a", variable_scoped_function,
+                                  trainable=True)
+      tb = template.make_template("b", variable_scoped_function,
+                                  trainable=False)
+    # Initially there are not variables created.
+    self.assertEqual([], list(ta.variables))
+    self.assertEqual([], list(tb.variables))
+    # After calling there are variables created.
+    ta()
+    tb()
+    # Check the trainable and non_trainable variables.
+    self.assertEqual(1, len(ta.trainable_variables))
+    self.assertEqual([], list(ta.non_trainable_variables))
+
+    self.assertEqual([], list(tb.trainable_variables))
+    self.assertEqual(1, len(tb.non_trainable_variables))
+    # Ensure variables returns all the variables.
+    self.assertEqual(1, len(ta.variables))
+    self.assertEqual(1, len(tb.variables))
 
   # TODO(apassos) handle local variables in Eager
   def test_local_variables(self):
diff --git a/tensorflow/python/ops/template.py b/tensorflow/python/ops/template.py
index 98578b799a..07796b28d9 100644
--- a/tensorflow/python/ops/template.py
+++ b/tensorflow/python/ops/template.py
@@ -307,6 +307,12 @@ class Template(object):
       # To prevent partial matches on the scope_name, we add '/' at the end.
       return name if name[-1] == "/" else name + "/"
 
+  @property
+  def variables(self):
+    """Returns the list of global and local variables created by the Template.
+    """
+    return self.global_variables + self.local_variables
+
   @property
   def trainable_variables(self):
     """Returns the list of trainable variables created by the Template."""
@@ -316,6 +322,14 @@ class Template(object):
     else:
       return []
 
+  @property
+  def non_trainable_variables(self):
+    """Returns the list of non-trainable variables created by the Template."""
+    # TODO(apassos) Make sure it matches Eager when using local variables.
+    global_variables = self.global_variables
+    trainable_variables = set(self.trainable_variables)
+    return [x for x in global_variables if x not in trainable_variables]
+
   @property
   def global_variables(self):
     """Returns the list of global variables created by the Template."""
@@ -334,6 +348,21 @@ class Template(object):
     else:
       return []
 
+  @property
+  def weights(self):
+    """List of weights/variables created by the Template."""
+    return self.variables
+
+  @property
+  def trainable_weights(self):
+    """List of trainable weights/variables created by the Template."""
+    return self.trainable_variables
+
+  @property
+  def non_trainable_weights(self):
+    """List of non-trainable weights/variables created by the Template."""
+    return self.non_trainable_variables
+
   @property
   @deprecated(
       "2017-02-21", "The .var_scope property is deprecated. Please change your "
@@ -501,7 +530,7 @@ class EagerTemplate(Template):
 
   @property
   def variables(self):
-    """Returns the list of trainable variables created by the Template."""
+    """Returns the list of variables created by the Template."""
     # Currently there is no local variable in Eager mode.
     return self._eager_variable_store.variables()
 
@@ -511,6 +540,12 @@ class EagerTemplate(Template):
     # Currently there is no local variable in Eager mode.
     return self._eager_variable_store.trainable_variables()
 
+  @property
+  def non_trainable_variables(self):
+    """Returns the list of non-trainable variables created by the Template."""
+    # Currently there is no local variable in Eager mode.
+    return self._eager_variable_store.non_trainable_variables()
+
   @property
   def global_variables(self):
     """Returns the list of global variables created by the Template."""
-- 
GitLab


From d8de0d979e9b9dacb20ebf425d54bbc98ed65fad Mon Sep 17 00:00:00 2001
From: Amit Patankar <amitpatankar@google.com>
Date: Tue, 28 Nov 2017 15:31:21 -0800
Subject: [PATCH 0373/1225] Fixing the windows nightly build.

PiperOrigin-RevId: 177228112
---
 tensorflow/contrib/cmake/tf_core_cpu.cmake       | 2 +-
 tensorflow/contrib/cmake/tf_core_framework.cmake | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/tensorflow/contrib/cmake/tf_core_cpu.cmake b/tensorflow/contrib/cmake/tf_core_cpu.cmake
index 5c01ca382f..e4213ea2a4 100644
--- a/tensorflow/contrib/cmake/tf_core_cpu.cmake
+++ b/tensorflow/contrib/cmake/tf_core_cpu.cmake
@@ -63,7 +63,7 @@ if (tensorflow_ENABLE_GPU)
   file(GLOB_RECURSE tf_core_gpu_srcs
     "${tensorflow_source_dir}/tensorflow/core/common_runtime/gpu/*.cc"
     "${tensorflow_source_dir}/tensorflow/core/platform/default/gpu/cupti_wrapper.cc"
-    "${tensorflow_source_dir}/tensorflow/core/platform/default/gpu_tracer.cc"
+    "${tensorflow_source_dir}/tensorflow/core/platform/default/device_tracer.cc"
     "${tensorflow_source_dir}/tensorflow/core/common_runtime/gpu_device_factory.cc"
     "${tensorflow_source_dir}/tensorflow/core/grappler/devices.h"
     "${tensorflow_source_dir}/tensorflow/core/grappler/devices.cc"
diff --git a/tensorflow/contrib/cmake/tf_core_framework.cmake b/tensorflow/contrib/cmake/tf_core_framework.cmake
index c607546f4a..5ec1a8d04f 100644
--- a/tensorflow/contrib/cmake/tf_core_framework.cmake
+++ b/tensorflow/contrib/cmake/tf_core_framework.cmake
@@ -211,7 +211,7 @@ if (NOT tensorflow_ENABLE_GPU)
   list(REMOVE_ITEM tf_core_platform_srcs ${tf_core_platform_gpu_srcs})
 else()
   file(GLOB tf_core_platform_srcs_exclude
-      "${tensorflow_source_dir}/tensorflow/core/platform/default/gpu_tracer.cc")
+      "${tensorflow_source_dir}/tensorflow/core/platform/default/device_tracer.cc")
   list(REMOVE_ITEM tf_core_platform_srcs ${tf_core_platform_srcs_exclude})
 endif()
 
-- 
GitLab


From 5f1b61b5c851409c76015c908d127fbc2f886013 Mon Sep 17 00:00:00 2001
From: Jacques Pienaar <jpienaar@google.com>
Date: Tue, 28 Nov 2017 15:37:52 -0800
Subject: [PATCH 0374/1225] Check per HLO instruction only at vlog=1 in non-opt
 build.

PiperOrigin-RevId: 177229069
---
 tensorflow/compiler/xla/service/hlo_rematerialization.cc | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/tensorflow/compiler/xla/service/hlo_rematerialization.cc b/tensorflow/compiler/xla/service/hlo_rematerialization.cc
index 017f996bc4..d09de7b528 100644
--- a/tensorflow/compiler/xla/service/hlo_rematerialization.cc
+++ b/tensorflow/compiler/xla/service/hlo_rematerialization.cc
@@ -566,7 +566,9 @@ Status MemoryUsageTracker::BeginInstruction(Item* item) {
   VLOG(3) << "  memory usage = " << memory_usage_;
   VLOG(10) << ToString();
 
-  DCHECK(Check());
+  if (VLOG_IS_ON(1)) {
+    DCHECK(Check());
+  }
   return Status::OK();
 }
 
@@ -603,8 +605,9 @@ Status MemoryUsageTracker::EndInstruction() {
   VLOG(3) << "  memory usage = " << memory_usage_;
   VLOG(10) << ToString();
 
-  DCHECK(Check());
-
+  if (VLOG_IS_ON(1)) {
+    DCHECK(Check());
+  }
   return Status::OK();
 }
 
-- 
GitLab


From b8969d12f9260a7b1981b8d22788aa1f8c8cbbb6 Mon Sep 17 00:00:00 2001
From: Martin Wicke <wicke@google.com>
Date: Tue, 28 Nov 2017 15:44:48 -0800
Subject: [PATCH 0375/1225] Mark Supervisor deprecated. Please use
 MonitoredTrainingSession instead.

Fixes #6263.

PiperOrigin-RevId: 177230053
---
 tensorflow/python/training/monitored_session.py | 1 -
 tensorflow/python/training/supervisor.py        | 6 ++++++
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/tensorflow/python/training/monitored_session.py b/tensorflow/python/training/monitored_session.py
index e931555470..f1cb81981a 100644
--- a/tensorflow/python/training/monitored_session.py
+++ b/tensorflow/python/training/monitored_session.py
@@ -52,7 +52,6 @@ _PREEMPTION_ERRORS = (errors.AbortedError, errors.UnavailableError)
 USE_DEFAULT = object()
 
 
-# TODO(touts): Share that with the Supervisor.
 class Scaffold(object):
   """Structure to create or gather pieces commonly needed to train a model.
 
diff --git a/tensorflow/python/training/supervisor.py b/tensorflow/python/training/supervisor.py
index a634a842b6..e4514aaea2 100644
--- a/tensorflow/python/training/supervisor.py
+++ b/tensorflow/python/training/supervisor.py
@@ -36,11 +36,15 @@ from tensorflow.python.training import coordinator
 from tensorflow.python.training import saver as saver_mod
 from tensorflow.python.training import session_manager as session_manager_mod
 from tensorflow.python.training import training_util
+from tensorflow.python.util import deprecation
 
 
 class Supervisor(object):
   """A training helper that checkpoints models and computes summaries.
 
+  This class is deprecated. Please use
+  ${tf.train.MonitoredTrainingSession} instead.
+
   The Supervisor is a small wrapper around a `Coordinator`, a `Saver`,
   and a `SessionManager` that takes care of common needs of TensorFlow
   training programs.
@@ -198,6 +202,8 @@ class Supervisor(object):
   # the default behavior should be used.
   USE_DEFAULT = 0
 
+  @deprecation.deprecated(None,
+                          "Please switch to tf.train.MonitoredTrainingSession")
   def __init__(self,
                graph=None,
                ready_op=USE_DEFAULT,
-- 
GitLab


From 5a1e22b753225a7fa14f4ae60c06cf50bce6b9a6 Mon Sep 17 00:00:00 2001
From: Michael Case <mikecase@google.com>
Date: Tue, 28 Nov 2017 15:45:09 -0800
Subject: [PATCH 0376/1225] Remove temp_workaround_http_archive.

PiperOrigin-RevId: 177230105
---
 tensorflow/workspace.bzl       | 51 +++++-----------------------------
 third_party/aws.BUILD          | 16 +++++------
 third_party/curl.BUILD         | 46 +++++++++++++++---------------
 third_party/gif.BUILD          |  2 +-
 third_party/jemalloc.BUILD     | 10 +++----
 third_party/jpeg/jpeg.BUILD    |  2 +-
 third_party/mkl/build_defs.bzl |  1 -
 third_party/nccl.BUILD         |  8 +++---
 third_party/snappy.BUILD       |  4 +--
 9 files changed, 51 insertions(+), 89 deletions(-)

diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl
index cb77f96be5..68d663acfc 100644
--- a/tensorflow/workspace.bzl
+++ b/tensorflow/workspace.bzl
@@ -57,33 +57,6 @@ def check_version(bazel_version):
       fail("\nCurrent Bazel version is {}, expected at least {}\n".format(
           native.bazel_version, bazel_version))
 
-def _repos_are_siblings():
-  return Label("@foo//bar").workspace_root.startswith("../")
-
-# Temporary workaround to support including TensorFlow as a submodule until this
-# use-case is supported in the next Bazel release.
-def _temp_workaround_http_archive_impl(repo_ctx):
-  repo_ctx.template("BUILD", repo_ctx.attr.build_file, {
-      "%prefix%": ".." if _repos_are_siblings() else "external",
-      "%ws%": repo_ctx.attr.repository
-  }, False)
-  repo_ctx.download_and_extract(repo_ctx.attr.urls, "", repo_ctx.attr.sha256,
-                                "", repo_ctx.attr.strip_prefix)
-  if repo_ctx.attr.patch_file != None:
-    _apply_patch(repo_ctx, repo_ctx.attr.patch_file)
-
-temp_workaround_http_archive = repository_rule(
-    attrs = {
-        "build_file": attr.label(),
-        "repository": attr.string(),
-        "patch_file": attr.label(default = None),
-        "urls": attr.string_list(default = []),
-        "sha256": attr.string(default = ""),
-        "strip_prefix": attr.string(default = ""),
-    },
-    implementation = _temp_workaround_http_archive_impl,
-)
-
 # Executes specified command with arguments and calls 'fail' if it exited with
 # non-zero code
 def _execute_and_check_ret_code(repo_ctx, cmd_and_args):
@@ -121,8 +94,6 @@ def _patched_http_archive_impl(repo_ctx):
 patched_http_archive = repository_rule(
     attrs = {
         "patch_file": attr.label(),
-        "build_file": attr.label(),
-        "repository": attr.string(),
         "urls": attr.string_list(default = []),
         "sha256": attr.string(default = ""),
         "strip_prefix": attr.string(default = ""),
@@ -157,7 +128,6 @@ def tf_workspace(path_prefix="", tf_repo_name=""):
       sha256 = "57ba56c4c243f403ff78f417ff854ef50b9eddf4a610a917b7c95e7fa8553a4b",
       strip_prefix = "mklml_lnx_2018.0.20170720",
       build_file = str(Label("//third_party/mkl:mkl.BUILD")),
-      repository = tf_repo_name,
   )
 
   if path_prefix:
@@ -292,7 +262,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""):
       build_file = str(Label("//third_party:nasm.BUILD")),
   )
 
-  temp_workaround_http_archive(
+  native.new_http_archive(
       name = "jpeg",
       urls = [
           "https://mirror.bazel.build/github.com/libjpeg-turbo/libjpeg-turbo/archive/1.5.1.tar.gz",
@@ -301,7 +271,6 @@ def tf_workspace(path_prefix="", tf_repo_name=""):
       sha256 = "c15a9607892113946379ccea3ca8b85018301b200754f209453ab21674268e77",
       strip_prefix = "libjpeg-turbo-1.5.1",
       build_file = str(Label("//third_party/jpeg:jpeg.BUILD")),
-      repository = tf_repo_name,
   )
 
   native.new_http_archive(
@@ -502,7 +471,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""):
       build_file = str(Label("//third_party:swig.BUILD")),
   )
 
-  temp_workaround_http_archive(
+  native.new_http_archive(
       name = "curl",
       sha256 = "ff3e80c1ca6a068428726cd7dd19037a47cc538ce58ef61c59587191039b2ca6",
       urls = [
@@ -511,7 +480,6 @@ def tf_workspace(path_prefix="", tf_repo_name=""):
       ],
       strip_prefix = "curl-7.49.1",
       build_file = str(Label("//third_party:curl.BUILD")),
-      repository = tf_repo_name
   )
 
   # grpc expects //external:protobuf_clib and //external:protobuf_compiler
@@ -575,7 +543,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""):
 
   # TODO(phawkins): currently, this rule uses an unofficial LLVM mirror.
   # Switch to an official source of snapshots if/when possible.
-  temp_workaround_http_archive(
+  native.new_http_archive(
       name = "llvm",
       urls = [
           "https://mirror.bazel.build/github.com/llvm-mirror/llvm/archive/9ab4c272cb604a7f947865428c4ef2169fee2100.tar.gz",
@@ -584,7 +552,6 @@ def tf_workspace(path_prefix="", tf_repo_name=""):
       sha256 = "1b1b7d3800a94ca2302e3dd670dbe84238749583027883784b55297059d83da8",
       strip_prefix = "llvm-9ab4c272cb604a7f947865428c4ef2169fee2100",
       build_file = str(Label("//third_party/llvm:llvm.BUILD")),
-      repository = tf_repo_name,
   )
 
   native.new_http_archive(
@@ -650,7 +617,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""):
       build_file = str(Label("//third_party/fft2d:fft2d.BUILD")),
   )
 
-  temp_workaround_http_archive(
+  native.new_http_archive(
       name = "snappy",
       urls = [
           "https://mirror.bazel.build/github.com/google/snappy/archive/1.1.4.tar.gz",
@@ -659,10 +626,9 @@ def tf_workspace(path_prefix="", tf_repo_name=""):
       sha256 = "2f7504c73d85bac842e893340333be8cb8561710642fc9562fccdd9d2c3fcc94",
       strip_prefix = "snappy-1.1.4",
       build_file = str(Label("//third_party:snappy.BUILD")),
-      repository = tf_repo_name,
   )
 
-  temp_workaround_http_archive(
+  native.new_http_archive(
       name = "nccl_archive",
       urls = [
           "https://mirror.bazel.build/github.com/nvidia/nccl/archive/03d856977ecbaac87e598c0c4bafca96761b9ac7.tar.gz",
@@ -671,10 +637,9 @@ def tf_workspace(path_prefix="", tf_repo_name=""):
       sha256 = "2ca86fb6179ecbff789cc67c836139c1bbc0324ed8c04643405a30bf26325176",
       strip_prefix = "nccl-03d856977ecbaac87e598c0c4bafca96761b9ac7",
       build_file = str(Label("//third_party:nccl.BUILD")),
-      repository = tf_repo_name,
   )
 
-  temp_workaround_http_archive(
+  native.new_http_archive(
       name = "aws",
       urls = [
           "https://mirror.bazel.build/github.com/aws/aws-sdk-cpp/archive/1.0.90.tar.gz",
@@ -683,7 +648,6 @@ def tf_workspace(path_prefix="", tf_repo_name=""):
       sha256 = "f599b57aec4f03ad696044dd430b2d201864113937353adc346f53ad47991319",
       strip_prefix = "aws-sdk-cpp-1.0.90",
       build_file = str(Label("//third_party:aws.BUILD")),
-      repository = tf_repo_name
   )
 
   java_import_external(
@@ -711,7 +675,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""):
       testonly_ = True,
   )
 
-  temp_workaround_http_archive(
+  native.new_http_archive(
       name = "jemalloc",
       urls = [
           "https://mirror.bazel.build/github.com/jemalloc/jemalloc/archive/4.4.0.tar.gz",
@@ -720,7 +684,6 @@ def tf_workspace(path_prefix="", tf_repo_name=""):
       sha256 = "3c8f25c02e806c3ce0ab5fb7da1817f89fc9732709024e2a81b6b82f7cc792a8",
       strip_prefix = "jemalloc-4.4.0",
       build_file = str(Label("//third_party:jemalloc.BUILD")),
-      repository = tf_repo_name,
   )
 
   java_import_external(
diff --git a/third_party/aws.BUILD b/third_party/aws.BUILD
index bc9e37ffb3..bf5310aa16 100644
--- a/third_party/aws.BUILD
+++ b/third_party/aws.BUILD
@@ -7,21 +7,21 @@ licenses(["notice"])  # Apache 2.0
 
 exports_files(["LICENSE"])
 
-load("@%ws%//third_party:common.bzl", "template_rule")
+load("@org_tensorflow//third_party:common.bzl", "template_rule")
 
 cc_library(
     name = "aws",
     srcs = select({
-        "@%ws%//tensorflow:linux_x86_64": glob([
+        "@org_tensorflow//tensorflow:linux_x86_64": glob([
             "aws-cpp-sdk-core/source/platform/linux-shared/*.cpp",
         ]),
-        "@%ws%//tensorflow:darwin": glob([
+        "@org_tensorflow//tensorflow:darwin": glob([
             "aws-cpp-sdk-core/source/platform/linux-shared/*.cpp",
         ]),
-        "@%ws%//tensorflow:linux_ppc64le": glob([
+        "@org_tensorflow//tensorflow:linux_ppc64le": glob([
             "aws-cpp-sdk-core/source/platform/linux-shared/*.cpp",
         ]),
-        "@%ws%//tensorflow:raspberry_pi_armeabi": glob([
+        "@org_tensorflow//tensorflow:raspberry_pi_armeabi": glob([
             "aws-cpp-sdk-core/source/platform/linux-shared/*.cpp",
         ]),
         "//conditions:default": [],
@@ -53,17 +53,17 @@ cc_library(
         "aws-cpp-sdk-core/include/aws/core/SDKConfig.h",
     ],
     defines = select({
-        "@%ws%//tensorflow:linux_x86_64": [
+        "@org_tensorflow//tensorflow:linux_x86_64": [
             "PLATFORM_LINUX",
             "ENABLE_CURL_CLIENT",
             "ENABLE_NO_ENCRYPTION",
         ],
-        "@%ws%//tensorflow:darwin": [
+        "@org_tensorflow//tensorflow:darwin": [
             "PLATFORM_APPLE",
             "ENABLE_CURL_CLIENT",
             "ENABLE_NO_ENCRYPTION",
         ],
-        "@%ws%//tensorflow:linux_ppc64le": [
+        "@org_tensorflow//tensorflow:linux_ppc64le": [
             "PLATFORM_LINUX",
             "ENABLE_CURL_CLIENT",
             "ENABLE_NO_ENCRYPTION",
diff --git a/third_party/curl.BUILD b/third_party/curl.BUILD
index 805a30d262..e311c7e758 100644
--- a/third_party/curl.BUILD
+++ b/third_party/curl.BUILD
@@ -6,7 +6,7 @@ licenses(["notice"])  # MIT/X derivative license
 exports_files(["COPYING"])
 
 CURL_WIN_COPTS = [
-    "/I%prefix%/curl/lib",
+    "/Iexternal/curl/lib",
     "/DHAVE_CONFIG_H",
     "/DCURL_DISABLE_FTP",
     "/DCURL_DISABLE_NTLM",
@@ -224,14 +224,14 @@ cc_library(
         "lib/wildcard.h",
         "lib/x509asn1.h",
     ] + select({
-        "@%ws%//tensorflow:darwin": [
+        "@org_tensorflow//tensorflow:darwin": [
             "lib/vtls/darwinssl.c",
         ],
-        "@%ws%//tensorflow:ios": [
+        "@org_tensorflow//tensorflow:ios": [
             "lib/vtls/darwinssl.c",
         ],
-        "@%ws%//tensorflow:windows": CURL_WIN_SRCS,
-        "@%ws%//tensorflow:windows_msvc": CURL_WIN_SRCS,
+        "@org_tensorflow//tensorflow:windows": CURL_WIN_SRCS,
+        "@org_tensorflow//tensorflow:windows_msvc": CURL_WIN_SRCS,
         "//conditions:default": [
             "lib/vtls/openssl.c",
         ],
@@ -248,10 +248,10 @@ cc_library(
         "include/curl/typecheck-gcc.h",
     ],
     copts = select({
-        "@%ws%//tensorflow:windows": CURL_WIN_COPTS,
-        "@%ws%//tensorflow:windows_msvc": CURL_WIN_COPTS,
+        "@org_tensorflow//tensorflow:windows": CURL_WIN_COPTS,
+        "@org_tensorflow//tensorflow:windows_msvc": CURL_WIN_COPTS,
         "//conditions:default": [
-            "-I%prefix%/curl/lib",
+            "-Iexternal/curl/lib",
             "-D_GNU_SOURCE",
             "-DHAVE_CONFIG_H",
             "-DCURL_DISABLE_FTP",
@@ -261,14 +261,14 @@ cc_library(
             "-Wno-string-plus-int",
         ],
     }) + select({
-        "@%ws%//tensorflow:darwin": [
+        "@org_tensorflow//tensorflow:darwin": [
             "-fno-constant-cfstrings",
         ],
-        "@%ws%//tensorflow:windows": [
+        "@org_tensorflow//tensorflow:windows": [
             # See curl.h for discussion of write size and Windows
             "/DCURL_MAX_WRITE_SIZE=16384",
         ],
-        "@%ws%//tensorflow:windows_msvc": [
+        "@org_tensorflow//tensorflow:windows_msvc": [
             # See curl.h for discussion of write size and Windows
             "/DCURL_MAX_WRITE_SIZE=16384",
         ],
@@ -278,20 +278,20 @@ cc_library(
     }),
     includes = ["include"],
     linkopts = select({
-        "@%ws%//tensorflow:android": [
+        "@org_tensorflow//tensorflow:android": [
             "-pie",
         ],
-        "@%ws%//tensorflow:darwin": [
+        "@org_tensorflow//tensorflow:darwin": [
             "-Wl,-framework",
             "-Wl,CoreFoundation",
             "-Wl,-framework",
             "-Wl,Security",
         ],
-        "@%ws%//tensorflow:ios": [],
-        "@%ws%//tensorflow:windows": [
+        "@org_tensorflow//tensorflow:ios": [],
+        "@org_tensorflow//tensorflow:windows": [
             "-Wl,ws2_32.lib",
         ],
-        "@%ws%//tensorflow:windows_msvc": [
+        "@org_tensorflow//tensorflow:windows_msvc": [
             "-Wl,ws2_32.lib",
         ],
         "//conditions:default": [
@@ -302,9 +302,9 @@ cc_library(
     deps = [
         "@zlib_archive//:zlib",
     ] + select({
-        "@%ws%//tensorflow:ios": [],
-        "@%ws%//tensorflow:windows": [],
-        "@%ws%//tensorflow:windows_msvc": [],
+        "@org_tensorflow//tensorflow:ios": [],
+        "@org_tensorflow//tensorflow:windows": [],
+        "@org_tensorflow//tensorflow:windows_msvc": [],
         "//conditions:default": [
             "@boringssl//:ssl",
         ],
@@ -312,7 +312,7 @@ cc_library(
 )
 
 CURL_BIN_WIN_COPTS = [
-    "/I%prefix%/curl/lib",
+    "/Iexternal/curl/lib",
     "/DHAVE_CONFIG_H",
     "/DCURL_DISABLE_LIBCURL_OPTION",
 ]
@@ -406,10 +406,10 @@ cc_binary(
         "src/tool_xattr.h",
     ],
     copts = select({
-        "@%ws%//tensorflow:windows": CURL_BIN_WIN_COPTS,
-        "@%ws%//tensorflow:windows_msvc": CURL_BIN_WIN_COPTS,
+        "@org_tensorflow//tensorflow:windows": CURL_BIN_WIN_COPTS,
+        "@org_tensorflow//tensorflow:windows_msvc": CURL_BIN_WIN_COPTS,
         "//conditions:default": [
-            "-I%prefix%/curl/lib",
+            "-Iexternal/curl/lib",
             "-D_GNU_SOURCE",
             "-DHAVE_CONFIG_H",
             "-DCURL_DISABLE_LIBCURL_OPTION",
diff --git a/third_party/gif.BUILD b/third_party/gif.BUILD
index 27808a9d64..78fbd6c0e0 100644
--- a/third_party/gif.BUILD
+++ b/third_party/gif.BUILD
@@ -21,7 +21,7 @@ cc_library(
     ],
     hdrs = ["lib/gif_lib.h"],
     defines = select({
-        #"@%ws%//tensorflow:android": [
+        #"@org_tensorflow//tensorflow:android": [
         ":android": [
             "S_IREAD=S_IRUSR",
             "S_IWRITE=S_IWUSR",
diff --git a/third_party/jemalloc.BUILD b/third_party/jemalloc.BUILD
index a2addf2c66..1b0829b8fe 100644
--- a/third_party/jemalloc.BUILD
+++ b/third_party/jemalloc.BUILD
@@ -5,7 +5,7 @@ licenses(["notice"])  # BSD
 
 exports_files(["COPYING"])
 
-load("@%ws%//third_party:common.bzl", "template_rule")
+load("@org_tensorflow//third_party:common.bzl", "template_rule")
 
 cc_library(
     name = "jemalloc_headers",
@@ -97,10 +97,10 @@ cc_library(
     includes = ["include"],
     # pthread_atfork() is called for PPC.
     linkopts = select({
-        "@%ws%//tensorflow:linux_ppc64le": [
+        "@org_tensorflow//tensorflow:linux_ppc64le": [
             "-lpthread",
         ],
-        "@%ws%//tensorflow:linux_x86_64": [
+        "@org_tensorflow//tensorflow:linux_x86_64": [
             "-lpthread",
         ],
         "//conditions:default": [
@@ -208,8 +208,8 @@ genrule(
     name = "size_classes_h",
     outs = ["include/jemalloc/internal/size_classes.h"],
     cmd = select({
-        "@%ws%//tensorflow:linux_ppc64le": "$(location :size_classes_sh) \"3 4\" 3 16 2 >$@",
-        "@%ws%//tensorflow:linux_x86_64": "$(location :size_classes_sh) \"3 4\" 3 12 2 >$@",
+        "@org_tensorflow//tensorflow:linux_ppc64le": "$(location :size_classes_sh) \"3 4\" 3 16 2 >$@",
+        "@org_tensorflow//tensorflow:linux_x86_64": "$(location :size_classes_sh) \"3 4\" 3 12 2 >$@",
         "//conditions:default": "$(location :size_classes_sh) \"3 4\" 3 12 2 >$@",
     }),
     tools = [":size_classes_sh"],
diff --git a/third_party/jpeg/jpeg.BUILD b/third_party/jpeg/jpeg.BUILD
index f6078052ec..e431f19382 100644
--- a/third_party/jpeg/jpeg.BUILD
+++ b/third_party/jpeg/jpeg.BUILD
@@ -5,7 +5,7 @@ licenses(["notice"])  # custom notice-style license, see LICENSE.md
 
 exports_files(["LICENSE.md"])
 
-load("@%ws%//third_party:common.bzl", "template_rule")
+load("@org_tensorflow//third_party:common.bzl", "template_rule")
 
 libjpegturbo_nocopts = "-[W]error"
 
diff --git a/third_party/mkl/build_defs.bzl b/third_party/mkl/build_defs.bzl
index 533c0766c7..f637873f14 100644
--- a/third_party/mkl/build_defs.bzl
+++ b/third_party/mkl/build_defs.bzl
@@ -60,7 +60,6 @@ mkl_repository = repository_rule(
     ],
     attrs = {
         "build_file": attr.label(),
-        "repository": attr.string(),
         "urls": attr.string_list(default = []),
         "sha256": attr.string(default = ""),
         "strip_prefix": attr.string(default = ""),
diff --git a/third_party/nccl.BUILD b/third_party/nccl.BUILD
index 06b9b8ff68..3a2a3afe46 100644
--- a/third_party/nccl.BUILD
+++ b/third_party/nccl.BUILD
@@ -44,17 +44,17 @@ cc_library(
         "-O3",
     ] + cuda_default_copts(),
     linkopts = select({
-        "@%ws%//tensorflow:android": [
+        "@org_tensorflow//tensorflow:android": [
             "-pie",
         ],
-        "@%ws%//tensorflow:darwin": [
+        "@org_tensorflow//tensorflow:darwin": [
             "-Wl,-framework",
             "-Wl,CoreFoundation",
             "-Wl,-framework",
             "-Wl,Security",
         ],
-        "@%ws%//tensorflow:ios": [],
-        "@%ws%//tensorflow:windows": [
+        "@org_tensorflow//tensorflow:ios": [],
+        "@org_tensorflow//tensorflow:windows": [
             "ws2_32.lib",
         ],
         "//conditions:default": [
diff --git a/third_party/snappy.BUILD b/third_party/snappy.BUILD
index 9c00b7068a..fd48ed8941 100644
--- a/third_party/snappy.BUILD
+++ b/third_party/snappy.BUILD
@@ -50,8 +50,8 @@ genrule(
            "-e 's/@ac_cv_have_stddef_h@/1/g' " +
            "-e 's/@ac_cv_have_stdint_h@/1/g' " +
            select({
-               "@%ws%//tensorflow:windows": "-e 's/@ac_cv_have_sys_uio_h@/0/g' ",
-               "@%ws%//tensorflow:windows_msvc": "-e 's/@ac_cv_have_sys_uio_h@/0/g' ",
+               "@org_tensorflow//tensorflow:windows": "-e 's/@ac_cv_have_sys_uio_h@/0/g' ",
+               "@org_tensorflow//tensorflow:windows_msvc": "-e 's/@ac_cv_have_sys_uio_h@/0/g' ",
                "//conditions:default": "-e 's/@ac_cv_have_sys_uio_h@/1/g' ",
            }) +
            "-e 's/@SNAPPY_MAJOR@/1/g' " +
-- 
GitLab


From 9049b440df17de47baf16d9e24590c3d0761e2c9 Mon Sep 17 00:00:00 2001
From: Andrew Harp <andrewharp@google.com>
Date: Tue, 28 Nov 2017 16:06:37 -0800
Subject: [PATCH 0377/1225] Fix tensorflow-android jcenter link

PiperOrigin-RevId: 177233056
---
 tensorflow/contrib/android/README.md | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tensorflow/contrib/android/README.md b/tensorflow/contrib/android/README.md
index f49e5857fe..c7c128bf14 100644
--- a/tensorflow/contrib/android/README.md
+++ b/tensorflow/contrib/android/README.md
@@ -15,9 +15,9 @@ For prebuilt libraries, see the
 page for a recent build.
 
 The TensorFlow Inference Interface is also available as a
-[JCenter package](https://bintray.com/google/tensorflow/tensorflow-android) and
-can be included quite simply in your android project with a couple of lines in
-the project's `build.gradle` file:
+[JCenter package](https://bintray.com/google/tensorflow/tensorflow)
+(see the tensorflow-android directory) and can be included quite simply in your
+android project with a couple of lines in the project's `build.gradle` file:
 
 ```
 allprojects {
-- 
GitLab


From a80fd2acf08ceba0c8fc7684c3013e8e7d6bd8d3 Mon Sep 17 00:00:00 2001
From: Skye Wanderman-Milne <skyewm@google.com>
Date: Tue, 28 Nov 2017 16:11:48 -0800
Subject: [PATCH 0378/1225] C API: fix bug in ValidateNoCycles().

This change makes ValidateNoCycles() work when the graph has unused
node ids (i.e. when Graph::num_nodes() < Graph::num_node_ids()).

PiperOrigin-RevId: 177234002
---
 tensorflow/c/c_api.cc                   | 9 ++++-----
 tensorflow/python/framework/ops_test.py | 8 ++++++++
 2 files changed, 12 insertions(+), 5 deletions(-)

diff --git a/tensorflow/c/c_api.cc b/tensorflow/c/c_api.cc
index bb41f92306..4fb8ec8e4b 100644
--- a/tensorflow/c/c_api.cc
+++ b/tensorflow/c/c_api.cc
@@ -383,12 +383,11 @@ void TF_Reset_Helper(const TF_SessionOptions* opt, const char** containers,
 // be less than the total node count.
 Status ValidateNoCycles(const Graph& g) {
   // TODO(nolivia): check this on a subset of the graph instead of all of it.
-  int total_num_nodes = g.num_node_ids();
   // A node is ready when all of its inputs have been visited.
   std::vector<const Node*> ready;
-  std::vector<int> pending_count(total_num_nodes, 0);
+  std::vector<int> pending_count(g.num_node_ids(), 0);
 
-  for (int i = 0; i < total_num_nodes; ++i) {
+  for (int i = 0; i < g.num_node_ids(); ++i) {
     const Node* n = g.FindNodeId(i);
     if (n == nullptr) continue;
     pending_count[i] = n->in_edges().size();
@@ -421,7 +420,7 @@ Status ValidateNoCycles(const Graph& g) {
     }
   }
 
-  if (processed < total_num_nodes) {
+  if (processed < g.num_nodes()) {
     std::vector<string> nodes_in_cycle;
     for (int i = 0; i < pending_count.size() && nodes_in_cycle.size() < 3;
          ++i) {
@@ -430,7 +429,7 @@ Status ValidateNoCycles(const Graph& g) {
       }
     }
     return errors::InvalidArgument(
-        "Graph is invalid, contains a cycle with ", total_num_nodes - processed,
+        "Graph is invalid, contains a cycle with ", g.num_nodes() - processed,
         " nodes, including: ", str_util::Join(nodes_in_cycle, ", "));
   }
   return Status::OK();
diff --git a/tensorflow/python/framework/ops_test.py b/tensorflow/python/framework/ops_test.py
index 371eadcd13..3eae3b5a25 100644
--- a/tensorflow/python/framework/ops_test.py
+++ b/tensorflow/python/framework/ops_test.py
@@ -1876,6 +1876,14 @@ class GraphTest(test_util.TensorFlowTestCase):
     gc.collect()
     self.assertIsNone(g_ref())
 
+  def testRunnableAfterInvalidShape(self):
+    with ops.Graph().as_default():
+      with self.assertRaises(ValueError):
+        math_ops.add([1, 2], [1, 2, 3])
+      a = constant_op.constant(1)
+      with session.Session() as sess:
+        sess.run(a)
+
 
 @test_util.with_c_api
 class AttrScopeTest(test_util.TensorFlowTestCase):
-- 
GitLab


From f22261e61c2359483ad17465161918856bb86e65 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 28 Nov 2017 16:22:17 -0800
Subject: [PATCH 0379/1225] Add depthwise ops for NAS cell in nn_ops_test to
 improve the inference time on the particular depthwise ops.

PiperOrigin-RevId: 177235744
---
 tensorflow/core/kernels/nn_ops_test.cc | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tensorflow/core/kernels/nn_ops_test.cc b/tensorflow/core/kernels/nn_ops_test.cc
index 0db7c63b8b..a841291ddd 100644
--- a/tensorflow/core/kernels/nn_ops_test.cc
+++ b/tensorflow/core/kernels/nn_ops_test.cc
@@ -653,6 +653,8 @@ BM_ConvFloatDepthwiseFwd(32, 7, 7, 1024, 1, 1024, 3, 3, 1, SAME, conv6);
 // Benchmarks with different stride and padding options.
 BM_ConvFloatDepthwiseFwd(32, 112, 112, 3, 8, 24, 3, 3, 2, SAME, conv7);
 BM_ConvFloatDepthwiseFwd(32, 112, 112, 3, 8, 24, 3, 3, 2, VALID, conv8);
+BM_ConvFloatDepthwiseFwd(1, 100, 100, 72, 1, 72, 3, 3, 1, SAME, conv9);
+BM_ConvFloatDepthwiseFwd(1, 100, 100, 72, 1, 72, 5, 5, 1, SAME, conv10);
 
 #define BM_ConvFloatDepthwiseBk(BS, R, C, ID, DM, OD, KR, KC, STR, PAD, LABEL) \
   static void BM_ConvFloatDepthwiseBkInCPU1_##LABEL(int iters) {               \
-- 
GitLab


From b89251c6300b9941d06071543e5c4974d0db1984 Mon Sep 17 00:00:00 2001
From: Peter Hawkins <phawkins@google.com>
Date: Tue, 28 Nov 2017 16:31:39 -0800
Subject: [PATCH 0380/1225] [TF:XLA] Implement Cumsum and Cumprod using the XLA
 ReduceWindow operator.

PiperOrigin-RevId: 177236996
---
 tensorflow/compiler/tests/BUILD               |  14 ++
 tensorflow/compiler/tests/scan_ops_test.py    | 229 ++++++++++++++++++
 tensorflow/compiler/tf2xla/const_analysis.cc  |   2 +
 tensorflow/compiler/tf2xla/kernels/BUILD      |   1 +
 .../compiler/tf2xla/kernels/scan_ops.cc       | 140 +++++++++++
 tensorflow/compiler/tf2xla/xla_context.cc     |  14 ++
 tensorflow/compiler/tf2xla/xla_context.h      |   8 +
 tensorflow/compiler/tf2xla/xla_op_kernel.cc   |   5 +
 tensorflow/compiler/tf2xla/xla_op_kernel.h    |   5 +
 9 files changed, 418 insertions(+)
 create mode 100644 tensorflow/compiler/tests/scan_ops_test.py
 create mode 100644 tensorflow/compiler/tf2xla/kernels/scan_ops.cc

diff --git a/tensorflow/compiler/tests/BUILD b/tensorflow/compiler/tests/BUILD
index 6cad2b0824..fff1a7f57b 100644
--- a/tensorflow/compiler/tests/BUILD
+++ b/tensorflow/compiler/tests/BUILD
@@ -416,6 +416,20 @@ tf_xla_py_test(
     ],
 )
 
+tf_xla_py_test(
+    name = "scan_ops_test",
+    size = "small",
+    srcs = ["scan_ops_test.py"],
+    tags = ["optonly"],
+    deps = [
+        ":xla_test",
+        "//tensorflow/python:array_ops",
+        "//tensorflow/python:framework_for_generated_wrappers",
+        "//tensorflow/python:math_ops",
+        "//tensorflow/python:platform_test",
+    ],
+)
+
 tf_xla_py_test(
     name = "segment_reduction_ops_test",
     size = "medium",
diff --git a/tensorflow/compiler/tests/scan_ops_test.py b/tensorflow/compiler/tests/scan_ops_test.py
new file mode 100644
index 0000000000..3260e63b23
--- /dev/null
+++ b/tensorflow/compiler/tests/scan_ops_test.py
@@ -0,0 +1,229 @@
+# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Functional tests for scan ops."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import numpy as np
+
+from tensorflow.compiler.tests.xla_test import XLATestCase
+from tensorflow.python.framework import constant_op
+from tensorflow.python.framework import errors_impl
+from tensorflow.python.framework import ops
+from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import math_ops
+from tensorflow.python.platform import test
+
+
+def numpy_reverse(x, axis):
+  length = len(x.shape)
+  if axis < 0:
+    axis = length + axis
+
+  ix = [
+      slice(None, None, -1) if i == axis else slice(None) for i in range(length)
+  ]
+  return x[ix]
+
+
+def handle_options(func, x, axis, exclusive, reverse):
+  """Adds tf options to numpy scan ops."""
+  length = len(x.shape)
+  if axis < 0:
+    axis = length + axis
+
+  if reverse:
+    x = numpy_reverse(x, axis)
+
+  if exclusive:
+    ix_head = [slice(0, 1) if i == axis else slice(None) for i in range(length)]
+    ix_init = [
+        slice(0, -1) if i == axis else slice(None) for i in range(length)
+    ]
+    if func == np.cumsum:
+      init = np.zeros_like(x[ix_head])
+    elif func == np.cumprod:
+      init = np.ones_like(x[ix_head])
+    else:
+      raise ValueError("Unknown scan function.")
+    x = np.concatenate([init, func(x[ix_init], axis)], axis=axis)
+  else:
+    x = func(x, axis=axis)
+
+  if reverse:
+    x = numpy_reverse(x, axis)
+  return x
+
+
+class CumsumTest(XLATestCase):
+
+  valid_dtypes = [np.float32]
+
+  def axis_dtypes(self):
+    return set(self.int_types).intersection([np.int32, np.int64])
+
+  def _compare(self, x, axis, exclusive, reverse):
+    np_out = handle_options(np.cumsum, x, axis, exclusive, reverse)
+    with self.test_session(), self.test_scope():
+      p = array_ops.placeholder(x.dtype)
+      tf_out = math_ops.cumsum(p, axis, exclusive, reverse).eval(
+          feed_dict={p: x})
+
+    self.assertAllClose(np_out, tf_out)
+
+  def _compareAll(self, x, axis):
+    for exclusive in [True, False]:
+      for reverse in [True, False]:
+        self._compare(x, axis, exclusive, reverse)
+
+  def testEmpty(self):
+    for dtype in self.valid_dtypes:
+      x = np.zeros([0]).astype(dtype)
+      for axis in (-1, 0):
+        self._compareAll(x, axis)
+
+  def testAxisType(self):
+    for dtype in self.valid_dtypes:
+      x = np.arange(1, 6).reshape([5]).astype(dtype)
+      for axis_dtype in self.axis_dtypes():
+        with self.test_session(), self.test_scope():
+          p = array_ops.placeholder(x.dtype)
+          axis = constant_op.constant(0, axis_dtype)
+          math_ops.cumsum(p, axis).eval(feed_dict={p: x})
+
+  def test1D(self):
+    for dtype in self.valid_dtypes:
+      x = np.arange(1, 6).reshape([5]).astype(dtype)
+      for axis in (-1, 0):
+        self._compareAll(x, axis)
+
+  def test2D(self):
+    for dtype in self.valid_dtypes:
+      x = np.arange(0, 10).reshape([2, 5]).astype(dtype)
+      for axis in (-2, -1, 0, 1):
+        self._compareAll(x, axis)
+
+  def test3D(self):
+    for dtype in self.valid_dtypes:
+      x = np.arange(0, 20).reshape([2, 2, 5]).astype(dtype)
+      for axis in (-3, -2, -1, 0, 1, 2):
+        self._compareAll(x, axis)
+
+  def test6D(self):
+    for dtype in self.valid_dtypes:
+      x = np.arange(1, 145).reshape([2, 2, 3, 3, 2, 2]).astype(dtype)
+      for axis in range(-6, 6, 3):
+        self._compareAll(x, axis)
+
+  def testInvalidAxis(self):
+    x = np.arange(0, 10).reshape([2, 5]).astype(np.float32)
+    with self.test_session(), self.test_scope():
+      input_tensor = ops.convert_to_tensor(x)
+      with self.assertRaisesWithPredicateMatch(
+          errors_impl.InvalidArgumentError,
+          lambda e: "Expected scan axis in the range [-2, 2)" in str(e)):
+        math_ops.cumsum(input_tensor, -3).eval()
+      with self.assertRaisesWithPredicateMatch(
+          errors_impl.InvalidArgumentError,
+          lambda e: "Expected scan axis in the range [-2, 2)" in str(e)):
+        math_ops.cumsum(input_tensor, 2).eval()
+      with self.assertRaisesWithPredicateMatch(
+          errors_impl.InvalidArgumentError,
+          lambda e: "axis must be a scalar" in str(e)):
+        math_ops.cumsum(input_tensor, [0]).eval()
+
+
+class CumprodTest(XLATestCase):
+
+  valid_dtypes = [np.float32]
+
+  def axis_dtypes(self):
+    return set(self.int_types).intersection([np.int32, np.int64])
+
+  def _compare(self, x, axis, exclusive, reverse):
+    np_out = handle_options(np.cumprod, x, axis, exclusive, reverse)
+    with self.test_session(), self.test_scope():
+      p = array_ops.placeholder(x.dtype)
+      prod = math_ops.cumprod(p, axis, exclusive, reverse)
+      tf_out = prod.eval(feed_dict={p: x})
+
+    self.assertAllClose(np_out, tf_out)
+
+  def _compareAll(self, x, axis):
+    for exclusive in [True, False]:
+      for reverse in [True, False]:
+        self._compare(x, axis, exclusive, reverse)
+
+  def testEmpty(self):
+    for dtype in self.valid_dtypes:
+      x = np.zeros([0]).astype(dtype)
+      for axis in (-1, 0):
+        self._compareAll(x, axis)
+
+  def testAxisType(self):
+    for dtype in self.valid_dtypes:
+      x = np.arange(1, 6).reshape([5]).astype(dtype)
+      for axis_dtype in self.axis_dtypes():
+        with self.test_session(), self.test_scope():
+          p = array_ops.placeholder(x.dtype)
+          axis = constant_op.constant(0, axis_dtype)
+          math_ops.cumprod(x, axis).eval(feed_dict={p: x})
+
+  def test1D(self):
+    for dtype in self.valid_dtypes:
+      x = np.arange(1, 6).reshape([5]).astype(dtype)
+      for axis in (-1, 0):
+        self._compareAll(x, axis)
+
+  def test2D(self):
+    for dtype in self.valid_dtypes:
+      x = np.arange(1, 11).reshape([2, 5]).astype(dtype)
+      for axis in (-2, -1, 0, 1):
+        self._compareAll(x, axis)
+
+  def test3D(self):
+    for dtype in self.valid_dtypes:
+      x = np.arange(1, 21).reshape([2, 2, 5]).astype(dtype)
+      for axis in (-3, -2, -1, 0, 1, 2):
+        self._compareAll(x, axis)
+
+  def test6D(self):
+    for dtype in self.valid_dtypes:
+      x = np.arange(1, 145).reshape([2, 2, 3, 3, 2, 2]).astype(dtype)
+      for axis in range(-6, 6, 3):
+        self._compareAll(x, axis)
+
+  def testInvalidAxis(self):
+    x = np.arange(0, 10).reshape([2, 5]).astype(np.float32)
+    with self.test_session(), self.test_scope():
+      input_tensor = ops.convert_to_tensor(x)
+      with self.assertRaisesWithPredicateMatch(
+          errors_impl.InvalidArgumentError,
+          lambda e: "Expected scan axis in the range [-2, 2)" in str(e)):
+        math_ops.cumprod(input_tensor, -3).eval()
+      with self.assertRaisesWithPredicateMatch(
+          errors_impl.InvalidArgumentError,
+          lambda e: "Expected scan axis in the range [-2, 2)" in str(e)):
+        math_ops.cumprod(input_tensor, 2).eval()
+      with self.assertRaisesWithPredicateMatch(
+          errors_impl.InvalidArgumentError,
+          lambda e: "axis must be a scalar" in str(e)):
+        math_ops.cumprod(input_tensor, [0]).eval()
+
+
+if __name__ == "__main__":
+  test.main()
diff --git a/tensorflow/compiler/tf2xla/const_analysis.cc b/tensorflow/compiler/tf2xla/const_analysis.cc
index d57273d844..6a1a5467e0 100644
--- a/tensorflow/compiler/tf2xla/const_analysis.cc
+++ b/tensorflow/compiler/tf2xla/const_analysis.cc
@@ -52,6 +52,8 @@ Status BackwardsConstAnalysis(const Graph& g,
       {"Conv2DBackpropInput", "input_sizes"},
       {"Conv3DBackpropFilterV2", "filter_sizes"},
       {"Conv3DBackpropInputV2", "input_sizes"},
+      {"Cumprod", "axis"},
+      {"Cumsum", "axis"},
       {"DepthwiseConv2dNativeBackpropFilter", "filter_sizes"},
       {"DepthwiseConv2dNativeBackpropInput", "input_sizes"},
       {"DynamicStitch", "indices"},
diff --git a/tensorflow/compiler/tf2xla/kernels/BUILD b/tensorflow/compiler/tf2xla/kernels/BUILD
index 6302fece1f..a1720ff919 100644
--- a/tensorflow/compiler/tf2xla/kernels/BUILD
+++ b/tensorflow/compiler/tf2xla/kernels/BUILD
@@ -54,6 +54,7 @@ tf_kernel_library(
         "reshape_op.cc",
         "retval_op.cc",
         "reverse_op.cc",
+        "scan_ops.cc",
         "segment_reduction_ops.cc",
         "select_op.cc",
         "sendrecv_ops.cc",
diff --git a/tensorflow/compiler/tf2xla/kernels/scan_ops.cc b/tensorflow/compiler/tf2xla/kernels/scan_ops.cc
new file mode 100644
index 0000000000..3cc9d14411
--- /dev/null
+++ b/tensorflow/compiler/tf2xla/kernels/scan_ops.cc
@@ -0,0 +1,140 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include <vector>
+
+#include "tensorflow/compiler/tf2xla/shape_util.h"
+#include "tensorflow/compiler/tf2xla/type_util.h"
+#include "tensorflow/compiler/tf2xla/xla_helpers.h"
+#include "tensorflow/compiler/tf2xla/xla_op_kernel.h"
+#include "tensorflow/compiler/tf2xla/xla_op_registry.h"
+#include "tensorflow/compiler/xla/literal_util.h"
+#include "tensorflow/core/framework/op_kernel.h"
+#include "tensorflow/core/framework/partial_tensor_shape.h"
+#include "tensorflow/core/framework/register_types.h"
+#include "tensorflow/core/framework/tensor.h"
+#include "tensorflow/core/framework/tensor_types.h"
+#include "tensorflow/core/framework/types.h"
+#include "tensorflow/core/kernels/bounds_check.h"
+#include "tensorflow/core/kernels/concat_lib.h"
+#include "tensorflow/core/lib/core/status.h"
+#include "tensorflow/core/platform/types.h"
+
+namespace tensorflow {
+namespace {
+
+class ScanOp : public XlaOpKernel {
+ public:
+  ScanOp(OpKernelConstruction* ctx, bool sum) : XlaOpKernel(ctx), sum_(sum) {
+    OP_REQUIRES_OK(ctx, ctx->GetAttr("reverse", &reverse_));
+    OP_REQUIRES_OK(ctx, ctx->GetAttr("exclusive", &exclusive_));
+  }
+
+  void Compile(XlaOpKernelContext* ctx) override {
+    const TensorShape input_shape = ctx->InputShape(0);
+    const TensorShape tensor_axis_shape = ctx->InputShape(1);
+
+    OP_REQUIRES(ctx, TensorShapeUtils::IsScalar(tensor_axis_shape),
+                errors::InvalidArgument("ScanOp: axis must be a scalar, not ",
+                                        tensor_axis_shape.DebugString()));
+
+    int64 axis;
+    OP_REQUIRES_OK(ctx, ctx->ConstantInputAsIntScalar(1, &axis));
+    if (axis < 0) {
+      axis += input_shape.dims();
+    }
+    OP_REQUIRES(
+        ctx, FastBoundsCheck(axis, input_shape.dims()),
+        errors::InvalidArgument("ScanOp: Expected scan axis in the range [",
+                                -input_shape.dims(), ", ", input_shape.dims(),
+                                "), but got ", axis));
+
+    DataType dtype = ctx->input_type(0);
+
+    if (input_shape.num_elements() == 0) {
+      // Exit early if there is nothing to compute.
+      ctx->SetOutput(0, ctx->Input(0));
+      return;
+    }
+
+    xla::ComputationBuilder* builder = ctx->builder();
+
+    std::vector<int64> window_strides(input_shape.dims(), 1);
+    std::vector<int64> window_dims(input_shape.dims(), 1);
+    window_dims[axis] = input_shape.dim_size(axis);
+
+    std::vector<std::pair<int64, int64>> padding(input_shape.dims(), {0, 0});
+    padding[axis].first = input_shape.dim_size(axis) - 1;
+    // In exclusive mode, add an extra padding element so there is a complete
+    // window of padding before the data starts.
+    if (exclusive_) {
+      ++padding[axis].first;
+    }
+    if (reverse_) {
+      std::swap(padding[axis].first, padding[axis].second);
+    }
+
+    xla::ComputationDataHandle input = ctx->Input(0);
+    xla::ComputationDataHandle init;
+    const xla::Computation* reducer;
+    if (sum_) {
+      init = XlaHelpers::Zero(builder, dtype);
+      reducer = ctx->GetOrCreateAdd(dtype);
+    } else {
+      init = XlaHelpers::One(builder, dtype);
+      reducer = ctx->GetOrCreateMul(dtype);
+    }
+    auto output = builder->ReduceWindowWithGeneralPadding(
+        ctx->Input(0), init, *reducer, window_dims, window_strides, padding);
+
+    // In exclusive mode, we have computed an extra element containing the sum
+    // of all the input elements. Slice off this extra "last" element.
+    if (exclusive_) {
+      if (reverse_) {
+        output = builder->SliceInDim(output, 1, input_shape.dim_size(axis) + 1,
+                                     1, axis);
+
+      } else {
+        output =
+            builder->SliceInDim(output, 0, input_shape.dim_size(axis), 1, axis);
+      }
+    }
+    ctx->SetOutput(0, output);
+  }
+
+ private:
+  const bool sum_;  // True=cumulative sum. False=cumulative product.
+  bool reverse_;
+  bool exclusive_;
+};
+
+class CumsumOp : public ScanOp {
+ public:
+  explicit CumsumOp(OpKernelConstruction* ctx) : ScanOp(ctx, /*sum=*/true) {}
+};
+// TODO(phawkins): implement non-float windowed reductions in XLA and remove the
+// type constraint.
+REGISTER_XLA_OP(Name("Cumsum").TypeConstraint("T", DT_FLOAT), CumsumOp);
+
+class CumprodOp : public ScanOp {
+ public:
+  explicit CumprodOp(OpKernelConstruction* ctx) : ScanOp(ctx, /*sum=*/false) {}
+};
+// TODO(phawkins): implement non-float windowed reductions in XLA and remove the
+// type constraint.
+REGISTER_XLA_OP(Name("Cumprod").TypeConstraint("T", DT_FLOAT), CumprodOp);
+
+}  // anonymous namespace
+}  // namespace tensorflow
diff --git a/tensorflow/compiler/tf2xla/xla_context.cc b/tensorflow/compiler/tf2xla/xla_context.cc
index 651bafd6c5..78e770c62b 100644
--- a/tensorflow/compiler/tf2xla/xla_context.cc
+++ b/tensorflow/compiler/tf2xla/xla_context.cc
@@ -178,6 +178,20 @@ const xla::Computation* XlaContext::GetOrCreateAdd(const DataType type) {
   });
 }
 
+const xla::Computation* XlaContext::GetOrCreateMul(const DataType type) {
+  return LookupOrCreate(type, &mul_func_, [this, type] {
+    const string type_string = DataTypeString(type);
+    VLOG(1) << "Building Mul() for " << type_string;
+    xla::ComputationBuilder b(builder()->client(), "mul<" + type_string + ">");
+    xla::PrimitiveType xla_type;
+    TF_CHECK_OK(DataTypeToPrimitiveType(type, &xla_type));
+    auto x = b.Parameter(0, xla::ShapeUtil::MakeShape(xla_type, {}), "x");
+    auto y = b.Parameter(1, xla::ShapeUtil::MakeShape(xla_type, {}), "y");
+    b.Mul(x, y);
+    return b.Build().ConsumeValueOrDie();
+  });
+}
+
 const xla::Computation* XlaContext::LookupOrCreate(
     DataType type, ComputationMap* out,
     const std::function<xla::Computation()>& create) {
diff --git a/tensorflow/compiler/tf2xla/xla_context.h b/tensorflow/compiler/tf2xla/xla_context.h
index de8aafa362..55d2995987 100644
--- a/tensorflow/compiler/tf2xla/xla_context.h
+++ b/tensorflow/compiler/tf2xla/xla_context.h
@@ -102,6 +102,11 @@ class XlaContext : public ResourceBase {
   // separate specialization of the computation for each DataType.
   const xla::Computation* GetOrCreateAdd(const DataType type);
 
+  // Get an XLA lambda to compute Mul. This is cached in the
+  // XlaContext since it may be used by multiple Ops. There is a
+  // separate specialization of the computation for each DataType.
+  const xla::Computation* GetOrCreateMul(const DataType type);
+
   // The name of the XlaContext resource during symbolic graph execution.
   static const char kXlaContextResourceName[];
 
@@ -155,6 +160,9 @@ class XlaContext : public ResourceBase {
   // Cached computation to compute Sum of two elements, specialized by type.
   ComputationMap add_func_;
 
+  // Cached computation to compute Mul of two elements, specialized by type.
+  ComputationMap mul_func_;
+
   // Cached computation to compute Sigmoid of an element, specialized by type.
   ComputationMap sigmoid_func_;
 
diff --git a/tensorflow/compiler/tf2xla/xla_op_kernel.cc b/tensorflow/compiler/tf2xla/xla_op_kernel.cc
index a052bb105e..f16472cac8 100644
--- a/tensorflow/compiler/tf2xla/xla_op_kernel.cc
+++ b/tensorflow/compiler/tf2xla/xla_op_kernel.cc
@@ -417,6 +417,11 @@ const xla::Computation* XlaOpKernelContext::GetOrCreateAdd(
   return XlaContext::Get(context_).GetOrCreateAdd(type);
 }
 
+const xla::Computation* XlaOpKernelContext::GetOrCreateMul(
+    const DataType type) {
+  return XlaContext::Get(context_).GetOrCreateMul(type);
+}
+
 XlaOpKernel::XlaOpKernel(OpKernelConstruction* context) : OpKernel(context) {}
 
 void XlaOpKernel::Compute(OpKernelContext* context) {
diff --git a/tensorflow/compiler/tf2xla/xla_op_kernel.h b/tensorflow/compiler/tf2xla/xla_op_kernel.h
index 76bcf594e6..06845a674e 100644
--- a/tensorflow/compiler/tf2xla/xla_op_kernel.h
+++ b/tensorflow/compiler/tf2xla/xla_op_kernel.h
@@ -210,6 +210,11 @@ class XlaOpKernelContext {
   // separate specialization of the computation for each DataType.
   const xla::Computation* GetOrCreateAdd(const DataType type);
 
+  // Gets an XLA lambda to compute Mul. This is cached in the
+  // XlaContext since it may be used by multiple Ops. There is a
+  // separate specialization of the computation for each DataType.
+  const xla::Computation* GetOrCreateMul(const DataType type);
+
  private:
   OpKernelContext* const context_;
 };
-- 
GitLab


From f2f6356f15f4c8b5c560ee8aec7bf1dd097bfbfb Mon Sep 17 00:00:00 2001
From: Sourabh Bajaj <sourabhbajaj@google.com>
Date: Tue, 28 Nov 2017 16:31:57 -0800
Subject: [PATCH 0381/1225] Automated g4 rollback of changelist 177191521

PiperOrigin-RevId: 177237037
---
 tensorflow/core/kernels/strided_slice_op.cc        | 1 +
 tensorflow/core/kernels/strided_slice_op_gpu.cu.cc | 1 +
 2 files changed, 2 insertions(+)

diff --git a/tensorflow/core/kernels/strided_slice_op.cc b/tensorflow/core/kernels/strided_slice_op.cc
index 8fc40db3cc..73b6d4cf6a 100644
--- a/tensorflow/core/kernels/strided_slice_op.cc
+++ b/tensorflow/core/kernels/strided_slice_op.cc
@@ -427,6 +427,7 @@ REGISTER_STRIDED_SLICE(bfloat16);
 TF_CALL_GPU_NUMBER_TYPES(REGISTER_GPU);
 TF_CALL_complex64(REGISTER_GPU);
 TF_CALL_complex128(REGISTER_GPU);
+TF_CALL_int64(REGISTER_GPU);
 
 // A special GPU kernel for int32.
 // TODO(b/25387198): Also enable int32 in device memory. This kernel
diff --git a/tensorflow/core/kernels/strided_slice_op_gpu.cu.cc b/tensorflow/core/kernels/strided_slice_op_gpu.cu.cc
index a8487f49f4..8ca27e3b92 100644
--- a/tensorflow/core/kernels/strided_slice_op_gpu.cu.cc
+++ b/tensorflow/core/kernels/strided_slice_op_gpu.cu.cc
@@ -53,6 +53,7 @@ typedef Eigen::GpuDevice GPUDevice;
 TF_CALL_GPU_NUMBER_TYPES(DEFINE_GPU_KERNELS);
 TF_CALL_complex64(DEFINE_GPU_KERNELS);
 TF_CALL_complex128(DEFINE_GPU_KERNELS);
+TF_CALL_int64(DEFINE_GPU_KERNELS);
 DEFINE_GPU_KERNELS(int32);
 
 #undef DEFINE_GPU_KERNELS
-- 
GitLab


From 625ae88377b16705378065576cfd6983bb876435 Mon Sep 17 00:00:00 2001
From: Igor Saprykin <isaprykin@google.com>
Date: Tue, 28 Nov 2017 16:47:47 -0800
Subject: [PATCH 0382/1225] Round-robin variables across local devices with
 `replicate_model_fn`.

When the user specifies `replicate_model_fn(... devices=[d1, d2, ... dN])` all variables are going to be stored on each device an round-robin fashion.  They are still going to be shared by all devices.

PiperOrigin-RevId: 177239111
---
 tensorflow/contrib/estimator/BUILD            |   2 +-
 .../python/estimator/replicate_model_fn.py    |  94 +++++++++----
 .../estimator/replicate_model_fn_test.py      | 130 ++++++++++++++++--
 3 files changed, 188 insertions(+), 38 deletions(-)

diff --git a/tensorflow/contrib/estimator/BUILD b/tensorflow/contrib/estimator/BUILD
index 8395e2db5e..e4d51aa148 100644
--- a/tensorflow/contrib/estimator/BUILD
+++ b/tensorflow/contrib/estimator/BUILD
@@ -346,7 +346,7 @@ py_library(
 
 cuda_py_test(
     name = "replicate_model_fn_test",
-    size = "small",
+    size = "medium",
     srcs = ["python/estimator/replicate_model_fn_test.py"],
     additional_deps = [
         "//tensorflow/python/estimator",
diff --git a/tensorflow/contrib/estimator/python/estimator/replicate_model_fn.py b/tensorflow/contrib/estimator/python/estimator/replicate_model_fn.py
index d9c83aa865..6f7f37473f 100644
--- a/tensorflow/contrib/estimator/python/estimator/replicate_model_fn.py
+++ b/tensorflow/contrib/estimator/python/estimator/replicate_model_fn.py
@@ -42,10 +42,45 @@ from tensorflow.python.ops import sparse_ops
 from tensorflow.python.ops import state_ops
 from tensorflow.python.ops import variable_scope
 from tensorflow.python.platform import tf_logging
+from tensorflow.python.training import device_setter as device_setter_lib
 from tensorflow.python.training import training_util
 
 
-def replicate_model_fn(model_fn, optimizer_fn, devices=None):
+class Mode(object):
+  """Modes for variables replication used for forcing a particular mode."""
+
+  AUTO = 0
+  """Use internal heuristics for choosing the best Mode value.
+
+     This mode is supposed to be the most appropriate in most cases given what
+     is known about the system.
+  """
+  # TODO(isaprykin): Query system configuration to choose modes other than
+  # `SHARED_LOCAL_PARAMETER_SERVER`, even though it is often appropriate.
+
+  SHARED_LOCAL_PARAMETER_SERVER = 2
+  """Variables are placed on a single device and shared across all devices.
+
+  Two ways to achieve this replication over available GPUs are supported:
+    1)  If exactly 1 GPU is detected, then variables and operations are placed
+        onto GPU.
+    2)  If more than 1 GPU is detected, then variables are going to be placed on
+        the CPU.  Replicas of operations are placed on each individual GPU.
+  """
+
+  SHARED_ROUND_ROBIN = 3
+  """Variables are placed on all devices in a round-robin fashion.
+
+  Every subsequent variable is placed on the next device.  There is only one
+  copy of each variable that is shared across all devices.
+  """
+
+  # TODO(isaprykin):  Implement `REPLICATED_ALL_REDUCE`.
+  REPLICATED_ALL_REDUCE = 3
+  """Variables are mirrored on all devices."""
+
+
+def replicate_model_fn(model_fn, optimizer_fn, devices=None, mode=Mode.AUTO):
   """Replicate `Estimator.model_fn` over GPUs within a single host.
 
   The given `model_fn` specifies a single forward pass of a model.  To replicate
@@ -58,14 +93,11 @@ def replicate_model_fn(model_fn, optimizer_fn, devices=None):
   optimizer.
 
   If `devices` are `None`, then all available GPUs are going to be used for
-  replication.  If no GPUs are available, then the model is going to be
-  placed on the CPU.
+  replication: `devices=[<all available GPUs>]`.  If no GPUs are available,
+  then the model is going to be placed on the CPU: `devices=['/device:CPU:0']`.
 
-  Two modes of local replication over available GPUs are supported:
-    1)  If exactly 1 GPU is detected, then variables and operations are placed
-        onto GPU.
-    2)  If more than 1 GPU is detected, then variables are going to be placed on
-        the CPU.  Replicas of operations are placed on each individual GPU.
+  Varibles are placed on to `devices` according to the given `mode`. Operations
+  are going for each tower are going to be copied on each device.
 
   Here is an example of how one might use their `model_fn` to run over GPUs:
     ```python
@@ -127,6 +159,8 @@ def replicate_model_fn(model_fn, optimizer_fn, devices=None):
       argument can be used to replice only on the subset of available GPUs.
       If `None`, then all available GPUs are going to be used for replication.
       If no GPUs are available, then the model is going to be placed on the CPU.
+    mode: An optional argument that specifies the replication method used for
+      distributing variables across devices.
 
   Returns:
     A replicated version of the supplied `model_fn`. Returned function that
@@ -137,16 +171,21 @@ def replicate_model_fn(model_fn, optimizer_fn, devices=None):
     devices = _get_local_devices('GPU') or _get_local_devices('CPU')
 
   is_a_single_gpu_case = len(devices) == 1 and 'GPU' in devices[0]
-  local_ps_device = '/{}:0'.format('GPU' if is_a_single_gpu_case else 'CPU')
+  consolidation_device = '/{}:0'.format('GPU'
+                                        if is_a_single_gpu_case else 'CPU')
 
-  tf_logging.info('Replicating the `model_fn` across {}.  Local parameter '
-                  'server device is going to be {}.'.format(
-                      devices, local_ps_device))
+  ps_devices = [consolidation_device]
+  if mode == Mode.SHARED_ROUND_ROBIN:
+    ps_devices = devices
+
+  tf_logging.info('Replicating the `model_fn` across {}.  Variables are going '
+                  'to be placed on {}.  Consolidation device is going to be {}.'
+                  .format(devices, ps_devices, consolidation_device))
 
   def replicated_model_fn(features, labels, mode, params=None, config=None):
     """Replicated version of `model_fn` to be used instead."""
     feature_shards, label_shards = _split_batch(
-        features, labels, len(devices), device=local_ps_device)
+        features, labels, len(devices), device=consolidation_device)
     tower_specs = _get_loss_towers(
         model_fn=model_fn,
         mode=mode,
@@ -155,17 +194,17 @@ def replicate_model_fn(model_fn, optimizer_fn, devices=None):
         params=params,
         config=config,
         devices=devices,
-        local_ps_device=local_ps_device)
+        local_ps_devices=ps_devices)
 
     if mode == model_fn_lib.ModeKeys.TRAIN:
       train_op = _minimize_towers(tower_specs,
                                   _call_optimizer_fn(optimizer_fn, params))
       return _train_spec(
-          tower_specs, train_op, aggregation_device=local_ps_device)
+          tower_specs, train_op, aggregation_device=consolidation_device)
     elif mode == model_fn_lib.ModeKeys.EVAL:
-      return _eval_spec(tower_specs, aggregation_device=local_ps_device)
+      return _eval_spec(tower_specs, aggregation_device=consolidation_device)
     elif mode == model_fn_lib.ModeKeys.PREDICT:
-      return _predict_spec(tower_specs, aggregation_device=local_ps_device)
+      return _predict_spec(tower_specs, aggregation_device=consolidation_device)
 
   return replicated_model_fn
 
@@ -222,7 +261,7 @@ def _get_loss_towers(model_fn,
                      params,
                      config,
                      devices,
-                     local_ps_device,
+                     local_ps_devices,
                      name_scope_pattern=_DEFAULT_NAME_SCOPE_PATTERN):
   """Replicate the loss computation across devices."""
   tower_specs = []
@@ -234,15 +273,22 @@ def _get_loss_towers(model_fn,
   if 'config' in model_fn_args:
     optional_params['config'] = copy.deepcopy(config)
 
+  # pylint: disable=protected-access
+  round_robin_strategy = device_setter_lib._RoundRobinStrategy(
+      num_tasks=len(local_ps_devices))
+  # pylint: enable=protected-access
+
   for i, device in enumerate(devices):
     is_the_first_tower = (i == 0)
 
     device_setter = _local_device_setter(
-        worker_device=device, ps_device=local_ps_device)
+        worker_device=device,
+        ps_devices=local_ps_devices,
+        ps_strategy=round_robin_strategy)
 
-    # We would like to preserve the names of the variables and ops that a user
-    # might be relying on. Names with prefix are going to resolve to variables
-    # and ops of the first tower.
+    # We would like to preserve the names of the variables and ops that the user
+    # might be relying on. Names without a prefix are going to resolve to
+    # variables and ops of the first tower.
     name_scope = name_scope_pattern
     if is_the_first_tower:
       name_scope = ''
@@ -263,7 +309,7 @@ def _get_loss_towers(model_fn,
   return tower_specs
 
 
-def _local_device_setter(ps_device, worker_device):
+def _local_device_setter(worker_device, ps_devices, ps_strategy):
   """A device setter that puts distributes Var/Ops to PS/workers."""
   ps_ops = ['Variable', 'VariableV2', 'VarHandleOp']
 
@@ -273,7 +319,7 @@ def _local_device_setter(ps_device, worker_device):
     node_def = op if isinstance(op, node_def_pb2.NodeDef) else op.node_def
     if node_def.op in ps_ops:
       ps_device_spec = framework_device.DeviceSpec.from_string(
-          '{}'.format(ps_device))
+          '{}'.format(ps_devices[ps_strategy(op)]))
 
       ps_device_spec.merge_from(current_device)
       return ps_device_spec.to_string()
diff --git a/tensorflow/contrib/estimator/python/estimator/replicate_model_fn_test.py b/tensorflow/contrib/estimator/python/estimator/replicate_model_fn_test.py
index ffe69f89b4..662021853d 100644
--- a/tensorflow/contrib/estimator/python/estimator/replicate_model_fn_test.py
+++ b/tensorflow/contrib/estimator/python/estimator/replicate_model_fn_test.py
@@ -49,15 +49,29 @@ from tensorflow.python.platform import gfile
 from tensorflow.python.platform import test
 from tensorflow.python.saved_model import signature_constants
 from tensorflow.python.summary.writer import writer_cache
+from tensorflow.python.training import device_setter
 from tensorflow.python.training import gradient_descent
 
 
+# TODO(isaprykin):  Parametrize all the tests on replicate_model_fn.Mode when
+#   it's supported.
 class DNNClassifierIntegrationTest(test_util.TensorFlowTestCase):
 
   def setUp(self):
     self._model_dir = tempfile.mkdtemp()
 
-  def test_complete_flow(self):
+  def test_complete_flow_with_mode_auto(self):
+    return self._complete_flow_with_mode(replicate_model_fn.Mode.AUTO)
+
+  def test_complete_flow_with_mode_local_ps_server(self):
+    return self._complete_flow_with_mode(
+        replicate_model_fn.Mode.SHARED_LOCAL_PARAMETER_SERVER)
+
+  def test_complete_flow_with_mode_round_robin(self):
+    return self._complete_flow_with_mode(
+        replicate_model_fn.Mode.SHARED_ROUND_ROBIN)
+
+  def _complete_flow_with_mode(self, mode):
     n_classes = 3
     input_dimension = 2
     batch_size = 12
@@ -109,7 +123,8 @@ class DNNClassifierIntegrationTest(test_util.TensorFlowTestCase):
         model_fn=replicate_model_fn.replicate_model_fn(
             estimator.model_fn,
             optimizer_fn,
-            devices=['/gpu:0', '/gpu:1', '/gpu:2']),
+            devices=['/gpu:0', '/gpu:1', '/gpu:2'],
+            mode=mode),
         model_dir=estimator.model_dir,
         config=estimator.config,
         params=estimator.params)
@@ -359,7 +374,7 @@ class GetLossTowersTest(test_util.TensorFlowTestCase):
           params=None,
           config=None,
           devices=['/gpu:0', '/gpu:1'],
-          local_ps_device='/gpu:0',
+          local_ps_devices=['/gpu:0'],
           name_scope_pattern='test_tower_{}')
       session.run(variables.global_variables_initializer())
 
@@ -382,6 +397,54 @@ class GetLossTowersTest(test_util.TensorFlowTestCase):
         c = variable_scope.get_variable('c', dtype=dtypes.float64)
         self.assertEqual(0.25, session.run(c))
 
+  def test_variables_are_round_robined_correctly(self):
+    """Test that creates multiple variables and tests round-robin placement."""
+
+    def model_fn(mode, features, labels, params):
+      del params
+      for variable_name in ['a', 'b', 'c', 'd']:
+        c = variable_scope.get_variable(
+            variable_name,
+            initializer=constant_op.constant(0.25, dtype=dtypes.float64),
+            dtype=dtypes.float64)
+
+      predictions = math_ops.add(np.array([0.1, 0.2, 0.3, features[0]]), c)
+      labels = np.array([0.1, 0.2, 0.3, labels[0]])
+      loss = losses.absolute_difference(
+          labels=labels,
+          predictions=predictions,
+          reduction=losses.Reduction.SUM)
+      return model_fn_lib.EstimatorSpec(
+          mode=mode, loss=math_ops.reduce_sum(loss))
+
+    with self.test_session() as session:
+      tower_specs = replicate_model_fn._get_loss_towers(
+          model_fn,
+          mode=None,
+          features=[[0.6], [1.6], [2.6]],
+          labels=[[0.6], [0.6], [2.6]],
+          params=None,
+          config=None,
+          devices=['/gpu:0', '/gpu:1', '/gpu:3'],
+          local_ps_devices=['/gpu:0', '/gpu:1', '/gpu:3'],
+          name_scope_pattern='test_tower_{}')
+      session.run(variables.global_variables_initializer())
+
+      self.assertEqual(len(tower_specs), 3)
+      self.assertEqual('/device:GPU:0', tower_specs[0].loss.device)
+      self.assertEqual('/device:GPU:1', tower_specs[1].loss.device)
+      self.assertEqual('/device:GPU:3', tower_specs[2].loss.device)
+
+      with variable_scope.variable_scope('', reuse=True):
+        a = variable_scope.get_variable('a', dtype=dtypes.float64)
+        self.assertEqual('/device:GPU:0', a.device)
+        b = variable_scope.get_variable('b', dtype=dtypes.float64)
+        self.assertEqual('/device:GPU:1', b.device)
+        c = variable_scope.get_variable('c', dtype=dtypes.float64)
+        self.assertEqual('/device:GPU:3', c.device)
+        d = variable_scope.get_variable('d', dtype=dtypes.float64)
+        self.assertEqual('/device:GPU:0', d.device)
+
 
 class SplitBatchTest(test_util.TensorFlowTestCase):
 
@@ -604,7 +667,7 @@ class PredictSpecTest(test_util.TensorFlowTestCase):
           params=None,
           config=None,
           devices=['/gpu:0', '/gpu:1'],
-          local_ps_device='/gpu:0',
+          local_ps_devices=['/gpu:0'],
       )
       session.run(variables.global_variables_initializer())
 
@@ -850,25 +913,66 @@ class GetLocalDevicesTest(test_util.TensorFlowTestCase):
 class LocalDeviceSetterTest(test_util.TensorFlowTestCase):
 
   def test_vars_are_on_ps_but_ops_are_on_workers(self):
+    ps_devices = ['/device:GPU:3']
+    round_robin = device_setter._RoundRobinStrategy(num_tasks=len(ps_devices))
+
+    local_device_setter = replicate_model_fn._local_device_setter(
+        ps_devices=ps_devices,
+        ps_strategy=round_robin,
+        worker_device='/device:GPU:2')
+
+    with ops_lib.device(local_device_setter):
+      a = variables.Variable(0.01)
+      self.assertEqual('/device:GPU:3', a.device)
+
+      b = variables.Variable(0.02)
+      self.assertEqual('/device:GPU:3', b.device)
+
+      c = variables.Variable(0.03)
+      self.assertEqual('/device:GPU:3', c.device)
+
+      a_op = array_ops.concat(a, axis=0)
+      self.assertEqual('/device:GPU:2', a_op.device)
+
+      b_op = array_ops.concat(b, axis=0)
+      self.assertEqual('/device:GPU:2', b_op.device)
+
+  def test_round_robin_placement(self):
+    ps_devices = [
+        '/device:GPU:0', '/device:GPU:1', '/device:GPU:3', '/device:GPU:4'
+    ]
+    round_robin = device_setter._RoundRobinStrategy(num_tasks=len(ps_devices))
+
     local_device_setter = replicate_model_fn._local_device_setter(
-        ps_device='/device:GPU:3', worker_device='/device:GPU:2')
+        ps_devices=ps_devices,
+        ps_strategy=round_robin,
+        worker_device='/device:GPU:2')
 
     with ops_lib.device(local_device_setter):
-      c = variables.Variable(0.01)
+      a = variables.Variable(0.01)
+      self.assertEqual('/device:GPU:0', a.device)
+
+      b = variables.Variable(0.02)
+      self.assertEqual('/device:GPU:1', b.device)
+
+      c = variables.Variable(0.03)
       self.assertEqual('/device:GPU:3', c.device)
 
-      cc = variables.Variable(0.02)
-      self.assertEqual('/device:GPU:3', cc.device)
+      a_op = array_ops.concat(a, axis=0)
+      self.assertEqual('/device:GPU:2', a_op.device)
+
+      b_op = array_ops.concat(b, axis=0)
+      self.assertEqual('/device:GPU:2', b_op.device)
 
-      ccc = variables.Variable(0.03)
-      self.assertEqual('/device:GPU:3', ccc.device)
+      c = variables.Variable(0.03)
+      self.assertEqual('/device:GPU:4', c.device)
+
+      d = variables.Variable(0.03)
+      self.assertEqual('/device:GPU:0', d.device)
 
       c_op = array_ops.concat(c, axis=0)
       self.assertEqual('/device:GPU:2', c_op.device)
 
-      cc_op = array_ops.concat(cc, axis=0)
-      self.assertEqual('/device:GPU:2', cc_op.device)
-
 
 class ComputeSumWithDevicePlacementTest(test_util.TensorFlowTestCase):
 
-- 
GitLab


From 57839fbf307fb01a280505f1f964d7331104d8f3 Mon Sep 17 00:00:00 2001
From: Jiri Simsa <jsimsa@google.com>
Date: Tue, 28 Nov 2017 17:02:01 -0800
Subject: [PATCH 0383/1225] Re-using (the more general) DeserializeSparse
 kernel to implement DeserializeSparseMany and improving documentation.

PiperOrigin-RevId: 177241063
---
 .../base_api/api_def_DeserializeSparse.pbtxt  |  43 +++++
 .../core/kernels/serialize_sparse_op.cc       | 177 +-----------------
 tensorflow/core/ops/sparse_ops.cc             |  42 +++++
 .../sparse_serialization_ops_test.py          |   4 +-
 tensorflow/python/ops/sparse_ops.py           |  45 ++++-
 5 files changed, 129 insertions(+), 182 deletions(-)

diff --git a/tensorflow/core/api_def/base_api/api_def_DeserializeSparse.pbtxt b/tensorflow/core/api_def/base_api/api_def_DeserializeSparse.pbtxt
index 00e96c8a15..dfaa531cbc 100644
--- a/tensorflow/core/api_def/base_api/api_def_DeserializeSparse.pbtxt
+++ b/tensorflow/core/api_def/base_api/api_def_DeserializeSparse.pbtxt
@@ -14,4 +14,47 @@ The `dtype` of the serialized `SparseTensor` objects.
 END
   }
   summary: "Deserialize `SparseTensor` objects."
+  description: <<END
+The input `serialized_sparse` must have the shape `[?, ?, ..., ?, 3]` where
+the last dimension stores serialized `SparseTensor` objects and the other N
+dimensions (N >= 0) correspond to a batch. The ranks of the original
+`SparseTensor` objects must all match. When the final `SparseTensor` is
+created, its rank is the rank of the incoming `SparseTensor` objects plus N;
+the sparse tensors have been concatenated along new dimensions, one for each
+batch.
+
+The output `SparseTensor` object's shape values for the original dimensions
+are the max across the input `SparseTensor` objects' shape values for the
+corresponding dimensions. The new dimensions match the size of the batch.
+
+The input `SparseTensor` objects' indices are assumed ordered in
+standard lexicographic order.  If this is not the case, after this
+step run `SparseReorder` to restore index ordering.
+
+For example, if the serialized input is a `[2 x 3]` matrix representing two
+original `SparseTensor` objects:
+
+    index = [ 0]
+            [10]
+            [20]
+    values = [1, 2, 3]
+    shape = [50]
+
+and
+
+    index = [ 2]
+            [10]
+    values = [4, 5]
+    shape = [30]
+
+then the final deserialized `SparseTensor` will be:
+
+    index = [0  0]
+            [0 10]
+            [0 20]
+            [1  2]
+            [1 10]
+    values = [1, 2, 3, 4, 5]
+    shape = [2 50]
+END
 }
diff --git a/tensorflow/core/kernels/serialize_sparse_op.cc b/tensorflow/core/kernels/serialize_sparse_op.cc
index cfb86904d5..f4159da229 100644
--- a/tensorflow/core/kernels/serialize_sparse_op.cc
+++ b/tensorflow/core/kernels/serialize_sparse_op.cc
@@ -409,186 +409,11 @@ class DeserializeSparseOp : public OpKernel {
 TF_CALL_ALL_TYPES(REGISTER_KERNELS);
 #undef REGISTER_KERNELS
 
-template <typename T>
-class DeserializeManySparseOp : public OpKernel {
- public:
-  explicit DeserializeManySparseOp(OpKernelConstruction* context)
-      : OpKernel(context) {}
-
-  void Compute(OpKernelContext* context) override {
-    const Tensor& serialized_sparse = context->input(0);
-    OP_REQUIRES(context, TensorShapeUtils::IsMatrix(serialized_sparse.shape()),
-                errors::InvalidArgument(
-                    "Serialized sparse should be a matrix but received shape ",
-                    serialized_sparse.shape().DebugString()));
-    OP_REQUIRES(
-        context, serialized_sparse.shape().dim_size(1) == 3,
-        errors::InvalidArgument(
-            "Serialized sparse should have 3 columns but received shape ",
-            serialized_sparse.shape().DebugString()));
-
-    int num_sparse_tensors = serialized_sparse.shape().dim_size(0);
-
-    OP_REQUIRES(
-        context, num_sparse_tensors > 0,
-        errors::InvalidArgument("Must have at least 1 serialized SparseTensor, "
-                                "but input matrix has 0 rows"));
-
-    std::vector<Tensor> indices_to_concat;
-    std::vector<Tensor> values_to_concat;
-    std::vector<TensorShape> shapes_to_concat;
-
-    const auto& serialized_sparse_t = serialized_sparse.matrix<string>();
-
-    for (int i = 0; i < num_sparse_tensors; ++i) {
-      Tensor output_indices(DT_INT64);
-      Tensor output_values(DataTypeToEnum<T>::value);
-      Tensor output_shape(DT_INT64);
-      TensorProto proto_indices;
-      TensorProto proto_values;
-      TensorProto proto_shape;
-
-      OP_REQUIRES(
-          context,
-          ParseProtoUnlimited(&proto_indices, serialized_sparse_t(i, 0)),
-          errors::InvalidArgument("Could not parse serialized_sparse[", i,
-                                  ", 0]"));
-      OP_REQUIRES(context,
-                  ParseProtoUnlimited(&proto_values, serialized_sparse_t(i, 1)),
-                  errors::InvalidArgument("Could not parse serialized_sparse[",
-                                          i, ", 1]"));
-      OP_REQUIRES(context,
-                  ParseProtoUnlimited(&proto_shape, serialized_sparse_t(i, 2)),
-                  errors::InvalidArgument("Could not parse serialized_sparse[",
-                                          i, ", 2]"));
-
-      OP_REQUIRES(context, output_indices.FromProto(proto_indices),
-                  errors::InvalidArgument(
-                      "Could not construct Tensor serialized_sparse[", i,
-                      ", 0] (indices)"));
-      OP_REQUIRES(context, TensorShapeUtils::IsMatrix(output_indices.shape()),
-                  errors::InvalidArgument(
-                      "Expected serialized_sparse[", i,
-                      ", 0] to represent an index matrix but received shape ",
-                      output_indices.shape().DebugString()));
-      OP_REQUIRES(context, output_values.FromProto(proto_values),
-                  errors::InvalidArgument(
-                      "Could not construct Tensor serialized_sparse[", i,
-                      ", 1] (values)"));
-      OP_REQUIRES(context, TensorShapeUtils::IsVector(output_values.shape()),
-                  errors::InvalidArgument(
-                      "Expected serialized_sparse[", i,
-                      ", 1] to represent a values vector but received shape ",
-                      output_values.shape().DebugString()));
-      OP_REQUIRES(context, output_shape.FromProto(proto_shape),
-                  errors::InvalidArgument(
-                      "Could not construct Tensor serialized_sparse[", i,
-                      ", 2] (shape)"));
-      OP_REQUIRES(
-          context, TensorShapeUtils::IsVector(output_shape.shape()),
-          errors::InvalidArgument("Expected serialized_sparse[", i,
-                                  ", 1] to be a shape vector but its shape is ",
-                                  output_shape.shape().DebugString()));
-
-      OP_REQUIRES(
-          context, DataTypeToEnum<T>::value == output_values.dtype(),
-          errors::InvalidArgument(
-              "Requested SparseTensor of type ",
-              DataTypeString(DataTypeToEnum<T>::value), " but SparseTensor[", i,
-              "].values.dtype() == ", DataTypeString(output_values.dtype())));
-
-      int64 num_entries = output_indices.dim_size(0);
-      OP_REQUIRES(context, num_entries == output_values.dim_size(0),
-                  errors::InvalidArgument(
-                      "Expected row counts of SparseTensor[", i,
-                      "].indices and SparseTensor[", i,
-                      "].values to match but they do not: ", num_entries,
-                      " vs. ", output_values.dim_size(0)));
-      int rank = output_indices.dim_size(1);
-      OP_REQUIRES(
-          context, rank == output_shape.dim_size(0),
-          errors::InvalidArgument("Expected column counts of SparseTensor[", i,
-                                  "].indices to match size of SparseTensor[", i,
-                                  "].shape "
-                                  "but they do not: ",
-                                  rank, " vs. ", output_shape.dim_size(0)));
-
-      // Now we expand each SparseTensors' indices and shape by
-      // prefixing a dimension
-      Tensor expanded_indices(
-          DT_INT64, TensorShape({num_entries, 1 + output_indices.dim_size(1)}));
-      Tensor expanded_shape(DT_INT64,
-                            TensorShape({1 + output_shape.dim_size(0)}));
-      const auto& output_indices_t = output_indices.matrix<int64>();
-      const auto& output_shape_t = output_shape.vec<int64>();
-      auto expanded_indices_t = expanded_indices.matrix<int64>();
-      auto expanded_shape_t = expanded_shape.vec<int64>();
-      expanded_indices_t.chip<1>(0).setZero();
-      Eigen::DSizes<Eigen::DenseIndex, 2> indices_start(0, 1);
-      Eigen::DSizes<Eigen::DenseIndex, 2> indices_sizes(num_entries, rank);
-      expanded_indices_t.slice(indices_start, indices_sizes) = output_indices_t;
-      expanded_shape_t(0) = 1;
-      std::copy_n(&output_shape_t(0), rank, &expanded_shape_t(1));
-
-      TensorShape expanded_tensor_shape(expanded_shape.vec<int64>());
-
-      indices_to_concat.push_back(expanded_indices);
-      values_to_concat.push_back(output_values);
-      shapes_to_concat.push_back(expanded_tensor_shape);
-    }
-
-    int rank = -1;
-    for (int i = 0; i < num_sparse_tensors; ++i) {
-      if (rank < 0) rank = shapes_to_concat[i].dims();
-      OP_REQUIRES(context, rank == shapes_to_concat[i].dims(),
-                  errors::InvalidArgument(
-                      "Inconsistent rank across SparseTensors: rank prior to "
-                      "SparseTensor[",
-                      i, "] was: ", rank, " but rank of SparseTensor[", i,
-                      "] is: ", shapes_to_concat[i].dims()));
-    }
-
-    // SparseTensor::Concat requires consistent shape for all but the
-    // primary order dimension (dimension 0 in this case).  So we get
-    // the maximum value across all the input SparseTensors for each
-    // dimension and use that.
-    TensorShape preconcat_shape(shapes_to_concat[0]);
-    for (int i = 0; i < num_sparse_tensors; ++i) {
-      for (int d = 0; d < rank; ++d) {
-        preconcat_shape.set_dim(d, std::max(preconcat_shape.dim_size(d),
-                                            shapes_to_concat[i].dim_size(d)));
-      }
-    }
-
-    // Dimension 0 is the primary dimension.
-    gtl::InlinedVector<int64, 8> std_order(rank);
-    std::iota(std_order.begin(), std_order.end(), 0);
-
-    std::vector<SparseTensor> tensors_to_concat;
-    tensors_to_concat.reserve(num_sparse_tensors);
-    for (int i = 0; i < num_sparse_tensors; ++i) {
-      tensors_to_concat.emplace_back(indices_to_concat[i], values_to_concat[i],
-                                     preconcat_shape, std_order);
-    }
-
-    SparseTensor output = SparseTensor::Concat<T>(tensors_to_concat);
-
-    Tensor final_output_shape(DT_INT64, TensorShape({output.dims()}));
-
-    std::copy_n(output.shape().data(), output.dims(),
-                final_output_shape.vec<int64>().data());
-
-    context->set_output(0, output.indices());
-    context->set_output(1, output.values());
-    context->set_output(2, final_output_shape);
-  }
-};
-
 #define REGISTER_KERNELS(type)                                \
   REGISTER_KERNEL_BUILDER(Name("DeserializeManySparse")       \
                               .Device(DEVICE_CPU)             \
                               .TypeConstraint<type>("dtype"), \
-                          DeserializeManySparseOp<type>)
+                          DeserializeSparseOp<type>)
 
 TF_CALL_ALL_TYPES(REGISTER_KERNELS);
 #undef REGISTER_KERNELS
diff --git a/tensorflow/core/ops/sparse_ops.cc b/tensorflow/core/ops/sparse_ops.cc
index 8414519f0b..772e2531dc 100644
--- a/tensorflow/core/ops/sparse_ops.cc
+++ b/tensorflow/core/ops/sparse_ops.cc
@@ -256,6 +256,48 @@ REGISTER_OP("DeserializeSparse")
     .Doc(R"doc(
 Deserialize `SparseTensor` objects.
 
+The input `serialized_sparse` must have the shape `[?, ?, ..., ?, 3]` where
+the last dimension stores serialized `SparseTensor` objects and the other N
+dimensions (N >= 0) correspond to a batch. The ranks of the original
+`SparseTensor` objects must all match. When the final `SparseTensor` is
+created, its rank is the rank of the incoming `SparseTensor` objects plus N;
+the sparse tensors have been concatenated along new dimensions, one for each
+batch.
+
+The output `SparseTensor` object's shape values for the original dimensions
+are the max across the input `SparseTensor` objects' shape values for the
+corresponding dimensions. The new dimensions match the size of the batch.
+
+The input `SparseTensor` objects' indices are assumed ordered in
+standard lexicographic order.  If this is not the case, after this
+step run `SparseReorder` to restore index ordering.
+
+For example, if the serialized input is a `[2 x 3]` matrix representing two
+original `SparseTensor` objects:
+
+    index = [ 0]
+            [10]
+            [20]
+    values = [1, 2, 3]
+    shape = [50]
+
+and
+
+    index = [ 2]
+            [10]
+    values = [4, 5]
+    shape = [30]
+
+then the final deserialized `SparseTensor` will be:
+
+    index = [0  0]
+            [0 10]
+            [0 20]
+            [1  2]
+            [1 10]
+    values = [1, 2, 3, 4, 5]
+    shape = [2 50]
+
 serialized_sparse: The serialized `SparseTensor` objects. The last dimension
   must have 3 columns.
 dtype: The `dtype` of the serialized `SparseTensor` objects.
diff --git a/tensorflow/python/kernel_tests/sparse_serialization_ops_test.py b/tensorflow/python/kernel_tests/sparse_serialization_ops_test.py
index 78c113f514..d1a90952c7 100644
--- a/tensorflow/python/kernel_tests/sparse_serialization_ops_test.py
+++ b/tensorflow/python/kernel_tests/sparse_serialization_ops_test.py
@@ -254,8 +254,8 @@ class SerializeSparseTest(test.TestCase):
           serialized_concat, dtype=dtypes.int32)
 
       with self.assertRaisesOpError(
-          r"Inconsistent rank across SparseTensors: rank prior to "
-          r"SparseTensor\[1\] was: 3 but rank of SparseTensor\[1\] is: 4"):
+          r"Inconsistent shape across SparseTensors: rank prior to "
+          r"SparseTensor\[1\] was: 2 but rank of SparseTensor\[1\] is: 3"):
         sess.run(sp_deserialized,
                  {sp_input0: input0_val,
                   sp_input1: input1_val})
diff --git a/tensorflow/python/ops/sparse_ops.py b/tensorflow/python/ops/sparse_ops.py
index cdfe9e1c1e..9bdc124c83 100644
--- a/tensorflow/python/ops/sparse_ops.py
+++ b/tensorflow/python/ops/sparse_ops.py
@@ -1437,10 +1437,47 @@ def serialize_many_sparse(sp_input, name=None):
 def deserialize_sparse(serialized_sparse, dtype, rank=None, name=None):
   """Deserialize `SparseTensor` objects.
 
-  The input is expected to have shape [d_1, ..., d_m, 3], where the last
-  dimension stores a serialized `SparseTensor`. The method deserializes
-  all input `SparseTensor`s, concatenates them into a single tensor, and
-  reshapes the sparse tensor to preserve the structure of the input.
+  The input `serialized_sparse` must have the shape `[?, ?, ..., ?, 3]` where
+  the last dimension stores serialized `SparseTensor` objects and the other N
+  dimensions (N >= 0) correspond to a batch. The ranks of the original
+  `SparseTensor` objects must all match. When the final `SparseTensor` is
+  created, its rank is the rank of the incoming `SparseTensor` objects plus N;
+  the sparse tensors have been concatenated along new dimensions, one for each
+  batch.
+
+  The output `SparseTensor` object's shape values for the original dimensions
+  are the max across the input `SparseTensor` objects' shape values for the
+  corresponding dimensions. The new dimensions match the size of the batch.
+
+  The input `SparseTensor` objects' indices are assumed ordered in
+  standard lexicographic order.  If this is not the case, after this
+  step run `SparseReorder` to restore index ordering.
+
+  For example, if the serialized input is a `[2 x 3]` matrix representing two
+  original `SparseTensor` objects:
+
+      index = [ 0]
+              [10]
+              [20]
+      values = [1, 2, 3]
+      shape = [50]
+
+  and
+
+      index = [ 2]
+              [10]
+      values = [4, 5]
+      shape = [30]
+
+  then the final deserialized `SparseTensor` will be:
+
+      index = [0  0]
+              [0 10]
+              [0 20]
+              [1  2]
+              [1 10]
+      values = [1, 2, 3, 4, 5]
+      shape = [2 50]
 
   Args:
     serialized_sparse: The serialized `SparseTensor` objects.
-- 
GitLab


From 73a803fb854fc842700a865d4742ae893ed236d3 Mon Sep 17 00:00:00 2001
From: Saurabh Saxena <srbs@google.com>
Date: Tue, 28 Nov 2017 17:03:33 -0800
Subject: [PATCH 0384/1225] Fix flakiness in map_dataset_op_test.

PiperOrigin-RevId: 177241314
---
 tensorflow/contrib/data/python/kernel_tests/BUILD | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/contrib/data/python/kernel_tests/BUILD b/tensorflow/contrib/data/python/kernel_tests/BUILD
index 0790a4a737..4cb69d7c8e 100644
--- a/tensorflow/contrib/data/python/kernel_tests/BUILD
+++ b/tensorflow/contrib/data/python/kernel_tests/BUILD
@@ -275,7 +275,7 @@ py_test(
 
 py_test(
     name = "map_dataset_op_test",
-    size = "small",
+    size = "medium",
     srcs = ["map_dataset_op_test.py"],
     srcs_version = "PY2AND3",
     deps = [
-- 
GitLab


From e2f9107effb0c5c4cee49a71562865d9e919b3d0 Mon Sep 17 00:00:00 2001
From: Tayo Oguntebi <tayo@google.com>
Date: Tue, 28 Nov 2017 17:20:39 -0800
Subject: [PATCH 0385/1225]   Adds minor-dim pooling tests for cases in which
 windows exist entirely in   padding. Modifies reference util reduce-window 1D
 implementation to accept general padding.

PiperOrigin-RevId: 177243527
---
 tensorflow/compiler/xla/reference_util.cc     |  18 ++-
 tensorflow/compiler/xla/reference_util.h      |   6 +
 .../compiler/xla/tests/reduce_window_test.cc  | 112 +++++++++++++-----
 3 files changed, 102 insertions(+), 34 deletions(-)

diff --git a/tensorflow/compiler/xla/reference_util.cc b/tensorflow/compiler/xla/reference_util.cc
index 5bb81b80dd..bdf92eaed1 100644
--- a/tensorflow/compiler/xla/reference_util.cc
+++ b/tensorflow/compiler/xla/reference_util.cc
@@ -195,14 +195,26 @@ ReferenceUtil::ReduceWindow1DGeneric(
     const tensorflow::gtl::ArraySlice<int64>& window,
     const tensorflow::gtl::ArraySlice<int64>& stride, Padding padding) {
   std::vector<int64> dim_lengths{static_cast<int64>(operand.size())};
-  auto padding_both = xla::MakePadding(dim_lengths, window, stride, padding);
+  return ReduceWindow1DGeneric(
+      operand, init, reduce_func, window, stride,
+      xla::MakePadding(dim_lengths, window, stride, padding));
+}
 
+/* static  */ std::unique_ptr<std::vector<float>>
+ReferenceUtil::ReduceWindow1DGeneric(
+    const tensorflow::gtl::ArraySlice<float>& operand, float init,
+    const std::function<float(float, float)>& reduce_func,
+    const tensorflow::gtl::ArraySlice<int64>& window,
+    const tensorflow::gtl::ArraySlice<int64>& stride,
+    const tensorflow::gtl::ArraySlice<std::pair<int64, int64>>& padding) {
+  std::vector<int64> dim_lengths{static_cast<int64>(operand.size())};
   std::vector<int64> window_counts(window.size(), 0);
   std::vector<int64> pad_low(window.size(), 0);
   for (int64 i = 0; i < window.size(); ++i) {
+    int64 padded_width = padding[i].first + dim_lengths[i] + padding[i].second;
     window_counts[i] =
-        WindowCount(dim_lengths[i], window[i], stride[i], padding);
-    pad_low[i] = padding_both[i].first;
+        window_util::StridedBound(padded_width, window[i], stride[i]);
+    pad_low[i] = padding[i].first;
   }
   auto result = MakeUnique<std::vector<float>>(window_counts[0]);
 
diff --git a/tensorflow/compiler/xla/reference_util.h b/tensorflow/compiler/xla/reference_util.h
index 62d455d71a..ee244e9a66 100644
--- a/tensorflow/compiler/xla/reference_util.h
+++ b/tensorflow/compiler/xla/reference_util.h
@@ -184,6 +184,12 @@ class ReferenceUtil {
       const std::function<float(float, float)>& reduce_func,
       const tensorflow::gtl::ArraySlice<int64>& window,
       const tensorflow::gtl::ArraySlice<int64>& stride, Padding padding);
+  static std::unique_ptr<std::vector<float>> ReduceWindow1DGeneric(
+      const tensorflow::gtl::ArraySlice<float>& operand, float init,
+      const std::function<float(float, float)>& reduce_func,
+      const tensorflow::gtl::ArraySlice<int64>& window,
+      const tensorflow::gtl::ArraySlice<int64>& stride,
+      const tensorflow::gtl::ArraySlice<std::pair<int64, int64>>& padding);
   static std::unique_ptr<Array4D<float>> ReduceWindow4DGeneric(
       const Array4D<float>& operand, float init,
       const std::function<float(float, float)>& reduce_func,
diff --git a/tensorflow/compiler/xla/tests/reduce_window_test.cc b/tensorflow/compiler/xla/tests/reduce_window_test.cc
index 0601a1466b..aa035f0ba5 100644
--- a/tensorflow/compiler/xla/tests/reduce_window_test.cc
+++ b/tensorflow/compiler/xla/tests/reduce_window_test.cc
@@ -962,68 +962,114 @@ struct R1ReduceWindowTestData {
   int64 base_bounds[1];
   int64 window_bounds[1];
   int64 strides[1];
-  Padding padding;
+  int64 pad_low[1];
+  int64 pad_high[1];
   Reducer reducer;
 } kR1TestCases[] = {
     {/*base_bounds=*/{1}, /*window_bounds=*/{1},
      /*strides=*/{1},
-     /*padding=*/Padding::kValid, /*reducer=*/Reducer::kAdd},
+     /*pad_low=*/{xla::MakePadding({1}, {1}, {1}, Padding::kValid)[0].first},
+     /*pad_high=*/{xla::MakePadding({1}, {1}, {1}, Padding::kValid)[0].second},
+     /*reducer=*/Reducer::kAdd},
 
     {/*base_bounds=*/{3}, /*window_bounds=*/{3},
      /*strides=*/{1},
-     /*padding=*/Padding::kValid, /*reducer=*/Reducer::kAdd},
+     /*pad_low=*/{xla::MakePadding({3}, {3}, {1}, Padding::kValid)[0].first},
+     /*pad_high=*/{xla::MakePadding({3}, {3}, {1}, Padding::kValid)[0].second},
+     /*reducer=*/Reducer::kAdd},
 
     {/*base_bounds=*/{3}, /*window_bounds=*/{2},
      /*strides=*/{1},
-     /*padding=*/Padding::kValid, /*reducer=*/Reducer::kAdd},
+     /*pad_low=*/{xla::MakePadding({3}, {2}, {1}, Padding::kValid)[0].first},
+     /*pad_high=*/{xla::MakePadding({3}, {2}, {1}, Padding::kValid)[0].second},
+     /*reducer=*/Reducer::kAdd},
 
     {/*base_bounds=*/{5}, /*window_bounds=*/{1},
      /*strides=*/{1},
-     /*padding=*/Padding::kValid, /*reducer=*/Reducer::kMax},
+     /*pad_low=*/{xla::MakePadding({5}, {1}, {1}, Padding::kValid)[0].first},
+     /*pad_high=*/{xla::MakePadding({5}, {1}, {1}, Padding::kValid)[0].second},
+     /*reducer=*/Reducer::kMax},
 
     {/*base_bounds=*/{16}, /*window_bounds=*/{4},
      /*strides=*/{4},
-     /*padding=*/Padding::kValid, /*reducer=*/Reducer::kMax},
+     /*pad_low=*/{xla::MakePadding({16}, {4}, {4}, Padding::kValid)[0].first},
+     /*pad_high=*/{xla::MakePadding({16}, {4}, {4}, Padding::kValid)[0].second},
+     /*reducer=*/Reducer::kMax},
 
     {/*base_bounds=*/{16}, /*window_bounds=*/{4},
      /*strides=*/{3},
-     /*padding=*/Padding::kValid, /*reducer=*/Reducer::kAdd},
+     /*pad_low=*/{xla::MakePadding({16}, {4}, {3}, Padding::kValid)[0].first},
+     /*pad_high=*/{xla::MakePadding({16}, {4}, {3}, Padding::kValid)[0].second},
+     /*reducer=*/Reducer::kAdd},
 
-    {/*base_bounds=*/{128 * 2}, /*window_bounds=*/{30},
+    {/*base_bounds=*/{128 * 2},
+     /*window_bounds=*/{30},
      /*strides=*/{27},
-     /*padding=*/Padding::kValid, /*reducer=*/Reducer::kAdd},
-
-    {/*base_bounds=*/{128 * 17}, /*window_bounds=*/{7},
+     /*pad_low=*/
+     {xla::MakePadding({128 * 2}, {30}, {27}, Padding::kValid)[0].first},
+     /*pad_high=*/
+     {xla::MakePadding({128 * 2}, {30}, {27}, Padding::kValid)[0].second},
+     /*reducer=*/Reducer::kAdd},
+
+    {/*base_bounds=*/{128 * 17},
+     /*window_bounds=*/{7},
      /*strides=*/{64},
-     /*padding=*/Padding::kValid, /*reducer=*/Reducer::kAdd},
-
-    {/*base_bounds=*/{128 * 2}, /*window_bounds=*/{32},
+     /*pad_low=*/
+     {xla::MakePadding({128 * 17}, {7}, {64}, Padding::kValid)[0].first},
+     /*pad_high=*/
+     {xla::MakePadding({128 * 17}, {7}, {64}, Padding::kValid)[0].second},
+     /*reducer=*/Reducer::kAdd},
+
+    {/*base_bounds=*/{128 * 2},
+     /*window_bounds=*/{32},
      /*strides=*/{56},
-     /*padding=*/Padding::kValid, /*reducer=*/Reducer::kAdd},
+     /*pad_low=*/
+     {xla::MakePadding({128 * 2}, {32}, {56}, Padding::kValid)[0].first},
+     /*pad_high=*/
+     {xla::MakePadding({128 * 2}, {32}, {56}, Padding::kValid)[0].second},
+     /*reducer=*/Reducer::kAdd},
 
     {/*base_bounds=*/{3}, /*window_bounds=*/{2},
      /*strides=*/{1},
-     /*padding=*/Padding::kSame, /*reducer=*/Reducer::kAdd},
+     /*pad_low=*/{xla::MakePadding({3}, {2}, {1}, Padding::kSame)[0].first},
+     /*pad_high=*/{xla::MakePadding({3}, {2}, {1}, Padding::kSame)[0].second},
+     /*reducer=*/Reducer::kAdd},
 
     {/*base_bounds=*/{5}, /*window_bounds=*/{3},
      /*strides=*/{2},
-     /*padding=*/Padding::kSame, /*reducer=*/Reducer::kAdd},
+     /*pad_low=*/{xla::MakePadding({5}, {3}, {2}, Padding::kSame)[0].first},
+     /*pad_high=*/{xla::MakePadding({5}, {3}, {2}, Padding::kSame)[0].second},
+     /*reducer=*/Reducer::kAdd},
 
     {/*base_bounds=*/{16}, /*window_bounds=*/{4},
      /*strides=*/{3},
-     /*padding=*/Padding::kSame, /*reducer=*/Reducer::kAdd},
+     /*pad_low=*/{xla::MakePadding({16}, {4}, {3}, Padding::kSame)[0].first},
+     /*pad_high=*/{xla::MakePadding({16}, {4}, {3}, Padding::kSame)[0].second},
+     /*reducer=*/Reducer::kAdd},
+
+    {/*base_bounds=*/{5}, /*window_bounds=*/{5},
+     /*strides=*/{1},
+     /*pad_low=*/{0},
+     /*pad_high=*/{5},
+     /*reducer=*/Reducer::kAdd},
+
+    {/*base_bounds=*/{5}, /*window_bounds=*/{5},
+     /*strides=*/{1},
+     /*pad_low=*/{5},
+     /*pad_high=*/{0},
+     /*reducer=*/Reducer::kAdd},
 };
 
 string R1ReduceWindowTestDataToString(
     const ::testing::TestParamInfo<R1ReduceWindowTestData>& data) {
   string str = tensorflow::strings::StrCat(
-      "base_bounds_",
-      tensorflow::str_util::Join(data.param.base_bounds, "x"),  //
+      "base_bounds_", tensorflow::str_util::Join(data.param.base_bounds, "x"),
       "__window_bounds_",
-      tensorflow::str_util::Join(data.param.window_bounds, "x"),              //
-      "__strides_", tensorflow::str_util::Join(data.param.strides, "x"),      //
-      "__padding_", data.param.padding == Padding::kSame ? "same" : "valid",  //
-      "__reducer_", data.param.reducer == kAdd ? "add" : "max");
+      tensorflow::str_util::Join(data.param.window_bounds, "x"), "__strides_",
+      tensorflow::str_util::Join(data.param.strides, "x"), "__pad_low_",
+      tensorflow::str_util::Join(data.param.pad_low, "x"), "__pad_high_",
+      tensorflow::str_util::Join(data.param.pad_high, "x"), "__reducer_",
+      data.param.reducer == kAdd ? "add" : "max");
   return str;
 }
 
@@ -1044,15 +1090,18 @@ TEST_P(R1ReduceWindowTest, DoIt) {
   TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr<GlobalData> input_arg,
                           client_->TransferToServer(*input_literal));
 
+  std::vector<std::pair<int64, int64>> padding(1);
+  padding[0] = {param.pad_low[0], param.pad_high[0]};
+
   auto computation = param.reducer == kAdd
                          ? CreateScalarAddComputation(F32, &b)
                          : CreateScalarMaxComputation(F32, &b);
-  b.ReduceWindow(/*operand=*/
-                 b.Parameter(0, input_literal->shape(), "p0"),
-                 /*init_value=*/b.ConstantR0<float>(kInitValue),
-                 /*computation=*/computation,
-                 /*window_dimensions=*/param.window_bounds,
-                 /*window_strides=*/param.strides, /*padding=*/param.padding);
+  b.ReduceWindowWithGeneralPadding(
+      /*operand=*/b.Parameter(0, input_literal->shape(), "p0"),
+      /*init_value=*/b.ConstantR0<float>(kInitValue),
+      /*computation=*/computation,
+      /*window_dimensions=*/param.window_bounds,
+      /*window_strides=*/param.strides, /*padding=*/padding);
 
   auto reduce_func = param.reducer == kAdd
                          ? +[](float a, float b) { return a + b; }
@@ -1062,7 +1111,8 @@ TEST_P(R1ReduceWindowTest, DoIt) {
       /*init=*/kInitValue,
       /*reduce_func=*/reduce_func,
       /*window=*/param.window_bounds,
-      /*stride=*/param.strides, /*padding=*/param.padding);
+      /*stride=*/param.strides,
+      /*padding=*/padding);
 
   ComputeAndCompareR1<float>(&b, tensorflow::gtl::ArraySlice<float>(*expected),
                              {input_arg.get()}, ErrorSpec(1e-3, 1e-3));
-- 
GitLab


From 782ec4e186943d69e4ad047a835cbbf2eb477359 Mon Sep 17 00:00:00 2001
From: Benoit Steiner <bsteiner@google.com>
Date: Tue, 28 Nov 2017 18:17:01 -0800
Subject: [PATCH 0386/1225] Silenced noisy log

PiperOrigin-RevId: 177249675
---
 tensorflow/core/grappler/grappler_item_builder.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/core/grappler/grappler_item_builder.cc b/tensorflow/core/grappler/grappler_item_builder.cc
index 36c7f92c49..a186e9a181 100644
--- a/tensorflow/core/grappler/grappler_item_builder.cc
+++ b/tensorflow/core/grappler/grappler_item_builder.cc
@@ -173,7 +173,7 @@ std::unique_ptr<GrapplerItem> GrapplerItemFromMetaGraphDef(
                  << ", skipping this input.";
       return nullptr;
     }
-    LOG(INFO) << "Will use feed node " << feed_name;
+    VLOG(1) << "Will use feed node " << feed_name;
     new_item->feed.emplace_back(feed_name, Tensor());
   }
 
-- 
GitLab


From bf05a2d1dce3af9b88dcd5c9253a163353951c99 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 28 Nov 2017 18:39:34 -0800
Subject: [PATCH 0387/1225] Support shape inference (i.e., shapes containing
 -1) in the Reshape bijector.

PiperOrigin-RevId: 177251901
---
 .../kernel_tests/bijectors/reshape_test.py    | 342 +++++++++++-------
 .../python/ops/bijectors/reshape_impl.py      | 277 +++++++-------
 2 files changed, 362 insertions(+), 257 deletions(-)

diff --git a/tensorflow/contrib/distributions/python/kernel_tests/bijectors/reshape_test.py b/tensorflow/contrib/distributions/python/kernel_tests/bijectors/reshape_test.py
index 38b3a23c2d..49451446b5 100644
--- a/tensorflow/contrib/distributions/python/kernel_tests/bijectors/reshape_test.py
+++ b/tensorflow/contrib/distributions/python/kernel_tests/bijectors/reshape_test.py
@@ -28,8 +28,19 @@ from tensorflow.python.ops.distributions.bijector_test_util import assert_biject
 from tensorflow.python.platform import test
 
 
-class ReshapeBijectorTest(test.TestCase):
-  """Tests correctness of the reshape transformation."""
+class _ReshapeBijectorTest(object):
+  """Base class for testing the reshape transformation.
+
+  Methods defined in this class call a method self.build_shapes() that
+  is implemented by subclasses defined below, returning respectively
+   ReshapeBijectorTestStatic: static shapes,
+   ReshapeBijectorTestDynamic: shape placeholders of known ndims, and
+   ReshapeBijectorTestDynamicNdims: shape placeholders of unspecified ndims,
+  so that each test in this base class is automatically run over all
+  three cases. The subclasses also implement assertRaisesError to test
+  for either Python exceptions (in the case of static shapes) or
+  TensorFlow op errors (dynamic shapes).
+  """
 
   def setUp(self):
     self._rng = np.random.RandomState(42)
@@ -40,9 +51,10 @@ class ReshapeBijectorTest(test.TestCase):
     expected_y = np.reshape(expected_x, [4, 6])
 
     with self.test_session() as sess:
+      shape_in, shape_out, feed_dict = self.build_shapes([3, 2], [6,])
       bijector = Reshape(
-          event_shape_out=[6,],
-          event_shape_in=[3, 2],
+          event_shape_out=shape_out,
+          event_shape_in=shape_in,
           validate_args=True)
       (x_,
        y_,
@@ -52,66 +64,23 @@ class ReshapeBijectorTest(test.TestCase):
            bijector.forward(expected_x),
            bijector.forward_log_det_jacobian(expected_x),
            bijector.inverse_log_det_jacobian(expected_y),
-       ))
+       ), feed_dict=feed_dict)
       self.assertEqual("reshape", bijector.name)
       self.assertAllClose(expected_y, y_, rtol=1e-6, atol=0)
       self.assertAllClose(expected_x, x_, rtol=1e-6, atol=0)
       self.assertAllClose(0., fldj_, rtol=1e-6, atol=0)
       self.assertAllClose(0., ildj_, rtol=1e-6, atol=0)
 
-  def testEventShapeDynamicNdims(self):
-    """Check forward/inverse shape methods with dynamic ndims."""
-
-    shape_in = tensor_shape.TensorShape([6,])
-    shape_in_ph = array_ops.placeholder(dtype=dtypes.int32)
-
-    shape_out = tensor_shape.TensorShape([2, 3])
-    shape_out_ph = array_ops.placeholder(dtype=dtypes.int32)
-
-    bijector = Reshape(
-        event_shape_out=shape_out_ph,
-        event_shape_in=shape_in_ph, validate_args=True)
-
-    # using the _tensor methods, we should always get a fully-specified
-    # result since these are evaluated at graph runtime.
-    with self.test_session() as sess:
-      (shape_out_,
-       shape_in_) = sess.run((
-           bijector.forward_event_shape_tensor(shape_in),
-           bijector.inverse_event_shape_tensor(shape_out),
-       ), feed_dict={
-           shape_in_ph: shape_in,
-           shape_out_ph: shape_out,
-       })
-      self.assertAllEqual(shape_out, shape_out_)
-      self.assertAllEqual(shape_in, shape_in_)
-
-  def testEventShapeDynamic(self):
-    """Check shape methods with static ndims but dynamic shape."""
-
-    shape_in = tensor_shape.TensorShape([6,])
-    shape_in_partial = tensor_shape.TensorShape([None,])
-    shape_in_ph = array_ops.placeholder(
-        shape=[1,], dtype=dtypes.int32)
-
-    shape_out = tensor_shape.TensorShape([2, 3])
-    shape_out_partial = tensor_shape.TensorShape([None, None])
-    shape_out_ph = array_ops.placeholder(
-        shape=[2,], dtype=dtypes.int32)
+  def testEventShapeTensor(self):
+    """Test event_shape_tensor methods when even ndims may be dynamic."""
 
+    shape_in_static = [2, 3]
+    shape_out_static = [6,]
+    shape_in, shape_out, feed_dict = self.build_shapes(shape_in_static,
+                                                       shape_out_static)
     bijector = Reshape(
-        event_shape_out=shape_out_ph,
-        event_shape_in=shape_in_ph,
-        validate_args=True)
-
-    # if event shapes are not statically available, should
-    # return partially-specified TensorShapes.
-    self.assertAllEqual(
-        bijector.forward_event_shape(shape_in).as_list(),
-        shape_out_partial.as_list())
-    self.assertAllEqual(
-        bijector.inverse_event_shape(shape_out).as_list(),
-        shape_in_partial.as_list())
+        event_shape_out=shape_out,
+        event_shape_in=shape_in, validate_args=True)
 
     # using the _tensor methods, we should always get a fully-specified
     # result since these are evaluated at graph runtime.
@@ -120,42 +89,9 @@ class ReshapeBijectorTest(test.TestCase):
        shape_in_) = sess.run((
            bijector.forward_event_shape_tensor(shape_in),
            bijector.inverse_event_shape_tensor(shape_out),
-       ), feed_dict={
-           shape_in_ph: shape_in,
-           shape_out_ph: shape_out,
-       })
-      self.assertAllEqual(shape_out, shape_out_)
-      self.assertAllEqual(shape_in, shape_in_)
-
-  def testEventShapeStatic(self):
-    """Check shape methods when shape is statically known."""
-
-    shape_in = tensor_shape.TensorShape([6,])
-    shape_out = tensor_shape.TensorShape([2, 3])
-
-    bijector_static = Reshape(
-        event_shape_out=shape_out,
-        event_shape_in=shape_in,
-        validate_args=True)
-
-    # test that forward_ and inverse_event_shape do sensible things
-    # when shapes are statically known.
-    self.assertEqual(
-        bijector_static.forward_event_shape(shape_in),
-        shape_out)
-    self.assertEqual(
-        bijector_static.inverse_event_shape(shape_out),
-        shape_in)
-
-    with self.test_session() as sess:
-      (shape_out_static_,
-       shape_in_static_,
-      ) = sess.run((
-          bijector_static.forward_event_shape_tensor(shape_in),
-          bijector_static.inverse_event_shape_tensor(shape_out),
-      ))
-      self.assertAllEqual(shape_out, shape_out_static_)
-      self.assertAllEqual(shape_in, shape_in_static_)
+       ), feed_dict=feed_dict)
+      self.assertAllEqual(shape_out_static, shape_out_)
+      self.assertAllEqual(shape_in_static, shape_in_)
 
   def testScalarReshape(self):
     """Test reshaping to and from a scalar shape ()."""
@@ -166,11 +102,11 @@ class ReshapeBijectorTest(test.TestCase):
     expected_x_scalar = np.random.randn(1,)
     expected_y_scalar = expected_x_scalar[0]
 
+    shape_in, shape_out, feed_dict = self.build_shapes([], [1,])
     with self.test_session() as sess:
       bijector = Reshape(
-          event_shape_out=[],
-          event_shape_in=[1,], validate_args=True)
-
+          event_shape_out=shape_in,
+          event_shape_in=shape_out, validate_args=True)
       (x_,
        y_,
        x_scalar_,
@@ -180,53 +116,178 @@ class ReshapeBijectorTest(test.TestCase):
           bijector.forward(expected_x),
           bijector.inverse(expected_y_scalar),
           bijector.forward(expected_x_scalar),
-      ))
+      ), feed_dict=feed_dict)
       self.assertAllClose(expected_y, y_, rtol=1e-6, atol=0)
       self.assertAllClose(expected_x, x_, rtol=1e-6, atol=0)
       self.assertAllClose(expected_y_scalar, y_scalar_, rtol=1e-6, atol=0)
       self.assertAllClose(expected_x_scalar, x_scalar_, rtol=1e-6, atol=0)
 
-  def testRaisesOpError(self):
-    x1 = np.random.randn(4, 2, 3)
-    x2 = np.random.randn(4, 3, 2)
-    x3 = np.random.randn(4, 5, 1, 1)
+  def testMultipleUnspecifiedDimensionsOpError(self):
 
     with self.test_session() as sess:
-      shape_in_ph = array_ops.placeholder(shape=[2,], dtype=dtypes.int32)
-      shape_out_ph = array_ops.placeholder(shape=[3,], dtype=dtypes.int32)
+      shape_in, shape_out, feed_dict = self.build_shapes([2, 3], [4, -1, -1,])
       bijector = Reshape(
-          event_shape_out=shape_out_ph,
-          event_shape_in=shape_in_ph,
+          event_shape_out=shape_out,
+          event_shape_in=shape_in,
           validate_args=True)
 
-      with self.assertRaisesOpError(
+      with self.assertRaisesError(
+          "elements must have at most one `-1`."):
+        sess.run(bijector.forward_event_shape_tensor(shape_in),
+                 feed_dict=feed_dict)
+
+  def testInvalidDimensionsOpError(self):
+
+    with self.test_session() as sess:
+
+      shape_in, shape_out, feed_dict = self.build_shapes([2, 3], [1, 2, -2,])
+      bijector = Reshape(
+          event_shape_out=shape_out,
+          event_shape_in=shape_in,
+          validate_args=True)
+
+      with self.assertRaisesError(
+          "elements must be either positive integers or `-1`."):
+        sess.run(bijector.forward_event_shape_tensor(shape_in),
+                 feed_dict=feed_dict)
+
+  def testValidButNonMatchingInputOpError(self):
+    x = np.random.randn(4, 3, 2)
+
+    with self.test_session() as sess:
+      shape_in, shape_out, feed_dict = self.build_shapes([2, 3], [1, 6, 1,])
+      bijector = Reshape(
+          event_shape_out=shape_out,
+          event_shape_in=shape_in,
+          validate_args=True)
+
+      # Here we pass in a tensor (x) whose shape is compatible with
+      # the output shape, so tf.reshape will throw no error, but
+      # doesn't match the expected input shape.
+      with self.assertRaisesError(
           "Input `event_shape` does not match `event_shape_in`."):
-        sess.run(bijector.forward(x2),
-                 feed_dict={shape_out_ph: [1, 6, 1],
-                            shape_in_ph: [2, 3]})
+        sess.run(bijector.forward(x),
+                 feed_dict=feed_dict)
 
-      with self.assertRaisesOpError(
-          "event_shape_out entries must be positive."):
-        sess.run(bijector.forward(x1),
-                 feed_dict={shape_out_ph: [-1, -1, 6],
-                            shape_in_ph: [2, 3]})
+  def testValidButNonMatchingInputPartiallySpecifiedOpError(self):
+    x = np.random.randn(4, 3, 2)
+
+    with self.test_session() as sess:
+      shape_in, shape_out, feed_dict = self.build_shapes([2, -1], [1, 6, 1,])
+      bijector = Reshape(
+          event_shape_out=shape_out,
+          event_shape_in=shape_in,
+          validate_args=True)
+
+      with self.assertRaisesError(
+          "Input `event_shape` does not match `event_shape_in`."):
+        sess.run(bijector.forward(x),
+                 feed_dict=feed_dict)
+
+  def testInputOutputMismatchOpError(self):
+    x1 = np.random.randn(4, 2, 3)
+    x2 = np.random.randn(4, 1, 1, 5)
+
+    with self.test_session() as sess:
+      shape_in, shape_out, fd_mismatched = self.build_shapes([2, 3],
+                                                             [1, 1, 5])
+      bijector = Reshape(
+          event_shape_out=shape_out,
+          event_shape_in=shape_in,
+          validate_args=True)
 
       # test that *all* methods check basic assertions
-      fd_mismatched = {shape_out_ph: [1, 1, 5], shape_in_ph: [2, 3]}
-      with self.assertRaisesOpError(
-          "Input/output `event_size`s do not match."):
+      with self.assertRaisesError(
+          "Input to reshape is a tensor with"):
         sess.run(bijector.forward(x1), feed_dict=fd_mismatched)
-      with self.assertRaisesOpError(
-          "Input/output `event_size`s do not match."):
-        sess.run(bijector.inverse(x3), feed_dict=fd_mismatched)
-      with self.assertRaisesOpError(
-          "Input/output `event_size`s do not match."):
-        sess.run(bijector.inverse_log_det_jacobian(x3),
-                 feed_dict=fd_mismatched)
-      with self.assertRaisesOpError(
-          "Input/output `event_size`s do not match."):
-        sess.run(bijector.forward_log_det_jacobian(x1),
-                 feed_dict=fd_mismatched)
+      with self.assertRaisesError(
+          "Input to reshape is a tensor with"):
+        sess.run(bijector.inverse(x2), feed_dict=fd_mismatched)
+
+  def testOneShapePartiallySpecified(self):
+    expected_x = np.random.randn(4, 6)
+    expected_y = np.reshape(expected_x, [4, 2, 3])
+
+    with self.test_session() as sess:
+      # one of input/output shapes is partially specified
+      shape_in, shape_out, feed_dict = self.build_shapes([-1,], [2, 3])
+      bijector = Reshape(
+          event_shape_out=shape_out,
+          event_shape_in=shape_in,
+          validate_args=True)
+      (x_,
+       y_,
+      ) = sess.run((
+          bijector.inverse(expected_y),
+          bijector.forward(expected_x),
+      ), feed_dict=feed_dict)
+      self.assertAllClose(expected_y, y_, rtol=1e-6, atol=0)
+      self.assertAllClose(expected_x, x_, rtol=1e-6, atol=0)
+
+  def testBothShapesPartiallySpecified(self):
+    expected_x = np.random.randn(4, 2, 3)
+    expected_y = np.reshape(expected_x, [4, 3, 2])
+    with self.test_session() as sess:
+      shape_in, shape_out, feed_dict = self.build_shapes([-1, 3], [-1, 2])
+      bijector = Reshape(
+          event_shape_out=shape_out,
+          event_shape_in=shape_in,
+          validate_args=True)
+      (x_,
+       y_,
+      ) = sess.run((
+          bijector.inverse(expected_y),
+          bijector.forward(expected_x),
+      ), feed_dict=feed_dict)
+      self.assertAllClose(expected_y, y_, rtol=1e-6, atol=0)
+      self.assertAllClose(expected_x, x_, rtol=1e-6, atol=0)
+
+  def testDefaultVectorShape(self):
+    expected_x = np.random.randn(4, 4)
+    expected_y = np.reshape(expected_x, [4, 2, 2])
+    with self.test_session() as sess:
+      _, shape_out, feed_dict = self.build_shapes([-1,], [-1, 2])
+      bijector = Reshape(shape_out,
+                         validate_args=True)
+      (x_,
+       y_,
+      ) = sess.run((
+          bijector.inverse(expected_y),
+          bijector.forward(expected_x),
+      ), feed_dict=feed_dict)
+      self.assertAllClose(expected_y, y_, rtol=1e-6, atol=0)
+      self.assertAllClose(expected_x, x_, rtol=1e-6, atol=0)
+
+  def build_shapes(self, *args, **kwargs):
+    raise NotImplementedError("Subclass failed to implement `build_shapes`.")
+
+
+class ReshapeBijectorTestStatic(test.TestCase, _ReshapeBijectorTest):
+
+  def build_shapes(self, shape_in, shape_out):
+    shape_in_static = shape_in
+    shape_out_static = shape_out
+    feed_dict = {}
+    return shape_in_static, shape_out_static, feed_dict
+
+  def assertRaisesError(self, msg):
+    return self.assertRaisesRegexp(Exception, msg)
+
+  def testEventShape(self):
+    shape_in_static = tensor_shape.TensorShape([2, 3])
+    shape_out_static = tensor_shape.TensorShape([6,])
+    bijector = Reshape(
+        event_shape_out=shape_out_static,
+        event_shape_in=shape_in_static, validate_args=True)
+
+    # test that forward_ and inverse_event_shape do sensible things
+    # when shapes are statically known.
+    self.assertEqual(
+        bijector.forward_event_shape(shape_in_static),
+        shape_out_static)
+    self.assertEqual(
+        bijector.inverse_event_shape(shape_out_static),
+        shape_in_static)
 
   def testBijectiveAndFinite(self):
     x = np.random.randn(4, 2, 3)
@@ -238,5 +299,32 @@ class ReshapeBijectorTest(test.TestCase):
           validate_args=True)
       assert_bijective_and_finite(bijector, x, y, rtol=1e-6, atol=0)
 
+
+class ReshapeBijectorTestDynamic(test.TestCase, _ReshapeBijectorTest):
+
+  def build_shapes(self, shape_in, shape_out):
+    shape_in_ph = array_ops.placeholder(shape=(len(shape_in),),
+                                        dtype=dtypes.int32)
+    shape_out_ph = array_ops.placeholder(shape=(len(shape_out),),
+                                         dtype=dtypes.int32)
+    feed_dict = {shape_in_ph: shape_in, shape_out_ph: shape_out}
+    return shape_in_ph, shape_out_ph, feed_dict
+
+  def assertRaisesError(self, msg):
+    return self.assertRaisesOpError(msg)
+
+
+class ReshapeBijectorTestDynamicNdims(test.TestCase, _ReshapeBijectorTest):
+
+  def build_shapes(self, shape_in, shape_out):
+    shape_in_ph = array_ops.placeholder(shape=None, dtype=dtypes.int32)
+    shape_out_ph = array_ops.placeholder(shape=None, dtype=dtypes.int32)
+    feed_dict = {shape_in_ph: shape_in, shape_out_ph: shape_out}
+    return shape_in_ph, shape_out_ph, feed_dict
+
+  def assertRaisesError(self, msg):
+    return self.assertRaisesOpError(msg)
+
+
 if __name__ == "__main__":
   test.main()
diff --git a/tensorflow/contrib/distributions/python/ops/bijectors/reshape_impl.py b/tensorflow/contrib/distributions/python/ops/bijectors/reshape_impl.py
index 93682639aa..1eb8e74fda 100644
--- a/tensorflow/contrib/distributions/python/ops/bijectors/reshape_impl.py
+++ b/tensorflow/contrib/distributions/python/ops/bijectors/reshape_impl.py
@@ -36,70 +36,77 @@ __all__ = [
 ]
 
 
+def _static_ndims_from_shape(shape):
+  return shape.shape.with_rank_at_least(1)[0].value
+
+
+def _ndims_from_shape(shape):
+  return array_ops.shape(shape)[0]
+
+
 class Reshape(bijector_lib.Bijector):
   """Reshapes the `event_shape` of a `Tensor`.
 
   The semantics generally follow that of `tf.reshape()`, with
   a few differences:
-   * The user must provide both the input and output shape, so that
-     the transformation can be inverted.
-   * The `Reshape` bijector automatically broadcasts over the leftmost
-     dimensions of its input (`sample_shape` and `batch_shape`); only
-     the rightmost `event_ndims_in` dimensions are reshaped. The
-     number of dimensions to reshape is inferred from the provided
-     `event_shape_in` (`event_ndims_in = len(event_shape_in)`).
-   * The `Reshape` bijector does not currently support
-     partially-specified shapes, i.e., those with a dimension
-     implicitly specified by `-1`.
+
+  * The user must provide both the input and output shape, so that
+    the transformation can be inverted. If an input shape is not
+    specified, the default assumes a vector-shaped input, i.e.,
+    event_shape_in = (-1,).
+  * The `Reshape` bijector automatically broadcasts over the leftmost
+    dimensions of its input (`sample_shape` and `batch_shape`); only
+    the rightmost `event_ndims_in` dimensions are reshaped. The
+    number of dimensions to reshape is inferred from the provided
+    `event_shape_in` (`event_ndims_in = len(event_shape_in)`).
 
   Example usage:
   ```python
 
   bs = tf.contrib.distributions.bijectors
 
-  reverse = bs.Reshape(event_shape_out=[1,2],
-                       event_shape_in=[2,])
+  r = bs.Reshape(event_shape_out=[1, -1])
 
-  reverse.forward([1., 2.])    # shape [2,]
-  # ==> [[1., 2.]]             # shape [1,2]
+  r.forward([3., 4.])    # shape [2]
+  # ==> [[3., 4.]]       # shape [1, 2]
 
-  reverse.forward([[1., 2.], [3., 4.]])  # shape [2, 2]
-  # ==> [[[1., 2.]], [[3., 4.]]]         # shape [2, 1, 2]
+  r.forward([[1., 2.], [3., 4.]])  # shape [2, 2]
+  # ==> [[[1., 2.]],
+  #      [[3., 4.]]]   # shape [2, 1, 2]
 
-  reverse.inverse([[1., 2.]])  # shape [1,2]
-  # ==> [1., 2.]               # shape [2,]
+  r.inverse([[3., 4.]])  # shape [1,2]
+  # ==> [3., 4.]         # shape [2]
 
-  reverse.forward_log_det_jacobian(any_value)
+  r.forward_log_det_jacobian(any_value)
   # ==> 0.
 
-  reverse.inverse_log_det_jacobian(any_value)
+  r.inverse_log_det_jacobian(any_value)
   # ==> 0.
   ```
 
   """
 
-  def __init__(self, event_shape_out, event_shape_in,
+  def __init__(self, event_shape_out, event_shape_in=(-1,),
                validate_args=False, name=None):
     """Creates a `Reshape` bijector.
 
     Args:
       event_shape_out: An `int`-like vector-shaped `Tensor`
-        representing the fully specified (no -1's) event shape of the
-        transformed output.
-      event_shape_in: An `int`-like vector-shaped `Tensor`
-        representing the fully specified (no -1's) event shape of the
-        input.
+        representing the event shape of the transformed output.
+      event_shape_in: An optional `int`-like vector-shape `Tensor`
+        representing the event shape of the input. This is required in
+        order to define inverse operations; the default of (-1,)
+        assumes a vector-shaped input.
       validate_args: Python `bool` indicating whether arguments should
         be checked for correctness.
       name: Python `str`, name given to ops managed by this object.
 
     Raises:
       TypeError: if either `event_shape_in` or `event_shape_out` has
-       non-vector shape (`rank > 1`), or non-integer `dtype`.
-      ValueError: if either `event_shape_in` or `event_shape_out`
-       contains non-positive entries, or if their sizes do not match
-       (`prod(event_shape_in)` != `prod(event_shape_out)`), or if
-       their dimensionality(s) cannot be statically inferred.
+        non-integer `dtype`.
+      ValueError: if either of `event_shape_in` or `event_shape_out`
+       has non-vector shape (`rank > 1`), or if their sizes do not
+       match.
     """
     with ops.name_scope(name, "reshape",
                         values=[event_shape_out, event_shape_in]):
@@ -111,105 +118,74 @@ class Reshape(bijector_lib.Bijector):
                                              name="event_shape_in",
                                              preferred_dtype=dtypes.int32)
 
-      # check that input shapes are positive integers
       assertions = []
-      assertions += self._maybe_check_valid_shape(
-          event_shape_out, "event_shape_out",
-          validate_args=validate_args)
-      assertions += self._maybe_check_valid_shape(
-          event_shape_in, "event_shape_in", validate_args=validate_args)
-
-      # check that prod(event_shape_in) = prod(event_shape_out)
-      assertions += self._maybe_check_matching_sizes(
-          event_shape_in, event_shape_out, validate_args=validate_args)
+      assertions.extend(self._maybe_check_valid_shape(
+          event_shape_out, validate_args))
+      assertions.extend(self._maybe_check_valid_shape(
+          event_shape_in, validate_args))
 
       self._assertions = assertions
       self._event_shape_in = event_shape_in
       self._event_shape_out = event_shape_out
-      self._event_shape_in_static = tensor_util.constant_value_as_shape(
-          event_shape_in)
-      self._event_shape_out_static = tensor_util.constant_value_as_shape(
-          event_shape_out)
 
       super(Reshape, self).__init__(is_constant_jacobian=True,
                                     validate_args=validate_args,
                                     name=name or "reshape")
 
-  def _maybe_check_valid_shape(self, shape_tensor, label,
-                               validate_args=False):
-    """Check that a shape Tensor is int-type and positive."""
-
-    assertions = []
-
-    if not shape_tensor.dtype.is_integer:
+  def _maybe_check_valid_shape(self, shape, validate_args):
+    """Check that a shape Tensor is int-type and otherwise sane."""
+    if not shape.dtype.is_integer:
       raise TypeError("{} dtype ({}) should be `int`-like.".format(
-          label, shape_tensor.dtype.name))
+          shape.op.name, shape.dtype.name))
 
-    shape_rank = tensor_util.constant_value(array_ops.rank(shape_tensor))
-    if shape_rank is not None and shape_rank > 1:
-      raise ValueError("{} rank should be <= 1.".format(label))
+    assertions = []
 
-    s = tensor_util.constant_value(shape_tensor)
-    if s is not None:
-      if (s <= 0).any():
-        raise ValueError("{} entries must be positive, but found {}".format(
-            label, s))
+    ndims = array_ops.rank(shape)
+    ndims_ = tensor_util.constant_value(ndims)
+    if ndims_ is not None and ndims_ > 1:
+      raise ValueError("`{}` rank ({}) should be <= 1.".format(
+          shape.op.name, ndims_))
     elif validate_args:
-      assertions.append(check_ops.assert_positive(
-          shape_tensor, message="{} entries must be positive".format(label)))
-
-    return assertions
-
-  def _maybe_check_matching_sizes(self, event_shape_in, event_shape_out,
-                                  validate_args=False):
-    """Check that prod(event_shape_in)==prod(event_shape_out)."""
+      assertions.append(check_ops.assert_less_equal(
+          ndims, 1, message="`{}` rank should be <= 1.".format(shape.op.name)))
 
-    def _get_size_from_shape(shape):
-      """Computes size from a shape `Tensor`, statically if possible."""
-      s = tensor_util.constant_value(shape)
-      if s is not None:
-        return [np.int32(np.prod(s))]*2
-      return None, math_ops.reduce_prod(shape, name="size")
-
-    # Ensure `event_shape_in` is compatible with `event_shape_out`.
-    event_size_in_, event_size_in = _get_size_from_shape(  # pylint: disable=unbalanced-tuple-unpacking
-        event_shape_in)
-    event_size_out_, event_size_out = _get_size_from_shape(  # pylint: disable=unbalanced-tuple-unpacking
-        event_shape_out)
-
-    assertions = []
-    if event_size_in_ is not None and event_size_out_ is not None:
-      if event_size_in_ != event_size_out_:
+    shape_ = tensor_util.constant_value_as_shape(shape)
+    if shape_.is_fully_defined():
+      es = np.int32(shape_.as_list())
+      if sum(es == -1) > 1:
+        raise ValueError(
+            "`{}` must have at most one `-1` (given {})"
+            .format(shape.op.name, es))
+      if np.any(es < -1):
         raise ValueError(
-            "Input `event_size` ({}) does not match output `event_size` ({}).".
-            format(event_size_in, event_size_out_))
+            "`{}` elements must be either positive integers or `-1`"
+            "(given {})."
+            .format(shape.op.name, es))
     elif validate_args:
-      assertions.append(check_ops.assert_equal(
-          event_size_in, event_size_out,
-          message="Input/output `event_size`s do not match."))
-
+      assertions.extend([
+          check_ops.assert_less_equal(
+              math_ops.reduce_sum(
+                  math_ops.cast(math_ops.equal(shape, -1), dtypes.int32)),
+              1,
+              message="`{}` elements must have at most one `-1`."
+              .format(shape.op.name)),
+          check_ops.assert_greater_equal(
+              shape, -1,
+              message="`{}` elements must be either positive integers or `-1`."
+              .format(shape.op.name)),
+      ])
     return assertions
 
   def _reshape_helper(self, x, event_shape_in, event_shape_out):
     """Reshape only the event_shape of an input `Tensor`."""
 
-    def _get_rank_from_shape(shape):
-      """Computes rank from a shape `Tensor`, statically if possible."""
-      # Uses fact that rank is "shape of shape".
-      ndims = shape.shape.with_rank_at_least(1)[0].value
-      if ndims is not None:
-        return ndims, ndims
-      return None, array_ops.shape(shape)[0]
-
-    event_ndims_in_, event_ndims_in = _get_rank_from_shape(event_shape_in)
+    event_ndims_in_ = _static_ndims_from_shape(event_shape_in)
+    event_ndims_in = _ndims_from_shape(event_shape_in)
+    x_ndims_, x_ndims = x.shape.ndims, array_ops.rank(x)
 
     assertions = []
-    # Ensure x.event_shape is compatible with event_shape_in.
-    if x.shape.ndims is not None:
-      x_ndims_, x_ndims = [x.shape.ndims]*2
-    else:
-      x_ndims_, x_ndims = None, array_ops.rank(x)
 
+    # Ensure x.event_shape is compatible with event_shape_in.
     if (event_ndims_in_ is not None
         and x_ndims_ is not None
         and x.shape.with_rank_at_least(event_ndims_in_)[
@@ -223,13 +199,35 @@ class Reshape(bijector_lib.Bijector):
     event_shape_in_ = tensor_util.constant_value(event_shape_in)
 
     if x_event_shape_ is not None and event_shape_in_ is not None:
-      if not np.equal(x_event_shape_, event_shape_in_).all():
+      # Compare the shape dimensions that are fully specified in the
+      # input (i.e., for which event_shape_in is not -1). If x_event_shape
+      # matches along all of these dimensions, it is compatible with
+      # the desired input shape and any further mismatches (i.e.,
+      # imcompatibility with the desired *output* shape) will be
+      # caught inside of array_ops.reshape() below.
+      x_event_shape_specified_ = x_event_shape_[event_shape_in_ >= 0]
+      event_shape_in_specified_ = event_shape_in_[event_shape_in_ >= 0]
+      if not np.equal(x_event_shape_specified_,
+                      event_shape_in_specified_).all():
         raise ValueError(
-            "Input `event_shape` ({}) does not match `event_shape_in` ({}).".
+            "Input `event_shape` does not match `event_shape_in` ({} vs {}).".
             format(x_event_shape_, event_shape_in_))
     elif self.validate_args:
+      # Similarly to the static case, we compare the shape dimensions
+      # that are fully specified in the input. We extract these
+      # dimensions using boolean_mask(), which requires that the mask
+      # have known ndims. We can assume that shape Tensors always have
+      # ndims==1 (this assumption is verified inside of
+      # _maybe_check_valid_shape), so the reshape operation is just a
+      # no-op that formally encodes this fact to make boolean_mask()
+      # happy.
+      event_shape_mask = array_ops.reshape(event_shape_in >= 0, [-1])
+      x_event_shape_specified = array_ops.boolean_mask(x_event_shape,
+                                                       event_shape_mask)
+      event_shape_in_specified = array_ops.boolean_mask(event_shape_in,
+                                                        event_shape_mask)
       assertions.append(check_ops.assert_equal(
-          x_event_shape, event_shape_in,
+          x_event_shape_specified, event_shape_in_specified,
           message="Input `event_shape` does not match `event_shape_in`."))
 
     if assertions:
@@ -243,8 +241,19 @@ class Reshape(bijector_lib.Bijector):
     sample_and_batch_shape = sample_and_batch_shape[
         :(ndims - math_ops.abs(event_ndims_in))]
 
-    new_shape = array_ops.concat(
-        [sample_and_batch_shape, event_shape_out], axis=0)
+    if (event_ndims_in_ is not None
+        and x_ndims_ is not None
+        and event_ndims_in_ == x_ndims_):
+      # Hack to allow forward/inverse_event_shape to do shape
+      # inference by calling this helper method with a dummy Tensor of
+      # shape event_shape_in. In this special case,
+      # sample_and_batch_shape will be empty so we can preserve static
+      # shape information by avoiding the concat operation below
+      # (which would be a no-op).
+      new_shape = event_shape_out
+    else:
+      new_shape = array_ops.concat(
+          [sample_and_batch_shape, event_shape_out], axis=0)
 
     return array_ops.reshape(x, new_shape)
 
@@ -269,29 +278,37 @@ class Reshape(bijector_lib.Bijector):
       return constant_op.constant(0., dtype=x.dtype)
 
   def _forward_event_shape(self, input_shape):
-    self._event_shape_in_static.assert_is_compatible_with(input_shape)
-    return self._event_shape_out_static
+    # NOTE: this method and the other *_event_shape* methods
+    # compute shape by explicit transformation of a dummy
+    # variable. This approach is not generally recommended because it
+    # bloats the graph and could in general trigger side effects.
+    #
+    # In this particular case of the Reshape bijector, the
+    # forward and inverse transforms have no side effects, and we
+    # believe the reduction in code complexity from delegating the
+    # heavy lifting to tf.reshape() is worth the added graph ops.
+    # However, you should think hard before implementing this approach
+    # in other Bijectors; it is strongly preferred to compute
+    # shapes explicitly whenever it's feasible to do so.
+    with ops.control_dependencies(self._assertions):
+      dummy = array_ops.zeros(dtype=dtypes.float32, shape=input_shape)
+      dummy_reshaped = self.forward(dummy)
+      return dummy_reshaped.shape
 
   def _inverse_event_shape(self, output_shape):
-    self._event_shape_out_static.assert_is_compatible_with(output_shape)
-    return self._event_shape_in_static
+    with ops.control_dependencies(self._assertions):
+      dummy = array_ops.zeros(dtype=dtypes.float32, shape=output_shape)
+      dummy_reshaped = self.inverse(dummy)
+      return dummy_reshaped.shape
 
   def _forward_event_shape_tensor(self, input_shape):
-    input_assertions = self._maybe_check_valid_shape(
-        input_shape, "input event shape", validate_args=self.validate_args)
-    input_assertions += self._maybe_check_matching_sizes(
-        input_shape, self._event_shape_out,
-        validate_args=self.validate_args)
-
-    return control_flow_ops.with_dependencies(
-        input_assertions + self._assertions, self._event_shape_out)
+    with ops.control_dependencies(self._assertions):
+      dummy = array_ops.zeros(dtype=dtypes.float32, shape=input_shape)
+      dummy_reshaped = self.forward(dummy)
+      return array_ops.shape(dummy_reshaped)
 
   def _inverse_event_shape_tensor(self, output_shape):
-
-    output_assertions = self._maybe_check_valid_shape(
-        output_shape, "output event shape", validate_args=self.validate_args)
-    output_assertions += self._maybe_check_matching_sizes(
-        output_shape, self._event_shape_in, validate_args=self.validate_args)
-
-    return control_flow_ops.with_dependencies(
-        output_assertions + self._assertions, self._event_shape_in)
+    with ops.control_dependencies(self._assertions):
+      dummy = array_ops.zeros(dtype=dtypes.float32, shape=output_shape)
+      dummy_reshaped = self.inverse(dummy)
+      return array_ops.shape(dummy_reshaped)
-- 
GitLab


From 60d2e51254028df73f650abe07fad024c49688bb Mon Sep 17 00:00:00 2001
From: Justine Tunney <jart@google.com>
Date: Tue, 28 Nov 2017 19:07:56 -0800
Subject: [PATCH 0388/1225] Introduce tf.contrib.summary.flush

This op has been useful while writing benchmarks.

PiperOrigin-RevId: 177254316
---
 tensorflow/contrib/summary/summary.py         |  1 +
 tensorflow/contrib/summary/summary_ops.py     | 21 +++++++++++++++
 .../contrib/summary/summary_ops_test.py       | 27 +++++++++++++++++++
 3 files changed, 49 insertions(+)

diff --git a/tensorflow/contrib/summary/summary.py b/tensorflow/contrib/summary/summary.py
index f783179f61..9e6af5232f 100644
--- a/tensorflow/contrib/summary/summary.py
+++ b/tensorflow/contrib/summary/summary.py
@@ -31,6 +31,7 @@ from tensorflow.contrib.summary.summary_ops import audio
 from tensorflow.contrib.summary.summary_ops import create_summary_db_writer
 from tensorflow.contrib.summary.summary_ops import create_summary_file_writer
 from tensorflow.contrib.summary.summary_ops import eval_dir
+from tensorflow.contrib.summary.summary_ops import flush
 from tensorflow.contrib.summary.summary_ops import generic
 from tensorflow.contrib.summary.summary_ops import graph
 from tensorflow.contrib.summary.summary_ops import histogram
diff --git a/tensorflow/contrib/summary/summary_ops.py b/tensorflow/contrib/summary/summary_ops.py
index 8e37987cb7..de6f2cd79f 100644
--- a/tensorflow/contrib/summary/summary_ops.py
+++ b/tensorflow/contrib/summary/summary_ops.py
@@ -516,6 +516,27 @@ def import_event(tensor, name=None):
       context.context().summary_writer_resource, tensor, name=name)
 
 
+def flush(writer=None, name=None):
+  """Forces summary writer to send any buffered data to storage.
+
+  This operation blocks until that finishes.
+
+  Args:
+    writer: The @{tf.contrib.summary.SummaryWriter} resource to flush.
+      The thread default will be used if this parameter is None.
+      Otherwise a @{tf.no_op} is returned.
+    name: A name for the operation (optional).
+
+  Returns:
+    The created @{tf.Operation}.
+  """
+  if writer is None:
+    writer = context.context().summary_writer_resource
+    if writer is None:
+      return control_flow_ops.no_op()
+  return gen_summary_ops.flush_summary_writer(writer, name=name)
+
+
 def eval_dir(model_dir, name=None):
   """Construct a logdir for an eval summary writer."""
   return os.path.join(model_dir, "eval" if not name else "eval_" + name)
diff --git a/tensorflow/contrib/summary/summary_ops_test.py b/tensorflow/contrib/summary/summary_ops_test.py
index ad89c0c36a..3fe421a7e9 100644
--- a/tensorflow/contrib/summary/summary_ops_test.py
+++ b/tensorflow/contrib/summary/summary_ops_test.py
@@ -109,6 +109,33 @@ class TargetTest(test_util.TensorFlowTestCase):
       self.assertEqual(len(events), 2)
       self.assertEqual(events[1].summary.value[0].tag, 'scalar')
 
+  def testMaxQueue(self):
+    logs = tempfile.mkdtemp()
+    with summary_ops.create_summary_file_writer(
+        logs, max_queue=2, flush_millis=999999,
+        name='lol').as_default(), summary_ops.always_record_summaries():
+      get_total = lambda: len(summary_test_util.events_from_logdir(logs))
+      # Note: First tf.Event is always file_version.
+      self.assertEqual(1, get_total())
+      summary_ops.scalar('scalar', 2.0, step=1)
+      self.assertEqual(1, get_total())
+      summary_ops.scalar('scalar', 2.0, step=2)
+      self.assertEqual(3, get_total())
+
+  def testFlush(self):
+    logs = tempfile.mkdtemp()
+    with summary_ops.create_summary_file_writer(
+        logs, max_queue=999999, flush_millis=999999,
+        name='lol').as_default(), summary_ops.always_record_summaries():
+      get_total = lambda: len(summary_test_util.events_from_logdir(logs))
+      # Note: First tf.Event is always file_version.
+      self.assertEqual(1, get_total())
+      summary_ops.scalar('scalar', 2.0, step=1)
+      summary_ops.scalar('scalar', 2.0, step=2)
+      self.assertEqual(1, get_total())
+      summary_ops.flush()
+      self.assertEqual(3, get_total())
+
 
 class DbTest(summary_test_internal.SummaryDbTest):
 
-- 
GitLab


From d2e7a2e4bf295a23d6a2e86aa7e0636f00cc2d75 Mon Sep 17 00:00:00 2001
From: Brennan Saeta <saeta@google.com>
Date: Tue, 28 Nov 2017 19:42:30 -0800
Subject: [PATCH 0389/1225] Add VLOG-ging to gcs_file_system

PiperOrigin-RevId: 177256727
---
 tensorflow/core/platform/cloud/gcs_file_system.cc | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/tensorflow/core/platform/cloud/gcs_file_system.cc b/tensorflow/core/platform/cloud/gcs_file_system.cc
index 54d38fe962..45e9b05092 100644
--- a/tensorflow/core/platform/cloud/gcs_file_system.cc
+++ b/tensorflow/core/platform/cloud/gcs_file_system.cc
@@ -697,6 +697,9 @@ Status GcsFileSystem::LoadBufferFromGCS(const string& filename, size_t offset,
   TF_RETURN_WITH_CONTEXT_IF_ERROR(request->Send(), " when reading gs://",
                                   bucket, "/", object);
 
+  VLOG(1) << "Successful read of gs://" << bucket << "/" << object << " @ "
+          << offset << " of size: " << out->size();
+
   if (out->size() < block_size()) {
     // Check stat cache to see if we encountered an interrupted read.
     FileStatistics stat;
@@ -706,6 +709,8 @@ Status GcsFileSystem::LoadBufferFromGCS(const string& filename, size_t offset,
             "File contents are inconsistent for file: %s @ %lu.",
             filename.c_str(), offset));
       }
+      VLOG(2) << "Successful integrity check for: gs://" << bucket << "/"
+              << object << " @ " << offset;
     }
   }
 
@@ -868,6 +873,11 @@ Status GcsFileSystem::StatForObject(const string& fname, const string& bucket,
         TF_RETURN_IF_ERROR(GetStringValue(root, "updated", &updated));
         TF_RETURN_IF_ERROR(ParseRfc3339Time(updated, &(stat->mtime_nsec)));
 
+        VLOG(1) << "Stat of: gs://" << bucket << "/" << object << " -- "
+                << " length: " << stat->length
+                << "; mtime_nsec: " << stat->mtime_nsec
+                << "; updated: " << updated;
+
         stat->is_directory = false;
         return Status::OK();
       };
-- 
GitLab


From bdde4d040cf01ef241ad349cf222c227b9a88814 Mon Sep 17 00:00:00 2001
From: David Majnemer <majnemer@google.com>
Date: Tue, 28 Nov 2017 20:41:47 -0800
Subject: [PATCH 0390/1225] [XLA] Support transposing the spatial dimensions of
 a convolution's activations

PiperOrigin-RevId: 177260886
---
 .../compiler/xla/service/transpose_folding.cc |  9 ++-
 .../xla/service/transpose_folding_test.cc     | 64 +++++++++++++++++++
 2 files changed, 71 insertions(+), 2 deletions(-)

diff --git a/tensorflow/compiler/xla/service/transpose_folding.cc b/tensorflow/compiler/xla/service/transpose_folding.cc
index fb55d4e543..42b616f4c3 100644
--- a/tensorflow/compiler/xla/service/transpose_folding.cc
+++ b/tensorflow/compiler/xla/service/transpose_folding.cc
@@ -102,6 +102,10 @@ bool FoldTransposeIntoConvolution(InstructionOperandsPair pair) {
   auto& convolution = *pair.first;
   auto& operand_indices = pair.second;
 
+  if (operand_indices.empty()) {
+    return false;
+  }
+
   const ConvolutionDimensionNumbers& dnums =
       convolution.convolution_dimension_numbers();
   ConvolutionDimensionNumbers new_dnums = dnums;
@@ -121,8 +125,9 @@ bool FoldTransposeIntoConvolution(InstructionOperandsPair pair) {
         transpose_dimensions[dnums.input_batch_dimension()]);
     new_dnums.set_input_feature_dimension(
         transpose_dimensions[dnums.input_feature_dimension()]);
-    for (const auto& spatial_dimension : dnums.input_spatial_dimensions()) {
-      CHECK_EQ(spatial_dimension, transpose_dimensions[spatial_dimension]);
+    for (auto& input_spatial_dimension :
+         *new_dnums.mutable_input_spatial_dimensions()) {
+      input_spatial_dimension = transpose_dimensions[input_spatial_dimension];
     }
     new_lhs = &transpose_operand;
   } else {
diff --git a/tensorflow/compiler/xla/service/transpose_folding_test.cc b/tensorflow/compiler/xla/service/transpose_folding_test.cc
index 6ac32e88f1..ba99852905 100644
--- a/tensorflow/compiler/xla/service/transpose_folding_test.cc
+++ b/tensorflow/compiler/xla/service/transpose_folding_test.cc
@@ -376,5 +376,69 @@ TEST_F(TransposeFoldingTest, FoldConvTransposeLhs) {
       new_conv->convolution_dimension_numbers().output_spatial_dimensions(1));
 }
 
+// Test that a transpose of every dimension in the activations gets folded into
+// convolution.
+TEST_F(TransposeFoldingTest, FoldConvComplexTransposeLhs) {
+  auto builder = HloComputation::Builder("entry_computation");
+  HloInstruction* x = builder.AddInstruction(HloInstruction::CreateParameter(
+      /*parameter_number=*/0, ShapeUtil::MakeShape(F32, {3, 2, 1, 1}),
+      /*name=*/"x"));
+  HloInstruction* y = builder.AddInstruction(HloInstruction::CreateParameter(
+      /*parameter_number=*/1, ShapeUtil::MakeShape(F32, {2, 3, 1, 1}),
+      /*name=*/"y"));
+  HloInstruction* transpose_x =
+      builder.AddInstruction(HloInstruction::CreateTranspose(
+          ShapeUtil::MakeShape(F32, {2, 3, 1, 1}), x, {1, 0, 3, 2}));
+  auto dnums = ComputationBuilder::CreateDefaultConvDimensionNumbers();
+  Window window;
+  for (int i = 0; i < 2; ++i) {
+    WindowDimension* dim = window.add_dimensions();
+    dim->set_padding_low(0);
+    dim->set_padding_high(0);
+    dim->set_base_dilation(1);
+    dim->set_window_dilation(1);
+    dim->set_stride(1);
+    dim->set_size(y->shape().dimensions(dnums.kernel_spatial_dimensions(i)));
+  }
+  StatusOr<Shape> conv_shape = ShapeInference::InferConvolveShape(
+      transpose_x->shape(), y->shape(), window, dnums);
+  EXPECT_IS_OK(conv_shape);
+  HloInstruction* conv = builder.AddInstruction(HloInstruction::CreateConvolve(
+      conv_shape.ValueOrDie(), transpose_x, y, window, dnums));
+
+  HloModule module("test_module");
+  HloComputation* entry_computation =
+      module.AddEntryComputation(builder.Build(conv));
+  FoldTranspose(&module);
+
+  // Instructions after folding: x, y, and the convolution.
+  std::unordered_set<HloInstruction*> instruction_set(
+      entry_computation->instructions().begin(),
+      entry_computation->instructions().end());
+  EXPECT_EQ(1, instruction_set.erase(x)) << "x is not in entry_computation.";
+  EXPECT_EQ(1, instruction_set.erase(y)) << "y is not in entry_computation.";
+  EXPECT_EQ(1, instruction_set.size())
+      << "entry_computation should contain exactly 3 instructions.";
+  HloInstruction* new_conv = *instruction_set.begin();
+  EXPECT_EQ(HloOpcode::kConvolution, new_conv->opcode());
+  EXPECT_EQ(dnums.input_feature_dimension(),
+            new_conv->convolution_dimension_numbers().input_batch_dimension());
+  EXPECT_EQ(
+      dnums.input_batch_dimension(),
+      new_conv->convolution_dimension_numbers().input_feature_dimension());
+  EXPECT_EQ(
+      dnums.input_spatial_dimensions(0),
+      new_conv->convolution_dimension_numbers().input_spatial_dimensions(1));
+  EXPECT_EQ(
+      dnums.input_spatial_dimensions(1),
+      new_conv->convolution_dimension_numbers().input_spatial_dimensions(0));
+  EXPECT_EQ(
+      dnums.output_spatial_dimensions(0),
+      new_conv->convolution_dimension_numbers().output_spatial_dimensions(0));
+  EXPECT_EQ(
+      dnums.output_spatial_dimensions(1),
+      new_conv->convolution_dimension_numbers().output_spatial_dimensions(1));
+}
+
 }  // namespace
 }  // namespace xla
-- 
GitLab


From a55ee58c89d5bf6a8cd70b706dc3af90d7d6efc4 Mon Sep 17 00:00:00 2001
From: Brennan Saeta <saeta@google.com>
Date: Tue, 28 Nov 2017 20:54:54 -0800
Subject: [PATCH 0391/1225] Include the filename when we encounter EOF

PiperOrigin-RevId: 177261696
---
 tensorflow/core/platform/cloud/file_block_cache.cc | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/tensorflow/core/platform/cloud/file_block_cache.cc b/tensorflow/core/platform/cloud/file_block_cache.cc
index a472ae52fc..e1afc7b308 100644
--- a/tensorflow/core/platform/cloud/file_block_cache.cc
+++ b/tensorflow/core/platform/cloud/file_block_cache.cc
@@ -181,7 +181,9 @@ Status FileBlockCache::Read(const string& filename, size_t offset, size_t n,
       // The requested offset is at or beyond the end of the file. This can
       // happen if `offset` is not block-aligned, and the read returns the last
       // block in the file, which does not extend all the way out to `offset`.
-      return errors::OutOfRange("EOF at offset ", offset);
+      return errors::OutOfRange("EOF at offset ", offset, " in file ", filename,
+                                " at position ", pos, "with data size ",
+                                data.size());
     }
     auto begin = data.begin();
     if (offset > pos) {
-- 
GitLab


From 05f57851d4657ec6c09a454b157cf17d89d0cfe2 Mon Sep 17 00:00:00 2001
From: Asim Shankar <ashankar@google.com>
Date: Tue, 28 Nov 2017 21:10:16 -0800
Subject: [PATCH 0392/1225] Bugfixes: Gather's gradient for 2+ dimensional
 indices with eager execution.

And shape inference function for the VariableShape operation.

PiperOrigin-RevId: 177262783
---
 tensorflow/core/ops/resource_variable_ops.cc  |  5 ++-
 .../python/ops/resource_variable_ops.py       | 21 ++++------
 tensorflow/python/training/momentum_test.py   | 41 +++++++++++++------
 3 files changed, 39 insertions(+), 28 deletions(-)

diff --git a/tensorflow/core/ops/resource_variable_ops.cc b/tensorflow/core/ops/resource_variable_ops.cc
index cdfbec85cf..bf9e673e8e 100644
--- a/tensorflow/core/ops/resource_variable_ops.cc
+++ b/tensorflow/core/ops/resource_variable_ops.cc
@@ -204,7 +204,10 @@ Status VariableShapeShapeFn(InferenceContext* c) {
   if (handle_data == nullptr || handle_data->empty()) {
     return errors::InvalidArgument("Handle doesn't have shape information.");
   }
-  c->set_output(0, (*handle_data)[0].shape);
+  ShapeHandle var_shape = (*handle_data)[0].shape;
+  int64 rank = c->RankKnown(var_shape) ? c->Rank(var_shape)
+                                       : InferenceContext::kUnknownDim;
+  c->set_output(0, c->Vector(rank));
   return Status::OK();
 }
 
diff --git a/tensorflow/python/ops/resource_variable_ops.py b/tensorflow/python/ops/resource_variable_ops.py
index 343e38f960..652bfa1ebc 100644
--- a/tensorflow/python/ops/resource_variable_ops.py
+++ b/tensorflow/python/ops/resource_variable_ops.py
@@ -887,26 +887,19 @@ def _ReadGrad(_, grad):
 def _GatherGrad(op, grad):
   """Gradient for gather op."""
   # Build appropriately shaped IndexedSlices
-  # Walk graph back until the original handle is found.
-  # TODO(apassos): more robust way of getting the shape.
-  # TODO(apassos): implement this for EAGER mode.
-  if context.in_eager_mode():
-    dense_shape = gen_resource_variable_ops.variable_shape(op.inputs[0])
-    return (ops.IndexedSlices(grad,
-                              op.inputs[1],
-                              dense_shape=dense_shape),
-            None)
   handle = op.inputs[0]
-  while handle.op.type != "VarHandleOp":
-    handle = handle.op.inputs[0]
-  params_shape = ops.convert_to_tensor(
-      tensor_shape.TensorShape(handle.op.get_attr("shape")))
   indices = op.inputs[1]
+  if context.in_graph_mode():
+    # Walk graph back until the original handle is found.
+    # TODO(apassos): implement this for EAGER mode.
+    while handle.op.type != "VarHandleOp":
+      handle = handle.op.inputs[0]
+  params_shape = gen_resource_variable_ops.variable_shape(handle)
   size = array_ops.expand_dims(array_ops.size(indices), 0)
   values_shape = array_ops.concat([size, params_shape[1:]], 0)
   values = array_ops.reshape(grad, values_shape)
   indices = array_ops.reshape(indices, size)
-  return [ops.IndexedSlices(values, indices, params_shape), None]
+  return (ops.IndexedSlices(values, indices, params_shape), None)
 
 
 def _to_proto_fn(v, export_scope=None):
diff --git a/tensorflow/python/training/momentum_test.py b/tensorflow/python/training/momentum_test.py
index 7268b3abc9..6865513b0e 100644
--- a/tensorflow/python/training/momentum_test.py
+++ b/tensorflow/python/training/momentum_test.py
@@ -234,23 +234,38 @@ class MomentumOptimizerTest(test.TestCase):
           self.assertAllClose(var0_np, var0.eval())
           self.assertAllClose(var1_np, var1.eval())
 
+  @test_util.run_in_graph_and_eager_modes(reset_test=True)
   def testMinimizeSparseResourceVariable(self):
     for dtype in [dtypes.half, dtypes.float32, dtypes.float64]:
-      with self.test_session():
-        var0 = resource_variable_ops.ResourceVariable([[1.0, 2.0]], dtype=dtype)
+      var0 = resource_variable_ops.ResourceVariable([[1.0, 2.0]], dtype=dtype)
+
+      # pylint: disable=cell-var-from-loop
+      def loss():
         x = constant_op.constant([[4.0], [5.0]], dtype=dtype)
         pred = math_ops.matmul(embedding_ops.embedding_lookup([var0], [0]), x)
-        loss = pred * pred
-        sgd_op = momentum_lib.MomentumOptimizer(
-            learning_rate=1.0, momentum=0.0).minimize(loss)
-        variables.global_variables_initializer().run()
-        # Fetch params to validate initial values
-        self.assertAllCloseAccordingToType([[1.0, 2.0]], var0.eval())
-        # Run 1 step of sgd
-        sgd_op.run()
-        # Validate updated params
-        self.assertAllCloseAccordingToType(
-            [[-111, -138]], var0.eval())
+        return pred * pred
+      # pylint: enable=cell-var-from-loop
+
+      opt = momentum_lib.MomentumOptimizer(learning_rate=1.0, momentum=0.0)
+      sgd_op = opt.minimize(loss if context.in_eager_mode() else loss())
+      self.evaluate(variables.global_variables_initializer())
+      # Run 1 step of sgd
+      self.evaluate(sgd_op)
+      # Validate updated params
+      self.assertAllCloseAccordingToType([[-111, -138]], self.evaluate(var0))
+
+  @test_util.run_in_graph_and_eager_modes(reset_test=True)
+  def testMinimizeWith2DIndiciesForEmbeddingLookup(self):
+    var0 = resource_variable_ops.ResourceVariable(array_ops.ones([2, 2]))
+
+    def loss():
+      return math_ops.reduce_sum(embedding_ops.embedding_lookup(var0, [[1]]))
+
+    opt = momentum_lib.MomentumOptimizer(learning_rate=1.0, momentum=0.0)
+    sgd_op = opt.minimize(loss if context.in_eager_mode() else loss())
+    self.evaluate(variables.global_variables_initializer())
+    self.evaluate(sgd_op)
+    self.assertAllCloseAccordingToType([[1, 1], [0, 0]], self.evaluate(var0))
 
   def testTensorLearningRateAndMomentum(self):
     for dtype in [dtypes.half, dtypes.float32, dtypes.float64]:
-- 
GitLab


From 2ab7e9dad284bd15d69779ee0bcf8a2c894c2a2a Mon Sep 17 00:00:00 2001
From: Asim Shankar <ashankar@google.com>
Date: Tue, 28 Nov 2017 21:22:20 -0800
Subject: [PATCH 0393/1225] Go: Add some more detail to an error message.

Closes #14806

PiperOrigin-RevId: 177263469
---
 tensorflow/go/tensor.go | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/go/tensor.go b/tensorflow/go/tensor.go
index 1326a95278..cd05e2aa0a 100644
--- a/tensorflow/go/tensor.go
+++ b/tensorflow/go/tensor.go
@@ -270,7 +270,7 @@ func typeOf(dt DataType, shape []int64) reflect.Type {
 		}
 	}
 	if ret == nil {
-		panic(bug("DataType %v is not supported", dt))
+		panic(bug("DataType %v is not supported (see https://www.tensorflow.org/code/tensorflow/core/framework/types.proto)", dt))
 	}
 	for range shape {
 		ret = reflect.SliceOf(ret)
-- 
GitLab


From a7c13b33d6df91e25fc793043bf748b30e311c73 Mon Sep 17 00:00:00 2001
From: Austin Anderson <angerson@google.com>
Date: Tue, 28 Nov 2017 21:45:51 -0800
Subject: [PATCH 0394/1225] Update TF Android build instructions to warn about
 NDK 16

See https://github.com/bazelbuild/bazel/issues/4068

PiperOrigin-RevId: 177264756
---
 tensorflow/contrib/lite/java/demo/README.md | 8 +++++++-
 tensorflow/examples/android/README.md       | 8 ++++++++
 2 files changed, 15 insertions(+), 1 deletion(-)

diff --git a/tensorflow/contrib/lite/java/demo/README.md b/tensorflow/contrib/lite/java/demo/README.md
index 71b633c577..5d13a798e2 100644
--- a/tensorflow/contrib/lite/java/demo/README.md
+++ b/tensorflow/contrib/lite/java/demo/README.md
@@ -8,7 +8,12 @@
      It's easiest with Android Studio.
 
       - You'll need at least SDK version 23.
+      - Make sure to install the latest version of Bazel. Some distributions
+        ship with Bazel 0.5.4, which is too old.
       - Bazel requires Android Build Tools `26.0.1` or higher.
+      - **Bazel is incompatible with NDK revisions 15 and above,** with revision
+        16 being a compile-breaking change. [Download an older version manually
+        instead of using the SDK Manager.](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/examples/android#install-bazel-and-android-prerequisites)
       - You also need to install the Android Support Repository, available
         through Android Studio under `Android SDK Manager -> SDK Tools ->
         Android Support Repository`.
@@ -19,7 +24,8 @@
       - Make sure the `api_level` in `WORKSPACE` is set to an SDK version that
         you have installed.
       - By default, Android Studio will install the SDK to `~/Android/Sdk` and
-        the NDK to `~/Android/Sdk/ndk-bundle`.
+        the NDK to `~/Android/Sdk/ndk-bundle` (but the NDK should be a manual
+        download until Bazel supports NDK 16. See bullet points under (1)).
 
 2. Build the app with Bazel. The demo needs C++11:
 
diff --git a/tensorflow/examples/android/README.md b/tensorflow/examples/android/README.md
index 79202a38d7..881a975e60 100644
--- a/tensorflow/examples/android/README.md
+++ b/tensorflow/examples/android/README.md
@@ -126,6 +126,10 @@ the Android NDK and SDK must be installed on your system.
 2.  The Android NDK is required to build the native (C/C++) TensorFlow code. The
     current recommended version is 14b, which may be found
     [here](https://developer.android.com/ndk/downloads/older_releases.html#ndk-14b-downloads).
+
+      * NDK 16, the revision released in November 2017, is **incompatible** with
+        Bazel. See [here](https://github.com/tensorflow/tensorflow/issues/14918).
+
 3.  The Android SDK and build tools may be obtained
     [here](https://developer.android.com/tools/revisions/build-tools.html), or
     alternatively as part of [Android
@@ -133,6 +137,10 @@ the Android NDK and SDK must be installed on your system.
     23 is required to build the TF Android demo (though it will run on API >= 21
     devices).
 
+      - The Android Studio SDK Manager's NDK installer will install the latest
+        revision of the NDK, which is **incompatible** with Bazel. You'll need
+        to download an older version manually, as (2) suggests.
+
 ##### Edit WORKSPACE
 
 The Android entries in
-- 
GitLab


From 9aeb0eef9188a48a02078128d3d1ca6f78f0f438 Mon Sep 17 00:00:00 2001
From: Brennan Saeta <saeta@google.com>
Date: Tue, 28 Nov 2017 22:12:34 -0800
Subject: [PATCH 0395/1225] Add logging to help differentiate multiple
 stacktraces

PiperOrigin-RevId: 177266569
---
 tensorflow/contrib/tpu/python/tpu/tpu_estimator.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py b/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py
index fe17664d7f..84a4208be3 100644
--- a/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py
+++ b/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py
@@ -514,6 +514,7 @@ class _InfeedThreadController(_InfeedOutfeedThreadBaseController):
           exc_info=1
       )
       time.sleep(120)
+      logging.error('Closing the failed session.')
       session.close()
 
   def join(self):
-- 
GitLab


From 4c2ca8b0cbfdbdc9f7525b1d9ad0c057cb513749 Mon Sep 17 00:00:00 2001
From: Igor Ganichev <iga@google.com>
Date: Tue, 28 Nov 2017 22:48:10 -0800
Subject: [PATCH 0396/1225] Fix typo in GradientTape.persistent_ comment.

PiperOrigin-RevId: 177268420
---
 tensorflow/c/eager/tape.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/c/eager/tape.h b/tensorflow/c/eager/tape.h
index f52248e7d5..191e9c3413 100644
--- a/tensorflow/c/eager/tape.h
+++ b/tensorflow/c/eager/tape.h
@@ -161,7 +161,7 @@ class GradientTape {
   // the tape refer to it); to aid in tape garbage collection.
   std::unordered_map<int64, int64> tensor_usage_;
 
-  // If true, all activations are deleted in the first call to ComputeGradient.
+  // If false, all activations are deleted in the first call to ComputeGradient.
   // Else, only when this is destructed.
   bool persistent_;
 };
-- 
GitLab


From bc87c28c60dddc6137b11f8a1fd31fa79bcf0c1f Mon Sep 17 00:00:00 2001
From: James Qin <jamesqin@google.com>
Date: Wed, 29 Nov 2017 00:34:54 -0800
Subject: [PATCH 0397/1225] Register fp16 Reduce min on GPU.

PiperOrigin-RevId: 177274800
---
 tensorflow/core/kernels/reduction_ops_min.cc  | 1 +
 tensorflow/core/kernels/reduction_ops_test.cc | 5 +++++
 2 files changed, 6 insertions(+)

diff --git a/tensorflow/core/kernels/reduction_ops_min.cc b/tensorflow/core/kernels/reduction_ops_min.cc
index 807ac0a456..5c537c5b9c 100644
--- a/tensorflow/core/kernels/reduction_ops_min.cc
+++ b/tensorflow/core/kernels/reduction_ops_min.cc
@@ -50,6 +50,7 @@ TF_CALL_REAL_NUMBER_TYPES(REGISTER_CPU_KERNELS);
           .TypeConstraint<int64>("Tidx")                                       \
           .HostMemory("reduction_indices"),                                    \
       ReductionOp<GPUDevice, type, int64, Eigen::internal::MinReducer<type>>);
+REGISTER_GPU_KERNELS(Eigen::half);
 REGISTER_GPU_KERNELS(float);
 REGISTER_GPU_KERNELS(double);
 
diff --git a/tensorflow/core/kernels/reduction_ops_test.cc b/tensorflow/core/kernels/reduction_ops_test.cc
index 9bbe993a2f..fe8ea59f1b 100644
--- a/tensorflow/core/kernels/reduction_ops_test.cc
+++ b/tensorflow/core/kernels/reduction_ops_test.cc
@@ -174,6 +174,11 @@ static void BM_Min2DToScalarGPU(int iters, int num_x, int num_y) {
 }
 BENCHMARK(BM_Min2DToScalarGPU)->RangePair(2048, 8192, 2048, 8192);
 
+static void BM_Min2DToScalarGPUHalf(int iters, int num_x, int num_y) {
+  ReduceToScalar<Eigen::half>(iters, "gpu", "Min", num_x, num_y);
+}
+BENCHMARK(BM_Min2DToScalarGPUHalf)->RangePair(2048, 8192, 2048, 8192);
+
 static void BM_Bool2DToScalarGPU(int iters, int num_x, int num_y) {
   ReduceToScalar<bool>(iters, "gpu", "All", num_x, num_y);
 }
-- 
GitLab


From 6196d30cf8498c428bdb7fbd4b4ab9cb83853457 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 29 Nov 2017 02:51:49 -0800
Subject: [PATCH 0398/1225] Add alpha support for dealing with shardings to
 batchnorm rewriter

PiperOrigin-RevId: 177285265
---
 .../xla/service/batchnorm_rewriter.cc         | 328 ++++++++++--------
 .../compiler/xla/service/hlo_sharding.cc      |   3 +-
 2 files changed, 186 insertions(+), 145 deletions(-)

diff --git a/tensorflow/compiler/xla/service/batchnorm_rewriter.cc b/tensorflow/compiler/xla/service/batchnorm_rewriter.cc
index c6193b3fbb..2bbae25aee 100644
--- a/tensorflow/compiler/xla/service/batchnorm_rewriter.cc
+++ b/tensorflow/compiler/xla/service/batchnorm_rewriter.cc
@@ -149,6 +149,15 @@ Status BatchNormRewriterVisitor::HandleBatchNormTraining(
   if (!rewrite_training_op_) {
     return Status::OK();
   }
+
+  std::vector<HloInstruction*> added_instructions;
+  auto add = [&](std::unique_ptr<HloInstruction> inst) {
+    HloInstruction* added_inst = computation_->AddInstruction(std::move(inst));
+    added_instructions.push_back(added_inst);
+    return added_inst;
+  };
+  int64 instruction_count_before = computation_->instruction_count();
+
   // Expand batch norm training into smaller HLO ops.
   HloInstruction* operand = batch_norm->mutable_operand(0);
   const Shape operand_shape = operand->shape();
@@ -160,7 +169,7 @@ Status BatchNormRewriterVisitor::HandleBatchNormTraining(
       Literal::CreateR0<float>(size_in_elements / feature_count);
   TF_ASSIGN_OR_RETURN(elements_per_feature_literal,
                       elements_per_feature_literal->Convert(ptype));
-  auto elements_per_feature = computation_->AddInstruction(
+  auto elements_per_feature = add(
       HloInstruction::CreateConstant(std::move(elements_per_feature_literal)));
 
   HloInstruction* scale = batch_norm->mutable_operand(1);
@@ -169,14 +178,12 @@ Status BatchNormRewriterVisitor::HandleBatchNormTraining(
 
   auto zero_literal = Literal::CreateR0(0.0f);
   TF_ASSIGN_OR_RETURN(zero_literal, zero_literal->Convert(ptype));
-  auto zero = computation_->AddInstruction(
-      HloInstruction::CreateConstant(std::move(zero_literal)));
+  auto zero = add(HloInstruction::CreateConstant(std::move(zero_literal)));
 
   auto epsilon_literal = Literal::CreateR0(batch_norm->epsilon());
   TF_ASSIGN_OR_RETURN(epsilon_literal, epsilon_literal->Convert(ptype));
-  auto epsilon = computation_->AddInstruction(
-      HloInstruction::CreateConstant(std::move(epsilon_literal)));
-
+  auto epsilon =
+      add(HloInstruction::CreateConstant(std::move(epsilon_literal)));
   std::vector<int64> dimensions_without_feature;
 
   for (int64 i = 0; i < ShapeUtil::Rank(operand_shape); ++i) {
@@ -185,105 +192,110 @@ Status BatchNormRewriterVisitor::HandleBatchNormTraining(
     }
   }
 
-  auto scale_broadcasted = computation_->AddInstruction(
+  auto scale_broadcasted = add(
       HloInstruction::CreateBroadcast(operand_shape, scale, {feature_index}));
 
-  auto offset_broadcasted = computation_->AddInstruction(
+  auto offset_broadcasted = add(
       HloInstruction::CreateBroadcast(operand_shape, offset, {feature_index}));
 
   HloComputation* add_reduce_computation =
       GetScalarBinaryComputation(ptype, HloOpcode::kAdd);
 
   // X^2.
-  auto operand_squared =
-      computation_->AddInstruction(HloInstruction::CreateBinary(
-          operand_shape, HloOpcode::kMultiply, operand, operand));
+  auto operand_squared = add(HloInstruction::CreateBinary(
+      operand_shape, HloOpcode::kMultiply, operand, operand));
   // Sum[X].
-  auto sum = computation_->AddInstruction(HloInstruction::CreateReduce(
-      feature_shape, operand, zero, dimensions_without_feature,
-      add_reduce_computation));
+  auto sum = add(HloInstruction::CreateReduce(feature_shape, operand, zero,
+                                              dimensions_without_feature,
+                                              add_reduce_computation));
 
   // Sum[X^2].
-  auto squared_sum = computation_->AddInstruction(HloInstruction::CreateReduce(
+  auto squared_sum = add(HloInstruction::CreateReduce(
       feature_shape, operand_squared, zero, dimensions_without_feature,
       add_reduce_computation));
 
   // Fuse two parallel reduces together to improve performance.
-  if (use_fusion_) {
-    auto tuple = computation_->AddInstruction(
-        HloInstruction::CreateTuple({sum, squared_sum}));
+  if (use_fusion_ && !batch_norm->has_sharding()) {
+    auto tuple = add(HloInstruction::CreateTuple({sum, squared_sum}));
 
     auto fused = computation_->CreateFusionInstruction(
         {tuple, sum, squared_sum, operand_squared},
         HloInstruction::FusionKind::kInput);
 
-    sum = computation_->AddInstruction(
-        HloInstruction::CreateGetTupleElement(feature_shape, fused, 0));
+    sum = add(HloInstruction::CreateGetTupleElement(feature_shape, fused, 0));
 
-    squared_sum = computation_->AddInstruction(
-        HloInstruction::CreateGetTupleElement(feature_shape, fused, 1));
+    squared_sum =
+        add(HloInstruction::CreateGetTupleElement(feature_shape, fused, 1));
   }
 
   // E[X].
-  auto mean = computation_->AddInstruction(HloInstruction::CreateBinary(
+  auto mean = add(HloInstruction::CreateBinary(
       feature_shape, HloOpcode::kDivide, sum, elements_per_feature));
 
-  auto mean_broadcasted = computation_->AddInstruction(
+  auto mean_broadcasted = add(
       HloInstruction::CreateBroadcast(operand_shape, mean, {feature_index}));
 
   // E[X^2].
-  auto square_mean = computation_->AddInstruction(HloInstruction::CreateBinary(
+  auto square_mean = add(HloInstruction::CreateBinary(
       feature_shape, HloOpcode::kDivide, squared_sum, elements_per_feature));
 
   // E^2[X].
-  auto mean_square = computation_->AddInstruction(HloInstruction::CreateBinary(
+  auto mean_square = add(HloInstruction::CreateBinary(
       feature_shape, HloOpcode::kMultiply, mean, mean));
 
   // Var[X].
-  auto var = computation_->AddInstruction(HloInstruction::CreateBinary(
+  auto var = add(HloInstruction::CreateBinary(
       feature_shape, HloOpcode::kSubtract, square_mean, mean_square));
 
-  auto var_broadcasted = computation_->AddInstruction(
-      HloInstruction::CreateBroadcast(operand_shape, var, {feature_index}));
+  auto var_broadcasted =
+      add(HloInstruction::CreateBroadcast(operand_shape, var, {feature_index}));
 
   // Var[X] + epsilon.
-  auto var_add_epsilon =
-      computation_->AddInstruction(HloInstruction::CreateBinary(
-          operand_shape, HloOpcode::kAdd, var_broadcasted, epsilon));
+  auto var_add_epsilon = add(HloInstruction::CreateBinary(
+      operand_shape, HloOpcode::kAdd, var_broadcasted, epsilon));
 
   auto neg_half_literal = Literal::CreateR0(-0.5f);
   TF_ASSIGN_OR_RETURN(neg_half_literal, neg_half_literal->Convert(ptype));
-  auto neg_half = computation_->AddInstruction(
-      HloInstruction::CreateConstant(std::move(neg_half_literal)));
+  auto neg_half =
+      add(HloInstruction::CreateConstant(std::move(neg_half_literal)));
 
   // 1 / Sqrt[Var[X] + epsilon].
-  auto rsqrt_var_add_epsilon =
-      computation_->AddInstruction(HloInstruction::CreateBinary(
-          operand_shape, HloOpcode::kPower, var_add_epsilon, neg_half));
+  auto rsqrt_var_add_epsilon = add(HloInstruction::CreateBinary(
+      operand_shape, HloOpcode::kPower, var_add_epsilon, neg_half));
 
   // X - E[X].
-  auto operand_minus_mean =
-      computation_->AddInstruction(HloInstruction::CreateBinary(
-          operand_shape, HloOpcode::kSubtract, operand, mean_broadcasted));
+  auto operand_minus_mean = add(HloInstruction::CreateBinary(
+      operand_shape, HloOpcode::kSubtract, operand, mean_broadcasted));
 
   // (X - E[X]) / Sqrt[Var[X] + epsilon].
-  auto normalized = computation_->AddInstruction(
+  auto normalized = add(
       HloInstruction::CreateBinary(operand_shape, HloOpcode::kMultiply,
                                    operand_minus_mean, rsqrt_var_add_epsilon));
 
   // (X - E[X]) / Sqrt[Var[X] + epsilon] * scale.
-  auto scaled_normalized =
-      computation_->AddInstruction(HloInstruction::CreateBinary(
-          operand_shape, HloOpcode::kMultiply, normalized, scale_broadcasted));
+  auto scaled_normalized = add(HloInstruction::CreateBinary(
+      operand_shape, HloOpcode::kMultiply, normalized, scale_broadcasted));
 
   // (X - E[X]) / Sqrt[Var[X] + epsilon] * scale + offset.
-  auto shifted_normalized = computation_->AddInstruction(
-      HloInstruction::CreateBinary(operand_shape, HloOpcode::kAdd,
-                                   scaled_normalized, offset_broadcasted));
-
-  TF_CHECK_OK(ReplaceWithNewInstruction(
-      batch_norm,
-      HloInstruction::CreateTuple({shifted_normalized, mean, var})));
+  auto shifted_normalized = add(HloInstruction::CreateBinary(
+      operand_shape, HloOpcode::kAdd, scaled_normalized, offset_broadcasted));
+
+  auto tuple = HloInstruction::CreateTuple({shifted_normalized, mean, var});
+
+  if (batch_norm->has_sharding()) {
+    int64 instruction_count_after = computation_->instruction_count();
+    CHECK_EQ(instruction_count_after,
+             instruction_count_before + added_instructions.size());
+    for (HloInstruction* inst : added_instructions) {
+      if (ShapeUtil::Equal(inst->shape(), operand_shape)) {
+        inst->set_sharding(batch_norm->sharding());
+      } else {
+        inst->set_sharding(HloSharding::Replicate());
+      }
+    }
+    tuple->set_sharding(batch_norm->sharding());
+  }
+  TF_CHECK_OK(ReplaceWithNewInstruction(batch_norm, std::move(tuple)));
   return Status::OK();
 }
 
@@ -317,52 +329,69 @@ Status BatchNormRewriterVisitor::HandleBatchNormInference(
     }
   }
 
-  auto scale_broadcasted = computation_->AddInstruction(
+  std::vector<HloInstruction*> added_instructions;
+  auto add = [&](std::unique_ptr<HloInstruction> inst) {
+    HloInstruction* added_inst = computation_->AddInstruction(std::move(inst));
+    added_instructions.push_back(added_inst);
+    return added_inst;
+  };
+  int64 instruction_count_before = computation_->instruction_count();
+
+  auto scale_broadcasted = add(
       HloInstruction::CreateBroadcast(operand_shape, scale, {feature_index}));
 
-  auto offset_broadcasted = computation_->AddInstruction(
+  auto offset_broadcasted = add(
       HloInstruction::CreateBroadcast(operand_shape, offset, {feature_index}));
 
-  auto mean_broadcasted = computation_->AddInstruction(
+  auto mean_broadcasted = add(
       HloInstruction::CreateBroadcast(operand_shape, mean, {feature_index}));
 
-  auto var_broadcasted = computation_->AddInstruction(
-      HloInstruction::CreateBroadcast(operand_shape, var, {feature_index}));
+  auto var_broadcasted =
+      add(HloInstruction::CreateBroadcast(operand_shape, var, {feature_index}));
 
   // Var[X] + epsilon.
-  auto var_add_epsilon =
-      computation_->AddInstruction(HloInstruction::CreateBinary(
-          operand_shape, HloOpcode::kAdd, var_broadcasted, epsilon));
+  auto var_add_epsilon = add(HloInstruction::CreateBinary(
+      operand_shape, HloOpcode::kAdd, var_broadcasted, epsilon));
 
   auto neg_half_literal = Literal::CreateR0(-0.5f);
   TF_ASSIGN_OR_RETURN(neg_half_literal, neg_half_literal->Convert(ptype));
-  auto neg_half = computation_->AddInstruction(
-      HloInstruction::CreateConstant(std::move(neg_half_literal)));
+  auto neg_half =
+      add(HloInstruction::CreateConstant(std::move(neg_half_literal)));
 
   // 1 / Sqrt[Var[X] + epsilon].
-  auto rsqrt_var_add_epsilon =
-      computation_->AddInstruction(HloInstruction::CreateBinary(
-          operand_shape, HloOpcode::kPower, var_add_epsilon, neg_half));
+  auto rsqrt_var_add_epsilon = add(HloInstruction::CreateBinary(
+      operand_shape, HloOpcode::kPower, var_add_epsilon, neg_half));
 
   // X - E[X].
-  auto operand_minus_mean =
-      computation_->AddInstruction(HloInstruction::CreateBinary(
-          operand_shape, HloOpcode::kSubtract, operand, mean_broadcasted));
+  auto operand_minus_mean = add(HloInstruction::CreateBinary(
+      operand_shape, HloOpcode::kSubtract, operand, mean_broadcasted));
 
   // (X - E[X]) / Sqrt[Var[X] + epsilon].
-  auto normalized = computation_->AddInstruction(
+  auto normalized = add(
       HloInstruction::CreateBinary(operand_shape, HloOpcode::kMultiply,
                                    operand_minus_mean, rsqrt_var_add_epsilon));
 
   // (X - E[X]) / Sqrt[Var[X] + epsilon] * scale.
-  auto scaled_normalized =
-      computation_->AddInstruction(HloInstruction::CreateBinary(
-          operand_shape, HloOpcode::kMultiply, normalized, scale_broadcasted));
+  auto scaled_normalized = add(HloInstruction::CreateBinary(
+      operand_shape, HloOpcode::kMultiply, normalized, scale_broadcasted));
 
   // (X - E[X]) / Sqrt[Var[X] + epsilon] * scale + offset.
   auto shifted_normalized = HloInstruction::CreateBinary(
       operand_shape, HloOpcode::kAdd, scaled_normalized, offset_broadcasted);
 
+  int64 instruction_count_after = computation_->instruction_count();
+  CHECK_EQ(instruction_count_after,
+           instruction_count_before + added_instructions.size());
+  if (batch_norm->has_sharding()) {
+    for (HloInstruction* inst : added_instructions) {
+      if (ShapeUtil::Equal(inst->shape(), operand_shape)) {
+        inst->set_sharding(batch_norm->sharding());
+      } else {
+        inst->set_sharding(HloSharding::Replicate());
+      }
+    }
+    shifted_normalized->set_sharding(batch_norm->sharding());
+  }
   TF_CHECK_OK(
       ReplaceWithNewInstruction(batch_norm, std::move(shifted_normalized)));
   return Status::OK();
@@ -385,6 +414,13 @@ Status BatchNormRewriterVisitor::HandleBatchNormGrad(
   if (!rewrite_grad_op_) {
     return Status::OK();
   }
+  std::vector<HloInstruction*> added_instructions;
+  auto add = [&](std::unique_ptr<HloInstruction> inst) {
+    HloInstruction* added_inst = computation_->AddInstruction(std::move(inst));
+    added_instructions.push_back(added_inst);
+    return added_inst;
+  };
+  int64 instruction_count_before = computation_->instruction_count();
 
   HloInstruction* activation = batch_norm->mutable_operand(0);
   const Shape activation_shape = activation->shape();
@@ -403,23 +439,22 @@ Status BatchNormRewriterVisitor::HandleBatchNormGrad(
       Literal::CreateR0<float>(size_in_elements / feature_count);
   TF_ASSIGN_OR_RETURN(elements_per_feature_literal,
                       elements_per_feature_literal->Convert(ptype));
-  auto elements_per_feature = computation_->AddInstruction(
+  auto elements_per_feature = add(
       HloInstruction::CreateConstant(std::move(elements_per_feature_literal)));
 
   auto zero_literal = Literal::CreateR0(0.0f);
   TF_ASSIGN_OR_RETURN(zero_literal, zero_literal->Convert(ptype));
-  auto zero = computation_->AddInstruction(
-      HloInstruction::CreateConstant(std::move(zero_literal)));
+  auto zero = add(HloInstruction::CreateConstant(std::move(zero_literal)));
 
   auto neg_half_literal = Literal::CreateR0(-0.5f);
   TF_ASSIGN_OR_RETURN(neg_half_literal, neg_half_literal->Convert(ptype));
-  auto neg_half = computation_->AddInstruction(
-      HloInstruction::CreateConstant(std::move(neg_half_literal)));
+  auto neg_half =
+      add(HloInstruction::CreateConstant(std::move(neg_half_literal)));
 
   auto epsilon_literal = Literal::CreateR0(batch_norm->epsilon());
   TF_ASSIGN_OR_RETURN(epsilon_literal, epsilon_literal->Convert(ptype));
-  auto epsilon = computation_->AddInstruction(
-      HloInstruction::CreateConstant(std::move(epsilon_literal)));
+  auto epsilon =
+      add(HloInstruction::CreateConstant(std::move(epsilon_literal)));
 
   std::vector<int64> dimensions_without_feature;
 
@@ -429,126 +464,131 @@ Status BatchNormRewriterVisitor::HandleBatchNormGrad(
     }
   }
 
-  auto scale_broadcasted =
-      computation_->AddInstruction(HloInstruction::CreateBroadcast(
-          activation_shape, scale, {feature_index}));
-  auto variance_broadcasted =
-      computation_->AddInstruction(HloInstruction::CreateBroadcast(
-          activation_shape, variance, {feature_index}));
+  auto scale_broadcasted = add(HloInstruction::CreateBroadcast(
+      activation_shape, scale, {feature_index}));
+  auto variance_broadcasted = add(HloInstruction::CreateBroadcast(
+      activation_shape, variance, {feature_index}));
 
   // E[X].
-  auto mean_broadcasted = computation_->AddInstruction(
+  auto mean_broadcasted = add(
       HloInstruction::CreateBroadcast(activation_shape, mean, {feature_index}));
 
   // rsqrt[Var[X] + epsilon].
-  auto rsqrt_var_add_epsilon_broadcasted =
-      computation_->AddInstruction(HloInstruction::CreateBinary(
-          activation_shape, HloOpcode::kPower,
-          computation_->AddInstruction(
-              HloInstruction::CreateBinary(activation_shape, HloOpcode::kAdd,
-                                           variance_broadcasted, epsilon)),
-          neg_half));
-
-  auto rsqrt_var_add_epsilon =
-      computation_->AddInstruction(HloInstruction::CreateBinary(
-          feature_shape, HloOpcode::kPower,
-          computation_->AddInstruction(HloInstruction::CreateBinary(
-              feature_shape, HloOpcode::kAdd, variance, epsilon)),
-          neg_half));
+  auto rsqrt_var_add_epsilon_broadcasted = add(HloInstruction::CreateBinary(
+      activation_shape, HloOpcode::kPower,
+      add(HloInstruction::CreateBinary(activation_shape, HloOpcode::kAdd,
+                                       variance_broadcasted, epsilon)),
+      neg_half));
+
+  auto rsqrt_var_add_epsilon = add(HloInstruction::CreateBinary(
+      feature_shape, HloOpcode::kPower,
+      add(HloInstruction::CreateBinary(feature_shape, HloOpcode::kAdd, variance,
+                                       epsilon)),
+      neg_half));
 
   // X - E[X].
-  auto activation_minus_mean = computation_->AddInstruction(
-      HloInstruction::CreateBinary(activation_shape, HloOpcode::kSubtract,
-                                   activation, mean_broadcasted));
+  auto activation_minus_mean = add(HloInstruction::CreateBinary(
+      activation_shape, HloOpcode::kSubtract, activation, mean_broadcasted));
 
   // Grad[Y] * (X - E[X]).
-  auto grad_output_times_activiation_minus_mean = computation_->AddInstruction(
-      HloInstruction::CreateBinary(activation_shape, HloOpcode::kMultiply,
-                                   grad_output, activation_minus_mean));
+  auto grad_output_times_activiation_minus_mean =
+      add(HloInstruction::CreateBinary(activation_shape, HloOpcode::kMultiply,
+                                       grad_output, activation_minus_mean));
 
   HloComputation* add_reduce_computation =
       GetScalarBinaryComputation(ptype, HloOpcode::kAdd);
 
   // sum(Grad[Y] * (X - E[X])).
   auto sum_grad_output_times_activiation_minus_mean =
-      computation_->AddInstruction(HloInstruction::CreateReduce(
+      add(HloInstruction::CreateReduce(
           feature_shape, grad_output_times_activiation_minus_mean, zero,
           dimensions_without_feature, add_reduce_computation));
 
   // Grad[beta] = Sum(Grad[Y]).
-  auto grad_beta = computation_->AddInstruction(HloInstruction::CreateReduce(
+  auto grad_beta = add(HloInstruction::CreateReduce(
       feature_shape, grad_output, zero, dimensions_without_feature,
       add_reduce_computation));
 
-  if (use_fusion_) {
-    auto tuple = computation_->AddInstruction(HloInstruction::CreateTuple(
+  if (use_fusion_ && !batch_norm->has_sharding()) {
+    auto tuple = add(HloInstruction::CreateTuple(
         {sum_grad_output_times_activiation_minus_mean, grad_beta}));
 
     auto fused = computation_->CreateFusionInstruction(
         {tuple, sum_grad_output_times_activiation_minus_mean, grad_beta},
         HloInstruction::FusionKind::kInput);
 
-    sum_grad_output_times_activiation_minus_mean = computation_->AddInstruction(
-        HloInstruction::CreateGetTupleElement(feature_shape, fused, 0));
+    sum_grad_output_times_activiation_minus_mean =
+        add(HloInstruction::CreateGetTupleElement(feature_shape, fused, 0));
 
-    grad_beta = computation_->AddInstruction(
-        HloInstruction::CreateGetTupleElement(feature_shape, fused, 1));
+    grad_beta =
+        add(HloInstruction::CreateGetTupleElement(feature_shape, fused, 1));
   }
 
   // Grad[scale] = Sum(Grad[Y] * (X - E[X]) * rsqrt[Var[X] + epsilon]).
-  auto grad_scale = computation_->AddInstruction(HloInstruction::CreateBinary(
+  auto grad_scale = add(HloInstruction::CreateBinary(
       feature_shape, HloOpcode::kMultiply,
       sum_grad_output_times_activiation_minus_mean, rsqrt_var_add_epsilon));
 
   // I2 = Sum(Grad[Y])
-  auto I2 = computation_->AddInstruction(HloInstruction::CreateBroadcast(
-      activation_shape, grad_beta, {feature_index}));
+  auto i2 = add(HloInstruction::CreateBroadcast(activation_shape, grad_beta,
+                                                {feature_index}));
 
   // I3 = Sum(Grad[Y] * (X - E[X]))
-  auto I3 = computation_->AddInstruction(HloInstruction::CreateBroadcast(
+  auto i3 = add(HloInstruction::CreateBroadcast(
       activation_shape, sum_grad_output_times_activiation_minus_mean,
       {feature_index}));
 
   // I4 = (X - E[X]) * I3
-  auto I4 = computation_->AddInstruction(HloInstruction::CreateBinary(
-      activation_shape, HloOpcode::kMultiply, I3, activation_minus_mean));
+  auto i4 = add(HloInstruction::CreateBinary(
+      activation_shape, HloOpcode::kMultiply, i3, activation_minus_mean));
 
   // I5 = I4 / (Var[X] + epsilon)
-  auto I5 = computation_->AddInstruction(HloInstruction::CreateBinary(
-      activation_shape, HloOpcode::kDivide, I4,
-      computation_->AddInstruction(HloInstruction::CreateBinary(
-          activation_shape, HloOpcode::kAdd, variance_broadcasted, epsilon))));
+  auto i5 = add(HloInstruction::CreateBinary(
+      activation_shape, HloOpcode::kDivide, i4,
+      add(HloInstruction::CreateBinary(activation_shape, HloOpcode::kAdd,
+                                       variance_broadcasted, epsilon))));
 
   // scale * rsqrt[Var[X] + epsilon] * 1/N
-  auto scale_times_rsqrt_var_add_epsilon =
-      computation_->AddInstruction(HloInstruction::CreateBinary(
-          activation_shape, HloOpcode::kMultiply, scale_broadcasted,
-          rsqrt_var_add_epsilon_broadcasted));
+  auto scale_times_rsqrt_var_add_epsilon = add(HloInstruction::CreateBinary(
+      activation_shape, HloOpcode::kMultiply, scale_broadcasted,
+      rsqrt_var_add_epsilon_broadcasted));
 
-  scale_times_rsqrt_var_add_epsilon =
-      computation_->AddInstruction(HloInstruction::CreateBinary(
-          activation_shape, HloOpcode::kDivide,
-          scale_times_rsqrt_var_add_epsilon, elements_per_feature));
+  scale_times_rsqrt_var_add_epsilon = add(HloInstruction::CreateBinary(
+      activation_shape, HloOpcode::kDivide, scale_times_rsqrt_var_add_epsilon,
+      elements_per_feature));
 
-  auto I1 = computation_->AddInstruction(
-      HloInstruction::CreateBinary(activation_shape, HloOpcode::kMultiply,
-                                   grad_output, elements_per_feature));
+  auto i1 =
+      add(HloInstruction::CreateBinary(activation_shape, HloOpcode::kMultiply,
+                                       grad_output, elements_per_feature));
 
   // I6 = I1 - I2 - I5
-  auto I6 = computation_->AddInstruction(HloInstruction::CreateBinary(
+  auto i6 = add(HloInstruction::CreateBinary(
       activation_shape, HloOpcode::kSubtract,
-      computation_->AddInstruction(HloInstruction::CreateBinary(
-          activation_shape, HloOpcode::kSubtract, I1, I2)),
-      I5));
+      add(HloInstruction::CreateBinary(activation_shape, HloOpcode::kSubtract,
+                                       i1, i2)),
+      i5));
 
   // Grad[X] = scale * rsqrt[Var[X] + epsilon] * 1/N * I6.
-  auto grad_activation = computation_->AddInstruction(
-      HloInstruction::CreateBinary(activation_shape, HloOpcode::kMultiply,
-                                   scale_times_rsqrt_var_add_epsilon, I6));
+  auto grad_activation =
+      add(HloInstruction::CreateBinary(activation_shape, HloOpcode::kMultiply,
+                                       scale_times_rsqrt_var_add_epsilon, i6));
+  auto tuple =
+      HloInstruction::CreateTuple({grad_activation, grad_scale, grad_beta});
+  if (batch_norm->has_sharding()) {
+    int64 instruction_count_after = computation_->instruction_count();
+    CHECK_EQ(instruction_count_after,
+             instruction_count_before + added_instructions.size());
+    for (HloInstruction* inst : added_instructions) {
+      if (ShapeUtil::Equal(inst->shape(), activation_shape)) {
+        inst->set_sharding(batch_norm->sharding());
+      } else {
+        inst->set_sharding(HloSharding::Replicate());
+      }
+    }
+    tuple->set_sharding(batch_norm->sharding());
+  }
 
-  TF_CHECK_OK(ReplaceWithNewInstruction(
-      batch_norm,
-      HloInstruction::CreateTuple({grad_activation, grad_scale, grad_beta})));
+  TF_CHECK_OK(ReplaceWithNewInstruction(batch_norm, std::move(tuple)));
 
   return Status::OK();
 }
diff --git a/tensorflow/compiler/xla/service/hlo_sharding.cc b/tensorflow/compiler/xla/service/hlo_sharding.cc
index d1adec31c2..447c244666 100644
--- a/tensorflow/compiler/xla/service/hlo_sharding.cc
+++ b/tensorflow/compiler/xla/service/hlo_sharding.cc
@@ -246,7 +246,8 @@ Status HloSharding::ValidateNonTuple(const Shape& shape,
   // The tile rank must be the same as the input rank.
   if (ShapeUtil::Rank(shape) != ShapeUtil::Rank(tile_shape_)) {
     return tensorflow::errors::InvalidArgument(
-        "Tile rank is different to the input rank");
+        "Tile rank is different to the input rank. sharding=", ToString(),
+        ", input_shape=", ShapeUtil::HumanString(shape));
   }
 
   // The tile shape must not be the same as the input shape without maximal_
-- 
GitLab


From 4b60f92986ef1a3e4456aa26911df449a68251a5 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 29 Nov 2017 04:40:56 -0800
Subject: [PATCH 0399/1225] Change "safe pointers" to make the deleters
 stateless (i.e. a type, not a value).

This decreases the size of each safe pointer object and allows more inlining of the destructor.

PiperOrigin-RevId: 177292948
---
 tensorflow/python/lib/core/safe_ptr.cc | 16 ++++------
 tensorflow/python/lib/core/safe_ptr.h  | 42 +++++++++++++++++---------
 2 files changed, 33 insertions(+), 25 deletions(-)

diff --git a/tensorflow/python/lib/core/safe_ptr.cc b/tensorflow/python/lib/core/safe_ptr.cc
index 456ea3348b..ce34b6d004 100644
--- a/tensorflow/python/lib/core/safe_ptr.cc
+++ b/tensorflow/python/lib/core/safe_ptr.cc
@@ -16,25 +16,21 @@ limitations under the License.
 #include "tensorflow/python/lib/core/safe_ptr.h"
 
 namespace tensorflow {
-namespace {
 
-inline void Py_DECREF_wrapper(PyObject* o) { Py_DECREF(o); }
-
-}  // namespace
-
-Safe_PyObjectPtr make_safe(PyObject* o) {
-  return Safe_PyObjectPtr(o, Py_DECREF_wrapper);
+Safe_PyObjectPtr make_safe(PyObject* object) {
+  return Safe_PyObjectPtr(object);
 }
 
 Safe_TF_TensorPtr make_safe(TF_Tensor* tensor) {
-  return Safe_TF_TensorPtr(tensor, TF_DeleteTensor);
+  return Safe_TF_TensorPtr(tensor);
 }
 
 Safe_TFE_TensorHandlePtr make_safe(TFE_TensorHandle* handle) {
-  return Safe_TFE_TensorHandlePtr(handle, TFE_DeleteTensorHandle);
+  return Safe_TFE_TensorHandlePtr(handle);
 }
 
 Safe_TF_StatusPtr make_safe(TF_Status* status) {
-  return Safe_TF_StatusPtr(status, TF_DeleteStatus);
+  return Safe_TF_StatusPtr(status);
 }
+
 }  // namespace tensorflow
diff --git a/tensorflow/python/lib/core/safe_ptr.h b/tensorflow/python/lib/core/safe_ptr.h
index 70cd2fdf6c..80db840aeb 100644
--- a/tensorflow/python/lib/core/safe_ptr.h
+++ b/tensorflow/python/lib/core/safe_ptr.h
@@ -17,39 +17,51 @@ limitations under the License.
 #define THIRD_PARTY_TENSORFLOW_PYTHON_LIB_CORE_SAFE_PTR_H_
 
 #include <memory>
-#include <Python.h>
 
+#include <Python.h>
 #include "tensorflow/c/c_api.h"
 #include "tensorflow/c/eager/c_api.h"
 
 namespace tensorflow {
+namespace detail {
+
+struct PyDecrefDeleter {
+  void operator()(PyObject* p) const { Py_DECREF(p); }
+};
+
+struct TFTensorDeleter {
+  void operator()(TF_Tensor* p) const { TF_DeleteTensor(p); }
+};
+
+struct TFETensorHandleDeleter {
+  void operator()(TFE_TensorHandle* p) const { TFE_DeleteTensorHandle(p); }
+};
+
+struct TFStatusDeleter {
+  void operator()(TF_Status* p) const { TF_DeleteStatus(p); }
+};
+
+}  // namespace detail
 
 // Safe container for an owned PyObject. On destruction, the reference count of
 // the contained object will be decremented.
-typedef void (*Py_DECREF_wrapper_type)(PyObject*);
-typedef std::unique_ptr<PyObject, Py_DECREF_wrapper_type> Safe_PyObjectPtr;
+using Safe_PyObjectPtr = std::unique_ptr<PyObject, detail::PyDecrefDeleter>;
 Safe_PyObjectPtr make_safe(PyObject* o);
 
 // Safe containers for an owned TF_Tensor. On destruction, the tensor will be
 // deleted by TF_DeleteTensor.
-// Note: can't use decltype(&TF_DeleteTensor) due to SWIG
-typedef void (*TF_DeleteTensor_type)(TF_Tensor*);
-typedef std::unique_ptr<TF_Tensor, TF_DeleteTensor_type> Safe_TF_TensorPtr;
+using Safe_TF_TensorPtr = std::unique_ptr<TF_Tensor, detail::TFTensorDeleter>;
 Safe_TF_TensorPtr make_safe(TF_Tensor* tensor);
 
 // Safe containers for an owned TFE_TensorHandle. On destruction, the handle
-// will be deleted by TFE_DeleteTensorHandle. Note: can't use
-// decltype(&TFE_DeleteTensorHandle) due to SWIG
-typedef void (*TFE_DeleteTensorHandle_type)(TFE_TensorHandle*);
-typedef std::unique_ptr<TFE_TensorHandle, TFE_DeleteTensorHandle_type>
-    Safe_TFE_TensorHandlePtr;
+// will be deleted by TFE_DeleteTensorHandle.
+using Safe_TFE_TensorHandlePtr =
+    std::unique_ptr<TFE_TensorHandle, detail::TFETensorHandleDeleter>;
 Safe_TFE_TensorHandlePtr make_safe(TFE_TensorHandle* handle);
 
 // Safe containers for an owned TF_Status. On destruction, the handle
-// will be deleted by TF_DeleteStatus. Note: can't use
-// decltype(&TF_DeleteStatus) due to SWIG
-typedef void (*TF_DeleteStatus_type)(TF_Status*);
-typedef std::unique_ptr<TF_Status, TF_DeleteStatus_type> Safe_TF_StatusPtr;
+// will be deleted by TF_DeleteStatus.
+using Safe_TF_StatusPtr = std::unique_ptr<TF_Status, detail::TFStatusDeleter>;
 Safe_TF_StatusPtr make_safe(TF_Status* status);
 
 }  // namespace tensorflow
-- 
GitLab


From e6d823dd19a5768d0dcd651c14a6ebf4bb023180 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 29 Nov 2017 07:32:31 -0800
Subject: [PATCH 0400/1225] Make NCCL code ready for NVIDIA's NCCL 2.

PiperOrigin-RevId: 177306507
---
 .../contrib/nccl/kernels/nccl_manager.cc      | 32 +++++++++++++++++--
 .../contrib/nccl/kernels/nccl_manager_test.cc |  2 ++
 2 files changed, 32 insertions(+), 2 deletions(-)

diff --git a/tensorflow/contrib/nccl/kernels/nccl_manager.cc b/tensorflow/contrib/nccl/kernels/nccl_manager.cc
index 31a35b0d53..913935b382 100644
--- a/tensorflow/contrib/nccl/kernels/nccl_manager.cc
+++ b/tensorflow/contrib/nccl/kernels/nccl_manager.cc
@@ -258,9 +258,37 @@ NcclManager::Communicator* NcclManager::GetCommunicator(
     devices[i] = collective->participants[i]->gpu_device_id;
   }
 
+  int device_count = num_devices;
+#if NCCL_MAJOR >= 2
+  // NCCL2 prevents InitAll for more communicators than devices (but doesn't
+  // check that device ids are unique). Work around it by initializing each
+  // rank individually.
+  cudaGetDeviceCount(&device_count);
+#endif
   std::vector<ncclComm_t> nccl_comms(num_devices);
-  auto result = ncclCommInitAll(nccl_comms.data(), num_devices, devices.data());
-  CHECK_EQ(result, ncclSuccess) << ncclGetErrorString(result);
+  if (num_devices <= device_count) {
+    auto result =
+        ncclCommInitAll(nccl_comms.data(), num_devices, devices.data());
+    CHECK_EQ(result, ncclSuccess) << ncclGetErrorString(result);
+  } else {
+    int savedDevice = 0;
+    CHECK_EQ(cudaGetDevice(&savedDevice), cudaSuccess);
+    ncclUniqueId commId;
+    ncclGetUniqueId(&commId);
+#if NCCL_MAJOR >= 2
+    CHECK_EQ(ncclGroupStart(), ncclSuccess);
+#endif
+    for (int rank = 0; rank < num_devices; ++rank) {
+      cudaSetDevice(devices[rank]);
+      auto result =
+          ncclCommInitRank(nccl_comms.data() + rank, num_devices, commId, rank);
+      CHECK_EQ(result, ncclSuccess) << ncclGetErrorString(result);
+    }
+#if NCCL_MAJOR >= 2
+    CHECK_EQ(ncclGroupEnd(), ncclSuccess);
+#endif
+    cudaSetDevice(savedDevice);
+  }
   for (int rank = 0; rank < num_devices; ++rank) {
     members[rank].nccl_comm = nccl_comms[rank];
   }
diff --git a/tensorflow/contrib/nccl/kernels/nccl_manager_test.cc b/tensorflow/contrib/nccl/kernels/nccl_manager_test.cc
index 505c4b0d71..abafe4b407 100644
--- a/tensorflow/contrib/nccl/kernels/nccl_manager_test.cc
+++ b/tensorflow/contrib/nccl/kernels/nccl_manager_test.cc
@@ -30,6 +30,8 @@ namespace tensorflow {
 static std::vector<BaseGPUDevice*> GetGPUDevices() {
   std::vector<Device*> devices;
   SessionOptions session_options;
+  session_options.config.mutable_gpu_options()
+      ->set_per_process_gpu_memory_fraction(0.1);
   session_options.env = Env::Default();
   Status s = DeviceFactory::GetFactory(DEVICE_GPU)
                  ->AddDevices(session_options, "", &devices);
-- 
GitLab


From 136ba0a2f80262816434abdde6fcd3a729aa3437 Mon Sep 17 00:00:00 2001
From: Simone Cirillo <my.accounts@gmx.se>
Date: Wed, 29 Nov 2017 16:51:18 +0100
Subject: [PATCH 0401/1225] Add layer scope to
 tf.contrib.layers.spatial_softmax (#14287)

---
 .../contrib/layers/python/layers/layers.py    | 91 ++++++++++---------
 1 file changed, 47 insertions(+), 44 deletions(-)

diff --git a/tensorflow/contrib/layers/python/layers/layers.py b/tensorflow/contrib/layers/python/layers/layers.py
index 623f1153b6..6cd586a5f0 100644
--- a/tensorflow/contrib/layers/python/layers/layers.py
+++ b/tensorflow/contrib/layers/python/layers/layers.py
@@ -2651,51 +2651,54 @@ def spatial_softmax(features,
     ValueError: If unexpected data_format specified.
     ValueError: If num_channels dimension is unspecified.
   """
-  shape = array_ops.shape(features)
-  static_shape = features.shape
-  if data_format == DATA_FORMAT_NHWC:
-    height, width, num_channels = shape[1], shape[2], static_shape[3]
-  elif data_format == DATA_FORMAT_NCHW:
-    num_channels, height, width = static_shape[1], shape[2], shape[3]
-  else:
-    raise ValueError('data_format has to be either NCHW or NHWC.')
-  if num_channels.value is None:
-    raise ValueError('The num_channels dimension of the inputs to '
-                     '`spatial_softmax` should be defined. Found `None`.')
-
-  with ops.name_scope(name, 'spatial_softmax', [features]) as name:
-    # Create tensors for x and y coordinate values, scaled to range [-1, 1].
-    pos_x, pos_y = array_ops.meshgrid(math_ops.lin_space(-1., 1., num=height),
-                                      math_ops.lin_space(-1., 1., num=width),
-                                      indexing='ij')
-    pos_x = array_ops.reshape(pos_x, [height * width])
-    pos_y = array_ops.reshape(pos_y, [height * width])
-    if temperature is None:
-      temperature_collections = utils.get_variable_collections(
-          variables_collections, 'temperature')
-      temperature = variables.model_variable(
-          'temperature',
-          shape=(),
-          dtype=dtypes.float32,
-          initializer=init_ops.ones_initializer(),
-          collections=temperature_collections,
-          trainable=trainable)
-    if data_format == 'NCHW':
-      features = array_ops.reshape(features, [-1, height * width])
+  with variable_scope.variable_scope(name, 'spatial_softmax'):  
+    shape = array_ops.shape(features)
+    static_shape = features.shape
+    if data_format == DATA_FORMAT_NHWC:
+      height, width, num_channels = shape[1], shape[2], static_shape[3]
+    elif data_format == DATA_FORMAT_NCHW:
+      num_channels, height, width = static_shape[1], shape[2], shape[3]
     else:
-      features = array_ops.reshape(
-          array_ops.transpose(features, [0, 3, 1, 2]), [-1, height * width])
-
-    softmax_attention = nn.softmax(features/temperature)
-    expected_x = math_ops.reduce_sum(
-        pos_x * softmax_attention, [1], keep_dims=True)
-    expected_y = math_ops.reduce_sum(
-        pos_y * softmax_attention, [1], keep_dims=True)
-    expected_xy = array_ops.concat([expected_x, expected_y], 1)
-    feature_keypoints = array_ops.reshape(
-        expected_xy, [-1, num_channels.value * 2])
-    feature_keypoints.set_shape([None, num_channels.value * 2])
-    return feature_keypoints
+      raise ValueError('data_format has to be either NCHW or NHWC.')
+    if num_channels.value is None:
+      raise ValueError('The num_channels dimension of the inputs to '
+                       '`spatial_softmax` should be defined. Found `None`.')
+  
+    with ops.name_scope('spatial_softmax_op', 'spatial_softmax_op', [features]):  
+      # Create tensors for x and y coordinate values, scaled to range [-1, 1].
+      pos_x, pos_y = array_ops.meshgrid(math_ops.lin_space(-1., 1., num=height),
+                                        math_ops.lin_space(-1., 1., num=width),
+                                        indexing='ij')
+      pos_x = array_ops.reshape(pos_x, [height * width])
+      pos_y = array_ops.reshape(pos_y, [height * width])
+      if temperature is None:
+        temperature_collections = utils.get_variable_collections(
+            variables_collections, 'temperature')
+        temperature = variables.model_variable(
+            'temperature',
+            shape=(),
+            dtype=dtypes.float32,
+            initializer=init_ops.ones_initializer(),
+            collections=temperature_collections,
+            trainable=trainable)
+      if data_format == 'NCHW':
+        features = array_ops.reshape(features, [-1, height * width])
+      else:
+        features = array_ops.reshape(
+            array_ops.transpose(features, [0, 3, 1, 2]), [-1, height * width])
+  
+      softmax_attention = nn.softmax(features/temperature)
+      expected_x = math_ops.reduce_sum(
+          pos_x * softmax_attention, [1], keep_dims=True)
+      expected_y = math_ops.reduce_sum(
+          pos_y * softmax_attention, [1], keep_dims=True)
+      expected_xy = array_ops.concat([expected_x, expected_y], 1)
+      feature_keypoints = array_ops.reshape(
+          expected_xy, [-1, num_channels.value * 2])
+      feature_keypoints.set_shape([None, num_channels.value * 2])
+  return feature_keypoints
+
+
 
 
 def stack(inputs, layer, stack_args, **kwargs):
-- 
GitLab


From f7015074d78f930e1e1a5458dd460a9ea181dff2 Mon Sep 17 00:00:00 2001
From: Jimmy Jia <tesrin@gmail.com>
Date: Wed, 29 Nov 2017 10:51:45 -0500
Subject: [PATCH 0402/1225] Lazily configure TensorFlow logger (#10657)

---
 tensorflow/python/platform/tf_logging.py | 90 ++++++++++++++++--------
 1 file changed, 59 insertions(+), 31 deletions(-)

diff --git a/tensorflow/python/platform/tf_logging.py b/tensorflow/python/platform/tf_logging.py
index 71ee5e365f..85ed4f071c 100644
--- a/tensorflow/python/platform/tf_logging.py
+++ b/tensorflow/python/platform/tf_logging.py
@@ -30,64 +30,92 @@ from logging import ERROR
 from logging import FATAL
 from logging import INFO
 from logging import WARN
+import threading
 
 import six
 
 from tensorflow.python.util.all_util import remove_undocumented
 
 
-# Determine whether we are in an interactive environment
-_interactive = False
-try:
-  # This is only defined in interactive shells
-  if _sys.ps1: _interactive = True
-except AttributeError:
-  # Even now, we may be in an interactive shell with `python -i`.
-  _interactive = _sys.flags.interactive
+# Don't use this directly. Use _get_logger() instead.
+_logger = None
+_logger_lock = threading.Lock()
 
-# Scope the tensorflow logger to not conflict with users' loggers
-_logger = _logging.getLogger('tensorflow')
 
-# If we are in an interactive environment (like jupyter), set loglevel to info
-# and pipe the output to stdout
-if _interactive:
-  _logger.setLevel(INFO)
-  _logging_target = _sys.stdout
-else:
-  _logging_target = _sys.stderr
+def _get_logger():
+  global _logger
 
-# Add the output handler
-_handler = _logging.StreamHandler(_logging_target)
-_handler.setFormatter(_logging.Formatter(_logging.BASIC_FORMAT, None))
-_logger.addHandler(_handler)
+  # Use double-checked locking to avoid taking lock unnecessarily.
+  if _logger:
+    return _logger
+
+  _logger_lock.acquire()
+
+  try:
+    if _logger:
+      return _logger
+
+    # Scope the TensorFlow logger to not conflict with users' loggers.
+    logger = _logging.getLogger('tensorflow')
+
+    # Don't further configure the TensorFlow logger if the root logger is
+    # already configured. This prevents double logging in those cases.
+    if not _logging.getLogger().handlers:
+      # Determine whether we are in an interactive environment
+      _interactive = False
+      try:
+        # This is only defined in interactive shells.
+        if _sys.ps1: _interactive = True
+      except AttributeError:
+        # Even now, we may be in an interactive shell with `python -i`.
+        _interactive = _sys.flags.interactive
+
+      # If we are in an interactive environment (like Jupyter), set loglevel
+      # to INFO and pipe the output to stdout.
+      if _interactive:
+        logger.setLevel(INFO)
+        _logging_target = _sys.stdout
+      else:
+        _logging_target = _sys.stderr
+
+      # Add the output handler.
+      _handler = _logging.StreamHandler(_logging_target)
+      _handler.setFormatter(_logging.Formatter(_logging.BASIC_FORMAT, None))
+      logger.addHandler(_handler)
+
+    _logger = logger
+    return _logger
+
+  finally:
+    _logger_lock.release()
 
 
 def log(level, msg, *args, **kwargs):
-  _logger.log(level, msg, *args, **kwargs)
+  _get_logger().log(level, msg, *args, **kwargs)
 
 
 def debug(msg, *args, **kwargs):
-  _logger.debug(msg, *args, **kwargs)
+  _get_logger().debug(msg, *args, **kwargs)
 
 
 def error(msg, *args, **kwargs):
-  _logger.error(msg, *args, **kwargs)
+  _get_logger().error(msg, *args, **kwargs)
 
 
 def fatal(msg, *args, **kwargs):
-  _logger.fatal(msg, *args, **kwargs)
+  _get_logger().fatal(msg, *args, **kwargs)
 
 
 def info(msg, *args, **kwargs):
-  _logger.info(msg, *args, **kwargs)
+  _get_logger().info(msg, *args, **kwargs)
 
 
 def warn(msg, *args, **kwargs):
-  _logger.warn(msg, *args, **kwargs)
+  _get_logger().warn(msg, *args, **kwargs)
 
 
 def warning(msg, *args, **kwargs):
-  _logger.warning(msg, *args, **kwargs)
+  _get_logger().warning(msg, *args, **kwargs)
 
 
 _level_names = {
@@ -118,7 +146,7 @@ def flush():
 
 # Code below is taken from pyglib/logging
 def vlog(level, msg, *args, **kwargs):
-  _logger.log(level, msg, *args, **kwargs)
+  _get_logger().log(level, msg, *args, **kwargs)
 
 
 def _GetNextLogCountPerToken(token):
@@ -225,12 +253,12 @@ def google2_log_prefix(level, timestamp=None, file_and_line=None):
 
 def get_verbosity():
   """Return how much logging output will be produced."""
-  return _logger.getEffectiveLevel()
+  return _get_logger().getEffectiveLevel()
 
 
 def set_verbosity(v):
   """Sets the threshold for what messages will be logged."""
-  _logger.setLevel(v)
+  _get_logger().setLevel(v)
 
 
 def _get_thread_id():
-- 
GitLab


From c39544a066eae78e4672467c3883b0158638f1a1 Mon Sep 17 00:00:00 2001
From: Andrei Nigmatulin <andrei.nigmatulin@gmail.com>
Date: Wed, 29 Nov 2017 15:56:52 +0000
Subject: [PATCH 0403/1225] golang: ~2x speedup for encodeTensor() (#14427)

---
 tensorflow/go/tensor.go | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/tensorflow/go/tensor.go b/tensorflow/go/tensor.go
index 1326a95278..bfd8660b92 100644
--- a/tensorflow/go/tensor.go
+++ b/tensorflow/go/tensor.go
@@ -328,6 +328,14 @@ func encodeTensor(w *bytes.Buffer, v reflect.Value, shape []int64) error {
 			}
 		}
 
+		// Optimisation: if only one dimension is left we can use binary.Write() directly for this slice
+		if len(shape) == 1 && v.Len() > 0 {
+			switch v.Index(0).Kind() {
+			case reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Float32, reflect.Float64, reflect.Complex64, reflect.Complex128:
+				return binary.Write(w, nativeEndian, v.Interface())
+			}
+		}
+
 		subShape := shape[1:]
 		for i := 0; i < v.Len(); i++ {
 			err := encodeTensor(w, v.Index(i), subShape)
-- 
GitLab


From 6bbff8370fa4d00a0001d930f101d4507b6c0ad5 Mon Sep 17 00:00:00 2001
From: Andrei Nigmatulin <andrei.nigmatulin@gmail.com>
Date: Wed, 29 Nov 2017 16:02:56 +0000
Subject: [PATCH 0404/1225] golang: ~15x speedup for decodeTensor() (#14912)

---
 tensorflow/go/tensor.go      |  9 +++++++++
 tensorflow/go/tensor_test.go | 20 ++++++++++++++++++++
 2 files changed, 29 insertions(+)

diff --git a/tensorflow/go/tensor.go b/tensorflow/go/tensor.go
index bfd8660b92..cd6f4bc1f0 100644
--- a/tensorflow/go/tensor.go
+++ b/tensorflow/go/tensor.go
@@ -368,6 +368,15 @@ func decodeTensor(r *bytes.Reader, shape []int64, typ reflect.Type, ptr reflect.
 	case reflect.Slice:
 		val := reflect.Indirect(ptr)
 		val.Set(reflect.MakeSlice(typ, int(shape[0]), int(shape[0])))
+
+		// Optimization: if only one dimension is left we can use binary.Read() directly for this slice
+		if len(shape) == 1 && val.Len() > 0 {
+			switch val.Index(0).Kind() {
+			case reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Float32, reflect.Float64, reflect.Complex64, reflect.Complex128:
+				return binary.Read(r, nativeEndian, val.Interface())
+			}
+		}
+
 		for i := 0; i < val.Len(); i++ {
 			if err := decodeTensor(r, shape[1:], typ.Elem(), val.Index(i).Addr()); err != nil {
 				return err
diff --git a/tensorflow/go/tensor_test.go b/tensorflow/go/tensor_test.go
index 674a8ce86f..793c36dd4d 100644
--- a/tensorflow/go/tensor_test.go
+++ b/tensorflow/go/tensor_test.go
@@ -243,3 +243,23 @@ func BenchmarkNewTensor(b *testing.B) {
 	)
 	b.Run("[150528]", func(b *testing.B) { benchmarkNewTensor(b, vector) })
 }
+
+func benchmarkDecodeTensor(b *testing.B, t *Tensor) {
+	for i := 0; i < b.N; i++ {
+		_ = t.Value()
+	}
+}
+
+func BenchmarkDecodeTensor(b *testing.B) {
+	var (
+		// Some sample sizes from the Inception image labeling model.
+		// Where input tensors correspond to a 224x224 RGB image
+		// flattened into a vector.
+		vector [224 * 224 * 3]int32
+	)
+	t, err := NewTensor(vector)
+	if err != nil {
+		b.Fatalf("(%v, %v)", t, err)
+	}
+	b.Run("[150528]", func(b *testing.B) { benchmarkDecodeTensor(b, t) })
+}
-- 
GitLab


From 4b7d79b6eaae375559ac32f0c8a8b46cd8825774 Mon Sep 17 00:00:00 2001
From: Justine Tunney <jart@google.com>
Date: Wed, 29 Nov 2017 08:08:44 -0800
Subject: [PATCH 0405/1225] Fix absl flag initialization in cloud_tpu_profiler
 (#14426)

This fixes a regression caused by 2652704b576adc16b4d735f651cea1024e88b72e
where the command would not run.

See also: tensorflow/tensorboard#716
---
 .../profiler/pip_package/cloud_tpu_profiler/main.py  | 12 ++++--------
 tensorflow/contrib/tpu/profiler/pip_package/setup.py |  2 +-
 2 files changed, 5 insertions(+), 9 deletions(-)

diff --git a/tensorflow/contrib/tpu/profiler/pip_package/cloud_tpu_profiler/main.py b/tensorflow/contrib/tpu/profiler/pip_package/cloud_tpu_profiler/main.py
index 3bdd475fad..7970c20a26 100644
--- a/tensorflow/contrib/tpu/profiler/pip_package/cloud_tpu_profiler/main.py
+++ b/tensorflow/contrib/tpu/profiler/pip_package/cloud_tpu_profiler/main.py
@@ -24,22 +24,18 @@ import sys
 
 import tensorflow as tf
 
-
 tf.flags.DEFINE_string('service_addr', '',
                        'Address of TPU profiler service e.g. localhost:8466')
-
-
 tf.flags.DEFINE_string('logdir', '',
                        'Path of TensorBoard log directory e.g. /tmp/tb_log')
-
-
 tf.flags.DEFINE_integer('duration_ms', 2000, 'Duration of tracing in ms.')
 
-
 FLAGS = tf.flags.FLAGS
+EXECUTABLE = 'data/capture_tpu_profile'
 
 
-EXECUTABLE = 'data/capture_tpu_profile'
+def run_main():
+  tf.app.run(main)
 
 
 def main(unused_argv=None):
@@ -54,4 +50,4 @@ def main(unused_argv=None):
 
 
 if __name__ == '__main__':
-  tf.app.run(main)
+  run_main()
diff --git a/tensorflow/contrib/tpu/profiler/pip_package/setup.py b/tensorflow/contrib/tpu/profiler/pip_package/setup.py
index e77cae4695..ee6950699e 100644
--- a/tensorflow/contrib/tpu/profiler/pip_package/setup.py
+++ b/tensorflow/contrib/tpu/profiler/pip_package/setup.py
@@ -23,7 +23,7 @@ from setuptools import setup
 _VERSION = '1.3.0-a1'
 
 CONSOLE_SCRIPTS = [
-    'capture_tpu_profile=cloud_tpu_profiler.main:main',
+    'capture_tpu_profile=cloud_tpu_profiler.main:run_main',
 ]
 
 REQUIRED_PACKAGES = [
-- 
GitLab


From 18a36a823141c675658d218fa78ed5e2bf19ea8c Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 29 Nov 2017 08:05:58 -0800
Subject: [PATCH 0406/1225] [XLA:CPU] Factor IR function building logic out of
 IrEmitter into its own file (no functional changes, just code movement). This
 will enable building parallel IR functions from other emitters, and remove
 the requirement that parallel IR functions are associated with a
 sub-computation.

PiperOrigin-RevId: 177309875
---
 tensorflow/compiler/xla/service/cpu/BUILD     |  17 ++
 .../compiler/xla/service/cpu/ir_emitter.cc    | 180 +++-------------
 .../compiler/xla/service/cpu/ir_emitter.h     |  20 +-
 .../compiler/xla/service/cpu/ir_function.cc   | 195 ++++++++++++++++++
 .../compiler/xla/service/cpu/ir_function.h    | 109 ++++++++++
 5 files changed, 352 insertions(+), 169 deletions(-)
 create mode 100644 tensorflow/compiler/xla/service/cpu/ir_function.cc
 create mode 100644 tensorflow/compiler/xla/service/cpu/ir_function.h

diff --git a/tensorflow/compiler/xla/service/cpu/BUILD b/tensorflow/compiler/xla/service/cpu/BUILD
index e1eed498f6..e64b313ffc 100644
--- a/tensorflow/compiler/xla/service/cpu/BUILD
+++ b/tensorflow/compiler/xla/service/cpu/BUILD
@@ -250,6 +250,7 @@ cc_library(
         ":dot_op_emitter",
         ":external_constant_pool",
         ":ir_emission_utils",
+        ":ir_function",
         ":shape_partition",
         ":simple_orc_jit",
         "//tensorflow/compiler/xla:shape_util",
@@ -280,6 +281,22 @@ cc_library(
     ],
 )
 
+cc_library(
+    name = "ir_function",
+    srcs = ["ir_function.cc"],
+    hdrs = ["ir_function.h"],
+    deps = [
+        "//tensorflow/compiler/xla:status_macros",
+        "//tensorflow/compiler/xla:statusor",
+        "//tensorflow/compiler/xla:types",
+        "//tensorflow/compiler/xla/service/llvm_ir:ir_array",
+        "//tensorflow/compiler/xla/service/llvm_ir:llvm_loop",
+        "//tensorflow/compiler/xla/service/llvm_ir:llvm_util",
+        "//tensorflow/compiler/xla/service/llvm_ir:vector_support_library",
+        "@llvm//:core",
+    ],
+)
+
 cc_library(
     name = "dot_op_emitter",
     srcs = ["dot_op_emitter.cc"],
diff --git a/tensorflow/compiler/xla/service/cpu/ir_emitter.cc b/tensorflow/compiler/xla/service/cpu/ir_emitter.cc
index 502dd2e738..f087329c6d 100644
--- a/tensorflow/compiler/xla/service/cpu/ir_emitter.cc
+++ b/tensorflow/compiler/xla/service/cpu/ir_emitter.cc
@@ -42,6 +42,7 @@ limitations under the License.
 #include "tensorflow/compiler/xla/service/cpu/dot_op_emitter.h"
 #include "tensorflow/compiler/xla/service/cpu/elemental_ir_emitter.h"
 #include "tensorflow/compiler/xla/service/cpu/ir_emission_utils.h"
+#include "tensorflow/compiler/xla/service/cpu/ir_function.h"
 #include "tensorflow/compiler/xla/service/cpu/shape_partition.h"
 #include "tensorflow/compiler/xla/service/cpu/simple_orc_jit.h"
 #include "tensorflow/compiler/xla/service/elemental_ir_emitter.h"
@@ -124,131 +125,27 @@ StatusOr<llvm::Function*> IrEmitter::EmitComputation(
   } else {
     TF_RETURN_IF_ERROR(computation->AcceptOrdered(this, *instruction_order));
   }
-  InsertOrDie(&emitted_functions_, computation, compute_function_);
-
-  return compute_function_;
-}
-
-static llvm::Argument* GetArg(llvm::Function* f, int idx) {
-  llvm::Function::arg_iterator arg_iter = f->arg_begin();
-  std::advance(arg_iter, idx);
-  return &*arg_iter;
+  llvm::Function* ir_function = compute_function_->function();
+  InsertOrDie(&emitted_functions_, computation, ir_function);
+  // Delete 'compute_function', finalizing 'ir_function' and restoring caller
+  // IR insert point.
+  compute_function_.reset();
+  return ir_function;
 }
 
 void IrEmitter::InitializeIrFunction(const string& function_name) {
-  // The function signature is:
-  //   void function(i8* retval, i8* run_options, i8** params, i8** temps,
-  //                 i64* dynamic_loop_bounds, i64* prof_counters)
-  //
-  // retval: points to the returned value.
-  // params: address of an array with pointers to parameters.
-  // temps: address of an array with pointers to temporary buffers.
-  //
-  // Therefore, the generated function's signature (FunctionType) is statically
-  // determined - parameter unpacking is done in code generated into the
-  // function, rather than by a prologue dictated by the platform ABI.
-  //
-  //                      /--------------\
-  //   retval ----------> | return value |
-  //                      \--------------/
-  //
-  //                      /-------------------------------\
-  //   run_options -----> | xla::ExecutableRunOptions |
-  //                      \-------------------------------/
-  //
-  //                     /---------------------------------------------\
-  //   params -------->  |  param 0  |  param 1  | ..... |  param N-1  |
-  //                     |   addr    |   addr    |       |   addr      |
-  //                     \---------------------------------------------/
-  //                          |           |                   |
-  //                          |           |                   |
-  //                          V           V                   V
-  //                     /---------\  /---------\         /-----------\
-  //                     | param 0 |  | param 1 |         | param N-1 |
-  //                     \---------/  \---------/         \-----------/
-  //
-  //                     /---------------------------------------------\
-  //   temps --------->  |  temp  0  |  temp  1  | ..... |  temp  N-1  |
-  //                     |   addr    |   addr    |       |   addr      |
-  //                     \---------------------------------------------/
-  //                          |           |                   |
-  //                          |           |                   |
-  //                          V           V                   V
-  //                     /---------\  /---------\         /-----------\
-  //                     | temp  0 |  | temp  1 |         | temp  N-1 |
-  //                     \---------/  \---------/         \-----------/
-  //
-  //                        /--------------------------------------------\
-  // dynamic loop bounds -> | outer_dim0_start | outer_dim0_limit | .....|
-  //  (elided for aot)      \--------------------------------------------/
-  //
-  //                     /---------------------------------------------\
-  //   prof counters ->  | counter 0 | counter 1 | ..... | counter N-1 |
-  //  (elided for aot)   \---------------------------------------------/
-
-  // Even though the type of params and temps is void** in the host's view, in
-  // LLVM IR this is represented by i8*, similarly to void*. It's up to the code
-  // to use GEPs to unravel the indirection layers.
-  llvm::FunctionType* compute_function_type = llvm::FunctionType::get(
-      /*Result=*/llvm::Type::getVoidTy(module_->getContext()),
-      /*Params=*/GetComputeFunctionParams(),
-      /*isVarArg=*/false);
-
   // Functions with local linkage get an inlining bonus.  Because we know
   // a-priori that embedded functions (non-entry functions) will not have its
   // name resolved, give it local linkage.
   llvm::Function::LinkageTypes linkage =
       is_top_level_computation_ ? llvm::GlobalValue::ExternalLinkage
                                 : llvm::GlobalValue::InternalLinkage;
-  compute_function_ =
-      llvm::Function::Create(/*Ty=*/compute_function_type,
-                             /*Linkage=*/linkage,
-                             /*Name=*/AsStringRef(function_name),
-                             /*Module=*/module_);
-  compute_function_->setCallingConv(llvm::CallingConv::C);
-
-  // Set meaningful names for the function's arguments: useful for debugging.
-  llvm::Function::arg_iterator arg_iter = compute_function_->arg_begin();
-  arg_iter->setName("retval");
-  (++arg_iter)->setName("run_options");
-  (++arg_iter)->setName("params");
-  (++arg_iter)->setName("temps");
-  if (num_dynamic_loop_bounds_ > 0) {
-    (++arg_iter)->setName("dynamic_loop_bounds");
-  }
-  (++arg_iter)->setName("prof_counters");
-
-  // We know a-priori that the function arguments are guaranteed to point to
-  // disjoint objects.
-  llvm::Argument* retval = GetResultArgument();
-  for (llvm::Argument& argument : compute_function_->args()) {
-    // However, the return buffer aliases the temporaries and thus cannot be
-    // marked noalias.
-    if (&argument == retval) {
-      continue;
-    }
-    compute_function_->addAttribute(argument.getArgNo() + 1,
-                                    llvm::Attribute::NoAlias);
-  }
-
-  // Add the optize attribute to the function if optimizing for size. This
-  // controls internal behavior of some optimization passes (e.g. loop
-  // unrolling).
-  if (options::OptimizeForSizeRequested(hlo_module_config_)) {
-    compute_function_->addFnAttr(llvm::Attribute::OptimizeForSize);
-  }
-
-  if (hlo_module_config_.debug_options().xla_enable_fast_math()) {
-    compute_function_->addFnAttr("unsafe-fp-math", "true");
-    compute_function_->addFnAttr("no-infs-fp-math", "true");
-    compute_function_->addFnAttr("no-nans-fp-math", "true");
-    compute_function_->addFnAttr("no-signed-zeros-fp-math", "true");
-  }
-
-  ir_builder_.SetInsertPoint(llvm::BasicBlock::Create(
-      /*Context=*/module_->getContext(),
-      /*Name=*/"entry",
-      /*Parent=*/compute_function_));
+  // Create and initialize new IrFunction.
+  compute_function_.reset(
+      new IrFunction(function_name, linkage,
+                     options::OptimizeForSizeRequested(hlo_module_config_),
+                     hlo_module_config_.debug_options().xla_enable_fast_math(),
+                     module_, &ir_builder_, num_dynamic_loop_bounds_));
 }
 
 IrEmitter::~IrEmitter() {}
@@ -1452,7 +1349,7 @@ Status IrEmitter::HandleParameter(HloInstruction* parameter) {
   //
   // Where Param is the actual element type of the underlying buffer (for
   // example, float for an XLA F32 element type).
-  llvm::Argument* params = GetArg(compute_function_, 2);
+  llvm::Argument* params = compute_function_->parameters_arg();
   llvm::Value* param_address_offset =
       llvm_ir::EmitBufferIndexingGEP(params, param_number, &ir_builder_);
   llvm::LoadInst* param_address_untyped =
@@ -1590,7 +1487,7 @@ IrEmitter::ShardedVectorType IrEmitter::CreateShardedVectorType(
   // Here we assume that the largest register is a vector register.
   int max_vector_register_size_in_bytes =
       target_machine_features_.largest_register_size_in_bytes(
-          compute_function_);
+          compute_function_->function());
 
   int vector_register_size_in_elements =
       max_vector_register_size_in_bytes /
@@ -2410,7 +2307,7 @@ Status IrEmitter::HandleWhile(HloInstruction* xla_while) {
   // Terminates the current block with a branch to a while header.
   llvm::BasicBlock* header_bb = llvm::BasicBlock::Create(
       module_->getContext(), AsStringRef(IrName(xla_while, "header")),
-      compute_function_);
+      compute_function_->function());
   ir_builder_.CreateBr(header_bb);
   ir_builder_.SetInsertPoint(header_bb);
 
@@ -2427,7 +2324,7 @@ Status IrEmitter::HandleWhile(HloInstruction* xla_while) {
   // Branches to the body or to the while exit depending on the condition.
   llvm::BasicBlock* body_bb = llvm::BasicBlock::Create(
       module_->getContext(), AsStringRef(IrName(xla_while, "body")),
-      compute_function_);
+      compute_function_->function());
   llvm::BasicBlock* exit_bb = llvm::BasicBlock::Create(
       module_->getContext(), AsStringRef(IrName(xla_while, "exit")));
   ir_builder_.CreateCondBr(while_predicate, body_bb, exit_bb);
@@ -2442,7 +2339,7 @@ Status IrEmitter::HandleWhile(HloInstruction* xla_while) {
   ir_builder_.CreateBr(header_bb);
 
   // Adds the exit block to the function and sets the insert point there.
-  compute_function_->getBasicBlockList().push_back(exit_bb);
+  compute_function_->function()->getBasicBlockList().push_back(exit_bb);
   ir_builder_.SetInsertPoint(exit_bb);
 
   return Status::OK();
@@ -2642,7 +2539,6 @@ Status IrEmitter::FinishVisit(HloInstruction* root) {
   if (prof_counter) {
     profiling_state_.RecordCompleteComputation(&ir_builder_, prof_counter);
   }
-  ir_builder_.CreateRetVoid();
   return Status::OK();
 }
 
@@ -2783,43 +2679,16 @@ llvm::Type* IrEmitter::IrShapeType(const Shape& shape) {
   return llvm_ir::ShapeToIrType(shape, module_);
 }
 
-std::vector<llvm::Type*> IrEmitter::GetComputeFunctionParams() {
-  llvm::Type* i8_ptr_type = llvm::Type::getInt8PtrTy(module_->getContext());
-  llvm::Type* i8_ptr_ptr_type = i8_ptr_type->getPointerTo();
-  llvm::Type* i64_ptr_type = llvm::Type::getInt64PtrTy(module_->getContext());
-  std::vector<llvm::Type*> compute_function_params(
-      {i8_ptr_type, i8_ptr_type, i8_ptr_ptr_type, i8_ptr_ptr_type});
-  if (num_dynamic_loop_bounds_ > 0) {
-    compute_function_params.push_back(i64_ptr_type);
-  }
-  compute_function_params.push_back(i64_ptr_type);
-  return compute_function_params;
-}
-
-llvm::Argument* IrEmitter::GetResultArgument() {
-  return GetArg(compute_function_, 0);
-}
-
 llvm::Argument* IrEmitter::GetProfileCountersArgument() {
-  const int64 arg_index = num_dynamic_loop_bounds_ > 0 ? 5 : 4;
-  return GetArg(compute_function_, arg_index);
+  return compute_function_->profile_counters_arg();
 }
 
 llvm::Value* IrEmitter::GetTempBuffersArgument() {
-  return GetArg(compute_function_, 3);
-}
-
-llvm::Value* IrEmitter::GetDynamicLoopBound(const int64 offset) {
-  CHECK_GT(num_dynamic_loop_bounds_, 0);
-  CHECK_LT(offset, num_dynamic_loop_bounds_ * 2);
-  llvm::Argument* loop_bounds_arg = GetArg(compute_function_, 4);
-  string name = tensorflow::strings::StrCat("dynamic_loop_bound_", offset);
-  return ir_builder_.CreateLoad(ir_builder_.CreateGEP(
-      loop_bounds_arg, ir_builder_.getInt64(offset), AsStringRef(name)));
+  return compute_function_->temp_buffers_arg();
 }
 
 llvm::Value* IrEmitter::GetExecutableRunOptionsArgument() {
-  return GetArg(compute_function_, 1);
+  return compute_function_->exec_run_options_arg();
 }
 
 llvm::Value* IrEmitter::EmitTempBufferPointer(
@@ -2965,7 +2834,8 @@ Status IrEmitter::EmitParallelForkJoin(
   HloInstruction* root = computation->root_instruction();
 
   // Build ParallelForkJoin function type.
-  std::vector<llvm::Type*> compute_function_params = GetComputeFunctionParams();
+  std::vector<llvm::Type*> compute_function_params =
+      compute_function_->GetComputeFunctionParams();
   // Number of parallel compute functions.
   compute_function_params.push_back(ir_builder_.getInt32Ty());
   // Array of partitions. There is an array element for each
@@ -3066,7 +2936,7 @@ Status IrEmitter::EmitTargetAddressForOp(const HloInstruction* op) {
   if (op == op->parent()->root_instruction()) {
     // For the root node, we write directly to the output buffer of the
     // function.
-    llvm::Argument* retval = GetResultArgument();
+    llvm::Argument* retval = compute_function_->result_arg();
     if (!ShapeUtil::IsNil(target_shape)) {
       llvm::AttrBuilder attr_builder;
       attr_builder.addAlignmentAttr(MinimumAlignmentForShape(target_shape));
@@ -3148,7 +3018,7 @@ Status IrEmitter::EmitParallelTargetElementLoop(
   // Emit code to read dynamic loop bounds from function argument 4.
   std::vector<llvm::Value*> dynamic_loop_bounds(2 * num_dynamic_loop_bounds_);
   for (int i = 0; i < 2 * num_dynamic_loop_bounds_; ++i) {
-    dynamic_loop_bounds[i] = GetDynamicLoopBound(i);
+    dynamic_loop_bounds[i] = compute_function_->GetDynamicLoopBound(i);
   }
 
   llvm_ir::ForLoopNest loop_nest(loop_name, &ir_builder_);
diff --git a/tensorflow/compiler/xla/service/cpu/ir_emitter.h b/tensorflow/compiler/xla/service/cpu/ir_emitter.h
index 351c95278c..9e5595052f 100644
--- a/tensorflow/compiler/xla/service/cpu/ir_emitter.h
+++ b/tensorflow/compiler/xla/service/cpu/ir_emitter.h
@@ -18,6 +18,7 @@ limitations under the License.
 
 #include <stddef.h>
 #include <map>
+#include <memory>
 #include <string>
 #include <unordered_map>
 #include <vector>
@@ -30,6 +31,7 @@ limitations under the License.
 #include "llvm/Target/TargetMachine.h"
 #include "tensorflow/compiler/xla/service/buffer_assignment.h"
 #include "tensorflow/compiler/xla/service/cpu/external_constant_pool.h"
+#include "tensorflow/compiler/xla/service/cpu/ir_function.h"
 #include "tensorflow/compiler/xla/service/dfs_hlo_visitor_with_default.h"
 #include "tensorflow/compiler/xla/service/hlo_computation.h"
 #include "tensorflow/compiler/xla/service/hlo_instruction.h"
@@ -233,13 +235,6 @@ class IrEmitter : public DfsHloVisitorWithDefault {
   // Convenience function to get the IR type matching the given shape.
   llvm::Type* IrShapeType(const Shape& shape);
 
-  // Returns an array of compute function parameter types.
-  std::vector<llvm::Type*> GetComputeFunctionParams();
-
-  // Get the llvm::Value* that represents the "retval" argument of the
-  // computation function being emitted by this emitter.
-  llvm::Argument* GetResultArgument();
-
   // Get the llvm::Value* that represents the "prof_counters" argument of the
   // computation function being emitted by this emitter.
   llvm::Argument* GetProfileCountersArgument();
@@ -252,11 +247,6 @@ class IrEmitter : public DfsHloVisitorWithDefault {
   // computation function being emitted by this emitter.
   llvm::Value* GetTempBuffersArgument();
 
-  // Emit ir to read and return the ir value for the dynamic loop bound at
-  // 'offset' from the "dynamic_loop_bounds" argument of the computation
-  // function being emitted by this emitter.
-  llvm::Value* GetDynamicLoopBound(const int64 offset);
-
   // Emits code that computes the address of the given temporary buffer to the
   // function. target_shape is the shape of this temporary buffer.
   // The returned Value's type is a pointer to element_type.
@@ -476,8 +466,10 @@ class IrEmitter : public DfsHloVisitorWithDefault {
       thread_local_buffers_;
 
   // The following fields track the IR emission state. According to LLVM memory
-  // management rules, their memory is owned by the module.
-  llvm::Function* compute_function_;
+  // management rules, their memory is owned by the module (Note that IrFunction
+  // creates the encapsulated llvm::Function s.t. it is added to the llvm
+  // module's function list).
+  std::unique_ptr<IrFunction> compute_function_;
   llvm::IRBuilder<> ir_builder_;
 
   // Maps HLOs to their index into the profile counter array.
diff --git a/tensorflow/compiler/xla/service/cpu/ir_function.cc b/tensorflow/compiler/xla/service/cpu/ir_function.cc
new file mode 100644
index 0000000000..fa88627156
--- /dev/null
+++ b/tensorflow/compiler/xla/service/cpu/ir_function.cc
@@ -0,0 +1,195 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include <iterator>
+
+#include "tensorflow/compiler/xla/service/cpu/ir_function.h"
+
+#include "tensorflow/compiler/xla/service/llvm_ir/llvm_util.h"
+#include "tensorflow/compiler/xla/status_macros.h"
+
+namespace xla {
+
+namespace {
+using llvm_ir::AsStringRef;
+}  // namespace
+
+namespace cpu {
+
+IrFunction::IrFunction(const string& function_name,
+                       llvm::Function::LinkageTypes linkage,
+                       const bool optimize_for_size_requested,
+                       const bool enable_fast_math, llvm::Module* llvm_module,
+                       llvm::IRBuilder<>* ir_builder,
+                       int64 num_dynamic_loop_bounds)
+    : ir_builder_(ir_builder),
+      llvm_module_(llvm_module),
+      caller_insert_point_guard_(*ir_builder),
+      num_dynamic_loop_bounds_(num_dynamic_loop_bounds) {
+  Initialize(function_name, linkage, optimize_for_size_requested,
+             enable_fast_math);
+}
+
+IrFunction::~IrFunction() {
+  // Emit function return value.
+  ir_builder_->CreateRetVoid();
+}
+
+void IrFunction::Initialize(const string& function_name,
+                            llvm::Function::LinkageTypes linkage,
+                            const bool optimize_for_size_requested,
+                            const bool enable_fast_math) {
+  // The function signature is:
+  //   void function(i8* retval, i8* run_options, i8** params, i8** temps,
+  //                 i64* dynamic_loop_bounds, i64* prof_counters)
+  //
+  // retval: points to the returned value.
+  // params: address of an array with pointers to parameters.
+  // temps: address of an array with pointers to temporary buffers.
+  //
+  // Therefore, the generated function's signature (FunctionType) is statically
+  // determined - parameter unpacking is done in code generated into the
+  // function, rather than by a prologue dictated by the platform ABI.
+  //
+  //                      /--------------\
+  //   retval ----------> | return value |
+  //                      \--------------/
+  //
+  //                      /-------------------------------\
+  //   run_options -----> | xla::ExecutableRunOptions |
+  //                      \-------------------------------/
+  //
+  //                     /---------------------------------------------\
+  //   params -------->  |  param 0  |  param 1  | ..... |  param N-1  |
+  //                     |   addr    |   addr    |       |   addr      |
+  //                     \---------------------------------------------/
+  //                          |           |                   |
+  //                          |           |                   |
+  //                          V           V                   V
+  //                     /---------\  /---------\         /-----------\
+  //                     | param 0 |  | param 1 |         | param N-1 |
+  //                     \---------/  \---------/         \-----------/
+  //
+  //                     /---------------------------------------------\
+  //   temps --------->  |  temp  0  |  temp  1  | ..... |  temp  N-1  |
+  //                     |   addr    |   addr    |       |   addr      |
+  //                     \---------------------------------------------/
+  //                          |           |                   |
+  //                          |           |                   |
+  //                          V           V                   V
+  //                     /---------\  /---------\         /-----------\
+  //                     | temp  0 |  | temp  1 |         | temp  N-1 |
+  //                     \---------/  \---------/         \-----------/
+  //
+  //                        /--------------------------------------------\
+  // dynamic loop bounds -> | outer_dim0_start | outer_dim0_limit | .....|
+  //  (elided for aot)      \--------------------------------------------/
+  //
+  //                     /---------------------------------------------\
+  //   prof counters ->  | counter 0 | counter 1 | ..... | counter N-1 |
+  //  (elided for aot)   \---------------------------------------------/
+
+  // Even though the type of params and temps is void** in the host's view, in
+  // LLVM IR this is represented by i8*, similarly to void*. It's up to the code
+  // to use GEPs to unravel the indirection layers.
+  llvm::FunctionType* function_type = llvm::FunctionType::get(
+      /*Result=*/llvm::Type::getVoidTy(llvm_module_->getContext()),
+      /*Params=*/GetComputeFunctionParams(),
+      /*isVarArg=*/false);
+
+  // Functions with local linkage get an inlining bonus.  Because we know
+  // a-priori that embedded functions (non-entry functions) will not have its
+  // name resolved, give it local linkage.
+  function_ = llvm::Function::Create(/*Ty=*/function_type,
+                                     /*Linkage=*/linkage,
+                                     /*N=*/AsStringRef(function_name),
+                                     /*M=*/llvm_module_);
+  function_->setCallingConv(llvm::CallingConv::C);
+
+  // Set meaningful names for the function's arguments: useful for debugging.
+  llvm::Function::arg_iterator arg_iter = function_->arg_begin();
+  arg_iter->setName("retval");
+  result_arg_ = &*arg_iter;
+  (++arg_iter)->setName("run_options");
+  exec_run_options_arg_ = &*arg_iter;
+  (++arg_iter)->setName("params");
+  parameters_arg_ = &*arg_iter;
+  (++arg_iter)->setName("temps");
+  temp_buffers_arg_ = &*arg_iter;
+  if (num_dynamic_loop_bounds_ > 0) {
+    (++arg_iter)->setName("dynamic_loop_bounds");
+    dynamic_loop_bounds_arg_ = &*arg_iter;
+  }
+  (++arg_iter)->setName("prof_counters");
+  profile_counters_arg_ = &*arg_iter;
+
+  // We know a-priori that the function arguments are guaranteed to point to
+  // disjoint objects.
+  llvm::Argument* retval = result_arg();
+  for (llvm::Argument& argument : function_->args()) {
+    // However, the return buffer aliases the temporaries and thus cannot be
+    // marked noalias.
+    if (&argument == retval) {
+      continue;
+    }
+    function_->addAttribute(argument.getArgNo() + 1, llvm::Attribute::NoAlias);
+  }
+
+  // Add the optize attribute to the function if optimizing for size. This
+  // controls internal behavior of some optimization passes (e.g. loop
+  // unrolling).
+  if (optimize_for_size_requested) {
+    function_->addFnAttr(llvm::Attribute::OptimizeForSize);
+  }
+
+  if (enable_fast_math) {
+    function_->addFnAttr("unsafe-fp-math", "true");
+    function_->addFnAttr("no-infs-fp-math", "true");
+    function_->addFnAttr("no-nans-fp-math", "true");
+    function_->addFnAttr("no-signed-zeros-fp-math", "true");
+  }
+
+  ir_builder_->SetInsertPoint(llvm::BasicBlock::Create(
+      /*Context=*/llvm_module_->getContext(),
+      /*Name=*/"entry",
+      /*Parent=*/function_));
+}
+
+std::vector<llvm::Type*> IrFunction::GetComputeFunctionParams() {
+  llvm::Type* i8_ptr_type =
+      llvm::Type::getInt8PtrTy(llvm_module_->getContext());
+  llvm::Type* i8_ptr_ptr_type = i8_ptr_type->getPointerTo();
+  llvm::Type* i64_ptr_type =
+      llvm::Type::getInt64PtrTy(llvm_module_->getContext());
+  std::vector<llvm::Type*> compute_function_params(
+      {i8_ptr_type, i8_ptr_type, i8_ptr_ptr_type, i8_ptr_ptr_type});
+  if (num_dynamic_loop_bounds_ > 0) {
+    compute_function_params.push_back(i64_ptr_type);
+  }
+  compute_function_params.push_back(i64_ptr_type);
+  return compute_function_params;
+}
+
+llvm::Value* IrFunction::GetDynamicLoopBound(const int64 offset) {
+  CHECK_GT(num_dynamic_loop_bounds_, 0);
+  CHECK_LT(offset, num_dynamic_loop_bounds_ * 2);
+  string name = tensorflow::strings::StrCat("dynamic_loop_bound_", offset);
+  return ir_builder_->CreateLoad(
+      ir_builder_->CreateGEP(CHECK_NOTNULL(dynamic_loop_bounds_arg_),
+                             ir_builder_->getInt64(offset), AsStringRef(name)));
+}
+
+}  // namespace cpu
+}  // namespace xla
diff --git a/tensorflow/compiler/xla/service/cpu/ir_function.h b/tensorflow/compiler/xla/service/cpu/ir_function.h
new file mode 100644
index 0000000000..b7516b403e
--- /dev/null
+++ b/tensorflow/compiler/xla/service/cpu/ir_function.h
@@ -0,0 +1,109 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef THIRD_PARTY_TENSORFLOW_COMPILER_XLA_SERVICE_CPU_IR_FUNCTION_H_
+#define THIRD_PARTY_TENSORFLOW_COMPILER_XLA_SERVICE_CPU_IR_FUNCTION_H_
+
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Value.h"
+#include "tensorflow/compiler/xla/statusor.h"
+#include "tensorflow/compiler/xla/types.h"
+
+namespace xla {
+namespace cpu {
+
+// IrFunction creates and encapsulates an llvm::Function, exposing methods to
+// emitters for function and function argument access.
+// The llvm::Function is created with the standard function signature
+// used in the XLA CPU backend (see ir_function.cc for argument details).
+// In addtion IrFunction saves the callers IR insert point during contruction,
+// and restores it after desctruction.
+//
+// Example usage:
+//
+//    // Create and initialize new IrFunction.
+//    std::unique_ptr<IrFunction> compute_function(new IrFunction(...));
+//    // Emit IR for function body using IrFunction helper methods.
+//    ...
+//    // Store reference to llvm::Function for future invocation.
+//    ir_functions.push_back(compute_function.function());
+//    // Delete IrFunction (finalizes IR function and restores caller insertion
+//    // point).
+//    compute_function.reset();
+//
+
+class IrFunction {
+ public:
+  IrFunction(const string& function_name, llvm::Function::LinkageTypes linkage,
+             const bool optimize_for_size_requested,
+             const bool enable_fast_math, llvm::Module* llvm_module,
+             llvm::IRBuilder<>* ir_builder, int64 num_dynamic_loop_bounds);
+  ~IrFunction();
+
+  // Returns an array of compute function parameter types.
+  std::vector<llvm::Type*> GetComputeFunctionParams();
+
+  // Emit ir to read and return the ir value for the dynamic loop bound at
+  // 'offset' from the "dynamic_loop_bounds" argument of this function.
+  llvm::Value* GetDynamicLoopBound(int64 offset);
+
+  // Returns the encapculated llvm::Function.
+  llvm::Function* function() { return function_; }
+
+  // Get the llvm::Value* that represents this functions "retval" argument.
+  llvm::Argument* result_arg() { return result_arg_; }
+
+  // Get the xla::ExecutableRunOptions that represents this functions
+  // "run_options" argument.
+  llvm::Value* exec_run_options_arg() { return exec_run_options_arg_; }
+
+  // Get the llvm::Argument that represents this functions parameters argument.
+  llvm::Argument* parameters_arg() { return parameters_arg_; }
+
+  // Get the llvm::Value* that represents this functions "temps" argument.
+  llvm::Value* temp_buffers_arg() { return temp_buffers_arg_; }
+
+  // Get the llvm::Value* that represents this functions "prof_counters"
+  // argument.
+  llvm::Argument* profile_counters_arg() { return profile_counters_arg_; }
+
+ private:
+  // Initialize an llvm::Function with standard signature based on arguments.
+  void Initialize(const string& function_name,
+                  llvm::Function::LinkageTypes linkage,
+                  bool optimize_for_size_requested, bool enable_fast_math);
+
+  llvm::IRBuilder<>* ir_builder_;
+  llvm::Module* llvm_module_;
+  llvm::IRBuilder<>::InsertPointGuard caller_insert_point_guard_;
+
+  int64 num_dynamic_loop_bounds_ = 0;
+  // Encapsulated llvm::Function.
+  llvm::Function* function_;
+  // Function argument IR values.
+  llvm::Argument* result_arg_;
+  llvm::Value* exec_run_options_arg_;
+  llvm::Argument* parameters_arg_;
+  llvm::Value* temp_buffers_arg_;
+  llvm::Argument* dynamic_loop_bounds_arg_ = nullptr;
+  llvm::Argument* profile_counters_arg_;
+};
+
+}  // namespace cpu
+}  // namespace xla
+
+#endif  // THIRD_PARTY_TENSORFLOW_COMPILER_XLA_SERVICE_CPU_IR_FUNCTION_H_
-- 
GitLab


From e7e1cab9fe66f00716ffaae8a180c5c08a2a050e Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 29 Nov 2017 09:13:54 -0800
Subject: [PATCH 0407/1225] [XLA:CPU] Factor out parallel loop emission into
 its own file so it can be called by other emitters (no functional change,
 just code movement).

PiperOrigin-RevId: 177317764
---
 tensorflow/compiler/xla/service/cpu/BUILD     | 17 +++++
 .../compiler/xla/service/cpu/ir_emitter.cc    | 72 ++++--------------
 .../compiler/xla/service/cpu/ir_emitter.h     | 11 +--
 .../xla/service/cpu/parallel_loop_emitter.cc  | 76 +++++++++++++++++++
 .../xla/service/cpu/parallel_loop_emitter.h   | 75 ++++++++++++++++++
 5 files changed, 184 insertions(+), 67 deletions(-)
 create mode 100644 tensorflow/compiler/xla/service/cpu/parallel_loop_emitter.cc
 create mode 100644 tensorflow/compiler/xla/service/cpu/parallel_loop_emitter.h

diff --git a/tensorflow/compiler/xla/service/cpu/BUILD b/tensorflow/compiler/xla/service/cpu/BUILD
index e64b313ffc..bf41d5ce07 100644
--- a/tensorflow/compiler/xla/service/cpu/BUILD
+++ b/tensorflow/compiler/xla/service/cpu/BUILD
@@ -251,6 +251,7 @@ cc_library(
         ":external_constant_pool",
         ":ir_emission_utils",
         ":ir_function",
+        ":parallel_loop_emitter",
         ":shape_partition",
         ":simple_orc_jit",
         "//tensorflow/compiler/xla:shape_util",
@@ -297,6 +298,22 @@ cc_library(
     ],
 )
 
+cc_library(
+    name = "parallel_loop_emitter",
+    srcs = ["parallel_loop_emitter.cc"],
+    hdrs = ["parallel_loop_emitter.h"],
+    deps = [
+        "//tensorflow/compiler/xla:shape_util",
+        "//tensorflow/compiler/xla:xla_data_proto",
+        "//tensorflow/compiler/xla/service/llvm_ir:ir_array",
+        "//tensorflow/compiler/xla/service/llvm_ir:llvm_loop",
+        "//tensorflow/compiler/xla/service/llvm_ir:llvm_util",
+        "//tensorflow/compiler/xla/service/llvm_ir:loop_emitter",
+        "//tensorflow/core:lib",
+        "@llvm//:core",
+    ],
+)
+
 cc_library(
     name = "dot_op_emitter",
     srcs = ["dot_op_emitter.cc"],
diff --git a/tensorflow/compiler/xla/service/cpu/ir_emitter.cc b/tensorflow/compiler/xla/service/cpu/ir_emitter.cc
index f087329c6d..3f991c03e9 100644
--- a/tensorflow/compiler/xla/service/cpu/ir_emitter.cc
+++ b/tensorflow/compiler/xla/service/cpu/ir_emitter.cc
@@ -43,6 +43,7 @@ limitations under the License.
 #include "tensorflow/compiler/xla/service/cpu/elemental_ir_emitter.h"
 #include "tensorflow/compiler/xla/service/cpu/ir_emission_utils.h"
 #include "tensorflow/compiler/xla/service/cpu/ir_function.h"
+#include "tensorflow/compiler/xla/service/cpu/parallel_loop_emitter.h"
 #include "tensorflow/compiler/xla/service/cpu/shape_partition.h"
 #include "tensorflow/compiler/xla/service/cpu/simple_orc_jit.h"
 #include "tensorflow/compiler/xla/service/elemental_ir_emitter.h"
@@ -1892,7 +1893,7 @@ Status IrEmitter::HandleSlice(HloInstruction* slice) {
   VLOG(2) << "HandleSlice: " << slice->ToString();
   auto operand = slice->operand(0);
   // The code below emits a sequential loop nest. For the parallel backend, use
-  // EmitParallelTargetElementLoop() which respects dynamic loop bounds.
+  // ParallelLoopEmitter which respects dynamic loop bounds.
   if (ShouldEmitParallelLoopFor(*slice)) {
     return DefaultAction(slice);
   }
@@ -2997,8 +2998,19 @@ Status IrEmitter::EmitTargetElementLoop(
 
   } else {
     if (ShouldEmitParallelLoopFor(*target_op)) {
-      TF_RETURN_IF_ERROR(EmitParallelTargetElementLoop(
-          target_shape, element_generator, IrName(target_op), &target_array));
+      // Emit code to read dynamic loop bounds from compute function argument.
+      ParallelLoopEmitter::LoopBounds dynamic_loop_bounds(
+          num_dynamic_loop_bounds_);
+      for (int i = 0; i < num_dynamic_loop_bounds_; ++i) {
+        dynamic_loop_bounds[i].first =
+            compute_function_->GetDynamicLoopBound(i * 2 + 0);
+        dynamic_loop_bounds[i].second =
+            compute_function_->GetDynamicLoopBound(i * 2 + 1);
+      }
+      // Emit parallel loop with dynamic loop bounds for most-major dimensions.
+      TF_RETURN_IF_ERROR(ParallelLoopEmitter(element_generator, target_array,
+                                             &dynamic_loop_bounds, &ir_builder_)
+                             .EmitLoop(IrName(target_op)));
     } else {
       TF_RETURN_IF_ERROR(
           llvm_ir::LoopEmitter(element_generator, target_array, &ir_builder_)
@@ -3008,60 +3020,6 @@ Status IrEmitter::EmitTargetElementLoop(
   return Status::OK();
 }
 
-Status IrEmitter::EmitParallelTargetElementLoop(
-    const Shape& target_shape,
-    const llvm_ir::ElementGenerator& element_generator,
-    tensorflow::StringPiece loop_name, llvm_ir::IrArray* target_array) {
-  CHECK(!ShapeUtil::IsTuple(target_shape));
-  CHECK(!ShapeUtil::IsScalar(target_shape));
-
-  // Emit code to read dynamic loop bounds from function argument 4.
-  std::vector<llvm::Value*> dynamic_loop_bounds(2 * num_dynamic_loop_bounds_);
-  for (int i = 0; i < 2 * num_dynamic_loop_bounds_; ++i) {
-    dynamic_loop_bounds[i] = compute_function_->GetDynamicLoopBound(i);
-  }
-
-  llvm_ir::ForLoopNest loop_nest(loop_name, &ir_builder_);
-  const int64 num_dims = target_shape.dimensions_size();
-  llvm_ir::IrArray::Index array_index(num_dims);
-
-  // Add loops from outer-most to inner-most dimensions.
-  for (int i = target_shape.layout().minor_to_major_size() - 1; i >= 0; --i) {
-    const int64 dimension = target_shape.layout().minor_to_major(i);
-    const int bounds_index = num_dims - 1 - i;
-    if (bounds_index < num_dynamic_loop_bounds_) {
-      // Emit dynamic loop bounds for this dimension. Dynamic loop bounds
-      // are read from ir function dynamic loop bounds argument.
-      llvm::Value* start_index = dynamic_loop_bounds[bounds_index * 2 + 0];
-      llvm::Value* end_index = dynamic_loop_bounds[bounds_index * 2 + 1];
-
-      std::unique_ptr<llvm_ir::ForLoop> loop = loop_nest.AddLoop(
-          /*suffix=*/tensorflow::strings::Printf("dim.%lld", dimension),
-          start_index, end_index);
-      array_index[dimension] = loop->GetIndVarValue();
-    } else {
-      // Emit static loop bounds for this dimension.
-      std::unique_ptr<llvm_ir::ForLoop> loop = loop_nest.AddLoop(
-          /*start_index=*/0,
-          /*end_index=*/target_shape.dimensions(dimension),
-          /*suffix=*/tensorflow::strings::Printf("dim.%lld", dimension));
-      array_index[dimension] = loop->GetIndVarValue();
-    }
-  }
-  // Point IR builder at inner loop BB.
-  SetToFirstInsertPoint(loop_nest.GetInnerLoopBodyBasicBlock(), &ir_builder_);
-
-  // Emit loop body.
-  TF_ASSIGN_OR_RETURN(llvm::Value * target_element,
-                      element_generator(array_index));
-  target_array->EmitWriteArrayElement(array_index, target_element,
-                                      &ir_builder_);
-  // Point IR builder at outer loop exit BB.
-  SetToFirstInsertPoint(loop_nest.GetOuterLoopExitBasicBlock(), &ir_builder_);
-
-  return Status::OK();
-}
-
 Status IrEmitter::EmitMemcpy(const HloInstruction& source,
                              const HloInstruction& destination) {
   llvm::Value* source_value = GetEmittedValueFor(&source);
diff --git a/tensorflow/compiler/xla/service/cpu/ir_emitter.h b/tensorflow/compiler/xla/service/cpu/ir_emitter.h
index 9e5595052f..6b576d16bb 100644
--- a/tensorflow/compiler/xla/service/cpu/ir_emitter.h
+++ b/tensorflow/compiler/xla/service/cpu/ir_emitter.h
@@ -336,15 +336,6 @@ class IrEmitter : public DfsHloVisitorWithDefault {
       HloInstruction* target_op, tensorflow::StringPiece desc,
       const llvm_ir::ElementGenerator& element_generator);
 
-  // Emit IR to perform a computation for every element in a partition/slice of
-  // 'target_shape'. The loop bounds for the outer-dimension partitions are
-  // passed into the compute function as a runtime argument (accessible from
-  // GetDynamicLoopBound).
-  Status EmitParallelTargetElementLoop(
-      const Shape& target_shape,
-      const llvm_ir::ElementGenerator& element_generator,
-      tensorflow::StringPiece loop_name, llvm_ir::IrArray* target_array);
-
   // Emits a memcpy from the source instruction's result value to the
   // destination's.  Both source and destination must have an entry in the
   // emitted_value_ table.
@@ -482,7 +473,7 @@ class IrEmitter : public DfsHloVisitorWithDefault {
   llvm_ir::AliasAnalysis alias_analysis_;
 
   // The number of root instruction outer dimensions used in parallel loop
-  // emission (EmitParallelTargetElementLoop).
+  // emission (ParallelLoopEmitter).
   int64 num_dynamic_loop_bounds_ = 0;
 
   // Returns whether the given instruction should be emitted as a parallel loop.
diff --git a/tensorflow/compiler/xla/service/cpu/parallel_loop_emitter.cc b/tensorflow/compiler/xla/service/cpu/parallel_loop_emitter.cc
new file mode 100644
index 0000000000..91e704e3d0
--- /dev/null
+++ b/tensorflow/compiler/xla/service/cpu/parallel_loop_emitter.cc
@@ -0,0 +1,76 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/compiler/xla/service/cpu/parallel_loop_emitter.h"
+
+#include "tensorflow/compiler/xla/service/llvm_ir/llvm_loop.h"
+#include "tensorflow/compiler/xla/service/llvm_ir/llvm_util.h"
+#include "tensorflow/core/lib/strings/stringprintf.h"
+
+namespace xla {
+namespace cpu {
+
+ParallelLoopEmitter::ParallelLoopEmitter(
+    const llvm_ir::ElementGenerator& target_element_generator,
+    const llvm_ir::IrArray& target_array, const LoopBounds* dynamic_loop_bounds,
+    llvm::IRBuilder<>* ir_builder)
+    : LoopEmitter(target_element_generator, target_array, ir_builder),
+      dynamic_loop_bounds_(dynamic_loop_bounds) {}
+
+llvm_ir::IrArray::Index ParallelLoopEmitter::EmitIndexAndSetExitBasicBlock(
+    tensorflow::StringPiece loop_name) {
+  CHECK(!ShapeUtil::IsTuple(shape_));
+  CHECK(!ShapeUtil::IsScalar(shape_));
+
+  llvm_ir::ForLoopNest loop_nest(loop_name, ir_builder_);
+  const int64 num_dims = shape_.dimensions_size();
+  llvm_ir::IrArray::Index array_index(num_dims);
+
+  // Add loops from outer-most to inner-most dimensions.
+  for (int i = shape_.layout().minor_to_major_size() - 1; i >= 0; --i) {
+    const int64 dimension = shape_.layout().minor_to_major(i);
+    const int bounds_index = num_dims - 1 - i;
+    if (bounds_index < dynamic_loop_bounds_->size()) {
+      // Emit dynamic loop bounds for this dimension. Dynamic loop bounds
+      // are read from ir function dynamic loop bounds argument.
+      llvm::Value* start_index = (*dynamic_loop_bounds_)[bounds_index].first;
+      llvm::Value* end_index = (*dynamic_loop_bounds_)[bounds_index].second;
+
+      std::unique_ptr<llvm_ir::ForLoop> loop = loop_nest.AddLoop(
+          /*suffix=*/tensorflow::strings::Printf("dim.%lld", dimension),
+          start_index, end_index);
+      array_index[dimension] = loop->GetIndVarValue();
+    } else {
+      // Emit static loop bounds for this dimension.
+      std::unique_ptr<llvm_ir::ForLoop> loop = loop_nest.AddLoop(
+          /*start_index=*/0,
+          /*end_index=*/shape_.dimensions(dimension),
+          /*suffix=*/tensorflow::strings::Printf("dim.%lld", dimension));
+      array_index[dimension] = loop->GetIndVarValue();
+    }
+  }
+  // Point IR builder at inner loop BB.
+  llvm_ir::SetToFirstInsertPoint(loop_nest.GetInnerLoopBodyBasicBlock(),
+                                 ir_builder_);
+
+  // Set exit_bb_ to the exit block of the loop nest.
+  exit_bb_ = loop_nest.GetOuterLoopExitBasicBlock();
+  CHECK(exit_bb_ != nullptr);
+
+  return array_index;
+}
+
+}  // namespace cpu
+}  // namespace xla
diff --git a/tensorflow/compiler/xla/service/cpu/parallel_loop_emitter.h b/tensorflow/compiler/xla/service/cpu/parallel_loop_emitter.h
new file mode 100644
index 0000000000..492d5953c4
--- /dev/null
+++ b/tensorflow/compiler/xla/service/cpu/parallel_loop_emitter.h
@@ -0,0 +1,75 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef THIRD_PARTY_TENSORFLOW_COMPILER_XLA_SERVICE_CPU_PARALLEL_LOOP_EMITTER_H_
+#define THIRD_PARTY_TENSORFLOW_COMPILER_XLA_SERVICE_CPU_PARALLEL_LOOP_EMITTER_H_
+
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Value.h"
+#include "tensorflow/compiler/xla/service/llvm_ir/ir_array.h"
+#include "tensorflow/compiler/xla/service/llvm_ir/loop_emitter.h"
+
+namespace xla {
+namespace cpu {
+
+// ParallelLoopEmitter emits a loop nest for the target array shape.
+// The outer loop bounds of the loop nest are passed as ir values at runtime
+// (specified in 'dynamic_loop_bounds'), and the inner loop bounds are static.
+// Dynamic loop bounds are specified as an array of dimension index
+// [start, limit) pairs of ir values (one for each partitioned outer dimension).
+//
+// EX: Let 'shape' = [8, 16, 32], with the loop bounds of the two-most major
+//     dimensions dynamic.
+//     Then 'dynamic_loop_bounds' will contain the following ir values for
+//     the two most-major dimenions:
+//       [dim0_index_start_ir_value, dim0_index_limit_ir_value]
+//       [dim1_index_start_ir_value, dim1_index_limit_ir_value]
+//
+// Code emitted by ParallelLoopEmitter will be called in a multi-threaded
+// context where each thread will be assigned a different set of outer dimension
+// partitions, and where all threads will collectively iterate over the
+// entire target array shape.
+//
+// Outer dimension partitions can be generated using the ShapePartitionAssigner
+// and ShapePartitionIterator utility classes from shape_partition.cc.
+//
+class ParallelLoopEmitter : public llvm_ir::LoopEmitter {
+ public:
+  using LoopBounds = std::vector<std::pair<llvm::Value*, llvm::Value*>>;
+
+  // Constructs a ParallelLoopEmitter which uses 'target_element_generator' to
+  // generate elements, 'dynamic_loop_bounds' to set the loop bounds of the
+  // most-major dimensions, and 'target_array.' shape to set the static loop
+  // bounds for the most-minor dimensions.
+  ParallelLoopEmitter(const llvm_ir::ElementGenerator& target_element_generator,
+                      const llvm_ir::IrArray& target_array,
+                      const LoopBounds* dynamic_loop_bounds,
+                      llvm::IRBuilder<>* ir_builder);
+
+  ParallelLoopEmitter(const ParallelLoopEmitter&) = delete;
+  ParallelLoopEmitter& operator=(const ParallelLoopEmitter&) = delete;
+  ~ParallelLoopEmitter() override = default;
+
+  llvm_ir::IrArray::Index EmitIndexAndSetExitBasicBlock(
+      tensorflow::StringPiece loop_name) override;
+
+ private:
+  const LoopBounds* dynamic_loop_bounds_;
+};
+
+}  // namespace cpu
+}  // namespace xla
+
+#endif  // THIRD_PARTY_TENSORFLOW_COMPILER_XLA_SERVICE_CPU_PARALLEL_LOOP_EMITTER_H_
-- 
GitLab


From 667282eb0e62bef03bbe527bef88c656532444bb Mon Sep 17 00:00:00 2001
From: Jacques Pienaar <jpienaar@google.com>
Date: Wed, 29 Nov 2017 09:30:56 -0800
Subject: [PATCH 0408/1225] [TFXLA] Return nullopt if no merge node found.

PiperOrigin-RevId: 177319722
---
 tensorflow/compiler/tf2xla/functionalize_control_flow.cc | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/tensorflow/compiler/tf2xla/functionalize_control_flow.cc b/tensorflow/compiler/tf2xla/functionalize_control_flow.cc
index 5726d8294a..267268298c 100644
--- a/tensorflow/compiler/tf2xla/functionalize_control_flow.cc
+++ b/tensorflow/compiler/tf2xla/functionalize_control_flow.cc
@@ -1067,6 +1067,10 @@ FunctionalizeCond::CreateCorrespondingMergeCluster(Cluster* switch_cluster) {
       enqueue_or_update_merge(out);
     }
   }
+  // Return if there are no merge nodes.
+  if (merges.empty()) {
+    return gtl::nullopt;
+  }
   auto it = merges.begin();
   Cluster* merge_cluster = *it;
   for (++it; it != merges.end(); ++it) {
-- 
GitLab


From 537ecc56cf09d5dcb2b328b322d9f8b195abcc6c Mon Sep 17 00:00:00 2001
From: Derek Murray <mrry@google.com>
Date: Wed, 29 Nov 2017 09:48:08 -0800
Subject: [PATCH 0409/1225] [tf.data] Remove GraphDefBuilder and NodeBuilder
 dependencies from "dataset.h".

This is a step towards making a header-only library on which external op
implementations can depend. To do this "dataset.h" cannot depend on any
headers in "tensorflow/core/graph/...".

PiperOrigin-RevId: 177322011
---
 tensorflow/core/kernels/BUILD                 |   2 +-
 tensorflow/core/kernels/dataset.cc            | 140 ++++++++++++++++
 tensorflow/core/kernels/dataset.h             | 155 +++---------------
 tensorflow/core/kernels/filter_dataset_op.cc  |   2 +-
 .../core/kernels/flat_map_dataset_op.cc       |   2 +-
 .../core/kernels/interleave_dataset_op.cc     |   2 +-
 tensorflow/core/kernels/map_dataset_op.cc     |   2 +-
 .../core/kernels/padded_batch_dataset_op.cc   |   4 +-
 tensorflow/core/kernels/tensor_dataset_op.cc  |   2 +-
 .../core/kernels/tensor_slice_dataset_op.cc   |   2 +-
 tensorflow/core/kernels/zip_dataset_op.cc     |   2 +-
 11 files changed, 171 insertions(+), 144 deletions(-)

diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD
index eff15e809a..fd36e6ca1f 100644
--- a/tensorflow/core/kernels/BUILD
+++ b/tensorflow/core/kernels/BUILD
@@ -5832,11 +5832,11 @@ cc_library(
     srcs = ["dataset.cc"],
     hdrs = ["dataset.h"],
     deps = [
+        "//tensorflow/core:core_cpu",
         "//tensorflow/core:framework",
         "//tensorflow/core:lib",
         "//tensorflow/core:lib_internal",
         "//tensorflow/core:protos_all_cc",
-        "//tensorflow/core/util/tensor_bundle",
     ],
 )
 
diff --git a/tensorflow/core/kernels/dataset.cc b/tensorflow/core/kernels/dataset.cc
index fcfa2956f7..0972129787 100644
--- a/tensorflow/core/kernels/dataset.cc
+++ b/tensorflow/core/kernels/dataset.cc
@@ -15,6 +15,9 @@ limitations under the License.
 
 #include "tensorflow/core/kernels/dataset.h"
 
+#include "tensorflow/core/graph/graph_def_builder.h"
+#include "tensorflow/core/graph/node_builder.h"
+
 namespace tensorflow {
 
 namespace {
@@ -70,6 +73,143 @@ class DatasetVariantWrapper {
 
 }  // namespace
 
+Status GraphDefBuilderWrapper::AddDataset(
+    const GraphDatasetBase* dataset,
+    const std::vector<std::pair<size_t, Node*>>& inputs,
+    const std::vector<std::pair<size_t, gtl::ArraySlice<Node*>>>& list_inputs,
+    const std::vector<std::pair<StringPiece, AttrValue>>& attrs,
+    Node** output) {
+  const string& op_type_name = dataset->op_name();
+  std::unique_ptr<const GraphDefBuilder::Options> opts(
+      new GraphDefBuilder::Options(b_->opts()));
+  // TODO(srbs|mrry): Not all datasets have output_types and output_shapes
+  // attributes defined. It will be nice to have a consistent pattern.
+  bool has_output_types_attr = HasAttr(op_type_name, "output_types");
+  bool has_output_shapes_attr = HasAttr(op_type_name, "output_shapes");
+  if (has_output_shapes_attr) {
+    opts.reset(new GraphDefBuilder::Options(
+        opts->WithAttr("output_shapes", dataset->output_shapes())));
+  }
+  if (has_output_types_attr) {
+    opts.reset(new GraphDefBuilder::Options(
+        opts->WithAttr("output_types", dataset->output_dtypes())));
+  }
+  for (auto attr : attrs) {
+    opts.reset(
+        new GraphDefBuilder::Options(opts->WithAttr(attr.first, attr.second)));
+  }
+  if (opts->HaveError()) {
+    return errors::Internal("AddDataset: Failed to build Options with error ",
+                            opts->StatusToString());
+  }
+  NodeBuilder node_builder(opts->GetNameForOp(op_type_name), op_type_name,
+                           opts->op_registry());
+  {
+    size_t total_size = inputs.size() + list_inputs.size();
+    auto inputs_iter = inputs.begin();
+    auto list_inputs_iter = list_inputs.begin();
+    for (int i = 0; i < total_size; i++) {
+      if (inputs_iter != inputs.end() && inputs_iter->first == i) {
+        node_builder.Input(NodeBuilder::NodeOut(inputs_iter->second));
+        inputs_iter++;
+      } else if (list_inputs_iter != list_inputs.end() &&
+                 list_inputs_iter->first == i) {
+        std::vector<NodeBuilder::NodeOut> nodeout_inputs;
+        nodeout_inputs.reserve(list_inputs_iter->second.size());
+        for (Node* n : list_inputs_iter->second) {
+          nodeout_inputs.emplace_back(n);
+        }
+        node_builder.Input(nodeout_inputs);
+        list_inputs_iter++;
+      } else {
+        return errors::InvalidArgument("No input found for index ", i);
+      }
+    }
+  }
+  *output = opts->FinalizeBuilder(&node_builder);
+  if (*output == nullptr) {
+    return errors::Internal("AddDataset: Failed to build ", op_type_name,
+                            " op with error ", opts->StatusToString());
+  }
+  return Status::OK();
+}
+
+Status GraphDefBuilderWrapper::AddFunction(OpKernelContext* ctx,
+                                           const string& function_name) {
+  if (b_->HasFunction(function_name)) {
+    LOG(INFO) << "Function with name " << function_name << "already exists in"
+              << " the graph. It will not be added again.";
+    return Status::OK();
+  }
+  TF_RETURN_IF_ERROR(EnsureFunctionIsStateless(ctx, function_name));
+  const FunctionLibraryDefinition* flib_def =
+      ctx->function_library()->GetFunctionLibraryDefinition();
+  const FunctionDef* f_def = flib_def->Find(function_name);
+  if (f_def == nullptr) {
+    return errors::InvalidArgument("Unable to find FunctionDef for ",
+                                   function_name, " in the registry.");
+  }
+  FunctionDefLibrary def;
+  *def.add_function() = *f_def;
+  const string gradient_func = flib_def->FindGradient(function_name);
+  if (!gradient_func.empty()) {
+    GradientDef* g_def = def.add_gradient();
+    g_def->set_function_name(function_name);
+    g_def->set_gradient_func(gradient_func);
+  }
+  TF_RETURN_IF_ERROR(b_->AddFunctionLibrary(def));
+
+  // Recursively add functions in inputs of function_name.
+  for (const NodeDef& node_def : f_def->node_def()) {
+    const OpRegistrationData* op_reg_data = nullptr;
+    TF_RETURN_IF_ERROR(flib_def->LookUp(node_def.op(), &op_reg_data));
+    if (op_reg_data->is_function_op) {
+      TF_RETURN_IF_ERROR(AddFunction(ctx, op_reg_data->op_def.name()));
+    }
+    // Recursively add functions in attrs of this NodeDef.
+    for (const auto& pair : node_def.attr()) {
+      TF_RETURN_IF_ERROR(AddAttrFunctions(pair.second, ctx));
+    }
+  }
+
+  // Recursively add functions in attrs of function_name.
+  for (auto iter = f_def->attr().begin(); iter != f_def->attr().end(); iter++) {
+    TF_RETURN_IF_ERROR(AddAttrFunctions(iter->second, ctx));
+  }
+  return Status::OK();
+}
+
+void GraphDefBuilderWrapper::AddTensorInternal(const Tensor& val,
+                                               Node** output) {
+  *output = ops::SourceOp(
+      "Const",
+      b_->opts().WithAttr("dtype", val.dtype()).WithAttr("value", val));
+}
+
+bool GraphDefBuilderWrapper::HasAttr(const string& op_type_name,
+                                     const string& attr_name) const {
+  const OpDef* op_def = nullptr;
+  Status s = b_->opts().op_registry()->LookUpOpDef(op_type_name, &op_def);
+  if (!s.ok() || op_def == nullptr) {
+    return false;
+  }
+  return HasAttr(op_def, attr_name);
+}
+
+Status GraphDatasetBase::Serialize(OpKernelContext* ctx,
+                                   string* serialized_graph_def,
+                                   string* output_node) const {
+  GraphDefBuilder b;
+  DatasetGraphDefBuilder db(&b);
+  Node* node = nullptr;
+  TF_RETURN_IF_ERROR(AsGraphDefInternal(ctx, &db, &node));
+  *output_node = node->name();
+  GraphDef graph_def;
+  TF_RETURN_IF_ERROR(b.ToGraphDef(&graph_def));
+  graph_def.SerializeToString(serialized_graph_def);
+  return Status::OK();
+}
+
 Status GetDatasetFromVariantTensor(const Tensor& tensor,
                                    DatasetBase** out_dataset) {
   if (!(tensor.dtype() == DT_VARIANT ||
diff --git a/tensorflow/core/kernels/dataset.h b/tensorflow/core/kernels/dataset.h
index afbebb0692..504a88a309 100644
--- a/tensorflow/core/kernels/dataset.h
+++ b/tensorflow/core/kernels/dataset.h
@@ -19,12 +19,13 @@ limitations under the License.
 
 #include "tensorflow/core/framework/attr_value.pb.h"
 #include "tensorflow/core/framework/attr_value_util.h"
+#include "tensorflow/core/framework/function.h"
 #include "tensorflow/core/framework/graph.pb.h"
+#include "tensorflow/core/framework/node_def.pb.h"
 #include "tensorflow/core/framework/op_kernel.h"
 #include "tensorflow/core/framework/register_types.h"
 #include "tensorflow/core/framework/variant_encode_decode.h"
 #include "tensorflow/core/framework/variant_tensor_data.h"
-#include "tensorflow/core/graph/graph_def_builder.h"
 #include "tensorflow/core/lib/strings/str_util.h"
 #include "tensorflow/core/lib/strings/strcat.h"
 #include "tensorflow/core/platform/tracing.h"
@@ -59,6 +60,12 @@ class IteratorStateWriter {
   virtual ~IteratorStateWriter() {}
 };
 
+// Forward declarations to avoid introducing a dependency on headers in
+// "tensorflow/core/graph/...".
+class GraphDefBuilder;
+class GraphDatasetBase;
+class Node;
+
 // Wrapper around GraphDefBuilder. Used to serialize Dataset graph.
 class GraphDefBuilderWrapper {
  public:
@@ -110,10 +117,8 @@ class GraphDefBuilderWrapper {
     return Status::OK();
   }
 
-  template <class DatasetType>
-  Status AddDataset(const DatasetType* dataset,
-                    const std::vector<NodeBuilder::NodeOut>& inputs,
-                    Node** output) {
+  Status AddDataset(const GraphDatasetBase* dataset,
+                    const std::vector<Node*>& inputs, Node** output) {
     return AddDataset(dataset, inputs, {}, output);
   }
 
@@ -125,77 +130,23 @@ class GraphDefBuilderWrapper {
   // `*output` contains a pointer to the output `Node`. It is guaranteed to be
   // non-null if the method returns with an OK status.
   // The returned Node pointer is owned by the backing Graph of GraphDefBuilder.
-  template <class DatasetType>
-  Status AddDataset(const DatasetType* dataset,
-                    const std::vector<NodeBuilder::NodeOut>& inputs,
+  Status AddDataset(const GraphDatasetBase* dataset,
+                    const std::vector<Node*>& inputs,
                     const std::vector<std::pair<StringPiece, AttrValue>>& attrs,
                     Node** output) {
-    std::vector<std::pair<size_t, NodeBuilder::NodeOut>> enumerated_inputs(
-        inputs.size());
+    std::vector<std::pair<size_t, Node*>> enumerated_inputs(inputs.size());
     for (int i = 0; i < inputs.size(); i++) {
       enumerated_inputs[i] = std::make_pair(i, inputs[i]);
     }
     return AddDataset(dataset, enumerated_inputs, {}, attrs, output);
   }
 
-  template <class DatasetType>
   Status AddDataset(
-      const DatasetType* dataset,
-      const std::vector<std::pair<size_t, NodeBuilder::NodeOut>>& inputs,
-      const std::vector<
-          std::pair<size_t, gtl::ArraySlice<NodeBuilder::NodeOut>>>&
-          list_inputs,
+      const GraphDatasetBase* dataset,
+      const std::vector<std::pair<size_t, Node*>>& inputs,
+      const std::vector<std::pair<size_t, gtl::ArraySlice<Node*>>>& list_inputs,
       const std::vector<std::pair<StringPiece, AttrValue>>& attrs,
-      Node** output) {
-    const string& op_type_name = dataset->op_name();
-    std::unique_ptr<const GraphDefBuilder::Options> opts(
-        new GraphDefBuilder::Options(b_->opts()));
-    // TODO(srbs|mrry): Not all datasets have output_types and output_shapes
-    // attributes defined. It will be nice to have a consistent pattern.
-    bool has_output_types_attr = HasAttr(op_type_name, "output_types");
-    bool has_output_shapes_attr = HasAttr(op_type_name, "output_shapes");
-    if (has_output_shapes_attr) {
-      opts.reset(new GraphDefBuilder::Options(
-          opts->WithAttr("output_shapes", dataset->output_shapes())));
-    }
-    if (has_output_types_attr) {
-      opts.reset(new GraphDefBuilder::Options(
-          opts->WithAttr("output_types", dataset->output_dtypes())));
-    }
-    for (auto attr : attrs) {
-      opts.reset(new GraphDefBuilder::Options(
-          opts->WithAttr(attr.first, attr.second)));
-    }
-    if (opts->HaveError()) {
-      return errors::Internal("AddDataset: Failed to build Options with error ",
-                              opts->StatusToString());
-    }
-    NodeBuilder node_builder(opts->GetNameForOp(op_type_name), op_type_name,
-                             opts->op_registry());
-    {
-      size_t total_size = inputs.size() + list_inputs.size();
-      auto inputs_iter = inputs.begin();
-      auto list_inputs_iter = list_inputs.begin();
-      for (int i = 0; i < total_size; i++) {
-        if (inputs_iter != inputs.end() && inputs_iter->first == i) {
-          node_builder.Input(inputs_iter->second);
-          inputs_iter++;
-        } else if (list_inputs_iter != list_inputs.end() &&
-                   list_inputs_iter->first == i) {
-          node_builder.Input(list_inputs_iter->second);
-          list_inputs_iter++;
-        } else {
-          return errors::InvalidArgument("No input found for index ", i);
-        }
-      }
-    }
-    *output = opts->FinalizeBuilder(&node_builder);
-    if (*output == nullptr) {
-      return errors::Internal("AddDataset: Failed to build ", op_type_name,
-                              " op with error ", opts->StatusToString());
-    }
-    return Status::OK();
-  }
+      Node** output);
 
   // Adds a user-defined function with name `function_name` to the graph and
   // recursively adds all functions it references. If a function with a matching
@@ -203,50 +154,7 @@ class GraphDefBuilderWrapper {
   // name `function_name` is not found in the FunctionLibraryDefinition, returns
   // an InvalidArgumentError. If the function with name `function_name` or any
   // of its dependent functions are stateful, returns an InvalidArgument error.
-  Status AddFunction(OpKernelContext* ctx, const string& function_name) {
-    if (b_->HasFunction(function_name)) {
-      LOG(INFO) << "Function with name " << function_name << "already exists in"
-                << " the graph. It will not be added again.";
-      return Status::OK();
-    }
-    TF_RETURN_IF_ERROR(EnsureFunctionIsStateless(ctx, function_name));
-    const FunctionLibraryDefinition* flib_def =
-        ctx->function_library()->GetFunctionLibraryDefinition();
-    const FunctionDef* f_def = flib_def->Find(function_name);
-    if (f_def == nullptr) {
-      return errors::InvalidArgument("Unable to find FunctionDef for ",
-                                     function_name, " in the registry.");
-    }
-    FunctionDefLibrary def;
-    *def.add_function() = *f_def;
-    const string gradient_func = flib_def->FindGradient(function_name);
-    if (!gradient_func.empty()) {
-      GradientDef* g_def = def.add_gradient();
-      g_def->set_function_name(function_name);
-      g_def->set_gradient_func(gradient_func);
-    }
-    TF_RETURN_IF_ERROR(b_->AddFunctionLibrary(def));
-
-    // Recursively add functions in inputs of function_name.
-    for (const NodeDef& node_def : f_def->node_def()) {
-      const OpRegistrationData* op_reg_data = nullptr;
-      TF_RETURN_IF_ERROR(flib_def->LookUp(node_def.op(), &op_reg_data));
-      if (op_reg_data->is_function_op) {
-        TF_RETURN_IF_ERROR(AddFunction(ctx, op_reg_data->op_def.name()));
-      }
-      // Recursively add functions in attrs of this NodeDef.
-      for (const auto& pair : node_def.attr()) {
-        TF_RETURN_IF_ERROR(AddAttrFunctions(pair.second, ctx));
-      }
-    }
-
-    // Recursively add functions in attrs of function_name.
-    for (auto iter = f_def->attr().begin(); iter != f_def->attr().end();
-         iter++) {
-      TF_RETURN_IF_ERROR(AddAttrFunctions(iter->second, ctx));
-    }
-    return Status::OK();
-  }
+  Status AddFunction(OpKernelContext* ctx, const string& function_name);
 
   template <typename T>
   void BuildAttrValue(const T& value, AttrValue* attr) {
@@ -254,11 +162,7 @@ class GraphDefBuilderWrapper {
   }
 
  private:
-  void AddTensorInternal(const Tensor& val, Node** output) {
-    *output = ops::SourceOp(
-        "Const",
-        b_->opts().WithAttr("dtype", val.dtype()).WithAttr("value", val));
-  }
+  void AddTensorInternal(const Tensor& val, Node** output);
 
   Status EnsureFunctionIsStateless(OpKernelContext* ctx,
                                    const string& function_name) const {
@@ -294,14 +198,7 @@ class GraphDefBuilderWrapper {
            HasAttr(op_def, "output_shapes");
   }
 
-  bool HasAttr(const string& op_type_name, const string& attr_name) const {
-    const OpDef* op_def = nullptr;
-    Status s = b_->opts().op_registry()->LookUpOpDef(op_type_name, &op_def);
-    if (!s.ok() || op_def == nullptr) {
-      return false;
-    }
-    return HasAttr(op_def, attr_name);
-  }
+  bool HasAttr(const string& op_type_name, const string& attr_name) const;
 
   bool HasAttr(const OpDef* op_def, const string& attr_name) const {
     for (auto attr : op_def->attr()) {
@@ -548,17 +445,7 @@ class GraphDatasetBase : public DatasetBase {
 
  private:
   Status Serialize(OpKernelContext* ctx, string* serialized_graph_def,
-                   string* output_node) const {
-    GraphDefBuilder b;
-    DatasetGraphDefBuilder db(&b);
-    Node* node = nullptr;
-    TF_RETURN_IF_ERROR(AsGraphDefInternal(ctx, &db, &node));
-    *output_node = node->name();
-    GraphDef graph_def;
-    TF_RETURN_IF_ERROR(b.ToGraphDef(&graph_def));
-    graph_def.SerializeToString(serialized_graph_def);
-    return Status::OK();
-  }
+                   string* output_node) const;
 
   const string op_name_;
 };
diff --git a/tensorflow/core/kernels/filter_dataset_op.cc b/tensorflow/core/kernels/filter_dataset_op.cc
index e4d80e4ce3..0ac6cd9a98 100644
--- a/tensorflow/core/kernels/filter_dataset_op.cc
+++ b/tensorflow/core/kernels/filter_dataset_op.cc
@@ -95,7 +95,7 @@ class FilterDatasetOp : public UnaryDatasetOpKernel {
 
       DataTypeVector other_arguments_types;
       other_arguments_types.reserve(captured_func_->captured_inputs().size());
-      std::vector<NodeBuilder::NodeOut> other_arguments;
+      std::vector<Node*> other_arguments;
       other_arguments.reserve(captured_func_->captured_inputs().size());
       for (const Tensor& t : captured_func_->captured_inputs()) {
         Node* node;
diff --git a/tensorflow/core/kernels/flat_map_dataset_op.cc b/tensorflow/core/kernels/flat_map_dataset_op.cc
index ac1689e5bf..8fe8489371 100644
--- a/tensorflow/core/kernels/flat_map_dataset_op.cc
+++ b/tensorflow/core/kernels/flat_map_dataset_op.cc
@@ -102,7 +102,7 @@ class FlatMapDatasetOp : public UnaryDatasetOpKernel {
 
       DataTypeVector other_arguments_types;
       other_arguments_types.reserve(captured_func_->captured_inputs().size());
-      std::vector<NodeBuilder::NodeOut> other_arguments;
+      std::vector<Node*> other_arguments;
       other_arguments.reserve(captured_func_->captured_inputs().size());
       for (const Tensor& t : captured_func_->captured_inputs()) {
         Node* node;
diff --git a/tensorflow/core/kernels/interleave_dataset_op.cc b/tensorflow/core/kernels/interleave_dataset_op.cc
index cbee68b2db..833e8cb9c5 100644
--- a/tensorflow/core/kernels/interleave_dataset_op.cc
+++ b/tensorflow/core/kernels/interleave_dataset_op.cc
@@ -126,7 +126,7 @@ class InterleaveDatasetOp : public UnaryDatasetOpKernel {
       TF_RETURN_IF_ERROR(b->AddScalar(block_length_, &block_length_node));
       DataTypeVector other_arguments_types;
       other_arguments_types.reserve(captured_func_->captured_inputs().size());
-      std::vector<NodeBuilder::NodeOut> other_arguments;
+      std::vector<Node*> other_arguments;
       other_arguments.reserve(captured_func_->captured_inputs().size());
       for (const Tensor& t : captured_func_->captured_inputs()) {
         Node* node;
diff --git a/tensorflow/core/kernels/map_dataset_op.cc b/tensorflow/core/kernels/map_dataset_op.cc
index 4ba09bc335..23148f122d 100644
--- a/tensorflow/core/kernels/map_dataset_op.cc
+++ b/tensorflow/core/kernels/map_dataset_op.cc
@@ -100,7 +100,7 @@ class MapDatasetOp : public UnaryDatasetOpKernel {
 
       DataTypeVector other_arguments_types(
           captured_func_->captured_inputs().size());
-      std::vector<NodeBuilder::NodeOut> other_arguments(
+      std::vector<Node*> other_arguments(
           captured_func_->captured_inputs().size());
       for (const Tensor& t : captured_func_->captured_inputs()) {
         Node* node;
diff --git a/tensorflow/core/kernels/padded_batch_dataset_op.cc b/tensorflow/core/kernels/padded_batch_dataset_op.cc
index 7c28d955e1..cef5bde156 100644
--- a/tensorflow/core/kernels/padded_batch_dataset_op.cc
+++ b/tensorflow/core/kernels/padded_batch_dataset_op.cc
@@ -242,7 +242,7 @@ class PaddedBatchDatasetOp : public UnaryDatasetOpKernel {
       Node* batch_size = nullptr;
       TF_RETURN_IF_ERROR(b->AddScalar(batch_size_, &batch_size));
 
-      std::vector<NodeBuilder::NodeOut> padded_shapes;
+      std::vector<Node*> padded_shapes;
       padded_shapes.reserve(padded_shapes_.size());
       for (int i = 0; i < padded_shapes_.size(); i++) {
         Node* node;
@@ -254,7 +254,7 @@ class PaddedBatchDatasetOp : public UnaryDatasetOpKernel {
         padded_shapes.emplace_back(node);
       }
 
-      std::vector<NodeBuilder::NodeOut> padding_values;
+      std::vector<Node*> padding_values;
       padding_values.reserve(padding_values_.size());
       for (const Tensor& t : padding_values_) {
         Node* node;
diff --git a/tensorflow/core/kernels/tensor_dataset_op.cc b/tensorflow/core/kernels/tensor_dataset_op.cc
index fe53434d17..5cf9931188 100644
--- a/tensorflow/core/kernels/tensor_dataset_op.cc
+++ b/tensorflow/core/kernels/tensor_dataset_op.cc
@@ -70,7 +70,7 @@ class TensorDatasetOp : public DatasetOpKernel {
    protected:
     Status AsGraphDefInternal(DatasetGraphDefBuilder* b,
                               Node** output) const override {
-      std::vector<NodeBuilder::NodeOut> components;
+      std::vector<Node*> components;
       components.reserve(tensors_.size());
       for (const Tensor& t : tensors_) {
         Node* node;
diff --git a/tensorflow/core/kernels/tensor_slice_dataset_op.cc b/tensorflow/core/kernels/tensor_slice_dataset_op.cc
index e85f59b584..19d4816ff3 100644
--- a/tensorflow/core/kernels/tensor_slice_dataset_op.cc
+++ b/tensorflow/core/kernels/tensor_slice_dataset_op.cc
@@ -86,7 +86,7 @@ class TensorSliceDatasetOp : public DatasetOpKernel {
    protected:
     Status AsGraphDefInternal(DatasetGraphDefBuilder* b,
                               Node** output) const override {
-      std::vector<NodeBuilder::NodeOut> components;
+      std::vector<Node*> components;
       components.reserve(tensors_.size());
       for (const Tensor& t : tensors_) {
         Node* node;
diff --git a/tensorflow/core/kernels/zip_dataset_op.cc b/tensorflow/core/kernels/zip_dataset_op.cc
index 9381915ae9..31e5737f62 100644
--- a/tensorflow/core/kernels/zip_dataset_op.cc
+++ b/tensorflow/core/kernels/zip_dataset_op.cc
@@ -80,7 +80,7 @@ class ZipDatasetOp : public DatasetOpKernel {
    protected:
     Status AsGraphDefInternal(OpKernelContext* ctx, DatasetGraphDefBuilder* b,
                               Node** output) const override {
-      std::vector<NodeBuilder::NodeOut> input_graph_nodes;
+      std::vector<Node*> input_graph_nodes;
       input_graph_nodes.reserve(inputs_.size());
       for (const auto& input : inputs_) {
         Node* input_node;
-- 
GitLab


From fa8bfa89cd2d7b57bb119afcabdf67ce1539081d Mon Sep 17 00:00:00 2001
From: Mark Daoust <markdaoust@google.com>
Date: Wed, 29 Nov 2017 09:52:17 -0800
Subject: [PATCH 0410/1225] Add feature_columns doc.

PiperOrigin-RevId: 177322632
---
 .../docs_src/get_started/feature_columns.md   | 570 ++++++++++++++++++
 1 file changed, 570 insertions(+)
 create mode 100644 tensorflow/docs_src/get_started/feature_columns.md

diff --git a/tensorflow/docs_src/get_started/feature_columns.md b/tensorflow/docs_src/get_started/feature_columns.md
new file mode 100644
index 0000000000..f9537927b7
--- /dev/null
+++ b/tensorflow/docs_src/get_started/feature_columns.md
@@ -0,0 +1,570 @@
+# Feature Columns
+
+This document details feature columns. Think of **feature columns** as the
+intermediaries between raw data and Estimators. Feature columns are very rich,
+enabling you to transform a diverse range of raw data into formats that
+Estimators can use, allowing easy experimentation.
+
+In @{$get_started/estimator$Premade Estimators}, we used the premade Estimator,
+@{tf.estimator.DNNClassifier$`DNNClassifier`} to train a model to predict
+different types of Iris flowers from four input features. That example created
+only numerical feature columns (of type @{tf.feature_column.numeric_column}).
+Although numerical feature columns model the lengths of petals and sepals
+effectively, real world data sets contain all kinds of features, many of which
+are non-numerical.
+
+<div style="width:80%; margin:auto; margin-bottom:10px; margin-top:20px;">
+<img style="width:100%" src="../images/feature_columns/feature_cloud.jpg">
+</div>
+<div style="text-align: center">
+Some real-world features (such as, longitude) are numerical, but many are not.
+</div>
+
+## Input to a Deep Neural Network
+
+What kind of data can a deep neural network operate on? The answer
+is, of course, numbers (for example, `tf.float32`). After all, every neuron in
+a neural network performs multiplication and addition operations on weights and
+input data. Real-life input data, however, often contains non-numerical
+(categorical) data. For example, consider a `product_class` feature that can
+contain the following three non-numerical values:
+
+* `kitchenware`
+* `electronics`
+* `sports`
+
+ML models generally represent categorical values as simple vectors in which a
+1 represents the presence of a value and a 0 represents the absence of a value.
+For example, when `product_class` is set to `sports`, an ML model would usually
+represent `product_class` as  `[0, 0, 1]`, meaning:
+
+* `0`: `kitchenware` is absent
+* `0`: `electronics` is absent
+* `1`: `sports` is present
+
+So, although raw data can be numerical or categorical, an ML model represents
+all features as numbers.
+
+## Feature Columns
+
+As the following figure suggests, you specify the input to a model through the
+`feature_columns` argument of an Estimator (`DNNClassifier` for Iris).
+Feature Columns bridge input data (as returned by `input_fn`) with your model.
+
+<div style="width:80%; margin:auto; margin-bottom:10px; margin-top:20px;">
+<img style="width:100%" src="../images/feature_columns/inputs_to_model_bridge.jpg">
+</div>
+<div style="text-align: center">
+Feature columns bridge raw data with the data your model needs.
+</div>
+
+To create feature columns, call functions from the
+@{tf.feature_column} module. This document explains nine of the functions in
+that module. As the following figure shows, all nine functions return either a
+Categorical-Column or a Dense-Column object, except `bucketized_column`, which
+inherits from both classes:
+
+<div style="width:80%; margin:auto; margin-bottom:10px; margin-top:20px;">
+<img style="width:100%" src="../images/feature_columns/some_constructors.jpg">
+</div>
+<div style="text-align: center">
+Feature column methods fall into two main categories and one hybrid category.
+</div>
+
+Let's look at these functions in more detail.
+
+### Numeric column
+
+The Iris classifier calls the @{tf.feature_column.numeric_column} function for
+all input features:
+
+  * `SepalLength`
+  * `SepalWidth`
+  * `PetalLength`
+  * `PetalWidth`
+
+Although `tf.numeric_column` provides optional arguments, calling
+`tf.numeric_column` without any arguments, as follows, is a fine way to specify
+a numerical value with the default data type (`tf.float32`) as input to your
+model:
+
+```python
+# Defaults to a tf.float32 scalar.
+numeric_feature_column = tf.feature_column.numeric_column(key="SepalLength")
+```
+
+To specify a non-default numerical data type, use the `dtype` argument. For
+example:
+
+``` python
+# Represent a tf.float64 scalar.
+numeric_feature_column = tf.feature_column.numeric_column(key="SepalLength",
+                                                          dtype=tf.float64)
+```
+
+By default, a numeric column creates a single value (scalar). Use the shape
+argument to specify another shape. For example:
+
+<!--TODO(markdaoust) link to full example-->
+```python
+# Represent a 10-element vector in which each cell contains a tf.float32.
+vector_feature_column = tf.feature_column.numeric_column(key="Bowling",
+                                                         shape=10)
+
+# Represent a 10x5 matrix in which each cell contains a tf.float32.
+matrix_feature_column = tf.feature_column.numeric_column(key="MyMatrix",
+                                                         shape=[10,5])
+```
+### Bucketized column
+
+Often, you don't want to feed a number directly into the model, but instead
+split its value into different categories based on numerical ranges.  To do so,
+create a @{tf.feature_column.bucketized_column$bucketized column}. For
+example, consider raw data that represents the year a house was built. Instead
+of representing that year as a scalar numeric column, we could split the year
+into the following four buckets:
+
+<div style="width:80%; margin:auto; margin-bottom:10px; margin-top:20px;">
+<img style="width:100%" src="../images/feature_columns/bucketized_column.jpg">
+</div>
+<div style="text-align: center">
+Dividing year data into four buckets.
+</div>
+
+The model will represent the buckets as follows:
+
+|Date Range |Represented as... |
+|:----------|:-----------------|
+|< 1960               | [1, 0, 0, 0] |
+|>= 1960 but < 1980   | [0, 1, 0, 0] |
+|>= 1980 but < 2000   | [0, 0, 1, 0] |
+|> 2000               | [0, 0, 0, 1] |
+
+Why would you want to split a number—a perfectly valid input to your
+model—into a categorical value? Well, notice that the categorization splits a
+single input number into a four-element vector. Therefore, the model now can
+learn _four individual weights_ rather than just one; four weights creates a
+richer model than one weight. More importantly, bucketizing enables the model
+to clearly distinguish between different year categories since only one of the
+elements is set (1) and the other three elements are cleared (0). When we just
+use a single number (a year) as input, the model can only learn a linear
+relationship. So, bucketing provides the model with additional flexibility that
+the model can use to learn.
+
+The following code demonstrates how to create a bucketized feature:
+
+<!--TODO(markdaoust) link to full example - housing price grid?-->
+```python
+# First, convert the raw input to a numeric column.
+numeric_feature_column = tf.feature_column.numeric_column("Year")
+
+# Then, bucketize the numeric column on the years 1960, 1980, and 2000.
+bucketized_feature_column = tf.feature_column.bucketized_column(
+    source_column = numeric_feature_column,
+    boundaries = [1960, 1980, 2000])
+```
+Note that specifying a _three_-element boundaries vector creates a
+_four_-element bucketized vector.
+
+
+### Categorical identity column
+
+**Categorical identity columns** can be seen as a special case of bucketized
+columns. In traditional bucketized columns, each bucket represents a range of
+values (for example, from 1960 to 1979). In a categorical identity column, each
+bucket represents a single, unique integer. For example, let's say you want to
+represent the integer range `[0, 4)`.  That is, you want to represent the
+integers 0, 1, 2, or 3. In this case, the categorical identity mapping looks
+like this:
+
+<div style="width:80%; margin:auto; margin-bottom:10px; margin-top:20px;">
+<img style="width:100%" src="../images/feature_columns/categorical_column_with_identity.jpg">
+</div>
+<div style="text-align: center">
+A categorical identity column mapping. Note that this is a one-hot
+encoding, not a binary numerical encoding.
+</div>
+
+As with bucketized columns, a model can learn a separate weight for each class
+in a categorical identity column. For example, instead of using a string to
+represent the `product_class`, let's represent each class with a unique integer
+value. That is:
+
+* `0="kitchenware"`
+* `1="electronics"`
+* `2="sport"`
+
+Call @{tf.feature_column.categorical_column_with_identity} to implement a
+categorical identity column. For example:
+
+``` python
+# Create categorical output for an integer feature named "my_feature_b",
+# The values of my_feature_b must be >= 0 and < num_buckets
+identity_feature_column = tf.feature_column.categorical_column_with_identity(
+    key='my_feature_b',
+    num_buckets=4) # Values [0, 4)
+
+# In order for the preceding call to work, the input_fn() must return
+# a dictionary containing 'my_feature_b' as a key. Furthermore, the values
+# assigned to 'my_feature_b' must belong to the set [0, 4).
+def input_fn():
+    ...
+    return ({ 'my_feature_a':[7, 9, 5, 2], 'my_feature_b':[3, 1, 2, 2] },
+            [Label_values])
+```
+
+### Categorical vocabulary column
+
+We cannot input strings directly to a model. Instead, we must first map strings
+to numeric or categorical values. Categorical vocabulary columns provide a good
+way to represent strings as a one-hot vector. For example:
+
+<div style="width:80%; margin:auto; margin-bottom:10px; margin-top:20px;">
+<img style="width:100%" src="../images/feature_columns/categorical_column_with_vocabulary.jpg">
+</div>
+<div style="text-align: center">
+Mapping string values to vocabulary columns.
+</div>
+
+As you can see, categorical vocabulary columns are kind of an enum version of
+categorical identity columns. TensorFlow provides two different functions to
+create categorical vocabulary columns:
+
+* @{tf.feature_column.categorical_column_with_vocabulary_list}
+* @{tf.feature_column.categorical_column_with_vocabulary_file}
+
+`categorical_column_with_vocabulary_list` maps each string to an integer based
+on an explicit vocabulary list. For example:
+
+```python
+# Given input "feature_name_from_input_fn" which is a string,
+# create a categorical feature by mapping the input to one of
+# the elements in the vocabulary list.
+vocabulary_feature_column =
+    tf.feature_column.categorical_column_with_vocabulary_list(
+        key="a feature returned by input_fn()",
+        vocabulary_list=["kitchenware", "electronics", "sports"])
+```
+
+The preceding function is pretty straightforward, but it has a significant
+drawback. Namely, there's way too much typing when the vocabulary list is long.
+For these cases, call
+`tf.feature_column.categorical_column_with_vocabulary_file` instead, which lets
+you place the vocabulary words in a separate file. For example:
+
+```python
+
+# Given input "feature_name_from_input_fn" which is a string,
+# create a categorical feature to our model by mapping the input to one of
+# the elements in the vocabulary file
+vocabulary_feature_column =
+    tf.feature_column.categorical_column_with_vocabulary_file(
+        key="a feature returned by input_fn()",
+        vocabulary_file="product_class.txt",
+        vocabulary_size=3)
+```
+
+`product_class.txt` should contain one line for each vocabulary element. In our
+case:
+
+```None
+kitchenware
+electronics
+sports
+```
+
+### Hashed Column
+
+So far, we've worked with a naively small number of categories. For example,
+our product_class example has only 3 categories. Often though, the number of
+categories can be so big that it's not possible to have individual categories
+for each vocabulary word or integer because that would consume too much memory.
+For these cases, we can instead turn the question around and ask, "How many
+categories am I willing to have for my input?"  In fact, the
+@{tf.feature_column.categorical_column_with_hash_bucket} function enables you
+to specify the number of categories. For this type of feature column the model
+calculates a hash value of the input, then puts it into one of
+the `hash_bucket_size` categories using the modulo operator, as in the following
+pseudocode:
+
+```python
+# pseudocode
+feature_id = hash(raw_feature) % hash_buckets_size
+```
+
+The code to create the `feature_column` might look something like this:
+
+``` python
+hashed_feature_column =
+    tf.feature_column.categorical_column_with_hash_bucket(
+        key = "some_feature",
+        hash_buckets_size = 100) # The number of categories
+```
+At this point, you might rightfully think: "This is crazy!" After all, we are
+forcing the different input values to a smaller set of categories. This means
+that two probably unrelated inputs will be mapped to the same
+category, and consequently mean the same thing to the neural network. The
+following figure illustrates this dilemma, showing that kitchenware and sports
+both get assigned to category (hash bucket) 12:
+
+<div style="width:80%; margin:auto; margin-bottom:10px; margin-top:20px;">
+<img style="width:100%" src="../images/feature_columns/hashed_column.jpg">
+</div>
+<div style="text-align: center">
+Representing data with hash buckets.
+</div>
+
+As with many counterintuitive phenomena in machine learning, it turns out that
+hashing often works well in practice. That's because hash categories provide
+the model with some separation. The model can use additional features to further
+separate kitchenware from sports.
+
+### Crossed column
+
+Combining features into a single feature, better known as
+[feature crosses](https://developers.google.com/machine-learning/glossary/#feature_cross),
+enables the model to learn separate weights for each combination of
+features.
+
+More concretely, suppose we want our model to calculate real estate prices in
+Atlanta, GA. Real-estate prices within this city vary greatly depending on
+location. Representing latitude and longitude as separate features isn't very
+useful in identifying real-estate location dependencies; however, crossing
+latitude and longitude into a single feature can pinpoint locations. Suppose we
+represent Atlanta as a grid of 100x100 rectangular sections, identifying each
+of the 10,000 sections by a feature cross of latitude and longitude. This
+feature cross enables the model to train on pricing conditions related to each
+individual section, which is a much stronger signal than latitude and longitude
+alone.
+
+The following figure shows our plan, with the latitude & longitude values for
+the corners of the city in red text:
+
+<div style="width:80%; margin:auto; margin-bottom:10px; margin-top:20px;">
+<img style="width:100%" src="../images/feature_columns/Atlanta.jpg">
+</div>
+<div style="text-align: center">
+Map of Atlanta. Imagine this map divided into 10,000 sections of
+equal size.
+</div>
+
+For the solution, we used a combination of the `bucketized_column` we looked at
+earlier, with the @{tf.feature_column.crossed_column} function.
+
+<!--TODO(markdaoust) link to full example-->
+
+``` python
+def make_dataset(latitude, longitude, labels):
+    assert latitude.shape == longitude.shape == labels.shape
+
+    features = {'latitude': latitude.flatten(),
+                'longitude': longitude.flatten()}
+    labels=labels.flatten()
+
+    return tf.data.Dataset.from_tensor_slices((features, labels))
+
+
+# Bucketize the latitude and longitude usig the `edges`
+latitude_bucket_fc = tf.feature_column.bucketized_column(
+    tf.feature_column.numeric_column('latitude'),
+    list(atlanta.latitude.edges))
+
+longitude_bucket_fc = tf.feature_column.bucketized_column(
+    tf.feature_column.numeric_column('longitude'),
+    list(atlanta.longitude.edges))
+
+# Cross the bucketized columns, using 5000 hash bins.
+crossed_lat_lon_fc = tf.feature_column.crossed_column(
+    [latitude_bucket_fc, longitude_bucket_fc], 5000)
+
+fc = [
+    latitude_bucket_fc,
+    longitude_bucket_fc,
+    crossed_lat_lon_fc]
+
+# Build and train the Estimator.
+est = tf.estimator.LinearRegressor(fc, ...)
+```
+
+You may create a feature cross from either of the following:
+
+* Feature names; that is, names from the `dict` returned from `input_fn`.
+* Any categorical column, except `categorical_column_with_hash_bucket`
+  (since `crossed_column` hashes the input).
+
+When the feature columns `latitude_bucket_fc` and `longitude_bucket_fc` are
+crossed, TensorFlow will create `(latitude_fc, longitude_fc)` pairs for each
+example. This would produce a full grid of possibilities as follows:
+
+``` None
+ (0,0),  (0,1)...  (0,99)
+ (1,0),  (1,1)...  (1,99)
+   ...     ...       ...
+(99,0), (99,1)...(99, 99)
+```
+
+Except that a full grid would only be tractable for inputs with limited
+vocabularies. Instead of building this, potentially huge, table of inputs,
+the `crossed_column` only builds the number requested by the `hash_bucket_size`
+argument. The feature column assigns an example to a index by running a hash
+function on the tuple of inputs, followed by a modulo operation with
+`hash_bucket_size`.
+
+As discussed earlier, performing the
+hash and modulo function limits the number of categories, but can cause category
+collisions; that is, multiple (latitude, longitude) feature crosses will end
+up in the same hash bucket. In practice though, performing feature crosses
+still adds significant value to the learning capability of your models.
+
+Somewhat counterintuitively, when creating feature crosses, you typically still
+should include the original (uncrossed) features in your model (as in the
+preceding code snippet). The independent latitude and longitude features help the
+model distinguish between examples where a hash collision has occured in the
+crossed feature.
+
+## Indicator and embedding columns
+
+Indicator columns and embedding columns never work on features directly, but
+instead take categorical columns as input.
+
+When using an indicator column, we're telling TensorFlow to do exactly what
+we've seen in our categorical product_class example. That is, an
+**indicator column** treats each category as an element in a one-hot vector,
+where the matching category has value 1 and the rest have 0s:
+
+<div style="width:80%; margin:auto; margin-bottom:10px; margin-top:20px;">
+<img style="width:100%" src="../images/feature_columns/categorical_column_with_identity.jpg">
+</div>
+<div style="text-align: center">
+Representing data in indicator columns.
+</div>
+
+Here's how you create an indicator column by calling
+@{tf.feature_column.indicator_column}:
+
+``` python
+categorical_column = ... # Create any type of categorical column.
+
+# Represent the categorical column as an indicator column.
+indicator_column = tf.feature_column.indicator_column(categorical_column)
+```
+
+Now, suppose instead of having just three possible classes, we have a million.
+Or maybe a billion. For a number of reasons, as the number of categories grow
+large, it becomes infeasible to train a neural network using indicator columns.
+
+We can use an embedding column to overcome this limitation. Instead of
+representing the data as a one-hot vector of many dimensions, an
+**embedding column** represents that data as a lower-dimensional, ordinary
+vector in which each cell can contain any number, not just 0 or 1. By
+permitting a richer palette of numbers for every cell, an embedding column
+contains far fewer cells than an indicator column.
+
+Let's look at an example comparing indicator and embedding columns. Suppose our
+input examples consists of different words from a limited palette of only 81
+words. Further suppose that the data set provides provides the following input
+words in 4 separate examples:
+
+* `"dog"`
+* `"spoon"`
+* `"scissors"`
+* `"guitar"`
+
+In that case, the following figure illustrates the processing path for
+embedding columns or indicator columns.
+
+<div style="width:80%; margin:auto; margin-bottom:10px; margin-top:20px;">
+<img style="width:100%" src="../images/feature_columns/embedding_vs_indicator.jpg">
+</div>
+<div style="text-align: center">
+An embedding column stores categorical data in a lower-dimensional
+vector than an indicator column. (We just placed random numbers into the
+embedding vectors; training determines the actual numbers.)
+</div>
+
+When an example is processed, one of the `categorical_column_with...` functions
+maps the example string to a numerical categorical value. For example, a
+function maps "spoon" to `[32]`. (The 32 comes from our imagination—the actual
+values depend on the mapping function.) You may then represent these numerical
+categorical values in either of the following two ways:
+
+* As an indicator column. A function converts each numeric categorical value
+  into an 81-element vector (because our palette consists of 81 words), placing
+  a 1 in the index of the categorical value (0, 32, 79, 80) and a 0 in all the
+  other positions.
+
+* As an embedding column. A function uses the numerical categorical values
+  `(0, 32, 79, 80)` as indices to a lookup table. Each slot in that lookup table
+  contains a 3-element vector.
+
+How do the values in the embeddings vectors magically get assigned? Actually,
+the assignments happen during training. That is, the model learns the best way
+to map your input numeric categorical values to the embeddings vector value in
+order to solve your problem. Embedding columns increase your model's
+capabilities, since an embeddings vector learns new relationships between
+categories from the training data.
+
+Why is the embedding vector size 3 in our example? Well, the following "formula"
+provides a general rule of thumb about the number of embedding dimensions:
+
+```python
+embedding_dimensions =  number_of_categories**0.25
+```
+
+That is, the embedding vector dimension should be the 4th root of the number of
+categories. Since our vocabulary size in this example is 81, the recommended
+number of dimensions is 3:
+
+``` python
+3 =  81**0.25
+```
+Note that this is just a general guideline; you can set the number of embedding
+dimensions as you please.
+
+Call @{tf.feature_column.embedding_column} to create an `embedding_column` as
+suggested by the following snippet:
+
+``` python
+categorical_column = ... # Create any categorical column
+
+# Represent the categorical column as an embedding column.
+# This means creating a one-hot vector with one element for each category.
+embedding_column = tf.feature_column.embedding_column(
+    categorical_column=categorical_column,
+    dimension=dimension_of_embedding_vector)
+```
+
+@{$programmers_guide/embedding$Embeddings} is a significant topic within machine
+learning. This information was just to get you started using them as feature
+columns.
+
+## Passing feature columns to Estimators
+
+As the following list indicates, not all Estimators permit all types of
+`feature_columns` argument(s):
+
+* @{tf.estimator.LinearClassifier$`LinearClassifier`} and
+  @{tf.estimator.LinearRegressor$`LinearRegressor`}: Accept all types of
+  feature column.
+* @{tf.estimator.DNNClassifier$`DNNClassifier`} and
+  @{tf.estimator.DNNRegressor$`DNNRegressor`}: Only accept dense columns. Other
+  column types must be wrapped in either an `indicator_column` or
+  `embedding_column`.
+* @{tf.estimator.DNNLinearCombinedClassifier$`DNNLinearCombinedClassifier`} and
+  @{tf.estimator.DNNLinearCombinedRegressor$`DNNLinearCombinedRegressor`}:
+    * The `linear_feature_columns` argument accepts any feature column type.
+    * The `dnn_feature_columns` argument only accepts dense columns.
+
+## Other Sources
+
+For more examples on feature columns, view the following:
+
+* The @{$wide_and_deep$Wide & Deep Tutorial}
+* [Examples](https://github.com/tensorflow/models/tree/master/samples/cookbook/regression)
+  of DNNs and linear models that use feature columns.
+
+To learn more about embeddings, see the following:
+
+* [Deep Learning, NLP, and representations](http://colah.github.io/posts/2014-07-NLP-RNNs-Representations/)
+  (Chris Olah's blog)
+* The TensorFlow [Embedding Projector](http://projector.tensorflow.org)
-- 
GitLab


From c27a90d2195545c9147ec79094d7bca3176deb44 Mon Sep 17 00:00:00 2001
From: Asim Shankar <ashankar@google.com>
Date: Wed, 29 Nov 2017 09:59:34 -0800
Subject: [PATCH 0411/1225] [TF:XLA] VariableShape op support.

PiperOrigin-RevId: 177323587
---
 .../compiler/tf2xla/kernels/variable_ops.cc   | 27 +++++++++++++++++++
 1 file changed, 27 insertions(+)

diff --git a/tensorflow/compiler/tf2xla/kernels/variable_ops.cc b/tensorflow/compiler/tf2xla/kernels/variable_ops.cc
index b19ea22f50..2346c62ad1 100644
--- a/tensorflow/compiler/tf2xla/kernels/variable_ops.cc
+++ b/tensorflow/compiler/tf2xla/kernels/variable_ops.cc
@@ -22,6 +22,7 @@ limitations under the License.
 #include "tensorflow/compiler/xla/literal_util.h"
 #include "tensorflow/core/framework/kernel_def_builder.h"
 #include "tensorflow/core/framework/types.h"
+#include "tensorflow/core/kernels/bounds_check.h"
 #include "tensorflow/core/kernels/no_op.h"
 
 namespace tensorflow {
@@ -121,5 +122,31 @@ class ResourceGatherOp : public XlaOpKernel {
 REGISTER_XLA_OP(Name("ResourceGather").TypeConstraint("dtype", kNumericTypes),
                 ResourceGatherOp);
 
+class VariableShapeOp : public XlaOpKernel {
+ public:
+  explicit VariableShapeOp(OpKernelConstruction* ctx) : XlaOpKernel(ctx) {}
+
+  void Compile(XlaOpKernelContext* ctx) override {
+    DataType dtype;
+    TensorShape shape;
+    OP_REQUIRES_OK(ctx, ctx->GetVariableTypeAndShape(0, &dtype, &shape));
+    const int rank = shape.dims();
+    Tensor shape_constant(DT_INT32, TensorShape({rank}));
+    auto vec = shape_constant.vec<int32>();
+    // TODO(dga): support int64.  b/28119922.
+    for (int i = 0; i < rank; ++i) {
+      int64 dim_size = shape.dim_size(i);
+      OP_REQUIRES(
+          ctx, FastBoundsCheck(dim_size, std::numeric_limits<int32>::max()),
+          errors::InvalidArgument("Shape does not support tensors > int32max",
+                                  " but dim ", i, " is ", dim_size));
+      vec(i) = static_cast<int32>(dim_size);
+    }
+
+    ctx->SetConstantOutput(0, shape_constant);
+  }
+};
+
+REGISTER_XLA_OP(Name("VariableShape"), VariableShapeOp);
 }  // namespace
 }  // namespace tensorflow
-- 
GitLab


From 2229a6cbbe27b3c42fbcd4aff0bb3de1925a8768 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 29 Nov 2017 10:04:47 -0800
Subject: [PATCH 0412/1225] Internal Change

PiperOrigin-RevId: 177324488
---
 tensorflow/contrib/lite/python/lite.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/contrib/lite/python/lite.py b/tensorflow/contrib/lite/python/lite.py
index 0fd70f842b..982ea90f2b 100644
--- a/tensorflow/contrib/lite/python/lite.py
+++ b/tensorflow/contrib/lite/python/lite.py
@@ -50,7 +50,7 @@ GRAPHVIZ_DOT = _toco_flags_pb2.GRAPHVIZ_DOT
 # to protect against crashes. However, it breaks some dependent targets because
 # it forces us to depend on an external py_binary. The experimental API doesn't
 # have that drawback.
-EXPERIMENTAL_USE_TOCO_API_DIRECTLY = True
+EXPERIMENTAL_USE_TOCO_API_DIRECTLY = False
 
 # Find the toco_from_protos binary using the resource loader if using from
 # bazel, otherwise we are in a pip where console_scripts already has
-- 
GitLab


From 7921d01ec8fed3e5c62264b99b09440ea09796fe Mon Sep 17 00:00:00 2001
From: Allen Lavoie <allenl@google.com>
Date: Wed, 29 Nov 2017 10:06:59 -0800
Subject: [PATCH 0413/1225] Raise an exception when converting lists with
 invalid lengths to Tensors instead of CHECK failing

PiperOrigin-RevId: 177324815
---
 tensorflow/contrib/cmake/tf_python.cmake      |  2 +
 tensorflow/python/BUILD                       | 13 ++++
 .../kernel_tests/constant_op_eager_test.py    | 33 +++++++++
 tensorflow/python/lib/core/py_func.cc         | 54 ++------------
 tensorflow/python/lib/core/py_seq_tensor.cc   | 18 ++++-
 tensorflow/python/lib/core/py_util.cc         | 70 +++++++++++++++++++
 tensorflow/python/lib/core/py_util.h          | 27 +++++++
 tensorflow/tools/ci_build/ci_sanity.sh        |  3 +-
 8 files changed, 169 insertions(+), 51 deletions(-)
 create mode 100644 tensorflow/python/lib/core/py_util.cc
 create mode 100644 tensorflow/python/lib/core/py_util.h

diff --git a/tensorflow/contrib/cmake/tf_python.cmake b/tensorflow/contrib/cmake/tf_python.cmake
index 0128946e45..819b6213ea 100755
--- a/tensorflow/contrib/cmake/tf_python.cmake
+++ b/tensorflow/contrib/cmake/tf_python.cmake
@@ -899,6 +899,8 @@ set (pywrap_tensorflow_internal_src
     "${tensorflow_source_dir}/tensorflow/python/lib/core/py_func.cc"
     "${tensorflow_source_dir}/tensorflow/python/lib/core/py_seq_tensor.h"
     "${tensorflow_source_dir}/tensorflow/python/lib/core/py_seq_tensor.cc"
+    "${tensorflow_source_dir}/tensorflow/python/lib/core/py_util.h"
+    "${tensorflow_source_dir}/tensorflow/python/lib/core/py_util.cc"
     "${tensorflow_source_dir}/tensorflow/python/lib/core/safe_ptr.h"
     "${tensorflow_source_dir}/tensorflow/python/lib/core/safe_ptr.cc"
     "${tensorflow_source_dir}/tensorflow/python/lib/io/py_record_reader.h"
diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD
index 9d3974b98e..5e7a6c0b59 100644
--- a/tensorflow/python/BUILD
+++ b/tensorflow/python/BUILD
@@ -268,6 +268,7 @@ cc_library(
     deps = [
         ":ndarray_tensor_bridge",
         ":numpy_lib",
+        ":py_util",
         "//tensorflow/core:framework",
         "//tensorflow/core:lib",
         "//tensorflow/core:protos_all_cc",
@@ -309,6 +310,7 @@ cc_library(
     hdrs = ["lib/core/py_seq_tensor.h"],
     deps = [
         ":numpy_lib",
+        ":py_util",
         ":safe_ptr",
         "//tensorflow/core:framework",
         "//tensorflow/core:lib",
@@ -316,6 +318,17 @@ cc_library(
     ],
 )
 
+cc_library(
+    name = "py_util",
+    srcs = ["lib/core/py_util.cc"],
+    hdrs = ["lib/core/py_util.h"],
+    deps = [
+        "//tensorflow/core:lib",
+        "//tensorflow/core:script_ops_op_lib",
+        "//util/python:python_headers",
+    ],
+)
+
 cc_library(
     name = "py_record_reader_lib",
     srcs = ["lib/io/py_record_reader.cc"],
diff --git a/tensorflow/python/kernel_tests/constant_op_eager_test.py b/tensorflow/python/kernel_tests/constant_op_eager_test.py
index 3b71586b55..8e9d75667d 100644
--- a/tensorflow/python/kernel_tests/constant_op_eager_test.py
+++ b/tensorflow/python/kernel_tests/constant_op_eager_test.py
@@ -237,6 +237,39 @@ class ConstantTest(test.TestCase):
     self._testAll((1, x))
     self._testAll((x, 1))
 
+  def testInvalidLength(self):
+
+    class BadList(list):
+
+      def __init__(self):
+        super(BadList, self).__init__([1, 2, 3])  # pylint: disable=invalid-length-returned
+
+      def __len__(self):
+        return -1
+
+    with self.assertRaisesRegexp(ValueError, "should return >= 0"):
+      constant_op.constant([BadList()])
+    with self.assertRaisesRegexp(ValueError, "mixed types"):
+      constant_op.constant([1, 2, BadList()])
+    with self.assertRaisesRegexp(ValueError, "should return >= 0"):
+      constant_op.constant(BadList())
+    with self.assertRaisesRegexp(ValueError, "should return >= 0"):
+      constant_op.constant([[BadList(), 2], 3])
+    with self.assertRaisesRegexp(ValueError, "should return >= 0"):
+      constant_op.constant([BadList(), [1, 2, 3]])
+    with self.assertRaisesRegexp(ValueError, "should return >= 0"):
+      constant_op.constant([BadList(), []])
+
+    # TODO(allenl, josh11b): These cases should return exceptions rather than
+    # working (currently shape checking only checks the first element of each
+    # sequence recursively). Maybe the first one is fine, but the second one
+    # silently truncating is rather bad.
+
+    # with self.assertRaisesRegexp(ValueError, "should return >= 0"):
+    #   constant_op.constant([[3, 2, 1], BadList()])
+    # with self.assertRaisesRegexp(ValueError, "should return >= 0"):
+    #   constant_op.constant([[], BadList()])
+
   def testSparseValuesRaiseErrors(self):
     with self.assertRaisesRegexp(ValueError, "non-rectangular Python sequence"):
       constant_op.constant([[1, 2], [3]], dtype=dtypes_lib.int32)
diff --git a/tensorflow/python/lib/core/py_func.cc b/tensorflow/python/lib/core/py_func.cc
index 8bf831f8ba..a42282b055 100644
--- a/tensorflow/python/lib/core/py_func.cc
+++ b/tensorflow/python/lib/core/py_func.cc
@@ -22,11 +22,11 @@ limitations under the License.
 #include "tensorflow/core/framework/op_kernel.h"
 #include "tensorflow/core/lib/core/errors.h"
 #include "tensorflow/core/lib/core/threadpool.h"
-#include "tensorflow/core/lib/strings/strcat.h"
 #include "tensorflow/core/platform/macros.h"
 #include "tensorflow/core/platform/mutex.h"
 #include "tensorflow/core/platform/types.h"
 #include "tensorflow/python/lib/core/ndarray_tensor_bridge.h"
+#include "tensorflow/python/lib/core/py_util.h"
 #include <Python.h>
 
 namespace tensorflow {
@@ -133,48 +133,6 @@ bool IsSingleNone(PyObject* obj) {
   return item == Py_None;
 }
 
-// py.__class__.__name__
-const char* ClassName(PyObject* py) {
-/* PyPy doesn't have a separate C API for old-style classes. */
-#if PY_MAJOR_VERSION < 3 && !defined(PYPY_VERSION)
-  if (PyClass_Check(py))
-    return PyString_AS_STRING(
-        CHECK_NOTNULL(reinterpret_cast<PyClassObject*>(py)->cl_name));
-  if (PyInstance_Check(py))
-    return PyString_AS_STRING(CHECK_NOTNULL(
-        reinterpret_cast<PyInstanceObject*>(py)->in_class->cl_name));
-#endif
-  if (Py_TYPE(py) == &PyType_Type) {
-    return reinterpret_cast<PyTypeObject*>(py)->tp_name;
-  }
-  return Py_TYPE(py)->tp_name;
-}
-
-string PyExcFetch() {
-  CHECK(PyErr_Occurred()) << "Must only call PyExcFetch after an exception.";
-  PyObject* ptype;
-  PyObject* pvalue;
-  PyObject* ptraceback;
-  PyErr_Fetch(&ptype, &pvalue, &ptraceback);
-  PyErr_NormalizeException(&ptype, &pvalue, &ptraceback);
-  string err = ClassName(ptype);
-  if (pvalue) {
-    PyObject* str = PyObject_Str(pvalue);
-    if (str) {
-#if PY_MAJOR_VERSION < 3
-      strings::StrAppend(&err, ": ", PyString_AS_STRING(str));
-#else
-      strings::StrAppend(&err, ": ", PyUnicode_AsUTF8(str));
-#endif
-      Py_DECREF(str);
-    }
-    Py_DECREF(pvalue);
-  }
-  Py_DECREF(ptype);
-  Py_XDECREF(ptraceback);
-  return err;
-}
-
 // Calls the registered py function through the trampoline.
 Status DoCallPyFunc(PyCall* call, bool* out_log_on_error) {
   *out_log_on_error = true;
@@ -195,18 +153,18 @@ Status DoCallPyFunc(PyCall* call, bool* out_log_on_error) {
     if (PyErr_Occurred()) {
       if (PyErr_ExceptionMatches(PyExc_ValueError) ||
           PyErr_ExceptionMatches(PyExc_TypeError)) {
-        return errors::InvalidArgument(PyExcFetch());
+        return errors::InvalidArgument(PyExceptionFetch());
       } else if (PyErr_ExceptionMatches(PyExc_StopIteration)) {
         *out_log_on_error = false;
-        return errors::OutOfRange(PyExcFetch());
+        return errors::OutOfRange(PyExceptionFetch());
       } else if (PyErr_ExceptionMatches(PyExc_MemoryError)) {
-        return errors::ResourceExhausted(PyExcFetch());
+        return errors::ResourceExhausted(PyExceptionFetch());
       } else if (PyErr_ExceptionMatches(PyExc_NotImplementedError)) {
-        return errors::Unimplemented(PyExcFetch());
+        return errors::Unimplemented(PyExceptionFetch());
       } else {
         // TODO(ebrevdo): Check if exception is an OpError and use the
         // OpError.error_code property to map it back in the Status.
-        return errors::Unknown(PyExcFetch());
+        return errors::Unknown(PyExceptionFetch());
       }
     } else {
       return errors::Internal("Failed to run py callback ", call->token,
diff --git a/tensorflow/python/lib/core/py_seq_tensor.cc b/tensorflow/python/lib/core/py_seq_tensor.cc
index 71cb38f8fd..317bdc2e14 100644
--- a/tensorflow/python/lib/core/py_seq_tensor.cc
+++ b/tensorflow/python/lib/core/py_seq_tensor.cc
@@ -22,6 +22,7 @@ limitations under the License.
 #include "tensorflow/core/lib/core/stringpiece.h"
 #include "tensorflow/core/platform/types.h"
 #include "tensorflow/python/lib/core/numpy.h"
+#include "tensorflow/python/lib/core/py_util.h"
 #include "tensorflow/python/lib/core/safe_ptr.h"
 
 namespace tensorflow {
@@ -89,12 +90,25 @@ Status InferShapeAndType(PyObject* obj, TensorShape* shape, DataType* dtype) {
       *dtype = DT_STRING;
     } else if (PySequence_Check(obj)) {
       auto length = PySequence_Length(obj);
-      shape->AddDim(length);
       if (length > 0) {
+        shape->AddDim(length);
         obj = PySequence_GetItem(obj, 0);
         continue;
-      } else {
+      } else if (length == 0) {
+        shape->AddDim(length);
         *dtype = DT_INVALID;  // Invalid dtype for empty tensors.
+      } else {
+        // The sequence does not have a valid length (PySequence_Length < 0).
+        if (PyErr_Occurred()) {
+          // PySequence_Length failed and set an exception. Fetch the message
+          // and convert it to a failed status.
+          return errors::InvalidArgument(PyExceptionFetch());
+        } else {
+          // This is almost certainly dead code: PySequence_Length failed but
+          // did not set an exception.
+          return errors::InvalidArgument(
+              "Attempted to convert an invalid sequence to a Tensor.");
+        }
       }
     } else if (IsPyFloat(obj)) {
       *dtype = DT_DOUBLE;
diff --git a/tensorflow/python/lib/core/py_util.cc b/tensorflow/python/lib/core/py_util.cc
new file mode 100644
index 0000000000..2635694e23
--- /dev/null
+++ b/tensorflow/python/lib/core/py_util.cc
@@ -0,0 +1,70 @@
+/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/python/lib/core/py_util.h"
+
+#include "tensorflow/core/lib/core/errors.h"
+#include "tensorflow/core/lib/strings/strcat.h"
+#include <Python.h>
+
+namespace tensorflow {
+namespace {
+
+// py.__class__.__name__
+const char* ClassName(PyObject* py) {
+/* PyPy doesn't have a separate C API for old-style classes. */
+#if PY_MAJOR_VERSION < 3 && !defined(PYPY_VERSION)
+  if (PyClass_Check(py))
+    return PyString_AS_STRING(
+        CHECK_NOTNULL(reinterpret_cast<PyClassObject*>(py)->cl_name));
+  if (PyInstance_Check(py))
+    return PyString_AS_STRING(CHECK_NOTNULL(
+        reinterpret_cast<PyInstanceObject*>(py)->in_class->cl_name));
+#endif
+  if (Py_TYPE(py) == &PyType_Type) {
+    return reinterpret_cast<PyTypeObject*>(py)->tp_name;
+  }
+  return Py_TYPE(py)->tp_name;
+}
+
+}  // end namespace
+
+string PyExceptionFetch() {
+  CHECK(PyErr_Occurred())
+      << "Must only call PyExceptionFetch after an exception.";
+  PyObject* ptype;
+  PyObject* pvalue;
+  PyObject* ptraceback;
+  PyErr_Fetch(&ptype, &pvalue, &ptraceback);
+  PyErr_NormalizeException(&ptype, &pvalue, &ptraceback);
+  string err = ClassName(ptype);
+  if (pvalue) {
+    PyObject* str = PyObject_Str(pvalue);
+    if (str) {
+#if PY_MAJOR_VERSION < 3
+      strings::StrAppend(&err, ": ", PyString_AS_STRING(str));
+#else
+      strings::StrAppend(&err, ": ", PyUnicode_AsUTF8(str));
+#endif
+      Py_DECREF(str);
+    }
+    Py_DECREF(pvalue);
+  }
+  Py_DECREF(ptype);
+  Py_XDECREF(ptraceback);
+  return err;
+}
+
+}  // end namespace tensorflow
diff --git a/tensorflow/python/lib/core/py_util.h b/tensorflow/python/lib/core/py_util.h
new file mode 100644
index 0000000000..44dfe7ba21
--- /dev/null
+++ b/tensorflow/python/lib/core/py_util.h
@@ -0,0 +1,27 @@
+/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_PYTHON_LIB_CORE_UTIL_H_
+#define TENSORFLOW_PYTHON_LIB_CORE_UTIL_H_
+
+#include "tensorflow/core/platform/types.h"
+
+namespace tensorflow {
+// Fetch the exception message as a string. An exception must be set
+// (PyErr_Occurred() must be true).
+string PyExceptionFetch();
+}  // end namespace tensorflow
+
+#endif  // TENSORFLOW_PYTHON_LIB_CORE_UTIL_H_
diff --git a/tensorflow/tools/ci_build/ci_sanity.sh b/tensorflow/tools/ci_build/ci_sanity.sh
index 404a9a6b62..4021d794b6 100755
--- a/tensorflow/tools/ci_build/ci_sanity.sh
+++ b/tensorflow/tools/ci_build/ci_sanity.sh
@@ -99,7 +99,8 @@ do_pylint() {
 "^tensorflow/contrib/eager/python/metrics_impl\.py.*\[E0202.*method-hidden "\
 "^tensorflow/python/platform/gfile\.py.*\[E0301.*non-iterator "\
 "^tensorflow/python/keras/_impl/keras/callbacks\.py.*\[E1133.*not-an-iterable "\
-"^tensorflow/python/keras/_impl/keras/layers/recurrent\.py.*\[E0203.*access-member-before-definition"
+"^tensorflow/python/keras/_impl/keras/layers/recurrent\.py.*\[E0203.*access-member-before-definition "\
+"^tensorflow/python/kernel_tests/constant_op_eager_test.py.*\[E0303.*invalid-length-returned"
 
   echo "ERROR_WHITELIST=\"${ERROR_WHITELIST}\""
 
-- 
GitLab


From c572bc4fd7c73f4b8014ae43cdf9da5b99592f59 Mon Sep 17 00:00:00 2001
From: Sanjoy Das <sanjoy@google.com>
Date: Wed, 29 Nov 2017 10:37:28 -0800
Subject: [PATCH 0414/1225] Outline generated LLVM IR matrix-vector dot kernels

This is a code size optimization for cases that dot matrix-vectors of the same
shape repeatedly, but is also a slight performance improvment (most likely due
to better icache behavior).

PiperOrigin-RevId: 177329302
---
 .../xla/service/cpu/dot_op_emitter.cc         | 62 +++++++++++++------
 .../service/llvm_ir/kernel_support_library.cc | 44 +++++++++++++
 .../service/llvm_ir/kernel_support_library.h  | 32 ++++++++++
 3 files changed, 118 insertions(+), 20 deletions(-)

diff --git a/tensorflow/compiler/xla/service/cpu/dot_op_emitter.cc b/tensorflow/compiler/xla/service/cpu/dot_op_emitter.cc
index 8f7b478cee..4ccff756a3 100644
--- a/tensorflow/compiler/xla/service/cpu/dot_op_emitter.cc
+++ b/tensorflow/compiler/xla/service/cpu/dot_op_emitter.cc
@@ -522,8 +522,10 @@ bool DotOpEmitter::EmitLlvmIrDotIfProfitable() {
     return false;
   }
 
-  if (!primitive_util::IsFloatingPointType(dot_.shape().element_type()) &&
-      !primitive_util::IsIntegralType(dot_.shape().element_type())) {
+  PrimitiveType primitive_type = dot_.shape().element_type();
+
+  if (!primitive_util::IsFloatingPointType(primitive_type) &&
+      !primitive_util::IsIntegralType(primitive_type)) {
     return false;
   }
 
@@ -573,30 +575,50 @@ bool DotOpEmitter::EmitLlvmIrDotIfProfitable() {
   int64 tiling_factor = GetGemvTilingFactor();
   CHECK_GT(tiling_factor, 0);
 
+  llvm::Value* result_op = target_array_.GetBasePointer();
+  llvm::Value* lhs_op =
+      swap_operands ? rhs_array_.GetBasePointer() : lhs_array_.GetBasePointer();
+  llvm::Value* rhs_op =
+      swap_operands ? lhs_array_.GetBasePointer() : rhs_array_.GetBasePointer();
+
   if (is_column_major_matrix_vector) {
     VLOG(2) << "Emitting column major matrix-vector multiply with m = " << m
             << " and k = " << k;
-    ColumnMajorMatrixVectorProductEmitter emitter(
-        dot_.shape().element_type(), /*tile_rows=*/8,
-        /*tile_cols=*/tiling_factor, m, k,
-        swap_operands ? rhs_array_.GetBasePointer()
-                      : lhs_array_.GetBasePointer(),
-        swap_operands ? lhs_array_.GetBasePointer()
-                      : rhs_array_.GetBasePointer(),
-        target_array_.GetBasePointer(), ir_builder_);
-    emitter.Emit();
+    int64 tile_rows = 8;
+    int64 tile_cols = tiling_factor;
+
+    string kernel_name = tensorflow::strings::StrCat(
+        "col_major_gemv_", PrimitiveType_Name(primitive_type), "_", tile_rows,
+        "_", tile_cols, "_", m, "_", k);
+
+    KernelSupportLibrary::EmitAndCallOutlinedKernel(
+        ir_builder_, kernel_name, lhs_op, rhs_op, result_op,
+        [this, tile_rows, tile_cols, m, k, primitive_type](
+            llvm::Value* lhs_op, llvm::Value* rhs_op, llvm::Value* result_op) {
+          ColumnMajorMatrixVectorProductEmitter emitter(
+              primitive_type, tile_rows, tile_cols, m, k, lhs_op, rhs_op,
+              result_op, ir_builder_);
+          emitter.Emit();
+        });
   } else {
     VLOG(2) << "Emitting row major matrix-vector multiply with m = " << m
             << " and k = " << k;
-    RowMajorMatrixVectorProductEmitter emitter(
-        dot_.shape().element_type(), /*tile_rows=*/tiling_factor,
-        /*tile_cols=*/8, m, k,
-        swap_operands ? rhs_array_.GetBasePointer()
-                      : lhs_array_.GetBasePointer(),
-        swap_operands ? lhs_array_.GetBasePointer()
-                      : rhs_array_.GetBasePointer(),
-        target_array_.GetBasePointer(), ir_builder_);
-    emitter.Emit();
+    int64 tile_rows = tiling_factor;
+    int64 tile_cols = 8;
+
+    string kernel_name = tensorflow::strings::StrCat(
+        "row_major_gemv_", PrimitiveType_Name(primitive_type), "_", tile_rows,
+        "_", tile_cols, "_", m, "_", k);
+
+    KernelSupportLibrary::EmitAndCallOutlinedKernel(
+        ir_builder_, kernel_name, lhs_op, rhs_op, result_op,
+        [this, tile_rows, tile_cols, m, k, primitive_type](
+            llvm::Value* lhs_op, llvm::Value* rhs_op, llvm::Value* result_op) {
+          RowMajorMatrixVectorProductEmitter emitter(
+              primitive_type, tile_rows, tile_cols, m, k, lhs_op, rhs_op,
+              result_op, ir_builder_);
+          emitter.Emit();
+        });
   }
 
   return true;
diff --git a/tensorflow/compiler/xla/service/llvm_ir/kernel_support_library.cc b/tensorflow/compiler/xla/service/llvm_ir/kernel_support_library.cc
index 29cc0f81bd..d951a37d5d 100644
--- a/tensorflow/compiler/xla/service/llvm_ir/kernel_support_library.cc
+++ b/tensorflow/compiler/xla/service/llvm_ir/kernel_support_library.cc
@@ -16,6 +16,7 @@ limitations under the License.
 #include "tensorflow/compiler/xla/service/llvm_ir/kernel_support_library.h"
 
 #include "tensorflow/compiler/xla/service/llvm_ir/llvm_loop.h"
+#include "tensorflow/compiler/xla/service/llvm_ir/llvm_util.h"
 
 namespace xla {
 void KernelSupportLibrary::For(
@@ -62,4 +63,47 @@ void KernelSupportLibrary::If(
   false_block_generator();
   llvm_ir::SetToLastInsertPoint(if_data.after_block, ir_builder_);
 }
+
+void KernelSupportLibrary::EmitAndCallOutlinedKernel(
+    llvm::IRBuilder<>* ir_builder, tensorflow::StringPiece kernel_name,
+    KernelSupportLibrary::ArgumentVector arguments,
+    const std::function<void(KernelSupportLibrary::ArgumentVector)>&
+        kernel_body_generator) {
+  llvm::Module* module = ir_builder->GetInsertBlock()->getModule();
+  llvm::Function* function =
+      module->getFunction(llvm_ir::AsStringRef(kernel_name));
+  if (!function) {
+    VLOG(2) << "Generating kernel for " << kernel_name;
+    std::vector<llvm::Type*> arg_types;
+    std::transform(arguments.begin(), arguments.end(),
+                   std::back_inserter(arg_types),
+                   [](llvm::Value* arg) { return arg->getType(); });
+
+    auto* function_type = llvm::FunctionType::get(
+        ir_builder->getVoidTy(), arg_types, /*isVarArg=*/false);
+
+    function = llvm::Function::Create(
+        function_type, llvm::GlobalValue::InternalLinkage,
+        llvm_ir::AsStringRef(kernel_name), module);
+
+    llvm::IRBuilder<>::InsertPointGuard guard(*ir_builder);
+
+    auto* entry_bb =
+        llvm::BasicBlock::Create(ir_builder->getContext(), "entry", function);
+    auto* return_inst = llvm::ReturnInst::Create(ir_builder->getContext(),
+                                                 /*retVal=*/nullptr, entry_bb);
+    // Set the insert point to before return_inst.
+    ir_builder->SetInsertPoint(return_inst);
+
+    std::vector<llvm::Value*> arg_values;
+    std::transform(function->arg_begin(), function->arg_end(),
+                   std::back_inserter(arg_values), std::addressof<llvm::Value>);
+    kernel_body_generator(arg_values);
+  } else {
+    VLOG(3) << "Re-using kernel for " << kernel_name;
+  }
+
+  ir_builder->CreateCall(function, llvm_ir::AsArrayRef(arguments));
+}
+
 }  // namespace xla
diff --git a/tensorflow/compiler/xla/service/llvm_ir/kernel_support_library.h b/tensorflow/compiler/xla/service/llvm_ir/kernel_support_library.h
index 9bafb7b577..997b84bb27 100644
--- a/tensorflow/compiler/xla/service/llvm_ir/kernel_support_library.h
+++ b/tensorflow/compiler/xla/service/llvm_ir/kernel_support_library.h
@@ -118,6 +118,38 @@ class KernelSupportLibrary {
           const std::function<void()>& true_block_generator,
           const std::function<void()>& false_block_generator = []() {});
 
+  using ArgumentVector = tensorflow::gtl::ArraySlice<llvm::Value*>;
+
+  // Generates the following control flow structure:
+  //
+  //  define @`kernel_name`(arg0, arg1, ... arg`arguments.size()`) {
+  //    kernel_body_generator({arg0, arg1, ... arg`arguments.size()`});
+  //  }
+  //
+  //  ...
+  //  call @`kernel_name`(arguments[0], arguments[1] ...)
+  //  ...
+  //
+  // If a function called `kernel_name` is already present in the module then
+  // that function is re-used.  In that sense we're using the llvm::Module as a
+  // cache of outlined kernels, keyed by function name.
+  static void EmitAndCallOutlinedKernel(
+      llvm::IRBuilder<>* ir_builder, tensorflow::StringPiece kernel_name,
+      ArgumentVector arguments,
+      const std::function<void(ArgumentVector)>& kernel_body_generator);
+
+  // Thin wrapper around the more general EmitAndCallOutlinedKernel above.
+  static void EmitAndCallOutlinedKernel(
+      llvm::IRBuilder<>* ir_builder, tensorflow::StringPiece kernel_name,
+      llvm::Value* arg0, llvm::Value* arg1, llvm::Value* arg2,
+      const std::function<void(llvm::Value*, llvm::Value*, llvm::Value*)>&
+          kernel_body_generator) {
+    EmitAndCallOutlinedKernel(
+        ir_builder, kernel_name, {arg0, arg1, arg2}, [&](ArgumentVector args) {
+          kernel_body_generator(args[0], args[1], args[2]);
+        });
+  }
+
  private:
   llvm::IRBuilder<>* ir_builder_;
   bool prevent_unrolling_;
-- 
GitLab


From 78a4873cfa4562cf071492636f03e13fcb188bd8 Mon Sep 17 00:00:00 2001
From: Asim Shankar <ashankar@google.com>
Date: Wed, 29 Nov 2017 11:18:38 -0800
Subject: [PATCH 0415/1225] Go: Bugfix: Make list-of-shape attributes in an
 operation work.

By respecting cgo rules on pointers.
Without the change to graph.go, the newly added test would fail with:

panic: runtime error: cgo argument has Go pointer to Go pointer

in the call to the C function TF_SetAttrShapeList.

Fixes #14891

PiperOrigin-RevId: 177336663
---
 tensorflow/go/graph.go      | 64 +++++++++++++++++++-------------
 tensorflow/go/op/op_test.go | 73 +++++++++++++++++++++++++++++++++++++
 2 files changed, 112 insertions(+), 25 deletions(-)

diff --git a/tensorflow/go/graph.go b/tensorflow/go/graph.go
index 46c600eab1..f200a8e00a 100644
--- a/tensorflow/go/graph.go
+++ b/tensorflow/go/graph.go
@@ -20,6 +20,24 @@ package tensorflow
 //
 // #include <stdlib.h>
 // #include <string.h>
+//
+// void TF_SetAttrShapeList_Helper(TF_OperationDescription* desc,
+//                                 const char* attr_name,
+//                                 const int64_t* flat_dims,
+//                                 const int* num_dims,
+//                                 int num_shapes) {
+//  const int64_t** dims =
+//    (const int64_t**)malloc(sizeof(const int64_t*) * num_shapes);
+//  for (int i = 0; i < num_shapes; i++) {
+//    dims[i] = flat_dims;
+//    if (num_dims[i] > 0) {
+//      // flat_dims will be NULL iff num_shapes is 0 or all elements in num_dims are <= 0.
+//      flat_dims += num_dims[i];
+//    }
+//  }
+//  TF_SetAttrShapeList(desc, attr_name, dims, num_dims, num_shapes);
+//  free(dims);
+// }
 import "C"
 
 import (
@@ -289,41 +307,37 @@ func setAttr(cdesc *C.TF_OperationDescription, status *status, name string, valu
 			return fmt.Errorf("bad value for attribute %q: %v", name, err)
 		}
 	case Shape:
-		ndims, dims := cshape(value)
+		ndims := C.int(value.NumDimensions())
 		var dimsp *C.int64_t
 		if ndims > 0 {
+			dims := make([]C.int64_t, ndims)
+			for i, d := range value.dims {
+				dims[i] = C.int64_t(d)
+			}
 			dimsp = &dims[0]
 		}
 		C.TF_SetAttrShape(cdesc, cAttrName, dimsp, ndims)
 	case []Shape:
-		ndims := make([]C.int, len(value))
-		dims := make([][]C.int64_t, len(value))
-		dimsp := make([]*C.int64_t, len(value))
-		for i, s := range value {
-			ndims[i], dims[i] = cshape(s)
-			if ndims[i] > 0 {
-				dimsp[i] = &dims[i][0]
-			}
-		}
-		if len(value) > 0 {
-			C.TF_SetAttrShapeList(cdesc, cAttrName, &dimsp[0], &ndims[0], C.int(len(value)))
-		} else {
+		if len(value) == 0 {
 			C.TF_SetAttrShapeList(cdesc, cAttrName, nil, nil, 0)
+		} else {
+			var flatDims []C.int64_t
+			ndims := make([]C.int, len(value))
+			for i, s := range value {
+				nd := s.NumDimensions()
+				ndims[i] = C.int(nd)
+				for _, d := range s.dims {
+					flatDims = append(flatDims, C.int64_t(d))
+				}
+			}
+			var flatDimsp *C.int64_t
+			if len(flatDims) > 0 {
+				flatDimsp = &flatDims[0]
+			}
+			C.TF_SetAttrShapeList_Helper(cdesc, cAttrName, flatDimsp, &ndims[0], C.int(len(value)))
 		}
 	default:
 		return fmt.Errorf("attribute %q has a type (%T) which is not valid for operation attributes", name, value)
 	}
 	return nil
 }
-
-func cshape(s Shape) (C.int, []C.int64_t) {
-	ndims := C.int(s.NumDimensions())
-	if ndims < 0 {
-		return -1, nil
-	}
-	dims := make([]C.int64_t, ndims)
-	for i, s := range s.dims {
-		dims[i] = C.int64_t(s)
-	}
-	return ndims, dims
-}
diff --git a/tensorflow/go/op/op_test.go b/tensorflow/go/op/op_test.go
index 2451ba3606..842dee9ffe 100644
--- a/tensorflow/go/op/op_test.go
+++ b/tensorflow/go/op/op_test.go
@@ -58,3 +58,76 @@ func TestAddOperationFailure(t *testing.T) {
 	_ = resize.Shape()
 	t.Errorf("resize.Shape() should have paniced since the underlying Operation was not created")
 }
+
+func TestShapeAttribute(t *testing.T) {
+	s := NewScope()
+	x := Placeholder(s.SubScope("x"), tf.Int32, PlaceholderShape(tf.MakeShape(1)))
+	y := Placeholder(s.SubScope("y"), tf.Int32, PlaceholderShape(tf.Shape{}))
+	z := Add(s, x, y)
+	graph, err := s.Finalize()
+	if err != nil {
+		t.Fatal(err)
+	}
+	sess, err := tf.NewSession(graph, nil)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	value, err := tf.NewTensor([]int32{7})
+	if err != nil {
+		t.Fatal(err)
+	}
+	feeds := map[tf.Output]*tf.Tensor{
+		x: value,
+		y: value,
+	}
+	fetched, err := sess.Run(feeds, []tf.Output{z}, nil)
+	if err != nil {
+		t.Fatal(err)
+	}
+	if got, want := len(fetched), 1; got != want {
+		t.Fatalf("Fetched %d tensors, expected %d", got, want)
+	}
+	if got, want := fetched[0].Value().([]int32), []int32{14}; len(got) != len(want) || len(got) != 1 || got[0] != want[0] {
+		t.Fatalf("Got %v, want %v", got, want)
+	}
+}
+
+func TestDataset(t *testing.T) {
+	var (
+		s = NewScope()
+
+		// The use of a non-scalar here is inspired by
+		// https://github.com/tensorflow/tensorflow/issues/14891
+		c       = Const(s, []int32{21718, 31415})
+		types   = []tf.DataType{c.DataType()}
+		shapes  = []tf.Shape{c.Shape()}
+		dataset = TensorDataset(s, []tf.Output{c}, shapes)
+
+		iterator = Iterator(s, "", "", types, shapes)
+		next     = IteratorGetNext(s, iterator, types, shapes)
+		init     = MakeIterator(s, dataset, iterator)
+	)
+	graph, err := s.Finalize()
+	if err != nil {
+		t.Fatal(err)
+	}
+	sess, err := tf.NewSession(graph, nil)
+	if err != nil {
+		t.Fatal(err)
+	}
+	if _, err := sess.Run(nil, nil, []*tf.Operation{init}); err != nil {
+		t.Fatal(err)
+	}
+	results, err := sess.Run(nil, next, nil)
+	if err != nil {
+		t.Fatal(err)
+	}
+	got := results[0].Value().([]int32)
+	if len(got) != 2 || got[0] != 21718 || got[1] != 31415 {
+		t.Errorf("Got %v, want {21718, 31415}", got)
+	}
+	if _, err := sess.Run(nil, next, nil); err == nil {
+		t.Errorf("Expected sess.Run() to fail since the iterator should have reached the end of the dataset")
+	}
+}
-- 
GitLab


From 71f22bbab05e25c5f026c4343664091cc117b5ab Mon Sep 17 00:00:00 2001
From: Skye Wanderman-Milne <skyewm@google.com>
Date: Wed, 29 Nov 2017 11:27:08 -0800
Subject: [PATCH 0416/1225] (Temporarily) call Graph._add_op outside of
 Operation.__init__ again.

This change partially undoes my previous commit
(https://github.com/tensorflow/tensorflow/commit/f4c18a0eb05e21bae397c9c16527ff8080cae6b8).
Without this change, if an op is added that has invalid input shapes
and also requires a kernel label, the op will be added to the graph
before shape inference is run, but then the shape inference error will
prevent the kernel label from being applied. The placer will then
complain about the missing label when the graph is run.

This is only a problem with the C API disabled. With the C API
enabled, shape inference is performed when the TF_Operation is created
in Operation.__init__. Thus we can and should move the _add_op call
back to Operation.__init__ once the _USE_C_API flag is removed.

PiperOrigin-RevId: 177338123
---
 .../copy_graph/python/util/copy_elements.py    |  1 +
 tensorflow/contrib/graph_editor/transform.py   |  3 +++
 tensorflow/python/framework/ops.py             |  5 ++---
 tensorflow/python/framework/ops_test.py        | 13 +++++++++++++
 tensorflow/python/framework/test_ops.cc        | 18 ++++++++++++++++++
 5 files changed, 37 insertions(+), 3 deletions(-)

diff --git a/tensorflow/contrib/copy_graph/python/util/copy_elements.py b/tensorflow/contrib/copy_graph/python/util/copy_elements.py
index d060eda0a7..bae66ffd42 100644
--- a/tensorflow/contrib/copy_graph/python/util/copy_elements.py
+++ b/tensorflow/contrib/copy_graph/python/util/copy_elements.py
@@ -225,6 +225,7 @@ def copy_op_to_graph(org_instance, to_graph, variables,
                            new_original_op,
                            op_def)
     #Use Graph's hidden methods to add the op
+    to_graph._add_op(new_op)  # pylint: disable=protected-access
     to_graph._record_op_seen_by_control_dependencies(new_op)
     for device_function in reversed(to_graph._device_function_stack):
       new_op._set_device(device_function(new_op))
diff --git a/tensorflow/contrib/graph_editor/transform.py b/tensorflow/contrib/graph_editor/transform.py
index 2a97a79070..14ac529665 100644
--- a/tensorflow/contrib/graph_editor/transform.py
+++ b/tensorflow/contrib/graph_editor/transform.py
@@ -173,6 +173,9 @@ def copy_op_handler(info, op, copy_shape=True):
   if op._original_op:
     op_._original_op = op._original_op
 
+  # Add op to the graph
+  info.graph_._add_op(op_)
+
   return op_, op_.outputs
 
 
diff --git a/tensorflow/python/framework/ops.py b/tensorflow/python/framework/ops.py
index cfef5e35f4..2217513966 100644
--- a/tensorflow/python/framework/ops.py
+++ b/tensorflow/python/framework/ops.py
@@ -1635,8 +1635,6 @@ class Operation(object):
     self._id_value = self._graph._next_id()  # pylint: disable=protected-access
     self._recompute_node_def()
 
-    self._graph._add_op(self)  # pylint: disable=protected-access
-
   def _reconstruct_sequence_inputs(self, op_def, inputs, attrs):
     """Regroups a flat list of input tensors into scalar and sequence inputs.
 
@@ -3100,7 +3098,6 @@ class Graph(object):
         input_types=input_types,
         original_op=self._default_original_op,
         op_def=op_def)
-
     self._create_op_helper(ret, compute_shapes=compute_shapes,
                            compute_device=compute_device)
     return ret
@@ -3139,6 +3136,8 @@ class Graph(object):
     # compute_shapes argument.
     if op._c_op or compute_shapes:  # pylint: disable=protected-access
       set_shapes_for_outputs(op)
+    # TODO(b/XXXX): move to Operation.__init__ once _USE_C_API flag is removed.
+    self._add_op(op)
 
     # Apply any additional attributes requested. Do not overwrite any existing
     # attributes.
diff --git a/tensorflow/python/framework/ops_test.py b/tensorflow/python/framework/ops_test.py
index 3eae3b5a25..b1ad6ad744 100644
--- a/tensorflow/python/framework/ops_test.py
+++ b/tensorflow/python/framework/ops_test.py
@@ -274,6 +274,7 @@ class OperationTest(test_util.TensorFlowTestCase):
     op1 = ops.Operation(
         ops._NodeDef("RefOutputFloatOutput", "op1"), g, [],
         [dtypes.float32_ref, dtypes.float32])
+    g._add_op(op1)
     self.assertProtoEquals("op:'RefOutputFloatOutput' name:'op1'", op1.node_def)
     self.assertEquals([], list(op1.inputs))
     ref_t, nonref_t = op1.values()
@@ -282,12 +283,14 @@ class OperationTest(test_util.TensorFlowTestCase):
         ops._NodeDef("RefInputFloatInput", "op2"),
         g, [ref_t, nonref_t], [],
         input_types=[dtypes.float32_ref, dtypes.float32])
+    g._add_op(op2)
     self.assertProtoEquals(
         "op:'RefInputFloatInput' name:'op2' input:'op1' input:'op1:1'",
         op2.node_def)
     self.assertEquals([ref_t, nonref_t], list(op2.inputs))
     op3 = ops.Operation(
         ops._NodeDef("TwoFloatInputs", "op3"), g, [ref_t, nonref_t], [])
+    g._add_op(op3)
     self.assertProtoEquals(
         "op:'TwoFloatInputs' name:'op3' input:'op1' input:'op1:1'",
         op3.node_def)
@@ -1884,6 +1887,16 @@ class GraphTest(test_util.TensorFlowTestCase):
       with session.Session() as sess:
         sess.run(a)
 
+  def testRunnableAfterInvalidShapeWithKernelLabelMap(self):
+    g = ops.Graph()
+    with g.as_default():
+      with g._kernel_label_map({"KernelLabelRequired": "overload_1"}):
+        with self.assertRaises(ValueError):
+          test_ops.kernel_label_required(1)
+      a = constant_op.constant(1)
+      with session.Session() as sess:
+        sess.run(a)
+
 
 @test_util.with_c_api
 class AttrScopeTest(test_util.TensorFlowTestCase):
diff --git a/tensorflow/python/framework/test_ops.cc b/tensorflow/python/framework/test_ops.cc
index 25bb7af20c..dbabce0962 100644
--- a/tensorflow/python/framework/test_ops.cc
+++ b/tensorflow/python/framework/test_ops.cc
@@ -26,6 +26,16 @@ REGISTER_OP("KernelLabel")
     .Output("result: string")
     .SetShapeFn(shape_inference::ScalarShape);
 
+REGISTER_OP("KernelLabelRequired")
+    .Input("input: int32")
+    .Output("result: string")
+    .SetShapeFn([](shape_inference::InferenceContext* c) {
+      shape_inference::ShapeHandle out;
+      TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 1, &out));
+      c->set_output(0, c->Scalar());
+      return Status::OK();
+    });
+
 REGISTER_OP("GraphDefVersion")
     .Output("version: int32")
     .SetIsStateful()
@@ -104,6 +114,14 @@ REGISTER_KERNEL_BUILDER(Name("KernelLabel")
                             .Label("overload_2"),
                         KernelLabelOp<OVERLOAD_2_LABEL>);
 
+// All "KernelLabelRequired" kernels have labels
+REGISTER_KERNEL_BUILDER(
+    Name("KernelLabelRequired").Device(DEVICE_CPU).Label("overload_1"),
+    KernelLabelOp<OVERLOAD_1_LABEL>);
+REGISTER_KERNEL_BUILDER(
+    Name("KernelLabelRequired").Device(DEVICE_CPU).Label("overload_2"),
+    KernelLabelOp<OVERLOAD_2_LABEL>);
+
 class GraphDefVersionOp : public OpKernel {
  public:
   explicit GraphDefVersionOp(OpKernelConstruction* ctx)
-- 
GitLab


From ad1310a87caa14c495ad7ab47db7572443b2e7ef Mon Sep 17 00:00:00 2001
From: Saurabh Saxena <srbs@google.com>
Date: Wed, 29 Nov 2017 11:36:36 -0800
Subject: [PATCH 0417/1225] Add RandomDataset which generates pseudo random
 number of type int64. Add tf.contrib.data.shuffle_and_repeat which reshuffles
 its input on each epoch. Going forward, this will replace
 reshuffle_each_iteration=true.

PiperOrigin-RevId: 177339570
---
 tensorflow/contrib/data/BUILD                 |   1 +
 tensorflow/contrib/data/__init__.py           |   1 +
 .../contrib/data/python/kernel_tests/BUILD    |   4 +-
 .../kernel_tests/shuffle_dataset_op_test.py   |  80 +++++++++
 tensorflow/contrib/data/python/ops/BUILD      |  32 ++++
 .../contrib/data/python/ops/random_ops.py     |  67 ++++++++
 .../contrib/data/python/ops/shuffle_ops.py    |  69 ++++++++
 .../base_api/api_def_RandomDataset.pbtxt      |  18 ++
 tensorflow/core/kernels/BUILD                 |  13 ++
 tensorflow/core/kernels/random_dataset_op.cc  | 154 ++++++++++++++++++
 tensorflow/core/ops/dataset_ops.cc            |  18 ++
 tensorflow/python/data/ops/dataset_ops.py     |  33 +++-
 12 files changed, 486 insertions(+), 4 deletions(-)
 create mode 100644 tensorflow/contrib/data/python/ops/random_ops.py
 create mode 100644 tensorflow/contrib/data/python/ops/shuffle_ops.py
 create mode 100644 tensorflow/core/api_def/base_api/api_def_RandomDataset.pbtxt
 create mode 100644 tensorflow/core/kernels/random_dataset_op.cc

diff --git a/tensorflow/contrib/data/BUILD b/tensorflow/contrib/data/BUILD
index f7d8a084d9..3b1c33063f 100644
--- a/tensorflow/contrib/data/BUILD
+++ b/tensorflow/contrib/data/BUILD
@@ -18,6 +18,7 @@ py_library(
         "//tensorflow/contrib/data/python/ops:dataset_ops",
         "//tensorflow/contrib/data/python/ops:iterator_ops",
         "//tensorflow/contrib/data/python/ops:readers",
+        "//tensorflow/contrib/data/python/ops:shuffle_ops",
         "//tensorflow/contrib/data/python/ops:transformation_ops",
         "//tensorflow/python:util",
         "//tensorflow/python/data/ops:iterator_ops",
diff --git a/tensorflow/contrib/data/__init__.py b/tensorflow/contrib/data/__init__.py
index 7c6244f22b..c9ad091bd4 100644
--- a/tensorflow/contrib/data/__init__.py
+++ b/tensorflow/contrib/data/__init__.py
@@ -66,6 +66,7 @@ from tensorflow.contrib.data.python.ops.readers import TextLineDataset
 from tensorflow.contrib.data.python.ops.readers import TFRecordDataset
 from tensorflow.contrib.data.python.ops.resampling import rejection_resample
 from tensorflow.contrib.data.python.ops.scan_ops import scan
+from tensorflow.contrib.data.python.ops.shuffle_ops import shuffle_and_repeat
 from tensorflow.python.data.ops.iterator_ops import Iterator
 # pylint: enable=unused-import
 
diff --git a/tensorflow/contrib/data/python/kernel_tests/BUILD b/tensorflow/contrib/data/python/kernel_tests/BUILD
index 4cb69d7c8e..43431ca2c5 100644
--- a/tensorflow/contrib/data/python/kernel_tests/BUILD
+++ b/tensorflow/contrib/data/python/kernel_tests/BUILD
@@ -414,12 +414,14 @@ py_test(
 
 py_test(
     name = "shuffle_dataset_op_test",
-    size = "small",
+    size = "medium",
     srcs = ["shuffle_dataset_op_test.py"],
     srcs_version = "PY2AND3",
     deps = [
+        ":dataset_serialization_test",
         "//tensorflow/contrib/data/python/ops:dataset_ops",
         "//tensorflow/contrib/data/python/ops:iterator_ops",
+        "//tensorflow/contrib/data/python/ops:shuffle_ops",
         "//tensorflow/python:array_ops",
         "//tensorflow/python:client_testlib",
         "//tensorflow/python:constant_op",
diff --git a/tensorflow/contrib/data/python/kernel_tests/shuffle_dataset_op_test.py b/tensorflow/contrib/data/python/kernel_tests/shuffle_dataset_op_test.py
index 6b5b53cc0f..ba1be0690f 100644
--- a/tensorflow/contrib/data/python/kernel_tests/shuffle_dataset_op_test.py
+++ b/tensorflow/contrib/data/python/kernel_tests/shuffle_dataset_op_test.py
@@ -22,8 +22,10 @@ import os
 
 import numpy as np
 
+from tensorflow.contrib.data.python.kernel_tests import dataset_serialization_test_base
 from tensorflow.contrib.data.python.ops import dataset_ops as contrib_dataset_ops
 from tensorflow.contrib.data.python.ops import iterator_ops as contrib_iterator_ops
+from tensorflow.contrib.data.python.ops import shuffle_ops
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.data.ops import iterator_ops
 from tensorflow.python.framework import constant_op
@@ -156,6 +158,13 @@ class ShuffleDatasetTest(test.TestCase):
     for i in range(5):
       self.assertEqual(10, counts[i])
 
+  def testSeedNoneSeed2NonNone(self):
+    with self.assertRaises(ValueError):
+      dataset_ops.ShuffleDataset(dataset_ops.Dataset.range(5),
+                                 buffer_size=1,
+                                 seed=None,
+                                 seed2=10)
+
 
 class ShuffleDatasetSerializationTest(test.TestCase):
 
@@ -474,5 +483,76 @@ class ShuffleDatasetSerializationTest(test.TestCase):
       self.assertEqual(expected_outputs_sorted, sorted(actual))
 
 
+class ShuffleAndRepeatTest(
+    dataset_serialization_test_base.DatasetSerializationTestBase):
+
+  def _build_ds(self, seed, count=5):
+    return dataset_ops.Dataset.range(20).apply(
+        shuffle_ops.shuffle_and_repeat(buffer_size=5, count=count, seed=seed))
+
+  def testCorrectOutput(self):
+    output = self.gen_outputs(lambda: self._build_ds(10), [], 100)
+    self.assertSequenceEqual(
+        sorted(output), sorted(
+            np.array([range(20) for _ in range(5)]).flatten()))
+    for i in range(5):
+      self.assertSequenceEqual(sorted(output[i * 20:(i + 1) * 20]), range(20))
+
+  def testReshuffling(self):
+    # Check that the output orders of different epochs are indeed different.
+    output = self.gen_outputs(lambda: self._build_ds(10), [], 100)
+    for i in range(4):
+      epoch1 = output[i * 20:(i + 1) * 20]
+      epoch2 = output[(i + 1) * 20:(i + 2) * 20]
+      self.assertNotEqual(epoch1, epoch2)
+
+  def testSameOrderForSameSeeds(self):
+    output1 = self.gen_outputs(lambda: self._build_ds(10), [], 100)
+    output2 = self.gen_outputs(lambda: self._build_ds(10), [], 100)
+    self.assertEqual(output1, output2)
+
+  def testDifferentOrderForDifferentSeeds(self):
+    output1 = self.gen_outputs(lambda: self._build_ds(10), [], 100)
+    output2 = self.gen_outputs(lambda: self._build_ds(20), [], 100)
+    self.assertNotEqual(output1, output2)
+    self.assertEqual(sorted(output1), sorted(output2))
+
+  def testCountNone(self):
+    output1 = self.gen_outputs(
+        lambda: self._build_ds(10, count=None), [], 100, verify_exhausted=False)
+    output2 = self.gen_outputs(
+        lambda: self._build_ds(20, count=None), [], 100, verify_exhausted=False)
+    self.assertNotEqual(output1, output2)
+    self.assertEqual(sorted(output1), sorted(output2))
+
+  def testCountMinusOne(self):
+    output1 = self.gen_outputs(
+        lambda: self._build_ds(10, count=-1), [], 100, verify_exhausted=False)
+    output2 = self.gen_outputs(
+        lambda: self._build_ds(20, count=-1), [], 100, verify_exhausted=False)
+    self.assertNotEqual(output1, output2)
+    self.assertEqual(sorted(output1), sorted(output2))
+
+  def testInfiniteOutputs(self):
+    # Asserting that the iterator is exhausted after producing 100 items should
+    # fail.
+    with self.assertRaises(AssertionError):
+      self.gen_outputs(lambda: self._build_ds(10, count=None), [], 100)
+    with self.assertRaises(AssertionError):
+      self.gen_outputs(lambda: self._build_ds(10, count=-1), [], 100)
+
+
+class ShuffleAndRepeatSerializationTest(
+    dataset_serialization_test_base.DatasetSerializationTestBase):
+
+  def _build_ds(self, seed):
+    return dataset_ops.Dataset.range(20).apply(
+        shuffle_ops.shuffle_and_repeat(buffer_size=5, count=5, seed=seed))
+
+  def testCore(self):
+    self.run_core_tests(lambda: self._build_ds(10), lambda: self._build_ds(20),
+                        100)
+
+
 if __name__ == "__main__":
   test.main()
diff --git a/tensorflow/contrib/data/python/ops/BUILD b/tensorflow/contrib/data/python/ops/BUILD
index 25ed58cdf5..1f35ee056b 100644
--- a/tensorflow/contrib/data/python/ops/BUILD
+++ b/tensorflow/contrib/data/python/ops/BUILD
@@ -40,6 +40,25 @@ py_library(
     ],
 )
 
+py_library(
+    name = "random_ops",
+    srcs = [
+        "random_ops.py",
+    ],
+    srcs_version = "PY2AND3",
+    deps = [
+        "//tensorflow/python:constant_op",
+        "//tensorflow/python:dataset_ops_gen",
+        "//tensorflow/python:dtypes",
+        "//tensorflow/python:framework_ops",
+        "//tensorflow/python:random_seed",
+        "//tensorflow/python:tensor_shape",
+        "//tensorflow/python/data/ops:dataset_ops",
+        "//tensorflow/python/data/util:nest",
+        "//tensorflow/python/data/util:sparse",
+    ],
+)
+
 py_library(
     name = "readers",
     srcs = [
@@ -62,6 +81,19 @@ py_library(
     ],
 )
 
+py_library(
+    name = "shuffle_ops",
+    srcs = [
+        "shuffle_ops.py",
+    ],
+    srcs_version = "PY2AND3",
+    deps = [
+        ":random_ops",
+        ":transformation_ops",
+        "//tensorflow/python/data/ops:dataset_ops",
+    ],
+)
+
 py_library(
     name = "transformation_ops",
     srcs = [
diff --git a/tensorflow/contrib/data/python/ops/random_ops.py b/tensorflow/contrib/data/python/ops/random_ops.py
new file mode 100644
index 0000000000..7d727165fe
--- /dev/null
+++ b/tensorflow/contrib/data/python/ops/random_ops.py
@@ -0,0 +1,67 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Datasets for random number generators."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from tensorflow.python.data.ops import dataset_ops
+from tensorflow.python.data.util import nest
+from tensorflow.python.data.util import sparse
+from tensorflow.python.framework import constant_op
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import ops
+from tensorflow.python.framework import random_seed
+from tensorflow.python.framework import tensor_shape
+from tensorflow.python.ops import gen_dataset_ops
+
+
+class RandomDataset(dataset_ops.Dataset):
+  """A `Dataset` of pseudorandom values."""
+
+  def __init__(self, seed=None):
+    """A `Dataset` of pseudorandom values."""
+    super(RandomDataset, self).__init__()
+    seed, seed2 = random_seed.get_seed(seed)
+    if seed is None:
+      self._seed = constant_op.constant(0, dtype=dtypes.int64, name="seed")
+    else:
+      self._seed = ops.convert_to_tensor(seed, dtype=dtypes.int64, name="seed")
+    if seed2 is None:
+      self._seed2 = constant_op.constant(0, dtype=dtypes.int64, name="seed2")
+    else:
+      self._seed2 = ops.convert_to_tensor(
+          seed2, dtype=dtypes.int64, name="seed2")
+
+  def _as_variant_tensor(self):
+    return gen_dataset_ops.random_dataset(
+        seed=self._seed,
+        seed2=self._seed2,
+        output_shapes=nest.flatten(
+            sparse.as_dense_shapes(self.output_shapes, self.output_classes)),
+        output_types=nest.flatten(
+            sparse.as_dense_types(self.output_types, self.output_classes)))
+
+  @property
+  def output_classes(self):
+    return ops.Tensor
+
+  @property
+  def output_shapes(self):
+    return tensor_shape.scalar()
+
+  @property
+  def output_types(self):
+    return dtypes.int64
diff --git a/tensorflow/contrib/data/python/ops/shuffle_ops.py b/tensorflow/contrib/data/python/ops/shuffle_ops.py
new file mode 100644
index 0000000000..460732d65e
--- /dev/null
+++ b/tensorflow/contrib/data/python/ops/shuffle_ops.py
@@ -0,0 +1,69 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Experimental shuffle ops."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from tensorflow.contrib.data.python.ops import batching
+from tensorflow.contrib.data.python.ops import random_ops
+from tensorflow.python.data.ops import dataset_ops
+
+
+def shuffle_and_repeat(buffer_size, count=None, seed=None):
+  """Shuffles and repeats a Dataset returning a new permutation for each epoch.
+
+  `dataset.apply(tf.contrib.data.shuffle_and_repeat(buffer_size, count))`
+
+  is equivalent to
+
+  `dataset.shuffle(buffer_size, reshuffle_each_iteration=True).repeat(count)`
+
+  The difference is that the latter dataset is not serializable. So,
+  if you need to checkpoint an input pipeline with reshuffling you must use
+  this implementation.
+
+  Args:
+    buffer_size: A `tf.int64` scalar `tf.Tensor`, representing the
+      maximum number elements that will be buffered when prefetching.
+    count: (Optional.) A `tf.int64` scalar `tf.Tensor`, representing the
+      number of times the dataset should be repeated. The default behavior
+      (if `count` is `None` or `-1`) is for the dataset be repeated
+      indefinitely.
+    seed: (Optional.) A `tf.int64` scalar `tf.Tensor`, representing the
+      random seed that will be used to create the distribution. See
+      @{tf.set_random_seed} for behavior.
+
+  Returns:
+    A `Dataset` transformation function, which can be passed to
+    @{tf.contrib.data.Dataset.apply}.
+  """
+  def _apply_fn(dataset):  # pylint: disable=missing-docstring
+    random_ds = random_ops.RandomDataset(seed).apply(
+        batching.batch_and_drop_remainder(2))
+    if count is not None and count is not -1:
+      random_ds = random_ds.take(count)
+
+    def map_fn(seeds):
+      return dataset_ops.ShuffleDataset(
+          input_dataset=dataset,
+          buffer_size=buffer_size,
+          seed=seeds[0],
+          reshuffle_each_iteration=False,
+          seed2=seeds[1])
+
+    return random_ds.flat_map(map_fn)
+
+  return _apply_fn
diff --git a/tensorflow/core/api_def/base_api/api_def_RandomDataset.pbtxt b/tensorflow/core/api_def/base_api/api_def_RandomDataset.pbtxt
new file mode 100644
index 0000000000..0466b40f85
--- /dev/null
+++ b/tensorflow/core/api_def/base_api/api_def_RandomDataset.pbtxt
@@ -0,0 +1,18 @@
+op {
+  graph_op_name: "RandomDataset"
+  in_arg {
+    name: "seed"
+    description: <<END
+A scalar seed for the random number generator. If either seed or
+seed2 is set to be non-zero, the random number generator is seeded
+by the given seed.  Otherwise, a random seed is used.
+END
+  }
+  in_arg {
+    name: "seed2"
+    description: <<END
+A second scalar seed to avoid seed collision.
+END
+  }
+  summary: "Creates a Dataset that returns pseudorandom numbers."
+}
diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD
index fd36e6ca1f..9279514e6b 100644
--- a/tensorflow/core/kernels/BUILD
+++ b/tensorflow/core/kernels/BUILD
@@ -6123,6 +6123,18 @@ tf_kernel_library(
     ],
 )
 
+tf_kernel_library(
+    name = "random_dataset_op",
+    srcs = ["random_dataset_op.cc"],
+    deps = [
+        ":dataset",
+        "//tensorflow/core:dataset_ops_op_lib",
+        "//tensorflow/core:framework",
+        "//tensorflow/core:lib",
+        "//tensorflow/core:lib_internal",
+    ],
+)
+
 tf_kernel_library(
     name = "range_dataset_op",
     srcs = ["range_dataset_op.cc"],
@@ -6290,6 +6302,7 @@ tf_kernel_library(
         ":parallel_interleave_dataset_op",
         ":parallel_map_dataset_op",
         ":prefetch_dataset_op",
+        ":random_dataset_op",
         ":range_dataset_op",
         ":reader_dataset_ops",
         ":repeat_dataset_op",
diff --git a/tensorflow/core/kernels/random_dataset_op.cc b/tensorflow/core/kernels/random_dataset_op.cc
new file mode 100644
index 0000000000..03d481a593
--- /dev/null
+++ b/tensorflow/core/kernels/random_dataset_op.cc
@@ -0,0 +1,154 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include "tensorflow/core/framework/op_kernel.h"
+#include "tensorflow/core/framework/partial_tensor_shape.h"
+#include "tensorflow/core/framework/tensor.h"
+#include "tensorflow/core/kernels/dataset.h"
+#include "tensorflow/core/lib/random/philox_random.h"
+#include "tensorflow/core/lib/random/random.h"
+#include "tensorflow/core/lib/random/random_distributions.h"
+
+namespace tensorflow {
+
+namespace {
+
+// See documentation in ../ops/dataset_ops.cc for a high-level
+// description of the following op.
+
+class RandomDatasetOp : public DatasetOpKernel {
+ public:
+  explicit RandomDatasetOp(OpKernelConstruction* ctx) : DatasetOpKernel(ctx) {}
+
+  void MakeDataset(OpKernelContext* ctx, DatasetBase** output) override {
+    int64 seed;
+    OP_REQUIRES_OK(ctx, ParseScalarArgument<int64>(ctx, "seed", &seed));
+
+    int64 seed2;
+    OP_REQUIRES_OK(ctx, ParseScalarArgument<int64>(ctx, "seed2", &seed2));
+
+    // By TensorFlow convention, passing 0 for both seeds indicates
+    // that the shuffling should be seeded non-deterministically.
+    if (seed == 0 && seed2 == 0) {
+      seed = random::New64();
+      seed2 = random::New64();
+    }
+
+    *output = new Dataset(ctx, seed, seed2);
+  }
+
+ private:
+  class Dataset : public GraphDatasetBase {
+   public:
+    Dataset(OpKernelContext* ctx, int64 seed, int64 seed2)
+        : GraphDatasetBase(ctx), seed_(seed), seed2_(seed2) {}
+
+    std::unique_ptr<IteratorBase> MakeIterator(
+        const string& prefix) const override {
+      return std::unique_ptr<IteratorBase>(
+          new Iterator({this, strings::StrCat(prefix, "::Random")}));
+    }
+
+    const DataTypeVector& output_dtypes() const override {
+      static DataTypeVector* dtypes = new DataTypeVector({DT_INT64});
+      return *dtypes;
+    }
+
+    const std::vector<PartialTensorShape>& output_shapes() const override {
+      static std::vector<PartialTensorShape>* shapes =
+          new std::vector<PartialTensorShape>({{}});
+      return *shapes;
+    }
+
+    string DebugString() override {
+      return strings::StrCat("RandomDatasetOp(", seed_, ", ", seed2_,
+                             ")::Dataset");
+    }
+
+   protected:
+    Status AsGraphDefInternal(DatasetGraphDefBuilder* b,
+                              Node** output) const override {
+      Node* seed = nullptr;
+      Node* seed2 = nullptr;
+      TF_RETURN_IF_ERROR(b->AddScalar(seed_, &seed));
+      TF_RETURN_IF_ERROR(b->AddScalar(seed2_, &seed2));
+      TF_RETURN_IF_ERROR(b->AddDataset(this, {seed, seed2}, output));
+      return Status::OK();
+    }
+
+   private:
+    class Iterator : public DatasetIterator<Dataset> {
+     public:
+      explicit Iterator(const Params& params)
+          : DatasetIterator<Dataset>(params),
+            parent_generator_(dataset()->seed_, dataset()->seed2_),
+            generator_(&parent_generator_) {}
+
+      Status GetNextInternal(IteratorContext* ctx,
+                             std::vector<Tensor>* out_tensors,
+                             bool* end_of_sequence) override {
+        mutex_lock l(mu_);
+        Tensor value_tensor(cpu_allocator(), DT_INT64, {});
+        value_tensor.scalar<int64>()() = Random();
+        out_tensors->emplace_back(std::move(value_tensor));
+        *end_of_sequence = false;
+        return Status::OK();
+      }
+
+     protected:
+      Status SaveInternal(IteratorStateWriter* writer) override {
+        mutex_lock l(mu_);
+        TF_RETURN_IF_ERROR(writer->WriteScalar(full_name("num_random_samples"),
+                                               num_random_samples_));
+        return Status::OK();
+      }
+
+      Status RestoreInternal(OpKernelContext* ctx,
+                             IteratorStateReader* reader) override {
+        mutex_lock l(mu_);
+        TF_RETURN_IF_ERROR(reader->ReadScalar(full_name("num_random_samples"),
+                                              &num_random_samples_));
+        parent_generator_ =
+            random::PhiloxRandom(dataset()->seed_, dataset()->seed2_);
+        generator_ = random::SingleSampleAdapter<random::PhiloxRandom>(
+            &parent_generator_);
+        generator_.Skip(num_random_samples_);
+        return Status::OK();
+      }
+
+     private:
+      random::SingleSampleAdapter<random::PhiloxRandom>::ResultType Random()
+          EXCLUSIVE_LOCKS_REQUIRED(mu_) {
+        num_random_samples_++;
+        auto out = generator_();
+        return out;
+      }
+      mutex mu_;
+      random::PhiloxRandom parent_generator_ GUARDED_BY(mu_);
+      random::SingleSampleAdapter<random::PhiloxRandom> generator_
+          GUARDED_BY(mu_);
+      int64 num_random_samples_ GUARDED_BY(mu_) = 0;
+    };
+
+    const int64 seed_;
+    const int64 seed2_;
+  };
+};
+
+REGISTER_KERNEL_BUILDER(Name("RandomDataset").Device(DEVICE_CPU),
+                        RandomDatasetOp);
+
+}  // namespace
+
+}  // namespace tensorflow
diff --git a/tensorflow/core/ops/dataset_ops.cc b/tensorflow/core/ops/dataset_ops.cc
index 6bf226e7a5..be41531347 100644
--- a/tensorflow/core/ops/dataset_ops.cc
+++ b/tensorflow/core/ops/dataset_ops.cc
@@ -469,6 +469,24 @@ stop: corresponds to stop in python's xrange().
 step: corresponds to step in python's xrange().
 )doc");
 
+REGISTER_OP("RandomDataset")
+    .Input("seed: int64")
+    .Input("seed2: int64")
+    .Output("handle: variant")
+    .Attr("output_types: list(type) >= 1")
+    .Attr("output_shapes: list(shape) >= 1")
+    .SetIsStateful()  // TODO(b/65524810): Source dataset ops must be marked
+                      // stateful to inhibit constant folding.
+    .SetShapeFn(shape_inference::ScalarShape)
+    .Doc(R"doc(
+Creates a Dataset that returns pseudorandom numbers.
+
+seed: A scalar seed for the random number generator. If either seed or
+  seed2 is set to be non-zero, the random number generator is seeded
+  by the given seed.  Otherwise, a random seed is used.
+seed2: A second scalar seed to avoid seed collision.
+)doc");
+
 REGISTER_OP("ShuffleDataset")
     .Input("input_dataset: variant")
     .Input("buffer_size: int64")
diff --git a/tensorflow/python/data/ops/dataset_ops.py b/tensorflow/python/data/ops/dataset_ops.py
index b5a8622306..927c6d5c02 100644
--- a/tensorflow/python/data/ops/dataset_ops.py
+++ b/tensorflow/python/data/ops/dataset_ops.py
@@ -1234,13 +1234,40 @@ class ShuffleDataset(Dataset):
                input_dataset,
                buffer_size,
                seed=None,
-               reshuffle_each_iteration=None):
-    """See `Dataset.shuffle()` for details."""
+               reshuffle_each_iteration=None,
+               seed2=None):
+    """Randomly shuffles the elements of this dataset.
+
+    Args:
+      input_dataset: The input dataset.
+      buffer_size: A `tf.int64` scalar `tf.Tensor`, representing the
+        number of elements from this dataset from which the new
+        dataset will sample.
+      seed: (Optional.) A `tf.int64` scalar `tf.Tensor`, representing the
+        random seed that will be used to create the distribution. See
+        @{tf.set_random_seed} for behavior.
+      reshuffle_each_iteration: (Optional.) A boolean, which if true indicates
+        that the dataset should be pseudorandomly reshuffled each time it is
+        iterated over. (Defaults to `True`.)
+      seed2: (Optional.) A `tf.int64` scalar `tf.Tensor` used to avoid seed
+        collision. Users should generally not need to specify this. This is
+        supposed to be used when both the seeds for the Dataset op need to be
+        manually specified. If not None, seed must also be non-None.
+
+    Returns:
+      A `Dataset`.
+
+    Raises:
+      ValueError: if invalid arguments are provided.
+    """
     super(ShuffleDataset, self).__init__()
     self._input_dataset = input_dataset
     self._buffer_size = ops.convert_to_tensor(
         buffer_size, dtype=dtypes.int64, name="buffer_size")
-    seed, seed2 = random_seed.get_seed(seed)
+    if seed2 is None:
+      seed, seed2 = random_seed.get_seed(seed)
+    elif seed is None:
+      raise ValueError("seed must be non-None if seed2 is non-None.")
     if seed is None:
       self._seed = constant_op.constant(0, dtype=dtypes.int64, name="seed")
     else:
-- 
GitLab


From c0bd9dffccb29d4c01a2a18fc23b0ecad41aa4c6 Mon Sep 17 00:00:00 2001
From: Derek Murray <mrry@google.com>
Date: Wed, 29 Nov 2017 11:59:47 -0800
Subject: [PATCH 0418/1225] [tf.data] Fix compiler warnings about unused
 captures in lambda expressions.

PiperOrigin-RevId: 177343020
---
 tensorflow/core/kernels/dataset_utils.cc              | 2 +-
 tensorflow/core/kernels/filter_dataset_op.cc          | 2 +-
 tensorflow/core/kernels/group_by_window_dataset_op.cc | 6 +++---
 tensorflow/core/kernels/map_and_batch_dataset_op.cc   | 4 ++--
 tensorflow/core/kernels/map_dataset_op.cc             | 2 +-
 tensorflow/core/kernels/parallel_map_dataset_op.cc    | 4 ++--
 tensorflow/core/kernels/scan_dataset_op.cc            | 2 +-
 7 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/tensorflow/core/kernels/dataset_utils.cc b/tensorflow/core/kernels/dataset_utils.cc
index cd58c80912..bd20e20cad 100644
--- a/tensorflow/core/kernels/dataset_utils.cc
+++ b/tensorflow/core/kernels/dataset_utils.cc
@@ -32,7 +32,7 @@ Status MakeIteratorFromInputElement(
   // is always 0, so a negative random step ID should suffice.
   opts.step_id = CapturedFunction::generate_step_id();
   ScopedStepContainer step_container(
-      opts.step_id, [captured_func, ctx](const string& name) {
+      opts.step_id, [captured_func](const string& name) {
         captured_func->resource_manager()->Cleanup(name).IgnoreError();
       });
   opts.step_container = &step_container;
diff --git a/tensorflow/core/kernels/filter_dataset_op.cc b/tensorflow/core/kernels/filter_dataset_op.cc
index 0ac6cd9a98..67417d467d 100644
--- a/tensorflow/core/kernels/filter_dataset_op.cc
+++ b/tensorflow/core/kernels/filter_dataset_op.cc
@@ -149,7 +149,7 @@ class FilterDatasetOp : public UnaryDatasetOpKernel {
           FunctionLibraryRuntime::Options opts;
           opts.step_id = CapturedFunction::generate_step_id();
           ScopedStepContainer step_container(
-              opts.step_id, [this, ctx](const string& name) {
+              opts.step_id, [this](const string& name) {
                 dataset()
                     ->captured_func_->resource_manager()
                     ->Cleanup(name)
diff --git a/tensorflow/core/kernels/group_by_window_dataset_op.cc b/tensorflow/core/kernels/group_by_window_dataset_op.cc
index 8644bcf9b5..604555a560 100644
--- a/tensorflow/core/kernels/group_by_window_dataset_op.cc
+++ b/tensorflow/core/kernels/group_by_window_dataset_op.cc
@@ -169,7 +169,7 @@ class GroupByWindowDatasetOp : public UnaryDatasetOpKernel {
               opts.step_id = CapturedFunction::generate_step_id();
               opts.runner = ctx->runner();
               ScopedStepContainer step_container(
-                  opts.step_id, [this, ctx](const string& name) {
+                  opts.step_id, [this](const string& name) {
                     dataset()
                         ->captured_key_func_->resource_manager()
                         ->Cleanup(name)
@@ -198,7 +198,7 @@ class GroupByWindowDatasetOp : public UnaryDatasetOpKernel {
                 opts2.step_id = CapturedFunction::generate_step_id();
                 opts2.runner = ctx->runner();
                 ScopedStepContainer step_container2(
-                    opts2.step_id, [this, ctx](const string& name) {
+                    opts2.step_id, [this](const string& name) {
                       dataset()
                           ->captured_window_size_func_->resource_manager()
                           ->Cleanup(name)
@@ -257,7 +257,7 @@ class GroupByWindowDatasetOp : public UnaryDatasetOpKernel {
         opts.step_id = CapturedFunction::generate_step_id();
         opts.runner = ctx->runner();
         ScopedStepContainer step_container(
-            opts.step_id, [this, ctx](const string& name) {
+            opts.step_id, [this](const string& name) {
               dataset()
                   ->captured_reduce_func_->resource_manager()
                   ->Cleanup(name)
diff --git a/tensorflow/core/kernels/map_and_batch_dataset_op.cc b/tensorflow/core/kernels/map_and_batch_dataset_op.cc
index ad1e356dbd..9bd66e681f 100644
--- a/tensorflow/core/kernels/map_and_batch_dataset_op.cc
+++ b/tensorflow/core/kernels/map_and_batch_dataset_op.cc
@@ -239,8 +239,8 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel {
         // to unblock a consumer.
         FunctionLibraryRuntime::Options opts;
         opts.step_id = CapturedFunction::generate_step_id();
-        ScopedStepContainer* step_container = new ScopedStepContainer(
-            opts.step_id, [this, ctx](const string& name) {
+        ScopedStepContainer* step_container =
+            new ScopedStepContainer(opts.step_id, [this](const string& name) {
               dataset()
                   ->captured_func_->resource_manager()
                   ->Cleanup(name)
diff --git a/tensorflow/core/kernels/map_dataset_op.cc b/tensorflow/core/kernels/map_dataset_op.cc
index 23148f122d..29899a987e 100644
--- a/tensorflow/core/kernels/map_dataset_op.cc
+++ b/tensorflow/core/kernels/map_dataset_op.cc
@@ -146,7 +146,7 @@ class MapDatasetOp : public UnaryDatasetOpKernel {
         FunctionLibraryRuntime::Options opts;
         opts.step_id = CapturedFunction::generate_step_id();
         ScopedStepContainer step_container(
-            opts.step_id, [this, ctx](const string& name) {
+            opts.step_id, [this](const string& name) {
               dataset()
                   ->captured_func_->resource_manager()
                   ->Cleanup(name)
diff --git a/tensorflow/core/kernels/parallel_map_dataset_op.cc b/tensorflow/core/kernels/parallel_map_dataset_op.cc
index 2be87f4bde..b9175fe904 100644
--- a/tensorflow/core/kernels/parallel_map_dataset_op.cc
+++ b/tensorflow/core/kernels/parallel_map_dataset_op.cc
@@ -195,8 +195,8 @@ class ParallelMapDatasetOp : public UnaryDatasetOpKernel {
 
           FunctionLibraryRuntime::Options opts;
           opts.step_id = CapturedFunction::generate_step_id();
-          ScopedStepContainer* step_container = new ScopedStepContainer(
-              opts.step_id, [this, ctx](const string& name) {
+          ScopedStepContainer* step_container =
+              new ScopedStepContainer(opts.step_id, [this](const string& name) {
                 dataset()
                     ->captured_func_->resource_manager()
                     ->Cleanup(name)
diff --git a/tensorflow/core/kernels/scan_dataset_op.cc b/tensorflow/core/kernels/scan_dataset_op.cc
index 76c219f1ae..bc52322022 100644
--- a/tensorflow/core/kernels/scan_dataset_op.cc
+++ b/tensorflow/core/kernels/scan_dataset_op.cc
@@ -132,7 +132,7 @@ class ScanDatasetOp : public UnaryDatasetOpKernel {
         FunctionLibraryRuntime::Options opts;
         opts.step_id = CapturedFunction::generate_step_id();
         ScopedStepContainer step_container(
-            opts.step_id, [this, ctx](const string& name) {
+            opts.step_id, [this](const string& name) {
               dataset()
                   ->captured_func_->resource_manager()
                   ->Cleanup(name)
-- 
GitLab


From dcf9b035a09904322020d87a9324f04bcaf89eec Mon Sep 17 00:00:00 2001
From: Benoit Steiner <bsteiner@google.com>
Date: Wed, 29 Nov 2017 12:06:55 -0800
Subject: [PATCH 0419/1225] Made sure the unknown shapes of placeholders always
 propagate to their fanouts

PiperOrigin-RevId: 177344207
---
 .../core/grappler/costs/graph_properties.cc   | 12 ++++---
 .../grappler/costs/graph_properties_test.cc   | 31 ++++++++++++-------
 2 files changed, 27 insertions(+), 16 deletions(-)

diff --git a/tensorflow/core/grappler/costs/graph_properties.cc b/tensorflow/core/grappler/costs/graph_properties.cc
index fb7e20fca0..fbc52e9bd1 100644
--- a/tensorflow/core/grappler/costs/graph_properties.cc
+++ b/tensorflow/core/grappler/costs/graph_properties.cc
@@ -313,15 +313,17 @@ class SymbolicShapeRefiner {
   Status UpdateNode(const Node* node, bool relax, bool* refined) {
     return shape_refiner_->UpdateNode(node, relax, refined);
   }
-  Status SetShape(const Node* node, int output_port,
-                  shape_inference::ShapeHandle shape) {
-    return shape_refiner_->SetShape(node, output_port, shape);
-  }
   Status SetUnknownShape(const Node* node, int output_port) {
     shape_inference::ShapeHandle shape =
         GetUnknownOutputShape(node, output_port);
-    return shape_refiner_->SetShape(node, output_port, shape);
+    InferenceContext* ctx = GetContext(node);
+    if (ctx == nullptr) {
+      return errors::InvalidArgument("Missing context");
+    }
+    ctx->set_output(output_port, shape);
+    return Status::OK();
   }
+
   struct ShapeId {
     const Node* node;
     int port_id;
diff --git a/tensorflow/core/grappler/costs/graph_properties_test.cc b/tensorflow/core/grappler/costs/graph_properties_test.cc
index ad8e768f1f..cc40ff2cfc 100644
--- a/tensorflow/core/grappler/costs/graph_properties_test.cc
+++ b/tensorflow/core/grappler/costs/graph_properties_test.cc
@@ -856,7 +856,6 @@ TEST_F(GraphPropertiesTest, FedNodes) {
                                           cluster_->GetDeviceNames());
   GrapplerItem item;
   CHECK(fake_input.NextItem(&item));
-  item.feed.emplace_back("AddN", Tensor());
 
   {
     // Conservative shape analysis: the shape of fed ports should be unknown
@@ -864,17 +863,27 @@ TEST_F(GraphPropertiesTest, FedNodes) {
     Status s = properties.InferStatically(false);
     TF_CHECK_OK(s);
     for (const auto& node : item.graph.node()) {
-      if (node.name() == "AddN") {
-        const auto in_props = properties.GetInputProperties(node.name());
-        EXPECT_EQ(1, in_props.size());
-        const OpInfo::TensorProperties& in_prop = in_props[0];
-        EXPECT_EQ(DT_FLOAT, in_prop.dtype());
+      if (node.op() == "Const") {
+        continue;
+      }
+      const auto in_props = properties.GetInputProperties(node.name());
+      EXPECT_EQ(1, in_props.size());
+      const OpInfo::TensorProperties& in_prop = in_props[0];
+      const auto out_props = properties.GetOutputProperties(node.name());
+      EXPECT_EQ(1, out_props.size());
+      const OpInfo::TensorProperties& out_prop = out_props[0];
+
+      if (node.name() == "x") {
+        // x is fed: its input should have a known shape, while its output
+        // doesn't
         EXPECT_FALSE(in_prop.shape().unknown_rank());
-        EXPECT_EQ(2, in_prop.shape().dim_size());
-        const auto out_props = properties.GetOutputProperties(node.name());
-        EXPECT_EQ(1, out_props.size());
-        EXPECT_EQ(DT_FLOAT, in_prop.dtype());
+        EXPECT_EQ(1, in_prop.shape().dim_size());
+        EXPECT_EQ(2, in_prop.shape().dim(0).size());
+        EXPECT_TRUE(out_prop.shape().unknown_rank());
+      } else if (node.op() == "Square" || node.op() == "AddN") {
+        // These nodes are in the fanout of x: their shapes should be unknown.
         EXPECT_TRUE(in_prop.shape().unknown_rank());
+        EXPECT_TRUE(out_prop.shape().unknown_rank());
       }
     }
   }
@@ -885,7 +894,7 @@ TEST_F(GraphPropertiesTest, FedNodes) {
     Status s = properties.InferStatically(true);
     TF_CHECK_OK(s);
     for (const auto& node : item.graph.node()) {
-      if (node.name() == "AddN") {
+      if (node.op() == "Square" || node.op() == "AddN") {
         const auto in_props = properties.GetInputProperties(node.name());
         EXPECT_EQ(1, in_props.size());
         const OpInfo::TensorProperties& in_prop = in_props[0];
-- 
GitLab


From 97da160010a47ba37afa1afca914038d3ab0ba55 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 29 Nov 2017 12:31:41 -0800
Subject: [PATCH 0420/1225] Allow the toolchain defaults to be used instead of
 hard-coding -Os.

For example toolchains with clang may set -Oz which is more analogous to gcc's
-Os. -Os for clang is closer to -O2.

PiperOrigin-RevId: 177347371
---
 .../contrib/android/cmake/CMakeLists.txt      |  2 ++
 tensorflow/core/BUILD                         | 10 ++++-----
 tensorflow/tensorflow.bzl                     | 22 ++++++++++++-------
 3 files changed, 20 insertions(+), 14 deletions(-)

diff --git a/tensorflow/contrib/android/cmake/CMakeLists.txt b/tensorflow/contrib/android/cmake/CMakeLists.txt
index aba356d616..a115d1610e 100644
--- a/tensorflow/contrib/android/cmake/CMakeLists.txt
+++ b/tensorflow/contrib/android/cmake/CMakeLists.txt
@@ -34,6 +34,8 @@ add_library(lib_tf STATIC IMPORTED )
 set_target_properties(lib_tf PROPERTIES IMPORTED_LOCATION
         ${PREBUILT_DIR}/lib/libtensorflow-core.a)
 # Change to compile flags should be replicated into bazel build file
+# TODO: Consider options other than -O2 for binary size.
+#       e.g. -Os for gcc, and -Oz for clang.
 set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DIS_SLIM_BUILD \
                      -std=c++11 -fno-rtti -fno-exceptions \
                      -O2 -Wno-narrowing -fomit-frame-pointer \
diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD
index 4ca6fb1631..a1d61a7932 100644
--- a/tensorflow/core/BUILD
+++ b/tensorflow/core/BUILD
@@ -1008,7 +1008,7 @@ filegroup(
 cc_library(
     name = "android_tensorflow_lib_lite",
     srcs = if_android(["//tensorflow/core:android_srcs"]),
-    copts = tf_copts() + if_not_android_mips_and_mips64(["-Os"]),
+    copts = tf_copts(android_optimization_level_override = None),
     linkopts = ["-lz"],
     tags = [
         "manual",
@@ -1096,8 +1096,7 @@ cc_library(
 cc_library(
     name = "android_tensorflow_lib_selective_registration",
     srcs = if_android(["//tensorflow/core:android_srcs"]),
-    copts = tf_copts() + [
-        "-Os",
+    copts = tf_copts(android_optimization_level_override = None) + [
         "-DSUPPORT_SELECTIVE_REGISTRATION",
     ],
     tags = [
@@ -1118,8 +1117,7 @@ cc_library(
 cc_library(
     name = "android_tensorflow_lib_selective_registration_nortti",
     srcs = if_android(["//tensorflow/core:android_srcs"]),
-    copts = tf_copts() + tf_opts_nortti_if_android() + [
-        "-Os",
+    copts = tf_copts(android_optimization_level_override = None) + tf_opts_nortti_if_android() + [
         "-DSUPPORT_SELECTIVE_REGISTRATION",
     ],
     tags = [
@@ -1198,7 +1196,7 @@ cc_library(
         "framework/tensor_testutil.h",
         "util/reporter.h",
     ],
-    copts = tf_copts() + ["-Os"],
+    copts = tf_copts(android_optimization_level_override = None),
     tags = [
         "manual",
         "notap",
diff --git a/tensorflow/tensorflow.bzl b/tensorflow/tensorflow.bzl
index 8d392fb36d..76ef59484f 100644
--- a/tensorflow/tensorflow.bzl
+++ b/tensorflow/tensorflow.bzl
@@ -167,7 +167,19 @@ WIN_COPTS = [
 ]
 
 # LINT.IfChange
-def tf_copts():
+def tf_copts(android_optimization_level_override="-O2"):
+  # For compatibility reasons, android_optimization_level_override
+  # is currently only being set for Android.
+  # To clear this value, and allow the CROSSTOOL default
+  # to be used, pass android_optimization_level_override=None
+  android_copts = [
+      "-std=c++11",
+      "-DTF_LEAN_BINARY",
+      "-Wno-narrowing",
+      "-fomit-frame-pointer",
+  ]
+  if android_optimization_level_override:
+    android_copts.append(android_optimization_level_override)
   return (
       if_not_windows([
           "-DEIGEN_AVOID_STL_ARRAY",
@@ -180,13 +192,7 @@ def tf_copts():
       + if_android_arm(["-mfpu=neon"])
       + if_linux_x86_64(["-msse3"])
       + select({
-            clean_dep("//tensorflow:android"): [
-                "-std=c++11",
-                "-DTF_LEAN_BINARY",
-                "-O2",
-                "-Wno-narrowing",
-                "-fomit-frame-pointer",
-            ],
+            clean_dep("//tensorflow:android"): android_copts,
             clean_dep("//tensorflow:darwin"): [],
             clean_dep("//tensorflow:windows"): WIN_COPTS,
             clean_dep("//tensorflow:windows_msvc"): WIN_COPTS,
-- 
GitLab


From 3bf2f35c7131a0cbea3b05e7aababc23a1cf2d8f Mon Sep 17 00:00:00 2001
From: hyunyoung2 <le32146@gmail.com>
Date: Thu, 30 Nov 2017 05:42:43 +0900
Subject: [PATCH 0421/1225] Update datasets.md (#14941)

* Update datasets.md

specifically I change Iterator into tf.data.Iterator

* Update datasets.md

I fixed the indentation according to your requesting.
---
 tensorflow/docs_src/programmers_guide/datasets.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tensorflow/docs_src/programmers_guide/datasets.md b/tensorflow/docs_src/programmers_guide/datasets.md
index f458cbcef2..9ced56f0f5 100644
--- a/tensorflow/docs_src/programmers_guide/datasets.md
+++ b/tensorflow/docs_src/programmers_guide/datasets.md
@@ -190,8 +190,8 @@ validation_dataset = tf.data.Dataset.range(50)
 # A reinitializable iterator is defined by its structure. We could use the
 # `output_types` and `output_shapes` properties of either `training_dataset`
 # or `validation_dataset` here, because they are compatible.
-iterator = Iterator.from_structure(training_dataset.output_types,
-                                   training_dataset.output_shapes)
+iterator = tf.data.Iterator.from_structure(training_dataset.output_types,
+                                           training_dataset.output_shapes)
 next_element = iterator.get_next()
 
 training_init_op = iterator.make_initializer(training_dataset)
-- 
GitLab


From 037acadcfc5f2b96a2e9c1653f28131bb91858aa Mon Sep 17 00:00:00 2001
From: Benoit Steiner <bsteiner@google.com>
Date: Wed, 29 Nov 2017 12:56:52 -0800
Subject: [PATCH 0422/1225] Deleted unused method arguments

PiperOrigin-RevId: 177350575
---
 .../grappler/optimizers/constant_folding.cc   | 25 +++----------------
 .../grappler/optimizers/constant_folding.h    |  6 ++---
 2 files changed, 6 insertions(+), 25 deletions(-)

diff --git a/tensorflow/core/grappler/optimizers/constant_folding.cc b/tensorflow/core/grappler/optimizers/constant_folding.cc
index 33a9dddba7..03eaa4a84a 100644
--- a/tensorflow/core/grappler/optimizers/constant_folding.cc
+++ b/tensorflow/core/grappler/optimizers/constant_folding.cc
@@ -190,8 +190,7 @@ Status ConvertShapeToConstant(const string& op, const DataType& type,
   return Status::OK();
 }
 
-Status ConstantFolding::MaterializeShapes(const GrapplerItem& item,
-                                          const GraphProperties& properties) {
+Status ConstantFolding::MaterializeShapes(const GraphProperties& properties) {
   // We may add some nodes to the graph to encode control dependencies: there is
   // no need to process these, so only iterate over the nodes of the input
   // graph.
@@ -285,22 +284,6 @@ Status ConstantFolding::MaterializeShapes(const GrapplerItem& item,
   return Status::OK();
 }
 
-bool ShapesEqual(const TensorShapeProto& shape1,
-                 const TensorShapeProto& shape2) {
-  if (shape1.unknown_rank() || shape2.unknown_rank()) {
-    return false;
-  }
-  if (shape1.dim_size() != shape2.dim_size()) {
-    return false;
-  }
-  for (int i = 0; i < shape1.dim_size(); ++i) {
-    if (shape1.dim(i).size() != shape2.dim(i).size()) {
-      return false;
-    }
-  }
-  return true;
-}
-
 namespace {
 bool ExtractShape(const NodeDef& shape_node, const GraphProperties& properties,
                   BCast::Vec* shape, int64* min_id) {
@@ -504,7 +487,7 @@ Status ConstantFolding::MaterializeReductionIndices(
 }
 
 Status ConstantFolding::MaterializeConstants(
-    const GrapplerItem& item, const GraphProperties& properties) {
+    const GraphProperties& properties) {
   const int node_count = graph_.node_size();
   for (int i = 0; i < node_count; ++i) {
     NodeDef& node = *graph_.mutable_node(i);
@@ -1171,10 +1154,10 @@ Status ConstantFolding::RunOptimizationPass(Cluster* cluster,
     // graph. That's because it's possible to feed a placeholder with a tensor
     // of any shape, which could make the static information inconsistent with
     // the shapes actually fed.
-    TF_RETURN_IF_ERROR(MaterializeShapes(item, properties));
+    TF_RETURN_IF_ERROR(MaterializeShapes(properties));
   }
   if (opt_level_ == RewriterConfig::AGGRESSIVE && s.ok()) {
-    TF_RETURN_IF_ERROR(MaterializeConstants(item, properties));
+    TF_RETURN_IF_ERROR(MaterializeConstants(properties));
   }
 
   TF_RETURN_IF_ERROR(FoldGraph(output));
diff --git a/tensorflow/core/grappler/optimizers/constant_folding.h b/tensorflow/core/grappler/optimizers/constant_folding.h
index f04f413c10..7c5db2a70f 100644
--- a/tensorflow/core/grappler/optimizers/constant_folding.h
+++ b/tensorflow/core/grappler/optimizers/constant_folding.h
@@ -51,16 +51,14 @@ class ConstantFolding : public GraphOptimizer {
                 const GraphDef& optimize_output, double result) override;
 
  private:
-  Status MaterializeShapes(const GrapplerItem& item,
-                           const GraphProperties& properties);
+  Status MaterializeShapes(const GraphProperties& properties);
 
   Status MaterializeBroadcastGradientArgs(const NodeDef& node,
                                           const GraphProperties& properties);
   Status MaterializeReductionIndices(NodeDef* node,
                                      const GraphProperties& properties);
 
-  Status MaterializeConstants(const GrapplerItem& item,
-                              const GraphProperties& properties);
+  Status MaterializeConstants(const GraphProperties& properties);
   bool IsFoldable(const NodeDef& node) const;
 
   Status EvaluateNode(const NodeDef& node,
-- 
GitLab


From d1aea3be42f6153a970319a298eb55372ab9aa2e Mon Sep 17 00:00:00 2001
From: Igor Saprykin <isaprykin@google.com>
Date: Wed, 29 Nov 2017 13:02:30 -0800
Subject: [PATCH 0423/1225] Clarify the role of replicate_model_fn.Mode better.

PiperOrigin-RevId: 177351409
---
 .../estimator/python/estimator/replicate_model_fn.py        | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/tensorflow/contrib/estimator/python/estimator/replicate_model_fn.py b/tensorflow/contrib/estimator/python/estimator/replicate_model_fn.py
index 6f7f37473f..f5154231da 100644
--- a/tensorflow/contrib/estimator/python/estimator/replicate_model_fn.py
+++ b/tensorflow/contrib/estimator/python/estimator/replicate_model_fn.py
@@ -47,7 +47,11 @@ from tensorflow.python.training import training_util
 
 
 class Mode(object):
-  """Modes for variables replication used for forcing a particular mode."""
+  """Modes for variables replication used for forcing a particular mode.
+
+  Forcing a mode is meant for performance experimentation purposes rather than
+  for general use cases.
+  """
 
   AUTO = 0
   """Use internal heuristics for choosing the best Mode value.
-- 
GitLab


From ec4d31e82c2237824276eef302d9edc38c28c3a2 Mon Sep 17 00:00:00 2001
From: Charles Shenton <cshenton@users.noreply.github.com>
Date: Thu, 30 Nov 2017 08:15:38 +1100
Subject: [PATCH 0424/1225] Half Normal Distribution (and inverse error
 function) (#14056)

* foldednormal docstring

* folded __init__ method

* prob, log_prob methods

* rewrote halfnormal docstring

* initial implementation of dist methods

* halfnormal unit tests

* registered HalfNormal to contrib.distributions

* added erfinv function

* unit tests for erfinv

* registered erfinv symbol

* cdf, pdf now deal with x < 0 correctly

* pylint fixes

* cuda_py test reference in BUILD

* erfinv fixes

* corrections to scipy reference tests

* Added reference to entropy test case.
---
 tensorflow/contrib/distributions/BUILD        |  18 +
 tensorflow/contrib/distributions/__init__.py  |   2 +
 .../python/kernel_tests/half_normal_test.py   | 320 ++++++++++++++++++
 .../distributions/python/ops/half_normal.py   | 170 ++++++++++
 .../distributions/special_math_test.py        |  26 ++
 .../python/ops/distributions/special_math.py  |  24 ++
 6 files changed, 560 insertions(+)
 create mode 100644 tensorflow/contrib/distributions/python/kernel_tests/half_normal_test.py
 create mode 100644 tensorflow/contrib/distributions/python/ops/half_normal.py

diff --git a/tensorflow/contrib/distributions/BUILD b/tensorflow/contrib/distributions/BUILD
index 145b9495ff..b2c641f8ab 100644
--- a/tensorflow/contrib/distributions/BUILD
+++ b/tensorflow/contrib/distributions/BUILD
@@ -204,6 +204,24 @@ cuda_py_test(
     ],
 )
 
+cuda_py_test(
+    name = "half_normal_test",
+    size = "medium",
+    srcs = ["python/kernel_tests/half_normal_test.py"],
+    additional_deps = [
+        ":distributions_py",
+        "//third_party/py/numpy",
+        "//tensorflow/python:client",
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:framework_for_generated_wrappers",
+        "//tensorflow/python:framework_test_lib",
+        "//tensorflow/python:gradients",
+        "//tensorflow/python:nn_ops",
+        "//tensorflow/python:platform_test",
+        "//tensorflow/python:variables",
+    ],
+)
+
 cuda_py_test(
     name = "inverse_gamma_test",
     srcs = ["python/kernel_tests/inverse_gamma_test.py"],
diff --git a/tensorflow/contrib/distributions/__init__.py b/tensorflow/contrib/distributions/__init__.py
index 0d12d83893..66827179e9 100644
--- a/tensorflow/contrib/distributions/__init__.py
+++ b/tensorflow/contrib/distributions/__init__.py
@@ -36,6 +36,7 @@ from tensorflow.contrib.distributions.python.ops.distribution_util import softpl
 from tensorflow.contrib.distributions.python.ops.distribution_util import tridiag
 from tensorflow.contrib.distributions.python.ops.estimator import *
 from tensorflow.contrib.distributions.python.ops.geometric import *
+from tensorflow.contrib.distributions.python.ops.half_normal import *
 from tensorflow.contrib.distributions.python.ops.independent import *
 from tensorflow.contrib.distributions.python.ops.inverse_gamma import *
 from tensorflow.contrib.distributions.python.ops.logistic import *
@@ -107,6 +108,7 @@ _allowed_symbols = [
     'Gamma',
     'GammaWithSoftplusConcentrationRate',
     'Geometric',
+    'HalfNormal',
     'Independent',
     'InverseGamma',
     'InverseGammaWithSoftplusConcentrationRate',
diff --git a/tensorflow/contrib/distributions/python/kernel_tests/half_normal_test.py b/tensorflow/contrib/distributions/python/kernel_tests/half_normal_test.py
new file mode 100644
index 0000000000..a7571806f2
--- /dev/null
+++ b/tensorflow/contrib/distributions/python/kernel_tests/half_normal_test.py
@@ -0,0 +1,320 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for initializers."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import importlib
+import numpy as np
+
+from tensorflow.python.framework import constant_op
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import ops
+from tensorflow.python.framework import tensor_shape
+from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import gradients_impl
+from tensorflow.python.ops import variables
+from tensorflow.contrib.distributions.python.ops import half_normal as hn_lib
+from tensorflow.python.platform import test
+from tensorflow.python.platform import tf_logging
+
+
+def try_import(name):  # pylint: disable=invalid-name
+  module = None
+  try:
+    module = importlib.import_module(name)
+  except ImportError as e:
+    tf_logging.warning("Could not import %s: %s" % (name, str(e)))
+  return module
+
+stats = try_import("scipy.stats")
+
+
+class HalfNormalTest(test.TestCase):
+
+  def setUp(self):
+    self._rng = np.random.RandomState(123)
+
+  def assertAllFinite(self, tensor):
+    is_finite = np.isfinite(tensor.eval())
+    all_true = np.ones_like(is_finite, dtype=np.bool)
+    self.assertAllEqual(all_true, is_finite)
+
+  def _testParamShapes(self, sample_shape, expected):
+    with self.test_session():
+      param_shapes = hn_lib.HalfNormal.param_shapes(sample_shape)
+      scale_shape = param_shapes["scale"]
+      self.assertAllEqual(expected, scale_shape.eval())
+      scale = array_ops.ones(scale_shape)
+      self.assertAllEqual(
+          expected,
+          array_ops.shape(hn_lib.HalfNormal(scale).sample()).eval())
+
+  def _testParamStaticShapes(self, sample_shape, expected):
+    param_shapes = hn_lib.HalfNormal.param_static_shapes(sample_shape)
+    scale_shape = param_shapes["scale"]
+    self.assertEqual(expected, scale_shape)
+
+  def _testBatchShapes(self, dist, tensor):
+    self.assertAllEqual(dist.batch_shape_tensor().eval(), tensor.shape)
+    self.assertAllEqual(dist.batch_shape_tensor().eval(), tensor.eval().shape)
+    self.assertAllEqual(dist.batch_shape, tensor.shape)
+    self.assertAllEqual(dist.batch_shape, tensor.eval().shape)
+
+  def testParamShapes(self):
+    sample_shape = [10, 3, 4]
+    self._testParamShapes(sample_shape, sample_shape)
+    self._testParamShapes(constant_op.constant(sample_shape), sample_shape)
+
+  def testParamStaticShapes(self):
+    sample_shape = [10, 3, 4]
+    self._testParamStaticShapes(sample_shape, sample_shape)
+    self._testParamStaticShapes(
+        tensor_shape.TensorShape(sample_shape), sample_shape)
+
+  def testHalfNormalLogPDF(self):
+    with self.test_session():
+      batch_size = 6
+      scale = constant_op.constant([3.0] * batch_size)
+      x = np.array([-2.5, 2.5, 4.0, 0.0, -1.0, 2.0], dtype=np.float32)
+      halfnorm = hn_lib.HalfNormal(scale=scale)
+
+      log_pdf = halfnorm.log_prob(x)
+      self._testBatchShapes(halfnorm, log_pdf)
+
+      pdf = halfnorm.prob(x)
+      self._testBatchShapes(halfnorm, pdf)
+
+      if not stats:
+        return
+      expected_log_pdf = stats.halfnorm(scale=scale.eval()).logpdf(x)
+      self.assertAllClose(expected_log_pdf, log_pdf.eval())
+      self.assertAllClose(np.exp(expected_log_pdf), pdf.eval())
+
+  def testHalfNormalLogPDFMultidimensional(self):
+    with self.test_session():
+      batch_size = 6
+      scale = constant_op.constant([[3.0, 1.0]] * batch_size)
+      x = np.array([[-2.5, 2.5, 4.0, 0.0, -1.0, 2.0]], dtype=np.float32).T
+      halfnorm = hn_lib.HalfNormal(scale=scale)
+
+      log_pdf = halfnorm.log_prob(x)
+      self._testBatchShapes(halfnorm, log_pdf)
+
+      pdf = halfnorm.prob(x)
+      self._testBatchShapes(halfnorm, pdf)
+
+      if not stats:
+        return
+      expected_log_pdf = stats.halfnorm(scale=scale.eval()).logpdf(x)
+      self.assertAllClose(expected_log_pdf, log_pdf.eval())
+      self.assertAllClose(np.exp(expected_log_pdf), pdf.eval())
+
+  def testHalfNormalCDF(self):
+    with self.test_session():
+      batch_size = 50
+      scale = self._rng.rand(batch_size) + 1.0
+      x = np.linspace(-8.0, 8.0, batch_size).astype(np.float64)
+      halfnorm = hn_lib.HalfNormal(scale=scale)
+
+      cdf = halfnorm.cdf(x)
+      self._testBatchShapes(halfnorm, cdf)
+
+      log_cdf = halfnorm.log_cdf(x)
+      self._testBatchShapes(halfnorm, log_cdf)
+
+      if not stats:
+        return
+      expected_logcdf = stats.halfnorm(scale=scale).logcdf(x)
+      self.assertAllClose(expected_logcdf, log_cdf.eval(), atol=0)
+      self.assertAllClose(np.exp(expected_logcdf), cdf.eval(), atol=0)
+
+  def testHalfNormalSurvivalFunction(self):
+    with self.test_session():
+      batch_size = 50
+      scale = self._rng.rand(batch_size) + 1.0
+      x = np.linspace(-8.0, 8.0, batch_size).astype(np.float64)
+      halfnorm = hn_lib.HalfNormal(scale=scale)
+
+      sf = halfnorm.survival_function(x)
+      self._testBatchShapes(halfnorm, sf)
+
+      log_sf = halfnorm.log_survival_function(x)
+      self._testBatchShapes(halfnorm, log_sf)
+
+      if not stats:
+        return
+      expected_logsf = stats.halfnorm(scale=scale).logsf(x)
+      self.assertAllClose(expected_logsf, log_sf.eval(), atol=0)
+      self.assertAllClose(np.exp(expected_logsf), sf.eval(), atol=0)
+
+  def testHalfNormalQuantile(self):
+    with self.test_session():
+      batch_size = 50
+      scale = self._rng.rand(batch_size) + 1.0
+      p = np.linspace(0., 1.0, batch_size).astype(np.float64)
+
+      halfnorm = hn_lib.HalfNormal(scale=scale)
+      x = halfnorm.quantile(p)
+      self._testBatchShapes(halfnorm, x)
+
+      if not stats:
+        return
+      expected_x = stats.halfnorm(scale=scale).ppf(p)
+      self.assertAllClose(expected_x, x.eval(), atol=0)
+
+  def testFiniteGradients(self):
+    for dtype in [np.float32, np.float64]:
+      g = ops.Graph()
+      with g.as_default():
+        scale = variables.Variable(dtype(3.0))
+        dist = hn_lib.HalfNormal(scale=scale)
+        x = np.array([0.01, 0.1, 1., 5., 10.]).astype(dtype)
+        for func in [
+            dist.cdf, dist.log_cdf, dist.survival_function,
+            dist.log_prob, dist.prob, dist.log_survival_function,
+        ]:
+          print(func.__name__)
+          value = func(x)
+          grads = gradients_impl.gradients(value, [scale])
+          with self.test_session(graph=g):
+            variables.global_variables_initializer().run()
+            self.assertAllFinite(value)
+            self.assertAllFinite(grads[0])
+
+  def testHalfNormalEntropy(self):
+    with self.test_session():
+      scale = np.array([[1.0, 2.0, 3.0]])
+      halfnorm = hn_lib.HalfNormal(scale=scale)
+      
+      # See https://en.wikipedia.org/wiki/Half-normal_distribution for the
+      # entropy formula used here.
+      expected_entropy = 0.5 * np.log(np.pi * scale ** 2.0 / 2.0) + 0.5
+
+      entropy = halfnorm.entropy()
+      self._testBatchShapes(halfnorm, entropy)
+      self.assertAllClose(expected_entropy, entropy.eval())
+
+  def testHalfNormalMeanAndMode(self):
+    with self.test_session():
+      scale = np.array([11., 12., 13.])
+
+      halfnorm = hn_lib.HalfNormal(scale=scale)
+      expected_mean = scale * np.sqrt(2.0) / np.sqrt(np.pi)
+
+      self.assertAllEqual((3,), halfnorm.mean().eval().shape)
+      self.assertAllEqual(expected_mean, halfnorm.mean().eval())
+
+      self.assertAllEqual((3,), halfnorm.mode().eval().shape)
+      self.assertAllEqual([0., 0., 0.], halfnorm.mode().eval())
+
+  def testHalfNormalVariance(self):
+    with self.test_session():
+      scale = np.array([7., 7., 7.])
+      halfnorm = hn_lib.HalfNormal(scale=scale)
+      expected_variance = scale ** 2.0 * (1.0 - 2.0 / np.pi)
+
+      self.assertAllEqual((3,), halfnorm.variance().eval().shape)
+      self.assertAllEqual(expected_variance, halfnorm.variance().eval())
+
+  def testHalfNormalStandardDeviation(self):
+    with self.test_session():
+      scale = np.array([7., 7., 7.])
+      halfnorm = hn_lib.HalfNormal(scale=scale)
+      expected_variance = scale ** 2.0 * (1.0 - 2.0 / np.pi)
+
+      self.assertAllEqual((3,), halfnorm.stddev().shape)
+      self.assertAllEqual(np.sqrt(expected_variance), halfnorm.stddev().eval())
+
+  def testHalfNormalSample(self):
+    with self.test_session():
+      scale = constant_op.constant(3.0)
+      n = constant_op.constant(100000)
+      halfnorm = hn_lib.HalfNormal(scale=scale)
+
+      sample = halfnorm.sample(n)
+
+      self.assertEqual(sample.eval().shape, (100000,))
+      self.assertAllClose(sample.eval().mean(),
+                          3.0 * np.sqrt(2.0) / np.sqrt(np.pi), atol=1e-1)
+
+      expected_shape = tensor_shape.TensorShape([n.eval()]).concatenate(
+          tensor_shape.TensorShape(halfnorm.batch_shape_tensor().eval()))
+      self.assertAllEqual(expected_shape, sample.shape)
+      self.assertAllEqual(expected_shape, sample.eval().shape)
+
+      expected_shape_static = (tensor_shape.TensorShape(
+          [n.eval()]).concatenate(halfnorm.batch_shape))
+      self.assertAllEqual(expected_shape_static, sample.shape)
+      self.assertAllEqual(expected_shape_static, sample.eval().shape)
+
+  def testHalfNormalSampleMultiDimensional(self):
+    with self.test_session():
+      batch_size = 2
+      scale = constant_op.constant([[2.0, 3.0]] * batch_size)
+      n = constant_op.constant(100000)
+      halfnorm = hn_lib.HalfNormal(scale=scale)
+
+      sample = halfnorm.sample(n)
+      self.assertEqual(sample.shape, (100000, batch_size, 2))
+      self.assertAllClose(sample.eval()[:, 0, 0].mean(),
+                          2.0 * np.sqrt(2.0) / np.sqrt(np.pi), atol=1e-1)
+      self.assertAllClose(sample.eval()[:, 0, 1].mean(),
+                          3.0 * np.sqrt(2.0) / np.sqrt(np.pi), atol=1e-1)
+
+      expected_shape = tensor_shape.TensorShape([n.eval()]).concatenate(
+          tensor_shape.TensorShape(halfnorm.batch_shape_tensor().eval()))
+      self.assertAllEqual(expected_shape, sample.shape)
+      self.assertAllEqual(expected_shape, sample.eval().shape)
+
+      expected_shape_static = (tensor_shape.TensorShape(
+          [n.eval()]).concatenate(halfnorm.batch_shape))
+      self.assertAllEqual(expected_shape_static, sample.shape)
+      self.assertAllEqual(expected_shape_static, sample.eval().shape)
+
+  def testNegativeSigmaFails(self):
+    with self.test_session():
+      halfnorm = hn_lib.HalfNormal(scale=[-5.], validate_args=True, name="G")
+      with self.assertRaisesOpError("Condition x > 0 did not hold"):
+        halfnorm.mean().eval()
+
+  def testHalfNormalShape(self):
+    with self.test_session():
+      scale = constant_op.constant([6.0] * 5)
+      halfnorm = hn_lib.HalfNormal(scale=scale)
+
+      self.assertEqual(halfnorm.batch_shape_tensor().eval(), [5])
+      self.assertEqual(halfnorm.batch_shape, tensor_shape.TensorShape([5]))
+      self.assertAllEqual(halfnorm.event_shape_tensor().eval(), [])
+      self.assertEqual(halfnorm.event_shape, tensor_shape.TensorShape([]))
+
+  def testHalfNormalShapeWithPlaceholders(self):
+    scale = array_ops.placeholder(dtype=dtypes.float32)
+    halfnorm = hn_lib.HalfNormal(scale=scale)
+
+    with self.test_session() as sess:
+      # get_batch_shape should return an "<unknown>" tensor.
+      self.assertEqual(halfnorm.batch_shape, tensor_shape.TensorShape(None))
+      self.assertEqual(halfnorm.event_shape, ())
+      self.assertAllEqual(halfnorm.event_shape_tensor().eval(), [])
+      self.assertAllEqual(
+          sess.run(halfnorm.batch_shape_tensor(),
+                   feed_dict={scale: [1.0, 2.0]}), [2])
+
+
+if __name__ == "__main__":
+  test.main()
diff --git a/tensorflow/contrib/distributions/python/ops/half_normal.py b/tensorflow/contrib/distributions/python/ops/half_normal.py
new file mode 100644
index 0000000000..12059b6a9e
--- /dev/null
+++ b/tensorflow/contrib/distributions/python/ops/half_normal.py
@@ -0,0 +1,170 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""The Half Normal distribution class."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import numpy as np
+
+from tensorflow.python.framework import constant_op
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import ops
+from tensorflow.python.framework import tensor_shape
+from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import check_ops
+from tensorflow.python.ops import math_ops
+from tensorflow.python.ops import nn
+from tensorflow.python.ops import random_ops
+from tensorflow.python.ops.distributions import distribution
+from tensorflow.python.ops.distributions import special_math
+
+
+__all__ = [
+    "HalfNormal",
+]
+
+
+class HalfNormal(distribution.Distribution):
+  """The Half Normal distribution with scale `scale`.
+
+  #### Mathematical details
+
+  The half normal is a transformation of a centered normal distribution.
+  If some random variable `X` has normal distribution,
+  ```none
+  X ~ Normal(0.0, scale)
+  Y = |X|
+  ```
+  Then `Y` will have half normal distribution. The probability density
+  function (pdf) is:
+
+  ```none
+  pdf(x; scale, x > 0) = sqrt(2) / (scale * sqrt(pi)) *
+    exp(- 1/2 * (x / scale) ** 2)
+  )
+  ```
+  Where `scale = sigma` is the standard deviation of the underlying normal
+  distribution.
+
+  #### Examples
+
+  Examples of initialization of one or a batch of distributions.
+
+  ```python
+  # Define a single scalar HalfNormal distribution.
+  dist = tf.contrib.distributions.HalfNormal(scale=3.0)
+
+  # Evaluate the cdf at 1, returning a scalar.
+  dist.cdf(1.)
+
+  # Define a batch of two scalar valued HalfNormals.
+  # The first has scale 11.0, the second 22.0
+  dist = tf.contrib.distributions.HalfNormal(scale=[11.0, 22.0])
+
+  # Evaluate the pdf of the first distribution on 1.0, and the second on 1.5,
+  # returning a length two tensor.
+  dist.prob([1.0, 1.5])
+
+  # Get 3 samples, returning a 3 x 2 tensor.
+  dist.sample([3])
+  ```
+
+  """
+  def __init__(self,
+               scale,
+               validate_args=False,
+               allow_nan_stats=True,
+               name="HalfNormal"):
+    """Construct HalfNormals with scale `scale`.
+
+    Args:
+      scale: Floating point tensor; the scales of the distribution(s).
+        Must contain only positive values.
+      validate_args: Python `bool`, default `False`. When `True` distribution
+        parameters are checked for validity despite possibly degrading runtime
+        performance. When `False` invalid inputs may silently render incorrect
+        outputs.
+      allow_nan_stats: Python `bool`, default `True`. When `True`,
+        statistics (e.g., mean, mode, variance) use the value "`NaN`" to
+        indicate the result is undefined. When `False`, an exception is raised
+        if one or more of the statistic's batch members are undefined.
+      name: Python `str` name prefixed to Ops created by this class.
+    """
+    parameters = locals()
+    with ops.name_scope(name, values=[scale]):
+      with ops.control_dependencies([check_ops.assert_positive(scale)] if
+                                    validate_args else []):
+        self._scale = array_ops.identity(scale, name="scale")
+    super(HalfNormal, self).__init__(
+        dtype=self._scale.dtype,
+        reparameterization_type=distribution.FULLY_REPARAMETERIZED,
+        validate_args=validate_args,
+        allow_nan_stats=allow_nan_stats,
+        parameters=parameters,
+        graph_parents=[self._scale],
+        name=name)
+
+  @staticmethod
+  def _param_shapes(sample_shape):
+    return {'scale': ops.convert_to_tensor(sample_shape, dtype=dtypes.int32)}
+
+  @property
+  def scale(self):
+    """Distribution parameter for the scale."""
+    return self._scale
+
+  def _batch_shape_tensor(self):
+    return array_ops.shape(self.scale)
+
+  def _batch_shape(self):
+    return self.scale.shape
+
+  def _event_shape_tensor(self):
+    return constant_op.constant([], dtype=dtypes.int32)
+
+  def _event_shape(self):
+    return tensor_shape.scalar()
+
+  def _sample_n(self, n, seed=None):
+    shape = array_ops.concat([[n], self.batch_shape_tensor()], 0)
+    sampled = random_ops.random_normal(
+        shape=shape, mean=0., stddev=1., dtype=self.dtype, seed=seed)
+    return math_ops.abs(sampled * self.scale)
+
+  def _prob(self, x):
+    coeff = np.sqrt(2) / self.scale / np.sqrt(np.pi)
+    pdf = coeff * math_ops.exp(- 0.5 * (x / self.scale) ** 2)
+    return pdf * math_ops.cast(x >= 0, self.dtype)
+
+  def _cdf(self, x):
+    truncated_x = nn.relu(x)
+    return math_ops.erf(truncated_x / self.scale / np.sqrt(2.0))
+
+  def _entropy(self):
+    return 0.5 * math_ops.log(np.pi * self.scale ** 2.0 / 2.0) + 0.5
+
+  def _mean(self):
+    return self.scale * np.sqrt(2.0) / np.sqrt(np.pi)
+
+  def _quantile(self, p):
+    return np.sqrt(2.0) * self.scale * special_math.erfinv(p)
+
+  def _mode(self):
+    return array_ops.zeros(self.batch_shape_tensor())
+
+  def _variance(self):
+    return self.scale ** 2.0 * (1.0 - 2.0 / np.pi)
diff --git a/tensorflow/python/kernel_tests/distributions/special_math_test.py b/tensorflow/python/kernel_tests/distributions/special_math_test.py
index 9441cdbe39..2d434a39c2 100644
--- a/tensorflow/python/kernel_tests/distributions/special_math_test.py
+++ b/tensorflow/python/kernel_tests/distributions/special_math_test.py
@@ -332,6 +332,32 @@ class LogNdtrGradientTest(NdtrGradientTest):
   _use_log = True
 
 
+class ErfInvTest(test.TestCase):
+
+  def testErfInvValues(self):
+    with self.test_session():
+      if not special:
+        return
+
+      x = np.linspace(0., 1.0, 50).astype(np.float64)
+
+      expected_x = special.erfinv(x)
+      x = special_math.erfinv(x)
+      self.assertAllClose(expected_x, x.eval(), atol=0.)
+
+  def testErfInvIntegerInput(self):
+    with self.test_session():
+
+      with self.assertRaises(TypeError):
+        x = np.array([1, 2, 3]).astype(np.int32)
+        special_math.erfinv(x)
+
+      with self.assertRaises(TypeError):
+        x = np.array([1, 2, 3]).astype(np.int64)
+        special_math.erfinv(x)
+
+
+
 class LogCDFLaplaceTest(test.TestCase):
   # Note that scipy.stats.laplace does not have a stable Log CDF, so we cannot
   # rely on scipy to cross check the extreme values.
diff --git a/tensorflow/python/ops/distributions/special_math.py b/tensorflow/python/ops/distributions/special_math.py
index 222a39ad82..bed4cbb2c1 100644
--- a/tensorflow/python/ops/distributions/special_math.py
+++ b/tensorflow/python/ops/distributions/special_math.py
@@ -27,6 +27,7 @@ from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import math_ops
 
 __all__ = [
+    "erfinv",
     "ndtr",
     "ndtri",
     "log_ndtr",
@@ -350,6 +351,29 @@ def _log_ndtr_asymptotic_series(x, series_order):
   return 1. + even_sum - odd_sum
 
 
+def erfinv(x, name="erfinv"):
+  """The inverse function for erf, the error function.
+
+  Args:
+    x: `Tensor` of type `float32`, `float64`.
+    name: Python string. A name for the operation (default="erfinv").
+
+  Returns:
+    x: `Tensor` with `dtype=x.dtype`.
+
+  Raises:
+    TypeError: if `x` is not floating-type.
+  """
+
+  with ops.name_scope(name, values=[x]):
+    x = ops.convert_to_tensor(x, name="x")
+    if x.dtype.as_numpy_dtype not in [np.float32, np.float64]:
+      raise TypeError(
+          "x.dtype=%s is not handled, see docstring for supported types."
+          % x.dtype)
+    return ndtri((x + 1.0) / 2.0) / np.sqrt(2)
+
+
 def _double_factorial(n):
   """The double factorial function for small Python integer `n`."""
   return np.prod(np.arange(n, 1, -2))
-- 
GitLab


From 197850fb12015f8e01a3b9c4d3e3546fc54aaa0b Mon Sep 17 00:00:00 2001
From: Olivia Nordquist <nolivia@google.com>
Date: Wed, 29 Nov 2017 13:21:31 -0800
Subject: [PATCH 0425/1225] enabling Tensor._set_shape() to work with the C API

PiperOrigin-RevId: 177353959
---
 tensorflow/python/client/tf_session.i         | 43 ++++++++++++++
 tensorflow/python/client/tf_session_helper.cc | 19 +++++++
 tensorflow/python/client/tf_session_helper.h  | 14 +++++
 tensorflow/python/framework/ops.py            | 57 +++++++++++++------
 4 files changed, 117 insertions(+), 16 deletions(-)

diff --git a/tensorflow/python/client/tf_session.i b/tensorflow/python/client/tf_session.i
index 5fa1a7e8fc..c286d5fe47 100644
--- a/tensorflow/python/client/tf_session.i
+++ b/tensorflow/python/client/tf_session.i
@@ -532,6 +532,49 @@ def TF_Reset(target, containers=None, config=None):
 %unignore TF_GraphGetTensorShapeHelper;
 %ignore TF_GraphGetTensorShape;
 
+// We use TF_GraphSetTensorShape_wrapper instead of
+// TF_GraphSetTensorShape
+%ignore TF_GraphSetTensorShape;
+%unignore tensorflow;
+%unignore TF_GraphSetTensorShape_wrapper;
+
+// $input is a Python list of ints to a vector<int> for TF_GraphSetTensorShape_wrapper
+%typemap(in) (const std::vector<int64_t>& dims)
+    (std::vector<int64_t> dims_local){
+  if ($input != Py_None) {
+    if (!PyList_Check($input)) {
+      SWIG_exception_fail(SWIG_TypeError, tensorflow::strings::Printf(
+              "$symname: expected list but got %s ", Py_TYPE($input)->tp_name).c_str());
+    }
+    size_t size = PyList_Size($input);
+    for (int i = 0; i < size; ++i) {
+      PyObject* item = PyList_GetItem($input, i);
+      dims_local.push_back(PyInt_AS_LONG(item));
+    }
+    $1 = &dims_local;
+  } else {
+    $1 = nullptr;
+  }
+}
+
+// We use TF_GraphGetTensorShape_wrapper instead of
+// TF_GraphGetTensorShape
+%ignore TF_GraphGetTensorShape;
+%unignore tensorflow;
+%unignore TF_GraphGetTensorShape_wrapper;
+
+// Build a Python list of ints and return it.
+%typemap(out) std::vector<int64_t> tensorflow::TF_GraphGetTensorShape_wrapper {
+  $result = PyList_New($1.size());
+  if (!$result) {
+    SWIG_exception_fail(SWIG_MemoryError, "$symname: couldn't create list");
+  }
+
+  for (size_t i = 0; i < $1.size(); ++i) {
+    PyList_SET_ITEM($result, i, PyInt_FromLong($1[i]));
+  }
+}
+
 %include "tensorflow/python/client/tf_session_helper.h"
 
 %unignoreall
diff --git a/tensorflow/python/client/tf_session_helper.cc b/tensorflow/python/client/tf_session_helper.cc
index ad982e5dd8..e4bf09a0ca 100644
--- a/tensorflow/python/client/tf_session_helper.cc
+++ b/tensorflow/python/client/tf_session_helper.cc
@@ -407,4 +407,23 @@ TF_Function* TF_GraphToFunction_wrapper(
                             opts, description, out_status);
 }
 
+void TF_GraphSetTensorShape_wrapper(TF_Graph* graph, TF_Output output,
+                                    const std::vector<int64_t>& dims,
+                                    bool unknown_shape, TF_Status* status) {
+  if (unknown_shape) {
+    TF_GraphSetTensorShape(graph, output, nullptr, -1, status);
+    return;
+  }
+  TF_GraphSetTensorShape(graph, output, dims.data(), dims.size(), status);
+}
+
+std::vector<int64_t> TF_GraphGetTensorShape_wrapper(TF_Graph* graph,
+                                                    TF_Output output,
+                                                    int num_dims,
+                                                    TF_Status* status) {
+  std::vector<int64_t> dims(num_dims);
+  TF_GraphGetTensorShape(graph, output, dims.data(), num_dims, status);
+  return dims;
+}
+
 }  // namespace tensorflow
diff --git a/tensorflow/python/client/tf_session_helper.h b/tensorflow/python/client/tf_session_helper.h
index 6ed08d3a58..bb7171db31 100644
--- a/tensorflow/python/client/tf_session_helper.h
+++ b/tensorflow/python/client/tf_session_helper.h
@@ -168,6 +168,20 @@ TF_Function* TF_GraphToFunction_wrapper(
     const std::vector<TF_Output>& inputs, const std::vector<TF_Output>& outputs,
     const NameVector& output_names, const TF_FunctionOptions* opts,
     const char* description, TF_Status* out_status);
+
+// Set the shape of output. If unknown is true, `num_dims` must be set to
+// -1 and `dims` is set to nullptr.
+void TF_GraphSetTensorShape_wrapper(TF_Graph* graph, TF_Output output,
+                                    const std::vector<int64_t>& dims,
+                                    bool unknown_shape, TF_Status* status);
+
+// Return the shape of output. `num_dims` should be the output of
+// TF_GraphGetTensorNumDims. If `num_dims = -1`, this should not be called.
+std::vector<int64_t> TF_GraphGetTensorShape_wrapper(TF_Graph* graph,
+                                                    TF_Output output,
+                                                    int num_dims,
+                                                    TF_Status* status);
+
 }  // namespace tensorflow
 
 #endif  // TENSORFLOW_PYTHON_CLIENT_TF_SESSION_HELPER_H_
diff --git a/tensorflow/python/framework/ops.py b/tensorflow/python/framework/ops.py
index 2217513966..975a1c87ec 100644
--- a/tensorflow/python/framework/ops.py
+++ b/tensorflow/python/framework/ops.py
@@ -374,6 +374,19 @@ class Tensor(_TensorLike):
       A `TensorShape` representing the shape of this tensor.
 
     """
+    if _USE_C_API:
+      graph = self._op._graph._c_graph  # pylint: disable=protected-access
+      with errors.raise_exception_on_not_ok_status() as status:
+        num_dims = c_api.TF_GraphGetTensorNumDims(graph, self._as_tf_output(),
+                                                  status)
+      if num_dims == -1:
+        dim_list = None
+      else:
+        with errors.raise_exception_on_not_ok_status() as status:
+          dim_list = c_api.TF_GraphGetTensorShape_wrapper(
+              graph, self._as_tf_output(), num_dims, status)
+        dim_list = [None if i == -1 else i for i in dim_list]
+      return tensor_shape.TensorShape(dim_list)
     return self._shape
 
   def __iter__(self):
@@ -393,8 +406,8 @@ class Tensor(_TensorLike):
       yield self[i]
 
   def _shape_as_list(self):
-    if self._shape.ndims is not None:
-      return [dim.value for dim in self._shape.dims]
+    if self.shape.ndims is not None:
+      return [dim.value for dim in self.shape.dims]
     else:
       return None
 
@@ -410,7 +423,7 @@ class Tensor(_TensorLike):
     Returns:
       Integer rank or None
     """
-    return self._shape.ndims
+    return self.shape.ndims
 
   def get_shape(self):
     """Alias of Tensor.shape."""
@@ -441,14 +454,35 @@ class Tensor(_TensorLike):
     ```
 
     Args:
-      shape: A `TensorShape` representing the shape of this tensor.
+      shape: A `TensorShape` representing the shape of this tensor, a
+      `TensorShapeProto`, a list, a tuple, or None.
 
     Raises:
       ValueError: If `shape` is not compatible with the current shape of
         this tensor.
     """
-    # TODO(skyewm): call C API
-    self._shape = self._shape.merge_with(shape)
+    if not _USE_C_API:
+      self._shape = self._shape.merge_with(shape)  # pylint: disable=protected-access
+      return
+    if not isinstance(shape, tensor_shape.TensorShape):
+      shape = tensor_shape.TensorShape(shape)
+    dim_list = []
+    if shape.dims is None:
+      unknown_shape = True
+    else:
+      unknown_shape = False
+      for dim in shape.dims:
+        if dim.value is None:
+          dim_list.append(-1)
+        else:
+          dim_list.append(dim.value)
+    with errors.raise_exception_on_not_ok_status() as status:
+      c_api.TF_GraphSetTensorShape_wrapper(
+          self._op._graph._c_graph,  # pylint: disable=protected-access
+          self._as_tf_output(),
+          dim_list,
+          unknown_shape,
+          status)
 
   @property
   def value_index(self):
@@ -4517,15 +4551,11 @@ def control_dependencies(control_inputs):
   See @{tf.Graph.control_dependencies}
   for more details.
 
-  When eager execution is enabled, any callable object in the `control_inputs`
-  list will be called.
-
   Args:
     control_inputs: A list of `Operation` or `Tensor` objects which
       must be executed or computed before running the operations
       defined in the context.  Can also be `None` to clear the control
-      dependencies. If eager execution is enabled, any callable object in the
-      `control_inputs` list will be called.
+      dependencies.
 
   Returns:
    A context manager that specifies control dependencies for all
@@ -4534,11 +4564,6 @@ def control_dependencies(control_inputs):
   if context.in_graph_mode():
     return get_default_graph().control_dependencies(control_inputs)
   else:
-    if control_inputs:
-      # Excute any pending callables.
-      for control in control_inputs:
-        if callable(control):
-          control()
     return _NullContextmanager()
 
 
-- 
GitLab


From e00156b36d91019039c9148dc86b64017154564e Mon Sep 17 00:00:00 2001
From: Alexandre Passos <apassos@google.com>
Date: Wed, 29 Nov 2017 13:24:28 -0800
Subject: [PATCH 0426/1225] Proper deallocation in the thread-local tape stack.

PiperOrigin-RevId: 177354350
---
 tensorflow/python/eager/pywrap_tfe_src.cc | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/tensorflow/python/eager/pywrap_tfe_src.cc b/tensorflow/python/eager/pywrap_tfe_src.cc
index ce823cb567..b52d71dc6c 100644
--- a/tensorflow/python/eager/pywrap_tfe_src.cc
+++ b/tensorflow/python/eager/pywrap_tfe_src.cc
@@ -531,12 +531,9 @@ static PyTypeObject TFE_Py_Tape_Type = {
 // xcode 7 doesn't define thread_local, so for compatibility we implement our
 // own. TODO(apassos) remove once we can deprecate xcode 7.
 #ifndef __APPLE__
-thread_local std::vector<TFE_Py_Tape*>* tape_stack = nullptr;
 std::vector<TFE_Py_Tape*>* GetTapeStack() {
-  if (tape_stack == nullptr) {
-    tape_stack = new std::vector<TFE_Py_Tape*>;
-  }
-  return tape_stack;
+  thread_local std::vector<TFE_Py_Tape*> tape_stack;
+  return &tape_stack;
 }
 #else
 static tensorflow::mutex stack_mu(tensorflow::LINKER_INITIALIZED);
-- 
GitLab


From d3a8bf0783754b8f4bbc24274ecd79d4cc3217f0 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 29 Nov 2017 13:28:59 -0800
Subject: [PATCH 0427/1225] Added comment/TODO concerning memory use of
 extract_images_patches.

PiperOrigin-RevId: 177354924
---
 tensorflow/contrib/kfac/python/ops/fisher_factors.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/tensorflow/contrib/kfac/python/ops/fisher_factors.py b/tensorflow/contrib/kfac/python/ops/fisher_factors.py
index fbc192f1dc..6c1dd0ae40 100644
--- a/tensorflow/contrib/kfac/python/ops/fisher_factors.py
+++ b/tensorflow/contrib/kfac/python/ops/fisher_factors.py
@@ -580,6 +580,9 @@ class ConvDiagonalFactor(DiagonalFactor):
     # the target entry of _outputs_grads changes with idx.)
     with _maybe_colocate_with(inputs, self._colocate_cov_ops_with_inputs):
       filter_height, filter_width, _, _ = self._filter_shape
+
+      # TODO(b/64144716): there is potential here for a big savings in terms of
+      # memory use.
       patches = array_ops.extract_image_patches(
           inputs,
           ksizes=[1, filter_height, filter_width, 1],
@@ -739,6 +742,9 @@ class ConvInputKroneckerFactor(InverseProvidingFactor):
     # TODO(jamesmartens): factor this patches stuff out into a utility function
     with _maybe_colocate_with(self._inputs, self._colocate_cov_ops_with_inputs):
       filter_height, filter_width, in_channels, _ = self._filter_shape
+
+      # TODO(b/64144716): there is potential here for a big savings in terms of
+      # memory use.
       patches = array_ops.extract_image_patches(
           self._inputs,
           ksizes=[1, filter_height, filter_width, 1],
-- 
GitLab


From 19f62f62e5dab41b62b60ac66e7d07c09d55e17a Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 29 Nov 2017 13:41:03 -0800
Subject: [PATCH 0428/1225] Re-enable Mul hoisting for aggregations other than
 Add when input shapes match.

PiperOrigin-RevId: 177356621
---
 .../optimizers/arithmetic_optimizer.cc        | 107 ++++++++++++------
 .../optimizers/arithmetic_optimizer_test.cc   | 106 +++++++++--------
 2 files changed, 135 insertions(+), 78 deletions(-)

diff --git a/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc b/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc
index 930d122234..6861a51795 100644
--- a/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc
+++ b/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc
@@ -253,6 +253,30 @@ bool IsNumberType(DataType dtype) {
 
 const char kOutputShapesAttr[] = "_output_shapes";
 
+PartialTensorShape GetInputShape(const string& input, const NodeMap& node_map) {
+  int output_pos;
+  string node_name = ParseNodeName(input, &output_pos);
+  const NodeDef* input_node = node_map.GetNode(node_name);
+  return input_node->attr().at(kOutputShapesAttr).list().shape(output_pos);
+}
+
+bool ShapesEqual(const string& input_x, const string& input_y,
+                 const NodeMap& node_map) {
+  PartialTensorShape x_shape = GetInputShape(input_x, node_map);
+  PartialTensorShape y_shape = GetInputShape(input_y, node_map);
+  if (x_shape.unknown_rank() || y_shape.unknown_rank() ||
+      x_shape.dims() != y_shape.dims()) {
+    return false;
+  }
+  for (int i = 0; i < x_shape.dims(); ++i) {
+    if (x_shape.dim_size(i) == -1 || y_shape.dim_size(i) == -1 ||
+        x_shape.dim_size(i) != y_shape.dim_size(i)) {
+      return false;
+    }
+  }
+  return true;
+}
+
 // Returns whether `reshape` is an identity op. The tensor that `reshape`
 // reshapes is the `output_pos`-th output of node `input`.
 bool ReshapeIsIdentity(const NodeDef& reshape, const NodeDef& input,
@@ -868,8 +892,11 @@ string ArithmeticOptimizer::TrySimplifyAndReplaceUses(
   // multiplication over addition to hoist common factors out of aggregate nodes
   // where all the inputs are Mul nodes. This pattern occurs frequently in
   // regularization terms for the gradients during training.
-  // TODO(rmlarsen): Check shapes and enable for AddN.
-  if (IsAdd(*node) && NumNonControlInputs(*node) > 1 &&
+  // For example, we can rewrite an expression of the form:
+  //   AddN(Mul(x, y1), Mul(y2, x), Mul(x, y3), ... Mul(x, yn))
+  // to the following:
+  //   Mul(x, AddN(y1, y2, y3, ... yn))
+  if (IsAggregate(*node) && NumNonControlInputs(*node) > 1 &&
       !OptimizedNodeExists(StrCat(node->name(), "_hoist_add"))) {
     // Determine the set of common factors if the input nodes are all Mul nodes.
     std::set<string> common_factors;
@@ -899,24 +926,15 @@ string ArithmeticOptimizer::TrySimplifyAndReplaceUses(
     }
     if (common_factors.size() == 1) {
       const string& common_factor = *common_factors.begin();
-      // In this case we have an expression of the form
-      //   AddN(Mul(x, y1), Mul(y2, x), Mul(x, y3), ... Mul(x, yn))
-      // that can be rewritten as
-      //   Mul(x, AddN(y1, y2, y3, ... yn))
-
-      // 1. Use a copy of the first Mul node for the outer multiplication.
-      NodeDef* new_mul_node = AddNode(StrCat(node->name(), "_hoist_mul"),
-                                      node_map_->GetNode(node->input(0)));
-      NodeDef* new_add_node = AddNode(StrCat(node->name(), "_hoist_add"), node);
-      new_mul_node->set_device(node->device());
-      new_mul_node->set_input(0, common_factor);
-      node_map_->AddOutput(common_factor, new_mul_node->name());
-      new_mul_node->set_input(1, new_add_node->name());
-      node_map_->AddOutput(new_add_node->name(), new_mul_node->name());
-
-      // 2. Hoist non-shared factors up into the new AddN node.
-      nodes_to_simplify->PushBack(new_add_node);
-      for (int i = 0; i < node->input_size(); ++i) {
+
+      // Gather up the non-shared factors (the y's in the example).
+      // Unless the aggregation is Add, we have to make sure that all the y's
+      // have the same shape since the other aggregation ops do not support
+      // broadcasting.
+      std::vector<string> unique_factors;
+      unique_factors.reserve(node->input_size());
+      bool shapes_match = true;
+      for (int i = 0; i < node->input_size() && shapes_match; ++i) {
         const string& input = node->input(i);
         if (IsControlInput(input)) {
           break;
@@ -924,15 +942,41 @@ string ArithmeticOptimizer::TrySimplifyAndReplaceUses(
         const NodeDef* mul_node = node_map_->GetNode(input);
         const int unique_factor_index =
             mul_node->input(0) == common_factor ? 1 : 0;
-        const string unique_factor = mul_node->input(unique_factor_index);
-        new_add_node->set_input(i, unique_factor);
+        unique_factors.push_back(mul_node->input(unique_factor_index));
+        if (i > 0 && !IsAdd(*node)) {
+          shapes_match = ShapesEqual(unique_factors.front(),
+                                     unique_factors.back(), *node_map_);
+        }
       }
 
-      // 4. Add frame dependencies that the original node might have had.
-      AddFrameControlDeps(node, {new_add_node, new_mul_node}, common_factor,
-                          {new_add_node});
+      if (shapes_match) {
+        // 1. Use a copy of the first Mul node for the outer multiplication.
+        NodeDef* new_mul_node = AddNode(StrCat(node->name(), "_hoist_mul"),
+                                        node_map_->GetNode(node->input(0)));
+        NodeDef* new_add_node =
+            AddNode(StrCat(node->name(), "_hoist_add"), node);
+        new_mul_node->set_device(node->device());
+        new_mul_node->set_input(0, common_factor);
+        node_map_->AddOutput(common_factor, new_mul_node->name());
+        new_mul_node->set_input(1, new_add_node->name());
+        node_map_->AddOutput(new_add_node->name(), new_mul_node->name());
+
+        // 2. Hoist non-shared factors up into the new AddN node.
+        nodes_to_simplify->PushBack(new_add_node);
+        for (int i = 0; i < node->input_size(); ++i) {
+          const string& input = node->input(i);
+          if (IsControlInput(input)) {
+            break;
+          }
+          new_add_node->set_input(i, unique_factors[i]);
+        }
 
-      return new_mul_node->name();
+        // 3. Add frame dependencies that the original node might have had.
+        AddFrameControlDeps(node, {new_add_node, new_mul_node}, common_factor,
+                            {new_add_node});
+
+        return new_mul_node->name();
+      }
     }
   }
 
@@ -1064,13 +1108,10 @@ Status ArithmeticOptimizer::Optimize(Cluster* /*cluster*/,
   int num_frames;
   TF_RETURN_IF_ERROR(IdentifyFramesWithNodeMap(*optimized_graph_, *node_map_,
                                                &frame_map_, &num_frames));
-  if (opt_level_ == RewriterConfig::AGGRESSIVE) {
-    graph_properties_.reset(new GraphProperties(item));
-    // Shapes are only needed in aggressive mode.
-    TF_RETURN_IF_ERROR(graph_properties_->InferStatically(false));
-    TF_RETURN_IF_ERROR(
-        graph_properties_->AnnotateOutputShapes(optimized_graph_));
-  }
+  graph_properties_.reset(new GraphProperties(item));
+  // Shapes are only needed in aggressive mode.
+  TF_RETURN_IF_ERROR(graph_properties_->InferStatically(false));
+  TF_RETURN_IF_ERROR(graph_properties_->AnnotateOutputShapes(optimized_graph_));
 
   // Perform the optimizations.
   DedupComputations();
diff --git a/tensorflow/core/grappler/optimizers/arithmetic_optimizer_test.cc b/tensorflow/core/grappler/optimizers/arithmetic_optimizer_test.cc
index e8a18ff9d9..80f42694d9 100644
--- a/tensorflow/core/grappler/optimizers/arithmetic_optimizer_test.cc
+++ b/tensorflow/core/grappler/optimizers/arithmetic_optimizer_test.cc
@@ -32,6 +32,21 @@ string OptimizedName(const string& name) {
   return AddPrefixToNodeName(name, kArithmeticOptimizer);
 }
 
+void VerifyGraphsMatch(const GraphDef& original_graph,
+                       const GraphDef& optimized_graph, int line) {
+  EXPECT_EQ(original_graph.node_size(), optimized_graph.node_size()) << line;
+  for (int i = 0; i < original_graph.node_size(); ++i) {
+    const NodeDef& original = original_graph.node(i);
+    const NodeDef& optimized = optimized_graph.node(i);
+    EXPECT_EQ(original.name(), optimized.name()) << line;
+    EXPECT_EQ(original.op(), optimized.op()) << line;
+    EXPECT_EQ(original.input_size(), optimized.input_size()) << line;
+    for (int j = 0; j < original.input_size(); ++j) {
+      EXPECT_EQ(original.input(j), optimized.input(j)) << line;
+    }
+  }
+}
+
 class ArithmeticOptimizerTest : public ::testing::Test {};
 
 TEST_F(ArithmeticOptimizerTest, NoOp) {
@@ -44,18 +59,7 @@ TEST_F(ArithmeticOptimizerTest, NoOp) {
   GraphDef output;
   Status status = optimizer.Optimize(nullptr, item, &output);
   TF_EXPECT_OK(status);
-
-  EXPECT_EQ(item.graph.node_size(), output.node_size());
-  for (int i = 0; i < item.graph.node_size(); ++i) {
-    const NodeDef& original = item.graph.node(i);
-    const NodeDef& optimized = output.node(i);
-    EXPECT_EQ(original.name(), optimized.name());
-    EXPECT_EQ(original.op(), optimized.op());
-    EXPECT_EQ(original.input_size(), optimized.input_size());
-    for (int j = 0; j < original.input_size(); ++j) {
-      EXPECT_EQ(original.input(j), optimized.input(j));
-    }
-  }
+  VerifyGraphsMatch(item.graph, output, __LINE__);
 }
 
 TEST_F(ArithmeticOptimizerTest, OpDedupping) {
@@ -398,39 +402,51 @@ TEST_F(ArithmeticOptimizerTest, TrivialSumsRepeatedAdd) {
 }
 
 TEST_F(ArithmeticOptimizerTest, HoistFactor) {
-  tensorflow::Scope s = tensorflow::Scope::NewRootScope();
-  Output x = ops::Const(s.WithOpName("x"), {1.0f, 2.0f}, {1, 2});
-  Output y1 = ops::Const(s.WithOpName("y1"), {3.0f, 4.0f}, {1, 2});
-  Output y2 = ops::Const(s.WithOpName("y2"), {5.0f, 6.0f}, {1, 2});
-  Output mul1 = ops::Mul(s.WithOpName("mul1"), x, y1);
-  Output mul2 = ops::Mul(s.WithOpName("mul2"), y2, x);
-  Output add = ops::Add(s.WithOpName("add"), mul1, mul2);
-  Output id = ops::Identity(s.WithOpName("id"), add);
-
-  GrapplerItem item;
-  TF_CHECK_OK(s.ToGraphDef(&item.graph));
-
-  ArithmeticOptimizer optimizer;
-  GraphDef output;
-  Status status = optimizer.Optimize(nullptr, item, &output);
-  TF_EXPECT_OK(status);
-  // Run the optimizer twice to make sure the rewrite is idempotent.
-  item.graph.Swap(&output);
-  status = optimizer.Optimize(nullptr, item, &output);
-  TF_EXPECT_OK(status);
-
-  EXPECT_EQ(9, output.node_size());
-  const NodeDef& new_add = output.node(8);
-  EXPECT_EQ(OptimizedName("add_hoist_add"), new_add.name());
-  EXPECT_EQ("y1", new_add.input(0));
-  EXPECT_EQ("y2", new_add.input(1));
-  const NodeDef& new_mul = output.node(7);
-  EXPECT_EQ(OptimizedName("add_hoist_mul"), new_mul.name());
-  EXPECT_EQ("x", new_mul.input(0));
-  EXPECT_EQ(OptimizedName("add_hoist_add"), new_mul.input(1));
-  const NodeDef& new_id = output.node(6);
-  EXPECT_EQ("id", new_id.name());
-  EXPECT_EQ(OptimizedName("add_hoist_mul"), new_id.input(0));
+  for (bool matching_shapes : {true, false}) {
+    for (bool use_addn : {true, false}) {
+      tensorflow::Scope s = tensorflow::Scope::NewRootScope();
+      Output x = ops::Const(s.WithOpName("x"), {1.0f, 2.0f}, {1, 2});
+      Output y1 = ops::Const(s.WithOpName("y1"), {3.0f, 4.0f}, {1, 2});
+      Output y2 = matching_shapes
+                      ? ops::Const(s.WithOpName("y2"), {5.0f, 6.0f}, {1, 2})
+                      : ops::Const(s.WithOpName("y2"), {5.0f}, {1, 1});
+      Output mul1 = ops::Mul(s.WithOpName("mul1"), x, y1);
+      Output mul2 = ops::Mul(s.WithOpName("mul2"), y2, x);
+      Output id =
+          use_addn ? ops::Identity(s.WithOpName("id"),
+                                   ops::AddN(s.WithOpName("add"), {mul1, mul2}))
+                   : ops::Identity(s.WithOpName("id"),
+                                   ops::Add(s.WithOpName("add"), mul1, mul2));
+
+      GrapplerItem item;
+      TF_CHECK_OK(s.ToGraphDef(&item.graph));
+      ArithmeticOptimizer optimizer;
+      GraphDef output;
+      Status status = optimizer.Optimize(nullptr, item, &output);
+      TF_EXPECT_OK(status);
+      // Run the optimizer twice to make sure the rewrite is idempotent.
+      item.graph.Swap(&output);
+      status = optimizer.Optimize(nullptr, item, &output);
+      TF_EXPECT_OK(status);
+
+      if (use_addn && !matching_shapes) {
+        VerifyGraphsMatch(item.graph, output, __LINE__);
+      } else {
+        EXPECT_EQ(9, output.node_size());
+        const NodeDef& new_add = output.node(8);
+        EXPECT_EQ(OptimizedName("add_hoist_add"), new_add.name());
+        EXPECT_EQ("y1", new_add.input(0));
+        EXPECT_EQ("y2", new_add.input(1));
+        const NodeDef& new_mul = output.node(7);
+        EXPECT_EQ(OptimizedName("add_hoist_mul"), new_mul.name());
+        EXPECT_EQ("x", new_mul.input(0));
+        EXPECT_EQ(OptimizedName("add_hoist_add"), new_mul.input(1));
+        const NodeDef& new_id = output.node(6);
+        EXPECT_EQ("id", new_id.name());
+        EXPECT_EQ(OptimizedName("add_hoist_mul"), new_id.input(0));
+      }
+    }
+  }
 }
 
 TEST_F(ArithmeticOptimizerTest, FuseConjAndTranspose) {
-- 
GitLab


From 48347ee4105d78d8f36ba8645953b75cb5280c4c Mon Sep 17 00:00:00 2001
From: Yao Zhang <yaozhang@google.com>
Date: Wed, 29 Nov 2017 13:46:24 -0800
Subject: [PATCH 0429/1225] Simplify const node creation.

PiperOrigin-RevId: 177357416
---
 tensorflow/core/grappler/optimizers/BUILD     |   5 +
 .../grappler/optimizers/layout_optimizer.cc   | 218 +++++++-----------
 .../optimizers/layout_optimizer_test.cc       |  75 +++++-
 .../python/grappler/layout_optimizer_test.py  |   2 +-
 4 files changed, 169 insertions(+), 131 deletions(-)

diff --git a/tensorflow/core/grappler/optimizers/BUILD b/tensorflow/core/grappler/optimizers/BUILD
index 5d9eb8e0b1..24e6f8847a 100644
--- a/tensorflow/core/grappler/optimizers/BUILD
+++ b/tensorflow/core/grappler/optimizers/BUILD
@@ -332,6 +332,11 @@ tf_cc_test(
     deps = [
         ":layout_optimizer",
         "//tensorflow/cc:cc_ops",
+        "//tensorflow/cc:cc_ops_internal",
+        "//tensorflow/core:all_kernels",
+        "//tensorflow/core:core_cpu",
+        "//tensorflow/core:framework",
+        "//tensorflow/core:lib",
         "//tensorflow/core:protos_all_cc",
         "//tensorflow/core:test",
         "//tensorflow/core:test_main",
diff --git a/tensorflow/core/grappler/optimizers/layout_optimizer.cc b/tensorflow/core/grappler/optimizers/layout_optimizer.cc
index 1b8046b787..ef4b015295 100644
--- a/tensorflow/core/grappler/optimizers/layout_optimizer.cc
+++ b/tensorflow/core/grappler/optimizers/layout_optimizer.cc
@@ -69,6 +69,8 @@ std::set<string> GetOpsFormatSupported() {
   return ops_format_supported;
 }
 
+// TODO(yaozhang): enable SumProcessor with auto-tuning. Currently disabled
+// because of the worse performance in some cases.
 std::set<string> GetOpsFormatAgnostic() {
   std::set<string> ops_format_agnostic = {"Add",
                                           "AddN",
@@ -88,7 +90,7 @@ std::set<string> GetOpsFormatAgnostic() {
                                           "Split",
                                           "SquaredDifference",
                                           "Squeeze",
-                                          "Sub"};
+                                          /*"Sum",*/ "Sub"};
   return ops_format_agnostic;
 }
 
@@ -186,33 +188,6 @@ class GraphProcessor {
     return node;
   }
 
-  NodeDef* AddNodeReductionConst(const string& name, const string& device) {
-    NodeDef* node = graph_->add_node();
-    node_map_->AddNode(name, node);
-    node->set_name(name);
-    node->set_op("Const");
-    AttrValue attr_data_type;
-    attr_data_type.set_type(DT_INT32);
-    node->mutable_attr()->insert({"dtype", attr_data_type});
-
-    AttrValue attr_tensor;
-    Tensor tensor(DT_INT32, TensorShape({3}));
-    std::vector<int> axis = {0, 2, 3};
-    for (int i = 0; static_cast<size_t>(i) < axis.size(); i++) {
-      tensor.flat<int>()(i) = axis[i];
-    }
-    tensor.AsProtoTensorContent(attr_tensor.mutable_tensor());
-    node->mutable_attr()->insert({"value", attr_tensor});
-    string device_name;
-    if (device.empty()) {
-      device_name = virtual_placer_.get_canonical_device_name(*node);
-    } else {
-      device_name = device;
-    }
-    node->set_device(device_name);
-    return node;
-  }
-
   const VirtualPlacer& virtual_placer_;
   const std::unordered_set<string>& nodes_to_preserve_;
   GraphDef* graph_;
@@ -370,10 +345,20 @@ class NodeProcessor : public GraphProcessor {
       LOG(ERROR) << "Failed to parse TensorProto.";
     }
     if (tensor.dims() == 1) {
-      int c = tensor.flat<int>()(3);
-      tensor.flat<int>()(3) = tensor.flat<int>()(2);
-      tensor.flat<int>()(2) = tensor.flat<int>()(1);
-      tensor.flat<int>()(1) = c;
+      if (tensor.flat<int>().size() == 4) {
+        int c = tensor.flat<int>()(3);
+        tensor.flat<int>()(3) = tensor.flat<int>()(2);
+        tensor.flat<int>()(2) = tensor.flat<int>()(1);
+        tensor.flat<int>()(1) = c;
+      } else if (tensor.flat<int>().size() == 3) {
+        tensor.flat<int>()(0) = 0;
+        tensor.flat<int>()(1) = 2;
+        tensor.flat<int>()(2) = 3;
+      } else {
+        return Status(error::INVALID_ARGUMENT,
+                      strings::StrCat("Unsupported tensor size: ",
+                                      tensor.flat<int>().size()));
+      }
     } else if (tensor.dims() == 2) {
       for (int i = 0; i < 2; i++) {
         int c = tensor.matrix<int>()(3, i);
@@ -394,7 +379,9 @@ class NodeProcessor : public GraphProcessor {
   Status UpdateAttrValueOfInput(int input_index) {
     auto input_node = node_map_->GetNode(node_->input(input_index));
     // We created a copy of the node, so that we don't modify the original node,
-    // which might be used elsewhere.
+    // which might be used elsewhere. Note that this copy also copies the
+    // control dependency input in the case this node is inside a loop,
+    // to ensure added_node is in the same frame with node_.
     NodeDef* added_node = graph_->add_node();
     *added_node = *input_node;
     string base_name = strings::StrCat(node_->name(), "-", input_node->name());
@@ -411,6 +398,14 @@ class NodeProcessor : public GraphProcessor {
     return input_pos;
   }
 
+  virtual std::set<int> GetOutputPos() const {
+    // For most nodes, no need to process control nodes or nodes that use an
+    // output other than the first output: only the first output is of
+    // 4D NCHW/NHWC format and thus relevant here.
+    std::set<int> output_pos = {0};
+    return output_pos;
+  }
+
   NodeDef* AddNodeTranspose(const string& node_name, const string& input_name,
                             const string& const_name, DataType data_type,
                             const TensorShapeProto& input_shape,
@@ -476,37 +471,28 @@ class NodeProcessor : public GraphProcessor {
     auto outputs = node_map_->GetOutputs(node_->name());
     string const_name = GetOrAddNodePermNCHWToNHWC();
     for (const auto& output : outputs) {
-      string base_name = strings::StrCat(node_->name(), "-", output->name());
-      string node_name =
-          AddPrefixToNodeName(base_name, kTransposeNCHWToNHWC, "-");
-      // TODO(yaozhang): handle the rare case where node A is connected to more
-      // than one input of node B.
-      auto it = std::find_if(output->mutable_input()->begin(),
-                             output->mutable_input()->end(),
-                             [this](const string& input) {
-                               string node_name = NodeName(input);
-                               return node_name.compare(node_->name()) == 0;
-                             });
-      if (it == output->mutable_input()->end()) {
-        return Status(error::INVALID_ARGUMENT,
-                      strings::StrCat("Expect ", node_->name(),
-                                      " to be an input of ", output->name()));
-      }
-      int output_pos = NodePosition(*it);
-      // No need to process control nodes or nodes that use an output
-      // other than the first output: only the first output is of 4D NCHW/NHWC
-      // format and thus relevant here.
-      if (output_pos != 0) {
-        continue;
+      for (int i = 0; i < output->input_size(); i++) {
+        auto& input = *output->mutable_input(i);
+        int input_port;
+        string input_name = ParseNodeName(input, &input_port);
+        auto output_pos = GetOutputPos();
+        if (input_name == node_->name() &&
+            output_pos.find(input_port) != output_pos.end()) {
+          string base_name =
+              strings::StrCat(node_->name(), "-", output->name(), "-", i);
+          string node_name =
+              AddPrefixToNodeName(base_name, kTransposeNCHWToNHWC, "-");
+          TF_RETURN_IF_ERROR(HasAttribute(*node_, "T"));
+          TF_RETURN_IF_ERROR(HasAttribute(*node_, "_output_shapes"));
+          AddNodeTranspose(
+              node_name, input, const_name, node_->attr().at("T").type(),
+              node_->attr().at("_output_shapes").list().shape(0), false);
+          input = node_name;
+          node_map_->AddOutput(node_->name(), node_name);
+          node_map_->AddOutput(node_name, output->name());
+        }
       }
-      TF_RETURN_IF_ERROR(HasAttribute(*node_, "T"));
-      TF_RETURN_IF_ERROR(HasAttribute(*node_, "_output_shapes"));
-      AddNodeTranspose(
-          node_name, node_->name(), const_name, node_->attr().at("T").type(),
-          node_->attr().at("_output_shapes").list().shape(0), false);
-      *it = node_name;
-      node_map_->UpdateOutput(node_->name(), output->name(), node_name);
-      node_map_->AddOutput(node_name, output->name());
+      node_map_->RemoveOutput(node_->name(), output->name());
     }
     return Status::OK();
   }
@@ -948,7 +934,7 @@ class ConcatProcessor : public AgnosticNodeProcessor {
   }
 
   Status CustomizedProcessing() override {
-    string concat_const_name = GetOrAddNodeConcatConst();
+    string concat_const_name = AddNodeConcatConst()->name();
     node_map_->AddOutput(concat_const_name, node_->name());
     *node_->mutable_input(axis_node_pos_) = concat_const_name;
     return Status::OK();
@@ -956,8 +942,14 @@ class ConcatProcessor : public AgnosticNodeProcessor {
 
   bool IsAlongDimC() const {
     auto axis_node = node_map_->GetNode(node_->input(axis_node_pos_));
+    if (!IsConstant(*axis_node)) {
+      return false;
+    }
     if (axis_node->attr().find("value") != axis_node->attr().end()) {
-      return axis_node->attr().at("value").tensor().int_val(0) == 3;
+      auto tensor = axis_node->attr().at({"value"}).tensor();
+      if (tensor.tensor_shape().dim_size() == 0 && tensor.int_val_size() == 1) {
+        return tensor.int_val(0) == 3;
+      }
     }
     return false;
   }
@@ -965,28 +957,18 @@ class ConcatProcessor : public AgnosticNodeProcessor {
   int axis_node_pos_;
 
  private:
-  NodeDef* AddNodeConcatConst(const string& suffix, const string& depended_node,
-                              const string& device) {
-    auto const_node = AddNodeConstScalar(
-        strings::StrCat(kConcatConst, "-", suffix), device, DT_INT32, 1);
-    // This is to ensure the concat node and the const node are
-    // in the same frame.
-    *const_node->add_input() = AsControlDependency(depended_node);
-    return const_node;
-  }
-
-  string GetOrAddNodeConcatConst() {
-    string const_name;
-    if (is_in_frame_) {
-      int value_node_pos = (axis_node_pos_ == 0) ? 1 : 0;
-      auto const_node = AddNodeConcatConst(
-          node_->name(), NodeName(node_->input(value_node_pos)),
-          node_->device());
-      const_name = const_node->name();
-    } else {
-      const_name = kConcatConst;
-    }
-    return const_name;
+  NodeDef* AddNodeConcatConst() {
+    auto axis_node = node_map_->GetNode(node_->input(axis_node_pos_));
+    // We created a copy of the node, so that we don't modify the original node,
+    // which might be used elsewhere. Note that this copy also copies the
+    // control dependency input in the case this node is inside a loop,
+    // to ensure added_node is in the same frame with node_.
+    auto added_node = graph_->add_node();
+    *added_node = *axis_node;
+    added_node->set_name(strings::StrCat(kConcatConst, "-", node_->name()));
+    added_node->mutable_attr()->at({"value"}).mutable_tensor()->set_int_val(0,
+                                                                            1);
+    return added_node;
   }
 };
 
@@ -1036,6 +1018,16 @@ class SplitProcessor : public AgnosticNodeProcessor {
     return input_pos;
   }
 
+  std::set<int> GetOutputPos() const override {
+    std::set<int> output_pos{0};
+    if (HasAttribute(*node_, "num_split").ok()) {
+      for (int i = 1; i < node_->attr().at("num_split").i(); i++) {
+        output_pos.insert(i);
+      }
+    }
+    return output_pos;
+  }
+
   Status CustomizedProcessing() override {
     string split_const_name = AddNodeSplitConst()->name();
     node_map_->AddOutput(split_const_name, node_->name());
@@ -1073,7 +1065,7 @@ class SplitProcessor : public AgnosticNodeProcessor {
     // We created a copy of the node, so that we don't modify the original node,
     // which might be used elsewhere. Note that this copy also copies the
     // control dependency input in the case this node is inside a loop,
-    // to ensure added_node is in the same frame with the Split node.
+    // to ensure added_node is in the same frame with node_.
     NodeDef* added_node = graph_->add_node();
     *added_node = *dim_node;
     added_node->set_name(strings::StrCat(kSplitConst, "-", node_->name()));
@@ -1329,20 +1321,21 @@ class SumProcessor : public AgnosticNodeProcessor {
 
   Status AddLayoutTransposeToOutputs() override { return Status::OK(); }
 
-  Status CustomizedProcessing() override {
-    node_map_->AddOutput(kReductionConst, node_->name());
-    *node_->mutable_input(1) = GetOrAddNodeReductionConst();
-    return Status::OK();
-  }
+  Status CustomizedProcessing() override { return UpdateAttrValueOfInput(1); }
 
  private:
   bool IsAlongDimNHW() const {
-    NodeDef* node = node_map_->GetNode(node_->input(1));
+    NodeDef* reduction_indices = node_map_->GetNode(node_->input(1));
+    if (!IsConstant(*reduction_indices)) {
+      return false;
+    }
     Tensor tensor;
-    if (node->attr().find({"value"}) == node->attr().end()) {
+    if (reduction_indices->attr().find({"value"}) ==
+        reduction_indices->attr().end()) {
       return false;
     }
-    auto success = tensor.FromProto(node->attr().at({"value"}).tensor());
+    auto success =
+        tensor.FromProto(reduction_indices->attr().at({"value"}).tensor());
     if (!success) {
       LOG(ERROR) << "Failed to parse TensorProto.";
       return false;
@@ -1356,29 +1349,6 @@ class SumProcessor : public AgnosticNodeProcessor {
     }
     return false;
   }
-
-  NodeDef* AddNodeReductionConst(const string& suffix,
-                                 const string& depended_node,
-                                 const string& device) {
-    auto const_node = GraphProcessor::AddNodeReductionConst(
-        strings::StrCat(kReductionConst, "-", suffix), device);
-    // This is to ensure the Sum node and the const node are in the
-    // same frame.
-    *const_node->add_input() = AsControlDependency(depended_node);
-    return const_node;
-  }
-
-  string GetOrAddNodeReductionConst() {
-    string const_name;
-    if (is_in_frame_) {
-      auto const_node = AddNodeReductionConst(
-          node_->name(), NodeName(node_->input(0)), node_->device());
-      const_name = const_node->name();
-    } else {
-      const_name = kReductionConst;
-    }
-    return const_name;
-  }
 };
 
 class DataLayoutOptimizer : GraphProcessor {
@@ -1409,18 +1379,10 @@ class DataLayoutOptimizer : GraphProcessor {
     return AddNodePermConst(kPermNCHWToNHWC, "", {0, 2, 3, 1});
   }
 
-  NodeDef* AddNodeConcatConst() {
-    return AddNodeConstScalar(kConcatConst, "", DT_INT32, 1);
-  }
-
   NodeDef* AddNodeGatherAxisConst() {
     return AddNodeConstScalar(kGatherAxisConst, "", DT_INT32, 0);
   }
 
-  NodeDef* AddNodeReductionConst() {
-    return GraphProcessor::AddNodeReductionConst(kReductionConst, "");
-  }
-
   // Expand all nodes which is in NHWC, but supports NCHW or is layout agnostic.
   Status Expand() {
     int node_size_original = graph_->node_size();
@@ -1474,9 +1436,7 @@ class DataLayoutOptimizer : GraphProcessor {
     if (graph_->node_size() > node_size_original) {
       NodeDef* n = AddNodePermNHWCToNCHW();
       n = AddNodePermNCHWToNHWC();
-      n = AddNodeConcatConst();
       n = AddNodeGatherAxisConst();
-      n = AddNodeReductionConst();
       std::set<string> ops_format_agnostic = GetOpsFormatAgnostic();
       for (int i = 0; i < graph_->node_size(); i++) {
         if (ops_format_agnostic.find(graph_->node(i).op()) !=
diff --git a/tensorflow/core/grappler/optimizers/layout_optimizer_test.cc b/tensorflow/core/grappler/optimizers/layout_optimizer_test.cc
index 8c89f6744b..e8f7b8ac3c 100644
--- a/tensorflow/core/grappler/optimizers/layout_optimizer_test.cc
+++ b/tensorflow/core/grappler/optimizers/layout_optimizer_test.cc
@@ -495,7 +495,80 @@ TEST_F(LayoutOptimizerTest, SplitNonConstDim) {
   auto split_node = node_map.GetNode("split");
   EXPECT_EQ(split_node->input(0), "i1");
   EXPECT_EQ(split_node->input(1),
-            "LayoutOptimizerTransposeNCHWToNHWC-Conv2D-split");
+            "LayoutOptimizerTransposeNCHWToNHWC-Conv2D-split-1");
+}
+
+TEST_F(LayoutOptimizerTest, SplitSamePortToMultipleInputsOfSameNode) {
+  tensorflow::Scope s = tensorflow::Scope::NewRootScope();
+  auto conv = SimpleConv2D(&s, 3, 2, "VALID");
+  auto axis = ops::Const(s.WithOpName("axis"), 3);
+  auto split = ops::Split(s.WithOpName("split"), axis, conv, 2);
+  auto concat =
+      ops::Concat(s.WithOpName("concat"), {split[1], split[1], split[1]}, axis);
+  auto o = ops::Identity(s.WithOpName("o"), concat);
+  GrapplerItem item;
+  TF_CHECK_OK(s.ToGraphDef(&item.graph));
+  LayoutOptimizer optimizer;
+  GraphDef output;
+  Status status = optimizer.Optimize(virtual_cluster_.get(), item, &output);
+  NodeMap node_map(&output);
+  auto concat_node = node_map.GetNode("concat");
+  EXPECT_EQ(concat_node->input(0), "split:1");
+  EXPECT_EQ(concat_node->input(1), "split:1");
+  EXPECT_EQ(concat_node->input(2), "split:1");
+  EXPECT_EQ(concat_node->input(3), "LayoutOptimizerConcatConst-concat");
+  auto concat_dim = node_map.GetNode("LayoutOptimizerConcatConst-concat");
+  EXPECT_EQ(concat_dim->attr().at({"value"}).tensor().int_val(0), 1);
+}
+
+TEST_F(LayoutOptimizerTest, Concat) {
+  tensorflow::Scope s = tensorflow::Scope::NewRootScope();
+  auto conv = SimpleConv2D(&s, 3, 2, "VALID");
+  auto axis = ops::Const(s.WithOpName("axis"), 3);
+  auto split = ops::Split(s.WithOpName("split"), axis, conv, 2);
+  auto concat = ops::Concat(s.WithOpName("concat"), {split[0], split[1]}, axis);
+  auto o = ops::Identity(s.WithOpName("o"), concat);
+  GrapplerItem item;
+  TF_CHECK_OK(s.ToGraphDef(&item.graph));
+  LayoutOptimizer optimizer;
+  GraphDef output;
+  Status status = optimizer.Optimize(virtual_cluster_.get(), item, &output);
+  NodeMap node_map(&output);
+  auto concat_node = node_map.GetNode("concat");
+  EXPECT_EQ(concat_node->input(0), "split");
+  EXPECT_EQ(concat_node->input(1), "split:1");
+  EXPECT_EQ(concat_node->input(2), "LayoutOptimizerConcatConst-concat");
+  auto concat_dim = node_map.GetNode("LayoutOptimizerConcatConst-concat");
+  EXPECT_EQ(concat_dim->attr().at({"value"}).tensor().int_val(0), 1);
+}
+
+TEST_F(LayoutOptimizerTest, Sum) {
+  tensorflow::Scope s = tensorflow::Scope::NewRootScope();
+  auto conv = SimpleConv2D(&s, 3, 2, "VALID");
+  auto reduction_indices =
+      ops::Const(s.WithOpName("reduction_indices"), {0, 1, 2}, {3});
+  auto sum = ops::Sum(s.WithOpName("sum"), conv, reduction_indices);
+  auto o = ops::Identity(s.WithOpName("o"), sum);
+  GrapplerItem item;
+  TF_CHECK_OK(s.ToGraphDef(&item.graph));
+  LayoutOptimizer optimizer;
+  GraphDef output;
+  Status status = optimizer.Optimize(virtual_cluster_.get(), item, &output);
+  // TODO(yaozhang): enable SumProcessor with auto-tuning. Currently disabled
+  // because of the worse performance in some cases.
+  /*
+  NodeMap node_map(&output);
+  auto sum_node = node_map.GetNode("sum");
+  EXPECT_EQ(sum_node->input(0), "Conv2D");
+  EXPECT_EQ(sum_node->input(1), "LayoutOptimizer-sum-reduction_indices");
+  auto sum_const = node_map.GetNode("LayoutOptimizer-sum-reduction_indices");
+  Tensor tensor;
+  EXPECT_TRUE(
+      tensor.FromProto(sum_const->mutable_attr()->at({"value"}).tensor()));
+  Tensor tensor_expected(DT_INT32, {3});
+  test::FillValues<int>(&tensor_expected, {0, 2, 3});
+  test::ExpectTensorEqual<int>(tensor_expected, tensor);
+  */
 }
 
 }  // namespace
diff --git a/tensorflow/python/grappler/layout_optimizer_test.py b/tensorflow/python/grappler/layout_optimizer_test.py
index 626e0502cb..50735fb567 100644
--- a/tensorflow/python/grappler/layout_optimizer_test.py
+++ b/tensorflow/python/grappler/layout_optimizer_test.py
@@ -190,7 +190,7 @@ class LayoutOptimizerTest(test.TestCase):
       self.assertEqual(expected_num_transposes, num_transposes)
       self.assertIn('LayoutOptimizerTransposeNHWCToNCHW-Conv2D-Reshape-0',
                     nodes)
-      self.assertIn('LayoutOptimizerTransposeNCHWToNHWC-Relu_1-MaxPool_1',
+      self.assertIn('LayoutOptimizerTransposeNCHWToNHWC-Relu_1-MaxPool_1-0',
                     nodes)
 
       self.assertAllClose(output_val_ref, output_val, atol=1e-3)
-- 
GitLab


From 1d0b07351d901334b33565595d4c23607f11cc27 Mon Sep 17 00:00:00 2001
From: Christopher Olston <olston@google.com>
Date: Wed, 29 Nov 2017 13:58:55 -0800
Subject: [PATCH 0430/1225] Add a way to query a batch scheduler to determine
 the max task size.

A layer on top of the batcher could use this interface to pre-split large tasks that exceed the max batch size.

PiperOrigin-RevId: 177359263
---
 .../contrib/batching/adaptive_shared_batch_scheduler.h       | 2 ++
 .../contrib/batching/adaptive_shared_batch_scheduler_test.cc | 1 +
 tensorflow/contrib/batching/basic_batch_scheduler.h          | 4 ++++
 tensorflow/contrib/batching/basic_batch_scheduler_test.cc    | 1 +
 tensorflow/contrib/batching/batch_scheduler.h                | 4 ++++
 tensorflow/contrib/batching/shared_batch_scheduler.h         | 5 +++++
 tensorflow/contrib/batching/shared_batch_scheduler_test.cc   | 1 +
 7 files changed, 18 insertions(+)

diff --git a/tensorflow/contrib/batching/adaptive_shared_batch_scheduler.h b/tensorflow/contrib/batching/adaptive_shared_batch_scheduler.h
index 6ed177e001..9e32bee505 100644
--- a/tensorflow/contrib/batching/adaptive_shared_batch_scheduler.h
+++ b/tensorflow/contrib/batching/adaptive_shared_batch_scheduler.h
@@ -208,6 +208,8 @@ class ASBSQueue : public BatchScheduler<TaskType> {
   // place any more tasks in this batch.
   void ReleaseBatch(const ASBSBatch<TaskType>* batch);
 
+  size_t max_task_size() const override { return options_.max_batch_size; }
+
  private:
   std::shared_ptr<AdaptiveSharedBatchScheduler<TaskType>> scheduler_;
   const QueueOptions options_;
diff --git a/tensorflow/contrib/batching/adaptive_shared_batch_scheduler_test.cc b/tensorflow/contrib/batching/adaptive_shared_batch_scheduler_test.cc
index a07cd6d834..e2aac54eeb 100644
--- a/tensorflow/contrib/batching/adaptive_shared_batch_scheduler_test.cc
+++ b/tensorflow/contrib/batching/adaptive_shared_batch_scheduler_test.cc
@@ -186,6 +186,7 @@ TEST(AdaptiveSharedBatchSchedulerTest, ObeysQueueOptions) {
     queue_options.max_enqueued_batches = 2;
     TF_ASSERT_OK(
         scheduler->AddQueue(queue_options, queue_0_callback, &queue_0));
+    EXPECT_EQ(10, queue_0->max_task_size());
     queue_options.max_batch_size = 0;
     // Queue must have max_batch_size > 0.
     EXPECT_FALSE(
diff --git a/tensorflow/contrib/batching/basic_batch_scheduler.h b/tensorflow/contrib/batching/basic_batch_scheduler.h
index 9d3805fbaf..91065db249 100644
--- a/tensorflow/contrib/batching/basic_batch_scheduler.h
+++ b/tensorflow/contrib/batching/basic_batch_scheduler.h
@@ -192,6 +192,10 @@ class BasicBatchScheduler : public BatchScheduler<TaskType> {
   size_t NumEnqueuedTasks() const override;
   size_t SchedulingCapacity() const override;
 
+  size_t max_task_size() const override {
+    return shared_scheduler_queue_->max_task_size();
+  }
+
  private:
   explicit BasicBatchScheduler(
       std::unique_ptr<BatchScheduler<TaskType>> shared_scheduler_queue);
diff --git a/tensorflow/contrib/batching/basic_batch_scheduler_test.cc b/tensorflow/contrib/batching/basic_batch_scheduler_test.cc
index e020301795..187823151c 100644
--- a/tensorflow/contrib/batching/basic_batch_scheduler_test.cc
+++ b/tensorflow/contrib/batching/basic_batch_scheduler_test.cc
@@ -73,6 +73,7 @@ TEST(BasicBatchSchedulerTest, Basic) {
     std::unique_ptr<BasicBatchScheduler<FakeTask>> scheduler;
     TF_ASSERT_OK(
         BasicBatchScheduler<FakeTask>::Create(options, callback, &scheduler));
+    EXPECT_EQ(10, scheduler->max_task_size());
     EXPECT_EQ(0, scheduler->NumEnqueuedTasks());
     EXPECT_EQ(3 * 10, scheduler->SchedulingCapacity());
     TF_ASSERT_OK(ScheduleTask(3, scheduler.get()));
diff --git a/tensorflow/contrib/batching/batch_scheduler.h b/tensorflow/contrib/batching/batch_scheduler.h
index a5072f439a..e18cf6c350 100644
--- a/tensorflow/contrib/batching/batch_scheduler.h
+++ b/tensorflow/contrib/batching/batch_scheduler.h
@@ -178,6 +178,10 @@ class BatchScheduler {
   // This method is useful for monitoring, or for guaranteeing a future slot in
   // the schedule (but being mindful about the caveats listed above).
   virtual size_t SchedulingCapacity() const = 0;
+
+  // Returns the maximum allowed size of tasks submitted to the scheduler. (This
+  // is typically equal to a configured maximum batch size.)
+  virtual size_t max_task_size() const = 0;
 };
 
 //////////
diff --git a/tensorflow/contrib/batching/shared_batch_scheduler.h b/tensorflow/contrib/batching/shared_batch_scheduler.h
index 41a3f99137..1d2158062e 100644
--- a/tensorflow/contrib/batching/shared_batch_scheduler.h
+++ b/tensorflow/contrib/batching/shared_batch_scheduler.h
@@ -248,6 +248,9 @@ class Queue {
   // BatchScheduler::SchedulingCapacity().
   size_t SchedulingCapacity() const;
 
+  // Returns the maximum allowed size of tasks submitted to the queue.
+  size_t max_task_size() const { return options_.max_batch_size; }
+
   // Called by a thread that is ready to process a batch, to request one from
   // this queue. Either returns a batch that is ready to be processed, or
   // nullptr if the queue declines to schedule a batch at this time. If it
@@ -338,6 +341,8 @@ class QueueHandle : public BatchScheduler<TaskType> {
   size_t NumEnqueuedTasks() const override;
   size_t SchedulingCapacity() const override;
 
+  size_t max_task_size() const override { return queue_->max_task_size(); }
+
  private:
   // The scheduler that owns 'queue_'.
   std::shared_ptr<SharedBatchScheduler<TaskType>> scheduler_;
diff --git a/tensorflow/contrib/batching/shared_batch_scheduler_test.cc b/tensorflow/contrib/batching/shared_batch_scheduler_test.cc
index 3e924ae5f1..3ac79a8fdc 100644
--- a/tensorflow/contrib/batching/shared_batch_scheduler_test.cc
+++ b/tensorflow/contrib/batching/shared_batch_scheduler_test.cc
@@ -429,6 +429,7 @@ TEST(SharedBatchSchedulerTest, ConstMethods) {
     queue_options.max_enqueued_batches = max_enqueued_batches;
     std::unique_ptr<BatchScheduler<FakeTask>> queue;
     TF_ASSERT_OK(scheduler->AddQueue(queue_options, callback, &queue));
+    EXPECT_EQ(2, queue->max_task_size());
     EXPECT_EQ(0, queue->NumEnqueuedTasks());
     EXPECT_EQ(max_enqueued_batches * 2, queue->SchedulingCapacity());
 
-- 
GitLab


From cb5a63d8d2b6e049a0a128ba47560f842497db8b Mon Sep 17 00:00:00 2001
From: Igor Ganichev <iga@google.com>
Date: Wed, 29 Nov 2017 14:01:29 -0800
Subject: [PATCH 0431/1225] Check when session cannot run because its graph was
 modified

With current tensorflow code, if user modifies
some operation after session.run() was called, this modification will
never make it to the C++ runtime and no errors will be raised leading
to silent wrong results.

This change adds checks for such cases when C API is enabled. We don't
change the code path for C API being disabled because C API should
be enabled by default soon.

PiperOrigin-RevId: 177359630
---
 tensorflow/c/c_api.cc                    |  38 +++++--
 tensorflow/c/c_api_internal.h            |  21 +++-
 tensorflow/c/python_api.cc               |  10 ++
 tensorflow/python/client/session_test.py | 133 +++++++++++++++++++++++
 4 files changed, 190 insertions(+), 12 deletions(-)

diff --git a/tensorflow/c/c_api.cc b/tensorflow/c/c_api.cc
index 4fb8ec8e4b..c8b4bfffd4 100644
--- a/tensorflow/c/c_api.cc
+++ b/tensorflow/c/c_api.cc
@@ -624,6 +624,23 @@ Status MessageToBuffer(const tensorflow::protobuf::Message& in,
   return Status::OK();
 }
 
+void RecordMutation(TF_Graph* graph, const TF_Operation& op,
+                    const char* mutation_type)
+    EXCLUSIVE_LOCKS_REQUIRED(graph->mu) {
+  // If any session has already run this node_id, mark this session as
+  // unrunnable.
+  for (auto it : graph->sessions) {
+    if (it.first->last_num_graph_nodes > op.node.id()) {
+      it.second = FailedPrecondition(
+          "Operation '", op.node.DebugString(), "' was changed by ",
+          mutation_type,
+          " after it was run by a session. Nodes can be mutated "
+          "only before they are executed by a session. Either don't modify "
+          "nodes after running them or create a new session.");
+    }
+  }
+}
+
 // Helpers for loading a TensorFlow plugin (a .so file).
 Status LoadLibrary(const char* library_filename, void** result,
                    const void** buf, size_t* len);
@@ -1744,7 +1761,6 @@ void TF_OperationToNodeDef(TF_Operation* oper, TF_Buffer* output_node_def,
 TF_Graph::TF_Graph()
     : graph(tensorflow::OpRegistry::Global()),
       refiner(graph.versions().producer(), graph.op_registry()),
-      num_sessions(0),
       delete_requested(false),
       parent(nullptr),
       parent_inputs(nullptr) {}
@@ -1754,7 +1770,7 @@ TF_Graph* TF_NewGraph() { return new TF_Graph; }
 void TF_DeleteGraph(TF_Graph* g) {
   g->mu.lock();
   g->delete_requested = true;
-  const bool del = g->num_sessions == 0;
+  const bool del = g->sessions.empty();
   g->mu.unlock();
   if (del) delete g;
 }
@@ -2324,11 +2340,12 @@ TF_Session* TF_NewSession(TF_Graph* graph, const TF_SessionOptions* opt,
   Session* session;
   status->status = NewSession(opt->options, &session);
   if (status->status.ok()) {
+    TF_Session* new_session = new TF_Session(session, graph);
     if (graph != nullptr) {
       mutex_lock l(graph->mu);
-      graph->num_sessions += 1;
+      graph->sessions[new_session] = Status::OK();
     }
-    return new TF_Session(session, graph);
+    return new_session;
   } else {
     DCHECK_EQ(nullptr, session);
     return nullptr;
@@ -2392,7 +2409,7 @@ TF_Session* TF_LoadSessionFromSavedModel(
 
   TF_Session* session = new TF_Session(bundle.session.release(), graph);
 
-  graph->num_sessions += 1;
+  graph->sessions[session] = Status::OK();
   session->last_num_graph_nodes = graph->graph.num_node_ids();
   return session;
 #endif  // __ANDROID__
@@ -2407,8 +2424,8 @@ void TF_DeleteSession(TF_Session* s, TF_Status* status) {
   TF_Graph* const graph = s->graph;
   if (graph != nullptr) {
     graph->mu.lock();
-    graph->num_sessions -= 1;
-    const bool del = graph->delete_requested && graph->num_sessions == 0;
+    graph->sessions.erase(s);
+    const bool del = graph->delete_requested && graph->sessions.empty();
     graph->mu.unlock();
     if (del) delete graph;
   }
@@ -2424,6 +2441,13 @@ static bool ExtendSessionGraphHelper(TF_Session* session, TF_Status* status) {
     mutex_lock session_lock(session->mu);
     session->graph->mu.lock();
     const Graph& graph = session->graph->graph;
+
+    status->status = session->graph->sessions[session];
+    if (!status->status.ok()) {
+      session->graph->mu.unlock();
+      return false;
+    }
+
     const auto num_nodes = graph.num_node_ids();
     if (session->last_num_graph_nodes < num_nodes) {
       status->status = tensorflow::ValidateNoCycles(session->graph->graph);
diff --git a/tensorflow/c/c_api_internal.h b/tensorflow/c/c_api_internal.h
index bb04e01bee..aac333d9e2 100644
--- a/tensorflow/c/c_api_internal.h
+++ b/tensorflow/c/c_api_internal.h
@@ -81,12 +81,20 @@ struct TF_Graph {
   std::unordered_map<tensorflow::string, tensorflow::Node*> name_map
       GUARDED_BY(mu);
 
-  // TF_Graph may only / must be deleted when
-  //   num_sessions == 0 && delete_requested == true
-
-  // num_sessions incremented by TF_NewSession, and decremented by
+  // The keys of this map are all the active sessions using this graph.
+  // Each value is the current "runnability" status of the corresponding
+  // session. Under normal conditions all statuses are Status::OK(), but
+  // if some operation is mutated after it was run by a session (this
+  // is detected in RecordMutation function), that session is no longer
+  // safe to run. Its status will contain the error that will be returned
+  // to the user, should she try running this session.
+  //
+  // Sessions are added to this map in TF_NewSession, and removed in
   // TF_DeleteSession.
-  int num_sessions GUARDED_BY(mu);
+  // TF_Graph may only / must be deleted when
+  //   sessions.size() == 0 && delete_requested == true
+  tensorflow::gtl::FlatMap<TF_Session*, tensorflow::Status> sessions
+      GUARDED_BY(mu);
   bool delete_requested GUARDED_BY(mu);  // set true by TF_DeleteGraph
 
   // Used to link graphs contained in TF_WhileParams to the parent graph that
@@ -167,6 +175,9 @@ TF_Tensor* TF_TensorFromTensor(const Tensor& src, TF_Status* status);
 
 Status MessageToBuffer(const tensorflow::protobuf::Message& in, TF_Buffer* out);
 
+void RecordMutation(TF_Graph* graph, const TF_Operation& op,
+                    const char* mutation_type);
+
 }  // end namespace tensorflow
 
 #endif  // TENSORFLOW_C_C_API_INTERNAL_H_
diff --git a/tensorflow/c/python_api.cc b/tensorflow/c/python_api.cc
index ba5a9268b4..37629a74ba 100644
--- a/tensorflow/c/python_api.cc
+++ b/tensorflow/c/python_api.cc
@@ -22,6 +22,7 @@ namespace tensorflow {
 void AddControlInput(TF_Graph* graph, TF_Operation* op, TF_Operation* input) {
   mutex_lock l(graph->mu);
   graph->graph.AddControlEdge(&input->node, &op->node);
+  RecordMutation(graph, *op, "adding control input");
 }
 
 void SetAttr(TF_Graph* graph, TF_Operation* op, const char* attr_name,
@@ -36,11 +37,13 @@ void SetAttr(TF_Graph* graph, TF_Operation* op, const char* attr_name,
 
   mutex_lock l(graph->mu);
   op->node.AddAttr(attr_name, attr_val);
+  RecordMutation(graph, *op, "setting attribute");
 }
 
 void SetRequestedDevice(TF_Graph* graph, TF_Operation* op, const char* device) {
   mutex_lock l(graph->mu);
   op->node.set_requested_device(device);
+  RecordMutation(graph, *op, "setting device");
 }
 
 void UpdateEdge(TF_Graph* graph, TF_Output new_src, TF_Input dst,
@@ -75,6 +78,13 @@ void UpdateEdge(TF_Graph* graph, TF_Output new_src, TF_Input dst,
   }
   status->status = graph->graph.UpdateEdge(&new_src.oper->node, new_src.index,
                                            &dst.oper->node, dst.index);
+
+  if (status->status.ok()) {
+    // This modification only updates the destination node for
+    // the purposes of running this graph in a session. Thus, we don't
+    // record the source node as being modified.
+    RecordMutation(graph, *dst.oper, "updating input tensor");
+  }
 }
 
 }  // namespace tensorflow
diff --git a/tensorflow/python/client/session_test.py b/tensorflow/python/client/session_test.py
index f4b0271195..e4545d287b 100644
--- a/tensorflow/python/client/session_test.py
+++ b/tensorflow/python/client/session_test.py
@@ -28,6 +28,8 @@ import numpy as np
 import six
 from six.moves import xrange  # pylint: disable=redefined-builtin
 
+from tensorflow.core.framework import attr_value_pb2
+from tensorflow.core.framework import types_pb2
 from tensorflow.core.lib.core import error_codes_pb2
 from tensorflow.core.protobuf import config_pb2
 from tensorflow.core.protobuf import rewriter_config_pb2
@@ -1742,5 +1744,136 @@ class SessionTest(test_util.TensorFlowTestCase):
     self.runTestAddFunctionToSession(server.target)
 
 
+class GraphMutationTest(test_util.TensorFlowTestCase):
+
+  def testUpdateInputAfterRunning(self):
+    with ops.Graph().as_default() as g:
+      a = constant_op.constant(1.0)
+      b = constant_op.constant(2.0)
+      c = a + b
+
+    with session.Session(graph=g) as sess:
+      self.assertAllEqual(3.0, sess.run(c))
+      c.op._update_input(1, a)  # pylint: disable=protected-access
+      with self.assertRaisesRegexp(
+          errors.FailedPreconditionError,
+          'add.*was changed by updating input tensor after it was run'):
+        sess.run(c)
+
+      # Check that running the graph with a new session is fine
+      with session.Session(graph=g) as sess2:
+        self.assertAllEqual(2.0, sess2.run(c))
+
+  def testSetDeviceAfterRunning(self):
+    with ops.Graph().as_default() as g:
+      a = constant_op.constant(1.0)
+      b = constant_op.constant(2.0)
+      c = a + b
+
+    with session.Session(graph=g) as sess:
+      self.assertAllEqual(3.0, sess.run(c))
+      c.op._set_device('/cpu:0')  # pylint: disable=protected-access
+      with self.assertRaisesRegexp(
+          errors.FailedPreconditionError,
+          'add.*was changed by setting device after it was run'):
+        sess.run(c)
+
+  def testSetAttrAfterRunning(self):
+    with ops.Graph().as_default() as g:
+      a = constant_op.constant(1.0, dtype=dtypes.float32)
+      b = math_ops.cast(a, dtypes.float64)
+
+    with session.Session(graph=g) as sess:
+      self.assertAllEqual(1.0, sess.run(b))
+      b.op._set_attr('DstT',
+                     attr_value_pb2.AttrValue(type=types_pb2.DT_FLOAT))
+      with self.assertRaisesRegexp(
+          errors.FailedPreconditionError,
+          'Cast.*was changed by setting attribute after it was run'):
+        sess.run(b)
+
+  def testRunModifyRun(self):
+    with ops.Graph().as_default() as g:
+      a = constant_op.constant(1.0)
+      b = constant_op.constant(2.0)
+      c = a + b
+
+      with session.Session(graph=g) as sess:
+        self.assertAllEqual(3.0, sess.run(c))
+
+        d = b + c
+        d.op._update_input(0, a)  # pylint: disable=protected-access
+        self.assertAllEqual(3.0, sess.run(c))
+        self.assertAllEqual(4.0, sess.run(d))
+
+  def testRunModifyRunTwoSessions(self):
+    with ops.Graph().as_default() as g:
+      a = constant_op.constant(1.0)
+      b = constant_op.constant(2.0)
+      c = a + b
+
+      with session.Session(graph=g) as sess1:
+        with session.Session(graph=g) as sess2:
+          self.assertAllEqual(3.0, sess1.run(c))
+          self.assertAllEqual(3.0, sess2.run(c))
+
+          d = b + c
+          d.op._update_input(0, a)  # pylint: disable=protected-access
+          self.assertAllEqual(3.0, sess2.run(c))
+          self.assertAllEqual(4.0, sess2.run(d))
+
+          d.op._update_input(0, b)  # pylint: disable=protected-access
+          self.assertAllEqual(3.0, sess1.run(c))
+          self.assertAllEqual(5.0, sess1.run(d))
+
+          with self.assertRaisesRegexp(
+              errors.FailedPreconditionError,
+              'add.*was changed by updating input tensor after it was run'):
+            sess2.run(c)
+
+  def testTwoSessionsOneRunBeforeModification(self):
+    with ops.Graph().as_default() as g, ops.device('/cpu:0'):
+      a = constant_op.constant(1.0)
+      b = constant_op.constant(2.0)
+      c = a + b
+
+    with session.Session(graph=g) as sess1:
+      with session.Session(graph=g) as sess2:
+        sess1.run(c)
+
+        c.op._set_device('/cpu:0')  # pylint: disable=protected-access
+
+        with self.assertRaisesRegexp(
+            errors.FailedPreconditionError,
+            'add.*was changed by setting device after it was run'):
+          sess1.run(c)
+
+        # sess2 was not run before modification
+        self.assertAllEqual(3.0, sess2.run(c))
+
+  def testTwoSessionsBothRunBeforeModification(self):
+    with ops.Graph().as_default() as g, ops.device('/cpu:0'):
+      a = constant_op.constant(1.0)
+      b = constant_op.constant(2.0)
+      c = a + b
+
+    with session.Session(graph=g) as sess1:
+      with session.Session(graph=g) as sess2:
+        sess1.run(c)
+        sess2.run(c)
+
+        c.op._set_device('/cpu:0')  # pylint: disable=protected-access
+
+        with self.assertRaisesRegexp(
+            errors.FailedPreconditionError,
+            'add.*was changed by setting device after it was run'):
+          sess1.run(c)
+
+        with self.assertRaisesRegexp(
+            errors.FailedPreconditionError,
+            'add.*was changed by setting device after it was run'):
+          sess2.run(c)
+
+
 if __name__ == '__main__':
   googletest.main()
-- 
GitLab


From d0d85965f3dc92a1572bd0853526c657395dff99 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 29 Nov 2017 14:21:53 -0800
Subject: [PATCH 0432/1225] Add R1 slice tests.

PiperOrigin-RevId: 177362829
---
 tensorflow/compiler/xla/tests/slice_test.cc | 88 +++++++++++++++------
 1 file changed, 65 insertions(+), 23 deletions(-)

diff --git a/tensorflow/compiler/xla/tests/slice_test.cc b/tensorflow/compiler/xla/tests/slice_test.cc
index c21124750a..981d075089 100644
--- a/tensorflow/compiler/xla/tests/slice_test.cc
+++ b/tensorflow/compiler/xla/tests/slice_test.cc
@@ -26,6 +26,7 @@ limitations under the License.
 #include "tensorflow/compiler/xla/tests/literal_test_util.h"
 #include "tensorflow/compiler/xla/tests/test_macros.h"
 #include "tensorflow/core/lib/gtl/array_slice.h"
+#include "tensorflow/core/lib/strings/stringprintf.h"
 #include "tensorflow/core/platform/test.h"
 #include "tensorflow/core/platform/types.h"
 
@@ -211,6 +212,13 @@ class SliceR1Test : public ClientLibraryTestBase,
   }
 };
 
+string SliceR1TestDataToString(const ::testing::TestParamInfo<R1Spec>& data) {
+  const R1Spec& spec = data.param;
+  return ::tensorflow::strings::Printf("%lld_%lld_%lld_%lld", spec.input_dim0,
+                                       spec.slice_start, spec.slice_limit,
+                                       spec.slice_stride);
+}
+
 XLA_TEST_P(SliceR1Test, DoIt_F32) { Run<float>(GetParam()); }
 
 XLA_TEST_P(SliceR1Test, DoIt_F64) { Run<double>(GetParam()); }
@@ -223,30 +231,64 @@ XLA_TEST_P(SliceR1Test, DoIt_U64) { Run<uint64>(GetParam()); }
 
 XLA_TEST_P(SliceR1Test, DoIt_S64) { Run<int64>(GetParam()); }
 
-INSTANTIATE_TEST_CASE_P(                          //
-    SliceR1TestInstantiation,                     //
-    SliceR1Test,                                  //
-    ::testing::Values(                            //
-        R1Spec{10, 0, 0, 1},                      //
-        R1Spec{10, 7, 7, 1},                      //
-        R1Spec{10, 2, 4, 1},                      //
-        R1Spec{10, 2, 4, 2},                      //
-        R1Spec{10, 0, 10, 1},                     //
-        R1Spec{1024, 1024 - 4, 1024, 1},          //
-        R1Spec{4096, 7, 7 + 1024, 1},             //
-        R1Spec{10, 0, 10, 2},                     //
-        R1Spec{10, 0, 10, 3},                     //
-        R1Spec{10, 0, 10, 4},                     //
-        R1Spec{10, 0, 10, 5},                     //
-        R1Spec{10, 0, 10, 10},                    //
-        R1Spec{500, 200, 400, 7},                 //
-        R1Spec{4096, 1, 4095, 3},                 //
-        R1Spec{2047, 1024 - 24, 1024 + 160, 31},  //
-        R1Spec{2047, 1, 2046, 3 * 128},           //
-        R1Spec{4096, 1024 + 3, 4095, 500},        //
-        R1Spec{8192, 0, 8192, 1024 * 3 + 400}     //
-        )                                         //
+// Tests for R1 slice ops.
+// The format for each testcase is {input size, start, limit, stride}.
+// clang-format off
+INSTANTIATE_TEST_CASE_P(
+    SliceR1TestInstantiation,
+    SliceR1Test,
+    ::testing::Values(
+        R1Spec{10, 0, 0, 1},
+        R1Spec{10, 7, 7, 1},
+        R1Spec{10, 0, 5, 1},
+        R1Spec{10, 3, 5, 1},
+        R1Spec{10, 0, 10, 1},
+        R1Spec{1024, 0, 5, 1},
+        R1Spec{1024, 3, 5, 1},
+        R1Spec{1024 + 17, 0, 5, 1},
+        R1Spec{1024 + 17, 3, 5, 1},
+        R1Spec{1024 + 17, 1024, 1024 + 6, 1},
+        R1Spec{1024 + 17, 1024 + 1, 1024 + 6, 1},
+        R1Spec{1024, 1024 - 4, 1024, 1},
+        R1Spec{4 * 1024, 7, 7 + 1024, 1},
+        R1Spec{4 * 1024, 0, 4 * 1024, 1},
+        R1Spec{4 * 1024, 1, 4 * 1024 - 1, 1},
+        R1Spec{4 * 1024, 1024, 3 * 1024, 1},
+        R1Spec{4 * 1024, 1024 + 1, 3 * 1024 - 1, 1},
+        R1Spec{16 * 1024, 0, 5, 1},
+        R1Spec{16 * 1024, 3, 5, 1},
+        R1Spec{16 * 1024 + 17, 0, 5, 1},
+        R1Spec{16 * 1024 + 17, 3, 5, 1},
+        R1Spec{16 * 1024 + 17, 16 * 1024, 16 * 1024 + 6, 1},
+        R1Spec{16 * 1024 + 17, 16 * 1024 + 1, 16 * 1024 + 6, 1},
+        R1Spec{64 * 1024, 0, 64 * 1024, 1},
+        R1Spec{64 * 1024, 1, 64 * 1024 - 1, 1},
+        R1Spec{64 * 1024, 1024, 63 * 1024, 1},
+        R1Spec{64 * 1024, 1024 + 1, 63 * 1024 - 1, 1},
+        R1Spec{64 * 1024, 32 * 1024, 33 * 1024, 1},
+        R1Spec{64 * 1024, 32 * 1024 + 1, 33 * 1024 - 1, 1},
+// TODO(b/69425338): This uses too much memory on GPU.
+#ifndef XLA_TEST_BACKEND_GPU
+        R1Spec{16 * 1024 * 1024, 4 * 1024 * 1024, 12 * 1024 * 1024, 1},
+        R1Spec{16 * 1024 * 1024, 4 * 1024 * 1024 + 1, 12 * 1024 * 1024 - 1, 1},
+        R1Spec{16 * 1024 * 1024, 4 * 1024 * 1024 - 1, 12 * 1024 * 1024 + 1, 1},
+#endif
+        R1Spec{10, 2, 4, 2},
+        R1Spec{10, 0, 10, 2},
+        R1Spec{10, 0, 10, 3},
+        R1Spec{10, 0, 10, 4},
+        R1Spec{10, 0, 10, 5},
+        R1Spec{10, 0, 10, 10},
+        R1Spec{500, 200, 400, 7},
+        R1Spec{4096, 1, 4095, 3},
+        R1Spec{2047, 1024 - 24, 1024 + 160, 31},
+        R1Spec{2047, 1, 2046, 3 * 128},
+        R1Spec{4096, 1024 + 3, 4095, 500},
+        R1Spec{8192, 0, 8192, 1024 * 3 + 400}
+        ),
+    SliceR1TestDataToString
 );
+// clang-format on
 
 struct R2Spec {
   int64 input_dim0;
-- 
GitLab


From aeba52380f1b3bdf2ff9bd2256129e209bab08ca Mon Sep 17 00:00:00 2001
From: Jiri Simsa <jsimsa@google.com>
Date: Wed, 29 Nov 2017 14:48:23 -0800
Subject: [PATCH 0433/1225] Updating references to the `tf.data` API to
 `tf.data` from `Datasets`.

PiperOrigin-RevId: 177367024
---
 tensorflow/docs_src/programmers_guide/datasets.md | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/tensorflow/docs_src/programmers_guide/datasets.md b/tensorflow/docs_src/programmers_guide/datasets.md
index f458cbcef2..073bdb7baa 100644
--- a/tensorflow/docs_src/programmers_guide/datasets.md
+++ b/tensorflow/docs_src/programmers_guide/datasets.md
@@ -1,16 +1,16 @@
 # Importing Data
 
-The @{tf.data.Dataset$`Dataset`} API enables you to build complex input pipelines from
+The `tf.data` API enables you to build complex input pipelines from
 simple, reusable pieces. For example, the pipeline for an image model might
 aggregate data from files in a distributed file system, apply random
 perturbations to each image, and merge randomly selected images into a batch
 for training. The pipeline for a text model might involve extracting symbols
 from raw text data, converting them to embedding identifiers with a lookup
-table, and batching together sequences of different lengths. The `Dataset` API
+table, and batching together sequences of different lengths. The `tf.data` API
 makes it easy to deal with large amounts of data, different data formats, and
 complicated transformations.
 
-The `Dataset` API introduces two new abstractions to TensorFlow:
+The `tf.data` API introduces two new abstractions to TensorFlow:
 
 * A `tf.data.Dataset` represents a sequence of elements, in which
   each element contains one or more `Tensor` objects. For example, in an image
@@ -121,7 +121,7 @@ dataset3 = dataset3.filter(lambda x, (y, z): ...)
 ### Creating an iterator
 
 Once you have built a `Dataset` to represent your input data, the next step is to
-create an `Iterator` to access elements from that dataset.  The `Dataset` API
+create an `Iterator` to access elements from that dataset.  The `tf.data` API
 currently supports the following iterators, in increasing level of
 sophistication:
 
@@ -379,7 +379,7 @@ sess.run(iterator.initializer, feed_dict={features_placeholder: features,
 
 ### Consuming TFRecord data
 
-The `Dataset` API supports a variety of file formats so that you can process
+The `tf.data` API supports a variety of file formats so that you can process
 large datasets that do not fit in memory. For example, the TFRecord file format
 is a simple record-oriented binary format that many TensorFlow applications use
 for training data. The `tf.data.TFRecordDataset` class enables you to
@@ -628,7 +628,7 @@ TODO(mrry): Add this section.
 
 ### Processing multiple epochs
 
-The `Dataset` API offers two main ways to process multiple epochs of the same
+The `tf.data` API offers two main ways to process multiple epochs of the same
 data.
 
 The simplest way to iterate over a dataset in multiple epochs is to use the
@@ -693,7 +693,7 @@ dataset = dataset.repeat()
 The @{tf.train.MonitoredTrainingSession} API simplifies many aspects of running
 TensorFlow in a distributed setting. `MonitoredTrainingSession` uses the
 @{tf.errors.OutOfRangeError} to signal that training has completed, so to use it
-with the `Dataset` API, we recommend using
+with the `tf.data` API, we recommend using
 `Dataset.make_one_shot_iterator()`. For example:
 
 ```python
-- 
GitLab


From 32b861d3d4f920b46954a2e02aee1fbf46a81c63 Mon Sep 17 00:00:00 2001
From: Gunhan Gulsoy <gunan@google.com>
Date: Wed, 29 Nov 2017 15:17:06 -0800
Subject: [PATCH 0434/1225] Automated g4 rollback of changelist 177353959

PiperOrigin-RevId: 177371177
---
 tensorflow/python/client/tf_session.i         | 43 --------------
 tensorflow/python/client/tf_session_helper.cc | 19 -------
 tensorflow/python/client/tf_session_helper.h  | 14 -----
 tensorflow/python/framework/ops.py            | 57 ++++++-------------
 4 files changed, 16 insertions(+), 117 deletions(-)

diff --git a/tensorflow/python/client/tf_session.i b/tensorflow/python/client/tf_session.i
index c286d5fe47..5fa1a7e8fc 100644
--- a/tensorflow/python/client/tf_session.i
+++ b/tensorflow/python/client/tf_session.i
@@ -532,49 +532,6 @@ def TF_Reset(target, containers=None, config=None):
 %unignore TF_GraphGetTensorShapeHelper;
 %ignore TF_GraphGetTensorShape;
 
-// We use TF_GraphSetTensorShape_wrapper instead of
-// TF_GraphSetTensorShape
-%ignore TF_GraphSetTensorShape;
-%unignore tensorflow;
-%unignore TF_GraphSetTensorShape_wrapper;
-
-// $input is a Python list of ints to a vector<int> for TF_GraphSetTensorShape_wrapper
-%typemap(in) (const std::vector<int64_t>& dims)
-    (std::vector<int64_t> dims_local){
-  if ($input != Py_None) {
-    if (!PyList_Check($input)) {
-      SWIG_exception_fail(SWIG_TypeError, tensorflow::strings::Printf(
-              "$symname: expected list but got %s ", Py_TYPE($input)->tp_name).c_str());
-    }
-    size_t size = PyList_Size($input);
-    for (int i = 0; i < size; ++i) {
-      PyObject* item = PyList_GetItem($input, i);
-      dims_local.push_back(PyInt_AS_LONG(item));
-    }
-    $1 = &dims_local;
-  } else {
-    $1 = nullptr;
-  }
-}
-
-// We use TF_GraphGetTensorShape_wrapper instead of
-// TF_GraphGetTensorShape
-%ignore TF_GraphGetTensorShape;
-%unignore tensorflow;
-%unignore TF_GraphGetTensorShape_wrapper;
-
-// Build a Python list of ints and return it.
-%typemap(out) std::vector<int64_t> tensorflow::TF_GraphGetTensorShape_wrapper {
-  $result = PyList_New($1.size());
-  if (!$result) {
-    SWIG_exception_fail(SWIG_MemoryError, "$symname: couldn't create list");
-  }
-
-  for (size_t i = 0; i < $1.size(); ++i) {
-    PyList_SET_ITEM($result, i, PyInt_FromLong($1[i]));
-  }
-}
-
 %include "tensorflow/python/client/tf_session_helper.h"
 
 %unignoreall
diff --git a/tensorflow/python/client/tf_session_helper.cc b/tensorflow/python/client/tf_session_helper.cc
index e4bf09a0ca..ad982e5dd8 100644
--- a/tensorflow/python/client/tf_session_helper.cc
+++ b/tensorflow/python/client/tf_session_helper.cc
@@ -407,23 +407,4 @@ TF_Function* TF_GraphToFunction_wrapper(
                             opts, description, out_status);
 }
 
-void TF_GraphSetTensorShape_wrapper(TF_Graph* graph, TF_Output output,
-                                    const std::vector<int64_t>& dims,
-                                    bool unknown_shape, TF_Status* status) {
-  if (unknown_shape) {
-    TF_GraphSetTensorShape(graph, output, nullptr, -1, status);
-    return;
-  }
-  TF_GraphSetTensorShape(graph, output, dims.data(), dims.size(), status);
-}
-
-std::vector<int64_t> TF_GraphGetTensorShape_wrapper(TF_Graph* graph,
-                                                    TF_Output output,
-                                                    int num_dims,
-                                                    TF_Status* status) {
-  std::vector<int64_t> dims(num_dims);
-  TF_GraphGetTensorShape(graph, output, dims.data(), num_dims, status);
-  return dims;
-}
-
 }  // namespace tensorflow
diff --git a/tensorflow/python/client/tf_session_helper.h b/tensorflow/python/client/tf_session_helper.h
index bb7171db31..6ed08d3a58 100644
--- a/tensorflow/python/client/tf_session_helper.h
+++ b/tensorflow/python/client/tf_session_helper.h
@@ -168,20 +168,6 @@ TF_Function* TF_GraphToFunction_wrapper(
     const std::vector<TF_Output>& inputs, const std::vector<TF_Output>& outputs,
     const NameVector& output_names, const TF_FunctionOptions* opts,
     const char* description, TF_Status* out_status);
-
-// Set the shape of output. If unknown is true, `num_dims` must be set to
-// -1 and `dims` is set to nullptr.
-void TF_GraphSetTensorShape_wrapper(TF_Graph* graph, TF_Output output,
-                                    const std::vector<int64_t>& dims,
-                                    bool unknown_shape, TF_Status* status);
-
-// Return the shape of output. `num_dims` should be the output of
-// TF_GraphGetTensorNumDims. If `num_dims = -1`, this should not be called.
-std::vector<int64_t> TF_GraphGetTensorShape_wrapper(TF_Graph* graph,
-                                                    TF_Output output,
-                                                    int num_dims,
-                                                    TF_Status* status);
-
 }  // namespace tensorflow
 
 #endif  // TENSORFLOW_PYTHON_CLIENT_TF_SESSION_HELPER_H_
diff --git a/tensorflow/python/framework/ops.py b/tensorflow/python/framework/ops.py
index 975a1c87ec..2217513966 100644
--- a/tensorflow/python/framework/ops.py
+++ b/tensorflow/python/framework/ops.py
@@ -374,19 +374,6 @@ class Tensor(_TensorLike):
       A `TensorShape` representing the shape of this tensor.
 
     """
-    if _USE_C_API:
-      graph = self._op._graph._c_graph  # pylint: disable=protected-access
-      with errors.raise_exception_on_not_ok_status() as status:
-        num_dims = c_api.TF_GraphGetTensorNumDims(graph, self._as_tf_output(),
-                                                  status)
-      if num_dims == -1:
-        dim_list = None
-      else:
-        with errors.raise_exception_on_not_ok_status() as status:
-          dim_list = c_api.TF_GraphGetTensorShape_wrapper(
-              graph, self._as_tf_output(), num_dims, status)
-        dim_list = [None if i == -1 else i for i in dim_list]
-      return tensor_shape.TensorShape(dim_list)
     return self._shape
 
   def __iter__(self):
@@ -406,8 +393,8 @@ class Tensor(_TensorLike):
       yield self[i]
 
   def _shape_as_list(self):
-    if self.shape.ndims is not None:
-      return [dim.value for dim in self.shape.dims]
+    if self._shape.ndims is not None:
+      return [dim.value for dim in self._shape.dims]
     else:
       return None
 
@@ -423,7 +410,7 @@ class Tensor(_TensorLike):
     Returns:
       Integer rank or None
     """
-    return self.shape.ndims
+    return self._shape.ndims
 
   def get_shape(self):
     """Alias of Tensor.shape."""
@@ -454,35 +441,14 @@ class Tensor(_TensorLike):
     ```
 
     Args:
-      shape: A `TensorShape` representing the shape of this tensor, a
-      `TensorShapeProto`, a list, a tuple, or None.
+      shape: A `TensorShape` representing the shape of this tensor.
 
     Raises:
       ValueError: If `shape` is not compatible with the current shape of
         this tensor.
     """
-    if not _USE_C_API:
-      self._shape = self._shape.merge_with(shape)  # pylint: disable=protected-access
-      return
-    if not isinstance(shape, tensor_shape.TensorShape):
-      shape = tensor_shape.TensorShape(shape)
-    dim_list = []
-    if shape.dims is None:
-      unknown_shape = True
-    else:
-      unknown_shape = False
-      for dim in shape.dims:
-        if dim.value is None:
-          dim_list.append(-1)
-        else:
-          dim_list.append(dim.value)
-    with errors.raise_exception_on_not_ok_status() as status:
-      c_api.TF_GraphSetTensorShape_wrapper(
-          self._op._graph._c_graph,  # pylint: disable=protected-access
-          self._as_tf_output(),
-          dim_list,
-          unknown_shape,
-          status)
+    # TODO(skyewm): call C API
+    self._shape = self._shape.merge_with(shape)
 
   @property
   def value_index(self):
@@ -4551,11 +4517,15 @@ def control_dependencies(control_inputs):
   See @{tf.Graph.control_dependencies}
   for more details.
 
+  When eager execution is enabled, any callable object in the `control_inputs`
+  list will be called.
+
   Args:
     control_inputs: A list of `Operation` or `Tensor` objects which
       must be executed or computed before running the operations
       defined in the context.  Can also be `None` to clear the control
-      dependencies.
+      dependencies. If eager execution is enabled, any callable object in the
+      `control_inputs` list will be called.
 
   Returns:
    A context manager that specifies control dependencies for all
@@ -4564,6 +4534,11 @@ def control_dependencies(control_inputs):
   if context.in_graph_mode():
     return get_default_graph().control_dependencies(control_inputs)
   else:
+    if control_inputs:
+      # Excute any pending callables.
+      for control in control_inputs:
+        if callable(control):
+          control()
     return _NullContextmanager()
 
 
-- 
GitLab


From 963a521e255d2a189e349fc5c24ebc2bc032be5b Mon Sep 17 00:00:00 2001
From: Alexandre Passos <apassos@google.com>
Date: Wed, 29 Nov 2017 15:46:42 -0800
Subject: [PATCH 0435/1225] Using the C API in eager mode for graph functions.

PiperOrigin-RevId: 177375237
---
 tensorflow/python/eager/backprop.py           |   2 +-
 tensorflow/python/eager/context.py            |  15 ++
 tensorflow/python/eager/function.py           | 144 ++++++++++++------
 tensorflow/python/eager/graph_callable.py     |  18 ++-
 .../python/eager/graph_callable_test.py       |   1 -
 tensorflow/python/framework/ops.py            |  30 ++--
 tensorflow/python/pywrap_tfe.i                |   3 +-
 7 files changed, 143 insertions(+), 70 deletions(-)

diff --git a/tensorflow/python/eager/backprop.py b/tensorflow/python/eager/backprop.py
index 0144f3b1e5..dc1142705a 100644
--- a/tensorflow/python/eager/backprop.py
+++ b/tensorflow/python/eager/backprop.py
@@ -540,7 +540,7 @@ def _ensure_unique_tensor_objects(parameter_positions, args):
     if i in parameter_positions:
       tid = ops.tensor_id(t)
       if tid in s:
-        args[i] = args[i]._dup()  # pylint: disable=protected-access
+        args[i] = gen_array_ops.identity(args[i])
       else:
         s.add(tid)
   return args
diff --git a/tensorflow/python/eager/context.py b/tensorflow/python/eager/context.py
index 92f4e15c05..415416cfae 100644
--- a/tensorflow/python/eager/context.py
+++ b/tensorflow/python/eager/context.py
@@ -288,6 +288,21 @@ class Context(object):
     self._initialize_handle_and_devices()
     return self._num_gpus
 
+  def add_function(self, fn):
+    """Add a function definition to the context.
+
+    Once added, the function (identified by its name) can be executed like any
+    other operation.
+
+    Args:
+      fn: A wrapped TF_Function (returned from TF_GraphToFunction_wrapper).
+    """
+    with errors.raise_exception_on_not_ok_status() as status:
+      pywrap_tensorflow.TFE_ContextAddFunction(
+          self._handle,  # pylint: disable=protected-access
+          fn,
+          status)
+
   def add_function_def(self, fdef):
     """Add a function definition to the context.
 
diff --git a/tensorflow/python/eager/function.py b/tensorflow/python/eager/function.py
index 2f4b59e938..092b36ff20 100644
--- a/tensorflow/python/eager/function.py
+++ b/tensorflow/python/eager/function.py
@@ -25,15 +25,19 @@ import threading
 
 import numpy as np
 
+from tensorflow.core.framework import function_pb2
+from tensorflow.python import pywrap_tensorflow
 from tensorflow.python.eager import context
 from tensorflow.python.eager import execute
 from tensorflow.python.eager import tape
 from tensorflow.python.eager.graph_only_ops import graph_placeholder
+from tensorflow.python.framework import c_api_util
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes as dtypes_module
-from tensorflow.python.framework import graph_to_function_def
+from tensorflow.python.framework import errors
 from tensorflow.python.framework import ops
 from tensorflow.python.ops import gradients_impl
+from tensorflow.python.util import compat
 from tensorflow.python.util import nest
 from tensorflow.python.util import tf_decorator
 
@@ -47,10 +51,41 @@ _scoped_captures = threading.local()
 _scoped_captures.tensors = None
 
 
-def make_function_def(graph, operations, inputs, outputs):
-  """Makes function def from the given graph with the operations."""
-  return graph_to_function_def.graph_to_function_def(
-      graph, operations, inputs, outputs)
+def make_function_def(name, graph, operations, inputs, outputs):
+  """Makes FunctionDef proto and defined function.
+
+  Args:
+    name: the function name
+    graph: the graph from which to build the function
+    operations: the operations in the function body
+    inputs: tensors to be used as function arguments
+    outputs: tensors to be returned from the function
+
+  Returns:
+   fdef: a FunctionDef protocol buffer for the function
+   fn: a wrapped TF_Function for the function
+  """
+  with errors.raise_exception_on_not_ok_status() as status:
+    fn = pywrap_tensorflow.TF_GraphToFunction_wrapper(
+        graph._c_graph,  # pylint: disable=protected-access
+        compat.as_text(name),
+        False,
+        [o._c_op for o in operations],  # pylint: disable=protected-access
+        [t._as_tf_output() for t in inputs],  # pylint: disable=protected-access
+        [t._as_tf_output() for t in outputs],  # pylint: disable=protected-access
+        [compat.as_text("%s" % i) for i in range(len(outputs))],
+        None,
+        compat.as_text(""),
+        status)
+  # TODO(apassos) avoid creating a FunctionDef (specially to grab the signature,
+  # but also in general it's nice not to depend on it.
+  with c_api_util.tf_buffer() as buffer_:
+    with errors.raise_exception_on_not_ok_status() as status:
+      pywrap_tensorflow.TF_FunctionToFunctionDef(fn, buffer_, status)
+    proto_data = pywrap_tensorflow.TF_GetBuffer(buffer_)
+  fdef = function_pb2.FunctionDef()
+  fdef.ParseFromString(compat.as_bytes(proto_data))
+  return fdef, fn
 
 
 @contextlib.contextmanager
@@ -115,6 +150,10 @@ class CapturingGraph(ops.Graph):
     # for resource tensors.
     self._last_op_using_resource_tensor = {}
 
+  # TODO(apassos) remove once the C API is used by default.
+  def _use_c_api_hack(self):
+    return True
+
   def clear_resource_control_flow_state(self):
     self._last_op_using_resource_tensor = {}
 
@@ -207,14 +246,20 @@ def _inference_name(n):
   return "__inference_%s_%s" % (n, ops.uid())
 
 
+# TODO(apassos) get rid of this by splitting framework.function._DefinedFunction
+# so it doesn't have the definition-generating logic and is just a container for
+# an already-defined function.
 class _DefinedFunction(object):
   """Mocks the interface of tf _DefinedFunction."""
 
-  def __init__(self, fdef):
+  def __init__(self, fdef, fn):
     self.definition = fdef
     self.name = fdef.signature.name
+    self.signature = fdef.signature
     self.grad_func_name = None
     self.python_grad_func = None
+    self._c_func = fn
+    self._grad_func = None
 
 
 def _map_sequence_obj_to_idx(sequence):
@@ -250,6 +295,7 @@ class GraphModeFunction(object):
                input_placeholders,
                extra_inputs,
                fdef,
+               fn,
                graph,
                operations,
                func_outputs,
@@ -263,7 +309,7 @@ class GraphModeFunction(object):
     self._graph = graph
     self._has_backprop = False
     self._func_name = fdef.signature.name
-    self._fdef = _DefinedFunction(fdef)
+    self._fdef = _DefinedFunction(fdef, fn)
     self._num_outputs = len(fdef.signature.output_arg)
     self._ops = operations
     self._func_outputs = func_outputs
@@ -283,38 +329,45 @@ class GraphModeFunction(object):
     with self._graph.as_default(), context.graph_mode():
       c = _CapturingContext()
       with c:
-        filtered_outputs = [
-            x for x in self._returns if x is not None
-        ]
+        filtered_outputs = [x for x in self._returns if x is not None]
         self._out_grad_placeholders = [
-            graph_placeholder(x.dtype, x.shape) for x in filtered_outputs
-        ]
+            graph_placeholder(x.dtype, x.shape) for x in filtered_outputs]
         in_gradients = gradients_impl.gradients(
             filtered_outputs,
             self._input_placeholders,
             grad_ys=self._out_grad_placeholders)
-        shapes = [x.shape for x in in_gradients if x is not None]
+        shapes = tuple(x.shape for x in in_gradients if x is not None)
     captures = list(sorted(c.captured_tensors, key=lambda x: x.name))
-    forward_function_def = make_function_def(
-        self._graph, self._ops, self._input_placeholders,
+    forward_name = _forward_name(self._func_name)
+    forward_function_def, forward_fn = make_function_def(
+        forward_name, self._graph, self._ops, self._input_placeholders,
         filtered_outputs + captures)
-    self._forward_fdef = _DefinedFunction(forward_function_def)
-    _register_with_name(_forward_name(self._func_name), forward_function_def)
-    backward_outputs = [x for x in in_gradients if x is not None]
+    self._forward_fdef = _DefinedFunction(forward_function_def, forward_fn)
+    _register(forward_fn)
+    backward_outputs = tuple(x for x in in_gradients if x is not None)
     all_inputs = self._out_grad_placeholders + captures
-    backward_function_def = make_function_def(
-        self._graph, [x.op for x in self._out_grad_placeholders
-                     ] + list(sorted(c.known_ops, key=lambda x: x.name)),
+    # Excluding input ops from the body as we do not intend to execute these
+    # operations when the function is executed.
+    all_ignored_ops = frozenset(x.op for x in all_inputs)
+    # Enforce a deterministic order of operations in the generated graph. This
+    # means rerunning the function-defining code will always define the same
+    # function, which is useful if we serialize this etc.
+    fdef_ops = tuple(x for x in sorted(c.known_ops, key=lambda x: x.name)
+                     if x not in all_ignored_ops)
+    bname = _backward_name(self._func_name)
+    backward_function_def, backward_fn = make_function_def(
+        bname, self._graph, fdef_ops,
         all_inputs, backward_outputs)
-    _register_with_name(_backward_name(self._func_name), backward_function_def)
+    _register(backward_fn)
     self._backward_function = GraphModeFunction(
-        all_inputs, [], backward_function_def, self._graph, c.known_ops,
-        in_gradients, _map_sequence_obj_to_idx(backward_outputs), shapes)
+        all_inputs, [], backward_function_def, backward_fn, self._graph,
+        c.known_ops, in_gradients, _map_sequence_obj_to_idx(backward_outputs),
+        shapes)
 
   def _backprop_call(self, args):
     """Calls the wrapped function and records the result on a tape."""
     all_args = args + self._extra_inputs
-    signature = self._forward_fdef.definition.signature
+    signature = self._forward_fdef.signature
     ctx = context.context()
     if ctx.in_graph_mode():
       g = ops.get_default_graph()
@@ -325,7 +378,7 @@ class GraphModeFunction(object):
         return ops.internal_convert_to_tensor(x, ctx=ctx)
       op = g.create_op(
           signature.name, [make_tensor(x) for x in all_args],
-          [dtypes_module.DType(x.type) for x in signature.output_arg],
+          tuple(dtypes_module.DType(x.type) for x in signature.output_arg),
           op_def=signature,
           name="FunctionCall",
           compute_shapes=False)
@@ -361,11 +414,8 @@ class GraphModeFunction(object):
       if v._trainable:  # pylint: disable=protected-access
         tape.watch_variable(v)
 
-    tensor_inputs = [
-        x for x in nest.flatten(args)
-        if isinstance(x, ops.Tensor)
-    ]
-
+    tensor_inputs = [x for x in nest.flatten(args)
+                     if isinstance(x, ops.Tensor)]
     if tape.should_record(tensor_inputs) or tape.should_record(
         self._extra_inputs):
       if not self._has_backprop:
@@ -384,7 +434,7 @@ class GraphModeFunction(object):
       args = list(tensor_inputs) + self._extra_inputs
       op = g.create_op(
           signature.name, [ops.convert_to_tensor(x) for x in args],
-          [dtypes_module.DType(x.type) for x in signature.output_arg],
+          tuple(dtypes_module.DType(x.type) for x in signature.output_arg),
           op_def=signature,
           name="FunctionCall",
           compute_shapes=False)
@@ -469,29 +519,32 @@ def _defun_internal(name, func, args, kwds):
         extra_inputs = []
         extra_placeholders = []
       outputs_list = nest.flatten(func_outputs)
-      output_shapes = [x.shape for x in outputs_list if x is not None]
+      output_shapes = tuple(x.shape for x in outputs_list if x is not None)
 
-  flat_inputs = [
-      x for x in nest.flatten(func_inputs) if isinstance(x, ops.Tensor)
-  ]
+  flat_inputs = [x for x in nest.flatten(func_inputs)
+                 if isinstance(x, ops.Tensor)]
   all_inputs = flat_inputs + list(extra_placeholders)
-
+  all_ignored_ops = frozenset(x.op for x in all_inputs)
   func_def_outputs = [x for x in outputs_list if x is not None]
-  inference_function_def = make_function_def(
-      tmp_graph, tmp_graph.get_operations(), all_inputs, func_def_outputs)
+  fname = _inference_name(name)
+  operations = tuple(x for x in tmp_graph.get_operations()
+                     if x not in all_ignored_ops)
+  inference_function_def, fn = make_function_def(
+      fname, tmp_graph, operations, all_inputs, func_def_outputs)
   # Register any other functions defined in the graph
   # TODO(ashankar): Oh lord, forgive me for this lint travesty.
   for f in tmp_graph._functions.values():  # pylint: disable=protected-access
     # TODO(ashankar): What about the gradient registry?
-    _register_with_name(f.name, f.definition)
-  _register_with_name(_inference_name(name), inference_function_def)
+    _register(f._c_func)  # pylint: disable=protected-access
+  _register(fn)
 
   return GraphModeFunction(
       all_inputs,
       extra_inputs,
       inference_function_def,
+      fn,
       tmp_graph,
-      tmp_graph.get_operations(),
+      operations,
       func_outputs,
       _map_sequence_obj_to_idx(func_def_outputs),
       output_shapes,
@@ -517,10 +570,9 @@ def _cache_key(x):
   return x
 
 
-def _register_with_name(name, fdef):
-  """Registers the function `fdef` with the name `name`."""
-  fdef.signature.name = name
-  context.context().add_function_def(fdef)
+def _register(fn):
+  """Registers the function `fn`."""
+  context.context().add_function(fn)
 
 
 # TODO(apassos): better error messages for non-hashable arguments.
diff --git a/tensorflow/python/eager/graph_callable.py b/tensorflow/python/eager/graph_callable.py
index faf0ac88bc..3da100d800 100644
--- a/tensorflow/python/eager/graph_callable.py
+++ b/tensorflow/python/eager/graph_callable.py
@@ -318,7 +318,9 @@ def _graph_callable_internal(func, shape_and_dtypes):
   placeholder_inputs = flat_inputs+ list(extra_placeholders)
 
   func_def_outputs = [x for x in outputs_list if isinstance(x, tf_ops.Tensor)]
-  initializer_function_def = function.make_function_def(
+  initialization_name = function._inference_name(func.__name__)  # pylint: disable=protected-access
+  initializer_function_def, initializer_fn = function.make_function_def(
+      initialization_name,
       tmp_graph,
       initializing_operations,
       placeholder_inputs,
@@ -327,13 +329,13 @@ def _graph_callable_internal(func, shape_and_dtypes):
   # Also, what about the gradient registry of these functions? Those need to be
   # addressed as well.
   for f in tmp_graph._functions.values():  # pylint: disable=protected-access
-    function._register_with_name(f.name, f.definition)  # pylint: disable=protected-access
-  function._register_with_name(function._inference_name(func.__name__),  # pylint: disable=protected-access
-                               initializer_function_def)
+    function._register(f._c_func)  # pylint: disable=protected-access
+  function._register(initializer_fn)  # pylint: disable=protected-access
   initializer_function = function.GraphModeFunction(
       placeholder_inputs,
       extra_inputs,
       initializer_function_def,
+      initializer_fn,
       tmp_graph,
       initializing_operations,
       func_outputs,
@@ -342,18 +344,20 @@ def _graph_callable_internal(func, shape_and_dtypes):
 
   capture_func_def_outputs = [
       x for x in captured_outlist if isinstance(x, tf_ops.Tensor)]
-  captured_function_def = function.make_function_def(
+  captured_function_name = function._inference_name(func.__name__)  # pylint: disable=protected-access
+  captured_function_def, capturing_fn = function.make_function_def(
+      captured_function_name,
       tmp_graph,
       capturing_operations,
       placeholder_inputs,
       capture_func_def_outputs)
-  function._register_with_name(function._inference_name(func.__name__),  # pylint: disable=protected-access
-                               captured_function_def)
+  function._register(capturing_fn)  # pylint: disable=protected-access
 
   captured_function = function.GraphModeFunction(
       placeholder_inputs,
       extra_inputs,
       captured_function_def,
+      capturing_fn,
       tmp_graph,
       capturing_operations,
       captured_outputs,
diff --git a/tensorflow/python/eager/graph_callable_test.py b/tensorflow/python/eager/graph_callable_test.py
index 548e16a909..b9e6ca2a93 100644
--- a/tensorflow/python/eager/graph_callable_test.py
+++ b/tensorflow/python/eager/graph_callable_test.py
@@ -152,7 +152,6 @@ class GraphCallableTest(test.TestCase):
     self.assertAllEqual(5, f(constant_op.constant(2)))
 
   def testNestedFunction(self):
-
     # TensorFlow function (which is what would be used in TensorFlow graph
     # construction).
     @function.Defun(dtypes.int32, dtypes.int32)
diff --git a/tensorflow/python/framework/ops.py b/tensorflow/python/framework/ops.py
index 2217513966..36daf59647 100644
--- a/tensorflow/python/framework/ops.py
+++ b/tensorflow/python/framework/ops.py
@@ -599,11 +599,6 @@ class Tensor(_TensorLike):
     """
     return _eval_using_default_session(self, feed_dict, self.graph, session)
 
-  def _dup(self):
-    ret = copy.copy(self)
-    ret._id = uid()  # pylint: disable=protected-access
-    return ret
-
 
 # TODO(agarwal): consider getting rid of this.
 class _EagerTensorBase(Tensor):
@@ -729,9 +724,6 @@ class _EagerTensorBase(Tensor):
     return new_tensor
     # pylint: enable=protected-access
 
-  def _dup(self):
-    return self._copy(device_name=self.device)
-
   @property
   def shape(self):
     return tensor_shape.TensorShape(self._shape_tuple())
@@ -1794,7 +1786,7 @@ class Operation(object):
       c_api.SetRequestedDevice(
           self._graph._c_graph,  # pylint: disable=protected-access
           self._c_op,  # pylint: disable=protected-access
-          _device_string(device))
+          compat.as_text(_device_string(device)))
     else:
       self._node_def.device = _device_string(device)
 
@@ -2083,7 +2075,7 @@ class Operation(object):
 
   def _set_attr(self, attr_name, attr_value):
     """Private method used to set an attribute in the node_def."""
-    if _USE_C_API:
+    if self._c_op:
       buf = c_api.TF_NewBufferFromString(
           compat.as_bytes(attr_value.SerializeToString()))
       try:
@@ -2652,11 +2644,16 @@ class Graph(object):
 
     # TODO(skyewm): fold as much of the above as possible into the C
     # implementation
-    if _USE_C_API:
+    if _USE_C_API or self._use_c_api_hack():
       self._scoped_c_graph = c_api_util.ScopedTFGraph()
     else:
       self._scoped_c_graph = None
 
+  # TODO(apassos) remove once the C API is used by default.
+  def _use_c_api_hack(self):
+    """Temporary hack; can be overridden to force C API usage."""
+    return False
+
   def _convert_stack(self, stack, include_func_start_lineno=False):
     """Converts a stack extracted using _extract_stack() to a traceback stack.
 
@@ -2985,9 +2982,14 @@ class Graph(object):
     # Add function to graph
     # pylint: disable=protected-access
     if self._c_graph:
-      assert function._c_func, (
-          "Cannot add function created without C API support to graph "
-          "created with C API support")
+      # Handle functions created without using the C API. TODO(apassos,skyewm)
+      # remove this when all functions are generated using the C API by default
+      # as this will be unnecessary.
+      if not function._c_func:
+        with errors.raise_exception_on_not_ok_status() as status:
+          serialized = function.definition.SerializeToString()
+          function._c_func = c_api.TF_FunctionImportFunctionDef(
+              serialized, status)
       with errors.raise_exception_on_not_ok_status() as status:
         gradient = function._grad_func._c_func if function._grad_func else None
         c_api.TF_GraphCopyFunction(self._c_graph, function._c_func, gradient,
diff --git a/tensorflow/python/pywrap_tfe.i b/tensorflow/python/pywrap_tfe.i
index 82b154164e..82750e9e49 100644
--- a/tensorflow/python/pywrap_tfe.i
+++ b/tensorflow/python/pywrap_tfe.i
@@ -18,6 +18,7 @@ limitations under the License.
 %rename("%s") TFE_NewContext;
 %rename("%s") TFE_DeleteContext;
 %rename("%s") TFE_ContextListDevices;
+%rename("%s") TFE_ContextAddFunction;
 %rename("%s") TFE_ContextAddFunctionDef;
 %rename("%s") TFE_OpNameGetAttrType;
 %rename("%s") TFE_Py_InitEagerTensor;
@@ -149,7 +150,7 @@ limitations under the License.
   }
   $1 = &temp;
   $1->resize(PyInt_AsLong($input), nullptr);
-}
+} 
 
 // Create new Status object.
 %typemap(in, numinputs=0) TF_Status *out_status {
-- 
GitLab


From 2f56cb1e1764efcfbd8277b77eccd9c4f3e9fc64 Mon Sep 17 00:00:00 2001
From: Robin Richtsfeld <robin.richtsfeld@gmail.com>
Date: Thu, 30 Nov 2017 01:02:05 +0100
Subject: [PATCH 0436/1225] Format AUTHORS file (#14881)

---
 AUTHORS | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/AUTHORS b/AUTHORS
index a46ae7e616..aa4be5169d 100644
--- a/AUTHORS
+++ b/AUTHORS
@@ -7,4 +7,4 @@
 # The email address is not required for organizations.
 
 Google Inc.
-Yuan Tang terrytangyuan@gmail.com
+Yuan Tang <terrytangyuan@gmail.com>
-- 
GitLab


From 5ba21573f6763ac32b1c3dcc10d78d3e9f71c2d5 Mon Sep 17 00:00:00 2001
From: ted chang <htchang@us.ibm.com>
Date: Wed, 29 Nov 2017 16:02:25 -0800
Subject: [PATCH 0437/1225] I would like to clarify checkpoint is not a file. I
 also changed some (#14961)

wording which may lead readers to use a physical file name in their code.
---
 .../docs_src/programmers_guide/saved_model.md  | 18 ++++++++++++------
 1 file changed, 12 insertions(+), 6 deletions(-)

diff --git a/tensorflow/docs_src/programmers_guide/saved_model.md b/tensorflow/docs_src/programmers_guide/saved_model.md
index 34e8e5faf5..54693f3d4d 100644
--- a/tensorflow/docs_src/programmers_guide/saved_model.md
+++ b/tensorflow/docs_src/programmers_guide/saved_model.md
@@ -33,7 +33,7 @@ roughly speaking, map variable names to tensor values.
 
 Create a `Saver` with `tf.train.Saver()` to manage all variables in the
 model. For example, the following snippet demonstrates how to call the
-`tf.train.Saver.save` method to save variables to a checkpoint file:
+`tf.train.Saver.save` method to save variables to checkpoint files:
 
 ```python
 # Create some variables.
@@ -58,7 +58,7 @@ with tf.Session() as sess:
   dec_v2.op.run()
   # Save the variables to disk.
   save_path = saver.save(sess, "/tmp/model.ckpt")
-  print("Model saved in file: %s" % save_path)
+  print("Model saved in path: %s" % save_path)
 ```
 
 
@@ -66,10 +66,10 @@ with tf.Session() as sess:
 ### Restoring variables
 
 The `tf.train.Saver` object not only saves variables to checkpoint files, it
-also restores variables.  Note that when you restore variables from a file you
-do not have to initialize them beforehand. For example, the following snippet
-demonstrates how to call the `tf.train.Saver.restore` method to restore
-variables from a checkpoint file:
+also restores variables. Note that when you restore variables you do not have
+to initialize them beforehand. For example, the following snippet demonstrates
+how to call the `tf.train.Saver.restore` method to restore variables from the
+checkpoint files:
 
 ```python
 tf.reset_default_graph()
@@ -92,6 +92,12 @@ with tf.Session() as sess:
   print("v2 : %s" % v2.eval())
 ```
 
+Notes:
+
+*  There is not a physical file called "/tmp/model.ckpt". It is the **prefix**
+   of filenames created for the checkpoint. Users only interact with the
+   prefix instead of physical checkpoint files.
+
 
 ### Choosing which variables to save and restore
 
-- 
GitLab


From 495bb7b9f6b55b0e431fc604ad9dbf5415016d90 Mon Sep 17 00:00:00 2001
From: Alexander <akindyakov@gmail.com>
Date: Thu, 30 Nov 2017 03:02:49 +0300
Subject: [PATCH 0438/1225] Speed up safe_strtod and safe_strtof functions by
 using double-conversion library (#12102)

* Add double-conversion library to third_party

Link: https://github.com/google/double-conversion
Version: v3.0.0
With patch (add case insensivity for special values): https://github.com/google/double-conversion/pull/47

* Strnlen function to str_util.h
Returns the length of the given null-terminated byte string 'str'.
Returns strsz if the null character was not found in the first
'strsz' bytes of 'str'.

* Use double-conversion lib for safe_strto{d,f} implementation

* Path for double-conversion library is no longer needed, just use new version of this lib

* Use more obvious arguments names for Strnlen function

* Fixed DoubleToBuffer function in case of [full_precision_needed]
use safe_strtod insetead of raw inplementation

* add double-conversion compilation in makefile builds
- created compilation script: compile_double_conversion.sh
- added running compile_double_conversion.sh in {build_all_android.sh, build_all_ios.sh, build_all_linux.sh}
- added linking libdouble-conversion.a to build in tensorflow/contrib/makefile/Makefile

* Build double-conversion directly inside tensorflow/contrib/makefile/Makefile

* Put double_conversion to the rest of targets including number.* as src

* Sort external dependencies and make Sanity Checks happy

* Add test cases with trailing and leading whitespace characters to //tensorflow/core:lib_strings_numbers_test

* Remove octal numbers support from safe_strtod and safe_strtof

* Add double-conversion library to the cmake build

* Prepend lib/ to cmake lib path as per mrry instructions

See https://www.github.com/tensorflow/tensorflow/blob/master/tensorflow/contrib/cmake/external/zlib.cmake#L24

* Add  to double-conversion win32 build output path: fix up windows CMake build
---
 tensorflow/contrib/cmake/CMakeLists.txt       |   4 +
 .../cmake/external/double_conversion.cmake    |  54 +++++++++
 tensorflow/contrib/makefile/Makefile          |   9 +-
 .../contrib/makefile/download_dependencies.sh |   2 +
 tensorflow/core/BUILD                         |   9 +-
 tensorflow/core/lib/strings/numbers.cc        | 112 +++++-------------
 tensorflow/core/lib/strings/numbers_test.cc   |  69 +++++++++++
 tensorflow/core/lib/strings/str_util.cc       |   8 ++
 tensorflow/core/lib/strings/str_util.h        |   5 +
 tensorflow/core/lib/strings/str_util_test.cc  |   8 ++
 tensorflow/workspace.bzl                      |   9 ++
 11 files changed, 203 insertions(+), 86 deletions(-)
 create mode 100644 tensorflow/contrib/cmake/external/double_conversion.cmake

diff --git a/tensorflow/contrib/cmake/CMakeLists.txt b/tensorflow/contrib/cmake/CMakeLists.txt
index 77a3fc0c83..ba708673b0 100644
--- a/tensorflow/contrib/cmake/CMakeLists.txt
+++ b/tensorflow/contrib/cmake/CMakeLists.txt
@@ -160,6 +160,7 @@ include(protobuf)
 include(re2)
 include(cub)
 include(sqlite)
+include(double_conversion)
 if (tensorflow_BUILD_CC_TESTS)
   include(googletest)
 endif()
@@ -178,6 +179,7 @@ set(tensorflow_EXTERNAL_LIBRARIES
     ${protobuf_STATIC_LIBRARIES}
     ${re2_STATIC_LIBRARIES}
     ${sqlite_STATIC_LIBRARIES}
+    ${double_conversion_STATIC_LIBRARIES}
 )
 set(tensorflow_EXTERNAL_DEPENDENCIES
     zlib_copy_headers_to_destination
@@ -196,6 +198,7 @@ set(tensorflow_EXTERNAL_DEPENDENCIES
     fft2d
     re2
     sqlite_copy_headers_to_destination
+    double_conversion
 )
 
 include_directories(
@@ -218,6 +221,7 @@ include_directories(
     ${PROTOBUF_INCLUDE_DIRS}
     ${re2_INCLUDE_DIR}
     ${sqlite_INCLUDE_DIR}
+    ${double_conversion_INCLUDE_DIR}
 )
 
 if(tensorflow_ENABLE_SSL_SUPPORT)
diff --git a/tensorflow/contrib/cmake/external/double_conversion.cmake b/tensorflow/contrib/cmake/external/double_conversion.cmake
new file mode 100644
index 0000000000..527ccdc8d8
--- /dev/null
+++ b/tensorflow/contrib/cmake/external/double_conversion.cmake
@@ -0,0 +1,54 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+include (ExternalProject)
+
+set(double_conversion_INCLUDE_DIR ${CMAKE_CURRENT_BINARY_DIR}/double_conversion/src/double_conversion)
+set(double_conversion_URL https://github.com/google/double-conversion.git)
+set(double_conversion_TAG 5664746)
+set(double_conversion_BUILD ${double_conversion_INCLUDE_DIR})
+set(double_conversion_LIBRARIES ${double_conversion_BUILD}/double-conversion/libdouble-conversion.so)
+set(double_conversion_INCLUDES ${double_conversion_BUILD})
+
+if(WIN32)
+  set(double_conversion_STATIC_LIBRARIES ${double_conversion_BUILD}/double-conversion/$(Configuration)/double-conversion.lib)
+else()
+  set(double_conversion_STATIC_LIBRARIES ${double_conversion_BUILD}/double-conversion/libdouble-conversion.a)
+endif()
+
+set(double_conversion_HEADERS
+    "${double_conversion_INCLUDE_DIR}/double-conversion/bignum-dtoa.h"
+    "${double_conversion_INCLUDE_DIR}/double-conversion/cached-powers.h"
+    "${double_conversion_INCLUDE_DIR}/double-conversion/double-conversion.h"
+    "${double_conversion_INCLUDE_DIR}/double-conversion/fixed-dtoa.h"
+    "${double_conversion_INCLUDE_DIR}/double-conversion/strtod.h"
+    "${double_conversion_INCLUDE_DIR}/double-conversion/bignum.h"
+    "${double_conversion_INCLUDE_DIR}/double-conversion/diy-fp.h"
+    "${double_conversion_INCLUDE_DIR}/double-conversion/fast-dtoa.h"
+    "${double_conversion_INCLUDE_DIR}/double-conversion/ieee.h"
+    "${double_conversion_INCLUDE_DIR}/double-conversion/utils.h"
+)
+
+ExternalProject_Add(double_conversion
+    PREFIX double_conversion
+    GIT_REPOSITORY ${double_conversion_URL}
+    GIT_TAG ${double_conversion_TAG}
+    DOWNLOAD_DIR "${DOWNLOAD_LOCATION}"
+    BUILD_IN_SOURCE 1
+    INSTALL_COMMAND ""
+    CMAKE_CACHE_ARGS
+        -DCMAKE_BUILD_TYPE:STRING=Release
+        -DCMAKE_VERBOSE_MAKEFILE:BOOL=OFF
+        -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON
+)
diff --git a/tensorflow/contrib/makefile/Makefile b/tensorflow/contrib/makefile/Makefile
index e2e6c05591..617ef25fa4 100644
--- a/tensorflow/contrib/makefile/Makefile
+++ b/tensorflow/contrib/makefile/Makefile
@@ -89,6 +89,7 @@ HOST_INCLUDES := \
 -I$(MAKEFILE_DIR)/downloads/gemmlowp \
 -I$(MAKEFILE_DIR)/downloads/nsync/public \
 -I$(MAKEFILE_DIR)/downloads/fft2d \
+-I$(MAKEFILE_DIR)/downloads/double_conversion \
 -I$(HOST_GENDIR)
 ifeq ($(HAS_GEN_HOST_PROTOC),true)
 	HOST_INCLUDES += -I$(MAKEFILE_DIR)/gen/protobuf-host/include
@@ -125,7 +126,9 @@ PROTO_TEXT := $(HOST_BINDIR)proto_text
 # The list of dependencies is derived from the Bazel build file by running
 # the gen_file_lists.sh script on a system with a working Bazel setup.
 PROTO_TEXT_CC_FILES := $(shell cat $(MAKEFILE_DIR)/proto_text_cc_files.txt)
-PROTO_TEXT_PB_CC_LIST := $(shell cat $(MAKEFILE_DIR)/proto_text_pb_cc_files.txt)
+PROTO_TEXT_PB_CC_LIST := \
+	$(shell cat $(MAKEFILE_DIR)/proto_text_pb_cc_files.txt) \
+	$(wildcard tensorflow/contrib/makefile/downloads/double_conversion/double-conversion/*.cc)
 PROTO_TEXT_PB_H_LIST := $(shell cat $(MAKEFILE_DIR)/proto_text_pb_h_files.txt)
 
 # Locations of the intermediate files proto_text generates.
@@ -171,6 +174,7 @@ INCLUDES := \
 -I$(MAKEFILE_DIR)/downloads/gemmlowp \
 -I$(MAKEFILE_DIR)/downloads/nsync/public \
 -I$(MAKEFILE_DIR)/downloads/fft2d \
+-I$(MAKEFILE_DIR)/downloads/double_conversion \
 -I$(PROTOGENDIR) \
 -I$(PBTGENDIR)
 ifeq ($(HAS_GEN_HOST_PROTOC),true)
@@ -326,6 +330,8 @@ $(MARCH_OPTION) \
 -I$(MAKEFILE_DIR)/downloads/gemmlowp \
 -I$(MAKEFILE_DIR)/downloads/nsync/public \
 -I$(MAKEFILE_DIR)/downloads/fft2d \
+-I$(MAKEFILE_DIR)/downloads/double_conversion \
+-I$(MAKEFILE_DIR)/gen/protobuf/include \
 -I$(MAKEFILE_DIR)/gen/protobuf_android/$(ANDROID_ARCH)/include \
 -I$(PROTOGENDIR) \
 -I$(PBTGENDIR)
@@ -543,6 +549,7 @@ $(wildcard tensorflow/core/platform/*/*.cc) \
 $(wildcard tensorflow/core/platform/*/*/*.cc) \
 $(wildcard tensorflow/core/util/*.cc) \
 $(wildcard tensorflow/core/util/*/*.cc) \
+$(wildcard tensorflow/contrib/makefile/downloads/double_conversion/double-conversion/*.cc) \
 tensorflow/core/util/version_info.cc
 # Remove duplicates (for version_info.cc)
 CORE_CC_ALL_SRCS := $(sort $(CORE_CC_ALL_SRCS))
diff --git a/tensorflow/contrib/makefile/download_dependencies.sh b/tensorflow/contrib/makefile/download_dependencies.sh
index a2b444d53a..e8021a53af 100755
--- a/tensorflow/contrib/makefile/download_dependencies.sh
+++ b/tensorflow/contrib/makefile/download_dependencies.sh
@@ -26,6 +26,7 @@ NSYNC_URL="$(grep -o 'https://mirror.bazel.build/github.com/google/nsync/.*tar\.
 PROTOBUF_URL="$(grep -o 'https://mirror.bazel.build/github.com/google/protobuf/.*tar\.gz' "${BZL_FILE_PATH}" | head -n1)"
 RE2_URL="$(grep -o 'https://mirror.bazel.build/github.com/google/re2/.*tar\.gz' "${BZL_FILE_PATH}" | head -n1)"
 FFT2D_URL="$(grep -o 'http.*fft\.tgz' "${BZL_FILE_PATH}" | grep -v bazel-mirror | head -n1)"
+DOUBLE_CONVERSION_URL="$(grep -o "https.*google/double-conversion.*\.zip" "${BZL_FILE_PATH}" | head -n1)"
 ABSL_URL="$(grep -o 'https://github.com/abseil/abseil-cpp/.*tar.gz' "${BZL_FILE_PATH}" | head -n1)"
 
 # TODO(petewarden): Some new code in Eigen triggers a clang bug with iOS arm64,
@@ -74,6 +75,7 @@ download_and_extract "${NSYNC_URL}" "${DOWNLOADS_DIR}/nsync"
 download_and_extract "${PROTOBUF_URL}" "${DOWNLOADS_DIR}/protobuf"
 download_and_extract "${RE2_URL}" "${DOWNLOADS_DIR}/re2"
 download_and_extract "${FFT2D_URL}" "${DOWNLOADS_DIR}/fft2d"
+download_and_extract "${DOUBLE_CONVERSION_URL}" "${DOWNLOADS_DIR}/double_conversion"
 download_and_extract "${ABSL_URL}" "${DOWNLOADS_DIR}/absl"
 
 replace_by_sed 's#static uint32x4_t p4ui_CONJ_XOR = vld1q_u32( conj_XOR_DATA );#static uint32x4_t p4ui_CONJ_XOR; // = vld1q_u32( conj_XOR_DATA ); - Removed by script#' \
diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD
index 0f2b77e9b4..bd7617fa96 100644
--- a/tensorflow/core/BUILD
+++ b/tensorflow/core/BUILD
@@ -276,7 +276,9 @@ cc_library(
         "platform/types.h",
     ] + glob(tf_additional_proto_hdrs()) + glob(tf_env_time_hdrs()),
     copts = tf_copts(),
-    deps = tf_lib_proto_parsing_deps(),
+    deps = tf_lib_proto_parsing_deps() + [
+        "@double_conversion//:double-conversion",
+    ],
 )
 
 # This build rule (along with :lib_internal, :framework, and
@@ -1024,6 +1026,7 @@ cc_library(
     deps = [
         ":protos_all_cc_impl",
         "//third_party/eigen3",
+        "@double_conversion//:double-conversion",
         "@nsync//:nsync_cpp",
         "@protobuf_archive//:protobuf",
     ],
@@ -1048,6 +1051,7 @@ cc_library(
         ":protos_all_cc_impl",
         "//third_party/eigen3",
         "//third_party/fft2d:fft2d_headers",
+        "@double_conversion//:double-conversion",
         "@fft2d//:fft2d",
         "@gemmlowp//:gemmlowp",
         "@nsync//:nsync_cpp",
@@ -1114,6 +1118,7 @@ cc_library(
     deps = [
         ":protos_all_cc_impl",
         "//third_party/eigen3",
+        "@double_conversion//:double-conversion",
         "@protobuf_archive//:protobuf",
     ],
     alwayslink = 1,
@@ -1136,6 +1141,7 @@ cc_library(
     deps = [
         ":protos_all_cc_impl",
         "//third_party/eigen3",
+        "@double_conversion//:double-conversion",
         "@nsync//:nsync_cpp",
         "@protobuf_archive//:protobuf",
     ],
@@ -1497,6 +1503,7 @@ cc_library(
         "//tensorflow/core/platform/default/build_config:platformlib",
         "@snappy",
         "@zlib_archive//:zlib",
+        "@double_conversion//:double-conversion",
         "@protobuf_archive//:protobuf",
     ] + tf_protos_all_impl(),
 )
diff --git a/tensorflow/core/lib/strings/numbers.cc b/tensorflow/core/lib/strings/numbers.cc
index 302a6967e3..b3cca504e1 100644
--- a/tensorflow/core/lib/strings/numbers.cc
+++ b/tensorflow/core/lib/strings/numbers.cc
@@ -23,6 +23,9 @@ limitations under the License.
 #include <locale>
 #include <unordered_map>
 
+#include "double-conversion/double-conversion.h"
+
+#include "tensorflow/core/lib/strings/str_util.h"
 #include "tensorflow/core/lib/strings/stringprintf.h"
 #include "tensorflow/core/platform/logging.h"
 #include "tensorflow/core/platform/macros.h"
@@ -32,72 +35,15 @@ namespace tensorflow {
 
 namespace {
 
-template <typename T>
-T locale_independent_strtonum(const char* str, const char** endptr) {
-  static const std::unordered_map<string, T> special_nums = {
-      {"inf", std::numeric_limits<T>::infinity()},
-      {"+inf", std::numeric_limits<T>::infinity()},
-      {"-inf", -std::numeric_limits<T>::infinity()},
-      {"infinity", std::numeric_limits<T>::infinity()},
-      {"+infinity", std::numeric_limits<T>::infinity()},
-      {"-infinity", -std::numeric_limits<T>::infinity()},
-      {"nan", std::numeric_limits<T>::quiet_NaN()},
-      {"+nan", std::numeric_limits<T>::quiet_NaN()},
-      {"-nan", -std::numeric_limits<T>::quiet_NaN()},
-  };
-  std::stringstream s(str);
-
-  // Check if str is one of the special numbers.
-  string special_num_str;
-  s >> special_num_str;
-
-  for (int i = 0; i < special_num_str.length(); ++i) {
-    special_num_str[i] =
-        std::tolower(special_num_str[i], std::locale::classic());
-  }
-
-  auto entry = special_nums.find(special_num_str);
-  if (entry != special_nums.end()) {
-    *endptr = str + (s.eof() ? static_cast<std::iostream::pos_type>(strlen(str))
-                             : s.tellg());
-    return entry->second;
-  } else {
-    // Perhaps it's a hex number
-    if (special_num_str.compare(0, 2, "0x") == 0 ||
-        special_num_str.compare(0, 3, "-0x") == 0) {
-      return strtol(str, const_cast<char**>(endptr), 16);
-    }
-  }
-  // Reset the stream
-  s.str(str);
-  s.clear();
-  // Use the "C" locale
-  s.imbue(std::locale::classic());
-
-  T result;
-  s >> result;
-
-  // Set to result to what strto{f,d} functions would have returned. If the
-  // number was outside the range, the stringstream sets the fail flag, but
-  // returns the +/-max() value, whereas strto{f,d} functions return +/-INF.
-  if (s.fail()) {
-    if (result == std::numeric_limits<T>::max()) {
-      result = std::numeric_limits<T>::infinity();
-      s.clear(s.rdstate() & ~std::ios::failbit);
-    } else if (result == -std::numeric_limits<T>::max()) {
-      result = -std::numeric_limits<T>::infinity();
-      s.clear(s.rdstate() & ~std::ios::failbit);
-    }
-  }
-
-  if (endptr) {
-    *endptr =
-        str +
-        (s.fail() ? static_cast<std::iostream::pos_type>(0)
-                  : (s.eof() ? static_cast<std::iostream::pos_type>(strlen(str))
-                             : s.tellg()));
-  }
-  return result;
+static inline const double_conversion::StringToDoubleConverter& StringToFloatConverter() {
+    const static double_conversion::StringToDoubleConverter converter(
+        double_conversion::StringToDoubleConverter::ALLOW_LEADING_SPACES
+        | double_conversion::StringToDoubleConverter::ALLOW_HEX
+        | double_conversion::StringToDoubleConverter::ALLOW_TRAILING_SPACES
+        | double_conversion::StringToDoubleConverter::ALLOW_CASE_INSENSIBILITY,
+        0., 0., "inf", "nan"
+    );
+    return converter;
 }
 
 }  // namespace
@@ -165,8 +111,8 @@ char* DoubleToBuffer(double value, char* buffer) {
     // larger than the precision we asked for.
     DCHECK(snprintf_result > 0 && snprintf_result < kFastToBufferSize);
 
-    full_precision_needed =
-        locale_independent_strtonum<double>(buffer, nullptr) != value;
+    auto parsed_value = double{};
+    full_precision_needed = !safe_strtod(buffer, &parsed_value) || parsed_value != value;
   }
 
   if (full_precision_needed) {
@@ -302,25 +248,23 @@ bool safe_strtou32(StringPiece str, uint32* value) {
 }
 
 bool safe_strtof(const char* str, float* value) {
-  const char* endptr;
-  *value = locale_independent_strtonum<float>(str, &endptr);
-  while (isspace(*endptr)) ++endptr;
-  // Ignore range errors from strtod/strtof.
-  // The values it returns on underflow and
-  // overflow are the right fallback in a
-  // robust setting.
-  return *str != '\0' && *endptr == '\0';
+  int processed_characters_count = -1;
+  auto len = str_util::Strnlen(str, kFastToBufferSize);
+  *value = StringToFloatConverter().StringToFloat(
+      str,
+      len,
+      &processed_characters_count);
+  return processed_characters_count > 0;
 }
 
 bool safe_strtod(const char* str, double* value) {
-  const char* endptr;
-  *value = locale_independent_strtonum<double>(str, &endptr);
-  while (isspace(*endptr)) ++endptr;
-  // Ignore range errors from strtod/strtof.
-  // The values it returns on underflow and
-  // overflow are the right fallback in a
-  // robust setting.
-  return *str != '\0' && *endptr == '\0';
+  int processed_characters_count = -1;
+  auto len = str_util::Strnlen(str, kFastToBufferSize);
+  *value = StringToFloatConverter().StringToDouble(
+      str,
+      len,
+      &processed_characters_count);
+  return processed_characters_count > 0;
 }
 
 char* FloatToBuffer(float value, char* buffer) {
diff --git a/tensorflow/core/lib/strings/numbers_test.cc b/tensorflow/core/lib/strings/numbers_test.cc
index e15161de66..df395c301e 100644
--- a/tensorflow/core/lib/strings/numbers_test.cc
+++ b/tensorflow/core/lib/strings/numbers_test.cc
@@ -16,6 +16,7 @@ limitations under the License.
 #include "tensorflow/core/lib/strings/numbers.h"
 
 #include <string>
+#include <cmath>
 #include "tensorflow/core/platform/test.h"
 
 namespace tensorflow {
@@ -277,7 +278,40 @@ TEST(safe_strtof, Float) {
   EXPECT_TRUE(safe_strtof("-0x2A", &result));
   EXPECT_EQ(-42.0f, result);
 
+  EXPECT_TRUE(safe_strtof(" -0x2", &result));
+  EXPECT_EQ(-2.0f, result);
+
+  EXPECT_TRUE(safe_strtof("8 \t", &result));
+  EXPECT_EQ(8.0f, result);
+
+  EXPECT_TRUE(safe_strtof("\t20.0\t ", &result));
+  EXPECT_EQ(20.0f, result);
+
   EXPECT_FALSE(safe_strtof("-infinity is awesome", &result));
+
+  EXPECT_TRUE(safe_strtof("-inf", &result));
+  EXPECT_EQ(-std::numeric_limits<float>::infinity(), result);
+
+  EXPECT_TRUE(safe_strtof("+inf", &result));
+  EXPECT_EQ(std::numeric_limits<float>::infinity(), result);
+
+  EXPECT_TRUE(safe_strtof("InF", &result));
+  EXPECT_EQ(std::numeric_limits<float>::infinity(), result);
+
+  EXPECT_TRUE(safe_strtof("-INF", &result));
+  EXPECT_EQ(-std::numeric_limits<float>::infinity(), result);
+
+  EXPECT_TRUE(safe_strtof("nan", &result));
+  EXPECT_TRUE(std::isnan(result));
+
+  EXPECT_TRUE(safe_strtof("-nan", &result));
+  EXPECT_TRUE(std::isnan(result));
+
+  EXPECT_TRUE(safe_strtof("-NaN", &result));
+  EXPECT_TRUE(std::isnan(result));
+
+  EXPECT_TRUE(safe_strtof("+NAN", &result));
+  EXPECT_TRUE(std::isnan(result));
 }
 
 TEST(safe_strtod, Double) {
@@ -296,6 +330,41 @@ TEST(safe_strtod, Double) {
 
   EXPECT_TRUE(safe_strtod("1e-325", &result));
   EXPECT_EQ(0, result);
+
+  EXPECT_TRUE(safe_strtod(" -0x1c", &result));
+  EXPECT_EQ(-28.0, result);
+
+  EXPECT_TRUE(safe_strtod("50 \t", &result));
+  EXPECT_EQ(50.0, result);
+
+  EXPECT_TRUE(safe_strtod("\t82.0\t ", &result));
+  EXPECT_EQ(82.0, result);
+
+  EXPECT_FALSE(safe_strtod("infinity", &result));
+
+  EXPECT_TRUE(safe_strtod("-inf", &result));
+  EXPECT_EQ(-std::numeric_limits<double>::infinity(), result);
+
+  EXPECT_TRUE(safe_strtod("+inf", &result));
+  EXPECT_EQ(std::numeric_limits<double>::infinity(), result);
+
+  EXPECT_TRUE(safe_strtod("InF", &result));
+  EXPECT_EQ(std::numeric_limits<double>::infinity(), result);
+
+  EXPECT_TRUE(safe_strtod("-INF", &result));
+  EXPECT_EQ(-std::numeric_limits<double>::infinity(), result);
+
+  EXPECT_TRUE(safe_strtod("nan", &result));
+  EXPECT_TRUE(std::isnan(result));
+
+  EXPECT_TRUE(safe_strtod("-nan", &result));
+  EXPECT_TRUE(std::isnan(result));
+
+  EXPECT_TRUE(safe_strtod("-NaN", &result));
+  EXPECT_TRUE(std::isnan(result));
+
+  EXPECT_TRUE(safe_strtod("+NAN", &result));
+  EXPECT_TRUE(std::isnan(result));
 }
 
 }  // namespace strings
diff --git a/tensorflow/core/lib/strings/str_util.cc b/tensorflow/core/lib/strings/str_util.cc
index d28857803d..0ae6c66080 100644
--- a/tensorflow/core/lib/strings/str_util.cc
+++ b/tensorflow/core/lib/strings/str_util.cc
@@ -452,5 +452,13 @@ bool SplitAndParseAsFloats(StringPiece text, char delim,
                                     result);
 }
 
+size_t Strnlen(const char* str, const size_t string_max_len) {
+  size_t len = 0;
+  while (len < string_max_len && str[len] != '\0') {
+    ++len;
+  }
+  return len;
+}
+
 }  // namespace str_util
 }  // namespace tensorflow
diff --git a/tensorflow/core/lib/strings/str_util.h b/tensorflow/core/lib/strings/str_util.h
index 8cea0f0718..b0d774a05c 100644
--- a/tensorflow/core/lib/strings/str_util.h
+++ b/tensorflow/core/lib/strings/str_util.h
@@ -209,6 +209,11 @@ std::vector<string> Split(StringPiece text, char delims, Predicate p) {
   return Split(text, StringPiece(&delims, 1), p);
 }
 
+// Returns the length of the given null-terminated byte string 'str'.
+// Returns 'string_max_len' if the null character was not found in the first
+// 'string_max_len' bytes of 'str'.
+size_t Strnlen(const char* str, const size_t string_max_len);
+
 }  // namespace str_util
 }  // namespace tensorflow
 
diff --git a/tensorflow/core/lib/strings/str_util_test.cc b/tensorflow/core/lib/strings/str_util_test.cc
index d5909d17aa..3a8de7c96b 100644
--- a/tensorflow/core/lib/strings/str_util_test.cc
+++ b/tensorflow/core/lib/strings/str_util_test.cc
@@ -430,4 +430,12 @@ TEST(StringReplace, EmptyStringReplaceAll) {
   EXPECT_EQ("", str_util::StringReplace("", "a", "X", /*replace_all=*/true));
 }
 
+TEST(Strnlen, Basic) {
+  EXPECT_EQ(0, str_util::Strnlen("ab", 0));
+  EXPECT_EQ(1, str_util::Strnlen("a", 1));
+  EXPECT_EQ(2, str_util::Strnlen("abcd", 2));
+  EXPECT_EQ(3, str_util::Strnlen("abc", 10));
+  EXPECT_EQ(4, str_util::Strnlen("a \t\n", 10));
+}
+
 }  // namespace tensorflow
diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl
index 7d07769a45..6b13271002 100644
--- a/tensorflow/workspace.bzl
+++ b/tensorflow/workspace.bzl
@@ -828,6 +828,15 @@ def tf_workspace(path_prefix="", tf_repo_name=""):
       ],
   )
 
+  native.http_archive(
+      name = "double_conversion",
+      urls = [
+          "https://github.com/google/double-conversion/archive/5664746c5e64dc265e7fbc1a890a6698e6ad0ebb.zip",
+      ],
+      sha256 = "a0c49fb3cc8d34b2230d278a115f1bb266bcfcaae10400b84dc2a3b7dc2c8bc6",
+      strip_prefix = "double-conversion-5664746c5e64dc265e7fbc1a890a6698e6ad0ebb",
+  )
+  
   native.new_http_archive(
       name = "tflite_mobilenet",
       build_file = str(Label("//third_party:tflite_mobilenet.BUILD")),
-- 
GitLab


From f42fde42beda0b6080f7051238cfb7f5036d861c Mon Sep 17 00:00:00 2001
From: Christoph Boeddeker <boeddeker@users.noreply.github.com>
Date: Thu, 30 Nov 2017 01:03:38 +0100
Subject: [PATCH 0439/1225] improved estimator.export_savedmodel exception
 (#14979)

---
 tensorflow/python/estimator/export/export.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tensorflow/python/estimator/export/export.py b/tensorflow/python/estimator/export/export.py
index 3b295a7e35..51075731dd 100644
--- a/tensorflow/python/estimator/export/export.py
+++ b/tensorflow/python/estimator/export/export.py
@@ -191,7 +191,8 @@ def build_all_signature_defs(receiver_tensors,
   if not isinstance(receiver_tensors, dict):
     receiver_tensors = {_SINGLE_RECEIVER_DEFAULT_NAME: receiver_tensors}
   if export_outputs is None or not isinstance(export_outputs, dict):
-    raise ValueError('export_outputs must be a dict.')
+    raise ValueError('export_outputs must be a dict and not'
+                     '{}'.format(type(export_outputs)))
 
   signature_def_map = {}
   excluded_signatures = {}
-- 
GitLab


From 8a4d84969130162ee001fa52bac51e730129399b Mon Sep 17 00:00:00 2001
From: Gary Deer <gdeer81@gmail.com>
Date: Wed, 29 Nov 2017 18:04:23 -0600
Subject: [PATCH 0440/1225] Feature Request: C++ gradient for LRN (#13987)

* WIP: added stub method and failing test

* test with default bias,alpha,beta,depth_radius passes

* Made changes based on code review

Swapped out LRN for LRNGrad
renamed to LRNGradHelper

* Fixed call to LRNGrad with proper namespace

* fixed function call to LRNGrad, all tests pass somehow

* fixed function call to LRNGrad, tests pass

* minor formatting changes based on clang-format suggestions

* formatting changes and updated test based on code review

* trimmed the tensor shape to prevent test timeout
---
 tensorflow/cc/gradients/nn_grad.cc      | 12 ++++++++++++
 tensorflow/cc/gradients/nn_grad_test.cc |  7 +++++++
 2 files changed, 19 insertions(+)

diff --git a/tensorflow/cc/gradients/nn_grad.cc b/tensorflow/cc/gradients/nn_grad.cc
index 09fadfcab5..13a3bba5e6 100644
--- a/tensorflow/cc/gradients/nn_grad.cc
+++ b/tensorflow/cc/gradients/nn_grad.cc
@@ -196,6 +196,18 @@ Status MaxPoolGradV2Helper(const Scope& scope, const Operation& op,
 }
 REGISTER_GRADIENT_OP("MaxPoolV2", MaxPoolGradV2Helper);
 
+Status LRNGradHelper(const Scope& scope, const Operation& op,
+                     const std::vector<Output>& grad_inputs,
+                     std::vector<Output>* grad_outputs){
+  internal::LRNGrad::Attrs grad_attrs;
+
+  auto dx = internal::LRNGrad(scope, grad_inputs[0], op.input(0), op.output(0),
+                              grad_attrs);
+  grad_outputs->push_back(dx);
+  return scope.status();
+}
+REGISTER_GRADIENT_OP("LRN", LRNGradHelper);
+
 }  // anonymous namespace
 }  // namespace ops
 }  // namespace tensorflow
diff --git a/tensorflow/cc/gradients/nn_grad_test.cc b/tensorflow/cc/gradients/nn_grad_test.cc
index ac66f51cf0..f9063e8365 100644
--- a/tensorflow/cc/gradients/nn_grad_test.cc
+++ b/tensorflow/cc/gradients/nn_grad_test.cc
@@ -191,5 +191,12 @@ TEST_F(NNGradTest, MaxPoolGradV2Helper) {
   RunTest(x, x_init_value, y, y_shape);
 }
 
+TEST_F(NNGradTest, LRN){
+  TensorShape x_shape({1, 1, 2, 1});
+  auto x = Placeholder(scope_, DT_FLOAT, Placeholder::Shape(x_shape));
+  auto y = LRN(scope_, x);
+  RunTest(x, x_shape, y, x_shape);
+}
+
 }  // namespace
 }  // namespace tensorflow
-- 
GitLab


From bc7180f02002788a7b57b36b14ceb7a47d6c76f4 Mon Sep 17 00:00:00 2001
From: Eli Bendersky <eliben@google.com>
Date: Wed, 29 Nov 2017 16:12:31 -0800
Subject: [PATCH 0441/1225] Fix more clang-tidy warnings:

- Parameter names consistent in function declarations and definitions
- Class members naming

PiperOrigin-RevId: 177379085
---
 tensorflow/compiler/xla/literal_util.h             |  6 +++---
 tensorflow/compiler/xla/reference_util.h           |  2 +-
 tensorflow/compiler/xla/shape_layout.cc            |  8 ++++----
 tensorflow/compiler/xla/shape_layout.h             | 13 +++++++------
 tensorflow/compiler/xla/shape_util.cc              |  4 ++--
 tensorflow/compiler/xla/shape_util.h               |  2 +-
 tensorflow/compiler/xla/statusor_test.cc           | 14 +++++++-------
 .../compiler/xla/tests/client_library_test_base.h  |  2 +-
 tensorflow/compiler/xla/util.cc                    |  6 +++---
 9 files changed, 29 insertions(+), 28 deletions(-)

diff --git a/tensorflow/compiler/xla/literal_util.h b/tensorflow/compiler/xla/literal_util.h
index f37e529caf..069d1b33ca 100644
--- a/tensorflow/compiler/xla/literal_util.h
+++ b/tensorflow/compiler/xla/literal_util.h
@@ -285,11 +285,11 @@ class Literal {
   std::unique_ptr<Literal> Relayout(const Layout& new_layout,
                                     const ShapeIndex& shape_index = {}) const;
 
-  // Creates a new literal by reshaping this literal to have 'shape'. Both the
-  // original shape and 'shape' must contain the same number of elements. The
+  // Creates a new literal by reshaping this literal to have the given
+  // dimensions. The total number of elements must not change; The
   // implementation currently only supports monotonic dim0-major layouts.
   StatusOr<std::unique_ptr<Literal>> Reshape(
-      tensorflow::gtl::ArraySlice<int64> shape) const;
+      tensorflow::gtl::ArraySlice<int64> dimensions) const;
 
   // Creates a new literal by reordering the dimensions of this literal.
   // The given `permutation` must be a permutation of the dimension numbers
diff --git a/tensorflow/compiler/xla/reference_util.h b/tensorflow/compiler/xla/reference_util.h
index ee244e9a66..58e1a84461 100644
--- a/tensorflow/compiler/xla/reference_util.h
+++ b/tensorflow/compiler/xla/reference_util.h
@@ -70,7 +70,7 @@ class ReferenceUtil {
   // dilation factors.
   static std::unique_ptr<Array4D<float>> ConvArray4DGeneralDimensionsDilated(
       const Array4D<float>& lhs, const Array4D<float>& rhs,
-      std::pair<int64, int64> stride, Padding padding,
+      std::pair<int64, int64> kernel_stride, Padding padding,
       std::pair<int64, int64> lhs_dilation,
       std::pair<int64, int64> rhs_dilation, ConvolutionDimensionNumbers dnums);
 
diff --git a/tensorflow/compiler/xla/shape_layout.cc b/tensorflow/compiler/xla/shape_layout.cc
index 5bf9842a6c..789eba5780 100644
--- a/tensorflow/compiler/xla/shape_layout.cc
+++ b/tensorflow/compiler/xla/shape_layout.cc
@@ -32,13 +32,13 @@ tensorflow::Status ShapeLayout::CopyLayoutFromShape(const Shape& other_shape) {
   return tensorflow::Status::OK();
 }
 
-tensorflow::Status ShapeLayout::AssignLayoutToShape(Shape* other_shape) const {
-  if (!ShapeUtil::Compatible(*other_shape, shape_)) {
+tensorflow::Status ShapeLayout::AssignLayoutToShape(Shape* to_shape) const {
+  if (!ShapeUtil::Compatible(*to_shape, shape_)) {
     return InvalidArgument("Shape %s is not compatible with shape %s",
-                           ShapeUtil::HumanString(*other_shape).c_str(),
+                           ShapeUtil::HumanString(*to_shape).c_str(),
                            ShapeUtil::HumanString(shape()).c_str());
   }
-  *other_shape = shape_;
+  *to_shape = shape_;
   return tensorflow::Status::OK();
 }
 
diff --git a/tensorflow/compiler/xla/shape_layout.h b/tensorflow/compiler/xla/shape_layout.h
index 92564660f2..4c83750f3e 100644
--- a/tensorflow/compiler/xla/shape_layout.h
+++ b/tensorflow/compiler/xla/shape_layout.h
@@ -38,18 +38,19 @@ class ShapeLayout {
   explicit ShapeLayout(const Shape& shape) : shape_(shape) {}
 
   // Assigns the layouts in this ShapeLayout to the Layout fields of the given
-  // shape. 'shape' and the shape of the ShapeLayout object must be compatible.
-  tensorflow::Status AssignLayoutToShape(Shape* shape) const;
+  // shape. 'to_shape' and the shape of the ShapeLayout object must be
+  // compatible.
+  tensorflow::Status AssignLayoutToShape(Shape* to_shape) const;
 
   // Returns true if the Layouts in this ShapeLayout match the layouts in the
   // given shape. Returns false otherwise. If the given shape is not compatible
   // with the ShapeLayout's shape, then false is returned.
   bool MatchesLayoutInShape(const Shape& shape) const;
 
-  // Copies the layout from the given shape into this ShapeLayout. 'shape' must
-  // be compatible with the ShapeLayout's shape, and 'shape' must have a layout
-  // (LayoutUtil::HasLayout).
-  tensorflow::Status CopyLayoutFromShape(const Shape& shape);
+  // Copies the layout from the given shape into this ShapeLayout. 'other_shape'
+  // must be compatible with the ShapeLayout's shape, and 'other_shape' must
+  // have a layout (LayoutUtil::HasLayout).
+  tensorflow::Status CopyLayoutFromShape(const Shape& other_shape);
 
   // Clears (Layout::Clear) all the Layouts stored in this object.
   void Clear();
diff --git a/tensorflow/compiler/xla/shape_util.cc b/tensorflow/compiler/xla/shape_util.cc
index 74fa0b2f2e..9e3f06e527 100644
--- a/tensorflow/compiler/xla/shape_util.cc
+++ b/tensorflow/compiler/xla/shape_util.cc
@@ -694,9 +694,9 @@ StatusOr<Shape> ParseShapeStringInternal(tensorflow::StringPiece* s) {
   return LayoutUtil::ValidateLayoutInShape(shape);
 }
 
-/* static */ Shape ShapeUtil::ChangeElementType(const Shape& shape,
+/* static */ Shape ShapeUtil::ChangeElementType(const Shape& original,
                                                 PrimitiveType type) {
-  Shape new_shape = shape;
+  Shape new_shape = original;
   new_shape.set_element_type(type);
   return new_shape;
 }
diff --git a/tensorflow/compiler/xla/shape_util.h b/tensorflow/compiler/xla/shape_util.h
index 2ea1bd95cb..df5b450438 100644
--- a/tensorflow/compiler/xla/shape_util.h
+++ b/tensorflow/compiler/xla/shape_util.h
@@ -170,7 +170,7 @@ class ShapeUtil {
   // As above, but for program shapes, returns a string for the form:
   //
   // (param_name: f32[42x12], ...) -> f32[24x42]
-  static string HumanString(const ProgramShape& shape);
+  static string HumanString(const ProgramShape& program_shape);
 
   // Parses a ShapeUtil::HumanString-format shape string back into a shape
   // object.
diff --git a/tensorflow/compiler/xla/statusor_test.cc b/tensorflow/compiler/xla/statusor_test.cc
index 5fa2211ac6..f9d25945bc 100644
--- a/tensorflow/compiler/xla/statusor_test.cc
+++ b/tensorflow/compiler/xla/statusor_test.cc
@@ -32,26 +32,26 @@ namespace {
 class Base1 {
  public:
   virtual ~Base1() {}
-  int pad;
+  int pad_;
 };
 
 class Base2 {
  public:
   virtual ~Base2() {}
-  int yetotherpad;
+  int yetotherpad_;
 };
 
 class Derived : public Base1, public Base2 {
  public:
   ~Derived() override {}
-  int evenmorepad;
+  int evenmorepad_;
 };
 
 class CopyNoAssign {
  public:
-  explicit CopyNoAssign(int value) : foo(value) {}
-  CopyNoAssign(const CopyNoAssign& other) : foo(other.foo) {}
-  int foo;
+  explicit CopyNoAssign(int value) : foo_(value) {}
+  CopyNoAssign(const CopyNoAssign& other) : foo_(other.foo_) {}
+  int foo_;
 
  private:
   const CopyNoAssign& operator=(const CopyNoAssign&);
@@ -253,7 +253,7 @@ TEST(StatusOr, TestCopyCtorNonAssignable) {
   StatusOr<CopyNoAssign> original(value);
   StatusOr<CopyNoAssign> copy(original);
   EXPECT_EQ(copy.status(), original.status());
-  EXPECT_EQ(original.ValueOrDie().foo, copy.ValueOrDie().foo);
+  EXPECT_EQ(original.ValueOrDie().foo_, copy.ValueOrDie().foo_);
 }
 
 TEST(StatusOr, TestCopyCtorStatusOKConverting) {
diff --git a/tensorflow/compiler/xla/tests/client_library_test_base.h b/tensorflow/compiler/xla/tests/client_library_test_base.h
index 1d27880fb1..d8fe12a72d 100644
--- a/tensorflow/compiler/xla/tests/client_library_test_base.h
+++ b/tensorflow/compiler/xla/tests/client_library_test_base.h
@@ -194,7 +194,7 @@ class ClientLibraryTestBase : public ::testing::Test {
       tensorflow::gtl::ArraySlice<GlobalData*> arguments);
   void ComputeAndCompareTuple(
       ComputationBuilder* builder, const Literal& expected,
-      tensorflow::gtl::ArraySlice<GlobalData*> arguments, ErrorSpec abs_error);
+      tensorflow::gtl::ArraySlice<GlobalData*> arguments, ErrorSpec error);
 
   // Convenience method for running a built computation and comparing the result
   // with the HloEvaluator.
diff --git a/tensorflow/compiler/xla/util.cc b/tensorflow/compiler/xla/util.cc
index e595df3052..fe5d29a6b6 100644
--- a/tensorflow/compiler/xla/util.cc
+++ b/tensorflow/compiler/xla/util.cc
@@ -191,9 +191,9 @@ std::vector<int64> ComposePermutations(tensorflow::gtl::ArraySlice<int64> p1,
   return output;
 }
 
-bool IsIdentityPermutation(tensorflow::gtl::ArraySlice<int64> p) {
-  for (int64 i = 0; i < p.size(); ++i) {
-    if (p[i] != i) {
+bool IsIdentityPermutation(tensorflow::gtl::ArraySlice<int64> permutation) {
+  for (int64 i = 0; i < permutation.size(); ++i) {
+    if (permutation[i] != i) {
       return false;
     }
   }
-- 
GitLab


From 4ada275eed7472ae32c67a1ec0b9b1dc8d80d1f0 Mon Sep 17 00:00:00 2001
From: "Joshua V. Dillon" <jvdillon@google.com>
Date: Wed, 29 Nov 2017 16:33:56 -0800
Subject: [PATCH 0442/1225] Change `tf.contrib.distributions` docstring
 examples to use `tfd` alias rather than `ds`, `bs`.

PiperOrigin-RevId: 177381853
---
 .../ops/bijectors/absolute_value_impl.py      |  4 ++-
 .../bijectors/masked_autoregressive_impl.py   | 28 +++++++++----------
 .../python/ops/bijectors/permute_impl.py      |  4 +--
 .../python/ops/bijectors/reshape_impl.py      |  4 +--
 .../distributions/python/ops/cauchy.py        | 22 ++++++++-------
 .../distributions/python/ops/deterministic.py |  6 ++--
 .../distributions/python/ops/gumbel.py        |  8 ++++--
 .../distributions/python/ops/independent.py   | 10 +++----
 .../distributions/python/ops/inverse_gamma.py |  5 ++--
 .../distributions/python/ops/logistic.py      | 13 ++++-----
 .../distributions/python/ops/mixture.py       | 10 +++----
 .../python/ops/mixture_same_family.py         | 16 +++++------
 .../distributions/python/ops/mvn_diag.py      |  8 +++---
 .../python/ops/mvn_diag_plus_low_rank.py      |  6 ++--
 .../python/ops/mvn_full_covariance.py         |  6 ++--
 .../python/ops/mvn_linear_operator.py         | 11 ++++----
 .../distributions/python/ops/mvn_tril.py      | 13 +++++----
 .../python/ops/poisson_lognormal.py           |  5 ++--
 .../distributions/python/ops/sinh_arcsinh.py  |  2 +-
 .../python/ops/vector_diffeomixture.py        | 11 ++++----
 .../python/ops/vector_exponential_diag.py     |  7 ++---
 .../ops/vector_exponential_linear_operator.py | 11 ++++----
 .../python/ops/vector_laplace_diag.py         |  8 +++---
 .../ops/vector_laplace_linear_operator.py     | 11 ++++----
 .../python/ops/vector_sinh_arcsinh_diag.py    |  2 +-
 .../python/ops/vector_student_t.py            |  6 ++--
 26 files changed, 121 insertions(+), 116 deletions(-)

diff --git a/tensorflow/contrib/distributions/python/ops/bijectors/absolute_value_impl.py b/tensorflow/contrib/distributions/python/ops/bijectors/absolute_value_impl.py
index b84502003a..0fe9f6aa78 100644
--- a/tensorflow/contrib/distributions/python/ops/bijectors/absolute_value_impl.py
+++ b/tensorflow/contrib/distributions/python/ops/bijectors/absolute_value_impl.py
@@ -48,7 +48,9 @@ class AbsoluteValue(bijector.Bijector):
 
 
   ```python
-  abs = ds.bijectors.AbsoluteValue()
+  tfd = tf.contrib.distributions
+
+  abs = tfd.bijectors.AbsoluteValue()
 
   abs.forward([-1., 0., 1.])
   ==> [1., 0.,  1.]
diff --git a/tensorflow/contrib/distributions/python/ops/bijectors/masked_autoregressive_impl.py b/tensorflow/contrib/distributions/python/ops/bijectors/masked_autoregressive_impl.py
index ae14288393..f51c48d2dd 100644
--- a/tensorflow/contrib/distributions/python/ops/bijectors/masked_autoregressive_impl.py
+++ b/tensorflow/contrib/distributions/python/ops/bijectors/masked_autoregressive_impl.py
@@ -124,17 +124,17 @@ class MaskedAutoregressiveFlow(bijector_lib.Bijector):
   #### Example Use
 
   ```python
-  ds = tf.contrib.distributions
-  bs = tf.contrib.distributions.bijectors
+  tfd = tf.contrib.distributions
+  tfb = tfd.bijectors
 
   dims = 5
 
   # A common choice for a normalizing flow is to use a Gaussian for the base
   # distribution. (However, any continuous distribution would work.) E.g.,
-  maf = ds.TransformedDistribution(
-      distribution=ds.Normal(loc=0., scale=1.),
-      bijector=bs.MaskedAutoregressiveFlow(
-          shift_and_log_scale_fn=bs.masked_autoregressive_default_template(
+  maf = tfd.TransformedDistribution(
+      distribution=tfd.Normal(loc=0., scale=1.),
+      bijector=tfb.MaskedAutoregressiveFlow(
+          shift_and_log_scale_fn=tfb.masked_autoregressive_default_template(
               hidden_layers=[512, 512])),
       event_shape=[dims])
 
@@ -143,10 +143,10 @@ class MaskedAutoregressiveFlow(bijector_lib.Bijector):
   maf.log_prob(0.)  # Cheap; no `tf.while_loop` despite no Bijector caching.
 
   # [1] also describes an "Inverse Autoregressive Flow", e.g.,
-  iaf = ds.TransformedDistribution(
-      distribution=ds.Normal(loc=0., scale=1.),
-      bijector=bs.Invert(bs.MaskedAutoregressiveFlow(
-          shift_and_log_scale_fn=bs.masked_autoregressive_default_template(
+  iaf = tfd.TransformedDistribution(
+      distribution=tfd.Normal(loc=0., scale=1.),
+      bijector=tfb.Invert(tfb.MaskedAutoregressiveFlow(
+          shift_and_log_scale_fn=tfb.masked_autoregressive_default_template(
               hidden_layers=[512, 512]))),
       event_shape=[dims])
 
@@ -158,10 +158,10 @@ class MaskedAutoregressiveFlow(bijector_lib.Bijector):
   # poor choice. Here's an example of using a "shift only" version and with a
   # different number/depth of hidden layers.
   shift_only = True
-  maf_no_scale_hidden2 = ds.TransformedDistribution(
-      distribution=ds.Normal(loc=0., scale=1.),
-      bijector=bs.MaskedAutoregressiveFlow(
-          bs.masked_autoregressive_default_template(
+  maf_no_scale_hidden2 = tfd.TransformedDistribution(
+      distribution=tfd.Normal(loc=0., scale=1.),
+      bijector=tfb.MaskedAutoregressiveFlow(
+          tfb.masked_autoregressive_default_template(
               hidden_layers=[32],
               shift_only=shift_only),
           is_constant_jacobian=shift_only),
diff --git a/tensorflow/contrib/distributions/python/ops/bijectors/permute_impl.py b/tensorflow/contrib/distributions/python/ops/bijectors/permute_impl.py
index b1d8f2f41b..8654cc39d0 100644
--- a/tensorflow/contrib/distributions/python/ops/bijectors/permute_impl.py
+++ b/tensorflow/contrib/distributions/python/ops/bijectors/permute_impl.py
@@ -40,9 +40,9 @@ class Permute(bijector_lib.Bijector):
   """Permutes the rightmost dimension of a `Tensor`.
 
   ```python
-  bs = tf.contrib.distributions.bijectors
+  tfd = tf.contrib.distributions
 
-  reverse = bs.Permute(permutation=[2, 1, 0])
+  reverse = tfd.bijectors.Permute(permutation=[2, 1, 0])
 
   reverse.forward([-1., 0., 1.])
   # ==> [1., 0., -1]
diff --git a/tensorflow/contrib/distributions/python/ops/bijectors/reshape_impl.py b/tensorflow/contrib/distributions/python/ops/bijectors/reshape_impl.py
index 1eb8e74fda..55eca06312 100644
--- a/tensorflow/contrib/distributions/python/ops/bijectors/reshape_impl.py
+++ b/tensorflow/contrib/distributions/python/ops/bijectors/reshape_impl.py
@@ -63,9 +63,9 @@ class Reshape(bijector_lib.Bijector):
   Example usage:
   ```python
 
-  bs = tf.contrib.distributions.bijectors
+  tfd = tf.contrib.distributions
 
-  r = bs.Reshape(event_shape_out=[1, -1])
+  r = tfd.bijectors.Reshape(event_shape_out=[1, -1])
 
   r.forward([3., 4.])    # shape [2]
   # ==> [[3., 4.]]       # shape [1, 2]
diff --git a/tensorflow/contrib/distributions/python/ops/cauchy.py b/tensorflow/contrib/distributions/python/ops/cauchy.py
index 8d59c1abfb..6f5d724a2a 100644
--- a/tensorflow/contrib/distributions/python/ops/cauchy.py
+++ b/tensorflow/contrib/distributions/python/ops/cauchy.py
@@ -43,16 +43,17 @@ class Cauchy(distribution.Distribution):
   The probability density function (pdf) is,
 
   ```none
-  pdf(x; loc, scale) = 1 / (pi * scale * (1 + ((x - loc) / scale)**2))
+  pdf(x; loc, scale) = 1 / (pi scale (1 + z**2))
+  z = (x - loc) / scale
   ```
   where `loc` is the location, and `scale` is the scale.
 
   The Cauchy distribution is a member of the [location-scale family](
   https://en.wikipedia.org/wiki/Location-scale_family), i.e.
+  `Y ~ Cauchy(loc, scale)` is equivalent to,
 
   ```none
   X ~ Cauchy(loc=0, scale=1)
-  Y ~ Cauchy(loc=loc, scale=scale)
   Y = loc + scale * X
   ```
 
@@ -61,14 +62,16 @@ class Cauchy(distribution.Distribution):
   Examples of initialization of one or a batch of distributions.
 
   ```python
+  tfd = tf.contrib.distributions
+
   # Define a single scalar Cauchy distribution.
-  dist = Cauchy(loc=0., scale=3.)
+  dist = tfd.Cauchy(loc=0., scale=3.)
 
   # Evaluate the cdf at 1, returning a scalar.
   dist.cdf(1.)
 
   # Define a batch of two scalar valued Cauchy distributions.
-  dist = Cauchy(loc=[1, 2.], scale=[11, 22.])
+  dist = tfd.Cauchy(loc=[1, 2.], scale=[11, 22.])
 
   # Evaluate the pdf of the first distribution on 0, and the second on 1.5,
   # returning a length two tensor.
@@ -76,18 +79,17 @@ class Cauchy(distribution.Distribution):
 
   # Get 3 samples, returning a 3 x 2 tensor.
   dist.sample([3])
-  ```
-
-  Arguments are broadcast when possible.
 
-  ```python
+  # Arguments are broadcast when possible.
   # Define a batch of two scalar valued Cauchy distributions.
   # Both have median 1, but different scales.
-  dist = tf.contrib.distributions.Cauchy(loc=1., scale=[11, 22.])
+  dist = tfd.Cauchy(loc=1., scale=[11, 22.])
+
   # Evaluate the pdf of both distributions on the same point, 3.0,
   # returning a length 2 tensor.
-  dist.prob(3.0)
+  dist.prob(3.)
   ```
+
   """
 
   def __init__(self,
diff --git a/tensorflow/contrib/distributions/python/ops/deterministic.py b/tensorflow/contrib/distributions/python/ops/deterministic.py
index 850d08d1bd..8049522e9f 100644
--- a/tensorflow/contrib/distributions/python/ops/deterministic.py
+++ b/tensorflow/contrib/distributions/python/ops/deterministic.py
@@ -290,8 +290,10 @@ class VectorDeterministic(_BaseDeterministic):
   #### Examples
 
   ```python
+  tfd = tf.contrib.distributions
+
   # Initialize a single VectorDeterministic supported at [0., 2.] in R^2.
-  constant = tf.contrib.distributions.Deterministic([0., 2.])
+  constant = tfd.Deterministic([0., 2.])
   constant.prob([0., 2.])
   ==> 1.
   constant.prob([0., 3.])
@@ -299,7 +301,7 @@ class VectorDeterministic(_BaseDeterministic):
 
   # Initialize a [3] batch of constants on R^2.
   loc = [[0., 1.], [2., 3.], [4., 5.]]
-  constant = constant_lib.VectorDeterministic(loc)
+  constant = tfd.VectorDeterministic(loc)
   constant.prob([[0., 1.], [1.9, 3.], [3.99, 5.]])
   ==> [1., 0., 0.]
   ```
diff --git a/tensorflow/contrib/distributions/python/ops/gumbel.py b/tensorflow/contrib/distributions/python/ops/gumbel.py
index ba8d3c639b..d0efaefb8e 100644
--- a/tensorflow/contrib/distributions/python/ops/gumbel.py
+++ b/tensorflow/contrib/distributions/python/ops/gumbel.py
@@ -62,15 +62,17 @@ class _Gumbel(distribution.Distribution):
   Examples of initialization of one or a batch of distributions.
 
   ```python
+  tfd = tf.contrib.distributions
+
   # Define a single scalar Gumbel distribution.
-  dist = tf.contrib.distributions.Gumbel(loc=0., scale=3.)
+  dist = tfd.Gumbel(loc=0., scale=3.)
 
   # Evaluate the cdf at 1, returning a scalar.
   dist.cdf(1.)
 
   # Define a batch of two scalar valued Gumbels.
   # The first has mean 1 and scale 11, the second 2 and 22.
-  dist = tf.contrib.distributions.Gumbel(loc=[1, 2.], scale=[11, 22.])
+  dist = tfd.Gumbel(loc=[1, 2.], scale=[11, 22.])
 
   # Evaluate the pdf of the first distribution on 0, and the second on 1.5,
   # returning a length two tensor.
@@ -85,7 +87,7 @@ class _Gumbel(distribution.Distribution):
   ```python
   # Define a batch of two scalar valued Logistics.
   # Both have mean 1, but different scales.
-  dist = tf.contrib.distributions.Gumbel(loc=1., scale=[11, 22.])
+  dist = tfd.Gumbel(loc=1., scale=[11, 22.])
 
   # Evaluate the pdf of both distributions on the same point, 3.0,
   # returning a length 2 tensor.
diff --git a/tensorflow/contrib/distributions/python/ops/independent.py b/tensorflow/contrib/distributions/python/ops/independent.py
index 6a74ca9a0a..cbce005013 100644
--- a/tensorflow/contrib/distributions/python/ops/independent.py
+++ b/tensorflow/contrib/distributions/python/ops/independent.py
@@ -68,11 +68,11 @@ class Independent(distribution_lib.Distribution):
   #### Examples
 
   ```python
-  ds = tf.contrib.distributions
+  tfd = tf.contrib.distributions
 
   # Make independent distribution from a 2-batch Normal.
-  ind = ds.Independent(
-      distribution=ds.Normal(loc=[-1., 1], scale=[0.1, 0.5]),
+  ind = tfd.Independent(
+      distribution=tfd.Normal(loc=[-1., 1], scale=[0.1, 0.5]),
       reinterpreted_batch_ndims=1)
 
   # All batch dims have been "absorbed" into event dims.
@@ -80,8 +80,8 @@ class Independent(distribution_lib.Distribution):
   ind.event_shape  # ==> [2]
 
   # Make independent distribution from a 2-batch bivariate Normal.
-  ind = ds.Independent(
-      distribution=ds.MultivariateNormalDiag(
+  ind = tfd.Independent(
+      distribution=tfd.MultivariateNormalDiag(
           loc=[[-1., 1], [1, -1]],
           scale_identity_multiplier=[1., 0.5]),
       reinterpreted_batch_ndims=1)
diff --git a/tensorflow/contrib/distributions/python/ops/inverse_gamma.py b/tensorflow/contrib/distributions/python/ops/inverse_gamma.py
index 956dee38a3..ee4d86867d 100644
--- a/tensorflow/contrib/distributions/python/ops/inverse_gamma.py
+++ b/tensorflow/contrib/distributions/python/ops/inverse_gamma.py
@@ -88,8 +88,9 @@ class InverseGamma(distribution.Distribution):
   #### Examples
 
   ```python
-  dist = InverseGamma(concentration=3.0, rate=2.0)
-  dist2 = InverseGamma(concentration=[3.0, 4.0], rate=[2.0, 3.0])
+  tfd = tf.contrib.distributions
+  dist = tfd.InverseGamma(concentration=3.0, rate=2.0)
+  dist2 = tfd.InverseGamma(concentration=[3.0, 4.0], rate=[2.0, 3.0])
   ```
 
   """
diff --git a/tensorflow/contrib/distributions/python/ops/logistic.py b/tensorflow/contrib/distributions/python/ops/logistic.py
index 48794a4882..473677f8d9 100644
--- a/tensorflow/contrib/distributions/python/ops/logistic.py
+++ b/tensorflow/contrib/distributions/python/ops/logistic.py
@@ -60,15 +60,17 @@ class Logistic(distribution.Distribution):
   Examples of initialization of one or a batch of distributions.
 
   ```python
+  tfd = tf.contrib.distributions
+
   # Define a single scalar Logistic distribution.
-  dist = tf.contrib.distributions.Logistic(loc=0., scale=3.)
+  dist = tfd.Logistic(loc=0., scale=3.)
 
   # Evaluate the cdf at 1, returning a scalar.
   dist.cdf(1.)
 
   # Define a batch of two scalar valued Logistics.
   # The first has mean 1 and scale 11, the second 2 and 22.
-  dist = tf.contrib.distributions.Logistic(loc=[1, 2.], scale=[11, 22.])
+  dist = tfd.Logistic(loc=[1, 2.], scale=[11, 22.])
 
   # Evaluate the pdf of the first distribution on 0, and the second on 1.5,
   # returning a length two tensor.
@@ -76,14 +78,11 @@ class Logistic(distribution.Distribution):
 
   # Get 3 samples, returning a 3 x 2 tensor.
   dist.sample([3])
-  ```
 
-  Arguments are broadcast when possible.
-
-  ```python
+  # Arguments are broadcast when possible.
   # Define a batch of two scalar valued Logistics.
   # Both have mean 1, but different scales.
-  dist = tf.contrib.distributions.Logistic(loc=1., scale=[11, 22.])
+  dist = tfd.Logistic(loc=1., scale=[11, 22.])
 
   # Evaluate the pdf of both distributions on the same point, 3.0,
   # returning a length 2 tensor.
diff --git a/tensorflow/contrib/distributions/python/ops/mixture.py b/tensorflow/contrib/distributions/python/ops/mixture.py
index e676931d91..f2d492f548 100644
--- a/tensorflow/contrib/distributions/python/ops/mixture.py
+++ b/tensorflow/contrib/distributions/python/ops/mixture.py
@@ -49,13 +49,13 @@ class Mixture(distribution.Distribution):
 
   ```python
   # Create a mixture of two Gaussians:
-  ds = tf.contrib.distributions
+  tfd = tf.contrib.distributions
   mix = 0.3
-  bimix_gauss = ds.Mixture(
-    cat=ds.Categorical(probs=[mix, 1.-mix]),
+  bimix_gauss = tfd.Mixture(
+    cat=tfd.Categorical(probs=[mix, 1.-mix]),
     components=[
-      ds.Normal(loc=-1., scale=0.1),
-      ds.Normal(loc=+1., scale=0.5),
+      tfd.Normal(loc=-1., scale=0.1),
+      tfd.Normal(loc=+1., scale=0.5),
   ])
 
   # Plot the PDF.
diff --git a/tensorflow/contrib/distributions/python/ops/mixture_same_family.py b/tensorflow/contrib/distributions/python/ops/mixture_same_family.py
index 5558ef0f25..5448918a50 100644
--- a/tensorflow/contrib/distributions/python/ops/mixture_same_family.py
+++ b/tensorflow/contrib/distributions/python/ops/mixture_same_family.py
@@ -43,15 +43,14 @@ class MixtureSameFamily(distribution.Distribution):
   #### Examples
 
   ```python
-  import matplotlib.pyplot as plt
-  ds = tf.contrib.distributions
+  tfd = tf.contrib.distributions
 
   ### Create a mixture of two scalar Gaussians:
 
-  gm = ds.MixtureSameFamily(
-      mixture_distribution=ds.Categorical(
+  gm = tfd.MixtureSameFamily(
+      mixture_distribution=tfd.Categorical(
           probs=[0.3, 0.7]),
-      components_distribution=ds.Normal(
+      components_distribution=tfd.Normal(
         loc=[-1., 1],       # One for each component.
         scale=[0.1, 0.5]))  # And same here.
 
@@ -63,14 +62,15 @@ class MixtureSameFamily(distribution.Distribution):
 
   # Plot PDF.
   x = np.linspace(-2., 3., int(1e4), dtype=np.float32)
+  import matplotlib.pyplot as plt
   plt.plot(x, gm.prob(x).eval());
 
   ### Create a mixture of two Bivariate Gaussians:
 
-  gm = ds.MixtureSameFamily(
-      mixture_distribution=ds.Categorical(
+  gm = tfd.MixtureSameFamily(
+      mixture_distribution=tfd.Categorical(
           probs=[0.3, 0.7]),
-      components_distribution=ds.MultivariateNormalDiag(
+      components_distribution=tfd.MultivariateNormalDiag(
           loc=[[-1., 1],  # component 1
                [1, -1]],  # component 2
           scale_identity_multiplier=[.3, .6]))
diff --git a/tensorflow/contrib/distributions/python/ops/mvn_diag.py b/tensorflow/contrib/distributions/python/ops/mvn_diag.py
index 163cf75d99..e862552880 100644
--- a/tensorflow/contrib/distributions/python/ops/mvn_diag.py
+++ b/tensorflow/contrib/distributions/python/ops/mvn_diag.py
@@ -84,10 +84,10 @@ class MultivariateNormalDiag(
   #### Examples
 
   ```python
-  ds = tf.contrib.distributions
+  tfd = tf.contrib.distributions
 
   # Initialize a single 2-variate Gaussian.
-  mvn = ds.MultivariateNormalDiag(
+  mvn = tfd.MultivariateNormalDiag(
       loc=[1., -1],
       scale_diag=[1, 2.])
 
@@ -101,7 +101,7 @@ class MultivariateNormalDiag(
   mvn.prob([-1., 0]).eval()  # shape: []
 
   # Initialize a 3-batch, 2-variate scaled-identity Gaussian.
-  mvn = ds.MultivariateNormalDiag(
+  mvn = tfd.MultivariateNormalDiag(
       loc=[1., -1],
       scale_identity_multiplier=[1, 2., 3])
 
@@ -119,7 +119,7 @@ class MultivariateNormalDiag(
   mvn.prob([-1., 0]).eval()  # shape: [3]
 
   # Initialize a 2-batch of 3-variate Gaussians.
-  mvn = ds.MultivariateNormalDiag(
+  mvn = tfd.MultivariateNormalDiag(
       loc=[[1., 2, 3],
            [11, 22, 33]]           # shape: [2, 3]
       scale_diag=[[1., 2, 3],
diff --git a/tensorflow/contrib/distributions/python/ops/mvn_diag_plus_low_rank.py b/tensorflow/contrib/distributions/python/ops/mvn_diag_plus_low_rank.py
index 040bc23072..413e88f03a 100644
--- a/tensorflow/contrib/distributions/python/ops/mvn_diag_plus_low_rank.py
+++ b/tensorflow/contrib/distributions/python/ops/mvn_diag_plus_low_rank.py
@@ -86,7 +86,7 @@ class MultivariateNormalDiagPlusLowRank(
   #### Examples
 
   ```python
-  ds = tf.contrib.distributions
+  tfd = tf.contrib.distributions
 
   # Initialize a single 3-variate Gaussian with covariance `cov = S @ S.T`,
   # `S = diag(d) + U @ diag(m) @ U.T`. The perturbation, `U @ diag(m) @ U.T`, is
@@ -97,7 +97,7 @@ class MultivariateNormalDiagPlusLowRank(
        [-1, 1],
        [2, -0.5]]        # shape: [3, 2]
   m = [4., 5]            # shape: [2]
-  mvn = ds.MultivariateNormalDiagPlusLowRank(
+  mvn = tfd.MultivariateNormalDiagPlusLowRank(
       loc=mu
       scale_diag=d
       scale_perturb_factor=U,
@@ -118,7 +118,7 @@ class MultivariateNormalDiagPlusLowRank(
   m = [[0.1, 0.2],
        [0.4, 0.5]]         # shape: [b, r] = [2, 2]
 
-  mvn = ds.MultivariateNormalDiagPlusLowRank(
+  mvn = tfd.MultivariateNormalDiagPlusLowRank(
       loc=mu,
       scale_perturb_factor=U,
       scale_perturb_diag=m)
diff --git a/tensorflow/contrib/distributions/python/ops/mvn_full_covariance.py b/tensorflow/contrib/distributions/python/ops/mvn_full_covariance.py
index f9952b2069..8e69dadfb4 100644
--- a/tensorflow/contrib/distributions/python/ops/mvn_full_covariance.py
+++ b/tensorflow/contrib/distributions/python/ops/mvn_full_covariance.py
@@ -73,14 +73,14 @@ class MultivariateNormalFullCovariance(mvn_tril.MultivariateNormalTriL):
   #### Examples
 
   ```python
-  ds = tf.contrib.distributions
+  tfd = tf.contrib.distributions
 
   # Initialize a single 3-variate Gaussian.
   mu = [1., 2, 3]
   cov = [[ 0.36,  0.12,  0.06],
          [ 0.12,  0.29, -0.13],
          [ 0.06, -0.13,  0.26]]
-  mvn = ds.MultivariateNormalFullCovariance(
+  mvn = tfd.MultivariateNormalFullCovariance(
       loc=mu,
       covariance_matrix=cov)
 
@@ -100,7 +100,7 @@ class MultivariateNormalFullCovariance(mvn_tril.MultivariateNormalTriL):
   mu = [[1., 2, 3],
         [11, 22, 33]]              # shape: [2, 3]
   covariance_matrix = ...  # shape: [2, 3, 3], symmetric, positive definite.
-  mvn = ds.MultivariateNormalFullCovariance(
+  mvn = tfd.MultivariateNormalFullCovariance(
       loc=mu,
       covariance=covariance_matrix)
 
diff --git a/tensorflow/contrib/distributions/python/ops/mvn_linear_operator.py b/tensorflow/contrib/distributions/python/ops/mvn_linear_operator.py
index 300bdd5f60..a739979289 100644
--- a/tensorflow/contrib/distributions/python/ops/mvn_linear_operator.py
+++ b/tensorflow/contrib/distributions/python/ops/mvn_linear_operator.py
@@ -90,8 +90,7 @@ class MultivariateNormalLinearOperator(
   #### Examples
 
   ```python
-  ds = tf.contrib.distributions
-  la = tf.linalg
+  tfd = tf.contrib.distributions
 
   # Initialize a single 3-variate Gaussian.
   mu = [1., 2, 3]
@@ -103,9 +102,9 @@ class MultivariateNormalLinearOperator(
   #      [ 0.2,  0.5,  0. ],
   #      [ 0.1, -0.3,  0.4]])
 
-  mvn = ds.MultivariateNormalLinearOperator(
+  mvn = tfd.MultivariateNormalLinearOperator(
       loc=mu,
-      scale=la.LinearOperatorLowerTriangular(scale))
+      scale=tf.linalg.LinearOperatorLowerTriangular(scale))
 
   # Covariance agrees with cholesky(cov) parameterization.
   mvn.covariance().eval()
@@ -122,9 +121,9 @@ class MultivariateNormalLinearOperator(
   scale_diag = [[1., 2, 3],
                 [0.5, 1, 1.5]]     # shape: [2, 3]
 
-  mvn = ds.MultivariateNormalLinearOperator(
+  mvn = tfd.MultivariateNormalLinearOperator(
       loc=mu,
-      scale=la.LinearOperatorDiag(scale_diag))
+      scale=tf.linalg.LinearOperatorDiag(scale_diag))
 
   # Compute the pdf of two `R^3` observations; return a length-2 vector.
   x = [[-0.9, 0, 0.1],
diff --git a/tensorflow/contrib/distributions/python/ops/mvn_tril.py b/tensorflow/contrib/distributions/python/ops/mvn_tril.py
index 260dcc18f5..6c7dc4ca7a 100644
--- a/tensorflow/contrib/distributions/python/ops/mvn_tril.py
+++ b/tensorflow/contrib/distributions/python/ops/mvn_tril.py
@@ -76,12 +76,13 @@ class MultivariateNormalTriL(
   ```
 
   Trainable (batch) lower-triangular matrices can be created with
-  `ds.matrix_diag_transform()` and/or `ds.fill_triangular()`
+  `tf.contrib.distributions.matrix_diag_transform()` and/or
+  `tf.contrib.distributions.fill_triangular()`
 
   #### Examples
 
   ```python
-  ds = tf.contrib.distributions
+  tfd = tf.contrib.distributions
 
   # Initialize a single 3-variate Gaussian.
   mu = [1., 2, 3]
@@ -92,7 +93,7 @@ class MultivariateNormalTriL(
   # ==> [[ 0.6,  0. ,  0. ],
   #      [ 0.2,  0.5,  0. ],
   #      [ 0.1, -0.3,  0.4]])
-  mvn = ds.MultivariateNormalTriL(
+  mvn = tfd.MultivariateNormalTriL(
       loc=mu,
       scale_tril=scale)
 
@@ -112,7 +113,7 @@ class MultivariateNormalTriL(
   mu = [[1., 2, 3],
         [11, 22, 33]]              # shape: [2, 3]
   tril = ...  # shape: [2, 3, 3], lower triangular, non-zero diagonal.
-  mvn = ds.MultivariateNormalTriL(
+  mvn = tfd.MultivariateNormalTriL(
       loc=mu,
       scale_tril=tril)
 
@@ -124,9 +125,9 @@ class MultivariateNormalTriL(
   # Instantiate a "learnable" MVN.
   dims = 4
   with tf.variable_scope("model"):
-    mvn = ds.MultivariateNormalTriL(
+    mvn = tfd.MultivariateNormalTriL(
         loc=tf.get_variable(shape=[dims], dtype=tf.float32, name="mu"),
-        scale_tril=ds.fill_triangular(
+        scale_tril=tfd.fill_triangular(
             tf.get_variable(shape=[dims * (dims + 1) / 2],
                             dtype=tf.float32, name="chol_Sigma")))
   ```
diff --git a/tensorflow/contrib/distributions/python/ops/poisson_lognormal.py b/tensorflow/contrib/distributions/python/ops/poisson_lognormal.py
index 8a95038a3c..96dff85665 100644
--- a/tensorflow/contrib/distributions/python/ops/poisson_lognormal.py
+++ b/tensorflow/contrib/distributions/python/ops/poisson_lognormal.py
@@ -107,10 +107,11 @@ class PoissonLogNormalQuadratureCompound(distribution_lib.Distribution):
   #### Examples
 
   ```python
-  ds = tf.contrib.distributions
+  tfd = tf.contrib.distributions
+
   # Create two batches of PoissonLogNormalQuadratureCompounds, one with
   # prior `loc = 0.` and another with `loc = 1.` In both cases `scale = 1.`
-  pln = ds.PoissonLogNormalQuadratureCompound(
+  pln = tfd.PoissonLogNormalQuadratureCompound(
       loc=[0., -0.5],
       scale=1.,
       quadrature_grid_and_probs=(
diff --git a/tensorflow/contrib/distributions/python/ops/sinh_arcsinh.py b/tensorflow/contrib/distributions/python/ops/sinh_arcsinh.py
index b05f15771a..c4b8f055b7 100644
--- a/tensorflow/contrib/distributions/python/ops/sinh_arcsinh.py
+++ b/tensorflow/contrib/distributions/python/ops/sinh_arcsinh.py
@@ -115,7 +115,7 @@ class SinhArcsinh(transformed_distribution.TransformedDistribution):
       tailweight:  Tailweight parameter. Default is `1.0` (unchanged tailweight)
       distribution: `tf.Distribution`-like instance. Distribution that is
         transformed to produce this distribution.
-        Default is `ds.Normal(0., 1.)`.
+        Default is `tf.distributions.Normal(0., 1.)`.
         Must be a scalar-batch, scalar-event distribution.  Typically
         `distribution.reparameterization_type = FULLY_REPARAMETERIZED` or it is
         a function of non-trainable parameters. WARNING: If you backprop through
diff --git a/tensorflow/contrib/distributions/python/ops/vector_diffeomixture.py b/tensorflow/contrib/distributions/python/ops/vector_diffeomixture.py
index 92043d6a08..904724af42 100644
--- a/tensorflow/contrib/distributions/python/ops/vector_diffeomixture.py
+++ b/tensorflow/contrib/distributions/python/ops/vector_diffeomixture.py
@@ -188,8 +188,7 @@ class VectorDiffeomixture(distribution_lib.Distribution):
   #### Examples
 
   ```python
-  ds = tf.contrib.distributions
-  la = tf.linalg
+  tfd = tf.contrib.distributions
 
   # Create two batches of VectorDiffeomixtures, one with mix_loc=[0.] and
   # another with mix_loc=[1]. In both cases, `K=2` and the affine
@@ -197,20 +196,20 @@ class VectorDiffeomixture(distribution_lib.Distribution):
   # k=0: loc=zeros(dims)  scale=LinearOperatorScaledIdentity
   # k=1: loc=[2.]*dims    scale=LinOpDiag
   dims = 5
-  vdm = ds.VectorDiffeomixture(
+  vdm = tfd.VectorDiffeomixture(
       mix_loc=[[0.], [1]],
       mix_scale=[1.],
-      distribution=ds.Normal(loc=0., scale=1.),
+      distribution=tfd.Normal(loc=0., scale=1.),
       loc=[
           None,  # Equivalent to `np.zeros(dims, dtype=np.float32)`.
           np.float32([2.]*dims),
       ],
       scale=[
-          la.LinearOperatorScaledIdentity(
+          tf.linalg.LinearOperatorScaledIdentity(
             num_rows=dims,
             multiplier=np.float32(1.1),
             is_positive_definite=True),
-          la.LinearOperatorDiag(
+          tf.linalg.LinearOperatorDiag(
             diag=np.linspace(2.5, 3.5, dims, dtype=np.float32),
             is_positive_definite=True),
       ],
diff --git a/tensorflow/contrib/distributions/python/ops/vector_exponential_diag.py b/tensorflow/contrib/distributions/python/ops/vector_exponential_diag.py
index 356d78b67a..526fe2d39a 100644
--- a/tensorflow/contrib/distributions/python/ops/vector_exponential_diag.py
+++ b/tensorflow/contrib/distributions/python/ops/vector_exponential_diag.py
@@ -89,14 +89,13 @@ class VectorExponentialDiag(
   #### Examples
 
   ```python
-  ds = tf.contrib.distributions
-  la = tf.linalg
+  tfd = tf.contrib.distributions
 
   # Initialize a single 2-variate VectorExponential, supported on
   # {(x, y) in R^2 : x > 0, y > 0}.
 
   # The first component has pdf exp{-x}, the second 0.5 exp{-x / 2}
-  vex = ds.VectorExponentialDiag(scale_diag=[1., 2.])
+  vex = tfd.VectorExponentialDiag(scale_diag=[1., 2.])
 
   # Compute the pdf of an`R^2` observation; return a scalar.
   vex.prob([3., 4.]).eval()  # shape: []
@@ -107,7 +106,7 @@ class VectorExponentialDiag(
   scale_diag = [[1., 2, 3],
                 [0.5, 1, 1.5]]     # shape: [2, 3]
 
-  vex = ds.VectorExponentialDiag(loc, scale_diag)
+  vex = tfd.VectorExponentialDiag(loc, scale_diag)
 
   # Compute the pdf of two `R^3` observations; return a length-2 vector.
   x = [[1.9, 2.2, 3.1],
diff --git a/tensorflow/contrib/distributions/python/ops/vector_exponential_linear_operator.py b/tensorflow/contrib/distributions/python/ops/vector_exponential_linear_operator.py
index b313a851b3..9d5fd9ac41 100644
--- a/tensorflow/contrib/distributions/python/ops/vector_exponential_linear_operator.py
+++ b/tensorflow/contrib/distributions/python/ops/vector_exponential_linear_operator.py
@@ -107,16 +107,15 @@ class VectorExponentialLinearOperator(
   #### Examples
 
   ```python
-  ds = tf.contrib.distributions
-  la = tf.linalg
+  tfd = tf.contrib.distributions
 
   # Initialize a single 2-variate VectorExponential, supported on
   # {(x, y) in R^2 : x > 0, y > 0}.
   mat = [[1.0, 0.1],
          [0.1, 1.0]]
 
-  vex = ds.VectorExponentialLinearOperator(
-      scale=la.LinearOperatorFullMatrix(mat))
+  vex = tfd.VectorExponentialLinearOperator(
+      scale=tf.linalg.LinearOperatorFullMatrix(mat))
 
   # Compute the pdf of an`R^2` observation; return a scalar.
   vex.prob([1., 2.]).eval()  # shape: []
@@ -127,9 +126,9 @@ class VectorExponentialLinearOperator(
   scale_diag = [[1., 2, 3],
                 [0.5, 1, 1.5]]     # shape: [2, 3]
 
-  vex = ds.VectorExponentialLinearOperator(
+  vex = tfd.VectorExponentialLinearOperator(
       loc=mu,
-      scale=la.LinearOperatorDiag(scale_diag))
+      scale=tf.linalg.LinearOperatorDiag(scale_diag))
 
   # Compute the pdf of two `R^3` observations; return a length-2 vector.
   x = [[1.9, 2.2, 3.1],
diff --git a/tensorflow/contrib/distributions/python/ops/vector_laplace_diag.py b/tensorflow/contrib/distributions/python/ops/vector_laplace_diag.py
index 0e3867809a..8dd983b750 100644
--- a/tensorflow/contrib/distributions/python/ops/vector_laplace_diag.py
+++ b/tensorflow/contrib/distributions/python/ops/vector_laplace_diag.py
@@ -101,10 +101,10 @@ class VectorLaplaceDiag(
   #### Examples
 
   ```python
-  ds = tf.contrib.distributions
+  tfd = tf.contrib.distributions
 
   # Initialize a single 2-variate VectorLaplace.
-  vla = ds.VectorLaplaceDiag(
+  vla = tfd.VectorLaplaceDiag(
       loc=[1., -1],
       scale_diag=[1, 2.])
 
@@ -118,7 +118,7 @@ class VectorLaplaceDiag(
   vla.prob([-1., 0]).eval()  # shape: []
 
   # Initialize a 3-batch, 2-variate scaled-identity VectorLaplace.
-  vla = ds.VectorLaplaceDiag(
+  vla = tfd.VectorLaplaceDiag(
       loc=[1., -1],
       scale_identity_multiplier=[1, 2., 3])
 
@@ -136,7 +136,7 @@ class VectorLaplaceDiag(
   vla.prob([-1., 0]).eval()  # shape: [3]
 
   # Initialize a 2-batch of 3-variate VectorLaplace's.
-  vla = ds.VectorLaplaceDiag(
+  vla = tfd.VectorLaplaceDiag(
       loc=[[1., 2, 3],
            [11, 22, 33]]           # shape: [2, 3]
       scale_diag=[[1., 2, 3],
diff --git a/tensorflow/contrib/distributions/python/ops/vector_laplace_linear_operator.py b/tensorflow/contrib/distributions/python/ops/vector_laplace_linear_operator.py
index c7abdbb4ca..ec485c95c1 100644
--- a/tensorflow/contrib/distributions/python/ops/vector_laplace_linear_operator.py
+++ b/tensorflow/contrib/distributions/python/ops/vector_laplace_linear_operator.py
@@ -109,8 +109,7 @@ class VectorLaplaceLinearOperator(
   #### Examples
 
   ```python
-  ds = tf.contrib.distributions
-  la = tf.linalg
+  tfd = tf.contrib.distributions
 
   # Initialize a single 3-variate VectorLaplace with some desired covariance.
   mu = [1., 2, 3]
@@ -124,9 +123,9 @@ class VectorLaplaceLinearOperator(
   #      [ 0.1, -0.3,  0.4]])
 
   # Divide scale by sqrt(2) so that the final covariance will be what we want.
-  vla = ds.VectorLaplaceLinearOperator(
+  vla = tfd.VectorLaplaceLinearOperator(
       loc=mu,
-      scale=la.LinearOperatorLowerTriangular(scale / tf.sqrt(2)))
+      scale=tf.linalg.LinearOperatorLowerTriangular(scale / tf.sqrt(2.)))
 
   # Covariance agrees with cholesky(cov) parameterization.
   vla.covariance().eval()
@@ -143,9 +142,9 @@ class VectorLaplaceLinearOperator(
   scale_diag = [[1., 2, 3],
                 [0.5, 1, 1.5]]     # shape: [2, 3]
 
-  vla = ds.VectorLaplaceLinearOperator(
+  vla = tfd.VectorLaplaceLinearOperator(
       loc=mu,
-      scale=la.LinearOperatorDiag(scale_diag))
+      scale=tf.linalg.LinearOperatorDiag(scale_diag))
 
   # Compute the pdf of two `R^3` observations; return a length-2 vector.
   x = [[-0.9, 0, 0.1],
diff --git a/tensorflow/contrib/distributions/python/ops/vector_sinh_arcsinh_diag.py b/tensorflow/contrib/distributions/python/ops/vector_sinh_arcsinh_diag.py
index 544a871070..e1ccf11645 100644
--- a/tensorflow/contrib/distributions/python/ops/vector_sinh_arcsinh_diag.py
+++ b/tensorflow/contrib/distributions/python/ops/vector_sinh_arcsinh_diag.py
@@ -143,7 +143,7 @@ class VectorSinhArcsinhDiag(transformed_distribution.TransformedDistribution):
         broadcastable with `event_shape`.
       distribution: `tf.Distribution`-like instance. Distribution from which `k`
         iid samples are used as input to transformation `F`.  Default is
-        `ds.Normal(0., 1.)`.
+        `tf.distributions.Normal(loc=0., scale=1.)`.
         Must be a scalar-batch, scalar-event distribution.  Typically
         `distribution.reparameterization_type = FULLY_REPARAMETERIZED` or it is
         a function of non-trainable parameters. WARNING: If you backprop through
diff --git a/tensorflow/contrib/distributions/python/ops/vector_student_t.py b/tensorflow/contrib/distributions/python/ops/vector_student_t.py
index 29d41ab81c..8c67647a61 100644
--- a/tensorflow/contrib/distributions/python/ops/vector_student_t.py
+++ b/tensorflow/contrib/distributions/python/ops/vector_student_t.py
@@ -91,14 +91,14 @@ class _VectorStudentT(transformed_distribution.TransformedDistribution):
   Extra leading dimensions, if provided, allow for batches.
 
   ```python
-  ds = tf.contrib.distributions
+  tfd = tf.contrib.distributions
 
   # Initialize a single 3-variate vector Student's t-distribution.
   mu = [1., 2, 3]
   chol = [[1., 0, 0.],
           [1, 3, 0],
           [1, 2, 3]]
-  vt = ds.VectorStudentT(df=2, loc=mu, scale_tril=chol)
+  vt = tfd.VectorStudentT(df=2, loc=mu, scale_tril=chol)
 
   # Evaluate this on an observation in R^3, returning a scalar.
   vt.prob([-1., 0, 1])
@@ -107,7 +107,7 @@ class _VectorStudentT(transformed_distribution.TransformedDistribution):
   mu = [[1., 2, 3],
         [11, 22, 33]]
   chol = ...  # shape 2 x 3 x 3, lower triangular, positive diagonal.
-  vt = ds.VectorStudentT(loc=mu, scale_tril=chol)
+  vt = tfd.VectorStudentT(loc=mu, scale_tril=chol)
 
   # Evaluate this on a two observations, each in R^3, returning a length two
   # tensor.
-- 
GitLab


From cb4ef362e4a18b3c42a2c90bdad8754d5ead4caf Mon Sep 17 00:00:00 2001
From: Yangzihao Wang <yangzihao@google.com>
Date: Wed, 29 Nov 2017 16:38:32 -0800
Subject: [PATCH 0443/1225] Add native dilated support for conv2d and its
 gradients in cudnn v>=6.

PiperOrigin-RevId: 177382431
---
 .../compiler/tf2xla/kernels/conv_ops.cc       |  57 +++
 .../fused_conv2d_bias_activation_op.cc        |   2 +
 .../fused_conv/kernels/fused_conv_ops_gpu.h   |   9 +-
 .../ops/fused_conv2d_bias_activation_op.cc    |   6 +
 .../api_def/base_api/api_def_Conv2D.pbtxt     |  12 +-
 .../api_def_Conv2DBackpropFilter.pbtxt        |  10 +
 .../api_def_Conv2DBackpropInput.pbtxt         |  10 +
 .../api_def/base_api/api_def_Conv3D.pbtxt     |  10 +
 .../api_def_Conv3DBackpropFilterV2.pbtxt      |  10 +
 .../api_def_Conv3DBackpropInputV2.pbtxt       |  10 +
 .../api_def_DepthwiseConv2dNative.pbtxt       |  10 +
 ..._DepthwiseConv2dNativeBackpropFilter.pbtxt |  10 +
 ...f_DepthwiseConv2dNativeBackpropInput.pbtxt |  10 +
 .../base_api/api_def_QuantizedConv2D.pbtxt    |  10 +
 tensorflow/core/framework/common_shape_fns.cc |  23 +-
 .../core/framework/common_shape_fns_test.cc   | 106 ++++-
 .../core/kernels/conv_grad_filter_ops.cc      |  93 +++-
 .../core/kernels/conv_grad_input_ops.cc       |  97 ++++-
 tensorflow/core/kernels/conv_grad_ops.h       |  16 +-
 tensorflow/core/kernels/conv_grad_ops_3d.cc   |   4 +
 tensorflow/core/kernels/conv_ops.cc           | 113 +++--
 tensorflow/core/kernels/conv_ops.h            |  10 +-
 tensorflow/core/kernels/conv_ops_3d.cc        |   3 +
 tensorflow/core/kernels/conv_ops_gpu.h        |  12 +-
 tensorflow/core/kernels/conv_ops_test.cc      |   4 +
 tensorflow/core/kernels/depthwise_conv_op.cc  |   5 +-
 tensorflow/core/kernels/quantized_conv_ops.cc |  13 +
 tensorflow/core/ops/nn_ops.cc                 |  80 +++-
 .../conv2d_backprop_filter_grad_test.py       |  54 ++-
 .../python/kernel_tests/conv_ops_test.py      | 407 +++++++++++++++++-
 tensorflow/python/ops/nn_grad.py              |  90 ++--
 tensorflow/python/ops/nn_ops.py               |  28 +-
 .../tools/api/golden/tensorflow.nn.pbtxt      |  18 +-
 33 files changed, 1181 insertions(+), 171 deletions(-)

diff --git a/tensorflow/compiler/tf2xla/kernels/conv_ops.cc b/tensorflow/compiler/tf2xla/kernels/conv_ops.cc
index c5017704e2..c150394c07 100644
--- a/tensorflow/compiler/tf2xla/kernels/conv_ops.cc
+++ b/tensorflow/compiler/tf2xla/kernels/conv_ops.cc
@@ -121,6 +121,7 @@ class ConvOp : public XlaOpKernel {
       : XlaOpKernel(ctx),
         num_spatial_dims_(num_spatial_dims),
         depthwise_(depthwise) {
+    OP_REQUIRES_OK(ctx, ctx->GetAttr("dilations", &dilations_));
     OP_REQUIRES_OK(ctx, ctx->GetAttr("strides", &strides_));
     OP_REQUIRES_OK(ctx, ctx->GetAttr("padding", &padding_));
 
@@ -144,6 +145,23 @@ class ConvOp : public XlaOpKernel {
         errors::Unimplemented("Current implementation does not yet support "
                               "strides in the batch and depth dimensions."));
 
+    OP_REQUIRES(ctx, dilations_.size() == num_dims(),
+                errors::InvalidArgument("Dilations field must "
+                                        "specify ",
+                                        num_dims(), " dimensions"));
+    OP_REQUIRES(
+        ctx, dilations_[batch_dim] == 1 && dilations_[feature_dim] == 1,
+        errors::Unimplemented("Current implementation does not yet support "
+                              "dilations in the batch and depth dimensions."));
+    for (int i = 0; i < num_spatial_dims_; ++i) {
+      int input_dim = GetTensorSpatialDimIndex(num_dims(), data_format_, i);
+      OP_REQUIRES(
+          ctx, dilations_[input_dim] == 1,
+          errors::Unimplemented("Current implementation does not yet support "
+                                "dilations in the ",
+                                i, "th spatial dimension."));
+    }
+
     const TensorShape input_shape = ctx->InputShape(0);
     // Input filter is of the following dimensions:
     // [ filter_rows, filter_cols, ..., in_depth, out_depth]
@@ -204,6 +222,7 @@ class ConvOp : public XlaOpKernel {
  protected:
   const int num_spatial_dims_;
   const bool depthwise_;
+  std::vector<int32> dilations_;
   std::vector<int32> strides_;
   Padding padding_;
   TensorFormat data_format_ = FORMAT_NHWC;
@@ -241,6 +260,7 @@ class ConvBackpropInputOp : public XlaOpKernel {
       : XlaOpKernel(ctx),
         num_spatial_dims_(num_spatial_dims),
         depthwise_(depthwise) {
+    OP_REQUIRES_OK(ctx, ctx->GetAttr("dilations", &dilations_));
     OP_REQUIRES_OK(ctx, ctx->GetAttr("strides", &strides_));
     OP_REQUIRES_OK(ctx, ctx->GetAttr("padding", &padding_));
     string data_format;
@@ -263,6 +283,23 @@ class ConvBackpropInputOp : public XlaOpKernel {
         errors::Unimplemented("Current implementation does not yet support "
                               "strides in the batch and depth dimensions."));
 
+    OP_REQUIRES(ctx, dilations_.size() == num_dims(),
+                errors::InvalidArgument("Dilations field must "
+                                        "specify ",
+                                        num_dims(), " dimensions"));
+    OP_REQUIRES(
+        ctx, dilations_[batch_dim] == 1 && dilations_[feature_dim] == 1,
+        errors::Unimplemented("Current implementation does not yet support "
+                              "dilations in the batch and depth dimensions."));
+    for (int i = 0; i < num_spatial_dims_; ++i) {
+      int input_dim = GetTensorSpatialDimIndex(num_dims(), data_format_, i);
+      OP_REQUIRES(
+          ctx, dilations_[input_dim] == 1,
+          errors::Unimplemented("Current implementation does not yet support "
+                                "dilations in the ",
+                                i, "th spatial dimension."));
+    }
+
     TensorShape input_shape;
     OP_REQUIRES_OK(ctx, ctx->ConstantInputAsShape(0, &input_shape));
 
@@ -336,6 +373,7 @@ class ConvBackpropInputOp : public XlaOpKernel {
  protected:
   const int num_spatial_dims_;
   const bool depthwise_;
+  std::vector<int32> dilations_;
   std::vector<int32> strides_;
   Padding padding_;
   TensorFormat data_format_ = FORMAT_NHWC;
@@ -373,6 +411,7 @@ class ConvBackpropFilterOp : public XlaOpKernel {
       : XlaOpKernel(ctx),
         num_spatial_dims_(num_spatial_dims),
         depthwise_(depthwise) {
+    OP_REQUIRES_OK(ctx, ctx->GetAttr("dilations", &dilations_));
     OP_REQUIRES_OK(ctx, ctx->GetAttr("strides", &strides_));
     OP_REQUIRES_OK(ctx, ctx->GetAttr("padding", &padding_));
     string data_format;
@@ -392,6 +431,23 @@ class ConvBackpropFilterOp : public XlaOpKernel {
         errors::InvalidArgument("Current implementation does not yet support "
                                 "strides in the batch and depth dimensions."));
 
+    OP_REQUIRES(ctx, dilations_.size() == num_dims(),
+                errors::InvalidArgument("Dilations field must "
+                                        "specify ",
+                                        num_dims(), " dimensions"));
+    OP_REQUIRES(
+        ctx, dilations_[n_dim] == 1 && dilations_[c_dim] == 1,
+        errors::Unimplemented("Current implementation does not yet support "
+                              "dilations in the batch and depth dimensions."));
+    for (int i = 0; i < num_spatial_dims_; ++i) {
+      int input_dim = GetTensorSpatialDimIndex(num_dims(), data_format_, i);
+      OP_REQUIRES(
+          ctx, dilations_[input_dim] == 1,
+          errors::Unimplemented("Current implementation does not yet support "
+                                "dilations in the ",
+                                i, "th spatial dimension."));
+    }
+
     const TensorShape activations_shape = ctx->InputShape(0);
     TensorShape filter_shape;
     OP_REQUIRES_OK(ctx, ctx->ConstantInputAsShape(1, &filter_shape));
@@ -526,6 +582,7 @@ class ConvBackpropFilterOp : public XlaOpKernel {
  protected:
   const int num_spatial_dims_;
   const bool depthwise_;
+  std::vector<int32> dilations_;
   std::vector<int32> strides_;
   Padding padding_;
   TensorFormat data_format_ = FORMAT_NHWC;
diff --git a/tensorflow/contrib/fused_conv/kernels/fused_conv2d_bias_activation_op.cc b/tensorflow/contrib/fused_conv/kernels/fused_conv2d_bias_activation_op.cc
index 88306094ab..5fec69ea43 100644
--- a/tensorflow/contrib/fused_conv/kernels/fused_conv2d_bias_activation_op.cc
+++ b/tensorflow/contrib/fused_conv/kernels/fused_conv2d_bias_activation_op.cc
@@ -493,6 +493,8 @@ void LaunchFusedConv2DBiasActivationOp<GPUDevice, T, BiasType, ScaleType>::
       {{conv_input_rows, conv_input_cols}},
       output_depth,
       {{filter_rows, filter_cols}},
+      // TODO(yangzihao): Add support for arbitrary dilations for fused conv.
+      {{1, 1}},  // dilation_rows, dilation_cols
       {{row_stride, col_stride}},
       {{padding_rows, padding_cols}},
       conv_input->dtype(),
diff --git a/tensorflow/contrib/fused_conv/kernels/fused_conv_ops_gpu.h b/tensorflow/contrib/fused_conv/kernels/fused_conv_ops_gpu.h
index dc43af1158..fa7a3c03aa 100644
--- a/tensorflow/contrib/fused_conv/kernels/fused_conv_ops_gpu.h
+++ b/tensorflow/contrib/fused_conv/kernels/fused_conv_ops_gpu.h
@@ -30,11 +30,12 @@ class FusedConvParameters : public ConvParameters {
  public:
   FusedConvParameters(int64 batch, int64 in_depths, const SpatialArray& in,
                       int64 out_depths, const SpatialArray& filter,
-                      const SpatialArray& stride, const SpatialArray& padding,
-                      DataType dtype, int device_id, bool has_side_input,
+                      const SpatialArray& dilation, const SpatialArray& stride,
+                      const SpatialArray& padding, DataType dtype,
+                      int device_id, bool has_side_input,
                       ActivationMode activation_mode)
-      : ConvParameters(batch, in_depths, in, out_depths, filter, stride,
-                       padding, dtype, device_id),
+      : ConvParameters(batch, in_depths, in, out_depths, filter, dilation,
+                       stride, padding, dtype, device_id),
         activation_mode_(activation_mode),
         has_side_input_(has_side_input) {
     hash_code_ = Hash64Combine(hash_code_, has_side_input);
diff --git a/tensorflow/contrib/fused_conv/ops/fused_conv2d_bias_activation_op.cc b/tensorflow/contrib/fused_conv/ops/fused_conv2d_bias_activation_op.cc
index 887ebc5a6c..6a56237f67 100644
--- a/tensorflow/contrib/fused_conv/ops/fused_conv2d_bias_activation_op.cc
+++ b/tensorflow/contrib/fused_conv/ops/fused_conv2d_bias_activation_op.cc
@@ -52,6 +52,7 @@ REGISTER_OP("FusedConv2DBiasActivation")
     .Attr("data_format: {'NHWC', 'NCHW', 'NCHW_VECT_C'} = 'NHWC'")
     .Attr("filter_format: {'HWIO', 'OIHW', 'OIHW_VECT_I'} = 'HWIO'")
     .Attr("activation_mode: {'Relu'} = 'Relu'")
+    .Attr("dilations: list(int) = [1, 1, 1, 1]")
     .SetShapeFn([](shape_inference::InferenceContext* c) {
       using shape_inference::ShapeHandle;
       using shape_inference::DimensionHandle;
@@ -151,6 +152,11 @@ REGISTER_OP("FusedConv2DBiasActivation")
                      kernel_height, kernel_width, input_channels % 4 ]`
     activation_mode: The activation applied to the output.
         Currently must be "Relu".
+    dilations: 1-D tensor of length 4.  The dilation factor for each dimension
+        of `input`. If set to k > 1, there will be k-1 skipped cells between
+        each filter element on that dimension. The dimension order is determined
+        by the value of `data_format`, see above for details. Dilations in the
+        batch and depth dimensions must be 1.
 )doc");
 
 }  // namespace tensorflow
diff --git a/tensorflow/core/api_def/base_api/api_def_Conv2D.pbtxt b/tensorflow/core/api_def/base_api/api_def_Conv2D.pbtxt
index 6522ce976f..070d6adb97 100644
--- a/tensorflow/core/api_def/base_api/api_def_Conv2D.pbtxt
+++ b/tensorflow/core/api_def/base_api/api_def_Conv2D.pbtxt
@@ -26,7 +26,7 @@ END
     description: <<END
 1-D tensor of length 4.  The stride of the sliding window for each
 dimension of `input`. The dimension order is determined by the value of
-  `data_format`, see below for details.
+`data_format`, see below for details.
 END
   }
   attr {
@@ -43,6 +43,16 @@ default format "NHWC", the data is stored in the order of:
     [batch, height, width, channels].
 Alternatively, the format could be "NCHW", the data storage order of:
     [batch, channels, height, width].
+END
+  }
+  attr {
+    name: "dilations"
+    description: <<END
+1-D tensor of length 4.  The dilation factor for each dimension of
+`input`. If set to k > 1, there will be k-1 skipped cells between each
+filter element on that dimension. The dimension order is determined by the
+value of `data_format`, see above for details. Dilations in the batch and
+depth dimensions must be 1.
 END
   }
   summary: "Computes a 2-D convolution given 4-D `input` and `filter` tensors."
diff --git a/tensorflow/core/api_def/base_api/api_def_Conv2DBackpropFilter.pbtxt b/tensorflow/core/api_def/base_api/api_def_Conv2DBackpropFilter.pbtxt
index 4ea3374dbb..ff2d9d71db 100644
--- a/tensorflow/core/api_def/base_api/api_def_Conv2DBackpropFilter.pbtxt
+++ b/tensorflow/core/api_def/base_api/api_def_Conv2DBackpropFilter.pbtxt
@@ -51,6 +51,16 @@ default format "NHWC", the data is stored in the order of:
     [batch, in_height, in_width, in_channels].
 Alternatively, the format could be "NCHW", the data storage order of:
     [batch, in_channels, in_height, in_width].
+END
+  }
+  attr {
+    name: "dilations"
+    description: <<END
+1-D tensor of length 4.  The dilation factor for each dimension of
+`input`. If set to k > 1, there will be k-1 skipped cells between each filter
+element on that dimension. The dimension order is determined by the value of
+`data_format`, see above for details. Dilations in the batch and depth
+dimensions must be 1.
 END
   }
   summary: "Computes the gradients of convolution with respect to the filter."
diff --git a/tensorflow/core/api_def/base_api/api_def_Conv2DBackpropInput.pbtxt b/tensorflow/core/api_def/base_api/api_def_Conv2DBackpropInput.pbtxt
index 4420073e38..2de38b4263 100644
--- a/tensorflow/core/api_def/base_api/api_def_Conv2DBackpropInput.pbtxt
+++ b/tensorflow/core/api_def/base_api/api_def_Conv2DBackpropInput.pbtxt
@@ -50,6 +50,16 @@ default format "NHWC", the data is stored in the order of:
     [batch, in_height, in_width, in_channels].
 Alternatively, the format could be "NCHW", the data storage order of:
     [batch, in_channels, in_height, in_width].
+END
+  }
+  attr {
+    name: "dilations"
+    description: <<END
+1-D tensor of length 4.  The dilation factor for each dimension of
+`input`. If set to k > 1, there will be k-1 skipped cells between each filter
+element on that dimension. The dimension order is determined by the value of
+`data_format`, see above for details. Dilations in the batch and depth
+dimensions must be 1.
 END
   }
   summary: "Computes the gradients of convolution with respect to the input."
diff --git a/tensorflow/core/api_def/base_api/api_def_Conv3D.pbtxt b/tensorflow/core/api_def/base_api/api_def_Conv3D.pbtxt
index 8f3cd4493c..d26564097e 100644
--- a/tensorflow/core/api_def/base_api/api_def_Conv3D.pbtxt
+++ b/tensorflow/core/api_def/base_api/api_def_Conv3D.pbtxt
@@ -34,6 +34,16 @@ default format "NDHWC", the data is stored in the order of:
     [batch, in_depth, in_height, in_width, in_channels].
 Alternatively, the format could be "NCDHW", the data storage order is:
     [batch, in_channels, in_depth, in_height, in_width].
+END
+  }
+  attr {
+    name: "dilations"
+    description: <<END
+1-D tensor of length 5.  The dilation factor for each dimension of
+`input`. If set to k > 1, there will be k-1 skipped cells between each
+filter element on that dimension. The dimension order is determined by the
+value of `data_format`, see above for details. Dilations in the batch and
+depth dimensions must be 1.
 END
   }
   summary: "Computes a 3-D convolution given 5-D `input` and `filter` tensors."
diff --git a/tensorflow/core/api_def/base_api/api_def_Conv3DBackpropFilterV2.pbtxt b/tensorflow/core/api_def/base_api/api_def_Conv3DBackpropFilterV2.pbtxt
index 6f9b917237..937c9c8ead 100644
--- a/tensorflow/core/api_def/base_api/api_def_Conv3DBackpropFilterV2.pbtxt
+++ b/tensorflow/core/api_def/base_api/api_def_Conv3DBackpropFilterV2.pbtxt
@@ -43,6 +43,16 @@ default format "NDHWC", the data is stored in the order of:
     [batch, in_depth, in_height, in_width, in_channels].
 Alternatively, the format could be "NCDHW", the data storage order is:
     [batch, in_channels, in_depth, in_height, in_width].
+END
+  }
+  attr {
+    name: "dilations"
+    description: <<END
+1-D tensor of length 5.  The dilation factor for each dimension of
+`input`. If set to k > 1, there will be k-1 skipped cells between each
+filter element on that dimension. The dimension order is determined by the
+value of `data_format`, see above for details. Dilations in the batch and
+depth dimensions must be 1.
 END
   }
   summary: "Computes the gradients of 3-D convolution with respect to the filter."
diff --git a/tensorflow/core/api_def/base_api/api_def_Conv3DBackpropInputV2.pbtxt b/tensorflow/core/api_def/base_api/api_def_Conv3DBackpropInputV2.pbtxt
index 19aba156d5..414e418dc5 100644
--- a/tensorflow/core/api_def/base_api/api_def_Conv3DBackpropInputV2.pbtxt
+++ b/tensorflow/core/api_def/base_api/api_def_Conv3DBackpropInputV2.pbtxt
@@ -43,6 +43,16 @@ default format "NDHWC", the data is stored in the order of:
     [batch, in_depth, in_height, in_width, in_channels].
 Alternatively, the format could be "NCDHW", the data storage order is:
     [batch, in_channels, in_depth, in_height, in_width].
+END
+  }
+  attr {
+    name: "dilations"
+    description: <<END
+1-D tensor of length 5.  The dilation factor for each dimension of
+`input`. If set to k > 1, there will be k-1 skipped cells between each
+filter element on that dimension. The dimension order is determined by the
+value of `data_format`, see above for details. Dilations in the batch and
+depth dimensions must be 1.
 END
   }
   summary: "Computes the gradients of 3-D convolution with respect to the input."
diff --git a/tensorflow/core/api_def/base_api/api_def_DepthwiseConv2dNative.pbtxt b/tensorflow/core/api_def/base_api/api_def_DepthwiseConv2dNative.pbtxt
index cc10ebe923..3c313f7be6 100644
--- a/tensorflow/core/api_def/base_api/api_def_DepthwiseConv2dNative.pbtxt
+++ b/tensorflow/core/api_def/base_api/api_def_DepthwiseConv2dNative.pbtxt
@@ -21,6 +21,16 @@ default format "NHWC", the data is stored in the order of:
     [batch, height, width, channels].
 Alternatively, the format could be "NCHW", the data storage order of:
     [batch, channels, height, width].
+END
+  }
+  attr {
+    name: "dilations"
+    description: <<END
+1-D tensor of length 4.  The dilation factor for each dimension of
+`input`. If set to k > 1, there will be k-1 skipped cells between each filter
+element on that dimension. The dimension order is determined by the value of
+`data_format`, see above for details. Dilations in the batch and depth
+dimensions must be 1.
 END
   }
   summary: "Computes a 2-D depthwise convolution given 4-D `input` and `filter` tensors."
diff --git a/tensorflow/core/api_def/base_api/api_def_DepthwiseConv2dNativeBackpropFilter.pbtxt b/tensorflow/core/api_def/base_api/api_def_DepthwiseConv2dNativeBackpropFilter.pbtxt
index 9126be2afa..e66aa3b707 100644
--- a/tensorflow/core/api_def/base_api/api_def_DepthwiseConv2dNativeBackpropFilter.pbtxt
+++ b/tensorflow/core/api_def/base_api/api_def_DepthwiseConv2dNativeBackpropFilter.pbtxt
@@ -54,6 +54,16 @@ default format "NHWC", the data is stored in the order of:
     [batch, height, width, channels].
 Alternatively, the format could be "NCHW", the data storage order of:
     [batch, channels, height, width].
+END
+  }
+  attr {
+    name: "dilations"
+    description: <<END
+1-D tensor of length 4.  The dilation factor for each dimension of
+`input`. If set to k > 1, there will be k-1 skipped cells between each filter
+element on that dimension. The dimension order is determined by the value of
+`data_format`, see above for details. Dilations in the batch and depth
+dimensions must be 1.
 END
   }
   summary: "Computes the gradients of depthwise convolution with respect to the filter."
diff --git a/tensorflow/core/api_def/base_api/api_def_DepthwiseConv2dNativeBackpropInput.pbtxt b/tensorflow/core/api_def/base_api/api_def_DepthwiseConv2dNativeBackpropInput.pbtxt
index f1d16858db..f501ad21b3 100644
--- a/tensorflow/core/api_def/base_api/api_def_DepthwiseConv2dNativeBackpropInput.pbtxt
+++ b/tensorflow/core/api_def/base_api/api_def_DepthwiseConv2dNativeBackpropInput.pbtxt
@@ -54,6 +54,16 @@ default format "NHWC", the data is stored in the order of:
     [batch, height, width, channels].
 Alternatively, the format could be "NCHW", the data storage order of:
     [batch, channels, height, width].
+END
+  }
+  attr {
+    name: "dilations"
+    description: <<END
+1-D tensor of length 4.  The dilation factor for each dimension of
+`input`. If set to k > 1, there will be k-1 skipped cells between each filter
+element on that dimension. The dimension order is determined by the value of
+`data_format`, see above for details. Dilations in the batch and depth
+dimensions must be 1.
 END
   }
   summary: "Computes the gradients of depthwise convolution with respect to the input."
diff --git a/tensorflow/core/api_def/base_api/api_def_QuantizedConv2D.pbtxt b/tensorflow/core/api_def/base_api/api_def_QuantizedConv2D.pbtxt
index b19bbeab12..d18bafdce9 100644
--- a/tensorflow/core/api_def/base_api/api_def_QuantizedConv2D.pbtxt
+++ b/tensorflow/core/api_def/base_api/api_def_QuantizedConv2D.pbtxt
@@ -53,6 +53,16 @@ END
     name: "padding"
     description: <<END
 The type of padding algorithm to use.
+END
+  }
+  attr {
+    name: "dilations"
+    description: <<END
+1-D tensor of length 4.  The dilation factor for each dimension of
+`input`. If set to k > 1, there will be k-1 skipped cells between each
+filter element on that dimension. The dimension order is determined by the
+value of `data_format`, see above for details. Dilations in the batch and
+depth dimensions must be 1.
 END
   }
   summary: "Computes a 2D convolution given quantized 4D input and filter tensors."
diff --git a/tensorflow/core/framework/common_shape_fns.cc b/tensorflow/core/framework/common_shape_fns.cc
index be7f2e2808..036e3473b1 100644
--- a/tensorflow/core/framework/common_shape_fns.cc
+++ b/tensorflow/core/framework/common_shape_fns.cc
@@ -397,6 +397,15 @@ Status Conv2DShape(shape_inference::InferenceContext* c) {
   TF_RETURN_IF_ERROR(
       CheckFormatConstraintsOnShape(data_format, filter_shape, "filter", c));
 
+  std::vector<int32> dilations;
+  TF_RETURN_IF_ERROR(c->GetAttr("dilations", &dilations));
+
+  if (dilations.size() != 4) {
+    return errors::InvalidArgument(
+        "Conv2D requires the dilation attribute to contain 4 values, but got: ",
+        dilations.size());
+  }
+
   std::vector<int32> strides;
   TF_RETURN_IF_ERROR(c->GetAttr("strides", &strides));
 
@@ -410,6 +419,8 @@ Status Conv2DShape(shape_inference::InferenceContext* c) {
 
   const int32 stride_rows = GetTensorDim(strides, data_format, 'H');
   const int32 stride_cols = GetTensorDim(strides, data_format, 'W');
+  const int32 dilation_rows = GetTensorDim(dilations, data_format, 'H');
+  const int32 dilation_cols = GetTensorDim(dilations, data_format, 'W');
 
   DimensionHandle batch_size_dim;
   DimensionHandle input_depth_dim;
@@ -447,12 +458,12 @@ Status Conv2DShape(shape_inference::InferenceContext* c) {
   TF_RETURN_IF_ERROR(c->GetAttr("padding", &padding));
 
   DimensionHandle output_rows, output_cols;
-  TF_RETURN_IF_ERROR(GetWindowedOutputSizeFromDims(c, input_spatial_dims[0],
-                                                   filter_rows_dim, stride_rows,
-                                                   padding, &output_rows));
-  TF_RETURN_IF_ERROR(GetWindowedOutputSizeFromDims(c, input_spatial_dims[1],
-                                                   filter_cols_dim, stride_cols,
-                                                   padding, &output_cols));
+  TF_RETURN_IF_ERROR(GetWindowedOutputSizeFromDimsV2(
+      c, input_spatial_dims[0], filter_rows_dim, dilation_rows, stride_rows,
+      padding, &output_rows));
+  TF_RETURN_IF_ERROR(GetWindowedOutputSizeFromDimsV2(
+      c, input_spatial_dims[1], filter_cols_dim, dilation_cols, stride_cols,
+      padding, &output_cols));
 
   ShapeHandle output_shape;
   TF_RETURN_IF_ERROR(
diff --git a/tensorflow/core/framework/common_shape_fns_test.cc b/tensorflow/core/framework/common_shape_fns_test.cc
index ec9746b2af..5f3e5ad457 100644
--- a/tensorflow/core/framework/common_shape_fns_test.cc
+++ b/tensorflow/core/framework/common_shape_fns_test.cc
@@ -423,6 +423,15 @@ TEST(CommonShapeFnsTest, Conv2DShapeTest) {
                     .Finalize(&op.node_def));
   };
 
+  // Invalid rank for input
+  INFER_ERROR("must be rank 4", op, "[4,4];[2,1,1,1]");
+  // Invalid rank for filter
+  INFER_ERROR("must be rank 4", op, "[1,4,4,1];[2,1,1]");
+
+  // Invalid value for strides
+  set_op({{1, 1, 0, 1}}, "VALID", "NHWC", "HWIO");
+  INFER_ERROR("must be > 0", op, "[1,2,2,1];[1,1,1,1]");
+
   // 1x1 filter
   set_op({{1, 1, 1, 1}}, "VALID", "NHWC", "HWIO");
   INFER_OK(op, "[1,2,2,1];[1,1,1,1]", "[d0_0,2,2,d1_3]");
@@ -443,11 +452,6 @@ TEST(CommonShapeFnsTest, Conv2DShapeTest) {
   set_op({{1, 1, 2, 1}}, "VALID", "NHWC", "HWIO");
   INFER_OK(op, "[1,4,4,1];[2,1,1,1]", "[d0_0,3,2,d1_3]");
 
-  // Invalid rank for input
-  INFER_ERROR("must be rank 4", op, "[4,4];[2,1,1,1]");
-  // Invalid rank for filter
-  INFER_ERROR("must be rank 4", op, "[1,4,4,1];[2,1,1]");
-
   // Unknown dims in the critical fields lead to partial inference.
   INFER_OK(op, "[1,4,4,1];[2,1,1,1]", "[d0_0,3,2,d1_3]");
   INFER_OK(op, "[1,?,4,1];[2,1,1,1]", "[d0_0,?,2,d1_3]");
@@ -538,6 +542,98 @@ TEST(CommonShapeFnsTest, Conv2DShapeTest) {
   INFER_OK(op, "[1,4,4,?];[?,?,?,?]", "[d0_0,2,2,d1_3]");
 }
 
+TEST(CommonShapeFnsTest, Conv2DDilatedShapeTest) {
+  ShapeInferenceTestOp op("Conv2D");
+  auto set_op = [&op](const std::vector<int32>& dilations,
+                      const std::vector<int32>& strides, const string& padding,
+                      const string& data_format) {
+    TF_CHECK_OK(NodeDefBuilder("test", "Conv2D")
+                    .Input("input", 0, DT_FLOAT)
+                    .Input("filter", 0, DT_FLOAT)
+                    .Attr("dilations", dilations)
+                    .Attr("strides", strides)
+                    .Attr("padding", padding)
+                    .Attr("data_format", data_format)
+                    .Finalize(&op.node_def));
+  };
+
+  // Invalid rank for dilation
+  set_op({{1, 2, 1}}, {{1, 1, 1, 1}}, "VALID", "NHWC");
+  INFER_ERROR("contain 4 values", op, "[1,2,2,1];[1,1,1,1]");
+
+  // Invalid value for dilation
+  set_op({{1, 0, 1, 1}}, {{1, 1, 1, 1}}, "VALID", "NHWC");
+  INFER_ERROR("must be >= 1", op, "[1,2,2,1];[1,1,1,1]");
+
+  // Tests for NHWC
+  // 1x1 filter, 2x1 dilations, 1x1 strides
+  set_op({{1, 2, 1, 1}}, {{1, 1, 1, 1}}, "VALID", "NHWC");
+  INFER_OK(op, "[1,2,2,1];[1,1,1,1]", "[d0_0,2,2,d1_3]");
+
+  // 1x1 filter, 2x1 dilations, 2x1 strides
+  set_op({{1, 2, 1, 1}}, {{1, 2, 1, 1}}, "VALID", "NHWC");
+  INFER_OK(op, "[1,4,4,1];[1,1,1,1]", "[d0_0,2,4,d1_3]");
+
+  // 1x1 filter, 2x1 dilations, 2x2 strides
+  set_op({{1, 2, 1, 1}}, {{1, 2, 2, 1}}, "VALID", "NHWC");
+  INFER_OK(op, "[1,4,4,1];[1,1,1,1]", "[d0_0,2,2,d1_3]");
+
+  // 3x3 filter, 2x1 dilations, 1x1 strides
+  set_op({{1, 2, 1, 1}}, {{1, 1, 1, 1}}, "VALID", "NHWC");
+  INFER_OK(op, "[1,5,5,1];[3,3,1,1]", "[d0_0,1,3,d1_3]");
+
+  // 3x3 filter, 2x1 dilations, 2x1 strides
+  set_op({{1, 2, 1, 1}}, {{1, 2, 1, 1}}, "VALID", "NHWC");
+  INFER_OK(op, "[1,5,5,1];[3,3,1,1]", "[d0_0,1,3,d1_3]");
+
+  // 3x3 filter, 1x2 dilations, 2x2 strides
+  set_op({{1, 1, 2, 1}}, {{1, 2, 2, 1}}, "VALID", "NHWC");
+  INFER_OK(op, "[1,5,5,1];[3,3,1,1]", "[d0_0,2,1,d1_3]");
+
+  // Tests for NCHW
+  // 1x1 filter, 2x1 dilations, 1x1 strides
+  set_op({{1, 1, 2, 1}}, {{1, 1, 1, 1}}, "VALID", "NCHW");
+  INFER_OK(op, "[1,1,2,2];[1,1,1,1]", "[d0_0,d1_3,2,2]");
+
+  // 1x1 filter, 2x1 dilations, 2x1 strides
+  set_op({{1, 1, 2, 1}}, {{1, 1, 2, 1}}, "VALID", "NCHW");
+  INFER_OK(op, "[1,1,4,4];[1,1,1,1]", "[d0_0,d1_3,2,4]");
+
+  // 1x1 filter, 2x1 dilations, 2x2 strides
+  set_op({{1, 1, 2, 1}}, {{1, 1, 2, 2}}, "VALID", "NCHW");
+  INFER_OK(op, "[1,1,4,4];[1,1,1,1]", "[d0_0,d1_3,2,2]");
+
+  // 3x3 filter, 2x1 dilations, 1x1 strides
+  set_op({{1, 1, 2, 1}}, {{1, 1, 1, 1}}, "VALID", "NCHW");
+  INFER_OK(op, "[1,1,5,5];[3,3,1,1]", "[d0_0,d1_3,1,3]");
+
+  // 3x3 filter, 2x1 dilations, 2x1 strides
+  set_op({{1, 1, 2, 1}}, {{1, 1, 2, 1}}, "VALID", "NCHW");
+  INFER_OK(op, "[1,1,5,5];[3,3,1,1]", "[d0_0,d1_3,1,3]");
+
+  // 3x3 filter, 1x2 dilations, 2x2 strides
+  set_op({{1, 1, 1, 2}}, {{1, 1, 2, 2}}, "VALID", "NCHW");
+  INFER_OK(op, "[1,1,5,5];[3,3,1,1]", "[d0_0,d1_3,2,1]");
+
+  // Some tests for "SAME" padding
+
+  // 4x4 input, 1x1 filter, 2x1 dilations, 1x1 stride
+  set_op({{1, 2, 1, 1}}, {{1, 1, 1, 1}}, "SAME", "NHWC");
+  INFER_OK(op, "[1,4,4,1];[1,1,1,1]", "[d0_0,d0_1,d0_2,d1_3]");
+
+  // 3x3 input, 2x2 filter, 2x2 dilations, 1x1 stride
+  set_op({{1, 2, 2, 1}}, {{1, 1, 1, 1}}, "SAME", "NHWC");
+  INFER_OK(op, "[1,3,3,1];[2,2,1,1]", "[d0_0,d0_1,d0_2,d1_3]");
+
+  // 4x4 input, 2x2 filter, 1x2 dilations, 2x2 stride
+  set_op({{1, 1, 2, 1}}, {{1, 2, 2, 1}}, "SAME", "NHWC");
+  INFER_OK(op, "[1,4,4,1];[2,2,1,1]", "[d0_0,2,2,d1_3]");
+
+  // 4x4 input, 2x2 filter, 2x2 dilations, 1x1 stride
+  set_op({{1, 2, 2, 1}}, {{1, 1, 1, 1}}, "SAME", "NHWC");
+  INFER_OK(op, "[1,4,4,1];[2,2,1,1]", "[d0_0,d0_1,d0_2,d1_3]");
+}
+
 TEST(CommonShapeFnsTest, Conv3DShapeTest) {
   ShapeInferenceTestOp op("Conv3D");
   auto set_op = [&op](const std::vector<int32>& strides,
diff --git a/tensorflow/core/kernels/conv_grad_filter_ops.cc b/tensorflow/core/kernels/conv_grad_filter_ops.cc
index 3d2bb57aff..1791c51096 100644
--- a/tensorflow/core/kernels/conv_grad_filter_ops.cc
+++ b/tensorflow/core/kernels/conv_grad_filter_ops.cc
@@ -194,7 +194,23 @@ class Conv2DFastBackpropFilterOp : public OpKernel {
         context, (strides_[0] == 1 && strides_[3] == 1),
         errors::InvalidArgument("Current implementation does not yet support "
                                 "strides in the batch and depth dimensions."));
+    OP_REQUIRES(context, strides_[1] > 0 && strides_[2] > 0,
+                errors::InvalidArgument(
+                    "Row and column strides should be larger than 0."));
     OP_REQUIRES_OK(context, context->GetAttr("padding", &padding_));
+    OP_REQUIRES_OK(context, context->GetAttr("dilations", &dilations_));
+    OP_REQUIRES(context, dilations_.size() == 4,
+                errors::InvalidArgument("Sliding window dilations field must "
+                                        "specify 4 dimensions"));
+    OP_REQUIRES(context, (dilations_[0] == 1 && dilations_[3] == 1),
+                errors::InvalidArgument(
+                    "Current implementation does not yet support "
+                    "dilations in the batch and depth dimensions."));
+    // TODO(yangzihao): Add a CPU implementation for dilated convolution.
+    OP_REQUIRES(context, (dilations_[1] == 1 && dilations_[2] == 1),
+                errors::InvalidArgument(
+                    "Current Eigen and libxsmm implementations do not "
+                    "yet support dilation rates larger than 1."));
   }
 
   void Compute(OpKernelContext* context) override {
@@ -262,6 +278,7 @@ class Conv2DFastBackpropFilterOp : public OpKernel {
   }
 
  private:
+  std::vector<int32> dilations_;
   std::vector<int32> strides_;
   Padding padding_;
   TensorFormat data_format_;
@@ -290,7 +307,23 @@ class Conv2DCustomBackpropFilterOp : public OpKernel {
         context, (strides_[0] == 1 && strides_[3] == 1),
         errors::InvalidArgument("Current implementation does not yet support "
                                 "strides in the batch and depth dimensions."));
+    OP_REQUIRES(context, strides_[1] > 0 && strides_[2] > 0,
+                errors::InvalidArgument(
+                    "Row and column strides should be larger than 0."));
     OP_REQUIRES_OK(context, context->GetAttr("padding", &padding_));
+    OP_REQUIRES_OK(context, context->GetAttr("dilations", &dilations_));
+    OP_REQUIRES(context, dilations_.size() == 4,
+                errors::InvalidArgument("Sliding window dilations field must "
+                                        "specify 4 dimensions"));
+    OP_REQUIRES(context, (dilations_[0] == 1 && dilations_[3] == 1),
+                errors::InvalidArgument(
+                    "Current implementation does not yet support "
+                    "dilations in the batch and depth dimensions."));
+    // TODO(yangzihao): Add a CPU implementation for dilated convolution.
+    OP_REQUIRES(context, (dilations_[1] == 1 && dilations_[2] == 1),
+                errors::InvalidArgument(
+                    "Current libxsmm and customized CPU implementations do "
+                    "not yet support dilation rates larger than 1."));
   }
 
   void Compute(OpKernelContext* context) override {
@@ -459,6 +492,7 @@ class Conv2DCustomBackpropFilterOp : public OpKernel {
   }
 
  private:
+  std::vector<int32> dilations_;
   std::vector<int32> strides_;
   Padding padding_;
   TensorFormat data_format_;
@@ -510,10 +544,30 @@ class Conv2DSlowBackpropFilterOp : public OpKernel {
     OP_REQUIRES_OK(context, context->GetAttr("strides", &strides_));
     int stride_n = GetTensorDim(strides_, data_format_, 'N');
     int stride_c = GetTensorDim(strides_, data_format_, 'C');
+    int stride_h = GetTensorDim(strides_, data_format_, 'H');
+    int stride_w = GetTensorDim(strides_, data_format_, 'W');
     OP_REQUIRES(
         context, (stride_n == 1 && stride_c == 1),
         errors::InvalidArgument("Current implementation does not yet support "
                                 "strides in the batch and depth dimensions."));
+    OP_REQUIRES(context, stride_h > 0 && stride_w > 0,
+                errors::InvalidArgument(
+                    "Row and column strides should be larger than 0."));
+    OP_REQUIRES_OK(context, context->GetAttr("dilations", &dilations_));
+    OP_REQUIRES(context, dilations_.size() == 4,
+                errors::InvalidArgument("Sliding window dilations field must "
+                                        "specify 4 dimensions"));
+    int dilation_n = GetTensorDim(dilations_, data_format_, 'N');
+    int dilation_c = GetTensorDim(dilations_, data_format_, 'C');
+    int dilation_h = GetTensorDim(dilations_, data_format_, 'H');
+    int dilation_w = GetTensorDim(dilations_, data_format_, 'W');
+    OP_REQUIRES(context, dilation_n == 1 && dilation_c == 1,
+                errors::InvalidArgument(
+                    "Current implementation does not yet support "
+                    "dilations in the batch and depth dimensions."));
+    OP_REQUIRES(
+        context, dilation_h > 0 && dilation_w > 0,
+        errors::InvalidArgument("Dilated rates should be larger than 0."));
     OP_REQUIRES_OK(context, context->GetAttr("use_cudnn_on_gpu", &use_cudnn_));
     use_cudnn_ &= CanUseCudnn();
     cudnn_use_autotune_ = CudnnUseAutotune();
@@ -546,13 +600,16 @@ class Conv2DSlowBackpropFilterOp : public OpKernel {
     // do not support striding on the batch or depth dimension).
     const int stride_rows = GetTensorDim(strides_, data_format_, 'H');
     const int stride_cols = GetTensorDim(strides_, data_format_, 'W');
+    const int dilation_rows = GetTensorDim(dilations_, data_format_, 'H');
+    const int dilation_cols = GetTensorDim(dilations_, data_format_, 'W');
 
     launcher_(context, use_cudnn_, cudnn_use_autotune_, out_backprop, input,
-              stride_rows, stride_cols, padding_, filter_backprop,
-              data_format_);
+              dilation_rows, dilation_cols, stride_rows, stride_cols, padding_,
+              filter_backprop, data_format_);
   }
 
  private:
+  std::vector<int32> dilations_;
   std::vector<int32> strides_;
   Padding padding_;
   bool use_cudnn_;
@@ -566,38 +623,46 @@ class Conv2DSlowBackpropFilterOp : public OpKernel {
 template <typename T>
 void LaunchConv2DBackpropFilterOp<Eigen::GpuDevice, T>::operator()(
     OpKernelContext* ctx, bool use_cudnn, bool cudnn_use_autotune,
-    const Tensor& out_backprop, const Tensor& input, int row_stride,
-    int col_stride, const Padding& padding, Tensor* filter_backprop,
-    TensorFormat data_format) {
+    const Tensor& out_backprop, const Tensor& input, int row_dilation,
+    int col_dilation, int row_stride, int col_stride, const Padding& padding,
+    Tensor* filter_backprop, TensorFormat data_format) {
   using perftools::gputools::dnn::AlgorithmConfig;
   using perftools::gputools::dnn::AlgorithmDesc;
   using perftools::gputools::dnn::ProfileResult;
 
+  std::vector<int32> dilations(4, 1);
+  dilations[GetTensorDimIndex(data_format, 'H')] = row_dilation;
+  dilations[GetTensorDimIndex(data_format, 'W')] = col_dilation;
+
   std::vector<int32> strides(4, 1);
   strides[GetTensorDimIndex(data_format, 'H')] = row_stride;
   strides[GetTensorDimIndex(data_format, 'W')] = col_stride;
   TensorShape filter_shape = filter_backprop->shape();
 
   ConvBackpropDimensions dims;
-  OP_REQUIRES_OK(ctx, ConvBackpropComputeDimensions(
+  OP_REQUIRES_OK(ctx, ConvBackpropComputeDimensionsV2(
                           "Conv2DSlowBackpropFilter", /*num_spatial_dims=*/2,
                           input.shape(), filter_shape, out_backprop.shape(),
-                          strides, padding, data_format, &dims));
+                          dilations, strides, padding, data_format, &dims));
 
+  // TODO(yangzihao): The padding computations should be done in
+  // GetWindowedOutputSize() functions.
   const int padding_rows =
       (padding == VALID)
           ? 0
           : std::max<int>(0, (dims.spatial_dims[0].output_size - 1) *
                                      dims.spatial_dims[0].stride +
-                                 dims.spatial_dims[0].filter_size -
-                                 dims.spatial_dims[0].input_size);
+                                 (dims.spatial_dims[0].filter_size - 1) *
+                                     dims.spatial_dims[0].dilation +
+                                 1 - dims.spatial_dims[0].input_size);
   const int padding_cols =
       (padding == VALID)
           ? 0
           : std::max<int>(0, (dims.spatial_dims[1].output_size - 1) *
                                      dims.spatial_dims[1].stride +
-                                 dims.spatial_dims[1].filter_size -
-                                 dims.spatial_dims[1].input_size);
+                                 (dims.spatial_dims[1].filter_size - 1) *
+                                     dims.spatial_dims[1].dilation +
+                                 1 - dims.spatial_dims[1].input_size);
 
   // TODO(zhengxq): cuDNN only supports equal padding on both sides, so only
   // calling it when that is true. Remove this check when (if?) cuDNN starts
@@ -730,7 +795,9 @@ void LaunchConv2DBackpropFilterOp<Eigen::GpuDevice, T>::operator()(
       .set_input_feature_map_count(dims.in_depth)
       .set_output_feature_map_count(dims.out_depth);
   perftools::gputools::dnn::ConvolutionDescriptor conv_desc;
-  conv_desc.set_vertical_filter_stride(dims.spatial_dims[0].stride)
+  conv_desc.set_vertical_dilation_rate(dims.spatial_dims[0].dilation)
+      .set_horizontal_dilation_rate(dims.spatial_dims[1].dilation)
+      .set_vertical_filter_stride(dims.spatial_dims[0].stride)
       .set_horizontal_filter_stride(dims.spatial_dims[1].stride)
       .set_zero_padding_height(padding_rows / 2)
       .set_zero_padding_width(padding_cols / 2);
@@ -821,6 +888,8 @@ void LaunchConv2DBackpropFilterOp<Eigen::GpuDevice, T>::operator()(
       dims.out_depth,                        // out_depths
       {{dims.spatial_dims[0].filter_size,    // filter_rows
         dims.spatial_dims[1].filter_size}},  // filter_cols
+      {{dims.spatial_dims[0].dilation,       // dilation_rows
+        dims.spatial_dims[1].dilation}},     // dilation_cols
       {{dims.spatial_dims[0].stride,         // stride_rows
         dims.spatial_dims[1].stride}},       // stride_cols
       {{padding_rows,                        // padding_rows
diff --git a/tensorflow/core/kernels/conv_grad_input_ops.cc b/tensorflow/core/kernels/conv_grad_input_ops.cc
index d28f6b4d10..736241a029 100644
--- a/tensorflow/core/kernels/conv_grad_input_ops.cc
+++ b/tensorflow/core/kernels/conv_grad_input_ops.cc
@@ -198,7 +198,23 @@ class Conv2DFastBackpropInputOp : public OpKernel {
         context, (strides_[0] == 1 && strides_[3] == 1),
         errors::InvalidArgument("Current implementation does not yet support "
                                 "strides in the batch and depth dimensions."));
+    OP_REQUIRES(context, strides_[1] > 0 && strides_[2] > 0,
+                errors::InvalidArgument(
+                    "Row and column strides should be larger than 0."));
     OP_REQUIRES_OK(context, context->GetAttr("padding", &padding_));
+    OP_REQUIRES_OK(context, context->GetAttr("dilations", &dilations_));
+    OP_REQUIRES(context, dilations_.size() == 4,
+                errors::InvalidArgument("Sliding window dilations field must "
+                                        "specify 4 dimensions"));
+    OP_REQUIRES(context, (dilations_[0] && dilations_[3]),
+                errors::InvalidArgument(
+                    "Current implementation does not yet support "
+                    "dilations in the batch and depth dimensions."));
+    // TODO(yangzihao): Add a CPU implementation for dilated convolution.
+    OP_REQUIRES(context, (dilations_[1] == 1 && dilations_[2] == 1),
+                errors::InvalidArgument(
+                    "Current Eigen and libxsmm implementations do not "
+                    "yet support dilation rates larger than 1."));
   }
 
   void Compute(OpKernelContext* context) override {
@@ -268,6 +284,7 @@ class Conv2DFastBackpropInputOp : public OpKernel {
   }
 
  private:
+  std::vector<int32> dilations_;
   std::vector<int32> strides_;
   Padding padding_;
   TensorFormat data_format_;
@@ -296,7 +313,23 @@ class Conv2DCustomBackpropInputOp : public OpKernel {
         context, (strides_[0] == 1 && strides_[3] == 1),
         errors::InvalidArgument("Current implementation does not yet support "
                                 "strides in the batch and depth dimensions."));
+    OP_REQUIRES(context, strides_[1] > 0 && strides_[2] > 0,
+                errors::InvalidArgument(
+                    "Row and column strides should be larger than 0."));
     OP_REQUIRES_OK(context, context->GetAttr("padding", &padding_));
+    OP_REQUIRES_OK(context, context->GetAttr("dilations", &dilations_));
+    OP_REQUIRES(context, dilations_.size() == 4,
+                errors::InvalidArgument("Sliding window dilations field must "
+                                        "specify 4 dimensions"));
+    OP_REQUIRES(context, (dilations_[0] == 1 && dilations_[3] == 1),
+                errors::InvalidArgument(
+                    "Current implementation does not yet support "
+                    "dilations in the batch and depth dimensions."));
+    // TODO(yangzihao): Add a CPU implementation for dilated convolution.
+    OP_REQUIRES(context, (dilations_[1] == 1 && dilations_[2] == 1),
+                errors::InvalidArgument(
+                    "Current libxsmm and customized CPU implementations do "
+                    "not yet support dilation rates larger than 1."));
   }
 
   void Compute(OpKernelContext* context) override {
@@ -532,6 +565,7 @@ class Conv2DCustomBackpropInputOp : public OpKernel {
   }
 
  private:
+  std::vector<int32> dilations_;
   std::vector<int32> strides_;
   Padding padding_;
   TensorFormat data_format_;
@@ -586,10 +620,30 @@ class Conv2DSlowBackpropInputOp : public OpKernel {
                                         "specify 4 dimensions"));
     int stride_n = GetTensorDim(strides_, data_format_, 'N');
     int stride_c = GetTensorDim(strides_, data_format_, 'C');
+    int stride_h = GetTensorDim(strides_, data_format_, 'H');
+    int stride_w = GetTensorDim(strides_, data_format_, 'W');
     OP_REQUIRES(
         context, (stride_n == 1 && stride_c == 1),
         errors::InvalidArgument("Current implementation does not yet support "
                                 "strides in the batch and depth dimensions."));
+    OP_REQUIRES(context, stride_h > 0 && stride_w > 0,
+                errors::InvalidArgument(
+                    "Row and column strides should be larger than 0."));
+    OP_REQUIRES_OK(context, context->GetAttr("dilations", &dilations_));
+    OP_REQUIRES(context, dilations_.size() == 4,
+                errors::InvalidArgument("Sliding window dilations field must "
+                                        "specify 4 dimensions"));
+    int dilation_n = GetTensorDim(dilations_, data_format_, 'N');
+    int dilation_c = GetTensorDim(dilations_, data_format_, 'C');
+    int dilation_h = GetTensorDim(dilations_, data_format_, 'H');
+    int dilation_w = GetTensorDim(dilations_, data_format_, 'W');
+    OP_REQUIRES(context, (dilation_n == 1 && dilation_c == 1),
+                errors::InvalidArgument(
+                    "Current implementation does not yet support "
+                    "dilations in the batch and depth dimensions."));
+    OP_REQUIRES(
+        context, dilation_h > 0 && dilation_w > 0,
+        errors::InvalidArgument("Dilated rates should be larger than 0."));
     OP_REQUIRES_OK(context, context->GetAttr("use_cudnn_on_gpu", &use_cudnn_));
     use_cudnn_ &= CanUseCudnn();
     cudnn_use_autotune_ = CudnnUseAutotune();
@@ -622,12 +676,16 @@ class Conv2DSlowBackpropInputOp : public OpKernel {
     // do not support striding on the batch or depth dimension).
     const int stride_rows = GetTensorDim(strides_, data_format_, 'H');
     const int stride_cols = GetTensorDim(strides_, data_format_, 'W');
+    const int dilation_rows = GetTensorDim(dilations_, data_format_, 'H');
+    const int dilation_cols = GetTensorDim(dilations_, data_format_, 'W');
 
     launcher_(context, use_cudnn_, cudnn_use_autotune_, out_backprop, filter,
-              stride_rows, stride_cols, padding_, in_backprop, data_format_);
+              dilation_rows, dilation_cols, stride_rows, stride_cols, padding_,
+              in_backprop, data_format_);
   }
 
  private:
+  std::vector<int32> dilations_;
   std::vector<int32> strides_;
   Padding padding_;
   bool use_cudnn_;
@@ -641,39 +699,48 @@ class Conv2DSlowBackpropInputOp : public OpKernel {
 template <typename T>
 void LaunchConv2DBackpropInputOp<GPUDevice, T>::operator()(
     OpKernelContext* ctx, bool use_cudnn, bool cudnn_use_autotune,
-    const Tensor& out_backprop, const Tensor& filter, int row_stride,
-    int col_stride, const Padding& padding, Tensor* in_backprop,
-    TensorFormat data_format) {
+    const Tensor& out_backprop, const Tensor& filter, int row_dilation,
+    int col_dilation, int row_stride, int col_stride, const Padding& padding,
+    Tensor* in_backprop, TensorFormat data_format) {
   using perftools::gputools::dnn::AlgorithmConfig;
   using perftools::gputools::dnn::AlgorithmDesc;
   using perftools::gputools::dnn::ProfileResult;
 
   std::vector<int32> strides(4, 1);
-  strides[GetTensorDimIndex(data_format, 'H')] = row_stride;
-  strides[GetTensorDimIndex(data_format, 'W')] = col_stride;
+  std::vector<int32> dilations(4, 1);
+  auto input_h = GetTensorDimIndex(data_format, 'H');
+  auto input_w = GetTensorDimIndex(data_format, 'W');
+  strides[input_h] = row_stride;
+  strides[input_w] = col_stride;
+  dilations[input_h] = row_dilation;
+  dilations[input_w] = col_dilation;
   TensorShape input_shape = in_backprop->shape();
 
   const TensorShape& filter_shape = filter.shape();
   ConvBackpropDimensions dims;
-  OP_REQUIRES_OK(ctx, ConvBackpropComputeDimensions(
+  OP_REQUIRES_OK(ctx, ConvBackpropComputeDimensionsV2(
                           "Conv2DSlowBackpropInput", /*num_spatial_dims=*/2,
                           input_shape, filter_shape, out_backprop.shape(),
-                          strides, padding, data_format, &dims));
+                          dilations, strides, padding, data_format, &dims));
 
+  // TODO(yangzihao): The padding computations should be done in
+  // GetWindowedOutputSize() functions.
   const int padding_rows =
       (padding == VALID)
           ? 0
           : std::max<int>(0, (dims.spatial_dims[0].output_size - 1) *
                                      dims.spatial_dims[0].stride +
-                                 dims.spatial_dims[0].filter_size -
-                                 dims.spatial_dims[0].input_size);
+                                 (dims.spatial_dims[0].filter_size - 1) *
+                                     dims.spatial_dims[0].dilation +
+                                 1 - dims.spatial_dims[0].input_size);
   const int padding_cols =
       (padding == VALID)
           ? 0
           : std::max<int>(0, (dims.spatial_dims[1].output_size - 1) *
                                      dims.spatial_dims[1].stride +
-                                 dims.spatial_dims[1].filter_size -
-                                 dims.spatial_dims[1].input_size);
+                                 (dims.spatial_dims[1].filter_size - 1) *
+                                     dims.spatial_dims[1].dilation +
+                                 1 - dims.spatial_dims[1].input_size);
 
   // TODO(keveman): cuDNN only supports equal padding on both sides, so only
   // calling it when that is true. Remove this check when (if?) cuDNN starts
@@ -789,7 +856,9 @@ void LaunchConv2DBackpropInputOp<GPUDevice, T>::operator()(
       .set_input_feature_map_count(dims.in_depth)
       .set_output_feature_map_count(dims.out_depth);
   perftools::gputools::dnn::ConvolutionDescriptor conv_desc;
-  conv_desc.set_vertical_filter_stride(dims.spatial_dims[0].stride)
+  conv_desc.set_vertical_dilation_rate(dims.spatial_dims[0].dilation)
+      .set_horizontal_dilation_rate(dims.spatial_dims[1].dilation)
+      .set_vertical_filter_stride(dims.spatial_dims[0].stride)
       .set_horizontal_filter_stride(dims.spatial_dims[1].stride)
       .set_zero_padding_height(padding_rows / 2)
       .set_zero_padding_width(padding_cols / 2);
@@ -875,6 +944,8 @@ void LaunchConv2DBackpropInputOp<GPUDevice, T>::operator()(
       dims.out_depth,                        // out_depths
       {{dims.spatial_dims[0].filter_size,    // filter_rows
         dims.spatial_dims[1].filter_size}},  // filter_cols
+      {{dims.spatial_dims[0].dilation,       // dilation_rows
+        dims.spatial_dims[1].dilation}},     // dilation_cols
       {{dims.spatial_dims[0].stride,         // stride_rows
         dims.spatial_dims[1].stride}},       // stride_cols
       {{padding_rows,                        // padding_rows
diff --git a/tensorflow/core/kernels/conv_grad_ops.h b/tensorflow/core/kernels/conv_grad_ops.h
index e068fb8684..535586d53a 100644
--- a/tensorflow/core/kernels/conv_grad_ops.h
+++ b/tensorflow/core/kernels/conv_grad_ops.h
@@ -175,15 +175,17 @@ template <typename Device, typename T>
 struct LaunchConv2DBackpropInputOp {
   void operator()(OpKernelContext* ctx, bool use_cudnn, bool cudnn_use_autotune,
                   const Tensor& out_backprop, const Tensor& filter,
-                  int row_stride, int col_stride, const Padding& padding,
-                  Tensor* in_backprop, TensorFormat data_format);
+                  int row_dilation, int col_dilation, int row_stride,
+                  int col_stride, const Padding& padding, Tensor* in_backprop,
+                  TensorFormat data_format);
 };
 
 template <typename Device, typename T>
 struct LaunchConv2DBackpropFilterOp {
   void operator()(OpKernelContext* ctx, bool use_cudnn, bool cudnn_use_autotune,
                   const Tensor& out_backprop, const Tensor& input,
-                  int row_stride, int col_stride, const Padding& padding,
+                  int row_dilation, int col_dilation, int row_stride,
+                  int col_stride, const Padding& padding,
                   Tensor* filter_backprop, TensorFormat data_format);
 };
 
@@ -191,8 +193,9 @@ struct LaunchConv2DBackpropFilterOp {
 template <typename T>
 struct LaunchConv2DBackpropInputOp<Eigen::GpuDevice, T> {
   void operator()(OpKernelContext* ctx, bool use_cudnn, bool cudnn_use_autotune,
-                  const Tensor& input, const Tensor& filter, int row_stride,
-                  int col_stride, const Padding& padding, Tensor* output,
+                  const Tensor& input, const Tensor& filter, int row_dilation,
+                  int col_dilation, int row_stride, int col_stride,
+                  const Padding& padding, Tensor* output,
                   TensorFormat data_format);
 };
 
@@ -200,7 +203,8 @@ template <typename T>
 struct LaunchConv2DBackpropFilterOp<Eigen::GpuDevice, T> {
   void operator()(OpKernelContext* ctx, bool use_cudnn, bool cudnn_use_autotune,
                   const Tensor& out_backprop, const Tensor& input,
-                  int row_stride, int col_stride, const Padding& padding,
+                  int row_dilation, int col_dilation, int row_stride,
+                  int col_stride, const Padding& padding,
                   Tensor* filter_backprop, TensorFormat data_format);
 };
 #endif  // GOOGLE_CUDA
diff --git a/tensorflow/core/kernels/conv_grad_ops_3d.cc b/tensorflow/core/kernels/conv_grad_ops_3d.cc
index c2d24d1f12..4d0f1ab317 100644
--- a/tensorflow/core/kernels/conv_grad_ops_3d.cc
+++ b/tensorflow/core/kernels/conv_grad_ops_3d.cc
@@ -645,6 +645,9 @@ class Conv3DBackpropInputOp<GPUDevice, T> : public OpKernel {
         {{input_size[0], input_size[1], input_size[2]}},
         out_depth,
         {{filter_size[0], filter_size[1], filter_size[2]}},
+        // TODO(yangzihao): Send in arbitrary dilation rates after the dilated
+        // conv is supported.
+        /*dilations=*/{{1, 1, 1}},
         {{strides[0], strides[1], strides[2]}},
         {{padding_planes, padding_rows, padding_cols}},
         dtype,
@@ -1011,6 +1014,7 @@ class Conv3DBackpropFilterOp<GPUDevice, T> : public OpKernel {
         {{input_size[0], input_size[1], input_size[2]}},
         out_depth,
         {{filter_size[0], filter_size[1], filter_size[2]}},
+        {{1, 1, 1}},
         {{strides[0], strides[1], strides[2]}},
         {{padding_planes, padding_rows, padding_cols}},
         dtype,
diff --git a/tensorflow/core/kernels/conv_ops.cc b/tensorflow/core/kernels/conv_ops.cc
index bb67113fb0..ba40c428e4 100644
--- a/tensorflow/core/kernels/conv_ops.cc
+++ b/tensorflow/core/kernels/conv_ops.cc
@@ -112,7 +112,8 @@ struct LaunchGeneric {
 template <typename T>
 struct LaunchConv2DOp<CPUDevice, T> {
   void operator()(OpKernelContext* ctx, bool use_cudnn, bool cudnn_use_autotune,
-                  const Tensor& input, const Tensor& filter, int row_stride,
+                  const Tensor& input, const Tensor& filter,
+                  int /*row_dilation*/, int /*col_dilation*/, int row_stride,
                   int col_stride, const Padding& padding, Tensor* output,
                   TensorFormat data_format) {
     if (data_format != FORMAT_NHWC) {
@@ -133,8 +134,10 @@ class LaunchDeepConvOp {
                   const Tensor& filter, int batch, int input_rows,
                   int input_cols, int in_depth, int filter_rows,
                   int filter_cols, int pad_rows, int pad_cols, int out_rows,
-                  int out_cols, int out_depth, int stride_rows, int stride_cols,
-                  Tensor* output, TensorFormat data_format) {
+                  int /*out_cols*/, int /*out_depth*/, int /*dilation_rows*/,
+                  int /*dilation_cols*/, int /*stride_rows*/,
+                  int /*stride_cols*/, Tensor* /*output*/,
+                  TensorFormat /*data_format*/) {
     return false;
   }
 };
@@ -147,9 +150,11 @@ class LaunchDeepConvOp<CPUDevice, float> {
                   const Tensor& filter, int batch, int input_rows,
                   int input_cols, int in_depth, int filter_rows,
                   int filter_cols, int pad_rows, int pad_cols, int out_rows,
-                  int out_cols, int out_depth, int stride_rows, int stride_cols,
+                  int out_cols, int out_depth, int dilation_rows,
+                  int dilation_cols, int stride_rows, int stride_cols,
                   Tensor* output, TensorFormat data_format) {
-    if (data_format != FORMAT_NHWC ||
+    if (data_format != FORMAT_NHWC || dilation_rows != 1 ||
+        dilation_cols != 1 ||
         !CanUseDeepConv2D(stride_rows, stride_cols, filter_rows, filter_cols,
                           in_depth, out_depth, out_rows, out_cols)) {
       return false;
@@ -187,7 +192,8 @@ class LaunchXsmmConvOp {
                   int input_cols, int in_depth, int filter_rows,
                   int filter_cols, int pad_rows, int pad_cols, int out_rows,
                   int out_cols, int out_depth, int stride_rows, int stride_cols,
-                  Tensor* output, TensorFormat data_format) {
+                  int dilation_rows, int dilation_cols, Tensor* output,
+                  TensorFormat data_format) {
     return false;
   }
 };
@@ -199,7 +205,8 @@ class LaunchXsmmConvOp<CPUDevice, float> {
                   const Tensor& filter, int batch, int input_rows,
                   int input_cols, int in_depth, int filter_rows,
                   int filter_cols, int pad_rows, int pad_cols, int out_rows,
-                  int out_cols, int out_depth, int stride_rows, int stride_cols,
+                  int out_cols, int out_depth, int dilation_rows,
+                  int dilation_cols, int stride_rows, int stride_cols,
                   Tensor* output, TensorFormat data_format) {
     auto num_threads =
         ctx->device()->tensorflow_cpu_worker_threads()->num_threads;
@@ -228,11 +235,8 @@ class LaunchXsmmConvOp<CPUDevice, float> {
     desc.options = LIBXSMM_DNN_CONV_OPTION_WU_EXT_FILTER_REDUCE_OVERWRITE;
     desc.datatype = LIBXSMM_DNN_DATATYPE_F32;
 
-    if (!CanUseXsmmConv2D(desc, data_format)) {
-      return false;
-    }
-
-    if (!CanUseXsmmConv2D(desc, data_format)) {
+    if (dilation_rows != 1 || dilation_cols != 1 ||
+        !CanUseXsmmConv2D(desc, data_format)) {
       return false;
     }
 
@@ -251,6 +255,7 @@ template <typename Device, typename T>
 class Conv2DOp : public BinaryOp<T> {
  public:
   explicit Conv2DOp(OpKernelConstruction* context) : BinaryOp<T>(context) {
+    OP_REQUIRES_OK(context, context->GetAttr("dilations", &dilations_));
     OP_REQUIRES_OK(context, context->GetAttr("strides", &strides_));
     string data_format;
     OP_REQUIRES_OK(context, context->GetAttr("data_format", &data_format));
@@ -259,15 +264,35 @@ class Conv2DOp : public BinaryOp<T> {
     OP_REQUIRES_OK(context, context->GetAttr("use_cudnn_on_gpu", &use_cudnn_));
     use_cudnn_ &= CanUseCudnn();
     cudnn_use_autotune_ = CudnnUseAutotune();
+    OP_REQUIRES(context, dilations_.size() == 4,
+                errors::InvalidArgument("Sliding window dilations field must "
+                                        "specify 4 dimensions"));
     OP_REQUIRES(context, strides_.size() == 4,
                 errors::InvalidArgument("Sliding window strides field must "
                                         "specify 4 dimensions"));
     const int64 stride_n = GetTensorDim(strides_, data_format_, 'N');
     const int64 stride_c = GetTensorDim(strides_, data_format_, 'C');
+    const int64 stride_h = GetTensorDim(strides_, data_format_, 'H');
+    const int64 stride_w = GetTensorDim(strides_, data_format_, 'W');
     OP_REQUIRES(
         context, stride_n == 1 && stride_c == 1,
         errors::InvalidArgument("Current implementation does not yet support "
                                 "strides in the batch and depth dimensions."));
+    OP_REQUIRES(context, stride_h > 0 && stride_w > 0,
+                errors::InvalidArgument(
+                    "Row and column strides should be larger than 0."));
+
+    const int64 dilation_n = GetTensorDim(dilations_, data_format_, 'N');
+    const int64 dilation_c = GetTensorDim(dilations_, data_format_, 'C');
+    const int64 dilation_h = GetTensorDim(dilations_, data_format_, 'H');
+    const int64 dilation_w = GetTensorDim(dilations_, data_format_, 'W');
+    OP_REQUIRES(context, dilation_n == 1 && dilation_c == 1,
+                errors::InvalidArgument(
+                    "Current implementation does not yet support "
+                    "dilations in the batch and depth dimensions."));
+    OP_REQUIRES(
+        context, dilation_h > 0 && dilation_w > 0,
+        errors::InvalidArgument("Dilated rates should be larger than 0."));
     OP_REQUIRES_OK(context, context->GetAttr("padding", &padding_));
   }
 
@@ -334,18 +359,22 @@ class Conv2DOp : public BinaryOp<T> {
                 errors::InvalidArgument("batch is too large"));
     const int batch = static_cast<int>(batch_raw);
 
-    // For now we take the stride from the second and third dimensions only (we
-    // do not support striding on the batch or depth dimension).
+    // For now we take the stride and dilation from the second and third
+    // dimensions only (we do not support striding or dilation on the batch or
+    // depth dimension).
     const int stride_rows = GetTensorDim(strides_, data_format_, 'H');
     const int stride_cols = GetTensorDim(strides_, data_format_, 'W');
 
+    const int dilation_rows = GetTensorDim(dilations_, data_format_, 'H');
+    const int dilation_cols = GetTensorDim(dilations_, data_format_, 'W');
+
     int64 out_rows = 0, out_cols = 0, pad_rows = 0, pad_cols = 0;
-    OP_REQUIRES_OK(context,
-                   GetWindowedOutputSize(input_rows, filter_rows, stride_rows,
-                                         padding_, &out_rows, &pad_rows));
-    OP_REQUIRES_OK(context,
-                   GetWindowedOutputSize(input_cols, filter_cols, stride_cols,
-                                         padding_, &out_cols, &pad_cols));
+    OP_REQUIRES_OK(context, GetWindowedOutputSizeV2(
+                                input_rows, filter_rows, dilation_rows,
+                                stride_rows, padding_, &out_rows, &pad_rows));
+    OP_REQUIRES_OK(context, GetWindowedOutputSizeV2(
+                                input_cols, filter_cols, dilation_cols,
+                                stride_cols, padding_, &out_cols, &pad_cols));
     TensorShape out_shape =
         ShapeFromFormat(data_format_, batch, out_rows, out_cols, out_depth);
 
@@ -361,6 +390,8 @@ class Conv2DOp : public BinaryOp<T> {
             << ", filter_rows = " << filter_rows
             << ", stride_rows = " << stride_rows
             << ", stride_cols = " << stride_cols
+            << ", dilation_rows = " << dilation_rows
+            << ", dilation_cols = " << dilation_cols
             << ", out_depth = " << out_depth;
 
     // If there is nothing to compute, return.
@@ -372,7 +403,8 @@ class Conv2DOp : public BinaryOp<T> {
     if (LaunchXsmmConvOp<Device, T>::Run(
             context, input, filter, batch, input_rows, input_cols, in_depth,
             filter_rows, filter_cols, pad_rows, pad_cols, out_rows, out_cols,
-            out_depth, stride_rows, stride_cols, output, data_format_)) {
+            out_depth, dilation_rows, dilation_cols, stride_rows, stride_cols,
+            output, data_format_)) {
       return;
     }
 #endif
@@ -380,15 +412,18 @@ class Conv2DOp : public BinaryOp<T> {
     if (LaunchDeepConvOp<Device, T>::Run(
             context, input, filter, batch, input_rows, input_cols, in_depth,
             filter_rows, filter_cols, pad_rows, pad_cols, out_rows, out_cols,
-            out_depth, stride_rows, stride_cols, output, data_format_)) {
+            out_depth, dilation_rows, dilation_cols, stride_rows, stride_cols,
+            output, data_format_)) {
       return;
     }
 
     launcher_(context, use_cudnn_, cudnn_use_autotune_, input, filter,
-              stride_rows, stride_cols, padding_, output, data_format_);
+              dilation_rows, dilation_cols, stride_rows, stride_cols, padding_,
+              output, data_format_);
   }
 
  private:
+  std::vector<int32> dilations_;
   std::vector<int32> strides_;
   bool use_cudnn_;
   Padding padding_;
@@ -443,9 +478,9 @@ typedef AutoTuneSingleton<ConvAutoTuneGroup, ConvParameters,
 template <typename T>
 void LaunchConv2DOp<GPUDevice, T>::operator()(
     OpKernelContext* ctx, bool use_cudnn, bool cudnn_use_autotune,
-    const Tensor& input_param, const Tensor& filter, int row_stride,
-    int col_stride, const Padding& padding, Tensor* output,
-    TensorFormat data_format) {
+    const Tensor& input_param, const Tensor& filter, int row_dilation,
+    int col_dilation, int row_stride, int col_stride, const Padding& padding,
+    Tensor* output, TensorFormat data_format) {
   using perftools::gputools::dnn::AlgorithmConfig;
   using perftools::gputools::dnn::AlgorithmDesc;
   using perftools::gputools::dnn::ProfileResult;
@@ -461,8 +496,9 @@ void LaunchConv2DOp<GPUDevice, T>::operator()(
 
   Tensor input = input_param;
 
-  if (filter.dim_size(0) == 1 && filter.dim_size(1) == 1 && row_stride == 1 &&
-      col_stride == 1 && data_format == FORMAT_NHWC) {
+  if (filter.dim_size(0) == 1 && filter.dim_size(1) == 1 && row_dilation == 1 &&
+      col_dilation == 1 && row_stride == 1 && col_stride == 1 &&
+      data_format == FORMAT_NHWC) {
     // 1x1 filter, so call cublas directly.
     const uint64 m = input.dim_size(0) * input.dim_size(1) * input.dim_size(2);
     const uint64 k = filter.dim_size(2);
@@ -487,7 +523,8 @@ void LaunchConv2DOp<GPUDevice, T>::operator()(
     }
     return;
   } else if (filter.dim_size(0) == input.dim_size(1) &&
-             filter.dim_size(1) == input.dim_size(2) && padding == VALID &&
+             filter.dim_size(1) == input.dim_size(2) && row_dilation == 1 &&
+             col_dilation == 1 && padding == VALID &&
              data_format == FORMAT_NHWC) {
     // The input data and filter have the same height/width, so call cublas
     // directly.
@@ -530,17 +567,19 @@ void LaunchConv2DOp<GPUDevice, T>::operator()(
   const int64 patch_cols = filter.dim_size(1);
   if (padding == SAME) {
     // Total padding on rows and cols is
-    // Pr = (R' - 1) * S + Kr - R
-    // Pc = (C' - 1) * S + Kc - C
+    // Pr = (R' - 1) * S + (Kr - 1) * Dr + 1 - R
+    // Pc = (C' - 1) * S + (Kc - 1) * Dc + 1 - C
     // where (R', C') are output dimensions, (R, C) are input dimensions, S
-    // is stride, (Kr, Kc) are filter dimensions.
+    // is stride, (Dr, Dc) are dilations, (Kr, Kc) are filter dimensions.
     // We pad Pr/2 on the left and Pr - Pr/2 on the right, Pc/2 on the top
     // and Pc - Pc/2 on the bottom.  When Pr or Pc is odd, this means
     // we pad more on the right and bottom than on the top and left.
     padding_rows =
-        std::max<int>(0, (out_rows - 1) * row_stride + patch_rows - in_rows);
+        std::max<int>(0, (out_rows - 1) * row_stride +
+                             (patch_rows - 1) * row_dilation + 1 - in_rows);
     padding_cols =
-        std::max<int>(0, (out_cols - 1) * col_stride + patch_cols - in_cols);
+        std::max<int>(0, (out_cols - 1) * col_stride +
+                             (patch_cols - 1) * col_dilation + 1 - in_cols);
     const bool rows_odd = (padding_rows % 2 != 0);
     const bool cols_odd = (padding_cols % 2 != 0);
     if (rows_odd || cols_odd) {
@@ -605,7 +644,9 @@ void LaunchConv2DOp<GPUDevice, T>::operator()(
       .set_input_feature_map_count(filter.dim_size(2))
       .set_output_feature_map_count(filter.dim_size(3));
   perftools::gputools::dnn::ConvolutionDescriptor conv_desc;
-  conv_desc.set_vertical_filter_stride(row_stride)
+  conv_desc.set_vertical_dilation_rate(row_dilation)
+      .set_horizontal_dilation_rate(col_dilation)
+      .set_vertical_filter_stride(row_stride)
       .set_horizontal_filter_stride(col_stride)
       .set_zero_padding_height(padding_rows / 2)
       .set_zero_padding_width(padding_cols / 2);
@@ -652,6 +693,8 @@ void LaunchConv2DOp<GPUDevice, T>::operator()(
       out_depths,        // out_depths
       {{patch_rows,      // filter_rows
         patch_cols}},    // filter_cols
+      {{row_dilation,    // dilation_rows
+        col_dilation}},  // dilation_cols
       {{row_stride,      // stride_rows
         col_stride}},    // stride_cols
       {{padding_rows,    // padding_rows
diff --git a/tensorflow/core/kernels/conv_ops.h b/tensorflow/core/kernels/conv_ops.h
index e29271dff2..09a3b78776 100644
--- a/tensorflow/core/kernels/conv_ops.h
+++ b/tensorflow/core/kernels/conv_ops.h
@@ -34,8 +34,9 @@ class OpKernelContext;
 template <typename Device, typename T>
 struct LaunchConv2DOp {
   void operator()(OpKernelContext* ctx, bool use_cudnn, bool cudnn_use_autotune,
-                  const Tensor& input, const Tensor& filter, int row_stride,
-                  int col_stride, const Padding& padding, Tensor* output,
+                  const Tensor& input, const Tensor& filter, int row_dilation,
+                  int col_dilation, int row_stride, int col_stride,
+                  const Padding& padding, Tensor* output,
                   TensorFormat data_format);
 };
 
@@ -43,8 +44,9 @@ struct LaunchConv2DOp {
 template <typename T>
 struct LaunchConv2DOp<Eigen::GpuDevice, T> {
   void operator()(OpKernelContext* ctx, bool use_cudnn, bool cudnn_use_autotune,
-                  const Tensor& input, const Tensor& filter, int row_stride,
-                  int col_stride, const Padding& padding, Tensor* output,
+                  const Tensor& input, const Tensor& filter, int row_dilation,
+                  int col_dilation, int row_stride, int col_stride,
+                  const Padding& padding, Tensor* output,
                   TensorFormat data_format);
 };
 #endif  // GOOGLE_CUDA
diff --git a/tensorflow/core/kernels/conv_ops_3d.cc b/tensorflow/core/kernels/conv_ops_3d.cc
index 37cb67bc51..39202d7334 100644
--- a/tensorflow/core/kernels/conv_ops_3d.cc
+++ b/tensorflow/core/kernels/conv_ops_3d.cc
@@ -377,6 +377,9 @@ struct LaunchConvOp<GPUDevice, T> {
         {{in_planes, in_rows, in_cols}},
         out_depth,
         {{filter_planes, filter_rows, filter_cols}},
+        // TODO(yangzihao): Send in arbitrary dilation rates after the dilated
+        // conv is supported.
+        /*dilations=*/{{1, 1, 1}},
         {{strides[0], strides[1], strides[2]}},
         {{pad_planes, pad_rows, pad_cols}},
         dtype,
diff --git a/tensorflow/core/kernels/conv_ops_gpu.h b/tensorflow/core/kernels/conv_ops_gpu.h
index c852dc9991..6f82698596 100644
--- a/tensorflow/core/kernels/conv_ops_gpu.h
+++ b/tensorflow/core/kernels/conv_ops_gpu.h
@@ -91,13 +91,14 @@ class ConvParameters {
   using SpatialArray = gtl::InlinedVector<int64, 3>;
   ConvParameters(int64 batch, int64 in_depths, const SpatialArray& in,
                  int64 out_depths, const SpatialArray& filter,
-                 const SpatialArray& stride, const SpatialArray& padding,
-                 DataType dtype, int device_id)
+                 const SpatialArray& dilation, const SpatialArray& stride,
+                 const SpatialArray& padding, DataType dtype, int device_id)
       : batch_(batch),
         in_depths_(in_depths),
         out_depths_(out_depths),
         in_(in),
         filter_(filter),
+        dilation_(dilation),
         stride_(stride),
         padding_(padding),
         dtype_(dtype),
@@ -107,6 +108,7 @@ class ConvParameters {
     for (int64 val : in) hash_code_ = Hash64Combine(hash_code_, val);
     hash_code_ = Hash64Combine(hash_code_, out_depths);
     for (int64 val : filter) hash_code_ = Hash64Combine(hash_code_, val);
+    for (int64 val : dilation) hash_code_ = Hash64Combine(hash_code_, val);
     for (int64 val : stride) hash_code_ = Hash64Combine(hash_code_, val);
     for (int64 val : padding) hash_code_ = Hash64Combine(hash_code_, val);
     hash_code_ = Hash64Combine(hash_code_, dtype);
@@ -128,6 +130,7 @@ class ConvParameters {
         "(", str_util::Join(in_, ", "), "), ",
         out_depths_, ", ",
         "(", str_util::Join(filter_, ", "), "), ",
+        "(", str_util::Join(dilation_, ", "), "), ",
         "(", str_util::Join(stride_, ", "), "), ",
         "(", str_util::Join(padding_, ", "), "), ",
         dtype_, ", ",
@@ -154,11 +157,11 @@ class ConvParameters {
  protected:
   using ParameterDataType =
       std::tuple<int64, int64, SpatialArray, int64, SpatialArray, SpatialArray,
-                 SpatialArray, DataType, int>;
+                 SpatialArray, SpatialArray, DataType, int>;
 
   ParameterDataType get_data_as_tuple() const {
     return std::make_tuple(batch_, in_depths_, in_, out_depths_, filter_,
-                           stride_, padding_, dtype_, device_id_);
+                           dilation_, stride_, padding_, dtype_, device_id_);
   }
 
   uint64 hash_code_;
@@ -169,6 +172,7 @@ class ConvParameters {
   int64 out_depths_;
   SpatialArray in_;
   SpatialArray filter_;
+  SpatialArray dilation_;
   SpatialArray stride_;
   SpatialArray padding_;
   DataType dtype_;
diff --git a/tensorflow/core/kernels/conv_ops_test.cc b/tensorflow/core/kernels/conv_ops_test.cc
index ea54d6cf6c..666bca265c 100644
--- a/tensorflow/core/kernels/conv_ops_test.cc
+++ b/tensorflow/core/kernels/conv_ops_test.cc
@@ -43,6 +43,8 @@ TEST(ConvParameters, WinogradNonfusedAlgoSize) {
       128,       // out_depths
       {{3,       // filter_rows
         3}},     // filter_cols
+      {{1,       // dilation_rows
+        1}},     // dilation_cols
       {{1,       // stride_rows
         1}},     // stride_cols
       {{0,       // padding_rows
@@ -60,6 +62,8 @@ TEST(ConvParameters, WinogradNonfusedAlgoSize) {
       768,       // out_depths
       {{3,       // filter_rows
         3}},     // filter_cols
+      {{1,       // dilation_rows
+        1}},     // dilation_cols
       {{1,       // stride_rows
         1}},     // stride_cols
       {{0,       // padding_rows
diff --git a/tensorflow/core/kernels/depthwise_conv_op.cc b/tensorflow/core/kernels/depthwise_conv_op.cc
index 7c43dcb670..02da64ce98 100644
--- a/tensorflow/core/kernels/depthwise_conv_op.cc
+++ b/tensorflow/core/kernels/depthwise_conv_op.cc
@@ -373,8 +373,11 @@ class DepthwiseConv2dNativeOp : public BinaryOp<T> {
     // If in_depth==1, this operation is just a standard convolution, so
     // invoke that op.
     if (std::is_same<T, float>::value && in_depth == 1) {
+      // TODO(yangzihao): Send in arbitrary dilation rates after the dilated
+      // conv is supported.
       launcher_(context, use_cudnn_, cudnn_use_autotune_, input, filter,
-                stride_, stride_, padding_, output, data_format_);
+                /*row_dilation=*/1, /*col_dilation=*/1, stride_, stride_,
+                padding_, output, data_format_);
       return;
     }
 
diff --git a/tensorflow/core/kernels/quantized_conv_ops.cc b/tensorflow/core/kernels/quantized_conv_ops.cc
index 3b0764bb9b..f83998e0c1 100644
--- a/tensorflow/core/kernels/quantized_conv_ops.cc
+++ b/tensorflow/core/kernels/quantized_conv_ops.cc
@@ -457,6 +457,19 @@ class QuantizedConv2DOp : public OpKernel {
         context, (strides_[0] == 1 && strides_[3] == 1),
         errors::InvalidArgument("Current implementation does not yet support "
                                 "strides in the batch and depth dimensions."));
+    std::vector<int32> dilations;
+    OP_REQUIRES_OK(context, context->GetAttr("dilations", &dilations));
+    OP_REQUIRES(context, dilations.size() == 4,
+                errors::InvalidArgument("Dilations field must "
+                                        "specify 4 dimensions"));
+    OP_REQUIRES(context, dilations[1] == 1 && dilations[2] == 1,
+                errors::InvalidArgument(
+                    "Current implementation only supports dilated rate as 1 "
+                    "in the row and column dimensions."));
+    OP_REQUIRES(context, (dilations[0] == 1 && dilations[3] == 1),
+                errors::InvalidArgument(
+                    "Current implementation does not yet support "
+                    "dilations in the batch and depth dimensions."));
     OP_REQUIRES_OK(context, context->GetAttr("padding", &padding_));
   }
 
diff --git a/tensorflow/core/ops/nn_ops.cc b/tensorflow/core/ops/nn_ops.cc
index 654e890b57..59c4642e4d 100644
--- a/tensorflow/core/ops/nn_ops.cc
+++ b/tensorflow/core/ops/nn_ops.cc
@@ -513,6 +513,7 @@ REGISTER_OP("Conv2D")
     .Attr("use_cudnn_on_gpu: bool = true")
     .Attr(GetPaddingAttrString())
     .Attr(GetConvnetDataFormatAttrString())
+    .Attr("dilations: list(int) = [1, 1, 1, 1]")
     .SetShapeFn(shape_inference::Conv2DShape)
     .Doc(R"doc(
 Computes a 2-D convolution given 4-D `input` and `filter` tensors.
@@ -546,7 +547,7 @@ filter: A 4-D tensor of shape
 output: A 4-D tensor. The dimension order is determined by the value of
     `data_format`, see below for details.
 strides: 1-D tensor of length 4.  The stride of the sliding window for each
-  dimension of `input`. The dimension order is determined by the value of
+    dimension of `input`. The dimension order is determined by the value of
     `data_format`, see below for details.
 padding: The type of padding algorithm to use.
 data_format: Specify the data format of the input and output data. With the
@@ -554,6 +555,11 @@ data_format: Specify the data format of the input and output data. With the
         [batch, height, width, channels].
     Alternatively, the format could be "NCHW", the data storage order of:
         [batch, channels, height, width].
+dilations: 1-D tensor of length 4.  The dilation factor for each dimension of
+    `input`. If set to k > 1, there will be k-1 skipped cells between each
+    filter element on that dimension. The dimension order is determined by the
+    value of `data_format`, see above for details. Dilations in the batch and
+    depth dimensions must be 1.
 )doc");
 
 REGISTER_OP("Conv2DBackpropInput")
@@ -566,6 +572,7 @@ REGISTER_OP("Conv2DBackpropInput")
     .Attr("use_cudnn_on_gpu: bool = true")
     .Attr(GetPaddingAttrString())
     .Attr(GetConvnetDataFormatAttrString())
+    .Attr("dilations: list(int) = [1, 1, 1, 1]")
     .SetShapeFn([](InferenceContext* c) {
       ShapeHandle s;
       TF_RETURN_IF_ERROR(c->MakeShapeFromShapeTensor(0, &s));
@@ -589,10 +596,15 @@ padding: The type of padding algorithm to use.
 output: 4-D with shape `[batch, in_height, in_width, in_channels]`.  Gradient
   w.r.t. the input of the convolution.
 data_format: Specify the data format of the input and output data. With the
-    default format "NHWC", the data is stored in the order of:
-        [batch, in_height, in_width, in_channels].
-    Alternatively, the format could be "NCHW", the data storage order of:
-        [batch, in_channels, in_height, in_width].
+  default format "NHWC", the data is stored in the order of:
+      [batch, in_height, in_width, in_channels].
+  Alternatively, the format could be "NCHW", the data storage order of:
+      [batch, in_channels, in_height, in_width].
+dilations: 1-D tensor of length 4.  The dilation factor for each dimension of
+  `input`. If set to k > 1, there will be k-1 skipped cells between each filter
+  element on that dimension. The dimension order is determined by the value of
+  `data_format`, see above for details. Dilations in the batch and depth
+  dimensions must be 1.
 )doc");
 
 // TODO(jeff): Instead of 'use_cudnn_for_gpu', maybe we should have a
@@ -608,6 +620,7 @@ REGISTER_OP("Conv2DBackpropFilter")
     .Attr("use_cudnn_on_gpu: bool = true")
     .Attr(GetPaddingAttrString())
     .Attr(GetConvnetDataFormatAttrString())
+    .Attr("dilations: list(int) = [1, 1, 1, 1]")
     .SetShapeFn([](InferenceContext* c) {
       ShapeHandle s;
       TF_RETURN_IF_ERROR(c->MakeShapeFromShapeTensor(1, &s));
@@ -632,10 +645,15 @@ output: 4-D with shape
   `[filter_height, filter_width, in_channels, out_channels]`.  Gradient w.r.t.
   the `filter` input of the convolution.
 data_format: Specify the data format of the input and output data. With the
-    default format "NHWC", the data is stored in the order of:
-        [batch, in_height, in_width, in_channels].
-    Alternatively, the format could be "NCHW", the data storage order of:
-        [batch, in_channels, in_height, in_width].
+  default format "NHWC", the data is stored in the order of:
+      [batch, in_height, in_width, in_channels].
+  Alternatively, the format could be "NCHW", the data storage order of:
+      [batch, in_channels, in_height, in_width].
+dilations: 1-D tensor of length 4.  The dilation factor for each dimension of
+  `input`. If set to k > 1, there will be k-1 skipped cells between each filter
+  element on that dimension. The dimension order is determined by the value of
+  `data_format`, see above for details. Dilations in the batch and depth
+  dimensions must be 1.
 )doc");
 
 namespace {
@@ -823,6 +841,7 @@ REGISTER_OP("DepthwiseConv2dNative")
     .Attr("strides: list(int)")
     .Attr(GetPaddingAttrString())
     .Attr(GetConvnetDataFormatAttrString())
+    .Attr("dilations: list(int) = [1, 1, 1, 1]")
     .SetShapeFn(shape_inference::DepthwiseConv2DNativeShape)
     .Doc(R"doc(
 Computes a 2-D depthwise convolution given 4-D `input` and `filter` tensors.
@@ -845,7 +864,6 @@ for k in 0..in_channels-1
 
 Must have `strides[0] = strides[3] = 1`.  For the most common case of the same
 horizontal and vertices strides, `strides = [1, stride, stride, 1]`.
-
 strides: 1-D of length 4.  The stride of the sliding window for each dimension
   of `input`.
 padding: The type of padding algorithm to use.
@@ -854,6 +872,11 @@ data_format: Specify the data format of the input and output data. With the
         [batch, height, width, channels].
     Alternatively, the format could be "NCHW", the data storage order of:
         [batch, channels, height, width].
+dilations: 1-D tensor of length 4.  The dilation factor for each dimension of
+  `input`. If set to k > 1, there will be k-1 skipped cells between each filter
+  element on that dimension. The dimension order is determined by the value of
+  `data_format`, see above for details. Dilations in the batch and depth
+  dimensions must be 1.
 )doc");
 
 REGISTER_OP("DepthwiseConv2dNativeBackpropInput")
@@ -865,6 +888,7 @@ REGISTER_OP("DepthwiseConv2dNativeBackpropInput")
     .Attr("strides: list(int)")
     .Attr(GetPaddingAttrString())
     .Attr(GetConvnetDataFormatAttrString())
+    .Attr("dilations: list(int) = [1, 1, 1, 1]")
     .SetShapeFn([](InferenceContext* c) {
       ShapeHandle s;
       TF_RETURN_IF_ERROR(c->MakeShapeFromShapeTensor(0, &s));
@@ -892,6 +916,11 @@ data_format: Specify the data format of the input and output data. With the
         [batch, height, width, channels].
     Alternatively, the format could be "NCHW", the data storage order of:
         [batch, channels, height, width].
+dilations: 1-D tensor of length 4.  The dilation factor for each dimension of
+  `input`. If set to k > 1, there will be k-1 skipped cells between each filter
+  element on that dimension. The dimension order is determined by the value of
+  `data_format`, see above for details. Dilations in the batch and depth
+  dimensions must be 1.
 output: 4-D with shape according to `data_format`.  For example, if
   `data_format` is 'NHWC', output shape is `[batch, in_height,
   in_width, in_channels]`.  Gradient w.r.t. the input of the
@@ -907,6 +936,7 @@ REGISTER_OP("DepthwiseConv2dNativeBackpropFilter")
     .Attr("strides: list(int)")
     .Attr(GetPaddingAttrString())
     .Attr(GetConvnetDataFormatAttrString())
+    .Attr("dilations: list(int) = [1, 1, 1, 1]")
     .SetShapeFn([](InferenceContext* c) {
       ShapeHandle s;
       TF_RETURN_IF_ERROR(c->MakeShapeFromShapeTensor(1, &s));
@@ -935,6 +965,11 @@ data_format: Specify the data format of the input and output data. With the
         [batch, height, width, channels].
     Alternatively, the format could be "NCHW", the data storage order of:
         [batch, channels, height, width].
+dilations: 1-D tensor of length 4.  The dilation factor for each dimension of
+  `input`. If set to k > 1, there will be k-1 skipped cells between each filter
+  element on that dimension. The dimension order is determined by the value of
+  `data_format`, see above for details. Dilations in the batch and depth
+  dimensions must be 1.
 output: 4-D with shape
   `[filter_height, filter_width, in_channels, out_channels]`.  Gradient w.r.t.
   the `filter` input of the convolution.
@@ -949,6 +984,7 @@ REGISTER_OP("Conv3D")
     .Attr("strides: list(int) >= 5")
     .Attr(GetPaddingAttrString())
     .Attr(GetConvnet3dDataFormatAttrString())
+    .Attr("dilations: list(int) = [1, 1, 1, 1, 1]")
     .SetShapeFn(shape_inference::Conv3DShape)
     .Doc(R"doc(
 Computes a 3-D convolution given 5-D `input` and `filter` tensors.
@@ -970,6 +1006,11 @@ data_format: The data format of the input and output data. With the
         [batch, in_depth, in_height, in_width, in_channels].
     Alternatively, the format could be "NCDHW", the data storage order is:
         [batch, in_channels, in_depth, in_height, in_width].
+dilations: 1-D tensor of length 5.  The dilation factor for each dimension of
+    `input`. If set to k > 1, there will be k-1 skipped cells between each
+    filter element on that dimension. The dimension order is determined by the
+    value of `data_format`, see above for details. Dilations in the batch and
+    depth dimensions must be 1.
 )doc");
 
 REGISTER_OP("Conv3DBackpropInput")
@@ -1036,6 +1077,7 @@ REGISTER_OP("Conv3DBackpropInputV2")
     .Attr("strides: list(int) >= 5")
     .Attr(GetPaddingAttrString())
     .Attr(GetConvnet3dDataFormatAttrString())
+    .Attr("dilations: list(int) = [1, 1, 1, 1, 1]")
     .SetShapeFn([](InferenceContext* c) {
       ShapeHandle s;
       TF_RETURN_IF_ERROR(c->MakeShapeFromShapeTensor(0, &s));
@@ -1061,6 +1103,11 @@ data_format: The data format of the input and output data. With the
         [batch, in_depth, in_height, in_width, in_channels].
     Alternatively, the format could be "NCDHW", the data storage order is:
         [batch, in_channels, in_depth, in_height, in_width].
+dilations: 1-D tensor of length 5.  The dilation factor for each dimension of
+    `input`. If set to k > 1, there will be k-1 skipped cells between each
+    filter element on that dimension. The dimension order is determined by the
+    value of `data_format`, see above for details. Dilations in the batch and
+    depth dimensions must be 1.
 
 )doc");
 
@@ -1073,6 +1120,7 @@ REGISTER_OP("Conv3DBackpropFilterV2")
     .Attr("strides: list(int) >= 5")
     .Attr(GetPaddingAttrString())
     .Attr(GetConvnet3dDataFormatAttrString())
+    .Attr("dilations: list(int) = [1, 1, 1, 1, 1]")
     .SetShapeFn([](InferenceContext* c) {
       ShapeHandle s;
       TF_RETURN_IF_ERROR(c->MakeShapeFromShapeTensor(1, &s));
@@ -1098,6 +1146,11 @@ data_format: The data format of the input and output data. With the
         [batch, in_depth, in_height, in_width, in_channels].
     Alternatively, the format could be "NCDHW", the data storage order is:
         [batch, in_channels, in_depth, in_height, in_width].
+dilations: 1-D tensor of length 5.  The dilation factor for each dimension of
+    `input`. If set to k > 1, there will be k-1 skipped cells between each
+    filter element on that dimension. The dimension order is determined by the
+    value of `data_format`, see above for details. Dilations in the batch and
+    depth dimensions must be 1.
 
 )doc");
 
@@ -2613,6 +2666,7 @@ REGISTER_OP("QuantizedConv2D")
     .Attr("out_type: quantizedtype = DT_QINT32")
     .Attr("strides: list(int)")
     .Attr(GetPaddingAttrString())
+    .Attr("dilations: list(int) = [1, 1, 1, 1]")
     .SetShapeFn([](InferenceContext* c) {
       TF_RETURN_IF_ERROR(shape_inference::Conv2DShape(c));
       ShapeHandle unused;
@@ -2641,7 +2695,11 @@ min_filter: The float value that the lowest quantized filter value represents.
 max_filter: The float value that the highest quantized filter value represents.
 min_output: The float value that the lowest quantized output value represents.
 max_output: The float value that the highest quantized output value represents.
-
+dilations: 1-D tensor of length 4.  The dilation factor for each dimension of
+    `input`. If set to k > 1, there will be k-1 skipped cells between each
+    filter element on that dimension. The dimension order is determined by the
+    value of `data_format`, see above for details. Dilations in the batch and
+    depth dimensions must be 1.
 )doc");
 
 REGISTER_OP("QuantizedMaxPool")
diff --git a/tensorflow/python/kernel_tests/conv2d_backprop_filter_grad_test.py b/tensorflow/python/kernel_tests/conv2d_backprop_filter_grad_test.py
index 1679857bd5..be299beee4 100644
--- a/tensorflow/python/kernel_tests/conv2d_backprop_filter_grad_test.py
+++ b/tensorflow/python/kernel_tests/conv2d_backprop_filter_grad_test.py
@@ -42,17 +42,21 @@ class Conv2DBackpropFilterGradTest(test.TestCase):
           filter_shape = [3, 3, 4, 6]
           # Make a convolution op with the current settings, just to easily get
           # the shape of the output.
-          conv_out = nn_ops.conv2d(in_val,
-                                   array_ops.zeros(filter_shape),
-                                   [1, stride, stride, 1], padding)
+          conv_out = nn_ops.conv2d(
+              in_val,
+              array_ops.zeros(filter_shape),
+              strides=[1, stride, stride, 1],
+              padding=padding)
           out_backprop_shape = conv_out.get_shape().as_list()
           out_backprop_val = constant_op.constant(
               2 * np.random.random_sample(out_backprop_shape) - 1,
               dtype=dtypes.float32)
-          output = nn_ops.conv2d_backprop_filter(in_val, filter_shape,
-                                                 out_backprop_val,
-                                                 [1, stride, stride, 1],
-                                                 padding)
+          output = nn_ops.conv2d_backprop_filter(
+              in_val,
+              filter_shape,
+              out_backprop_val,
+              strides=[1, stride, stride, 1],
+              padding=padding)
           err = gradient_checker.compute_gradient_error(
               [in_val, out_backprop_val], [in_shape, out_backprop_shape],
               output, filter_shape)
@@ -60,6 +64,42 @@ class Conv2DBackpropFilterGradTest(test.TestCase):
           err_tolerance = 2e-3
           self.assertLess(err, err_tolerance)
 
+  def testGradientDilatedConv(self):
+    if test.is_gpu_available(cuda_only=True):
+      with self.test_session(use_gpu=True):
+        for padding in ["SAME", "VALID"]:
+          for stride in [1, 2]:
+            np.random.seed(1)
+            in_shape = [5, 8, 6, 4]
+            in_val = constant_op.constant(
+                2 * np.random.random_sample(in_shape) - 1, dtype=dtypes.float32)
+            filter_shape = [3, 3, 4, 6]
+            # Make a convolution op with the current settings,
+            # just to easily get the shape of the output.
+            conv_out = nn_ops.conv2d(
+                in_val,
+                array_ops.zeros(filter_shape),
+                dilations=[1, 2, 2, 1],
+                strides=[1, stride, stride, 1],
+                padding=padding)
+            out_backprop_shape = conv_out.get_shape().as_list()
+            out_backprop_val = constant_op.constant(
+                2 * np.random.random_sample(out_backprop_shape) - 1,
+                dtype=dtypes.float32)
+            output = nn_ops.conv2d_backprop_filter(
+                in_val,
+                filter_shape,
+                out_backprop_val,
+                dilations=[1, 2, 2, 1],
+                strides=[1, stride, stride, 1],
+                padding=padding)
+            err = gradient_checker.compute_gradient_error(
+                [in_val, out_backprop_val], [in_shape, out_backprop_shape],
+                output, filter_shape)
+            print("conv2d_backprop_filter gradient err = %g " % err)
+            err_tolerance = 2e-3
+            self.assertLess(err, err_tolerance)
+
 
 if __name__ == "__main__":
   test.main()
diff --git a/tensorflow/python/kernel_tests/conv_ops_test.py b/tensorflow/python/kernel_tests/conv_ops_test.py
index 22e5400c37..bf7245a2ae 100644
--- a/tensorflow/python/kernel_tests/conv_ops_test.py
+++ b/tensorflow/python/kernel_tests/conv_ops_test.py
@@ -18,6 +18,7 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
+import collections
 import os
 import time
 
@@ -32,6 +33,7 @@ from tensorflow.python.framework import ops
 from tensorflow.python.framework import test_util
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import gradient_checker
+from tensorflow.python.ops import gradients_impl
 from tensorflow.python.ops import nn_impl
 from tensorflow.python.ops import nn_ops
 from tensorflow.python.ops import random_ops
@@ -240,6 +242,77 @@ class Conv2DTest(test.TestCase):
     for i in range(1, len(values)):
       self.assertAllClose(values[0], values[i], rtol=1e-5, atol=1e-5)
 
+  def _ComputeReferenceDilatedConv(self, tensor_in_sizes, filter_in_sizes,
+                                   stride, dilation, padding, data_format,
+                                   use_gpu):
+    total_size_1 = 1
+    total_size_2 = 1
+    for s in tensor_in_sizes:
+      total_size_1 *= s
+    for s in filter_in_sizes:
+      total_size_2 *= s
+
+    # Initializes the input tensor with array containing incrementing
+    # numbers from 1.
+    x1 = [f * 1.0 for f in range(1, total_size_1 + 1)]
+    x2 = [f * 1.0 for f in range(1, total_size_2 + 1)]
+    with test_util.device(use_gpu):
+      t1 = constant_op.constant(x1, shape=tensor_in_sizes)
+      t2 = constant_op.constant(x2, shape=filter_in_sizes)
+      if isinstance(stride, collections.Iterable):
+        strides = list(stride)
+      else:
+        strides = [stride, stride]
+      if data_format == "NCHW":
+        t1 = test_util.NHWCToNCHW(t1)
+        full_strides = [1, 1] + strides
+        full_dilation = [1, 1] + dilation
+      else:
+        full_strides = [1] + strides + [1]
+        full_dilation = [1] + dilation + [1]
+      expected = nn_ops.convolution(
+          t1,
+          t2,
+          padding=padding,
+          strides=strides,
+          dilation_rate=dilation,
+          data_format=data_format)
+      computed = nn_ops.conv2d(
+          t1,
+          t2,
+          strides=full_strides,
+          dilations=full_dilation,
+          padding=padding,
+          data_format=data_format)
+      if data_format == "NCHW":
+        expected = test_util.NCHWToNHWC(expected)
+        computed = test_util.NCHWToNHWC(computed)
+    return expected, computed
+
+  def _VerifyDilatedConvValues(self, tensor_in_sizes, filter_in_sizes, strides,
+                               padding, dilations):
+    expected_results = []
+    computed_results = []
+    default_dilations = (dilations[0] == 1 and dilations[1] == 1)
+    for data_format, use_gpu in GetTestConfigs():
+      # If any dilation rate is larger than 1, only do test on the GPU
+      # because we currently do not have a CPU implementation for arbitrary
+      # dilation rates.
+      if default_dilations or use_gpu:
+        expected, computed = self._ComputeReferenceDilatedConv(
+            tensor_in_sizes, filter_in_sizes, strides, dilations, padding,
+            data_format, use_gpu)
+        expected_results.append(expected)
+        computed_results.append(computed)
+        tolerance = 1e-2 if use_gpu else 1e-5
+        expected_values = self.evaluate(expected_results)
+        computed_values = self.evaluate(computed_results)
+        for e_value, c_value in zip(expected_values, computed_values):
+          print("expected = ", e_value)
+          print("actual = ", c_value)
+          self.assertAllClose(
+              e_value.flatten(), c_value.flatten(), atol=tolerance, rtol=1e-6)
+
   def _VerifyValues(self, tensor_in_sizes, filter_in_sizes, strides, padding,
                     expected):
     tensors = []
@@ -279,6 +352,16 @@ class Conv2DTest(test.TestCase):
         padding="VALID",
         expected=expected_output)
 
+  @test_util.run_in_graph_and_eager_modes()
+  def testConv2D2x2Filter2x1Dilation(self):
+    if test.is_gpu_available(cuda_only=True):
+      self._VerifyDilatedConvValues(
+          tensor_in_sizes=[1, 4, 4, 1],
+          filter_in_sizes=[2, 2, 1, 1],
+          strides=[1, 1],
+          dilations=[2, 1],
+          padding="VALID")
+
   @test_util.run_in_graph_and_eager_modes()
   def testConv2DEmpty(self):
     expected_output = []
@@ -289,6 +372,16 @@ class Conv2DTest(test.TestCase):
         padding="VALID",
         expected=expected_output)
 
+  @test_util.run_in_graph_and_eager_modes()
+  def testConv2DEmptyDilation(self):
+    if test.is_gpu_available(cuda_only=True):
+      self._VerifyDilatedConvValues(
+          tensor_in_sizes=[0, 2, 3, 3],
+          filter_in_sizes=[1, 1, 3, 3],
+          strides=[1, 1],
+          dilations=[2, 1],
+          padding="VALID")
+
   @test_util.run_in_graph_and_eager_modes()
   def testConv2D2x2Filter(self):
     # The outputs are computed using third_party/py/IPython/notebook.
@@ -300,6 +393,16 @@ class Conv2DTest(test.TestCase):
         padding="VALID",
         expected=expected_output)
 
+  @test_util.run_in_graph_and_eager_modes()
+  def testConv2D2x2FilterDilation(self):
+    if test.is_gpu_available(cuda_only=True):
+      self._VerifyDilatedConvValues(
+          tensor_in_sizes=[1, 2, 3, 3],
+          filter_in_sizes=[2, 2, 3, 3],
+          strides=[1, 1],
+          dilations=[1, 2],
+          padding="VALID")
+
   @test_util.run_in_graph_and_eager_modes()
   def testConv2D1x2Filter(self):
     # The outputs are computed using third_party/py/IPython/notebook.
@@ -314,6 +417,16 @@ class Conv2DTest(test.TestCase):
         padding="VALID",
         expected=expected_output)
 
+  @test_util.run_in_graph_and_eager_modes()
+  def testConv2D1x2FilterDilation(self):
+    if test.is_gpu_available(cuda_only=True):
+      self._VerifyDilatedConvValues(
+          tensor_in_sizes=[1, 2, 3, 3],
+          filter_in_sizes=[1, 2, 3, 3],
+          strides=[1, 1],
+          dilations=[2, 1],
+          padding="VALID")
+
   @test_util.run_in_graph_and_eager_modes()
   def testConv2D2x2FilterStride2(self):
     expected_output = [2271.0, 2367.0, 2463.0]
@@ -386,13 +499,23 @@ class Conv2DTest(test.TestCase):
         padding="VALID",
         expected=[50, 60])
 
-    # TODO this currently fails.
-    # self._VerifyValues(tensor_in_sizes=[1, 8, 8, 1],
-    #                   filter_in_sizes=[2, 2, 1, 1],
-    #                   strides=[4, 4], padding="SAME",
-    #                   expected=[72, 112, 392, 432])
+  @test_util.run_in_graph_and_eager_modes()
+  def testConv2DKernelSizeMatchesInputSizeDilation(self):
+    if test.is_gpu_available(cuda_only=True):
+      self._VerifyDilatedConvValues(
+          tensor_in_sizes=[1, 3, 3, 1],
+          filter_in_sizes=[2, 2, 1, 2],
+          strides=[1, 1],
+          dilations=[2, 2],
+          padding="VALID")
+
+  # TODO this currently fails.
+  # self._VerifyValues(tensor_in_sizes=[1, 8, 8, 1],
+  #                   filter_in_sizes=[2, 2, 1, 1],
+  #                   strides=[4, 4], padding="SAME",
+  #                   expected=[72, 112, 392, 432])
 
-    # Testing for backprops
+  # Testing for backprops
   def _RunAndVerifyBackpropInput(self, input_sizes, filter_sizes, output_sizes,
                                  strides, padding, expected, data_format,
                                  use_gpu, err):
@@ -724,6 +847,255 @@ class Conv2DTest(test.TestCase):
           data_format=data_format,
           use_gpu=use_gpu)
 
+  # Testing for backprops
+  def _RunAndVerifyBackpropInputDilation(self, input_sizes, filter_sizes,
+                                         output_sizes, strides, dilations,
+                                         padding, data_format, use_gpu, err):
+    total_input_size = 1
+    total_filter_size = 1
+    for s in input_sizes:
+      total_input_size *= s
+    for s in filter_sizes:
+      total_filter_size *= s
+    # Initializes the input tensor with array containing incrementing
+    # numbers from 1.
+    x1 = [f * 1.0 for f in range(1, total_input_size + 1)]
+    x2 = [f * 1.0 for f in range(1, total_filter_size + 1)]
+    default_dilations = (dilations[0] == 1 and dilations[1] == 1)
+    if default_dilations or use_gpu:
+      with self.test_session(use_gpu=use_gpu) as sess:
+        if data_format == "NCHW":
+          input_sizes = test_util.NHWCToNCHW(input_sizes)
+        t1 = constant_op.constant(x1, shape=input_sizes)
+        t2 = constant_op.constant(x2, shape=filter_sizes)
+        full_strides = [1] + strides + [1]
+        full_dilations = [1] + dilations + [1]
+        if data_format == "NCHW":
+          full_strides = test_util.NHWCToNCHW(full_strides)
+          full_dilations = test_util.NHWCToNCHW(full_dilations)
+        conv_forward = nn_ops.conv2d(
+            t1,
+            t2,
+            strides=full_strides,
+            dilations=full_dilations,
+            padding=padding,
+            data_format=data_format)
+        conv_forward_2 = nn_ops.convolution(
+            t1,
+            t2,
+            padding=padding,
+            strides=strides,
+            dilation_rate=dilations,
+            data_format=data_format)
+        if data_format == "NCHW":
+          conv_forward = test_util.NCHWToNHWC(conv_forward)
+          conv_forward_2 = test_util.NCHWToNHWC(conv_forward_2)
+        conv = gradients_impl.gradients(conv_forward, t1)[0]
+        conv_2 = gradients_impl.gradients(conv_forward_2, t1)[0]
+        # "values" consists of two tensors for two backprops
+        value = sess.run(conv)
+        value_2 = sess.run(conv_2)
+        self.assertShapeEqual(value, conv)
+        self.assertShapeEqual(value_2, conv_2)
+      print("expected = ", value_2)
+      print("actual = ", value)
+      self.assertArrayNear(value_2.flatten(), value.flatten(), err)
+
+  # Testing for backprops
+  def _RunAndVerifyBackpropFilterDilation(self, input_sizes, filter_sizes,
+                                          output_sizes, strides, dilations,
+                                          padding, data_format, use_gpu, err):
+    total_input_size = 1
+    total_filter_size = 1
+    for s in input_sizes:
+      total_input_size *= s
+    for s in filter_sizes:
+      total_filter_size *= s
+    # Initializes the input tensor with array containing incrementing
+    # numbers from 1.
+    x1 = [f * 1.0 for f in range(1, total_input_size + 1)]
+    x2 = [f * 1.0 for f in range(1, total_filter_size + 1)]
+    default_dilations = (dilations[0] == 1 and dilations[1] == 1)
+    if default_dilations or use_gpu:
+      with self.test_session(use_gpu=use_gpu) as sess:
+        if data_format == "NCHW":
+          input_sizes = test_util.NHWCToNCHW(input_sizes)
+        t1 = constant_op.constant(x1, shape=input_sizes)
+        t2 = constant_op.constant(x2, shape=filter_sizes)
+        full_strides = [1] + strides + [1]
+        full_dilations = [1] + dilations + [1]
+        if data_format == "NCHW":
+          full_strides = test_util.NHWCToNCHW(full_strides)
+          full_dilations = test_util.NHWCToNCHW(full_dilations)
+        conv_forward = nn_ops.conv2d(
+            t1,
+            t2,
+            strides=full_strides,
+            dilations=full_dilations,
+            padding=padding,
+            data_format=data_format)
+        conv_forward_2 = nn_ops.convolution(
+            t1,
+            t2,
+            padding=padding,
+            strides=strides,
+            dilation_rate=dilations,
+            data_format=data_format)
+        if data_format == "NCHW":
+          conv_forward = test_util.NCHWToNHWC(conv_forward)
+          conv_forward_2 = test_util.NCHWToNHWC(conv_forward_2)
+        conv = gradients_impl.gradients(conv_forward, t2)[0]
+        conv_2 = gradients_impl.gradients(conv_forward, t2)[0]
+        value = sess.run(conv)
+        value_2 = sess.run(conv_2)
+        self.assertShapeEqual(value, conv)
+        self.assertShapeEqual(value_2, conv_2)
+      print("expected = ", value_2)
+      print("actual = ", value)
+      self.assertArrayNear(value_2.flatten(), value.flatten(), err)
+
+  def testConv2D2x2Depth3ValidBackpropFilterStride1x1Dilation2x1(self):
+    if test.is_gpu_available(cuda_only=True):
+      for (data_format, use_gpu) in GetTestConfigs():
+        self._RunAndVerifyBackpropFilterDilation(
+            input_sizes=[1, 3, 6, 1],
+            filter_sizes=[2, 2, 1, 1],
+            output_sizes=[1, 1, 5, 1],
+            strides=[1, 1],
+            dilations=[2, 1],
+            padding="VALID",
+            data_format=data_format,
+            use_gpu=use_gpu,
+            err=1e-5)
+
+  def testConv2D2x2Depth1ValidBackpropFilterDilation1x2(self):
+    if test.is_gpu_available(cuda_only=True):
+      for (data_format, use_gpu) in GetTestConfigs():
+        self._RunAndVerifyBackpropFilterDilation(
+            input_sizes=[1, 2, 3, 1],
+            filter_sizes=[2, 2, 1, 1],
+            output_sizes=[1, 1, 2, 1],
+            strides=[1, 1],
+            dilations=[1, 2],
+            padding="VALID",
+            data_format=data_format,
+            use_gpu=use_gpu,
+            err=1e-5)
+
+  def testConv2DEmptyBackpropFilterDilation1x2(self):
+    if test.is_gpu_available(cuda_only=True):
+      for (data_format, use_gpu) in GetTestConfigs():
+        self._RunAndVerifyBackpropFilterDilation(
+            input_sizes=[1, 2, 3, 1],
+            filter_sizes=[2, 2, 1, 0],
+            output_sizes=[1, 1, 2, 0],
+            strides=[1, 1],
+            dilations=[1, 2],
+            padding="VALID",
+            data_format=data_format,
+            use_gpu=use_gpu,
+            err=1e-5)
+
+  def testConv2D2x2Depth3ValidBackpropFilterDilation2x2(self):
+    if test.is_gpu_available(cuda_only=True):
+      for (data_format, use_gpu) in GetTestConfigs():
+        self._RunAndVerifyBackpropFilterDilation(
+            input_sizes=[1, 3, 4, 3],
+            filter_sizes=[2, 2, 3, 3],
+            output_sizes=[1, 1, 2, 3],
+            strides=[1, 1],
+            dilations=[2, 2],
+            padding="VALID",
+            data_format=data_format,
+            use_gpu=use_gpu,
+            err=1e-5)
+
+  def testConv2DKernelSizeMatchesInputSizeBackpropFilterDilation2x2(self):
+    if test.is_gpu_available(cuda_only=True):
+      for (data_format, use_gpu) in GetTestConfigs():
+        self._RunAndVerifyBackpropFilterDilation(
+            input_sizes=[1, 3, 3, 1],
+            filter_sizes=[2, 2, 1, 2],
+            output_sizes=[1, 1, 1, 2],
+            strides=[1, 1],
+            dilations=[2, 2],
+            padding="VALID",
+            data_format=data_format,
+            use_gpu=use_gpu,
+            err=1e-5)
+
+  def testConv2D2x2Depth3ValidBackpropInputStride1x1Dilation2x1(self):
+    if test.is_gpu_available(cuda_only=True):
+      for (data_format, use_gpu) in GetTestConfigs():
+        self._RunAndVerifyBackpropInputDilation(
+            input_sizes=[1, 3, 6, 1],
+            filter_sizes=[2, 2, 1, 1],
+            output_sizes=[1, 1, 5, 1],
+            strides=[1, 1],
+            dilations=[2, 1],
+            padding="VALID",
+            data_format=data_format,
+            use_gpu=use_gpu,
+            err=1e-5)
+
+  def testConv2D2x2Depth1ValidBackpropInputDilation1x2(self):
+    if test.is_gpu_available(cuda_only=True):
+      for (data_format, use_gpu) in GetTestConfigs():
+        self._RunAndVerifyBackpropInputDilation(
+            input_sizes=[1, 2, 3, 1],
+            filter_sizes=[2, 2, 1, 1],
+            output_sizes=[1, 1, 2, 1],
+            strides=[1, 1],
+            dilations=[1, 2],
+            padding="VALID",
+            data_format=data_format,
+            use_gpu=use_gpu,
+            err=1e-5)
+
+  def testConv2DEmptyBackpropInputDilation1x2(self):
+    if test.is_gpu_available(cuda_only=True):
+      for (data_format, use_gpu) in GetTestConfigs():
+        self._RunAndVerifyBackpropInputDilation(
+            input_sizes=[0, 2, 3, 1],
+            filter_sizes=[2, 2, 1, 1],
+            output_sizes=[0, 1, 2, 1],
+            strides=[1, 1],
+            dilations=[1, 2],
+            padding="VALID",
+            data_format=data_format,
+            use_gpu=use_gpu,
+            err=1e-5)
+
+  def testConv2D2x2Depth3ValidBackpropInputDilation2x1(self):
+    if test.is_gpu_available(cuda_only=True):
+      for (data_format, use_gpu) in GetTestConfigs():
+        # The GPU version of this test is not very stable. So adjusting the
+        # error threshold to 1e-4.
+        self._RunAndVerifyBackpropInputDilation(
+            input_sizes=[1, 3, 2, 3],
+            filter_sizes=[2, 2, 3, 3],
+            output_sizes=[1, 1, 2, 3],
+            strides=[1, 1],
+            dilations=[2, 1],
+            padding="VALID",
+            data_format=data_format,
+            use_gpu=use_gpu,
+            err=1e-4)
+
+  def testConv2DKernelSizeMatchesInputSizeBackpropInputDilation2x2(self):
+    if test.is_gpu_available(cuda_only=True):
+      for (data_format, use_gpu) in GetTestConfigs():
+        self._RunAndVerifyBackpropInputDilation(
+            input_sizes=[1, 3, 3, 1],
+            filter_sizes=[2, 2, 1, 2],
+            output_sizes=[1, 1, 1, 2],
+            strides=[1, 1],
+            dilations=[2, 2],
+            padding="VALID",
+            data_format=data_format,
+            use_gpu=use_gpu,
+            err=1e-5)
+
   # Gradient checkers
   def ConstructAndTestGradient(self, batch, input_rows, input_cols, filter_rows,
                                filter_cols, in_depth, out_depth, stride_rows,
@@ -1457,6 +1829,22 @@ def GetInceptionFwdTest(input_size, filter_size, stride, padding,
   return Test
 
 
+def GetInceptionFwdDilatedConvTest(input_size, filter_size, stride, padding):
+
+  def Test(self):
+    if test.is_gpu_available(cuda_only=True) and stride == 1:
+      tf_logging.info("Testing InceptionFwd with dilations %s",
+                      (input_size, filter_size, stride, padding))
+      self._VerifyDilatedConvValues(
+          tensor_in_sizes=input_size,
+          filter_in_sizes=filter_size,
+          strides=[stride, stride],
+          dilations=[2, 2],
+          padding=padding)
+
+  return Test
+
+
 def GetInceptionBackInputTest(input_size, filter_size, output_size, stride,
                               padding,
                               gpu_only=False):
@@ -1497,6 +1885,10 @@ if __name__ == "__main__":
             test_util.run_in_graph_and_eager_modes()(
                 GetInceptionFwdTest(input_size_, filter_size_, stride_,
                                     padding_)))
+    setattr(
+        Conv2DTest, "testInceptionFwdDilatedConv_" + str(index),
+        test_util.run_in_graph_and_eager_modes()(GetInceptionFwdDilatedConvTest(
+            input_size_, filter_size_, stride_, padding_)))
     setattr(Conv2DTest, "testInceptionBackInput_" + str(index),
             test_util.run_in_graph_and_eager_modes()(
                 GetInceptionBackInputTest(input_size_, filter_size_,
@@ -1519,6 +1911,9 @@ if __name__ == "__main__":
   setattr(Conv2DTest, "testInceptionFwd_No_Winograd_Nonfused",
           test_util.run_in_graph_and_eager_modes()(
               GetInceptionFwdTest(ishape, fshape, 1, "SAME", gpu_only=True)))
+  setattr(Conv2DTest, "testInceptionFwdDilatedConv_No_Winograd_Nonfused",
+          test_util.run_in_graph_and_eager_modes()(
+              GetInceptionFwdDilatedConvTest(ishape, fshape, 1, "SAME")))
   setattr(Conv2DTest, "testInceptionBackInput_No_Winograd_Nonfused",
           test_util.run_in_graph_and_eager_modes()(
               GetInceptionBackInputTest(ishape, fshape, oshape, 1, "SAME",
diff --git a/tensorflow/python/ops/nn_grad.py b/tensorflow/python/ops/nn_grad.py
index 4b406ba840..8cd535aa0b 100644
--- a/tensorflow/python/ops/nn_grad.py
+++ b/tensorflow/python/ops/nn_grad.py
@@ -41,33 +41,48 @@ def _Conv2DBackpropInputGrad(op, grad):
   Returns:
     the gradients w.r.t. the input and the filter
   """
-  return [None,
-          nn_ops.conv2d_backprop_filter(grad, array_ops.shape(op.inputs[1]),
-                                        op.inputs[2], op.get_attr("strides"),
-                                        op.get_attr("padding"),
-                                        op.get_attr("use_cudnn_on_gpu"),
-                                        op.get_attr("data_format")),
-          nn_ops.conv2d(grad, op.inputs[1], op.get_attr("strides"),
-                        op.get_attr("padding"), op.get_attr("use_cudnn_on_gpu"),
-                        op.get_attr("data_format"))]
+  return [
+      None,
+      nn_ops.conv2d_backprop_filter(
+          grad,
+          array_ops.shape(op.inputs[1]),
+          op.inputs[2],
+          dilations=op.get_attr("dilations"),
+          strides=op.get_attr("strides"),
+          padding=op.get_attr("padding"),
+          use_cudnn_on_gpu=op.get_attr("use_cudnn_on_gpu"),
+          data_format=op.get_attr("data_format")),
+      nn_ops.conv2d(
+          grad,
+          op.inputs[1],
+          dilations=op.get_attr("dilations"),
+          strides=op.get_attr("strides"),
+          padding=op.get_attr("padding"),
+          use_cudnn_on_gpu=op.get_attr("use_cudnn_on_gpu"),
+          data_format=op.get_attr("data_format"))
+  ]
 
 
 @ops.RegisterGradient("Conv2DBackpropFilter")
 def _Conv2DBackpropFilterGrad(op, grad):
   return [
       nn_ops.conv2d_backprop_input(
-          array_ops.shape(op.inputs[0]), grad, op.inputs[2],
-          op.get_attr("strides"),
-          op.get_attr("padding"),
-          op.get_attr("use_cudnn_on_gpu"),
-          op.get_attr("data_format")),
-      None,
+          array_ops.shape(op.inputs[0]),
+          grad,
+          op.inputs[2],
+          dilations=op.get_attr("dilations"),
+          strides=op.get_attr("strides"),
+          padding=op.get_attr("padding"),
+          use_cudnn_on_gpu=op.get_attr("use_cudnn_on_gpu"),
+          data_format=op.get_attr("data_format")), None,
       nn_ops.conv2d(
-          op.inputs[0], grad,
-          op.get_attr("strides"),
-          op.get_attr("padding"),
-          op.get_attr("use_cudnn_on_gpu"),
-          op.get_attr("data_format"))
+          op.inputs[0],
+          grad,
+          dilations=op.get_attr("dilations"),
+          strides=op.get_attr("strides"),
+          padding=op.get_attr("padding"),
+          use_cudnn_on_gpu=op.get_attr("use_cudnn_on_gpu"),
+          data_format=op.get_attr("data_format"))
   ]
 
 
@@ -466,25 +481,32 @@ def _SparseSoftmaxCrossEntropyWithLogitsGrad(op, grad_0, _):
 
 @ops.RegisterGradient("Conv2D")
 def _Conv2DGrad(op, grad):
+  dilations = op.get_attr("dilations")
   strides = op.get_attr("strides")
   padding = op.get_attr("padding")
   use_cudnn_on_gpu = op.get_attr("use_cudnn_on_gpu")
   data_format = op.get_attr("data_format")
   shape_0, shape_1 = array_ops.shape_n([op.inputs[0], op.inputs[1]])
-  return [nn_ops.conv2d_backprop_input(shape_0,
-                                       op.inputs[1],
-                                       grad,
-                                       strides,
-                                       padding,
-                                       use_cudnn_on_gpu,
-                                       data_format),
-          nn_ops.conv2d_backprop_filter(op.inputs[0],
-                                        shape_1,
-                                        grad,
-                                        strides,
-                                        padding,
-                                        use_cudnn_on_gpu,
-                                        data_format)]
+  return [
+      nn_ops.conv2d_backprop_input(
+          shape_0,
+          op.inputs[1],
+          grad,
+          dilations=dilations,
+          strides=strides,
+          padding=padding,
+          use_cudnn_on_gpu=use_cudnn_on_gpu,
+          data_format=data_format),
+      nn_ops.conv2d_backprop_filter(
+          op.inputs[0],
+          shape_1,
+          grad,
+          dilations=dilations,
+          strides=strides,
+          padding=padding,
+          use_cudnn_on_gpu=use_cudnn_on_gpu,
+          data_format=data_format)
+  ]
 
 
 @ops.RegisterGradient("DepthwiseConv2dNative")
diff --git a/tensorflow/python/ops/nn_ops.py b/tensorflow/python/ops/nn_ops.py
index ec7b9372ca..b3c0a22efc 100644
--- a/tensorflow/python/ops/nn_ops.py
+++ b/tensorflow/python/ops/nn_ops.py
@@ -1205,13 +1205,14 @@ def conv2d_transpose(value,
       raise ValueError("padding must be either VALID or SAME:"
                        " {}".format(padding))
 
-    return gen_nn_ops.conv2d_backprop_input(input_sizes=output_shape_,
-                                            filter=filter,
-                                            out_backprop=value,
-                                            strides=strides,
-                                            padding=padding,
-                                            data_format=data_format,
-                                            name=name)
+    return gen_nn_ops.conv2d_backprop_input(
+        input_sizes=output_shape_,
+        filter=filter,
+        out_backprop=value,
+        strides=strides,
+        padding=padding,
+        data_format=data_format,
+        name=name)
 
 
 def atrous_conv2d_transpose(value,
@@ -1343,12 +1344,13 @@ def atrous_conv2d_transpose(value,
                    (in_width + pad_right_extra) // rate,
                    output_shape[3]]
 
-    value = gen_nn_ops.conv2d_backprop_input(input_sizes=input_sizes,
-                                             filter=filters,
-                                             out_backprop=value,
-                                             strides=[1, 1, 1, 1],
-                                             padding="VALID",
-                                             data_format="NHWC")
+    value = gen_nn_ops.conv2d_backprop_input(
+        input_sizes=input_sizes,
+        filter=filters,
+        out_backprop=value,
+        strides=[1, 1, 1, 1],
+        padding="VALID",
+        data_format="NHWC")
 
     # The crops argument to batch_to_space includes both padding components.
     batch_to_space_crop = [[pad_top, pad_bottom + pad_bottom_extra],
diff --git a/tensorflow/tools/api/golden/tensorflow.nn.pbtxt b/tensorflow/tools/api/golden/tensorflow.nn.pbtxt
index ebd9c079b5..d920fef770 100644
--- a/tensorflow/tools/api/golden/tensorflow.nn.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.nn.pbtxt
@@ -54,15 +54,15 @@ tf_module {
   }
   member_method {
     name: "conv2d"
-    argspec: "args=[\'input\', \'filter\', \'strides\', \'padding\', \'use_cudnn_on_gpu\', \'data_format\', \'name\'], varargs=None, keywords=None, defaults=[\'True\', \'NHWC\', \'None\'], "
+    argspec: "args=[\'input\', \'filter\', \'strides\', \'padding\', \'use_cudnn_on_gpu\', \'data_format\', \'dilations\', \'name\'], varargs=None, keywords=None, defaults=[\'True\', \'NHWC\', \'[1, 1, 1, 1]\', \'None\'], "
   }
   member_method {
     name: "conv2d_backprop_filter"
-    argspec: "args=[\'input\', \'filter_sizes\', \'out_backprop\', \'strides\', \'padding\', \'use_cudnn_on_gpu\', \'data_format\', \'name\'], varargs=None, keywords=None, defaults=[\'True\', \'NHWC\', \'None\'], "
+    argspec: "args=[\'input\', \'filter_sizes\', \'out_backprop\', \'strides\', \'padding\', \'use_cudnn_on_gpu\', \'data_format\', \'dilations\', \'name\'], varargs=None, keywords=None, defaults=[\'True\', \'NHWC\', \'[1, 1, 1, 1]\', \'None\'], "
   }
   member_method {
     name: "conv2d_backprop_input"
-    argspec: "args=[\'input_sizes\', \'filter\', \'out_backprop\', \'strides\', \'padding\', \'use_cudnn_on_gpu\', \'data_format\', \'name\'], varargs=None, keywords=None, defaults=[\'True\', \'NHWC\', \'None\'], "
+    argspec: "args=[\'input_sizes\', \'filter\', \'out_backprop\', \'strides\', \'padding\', \'use_cudnn_on_gpu\', \'data_format\', \'dilations\', \'name\'], varargs=None, keywords=None, defaults=[\'True\', \'NHWC\', \'[1, 1, 1, 1]\', \'None\'], "
   }
   member_method {
     name: "conv2d_transpose"
@@ -70,11 +70,11 @@ tf_module {
   }
   member_method {
     name: "conv3d"
-    argspec: "args=[\'input\', \'filter\', \'strides\', \'padding\', \'data_format\', \'name\'], varargs=None, keywords=None, defaults=[\'NDHWC\', \'None\'], "
+    argspec: "args=[\'input\', \'filter\', \'strides\', \'padding\', \'data_format\', \'dilations\', \'name\'], varargs=None, keywords=None, defaults=[\'NDHWC\', \'[1, 1, 1, 1, 1]\', \'None\'], "
   }
   member_method {
     name: "conv3d_backprop_filter_v2"
-    argspec: "args=[\'input\', \'filter_sizes\', \'out_backprop\', \'strides\', \'padding\', \'data_format\', \'name\'], varargs=None, keywords=None, defaults=[\'NDHWC\', \'None\'], "
+    argspec: "args=[\'input\', \'filter_sizes\', \'out_backprop\', \'strides\', \'padding\', \'data_format\', \'dilations\', \'name\'], varargs=None, keywords=None, defaults=[\'NDHWC\', \'[1, 1, 1, 1, 1]\', \'None\'], "
   }
   member_method {
     name: "conv3d_transpose"
@@ -106,15 +106,15 @@ tf_module {
   }
   member_method {
     name: "depthwise_conv2d_native"
-    argspec: "args=[\'input\', \'filter\', \'strides\', \'padding\', \'data_format\', \'name\'], varargs=None, keywords=None, defaults=[\'NHWC\', \'None\'], "
+    argspec: "args=[\'input\', \'filter\', \'strides\', \'padding\', \'data_format\', \'dilations\', \'name\'], varargs=None, keywords=None, defaults=[\'NHWC\', \'[1, 1, 1, 1]\', \'None\'], "
   }
   member_method {
     name: "depthwise_conv2d_native_backprop_filter"
-    argspec: "args=[\'input\', \'filter_sizes\', \'out_backprop\', \'strides\', \'padding\', \'data_format\', \'name\'], varargs=None, keywords=None, defaults=[\'NHWC\', \'None\'], "
+    argspec: "args=[\'input\', \'filter_sizes\', \'out_backprop\', \'strides\', \'padding\', \'data_format\', \'dilations\', \'name\'], varargs=None, keywords=None, defaults=[\'NHWC\', \'[1, 1, 1, 1]\', \'None\'], "
   }
   member_method {
     name: "depthwise_conv2d_native_backprop_input"
-    argspec: "args=[\'input_sizes\', \'filter\', \'out_backprop\', \'strides\', \'padding\', \'data_format\', \'name\'], varargs=None, keywords=None, defaults=[\'NHWC\', \'None\'], "
+    argspec: "args=[\'input_sizes\', \'filter\', \'out_backprop\', \'strides\', \'padding\', \'data_format\', \'dilations\', \'name\'], varargs=None, keywords=None, defaults=[\'NHWC\', \'[1, 1, 1, 1]\', \'None\'], "
   }
   member_method {
     name: "dilation2d"
@@ -234,7 +234,7 @@ tf_module {
   }
   member_method {
     name: "quantized_conv2d"
-    argspec: "args=[\'input\', \'filter\', \'min_input\', \'max_input\', \'min_filter\', \'max_filter\', \'strides\', \'padding\', \'out_type\', \'name\'], varargs=None, keywords=None, defaults=[\"<dtype: \'qint32\'>\", \'None\'], "
+    argspec: "args=[\'input\', \'filter\', \'min_input\', \'max_input\', \'min_filter\', \'max_filter\', \'strides\', \'padding\', \'out_type\', \'dilations\', \'name\'], varargs=None, keywords=None, defaults=[\"<dtype: \'qint32\'>\", \'[1, 1, 1, 1]\', \'None\'], "
   }
   member_method {
     name: "quantized_max_pool"
-- 
GitLab


From b97585f5d2157b1e0273a4b20a568635fb58ad57 Mon Sep 17 00:00:00 2001
From: Benoit Steiner <bsteiner@google.com>
Date: Wed, 29 Nov 2017 17:55:53 -0800
Subject: [PATCH 0444/1225] Always leverage shapes inference now that it can
 handle fed nodes conservatively.

PiperOrigin-RevId: 177391746
---
 .../grappler/optimizers/constant_folding.cc   | 88 ++++++++++---------
 .../grappler/optimizers/constant_folding.h    |  6 +-
 2 files changed, 52 insertions(+), 42 deletions(-)

diff --git a/tensorflow/core/grappler/optimizers/constant_folding.cc b/tensorflow/core/grappler/optimizers/constant_folding.cc
index 03eaa4a84a..b5172a4833 100644
--- a/tensorflow/core/grappler/optimizers/constant_folding.cc
+++ b/tensorflow/core/grappler/optimizers/constant_folding.cc
@@ -190,6 +190,14 @@ Status ConvertShapeToConstant(const string& op, const DataType& type,
   return Status::OK();
 }
 
+bool ConstantFolding::IsReallyConstant(const NodeDef& node) const {
+  if (!IsConstant(node)) {
+    return false;
+  }
+  // If the node is fed it's not constant anymore.
+  return feed_nodes_.find(node.name()) == feed_nodes_.end();
+}
+
 Status ConstantFolding::MaterializeShapes(const GraphProperties& properties) {
   // We may add some nodes to the graph to encode control dependencies: there is
   // no need to process these, so only iterate over the nodes of the input
@@ -327,9 +335,9 @@ Status ConstantFolding::MaterializeBroadcastGradientArgs(
   const NodeDef* shape_node1 = node_map_->GetNode(node.input(0));
   const NodeDef* shape_node2 = node_map_->GetNode(node.input(1));
   if (shape_node1 == nullptr ||
-      (shape_node1->op() != "Shape" && shape_node1->op() != "Const") ||
+      (shape_node1->op() != "Shape" && !IsReallyConstant(*shape_node1)) ||
       shape_node2 == nullptr ||
-      (shape_node2->op() != "Shape" && shape_node2->op() != "Const")) {
+      (shape_node2->op() != "Shape" && !IsReallyConstant(*shape_node2))) {
     return Status::OK();
   }
   int64 min_id = 0;
@@ -409,7 +417,7 @@ Status ConstantFolding::MaterializeReductionIndices(
     return Status::OK();
   }
   const NodeDef* indices = node_map_->GetNode(node->input(1));
-  if (!indices || IsConstant(*indices)) {
+  if (!indices || IsReallyConstant(*indices)) {
     // The reduction indices are already constant, there's nothing to do.
     return Status::OK();
   }
@@ -506,24 +514,23 @@ bool ConstantFolding::IsFoldable(const NodeDef& node) const {
   if (node.input().empty()) {
     return false;
   }
-
   // Skips nodes that must be preserved except whitelisted nodes.
   if (nodes_to_preserve_.find(node.name()) != nodes_to_preserve_.end() &&
       nodes_whitelist_.find(node.name()) == nodes_whitelist_.end()) {
     return false;
   }
-
-  // Skips ops that don't benefit from folding.
-  const string& op = node.op();
-  // Skip constants, they're already folded
-  if (op == "Const") {
+  // Skip control flow nodes, they can't be folded
+  if (ModifiesFrameInfo(node)) {
     return false;
   }
-  // Skip constrol flow nodes, they can't be folded
-  if (op == "Enter" || op == "RefEnter" || op == "Exit" || op == "RefExit" ||
-      op == "NextIteration" || op == "RefNextIteration") {
+  // Skip constants, they're already folded
+  if (IsConstant(node)) {
     return false;
   }
+
+  // Skips ops that don't benefit from folding.
+  const string& op = node.op();
+
   if (op.find("Placeholder") == 0) {
     return false;
   }
@@ -577,7 +584,7 @@ bool ConstantFolding::IsFoldable(const NodeDef& node) const {
     if (!input_node) {
       return false;
     }
-    bool is_const = IsConstant(*input_node);
+    bool is_const = IsReallyConstant(*input_node);
     if (!is_const && !is_merge) {
       return false;
     }
@@ -703,7 +710,7 @@ Status ConstantFolding::EvaluateOneFoldable(const NodeDef& node,
       break;
     }
     const NodeDef* input_node = node_map_->GetNode(input);
-    if (!IsConstant(*input_node)) {
+    if (!IsReallyConstant(*input_node)) {
       return Status(error::INVALID_ARGUMENT,
                     strings::StrCat("Can't fold ", node.name(), ", its ", input,
                                     " isn't constant"));
@@ -757,7 +764,7 @@ Status ConstantFolding::FoldNode(NodeDef* node, GraphDef* output_graph) {
         continue;
       }
       NodeDef* input_node = node_map_->GetNode(input);
-      if (!IsConstant(*input_node)) {
+      if (!IsReallyConstant(*input_node)) {
         continue;
       }
       bool valid_input = true;
@@ -999,7 +1006,7 @@ bool ConstantFolding::IsSimplifiableReduction(const NodeDef& node) const {
   if (IsReduction(node)) {
     CHECK_LE(2, node.input_size());
     const NodeDef* reductions_indices = node_map_->GetNode(node.input(1));
-    if (IsConstant(*reductions_indices)) {
+    if (IsReallyConstant(*reductions_indices)) {
       TensorVector output;
       Status s = EvaluateNode(*reductions_indices, TensorVector(), &output);
       if (!s.ok()) {
@@ -1023,7 +1030,7 @@ bool ConstantFolding::IsSimplifiableReshape(
   }
   CHECK_LE(2, node.input_size());
   const NodeDef* new_shape = node_map_->GetNode(node.input(1));
-  if (!IsConstant(*new_shape)) {
+  if (!IsReallyConstant(*new_shape)) {
     return false;
   }
   TensorVector outputs;
@@ -1074,7 +1081,8 @@ bool ConstantFolding::IsSimplifiableReshape(
 }
 
 Status ConstantFolding::SimplifyGraph(GraphDef* output,
-                                      const GraphProperties& properties) {
+                                      const GraphProperties& properties,
+                                      bool use_shape_info) {
   for (auto& node : *output->mutable_node()) {
     if (IsSimplifiableReduction(node)) {
       // Replace the reduction node with an identity node, that can be further
@@ -1099,10 +1107,10 @@ Status ConstantFolding::SimplifyGraph(GraphDef* output,
         *node.add_input() = input;
       }
     }
-    // It's possible to feed a placeholder with a tensor that doesn't have the
-    // proper shape, and reshape this tensor later on. Therefore only remove
-    // reshapes in graphs that don't have placeholders.
-    if (IsSimplifiableReshape(node, properties)) {
+    const bool safe_to_use_shapes =
+        use_shape_info &&
+        (feed_nodes_.empty() || opt_level_ == RewriterConfig::AGGRESSIVE);
+    if (safe_to_use_shapes && IsSimplifiableReshape(node, properties)) {
       const NodeDef* new_shape = node_map_->GetNode(node.input(1));
       DataType output_type = node.attr().at("T").type();
       node.set_op("Identity");
@@ -1141,36 +1149,34 @@ Status ConstantFolding::RunOptimizationPass(Cluster* cluster,
   }
 
   GraphProperties properties(item);
-  const bool has_feed = !item.feed.empty();
-  bool needs_shapes = !has_feed || opt_level_ == RewriterConfig::AGGRESSIVE;
-  Status s = errors::Unknown(
-      "The graph properties are needed but were not initialized");
-  if (needs_shapes) {
-    s = properties.InferStatically(false);
-  }
-
-  if (!has_feed && s.ok()) {
-    // Only use static shape information when there is no feed in the
-    // graph. That's because it's possible to feed a placeholder with a tensor
-    // of any shape, which could make the static information inconsistent with
-    // the shapes actually fed.
+  // It's possible to feed a placeholder with a tensor of any shape: make sure
+  // that the shape inference deals with this conservatively unless we're in
+  // aggressive mode.
+  const bool assume_valid_feeds = opt_level_ == RewriterConfig::AGGRESSIVE;
+  Status s = properties.InferStatically(assume_valid_feeds);
+  const bool can_use_shape_info = s.ok();
+
+  if (can_use_shape_info) {
     TF_RETURN_IF_ERROR(MaterializeShapes(properties));
-  }
-  if (opt_level_ == RewriterConfig::AGGRESSIVE && s.ok()) {
-    TF_RETURN_IF_ERROR(MaterializeConstants(properties));
+
+    if (opt_level_ == RewriterConfig::AGGRESSIVE) {
+      TF_RETURN_IF_ERROR(MaterializeConstants(properties));
+    }
   }
 
   TF_RETURN_IF_ERROR(FoldGraph(output));
 
-  if (!has_feed && s.ok()) {
-    TF_RETURN_IF_ERROR(SimplifyGraph(output, properties));
-  }
+  TF_RETURN_IF_ERROR(SimplifyGraph(output, properties, can_use_shape_info));
+
   return Status::OK();
 }
 
 Status ConstantFolding::Optimize(Cluster* cluster, const GrapplerItem& item,
                                  GraphDef* output) {
   nodes_to_preserve_ = item.NodesToPreserve();
+  for (const auto& feed : item.feed) {
+    feed_nodes_.insert(NodeName(feed.first));
+  }
 
   if (cpu_device_ == nullptr) {
     owned_device_.reset(new DeviceSimple());
diff --git a/tensorflow/core/grappler/optimizers/constant_folding.h b/tensorflow/core/grappler/optimizers/constant_folding.h
index 7c5db2a70f..8af5b5fbe6 100644
--- a/tensorflow/core/grappler/optimizers/constant_folding.h
+++ b/tensorflow/core/grappler/optimizers/constant_folding.h
@@ -51,6 +51,8 @@ class ConstantFolding : public GraphOptimizer {
                 const GraphDef& optimize_output, double result) override;
 
  private:
+  bool IsReallyConstant(const NodeDef& node) const;
+
   Status MaterializeShapes(const GraphProperties& properties);
 
   Status MaterializeBroadcastGradientArgs(const NodeDef& node,
@@ -75,7 +77,8 @@ class ConstantFolding : public GraphOptimizer {
   bool IsSimplifiableReduction(const NodeDef& node) const;
   bool IsSimplifiableReshape(const NodeDef& node,
                              const GraphProperties& properties) const;
-  Status SimplifyGraph(GraphDef* output, const GraphProperties& properties);
+  Status SimplifyGraph(GraphDef* output, const GraphProperties& properties,
+                       bool use_shape_info);
 
   Status RunOptimizationPass(Cluster* cluster, const GrapplerItem& item,
                              GraphDef* output);
@@ -90,6 +93,7 @@ class ConstantFolding : public GraphOptimizer {
   std::unique_ptr<NodeMap> node_map_;
   std::unordered_set<string> nodes_to_preserve_;
   std::unordered_set<string> nodes_whitelist_;
+  std::unordered_set<string> feed_nodes_;
   bool has_fetch_;
 };
 
-- 
GitLab


From 65778d86a898d2aa73038837ab6c589b0a345d76 Mon Sep 17 00:00:00 2001
From: Yong Tang <yong.tang.github@outlook.com>
Date: Thu, 30 Nov 2017 02:24:29 +0000
Subject: [PATCH 0445/1225] Add `AWS_REGION` env for S3 in TensorFlow

This fix tries to address the issue raised in 14951 where
the region can only be specified with non-common `S3_REGION`
environment variables.

This fix adds the support of `AWS_REGION` which takes precedence
over `S3_REGION`.

This fix fixes 14951.

Signed-off-by: Yong Tang <yong.tang.github@outlook.com>
---
 tensorflow/core/platform/s3/s3_file_system.cc | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/tensorflow/core/platform/s3/s3_file_system.cc b/tensorflow/core/platform/s3/s3_file_system.cc
index 234f3c3aed..682ad97eec 100644
--- a/tensorflow/core/platform/s3/s3_file_system.cc
+++ b/tensorflow/core/platform/s3/s3_file_system.cc
@@ -12,9 +12,9 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
+#include "tensorflow/core/platform/s3/s3_file_system.h"
 #include "tensorflow/core/lib/io/path.h"
 #include "tensorflow/core/platform/mutex.h"
-#include "tensorflow/core/platform/s3/s3_file_system.h"
 #include "tensorflow/core/platform/s3/s3_crypto.h"
 
 #include <aws/core/Aws.h>
@@ -49,9 +49,15 @@ Aws::Client::ClientConfiguration& GetDefaultClientConfig() {
     if (endpoint) {
       cfg.endpointOverride = Aws::String(endpoint);
     }
-    const char* region = getenv("S3_REGION");
+    const char* region = getenv("AWS_REGION");
     if (region) {
       cfg.region = Aws::String(region);
+    } else {
+      // TODO (yongtang): `S3_REGION` should be deprecated after 2.0.
+      const char* region = getenv("S3_REGION");
+      if (region) {
+        cfg.region = Aws::String(region);
+      }
     }
     const char* use_https = getenv("S3_USE_HTTPS");
     if (use_https) {
-- 
GitLab


From d8d43898b972a1224db50035a771e82985f60035 Mon Sep 17 00:00:00 2001
From: FredZhang <654496915@qq.com>
Date: Thu, 30 Nov 2017 12:40:49 +0800
Subject: [PATCH 0446/1225] wrong code in programmer's guide in Variable
 Section
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

In Programmer's guide Variable section
 the assignment variable is a `tf.Tensor` and should use `assignment.op.run()` instead of `assignment.run()`
Otherwise, this code would produce an error:
```
AttributeError: 'Tensor' object has no attribute 'run'
```
Or we can use sess.run(assignment) to finish this assignment operation
---
 tensorflow/docs_src/programmers_guide/variables.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/docs_src/programmers_guide/variables.md b/tensorflow/docs_src/programmers_guide/variables.md
index 16753c931f..bac385c02c 100644
--- a/tensorflow/docs_src/programmers_guide/variables.md
+++ b/tensorflow/docs_src/programmers_guide/variables.md
@@ -205,7 +205,7 @@ methods:
 v = tf.get_variable("v", shape=(), initializer=tf.zeros_initializer())
 assignment = v.assign_add(1)
 tf.global_variables_initializer().run()
-assignment.run()
+sess.run(assignment)  # or assignment.op.run()
 ```
 
 Most TensorFlow optimizers have specialized ops that efficiently update the
-- 
GitLab


From 4422aaa61338c3af8ce80034d92693a1bd33b09d Mon Sep 17 00:00:00 2001
From: Gunhan Gulsoy <gunan@google.com>
Date: Thu, 30 Nov 2017 00:18:03 -0800
Subject: [PATCH 0447/1225] Automated g4 rollback of changelist 177375237

PiperOrigin-RevId: 177418947
---
 tensorflow/python/eager/backprop.py           |   2 +-
 tensorflow/python/eager/context.py            |  15 --
 tensorflow/python/eager/function.py           | 144 ++++++------------
 tensorflow/python/eager/graph_callable.py     |  18 +--
 .../python/eager/graph_callable_test.py       |   1 +
 tensorflow/python/framework/ops.py            |  30 ++--
 tensorflow/python/pywrap_tfe.i                |   3 +-
 7 files changed, 70 insertions(+), 143 deletions(-)

diff --git a/tensorflow/python/eager/backprop.py b/tensorflow/python/eager/backprop.py
index dc1142705a..0144f3b1e5 100644
--- a/tensorflow/python/eager/backprop.py
+++ b/tensorflow/python/eager/backprop.py
@@ -540,7 +540,7 @@ def _ensure_unique_tensor_objects(parameter_positions, args):
     if i in parameter_positions:
       tid = ops.tensor_id(t)
       if tid in s:
-        args[i] = gen_array_ops.identity(args[i])
+        args[i] = args[i]._dup()  # pylint: disable=protected-access
       else:
         s.add(tid)
   return args
diff --git a/tensorflow/python/eager/context.py b/tensorflow/python/eager/context.py
index 415416cfae..92f4e15c05 100644
--- a/tensorflow/python/eager/context.py
+++ b/tensorflow/python/eager/context.py
@@ -288,21 +288,6 @@ class Context(object):
     self._initialize_handle_and_devices()
     return self._num_gpus
 
-  def add_function(self, fn):
-    """Add a function definition to the context.
-
-    Once added, the function (identified by its name) can be executed like any
-    other operation.
-
-    Args:
-      fn: A wrapped TF_Function (returned from TF_GraphToFunction_wrapper).
-    """
-    with errors.raise_exception_on_not_ok_status() as status:
-      pywrap_tensorflow.TFE_ContextAddFunction(
-          self._handle,  # pylint: disable=protected-access
-          fn,
-          status)
-
   def add_function_def(self, fdef):
     """Add a function definition to the context.
 
diff --git a/tensorflow/python/eager/function.py b/tensorflow/python/eager/function.py
index 092b36ff20..2f4b59e938 100644
--- a/tensorflow/python/eager/function.py
+++ b/tensorflow/python/eager/function.py
@@ -25,19 +25,15 @@ import threading
 
 import numpy as np
 
-from tensorflow.core.framework import function_pb2
-from tensorflow.python import pywrap_tensorflow
 from tensorflow.python.eager import context
 from tensorflow.python.eager import execute
 from tensorflow.python.eager import tape
 from tensorflow.python.eager.graph_only_ops import graph_placeholder
-from tensorflow.python.framework import c_api_util
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes as dtypes_module
-from tensorflow.python.framework import errors
+from tensorflow.python.framework import graph_to_function_def
 from tensorflow.python.framework import ops
 from tensorflow.python.ops import gradients_impl
-from tensorflow.python.util import compat
 from tensorflow.python.util import nest
 from tensorflow.python.util import tf_decorator
 
@@ -51,41 +47,10 @@ _scoped_captures = threading.local()
 _scoped_captures.tensors = None
 
 
-def make_function_def(name, graph, operations, inputs, outputs):
-  """Makes FunctionDef proto and defined function.
-
-  Args:
-    name: the function name
-    graph: the graph from which to build the function
-    operations: the operations in the function body
-    inputs: tensors to be used as function arguments
-    outputs: tensors to be returned from the function
-
-  Returns:
-   fdef: a FunctionDef protocol buffer for the function
-   fn: a wrapped TF_Function for the function
-  """
-  with errors.raise_exception_on_not_ok_status() as status:
-    fn = pywrap_tensorflow.TF_GraphToFunction_wrapper(
-        graph._c_graph,  # pylint: disable=protected-access
-        compat.as_text(name),
-        False,
-        [o._c_op for o in operations],  # pylint: disable=protected-access
-        [t._as_tf_output() for t in inputs],  # pylint: disable=protected-access
-        [t._as_tf_output() for t in outputs],  # pylint: disable=protected-access
-        [compat.as_text("%s" % i) for i in range(len(outputs))],
-        None,
-        compat.as_text(""),
-        status)
-  # TODO(apassos) avoid creating a FunctionDef (specially to grab the signature,
-  # but also in general it's nice not to depend on it.
-  with c_api_util.tf_buffer() as buffer_:
-    with errors.raise_exception_on_not_ok_status() as status:
-      pywrap_tensorflow.TF_FunctionToFunctionDef(fn, buffer_, status)
-    proto_data = pywrap_tensorflow.TF_GetBuffer(buffer_)
-  fdef = function_pb2.FunctionDef()
-  fdef.ParseFromString(compat.as_bytes(proto_data))
-  return fdef, fn
+def make_function_def(graph, operations, inputs, outputs):
+  """Makes function def from the given graph with the operations."""
+  return graph_to_function_def.graph_to_function_def(
+      graph, operations, inputs, outputs)
 
 
 @contextlib.contextmanager
@@ -150,10 +115,6 @@ class CapturingGraph(ops.Graph):
     # for resource tensors.
     self._last_op_using_resource_tensor = {}
 
-  # TODO(apassos) remove once the C API is used by default.
-  def _use_c_api_hack(self):
-    return True
-
   def clear_resource_control_flow_state(self):
     self._last_op_using_resource_tensor = {}
 
@@ -246,20 +207,14 @@ def _inference_name(n):
   return "__inference_%s_%s" % (n, ops.uid())
 
 
-# TODO(apassos) get rid of this by splitting framework.function._DefinedFunction
-# so it doesn't have the definition-generating logic and is just a container for
-# an already-defined function.
 class _DefinedFunction(object):
   """Mocks the interface of tf _DefinedFunction."""
 
-  def __init__(self, fdef, fn):
+  def __init__(self, fdef):
     self.definition = fdef
     self.name = fdef.signature.name
-    self.signature = fdef.signature
     self.grad_func_name = None
     self.python_grad_func = None
-    self._c_func = fn
-    self._grad_func = None
 
 
 def _map_sequence_obj_to_idx(sequence):
@@ -295,7 +250,6 @@ class GraphModeFunction(object):
                input_placeholders,
                extra_inputs,
                fdef,
-               fn,
                graph,
                operations,
                func_outputs,
@@ -309,7 +263,7 @@ class GraphModeFunction(object):
     self._graph = graph
     self._has_backprop = False
     self._func_name = fdef.signature.name
-    self._fdef = _DefinedFunction(fdef, fn)
+    self._fdef = _DefinedFunction(fdef)
     self._num_outputs = len(fdef.signature.output_arg)
     self._ops = operations
     self._func_outputs = func_outputs
@@ -329,45 +283,38 @@ class GraphModeFunction(object):
     with self._graph.as_default(), context.graph_mode():
       c = _CapturingContext()
       with c:
-        filtered_outputs = [x for x in self._returns if x is not None]
+        filtered_outputs = [
+            x for x in self._returns if x is not None
+        ]
         self._out_grad_placeholders = [
-            graph_placeholder(x.dtype, x.shape) for x in filtered_outputs]
+            graph_placeholder(x.dtype, x.shape) for x in filtered_outputs
+        ]
         in_gradients = gradients_impl.gradients(
             filtered_outputs,
             self._input_placeholders,
             grad_ys=self._out_grad_placeholders)
-        shapes = tuple(x.shape for x in in_gradients if x is not None)
+        shapes = [x.shape for x in in_gradients if x is not None]
     captures = list(sorted(c.captured_tensors, key=lambda x: x.name))
-    forward_name = _forward_name(self._func_name)
-    forward_function_def, forward_fn = make_function_def(
-        forward_name, self._graph, self._ops, self._input_placeholders,
+    forward_function_def = make_function_def(
+        self._graph, self._ops, self._input_placeholders,
         filtered_outputs + captures)
-    self._forward_fdef = _DefinedFunction(forward_function_def, forward_fn)
-    _register(forward_fn)
-    backward_outputs = tuple(x for x in in_gradients if x is not None)
+    self._forward_fdef = _DefinedFunction(forward_function_def)
+    _register_with_name(_forward_name(self._func_name), forward_function_def)
+    backward_outputs = [x for x in in_gradients if x is not None]
     all_inputs = self._out_grad_placeholders + captures
-    # Excluding input ops from the body as we do not intend to execute these
-    # operations when the function is executed.
-    all_ignored_ops = frozenset(x.op for x in all_inputs)
-    # Enforce a deterministic order of operations in the generated graph. This
-    # means rerunning the function-defining code will always define the same
-    # function, which is useful if we serialize this etc.
-    fdef_ops = tuple(x for x in sorted(c.known_ops, key=lambda x: x.name)
-                     if x not in all_ignored_ops)
-    bname = _backward_name(self._func_name)
-    backward_function_def, backward_fn = make_function_def(
-        bname, self._graph, fdef_ops,
+    backward_function_def = make_function_def(
+        self._graph, [x.op for x in self._out_grad_placeholders
+                     ] + list(sorted(c.known_ops, key=lambda x: x.name)),
         all_inputs, backward_outputs)
-    _register(backward_fn)
+    _register_with_name(_backward_name(self._func_name), backward_function_def)
     self._backward_function = GraphModeFunction(
-        all_inputs, [], backward_function_def, backward_fn, self._graph,
-        c.known_ops, in_gradients, _map_sequence_obj_to_idx(backward_outputs),
-        shapes)
+        all_inputs, [], backward_function_def, self._graph, c.known_ops,
+        in_gradients, _map_sequence_obj_to_idx(backward_outputs), shapes)
 
   def _backprop_call(self, args):
     """Calls the wrapped function and records the result on a tape."""
     all_args = args + self._extra_inputs
-    signature = self._forward_fdef.signature
+    signature = self._forward_fdef.definition.signature
     ctx = context.context()
     if ctx.in_graph_mode():
       g = ops.get_default_graph()
@@ -378,7 +325,7 @@ class GraphModeFunction(object):
         return ops.internal_convert_to_tensor(x, ctx=ctx)
       op = g.create_op(
           signature.name, [make_tensor(x) for x in all_args],
-          tuple(dtypes_module.DType(x.type) for x in signature.output_arg),
+          [dtypes_module.DType(x.type) for x in signature.output_arg],
           op_def=signature,
           name="FunctionCall",
           compute_shapes=False)
@@ -414,8 +361,11 @@ class GraphModeFunction(object):
       if v._trainable:  # pylint: disable=protected-access
         tape.watch_variable(v)
 
-    tensor_inputs = [x for x in nest.flatten(args)
-                     if isinstance(x, ops.Tensor)]
+    tensor_inputs = [
+        x for x in nest.flatten(args)
+        if isinstance(x, ops.Tensor)
+    ]
+
     if tape.should_record(tensor_inputs) or tape.should_record(
         self._extra_inputs):
       if not self._has_backprop:
@@ -434,7 +384,7 @@ class GraphModeFunction(object):
       args = list(tensor_inputs) + self._extra_inputs
       op = g.create_op(
           signature.name, [ops.convert_to_tensor(x) for x in args],
-          tuple(dtypes_module.DType(x.type) for x in signature.output_arg),
+          [dtypes_module.DType(x.type) for x in signature.output_arg],
           op_def=signature,
           name="FunctionCall",
           compute_shapes=False)
@@ -519,32 +469,29 @@ def _defun_internal(name, func, args, kwds):
         extra_inputs = []
         extra_placeholders = []
       outputs_list = nest.flatten(func_outputs)
-      output_shapes = tuple(x.shape for x in outputs_list if x is not None)
+      output_shapes = [x.shape for x in outputs_list if x is not None]
 
-  flat_inputs = [x for x in nest.flatten(func_inputs)
-                 if isinstance(x, ops.Tensor)]
+  flat_inputs = [
+      x for x in nest.flatten(func_inputs) if isinstance(x, ops.Tensor)
+  ]
   all_inputs = flat_inputs + list(extra_placeholders)
-  all_ignored_ops = frozenset(x.op for x in all_inputs)
+
   func_def_outputs = [x for x in outputs_list if x is not None]
-  fname = _inference_name(name)
-  operations = tuple(x for x in tmp_graph.get_operations()
-                     if x not in all_ignored_ops)
-  inference_function_def, fn = make_function_def(
-      fname, tmp_graph, operations, all_inputs, func_def_outputs)
+  inference_function_def = make_function_def(
+      tmp_graph, tmp_graph.get_operations(), all_inputs, func_def_outputs)
   # Register any other functions defined in the graph
   # TODO(ashankar): Oh lord, forgive me for this lint travesty.
   for f in tmp_graph._functions.values():  # pylint: disable=protected-access
     # TODO(ashankar): What about the gradient registry?
-    _register(f._c_func)  # pylint: disable=protected-access
-  _register(fn)
+    _register_with_name(f.name, f.definition)
+  _register_with_name(_inference_name(name), inference_function_def)
 
   return GraphModeFunction(
       all_inputs,
       extra_inputs,
       inference_function_def,
-      fn,
       tmp_graph,
-      operations,
+      tmp_graph.get_operations(),
       func_outputs,
       _map_sequence_obj_to_idx(func_def_outputs),
       output_shapes,
@@ -570,9 +517,10 @@ def _cache_key(x):
   return x
 
 
-def _register(fn):
-  """Registers the function `fn`."""
-  context.context().add_function(fn)
+def _register_with_name(name, fdef):
+  """Registers the function `fdef` with the name `name`."""
+  fdef.signature.name = name
+  context.context().add_function_def(fdef)
 
 
 # TODO(apassos): better error messages for non-hashable arguments.
diff --git a/tensorflow/python/eager/graph_callable.py b/tensorflow/python/eager/graph_callable.py
index 3da100d800..faf0ac88bc 100644
--- a/tensorflow/python/eager/graph_callable.py
+++ b/tensorflow/python/eager/graph_callable.py
@@ -318,9 +318,7 @@ def _graph_callable_internal(func, shape_and_dtypes):
   placeholder_inputs = flat_inputs+ list(extra_placeholders)
 
   func_def_outputs = [x for x in outputs_list if isinstance(x, tf_ops.Tensor)]
-  initialization_name = function._inference_name(func.__name__)  # pylint: disable=protected-access
-  initializer_function_def, initializer_fn = function.make_function_def(
-      initialization_name,
+  initializer_function_def = function.make_function_def(
       tmp_graph,
       initializing_operations,
       placeholder_inputs,
@@ -329,13 +327,13 @@ def _graph_callable_internal(func, shape_and_dtypes):
   # Also, what about the gradient registry of these functions? Those need to be
   # addressed as well.
   for f in tmp_graph._functions.values():  # pylint: disable=protected-access
-    function._register(f._c_func)  # pylint: disable=protected-access
-  function._register(initializer_fn)  # pylint: disable=protected-access
+    function._register_with_name(f.name, f.definition)  # pylint: disable=protected-access
+  function._register_with_name(function._inference_name(func.__name__),  # pylint: disable=protected-access
+                               initializer_function_def)
   initializer_function = function.GraphModeFunction(
       placeholder_inputs,
       extra_inputs,
       initializer_function_def,
-      initializer_fn,
       tmp_graph,
       initializing_operations,
       func_outputs,
@@ -344,20 +342,18 @@ def _graph_callable_internal(func, shape_and_dtypes):
 
   capture_func_def_outputs = [
       x for x in captured_outlist if isinstance(x, tf_ops.Tensor)]
-  captured_function_name = function._inference_name(func.__name__)  # pylint: disable=protected-access
-  captured_function_def, capturing_fn = function.make_function_def(
-      captured_function_name,
+  captured_function_def = function.make_function_def(
       tmp_graph,
       capturing_operations,
       placeholder_inputs,
       capture_func_def_outputs)
-  function._register(capturing_fn)  # pylint: disable=protected-access
+  function._register_with_name(function._inference_name(func.__name__),  # pylint: disable=protected-access
+                               captured_function_def)
 
   captured_function = function.GraphModeFunction(
       placeholder_inputs,
       extra_inputs,
       captured_function_def,
-      capturing_fn,
       tmp_graph,
       capturing_operations,
       captured_outputs,
diff --git a/tensorflow/python/eager/graph_callable_test.py b/tensorflow/python/eager/graph_callable_test.py
index b9e6ca2a93..548e16a909 100644
--- a/tensorflow/python/eager/graph_callable_test.py
+++ b/tensorflow/python/eager/graph_callable_test.py
@@ -152,6 +152,7 @@ class GraphCallableTest(test.TestCase):
     self.assertAllEqual(5, f(constant_op.constant(2)))
 
   def testNestedFunction(self):
+
     # TensorFlow function (which is what would be used in TensorFlow graph
     # construction).
     @function.Defun(dtypes.int32, dtypes.int32)
diff --git a/tensorflow/python/framework/ops.py b/tensorflow/python/framework/ops.py
index 36daf59647..2217513966 100644
--- a/tensorflow/python/framework/ops.py
+++ b/tensorflow/python/framework/ops.py
@@ -599,6 +599,11 @@ class Tensor(_TensorLike):
     """
     return _eval_using_default_session(self, feed_dict, self.graph, session)
 
+  def _dup(self):
+    ret = copy.copy(self)
+    ret._id = uid()  # pylint: disable=protected-access
+    return ret
+
 
 # TODO(agarwal): consider getting rid of this.
 class _EagerTensorBase(Tensor):
@@ -724,6 +729,9 @@ class _EagerTensorBase(Tensor):
     return new_tensor
     # pylint: enable=protected-access
 
+  def _dup(self):
+    return self._copy(device_name=self.device)
+
   @property
   def shape(self):
     return tensor_shape.TensorShape(self._shape_tuple())
@@ -1786,7 +1794,7 @@ class Operation(object):
       c_api.SetRequestedDevice(
           self._graph._c_graph,  # pylint: disable=protected-access
           self._c_op,  # pylint: disable=protected-access
-          compat.as_text(_device_string(device)))
+          _device_string(device))
     else:
       self._node_def.device = _device_string(device)
 
@@ -2075,7 +2083,7 @@ class Operation(object):
 
   def _set_attr(self, attr_name, attr_value):
     """Private method used to set an attribute in the node_def."""
-    if self._c_op:
+    if _USE_C_API:
       buf = c_api.TF_NewBufferFromString(
           compat.as_bytes(attr_value.SerializeToString()))
       try:
@@ -2644,16 +2652,11 @@ class Graph(object):
 
     # TODO(skyewm): fold as much of the above as possible into the C
     # implementation
-    if _USE_C_API or self._use_c_api_hack():
+    if _USE_C_API:
       self._scoped_c_graph = c_api_util.ScopedTFGraph()
     else:
       self._scoped_c_graph = None
 
-  # TODO(apassos) remove once the C API is used by default.
-  def _use_c_api_hack(self):
-    """Temporary hack; can be overridden to force C API usage."""
-    return False
-
   def _convert_stack(self, stack, include_func_start_lineno=False):
     """Converts a stack extracted using _extract_stack() to a traceback stack.
 
@@ -2982,14 +2985,9 @@ class Graph(object):
     # Add function to graph
     # pylint: disable=protected-access
     if self._c_graph:
-      # Handle functions created without using the C API. TODO(apassos,skyewm)
-      # remove this when all functions are generated using the C API by default
-      # as this will be unnecessary.
-      if not function._c_func:
-        with errors.raise_exception_on_not_ok_status() as status:
-          serialized = function.definition.SerializeToString()
-          function._c_func = c_api.TF_FunctionImportFunctionDef(
-              serialized, status)
+      assert function._c_func, (
+          "Cannot add function created without C API support to graph "
+          "created with C API support")
       with errors.raise_exception_on_not_ok_status() as status:
         gradient = function._grad_func._c_func if function._grad_func else None
         c_api.TF_GraphCopyFunction(self._c_graph, function._c_func, gradient,
diff --git a/tensorflow/python/pywrap_tfe.i b/tensorflow/python/pywrap_tfe.i
index 82750e9e49..82b154164e 100644
--- a/tensorflow/python/pywrap_tfe.i
+++ b/tensorflow/python/pywrap_tfe.i
@@ -18,7 +18,6 @@ limitations under the License.
 %rename("%s") TFE_NewContext;
 %rename("%s") TFE_DeleteContext;
 %rename("%s") TFE_ContextListDevices;
-%rename("%s") TFE_ContextAddFunction;
 %rename("%s") TFE_ContextAddFunctionDef;
 %rename("%s") TFE_OpNameGetAttrType;
 %rename("%s") TFE_Py_InitEagerTensor;
@@ -150,7 +149,7 @@ limitations under the License.
   }
   $1 = &temp;
   $1->resize(PyInt_AsLong($input), nullptr);
-} 
+}
 
 // Create new Status object.
 %typemap(in, numinputs=0) TF_Status *out_status {
-- 
GitLab


From bec3d96c1f9973c22136f6fd33388edbd78f0824 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 30 Nov 2017 00:31:02 -0800
Subject: [PATCH 0448/1225] Automated g4 rollback of changelist 177362829

PiperOrigin-RevId: 177419730
---
 tensorflow/compiler/xla/tests/slice_test.cc | 88 ++++++---------------
 1 file changed, 23 insertions(+), 65 deletions(-)

diff --git a/tensorflow/compiler/xla/tests/slice_test.cc b/tensorflow/compiler/xla/tests/slice_test.cc
index 981d075089..c21124750a 100644
--- a/tensorflow/compiler/xla/tests/slice_test.cc
+++ b/tensorflow/compiler/xla/tests/slice_test.cc
@@ -26,7 +26,6 @@ limitations under the License.
 #include "tensorflow/compiler/xla/tests/literal_test_util.h"
 #include "tensorflow/compiler/xla/tests/test_macros.h"
 #include "tensorflow/core/lib/gtl/array_slice.h"
-#include "tensorflow/core/lib/strings/stringprintf.h"
 #include "tensorflow/core/platform/test.h"
 #include "tensorflow/core/platform/types.h"
 
@@ -212,13 +211,6 @@ class SliceR1Test : public ClientLibraryTestBase,
   }
 };
 
-string SliceR1TestDataToString(const ::testing::TestParamInfo<R1Spec>& data) {
-  const R1Spec& spec = data.param;
-  return ::tensorflow::strings::Printf("%lld_%lld_%lld_%lld", spec.input_dim0,
-                                       spec.slice_start, spec.slice_limit,
-                                       spec.slice_stride);
-}
-
 XLA_TEST_P(SliceR1Test, DoIt_F32) { Run<float>(GetParam()); }
 
 XLA_TEST_P(SliceR1Test, DoIt_F64) { Run<double>(GetParam()); }
@@ -231,64 +223,30 @@ XLA_TEST_P(SliceR1Test, DoIt_U64) { Run<uint64>(GetParam()); }
 
 XLA_TEST_P(SliceR1Test, DoIt_S64) { Run<int64>(GetParam()); }
 
-// Tests for R1 slice ops.
-// The format for each testcase is {input size, start, limit, stride}.
-// clang-format off
-INSTANTIATE_TEST_CASE_P(
-    SliceR1TestInstantiation,
-    SliceR1Test,
-    ::testing::Values(
-        R1Spec{10, 0, 0, 1},
-        R1Spec{10, 7, 7, 1},
-        R1Spec{10, 0, 5, 1},
-        R1Spec{10, 3, 5, 1},
-        R1Spec{10, 0, 10, 1},
-        R1Spec{1024, 0, 5, 1},
-        R1Spec{1024, 3, 5, 1},
-        R1Spec{1024 + 17, 0, 5, 1},
-        R1Spec{1024 + 17, 3, 5, 1},
-        R1Spec{1024 + 17, 1024, 1024 + 6, 1},
-        R1Spec{1024 + 17, 1024 + 1, 1024 + 6, 1},
-        R1Spec{1024, 1024 - 4, 1024, 1},
-        R1Spec{4 * 1024, 7, 7 + 1024, 1},
-        R1Spec{4 * 1024, 0, 4 * 1024, 1},
-        R1Spec{4 * 1024, 1, 4 * 1024 - 1, 1},
-        R1Spec{4 * 1024, 1024, 3 * 1024, 1},
-        R1Spec{4 * 1024, 1024 + 1, 3 * 1024 - 1, 1},
-        R1Spec{16 * 1024, 0, 5, 1},
-        R1Spec{16 * 1024, 3, 5, 1},
-        R1Spec{16 * 1024 + 17, 0, 5, 1},
-        R1Spec{16 * 1024 + 17, 3, 5, 1},
-        R1Spec{16 * 1024 + 17, 16 * 1024, 16 * 1024 + 6, 1},
-        R1Spec{16 * 1024 + 17, 16 * 1024 + 1, 16 * 1024 + 6, 1},
-        R1Spec{64 * 1024, 0, 64 * 1024, 1},
-        R1Spec{64 * 1024, 1, 64 * 1024 - 1, 1},
-        R1Spec{64 * 1024, 1024, 63 * 1024, 1},
-        R1Spec{64 * 1024, 1024 + 1, 63 * 1024 - 1, 1},
-        R1Spec{64 * 1024, 32 * 1024, 33 * 1024, 1},
-        R1Spec{64 * 1024, 32 * 1024 + 1, 33 * 1024 - 1, 1},
-// TODO(b/69425338): This uses too much memory on GPU.
-#ifndef XLA_TEST_BACKEND_GPU
-        R1Spec{16 * 1024 * 1024, 4 * 1024 * 1024, 12 * 1024 * 1024, 1},
-        R1Spec{16 * 1024 * 1024, 4 * 1024 * 1024 + 1, 12 * 1024 * 1024 - 1, 1},
-        R1Spec{16 * 1024 * 1024, 4 * 1024 * 1024 - 1, 12 * 1024 * 1024 + 1, 1},
-#endif
-        R1Spec{10, 2, 4, 2},
-        R1Spec{10, 0, 10, 2},
-        R1Spec{10, 0, 10, 3},
-        R1Spec{10, 0, 10, 4},
-        R1Spec{10, 0, 10, 5},
-        R1Spec{10, 0, 10, 10},
-        R1Spec{500, 200, 400, 7},
-        R1Spec{4096, 1, 4095, 3},
-        R1Spec{2047, 1024 - 24, 1024 + 160, 31},
-        R1Spec{2047, 1, 2046, 3 * 128},
-        R1Spec{4096, 1024 + 3, 4095, 500},
-        R1Spec{8192, 0, 8192, 1024 * 3 + 400}
-        ),
-    SliceR1TestDataToString
+INSTANTIATE_TEST_CASE_P(                          //
+    SliceR1TestInstantiation,                     //
+    SliceR1Test,                                  //
+    ::testing::Values(                            //
+        R1Spec{10, 0, 0, 1},                      //
+        R1Spec{10, 7, 7, 1},                      //
+        R1Spec{10, 2, 4, 1},                      //
+        R1Spec{10, 2, 4, 2},                      //
+        R1Spec{10, 0, 10, 1},                     //
+        R1Spec{1024, 1024 - 4, 1024, 1},          //
+        R1Spec{4096, 7, 7 + 1024, 1},             //
+        R1Spec{10, 0, 10, 2},                     //
+        R1Spec{10, 0, 10, 3},                     //
+        R1Spec{10, 0, 10, 4},                     //
+        R1Spec{10, 0, 10, 5},                     //
+        R1Spec{10, 0, 10, 10},                    //
+        R1Spec{500, 200, 400, 7},                 //
+        R1Spec{4096, 1, 4095, 3},                 //
+        R1Spec{2047, 1024 - 24, 1024 + 160, 31},  //
+        R1Spec{2047, 1, 2046, 3 * 128},           //
+        R1Spec{4096, 1024 + 3, 4095, 500},        //
+        R1Spec{8192, 0, 8192, 1024 * 3 + 400}     //
+        )                                         //
 );
-// clang-format on
 
 struct R2Spec {
   int64 input_dim0;
-- 
GitLab


From ad3213bc53d9905c788509948412ad9703fa976b Mon Sep 17 00:00:00 2001
From: Gunhan Gulsoy <gunan@google.com>
Date: Thu, 30 Nov 2017 01:21:49 -0800
Subject: [PATCH 0449/1225] Disable baseline_test in asan.

PiperOrigin-RevId: 177423981
---
 tensorflow/python/estimator/BUILD | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tensorflow/python/estimator/BUILD b/tensorflow/python/estimator/BUILD
index 8e6945b0f3..e062e1fbfe 100644
--- a/tensorflow/python/estimator/BUILD
+++ b/tensorflow/python/estimator/BUILD
@@ -215,6 +215,7 @@ py_test(
     srcs_version = "PY2AND3",
     tags = [
         "no_pip",
+        "noasan",  # test flakily times out in asan mode.
         "notsan",  # b/67510291
     ],
     deps = [
-- 
GitLab


From 8a98563eb6d552f0bd0931f83837640481c1f938 Mon Sep 17 00:00:00 2001
From: Peter Hawkins <phawkins@google.com>
Date: Thu, 30 Nov 2017 06:13:53 -0800
Subject: [PATCH 0450/1225] Add support for int32 output types to the
 Multinomial op.

PiperOrigin-RevId: 177444775
---
 .../compiler/tests/categorical_op_test.py     | 38 ++++++++------
 tensorflow/core/kernels/multinomial_op.cc     | 51 ++++++++++++-------
 tensorflow/core/kernels/multinomial_op.h      |  2 +-
 .../core/kernels/multinomial_op_gpu.cu.cc     | 30 +++++++----
 tensorflow/core/ops/random_ops.cc             |  3 +-
 .../random/multinomial_op_test.py             | 14 ++---
 tensorflow/python/ops/random_ops.py           |  5 +-
 tensorflow/tools/api/golden/tensorflow.pbtxt  |  2 +-
 8 files changed, 90 insertions(+), 55 deletions(-)

diff --git a/tensorflow/compiler/tests/categorical_op_test.py b/tensorflow/compiler/tests/categorical_op_test.py
index 5e06f9a724..035cdea178 100644
--- a/tensorflow/compiler/tests/categorical_op_test.py
+++ b/tensorflow/compiler/tests/categorical_op_test.py
@@ -35,6 +35,9 @@ from tensorflow.python.platform import googletest
 class CategoricalTest(XLATestCase):
   """Test cases for random-number generating operators."""
 
+  def output_dtypes(self):
+    return set(self.int_types).intersection([np.int32, np.int64])
+
   def _chi2(self, expected, actual):
     """Returns Chi2 GOF statistic."""
     actual = np.asarray(actual)
@@ -55,7 +58,8 @@ class CategoricalTest(XLATestCase):
     """
     with self.test_session() as sess, self.test_scope():
       random_seed.set_random_seed(1618)
-      op = random_ops.multinomial(logits, num_samples)
+      op = random_ops.multinomial(logits, num_samples,
+                                  output_dtype=dtypes.int32)
       d = sess.run(op)
 
     batch_size, num_classes = logits.shape
@@ -73,11 +77,11 @@ class CategoricalTest(XLATestCase):
 
     return freqs_mat
 
-  def _testRngIsNotConstant(self, rng, dtype):
+  def _testRngIsNotConstant(self, rng, dtype, output_dtype):
     # Tests that 'rng' does not always return the same value.
     with self.test_session() as sess:
       with self.test_scope():
-        x = rng(dtype)
+        x = rng(dtype, output_dtype)
 
       # The random-number generator, if working correctly, should produce the
       # same output multiple times with low probability.
@@ -92,21 +96,25 @@ class CategoricalTest(XLATestCase):
                       (not np.array_equal(y, w)))
 
   def testCategoricalIsNotConstant(self):
-    def rng(unused_dtype):
-      return random_ops.multinomial([[1., 1., 1.]], 10)
+    def rng(dtype, output_dtype):
+      return random_ops.multinomial(np.array([[1., 1., 1.]], dtype=dtype), 10,
+                                    output_dtype=output_dtype)
 
-    dtype = dtypes.float32
-    self._testRngIsNotConstant(rng, dtype)
+    dtype = np.float32
+    for output_dtype in self.output_dtypes():
+      self._testRngIsNotConstant(rng, dtype, output_dtype)
 
   def testCategoricalIsInRange(self):
-    for dtype in [dtypes.float32, dtypes.float64]:
-      with self.test_session() as sess:
-        with self.test_scope():
-          x = random_ops.multinomial(
-              array_ops.ones(shape=[1, 20], dtype=dtype), 1000)
-        y = sess.run(x)
-        self.assertTrue((y >= 0).sum() == 1000)
-        self.assertTrue((y < 20).sum() == 1000)
+    for dtype in self.float_types:
+      for output_dtype in self.output_dtypes():
+        with self.test_session() as sess:
+          with self.test_scope():
+            x = random_ops.multinomial(
+                array_ops.ones(shape=[1, 20], dtype=dtype), 1000,
+                output_dtype=output_dtype)
+          y = sess.run(x)
+          self.assertTrue((y >= 0).sum() == 1000)
+          self.assertTrue((y < 20).sum() == 1000)
 
   def testSamplingCorrectness(self):
     np.random.seed(1618)  # Make it reproducible.
diff --git a/tensorflow/core/kernels/multinomial_op.cc b/tensorflow/core/kernels/multinomial_op.cc
index 8c0109f5c8..d086abb247 100644
--- a/tensorflow/core/kernels/multinomial_op.cc
+++ b/tensorflow/core/kernels/multinomial_op.cc
@@ -40,7 +40,7 @@ typedef Eigen::GpuDevice GPUDevice;
 
 namespace functor {
 
-template <typename Device, typename T>
+template <typename Device, typename T, typename OutputType>
 struct MultinomialFunctor {
   void operator()(OpKernelContext* ctx, const Device& d,
                   typename TTypes<T>::ConstMatrix logits,
@@ -49,11 +49,11 @@ struct MultinomialFunctor {
                   typename TTypes<float>::Flat scratch, int batch_size,
                   int num_classes, int num_samples,
                   const random::PhiloxRandom& gen,
-                  typename TTypes<int64>::Matrix output);
+                  typename TTypes<OutputType>::Matrix output);
 };
 
-template <typename T>
-struct MultinomialFunctor<CPUDevice, T> {
+template <typename T, typename OutputType>
+struct MultinomialFunctor<CPUDevice, T, OutputType> {
   void operator()(OpKernelContext* ctx, const CPUDevice& d,
                   typename TTypes<T>::ConstMatrix logits,
                   typename TTypes<float>::Flat /* noises */,
@@ -61,7 +61,7 @@ struct MultinomialFunctor<CPUDevice, T> {
                   typename TTypes<float>::Flat /* scratch */, int batch_size,
                   int num_classes, int num_samples,
                   const random::PhiloxRandom& gen,
-                  typename TTypes<int64>::Matrix output) {
+                  typename TTypes<OutputType>::Matrix output) {
     auto worker_threads = *(ctx->device()->tensorflow_cpu_worker_threads());
 
     // The implementation only parallelizes by batch.
@@ -128,7 +128,7 @@ struct MultinomialFunctor<CPUDevice, T> {
 }  // namespace functor
 
 // Samples from a multinomial distribution.
-template <typename Device, typename T>
+template <typename Device, typename T, typename OutputType>
 class MultinomialOp : public OpKernel {
  public:
   explicit MultinomialOp(OpKernelConstruction* context) : OpKernel(context) {
@@ -195,11 +195,11 @@ class MultinomialOp : public OpKernel {
       if (std::is_same<Device, CPUDevice>::value) num_samples_ceil_4 *= 2;
       auto rng =
           generator_.ReserveRandomOutputs(batch_size * num_samples_ceil_4, 256);
-      functor::MultinomialFunctor<Device, T>()(
+      functor::MultinomialFunctor<Device, T, OutputType>()(
           ctx, ctx->eigen_device<Device>(), logits_t.matrix<T>(),
           noises.flat<float>(), scores.flat<float>(), scratch.flat<float>(),
           batch_size, num_classes, num_samples, rng,
-          samples_t->matrix<int64>());
+          samples_t->matrix<OutputType>());
     }
   }
 
@@ -209,10 +209,17 @@ class MultinomialOp : public OpKernel {
   TF_DISALLOW_COPY_AND_ASSIGN(MultinomialOp);
 };
 
-#define REGISTER(TYPE)                                                  \
-  REGISTER_KERNEL_BUILDER(                                              \
-      Name("Multinomial").Device(DEVICE_CPU).TypeConstraint<TYPE>("T"), \
-      MultinomialOp<CPUDevice, TYPE>);
+#define REGISTER(TYPE)                                                   \
+  REGISTER_KERNEL_BUILDER(Name("Multinomial")                            \
+                              .Device(DEVICE_CPU)                        \
+                              .TypeConstraint<TYPE>("T")                 \
+                              .TypeConstraint("output_dtype", DT_INT32), \
+                          MultinomialOp<CPUDevice, TYPE, int32>);        \
+  REGISTER_KERNEL_BUILDER(Name("Multinomial")                            \
+                              .Device(DEVICE_CPU)                        \
+                              .TypeConstraint<TYPE>("T")                 \
+                              .TypeConstraint("output_dtype", DT_INT64), \
+                          MultinomialOp<CPUDevice, TYPE, int64>);
 
 TF_CALL_half(REGISTER);
 TF_CALL_float(REGISTER);
@@ -220,12 +227,20 @@ TF_CALL_double(REGISTER);
 #undef REGISTER
 
 #if GOOGLE_CUDA
-#define REGISTER(TYPE)                                    \
-  REGISTER_KERNEL_BUILDER(Name("Multinomial")             \
-                              .Device(DEVICE_GPU)         \
-                              .HostMemory("num_samples")  \
-                              .TypeConstraint<TYPE>("T"), \
-                          MultinomialOp<GPUDevice, TYPE>)
+#define REGISTER(TYPE)                                                   \
+  REGISTER_KERNEL_BUILDER(Name("Multinomial")                            \
+                              .Device(DEVICE_GPU)                        \
+                              .HostMemory("num_samples")                 \
+                              .TypeConstraint<TYPE>("T")                 \
+                              .TypeConstraint("output_dtype", DT_INT32), \
+                          MultinomialOp<GPUDevice, TYPE, int32>)         \
+  REGISTER_KERNEL_BUILDER(Name("Multinomial")                            \
+                              .Device(DEVICE_GPU)                        \
+                              .HostMemory("num_samples")                 \
+                              .TypeConstraint<TYPE>("T")                 \
+                              .TypeConstraint("output_dtype", DT_INT64), \
+                          MultinomialOp<GPUDevice, TYPE, int64>)
+
 TF_CALL_half(REGISTER);
 TF_CALL_float(REGISTER);
 TF_CALL_double(REGISTER);
diff --git a/tensorflow/core/kernels/multinomial_op.h b/tensorflow/core/kernels/multinomial_op.h
index af5e81f219..6e41060aa4 100644
--- a/tensorflow/core/kernels/multinomial_op.h
+++ b/tensorflow/core/kernels/multinomial_op.h
@@ -21,7 +21,7 @@ namespace tensorflow {
 namespace functor {
 
 // Generic helper functor for the Multinomial Op.
-template <typename Device, typename T>
+template <typename Device, typename T, typename OutputType>
 struct MultinomialFunctor;
 
 }  // namespace functor
diff --git a/tensorflow/core/kernels/multinomial_op_gpu.cu.cc b/tensorflow/core/kernels/multinomial_op_gpu.cu.cc
index 19b4f3ca55..5cc5877cce 100644
--- a/tensorflow/core/kernels/multinomial_op_gpu.cu.cc
+++ b/tensorflow/core/kernels/multinomial_op_gpu.cu.cc
@@ -37,20 +37,22 @@ using GPUDevice = Eigen::GpuDevice;
 
 // Kernel for Multinomial op.  Data is interpreted to have the following shapes:
 //   scores: [B, S, C];  maxima: [B, S];  output: [B, S].
+template <typename OutputType>
 __global__ void MultinomialKernel(int32 nthreads, const int32 num_classes,
                                   const int32 num_samples, const float* scores,
-                                  const float* maxima, int64* output) {
+                                  const float* maxima, OutputType* output) {
   CUDA_1D_KERNEL_LOOP(index, nthreads) {
     const int maxima_idx = index / num_classes;
     if (ldg(maxima + maxima_idx) == ldg(scores + index)) {
-      CudaAtomicMax(reinterpret_cast<uint64*>(output + maxima_idx),
-                    static_cast<uint64>(index % num_classes));
+      using UnsignedOutputType = typename std::make_unsigned<OutputType>::type;
+      CudaAtomicMax(reinterpret_cast<UnsignedOutputType*>(output + maxima_idx),
+                    static_cast<UnsignedOutputType>(index % num_classes));
     }
   }
 }
 
-template <typename T>
-struct MultinomialFunctor<GPUDevice, T> {
+template <typename T, typename OutputType>
+struct MultinomialFunctor<GPUDevice, T, OutputType> {
   void operator()(OpKernelContext* ctx, const GPUDevice& d,
                   typename TTypes<T>::ConstMatrix logits,
                   typename TTypes<float>::Flat noises,
@@ -58,7 +60,7 @@ struct MultinomialFunctor<GPUDevice, T> {
                   typename TTypes<float>::Flat maxima, int batch_size,
                   int num_classes, int num_samples,
                   const random::PhiloxRandom& gen,
-                  typename TTypes<int64>::Matrix output) {
+                  typename TTypes<OutputType>::Matrix output) {
     // Uniform, [0, 1).
     typedef random::UniformDistribution<random::PhiloxRandom, float> Dist;
     functor::FillPhiloxRandom<GPUDevice, Dist>()(ctx, d, gen, noises.data(),
@@ -111,11 +113,17 @@ struct MultinomialFunctor<GPUDevice, T> {
 };
 
 // Explicit instantiation of the GPU functors.
-template struct MultinomialFunctor<GPUDevice, Eigen::half>;
-template struct MultinomialFunctor<GPUDevice, float>;
-template struct MultinomialFunctor<GPUDevice, double>;
-template struct MultinomialFunctor<GPUDevice, int32>;
-template struct MultinomialFunctor<GPUDevice, int64>;
+template struct MultinomialFunctor<GPUDevice, Eigen::half, int32>;
+template struct MultinomialFunctor<GPUDevice, float, int32>;
+template struct MultinomialFunctor<GPUDevice, double, int32>;
+template struct MultinomialFunctor<GPUDevice, int32, int32>;
+template struct MultinomialFunctor<GPUDevice, int64, int32>;
+
+template struct MultinomialFunctor<GPUDevice, Eigen::half, int64>;
+template struct MultinomialFunctor<GPUDevice, float, int64>;
+template struct MultinomialFunctor<GPUDevice, double, int64>;
+template struct MultinomialFunctor<GPUDevice, int32, int64>;
+template struct MultinomialFunctor<GPUDevice, int64, int64>;
 
 }  // namespace functor
 }  // namespace tensorflow
diff --git a/tensorflow/core/ops/random_ops.cc b/tensorflow/core/ops/random_ops.cc
index 2429171fa9..5a436fb93e 100644
--- a/tensorflow/core/ops/random_ops.cc
+++ b/tensorflow/core/ops/random_ops.cc
@@ -201,10 +201,11 @@ REGISTER_OP("Multinomial")
     .SetIsStateful()
     .Input("logits: T")
     .Input("num_samples: int32")
-    .Output("output: int64")
+    .Output("output: output_dtype")
     .Attr("seed: int = 0")
     .Attr("seed2: int = 0")
     .Attr("T: realnumbertype")
+    .Attr("output_dtype: {int32, int64} = DT_INT64")
     .SetShapeFn([](InferenceContext* c) {
       ShapeHandle logits_shape;
       ShapeHandle unused;
diff --git a/tensorflow/python/kernel_tests/random/multinomial_op_test.py b/tensorflow/python/kernel_tests/random/multinomial_op_test.py
index ca48ba6cad..a9dc7b7de0 100644
--- a/tensorflow/python/kernel_tests/random/multinomial_op_test.py
+++ b/tensorflow/python/kernel_tests/random/multinomial_op_test.py
@@ -57,12 +57,14 @@ class MultinomialTest(test.TestCase):
   @test_util.run_in_graph_and_eager_modes()
   def testSmallEntropy(self):
     random_seed.set_random_seed(1618)
-    with test_util.device(use_gpu=True):
-      # A logit value of -10 corresponds to a probability of ~5e-5.
-      logits = constant_op.constant([[-10., 10., -10.], [-10., -10., 10.]])
-      num_samples = 1000
-      samples = self.evaluate(random_ops.multinomial(logits, num_samples))
-      self.assertAllEqual([[1] * num_samples, [2] * num_samples], samples)
+    for output_dtype in [np.int32, np.int64]:
+      with test_util.device(use_gpu=True):
+        # A logit value of -10 corresponds to a probability of ~5e-5.
+        logits = constant_op.constant([[-10., 10., -10.], [-10., -10., 10.]])
+        num_samples = 1000
+        samples = self.evaluate(random_ops.multinomial(
+            logits, num_samples, output_dtype=output_dtype))
+        self.assertAllEqual([[1] * num_samples, [2] * num_samples], samples)
 
   def testOneOpMultipleStepsIndependent(self):
     with self.test_session(use_gpu=True) as sess:
diff --git a/tensorflow/python/ops/random_ops.py b/tensorflow/python/ops/random_ops.py
index 52fb5131cf..afaff8ca41 100644
--- a/tensorflow/python/ops/random_ops.py
+++ b/tensorflow/python/ops/random_ops.py
@@ -316,7 +316,7 @@ def random_crop(value, size, seed=None, name=None):
     return array_ops.slice(value, offset, size, name=name)
 
 
-def multinomial(logits, num_samples, seed=None, name=None):
+def multinomial(logits, num_samples, seed=None, name=None, output_dtype=None):
   """Draws samples from a multinomial distribution.
 
   Example:
@@ -336,6 +336,7 @@ def multinomial(logits, num_samples, seed=None, name=None):
       @{tf.set_random_seed}
       for behavior.
     name: Optional name for the operation.
+    output_dtype: integer type to use for the output. Defaults to int64.
 
   Returns:
     The drawn samples of shape `[batch_size, num_samples]`.
@@ -344,7 +345,7 @@ def multinomial(logits, num_samples, seed=None, name=None):
     logits = ops.convert_to_tensor(logits, name="logits")
     seed1, seed2 = random_seed.get_seed(seed)
     return gen_random_ops.multinomial(
-        logits, num_samples, seed=seed1, seed2=seed2)
+        logits, num_samples, seed=seed1, seed2=seed2, output_dtype=output_dtype)
 
 
 ops.NotDifferentiable("Multinomial")
diff --git a/tensorflow/tools/api/golden/tensorflow.pbtxt b/tensorflow/tools/api/golden/tensorflow.pbtxt
index 0edd4153d7..57573d5024 100644
--- a/tensorflow/tools/api/golden/tensorflow.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.pbtxt
@@ -1394,7 +1394,7 @@ tf_module {
   }
   member_method {
     name: "multinomial"
-    argspec: "args=[\'logits\', \'num_samples\', \'seed\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
+    argspec: "args=[\'logits\', \'num_samples\', \'seed\', \'name\', \'output_dtype\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], "
   }
   member_method {
     name: "multiply"
-- 
GitLab


From 976049bb0bcdebe10d0a67f6c843f2b51eb1348c Mon Sep 17 00:00:00 2001
From: Skye Wanderman-Milne <skyewm@google.com>
Date: Thu, 30 Nov 2017 09:09:16 -0800
Subject: [PATCH 0451/1225] Implement Python-specific device and colocation
 logic in import_graph_def with C API enabled.

PiperOrigin-RevId: 177462553
---
 tensorflow/python/framework/importer.py      |  63 ++++++++++-
 tensorflow/python/framework/importer_test.py | 108 ++++++++-----------
 tensorflow/python/framework/ops.py           |   6 +-
 3 files changed, 107 insertions(+), 70 deletions(-)

diff --git a/tensorflow/python/framework/importer.py b/tensorflow/python/framework/importer.py
index 73c35de578..ada8c30fab 100644
--- a/tensorflow/python/framework/importer.py
+++ b/tensorflow/python/framework/importer.py
@@ -251,10 +251,67 @@ def _PopulateTFImportGraphDefOptions(options, prefix, input_map,
 
 def _ProcessNewOps(graph):
   """Processes the newly-added TF_Operations in `graph`."""
-  for c_op in c_api_util.new_tf_operations(graph):
-    graph._create_op_from_tf_operation(c_op)  # pylint: disable=protected-access
+  # Maps from a node to the names of the ops it's colocated with, if colocation
+  # is specified in the attributes.
+  colocation_pairs = {}
 
-  # TODO(skyewm): colocation logic
+  for c_op in c_api_util.new_tf_operations(graph):
+    # pylint: disable=protected-access
+    new_op = graph._create_op_from_tf_operation(c_op, compute_device=False)
+    # pylint: enable=protected-access
+
+    colocation_names = _GetColocationNames(new_op)
+    if colocation_names:
+      colocation_pairs[new_op] = colocation_names
+      # Don't apply this op's device function, since colocation constraints
+      # override device functions. Note that this op's device may still be set
+      # by the loop below.
+    else:
+      with _MaybeDevice(new_op.device):
+        graph._apply_device_functions(new_op)  # pylint: disable=protected-access
+
+  # The following loop populates the device field of ops that are colocated
+  # with another op.  This is implied by the colocation attribute, but we
+  # propagate the device field for completeness.
+  for op, coloc_op_list in colocation_pairs.items():
+    coloc_device = None
+    # Find any device in the list of colocated ops that have a device, if it
+    # exists.  We assume that if multiple ops have devices, they refer to the
+    # same device.  Otherwise, a runtime error will occur since the colocation
+    # property cannot be guaranteed.
+    #
+    # One possible improvement is to try to check for compatibility of all
+    # devices in this list at import time here, which would require
+    # implementing a compatibility function for device specs in python.
+    for coloc_op_name in coloc_op_list:
+      try:
+        coloc_op = graph._get_operation_by_name_unsafe(coloc_op_name)  # pylint: disable=protected-access
+      except KeyError:
+        raise ValueError('Specified colocation to an op that '
+                         'does not exist during import: %s in %s' % (
+                             coloc_op_name, op.name))
+      if coloc_op.device:
+        coloc_device = pydev.DeviceSpec.from_string(coloc_op.device)
+        break
+    if coloc_device:
+      op._set_device(coloc_device)  # pylint: disable=protected-access
+
+
+def _GetColocationNames(op):
+  """Returns names of the ops that `op` should be colocated with."""
+  colocation_names = []
+  try:
+    class_values = op.get_attr('_class')
+  except ValueError:
+    # No _class attr
+    return
+  for val in class_values:
+    val = compat.as_str(val)
+    if val.startswith('loc:@'):
+      colocation_node_name = val[len('loc:@'):]
+      if colocation_node_name != op.name:
+        colocation_names.append(colocation_node_name)
+  return colocation_names
 
 
 def _GatherReturnElements(requested_return_elements, graph, results):
diff --git a/tensorflow/python/framework/importer_test.py b/tensorflow/python/framework/importer_test.py
index 000a88bc09..4a215abd2e 100644
--- a/tensorflow/python/framework/importer_test.py
+++ b/tensorflow/python/framework/importer_test.py
@@ -642,8 +642,6 @@ class ImportGraphDefTest(test.TestCase):
           b.node_def.attr["_class"])
 
   def testColocationWithDeviceFn(self):
-    if ops._USE_C_API: return  # TODO(skyewm): make this work with C API
-
     original_graph_def = self._MakeGraphDef("""
           node { name: 'A' op: 'None' attr {
             key: '_class'
@@ -665,23 +663,17 @@ class ImportGraphDefTest(test.TestCase):
 
     with ops.Graph().as_default():
       with ops.device(CustomDeviceFn):
-        b, = importer.import_graph_def(
-            original_graph_def, return_elements=["B"], name="imported_graph")
-
-      self.assertProtoEqualsVersion("""
-          node { name: 'imported_graph/A' op: 'None' device: "/device:A:0"
-                attr {
-                  key: '_class' value { list { s: 'loc:@imported_graph/A' } }
-                }
-          }
-          node { name: 'imported_graph/B' op: 'None' device: "/device:A:0"
-                attr {
-                  key: '_class' value { list { s: 'loc:@imported_graph/A' } }
-          } }""", b.graph.as_graph_def())
-
-    # Test a scenario where 'A' doesn't get a device; 'A' should
-    # not have a device, but during runtime will get colocated with
-    # 'B' because of the colocation attribute.
+        a, b = importer.import_graph_def(original_graph_def,
+                                         return_elements=["A", "B"],
+                                         name="imported_graph")
+      self.assertEqual(a.device, "/device:A:0")
+      self.assertEqual(b.device, "/device:A:0")
+      self.assertEqual(a.colocation_groups(), [b"loc:@imported_graph/A"])
+      self.assertEqual(b.colocation_groups(), [b"loc:@imported_graph/A"])
+
+    # Test a scenario where 'A' doesn't get a device; 'A' should not have a
+    # device, but during runtime will get colocated with 'B' because of the
+    # colocation attribute. B's device function is still overridden by A.
     def BDeviceFn(op):
       if "B" in op.name:
         return "/device:B:0"
@@ -689,19 +681,13 @@ class ImportGraphDefTest(test.TestCase):
 
     with ops.Graph().as_default():
       with ops.device(BDeviceFn):
-        b, = importer.import_graph_def(
-            original_graph_def, return_elements=["B"], name="imported_graph")
-
-      self.assertProtoEqualsVersion("""
-          node { name: 'imported_graph/A' op: 'None'
-                attr {
-                  key: '_class' value { list { s: 'loc:@imported_graph/A' } }
-                }
-          }
-          node { name: 'imported_graph/B' op: 'None'
-                attr {
-                  key: '_class' value { list { s: 'loc:@imported_graph/A' } }
-          } }""", b.graph.as_graph_def())
+        a, b = importer.import_graph_def(original_graph_def,
+                                         return_elements=["A", "B"],
+                                         name="imported_graph")
+      self.assertEqual(a.device, "")
+      self.assertEqual(b.device, "")
+      self.assertEqual(a.colocation_groups(), [b"loc:@imported_graph/A"])
+      self.assertEqual(b.colocation_groups(), [b"loc:@imported_graph/A"])
 
     # Only A gets a device, so B inherits it implicitly.
     def ADeviceFn(op):
@@ -711,23 +697,15 @@ class ImportGraphDefTest(test.TestCase):
 
     with ops.Graph().as_default():
       with ops.device(ADeviceFn):
-        b, = importer.import_graph_def(
-            original_graph_def, return_elements=["B"], name="imported_graph")
-
-      self.assertProtoEqualsVersion("""
-          node { name: 'imported_graph/A' op: 'None' device: "/device:A:0"
-                attr {
-                  key: '_class' value { list { s: 'loc:@imported_graph/A' } }
-                }
-          }
-          node { name: 'imported_graph/B' op: 'None' device: "/device:A:0"
-                attr {
-                  key: '_class' value { list { s: 'loc:@imported_graph/A' } }
-          } }""", b.graph.as_graph_def())
+        a, b = importer.import_graph_def(original_graph_def,
+                                         return_elements=["A", "B"],
+                                         name="imported_graph")
+      self.assertEqual(a.device, "/device:A:0")
+      self.assertEqual(b.device, "/device:A:0")
+      self.assertEqual(a.colocation_groups(), [b"loc:@imported_graph/A"])
+      self.assertEqual(b.colocation_groups(), [b"loc:@imported_graph/A"])
 
   def testMultipleColocationWithDeviceFn(self):
-    if ops._USE_C_API: return  # TODO(skyewm): make this work with C API
-
     original_graph_def = self._MakeGraphDef("""
           node { name: 'A' op: 'None'}
           node { name: 'B' op: 'None'}
@@ -748,23 +726,19 @@ class ImportGraphDefTest(test.TestCase):
 
     with ops.Graph().as_default():
       with ops.device(CustomDeviceFn):
-        c, = importer.import_graph_def(
-            original_graph_def, return_elements=["C"], name="imported_graph")
-
-      self.assertProtoEqualsVersion("""
-          node { name: 'imported_graph/A' op: 'None' }
-          node { name: 'imported_graph/B' op: 'None' device: "/device:B:0" }
-          node { name: 'imported_graph/C' op: 'None' device: "/device:B:0"
-                 attr {
-                   key: '_class' value {
-                     list { s: 'loc:@imported_graph/A'
-                            s: 'loc:@imported_graph/B' }
-                   }
-                 }
-               }""", c.graph.as_graph_def())
+        a, b, c = importer.import_graph_def(original_graph_def,
+                                            return_elements=["A", "B", "C"],
+                                            name="imported_graph")
+      self.assertEqual(a.device, "")
+      self.assertEqual(b.device, "/device:B:0")
+      self.assertEqual(c.device, "/device:B:0")
+      self.assertEqual(a.colocation_groups(), [b"loc:@imported_graph/A"])
+      self.assertEqual(b.colocation_groups(), [b"loc:@imported_graph/B"])
+      self.assertEqual(c.colocation_groups(),
+                       [b"loc:@imported_graph/A", b"loc:@imported_graph/B"])
 
   def testNamePrefixColocationAttrsMultipleImport(self):
-    if ops._USE_C_API: return  # TODO(skyewm): make this work with C API
+    if ops._USE_C_API: return  # TODO(skyewm): set uniquify_names
 
     original_graph_def = self._MakeGraphDef("""
           node { name: 'A' op: 'None' }
@@ -791,15 +765,19 @@ class ImportGraphDefTest(test.TestCase):
           } }""", b.graph.as_graph_def())
 
   def testNamePrefixColocationAttrsNotFound(self):
-    if ops._USE_C_API: return  # TODO(skyewm): make this work with C API
-
     original_graph_def = self._MakeGraphDef("""
           node { name: 'B' op: 'None'  attr {
             key: '_class'
             value { list { s: 'loc:@A' } }
           } }""")
+
+    if ops._USE_C_API:
+      error_msg = "Node 'B' expects to be colocated with unknown node 'A'"
+    else:
+      error_msg = "does not exist during import"
+
     with ops.Graph().as_default():
-      with self.assertRaisesRegexp(ValueError, "does not exist during import"):
+      with self.assertRaisesRegexp(ValueError, error_msg):
         importer.import_graph_def(
             original_graph_def, return_elements=["B"], name="imported_graph")
 
diff --git a/tensorflow/python/framework/ops.py b/tensorflow/python/framework/ops.py
index 2217513966..a616b15cf7 100644
--- a/tensorflow/python/framework/ops.py
+++ b/tensorflow/python/framework/ops.py
@@ -3102,7 +3102,7 @@ class Graph(object):
                            compute_device=compute_device)
     return ret
 
-  def _create_op_from_tf_operation(self, c_op):
+  def _create_op_from_tf_operation(self, c_op, compute_device=True):
     """Creates an `Operation` in this graph from the supplied TF_Operation.
 
     This method is like create_op() except the new Operation is constructed
@@ -3112,6 +3112,8 @@ class Graph(object):
 
     Args:
       c_op: a wrapped TF_Operation
+      compute_device: (Optional.) If True, device functions will be executed
+        to compute the device property of the Operation.
 
     Returns:
       An `Operation` object.
@@ -3122,7 +3124,7 @@ class Graph(object):
                     for output in tf_outputs)
     control_inputs = self._control_dependencies_for_inputs(input_ops)
     ret = Operation(c_op, self, control_inputs=control_inputs)
-    self._create_op_helper(ret)
+    self._create_op_helper(ret, compute_device=compute_device)
     return ret
 
   def _create_op_helper(self, op, compute_shapes=True, compute_device=True):
-- 
GitLab


From 5d52b95279be57076a794c2f334c150a26566360 Mon Sep 17 00:00:00 2001
From: Vishvananda Ishaya Abrams <vishvananda@gmail.com>
Date: Wed, 29 Nov 2017 22:25:37 -0800
Subject: [PATCH 0452/1225] Adds Operations() method to Graph

There is currently no way to list all of the operations in a graph
from the go api. This patch ads an Operations() method to retrieve the
list using the existing TF_GraphNextOperation c api. The graph_test
was modified to include testing this new method.

Signed-off-by: Vishvananda Ishaya Abrams <vishvananda@gmail.com>
---
 tensorflow/go/graph.go      | 14 ++++++++++++++
 tensorflow/go/graph_test.go | 22 +++++++++++++++++++---
 2 files changed, 33 insertions(+), 3 deletions(-)

diff --git a/tensorflow/go/graph.go b/tensorflow/go/graph.go
index 46c600eab1..a40aded3bf 100644
--- a/tensorflow/go/graph.go
+++ b/tensorflow/go/graph.go
@@ -114,6 +114,20 @@ func (g *Graph) Operation(name string) *Operation {
 	return &Operation{cop, g}
 }
 
+// Operations returns a list of all operations in the graph
+func (g *Graph) Operations() []Operation {
+	var pos C.size_t = 0
+	ops := []Operation{}
+	for {
+		cop := C.TF_GraphNextOperation(g.c, &pos)
+		if cop == nil {
+			break
+		}
+		ops = append(ops, Operation{cop, g})
+	}
+	return ops
+}
+
 // OpSpec is the specification of an Operation to be added to a Graph
 // (using Graph.AddOperation).
 type OpSpec struct {
diff --git a/tensorflow/go/graph_test.go b/tensorflow/go/graph_test.go
index c3120bc720..b8d65c54f6 100644
--- a/tensorflow/go/graph_test.go
+++ b/tensorflow/go/graph_test.go
@@ -29,10 +29,26 @@ func hasOperations(g *Graph, ops ...string) error {
 			missing = append(missing, op)
 		}
 	}
-	if len(missing) == 0 {
-		return nil
+	if len(missing) != 0 {
+		return fmt.Errorf("Graph does not have the operations %v", missing)
 	}
-	return fmt.Errorf("Graph does not have the operations %v", missing)
+
+	inList := map[string]bool{}
+	for _, op := range g.Operations() {
+		inList[op.Name()] = true
+	}
+
+	for _, op := range ops {
+		if !inList[op] {
+			missing = append(missing, op)
+		}
+	}
+
+	if len(missing) != 0 {
+		return fmt.Errorf("Operations %v are missing from graph.Operations()", missing)
+	}
+
+	return nil
 }
 
 func TestGraphWriteToAndImport(t *testing.T) {
-- 
GitLab


From 1c4810141e71289d71bfd94a74434bd09ee6b20f Mon Sep 17 00:00:00 2001
From: Skye Wanderman-Milne <skyewm@google.com>
Date: Thu, 30 Nov 2017 09:27:16 -0800
Subject: [PATCH 0453/1225] Hoist function input placeholders out of any
 control flow context.

Prior to this change, functions that closed over external tensors in a
while loop would cause a segfault at runtime. This is because the
external tensors are temporarily represented as placeholders in the
function body before being replaced by input parameters, and the
placeholders would be created directly in the while loop body. This
would eventually lead to using the input tensor in the while loop body
without an enter node. This wasn't caught by the runtime check because
it isn't applied to function bodies.

This change adds tests for capturing tensors in a while loop body and
in a cond context. Note that the cond test passed without this fix.

PiperOrigin-RevId: 177464541
---
 tensorflow/python/framework/function.py      |  5 ++-
 tensorflow/python/framework/function_test.py | 32 ++++++++++++++++++++
 2 files changed, 36 insertions(+), 1 deletion(-)

diff --git a/tensorflow/python/framework/function.py b/tensorflow/python/framework/function.py
index 29cf223724..366025a0d8 100644
--- a/tensorflow/python/framework/function.py
+++ b/tensorflow/python/framework/function.py
@@ -692,7 +692,10 @@ class _FuncGraph(ops.Graph):
         else:
           # Substitute with a placeholder.
           self.extra_inputs.append(x)
-          ph = array_ops.placeholder(x.dtype, shape=x.get_shape())
+          # Hoist the new input placeholder out of any control flow context
+          # we're currently in.
+          with ops.control_dependencies(None):
+            ph = array_ops.placeholder(x.dtype, shape=x.get_shape())
           # pylint: disable=protected-access
           ph._handle_data = x._handle_data
           # pylint: enable=protected-access
diff --git a/tensorflow/python/framework/function_test.py b/tensorflow/python/framework/function_test.py
index ba43e9199b..11f343c579 100644
--- a/tensorflow/python/framework/function_test.py
+++ b/tensorflow/python/framework/function_test.py
@@ -724,6 +724,38 @@ class FunctionTest(test.TestCase):
         # NOTE: We still do not support capturing control deps.
         _ = Foo(x)
 
+  def testCaptureInWhileLoop(self):
+    g = ops.Graph()
+    with g.as_default():
+      x = constant_op.constant(1)
+
+      @function.Defun()
+      def Foo():
+        return control_flow_ops.while_loop(lambda i: i < 10,
+                                           lambda i: i + x,
+                                           [0])
+      y = Foo()
+
+    with self.test_session(graph=g) as sess:
+      self.assertEqual(sess.run(y), 10)
+
+  def testCaptureInCond(self):
+    g = ops.Graph()
+    with g.as_default():
+      x = constant_op.constant(1)
+
+      @function.Defun(dtypes.bool)
+      def Foo(pred):
+        return control_flow_ops.cond(pred,
+                                     lambda: x,
+                                     lambda: x + 1)
+      y = Foo(True)
+      z = Foo(False)
+
+    with self.test_session(graph=g) as sess:
+      self.assertEqual(sess.run(y), 1)
+      self.assertEqual(sess.run(z), 2)
+
   def testStableName(self):
 
     @function.Defun()
-- 
GitLab


From 9308470197bcc068dca9fe227d0ab144157950e1 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 30 Nov 2017 09:52:28 -0800
Subject: [PATCH 0454/1225] Rename tests.

PiperOrigin-RevId: 177467740
---
 ...se_am_model_test.cc => speech_asr_am_model_test.cc} | 10 +++++-----
 ...se_lm_model_test.cc => speech_asr_lm_model_test.cc} |  8 ++++----
 2 files changed, 9 insertions(+), 9 deletions(-)
 rename tensorflow/contrib/lite/models/{speech_terse_am_model_test.cc => speech_asr_am_model_test.cc} (93%)
 rename tensorflow/contrib/lite/models/{speech_terse_lm_model_test.cc => speech_asr_lm_model_test.cc} (94%)

diff --git a/tensorflow/contrib/lite/models/speech_terse_am_model_test.cc b/tensorflow/contrib/lite/models/speech_asr_am_model_test.cc
similarity index 93%
rename from tensorflow/contrib/lite/models/speech_terse_am_model_test.cc
rename to tensorflow/contrib/lite/models/speech_asr_am_model_test.cc
index 30d89a1354..bf95b313f3 100644
--- a/tensorflow/contrib/lite/models/speech_terse_am_model_test.cc
+++ b/tensorflow/contrib/lite/models/speech_asr_am_model_test.cc
@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
-// Unit test for speech TERSE AM model using TFLite Ops.
+// Unit test for speech ASR AM model using TFLite Ops.
 
 #include <string.h>
 
@@ -45,10 +45,10 @@ constexpr int kLstmLayer5OutputStateTensor = 103;
 constexpr int kLstmLayer5CellStateTensor = 104;
 constexpr int kModelOutputTensor = 109;
 
-TEST(SpeechTerseAm, RandomIOTest) {
+TEST(SpeechAsrAm, RandomIOTest) {
   // Read the model.
   string tflite_file_path =
-      file::JoinPath(TestDataPath(), "speech_terse_am_model.tflite");
+      file::JoinPath(TestDataPath(), "speech_asr_am_model.tflite");
   auto model = FlatBufferModel::BuildFromFile(tflite_file_path.c_str());
   CHECK(model) << "Failed to mmap model " << tflite_file_path;
 
@@ -62,13 +62,13 @@ TEST(SpeechTerseAm, RandomIOTest) {
   // Load the input frames.
   Frames input_frames;
   const string input_file_path =
-      file::JoinPath(TestDataPath(), "speech_terse_am_model_in.csv");
+      file::JoinPath(TestDataPath(), "speech_asr_am_model_in.csv");
   ReadFrames(input_file_path, &input_frames);
 
   // Load the golden output results.
   Frames output_frames;
   const string output_file_path =
-      file::JoinPath(TestDataPath(), "speech_terse_am_model_out.csv");
+      file::JoinPath(TestDataPath(), "speech_asr_am_model_out.csv");
   ReadFrames(output_file_path, &output_frames);
 
   const int speech_batch_size =
diff --git a/tensorflow/contrib/lite/models/speech_terse_lm_model_test.cc b/tensorflow/contrib/lite/models/speech_asr_lm_model_test.cc
similarity index 94%
rename from tensorflow/contrib/lite/models/speech_terse_lm_model_test.cc
rename to tensorflow/contrib/lite/models/speech_asr_lm_model_test.cc
index 04c54ffb22..53f2b66da4 100644
--- a/tensorflow/contrib/lite/models/speech_terse_lm_model_test.cc
+++ b/tensorflow/contrib/lite/models/speech_asr_lm_model_test.cc
@@ -59,10 +59,10 @@ static void ClearLstmStates(Interpreter* interpreter) {
          interpreter->tensor(kLstmLayer3CellStateTensor)->bytes);
 }
 
-TEST(SpeechTerseLm, EndToEndTest) {
+TEST(SpeechAsrLm, EndToEndTest) {
   // Read the model.
   string tflite_file_path =
-      file::JoinPath(TestDataPath(), "speech_terse_lm_model.tflite");
+      file::JoinPath(TestDataPath(), "speech_asr_lm_model.tflite");
   auto model = FlatBufferModel::BuildFromFile(tflite_file_path.c_str());
   CHECK(model) << "Failed to mmap model " << tflite_file_path;
 
@@ -76,13 +76,13 @@ TEST(SpeechTerseLm, EndToEndTest) {
   // Load the input frames.
   Frames input_frames;
   const string input_file_path =
-      file::JoinPath(TestDataPath(), "speech_terse_lm_model_in.csv");
+      file::JoinPath(TestDataPath(), "speech_asr_lm_model_in.csv");
   ReadFrames(input_file_path, &input_frames);
 
   // Load the golden output results.
   Frames output_frames;
   const string output_file_path =
-      file::JoinPath(TestDataPath(), "speech_terse_lm_model_out.csv");
+      file::JoinPath(TestDataPath(), "speech_asr_lm_model_out.csv");
   ReadFrames(output_file_path, &output_frames);
 
   CHECK_EQ(interpreter->tensor(kModelInput1Tensor)->dims->size, 1);
-- 
GitLab


From f283173062f3ff9b6f69e8fc8a77421dcfdaa8f2 Mon Sep 17 00:00:00 2001
From: Peter Hawkins <phawkins@google.com>
Date: Thu, 30 Nov 2017 09:56:07 -0800
Subject: [PATCH 0455/1225] [TF:XLA] Add support for the V2 variants of the
 FusedBatchNorm operators, which support mixed precision training.

Until the necessary support for mixed precision fused batch norm is added to XLA, implement by casting to a common type.

PiperOrigin-RevId: 177468202
---
 .../compiler/tf2xla/kernels/batch_norm_op.cc  | 74 ++++++++++++++-----
 1 file changed, 55 insertions(+), 19 deletions(-)

diff --git a/tensorflow/compiler/tf2xla/kernels/batch_norm_op.cc b/tensorflow/compiler/tf2xla/kernels/batch_norm_op.cc
index 248e9d111e..468af34aab 100644
--- a/tensorflow/compiler/tf2xla/kernels/batch_norm_op.cc
+++ b/tensorflow/compiler/tf2xla/kernels/batch_norm_op.cc
@@ -14,7 +14,7 @@ limitations under the License.
 ==============================================================================*/
 
 // XLA implementation of BatchNorm operations.
-#include "tensorflow/compiler/tf2xla/literal_util.h"
+#include "tensorflow/compiler/tf2xla/type_util.h"
 #include "tensorflow/compiler/tf2xla/xla_helpers.h"
 #include "tensorflow/compiler/tf2xla/xla_op_kernel.h"
 #include "tensorflow/compiler/tf2xla/xla_op_registry.h"
@@ -42,27 +42,44 @@ class FusedBatchNormOp : public XlaOpKernel {
   }
 
   void Compile(XlaOpKernelContext* ctx) override {
+    xla::PrimitiveType input_type;
+    OP_REQUIRES_OK(ctx,
+                   DataTypeToPrimitiveType(ctx->input_type(0), &input_type));
+    xla::PrimitiveType stats_type;
+    OP_REQUIRES_OK(ctx,
+                   DataTypeToPrimitiveType(ctx->input_type(1), &stats_type));
+
+    xla::ComputationBuilder* builder = ctx->builder();
+
+    xla::ComputationDataHandle input = ctx->Input(0);
+
+    // TODO(b/69928690): support mixed precision in the XLA batch normalization
+    // operators. As a workaround, cast everything to the statistics type (which
+    // may be more precise than the input type).
+    input = builder->ConvertElementType(input, stats_type);
+
     if (is_training_) {
-      xla::ComputationDataHandle output = ctx->builder()->BatchNormTraining(
-          ctx->Input(0), ctx->Input(1), ctx->Input(2), epsilon_,
-          feature_index_);
+      xla::ComputationDataHandle output = builder->BatchNormTraining(
+          input, ctx->Input(1), ctx->Input(2), epsilon_, feature_index_);
 
       // In training mode, outputs the normalized value as well as the
       // calculated mean and variance.
-      for (int i = 0; i < 3; i++) {
-        ctx->SetOutput(i, ctx->builder()->GetTupleElement(output, i));
-      }
+      ctx->SetOutput(0, builder->ConvertElementType(
+                            builder->GetTupleElement(output, 0), input_type));
+      ctx->SetOutput(1, builder->GetTupleElement(output, 1));
+      ctx->SetOutput(2, builder->GetTupleElement(output, 2));
+
       // Output 3 and 4 for "FusedBatchNorm" are currently marked as "reserved
       // space 1 & 2". They are used to pass the per-batch mean and
       // variance to the gradient. Here we maintain the same behavior by setting
       // them to the mean and variance calculated by BatchNormTraining.
-      ctx->SetOutput(3, ctx->builder()->GetTupleElement(output, 1));
-      ctx->SetOutput(4, ctx->builder()->GetTupleElement(output, 2));
+      ctx->SetOutput(3, builder->GetTupleElement(output, 1));
+      ctx->SetOutput(4, builder->GetTupleElement(output, 2));
     } else {
-      xla::ComputationDataHandle output = ctx->builder()->BatchNormInference(
-          ctx->Input(0), ctx->Input(1), ctx->Input(2), ctx->Input(3),
-          ctx->Input(4), epsilon_, feature_index_);
-      ctx->SetOutput(0, output);
+      xla::ComputationDataHandle output = builder->BatchNormInference(
+          input, ctx->Input(1), ctx->Input(2), ctx->Input(3), ctx->Input(4),
+          epsilon_, feature_index_);
+      ctx->SetOutput(0, builder->ConvertElementType(output, input_type));
       // Directly send input to output as mean and variance in inference mode.
       ctx->SetOutput(1, ctx->Input(3));
       ctx->SetOutput(2, ctx->Input(4));
@@ -78,6 +95,7 @@ class FusedBatchNormOp : public XlaOpKernel {
 };
 
 REGISTER_XLA_OP(Name("FusedBatchNorm"), FusedBatchNormOp);
+REGISTER_XLA_OP(Name("FusedBatchNormV2"), FusedBatchNormOp);
 
 class FusedBatchNormGradOp : public XlaOpKernel {
  public:
@@ -101,19 +119,36 @@ class FusedBatchNormGradOp : public XlaOpKernel {
   }
 
   void Compile(XlaOpKernelContext* ctx) override {
+    xla::ComputationBuilder* builder = ctx->builder();
+
     auto grad_output = ctx->Input(0);
     auto activation = ctx->Input(1);
     auto scale = ctx->Input(2);
     auto mean = ctx->Input(3);
     auto var = ctx->Input(4);
-    xla::ComputationDataHandle output = ctx->builder()->BatchNormGrad(
+
+    xla::PrimitiveType input_type;
+    OP_REQUIRES_OK(ctx,
+                   DataTypeToPrimitiveType(ctx->input_type(0), &input_type));
+    xla::PrimitiveType stats_type;
+    OP_REQUIRES_OK(ctx,
+                   DataTypeToPrimitiveType(ctx->input_type(3), &stats_type));
+
+    // TODO(b/69928690): support mixed precision in the XLA batch normalization
+    // operators. As a workaround, cast everything to the statistics type (which
+    // may be more precise than the input type).
+    grad_output = builder->ConvertElementType(grad_output, stats_type);
+    activation = builder->ConvertElementType(activation, stats_type);
+
+    xla::ComputationDataHandle output = builder->BatchNormGrad(
         activation, scale, mean, var, grad_output, epsilon_, feature_index_);
 
-    for (int i = 0; i < 3; i++) {
-      ctx->SetOutput(i, ctx->builder()->GetTupleElement(output, i));
-    }
-    ctx->SetOutput(3, ctx->builder()->GetTupleElement(output, 1));
-    ctx->SetOutput(4, ctx->builder()->GetTupleElement(output, 2));
+    ctx->SetOutput(0, builder->ConvertElementType(
+                          builder->GetTupleElement(output, 0), input_type));
+    ctx->SetOutput(1, builder->GetTupleElement(output, 1));
+    ctx->SetOutput(2, builder->GetTupleElement(output, 2));
+    ctx->SetOutput(3, builder->GetTupleElement(output, 1));
+    ctx->SetOutput(4, builder->GetTupleElement(output, 2));
   }
 
  private:
@@ -122,6 +157,7 @@ class FusedBatchNormGradOp : public XlaOpKernel {
 };
 
 REGISTER_XLA_OP(Name("FusedBatchNormGrad"), FusedBatchNormGradOp);
+REGISTER_XLA_OP(Name("FusedBatchNormGradV2"), FusedBatchNormGradOp);
 
 }  // namespace
 }  // namespace tensorflow
-- 
GitLab


From 12976748822cdb3885f37dbda42ce8674afa6f91 Mon Sep 17 00:00:00 2001
From: Alexandre Passos <apassos@google.com>
Date: Thu, 30 Nov 2017 10:09:52 -0800
Subject: [PATCH 0456/1225] Uses C API for eager functions.

Rolls back the rollback with some swiggery to get python3 to work.

PiperOrigin-RevId: 177470328
---
 tensorflow/python/eager/backprop.py           |   2 +-
 tensorflow/python/eager/context.py            |  15 ++
 tensorflow/python/eager/function.py           | 144 ++++++++++++------
 tensorflow/python/eager/graph_callable.py     |  18 ++-
 .../python/eager/graph_callable_test.py       |   1 -
 tensorflow/python/framework/ops.py            |  30 ++--
 tensorflow/python/pywrap_tfe.i                |   3 +-
 7 files changed, 143 insertions(+), 70 deletions(-)

diff --git a/tensorflow/python/eager/backprop.py b/tensorflow/python/eager/backprop.py
index 0144f3b1e5..dc1142705a 100644
--- a/tensorflow/python/eager/backprop.py
+++ b/tensorflow/python/eager/backprop.py
@@ -540,7 +540,7 @@ def _ensure_unique_tensor_objects(parameter_positions, args):
     if i in parameter_positions:
       tid = ops.tensor_id(t)
       if tid in s:
-        args[i] = args[i]._dup()  # pylint: disable=protected-access
+        args[i] = gen_array_ops.identity(args[i])
       else:
         s.add(tid)
   return args
diff --git a/tensorflow/python/eager/context.py b/tensorflow/python/eager/context.py
index 92f4e15c05..415416cfae 100644
--- a/tensorflow/python/eager/context.py
+++ b/tensorflow/python/eager/context.py
@@ -288,6 +288,21 @@ class Context(object):
     self._initialize_handle_and_devices()
     return self._num_gpus
 
+  def add_function(self, fn):
+    """Add a function definition to the context.
+
+    Once added, the function (identified by its name) can be executed like any
+    other operation.
+
+    Args:
+      fn: A wrapped TF_Function (returned from TF_GraphToFunction_wrapper).
+    """
+    with errors.raise_exception_on_not_ok_status() as status:
+      pywrap_tensorflow.TFE_ContextAddFunction(
+          self._handle,  # pylint: disable=protected-access
+          fn,
+          status)
+
   def add_function_def(self, fdef):
     """Add a function definition to the context.
 
diff --git a/tensorflow/python/eager/function.py b/tensorflow/python/eager/function.py
index 2f4b59e938..cadabb3a24 100644
--- a/tensorflow/python/eager/function.py
+++ b/tensorflow/python/eager/function.py
@@ -25,15 +25,19 @@ import threading
 
 import numpy as np
 
+from tensorflow.core.framework import function_pb2
+from tensorflow.python import pywrap_tensorflow
 from tensorflow.python.eager import context
 from tensorflow.python.eager import execute
 from tensorflow.python.eager import tape
 from tensorflow.python.eager.graph_only_ops import graph_placeholder
+from tensorflow.python.framework import c_api_util
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes as dtypes_module
-from tensorflow.python.framework import graph_to_function_def
+from tensorflow.python.framework import errors
 from tensorflow.python.framework import ops
 from tensorflow.python.ops import gradients_impl
+from tensorflow.python.util import compat
 from tensorflow.python.util import nest
 from tensorflow.python.util import tf_decorator
 
@@ -47,10 +51,41 @@ _scoped_captures = threading.local()
 _scoped_captures.tensors = None
 
 
-def make_function_def(graph, operations, inputs, outputs):
-  """Makes function def from the given graph with the operations."""
-  return graph_to_function_def.graph_to_function_def(
-      graph, operations, inputs, outputs)
+def make_function_def(name, graph, operations, inputs, outputs):
+  """Makes FunctionDef proto and defined function.
+
+  Args:
+    name: the function name
+    graph: the graph from which to build the function
+    operations: the operations in the function body
+    inputs: tensors to be used as function arguments
+    outputs: tensors to be returned from the function
+
+  Returns:
+   fdef: a FunctionDef protocol buffer for the function
+   fn: a wrapped TF_Function for the function
+  """
+  with errors.raise_exception_on_not_ok_status() as status:
+    fn = pywrap_tensorflow.TF_GraphToFunction_wrapper(
+        graph._c_graph,  # pylint: disable=protected-access
+        compat.as_str(name),
+        False,
+        [o._c_op for o in operations],  # pylint: disable=protected-access
+        [t._as_tf_output() for t in inputs],  # pylint: disable=protected-access
+        [t._as_tf_output() for t in outputs],  # pylint: disable=protected-access
+        [],
+        None,
+        compat.as_str(""),
+        status)
+  # TODO(apassos) avoid creating a FunctionDef (specially to grab the signature,
+  # but also in general it's nice not to depend on it.
+  with c_api_util.tf_buffer() as buffer_:
+    with errors.raise_exception_on_not_ok_status() as status:
+      pywrap_tensorflow.TF_FunctionToFunctionDef(fn, buffer_, status)
+    proto_data = pywrap_tensorflow.TF_GetBuffer(buffer_)
+  fdef = function_pb2.FunctionDef()
+  fdef.ParseFromString(compat.as_bytes(proto_data))
+  return fdef, fn
 
 
 @contextlib.contextmanager
@@ -115,6 +150,10 @@ class CapturingGraph(ops.Graph):
     # for resource tensors.
     self._last_op_using_resource_tensor = {}
 
+  # TODO(apassos) remove once the C API is used by default.
+  def _use_c_api_hack(self):
+    return True
+
   def clear_resource_control_flow_state(self):
     self._last_op_using_resource_tensor = {}
 
@@ -207,14 +246,20 @@ def _inference_name(n):
   return "__inference_%s_%s" % (n, ops.uid())
 
 
+# TODO(apassos) get rid of this by splitting framework.function._DefinedFunction
+# so it doesn't have the definition-generating logic and is just a container for
+# an already-defined function.
 class _DefinedFunction(object):
   """Mocks the interface of tf _DefinedFunction."""
 
-  def __init__(self, fdef):
+  def __init__(self, fdef, fn):
     self.definition = fdef
     self.name = fdef.signature.name
+    self.signature = fdef.signature
     self.grad_func_name = None
     self.python_grad_func = None
+    self._c_func = fn
+    self._grad_func = None
 
 
 def _map_sequence_obj_to_idx(sequence):
@@ -250,6 +295,7 @@ class GraphModeFunction(object):
                input_placeholders,
                extra_inputs,
                fdef,
+               fn,
                graph,
                operations,
                func_outputs,
@@ -263,7 +309,7 @@ class GraphModeFunction(object):
     self._graph = graph
     self._has_backprop = False
     self._func_name = fdef.signature.name
-    self._fdef = _DefinedFunction(fdef)
+    self._fdef = _DefinedFunction(fdef, fn)
     self._num_outputs = len(fdef.signature.output_arg)
     self._ops = operations
     self._func_outputs = func_outputs
@@ -283,38 +329,45 @@ class GraphModeFunction(object):
     with self._graph.as_default(), context.graph_mode():
       c = _CapturingContext()
       with c:
-        filtered_outputs = [
-            x for x in self._returns if x is not None
-        ]
+        filtered_outputs = [x for x in self._returns if x is not None]
         self._out_grad_placeholders = [
-            graph_placeholder(x.dtype, x.shape) for x in filtered_outputs
-        ]
+            graph_placeholder(x.dtype, x.shape) for x in filtered_outputs]
         in_gradients = gradients_impl.gradients(
             filtered_outputs,
             self._input_placeholders,
             grad_ys=self._out_grad_placeholders)
-        shapes = [x.shape for x in in_gradients if x is not None]
+        shapes = tuple(x.shape for x in in_gradients if x is not None)
     captures = list(sorted(c.captured_tensors, key=lambda x: x.name))
-    forward_function_def = make_function_def(
-        self._graph, self._ops, self._input_placeholders,
+    forward_name = _forward_name(self._func_name)
+    forward_function_def, forward_fn = make_function_def(
+        forward_name, self._graph, self._ops, self._input_placeholders,
         filtered_outputs + captures)
-    self._forward_fdef = _DefinedFunction(forward_function_def)
-    _register_with_name(_forward_name(self._func_name), forward_function_def)
-    backward_outputs = [x for x in in_gradients if x is not None]
+    self._forward_fdef = _DefinedFunction(forward_function_def, forward_fn)
+    _register(forward_fn)
+    backward_outputs = tuple(x for x in in_gradients if x is not None)
     all_inputs = self._out_grad_placeholders + captures
-    backward_function_def = make_function_def(
-        self._graph, [x.op for x in self._out_grad_placeholders
-                     ] + list(sorted(c.known_ops, key=lambda x: x.name)),
+    # Excluding input ops from the body as we do not intend to execute these
+    # operations when the function is executed.
+    all_ignored_ops = frozenset(x.op for x in all_inputs)
+    # Enforce a deterministic order of operations in the generated graph. This
+    # means rerunning the function-defining code will always define the same
+    # function, which is useful if we serialize this etc.
+    fdef_ops = tuple(x for x in sorted(c.known_ops, key=lambda x: x.name)
+                     if x not in all_ignored_ops)
+    bname = _backward_name(self._func_name)
+    backward_function_def, backward_fn = make_function_def(
+        bname, self._graph, fdef_ops,
         all_inputs, backward_outputs)
-    _register_with_name(_backward_name(self._func_name), backward_function_def)
+    _register(backward_fn)
     self._backward_function = GraphModeFunction(
-        all_inputs, [], backward_function_def, self._graph, c.known_ops,
-        in_gradients, _map_sequence_obj_to_idx(backward_outputs), shapes)
+        all_inputs, [], backward_function_def, backward_fn, self._graph,
+        c.known_ops, in_gradients, _map_sequence_obj_to_idx(backward_outputs),
+        shapes)
 
   def _backprop_call(self, args):
     """Calls the wrapped function and records the result on a tape."""
     all_args = args + self._extra_inputs
-    signature = self._forward_fdef.definition.signature
+    signature = self._forward_fdef.signature
     ctx = context.context()
     if ctx.in_graph_mode():
       g = ops.get_default_graph()
@@ -325,7 +378,7 @@ class GraphModeFunction(object):
         return ops.internal_convert_to_tensor(x, ctx=ctx)
       op = g.create_op(
           signature.name, [make_tensor(x) for x in all_args],
-          [dtypes_module.DType(x.type) for x in signature.output_arg],
+          tuple(dtypes_module.DType(x.type) for x in signature.output_arg),
           op_def=signature,
           name="FunctionCall",
           compute_shapes=False)
@@ -361,11 +414,8 @@ class GraphModeFunction(object):
       if v._trainable:  # pylint: disable=protected-access
         tape.watch_variable(v)
 
-    tensor_inputs = [
-        x for x in nest.flatten(args)
-        if isinstance(x, ops.Tensor)
-    ]
-
+    tensor_inputs = [x for x in nest.flatten(args)
+                     if isinstance(x, ops.Tensor)]
     if tape.should_record(tensor_inputs) or tape.should_record(
         self._extra_inputs):
       if not self._has_backprop:
@@ -384,7 +434,7 @@ class GraphModeFunction(object):
       args = list(tensor_inputs) + self._extra_inputs
       op = g.create_op(
           signature.name, [ops.convert_to_tensor(x) for x in args],
-          [dtypes_module.DType(x.type) for x in signature.output_arg],
+          tuple(dtypes_module.DType(x.type) for x in signature.output_arg),
           op_def=signature,
           name="FunctionCall",
           compute_shapes=False)
@@ -469,29 +519,32 @@ def _defun_internal(name, func, args, kwds):
         extra_inputs = []
         extra_placeholders = []
       outputs_list = nest.flatten(func_outputs)
-      output_shapes = [x.shape for x in outputs_list if x is not None]
+      output_shapes = tuple(x.shape for x in outputs_list if x is not None)
 
-  flat_inputs = [
-      x for x in nest.flatten(func_inputs) if isinstance(x, ops.Tensor)
-  ]
+  flat_inputs = [x for x in nest.flatten(func_inputs)
+                 if isinstance(x, ops.Tensor)]
   all_inputs = flat_inputs + list(extra_placeholders)
-
+  all_ignored_ops = frozenset(x.op for x in all_inputs)
   func_def_outputs = [x for x in outputs_list if x is not None]
-  inference_function_def = make_function_def(
-      tmp_graph, tmp_graph.get_operations(), all_inputs, func_def_outputs)
+  fname = _inference_name(name)
+  operations = tuple(x for x in tmp_graph.get_operations()
+                     if x not in all_ignored_ops)
+  inference_function_def, fn = make_function_def(
+      fname, tmp_graph, operations, all_inputs, func_def_outputs)
   # Register any other functions defined in the graph
   # TODO(ashankar): Oh lord, forgive me for this lint travesty.
   for f in tmp_graph._functions.values():  # pylint: disable=protected-access
     # TODO(ashankar): What about the gradient registry?
-    _register_with_name(f.name, f.definition)
-  _register_with_name(_inference_name(name), inference_function_def)
+    _register(f._c_func)  # pylint: disable=protected-access
+  _register(fn)
 
   return GraphModeFunction(
       all_inputs,
       extra_inputs,
       inference_function_def,
+      fn,
       tmp_graph,
-      tmp_graph.get_operations(),
+      operations,
       func_outputs,
       _map_sequence_obj_to_idx(func_def_outputs),
       output_shapes,
@@ -517,10 +570,9 @@ def _cache_key(x):
   return x
 
 
-def _register_with_name(name, fdef):
-  """Registers the function `fdef` with the name `name`."""
-  fdef.signature.name = name
-  context.context().add_function_def(fdef)
+def _register(fn):
+  """Registers the function `fn`."""
+  context.context().add_function(fn)
 
 
 # TODO(apassos): better error messages for non-hashable arguments.
diff --git a/tensorflow/python/eager/graph_callable.py b/tensorflow/python/eager/graph_callable.py
index faf0ac88bc..3da100d800 100644
--- a/tensorflow/python/eager/graph_callable.py
+++ b/tensorflow/python/eager/graph_callable.py
@@ -318,7 +318,9 @@ def _graph_callable_internal(func, shape_and_dtypes):
   placeholder_inputs = flat_inputs+ list(extra_placeholders)
 
   func_def_outputs = [x for x in outputs_list if isinstance(x, tf_ops.Tensor)]
-  initializer_function_def = function.make_function_def(
+  initialization_name = function._inference_name(func.__name__)  # pylint: disable=protected-access
+  initializer_function_def, initializer_fn = function.make_function_def(
+      initialization_name,
       tmp_graph,
       initializing_operations,
       placeholder_inputs,
@@ -327,13 +329,13 @@ def _graph_callable_internal(func, shape_and_dtypes):
   # Also, what about the gradient registry of these functions? Those need to be
   # addressed as well.
   for f in tmp_graph._functions.values():  # pylint: disable=protected-access
-    function._register_with_name(f.name, f.definition)  # pylint: disable=protected-access
-  function._register_with_name(function._inference_name(func.__name__),  # pylint: disable=protected-access
-                               initializer_function_def)
+    function._register(f._c_func)  # pylint: disable=protected-access
+  function._register(initializer_fn)  # pylint: disable=protected-access
   initializer_function = function.GraphModeFunction(
       placeholder_inputs,
       extra_inputs,
       initializer_function_def,
+      initializer_fn,
       tmp_graph,
       initializing_operations,
       func_outputs,
@@ -342,18 +344,20 @@ def _graph_callable_internal(func, shape_and_dtypes):
 
   capture_func_def_outputs = [
       x for x in captured_outlist if isinstance(x, tf_ops.Tensor)]
-  captured_function_def = function.make_function_def(
+  captured_function_name = function._inference_name(func.__name__)  # pylint: disable=protected-access
+  captured_function_def, capturing_fn = function.make_function_def(
+      captured_function_name,
       tmp_graph,
       capturing_operations,
       placeholder_inputs,
       capture_func_def_outputs)
-  function._register_with_name(function._inference_name(func.__name__),  # pylint: disable=protected-access
-                               captured_function_def)
+  function._register(capturing_fn)  # pylint: disable=protected-access
 
   captured_function = function.GraphModeFunction(
       placeholder_inputs,
       extra_inputs,
       captured_function_def,
+      capturing_fn,
       tmp_graph,
       capturing_operations,
       captured_outputs,
diff --git a/tensorflow/python/eager/graph_callable_test.py b/tensorflow/python/eager/graph_callable_test.py
index 548e16a909..b9e6ca2a93 100644
--- a/tensorflow/python/eager/graph_callable_test.py
+++ b/tensorflow/python/eager/graph_callable_test.py
@@ -152,7 +152,6 @@ class GraphCallableTest(test.TestCase):
     self.assertAllEqual(5, f(constant_op.constant(2)))
 
   def testNestedFunction(self):
-
     # TensorFlow function (which is what would be used in TensorFlow graph
     # construction).
     @function.Defun(dtypes.int32, dtypes.int32)
diff --git a/tensorflow/python/framework/ops.py b/tensorflow/python/framework/ops.py
index a616b15cf7..5f945ac133 100644
--- a/tensorflow/python/framework/ops.py
+++ b/tensorflow/python/framework/ops.py
@@ -599,11 +599,6 @@ class Tensor(_TensorLike):
     """
     return _eval_using_default_session(self, feed_dict, self.graph, session)
 
-  def _dup(self):
-    ret = copy.copy(self)
-    ret._id = uid()  # pylint: disable=protected-access
-    return ret
-
 
 # TODO(agarwal): consider getting rid of this.
 class _EagerTensorBase(Tensor):
@@ -729,9 +724,6 @@ class _EagerTensorBase(Tensor):
     return new_tensor
     # pylint: enable=protected-access
 
-  def _dup(self):
-    return self._copy(device_name=self.device)
-
   @property
   def shape(self):
     return tensor_shape.TensorShape(self._shape_tuple())
@@ -1794,7 +1786,7 @@ class Operation(object):
       c_api.SetRequestedDevice(
           self._graph._c_graph,  # pylint: disable=protected-access
           self._c_op,  # pylint: disable=protected-access
-          _device_string(device))
+          compat.as_str(_device_string(device)))
     else:
       self._node_def.device = _device_string(device)
 
@@ -2083,7 +2075,7 @@ class Operation(object):
 
   def _set_attr(self, attr_name, attr_value):
     """Private method used to set an attribute in the node_def."""
-    if _USE_C_API:
+    if self._c_op:
       buf = c_api.TF_NewBufferFromString(
           compat.as_bytes(attr_value.SerializeToString()))
       try:
@@ -2652,11 +2644,16 @@ class Graph(object):
 
     # TODO(skyewm): fold as much of the above as possible into the C
     # implementation
-    if _USE_C_API:
+    if _USE_C_API or self._use_c_api_hack():
       self._scoped_c_graph = c_api_util.ScopedTFGraph()
     else:
       self._scoped_c_graph = None
 
+  # TODO(apassos) remove once the C API is used by default.
+  def _use_c_api_hack(self):
+    """Temporary hack; can be overridden to force C API usage."""
+    return False
+
   def _convert_stack(self, stack, include_func_start_lineno=False):
     """Converts a stack extracted using _extract_stack() to a traceback stack.
 
@@ -2985,9 +2982,14 @@ class Graph(object):
     # Add function to graph
     # pylint: disable=protected-access
     if self._c_graph:
-      assert function._c_func, (
-          "Cannot add function created without C API support to graph "
-          "created with C API support")
+      # Handle functions created without using the C API. TODO(apassos,skyewm)
+      # remove this when all functions are generated using the C API by default
+      # as this will be unnecessary.
+      if not function._c_func:
+        with errors.raise_exception_on_not_ok_status() as status:
+          serialized = function.definition.SerializeToString()
+          function._c_func = c_api.TF_FunctionImportFunctionDef(
+              serialized, status)
       with errors.raise_exception_on_not_ok_status() as status:
         gradient = function._grad_func._c_func if function._grad_func else None
         c_api.TF_GraphCopyFunction(self._c_graph, function._c_func, gradient,
diff --git a/tensorflow/python/pywrap_tfe.i b/tensorflow/python/pywrap_tfe.i
index 82b154164e..82750e9e49 100644
--- a/tensorflow/python/pywrap_tfe.i
+++ b/tensorflow/python/pywrap_tfe.i
@@ -18,6 +18,7 @@ limitations under the License.
 %rename("%s") TFE_NewContext;
 %rename("%s") TFE_DeleteContext;
 %rename("%s") TFE_ContextListDevices;
+%rename("%s") TFE_ContextAddFunction;
 %rename("%s") TFE_ContextAddFunctionDef;
 %rename("%s") TFE_OpNameGetAttrType;
 %rename("%s") TFE_Py_InitEagerTensor;
@@ -149,7 +150,7 @@ limitations under the License.
   }
   $1 = &temp;
   $1->resize(PyInt_AsLong($input), nullptr);
-}
+} 
 
 // Create new Status object.
 %typemap(in, numinputs=0) TF_Status *out_status {
-- 
GitLab


From b8c9b75bbb75def92b2cae2406ba88d20630b66a Mon Sep 17 00:00:00 2001
From: Derek Murray <mrry@google.com>
Date: Thu, 30 Nov 2017 10:33:27 -0800
Subject: [PATCH 0457/1225] Change "Datasets" to "`tf.data`" in the "Reading
 Data" API guide.

PiperOrigin-RevId: 177473833
---
 .../api_guides/python/reading_data.md         | 32 ++++++++++---------
 1 file changed, 17 insertions(+), 15 deletions(-)

diff --git a/tensorflow/docs_src/api_guides/python/reading_data.md b/tensorflow/docs_src/api_guides/python/reading_data.md
index b3ebaa0f0a..4594887349 100644
--- a/tensorflow/docs_src/api_guides/python/reading_data.md
+++ b/tensorflow/docs_src/api_guides/python/reading_data.md
@@ -1,11 +1,11 @@
 # Reading data
 
 Note: The preferred way to feed data into a tensorflow program is using the
-@{$datasets$Datasets API}.
+@{$datasets$`tf.data` API}.
 
 There are four methods of getting data into a TensorFlow program:
 
-*   `Dataset` API: Easily construct a complex input pipeline. (preferred method)
+*   `tf.data` API: Easily construct a complex input pipeline. (preferred method)
 *   Feeding: Python code provides the data when running each step.
 *   `QueueRunner`: a queue-based input pipeline reads the data from files
     at the beginning of a TensorFlow graph.
@@ -14,26 +14,27 @@ There are four methods of getting data into a TensorFlow program:
 
 [TOC]
 
-## Dataset API
+## `tf.data` API
 
 See the @{$datasets$programmer's guide} for an in-depth explanation of
-@{tf.data.Dataset}. The `Dataset` API allows you to extract and preprocess data
-from different input/file formats, and apply transformations such as batch,
-shuffle, and map to the dataset. This is an improved version of the old input
-methods, feeding and `QueueRunner`.
+@{tf.data.Dataset}. The `tf.data` API enables you to extract and preprocess data
+from different input/file formats, and apply transformations such as batching,
+shuffling, and mapping functions over the dataset. This is an improved version
+of the old input methods---feeding and `QueueRunner`---which are described
+below for historical purposes.
 
 ## Feeding
 
+Warning: "Feeding" is the least efficient way to feed data into a TensorFlow
+program and should only be used for small experiments and debugging.
+
 TensorFlow's feed mechanism lets you inject data into any Tensor in a
-computation graph. A python computation can thus feed data directly into the
+computation graph. A Python computation can thus feed data directly into the
 graph.
 
 Supply feed data through the `feed_dict` argument to a run() or eval() call
 that initiates computation.
 
-Warning: "Feeding" is the least efficient way to feed data into a tensorflow
-program and should only be used for small experiments and debugging.
-
 ```python
 with tf.Session():
   input = tf.placeholder(tf.float32)
@@ -55,6 +56,10 @@ and is described in the @{$mechanics$MNIST tutorial}.
 
 ## `QueueRunner`
 
+Warning: This section discusses implementing input pipelines using the
+queue-based APIs which can be cleanly replaced by the @{$datasets$`tf.data`
+API}.
+
 A typical queue-based pipeline for reading records from files has the following stages:
 
 1.  The list of filenames
@@ -66,9 +71,6 @@ A typical queue-based pipeline for reading records from files has the following
 7.  *Optional* preprocessing
 8.  Example queue
 
-Warning: This section discusses implementing input pipelines using the
-queue-based APIs which can be cleanly replaced by the @{$datasets$Datasets API}.
-
 ### Filenames, shuffling, and epoch limits
 
 For the list of filenames, use either a constant string Tensor (like
@@ -499,7 +501,7 @@ You can have the train and eval in the same graph in the same process, and share
 their trained variables or layers. See @{$variables$the shared variables tutorial}.
 
 To support the single-graph approach
-@{$programmers_guide/datasets$Datasets} also supplies
+@{$programmers_guide/datasets$`tf.data`} also supplies
 @{$programmers_guide/datasets#creating_an_iterator$advanced iterator types} that
 that allow the user to change the input pipeline without rebuilding the graph or
 session.
-- 
GitLab


From 3b9a26d04544ba6e13181a7df07bb693769b7d7c Mon Sep 17 00:00:00 2001
From: Benoit Steiner <bsteiner@google.com>
Date: Thu, 30 Nov 2017 10:40:46 -0800
Subject: [PATCH 0458/1225] Turned a verbose log into a vlog

PiperOrigin-RevId: 177474943
---
 tensorflow/core/grappler/grappler_item_builder.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/core/grappler/grappler_item_builder.cc b/tensorflow/core/grappler/grappler_item_builder.cc
index a186e9a181..da99777bbc 100644
--- a/tensorflow/core/grappler/grappler_item_builder.cc
+++ b/tensorflow/core/grappler/grappler_item_builder.cc
@@ -188,7 +188,7 @@ std::unique_ptr<GrapplerItem> GrapplerItemFromMetaGraphDef(
                      << ", skipping this input";
           return nullptr;
         }
-        LOG(INFO) << "Will use fetch node " << name;
+        VLOG(1) << "Will use fetch node " << name;
         new_item->fetch.push_back(name);
       }
     }
-- 
GitLab


From 0369392dfcb569a2b8c55fb7a5d3dc08b6cb6ef8 Mon Sep 17 00:00:00 2001
From: Russell Power <power@google.com>
Date: Thu, 30 Nov 2017 11:00:11 -0800
Subject: [PATCH 0459/1225] Internal testing change.

PiperOrigin-RevId: 177478003
---
 tensorflow/contrib/tpu/BUILD                  |  15 -
 .../contrib/tpu/python/tpu/test_util.py       | 296 ------------------
 2 files changed, 311 deletions(-)
 delete mode 100644 tensorflow/contrib/tpu/python/tpu/test_util.py

diff --git a/tensorflow/contrib/tpu/BUILD b/tensorflow/contrib/tpu/BUILD
index f542d94139..a34c7f91f2 100644
--- a/tensorflow/contrib/tpu/BUILD
+++ b/tensorflow/contrib/tpu/BUILD
@@ -31,21 +31,6 @@ cc_library(
     ],
 )
 
-py_library(
-    name = "tpu_test_util",
-    srcs = ["python/tpu/test_util.py"],
-    srcs_version = "PY2AND3",
-    deps = [
-        ":tpu_lib",
-        ":tpu_py",
-        "//tensorflow/python:errors",
-        "//tensorflow/python:framework_ops",
-        "//tensorflow/python:framework_test_lib",
-        "//tensorflow/python:session",
-        "//tensorflow/python:variables",
-    ],
-)
-
 py_library(
     name = "tpu_estimator",
     srcs = [
diff --git a/tensorflow/contrib/tpu/python/tpu/test_util.py b/tensorflow/contrib/tpu/python/tpu/test_util.py
deleted file mode 100644
index a5d4ff9722..0000000000
--- a/tensorflow/contrib/tpu/python/tpu/test_util.py
+++ /dev/null
@@ -1,296 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ===================================================================
-"""Utilities to ease testing on TPU devices."""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import os.path
-import pickle
-import tempfile
-
-import numpy as np
-
-from tensorflow.contrib.tpu.python.tpu import tpu
-from tensorflow.contrib.tpu.python.tpu import tpu_config
-from tensorflow.contrib.tpu.python.tpu import tpu_estimator
-from tensorflow.core.protobuf import config_pb2
-from tensorflow.python.client import session as tf_session
-from tensorflow.python.estimator import model_fn as model_fn_lib
-from tensorflow.python.framework import errors
-from tensorflow.python.framework import ops
-from tensorflow.python.framework import random_seed
-from tensorflow.python.framework import test_util
-from tensorflow.python.ops import gen_array_ops
-from tensorflow.python.ops import variables
-from tensorflow.python.platform import gfile
-from tensorflow.python.platform import tf_logging as logging
-from tensorflow.python.training import saver as tf_saver
-
-
-def has_tpu():
-  """Check if a TPU device is available.
-
-  Device enumeration via `device_lib` currently fails for TPU systems.
-  (http://b/68333779).  To work around this, we determine the existence of a
-  TPU by a successful call to `initialize_system`.
-
-  Returns:
-    boolean, True if a TPU device is available, otherwise False.
-  """
-
-  def _check():
-    with tf_session.Session() as sess:
-      sess.run(tpu.initialize_system())
-      sess.run(tpu.shutdown_system())
-
-  try:
-    _check()
-    return True
-  except errors.OpError as _:
-    return False
-
-
-def _available_devices():
-  devices = ["cpu"]
-  if not test_util.gpu_device_name():
-    devices.append("gpu")
-
-  if has_tpu():
-    devices.append("tpu")
-
-  return tuple(devices)
-
-
-def copy_dir(src, tgt):
-  """Copy src to tgt."""
-  gfile.MakeDirs(tgt)
-  seen_dirs = set()
-  for dirname, _, files in gfile.Walk(src):
-    for f in files:
-      src_f = os.path.join(dirname, f)
-      tgt_f = src_f.replace(src, tgt)
-      tgt_d = os.path.dirname(tgt_f)
-      if tgt_d not in seen_dirs:
-        gfile.MkDir(tgt_d)
-        seen_dirs.add(tgt_d)
-      gfile.Copy(src_f, tgt_f, overwrite=True)
-
-
-def compare_model(model_fn,
-                  input_fn,
-                  params,
-                  master="local",
-                  temp_dir=None,
-                  num_shards=2,
-                  tolerance=1e-4):
-  """Compare the results of running `model_fn` on the TPU and CPU."""
-  if not temp_dir:
-    temp_dir = tempfile.mkdtemp()
-
-  cpu_model_dir = "%s/cpu-model" % temp_dir
-  tpu_model_dir = "%s/tpu-model" % temp_dir
-  initial_model_dir = "%s/initial-model" % temp_dir
-
-  logging.info("Checkpoints and weights will be written to %s", temp_dir)
-
-  num_steps = 1
-
-  def _model_adapter(features, labels, mode, params):
-    """Run users model function with random seeds fixed to known values."""
-    random_seed.set_random_seed(0)
-    np.random.seed(0)
-    return model_fn(features, labels, mode, params)
-
-  def _input_adapter(params):
-    random_seed.set_random_seed(0)
-    np.random.seed(0)
-    return input_fn(params)
-
-  def _make_run_config(model_dir):
-    return tpu_config.RunConfig(
-        master=master,
-        model_dir=model_dir,
-        save_checkpoints_secs=10000,
-        session_config=config_pb2.ConfigProto(
-            allow_soft_placement=True, log_device_placement=False),
-        tpu_config=tpu_config.TPUConfig(
-            iterations_per_loop=num_steps,
-            num_shards=num_shards,
-        ),
-    )
-
-  def _make_estimator(use_tpu, model_dir):
-    return tpu_estimator.TPUEstimator(
-        model_fn=_model_adapter,
-        use_tpu=use_tpu,
-        config=_make_run_config(model_dir),
-        train_batch_size=num_shards,
-        params=dict(params, use_tpu=use_tpu),
-    )
-
-  def _extract_weights(checkpoint):
-    """Extract model weights from the given checkpoint file."""
-    weights = {}
-    graph = ops.Graph()
-    with graph.as_default():
-      features, labels = _input_adapter(dict(params, batch_size=num_shards))
-      model_fn(
-          features, labels,
-          params=dict(params, use_tpu=False),
-          mode=model_fn_lib.ModeKeys.TRAIN)
-      saver = tf_saver.Saver()
-      with tf_session.Session(graph=graph) as sess:
-        saver.restore(sess, checkpoint)
-        all_vars = []
-        all_vars.extend(graph.get_collection(ops.GraphKeys.GLOBAL_VARIABLES))
-        all_vars.extend(graph.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES))
-        all_vars.extend(graph.get_collection(ops.GraphKeys.MODEL_VARIABLES))
-
-        for var in all_vars:
-          weights[var.name] = sess.run(var)
-    return weights
-
-  def _run_step(use_tpu, model_dir):
-    """Create an estimator and run a single step on the given device."""
-    tf_session.Session.reset(target=master)
-
-    logging.info("Running step.  TPU=%d.  model_dir=%s", use_tpu, model_dir)
-    est = _make_estimator(use_tpu=use_tpu, model_dir=model_dir)
-    est.train(input_fn=_input_adapter, steps=num_steps)
-    weights = _extract_weights(est.latest_checkpoint())
-    with gfile.Open(os.path.join(temp_dir, "tpu-%d.weights" % use_tpu),
-                    "wb") as f:
-      f.write(pickle.dumps(weights))
-    return weights
-
-  # initialize models to the same weights by running a single step on the CPU
-  _run_step(use_tpu=False, model_dir=initial_model_dir)
-
-  copy_dir(initial_model_dir, cpu_model_dir)
-  copy_dir(initial_model_dir, tpu_model_dir)
-
-  cpu_weights = _run_step(use_tpu=False, model_dir=cpu_model_dir)
-  tpu_weights = _run_step(use_tpu=True, model_dir=tpu_model_dir)
-
-  bad_weights = False
-  for k in cpu_weights:
-    if k not in tpu_weights:
-      raise KeyError("Missing weight %s from TPU checkpoint.", k)
-
-    if not np.allclose(
-        cpu_weights[k], tpu_weights[k], rtol=tolerance, atol=tolerance):
-      bad_weights = True
-      logging.error("Weights for layer %s have diverged.", k)
-
-  if bad_weights:
-    raise ValueError("Some weights have diverged.  Output pickle files have "
-                     "been written to %s for inspection." % temp_dir)
-
-
-class TPUTestCase(test_util.TensorFlowTestCase):
-  """Adds helpers for testing on TPU devices to `TensorFlowTestCase`.
-
-  Example usage:
-
-  ```
-  def model_fn(features):
-    return tf.reduce_sum(features * 2)
-
-  class ModelTests(test_util.TPUTestCase):
-    def test_sum(self):
-      v = np.random.randn(10, 10).astype("float32")
-      self.assert_device_output(model_fn, [v], (v*2).sum(),
-                                devices=("cpu", "tpu"))
-  ```
-  """
-
-  def __init__(self, methodName="runTest"):  # pylint: disable=invalid-name
-    super(TPUTestCase, self).__init__(methodName)
-    self._available_devices = _available_devices()
-
-  def run_on_device(self, model_fn, model_inputs, device):
-    """Runs `model_fn` on the given device.
-
-    Raises an exception if no such device is available.  `model_fn` should
-    return one or more tensors as a list or tuple.
-
-    Args:
-      model_fn: Function returning one or more tensors.
-      model_inputs: An iterable of Numpy arrays or scalars.
-                    These will be passed as arguments to `model_fn`.
-      device: Device to run on.  One of ("tpu", "gpu", "cpu").
-
-    Returns:
-      Output from the model function.
-    """
-
-    def _make_placeholders():
-      return dict([(gen_array_ops.placeholder_with_default(v, v.shape), v)
-                   for v in model_inputs])
-
-    if device == "tpu":
-      with self.test_session(graph=ops.Graph()) as sess:
-        placeholders = _make_placeholders()
-        tpu_computation = tpu.rewrite(model_fn, placeholders.keys())
-        sess.run(tpu.initialize_system())
-        sess.run(variables.global_variables_initializer())
-        result = sess.run(tpu_computation, placeholders)
-        sess.run(tpu.shutdown_system())
-        # TODO(b/36891278): supports non-flat returns lists in tpu.rewrite().
-        if len(result) == 1:
-          return result[0]
-        return result
-    elif device == "gpu":
-      with self.test_session(graph=ops.Graph(), use_gpu=True) as sess:
-        placeholders = _make_placeholders()
-        sess.run(variables.global_variables_initializer())
-        return sess.run(model_fn(placeholders.keys()), placeholders)
-    elif device == "cpu":
-      # TODO(power) -- will this interact poorly with cached GPU sessions?
-      with self.test_session(graph=ops.Graph(), use_gpu=False) as sess:
-        placeholders = _make_placeholders()
-        sess.run(variables.global_variables_initializer())
-        return sess.run(model_fn(placeholders.keys()), placeholders)
-
-  def _compare_values(self, actual_outputs, expected_outputs):
-    if isinstance(expected_outputs, (list, tuple)):
-      for a, b in zip(actual_outputs, expected_outputs):
-        self.assertAllCloseAccordingToType(a, b)
-    else:
-      self.assertAllCloseAccordingToType(actual_outputs, expected_outputs)
-
-  def assert_device_output(self,
-                           model_fn,
-                           model_inputs,
-                           expected_outputs,
-                           devices=("cpu", "gpu", "tpu")):
-    """Run `model_fn` on the given devices.
-
-    Results are compared via `assertAllCloseAccordingToType`.
-
-    Args:
-      model_fn: Function returning one or more tensors
-      model_inputs: Numpy arrays or scalars passed as arguments to model_fn
-      expected_outputs: Numpy arrays or scalars to compare against.
-      devices: Set of devices to run on.  If a device is not available, tests
-               will be skipped for that device.
-    """
-    devices = set(devices).intersection(self._available_devices)
-
-    for device in devices:
-      device_out = self.run_on_device(model_fn, model_inputs, device=device)
-      self._compare_values(device_out, expected_outputs)
-- 
GitLab


From fa6a704e4ab891e69642332f991e510a6873aa83 Mon Sep 17 00:00:00 2001
From: Zhengsheng Wei <zhengsheng.wei.cn@gmail.com>
Date: Fri, 1 Dec 2017 03:05:32 +0800
Subject: [PATCH 0460/1225] revise docstring (#14792)

---
 .../contrib/model_pruning/python/layers/core_layers.py    | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/tensorflow/contrib/model_pruning/python/layers/core_layers.py b/tensorflow/contrib/model_pruning/python/layers/core_layers.py
index ae60d8b1e1..95dfd8f421 100644
--- a/tensorflow/contrib/model_pruning/python/layers/core_layers.py
+++ b/tensorflow/contrib/model_pruning/python/layers/core_layers.py
@@ -72,8 +72,8 @@ class _MaskedConv(base.Layer):
       linear activation.
     use_bias: Boolean, whether the layer uses a bias.
     kernel_initializer: An initializer for the convolution kernel.
-    bias_initializer: An initializer for the bias vector. If None, no bias will
-      be applied.
+    bias_initializer: An initializer for the bias vector. If None, the default
+      initializer will be used.
     kernel_regularizer: Optional regularizer for the convolution kernel.
     bias_regularizer: Optional regularizer for the bias vector.
     activity_regularizer: Regularizer function for the output.
@@ -279,8 +279,8 @@ class MaskedConv2D(_MaskedConv):
       linear activation.
     use_bias: Boolean, whether the layer uses a bias.
     kernel_initializer: An initializer for the convolution kernel.
-    bias_initializer: An initializer for the bias vector. If None, no bias will
-      be applied.
+    bias_initializer: An initializer for the bias vector. If None, the default
+      initializer will be used.
     kernel_regularizer: Optional regularizer for the convolution kernel.
     bias_regularizer: Optional regularizer for the bias vector.
     activity_regularizer: Regularizer function for the output.
-- 
GitLab


From c9f9f054f7a4414577684cde3087b690877403d9 Mon Sep 17 00:00:00 2001
From: dmaclach <dmaclach@gmail.com>
Date: Thu, 30 Nov 2017 11:05:58 -0800
Subject: [PATCH 0461/1225] Update CONTRIBUTING.md (#14695)

Add Objective-C Style guide to list.
---
 CONTRIBUTING.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 43abdaafbf..1b537ca73c 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -114,6 +114,7 @@ pylint --rcfile=/tmp/pylintrc myfile.py
 * [Google Java Style Guide](https://google.github.io/styleguide/javaguide.html)
 * [Google JavaScript Style Guide](https://google.github.io/styleguide/jsguide.html)
 * [Google Shell Style Guide](https://google.github.io/styleguide/shell.xml)
+* [Google Objective-C Style Guide](http://google.github.io/styleguide/objcguide.html)
 
 #### Running sanity check
 
-- 
GitLab


From 4e8301be75a234d53b08bec577ac0069fc40bea3 Mon Sep 17 00:00:00 2001
From: Gunhan Gulsoy <gunan@google.com>
Date: Thu, 30 Nov 2017 11:00:54 -0800
Subject: [PATCH 0462/1225] Disable dnn_linear_combined_test

PiperOrigin-RevId: 177478106
---
 tensorflow/contrib/estimator/BUILD | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tensorflow/contrib/estimator/BUILD b/tensorflow/contrib/estimator/BUILD
index e4d51aa148..706a174efb 100644
--- a/tensorflow/contrib/estimator/BUILD
+++ b/tensorflow/contrib/estimator/BUILD
@@ -93,6 +93,7 @@ py_test(
     srcs_version = "PY2AND3",
     tags = [
         "no_pip",
+        "notap",  # b/62204861
         "notsan",
     ],
     deps = [
-- 
GitLab


From 4cc4d5329122f0f97c3804e6f8d27ed4b5874028 Mon Sep 17 00:00:00 2001
From: Yong Tang <yong.tang.github@outlook.com>
Date: Thu, 30 Nov 2017 11:14:36 -0800
Subject: [PATCH 0463/1225] Fix decode_bmp crash by adding length check before
 reading the data in buffer (#14967)

* Fix decode_bmp crash by adding length check before reading the data in buffer

This fix tries to address the issue raised in 14959 where
the bmp content length was not checked before reading the buffer.
As a result, decode_bmp might trigger a crash if the content of bmp
is incomplete.

This fix fixes the issue by adding the needed check before
reading the data.

This fix fixes 14959.

Signed-off-by: Yong Tang <yong.tang.github@outlook.com>

* Sanitize decode_bmp_op.cc with clang-format -i --style=Google

Signed-off-by: Yong Tang <yong.tang.github@outlook.com>

* Add additional check to make sure header is safe to access in bmp

Signed-off-by: Yong Tang <yong.tang.github@outlook.com>

* Not require padding (as paddings are not accessed)

Signed-off-by: Yong Tang <yong.tang.github@outlook.com>

* Remove duplicated row_size calculation.

Signed-off-by: Yong Tang <yong.tang.github@outlook.com>

* Add test cases for decoding incomplete bmp

Signed-off-by: Yong Tang <yong.tang.github@outlook.com>
---
 tensorflow/core/kernels/decode_bmp_op.cc      | 51 +++++++++++++------
 .../python/kernel_tests/decode_bmp_op_test.py | 50 ++++++++++++++++++
 2 files changed, 86 insertions(+), 15 deletions(-)

diff --git a/tensorflow/core/kernels/decode_bmp_op.cc b/tensorflow/core/kernels/decode_bmp_op.cc
index cd7956e1cb..c778278e8f 100644
--- a/tensorflow/core/kernels/decode_bmp_op.cc
+++ b/tensorflow/core/kernels/decode_bmp_op.cc
@@ -33,10 +33,11 @@ class DecodeBmpOp : public OpKernel {
  public:
   explicit DecodeBmpOp(OpKernelConstruction* context) : OpKernel(context) {
     OP_REQUIRES_OK(context, context->GetAttr("channels", &channels_));
-    OP_REQUIRES(context, channels_ == 0 || channels_ == 1 || channels_ == 3 ||
-                             channels_ == 4,
-                errors::InvalidArgument("channels must be 0, 1, 3 or 4, got ",
-                                        channels_));
+    OP_REQUIRES(
+        context,
+        channels_ == 0 || channels_ == 1 || channels_ == 3 || channels_ == 4,
+        errors::InvalidArgument("channels must be 0, 1, 3 or 4, got ",
+                                channels_));
   }
 
   void Compute(OpKernelContext* context) override {
@@ -48,6 +49,12 @@ class DecodeBmpOp : public OpKernel {
     // Start decoding image to get shape details
     const StringPiece input = contents.scalar<string>()();
 
+    OP_REQUIRES(context, (32 <= input.size()),
+                errors::InvalidArgument("Incomplete bmp content, requires at "
+                                        "least 32 bytes to find the header "
+                                        "size, width, height, and bpp, got ",
+                                        input.size(), " bytes"));
+
     const uint8* img_bytes = reinterpret_cast<const uint8*>(input.data());
     const int32 header_size = internal::SubtleMustCopy(
         *(reinterpret_cast<const int32*>(img_bytes + 10)));
@@ -73,6 +80,22 @@ class DecodeBmpOp : public OpKernel {
                 errors::InvalidArgument(
                     "Number of channels must be 1, 3 or 4, was ", channels_));
 
+    // there may be padding bytes when the width is not a multiple of 4 bytes
+    // 8 * channels == bits per pixel
+    const int row_size = (8 * channels_ * width + 31) / 32 * 4;
+
+    const int last_pixel_offset =
+        header_size + (abs(height) - 1) * row_size + (width - 1) * channels_;
+
+    // [expected file size] = [last pixel offset] + [last pixel size=channels]
+    const int expected_file_size = last_pixel_offset + channels_;
+
+    OP_REQUIRES(
+        context, (expected_file_size <= input.size()),
+        errors::InvalidArgument("Incomplete bmp content, requires at least ",
+                                expected_file_size, " bytes, got ",
+                                input.size(), " bytes"));
+
     // if height is negative, data layout is top down
     // otherwise, it's bottom up
     bool top_down = (height < 0);
@@ -85,25 +108,23 @@ class DecodeBmpOp : public OpKernel {
 
     const uint8* bmp_pixels = &img_bytes[header_size];
 
-    Decode(bmp_pixels, output->flat<uint8>().data(), width, abs(height),
-           channels_, top_down);
+    Decode(bmp_pixels, row_size, output->flat<uint8>().data(), width,
+           abs(height), channels_, top_down);
   }
 
-  uint8* Decode(const uint8* input, uint8* const output, const int width,
-                const int height, const int channles, bool top_down);
+  uint8* Decode(const uint8* input, const int row_size, uint8* const output,
+                const int width, const int height, const int channles,
+                bool top_down);
 
  private:
   int channels_;
 };
 REGISTER_KERNEL_BUILDER(Name("DecodeBmp").Device(DEVICE_CPU), DecodeBmpOp);
 
-uint8* DecodeBmpOp::Decode(const uint8* input, uint8* const output,
-                           const int width, const int height,
-                           const int channels, bool top_down) {
-  // there may be padding bytes when the width is not a multiple of 4 bytes
-  // 8 * channels == bits per pixel
-  int row_size = (8 * channels * width + 31) / 32 * 4;
-
+uint8* DecodeBmpOp::Decode(const uint8* input, const int row_size,
+                           uint8* const output, const int width,
+                           const int height, const int channels,
+                           bool top_down) {
   for (int i = 0; i < height; i++) {
     int src_pos;
     int dst_pos;
diff --git a/tensorflow/python/kernel_tests/decode_bmp_op_test.py b/tensorflow/python/kernel_tests/decode_bmp_op_test.py
index e7b472240e..c086f46170 100644
--- a/tensorflow/python/kernel_tests/decode_bmp_op_test.py
+++ b/tensorflow/python/kernel_tests/decode_bmp_op_test.py
@@ -20,6 +20,7 @@ from __future__ import print_function
 
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import errors_impl
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import image_ops
 from tensorflow.python.platform import test
@@ -99,5 +100,54 @@ class DecodeBmpOpTest(test.TestCase):
       decoded = decode.eval()
       self.assertAllEqual(decoded, img_bytes)
 
+  def testIncompleteHeader(self):
+    # Encoded BMP bytes from Wikipedia
+    encoded_bytes = [
+        0x42, 0x40,
+        0x46, 0, 0, 0,
+    ]
+
+    byte_string = bytes(bytearray(encoded_bytes))
+    img_in = constant_op.constant(byte_string, dtype=dtypes.string)
+    decode = array_ops.squeeze(image_ops.decode_bmp(img_in))
+
+    with self.test_session():
+      with self.assertRaisesRegexp(errors_impl.InvalidArgumentError,
+          "requires at least 32 bytes to find the header"):
+        decoded = decode.eval()
+
+  def testIncompleteBody(self):
+    # Encoded BMP bytes from Wikipedia
+    encoded_bytes = [
+        0x42, 0x40,
+        0x46, 0, 0, 0,
+        0, 0,
+        0, 0,
+        0x36, 0, 0, 0,
+        0x28, 0, 0, 0,
+        0x2, 0, 0, 0,
+        0x2, 0, 0, 0,
+        0x1, 0,
+        0x18, 0,
+        0, 0, 0, 0,
+        0x10, 0, 0, 0,
+        0x13, 0xb, 0, 0,
+        0x13, 0xb, 0, 0,
+        0, 0, 0, 0,
+        0, 0, 0, 0,
+        0, 0, 0xff,
+        0xff, 0xff, 0xff,
+        0, 0,
+    ]
+
+    byte_string = bytes(bytearray(encoded_bytes))
+    img_in = constant_op.constant(byte_string, dtype=dtypes.string)
+    decode = array_ops.squeeze(image_ops.decode_bmp(img_in))
+
+    with self.test_session():
+      with self.assertRaisesRegexp(errors_impl.InvalidArgumentError,
+          "requires at least 68 bytes, got 62 bytes"):
+        decoded = decode.eval()
+
 if __name__ == "__main__":
   test.main()
-- 
GitLab


From d24199ba2b845f516dbbdb558999e4c402342f42 Mon Sep 17 00:00:00 2001
From: Andrew Selle <aselle@andyselle.com>
Date: Thu, 30 Nov 2017 11:15:19 -0800
Subject: [PATCH 0464/1225] Add user friendly error checking about script run
 location. (#14949)

---
 tensorflow/contrib/lite/download_dependencies.sh     | 7 +++++++
 tensorflow/contrib/makefile/download_dependencies.sh | 7 +++++++
 2 files changed, 14 insertions(+)

diff --git a/tensorflow/contrib/lite/download_dependencies.sh b/tensorflow/contrib/lite/download_dependencies.sh
index e13df2fa1c..571d857be7 100755
--- a/tensorflow/contrib/lite/download_dependencies.sh
+++ b/tensorflow/contrib/lite/download_dependencies.sh
@@ -19,6 +19,13 @@ set -e
 DOWNLOADS_DIR=tensorflow/contrib/lite/downloads
 BZL_FILE_PATH=tensorflow/workspace.bzl
 
+# Ensure it is being run from repo root
+if [ ! -f $BZL_FILE_PATH ]; then
+  echo "Could not find ${BZL_FILE_PATH}":
+  echo "Likely you are not running this from the root directory of the repository.";
+  exit 1;
+fi
+
 EIGEN_URL="$(grep -o 'http.*bitbucket.org/eigen/eigen/get/.*tar\.gz' "${BZL_FILE_PATH}" | grep -v bazel-mirror | head -n1)"
 GEMMLOWP_URL="$(grep -o 'https://mirror.bazel.build/github.com/google/gemmlowp/.*zip' "${BZL_FILE_PATH}" | head -n1)"
 GOOGLETEST_URL="https://github.com/google/googletest/archive/release-1.8.0.tar.gz"
diff --git a/tensorflow/contrib/makefile/download_dependencies.sh b/tensorflow/contrib/makefile/download_dependencies.sh
index e8021a53af..19e25ad767 100755
--- a/tensorflow/contrib/makefile/download_dependencies.sh
+++ b/tensorflow/contrib/makefile/download_dependencies.sh
@@ -19,6 +19,13 @@ set -e
 DOWNLOADS_DIR=tensorflow/contrib/makefile/downloads
 BZL_FILE_PATH=tensorflow/workspace.bzl
 
+# Ensure it is being run from repo root
+if [ ! -f $BZL_FILE_PATH ]; then
+  echo "Could not find ${BZL_FILE_PATH}":
+  echo "Likely you are not running this from the root directory of the repository.";
+  exit 1;
+fi
+
 EIGEN_URL="$(grep -o 'http.*bitbucket.org/eigen/eigen/get/.*tar\.gz' "${BZL_FILE_PATH}" | grep -v bazel-mirror | head -n1)"
 GEMMLOWP_URL="$(grep -o 'https://mirror.bazel.build/github.com/google/gemmlowp/.*zip' "${BZL_FILE_PATH}" | head -n1)"
 GOOGLETEST_URL="https://github.com/google/googletest/archive/release-1.8.0.tar.gz"
-- 
GitLab


From ea1c29552b01f3404e27999a27a1919b3accc594 Mon Sep 17 00:00:00 2001
From: Blake Hechtman <blakehechtman@google.com>
Date: Thu, 30 Nov 2017 11:13:19 -0800
Subject: [PATCH 0465/1225] Change depthwise convolution filter expansion and
 contraction with algebraic manipulation instead of slices and pads that are
 more difficult to fuse.

PiperOrigin-RevId: 177480353
---
 .../compiler/tf2xla/kernels/conv_ops.cc       | 166 ++++++++++++------
 1 file changed, 112 insertions(+), 54 deletions(-)

diff --git a/tensorflow/compiler/tf2xla/kernels/conv_ops.cc b/tensorflow/compiler/tf2xla/kernels/conv_ops.cc
index c150394c07..61f4d1993a 100644
--- a/tensorflow/compiler/tf2xla/kernels/conv_ops.cc
+++ b/tensorflow/compiler/tf2xla/kernels/conv_ops.cc
@@ -46,72 +46,130 @@ TensorShape ExpandedFilterShapeForDepthwiseConvolution(
   return expanded_shape;
 }
 
+// Broadcast zeros to ExpandedFilterShapeForDepthwiseConvolution.
+xla::ComputationDataHandle CreateExpandedZero(
+    const TensorShape& filter_shape, DataType dtype,
+    xla::ComputationBuilder* builder) {
+  TensorShape expanded_filter_shape =
+      ExpandedFilterShapeForDepthwiseConvolution(filter_shape);
+  return builder->Broadcast(XlaHelpers::Zero(builder, dtype),
+                            expanded_filter_shape.dim_sizes());
+}
+
+// Create a mask for depthwise convolution that will make a normal convolution
+// produce the same results as a depthwise convolution. For a [2, 2, 3, 2]
+// depthwise filter this returns a [2, 2, 3, 6] tesnsor
+//   1 1 0 0 0 0   1 1 0 0 0 0
+//   0 0 1 1 0 0   0 0 1 1 0 0
+//   0 0 0 0 1 1   0 0 0 0 1 1
+//
+//   1 1 0 0 0 0   1 1 0 0 0 0
+//   0 0 1 1 0 0   0 0 1 1 0 0
+//   0 0 0 0 1 1   0 0 0 0 1 1
+//
+// The first step is to create a one tensor, A, that is [3]
+//   0 1 2
+//
+// and another tensor, B,  that is [3 * 2]
+//   0 1 2 3 4 5
+//
+// and divide B it by 2 to get
+//   0 0 1 1 2 2
+//
+// then we broadcast the B to [2, 2, 3, 3 * 2]
+//   0 0 1 1 2 2   0 0 1 1 2 2
+//   0 0 1 1 2 2   0 0 1 1 2 2
+//   0 0 1 1 2 2   0 0 1 1 2 2
+//
+//   0 0 1 1 2 2   0 0 1 1 2 2
+//   0 0 1 1 2 2   0 0 1 1 2 2
+//   0 0 1 1 2 2   0 0 1 1 2 2
+//
+// Finally compare A and broadcasted B in dimension 2 amd return the result at
+// the beginning of the comment.
+xla::ComputationDataHandle CreateExpandedFilterMask(
+    const TensorShape& filter_shape, xla::ComputationBuilder* builder) {
+  TensorShape expanded_filter_shape =
+      ExpandedFilterShapeForDepthwiseConvolution(filter_shape);
+  int64 depthwise_multiplier = filter_shape.dim_size(filter_shape.dims() - 1);
+  int64 input_feature = filter_shape.dim_size(filter_shape.dims() - 2);
+
+  // Create a M sized linspace and an M*N sized linspace that will be
+  // broadcasted into perpendicular dimensions and compared.
+  xla::ComputationDataHandle input_feature_iota;
+  // DT_INT32 Iota will always return status::OK().
+  TF_CHECK_OK(XlaHelpers::Iota(builder, DataType::DT_INT32, input_feature,
+                               &input_feature_iota));
+  xla::ComputationDataHandle expanded_feature_iota;
+  TF_CHECK_OK(XlaHelpers::Iota(builder, DataType::DT_INT32,
+                               input_feature * depthwise_multiplier,
+                               &expanded_feature_iota));
+
+  // Divide the M*N sized linspace by the depthwise_multiplier to create
+  // [0 0 1 1 2 2] in the example in the function comment.
+  expanded_feature_iota =
+      builder->Div(expanded_feature_iota,
+                   XlaHelpers::IntegerLiteral(builder, DataType::DT_INT32,
+                                              depthwise_multiplier));
+
+  // Broadcast the N*M linspace to [H, W, ..., M, M*N].
+  auto expanded_feature_broadcast_dims = expanded_filter_shape.dim_sizes();
+  expanded_feature_broadcast_dims.pop_back();
+  auto broadcasted_expanded_feature_iota = builder->Broadcast(
+      expanded_feature_iota, expanded_feature_broadcast_dims);
+
+  // Compare the broadcasted linspace to the input feature linspace in the
+  // input feature dimension to create a diagonal predicate.
+  return builder->Eq(broadcasted_expanded_feature_iota, input_feature_iota,
+                     {expanded_filter_shape.dims() - 2});
+}
+
 // Expands a filter of shape [H, W, ..., M, N] to [H, W, ..., M, M*N] by adding
 // zeros for the cross-depth filters. Used to build a depthwise convolution.
 xla::ComputationDataHandle ExpandFilterForDepthwiseConvolution(
     const TensorShape& filter_shape, DataType dtype,
     const xla::ComputationDataHandle& filter,
     xla::ComputationBuilder* builder) {
-  // Filter has shape [H, W, ..., M, N]
-  // Dilate to [H, W, ..., M*M, N] using M inter-element padding, and then
-  // reshape to [H, W, ..., M, M*N].
-  int num_spatial_dims = filter_shape.dims() - 2;
-  const int64 in_depth = filter_shape.dim_size(num_spatial_dims);
-  xla::PaddingConfig padding = xla::MakeNoPaddingConfig(filter_shape.dims());
-  padding.mutable_dimensions(num_spatial_dims)->set_interior_padding(in_depth);
-  auto dilated_filter =
-      builder->Pad(filter, XlaHelpers::Zero(builder, dtype), padding);
-
+  int64 depthwise_multiplier = filter_shape.dim_size(filter_shape.dims() - 1);
+  int64 input_feature = filter_shape.dim_size(filter_shape.dims() - 2);
   TensorShape expanded_filter_shape =
       ExpandedFilterShapeForDepthwiseConvolution(filter_shape);
-  return builder->Reshape(dilated_filter, expanded_filter_shape.dim_sizes());
+
+  // Create a [H, W, ..., 1, N*M] reshape of the filter.
+  TensorShape implicit_broadcast_filter_shape = expanded_filter_shape;
+  implicit_broadcast_filter_shape.set_dim(
+      implicit_broadcast_filter_shape.dims() - 2, 1);
+  implicit_broadcast_filter_shape.set_dim(
+      implicit_broadcast_filter_shape.dims() - 1,
+      depthwise_multiplier * input_feature);
+  auto implicit_broadcast_filter =
+      builder->Reshape(filter, implicit_broadcast_filter_shape.dim_sizes());
+
+  // Broadcast the filter to  [H, W, ..., M, M*N].
+  auto expanded_zero = CreateExpandedZero(filter_shape, dtype, builder);
+  auto expanded_filter = builder->Add(implicit_broadcast_filter, expanded_zero);
+
+  // If the filter mask is set, choose the broadcasted filter, othwerwise,
+  // choose zero.
+  return builder->Select(CreateExpandedFilterMask(filter_shape, builder),
+                         expanded_filter, expanded_zero);
 }
 
 // Inverse of ExpandFilterForDepthwiseConvolution.
 xla::ComputationDataHandle ContractFilterForDepthwiseBackprop(
-    const TensorShape& filter_shape, DataType dtype,
+    XlaOpKernelContext* ctx, const TensorShape& filter_shape, DataType dtype,
     const xla::ComputationDataHandle& filter_backprop,
     xla::ComputationBuilder* builder) {
-  int num_spatial_dims = filter_shape.dims() - 2;
-
-  // Reshape to [H, W, ..., M*M, N]
-  TensorShape shape = filter_shape;
-  int64 in_depth = filter_shape.dim_size(num_spatial_dims);
-  shape.set_dim(num_spatial_dims, in_depth * in_depth);
-  auto reshaped = builder->Reshape(filter_backprop, shape.dim_sizes());
-
-  std::vector<int64> zeros(filter_shape.dims());
-  std::vector<int64> strides(filter_shape.dims(), 1LL);
-  strides[num_spatial_dims] = in_depth + 1;
-  return builder->Slice(reshaped, zeros, shape.dim_sizes(), strides);
-
-  // Alternate implementation for backends without strided Slice() support.
-  // TODO(phawkins): Remove when all backends support strided slice.
-  //   // Pad [..., M * (M + 1), N]
-  //   xla::PaddingConfig config =
-  //   xla::MakeNoPaddingConfig(filter_shape.dims());
-  //   config.mutable_dimensions(num_spatial_dims)
-  //     ->set_edge_padding_high(in_depth);
-  //   auto zero = XlaHelpers::Zero(builder, dtype);
-  //   auto padded = builder->Pad(reshaped, zero, config);
-  //
-  //   // Reshape to [..., M, M + 1, N]
-  //   shape = filter_shape;
-  //   shape.set_dim(num_spatial_dims, in_depth);
-  //   shape.set_dim(num_spatial_dims + 1, in_depth + 1);
-  //   int64 out_depth = filter_shape.dim_size(num_spatial_dims + 1);
-  //   shape.AddDim(out_depth);
-  //   reshaped = builder->Reshape(padded, shape.dim_sizes());
-  //
-  //   // Slice to [..., M, 1, N]
-  //   std::vector<int64> zeros(shape.dims());
-  //   std::vector<int64> strides(shape.dims(), 1LL);
-  //   shape.set_dim(num_spatial_dims + 1, 1);
-  //   auto sliced = builder->Slice(reshaped, zeros, shape.dim_sizes(),
-  //   strides);
-  //
-  //   // Reshape to [..., M, N]
-  //   return builder->Reshape(sliced, filter_shape.dim_sizes());
+  TensorShape expanded_filter_shape =
+      ExpandedFilterShapeForDepthwiseConvolution(filter_shape);
+  auto masked_expanded_filter = builder->Select(
+      CreateExpandedFilterMask(filter_shape, builder), filter_backprop,
+      CreateExpandedZero(filter_shape, dtype, builder));
+  return builder->Reshape(
+      builder->Reduce(masked_expanded_filter, XlaHelpers::Zero(builder, dtype),
+                      *ctx->GetOrCreateAdd(dtype),
+                      {expanded_filter_shape.dims() - 2}),
+      filter_shape.dim_sizes());
 }
 
 class ConvOp : public XlaOpKernel {
@@ -202,7 +260,7 @@ class ConvOp : public XlaOpKernel {
     dims.set_input_feature_dimension(feature_dim);
     dims.set_output_feature_dimension(feature_dim);
     for (int i = 0; i < num_spatial_dims_; ++i) {
-      int64 dim = GetTensorSpatialDimIndex(num_dims(), data_format_, i);
+      const int64 dim = GetTensorSpatialDimIndex(num_dims(), data_format_, i);
       dims.add_input_spatial_dimensions(dim);
       dims.add_kernel_spatial_dimensions(i);
       dims.add_output_spatial_dimensions(dim);
@@ -574,7 +632,7 @@ class ConvBackpropFilterOp : public XlaOpKernel {
 
     if (depthwise_) {
       filter_backprop_reshaped = ContractFilterForDepthwiseBackprop(
-          filter_shape, ctx->input_type(0), filter_backprop_reshaped, b);
+          ctx, filter_shape, ctx->input_type(0), filter_backprop_reshaped, b);
     }
     ctx->SetOutput(0, filter_backprop_reshaped);
   }
-- 
GitLab


From 4146ff1259c0b4ada8afbbad11a7b37d8373d1b9 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 30 Nov 2017 11:18:54 -0800
Subject: [PATCH 0466/1225] [XLA] Adds Dot with DotDimensionNumbers proto for
 specifying arbitrary contracting and batch dimensions.

PiperOrigin-RevId: 177481231
---
 .../xla/client/computation_builder.cc         |  36 +++-
 .../compiler/xla/client/computation_builder.h |   5 +
 .../xla/service/algebraic_simplifier.cc       |  17 +-
 .../xla/service/algebraic_simplifier_test.cc  |   6 +-
 .../xla/service/buffer_assignment_test.cc     |  11 +-
 .../cpu/cpu_instruction_fusion_test.cc        |  36 ++--
 .../compiler/xla/service/cpu/ir_emitter.cc    |   5 +
 .../xla/service/gpu/ir_emitter_unnested.cc    |   5 +
 .../compiler/xla/service/graphviz_example.cc  |   5 +-
 .../xla/service/heap_simulator_test.cc        |  24 ++-
 tensorflow/compiler/xla/service/hlo.proto     |   3 +
 .../compiler/xla/service/hlo_cost_analysis.cc |   5 +-
 .../xla/service/hlo_evaluator_test.cc         |  21 ++-
 .../compiler/xla/service/hlo_instruction.cc   |  56 +++++-
 .../compiler/xla/service/hlo_instruction.h    |  18 ++
 .../xla/service/hlo_instruction_test.cc       |  12 +-
 .../compiler/xla/service/hlo_verifier.cc      |   6 +-
 .../xla/service/liveness_util_test.cc         |  10 +-
 tensorflow/compiler/xla/service/service.cc    |   3 +
 .../compiler/xla/service/shape_inference.cc   | 156 ++++++++++++---
 .../compiler/xla/service/shape_inference.h    |   6 +-
 .../xla/service/shape_inference_test.cc       | 177 ++++++++++++++++--
 .../xla/service/transpose_folding_test.cc     |  27 ++-
 .../compiler/xla/service/user_computation.cc  |  57 +++++-
 .../compiler/xla/service/user_computation.h   |   4 +
 .../xla/service/user_computation_test.cc      |  45 -----
 .../compiler/xla/tests/dot_operation_test.cc  |  20 ++
 .../xla/tests/multioutput_fusion_test.cc      |  12 +-
 tensorflow/compiler/xla/xla_data.proto        |  23 ++-
 .../performance/xla/operation_semantics.md    |  81 ++++++++
 30 files changed, 736 insertions(+), 156 deletions(-)

diff --git a/tensorflow/compiler/xla/client/computation_builder.cc b/tensorflow/compiler/xla/client/computation_builder.cc
index cce9310003..9febea8dcf 100644
--- a/tensorflow/compiler/xla/client/computation_builder.cc
+++ b/tensorflow/compiler/xla/client/computation_builder.cc
@@ -625,7 +625,41 @@ ComputationDataHandle ComputationBuilder::Lt(
 
 ComputationDataHandle ComputationBuilder::Dot(
     const ComputationDataHandle& lhs, const ComputationDataHandle& rhs) {
-  return BinaryOp(BINOP_DOT, lhs, rhs, /*broadcast_dimensions=*/{});
+  StatusOr<std::unique_ptr<Shape>> lhs_shape_or_status = GetShape(lhs);
+  if (!lhs_shape_or_status.ok()) {
+    NoteError(lhs_shape_or_status.status());
+    return ComputationDataHandle();
+  }
+  std::unique_ptr<Shape> lhs_shape = lhs_shape_or_status.ConsumeValueOrDie();
+
+  DotDimensionNumbers dimension_numbers;
+  dimension_numbers.add_lhs_contracting_dimensions(
+      lhs_shape->dimensions_size() == 1 ? 0 : 1);
+  dimension_numbers.add_rhs_contracting_dimensions(0);
+  return DotGeneral(lhs, rhs, dimension_numbers);
+}
+
+ComputationDataHandle ComputationBuilder::DotGeneral(
+    const ComputationDataHandle& lhs, const ComputationDataHandle& rhs,
+    const DotDimensionNumbers& dimension_numbers) {
+  if (!first_error_.ok() || !PrepareComputation().ok()) {
+    return ComputationDataHandle();
+  }
+
+  DotRequest request;
+  *request.mutable_lhs() = lhs;
+  *request.mutable_rhs() = rhs;
+  *request.mutable_dimension_numbers() = dimension_numbers;
+
+  OpRequest op_request;
+  *op_request.mutable_computation() = computation_.handle();
+  *op_request.mutable_dot_request() = request;
+  AddCommonFieldsToOpRequest(&op_request);
+  OpResponse response;
+
+  VLOG(2) << "making Dot request";
+  Status s = client_->stub()->Op(&op_request, &response);
+  return ParseOpResponse(s, &response);
 }
 
 ComputationDataHandle ComputationBuilder::Conv(
diff --git a/tensorflow/compiler/xla/client/computation_builder.h b/tensorflow/compiler/xla/client/computation_builder.h
index d2dbbbbebb..531b98cfb9 100644
--- a/tensorflow/compiler/xla/client/computation_builder.h
+++ b/tensorflow/compiler/xla/client/computation_builder.h
@@ -393,6 +393,11 @@ class ComputationBuilder {
   ComputationDataHandle Dot(const ComputationDataHandle& lhs,
                             const ComputationDataHandle& rhs);
 
+  // Enqueues a general dot instruction onto the computation.
+  ComputationDataHandle DotGeneral(
+      const ComputationDataHandle& lhs, const ComputationDataHandle& rhs,
+      const DotDimensionNumbers& dimension_numbers);
+
   // Default dimension numbers used for a 2D convolution.
   static constexpr int64 kConvBatchDimension = 0;
   static constexpr int64 kConvFeatureDimension = 1;
diff --git a/tensorflow/compiler/xla/service/algebraic_simplifier.cc b/tensorflow/compiler/xla/service/algebraic_simplifier.cc
index 71491218aa..b1d0345e70 100644
--- a/tensorflow/compiler/xla/service/algebraic_simplifier.cc
+++ b/tensorflow/compiler/xla/service/algebraic_simplifier.cc
@@ -597,9 +597,13 @@ Status AlgebraicSimplifierVisitor::HandleDot(HloInstruction* dot) {
 
   // Simplify dot(transpose(a), transpose(b)) to transpose(dot(b,a)).
   if (lhs->IsRank2Transpose() && rhs->IsRank2Transpose()) {
-    auto new_dot = computation_->AddInstruction(HloInstruction::CreateBinary(
-        ShapeUtil::PermuteDimensions({1, 0}, dot->shape()), HloOpcode::kDot,
-        rhs->mutable_operand(0), lhs->mutable_operand(0)));
+    DotDimensionNumbers dot_dimension_numbers;
+    dot_dimension_numbers.add_lhs_contracting_dimensions(1);
+    dot_dimension_numbers.add_rhs_contracting_dimensions(0);
+    auto new_dot = computation_->AddInstruction(HloInstruction::CreateDot(
+        ShapeUtil::PermuteDimensions({1, 0}, dot->shape()),
+        rhs->mutable_operand(0), lhs->mutable_operand(0),
+        dot_dimension_numbers));
     return ReplaceWithNewInstruction(
         dot, HloInstruction::CreateTranspose(dot->shape(), new_dot, {1, 0}));
   }
@@ -1616,8 +1620,11 @@ Status AlgebraicSimplifierVisitor::HandleConvolution(
 
   auto new_lhs = add_bitcast(new_input_shape, lhs);
   auto new_rhs = add_bitcast(new_filter_shape, rhs);
-  auto dot = computation_->AddInstruction(HloInstruction::CreateBinary(
-      dot_output_shape, HloOpcode::kDot, new_lhs, new_rhs));
+  DotDimensionNumbers dot_dimension_numbers;
+  dot_dimension_numbers.add_lhs_contracting_dimensions(1);
+  dot_dimension_numbers.add_rhs_contracting_dimensions(0);
+  auto dot = computation_->AddInstruction(HloInstruction::CreateDot(
+      dot_output_shape, new_lhs, new_rhs, dot_dimension_numbers));
   return ReplaceInstruction(convolution, add_bitcast(convolution_shape, dot));
 }
 
diff --git a/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc b/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc
index 56dfb1cf0b..3d70505f6e 100644
--- a/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc
+++ b/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc
@@ -2138,8 +2138,10 @@ TEST_F(AlgebraicSimplifierTest, IteratorInvalidation) {
       builder.AddInstruction(HloInstruction::CreateParameter(0, r1f32, "x"));
   HloInstruction* y =
       builder.AddInstruction(HloInstruction::CreateParameter(1, r1f32, "y"));
-  builder.AddInstruction(
-      HloInstruction::CreateBinary(r1f32, HloOpcode::kDot, x, y));
+  DotDimensionNumbers dot_dnums;
+  dot_dnums.add_lhs_contracting_dimensions(1);
+  dot_dnums.add_rhs_contracting_dimensions(0);
+  builder.AddInstruction(HloInstruction::CreateDot(r1f32, x, y, dot_dnums));
   std::unique_ptr<HloComputation> dot_computation(builder.Build());
 
   HloComputation::Builder call_builder(TestName() + ".Call");
diff --git a/tensorflow/compiler/xla/service/buffer_assignment_test.cc b/tensorflow/compiler/xla/service/buffer_assignment_test.cc
index 4d4c5b953e..75c71dfeb1 100644
--- a/tensorflow/compiler/xla/service/buffer_assignment_test.cc
+++ b/tensorflow/compiler/xla/service/buffer_assignment_test.cc
@@ -1360,10 +1360,13 @@ TEST_F(BufferAssignmentTest, OneTempAllocation) {
       HloInstruction::CreateParameter(1, shape_3x4, "param_b"));
   auto param_c = builder.AddInstruction(
       HloInstruction::CreateParameter(2, shape_4x4, "param_c"));
-  auto dot_ab = builder.AddInstruction(HloInstruction::CreateBinary(
-      shape_2x4, HloOpcode::kDot, param_a, param_b));
-  auto dot_bc = builder.AddInstruction(HloInstruction::CreateBinary(
-      shape_3x4, HloOpcode::kDot, param_b, param_c));
+  DotDimensionNumbers dot_dnums;
+  dot_dnums.add_lhs_contracting_dimensions(1);
+  dot_dnums.add_rhs_contracting_dimensions(0);
+  auto dot_ab = builder.AddInstruction(
+      HloInstruction::CreateDot(shape_2x4, param_a, param_b, dot_dnums));
+  auto dot_bc = builder.AddInstruction(
+      HloInstruction::CreateDot(shape_3x4, param_b, param_c, dot_dnums));
   builder.AddInstruction(
       HloInstruction::CreateConcatenate(shape_5x4, {dot_ab, dot_bc}, 1));
 
diff --git a/tensorflow/compiler/xla/service/cpu/cpu_instruction_fusion_test.cc b/tensorflow/compiler/xla/service/cpu/cpu_instruction_fusion_test.cc
index b9e4d006d7..1c04c9835e 100644
--- a/tensorflow/compiler/xla/service/cpu/cpu_instruction_fusion_test.cc
+++ b/tensorflow/compiler/xla/service/cpu/cpu_instruction_fusion_test.cc
@@ -31,6 +31,14 @@ namespace {
 
 using InstructionFusionTest = HloTestBase;
 
+std::unique_ptr<HloInstruction> MakeDot(const Shape& shape, HloInstruction* lhs,
+                                        HloInstruction* rhs) {
+  DotDimensionNumbers dot_dnums;
+  dot_dnums.add_lhs_contracting_dimensions(1);
+  dot_dnums.add_rhs_contracting_dimensions(0);
+  return HloInstruction::CreateDot(shape, lhs, rhs, dot_dnums);
+}
+
 TEST_F(InstructionFusionTest, DotOperationFusion_Basic_0) {
   HloComputation::Builder builder(TestName());
   HloInstruction* arg0 = builder.AddInstruction(HloInstruction::CreateParameter(
@@ -40,8 +48,8 @@ TEST_F(InstructionFusionTest, DotOperationFusion_Basic_0) {
 
   HloInstruction* exp0 = builder.AddInstruction(HloInstruction::CreateUnary(
       ShapeUtil::MakeShape(S32, {1024, 256}), HloOpcode::kExp, arg0));
-  HloInstruction* dot = builder.AddInstruction(HloInstruction::CreateBinary(
-      ShapeUtil::MakeShape(F32, {1024, 1}), HloOpcode::kDot, exp0, arg1));
+  HloInstruction* dot = builder.AddInstruction(
+      MakeDot(ShapeUtil::MakeShape(F32, {1024, 1}), exp0, arg1));
 
   auto module = CreateNewModule();
   auto computation = module->AddEntryComputation(builder.Build());
@@ -59,8 +67,8 @@ TEST_F(InstructionFusionTest, DotOperationFusion_Basic_1) {
 
   HloInstruction* exp1 = builder.AddInstruction(HloInstruction::CreateUnary(
       ShapeUtil::MakeShape(S32, {256, 1024}), HloOpcode::kExp, arg1));
-  HloInstruction* dot = builder.AddInstruction(HloInstruction::CreateBinary(
-      ShapeUtil::MakeShape(F32, {1, 1024}), HloOpcode::kDot, arg0, exp1));
+  HloInstruction* dot = builder.AddInstruction(
+      MakeDot(ShapeUtil::MakeShape(F32, {1, 1024}), arg0, exp1));
 
   auto module = CreateNewModule();
   auto computation = module->AddEntryComputation(builder.Build());
@@ -80,8 +88,8 @@ TEST_F(InstructionFusionTest, DotOperationFusion_Bitcast) {
       ShapeUtil::MakeShape(S32, {2, 512, 2, 128}), HloOpcode::kExp, arg0));
   HloInstruction* bitcast0 = builder.AddInstruction(HloInstruction::CreateUnary(
       ShapeUtil::MakeShape(S32, {1024, 256}), HloOpcode::kBitcast, exp0));
-  HloInstruction* dot = builder.AddInstruction(HloInstruction::CreateBinary(
-      ShapeUtil::MakeShape(F32, {1024, 1}), HloOpcode::kDot, bitcast0, arg1));
+  HloInstruction* dot = builder.AddInstruction(
+      MakeDot(ShapeUtil::MakeShape(F32, {1024, 1}), bitcast0, arg1));
 
   auto module = CreateNewModule();
   auto computation = module->AddEntryComputation(builder.Build());
@@ -102,8 +110,8 @@ TEST_F(InstructionFusionTest, DotOperationFusion_Reshape) {
   HloInstruction* reshape0 =
       builder.AddInstruction(HloInstruction::CreateReshape(
           ShapeUtil::MakeShape(S32, {1024, 256}), exp0));
-  HloInstruction* dot = builder.AddInstruction(HloInstruction::CreateBinary(
-      ShapeUtil::MakeShape(F32, {1024, 1}), HloOpcode::kDot, reshape0, arg1));
+  HloInstruction* dot = builder.AddInstruction(
+      MakeDot(ShapeUtil::MakeShape(F32, {1024, 1}), reshape0, arg1));
 
   auto module = CreateNewModule();
   auto computation = module->AddEntryComputation(builder.Build());
@@ -121,8 +129,8 @@ TEST_F(InstructionFusionTest, DotOperationFusion_TooLarge) {
 
   HloInstruction* exp1 = builder.AddInstruction(HloInstruction::CreateUnary(
       ShapeUtil::MakeShape(S32, {256, 32 * 1024}), HloOpcode::kExp, arg1));
-  HloInstruction* dot = builder.AddInstruction(HloInstruction::CreateBinary(
-      ShapeUtil::MakeShape(F32, {1, 32 * 1024}), HloOpcode::kDot, arg0, exp1));
+  HloInstruction* dot = builder.AddInstruction(
+      MakeDot(ShapeUtil::MakeShape(F32, {1, 32 * 1024}), arg0, exp1));
 
   auto module = CreateNewModule();
   auto computation = module->AddEntryComputation(builder.Build());
@@ -140,8 +148,8 @@ TEST_F(InstructionFusionTest, DotOperationFusion_ElementReuse) {
 
   HloInstruction* exp1 = builder.AddInstruction(HloInstruction::CreateUnary(
       ShapeUtil::MakeShape(S32, {256, 1024}), HloOpcode::kExp, arg1));
-  HloInstruction* dot = builder.AddInstruction(HloInstruction::CreateBinary(
-      ShapeUtil::MakeShape(F32, {2, 1024}), HloOpcode::kDot, arg0, exp1));
+  HloInstruction* dot = builder.AddInstruction(
+      MakeDot(ShapeUtil::MakeShape(F32, {2, 1024}), arg0, exp1));
 
   auto module = CreateNewModule();
   auto computation = module->AddEntryComputation(builder.Build());
@@ -162,8 +170,8 @@ TEST_F(InstructionFusionTest, DotOperationFusion_TransposeFusion) {
   HloInstruction* transpose1 =
       builder.AddInstruction(HloInstruction::CreateTranspose(
           ShapeUtil::MakeShape(S32, {256, 1024}), exp1, {1, 0}));
-  builder.AddInstruction(HloInstruction::CreateBinary(
-      ShapeUtil::MakeShape(F32, {1, 1024}), HloOpcode::kDot, arg0, transpose1));
+  builder.AddInstruction(
+      MakeDot(ShapeUtil::MakeShape(F32, {1, 1024}), arg0, transpose1));
 
   auto module = CreateNewModule();
   auto computation = module->AddEntryComputation(builder.Build());
diff --git a/tensorflow/compiler/xla/service/cpu/ir_emitter.cc b/tensorflow/compiler/xla/service/cpu/ir_emitter.cc
index 3f991c03e9..f242e0acb8 100644
--- a/tensorflow/compiler/xla/service/cpu/ir_emitter.cc
+++ b/tensorflow/compiler/xla/service/cpu/ir_emitter.cc
@@ -796,6 +796,11 @@ Status IrEmitter::HandleDot(HloInstruction* dot) {
   TF_RETURN_IF_ERROR(ElementTypesSameAndSupported(
       /*instruction=*/*dot, /*operands=*/{lhs, rhs},
       /*supported_types=*/{F32, F64, C64}));
+  const DotDimensionNumbers& dnums = dot->dot_dimension_numbers();
+  if (dnums.lhs_batch_dimensions_size() > 0 ||
+      dnums.rhs_batch_dimensions_size() > 0) {
+    return Unimplemented("Dot with batch dimensions not implemented.");
+  }
 
   llvm_ir::IrArray lhs_array(GetIrArrayFor(lhs));
   llvm_ir::IrArray rhs_array(GetIrArrayFor(rhs));
diff --git a/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc b/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc
index 1b863c9e3c..abc739d181 100644
--- a/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc
+++ b/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc
@@ -246,6 +246,11 @@ Status IrEmitterUnnested::DefaultAction(HloInstruction* hlo) {
 }
 
 Status IrEmitterUnnested::HandleDot(HloInstruction* dot) {
+  const DotDimensionNumbers& dnums = dot->dot_dimension_numbers();
+  if (dnums.lhs_batch_dimensions_size() > 0 ||
+      dnums.rhs_batch_dimensions_size() > 0) {
+    return Unimplemented("Dot with batch dimensions not implemented.");
+  }
   if (ImplementedAsGemm(*dot)) {
     thunk_sequence_->emplace_back(BuildGemmThunk(dot));
     return Status::OK();
diff --git a/tensorflow/compiler/xla/service/graphviz_example.cc b/tensorflow/compiler/xla/service/graphviz_example.cc
index 049e8d80d8..05017008e2 100644
--- a/tensorflow/compiler/xla/service/graphviz_example.cc
+++ b/tensorflow/compiler/xla/service/graphviz_example.cc
@@ -108,8 +108,11 @@ std::unique_ptr<HloModule> MakeBigGraph() {
       HloInstruction::CreateUnary(vshape, HloOpcode::kCopy, param_v0));
   auto clamp = builder.AddInstruction(HloInstruction::CreateTernary(
       vshape, HloOpcode::kClamp, copy, param_v1, param_v2));
+  DotDimensionNumbers dot_dnums;
+  dot_dnums.add_lhs_contracting_dimensions(1);
+  dot_dnums.add_rhs_contracting_dimensions(0);
   auto dot = builder.AddInstruction(
-      HloInstruction::CreateBinary(vshape, HloOpcode::kDot, clamp, param_v0));
+      HloInstruction::CreateDot(vshape, clamp, param_v0, dot_dnums));
   auto tuple = builder.AddInstruction(
       HloInstruction::CreateTuple({dot, param_s, clamp}));
   auto scalar = builder.AddInstruction(
diff --git a/tensorflow/compiler/xla/service/heap_simulator_test.cc b/tensorflow/compiler/xla/service/heap_simulator_test.cc
index 17b926c874..387b649a73 100644
--- a/tensorflow/compiler/xla/service/heap_simulator_test.cc
+++ b/tensorflow/compiler/xla/service/heap_simulator_test.cc
@@ -259,8 +259,11 @@ TEST_F(HeapSimulatorTest, MultiplyDot) {
       HloInstruction::CreateParameter(2, f32scalar_, "paramY"));
   auto mul = builder.AddInstruction(HloInstruction::CreateBinary(
       f32vec4_, HloOpcode::kMultiply, paramA, paramX));
+  DotDimensionNumbers dot_dnums;
+  dot_dnums.add_lhs_contracting_dimensions(1);
+  dot_dnums.add_rhs_contracting_dimensions(0);
   auto dot = builder.AddInstruction(
-      HloInstruction::CreateBinary(f32vec4_, HloOpcode::kDot, mul, paramY));
+      HloInstruction::CreateDot(f32vec4_, mul, paramY, dot_dnums));
 
   // The buffer for dot is the output, and it cannot be shared with the buffer
   // for mul, since dot isn't elementwise.
@@ -292,8 +295,11 @@ TEST_F(HeapSimulatorTest, MultiplyDotAdd) {
       HloInstruction::CreateParameter(2, f32scalar_, "paramY"));
   auto mul = builder.AddInstruction(HloInstruction::CreateBinary(
       f32vec4_, HloOpcode::kMultiply, paramA, paramX));
+  DotDimensionNumbers dot_dnums;
+  dot_dnums.add_lhs_contracting_dimensions(1);
+  dot_dnums.add_rhs_contracting_dimensions(0);
   auto dot = builder.AddInstruction(
-      HloInstruction::CreateBinary(f32vec4_, HloOpcode::kDot, mul, paramY));
+      HloInstruction::CreateDot(f32vec4_, mul, paramY, dot_dnums));
   auto add = builder.AddInstruction(
       HloInstruction::CreateBinary(f32vec4_, HloOpcode::kAdd, dot, paramA));
 
@@ -327,10 +333,13 @@ TEST_F(HeapSimulatorTest, MultiplyDotDot) {
       HloInstruction::CreateParameter(2, f32scalar_, "paramY"));
   auto mul = builder.AddInstruction(HloInstruction::CreateBinary(
       f32vec4_, HloOpcode::kMultiply, paramA, paramX));
+  DotDimensionNumbers dot_dnums;
+  dot_dnums.add_lhs_contracting_dimensions(1);
+  dot_dnums.add_rhs_contracting_dimensions(0);
   auto dot0 = builder.AddInstruction(
-      HloInstruction::CreateBinary(f32vec4_, HloOpcode::kDot, mul, paramY));
+      HloInstruction::CreateDot(f32vec4_, mul, paramY, dot_dnums));
   auto dot1 = builder.AddInstruction(
-      HloInstruction::CreateBinary(f32vec4_, HloOpcode::kDot, dot0, paramY));
+      HloInstruction::CreateDot(f32vec4_, dot0, paramY, dot_dnums));
 
   // The buffer for dot1 is the output.  No buffers can be shared.  The buffer
   // for mul is freed before the end, since it's no longer used after dot0
@@ -365,10 +374,13 @@ TEST_F(HeapSimulatorTest, MultiplyDotDotTuple) {
       HloInstruction::CreateParameter(2, f32scalar_, "paramY"));
   auto mul = builder.AddInstruction(HloInstruction::CreateBinary(
       f32vec4_, HloOpcode::kMultiply, paramA, paramX));
+  DotDimensionNumbers dot_dnums;
+  dot_dnums.add_lhs_contracting_dimensions(1);
+  dot_dnums.add_rhs_contracting_dimensions(0);
   auto dot0 = builder.AddInstruction(
-      HloInstruction::CreateBinary(f32vec4_, HloOpcode::kDot, mul, paramY));
+      HloInstruction::CreateDot(f32vec4_, mul, paramY, dot_dnums));
   auto dot1 = builder.AddInstruction(
-      HloInstruction::CreateBinary(f32vec4_, HloOpcode::kDot, dot0, paramY));
+      HloInstruction::CreateDot(f32vec4_, dot0, paramY, dot_dnums));
   auto tuple =
       builder.AddInstruction(HloInstruction::CreateTuple({dot0, dot1}));
 
diff --git a/tensorflow/compiler/xla/service/hlo.proto b/tensorflow/compiler/xla/service/hlo.proto
index e984bdb5f7..5d0cfba1fc 100644
--- a/tensorflow/compiler/xla/service/hlo.proto
+++ b/tensorflow/compiler/xla/service/hlo.proto
@@ -118,6 +118,9 @@ message HloInstructionProto {
 
   // Shape of outfeed request.
   xla.Shape outfeed_shape = 29;
+
+  // Describes the dimension numbers used for a dot operation
+  xla.DotDimensionNumbers dot_dimension_numbers = 30;
 }
 
 // Serialization of HloComputation.
diff --git a/tensorflow/compiler/xla/service/hlo_cost_analysis.cc b/tensorflow/compiler/xla/service/hlo_cost_analysis.cc
index 6fcc01dd64..0ed64e6779 100644
--- a/tensorflow/compiler/xla/service/hlo_cost_analysis.cc
+++ b/tensorflow/compiler/xla/service/hlo_cost_analysis.cc
@@ -201,10 +201,11 @@ Status HloCostAnalysis::HandleCopy(const HloInstruction*) {
 Status HloCostAnalysis::HandleDot(const HloInstruction* dot) {
   const Shape& lhs_shape = dot->operand(0)->shape();
   const Shape& rhs_shape = dot->operand(1)->shape();
+  const DotDimensionNumbers& dnums = dot->dot_dimension_numbers();
   // Count of elements along the reduction dimension (last dimension for the
   // rhs).
-  int64 reduction_width = lhs_shape.dimensions(ShapeUtil::Rank(lhs_shape) - 1);
-
+  int64 reduction_width =
+      lhs_shape.dimensions(dnums.lhs_contracting_dimensions(0));
   // First divide by reduction width before multiplying by rhs elements to avoid
   // overflow.
   int64 fma_count;
diff --git a/tensorflow/compiler/xla/service/hlo_evaluator_test.cc b/tensorflow/compiler/xla/service/hlo_evaluator_test.cc
index b2c4351896..a5d39fe086 100644
--- a/tensorflow/compiler/xla/service/hlo_evaluator_test.cc
+++ b/tensorflow/compiler/xla/service/hlo_evaluator_test.cc
@@ -621,8 +621,11 @@ TEST_F(HloEvaluatorTest, DotRank2AndRank1) {
       b.AddInstruction(HloInstruction::CreateConstant(std::move(rhs_literal)));
 
   Shape shape = ShapeUtil::MakeShape(F32, {4, 2});
-  b.AddInstruction(HloInstruction::CreateBinary(
-      shape, HloOpcode::kDot, lhs_instruction, rhs_instruction));
+  DotDimensionNumbers dot_dnums;
+  dot_dnums.add_lhs_contracting_dimensions(1);
+  dot_dnums.add_rhs_contracting_dimensions(0);
+  b.AddInstruction(HloInstruction::CreateDot(shape, lhs_instruction,
+                                             rhs_instruction, dot_dnums));
   auto computation = module().AddEntryComputation(b.Build());
 
   std::unique_ptr<Literal> result =
@@ -664,8 +667,11 @@ TEST_F(HloEvaluatorTest, DotRank1AndRank2) {
       b.AddInstruction(HloInstruction::CreateConstant(std::move(rhs_literal)));
 
   Shape shape = ShapeUtil::MakeShape(F32, {2});
-  b.AddInstruction(HloInstruction::CreateBinary(
-      shape, HloOpcode::kDot, lhs_instruction, rhs_instruction));
+  DotDimensionNumbers dot_dnums;
+  dot_dnums.add_lhs_contracting_dimensions(0);
+  dot_dnums.add_rhs_contracting_dimensions(0);
+  b.AddInstruction(HloInstruction::CreateDot(shape, lhs_instruction,
+                                             rhs_instruction, dot_dnums));
   auto computation = module().AddEntryComputation(b.Build());
 
   std::unique_ptr<Literal> result =
@@ -705,8 +711,11 @@ TEST_F(HloEvaluatorTest, DotRank2AndRank2) {
       b.AddInstruction(HloInstruction::CreateConstant(std::move(rhs_literal)));
 
   Shape shape = ShapeUtil::MakeShape(F32, {4, 2});
-  b.AddInstruction(HloInstruction::CreateBinary(
-      shape, HloOpcode::kDot, lhs_instruction, rhs_instruction));
+  DotDimensionNumbers dot_dnums;
+  dot_dnums.add_lhs_contracting_dimensions(1);
+  dot_dnums.add_rhs_contracting_dimensions(0);
+  b.AddInstruction(HloInstruction::CreateDot(shape, lhs_instruction,
+                                             rhs_instruction, dot_dnums));
   auto computation = module().AddEntryComputation(b.Build());
 
   std::unique_ptr<Literal> result =
diff --git a/tensorflow/compiler/xla/service/hlo_instruction.cc b/tensorflow/compiler/xla/service/hlo_instruction.cc
index c30c432654..b4bac18bcd 100644
--- a/tensorflow/compiler/xla/service/hlo_instruction.cc
+++ b/tensorflow/compiler/xla/service/hlo_instruction.cc
@@ -118,6 +118,10 @@ StatusOr<std::unique_ptr<HloInstruction>> HloInstruction::CreateFromProto(
         MakeUnique<ConvolutionDimensionNumbers>(
             proto.convolution_dimension_numbers());
   }
+  if (proto.has_dot_dimension_numbers()) {
+    instruction->dot_dimension_numbers_ =
+        MakeUnique<DotDimensionNumbers>(proto.dot_dimension_numbers());
+  }
   for (const HloInstructionProto::SliceDimensions& slice_dimensions :
        proto.slice_dimensions()) {
     instruction->slice_starts_.push_back(slice_dimensions.start());
@@ -332,6 +336,17 @@ HloInstruction::CreateGetTupleElement(const Shape& shape,
   return instruction;
 }
 
+/* static */ std::unique_ptr<HloInstruction> HloInstruction::CreateDot(
+    const Shape& shape, HloInstruction* lhs, HloInstruction* rhs,
+    const DotDimensionNumbers& dimension_numbers) {
+  auto instruction = WrapUnique(new HloInstruction(HloOpcode::kDot, shape));
+  instruction->AppendOperand(lhs);
+  instruction->AppendOperand(rhs);
+  instruction->dot_dimension_numbers_ =
+      MakeUnique<DotDimensionNumbers>(dimension_numbers);
+  return instruction;
+}
+
 /* static */ std::unique_ptr<HloInstruction>
 HloInstruction::CreateReducePrecision(const Shape& shape,
                                       HloInstruction* operand,
@@ -1086,7 +1101,6 @@ std::unique_ptr<HloInstruction> HloInstruction::CloneWithNewOperands(
     case HloOpcode::kLe:
     case HloOpcode::kLt:
     case HloOpcode::kNe:
-    case HloOpcode::kDot:
     case HloOpcode::kMaximum:
     case HloOpcode::kMinimum:
     case HloOpcode::kPower:
@@ -1138,6 +1152,11 @@ std::unique_ptr<HloInstruction> HloInstruction::CloneWithNewOperands(
       clone = CreateConvolve(shape, new_operands[0], new_operands[1], *window_,
                              *convolution_dimension_numbers_);
       break;
+    case HloOpcode::kDot:
+      CHECK_EQ(new_operands.size(), 2);
+      clone = CreateDot(shape, new_operands[0], new_operands[1],
+                        *dot_dimension_numbers_);
+      break;
     case HloOpcode::kCrossReplicaSum:
       CHECK_EQ(new_operands.size(), 1);
       clone = CreateCrossReplicaSum(shape, new_operands[0]);
@@ -1509,7 +1528,6 @@ bool HloInstruction::IdenticalSlowPath(
     case HloOpcode::kCos:
     case HloOpcode::kCrossReplicaSum:
     case HloOpcode::kDivide:
-    case HloOpcode::kDot:
     case HloOpcode::kEq:
     case HloOpcode::kExp:
     case HloOpcode::kFloor:
@@ -1582,6 +1600,10 @@ bool HloInstruction::IdenticalSlowPath(
              protobuf_util::ProtobufEquals(
                  convolution_dimension_numbers(),
                  other.convolution_dimension_numbers());
+    // Check dot dimension numbers.
+    case HloOpcode::kDot:
+      return protobuf_util::ProtobufEquals(dot_dimension_numbers(),
+                                           other.dot_dimension_numbers());
 
     // Reduction results are determined by the reduction dimension and the
     // reduction computation.
@@ -1990,6 +2012,9 @@ std::vector<string> HloInstruction::ExtraAttributesToString() const {
   if (convolution_dimension_numbers_ != nullptr) {
     extra.push_back(ConvolutionDimensionNumbersToString());
   }
+  if (dot_dimension_numbers_ != nullptr) {
+    extra.push_back(DotDimensionNumbersToString());
+  }
 
   if (opcode() == HloOpcode::kWhile) {
     extra.push_back(StrCat("condition=%", while_condition()->name()));
@@ -2086,6 +2111,9 @@ HloInstructionProto HloInstruction::ToProto() const {
     *proto.mutable_convolution_dimension_numbers() =
         *convolution_dimension_numbers_;
   }
+  if (dot_dimension_numbers_ != nullptr) {
+    *proto.mutable_dot_dimension_numbers() = *dot_dimension_numbers_;
+  }
   for (int i = 0; i < slice_starts_.size(); ++i) {
     auto* slice_dimension = proto.add_slice_dimensions();
     slice_dimension->set_start(slice_starts_[i]);
@@ -3051,6 +3079,30 @@ string HloInstruction::ConvolutionDimensionNumbersToString() const {
   return result;
 }
 
+string HloInstruction::DotDimensionNumbersToString() const {
+  string result;
+  if (dot_dimension_numbers_ == nullptr) {
+    return result;
+  }
+  const DotDimensionNumbers& dnums = *dot_dimension_numbers_;
+  if (!dnums.lhs_batch_dimensions().empty()) {
+    result += "lhs_batch_dims=";
+    StrAppend(&result, Join(dnums.lhs_batch_dimensions(), ","));
+  }
+  result += "lhs_contracting_dims=";
+  StrAppend(&result, Join(dnums.lhs_contracting_dimensions(), ","));
+
+  result += ",";
+  if (!dnums.rhs_batch_dimensions().empty()) {
+    result += "rhs_batch_dims=";
+    StrAppend(&result, Join(dnums.rhs_batch_dimensions(), ","));
+  }
+  result += "rhs_contracting_dims=";
+  StrAppend(&result, Join(dnums.rhs_contracting_dimensions(), ","));
+
+  return result;
+}
+
 bool HloInstruction::CouldBeBitcast() const {
   switch (opcode_) {
     case HloOpcode::kTranspose:
diff --git a/tensorflow/compiler/xla/service/hlo_instruction.h b/tensorflow/compiler/xla/service/hlo_instruction.h
index cda8b07c61..768c027a42 100644
--- a/tensorflow/compiler/xla/service/hlo_instruction.h
+++ b/tensorflow/compiler/xla/service/hlo_instruction.h
@@ -160,6 +160,12 @@ class HloInstruction {
       const Window& window,
       const ConvolutionDimensionNumbers& dimension_numbers);
 
+  // Creates a dot op with operands 'lhs' and 'rhs' with contracting and batch
+  // dimensions specified in 'dimension_numbers'.
+  static std::unique_ptr<HloInstruction> CreateDot(
+      const Shape& shape, HloInstruction* lhs, HloInstruction* rhs,
+      const DotDimensionNumbers& dimension_numbers);
+
   // Creates a reduce-precision op, where operand is the data to reduce in
   // precision, and exponent_bits and mantissa_bits describe the precision to
   // reduce it to.
@@ -915,6 +921,15 @@ class HloInstruction {
   // Returns the dump string of the convolution dimension numbers.
   string ConvolutionDimensionNumbersToString() const;
 
+  // Returns data on the dimension numbers used for a dot operation.
+  const DotDimensionNumbers& dot_dimension_numbers() const {
+    CHECK(dot_dimension_numbers_ != nullptr);
+    return *dot_dimension_numbers_;
+  }
+
+  // Returns the dump string of the dot dimension numbers.
+  string DotDimensionNumbersToString() const;
+
   // Returns the random distribution for this rng node.
   //
   // Precondition: opcode() == HloOpcode::kRng
@@ -1173,6 +1188,9 @@ class HloInstruction {
   // Describes the dimension numbers used for a convolution.
   std::unique_ptr<ConvolutionDimensionNumbers> convolution_dimension_numbers_;
 
+  // Describes the dimension numbers used for a dot.
+  std::unique_ptr<DotDimensionNumbers> dot_dimension_numbers_;
+
   // Describes the [begin, end) index range for a slice.
   std::vector<int64> slice_starts_;
   std::vector<int64> slice_limits_;
diff --git a/tensorflow/compiler/xla/service/hlo_instruction_test.cc b/tensorflow/compiler/xla/service/hlo_instruction_test.cc
index 76b12fc8d3..11420cae63 100644
--- a/tensorflow/compiler/xla/service/hlo_instruction_test.cc
+++ b/tensorflow/compiler/xla/service/hlo_instruction_test.cc
@@ -1068,8 +1068,11 @@ TEST_F(HloInstructionTest, CloneOfFusionPreservesShape) {
       builder.AddInstruction(HloInstruction::CreateParameter(1, s2, "y"));
   HloInstruction* reshape =
       builder.AddInstruction(HloInstruction::CreateTranspose(s2t, y, {1, 0}));
+  DotDimensionNumbers dot_dnums;
+  dot_dnums.add_lhs_contracting_dimensions(1);
+  dot_dnums.add_rhs_contracting_dimensions(0);
   HloInstruction* dot = builder.AddInstruction(
-      HloInstruction::CreateBinary(sout, HloOpcode::kDot, x, reshape));
+      HloInstruction::CreateDot(sout, x, reshape, dot_dnums));
 
   HloModule module(TestName());
   auto* computation = module.AddEntryComputation(builder.Build());
@@ -1182,12 +1185,15 @@ TEST_F(HloInstructionTest, Stringification) {
       builder.AddInstruction(HloInstruction::CreateParameter(1, s2, "y"));
   HloInstruction* reshape =
       builder.AddInstruction(HloInstruction::CreateTranspose(s2t, y, {1, 0}));
+  DotDimensionNumbers dot_dnums;
+  dot_dnums.add_lhs_contracting_dimensions(1);
+  dot_dnums.add_rhs_contracting_dimensions(0);
   HloInstruction* dot = builder.AddInstruction(
-      HloInstruction::CreateBinary(sout, HloOpcode::kDot, x, reshape));
+      HloInstruction::CreateDot(sout, x, reshape, dot_dnums));
 
   EXPECT_EQ(dot->ToString(false, false),
             "%dot = f32[5,20]{1,0} dot(f32[5,10]{1,0} %x, f32[10,20]{1,0} "
-            "%transpose)");
+            "%transpose), lhs_contracting_dims=1,rhs_contracting_dims=0");
 
   HloModule module(TestName());
   auto* computation = module.AddEntryComputation(builder.Build());
diff --git a/tensorflow/compiler/xla/service/hlo_verifier.cc b/tensorflow/compiler/xla/service/hlo_verifier.cc
index 2c09d2defb..ea7775b18a 100644
--- a/tensorflow/compiler/xla/service/hlo_verifier.cc
+++ b/tensorflow/compiler/xla/service/hlo_verifier.cc
@@ -75,7 +75,11 @@ class ShapeVerifier : public DfsHloVisitor {
   }
 
   Status HandleDot(HloInstruction* dot) override {
-    return CheckBinaryShape(dot);
+    TF_ASSIGN_OR_RETURN(const Shape expected,
+                        ShapeInference::InferDotOpShape(
+                            dot->operand(0)->shape(), dot->operand(1)->shape(),
+                            dot->dot_dimension_numbers()));
+    return CheckShape(dot, expected);
   }
 
   Status HandleConvolution(HloInstruction* convolution) override {
diff --git a/tensorflow/compiler/xla/service/liveness_util_test.cc b/tensorflow/compiler/xla/service/liveness_util_test.cc
index 476e86fa72..2c2a02f637 100644
--- a/tensorflow/compiler/xla/service/liveness_util_test.cc
+++ b/tensorflow/compiler/xla/service/liveness_util_test.cc
@@ -277,8 +277,11 @@ TEST_F(CanShareOperandBufferWithUserTest, FusedDotAdd) {
   auto b = builder.AddInstruction(HloInstruction::CreateConstant(
       Literal::CreateR2<float>({{2.0, 2.0}, {2.0, 2.0}})));
 
+  DotDimensionNumbers dot_dnums;
+  dot_dnums.add_lhs_contracting_dimensions(1);
+  dot_dnums.add_rhs_contracting_dimensions(0);
   auto dot = builder.AddInstruction(
-      HloInstruction::CreateBinary(data_shape, HloOpcode::kDot, a, b));
+      HloInstruction::CreateDot(data_shape, a, b, dot_dnums));
 
   auto one = builder.AddInstruction(
       HloInstruction::CreateConstant(Literal::CreateR0<float>(1.0)));
@@ -312,8 +315,11 @@ TEST_F(CanShareOperandBufferWithUserTest, FusedTransposeDotAdd) {
   auto b_t = builder.AddInstruction(
       HloInstruction::CreateTranspose(data_shape, b, {1, 0}));
 
+  DotDimensionNumbers dot_dnums;
+  dot_dnums.add_lhs_contracting_dimensions(1);
+  dot_dnums.add_rhs_contracting_dimensions(0);
   auto dot = builder.AddInstruction(
-      HloInstruction::CreateBinary(data_shape, HloOpcode::kDot, a, b_t));
+      HloInstruction::CreateDot(data_shape, a, b_t, dot_dnums));
 
   auto one = builder.AddInstruction(
       HloInstruction::CreateConstant(Literal::CreateR0<float>(1.0)));
diff --git a/tensorflow/compiler/xla/service/service.cc b/tensorflow/compiler/xla/service/service.cc
index d997cab83f..fa62080be4 100644
--- a/tensorflow/compiler/xla/service/service.cc
+++ b/tensorflow/compiler/xla/service/service.cc
@@ -1381,6 +1381,9 @@ tensorflow::Status Service::Op(const OpRequest* arg, OpResponse* result) {
       handle_status =
           computation->AddCustomCallInstruction(arg->custom_call_request());
       break;
+    case OpRequest::kDotRequest:
+      handle_status = computation->AddDotInstruction(arg->dot_request());
+      break;
     case OpRequest::kDynamicSliceRequest:
       handle_status =
           computation->AddDynamicSliceInstruction(arg->dynamic_slice_request());
diff --git a/tensorflow/compiler/xla/service/shape_inference.cc b/tensorflow/compiler/xla/service/shape_inference.cc
index 3df1911d07..7178eb40dd 100644
--- a/tensorflow/compiler/xla/service/shape_inference.cc
+++ b/tensorflow/compiler/xla/service/shape_inference.cc
@@ -29,6 +29,7 @@ limitations under the License.
 #include "tensorflow/compiler/xla/xla_data.pb.h"
 #include "tensorflow/core/lib/core/errors.h"
 #include "tensorflow/core/lib/core/stringpiece.h"
+#include "tensorflow/core/lib/gtl/flatset.h"
 #include "tensorflow/core/lib/math/math_util.h"
 #include "tensorflow/core/lib/strings/str_util.h"
 #include "tensorflow/core/lib/strings/strcat.h"
@@ -90,8 +91,6 @@ BinaryOperation OpcodeToBinaryOperation(HloOpcode opcode) {
       return BINOP_ATAN2;
     case HloOpcode::kComplex:
       return BINOP_COMPLEX;
-    case HloOpcode::kDot:
-      return BINOP_DOT;
     case HloOpcode::kMultiply:
       return BINOP_MUL;
     case HloOpcode::kAdd:
@@ -549,8 +548,98 @@ StatusOr<Shape> InferWindowOutputShape(const Shape& base_shape,
   return ShapeUtil::MakeShape(operand_shape.element_type(), dimensions);
 }
 
-/* static */ StatusOr<Shape> ShapeInference::InferDotOpShape(const Shape& lhs,
-                                                             const Shape& rhs) {
+// Current DotDimensionNumbers Requirements:
+//
+// Contracting Dimensions:
+// *) Exactly one contracting dimension on both lhs and rhs.
+// *) Contracting dimension size must be the same on both lhs and rhs.
+// *) Contracting dimension numbers do not need to be the same (i.e. transposes
+//    are passed on to emitter implementations).
+//
+// Batch Dimensions:
+// *) Same number of batch dimensions on both lhs and rhs.
+// *) Same batch dimension numbers (and sizes) on both lhs and rhs.
+//
+// Non-Contracting-Non-Batch Dimensions:
+// *) Can be 0 (matrix-vector) or 1 (matrix-matrix).
+//
+
+namespace {
+
+Status ValidateDotDimensionNumbers(
+    const Shape& lhs, const Shape& rhs,
+    const DotDimensionNumbers& dimension_numbers) {
+  // Check that dimension numbers are in range.
+  auto dims_in_range =
+      [](const int64 rank, tensorflow::gtl::ArraySlice<int64> contracting_dims,
+         tensorflow::gtl::ArraySlice<int64> batch_dims) -> bool {
+    auto in_range = [&rank](int64 i) -> bool { return 0 <= i && i < rank; };
+    return std::all_of(contracting_dims.begin(), contracting_dims.end(),
+                       in_range) &&
+           std::all_of(batch_dims.begin(), batch_dims.end(), in_range);
+  };
+
+  tensorflow::gtl::ArraySlice<int64> lhs_contracting_dimensions =
+      AsInt64Slice(dimension_numbers.lhs_contracting_dimensions());
+  tensorflow::gtl::ArraySlice<int64> rhs_contracting_dimensions =
+      AsInt64Slice(dimension_numbers.rhs_contracting_dimensions());
+  tensorflow::gtl::ArraySlice<int64> lhs_batch_dimensions =
+      AsInt64Slice(dimension_numbers.lhs_batch_dimensions());
+  tensorflow::gtl::ArraySlice<int64> rhs_batch_dimensions =
+      AsInt64Slice(dimension_numbers.rhs_batch_dimensions());
+
+  if (!dims_in_range(ShapeUtil::Rank(lhs), lhs_contracting_dimensions,
+                     lhs_batch_dimensions) ||
+      !dims_in_range(ShapeUtil::Rank(rhs), rhs_contracting_dimensions,
+                     rhs_batch_dimensions)) {
+    return InvalidArgument("A dimension number is out of range in dot: %s",
+                           dimension_numbers.DebugString().c_str());
+  }
+
+  // Check that dimension numbers are unique.
+  auto dims_unique = [](tensorflow::gtl::ArraySlice<int64> contracting_dims,
+                        tensorflow::gtl::ArraySlice<int64> batch_dims) -> bool {
+    tensorflow::gtl::FlatSet<int64> dim_set;
+    auto is_unique = [&dim_set](int64 i) -> bool {
+      return dim_set.insert(i).second;
+    };
+    return std::all_of(contracting_dims.begin(), contracting_dims.end(),
+                       is_unique) &&
+           std::all_of(batch_dims.begin(), batch_dims.end(), is_unique);
+  };
+
+  if (!dims_unique(lhs_contracting_dimensions, lhs_batch_dimensions) ||
+      !dims_unique(rhs_contracting_dimensions, rhs_batch_dimensions)) {
+    return InvalidArgument("A dimension number is not unique in dot: %s",
+                           dimension_numbers.DebugString().c_str());
+  }
+
+  // Check that the count of non-contracting-non-batch dimensions is in {0, 1}.
+  const int64 lhs_non_contracting_non_batch_dims =
+      ShapeUtil::Rank(lhs) -
+      dimension_numbers.lhs_contracting_dimensions_size() -
+      dimension_numbers.lhs_batch_dimensions_size();
+  const int64 rhs_non_contracting_non_batch_dims =
+      ShapeUtil::Rank(rhs) -
+      dimension_numbers.rhs_contracting_dimensions_size() -
+      dimension_numbers.rhs_batch_dimensions_size();
+  if (lhs_non_contracting_non_batch_dims < 0 ||
+      lhs_non_contracting_non_batch_dims > 1 ||
+      rhs_non_contracting_non_batch_dims < 0 ||
+      rhs_non_contracting_non_batch_dims > 1) {
+    return InvalidArgument(
+        "batch and contracting dimension number mismatch "
+        "with rank ");
+  }
+
+  return Status::OK();
+}
+
+}  // namespace
+
+/* static */ StatusOr<Shape> ShapeInference::InferDotOpShape(
+    const Shape& lhs, const Shape& rhs,
+    const DotDimensionNumbers& dimension_numbers) {
   TF_RETURN_IF_ERROR(ExpectNotTupleOrOpaque(lhs, "lhs of dot"));
   TF_RETURN_IF_ERROR(ExpectNotTupleOrOpaque(rhs, "rhs of dot"));
 
@@ -570,37 +659,62 @@ StatusOr<Shape> InferWindowOutputShape(const Shape& base_shape,
     return fail("element types do not match");
   }
 
-  if (ShapeUtil::Rank(lhs) < 1 || ShapeUtil::Rank(lhs) > 2 ||
-      ShapeUtil::Rank(rhs) < 1 || ShapeUtil::Rank(rhs) > 2) {
-    return fail("dot only supports rank 1 or 2");
+  if ((ShapeUtil::Rank(lhs) < 1) || (ShapeUtil::Rank(rhs) < 1)) {
+    return fail("dot only supports rank 1 or above.");
   }
 
-  // Determine the index of the contracted dimensions for input tensors.
-  // dimensions -1 of lhs and dimension 0 of rhs are contracted.
-  int64 lhs_contracted_dimension = ShapeUtil::GetDimensionNumber(lhs, -1);
-  int64 rhs_contracted_dimension = 0;
+  // Validate basic properties of dot dimension numbers.
+  TF_RETURN_IF_ERROR(ValidateDotDimensionNumbers(lhs, rhs, dimension_numbers));
+
+  // Check that there is only one contracting dimension for both lhs and rhs.
+  if (dimension_numbers.lhs_contracting_dimensions_size() !=
+          dimension_numbers.rhs_contracting_dimensions_size() ||
+      dimension_numbers.lhs_contracting_dimensions_size() != 1) {
+    return fail("must specify one contracting dimension for both lhs and rhs.");
+  }
 
-  // Check if the contracted dimension sizes are the same.
-  if ((lhs_contracted_dimension < ShapeUtil::Rank(lhs) &&
-       rhs_contracted_dimension < ShapeUtil::Rank(rhs)) &&
-      lhs.dimensions(lhs_contracted_dimension) !=
-          rhs.dimensions(rhs_contracted_dimension)) {
-    return fail("contracted dimensions mismatch");
+  // Check that contracting dimension sizes match.
+  const int64 lhs_contracting_dimension =
+      dimension_numbers.lhs_contracting_dimensions(0);
+  const int64 rhs_contracting_dimension =
+      dimension_numbers.rhs_contracting_dimensions(0);
+  if (lhs.dimensions(lhs_contracting_dimension) !=
+      rhs.dimensions(rhs_contracting_dimension)) {
+    return fail("contracting dimension sizes do not match.");
+  }
+
+  // Check that number of batch dimensions match.
+  if (dimension_numbers.lhs_batch_dimensions_size() !=
+      dimension_numbers.rhs_batch_dimensions_size()) {
+    return fail("must the same number of batch dimensions for lhs and rhs.");
+  }
+
+  // Check that batch dimension numbers and sizes match.
+  for (int64 i = 0; i < dimension_numbers.lhs_batch_dimensions_size(); ++i) {
+    if (dimension_numbers.lhs_batch_dimensions(i) !=
+            dimension_numbers.rhs_batch_dimensions(i) ||
+        lhs.dimensions(dimension_numbers.lhs_batch_dimensions(i)) !=
+            rhs.dimensions(dimension_numbers.rhs_batch_dimensions(i))) {
+      return fail("batch dimension numbers and sizes must match for lhs/rhs.");
+    }
   }
 
   // The ranks of lhs and rhs are decremented by 1 respectively due to the
   // contraction, and added for the rank of the result. When an input tensor is
   // a scalar, its contribution to the rank of the result is 0.
   // Generate the result dimensions in order, rhs dimensions followed by lhs
-  // dimensions except the contracted dimensions.
+  // dimensions except the contracted and batch dimensions.
   std::vector<int64> dimensions;
+  std::unordered_set<int64> rhs_batch_dims(
+      dimension_numbers.rhs_batch_dimensions().begin(),
+      dimension_numbers.rhs_batch_dimensions().end());
   for (int64 i = 0; i < ShapeUtil::Rank(lhs); i++) {
-    if (i != lhs_contracted_dimension) {
+    if (i != lhs_contracting_dimension) {
       dimensions.push_back(lhs.dimensions(i));
     }
   }
   for (int64 i = 0; i < ShapeUtil::Rank(rhs); i++) {
-    if (i != rhs_contracted_dimension) {
+    if (i != rhs_contracting_dimension && rhs_batch_dims.count(i) == 0) {
       dimensions.push_back(rhs.dimensions(i));
     }
   }
@@ -816,8 +930,6 @@ ShapeInference::InferDegenerateDimensionBroadcastShape(
       rhs, tensorflow::strings::StrCat("rhs of binary operation ",
                                        BinaryOperation_Name(operation))));
   switch (operation) {
-    case BINOP_DOT:
-      return InferDotOpShape(lhs, rhs);
     case BINOP_MAX:
     case BINOP_MIN:
     case BINOP_SUB:
diff --git a/tensorflow/compiler/xla/service/shape_inference.h b/tensorflow/compiler/xla/service/shape_inference.h
index 0aadb98a40..382c4f8abc 100644
--- a/tensorflow/compiler/xla/service/shape_inference.h
+++ b/tensorflow/compiler/xla/service/shape_inference.h
@@ -229,11 +229,13 @@ class ShapeInference {
       tensorflow::gtl::ArraySlice<const Shape*> arg_shapes,
       const ProgramShape& to_apply);
 
- private:
   // Helper that infers the shape produced by performing a dot operation with
   // the given LHS and RHS shapes.
-  static StatusOr<Shape> InferDotOpShape(const Shape& lhs, const Shape& rhs);
+  static StatusOr<Shape> InferDotOpShape(
+      const Shape& lhs, const Shape& rhs,
+      const DotDimensionNumbers& dimension_numbers);
 
+ private:
   // Helper that infers the shape produced by performing an element-wise binary
   // operation with the given LHS and RHS shapes.
   // Note: By "element-wise" we mean operations that look at a single element in
diff --git a/tensorflow/compiler/xla/service/shape_inference_test.cc b/tensorflow/compiler/xla/service/shape_inference_test.cc
index be93c879c0..6e53d2d609 100644
--- a/tensorflow/compiler/xla/service/shape_inference_test.cc
+++ b/tensorflow/compiler/xla/service/shape_inference_test.cc
@@ -898,8 +898,11 @@ TEST_F(ShapeInferenceTest, BroadcastScalar) {
 
 // scalar <dot> vector: error
 TEST_F(ShapeInferenceTest, ScalarDotVector) {
+  DotDimensionNumbers dot_dnums;
+  dot_dnums.add_lhs_contracting_dimensions(1);
+  dot_dnums.add_rhs_contracting_dimensions(0);
   auto inferred_status =
-      ShapeInference::InferBinaryOpShape(BINOP_DOT, f32_, vector_32_, {});
+      ShapeInference::InferDotOpShape(f32_, vector_32_, dot_dnums);
   ASSERT_FALSE(inferred_status.ok());
   ASSERT_THAT(inferred_status.status().error_message(),
               HasSubstr("dot only supports rank"));
@@ -907,61 +910,199 @@ TEST_F(ShapeInferenceTest, ScalarDotVector) {
 
 // 3D <dot> 2D: error
 TEST_F(ShapeInferenceTest, DotWithRankHigherThanTwo) {
-  auto inferred_status = ShapeInference::InferBinaryOpShape(
-      BINOP_DOT, ShapeUtil::MakeShape(F32, {32, 32, 32}), matrix_32_64_, {});
+  DotDimensionNumbers dot_dnums;
+  dot_dnums.add_lhs_contracting_dimensions(1);
+  dot_dnums.add_rhs_contracting_dimensions(0);
+  auto inferred_status = ShapeInference::InferDotOpShape(
+      ShapeUtil::MakeShape(F32, {32, 32, 32}), matrix_32_64_, dot_dnums);
   ASSERT_FALSE(inferred_status.ok());
   ASSERT_THAT(inferred_status.status().error_message(),
-              HasSubstr("dot only supports rank"));
+              HasSubstr("batch and contracting dimension number mismatch"));
 }
 
 // vector <dot> vector -> scalar
 TEST_F(ShapeInferenceTest, VectorDotVector) {
+  DotDimensionNumbers dot_dnums;
+  dot_dnums.add_lhs_contracting_dimensions(0);
+  dot_dnums.add_rhs_contracting_dimensions(0);
   auto inferred_status =
-      ShapeInference::InferBinaryOpShape(BINOP_DOT, vector_64_, vector_64_, {});
+      ShapeInference::InferDotOpShape(vector_64_, vector_64_, dot_dnums);
   ASSERT_IS_OK(inferred_status.status());
   ASSERT_TRUE(ShapeUtil::Equal(f32_, inferred_status.ValueOrDie()));
   auto inferred_status_mismatch =
-      ShapeInference::InferBinaryOpShape(BINOP_DOT, vector_64_, vector_32_, {});
+      ShapeInference::InferDotOpShape(vector_64_, vector_32_, dot_dnums);
   ASSERT_FALSE(inferred_status_mismatch.ok());
 }
 
 // matrix <dot> vector -> vector
 TEST_F(ShapeInferenceTest, MatrixDotVector) {
-  auto inferred_status = ShapeInference::InferBinaryOpShape(
-      BinaryOperation::BINOP_DOT, matrix_32_64_, vector_64_, {});
+  DotDimensionNumbers dot_dnums;
+  dot_dnums.add_lhs_contracting_dimensions(1);
+  dot_dnums.add_rhs_contracting_dimensions(0);
+  auto inferred_status =
+      ShapeInference::InferDotOpShape(matrix_32_64_, vector_64_, dot_dnums);
   ASSERT_IS_OK(inferred_status.status());
   ASSERT_TRUE(ShapeUtil::Equal(inferred_status.ValueOrDie(), vector_32_));
-  auto inferred_status_mismatch = ShapeInference::InferBinaryOpShape(
-      BinaryOperation::BINOP_DOT, matrix_32_64_, vector_32_, {});
+  auto inferred_status_mismatch =
+      ShapeInference::InferDotOpShape(matrix_32_64_, vector_32_, dot_dnums);
   ASSERT_FALSE(inferred_status_mismatch.ok());
 }
 
 // vector <dot> matrix -> vector
 TEST_F(ShapeInferenceTest, VectorDotMatrix) {
-  auto inferred_status = ShapeInference::InferBinaryOpShape(
-      BinaryOperation::BINOP_DOT, vector_32_, matrix_32_64_, {});
+  DotDimensionNumbers dot_dnums;
+  dot_dnums.add_lhs_contracting_dimensions(0);
+  dot_dnums.add_rhs_contracting_dimensions(0);
+  auto inferred_status =
+      ShapeInference::InferDotOpShape(vector_32_, matrix_32_64_, dot_dnums);
   ASSERT_IS_OK(inferred_status.status());
   ASSERT_TRUE(ShapeUtil::Equal(inferred_status.ValueOrDie(), vector_64_));
-  auto inferred_status_mismatch = ShapeInference::InferBinaryOpShape(
-      BinaryOperation::BINOP_DOT, vector_64_, matrix_32_64_, {});
+  auto inferred_status_mismatch =
+      ShapeInference::InferDotOpShape(vector_64_, matrix_32_64_, dot_dnums);
   ASSERT_FALSE(inferred_status_mismatch.ok());
 }
 
 // matrix <dot> matrix -> matrix
 TEST_F(ShapeInferenceTest, MatrixDotMatrix) {
-  auto inferred_status_match = ShapeInference::InferBinaryOpShape(
-      BinaryOperation::BINOP_DOT, matrix_32_64_, matrix_64_48_, {});
+  DotDimensionNumbers dot_dnums;
+  dot_dnums.add_lhs_contracting_dimensions(1);
+  dot_dnums.add_rhs_contracting_dimensions(0);
+  auto inferred_status_match =
+      ShapeInference::InferDotOpShape(matrix_32_64_, matrix_64_48_, dot_dnums);
   ASSERT_IS_OK(inferred_status_match.status());
   ASSERT_TRUE(
       ShapeUtil::Equal(inferred_status_match.ValueOrDie(), matrix_32_48_))
       << "inferred: "
       << ShapeUtil::HumanString(inferred_status_match.ValueOrDie())
       << " expected: " << ShapeUtil::HumanString(matrix_64_48_);
-  auto inferred_status_mismatch = ShapeInference::InferBinaryOpShape(
-      BinaryOperation::BINOP_DOT, matrix_32_64_, matrix_32_64_, {});
+  auto inferred_status_mismatch =
+      ShapeInference::InferDotOpShape(matrix_32_64_, matrix_32_64_, dot_dnums);
   ASSERT_FALSE(inferred_status_mismatch.ok());
 }
 
+// BatchMatMul with two batch dimensions and one contracting dimension.
+TEST_F(ShapeInferenceTest, DotGeneral) {
+  Shape lhs_shape = ShapeUtil::MakeShape(F32, {5, 2, 11, 3});
+  Shape rhs_shape = ShapeUtil::MakeShape(F32, {5, 2, 3, 14});
+  Shape output_shape = ShapeUtil::MakeShape(F32, {5, 2, 11, 14});
+
+  DotDimensionNumbers dot_dnums;
+  dot_dnums.add_lhs_contracting_dimensions(3);
+  dot_dnums.add_lhs_batch_dimensions(0);
+  dot_dnums.add_lhs_batch_dimensions(1);
+
+  dot_dnums.add_rhs_contracting_dimensions(2);
+  dot_dnums.add_rhs_batch_dimensions(0);
+  dot_dnums.add_rhs_batch_dimensions(1);
+
+  auto inferred_status_match =
+      ShapeInference::InferDotOpShape(lhs_shape, rhs_shape, dot_dnums);
+  ASSERT_IS_OK(inferred_status_match.status());
+  ASSERT_TRUE(
+      ShapeUtil::Equal(inferred_status_match.ValueOrDie(), output_shape))
+      << "inferred: "
+      << ShapeUtil::HumanString(inferred_status_match.ValueOrDie())
+      << " expected: " << ShapeUtil::HumanString(output_shape);
+}
+
+// BatchMatMul with two contracting dimensions fails.
+TEST_F(ShapeInferenceTest, DotWithTwoContractingDimsFails) {
+  Shape lhs_shape = ShapeUtil::MakeShape(F32, {2, 11, 3, 2});
+  Shape rhs_shape = ShapeUtil::MakeShape(F32, {2, 3, 14});
+  Shape output_shape = ShapeUtil::MakeShape(F32, {2, 11, 14});
+
+  DotDimensionNumbers dot_dnums;
+  dot_dnums.add_lhs_contracting_dimensions(2);
+  dot_dnums.add_lhs_contracting_dimensions(3);
+  dot_dnums.add_lhs_batch_dimensions(0);
+
+  dot_dnums.add_rhs_contracting_dimensions(1);
+  dot_dnums.add_rhs_batch_dimensions(0);
+
+  auto inferred_status =
+      ShapeInference::InferDotOpShape(lhs_shape, rhs_shape, dot_dnums);
+  ASSERT_FALSE(inferred_status.ok());
+  ASSERT_THAT(inferred_status.status().error_message(),
+              HasSubstr("must specify one contracting dimension for both "
+                        "lhs and rhs"));
+}
+
+// BatchMatMul with different batch dimension sizes fails.
+TEST_F(ShapeInferenceTest, DotWithMisatchedBatchDimSizesFails) {
+  Shape lhs_shape = ShapeUtil::MakeShape(F32, {2, 11, 3});
+  Shape rhs_shape = ShapeUtil::MakeShape(F32, {3, 3, 14});
+
+  DotDimensionNumbers dot_dnums;
+  dot_dnums.add_lhs_contracting_dimensions(2);
+  dot_dnums.add_lhs_batch_dimensions(0);
+
+  dot_dnums.add_rhs_contracting_dimensions(1);
+  dot_dnums.add_rhs_batch_dimensions(0);
+
+  auto inferred_status =
+      ShapeInference::InferDotOpShape(lhs_shape, rhs_shape, dot_dnums);
+  ASSERT_FALSE(inferred_status.ok());
+  ASSERT_THAT(inferred_status.status().error_message(),
+              HasSubstr("batch dimension numbers and sizes must match"));
+}
+
+// BatchMatMul with different batch dimension numbers fails.
+TEST_F(ShapeInferenceTest, DotWithMisatchedBatchDimNumbersFails) {
+  Shape lhs_shape = ShapeUtil::MakeShape(F32, {2, 11, 3});
+  Shape rhs_shape = ShapeUtil::MakeShape(F32, {3, 2, 14});
+
+  DotDimensionNumbers dot_dnums;
+  dot_dnums.add_lhs_contracting_dimensions(2);
+  dot_dnums.add_lhs_batch_dimensions(0);
+
+  dot_dnums.add_rhs_contracting_dimensions(0);
+  dot_dnums.add_rhs_batch_dimensions(1);
+
+  auto inferred_status =
+      ShapeInference::InferDotOpShape(lhs_shape, rhs_shape, dot_dnums);
+  ASSERT_FALSE(inferred_status.ok());
+  ASSERT_THAT(inferred_status.status().error_message(),
+              HasSubstr("batch dimension numbers and sizes must match"));
+}
+
+// BatchMatMul with out-of-range dimension numbers fails.
+TEST_F(ShapeInferenceTest, DotWithContractingDimNumberOutOfRange) {
+  Shape lhs_shape = ShapeUtil::MakeShape(F32, {2, 11, 3});
+  Shape rhs_shape = ShapeUtil::MakeShape(F32, {2, 3, 14});
+
+  DotDimensionNumbers dot_dnums;
+  dot_dnums.add_lhs_contracting_dimensions(3);
+  dot_dnums.add_lhs_batch_dimensions(0);
+
+  dot_dnums.add_rhs_contracting_dimensions(0);
+  dot_dnums.add_rhs_batch_dimensions(1);
+
+  auto inferred_status =
+      ShapeInference::InferDotOpShape(lhs_shape, rhs_shape, dot_dnums);
+  ASSERT_FALSE(inferred_status.ok());
+  ASSERT_THAT(inferred_status.status().error_message(),
+              HasSubstr("A dimension number is out of range"));
+}
+
+// BatchMatMul with non-unique dimension numbers fails.
+TEST_F(ShapeInferenceTest, DotWithContractingNonUniqueDimNumber) {
+  Shape lhs_shape = ShapeUtil::MakeShape(F32, {2, 11, 3});
+  Shape rhs_shape = ShapeUtil::MakeShape(F32, {2, 3, 14});
+
+  DotDimensionNumbers dot_dnums;
+  dot_dnums.add_lhs_contracting_dimensions(0);
+  dot_dnums.add_lhs_batch_dimensions(0);
+
+  dot_dnums.add_rhs_contracting_dimensions(0);
+  dot_dnums.add_rhs_batch_dimensions(1);
+
+  auto inferred_status =
+      ShapeInference::InferDotOpShape(lhs_shape, rhs_shape, dot_dnums);
+  ASSERT_FALSE(inferred_status.ok());
+  ASSERT_THAT(inferred_status.status().error_message(),
+              HasSubstr("A dimension number is not unique"));
+}
+
 TEST_F(ShapeInferenceTest, BinOpBroadcastMatrixVector) {
   // Test variations of broadcasting a vector for a binary add with a
   // matrix.
diff --git a/tensorflow/compiler/xla/service/transpose_folding_test.cc b/tensorflow/compiler/xla/service/transpose_folding_test.cc
index ba99852905..caa1a111ad 100644
--- a/tensorflow/compiler/xla/service/transpose_folding_test.cc
+++ b/tensorflow/compiler/xla/service/transpose_folding_test.cc
@@ -64,9 +64,12 @@ TEST_F(TransposeFoldingTest, FoldDotTranspose) {
   HloInstruction* transpose_y =
       builder.AddInstruction(HloInstruction::CreateTranspose(
           ShapeUtil::MakeShape(F32, {3, 2}), y, {1, 0}));
-  HloInstruction* dot = builder.AddInstruction(HloInstruction::CreateBinary(
-      ShapeUtil::MakeShape(F32, {2, 2}), /*opcode=*/HloOpcode::kDot,
-      /*lhs=*/x, /*rhs=*/transpose_y));
+  DotDimensionNumbers dot_dnums;
+  dot_dnums.add_lhs_contracting_dimensions(1);
+  dot_dnums.add_rhs_contracting_dimensions(0);
+  HloInstruction* dot = builder.AddInstruction(
+      HloInstruction::CreateDot(ShapeUtil::MakeShape(F32, {2, 2}), /*lhs=*/x,
+                                /*rhs=*/transpose_y, dot_dnums));
 
   HloModule module("test_module");
   HloComputation* entry_computation =
@@ -104,9 +107,12 @@ TEST_F(TransposeFoldingTest, FoldDotTransposeConstant) {
   HloInstruction* transpose1 =
       builder.AddInstruction(HloInstruction::CreateTranspose(
           ShapeUtil::MakeShape(F32, {2, 3}), const1, {1, 0}));
-  HloInstruction* dot = builder.AddInstruction(HloInstruction::CreateBinary(
-      ShapeUtil::MakeShape(F32, {1, 3}), /*opcode=*/HloOpcode::kDot,
-      /*lhs=*/transpose0, /*rhs=*/transpose1));
+  DotDimensionNumbers dot_dnums;
+  dot_dnums.add_lhs_contracting_dimensions(1);
+  dot_dnums.add_rhs_contracting_dimensions(0);
+  HloInstruction* dot = builder.AddInstruction(HloInstruction::CreateDot(
+      ShapeUtil::MakeShape(F32, {1, 3}),
+      /*lhs=*/transpose0, /*rhs=*/transpose1, dot_dnums));
 
   HloModule module("test_module");
   HloComputation* entry_computation =
@@ -169,9 +175,12 @@ TEST_F(TransposeFoldingTest, FoldDotTransposeInWhile) {
   HloInstruction* transpose_y =
       builder.AddInstruction(HloInstruction::CreateTranspose(
           ShapeUtil::MakeShape(F32, {3, 2}), y, {1, 0}));
-  HloInstruction* dot = builder.AddInstruction(HloInstruction::CreateBinary(
-      ShapeUtil::MakeShape(F32, {2, 2}), /*opcode=*/HloOpcode::kDot,
-      /*lhs=*/x, /*rhs=*/transpose_y));
+  DotDimensionNumbers dot_dnums;
+  dot_dnums.add_lhs_contracting_dimensions(1);
+  dot_dnums.add_rhs_contracting_dimensions(0);
+  HloInstruction* dot = builder.AddInstruction(
+      HloInstruction::CreateDot(ShapeUtil::MakeShape(F32, {2, 2}), /*lhs=*/x,
+                                /*rhs=*/transpose_y, dot_dnums));
 
   HloModule module("test_module");
   HloComputation* entry_computation =
diff --git a/tensorflow/compiler/xla/service/user_computation.cc b/tensorflow/compiler/xla/service/user_computation.cc
index 4e90491b55..6d0d367981 100644
--- a/tensorflow/compiler/xla/service/user_computation.cc
+++ b/tensorflow/compiler/xla/service/user_computation.cc
@@ -88,8 +88,6 @@ HloOpcode BinaryOperationToHloOpcode(BinaryOperation binop) {
       return HloOpcode::kAtan2;
     case BINOP_COMPLEX:
       return HloOpcode::kComplex;
-    case BINOP_DOT:
-      return HloOpcode::kDot;
     case BINOP_MUL:
       return HloOpcode::kMultiply;
     case BINOP_ADD:
@@ -1207,6 +1205,33 @@ StatusOr<ComputationDataHandle> UserComputation::AddCustomCallInstruction(
   return handle;
 }
 
+StatusOr<ComputationDataHandle> UserComputation::AddDotInstruction(
+    const DotRequest& dot_request) {
+  tensorflow::mutex_lock lock(mutex_);
+
+  TF_ASSIGN_OR_RETURN(const OperationRequest* lhs,
+                      LookUpRequest(dot_request.lhs()));
+  TF_ASSIGN_OR_RETURN(const OperationRequest* rhs,
+                      LookUpRequest(dot_request.rhs()));
+
+  TF_ASSIGN_OR_RETURN(Shape shape, ShapeInference::InferDotOpShape(
+                                       lhs->output_shape(), rhs->output_shape(),
+                                       dot_request.dimension_numbers()));
+
+  const ComputationDataHandle handle = CreateComputationDataHandle();
+
+  OperationRequest& request =
+      (*session_computation_.mutable_requests())[handle.handle()];
+  *request.mutable_output_handle() = handle;
+  *request.mutable_output_shape() = shape;
+  *request.mutable_request()->mutable_dot_request() = dot_request;
+
+  VLOG(1) << "AddDotInstruction (" << GetVersionedHandleInternal()
+          << "), data handle " << handle.handle() << ": "
+          << dot_request.ShortDebugString();
+  return handle;
+}
+
 StatusOr<ComputationDataHandle> UserComputation::AddUnaryInstruction(
     const UnaryOpRequest& unary_request) {
   tensorflow::mutex_lock lock(mutex_);
@@ -1629,6 +1654,15 @@ void PureFunctionalVisitor(const SessionComputation& session_computation,
       break;
     }
 
+    case OpRequest::kDotRequest: {
+      const DotRequest& dot_request = request.request().dot_request();
+      PureFunctionalVisitor(session_computation, dot_request.lhs(),
+                            num_parameters, visited, is_functional);
+      PureFunctionalVisitor(session_computation, dot_request.rhs(),
+                            num_parameters, visited, is_functional);
+      break;
+    }
+
     case OpRequest::kSendRequest: {
       *is_functional = false;
       break;
@@ -2453,6 +2487,13 @@ static void ForEachOperand(
       break;
     }
 
+    case OpRequest::kDotRequest: {
+      const DotRequest& dot_request = request.request().dot_request();
+      apply(dot_request.rhs());
+      apply(dot_request.lhs());
+      break;
+    }
+
     case OpRequest::kUnaryOpRequest: {
       const UnaryOpRequest& unary_op_request =
           request.request().unary_op_request();
@@ -2732,6 +2773,15 @@ void ComputationLowerer::Visit(
       break;
     }
 
+    case OpRequest::kDotRequest: {
+      const DotRequest& dot_request = request.request().dot_request();
+      HloInstruction* lhs = lookup_instruction(dot_request.lhs());
+      HloInstruction* rhs = lookup_instruction(dot_request.rhs());
+      hlo_instruction = add_instruction(HloInstruction::CreateDot(
+          request.output_shape(), lhs, rhs, dot_request.dimension_numbers()));
+      break;
+    }
+
     case OpRequest::kCrossReplicaSumRequest: {
       const CrossReplicaSumRequest& cross_replica_sum_request =
           request.request().cross_replica_sum_request();
@@ -3151,8 +3201,7 @@ void ComputationLowerer::Visit(
         lhs = (lhs == operand_to_broadcast) ? broadcasted_operand : lhs;
         rhs = (rhs == operand_to_broadcast) ? broadcasted_operand : rhs;
       }
-      if (debug_options_.xla_eliminate_hlo_implicit_broadcast() &&
-          binary_op_request.binop() != BINOP_DOT) {
+      if (debug_options_.xla_eliminate_hlo_implicit_broadcast()) {
         if (!ShapeUtil::SameDimensions(request.output_shape(), lhs->shape())) {
           // lhs side is being implicitly broadcast. Change to explicit.
           lhs =
diff --git a/tensorflow/compiler/xla/service/user_computation.h b/tensorflow/compiler/xla/service/user_computation.h
index 317c631dca..b6686c3f1a 100644
--- a/tensorflow/compiler/xla/service/user_computation.h
+++ b/tensorflow/compiler/xla/service/user_computation.h
@@ -153,6 +153,10 @@ class UserComputation {
   StatusOr<ComputationDataHandle> AddCustomCallInstruction(
       const CustomCallRequest& custom_call_request);
 
+  // Enqueues a dot instruction onto this user computation.
+  StatusOr<ComputationDataHandle> AddDotInstruction(
+      const DotRequest& dot_request);
+
   // Enqueues a broadcast instruction onto this user computation.
   StatusOr<ComputationDataHandle> AddBroadcastInstruction(
       const BroadcastRequest& broadcast_request);
diff --git a/tensorflow/compiler/xla/service/user_computation_test.cc b/tensorflow/compiler/xla/service/user_computation_test.cc
index 5afaf226ae..e45673300b 100644
--- a/tensorflow/compiler/xla/service/user_computation_test.cc
+++ b/tensorflow/compiler/xla/service/user_computation_test.cc
@@ -334,50 +334,5 @@ TEST_F(UserComputationTest, EliminateDegenerateBroadcastAfterIndimBroadcast) {
               operands[1]->opcode() == HloOpcode::kBroadcast);
 }
 
-TEST_F(UserComputationTest, SkipDotInEliminatingImplicitBroadcast) {
-  auto debug_options = DebugOptions();
-  debug_options.set_xla_eliminate_hlo_implicit_broadcast(true);
-
-  //  %a = Param({1, 3});
-  //  %b = Param({3, 1});
-  //  %dot = Dot(%a, %b);
-  ComputationHandle handle;
-  handle.set_handle(123);
-  UserComputation computation("TheComputation", handle);
-
-  ParameterRequest a_request;
-  *a_request.mutable_shape() = ShapeUtil::MakeShape(F32, {1, 3});
-  a_request.set_name("a");
-  a_request.set_parameter(0);
-  TF_ASSERT_OK_AND_ASSIGN(ComputationDataHandle a_handle,
-                          computation.AddParameterInstruction(a_request));
-
-  ParameterRequest b_request;
-  *b_request.mutable_shape() = ShapeUtil::MakeShape(F32, {3, 1});
-  b_request.set_name("b");
-  b_request.set_parameter(1);
-  TF_ASSERT_OK_AND_ASSIGN(ComputationDataHandle b_handle,
-                          computation.AddParameterInstruction(b_request));
-
-  BinaryOpRequest dot;
-  dot.set_binop(BINOP_DOT);
-  *dot.mutable_lhs() = a_handle;
-  *dot.mutable_rhs() = b_handle;
-  TF_ASSERT_OK(computation.AddBinaryInstruction(dot).status());
-
-  auto hlo_resolver = [](const VersionedComputationHandle& handle) {
-    return nullptr;
-  };
-  VersionedComputationHandle latest_version = computation.GetVersionedHandle();
-
-  // Build the HLO computation.
-  TF_ASSERT_OK_AND_ASSIGN(
-      std::unique_ptr<HloComputation> hlo_computation,
-      computation.BuildHloComputation(latest_version.version, hlo_resolver,
-                                      debug_options));
-
-  EXPECT_EQ(3, hlo_computation->instruction_count());
-}
-
 }  // namespace
 }  // namespace xla
diff --git a/tensorflow/compiler/xla/tests/dot_operation_test.cc b/tensorflow/compiler/xla/tests/dot_operation_test.cc
index bfb04fd9f9..680d790b57 100644
--- a/tensorflow/compiler/xla/tests/dot_operation_test.cc
+++ b/tensorflow/compiler/xla/tests/dot_operation_test.cc
@@ -561,5 +561,25 @@ TEST_F(DotOperationTest, TransposeFolding) {
   }
 }
 
+XLA_TEST_F(DotOperationTest, DotGeneralUnimplemented) {
+  ComputationBuilder builder(client_, TestName());
+  auto lhs = builder.ConstantR3FromArray3D<float>(
+      {{{1.0, 2.0}, {3.0, 4.0}}, {{5.0, 6.0}, {7.0, 8.0}}});
+  auto rhs = builder.ConstantR3FromArray3D<float>(
+      {{{1.0, 0.0}, {0.0, 1.0}}, {{0.0, 1.0}, {1.0, 0.0}}});
+  DotDimensionNumbers dot_dnums;
+  dot_dnums.add_lhs_contracting_dimensions(2);
+  dot_dnums.add_rhs_contracting_dimensions(1);
+  dot_dnums.add_lhs_batch_dimensions(0);
+  dot_dnums.add_rhs_batch_dimensions(0);
+  builder.DotGeneral(lhs, rhs, dot_dnums);
+
+  auto status = Execute(&builder, {}).status();
+  EXPECT_FALSE(status.ok());
+  EXPECT_THAT(
+      status.error_message(),
+      ::testing::HasSubstr("Dot with batch dimensions not implemented."));
+}
+
 }  // namespace
 }  // namespace xla
diff --git a/tensorflow/compiler/xla/tests/multioutput_fusion_test.cc b/tensorflow/compiler/xla/tests/multioutput_fusion_test.cc
index 22d2b917a1..89fa6ed9f7 100644
--- a/tensorflow/compiler/xla/tests/multioutput_fusion_test.cc
+++ b/tensorflow/compiler/xla/tests/multioutput_fusion_test.cc
@@ -76,8 +76,11 @@ class MultiOutputFusionTest : public HloTestBase {
         elem_shape2, HloOpcode::kAdd, broadcast, param1));
     HloInstruction* sub = builder.AddInstruction(HloInstruction::CreateBinary(
         elem_shape2, HloOpcode::kSubtract, param1, broadcast));
+    DotDimensionNumbers dot_dnums;
+    dot_dnums.add_lhs_contracting_dimensions(1);
+    dot_dnums.add_rhs_contracting_dimensions(0);
     HloInstruction* dot = builder.AddInstruction(
-        HloInstruction::CreateBinary(elem_shape2, HloOpcode::kDot, sub, add2));
+        HloInstruction::CreateDot(elem_shape2, sub, add2, dot_dnums));
     auto computation = hlo_module->AddEntryComputation(builder.Build(dot));
 
     if (manual_fusion) {
@@ -133,8 +136,11 @@ class MultiOutputFusionTest : public HloTestBase {
     HloInstruction* reshape =
         builder.AddInstruction(HloInstruction::CreateReshape(
             ShapeUtil::MakeShape(F32, {size, 1}), add));
-    HloInstruction* dot = builder.AddInstruction(HloInstruction::CreateBinary(
-        ShapeUtil::MakeShape(F32, {1}), HloOpcode::kDot, sub, reshape));
+    DotDimensionNumbers dot_dnums;
+    dot_dnums.add_lhs_contracting_dimensions(0);
+    dot_dnums.add_rhs_contracting_dimensions(0);
+    HloInstruction* dot = builder.AddInstruction(HloInstruction::CreateDot(
+        ShapeUtil::MakeShape(F32, {1}), sub, reshape, dot_dnums));
     auto computation = hlo_module->AddEntryComputation(builder.Build(dot));
 
     if (manual_fusion) {
diff --git a/tensorflow/compiler/xla/xla_data.proto b/tensorflow/compiler/xla/xla_data.proto
index b560354050..7efdf8552e 100644
--- a/tensorflow/compiler/xla/xla_data.proto
+++ b/tensorflow/compiler/xla/xla_data.proto
@@ -498,6 +498,23 @@ message CustomCallRequest {
   Shape shape = 4;
 }
 
+message DotDimensionNumbers {
+  // The dimension numbers that represent the 'lhs' contracting dimensions.
+  repeated int64 lhs_contracting_dimensions = 1;
+  // The dimension numbers that represent the 'rhs' contracting dimensions.
+  repeated int64 rhs_contracting_dimensions = 2;
+  // The dimension numbers that represent the 'lhs' batch dimensions.
+  repeated int64 lhs_batch_dimensions = 3;
+  // The dimension numbers that represent the 'rhs' batch dimensions.
+  repeated int64 rhs_batch_dimensions = 4;
+};
+
+message DotRequest {
+  ComputationDataHandle lhs = 2;
+  ComputationDataHandle rhs = 3;
+  DotDimensionNumbers dimension_numbers = 4;
+}
+
 message MapRequest {
   repeated ComputationDataHandle operands = 2;
   ComputationHandle to_apply = 3;
@@ -732,9 +749,6 @@ enum BinaryOperation {
   BINOP_LT = 9;
   BINOP_NE = 10;
 
-  // Dot product, matrix multiply.
-  BINOP_DOT = 12;
-
   // Element-wise maximum.
   BINOP_MAX = 14;
 
@@ -885,6 +899,7 @@ message OpRequest {
     ConvolveRequest convolve_request = 8;
     CrossReplicaSumRequest cross_replica_sum_request = 9;
     CustomCallRequest custom_call_request = 10;
+    DotRequest dot_request = 43;
     DynamicSliceRequest dynamic_slice_request = 11;
     DynamicUpdateSliceRequest dynamic_update_slice_request = 12;
     GetTupleElementRequest get_tuple_element_request = 13;
@@ -914,7 +929,7 @@ message OpRequest {
     BatchNormInferenceRequest batch_norm_inference_request = 38;
     FftRequest fft_request = 41;
     ConvertRequest bitcast_convert_request = 42;
-    // Next: 43
+    // Next: 44
   }
 }
 
diff --git a/tensorflow/docs_src/performance/xla/operation_semantics.md b/tensorflow/docs_src/performance/xla/operation_semantics.md
index 8831b3d0fd..4333f94486 100644
--- a/tensorflow/docs_src/performance/xla/operation_semantics.md
+++ b/tensorflow/docs_src/performance/xla/operation_semantics.md
@@ -511,6 +511,87 @@ contracted dimensions of `lhs` and `rhs` must be of the same size. In practice,
 it can be used to perform dot products between vectors, vector/matrix
 multiplications or matrix/matrix multiplications.
 
+## DotGeneral
+
+See also
+[`ComputationBuilder::DotGeneral`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/computation_builder.h).
+
+<b> `DotGeneral(lhs, rhs, dimension_numbers)` </b>
+
+| Arguments | Type                    | Semantics
+| --------- | ----------------------- | ---------------
+| `lhs`     | `ComputationDataHandle` | array of type T
+| `rhs`     | `ComputationDataHandle` | array of type T
+| `dimension_numbers` | `DotDimensionNumbers` | array of type T
+
+As Dot, but allows contracting and batch dimension numbers to be specified for
+both the 'lhs' and 'rhs'.
+
+| DotDimensionNumbers Fields | Type                    | Semantics
+| --------- | ----------------------- | ---------------
+| 'lhs_contracting_dimensions' | repeated int64 | 'lhs' contracting dimension numbers |
+| 'rhs_contracting_dimensions' | repeated int64 | 'rhs' contracting dimension numbers |
+| 'lhs_batch_dimensions' | repeated int64 | 'lhs' batch dimension numbers |
+| 'rhs_batch_dimensions' | repeated int64 | 'rhs' batch dimension numbers |
+
+DotGeneral performs the sum of products over contracting dimensions specified
+in 'dimension_numbers'.
+
+Associated contracting dimension numbers from the 'lhs' and 'rhs' do not need
+to be the same, but must be listed in the same order in both
+'lhs/rhs_contracting_dimensions' arrays and have the same dimension sizes.
+
+Example with contracting dimension numbers:
+
+```
+lhs = { {1.0, 2.0, 3.0},
+        {4.0, 5.0, 6.0} }
+
+rhs = { {1.0, 1.0, 1.0},
+        {2.0, 2.0, 2.0} }
+
+DotDimensionNumbers dnums;
+dnums.add_lhs_contracting_dimensions(1);
+dnums.add_rhs_contracting_dimensions(1);
+
+DotGeneral(lhs, rhs, dnums) -> { {6.0, 12.0},
+                                 {15.0, 30.0} }
+```
+
+Associated batch dimension numbers from the 'lhs' and 'rhs' must have the same
+dimension number, must be listed in the same order in both arrays, and must
+have the same dimension sizes.
+
+Example with batch dimension numbers (batch size 2, 2x2 matrices):
+
+```
+lhs = { { {1.0, 2.0},
+          {3.0, 4.0} },
+        { {5.0, 6.0},
+          {7.0, 8.0} } }
+
+rhs = { { {1.0, 0.0},
+          {0.0, 1.0} },
+        { {1.0, 0.0},
+          {0.0, 1.0} } }
+
+DotDimensionNumbers dnums;
+dnums.add_lhs_contracting_dimensions(2);
+dnums.add_rhs_contracting_dimensions(1);
+dnums.add_lhs_batch_dimensions(0);
+dnums.add_rhs_batch_dimensions(0);
+
+DotGeneral(lhs, rhs, dnums) -> { { {1.0, 2.0},
+                                   {3.0, 4.0} },
+                                 { {5.0, 6.0},
+                                   {7.0, 8.0} } }
+```
+
+| Input                               | Output            | Semantics        |
+| ----------------------------------- | ----------------- | ---------------- |
+| [b0, m, k] `dot` [b0, k, n]         | [b0, m, n]        |  batch matmul    |
+| [b0, b1, m, k] `dot` [b0, b1, k, n] | [b0, b1, m, n]    |  batch matmul    |
+
 ## Element-wise binary arithmetic operations
 
 See also
-- 
GitLab


From eafa8efc55fb9989a679e36b030742c6d87b0310 Mon Sep 17 00:00:00 2001
From: Sanjoy Das <sanjoy@google.com>
Date: Thu, 30 Nov 2017 11:23:25 -0800
Subject: [PATCH 0467/1225] [XLA:CPU] Add Hlo profiling support to
 XlaJitCompiledCpuFunction

Some of the functionality has bled into the generic XlaCompiledCpuFunction, but
there still remains a fair amount of work to do before the AOT side of things
start working.

This CL also fixes a bug I introduced in a previous CL -- when I changed
IrEmitter::hlo_to_profile_idx_ to a value, I changed the signature of the
generated function to always have the "profile_counters" argument when the AOT
client code expects the signature to not have that argument.  In practice this
wasn't an issue for the standard x86 calling convention, but it could easily
have been problematic on other architectures and calling conventions.  After this
change the mismatch is no longer present.
PiperOrigin-RevId: 177481998
---
 tensorflow/compiler/aot/codegen.cc            |  8 +--
 tensorflow/compiler/aot/codegen_test_h.golden |  8 +--
 .../compiler/aot/tests/tfcompile_test.cc      |  4 +-
 .../tf2xla/xla_compiled_cpu_function.cc       | 15 +++++-
 .../tf2xla/xla_compiled_cpu_function.h        | 53 ++++++++++++++-----
 .../tf2xla/xla_jit_compiled_cpu_function.cc   | 25 ++++-----
 .../compiler/xla/service/cpu/ir_function.cc   |  2 +-
 .../xla/service/hlo_execution_profile.cc      |  3 +-
 .../xla/service/hlo_profile_printer.h         |  8 ++-
 9 files changed, 82 insertions(+), 44 deletions(-)

diff --git a/tensorflow/compiler/aot/codegen.cc b/tensorflow/compiler/aot/codegen.cc
index ae22f7edc4..28ac40df18 100644
--- a/tensorflow/compiler/aot/codegen.cc
+++ b/tensorflow/compiler/aot/codegen.cc
@@ -418,7 +418,7 @@ namespace xla { class ExecutableRunOptions; }
 // (Implementation detail) Entry point to the function in the object file.
 extern "C" void {{ENTRY}}(
     void* result, const xla::ExecutableRunOptions* run_options,
-    const void** args, void** temps);
+    const void** args, void** temps, tensorflow::int64* profile_counters);
 
 {{NS_START}}
 // {{CLASS}} represents a computation previously specified in a
@@ -483,7 +483,7 @@ class {{CLASS}} : public tensorflow::XlaCompiledCpuFunction {
     return *kStaticData;
   }
 
-  {{CLASS}}(AllocMode alloc_mode = AllocMode::ARGS_RESULTS_AND_TEMPS)
+  {{CLASS}}(AllocMode alloc_mode = AllocMode::ARGS_RESULTS_PROFILES_AND_TEMPS)
       : XlaCompiledCpuFunction(StaticData(), alloc_mode) {}
 
   {{CLASS}}(const {{CLASS}}&) = delete;
@@ -496,8 +496,8 @@ class {{CLASS}} : public tensorflow::XlaCompiledCpuFunction {
   // void set_argN_data(void* data)
   //   Sets the buffer of type T for positional argument N. May be called in
   //   any AllocMode. Must be called before Run to have an affect. Must be
-  //   called in AllocMode::RESULTS_AND_TEMPS_ONLY for each positional argument,
-  //   to set the argument buffers.
+  //   called in AllocMode::RESULTS_PROFILES_AND_TEMPS_ONLY for each positional
+  //   argument, to set the argument buffers.
   //
   // T* argN_data()
   //   Returns the buffer of type T for positional argument N.
diff --git a/tensorflow/compiler/aot/codegen_test_h.golden b/tensorflow/compiler/aot/codegen_test_h.golden
index 65f342ce27..cf01bee325 100644
--- a/tensorflow/compiler/aot/codegen_test_h.golden
+++ b/tensorflow/compiler/aot/codegen_test_h.golden
@@ -19,7 +19,7 @@ namespace xla { class ExecutableRunOptions; }
 // (Implementation detail) Entry point to the function in the object file.
 extern "C" void entry_point(
     void* result, const xla::ExecutableRunOptions* run_options,
-    const void** args, void** temps);
+    const void** args, void** temps, tensorflow::int64* profile_counters);
 
 namespace foo {
 namespace bar {
@@ -86,7 +86,7 @@ class MyClass : public tensorflow::XlaCompiledCpuFunction {
     return *kStaticData;
   }
 
-  MyClass(AllocMode alloc_mode = AllocMode::ARGS_RESULTS_AND_TEMPS)
+  MyClass(AllocMode alloc_mode = AllocMode::ARGS_RESULTS_PROFILES_AND_TEMPS)
       : XlaCompiledCpuFunction(StaticData(), alloc_mode) {}
 
   MyClass(const MyClass&) = delete;
@@ -99,8 +99,8 @@ class MyClass : public tensorflow::XlaCompiledCpuFunction {
   // void set_argN_data(void* data)
   //   Sets the buffer of type T for positional argument N. May be called in
   //   any AllocMode. Must be called before Run to have an affect. Must be
-  //   called in AllocMode::RESULTS_AND_TEMPS_ONLY for each positional argument,
-  //   to set the argument buffers.
+  //   called in AllocMode::RESULTS_PROFILES_AND_TEMPS_ONLY for each positional
+  //   argument, to set the argument buffers.
   //
   // T* argN_data()
   //   Returns the buffer of type T for positional argument N.
diff --git a/tensorflow/compiler/aot/tests/tfcompile_test.cc b/tensorflow/compiler/aot/tests/tfcompile_test.cc
index 6b037f276a..413efd9cea 100644
--- a/tensorflow/compiler/aot/tests/tfcompile_test.cc
+++ b/tensorflow/compiler/aot/tests/tfcompile_test.cc
@@ -70,7 +70,7 @@ TEST(TFCompileTest, Add) {
 // Run tests that use set_argN_data separately, to avoid accidentally re-using
 // non-existent buffers.
 TEST(TFCompileTest, Add_SetArg) {
-  AddComp add(AddComp::AllocMode::RESULTS_AND_TEMPS_ONLY);
+  AddComp add(AddComp::AllocMode::RESULTS_PROFILES_AND_TEMPS_ONLY);
 
   int32 arg_x = 10;
   int32 arg_y = 32;
@@ -258,7 +258,7 @@ TEST(TFCompileTest, MatMul2_SetArg) {
   Eigen::ThreadPoolDevice device(&tp, tp.NumThreads());
 
   foo::bar::MatMulComp matmul(
-      foo::bar::MatMulComp::AllocMode::RESULTS_AND_TEMPS_ONLY);
+      foo::bar::MatMulComp::AllocMode::RESULTS_PROFILES_AND_TEMPS_ONLY);
   matmul.set_thread_pool(&device);
 
   // Test using the set_argN_data() methods.
diff --git a/tensorflow/compiler/tf2xla/xla_compiled_cpu_function.cc b/tensorflow/compiler/tf2xla/xla_compiled_cpu_function.cc
index b5c17c5273..43d0e17c2c 100644
--- a/tensorflow/compiler/tf2xla/xla_compiled_cpu_function.cc
+++ b/tensorflow/compiler/tf2xla/xla_compiled_cpu_function.cc
@@ -28,9 +28,10 @@ XlaCompiledCpuFunction::XlaCompiledCpuFunction(const StaticData& static_data,
       temps_(new void*[static_data.num_temps]),
       arg_names_(static_data.arg_names),
       result_names_(static_data.result_names),
-      program_shape_(static_data.program_shape) {
+      program_shape_(static_data.program_shape),
+      hlo_profile_printer_(static_data.hlo_profile_printer) {
   // Allocate arg and temp buffers.
-  if (alloc_mode == AllocMode::ARGS_RESULTS_AND_TEMPS) {
+  if (alloc_mode == AllocMode::ARGS_RESULTS_PROFILES_AND_TEMPS) {
     alloc_args_ = tensorflow::tfcompile::runtime::MallocContiguousBuffers(
         static_data.arg_sizes, static_data.num_args, args_,
         /*annotate_initialized=*/false);
@@ -43,6 +44,15 @@ XlaCompiledCpuFunction::XlaCompiledCpuFunction(const StaticData& static_data,
   if (static_data.requires_runtime_context) {
     args_[static_data.num_args - 1] = &context_;
   }
+
+  // If Hlo profiling is enabled the generated code expects an appropriately
+  // sized buffer to be passed in as the last argument.  If Hlo profiling is
+  // disabled the last function argument is still present in the function
+  // signature, but it is ignored by the generated code and we pass in null for
+  // it.
+  if (hlo_profiling_enabled()) {
+    profile_counters_ = new int64[static_data.profile_counters_size]();
+  }
 }
 
 XlaCompiledCpuFunction::~XlaCompiledCpuFunction() {
@@ -50,6 +60,7 @@ XlaCompiledCpuFunction::~XlaCompiledCpuFunction() {
   tensorflow::tfcompile::runtime::FreeContiguous(alloc_temps_);
   delete[] args_;
   delete[] temps_;
+  delete[] profile_counters_;
 }
 
 namespace {
diff --git a/tensorflow/compiler/tf2xla/xla_compiled_cpu_function.h b/tensorflow/compiler/tf2xla/xla_compiled_cpu_function.h
index f49a788922..3c4314d498 100644
--- a/tensorflow/compiler/tf2xla/xla_compiled_cpu_function.h
+++ b/tensorflow/compiler/tf2xla/xla_compiled_cpu_function.h
@@ -16,7 +16,7 @@ limitations under the License.
 #ifndef TENSORFLOW_COMPILER_TF2XLA_XLA_COMPILED_CPU_FUNCTION_H_
 #define TENSORFLOW_COMPILER_TF2XLA_XLA_COMPILED_CPU_FUNCTION_H_
 
-#include <functional>
+#include <cassert>
 #include <string>
 
 #include "tensorflow/compiler/tf2xla/xla_local_runtime_context.h"
@@ -27,6 +27,7 @@ limitations under the License.
 // never use this functionality.
 namespace xla {
 class ProgramShape;
+class HloProfilePrinter;
 }
 
 namespace tensorflow {
@@ -48,12 +49,10 @@ namespace tensorflow {
 class XlaCompiledCpuFunction {
  public:
   // Type of the raw function, produced by either JIT or AOT.
-  //
-  // TODO(toddw): Add support for hlo profiling, and replace std::function with
-  // a raw function pointer, for some codesize savings.
-  using RawFunction = std::function<void(
-      void* result, const xla::ExecutableRunOptions* run_options,
-      const void** args, void** temps)>;
+  using RawFunction = void (*)(void* result,
+                               const xla::ExecutableRunOptions* run_options,
+                               const void** args, void** temps,
+                               int64* profile_counters);
 
   // StaticData represents the state necessary to run an XLA-compiled
   // function. For JIT this is backed by data in XlaJitCompiledCpuFunction; for
@@ -81,21 +80,29 @@ class XlaCompiledCpuFunction {
 
     // [Optional] Arg and result shapes.
     const xla::ProgramShape* program_shape = nullptr;
+
+    // [Optional] Profile printer.  Null if profiling is disabled.
+    const xla::HloProfilePrinter* hlo_profile_printer = nullptr;
+
+    // [Optional] The number of profile counters expected in the profile counter
+    // buffer by the generated code and hlo_profile_printer.  0 if profiling is
+    // disabled.
+    int64 profile_counters_size = 0;
   };
 
   // AllocMode controls the buffer allocation mode.
   enum class AllocMode {
-    // Allocate all buffers - args, results and temps.
-    ARGS_RESULTS_AND_TEMPS,
+    // Allocate all buffers - args, results, profile and temps.
+    ARGS_RESULTS_PROFILES_AND_TEMPS,
 
-    // Only allocate result and temp buffers.
+    // Only allocate result, profile and temp buffers.
     // Use set_arg_data to set argument buffers before Run is called.
-    RESULTS_AND_TEMPS_ONLY,
+    RESULTS_PROFILES_AND_TEMPS_ONLY,
   };
 
   XlaCompiledCpuFunction(
       const StaticData& static_data,
-      AllocMode alloc_mode = AllocMode::ARGS_RESULTS_AND_TEMPS);
+      AllocMode alloc_mode = AllocMode::ARGS_RESULTS_PROFILES_AND_TEMPS);
   virtual ~XlaCompiledCpuFunction();
 
   XlaCompiledCpuFunction(const XlaCompiledCpuFunction&) = delete;
@@ -113,7 +120,7 @@ class XlaCompiledCpuFunction {
     context_.error = false;
     context_.error_msg.clear();
     raw_function_(temps_[result_index_], &run_options_,
-                  const_cast<const void**>(args_), temps_);
+                  const_cast<const void**>(args_), temps_, profile_counters_);
     return !context_.error;
   }
 
@@ -162,6 +169,16 @@ class XlaCompiledCpuFunction {
     return static_cast<const void* const*>(temps_[result_index_]);
   }
 
+  // Profile counters for this XLA computation.
+  //
+  // When Hlo profiling is enabled (`hlo_profiling_enabled()` return true in
+  // this case) these counters are non-null and are automatically populated by
+  // `Run`.  The counters can then be pretty-printed using
+  // `hlo_profile_printer()`.
+  //
+  // When Hlo profiling is disabled, this accessor returns null.
+  const int64* profile_counters() const { return profile_counters_; }
+
   // Returns the buffer for the positional result at the given `index`.
   void* result_data(size_t index) { return results()[index]; }
   const void* result_data(size_t index) const { return results()[index]; }
@@ -195,6 +212,12 @@ class XlaCompiledCpuFunction {
   // program shape isn't available.
   const xla::ProgramShape* ProgramShape() const { return program_shape_; }
 
+  bool hlo_profiling_enabled() const { return hlo_profile_printer_ != nullptr; }
+  const xla::HloProfilePrinter& hlo_profile_printer() const {
+    assert(hlo_profiling_enabled());
+    return *hlo_profile_printer_;
+  }
+
  private:
   const RawFunction raw_function_;
   const size_t result_index_;
@@ -208,6 +231,9 @@ class XlaCompiledCpuFunction {
   void* alloc_args_ = nullptr;
   void* alloc_temps_ = nullptr;
 
+  // Backing memory for profiling counters.
+  int64* profile_counters_ = nullptr;
+
   // Options and context passed to the compiled function.
   xla::ExecutableRunOptions run_options_;
   tensorflow::XlaLocalRuntimeContext context_;
@@ -216,6 +242,7 @@ class XlaCompiledCpuFunction {
   const char** arg_names_ = nullptr;
   const char** result_names_ = nullptr;
   const xla::ProgramShape* program_shape_ = nullptr;
+  const xla::HloProfilePrinter* hlo_profile_printer_ = nullptr;
 };
 
 }  // namespace tensorflow
diff --git a/tensorflow/compiler/tf2xla/xla_jit_compiled_cpu_function.cc b/tensorflow/compiler/tf2xla/xla_jit_compiled_cpu_function.cc
index 1dd454ea8d..f727f20464 100644
--- a/tensorflow/compiler/tf2xla/xla_jit_compiled_cpu_function.cc
+++ b/tensorflow/compiler/tf2xla/xla_jit_compiled_cpu_function.cc
@@ -90,21 +90,6 @@ xla::StatusOr<size_t> ComputeResultIndex(
   return result_slice.index();
 }
 
-// Adapt ComputeFunctionType, which includes a final profile_counters arg, to
-// RawFunction, which doesn't include that final arg.
-//
-// TODO(toddw): Change RawFunction and AOT to also pass the final
-// profile_counters arg, and remove this adapter.
-XlaCompiledCpuFunction::RawFunction RawFunctionAdapter(
-    xla::cpu::CpuExecutable::ComputeFunctionType compute_function) {
-  return [compute_function](void* result,
-                            const xla::ExecutableRunOptions* run_options,
-                            const void** args, void** temps) {
-    return compute_function(result, run_options, args, temps,
-                            /*profile_counters=*/nullptr);
-  };
-}
-
 // Collect names from `entries`, where T is one of tf2xla::{Feed,Fetch}. We hold
 // the actual strings in nonempty_names, and hold arrays of pointers in
 // name_ptrs, terminated by a nullptr entry.
@@ -177,7 +162,7 @@ XlaJitCompiledCpuFunction::Compile(
   const xla::cpu::CpuExecutable* cpu_executable =
       static_cast<xla::cpu::CpuExecutable*>(executable->executable());
   XlaCompiledCpuFunction::RawFunction raw_function =
-      RawFunctionAdapter(cpu_executable->compute_function());
+      cpu_executable->compute_function();
   const xla::BufferAssignment& buffer_assignment =
       cpu_executable->buffer_assignment();
 
@@ -211,6 +196,14 @@ XlaJitCompiledCpuFunction::Compile(
   jit->static_data_.arg_names = jit->arg_names_.data();
   jit->static_data_.result_names = jit->result_names_.data();
   jit->static_data_.program_shape = jit->program_shape_.get();
+
+  if (cpu_executable->hlo_profiling_enabled()) {
+    jit->static_data_.hlo_profile_printer =
+        &cpu_executable->hlo_profile_printer();
+    jit->static_data_.profile_counters_size =
+        cpu_executable->hlo_profile_printer().profile_counters_size();
+  }
+
   return std::move(jit_unique_ptr);
 }
 
diff --git a/tensorflow/compiler/xla/service/cpu/ir_function.cc b/tensorflow/compiler/xla/service/cpu/ir_function.cc
index fa88627156..701bce2cbf 100644
--- a/tensorflow/compiler/xla/service/cpu/ir_function.cc
+++ b/tensorflow/compiler/xla/service/cpu/ir_function.cc
@@ -99,7 +99,7 @@ void IrFunction::Initialize(const string& function_name,
   //
   //                     /---------------------------------------------\
   //   prof counters ->  | counter 0 | counter 1 | ..... | counter N-1 |
-  //  (elided for aot)   \---------------------------------------------/
+  //                     \---------------------------------------------/
 
   // Even though the type of params and temps is void** in the host's view, in
   // LLVM IR this is represented by i8*, similarly to void*. It's up to the code
diff --git a/tensorflow/compiler/xla/service/hlo_execution_profile.cc b/tensorflow/compiler/xla/service/hlo_execution_profile.cc
index ba75e2ef1b..0809fe780d 100644
--- a/tensorflow/compiler/xla/service/hlo_execution_profile.cc
+++ b/tensorflow/compiler/xla/service/hlo_execution_profile.cc
@@ -109,7 +109,8 @@ std::unique_ptr<HloProfilePrinter> CreateHloProfilePrinter(
   };
 
   return MakeUnique<HloProfilePrinter>(
-      computation_infos, hlo_profile_index_map.computation_count(), deleter);
+      computation_infos, hlo_profile_index_map.computation_count(),
+      /*profile_counters_size=*/max_profile_index, deleter);
 }
 
 HloExecutionProfile::HloExecutionProfile(
diff --git a/tensorflow/compiler/xla/service/hlo_profile_printer.h b/tensorflow/compiler/xla/service/hlo_profile_printer.h
index 316753a82a..2f056490ae 100644
--- a/tensorflow/compiler/xla/service/hlo_profile_printer.h
+++ b/tensorflow/compiler/xla/service/hlo_profile_printer.h
@@ -65,9 +65,11 @@ class HloProfilePrinter {
 
   HloProfilePrinter(
       HloComputationInfo* computation_infos, int64 computation_infos_size,
+      int64 profile_counters_size,
       std::function<void(HloComputationInfo*, int64)> deleter = nullptr)
       : computation_infos_(computation_infos),
         computation_infos_size_(computation_infos_size),
+        profile_counters_size_(profile_counters_size),
         deleter_(std::move(deleter)) {}
 
   HloProfilePrinter(HloProfilePrinter&& other) {
@@ -79,10 +81,13 @@ class HloProfilePrinter {
   HloProfilePrinter(const HloProfilePrinter&) = delete;
   HloProfilePrinter& operator=(const HloProfilePrinter&) = delete;
 
-  // Convert the profile counter sequence `counters` to a human readable string
+  // Converts the profile counter sequence `counters` to a human readable string
   // representation.
   string ToString(const int64* counters, double clock_rate_ghz) const;
 
+  // Returns the size of the profile buffer expected by this printer.
+  int64 profile_counters_size() const { return profile_counters_size_; }
+
   ~HloProfilePrinter();
 
  private:
@@ -90,6 +95,7 @@ class HloProfilePrinter {
   // is manifested as the deleter_ function.
   HloComputationInfo* computation_infos_ = nullptr;
   int64 computation_infos_size_ = 0;
+  int64 profile_counters_size_ = 0;
   std::function<void(HloComputationInfo*, int64)> deleter_;
 };
 }  // namespace xla
-- 
GitLab


From af36437e3937e6e532579e9c42d7f45353b88990 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 30 Nov 2017 12:29:06 -0800
Subject: [PATCH 0468/1225] Add rules to replace nodes corresponding to
 operations with the neutral and absorbing elements for addition and
 multiplication with Identity.

Get rid of a gratuitous copy of the entire graph in the main optimizer loop.

PiperOrigin-RevId: 177491247
---
 tensorflow/core/grappler/optimizers/BUILD     |   1 +
 .../grappler/optimizers/constant_folding.cc   | 274 ++++++++++++++++--
 .../grappler/optimizers/constant_folding.h    |   9 +-
 .../optimizers/constant_folding_test.cc       |  96 ++++++
 4 files changed, 359 insertions(+), 21 deletions(-)

diff --git a/tensorflow/core/grappler/optimizers/BUILD b/tensorflow/core/grappler/optimizers/BUILD
index 24e6f8847a..7b4ed10e7e 100644
--- a/tensorflow/core/grappler/optimizers/BUILD
+++ b/tensorflow/core/grappler/optimizers/BUILD
@@ -96,6 +96,7 @@ cc_library(
         ":graph_optimizer",
         "//tensorflow/core:framework",
         "//tensorflow/core:lib",
+        "//tensorflow/core:lib_internal",
         "//tensorflow/core:protos_all_cc",
         "//tensorflow/core/grappler:grappler_item",
         "//tensorflow/core/grappler:op_types",
diff --git a/tensorflow/core/grappler/optimizers/constant_folding.cc b/tensorflow/core/grappler/optimizers/constant_folding.cc
index b5172a4833..cf913d6f48 100644
--- a/tensorflow/core/grappler/optimizers/constant_folding.cc
+++ b/tensorflow/core/grappler/optimizers/constant_folding.cc
@@ -30,13 +30,16 @@ limitations under the License.
 #include "tensorflow/core/grappler/costs/graph_properties.h"
 #include "tensorflow/core/grappler/grappler_item.h"
 #include "tensorflow/core/grappler/op_types.h"
+#include "tensorflow/core/grappler/utils.h"
 #include "tensorflow/core/lib/gtl/cleanup.h"
 #include "tensorflow/core/lib/gtl/inlined_vector.h"
 #include "tensorflow/core/lib/strings/numbers.h"
 #include "tensorflow/core/lib/strings/strcat.h"
 #include "tensorflow/core/platform/env.h"
+#include "tensorflow/core/platform/tensor_coding.h"
 #include "tensorflow/core/public/version.h"
 #include "tensorflow/core/util/bcast.h"
+#include "tensorflow/core/util/saved_tensor_slice_util.h"
 
 namespace tensorflow {
 namespace grappler {
@@ -95,7 +98,38 @@ class DeviceSimple : public DeviceBase {
   std::unique_ptr<Eigen::ThreadPoolDevice> eigen_device_;
 };
 
+template <typename T>
+bool AllValuesAre(const TensorProto& tensor, const T& value) {
+  // TensorProto represents the content of the tensor in either <type>_val or
+  // tensor_content.
+  typename checkpoint::SaveTypeTraits<T>::RepeatedField* tensor_values =
+      checkpoint::MutableTensorProtoData<T>(const_cast<TensorProto*>(&tensor));
+  if (!tensor_values->empty()) {
+    for (const T& tensor_value : *tensor_values) {
+      if (tensor_value != value) {
+        return false;
+      }
+    }
+    return true;
+  }
+  const auto tensor_content_size = tensor.tensor_content().size();
+  if (tensor_content_size > 0) {
+    CHECK_EQ(0, tensor_content_size % sizeof(T));
+    std::vector<T> raw_values(tensor_content_size / sizeof(T));
+    port::CopyToArray(tensor.tensor_content(),
+                      reinterpret_cast<char*>(raw_values.data()));
+    for (int i = 0; i < tensor_content_size / sizeof(T); ++i) {
+      if (raw_values[i] != value) {
+        return false;
+      }
+    }
+    return true;
+  }
+  return false;
+}
+
 }  // namespace
+
 ConstantFolding::ConstantFolding(RewriterConfig::Toggle opt_level,
                                  DeviceBase* cpu_device)
     : opt_level_(opt_level), cpu_device_(cpu_device) {
@@ -202,9 +236,9 @@ Status ConstantFolding::MaterializeShapes(const GraphProperties& properties) {
   // We may add some nodes to the graph to encode control dependencies: there is
   // no need to process these, so only iterate over the nodes of the input
   // graph.
-  const int node_count = graph_.node_size();
+  const int node_count = graph_->node_size();
   for (int i = 0; i < node_count; ++i) {
-    NodeDef& node = *graph_.mutable_node(i);
+    NodeDef& node = *graph_->mutable_node(i);
     const string op = node.op();
     if (op != "Shape" && op != "Size" && op != "Rank" && op != "ShapeN") {
       continue;
@@ -248,7 +282,7 @@ Status ConstantFolding::MaterializeShapes(const GraphProperties& properties) {
           // cases where the shape/rank/size would have been run in
           // the original graph. Additional inputs are extra control
           string ctrl_dep =
-              AddControlDependency(node.input(0), &graph_, node_map_.get());
+              AddControlDependency(node.input(0), graph_, node_map_.get());
           node.set_input(0, ctrl_dep);
           node_map_->AddOutput(NodeName(ctrl_dep), node.name());
         } else {
@@ -263,7 +297,7 @@ Status ConstantFolding::MaterializeShapes(const GraphProperties& properties) {
                     AddPrefixToNodeName(strings::StrCat(node.name(), "-", j),
                                         kConstantFoldingConst);
                 if (node_map_->GetNode(const_name) == nullptr) {
-                  NodeDef* added_node = graph_.add_node();
+                  NodeDef* added_node = graph_->add_node();
                   added_node->set_name(const_name);
                   added_node->set_op("Const");
                   added_node->set_device(node.device());
@@ -274,7 +308,7 @@ Status ConstantFolding::MaterializeShapes(const GraphProperties& properties) {
                   // We add a control dependency to the original ShapeN node,
                   // so that the node will only be run if all inputs of the
                   // original ShapeN node are run.
-                  string ctrl_dep = AddControlDependency(node.name(), &graph_,
+                  string ctrl_dep = AddControlDependency(node.name(), graph_,
                                                          node_map_.get());
                   *added_node->add_input() = ctrl_dep;
                   node_map_->AddOutput(NodeName(ctrl_dep), added_node->name());
@@ -293,6 +327,25 @@ Status ConstantFolding::MaterializeShapes(const GraphProperties& properties) {
 }
 
 namespace {
+bool ShapesEqual(const TensorShapeProto& shape1,
+                 const TensorShapeProto& shape2) {
+  if (shape1.unknown_rank() || shape2.unknown_rank()) {
+    return false;
+  }
+  if (shape1.dim_size() != shape2.dim_size()) {
+    return false;
+  }
+  for (int i = 0; i < shape1.dim_size(); ++i) {
+    if (shape1.dim(i).size() != shape2.dim(i).size()) {
+      return false;
+    }
+    if (shape1.dim(i).size() == -1 || shape2.dim(i).size() == -1) {
+      return false;
+    }
+  }
+  return true;
+}
+
 bool ExtractShape(const NodeDef& shape_node, const GraphProperties& properties,
                   BCast::Vec* shape, int64* min_id) {
   if (shape_node.op() == "Shape") {
@@ -383,13 +436,13 @@ Status ConstantFolding::MaterializeBroadcastGradientArgs(
           strings::StrCat(node.name(), "-", j), kConstantFoldingConst);
       out[j] = node_map_->GetNode(const_name);
       if (out[j] == nullptr) {
-        out[j] = graph_.add_node();
+        out[j] = graph_->add_node();
         Tensor value(type, TensorShape({0}));
         *out[j] = CreateNodeDef(const_name, TensorValue(&value));
         out[j]->set_device(node.device());
         node_map_->AddNode(const_name, out[j]);
         string ctrl_dep =
-            AddControlDependency(node.name(), &graph_, node_map_.get());
+            AddControlDependency(node.name(), graph_, node_map_.get());
         *out[j]->add_input() = ctrl_dep;
         node_map_->AddOutput(NodeName(ctrl_dep), const_name);
       }
@@ -470,7 +523,7 @@ Status ConstantFolding::MaterializeReductionIndices(
   if (node_map_->GetNode(const_name)) {
     return Status::OK();
   }
-  NodeDef* reduction_indices = graph_.add_node();
+  NodeDef* reduction_indices = graph_->add_node();
   Tensor value(dtype, TensorShape({rank}));
   for (int i = 0; i < rank; ++i) {
     if (dtype == DT_INT32) {
@@ -482,7 +535,7 @@ Status ConstantFolding::MaterializeReductionIndices(
   *reduction_indices = CreateNodeDef(const_name, TensorValue(&value));
   reduction_indices->set_device(node->device());
   string ctrl_dep =
-      AddControlDependency(node->input(1), &graph_, node_map_.get());
+      AddControlDependency(node->input(1), graph_, node_map_.get());
   *reduction_indices->add_input() = ctrl_dep;
   node_map_->AddNode(const_name, reduction_indices);
   node_map_->AddOutput(NodeName(ctrl_dep), const_name);
@@ -496,9 +549,9 @@ Status ConstantFolding::MaterializeReductionIndices(
 
 Status ConstantFolding::MaterializeConstants(
     const GraphProperties& properties) {
-  const int node_count = graph_.node_size();
+  const int node_count = graph_->node_size();
   for (int i = 0; i < node_count; ++i) {
-    NodeDef& node = *graph_.mutable_node(i);
+    NodeDef& node = *graph_->mutable_node(i);
     const string& op = node.op();
     if (op == "BroadcastGradientArgs") {
       TF_RETURN_IF_ERROR(MaterializeBroadcastGradientArgs(node, properties));
@@ -602,6 +655,32 @@ bool ConstantFolding::IsFoldable(const NodeDef& node) const {
   return true;
 }
 
+namespace {
+
+#define SET_TENSOR_VAL_CASE(DTYPE, TYPE)           \
+  case DTYPE:                                      \
+    t->add_##TYPE##_val(static_cast<TYPE>(value)); \
+    break;
+
+Status CreateConstantTensorAttrValue(DataType type, double value,
+                                     const TensorShapeProto& shape,
+                                     AttrValue* attr_tensor) {
+  TensorProto* t = attr_tensor->mutable_tensor();
+  *t->mutable_tensor_shape() = shape;
+  switch (type) {
+    SET_TENSOR_VAL_CASE(DT_FLOAT, float);
+    SET_TENSOR_VAL_CASE(DT_DOUBLE, double);
+    SET_TENSOR_VAL_CASE(DT_INT64, int64);
+    SET_TENSOR_VAL_CASE(DT_INT32, int);
+    default:
+      return errors::InvalidArgument("Unsupported type: ", type);
+  }
+  return Status::OK();
+}
+
+#undef SET_TENSOR_CAL_CASE
+}  // namespace
+
 // static
 NodeDef ConstantFolding::CreateNodeDef(const string& name,
                                        const TensorValue& tensor) {
@@ -945,8 +1024,8 @@ Status ConstantFolding::FoldNode(NodeDef* node, GraphDef* output_graph) {
 Status ConstantFolding::FoldGraph(GraphDef* output) {
   std::unordered_set<string> processed_nodes;
   std::deque<NodeDef*> queue;
-  for (int i = 0; i < graph_.node_size(); i++) {
-    auto node = graph_.mutable_node(i);
+  for (int i = 0; i < graph_->node_size(); i++) {
+    auto node = graph_->mutable_node(i);
     if (IsFoldable(*node)) {
       queue.push_back(node);
     }
@@ -985,7 +1064,7 @@ Status ConstantFolding::FoldGraph(GraphDef* output) {
   output->mutable_node()->DeleteSubrange(last + 1,
                                          output->node_size() - last - 1);
 
-  for (const auto& node : graph_.node()) {
+  for (const auto& node : graph_->node()) {
     // If no fetch nodes is provided, we conservatively
     // keep all nodes in the original graph in case users need to fetch
     // their values.
@@ -1080,6 +1159,104 @@ bool ConstantFolding::IsSimplifiableReshape(
   return shape.IsCompatibleWith(new_dims);
 }
 
+#define IS_VALUE_CASE(DTYPE, VALUE)                   \
+  case DTYPE:                                         \
+    return AllValuesAre<EnumToDataType<DTYPE>::Type>( \
+        node.attr().at("value").tensor(), EnumToDataType<DTYPE>::Type(VALUE))
+
+#define IS_ONES_CASE(TYPE) IS_VALUE_CASE(TYPE, 1)
+#define IS_ZEROS_CASE(TYPE) IS_VALUE_CASE(TYPE, 0)
+
+bool ConstantFolding::IsOnes(const NodeDef& node) const {
+  if (feed_nodes_.find(node.name()) != feed_nodes_.end()) {
+    return false;
+  }
+  if (node.op() == "OnesLike") {
+    return true;
+  }
+  if (node.op() != "Const") {
+    return false;
+  }
+  const auto dtype = node.attr().at("dtype").type();
+  switch (dtype) {
+    //    IS_ONES_CASE(DT_HALF);
+    IS_ONES_CASE(DT_FLOAT);
+    IS_ONES_CASE(DT_DOUBLE);
+    IS_ONES_CASE(DT_UINT8);
+    IS_ONES_CASE(DT_INT8);
+    IS_ONES_CASE(DT_UINT16);
+    IS_ONES_CASE(DT_INT16);
+    IS_ONES_CASE(DT_INT32);
+    IS_ONES_CASE(DT_INT64);
+    IS_ONES_CASE(DT_COMPLEX64);
+    IS_ONES_CASE(DT_COMPLEX128);
+    default:
+      LOG(ERROR) << "Unexpected type " << DataTypeString(dtype);
+      return false;
+  }
+  return false;
+}
+
+bool ConstantFolding::IsZeros(const NodeDef& node) const {
+  if (feed_nodes_.find(node.name()) != feed_nodes_.end()) {
+    return false;
+  }
+  if (node.op() == "ZerosLike") {
+    return true;
+  }
+  if (!IsConstant(node)) {
+    return false;
+  }
+  const auto dtype = node.attr().at("dtype").type();
+  switch (dtype) {
+    //    IS_ZEROS_CASE(DT_HALF);
+    IS_ZEROS_CASE(DT_FLOAT);
+    IS_ZEROS_CASE(DT_DOUBLE);
+    IS_ZEROS_CASE(DT_UINT8);
+    IS_ZEROS_CASE(DT_INT8);
+    IS_ZEROS_CASE(DT_UINT16);
+    IS_ZEROS_CASE(DT_INT16);
+    IS_ZEROS_CASE(DT_INT32);
+    IS_ZEROS_CASE(DT_INT64);
+    IS_ZEROS_CASE(DT_COMPLEX64);
+    IS_ZEROS_CASE(DT_COMPLEX128);
+    default:
+      LOG(ERROR) << "Unexpected type " << DataTypeString(dtype);
+      return false;
+  }
+  return false;
+}
+
+void ConstantFolding::ReplaceAddOrMulWithIdentity(int input_to_forward,
+                                                  NodeDef* node) {
+  node->set_op("Identity");
+  // Propagate the designated input through the identity.
+  node->mutable_input()->SwapElements(0, input_to_forward);
+  // Add all other inputs as control dependencies.
+  for (int i = 1; i < node->input_size(); ++i) {
+    node->set_input(i, AsControlDependency(node->input(i)));
+  }
+  graph_modified_ = true;
+}
+
+Status ConstantFolding::ReplaceAddOrMulWithConstant(
+    double value, const TensorShapeProto& shape, NodeDef* node) {
+  AttrValue tensor_attr;
+  TF_RETURN_IF_ERROR(CreateConstantTensorAttrValue(node->attr().at("T").type(),
+                                                   value, shape, &tensor_attr));
+  node->mutable_attr()->insert({"value", tensor_attr});
+  node->set_op("Const");
+  // Convert all inputs to control dependencies.
+  for (int i = 0; i < node->input_size(); ++i) {
+    if (IsControlInput(node->input(i))) {
+      break;
+    }
+    node->set_input(i, AsControlDependency(node->input(i)));
+  }
+  graph_modified_ = true;
+  return Status::OK();
+}
+
 Status ConstantFolding::SimplifyGraph(GraphDef* output,
                                       const GraphProperties& properties,
                                       bool use_shape_info) {
@@ -1125,6 +1302,63 @@ Status ConstantFolding::SimplifyGraph(GraphDef* output,
         *node.add_input() = input;
       }
     }
+
+    // Simplify multiplication by ones or zeros, and addition of zeros.
+    bool is_mul = IsMul(node);
+    bool is_add = IsAdd(node);
+    if (opt_level_ == RewriterConfig::AGGRESSIVE && use_shape_info &&
+        (is_mul || is_add) && properties.HasInputProperties(node.name()) &&
+        properties.HasOutputProperties(node.name())) {
+      const NodeDef* x = node_map_->GetNode(node.input(0));
+      const NodeDef* y = node_map_->GetNode(node.input(1));
+      if (x == nullptr || y == nullptr) {
+        return errors::InvalidArgument("Invalid inputs to node: ",
+                                       node.DebugString());
+      }
+      const TensorShapeProto& output_shape =
+          properties.GetOutputProperties(node.name())[0].shape();
+      const TensorShapeProto& x_shape =
+          properties.GetInputProperties(node.name())[0].shape();
+
+      // Simplify multiplication by or addition of zeros.
+      const bool x_is_zero = IsZeros(*x);
+      const bool x_matches_output_shape = ShapesEqual(output_shape, x_shape);
+      if (x_is_zero && x_matches_output_shape) {
+        // 0 * y = 0 or 0 + y = y.
+        ReplaceAddOrMulWithIdentity(is_mul ? 0 : 1, &node);
+        continue;
+      }
+      const TensorShapeProto& y_shape =
+          properties.GetInputProperties(node.name())[1].shape();
+      const bool y_is_zero = IsZeros(*y);
+      const bool y_matches_output_shape = ShapesEqual(output_shape, y_shape);
+      if (y_is_zero && y_matches_output_shape) {
+        // x * 0 = 0 or x + 0 = x.
+        ReplaceAddOrMulWithIdentity(is_mul ? 1 : 0, &node);
+        continue;
+      }
+
+      if (is_mul) {
+        // Simplify multiplication by zeros where the output shape does not
+        // match the shape of the zero input.
+        if (x_is_zero || y_is_zero) {
+          TF_RETURN_IF_ERROR(
+              ReplaceAddOrMulWithConstant(0, output_shape, &node));
+          continue;
+        }
+
+        // Simplify multiplication by ones.
+        if (IsOnes(*x) && y_matches_output_shape) {
+          // 1 * y = y.
+          ReplaceAddOrMulWithIdentity(1, &node);
+          continue;
+        } else if (IsOnes(*y) && x_matches_output_shape) {
+          // x * 1 = x.
+          ReplaceAddOrMulWithIdentity(0, &node);
+          continue;
+        }
+      }
+    }
   }
   return Status::OK();
 }
@@ -1132,7 +1366,7 @@ Status ConstantFolding::SimplifyGraph(GraphDef* output,
 Status ConstantFolding::RunOptimizationPass(Cluster* cluster,
                                             const GrapplerItem& item,
                                             GraphDef* output) {
-  node_map_.reset(new NodeMap(&graph_));
+  node_map_.reset(new NodeMap(graph_));
   nodes_whitelist_.clear();
   // Fold fetch nodes iff it has a single fanout. Note that if a fetch node
   // has a single fanout, it would be rewritten as a constant with the same
@@ -1189,13 +1423,13 @@ Status ConstantFolding::Optimize(Cluster* cluster, const GrapplerItem& item,
   *output = item.graph;
   int64 node_count;
   do {
-    graph_.Swap(output);
-    item_to_optimize.graph = graph_;
+    graph_modified_ = false;
+    item_to_optimize.graph.Swap(output);
+    graph_ = &item_to_optimize.graph;
     *output = GraphDef();
-    node_count = graph_.node_size();
+    node_count = graph_->node_size();
     TF_RETURN_IF_ERROR(RunOptimizationPass(cluster, item_to_optimize, output));
-  } while (output->node_size() != node_count);
-
+  } while (graph_modified_ || output->node_size() != node_count);
   *output->mutable_library() = item.graph.library();
   *output->mutable_versions() = item.graph.versions();
 
diff --git a/tensorflow/core/grappler/optimizers/constant_folding.h b/tensorflow/core/grappler/optimizers/constant_folding.h
index 8af5b5fbe6..3bb9926338 100644
--- a/tensorflow/core/grappler/optimizers/constant_folding.h
+++ b/tensorflow/core/grappler/optimizers/constant_folding.h
@@ -72,6 +72,12 @@ class ConstantFolding : public GraphOptimizer {
 
   Status FoldNode(NodeDef* node, GraphDef* output_graph);
 
+  bool IsOnes(const NodeDef& node) const;
+  bool IsZeros(const NodeDef& node) const;
+  void ReplaceAddOrMulWithIdentity(int input_to_forward, NodeDef* node);
+  Status ReplaceAddOrMulWithConstant(double value,
+                                     const TensorShapeProto& shape,
+                                     NodeDef* node);
   Status FoldGraph(GraphDef* output);
 
   bool IsSimplifiableReduction(const NodeDef& node) const;
@@ -89,12 +95,13 @@ class ConstantFolding : public GraphOptimizer {
   std::unique_ptr<DeviceBase> owned_device_;
 
   std::unique_ptr<ResourceMgr> resource_mgr_;
-  GraphDef graph_;
+  GraphDef* graph_;
   std::unique_ptr<NodeMap> node_map_;
   std::unordered_set<string> nodes_to_preserve_;
   std::unordered_set<string> nodes_whitelist_;
   std::unordered_set<string> feed_nodes_;
   bool has_fetch_;
+  bool graph_modified_;
 };
 
 }  // end namespace grappler
diff --git a/tensorflow/core/grappler/optimizers/constant_folding_test.cc b/tensorflow/core/grappler/optimizers/constant_folding_test.cc
index b2d9b02c68..c72ed96520 100644
--- a/tensorflow/core/grappler/optimizers/constant_folding_test.cc
+++ b/tensorflow/core/grappler/optimizers/constant_folding_test.cc
@@ -77,6 +77,102 @@ TEST_F(ConstantFoldingTest, SimpleFolding) {
   test::ExpectTensorEqual<float>(tensors_expected[0], tensors[0]);
 }
 
+TEST_F(ConstantFoldingTest, NeutralElement) {
+  for (bool use_const : {true, false}) {
+    tensorflow::Scope s = tensorflow::Scope::NewRootScope();
+    Output x = ops::Placeholder(s.WithOpName("x"), DT_FLOAT,
+                                ops::Placeholder::Shape(TensorShape({1, 2})));
+    Output y = ops::Placeholder(s.WithOpName("y"), DT_FLOAT,
+                                ops::Placeholder::Shape(TensorShape({1, 2})));
+    Output zeros =
+        !use_const ? ops::ZerosLike(s.WithOpName("zeros"), x)
+                   : ops::Const(s.WithOpName("zeros"), {0.0f, 0.0f}, {1, 2});
+    Output zeros_broadcast =
+        ops::Const(s.WithOpName("zeros_broadcast"), {0.0f}, {1, 1});
+    Output ones = !use_const
+                      ? ops::OnesLike(s.WithOpName("ones"), x)
+                      : ops::Const(s.WithOpName("ones"), {1.0f, 1.0f}, {1, 2});
+    Output mul1 = ops::Mul(s.WithOpName("mul1"), x, zeros);
+    Output mul2 = ops::Mul(s.WithOpName("mul2"), zeros, y);
+    Output mul3 = ops::Mul(s.WithOpName("mul3"), x, ones);
+    Output mul4 = ops::Mul(s.WithOpName("mul4"), ones, y);
+    Output mul5 = ops::Mul(s.WithOpName("mul1"), x, zeros_broadcast);
+    Output mul6 = ops::Mul(s.WithOpName("mul2"), zeros_broadcast, y);
+    Output add1 = ops::Add(s.WithOpName("add1"), x, zeros);
+    Output add2 = ops::Add(s.WithOpName("add2"), zeros, y);
+    Output addn = ops::AddN(s, {mul1, mul2, mul3, mul4, add1, add2});
+    GrapplerItem item;
+    TF_CHECK_OK(s.ToGraphDef(&item.graph));
+
+    ConstantFolding optimizer(RewriterConfig::AGGRESSIVE,
+                              nullptr /* cpu_device */);
+    GraphDef output;
+    Status status = optimizer.Optimize(nullptr, item, &output);
+    TF_EXPECT_OK(status);
+
+    EXPECT_EQ(14, output.node_size());
+    for (int i = 0; i < output.node_size(); ++i) {
+      const NodeDef& node = output.node(i);
+      const string& name = node.name();
+      if (name == "mul1") {
+        if (use_const) {
+          EXPECT_EQ("Const", node.op());
+          EXPECT_EQ("^x", node.input(0));
+        } else {
+          EXPECT_EQ("Identity", node.op());
+          EXPECT_EQ("zeros", node.input(0));
+          EXPECT_EQ("^x", node.input(1));
+        }
+      } else if (name == "mul2") {
+        if (use_const) {
+          EXPECT_EQ("Const", node.op());
+          EXPECT_EQ("^y", node.input(0));
+        } else {
+          EXPECT_EQ("Identity", node.op());
+          EXPECT_EQ("zeros", node.input(0));
+          EXPECT_EQ("^y", node.input(1));
+        }
+      } else if (name == "mul3") {
+        EXPECT_EQ("Identity", node.op());
+        EXPECT_EQ("x", node.input(0));
+        EXPECT_EQ("^ones", node.input(1));
+      } else if (name == "mul4") {
+        EXPECT_EQ("Identity", node.op());
+        EXPECT_EQ("y", node.input(0));
+        EXPECT_EQ("^ones", node.input(1));
+      } else if (name == "mul5") {
+        EXPECT_EQ("Const", node.op());
+        EXPECT_EQ("^x", node.input(0));
+        EXPECT_EQ("^ones", node.input(1));
+        TensorProto t = node.attr().at("value").tensor();
+        EXPECT_EQ(1, t.float_val_size());
+        EXPECT_EQ(0, t.float_val(0));
+        EXPECT_EQ(2, t.tensor_shape().dim_size());
+        EXPECT_EQ(1, t.tensor_shape().dim(0).size());
+        EXPECT_EQ(2, t.tensor_shape().dim(1).size());
+      } else if (name == "mul6") {
+        EXPECT_EQ("Const", node.op());
+        EXPECT_EQ("^y", node.input(0));
+        EXPECT_EQ("^ones", node.input(1));
+        TensorProto t = node.attr().at("value").tensor();
+        EXPECT_EQ(1, t.float_val_size());
+        EXPECT_EQ(0, t.float_val(0));
+        EXPECT_EQ(2, t.tensor_shape().dim_size());
+        EXPECT_EQ(1, t.tensor_shape().dim(0).size());
+        EXPECT_EQ(2, t.tensor_shape().dim(1).size());
+      } else if (name == "add1") {
+        EXPECT_EQ("Identity", node.op());
+        EXPECT_EQ("x", node.input(0));
+        EXPECT_EQ("^zeros", node.input(1));
+      } else if (name == "add2") {
+        EXPECT_EQ("Identity", node.op());
+        EXPECT_EQ("y", node.input(0));
+        EXPECT_EQ("^zeros", node.input(1));
+      }
+    }
+  }
+}
+
 TEST_F(ConstantFoldingTest, FoldingNodeWithTwoOutputs) {
   // Build a simple graph with a few trivially prunable ops.
   tensorflow::Scope s = tensorflow::Scope::NewRootScope();
-- 
GitLab


From 5e54d87f94271ce671ddc874cca8d34c83c180cc Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 30 Nov 2017 13:14:09 -0800
Subject: [PATCH 0469/1225] Add an option to override maximum number of
 elements in the quantile accumulator.

PiperOrigin-RevId: 177497240
---
 tensorflow/contrib/boosted_trees/python/ops/quantile_ops.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/tensorflow/contrib/boosted_trees/python/ops/quantile_ops.py b/tensorflow/contrib/boosted_trees/python/ops/quantile_ops.py
index 7e8e15e7d8..294e04002a 100644
--- a/tensorflow/contrib/boosted_trees/python/ops/quantile_ops.py
+++ b/tensorflow/contrib/boosted_trees/python/ops/quantile_ops.py
@@ -45,6 +45,7 @@ class QuantileAccumulator(saver.BaseSaverBuilder.SaveableObject):
                init_stamp_token,
                epsilon,
                num_quantiles,
+               max_elements=None,
                name=None,
                container=None):
     """Creates a QuantileAccumulator object.
@@ -53,6 +54,7 @@ class QuantileAccumulator(saver.BaseSaverBuilder.SaveableObject):
       init_stamp_token: The initial value for the stamp token.
       epsilon: Error bound on the quantile computation.
       num_quantiles: Number of quantiles to produce from the final summary.
+      max_elements: Maximum number of elements added to the accumulator.
       name: the name to save the accumulator under.
       container: An optional `string`. Defaults to `""`
     """
@@ -67,6 +69,7 @@ class QuantileAccumulator(saver.BaseSaverBuilder.SaveableObject):
           self._quantile_accumulator_handle,
           init_stamp_token,
           epsilon=epsilon,
+          max_elements=max_elements,
           num_quantiles=num_quantiles)
       is_initialized_op = gen_quantile_ops.quantile_accumulator_is_initialized(
           self._quantile_accumulator_handle)
-- 
GitLab


From 62b70f5566768e0fd57013e3042a402830b2c4f0 Mon Sep 17 00:00:00 2001
From: Yao Zhang <yaozhang@google.com>
Date: Thu, 30 Nov 2017 13:29:12 -0800
Subject: [PATCH 0470/1225] Support binary operations with a scalar and a 4d
 tensor as input; refactor connectivity check code.

PiperOrigin-RevId: 177499365
---
 .../grappler/optimizers/layout_optimizer.cc   |  87 ++++++++----
 .../optimizers/layout_optimizer_test.cc       | 128 ++++++++++++++++++
 2 files changed, 189 insertions(+), 26 deletions(-)

diff --git a/tensorflow/core/grappler/optimizers/layout_optimizer.cc b/tensorflow/core/grappler/optimizers/layout_optimizer.cc
index ef4b015295..97c8e6f907 100644
--- a/tensorflow/core/grappler/optimizers/layout_optimizer.cc
+++ b/tensorflow/core/grappler/optimizers/layout_optimizer.cc
@@ -761,24 +761,52 @@ class AgnosticNodeProcessor : public NodeProcessor {
 
   bool IsNodeAfterNCHWToNHWC() const {
     std::set<string> ops_format_agnostic = GetOpsFormatAgnostic();
-    auto node = node_map_->GetNode(node_->name());
-    while (node->input_size() > 0) {
-      int data_input_pos = 0;
-      if (IsConcatV1(*node) || IsSplit(*node)) {
-        data_input_pos = 1;
-      }
-      node = node_map_->GetNode(node->input(data_input_pos));
-      if (IsNodeNCHWToNHWC(node->name())) {
+    std::deque<NodeDef*> queue;
+    auto first_node_pos = DataInputPos(*node_);
+    for (const auto& pos : first_node_pos) {
+      auto input_node = node_map_->GetNode(node_->input(pos));
+      queue.push_back(input_node);
+    }
+    // The code will exit this while loop in one iteration in most cases, as the
+    // graph is already topologically sorted.
+    while (!queue.empty()) {
+      NodeDef* current_node = queue.front();
+      queue.pop_front();
+      if (IsNodeNCHWToNHWC(current_node->name())) {
         return true;
       }
-      bool connected =
-          ops_format_agnostic.find(node->op()) != ops_format_agnostic.end();
-      if (!connected) {
-        return false;
+      // We only continue searching if the path is connected through
+      // format-agnostic nodes.
+      if (ops_format_agnostic.find(current_node->op()) !=
+          ops_format_agnostic.end()) {
+        auto current_node_pos = DataInputPos(*current_node);
+        for (const auto& pos : current_node_pos) {
+          auto input_node = node_map_->GetNode(current_node->input(pos));
+          queue.push_back(input_node);
+        }
       }
     }
     return false;
   }
+
+ private:
+  std::vector<int> DataInputPos(const NodeDef& node) const {
+    std::vector<int> pos;
+    if (IsSplit(node)) {
+      return {1};
+    }
+    if (IsConcatV1(node)) {
+      return {1};
+    }
+    if (IsAdd(node) || IsMul(node) || IsRealDiv(node) ||
+        IsSquaredDifference(node) || IsSub(node)) {
+      return {0, 1};
+    }
+    if (node.input_size() > 0 && !IsControlInput(node.input(0))) {
+      return {0};
+    }
+    return {};
+  }
 };
 
 class AddNProcessor : public AgnosticNodeProcessor {
@@ -801,42 +829,49 @@ class BinaryOpProcessor : public AgnosticNodeProcessor {
  public:
   explicit BinaryOpProcessor(const OptimizeContext& opt_cxt)
       : AgnosticNodeProcessor(opt_cxt) {
-    is_4d_with_vector_ = Is4DOperateWithVector();
+    is_4d_with_vector_ = IsNDOperateWithMD(4, 1);
   }
 
  protected:
   bool ShouldProcess() const override {
+    // TODO(yaozhang): Support IsNDOperateWithMD(1, 4): first input is a vector
+    // and the second input is a 4D tensor; and update CustomizedProcessing()
+    // accordingly.
     return !MustPreserve() && IsDimsFour(*node_) && HasOutputs() &&
            IsNodeAfterNCHWToNHWC() &&
-           (Is4DOperateWithND(4) || Is4DOperateWithScalar() ||
-            Is4DOperateWithVector()) &&
+           (IsNDOperateWithMD(4, 0) || IsNDOperateWithMD(4, 1) ||
+            IsNDOperateWithMD(4, 4) || IsNDOperateWithMD(0, 4)) &&
            IsOnGPU();
   }
 
   std::vector<int> GetInputPos() const override {
-    std::vector<int> input_pos = {0};
-    if (Is4DOperateWithND(4)) {
+    std::vector<int> input_pos;
+    auto input0 = node_map_->GetNode(node_->input(0));
+    auto input1 = node_map_->GetNode(node_->input(1));
+    if (IsDimsFour(*input0)) {
+      input_pos.push_back(0);
+    }
+    if (IsDimsFour(*input1)) {
       input_pos.push_back(1);
     }
     return input_pos;
   }
 
-  bool Is4DOperateWithND(int n) const {
+  bool IsDimsFour(const NodeDef& node) const {
+    return NodeProcessor::IsDimsFour(node) || IsNodeNCHWToNHWC(node.name());
+  }
+
+  bool IsNDOperateWithMD(int n, int m) const {
     auto input0 = node_map_->GetNode(node_->input(0));
     auto input1 = node_map_->GetNode(node_->input(1));
     if (input0 && input1) {
-      return (IsDimsFour(*input0) || IsNodeNCHWToNHWC(input0->name())) &&
-             ((n == 4)
-                  ? (IsDimsFour(*input1) || IsNodeNCHWToNHWC(input1->name()))
-                  : IsDimsN(*input1, n));
+      bool input0_is_n = (n == 4) ? IsDimsFour(*input0) : IsDimsN(*input0, n);
+      bool input1_is_m = (m == 4) ? IsDimsFour(*input1) : IsDimsN(*input1, m);
+      return input0_is_n && input1_is_m;
     }
     return false;
   }
 
-  bool Is4DOperateWithScalar() const { return Is4DOperateWithND(0); }
-
-  bool Is4DOperateWithVector() const { return Is4DOperateWithND(1); }
-
   NodeDef* AddNodeShapeConst(const string& name, int num_channels) {
     NodeDef* node = graph_->add_node();
     node_map_->AddNode(name, node);
diff --git a/tensorflow/core/grappler/optimizers/layout_optimizer_test.cc b/tensorflow/core/grappler/optimizers/layout_optimizer_test.cc
index e8f7b8ac3c..363b4c3fd8 100644
--- a/tensorflow/core/grappler/optimizers/layout_optimizer_test.cc
+++ b/tensorflow/core/grappler/optimizers/layout_optimizer_test.cc
@@ -298,6 +298,39 @@ TEST_F(LayoutOptimizerTest, Connectivity) {
   EXPECT_EQ(node_i2_output->input(0), "i1");
 }
 
+TEST_F(LayoutOptimizerTest, ConnectivityBinaryOpWithInputScalarAnd4D) {
+  tensorflow::Scope s = tensorflow::Scope::NewRootScope();
+  auto conv = SimpleConv2D(&s, 3, 2, "VALID");
+  auto i1 = ops::Identity(s.WithOpName("i1"), conv);
+  auto i2 = ops::Identity(s.WithOpName("i2"), i1);
+  auto scalar_sub = ops::Const(s.WithOpName("scalar_sub"), 3.0f, {});
+  auto sub = ops::Sub(s.WithOpName("sub"), scalar_sub, i2);
+  auto i3 = ops::Identity(s.WithOpName("i3"), sub);
+  auto i4 = ops::Identity(s.WithOpName("i4"), i3);
+  auto i5 = ops::Identity(s.WithOpName("i5"), i4);
+  auto scalar_mul = ops::Const(s.WithOpName("scalar_mul"), 3.0f, {});
+  auto mul = ops::Mul(s.WithOpName("mul"), scalar_mul, i5);
+  auto i6 = ops::Identity(s.WithOpName("i6"), mul);
+  GrapplerItem item;
+  TF_CHECK_OK(s.ToGraphDef(&item.graph));
+  // Make the graph not in topological order to test the handling of multi-hop
+  // connectivity (here we say two nodes are connected if all nodes in the
+  // middle are layout agnostic). If the graph is already in topological order,
+  // the problem is easier, where layout optimizer only needs to check
+  // single-hop connectivity.
+  NodeMap node_map_original(&item.graph);
+  auto node_i1 = node_map_original.GetNode("i1");
+  auto node_mul = node_map_original.GetNode("mul");
+  node_mul->Swap(node_i1);
+  LayoutOptimizer optimizer;
+  GraphDef output;
+  Status status = optimizer.Optimize(virtual_cluster_.get(), item, &output);
+  NodeMap node_map_output(&output);
+  auto mul_node = node_map_output.GetNode("mul");
+  EXPECT_EQ(mul_node->input(0), "scalar_mul");
+  EXPECT_EQ(mul_node->input(1), "i5");
+}
+
 TEST_F(LayoutOptimizerTest, PreserveFetch) {
   tensorflow::Scope s = tensorflow::Scope::NewRootScope();
   auto conv = SimpleConv2D(&s, 3, 2, "VALID");
@@ -571,6 +604,101 @@ TEST_F(LayoutOptimizerTest, Sum) {
   */
 }
 
+TEST_F(LayoutOptimizerTest, MulScalarAnd4D) {
+  tensorflow::Scope s = tensorflow::Scope::NewRootScope();
+  auto conv = SimpleConv2D(&s, 3, 2, "VALID");
+  auto scalar = ops::Const(s.WithOpName("scalar"), 3.0f, {});
+  auto mul = ops::Mul(s.WithOpName("mul"), scalar, conv);
+  auto o = ops::Identity(s.WithOpName("o"), mul);
+  GrapplerItem item;
+  TF_CHECK_OK(s.ToGraphDef(&item.graph));
+  LayoutOptimizer optimizer;
+  GraphDef output;
+  Status status = optimizer.Optimize(virtual_cluster_.get(), item, &output);
+  NodeMap node_map(&output);
+  auto mul_node = node_map.GetNode("mul");
+  EXPECT_EQ(mul_node->input(0), "scalar");
+  EXPECT_EQ(mul_node->input(1), "Conv2D");
+}
+
+TEST_F(LayoutOptimizerTest, Mul4DAndScalar) {
+  tensorflow::Scope s = tensorflow::Scope::NewRootScope();
+  auto conv = SimpleConv2D(&s, 3, 2, "VALID");
+  auto scalar = ops::Const(s.WithOpName("scalar"), 3.0f, {});
+  auto mul = ops::Mul(s.WithOpName("mul"), conv, scalar);
+  auto o = ops::Identity(s.WithOpName("o"), mul);
+  GrapplerItem item;
+  TF_CHECK_OK(s.ToGraphDef(&item.graph));
+  LayoutOptimizer optimizer;
+  GraphDef output;
+  Status status = optimizer.Optimize(virtual_cluster_.get(), item, &output);
+  NodeMap node_map(&output);
+  auto mul_node = node_map.GetNode("mul");
+  EXPECT_EQ(mul_node->input(0), "Conv2D");
+  EXPECT_EQ(mul_node->input(1), "scalar");
+}
+
+TEST_F(LayoutOptimizerTest, Mul4DAnd4D) {
+  tensorflow::Scope s = tensorflow::Scope::NewRootScope();
+  auto conv = SimpleConv2D(&s, 3, 2, "VALID");
+  auto i = ops::Identity(s.WithOpName("i"), conv);
+  auto mul = ops::Mul(s.WithOpName("mul"), conv, i);
+  auto o = ops::Identity(s.WithOpName("o"), mul);
+  GrapplerItem item;
+  TF_CHECK_OK(s.ToGraphDef(&item.graph));
+  LayoutOptimizer optimizer;
+  GraphDef output;
+  Status status = optimizer.Optimize(virtual_cluster_.get(), item, &output);
+  NodeMap node_map(&output);
+  auto mul_node = node_map.GetNode("mul");
+  EXPECT_EQ(mul_node->input(0), "Conv2D");
+  EXPECT_EQ(mul_node->input(1), "i");
+}
+
+TEST_F(LayoutOptimizerTest, Mul4DAndVector) {
+  tensorflow::Scope s = tensorflow::Scope::NewRootScope();
+  auto conv = SimpleConv2D(&s, 3, 2, "VALID");
+  auto vector = ops::Const(s.WithOpName("vector"), {3.0f, 7.0f}, {2});
+  auto mul = ops::Mul(s.WithOpName("mul"), conv, vector);
+  auto o = ops::Identity(s.WithOpName("o"), mul);
+  GrapplerItem item;
+  TF_CHECK_OK(s.ToGraphDef(&item.graph));
+  LayoutOptimizer optimizer;
+  GraphDef output;
+  Status status = optimizer.Optimize(virtual_cluster_.get(), item, &output);
+  NodeMap node_map(&output);
+  auto mul_node = node_map.GetNode("mul");
+  EXPECT_EQ(mul_node->input(0), "Conv2D");
+  EXPECT_EQ(mul_node->input(1), "LayoutOptimizerReshapeNHWCToNCHW-mul-vector");
+  auto mul_const = node_map.GetNode("LayoutOptimizerReshapeConst-mul-vector");
+  Tensor tensor;
+  EXPECT_TRUE(
+      tensor.FromProto(mul_const->mutable_attr()->at({"value"}).tensor()));
+  Tensor tensor_expected(DT_INT32, {4});
+  test::FillValues<int>(&tensor_expected, {1, 2, 1, 1});
+  test::ExpectTensorEqual<int>(tensor_expected, tensor);
+}
+
+TEST_F(LayoutOptimizerTest, MulVectorAnd4D) {
+  tensorflow::Scope s = tensorflow::Scope::NewRootScope();
+  auto conv = SimpleConv2D(&s, 3, 2, "VALID");
+  auto vector = ops::Const(s.WithOpName("vector"), {3.0f, 7.0f}, {2});
+  auto mul = ops::Mul(s.WithOpName("mul"), vector, conv);
+  auto o = ops::Identity(s.WithOpName("o"), mul);
+  GrapplerItem item;
+  TF_CHECK_OK(s.ToGraphDef(&item.graph));
+  LayoutOptimizer optimizer;
+  GraphDef output;
+  Status status = optimizer.Optimize(virtual_cluster_.get(), item, &output);
+  NodeMap node_map(&output);
+  auto mul_node = node_map.GetNode("mul");
+  // TODO(yaozhang): Support vector as the first input and 4d tensor as the
+  // second input for BinaryOpProcessor.
+  EXPECT_EQ(mul_node->input(0), "vector");
+  EXPECT_EQ(mul_node->input(1),
+            "LayoutOptimizerTransposeNCHWToNHWC-Conv2D-mul-1");
+}
+
 }  // namespace
 }  // namespace grappler
 }  // namespace tensorflow
-- 
GitLab


From 15b06e060af59a1e30f4a9079679718aaa68dbc7 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 30 Nov 2017 13:48:37 -0800
Subject: [PATCH 0471/1225] Add R1 slice tests.

PiperOrigin-RevId: 177502259
---
 tensorflow/compiler/xla/tests/slice_test.cc | 90 +++++++++++++++------
 1 file changed, 67 insertions(+), 23 deletions(-)

diff --git a/tensorflow/compiler/xla/tests/slice_test.cc b/tensorflow/compiler/xla/tests/slice_test.cc
index c21124750a..4db566f784 100644
--- a/tensorflow/compiler/xla/tests/slice_test.cc
+++ b/tensorflow/compiler/xla/tests/slice_test.cc
@@ -26,6 +26,7 @@ limitations under the License.
 #include "tensorflow/compiler/xla/tests/literal_test_util.h"
 #include "tensorflow/compiler/xla/tests/test_macros.h"
 #include "tensorflow/core/lib/gtl/array_slice.h"
+#include "tensorflow/core/lib/strings/stringprintf.h"
 #include "tensorflow/core/platform/test.h"
 #include "tensorflow/core/platform/types.h"
 
@@ -211,6 +212,13 @@ class SliceR1Test : public ClientLibraryTestBase,
   }
 };
 
+string SliceR1TestDataToString(const ::testing::TestParamInfo<R1Spec>& data) {
+  const R1Spec& spec = data.param;
+  return ::tensorflow::strings::Printf("%lld_%lld_%lld_%lld", spec.input_dim0,
+                                       spec.slice_start, spec.slice_limit,
+                                       spec.slice_stride);
+}
+
 XLA_TEST_P(SliceR1Test, DoIt_F32) { Run<float>(GetParam()); }
 
 XLA_TEST_P(SliceR1Test, DoIt_F64) { Run<double>(GetParam()); }
@@ -223,30 +231,66 @@ XLA_TEST_P(SliceR1Test, DoIt_U64) { Run<uint64>(GetParam()); }
 
 XLA_TEST_P(SliceR1Test, DoIt_S64) { Run<int64>(GetParam()); }
 
-INSTANTIATE_TEST_CASE_P(                          //
-    SliceR1TestInstantiation,                     //
-    SliceR1Test,                                  //
-    ::testing::Values(                            //
-        R1Spec{10, 0, 0, 1},                      //
-        R1Spec{10, 7, 7, 1},                      //
-        R1Spec{10, 2, 4, 1},                      //
-        R1Spec{10, 2, 4, 2},                      //
-        R1Spec{10, 0, 10, 1},                     //
-        R1Spec{1024, 1024 - 4, 1024, 1},          //
-        R1Spec{4096, 7, 7 + 1024, 1},             //
-        R1Spec{10, 0, 10, 2},                     //
-        R1Spec{10, 0, 10, 3},                     //
-        R1Spec{10, 0, 10, 4},                     //
-        R1Spec{10, 0, 10, 5},                     //
-        R1Spec{10, 0, 10, 10},                    //
-        R1Spec{500, 200, 400, 7},                 //
-        R1Spec{4096, 1, 4095, 3},                 //
-        R1Spec{2047, 1024 - 24, 1024 + 160, 31},  //
-        R1Spec{2047, 1, 2046, 3 * 128},           //
-        R1Spec{4096, 1024 + 3, 4095, 500},        //
-        R1Spec{8192, 0, 8192, 1024 * 3 + 400}     //
-        )                                         //
+// Tests for R1 slice ops.
+// The format for each testcase is {input size, start, limit, stride}.
+// clang-format off
+INSTANTIATE_TEST_CASE_P(
+    SliceR1TestInstantiation,
+    SliceR1Test,
+    ::testing::Values(
+        R1Spec{10, 0, 0, 1},
+        R1Spec{10, 7, 7, 1},
+        R1Spec{10, 0, 5, 1},
+        R1Spec{10, 3, 5, 1},
+        R1Spec{10, 0, 10, 1},
+        R1Spec{1024, 0, 5, 1},
+        R1Spec{1024, 3, 5, 1},
+        R1Spec{1024 + 17, 0, 5, 1},
+        R1Spec{1024 + 17, 3, 5, 1},
+        R1Spec{1024 + 17, 1024, 1024 + 6, 1},
+        R1Spec{1024 + 17, 1024 + 1, 1024 + 6, 1},
+        R1Spec{1024, 1024 - 4, 1024, 1},
+        R1Spec{4 * 1024, 7, 7 + 1024, 1},
+        R1Spec{4 * 1024, 0, 4 * 1024, 1},
+        R1Spec{4 * 1024, 1, 4 * 1024 - 1, 1},
+        R1Spec{4 * 1024, 1024, 3 * 1024, 1},
+        R1Spec{4 * 1024, 1024 + 1, 3 * 1024 - 1, 1},
+        R1Spec{16 * 1024, 0, 5, 1},
+        R1Spec{16 * 1024, 3, 5, 1},
+        R1Spec{16 * 1024 + 17, 0, 5, 1},
+        R1Spec{16 * 1024 + 17, 3, 5, 1},
+        R1Spec{16 * 1024 + 17, 16 * 1024, 16 * 1024 + 6, 1},
+        R1Spec{16 * 1024 + 17, 16 * 1024 + 1, 16 * 1024 + 6, 1},
+        R1Spec{16 * 1024, 4 * 1024 - 17, 8 * 1024 - 18, 1},
+        R1Spec{64 * 1024, 0, 64 * 1024, 1},
+        R1Spec{64 * 1024, 1, 64 * 1024 - 1, 1},
+        R1Spec{64 * 1024, 1024, 63 * 1024, 1},
+        R1Spec{64 * 1024, 1024 + 1, 63 * 1024 - 1, 1},
+        R1Spec{64 * 1024, 32 * 1024, 33 * 1024, 1},
+        R1Spec{64 * 1024, 32 * 1024 + 1, 33 * 1024 - 1, 1},
+        R1Spec{64 * 1024, 32 * 1024 - 17, 36 * 1024 - 18, 1},
+// TODO(b/69425338): This uses too much memory on GPU.
+#ifndef XLA_TEST_BACKEND_GPU
+        R1Spec{16 * 1024 * 1024, 4 * 1024 * 1024, 12 * 1024 * 1024, 1},
+        R1Spec{16 * 1024 * 1024, 4 * 1024 * 1024 + 1, 12 * 1024 * 1024 - 1, 1},
+        R1Spec{16 * 1024 * 1024, 4 * 1024 * 1024 - 1, 12 * 1024 * 1024 + 1, 1},
+#endif
+        R1Spec{10, 2, 4, 2},
+        R1Spec{10, 0, 10, 2},
+        R1Spec{10, 0, 10, 3},
+        R1Spec{10, 0, 10, 4},
+        R1Spec{10, 0, 10, 5},
+        R1Spec{10, 0, 10, 10},
+        R1Spec{500, 200, 400, 7},
+        R1Spec{4096, 1, 4095, 3},
+        R1Spec{2047, 1024 - 24, 1024 + 160, 31},
+        R1Spec{2047, 1, 2046, 3 * 128},
+        R1Spec{4096, 1024 + 3, 4095, 500},
+        R1Spec{8192, 0, 8192, 1024 * 3 + 400}
+        ),
+    SliceR1TestDataToString
 );
+// clang-format on
 
 struct R2Spec {
   int64 input_dim0;
-- 
GitLab


From 39cac0519176d1244b0e29d6c28691189ea755ec Mon Sep 17 00:00:00 2001
From: Peter Hawkins <phawkins@google.com>
Date: Thu, 30 Nov 2017 13:50:13 -0800
Subject: [PATCH 0472/1225] [TF:XLA] Allow bfloat16 types in more places.

PiperOrigin-RevId: 177502497
---
 .../compiler/tf2xla/kernels/matmul_op.cc      |  4 +-
 tensorflow/compiler/tf2xla/lib/util.cc        |  3 ++
 tensorflow/compiler/tf2xla/xla_helpers.cc     |  7 ++-
 tensorflow/compiler/xla/literal_util.cc       |  6 ++-
 tensorflow/core/framework/bfloat16_test.cc    | 12 ++++++
 tensorflow/core/framework/numeric_types.h     | 43 ++++++++++++++++---
 6 files changed, 65 insertions(+), 10 deletions(-)

diff --git a/tensorflow/compiler/tf2xla/kernels/matmul_op.cc b/tensorflow/compiler/tf2xla/kernels/matmul_op.cc
index fcef497e58..a62d233526 100644
--- a/tensorflow/compiler/tf2xla/kernels/matmul_op.cc
+++ b/tensorflow/compiler/tf2xla/kernels/matmul_op.cc
@@ -23,8 +23,8 @@ limitations under the License.
 namespace tensorflow {
 namespace {
 
-constexpr std::array<DataType, 4> kMatmulTypes = {
-    {DT_HALF, DT_FLOAT, DT_DOUBLE, DT_COMPLEX64}};
+constexpr std::array<DataType, 5> kMatmulTypes = {
+    {DT_HALF, DT_BFLOAT16, DT_FLOAT, DT_DOUBLE, DT_COMPLEX64}};
 
 class MatMulOp : public XlaOpKernel {
  public:
diff --git a/tensorflow/compiler/tf2xla/lib/util.cc b/tensorflow/compiler/tf2xla/lib/util.cc
index 7ffe0aa6df..943248aedb 100644
--- a/tensorflow/compiler/tf2xla/lib/util.cc
+++ b/tensorflow/compiler/tf2xla/lib/util.cc
@@ -40,6 +40,9 @@ xla::ComputationDataHandle FloatLiteral(xla::ComputationBuilder* builder,
     case xla::F16:
       return builder->ConstantR0<xla::half>(static_cast<xla::half>(value));
       break;
+    case xla::BF16:
+      return builder->ConstantR0<bfloat16>(static_cast<bfloat16>(value));
+      break;
     case xla::F32:
       return builder->ConstantR0<float>(static_cast<float>(value));
       break;
diff --git a/tensorflow/compiler/tf2xla/xla_helpers.cc b/tensorflow/compiler/tf2xla/xla_helpers.cc
index 9c3e15d2fa..ec9e535b70 100644
--- a/tensorflow/compiler/tf2xla/xla_helpers.cc
+++ b/tensorflow/compiler/tf2xla/xla_helpers.cc
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-// This file defines helper routines for Tla JIT compilation.
+// This file defines helper routines for XLA compilation.
 
 #include "tensorflow/compiler/tf2xla/xla_helpers.h"
 #include "tensorflow/compiler/tf2xla/lib/util.h"
@@ -121,6 +121,8 @@ xla::ComputationDataHandle XlaHelpers::One(xla::ComputationBuilder* b,
 xla::ComputationDataHandle XlaHelpers::Epsilon(xla::ComputationBuilder* b,
                                                DataType data_type) {
   switch (data_type) {
+    case DT_BFLOAT16:
+      return b->ConstantR0<bfloat16>(bfloat16::epsilon());
     case DT_FLOAT:
       return b->ConstantR0<float>(std::numeric_limits<float>::epsilon());
     case DT_DOUBLE:
@@ -169,6 +171,9 @@ xla::ComputationDataHandle XlaHelpers::IntegerLiteral(
     case xla::S16:
     case xla::U16:
       LOG(FATAL) << "u16/s16 literals not yet implemented";
+    case xla::BF16:
+      literal = *xla::Literal::CreateR0<bfloat16>(static_cast<bfloat16>(value));
+      break;
     case xla::F16:
       literal =
           *xla::Literal::CreateR0<xla::half>(static_cast<xla::half>(value));
diff --git a/tensorflow/compiler/xla/literal_util.cc b/tensorflow/compiler/xla/literal_util.cc
index 93d3cd425f..250df5f4d5 100644
--- a/tensorflow/compiler/xla/literal_util.cc
+++ b/tensorflow/compiler/xla/literal_util.cc
@@ -252,6 +252,10 @@ Status Literal::Copy(const Literal& src_literal,
       return *Literal::CreateR0<int32>(1);
     case S64:
       return *Literal::CreateR0<int64>(1);
+    case F16:
+      return *Literal::CreateR0<half>(static_cast<half>(1.0f));
+    case BF16:
+      return *Literal::CreateR0<bfloat16>(static_cast<bfloat16>(1.0f));
     case F32:
       return *Literal::CreateR0<float>(1);
     case F64:
@@ -263,8 +267,6 @@ Status Literal::Copy(const Literal& src_literal,
     case S16:
     case U16:
       LOG(FATAL) << "u16/s16 literals not yet implemented";
-    case F16:
-      return *Literal::CreateR0<half>(static_cast<half>(1.0f));
     case TUPLE:
       LOG(FATAL) << "tuple element type cannot take on value of 1";
     case OPAQUE:
diff --git a/tensorflow/core/framework/bfloat16_test.cc b/tensorflow/core/framework/bfloat16_test.cc
index 6e45338751..17e6209f8e 100644
--- a/tensorflow/core/framework/bfloat16_test.cc
+++ b/tensorflow/core/framework/bfloat16_test.cc
@@ -15,6 +15,7 @@ limitations under the License.
 
 #include "tensorflow/core/framework/bfloat16.h"
 
+#include "tensorflow/core/framework/numeric_types.h"
 #include "tensorflow/core/lib/core/casts.h"
 #include "tensorflow/core/platform/test.h"
 #include "tensorflow/core/platform/test_benchmark.h"
@@ -104,6 +105,17 @@ TEST(Bfloat16Test, Conversion) {
   }
 }
 
+TEST(Bfloat16Test, Epsilon) {
+  EXPECT_LT(1.0f, static_cast<float>(bfloat16::epsilon() + bfloat16(1.0f)));
+  EXPECT_EQ(1.0f, static_cast<float>((bfloat16::epsilon() / bfloat16(2.0f)) +
+                                     bfloat16(1.0f)));
+}
+
+TEST(Bfloat16Test, Negate) {
+  EXPECT_EQ(-3.0f, static_cast<float>(-bfloat16(3.0f)));
+  EXPECT_EQ(4.5f, static_cast<float>(-bfloat16(-4.5f)));
+}
+
 static void BM_FloatToBFloat16(int iters) {
   testing::StopTiming();
   static const int N = 32 << 20;
diff --git a/tensorflow/core/framework/numeric_types.h b/tensorflow/core/framework/numeric_types.h
index 2b080e13fd..29cac26244 100644
--- a/tensorflow/core/framework/numeric_types.h
+++ b/tensorflow/core/framework/numeric_types.h
@@ -121,15 +121,48 @@ struct bfloat16 {
     return static_cast<double>(float(*this));
   }
 
+  static bfloat16 epsilon() {
+    bfloat16 x;
+    x.value = 0x3c00;  // 0x1.0p-7
+    return x;
+  }
+
   uint16_t value;
 };
 
-inline bool operator==(const bfloat16 a, const bfloat16 b) {
-  return a.value == b.value;
+inline bfloat16 operator+(bfloat16 a, bfloat16 b) {
+  return bfloat16(static_cast<float>(a) + static_cast<float>(b));
 }
-
-inline bool operator!=(const bfloat16 a, const bfloat16 b) {
-  return a.value != b.value;
+inline bfloat16 operator-(bfloat16 a, bfloat16 b) {
+  return bfloat16(static_cast<float>(a) - static_cast<float>(b));
+}
+inline bfloat16 operator*(bfloat16 a, bfloat16 b) {
+  return bfloat16(static_cast<float>(a) * static_cast<float>(b));
+}
+inline bfloat16 operator/(bfloat16 a, bfloat16 b) {
+  return bfloat16(static_cast<float>(a) / static_cast<float>(b));
+}
+inline bfloat16 operator-(bfloat16 a) {
+  a.value ^= 0x8000;
+  return a;
+}
+inline bool operator<(bfloat16 a, bfloat16 b) {
+  return static_cast<float>(a) < static_cast<float>(b);
+}
+inline bool operator<=(bfloat16 a, bfloat16 b) {
+  return static_cast<float>(a) <= static_cast<float>(b);
+}
+inline bool operator==(bfloat16 a, bfloat16 b) {
+  return static_cast<float>(a) == static_cast<float>(b);
+}
+inline bool operator!=(bfloat16 a, bfloat16 b) {
+  return static_cast<float>(a) != static_cast<float>(b);
+}
+inline bool operator>(bfloat16 a, bfloat16 b) {
+  return static_cast<float>(a) > static_cast<float>(b);
+}
+inline bool operator>=(bfloat16 a, bfloat16 b) {
+  return static_cast<float>(a) >= static_cast<float>(b);
 }
 
 }  // end namespace tensorflow
-- 
GitLab


From bc2b4b0679dc6e4ad6dc543d475d759f3ad6cadf Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 30 Nov 2017 13:57:33 -0800
Subject: [PATCH 0473/1225] Enable tests that pass now with the new copy
 insertion.

PiperOrigin-RevId: 177503567
---
 tensorflow/compiler/xla/tests/while_test.cc | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/tensorflow/compiler/xla/tests/while_test.cc b/tensorflow/compiler/xla/tests/while_test.cc
index 49f673f5f0..f3f10517e3 100644
--- a/tensorflow/compiler/xla/tests/while_test.cc
+++ b/tensorflow/compiler/xla/tests/while_test.cc
@@ -357,8 +357,7 @@ TEST_F(WhileTest, WhileWithVectorResultIntoTuple) {
   ComputeAndCompareTuple(&builder, *expected, {}, ErrorSpec(0.0001));
 }
 
-// TODO(b/63003356): 11-06-2017: fails on all back-ends with incorrect result.
-TEST_F(WhileTest, DISABLED_WhileWithPermutationAndTupleResult) {
+TEST_F(WhileTest, WhileWithPermutationAndTupleResult) {
   std::vector<Shape> shape_elements = {
       ShapeUtil::MakeShape(S32, {}), ShapeUtil::MakeShape(F32, {3}),
       ShapeUtil::MakeShape(F32, {3}), ShapeUtil::MakeShape(F32, {3})};
@@ -411,8 +410,7 @@ TEST_F(WhileTest, DISABLED_WhileWithPermutationAndTupleResult) {
   ComputeAndCompareTuple(&builder, *expected, {}, ErrorSpec(0.0001));
 }
 
-// TODO(b/63003356): 11-06-2017: fails on all back-ends with incorrect result.
-TEST_F(WhileTest, DISABLED_WhileWithPermutationAndVectorResult) {
+TEST_F(WhileTest, WhileWithPermutationAndVectorResult) {
   std::vector<Shape> shape_elements = {
       ShapeUtil::MakeShape(S32, {}), ShapeUtil::MakeShape(F32, {3}),
       ShapeUtil::MakeShape(F32, {3}), ShapeUtil::MakeShape(F32, {3})};
-- 
GitLab


From 99525c7e1e21b3548eafdc7ae606ac1df2bf06fe Mon Sep 17 00:00:00 2001
From: Gunhan Gulsoy <gunan@google.com>
Date: Thu, 30 Nov 2017 14:11:17 -0800
Subject: [PATCH 0474/1225] Automated g4 rollback of changelist 177499365

PiperOrigin-RevId: 177505909
---
 .../grappler/optimizers/layout_optimizer.cc   |  87 ++++--------
 .../optimizers/layout_optimizer_test.cc       | 128 ------------------
 2 files changed, 26 insertions(+), 189 deletions(-)

diff --git a/tensorflow/core/grappler/optimizers/layout_optimizer.cc b/tensorflow/core/grappler/optimizers/layout_optimizer.cc
index 97c8e6f907..ef4b015295 100644
--- a/tensorflow/core/grappler/optimizers/layout_optimizer.cc
+++ b/tensorflow/core/grappler/optimizers/layout_optimizer.cc
@@ -761,52 +761,24 @@ class AgnosticNodeProcessor : public NodeProcessor {
 
   bool IsNodeAfterNCHWToNHWC() const {
     std::set<string> ops_format_agnostic = GetOpsFormatAgnostic();
-    std::deque<NodeDef*> queue;
-    auto first_node_pos = DataInputPos(*node_);
-    for (const auto& pos : first_node_pos) {
-      auto input_node = node_map_->GetNode(node_->input(pos));
-      queue.push_back(input_node);
-    }
-    // The code will exit this while loop in one iteration in most cases, as the
-    // graph is already topologically sorted.
-    while (!queue.empty()) {
-      NodeDef* current_node = queue.front();
-      queue.pop_front();
-      if (IsNodeNCHWToNHWC(current_node->name())) {
+    auto node = node_map_->GetNode(node_->name());
+    while (node->input_size() > 0) {
+      int data_input_pos = 0;
+      if (IsConcatV1(*node) || IsSplit(*node)) {
+        data_input_pos = 1;
+      }
+      node = node_map_->GetNode(node->input(data_input_pos));
+      if (IsNodeNCHWToNHWC(node->name())) {
         return true;
       }
-      // We only continue searching if the path is connected through
-      // format-agnostic nodes.
-      if (ops_format_agnostic.find(current_node->op()) !=
-          ops_format_agnostic.end()) {
-        auto current_node_pos = DataInputPos(*current_node);
-        for (const auto& pos : current_node_pos) {
-          auto input_node = node_map_->GetNode(current_node->input(pos));
-          queue.push_back(input_node);
-        }
+      bool connected =
+          ops_format_agnostic.find(node->op()) != ops_format_agnostic.end();
+      if (!connected) {
+        return false;
       }
     }
     return false;
   }
-
- private:
-  std::vector<int> DataInputPos(const NodeDef& node) const {
-    std::vector<int> pos;
-    if (IsSplit(node)) {
-      return {1};
-    }
-    if (IsConcatV1(node)) {
-      return {1};
-    }
-    if (IsAdd(node) || IsMul(node) || IsRealDiv(node) ||
-        IsSquaredDifference(node) || IsSub(node)) {
-      return {0, 1};
-    }
-    if (node.input_size() > 0 && !IsControlInput(node.input(0))) {
-      return {0};
-    }
-    return {};
-  }
 };
 
 class AddNProcessor : public AgnosticNodeProcessor {
@@ -829,49 +801,42 @@ class BinaryOpProcessor : public AgnosticNodeProcessor {
  public:
   explicit BinaryOpProcessor(const OptimizeContext& opt_cxt)
       : AgnosticNodeProcessor(opt_cxt) {
-    is_4d_with_vector_ = IsNDOperateWithMD(4, 1);
+    is_4d_with_vector_ = Is4DOperateWithVector();
   }
 
  protected:
   bool ShouldProcess() const override {
-    // TODO(yaozhang): Support IsNDOperateWithMD(1, 4): first input is a vector
-    // and the second input is a 4D tensor; and update CustomizedProcessing()
-    // accordingly.
     return !MustPreserve() && IsDimsFour(*node_) && HasOutputs() &&
            IsNodeAfterNCHWToNHWC() &&
-           (IsNDOperateWithMD(4, 0) || IsNDOperateWithMD(4, 1) ||
-            IsNDOperateWithMD(4, 4) || IsNDOperateWithMD(0, 4)) &&
+           (Is4DOperateWithND(4) || Is4DOperateWithScalar() ||
+            Is4DOperateWithVector()) &&
            IsOnGPU();
   }
 
   std::vector<int> GetInputPos() const override {
-    std::vector<int> input_pos;
-    auto input0 = node_map_->GetNode(node_->input(0));
-    auto input1 = node_map_->GetNode(node_->input(1));
-    if (IsDimsFour(*input0)) {
-      input_pos.push_back(0);
-    }
-    if (IsDimsFour(*input1)) {
+    std::vector<int> input_pos = {0};
+    if (Is4DOperateWithND(4)) {
       input_pos.push_back(1);
     }
     return input_pos;
   }
 
-  bool IsDimsFour(const NodeDef& node) const {
-    return NodeProcessor::IsDimsFour(node) || IsNodeNCHWToNHWC(node.name());
-  }
-
-  bool IsNDOperateWithMD(int n, int m) const {
+  bool Is4DOperateWithND(int n) const {
     auto input0 = node_map_->GetNode(node_->input(0));
     auto input1 = node_map_->GetNode(node_->input(1));
     if (input0 && input1) {
-      bool input0_is_n = (n == 4) ? IsDimsFour(*input0) : IsDimsN(*input0, n);
-      bool input1_is_m = (m == 4) ? IsDimsFour(*input1) : IsDimsN(*input1, m);
-      return input0_is_n && input1_is_m;
+      return (IsDimsFour(*input0) || IsNodeNCHWToNHWC(input0->name())) &&
+             ((n == 4)
+                  ? (IsDimsFour(*input1) || IsNodeNCHWToNHWC(input1->name()))
+                  : IsDimsN(*input1, n));
     }
     return false;
   }
 
+  bool Is4DOperateWithScalar() const { return Is4DOperateWithND(0); }
+
+  bool Is4DOperateWithVector() const { return Is4DOperateWithND(1); }
+
   NodeDef* AddNodeShapeConst(const string& name, int num_channels) {
     NodeDef* node = graph_->add_node();
     node_map_->AddNode(name, node);
diff --git a/tensorflow/core/grappler/optimizers/layout_optimizer_test.cc b/tensorflow/core/grappler/optimizers/layout_optimizer_test.cc
index 363b4c3fd8..e8f7b8ac3c 100644
--- a/tensorflow/core/grappler/optimizers/layout_optimizer_test.cc
+++ b/tensorflow/core/grappler/optimizers/layout_optimizer_test.cc
@@ -298,39 +298,6 @@ TEST_F(LayoutOptimizerTest, Connectivity) {
   EXPECT_EQ(node_i2_output->input(0), "i1");
 }
 
-TEST_F(LayoutOptimizerTest, ConnectivityBinaryOpWithInputScalarAnd4D) {
-  tensorflow::Scope s = tensorflow::Scope::NewRootScope();
-  auto conv = SimpleConv2D(&s, 3, 2, "VALID");
-  auto i1 = ops::Identity(s.WithOpName("i1"), conv);
-  auto i2 = ops::Identity(s.WithOpName("i2"), i1);
-  auto scalar_sub = ops::Const(s.WithOpName("scalar_sub"), 3.0f, {});
-  auto sub = ops::Sub(s.WithOpName("sub"), scalar_sub, i2);
-  auto i3 = ops::Identity(s.WithOpName("i3"), sub);
-  auto i4 = ops::Identity(s.WithOpName("i4"), i3);
-  auto i5 = ops::Identity(s.WithOpName("i5"), i4);
-  auto scalar_mul = ops::Const(s.WithOpName("scalar_mul"), 3.0f, {});
-  auto mul = ops::Mul(s.WithOpName("mul"), scalar_mul, i5);
-  auto i6 = ops::Identity(s.WithOpName("i6"), mul);
-  GrapplerItem item;
-  TF_CHECK_OK(s.ToGraphDef(&item.graph));
-  // Make the graph not in topological order to test the handling of multi-hop
-  // connectivity (here we say two nodes are connected if all nodes in the
-  // middle are layout agnostic). If the graph is already in topological order,
-  // the problem is easier, where layout optimizer only needs to check
-  // single-hop connectivity.
-  NodeMap node_map_original(&item.graph);
-  auto node_i1 = node_map_original.GetNode("i1");
-  auto node_mul = node_map_original.GetNode("mul");
-  node_mul->Swap(node_i1);
-  LayoutOptimizer optimizer;
-  GraphDef output;
-  Status status = optimizer.Optimize(virtual_cluster_.get(), item, &output);
-  NodeMap node_map_output(&output);
-  auto mul_node = node_map_output.GetNode("mul");
-  EXPECT_EQ(mul_node->input(0), "scalar_mul");
-  EXPECT_EQ(mul_node->input(1), "i5");
-}
-
 TEST_F(LayoutOptimizerTest, PreserveFetch) {
   tensorflow::Scope s = tensorflow::Scope::NewRootScope();
   auto conv = SimpleConv2D(&s, 3, 2, "VALID");
@@ -604,101 +571,6 @@ TEST_F(LayoutOptimizerTest, Sum) {
   */
 }
 
-TEST_F(LayoutOptimizerTest, MulScalarAnd4D) {
-  tensorflow::Scope s = tensorflow::Scope::NewRootScope();
-  auto conv = SimpleConv2D(&s, 3, 2, "VALID");
-  auto scalar = ops::Const(s.WithOpName("scalar"), 3.0f, {});
-  auto mul = ops::Mul(s.WithOpName("mul"), scalar, conv);
-  auto o = ops::Identity(s.WithOpName("o"), mul);
-  GrapplerItem item;
-  TF_CHECK_OK(s.ToGraphDef(&item.graph));
-  LayoutOptimizer optimizer;
-  GraphDef output;
-  Status status = optimizer.Optimize(virtual_cluster_.get(), item, &output);
-  NodeMap node_map(&output);
-  auto mul_node = node_map.GetNode("mul");
-  EXPECT_EQ(mul_node->input(0), "scalar");
-  EXPECT_EQ(mul_node->input(1), "Conv2D");
-}
-
-TEST_F(LayoutOptimizerTest, Mul4DAndScalar) {
-  tensorflow::Scope s = tensorflow::Scope::NewRootScope();
-  auto conv = SimpleConv2D(&s, 3, 2, "VALID");
-  auto scalar = ops::Const(s.WithOpName("scalar"), 3.0f, {});
-  auto mul = ops::Mul(s.WithOpName("mul"), conv, scalar);
-  auto o = ops::Identity(s.WithOpName("o"), mul);
-  GrapplerItem item;
-  TF_CHECK_OK(s.ToGraphDef(&item.graph));
-  LayoutOptimizer optimizer;
-  GraphDef output;
-  Status status = optimizer.Optimize(virtual_cluster_.get(), item, &output);
-  NodeMap node_map(&output);
-  auto mul_node = node_map.GetNode("mul");
-  EXPECT_EQ(mul_node->input(0), "Conv2D");
-  EXPECT_EQ(mul_node->input(1), "scalar");
-}
-
-TEST_F(LayoutOptimizerTest, Mul4DAnd4D) {
-  tensorflow::Scope s = tensorflow::Scope::NewRootScope();
-  auto conv = SimpleConv2D(&s, 3, 2, "VALID");
-  auto i = ops::Identity(s.WithOpName("i"), conv);
-  auto mul = ops::Mul(s.WithOpName("mul"), conv, i);
-  auto o = ops::Identity(s.WithOpName("o"), mul);
-  GrapplerItem item;
-  TF_CHECK_OK(s.ToGraphDef(&item.graph));
-  LayoutOptimizer optimizer;
-  GraphDef output;
-  Status status = optimizer.Optimize(virtual_cluster_.get(), item, &output);
-  NodeMap node_map(&output);
-  auto mul_node = node_map.GetNode("mul");
-  EXPECT_EQ(mul_node->input(0), "Conv2D");
-  EXPECT_EQ(mul_node->input(1), "i");
-}
-
-TEST_F(LayoutOptimizerTest, Mul4DAndVector) {
-  tensorflow::Scope s = tensorflow::Scope::NewRootScope();
-  auto conv = SimpleConv2D(&s, 3, 2, "VALID");
-  auto vector = ops::Const(s.WithOpName("vector"), {3.0f, 7.0f}, {2});
-  auto mul = ops::Mul(s.WithOpName("mul"), conv, vector);
-  auto o = ops::Identity(s.WithOpName("o"), mul);
-  GrapplerItem item;
-  TF_CHECK_OK(s.ToGraphDef(&item.graph));
-  LayoutOptimizer optimizer;
-  GraphDef output;
-  Status status = optimizer.Optimize(virtual_cluster_.get(), item, &output);
-  NodeMap node_map(&output);
-  auto mul_node = node_map.GetNode("mul");
-  EXPECT_EQ(mul_node->input(0), "Conv2D");
-  EXPECT_EQ(mul_node->input(1), "LayoutOptimizerReshapeNHWCToNCHW-mul-vector");
-  auto mul_const = node_map.GetNode("LayoutOptimizerReshapeConst-mul-vector");
-  Tensor tensor;
-  EXPECT_TRUE(
-      tensor.FromProto(mul_const->mutable_attr()->at({"value"}).tensor()));
-  Tensor tensor_expected(DT_INT32, {4});
-  test::FillValues<int>(&tensor_expected, {1, 2, 1, 1});
-  test::ExpectTensorEqual<int>(tensor_expected, tensor);
-}
-
-TEST_F(LayoutOptimizerTest, MulVectorAnd4D) {
-  tensorflow::Scope s = tensorflow::Scope::NewRootScope();
-  auto conv = SimpleConv2D(&s, 3, 2, "VALID");
-  auto vector = ops::Const(s.WithOpName("vector"), {3.0f, 7.0f}, {2});
-  auto mul = ops::Mul(s.WithOpName("mul"), vector, conv);
-  auto o = ops::Identity(s.WithOpName("o"), mul);
-  GrapplerItem item;
-  TF_CHECK_OK(s.ToGraphDef(&item.graph));
-  LayoutOptimizer optimizer;
-  GraphDef output;
-  Status status = optimizer.Optimize(virtual_cluster_.get(), item, &output);
-  NodeMap node_map(&output);
-  auto mul_node = node_map.GetNode("mul");
-  // TODO(yaozhang): Support vector as the first input and 4d tensor as the
-  // second input for BinaryOpProcessor.
-  EXPECT_EQ(mul_node->input(0), "vector");
-  EXPECT_EQ(mul_node->input(1),
-            "LayoutOptimizerTransposeNCHWToNHWC-Conv2D-mul-1");
-}
-
 }  // namespace
 }  // namespace grappler
 }  // namespace tensorflow
-- 
GitLab


From ceeb930773d2af5c5b594c515988399e0d5bdc9a Mon Sep 17 00:00:00 2001
From: Sourabh Bajaj <1517779+sb2nov@users.noreply.github.com>
Date: Thu, 30 Nov 2017 14:16:14 -0800
Subject: [PATCH 0475/1225] Fix export_test failure (#15011)

---
 tensorflow/python/estimator/export/export_test.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tensorflow/python/estimator/export/export_test.py b/tensorflow/python/estimator/export/export_test.py
index 3cbef4707a..8442bf04ac 100644
--- a/tensorflow/python/estimator/export/export_test.py
+++ b/tensorflow/python/estimator/export/export_test.py
@@ -358,7 +358,8 @@ class ExportTest(test_util.TensorFlowTestCase):
     with self.assertRaises(ValueError) as e:
       export.build_all_signature_defs(receiver_tensor, None)
 
-    self.assertEqual("export_outputs must be a dict.", str(e.exception))
+    self.assertTrue(str(e.exception).startswith(
+        "export_outputs must be a dict"))
 
   def test_get_timestamped_export_dir(self):
     export_dir_base = tempfile.mkdtemp() + "export/"
-- 
GitLab


From 06a5cc4acd377815a0a6bbaac6ea0ab972f1c8fb Mon Sep 17 00:00:00 2001
From: Sourabh Bajaj <1517779+sb2nov@users.noreply.github.com>
Date: Thu, 30 Nov 2017 14:16:34 -0800
Subject: [PATCH 0476/1225] Fix dataset tests broken on HEAD (#15012)

---
 tensorflow/contrib/data/python/kernel_tests/BUILD | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tensorflow/contrib/data/python/kernel_tests/BUILD b/tensorflow/contrib/data/python/kernel_tests/BUILD
index 0f3ed9084d..1d4817fa26 100644
--- a/tensorflow/contrib/data/python/kernel_tests/BUILD
+++ b/tensorflow/contrib/data/python/kernel_tests/BUILD
@@ -143,6 +143,7 @@ py_test(
     size = "small",
     srcs = ["filter_dataset_op_test.py"],
     srcs_version = "PY2AND3",
+    tags = ["no_pip"],
     deps = [
         ":dataset_serialization_test",
         "//tensorflow/contrib/data/python/ops:dataset_ops",
@@ -312,6 +313,7 @@ py_test(
     size = "small",
     srcs = ["prefetch_dataset_op_test.py"],
     srcs_version = "PY2AND3",
+    tags = ["no_pip"],
     deps = [
         ":dataset_serialization_test",
         "//tensorflow/python:platform",
-- 
GitLab


From 3a011f904112fe8c61017248d343798569b174f0 Mon Sep 17 00:00:00 2001
From: Gunhan Gulsoy <gunan@google.com>
Date: Thu, 30 Nov 2017 14:12:54 -0800
Subject: [PATCH 0477/1225] Disable state_saving_rnn_estimator_test in asan
 mode.

PiperOrigin-RevId: 177506166
---
 tensorflow/contrib/learn/BUILD | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tensorflow/contrib/learn/BUILD b/tensorflow/contrib/learn/BUILD
index 94920db574..26bbcab307 100644
--- a/tensorflow/contrib/learn/BUILD
+++ b/tensorflow/contrib/learn/BUILD
@@ -461,6 +461,7 @@ py_test(
     size = "medium",
     srcs = ["python/learn/estimators/state_saving_rnn_estimator_test.py"],
     srcs_version = "PY2AND3",
+    tags = ["noasan"],
     deps = [
         ":learn",
         "//tensorflow/contrib/layers:layers_py",
-- 
GitLab


From 6ebb6d6465ddf2380430de7aa287676e9440df7e Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 30 Nov 2017 14:14:29 -0800
Subject: [PATCH 0478/1225] TF server should not crash when -v=1 is enabled.

These WriteTextProto() calls are purely for diagnostics (and are usually called within IF_VLOG_IS_ON(1) guards), but if they fail to write to a file, they'll take down the entire calling process.  Which makes debugging difficult, and seems rather astonishing.

PiperOrigin-RevId: 177506379
---
 tensorflow/compiler/tf2xla/dump_graph.cc | 15 +++++++++++++--
 1 file changed, 13 insertions(+), 2 deletions(-)

diff --git a/tensorflow/compiler/tf2xla/dump_graph.cc b/tensorflow/compiler/tf2xla/dump_graph.cc
index ddd912b873..03603ee9ba 100644
--- a/tensorflow/compiler/tf2xla/dump_graph.cc
+++ b/tensorflow/compiler/tf2xla/dump_graph.cc
@@ -63,7 +63,12 @@ string MakeUniquePath(string name) {
 
 string DumpGraphDefToFile(const string& name, GraphDef const& graph_def) {
   string path = MakeUniquePath(name);
-  TF_CHECK_OK(WriteTextProto(Env::Default(), path, graph_def));
+  Status status = WriteTextProto(Env::Default(), path, graph_def);
+  if (!status.ok()) {
+    VLOG(1) << "Failed to dump GraphDef to file: " << path << " : " << status;
+    path.clear();
+    path = "(unavailable)";
+  }
   return path;
 }
 
@@ -79,7 +84,13 @@ string DumpGraphToFile(const string& name, Graph const& graph,
 
 string DumpFunctionDefToFile(const string& name, FunctionDef const& fdef) {
   string path = MakeUniquePath(name);
-  TF_CHECK_OK(WriteTextProto(Env::Default(), path, fdef));
+  Status status = WriteTextProto(Env::Default(), path, fdef);
+  if (!status.ok()) {
+    VLOG(1) << "Failed to dump FunctionDef to file: " << path << " : "
+            << status;
+    path.clear();
+    path = "(unavailable)";
+  }
   return path;
 }
 
-- 
GitLab


From 6bfc73a0b3c6810725a5eb0020470457cc5cc23e Mon Sep 17 00:00:00 2001
From: Sanjoy Das <sanjoy@google.com>
Date: Thu, 30 Nov 2017 14:26:58 -0800
Subject: [PATCH 0479/1225] Extract out a MathUtil::GCD helper

This fixes a TODO.

PiperOrigin-RevId: 177508258
---
 .../compiler/xla/service/cpu/ir_emitter.cc    | 22 ++++----------
 tensorflow/core/lib/core/arena.cc             | 18 ++----------
 tensorflow/core/lib/math/math_util.h          | 17 +++++++++++
 tensorflow/core/lib/math/math_util_test.cc    | 29 +++++++++++++++++++
 4 files changed, 54 insertions(+), 32 deletions(-)

diff --git a/tensorflow/compiler/xla/service/cpu/ir_emitter.cc b/tensorflow/compiler/xla/service/cpu/ir_emitter.cc
index f242e0acb8..bb75d3f49e 100644
--- a/tensorflow/compiler/xla/service/cpu/ir_emitter.cc
+++ b/tensorflow/compiler/xla/service/cpu/ir_emitter.cc
@@ -24,6 +24,7 @@ limitations under the License.
 #include <utility>
 #include <vector>
 
+#include "tensorflow/core/lib/math/math_util.h"
 #include "tensorflow/core/platform/logging.h"
 // IWYU pragma: no_include "llvm/IR/Intrinsics.gen.inc"
 #include "llvm/CodeGen/TargetRegisterInfo.h"
@@ -1651,19 +1652,6 @@ void IrEmitter::EmitShardedVectorStore(
   }
 }
 
-namespace {
-// TODO(sanjoy): This is duplicated in tensorflow/core/lib/core/arena.cc.
-// Extract out a common implementation to tensorflow/core/lib/math/math_util.h
-uint32 GCD(uint32 x, uint32 y) {
-  while (y != 0) {
-    uint32 r = x % y;
-    x = y;
-    y = r;
-  }
-  return x;
-}
-}  // namespace
-
 StatusOr<bool> IrEmitter::EmitVectorizedReduce(
     HloInstruction* reduce, HloInstruction* arg, HloInstruction* init_value,
     tensorflow::gtl::ArraySlice<int64> dimensions, HloComputation* function,
@@ -1686,9 +1674,9 @@ StatusOr<bool> IrEmitter::EmitVectorizedReduce(
       std::find(dimensions.begin(), dimensions.end(),
                 arg->shape().layout().minor_to_major(0)) != dimensions.end();
 
-  unsigned element_alignment =
-      GCD(ShapeUtil::ByteSizeOfPrimitiveType(reduce->shape().element_type()),
-          MinimumAlignmentForPrimitiveType(reduce->shape().element_type()));
+  unsigned element_alignment = tensorflow::MathUtil::GCD<unsigned>(
+      ShapeUtil::ByteSizeOfPrimitiveType(reduce->shape().element_type()),
+      MinimumAlignmentForPrimitiveType(reduce->shape().element_type()));
 
   if (is_reduction_over_minor_dimension) {
     // TODO(sanjoy): Implement vectorized reduction over the minor dimension.
@@ -2463,7 +2451,7 @@ void IrEmitter::EmitTransferElements(llvm::Value* target, llvm::Value* source,
                                      const llvm_ir::IrArray& source_array) {
   unsigned primitive_type_size =
       ShapeUtil::ByteSizeOfPrimitiveType(primitive_type);
-  unsigned element_alignment = GCD(
+  unsigned element_alignment = tensorflow::MathUtil::GCD<unsigned>(
       primitive_type_size, MinimumAlignmentForPrimitiveType(primitive_type));
   llvm::Type* primitive_ptr_type = llvm::PointerType::getUnqual(
       llvm_ir::PrimitiveTypeToIrType(primitive_type, module_));
diff --git a/tensorflow/core/lib/core/arena.cc b/tensorflow/core/lib/core/arena.cc
index 2a04f7bd39..55e481d0e6 100644
--- a/tensorflow/core/lib/core/arena.cc
+++ b/tensorflow/core/lib/core/arena.cc
@@ -28,6 +28,7 @@ limitations under the License.
 #include <algorithm>
 #include <vector>
 
+#include "tensorflow/core/lib/math/math_util.h"
 #include "tensorflow/core/platform/logging.h"
 #include "tensorflow/core/platform/macros.h"
 #include "tensorflow/core/platform/mem.h"
@@ -113,24 +114,11 @@ void Arena::MakeNewBlock(const uint32 alignment) {
   CHECK(SatisfyAlignment(alignment));
 }
 
-// The following simple numeric routines also exist in util/math/mathutil.h
-// but we don't want to depend on that library.
-
-// Euclid's algorithm for Greatest Common Denominator.
-static uint32 GCD(uint32 x, uint32 y) {
-  while (y != 0) {
-    uint32 r = x % y;
-    x = y;
-    y = r;
-  }
-  return x;
-}
-
 static uint32 LeastCommonMultiple(uint32 a, uint32 b) {
   if (a > b) {
-    return (a / GCD(a, b)) * b;
+    return (a / MathUtil::GCD<uint32>(a, b)) * b;
   } else if (a < b) {
-    return (b / GCD(b, a)) * a;
+    return (b / MathUtil::GCD<uint32>(b, a)) * a;
   } else {
     return a;
   }
diff --git a/tensorflow/core/lib/math/math_util.h b/tensorflow/core/lib/math/math_util.h
index 6f279865e7..9e71598622 100644
--- a/tensorflow/core/lib/math/math_util.h
+++ b/tensorflow/core/lib/math/math_util.h
@@ -16,6 +16,8 @@ limitations under the License.
 #ifndef TENSORFLOW_LIB_MATH_MATH_UTIL_H_
 #define TENSORFLOW_LIB_MATH_MATH_UTIL_H_
 
+#include <type_traits>
+
 #include "tensorflow/core/platform/logging.h"
 #include "tensorflow/core/platform/types.h"
 
@@ -59,6 +61,9 @@ class MathUtil {
   template <typename IntegralType, bool ceil>
   static IntegralType CeilOrFloorOfRatio(IntegralType numerator,
                                          IntegralType denominator);
+
+  template <typename IntegralType>
+  static IntegralType GCD(IntegralType x, IntegralType y);
 };
 
 // ---- CeilOrFloorOfRatio ----
@@ -107,6 +112,18 @@ IntegralType MathUtil::CeilOrFloorOfRatio(IntegralType numerator,
   }
 }
 
+template <typename IntegralType>
+IntegralType MathUtil::GCD(IntegralType a, IntegralType b) {
+  static_assert(std::is_unsigned<IntegralType>::value,
+                "signed GCD not supported!");
+  while (b != 0) {
+    IntegralType r = a % b;
+    a = b;
+    b = r;
+  }
+  return a;
+}
+
 }  // namespace tensorflow
 
 #endif  // TENSORFLOW_LIB_MATH_MATH_UTIL_H_
diff --git a/tensorflow/core/lib/math/math_util_test.cc b/tensorflow/core/lib/math/math_util_test.cc
index eaf8c31a43..a96e5467c3 100644
--- a/tensorflow/core/lib/math/math_util_test.cc
+++ b/tensorflow/core/lib/math/math_util_test.cc
@@ -195,4 +195,33 @@ TEST(MathUtil, CeilOfRatio) {
 #endif
 }
 
+struct GCDTestCase {
+  unsigned int x;
+  unsigned int y;
+  unsigned int gcd;
+};
+
+TEST(MathUtil, GCD) {
+  std::vector<GCDTestCase> testcases({
+      {10, 20, 10},  //
+      {27, 8, 1},    //
+      {4, 3, 1},     //
+      {6, 8, 2},     //
+      {5, 0, 5},     //
+      {5, 5, 5},     //
+      {0, 0, 0}      //
+  });
+
+  for (const auto& tc : testcases) {
+    EXPECT_EQ(tc.gcd, MathUtil::GCD<uint32>(tc.x, tc.y));
+    EXPECT_EQ(tc.gcd, MathUtil::GCD<uint32>(tc.y, tc.x));
+    EXPECT_EQ(tc.gcd, MathUtil::GCD<uint64>(tc.x, tc.y));
+    EXPECT_EQ(tc.gcd, MathUtil::GCD<uint64>(tc.y, tc.x));
+  }
+
+  const uint64 biggish_prime = 1666666667;
+  EXPECT_EQ(biggish_prime,
+            MathUtil::GCD<uint64>(biggish_prime * 3, biggish_prime * 4));
+}
+
 }  // namespace tensorflow
-- 
GitLab


From ce4200eae990d7f5efdfb727939d38bf48001ba2 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 30 Nov 2017 15:20:30 -0800
Subject: [PATCH 0480/1225] Fix profiler to track some missed persistent bytes.

PiperOrigin-RevId: 177516249
---
 tensorflow/core/profiler/g3doc/options.md     |  11 +-
 .../core/profiler/internal/tfprof_node.cc     |  28 +++++
 .../core/profiler/internal/tfprof_node.h      |  10 +-
 .../profiler/internal/tfprof_show_test.cc     |  37 +++---
 .../profiler/internal/tfprof_stats_test.cc    | 105 ++++++++++--------
 tensorflow/core/profiler/tfprof_log.proto     |   5 +-
 .../python/profiler/model_analyzer_test.py    |  40 ++++++-
 7 files changed, 157 insertions(+), 79 deletions(-)

diff --git a/tensorflow/core/profiler/g3doc/options.md b/tensorflow/core/profiler/g3doc/options.md
index 4c73e372e3..dd12f76d6f 100644
--- a/tensorflow/core/profiler/g3doc/options.md
+++ b/tensorflow/core/profiler/g3doc/options.md
@@ -60,11 +60,14 @@ Currently, profiler only tracks the allocation of memory. As a result, the
 accumulated memory request is uaually larger than the peak memory of the overall
 model.
 
-bytes: The memory allocations requested by the operation.
-peak_bytes: The peak requested memory (not de-allocated) by the operation.
-residual_bytes: The memory requested by the operation and not de-allocated
+It's recommended to generate timeline to see the allocator memory usage over
+time.
+
+`bytes`: The memory allocations requested by the operation.
+`peak_bytes`: The peak requested memory (not de-allocated) by the operation.
+`residual_bytes`: The memory requested by the operation and not de-allocated
                 when Compute finishes.
-output_bytes: The memory output by the operation. It's not necessarily requested
+`output_bytes`: The memory output by the operation. It's not necessarily requested
               by the current operation. For example, it can be a tensor
               forwarded from input to output, with in-place mutation.
 
diff --git a/tensorflow/core/profiler/internal/tfprof_node.cc b/tensorflow/core/profiler/internal/tfprof_node.cc
index 671b65d708..5cd1050bcc 100644
--- a/tensorflow/core/profiler/internal/tfprof_node.cc
+++ b/tensorflow/core/profiler/internal/tfprof_node.cc
@@ -139,6 +139,25 @@ void ExecStep::AddMemoryStats(const string& dev,
         exec_.accelerator_persistent_bytes() +
         step_stat.memory_stats().device_persistent_memory_size());
   }
+
+  // TODO(xpan): Make this more accurate:
+  // High level: Memory tracking is suspicous and requires large scale
+  // clean up.
+  // Investigte the memory usage difference between CPU/GPU with OpViewTest.
+  //
+  // 1. OpKernelConstruction::allocate_xxx is not traced. Below, we only
+  //    discuss OpKernelContext-related allocations.
+  // 2. allocate_output calls allocate_tensor, which is properly tracked in
+  //    'NodeExecStats.memory'.
+  // 3. allocate_temp is only tracked through record_xxx_temp. It appears
+  //    in 'NodeExecStats.memory_stats'.
+  // 4. allocate_persistent calls allocate_tensor, which is properly tracked
+  //    in 'NodeExecStats.memory'. However, there is no way to count it as
+  //    persistent now.
+  // 5. record_xxx_persistent is called when allocate_persistent
+  //    is not used and hence tracks some complementary bytes. It appears in
+  //    'NodeExecStats.memory_stats'. It's suspicious. But we should
+  //    use it now since it covers constant op.
   int64 residual_bytes = 0;
   int64 requested_bytes = 0;
   int64 peak_bytes = 0;
@@ -147,6 +166,15 @@ void ExecStep::AddMemoryStats(const string& dev,
     requested_bytes += mem.total_bytes();
     peak_bytes += mem.peak_bytes();
   }
+  residual_bytes +=
+      exec_.host_persistent_bytes() + exec_.accelerator_persistent_bytes();
+  requested_bytes += exec_.host_persistent_bytes() +
+                     exec_.accelerator_persistent_bytes() +
+                     exec_.host_temp_bytes() + exec_.accelerator_temp_bytes();
+  peak_bytes += exec_.host_persistent_bytes() +
+                exec_.accelerator_persistent_bytes() + exec_.host_temp_bytes() +
+                exec_.accelerator_temp_bytes();
+
   exec_.set_requested_bytes(requested_bytes);
   exec_.set_residual_bytes(residual_bytes);
   exec_.set_peak_bytes(peak_bytes);
diff --git a/tensorflow/core/profiler/internal/tfprof_node.h b/tensorflow/core/profiler/internal/tfprof_node.h
index e2d0563a07..77c14cb792 100644
--- a/tensorflow/core/profiler/internal/tfprof_node.h
+++ b/tensorflow/core/profiler/internal/tfprof_node.h
@@ -593,17 +593,11 @@ class TFGraphNode {
   int64 accelerator_persistent_bytes() const {
     int64 persistent_bytes = 0;
     for (const auto& exec : execs_) {
-      persistent_bytes += exec.second.accelerator_persistent_bytes();
+      persistent_bytes = std::max(persistent_bytes,
+                                  exec.second.accelerator_persistent_bytes());
     }
     return persistent_bytes;
   }
-  int64 host_persistent_bytes(int64 step) const {
-    auto exec = execs_.find(step);
-    if (exec == execs_.end()) {
-      return 0;
-    }
-    return exec->second.host_persistent_bytes();
-  }
   const std::map<int32, std::pair<int64, uint64>>& output_memory(
       int64 step) const {
     auto exec = execs_.find(step);
diff --git a/tensorflow/core/profiler/internal/tfprof_show_test.cc b/tensorflow/core/profiler/internal/tfprof_show_test.cc
index 1f19f8c322..98773ae19e 100644
--- a/tensorflow/core/profiler/internal/tfprof_show_test.cc
+++ b/tensorflow/core/profiler/internal/tfprof_show_test.cc
@@ -105,12 +105,13 @@ TEST_F(TFProfShowTest, DumpScopeMode) {
       "node name | # parameters | # float_ops | requested bytes | peak bytes | "
       "residual bytes | output bytes | total execution time | accelerator "
       "execution time | cpu execution time\n_TFProfRoot (--/451 params, --/0 "
-      "flops, --/0B, --/0B, --/0B, --/2.56KB, --/13us, --/0us, --/13us)\n  DW "
-      "(3x3x3x6, 162/162 params, 0/0 flops, 0B/0B, 0B/0B, 0B/0B, "
-      "1.28KB/1.28KB, 2us/2us, 0us/0us, 2us/2us)\n  DW2 (2x2x6x12, 288/288 "
-      "params, 0/0 flops, 0B/0B, 0B/0B, 0B/0B, 1.28KB/1.28KB, 11us/11us, "
-      "0us/0us, 11us/11us)\n  ScalarW (1, 1/1 params, 0/0 flops, 0B/0B, 0B/0B, "
-      "0B/0B, 0B/0B, 0us/0us, 0us/0us, 0us/0us)\n",
+      "flops, --/2.56KB, --/2.56KB, --/2.56KB, --/2.56KB, --/13us, --/0us, "
+      "--/13us)\n  DW (3x3x3x6, 162/162 params, 0/0 flops, 1.28KB/1.28KB, "
+      "1.28KB/1.28KB, 1.28KB/1.28KB, 1.28KB/1.28KB, 2us/2us, 0us/0us, "
+      "2us/2us)\n  DW2 (2x2x6x12, 288/288 params, 0/0 flops, 1.28KB/1.28KB, "
+      "1.28KB/1.28KB, 1.28KB/1.28KB, 1.28KB/1.28KB, 11us/11us, 0us/0us, "
+      "11us/11us)\n  ScalarW (1, 1/1 params, 0/0 flops, 0B/0B, 0B/0B, 0B/0B, "
+      "0B/0B, 0us/0us, 0us/0us, 0us/0us)\n",
       dump_str);
 
   EXPECT_EQ(dump_str, TestToFromProto("scope", opts));
@@ -178,22 +179,22 @@ TEST_F(TFProfShowTest, DumpOpMode) {
   EXPECT_EQ(
       "nodename|requestedbytes|totalexecutiontime|acceleratorexecutiontime|"
       "cpuexecutiontime|#parameters|#float_ops|opoccurrence(run|defined)|"
-      "inputshapes\nVariableV20B(0.00%,0.00%),13us(100.00%,0.26%),0us(100.00%,"
-      "0.00%),13us(100.00%,0.29%),451params(100.00%,100.00%),0float_ops(100.00%"
-      ",0.00%),2|3\n\ninput_type:\t(run*2|defined*3)\texec_time:13us\n\nAdd0B("
-      "0.00%,0.00%),0us(99.74%,0.00%),0us(100.00%,0.00%),0us(99.71%,0.00%),"
-      "0params(0.00%,0.00%),0float_ops(100.00%,0.00%),0|3\n\ninput_type:0:1,"
-      "\t1:1\t(run*0|defined*1)\texec_time:0us\ninput_type:0:2x2x6x12,\t1:1\t("
-      "run*0|defined*1)\texec_time:0us\ninput_type:0:3x3x3x6,\t1:1\t(run*0|"
-      "defined*1)\texec_time:0us\n\nAssign0B(0.00%,0.00%),0us(99.74%,0.00%),"
-      "0us(100.00%,0.00%),0us(99.71%,0.00%),0params(0.00%,0.00%),0float_ops("
-      "100.00%,0.00%),0|3\n\ninput_type:0:1,\t1:1\t(run*0|defined*1)\texec_"
+      "inputshapes\nVariableV22.56KB(100.00%,8.40%),13us(100.00%,0.26%),0us("
+      "100.00%,0.00%),13us(100.00%,0.29%),451params(100.00%,100.00%),0float_"
+      "ops(100.00%,0.00%),2|3\n\ninput_type:\t(run*2|defined*3)\texec_time:"
+      "13us\n\nAdd0B(0.00%,0.00%),0us(99.74%,0.00%),0us(100.00%,0.00%),0us(99."
+      "71%,0.00%),0params(0.00%,0.00%),0float_ops(100.00%,0.00%),0|3\n\ninput_"
+      "type:0:1,\t1:1\t(run*0|defined*1)\texec_time:0us\ninput_type:0:2x2x6x12,"
+      "\t1:1\t(run*0|defined*1)\texec_time:0us\ninput_type:0:3x3x3x6,\t1:1\t("
+      "run*0|defined*1)\texec_time:0us\n\nAssign0B(0.00%,0.00%),0us(99.74%,0."
+      "00%),0us(100.00%,0.00%),0us(99.71%,0.00%),0params(0.00%,0.00%),0float_"
+      "ops(100.00%,0.00%),0|3\n\ninput_type:0:1,\t1:1\t(run*0|defined*1)\texec_"
       "time:0us\ninput_type:0:2x2x6x12,\t1:2x2x6x12\t(run*0|defined*1)\texec_"
       "time:0us\ninput_type:0:3x3x3x6,\t1:3x3x3x6\t(run*0|defined*1)\texec_"
       "time:0us\n\nConst0B(0.00%,0.00%),2us(99.74%,0.04%),0us(100.00%,0.00%),"
       "2us(99.71%,0.04%),0params(0.00%,0.00%),0float_ops(100.00%,0.00%),1|"
-      "10\n\ninput_type:\t(run*1|defined*10)\texec_time:2us\n\nConv2D14.59KB("
-      "100.00%,100.00%),4.89ms(99.70%,98.87%),404us(100.00%,100.00%),4.49ms(99."
+      "10\n\ninput_type:\t(run*1|defined*10)\texec_time:2us\n\nConv2D27.90KB("
+      "91.60%,91.60%),4.89ms(99.70%,98.87%),404us(100.00%,100.00%),4.49ms(99."
       "67%,98.77%),0params(0.00%,0.00%),10.44kfloat_ops(100.00%,100.00%),2|"
       "2\n\ninput_type:0:2x3x3x6,\t1:2x2x6x12\t(run*1|defined*1)\texec_time:"
       "597us\ninput_type:0:2x6x6x3,\t1:3x3x3x6\t(run*1|defined*1)\texec_time:4."
diff --git a/tensorflow/core/profiler/internal/tfprof_stats_test.cc b/tensorflow/core/profiler/internal/tfprof_stats_test.cc
index 2f2101d76b..b86a83cb1b 100644
--- a/tensorflow/core/profiler/internal/tfprof_stats_test.cc
+++ b/tensorflow/core/profiler/internal/tfprof_stats_test.cc
@@ -89,21 +89,27 @@ TEST_F(TFProfStatsTest, CustomOpType) {
 
   GraphNodeProto expected;
   CHECK(protobuf::TextFormat::ParseFromString(
-      "name: \"_TFProfRoot\"\ntotal_exec_micros: 13\ntotal_parameters: "
-      "451\nchildren {\n  name: \"DW\"\n  exec_micros: 2\n  parameters: 162\n  "
-      "total_exec_micros: 2\n  total_parameters: 162\n  devices: "
+      "name: \"_TFProfRoot\"\ntotal_exec_micros: 13\ntotal_requested_bytes: "
+      "2560\ntotal_parameters: 451\nchildren {\n  name: \"DW\"\n  exec_micros: "
+      "2\n  requested_bytes: 1280\n  parameters: 162\n  total_exec_micros: 2\n "
+      " total_requested_bytes: 1280\n  total_parameters: 162\n  devices: "
       "\"/job:localhost/replica:0/task:0/gpu:0\"\n  cpu_exec_micros: 2\n  "
       "total_cpu_exec_micros: 2\n  run_count: 1\n  total_run_count: 1\n  "
-      "total_definition_count: 1\n  output_bytes: 1280\n  total_output_bytes: "
-      "1280\n}\nchildren {\n  name: \"DW2\"\n  exec_micros: 11\n  parameters: "
-      "288\n  total_exec_micros: 11\n  total_parameters: 288\n  devices: "
+      "total_definition_count: 1\n  peak_bytes: 1280\n  residual_bytes: 1280\n "
+      " output_bytes: 1280\n  total_peak_bytes: 1280\n  total_residual_bytes: "
+      "1280\n  total_output_bytes: 1280\n}\nchildren {\n  name: \"DW2\"\n  "
+      "exec_micros: 11\n  requested_bytes: 1280\n  parameters: 288\n  "
+      "total_exec_micros: 11\n  total_requested_bytes: 1280\n  "
+      "total_parameters: 288\n  devices: "
       "\"/job:localhost/replica:0/task:0/gpu:0\"\n  cpu_exec_micros: 11\n  "
       "total_cpu_exec_micros: 11\n  run_count: 1\n  total_run_count: 1\n  "
-      "total_definition_count: 1\n  output_bytes: 1280\n  total_output_bytes: "
-      "1280\n}\nchildren {\n  name: \"ScalarW\"\n  parameters: 1\n  "
-      "total_parameters: 1\n  total_definition_count: "
+      "total_definition_count: 1\n  peak_bytes: 1280\n  residual_bytes: 1280\n "
+      " output_bytes: 1280\n  total_peak_bytes: 1280\n  total_residual_bytes: "
+      "1280\n  total_output_bytes: 1280\n}\nchildren {\n  name: \"ScalarW\"\n  "
+      "parameters: 1\n  total_parameters: 1\n  total_definition_count: "
       "1\n}\ntotal_cpu_exec_micros: 13\ntotal_run_count: "
-      "2\ntotal_definition_count: 3\ntotal_output_bytes: 2560\n",
+      "2\ntotal_definition_count: 3\ntotal_peak_bytes: "
+      "2560\ntotal_residual_bytes: 2560\ntotal_output_bytes: 2560\n",
       &expected));
   EXPECT_EQ(expected.DebugString(), root.DebugString());
 
@@ -119,21 +125,27 @@ TEST_F(TFProfStatsTest, CheckPointOpType) {
 
   GraphNodeProto expected;
   CHECK(protobuf::TextFormat::ParseFromString(
-      "name: \"_TFProfRoot\"\ntotal_exec_micros: 13\ntotal_parameters: "
-      "451\nchildren {\n  name: \"DW\"\n  exec_micros: 2\n  parameters: 162\n  "
-      "total_exec_micros: 2\n  total_parameters: 162\n  devices: "
+      "name: \"_TFProfRoot\"\ntotal_exec_micros: 13\ntotal_requested_bytes: "
+      "2560\ntotal_parameters: 451\nchildren {\n  name: \"DW\"\n  exec_micros: "
+      "2\n  requested_bytes: 1280\n  parameters: 162\n  total_exec_micros: 2\n "
+      " total_requested_bytes: 1280\n  total_parameters: 162\n  devices: "
       "\"/job:localhost/replica:0/task:0/gpu:0\"\n  cpu_exec_micros: 2\n  "
       "total_cpu_exec_micros: 2\n  run_count: 1\n  total_run_count: 1\n  "
-      "total_definition_count: 1\n  output_bytes: 1280\n  total_output_bytes: "
-      "1280\n}\nchildren {\n  name: \"DW2\"\n  exec_micros: 11\n  parameters: "
-      "288\n  total_exec_micros: 11\n  total_parameters: 288\n  devices: "
+      "total_definition_count: 1\n  peak_bytes: 1280\n  residual_bytes: 1280\n "
+      " output_bytes: 1280\n  total_peak_bytes: 1280\n  total_residual_bytes: "
+      "1280\n  total_output_bytes: 1280\n}\nchildren {\n  name: \"DW2\"\n  "
+      "exec_micros: 11\n  requested_bytes: 1280\n  parameters: 288\n  "
+      "total_exec_micros: 11\n  total_requested_bytes: 1280\n  "
+      "total_parameters: 288\n  devices: "
       "\"/job:localhost/replica:0/task:0/gpu:0\"\n  cpu_exec_micros: 11\n  "
       "total_cpu_exec_micros: 11\n  run_count: 1\n  total_run_count: 1\n  "
-      "total_definition_count: 1\n  output_bytes: 1280\n  total_output_bytes: "
-      "1280\n}\nchildren {\n  name: \"ScalarW\"\n  parameters: 1\n  "
-      "total_parameters: 1\n  total_definition_count: "
+      "total_definition_count: 1\n  peak_bytes: 1280\n  residual_bytes: 1280\n "
+      " output_bytes: 1280\n  total_peak_bytes: 1280\n  total_residual_bytes: "
+      "1280\n  total_output_bytes: 1280\n}\nchildren {\n  name: \"ScalarW\"\n  "
+      "parameters: 1\n  total_parameters: 1\n  total_definition_count: "
       "1\n}\ntotal_cpu_exec_micros: 13\ntotal_run_count: "
-      "2\ntotal_definition_count: 3\ntotal_output_bytes: 2560\n",
+      "2\ntotal_definition_count: 3\ntotal_peak_bytes: "
+      "2560\ntotal_residual_bytes: 2560\ntotal_output_bytes: 2560\n",
       &expected));
   EXPECT_EQ(expected.DebugString(), root.DebugString());
 
@@ -150,7 +162,7 @@ TEST_F(TFProfStatsTest, TestGraph) {
   GraphNodeProto expected;
   CHECK(protobuf::TextFormat::ParseFromString(
       "name: \"_TFProfRoot\"\ntotal_exec_micros: 4945\ntotal_requested_bytes: "
-      "14592\ntotal_parameters: 451\nchildren {\n  name: "
+      "30464\ntotal_parameters: 451\nchildren {\n  name: "
       "\"DW/Initializer/random_normal/mul\"\n  children {\n    name: "
       "\"DW/Initializer/random_normal/RandomStandardNormal\"\n    children {\n "
       "     name: \"DW/Initializer/random_normal/shape\"\n      "
@@ -166,7 +178,7 @@ TEST_F(TFProfStatsTest, TestGraph) {
       "4\n}\ntotal_float_ops: 10440\ntotal_accelerator_exec_micros: "
       "404\ntotal_cpu_exec_micros: 4541\ntotal_run_count: "
       "6\ntotal_definition_count: 32\ntotal_peak_bytes: "
-      "9984\ntotal_residual_bytes: 1280\ntotal_output_bytes: 4864\n",
+      "25856\ntotal_residual_bytes: 3840\ntotal_output_bytes: 4864\n",
       &expected));
   EXPECT_EQ(expected.DebugString(), root.DebugString());
 
@@ -181,9 +193,9 @@ TEST_F(TFProfStatsTest, TestFloatOps) {
   GraphNodeProto expected;
   CHECK(protobuf::TextFormat::ParseFromString(
       "name: \"_TFProfRoot\"\ntotal_exec_micros: 4945\ntotal_requested_bytes: "
-      "14592\ntotal_parameters: 451\nchildren {\n  name: \"Conv2D\"\n  "
-      "exec_micros: 4292\n  requested_bytes: 9472\n  total_exec_micros: 4292\n "
-      " total_requested_bytes: 9472\n  devices: "
+      "30464\ntotal_parameters: 451\nchildren {\n  name: \"Conv2D\"\n  "
+      "exec_micros: 4292\n  requested_bytes: 18176\n  total_exec_micros: "
+      "4292\n  total_requested_bytes: 18176\n  devices: "
       "\"/job:localhost/replica:0/task:0/gpu:0\"\n  float_ops: 5832\n  "
       "total_float_ops: 5832\n  input_shapes {\n    key: 0\n    value {\n      "
       "dim {\n        size: 2\n      }\n      dim {\n        size: 6\n      "
@@ -194,11 +206,11 @@ TEST_F(TFProfStatsTest, TestFloatOps) {
       "6\n      }\n    }\n  }\n  accelerator_exec_micros: 226\n  "
       "cpu_exec_micros: 4066\n  total_accelerator_exec_micros: 226\n  "
       "total_cpu_exec_micros: 4066\n  run_count: 1\n  total_run_count: 1\n  "
-      "total_definition_count: 1\n  peak_bytes: 5888\n  residual_bytes: 768\n  "
-      "output_bytes: 768\n  total_peak_bytes: 5888\n  total_residual_bytes: "
+      "total_definition_count: 1\n  peak_bytes: 14592\n  residual_bytes: 768\n "
+      " output_bytes: 768\n  total_peak_bytes: 14592\n  total_residual_bytes: "
       "768\n  total_output_bytes: 768\n}\nchildren {\n  name: \"Conv2D_1\"\n  "
-      "exec_micros: 597\n  requested_bytes: 5120\n  total_exec_micros: 597\n  "
-      "total_requested_bytes: 5120\n  devices: "
+      "exec_micros: 597\n  requested_bytes: 9728\n  total_exec_micros: 597\n  "
+      "total_requested_bytes: 9728\n  devices: "
       "\"/job:localhost/replica:0/task:0/gpu:0\"\n  float_ops: 4608\n  "
       "total_float_ops: 4608\n  input_shapes {\n    key: 0\n    value {\n      "
       "dim {\n        size: 2\n      }\n      dim {\n        size: 3\n      "
@@ -209,12 +221,12 @@ TEST_F(TFProfStatsTest, TestFloatOps) {
       "12\n      }\n    }\n  }\n  accelerator_exec_micros: 178\n  "
       "cpu_exec_micros: 419\n  total_accelerator_exec_micros: 178\n  "
       "total_cpu_exec_micros: 419\n  run_count: 1\n  total_run_count: 1\n  "
-      "total_definition_count: 1\n  peak_bytes: 4096\n  residual_bytes: 512\n  "
-      "output_bytes: 512\n  total_peak_bytes: 4096\n  total_residual_bytes: "
+      "total_definition_count: 1\n  peak_bytes: 8704\n  residual_bytes: 512\n  "
+      "output_bytes: 512\n  total_peak_bytes: 8704\n  total_residual_bytes: "
       "512\n  total_output_bytes: 512\n}\ntotal_float_ops: "
       "10440\ntotal_accelerator_exec_micros: 404\ntotal_cpu_exec_micros: "
       "4541\ntotal_run_count: 6\ntotal_definition_count: 35\ntotal_peak_bytes: "
-      "9984\ntotal_residual_bytes: 1280\ntotal_output_bytes: 4864\n",
+      "25856\ntotal_residual_bytes: 3840\ntotal_output_bytes: 4864\n",
       &expected));
   EXPECT_EQ(expected.DebugString(), root.DebugString());
 
@@ -231,9 +243,9 @@ TEST_F(TFProfStatsTest, TestAccountShownNameOnly) {
   GraphNodeProto expected;
   CHECK(protobuf::TextFormat::ParseFromString(
       "name: \"_TFProfRoot\"\ntotal_exec_micros: 597\ntotal_requested_bytes: "
-      "5120\nchildren {\n  name: \"Conv2D_1\"\n  exec_micros: 597\n  "
-      "requested_bytes: 5120\n  total_exec_micros: 597\n  "
-      "total_requested_bytes: 5120\n  devices: "
+      "9728\nchildren {\n  name: \"Conv2D_1\"\n  exec_micros: 597\n  "
+      "requested_bytes: 9728\n  total_exec_micros: 597\n  "
+      "total_requested_bytes: 9728\n  devices: "
       "\"/job:localhost/replica:0/task:0/gpu:0\"\n  float_ops: 4608\n  "
       "total_float_ops: 4608\n  input_shapes {\n    key: 0\n    value {\n      "
       "dim {\n        size: 2\n      }\n      dim {\n        size: 3\n      "
@@ -244,12 +256,12 @@ TEST_F(TFProfStatsTest, TestAccountShownNameOnly) {
       "12\n      }\n    }\n  }\n  accelerator_exec_micros: 178\n  "
       "cpu_exec_micros: 419\n  total_accelerator_exec_micros: 178\n  "
       "total_cpu_exec_micros: 419\n  run_count: 1\n  total_run_count: 1\n  "
-      "total_definition_count: 1\n  peak_bytes: 4096\n  residual_bytes: 512\n  "
-      "output_bytes: 512\n  total_peak_bytes: 4096\n  total_residual_bytes: "
+      "total_definition_count: 1\n  peak_bytes: 8704\n  residual_bytes: 512\n  "
+      "output_bytes: 512\n  total_peak_bytes: 8704\n  total_residual_bytes: "
       "512\n  total_output_bytes: 512\n}\ntotal_float_ops: "
       "4608\ntotal_accelerator_exec_micros: 178\ntotal_cpu_exec_micros: "
       "419\ntotal_run_count: 1\ntotal_definition_count: 2\ntotal_peak_bytes: "
-      "4096\ntotal_residual_bytes: 512\ntotal_output_bytes: 512\n",
+      "8704\ntotal_residual_bytes: 512\ntotal_output_bytes: 512\n",
       &expected));
   EXPECT_EQ(expected.DebugString(), root.DebugString());
 
@@ -265,8 +277,9 @@ TEST_F(TFProfStatsTest, TestShowTensorValue) {
   GraphNodeProto expected;
   CHECK(protobuf::TextFormat::ParseFromString(
       "name: \"_TFProfRoot\"\ntotal_exec_micros: 4945\ntotal_requested_bytes: "
-      "14592\ntotal_parameters: 451\nchildren {\n  name: \"DW\"\n  "
-      "exec_micros: 2\n  parameters: 162\n  total_exec_micros: 2\n  "
+      "30464\ntotal_parameters: 451\nchildren {\n  name: \"DW\"\n  "
+      "exec_micros: 2\n  requested_bytes: 1280\n  parameters: 162\n  "
+      "total_exec_micros: 2\n  total_requested_bytes: 1280\n  "
       "total_parameters: 162\n  devices: "
       "\"/job:localhost/replica:0/task:0/gpu:0\"\n  tensor_value {\n    dtype: "
       "DT_FLOAT\n    value_double: -0.000534315\n    value_double: "
@@ -351,11 +364,13 @@ TEST_F(TFProfStatsTest, TestShowTensorValue) {
       "value_double: 0.000374641\n    value_double: -0.00149603\n    "
       "value_double: -0.000317367\n    value_double: -0.000417829\n  }\n  "
       "cpu_exec_micros: 2\n  total_cpu_exec_micros: 2\n  run_count: 1\n  "
-      "total_run_count: 1\n  total_definition_count: 10\n  output_bytes: "
-      "1280\n  total_output_bytes: 1280\n}\ntotal_float_ops: "
-      "10440\ntotal_accelerator_exec_micros: 404\ntotal_cpu_exec_micros: "
-      "4541\ntotal_run_count: 6\ntotal_definition_count: 35\ntotal_peak_bytes: "
-      "9984\ntotal_residual_bytes: 1280\ntotal_output_bytes: 4864\n",
+      "total_run_count: 1\n  total_definition_count: 10\n  peak_bytes: 1280\n  "
+      "residual_bytes: 1280\n  output_bytes: 1280\n  total_peak_bytes: 1280\n  "
+      "total_residual_bytes: 1280\n  total_output_bytes: "
+      "1280\n}\ntotal_float_ops: 10440\ntotal_accelerator_exec_micros: "
+      "404\ntotal_cpu_exec_micros: 4541\ntotal_run_count: "
+      "6\ntotal_definition_count: 35\ntotal_peak_bytes: "
+      "25856\ntotal_residual_bytes: 3840\ntotal_output_bytes: 4864\n",
       &expected));
   EXPECT_EQ(expected.DebugString(), root.DebugString());
 }
diff --git a/tensorflow/core/profiler/tfprof_log.proto b/tensorflow/core/profiler/tfprof_log.proto
index f92301133a..b49bdf64ac 100644
--- a/tensorflow/core/profiler/tfprof_log.proto
+++ b/tensorflow/core/profiler/tfprof_log.proto
@@ -124,9 +124,10 @@ message ExecProfile {
   int64 residual_bytes = 9;
   // Total bytes output by the op (not necessarily requested by the op).
   int64 output_bytes = 10;
-  // Total temporary bytes allocated and released by the op.
+  // NOTE: Please don't depend on the following 4 fields yet. Due to
+  // TensorFlow internal tracing issues, the numbers can be quite wrong.
+  // TODO(xpan): Fix the TensorFlow internal tracing.
   int64 host_temp_bytes = 11;
-  // Total persistent bytes (e.g. variable) allocated by the op.
   int64 host_persistent_bytes = 12;
   int64 accelerator_temp_bytes = 13;
   int64 accelerator_persistent_bytes = 14;
diff --git a/tensorflow/python/profiler/model_analyzer_test.py b/tensorflow/python/profiler/model_analyzer_test.py
index 698f8906d4..c39d0fa5b1 100644
--- a/tensorflow/python/profiler/model_analyzer_test.py
+++ b/tensorflow/python/profiler/model_analyzer_test.py
@@ -23,12 +23,15 @@ import os
 import random
 import re
 
+import numpy as np
+
 from tensorflow.core.profiler import profile_pb2
 from tensorflow.core.protobuf import config_pb2
 from tensorflow.core.protobuf import rewriter_config_pb2
 from tensorflow.python.client import session
 from tensorflow.python.framework import ops
 from tensorflow.python.framework import test_util
+from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import random_ops
 from tensorflow.python.ops import variables
 from tensorflow.python.platform import gfile
@@ -346,8 +349,8 @@ class PrintModelAnalysisTest(test.TestCase):
       with gfile.Open(outfile, 'r') as f:
         # pylint: disable=line-too-long
         self.assertEqual(
-            'nodename|requestedbytes|peakbytes|residualbytes|outputbytes|totalexecutiontime|acceleratorexecutiontime|cpuexecutiontime|#parameters|opoccurrence(run|defined)|inputshapes\nConst0B(0',
-            f.read().replace('\t', '').replace(' ', '')[0:180])
+            'nodename|requestedbytes|peakbytes|residualbytes|outputbytes|totalexecutiontime|acceleratorexecutiontime|cpuexecutiontime|#parameters|opoccurrence(run|defined)|inputshapes',
+            f.read().replace('\t', '').replace(' ', '')[0:170])
         # pylint: enable=line-too-long
 
       total_children = 0
@@ -694,6 +697,39 @@ class PrintModelAnalysisTest(test.TestCase):
                       exception_str)
       self.assertTrue(mat is None)
 
+  def testTrackPersistentBytes(self):
+    ops.reset_default_graph()
+    a = array_ops.constant(np.ones((100, 100)))
+    b = array_ops.constant(np.ones((100, 100)))
+    c = a * b
+
+    with session.Session() as sess:
+      run_options = config_pb2.RunOptions(
+          trace_level=config_pb2.RunOptions.FULL_TRACE)
+      run_metadata = config_pb2.RunMetadata()
+      sess.run(c, options=run_options, run_metadata=run_metadata)
+
+      options = option_builder.ProfileOptionBuilder.time_and_memory()
+      options['min_bytes'] = 0
+      options['select'] = ('bytes', 'peak_bytes', 'output_bytes',
+                           'residual_bytes')
+      ret = model_analyzer.profile(
+          sess.graph, run_meta=run_metadata, cmd='scope', options=options)
+
+      run_metadata = config_pb2.RunMetadata()
+      sess.run(c, options=run_options, run_metadata=run_metadata)
+      ret2 = model_analyzer.profile(
+          sess.graph, run_meta=run_metadata, cmd='scope', options=options)
+
+      n = lib.SearchTFProfNode(ret, 'mul')
+      n2 = lib.SearchTFProfNode(ret2, 'mul')
+      self.assertGreater(n.peak_bytes, 0)
+      self.assertGreater(n.output_bytes, 0)
+      self.assertGreater(n.residual_bytes, 0)
+      self.assertEqual(n.peak_bytes, n2.peak_bytes)
+      self.assertEqual(n.output_bytes, n2.output_bytes)
+      self.assertEqual(n.residual_bytes, n2.residual_bytes)
+
 
 if __name__ == '__main__':
   test.main()
-- 
GitLab


From f69915dc152c5516e6bc88b93515cbb02a1fbfc5 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 30 Nov 2017 15:33:39 -0800
Subject: [PATCH 0481/1225] [XLA] Sanitize hlo names to match regexp
 "[a-zA-Z_][a-zA-Z0-9_.-]*".

PiperOrigin-RevId: 177518046
---
 tensorflow/compiler/xla/service/hlo_module.cc |  7 ++--
 .../compiler/xla/service/name_uniquer.cc      | 32 +++++++++++++++++++
 .../compiler/xla/service/name_uniquer.h       | 13 ++++++--
 .../compiler/xla/service/name_uniquer_test.cc | 26 ++++++++++++---
 4 files changed, 68 insertions(+), 10 deletions(-)

diff --git a/tensorflow/compiler/xla/service/hlo_module.cc b/tensorflow/compiler/xla/service/hlo_module.cc
index faaf73ea1c..6fe2134466 100644
--- a/tensorflow/compiler/xla/service/hlo_module.cc
+++ b/tensorflow/compiler/xla/service/hlo_module.cc
@@ -35,14 +35,15 @@ namespace xla {
 HloModule::HloModule(const string& name,
                      const VersionedComputationHandle& entry_computation_handle,
                      const HloModuleConfig& config)
-    : name_(name),
+    : name_(NameUniquer::GetSanitizedName(name)),
       config_(config),
       has_entry_computation_handle_(true),
       entry_computation_handle_(entry_computation_handle) {}
 
-HloModule::HloModule(const string& name) : name_(name) {}
+HloModule::HloModule(const string& name)
+    : name_(NameUniquer::GetSanitizedName(name)) {}
 HloModule::HloModule(const string& name, const HloModuleConfig& config)
-    : name_(name), config_(config) {}
+    : name_(NameUniquer::GetSanitizedName(name)), config_(config) {}
 
 HloComputation* HloModule::AddComputationInternal(
     std::unique_ptr<HloComputation> computation, bool is_entry,
diff --git a/tensorflow/compiler/xla/service/name_uniquer.cc b/tensorflow/compiler/xla/service/name_uniquer.cc
index a0d08c288d..7d8c05fffa 100644
--- a/tensorflow/compiler/xla/service/name_uniquer.cc
+++ b/tensorflow/compiler/xla/service/name_uniquer.cc
@@ -17,12 +17,44 @@ limitations under the License.
 
 #include "tensorflow/compiler/xla/types.h"
 #include "tensorflow/core/lib/strings/strcat.h"
+#include "tensorflow/core/platform/logging.h"
 #include "tensorflow/core/platform/types.h"
 
 namespace xla {
 
+namespace {
+
+bool IsAllowed(char character) {
+  auto c = static_cast<unsigned char>(character);
+  return (isalnum(c) != 0) || c == '_' || c == '.' || c == '-';
+}
+
+}  // namespace
+
+NameUniquer::NameUniquer(const string& separator) {
+  CHECK(std::all_of(separator.begin(), separator.end(), IsAllowed))
+      << "separator should comprises allowed characters only";
+  separator_ = separator;
+}
+
+/*static*/ string NameUniquer::GetSanitizedName(const string& name) {
+  string result = name;
+  CHECK(!result.empty()) << "name should not be empty";
+  char c = static_cast<unsigned char>(result[0]);
+  if (!isalpha(c) && c != '_') {
+    result[0] = '_';
+  }
+  for (int i = 1; i < result.length(); i++) {
+    if (!IsAllowed(result[i])) {
+      result[i] = '_';
+    }
+  }
+  return result;
+}
+
 string NameUniquer::GetUniqueName(tensorflow::StringPiece prefix) {
   string root = prefix.empty() ? "name" : prefix.ToString();
+  root = GetSanitizedName(root);
 
   // Strip away numeric suffix (if any). Only recognize separator if it is in
   // the middle of the name.
diff --git a/tensorflow/compiler/xla/service/name_uniquer.h b/tensorflow/compiler/xla/service/name_uniquer.h
index ed379b5225..4139c2700b 100644
--- a/tensorflow/compiler/xla/service/name_uniquer.h
+++ b/tensorflow/compiler/xla/service/name_uniquer.h
@@ -28,14 +28,21 @@ namespace xla {
 // Simple stateful class that helps generate "unique" names. To use it, simply
 // call GetUniqueName as many times as needed. The names returned by
 // GetUniqueName are guaranteed to be distinct for this instance of the class.
+// Note that the names will be sanitized to match regexp
+// "[a-zA-Z_][a-zA-Z0-9_.-]*".
 class NameUniquer {
  public:
-  explicit NameUniquer(const string& separator = "__")
-      : separator_(separator) {}
+  // The separator must contain allowed characters only: "[a-zA-Z0-9_.-]".
+  explicit NameUniquer(const string& separator = "__");
 
-  // Get a unique name in a string, with an optional prefix for convenience.
+  // Get a sanitized unique name in a string, with an optional prefix for
+  // convenience.
   string GetUniqueName(tensorflow::StringPiece prefix = "");
 
+  // Sanitizes and returns the name. Unallowed characters will be replaced with
+  // '_'. The result will match the regexp "[a-zA-Z_][a-zA-Z0-9_.-]*".
+  static string GetSanitizedName(const string& name);
+
  private:
   // The string to use to separate the prefix of the name from the uniquing
   // integer value.
diff --git a/tensorflow/compiler/xla/service/name_uniquer_test.cc b/tensorflow/compiler/xla/service/name_uniquer_test.cc
index 9f0747a6e2..4258cf1687 100644
--- a/tensorflow/compiler/xla/service/name_uniquer_test.cc
+++ b/tensorflow/compiler/xla/service/name_uniquer_test.cc
@@ -60,12 +60,30 @@ TEST_F(NameUniquerTest, NumericSuffixes) {
   EXPECT_EQ("bar", uniquer.GetUniqueName("bar.-1000"));
   EXPECT_EQ("bar.1", uniquer.GetUniqueName("bar.-2000"));
   EXPECT_EQ("bar.2", uniquer.GetUniqueName("bar.1"));
+}
+
+TEST_F(NameUniquerTest, Sanitize) {
+  NameUniquer uniquer("_");
+
+  EXPECT_EQ("foo", uniquer.GetUniqueName("foo"));
+  EXPECT_EQ("foo_1", uniquer.GetUniqueName("foo"));
+  EXPECT_EQ("foo.54", uniquer.GetUniqueName("foo.54"));
+  EXPECT_EQ("foo_54", uniquer.GetUniqueName("foo_54"));
+  EXPECT_EQ("foo_54.1", uniquer.GetUniqueName("foo_54.1"));
+  EXPECT_EQ("foo_55", uniquer.GetUniqueName("foo"));
+
+  // Invalid characters will be replaced with '_'.
+  EXPECT_EQ("bar", uniquer.GetUniqueName("bar<-1000"));
+  EXPECT_EQ("bar_1", uniquer.GetUniqueName("bar<-2000"));
+  EXPECT_EQ("bar_2", uniquer.GetUniqueName("bar_1"));
 
   // Separator is only recognized in the middle of the prefix.
-  EXPECT_EQ(".10", uniquer.GetUniqueName(".10"));
-  EXPECT_EQ(".10.1", uniquer.GetUniqueName(".10"));
-  EXPECT_EQ("foobar.", uniquer.GetUniqueName("foobar."));
-  EXPECT_EQ("foobar..1", uniquer.GetUniqueName("foobar."));
+  EXPECT_EQ("_10", uniquer.GetUniqueName(
+                       ".10"));  // the leading '.' is replaced with '_'.
+  EXPECT_EQ("_10_1", uniquer.GetUniqueName(".10"));
+  EXPECT_EQ("_10_2", uniquer.GetUniqueName("_10"));
+  EXPECT_EQ("foobar_", uniquer.GetUniqueName("foobar_"));
+  EXPECT_EQ("foobar__1", uniquer.GetUniqueName("foobar_"));
 }
 
 }  // namespace
-- 
GitLab


From 0472116d163eeb77d51cabdc5fc67be917048870 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 30 Nov 2017 15:34:36 -0800
Subject: [PATCH 0482/1225] [TF:XLA] Make tf_cnn_benchmarks run on CPU with
 XLA.

Adds _cpu_jit to tf_cnn_benchmarks_xla BUILD rule and fixes an issue in XLA bridge triggered by XLA CPU compilation of whole graphs. In particular, modifies mark_for_compilation_pass.cc to skip _Retval nodes when looking for compilation candidates in the top level function. _Retval nodes are introduced in the input subgraph as a replacement for fetches. Including _Retval nodes into XLA clusters confuses encapsulate subgraph pass that expects a graph with no pre-existing _Retval nodes.

PiperOrigin-RevId: 177518178
---
 .../compiler/jit/mark_for_compilation_pass.cc |  7 +++++
 .../jit/mark_for_compilation_pass_test.cc     | 27 +++++++++++++++++++
 2 files changed, 34 insertions(+)

diff --git a/tensorflow/compiler/jit/mark_for_compilation_pass.cc b/tensorflow/compiler/jit/mark_for_compilation_pass.cc
index 74c9791f5e..aceedeb823 100644
--- a/tensorflow/compiler/jit/mark_for_compilation_pass.cc
+++ b/tensorflow/compiler/jit/mark_for_compilation_pass.cc
@@ -210,6 +210,13 @@ Status FindCompilationCandidates(
         !IsCompilableWhile(*node, jit_device_type, 0, lib_runtime)) {
       continue;
     }
+    // _Retval nodes in a top-level function represent fetches.
+    // Do not compile them.
+    if (node->type_string() == "_Retval") {
+      VLOG(2) << "Compilation rejected node: return value " << node->name()
+              << ": " << node->type_string();
+      continue;
+    }
     candidates->insert(node);
   }
   return Status::OK();
diff --git a/tensorflow/compiler/jit/mark_for_compilation_pass_test.cc b/tensorflow/compiler/jit/mark_for_compilation_pass_test.cc
index b3d258aea1..454f0aeae9 100644
--- a/tensorflow/compiler/jit/mark_for_compilation_pass_test.cc
+++ b/tensorflow/compiler/jit/mark_for_compilation_pass_test.cc
@@ -525,5 +525,32 @@ TEST(XlaCompilationTest, IllegalCycle_UsefulErrorMessage) {
                             "+-- c\n"));
 }
 
+TEST(XlaCompilationTest, Retval) {
+  std::unique_ptr<Graph> graph(new Graph(OpRegistry::Global()));
+  GraphDef graphdef;
+  {
+    GraphDefBuilder builder(GraphDefBuilder::kFailImmediately);
+    Node* a = ops::SourceOp("Const", builder.opts()
+                                         .WithName("A")
+                                         .WithAttr("dtype", DT_FLOAT)
+                                         .WithAttr("value", Tensor()));
+    Node* b = ops::UnaryOp("Relu", a, builder.opts().WithName("B"));
+    ops::UnaryOp("_Retval", b,
+                 builder.opts()
+                     .WithName("R")
+                     .WithAttr("T", DT_FLOAT)
+                     .WithAttr("index", 0));
+
+    TF_EXPECT_OK(builder.ToGraph(graph.get()));
+  }
+
+  TF_ASSERT_OK(MarkForCompilation(&graph));
+  auto clusters = GetClusters(*graph);
+
+  EXPECT_EQ(2, clusters.size());
+  EXPECT_TRUE(clusters.find("R") == clusters.cend());
+  EXPECT_EQ(clusters["A"], clusters["B"]);
+}
+
 }  // namespace
 }  // namespace tensorflow
-- 
GitLab


From 186caed810c0e9a9ee9a3f1e0f8bea50764ce5df Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 30 Nov 2017 15:48:06 -0800
Subject: [PATCH 0483/1225] Add int64 support to XLA Shape op.

PiperOrigin-RevId: 177519992
---
 .../compiler/tf2xla/kernels/shape_op.cc       | 76 +++++++++++--------
 1 file changed, 45 insertions(+), 31 deletions(-)

diff --git a/tensorflow/compiler/tf2xla/kernels/shape_op.cc b/tensorflow/compiler/tf2xla/kernels/shape_op.cc
index 24a99f253d..06838d1625 100644
--- a/tensorflow/compiler/tf2xla/kernels/shape_op.cc
+++ b/tensorflow/compiler/tf2xla/kernels/shape_op.cc
@@ -25,58 +25,72 @@ limitations under the License.
 namespace tensorflow {
 namespace {
 
+// Converts a TensorShape to a constant Tensor.
+//
+// The input TensorShape input_shape is used to populate the elements of
+// shape_constant, which is modified in place.
+Status TensorShapeToConstant(const TensorShape& input_shape,
+                             Tensor* shape_constant) {
+  const int dims = input_shape.dims();
+  if (shape_constant->dtype() == DT_INT32) {
+    auto vec = shape_constant->vec<int32>();
+    for (int i = 0; i < dims; ++i) {
+      int64 dim_size = input_shape.dim_size(i);
+      if (!FastBoundsCheck(dim_size, std::numeric_limits<int32>::max())) {
+        return errors::InvalidArgument(
+            "Shape with out_type=int32 does not support tensors > int32max",
+            " but dim ", i, " is ", dim_size);
+      }
+      vec(i) = static_cast<int32>(dim_size);
+    }
+  } else {
+    auto vec = shape_constant->vec<int64>();
+    for (int i = 0; i < dims; ++i) {
+      int64 dim_size = input_shape.dim_size(i);
+      vec(i) = dim_size;
+    }
+  }
+  return Status::OK();
+}
+
 class ShapeOp : public XlaOpKernel {
  public:
-  explicit ShapeOp(OpKernelConstruction* ctx) : XlaOpKernel(ctx) {}
+  explicit ShapeOp(OpKernelConstruction* ctx) : XlaOpKernel(ctx) {
+    OP_REQUIRES_OK(ctx, ctx->GetAttr("out_type", &out_dtype_));
+  }
 
   void Compile(XlaOpKernelContext* ctx) override {
     const TensorShape input_shape = ctx->InputShape(0);
-    const int rank = input_shape.dims();
-    Tensor shape_constant(DT_INT32, TensorShape({rank}));
-    auto vec = shape_constant.vec<int32>();
-    // TODO(dga): support int64.  b/28119922.
-    for (int i = 0; i < rank; ++i) {
-      int64 dim_size = input_shape.dim_size(i);
-      OP_REQUIRES(
-          ctx, FastBoundsCheck(dim_size, std::numeric_limits<int32>::max()),
-          errors::InvalidArgument("Shape does not support tensors > int32max",
-                                  " but dim ", i, " is ", dim_size));
-      vec(i) = static_cast<int32>(dim_size);
-    }
-
+    Tensor shape_constant(out_dtype_, TensorShape({input_shape.dims()}));
+    OP_REQUIRES_OK(ctx, TensorShapeToConstant(input_shape, &shape_constant));
     ctx->SetConstantOutput(0, shape_constant);
   }
+
+ private:
+  DataType out_dtype_;
 };
 
 REGISTER_XLA_OP(Name("Shape"), ShapeOp);
 
 class ShapeNOp : public XlaOpKernel {
  public:
-  explicit ShapeNOp(OpKernelConstruction* ctx) : XlaOpKernel(ctx) {}
+  explicit ShapeNOp(OpKernelConstruction* ctx) : XlaOpKernel(ctx) {
+    OP_REQUIRES_OK(ctx, ctx->GetAttr("out_type", &out_dtype_));
+  }
 
   void Compile(XlaOpKernelContext* ctx) override {
     for (int i = 0; i < ctx->num_inputs(); ++i) {
-      const TensorShape shape = ctx->InputShape(i);
-      const int dims = shape.dims();
-      Tensor shape_constant(DT_INT32, TensorShape({dims}));
-      auto vec = shape_constant.vec<int32>();
-
-      // TODO(dga): support int64.  b/28119922.
-      for (int j = 0; j < dims; ++j) {
-        int64 dim_size = shape.dim_size(j);
-        OP_REQUIRES(
-            ctx, FastBoundsCheck(dim_size, std::numeric_limits<int32>::max()),
-            errors::InvalidArgument("Shape does not support tensors > int32max",
-                                    " but shape ", i, " dim ", j, " is ",
-                                    dim_size));
-        vec(j) = static_cast<int32>(dim_size);
-      }
-
+      const TensorShape input_shape = ctx->InputShape(i);
+      Tensor shape_constant(out_dtype_, TensorShape({input_shape.dims()}));
+      OP_REQUIRES_OK(ctx, TensorShapeToConstant(input_shape, &shape_constant));
       ctx->SetConstantOutput(i, shape_constant);
     }
   }
 
   bool IsExpensive() override { return false; }
+
+ private:
+  DataType out_dtype_;
 };
 REGISTER_XLA_OP(Name("ShapeN"), ShapeNOp);
 
-- 
GitLab


From 0438ac79bdb503ed267bec2146e7136ac8e99ff9 Mon Sep 17 00:00:00 2001
From: David Majnemer <majnemer@google.com>
Date: Thu, 30 Nov 2017 16:07:24 -0800
Subject: [PATCH 0484/1225] [TF:XLA] Use output spatial dimensions instead of a
 transpose for conv backwards filter

PiperOrigin-RevId: 177522710
---
 .../compiler/tf2xla/kernels/conv_ops.cc       | 31 ++++-------
 .../xla/service/gpu/convolution_folding.cc    | 55 +++++--------------
 .../service/gpu/convolution_folding_test.cc   | 42 +++++---------
 .../compiler/xla/service/gpu/pad_insertion.cc | 16 +-----
 4 files changed, 41 insertions(+), 103 deletions(-)

diff --git a/tensorflow/compiler/tf2xla/kernels/conv_ops.cc b/tensorflow/compiler/tf2xla/kernels/conv_ops.cc
index 61f4d1993a..aaddbe811c 100644
--- a/tensorflow/compiler/tf2xla/kernels/conv_ops.cc
+++ b/tensorflow/compiler/tf2xla/kernels/conv_ops.cc
@@ -540,9 +540,7 @@ class ConvBackpropFilterOp : public XlaOpKernel {
 
     // Swap n_dim and c_dim in the activations.
     dnums.set_input_batch_dimension(c_dim);
-    dnums.set_output_batch_dimension(c_dim);
     dnums.set_input_feature_dimension(n_dim);
-    dnums.set_output_feature_dimension(n_dim);
 
     // The gradients become the RHS of the convolution.
     // The gradients have shape [batch, out_rows, out_cols, ..., out_depth]
@@ -554,11 +552,17 @@ class ConvBackpropFilterOp : public XlaOpKernel {
     std::vector<int64> rhs_dilation(num_spatial_dims_);
     std::vector<int64> ones(num_spatial_dims_, 1);
 
+    // Tensorflow filter shape is [ H, W, ..., inC, outC ].
+    for (int i = 0; i < num_spatial_dims_; ++i) {
+      dnums.add_output_spatial_dimensions(i);
+    }
+    dnums.set_output_batch_dimension(num_spatial_dims_);
+    dnums.set_output_feature_dimension(num_spatial_dims_ + 1);
+
     for (int i = 0; i < num_spatial_dims_; ++i) {
       int64 dim = GetTensorSpatialDimIndex(num_dims(), data_format_, i);
       dnums.add_input_spatial_dimensions(dim);
       dnums.add_kernel_spatial_dimensions(dim);
-      dnums.add_output_spatial_dimensions(dim);
 
       // We will also need to pad the input with zeros such that after the
       // convolution, we get the right size for the filter.
@@ -615,26 +619,11 @@ class ConvBackpropFilterOp : public XlaOpKernel {
                               /*window_strides=*/ones, padding,
                               /*lhs_dilation=*/ones, rhs_dilation, dnums);
 
-    // The layout of filter_backprop will match the layout of
-    // padded_activations
-    // and so will have layout: [out_feature, h, w, ..., in_feature]
-    // Tensorflow filter shape is [ H, W, ..., inC, outC ], so we transpose the
-    // output.
-    std::vector<int64> transpose_dims;
-    transpose_dims.reserve(num_dims());
-    for (int i = 0; i < num_spatial_dims_; ++i) {
-      transpose_dims.push_back(dnums.output_spatial_dimensions(i));
-    }
-    transpose_dims.push_back(c_dim);
-    transpose_dims.push_back(n_dim);
-    xla::ComputationDataHandle filter_backprop_reshaped =
-        b->Transpose(filter_backprop, transpose_dims);
-
     if (depthwise_) {
-      filter_backprop_reshaped = ContractFilterForDepthwiseBackprop(
-          ctx, filter_shape, ctx->input_type(0), filter_backprop_reshaped, b);
+      filter_backprop = ContractFilterForDepthwiseBackprop(
+          ctx, filter_shape, ctx->input_type(0), filter_backprop, b);
     }
-    ctx->SetOutput(0, filter_backprop_reshaped);
+    ctx->SetOutput(0, filter_backprop);
   }
 
  protected:
diff --git a/tensorflow/compiler/xla/service/gpu/convolution_folding.cc b/tensorflow/compiler/xla/service/gpu/convolution_folding.cc
index 828ae675d7..f198c4c08e 100644
--- a/tensorflow/compiler/xla/service/gpu/convolution_folding.cc
+++ b/tensorflow/compiler/xla/service/gpu/convolution_folding.cc
@@ -55,19 +55,7 @@ MatchBackwardFilter(HloInstruction* conv) {
   //               v       v
   //              Convolution
   //                 conv
-  //                   |
-  //                   v
-  //               Transpose (optional if identity transposition)
   CHECK_EQ(HloOpcode::kConvolution, conv->opcode());
-  // If the forward convolution is followed by a transpose, we can fuse the
-  // transpose into the backward convolution as well.
-  HloInstruction* transpose = nullptr;
-  if (conv->user_count() == 1) {
-    HloInstruction* single_user = *conv->users().begin();
-    if (single_user->opcode() == HloOpcode::kTranspose) {
-      transpose = single_user;
-    }
-  }
 
   // Step 2: match paddings and dimension numbers of the forward convolution.
   const ConvolutionDimensionNumbers& conv_dnums =
@@ -75,6 +63,9 @@ MatchBackwardFilter(HloInstruction* conv) {
   auto input_batch_dim = conv_dnums.input_batch_dimension();
   auto input_feature_dim = conv_dnums.input_feature_dimension();
   auto input_spatial_dims = conv_dnums.input_spatial_dimensions();
+  auto kernel_input_feature_dim = conv_dnums.kernel_input_feature_dimension();
+  auto kernel_output_feature_dim = conv_dnums.kernel_output_feature_dimension();
+  auto kernel_spatial_dims = conv_dnums.kernel_spatial_dimensions();
   auto output_batch_dim = conv_dnums.output_batch_dimension();
   auto output_feature_dim = conv_dnums.output_feature_dimension();
   auto output_spatial_dims = conv_dnums.output_spatial_dimensions();
@@ -98,7 +89,8 @@ MatchBackwardFilter(HloInstruction* conv) {
     }
     // Padding high will be checked in Step 3.
   }
-  if (transpose == nullptr && !window_util::HasWindowDilation(conv->window())) {
+  if (input_batch_dim == output_batch_dim &&
+      !window_util::HasWindowDilation(conv->window())) {
     VLOG(1) << conv->ToString()
             << " is a regular forward convolution. No need "
                "to fold it to a backward filter convolution.";
@@ -169,53 +161,32 @@ MatchBackwardFilter(HloInstruction* conv) {
     }
   }
 
-  // To make future HLO passes easier, we canonicalize the fused expression by
-  // adding an identity transposition if it's omitted in the pattern.
-  if (transpose == nullptr) {
-    // Create an identity transposition with the same rank as the forward
-    // convolution.
-    HloComputation* parent_computation = conv->parent();
-    std::vector<int64> transpose_dimensions(ShapeUtil::Rank(conv->shape()));
-    std::iota(transpose_dimensions.begin(), transpose_dimensions.end(), 0);
-    transpose =
-        parent_computation->AddInstruction(HloInstruction::CreateTranspose(
-            conv->shape(), conv, transpose_dimensions));
-    TF_CHECK_OK(conv->ReplaceAllUsesWith(transpose));
-  }
-
   // Restore the dimension numbers of the backward convolution from the forward
   // convolution. The two activation dimensions are reversed (batch and
   // feature).
   ConvolutionDimensionNumbers backward_conv_dnums;
   backward_conv_dnums.set_input_batch_dimension(input_feature_dim);
   backward_conv_dnums.set_input_feature_dimension(input_batch_dim);
-  backward_conv_dnums.set_output_batch_dimension(output_feature_dim);
-  backward_conv_dnums.set_output_feature_dimension(output_batch_dim);
   for (int i = 0; i < input_spatial_dims.size(); ++i) {
     backward_conv_dnums.add_input_spatial_dimensions(input_spatial_dims[i]);
   }
-  for (int i = 0; i < output_spatial_dims.size(); ++i) {
-    backward_conv_dnums.add_output_spatial_dimensions(output_spatial_dims[i]);
+  backward_conv_dnums.set_output_batch_dimension(kernel_input_feature_dim);
+  backward_conv_dnums.set_output_feature_dimension(kernel_output_feature_dim);
+  for (int i = 0; i < kernel_spatial_dims.size(); ++i) {
+    backward_conv_dnums.add_output_spatial_dimensions(kernel_spatial_dims[i]);
   }
   // The dimension numbering of the output of the forward convolution (before
   // transposition) is the same as that of the activations (according to the
   // semantics of kConvolution). The batch dimension of the activations should
   // be treated as the input feature dimension, and the feature dimension should
   // be treated as the output feature.
-  //
-  // The output of the forward convolution needs to be transposed to fit into
-  // the dimension numbering of the weight gradients. This transposition maps
-  // dimension i to PositionInContainer(transpose->dimensions(), i).
-  backward_conv_dnums.set_kernel_input_feature_dimension(
-      PositionInContainer(transpose->dimensions(), output_batch_dim));
-  backward_conv_dnums.set_kernel_output_feature_dimension(
-      PositionInContainer(transpose->dimensions(), output_feature_dim));
+  backward_conv_dnums.set_kernel_input_feature_dimension(output_batch_dim);
+  backward_conv_dnums.set_kernel_output_feature_dimension(output_feature_dim);
   for (int i = 0; i < output_spatial_dims.size(); ++i) {
-    backward_conv_dnums.add_kernel_spatial_dimensions(
-        PositionInContainer(transpose->dimensions(), output_spatial_dims[i]));
+    backward_conv_dnums.add_kernel_spatial_dimensions(output_spatial_dims[i]);
   }
 
-  return std::make_tuple(true, std::vector<HloInstruction*>({transpose, conv}),
+  return std::make_tuple(true, std::vector<HloInstruction*>({conv}),
                          backward_conv_window, backward_conv_dnums);
 }
 
diff --git a/tensorflow/compiler/xla/service/gpu/convolution_folding_test.cc b/tensorflow/compiler/xla/service/gpu/convolution_folding_test.cc
index 112c496e1f..34e6bdb117 100644
--- a/tensorflow/compiler/xla/service/gpu/convolution_folding_test.cc
+++ b/tensorflow/compiler/xla/service/gpu/convolution_folding_test.cc
@@ -46,18 +46,18 @@ class ConvolutionFoldingTest : public HloTestBase {
     //
     // TODO(jingyue): Add more tests on NCHW input order which TF also supports.
     tf_default_dnums_for_backward_filter_.set_input_batch_dimension(3);
-    tf_default_dnums_for_backward_filter_.set_output_batch_dimension(3);
     tf_default_dnums_for_backward_filter_.set_input_feature_dimension(0);
-    tf_default_dnums_for_backward_filter_.set_output_feature_dimension(0);
     tf_default_dnums_for_backward_filter_.add_input_spatial_dimensions(1);
-    tf_default_dnums_for_backward_filter_.add_output_spatial_dimensions(1);
     tf_default_dnums_for_backward_filter_.add_input_spatial_dimensions(2);
-    tf_default_dnums_for_backward_filter_.add_output_spatial_dimensions(2);
     tf_default_dnums_for_backward_filter_.set_kernel_input_feature_dimension(0);
     tf_default_dnums_for_backward_filter_.set_kernel_output_feature_dimension(
         3);
     tf_default_dnums_for_backward_filter_.add_kernel_spatial_dimensions(1);
     tf_default_dnums_for_backward_filter_.add_kernel_spatial_dimensions(2);
+    tf_default_dnums_for_backward_filter_.add_output_spatial_dimensions(0);
+    tf_default_dnums_for_backward_filter_.add_output_spatial_dimensions(1);
+    tf_default_dnums_for_backward_filter_.set_output_batch_dimension(2);
+    tf_default_dnums_for_backward_filter_.set_output_feature_dimension(3);
 
     tf_default_dnums_for_backward_input_.set_input_batch_dimension(0);
     tf_default_dnums_for_backward_input_.set_output_batch_dimension(0);
@@ -86,7 +86,7 @@ class ConvolutionFoldingTest : public HloTestBase {
   ConvolutionDimensionNumbers tf_default_dnums_for_backward_input_;
 };
 
-TEST_F(ConvolutionFoldingTest, BackwardFilterConvolveWithoutTranspose) {
+TEST_F(ConvolutionFoldingTest, BackwardFilterConvolve) {
   HloComputation::Builder builder(TestName());
   HloInstruction* activations =
       builder.AddInstruction(HloInstruction::CreateParameter(
@@ -136,7 +136,7 @@ TEST_F(ConvolutionFoldingTest,
 
   auto module = CreateNewModule();
   module->AddEntryComputation(builder.Build());
-  EXPECT_FALSE(FoldConvolution(module.get()));
+  EXPECT_TRUE(FoldConvolution(module.get()));
 }
 
 // Extracted from block35 training.
@@ -155,13 +155,9 @@ TEST_F(ConvolutionFoldingTest, BackwardFilterConvolveWithPaddedActivations) {
     conv_window.mutable_dimensions(i)->set_padding_low(1);
     conv_window.mutable_dimensions(i)->set_padding_high(1);
   }
-  HloInstruction* convolution =
-      builder.AddInstruction(HloInstruction::CreateConvolve(
-          ShapeUtil::MakeShape(F32, {32, 3, 3, 32}), activations, gradients,
-          conv_window, tf_default_dnums_for_backward_filter_));
-
-  builder.AddInstruction(HloInstruction::CreateTranspose(
-      ShapeUtil::MakeShape(F32, {3, 3, 32, 32}), convolution, {1, 2, 3, 0}));
+  builder.AddInstruction(HloInstruction::CreateConvolve(
+      ShapeUtil::MakeShape(F32, {32, 3, 3, 32}), activations, gradients,
+      conv_window, tf_default_dnums_for_backward_filter_));
 
   auto module = CreateNewModule();
   HloComputation* entry_computation =
@@ -189,13 +185,9 @@ TEST_F(ConvolutionFoldingTest, BackwardFilterConvolveWithPaddedGradients) {
     conv_window.mutable_dimensions(i)->set_padding_high(-1);
     conv_window.mutable_dimensions(i)->set_window_dilation(2);
   }
-  HloInstruction* convolution =
-      builder.AddInstruction(HloInstruction::CreateConvolve(
-          ShapeUtil::MakeShape(F32, {320, 3, 3, 192}), activations, gradients,
-          conv_window, tf_default_dnums_for_backward_filter_));
-
-  builder.AddInstruction(HloInstruction::CreateTranspose(
-      ShapeUtil::MakeShape(F32, {3, 3, 192, 320}), convolution, {1, 2, 3, 0}));
+  builder.AddInstruction(HloInstruction::CreateConvolve(
+      ShapeUtil::MakeShape(F32, {320, 3, 3, 192}), activations, gradients,
+      conv_window, tf_default_dnums_for_backward_filter_));
 
   auto module = CreateNewModule();
   HloComputation* entry_computation =
@@ -222,13 +214,9 @@ TEST_F(ConvolutionFoldingTest, BackwardFilterConvolveWithUnevenPadding) {
     // Uneven padding: padding_low=0, padding_high=1
     conv_window.mutable_dimensions(i)->set_padding_high(1);
   }
-  HloInstruction* convolution =
-      builder.AddInstruction(HloInstruction::CreateConvolve(
-          ShapeUtil::MakeShape(F32, {32, 2, 2, 32}), activations, gradients,
-          conv_window, tf_default_dnums_for_backward_filter_));
-
-  builder.AddInstruction(HloInstruction::CreateTranspose(
-      ShapeUtil::MakeShape(F32, {2, 2, 32, 32}), convolution, {1, 2, 3, 0}));
+  builder.AddInstruction(HloInstruction::CreateConvolve(
+      ShapeUtil::MakeShape(F32, {32, 2, 2, 32}), activations, gradients,
+      conv_window, tf_default_dnums_for_backward_filter_));
 
   auto module = CreateNewModule();
   HloComputation* entry_computation =
diff --git a/tensorflow/compiler/xla/service/gpu/pad_insertion.cc b/tensorflow/compiler/xla/service/gpu/pad_insertion.cc
index 11290eda4f..c29fee0879 100644
--- a/tensorflow/compiler/xla/service/gpu/pad_insertion.cc
+++ b/tensorflow/compiler/xla/service/gpu/pad_insertion.cc
@@ -202,8 +202,7 @@ bool PadInsertion::CanonicalizeBackwardFilterConvolution(
   //   ABCD0 = Pad(ABCD, padding_high=1)
   //   BackwardFilterConv(ABCD0, xyz, padding_low=pading_high=1)
   // We choose the lesser of padding_low and padding_high as the new padding.
-  HloInstruction* transpose = backward_conv->fused_expression_root();
-  HloInstruction* forward_conv = transpose->mutable_operand(0);
+  HloInstruction* forward_conv = backward_conv->fused_expression_root();
   HloInstruction* input = backward_conv->mutable_operand(0);
   Window new_forward_conv_window = forward_conv->window();
   Window new_backward_conv_window = backward_conv->window();
@@ -269,19 +268,10 @@ bool PadInsertion::CanonicalizeBackwardFilterConvolution(
               .ConsumeValueOrDie(),
           padded_input, output, new_forward_conv_window, forward_conv_dnums));
 
-  HloInstruction* new_transpose =
-      computation->AddInstruction(HloInstruction::CreateTranspose(
-          ShapeInference::InferTransposeShape(new_forward_conv->shape(),
-                                              transpose->dimensions())
-              .ConsumeValueOrDie(),
-          new_forward_conv, transpose->dimensions()));
-
-  // Fuse the new forward convolution and the new transpose to the new backward
-  // convolution.
+  // Fuse the new forward convolution to the new backward convolution.
   HloInstruction* new_backward_conv =
       computation->CreateFusionInstructionForBackwardConvolution(
-          {new_transpose, new_forward_conv},
-          HloInstruction::FusionKind::kConvBackwardFilter,
+          {new_forward_conv}, HloInstruction::FusionKind::kConvBackwardFilter,
           new_backward_conv_window, backward_conv_dnums);
 
   VLOG(1) << "Canonicalizing backward filter conv";
-- 
GitLab


From b2db981a6731e978453862a73dab892bc674db68 Mon Sep 17 00:00:00 2001
From: Sourabh Bajaj <sourabhbajaj@google.com>
Date: Thu, 30 Nov 2017 16:37:11 -0800
Subject: [PATCH 0485/1225] Merge changes from github.

PiperOrigin-RevId: 177526301
---
 .gitignore                                    |   5 +
 tensorflow/compiler/tf2xla/xla_op_kernel.cc   |   4 +-
 tensorflow/compiler/xla/BUILD                 |   1 +
 tensorflow/compiler/xla/ptr_util.h            |  47 +-
 .../xla/service/buffer_assignment_test.cc     |  20 +-
 .../xla/service/buffer_liveness_test.cc       |  34 +-
 .../compiler/xla/service/cpu/cpu_compiler.cc  |  25 +-
 .../xla/service/cpu/simple_orc_jit.cc         | 130 ++---
 tensorflow/compiler/xla/xla_data.proto        |   2 +-
 .../boosted_trees/lib/utils/batch_features.h  |   2 +-
 .../contrib/boosted_trees/lib/utils/example.h |  10 +-
 .../lib/utils/sparse_column_iterable.cc       |   4 +
 tensorflow/contrib/cmake/external/re2.cmake   |   1 +
 tensorflow/contrib/cmake/tf_shared_lib.cmake  |  10 +-
 tensorflow/contrib/cmake/tf_tests.cmake       |   6 +-
 tensorflow/contrib/crf/python/ops/crf.py      |   2 +-
 .../contrib/data/python/kernel_tests/BUILD    |   3 +
 .../python/ops/poisson_lognormal.py           |   2 +-
 .../contrib/eager/python/metrics_impl.py      |   2 +-
 .../contrib/factorization/python/ops/wals.py  |   2 +-
 tensorflow/contrib/ffmpeg/BUILD               |  47 ++
 tensorflow/contrib/ffmpeg/__init__.py         |   3 +-
 tensorflow/contrib/ffmpeg/decode_audio_op.cc  |  25 +-
 tensorflow/contrib/ffmpeg/decode_video_op.cc  | 118 ++++
 .../contrib/ffmpeg/decode_video_op_test.py    |  69 +++
 .../contrib/ffmpeg/default/ffmpeg_lib.cc      | 247 ++++++---
 .../ffmpeg/default/ffmpeg_lib_utility_test.cc |   3 +-
 tensorflow/contrib/ffmpeg/ffmpeg_lib.h        |  25 +-
 tensorflow/contrib/ffmpeg/ffmpeg_ops.py       |  17 +
 tensorflow/contrib/ffmpeg/testdata/small.mp4  | Bin 0 -> 383631 bytes
 .../contrib/ffmpeg/testdata/small_100.bmp     | Bin 0 -> 537654 bytes
 .../framework/python/framework/graph_util.py  |  28 +-
 .../python/framework/graph_util_test.py       |  14 +
 tensorflow/contrib/gan/python/train.py        |   2 +-
 .../contrib/layers/python/layers/layers.py    |   5 +-
 .../layers/python/layers/layers_test.py       |  22 +-
 tensorflow/contrib/lite/BUILD                 |   3 +
 tensorflow/contrib/lite/Makefile              | 147 +++++
 tensorflow/contrib/lite/README.md             |  66 +--
 .../contrib/lite/build_ios_universal_lib.sh   |  31 ++
 .../contrib/lite/download_dependencies.sh     |  99 ++++
 .../lite/examples/ios/camera/.gitignore       |   2 +
 .../ios/camera/CameraExampleAppDelegate.h     |  21 +
 .../ios/camera/CameraExampleAppDelegate.m     |  44 ++
 .../ios/camera/CameraExampleViewController.h  |  48 ++
 .../ios/camera/CameraExampleViewController.mm | 506 ++++++++++++++++++
 .../lite/examples/ios/camera/Info.plist       |  44 ++
 .../camera/MainStoryboard_iPhone.storyboard   |  46 ++
 .../contrib/lite/examples/ios/camera/Podfile  |   5 +
 .../lite/examples/ios/camera/data/.gitignore  |   0
 .../contrib/lite/examples/ios/camera/main.mm  |  28 +
 .../project.pbxproj                           | 419 +++++++++++++++
 .../lite/examples/ios/simple/AppDelegate.h    |  21 +
 .../lite/examples/ios/simple/AppDelegate.mm   |  47 ++
 .../contrib/lite/examples/ios/simple/Podfile  |   5 +
 .../examples/ios/simple/RunModel-Info.plist   |  47 ++
 .../ios/simple/RunModelViewController.h       |  24 +
 .../ios/simple/RunModelViewController.mm      | 221 ++++++++
 .../ios/simple/RunModelViewController.xib     |  46 ++
 .../examples/ios/simple/data/grace_hopper.jpg | Bin 0 -> 73746 bytes
 .../lite/examples/ios/simple/ios_image_load.h |  23 +
 .../examples/ios/simple/ios_image_load.mm     |  80 +++
 .../contrib/lite/examples/ios/simple/main.mm  |  22 +
 .../simple/simple.xcodeproj/project.pbxproj   | 359 +++++++++++++
 tensorflow/contrib/lite/g3doc/apis.md         |   2 +-
 .../lite/g3doc/tf_ops_compatibility.md        |   2 +-
 tensorflow/contrib/lite/ios_makefile.inc      |  31 ++
 .../contrib/lite/java/demo/app/build.gradle   |   4 +-
 .../lite/models/testdata/g3doc/README.md      |  29 +-
 .../contrib/lite/nnapi/NeuralNetworksShim.h   |   6 +-
 .../lite/schema/upgrade_schema_test.py        |   2 +-
 tensorflow/contrib/lite/testing/BUILD         |   1 +
 .../contrib/lite/testing/parse_testdata.cc    |   2 +-
 tensorflow/contrib/lite/testing/test_runner.h |   2 +-
 tensorflow/contrib/lite/toco/model.h          |   2 +-
 .../contrib/lite/tools/benchmark_model.cc     |  95 ++++
 .../contrib/lite/tools/mutable_op_resolver.h  |  10 +
 tensorflow/contrib/mpi/BUILD                  |   1 +
 .../contrib/nn/python/ops/cross_entropy.py    |   2 +-
 .../contrib/nn/python/ops/sampling_ops.py     |   2 +-
 tensorflow/contrib/rnn/python/ops/rnn_cell.py |   1 -
 tensorflow/contrib/slim/README.md             |   9 +-
 .../contrib/slim/python/slim/evaluation.py    |   4 +-
 tensorflow/contrib/summary/BUILD              |  14 -
 .../contrib/summary/summary_ops_graph_test.py |   5 +-
 .../contrib/summary/summary_ops_test.py       |   7 +-
 .../contrib/summary/summary_test_util.py      |  39 +-
 .../models/decisions_to_data_then_nn_test.py  |   6 +-
 tensorflow/core/BUILD                         |   6 +
 tensorflow/core/graph/graph.h                 |   1 -
 .../optimizers/arithmetic_optimizer.cc        |   2 +-
 tensorflow/core/kernels/BUILD                 |   9 +-
 .../core/kernels/batch_matmul_op_complex.cc   |   2 +
 .../core/kernels/batch_matmul_op_real.cc      |   2 +
 tensorflow/core/kernels/cwise_op_asinh.cc     |   2 +-
 tensorflow/core/kernels/decode_bmp_op.cc      |  15 +-
 .../core/kernels/dynamic_partition_op_test.cc |  58 ++
 .../core/kernels/mkl_batch_matmul_op.cc       | 238 ++++++++
 .../core/kernels/prefetch_dataset_op.cc       |   2 +
 tensorflow/core/kernels/summary_interface.cc  |   4 +-
 tensorflow/core/lib/io/path.cc                |  66 ++-
 tensorflow/core/lib/io/path.h                 |   3 +
 tensorflow/core/ops/math_ops.cc               |  24 +-
 tensorflow/core/ops/math_ops_test.cc          |  11 +
 .../platform/default/build_config_root.bzl    |  22 +-
 tensorflow/core/util/ptr_util.h               |  80 +++
 tensorflow/docs_src/extend/adding_an_op.md    |  28 +-
 tensorflow/docs_src/get_started/input_fn.md   |   4 +-
 .../docs_src/install/install_windows.md       |  10 +-
 tensorflow/docs_src/mobile/ios_build.md       |   2 +-
 tensorflow/docs_src/mobile/mobile_intro.md    |   2 +-
 tensorflow/docs_src/mobile/optimizing.md      |   2 +-
 .../performance/xla/operation_semantics.md    |   2 +-
 .../docs_src/tutorials/image_recognition.md   |   2 +-
 .../reading_data/convert_to_records.py        |  24 +-
 .../examples/speech_commands/input_data.py    |   3 +-
 tensorflow/examples/speech_commands/train.py  |   3 +-
 tensorflow/examples/udacity/1_notmnist.ipynb  |   6 +-
 .../java/org/tensorflow/OperationBuilder.java |  35 +-
 .../src/main/native/operation_builder_jni.cc  |  36 ++
 .../src/main/native/operation_builder_jni.h   |   8 +
 .../org/tensorflow/OperationBuilderTest.java  |  37 ++
 tensorflow/python/BUILD                       |   2 +
 tensorflow/python/data/util/nest.py           |  10 +
 tensorflow/python/data/util/nest_test.py      |   9 +
 tensorflow/python/estimator/export/export.py  |   2 +-
 tensorflow/python/estimator/training_test.py  |   4 +-
 tensorflow/python/keras/BUILD                 |   1 +
 .../python/keras/_impl/keras/backend.py       |   2 +-
 .../python/keras/_impl/keras/callbacks.py     |   2 +-
 .../keras/_impl/keras/callbacks_test.py       |  61 ++-
 .../keras/_impl/keras/engine/training_test.py |   6 +
 .../keras/_impl/keras/estimator_test.py       |   5 +
 .../python/keras/_impl/keras/models_test.py   |  21 +-
 .../_impl/keras/utils/data_utils_test.py      |   7 +
 .../keras/_impl/keras/utils/io_utils.py       |  11 +-
 .../keras/_impl/keras/utils/vis_utils.py      |   2 +-
 tensorflow/python/kernel_tests/BUILD          |  16 +-
 .../python/kernel_tests/decode_bmp_op_test.py |  75 +++
 .../kernel_tests/prefetch_dataset_op_test.py  |  59 ++
 tensorflow/python/layers/convolutional.py     |   6 +-
 tensorflow/python/ops/math_ops_test.py        |   2 +-
 tensorflow/python/ops/variable_scope.py       |   2 +-
 tensorflow/python/ops/variables.py            |   4 +-
 .../python/profiler/model_analyzer_test.py    |   2 +-
 tensorflow/python/training/saver_test.py      |  24 +-
 .../training/sync_replicas_optimizer.py       |   2 +-
 tensorflow/python/util/nest.py                |  15 +-
 tensorflow/python/util/nest_test.py           |  13 +-
 ...orflow.keras.callbacks.-tensor-board.pbtxt |   2 +-
 tensorflow/tools/benchmark/benchmark_model.cc |   2 +-
 .../tools/ci_build/builds/test_user_ops.sh    |  28 +-
 .../tools/ci_build/ci_parameterized_build.sh  |   4 +-
 .../ci_build/windows/cpu/cmake/run_build.bat  |   2 +-
 .../windows/cpu/pip/build_tf_windows.sh       |   4 +-
 .../ci_build/windows/gpu/cmake/run_build.bat  |   2 +-
 .../windows/gpu/pip/build_tf_windows.sh       |   4 +-
 .../dist_test/python/census_widendeep.py      |   3 +-
 tensorflow/tools/docker/Dockerfile.devel      |   1 -
 .../tools/docker/Dockerfile.devel-cpu-mkl     |  85 +++
 tensorflow/tools/docker/Dockerfile.devel-gpu  |   2 -
 .../docker/Dockerfile.devel-gpu-cuda9-cudnn7  |   2 -
 .../docker/notebooks/2_getting_started.ipynb  |  12 +-
 .../notebooks/3_mnist_from_scratch.ipynb      |   2 +
 .../tools/pip_package/pip_smoke_test.py       |   3 -
 tensorflow/tools/pip_package/setup.py         |  27 +-
 tensorflow/workspace.bzl                      |   2 +-
 third_party/flatbuffers/flatbuffers.BUILD     |   7 +-
 third_party/mkl/build_defs.bzl                |   2 +-
 third_party/nccl.BUILD                        |   2 +-
 third_party/py/python_configure.bzl           |  91 ++--
 171 files changed, 4446 insertions(+), 578 deletions(-)
 create mode 100644 tensorflow/contrib/ffmpeg/decode_video_op.cc
 create mode 100644 tensorflow/contrib/ffmpeg/decode_video_op_test.py
 create mode 100644 tensorflow/contrib/ffmpeg/testdata/small.mp4
 create mode 100644 tensorflow/contrib/ffmpeg/testdata/small_100.bmp
 create mode 100644 tensorflow/contrib/lite/Makefile
 create mode 100755 tensorflow/contrib/lite/build_ios_universal_lib.sh
 create mode 100755 tensorflow/contrib/lite/download_dependencies.sh
 create mode 100644 tensorflow/contrib/lite/examples/ios/camera/.gitignore
 create mode 100644 tensorflow/contrib/lite/examples/ios/camera/CameraExampleAppDelegate.h
 create mode 100644 tensorflow/contrib/lite/examples/ios/camera/CameraExampleAppDelegate.m
 create mode 100644 tensorflow/contrib/lite/examples/ios/camera/CameraExampleViewController.h
 create mode 100644 tensorflow/contrib/lite/examples/ios/camera/CameraExampleViewController.mm
 create mode 100644 tensorflow/contrib/lite/examples/ios/camera/Info.plist
 create mode 100644 tensorflow/contrib/lite/examples/ios/camera/MainStoryboard_iPhone.storyboard
 create mode 100644 tensorflow/contrib/lite/examples/ios/camera/Podfile
 create mode 100644 tensorflow/contrib/lite/examples/ios/camera/data/.gitignore
 create mode 100644 tensorflow/contrib/lite/examples/ios/camera/main.mm
 create mode 100644 tensorflow/contrib/lite/examples/ios/camera/tflite_camera_example.xcodeproj/project.pbxproj
 create mode 100644 tensorflow/contrib/lite/examples/ios/simple/AppDelegate.h
 create mode 100644 tensorflow/contrib/lite/examples/ios/simple/AppDelegate.mm
 create mode 100644 tensorflow/contrib/lite/examples/ios/simple/Podfile
 create mode 100644 tensorflow/contrib/lite/examples/ios/simple/RunModel-Info.plist
 create mode 100644 tensorflow/contrib/lite/examples/ios/simple/RunModelViewController.h
 create mode 100644 tensorflow/contrib/lite/examples/ios/simple/RunModelViewController.mm
 create mode 100644 tensorflow/contrib/lite/examples/ios/simple/RunModelViewController.xib
 create mode 100644 tensorflow/contrib/lite/examples/ios/simple/data/grace_hopper.jpg
 create mode 100644 tensorflow/contrib/lite/examples/ios/simple/ios_image_load.h
 create mode 100644 tensorflow/contrib/lite/examples/ios/simple/ios_image_load.mm
 create mode 100644 tensorflow/contrib/lite/examples/ios/simple/main.mm
 create mode 100644 tensorflow/contrib/lite/examples/ios/simple/simple.xcodeproj/project.pbxproj
 create mode 100644 tensorflow/contrib/lite/ios_makefile.inc
 create mode 100644 tensorflow/contrib/lite/tools/benchmark_model.cc
 create mode 100644 tensorflow/core/kernels/mkl_batch_matmul_op.cc
 create mode 100644 tensorflow/core/util/ptr_util.h
 create mode 100644 tensorflow/python/kernel_tests/prefetch_dataset_op_test.py
 create mode 100644 tensorflow/tools/docker/Dockerfile.devel-cpu-mkl

diff --git a/.gitignore b/.gitignore
index 9ae0d9c96f..d11a504bdc 100644
--- a/.gitignore
+++ b/.gitignore
@@ -22,3 +22,8 @@ Pods
 Podfile.lock
 *.pbxproj
 *.xcworkspacedata
+/tensorflow/contrib/lite/downloads/**
+/tensorflow/contrib/lite/gen/**
+/tensorflow/contrib/lite/examples/ios/simple/data/*.txt
+/tensorflow/contrib/lite/examples/ios/simple/data/*.tflite
+xcuserdata/**
\ No newline at end of file
diff --git a/tensorflow/compiler/tf2xla/xla_op_kernel.cc b/tensorflow/compiler/tf2xla/xla_op_kernel.cc
index f16472cac8..79d501b511 100644
--- a/tensorflow/compiler/tf2xla/xla_op_kernel.cc
+++ b/tensorflow/compiler/tf2xla/xla_op_kernel.cc
@@ -346,9 +346,9 @@ void XlaOpKernelContext::SetConstantOutput(int index, const Tensor& constant) {
 }
 
 void XlaOpKernelContext::SetInvalidOutput(int index) {
-  const TensorShape shape;
   Tensor* output = nullptr;
-  OP_REQUIRES_OK(context_, context_->allocate_output(index, shape, &output));
+  OP_REQUIRES_OK(context_,
+                 context_->allocate_output(index, TensorShape({}), &output));
   XlaExpression* expression = CastExpressionFromUninitializedTensor(output);
   xla::ComputationDataHandle handle;
   handle.set_handle(0);
diff --git a/tensorflow/compiler/xla/BUILD b/tensorflow/compiler/xla/BUILD
index 515b572b0e..d3f292207f 100644
--- a/tensorflow/compiler/xla/BUILD
+++ b/tensorflow/compiler/xla/BUILD
@@ -175,6 +175,7 @@ cc_library(
         ":types",
         ":xla_data_proto",
         "//tensorflow/core:lib",
+        "//tensorflow/core:ptr_util",
     ],
 )
 
diff --git a/tensorflow/compiler/xla/ptr_util.h b/tensorflow/compiler/xla/ptr_util.h
index fa67030313..c58c19db2c 100644
--- a/tensorflow/compiler/xla/ptr_util.h
+++ b/tensorflow/compiler/xla/ptr_util.h
@@ -16,7 +16,8 @@ limitations under the License.
 #ifndef TENSORFLOW_COMPILER_XLA_PTR_UTIL_H_
 #define TENSORFLOW_COMPILER_XLA_PTR_UTIL_H_
 
-// Utility functions for pointers.
+// As this was moved to tensorflow/core/util, provide indirections here to
+// maintain current functionality of the library.
 
 #include <stddef.h>
 
@@ -24,55 +25,27 @@ limitations under the License.
 #include <type_traits>
 #include <utility>
 
-namespace xla {
-
-namespace internal {
-
-// Trait to select overloads and return types for MakeUnique.
-template <typename T>
-struct MakeUniqueResult {
-  using scalar = std::unique_ptr<T>;
-};
-template <typename T>
-struct MakeUniqueResult<T[]> {
-  using array = std::unique_ptr<T[]>;
-};
-template <typename T, size_t N>
-struct MakeUniqueResult<T[N]> {
-  using invalid = void;
-};
+#include "tensorflow/core/util/ptr_util.h"
 
-}  // namespace internal
+namespace xla {
 
-// Transfers ownership of a raw pointer to a std::unique_ptr of deduced type.
-// Example:
-//   X* NewX(int, int);
-//   auto x = WrapUnique(NewX(1, 2));  // 'x' is std::unique_ptr<X>.
-//
-// WrapUnique is useful for capturing the output of a raw pointer factory.
-// However, prefer 'MakeUnique<T>(args...) over 'WrapUnique(new T(args...))'.
-//   auto x = WrapUnique(new X(1, 2));  // works, but nonideal.
-//   auto x = MakeUnique<X>(1, 2);  // safer, standard, avoids raw 'new'.
-//
-// Note: Cannot wrap pointers to array of unknown bound (i.e. U(*)[]).
 template <typename T>
 std::unique_ptr<T> WrapUnique(T* ptr) {
-  static_assert(!std::is_array<T>::value || std::extent<T>::value != 0,
-                "types T[0] or T[] are unsupported");
-  return std::unique_ptr<T>(ptr);
+  return tensorflow::WrapUnique<T>(ptr);
 }
 
 template <typename T, typename... Args>
-typename internal::MakeUniqueResult<T>::scalar MakeUnique(Args&&... args) {
-  return std::unique_ptr<T>(new T(std::forward<Args>(args)...));
+typename tensorflow::helper::MakeUniqueResult<T>::scalar MakeUnique(
+    Args&&... args) {
+  return tensorflow::MakeUnique<T, Args...>(std::forward<Args>(args)...);
 }
 
 // Overload for array of unknown bound.
 // The allocation of arrays needs to use the array form of new,
 // and cannot take element constructor arguments.
 template <typename T>
-typename internal::MakeUniqueResult<T>::array MakeUnique(size_t n) {
-  return std::unique_ptr<T>(new typename std::remove_extent<T>::type[n]());
+typename tensorflow::helper::MakeUniqueResult<T>::array MakeUnique(size_t n) {
+  return tensorflow::MakeUnique<T>(n);
 }
 
 }  // namespace xla
diff --git a/tensorflow/compiler/xla/service/buffer_assignment_test.cc b/tensorflow/compiler/xla/service/buffer_assignment_test.cc
index 75c71dfeb1..09681b34e7 100644
--- a/tensorflow/compiler/xla/service/buffer_assignment_test.cc
+++ b/tensorflow/compiler/xla/service/buffer_assignment_test.cc
@@ -85,7 +85,7 @@ class BufferAssignmentTest : public HloTestBase {
   std::unique_ptr<BufferAssignment> RunBufferAssignment(HloModule* module,
                                                         int64 alignment = 1) {
     return BufferAssigner::Run(
-               module, MakeUnique<DependencyHloOrdering>(module),
+               module, xla::MakeUnique<DependencyHloOrdering>(module),
                backend().compiler()->BufferSizeBytesFunction(),
                [alignment](LogicalBuffer::Color) { return alignment; })
         .ConsumeValueOrDie();
@@ -94,7 +94,7 @@ class BufferAssignmentTest : public HloTestBase {
   std::unique_ptr<BufferAssignment> RunColoredBufferAssignment(
       HloModule* module, BufferLiveness::Colorer colorer, int64 alignment = 1) {
     return BufferAssigner::Run(
-               module, MakeUnique<DependencyHloOrdering>(module),
+               module, xla::MakeUnique<DependencyHloOrdering>(module),
                backend().compiler()->BufferSizeBytesFunction(),
                [alignment](LogicalBuffer::Color) { return alignment; }, false,
                std::move(colorer))
@@ -1451,7 +1451,7 @@ class WhileBufferAssignmentTest : public HloTestBase {
     auto sequence =
         CreateMemoryMinimizingSequence(*module, ByteSizeOf).ConsumeValueOrDie();
     return BufferAssigner::Run(
-               module, MakeUnique<SequentialHloOrdering>(module, sequence),
+               module, xla::MakeUnique<SequentialHloOrdering>(module, sequence),
                ByteSizeOf,
                [alignment](LogicalBuffer::Color) { return alignment; })
         .ConsumeValueOrDie();
@@ -1472,7 +1472,7 @@ static void RunCopyInsertion(HloModule* module) {
 }
 
 TEST_F(WhileBufferAssignmentTest, TwoForwardWhileLoops) {
-  auto module = MakeUnique<HloModule>(TestName());
+  auto module = xla::MakeUnique<HloModule>(TestName());
   auto builder = HloComputation::Builder("entry");
 
   auto input0 = builder.AddInstruction(
@@ -1529,7 +1529,7 @@ TEST_F(WhileBufferAssignmentTest, TwoForwardWhileLoops) {
 }
 
 TEST_F(WhileBufferAssignmentTest, OneForwardBackwardWhileLoopSet) {
-  auto module = MakeUnique<HloModule>(TestName());
+  auto module = xla::MakeUnique<HloModule>(TestName());
   auto builder = HloComputation::Builder("entry");
 
   auto input0 = builder.AddInstruction(
@@ -1574,7 +1574,7 @@ TEST_F(WhileBufferAssignmentTest, OneForwardBackwardWhileLoopSet) {
 }
 
 TEST_F(BufferAssignmentTest, TwoCalls) {
-  auto module = MakeUnique<HloModule>(TestName());
+  auto module = xla::MakeUnique<HloModule>(TestName());
   Shape r0f32 = ShapeUtil::MakeShape(xla::F32, {});
   HloComputation* sub_computation;
   {
@@ -1639,7 +1639,7 @@ static bool IsPostOrderTraversal(
 }
 
 TEST_F(WhileBufferAssignmentTest, WhileLoopsInterferingResultRange) {
-  auto module = MakeUnique<HloModule>(TestName());
+  auto module = xla::MakeUnique<HloModule>(TestName());
   auto builder = HloComputation::Builder(TestName());
 
   auto zero = builder.AddInstruction(
@@ -1710,15 +1710,15 @@ TEST_F(WhileBufferAssignmentTest, WhileLoopsInterferingResultRange) {
   auto assignment =
       BufferAssigner::Run(
           module.get(),
-          MakeUnique<SequentialHloOrdering>(module.get(), sequence), ByteSizeOf,
-          [](LogicalBuffer::Color) { return 1; })
+          xla::MakeUnique<SequentialHloOrdering>(module.get(), sequence),
+          ByteSizeOf, [](LogicalBuffer::Color) { return 1; })
           .ConsumeValueOrDie();
 
   EXPECT_TRUE(BuffersDistinct({while0}, {while1}, *assignment));
 }
 
 TEST_F(WhileBufferAssignmentTest, WhilesDontShareEntryParamIfLiveOut) {
-  auto module = MakeUnique<HloModule>(TestName());
+  auto module = xla::MakeUnique<HloModule>(TestName());
   auto builder = HloComputation::Builder("entry");
 
   auto input0 = builder.AddInstruction(
diff --git a/tensorflow/compiler/xla/service/buffer_liveness_test.cc b/tensorflow/compiler/xla/service/buffer_liveness_test.cc
index 56600b5838..13825fe05b 100644
--- a/tensorflow/compiler/xla/service/buffer_liveness_test.cc
+++ b/tensorflow/compiler/xla/service/buffer_liveness_test.cc
@@ -120,7 +120,7 @@ TEST_F(BufferLivenessTest, ElementwiseChain) {
 
   auto liveness =
       BufferLiveness::Run(module.get(),
-                          MakeUnique<DependencyHloOrdering>(module.get()))
+                          xla::MakeUnique<DependencyHloOrdering>(module.get()))
           .ConsumeValueOrDie();
 
   EXPECT_FALSE(InstructionsMayInterfere(*liveness, param, negate));
@@ -167,10 +167,10 @@ TEST_F(BufferLivenessTest, MultipleEntryParameters_Sequential) {
 
   SequentialHloOrdering::HloModuleSequence sequence;
   sequence.insert({entry, {param0, negate, param1, exp, add}});
-  auto liveness = BufferLiveness::Run(
-                      module.get(),
-                      MakeUnique<SequentialHloOrdering>(module.get(), sequence))
-                      .ConsumeValueOrDie();
+  auto liveness =
+      BufferLiveness::Run(module.get(), xla::MakeUnique<SequentialHloOrdering>(
+                                            module.get(), sequence))
+          .ConsumeValueOrDie();
 
   // Entry parameters interfere as if they are defined simultaneously at
   // the very beginning.
@@ -216,7 +216,7 @@ TEST_F(BufferLivenessTest, NonElementwiseOperand) {
 
   auto liveness =
       BufferLiveness::Run(module.get(),
-                          MakeUnique<DependencyHloOrdering>(module.get()))
+                          xla::MakeUnique<DependencyHloOrdering>(module.get()))
           .ConsumeValueOrDie();
 
   EXPECT_FALSE(InstructionsMayInterfere(*liveness, param, exp));
@@ -250,7 +250,7 @@ TEST_F(BufferLivenessTest, OverlappedBuffers) {
 
   auto liveness =
       BufferLiveness::Run(module.get(),
-                          MakeUnique<DependencyHloOrdering>(module.get()))
+                          xla::MakeUnique<DependencyHloOrdering>(module.get()))
           .ConsumeValueOrDie();
 
   EXPECT_TRUE(InstructionsMayInterfere(*liveness, param, negate));
@@ -294,7 +294,7 @@ TEST_F(BufferLivenessTest, OverlappedBuffersSequentialOrder) {
   std::vector<const HloInstruction*> order = {param, negate, exp, add};
   module_sequence.emplace(computation, order);
   auto liveness =
-      BufferLiveness::Run(module.get(), MakeUnique<SequentialHloOrdering>(
+      BufferLiveness::Run(module.get(), xla::MakeUnique<SequentialHloOrdering>(
                                             module.get(), module_sequence))
           .ConsumeValueOrDie();
 
@@ -334,7 +334,7 @@ TEST_F(BufferLivenessTest, TupleLiveOut) {
 
   auto liveness =
       BufferLiveness::Run(module.get(),
-                          MakeUnique<DependencyHloOrdering>(module.get()))
+                          xla::MakeUnique<DependencyHloOrdering>(module.get()))
           .ConsumeValueOrDie();
 
   // All buffers should be live out except the param
@@ -370,7 +370,7 @@ TEST_F(BufferLivenessTest, EmbeddedComputation) {
 
   auto liveness =
       BufferLiveness::Run(module.get(),
-                          MakeUnique<DependencyHloOrdering>(module.get()))
+                          xla::MakeUnique<DependencyHloOrdering>(module.get()))
           .ConsumeValueOrDie();
 
   // Buffers in different computations should always interfere.
@@ -409,7 +409,7 @@ TEST_F(BufferLivenessTest, TupleConstantLiveOut) {
 
   auto liveness =
       BufferLiveness::Run(module.get(),
-                          MakeUnique<DependencyHloOrdering>(module.get()))
+                          xla::MakeUnique<DependencyHloOrdering>(module.get()))
           .ConsumeValueOrDie();
 
   // Only the element buffers of the tuple constant which are pointed to by
@@ -474,7 +474,7 @@ TEST_F(BufferLivenessTest, IndependentTupleElements) {
 
   auto liveness =
       BufferLiveness::Run(module.get(),
-                          MakeUnique<DependencyHloOrdering>(module.get()))
+                          xla::MakeUnique<DependencyHloOrdering>(module.get()))
           .ConsumeValueOrDie();
 
   // We compare tuple element pairs that are input/output to the computation:
@@ -536,7 +536,7 @@ TEST_F(BufferLivenessTest, DependentTupleElements) {
 
   auto liveness =
       BufferLiveness::Run(module.get(),
-                          MakeUnique<DependencyHloOrdering>(module.get()))
+                          xla::MakeUnique<DependencyHloOrdering>(module.get()))
           .ConsumeValueOrDie();
 
   // We compare tuple element pairs that are input/output to the computation:
@@ -624,8 +624,8 @@ class FusedDynamicUpdateSliceLivenessTest : public BufferLivenessTest {
 
     // Run BufferLiveness on 'module'.
     auto liveness =
-        BufferLiveness::Run(module.get(),
-                            MakeUnique<DependencyHloOrdering>(module.get()))
+        BufferLiveness::Run(
+            module.get(), xla::MakeUnique<DependencyHloOrdering>(module.get()))
             .ConsumeValueOrDie();
     // Return whether or not buffers interference is detected between
     // 'tuple_param0' and 'tuple_root' at shape index '{1}'.
@@ -736,8 +736,8 @@ class DynamicUpdateSliceLivenessTest : public BufferLivenessTest {
     module->AddEmbeddedComputation(builder.Build());
     // Run BufferLiveness on 'module'.
     auto liveness =
-        BufferLiveness::Run(module.get(),
-                            MakeUnique<DependencyHloOrdering>(module.get()))
+        BufferLiveness::Run(
+            module.get(), xla::MakeUnique<DependencyHloOrdering>(module.get()))
             .ConsumeValueOrDie();
     // Return whether or not buffers interference is detected between
     // 'tuple_param0' and 'tuple_root' at shape index '{1}'.
diff --git a/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc b/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc
index 99dae793ab..988f632748 100644
--- a/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc
+++ b/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc
@@ -469,11 +469,11 @@ StatusOr<std::unique_ptr<Executable>> CpuCompiler::RunBackend(
       &pre_optimization_ir_hook, &post_optimization_ir_hook));
 
   // Compile must be thread-safe so create a new LLVM context for the module.
-  auto llvm_context = MakeUnique<llvm::LLVMContext>();
+  auto llvm_context = xla::MakeUnique<llvm::LLVMContext>();
   auto llvm_module =
-      MakeUnique<llvm::Module>("__compute_module", *llvm_context);
+      xla::MakeUnique<llvm::Module>("__compute_module", *llvm_context);
 
-  auto jit = MakeUnique<SimpleOrcJIT>(
+  auto jit = xla::MakeUnique<SimpleOrcJIT>(
       CompilerTargetOptions(module->config()),
       CodeGenOptLevel(module->config()),
       options::OptimizeForSizeRequested(module->config()),
@@ -528,9 +528,9 @@ StatusOr<std::unique_ptr<Executable>> CpuCompiler::RunBackend(
     // uses data dependencies for determining order.
     TF_ASSIGN_OR_RETURN(
         std::unique_ptr<BufferAssignment> assignment,
-        BufferAssigner::Run(module.get(),
-                            MakeUnique<DependencyHloOrdering>(module.get()),
-                            BufferSizeBytesFunction(), memory_alignment));
+        BufferAssigner::Run(
+            module.get(), xla::MakeUnique<DependencyHloOrdering>(module.get()),
+            BufferSizeBytesFunction(), memory_alignment));
     // BufferAssignment::ToString() includes a header, so no need for us to
     // print one ourselves.
     XLA_VLOG_LINES(2, assignment->ToString());
@@ -557,7 +557,7 @@ StatusOr<std::unique_ptr<Executable>> CpuCompiler::RunBackend(
         const void* data = instruction->literal().InternalData();
         int64 size = CpuExecutable::ShapeSizeBytes(instruction->shape());
         auto iter = aligned_constants.emplace(
-            instruction, MakeUnique<unsigned char[]>(size));
+            instruction, xla::MakeUnique<unsigned char[]>(size));
         CHECK_EQ(iter.second, true);
         unsigned char* aligned_data = iter.first->second.get();
         memcpy(aligned_data, data, size);
@@ -642,10 +642,10 @@ StatusOr<std::unique_ptr<Executable>> CpuCompiler::RunBackend(
     // temporary buffers are required to run the computation.
     TF_ASSIGN_OR_RETURN(
         std::unique_ptr<BufferAssignment> assignment,
-        BufferAssigner::Run(
-            module.get(),
-            MakeUnique<SequentialHloOrdering>(module.get(), module_sequence),
-            BufferSizeBytesFunction(), memory_alignment));
+        BufferAssigner::Run(module.get(),
+                            xla::MakeUnique<SequentialHloOrdering>(
+                                module.get(), module_sequence),
+                            BufferSizeBytesFunction(), memory_alignment));
     // BufferAssignment::ToString() includes a header, so no need for us to
     // print one ourselves.
     XLA_VLOG_LINES(2, assignment->ToString());
@@ -824,7 +824,8 @@ CpuCompiler::CompileAheadOfTime(std::vector<std::unique_ptr<HloModule>> modules,
     TF_ASSIGN_OR_RETURN(
         std::unique_ptr<BufferAssignment> assignment,
         BufferAssigner::Run(
-            module, MakeUnique<SequentialHloOrdering>(module, module_sequence),
+            module,
+            xla::MakeUnique<SequentialHloOrdering>(module, module_sequence),
             BufferSizeBytesFunction(), memory_alignment));
     // BufferAssignment::ToString() includes a header, so no need for us to
     // print one ourselves.
diff --git a/tensorflow/compiler/xla/service/cpu/simple_orc_jit.cc b/tensorflow/compiler/xla/service/cpu/simple_orc_jit.cc
index db6c201876..cda2783307 100644
--- a/tensorflow/compiler/xla/service/cpu/simple_orc_jit.cc
+++ b/tensorflow/compiler/xla/service/cpu/simple_orc_jit.cc
@@ -213,71 +213,75 @@ bool RegisterKnownJITSymbols() {
 
 #undef REGISTER_CPU_RUNTIME_SYMBOL
 
-#define REGISTER_LIBM_SYMBOL(name)                                    \
-  do {                                                                \
-    /* Register both the F32 and F64 variants of the libm symbol.  */ \
-    registry->Register(#name "f", reinterpret_cast<void*>(name##f));  \
-    registry->Register(#name, reinterpret_cast<void*>(name));         \
+// Register both the f32 (float) and f64 (double) versions of a libm symbol.
+// Unfortunately the double versions are overloaded on some systems, e.g.
+// Mac so we need an explicit cast. This requires passing the function signature
+// for that case.
+#define REGISTER_LIBM_SYMBOL(name, double_sig)                          \
+  do {                                                                  \
+    registry->Register(#name "f", reinterpret_cast<void*>(name##f));    \
+    registry->Register(                                                 \
+        #name, reinterpret_cast<void*>(static_cast<double_sig>(name))); \
   } while (false)
 
-  REGISTER_LIBM_SYMBOL(acos);
-  REGISTER_LIBM_SYMBOL(acosh);
-  REGISTER_LIBM_SYMBOL(asin);
-  REGISTER_LIBM_SYMBOL(asinh);
-  REGISTER_LIBM_SYMBOL(atan);
-  REGISTER_LIBM_SYMBOL(atan2);
-  REGISTER_LIBM_SYMBOL(atanh);
-  REGISTER_LIBM_SYMBOL(cbrt);
-  REGISTER_LIBM_SYMBOL(ceil);
-  REGISTER_LIBM_SYMBOL(copysign);
-  REGISTER_LIBM_SYMBOL(cos);
-  REGISTER_LIBM_SYMBOL(cosh);
-  REGISTER_LIBM_SYMBOL(erf);
-  REGISTER_LIBM_SYMBOL(erfc);
-  REGISTER_LIBM_SYMBOL(exp);
-  REGISTER_LIBM_SYMBOL(exp2);
-  REGISTER_LIBM_SYMBOL(expm1);
-  REGISTER_LIBM_SYMBOL(fabs);
-  REGISTER_LIBM_SYMBOL(fdim);
-  REGISTER_LIBM_SYMBOL(floor);
-  REGISTER_LIBM_SYMBOL(fma);
-  REGISTER_LIBM_SYMBOL(fmax);
-  REGISTER_LIBM_SYMBOL(fmin);
-  REGISTER_LIBM_SYMBOL(fmod);
-  REGISTER_LIBM_SYMBOL(frexp);
-  REGISTER_LIBM_SYMBOL(hypot);
-  REGISTER_LIBM_SYMBOL(ilogb);
-  REGISTER_LIBM_SYMBOL(ldexp);
-  REGISTER_LIBM_SYMBOL(lgamma);
-  REGISTER_LIBM_SYMBOL(llrint);
-  REGISTER_LIBM_SYMBOL(llround);
-  REGISTER_LIBM_SYMBOL(log);
-  REGISTER_LIBM_SYMBOL(log10);
-  REGISTER_LIBM_SYMBOL(log1p);
-  REGISTER_LIBM_SYMBOL(log2);
-  REGISTER_LIBM_SYMBOL(logb);
-  REGISTER_LIBM_SYMBOL(lrint);
-  REGISTER_LIBM_SYMBOL(lround);
-  REGISTER_LIBM_SYMBOL(modf);
-  REGISTER_LIBM_SYMBOL(nan);
-  REGISTER_LIBM_SYMBOL(nearbyint);
-  REGISTER_LIBM_SYMBOL(nextafter);
-  REGISTER_LIBM_SYMBOL(nexttoward);
-  REGISTER_LIBM_SYMBOL(pow);
-  REGISTER_LIBM_SYMBOL(remainder);
-  REGISTER_LIBM_SYMBOL(remquo);
-  REGISTER_LIBM_SYMBOL(rint);
-  REGISTER_LIBM_SYMBOL(round);
-  REGISTER_LIBM_SYMBOL(scalbln);
-  REGISTER_LIBM_SYMBOL(scalbn);
-  REGISTER_LIBM_SYMBOL(sin);
-  REGISTER_LIBM_SYMBOL(sincos);
-  REGISTER_LIBM_SYMBOL(sinh);
-  REGISTER_LIBM_SYMBOL(sqrt);
-  REGISTER_LIBM_SYMBOL(tan);
-  REGISTER_LIBM_SYMBOL(tanh);
-  REGISTER_LIBM_SYMBOL(tgamma);
-  REGISTER_LIBM_SYMBOL(trunc);
+  REGISTER_LIBM_SYMBOL(acos, double (*)(double));
+  REGISTER_LIBM_SYMBOL(acosh, double (*)(double));
+  REGISTER_LIBM_SYMBOL(asin, double (*)(double));
+  REGISTER_LIBM_SYMBOL(asinh, double (*)(double));
+  REGISTER_LIBM_SYMBOL(atan, double (*)(double));
+  REGISTER_LIBM_SYMBOL(atan2, double (*)(double, double));
+  REGISTER_LIBM_SYMBOL(atanh, double (*)(double));
+  REGISTER_LIBM_SYMBOL(cbrt, double (*)(double));
+  REGISTER_LIBM_SYMBOL(ceil, double (*)(double));
+  REGISTER_LIBM_SYMBOL(copysign, double (*)(double, double));
+  REGISTER_LIBM_SYMBOL(cos, double (*)(double));
+  REGISTER_LIBM_SYMBOL(cosh, double (*)(double));
+  REGISTER_LIBM_SYMBOL(erf, double (*)(double));
+  REGISTER_LIBM_SYMBOL(erfc, double (*)(double));
+  REGISTER_LIBM_SYMBOL(exp, double (*)(double));
+  REGISTER_LIBM_SYMBOL(exp2, double (*)(double));
+  REGISTER_LIBM_SYMBOL(expm1, double (*)(double));
+  REGISTER_LIBM_SYMBOL(fabs, double (*)(double));
+  REGISTER_LIBM_SYMBOL(fdim, double (*)(double, double));
+  REGISTER_LIBM_SYMBOL(floor, double (*)(double));
+  REGISTER_LIBM_SYMBOL(fma, double (*)(double, double, double));
+  REGISTER_LIBM_SYMBOL(fmax, double (*)(double, double));
+  REGISTER_LIBM_SYMBOL(fmin, double (*)(double, double));
+  REGISTER_LIBM_SYMBOL(fmod, double (*)(double, double));
+  REGISTER_LIBM_SYMBOL(frexp, double (*)(double, int*));
+  REGISTER_LIBM_SYMBOL(hypot, double (*)(double, double));
+  REGISTER_LIBM_SYMBOL(ilogb, int (*)(double));
+  REGISTER_LIBM_SYMBOL(ldexp, double (*)(double, int));
+  REGISTER_LIBM_SYMBOL(lgamma, double (*)(double));
+  REGISTER_LIBM_SYMBOL(llrint, long long (*)(double));
+  REGISTER_LIBM_SYMBOL(llround, long long (*)(double));
+  REGISTER_LIBM_SYMBOL(log, double (*)(double));
+  REGISTER_LIBM_SYMBOL(log10, double (*)(double));
+  REGISTER_LIBM_SYMBOL(log1p, double (*)(double));
+  REGISTER_LIBM_SYMBOL(log2, double (*)(double));
+  REGISTER_LIBM_SYMBOL(logb, double (*)(double));
+  REGISTER_LIBM_SYMBOL(lrint, long (*)(double));
+  REGISTER_LIBM_SYMBOL(lround, long (*)(double));
+  REGISTER_LIBM_SYMBOL(modf, double (*)(double, double*));
+  REGISTER_LIBM_SYMBOL(nan, double (*)(const char*));
+  REGISTER_LIBM_SYMBOL(nearbyint, double (*)(double));
+  REGISTER_LIBM_SYMBOL(nextafter, double (*)(double, double));
+  REGISTER_LIBM_SYMBOL(nexttoward, double (*)(double, long double));
+  REGISTER_LIBM_SYMBOL(pow, double (*)(double, double));
+  REGISTER_LIBM_SYMBOL(remainder, double (*)(double, double));
+  REGISTER_LIBM_SYMBOL(remquo, double (*)(double, double, int*));
+  REGISTER_LIBM_SYMBOL(rint, double (*)(double));
+  REGISTER_LIBM_SYMBOL(round, double (*)(double));
+  REGISTER_LIBM_SYMBOL(scalbln, double (*)(double, long));
+  REGISTER_LIBM_SYMBOL(scalbn, double (*)(double, int));
+  REGISTER_LIBM_SYMBOL(sin, double (*)(double));
+  REGISTER_LIBM_SYMBOL(sincos, void (*)(double, double*, double*));
+  REGISTER_LIBM_SYMBOL(sinh, double (*)(double));
+  REGISTER_LIBM_SYMBOL(sqrt, double (*)(double));
+  REGISTER_LIBM_SYMBOL(tan, double (*)(double));
+  REGISTER_LIBM_SYMBOL(tanh, double (*)(double));
+  REGISTER_LIBM_SYMBOL(tgamma, double (*)(double));
+  REGISTER_LIBM_SYMBOL(trunc, double (*)(double));
 
 #undef REGISTER_LIBM_SYMBOL
 
diff --git a/tensorflow/compiler/xla/xla_data.proto b/tensorflow/compiler/xla/xla_data.proto
index 7efdf8552e..6800c3d7fa 100644
--- a/tensorflow/compiler/xla/xla_data.proto
+++ b/tensorflow/compiler/xla/xla_data.proto
@@ -450,7 +450,7 @@ message ConvolutionDimensionNumbers {
 message ConvolveRequest {
   ComputationDataHandle lhs = 2;
   ComputationDataHandle rhs = 3;  // This is the filter/kernel.
-  Window window = 4;              // Describes the filter/kenel.
+  Window window = 4;              // Describes the filter/kernel.
   ConvolutionDimensionNumbers dimension_numbers = 5;
 }
 
diff --git a/tensorflow/contrib/boosted_trees/lib/utils/batch_features.h b/tensorflow/contrib/boosted_trees/lib/utils/batch_features.h
index 7a550d6f73..badc629a11 100644
--- a/tensorflow/contrib/boosted_trees/lib/utils/batch_features.h
+++ b/tensorflow/contrib/boosted_trees/lib/utils/batch_features.h
@@ -56,7 +56,7 @@ class BatchFeatures {
     *num_sparse_int_features = sparse_int_feature_columns_.size();
     if (*num_dense_float_features == 0 && *num_sparse_float_features == 0 &&
         *num_sparse_int_features == 0) {
-      return errors::FailedPrecondition("Not intialized yet.");
+      return errors::FailedPrecondition("Not initialized yet.");
     }
     return Status::OK();
   }
diff --git a/tensorflow/contrib/boosted_trees/lib/utils/example.h b/tensorflow/contrib/boosted_trees/lib/utils/example.h
index e388cf332c..54f60e1dee 100644
--- a/tensorflow/contrib/boosted_trees/lib/utils/example.h
+++ b/tensorflow/contrib/boosted_trees/lib/utils/example.h
@@ -63,7 +63,7 @@ class SparseFloatFeatureColumn {
  public:
   void Reserve(const int32 size) {
     if (!single_dimensional_) {
-      mutlidimensional_values.Reserve(size);
+      multidimensional_values.Reserve(size);
     }
   }
 
@@ -76,7 +76,7 @@ class SparseFloatFeatureColumn {
       DCHECK_EQ(0, feature_idx);
       single_value_ = value;
     } else {
-      mutlidimensional_values.Add(feature_idx, value);
+      multidimensional_values.Add(feature_idx, value);
     }
     initialized_ = true;
   }
@@ -84,7 +84,7 @@ class SparseFloatFeatureColumn {
   void Clear() {
     single_dimensional_ = false;
     initialized_ = false;
-    mutlidimensional_values.Clear();
+    multidimensional_values.Clear();
   }
 
   OptionalValue<T> operator[](int feature_idx) const {
@@ -94,7 +94,7 @@ class SparseFloatFeatureColumn {
     if (single_dimensional_) {
       return OptionalValue<T>(single_value_);
     } else {
-      return mutlidimensional_values[feature_idx];
+      return multidimensional_values[feature_idx];
     }
   }
 
@@ -102,7 +102,7 @@ class SparseFloatFeatureColumn {
   bool single_dimensional_;
   bool initialized_;
   T single_value_;
-  SparseMultidimensionalValues<T> mutlidimensional_values;
+  SparseMultidimensionalValues<T> multidimensional_values;
 };
 
 // Holds data for one example and enables lookup by feature column.
diff --git a/tensorflow/contrib/boosted_trees/lib/utils/sparse_column_iterable.cc b/tensorflow/contrib/boosted_trees/lib/utils/sparse_column_iterable.cc
index bc0a93db8c..ccee9530b6 100644
--- a/tensorflow/contrib/boosted_trees/lib/utils/sparse_column_iterable.cc
+++ b/tensorflow/contrib/boosted_trees/lib/utils/sparse_column_iterable.cc
@@ -96,6 +96,10 @@ class IndicesRowIterator
     return (row_idx_ != other.row_idx_);
   }
 
+  bool operator<(const IndicesRowIterator& other) const {
+    return (row_idx_ < other.row_idx_);
+  }
+
   bool operator==(const IndicesRowIterator& other) const {
     QCHECK_EQ(iter_, other.iter_);
     return (row_idx_ == other.row_idx_);
diff --git a/tensorflow/contrib/cmake/external/re2.cmake b/tensorflow/contrib/cmake/external/re2.cmake
index b56f4b0898..d10f5959f7 100644
--- a/tensorflow/contrib/cmake/external/re2.cmake
+++ b/tensorflow/contrib/cmake/external/re2.cmake
@@ -45,4 +45,5 @@ ExternalProject_Add(re2
 		endif()
         -DCMAKE_BUILD_TYPE:STRING=Release
         -DCMAKE_INSTALL_PREFIX:STRING=${re2_INSTALL}
+        -DRE2_BUILD_TESTING:BOOL=OFF
 )
diff --git a/tensorflow/contrib/cmake/tf_shared_lib.cmake b/tensorflow/contrib/cmake/tf_shared_lib.cmake
index dcedabb333..571d2b0dec 100644
--- a/tensorflow/contrib/cmake/tf_shared_lib.cmake
+++ b/tensorflow/contrib/cmake/tf_shared_lib.cmake
@@ -95,10 +95,18 @@ if(WIN32)
   add_dependencies(tensorflow tensorflow_static)
 endif(WIN32)
 
-install(TARGETS tensorflow
+target_include_directories(tensorflow PUBLIC 
+    $<INSTALL_INTERFACE:include/>
+    $<INSTALL_INTERFACE:include/external/nsync/public>)
+
+install(TARGETS tensorflow EXPORT tensorflow_export
         RUNTIME DESTINATION bin
         LIBRARY DESTINATION lib
         ARCHIVE DESTINATION lib)
+        
+install(EXPORT tensorflow_export
+        FILE TensorflowConfig.cmake
+        DESTINATION lib/cmake)
 
 # install necessary headers
 # tensorflow headers
diff --git a/tensorflow/contrib/cmake/tf_tests.cmake b/tensorflow/contrib/cmake/tf_tests.cmake
index 5d6ba9ca8d..2e3ee2c96b 100644
--- a/tensorflow/contrib/cmake/tf_tests.cmake
+++ b/tensorflow/contrib/cmake/tf_tests.cmake
@@ -153,7 +153,7 @@ if (tensorflow_BUILD_PYTHON_TESTS)
     "${tensorflow_source_dir}/tensorflow/contrib/data/*_test.py"
     "${tensorflow_source_dir}/tensorflow/contrib/factorization/*_test.py"
     "${tensorflow_source_dir}/tensorflow/contrib/image/*_test.py"
-    "${tensorflow_source_dir}/tensorflow/contrib/keras/python/keras/integration_test.py"
+    "${tensorflow_source_dir}/tensorflow/python/keras/_impl/keras/*_test.py"
     "${tensorflow_source_dir}/tensorflow/contrib/nearest_neighbor/python/kernel_tests/*_test.py"
     "${tensorflow_source_dir}/tensorflow/contrib/seq2seq/python/kernel_tests/*_test.py"
     "${tensorflow_source_dir}/tensorflow/contrib/stateless/python/kernel_tests/*_test.py"
@@ -171,7 +171,6 @@ if (tensorflow_BUILD_PYTHON_TESTS)
       "${tensorflow_source_dir}/tensorflow/contrib/graph_editor/*_test.py"
       "${tensorflow_source_dir}/tensorflow/contrib/bayesflow/*_test.py"
       "${tensorflow_source_dir}/tensorflow/contrib/framework/*_test.py"
-      "${tensorflow_source_dir}/tensorflow/contrib/keras/*_test.py"
       "${tensorflow_source_dir}/tensorflow/contrib/distributions/*_test.py"
       "${tensorflow_source_dir}/tensorflow/contrib/learn/*_test.py"
     )
@@ -225,6 +224,9 @@ if (tensorflow_BUILD_PYTHON_TESTS)
       # Numerical issues, calculations off.
       "${tensorflow_source_dir}/tensorflow/python/kernel_tests/concat_op_test.py"
       "${tensorflow_source_dir}/tensorflow/contrib/factorization/python/ops/wals_test.py"
+      "${tensorflow_source_dir}/tensorflow/python/keras/_impl/keras/utils/data_utils_test.py"
+      "${tensorflow_source_dir}/tensorflow/python/keras/_impl/keras/backend_test.py"
+      "${tensorflow_source_dir}/tensorflow/python/keras/_impl/keras/preprocessing/image_test.py"
       # Float division by zero
       "${tensorflow_source_dir}/tensorflow/python/kernel_tests/benchmark_test.py"
       # Flaky, for unknown reasons. Cannot reproduce in terminal. Revisit once we can get stack traces.
diff --git a/tensorflow/contrib/crf/python/ops/crf.py b/tensorflow/contrib/crf/python/ops/crf.py
index ec395e41d0..7f5ae937b2 100644
--- a/tensorflow/contrib/crf/python/ops/crf.py
+++ b/tensorflow/contrib/crf/python/ops/crf.py
@@ -420,7 +420,7 @@ class CrfDecodeBackwardRnnCell(rnn_cell.RNNCell):
     """Initialize the CrfDecodeBackwardRnnCell.
 
     Args:
-      num_tags: The number of tags.
+      num_tags: An integer. The number of tags.
     """
     self._num_tags = num_tags
 
diff --git a/tensorflow/contrib/data/python/kernel_tests/BUILD b/tensorflow/contrib/data/python/kernel_tests/BUILD
index 43431ca2c5..ffb5655c3e 100644
--- a/tensorflow/contrib/data/python/kernel_tests/BUILD
+++ b/tensorflow/contrib/data/python/kernel_tests/BUILD
@@ -161,6 +161,7 @@ py_test(
     size = "small",
     srcs = ["flat_map_dataset_op_test.py"],
     srcs_version = "PY2AND3",
+    tags = ["no_pip"],
     deps = [
         ":dataset_serialization_test",
         "//tensorflow/contrib/data/python/ops:dataset_ops",
@@ -278,6 +279,7 @@ py_test(
     size = "medium",
     srcs = ["map_dataset_op_test.py"],
     srcs_version = "PY2AND3",
+    tags = ["no_pip"],
     deps = [
         ":dataset_serialization_test",
         "//tensorflow/contrib/data/python/ops:dataset_ops",
@@ -348,6 +350,7 @@ py_test(
     size = "medium",
     srcs = ["reader_dataset_ops_test.py"],
     srcs_version = "PY2AND3",
+    tags = ["no_pip"],
     deps = [
         ":dataset_serialization_test",
         "//tensorflow/contrib/data/python/ops:readers",
diff --git a/tensorflow/contrib/distributions/python/ops/poisson_lognormal.py b/tensorflow/contrib/distributions/python/ops/poisson_lognormal.py
index 96dff85665..2701c36fb5 100644
--- a/tensorflow/contrib/distributions/python/ops/poisson_lognormal.py
+++ b/tensorflow/contrib/distributions/python/ops/poisson_lognormal.py
@@ -293,7 +293,7 @@ class PoissonLogNormalQuadratureCompound(distribution_lib.Distribution):
     # where,
     #
     # Z|v ~ interpolate_affine[v](distribution)
-    # V ~ mixture_distrubution
+    # V ~ mixture_distribution
     #
     # thus,
     #
diff --git a/tensorflow/contrib/eager/python/metrics_impl.py b/tensorflow/contrib/eager/python/metrics_impl.py
index aa359b7a0d..2f8016ede3 100644
--- a/tensorflow/contrib/eager/python/metrics_impl.py
+++ b/tensorflow/contrib/eager/python/metrics_impl.py
@@ -73,7 +73,7 @@ class Metric(object):
   * `result()`: Computes and returns a final value for the metric
     from the variables in `self`.
 
-  Decendants may override `aggregate()`, but usually won't need to.  It
+  Descendants may override `aggregate()`, but usually won't need to.  It
   adds in the state from a list of metrics of the same type as `self`.
   (Default is to sum all the variables.) Note that users should not call
   `aggregate()`, it is for use by TensorFlow infrastructure.
diff --git a/tensorflow/contrib/factorization/python/ops/wals.py b/tensorflow/contrib/factorization/python/ops/wals.py
index 2bde3e0dd7..4fe22ea26e 100644
--- a/tensorflow/contrib/factorization/python/ops/wals.py
+++ b/tensorflow/contrib/factorization/python/ops/wals.py
@@ -183,7 +183,7 @@ def _wals_factorization_model_function(features, labels, mode, params):
 
   # TRAIN mode:
   if mode == model_fn.ModeKeys.TRAIN:
-    # Training consists of the folowing ops (controlled using a SweepHook).
+    # Training consists of the following ops (controlled using a SweepHook).
     # Before a row sweep:
     #   row_update_prep_gramian_op
     #   initialize_row_update_op
diff --git a/tensorflow/contrib/ffmpeg/BUILD b/tensorflow/contrib/ffmpeg/BUILD
index 7a5a4cb8c9..eccce99071 100644
--- a/tensorflow/contrib/ffmpeg/BUILD
+++ b/tensorflow/contrib/ffmpeg/BUILD
@@ -47,10 +47,25 @@ cc_library(
     alwayslink = 1,
 )
 
+cc_library(
+    name = "decode_video_op_cc",
+    srcs = ["decode_video_op.cc"],
+    copts = tf_copts(),
+    linkstatic = 1,
+    visibility = ["//visibility:private"],
+    deps = [
+        "//tensorflow/contrib/ffmpeg/default:ffmpeg_lib",
+        "//tensorflow/core:framework_headers_lib",
+        "//third_party/eigen3",
+    ],
+    alwayslink = 1,
+)
+
 tf_custom_op_library(
     name = "ffmpeg.so",
     deps = [
         ":decode_audio_op_cc",
+        ":decode_video_op_cc",
         ":encode_audio_op_cc",
     ],
 )
@@ -59,6 +74,7 @@ cc_library(
     name = "ffmpeg_op_lib",
     deps = [
         ":decode_audio_op_cc",
+        ":decode_video_op_cc",
         ":encode_audio_op_cc",
     ],
 )
@@ -81,6 +97,15 @@ tf_gen_op_wrapper_py(
     ],
 )
 
+tf_gen_op_wrapper_py(
+    name = "decode_video_op_py",
+    require_shape_functions = True,
+    visibility = ["//visibility:private"],
+    deps = [
+        ":decode_video_op_cc",
+    ],
+)
+
 tf_py_test(
     name = "decode_audio_op_test",
     srcs = ["decode_audio_op_test.py"],
@@ -115,6 +140,27 @@ tf_py_test(
     tags = ["manual"],
 )
 
+tf_py_test(
+    name = "decode_video_op_test",
+    size = "small",
+    srcs = ["decode_video_op_test.py"],
+    additional_deps = [
+        ":ffmpeg_ops_py",
+        "@six_archive//:six",
+        "//tensorflow/python:array_ops",
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:platform",
+        "//tensorflow/python:image_ops",
+    ],
+    data = [
+        ":test_data",
+    ],
+    tags = [
+        "manual",
+        "notap",
+    ],
+)
+
 py_library(
     name = "ffmpeg_ops_py",
     srcs = [
@@ -126,6 +172,7 @@ py_library(
     visibility = ["//visibility:public"],
     deps = [
         ":decode_audio_op_py",
+        ":decode_video_op_py",
         ":encode_audio_op_py",
         "//tensorflow/contrib/util:util_py",
         "//tensorflow/python:framework_for_generated_wrappers",
diff --git a/tensorflow/contrib/ffmpeg/__init__.py b/tensorflow/contrib/ffmpeg/__init__.py
index 2bcb7284e1..484ffee3e7 100644
--- a/tensorflow/contrib/ffmpeg/__init__.py
+++ b/tensorflow/contrib/ffmpeg/__init__.py
@@ -26,9 +26,10 @@ from __future__ import division
 from __future__ import print_function
 
 from tensorflow.contrib.ffmpeg.ffmpeg_ops import decode_audio
+from tensorflow.contrib.ffmpeg.ffmpeg_ops import decode_video
 from tensorflow.contrib.ffmpeg.ffmpeg_ops import encode_audio
 
 from tensorflow.python.util.all_util import remove_undocumented
 
-_allowed_symbols = ['decode_audio', 'encode_audio']
+_allowed_symbols = ['decode_audio', 'encode_audio', 'decode_video']
 remove_undocumented(__name__, _allowed_symbols)
diff --git a/tensorflow/contrib/ffmpeg/decode_audio_op.cc b/tensorflow/contrib/ffmpeg/decode_audio_op.cc
index 4b1c8a337e..92fad70b1f 100644
--- a/tensorflow/contrib/ffmpeg/decode_audio_op.cc
+++ b/tensorflow/contrib/ffmpeg/decode_audio_op.cc
@@ -37,29 +37,6 @@ namespace {
 // https://www.ffmpeg.org/ffmpeg-formats.html
 const char* kValidFileFormats[] = {"mp3", "mp4", "ogg", "wav"};
 
-// Writes binary data to a file.
-Status WriteFile(const string& filename, tensorflow::StringPiece contents) {
-  Env& env = *Env::Default();
-  std::unique_ptr<WritableFile> file;
-  TF_RETURN_IF_ERROR(env.NewWritableFile(filename, &file));
-  TF_RETURN_IF_ERROR(file->Append(contents));
-  TF_RETURN_IF_ERROR(file->Close());
-  return Status::OK();
-}
-
-// Cleans up a file on destruction.
-class FileDeleter {
- public:
-  explicit FileDeleter(const string& filename) : filename_(filename) {}
-  ~FileDeleter() {
-    Env& env = *Env::Default();
-    env.DeleteFile(filename_).IgnoreError();
-  }
-
- private:
-  const string filename_;
-};
-
 /*
  * Decoding implementation, shared across V1 and V2 ops. Creates a new
  * output in the context.
@@ -69,7 +46,7 @@ void Decode(OpKernelContext* context,
             const string& file_format, const int32 samples_per_second,
             const int32 channel_count) {
   // Write the input data to a temp file.
-  const string temp_filename = GetTempFilename(file_format);
+  const string temp_filename = io::GetTempFilename(file_format);
   OP_REQUIRES_OK(context, WriteFile(temp_filename, file_contents));
   FileDeleter deleter(temp_filename);
 
diff --git a/tensorflow/contrib/ffmpeg/decode_video_op.cc b/tensorflow/contrib/ffmpeg/decode_video_op.cc
new file mode 100644
index 0000000000..d44032968d
--- /dev/null
+++ b/tensorflow/contrib/ffmpeg/decode_video_op.cc
@@ -0,0 +1,118 @@
+// Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+// =============================================================================
+
+#include <stdlib.h>
+
+#include <cstdio>
+#include <set>
+
+#include "tensorflow/contrib/ffmpeg/ffmpeg_lib.h"
+#include "tensorflow/core/framework/op.h"
+#include "tensorflow/core/framework/op_kernel.h"
+#include "tensorflow/core/framework/shape_inference.h"
+#include "tensorflow/core/lib/io/path.h"
+#include "tensorflow/core/lib/strings/str_util.h"
+#include "tensorflow/core/lib/strings/strcat.h"
+#include "tensorflow/core/platform/env.h"
+#include "tensorflow/core/platform/logging.h"
+
+namespace tensorflow {
+namespace ffmpeg {
+
+class DecodeVideoOp : public OpKernel {
+ public:
+  explicit DecodeVideoOp(OpKernelConstruction* context) : OpKernel(context) {}
+
+  void Compute(OpKernelContext* context) override {
+    OP_REQUIRES(
+        context, context->num_inputs() == 1,
+        errors::InvalidArgument("DecodeVideo requires exactly 1 input."));
+    const Tensor& contents_tensor = context->input(0);
+
+    OP_REQUIRES(context, TensorShapeUtils::IsScalar(contents_tensor.shape()),
+                errors::InvalidArgument(
+                    "contents must be a rank-0 tensor but got shape ",
+                    contents_tensor.shape().DebugString()));
+    const tensorflow::StringPiece contents = contents_tensor.scalar<string>()();
+
+    // Write the input data to a temp file.
+    string extension;
+    const string temp_filename = io::GetTempFilename(extension);
+    OP_REQUIRES_OK(context, WriteFile(temp_filename, contents));
+    FileDeleter deleter(temp_filename);
+
+    uint32 width = 0;
+    uint32 height = 0;
+    uint32 frames = 0;
+
+    // Run FFmpeg on the data and verify results.
+    std::vector<uint8> output_data;
+    const Status result = ffmpeg::ReadVideoFile(temp_filename, &output_data,
+                                                &width, &height, &frames);
+    if (result.code() == error::Code::NOT_FOUND) {
+      OP_REQUIRES(
+          context, result.ok(),
+          errors::Unavailable("FFmpeg must be installed to run this op. FFmpeg "
+                              "can be found at http://www.ffmpeg.org."));
+    } else if (result.code() == error::UNKNOWN) {
+      LOG(ERROR) << "Ffmpeg failed with error '" << result.error_message()
+                 << "'. Returning empty tensor.";
+      Tensor* output = nullptr;
+      OP_REQUIRES_OK(context,
+                     context->allocate_output(0, TensorShape({0, 0}), &output));
+      return;
+    } else {
+      OP_REQUIRES_OK(context, result);
+    }
+    OP_REQUIRES(context, !output_data.empty(),
+                errors::Unknown("No output created by FFmpeg."));
+    OP_REQUIRES(
+        context, output_data.size() == (frames * height * width * 3),
+        errors::Unknown("Output created by FFmpeg [", output_data.size(),
+                        "] does not match description [", frames, ", ", height,
+                        ", ", width, ", 3]"));
+    Tensor* output = nullptr;
+    OP_REQUIRES_OK(context,
+                   context->allocate_output(
+                       0, TensorShape({frames, height, width, 3}), &output));
+    auto output_flat = output->flat<uint8>();
+    std::copy_n(output_data.begin(), output_data.size(), &output_flat(0));
+  }
+};
+
+REGISTER_KERNEL_BUILDER(Name("DecodeVideo").Device(DEVICE_CPU), DecodeVideoOp);
+
+REGISTER_OP("DecodeVideo")
+    .Input("contents: string")
+    .Output("output: uint8")
+    .SetShapeFn([](shape_inference::InferenceContext* c) {
+      c->set_output(0, c->UnknownShapeOfRank(4));
+      return Status::OK();
+    })
+    .Doc(R"doc(
+Processes the contents of an audio file into a tensor using FFmpeg to decode
+the file.
+
+One row of the tensor is created for each channel in the audio file. Each
+channel contains audio samples starting at the beginning of the audio and
+having `1/samples_per_second` time between them. If the `channel_count` is
+different from the contents of the file, channels will be merged or created.
+
+contents: The binary audio file contents, as a string or rank-0 string
+    tensor.
+)doc");
+
+}  // namespace ffmpeg
+}  // namespace tensorflow
diff --git a/tensorflow/contrib/ffmpeg/decode_video_op_test.py b/tensorflow/contrib/ffmpeg/decode_video_op_test.py
new file mode 100644
index 0000000000..b43b6b8919
--- /dev/null
+++ b/tensorflow/contrib/ffmpeg/decode_video_op_test.py
@@ -0,0 +1,69 @@
+# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# =============================================================================
+"""Tests for third_party.tensorflow.contrib.ffmpeg.decode_video_op."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import os.path
+
+import six  # pylint: disable=unused-import
+
+from tensorflow.contrib import ffmpeg
+from tensorflow.python.ops import image_ops
+from tensorflow.python.platform import resource_loader
+from tensorflow.python.platform import test
+
+
+class DecodeVideoOpTest(test.TestCase):
+
+  def _loadFileAndTest(self, filename, width, height, frames, bmp_filename,
+                       index):
+    """Loads an video file and validates the output tensor.
+
+    Args:
+      filename: The filename of the input file.
+      width: The width of the video.
+      height: The height of the video.
+      frames: The frames of the video.
+      bmp_filename: The filename for the bmp file.
+      index: Index location inside the video.
+    """
+    with self.test_session():
+      path = os.path.join(resource_loader.get_data_files_path(), 'testdata',
+                          filename)
+      with open(path, 'rb') as f:
+        contents = f.read()
+
+      bmp_path = os.path.join(resource_loader.get_data_files_path(), 'testdata',
+                              bmp_filename)
+      with open(bmp_path, 'rb') as f:
+        bmp_contents = f.read()
+
+      image_op = image_ops.decode_bmp(bmp_contents)
+      image = image_op.eval()
+      self.assertEqual(image.shape, (height, width, 3))
+      video_op = ffmpeg.decode_video(contents)
+      video = video_op.eval()
+      self.assertEqual(video.shape, (frames, height, width, 3))
+      self.assertAllEqual(video[index, :, :, :], image)
+
+  def testMp4(self):
+    self._loadFileAndTest('small.mp4', 560, 320, 166, 'small_100.bmp', 99)
+
+
+if __name__ == '__main__':
+  test.main()
diff --git a/tensorflow/contrib/ffmpeg/default/ffmpeg_lib.cc b/tensorflow/contrib/ffmpeg/default/ffmpeg_lib.cc
index 545a4386d0..1245f515fe 100644
--- a/tensorflow/contrib/ffmpeg/default/ffmpeg_lib.cc
+++ b/tensorflow/contrib/ffmpeg/default/ffmpeg_lib.cc
@@ -16,6 +16,7 @@
 #include "tensorflow/contrib/ffmpeg/ffmpeg_lib.h"
 
 #include <errno.h>
+#include <fcntl.h>
 #include <stdlib.h>
 #include <sys/stat.h>
 #include <sys/types.h>
@@ -25,6 +26,7 @@
 #include <vector>
 
 #include "tensorflow/core/lib/io/path.h"
+#include "tensorflow/core/lib/strings/numbers.h"
 #include "tensorflow/core/lib/strings/str_util.h"
 #include "tensorflow/core/platform/cpu_info.h"
 #include "tensorflow/core/platform/env.h"
@@ -38,28 +40,45 @@ namespace {
 const char kFfmpegExecutable[] = "ffmpeg";
 const int32 kDefaultProbeSize = 5000000;  // 5MB
 
-std::vector<string> FfmpegCommandLine(const string& input_filename,
-                                      const string& output_filename,
-                                      const string& input_format_id,
-                                      int32 samples_per_second,
-                                      int32 channel_count) {
-  return {
-    "-nostats",  // No additional progress display.
-    "-nostdin",  // No interactive commands accepted.
-    "-f", input_format_id,  // eg: "mp3"
-    "-probesize", StrCat(kDefaultProbeSize),
-    "-i", input_filename,
-    "-loglevel", "info",  // Enable verbose logging to support debugging.
-    "-map_metadata", "-1",  // Copy global metadata from input to output.
-    "-vn",  // No video recording.
-    "-ac:a:0", StrCat(channel_count),
-    "-ar:a:0", StrCat(samples_per_second),
-    // Output set (in several ways) to signed 16-bit little-endian ints.
-    "-codec:a:0", "pcm_s16le", "-sample_fmt", "s16", "-f", "s16le",
-    "-sn",  // No subtitle recording.
-    "-y",  // Overwrite output file.
-    StrCat(output_filename)
-  };
+std::vector<string> FfmpegAudioCommandLine(const string& input_filename,
+                                           const string& output_filename,
+                                           const string& input_format_id,
+                                           int32 samples_per_second,
+                                           int32 channel_count) {
+  return {"-nostats",             // No additional progress display.
+          "-nostdin",             // No interactive commands accepted.
+          "-f", input_format_id,  // eg: "mp3"
+          "-probesize", StrCat(kDefaultProbeSize), "-i", input_filename,
+          "-loglevel", "info",  // Enable verbose logging to support debugging.
+          "-map_metadata", "-1",  // Copy global metadata from input to output.
+          "-vn",                  // No video recording.
+          "-ac:a:0", StrCat(channel_count), "-ar:a:0",
+          StrCat(samples_per_second),
+          // Output set (in several ways) to signed 16-bit little-endian ints.
+          "-codec:a:0", "pcm_s16le", "-sample_fmt", "s16", "-f", "s16le",
+          "-sn",  // No subtitle recording.
+          "-y",   // Overwrite output file.
+          StrCat(output_filename)};
+}
+
+std::vector<string> FfmpegVideoCommandLine(const string& input_filename,
+                                           const string& output_filename) {
+  return {"-nostats",  // No additional progress display.
+          "-nostdin",  // No interactive commands accepted.
+          "-i",
+          input_filename,
+          "-f",
+          "image2pipe",
+          "-probesize",
+          StrCat(kDefaultProbeSize),
+          "-loglevel",
+          "info",  // Enable verbose logging to support debugging.
+          "-vcodec",
+          "rawvideo",
+          "-pix_fmt",
+          "rgb24",
+          "-y",  // Overwrite output file.
+          StrCat(output_filename)};
 }
 
 // Is a named binary installed and executable by the current process?
@@ -106,7 +125,7 @@ bool IsBinaryInstalled(const string& binary_name) {
   ::execvp(kFfmpegExecutable, args_chars.data());
   // exec only returns on error.
   const int error = errno;
-  LOG(ERROR) << "FFmpeg could not be executed: " << error;
+  LOG(ERROR) << "FFmpeg could not be executed: " << strerror(error);
   ::_exit(error);
 }
 
@@ -198,52 +217,101 @@ string BuildWavFile(int32 samples_per_second, int32 channel_count,
   return data;
 }
 
-// Returns a unique number every time it is called.
-int64 UniqueId() {
-  static mutex mu(LINKER_INITIALIZED);
-  static int64 id = 0;
-  mutex_lock l(mu);
-  return ++id;
-}
-
-}  // namespace
-
-string GetTempFilename(const string& extension) {
-  for (const char* dir : std::vector<const char*>(
-           {getenv("TEST_TMPDIR"), getenv("TMPDIR"), getenv("TMP"), "/tmp"})) {
-    if (!dir || !dir[0]) {
+Status ReadInfoFile(const string& filename, uint32* width, uint32* height,
+                    uint32* frames) {
+  string data;
+  TF_QCHECK_OK(ReadFileToString(Env::Default(), filename, &data))
+      << "Could not read FFmpeg file: " << filename;
+  bool in_output = false;
+  bool in_mapping = false;
+  uint32 frames_value = 0;
+  uint32 height_value = 0;
+  uint32 width_value = 0;
+  for (const string& line : str_util::Split(data, '\n')) {
+    // Output starts with the first line of `Output #..`.
+    // Further processing output region starts next line so we could continue
+    // the loop.
+    if (!in_output && line.find("Output #") == 0) {
+      in_output = true;
+      in_mapping = false;
       continue;
     }
-    struct stat statbuf;
-    if (!stat(dir, &statbuf) && S_ISDIR(statbuf.st_mode)) {
-      // UniqueId is added here because mkstemps is not as thread safe as it
-      // looks. https://github.com/tensorflow/tensorflow/issues/5804 shows
-      // the problem.
-      string tmp_filepath = io::JoinPath(
-          dir,
-          StrCat("tmp_file_tensorflow_", UniqueId(), "_XXXXXX.", extension));
-      int fd = mkstemps(&tmp_filepath[0], extension.length() + 1);
-      if (fd < 0) {
-        LOG(FATAL) << "Failed to create temp file.";
-      } else {
-        close(fd);
-        return tmp_filepath;
+    // Stream mapping starts with the first line of `Stream mapping`, it also
+    // signals the end of Output section.
+    // Further processing of stream mapping region starts next line so we could
+    // continue the loop.
+    if (!in_mapping && line.find("Stream mapping:") == 0) {
+      in_output = false;
+      in_mapping = true;
+      continue;
+    }
+    if (in_output) {
+      // We only look for the first stream in output `Stream #0`.
+      // Once processed we will not further process output section.
+      if (line.find("    Stream #") == 0) {
+        size_t p = line.find(", rgb24, ", 24);
+        if (p != std::string::npos) {
+          string rgb24 = line.substr(p + 9, line.find(" ", p + 9));
+          rgb24 = rgb24.substr(0, rgb24.find(","));
+          string rgb24_width = rgb24.substr(0, rgb24.find("x"));
+          string rgb24_height = rgb24.substr(rgb24_width.length() + 1);
+          if (strings::safe_strtou32(rgb24_width, &width_value) &&
+              strings::safe_strtou32(rgb24_height, &height_value)) {
+            in_output = false;
+          }
+        }
+      }
+      continue;
+    }
+    if (in_mapping) {
+      // We only look for the first stream mapping to have the number of the
+      // frames.
+      // Once processed we will not further process stream mapping section.
+      if (line.find("frame=  ") == 0) {
+        string number = line.substr(8, line.find(" ", 8));
+        number = number.substr(0, number.find(" "));
+        if (strings::safe_strtou32(number, &frames_value)) {
+          in_mapping = false;
+        }
       }
+      continue;
     }
   }
-  LOG(FATAL) << "No temp directory found.";
+  if (frames_value == 0 || height_value == 0 || width_value == 0) {
+    return errors::Unknown("Not enough video info returned by FFmpeg [",
+                           frames_value, ", ", height_value, ", ", width_value,
+                           ", 3]");
+  }
+  *width = width_value;
+  *height = height_value;
+  *frames = frames_value;
+  return Status::OK();
 }
 
-Status ReadAudioFile(const string& filename,
-                     const string& audio_format_id,
-                     int32 samples_per_second,
-                     int32 channel_count,
+}  // namespace
+
+FileDeleter::~FileDeleter() {
+  Env& env = *Env::Default();
+  env.DeleteFile(filename_).IgnoreError();
+}
+
+Status WriteFile(const string& filename, StringPiece contents) {
+  Env& env = *Env::Default();
+  std::unique_ptr<WritableFile> file;
+  TF_RETURN_IF_ERROR(env.NewWritableFile(filename, &file));
+  TF_RETURN_IF_ERROR(file->Append(contents));
+  TF_RETURN_IF_ERROR(file->Close());
+  return Status::OK();
+}
+
+Status ReadAudioFile(const string& filename, const string& audio_format_id,
+                     int32 samples_per_second, int32 channel_count,
                      std::vector<float>* output_samples) {
   // Create an argument list.
-  string output_filename = GetTempFilename("raw");
+  string output_filename = io::GetTempFilename("raw");
   const std::vector<string> args =
-      FfmpegCommandLine(filename, output_filename, audio_format_id,
-                        samples_per_second, channel_count);
+      FfmpegAudioCommandLine(filename, output_filename, audio_format_id,
+                             samples_per_second, channel_count);
 
   // Unfortunately, it's impossible to differentiate an exec failure due to the
   // binary being missing and an error from the binary's execution. Therefore,
@@ -256,7 +324,8 @@ Status ReadAudioFile(const string& filename,
   // Execute ffmpeg and report errors.
   pid_t child_pid = ::fork();
   if (child_pid < 0) {
-    return Status(error::Code::UNKNOWN, StrCat("fork failed: ", errno));
+    return Status(error::Code::UNKNOWN,
+                  StrCat("fork failed: ", strerror(errno)));
   }
   if (child_pid == 0) {
     ExecuteFfmpeg(args);
@@ -285,5 +354,63 @@ Status CreateAudioFile(const string& audio_format_id, int32 bits_per_second,
   return Status::OK();
 }
 
+Status ReadVideoFile(const string& filename, std::vector<uint8>* output_data,
+                     uint32* width, uint32* height, uint32* frames) {
+  if (!IsBinaryInstalled(kFfmpegExecutable)) {
+    return Status(error::Code::NOT_FOUND, StrCat("FFmpeg could not be found."));
+  }
+
+  string output_filename = io::GetTempFilename("raw");
+  string stderr_filename = io::GetTempFilename("err");
+
+  // Create an argument list.
+  const std::vector<string> args =
+      FfmpegVideoCommandLine(filename, output_filename);
+
+  // Execute ffmpeg and report errors.
+  pid_t child_pid = ::fork();
+  if (child_pid < 0) {
+    return Status(error::Code::UNKNOWN,
+                  StrCat("fork failed: ", strerror(errno)));
+  }
+  if (child_pid == 0) {
+    const int fd =
+        open(stderr_filename.c_str(), O_RDWR | O_CREAT | O_APPEND, 0600);
+    if (fd < 0) {
+      const int error = errno;
+      LOG(ERROR) << "FFmpeg stderr file could not be created: "
+                 << strerror(error);
+      ::_exit(error);
+    }
+    close(STDERR_FILENO);
+    dup2(fd, STDERR_FILENO);
+    ExecuteFfmpeg(args);
+  } else {
+    int status_code;
+    if (::waitpid(child_pid, &status_code, 0) < 0) {
+      return Status(error::Code::UNKNOWN,
+                    StrCat("waitpid failed: ", strerror(errno)));
+    }
+    if (status_code) {
+      return Status(error::Code::UNKNOWN,
+                    StrCat("FFmpeg execution failed: ", status_code));
+    }
+
+    TF_QCHECK_OK(ReadInfoFile(stderr_filename, width, height, frames))
+        << "Could not read FFmpeg stderr file: " << stderr_filename;
+
+    string raw_data;
+    TF_QCHECK_OK(ReadFileToString(Env::Default(), output_filename, &raw_data))
+        << "Could not read FFmpeg output file: " << output_filename;
+    output_data->resize(raw_data.size());
+    std::copy_n(raw_data.data(), raw_data.size(), output_data->begin());
+
+    TF_QCHECK_OK(Env::Default()->DeleteFile(output_filename))
+        << output_filename;
+    TF_QCHECK_OK(Env::Default()->DeleteFile(stderr_filename))
+        << stderr_filename;
+    return Status::OK();
+  }
+}
 }  // namespace ffmpeg
 }  // namespace tensorflow
diff --git a/tensorflow/contrib/ffmpeg/default/ffmpeg_lib_utility_test.cc b/tensorflow/contrib/ffmpeg/default/ffmpeg_lib_utility_test.cc
index 7176f3b550..d6c885a324 100644
--- a/tensorflow/contrib/ffmpeg/default/ffmpeg_lib_utility_test.cc
+++ b/tensorflow/contrib/ffmpeg/default/ffmpeg_lib_utility_test.cc
@@ -21,6 +21,7 @@
 #include <vector>
 
 #include "tensorflow/core/lib/core/threadpool.h"
+#include "tensorflow/core/lib/io/path.h"
 #include "tensorflow/core/platform/env.h"
 #include "tensorflow/core/platform/mutex.h"
 #include "tensorflow/core/platform/test.h"
@@ -49,7 +50,7 @@ TEST(FfmpegLibTest, TestTempDirectoryThreading) {
     pool.Schedule([&mu, &temp_filenames, environment]() {
       std::array<string, kStringsPerItem> buffer;
       for (int32 j = 0; j < kStringsPerItem; ++j) {
-        buffer[j] = GetTempFilename("mp3");
+        buffer[j] = io::GetTempFilename("mp3");
         TF_QCHECK_OK(environment->DeleteFile(buffer[j]));
       }
       mutex_lock l(mu);
diff --git a/tensorflow/contrib/ffmpeg/ffmpeg_lib.h b/tensorflow/contrib/ffmpeg/ffmpeg_lib.h
index f64007c81d..c5ea1432bf 100644
--- a/tensorflow/contrib/ffmpeg/ffmpeg_lib.h
+++ b/tensorflow/contrib/ffmpeg/ffmpeg_lib.h
@@ -24,16 +24,24 @@
 namespace tensorflow {
 namespace ffmpeg {
 
-// Gets a temp filename in an appropriate location.
-string GetTempFilename(const string& extension);
+// Cleans up a file on destruction.
+class FileDeleter {
+ public:
+  explicit FileDeleter(const string& filename) : filename_(filename) {}
+  ~FileDeleter();
+
+ private:
+  const string filename_;
+};
+
+// Writes binary data to a file.
+Status WriteFile(const string& filename, tensorflow::StringPiece contents);
 
 // Reads an audio file using ffmpeg and converts it into an array of samples in
 // [-1.0, 1.0]. If there are multiple channels in the audio then each frame will
 // contain a separate sample for each channel. Frames are ordered by time.
-Status ReadAudioFile(const string& filename,
-                     const string& audio_format_id,
-                     int32 samples_per_second,
-                     int32 channel_count,
+Status ReadAudioFile(const string& filename, const string& audio_format_id,
+                     int32 samples_per_second, int32 channel_count,
                      std::vector<float>* output_samples);
 
 // Creates an audio file using ffmpeg in a specific format. The samples are in
@@ -45,6 +53,11 @@ Status CreateAudioFile(const string& audio_format_id, int32 bits_per_second,
                        int32 samples_per_second, int32 channel_count,
                        const std::vector<float>& samples, string* output_data);
 
+// Reads an video file using ffmpeg adn converts it into a RGB24 in uint8
+// [frames, height, width, 3]. The w, h, and frames are obtained from ffmpeg.
+Status ReadVideoFile(const string& filename, std::vector<uint8>* output_data,
+                     uint32* width, uint32* height, uint32* frames);
+
 }  // namespace ffmpeg
 }  // namespace tensorflow
 
diff --git a/tensorflow/contrib/ffmpeg/ffmpeg_ops.py b/tensorflow/contrib/ffmpeg/ffmpeg_ops.py
index 18b0b8b812..5bb011f41c 100644
--- a/tensorflow/contrib/ffmpeg/ffmpeg_ops.py
+++ b/tensorflow/contrib/ffmpeg/ffmpeg_ops.py
@@ -19,6 +19,7 @@ from __future__ import division
 from __future__ import print_function
 
 from tensorflow.contrib.ffmpeg.ops import gen_decode_audio_op_py
+from tensorflow.contrib.ffmpeg.ops import gen_decode_video_op_py
 from tensorflow.contrib.ffmpeg.ops import gen_encode_audio_op_py
 from tensorflow.contrib.util import loader
 from tensorflow.python.framework import ops
@@ -89,3 +90,19 @@ def encode_audio(audio, file_format=None, samples_per_second=None):
 
 
 ops.NotDifferentiable('EncodeAudio')
+
+
+def decode_video(contents):
+  """Create an op that decodes the contents of a video file.
+
+  Args:
+    contents: The binary contents of the video file to decode. This is a
+      scalar.
+
+  Returns:
+    A rank-4 `Tensor` that has `[frames, height, width, 3]` RGB as output.
+  """
+  return gen_decode_video_op_py.decode_video(contents)
+
+
+ops.NotDifferentiable('DecodeVideo')
diff --git a/tensorflow/contrib/ffmpeg/testdata/small.mp4 b/tensorflow/contrib/ffmpeg/testdata/small.mp4
new file mode 100644
index 0000000000000000000000000000000000000000..1fc478842f51e7519866f474a02ad605235bc6a6
GIT binary patch
literal 383631
zcmZQzV30{GsVvAXFfn3aU|;~znZ^0JiDk)#3=9k{X+^22An^gitfyb(rX-dyFfehl
zvHt(hddKzU-e)&GwolW!p>R|A-TMk7GZO_}h2;FAR0VTO1ziOXJtH#{ecu3AcU=<&
z$1rDw<ouM>WCdLX=lp`ooYb@u1tS9kV_hQy1CZ*Bl9B=|ef{$Ca=o(5l+^s3#5}$H
zqI7+bv3eOLxj72D3i$;knfZCeRtm|9Nr}m}1`0)~X|_fRDXB?0`N`R~hE@hv1`3IJ
zi8+<UskR0chE@g@hK7a;xv93Lxfu$@rAfJ|wq^<i#g(>(ATquv#nw>Iz{)_+Kp{7?
zA~hu*WFAO;d{JUvdaA9VnL=_#QGRY>d~T|(p+ZSfYEDjOv8{oEMTJF5a*3^hLULiQ
zt${*HYGO)NeqO4rk)e*EAxuwUL41B%T5)QLt*((mNk&m>VoI^CnL=I>SVwYZZej_@
z)ZC=Rv^0=W`FX`9MTwbtsVVW9c_l@Owgw7GX+?>-sURE63yM-x3TzD&vQsNF^Ga-u
z4Peap+{`>%V*`ca<kY;><kAvb69a{!<oKNY{OrVx)Wj4B-!{1@O(8d_q$m|+XL3=R
ztr5uEg~|E31-1ryW(Ep{1t5bA!HmQTTT?@Y!h+(G)B;-*h0KEZqQsKSd|N|369a|B
zLR&*CLp=inh6V<P5V6#b1_1`HM@k1?HZXMVRy@Gq;AX+V!QuLB@h^Mv-pab2p9BIm
zxo;TRT>n_vp{)74@{>T){F>z-{XfmOWYhe@Z)o=<vZ7I8{+=IemmiO~|NsAl9~F{X
zpD+Aq-}{KI{dm=SfzOY6wImzY6or4CFZ%z_|Ihhf;{zq6&UsIMXnS#~`=KuWqffv7
zirBNaep}7Xc{Ks$vl%ZR2rBt)m^pcpdXLMbokn(=Q$4R;tXgkc^XpfQTdT*__4((Y
zitYNl=g0cB$ss%T-{#rD;8M5xuKoW9tYz=TW;+}{z@z+8Iq`txwy(_h_P4V-pV%*U
z<OPpNs%gO{KW2*ueiK5@SCp{M3o~NTSekP|a*7pq>;FFzE9>PLL;dGfu>5}gi}i`d
z!T)!%o9{K}MYb&NF0$tOG|w>ddFNF7|NpvH9bWQ4!(#e}YYav!?zC(-fAk^M<wf8O
zrNqy#TNvE4nHU5m_89%(GHj6AQ^0+TA@cZV5e7yvrW&U8I=ZrZDmKfz8&2iBpl6(w
zA$;op-q-eT&-3-%iK<pA`yt_w=<q?{V8J}61KtHf*PFkuQ7_QgU?Su==@-+tUk9dr
zJX5kKAYtO4HJ_Z0I>b(zuqQ5f%m1f8rkAz7?{8yZjr8$wQYq-T%N)oZk-D$Jx{u8*
z;biNFH}%Z2iJ~*SwZ24M-oH=X_oDSi>8=k6Ut;|K_)U4@X{J=|vipe0v4qV|>3;Xm
zw@bzfHoSN-i6`Khxp2V#E72>~2WNkNRC88rn|;3B%84J<mHk%Mm2UBRFShR9{Q06^
zUcI$c>I;6fQ{+bzqyBmI11hW+Vg%T;v+An9>}B}3Rzl?f-`-92YfL@!M1=M-H!yzI
zOrFrtrN_uHf%8H4S>tt%hgb9O{g5CP$dRU%!t(SzV~dZuSW2(y)Ni>z87`d>*&O)H
z{=kut*)tt16`p0}{E=S0@<vYo4$pr-U)V3^n=s#b+v*Jo3MqUH3S5(QS3GL&dvdHY
zXI+%6%45}*j=HL2ty}%&-1*yl4?h!d-nfEWN=UR_f<>u7`0TNes@i|fFK2L1;n9fj
z+?Dcs=U%puU#?sB32@yL5IHtY;oGGJ4_0(!dglsV5xuR#>Sh)2mtV-xlf#~Y?b?R?
z13cM@rxRSCfB)zvU;Ff5*#AYRW^P)4`_xp1{92<9Hk11?&;IS4ze7gi(FdIsSpwoh
zhZ!BCB5xc}=zrOe&^ae{$>~%BiD`aJ31<tOk3ACC-hQhs(DsgR*p`^(TdW?4MEWIg
zJh^!P>FOH+Gr#N4;`skTSM~EV$A)8~Rt8}wlFly)=X&(8WRD{Q=gdP4ee>j=-Z^wI
zPVUsqh_9T=>YWor1(u07>i#|U_1LZ5M-)3(GN?y9+_ufpg@4QG-R2Lro|7tVUOQ#s
zvYPYdWzvn)((YGkI(r+ukjp*nq*E8PZuzGL6&DN`G>$nkFqSW#Q*ho@>yh!3PwBer
z8ZY=Z3FMxsP5SvGQdc@_MeO2m;qsGht0(TfIy+cAv@UGV*=<u5ZA@wo9Ll&+sMYc3
z|GV?P=f6CEb0P5eW&Qs@H(j22UgG~@uPMwW=QrMOet+v&)v1||p)Zr4=kH_A4cS=b
z)?aAx-+29ZK7PH9X97w=&YTy+{xLXl3$gLnE3|Fjv%9z<hvUGbj{ynCWW^Z!8`t(v
z<ecLCsWUL9VyeKmUX}3m`>m#L|Noz*`nksZJ#iudy7uRnUtF^7@b%7{6Bij+8P0Tb
zdwpi>&yQtuUH{upQeLs!Z`I}3r_Ogi4D;Kj8gcjO%|E+)1EVk23vIW`ZvQ1)AX94c
zt?q+i<l)CUzYj48N<C;$H(1ztlEW{7NvmG}xFAE$Q3jsI=YRj?w>xYI+hBQuhgs&U
z%&fQRu3e@_7pbT!FD*L!UH?&OVb)^}*HtHH)>-_qx-Y3?Xms13H8X~NX6AGwF&*Ce
zW@DZ&^+qdi{oK$Konkv*YM1D>Z`ZtbhU^N+kA1Z=c+K}U;fDhsHoajfVqex7)>rhW
z<>^eL*%seTLl}cO&A#3;GI%rN*g1dJyP3zIT@A{b6aJL%zx|KTIbH7sJA>Ay?J{`4
zEW_Er#Ivl%=Y<CYOTnLI{7j{yZ&Vq$+^!21oVjsmnVkAF(S|F9r{}NzI{%Q8Wf}YZ
z5Tk$JzDED6NPO%pQzE-$*5ac|I`_jDFnB()G+Er|;}<LUriX!v!T$W0f1B^W-WQS?
zx@hHw-JOAPYn{3}pL>R>%s8qq>7X(_p=QUC8+?7eifNrE=gsw<D}8PLqf(oTyMFtB
zwAk;rO+WbG++5+kmmh6f;&r3@ZuyU?_L*1XLW@?cfBOH%#-Lw?S43CXK78f+RI9Ub
zx4)v?FI&+x1||mKXSdlVm2R6^el;P0L4c2?fl*Mw<@DwcQ*vyR#7aNEFx6UG7P9}|
zLxyIR0}ag|ABCI|extzPHuZB9?=~$)?ufcM?Q5!7Q)1OQI6VRye1hD#8(q!L{WW3K
zmQS~NxMsii=^b-_sk+JpTCCTYswZ0XX;)`diD{hu+ed}`=Q<9Qe9*KN?wsJgsPtv1
z-=a5ut7e*toLen3XX?vqM<;6?`5E^p^sm0vFO^Auy}a4gZI~P0&0)yo-)Sc_{oX9r
z`$cN9iW}7xKL1JySwAc8o{i^BHGZL%Gncx*{CRj@mVZf}L(Nr}Cz=kmci0pSj#x7-
zmT_8HrYjY|J!c`ynLXK}dJG&78wAC7Fz+vPpT581oyY7eMw}kG2hRSVbG)c_c~+|A
zRe?!upIIe$9AI#eZxD9<t8Tk^;{Ls&b9{r?7?@-hHauB(M)01Vo$SH07hL;W-n{R;
zeAcRiea|a}>d8<4YV8aOGZ($O{&DDU|Bw6fUke{dmD>94#~h7y6?4L+O1ZQ?O2`^;
zPM7Ws^uM;Ml)H8Exm6dnG{5-io2*)X_4%jyWt~S0{nsUx9@llT5OuXU|Nlc=dz}55
zzwIsw3cVtx_kOS|tnK1TGqt^KbU^FB$mxG~rY0|&v|V5Br>pmm&l`VpaBul@bg%ib
z^4DzN=Kq+IJ^yE5Vd>1ND+@&e?=x^5*_zmvE~0L1pu=GC@POxOo;_TgUAt@h-6L{}
zW5q8ssPzdbPkU?-{lD&Q?F{`*9qUy3*Kg=jnmExz_TUjF&F+_19*Fk#ZdonHAiGY~
zp)!I&=wr}ytJF6U%c`30PB^{Hjp6KV#f9~I5+i3!s;)4yxYb-OqkeeKiaCX@o0TTj
z2>&uKd--SgbkV7)D$YmVeR{M}OFPND=>0dR@*l!`Ppo10XILV8clwb>|CK%$-RnNG
zW~Ej4*GDs^#(S-*zdYHaf4|qptejZ;>H3f7Z;m<2Vk2bR;qh++myVh}!<}hP3-qcV
z@P1psY{3&}S!}XtJHr9D@RJheCJP!EZZL6a2?(rKGRa^_Gwhpjzwyli$CbP{xRWgy
z%gr}mX`R||ae+N&4|8DiBp2@(Mum-ee^}<c)ZDyZr$M&qkHp!PQ-4elP`u0^arnd8
zi;Ea?Z){I0EHhkTSg=4=y=+#uiSw(=X5xXS2~A9$A3V7jE`K;y|5@(*m-NEsbsvuA
zKK=YbSN7ojik>a2*plBH9P<n6HLP_nH+^wFKqR4Ld5KrV7uSEvJOz{2UNF}7<tzUs
zQPZR+w#Hh3t6knz%1P}1Ju%A}q8Z;9JCai*SU(xI-TS4n!DHLA`y1Kp96xT{w}GGY
z2&-<fR*i;Z7N_W~f^Yx$W|Xy_vn_h85Ow8+(~{2(E126KNVwd4e{iSjhWo4c{Eg``
z4qbdRd8YjPolQ676w~K8yKnk)@cXWg)gsC*({7i?DEqoOcHH@4Dk{U6u;zjocca84
zGw%qVV>g$?@vj#=61`#TGUGpDeQxz1J73h+2AVxvAbx!1g!{22Mu$Z|?|UaSL4tjm
zlV8Mo+sq@0ihmylq)W^y3stbPkea2jo<aLBZ@?)BcV-9As>Ym$oZAlg36#9tqdk9G
zW`BYB8Pz6*tHSG87D#zNm}75T+EI7U$3c+gw=F~2{zuOno2NTeJji>T+2vr#Bf!U~
zz|PV8m`|W*(^bz$fd}O_C`)o3tZ9sKUiRR`!50lvm;SIg@FAlAq!9ZXk=@J*Ukb#v
z^7b(NpOUhMf%DJIhcmoaf2sc9G2iA2+kx%5+ZZ-{uaMwtc(9t$eb4dMeMSvit2Z(#
z?4380{j%Mnl7%ii3olx%>1V2WvZU+Czo{`xx{geGH#NrZjoXTL@#~#O%UxC$PvtmJ
zQlc`i^RLjh&d|!jLan_O%WC4+#`qtqJU>~^{^jaJZ0m16V18}f$nj-$iM3xEg8*ko
zfzdmLRu9R86Z-pDm%aG!y<^9u35v%~f7ri#3qw)zhMsWG$H|V<*X`f0I$tWNuP*2P
zhN8J0+qE3pL|YZU-V0w}Cd8lWQ;>Z4|APq(0!#@81`OX+gcaKEtZWso5o{DGkKE+u
za=?W>>dEOw{^OS)%zn0f|L%LMggw;{ty=k_p0VZ%`<_MYX<q{+H%v&#mD6M|UR=re
z?`ODly2|z`>pOx^l}=#hIB|<t_hZ6QBffjif~KAE*PA%(JQ6sV8rWUW7%Ol|@-Riq
zG6@~lmYAyffl=*llR$px4UIgW=Zv3<SNApP&u5am!BES_f8E1Qqv1s67dN5hUNTG6
z>g5(q@%h=#!jR$EvZ*p;);#m*?fY)I`t}K@hIbueIl!R6#m>TdK%!blTh6)0C-0>^
z17p;rme`ke=E>~}oxOWb@Z_(Y*v0s0fk@q>9<7GZ?`K_@UuD+EX>rVB^x3gf^2NFa
zV`gEKFY}Cp(l@*=lL%Va{pI4s;+Z9UcLih?-m(*5;N|CG;!CWm(qfQED99+-Y5UfC
zN|wlkCnpyOB`C&fFwavsc4e*>Gq1w*t1j6Q4l|u<8vJi1c;2^XxWAhD#Z+6zz3Y_g
zjg6=O2o>G3*qiZaCEJs__{AJf2@C?lOe@OXEqKx7G;P5(r}J(FL6tift)8ZmA9pWG
zclQ<7kaW3CUX1JNN^;W`7xnKtE7+WPN&n*nv-9%NmJf`t*!_trfA;@-t6bqkjV0!j
zkLB}3_@A?nUipmiLYmzYhSjF;7C&m|Jg6lT>GyOV=auz~@*NbW`R!kp)hxyz#_OC_
z`R}q#hGXs49~&a}O_}-s@{2=LWuoUFh}gK`+UpWMW>>>`_8bg?G8qnnPV$n6^u8R}
zd@#;XlP~^x#LX=mo(U~6WWCMevGd;Vh-2>-nZ{q`c$6s5y0%X09W%ouKBZTxhZ#;6
zx6RRYG_-AC(o`smzv%LA@rxZ+0r%eoBs0|)|2}eWeyg>%#dM!3k0iNgF)<`ekZ86o
zxbl3(as!9ihJ_4FJ1<Sq-6*VnzSKzW;0MNgue?$o@7T3gmOr^^#hP6k{g=u=-g!JO
zqo(@*@usWW@*6k;AL?}E+br4hfn}HTb~~+|KLyu^32j_m{Qf3~hn?;Nk>@(ir^A1W
zXk{K~U{T`c5EGr_7%arTW7Z9gNfQ_{tX?>9uFXye;oB4|x=lkwd`@j)YI*7ti@0<q
zGwtF%p0@TzuH^=frd-SqlYh=&-}HDt=Xu+Mo?DyQBx?3EZxUH5DscLU(*$X;OAmNI
zEo0nm;J}c;+-ah~Ai%IB@`Xi7(Vxig4Nn5|4_j6|(7LF{@PH?TWo_gCManYL`ANJC
zU)IYV7v8;(q3~g9>;|6bzv1~UW^sv(O^Z~f&ztd!CA#;}%(}1Hvp+C76mm`vxV~YQ
z_{JJ$ku^4-cfa9ncRGKkTJdn;2BXu{S8=d5TyxTiZrJHiHb=^>iAB51tRb&|Uslth
z^RYW-WZub;iM*#J+4;@&+K;uLQv3w@Z#MPU=<i;xS(e**aO2680EzQ@|DVm^yZpd8
zPb16VjOLp?zWzZvHMt6pBb!=fc)KQi6^w~{ExO(03}5y4?!eX;l7BDXJ^oHEJwE*3
z|JT31etsReZhGcFb%Ez@tJk|fEqXBbsdm?qN86b~0+%_LPxpTu<9zG?kpwF%A;&H+
zk9oY;pV~i+ZTkOjX66;8EGh9L^Z)<d_wCQGX<Nnn{|V@5)dnwRejI;bJ@0!C;hxo#
zJw+`l0)t$=d>tl;3-Z|8ZhFVk?>mR%_3b>qf@%w{MZ(jiyBZE8s8pSO=fMB$fM;X_
z*Tvh#Z=yfjeBdsM$WoH}*0Am?laR(;`@gRW{~SC2&D)zvdh^u2&4DvY)lYnDIkb6;
z026}%2XA8o!^&f63}LDtmNJ~OZfm-5h|96y*QF+pzm0D9q&vCK>`6U(=jySQ;><1&
z)$BVI_N?50(0y{^qVGzb`ATnpd|JBkv8f<`T|rpwOkumP`?C~gq%!|MQn`sexcYP?
z-_1O+9Rd2TDz{Jf^}gAXr<fsgX3{}<rt>GHbXKXT?cB75*J8#O#sdrvF03{R6CMZ{
zEm`0s(^}=r@Yq3zF(he~m+18^6)cJG4+s7{_Le>9^0J$fza>o`$8BOOV~*0fYq>~9
zJ9m>`(;m6*2v!4L9mk88oZlz@WHeXqZ=0ez$K#RW`8{8|H|oh>t1(Z$a9O_N{^Qx-
z&xB6dR;l^K)QnZ8y21Hwc~8&k|Nk#4Rg|^FPh5IUseb>L`%xSW4(T5p<eIJ(zc!6u
za_RxE)AV@9b-bTu8*z9={<)R@XKL*ojh4_S@k){0UpHQTw_?E*l@&SjZTL2wZCre<
zX4eDr4H*?n_x~!H*(d)wbMfJIoyVt|-d%3&<|y|0=;1#)+bSeQjxh3H+P_5cUem!u
zzOB3d&)8y_Qd$?a{o4(0vG)f~@EbI!$_DMzu4*t^Z_iT6^z-#y=H)&rR@3*?6?}@n
zd2Ryxrh*!tEC22NqW+uBna{JOZr^Ny-OsxC15_UB94R?vqMvnF*{Os1m&fsGpF_g}
z9_dcG%b&7&ZPM}eSFVam?3D`gKYgfiRtpnD(_E2N8;T1RRr9!h^4W-eJMb@m!xDzm
zOO<8TXgm|Pkv%NN!+GuXp~I6O*wo0||9w2w=%@XhhpcZN|Lj)ZwUl{_|Aae?zXd|$
zKmQQl`8882=mF#5H7xNC4_jkb*PLevTXm1&$s*Hvn?emwu>4Sd&M9;4ooK@a<~j+>
zBZV)Rn_~9x#BnV?!aC)6&b9wJn***mBuL9lv|Yhjz}Uvlz&MSQ`PaL{2Ud9NFv`D(
z+O5N$T)^D7W(HdVb0X(jzVP!o9|Ac49#|Uq@9NReQdU8;B4!WiXm_{G``8>)k0p4N
z-+TJ(h33H()!BZ&57gZ2_&Fm}HwN&mZ2JEBu<?hR@1F(MN>x6cv;JU+wn+RRw!%9M
z|INde?p?6X^xp*s7M2B#Jk9e986;P~<yCyt&HwS^ls&&{k1lRi4zJ<exU)x_$u#(V
zPj=TL0fQ%>O^$d^Zm@1&d*@hVS~=rI^%kEEN^4Kt%#AZVEpzR7fAz*T2B(cI3>-(f
zy#yZ}u*piSF=+hz;h2%Y^v!d`6-=3~BryK#w7F4xg^_0#uY~@EW%2(`o?I`*_C<gC
z*XCT22DJropC?!Fv#7W%KK-fj_UrQ;9M*muf*Th!A1DZR4&~!?JLq=l@dN39Cngq7
zPGpLVd|s>?{`lXWzl&syU2{tQzs}8@rT&>|oAX`9U;AGl5xB-%^Fh-!K|Esr4GmQb
z8TO_+<|WS#FgVC`CUk#Lu=Te$+9GiF|C8r>dQIA~At7hI7qU3DSe9<K<2&}^q*b+-
zGQ-{M**wS0cI-V@8KU;+*owKk-T6{-+*bdc;KUFTkigP4?biF_F^9}$<aajnGS6Vv
zS$Hh=Zw|A?%xONS&xELO@qfLOwoG!);%v1&`)uw^sz1XjaqH%fd&^WdJ&3=0vxrA*
z{<}5?$y@1(o728dKKM?6p&_F{f<drlA49~l5NnPP48}PRCf+cZE)w*-{Xj8~MXAD^
z{BU-@+SnV9I@G3}G-vp3@a1d&1fLo2|15r#p{vlK%6H(5jRWU3Wr;=I+n5;N8JytS
z<(S;DK=oWvcpF2CaMv%@_&{wfmp}8=r>cevciAjGDlsSUiyPye4_e1Mr_8))XZ*lu
zl`m82^gBB|R#$plkG_&qCRO@B=f3GJm##Tay?YO(x?T9SJjuPwZMl(3nMjs_$Hv>D
z$*cbOnyS@*ws&OYjxN|4{CQd@vnKPcx36ZqF}l6(>iGYc|JACM+3RlC@8>?VUa&B4
zio-|dmWGvwPgKua#l|R7xj?bg_gw?)zZ9K^`OCK54d0~r|0%-}xwSGY7?>Z1?F?*Q
z+bU_|dUb&l1JeS5gNh8UQPP}pw;HtEHar$d&TtT1IQOC9We4W16VBb{_qVaqo^8Bv
zajRH?&4h@VGdOe(9C+uN#ITBq(Ll`pVVA+hM=?uJ8{4|RzPT;2yY}sKJ%^pw9RGA>
zg-*_VdHG0F$|0$oUrTZv{WmY`Y-G79%~<uofgwOXL8UoCF)8D~^IN$LoVWe1cC3lJ
zSGMCqvvp$j#v^ZJZ}oJ3DxE!h)8C0QbH0S0tK^@^HG7`=L)99lZHz0<DeL-Xc6nMi
zzVm)3{-cSf{H^)jg=La$iF0n0&%6As&d4v@h=cWaYog0cmYIUn55M^@9ijO5t@ztT
za}y_}Ez~qJTD14ry06pQIC}!Ed>eY7m1tZjs9!dDd)}5S&1?Ssos;Ime6`f@QTP9U
z+iPa!Zw<}gpe$?tk^5f2<iDOz7K^RFS9xLONtNmoJG9Sk?lxBGR{6su$I<!V(Y<Zo
zvtB<?V){Az2bWO`7Z-=9z{Bawj2chW@0HtYG+8z?nRUxAD}S3w*(Zulw&(rjEIx8+
zSzGZ>g);HVyEkSU`EP%i{_l)rUD?zv>vEk#<Nw@zEU;8-+y1}5%)8FqjaVFCS;iQb
z|A^ak$)$QzDY4B{RHb7BXJ1&bb4};U24>4u-4opvv!6BX?BNQ{U%DV-`S&$@#DCA~
zt^alP_L>sI{VWa!77g<ZuYdBen|+C)uHxY|qwbD>RWHiS9t!P$SSRQkw5xxkj<Ck}
zo-C>P<##oFI5?)Z87mn2Imh4AoWGVWVArFcDQZD?Pq(^$EeleRYRWqid%XUVi7Zd<
zuj#)hDi(CU_-4mte}ki6Va1#0XAd(lGO97iNw6;M`XK4VyZOk6Jq&717dLY#J6T=`
zF0eA2o{}ucAhwG?amAIVOl=zT-`6(YJ!gN!dJDs{uD8;xS`NDwOkRCz+m4cyzYez!
z$5|}0zVgE9L7H@gv`yuI%`;U&TLf5qZ4{*Lek%*#cQc&vZ5(qNqkqMp*1bXe4S!^I
z*i^g`uwFPxd68LIe&d-d#h2E-ce!*!((Ki`M|+%@oz`|RaCDf&<htHIzM*FGo4Y}m
z8J;L4Div9<tuxNo-Fz-#`y$TT`4YEf{xs{HShOm#aJAJ9`8_UtjM987Ll|wY8uWC}
zvux~OXbdP|Ffdx<Ym~qJ=<-7i6%$h`R`i@~$=!2A`nR&d_eby9%e_*A)%f1*@VTS4
zNBgMJN1;aw3<8Xk6pBKw{qkq?PtU2EYx=mXvfjc&T7rimgn@-)c{k&`k{4^4{!hIA
zy(d|$t1Y2IZ{O+!{aLSl7KA@hjCd*Lxtj6+&u4Fn`0^z0XZAmzdaQoSoYJhsyJ!DK
zE8gC5lSBE@Q@MH0&QG>~{F|7tXQLELz)YuKW#Iy@fiJqWxdJz^i#N{RcIs-y%ZE&o
z$>$B`ck>E;II*m>;mP$0_9v<>*JNFO^?dSY6B%av&SZr~-lr@K4&^MHY7Rx7m6Dil
zb2+x|kgA)edY{K*21W+v2{wlUUQS48WVVex)?Kae(>hHqp)hiW#{MVo=dpe}IFGf*
zTFpUa&nNjyN42V!X|}C%UjC<H#mhH!x<SghEfZ=JCJH+&d;EPq3!~^EC&r}B7f&1g
zR_gc|+NdA5i`Tcs#L{Ue+k?|BE(`~R0**Vyv~wPNw<sVe^zUT{PC;kK1-xBm$yZOz
z%sHlHf9=1aO8zsZ--0Q<jftA4`&f%VEc`mz@7iw1?;h`%Eeh-nt|m-v4)5$`6JVIY
z)W*gjpdlK*fJrQJNt<b{#p8%qiiHf#Yd`Y%ux;)Mo|$qmG$1DQ<geobYuQVdzvp+F
z8R}9eWw5@YP`vBnzmM$gA`>H%wpFN4RkB*srgDMn*ncDU9qgrrBDe1^=~{WeMQFl9
z27xfSyu#B>3u+c#w9VdNmcQNTF7N7}>@KYW?VA_=dU<ElCvH=DhNHaO8jt)>b>4qt
z&qIMHAMej#HVB!)({FsfS&YwS<(JdejNA7XerRBqacBq(3jE!8AV*H*ds<tGQLjBi
z(1h#yKf||NGM#IG%<ljE<eGcC9!yA7<G2x!^7f2VM$gfCpO_584YkjBZCtHXA2Ig~
zXGmC(Yf@p9gV3T~osVt>1)Fhmh^St)h}rZq)O`g<`P9n$`Ttj~b(eopC|CTW_}c1F
zrE~N1|NmcjDKU6{`i?)k`=<SBp3*m2@Xqh}|Nk8m`QBZ<@3;T|i@fi<Z@qr|+rZW3
zi)q)2O>XXUa!Qu<$nb^Lewe1_ShG^;$Sd`=l}o#BKFnIQJG1IavZR+%{PWv_yQ(!D
zd3cv|>fTx3v|o1ry&JCH7w0Lj6Y1b!Vlr!AEK)Hq|3`TNlT4MksrXh2rL(V@7^E09
zHXLFJxXd6_^)F4QBi-}CF~-HhqQ_=_<kI@wm~dNbvM$$o#?EP(7CGk_Dy=eRsdi<o
zTJm1ySoV=$=Knr^i;-J%RMkS=Xwj#%L%*|ajx@iDGu^%7+;<yUM!}TSgp@zt)AuOX
zM@3FB4wU8>)>*lV$M!(S{PR5g#pbLE;_d%RcoYO0<2l%GmfJ2+`p$6ga-ghq?5m?I
zF9a1x7;U{P+hoUM%`-`VQ`Wx|HL-cOGutL!Q%>6RHAAZ?GFD@UuSbV*xw77U`CUoz
ze5=e_PVURznYg7?Fur+1k%pT8KgsL_=KqZx;=O*)mi1cQf1J+nP3U&PhDEyj6&^=?
zTDtr4L6&s~5_RS}aR(oq?$N+p{Z8c1Ua{aQ3Ee(imi03GI6|M;c?TP8*T4VnO{K&v
z{|{mM%dTa=vR=(G*ZkkZC0z<fj+$?6;3;l={N<~lq5husrTo`^Y`c5Z_e8^D=e{`~
z`8?Sf7#Z1_82H2cx(|N&e^(&IMTL28SIU7zc6}D3^vrOV77Ouv92al<xz)wnx}414
zCOB!?1I?D{9@h7l%fFNkHkID&7t8!WLWY4e+)Bji!CHP-sX4lqb5=!oOqQ-~xjs4P
zs9@z6&lhYzS12Ej5vb)ZK9~Jr|F&)o)~P*@iViafpXq0MX2HR9Z%6a`76yT*JB&M}
z7P<ao(EH?aW(Gq#(^cyr1y#lsnhR8&CpVP%OZxLHySC3ef_KjKqU>oK5||p}dE5CH
zf7)?FW7Qmq#g~<qG+#cz;8HeCf?;1v%PAwCh7U*YPwc$5$C`nKd$Vz0TJrbfY~}wg
zDwplc>@{(GtMR<Vi+Q<1d{`M<>yDUbH|DR_W0N!Af7<_ji5iRZvkb>KTem1Ct=yZ}
z&X>42uI_XO!;MY1qJDL~P7bljx!-w2Sf#w{sDf3WYVUj7)3SlCP79xlYFezf=Q7nf
z_JWPAgpGY8pW5xe??e?(JLT(IIanIxmHp`s@jQ7|<Mbg02Zj#z76z6B=6zN^2{Sit
zb1=Bo-_F2zSk__sV~%-mITQ|>{`j^&IFEaU^en!KKdYvOzVA`84*1s^s>pmsF5}BV
zBW<=|qiY<37alY)K2!=&eRRn}izDoYNnLo8@MF=NYfo}YU9&zXdj9a1bMLqpya}A=
zxcK1SPkVXppIK^|bT`d$V_3if1_ibS3XvaOthIX*-`LN*b92pw|IZm%nGY|}Ss<L>
z8SCMEJ>;*tDZ97q>)9>dYU&SoQXafhKD7Pan#X6gWRutz9AJ}vSR`{GIbsjna|a&A
ztu{Q4WlEikzyIeKU4H8Di?6)twU-X)JUS=ZwnFY9^W=Z~kFYe@<Vp9peeb?f70;;r
z<@JNNdOX29Cf{HU>T6($J1u#U%`=4UZIDg1i|Y5w%Uv|$9wqU-jJ`dcadRf~BUu(k
zKF>x5#tV$itWQ=&&tq~k>~G|MCO)yD#3N|xi31-j4Uez!VF|BS70{TFe4_8<-9&-y
z><3zx8>GuOy|uVez}Vnk@Sy0IWqOF@jfZz7n<C$f&zO_RqAakWfyqG1Nb=M?heGei
zWgBMO@GWY#n%T-Xlh^6~2~N)sd}3Btv-#s6tjSHXjqlv35jW-E_CtdFxgVcK%)e;8
z#HsvcWzk3Lsj5el>Rmot|3CdTH0pKK`Y*dWZ$EL33=?$c*V_Mo`Trut_mkJXZru4H
zt~ucICkHui1zF{8kuMD8Ia?pky~HDX_kM5~``IKJN#_YRjVmYeO=gig%~PUow&}1e
z=NZ%M{E3b$8kLT2<0;kpyC(2^=c63GU%S+q3x59Avku?ja3z_WrAX{vMgGirJxqVt
zO)XOX*WQh=J(gDfEk1jC^F7BCeY)=z{p`4-3#ztU4t)9Y)s5x{3Jly92E06L5@n{^
zN`;sHy}U+TeS_wdg2nq8j$S_c(_YTP_J)PV%AW#kd%w!WCQad9YjpRB<5Pt?gRl4c
z<rdt@n>nGT!ehbJ`nsKq`FFRizpt(Elu7L48UFP0TRUe2Rm#swaLxP7{NKXx;f}1=
zUS+0hZqH=Ra94Wnd{*L(!ee$jx&EE^t&gui#K6pWQbvM7Ktp1?$hX85m$YwXEQoBA
zKRdx#>s{LYjk~A1*1z)qoD(ek+00q-<?XuWSV0^4dsp`CekyDy&2w#KQ=BN%w%O-z
z$Jc&n5N>hE_W7^SF*Al=?BnI8V2+tF_xK+O1}A+!|L~&S<O`n}{N{IEIlu3N!CdEA
zSCu!fFkZ&>^T<!8y{2)D50~tZajNBicRlL;otlv2#cFR97&J60JQM;rg!oJquP3tK
z{*rTz&7>lkTks+O)19|&njD`rnMu7V^C8##C;Ls#UeA_)IqOLLukS2-^VVPGVDa6t
z|E~kXcW%GXOoogfC0@-ZKDgSId=O+3*<isSnBaO~!{4<FB>J9+#7)@1{rQ~KJE=OB
z@|=`62WIRyF*g@i`ZL#nkta#BH{#stWKjkU#@G)U+!4C#C%nA4h~chSX1PGhL7)0&
zk^a9C7WVrOF<KnhaDXW>flcKF=k(=^UN`bPKiQt!zr<RIVM{}_S@5@}4Jz~Wc;X)2
zKGNp>w`Spj4+jHInY##y)d`d(Eemc{R;<@Ov4wl8Q|ydi{j5_ec5*PyQAprtb@r2G
z{ZbHO^ly`b*lWe*zm`q;+xcJINuuzi^U{oMdpLF+^fQ^d>1j@^+S<-3F*ZLdVw%@2
zmiSbmm5}j}(Q0CiXR`kSC$*M0eKG|M%jX{C$-aEmc+c*>t;@`k&J}EPTks|Q#=H}}
z5qXY}&B9i^bQ4!dWZ-jT7cfxTQ6(@#;?x}xFXkDF_nDF=U8oUayTZESzw86U#j1t6
z`@eEJx@(x6F|JeKVH7_rz~|^WwZ6$&*HVAyZ`=OO2cH&HZ<_n=_+g%j32a>LEB0}-
zrL&)wt_s+&(n3Q$Vlq=_x6Z*61>weaT5`*|qR$)<Q~Kxdu2_-f4DU8yV-eL8-|svz
zi(6ndVQq+ztT)rkt|>|m3<^pLQVbko3Cg~9WoODh{pn%a$RL>f+4p&|<d&|)t#{Ne
zY4X^6gkK4n-d86kmb9Z&`QAh+H_Ntd8l}3OT|d?5)cz26n)0X9jQ2<^OVuw&JJU0h
zmH6Y$riUg?wv0WgcH`cj_A^aalO;acH)PG2z_V0$cahu7w;Q*Vy#1SFvi!{dWLwL|
z`Y&@<aLzk-AcLVvztNy{;u@)U4MuJE?AsnFIGpfsXqqGBVLb8F6X~Y;&s(nl*EqoY
zh1d1Va;D~%MB{eNzN4;Nez<I6n7GnlVe{^Hmy)+B|5JS+TPMgE^yrqn!BJr@v!pGz
zzMXtOPp@%`aN>?T@#Rbld!IRV8fu82+aoEO@VdP#>B$Qo29{=p&ITTdo5|Nyl^esf
zCSCaRr!w!|7JrqW&F5yY>r}_FziiOd((amcGTf8rq1p+XD#afXhP-V3+RvFZ^esYk
zVqDA?d7E)8;Fz|)^u+<iP4aIR8(m{jyZVEt-t6Vl_M27x2me_-y0(6g0>hVu0@k<k
zg?0tMYANVzIk`_dOFphq<Ht_RJimWyW{0r8S-&jQE3I_b%KbT;&gv|A;kfrJ<98V&
zku&?7ZmfG9vU{N^cV@@W*6p)Ta0SnPz%qH_OZj<H8~lR``Okcic+bsZkzF<=FjRuA
zKB3{#fd+;fZrsyLTLSXuZ4I{lV!8Z*s_abtL@D7=z8g-@<VuV$KRGPM{bl|E_J&mZ
z&xWB(owC=f$!Og>GvmL#sHqg2E%)xY4Ao;NuU0sT$tW<GFcdp*c=S%KQ$C))&LDRB
z4Nk{Xv5j-|9CEB$gH*d2C&}E;)l@g|Dq`RN)#jboudj?rm4@p$pZ)oNrA&Y`;%4Sg
zH-+Shai1nUdX(~I)uYf?p`Ax81NF3D%op*#rxYM7FXcBiB>z?YA>#%X_os1}>i;j}
zZs*_sy|DU`)u$)C_nCAeuRLy@*RYn|WdmOoH%HwZ!z&uU7{AYNX|U(xo>3FidB42Q
zbAx`b)9--l1J8Ja+q9M&u4du&R;cM+yL#Vsb7sy51+QlJ{jFbR_u<j|`hRUNS!6lS
zT(z9_y7TO6^JzQ3KU8|cbs^#X^)G?U@6P}K-*voG|9RdXE}LD!I;#(QHN39(v%CM|
z{KHa5rV8usqpo*j-!>eNO<#M*xp3aQyeWpF2R^2;xjd+xB7foH?Ug+n)*Y?sYwz6r
z+htLpU{;$xn{`kQ>&?vjLcbS(p5iy-kkD?SPx9YxrzZNEv}~6<duyiS1J(_<Zt@9-
zz27fl#1J;qCoLpgdGD<M`#GX=T2|c=VBlb$!yv@Wpv8LPPKL!|*~{&xIe#*#=ZM)f
zENDDicf<G5lj%|{p>iE&bFQgB_{6Nzz^a*ED}PZ?lrw?-=Mjf{_snM0bg#PlqLjn_
z`%8oBeLCE7e-6G1=X6neV6}eb`U}FPZ@!f99la_(dERc*C)-*xE-p5lCz8KGe_9*U
zt0Mv1qZr~|+`6|uM|`5^kKYgSgHD}4VQA31vgqH#J1eZi^Q4wmXl3(CTt2GJ;ke+^
zjknfvzrNq@W@2RP&WpaR|FHhxvD3!fpXzgu<nSnP@U}Jf@^~C!=HL!K-Vp9HlV5qx
zXPzwyDpmPE`CqPMl&}BCoMe1}@j=+VX^KDc|I2WMHab*J&6Y}Hy0l7BorQtr0N()-
z0pSAqKQe1(oIMtKjA!8w&eP3VYVPq*Lv1cD<PpiAbpFwtO_w{LE?~&q6zN>@&!qp<
z`CCV94cATlwWVhd8(YHpe}>-jJKp}fy{26z`MhCqeqnt9v&4%)uLHKsWpkyCmu{Bv
zo@l(&MYe%aU=qXZJPG@>!wVdja-6);oAqzH!;FR`-zL?7J-dGGII_b{hquf@;oY8(
z*PqS%TmQ@QzOa@DyENCH?Fsr*-Pn7l?_+LYWN6R<ji_iaaIiBnxZe?uudynzlUi^w
zrlNU|$WhJ>Oq*u3%-=1wqB?~~AbFmff^~o6y@k3Ayp^vk-*|PnPW>CZv|xTf*){`5
zEq4oxZ55^oUcq0&s%n-CsZ{UZ($f&)_V>MFRRaTqivoiU2g6@B#ch!i$G%RLkTIU~
z<wtLV#oF$P2iKY~oQ%D0?7?f$Xt$w(v0=ym{~H&ntuA?;uFkTd#{Iv_o&VP#?|t_$
z`nueb!-3Wk$L^kGUHWj%=QBLb3O<~lu5;x@-c6Qb@!WPUX6xI}0cToR@&jgF``;kA
z?d+lc_<xf{XPhd?zFxrSQ`>nX@wpp!9d`=1hqd1V&BTHRh8t|`+$}7Yi&!Is*%Bm<
z$e&{f{>iOb$*HE)k>f1!-)YT`@F@p`c8P?y)ZV+l^__dtWMR)w2kf6{v3!5>+l%$I
z<lC2;yIq!fPxEbIU~_oZ(&|{z)pE@5f->L!6m!2PCo<iArL>KX?2Ss*Iq#~##~*N$
z``e+OSuY%ZbJrAU_P4S}8EubWlF#M0A?en+xe8h3x%WPl#m~7g`*8l+7DkOSHpw4m
z+SzW-&%{hT#6&KBh`n%o$)#(bjM*=2kWJ56DX>DCxz3UE$9$dkEe!KC8!xnp|6@3-
zsJY%HO0>P?>&)k@c|Vk%|6BCn%9@4Vb_}QE)9rc}h=)$ANY1}8>)!{}2#J}>3#Mc!
zovk?T*Z%Cl)7f`;L^;2hd~X!p7i%y(x>hY*B127k)|#FRkyc%6CMBNGeH$JAO+Rww
zrYBnuyzMeR`)he%XnXV>(+TPJs_AdfnKrQg@D`nxHCy^n-^YNZmK(m>-rOMeskLGE
z*ZW<fT|ao7_N+?2fA94|21db}3oD%c!aJvO|J<kEx_VX4qU6T#{pNm+EQ?=stoK|c
zu&+D$ZITJ2{FQ(1CwESd$&(1>VrQJ{>sGhNa98k!d0IRg8<iO&ySTls1Z%G8T{5A?
zuw(lNYsYSBi+eLAW}egDfA<p0QWnX&&;MS%4sohs|G@FKnC<k51Gd@i%U7GeFJCkB
zg;2?+GxuxuSv=Y{_w(U>R{ojK7vJ9#)T{9Qou+`jsr96A?Ye^x(&h_geaPTp;52e-
zU}#vuz`?xB`uD!Hr4MFU_Xabl&I$=qn&idsY+9n@-Ii%qCpq{&FbFW8FOEq!J#*tz
z%>kV&RrUvF?zlI7>w)_m47?42F4n7l2l45fGCc@d{@~QI_POm3tw5cenD9%FHd?el
z?|kZ6lrV2<*A(_*rk3gckIHtbXX#y#X4rRnAKS$*7bI=k7!ww%w8(9^)A-U!v4d%S
ztZS&YqRI*V0>hk&2mK0rpS-`eU!`44^M!wJOvCT)j_2}PCX0HXB_tboAMrb=qG7Rv
zhn<6uNlTn9aCU3b@=c7#)wtv(UH+x>9smEQqGYw|?^oOl)%LWkPe0ps_+Rp~s#iPM
z9)4NC{BerXasdODHF05^WZpBn-~7bpaj->)uhU90?+L&8g=_KF->cS%MwoO&zmjfp
z$eP1__{~<f<-V*93TaHv$9JnVF>OB|DJ~-Mt0F+^*~5Z|9?q){b2e}b2naP?7BafZ
ze?wZ)bHnN%t2DFBn;h$;u8VU0<u!W5?jcvPYsPIojcI-dYbrx79y7Cbcu>d8yTonB
zcPYulDTS99XG=@Hhze!@5&R=bF!I^^&xaWUEfu6(V<&bB+Az+&aLBAcbGLK<-v8w;
zLb_k4sR-|!m!6)<d;j;uho$?jIhiRO5!iA5vi9u<wF~+J^=%)#+t+rEqx|`2lghtB
zn|t@1>D{W@u$EWYL}tzL6P<TAyzXse_<mWw;h|;Y@=I23j;gg9Z)K*jsegMYez<z=
z;>WIs{|k0}-=(UX*%AKoOLe<Qt3}pYMm7P4g${wuU1lD<8XQ~yPdL8p=X_;_&zI-l
zG*U6JyX<n>IAF2Di?2V8miKjVE^Yh6G-KK1i~E01n7q#8_N&`Ex3||ZsjX}_{A|z=
zBiP8<$tE)=v6SON@`Rwa&o|cTOV`vKy8ryFt7xdE#ezxhFD_-6n|!R^e5T3SCE$Jb
zMvvsM=0`$%S?`qX@A<3CuXvM(K|8hQ2AA@l_>i`L8<=@G4n^2H{gpCs{=2zh#*{<-
z3A-Ya{MCAP_5In#^=E$ezMdoRET*12k>~Vp*6IuGT4|P_9&_%q*}nKQmzL(0_m4Cg
z8aRLNR?~kyXU&2oCQUYRzO`0I6V?PXYp54p{`pP8gHe3vq@x<X=d_NA_HC5sKFaGO
z?S3GF!Ih8A(Lq&_zvO*QUZCJbm-7xQrZ%6qT%pOnH6U~KZj)Y@{4j@&n*%#l3%?yO
zYG>BBc_w!B55rzH!H@6HZ!i};%k8eZDu-1;;IW$h+Q)K=4d+kJzmd}{z#zcN?9{`s
z;1Q!{o{+$Mei^^*KOa7k<=gVkoLSw6(_`WB7jO2mPh{M{5OYQHM?(GIi4&JPr>yz+
zP&IA&yO@6Q$jk-%|EstBpKrZw>RvsreB1x&K0AcV*%$B7+%SpLiF1~?f#<TErfd2?
z<t8XBzP@W)kLQ1xclFzpoV31|p6p9CG-S+^vWoJXGq-{L>;-8(om#!-%L<7H4=7Ao
zcDPJoAFt6d$%?kV0`Z#PCK{_FZ@&G((w%f_7DHnF?l98>@`hz@W(<ssWwA`Yr3cUT
z|E-tFa$u`A{dwJmZQ1q*s_dFl@r=ulnOtP~pC>6Mk|FauYBz_IXQ8kBSG9MX!d>@c
ze%@U8CFx9K)7&rlrEecJGH9?0a4|T@IT-ZoG4bWyU;Xvr!l^Nd2G>>uBzLjBVA)$B
zv802AZNaXmapEgf^Jlg{_n32XsnX3pi-uU;=9PhLYR@;`-yOR~=b+jCcMS8kU%%e&
z?!n`DA>Ad1p|Rl%vp~ZC6F;AI_jrD0Y+!H@VbDHsfWbpZ>+?*82!Wt~j%^J3KRYf5
zEL{A{Y~y#G0%IN~#?J!xbB#nxjj9wU{K+?1Asf_{BE={3;<~*7<Fx2|Oz!yyRtq|u
z=lHjGO_5>3+lczqhULjw*V=#H{b9WNVb})2^j*c%J~Hn}4Dk+W&bD^&Vv=EHFl>$%
zEUGDbxPD9K-1YbRrYkTAuqVf>vIuNB=E-ZW_Ea{3({a^IV`gUN6;?l7%?x6fxrP4V
zvhO-^%d_lP%k(>6k2rWXOf_Kr6Xmlz=?v>@u_sTtl;&r?Q0rjrDfumCx_81a!wr%C
zhn^VlSQ*_t`PM^Pi&uYz9gAb*Q5MC7bsnWspWB<PnQk3mTKmrNhTI$15-X-651IW2
zobnt2VNq&UliRbKzMbEHu*h^LgM0k*`?)-eShWju*46x1sq{S0^t+|`U)idkPvjL(
zt(Uy?LHTCyvrAIDc;6r1w&-R3@q;0D8`Kia@141zp8ntXn3((Rzs-xBJf6Kfr|bE3
z|65y;uGjas7`4O*OzPD8dSiKmQdv&xloc!W&X^YS?ELsJ<VDfsvn%IS>&<?}q{<%j
z_nWwmY+olsql08(OO;aatQVb+ijVdR7GGePY`>DjoS%O|-+|1PHtYsrsYwZJ0#OVu
zth)D-12e)!`);L4ePc`fcr{63{b8OntaaCIB(g1}TFlw@9*FLazaaj%VCmhR)AC%F
zYdAP9WSD6F#Pfs8R-L+tjokYjxu<+Ot+#H6$c)g|)%8m>*8j9;e5sIi+{ba<l7))*
z)EGPGu}q(0&}$~6{&V@}Fs>%wfbaijPC0y}uHgLR11_q+_?91-IfHeZ#cR3OEw}O;
z85kMwY-nIfUGe1t=Yb+684kT4H!@814Jr&1?q3z#rM)<G&xw!M7r2~XIC;9=qUgX?
z+u9SCD3r71^LD=o+r0D4B;Oe-znZ>mHk>HBWBShu){l%O0t~WjVjEtF-ff9^Th}`M
z^ct6K3GOTZYQ`AqK8+QhlX&6RGGXU+Q@)B^<<!%^W@;y`Sns+o>PCf`wM4mf<F5J{
zQ;bgEF|2xL`L$_Dkn;Thfy)mnT7KEHTDa@9_s*C-eoy!8|L+?5`qk<+mv$wUU&#wN
zkZfE3XrH~;{)a!czQ)Jgzgqvw;i$m|g{Fg+Cn_$VJMdRS!<gyNzk-P+%*JAmMHueb
zb{yDb={`4%>+7H9lM@{e9jMkXGI-T`BsgJ0&kQM_c!ei7Bm>sV$0qL+OZ+%JWj)uS
zMXMf${w*-=>)yyWYr%?2Lq`7gleJre4rlN@dg#1{-SGOgB?pclfB22L^!2KB@*mc+
z>!h(39{(!(@6h3jre%DQ`tz9t8oeZTBpo<o&!?>SKlFM;c>kIe^?Nwe+T!f)sd&g9
z42hTiP*Ae{`hyjRKjjLBEij7_*xW34@8TP$NKLOND^|{SQ?R<ttiRSc(josNlgGkD
z&-X${I5qFRWjJ%(LyG^s#=K}<h1Y*Hk0^+*d$~5)UfF?(`T6G`C5}s;^PV_3A)$dq
zVZjUsKXvBKzXaVa-e11HNx>kZwyy1hrIUS6=d5GLF0Qegc*{^DUE#-r>8CbKti748
zcv1Gk)mZDBLgH8QKNzt-TE=>HzsJ96P1<*_e0t&EXP0R8qGXf1!FQjDIagMOeJ~Z$
z`mtT&%7-ahyuXqSo^Xf?B<xBMKJsZ(#DYzGe!a4Ob85n~8GHXHG>I;5G7-Kf(pPuZ
z>`;tY59@Ka1oM5oi+->deu&flT-WxdcdDH0^<M^S7Noo=S=@E^n)-xQF*}ZZluVKC
z&9itoPxuvU^>@WwgSzDlT^QKT&7WJ!A?9GvZeGp&PqgXVrC+lXuZnK3f6wFbzA1Wd
zN_D8(^A>JKSH?yW_a3GR77Ewi@I|HEY`kzOwBeMUkM4ylTax)JYOXoib6bZsH0=8s
zBJ#Cr_kMNlPj)ZrMSgye|E;5$_2|VCUJZuVc3(X1F-T;e|0_FtZJYSFsj=49@s}Eo
zR+t>%;b3Ucx^nrEiLENj8u15;^*1(1KC}xv$UdLNP&3}oF7eY@jeku4Qr^8^yr0wH
ze#0K;*J^?6$181`Hy_yc-rnliY-x@9?)bH-d@>wNavuyBtQe#PN<<76>dUM!bA9lu
zaSK;~{N+EQ%NzT{?}#s8%u$=bF=tbQ=7x)W>;F3IM`=wJv-@*m^TlP1yI!Rjoqf-9
zsHaIFzeqT6ZQ1l?MfXfSwQ~E|zRvb2`h6^S;`wvarnZJY{r<UW`UD0J2BzlTIYL4o
z7O*ieF*^P<lCbt*ds-y)q?$P)!=7;hOR@7IwLTGB|E=m8I(e)do7XN_*x4{~mAd2U
z37gd<rbzA7&VSgVbxKZyH$kPX!11uBHlMz1_crTU`)8glTKJ{G)WAZ|p@D;SLg5C(
z+i97>o^Kcfg#2TR)p{5M68Ha%TcGin$>QPTJGEzhODD|y{7B8a*5Vy=&=a2B6<k_}
z)pYMQ>)3MX%T<V4cK%_BU`yyW5Lg?|kRrZZ{ds=moT^HT70e3zk9J*rp#LEz)Gi>$
zglm%G>T@RfTc)1i`C|4zGRjujFjC^d92f8FzCO+?!e2-VialDXeJYUAf;IR{_A~_x
z0mC_^cAMhjzqppZFGyrB;*jlGHq9t0I#<X1?sk?1%oaz=drH`5>p5KR=#Ni`v02r*
zvEllyC-+=*dRF$V6ZF1Uzh&wibCt>Lay<W)?#L`|v0gYeqjXn@vP$tG7M&L628l48
zvLiEXm~RNLJ`nl5G%GvI)G}qhv7T_Kgub-Et?PUL%>JG?p-%l%zh=ptQ#_^$2|~5U
zc3%(ke*LJ&?4p*?B4%qjafSC{nrZIoD`b*m*)B~lU(k2D_M|rR&xpC^8R`o!Px~t!
zuwG`vu`31L4Ts)*`0-*hYyQ7ZF_v^bgM5}-T;FOsXE2^&w%MC+vP?TJLix|Mbxu=)
z7`pd4Y?^xJ!=HP_7Pnf}ed5Lbe)v$pl_Ng!D*tTxmCR)hTV6em_<vBNEaPLR$Ksr6
zm#Yt*ES$0CpZdWm*LE{IXUGY;%|E!S!fQ{2xcvF8+cI07H3JJS#y;x*ecOuJ+QI0}
z&OiS)Ni2A?=T>w7uh%CTKAv^x*$}+;n^1Z6VS~v!2YVl$l3CUGdzP__e*eugbGyV7
zRIaIeCvU!)XM3xk$AEDy55s{Eu19!|uqI3n788q2VeL{BKDcZFtHz1+z^D$U)m=<&
zewQj2-3k?XvSoAU<-ghO5fU?Pp7ThR7ORx3JY_1`Cigkk^4ZjKNreZi|3A=OmvF^E
zhD|M@(EGs*cOB*p6*XZxMU8(0=V>jpP<;N{l~bqLvm!R>e7?`~?#%g(2Ur!K8-K9i
zDLZB}x4ZdFRL=by)dHp+OM6N?jz737^oVn2Wr|i>qX5gBju#u=Maq8v!uwNDIfa);
zZ%gq5r=2oY7uKwlkT>UL+{>O`AOAcp<GP7q><Ke9tIloROPM&>XKgTj@F(I<_fC#r
zgU<MQUYyL8e5U_8*j%%%+LY(1gzD^aF8=nLoq>hHLx90ShC9Nj!t2hF$BwV=ct7$n
zzHQ0gx#nMK?AKRqhZz{QE)0EE?)XxPaq1y^`{TY3JEs5Ms=B4YN?xgBZGg4Vt;WVa
zA<@Kugo9S>X0m(TXEmmTCtv^i{B=a1h47sZ7D@|?Px<$h81j{K{1HEvzH#^WA70n(
z?L_1XEKi)dZXmlOI(^1IHV5<6&!UF@|Lk{NahGJf>LZwOz)$jLz0wJpf*A}ik1YB<
zZ&&|o{%uO_O^#&;cC_nRC~M!46OH;G&T;MVuEcV~vn<~jr|2*Lk)&|cbwb)5&WT|z
zrMIuW{Q5O8Ytme~>)8Q`U)Bj0DjZ%cSUuC^-E*;r!s}M6ia6}gthm(tDlfAxl5N6M
z{r_iIUE8-_SR<?d!m5@(_qX3(HgA50tD~oP@c%#BEmE&)_rLx7vCjT=eblQ}JG~ap
zG5-D}E+qfT7Q==_i?xd@<32x+&RVo=^}kmMOWwGbO8?{8(|B?J^xJ2@OsZGdqi|+#
zQ=shw-;|Vun%RF||BGtrUN}Q^dZUBM@s|G0lb6-A2LF1s&Yo@Edd_F7Y7eW<$yF{$
zh;<WhRTq9&{-A$FbC%ui{|}DMI(hbAx#Ir#p#QJg^f(vGw_j#r6P+iLy8iV+fvhbd
z|99z_uzP%-Quw{m@v(r#dRO`T2TpDM88>P6&M&eei&Gm{%>RFa_ebQ@6Jl5IeQ@5<
zeDC@CMD1Arpz__%e}7;TU@(v{5v)mkx?!RpqwUs!FFXv3xmI$$?&9gcET6N)sKL{v
z<lZ5fBfKeJ!+-Z2SsXm=Igi%;nD^mx&Sc)&y!$|Ez|Nq~|L6PS78kHc?y9wOkp7^k
zX)Wvj|2LB)^Zc0#4Gay2njDQgr2NmTDQMmoFN_Y;I`D1z!fl6L*c{jX2)yWg{>-h<
z+Xb9<ge<d|tJm>KexvlUy0C*&@BfeyWH2<i%gAYW=Zx5Hp4cV)^(D2X2RP(fPXsD1
zeatR-i}U2qBaddL3obgkO5l@RKnME)0|^Eejs^+Vp5)uc`eqHu3zBazs7*0=rf_ZV
z7sGE)s#J0qHIj4AHEtGakeanlt%Et?==xjhUvKe~S=-Xn_+gh#V)trZ9lo1KxO*%2
z+C7VXa?y&>G;*8CLj6PhEv9NIx{YQFxb#0|hDc}#g!ws#8a!)w%A?S*n(N`ieZdAY
z=C7M}+&y>g^M1M82Nqme<)|RL{d93s-q|^&jsZ&<Q%{t~=uGjL-k$Yu@uw3!tC^!K
z&x`pyuv+I_vC7D?;msii<|C|sp3i$}G<~j%xwApSdzBQf$t(TU682f1Qi>H_X1MFS
zpG4b^J2hV}WLz?C+kdL;X4#>xD`t02hUPTfe`**g%I%;UwM4#%Q$fO^kw=fMxt2L^
zMb5!FJ3hFEP7^9-ciUu8!N1tqa8}>84$V84-193p-bnE3z0mRH$3dCUDp}QbQ|T`s
zzs@eNa8WZzaC4N+pO^jbTL0AaX$}kKBz@LP)4s6$dVK8lJZ8b?MLRSS@0~oIbn$BI
z9I<;j&l}qXJa&4S=LhW#>a(1{`d>VD$_dMp2Q=e+gUffU(ONNe*4N#fMK+PU*MH1&
zVmu(MYhbhF&<CYC+5tLS6zY>?vd{G@+<C~sZQ;P6Ai%)Hu<+<Zripi^Jo@;9ZN+En
zw&h1}*j`zt+GXS!u>4Kq0rvF7X*r2gb}KVJ*pnM-^D22_THDqu3XL^(%*``88sjJ3
z%_>NI?<nW+aPPTxuMY>Vl^AVHe%i2+$1INJq3ypA=SeLL3XDB$EyAoqjS3P%A7u7P
zi&)6y%Za-FGk1&mSjpbUe5>Wju8G^78zcA|t7foieLes8xz26bjSf4$KQPYR7_d+K
z=js2P!P{;(hyFQKq`$t4_wcj>2^oza>L)U8bP<eZJhV})S6Os^lIb;uWcQU1&djWk
zl%Kgo=I%2;EzQny{(CQFCgr~MSZYxf7;yT3?J}>sP0fGys+za5$(}lO{t$<c5o=4t
z1q1cC;B`HB)z_>}dZhR7`_}6LQ#$Re-A~<D|JM7oh;?dh|MY)_N7DKhEU=v(&1bXV
zmej1qb9Z_VI9;!wWBya<=cU50Yi6weYm$8a&%uQ$GV+n#a{@J%nG5Ph%y)d9c*Whl
z`y}`7-PTW@_<!A`u&XKM<&;qMP!;w?YeY@8HQB2z66G&+t%{d<SJby6HSW-Q+xyQg
zo7)fj=sA09%5~LbuehVNZ$(j#b<z&mPboe}3-@hRaoH&3@lHWXv88JLeYcwoKTbBN
zYbeXuo_*@I{Z-x(&lwT5_a3O+Kiu%r)a)rkq0;eh-|LRg5dNJY(a+N@ymOAB!x_U9
zYzJ%qH8v|mnEf){7@%{ng3&mkH=|#8-WtYbdLIj4S*+UE%YD=4(=8V30H-kRugv=|
zbCg6}Hr%)Q?C*Q$EV<_$(|9A3c;KYw4+U4JM`z}Qnz`yUZ%~op6A9M2`BCFWUHhp{
zf3EpnYfWWD?`?0jvsldU#@2ZKhtVvqY1iL>U*9K~bS7_E!Y+TyL|ftd0|$-@I6Cb<
zusnW8QtVg7yZ@vnepz$jKX1EgkA2g=)Xm3adfQz-_wkrWvhOW#TwnT{`9>Q9qg=5=
zn0EUIgQ*+7?)Y(jDPKpP<Cb~Xj*HAUf6&VDuyQkR(lS}5jS}`^R;n^g3fiB`e|W?k
zbe9V*Kk~x5=3ZzItM@khtF<4W9NQH8kXeQ)A!4zJ$-Gy0pB?|Xw2#aAXZ*a|3@d7?
zCsbOp=!;4HJIp)5IHPgbG@Xr!508u6TIyViOb@!Yl{M=_)2o;VrD~GQz0MY_YxNo)
zeb8`tf7r-_zk}@$>!OUWR>J9dG9u13;ZHsFripXe@fj&xw0hwZ7I|p-5<~N(mg^pK
zo3eOYj{MKIacoHl4b6G><-<EyW?$pxZHy(_eNJna#6A<a@b=aNkAK_0==4rjnB~ZA
zVDQ3CsQv%zgZFZH1y?Lse$9UNrkt-XuNX@TvvmTEvTN(P+YY}}otrqP)m1<6J%9Ow
z=smj%t{v|@Aav;1OR1gTSJ?;D{j>ep8Y*_;{H%#S+tSR%9t29bigS47UC2LnnoHg5
zR&v1Qs)H6285kPYKWMwwy!np;ll3VMmQ=f91^vkO%?mHH=rrF@uvz;5(bre|mR3xQ
zK4dZ1{=Z+i$gvf3#kW3;;rsOe-+YeLV|)KEn>VX;`M-;^{wcS<zH{T}3tva6gh}@Q
zZ$Igd+gNCLW6J!iQ)e;-RkSeuUeUK{+ppe+?+bsttcl|NJJI#+Kdr5?k$XLE>e{~Z
zWk}iKR=&8pt>}r}`7*I9%=TQnzeg@#cmI4jW84l^^&di3C**_W=YEq2@z7Cn2>R$C
z?OuDWApGK#&wGzgTr$@&?5orPo<6~VhQ;OkR_Z347rAj^gFM??38y~=3?(n#a#SZ(
zUbyBaSZ^+}-ajx^<3*;T!$LkkQ-0<qRUbRye=O}k92f#uFi1*HG(MZcxlJSURdGS0
z)W7-|uf{11Pp9`b_WW^AVEQdtlC{rc)$*48(YvdR83Hx<*k?^G+n>o=B4c-AX3`l)
z)=a(SUD{{rIbuKXJmJ%j+A+Umwt__wqrv<y^U5RB)f0H;c(=W;G32ssY7=4Rlh}E7
zf7jy&Pv_dro4PT-<cgevXw7ryxnU0$)IEru7m&ugXqB;5(eakE>R0dFaOd;V{ui3E
z{Qbr97V$C<Ei1|R-e23<DsPB3`71n|_>C)wlUd`%&y|G^vTG83_D9thvhx}GK43c_
z%l6`N_cv+Vwa*f>Sryuv&Q1C9vg47S$eAb6Yl^tOeXz_uY@zTrb*8V-fs!*SDI(d+
ze>d1<?2nL9J;<`CS}bhxg)@ij7cNcN^uY1AVv5F_7E6!y!7fXuGL&>0RBp=g=44ts
zMev-F=keEyuhlmDo#*>eVd>y9rAapU_=Wom4lFv%#Cmhl;w4undFe5&Tapvdu=q!q
z;F)3u1AbA%t?BH(3O;(0SF?gzp9rkDwOm5}ke>7&wSpIO+jJ%shWtGzRO!d+&@i!u
zp<$koj(LaFmpzZN6GU4+Jjgq_L12o@x>im-14e0$TcR_#pRJH}SnxhS?aL(1BXQ@$
z{OSx%w|Cp^S`qldl~a^qeP!4F7vfya4skQQU&NQ(X5zeJ;J{GAEW^^1srfvl_k+oS
z(|b4yb#@q>FDqhOY@j3I$#Crg6Ic9AVVmX4dVbnm`)1*NadF@hotufAvxU~wi+VST
zJ2~e5%r#zoKj(*oz@Z-k4Gayd8yFgvDc`u`Wx%7nYsRMK60>*TuVB{TRDI~T^Mk`@
zNzHc7b5-wuU6*U>P;7fTf7(ti?Z%}|=JPJ;Jmr&#V|8;lAN%P$gOP~j%O)|0$qbAP
z4H|wHrVPP*6Q>#Q?8x^^I`Z_umjD)Xo=*a<J49<I_$~SSkwfWD%Y-{jZ=U%1e`m}V
zoO(i~THs>La-EK6r#8q(Gk@R+_;RDJ#Je#%Emh!2<AtjaX9}}lt!di!z2nrj$-%h>
zvR_^-yrri2fa#Ef4pU<Tt0POW$-JtWDi&vsZ7hDKdLVAk=8HXNgeOJu+g&y<m)XB?
z*(#~bM$^N#JCZnKG=kQY&)#l7Q{z^xj7L=Y{JpL_1tRw{{AT~7@$le@NpD-{m%fZ%
z@L_-c>+)@f7hJLBek0CVC&S|X>`op3y9-UH84q81nKN6%sOHayNf|un4m(becCFjC
zsNjF%>;KN1`!pKoYk#n4z16hf{>MtmC5Pls)o;?^KXUHlH={fJ7y9M}GCwZ)zh>R^
zUn^EH_sM@gd9kT1$$Zg)jLI^JX6dzCP79x7JW$Ma$NTfQ>v?s8+|?gMx?|OD-e@g;
z&ug)&>YP>YwTCg`YRr`fHa>67IDX<ob()>v+X+S0DhcJ60}S8G3bgDyU}6(j!)S7v
zV_~WHf4c|$M~w_LUTxtoaQA*+$8>^Ah|OYsAA967745l2lfy+5FY}1je!Wn5RGQJk
z{lT3BvW4?!woGhX;r&5hs??yi;lYG1J>4IQP7i-f@BL7|N!Ol5@WAaOXNv3=DR<Yg
z^9oo>$xN$U&6lFVyZm<O;lHnTbX+YwyeUxN*2(6knGJK4KhC(}svazEB9tIoFoW4+
zM{M!&+Ez1%Dw_rWxfRbP`L;DY{`G;6d%@f2pwp2|Tdx#0M#*~4&z<d1Z^*f7ruc!D
zizRw{k{FMMve^Fm)N^1;`jaxb;27Nv5n|SjH78ykRO(}6+y3~XT0**w#HaJ@=~uK@
zeLQ#QwFmzmmt&4!)jyZ@d$asrzh?j6<^MKLxV)_5WzH7vjSu>r4{S-k$3116Tzz+P
z`03L9KQAuWw^8oFwMA||UpH$oh;nBcep*vf&HPME)@yd${6iC*npo-_3xDxulznP4
z+_G)qQqRd(WQ!8zHZks5mSC{VX~(`R@{TGS)||+fdcM6`X<DH6+@#q5|DyD^u8*zv
zdZqqW=v7+hY4`d3t%B?SKVJ|QzHVFSXU*-JVUiQLtrq|PJ!5Zurbo!y!`IFIuix)+
z5DTr}!cviF_51hxw{DKL_OFj0T6t{h`v0#*AMSK;+**@rulx7-hZlt=i%w0OEv+5<
zf9r(_if5mnxBU0i;br~z8v_4d*MBi-JmS@1Xa7fv;ltII{q|=La0Zy!{V|SMvqvms
zqpSM)^X~;(I8XkVcjSKJA_3DI{e9Y}HZ=8Z-tn&T?d%n;91jaOGhLXLCNF36RwOwv
zflFcifk$td<k|QB+qb?-fFs}d*@wdX7A^++53g6f3^CpD|Hb;F7qnBO^<F-`=gCmH
zT{!DT{8=T7CDz&>cW|{I+i&^dr6o%VmyJv`(~0uupRAl1Toen9B4)SwZ<b&ETVV2r
z-fxw<k>RS>KDRW7E;KcN5$iHxvF4Iw7k&St<xki62VCnZjBowZF=K<2?1KddMfnZ*
zKQwTNS$QXhYF6FOO}Ve6tm)fl%=&)9hJtl_drVhYT+7l9>yM6LNcC$GExHhIwB|>`
zM_IMkzukZLF}iRY+-MILF)({OPjsv9We$fq2N@V2FcfvJGvBJy_&4qNgKOPV>}m&9
z-a8&r&{<~RnsHl{`M}>0wcXsFy>IVV^xaR?PJQcD#&<)#$BZfGjHB7VV+@m+HF#}R
zMN3%{4fbS}NLsKkFex%9Bsk^zoDk-G_hiA{&_xeUSVS#c(tZ9RugA2*avCh^zYflw
zD01}fy`SgmnrHXuuaa)RuAXZ+%YY$#%H`O`XCmdAXCoF|Rg(C9_~8l%?gn-)4hD`D
z50hpIr{7w4ov&4AiH!B3|LrP5F>2Z^`&rH;Gp>DaUTmLq?C7!$8S|Pei`?#~WEcf^
zr1Z^kob~ZW^}Zdp{w#85|LsU;WPG$|_NM*+bDx<{{{JuI{;&I&@3VWozozQY8EUan
zu;r_FfjWPiOXM=E!Yc*Ow@2)+oWJz{j$c*&&K_5;+Fy7up*7F+=ik<)hXS<tWmykT
zVzhN{y#M_`<TYp2H{Y(WTy<{i@`XoKwC^z_NVSJ<fBI9Sv5fBzPc27vg7ud@D&htK
zAN|Vawlb8;Pn;g1ar&?Ss{Z(YYgcCNQ4P%e`9Ud|Ws&qJwFedJ<!`)Q!pmC6=6%%l
zlKABxiA=5uO(B)OZ@!0!PPK}aGynhV>(!K%tS1duO1Mn;7ysxC<Bpt!l*5k$KQ{5r
zN?aBAD17;90dbj=bNRV@8x%Qb9P(pU$Y^#f;f>kYQBaxUqkV%bbqa6jsfwkmu7v*$
zP2`uHVtwz{((?yrNI7l@kL=AkR4Cud#U7(_?#~@-?gp2G3S0Icx*YN-&PgD}MNss*
z!VXD;f+IT^r>!}W@R#Ru!i}u8zR9AGe|2Tu2%XjXT3hewT2qD-way7DJ{sZAT5=^e
zq;5=%(fH21@aWU}iHyfTHa4w)QD5u!bd}&4mnSnCngR_IN~H|A9!N`FW@0yHwt7CB
z>EUF)4J%HYJSbPuKC&b3<kE;~q1rAF+kDglA~L)UZ5p+-4=*ZCK0W(QwCR#KhhwYW
zA2r;uBtTH=&i%Gm`~S?gS$gbvxVZSL>>rA+e>Qov)jV%n?0xUYm5_=5CqMf1a=G>A
z!?)$nGc>JOrBm;HE@GPN58bbi^WWdKj@oh1p0)m78dJm1a@qg?kMceEvU>T-rzt#D
zJ|{L*gg^aXE9JLu$Eux=s%js-Q<rf%QKl4|*Bozqe_gbxyZeRHC0~qWDz8WF^D;@#
zx&QST!^8jSde+a=qs-o9@8L{HP?>fvLEEzZY>G|&_uKoc@?S0781?^Ou>3F8s@HMX
zH(U<re^a{k>sR?V`#zu0nx)QV^J1#5M_2dGRK3o#pLzo}pMJCdk#^wYhuNRI?^{25
z-<Pp+Uei5k_jei|PTR%fWH!jj_5C^h<4@Qug^R75o~l|d_dmQ{QMReCGe(%bF8>Xy
zwu!br|H8t5yIu)1zbJXJV1}(=$7|;!nmikyS#8TVUe4^W!I5!F=lYr&KB<Gwedi|{
z#U0;sV#AFJ1_o{!$5{*wI-)<O=vsX(mh3*-^QN{mV*lTwUq@qOKH5#ydjGoq|MBYZ
z5B+^npa1_4Ka%Lm^y-i4uWj-Ff9=1fljb+EPP68o^{e;ht$PHue<TL~=qb;>b@bUs
z?sY9pb~0&Q!a~JDj%;#Q)~B$POptD1xVNXZG3VHu{YS%Hr&){cE#0<w>zbnbA}gz&
z9*m9Kc<NJnxZU2KOK+SeE7=Q2PQ4#~JoUti{WZ$-CZ68@v-Yp4$Hn^h(rMD&Z41<@
zR;RsxvYvm|zu*0$+ZTVlSCymSzPWVfyh$%V{FUfjCvLO!>`nb1IrUpR)D##S-@Uy5
zxBgphKcDXL)rL2I>)YzYY0GNowoJdDztQDGe%P<;SMUE?wlMzMzFo&PH|`1AGsFG$
zdf~5GD^$-M>wUgsU#n}a_`a?Q;Vg!S&cBb17oF1+_VjgVOO5jX)rP0D7i<44?5;^K
zes#4o?&B53k1>0EZ{)X4Ie-7+oL{yPOGUS~*7k)@w|dpCYxV5&>KSWIG;7MQtzKJO
z+7-XHa^L!|Qw!!CU{JiH@!R9BW!=3MBJZvU%yxb9;wDGNt?XA`?HX$i{JP#5X1Dcg
za*Wc}t8uNFh1z;YweN=?uP=?be*Itms+{o0*{i2ttj=3>wvWeEf9(<{u~~I3*DtOP
zZ?z2H$g#3jIfZ%u>UZIZuKP4ESZ+x6Pb{yJjQ7gCa@Ojf=DU4qHmBpGeqFP!Oqvq*
zu}*E*@<5j#TYY}TEj<19?8<|&*=PSGF9|tTe$>t|S!Ckvc8$9(A6CxJ{I}3A<}t7J
z+sN2CWy;s$Z<idJ&Jn!)$Am~pNx>rtK~3F7KX~eW-FH-WO#G^^A@8_x`u~7djgzGY
z+fQs0%i)Z^t^ccx<9Cs_{*xcUeW`)&lTw1+CDcPq^gPmDG5r(Zd(t6X@-=72L{qn=
zEw3hj{9|;SRsQO|kg|!hUq?LsGB^9o`bRZh`>dY^{u95k^ksG2QC;!p|2}JfkGkJI
z-K_mjQRc^lgP#se-T&o&_jw7^pY^v7m1Ny|Y`UhVl*?sHrl9^UgLOK;U-s$Uc68nJ
zLg@IDn5J2r*KcQTJ$!u2HKjb;Gog#cW@v4Ax+J_qMV~8@ZP|qX{eihDyuZ2QBp0Ng
zIa)dQ%Mst`yDzUFbI-of_w3IK?>QV_&RXyt692{Oyr>}1Pkn{<-LunMe*ORaE6iq1
z@A;O58Gp}<KEHBi^)A=CYey97rmj6H{37X=)*bt>-zQ$P#l6Uu@7rgf_er^C>7o#K
zlk5HKudkL<e;ubUx>R)Ek=m;EVsp;Awa?bfoBDcvxa<4My7E#@#mLTY+D9YY^d>b4
z9B8=ul|O@BLj6;lGPnAbr)sH9CSIFgnhDr!-aO&LdZ|XMyvCgFHPTs$%>T?=w*O|C
zudS)-n3n!ug8%We#HeKPHM6#*EjV$J%k%iV_$IrYCx@4C<X3anUp-{>+DOoNrQNwJ
z%JYJ|)DB;K);5Vv@Q$mFJ+~d-%e^n`g(OeKN5w6e`dUDh?fZnkCBh#WuWj92yHjD2
z>{J0^t<&mN<^Ni>IS-y>mF<!Ga@|Bn_wI+O@jb^&{hu7!D!*V;<oyqqO`LasWew8r
z_C3B))b7M-U*i*d68`<uO?v46(d+d(iKP*XqP`xAUA-%Nz5bMC<-f1Ldtp!*@uuK{
zrHhNT#>4CFCeO@vOy@nPSpLtx=gH#3U01eB+MQ~ZU-FSPdX8dR>GRhc=UkO-UnlFp
zm$Yz7$J3OvK@YrNPEEMG>u;^<@3JYGx*Lut<W6Ndn-ybyv*(4q_Bqdp2}-@SKVEUQ
z7}&2+fA%@_Ykrf`<Zq|{-EAtWDS24*?9JPc0^dp&-{)nE{>PCqi$m^M!{t?1L>;-$
z79amJ)v9W?uWaV^<gzoe8=n-X=`&Y9S-AF|Syol<r&lazt*W>VwV9u?+4j-D^WY6@
zt<oJ`zjBJg_6XfG+AEW~AgF&`>_NR~$(5<RZb{2!^IyBZdT+Fq>$TC&ucsC>-LVgU
zI9V@LG-~bYkl*@$lPl!@9+Y4A_7Ia^$m<6y6qtgPI2(>n^nK`8y0Ls_$fgH8?e>R6
zKUye1t^LLSw@$W0%5lYiX^(~mqo_pb3Nh`MQRn9i^qQZ$`0ZyvbLTP(=l|>ZK8kB9
z)_#w!{*%XM{5a}w%#=127NaNU<7e}oIV`tu<){DtEj#~CFsn7$6#Jkd$0|nPp0?<*
z+c#7`9{M|5OG&@xg!e&tHTJ|sf-_a(WSZaH(|=>wvwEY=j5NddRrl17{(L3!H(qwS
z>dEMz8@-*UUAr>ZC~4QK^^d=AwwOL4;lTM*2DKNS{@-KG;+rtPgmq6*S&mkITYUAT
z_`X$7;|~4Tsn%cgcj*lc)wDBVoV!hpZTsfmIMUH@IaJ2yMrKDQi~6tAf-6^wU(LJC
zGAZrHKhExL^WIIZP*rG|Ao$m-?b*%xq)+Qbq80CR{<E{%uw>)k?wFD%zhg3{t-5<@
z>C$;0Yg=lwcHb$~3YP6@IKBG;|Fp{bdk5WSB`Z%2KRW$?z!cS(>3()spX~3xAwB89
z@uZR^3J!Unv^ifg@#?RCxIggy`Du@zuRp)}hp_Fg`h}<7FQ0e%|Np3cTce^w9o^^M
z{gZxBaowMzMV#f=4VRh)t1H_DH5clIgq>XU|K~bw+j`EJ-&^A*>F0e|C>6SSJ{Q;V
zv-c(nNo>EFUdSRLcCn1}q}{}qJ9&=p*8jfrie63TWDZvuzuq||_3Nk4iJi?_qwLOk
z!^ZLJtk*LOSZeQxW-kje`g`AD!G#~6IU+nhS!d?#+uu~&UbFc=$DU7B$q%CqS|&be
z+fdK6Q+@jVFMhpIotO6IuUH?yo+Y?YZJx|@#ig&cS?}I4T+v<rqgR!`KTWUu*Uu$~
zgZ;lhRN6A5<FH=t-+K}ZC-iNz*6RErBGYxFhfCdhk0RUhbH|$=S)?3`oMWML%;vZL
z^%duM@^$ZA47B{H!y&Ky`tQC3eepKm6Jn}X54`rS6DiieYu<F|oQRP-o4(}K{kAqg
znj-arkBOCf#Le%R(V5}ZBU3st{w;S&h3b(>|80fck3VjoQ=v9b=Hm0eAKb!N9l6$P
zFDk#jO5pSBMSHpa?GK+A{$VL!h*olb%b9<xd5TpV7~IU)DzGO${PE*p!oS21VLzKM
zUAb|fPv&Ys$GZK~|LceyJ9tnzr@V9OW8s7&FW0`4U7LE?G2Kvynd!Ul(&`mwohJXg
zyT0{nU)A;#$+<1Eb<Re?hpx#?%lNTlv(eAFsaodm{|IHgb4>l`>y{TP`Tw$(kN3N>
z?5fx7M?V;D*zOQ*@Z|g>57)p(#<K-m|3=4zmme;u(liina=O}7w|vrxi&j5vrvBLL
zs5<MW;dXgeR^OIi>~`F9JWI1){nM;^x;ETpulV1>nn}<0Pj4@8W&S9jSY@L2{>S6~
zr@E6|98UlH#*zHy$4V{7T9%p}Nuu$~Z`V&h8UFp5VNuNwC1JkboA+N?nELn5gwE9|
z%(tg=|2?&T@A>}|rrKGpN{&&ln*D0tf+_1IdRX);Sr#$B&Tk5eJM(7o_jP_z`DZ<L
z*gs8+ZJPYncK2^xnX2p8w7ULAU)t&#wkvekmfbZiiW8D&+w5Pri1XwBm=9a^e@$L{
zWX{hoLPk=trRJs+?)2<EC^(bpOy;+nOTLDm{=IsZ*~y=u97Qsh&y!ylxv5v`#oCz7
z_dj@+%fCHa%T*Jy`pD|kZ~85DW_zc9+yCn7HGaX|aQ#X9r(chqdG^=7YY#WS)!J(R
zch~BvrK|eZmPcP_ap~Rqul)48eL?<nU#-&&5iGTM{WW5z!sKH!j($<UvBtXh%lfYC
z*SG!pe(mek_3NV!Uhn4B^Wai&4@$24z3c9wlhY@kboKfk`0ba=ndCXQuB&a^df2r4
z_4KQ&elRew?r@yN$l%3Vzp#AK|Gyl+rgner7W}I%Z#B{WZ_%r${Vd_Nzr+7VCL6}Z
zbFBVb|NqthZ_9G#$NzoEv)}&xjPmF8QTz0ltd0$MTfJ+q9jE0bsWVgJ|LiJ#(f=w=
za;|>uqot+w5*w1|i&rJ`?Tu{N_3P{F;;*mD*UA3ds-AsU`pcThR~EJ=t~n&WGClIu
zPuu^C^QNb--;y}#Xo60i+QY5cFM4P59**kz?4=g3X=M58y3pDX-O11YKRcd!c#+1_
zy;U2^e?3+8dQ&u`>0s5>`&sveWBgxFS@K=Ciu=<2@_@_Tw||$j1i9qJ-@LVy)#Z<W
zy`Ht`_gDX3{a;s8)_>#e)3~*_PK#buI{j6tJmmD}eM{GOgoUo2akOrqYSQ1N-7ejG
z!b?xT*!cVB)fwLFuTJ+__c!Tp)!v9h-o@G9U)Rn4x@zC`*^ffI_JoIVD&JqY!PueV
zQ&s7cMuk5g|KEQtUUKW|zPyj8w3|!vf8D)jt>^iQ^Xl)2-QoMRc1{ni{i3w@VeIFt
zi|jM}VxRn2e@-!P_TT;CaeF_x)NlVA^JV+%ki{HJ6Z4;Z`p<INf7ZO%Hm?Jz=7tBe
zbviQA9e=;kx8e0|=sLu5v;VA5+WK49RxbMeeyf62W_#J+Y?<xrbp2zEw!aRoUwz^1
z<=(iig5Jrj-1n2t2!HC^t1f#<e%+d{UqjLie{J*Me)o>M_Wks4uG#f|zr{3Ta{lfs
zyey^pgFmrq^V@j-l^hY_8}3!Sx>xh}<)5NQlYWOR+FP}1b(r17*e{!9Yu~?~bmQ0S
zecvmZ3)@pov)+D~*HkvQW2JS?(k&IQ&Yn$v7F3n`BW|kJtHo&>4Q8LH^Oygy-QjS-
zv|D>`OqqP|czxban@XckwZEUQHgOKxf9v4-1+K^M*A;%Xs@q%OeE*2I->!v`|F6YO
zP}zN+XU`O6t@zm58Jn)}{!~-{<m>g)!VM=qszc3At(^X5%gcZ3zi_XyxVgE+aXI^!
z*Cl@s@{}1*Y&~}J`Rk}^mYTY-u&4YHW->G0ZOx62__g5X^sGOBSBI~y+w}66^6ss5
zv-e+L|97wbgWhi^6ABM3yfM3E?PUw+@X3w0qn3QX`tP8^?2N9z<v)F5G+C!z`~6Hn
z^ww?lsV{@}-Fh)U{_mRb*FjagzplS__0>tu_1P~I<>t*Vkx~Df>VJKMm-FAo0BK3X
z-u?GqY&<hx@7027GN&UiHl4|ga{P4i`TzI3f1mvPdv+!Jy3cOM-4nas2&LTLA2<D~
zmioHgVe7-ce~<aUs79l2Z|dDD-K-Dp`6ly&A58e-_wrNq|IJ1J+qSK=a?1K&*4exJ
zX4v*bB_S(DlbiZ+r`|kGf6ad6#72p!UE8B(X1x#D(HjwdZ_?Mpr&;#om(FRImz;e2
z{NacjtNjBW&HeWO$*1q>7w>OZd-Cu9AB`a6vchH0zP;UUDtUGKduPK%+qTSSU7~9x
z`?E~AZ^FF7l@-+?Qtz~Ch4oMG{JegFTiMpaPlDUOaa0x^xo$q=_pYypzt&fMa8oXu
zfBoyBJqppG8f)J#p8h{{YL?K}rSrvKoj+gxa{fiLFX9^<o_ZQhRsV2a`~Sg)iBtFQ
zQd$3OQI>U@;=_MZ%3&*6m)0-67Ug&Sb!=19wk67|<i1YpoxW?s`(Ju{|3|EP^;5Mz
zB<gSJuVd>KIzNi7i)L|8JQ=q7gZmQwW8tkHt6yp@T2@(`-){K0y!+{`(1(-G?%qG`
z*5vD7!#L*KKkChS(4c*|Dt6V?gFIi#pHK6@^tb$V{{Mr5SLS<2n{4?V_E)%IGRM`t
z?)$5Me{p|RP^+_R&Hmq;i)|}ZbhV1s$NXRG`Jiuh7;o?E&B;Yu_pjge`v29=@E89!
z-rC=?<Ee99)R)N0zvs0C?ys*~`?K=luhU;@vv021|8H;0l>L^s|CHtPU#<S)y<fnu
z=7`Dt-}nEXa=ca7A1k&#Y0JO$_k*^-`ubwWeAAMhb)EcQUv_`-`oHLqQFZD1Gd+J#
z>Spzw{@|kZ>2wVP1M5A<S&R)?tn~|Xe{Brgx$2|T&)vV@X8&3=ZJ*uyt;zd4SLr`_
zn=RP&>Sgw#u>Ie|KXgT%eEfg%`Tzf}>(=VVt_@#Xw>56-#lNe+<R4Am7_sem#k*^}
zqxW9v;_8gfS=l_}eVh9)m9^79pSiH|L*VOHcXNl^8z+C6pQC%(L&${n%(q3!${te%
z*S6Ll>T)^cHT!Sy?H%j3)|BV%n?7x8)ZZW#<7I)7#_rxFhM8ekS2!)tP3XGBq^QS#
zIB)_(#*OLgCpz-hI$7K-nPzq2R=Ifki-vPE3bI0*Snm6UtbN&--#)k5yY6km$=BO2
zls$;Km{O6|D|jJuxo1a5*fJi~<qM-1dV8(S=G3jSzy8T{)upxHRyMYD)(8LJ`t#S8
z?RUdgieH_-Fw}c*(z>m;5@tj%U9|Vz8)Ny(YTr+pQ!m}xeztne?xULfey<LBeqxD#
zXy03pt)VTu{=WSh_BZ=k*x#IzyN{-=EqWQcKjKFJ*1T2m8NIhQzgru<vo?C&mXDXt
zd{%$|zxJy6tLwYAOh4mZzp}9BvhliuCe{9KD@tB(xNm&dBhOs?YsJ%Y!`u6_p1;0g
zt^fV?y5Ae`_ROol>b3sUx~gYyv(`_Iz5hDm{QcG936tlSmKEt<uD|uL_>A?by-^P>
zCUQH8UcLJLenQLQh7aNV?+@=gC~vSLbyl%8|7|1jo4tKGD@y%%FS}jZwg2nkuU#>U
ze+5@xUH)ueYTobFYk%K<&Hh^a)zqA)fj#w~n{uDdy>#$?qMhb3p2}~H)g>Gyiw}g@
zaeOP><hIk<;%1%Aq`yb|A9A)mQ4|f|5Ui>BcFmM?&R$wyyZ2w)7=LPwZ~f}Jt*6!$
zXWbXRbv^5PV)v#j$*XqT+7kcYc-gz4BXil}nN#Po_HW<v?zg3g*{uKnSzVV+&n=s4
z^y}e`wD+r1U5)nNNG?xU%#x`z^=wT+ZH{d6@`JKxGW71g`g?k3`;GX`b<bWFhOVs*
zUww6T_{yVN^ErdIoV)F4S10%TO{@@e$%eCkR!zSC$2v*#bpN4iXA0E{Cq8Ake(9Ln
zx*EYVlU9qrds`iK^7sAL&nw=&sQ$BVTOspC@%)9y?&o{WJ8aZ<rI~xboaUn&;T-e+
zPZbXf65}k?Z23G_|K%O=+m~fFJHB1<MSuC1A6oYeHP3iXJue?St$N)*<*)TucRyeK
zHY@Q~-K19s6%Ee^WU|-JPMZD3<(X-brTVLujkip9uV;zcbM}VP-PNz+!~d;y`L;KR
zW%{Lc@2?tIcYfVw{Z;t$|2sml@0-eRC~cHv`Vc>(f3nKYs~^0*E*iwPFQ_bgzwdPU
zzpi8Ng*A36mhZ1!^z^2ug@fVa()h{G|6H5&ZR^dqyK{ek&H8_BcjVRA(`!$@mOA;!
z|JCy+r+!Ab{CxF&@2t<BzrKXM70$mNUZj4?ll#tnbC0#ZuKv`t|GL(WWA}FZ2iqR)
zo&7=M>z|8X^#6Qy?0fw;vU6tl7j^!_W!tZAKDuqm|3%*F_kXOo)+1r^(&|%X<`2J|
zgo2H`wk-Mm|8j-i{A0YkUvAiacwS8Svi%<-@0^vAG+&?g;XwHtp+K2m%wM{nO)-eS
zb?a!6)Akn|YxTDNI(RSs`jc<Uro8iA^G_$QfA;BMrP=ze2iYGiPFAa2-ux*$<ljk0
z!v*tSeEs=oy0Y3oVYBuYv;B(=f0|FQ`y)7Qw#l=#^-fxof7OU^o|TuCnDt>n>zTqA
z6HD%AY+E0;KI}u^Udh#0@9&vk7xd}>Y`4aF@-Y*=fB%2bZua(<TJ(LLz4yP=82w(~
z6zRz57(4Bq)|IbcbN?&Ob1Y&0b@aFPzI@kJAKWrUBA(vT&KJFQG-}6PkJH?rej00?
z`;-#*pzmg2@qWFw{p-uLzTdc!zxA~K+X|I4Q|EB4*&n~iGJ5s=2{SiWf39fSY}t4D
z)xol@y4H0$Q@(9|v|?>Y(EiwaZ$DN4<@m?(Qt$fC)$x_JQhFyUj{m<qbJ@J!_2qq+
z4$JPGJnt{Z`uh5+hz}jBgg*71b6fXMQAqs7-d~!Gzsy$s{wwx;*GBKNVSl4mzqHDG
zKkI93eZf5z28Nu>q_o7uWCcS#BMSxwCL2wKDH`V7yi6yY4FY;pBwL!44K{7=zOQFg
zTxmM>n3rz&?mN?3UKIVmWOu*$i(S&kPw#Ke{`{|h^`6wt)2CR!xWBn>kDcvQ_RX)a
zep=UDulatSwMzPYD|<Qj&%dJL*IcV!>ipmMr~Cb{y!SVi_$vO+W&FEuVpBu&{VNl?
zoV;re#q7_$@=tBy|9`e2rCPzdf0~)t_w%ng^8XKmTtAz_;+hqj+mAlhS#acOD68gg
zlPiKp|6A_<cPq#$!8p8j;e&1S|0l5IUN7Hhe>n7-bDhCE&s}Y7cmL0Jn>1m$MecKU
z!};MKC$KZBh)mHjkrre+;jm$Xj)*nup@j;aA;B?E*6isFa#?YDWn__%6@O#qgDT<A
zbFyb<wcpr#<Hv{HU(f&iv#;vy&3EVaU$>k7>C?BKS#7iR-bofLZe?^6iii=l=l#ga
zvvJ8ggQpv&XbSnYTvvQ|K(pw8`)saTK2NgW-tAs+MRn`Hix+ft7gqnPFO{42&|<6A
z>$=dlhdH;2Gt9rf#M#WQx_Z-%qA!+lj0)91E;DMj97}8caID5|Vd|vcQi`^vqJ_89
zt>@elxG{}ut%R55pDSyRCf%R6e>U^C(zKHt%?*rdB0U~%T&zuv4I3}yh-me)94uJj
z;w30??dIA0Mxm}x*Q*4iZ0%h5&;HTBYPY)j)@S>Fyk7p}hK>5eSMy?;FNRhbZEBbQ
zzHZ*RE3f*JCfApG=AZpOFZ|Sg=`;4+$16`6^mtyV^ZBx`)It51MR=6P`Gd3GRH?}S
zSXy`B?!3hHj~ag`*J}1I=Zk(?ckkDeSH*7yLkhyV-OUUBa`|NKy7m0Uw4%Ga<No)_
z+AiYzJ|o$Ax(SQx_5(BK{_~3qELHsE>h(0({lJ!)y6Z}>ADpw*MPlWoow}Khy|!1@
z-S61`vTwlyMir3>XH>X_m<~29i0}}TVr^<pSa3r_lvBXvzVxoVsXRMYrp9W8s<`y>
z|NeSx>5shTXV>4o+5g}6{><MMALYzW#@b2wE;!WZ`S_Q9NQr&ry7b@Q8{%yCL^szx
zZ?_N<Tj^hSN=9<p6yGbdIrAMp<?X+dwm6`(nQ!0Kqg>~%$5y}8`n>f=zJhP`n^_BL
z9=VpfC0NOFcSp{hz5J<-|DEuSg|+YRvAw?A{&0tW`r$e+wj`~y`nC-ruDfGC{ho8Q
z?Cz~yYql1ws(E{Vl>%d^L2ALS1eul2pH?bw_X>0DY7JZ)`tZJj1*4kCgf~}&IS(Cl
zSP`Hj&ehto&|yMLzTJ}Y%F1AcEmw;VEe(C=tsQ#c(cg0uuUWmm{C{mlcJ}}9+Q;sx
zbFwZkZv9huEcCFwl<CEy`C*%vuPuG`)HM5Etd(es#*XX%ch;T#$LwWL9Pd8eR!8)i
zjoMr}eY+|8xuTO7zh!0@d)1O7;r_X|L@m57!si&f?$b;E51lwyz!bPcXP&QM-dS7I
zfUjQ-*GfITZ?78@-!9m%*}&{@Vw(OhC4ZxZ0?K}S-Y;-Gk@NG_I>Y#$3yY7wojNr!
z>33L~*XB1?*N()^kBiu*S^ITr*krZ{NOIK><6?DcN_3c@Bf-tebg*H=1RWD@PNs#4
z8Ub30nyp%)DSPg1<6NZ`8x*Q{RqKcRqr2wfvNihxe|&#;Y2~ZpRE78l+j`0?8griu
zT={9e|4HroZ8c3I#q#?Mc4i(f_#*Xii{9iEskE|38xyOqJk#9x?R?`w(K&WU=B{77
zX;D$)){E?px6~daI<M-E<9V^m{lfd}tX}78E+jg8DRf<VHDl7`xr~z-uTA0n@N#)N
zU*NjEbME!E^A$KXG5)as=BW8}-TUqB36kHRv3}$3THADe%TkTFwUdHA{qOJIE_Qg=
z$y-P59&M0eR1uk>p)ADO(Ufo?;)NL~e+Fy_&{5^&IHYW_Z@+HQ)ZN#sR!!fzH08>(
zxRa%&>JGc=`Jc!i{B!?b@9t8&@6$K`h`(mF{`>hj>(I3|GP{d?MNXfy@V@Zk)wEMp
zz6SrfSNj&TW>+b{+CKGV{-u9em*ttJKa-FCY5uJ%>r3lfk3)a{U)!g_%fq}RB&R(-
z-rx^coMr?|Ki5C2&V(P|A~}Q-lbzm&r}K!OxPGTNfA;64>QlHn=QvH?>=-j=<{bvh
zsT`fZR!*8<{NUph-^Xw38>dfN#rC<NZr9Qurz4Nce~)?>`;Y%)XuS5%*sJru%yKAT
zU|`+rIE#s)i?t+v{e>C(>aYGY+PVMzj#*lNtKR07g!!M}|MmUVz59OuO?omh`_VPM
zsDC{_=6!Ve^Y_d5W8d#*wEbPP*HOLi;qmMCF8??UmuT~4M?L7C);O)?S^VqhuQe)L
zf4#bHGDX>3_0`7s)n$7>+SlH13cA3#c%#?VPyzE}@lV!Yx2m3H6Z5@h!?|r2t+zIR
zxBXcgfA#K3*FWkXZG~^`2wQVpcb{5(m{pX_zQ55=*2nD*oAmqg7LM#qg<s!a{T=r`
z?zry9=*ibt%#W>&FRgeNvOZ{O)l@_7U;F=i{as(S)$iYRy{o4--TS&eD|%1Rqt5qV
zwL)5Mya~Vl`tF+WY|*OS65H3u{jI+!Y4iUtXWYWm_FC6B)z;5n@#cAM{Ygi+b@|=L
z>*d#ndzJi+-)j47s`cAyk@q6&+BNHUM^@?mto!J-`uhH`u-D%dQ?3YAyl>Tf)fIBI
zswPs$>{kE6hkviR<zL_E_5SLU-52IhJN^I9Mn%<C^{>NOWp<n{GvhA&@}+I2-sfJ;
z6w}3P7k`nzoAh7n;q>nx?&bXZ@wq0do@E-l%HFy)cDrVmzS{ck_3P<dtM*-e^0lh!
z;hqf%TXQ#iHGNH4GW8$Fi}I(_{}=cK9?@TFad7*CX+@5QvM*#k-o^Crx8ZENx|mPb
zXH8jVKk4k;^}B7u<b_r5epwx{_J2TmUDx}obFZ%1_CxofG0)Wn?akgX`vo0U=CUe@
z{j-@F*e2UDZ;N~9R>$eP?_ITDv?EUX?$p{7M!{111bd!-s+Tngi~qOj&yn&^|2X=J
z+=_ypKU2EC|N8!@zj1rBw;HUzdgI-@o``e5Bg^BqYF%fQxZvQ%dtinDYnga;+xLy@
zH<e}91iD2laBqr#*u{2s;^}t+cDBAcx$J+I>|Xds;FN}Bo(l`(r$5`S?RTxesH)zz
zS+VD2*w-gF{io}BTv^UpZZmD`-KwYYPSfI@wf@+d&)d4D<YSbA_Rm$De*NRhxjkW#
z!Ce0@>?Y;bEbddglD9uGefh`OXUnIV-rK)EIVHiXzG~yos?{5uk1;WI8wl?Do_#g@
z)VK9cQ+94$xkr6bP1^pg*OPlrMfe=sKIPSisVv7n9Jo8bPTI~{SAR~TOG>`^hcnC0
z=g68$+s%xR>o$MX;TUN@S-fqV;^|qZPs?9lTedYkZfE(e{Ts9OuW9X{EwaVGwtT(Q
z{Q8V+(FYq^Pjha~RH}=)TB#_`{qXTKJC2`+7&iHaCBF-?W@*X0&1i8WFEeq0>W|Ai
z+pISW<m<0?Z{BN{oV+XhX5ps))^%Q5TUEoh{@VX~-q%0cz0KY0lOFLiXk@<-C>2+B
ztckNS*squWLhyvbvH*v4>kVI*FW9MIwrgSKj^Dx28&-NPt8W*$^p$5<?-%X5cXuB!
z<jRLXQK*gjf3S38m%x3O_$l$5(vD;uPQR+U=EB#9T7m}{G@Z=<bG{V_^jHyL#LNCY
z|Km2Ui$~Zlr*(f_dOJ(U?zO#}<JwgZpYiY{70&*;`bf&6K&>VEtDJt#`l0vMN^!qU
z=>M!=`}duVNl1xX@A7T`;+RQK{=Q!oTDAJ>`e$|~Hd50!+oWjPwmw*}MLB3z;YI(i
zM}99Ze|p;gsdeCt_*ZU!JvRM~f4BYJzLQV?ah{Ocdb_mXoL<ZOBR_s9N^0@5|M~jQ
ze*c`+zqf9x%-?&@cXLl%vihrkPP)4Pgm?e!*}lQ4`fAhN_z!V8SvuDKbHAtj`ueB!
zWcAnWe}&Iq*Nm!*_rK$6Jn7T^4^C;}S$x0O21eM&*q*&Q*KT*utLqP&z2{wysBM^J
ze*DI!v)h+V^ho_5o4srIr@W<c{;&U7t}gv{J8C0~bZWS+ki+@f%AeJ<4|3+7+Wxy^
z_Xp?Q_x6g#d8x$h+x2gc^3v?0$I{!Cex!89Z814H&-cP?`6j=2&sJUMZ}~l0MX<v#
z{f74|zxb7Tz3(neo3wOm)pgF*b-lm7l&xo(d-pu||DfgEPd-Nf-hbuK@1W*MVKc&0
z|Np9gz`(#_>^O_5VHIof)t=6Y+E@FpFZud@(%YEnS9iyq{Q3XabL+Uj@pF@7MK9da
zt=q1$#c%r5*gJE!)~?pRrnObEZcVs&-{~9rQ7a4MwAW4T?+P!dkJ}pb$2aPlVfZua
zsn+lIW@dNC?EWJ-b?#r6Z+{(mB4=|sX0q}9XAyp7*SO(WR_46dpWXJm+`N&VsrAdm
z^{Dcb=DEMt=KtDnwU<xzZdZ8A?*H3<W?g-{(>8A1{JTGtpKO`AJ~VNc$lBWMJ@f5<
zJQcmJwKw+H{#S3pV!rIX%N6}~YM_bnt{r>-?fl3c9_M+4^K6>v7oR=xuXW0&yZqbw
z>uKDY^6;>Aq2Jb-U;4Ms^FjCSr@u~h|No->{%O>NvY7v`{{HdWb=m$?-P&LKE0+F^
zU;WieJ8W-Xv@qB2@V5#3qmJsHzqV`P>5BPr>%(Ww<}Cg8Gj84bpL=G0O|5zwuBvtY
z)w!BL6OCC<S7u*bQJ#PGfB4Vc%Xh8bZJT^+Mw<Swqd%0l@2*=Dzb^Lbw{>RgZ#|9G
zcPUw~zWVCyRmaknmoEC|KmU>Mo#$Tjx&55|!+I1}c)tF=|Bp!Qw(Y9#f2gb~liqJ=
zmc9RcXiu;FzqLO$U0G?qcv0>DMWyjYzrMeI{_54rxc6VX{_?y(XIkgi)&BX)qscBy
z>L0Z1xy}A$qwdy+H=fn7CaCdzzA|&F(PC+Zf1NMCCd`itjKBJPeec5HrPKd?xXL)^
z5|_}FA1^ug|4S-dxMbftyLGX`xAxnuU)}Y=dBf9yyR&EA;D7Q~Bx%p{c%w~EC)PZR
zH=U!v|LN(SX`D7wuJ%3kzrOyO*8Ze-QD46=%)UBpZQS1J|30{ve_FlXQZz*?vH5?D
zV@%7}|4F`nd!EFbZfgybd~Whr_pFKd8)G-Kz5frJslNJqO<+yvYrS8qLo5HrOxfrE
z`rlPEfBR2Uum4h>zefC_ke7m{a>mof#IN;pUTwU{?)~zk+TE9H9Nu5ez7qf6wd(8X
zPhmT2|Hqdt_rLOG-mdkweij~!YWAmZQN3^fYx9&=ZE@%C<<HFAmaMH+U-Vr(uJfLD
z%=~}dL6dLRyCh8ezr$}*-TTdMswUHa{aTm3-YKF+fA$oEVtp&?O|#tVKhLbYld|*Y
zn_uRE=6|lRC$usCtWeZ^)5Pq)*JZ-lOu^SvzRVOAU*LYpo9k=U*Se*z4o!VO`;%v8
z?*2OYU3ytD^&*Q`d9}>{6BcHAY@(h1<B8t!Kh!R6T-mzip8iFnzoomc^UUA>f6<N~
zD%&Sywq`d;KNWp{vqQoBkE+6#ndSewAJqDUZjI}>du9Ln(A9TO>^;43+PvTEYdI39
z?7x3~^`u`ad#}#A>nJXn_n_cz%yiYo-z;7D!d<UTO@9?zwYNI6{>8b~lP`X~weg3_
z#cQ$uII7l%uD;%+bMNon{L{hz_RRg@p0&1mZT7ya@BT+-O%lBQ`b+EAzY%}qyd$0_
z8CUJ!p>&$}+b{DF1_l;p$63q_r&#M38vc*jI8XnY_S4<rKT{gn|NjsF_5c6B|KY#>
zM*I$Gug>~?@^}2deb%nYYoq5zDL7p>*?ypX^5y>_;h!pNIHHulttnR%y}mm1>gs)3
zJHuCZ#n$bO+uNb4{`%^r@8z%C->ekfa!odU#<ED6|JUd3v+BHXYSMh(i-%Wu2Gt*X
z)@JhLm-^rRpFA`L@7?+)6ds@dWB1mIc~M(e%x1lQUZ(f$-L`AmJ9ovbs^`3}_j`8t
z5&hRMb+f;|zkc<#=+D>hPX4zIHv6~#>&pe_J_N51E00^^-#PjE?uGaDuO;rg+O>6R
zY~<^Y&1TX6UN5a)?f*UK<m>%KG5dDbtqYs~s=PgY{gu$~zrMeo{&&sp-A}`JuTqSA
zTdMDUZO6RzyYw&rh+Z5r|EAohMZ1%o-AuP|$E^up6TddLcD4Ab?@@chqXU)qulgTh
zAGg}PJxKa%@#i~>cmF>q`f}SdfBXN}c2_1G_`9lf^PMygiO$d8Z(7`qV^L)(oh39q
z>Wlh|t*_&f%q3a<A8Nm0a$A>hpp4^<w?%?&+}iuuqLsh?Wr<8bn5!cFMdF3S-d}p6
z8MCIBzTcX4H8MK;zItuwt8-7jChQe?8W*H?{Zon2|CTN0dsf=Bq@Mr!LtUjyr)d2v
zZx);DT3_E^*IFC3Q!iBf&ZkfJS9?Ax`X&43U*w(J%zwfq^z7~|V%~q~c<rQXON3?e
zw6wUdFLtb}3$c{&|Hr9mP#nHLLjSO|lIpj~QHl!_?!Vi&J7U$xJKI}|GgsfedVQVj
ziTL<CSw`%YHTz3_wte3JGdue8Z<)rK{;$s_@=f^lHS1^h|IbzWp091IO-=vYy|Vr8
z&sP%<vl&YN-c<X4w-ldM@{2XGCfn85hOf4dS|2XYldYCDYf0>1@xNt$X>X?Gl-+Mq
zTAKC0>-5IOt!JwLCamWD@v?7@&4H^m`?yb}YUVqy?-7xk^&vVWZbfB?%0G40($|-&
zwDm9U+FP~fX}p7{T4DYAy1l=)Ka9S0?`#0iq^wiv>Ty*ICRydR|Bc@iwttp=|Jyka
z&Hr&{FS~hjvsGQs^Vj>I&T46XeRR52`0Cm-Gwm;KTJQR5_t#h7JTu?#+MKrhOW2>=
zYpV{PTIN;$t(X0YaLE2}d6ud7R$r8O_g8T4y3iLfNqe3}E?F=2_x@^*myutq9=;CS
zeKKKEU)J5p@2{IZJ|x8c!OivZs;}?QzrL0KPio!1If)iVLW|;8uMgusD7x*n*j>-_
zzWeIW-<~X8|8;t{cy@eS@sg>^f9_i@vHt#I{pq`poJ3!*zpnLlRr%u3pWAY+{rqbe
z?S1<8|B|S`(t0oE*X`VGzWR0O;j>Y*ri%Xln|f=8Y{|N*`=;r2hJ5l~ulF}5BQ*bq
z;=A{SM;`ADUl+Q5x8cYCs|^?!m^V4jVs3cFT6<meFGq|^)b4QmnAO+znR&kp`!H4b
z_`g@G_kV3#8~?R<-}NP5%NKHAs@hZ?x6@I+=;*5d5y@Hi<GrsN%(Ivu`cVJ=yR~Jp
ztG}DCi~YB^Zg1bU_$J@WJL@>rwtUw*RO+p9nYW(Bh$raQ!>{kJufMuK?6{s`gxbF~
z;p=PMgT80q*RA@pZ*SPzzUS<(UVpI;iC6!BkmKX@YnRML%GQ+ab>8{)*4L;#(|_%)
zy1wdEbZmLunuXq7(T~!0Z+*43HzvG(-PiK0qg8vWj@~(_e=Yu`%Rk|LXPI^PfAFl<
z`Mdhxy4X)Av-5t1UY-6>{(sb8pIepHVY8>kfAQHpx&Gqk6Ytb^Oz}Hf7_Po1^mY5|
z*!ycjUwwV8_3=`V;D?1K)>XIS4;wCA{`!5doxOSJ>O)59({0bs53k!0u5VZr^)+fg
z$9MbHfA0QU8?x;_OU%xz(U<mL-~LKv`6i)n$2y|bsx{qvPXAxY)7w<3|77*Y(3g3}
z7pwM&zj?1M^>^pj3U$Se4Hx|;{CxUj#v}3b+|2t~ToqZ2d7eMLk(IN5(@U=ZAEHmI
z$ptEz?>7|9`PF21?CXt?CtH{<xSw_N;_yjbUH98%<=V)yt@m4u;{5a9eY}75(3jZo
z8J%ticGq<s75}=&<+5TXhvvNh2X()F31KPO+F@;-QnHlC_Iy0Q`qpEO`9;syPdT#l
z<fnhD{;&SFrfhF`_^&JJ?PmSEe*F0QN$IC559|Kd?XN?hZCt;r`qIYx4<^;WOZoZz
zboI{Ks=r06V@}p)1$wJ4a=l+a-LIte^VE8edH%J3rp<Er8)*7NMQGZa_?Aw^m3de7
zyC=!(DcFA75PN@hyzsBBlOO-{{`2+c%)FDYu9iONR{G5pp)$!VX7~PZ`=~eOPp^L8
z=E5#I-ELjz)p;A|*RQ_!-hAHS3s-N~abz0a{`u?a4@Iq%^zIGLXZ7nk?>Bi0U+UT`
zc<S?9?^$*0)wloXo?zAx@x#Sr5BuMf@%LYgznJ^TP+WI)dDx#CF|9|ovR^AZzQxbi
z7Tj@>i~T?6YtKF>-YT8DQ_fd?UHxf(=D#)ZYf5i@e;sLW|NcVXe%IF@Hu}q#gnn*g
zI><j=J}YhY{xEypcR~DD_iHXZqQ8FMlTWv@Lnrvm)qi^L_0qhjUt6QrEctMv<0ZSZ
z$n0-DbBg}IuPfS{+}3}xvhKg|^X%#cR~K1IzS!Tn@74bZjd{O+axN~<Iu)PsY4WxG
z@mE({uQ%xUHvjddTh`vM?#A9<w7pjK)z-UJ8-6|gHDgo7@Bcw}J}|0;^k|rIv9>iP
zElAK&k!EE%=x7k2W6H&Hs8O-QH`#c-WT9)U_sY<sl}?Xj_|Mf9&#9lXGh6BYy!GDy
z{j<$J)oHg%pZ>c2(3(HfAKVXm__$7W!v5}c8!~;b=jTN(_?cZ||DbTKX8hCtR;#kw
zx-3@A4O9O;Kjh8XSB0-LbS7s=T@9c5R=<KfH}LM8|7WcZs-Fv4b495yjsKa#wJ#TP
zYfV3{a@c(Cn9*&Pm{${j-VreJTOJv+VQbu!4UE~A@3OB?58G_7?pwZtTS@-7QNq_8
z?fQxPQyyPV>C;Y+T6^XgSN28I!j2Zf8IuxcE#7?o{{ucov6=sP)y{d$ESjXXY>Pzc
zH&vmoq|~oRX1s4O&i@?K!1->Hq`Kwy44aiRj>$|7b~!O&<2%b*d8vHU|EHeqe*5;*
z)ys$G+C?Qjv$9vccE+N5`MYI_bELY|TfGC7A|&^$-fMp~=5}}arse>#c%Q3x4w%L&
zUEcMoW}4%<@GVi^9$sIAt_YkIzOp$zPkQdkjmw3UTUR;F(47{4FmPg9qfcI5vj4Kw
zXIWcL_OgaPebU{T@#b}UobHM9oSnT>{3`Ny6ck<ao>zM6t03!_hpu9axo;l7vfJ>>
zY#HrYvv|5UUwo?()&4bkMWfuq3m<1aE^m0isB~q*nkmwpPE866PI#ztak?lfboeL<
zb4Q-_7oA@pQ!KV>XLw{$YsjNs3pu^3|K?>cn4DFk^{>9~+@1~c#($a87MB0N{b#ZJ
zfBq@|)Bm6UU0-=ze94Ph;(X7ZAHH~@*gn!IYR&ekSIr-;bC^Bt?|%1+eP-VT&s6QP
zV|V#-@5vty-|qkQn~uLWcUsl7S<*Cm-KGXzo#&G4^1}B$m75=b)%4l*YbElfZ+O4%
zd64w(lit~)-<yAzYxFRC{!vddJ3BRXU0df&LB?fzr>wup=dPG~GyG=gnYbzJj0z%C
zG}O2_XEX+E4Db>WW_3CkFu_Gdn3Kij*^Xz$)mz_|YOPx?yYy(N)YMWF*;(_y%n1*u
z+4OGK*LPR;Zt_3ub9Ygb?fxUPs^c~HHMcF>reA9uY5aM=(?9!HzrX%JIoo~xzGU9F
zwoKpo7j(R1JYIi=BX7!<Z9QILF)~$AjP*Mv7`uMsJLZ$owe?*2$Msg8T+ho_yl%Rq
z5g%0YsOfL%PRWLaf72H#eZTm7VtjqMz2LgFYZyx|tU1%bK2LV8@$qPz$&R<D7O(j?
zf11>nOp(ii`BturIaMFM2|BQ;cILEsZkDHAY#1IeDv3<+5mDx4I_EecMng=BrA0BI
zW9F)pGiHi(ww}Fpb6v_g&C*!D_n~I*@4ddO{8y?k=0)zD?qAEK*2mWV5%=4^`n}ES
zeQArjcB-ge$`_g~@@P|ed7-oO$LUsQG8X@1xNg#}{(wX3rS0|$70b<5X>WdR`pVU!
z{O|qWvrj*p`tse+D`#Z)AK|-vZ|+*p<$>!tkIt$3;rDkdhvFKm=U@L_e_wx<A#-JC
z^M0N=3-dmuu!U_`+_`FwqLgJ8_v^DxVH^Ty*Ay(>#2UQdiqn!W+r4z37rY8hir>Gu
zHg-qvy000pw(K}-zb`((f>A}JM?;j0x2Z94!2%r#Zb_y?P8y(*a+X7m0ux+(*G3-i
zUVXqTR#$78YgUkz{M=u&AOG~7{g?CVU;UW3&%eiiwBKFKzpUnxY|P=`eVtXFb2*m1
z)u=l@Sz`Ku)FpMlHU@Z~KKnB5+RCez=AG-ltzx$*Ji@y+SU9`7Cx@^2XwiiWa(nkR
zhA-GtrFXbE{AcC2ViW7AzsE)Ur+xf%ezjJQdOLSj{`G3cEQYYH2X)y}w!HG6AZhED
z?610S!Tqb3Io^Lyyt|Mi_hod9cVF+sBfr+XiCjEK>ch!zM-`G!m=$SE`ChkW`?o&_
z9)KpYa#VylnGQJ|i0}{-Vr^+mSQw!p#?{K?=+NPjCUPrj`u)(Z4O2_iRJCGOO+WBN
zO8NPm$%+4&f7Th7Pk&WA>B}_l9n){`+o^oy;P!1swQBQq1J}Cew`@&LNnSVkxBr!V
z)!#e4o|f*;o+Is2&(<L>?Ub-3bxw=#_0Jb-J@4nH?@PG(-)Elf<m=aM4hVQ1UiT+)
z;w<&gCm7GXZChR&bcKO`d8T3RdG*c*9t|}OYMq=*cCHf_eN_1RdqFkp&BNb5lrMa<
zEBS}rl=xIp_r>y>>QZ-G_jIxUSthQT<b1m)C$aCOU|9dkx6KU<3~Y{$vsf6pK)u4c
zjdg3|_IA9U{QqBQ()z8hw!ZZK?;p4KqkZh^`QLwS^Pm5<`~BB<|EBN#9rmM3^y<I8
z;*(XcKb}74Z`Avp+Ho7b{%QF~Y&}}L-bb@u-Bv*U$dt^f%r`gB?7zu2%YOgKXYE(}
zuWNnv4_AAA<LNca{80Vy)ysBft$x2XAv!$ufBe4dtFPKjNw4=7?F#?3Z1&YBySKjH
z`u@7!lU<MO<DSR;IC^SX{q=S4uk8<0-Pu}IzyEqrh4Pc+*?Wq*?!ABXum0+)zx%Ij
zX_~g~Z_)a&fTqg#S6^KhxTSe(&#}&s3!J}>+OGC4jxUal{dsCPi}bzyuN>+=&Fx=*
z&GOg&?BMHyuXVx$)@S)Ai{87oV{ed>R9dQz!ud8ug##aR+viCs&bXQ}!+;~4v;Lqe
zdqV1&{al~RW-ZhTeabgiWO4oppNqM_u0OK5e7M6Y<!ILLb<>x=jN8!nSA6|-t<O&3
z`pOK4&hD7}(Ip|$`DuWxi(cc;`u!|7eXkq}{8?ADe%Y++O}_2CSGqrVF1)9IW%`4j
zj;BAvn~dL@9Q*6#e({g+&p-b)_D6l1ws?PLPONI#vb@~*tEH-IJ+oZ<ThduXjJh^3
zu$dRmuJ7aUw6*&rDy_Wxr>>pEdFki{^@dv3mR#}wuIX<ylI72f&Hm=Jv`qFN=gFpt
zu6<=P>%;3d&u`K>ys5%AuE_9xlg4NLQ>#N3Y+vWKb^6ISo6FYg_ins;$X$P5fYICT
z6rlwcDz*0cd$`p3efF%Z@12$SB16b>rRK+L2kt8Ami+(wk%NC`*SritCZ70@;j_Zn
z?9^3KvyziOUocc?SRb3H{IKcN&qvMMm*l_xzG~YpmZ0RGGfU3}gdgfz_3pilS;_OK
zdv#W7oxa4kaIvb_*Ql#^KZpMD`L`kaO`^pG1!KFl@8!*;YO>e)_g@j3cEOc{J;tC`
zntNTho|~U+ZIdwjM2(UPd!0^WCWiH?nV0YF)h(~y5HG(#<7dJ9Z_ocPo+$apQNR0%
zQ~%H9{p$nf9W_0#|KRYkRFRM*>-FuyJ03)Rjk>dIXUL|g*ITD16^Z}X3(eB5xE6l-
zhR4k6SJw_|79LJ!YABarjhDZ%h%e^j!RDCcH%lLu+^jl#oK0_U%pbwiFDkrVG|k?g
zzn!z!!8J+B|B<v4TLlBVM*E{Tl7RuYB+?z@yA98HvU+6wT|IgIq36B23mZ=D+ZWNk
z<G)2uo6+S3oxI_-!fS<h&%YSes{OY*Y(ab2`&VnSzb1YRuQxTSuKwGhVy<R>$R}o9
z{O{vGi)^gyb65SZci7$W)NtZ7lU0?kKTkV;tT%i0r|jH{{O~I`DsKM!&$3%uNl};C
z>BoMqxP{EQaskSpZ`Xa@^=JR}?k7jw&j0M6VdQ4U%`r3mmn_q)8>SntR%$yM|9&C(
z@IyesrC;BL73!q~^q$vzu9~+hPW*fL|GOMjUZ+q0(9jMza%Re`zR%Ymy-<AC#{XPO
z%A`iQiPzMBC&Np<EB`nu3$+SQ>92oqc-y*Su@K&1Cudvx2mS4LvL>+y><ddx+xl(g
z)=&TSJD(*-XV!6llBsK3wYtnQv3|<arT^Jv1Wo2wUXp$J`gxbd&-o`ZF3fRzU!Uj~
zBK7ceVb89-Nj)zsZ``hmNM5*#@wnL|uE+oX`S1I)=SkZv-PQNKqA&juecJnN*4^Ki
zPJWMirnGtgs#q=I)k}Yh?($z+^|dzpcat#h@Ap6Ue4o~Ol6{|GkHp=xvL_3sb+3{A
zHvOZq(SMy@k9~2o|13>*6q+-?Zr|VhgW?&fYqqW{(lcNGUtE**^R<vh=3QpR>)**d
z{TmVRievrs?~i(y8eI0>6;!=%N8iIHpSwS8ub;eMG1I^El6UR@@b}Rx)|+>Tsh{1v
z>iyT(`>*d0pCtF>O3NX$Sk2mcMRULX*Pq#0zslIZzOwGu_DS(4_Z(t>^e3Aq_x+j{
zy{O9ieXEZoF8L~Q^sxy`&8NEcug>oKF8@9Hp`Bdlv>!k0Qr}&@bY=b9(pTTN{FU8!
z*Frzy(%O=cJui-|t~&XZ<?=(${A#&1)84DkuWFL>jJUq*+x{i5idWpew*Ehh8PCN0
ztM^Z>jb6R~-rtDNDnE-q{G0t?`mrdpOC{Hz&X<3+VZZyk$W`I{BX&n-sef4a{{Hp(
zTY{JB|95$HcF*2gCGW4@XS}}4$FE>uV9s}(#nPz7T3##E`t-k(ym>|M_ghDQZLHgS
z=ezytFzL0sRAc^MU-fg!;duFuNZYE|@ZI~vwXdJr_4SqPuT~xRwc)EhU%&oZe{Fx%
z-$c;|?OB$o`&rB~J&N`3d+M)~Uy~FaU+q0LHfm?x+N&Sy*R8&~RPbh<_19NhSFV}o
zle|03_twsj_R?kR?;p{wnZIU#Z0z;bld5)QO+C4``s@0uyQ9`cf6dyOFT470%-@*5
zQF~+lR(~!2T68q4z0Q5>`m5Jpi@z4F+NgWFXMOzo^O36S!{gW0l+Rq>{cgK?zoLG)
zY5a|}+*h4b()Y{`U3K)=(Z{RKE9R@M3jbTxs{OWDer@Q{3YFt`qeS+zioX4S(u}Ed
z{^6P{Ve(6=!*4zObu4_fP`&uAt&g|9`dXfK_0`w<wRQQ@S%1EM{A(+JZNF`%@V`HQ
zmU&oQ$-M6Wk2_<Tyu(e^<inq@tIjyt>-Jp1zR7Ds!uua8x3@h$tNQ-Q`lw~KRa@rm
z{dH6IU)7ljY4TrROf0__U;k$R_xp3+)&BbR{avT{>-S#X!P_r=z4bl&ivRkbwpIRN
z^0svs1<oI=jEH<W`NPhba{ud#vO0fXpSQ1c;jYTtFZNp-n)&a$zI*w*b)2uRhwI*r
z+FKlGZ(CV&Q#yY4@uTcN_f*YiF_PH$rmXMS^AoA}^H;Ao70RisuaB%c`C!rZi?RPB
zc1>ILYTeK7hgV<i{F}7;&yiZq`tPo7Cx8E+{P~|hU#{m|uc*ym-}T<xD|$NoUvH%S
z-e0@FzWVy^@A^Dd`*PJMpZ-sJ@+ExI`<QwEzn)B<5-u8hzx1J&zk{VEQ|OYvm9J%v
z)xVxvnNjK)Tcs9#eSHbLr`r1oVgDl6?|WZge?D^E{Pj_*_eZS1FlE7Cj;hV#S}jw5
zf8YLEAoZO{>Ya6}QQo`X`E#8~U01Ao>NiKO#v%#c$k&s)Uti-czrOn4)?e43DldBV
z^|bo)Crv`jc1!Pzc1_;ueZRC$|I5~@CcXF5U(8yss(7HO_U#v+t?$F@x^=Jq750m}
z_kaKTy8lyJ_Fg@G<ocrR+4tAPuZdqYbx+;fqV-X4h52jd$A5j$*C=JOI__azv$pUf
z{iR2z#_x~xe6g`^-Pfq6Tleojr<i%#|9+NZjd9EJ^{*rDR_%ZF$G*<{{@1Hj`?U^U
zy1S|%&gtEftAb0KWaiz!UZ=F9?8W_C#&aJqFtGAD&SGWsVy*hiefsO^r$^G3xQBn#
zzHx49Zq<tY|Nllm`Tzg=zU#-Yg9gKX?Ek0zb^7bOb=R}=uAcuLW*0wg{nxsG0#Wb3
zmLGYvCuynFkN(-uZ0q?A?tKp{IrV;q;P>3Q2FkOFC-1YKs=f4oiFNhq*YV!^S5B6E
ztz8%TD&=kT*HyAl!>_u2{TsK(^yx8qH>=#t{i|n%v;HaAe7sPo>2yrWIqxm+f5*Lz
zytSukd(g_s`(Itt+q&+3=ih9JeY^igtIz*)Z{>d$JC{s@AF4e!{=Vg2Vjo$rx^DOH
z@b~+E>Bhf|TX($NEB^oLzgegMAJPteb@Z3<w7wVN`u3%zmvll;-tCErz4~v{U%%<E
z-u+VBn>;n^>ygtZ_Ss(Fz3Sh)CySmgs*gBcRJn`I==zKG_ttYvQVq76c=2L^f~AX&
zs-MX`_eUwQ{Rdx)abMS363(q#^wR3t%c!4MSH)l79kJx=%NmiKuFhQP>xCb@+ozT7
z(~kNh?8dZwS%k^|sS&dS%i|B4nH22oczWLW>-+Aw^|3V*!^3v1k1r1o@!J0S>a&}R
z<yvw?{1mL57sp)v8pdhy@!+Mz4^biIuM!;g#b1jN^zbVFH2p!#p_q^PF_VK6_GwSo
zxjS#`(Nivq!z~V`|MP3;?oOY6efR5iQ+MrBi+6p0byC&qk0CjdZ90e9ERysN3)NH#
z$_VfY*4_G}!6jO(@3CZaeZu~U(`(dKS82}K|4~)qc<k%@54vpSPGr>nP;5`1&^dd;
z0n?Q|QrCqa&M0!<_2=t@u15Y-AM2EM?EHL}+2n<7)5@)@`1KZi`_#s!_o(dG)ivcW
z_r!;UBx(!nZ#g+*^V{_oAN&yvJpIbAx#7Tzs*~^k{|Hay>27!A^vPf=`su{v$QZZY
zQvBoVIrFdoPnfLK7A-%?`{l3IQ~dANOv#wjZpXE8`SVYanf%S)Pq<9`n|tf)r~4dw
zvF5L@XWhBAU%ZS%*DZ;Q_4wIYoB9Qu9(L{darke`!yjB0j1v@O*S5TBJH+S0%VWC#
zYw?yc^Lxnw{eP!RrA&M4Dq6L(Zf#uE&xcjh=e^&#W8N#h_xrwm<MeG8dhmYt_q#t-
z?3%3Jn0d2jED?Rsv*M+3@;9H^VII!&PTo>$%(3L#U+XtDxnuv<_ot8f-v0IE!@jj~
zd*gn&PTBs$;qsQwGA8GCbHPu->%;F>*{1$~Q>faWZkKiM#r{8c|E;mBTHt$lyY@fr
z<L=zwrcbuDUtPE3f7I#!Utj%STeL3p)$vzfmrnltHR}C`fbhNL#WjBGG_UOm>|HC$
z$`)c+ugbEf_WShf{;n;j|83JgHd)+k-nHE<h8_KzKVR8!jHl|eAiJc1{6lLkQ<r#$
zZ@(mG#tEPDy0v+^{!jZSXB6!(+Ul>G^nBO8gXM+S+|NDxFT8taXQtloKa*$7NqD~h
z`tEmcWAE2~zh4_0rS#uZE@^Y<>#Ogt?+<(9cJ6)MlP_-i2L+R@|7TvU`V+aCYxVnm
z7jNX4+6vvNHoD3#e^Bg_N$d|5!*5e}>wOQuwavf(yI0)X&<Vb0=e_!;wpVz`*QQ;r
z(`&!A$nH+vR(>&mgY#6k=&wHVoVig|wcodIN)$VreIw7_Bl?u*ll@WOZ*s~=c)#Cw
z_0^mG-xqzmeJDj~(&pWN>qOSOZ|D3ZC@}qG|Cag*t%mpO_x8Sg^>DTnui1w`U#IOa
zy8Oq{m*2{T^AYpcBi>cz@An^6XwSZ08vg3}Dq9cjpQ$g4-k*H`_0W6u{eOSyd9}vI
zIxsLWD>=?$Z3JzJ(Kk0!K2p&aBk=qGKbCvH|L_0*zp3trW%%EsU%%ttM*RB!b@j7%
z{xR#XzA5iM$eJDFe>|(~>ZPaKC!gOPeB<fPyDYnw|NW&s<N4+F>ucA${89<c`e?VY
zZk_n*skLhZ)~$_y8P_V@7d<IHTeCFo-OrDw-@U$iInwZj{#V_)b>Uw_?_XbEx7#*c
zd*5}guQR%yF8a59Qt7{^N3V8o?f$y@YxT|+E$?edf3rpQuYNQwdVR#Q{nyrCi+DG8
z`qfp>cCHl*nLq7Op7&qDQ~R%L+7?Z|7QdSHb=dsa@>|b6vyS?|)LvD!HSg)xS6{Qf
zR&A}cjsM#HHUHXvJ~y{k{?$F5(r)~J{HpAN^#2CQ?@RLh8@*kuWK!Hl&$_Cwcc!fi
zU)!~H)t9L6;Y;fuwA{HLzb3TO-u}tgx|i?Q#ftwr^?q*lH1FMi#dUwL6na!2Rvx$M
zjq0&Odl>Ux9}0VU`L6cc-Cf&eFYep)dwpQRq|ViouIk-iy&^B|pMU+Y{a62+o7h)}
z*1h<yKg&jseM>w`P3mSNm&=~7@2)OetGhV$hKly$n?KV(wJ^SkRBzd|scOU3FV`O%
zyTsRZ&5>_P+Wm92?=R*3EL$%fi`~-wTL1JU9<^ux<NikF<vpu@%_DrWJ#6!zufHWH
zZZE(8>h5aUMLU0~?T*kt<!6<gv$S1(Ro?ISTVL;ukMX_rG;U*{?f&okZ=d^qGSMnO
z@ytDc{eRbV=Gm_(*l}Bfw@${p-}Up|s;Tz7)4cXS>?skDcPo4Sg{knx{OK7DT@5og
zhnK%A&)<LX(dv#x*;k)@k}hevU;F0v8;jDd@w29E|84Ya+0&i2@=xZiee{2AzWkLd
z|B5tz9;$!;wL9*`)|;>F&3UK(ufBTGw2q^jPa&^Dx_`Duj^=;f7w?Pa)&E!A_@Pnc
zXSC*9xxJE4_TSjOwrcA=>$Irnt0T+fgakDlM5_J&PPx5c`|G-`%fI~dpZ_VFW6!gT
zoA$>|KAYv_arMc+Ka)iJzCP&b)w{p=ijx2Rm|52!wE2H6w_oe|U`B7s*}I+Zzm)yW
z@($i!zuI24JZ`=Ej@ueLc4$WI3E$va{Omet`LdP>XfdzDh5#K2Zc(Pgjs_tbp<Jv@
zO$rM<lGIGkobXyTDdeQWuDhGUmbw0vaeeW-ZR(H7rT>q7(4Xi2aqSxBt)9YW&##ut
zUiq=DxBdMu@5TE)#KP-dfBSkvY+vuEqs!kH#ILP!ez5Q(zhGAH#rsAj;r!(<%-N~~
z-e3B)U+$<!nB}eSWv#y-+57kWneO$1^9R@MD9fqGWq!W<`ONR#GWJ*Yizgij)?nRU
zm>R-!>2{sr?YTAewUY0T2Gt*r{qMe0{aaO@P9}?m%!Y|sOFQ0vs?Fq&{TewX<lusS
zmxJ@9uRL%1XXzsY@@bC<H!I7LhK&I_D#ol%jR^+=RFpY+4$b}YZbz|Rm%a(7XKQA~
zUjCDR+W)tot>J#P_2rNAceg!$e(p_re1UkV(z>r^*Y@7$(_I(&YmZ#azE+>vPoEgn
z3*Pw?Tvt9fICo)aPr;wEuUCI-@xS@9^VOR6KRQ1R?cT~Iz6|*M)bsDJtriR4M(F)&
zR&EH*T))+zWnnzu92dzi<+5zkeb)y+pK--4a(!w+`?NOpTP6yT48a|ByvEb_?c4t(
ze%VLq#$NT!Jx#5D-<znp?7B36a=X!&%f+I0Z0r4#jy>3PrD9rlz#fUc4Hlrm${rEU
zPAAZ+`we1ToK1}Z3qv&2xLP_C4LV}3ZC)R4wBz1Gwbd#swYt1#W^X+5?f;!D|H_L0
zYyX-bb)WC|{ML)Txpi|}uTR_feZRx`+HfC>&)iZ6&HkwArrp2LyL`!(1O6J)Y=MRc
z=Z3qSFWzK+CXJ2LY|B$mOS79zPcBC8m>=w*d;LY)%D}r-j{9HTxLl(eFZ%1-(^MNh
zw)q>*`t9elI?ug#;-8#-E_uwIvlEzm%R=I6xL(}3b6sxAbonf`V2J{8>EF8ytloR|
zO|$CcTV_;q^{DghX>IPIvzO12VN?>Cq9G>4+SHt=5XlAFR?wKRVuF`ACy$e(pz-|t
zm}32Do4HR<>Uy-&%VSqu^Y+v@&eQ*G-M{3||9|aX{8x*BWT))4ne)$V`YpG6KkurO
zOKt=geqFP(!}Wgeq>qj(BA@lot$nw=RA^<t>eB4x$HWemRDFAYYs$MF@BZD=RWlMa
zc((ZYkqVc~v8Rg;&f#28Fv%>uKJxsFnJ#-TyxSd?RjB&>gx%Dg`yJ2U&;Iu7_-7|=
zm0#;C#XrB@Hbpx>eZkIapZN}k3BQWB*uBatOlaz5?R^V=*Xno9O6<vPTfP1GU5nMc
z^CFlV7!^b&sEHVxIvsQ|2uLv$Y~B01>f`h3X?K0|;tC^Lf|j&icRupB{;s+Irx)ux
zGk2D~+Eve;`cUO{yx69Cm8YAR&i;`mRsFKt<ax)k4^y{KpZn}pB=5ps*W*9O@0GkE
z@aaMD{IeXjv)}IX+st4){hs}T+TGzdEF$J>TUQopcK#9JFIza-;(LzEeaoD0Pnmws
zeja?|<M*iNYaI?uwRP|o-S4vG-}Ane($4Zt+J`p(wrkC}w7=7{^whPbQ!=9h@0bf^
zS38|r$|HHrd*aKmKhu^hxEU7q|8?U5khi=<6kC~&I2?%R5aVKPX$hXtq9Q1~w^-w;
zY0r{5c~^s8eGKw)c~Fx3d*QMz^*`%<|Jc92`=~Y~{0{$--4<tm_rI;^cG>gy@1=vU
z{$88E`F6Lrt86~s?CI+kw(qX{{8mR?>(;{M>$ZzdR2KYOAJ;N_yBlNrvni+bKYUP{
zu<m*D1eFOlIYUZypGt(3X&b+6pCi>0%O`W^!HadpvlEXm3cvB|_ix)3PfvEp-nnkq
zvFDl6ttYP4-P2a(>4z<pI(p0R$E&n!68@&;yS;=2EJfP3Sth0Wul>N>z$ha!MWYzB
zUm)o~M1~lbL6c&@gciM@*W&yto@qud+g>d6DlE=B^wb0EZSp4yez!gRF8{4=@q-)3
zJ$~}7ypvsXD_cM0PImowuG#hS51Tf>Zrb?z(j~9pzl?g`e<odw|6MYDqtBx$^IG26
z&wO|LzuOP~rMK2!y8q;^(vG=>5mP@@WpBR3_TB2&kMPqwr|&*B?`+3|+{1jE(l=H2
zl>YLwEZF(-+idsEy$`CFZx%S&Vn02#QuTbu^IpjX3ntbj-n!K9?)N=jFOPS!<b=Sk
z(DXyWZt^-IPV;ty<^-6YI?iHa>|!l_;S;}l>XCr{t*Zq$3%@$@RVUK6{#Di1%69$L
z-IHD~?P|@d5BnSW>-E>vTUV<#^!=?~ANp_C-&Fmzo>^ayeAT?Y^<}Mx{`r_0_h*EE
z{=B+w*W2J5OV_SjdirYA-l}8znyV*YoEL8~^;_4}HPdUgMPD7ge`$Mc)c4ogUz_%p
za+K_iTNiqDb?oDvT3=tX>vQTxEcCwhFkIp3#$V60ukP+T`fO{LfB2-UCvCGoxZgPJ
z;pzOh^w#O!U%TRCy|Z3@t-q$VwO)Guwf*6{*RvGK*a&Fxo_l{~`)j_xwPAb9Zz*5@
z&T)I)?srP>MJIFQ&sz6aY{|agyyY(2l5e|A{;$4f_xkWZOFWLq*Dikk=Sj75as0o%
z;q`HQ%T?Ewzgn-lx_34IqK9!OwuNV`(fc!hfAppJ!$xh9Zi}a1Xi;Q%f5`COH;%_A
zUM_KbGd->|{<LDz)6-eU)4y(h#M|-z>72^{E$h0UReyc;HTkRUuczv(?QNr%8Q$Al
z_5W+>NBi}$Cnv`(Wq;9Qcj{LB_1%VgVb4Fu{jC4)6#sc*&AqNQ#;#Qh*A%q>FMiB-
zFh?lA>TSf^s&#v_zZ%&7KC1sZer@d8w5RK9o4ntKKU`NnZPGv6E7Rw#40%=mDk1jY
z8u5ASSI2*zSm&M8v0we<kw14QtY5NMv}@DE`zz!2xD|`-H+r)Frn_VK>x<T3o42M`
zzB77xY=3=|R{rUamTci`|5g25W8WO1znXv1T$@;-`1SEcg~pXTGwZTU()eFrIa-&w
z)NxMN=c~bP`@H{5PQRhJX|Zk5#p>yLEo-NRyy;gIl<`@;=U8_9<UYx2*XgO(w!U6}
zo$tTuyPCQ+;;Tg;uDDkB^YMX;Z#MjI((<hEe5!o$d)<z5fj`!7J6B)5`~7}KoB!#A
za;feuUc1+;Cvp@QhCh@|%J?e~|NZLu>#O75U$pZ6vC~#oM0)q$YL}OLqecI%E%B}T
z8k^>^CVWx(i$$xyYG~~Lx5NL%1dsi0hZz`{%N=L2HLe0peyx}v|M5=z|L>pbe?9%R
z`SJeM(cjj@ua;jlSO20_eAJJvFZBgBRee7y-n>y?VSQKl|7S0!-fgltefrJnAG7up
z-P;@Se(Rcf_kX;fUKamv-8D_yxV>?IBmPd=`srug{Rf9R<nl~Bh0E2Yq_pkVhuO0l
zT+)i$^6qnmTe8=_7CZm-e}XjDY0U5Z@$UW7tG~9)xxd;aw(9$<lY48#SH9%_{kQ77
zoo4*&_t)273oSjeKK5c={_Crke+iuE+V2$cw`hOdq$gWnf9>*)>-c&o^ZDcK>qUQy
zI_^fr$3$1m`_1wzYmVqkQKj7eJGI9DwN80IkNfL!sp@;}zqPlnR(&n5+iUu7eI3Ww
zZI53af6!KuA;L9%wS3#B4S~DkWp@Alw`SF!-s-zqfw7<d?eu&d{_6jlpvpCCcYW}#
zJ{fm^rI>l#&hT|Xd}p0au5F7w8?+O&xbk)QYn}P`|F?ds78JU-+%Mt8NA0Ko_T=|1
z^8PvRZsL!+Kmpa?g3EsIKU6-$c;mJA>sjVW9$5b)W1Y{YAQp)aAB*IFC@Onarv8r%
zyIi%sDC+C4FW2-v->3BXzyIJ~6uj<9ywmgV_vW36tluPb)ad<n{*!xE{=Z&-P56T=
z|CtJ|S>9o*CttG(-`lV#C;I6OhWsCXNn$&)yWW4k<`&)k^#2-@itMt|gf8(}f2)m_
z|9bzieBbSVHSt+yQ~yT%`TECr#ns6>eQWDx{k__MeSh4$zcGK~-d4Ril3%;%VfdYS
zFGY?v{x9L(o?X9a=P%Xx*_+h<2>!SyQ5&<y?_Ku%^Vi>=_n)4y{J$OlJ@@xt_5S=V
zInEjXB~1IsM9WKIT`Q|Rx&QvJ->E<C>iX+TKL^FMeE9oh_wL-k>tc5%{Q8=>@3gVt
zljGkld;hCG3l1w<Yxv}a{JZGcSNA^u;`cV<%Kz6>t7h(3{qnDq{Y6yz7WcY1J3C%J
zf4hC__6lqCyJvl^`ucjOcx;5)8|5=aQY$uJ-oN4M^5ln~FBx%Isb=)ZPS_>&_h$XC
zt&?B>|B{)1|Lft+nWt_4$p{_$6MU=wkEvwlE9a1t(e<A>Lim2~)U=i3zZSc`?xbCP
z%B75d9R8c?-U@}*?wz<j_wVmnz0JCYc3q#V^rAlWZ@#+i`n#*A_nlF^`1<<p&Mj)1
zdhc7F9kTnqJJ>Y-)#KOt*SPmx+x@rn*TGxHr?lUHiLcvzhk=3F&2bhx<0;nKg}KtN
z<JbNVQ$1w&JN*CG!hiqY|No`G@Av+w*-KOQ{aO>WWY5>*O2?mmEi4n5cb|8A+0K*6
zS^KYUJaTVu_SQr35sPe#{wDpc-oLc?YnF)r^ZieEt>+7^tvNMq_Ad47>sSBNjd^rs
zn^4fBzx!W(fA#n7zm1+HhTeVg!f~6!SBI|&j?a4cerer5)tDE%_JsY8*k!ghHaz^-
z-l&~1Ti&Vf*#A>+-)pYwA6qMHg*4?qRq4jBeSP)y)$@m>S#^!pXB?5zomBkt@~>0>
zcI@j4Po5I?W9?tvvNe-lcOK6dR?$rRdZun&{QA&EGeZ33KXYua{j&Ye7vG|A>Hogr
zcRw0MUA<f3v$*&EvD59plzUHqzI#i*Z2jk&ub^21m(_La%lytPee(PN-~1nYt#Zn4
z{`u;-QsC<QLuFjmMH7}B{HE^lF#DOIk$?Aw(@srH2N^YV*Ump=ER=B8=Fl%6X^v$R
zH_SUEwz26%#r`Y5?~2;3c^kZb-O^uQtKUX>Kb?H^d-_S<`>QY3Uw?awLo~7O*RuB;
zyq6S3Zcfbpe8qgJ_wRG-UhiE0b^TQ(*I)nk2)x_!Xx$$5$NJlZeCxLUnE8Iwj<;9l
z+XgPm`1JMF|F$)E7fr0Jj&k;2|IIJvwq~btO5v;N-1mybMAg@Cv-tX7QShYn`<=RR
zt2=M6e&-$ad-}zs-8`@44|I4xHI>uZ{`>yRwe``f_q$XDeSNXMZq4rECZ<HrX4aU{
z+8LK6*8R<TzyD(7&sVhxtN%yVt&3RtHg0>&9=CnFyFPesJoRGL{}>V3HBawskE=R<
zV!Ougui@*XCjb69_1TxGKR@<V9Dh6UyX*g$Y5B|cChh(!roAO+^`V*@2bbr)`};ZN
z_I2|Yo2Ffq{B>0Jh4=2&`y+Nw=ip$Ovp?|=r*Q1IHM6h2KH+=u@2BUlPJap7{#x?a
z(eQhJBTZ75A3c|`^<LMzz0q4Dtlv+S4t?Pxzxv;L-IK+YMN8cOn5Kk&{j}-1>HCf5
zH@-~zwfn;}1A|FdSGh~rf5};XeShdJt&6YTRw*CkR9$|5a`|7M(APKX@9P$)U#yOP
zazZ;}-g?u;7IEuqm3M8~8+vH#tFKe5u14+Jo3KB8efj?K;@GI|8cH+z-v2VK^Q*DE
z!N9<r?l_CR@fB<N3vc!#kHpj-z27=}*UpTawvk=xUspXY<aV;=6pml?Z~Nc;Uq{Q<
z-|c&`Gw--?^S*2QOykzZZ|w_sy3hLi>(JL)U)KbEzh9!6_1`j-&3t|DV%Mn6H+~21
zTNEbW@Aj`}`poa^H!hqh)SPzoX=3~1wN_hvc89;);JVN3Z~WKf<zK_6IQ?-iFN)fs
zKBxOh?|;|4YkI$rmuszOtvaZGYvawr=C9LV$A-V28vAR1*3qh`9QpdI?Za2S2)e&6
ze)ZI>@1@z{uf<>e-Z8a$k@esBuj!=*({*;ZJ55`+H(?|9ooQ?8HvD&O55IG*Q}oOF
z=%A=C{yNjPuV%4%B4%OFGLwD&&R-92Z48eV+V7k7IsVcq?}$~``LC{8y><HQyZ?lz
zO<%Z6_DAok%FZde`upFn`txVUYl;7_uf9J&d-bPxSFNw@4E^F8GyT{1*Y_8Ftx`OC
zbLI6<CC3;fKVAE~Zhvg~-CNreS~ff}*=@M{?_||~d(~H;JbP!md0W;*_YA2&XZ%C!
z*X({b`D^_e{e!&uho=4dGF|laudP+eo=;Em*IxbC-Cq7$d)Kb7FW1(^t}m}!tNL%(
zdX~)`d?znR2Qz<N_xh%Za$xNJ41cAc9EGp1e*d-O*E`XxCwG1Eop+@E|Nk}XE*h+N
zE#4BYedKL4N6efboJ;1Z^1S=+l(e2@s<vG0zp5W6`~O&0-~2RB?f!$w(`2jHR+qmH
zFRD;l^-grr&KpHFb^G-$-I?xvi|<9MfZXk}@D;fxzxIbdQ86vKw()~gn9=#Yzf3}(
zyq%z+cc_{<<h{1_`f%C6OAG7Py<5wXv^O#?>EnUsKZ?%%vmaH9RLSOlE_2xqnlKfA
zw@3Zdv13`;=SqLg>HdGPm1*{+d3wiB88{nB`X0XY>vxw?beQQIfn%3y)~3vRoVZJt
zv2JS}r*Yi7->d(Mi@&-Q^<`SJ*Rj9idcWiT9+&!GX(#`|d+%%Qoq4~$9{F4S_5Hz2
zGk5XCuC-6T>;Anu;q&EN-=?43<@Y!HVWf4~$>0Bfz4CUedw((Z{_~yD*}s>5(|5^x
z6PT-V`F-7=K%<Ri_V1!{rk;I&=J>^^@PGHGUw!s<!;Qa}_cCs}t6Zg9x3k7;*Iudp
zE|sORwTiZ1)~wneCLOas{Ai7O+}^tNxAwoj{^acT@L%7vr&cM~?eHvpvHsWA1B^N%
z6LLhfd6^j#cSK|eaq+Y)bkvxjCB)Hl<aO@d?4#GCZ+*``8|rn{_t2V&2OdSm`@Hs*
z+*dQ(`_H1gwI%c1{><*}*?TFdrgMK=U$^`JYt<I^og#WAX(q>SH#LT4f1MMs`NMzh
zU)9$6+p8Y2eKoN%e#_>xe9iInuQMxeRQ%dmZoF^uwzKaq?<zZRI^@UEJ>SoK)!K9=
zg;)K{=BM)VxyoB_U#`8jT-JO4_Mk^qRYqUdJ~vsnZh!h|KJGSE?!!x_ywTDNI968A
zxov$@$eg85H}3qP-tKfHef^t*2S9@?IwG8{ObZhgW)y(B>5WN>5*n)9oJ>v*6MDRE
z-8{bc$ldI+Jrh>XZdH19mwm?DANeoL^8?M}*ME&aRpD~UU9SAktt!pe;XI!MPrKEa
zmmj;d?EO<qvo*h`U#k%dsLnd#QehNs#m_1hQtbEO+>D03%Ij^<?swR{zJF=T^uYNX
zZSxZy1#YhSV>$bPV0?+C=xlcDs_wtD;zRj63)uC#I_Aa7{AJ_G+Nve^?gJOcOR<d~
zG!(e^uvo9Iyf1$`@!`YO$JX#iwZHhwDd-t`Kd)S9uVe1cMlV4*i#2_hRi5v!&d9Ax
z4&h^z1I3{-E7L)T4HtA&xH)+iCM*CAm$9@o22AkLOMB{DwR2ses%a=^c$?cMy&W>%
zT{Wi{70ZhLjE%kT{<XJAcgO#))7G)3CuF@pYVd3M>-tMiZiq+inP2^@{ZdVq)`@K2
zY0`C9mcMDVQ$KxR!#}YD&Y5exW&QGdUR_#T^ZZN3>_4}~&DWH>y<29%CpXbc+Rx|e
z3ZsDjXug}Sr604FrUgb;9<H6Y`UGeE)*taY=aYX<soLCize`T;p<CA11u{+SZ?k43
z1<haFKGi5`;w)cBU8gyEho(L3$esFW`$p5q7YnnNuwUg^AkFss7(1hs$P^tBu1>Zy
zjvE4cRJd7L7CKC1(T}aZUA0ha)hpN7kQq}a?U*Whpu+R~g1!Cx7ylfO(qH@Uzv8K!
zw!3$kcADmhpS$$9Zf~T1C-><F@w%OD$0nDq(&VhX-oB!`f78u{`?tOe+&0yMC9dkn
z4Y`;p>ke<V|0y}~&z|S+g{~%he)KL@RABHqe472XsE9tx9;PKxEbPbqzyCX1w5+VA
z`u`IL-LG#{7J1Bh9>3<#4xZz_%O5|xFDLj$Tkrn*S*73p`*Ezyo0T1<Z<d>D`#!?|
zOjtzOjAed0nF{Z`JeP{6`5cjabaI6ZqlU<Y9APewLx&wUUhom+l5A>BT;ZW7sJQ#w
z&b8&oBxaoq3JN(nWpbBWd;90}>tD#9&DwwRe@Dr(t?#-5>p5rC?9{F(y*i=nw9%cV
zs{{RWbAzsLneNc~DM6R>TJA>Pm&v7t<<AqQ<nNz*c@^J{D!(~3Ul$p)Xm|0b9{Hjp
z_B-ua>w)|J-<}5kU6nPlS8OuZ0lm9t*W6{_yQ%Bn)Kd#AwKIQ7UQOPf=H#7wBE|Kl
zR{!iL((jUG8&4~(UC#OV^Jks&9l;B&4!(K+<)f+Vt#@kg8!uT+UHa`sCr{yDfyNIX
z^4LKh)Dm%KWjX>r6vwHR<xoIJjEW#9%b|Ujrmp+3cG9b5+ZDA|t@<4F$*(>C-qriP
zm!o!n+Q02j{7Fl_#PfCWZenjI$IrBtTFPbnWB&i1M$LH6EjLqblDGWY-ctIGQSRBg
zyGQ%(JiW~z{$O{<re??Li+TT-zRM4C?9`gUx%Aaa%M-IMO!%_n-L@;|E8oP|$eeq;
zf8B~r^-t}7M;Bha!mvYsUH1A7e^yl<TBmBk$@Vu`=I;CHwT?o{cet)FU%vlEjqB&N
zM}aS{oBPS_oY|AV>zKzDHFMsm%D7#d*Ue{X@!RHo=LS1yd9jw55N}I!!hr}4F)7Zb
zg)RmGI-(q{PL6^S(LPh7KX294SDET5krdlL@#p;s+vm&WuiIZTPx{Hv_&fXF?x<Zk
zzxL+EGqzjaEq-7XXL@d;(zL6w&3n6_aMV<q&GVfeG=oca@#gNmE1S1|b@|8sw0W`a
zi`LC|Z$$b(`6_ji)#<RN`@uT4mtRb|CGOp77I6G8_xHM8_=nl&=ZIwg3s9JNO3d#{
z!kRgD6aSo>aJswJ+3!nj;$la!h?WZ>4lIAeKWx&QTQjAPZ+ph2?`dIsk4>$<qt%q6
zFh6yw=yo=V{VMsk`3igt3{0CGXK^rbvDPlk51;=3{j8n)|5pF{`a@l5^Vj!R_pcB8
zvHxoO&)<Jz-aiSm|0}Z5``S9Ib?4q+-0*H?_V1rh_x+2X8vSo-L~Yd;<;RxcyQc0G
zOufi`HEZT^-Kq7G;eGM(>-NX&{##xDZEv~i%B<aSul0US?W=QH8M;5dY;F0iB~fa}
z^i~wz+FP_PWB19Qzssz*Ok3lj{eJb={Hv30ZRLp67ukEEwR2kbx~uzkcg|Z|^*3&B
z)!XV1Q#OAy-Z=fE_nm!frg!|9splh_vhR1-%edux!{Yp3JuI&IT<n{+r(ojWf<OO$
zyexNG_a|0mM&{|Noy&K}?ep4yZT)qvJ<j)62in#D{_|tSyZ@i^w!RL1^|ky})a$5@
zdf{JNtG>+ft$O-1O+-erY;UT0+xA_X&Hul?-)^;Wze$(l|2gZ`c%?R^2C9GSDD}?1
z>wSNH$=ua{_N#lEX_x<eXB)lzYm@h1(bd=akEA9t-*|EBm-78d>!)Ae|F?Q==|`@M
z|334i@1OHp?y&gN{TKSCJ-J(@etq|T*VUh9m;cgkzW?On&W{x;T55CWhTc6{bhk=9
zcG0Uni^M&vbXK1aVR_NcAHOy0Yty=TmKJ{3)ql&nwZ3-k4bop*x4FFQXw9tMUn}#!
zgsgwiz2tgXP~OwsUt(_m(hr;Zy*xaw<M@M~-Mw@CF1>$xx-{czxcTboMNvXiYlGU)
zrADPm*cleT`oCy@?dtHgfi~BFzA<<5os{d#WI6r$-=sIf$CMQ0dcU5oPw1{H@Xk7%
z(DysCyo`IrIR)EH$&2B)_VV5Q8vbkFy;m2N*4uMAw`JIWzZ)C*`lqkd^s4veKh@5%
z{87Cq(sU+u|KA@69H)G$x-#R=jrp&yXM7Fo6}|r|Abiu__p|q1-KGBZrH$b1c{lX+
zPQAN$6SNSU^VhOfOM|bzx^%y^s7fug7Svn!aGb@_q{Uj)@%qBFN$;Mls*l=xb$873
z+W1eUrLRR}&ioQQS^9c<>8poT8_M?HnYZq^?e5jC$*UY?f2qXk#wpDDx+ZA<$&>3p
zPp$d1$+Pm!Z^PHz?>F2_S?bJi=T92bL9u_*y)Q4UQ`!E?>Zoi>^vdfoEDlHJ>MhEP
zk6Q1t{%XS+&D{|z!nEJ5SsiXSJ$_}@*PmRsz8<u#daA#=cD3*8>#wKyudQ1fx6%Ln
zM-yw&@0E7@Utib!8yVxXZ>xIu<eF9Qzb5_7@_zsI^{)NbmOgs=GXCr8uTv}3r)zw=
zpR~8=k8rlSJKL{$*ELO}<;7Lk-94SnUT+#(S#)uI_Vg#K-rq^s_5bAU+BMSmS*+EM
z-dwutpHEe3tM~KuJ>O5-1iaZ{==JsW*43ZNGb7Zu%(FPVh=2Nn_VZ7Fo!WDEX8l3l
z$Y&hOv+}J)-c|jRSz6n4X+FQ*!q@Bn|5Dk_5@utY^pf-A|65z?XP3Oc9zV@lC{g|I
z**2G^@3-{iCrsaWUH{tZE3M}1cJKTj*YhD!oAK821r}~c-5;*#(f+@)w6TAF_K^tA
z#yJi-TfW7={QN5Pw)c}=U%UPmyv_1(-wIw&nYDA(yzI!jB*n8^pX!-ZUY{{()0KL@
zFK$gb*QM<?y}bPQC%5+hNycJTU&6|gBU0w?yPx(yT>Dx=ePrz2YyZB!dJ;O}<mLL7
zZWFIwpXVGqqPxrAL_V8tGvfi5zxbayDa<hp@>5Qw`0Q_TK73N|ogK%A=Y|e9W(&^0
z+J1HQHT^@?`^)1LCVjs>wQJwehet2f6dV5Ry7!;O_TKJ40xuVruhuk~UO#VTUoUgZ
zi~3I&_4WVP&vxE_a=K}uUH3}Q*ZaQKFS3eW{kLFyd05}m3Kc`Q;QUL~CqGqaXq~hQ
zS<~^ga!<?Y|K_F|^Xk98_`CgX+wHjjO-i=E{(Q}A@v2gv|KCDU<NyEo^>soflO?B~
z_OD;Gr}f{TBju-GyubCe?{E3t*q^U@O=kb!y<h)siNLnqVeehyH;VbB)K9EG96PgD
zf5S2PqXLOFTi8uFp8Pr(djHv<gpXG>8YRBO3O}l8tyQYJvcvsQdD)poTJK)|UcK)f
z=q!VmHi8`<KTrLQig$`^*>d&&ukcs%;;ycH_O&Ty-?Ih=2BtE{S)5E>ti^Zkwd{X=
z`J=se{J&i%U;h99fAtr|?+4ds&8+(Wzw`am$=6n2UsL|_ZPKdLb=S^az5Y7%^_uYT
zm{sR1ZL{_NpLY2kuD(XLNvTKtJYR)e=jVSOuk}o9|7Coxwf~y1Z>^BF=*jvui)&L=
zPrqJ$ZN<8GtdTqBUA<l}oV-)2M!DnPtIpl8CQsk_ZpXZ<C-%op&;FX~dcL@7W8Av3
zy`Md^4fWT^>&Cv@dN*;-b&aLdeyM+bANsXw!>_OL#lMb*YL|X5b^WzJ`|_{v&(|+-
z7X7<AFXqyHec@ZLywt-*Z+)4!K5XlqdENS@UEY7IYyUT8<p`Bu)4wRQKCEuz{nh1R
z+utTkE?vE3a%b26>!G#Z!*%a$eJyfty<p4t=c^4bZTi^1-o0(olimBLdB0owZm;BH
z?!P}g`ll-&en0(G>`m==d%tUQpS^O3c}Ms9h^5t^D{Hpg<d}EwT<9OG*&X#^+NaMg
zijG)+!tJx@pXUj+`TrNy%)YjI_OYj5Ppwn>{mx&n_x0K8f9us3fBmIex4&ZTt)sf@
z!&fDLID0YH&&lNaPur~byT7>AwQl^8*IwoPa$2|CldVnhZhk*@>1<u~tNhhhU3=At
zS)Z!^&sK}io}RwR#O3k-^FaZxG{oO^|N0yC;Nf?ThUM?}3Ln|WQSQBC{gTN4;k(!P
zMt?2PJ}Bor<>~tKAs6{=>wl>4efPcjYx?t5f(rlt+_@6^I_U1}pRQt0l=JV`dD*YO
z7_t1-dKH_}_}5oo9e=O-?#Wl9x*f0X-oO9q@5TDR6IHf9+^fERy71Bu?x#D{wGU5S
z;>h?hvi!-eeXq^t{nFVVwvsz;cig9xudDW7l=F}Jo8=$3f`NhQu;VPwCeR8g!>u*3
zXJ7qaeX=;VD123kFX#TackAlo>$Zl6uf6^?#rNO3)!`aJ_t$%da=f1QKJ3l<-Jd7c
z+5X#`wQl`ZBmdgfU$v*cj$a?X`tDlK*B8ZqzDn%gf23sXMf2>rp=*Mx_RQO|de;{B
z4CjCSnz>nOC1I{nJM~wGublksZ^WOwf|LI2_Wif^*HhuFqlLk@?yNqdU-4dK`^}mg
ze_E1~W{OPj*dH0b`o??FPe+UX+<m;iZtu_SN^`3<PI_Mo+xu(7uY+omceZC=s@gMe
zWBlq1$BUNNUT>;Z+cL|i^M~Ss-tT{IrSq_fU+GxzNA<Ci<LrCi4~Hx-&DQ(<`AYr&
z&-b&%O@&{FmOkC!@BG7H#^Xq#<Gvg8elaewdT{spY@Xe}CvE@fIs4PU?)bKx)%Eiw
zxBn<sfAxaR_iya4y@%}A1o8bXTj%{mW#@Wzo=Ce_|K00;7bTgoP0WkEXtYl6(W^%_
zJIYkKIsPa*Xx8#29GD`&IpM*7v*ZtVW`s|F{qNuI+2`XoSd`8C8+>8i+U%YBU$f43
ze3)rj`S9*^72_MLx%~a+KNmZ{Ve$HH@;_Yy@@CprZ+-FCdu4fC*Rx$GJRWX}E)W0d
zA39_I{!P|#|0DJb_?aYky7<@an|@F*>)-4n{vCRs*6(|DG+)*+Tkh@mj%}wNi+#Si
z(L(zD$$5p%fA3E_Wb*gN_wZ9YL)Ywa-<dP*?)$4<pK|z&#UJzqM;*PnC2jBh^4fo#
zT$@%__fP4!obr!zXZA#8yH#hO?fn)}^Im%YtAM)BtF_++U&NkV<Je`|ADq9RC3W}R
zABz2%KX$vU+EjdL-5=E_H3gMFZT{bD@LRV3;=8(aWi46WU0-6u*U5hWb?WbA^?8r<
zztnBKu~^Bo_M&b5>x)|JzZUNLBqn!l-|Oq)ucyBb`eiKoI_SyuMOUV6oqYV?TG92_
z^_OOUm0hw^f7kwtQlQ<IGc>}uI45-lESRAo%Ei*6Qn7l^p6AXg%VMXr`h-mqZixQM
ze$;eb^F#jlk9EpntD1AKey}k78yx;$eACxknal3StWBA(tE^eS{jt>I2m32!*$;o~
z_0B)pbg1r2q2tPZ<xj3Y>$YdV%^muo!J6wq*|%qtPE=3+oG!cK?X}Fi+TU#dsI68$
zdbs{n{5S6F$IGQcwtd*J_v+f}%zeLfTs}Vdzk~1oiCrf5XZ|lUn4k5};BE7>e@hM7
z1ztt)ZaTvsw8ybEcdK2-L$6&E{|lenC38F3MfpuTLjh<Psh0?6t5c$*;RPKP!&aU{
z4jmp!LL8lSpW<!PUWLYbt@UZWv@FP@oq2Leym!Ta?(LuN&;B64YV*IKyWSqL{i#v=
zgtMmVo;7u6{%g;_f9g~2PuA8V!fW5WvHa-2i!Jx+&N*|kUWmrsjdRU)(UzD0m-xA!
zpSiikG_LdYpNonUW25VL*|w_fn@}&?ko>WKYO(jZ1uYLwmfin#c=EsO--mKORxMnq
zxXj?%r~64x5vLu!mn9b1xH#W(XJk(Lsu^*<w<z|?>J6&Vzt2ASR@EWM<sr0qNASMP
zKU<nXizOy#i72%;H6|>G2ryG(b!tvn;GrQ3TCSux!zJ&vzOb6x8@*7^kg0}ct5+^u
z`bRo%(qH|@_r>r3PQSXhw#p(*_0E&B9ozy(SLW|qe!F+>jk~YdT;@)Hs<->O315WB
zwtw#V)}Pw0-Yu#9aR2RhnV_9hl3h60NA_1LR35T=Bk+7(PU!wN(QQ+H`9GiSCcVeG
zKk;jLtj;N~$;X%Pc)s^X{O<E7L`7x)H9xG~BH`Q_w@v>_=-<^Gy4*kCJ6`MAc4dZW
z)rsj5bw?N6D@|Fx;nC&mRa2d!XS3~*<L9to)ClR&5>e&^jVT2ffcA?v2P~MPQ7SOy
z(Br+uyJt+boVqkdYpKtal`Fkx$*=R{&v~*Z<>&i4zj<+)7qW$JtDlOS`nq33-}dhO
zVB6oDcIRb<JCt$!x3uHxestG8PODAkO5Z<SA?^z4#Y<JUACvm<e5?MhxHTMmjc0DO
zwR_|tP;~RRz`mmB3Gd5{+1gAp|0^`#f6vD7JA1<4>F28Uf7?;^?LiyEj^lmJ?`LQ4
zUq5B*WmogZzZB<QI$YAS^4_YvlBcej+@2S$g}<I#dH;oC?Y=?_P>jv|+vC>fF*9HF
zvT!WZjB?qsS9O1KBiGNIe97_lr4M^K11>C^{@N;khGS{vOJ&Wap||}zgmxDOwI${(
zmGBTQnB9{&m*vod=6yXc%O}i9pECF2QjSTU(c6~E_em6-tDJvaYiZ%cwUKMe6ShT7
zPN~{3^TyK78A0c|^Y(vlVr6adIWU3!^uF5C)8=V4OsSqpca|;U^WjX;%!yp4t-f;O
zT7f;{HXG)>T7Tf(w{H*2?qzMwJ+tJ2Oiy8A-;%d-;>?czp^MiyzSveYS^v&{Q?1$4
zwk$Y$?%#<Fy}gpGA7u6yB$*jZkbdL#AmK;>qs)~lDxzG1P6q=vPS6pNYHeywSg=4x
zm1E|6KjYYwXYLiA>kXW+(q*lZ1%Ln3+mAn;UEWzW=hY+GqXixBGHc5p&y(`%Q}xxq
z`@Qbk=0}<S58wU}<==I#X#0<}y*07iyd3_v8+d=^Z+la;m`VDto#Gw#C%bm;OPTJi
zxliDH`8|_uD_=A+c&RcQAKvY=$LythP0616Um5z^-`>><nncW*6r<u}l6L56!_6&>
zrwa|@nDtC9?hecKFf?1ZCB(r{K7N&6N{2Rgl*r;T-`(%r%$JoYCvH+(r)sUKvOPwI
zQAMOfBbAG_rLke-L>^EMe5lYF8Ln-)HrP0{<aLSQM7O0b4{~l;)Yd*Mkox`XPfXF%
z0N?9xckSSg{{Q67husT}{;K$C+`M~@i6>-U*KXc>t$FtE>ds1pFE9Uh_e$@@V}GXb
zAC9~5VD}Y|EwZ1#C+g?#|FbB-$wlQ+*|a^||F70axoaHxd;Zla!A;M+mm8dGFrED`
z`P!qX#mk=T<7b!8N~@Yv`Ltr+svH4%l?G>~#JJo{A$B>&_l}*e-0H5H8p^j$xu0k?
zT-=-DopQCHfPsNY%W)PLQx|LP^@Tdzm-ftCAGg(XefawL)l*+jxh?!zTYmL?!B~CY
zS1<o&@2rizduwaeH|-rF*`dw*ujzd}Z56-X`}Iq9{nZB*-EQ8${$|b$@AYB3vSj>M
zcgF3HEb$M2{c6|l9UtSjdiVazn)hRMb6<Si+p4{#zrGiS>#z6ZKUupze7*ncTZ=6J
zrdhw+8~=61+@Fs!c7M(Onk;(y#m4X^Vcp#mLwxW2+bWv9vS#umQ`7p@+8xt({GNQB
zd)N1~d4IE%tDn#3{ChELZ`IzS^(^Mna}HRocbWcX>Ak&0e{KdU7loF-`oCtsOXbdY
zl_x)m{5=x$Z?C=S-=MWFRYy7Y^#0d7w|r*m*UUdpV~yTVJ{%cwrIP(`tnJl_uYX-#
zzhPd6=~}+OMZ4eajobfxpHH~@;@4k1gTJni4ZXhlev_Y7bICp5obBrCmMjhL{JQ7f
z2H%oj*ERQi{ctKGAnMJus{K*lquzh*TG0M=MZaz8?)a@u@j5jM_x<*4d%H>R;pC~V
zuYddt{qvuhC*aPb_%G@9ag(oWYW?~v`uf_slc9J2aq9i`O<(_T%D4UVRz7TbWB=-G
z{QZncf4;v+{kK+3c+;2sO~Doi|0VP9x7#bu{Ut;1obJ-Q_V=F%&Jx>o@m_mwdHHtz
zJyY+`kDs)jH~ztdizn}E?fkPT{+Dw6=6B!UtKKoL>$@12tXm(WeYI=Xm$J9Qp*xi3
z^Cf?Oaclb3^PNsvN0YW@ulsm!{q_CfY&-79?AcnWz`(#P<T#6~X%%aE>5J&8ukW6$
z7rbb{+Vgenzm4&$cdz~%wKwK_xP9=|SI<jdi&y=ae|7%ax~=MKLXX^CeSJ;%tM`X%
zt~lEscRw0v{5w4M>UH6|z4vQ%qc*<V8@D5TUFhqJc01!&x4N8t{hej|>(JNJU&pU^
z`*qy+$%an<*F5V&Pd?jv`s<6?Z==7~U0-CG{<2D~GwxOR?y&H+p+-l49S!jXZ8Z3@
zrmo{_dHIpkZFN)TTzc}a>r&PB(7Sz4gzNT(*Kw{1{Br)JOU3!C^Vf)a*X{Oy^>VM^
z@~<ztS!!<;Z2BM8^EK19Xo7oP(o3<f|6!|Rb{B2=bbs&DO;t~;buX=6@@V<j*ZZ%n
z`Y+^X?>KRL?VWwynzc#m!~Ryi+bgu_?{xh?LPZk&?pb?d>vB$?7O!7q@&2c2WNp&y
zS=r$opX4m~tbca&`W*ip@HA=9l$Tmkn=VN1P5poE&rY74`Y@HRT|ahzt+dUmXEAxX
z!0*qV{a@VI>m1XceyAkp-kekQPVpbr7ERZ${pN9U^MA90y)$<Rh@X}4&+m=26}hzh
zQSt%?>l136?H3ak`}2Dq);?S_!DUCG&HSf7(rb2~{<rl<Q)<%pwJ-MVPgq@YX*xf9
z3lo>-u^h)kd(^^SWG*~W|LKOZ;sozUzIka}r&~1m<PW<$rgi-&PduM7RW{4zuk57z
z&z0>yZj$p?v&npYJ+`YyI^&CvdHtO|Z(kpBQK`+WIl4ge#!n8D89%wd{t}v*m;E<Y
zeO;hj$N$yy*M>LQMH+SMzkl`oLBIO%&jRz;JH_vNeNbLbYs$>fYQx>U8T<C_pKX@)
zwKmE+?A6tl*s7hszRcq|8}%yM@>5UuE3R$aSHJ%fc>JKlr$n?W|Del;-wU7q+Wy3}
zD@xy0Lv{aA@r<tdTKs2?<!-;fx;k{FZmQsmxpkfE&qSAf6H=F!6{`QKyu@qQ_w!!+
z{<GfMo?abUfBSV%$^G}n@)xV#M(vn?(Z4F@>#GOzdhf4p&zoF-cJI^Qe}qN;?}~~2
zxAxaj_4WQQ+{?pSwjT}sw)gI>@1?J=zZRL-mEam%W&HlC)G7uBCMCyN+)Srfix#h+
zy?ow3{r{7{sQwQ7zyHdC|NH;{{`LE(b-4Bar+>57@4I?>*+%a>P1Bdfy`Q-+HUC%s
z%$S!`r@y`?6!rf5>uZ{Mzuzwpn-G3!ZN&Q-f3x<ACCz=l{*tNfglS8pe;uqz+3j6_
zbyC#cSmF2oA6dn7K3Tp#*0V15m&)Fk_nmfsDPxh{d1-Ib-#De$*Y=0^&W&o-?%2I+
zYxg6oT$|dDMJ#*v+MDm3;a&9o*6E$KvFfYqcGj)&Ew1`rdw+dhlG*&PdA}$Ba|vH(
zq<&5R`dvZ&l|g$2Pwli#z5DBGV$XfubBfjL<5o}lujtLq{4RS|>brOOaSLbW>s?!Q
z_54Bc@Yi2$^%sRkX_s-Vue<(Y(!A7Fi`M14AKLR-{G37W!cXzfl}_cZ{8V-Q^}VR4
z5zevtPIgtBR|eMKlrJrRu`zt|iU$$paeH6wJ+)UzyW?wWCFilm8rSpOXJ737_517l
z>r1{~na8nl;_auPv#2K2b=_YryLdxfuXy?Em3vd|*ZaOIdG|MAb=h_G_1?viAKm_1
z|G%z((K~!``Rj}3>g$B=7yr^<^7YjhkEOf%_q$hJtv+P;$Y%Cs!PgR#?>0`ZJ^i5n
z^u5I%S=V2@|8@1W>h%ee?_O`xbde78eI0buvQ2aH)yq})R&1@Fza@Ol)TXA`!|_-D
z?Jes#TtBIh(d+E3gOeZaUwyM}Q;hrC<e9<cHJaMttB(Zi+W+eRn%$FLUpf2hpx~~f
zDhv!v)sC~cn_jWjd-%l5t*cvOqxq-Fq`Q87dHApEudbH97MZucZe`b&xc58RJ!gL{
zFMoCV>x$KXc6-;Y)mXnZ&ARMu#QKPJFUnti?aE#se))6Ev^ABl&R^Yo?a06NzpiH;
zxwo}$>#e<2yP}uop00ZxUh4X7ecXEemGcE(Pk%lAb){`;+16jz+g68e)%TA5w)NN1
zQ!CxGW52BnJ^5PX_L?Q1t_H|f?R>YcTxHei=Wnx?zu(9IKm6tObsxDk7G=Hs<E0~e
zHe+Jww@vSlhOh5<Iep{Z{%gCc_Jk|6?vHvKw>R~N;-_mCKbJ1|tgZf<^!#t!-uOdx
z|CHWld3T6xeRcimy6#tBpX{=Yndg^wf1Ui=uuY$m_Uu3T`s!;<-J(U6|L?E$em(tB
z8JoT3{=aWOotSg~Yr@|6t=(Vq{>QAo{*|kCb!E}7FV}?jO}&x(N3cA%(e~qqMnk#1
zKVO~Ae}DCd;y=&d`@`NwDW8wmIOw2uTWC}1p+fJN>qS+2Z+!`$)8)QbUwzRg|FUI<
ze*{DGwLV|@u>WNDYVXolN8H!P-d`U+E9Q6S)_3pU%WwY^zE`#`nrYtjS4XxBmVWxR
zf7M&vNiSpeN#A>Qp!vl<`Tsn(dm67#f6l_#)5Q5V>d%zQm0vZzU&l{B>lE{C*YmJx
zH_J<32mR#LKbi4v?L~!Y*Yq#OU7uzj_S9nDm;G*2=5hGlH2$+aZttzHzu!IkIz4G;
z%}bMbo*Q3dGX8IuQg{=4`|bXRo%VCPq>gR)|89@i6T^j1moMMBcXD~S>f)o}LD$#B
z-aYxC<Er!b^!0sPN<RJluf7PJ%GNunnru55FDf?g>E~DV^;@n4?Y!r&|ENAjP5u4<
zLb1;GSJyGhg-p>f;^J&+Y*-NSqLqucX|Y0Q>^txKa$3H<$&r;=Q^O<0#e<z^2j@l0
z@7r-H|9q>=!m7~bh5ucr#dB{<_uu_z+sZWmMOleq+dThGRN~t-dBY`Z{yXa!Shl81
zUBA04V}-o@w>!IDJ@@$3zijcR@7XRo{z1`G|4)C`w=3-`=UT2l(dzXF)+NnvZ`R+P
z7<BXB(E~T9Z{+a0qMfzm>W|hrdONgzckiflJznn~9yxz=+x0WyE;_fkYHPW=P9C~e
z`+M!39`$$gs_o5p6>jtq(wB&ed0tS!C^hq+bV1AMj4tmBs>1opbXXPBX0R(rJ-Kc*
zTfD;F#YUaA$@|Kt1lQw{&qFq^*t&jtWP$K`^R)H+Tg|??*VS4Swbf1i?s?)@a@*cl
zrBf&8Jy_fF^!0?sW{H?I9r5|u?31;mvQ`zSWC=WKf4m~yCrW1Xa=DX1e6QY1>NKD1
z`}KO&AJId_drQMJ?j}kkdkZX<&$xV0RI==&Rx!^uh9&Q{c)xfTrRHj1WIwAQ#dT<A
zUz32ql?m?{yk1NVEpUtzW3t^6rY4y#^mwUIjTFxYTfK_Xi(=JZe0^{B8J?6oe%I>N
zeB<D$!WIu0b!PtamC%`_DU{m%C^Y0cvuwH0?wp$H+v&TkRj2&8J;!Lh#MMJZ$`>Xj
z3EZ9+-*7*hk)_OYbG_dL#?YIJ=MJW=t?>PK#?3%@W7~qV7@z-s=NEOe2nyR3-e;b9
z<xKc;@jWd+-rC(0?`Z#>bmr2TyKh2*E}Ohn3eINsm%6suNsUwSzTrs&`?>9<2c=AM
z^43ZAtX-xkYPso>*_#`EQ#W*o?(pOfm)ZGLsz6J*Y)4$JL&Ji*e7XF|PKOm9Flt=s
z$PxnenF2N>fYyjG9SWG}q9w@L;+7<=mAAZZ&)J7Y@0QP;)O~Z3%C5P5|0I9fTI$y8
zf7xDM82oeV<Aa-S$JfWtUZ?g~Y;N7v(=MNT-*yKbZkhkJx3|JRJDoo;x^&H=zndPM
zSZwNH;>#r{6SUai)Z{mT8S8iZ-OJtf-ym&CiF(wbOGh_~MH`45td{#FcTxH5ckQnN
zOIp)IzO&{P3HV**=2rgBcz{t&WQvLi=wPZw0}mY)Zb4=yMU4(8C9W1H$AB3w$Hc3y
zS6BU-r>(Z>x%pGkpq^O0pkI7-vnRLrF1a%=`Y*d{{=#{^NB``Xn)III(-MzM>S-@;
zt*JUI_T<d9w$%ntSteVRTlL-jV=%QNWuB>CgrWD7(gja97cZIk?D&G0g;7^zo6DK<
z^j7kGHV=robYi){Bgu6(t{T70c_&SMyYO$hwrD_T^~$t}vOMnh;Va7A7|z~dXH*jb
zg>D;5B4`DU2{$K`qrrp#6-7Z7Cr5)W>DT*q&fk82_uB3~JCCu7?3wg^S^NC&+Uv`e
z<LqVYKURMHw8|u4cF~`eHT947UXJ@x{J5x;r@oOhI`F6X{htPw_g!wK_rCt{>C#!f
z&GM%YbiMxcJLyqki<gA)>ZkX=PI|j??+)9QlLB@Iy>^v<to|d~J8<zg*&CZqN0vVO
zSpP1yM*H`(NUcC^2~n??7gfa<Whkep7@u13fKed?boLn^OH-mnM246M%OOXH2~*UB
zI9r?&9Y81baZKG@T=%cg==-8sJ12!2y?W&pb^M2Qd;ap1KYvx*t&v|*&seRM*m7N7
zd`8Xg<K-a}Zl6_se%$Y9N2%7=jJFHB67^5)DK1srFLnCI;V>c7QqftQr95-5czyWW
zYkz*%nKLemvY+4Rs-*w_>3*#6tANRd!#Azoh3syPiS^Db@pZm8`J}H<!>*e-@1pOD
zh%o&y-JdBk;WX>jze_<~X2utevv`<6YYuYn-rD-<+s^vB_2KHPWlxr0`tdvNZ;<|K
zQ5Ku1&0q7bZ~Ff?YJb*|`B#7K{q?kN`=2{EZhfu#T3YqqdAICZ@3#Nz>$a~Ey!w0h
zv7+U#!(U&0t$+2T{?{V^7&Y15Wt&z<-EUg8r}h7@eZOZt3ArnL{>O{?;kDXUnsF=o
zq$hcA&DxpgAGR*_(E9m1^*n#SjoAMxOzU@;*8hmr*Vnzi9-4hQ_LoZh>d@~Md*e&{
zkA&K<3)X)1{OhSL`=d|4da`e??cIMnW7q9I{eOqw>Zc#eWwcjMz8Sk_U+4YR(_Lb|
zXUF+$7k%<%&2-g&U7zn~2h~=cTDLWB&i?w{vSz0569sBB1*;CKZ{jtn^%u^*{=N46
z`?;011rzVb?TlY77<%k?uZ-Ou*DaDv3>PM6d6&mY_3}51M`upWzkBEM>DKIkx8HAF
zpRd09zQ;!IS5Llet=sfhSnFdH=S2RtiS>4WHXA0d{+sky{(r`F%dj0^SL%PQ{t}aa
z_1Tw9|5r!0|N46O_lx6^_U_YD->ts-^!nDKc(<v)bYidmtCW7~o8s}#d-de2`@{B!
z?QW^fGQYn1>PP=yd#k=yJ&o(Szb1B#Nc{gvPd;B+)O7gIw5|8;vrgJ_z32_t7VaJT
zD$)D3cxh49)yePwe@UOmp{c;Y!1&B@7EiMlYv~K`xn47DUQgMsdi7-XMX|lNwpMMu
zrCXjPzE)^cP5qaxr%f+B-CK2K-kR0ntN%uS&E8tHwt7*RPVt>-ua;)IX@3v>^|kWU
zed~*-GN%7hW_^8seZc$stCRPKuZ~^q@=<<GP}SGGz4oiyU%&fP8nxEjv-aexS68c)
z9_jf_-}dWk)yDG7yVWnN)>dDwuK%~DZsX}iVtUut6up-H{WW_=*N)kdm#V%-ef_fb
z))v>3udBYqdcT+&wJx-N-J-uq`-4=wZi_#^ZmBr$>+A6Bm9<&=SD(z=b8SgNlEgn>
z_1*i_GTi*5{#M0D++P#8Z)17f&hRzyt53eZZ~ALVWXk@?y{SL9$6uZP<jnrBRbLDD
z9}3yBGDu;m=(MN$f3yA`t2%hcecILZ<NxF11#fN1y}v$wk@vo5`v1?Ky`@{WE`Gh6
z_>cYTW8)W<XKY#@vE=KNd298(uNK{Z-CFffe_i|<K{2J2ZSRx+T#|P*{C;15*LPpl
zlUbjC-T!>$Xxjg}t->jHwZ0yk_xkGli&5TjvoA`9$8{x^bq3w7QY|;1<Lf**{QuT<
zS6{u@|KEJg`csP8as8{mu2`Ltx+-q--PqXJ()$^I>(<zH9GJINeNA}%n#HEBd9T(V
zln-6~vifV`<<*_#vVz(52gTy|9$;W#{OCA~ml<?@vrgSg;jh(SU%s=xzGO?><^QX%
zyqL5;LiOvXz13e|?=SscbZhh1?5(f9Cao9Q`s&Bhzmd0&s;@6wy7AQPy5(M9-*?Ww
z7%}bLf0tLM-&P-zzrJK^{q-d)i*~>NdMaw`N2BTY?{BTYzUm2RF;MlQ(05l4eO(iD
z^w*W?s<HAH7VW&hWzDpu#XHv~O}@4!=txZ1)vE1TckkY7(RKCP8eX@dYv<a83FXCK
z*LO_%9W8Vxetq4B`1NJ$xQj}5tqs%M74f}xUC8$H@rE0h^?%K*nKI?m^|j?Umn_X+
zI`!ArLZSPwgW}6y9j#Ies?rOJ+MxTrZr^14Ta&MAbkF+q|K8U7uMWQp>7Oq@8FYkG
zVOh7{vrm&14u-$x5w<^G^XqP0et7s=-`5-JIxkggZQt+u?&9v<pT1oBq5Sy$`UkVa
zt-fskWfxSX?E5-2ef_js-&I%6-kJBeDtp5I{i~OKZHj)kH*4Lyx3{l9=-G4Qi2EXM
zS)G+}9rss<FS1lU^6SgBuICS0HZBPI;QgXMuIuXStugU?->wfYzh4xg{5jnG`TARS
z`=Z}}aOG9B__y?X^}oG$f9<RiezZM1{F#9FtLI;8l+5S7341;Hs(AdGjoxva-lm5C
zGIfc)JN@L}too><)(bDadK)i#_51y;g4+dmfhPaoUtPz*!1&y87H@MFX!O&vZezUB
z_lmyk?ys-DzSNCKU!~Pnx2UrI?w`*eMPIMX%MUpl6S_KlP1V=rty7EsZLIMLUmLRH
z>HeTMwc4k%{(i6VFL$|mea*T@dab%ud%2G0Ut9HLYUG_i_g7zE>+;()$fIfJU)gup
zR$p7KTC_0!_4M+ypX=7Eua;jE`&awA;IAvL$9_GHTPMH1ZbSL=f1Z)sj(_#*vYxg&
zvTWD)s7b57zPcK<PO<*!zfFI=_^<2V-F7{Gdap^=lSkH(%f4oH&nY^!YL~#?wdHYp
z?_B?xF@tOO?zp#JlfBke{NEeD>eSw-ze&AYt;*~6)@}M*{k8gP_12ep)$gj;tC<Lu
z{+V{^yw%aVy|=zreGmGQ`n_(A@9WlITlQ_e_4L>G*WuZsG50pjdVMXl^g(-6)Zf=C
z;_K_y@YeoYulp}s_3qjFO-py??a@Cwg-^c2INnL`RQq$K<+^t7cOS`ref4B+T=w6j
z>Gx)GM%D^(>UBQ-uYY~DTh*)jsQDKoPX7Go^z_sH+SQ?}L)W`h9?_{&U4Qbq)<28p
zGEU3ltEIKy<2(}U`LFLRU9<h!_0=((Zt3j~oAlPU$<J*6)K^cVzT_`5c{I_*zBVfC
z-Mi;skN=+h@AcY$dw+dD{p<USc2Cn8LFebDeEZ+IbJMHm4UAed|M{qS%vfcl;-|FP
z>VbP_#mrav3r-)g%lsTskurJSw>Q5uuAOnSxl-D=g^NY4R>{bB)_;y)6SkL0y}fC=
zxaha^MYoI)2AhSpQ9>(E>^}J5iut}}Kj(>W6gu9x<CU<SzHFZMi8Y*!dv>l!$-Z+m
za6!Y3Nf%S2zjPQbu$&+@Vco9ID<PVzjn^-_XmS0+!@S}@k5@(=c^I*3CX0ioz^8xq
zE_wE!k56weDVpD&sdv>d_Rz2Ex0|)STerVmzH8}evmf2Pta)lR3sR($N)8n;DqNYM
zBErcDx<oEO$AgQvX`!P=fJz{!{iM)&-zIDK{@WjW?ws57e!g1hDe1?i9`oEgcOUzi
zeJ!@;L)zcnyO{n>Oj_}vJom#+mA4z;cSvdQA9)n<O@ZY~bN07dp|;!IF%4|p$EUtp
z=4j^Ye05*(cOH(A+P0WeD*F}`o8PzJ>i)&zXHwA9+h+ap|EIl)y#K9}v-)nUG3%AD
zkM~Ii?Ej!CS2APq#~sE3b57j12-&o)<PoT$I>ALmnvJO?VL^-zXsWVBQNqJZM5ra;
z)YX(fzeB^Mv>v@SHJUy>JZRl6neG*@KW?{E_utJ{FZFoW<yBYW44&J~zw*_7{i_wy
zZS#KqG+#TN!RmhK-w-WFw{`c`d(z}8)9m7JENlI(QorG(ykm3lvsV>yQzqUE&H1}(
z=e2}~hj*+sWiRw|J^ias^HubtV11?+YjeK)HSCeEOb?$Jqpa(p`FlZptGT+Q$2x{N
zixgLSmohgn>WEBn5#{7`df2ccz(<3Nx9OpiK!}nNXG_yUgU&m}*M8pbzrJf<YU$&s
z=WBW%>#L?N3Yo)R?i}3r&foiY>Gyf}Kj(1laOIu1YW1^OL393#{$AF>=Xj~~)Nzq2
z>s;Fl_LF)y&XfLCqIUk?x#+nbcazPsSS&O%qhfQnS)4rF&t&oGoMPtgri910{G$)Q
zSrIUAGhax}6rE@6JHCBVnsqbPW%8sM>?Lbi0xc(NmZUmmn=m&pYKTnHP~vK7I@qva
zhK86_E8D|>86H|9934%G0xNv9ggNF~?)mod=kKejp{rgVbBzi)STu3LyL;yA_~wa!
z_CNPCZl(Gq>xr{Z^=<P%^nRJ>$H(;xr9Z5j@@1a?qrYxvGG1F=y?y!r{p*FrH(tk1
zy>UAD+SlaP-FnxOUv{xfW_qEqe9`BSy}uT=X{s~KzIi^J<u!lJnz>cFCP^K>3$}08
z&x|`^p)d2@+TaOq%kxaPKTJ<fuV1v_0jTSxA;iV$bg*GV0O(8=F3^>bDk_{UZHWOh
zTGRv$r)t&BE#0H^YIX13C%aa9?akBM#b@VMXZo^GZOi|gg^w?Ndp$X$%|9l4{$HDX
z^97xIkKcU$N8uG~RQ>bJ#r7fMEDP_hbBIZEKjC|Q>WRv2vDc?G&y^^e9w8huT{iC0
z%x?ZmoIky`?hZJi+RQ0``ETdR^(S9G%Z&=!nRBy#e#x2wuUi=_LylaTxAyHr)@gGV
zZPXJky1@=wW}qd)$;z}a;lP9(5usL=g$*kLS_C;+og4!?e3V2vrW|@2TKe-(<!_&r
zYY)eMow}6EoAXIQO!W5cp*Qpb{_mT*UHsOzeEBHhzxr3|lm73E{gG(?u$I3+>}v6E
zUx6tbxZgB)hrVd$KXkh$hC!g(vwB1P;=L`ee|G;h>e<GbuHk;|mC&Q9H(xvwx+dNc
zT%tZ%&91)KY&qMfo$>icH7dGSK6$XAdqwZWt`M!g8Ee-TZ?5Uc-oqiJdBy485g7&s
z#&3?Z_?Wv`OEY}SV?u+I>^s+ae_eO=k!{@judBYUzxqE+dhOL#t3`XG)RujHclGL}
zqra|p#b5m$?oze!qv)$w`@6!|hv{F}=zbR;HT`P)bxzxuh4**<j{e>iz58o|@~ppE
zD)Zz1zdCSl@7>s6TVlWe+IVNT$&KE?t*>6+`kSTn`ufs&dsE{!i|n5L_1%-5wtn+o
zUq7O4AHMPQ*Q)QQe|=fB^W9%R&b@xCzE01&zuNQl)I0Nb`o6lK_4xIw@BOC#^=Ek>
zc|KjiL8R*I;jgd0u6(sWGJNexy;k9vz5llUT)Xve!T#{+*Oxp}`MQ4EI`6;!q2Jd1
zO?qeb`pU}cHKAF5%**eDuL~`Gx;Q<%{MF@1%jJTv?|xhJWA2{(?5S0I;@9lAt$n^W
z{+0LUtYuF=2W|Fn6<B-gy4B`=za#bsFLAxUC}g^*v46H`R(J5PEBCg>eg0b|7*r*8
z_`&vGN#_l{rnx)AC#_k@HP7GF+diXby=&jJC!ZpeyDtCC&Hc0erqJ`RD{GS`cTB&y
zVAA{R`&n8QyG?w)sxJzE5v}^cDK~7{rdKi77wxUuzx3MLsP$n-d>{05b)T}n7W7MJ
z_vEYnSAT!qA6d5d&i1&i;bEPuQ7VCV!nD_2nD<R@)x7sNO+Ai$kN^FAQC0o*MJ8bz
z?OMM6I$F2J{MV`IeW`Ejf8Fz7U|@XgIE$}&6>EvV{_j;Q=3SWgH15~?|NDRc-2MOm
z|G(M$|Nl+;|2F^E{=fg%UyR>cer$Ew*0|2=XK!71tz8@YX#cETcH3vyZ`snke`(jY
zsjoB3>qFN@tlJyCHhcZnbr)mR1@*71R;k6WyezWm=8DyK|CeaL4PFZxcKyG)_4Ot3
z|1nO-s$PH1J1BRxVAjgLaa!-M1(oczzpyBM|JBvk@2?MEUvu~5>)@&_({`4BzPdi_
z`pH_|T5av6<=NM)|Evss^0n&!)vK=#{<{9Uy?(v-_w2jtR$brSz4i6){nxyh_T{C%
zUh=p4<m#9u`>(G19rtYSx@-HxR!@8XN^fsfe%#OX*H>Sic(3zn(W$-91iIH<+Z{F8
zHvZ}UxW7fyuN)2k^~yi&-Madyb;?`c>F>Y#>Sy)pdB4}YzRdIIyt%5Xm|x%RrK^6|
zr_+VgqA%R=dwo5q>Xi2D>v~uByT13*y3nJ2@oC+zX`lYq3s0|GYx{4n?$f{j_7?4r
zSa-^2rujnKTgvm-hOaS=t$J1Ol9~5cWP71`+)nQ;-S@p$nohd5?2E^%uU&s5`gYwF
zZTea@%|Gnv@=RUN=j+?2JN|oh_5Nh_C*R&*<8Z&aCj4~qeeabvEgoMs<e%JozOZiZ
z(F^aczkd1t^rE`@e|=X=3nQ~GIWRCV{&1Yd&kWibuK#OkeB9rvwN>$P$EsGxf#CWu
z&{pr)yY^k*AGSX3ZS|LUJMCS&xA-URTetK5*3{S^DvPc~+P(kubXmLp?)7m;YgdQ-
zSmat5y+2I5bA52DzF^JOS5KpM{Ms6|_1E|88Gozyy}r6n{blv-^>Oc?c5nR}b>+J0
zYR|h|uS#EkEyz>en!WpL^4FyLu-Vy<)?Ysws>YFh_3Zb$t+$S<{#)n$>T2oaFKbo@
zZ9iWVU!FbxYu0YTyWiHmEt>S|_us78m4CChZiw4zzP4^%xYysitNCYVF5A~}VBgY>
zK@+;azSK>4=e<8_((m=*Kc-gNT$z98-g@1&@1AX)GQDnVtkO~G(l<wzb>98;{dN56
ztEY=AHy!?^C;QJSDlJ*O^tskT{#l#;mp^}1$~&R;Sj~k0udWvPt#P{>Tj{x}ynelR
z>8tN8Mpe6vt_8>J+j_V9b70jK->d(+ufD$i`f~h>b#{&-li#V|>wU=Y+P(YJ<jH0m
z%hr{P?fb7%HkWzzvsa>zp6vb-{x|NeG^bv?W?KE%BJV%bgzs$E{g(7<$$qulG50!7
z))sVpdg<%@OS`;ot8O;yRrS@@??1Rcsn$9hazW+#q7vnxZI;t7-<tQgD&OVxm$<iy
z@vHCoc&xp@vh~-#_^(&LzSOVVwLY$MtJd+0Uh`K5<z?DRFKJ+4V7%!#i@*65YkBR$
z%7A`B(4OVc__ejVzgPdw4-b#q`nmk#?+y9y7s~yQdjD13^L1#Y-Rgh;qt-_q@{T+I
z=lbi+TEX4BcmHx<GvEGhLjToJ?Nx7rzPtYay_2<mk)zAUKYOjdtueoQ{`9TySx>Xp
zcfDuoa9`)>^~iqt+N)2$_=Ydq|2{Ky-cOa?JJ;{5jd;Ja_TA*~llsC}W$rClAGI^|
z)Yq=HNuVTiZGUXo>%CPw1^r9^uAUq8Ys<Es<-hhGT^D;|<xahC>p8=<o=$s|7ag_U
zHq*V+e*ei`TR*?@u>bV`eVpW+iM`cqccZ_)dRf1}FFasfbj1FMWrE-1cKE-T82Vi$
zO1D;L+1WLQlit1hAG4n&#*{18D!gt(_-r{Ac8_U0>o(SH+8?t@F#K=!^yh!8{Bptt
zi(_M4ddt>clwZGY_Se;4-<P;e`#tT+mvH-VrS0)Gr|-SK{_5lQ@H^8*@4nmX`v2>y
zUAwnt=B=?PnZ0Sp-?IwsH|zG#zR<qfwX(M8-QT3%udkl2kK5g|v*zE1)0^B+zWP+P
z)2wH0z5b%BRp)%mU-Wc;UAe9>w(4QrrfJuLsy2$hIOqT3eNmveaQTa673a3zdMc&m
z^6C5C*!UM~)fd^n*s-W!{<Af88|$8cBKMHvECCkKRRg)@s-OkHb$jo-lx_WVHQs9f
z|Fv~{e|@hF6~A@8#%S-UHDz1>zup+PB>(#E)$iv0+O)RlZIoZ=`)ljtC#|%by6S)2
zueddq@7l}DZNKddc$aphyyBha>WdQl!{3)*{kK)MwCGBg%R0@tr2j|i*CvZ!y7lE=
z)gPCHr(aV`HpUBVoa=SDYWLUIUr+t5itg+VU+8`Q+uGodCjRSxHOl2j?9yKntX{vi
zy!`I3-MhcO+WPL=(*0MvMX#?e1Kq#+?&_1>&wtFn+O_rLFSSL**|S!?^Yb)HD&0N(
z>Vb8;kJnB5nQ}GyYu2uPPrk&ydiP{&_SSdTISb}i<%gG-o1QOy9pr!bzgqnNmYP{-
zlJZ?%?k!qVzV%e>$~?VS?_Qq^wL6vl$ZpxY-_t{1?6%FhGv!l!p~$D0dz(5VEqAhV
z{XPCZylRonc9+lp-|d;jkrS7??_!iy#Qd<o$u2ST)Us-uBz=D<-s@Vk|JjRme}(s_
zM2DBYpV^k$|5j$ztf;9m9bcT6m%e&3@AOIj)pcus30y4{kJNY<^Tj{@|24j+TTAw>
zbB+0~>wR(FR@2{yj{NvP=~HF>qIuJ=YwQ1CY;Ln}XWe=g1_s7ej<W<>v_QuV$ghp}
zy3zMH`s=&dU-O^t+JE)es#ojw{g3)|_1yno@soe;iVt7E`)|_z@H$SD()0(D!y>C!
z|J$!MU-tLbch|H;udl9KGi%n{h*uMees6t$oqPA!gmvCw*5NOvzTSFh!?N{XS61$N
zwl!n-+xUI&q}Bxg+Zz7s>aG1c4om-Lsb78d`nuMC)%dS9<#RuQ7V`*yid|dc{yL8%
zWoy;`(uzLieOKE)#_Zczx9?r_de>v_G4+c~Z~az3|GI3W@2is!Chc8XwQ5~<)cUZ$
z9-Qm`#(&OvrF1>^qwPtT`zt4{-@W5}#FDS;uW27m*?P+E{ngXEzE1tMdDo$P*H_h-
zt^yr9^*j8lOGUp^>YeMql!gC3(ylX(3|+nP+?y7gt9$&{)~%^q`!`Yc*SF`-c7LtE
z+WAiV+S09+deN%)!++n3T)(5~|9Sz<%J4<D;u_t%r(gZeGUs;w#W{at>$+F|jZ?ms
zq;`GTmoV$DuX%Sif4|pi{^ISeFWWYhi@thZ(EKmE`-_KK{J)md_fPM4%joexx#<6g
z-l{9}x-Yp3)~%1*nfhV*&%aeillN|Y2^tPQ;y6o?#f!D5=+D-Qj;V1QcfVV=V{7-{
z8rA#XUx#Kr^;;j~zyJEr`oH4)Bld2cGC!`f(l*NbsQ$vR{clqzKVLuT>gliR9@!R6
zdM2`|YN!5c>&cTv_Z00fy1st(HO`$WJ6UzYS9{m5p1USq{nGBI8}F>Vb8l_k)+BS)
zYu)<mL+}4xAN&2+rnPnQ;xYHvhYRbkFTb_@)zK>Ty1jRA>4$4Q{af`l;X;}A`uL__
z+h50i|Fz}bn(#HdCtnMF9lvJvwe`E2MDwp~jek}3w`xQG*3-)t-gznb`udBRo-ek(
z+Vw5;)vUjfF>`Ai<yODix$=MHBKfs<eAoKFx*YYBv+8?Mc-Y!s*B71^+duv4MDw4o
zB=^nFzIuO=Yk0Z-N!RP*b$u7j<9aXtpM3lOlJ%~izg~a6;=iu?(W;%dHrA`JTa;V9
zGdJdXSn>X+p;9W{yF{;ES6Q^ZuEjLIsOsoF|GmH0yLK60Uo}hgi_elzIn(xv?|%30
ztCP_rtzGdKwojJt{91B7ZoTBI&oTFUgC-yUr+z*@=X;{+Yx5aSJ!}0Wmj30>=XmyY
z{ne#AZT%)yef@P*@OAuZ@9b&XyVup0uJ?Vues$)*qE*)E{lC^G*$ROx#;uOC1Y5E|
zxo-OFt7m_G_uH8A=l??43;nM8_4l*>|G%|;`>pS_>uWXNUyJ|#ZR^+Tr@A%UJof(D
z{C?lFuhY-0+x>G_YT>fj<CCJ_7QKyI8>xDF)mE;kmoe+(cJDX6|LbVn+W1>vW9F^z
zdVO{M_5E?{!xnP?+S?Hxyx#R|OxIQUJJa^w`X==LZ@T0!h4NEs^S|bI{9Kj4Yxm+Q
zU#l)P{;l5n>goE(y0v$GzwO(-I&PQ#*F(p3e=XJjzw%&T`1jXU`~HWmUN=p9|26Kf
zFZ1`U`yJ_N_4Ud6UD5lm?T^?WHv7uSxV>?aH-7(3552!ORB6xDb>XF<zov-3zQm!H
z?~?VU{BO+eztT&8K6ib!>uaT+@%_Fh{%V&d{p(!)|6;1n*4NjMT(T3`xqGc()~~p?
z@y{k-UHoV6t)Er<uP?GqeR?|U-0$hv7U|Wyl>Rze*Y&jK%0zM9)mj$=SIK@9*I#d5
zzu49EyJW%q^`51#FU6#qMp>8NeI0JU$oT8(uLaRPJJ%)sf35mGZqwf^<Lg?VdG?fg
zmi+kjmnC7+@4s2`hLU#|RqeU{Yme*oMcWUVz83j1xoXCy|JUuk!_VH@GXXrbA<D_h
z<d|?ELIrfLLUW?R3=JhAR;QFe&O@8(YJR?*x=QQE`%q7#hgz>fURZBax4YXtYwm@g
zPY*BgcA9&kV2}6hHQZjyugy3A^F-0}XCP<R^>g-%ckFwv{X8*a;ae`Tk7p;n{cQBH
zkNxJgO}k|GUF@ss*VmprdzE>B`ib(Y^LHD|H-?+5m3^;Ur(8edSm}*fPHz_c%3FVH
zVZzk(pG=NndsnX2Jk)h6^HV_ULJMy7V3RC9(4nGQB3vy^3mpyw<e2cVwloGTh|m%f
z>Sam{R0vTjd$jrXufOJ@qFNTuwBkb7ty?h7V)Jz8Cud%M*lJ;&7k#8M>7sPhT;s$S
zjF08}jS{Y&J@ifG@Bfe^@r=K(-+yAc>RsQ8*OO-n1+87b{!WW;4eQ#aoZGmjT#f8H
zxbK{x+>r;XLqC1qp}9}yZ{+Ns%(gY(x4#$pROxoU{&i&PHT|dy)AKhbo$On&IB$21
zg#77`w^OF^y*zV1bn7bluFtpDh$nO&`Y>}TkE-CLik}gn14Jiu330KpG$k&G(GcTe
zb!kjka7L@pdh4C<-+#?p8Zv3iuBkgK_4R9<LO)p7HuvZ9z1x?#fA)Iy6W8jGS(caj
z=TAv}chT?QFROc>e(vXgzih?C%zv8e>kE!VK91M_^l6n3i{G7@F}F;<^6_61|G4O{
zoz*2(&*y&m#}u~nPbqx1cTVQ%p6K0MxKC6k3kt@rF|qvo;(>Ulz=GfD<_!G(QXlut
z&omO+qGqPe{!+?4vU0)JrC(;OvM%#s37=Kxdhh_FhDe8&5I8nAOwqC6W_3C^VL^x=
zXxe|Fp+=9EI>+2&d3kkn{fY$R`@WvbRD1fq@6V-0KV<vn%(=%`zhQFN%vk5s(&dTg
z<~;in(kF8?wD`sP-w9jaFMm9@&{AmilbW9mRZ?4TzjXK@Q(>w#eOAHtPgk`rKVLKF
zqT-|rwGp4CZU1Y!dbiJ*5x0P|f2ZSJ`Dc$EgqF46t3J3>dwy?b?2ee227x(x=C5WI
zY;qD@sI;)r@}3ZT2+ZZ$oNR|2Rzw&HakjEK1<aVDrpVb6kmgl$Z@bUL`F$1dI<L1&
z)_f4kC@bCmv*PXLw0HccEYB-FvJag0=&stjtN*&=``0@Bjgy~z#<09B_5Z2`O}`xW
zIks-k-TdhO+BEx#+g&%+?RjyVq5R?cR_&kK58vFF@||yonZ=&7t1bMm)psp9_&fE6
z*}P}}#a}or6^*;9DSbeM<BGGQMqCnic8Bl|3(x@zF5oe-M2!G15pGsz#|aYxR7ANr
zJrV_WxM&GF@4tWTb-nq=yQ0^=f7w@Vyz=hrVpEaH3y)Uq**o7|v}WbSEps{Qx7z<N
z`T4Vc^4lG1OZsaB%y!TH=YRSL-&~t}`ywvX-OfM2@h$qNcDsJ{jP)_Hk(tX|tQbVT
z3#^t1dA`^0P1)kT{GJ_0#U0l8{*tr1B-J?odBQWBZ|}DSxhifmxhK7mX-S1{%PglC
zzM?T#*=K-`OPHb}!pZ97l&BG*A;!bnv~Z$Ehn9+9r;DP)j8?v{=eF+;KVA~IxA5xE
zpGzyt%Xg}VUcJ7d{LP0di|y(A`-5!S^-OBd?{VF~Dy-u|ehuG0Th;6L>fPR}J<;Ai
zzpZ&uzRun=>+al&Ej`{6WqjbNeD?mB`&T1XuB(23m=-#F>BeIVgGAL2?6p!YuUh|$
zf7fTGH$PbZ73Az)+!H9eVNygqOPZ3{%FM1-y`b+F9M2mp7#J9P9A^o!bg>qFjQ#rk
z!~Fk$W^_zfHGlPYZOQknyJ8zl6wCHrF7<w~X1~j-<*UPw>K4^)^nUf<>dN+8uiEqf
zb*~Pyj&Ci~n0EK%t^Kcm|La@%^y~YpzgOGV%U@SmztgtpUAykb_}6!)-sMo)x#NGB
ze8j4itopB~#!Oq&yQAB3`swz0KhwUH+bLGdF45hmmLIeFnt8Uki)Yc@yD^i$>V~~?
z@U*(pygz>Rq^hGI?N_fi%?y~Au`*xnZN%H8A2Vy?zZUM^e|_oh{dS#i|Cep8TmS2O
zX;IY`-@Av5ey#O=EfT&sB+mX_--o|m=Sxp-om#asZqxd>mcJ2iWA^J?D2Pp89a_3-
z_xr7{MfR^gU9jCGe$mrk%8PO%{yfcpm&vLtwe)M%uAO<E@9Pi!>u#CX^HR@y`h{)Q
ztGjki?_BE|pE0#ucYR%p&Fhz(y1(`)zSdTsetq)g|6hyuJ=WW8@Z{k3M=sXP!Qofy
z+P~hd`g-eo{bI-Py4EY<6BNUz|B}A`>ier7>%TVbt$OG0n)Stv$uedC)praGjQx(Y
zgjztSU+N3Kjy?VA^RMf#{{CCC`r7`ex4w5zzYeOp4mxH&>($nkwb@&*zn(I0tN!Zm
z_g~F1+v>dOQMl~o?e(kW*S_wIdYj+(_t*98__gug@oU3Ze^=kNZd2Iauyxl~FI{(S
zqu*=s)qmrDg}sZ@znXnFE8bZ`?Du}>h>fvTyPy4v+8yivI`q}2%Pzn6d|Tgj{nh)c
z(^nl;UlY65|8<`)=U=JS7hm5^nxwb4>c#J!dP~%|p7P%M(X{+^`0LeQU+PwW&HB3T
z>eNeL?yaj^TeoTV?)4G&VV<u=tG@5r^1WR3`s#n{?^f-(w^jJ{{dMa(p1jQ3TlH>l
zY}`htdAs(^v`y9fuqQTeZ`jsf+e<^U){DNrzbH}s#a><0s$GAxzh>qM?S7Q3C#`>d
z&vf0{XS+|{J^gyhydCa&dq3K*^?a?p|6-ih#nt=%N9}%R^nR+`>g%9gs}t?kyx;fa
z>*){Sd%jj}oApZhdhF@W)2~_Xb)4R*XZdCBFKz$VLGh*ai$d>AmtCcuyuIDfw*LLK
zCHvzZYeh|BU|{TUoF&Y1inaFiu|M~}f4b5ClC#K9{r~#^@Bd%zzo!59_y7ODqyA=p
z37_`+#k^_$E-TH${-(y9y1y>o^#AMZt+Tf#?5$GzrT^@2we0Oh)-Ps#t-ro>Yg7FC
z@V^mni&jVdt=`e4ls)_6oVSuszUcm*RH^-5^zqVX9bc!$RxODSpMQPH7sac4pI%pw
z&v;W4UKBHJZQRb21;=-Ehim@}eYv(~dgsy7*O#k~{(7pvZjqjc{*!lqm)3Xwt@4gr
zJ^yOcuC3i)uS}1s*YWyWrSvFIedkW4*xDHXdHYV;zHwh8`X{zfvi{%S>S-Q3{zr8z
zSshj$sV#C{wD{MZ*;oJnoEGGLH|nY29_8$7+S5A2K81zEf4#osYr2(j_~Kt;PhN_y
zo>ac;=&yryol|0KkK75pyXZ_*)jR*%kh{{6Y3s7xWmjE!X}`F%>TCS#>o3;c+W!3a
z+hw;E<9x$nHTB}%-dzv;m%aJyGntI(+@~4;vfT5I>)H26S7c?}a+cTq@2~ED@#>3r
z(*Ell_uaKzqs+_J7BDa{UUQr!-13UGEdJM%E$Zw0e*XXAzW<u}_5Z6&*VpR*{#(8O
ze{A`$@6-0)+B|>%_1#hLxBmXR{<=P>@f@{p>eNHFrk_*3{!4v7JN}n>y=h;4-Hy=j
z-`3xmseS!*p!ny|*nfM!{@Pmgux^d$`u5T{<?}ip|G#zp*7wQZ*2is(UtPCVIO_ZM
zPj%}h_c*-&T6HSWTlDBvp>^J`uh$C}uUT?>?#HVAuf?;c%-b5bzHFm+`Rj0f+q<uJ
zf~L!5Z%^Bs^f&(L>S@<jUrZIvm^FJp=uC(#<*WZAmVEL0oAp;L=tR=h*kIl8towp@
z?yU{~bw%;j^?LQ$PV;wvU6~unS!J?&&;BhH`>KxA=}pR)eL8Q>cJnpz>*80>|F`aE
zUEHgG`j5jG%BzQ@>;KITuf1QsY`wXP{(04+UthL!9gdkXZ^8fBS0B~fz8n@+@;~b2
z->n~O7hesoUC1{j_}t$opFED=+8I~%r|wtP?k%c)XL7e3JO4}j-`=RTbz22r+$;~j
z8(XEkTjSc-TPCqb?7k{2n6dvy#)Adr+*&FZ=QeXlEy=&&`s%U%&b`(5zr?JV_v!2P
zg|jyO-nV6&{0W7Ly8j)mR<94&y1Gm2Z)x3n^)>NrMREIz-M9YRDtP+oy71RQ4xqEn
zG=#WVni>N(Own=SYIAlBSm2>2A`EKo&T!FE^gi<WasPa&sX<Fl@4Q}F`E2KvkIN=4
zyiojj|9tiL`>iWq+|pm~?|D61f8|v>)vs@5``_t@Z`s%XG}h+KLH>^L#q(c%UcF{P
z$ETOGAAb>WmfO0P@$sCk7F(_Mt$exQBs)jfcM<c{?sMz^pL<@-aPPL|xswcb)_u#l
zj~<U{s49MBW3MyWsnxDf_IlyYlq+)zzIV7DIMe)fkHf^BRwc|0p!vHL5w6y*#)JbA
zT9Q1hE{*{U7O04Fu{tFNYzT2GytC$Kb+NX(?4PvS>Z{M^+`VsE^yt+Wi{;AK?w{*=
zTt9zZzN+)0b4BJRzqud(+{T-6aMw%Ok5987RkgQR+eq>?FW!CqYnB!J#q7BE%(LHy
zE_i%Du-^M>Oj40k1@nd7l9!qya^CrKe^=U6{?X^WynNr25Y9UDtXZd<7jgg3zq<EM
zv4n0x63fA?_<M?1YjkV$4~4$6Whk7mC@je3deirrt4`Z9ujcW5z_q?yWOZ&s0l0^x
z%Ej5%n6zVwmJ(Mh(;|ThQ}jd?=ANIsyx#80vsd@*HrwqEdA+Ujy>^V2yW{Cyw`DBf
zi)^m9m18~~Sy}kHc+ED?sZ+|L%5vR%S+oED|7F*9yqtf%#MgMeNhi-e-+yH9*>^|X
zZBokg?=Jdq&NlM>$$5u6K0ioqe$V;f`sS?{EDm^Y&<kp5+U5GErp9yEb#a@MCk_5t
zarWL<=YFp}>7Po!C7%$B24~_^4~h1qqms%+1}u{QC-8w>?jg*{%5vCYqll3@7faJa
zM-3k>VXlrti-mVST+e@g%ll~Wk54||ee88#@@#g?R13a+-x+_Mf1db1)^0zih1~D2
zsxxa=$gkP;W@2AoP0SvtuD{HmHKq$2zIM%v53EnIJHBhpiGaUJzwWO1yf#z6?Qj3}
ztwlci9fc2N1j4=V9qo+e)7!W`?I_2DkN5unKNR|H`%DY#?o)ivtEW0o(I}p`Z{i)c
z7sp%*0*c%;99>dosw`8q?Dko8h5>xVt(KAy7t=vUg8&^>UN)!3fC(WULZHr(p+JC^
zILFNQOS9|k*YDGxYyD%-=X*O>U%QiTx_WA9mCWij&)<J}`*p_ee@3}?ZvUva=Kbb>
z?DW%{uC3>~-+bm5xV!pq+M3PRPVeK}&-q5;?#JVCHXZLgk60zGU)XM)dG(3hznao(
zt+)CNi(~l2cCJ<`+;Q{e$<5y)V&;Z7tH<9BHgV?h3Y@pXaMQ&5>CF1|TLKKjCou%-
z?Km88aIa>#PSc6AuCbR`!@=t_&X|DiGHKYb#7tC(wP~Tlh5!{2t`?`HKna)gQm<#;
z-}`ZE==0dGb+unZWAfWx76w|#IEVcB>-K(+dfJ~#|21suL*+v+6;1czpK*EpV^^`8
z7t8YM&K_`J(TJ7Xc|N{x$);r|#9vMIX}@(!UYh@d(T4|HS`OVhviEn<>Vx+TKg||6
z@})O!ZNxbfp}?<a-``p;wZipbR$)&0(oPM#DNk?N7AWlb*RtJfE0?|gPQlRoC9jPP
z&P`RB_q1igLW})H%(mbQ5_)t*I9XYaIBZy;BhJI=)EICeL_>|U!^N>+g^yGEx%D!?
z_HEpC?On{icT<n-I+`K8@aNp4Klaaed2)2|^y_z>U;mT4SQ_<TMsIt^-fd^=UtO#6
z-#Brpx#w;LAE!{c_nYI7yqR-8e#KMn_Qq!JrmOXC^LuKIzcSc!zy7}@khfYZTAwL8
z#b5EMO_lPFXz$axlMd~9``E(y0Ta*pO*=xhRd?r1fA*Z!iQQlD%>K_aI`>HWhpkYz
zxEf)d5cP1Dz^0=8k!)|fPt-bqs?2?kvqV@yBmDX5>qAdo+8?uTgJ-s=@zQtK^tb93
zul9WHQMc4<Yw!KqyW+n#Rj-bXU1a;~tMKK2e?|9)S*dwPmY3{bJ^l5i*wm}v)fb!W
z>w26$wd&8Sh~2vvp6(LZ`B!kg%X-&Sj^4G}cSB!m$?Wufb@|su-PPf1g3iBN=l%NX
zYw_3D>(|%qtlQi5dwpc=kzZf-tyTTD_SfmHdDj-bQ`g&@bp31g{q^3jWA9#HEqpiT
zx}I{@wb<2PUr*nfwd?C4t4D@1LEmp}e|4#9_t*8;G|s!d%#}ItH|cND>S<R_{+fMn
zit>w_-lavq?5kEE|8;7w)Zztgd&PnZjbki}qO|L}N+w^l68ZYF^Yb5{eS4>Wk5j+C
z>aNZIj0tW@k;j{+>^2elo20aV{>_A``o9F9UbJ7g<ZGo~bpMIf!8}i&#9d@6Pp|9w
zpM7`rpU74FuUVQdHhP!u6tQ~Z|NLl{=1oZ(Y8TqB`)T+kZhwr;5C8bT7uxN0ZbVO(
z{@4A#<zUA~))TUB3%4?uvR>KRpSkwG^DFPk7i0E^^=>`=(u?bE(ehgFd!No<=X)5i
zWOrDW&_VZ{V%hgkD%NKGdh~08dw6+KOzg?pD+~;*XB=mVv}&=|Ui!J}nfm>|A31#M
zcK&}c>ucA#wNZbo_Ew+X`MT>v#Koe?7q!;?jg2{d^=$0-;`&9q@87?-Jwhw=n)R1?
zJ6}Eh;`MWNXxH8>|Chh^e%(7sGIPKF{*-?w^*rb6&OGVoS)tY<VR!vQ``(<kizVB+
zE@rRHmf82tw|nFL_3B^WU)S%Lk#sNazvYadz2ZgL-+%4+q<r*lXx*l7>#u3%8PALV
z`fg|S+Hn2Q!&9r)?Tvp~w0dRr`luiCelPSdTN}6e>HpvQfA3a3RlUBpZmZM&t6Hk-
z%hu|z_S|(;ef8h?E>WdVn$1s*Cw_~rTKYv_>qxlayY=Vxt9;FupZDx*)~DGW5tHBf
zzrT3qev6Nf?8?*gE*R#l&N^~Di$7v#XM|nSnfudPcg1Dj6}}4UQ@!5xHT!G!*ZkOb
z*SGH7_cvy}jL(mnv?6cYX%p*?1#LRJzb$9IoX*1~e>P6Pz9VVRF85j11r2YT<CpoS
zU7P<j$s;LfiA-L?Jy&VYTJBR@!?dplitFtC`C?;W-S>>eI@9&%nm%6@@#5G2E`_qH
zDd`MXw=YorS$ZvgeNJ=9xrO5YK6og#$_U6Uf8x4ADXVhA{{0W8tliIY_0IkM>WWJJ
zUwI}nw?*+bhn>4Q|G!iGuCG@=oy?e8*8l1*SEgCpt5vI3EqXE8RONWj)ngx8_wB!+
zs2VbVW%WO&SAAbTvX(XUt~qZy<x~Glrr-KkziCX<u-50hY!cU~dFE1Z)2!$Qy^`zO
zIMa9L%G{V@^6K}^DGLkEIKG~K{cXWDKksu%LHxR(BJ5P}-<8w9y88P5xWCn3yH|gG
ze|_m)=_Masf>d?o_z%jp*T1ON-Bx>Mj`Y%rx0p+RZ!zD`(st_b!Rfza{JxuN-oO83
zXUErdzo#_+ek;GPxrwvm>DJ$cB5~)7pPl;j_^;^o*PzDsySLSkl+0G&>`>_Gbr)Z#
z`ZDOt{l1>*6IXY~|NedYb)CYxXwi8O*IA`BURM1u)8M!06ie|x!n_xEN5xKmeXdbp
z;f(&D({FwKvt8-$>qJZAz)z<aPMET9+d64IqYMA``(#_l?DsmI{{2JWVgIhLnWpCD
zPk#NX{};%1(pdl3|JnL~|8cTT^J0-c7C-;<tv@2d>a$-SRr>PRDsIu9l3JD@d!#lp
zF7T~8rI6slrvI|&Rrte)mY-9cxA%(At>@hQ>u*X#nB2S-{7e1z7ghZg-*H;eHUG-G
zs%KaJt-t-`mCxE3SJ!uo{^#r8e{<oHPi1X=Y?;cA;*FCWId^ZkEL-}i>)4SukuF|K
zj{oQsxUl~HTb`Kz<tgd^uSeGHI^@E<XVtpf2ia$Z|K8oMW3PXCSJT<qk9jMuU9R()
z7XQ~N?0GzoX5yLG@8jchdIL{hpKK}{zWO%L$Fymmdh7q|-#gb8_&UHmx%}3WY_5wu
zbHD3-ebKY&|E;bMZ%lIT9JXJf*fTr%al)h1uXj6YH>s~Hy>dM4{VJW5n!9nk_D@ew
z)4iJYVd-vbb%U7Il6|XpPwg;!*;FjqX;aAV`ZhD{+h=)ciC=GD{+$tKuO)T8cB9>q
z_bf>fdtdp@-PE~Q{(JnC)#j!aIqDC6i*(EHmzKqU4dyVlKil85{-yYB^_NYPx-F-F
zempz7_|r>yUG>c(-!AXVJ*<DF{)xgB_kSEa4U0MJzp{jcOzN&$rEIORhwobS51%e?
z{io+L?%td0<o4{q)CGYHn-0JHD8{*RT0(}Q`_)@7Cg)A_&#r&+*HQ4=uKNr6U;SS_
z^~Kh%u&melADma;zW(7s$7lETEBD7eSRMXssmeCfACo34e=n;2HDhmWy<&GYN5YoZ
z(@%aontp6ka=nZCWIc%{Cv#6Ve|e#kC*S@wfAng<{tsm}#p7lErr-BF-+AZx`r7Wb
zDRC?2PXD*z$ZgMgIXgcsvN)Zq{lUpCe8=?PQHq}(;^JT5UA4sDwfk%Nbl>oY`T;)O
zM+KJ)eeHOBXxgf)E;k#Gvh7)@5qk6TuP^S$tDAp)S^Zt`eEi8*f3Nw>)z3ZsZUF-W
z`xeJpqO4x5wf|ao{@Q1}{ngXL{eOb4pTD*JWXE}5lX)Ap?!S(`zCQK;!>#XILVsB%
zUzb~Fx9@MTW&Q7P>u~G8@jtcJ#dn3Rdb0I>Qnv8N+S+&dYrGA&oz+qO7IFLRlKN;_
z`=xur<M#W$4!Rs#`(^bl)+4_z>Yu*hm)PaoAh7Fe;s4E({z<7d_KJH=EdOnGP>$0s
zPq=G~9mhju|CsYpQ3=<o=g)kjs=nuvZRk}&?-py3t*>7#UHvY5LeSlXJwlOTYh{1i
z?@O7p;g?<bwDmFWN0N5kj(hhvwdDS~{c$sj?)|NPSz12%aQxHfmJ6m_TyXur`|Cy4
zPxrQnuj;u9I_)dVW#!DQZL5C2dvj>J{VTm`chmnb%r`w<`a1M=?4w=J)>WElIaU1I
z6WMt<MYk~H*Za*py$dc>7a3<>UsziH>*@7srR(*pWmWENjazGasZ&$1k6*}XvYB4t
z@iyc1GTG<LvTF8BP<|I>c}ibW@4eh()>;39SIjV-CFpc>mE)pVfsao!w$`nw+Z!{r
zWc~ZE>qQpT#(#;h=<Z9{aV};nBXhCozMUOy0jKNyyVgHvF%=KBz4oAd_irH;&F^1Q
zkMv9kNiR9eG4aUB2<H@iF@OJ>TNb}#9vrGRO})OQNm^(6)1~WH1uA5-2v3sJb8AaF
z_4LdWmz=7tf6Q%5%MM*nSz&(6`Sc$7iR@28(&9eb-;JMs%O?EQrK&yG1O9EdVwtr<
z=~MnYF-NtlA@SEWlC28`jvXlV=B$2IGmpDYqxGzc5cmJ96<1d=9&+CL)5d<o_J-3w
z*ApGJgb!FP|GiV>fRz=CQpEdWml;iW-}`p?^Ix&>X6ot8`tkPusic4z26Go~TKcKx
z^fQHB61Mz`@16Zc+jOMdReDU1ZS49KcE{+fp6PFvr8zB?^>-FVIK8~fK2PMs@6RW+
zWbSVG|1lz^RsX7jflk@;=d0bj{Nw%>&9^`8_IA<TOZuPPuB}?7x9H)DOeQrx)z^<g
zT)K>7e(i|2v<&)tZV|_-t-q_^7(|&$DV4C;2$*ijG+lFQqZ?P~8(U4K<7bi_9JWtY
zbm0-IGVzRA|MD`+QkHuaC$>s$Y7KjTaPsMQH^uh8@Cxvs)3&H%LHPvhE1QeTWGz3K
zosRnNP&-}k-m!_Scg^?zj?A-lR&%KMKmAln>imr%+O3gcS<jd!7#4nYo3-!I#wMrA
z*$)h_Uk^x2dt8_(w{MN^lMg|=563Q=9^Yhp=KY5C>_=LgkDZ9_Kf+;OJKyr$zK<5R
z_FqqBN7nfMy88a^y^C%EGgjGdZ@ZXRcjQ3QJl;!(qYvNwQmDVvKk>}MhGPY5r<_^!
zKcQ#6Lx;A(;e?JDzB|vaYZN%MnLd%*lfQ4Ta^(51|09*Qy}rKd!*kVMt$$9!d52S9
z_Vc{`qf|TRdFk6HJi=3Z@=xuZEI;Xl07qSatbaV8($4F$@Bi)-t(vp?d3;LV@3#?q
z#CTpmn|^=w-4jdAH$K`LRXi!qfNPqbs7B8Msjj5Tuky1NRNFtfI$M&(Om}ic{Pl0o
z7M=QjW5=S;hRHdJY)6t^7gX(t7M{kldh2qx4(=Y=)A^@G+#g-@e)?(aoeA~+ABzrt
zxqn4y(NR60njfKZ0SQ4q>*u5{InH!xLEw6e<U`x!T(c*?e>HXQwMDw(q2;ss?&Ph_
zHLtYqe0z|?+5X|VqID*n4FB)7YSx{J;z@DT_}BN#iuI9&%NqX2Nn!hpDtc?rFTM27
z@y1c>xzjsh`GS}V`iz_wOcj66R`hVg(}oukS(Soihd-9oCo;zDvuifqA@%I&`SlMA
z6IXunn3sQm=WJf-uiyP{oLZ94-@UQ!Q8{$9WcT8q-uG7h;+~y#!#*J=Wb4tmH=mz>
zj?u7tT{F*T-xHAo1#0;swNfGP4z{*^wMf{_v@*Tz>G`kUSKT`K_1?qF8w|`3DnGh;
z_ny8$_MQLVBwsXYKYjVPM(yeA#M*ba6}xP#qB7(Tl}G;la5LNRjb%cm!J#b&)x>=M
z`RD4E{8&G;?folfEyv3zzi)oM(#XJd*1V|gEJqI<TeX)%SZ?u#^_#ojKk!~rQ1fGd
zcu4#kw`E7aJY5{{P%iaup76cvDq0_=3E!}a=1gzmez#fv{Y#0}7RS|(U!AUQS08u!
zd;Ic^E~k!`PyV*w>CVZI`xc-0c;~nJ`D0JF&b0lzN^@P{uXs)76_Is^#J0RRYO}_C
zl}b6MeeZ%dm!9jmY<SS=`8dk+g|gPVsX|{0`sU6#6t?=R;QNc!`~JtxKhl*Sot(h)
z<mdIWz#092wiVy4(%v7Vc`IE-qMY&JpZRTZVH=Cml^(`~v+!TM$ohC@yw93G@$bu1
z^oySQYPejN;eHyk)r?8B=<IuE%a;#cpS0)d)fd#^otPr0DVtUG+x=e8JoR3)t=D7r
zTW-Gg``yvf@)yz9Hwq}-tGgJ#-amH29M!zFmPP_4o8p}QWpr&nCjX?*eq-c^DF^>D
zt>4SCx;6Asb+PdEwvLOFSFQha<;wdjcMX~6vs6~dZ_Ts(C3WR*=>9N^<ml^bgrZI*
z{m)u+bH$bY_ZBQ#rtsEiZT=d?-RI{woz-18vGjM_vHn#3wRQ1p-`5Ltg{=Pm{L&J&
zbNih56j^J+{&>$zl9H;vW@O2<TL1B#TQwhq9;|Sec^1MH@Jz65+Q(a01C8!ho!V3%
zH~IJcOtC$;8yFba${c5jwq~)`e!m+fd^P*7_ubQ%c79%ycYS}{`+rw=zuz~tda>xa
z$^Sav#@^bVeOLBEmwx#+fh()lyPWclf84qK)z#Xo*Z)j4U!C?g^7tadufJ4QUzqnd
z%0F&>-0rx)sc~z6{q2b_Tif@ec72lgPImpH$BmRfrhmB>GXGP`rN83$h4!AmsQt-5
zzWdke*`Zky`#>u^zMj6YYHQZ&Ey_!8hn;>}`i^r}(&7qp_2++Y3E#aYusU>o-OBx8
zZ&TIRm+kF}WYvw@_|bNC^0c3}|9&4TeciI_>EEc!f7i&b4(GTJI`I3b{`%1M<z2s5
z|CqJ))zzMZy-xY1e_3ijKmEV&t?;VryZ=UphsW)D8~=6coozeo*6dNdqn6*ZKKT0S
zZ(G(RUVmxV_Wf+quC4h~gVbF<c>eLp{nqt3O;k<c_EfnCJyW|69zC(wE8;Ad#P7GV
zQ;(GEuD-i^#`>MCIyYBjHoTsGztT?pb$UOSvGAuvt4&hdmIhDd%=@~!!z}yl^8264
zHDx9wY4h=HH~SnsZO-EW(ewW&pAOkDwK>CJ&y$riz8sQUHK(QaM?+kd=(QI=<JX6-
z{`hZomdK+0tEcNXa=ZSwt$q`^IjBpjsG6n4@>|{R_xsMM&B^dN`Ec?>Bjd;u#t&Xc
zaV`D6dwTCO`|eM*r@S^5i8gaiO*v4rQDpOse_ME_EZoYn<apGMseEA^;q|uXPpdCJ
z`0MCHt^1chWr@AyU83W(WJ}@u%GmImd?yw$zfnDKE$7s+2d|4#cXd3=Z8>o%wrELy
ziq4i7@An<FdoSg<xJ0v3b-Dis=Y+@l{vrNT|FlFr;mKELF?`Rxh4q4;%8!4Iy7zbF
z&N@(7<<Q<!*)sQTUf=(9aj)Z7uD`bBZ`AeILD>=OOOshejEod?$|qZ#=(yUj?wKD?
z&$gXwe3mOXuKl`i=GyF6CZ$c6o~Q*jn_BGV{cw2d>Z{Y=G$mep^DQ>z<Cp!4=C<!k
z6^*86wHB6MZI)MFD{*Tz&(vjGf2CgO`5k<tz;%V;UiL4BtP3CR+r9IsetGGZmZ@65
z4msQ8^xj`INg(iJE0fogWs28Ww(h)e`r3&lF1at({##dfrdB<UW4TiFx0Ijv4;Jd3
z*~?`x^Xt902Ss{>#mn~f>hRB*_cypnsY!jBP=QQj8K?3?r{4NjW%sy}`>NL$U5(l=
z)*tiBbjv-*DKc^Xo%MXl#)V;at8TFvsMs&gQBU5}$WZcb_eE9h-LC8Y^H%&>YL$F%
z-3BX_O6hC=4i|1*pLp*64P~Fx+hw<uILZ_p`_J8ZQ208Rdy@L*=dTZMKDf`&bl)kh
z_mfY@KYuSP{xi~k^-rUe|KW?{RpnzR?^w58{p9T%F`>2Zd|tPQtp90K`l+&R)6_<H
zy`mcZXNHFv3l4Phuk&B$rxKX#?6l%Yvgi5_GoO5TvHeF>A)832{ygF47qUn6zqtRg
z?ps#tc--{`&s4^R4ZpXsK1xjsv9JC7)84+|p^`+Fh3lskzwaHIBmGyL_ij~LWNh73
z`5T5gn@;|IyDdsWEng+P{ZjbTl<G*E^{Os`=bRFvt~_VF_+a*|G7e+e3m-Ob>b=Dq
z(|?WgZ)x0);?HN^btpC+*1fsu=Hk8KU8lmg^!zOQo3SQqkN&R2)Wvf$xULFo37>o@
zcVFHtj^l33xm9tGb&oxHzxUFrYtI+>x$pV^aK&s9^YY5KQ_q@l_vSwoW_?*+J8Qxn
z=Wi-KHAe(q<<{5indi@>vVY(2%L`?8XHULx?ftH;UEkA=r8<-vtyFw6%YMUsIS%2j
zkIzHmH~8}D%rDz!&B}VprLty?eeJratwp=_|HkXD`}ekdca_+2u`u0|sp5CdR$t#U
zPkzNl=hNYA+v1nr-#g{u9OqfT-S1a(f15tL@%z~`OKh%8zAF0Z$;V$EYs!=_ehtmO
z#M-2`Y}#AXW!E3{otp4iWm^2b{TqCu!fP-8<23%rmlNJnzVXRF-Y=6sy*{M!q+$A7
z;R|MaJu+8*dNKdjzwK4ktF%9yJ1>59UXIjP)96K-(Rakk3vDhR*wP?1l|ezz?Y}|I
z^?jlYA47!it^504_W!Td7ryV*o3_LGYxmcyDc2@)WJ%jC($$%AvirkRm93w@{kn88
z_pa{h%^zHE+&%m2AIFyN*fVjPLgQMdnp=Or$u6(>v*zi^j~Zqg`jZ@muYCwrD6##N
z)P8*3mjgxy(`JbZO+96N$<}6GQ*PDjyZ2S5$8FudeC@T@Q+@}<S9u?0v17aVMQ7P`
z@bprahzx5}W8#7tA!5R<O^t~PAufWPtxie8d*9VmzrXh@UugY&)8lIspLv-V#1@#&
z@814(+kE9|><jDW9*Xmts8zbV{CiFHWBb29XB7U*`LL^MSM?IfmGLVc8b1BDuRyu^
z<SK*Lr-MT^e%0B3v+B5aW!7%a6xnOa&7LdEHkSX`pI-R1w!vnJ=qZCm5e-2}PtNVL
zQrWujR>k)nObd2@dsyAxelBWX<SFH8trn3LnTwpZSX>kS>LL_!vTOArGokAN1)$|T
zOT>6soenx0X6UGLvoalWSP`Hm$l2nQBs}xDZhdU&uDy5jVy+tn_l7=J;aqs@i?8y{
zSL<JAO<Y)eXXP*c<0X2lc3E=#D-WCee#hFkH^cr}?>nV@LZUJ?dUw;d*$u~f{yi~w
zKbXf`p0@huB%x<h%Fkryt?^3|t~6U4#d+rUt@)bSuP2zuX&=lDSZZUn_fJ`8xS!{#
zW9PQ_EZQ~itlg^b+}HBUH$ODFw6){jiIdVxoHG6Q-MZqWn<EsfwM6~!1n}+US|UQN
zObfwVBDq-G6G6*-)VNp<fp+O>2@A}fZXW#Q*RwFMlv#65R;~?M7|5@@wWd_urL;u7
zz3bc7pS!lzZ1F#kcgOP8{eOSwv7OI9)bjc7?n)az9;O?6b!Ygid4K(2`iiH0RkeCu
z9uNN)ev`DP*K$rP-kMc$xc$qTt-8hwZ*1THPG?C@GS|Xu5{<D1`!sy&o=w%gb8%8j
z)bS~y7xlO*1TXEsZN@WstN2XI57QbJS}yIoHfgKcGKo;mvx3&Q5<v4zGqhA>I6FYI
zl_o0OtWHM*7KCJoa<=$sZTj!uR~o%~MeMu4V;fd#g{*!duO7WU^p%}1JO7vbLl?jF
z<X_j!p8U)Hi#<>An#Sz-MR~hB-Pk*SE>~=8?*Hdx_c`v1R>s+#e_t5fb^Bzl6Kt`5
z{kHBUTU4h-yg9?b^5js}1I^uXC85#ktCv|<dfeGIXSG}I+Xb&51aA`cURA2gu{v11
zSJ$S?WQY8Pi)!&lH(NLDp0GDwZJJs1Hh-?KVLx@2vRZHd*1TlnoX+K$zlGEuAGDjw
z4!S@}ON6VH<zT~x3p!C;oQIAUfcB_zwfHy++GZ{Dzr4!#UbN}5xW{3uR+f0Y$lO2q
zcxiTSp#8jX|H<=b*G0x|`~Pv(yLtJqdB3}-?N_-scN6a{-JeFs%NqY)Kf2@kd0V%n
zz_risrcQfj&~N)JxlrR)P|%+x7Mqf}cHcSB_fk1x&Z7&ab^EL`cb<BEd*_ZRoo9u<
z&yt_$_98yS!@VW^$?aPyZOmS!?tCiM`;6{vTAlK^T;|sFonH)(-Ye}*pJ}bsqFi}_
z`Ph|9ofhsM$19o}7^Ore_<;5~FHCZX2$16DbZTr^kf348#c^n%Lg&5j>ZR|W{hqPs
z^*+_@dv&$-9Q@jzpFV!r|D*2b^;715|1AF6&FHM))%8>BcC}8v6#eb<?~?VW)l!!I
zp1+D`LO)l-v)OGbSC$A=|9m&EXZ^2$hZk08OLe~VjXv|wNSW98)nXRU|DP^9PJfVn
z?9lYOs^m^9*5~;Xp1ygKRg&=~r2m;{JogRTzqOs`!|wh#I<fEZQ@cn%%aaBgC$lro
zupBh=meksx8c})HY3iy8{#!2X(T|Mt@jd3=0y_A1f{ut}YcpfQf`|e!QMN;k1uFtT
zr2)&rf)z6~L^)ZU5(SqAtDnD?@^MvmozF}))95{qgqAb^<v#85Lbm^N-1>9NtDo%O
zU0HY9KV+YOnCy<UR2h>GPwm5BwO&+Q`etuL!I1}s=Z!zktG=0>`S88+U;Cwx{GS&)
zSFdWn@;D*pkM^wp%gr^TILc3mD=8(OQn+qYf0^^I^Q~_ZziqQ`pL%^NjkQen!PWCM
zdPRo}E0*q?_qXc%KL2j+u6s|v-Cg!4e8taBM+UKr&N2GgybMkso;#)$UcKne?xyqF
zvj2LDy12{RZ<;GL*6i1O6*aNwR9!T~0|o}prH-@2SV6mEqs3o`Ub^m9^7s3H?@Q};
zzu&zwIrabEAf>8}zc$9Lf49=gYVKdq8vE<(uWr1vz4X=bZ1H>k_5WU}t3O|RwUle2
z|B<z2n^y09F|Ypb!rZV;>(p0&UH3cg{mZz$@k`&vEh$}h^;qXJ^>4-cn^iO))m(n?
zGl+d+h{E+>{x#vBYJWbe@;>D@%dRX??EGq{=&x&n?mn6lb~Mi~dQnx}t9R?K{(g5Y
zR?*M8$@!t&(fzzNW*kW=^9#SIuDdApsQ+*3{p|SFXKNpctzWu6T>I*)tNqu4oOs&2
z_ic&$%U$=5C(AVUZq)biS65$)m%hIKD&=p{{QqJ483|5B4jb0C9F(6ft0H%8hSsIZ
z#UhW4KKu=fcfN7;Q*hbdU*EJvrxe}t51-8SOX+>}Gc_Bhdk+P@S$TF}ocp)x<lm^t
zx8$d5efhoC^L2aunk8FbeXag#?mh9zckTVGOsgX|D;7!|ZVx!;SpLA~PyU}xOG6*O
z^}RK>@4Nc$_|AsLSgHH(SNm%14>PLy_1JKV=Bd`+3_B~az`aMWXqyCh3e1Si$hBF-
zl=JtO_$|F9Utd~Hm|y?9{%fV4f6V@{Mb*D|6wmA1WpJN=?}>neJFd@g*(3WP>GmOZ
zt@QUb=5-b3kL_z5|K{B4f6(?$TlBU1_lp8m--Fg#KlVO&WrDn<;`id$x(7YaIC3Ah
zw^WI$^ZoX#P?$UYFcXW^{#^6>8=q7>)+t?Joh!(dQe)V*XI}UhuB~xRfB*B|@;_?q
zzx;625-#N@N^=+A-d+)XqN7zH`_$gR7tMxh3v3kT2Km2g`C7F%=-awie{1iD-<jdn
zFD}H%X0awrtZ2QW)7?Aa3&Sovvwx-$wQLc0Pt*R3{r^Ne|6kE_)8Md~@<{vXf{9M2
zk9JwEUh)0(|A(D34%^6EePb<oanQ-F+iKG4w}Qt%eo;ND?X&#6`d;B33QEpBYnpG)
znX-b{@mSD3xmZTOY5)0y&bm!hes(NpSMGsZS!XKiqh*9|Z*i>mJ#tWz-DSf0ncJ$K
zv>x;L%Y4dC*6(`3MYq$RFaDn<8MN~F-$R@GW=r_#AMfb$UfP!H|98&Z4B_hg!Nu3q
zD~%p)jW6Z0uZ{gb`QEv}jX46bcg#%Pxt6xnB(9tEWkci28HdbPE?NBlAg4@N^wBtR
zD~7hpS=<Q<<q}6H+>V%O9q8&L!RHnlv;TJ!Ux0&96T^&(9Q_Z%Zrr{>pEe(n{9~ZC
zX#J<EuiK=0PyPBQ>FW9aLez|ZvF6v=D{8$~UhR4?<?$trQ&ZR`)$A$Rb^7xaujn1W
z?aOxB?*6c%<K<-bXSy*NLQ`Ho|9?Jy(yf0|53YZ7Pg!ol{%1e0|I=@hPY+J06%712
ztK!cI#j?Y?E&Jx}JhK0uf~mN9>5G2#v$~J|xycDey-GjlCFS|I@Gp1BO_zm_jHZX*
zzYz6_+3VAfCto-vU$vcdS&-5YuYc_>+m6!_+-tW>i`mDwo%4uHO>1V_HBZrV^6xeC
zbA<AwBouyMi@$wEclw*&`C)~6bMwP*gni*yQgr#^(Qkjf@-E!pT0Q6MU*$XV{=JhC
zs5z-}`16E*t1El6YjjV$S#4Uz&r-czKJ`|;<>f!yt-AMpjNg3sY|S52p{-9o)m?vB
z82WMTfgFyUHwSkyPd}vdr{nSCiiYo87uFq<TrVX&ziI7i#s7cIR2v?YH9iek9(9)e
zi1_cD(f7TI^;xQ=?<*Xtp8PXo$)UZHmD;cRo-IDUP3ix)Ie~F!H_Uu#D!R8dHB!&~
zbx4hIBA3~rUrF3P>%YF^;f$8v84|a8?*F)dPkJ-|OkDT>(7qUsf_w506MknMnd`fR
z$7YMgVZ|=h;|H0gZYI{<Dwr8xY5Y>W@%CxKujSKKJ+`hsp;R#C=7&4-Ph@R4cDVIL
z8ELHlC$_Wxp!(TgQ;ovymV9^EfAPeI>`wl#FPV%V{@4&F{o171M(VHm&o4opJ=L#y
zIlTQR9kG3_5USbT)AM)hjHfqVD$cq#^>U4#pspg*nw6>^{ySJ-bI$s8A>m?#m&75J
z-@lIv=c=Fb-ah$z-PBM2<CHEo**U#wUw`%Y?2gY*F0;7a{c!zJxn8dQ(noeV>7k)l
zYgO<0U-4hCa>2jWr5DWEHB;rTsTOlNPk->Ue(i?!@ynOj$fy?GkN@(;$CAZfW|_l}
z@98Wx9z_P7zfZpYE4Oj=jC%Du^Co0?sAfgJmuf%zS4nif`qli_HL?!PKZ_dPZ<qV_
z^vSosj+wJEzsXpN&6ECfV~$n6m`9aBc;Q^F{MgczKmTRiE`@Xc+w=Q++)TCepS%uF
z?SJ@bFH^<s#}};hAK5kMF&EA4U&MB7x)$rDmv{fN{{H<}dddIN>fI++DZQCB?eEhs
zKMfq+np)QQ?kP1gIpzI~_4+GeL#^m#PdqqH*jTr{*q_S(@QQz@xa2*hV7<(((>F*P
zGEAApvok;b$+vmGSVS}?M}K%M7q}sLY1>VcG;<;S<GCVYVe`bcPX7Ga`TN>?)olND
zgy!GucL?3wzxBu3orXW0%<B)zhld^N{m){)c;@s!X1@0$epUUy?Huj8+ss-$FO1ul
zBXjO*-QVR-9G&;9oYN(rzR^}vy}#6G)3iBDw#8ofR2$eDyEFMn@_wEl6}`(RI20cX
zEo+VcqWp-fyl?gABc7#J+F!!6gY-V^SX1KXbe#3TgKP;&z4=f7-)1rU^7Tf_LT(=W
z13Nn}ZWP^cONV#as|_388E?2Z;dENp{|`<tAK5K=y}o!w%G2Up?e+b$%$A=jcK*BL
z$@`jp_0w;5PLi~HU(@uEW8rO^>nqMR{hU+zZjy3krkk4%mw#sJ7bo6@He%8&%5K%i
z4_}=4@sFNQuIGv_&P=6WJcZMJwOE??gR<g3E-FuX!6%@T?;l^cxZCW-{q5co>1X<W
ztob=LZkc8E;_PeOrItL^Zd?CfP@VbwN@7RNQh7u6>Z}N#%ny#+Qg>XN#e4IZ&HXUD
ziN^Ost=hj_c>3I^^YNEbi+r6o@dq4V<tMG*xU25h)%#6J&%eDdTdR<`sPt0lpVJH`
z^^+gJICJ`ciN>a!eepFDcFAq)-Cx9UFxTpP=j7AVkL<Qp44;3?kpEn`;J>9hH;(V;
zF7I-_H(g@&+KP+=i)<hLvEX-n@bjFUiDoU+@7ID2a%UsEd-ea7n69e&`t{dW|LE1h
zCi|L}@TAV18~(n>yZ?{tGEdc2r~mJsuXn!q-*4eGqqjc{_U`t*_~U<1p}^g^y;fhl
zwttNJQk$>)eriw2o^wHIXZCGW3=>XKeSPBazg4SC4qRq4cr9$!?EGl@cKzk%aZUQ+
zWjpKksxR7_x|*qKd+Eu^$N%k^{WCiI>4Xsdck|oNX}B*Fo@*7B{eXdiOT%%NSnDd*
z`i1#5cWtJJhyOag>+9>Ur?>X3y}mzYf4KF>dF!uls`>x@`mJmJ`~P3nWtsZu%ij()
z&);$H_g&u|w<fmqMTEvPmE_2x`)lIP?e#w7!pD*EOvh0B|L?cOI<=QoKYVNx)~U>l
z?W;(&c)f~0wK?-)>fgltzTV>16J~ua{;FNNK0N!1W%$ImQEN-@UpgAL{r%VS+VA)6
z%Nm{TzYyrKUE6nIb*&^@&9=ox?|ai$Fa9YIE-<OT`C@N*jk%Tm`-N-7gkG#vH9uGs
zl<BlbS-e4&?de3G-B%kEe;wJyCH|;v{jrC)m#z8G?tFZc>5rGPX@$qXUb*qxzjmAT
zv^&kdY+L3r2TeP;_>EE%L;053?H7No*m_#@-`-nartJ(}eR|dNX;)vp>sT7&cxE5}
zmt#9TC3fTm`2KGZ*(tC0vbE@cK<KK@k53<;zBt`Td1cKkorEO?JkyUY2-SI1W1`)k
z{?PiK-!*aNCg~-@Z=Y>9y}$T>{J*^qYrpJ`+vs2Vdg|T#XSUXDeLel@OyB1gt)E}l
zZ@!gl_Vm*G@{<`+u6L`OLhCB^zt&xUdiU4$`n9#Xo?o-YUx$2Jm?mYxrL#-%roGPz
z_8h*y><$O-9_dMVki#4(FVv*>?c1|M>F;%ym}hMWx!*e{p_p;!SB43OHX<A6t!uBy
zXgDFY>_+PUr{6YD7Cu|__?ZJ&ux*NRzwG4q{eR}o-*^4hlc~Ii-ucIMeC>L<`ghbX
zG2ewVcIV`5O8s1|Eut~o&+mh|a>`fXH7C1HdH>p`xa-K_dba!SXJ6DDt<SpaE%i2g
z$NVtQT^rt}e$3qK$gnygwo&Ks!e@S|KH@p&4ZN?-|6^3lVSaT+-~TW-f!^N!4H+Vv
zE>2hZ`2G7UgXw$k-kqx%vgWhPX~!L|t9^b*eRVdC5<2x&B-Ui!u2p{5!i!>><u7S&
z7O_ZAI5r_*x?I&v!}56bSAW}WHhQ0_2v|^-5cc5v%;}dy5Atb0I{ojS_@0c!V$Esk
z92$0OzDl}e+igAUls5Zoue3k^>tYx0T!{>+w)H#2zbSm*e^7t2#>75J^*?iz4jTW+
za7;UVKX8e2jnOO*$yXY0ULKtF!*FKh+j-aDC^U<i_c*l3-&4zxW(zmnB!0KbHun3Y
z@>f6JPk$YIHS5>~WnNDH<5@*14>CWTP>|F5UcGw{>lDk(e+KJCl4S1A@aUKF`u6(u
z&Hi0?AHHQg*7MRr!SZl{U8B2ZKFg~1*Ztcy*(diZDBFCnSJDbu+~S++>v(_ZpZ%db
z>bLK!7JbY7&7kx$&$;7&eU;jdod5o-aDl`Aggr}t-wFHvbITexxAjd$LM^5d3YF!r
zEpPv1fB4%XvL|u%%YuFV^1d8D?baPJew@x1xxe~-`=+B&$`5*aQWd)TKHZ-t@n~b5
zv3ATdx5)noHT@mG{abuQ*m<t~c018!%c?nrB^`2<g=QR)-Tv>Kt687)tGw#FGAq@y
zMSmSUD7F8!)|R**_r2@IFWT$<WOnG$u=*~N)GU(JSHz|J^IcV9#Db%YorOR6+NWQ!
zdzQ-c{3D-~&>>0IwaoEtPj0^smww7>-S}{eI$vn=hK_}NY_&{fud42J+s8loUVcSL
zsPjYShnGPH8j6RFEZ-=`-rKL?&?9|)^6lGeZ%x>~XP(q*hZWBIrDqDsZ8_Y&(`a^h
z|K2MCou7Q(AMt10a&(FK{~wLa&;Ly_`o;d?&DZ{$z6<Viat0TFoVDZoOa2AUZ=;N7
zi~J8>EO*xW>JrhuO?l3VbH3TR?Yr^M_Q~ZvtABp~qI&f;t0V8T)j#WEtG-t4d!w;t
zfBcvI?QM>7-=Ez}dHP4lZvGbT1*TeUJ*LjHUV6X(q4;3A-nmCz=fjTqTyojBe|=ct
zfdj$L>_1xHe0|WKGym^G^L*LQ#h1Qb;e543K;CI;W&Np*b!)2sztiU|sJJ%y<f|`2
z_aEjR<bRy=@}Knf-ywBfpP#G~&Ioss{mCSEq|*A<g++H0_ogMf#pksDyP+)9U@)_3
z<F_|Ud1gpGvt{RD+4(g?ZT0f+o2+*3KKb<f@1@(9pR4-2L@@J?`NduTd@R}am+#*Z
z(=Y8fwe-|p&bWB#hJsEmPif6dUSj_BHAb1u%$ap(Rw=*J%h0>^_i(`cL|#|ZwmE4J
z>OVZU>^A*(p-f}`oYjw6qkEpP%}qc3^@!)MqE7esmj9i0eLEbNx8d&Y|FI96Ckb9o
zo8$6|^|0rw_<lYQ)uyAzR=*IbX!mGl5-w)pti1ewkNMfIKh`(v%V+GUEn+R1dGY>-
z1zj`y%`H1^MKTQ+PnL;qdwA4%*Plbr1E0FR?@(NEcX|4c&!<Y+>ol)(gv6#l`S<5a
z+#hk-Z#Rq{fBmt!AlARAVWs4S{l?y3!#sV;bPvqP5dUyt<Fr%!ndcu~EB<3FFiV=N
zZeNdo)I{y*=~s7NOcqKx`Re|f(ATWLcK%SkJN16|{MraD!Gxqo$shUC=bqhn`tPsT
zpXRJm^X>D$FQ9d>pzT_~0oVN3Rrmh0JnapVdvY`=`TK(dFM^yOY@5^4@%-#zkG-P*
zBp<TL-Ce)Z@b8mv|2h)X>%HgyZ+W<@Q^&sOueHaLg*>Tm!WA`_En9Y~_S4_pk2(Io
z=RCK$wBf|6R`2iW{o&##_d7qHsG`4zW54F%+n*Wxmrbwln-L_mw!S*s=S<Ugt3UZx
zzc|G69Iv-7DoOCT`Sbky^*?el-bqhoGOt{upOyQ6#;X0N)n8xSl^@(1x7Ou@`dyA5
z&I|hc`X@=f3YGut%ki#Q|N9P|B^x!@s#gE5)tEFXim9M|(fTQ0KbRLt?0)=b|37V&
zOLum3Tz!?__H*`~3)e2(x&EoXZrR!UKccIoi)K6xHoTM2eLZf9{|C>*Gv@5`&&uE6
z;-U29e3#qgnf7{hErPrT>(=#7UUGHi+}nHRUo2eV67YNX+jsMlO$Fj?*5|!@_v8Ie
z+cVYEIbxNby?gg|cdfg#=erMW>#ikNYJcyK>u<UC@6YDY`st-{5u1*u7#Gi}i@&<?
zW6APEmjxaazWQ*X(^hu(yM<p*{W-+#|1l=RFW|#;(MqMi<?j#8vrD>A{VewCt~nv)
zxh?y@@A$s|gIl!6#gwJuZ|lEBrFpx4NwfZAm>Jele>YQ5IrP^@_N{t}Yb4{}^>Roq
zvzzk8@A?z7FbVD_H`8CKRNLSCtrq1upT#KY()%O6y(+ubAI)a2Z`iYB%KYp48ZN#O
zIdj6kSFBsKNkBQO<>Bt#-$kzo3p=m1Q$5IQoqA&WqAlM8O&@d|-}!5gUGQJN=60jY
zE3VIe{9)hJ+9s>$oj>2$^;uMTMBRSSf1d5nk^VXL2Q8BytG<c2l4<<^c;zqQ8z&~d
zsntG_SMjwbM4I(e%CXAd*EIqf7EIVTW0e5E%KTZnaR<dir@zPv`t?0yW4Y?;@I~d1
z)FyU3R|$#!TAcsJbAwy<UCUqAS6A0{>3Qv}<?Gsd-Oi?4w*U5PmwyHhNmHf!p4dKq
zAr>IVy`<){`7inFtF~TQdoegln|1Q9Q+uX#%ItAJYq}`4IPNBs$eZmA3=CWqj<dvB
zPq7x=5<9Uz{B1nw0Ft_`xAu!4X#REdZTWrS>tC<``gynNYt){5TT8aH|6#u{|J2f5
z=F3=GA1GEPTSpiPNNXf0K8~^A2?#CWdwQ@;^WW5eitE|j#5d%nY^j)@(?99L_0{3l
zVblIcv+Le(X4Lw>`gD1xT&swD(@OmZQy+c4&;4VC&PBocxopOBw75(}S*65erA*ib
zZZ4jw5}#PV;D5Woo*jj}vlx#Qs%|KVH`!ppwNSNvvdSrgKJPA<h1=$@(V5A}C=@Vz
zCEu^$2ulU)vy9hG*kdkm=O^bb`(+gPUifokN%In|TeDqv*1fShSYw!+!uTqxSSY{m
zg8Y<6WpzKbZnNc`JpL=5e-4u_YmV50gg9-sdWp$r1sWJ;xNlHkP!R3?&>OIEI;S4*
zq+bs;#3LUuZ0Hb|F_^fSdExqmM=5P`y0@B>KAgT4aKg6ulHI>`p7ra!wAY7EzOHE%
z_O@t!yx8u4hn=`jUx`!C-FCa4Ls7ImbA7vDg8l#HlY?I0&R^!cCen$8D?Rk7r_)cp
zhp)0UU#>TKwIPRN#>1uJQ-g1PovD6A{A)-jgG0cBuiZ)2X<ggiJ_uw{$WJcXlyT2}
z!>ViEN3X48Phu|KeeHe?`;HZ=VITSngX<f0#lG+IEI7;gj<rT*YyF4mC*#gPT@?E2
z)z#}|+J9sAhg-~<cXT12(EC|ITz7xJT3Mq$-IlW}@O0JJt{>gI*3XTZb@kL?lk(IL
zlWlr@G=vy`@6cI4vumU9tBKN;Q|@_e)HYaon&oVxXAJk5;8umN2fa!nf7e{-sF~0^
znYHPypPGoxWY)%RhUpHmc_|Gnmfq(rQ#L;qIa+dCr+$C${i}}mPhJkax}Y~cJ2b@g
z-`;n7ReyiV`1C1IUVqXhryJ@VGV)yss-2<o2F-t_I8Xa}<iWe-`}^KF-}~2oDsSqc
zeUsPiO_+Uc$=AwjDt#tykJhouig2+sznp%@LoB@D$MpWb%Qv;wuibIHCNM&>P{#Uj
zM_yc$P}7G)Mt4j?EAF3X+1+Mgl%llcW@`H58ItY)w0=xpboZ5;!Gv>t8z*;sW8-9d
zR%?}Snk;J}88q$6l1qx_L9@~yZ_{mP*c*56?xDE0O+K@p?cV44SK#=}xa8ym4Zm#*
z4yj04{(d8T>gdXS!4>TnzwTF$da&a7GX2Az*-Go5PYBx*sH3;?XUgxdN6H250@9aV
z3Mk*R?5>b)o8F6)4v$R=O%Cqz{>?g#Pxzy!2=8~v&zcKT7w-I~mc-4TxFX(7)hSWu
zdwJLIx3cd&t1B$7H?)X*Kg>-y;E<!h#cF-`?71W^E}7r<4`%FGdy?^DM#sK=eNQK!
z{$;|Rr#Ndu$TyBorcGhe<&rG?Vdqq9UOhe$8#-m3cG*78lPWrTRx`^Vr%TFngam|b
zSJ?I{(x7RUpM1?jXQ6=kvb;;CI%_x9)-CtlxnRMB(rC#;$6h}Ck=)K0Ftz2A^wIE-
z^M2k?(m8TZMrM)z@zdHXl~htUZ&JuOp5!g{ah~{o_gCw8H7Wg%UDDqERpI>S%Uz4U
zGAaBC&=IYu+?^{V`PnOcRx`tNmW_^`?BeSJ<=3yaQesQ_^7^+(1)JRU{rYdz^CC?6
z&Lrz6CM}X?ZdA2cAFzm5OQ58$!|?pe`}wc7a?gAjz3{~D7llX8KV?yJG85S<ce6h4
zgSYLRr0BN0D!f+Pb+nWOwwLex$nyNrb^W<t+fw_@=G|3gH~gQPq;UJf%A8ron{L?M
zSzn&5cVT|$>cZN)<?mMS5BvG$qaBOg*ZCzA|F*EppMRd<enZBXFKOqrL#*x`HK$^a
zSA3Z9>3P!UV8_V^tln{X?Gc>%RgOpZ{tL^5hsUL_OnJ2a+nVoJ*C$M$)ATcQ?atDB
zr}suXeR!m=`f%a5qRSC^-&1Q+HeFfxyYtB3i}__+Z(R+%lX^cuT{Pl;lI7AVvhOD@
z?hI2q{4(jk{_=0ZZtrSmy|hoV+45-fvoAklzUS{=f6G*(Ui|zlkyUnF8L68;g--00
zm{66=d{O&-^dC`y>|?qmyJl~DqP5`otO-&3uf<>8SeSnG!RplJy}!2>Tx?6STi4lU
z`~Orm+vzp#;`d9m*x&DqQoVjdr9ExA<CHncpQfwn-mBZ+o_thKZvMiqeO`N7?p|7Q
zP`vAYe9{Y>|GnZt)u(^t$;9L@TO7a9ne*;WtE!tjD-tHl9aOw~?U=vm|9uDR%FL`-
zT(3WFei9P$ee&TSlUPLeZ#(L=Tqn}E@sG>r<Jm8A)LIh0*-WmlsuAwa601!!`~UXA
z&PVqn5{|2;F&tm#Tl#I4&l$(O%Bw36F)Ha#e}7Ua{o*u_;KyCM%W~hg9+{GHWXCtF
zpBjeRzg6G;F;f>k5^&Dn|L6Mo#br|0SXktX_SVVz$xnX0>!cy~u|~Cd4mU&FDxAs%
z1^>*uz5jli%If%skC#g<3vdW}6@8`dh^YM6rfhCz-7azIzxVo1B&<Gr=8=5Mk3(P1
z{%zvba6fc~f77vTwx1sstUYZo)!|{(W!)1QwbReJM!7Nne<S=s$iCQm3rqFo*ZyX2
zx1PGJ@nh>Zn`y0fv-bH;xZ3Tui}6)Za|D0oX}71Js;<j3UbU;XeA~RAA#JWX@0VSF
z)GwF%?*6m<=*t%d5nEljs^)Ur8FHPsYG3Zo^Xkdx=X*qWt)z3_JUx|it@MZM+K?3!
z>(>4{wJtQjWbw(}Jzjl6KF1&J;ZDji&PvvD=PJAu{bu4Sztz<rR%V?{4%ygjVm068
zO4Q%)j(%kefBcQU_3^UJqnq(z@vlBg?+eqvC6nTPcwxbk<cg~z`&IT?9C80Hvs%h<
z#oc%-M_=8gKkFXdR1p;2zx%aQ!NPa%z8{i*d^xLc`-xKmI}O$xi!P`;7UR7>aLR(@
zq~zSbEyA*XMl<KSekjS%x@zxebM&mEjK%YNvK-x=CPGCM=J6Y)$)0QJzN#<s(MJ45
z#HQ1|)@Jjzo<8>EPkiaT?%?<*W<F_CBZ@5=jyL|zbvth#pL+Jp8l(IB4ob#dJ6yjm
z`aO#(`>v|Bj)FVSigm0!`A)ey>;Ig-q72vP@x?1ykG01?b(HzJb&lg{`vp!uTdvI%
zQA#c`ozNuU_xh>ZY0n?Eld7Lg{TIa1?;^CiF0cQU(o4O=o4KYw?9$)&$wiUH=D|GA
zw!?{ctf#-?+OcC^Qbms7;ho<fO1ZSU$cTKjiJhrCY5IS6j?VMj9>wk6v#<M8|5xXZ
znj4diU-rybUTxwpw=wyu@Za(@LFr}X(hqtfkImfrq1^xWlZgszJ)cfGE7&v9ev-{Z
zmb>#E+H3WHKigsTdBHJ3of$qakNUFhZ*_SdST@;U(_y*nPxov41R`@Ko?QxC9V1du
z8oVSaFf5Ye-Ta5%+trV3`>|NSvQ=i+Z;rBSi+Yb9z4Vv6GQ%xDeD&X$TEU+Eznb2g
zX}!_h6C3qqnt%WQ!@uvBuT9pTw(!WM2US&y8SLV3!@l>&g}>wUsi~2EBxD$?{6Bxu
z@&9Rj*=tjuoPQ|f_${;XoPFWW)Ac<fMoZ7h{_UK@lRtmfETg&>HXaEr_31}k{0s~P
z#L~@*()DL6WE4ygc(7o>r*of!Uh3`TU4LzV+~kX?aVuNybsTWly_+^6An-xK4~_qm
z9;~Prm!EuJ`mm8yjr4Oj1_qw>j<dvDU$NFN%>A`BYU8b)zy3_WzPjUQ_4}t`>!$v@
zruFsp*W#_|ub%pc*Dcu{w>oCk>uXEDu3x&{YwMS-pa1Qw+iknmC{fp|_1^ps4b9Sh
zk6R{OF8UMNIg`(%`SbVJpDiB7om9#e49J`q_Rz4`>1L*>I7^EzJA3`RAKuP!8x&qo
zj94A@x3nuJyzBnavaP>1mTgrR*!t`GrK|T}$F9v~-7sUnoQj1pOTSN}>y<P2J!Z`<
zf9%C=?sD2b;nm*L(@eYVICK{K{MnIY-DfJjGg<$MN!F*I$5z~kX=sj?vpONBysPPE
zr@=I@7M2vVm-dd$=eNGpmaxBQV*9i1!)sOz!w(XxQ{Ft`=QQtm%6?Qj@SXoLyRD}p
zs?I$;xai)8G8J!$)N}v4%iAZYyW16}u%6m>ZRUYRf4BU*KDXq=*Z)WU>V|&XUaH$u
zynlu2$^C0%t2X{RdS`ps+LW*F^}lGIU*P0A_x#nfi6M$io(7UGb$0s{GF`6(i-(H`
z<#)f+)0<Gzlgb<B>uYgav__jNDzLDu=;VBH@uR1&#JWu}4DVc_)+tlIe3$bTf%_F!
z>!#iQ`}gGOpoOM)i>|H|{+hRR&HVYF>g&zJpGj=}Gx`6TgpSm?_n)RKP4-FuwNLF=
z^cKC<*Iru8`~C9iubaQGJ#A%kG;-hM`ZK$3<%D{pnR8;FHoaww4q3@?g72=0w|;AU
z3LB%}WHI)K_i{SEIfb_!PX4g+mFJnAa#72d-_O%_I-HSP5`X&i)kFTae|#2CzrWTy
zJX?I#^8B-pwG)o~vv99Iq9ewkDYQSJpmI^abK1AVbL9Ry?7YEKt$zLLMxJkr7R>x^
zzM#)5?8?5qvRRLw2XmbJ$QNfBaPQd<q3*NK-b^uHT6H1xX1%qxcV)f*;g63t=`b!f
zwVR^(Dy_GuPR#V=-CG~({ok@#W<(`?Z{BAlvHnr@l}u$9>5yH`TaAB3IGOB|Dw*e-
z?X<}5{QS$pdVHH6T;-HFaP77Khovf0PCer;jOF?m^R4C8y6W{|hhnSrgWiWVw|Tm`
zcyeyy`k;Q#^7PYB<r$WDh4){|IJ}Bae4p})9f$5uzWeUqLBlgH_I7(S#rdnAe*MZd
zqufJ0{^`H|?x(BY?=M)kHITP3Wr6eDj4cWK9olcHPXF~G`p5={cb_G4DkFdWc)nSG
zL&JP4zZikSAE{5=W_ZqNHcjS@|JgTr^4Yu(zM^ag`#%PFefYaokN4b8Tlsa@?!CTy
zeQn*=mtm{J*>@e>BHr5N8x}uNN<^BaMZh9Ph+lW6sOPue$`1Rt6+U|PYRAKV-emzI
z^IhFz*;N7)Lbr3w*dMF(=;NgChrS;E6*hgY)Q1PxB!Z5(E%`R-=l;*%cFCAuo;Y7h
zUs{ay)Pn`ua~Hh5V)nsQWNFne)s}?+s`0DYIFIBl`5CodL*V4zTPyBf5~^CB^+wa~
z+HS7t<(|A3u0J;CN}GRBQ8iIQ^v1>5%~urJid6hXt}p-J<b8bq1xa2Gp80-x75+13
zu5dW$QOjO6QKY%aQTB<r_#ML|i&>2AqC2NddeHiMR%_p)60-+W7U#^jTY8eyachQl
z_Me)$H;lCSG#+2&_WmEgsW@TPOIP97|G(6puF!3BpR{Uqgfx@N>wfnBgA#N0$1#iC
zKC&>eV8L?HJAqxiN0UC(ZZF}}ak~2}VU>oV+o#g-Wp#zjMe@GsViO8JcZYwIQ`Jrp
z<X_P8@;OUCTWzu1-|X)BCx5SAaYrh5!G=Hoec2>xA4_owA7S0}+UaY;l4-wk=lyS5
z)E<0ZyRAU;zxkI0{R`_SPI$NX{DZ$z@!i7J?|vS3o7csjYczAQ_OtkRxB2F%XYWZ}
zp8xw(cWPWrweZFp{m(Y$OXl@`swsOXdU3Jh_O`Xh<2M}6{QRG*gH!!tZ}IJ4f0IAj
z`5#r^a3*Z_@w0KQ8IxZpYkjqox?%aUMSGJgxBoY(XPX2Azhy|?|9@y(&XtOnIw_m)
z@PEna{+}+i|Cs*kt53eZX#ce@K7RcP!~1)@rUtLiaQ@3{q4=w0v3SW@j+s*<+t{QZ
zv_5^myD(6#WBd0XB@3U;XWLis-*l2}&Yk@$PMr4*I8v~4*)zo_KmYRv1Wi_`j7T-^
zS^D2cV9H_^v4iu!rEXt3;obV5J(oC24Sv-YycLf9oVa;y;elfcfydLOvY$TXy)~=j
z`lm@|x56gAtLK=XmdST6?B9*_Ew5w*l0~|!JGa?oONTGf?D<#qZ;9KR|DVJz28kAm
z-_riKvu@uL!T0n1IVxFd=Oi<0XK5Um@V~uF-pQ*e$WSNiy1|=J`RLiV*%$ky-<yAJ
zThPae+%K8ijvrwU?%)(Lu;PCA`igeNHQ~E&z6U*MIsZN7SCjK?)1BJ#(<?K}Tay>2
zEltUOn!nIfbg2pd*?O-PF*9wfCJ8ityR3DU#a`y*86~4j`hJ2>zOUL{erfXUuj)ER
zb^jK0Ii*Q#|K`7ER>5D<7Gw3oh*I8P%~ekP&x+<K7_H=#t^aT^y!lSH-}w*auHUNd
zo6L^Ct>|7^{bPRD_xo4ZpVv<F`>X$-mCekvhwI=ht)}Z)FU&%u54b2ioD$P~`n<0b
zdwaHjXq9@#vVfaj^6g0vN)L-&|DUF3RPitA)9?8#PEz+-gTFa?2@4*yncrpU*;KSl
zwdF@do$V9-?F)8nlQG)gW3c-2=7Z6k6&@8Pf2*393#Tn!vt{xhhqlE+ZFL))nbtjY
z-hIC}x9rNRfAM}V6>d)YlBWCO;NQAC>3=^Z7Al`#_i@qBuRl81M^t6YD<<#WS!e9~
zKiSs#xWBZm!N0EZ0)bf*<J?!ZywcyhFh2IX)AqucD^EX_7v9?4-Tg8Dj@y@s`{Ubu
zXLlL5b66(yNhI%DVo?3<+q!F$rYueRRd%?ASLwl5hSjgu)a5-+vR_nRzSH1H?^;2z
zSE52L7uMf@uwKvg*u7Huo$Gi0|Ne53T+Mv>2!*D^hlib4sHVSLuQjjqy+R^S>dg)L
z-(*xS{F-&EjC0$k`;%X<J1qEL>SNdMmNge2`f{hL^z2o9RuU$$Oe**o|Ea~-J(RD{
zJpRxB-n&CjeD=+?@cDP|cHXat`Tws!Ha%1s`C4etq%xNU65SJw7CbJJ+<WJC?b7w;
zyYod>dC70&T-Sa7+^XM&e|;Q}Y&Ji&f3jhn+GkzsZFX9=nQC%RBUhP~x+<FF?hIC3
z`>pK8r)e@z!@j@1#p5?C-PB2Z`LXCHrvDuls`IGo|JLf3ZsiGy->UoX-S2m+C+27U
zzN(V`>Gyh5-IexRw={ihJDzrC_UmUHJ5x4n`k{C2jj+6|QM3NUuo>#Rn|ji`QciKa
z{?=TUzdSCu*<UBqj)Pk%{JV2R>QDFSCW~|bU&*=kLhkdd`3F?i?b;YI=i^$P&^fox
zG}+9z{ke7VOQtzy9*=u74eRW-T~7Bm%-?cUu%qP6eD?MG3fy!){FoZ$rSRE?Wp{zd
z&c|t`f7stA*Ew{~DK%vly=vXvJ1xKDc>DQ<$C4L%Cvs}dIq3bP>sr}o*88dR&p-Vu
zv!`}P%=x)@=i~)@+&j}HpkwxOI`8SPo4Nm9n>|1OTDOo<yF}>uskOUzum0P2&bsQ7
z(dmwtyX8Mj{Nv9bzs39=*CDkzO=rX6zo~Lwespfz#kETL-MiiM#HQ`FdK>4xsBZd?
zh5i-^79X^#GW9nuWqJDE`)8&v%gU#x&1xR6T;#X)puAnxc9B@)&=(h%?2P&I&;R`W
zD_VZJ3%gGH)@ZU?Y>}CFI%J_-^wQc{{%H>nrJs7tbE3C*e!Qb%i9y?w>aH8N_N<oo
zTl+*&&pcl-;d`Nxfp}p`^&Z(3-!E~@PP3A7Tw6Hb?U4AJ{abIz9`A7bRa?1uZ^Fjg
z*Z!Vyo4-&Z=Rxbfuzv^jc-=$XUfmBgh-c({Y`g!Urs$gKD_7k9b2TphZHV&Hj26+$
z-Y*vO`MkZd&ob=Yy?-;@OYgO<oA*yg_4J*vyQ^+oIp`>AmHW8v#oC|M0wLcz^It5#
z?%S<<_q+GgnM#*e3yWUPdz)2r{oeM?c`;8ut|~exXj!<QS1e3(-Adc(-)}YbOP}Mo
zC9C}5k3)A`R6f(1a=DKpoex(3JuUch%08Q0uU`rF|G0Gh7&rH^N=`Pn=xa$l4x3L-
z^0|5b))8rw`tv9DX&lwem|o$sT(Kf`+Uqrfq5tmPj((|@*8S_oobtlB_Z<cDPwQU&
zzwqaB)m;7S`~04$uUlJbTeotyz-|5L-#k%EwjAES^-FTbzWppVVYmC6lIDM#-u#_w
zS@`Bh``3iee-xplWAa_ks76Xq_}~6HNlQIu|Jbl4{r=(jtlB)Y&TW4u2uh2_Z0(8A
zQ#!|H{BRBXE%k|>adHNer_G!9T6n+n`&;{8uY0YxHg0M7{<y#KSEeOdZ~Fg#&egpi
z#V7BuK6|xLMdFdnD-*#x-uw|Uk8YVnFsg`5(GcU}ZDtHu5b;HXo0I9F<AxX=32xRy
z2MboXEEAVo9`pYG`?FqmCR_AeO$l1{xl>eq!jHmrfvZaQKhwA|f7Z{x7s|ffzHTct
zfAReDCwFfZ?zf#?%2yQq`r$_fDTWVrE7mb5P8GYrXY*|TkNQ>r%ewb8D6YFC-+q_L
zp7$o>-TV8V9;ka>X8HPvk3!<w-3w3e&OVgw6%}ow#Qm+++e20Km7`PC!ReQD-ZhrU
z9SS_99+GO2=jr&prDk^C`4VgE#S>T5v0Q3uVyk`d%leh>p{He4A0k52w{;$$zR@#u
z>iacGCp+0PbIyCNwF+OoU3W`;#50X4)wdZQFe->lQ4<qlbvn3UQ-Y3)R%=sZqC<p%
zI9E%HvcZN`=j(G?PEIf7@>->7${W6++CI|mUGlcQpYwzdJ(*X`^EEBX;NZ6MKYNc}
zyBfNC(|S9Bz}tWBXq&udDPD2dKXS^}(--zd?EAmsgNeTWo3bRwcsBE+Tgq)0{@nWJ
z%&WhCH$O0D@4k4l<Lt}7?-%{rb9eL7NaKA+IGqaiAJB~w`gY2Nhok&eg`f#rJL}xU
zeRHc{O0iE$xVn0U^Q;dF&tFHsu+r*SQM_(OlHbu<x0C0OeW>N1!_@z|EBNlEP(cr$
z;CbvF>pCBbY}@cHeo^r8jMMwHZ=L$K|4Ze82aIAOJvt(wW1t)k1n7uxYcd^l*bv|&
z!Og~Wuwh4sk_ab@%g;%_@7FS=Z7*6VI=N`ptktI<)NOAm+Wz<~zyIm)JNK(ytB?69
z{whmzJ>Q`>{+@5N)%>bzbaE_yL_T1hmAz{E%ME;+u9&`iAf6?syS_H?Im6VnJU!z}
zU#!w*-qX3!%~3DCwKTEt+ROH0yYF6Mm;dl}@r3iLEq1l^(BRn7`c>Nc$?7!IBu2x;
z>9uCOYh7oo?s!-KPgi86ry8f(>`mP3?=;>IICVaF%4Iccp?CTAg`aEP8~gV>e7Ht~
z<EPf<@U4t9`KoSc{JgyXsFYl5RPg?FrF@H<Hm{xC&CGrD`3eirmINIYZa$F5FX(7+
zw{|opDnwim=W1zF_A9#nt2#7$XQ^gMRI;SjSJC3ee;cn^)#slTo_FMH=e2m|d26pN
z6)QWt>b?2b^ZoPgtN6NmEZbrlzj(#1*V6WqPkLvIC9l{cH7~z(_l7DJxya8A^`CTm
zlx7twFW$v<s6phKtnuspAx%e27&E68?FfpFKW+PoqxGl>W6suP9GefS{w)tZ_cQ9E
zCP&z=zgIS$k$P@$zCKy}*`f#LcVC|B`hWf`%PYfxlEs?Sty5#$!fUMT+RwL}FFh`D
z^+LS-POa?c8^T@~BpSqRHS@XGSLS7sViA4g`k|PYSx;Y0{`;M|fl&bz`<$&!i4F&5
zOb`=cZDveb5TT)_-O|)3FyV+^*4Mq?zdzTnELs{HtNLiw)YZ*V^2gum|NVbpPQC4i
z{Z0Gbl%K}ds(oyj7CURJ>DIG$+NVF=e|MoV;cNT+>q%eCW^$?>tn0aUK>z4ZQLleu
z=iGmBZ(J1coHyrM%bwH)mcGIN)kQ=cyW7wB#+{2?_~*R;;o0x@vab$5Q_1^xlG42n
z#`E7lr)DqxeSO6PR`H;qLoz>j|4Hb~c+arc>dhRz`MzRHjoDso&*3%AcF^kCoppS}
z?FZNOH!)@fa@nS+7+*a9TxiX_H)WR|-~D%^Zwjl%2Zat{lWvdM^4}jD6oBI0OGKEJ
znK9X6j?Ppr@BwI3R0JhioD$2|yx#S7ZGK;|g=gs6(_O(*o79~@-PitNSN+kBd$~<~
z=Xd+-E}3tauK1Hb``Oa(*FQfze|$l^2}j22hhqP~Ge<98dCH$Bd)pSBJ<TVh<GCW_
z-kE9JDJKN_>9$vSiSM3StYJI#BZInS+;gTc5ARo={X5P0`=m4f8fKhz*F53CySn@O
z&AT0+9!&igY%zT*^Cw%;*I#XPkLDVCwJy<Q+aR{Bsx7PGmF>y!^V47Sou92(?7MCD
zvQGxZukU|L^=GqZoN2GcT6;sV<&}NW^gr>=dpDn9XH*fHpd%vK+RB)CV1<taH!sr>
zgB1Z<B2uk~loUFJ>+V^;{`~#zUdz|zS}apVLyx2`cb-(Ff9Id_SE*O}m-f8c@=wF>
z*K(uAzk79jKQB_rFMM|4b7J<LeWz+a2u=AGD|&hRVvXQ0ayc^Dm;Qt=KJ$dp`}C`w
zJ4B}Q>-`dI_7u)PbA~Va(*EmrU)KKlP+9zRm#mCWRp_^>z~?a=XE%JF``afa_e4!(
zxP0Kdjp@g>*X++)Uv>8MuJ_roRR?QkzN#pA{PJsQ>&Z!}HqY8!Ei;;*C2U;CrgG-Z
zr8yyM?v)+8EO$}+^sfwd1_sU!$5|3=T&$&~;ZZ{GUSHiG^<&=p{r`6V-~an>{Qv*|
zzk2Omz5D<D|LeA{`V#dvX#HAGmp}e5`hM(RHTzfiy12QK=P%upk`Na>eW&7uxz;Il
zKCOKl60`yiGObC}&28}#O7YA+IaTHJYuEc9MB7F7?wEdI$LecKwpJQA?%t*RY+-Cr
ziTlZO8&8Ssx!-!9Y2$K<eP@n&|Fq4?K5|ec=jxr6bz1VzY-$WowML5SPChAbE>~;!
z<dSOk8jqQbiQ+$Ryf&$+WoG{(e?c~QX@b9-kkh%F)6XA%W6mmLTI#v_)Ffq}$c}yW
zY#N!a4CxOGBSVFXl<(hJ{A{)Kk9-wD#)pRlL<I^?M7=8SUC6aG_}6uo)`Qs(Q;tQ)
z2`gyaiI-d9pEbjJVeQv<dry|?ht^8={XP8Sy=7eI)nfbi*Z(VhT`gzIwKmAZWY>Dr
z(5Q6@8p)ULq<>j%XfJpvuQtp7&YzGg36~aa|IpZWynA{FzrKDM_w*^3X3lec$*ID(
zb+YUJ8C6`rzB;bk75E^bit&`3w3`;Qd*eRIjT`oOdFX_t9Vk2Q5Nv(ac#m7s{>yIn
zAA~tho89of{N4K+n{)Z||NoI$kfbmpDsxjyxPkV}ZK1MfwoJ2%Uv;IgYxk3N`PcUe
z#caH>uKv32vK2}aTfcqCpAw(zvuq=KOi4v!-n)12zREHNi3+M!@A~b2ub=DQyYHo+
zX4?JW3+@hNTX47Dyzc+xtMk35%*lPVCiIKfmpNNs-&ndnoa>+3@_6%eJbWUuObOo~
z3oJXd?a21FgS}cy=S_E$3|*9eW#RLkZJ#bU<cU6NY~(jN{hzl{<d^S*^?xTwuyAh3
zICC}N`@+59Q@?G<__112$mHDr9;XQ>D?|S8y0}Ak(VzYGs(j9c73sge?~VGJwdrrv
zyXaNd!~Ex;{IKQNdG|-E;%(2Pmp@XNlX7fhu}19eBP(4xih8Fs_&-u++%$)$Z`0Jq
zQ$=0sBL1OK9KZG^ANlp=-riqZHflv*64RdA7qkBA?XMD*0`4{4$}8_Vaj(35FaGNx
z;WVy~4?2SLGDO~esSj+k%X=$%^rMa8oWklW!4jU^^e>kgB)jbrTxn(|Wx2=2ZE<FO
z&`IIfD}OHF4%@C0r{d?LzU%tZw^8xo$ICi)*F8FWy!Mz~Q+({#JLRz{zsf(d<rdX{
z_;FFj(Dd5Om%G2-HeB)l`N~7fT$g{hZtI<Q;f91L>&{EL_FvU6sg<67|1O(--upYs
zOPnM&dv2ShdU*fxMIPCyKlR?NmDeoDu{pXs%HfW}!rlinPBJy#?z*SJW5my~B=lqJ
zqn|5$LiUIMh-;kEx%G$Av_oZFO<|7~+;so=pEX3hulcT1qjq_0T<YHw@jdoSR##;F
zcb_A-IP1*nZ?`-uToSv2zJ|@bC$Y_GD)+MUpWU_pJmN7HxgOQkcFpL|z9}gV*){eL
zwI|POac%rB{BqfoDM}enH%2_%|7+Ig|3@ce`37;>?ESY){6WXo`uq>BCu?l|Ts4;j
z9X`2A-}mIp?@rSXKM{$#w*BFTc^^;c2r4X%XMEZewQlvdjZe7OCN)a<IB2R~oxO+k
z?CyCrb?O@)EEDZwILeU4f8iJ7o$T22zcl8~)Of}-XS)0E>rbQ-E<JW)yrAFKInh^d
z`TJ8F*?INfA7f(qC#4hHVzTK+p2{?~ZQ+;Nd}{Jn9Y5xm66n7s^u@Bc)cZeFEiTI6
zuimdZG5L?<g2p|k9k*Zq=d)1bM#Jg<F>Y7(Onve8*Z#@x5~WrhRB0*yAnVdJd-vV%
zyO+nh-46IQW7i`l>74I>!h1RORx7pq`>Uwx)bKfmo#CbI<HA4t*SpO*a<KYO?7jE<
zPw1Ij|LZyY=-H-cF@<j0`faCAnjZWoZ2zGDkel?!{Hy0r<lg$V!Cds?eaVlaFTRG~
zEjZrvIQ!%e2Z!ag_kXN;u4*RlGIOu;_G>RsD}R0R*1nKkkL%43g~a^x@816reo?eh
ztM~Q6u*9Y!ul8nz&*sN6{9`u#?@yeOyi3_KzVwgz#{b-hgVr^EzZ-K{$4CBu)86F5
zrHel=`&Ihk$&CY1^BOZ0qZ9f6#83Lt?!;;?{`vl*y;WD%MY+|zKP>Dv%{lDKqnu{X
zvV<8z#w*rpPcVJ>CHBPjzW)1hT>M!pvI1q#A6B(l8~IQ4&W{4eh^eYI1^%BLf3mqp
zRo2UXSowIjss7#GMFu`$e`S?^-?yw5{{R2x?W7ck1Aih{H1N!>P2f7T>6(`Q($tih
zbG+jJxEQ2uVq#soeRBOnDQVgNPlS#Y{(sOFEE#ca{`4pNUPpf^vya<U_h&}N(Pj5G
zTvEEQ|Hr)g%D(%pAAhi}ahtU3Uiklqmp9wC30l1MZ?#|X_y7OHW?Ok0^7a|!w4Qvh
z#x3UfW4nnTm&mM4(QfZewOQ>jO;<jI!P{=B;M}U5sFV$|6Lng%co}`Uqq$Q5Pd{xd
zcPx)v{ABX2bOxqvyNo4E(}X$K`-=qm&wpe8=>Ol1ulK9|7qFL1Vc!3O@5!e8$x(f)
z|K5u_%>6_Ap{mWKNp`C?*w(2Y`u&gVbU)9p0RLwzPV=1&m_5Dp<L%QQZtUUKUOqLW
z>z{D;=}Fy_yN}xb+Agj?=hQ*>BVC<!@hq)j3vc=w{H)ph>-)o))mQ%RUd?mCHr;2-
zFXLI?T6(p*tC#-l?Y~>qKfyhPWxZ#t{<Uisjlbuw<6Y~ZxrTFZ-qJpwuI=}luN7IG
zaecb!-ebF6z5%;J1I|ueXyII*yI^yl*+u<&3x^d+v*(@O(b|3|H}%i#<H<D+|Ng~4
zT*LgSwtSt;lP%Njrq})RU*Ms(ddjx0|K4}k`#upV*|g-(zn@p4>$~)3e>(l3Z@OJ#
zTzAmVBbjwqp0%IY&!Ti8*==s<FXd~?j5d_in@fH5{28FyaJqi3Lh_mTb*nDkJg{eL
z|BhzUwI?UXxBQqVEflxuib&|P>{WTGU*ifQDqiiW%!t`pep-Khg<H05@b23?9{p>-
zwJWkv#rpM9R+g;T(D?>+?b}~n`nBbruyIcAlHcpqj(vZw-Ep$t;>hNV_J_MAe^vdR
zCy>unR&4pBmPbc)z0Ul#f4~1&eM~U#r^Lr)^=m)O`y4!9_4}sc2!mf$&CWqbuHP*C
zf3temoffuR^PWvF{LXEa$p2CO_d^B8HT#@)9OCWS6ZUPcLiOvFx62#%_W#g+pS#&W
z<5>BroX(kNHZ9B*F0M2dG}Vd|e)oK;%Dy6<<dFY=Z{~lAjI`Syr*`eL^4yh`IfXCh
zUE6>Cy{X+2K`po4(+|0`F*9wL(3sLvw)f}LUk4Qzoc|)aQa66<>8<a6_$l7kUvx@+
zef?{eM+FQFoPLh8B-*rC>plGI_Qvh~^|j;X?pfEhw_4Th3}3(g+R{apb(+<k@7I5Q
zX<NN|>#MErXN&k}-(R0J?QivlXNn)0&mHaWtQUW9;;?I;_UQnd(?^?yY)Ze(la4<n
z>Ye?-;D(C4ls((Z*sx3sgV%!ZuCXi;VORXOF7j6-=Qr5{^_Mr#Qr#{f;&3qZ;A>Am
zzP;%U-s}@DPcOQ)@z-}3&AhI+)mLld)?PLJy1u-lYTv6*b@`1V<`3qm>6G496aJgf
zqxN8t+?AViB&*nY_4k}u-E!o~Zykkzs)D${jiGH#W)C;#PPukgr9!ZOV|U}bSu+z{
zqEc7<+`Z=N8Y@1g6c+K_ZL6n8AAf)4!aR;1O*fS{`kK;qcRPKf$`ouLoIaf?cqgo_
zj-B7Zz|v9mj~st|4A-vrvfLH*TwOe3{rc5Uc-j_jIqLr4+=TTj4=XXc-rwDCE*@D{
zs1U~x!@9P^_JNa@!SB1?cdxy=zcym&>h)psuWP-`UbZ~^C8vbelhE_>`VWoxO81D(
zVAZ<rpetdpYMsv4gEJ$t?dDv4C?cV7$A4#Knr2thoa8%Ihj(@@SpMZ)#`m;ekM^(I
z<yrgr36Dj?j}Mm{3b&qYxNul={*3=?c~8H7eNU#6t$T68bRYMF4}6}N&Y5HM^sDNd
z)-K~0OFw-oH4^=Ny8Fr(&2BwD(W_5B{(m=LghOFYXm-@=6ZuQj_pmppg<n^hX~!gg
z)w=x3)_VE+wdJ?=#)j@yo}ytO_<n<+q|d9qDTn#jw+Y{6G0K|Pll=BF%gPBwqBr9B
zmsqC$Op%+-mB4c_@Nx1{*2nfC2l&j)N_{s>2xfi1&Q$5ZDlH%Wbp8L^7hPBRs<`&z
zs(05{glUJHul9CXAHAsFWp&(bg=1VS%KJM~Z`S0COC0IaSi+ZZJ<G*3V3p@=w<)#q
zPaN&mUf;bx?t9$Z+FMiOyuRQ1TJ^nF&$6~|@7=1ezrM8oz5b<T!iVEbXCF&0ePa+S
zVYs*T#8+#Hg^Z~`0tDM!Iczt5o6OOny4`TIc6)`1^rLedCo7syiqYYg4wqb^!EjBr
zX<bRr>wA~Hwx~s{JzsiiZ^`P5cI)&0{r)4!V;-{H{~*Wp{c%q=yqz_Doy`@W_Z2pF
zXLo*{er=Ebf^%LU+j`k{IbYs?S(5Lz^CvI!hiAMUXC3gDddVZu{fGI8#D2Y;*#?I`
z?<(aen=8Bcmi4>6^2`2D{+g+`w`#|fxO?mhVXO=d4`af<dmfcPu%S`)zSnxzMUDb7
zA{-U#?rt`i>iuxRIccr{UE#?GOkx>a1hy$MFHqp0pY`b(|D5Lc`E5HR{z`I+<n`*`
z(vkh9^<&-CRW(BSIjz4o-w_c?vpxU&(}!QjwWmMPxLLCE=a+pPna>l=rq8-JQ}VKz
zQQMZV)qktMzQ4ZYz%;RIjGRmk3T#TR^j0l7Kf{RAW7Vpy-+M*ZNVPM!uskb_(-tXz
zo8<Jc(9us%uSV#xiT>PcDh1yyUZ2lO5c{ERpgN7gpybDKS+@5j7bb3seZpAvll5fW
z>r*Zj@29u=_s*)`T-V#gJK^@P-+!YwJk+{cD>!5I2XB_L3BUaQ9rpVs->P)V$Kdoy
z-gjl14<`#oIzIgT^vL|${XAF2b9d!SgkN$vIW_D@=fi2zzBSs%q69acd;NF)5{cOQ
z^|KGxcO2DU)-C?(>Q%S(;imD2<Vtj$u9n7ehWA@?JlbCUA<0Bjw99<Q`qke>C)T$q
z#OBONTDj)C>}me~JO6&Oq?Vt!TKZ_cNYaMtr#*YNSNI4#-MzQk&Ln2qtj{HRdyYO1
zS{-x9;`+9|zqPJKzu($aAzUYQJ#M>GhkM-zJ>C<E5$k{WRnM95Kh@-dD+lvx+nNLq
zTlb$^m=wEOZzi6T|95)D{9k*2ZkusbR>Vkua`QdgIQ!YF{+Nd@xPIH-;QR;op6dP2
zHZOa9b@j!FX>m{EKWN$c+y8x~{J@=sq2+zn{e_{w)Z`bPzxzw*>vN;+{3~PIiw;gd
z$g%&>BJ(t>AA3W;_50ob6R^A2^ePL-oqh4&4>B74vXHuUKz6r+q203uipmpD_43T{
zGN1fs{~w2qJU^J$JZe9lsc#p5|AbnU{YQ!1X}2#uzP@L>rCt4#T_2oJX4?Kxy7#j@
zj-#J<!OfSazZWFLuUG7yTj6ecfyeH&%%Z1#Mn`MbcAMR@miamPi$*c$d42)OEgw#(
zr+#l>G-YZz@HxVF+Wmxk;diH|DJdt2mH292KN=smSa0f*U#n+FJ`8lJVk~U^8hR_>
zyU)&kP5x(_UjNHkvSiiiFPlyotc;rF7RhQi<5yW|M!V$is?Mu<Pk+4>NsEw5WV0*v
zZGJO-@>$O+?bSat=FMW!D!$IU@}q6B-OL^PHn`a{HI-Uuy*jOtY4=y~{q<duwSw;t
z$NbyEk^kQNe`N4=)9?B%Yk7{;-hUB2f3s=*38f29v`@vSrCI7E%B)flJofnYQme_I
zJQqFKDi*c$>%IDQd;b}|Db_vY8lKv8szcUO`o!0Y*Z#+364@V{8!L-#S^M)W^G@k4
z&KI-nyuTdsP_}SbJ7<G=yQ<cg?FY6OX6sK_Htm$rn`@1-X<H9J-6FdB^78q18TGrO
zmx^|Mm7FgAXNE@C-Mf!sM4vusvse?pMDE}IWx3B?g-dRg$JR30@n(jW-}?Ug>g&64
zf=4Pmrdi*Rh~#Cf+gK;5`|Rp(#nV^z<pqm<+gPWvDCXIf*?*;Z_Wkd;_uWW;hw8&)
ztaoo6{jo`N^U3*5iWw4xEAH1N1s;<R`t`bf-S@}-8WX>K{QB+eTK6{s(i*RQG$$?6
zciiJ0FQvK4>sHbGKa(YO?L|4(?5cjhOl$XxWh@(XYm5Zv99X{lf$hqd;xim)*#&kM
zguk9%b92pi(W=jnOLkct{>Ue;sah8ksAm!H7vC{IaCS}g={@)Uy|=sjK4PPowe^Gg
zbsyvBN;FmWJ&SgJweaVjwi4d|QM0eMpIp23iHYv1$G6m~kNtfzKW^8ac6~c}ZB<9f
zk9^VRS_BtAn;_p;WcO*o#P-vzY+>`;Ypy)_^zO*)O@I7^L<J1ay_>v0H#qnH-dRUq
zO-?&)pHWk3o27k_=i9Y|l^hj$PL=;BY&M)dRXi-d_;1f0^J#1EY`%U?>Cw&)KbwDL
zv$~x~X}`h$i-B>{0yQQE9k#}p1#Xea)!aH)E!@RqC;e4i-54Uu%U<*J%+9-y=WJc2
z9q{h?s;P$YMNj^%{q^<M)~X$0UD3BYE1a+8KjU?=Z(v~HVQ`!!$>zmcyD<N5d~wvC
zYs)>?y;~JF?a8XWS-YavM@_%BYk$-_#h6Uq>8F24^>MHk-R{UtQ`T4|nz7|TRi)wu
zGo}dtwZc`tt#NC3eWYd>IP4Q@zx47^&(AbnE~`G<yI%$Gu^(`dwhGw2M|BC?KX$jf
z4-WnQB`jL9^`rKEud79U;p;i7BX&LAn)m9--RiH&U*F8%@^A0t9})*nOS2~2IV5JS
zA@<+9QN~QrWM7<K&u8DA?rSVq1Ed)orR(HA7acEPoaCKg7~vE5X~mw(bIT6g?!5C<
zL+|kOOD*iruO~-To7`79<)gnVkfBcFpN4&dk+A<^jc*IAFFgIA)OP3vM>@+A=}SBI
zOpAES{(j=D4F5~3KDwpwypUJXvnc&j^}4jD*f!{QuY^r|d*vGGiu?aOw&uS1JH>E%
zeuPUZ%Wq%X8S(Ep4m0jtxnqr(#I*SjT68#0SI;>bB_1z*_t*C9@Kw)$)`waDjrtq0
zkn4Z=s(osmuj|)WY*?}I>fX0=-_EI-9_ew;_icg10vCSSa`~m!_Jv}1!nDhm|DST<
zejyXvj4;*X+n?Uv{YZ-6iswJ$0*-H;zYG3tP(Gm`;9c5voAuTTu37Kw4*Qi`aCCJ~
zxb1V6ZMt_$NZA1&v6&{i(?5ormdgKSI#-bBA$M(8WZ=>#Cj83r{8Rizg4IQCUb^>Q
z$v$Fha`q?Z@I5)~sSQ11tNMZuOj%OxR4ubUsOb8d<zHXgdQCkV{Pb^a#Hs5OzHAV>
zA0T09`XWDkz4tonzxr!^Uxyae`Z!r$49yAd;d1+tZB(*l760@P=U)Dcm}emB`lxgr
zThPy--iJO#HeIGhLc89u&O3Uhf4bCUqqr*-SqUm<_Wx(wzj{u1NUr$n{}<dtUwwUD
zz3c1r*MZV&Q@*RrHB9Qs{Jvy+zhqOQ#Ki+DCoLFvEXYn+wqe`TZ@V2OD{KF4ty^1u
z_i*;~vVYUoy{q0G_ji-~VduHCCnQZTDNMRq`}Gjha(ivTyMKGw)TJ8xckWx~zf|?3
zabSt_vSk+RzvnK{IeIhv-ksF9oS>VYu4&JiKQ%sNqFm6sS8wNXiMGv5-S_SAk}Eq`
z@t)_>b=63nQ_6hU@#WX}Q^(KRR@z!xu{bf83WmuyE8boBeEYBOD>6<h^%VVLk;<&^
znl}ID=R5QL=Rf&ZA2VP6x4Erb>ev4n-kz^RcYWXW{fKtxn*C89=I{Rcp1F{z?e53e
z`;|NPNgq3KC7!up&i;cE_AF{r8nf0WsED~7_@(`*VppT`1h>fvyEu5)gkIQdEO=(|
zb5WM+Md?o;oT*oStXNxf>z8f(8`0l~Q+Fsxd4AuLV|K6YMnPHdqdoKPKU7*<l=YiW
zxbe!%>AjD?=`?@&WAxtSZhs5+2FKeI@62nn_xsP1BDvdfN~oI9j34?sUqAacM;uuc
z{#*Wyppf6eLeGq?C36J6*37-Do1l2myeYuXXwj=xAh2aM>w+GkmL@0u{*xQC*9f*B
zxS-<lL-p{FdX^n31^+_zo34pSu^4O+bmLV%+dbJKZr}6Hg6z$rCz3zZ+W4(7Ioke1
zqK#=O^BGR3Z468dA~EuNxYjT_NFI6AI&)k3Pq)wY|JBduHwD(kA8US*8U4~=;ePd7
zt9CR;d)<}3vZML&Mv2OmlV_#>^*hDMlGL-~(bM&Xt_5X9DUqHN(|kf_M(>z(;nA&|
z3p38kTc3BH@qPc~-{m)_ioU=4;9Sal|CA?NR#&m?ITAG0%U-Xzv+AbB&Bs>%JKsjj
z-rx0g{ndYQyRXJ==+}R+fTQ4mhv1L=Jx7?iI9rr<dw0F&U*MwSnqB_7;DZvY;VJXB
zt@A$3Xp%mCq{8Uswwk34_4B>tnwPsXzZd3pi1~U!{od(4uih>1(5#)xX1Zkl{Krq1
z+oev{2o#hEc_(>msh;WmlaJzmX*#za{#rgUPBOQD^0%(IwwG5V`C1!}x1VN^o^GX3
z@uA>f+NagwtCoLR)xy~+uls#{%H3ag17Ba+Ght<zeqHUpj(5RRYm%NFzou_0{m68c
zcF5|hjKPyHuUF^|TeLC2`D(TLQ>KMG7x5mk)MNaRC3AqcmTC0^=7wEW`~}}6JWQ=_
zImfJXtZMgP9_JE~>9+BtOG#bxcQezE+h?D5y572AnUHvkpZvj9?E3;kr-+#8vVO06
z_#^ySLATFKgKYsqe~fQxrT=va4Ve-DUokwssrLP;#c@yGoUc6e<k-ugEfR|^o7yh0
zy#J}@#}dw@bKh1*Zk~MjyIb)G$79#auP*qt-J@;)N=4=d&Sm=fn+yIfne}7)Ep7kn
zufEPNSY=;u{+Y=8r6q|wi$Az%9&>zPx|p?Pe|!t4dk`P{s@-PCXPy4C-SN{Q9&atp
z2A7Ts!7$T(ua@(O<;;Jw_n^#5h1!n3_!(;rr}o8Hs!ac|)8y=NVavE?|L>;k@9J6Z
z+?0}%Rry|WV)hGPwi8Mz>1hvEyq&y1{jG_kiJifzQxm4!9hJ2;VY0C*=MidVkeU5s
zdjG$WV9tv_4<7NH_n4*pwJ!5b#Xs}cmOC~Iue-^x_N%n`E4S`I*Vh40=G5Pa@ibUg
z!JM{r{>Jj_56$?_ezWR+F0}8{zkf<mdVghC&)zeC{`UyKl}7J=em8e&F|e^xV%~3e
zKEC3_<ZnT9RlYbc`Viy6+RLQqQrKAge%Y1qo=ex1O?DS}sYGx7(A7Bg-`z*qJsM}9
z+uVNf>)yf4Cf1i?w_n}mPmFJQC)LTM^0a-!KC8(74ju2j53*6RN(R!0_HURtSytF?
zg;H)3Tc?Pe;p7;e>Z3{*1K%y=xPD%t*`n}{5o7hN*q;Ye`Xgm$d(At||KQi$r2Ahv
zB@Q+34*P8P`jBX>(Y)Qy|9+bG>5^mfl!w2T{QWLqQ+ZbC!Ay;&KcV~@&wLL2U4KP0
z{X8T8Hj!oGcm2P+2tIuMv9RCjwZ`f)$Mp$4b?o2gB_*ret;$oL_`Xv3iOAvPgj3m%
zO0NmbX1|yJBQA%t`qs_D=SMDdCZ2osu>amYG41?i60aLBXmMQ=TweD2f9IXg@2)?T
z4lVv&8d<)xs4Ctz+pPAz!?LpI+m;D$sxv|kJ&*b$JfZ5*#kTTCcKc4g{(rN-;IQ4%
zpq;ErO!5<(?!BM>j;q2@L3gcOwBN+1X7f4(t(AP4_cYJ<nEGFALssd3-Ha#B9FO+<
zhLz1&mS?QJljllU^*$$FnefH>F^&JC%Uon?ydt^nN($PvX7%l7cz^p~q1WQ_*OLk-
z-fn!(&Sc%|zr^I^?XFixi_X4(-uY!xdA-iv`@gOo?OxLHZEx+`%zsPYUwwZu%m3?F
zjh0F8KR8y^Y|biZa8NxKx%}9I25sK>=0E%6+iP~}Ul7^Vb7HNq&T)bLxBF%OKUlET
z!+PesKXY5=-+uqfXW~DFrK=|Ar^Lq<Yj1n%|0?F+?fL)yv$!8#>H78Ei6tBV7lzGx
z&?0Lv`&v|s&z)pv;e|{|pZzb%eeOQyGU@QOu#3uTOB1injNiT6;B|Q2F^-G#Sc8}S
z+4PkCx@c1*cigE(oIjSWPwiSMUy)>U{9v?hNMK~ameV0WuXC)>m@p}F&G&EdtrwRE
zPOrOCv@k^VNFmqzH4eMDIM*#X_|0<atEb1Ntximi3x2at{fFDd4I;Cy#Qgbq{$8Bm
zs#OPDvUS%lf2ZlZ=;%z<=}rp!4&J<b$-6kNZEw|=e;j*ErT^~F5t_*z6fJPlpib^x
z{)cI2ihS?yE(nX1T-q7vaxqtSN0s5^>-!w`IREW^XY=arv}^wsOjjwp^X6~$2hS(n
zt<OFrt^CmU@tRBAF>m+n&-dO_IJTg&<MF+OIM2D6svXZ}O|j#O4ZZkdB`33pp*4r(
zEqfDzKk=)j9|o2#W?%F4r2S5IB`<Y@AD462YBciaJgoE3=esrSb&IcL@!iXHk+~-P
zA8O~dOFMIi-nX;yJ%2&{d-2oOefA#vop0@p+gmjG`ueN?U9aYG^h8Ph?%TBPe$~6{
zam%z{udQ0(8NSxP#(V$6B95gp_rzK6-BUe#R&AEhu|*bISDNF*<Rf<X_N|io$M*Gx
z-SLO%YR7eUUH|kW;Gn~bJe~(DoMuHd{$bh{@vMC5ir(8FxE&LgWTbm7`r4GASovyC
z_mZ9?lNRRog;(!fzyIp1S9jls-#7iW|IX3Be_D_E{SRVQ{jVgLS1)ksf+B~(g_#@I
z8qPO=a)N<@cdz3t$+j%emXezNCtrOne;vQpb@{4&*Y-{AzqaISY1GyFvtQPK)Q*>q
z+gi6?Rb*v(V`k&brpAzUG1H#=-1xDP!FacRzul)R(V~fw;Rerd-du3D@NIHk<0KA#
zV*xdXtgVN>mUQSOJAZ$!;`29wRf0Prk0(Dzjj`+Et>y928|OP*<g@HmtJ*90ds;}e
zOx9JF`S-6s3YA#Dwr;2Q!cw1(t3_4UYrh{*ajtB>9(vW`aa+88ZQO(@3c4SeuO?P@
z_8d(xjGKMCe|cD*GK1Nb%^UBAZ_c`^+jwz>l!orMlkU%Zv(vaG(xpy5S7>Tb{80DU
zMs}l|;-RO_pLA1J7y9iz{qRLZ!=44b?XT}WsL3+sf4j_akuTFvJ72TyGW%=}3QkJw
z@|C;!_VK+ruEy{GR2)b=-Ek+>M7^i?`TvIj3(DSene3~v*%rKM=GXl{Ty*62-MwF4
z{nv6YSJbNK8&A~!UbSlSs&#uK-udPI;doTuWH8%c<FvPHi~jO*?Q)p1)@<ISl2uu2
zFFyQK!eg?BS!Ti^b!$!)gN~XR&shU~q<TL@*|$ENm?9-6SoUaRMB|$?QLWy`b{{^!
za7)<#PkFN{)cW?g*E~yPZPw(>R-7oirPlW5;{U%`_dRoscE0p!rsQ<>51sCZCOB0#
z2uwDVNDEgI)8;Mzn6O&YzWyPjxB8#?)f*g5pZ`8wU;BSy-S@cF-^@OmcKOF0FJ85`
z`swOfPwqx-@ZD53Cn3Y>!pEZ%WU?ch{#@tE7JB}yBgrBA($8D>OqEy-)a;{q{wfAu
zc(i3t=6Tt8DZ3l0ZojslmR`o3(Pr>ft-k5b?1k@5w!V&E{cr8Z($ehk*P*Yz?Ae&K
zbA6i4wGS6MCiPxBbxq(s$B9Odr|Wk93k&<b{)^c@wRL~9zplQUwJtwuf6S7nKc-&i
zbXZU(BUjHnBg=!qN{r#;Rimm_3)=@q^6R`T&NIlUi|T$|mM^JWXAo_?^z!B(nZXi?
zGCGa-{w3+Vi?Ux4eX;qrXp7z#hPzw*yBDbkW~YBh4!CjK!kTx!#X(7BF`a)me*32E
z2ney6Ba~|R+Aqg==fi7r7|-V15BYuGLpbf!lG~rGSFM}>MJ-AB+WKotzP|brzWcB6
z&hkZuj++an>^JPrWARN-kN*)TQ=yYkA!qT!Qd{uJqm5BPf14QBKlv20M_}TOmH9_k
zZMewt@uW+kQ))=5($Rh2?i74D@=L6`@q>iD>NAy?twkZ+sk8q#2@4g?zu*5je9<ef
ziyoGIseh&i&g%Eoc+j#?pz@7ewPWa+W`Wm5GG8tnkBn|%FK*Yn-TznZhQd>|3cG%@
zuvn%2PyQKperT7hawt^kWlp%P@PCW$!S!3*-|a2cU7OGBki+Zx!I+zsNj|e|cLZnm
z^exw!K5WWbU7;x7qqS<)s;h;i+>9Xy&0^;?>^Jy0G2+B84+UdC-ON5kZoj8Vzu#}y
z;`g&MlHj#WX-~e+dHD|iLSvx|)eFQ{e2rtS-M8S&ck%z+6Am9#=UR{=!&ct#>r$uS
z(XZEkE$<O|AXWOa{;;@`)z9z^R_g?&Y@V_};)ja9q{7sG`!>H2?Jb5Db3E?#nrlA&
z|G}kn{jJ|DY`vTAsUEtlD=^Xf@$?UmbzWV6Gl%8&)xE{>Kh@V6C;t|8{i^qTuWCie
zQl-FKHM5?m{L$3efBU^uo5Ja1{D)n3SxvdR@%H{B0ox~El)oFl`|Fx{@t<|s7!(vd
zRA1>wq%ra_cW4?c7Tt1rKFb^%i&c91w+>p&5t{IwJN-l1aj#{IuJJo79}G}XdocUE
zjRzn5!9@N~|K`u=EP7M9&_e&g6cbU62nJrwHH;1&8y3#*l$zOP=izt6)O!DmqaR(j
zK6n1l%iS8tT4j8HN2Th-yB75<ER9V3jjHn+kI$dNA$BjnaEgJ1Irp+pG6nUv^(^9U
zHg<aQvRsL`lq=c8SeTY<>RrUP?$*UcRSMhEg8#ED)!g78e66Yc(T_<#zvkLV^VRWY
z_ZXEN&D!Jq;N}Gfizf@1mNvZq#Cf#7>2Sn?vdilKcCvNH&sk+(?6=hZ*Ya48)MFl&
zhd-BEZI|%XT~;t(_<rRs(T$6(_7?=}NjV=;;}L8Ucw2woU+>VCmByPV)XujGis|^(
zTDeiyNbvg8i<_nAf1bz2A2I*y(QjMB)=z%^f7h;O>#`1f<1&AsbjVHhdc_j<f{U!K
z<!ds!S#Nhfd-rJX%=&u%D`A0cyeCAqWeag8FfO^a)RcG89l3&s8x}42$b9=p!~>0c
z?0+Novpsq7`pO?G*P}ri6{pmAE{b*TdZFKHyz%kBFaeqCZ#fKHZWM-OWI8QXIGAww
z`+0){Cqty1FJ=5cRebPgS-tw%zY^D6dDz~6_!fQg0K3wG<Zb(zel&f$rr-JTib(ur
z!+*aO@9g^eNATXi<xk#KzvVo3`(dM4`JKY&9RK8&FNx*4{rsWfN}Hyw>&yCI-Tf}5
z{osN}^Rs(<_!=FYEVI_G;Z|sM?_iH(-sie%L9r>Hgw}rk;^~JRnrAF!6FOMKdiVE=
z2R8rACSQNPYO(g=sCJo2$9WR3L^r?T`({;ot7E|~p_IKJrnf$SeVeavlhdR_J14QX
z{Hj}Pc(<{?zp450uhwAsmpdYk8ywRweRb|2%acn<pEvAy{dJOv#fpvklQ=$H+WSVl
zgGJ=Z%|AP*-)z5c!@qLLKmYreRBoK&pC#!flq2L+XKWep?}52^R?x@J5UrxEC3k#&
z^iSLS`q#JkzX8)tWu7{8xH8=RaQDHC6MPnWrExb~{+^T9o}lWlV0c6H#?6SdS`M+4
zM@&ZEN!?eK1ovA-9hjQ>-CzRe-dAzI4RUU3ot`M#8_UijnJw6yy|6#RM!oLVY2I6R
z+kdHt%qU0{Ul>+??*E3kWApB83HmWZA>Yc``L*MYBLBM1XV)V>W~|}-c;n2{E!x^L
z0`g0^IoorV9DQD_#B1sCsP^@PxXX`QavMVuPt1rj+~D)+(dEy2KbL&h3_tY9+n-7O
zf&I7j*&o`k-SdCCcEOut?oP)`Sba;y?{57f_O|6i-I4kA>$~o+cJVEL{`BvsgMyzP
z@t*Z}T=BgB^7>0=Kjv?^*r<2>Fh|$6B^%4kUS&0<U)%5csZ#&J^w)kBZj%j*>zlbN
zogP2<zFuV4v|=BNeg3y~O_Q%V`9Av=+U_Sj!)o%E$cJx|E$rrYDet$w!CO>a@81^3
zzkKzj>59#>>^CIpX|r&$pHR@@GX3dR@Tcv%m;9X@k6x)N7^q)7nY-qs$WQ&#5u#Ub
zES6B@l)O22!q0_^79HPvqHQCik${bB-C}+LH+!kcJDbiwZZXKvIwiJCC+>h~?o8|Z
zU(DCch-6;Nuq0Y~mYBu=r@viNH}3g7(S5o7!_`}g0!_`Ycdupbi4WgmU}q64UbsC(
z#hNEy{&gYqvH7xD(r45Dy=geNF3#suri%jmUcq<k-=CWD`{b9ooo!6aI;lb8r*G;1
z4LCQ$FEw-hm5*F^v*RbN^DixVW5|=`S(NlD$nbKyFw4Wc=6Qlu`cn;VT>tH2`h8-F
zwanx4)Bm4|oPFQ-s%9QXziyMa!I!U<8B4nB&R$!!p-({7&ftCftk;?0*H=XBzLPWk
z$3B~5_v;VsEH2yn;X&uuXJ5B{*e|+xv8KVgb*Voj9&s}<yb=7EHP_;L-KNVc{QaK)
z^Vk3TE1|{HUUoN2+X)}-<r%7rxOg+G&RM*wIk?7;eTz})mC4Ot{9iATy_Vv5_Z3gj
zwU%G`7TXWVBz!no{BYT%UCaNsoBWFxl6!l6vQ*@0sg0``Z?{Z4_5S10mId<HY`)g)
zy)#L9&aO3$VJhhYw!gdIq&KW~_Pt%r|9y^vlGPiA6&j2D&NN5dt_f&}Ja=>5kCW^E
z%U*x>lA&}`_Qmq;r7q>a<gQMRU0b(=y)?)5{Rhwfrd6R%DSy1HSY<BhZ=7(T?)k$A
zr@xGASN}PamfOEmE3xTA<I>Ml>)pi9{j4@G|DmdVgl*cr5A!D+ooVj!JNoL26GiVm
z7Yp0S9hoek*ZYMf>HUAX&rME|TkKBjhBwuws@m?GTGl4H{6oiyGO;uHQ>$Yl(^acj
z^|yFh%s$4~e6l+8UX((IxUKh`cJ=eqw`qy4S@T_J$H&%q8Iud9-Sr<QvgvV0e$;%&
z8XwNFa`{1CJDWw1Wb~w_igUKEdhOmPp|>`~^MRF1jhMdY49y$woVudV?_<BVpg_uK
z$BcWtdFk(DLON#z+WitT_PS<zamV`ww;tH^J^A+O&&(8qjRBS4*KmC>Iop%a=d1Pf
zFMr-gzcc%~^@BJ}7XR&7%d_Ifw8vdFca9~ie5<?7B9y(g@5S#z#s7Rijvs56GKtZ@
zzD@A%;wK-aQ!eqB?9(qPzr0ratLgjcU)Kbd=hfT)+IfH1w)iD+hg2TBI@)~`$j|fS
zknwetTDWH3zk?H61S={#pDsOeq-?TP;XKbvrq5chO5WM}ZDohr_igi6KRs5XZMG@(
zTO#Yj88=_+9+do0cEE6F-gklQI{($$ku@9t7GzC(w9|K<`k@C^pTq)H?ppL){9kb|
z?tM+T8P^euz)iAS<6i#Wwfl?xjz9bVKZ-AZ(^vEPU(C$ENpG|3Ui44C7k{F1scZP%
zMNjW-I(hAf(y`NPrx<_Yl1+UX7pXWW+I&jk^4q^QUWgQWIOA;V`APbdKeP#?9y#kj
zeY<{_fCT4D4neU_!9DeUH=q95b&z9H!nM8Uk4XG{b7K38>OX$g<?E)eeLnln_FKEQ
z?%ut7js3^D_x5h)ntt%W!-okczJC|6%q;Jh6Xls+dVo<(WP*=~Fc;I|h7B1$8Qfe<
zjRp&DXsB}X98xmazSZwfsnOi3YrmF+WOh!CnyB?d+MO%vm-@?X{B3vtysrM~a<BB5
z^@d83pL;b*+N?`ocVA{spS5lFSM|`i-;-Y4&zH*O`X|xTkg!Yn@TP#ihm&0A%PH3B
zc|ZUCBxOm^!S|A<a+HIjEXyiqSzq_~|MGA0HP7F9Hgg01J$_{|`+NM<)pNcI?`3{!
zA+W*e#~agPr4`E}ZNswn6xds>*F7XwT2?!qw`%wLyByq-UPr@1&a{;=?9<6*k(m)`
zrTKo-p-o@!ZCo2z@wm<;`dktpqnyYL4L2^<UZ;cu5g8J~tcMN;Eb!105$b44ROpoK
zURV9=qv7@a(O10>c^|psS<1(hck%wV%X?<*_*-4>d--$N7u!d5=by#JnMzG)c=$W}
z&64EvlXsu(>RGC{`orseKhCT;9bdR4KIQ7%?$b={#iCM9)3)j`=lx&Q5Pxv{U)>pR
zl6o1L0#)Yq9f?2B^C8aI!1MHGi}e@M_uN#vwdI<cV6FS@Dd#!vhu17WcWSxt1139H
zH=X@Q^Tn8cTm3CN?#@y5^ZqiewrN{M=Vm&;_rEc9aa+F0t_vGZd&Mo?VxGIn_{!l`
zTY9)_Oqm-Pl|XSS*xJpQaAARu2Wan%lg11+5l$WtzaRJVQlHE0`SohiGCi)PC#P#K
zJXwEZPU*dk|7E4KzAc}&@AmE=o11T5J?$>Fs7iR#_m62jyHsjot{qg{ARC%><x*7a
z;#zq|4mSTXU5mpK!SD9`)0%qdTS?Y?lcY0`0v@|EX=_$wM%T$N-CE5Pz2&37z4ZU}
z`vgLM6u1YQ$KLu{Ikm&of5rKD)!VXL3hyub_+e#g^;ao@y`oE6<QM9H<zM9e_^im6
z7xi5+ch-9aiC);U;99q~lzS0R$)DB2ANFd_Usks$Y|_`ezVe#Q4U94(6VgPaSq~j*
z*pOf(#=|<HF<?Q2mWW`hlY04@Gqdk+D?i?SEhgww#LVd#Q!S*ff1JPZpZf2AYd>lK
z*c10IBx~Md@9rN*DmmgOKTZCTahR2#f62LDGxu0*s)_tLE3M{QqyXRfQ$EbqC(ZXi
zea+RJdMQ!EQ-QU*PWY^$fa2|aUbgpNxbFGadX2BAoa?`~o7x9OE(tCUy{FbY*1Ff6
zy>or~zStjr$G0cvEn(R8yGs72$m+-29)7Bmex}MjGyVIbaDkJy5hu3fea!Y0Des%H
zh5b2~TbOa|*_$D$%Oxy8Ia5nSS+40|!$uJu5pGVdg$^q^l#Dn!&pq3HE;X$5^_s_e
z>oyk1+*>fYad~a7^%;v}XXeCTz4Z2N_s8?)?VBHj|J&?db!7dg>82%qf6hPCseO4{
zDZ=gQFZ04jGXvP&!>8r-r(T~l|9Hm#h(CXm13xW!cE=`dZPQIAr&?Zl&1ZJ^(uMx+
zZ<+92u5RB_f2N7c*MISq?K)mEPx$0%bEdx7)vwO$^&FQAv}^c1Rlx4?nKv;eT|K|~
z-iGo|c)V&uv&15mm~E1m{P!eZs|~EtSJm8iW!0*X(zZQ<4mY1OH!#YHOi&Y%W@S3)
zXt2OULRgaJP@uvT9dWLfCbe9{bJ^wlY-6q*S$*<o%GAA)L4NF<zjDnl?09bSXWbk7
zx&M~d?D(U1aesn#!5<0Rhb`x74?UZ7uG4PS`mM~dGRxQLZa-3`Te)y%*{vYU%PG6l
zY|W;Bv=w))4dY+@MeC!c>zn;w9lAEm=G(zEYl&0c`@H4r@>xFI)1E#-UVr;9b?e<l
zo1LyLStV=hC@6fsweb7ZLXj!y4vlwS>$-2w+Ns-`ddGFS)|B+u9d~XfPP+1N9h-5%
zmJeUgrrDW%*Wayazwr56Z|=}rIz79e@iD51^k@llakerhIn2~h<6`Y-OxWS!BqZ4R
z{N8t&fB%z>)`<yy4B8yXS;K$EPJY?%=@+6WmTS-c@cNy>?-H|ze=bJx*v#sjCoLDZ
ze!I5y|Ef=4rp^2K;<oeGl<98oPE7CNm_JK6?pjUS@t7MwqC$O2b7R`S-4*t$4|}or
zzv+F}RcW)ASozw9|2z3Rf0f#nnOR=X-p%V|<e8=Eyl`jK!{ixTrCk}OEcZ=jD%$2Y
zXPLwfjh%eojOV_&b3CZHwzGccGZu~AQ^gj&pZn?SrnMXHMl-57d(Sl}%2hk`<<NbJ
z<n<d)Ur%l0?)jl`dA(G*VyWip>(9P39$;YLFL0bC#n#1I{L%hf%Qxls2O5<3xc!gW
zr~Z2XzkmDxzs=tNWMBQNue%?;>Wy0;e>GM-M)7{BfAN#=f4G^>r20RIvQSa@vRoi#
z-cIE;Tt`{ec-(Hr?p^MoY0TUcVUVZwy7NVn9oMw@e;4lly~efq(prVh`Wabq`B&rK
z?T!Ds{ng#oXG_msG_4g@Rp0&~Y))F2_u2SayLMa+zcMTKu5Xt9^kWPx3<c5CBCfyF
z*m(cU=>W+u{IWWMU-fRw{E0O1zs$VNIlAsn*6#G_PZn@ExUerM5h$>|XOSbT!Lu^z
z(>;T%qNdI6QrDag1WguASst-%pYrs}Tr3Rp64uzcZtFB(9{A_~bf$zaf0gy7XU>lD
z`1**+@$VVO6E0C{Gr5JPc?EPIK5bUJ>{ZrJcJ|f}iTWa{ubi*(N9xZy8{uv<cbTU9
zH0EnDavNtJXL1WZRo&<EPcx$=WBt3=nbBYCuKtUBzqM;w{_K6%_VE2@Umlqj!Wi8D
zQ(0)auBNp1%l*s>6?~52(&4(ZC9k~S!>ptrvB4$U;LGm{l|#xNk9N<qKb?C|af8m*
z*GC&CmuyH~5E?8MRJCoTd&tr?`}I|tjI}mvZD`sqe#7_cCysSnd&|2LbKDoMS6>^x
zZbi`jb*qwc%D*q$nmzfN{rdwwoF@dtg%9r9eQ?J?g{ema-!A<azr}9r0{4@%FB;v`
z%zE;*`)f+?@2{mbGmIn*6q#Z#XlqS0n#y+j{hkk7j=e8Zi>j<?T;^zWtiZhJSnR(K
zN}0F!waJ!Ge73jZu-A$^F^0ya=U*s&eRVbHRQBq~TJ3*(V?PL%*3Le_cdcsgs#~l~
zhMXUm93oo|O6zQ!!@#@fOo^3UkGIC0=Nj`^IWNn~1Tve)DJ^+;@+)gM>(hiL{sPZw
zvQu@F<F^Fyf4X*_?SEgV-ri5wTK>HH{_1*m5a>iC!K_F5f9Bpj*zP3k!aXI+KK+C6
z!law2-#$e?;Cs8_U*3~<>$Mj#PiG9#iQ=r#ixE2Q9bRVGqT{7ComHsh>#ATkrfYu_
zfA8qd))R?!JZQATNR>4%@!FwRtCnSGsI<;sZ`rS~rTeVdbRKrb@3qJNR<i5!b8tim
zuq;<%<XtN5eYZz-%PfIE|CsYuBpS_e5#TKUc&&R!y6^90rvIn^*GV|D@5nOMV6MgU
zj>;H@w^+q|cDc62V%p3FXIUjLo38vE*Yawe@zoQvKUFN@k?M-cSj2Cb>Sb}o<yu+k
zPutg-$G-Woy-D<u>bLvBJngA!S#{r|2f^HhdXb(Bn^v%KnJQj-ov(h=o1-GEc?R1#
zIbR0;`HY8;^F<$t;aJJ`F0y<@n)Tnc`3X}34Yb~fet-T%cHJE-L5W`~|0hpgI@{x;
z-_nb9Zl7;U<(lRGZdqSy|6%1)dv}{D8@xrA|35UhPtv9DnbE;!$BUcyZ(rTI`|^Sb
zHud-R^XX4M{ZlWZNX~z{V9I`{WtOVX`&)7y*1hpjJ$%mObnf<J2TfOrMKX!BB^xdY
z+1kD9OT|9_>q`$=eSP({Yl9)nA0EeerVZf%+>HlbXz5&CwN{T!L&xgcYOj_0+nZ-M
zb-nKH^!T0WQ4-A0!s-w-|6ohO^myJyuXjlC22}Vw{UcxAzD>H9c`<8-569j<sh;fu
za(b8l``wfIe!NnVPx8)ezBpyZ(1$k<1#oK~^mr>NC>_2}B*u7^=q~Ty_rJt`HRU&7
zq2c2H*JS?oI7y9v@$vWUZyU^g)+iXZ@8tVmGv;V5Q~moVNAkoJsSO1hg}yi0@A&<D
zGA)mB&6d4<Qy;AR<<DXGHA_z-rm8CYS${_#_lE#CMLwrT%_r~vP<gfY=c%n`|JP<{
zbNC0|aWg)euCSt*IV$^q#*>I&**^X6BA@K~q8^m?`rwZEIL;e7va@-cV(juyO$jdc
zh;2V7wEBpRcfVx~k3x$`+YJScirlIyN5gj;RxR1d>9+Rg-j>IA@9WN5eC$X}mhu0o
zOc4uP`&o_ROdY+r9gOd{wH@Xtm)mi^{=-j;G<op_0k@?0NxuCoRhX&A)Goa2gx%Dn
zSUWQ#jU9ZSS|b!D_x`m=zWO4yJve=xai3*RUz3=>{)dz9+(k7yjM|EhVeShKz6cDe
zxBe~VdCA3gt7^Euc1eYMKi8_L)k&eZxix$5-Aws+!mp?Lo`2RvvxACL_&%}BTRJPA
zZ)#hDac=tFbG3KRY3zP&QBwcS<AT?hf1)NE?{2I0NxA)J|KHZEB4NR9{XQn|O-82g
zUzhUx$qOnwFH<s<`F^razSpkS>T<!gCp)H@eO>tIUxeyX_9TTjZeiiqo<93hV;{GA
zb=1?S{SUYzWcTnc=xEFMc}?|Vi~ro5h8h8ZjZ+kYCMyU!X{9sN1*aTkKKP^NfuG%}
z{pVuSo}Y=)-M@7HtNIg_9NKXl+)Q2$2a?;nSpRcBwFrnlF<DB0JtBF*)+n>vrk1t!
ze4?e(|1B(8{L3*=WB&Q+@fRK3-Y$)v;h1w{`^TLRix0QY5fjviU`zH}F1z#K!B<`j
zmzK}^e`L1!^yMet-hQ}f58rCx|8G2_u0BuxI&H%Ia5EO^UroYOy3%epUolV)F+ZNP
z{^P_);YKg#O{uXKa_<pKOz*q2^q~4Ox%>)2=7Sdx@m8#H7y8e4n&IG5jRleif6esZ
zY8E_i)!o-u%H;g?{~D_ZZoXLSqpzCxn|ME%aDUoQ_WsLXmvDI4sN3I)`_1cVr|!yg
z^ke4yzXo0f%Zqm#cm1q;A9pxar8sbEl@y<6kF*_!$2a>r@i(p$BMvV2nZDYH^T_%?
z6E<JUn^L;7EOz0t<9mf9lD7ZeuwO4Mkx^3q{Qi|IeWg~_>4>>6zU<8E=ii*+X1yih
zX!Y#S1%KPjn`Y<~7~F2DJ7qDGgZqwSQA(LaQEkxK8iNP(&o5bdX8yjo{d@jjU-cvI
zulQ=G->z%sP3YfW{WWQO9dENjNA%wR>jN(CtU6Q4`%tg$@gJQu*_}J~Oyf>W(!RRa
z;m2uSha(M(9m7}D?CD`M-{19xb=3r)@A5CVM!K{;?AS7+o{9PCVWaTBYj&t;UOMI{
zU)NPW)2?bcKS%Y?&(2yGk6ck+D)U~-r^X=Q(#*vX?r&mW+2v)m?K-vjuw(qb<=ija
zeti*DXxrEpa_-zg#-Ee#mCQI+bs^6}@qZGx<QePFC%>xgi23m*HRN2fG}r4DM}l<<
zdJOk1`4p=wyzihygZVAq$Oqq))?`19wAb6t-Pvn&N2%bIeEAtYgRkGs!lV{hZrj&X
zq1x=nzqk1L`Y#_9B^CO7JAQmA|C{RaasA1w#c%)2yY+SHa@Me;U+y3Gs4ldAd-&KA
z*Ty$IEK2Vp*xu_0GWq>n#J)zLY~q9Ntm<!Do==IEvVE|Bb<xAx3y%+cC@$yk`Lsf4
zkD%H9;L9qmqGepAMjNZAH6Les<+Axiv#hE%Q}f#`!BWEeDlAIgxcVo~XSnR4psGF3
zy8O9uyzRp}|NdK+;ur30`6k}ODY0>d$HzHlZOkjP4!T`>y7T&vf+@YKN)JLF&y$N+
z|0>RGV$Q~Y@a`O+X7$zQ&uGlJ!#GvkQZoLs#m(!}?*Eyep~iKvusO3=W!c898fRv)
z$FXsQt(YD#rR2${ZXY@3LjkN_;@bS(Y!UCjytVz^vwu#BYsPM$8LStR4WxY@)_mwT
zQruBBU*F{3`|@+Sf23aLJ{O+jdRzbHV<u((2c;FB`RkX9C;ZATJe;^!SjFpJ@8mCY
zULC%8Y}S!hvtMZvEk|sheHZXi&|b`7ous~er&;@|{V!kGKW1Leex*|LS?8q~Q_FJ)
znZ4`&tMA|RRQx1!T9p3(HF2wh>R*fcuKrvb_ix|7Wrmqge*X?|bXcr2g}<No^1grP
zZ4y$7)H<I?%FSBt-L2mLP2z&l-_}VyQ?;4<n^r1W+>@xU4bZ<LY;Y&k|4iyZCJj@E
zbMsXGEY1JT?Y8N?W2=O?ZQ`H(TddMA+SzSfy8D5T%GCafjMPQ!Pi$A#NzD-vEK&I$
zf4Jt1-l>zi*H(HKJ=c6}-)<xLd*6RK?c;3$=gPP~eKURD_I`WbjorHrJ*g195+(P#
zM){W~*U6W$rzd~?DRT7i3ZMIjoS4KdbR6U+XDBnDFZmthT)je5al!v}H{Ei>PJW*n
z6q3QE>?6P8tNPK__tPIrByF6to#*DSUAvSYn@yHmCjL_4?7k(x+HZLGN`&+5lF2Te
z`SNqhdh-ibDSLbNo>7+&I1t;hkLzARS<}lEPnK_(Tz`rG0pBVO*#*pb+u3sFub8Yf
z?|FzEi}qaS&&g3UGJc)@bx5gD_O$P;kB=N(-__i`aeZawi>YrOwY+}kz2g3bT`z8T
zUtQ-v>3zV1j+ZkN_Zg_@ivKm6ws*rxkCKMD@1MRYU#;I(_`&DyEAve{C!{yr^0-^?
z#(2NG>VnNKy)%);uQ;;2=RaeuSd_f9t#zLK`S`Z^TNRf75uaSH%RRR!^W@*R7Y^p{
zF__tQ@o%X4yBalLgDdqK2c?a^a9bH&(tfujDKGA!aA8X?+mV9hW)1h;(|FxJ{fN3V
z@kWyEW&4Hm9Q|DO-cwXAI@&)W;`7s~AHHkVyss~mzH;M?#7isYi4UGWT(Pq-$>y<_
zh~e593w9>YGRd(#_%G$$tp62zCeGS1J*?lC`Dfo56?y*8wjVh%V{h%^dV1k`SHH&8
zkAGGw*t7rIH`_Z`efq0Mjt5*4Urq0@`SH=UkY%yzwF<+gv%YiHMVE5*9G<i9Q#VJ@
z)<f&^!vD?}`ec|ihv#6QOmOg99q(jeEwAWh)3yk{4SUdWP<-d9XZxf7hz9GI<vljl
z;E`MSSN4DA@7LZe(thuLyjG8^sqgI&ns(~Z`oo_ef7<aW@kOzV@OHjp`!9byPd@+E
zq;%|@=Ea>iSMQmxU)8em)KRnlYkPk;?UxLyEv_yM?>g0$FSVp&ht=y1joM3J*Ghhv
zcGEZLC&!DM*PnFsRn;;7e*MX%EbrBdq>|>f;X78G_OWi<`}e1Ph@-jiC7xr>g+-Mt
zoclN}q_d*m&QKSAW>_KqRU`Ci)1`Hpw&&YED*j7dr*%YNoA##%)^&e)9vaR$b8l5%
zPu}}EPLAKWJ_Tgu9a?Mfm?<b>#Tw_S!rE`sH5Hw>&KG^pd%II(?Sq4RZu?DFFVCnC
zefiHe>2K`)x>tuEOldzfp^%rUfA_B@y?4*1{}=R*jEsN$uE}%XYQK2}C%IQ&Kit_r
zQ~#jm`M&`d{=R$lw2Jj@+^@eP+ox^%<FL5L!`tN2ulSbq&J{m<T3e^r|7#D{i%su1
z79l-XX}Vy<6gh?F=$<e!<_CW`{R-;$L`F(4eShvlWBW#-#GvcPiWYqKe<&ijQde|U
zh|`vTT@UYc>!s$aM#U(9sthy@-_AE}9#021tAOStIh8sF1_2kxSyFAQSW7d!Csl2)
zy)U@F_s9MJ@%8`ze)((ue|5O%_g_o-|NqZ^{LqfcLG5qs$Ng)<UtP_*TD!hHRKISo
z(iP4|fdj2RGa5g>5SuW6ox+>bTmCnCD47SY&)U@_7Po7bl0j$kkvic#?RVTR>=sPn
zk2-5^-B@bTs$BV{^M2ror+KqK9G`Akz+=OHo+bPSU&p79@7ret{gZg?TeC;nrr}&q
zvD$;`+FAeDgWhp6H6BV`!goR|UsP&p#q+1jyZ>?~FA6>Px;O54$w86*>sQRb`uEq-
z5U1_0{;$5ezHVdaw~e8H0@N?}T?%`dUvpr$Lf^us9qYDjeErtM=d>%&Dv9l--0crt
zf>{d!5_|hD8cgZ%7Km?Ksic;8a6uTG)rO0cSQu?Y0!|kFTUaf*IH%^2gcWaTom8Ja
zn<;-@kD#0W>0ifdR24XcRzH6(x^R2V{Edlhe2fyjg$m6Z8uw{CmLDiEUr`qGG9<wN
z^k?H&!Gbf|9<TTws^6z_z+H@`F!59F?kq#Wbj!{Qavyi>%gxU7U$DW!hNGeF<DS^q
zjHG)CYK`5IY~T7q>(_@Y`ErP@r;pX5srl`-+OD+)N1b}uHa0BT*nhkKx1GFgZyImo
ztqKP{Cl;}CR{_SP3Ri}rLi>fK;kUjUCC{6^?@?~j+pJYT-^&SGG={u%*}VJ1Zv77y
zZBEPFc<w#E#`tBA(}Zp(zGh!_n-Ymn)kizom$L@9^U1KsPFsEG)%Oa%iT6TSC+x^r
zv?)RK&iidse=0^k{#|pOLwLqPmR60`(p~cW1+AN<1nj3j>-eLvAeooJ;0*u8p6-Y{
z3*N53{#+wu!TiHKD$ftzx$)2U>5^COMH=((f8Brne)*xefN38$2rx4}RAjK>dz)3i
zy+Y#9`-<Ru50Y%88GT&7_iL`Y?lX1j!-vbo1ceQp(hTZ2**{pxzwcPddFp4~{_x4y
zw6}JxTKzX+_1n0A)0h-g*bf{CP-Xa|_3BsxL;tJoN?p9Z?&+p%Q(h^kOT@<auaV+9
zCVcD}*G(4r+Ly16s;pf3{;F8~L%#Ug|1X5EE`2}kc+umXw&g$T;-^-B&A+~T)yMhE
zPv2Y2%Ep#FtM$XXv{@{tRz6uR^>%vQ2d{usP49x5xgl(p{7+W@`Ov<ir_?Z8@corb
z_R7|)os#B9%l_IqeTnRcw^c`q?)jfpo*>XDU}2$oathm?4pHX+3RU&T|C+t?d-P$y
z$__R)u_*2Z`m;LL-TGPk*YWXJ*3Ow_jcIe^+m0|B#O7I^{B(Ioz&EY1!W|!eFW7ce
zH`iO|-Y>4DA_`ybN2ut@by%}Jk-Js)oAJX-@eY-WHS5%`|1p|Uwdtfsy72uUIlnHc
z$DcpE@%jn{kqOp;U)T(_H|j8*SG{s~{prcEsyozfo_-v-;OHi%P(_zpL0xJa?ew<4
zdf&f$;|bsO*Y-!OKRnCr;H~aI>EEXGvYwsJ=eS%(Qe3rf@n6*lg#+(YgLk^@(fqn`
z!MvNh*|Hs3%A{Xfa&TShJv^!M-;FZgBcHx&?XSAR>g{Hi@vi>TYXd6}o8%oALPR@w
zSPp%>I<x(Tfagi2Fj-$iW2ftXS<SD^Sok6R)%SN|Qk9eC>~$VlOniIZ&-(u(#hqUZ
z<yviaF5Xf8quJ<x@9`S9Upl$6#{VAL+A%rpnX{ll;eF4g^-tpy7Ddjs?9l8w{!`<R
zvgyrEtv!D4bXB8wI87-|n7gHGoBN41d9~Uc|4rY1zV@lgW`ESb4?66BxE%i`Xe~1`
zui#jc{O%(k^J<apr+kAaKiD++U|qv;-WxyST;>SfdnWk0=!^a2<6jO>klnFMbQQZ=
z3u_qD{|M8T*}QFsbwdlLe%Q&}5g&f(-h<!yVFd?#?(o}lwwye)@&AwX?@bJCK`GlF
zJ#t*jZXuoUk@26j6<?OhCGBhTd5>(qxo_S<);7*t)}IetGq|;M(W@wtd)F+SbE|kd
z4n^I!4SglQrOEg~pU)+k;6GL`<o7Ijyl-+j&(pvqe|^5x{aKN{^Tsab$=~lP9P8*g
zbVDR!^{-@cJ1!0hk9SYc9J%@MN%t#r-||^*{%2LE6uR9EJ>em<e%g<N<=5_7ZQOLm
zS(Y_+M-|6j!#O*bi#o|2y!-H|O}JvUWN&Jv{no4}vtGZRT(ODqiC_z>*8A0aZXSH0
z%~~3J-^_Zq*2&O>Id)u(3Ae6KXEgjhLuMLFi|6@26}GP%^R~AAcp+MUNl@IeC@Wy0
zbT`kk4LdGv;)oWr-go%1b48+#{EdQ1y#g7Rd!DS_lGtdz{mxe@m+l?Y)!+Y6xMgtk
z@MoTK#cla-UVT;yQq8;Lac}XbTD9vP8s|b<-r9dPeihjC@84mT)(W4+0so}=86G}7
zF7IpXzt8pBH1QWxj(?2z;#=?kYg^M%`H-0bmLW4&IM(J0-v58ruKG{q;+Nm=ZA!VG
zwe3z>%D*j1>wEX?IoR;}_Lud`4x6rJwZENpGLOqqZtDM=uYdS{^}lTElj-IDzj&_o
zkGk8(FYM7>AGUf`^#1Afe`b9!Y*LB}6<@<*QNntn!cnTb`>N}c&r(b)_Q*5IAM4<~
z6gh)+(l_ZoC&!)yhq((Rr!cTEb}r!BRIJh8d~Sj9!LLlg8#bBMMC}vPVEOuQ{;EIK
z6Q+l3uCKcwA<o8;RxI*M!tcoRKf)h3J&gNv`^Tp0mp@82^+<?%J*|(Q%4{A{&2!Yh
ziPNxbUgX+3IpN*M3h#=9Z3xObSbvzU{3^5T;lg<W`=3{&y1z0{OUdJ~_`tKro!41B
zNA9q(N=|Ld?mzp>ubtg%^*)L7&HsdshpF*0o7y$&|IXQxy;rTdEqX&{`x^_@tNZic
zf4+VsUinwP_>WG-sFmWYLo3C<HcXJ;&6cpDNyvNKhPD1Zo+gZ)+50NIW=))R^hcx6
z)_GSFe!TdlwT;_v(br>B+7~?y`moKdUF^iwD$`%lzi-afT@Y@4<BVwD9;WvnOSh+&
zHrzb#VV+>YeE+RmfYjpsHS(V){Ewe=K2AkJ)9J7K`j3}(bL9JTP0MbZcQm9gbxO(2
z_cH(f-92WrzQ8Z~-ZFLPX8pBQ+x9y09Ldx?{ox_sqEg1t_*D5rg3}_R4L=8~?~LD(
z)A8^bQ$f=!_uSbo0zSs}Joy$4eI0#VE1q#M`Tnd|wGnXg{<pFJ<l$)(9*EQzioJWC
ze6;0wi~U*dX!TbTj~5H(Ea*S5aDKt1=~BMgf#Q#TgmLYk{I&Y@9~R^3*Y+vBd%q=Y
z_miDVDytUTK6s$l9W{4@i|1kHviv;(-RHdmr6+%V`^K|o-ocA4c?MrpH_eYR)|vmG
zmG$gnr&z{`(K7Z?_btPJJk$OVpb*ZxWINjziEACN)2Hp{ouVY9rhoX#k;QG-{4Z6c
zH1{mJ@Wr<E{Be!rlWcOi<fg?xImoYLZLXd8(0F6Q;wcX4yQjEj9?YqXP}4{*{-L1G
zulKO|V9So(pIUn3jlaHs7W~t8c9n|YY|V>14oNxBSzE_>&gMeT=^BpP$5Z3(c)mIP
zBf9P8^>qiYnqNO)*LAOen`iYr)wmqR)vH}ssZZ$)UwYEkKXT3O-`Dt<_4M_>SH0)C
z6&rZm=l_=OKUyBOm##DlCAqCwBFDVtHLw2hMSt5?NQYk&co}P<zdc|p!(*q{&%S2V
zEx7r=z*C+zCU}S7>HF)8#HaS^Hx*fAYU;)1=cn%o`P$Tct@ghhdp~<o<K4$`oqzKd
zv_0W}t8Z@k)3f^IZ=T>X!6vWBJ&)QwN{$7|vwU0e#>huA*170Ho%HnQ@7BaRdfCQm
z?Y_yD^7Fs+P02FW4TcPgQ&Mtvo{Z#I|FKoH)gwd9`c(WTotI_&OwkXxZY=E0?XjwQ
z@>IsMrqL`+&_qMw{*MaPyi1Igr+OS3UoSb>xZ0u2zVcO5KmgzTGug%(3#TjV?p+>x
zTZ4;7?5@TdL#`*r@$2muwiJ8H9qay`p1gFP<5O2<6~mgVsk`M_T{b-VuDvxyKH)+F
zN2<!<_mg+LloSw`SnK#;%G-{^rM2e@6}t>gWG1cr!Lu`J&%)O)1P%)Mw_IJuaqQul
z-Gz5Q%)K*Za{f==U7o+3^Y^F}Cgw`^Tz{y%Q)JU+K69@lte>j>O-O&!=6;oJ>ER0s
zP4{m%ls^5G?ITc-J^!?r(BtL@$EIJ4oN{c-EUDS+FE}0YTf48b>5chI`|J-p3!`^!
z;xs#R&NXk#{@;JZ-~F^Y9C9howbksh!EU1!|Egwh`Zw!fZr_%xg{7@00?wS?vGLU5
zHH(gUT)%D?f3-x<c!luHO-uS4HSc6TzaIV2W1*8a_Y;@K9UJ+h!(F4N-{`(2;UKrp
z=hBOBTZ$Am{3=-e!sMJK*UVs!E|Z`C*6qF5^H_a{`O)V$l(%+FkqVtGm&dXGPLufh
zlzj?Xj%yqP4ozn-(w+8I&z$jgkKv2!(L6t|B`@k(y<pkq{n1nBak}?kJ6(P+)M35k
z3h~bj->!2mmM?f@_@+?TY&Ykuy$^*}%ZPI9TYdBIoQQdRt5+895$k?`^`Fx3y+4mc
z?)xJ6`I&O~pSthN+xV*wG4yaAcUEw(z3Z)z%#gl7AdC6V^nLqS=W$5erXQc6+#35Z
z!s`+HKk1^~_9qrhd}XrOwxDKGrq7}B>XV-f7BgPla7o~UeL~;VyNa*4*YAG!=<m&-
z{=5IX42~?WvFH4~{~%w~htscL3D5s{?4Huox}+F4fm#oJh9I+|=RfV^^EoFT>&fo>
zd41Clm2=O(-`wGHe?~$ti_k%lW7~OTj$cx(jcM)OFhx+LO+@~Vk#MZ_zb3P^b3#e+
zUs_7iTi%_TR<mcrMi#+{BYE4L-u|qZE9E6`B68%zO<}{8cf>u9l-s&}RA>2Y7I@T9
zc!&PqOJ;w6^{o1HC&pjX<c?bCmBhE<c48ruwp=g_5t($vVQu!LAFY+F{kxy-Zk-zA
z{`AY2N~fvej|-Fkc(<N9KcO<X;?abKjZE7GbTX3+;?yQz^-vCfef@6K_w_ea1)m?2
zi*h}>zHqm|nRPXGS>8N$%R;~O&YZeD{QARB`(@>dn?KE8*q^(2iG9f3`w>px&pKTG
zYFM=KuK&;153hd7?c00EF5>Rj^#|o@l5f-mN^PzZeb@QX>xXh{h}17{xr;i2f^&{<
zUnuH2Q=ruS%Z8Act?5(WeTY<4k`_+fQRKGgeC#=i{$}-^i#XI<RlBx}{0(&!YR;=!
z_g=)IJ#8&NpF?1vn^4LbHEylwhl)%u^{yGdGdQrg_sWaXV3|jkwXy}Km}XqK61r{v
zuY}c##iwtHYJHlyCi=7Dw8r-*rR&p6PIr|0s4Y61%DPAT&T+vaIngBsX#v)gmWT#g
z&OZC+|9goT1qWZ=%Z9TO81BnCv}?8}UaMwK_Ivs+{=&YM*{|&8{a@L=`}wyc(sp@&
z=FfkmdV0R$UDNmK|Kw-RDmw3Sv(WzW#b`Oz>-7P;hrOHk{hj=1_4&YF`*-TQ`WO8O
zdG9Bz&giL>vh($K7LkOvz8=}K-!t_u>0W)tnd#%M;#%4yaj7XdX+y<J{)w&au6!?&
zZ@4&$JTX^yzE_bUryMFVQzUQyFWD!dQ!=&1JPY#M{<QDuc&Y#Tg5Rna<yF7mUw^tc
ze2@NmmW|W8etCW<dwy)+?iKZIm2wL!l`AywE#lzV(7#?|(?2$c?Gtpj<QrYBEe(Bm
zx7>TGjr_OIH3z$delRcyGC9tYW;?}NTKdBK>V@9fTeBb4{tvoYAMcvH|KIfeum3-q
zdPi;V{iuz%RPuhm|IZo{{<b=d>+khN=C4DuufD$fJtS(`PU#~pMzvo!TbmLWHga|N
zhrQM_W~kpC`#fe#%oV)^jt>v^W~~l9b6xKJkBEi)1>|dX+&cP4kmt8^>4Of<<t)vt
zJ;zE`y{jrvV%?MWvwHf4eUpBvFP#v6=ln&dM^n-c&C$_3bZFjVqeGva_B0)u{yb{)
zlN)c2^1AYDFh1PWIE~G$jq71h?@@_It3NKu-CI0OGO<fyAq4fQI9&05KJ($jFZG?t
zHCLF#cNzWvr`h>n;WVk{)vP=l443kVO0Ld-Eu|yd#lmvIpz-O01NE(we{4=?Ix{y(
zD*S!^>*M90{y0=P2t-&WS52+6ex|VAZ`N*4Jtx=5ZGSHL%=~sRrFQWHvxOJr=WEWm
zC$ru1(CwPVUb0Our9uqf8}iD2*>wB%?gK|e*4OYxe~o{zeD^H7ckhm${%Em&wffxf
zBFBlbe-#Vs_Nsebln#IOv$l(gGqEG#5kmya!Aa8}Ts{4VWygh!LTX8jN&+k^Ds;Ko
z%KMrxGR%C)8gPmI1<#rC3k&QX9+~2B!6^P?;v=rY$yQG<2S~;7$!M`=crkEIoV#0j
zQ=Yl2!LJ)>D;QOt9Qu~#bGEno?1%S>D<)e-@82qE&!}gUC%5sPj(uZa(4BKDe*JL{
ztJ%jS;Ipqnj6tx4K`_xaXKG<Wg#-VlO$UDj*0r3zHNA6v*yQW`!}+hCzrN~PP}Q`z
zaX+WNzP`07K~c;mdGGJN?+sti{!wCGeuddLO@97|wJdE)Jx)ds%DF#pX#DWswxz1w
ziiOXK`NI1J<<pPf;Q##RImaZ0X=(f?oPU&@-?vxx)PXLu`ncnIS6^Q(e?2wm-`?Qs
zYhwO<-}NniijMH%g$fH^dOA*gz{<(FyL_J3{$Q=u87K4)On4T<bRuhN0Q=?D`Wrf8
zgqSWZG3a}4X83CMnMXPaCfwFj8rZ8OS$5jVL@rwQ;zj-At=@G_g`TzRtJjCFTe9!{
z)n7|we|`ER+R4gxnt{1x<NDjn!ewV4`2V@k`?Nl*WLH)27Sr<?1zuqnyl(M+uKBoM
zG5(ges7qXI(EWw_<>6QLSBJ0uw>NJ4cF6`uhdUfR8b>7-i&V__VKH+{b6UaW(sQ6~
zE#qr0gZb>{Mhrh^Ee&&1c(UoIy+fZID+kL0HKu?XyBvvoYbQ<FAQjSdv*XnV<)^1F
zG*6#$_4z!BUH2!Mcp7DDzqx<Y(8J#SzrvmC{OyOHusZH~`1?$}@8<h=G<{xY@J}p%
z@c!?I$H70kXWTfu^ViYIcQ%K!ws37X`%!*!{ZsDgjFR)e-*&dRn=j3|;f-J0GVy8g
zjr~0GH*SaQeDupZUT^+$rz)qq`qR6<#(Z0Qzuq(}UpnKdV^NB@p%xdzo|jw2)f(mh
znSX1(7h$$##l+)mMQz^_*#ev{gepmzE>rzG|JXT;9zNH(Qs<`L`t0=G_D0p6O6yCt
zQ*1Q)ydH4<I={-nrse=+O~U2Fcd~C7{Htvb;@<uEUwrX(mcyx<)4wbzoGW+w^*v56
z`GlNUpR3hu<x*c+oGc!mi(B$**%trg4gZ(FIg%me@XqsyWDCzRRX<DJ=|7z(3vN!E
zQXEpTOYef4@u7nFGda?SSgNo8a6ViY&om=6;X#}!uY0}QnzEhVukNm|@vYjPt@oNy
zMPB^Kr``7w4=6O*9jOU_`YPvLQW87+LB_Z2H70Eb)7)?V+I*b3w&lyuy5?5FN%2-+
z+RoUVx-Vz{cyT=6hmL-ZcD73$Z+R^K34aRtvwwZ>i+0}zvHjg=g#W)<`(N?jsXX^3
z_w)VN)+_zs*7Mt!pP|pYb-(`4xT1!Gk6b4+g?=-ilek_$R8BjK`<jjM&+8Jir>zoh
z&iwDK6etjF`{uETnfmtkx>vk^ZI!hWx^iqmU5$8Hnc4jGi|3ZVKl5|K>6c{-7ECR=
zESV9duyy*?_xy)-Ho0XiyYxu(`mgT89(@@JOC&0?j;>KZVE-d8wM+fQt(l?|{=V~2
zR^gOi9{RPdO#1nPJKcwQdsFt<EWdjD!Il*gwv#??I^CUiBW>l@{hV>N8fn^BxU>H{
zU&)ckoW}lNtMg0K%x;;b*EZX~zAE><_FE@=jdIA{wVq82|A?|Y6L|5vK(U`CrF^wy
zy_02Yc=g%oja!sC7Nr^e@KMt|Zp6gAHsA>3ft*eJ*1Bp-Ki26?QM<eM=q--BQ98T4
zcE&FV?Q~OF_w(R|g4N;IAMDBWV-K=i;$Be_;(0MM>t2<y#=*w_51lNvd5^tN+&Dea
zmpSDe$1U?}^FlV~w7<@O&P`F&fB!hy_0yrdkIzbGq&1m67K!s=-DdtZvaM$Oxhen8
z=~!hbZ4oPO%9?D@``~+$N^(c8*Q*0hKh<;HI8^g{$sxz0M_)d7>i2&rU#<E7V%z;*
zjroVJK6v$(@1u@U(P8VSTBqLZUl+gpO|I?tm`t|GlP-OqJZJapW4$Z^8OD+851N?m
zcvGusEWgs2<x*emIdP4H-$j02KN-Zc^s9h^$$O2z@2;J4lZ{S%dHAgR!Q1<%Y}lkE
zbYHOg&+i9CcjmpCSSGe(-lEB~{!ci}@c-H2OOIT(PpUF2x!4={F-q?PXWaVt^Cw?^
zabZ34wkzezR&T_XUQ_yX+VtA)--~ycuaOhDzV@G*|Hsx3LEC#<b{~}CP5eJ0V^xSk
zuGU7`ik25YOy_V)FE9$=ni%4Dc+Z;F=R2wl7pHr6)?M51hw<oF#_zXZh?lI;h}USh
zUvbfp;nMdGw!&kn>gN08v=g5m;yRZpZ&1@Gk!o@)BvdN*+W*M9!rH&Dq~F!a7)_bj
zba53why2_By{D4yc?a*<axUC%+3Q6WA*Wh)oP7EJVBz}m{oAieX{`CPH&$%3|KIgx
z+0H(D=I?u?cxW%Pr@=q|iw4!U^*qK=@6<mjPps=*Fg^HRmGi@EEI%0e7Ec!rxBfr<
zOL$OhHt(N!{o^XHzbORXDaqJ;MnWh(VC6yUrT^dcNzQMO_|m2pGB4@>)~B~uJpUK*
zLq9jb;mrPi&TGBmuMX`sSXss<-}WUW?!J%lzOB8p@1!>G_i^FQn{ODr`*-Tiy+&zg
z7TpiD-LgITkD6!x!J9=@A|X2Jy0^A#HT9j{b4@P6h3m_l!p$j4+(M`J@!kBP@NCbT
z{CjJUOYTjQa%^87T+i!QlaOp{IxR!B_4_~hvr7ci9+qwVu+-7bI`P=l;}K%l1<RMJ
z{rOgRGuZTD)gv3uFX8K^tyACY^pp2=x7HuMYQ}RnSC%I(wf{K(MLlnanViG{8?M=w
zspV}OI9{omR^6!Cu&W_Y;OoJ!ae7ikv26|$^@G<mG_4M0ddfIAhHcVMBZG`q?!=ce
zx;;IA{%yG>7I?8Tq4%aq93OM}uhnk?Q&lrmq%U^NZ~L*m{BWy#Z|9MtpRWIt7F;P_
zub?LwuGQEoe^h4Q|HMrXPpD*Sd{q3llIKp9Wa+<)GxvUY(9?bWwTi(sQH!-MJD(oj
z*luw#>*2KjRy*y*s|3~d*S`DEG0Eudr9J69^8Z=H56s(sFX<usF)4<k_nT86DsPQ&
zx%TzS-^r4#KJzBUy!QSYrX?Yocs14FsJi>Ry}EJn|HOq3HcKu)yrJ@bbK=G6Ph#hP
zN->gM|M%F1g^fm+=jA;3Bi?weDeS9=l2OaCsLwvn5_(o-^ff*bvs&vK>V0Q!*#31L
zF?+vRJ$;|Fz1IB3>h&8|&TH%}c+Md#H`l1fPVmQz-`mPguHMgLli-wj*i5E*LtH|G
zhqT3k$2t#Wch~w){ylrG&y4s-p@xAmm;P-$_oL)NPe|mvg$)m{ERdP&DDR`NcspOn
zN4d$2r>|Z}Un)Ie?x#6bA3vRX+J4T*rdVG6dTbMi$n|ft>et-fF-uZFeWgvnYt9?H
z4`$YvYJ>+d$7#uZfA{iZBmd8Xj%+s-{a?SH@GX(o;D=knY4vCU{Rc{60lygd1(jyh
zroR7?u=b2#^1{OUr&&!34bG18$7arVe%rP1+njsm_Zio^7IsX2@!a%rPW6^!Ro`_|
z&rA_ERC|B5F!ciC6|c;`goWSU1=%Go6EezOzG3#w>fN!wpGP^^zUFV`o|5*`t*+;_
zymREteW}S&D>f~EoO-NT_~eo$+V9ubDBJg(y4^VI-SjK_`K;nMH8IV%<osM~C7$?f
z1*g6Lv#EbI`n=v8Sid~$n#lqGPJVAjw<B%m%CdV)kDbYv+8V$A`-|oJ7p?Y|Dy;vS
zb^6i%{BOQ@g-=F**q>r8vDVJUDqgbAT6fk()ho7+8`OFvk{_&IFK~tbP-#<`qvket
zUd8Oce_1x@{nKB%rs4HvZV!Eb^TTcHUo|c`{Pz4$oB!wIrKf#~UlFru!-`;|*}Q%_
zYm(RfOP#5<Ri4}J<E!JxH5RmF-r8<wb@AbG^>1GmOm+@vZG5olVjhQner4R<w5ONr
z{t9H;?|bXxthw*b-aQID+&5Ufw)OtnSI3lpx32bzR_>YUuM?RT9@-EV>-MnPEqc|5
zo2$!LJTv-rH(Bj>?yU`~FVr?1-&A=bV{6IVtsAA+X7S~}{{NRx_*nYWpC<%oIV{t7
z-+mx@S#djy<z&9pX2H2H3y#Yd=AS5NFFW7fBYUiDhPHQ>|0em~#D>Ppi<$Bdz7g5-
z^qUW7^D&mQ(UVLPH79huX_2_QQ{eq70pA(AuYU&02%g$);G`1c*Hr20da}L9fA?Wd
znfSokMZt5tG=t-my2Tma$lUi{R}%JZ_12fB0;#c0OM=37b)TQ<DHl>y*?wekp7^0V
zhH5IZ{hzyc@fg(2%`*tS<edAKy(G7Q=Z4;c7Uv)KrE*hN@y#&p(9c@9du9LQr0{)f
z|D8&U`z^Zh*7+{K+C^0zzH^I;Ex7onX_vg&bVaT&^xNb1rS0>cukV{sl~o>kFY^7D
zbFM$X)-BoB*e0xLbNZ{OX5X7LX+K>jM%A9=u$>a^cFgh}cZY?E`kUi_r$+>R+n(;7
z&(nRW+G`sJ&kep1SM>*(Jk}aX$E+T$Ivp2PxA=Z^$jWn>mUcI~yjn_jIDQLxxO%eg
z={F72R$I2Q_I#RhQ^YU-iqH|>hkwsYo7Se!5k6u#bz0CLQNfkZn&zoDRTTH^-}5Hj
zEGRAI`0-hVHfBNlJ}ileXU{R4tYqeG=r2@wbNbzq!uH&sMLDOZoz%J?-`{gA|M=RY
zFaGI>lm>nO`XXXuWWgEb$Cjmz=8r;G+do+!vFdl!)tekrN4c{9-)2?$SZ*vgYn$iv
ziEmAphyJK8{481fc+Irmwc^JrW9Mnsem^#+w#!o||JUMMKLt4==GjUFD!ClVm(6sW
zX}&(-&aSn019}b@?n%2W=UBV+9p~C*FZV9_`XhT*=ud4O>lXd$yD5#ErhfN0JNbEj
zNo3EwpqCFj6}NJ3VSD{NT=`R#={jdQxB1fFet5IV`KUbp_bT`HPurhtIhQ8#th(p_
zQbn1=^_tzkCCdXjyEmjxvdNwBZ~lpm+2@XanD~x8)WCXu>C)-P_I{i;D<~l#Yx6YI
z`CPo-zqn#=vCmiY&Y%9#g`aEB?+;TFwO;yu7rj{!rkJv==)97oNK}`|g1&jOWje>-
zmZ}G*m6&VqtQFIm9do|(`mSkf4{FZ&aA#+ERnzOebKl>6@~u8T;OcGV$MFUYbFxa!
zZa?`K{rT&MrFQLqnIfhOZ#7%A?fZWB?qBNX_BlDf_%F#TIZxPBZ;9{!N!)wCiLJ<d
z{``o_7p^&Nf?qk6I+pATy(g-XvS7vS-sc~EFaE3u$xs#W37xOpzjVcBF7}(*VlhG|
z<9T9_H%N&wPHtB|C&oC9aju~DsuJ;<pzm*W_Q&kZRQsMSD*XI^e(1wBIjtZ57;pam
zuDaGG{C(V3^=p#fI<@ycxZt3qpvocqvVHHQRM%UN<LmVp7zAP+XGyocVl9e4^8fp%
zy4LHj#sAjFmzNhseT}bOt-4S&J<M3Ydf)HxbxQm9r9`Z|`s(XLh8+oE#^F(>r`P_?
zP@M8=>*P1`*Gv?21Ya`+ThvKMpLBiC(qzHO=y2eSaQlkT6>FUP4|V=fI@2W5@5sg9
zY4E^}lgT1_oA<``?|R?unDxc<_0^MkKc;5dR?m(39kXib`(hqrz1iPcC$#8a|FI_~
z*7fL_R=Yq)pDSk14(#R0cH8jAOnmxo+196zmsGxF;#_{VD^Aa1-Jwt4f2VoODXMHz
zOlWWjX}r|?=&U@SX{KAWzEF+7>z~B@x-G4dSC)%x(elz*#QY)Q-pWVrTu-;L#}=(<
zkGeHG`={m+Kb6Hr>01k5o|*73(`w<~u&Q!BU4!Nu4{io22YK`@e<pJJ+v#{C?%jWO
z-C4DI`MZ~;yxS~UFS_Y5b&KA}6?_=#+wt-1_m8GW;$HZd?Y;H&eo<i1r>pBj>v^*3
z^<M~9q=dvRWSFpogVQ7AU<%js+iVU`e>e)UzsOp}X0y-kLUBH)j!$FDrJd=V^J-4K
zlzjTbPGe4h9{1&qR=&|K3<7-x4h((Yr-<t93z@U~v(+25)uHE>uG4DHKK}pyk{srB
zEXpQJ*Ps2zeS)*2LQ3|@+3)3zW!&F)w6!i1l+tjRyu_w>pC8NRJ<JATG7+2EuPO5j
z9rB*s{h_`2eoJ1|qss6lGTU}Pk6Wd_WbfAvkrPt*&elHY5m+WUS*wDtu!5mRzFz(7
z|H+Sh_~!QS{k>YW_PhF3uOnBSo~o~l_YR6bS-xk*qQ(Fxv*MbH_@_IhFWgwByg#@%
z?TVu#`ziT@Y15b(bMyC$H#M(boVrEjm+KYLlTsUMa-OYRUiO3Qn4|dX)jNN`tt;DE
zx8`qrqn&?@>el3)x>21^|7|R&{q1yeM|w#@S#~&607I!%mcKu%%)SZNql+u+YUTti
z3|9&(-W)UkiqqTgkuIN3X4IFfyn45F{q<E>_s9LMS{MEN!~Ww6JGyrlWJQ?p+g2Q}
z*~jbD(4r&2yTHlh+}q?+vr85T+OTezb5MAeneid12C3Hclp^6?0X7wp*~XW*Jl;|l
zUml}j+ULo)aCYh<`3<jKPRsQC_46rxwm&PZX~sV1t-cbD9PejW<q5D{(Ku4}_xifM
z-<P%**GZL&U)v&cIrDdm&*jzUrH(Q!HkJA!tp8-5hxX+?Po%nxU-aCFHM(aUzWT{j
z`FNc>_f7m4fBO`3<RRz1tyv))oX2J|*-l^4Y8%5@u&gsAMU9m^aj(G#!<h7*oHaqo
z7m}UU-)b)J<9l9p|KhB4w<sC$!`rfj6CO`CSuXzJSHRcuv+O-=6|Pbx@tq6|4bQ4i
zB(S=jF%x>=z|3rP{?P#r*A?8JIiJ<k-gz-IWIjkRXil6g$ig;tZ(aiXqu3TjxBrfg
z)*Jr3w>&2Fhoho%q5qeOv*sw*Ets;5pUHXMj|a;xE5zTlm8}uzQ2BY~(YM<j{hv=i
zIkLc0ZoYiN3$qx1`Oj)!1GG17xRbl!*t6-JH7lN6c=o(Lp?R)mTS>y(47Jn?IsYa$
z#(zA;Q8dZoX#ApY7p}d3_HO0GU3;s3OtroI<j4QN#jOn+IDT*>bbZ_ZLEWmtY>Unv
zotYMkbtD|xW_{~x^e>;1&=hwx_|W31wagEH7|an6`@}ZkIS*?-OYeG{`op&$&f2hK
z`6lyyEw|35#VfI`U7+uLYR>VNPY>%GOqto1B>b`c#gcVxdyeMHym(Jb&R-(udP*-n
z-(l?Ix_^Do!_S?`mJ(`Rn~s{il@{U+nfXO8^Va*2b20Vmu}tSUco(;&{!pl^GM(qG
z5dGtZzGBYP-=)V+l)WqDxw-2+$A-0bK631P+Aqu8v*Nt+@Wg}b_B#va%j$_;<O;KA
zNpZL)vdnb9${mSgK8l+UyGwF&6u;SLIN5jqt9-7BwVmgT3||xnF1FhL<Zt|8lk)yj
zg)^^A&%d9)@3(8x>c6EC+izX3o4TLT%l^P8`$_@Dg*TH#+HAB04P}2FadK2#6sET^
z=82#)A8Sj}H}0M@ZrApH?A^TcRmI=OLCHxy8)S_>ls;RNoFp7&Ve&q~rtNy5RsHAg
z7k|(Ht?pf5a7d@&_TfU-2#2dp$D?9wC-B{=xFz6ZnpSxvXHy8bOO8R-%5!H*5_x|2
zR7L%hRWla;%_#Ij;-b`rCWo#4dFr?HpZrn&&Z?@I%;PEQrlY*G)8pUW=YKT3yk&Mq
z-u*fG{-&v`Dr@cL7+x-yTIxHuTX4S4{{02pI%i}p+SswwbMD5;=Bp-doOeii_P_t(
zS8hgx95j!cmZYbh(HiUhLQ%-JY(hDU^TDgRyH7QKoVxVNWs8RGXFE(U1*{X@kRf?y
zMU9(BpWfp80peQ=|2YZXy#LH;$79p8@m{}f+i2hMC|z8rvh<rksg+%-<@Iy>_!q{l
zuUmgN>i@2<@BO~a6x)4GU7(dQ?PYwjlieaO4Yiq<wDwreep{32l=V0N<tC<iKi=>8
zaYAFG-hneuL-psqsGc2S<0rv4QS*4ah~fof*WT){`3w7`H!N{6*vtR8QstQx$6kR4
z!jt}4aquVg)N=p#j>uc$_ayxI4{i2KUC-vb1^<6{aj#SA?o%C7w-}DUd3?_EZ@Di^
zPU+N3Q@rw`Pna97d#``ue(&B}$91-KKld;*-RpJM`c+kpYPrLUF#R9N=bxMB7Hi98
z{O@j$-@!9;NmlHsz4yK=^JFdNNIhP;BKu(9`DD)CnDr;QD{iJ$m`>sA`lQX%FWvTY
zZvDFY^z~~dER1ozzG(AB?Iymx_rLx;raMP1>2JNal0wPjLnlrjNWbr#ymdiDU5a?<
z-Tz&E^|PvWKiE9|qL4g~mz2xHPLAJeL=1J=3N5e7O(-}Uy>fn?3u{}-`X`@$YdWWv
zmy12zlF0hz-;Vdnb1iMVVhi5uMQfGH2;9B#C+B|nyR%h-zT9WkbAPq<&z$j!b+`GS
zUwdqFmIyz%Q*HPrxn#qe%c83`D2po1iku&26ts2E{CQ?w9LlS{*p*D1d|&@?kD`qA
zTZ71;x38rSNc`x(di2BA7x|CPkFja4=6&ncG0*+Pbk68j9#PHqiA}*1K5?gr{&l&i
z{iXRI<E4c)A*sJ69nwBGvGuV3vD3E>fBKV?f9BXei}wG|m&G58znaQ>-8B5em9Vuw
zK41R+*>WfHPko)2TJ7r9pDJz3zW%Z2@!Xz~pvK?%=%^a|(W!HGv#6{1ai+N}*~I_t
zk=jF#$|}=WCQE-DI9kNSvX=i2M~HV!*lO8c9<~J~2c$MQxV0YmV%i^ia_XVjZLF8}
zZK^$eTO-fHq=HTH=)<Qoe`wD+VmEU$>)}q0Z+7QrS=Rk)o0b3Jox|c{lY)H(^&jdF
zrWa^lmee~lHO=s^&~o()ns*-=9((mrVY!k{Ghct~H*txtl2>-z)#aMG=EK}iP9C0b
zyC2=ic(i8ToOQ07c@|q$Uj1^|@Xx|hvA@>af3I|Gn7yr`i-+-*`~8+JTx>=v^>4&#
zRB!J$Js@#(&ZN?Pf$XxUjb1)J{6<Mr#&h2ixji{5CwK+#b4r%~`>+4xZqq`&_k8NJ
zo$ndRZrpuaTH`~!_0-qOoBiTAI;@y~KmT@UZH@nm{P};yCj70HIXSK6QR?rq>j{f@
z-}_kk&-2vVm71TbxBgIhzVz?E!)|OAHZ7$&t4|6QY+JjJ^QGm{fZKmxCa!VI72E&w
zC_8_>ctYW<WpB1`cmA-8k9oI;zn0+P9S!?C_4H1aPyRh+TcaA^tD;Hkn<B$3iW8N+
zZ+^PJBmIz3SoL-%9sB32{cZ&qiodVQlG}Ze<Hh7pub*;GZm13N?(@6vcqsJS$)+kF
zug~po_fBFHl1X=d<??H~SdQ5eqaLZJlPa_JeLwagx3|P_LIsb(Z=ZlUXIi$O{JD_r
zfFdi)bJJ~#<ux~zg)+9pTN-`8XY0qf?AGgFclkNGKB${~ZQrQBV)g7E=SllE9)4ON
zo15?GRdt{%>mRSx3**+tf1Jw|pZX+=YB9QOIl50hM9t*?gQtnTnP*J)JNvR~YhMod
zb^4R<S9P=hFVZ=RFJ9d8Ph&&bij>XF%2V%VCVu*Sl1CxM*jQ)f=9eD-W+xsK?Z{Zl
zFJbmuGw#ZDef{a@eBb8W%)GdA_jIGlOQs#FnUu+rRa8CUdPQ7C-=0&oi(lMOaG1HL
zO#Ww^GLvKGyd@D<XB15T<Z>4~JPGi+Az(e%*;j8)h|Bs=@ngLkePlKj{N1>bWx?9{
z2U#~Lyo<j5;+DJZG{HosGgWg8?+2Xu_MjpyYD>>={i{}q_m(8fy!Y6$ZT|aL=9043
zj&j^rE7!}q?eMyMXUG2n|83fp`G>EVXYs7hZZE(8vQFvK#_b<>#T_zQ%Wo}S^mIwE
z^;edrT$5fs|M_``o*fo0kNE%Z!(pC-2mdCs1^sY49>(dWU(UUC?XN#gFVai3U!AWm
ztMoBG))rp0R>AJxO4BkUCFTo}PZr82r~eb^3D}!^;k=%>+^%^qUfDjl?_4f+takcO
zC)S5c?F1gKte+!)I7ssy*B=ixT~qhWj<eS`@U4FypZX;%`k2792{&&wXBPc(7B&9)
ze)$X8jHvkAe+<KHOl^x_mcK4=Qh9XQrYQ6F*B_f!9q!ExQ&p?anDnBxYWMW%A7;+|
zbpGV;_uYGsofPOgeB)<*-Uj#Ed<td<o5gk?o5DH2a-sEpuN&QO`>kBuy)SmuE!PRX
zaA<OT@4gsY`NAg(Z<F5b&2)d!eddhh{k8Qht!}&b9u#bsv|zAR_^PgJRK0(N_LiRp
zD%I=v+h&%Fx$9N$z1{S&?8x6uY3tTp{GT|9@t%&3#J$DOs#o;?RG7@vA2Yu;b1I8(
zqTJ?Q8L84|#iz7SN4>K7WEWh)^{nO3tDCb~EoO8t&=E{b4cMN?_x|#}PewC@u1$Xa
zi1Vr1k=<E8i%a62mmg{Qn6hEvU0!*<hqdd^pMH3K+Crn4m>Uy~*B?D?wr!T`hAaCg
zwWx9B{n=7A_vvz5#pe|pduG1MPdRCNt-qsroBYww^%nkJOFeGX>@!gd<eTKS??rXh
z>Tm1gw#M!Be6qwq!v03c-FuR$v;7V~ykA+ry&&>%{H6<DN$0oB`8hwn{ajJ(#$~$#
zlO5j0Y|VHPxNrX#uAOx*r@72;6<1e89ecSer#!xF_2;J_AF6M4e`vc{;?K_5hiC52
zpWMHB&Vsxwefye{+?km@-HlfJFSyL|6u$88{o6W)<_i<t9`A}1eOr;ZB067H@!!%#
z@~_M;a&6leU!=6z$tcqF`y@qOPsyy!&sBVQEWbb7zha+}i`etkq5EgAmI?^G;8?%)
z-kp;#xSTYCPI(#Xm2G?c?xobFIsBr1y8j;fZj3O0Iz3$Or{NWKQJw6<(*2x@DWc8y
zEEDc&+<tTLjyr43&o1lhSHApLUAXtw*DAf(HKDhbbK2Fvm>>V5$7<g5^K1XJxTI&k
z-WB`5_oBG<{p9McK|O|BH2=?8H8ZGd|38jIrjP57KH8UGI{(S9r=Py7a&+r=?3mre
z*|Bum{Nk(WS5`#3-?Vp=2yA{7vwPZ{`OG}5AKaRl6?W}iZu@@nneD6x4|pAFT9{g8
zz>>Utp3mt;mityR9F=i@A}hwcFkv3&gGXW_j#WwDn*J)R-<tP(`npKHJp#vX)^;DN
z{4wu~hC-9f%(+HCB$*p-Y&v)7<io_%hKkIuR_blNU?A?oWU%Vh8PIVwTEbi`hYmJu
zn4n|9t;=-OVZ#I!5l)^%N0xlQwzK!{``!<yI+mQ;^^l$6SM~jMKj%L`-BGfCz5nd`
zGxB%!7oClHp`-n;`@=4m(!d@6>~^|`Z+EXMS{!mu!vEa*wf~QX2DGVa<!@bMzts5C
z-L!|x_9lxSn>oK`(;ADbN?(IhbI)yG=Xjm#rOt$jO6PApT`stJrP8P6v9Ht&wqL!&
zaxl>|e3OId?0pBdD)t<hX3Ab+Gd<(n>wy3Kr@H+=#c!+eNWZPrfAY~IrhVMZu6MT=
zIVbv-%3Gh{XER>oyl2m}=II{IYwv#h__~m*BH>kKf>g)rS5fC%bu3;w=>C_wvcrN=
zN@PNg2RAR%!G;YFbY5_?9!hkWa7ORe{q5&0&e=$<4D!9}DlxgUYajc8`u~69{PrC@
zoWF6OVrAY_yZ4_jtADZkomS!X*^awjp7%?8&Nch;rQH`Mi9O&u@wevrRX6U?b-eNE
zu43EY+J@ddzv=3RO8;E*V6(=3+zp<-Fa4ITtLmE8+;Al|^p%-$|Eb@eY-#4p7Ed};
z8ZY5>Y0B~qjAhr^*jv|^{W{(7``bRTfITm^3fC3-uQ|-QcHT*?rxn-QOL&&e?rL#l
zs1muW@aE_Kr^Ou$$~|n3cg24&O<C2Iedk=YAy=90_HD|Kuk`T<uJyk*u~T!7RJyO`
zp@VX6PsD0nUqg=RIU~U>)O5<xV1kbbH>XphLWhTwFt^(FeYRiM{rNFfCpOgM^eX+x
z)DQL-|G$5If9b{fzY|ZcmFu%jpVoIjeSOQkg1cwFRqwgszx}X$T6M|IjYX?l{-o@C
z@cHkGSwFY!v|TZ0_t|F>Yx`Z>m*}1O`?^7V_orOTl^(U)#TOR3o#xXL$b6MG^X19E
z#^;W|d}C7g_iu1~j!e*9{~vyv&KfF4Br&bta*=Ch`emk$KbPZ*@@Bl(T(fU=iRou%
zpLc9ugCfrrt|)Roe%W?<ddlkk8`pFF_FMgE^&vrR&E)IxYCT)c9(lgdy8KCuL*4TE
zu2Wtj+Y{e99snKMqa)14>U7ZIzyuvB9@b{2M283uF(JVY$jK-iEr;&Kh87>y3X99i
z)|eV-URjeA`l9A>NL~N0<G1!tKfUnn?&MmJeO61a9b^%iTRMC3tzG6PwqJ^P-aD7|
z_<z@;>(knwyyEC)%Vvsxw|nlIX!iRi>1uz@$G%clD~cA_krn>!`~P!)x+lM%e^ULo
z>;GM^;`j5t{!%}+O?Cg#pZeEM`852=YTH-rzCLfiq2O|>PoV;9noJTUI$a*7et9V|
zZ=cNa>bNLZM$`XQcay*6Zz-v}cly}|1{b|KGi=;1elw1m>(-l=Dspvl<ufhD-=*bE
zIx{b}&6`}dEm-$6)3Kd;XH)*KIDM9#QA%WnhcM`7`$Uh30<k34rp6?J02fit)|+#R
zyO)MO@znZQa`)=vmX#LL{=t8WztnU8*njx%|95*nm(<_3tYa36mD(p~XD3qhCtYXB
z=Lxs|ML(T=d+wiKtN!0S7+!N%Yud3#y=HF>WJ-c$`ad>WF0K8p$hy_M`MR*2?y~N*
zHCH}eb&PRiD^*(&vs?GE^s#n{g|D;I^n!msjN1NIX8W&KqLuSAsx9Vz*N8WJ!2b1j
z-Mir2e<$8KKA%yN?!a*W`2?o>GO9Y8=DSv1TcvT|fx%uicACZBFXHFtGI5*~n_#$3
z{zyucnfS8ZleY%?Ca<c#G9$V3!5TrS=D<~*e752L-Y7f(osFX*%FW7jtYJffjt93U
zs4zIOOkAR0WY_9vroO43QMVQ)J#nA-$9{H=`|0`ei|gjQ$rrs8efMd5%ngH@`A<LI
zc{$7b?3-2Z=07#L^LxqKZ{cn`ydPh1pIvn`^H;?~i$uM5E93nR`tnG#-IAMoG`Lsf
zX4*kbku}R}m%A*?zmjSmq0(fqaW}tZ&dZQzA0{-|h6H`OC7r!#>O!CCNB=YaNRXc!
zywbwYcool-vmKZ2H%+i^nLJx=MY7Pmyaj>V3c0l=_ul>g>=pC8MFHFQpSmnxb^HHv
zPR(Bqk`j`Z`7@-x&bw^(;N+c$J*T%N@D!yoYNka_xmgvn<VWcdKF}duI>y{wOb-(s
zZWxG3v34~k9EkAH;ATDKXfVOWO;qu;)T)!7ieX)=@5W}usxv=-zv91)^pCf9cb&1~
z-x=~XIP`ySs7ilGbDiI8Zz-3Zf4*=pbuth95qWpjnWFhI#k=*S%%93Q#jQFNweB!q
z-*>Tle+-RsbWCn6wyosb&am^H%-i=9>Vi9;{)n6r$Z+4Q&P*;#Gt(i7VY#qG=nf0h
z;*i3czmuBcN~eiQ{`O_)e7!gKme7w+a-Uy0+^>E8bNh#lSMwFT7wmhk*89*wsX{~a
zz0cw4(_=&YP5$g(6dM+!a?P;vh}XTS33|V>_dR@jDM0P_C9hrVr{!4#a&F(e{;{_1
zf&~KuPrTzS8Fnt#@=Ibz@7I1!G7m55`u~4lhHujRYg@0a|F~ai_x}C=e}&hi7e(!A
zE17)j#=ApY5~)4yl@odOW^R)xyk`G9_wDu@-a+?Yyj<j{-YR;!;fMea&$)uRcMiV%
za>UE<XpH=ixoMy4S9BCA{QL3QLHVB549D!IHj{(z9d{qC7d^$dmd$?d@%RT(GSZfc
zIaipYIWvprKdP_&e$>ONCYA4Y-ZiW2_cANDzs_8>{E>4~TXAZ3l`a3aO$&~lw8_})
z`Dk;}w4$FAeN?WPy?%Astj3|hp-?&EW5UODzbTWJ7$vV<b>k*mV(aztl$_+18XG)y
z0@S{XajLd9d1l%@Pn2Ra=v24WojrB3;?w4R(tZnOo%WrjF2TI6`17nVsniL_{_dEx
zYgWXkTBG%oi`HmtYCQNt-tAt+>otbbJC8?~GDe8b;CSqRRjlQtw9b(wwLYwJ4?Xrj
zzfeC%A@GL6H=R_qRn}|ftbTnq{=VFjUtcS$qD`7M8_Z~xtPs0j&A=aZn=yQ!5##BD
zPs~p{?q``P5OObst$Yga>!fM#*K)rpE8^)|)w?=of0)X?cfVtn*2Pbp5Y&~W&#_+S
zjs78y-lK2-Tl1%dx;s5h;A(C%muV1h73*WSWNbYD>|#%)%|iv1hfOSOH-9g7o8)S~
z=+WyQ1#XdDncI)&C0cc#GVSxZt9vCi{Qf0o4FQq<K-<-Yd`I;gmng7JWZK`DlK9}o
zBLD9de;2-u64-m^>iXRg`@>d;$>~<DjhKA(_v*K)0lH5<|NmS4bzLnBTbt<QE9^%c
z4J`Z=6Wg=@#ZS<0`_Ghc;cPgUkbsJ*h8fctc_I0x4qi2>_s{=wcRzc7>-ns!`b#=4
zR<B!qUB6?>{N<7r6Dmwf%5)MYu5Hd=YUc4%f@u}w#|BxZz-KYs-3QbrACGJZN*DDv
z*y=v(@@%`Dj7`5iE2r<2WO<&zF?}u1ouh^OB$KZTTF%RzeyXXhCM(NnVaa*6(zeDh
z`P7T|(_YrAedy(%{(5ShWnuC@lj)`2Syx|+zrK2AeRzb<jH5^TJfG|fdbmZt*>Oda
z8&5N9keP{(9Al%YPXm+sWvK(#zAmgwO4zxpy8q9-`1&Z0uYb$--uk_>Ht6bg^H={@
z6@Jt@`fJN;F0R7GJMM4%)*q1kR8)oYPQSvo*9?;Wm1!KMD;LOD?n>^om(fcW^zvEo
zwBUokf7&nmo_h|Zj28~@q^6xTjTZfMxw>{^?4M`sZ<F`D`IF53Be^N|X4{FP8xQpu
z=YBGb@4O=U>yH!1j3wT_f@if2A1-?%Wxeu}XTmyezYMvT58r*cbhMT4RdN@b!SVRe
z`wNZs$F1|{tX^XpHcd5dNtVpMzfphVzh-^#+&f#o_2d?l(}!YtyI!8wpZ>j+C-DhG
z(6N*LhTA7<+txa~+x~CC?9f;Cz4x0I^4xuu(z@lG1;4(;ajT%uPK%%SSNw4BGI<#n
z>i+j@hrqAk1;-=pj@R5M`v0SPahtuX<83{=t2LHt{nvgtSx>0q&0pDEucq}dW%Z}K
zY&Yi0?>qZZw74!q|M}Fr%?Ve^;<x{C%nm79T6HvT@2{i3jw&pwQrjTtxOJ`l@_#k=
zr}@6;5c(J|8P3IZQ(;}){p*ZU%s%V|hqb2j6^1-K`1p9{;lmq*+7mSo@ar%nZ=1Af
zpR8u#@x}-7%{QvddiW+K^F5Aue`+rORRQHJTlHNn&sWu(h!vLSbEl>LUo%hOLTFd(
z|6^avW!vtQJKnS1%aqc{dA({*L&Mcm2U%VS8(yw`$nZy};_GRz+0Xy4ia)=rE9GRp
z{=9!2oTpb5{wgWnwC|jO*|PZi6E+-of1fYjaCc){vi!;$o_mWD_Z^c~yYx8sYyHL2
z-)e?$Q||uj4lU$6(!9Z6JoM!B_l!ou_f`Bpd^ljud-_9<he1%Ch)7(4&8>=u2N*sb
zU|4g)@xX))oc;m3eLJ3Cn3)``^_=T|Yia{yg_+}pFU(>8`Vu<q!bR_Ws*wrIWj-1k
zu+Jh^`tc*CE)nZ@W_zoeRx>kt`bu=J;ghd-ydZHdvQDSZ>2H(Sje4%>x`s3S0~e{V
zTx|dK+U3puKPPsl{{0*I<K^yoYhPbYubKaSPQvu+`vMu+-#cIa5qOree;S8a9cPa0
zVkU+y9~JVl?|&+~^jBX0`YMi!FxMk>lGj(i3y9$`KawW6YWal{<vlEOcpvURcF<8i
zOoORNeMju14Nj5^F1=gvB(`nwNzN8c{uf8<{<R#GOFzl{P3WNWMz#G?Vc*ynBpxwd
z%Xq$~%%62xw|iI3zUvjU+c?d$ZVH&^AF9~bU-N&~v$W&iB@B1m@-MhqS(W-@s&=jU
z^FwF+Uu&*h*3{4)V)AQY*2jaq@7r3PGykb^l>RDQ`1NCYey;gH&J|qpE^YVz$u~U~
zF|bJxi40%$;bJUvc|bqgzgyY=7OdIv_th%-^!5J_D*Cr&ZE&dma6EKRslmS2>7TC}
z|Noa%RCCdLRjgBh?En3375A^{A1;yjWw%nz*!=&Y?|Ydicq~`9n<klezR4=|UEF@#
z!u;QpIcqzoy}tO%`{diJ4`bgw<zYQnb-X(Kt|6OU$b3y70iJ@Je}qJNN(vG;PJVv%
zzsv3qH@&9vt^3W}QM_`!!V}9{^9k>MeF#vQsNrC9DB$niqS^QP&$Cx^PP=HCAl~ho
zI^|gN)_U7tj#EPNg8y@KJwC+TB-hLsU$rqcYR&(@vd6C<a2F2^U;O>`)kp95z1ySe
zF|#mw+3SOf%e~9F&&N9yYiOOmSpBKu6@OE<T<?t=_tzh6my+N6Zr`>y`d@8-#2vW1
zRiWYK{%Ohwa*~|BKWI3-%z|a9()JS_-wz*PJ-l{0zgB0^TGcS4nX8g|LVL5eZ94bA
zQt}<c^n&|O^qb!<wzS(Ga^sx5l%U(fxCd9wRRu5ql{#)?^!V;hQ>{539vaFWwXyCJ
zf^mH^482CiA4ECq=3Lf_yHYAX^L|F|lD^L1c~_q;yC0J}`P&<ZSWdaPz^ZQkqEpW{
zOw~FcaQ*3rz^PAWJZf^Dy<NOcuU+<K*BZO9nfqCiK1@+(oND^=$Id-pbFTmT;<sO~
zxjb&4{<$!J=R;DHH4S?k*EePPX-_yk;dV#TABp(%1K#`MDn;!NPkQ<zT1#6h`|ILV
z8O8eNO8+d2xz@8NeJ{N2{qS+lxnuFuXQaN#KmB%XQLkVKgX?^c!;ROE=q?bqaF1nN
zkh6Yk>^yzlUk0as9iOvY^=Ee4zQ>$K8dIKJmYKb5+n!}1o7-M7Y(3U0P`U8H&ePvs
zUHUV*{>Hi253FDMUiRiK*&DHu=O*9weva!;fBencZu`E<I=SVbqW_|2R}X(n`!2!0
zrc&|yUUMm@TaD{>=}M|=KeB!~jrF0p_J#(A+=XV}Z||+0xwY!<L6@+Y`P|bVe%9A9
zKPs`d-skptr&DY8e_bE_|H#$-v9XJ;R&83Y&|b~;_CNPwjs2!?7Ej(gBh8z0wSfQ6
z<#T0!6l^!YB)>^gM@RU_pAFBq-@O0bxO(Yd+Ycv|_SS_Q{u2AMzjQszld?=5rl8G9
z{{6wV+$IaBh=_l1DpitNH0?8ApDWMqqlek-_ozxO|6Y1wPm_kp?<w1FmF0JB-SkhP
z>3;YZ>s&Xtg6BIFGQM0q^Sb5S!E%-Zh8kr8mu>gBqzgPO_@SMbcj&4_R&|q<v8?y$
zf~EIfs=QM?_SEpcMd;V^OMxnK8lP^ba$eQleQhfL<VU6!^VsFm>MrbO->3e3ujc-%
zE>l?c*3D?=?w!aInRi1si{bwT*ZD>|?X$dWs&@ole&PLZ)2lswi?9COX=9OD9CqX6
z7X2SNTgtD_nj7X_CR88hu|LT4N5V`yr|=MsTCacesyh@?=N|vgy!qUA8+YMe^~apt
z*S2l@-PPT9L~r4-=xeWo-rLrzPLECfvE=T;zsvL3{-kc$Y~%gu*R4Y`J8qukY}Zx_
z`eb=%^@)<tovpmaqI>RM+1ScZJI6x%bc~vYnOy6s9ro5U`K~kyuS;U$?A2g0I@%Je
zr1j)ZTEQXFlDhW2@%8ezv^`(9XRrIuqL%UV=<g+yo_v0NUoU*p)%`au{~vbm<=7>%
z&ntNUhtLPt!_-s%UjLJ@LVoE}qeCCEW=56<)II$i;(I8@*-s?LTK_C-jrprhhZ~E#
z`lmP67>Ou;xW9P9gDVyXuD{vP@G5)G7A_TTnZ@FBuJbUj&=TZb6tM4&)Ti}6|0Z9E
z-5Z#-bYkW4Q}O@Thlfv?y))_6>8tVLlP`bp{NpZ>eN$*cWnH7fLB}vjIf2{_l^-@X
zWNpidJh_Nn^@9nUvCW~Yb)3@}7<hCYXUVi{v6h#<@DE=fyK=JN*^1VpQ+vN2t=W7_
z|31h1I`4g+ALEPf#wH|+$8mCs9<gEGTt8u(abfIt*(`0nMAyV^GgdJ(6(m@AaG4)k
zaB+QjR$pPjxq#(0H-27a(|TbYn8DiguI+VdhQlS<mu;;N9QZ;OXUVu19c=#mX5u@h
z9kvfLxI@p{_VsS;yT5vx+pmq`zrI#ojrtn*M)Du`L5Cw<r}GZVr=~Hrvp(n%{q)L_
z`{&$U&HETGJ$w+aWGyt;v0z3Lx5k<((c}}3tL7d3?Q~^dn6Z)6++L@K-oMV<gtTgJ
zJ5GN%bMkD}s1q069o{;;H)WP%Pg6=hoc`Kyhp^`)w?8ua$9;H>l6&<2xGmay<4lUB
zZ;I?w&lys?7M%Nbb?@FR77cfGpSep@v!5^X&uo9r9ChH3DDz3?J^5yr8QrfQdh4Bi
z<z|4x>K%@036G{~FxZyf5!@nhMC|SM$wwFk8pSx@vr49HE~s7FXSSPR1EYeWj?0Jd
z@3hY{%3rzq^GM4U$FL7mTyGyzvb%ZQ`_$j+t<x*sMdgRDn)>vhZ}pKW9uHmTK3emB
z+3F9WYoqLRRy|{CXa3o=Yjx(Dh$H38bdz62E&X8hScRdFkNeje_9x5s@P4q4<N5TT
zZOgt4nToarB6jm+Pru)Bo-ylw&E9)%JVH`0nS|EFvz-2DKI>}8aRx{Jvi%u6ALMP1
zIb1C<zj8vp=vje8V$%bT%pmFGb@5$SG|JhULWN!&{MaLSaf$NM3zt@D>uhihD!N>w
z;=%GZBkk28hZz}`<|5lfU2b%zv0T~X>s;>n|6-m0kw5+~;$v5TzqRrH*<atwUHt2X
zpZth0ys$Jux0$I))H&+fLPd7rs8y@hG0bOvci~Shr<|(tf>dvn(;t6kKmDmb|LUDh
zzrM`dShu%4ZtuIjaeJp9KX0NgVa3VuFx|EEKJ(rO+fCkn`XnGaD>yUZ3Tu+qN{@e?
zYs?Ho`<$b0WmqL7nB;7{>rjySP<z(4hmZYy*~GUzp46AzcOgT0e`1WmY+v)L((+&O
zcdsgaul?dQwNP%Y`s%B9S6`2NJ-wYp;`ZL^iwpk<d@L2caPZ!>(u?d}4IZ}q-#vYI
z?p&_<%rX8>YRH$_#%dAX!ZN3RmH2M&@Z+kU_2i4-`|I&q--SQ@TK#C+->AP?zLN3J
zPCr~I^}lz9fk5ZsuJp?eX6{Sk8oN8>cAx2AuaYTp;_y5DAD1NE6gb5%H!?CFKF?n-
z8YVjLSo#0!r}r(KBs6XL`oB*<`%Ti8@KxGmZG7n32DvrY4&8rR{z}d2Ld~>0=Brtr
z&Ak(MVXkN0k?+yvN7q(eU+tOo_wJwg?D%=lJu>J26+CzLdGf=`Psi0?+wiUbSzFJ`
zku3P^&nlb#|A(D!7<`Lff7zofzUsl2#mx6_MDNXx(Nef`<#d`vo1tpptVu6QJHGwj
zX5V*QCE*JHo5&Bh6PkTCZF>4S)aclcjKi1D9kwjHb>yJ#*UV`yrwe}9sRsR#d{gu2
zjpSxCNlot!)eq(`X5A5_d+B!W?LEFL{<SVPSZb7e_?GzoqQ@(Gf6cUxnjiW)f~%o{
zL-Ejw_hwv%Zq}*6y#GG^RJl~EmgwqNlg^)K&L>ygka&W>zwB3W<<+Wn9hF;8-F_z-
zc66cptXZi`_gTa#o@H$3<`ZJdxmsfOO?-j0;YP&^(p**?p$-?<+s=DocAWLx$+Rwq
z11I0#O_XNb@aB#CIeTlfe;i)<H}35bKNGuNRw?B0H-*|;ZSnUu%w)T%sgxSg>E%}U
z^TU?y!aRvOr(W#ek}%!sUzU1IAIBf<j&A=Bjoa$aOPPE2F0BeVaf$P@#hLx_2CDtr
z*Xv2}F>l+k{?$tF_y04tDi>(BnKpc6a^ZM%V)css6|5Ef3FeJ6IYXll#I-c|G{xN5
zq-;L<uCh-3viG9Xb+$`S_h4JYd816f`5yCmDceJWcRF`0$Z~)1`r!5MHJt1rpFceA
z|0k;Pf<caz<$&otzK`ozV{VrINoBnMuV~u;g{70<|NkubVV6O8?UT>{8VjrPJ?^ge
z+h%1^m~k**=8AgNw{piy&A;4@RJeNi{v_+f$n-61mwx-ll{jOAcbg33t!=J#eJp%N
z5%q0{wyH~~EM1$O7#DVvdFzqIC(lW|;{9}M`u{EAD|vn{eH6BD+WwezU+NoqnvQ6#
zdbNE*L%7|BbWs7<yLa#HNK??<u_jDgwCla-uI?|dAFPuJ`|Kd}=Y(hDzx0NOzrU^C
zG^KmNg3CWX-8hx}@p6mUw>|y(|C7G($rt6<AB>oNUwz+9?hh{MwV&$3cf`$Z{#RhZ
z{fm#GsAq%Z?S94eW&abE^B(P3`%h!l`A`2R&zir!OlP}ZTut2{&)r)$3!e6l7k;|%
z^Ofzt^pkdNUElfb`ugeno7XcZ1k9Lj^*!xsJ|n|NmhvMzRL?)Od{<K<|M`>bL_?;3
z9M679?_SRtCEe%Ef9~*q)%(A9-+wr1&r97eD(N?O9}#-xBl~sFRQvu*mRdRL)uKG%
zVS=mWdvA(OWqJ4DU(Bo3Ngg?uzw7<p^||6)-}~!J=8E56d1?LAxJg<cf&~6X)OJK3
z)%sNTKYpQJ=%%QpUt(6TnP=hrsKsuHVdD$~`TWp77Aod{pDo}yKdoY4grQ!1><7oE
zOPQoz2k@5f6l~e0Cw4Z<D&ocUuV17^dE?Y1WS@Swt#6LFyPjpaUP0&m%e()rwVCs;
z>-dB2RbL~NwtBNaTolp9$aMX7RXwZ3j~RbV+By!s39OI0I)BmjN0#|IGdFg<X8G}P
z@$Sw0AIv^K_dWZofYjfwSw1dblwUIM{<}jd$66E~a<>XL{}a4<+$r%L|EZUv`vNuy
zPCgLNI`Q+d6(Vy?&g`iDq4P#ZXc4o!PxP}OqaD+~U*?#6xM=3>eBM)B4^)&Lws!Sj
z*>_91%-}%T7L(%<50Y7)ebSlq#&>DHTtHFLKE}3ZAMYRAH2K7fSwael-}0_J4m#6s
zoot!!^K&2jtXa2=xGz8aXZld>&hZ`Jm2M=gdi)^7Q*1)ejZew{kKPNA{`0~proKRa
z!Hqehfv>LLblY^|Uz~09+A9CJ{VYL`;^Uhnb@la5x!3+WTGuWA;kbm>0^a*=e+@5}
zfAG91>9U_y)_iMO_L?Qt|D?q4UbVUX{fo`z^)bo3+Ke*Cw&@*_&|o&RIN@H}wf)`A
zpa!;z3+;<JLOupq|IvB!x3hM|zos@7KMR}pFHZ_v`qsJ|)-Ks?TzvcsljY>%&`Wj~
z)E7HG|GE2L-uz_scPEOcX`T4Nn_HEb79H69HQFVa@8R~ymJPy>_B$P1<n`{w{Z!cw
zi5Cx|EVr>=;|aZ4`1$cShc{C{m~FoK#)M6yG3v4s_oPYN*WF&k%p_8}eKVUmf5KMb
z+4Wb_y+ktqw>;Z@Z|D5@-|FQre3smvd{g%SENemWA4>0zuRkA?A;o?1|M~PMU6=Fk
z91Y9)AN2gig#5eD&c*BBt*-t*;f24#-mk1jJQ=4=FPhz{P%3}--BVAdW7hpW4xKX!
z&dt8<|NPyVgEd!!W9w(mRlLzPNtk8-eG6u_3y<aFf9x-KYf$y&Y-3>h5dmkutp?S4
zu|)+B?2k{hJ@xp*e8!Du_Jr0bvWVAsH?hsIePDlVe)n(J+uylT+kRFB-w~;3DA-;5
zx0UZ;*VXlh3ROa`v)vWrNPhL!Re*^_L15MK8517N6Q9q#Dr%<-_dDwe^SpBQ1v341
zo3+N}XvSO9lTS?V?Fsy}B4^_2oBF%!UH1Lw`tCAk?)4NIzvaJXT;F&<(%A6y*Z&q_
z=PV)$S9*KT59_SI`$$z!@#cKX$Lfz~dr!XYxjBqa$G^O|N6z2<Pwe9G;)8$GC%@lx
z{OFIu-+AWhr%tIq{abwed)c#>T^CQ5zhv<YH5Z-#e8=6QJ2&RN77hF#QZ#+fx)s|t
z?G8RRV^`?ymuJpgcSzgrQtX&ibGzc`R@T|SUbDJAGqSFDJ$p7o0*Anc6F)ZFKAlnV
z<Kxrk{OSv4d))t9wYPfft54qkA6#6L`}fa`Q(Lyo`(<gn$y?(Or~3jgR!GMw`TYEw
zw10WfwMD$U-@RAgXQ$*bdugd&QRUQWo)7v@%WSGzskeH@$v1YUf{kB9dQ=v_zoLDp
ze@?N?$$Ovbq?Jp!TExE1$*6g0v}9ZCCeHniQIlR@U)|ZWKdPm3vgn87$7(cPlTS3e
zs4idCS^n$z#)lsn8D9Ji<FDY!Pi2lgn^W4<FzeOo_-zbL4rSH`|Cp0j>}O!$wsV{%
z%kITmR3(1*lyUjbr7x3qX~ykMnzs6D_sh7oNzcFkO`iI?wd%^WtzXv1uAKEfc=_w=
zO&ls8`PU|tG#0k8sAwPhT4_-t<t?Ev@o4SAtyj)6y7p}lY+?KG(^yEi<6*tJXgka2
zGY3+**j{!TUMW~{*l&NUtx&}e{stq49Wv6^PY*H8dC0WlV$PzfkG!6J`&s*T?D`V1
zRQ7WJwH4ugv7PImg}<%Z9kDL7>w#rkNuwCYioTTZ(oL38yc~O9hUtC`-^1<Xa7)Gd
z&aMb%8-{|_KUQz$v+?57>@6{9>EqaXT5rzQ+e@AlgkF|X{M9VEA})ia%%4-^EMMve
z%Q@z3&so>xD_(az%o?44Hu#R#G7G6Miw{ijJndk-aC5*!uX`_9)#eoa&=nBoWI3An
z>ukcjIQM10n%8{{t+wwHy3L^KaaW_{>>f2CHW^O)x%q#WwS3ro`psAA3T=L-{i4nG
zyJj`+d#AeDj`Q%rPmkF8_c-6Ho|s_z@KsoF&k|>+o$jtHXTAS={q^+X*jSg<u~By>
zI%`KPKmGsI`iH{o3vPY$FBkHA_p@QOX!aj5$3WJk?d98xZtX9Sn4<RQ-A||ey_NrR
za*EFCo{(H2-~O3-iiBihl2o&_NN-^F3s;#h9qTx1I5bkX2D*I=v@B@aAyLFU^UXz-
z^AU~420!X`nNBTjoXg(d@2z#|<IGf>eVm66R^PpQ_u9c%a*o%z_Py}-d>!%hYiv>I
zJ?)ySubxcXwfcwiqyC6s{XhH5f7a{}*i?E=L}2w@&a0sTOXBadXZTmIbb27fc<x{P
zH2=7D%J-M9kDAmu|LX76@7GV)Kf)>E%<^D`BAfYweuYVX^WP`m)H>^As&2d5tCqi*
zWmDVLZ}G`0+#ly|;dI-#z;MpfePz3MKi;UnCVuh-w{=gyuH&fQrxtxgx9a*44}bfU
zA1qnBlnbo$+5Q@IG`!<#-*~~twmqw-=V1D3y(K@-<(IOUef_<&mL=zJ{O+&sR+gx>
z>#hpDvTy6J{jY9K{ZucutNK<!k6%Mis$BQX8ooUuWyx<iV!hHgvOn?&+~Pb}KjXyx
zWrf+@Mbr5ntm2A4)Gf{-CG`J!Q~I{azdIlJt;%=&snfFKa=?cDMr}K1_wTIr%bNYu
zmUDggw5#vA-dzu>I$F0rT(LfOZ&Z^)i~1zD?;2%a`en>bYbqB%?0lu>S?jcvU2f@}
z?kTA)9VPoe6|4Qfwvs83Ys#dxi=X_8^2peD`cA$778my2XXdUnTjKIFYt|K$8cmy-
ziw~4Z|8+C%+7rZd>56H>j~5vt=N~<3h;#a$@cPr4^##)|3tayuc!YK94}(`xtNL$g
zX4t9kt+Fjw72101>8+=Ad(D^_C1&iH7pKRb7G$^fQ{#=i4|%ez2kwN1hUYNnFTC!5
z(6CUQM?pa)&Y|~q;Xkbp(+(+leeut4dKa^H%Jvug{uqR8RX_UulDppN-@96RJm$^k
z>DjV>dc_UFd(TXbHrJXntT}XX;~BOr-Ua^^H9c1dY+6#U6Q8}}&5i&EE%Bg?THen~
z|Jr_u*yM0GG5`7NJA0R{yChTL@ZEdE<o1`z&YN#5GQSc3d8{sH|J_^qAyxZX{=K@p
zepl+t+R_8|2NvYL_Yg`@Y<Q-pr?)k-Xo8m*vxN>PSJm28KYPxlZ?JxH@o#0Jp5nnv
zVejt0W=}MzlHfbi@L;iB`lFdY47g;^a7-u==&M%#+F*A3LiOZtr>8{7wXrx}eh{-^
zTE%-g?t^X<{%`JX{<!diySs(l_4q}pzjV}o9d0|q$93iM%>1QGCwx)o)tvVK<|`BL
z?!UPYgPavqB^&K0XCB#SGu_{R_53HXC&Dk(3kKcunfTG{;sx9HO*1|8j0Gz{D8`EY
z=Ct?zqkYF?%gX%1N0;<Vt+dNOYTvzP9)9b~j*I~HkNorPu1To~zh+*La*40}b?e~?
zkzMcJecyAy`NMV&)}<4?8EQ8lV3@~LIjg|)+1F{a?@sVcxc)`w?9ZOsxNRSnY43EB
z<z`4t4n81b^OGmze3AE~Kc1~c2j}1ZA8O|x`**&>VS{tk)874`f5)Kz*Z1|NTC@Kc
zpD{GL^mAM7Pw(?jc0F6@ZTfu$U*p@WRg*4C++$i%p~KOy+$po;g&Ze?jP2n&>Raz|
zIo;@dz!%T>YM)(l`2G4t-}7v>Sl3_7{uo)ms<a}kUCQxEd*IDe`>$+WEcIVD>t>F6
zO&06*vKE$X`D6M1me;o$1$|DbKYG@M`FShrN6W3%x5747oQ<yu_Iw>SPyI{P?)O_f
z4wuZoz0SYr*I8Nf0yEti+xN_$e?djy>=O=^{L}Nzd0IWIS5+O4t@!_X`hy9cZ+@9c
zDcsujWIs<-;>o%GS05f)xFB+M)YDT_uKaH@jLUj+`~QTO!PB3MKK=UVX!+gM_O`im
zb?U#qKasxwVbBx@Uv}gBA{jFLs}$Q;vPsl$*gm(mQAX~v*Pf)8Yx%?f9iGG@yrbf%
zms#iY;NPa^|Fr$$^*5gUTN@CueUtjPU$5@}<4BnPW1;q4W`isKYrk>|DEdr!89l`)
zgmazU&sPDjpPkxpOQqlG$0mjLr&S;Kd%v&KQvSB>Zl1`or3TMrSNq#;bh+imW!4=s
zy?LLP_sfaK(_|ISHe5exo)CFWyGCVqzu(+v<*IvrbKiB9TRH!~`i@Ca6^A~2nj<ez
zSh`wI<%Nc-ijMb_$~?O{y!Tt~@3LJzA&K$*_5SJK-xxAP-}RS`S-QgF*N*@He<+Lh
z9DNtQw&HBn9``k0UrhaVH1+Qv(c8Uxx9$Zr=-92>9y(*gyQpsQJ0gWOLh^67_Njf@
zdi|l7oUyZ-;rAD(EB~)O-=q|Crsyws;zu`^_fqC10&i`Zf|%6R@0g^z?@CJZ-@W#r
z<b<71maHnUuW9I>|M%86hXoDSPv3v@x$DQH`*p3148O&S*S>a0e=_$h@5bbc`qigj
z$E*@;Ir3ZXVdDA5mJ`2KQad(Y-WP8zA)GS9(Yy6nXUMH*|Mnd%`mtd4k24ZV2CuGr
zY0lt#Tm2#Y^jj-Cfg_zf!rZ#Ce>lIswmdttX7iJvyZ;{j-uGx<sQ$wBb^Ge}uaDdB
zr2R4c_z||R@(0b_OjjPM`y(MzbNi=H-jWUHF8+Udw&?%t@IUIcx3a#jW%aVx{*~w9
zH6d~O{}<=N*F<y2Uwo{6klmrV^XYEuiJueRea>?8x@`ZkX?x8@W8>2Od5{0yZ=5)7
z`jwSe+?IW?i?W%&XGWt~a<@q4x$Jo--)k#6ZEJY;Fplxms`N$Ie{5>E(Nfo0JO5UR
zbYXXR^WWD=AD@)+cFfu_tJ~#3#;1OnKNH$bMIJ8rJ}py)w_YTE>5rbW!qU*)sW16r
z-tuU)^*+omY`hn!!##J0RM+-~+|?Tj@1(Bzx%p-RPt+@C@12v@u8GXlNZXXfee~|n
zfL*hN($w;<adj4~cYpn1<(j<bRoCkFS@AQv?k~`95_-9Cx&EOqxAor?FHJwmWPe3q
zYTq;7p!YVa|K4r-e~r`l_xDGynq&4)zJISHefRb7AM^YrmjBzC{X*!l=a<@5;igY=
z=H%bfzt!2<!gb41XR-AC^nUR}4kEMK6m;!2ZeF@~-tN@<uksWZ*go0!cKU?l+fT38
z39MZ{@$b_hmh?WcZ`*IHe=&NRuUcn2DO6tk+A5Wwb$1pYV%TGqeSFg<2We4zl~>I!
zs^Us-^|JQs=G8lzy-nI5=<qkU?#!91zdQDxpS?@}`s-<XkDr|KewOk7H+}yf2G2i#
zFMp+4`sV*nCtLTw@4fQxPflC;d((JMujM+wuI#b6wC~<tR|)-%?^ndXtWmQN;9%l#
zvEgt#JZ;LZ@6E}(>bO5^^2T0`-Kkxn#$Qv$G`~Z>=ymw1wyU|W_g}nxWZyJPzr~Bw
zFYyu6BBtZ#J``1O{a?+R<Z@%KZLrby4-5=EoQ|_(+p}2f7wVUP*&DaD-1YzOuX(Rn
z<obU6<Nvik>Ywkmn7`}G@BjbSy=tn^{-}3)@0pGHBz6|upR}@c|FicWbMK1GEI6_u
zKkGJEYvS9)nyb!M;=U3Ro{Z0K%RD*$O?O&vd-e~e1*}P@r{*-j{mkjM|Niacg|S`t
zcOAXkbvLtY<Lj%hubzCmkmcU%<gc%uM*aN3x})Bp>e;SUifldmG=ASYe7Nn=mAU>g
zf9*vsvIe};H=Mk!`s20_4L4V&Cue!Q^Vaxau|wK0=d0r5<-M*k-4f|immALWvf42$
zd}DF`$C4}G=U8x7&p7{-<!9E$Yx!|&E-%v4<W(#!pSERHh4DTXk@kfdAD=VMKJsqy
zX~xUVUgAEtC%kdh+&Jg-<1@i_TsE<#F<pLvjoydkUjHxTD~<Njn;Lh7lb7?}uQl_F
z-nX6hwLUL%M#y@dr{0Eb-s_z{&3pCw)n_lk^*&E{x%Me8zQW1I=svY{+Eb&`KcpHX
zRL|ZjyR~uF?jH)@9q;|&p2WZ4cfcecjbBmxZh8_X-HVqsHGh4u#j~ZAx9t7uEUkHo
zz5YKESt^dEWJPcN9ToDSYr4GG{I4uqLqpeDeu>^+W52Lg_wJgFvA?d&+pc$|Q)ln`
z{H{OlPwzTRay)ak{C`IEAFT~F+L2uwy-O<=Wp_@#w)&c8ZR*{RQ>RCWH90!8L>Vgw
z+%!=9d*?$}jKdPOb-yE&Z#JEnX203sf<%F8R`j7N$%g)_eqOVbQ?EQq-_2WCTy$;E
z?|rOm1N@hME&cRwd)d}dy?C$Zt9Jf6_G88>rA;;5jofukO$QGaFf5a0^pRs=k+9$v
zaQ!LlV$S!2A;U(EpHD2|yJ&vOkr_2-*x!7K?SJq@;p#POr|I)H)~R={ceD)Gxwgi;
z_8TYv@!G;}Rz}|)+RPaxLJWM(2S0VBF;+*i&PjMw^dxM`98ukypI=n(mo_-4$a=tH
zpXP2cpR<k?7ra;8a6YT$Gwo7ndf)m*{{&V(*wE;@(AzP;^HeN%eeS())#h89W?Wm^
zcQ1@t^V2W3bryWHKXQlsz9RBHJj6FwQNQbdn0Nh}(ARetUC;Wrx^Ab<AzcTh!1H@c
zPq}e)7_DG%`(xSm`@{!tb;h8BcYm(^yNN@k{vgL!hiQ)=aUGn|tGdwr{CpNC9p&I+
z{ofC!HR|iExz-b1@yzqiKEEBO)!XxQ7pA?vBq`yX6PI*4Q6uNf?hk>6tl~1qGxYAZ
z1nBI1F|qXgwC;(9+l87x-nTlvBr1hBT4ip@YmP_p!uLa8#9WV3iC_0$I3#NCx%GF}
zGBhv=*z!le7izd*f8d0D1XrR{n&9p`Y!iM6HFSvxty-lA0{Vf4g^CA4-Q2xxe{AB<
zn8cv{j8`PjZrve)N44`392ZQRD7Y+n!879xp>tekUs_OlW7*_z{wF)8Tg}Ph;4j|b
zdzCpVi^Eg0?s=k7|7`CEKMUXQ6`NQqZ5RLRU|8nVEjO$T7e3hakhiMu&zy^1e>P8y
zn#OMWbzfebNNSB$;eR#v_bNSclYbXBhrKY)|KlhVt-5rPSt{4D(=FNVcaL634l31L
zG;8MnFI78Vg;sBU_kP;S{pO)7GD6sY*eR;t*qd~Wu|}rfU~#=rfk*LlZQhNC8g;_$
z+YTiv8jEen66S6?xYheW=(;M0EQj_jHk=IX3|+UGWVqIu#Ld5YAVWf6&8wsvT)Mg~
zcXEwhCd)opA{bGn!64Vjv4qjbWMa7erYcQ~LuR!N4T}=H7=O(<+Pd^pb?p;jr(OB#
z_urZCaF9snVPg!mc<_3UQlf|Z<;<EF?!onkS|qpvY-b66dAvp7qLB9NxldNe@qH*(
zZ~k}cEpJktpxS%(e=`p}%-O)$!LIUiU8&;FeP=u)jRoHboRJWIc7M?_&f0penJg+B
zz5KqeHqWa6Dp>d@Dtlf2hqpIFIG!ml$@1usTztN6=~?|hw%g}y6Zcm5a_g$>sx4Ca
z^S=u{n7%Nzzh{Z_8r5L&7aI#}{ImWFtgZaNHulOP=??;m&BCFRKdF?k{oLrJxAXIw
z5B5#!h0*Tk9%k6q%-O?sTJl`OIR@q(LQO7`IS=Nw>UQ(Z?f4hJYL7wyb8q&7|91Uf
z8Fwv+|6F^zN9VV#w3Ot%q`U1m^P6(6oW6d;V8`)>sa3^?+i&+x`u+Qtses|kdWR;l
zkdMcX-gw2n>F8O>M`!O9PT{z0oBvPc_`j9w{N=yYY^@dl;jBN2b?NSRXZuzy7MgPX
z)s9(TuOzGtn_$5xZE*YEl7@~F%bxi&I3Dg;A|iOGY0-@a#>f8(80`MMx)yfwl?3b5
zGP!vSC#Rj5z@X(Bv!r#-F57o&Uklm)7AU;dYTfBlWwU*r;?7<5E#|wQ8wNh&HvUnb
z>+4W<pzK)6>~#0f<~M{=_CIKQm3Di++V>x7x)&onu3UQ<ZL*}#t2FEX;@JO5PYxyN
zM*Yid<G4OqdA0cy@BfkBSHvRbN4?8^9aeqMzy2dj^$R}z4++ZQkM);YB+PZz|H{(U
zu<V3<anQ0uJ3Y5fv-h*VShc6cTdg)>Q|0;#E>rt@=4_NTyzc*D#~hC6U)EP{hVDCP
zKF#i!vGH%$Z#F8G`Y!}36f@4RX)=prc(?z}#`}*x$678vaxl2Yq?288f1+VX`SJ4m
zJJc=C*O~CG-+6^I%i{FJSz57TOTXs5zWTazV&?i)+Vj`fMZKO{@TNtA)hqP<=i=1L
z<)6MtZJt_pT*hE;+@25?*}qHAZ`SOq|DmVs-P`EDqq^kkjBD}x3nHH!yw99$^|{3F
z__>Odg|SNvTKgxw?E0KAiM=9DG1-|#>MS=$+)6{iZ>oQ`%n?4e_B8jhs?A}k=0`8d
z?cHs4%`4f5JMI1kEp3;7nZK=H#CGkP7PS2Kqu}>y@<H$Tjz`B|J+?^v$?KEf_N;i!
zzo^z>b^P~o$K}a(pPUzq|M+dmx&NP!%%tk+;k^r2@Xlmy`hH@2sT8xat=r=Bi+);p
zh`83Ax&4f%c>n3IyV_KGo28ev{4Pmmt5Fr%pP&4TmG`_^UvpAzfplH`m2d0TX6?<M
z754Gg*VS9s1yA(dUFR*jV1nTyMW2kTZ%T#dS!NXG?s-$A+BWmlr_;LiWtU4|tgDm$
zbk*InTKfOIeJ0KS`N~VbxKwFby*~MJ|CF_Xdv_-q#T7C>+p}-&D=trWMb;mz(Mgvb
z+(VpVQX<v9wQ*#Jxo@+b<#9MS>-L9gBaurLI+p{~{=D=$SrcHicK7>_A0ygrm+bdc
z+sO9GxPOOT-5EjCt~0jB%FC^1n+vD!=b!I%Ox80^;XP*<SMi4p7iX_nztC#!k@@ZX
zZ<*%!Z#rRe?=sVwwI6=%WN_N#JZ;<YlONOsuW7GZb?NYfMSVW8QEw%)er<GddX*pc
zwtCv+S6?6T)cn~J7FX=?kIQJ01TWJmnQMAWzC>u()fYIW_e$pI)kwsBuPxz`4}bN+
zMNP20q})1XlgZWGQkH-9EXQ8Foqjd;kedFLBRV}()E*cv+Pv&>WcWvG?HwoICUCKS
z&lJhHf9*-w-$~#9xIa8LW$%@LR(^%=q&9WloRrD8&3zUppJmPF@_qhySTeLfJ`p~g
z@SE@1qEEpO6dE1QskfbSJ3FIXNAixy-((I=RxY2I*g|!d4~`4da#A9arq!H$wNxds
zy=QXH^V7#)UM-Yj6}8rKoz#A)Us1|_$E(?MpM6%pGcoM(&$FzR>xCXJ{4`<n<LfWH
zT`P6h?K$$S{-9vi{m`d(Tjrm2zQ02E-K6Sk@#_!I-di(aqI#Xj>-!2lJxTGC?@j(J
z7+SBW_-6Z_P2Zod&%dKzos%biqkH9=3IF1Y?j8!ivn1+e?w|Qa;!l?uzdQKOW?K3=
zf6>pLqJOIA@AxsD#a=Z`@xIn&^L6%;dg9)aQ!?*7XR+A%$4u_kr+?}*#G9B|60|Q=
znbxo?*j;tmWF~fV#tx>M4X$;2`7^WLiY)kBw0i!<MQ;S-%S}S-w*H=Rw0>Rai@h#?
z-d}w)f8IZTra#`CJa&4QUwNyabGB37J?YZDcyUgC8y(JL2fj^7W~}<uV7L5Hi*dW*
zi!-m9T$AtX<bVE|_(gyH)%Vw93y*yNzkgS};LcA!E;W5rO6Bd4=<)w@_ce3G#S|_H
zuEymd`s@<&;R+wGy6Za}=RWtH`MZYq8Snen6|pz&ar6h?O=ev3qOwu<?vt(05`t^O
z4=^f;%<vQ8Y;9#sa+oneOo_E=VZwq4JrQnRr$)t&mSXE4Pu8C3nyj`eFMG1hDz%1p
zv!_p8u)p`3=ePX5Pv5<nf4J?)|EL+gnvrvCT@vg5M(HdrzqR`7pVCP6_xB68O=a&s
z|KZeByL+E3_Z+!#!+w%y>50QTUTmH#{rZ3V=YS>mHV1yH(_ihcd@6hOwq_BV-R_=&
z;@cg6i{JdgKkHO$(JQ$r94xnNThe=$)y({8=BZe}=t-U9{)@98KH0M7w075u`}dMu
z!VelVWFP-BXU*5g7Ob0pdfnb|WViL__1izaf70n9lc?|9Z?r?f+B)SO<F8szMwwHG
z4*y*p?NIRa)b*85{^;>BDum3?5aVoZYfM-XQ6MJ5I;k<yBBD!4p>=0XMc$e@*H(Cj
z2X)^$w$<ap=IN)tFMpl)^p-96SN_oawY#$IukSXxcqHY_set`~|2)_8uCLTBtPQ?;
z^vl(s+xDIRoN8L`zxc=HUT)F5=PV>|s}=4NyY^<aW+sR9t>u>LoA#aj8hk3tt7%Qf
zit_jSy}y>&O5{!3eqH0F2Iqro>FU;jD`xVjel=Nh>e`o@wKi|3{ha4AK}zYT*v;R3
zt38*!_fBa$zND4cC9+7iBBT5L#ha$TXVrS_;b)Y4!+iVw*UCR%IrUO`z6zJ^^m?YS
z`~J2c*~%st7f$rzoEG_HR{TS!1bh1_2M;ic%=~AO%yxRR?xUyXrWYPRaj87qc1Lcd
zh0~{ithmgD!kvCA)%R`sn6sqXY5GLD{raNav39G2)!v=6Gl^?Gv-<JsW%KmJUVW+Y
zPz`W&I5oeS#r89wW_shoea{s>`@9Hfvf;{8KBAD-xcGR(uUv*}F&^>UJxgX;zuc;-
zdS=N3fk*O_uANtkyRCZa`lNRy4-{3V>gLb+v3}O_OG@&g!3Vm16-<t-NN({vbi=3X
z^s<}$HiB18b(<o&yTqc;eyG+vo_pr*A`1f%PL@yxMj-|UO+7yb_Nj7i3@M6_xj4=x
z`>kkL`qy^S@)h18p$TWFKJ`{MJ$tg$G$Qtpyzs;`_D7Fgp7?l954(*2lOu22FR!Zq
zbFiK90Hegrf1(mw5*7wMo3?7@bEh(=ms}AxE_YQL85%8xG8fwPrkVNdDlWJZx}=)j
zC@iSV?DrN`QxPv`zc+tQvRs<{K5k)^?K{z9ZvVbosGH6RO$=)ByjAyYdFz&`o^#Zy
zRHJ9@H!u+8J*YV8(UX18A2v^AHYz@J<ZiOt#Ylx&>$NNeW0=>b=RW7q583#j*EB9L
zC`r7^^_g@avq#HJuY$nV4ZaI!KiK&F2)B*}r<&X2shLlgto7zJ(Oi7(m@b3FGOiNt
z2}eZ^E#fK*eD3%0$D2Ql3~TJCeR>_1CEi=bobX;wBgSG?1$V&}g`MnoJ<bWwy;fTH
z?tfplmfhCk>zb*#F)LKhUD{CXki_uh2~SK@VZxCY;xpJ@IUZnCx-vmcjEmK&G5J8m
z2?=f`rb7i2LX?y^I%1wztXMT!-!xEbz0qskqgPg6xE{asPEF<YU!^biyT-|B`p=Gj
z9Jg})&u^zrms!Q{Pg_@A_NwoHedV*ep3?dAZ_huo%<`MR+pgd5U#&cFL*arQ`|5)k
zv5h`K?Xn)s1_91C_pRq2I&ShZGDamMy-8Y-w=ST#PCcXe!KC>cM6I^<l{NaF<ovI}
z7`v-3gLltd*_IzXP4^{oS+(*G3#c0|zdSwdznp0Aq`T!(|379j$g%G_Uzt%*{6i*w
z`mONzPHvy{?R{&aU!L^xXK@L7!xy1@XxH-y&?uE>+r7VbuG631Zm?if3Ym~2%*ERB
zFwr4mf|v+vFJrQW$0A|w<D2Jg-g{zZ(0ZNK%0-*vIfF{<7uK!5aJ&84mz~*uFMiKo
z^!)dope?JC^P|PfUGH9fcs;fL_1A|T|E5*{oBUpR`ucVKntW4NIey;wZ${tF)eo+(
z{p0sHblFrL;{d(K`wp?`{$9FH^{|!K`;;R~3wqPf6&*9~T_iMFm@DXyy##wi@YEZT
zI}?^vo|58u|JncRP1b7+3|oRlZXKBK<h9}TP4iq0@uY7j<s3Jic=xncMpa;^jrL~M
z(kN-G`L)+xe-(0ga-?rTSPp0Fsi;-blS;corp#**3$%6lUB-S#aEX4?4ciL^j50I-
zDNFQC*|s)WtMGz$rHfF+hX*tIR)tQMP`nkrWNy5)B}2DxT<w%6N&oa?;tK;dAH1D=
zIG>aCx~}=f&P4H0<<{WDXjL<}g>%<_PIOJb`cC6Rufhq}oSDq#D&eKP%ew03YWZ9j
zIwclzvf`Y?d!Z#U2VA17oG(nCBJ^RuoSNVR9v7+NzQ{SEJiqParFU(zimKuaO$=?F
zCvF-M-Ec|U@rXK8M+lprNX&(gN*gWzdzbs~I~uw_wX;xO%U-S5T6=+GuJRm-h@4K@
za`PpB{GDI?j6MFk{{8>Hwz#XAU)SD!b6PhqyIVn*)hp}y&)2OI3D+)dvv?R`ylsu%
zWg`zxW?_Yb4M`vPr!QjYW|v}aU|`_abDSl|-o;u}C3L(n{9^t0Uq{tfUtL?X|NE7l
zMXSq}`2T<UH}%&Q_thnPb*rw-kY2FCX^r`-`|~Z2GjDYMYrgHscX7Fzl+A2!0&kWF
z=c#_Ge&Er~5m3;e7QFoV%EhnjH>^0La#6>DbMouc+(z1mGPtJ1UH`9;<FChB#r*uh
zOLqNcmZhivt0+l7-nV(8*O9ZA&tKYcui&5|U%7yV0_TJVuMP6-ZYIsm2MY|6nz-ga
z-26~@-5;^MjW1>5u4_MkX&t70UH|G0&-0h<Zx>g+-hcIf)Ze1@ajzymC=rrvJtjYU
z+k5k%4>NLDT7NA4=z3=HA&K>I`@^T-iha0Mcnx1`$dMoB&5T_$m(ON!^q23Ob7O5-
zXvK_p_ijfW{>MM;zW%gZ<ai-~Mf1QL9!FN?2<evzWmysYok=eCc8^{avv-!b?J@N1
z%=$Ak%Zg)7_j<9+qPf#j1RewjZwlS9Yssn8-aVCW%q$a<0xt(gY`)Vk?0b}bW=+Du
zMg`?L8+U$j<MUV}dd7w`&-<EWZfafb<NFp7Hb#Nlg{mhP9a49@vEkBX)!dblTW3C8
zaN)-4mrp;QcimCFCvZ=^-tFlL_vPNlS1e{%Xs^EVt!=6Q)g?Rk#)qGNt#!buCFI(N
z4pXKXLWd9cr=Q8cUlYvc%Gk(bqVaEg`_rr24{WorR<B-dE3<9YCXPMw^}6*2^Z%I3
zY`OV5c5~pI*&UHBN=YrkJJqgQ@NbWq!pi!$LpfXK)A}{6EA3g@t~afH*LL(x_^yq0
z>+hGoj$hprwASTr(R+@En<O{ZJuzT)5IHEYe5=BoT(*i3?V>v~ETV62n0fF`^ZVoV
zKlZUQRj=OfDE>3++NybftKO~JAG7M&t^=N|O^q7*HtD=}vJRnnvqKFulPt1++*!kX
zvQ^`n<xEYbtkzAxBn|Qw@0xLKIzwm7{q@U~clpOwX<qJ7pTFyi@cYo$UsKPmz3;a-
zwEXl{{ng^HqT&sIs8rhDJ}^Z?$0EYx%FJWOXa5Oc(`h)C)o*IzE}pYPK4e+bV@Js!
zM>vWDG|#h%eEN}O&OG;$jQNGlEKcnLA2zc)iJokjZh2(u=dfjBrh$r4ygT-`3v9lX
z>u4YA8~@}JYwBSQTb4uZO-Wu1ZYuZe)z>N+?GJyU^jz!Lu81Ye<KBPO^0BPj9d!BT
zx#g|bpZ&ji^3WkhziEtjB16n`=dd2o&{J~1eM*7vV|?WHqVn^q26c?x++wX>te->w
zOXuv}sjo9{U8mr?V`;Bet8g{QZaZ|(Wl7SuC;zxQ?;dJ;)p_TLa=p|VhcD7^*Qfk4
zneMcb>w}CX?@cE4eGjg$`1kR?W=r+k&9AmUKfH)9{#E?9zOERn=qUab0k>9I#?9z;
zj(DdaWw=(cV#=oU&a)TxY!M2XwDbH5i<2B4C-=WqVbtnr-~aPcY}$j|_t9V9Z>ih4
z`bg-;yH{UdeRVKU+g`u4_WMV>S6}{q*Gg<sZ4b+Ab33MX;7>F=i+}%{`Tz%yz{upM
zW^Q*aR+WBf{^#`MU%G<lp3H#AwsYdM7*}eC{oI|^<2+-2?96hm6)&RQb{*@LD5_DI
zbi37!ZF}N_)y!X39AKEozh}#*GXkA%^S?iexcI|ZY@X-z`zM>vO0E5nm1AINRUf<}
zWO4Xui}FKZb~DT-2b>qbGUa`Yma_Uy4-dunk*)y`m;af+>ruqn7H<xB{<gOP&aT&E
zu0-~);@SRPTK7P@?$#w7<>dxP)AG$fUszK5GT@@YTb_A!6S8UyD_I{d=3A4+z3#H2
z;zRQXX#x8VE=X1up6?d5H0rW%#^VbT-?)C9KNS$Gk#frHcExGe(<!^e4R5IzeZTee
z&UQT}1*3h{RR^X@e-Jsuld~gc4L@T;k*>UMO5>KJkND%8F8OADT*d!1+WeLA&XVJ-
z!RL;aw%7PX?AM%npmk-AfJ5#p2JQM)m1<F+&;Fji#aPW_f5V5orAx!s_C93zJKgdB
zZr&FH2kw@{#yhv<2p;FyDRptNdBLp~?g@s*hyHVmZdukY*^~Tv@yf!dA0|KPwAK~1
zs1Xo4=eWgEay!GeWovdgmKz<*I&QjP+N1(@{VRXYu8*?Iv?~`p7|6t%HG@a?$Aw#N
z%{7`+&v+;wU6c5#P@tc8$!ZQB|Mh>mpZwd`a#%v~-1bQ;=l!ty`1Iex-}eF^i+dP$
zx;&`PvWZ{scIWmC_XX>J1gsIT)BBY$@u%#kJ-U()cU#}t{_vOKZX2^bi}tSGx~JvB
zzKVqS-@jR&ypaswcziPJ&XoNZqoTv>Htp>^{kXiZzUd+Vt}dxg<`cJ`Fk8e#vNSgq
z#@CsspJtv>8<HKjIyYuU+`ap%zphT}?(R&<d2&zd_T(R&3nd*T9yl3fhD~S}HGA6r
zYsqT%vOV6XZ37mqE!=o^Ro$wCI*-r1nsGQe`lb5(7YDztH%hGA_LZ;meZH*le2d9X
z&$IrzdUof!IrHn{1QhNhCvJIb+{50u|Ao-B3p=gZ=cG@G{^`_`XU_gt-SYI@?$qAR
z$(wd--mkQ^pAcvoQo3@MqR-SXYi-InzRT@hys)gM?s3<|8J0?y1#MNYe@#E<fA>$~
zjMbJ$<??Mm*50dGs(!FMLTlPF`!$kp3zdWZHY)Kby_C7TTm9WX(OX4Ni(V9d{mv5V
zY5&8k>6m%>{fBE$IqSTde?Ld4CT_l1SJ%7$LCcSNW(UQqK2SU?$5s&fK;4l+Rq=!O
z%y)cy)@CzF2s$N9?RvGFHI?PZL=~|UAq+G9MVQP3BvpA@tG5XK{{ClvXM<2}_`A<F
zY+E1H{+-^FXLjvP<wd^kB~>97%7uEWkJW7JxH5$j_e~G2etyPeh4)u>o{0rc$0i6q
zPOJYA$=ks0m>{vCo9TdYp_Sc~yX<mPz8qb;R46BS`l^2#+bb{K^!}YV``^}3v;XSv
zUti%kJzGNU=7a5-KVPnyCtIfV{^h<65A{p`8hDpfs;~Pe`L3k<abdaVyf~%!sJC%3
zqL~$fO_sI{OSl;xa5U`TDh$lnarNG}-3d$Dca$G9FN@mJ%;J%9?upo|!%Ho938Y@S
z*dx2S<-_TX%hniX|8L6GJ-j;dv)SpjaXE)YuJrd_ZO=>Y)j92LE9<}J$!X^QyIb=0
zR)3$Kw!19$pk?mv%B@?LtrK8bV$jg8zIMv=$h9)R*dz5W2k}ok<TgKfU%60Xs$svG
z!neaE0pE6L+^kS<@cq}d@88eV4GY)kSD!f^_Tsp={f)3wvlsSFlVuJ%n*GoJ_<h#9
zk9Y8!x;j6e*KZp2w7$muNM*))y%$ftcE|p#f2qv(WNMtsrpiSP-;W$AopZYK@u#}K
zDjes^iqoTCv&!U_efb?eW$`kru0!zxj;+bsmUf5aKC<qeo%BiKRq^*vzv~?fEsLVq
z0veAw7HI7hP5aDspJT@LlYdqANnCb#XX>mt|A&aq-6_BK=I;OR-{Knkf%jTY+UoSr
z!M)Oxzw0lUFy-{TvroRPKfH~t;=F_J&)MG&fBsurldwIbNc?5o^gqfMZU1dZ{r_us
z)cTdLpR6lcbus=^`dRyp&(1SF72M3eCcgH`xBn(TH2!b8$n{@z>g>`r`{VywC`mP6
zj{Ki6YxR>mHT!L^2{o$MM^@jGT+6s{-|XM4Y_lSJ?yAWhJ3ZyXK}qE=EN<PZ{dxQ)
zT$|Rfc+=gs)4=L$)2|a{hmVEMc4Mfp_pK{TaAmslz);77UEf;lqKQj(x8)m_R>oh(
zJu?4)+7)lobUXRnxYmm)<Ibxiw;9iLGcE|ZAo1E*@qY1_`0C(yk`e7cPgV)FYCfO%
zY032Z>K|+Sk8h5<B)M^Z`tE$T_wtKw{r6cPxXWm35sz*U^Jk+|35Rvff=_He63E0Y
zYao_8?Pur`iS~F6k?IBCMKg20_ic4xSZBCrU-J^~EUgc2rt_V2xz4Lz{a&}6Yj5(e
z#kN_#XXgjU{`#YMb@jdXyAMy2k}+3!5t~!S6!L%d!IJkJKYY$sMzI%ibI!fn^G6`E
z-EGTr72#P4YZhHonCH4InRh*>?fI|kf4&kw_y3>PxfgOLc7E9M|Hp$f`@Tp2-&iH(
zwd;>y#D#f*bw`%{{pGOZ+Jow~ajES|7k#3xItH@)tSb9FmE)?XqVd@qKiMRlPV;Y%
zi&&bzc$?D^ZA0%RwMj<WjE{RKM5pMn-ncR$@AZ@KPCT~TIZp5$-qYlNR*2I)xN>er
zIQuh;i)ISfE|<P$vNhSc!17_v@%L_PR&AZP_2$QvUvoCI-tp&<{}^`4^4&$|+mou#
z@9jJ`yIV}}_DB2u(>P9f{Mhqnew`whfKKf5J2#@jzh~W8{?4;A>ifwr|2S%%3F?*H
z%c(g2WA>g$%zP{|YwKB5(`w}BUwQnX@!$FnC*5vJSeR+O{VDubQ{eIqC4(2Qf6J%5
zvT>RGX8q|H^X+MB%5A-OnI`5=i`{#7{V7|65Vt;qq8ZyS&S=`udi9}jhzWo0qZf@m
z*Y-}zeAPH@PyAG^h!;n{T(M<Q3q84I`P<Wa6@R8i-F{_fa_-&l6U$gONNu|K;?lRM
z-?0%ATMp^8hm~mremzrjU`4N2$bwgUi_h_`DT{wwA;jdcezvN~qOI@kozrjD7D-CP
zu2`n`v|+CDnjI6LawS_&y6WnAUSMNJ;-lp{wfXZhtFA=Z7}QT^jtk=3G%N7w*;>Qf
zE4K%~FXySaet7+<j~>5gY3T-iIX$O%yZYj<E`L^ieI4{;ZEu)g`hP>7qx-MNPLF=S
zS@7nk|FKO<M|6trR_K24x;brg=M%N(izFD|hbwR1{%4-k?H_%LXLd|Kty*;N&k0M1
z1HZq=_dUKgF`{#2|8L!CQ(K~2^l#0(&%CV8h^5^8dU7>Gm#~UJ`k9?y({^l`ekE@2
z7rV=k+!%ic-2Z+v<=n)DJn6Hg^rob5OS$t;S}ae~b~EP!?Iwd|T5l$cPLg&gGxeNN
z@nG?r)1P)LET}S>@#KBtHwih%6}PU2$ZmMvl_45s_}8FTKm3=}_1Dv1?5ta}`u&Ob
z2L-!a?~B(w{w{lRNB-{pYrZaf{qJDwm*sc+cYK^?yGY&S)4x+<zwiF9`SbU^<$0dj
zKG`}-_kY@)mYI9?tbV)YrsdaeW#ul7eYRwJ=!^qR#xHgX@tRc6yT0?w?#WS0xDPgV
z*d!$+*nTLza4hse!Mj9PKbiPxn?J?Au4~(AzG&}Hqx7$SOMjjJ?<%|B<<;(H(d&~s
z?|<8w6*#v-nWx|6`sVbg3s1f?|A^sNK6v4RkB&GG$6POMZD$$%K>f~5l?_+<8!j>G
zaVC_pCi=ZNy++WO{jkT;imy`)-ml3`-@w4YRqi-Tu6-42{X*TiHRa-^r8V)d#s7X=
z`)hm8>he|duhu;OcD3#Q2mk-mzrMe|Pc6~wcWnRCmp1npU00rY@I%kUZS|^ln+EyG
zA{8@zW4|Y;2YOHFY509KyLDOnWd`4Nb}NUtS+NhNRO(tWI2uSRw3w^D;BL@KVd&sJ
zrLKJG!Io8GwkdzVt8)J6o3rED93d8y2OR+kWs-8MGp|dT*cV=QHrspp;F}JE{JhTR
zzyGy<?^s`5zyAF7jd9kZ->h$BigDUTZ_Ru4X8*@uJ3p_BUtP2M_N_zDg5Gz2e17zJ
z|BAOek|dn{m$jw^aSQeF&#=9%yYHX=Cik)h8A5Cg*ZjhS0yzyOxQ<V*PdZX+q&4rp
z!RANya?LLKW=FP}Ih3sFEm(j1LBOY$?giHkc;ZwyT~^kT+IQHP^}&;`M}K7Rx8dLA
z!1H)VOaA>e$}hJwE^^%P<lV&Y!H%up|IEAO+OVAIhN_gw%r!mpp4ISMGM1j^(-%v5
zotu^SQ}mYf6w9mL9REb-FZfY#iJ?u3{Su3fLDrrPlLai;?XGC-D$^+pd&qdub939|
zzl+6xeNV_%&b)iya8IX<=6(L+Cp&JdO{o6x^ZpSYzBxa;j+QJxE!uS_mc>#nR^wGu
zT5iN1Ii6KndxEYnv<(*8Yp%-^C+_-h-;e&StH0VtTwJ&?C{E2umsu;))BEGk<mLMt
zrt-8F#j(G1eiSdD)53buqvt&5JihjYg(g3{78m|<c--4{{8q@uyvy_Fc7<!cOyam;
zC?v`w@&D20V@ucRAJ`_&YQE{7;XCa@<zLo2A`a-YHddC`hHx@4DKacz(h-*^I=dmb
z_q6P!6BRv2*UsSIyr1#D?STsmg61qrQM%|gO*7nHYiG)v{WZQ}E#J#4p6g}3-zjdk
zSJ1a&a#nm9laz)co6p|&1=36nDh$VrOr#cT=*523jr(?WYm1;;fyl*{W$j8_%+|+$
zM3~fn(ObFy^WCpHk@f%n7OYa<^)+*E(xTe>>+4Iag+JcCxFjX;@a?{aN!|Q;$v#()
zpW>hCqNyczjIHgIN87c9J0G4raJt{oc2!6D9tR&oE+NS^ehlAYGt!+L#rJRfd;ehc
z%1_~Hs?zU&>&<-dGwZ*j)w}Fz*C$GKPA+|ETU}wSIYC3TRhDU061T4LjE!HS19T3C
z8buT)ZM6S+SD(w#(XEN+P{Hh%oQ1`$+gp7?ERv>WS<I;F58P@p@y6ZwSB`ey<K9-S
zf17`2j{mE&9lQ3sy^VSIw`yn3{CNt`1tipE4p>ESH3)<pZr~T1%u?~tvYdxOQ>Oar
z178Vw$t^R^Nv|sv=yK^ldMuxN)rvJIw#@wZ`ahRI!KJB>%`Xbo&D#G$rPKZ2Bh7+I
zg5Un#uD4#HV0{1h|2eCF`nY@d?2F8b&@q{sdis2FZiEqQNXgUf#amgU!mW7|lRBDq
zcZc#;X)~mhn*6)}AeQ~kVRnsG``t^g>^-afU14caaoL74PU~qm{tEWYub5n+(64cw
zBj@lL<;l}p_-1WCdp1=dS3qW})+?8egE|wmB45d?{<u5q@PYIEJYhQ@zexCYG9|)L
zr1bXzcS)n3u>XO|@2jUIY^`2vuTyE(v$93(Rxf{UQ0da|EZ1&LTcLjO-G0&ZS*t%E
zz2wR4aA`{4f2TWLqW?b!eF)LbKG47sICGuw!q~7aJGeS`?`J-K@cZXNJ~qdfb+3>8
zeBQ^_cxhd4vVnpLZ{5vw{%7rilHt~~-nKqdOIiJ`a^|wX6NCOq_U+d^;@I`)w$A#R
zST@bMP0r4o3e|tMA6TvS`MK6@`O*XXAEgIvI{5XkaOkH%p<mzYJvP?delGmQUr9vU
z^R3oB0ST51^FHJi7_^<q^_|*uxk=%In$RkJ_HM?fQxAN&9`W0`sLeco4*OO|A!+?z
zPxOQ;*FI+3YjCsp*Uzqr&e}f$I;tMM7XE5uQ9g}v8vAWi%`f*~%uW6NTAt(2j-#tW
zzW;F&lhqRPzZb9m&F{_h`iu8>bia*SH@~Oqz$_J(j0lHDJ-c7m*eA>oesT9l$wLWG
zEkRx;>Fe(0Ee#V}m?Lj*a%W&<ILa)P!#O=seCbQ0L|s4kr%k#&t*aEH?>r8<ySo0f
z#239V|GGay_l}DgI;$Bg{GC}GSA6=}y8U`nudlD`Ze5nYQ_nlRWMAw>8$LVvA6i!5
zHhfSzkzRd|<B&wfv}-X%?DY#zoYZZ!?Ph#s(eY6Kf9l@qntguV|5tWq{#WVPw*B?~
zBl5O4-Yrqz-aa9=jyrj3e3tD0%^#keip+i&B&_`}%CVREKtWyn*CS7FFR*rAvv1S-
zX>sr0%#%M-kmGdp?oGpwn$<NzGZPNJD3S9h^S|}y=t-@wU3(VA*SED49_D_$$jQp;
zi{G-=9aXObna_Vq<zU|1K7E>5Jm-~k>Cff$X9}lvRDAgHM<hdxHClRl$Kty`Hp(#G
zd@{T5j_Li2H}+3^_x?oGu6V)nyZ6?AeSdx3`S_Ztbvc**asJ->wrX$D<P!=Dm#o+s
z!N11gJHG&1^Az>%%|DutTD#e+FfbJz6q>{E|HOuHF@;khCPyUgSy~RdE)kl=vZB1J
zZR5A2@%oGQs+jp2uRrx@GiOgi!_4Ds$umt3_N2@Tyt!>s;PV^H)_>OTHF)f1o}utF
z_WpgtJ5#5q%>QXyl+rE#ZHMVC#VIpa#vc>sn6*4FCSS5z^!>@Ohy0T#%<3rH-hcG;
zu`VyIoQ0oO$%~!R*X8ck%U>P$H-2BR(EHnOx2iw?yZdM$U+c8mMNAJvAAOl|+f<_d
zb^KnpnMJ?;%%2q*d4KilbE>sb%RjheoM^53^i{EWpNZ%6+_&E>CUbbs7N0l8>T|Z&
ztIMqg%uK(XI=9bF=H7XKZoK+_s}%~(`)v>Ou*rOW{P4)ULazJv;(Al%uNTY+_FS~_
zzFe5>hr8<Uot)ZdPMoUS`)mCqUM-JhU6Y>Ie)=xq(or$<Mr25!^}*h$J(s=h4!QOB
zy|{Qu|LM8l*!>p!-tT@B`EKg{=S8)_buCwX@3(UXU5?jiWmy(n9~S>U>g)QmXL3#~
z{ae=C%DVeIN5|pF>#`C1eArH3-7u+R(+z`tqUEAu{|kj~9=LR|NcVkMOOO`p|Jm`a
zQLA-7c()6A%Ln~5eBH0m*0eon&5cW*r9a%>FF98&>V8UCf5*v>-@OZ?SG>_<6<zUk
zzeGU5v#h^~il(~zJ@?c`zF*CG<k#~fu8-XB{S^@x4`sil+!#>%eN*?!O+E*zlb*I!
zuknj_&;0wDgL_Ho<lpbP-@8|B{&7i3l4tq(_g|UVmp|Y4_W6!R$8C%35AR#^u~1h0
zi|3QV{gY(&@5~cg;yC?M?Z59v;t%sTf4Toizsl8opO)&HNusjR>+B5b1^&F<Sk|&H
zv!t!iMlN3Zh}@-D)~#PtzpfGIXH8k#bMueHeY3i@z44d!#%F9SnDu6N7`NA-Cb>&S
zmFfTXt&@FO`Sl_1dYzW)s&LMr8WSU#>!%+t>+I~CCOhTl9ltpp{(btouXR2hteW;W
zkK;(MlFRLb#wUGGJBZ(X$x-p)qo{rN?Z0=Xm$g4tUteCgA@uau+VA(@TdMCma=bff
zk>%;ftN0E*h?e8(h>Q3Wnk19I=^qCx^Mno!<{1GqGbZp{cNh7wV>!oq*$EpM7<joI
zXUVglVy(TpP+xt0g|z+u|Nq$L|NWc#|NZ{`|4;w_zrH}{-~RWF_4`hiU%kF)DQng@
z|M*{Dr%wO>>J`(yy@D_1$9-SJQ((j9%yvYlz|}OH^{?@qzx(&_H%{Wde|+|Ic}9)$
zreuko^-Ye9O|c43E{jgr{UMmv6jbTc#cw##Z;vF0SQ_v1cKKyO$w5pX6f3l*@3G+D
zFVKCzD)X1%yR|Vd<^+9S{cop!QGCXv7c+CS+V%9WZTk^(?3hDo$i_=4+xYVHe;V!C
zusrJV?&rUbT1-C86L6YY#2~VVwQ!j<H=p#nTMOe77Z=&<7&?m>Izvc-GKFM5v#FYP
z;!8H_KW#d}a<*)C>>tmAHabR!g$ktBi%BN-YIv@+_heag;D!N5l2hB!;1B(m)aB*W
ztS*{9j$+vKLoBUHBDD92g_i%L^sCDP)toyvY_zGpwUlv7&Q00a`j1QdboS3>T;jo!
zWgaNSI#X|F>DsRsc81@%-NGPv?LY&gr0;Ukn?cvovsYWzKhIT8-}>XVJFoMVZ0V4F
zJ$EKGUe$6}O=HSF_`uPHQ;Fl%M7i(T+Fw^Ksxyr%<PKN8{U<c)+UlAK8;`PdMrbZv
z#?QJt;Sqm@V?tfSVZ)cHq57v6oSyzrRge7?TUu1B=ya#nVyB|**B#FqSg$Rto0<6C
zN8xr8yPss{nFh9;%{TbGg)PD=_qJTU;u{*yp?D~3OU`+QeRt$eEO-%M%KLNCGpTBm
zEw@a)4|FB?Bu&`Upq!VqUFv#LM*rk@1=8Gyb{oI7@F_T?<-_p5_eqS_Py2l<9=oi#
zmUZl=XUwr`+x8dzFSce+zjS4*p6q&`e`?+TrPqfGeiY(+mUVWIPr%_<E<4-`Th1wd
z^<U$mV0k>h_KLm!?mdT3?w{A}YJ2{Uinu~R#e)qQrw;6Yu)-kmf=vA6Uk8mpABbRi
zWc?`iTkF+8X}fo8>q76Y_FVNazOuab%GXN!AdVx8=2W~;O`5l4y5GtNQcO>K5|=$-
zIMq{ra;u1Y`JUtxzizSo+4V7{zP)zB{lANXwM_K_<kKydT~4Yj+4+92{t@BF|Gx<4
zUG4h1?w|SBO1oXLzPGM@K4D_uyMcM$8Gh-%Z=Bt<(>PhPybJd?*X*Bs`N*9;`pi<S
zl9!Ke*JWfAKXblDpYsrhhDLng`u4>>seSqqJ2S+C%rBk~{nviu>-4Mr*VaG#`m^5Z
z;F=9PEmW!*u6WI5W-#3FVpqTpP7hViZ&`;G1pnObw1{m<es(aUbOle<GMO`8QzX0(
z+eB}i*4ZP!=H`tUfh$k*r`k+xnY}+^=Nx^W-v^ybrC1m_6qX*Svh!Jd^CH`!C0u+)
zYp<0F{k88GzVOg=x&6OySubvdd^6wSAef_2!pG=fk@V)6_uA|*!2qFezDxg!`CMD<
z$#z#Y=kSli{&OYxbJL>_&Nh##NSEcwa<lvYX0>EbQd(Vnz`@hUBXd-Kv1R<4CHbdB
zM%VV9|2qG?X!li5reE71w*GIe>;FzBkNKQe&!+BBQ0tK0xTqueO3G&$LAy;$H}@^A
zFL+*Nvi`Y*+KMY*o#*V|6(YIn`m24sm;Z8ZF=Kn?ES=%}W&wi$+vdxkyfS3W4_fsY
zWC{LIEr0lIefRc)IVHSWE4XW>@!h+fHG%)3{xx33+VAx{w2uEdxasw_L-KQ$r|y%Q
zGvVzM>61GGe(~JeWv!?tAI~HC`cqw&n#aj3mEQ0ab6MHfpCUC|US0dK`d6=Z@H?*G
z+q^c(z5EdM+_x>#NqlwF$%37%=h9V86jlo)Ny|ShnfYAEW#QF|z|j07IU9eMemL!1
z^!nuA-`5wF{@Pl8Kl|>r`i4C$szO`68y4^^l>M-N-Hu0{V(Ou>Pe0t@Hn44DaBf<d
zw7y&<?PB899d5>5hZ#1!{~*2L=D9oj?#oYp{hdX^{(#cE$Un7=Z4uMWlL{9$9)7Xa
zdxL~Yq2hn;2^JUhqi^<B=sQWD{9ip^UiIU>Jvye#Z(sZUTl$=g&sEVYGqR8Pbg%rO
z^Xuo=^h_VA_x~89#SYr7Yc%|>_tb2<<*n-pi^IN#%=yZivRR|yrf|}J+lnpc+z(fs
z%nlbmu9_KGVE=2n+Oc3!y>-{KoE9Xld2})9$@j27j)JM*_p@2638XO@YSuqF5;;Z2
zLtbKbk;&h?*QEH@S8xcbz196~rzl~<8<hC}-`3FBET6ASj(@p4{lH}Y4xJU5r4@^P
zyc`7>4^B}N+RMG_gYd5Dw+>y?Q@!xVH?wC!*)`5a?&KtfDhGzF?(aurq+ZXR?|x$W
z$3-E`D`y&ca8Cd9@2|)+4mM-$S4tZaUjEYg#dIrOt^WB><*KGf()KozYk&CMT+eKD
z;{T=@H(rE@{%aF%j_RJBwes}U(7e2eUrm!5uYEf_#jZW-Z|GM3V52QfmoM!xa(?^z
zt$20JhIPV8EeBU+e~9dC{V@H_{yQ$xUu1q}g}(UesCM9O<>ux3YnO${U6$0V=jHj*
zI^|{M(QgND=KIOMnEXPv)8TEP{M{?NL;`O857O42<)aw!fAzP$Azw@4d;T9jEI9j!
ztng<3zdu4eKC3+qd%r$=uI~RH=cZFf*DqXj+<%gpb6%)ekpYWn^S!2Gxm7~1)|{Wb
zxsq*(fCQIG;W4(=kw5B;a=-fj?6(aKOI;Lf*tOv6zZW}N);gzNy6uu!VS8AamHAjq
zZxhr17=;^)mvGjeci~yGzB~SJ?TqbVf17SzI<o1)mY?BWw<lis9Ch)JV*2BE-}8f~
z8NdHjc`o!%S#0?`E}q|&4`R~ZvlzxM`zU)w_?-X#Uth|1KZxSk^?reuW~j%}SqpTw
z-%}Fn+n*M)rP}uCzdyFTC;L^s@85m%o7v)wQu>u2PT%*v&;R7T(Oxh9^?#nu6UX$|
z#m3njUK=~xRZ`6_P2tV4)7q@;<?p_WNm{rxALoB0bNAnGm6rymTx%W6%aR{5+VwQ<
z4K@({#5HF}-S)U-Tk-00-FN?8cl<oJYg)AX@(YVqKfm;H5{ef%H2ZvDOZ|U0D}y7S
zFEUEJx=}vqhsM==$<Z3U6?5_&Cx8Fw#>%9WBJuFn$v5ja{qu`|^UCLas9n6D(1%-d
zrq^}9+82L){hgoxw%4t7{q|~2+Y!r~dwxu>@E5-%-siJgq;j!---3JR3x9sx{qvqh
zW9!nh3HvP4zdHo=>Lxe2zyHW0`%zml{QKP}pI>b(N;tk-VA&$e*d3R|Ed)18)u`WD
z#JXsz$Mwi5=PycVlx6-rSRwGE`RT@;_SY2DX50>V<SH2wawbu0LTQX{5PPwg;M#LS
z2F?#HLK2KkWP4@u`Si?%n4TM?Pd1b9z0z@kal%2BBxY@mBV0!d-YVXBU$J3gz#+w`
zr&@hCmM`_^jV_h)3Mt^o_WZJ2(rn-UTZbHOPHuGL{^~h-z1sT6m#5#3Jb1w)v!dtq
zO;KCZ?~1YY4>|6I#>Gn;{;c26|KU}dQK4DF@n4O5)lb;$i}jv<Kbp1t{>nwUfzw6)
zziZk0>w4)UO+8!f|1R&Z>7Trn`AvAkTY;wcZ&cf_ta&ldzx>}G;i-&MjueJJo-)D5
zZh6+%x8>i?)z<m@{b}2rIn`&<PkX7oCz3YV9JH#5l9rbbS;=#ZA>HC%?Yd2Zn*5nz
zPG{VNW$v(NPQSOVMyi7I5aTk(P$|*J`ImXhegD1PzxDr{W{x}FHFeq#Zme3Xn~-F)
z_`Z|Vm;F)+_cadP)_wV~@Nm%)ZprE5`*f#V*>}9>tlCk|HGvcB3-9!&^DjOsv3UB0
zS&!IV5B#iH)6tq#+2~qrJ0r<tlauO4Tki*oe2E+O_pbhNZ~y6!mbnXSzgNeK?D{kJ
ze);SBliqK=mMHYU(%$|*OY3ayDf1+6+82J(xfAsNn$)YdeU&`3?O#mzzIyennnO|B
z-w7+WY_AIOzk0f$bM;BH9x(@{od-(iKWcR5U;5*i#U3}Qy|!B7my3Ti%7`k~^gJut
zWpz`>wKylFGseDYBl84V_ByE!#X|L)?%#?X`NBCKy=C5)v@t$TP(#61<6aUwcl*Zs
zLX!eN8t_GCZmG}Nefno18*gvp{V12U@iOPazRoW?xRcX7|9@mkzjk2G)>$8{Pw_{D
zXrx%BhW_P#^-lWphSattF)Kt$dX6!CwOo77fM2He)4%m`-S^kJ{8Fu3WBzMzh00ZC
z*U*G+*_proC_fH7`7)sQV8r_mE}046?|6inEAk}m_;07>a{pzFJM-38;%xsN;#npx
zXL?yTC;wEl*sSc@$xpBRpZs&x{rzXpx;*`RQ$^oaYWj;MwnxG?atrR9U!%cx;l??=
z+}_U*jSB16<|qZO`?bt?hv}88HO?DXzB8_@wN;W4>)@`Gwf$InJtHT$>igBIuTgue
zzVF_dIsM|?pZpe%d-hL%b^XQGXV*LS-kbdUpMd0nce~4rZ-4rkr{uA7h0)%P7d?MX
zytt%iTh7nNXFoe{DtbOsU{ZV>r%T7&L+3?!WVhY=V!I~n_4Oq?75cn;@7Fg8x*tAn
zm3>gdy+YvlME1_fL49d&8ro+#lrT0JvK`QW(RKgy_67z9t~-vi<lA4d)?Qzz`)lu=
zdF$U?e=T}zr|#9&r?W#}$DYq_z4iU|rC|M%ugv@QN%NFU4{~Py@n&;y&W)A}Em!6V
zHFzH6;GOuP^VG5{|6(sy6>tW43h6Q4<NLvA&A1_OXOTip#_Prj3S1m7Bm&yECEql*
zIsIhv4=E;xp66$Gtk3*XrXsUl@BLLZ>*BU<!Mm%!zmgJ+GHsi(?q7Voqro1gz1BZt
zqud*6s+Q=*W!BXEQoVXY=k~>oOYAPU)`do1;^u9iF_XP_rud1<Wez_wb&Za{)bssf
zvTG~L@^$qERcx0IHEh|uBF@Nc8kggwIlFJ}mGgDncK`GziG)o|J1$C3oAglK$L4zT
z-FFMVR|>m2|64m%VfC6@hs7?>yb~f58XvVUz;MQu{{l@jwokQwlcB#SwCCMU-5Rgn
zzit^TwlX%nV`q$E>wj=>UyMVs*Xpj!yqm2eu_r@rupW@v_f+jl)2^zOHk+;FigP7>
z=RH3A`A)Q#?vKykR%k7G!Q>+}k2%WordFAIcEkU_t-HP`$bQPPDEASw&o;fj<EE>J
zWP4C!+Ke|3r`^c$@sQAJuXtF(Je!Fjp+bx2#v?JtfJ=3ia?+Ybd;eV*W3&oC&E$3U
zx^Uln=cVQQWA=wnzq+z?_1!;r*LrN*BylU|k3*R8lzkryeLb641se1k7aU9NdXSNG
zVACFj7UP|cBIgbj&J&0fzkSL3Q5*Xzm5=9jKD=Oh+~UX8)Mci`DX`+2bwz_m3!@hE
z+0*VTD@-mO-Ru3DCAYQtY3MU=vCT5F8QOxJDoWR*i}HU?O?qp(j*DB*+%(F_w$R>k
zM`Uc#cBh#gd>+!~Z5lrgy4;gG-(C`?&E-(oc!zrl%kp<eON|Q7mM)vDt`p3-#LlFK
zW5e4#zQS+!^Zw?>NHFvYFmSX^6svx&x|HE9)3TL(6|YwGyEYw)-gf%mlzCh4+*`8o
zZl`{Xi&kRa;@5U@%m*9<xwk+2wdBw;uXhu+7Rm~WyQO~2sfwR|(6QdGWU2R$UGM6{
zP46$XsPX#xYp-|MI@hSD9=u&VG8qdd{!LJ~abfQMms6&xe3x(Pq8+!6dOc2HP>}rh
z>;-cPgZr~@E7MK6JQZvkI!!M<*``?&`Z?Kko0-B&GqITFO20}5{k@a6uQxfilkt<%
zeOU&vLo<)x$=#q`U67^cbNIKI@(V2<xy!75HoKjxR)_tH`><?b_19Hf-~EqF-d%EU
z^`(3M_WwGk%&*(oG380WW&NK$)hEB4{55;qcTVe8ZE+tNu8f&I7j<44N61}ql*oEE
z>B95<*-lU1@4mF&^}EX6Ti>%qGu&NDT(-xpywJL?NdL*EC;P+&q-7+8M5N3&_Px~c
z{k~`F?amM}u3QdDcDV~}d>@`&Wn_E0Wuu!=g`%nRy#*_bR_g6Ls{i^yyH90RMf>q9
zvKj4+8lN6r)8G2+{U3vvB?1qOZk}WOw=(?6Bdyf0!ix_!E;RqL*5Pw1!*mt<Jy%(-
z=xpXYeJE|^BG%}2URfUBGroq0Y?NE)U9?JnJ&*7-8|K>WsXY_lf8$)gI6*IFzIe*d
z_nV?pcHLg^^{XH26-(XB19LVvDIE$n>Jkb*{8e)!gXYBpj&h6t-M#;(-9o`CKFKg@
z-JLp{D4EZO-eH&BHM?S;JpSu1v3*LGjX|we<ki-D@w)G~=&rh`TH^ou()!MgE+1GW
zBwihMXb!Iz(_gi2kBo$fcxYjyW&TXx>5{^IP9E!G$}MXAidzppe$KBF^6<dM`en(7
z_1?FOEdAOvdxwpjP{YH!F>6%L9H=(;yf<I}f7*Q8>rM4MMf#Glzd1KmeZMf5dGCd@
z`_7eq6G;%T{Gq!$B<_8p*Q31us(%z*rYASQfBLmDC4ITug2ha8(%c>YYUPwI-Tg-3
z^6iJWzU$u7k2|;XwczTgJ?balACy&i_eH@ma@y`gY>V^WAJqE&N3-kXxyl1J8yHd!
ztaxD~a?RVTaps=g)#lf|0#C#(j5pO2oyNeDa7$LMah>5$b%Xnl*KIvMYlo|#EHBe~
zv1#YSILd;TOHRLgQT^hDhrh*K*phuiPs%Ua{Kiu|x@hyhvomM^F$!$?Ew8`gqWb!m
zzdKZfceYm@{GRZ8`rE^L`l%b{lx%MJ7c9QqWE<!GdHeo~@4dP^?rrqemj&PBTTZN8
zHJ@eY>8H-7(^q|HEI%o~zK3&yY@I>r4_ir|G9ERV`;si|flW<K3%CD%zp3}`_f@+;
zve_7=$<Oz-o$WP0?!RGvrTqqFwi<=(>34VE_fJaNE;|48_tGQRj-J*n4w3!7-N{Bt
zK`3nB)fY3%Z|T;4zp3{7-5$Z|X>0yC3aAxDKX6z5Fz@}T6WiS~64t$4{bKXJbG4dl
zCp1|9udmv_dsqFBVBJOgj~A)Oiy!@bze(p=`l<JuCa(D&AA9wC*1wyQ`uoq6rAu0L
zvFzFUMNiZ-`_T4xIj#T2ls;(aOqVv*eZSvqLtFG)?&^KfU(>T=R>yw7;a_qy?)Rd~
z|M%N=?~YdLuHW~yLe*r0UiJ2A_YS65SXr?Z9o(LObGqV=tP7=&GiQC`mvV7=s1{-V
z|G?yL?-mCBWA%{JJvC|hzQ}E*HWsUnrSTP*R<N_wRXp?9cEU!g?^2_gbI1+8MaTce
z7YVFWNm;ddnfA7W4F`4Y-Y+g{+hF<Sq_XysrFr4CnT@NYEzhS;nXeRC#33Z`+vn1b
z6T90}wmPdTAFZ3<KdVu(Je*PdEbsDDIt%5h7q|uQ+&S&Zbgj8lJ{85@efy+iuU*n}
zz2C8QO5ZA1PP;mHq0z~g*E>u%wK1R4nzrzZh-y^x9<kTFcLaW)VfWegcGtK1VxEF)
z?+m&O-{q*>lj6Gfh@EGxzf=9P18Yty9#a-AIG)2C-unN{qxboj-_8BEMX#-5X^(Tm
zF)3AX)f+lj&lhTihPK;F72SPx!C#{M;fHN^u1)hYiq;BW_|tu@uSaNekgECVF6S1e
zDGjoJF6e!JQ>}F~bz{Pp!>z4qCI_oxQ-6AR9~X3t>g?5$+#Ay9W0#lmDtqt4tL~qE
z<jKBXETJG0boYC0^t_#=`Wtzah19-Rzk1jCQ~Iy)?^pZ7--@N?epmhe+gwUOC|dJ~
zP!scCsrbo#cFT9_J>B>GjO*%nr`ubc)ci$D7dYNAyX?(%g@3Vp{HZAwpZy|SZdm`>
zu4ymWyFY&3X}gdo3hypXVwOB;x_U8-`;WF+T>=waj~+90pDwuK$I8V|oK)Qo=Efuk
zwz4gJFW=r5vSz`QPL=AG#jmEFo><-UQssR>`JwPT4;P+`+Uq7DAQzEv{zbP4kNj1O
z4BMx_(*NsU7fNt4nzVMIsP%~>$A3(IB%@c-Cc5I&cfCaSizN-m)fUL~_x0b4Y-P2y
zGVl6(Z^5NA&!c~`3VuK25O%I7o3rTC>GDV3d3WFb*>dMhz1eq{-)j3m_^vv+CGJhz
z^y*W$Rc^l44wOCX7qesY91(vxv%evy|9*RTbLEkPA9LgHT$_Ki`@{5yb4++TekQGq
zpKf|Y%+7E1=V{-Xeyut8ul?D-G7GDnwp!1Ri5G5FWm~;J#x%^w*(u=3n{^)|a)11u
z+qL^>3g@fVJ@fX4?X2t8n(wN-_x{(UIqIvKs!WnMh3%iy{qV@;-*ti>exj@P&sy(Q
z;~4j4?n{*iE#<2Jw+L{kh`Imy@gwk^`JDOvPg)Jk?H&gAmb}@1{>S%KwLd=hJzZeZ
zzU;=<v(?f?Zok`azCP+Sr?)!f_x<@*t5!Qjy-h7!<MvC}mv!o`hdaxR<o>?4o?hK{
zPEhr(^{uj76^t5JCa8&Uvoa+*8YSqs2(unK*687*qR7eObT;Hv_O%|B``eQBK5M#r
zul7$|y+8WnJl}uu5C0YaP<`{h<Ms;fRbRh{y|C|-Df^?gXvXVZXJ>_8`t`v2?U(zl
zC7<iQFzH$y?CINaspQSIyY4^gx-V?`-v20D@0O88J#)e-!Izby#+BJcJLWhD&DvIN
zF>iOvk9dXlTusly<-7;%dV@`EZ`R9;GnUu?V_?11yCVHz5_{dVtP=OUx1l%PPT749
zHfvOJh>I<K@Q`VCTHl0{ldg{>6c1^fv$+3dj&8os)x*xx$KI#;eBS>f{^Y%-y2>KX
z^M%$fm24^5s>dU{b-fFp!efWCQOymEN+CTOqC%`qiH-&r48&wudqDSBX-RN%F&%Q4
z;L_7ucKVssyUtn9SFJ3aa3||hxbu%$ogZs{C_nyX-|=5_bzfL{^nT8z|BfHO^jv81
z>zBLZ_gK9Ce%{EcWg>4z#qZiX#&2o^7GA2I`CLck=;}?&cijrfTpoLm)n~UZ=c*_B
z{}|q4Xc2k;+>%-Tg)5_5;j*tM3J$EB!jOM~x4ZP`>-(SI9Nb^-Q20hZf`P@_X|-Re
zZp*>H#~2=2eyy9DcPO6Y{v(c<{j00qbOn9*w%X<P!%CYFX_>&~+r3uLY(J$UaC>*k
z-%B0L4I5-~xYs7~s5s~+_e8CkoBhl<$o6sC%3Wt}h5ycEZeSFf`A=4@AZDS_d!c73
zpBGw`1bR8ysZQxzp<eG7yEOB$lkS_?ZMXgR@|f{VR?iaIaQoxmb*DGXyfZmWFmvWf
zd4@$-&lJlnYg%<7{K&ZpA_iyQbFN9-y!08*K9i8zn>-uO1nW%QR<r8r^iMykO)h<^
zT)On$*SK4$u^uyD9!uS%V{JP%EZH=5i{|XrFL!+0HN7Wy?z-19CfA(f59RyyZ%<t<
z#ItUtjBbjVsL@HiWlPM@+BMxhJe5_zu`?&yOY+IWPi<bh$FkR6e)??PZoU--)pi=s
zAJsmQT~eA|Wf!h!Q6V$$zi|K3XD6=f7g&9r7E@EamSN_572VXn3$y>fT~*K(;P6@E
zd~oB=l~r>X6|PKh6VVoAW=v6t&=BM5VtN>`VUCv|CyPsB?(we2dryR>t*<(HO!cX|
zVNKeq63h8_{=1&7tN+x0vHtty<4?k3?>E2t7yax0tJNRxXU~_rRde^(r<>eQ7yXkH
z=DSx@zuzp*UG2P9#+_}?JMX6cy?LxAJFZgt&eAohGopXJuw_^mYU||ca^(wKUvTfy
zFaJsv&rg#2l`gaOo#K*{Q4hpcHnwOLef{FKzVz7DYbQ?3v-vlV!{>diy;k~$dmE0f
z(=pX=3A&xH!n4T7?y>gY-TA)v3N`rHVy83D4sTw1?9ZCqoE@68uep{iNn3R1!>e1>
zyJ8M#_?<G8T5&$#t8jMcD~AV+av~G-#JB`o8WS%h=q%xy*zr)2!$V26{Mxf0KfgUo
zjSX9=H`O#!TlA`@46oIjiuM2gYyWgVTv7RVM%|15J+C(3_Z5A*!)fsoKU01Y)yr+3
z$NKNMy{y_>_-pdr?-xr$-*2tnkPxLgIYySvgHiaqvE=HaMU#2fajo#^T^t^-ul79d
z?YC!v-~Z1(x_#x<V1fMg@8`+hw%vUA+o6k(SH|v>_vQ_mz;5|0gjqPgbkeWXLl-PR
zY56ldtLvA#t!$Eiez~6Q!=t_bK6<pW?s)q$Xti#wM5_LjkUhmem#eZ}c5LXfl(AYG
zc{zY1(W6(g<>CC>e;FQtuCUV*QEcsXOjd~S64PpJWlY%N;vy(jyfXUKN^L<cuU)h5
zZCf?-*+L5${<W*W<~{!ze{FfnZS9+-a;{ewEOrZ9(PQ3NSGTuG_Hx0@KUZ7k#eBJ`
zpX^zl*PoW``S<kHn#nt}-oFZ(!&Lj@zR{nmn(DuI6|z&B3m(PBrA54(BpK1s-C3oz
z%rna&z|rrslas}|XO;}Fz5cvf#J^JIXOfoe^e<uUUuQm^q~v%qd|GG5Rwb)gbuR&i
zJNM=m%atr%HATHLYf<OM4xi`mPRILk`F!r&85j6-!=BC8k55>4vgWnS@`lot7ye4B
z{7N!#i_$oL$^ZVh1`9@okSQ8rLagmf2?rt!#H?DG79}kRQ4^AEjSLTe^-*MXaH#0@
zf?J<gs5^fC`~8aj?WOx&>t65LzasI=`@1cdawZ!JvNf~Lo2vIm|7hfu51E`>*tnFh
zPTm#G{_T&asod@jD_-8ZB>H2s>MnsE<3GtcpY}IQo)ICva>w&KujQ{7#e8sm-dobJ
zOZxGWBz^OjY_;>I_g1`*ooljdqNRey&w~oOZ+pJZoO^R!2J0K2wlfP&78b1gbbu$=
zvMT%i&#J(j6*s0h{Fz*-^J(Ylo3G-TTHc9Hxe>#eC~jFGlQ!j6|H*e(w){z1$j#%*
z*x{*sRAN!VcFDyL%NY+aFmQ1>&QjpuVy$1O6MJ9$|H=Qg|EK@||NH&dq$u^R-~Y$|
z|1Y%u`>);m_g@dnxqGd%M)AH&{QA1?J4{EE55#hOU@;N-p(m2h#QZ#KgD!{WVW#dw
z3x0Plkvd>i#_CYJ^?||h09)D37nbZdTdcMJgC-YC>HA%i`|JMldL`}D|KhVhY~$&x
zL7I{c^S0Kl?fW0Ux^C~ey)iRwGqm*G?z!EFago2HF!f#A&wvG`m-N0VzM8typ2Lwv
z!$r&Ce_i}z_ZgG>>~-cp`WS3e^H}Wu0~QUwrrF+0ObqXMX?9gR%~<ZXQ2l|S{FDGo
z=Sl;U&(RN3tnwypTKHtO&XJ5MlAh16x=YzE3X0)ccA@;~T2H~8-D@})LRg<lKWvI`
zur>|5pSz`MnPrZKf|CO;KQBjeW31a5?|VO+q=FhM7#838vwHQeSee4t0y<i4cNcA8
z+QH(+e<#mh^6`|n)Bl}eS)zXbufP6fUE_y_3)ZHtkmmbp6LK)%+}_Ok1MdO^4gO2s
zubQR!aWzNg)Q3(>Ih@aMZLjR$Gvl@KeWPrC&f-z@mL_i*=?>nzLJtfwx^nCnocnZ3
zc-7auzHs~a@>^T(t-o8f{p@}F6KkT&zZ}e*@^9CgVh&~gt;M%r{$??a(@j1ip(Cet
zHqAliZCA;6arsk=R27Atx<xA{rKb4rJ?zAGUPnwau}d;>T2h-v`Z1vkQWNWW9yK=h
zNAKae%KUDQc&uM~Ox5a+Yr7vRYrpTUDt;#Z(O4nyGfTHk(hX(PH=GNq&I>;C-QdV@
zvoOMUMajA=@3~~P=FGUKkvT21X~Fe+_uF^&Xn&WUeyaJN)yXHV>Hl8tzQlVe_``}5
zQMx|`Ppo?#;;~-sMNfHY)awg7D{ij*=zadf(q|g#BF{cVcrX8VHnC65UoI|QJhCp+
z+Uf5B^Jez$rZ&cZYv(_>KK)DMOr7LA4C)SFLOD$S$>v4<=>PTb?bWs6s!<!e)K;$#
ze^QyJ{AcRv{nMXtaWr{rH^|1!$m8VkIjy{{HSPY|_wLJo&Uan#FSn2>(PIZQ$BUWW
z_u1OIHkO^-_gF?N%F4t@&T2x>DTY(bR+<Hh-484-8~tQIH9O{4V&k{O%PcDnyneBM
z@)xz$YW^V$w-)c-6SVLBb?rr|Ti-5Ov8llGv+Yl#lg7GnQ`7!&YYSuxR7h{w!}EJ`
zgrfcuR@2uBOIN*Hw`<8?u|2JO!<Ia{`t<9{l7oue!j9UxUlW?GoHBDaZQ==>(*HAO
zw#!OIwvWH(m-k;WN$`?-ovU)laQS4mPW$_ZCJ96(3W#q$Iwg<q%j!of&aHm?{@?kR
zpYC5!*0Z|!bozgt#2ud!mIS^!sI8OBdT>>na`@@Ew@o=FJW<nEHM@RSK9{PK{q+9V
z7w=2Lj|41TQ7_^fv}Wnk-BCYge{^}gW-~)ldi<}S<-4*1|CHX-xU}zKX#T@DQir#y
zaj85$?6!Eb*tOr>mqm}rq!?e=QOI+uxj~J+ZbFDNr?{l~+ao5<%K~0=wEpwYx-PG0
z(A`ldGf|OEUj5v3=e_KQV`5)Mx9%-A`)kLg_xJt!(y3Kq$7}z@%<G(5wddBN{kx7n
zs1tuXW!9Q~tR9jZ`nZg~<`>3Vu9<%zfP<x-Nw7g+gCWzQ)YEsQj~Qo7=WO`Ex&2fB
z!vuBt)^(3$!bCir=dXFZ#AfO?f#PmIGsl18M*}A$NIzZv<l)BM;WsiKKABP@eCvbH
zn^5Lb@!tIZ@vUZCB$*lYOSe3H?jP<jFQ)lJSlFjK|24a!{hsewUuGAte%OD9!n{-Z
zADmx`>?sw@6@PI+YHO9v;~B4>s_#otwWw%!DEx3|UZ{y_ow@bml}%kC9!jVFM?b14
zv}+eKm-+m^R<l5U-ErGw%ZmB_Z<g%;o_qa+i?`(F?rCv#>R+p;icU)0_-N~Tf!{}Y
z+rB>tl;ieU?e+aVQ|FP;(o1^`8k&p`b~a7qw_tpJ_~4-f3_Sr1$~NrGJFXV?%y+BG
z^=P>?x9Ldwhcy8V>Rf!x!G$I7o;F->sn4=wY!F^@Jox{g<!ATzv|mbgcr;7J#_Z!J
z(IavW4chv@A5PMeO8B+TO5OeuSCdOd`b_2XC*S7l^h|ua-YhII{>T)bl}3VBzv+iH
z$8mbfn$-AqJe)1xB>DXK&0i0H+LY|5<Emo$rC{^(-B<m8Tc0f2|2p>S`bqX3-yGd8
z*M?7AQ+&Xalf|X@xy<QLQ{w+wX>B=Wm}uMk@r%e=ll7A?eETaM^+)vCnG2qC`MznN
z49rXaTN<~a>R-|N53ia38C>6e_wKTF*FT(nAFpt*r}_EUmwHi3Pu<J=|9{x-EMv9R
zKmME2RNeGd%x$Vi{y#VrKY!5#{lJaeH#K|q|J>Xm+F3noUCoof@88DGYG?a$@SE0#
zCKjfs%ZK|<R0?h7t+(Sj`$urL^!1l|@zXA5&&}TVW1r+i^Xr{zg-tir`%Zm5S$@J%
z;;YU{+lxDkM1p-aZS3!)99<%s#gnmR$>z?!W%j?c^<F*SQepjd($jBQr<$K?@Bf_a
zW`DK)`tER*&s;5Qw|t%8C1xIICT?om&@tn?zUHk77jyHn0zY4{v8j;ZyZxxVK5t34
z`W@*{FZ!0U<%;HJhbff(s%$#-N{ao+sh6v+{$kvzt6QM0<gxcu0%v5_-Rp~5EUwS)
zZd>xbRLP}n>(USNd1F27euX5c<S)MeAs|5cO5_%Wf7Xu{#lO8?7bRsKHs4z~BP@RU
z?)9Q`6u7obj&|YsbLq5^Q<Z+*zDXA!1*<>*f05Omp?a3;l}7%Q%<l`AWNPMCru^?)
zFFoht^6!7{l>Er64>P#4L|X0i|EQezI?wN4yJuWoaJliljcosc_D9q2o>M#(A}7SH
z)%jZCwnX{ur>oR|r=_$?u2tRXvemyfML5x`$=~Ss>EK?~ox;hwFT+bsPe)zQRMlYm
zoX+i$uJ-cPf>Yh~U)4X={t8_lqO$gK%}zV}*Vkgd|N1ho^XjXw&-VUaZy73SIwx+=
zlHLC{wC?wKwdZKj<sUAt%Hqcp`}XPo-IF_gLGZTL)n|+JgKnJK^f!L<@)Jr`2H{md
zrrk}+vATCqPw}9I;UgIjnJP)GUx_E>yjFjmEtTggbpJ}aXK}Ls=_Pl%<-hFXxP7Zv
zxNLou+rpPi)?b?aQ0QN_`rcFyLCvy1ub!U17HZ+(Iir8}z8brbu)l|F_AXs|@am`k
zKYM)-C!fA}>!{6y)T{rTg{`+#tiF0_^11(inpEZYpERrAS0k4({eQ9NyxoNbt@DbO
zbiO{_x<o#1%h#oE^H$xy+9hAUCHCzK`O~j6HuTy)FMJg`SwC%Kd&XOj*#+PC#%9?p
zS><vxYwfnCCJXC?y7hl%T0L)m{Y78vYnEu&{Y8EE*91+z|L13Iw13^E*!@xd-s@g-
z)~g<~{h1SHcKSQl>7s=5Sst@~e${{0E_}88hm+04f97wu#xF8l9~kO&=IQZ7-6wuN
zKPO$d*1wbc|3|qsS9WrVrfTK=UwGGB=abOX2Mi3{>5j7$I<#18uj{{vi?@G$cfD`c
z)2OHB>%zld-(Ow3CcJ)q+1|39;cNG=Qhua$b$@iYYSoo{d-uy&Sa^tfO>cOwQ@Q^A
z)&~vg`}PPMUH%bdGDGb6gDoE(8D?HK3qH(sw(r1u%?py}oY$UgI~HUfwDOpE&9Am(
zHyJkJOul^_o>#uT4?SqeocbXAtkVLMfXllNs~$W1@aE|gkE>GeeyLj+A9VEB(Tg?$
z@9Nd});sEdnxFrEYwG`^*iWZhU1m)7n$OuU<x{%u;IW&{PYM&he!BksiRi((7kl1j
zOnIc%JvHvJYwyDkx2BZ+4Gx=B7s5XM&xgaCmh}B(J{~D}cD2}(E$tINT~T#C+ATfh
zQvS-`=}c#jY&Z8-6<fUShxHB9S**d!+LNXyNgC^#=Z1FKvUhax?RwMT7JJ3%&!w%l
ziy7B%(C=BesN>wCTzjSijmvJ=J0*sxo-nst_|avx&3x{xZ5fk}G-^-%wq5z{#wpAB
zw0>;sZuc^?;h!oTR1nq@w?Ha8*gD6{?p2jRfBnDm0|yQ@w@%#u=Q_7cl<>4I3;*mB
z)(bO}_!pfLac{k`D%a$Bo!7<N)Q-1(wyW;jd6w<Pp5{gWFMoaY)2{ya`tZNiUthnC
z-~IJvduG|M^@mvmPkoK(n|4Vw#Uy?idsWQgKmQlJob>(iyvg;m|Nmw8>c3zkGmG`d
zbjQng4Z=dcZcAd#TD3KV|M@Mm7wd%_1D6~<o%;0Yqeq|O&ZHHJ{R`fDB{L)@Cg#S{
zM{f@OiHV!=<K2yIxu;TgADNP~ok?Fj%U@hvTzsb8)SB|6w|h(9_Upw6Y&&14xTx^;
z_Ps9?vx0yB{kNpX_4B^+>)v-CO$i6BVZXj>f5iUC@YUYgS6@BvJ^APR$>~RqlpH!_
zcqV^+c$9vuqV3C-?+vCzEVdO|{`Hi9_`ZKv_lK`L#<AgnLcQ?HCvQDd|9*Ba4*y~u
z|FYFU<&S`Tu2zM1TC~8HB?s+e{=8x+IPA7WVQ$*IIs2aMw0$%oH0JBpUqWBAFNXix
zGHt!OY1Q7kJqikjk6VAdpC5Q@@!rJwk5)(R@KwFIbiYWWO81%KJN<^X`(JScp5U}i
zn)P(2ZA^`baI3Ir%!V}Gzbxx5i@&a%xi%+F@ta-n7qJfxt&1CuomqQW_WZP>-@6}W
zmjvdpT?)0A{Bm)p?d?l8dGXr3VpoC^MSK5FH#lTH<E7_EsXu9f-7+($Utg0GBF!%T
zBwme+`<Tk{g8_!KYiF-lzx6lvWQkS0_v@A2|JP@Iy4iUkxj0<?_50R8yQj81lwUa0
zTs?Xb*V#Ki|MR=4ia);2-m}A_ShJr~Z$)Za#<BCf!Fs{^{}yljwtl@X`=YonlE?B&
z4yy<Js<jI^^-1yn<ilSDl%xe8C7QQHPycqGHGD$8o%4dH0S4xmZt>}sH~qcgko=fE
z+*@_#%9qh4ohy4Lnua+ENq5cq`|k|bb=Sm&e1Z?{niuBO>NQ)gZI+l3C!#QK!?YzD
z$zi_@wfI+w{a<9?Vk)-YY5LZx{Z~6}Bi4p%M|ia}^_!Y{@$5It*4Siw`|i`<e^h7O
zxcT%D(?RyBHuK{z_j<)dRES<#v`**T491D^9&UR?{+-^xYwPc|0T-GNZVRef8nZBW
zlg6DbRsK2^Ecs!N-kC-V@=Mh1bz%Cq?S)m)%IWuOeKL)|&B?pB>zaPh(fss4i$jUK
z*XD}nv(JCN&wpxl!IM<O?N@dshwSsOz7xa~abREf--Z90pCxM*PM^yCQ}v<6N6YEE
z`&+#ld#b0rWysuh|JxtaG++0B@$rQ$;(NoRqFFn7`Zi5}e@E0kDa`J^Yp(2$=IQsg
zl(p>neD{z4Y5zX1O`+L`79Eic&077)Ws}xFRpG7YBWoPuiaV@WSw)U7cN1S~;d$y|
z<>l7&V}5BmM`R8$&GCFHktDJ|K*`EAmt8z!4dbz*<LY1fI6~*I(%OD(qVf4x+4Y-?
zF9o}(cE0b*|FGo7{$*aC-}ZNETXd)YPWbj?lHJ>7-+%I_R(%V&s2XT&`uP6eU0;4K
zdgXdLZ$fj3TFKYcAGh~7{n~N!%IQ?Gw&a|BGWqP^?GANko_zW<NtcE1X$?z}v)~u8
z!;voz8Gqg<#<S$H+O&f=vx2HK=S-MT-d?%FVyCy??qeo<Z-+GVoxh|NwqM16(eC~4
zguZ^Ae)RvRgBndw@7{e9e?!%H_uWtG4?lOQpG_&0KgxDv{Sq^=B`+qVtNlN8;gOn~
za^{+4C1<`(tNZWqkYU3AJ>?(bS(ZLu*nZ4=^`p!0PtSKu%&*?J@otIz&lYw$6_4oe
z57xWfnsZ_5m*;Al9JNowyr<uY7rf23f79GWQl&fcvSOwGOJ&BKJIJvnFz@@;$>(35
zyj=alnI)}SNjvx7O3uHj7iG=QhTpCCoqT)!)mCvu?wAXULd7jzckMsOKl%0N<e*9K
zcFA56?OJ-X=>EP<u6e(&|K%)OdPMM%nCg}k6@IO0KRP!^+|!(OY@Mm;p?@<D@0)h{
z<Nc!J9VzFx$(ep>`2BwO>xX|u*J{+K?l|*&`m42$B0K8tuRj=HR=I3~&qj8skL*T5
zZf>s<w|2J7iPHD{^78-n^&)$8cJDeUCil8NSYOQX<$CU<X?63bMW4P~C3JVAc%_Y_
ziQl@pCBj1O6CdoiX<_-FJ3&Luk+0J|u`}-E#v6_<Z{rr6&wO-*@22a&Lr&cM=?;;b
zS__}{&bE42R?d);x2LeQ{+r~n?sfZgbG}|(^tI~u*LR(9f1}o~-fMDg`qfi+=FHgr
zoW(k%Z<&Ano3`Uc3-_Is5Z-<1>HpVXDy&uS{(kSDqVRBb@gISQyL)6-|E_OSKj8i8
zbE(^GvoMv$xrtxY6<00!{bVc01Z9@ESuO6Ktr}*J8=nTT3ZH(KqN!)DCm>w7=4ee#
z0e9q`WBsw+J#FEydoC2WZj7ELcIw38X&N_QC$sLjR8qGoN_xtTx6|hxiV;4*61V%u
z!u}GmC;QY_8vdH@^ru|4CqKR~`I?u;eU{2?yXSO;{0!|>oxg5Tey;TEZH>B*<nMkv
zlr0&%(dLiTcmBH{o>`hC&hI~K(0h5xmj2B@lyc&1mJ07`RsDK;M`L_T(T^tUz2Dz{
zoqXJ5HiwhVqwf<O+-3<^_C?w2|69<_{8Zbr?A_8MQd`31qyF5JN!`oTHB%)v>u}Xe
zlj7wUDsHendlu@{@NR2Aw`kN%xBvD1>Lqio`t5V$e))Ef6W6*AFIE3m{62VVlHo<R
zb+Nmit!3eFUq5Zd<m-<f-v8dSC8l@h+oe^fzZpC|vcc-&`Mj#QMEm!VGK&q@%+q-E
z^zHV$-=Aoxsm+=l-BT^RHZdb$)~1R@VNt~<rt@0vFS0M~`mS8lQnTgWs+0K=h8t^s
z80EaO+4orOfB6&LOYh^Bv;|ywC;jb}Q{Javzh=GrePaC^;dz{8+qk2yJ>POx^v_Ds
zzY8Liw>YPG7{8ykCi%>n>c|-lQ<5HU{j!g1`}IjDl;xG!YgfNf3wZ0<bTi9tiBC&Z
zXU;y&Rk{Bkr!CxY{jP7Z$Bd@5sQa%UEzR8fLOVIIzOltfV%wdTMIJK_W^eZ0e?&k;
zG^E_|y_CM*BJbsMD(C4xopSH5W5Tr8p+8rO?by!w<4BC#yu^e>QQ`9v?7xQAE7sP8
z{gvIXxOlPf+sUhbzuywhb#=Gus&8E8vnH4RPES!R`gQKb%%Hfx-u81Rr`|j1`F+;9
zWaZj-p=Q!`zd25aY=7P=UukpYe&LLMueq+%WmUIym_G|z8oG9Je21FN<*UZ3wf|a@
zJYUxqJ?`(aINANy_VrxXp!u?yd-lDb5XoYl`D50vllvV-%J+9qKbNYu{8!bkx!T8z
z|Ha?^mt_0j{q?+cou8IJ-f7Q${mZ6*DW3N>ODCzF`*VzCck90N;~sHQ8)mvr^skpV
z<$uz+C`vZ&pI*Q7{jYtKugO++=e_*@?R))&_C2>V`u8v5Rui_Jpt9)i(++o~$-)0;
zX{(hkWnkbaaGa&c;l)}Me@XmJ`kp&;b$@Mtb$@m2%1c>O_eboyzE6AK^>hFKMy-3d
zH}=={?0Dh0y~#WCuKo|-&(hM`a$?2G*ZONWZIe)Vf93lFw&~|p_HVm%y#C7`JJFn!
z`7ami_}u=oV?v3LX6cvDi{JbV<0*Qyk~^OFy41SA5pQdqHpW%{a`EbY<+48Nexr=v
zhXe(!!z_zS=4Uodxg7stH{XXj=Qa7;W~Tmo5&HG~rXPO7<!y`31#uXQdy3Cma$mpY
zev*Wydho1y(e93NH`i=B_EwqW_WkAe1wM=1bWwEG+WGZI-_!40Q??!6=h?XGQ%jwL
z%+DjA<(?m$*7&4plcU^+><_0OF-OVnKeWamz&LTP&2HZ94}TrYY~w7Cn^^i{$-Hle
zQ+b3Ku1NZnnJwF~v*zT+U;liSqK_mA+*=iDd$}Ot|C%__KXKMmluv%Dez(-+RP>MC
zUyHxK`yu&#{m(PUKk8jm62JHPxws?$<Ja1UzOI^dlPA>b<N5zOd8<~3PJNx`QaaxV
z)L_+r{^Qc(w$)cdv-BghWw%A$%F0~w_k8h>Pg_5qbG6M*%@4l5_~@&x>J^KscgL-d
z+r47f*X*6LQzF@<f7+dzZz3?=tn|_c)zA8&mlbUVUk6oPnXa<^@0u8%tzFK%Qo8Eh
z8t=a*s$XfB+I(!`+c=}S;Wde~J{wi>+j(>ZPde@*|7wL%cKcert@9Rs$WzLHKiPO%
z+}5@Ld6iH$PRVzDM`oJ7I%!+S`Ff|q(^Gqcl>Or$W<6Se^3|((!H#>^P4|gsGoS5a
zHT#>yI!|5s6-B@PR;|;%wq&<{>(bb!!-1AF7EJZ&opOCY-_(QlGgVCPznNfqtLo3M
zyY*~Bhm8+4vAS%T%DhA%cj2pRS52PXl~@<=dF>#J@r=Y<d9S4wPISCl&bMWX!PHmQ
z#c99JHn-)hKl$0!I!s>a`pvM6U+t9(WLK$dOZDG-a@xPBBRdV{XMd@ies%t8m#V#a
zt&_eQtZCDX+v$1u&eex=X1eWr_3O8ysf@#3Zrkbq1DUwbw=Gufoy3<t`FqeNh4)ff
zHGZed9{#JSy1K+t{oX3GORmcbXYj=K*75x~cyQ+wpVn{2>so5EQ&J@y_4mwQ!et<F
zJAFsz$<jFWo-NK(|FtfQUVG7c_t)&5>R-cNv=*Iyw2-@W&sRe^ZHvYq{c0f>{>k)l
z-sildDSmOQtiXJgvf%6u{71e=>i;O%GuOjtV$$3H^Y-fm9{=F#(IRIt{p%NxO?B)4
zJAJM$>-<^QePz>K=Lu)*S)WQ~aM!-BWB<tA&6DBZW^(^cS5qqUvHS5XTT%>pjZVzW
z&7Y(`TmRzQyLaxbnar-dr@|*v+`s&dhx)5^y?^((#r~Mq`(CIqPy6WAoIkba=Wg}w
zQ~$rvvM%nap?<q2vsB-;$WN;ymwps%Ic^uFuc*?JS@&D>pU_#~B`e;?h$haQnRy}L
z$eGPobk*M<)Q`FFDx~<1d7=5Q?=RNgEuH>|`#;C31JnGEbew;1^m1LDn&d(LIRfUU
z@^h;H9{$|1Vt42>iFc2-K3e)c`Ekg4@BIfQZ%vm^uGEsa7bL?ud;a{_Gfr-`xUzj=
zo%$wT#gxjO#R3WSwl)1zJ1cj#ZoFx}`<ZUh->je4mc&_H4bM3yvFG@`zQfiRuAeZ9
z`?sO>{nK|w%T4NPgg)M>)P3-sW%G=#pP9LFp_9ruT3)KmEoW7m&-e6VShD>ZO_%e2
z`=Vw|*I)Gg_0=Qm0~S@ke^kBRH7{xU?vFRM4c=An`uE%L?%k{3IDXo%KcTHH+hKc>
zH)igZA8EBm1Fk;(|0iJ1&6c|k(sfIp?-A|RY+Kc4|I7QIYn%RpQl9G#2_ENO+bsLS
zyu{GmCQQaD{ZmC<rmOqhW%<=&zwGNcmo`5<;yJ%)ulWA3NB^T%zpZWx&H8$E^}B0d
zQw%Seb^aEdlW_Oe_q)RP*Pnd*|89JuwdH3i!+%l#_2Z|1vgr_T{^B^jH-CYG<i!O7
zy$80hP4=94tx5H1RzQm{Blm6n|6gB8K9yhQW*On1wB7#qA&#TcE_L%veWx6lAb8%0
zHTvD*SN~!-A0FMx@}6<cl0`==-?07>`68UWtu|O*|72{Rnz8ot;A_Xi-S<{;FNnC5
zJpJq+Ume3~(k*{quaZ9?(RO8Sq38>CslOH~RnJ$e|E)|qmGN$}XJ}Aiz>TP=drzl_
z)a?(t6l;CTXaCmIyVUpJbJ{KakE3+*=Fb&pgQ~AT-qOC}ZJ2HE*MDb{HvZ!{w)ax<
zhmd@k&EBh6Paf%he_w6;&D!O<2L%nLB+X*>xv(we*xB5~ODgY!URcDjN||y*|GM;j
zq4uhObCRy6Z~xV|VCld3n^|qePv1SZI9>EdBa<iP=(@E<kNKuO>ATA;vuM-dPZ!@`
zlI#nKiL*4ldcQGUdVi5qhsne<$3Cl`?%$?y)23ub%*~rF!AfHG8^ia-T5R&qh_=yR
zAMv%c^g&DK%hO-G-hXhvRAbNiZm#W<kD*21)+Hx1tH(P`iG7=Vko`^h4fWGrY&U94
z=gyw3efe>VO(Mtab**s)PQU7E?dC3Z`0co2%?|eLRY6(x_q{}Cn7z8Q@~6<^c)o49
z$9DSck8U}+YRUBK9U5BeO{%Wn(C*z*`pA3c_TK5wCL2o>MHEi^zIkPlR;KF3rYgUm
zi|&4L=2pA-x%r!y_I~&J>#--Fs9s*ZsIt61`|8st|C_qADh^HlQGWW~n$40udFm&>
z+?3^<b#1D}k0QgTaxYB%W^Vs}bGNJEdfB)?u9~0Lf905>@%`+)#~b;U{&}nyv-R%B
zx{IG5EOF(1n!oPNdgth`mAYP&s`mU_7dx5HRQ}cXvV8{yPye-l#1?tK{4L9m<BOWj
z_oZ@1c~744JMQ+q{U?9?<Zn_^ocTX?;aaEdaeK|?M(t4j_E_#`>U8tt9M4^qD=Teo
z+kfrP<7A4_SSP{2z+>b%OR*!1wSHk?{Q9!Bcjjsf-8y<dWZv2{@h89SeW-u^|F^x9
zU%&hR{{6qwUft_n=dXVH|7VYHp}6<Mj)XbRN)LbTOOIoe*?(li!xZzCi>DiOG<)7y
zI<bXqnOEmK_p3Xu$K842nENYusgaZBI#r3&5jSP#Z+iB7|NhxqPj89Yv3uXg`0$`D
z@#{V7SMwD&eNKPr7RCK2;?CyNGft}VUAQr6iv1zQWo}z`l)q)MyV|T*U>oyta@}lG
zll0!W=G|-=N2HUtCY;ub%a*^${7insA)UB&tB$?M<F091z?=Me=L)ODlP`m;kL7&p
zbKInGzezA_%j3eNxuL=zE7$DPi})51{6%8(%&S6mU;nBWNba>=%FF*yh_|`<hmopi
zs@kb#ZxmVV=W%DAxaTb|_j_%*`k`n8lL`8vk8Z}XS*|k+{CdPfV_yGn$8AfDYfp$+
zl(HEFZd~#A0BhBx|L*(uzTEgH;cDRWDURp78?tpx&kJmQzE}ADza8s4eupiXba{RJ
zBJu05{tMrKJ!QM<{WhODYr>mN_NkhddezMDkK(!X`Q>+&V!cH%@n?C(-o5{4uV?kH
zG2fIYb%s}2>i;LwQ$w$Ymc}$#Uov}s?dOG>RlT91D${nq<@H`OYtqf!uWuIY|G?_i
zW7T1`{pGGZZsDi*A6OHSGV|U3Z{a#B>kpOGsBB$XF?HSb-SeJY`xCJ2?e4t0kMGwD
zs~rjVh_BV2C@y*HY4!<+g)5yx@12tI`egS1uJ(n0GwV0~&b-uWVI3eExI(Ul^=6Cj
zngxw2TFb+)*?h^!&9$&OUw+NM=j03DxO>`v<G-H%ntWk@XWZ)9N!i_TYvvoWPEKqW
zxFd6M+2l=M*LQyXb3~^7>^8}a*+;W4{!*=1y)N$Z^Z(z7R6e1PHZd#OJL_|o@#tqx
zJQ|Vo`Qoya`LECYf9M<W#jka(lmMHI;k52$jtl>*Cg|^fa41T(t=!G?%G7Pwzp~Wa
z?5^Gs7qj<G+m`s;_v()?Ms@Cxp0jz*M(xn)j<@o{)ms1fd~lB}eX;h}ckj5hchBXk
z>V_O-H0n1!EfhU5VdDRziI=#QxD8i4bbezvMS%6e($%5KY-~H_58LXB^Q&LlU+KL(
zc!lzV_rKl*>J$~1=_}iF$i933@m{NH->fft7I8T_|9&SjC!uZmsZSqPw2S^Tl@zg=
zude&=@5zK0hBEV9xKtz^9lsv@rts*UmUwVl66cA&iFT^Yyzi&AZvFCDHT=ke_IB~E
z(+*d*%4KvhPoA~SKUaSDM(?<NyVkxFv(m4K6mi~Pw_wso=BDi@e{|n!S#?I=xK@Z`
zcHpnXDsCC;BhD4yf1W$xlY3XI`JnOAKl^`FHTi8S4r>his=21LrF7$4_Ok8LwJL09
zXUsFIzkYPF%k-*)9Z8$5?kWGt&pdm?RMbgv{ex|rZ-wRl{@~;I^jhw2wNq|>_uooZ
zIEZ&FJIMZU(p-kMcaP7R8ocb^&T5DKPBIcdc?#yNnrY-~IlC$Tc2?1ErE4aQxk09x
zJ6i0Q_xGr<wl7icylArOpz`uN%=Pc0KAPU&SrGV_Z|1Vf#B+i%Z;HEn_AC8)7<4f7
z@Apl!*F+@yK5U!+$Cf4Sojw1ur1LIw1Kw`Q*}G!VtU0sJ{{POOe_VXQhsO@G%l^(h
zTF5Qw_~wH98l!y=d&P5h?AkML&Gpw;Pe$CYU$Z{IBQE~FMpI~L*j`bgJ<2Y*kDu!;
zVsYQl@45ML#6!7<a}Q4R;!NrZoM$<!LfdJd&OH5chYlabf_L){KhEdj$&jj=I6F=<
zK59+IQ;VM-Z7C}v4t|?yzy9>EACEL-_8;dkV3BQ;C_6FN?t8$=1pcb+tqrw1@?N#i
zU!i^LZ2jInI-dHqmJz1~chCQqeDA-A@9n;2*}=Asri(0oUkm=xe=gLQ)%^D#wWQC5
zCx5=*GoL$7^l@C&HTfLwgUfHNZH>Pl5oP3J{@JUk^Ls$mHE|;qNz)ap?+D*jT;?+U
zzVL1R-(I5c6lOG?UE@%;f6w_9zb(VhDVW(m3$L{Ei~lF~AxG|~%I>dKucK!!cDK=p
z4{&m?wfgz`dE)A5{wa)a<LWnEd>7zc!nNO*;Z8=Zjj^S|K~taEhke?7A|!bC$<F(c
zS2bz*+(hAjdnzWI8EoRvUG-mo{lncFwQ8mdH*E4OwEra)P?soi;pqB@(uH@DH_p$q
zto@bd@AbV%T=x3()6?~&bno8%qk3>ztlq5P{Q8#nyQ)wBUr}_ZCuwdjpG1|$%NYWz
z+jVDr(-w@Y(XF1g-Lk(|^+uXd{lvXLyuasv`7li~`|8P8dmmIVvHcOK5zyZC_4Lu}
z>u1N6uSu-!Oe_?d5WpuH6!7@j)!uop*^X3fxVQM>+Q^qpYqndt|G#)@ZQ|7F{|_CB
zQnzh*sLj6dYn+Nr6Z^lMw+ohh`?g_yMnU?DtnjQJgFFB4y;Ip<UjAwK{r^}0^XFx`
z^yDNOrUgC9ie}!j%KdSwto_q}=g*b4M~eSjcE!#!eW~f*1?M03`5g7JFX*zEr%+*d
zjFYpTCn_;>^7qawX4jbb^sn!@UF8zn_cvet#|ODAi*>g`3_@7hoVD8*ZMiB}|7&)@
z-@WfV0^Ta<-8j3X^O2EVXVI@iu?G8pD1X(J3Jm?XHNx3xnt$Bt+tZ)kTd{Y2z`pI~
zGrzBW%og?kXqTSLj{0rJzw<SNmS^2@+jc+y&CBXbZ}#LISAYNBujJ4p6LV#YL+R1?
zt^dyw)xET>v!_5*{FU6&`t^qo1*Y;p6mQI2e(XNu8jDw761>-5&TW4Gar6G~{XB=)
zn>TH6so-|sc%Sq7t@+NgKh@sfF_B~9V-H68%Ri@w&dt1k`P{7dmqFZfY?r^6X05Gb
z;$30;e~Fem@2Nd`3-(-pma%>-=dRr|w0SR06aQnC;;m|5tkop<dwtkGz3B^GR()N+
z{_Fbn*Y}p&vUB*{O*Ebs_Tl)imVf^JUjHH^er>AB`*)su?jhsJ9oD`^_DM%JJi4dl
z*1o!o&*0j(HCi!BZgsyTUiN)0k-GcJEAi=X`-J9W+D&E$UixJn*wFVWYIcOAiGcai
zpSJlQ9Qj^7<X!kyNaT#dw%Y>l92*Za9Q|^6|L+qYeW!@UPWWcd{$@pU$sEfXXJ2m#
zzYI<dk!txczIorYc~Uoa1%(|--Ly8NZrAa~lYX16?lJbpzWjD4KhYx7Gu*RD`>pHb
zxAqJ7$;3@C{53(=ar1?`l{ZT5f9p=pc%J;%x{3Sl;Xf0$Y<;!GdE)w`eKL7_*%oyA
zWYt&&xT&i~)n8w>wl+R&Z(V2jzFURgzHFb87ACy(j=dH0(u*^a8Q)E_F6TJ#;Qozz
z^?UO?yk!+tUre1dZ~8T>J5v(A7wUdZJhbFl?S>y^Qx4zAFKa5-*#Gy?OT+vfDjzQ&
zi*Hp=c|LdYG?~Q~t^HSTT&{@e;MkRFk?M0U;%VQ9|Jz!Zb6>I&bHC4|{;+=knw5rb
zXYQolIh1yLMp}ISvNnN^Wjm`H58mp1^)BV($|H5D?{7(#-#v5g@~PM}yDr{Kf4QsA
z$~CD%)AH@HzeV%+#gtBbu-o(b@!!>spE^{ZU15)2^sSHQNW0#zUzOim{)xXn&C-6y
zdG7k_ORlZ^&f*&Sx&D5Jd&-~Hp8`J4S@L~vVa<oV`iGWsnjVdRS;@jX;m>-nJryZ_
zXQzl%^Z$IH?4HQ6Mk4%feQ)}v>VFG5cJD89VyXRe{=euYsf*`)Dr@%ZtG^eFNH~7*
zTL00~zY8C2PS_Inh$;2&J;Rq>O02te72D=#z2DHetA{r#(!KV4PlSp6qt#ROZ@%x?
z^fqg`V&eAuU)HXDCdmFj%1xQ^^y_`1v%2$kc|S_qKS^blSM=|Dhtt)x?6wQsEjzto
z^@l`TyXUXG?(<)d_`Lhh&h2^AwrI2LsXu*Pttc-nUc~x|SmvjB+LCK)@18&ZuH~z}
zM-#hb$<|-nc7OlLb*uht?Z4N{_g~xob>D}phkEr=W4a{&Pk+CZrF`%0`a6yb>|g3H
zI#X@;$l{r|{o8}rZq}byV!f*uEwAVh+p}NOtGe&w`?!SIDJ(UAQ^g*=`*ApEM(g*-
zM}AxGuwP_#y7=n0%l_9LwfFV?`YuskmG}Aa-FcCwPgs+rZ-<r!r|*p~-Kv+nN3p8t
zd#cdOdX<}TXMAS<c)ZpoUw_NgQ_+{yp6t4RdiyGCktmMhCB@+vr%kFo(!yf(y1Xzx
zY1-0-fop5C_6nZd>e>_^f12m>jrDuxUq6(*+ePrg;uR;KXte&H=cMBOfPsPYk>e~S
zjxN^Ph5B*pZ|%RdPx){5*HvGOqNDa@Z(V<FmD81zx32qi{ayW6FP$yxzx}19@&Er{
zue24~9UHFy`l8&{Tiaj#U7N|;QN$;Z&gW93sJm9;p6BlQYg7*l3o}Z*>3HxzGI8GG
z)mQ&t+Hm&tr@kK^*Yv0VQ9LO5)$+~XtR1VrR-cagTlF&(bp7l5t0!ike8=H+@1b($
zoGE;l^-mrCQ~XijYw^C;!o5NZuChv>66QF$!RztsEBb~~1`a24qe}kEKDE!PpMKt2
z=#Fhu=Swbz%g-+zeQJ@pLPx(r`1;L953|Z&YwP5d3M6>S>V03$v+HKzS5vkBiz~KA
zd<l`ADmCNPvp#|8JiYQlF6R$7=BaWRe9v}R@*(58|IDIYe;WU-GTmri75{0Gk&fun
zUbozKwv}-q64Nw(X7X;jn*RFf`r5jkx81ManX_-z^YYi)tD@IOPulV*#3-q%V*j$*
zUo(_%xh>^BeKGG|<bu_!vrU6y=DN9*9y!1N?fIp~S282#Tsn0(c=@!{@SviU`*mAZ
z<x8A4-SqmVqnHf)mfzPOPO^SgahCsO^}F3p)wQ+PR$cPi8+!8fMVV`V<{s_n6P+dY
z>~s3HmVL{wZ>n^QiGQecl6CLBSib+0I^PAJfBAmvs>)R+cdWD%Lyp&rL`7Y-`x4o*
zTqsteS!dOp`R}8+O7FN<{+<xdy?)}yUsKs0Mc(T<!PELy%XoF+x-a>&r^>cp+ZUO!
zwQ1e#uh|#tHr6F2?wKIn$ts!K`?JUBzVEa+rnNcWes`Roe6jlWTF#!Y^=I5Ru2ub8
z(^9Q}Y|gQRqQ<&q*{*r*$s3nH<T!oe@QUx#W|seN(dzW&J69t#p{_#p!}GwS`EPsm
z3unC3eVf30SLv?I*M$LL$0TR$X#TK~`-|k)+k4nJde(SdxmvHc;;U+Q{OVm-r&v$b
zo*J{WK5BjVJNI{-HaR<!KXh6aD$TeQR{B8Au9xq6+`@mtAKku)U3s6JaHdN@__Bh7
zhe*P#U*C@B7jAuhSA4}H>4m{X<tbI|enrW5QWx0f%0=G|bQAwSVOerweoR19+_92&
zRq+pwXFOF|w8Ph>cGe_ZzdiZZ{6O(ZcP4i4%@;J^cz+_3$Lq&$4fdUAOxeMnJCS2v
zXij&{^ma+sbvu8v->TWm9Q5w~F6A#(Z$0J;ep(x@ANBow_^(%gBUT9c?yY!ovi$xH
zmFY^GmL{t|UFPo4T<}WvDvR>`IeEd}Qfa2;zh@~+%PI*iWcj=8_&Ft?gZ($Y+A$m!
z$T28e!256Cm;1Fl;v@~keVDuE`gY!9nqHM3xqpMQdAK3(>BZean&0HQo`3pXexZEE
zG21KMUr(fF9bdR2_OqI8-P<RB>r8h3FTXo`&%e2q7u8DQYhU;5p4xkuXWyw~(=2X0
z;y)NEk#ue!SIW7x1oQZXu0MACJ^!5N*kswCsfSM$R@)woz5k#*%yg&gb3vA_3$p*t
z@jU$-&$2muTUE4fu_MP9#k-r&9lm&?=lbP&UXR~*=3BKFJr!9tOQrVz>-)BM6K!vO
z-?ZMduKZE_+}E0!9B(rs7j}1jF#Y%}+1E*8%f8pV&1vixUqAULUDKm2+8QsCtbg=-
z>X8tup!&tX|1>3Ux7lBJq5RmNkAjo>7s#Z34dp!eSE63w!Gq<VkJ<cFUViw=)^nH7
z=y=@amNk!l$X49Y)Y7O||NKu>%khd<2M5zRuSr*4zW#8>Qz!WErmXjG#O8gk4mbbd
zGEa7y^>>Y#aq3?p_6sDwNI6w}^oDYf|I&SJ@53%{mU_#S$9G&)dYjqwx<C7FZ|!)r
zT2WcZ;n%&E$X%TC&ScI%v^`DR{bVBNgA1u2WP_w#{~zD~&+iA@12Oe4;d|;Y*36f=
zu;O`Jeb2GUs<8|E<DHK$b^L$4*5T@vDVh5OG&tt|+5gktT<utP`s@8Y8EJ3dDj&X<
zY%qVallrXp%kP|C5H)S7>HmXzXX2PEFAJsXUi~YvkXz%`nf({scXwX6vd?^PU9j!H
zj?e`m|DLD2&A-}zP%<K@XTDCI^wULb`+L2EF3evhzxUm{hPlVBgr3c;JIEV+p}s}u
z>k7M=^8Mz=mZpduKlW#JOnA)PFL%G~t=wvLwf@D%O1pjYA0M;c{qtA&{skYM1dpX2
zaZ7($ard-=f#mGHk$rrwhhO;Sc>hrDZ(8oKe*J=(mUdz0;gjFH?r~dum}Qoem(I_R
zthT>HGb@yI?w|f7Y$)(8`oXG6AyT`4@BSxmeSBBG8Nb=SnJ3q8*W57i;j{R^&MC)Z
zr2ehDA;NzlDW>LO!@hmDwoOX^+jLuzDY?(f!7J?9ZvA^d3_dxm=l$!gsAliB@k8d1
z{QgtdFP^B<504JJ|Ksfch~C(>r$0)krLJGU@Y!)j&U=4sZT9m{Nz0NK_F5t=t(>#}
z{srX;x{|DN+rMxBZ*uxM|KuZr$uGBMWZIok$>`Xld13#&og1FpKUfgXcj)3S8`J3<
z&3yieP7rWT7JA)sZEL0Y;kWa>|70GN`Cl%jn%!Cxezo>u@!4;jHaDJE-J4kL-=w6m
z@BZHVu@8e{efuto#$Uf;@b>$&ISM=n8Ll@u?|#|-?yA}S3C+q@(U0ya)Cs@ooEKTA
zXa6vLXLaM}2f1@=G^^@N^zsbeO<b|jt+_sVh1aYw*TuR|9xRv|XOO7q|7v01o*VHO
zg3J`wEq1p|XjrHGqfk)VIl}wHk)O4-iqbs7wWYSp7QR0ELVK6l;zC}z=6DaAr&ag$
zig@39shH{a*<C;Lg5`bK`>(H@{-{p6x@Q0XyD_Wx1TJUc@@fzEKmX|6uElHS-@j>a
z*`zA?H1F2(f8Xm5I!4z{EPnMl-g5rDQ!Cr;{a=X9FF)-Q->>?3!hMIOKQ0`%m&x!C
z4s&G8vURur;N!Z8=}zNiT_N?!zdzOr{N%J@F>+dKlX&;tuds>sxp)7z7(UJ{?)vWe
z^+(Ml*HpO$sgJ8#uP&2+a>@Bswb0AnpnuF$xS!qo!FEIReEiw9g{&;?mpmFiOnCIY
zxJuWh(k0~X%nutq=Q(<o#cL#JT#C*+D64H;<NoY!_Cen5uRq7WuX`|U($AICcXq`1
z?>{^zHeApo@c-HmE^5x)U;gapoLpC&wnuDsn{Xyydaql>e!)im>0fkxj90#1bjN36
zx|aTqQ`~>NPygHZcXR5O_iuda_r1UNPUYRd_lKCL?@_Q27Z%+e{?4rL`{&q-X2;q0
zP3>o!WnWC1_{;6=&F`;@^pAHPK7Lhy){Si6*Q!UphD&Z=6LS52SGT%d()Kk@I)!oX
z8YPncNuL!AcUa@G`@EN@{j;Y}PrbkY=%nAuIPduvZfIR!eNjUA<k{bvn)~Lxy7*v^
ztLT!C%od?vuYPZ05w-oU`<qQ{{_V`DT$6wPMNjYRpN^B<WiN0#_5Qj!=G9i)?s)#x
zGkSf-^lPnl-8$c5+X5#6GtTGtXXkBTU|{QXoTb#UinS<8DCKX=v3iNK9^5-C=KcR0
z|Nf>`Xx;Dk%U*|n`5&?W)zi1x`}Xxs`ZzDVsOsuRuebNV-ujv*z`;5F>B+AfO#hfG
zS$q!M-)6wMYQ_7vNAIzE?+=aAvbdG1lH;IOm+;&D{ndBB<KC@!_djai)m5+lMSXo~
z8!>%pZji`xqZ1iho;+I~bA`j=jGI*KxlQ{|cfT;>DAwY?pn6WnSo~y4k?S3Shg$_w
zSN{FFyQw!wnfdV154Vy}HBB^Aw`%yfaRT4Im#;7CYag~}yu_|$cJ=j#<M!zuyf?O1
z{d0EaUsdzeYeV<z_wT3q$G!6on|xh!`Rt1`YM#WOUt9WTY1PvYs@Ja`EvZn|T6DVV
z#_9K8!|&IdY)$?DCGKz1%Cw#C=|Adr#;(5KC6XT{vighP^2%zbtQSkyUG3WUbeF)!
z^81fk_<Hso2o{Q{XRVqtdF6~r7Mbx^)qjbL-uf@IJA8fJ`p{JY-*$Mu`oGrq)zhj|
zd-DTL*H5o-HeX*Fzt;2BN4bsRtACruY6o*g<zGpB?wIzbBCzA#Q(oKe!aN5U<JE6v
zUyb=Y>HCk@)8@Ip^*A(Nc;%9+msRVhMX#QHkkO>a_1jw$hU4Y7WmmT?R_xZ~-1PP6
zf8NtgQB`-uC+z=T+STQ{|B=6~UC!zE)Bk%2OqO$*es8DxX`@4XoeTaxk=pX5>aXSc
zhk8BrcBb!}<b&P!J5{|+T0Lpf-srFIqQ2(+kNaFR>!aGUuHZixoFo40&J*OE@pJCC
z$9V@Hw)lvi=1}-MXX(`L4Idi69SgsEuYmQY3)f$Ud7aYHALkqv_L#0_o_}Eajn_HH
zPyNbr+{XS<Rl4QdyEmt28r`p~k3D+#pK;xM4xxs_=X{HjBMROd{bv>2xcT$?Pk#g-
z8!vr**&?vMRzzsZgX@*<XPm!3n0B&dg2rmI+8^;79B)+zdq-SdyeiS&{>bW(|7-Tg
zO?w@B{q^;`@84;-RVfwU@G_leeBt?gHtF5dWLjRIUjF9)1W#rrtEmhB*jDp8-ZXi4
zwx%k=uuX++ZM*oh|9>iVU1i;WJQU<F|IxI(=G))e#g6Y<HvU)?H05;U)M>Z=@BC=W
z`dc&Ortg0iHT}5q+aJ8!o|N3bl~MJWzbSHM^hv2D+duqq)y~-bIG#7}>hUGU(*KIq
z@69-C^v8Yu-tfJ(n>hCV`@LWH-v7iGc{8>go}^Z0_ggo&BxApt@QGP%YCk6?9rF>N
z|3}z2LVV*J>18gPzitq=XJg&+LorRe%Qcx_>b>0`^R<n$cUoNkkiDq<o})?lH3_rM
zher~hvA>J0NZs;(!8Y;2tuF0B8dFNX-@mxXYNz`9kHHmh9+zHQRwub9rF})Q_kaDr
z4^KItjhpq4t>%2x*SbJf$=cxW+v}F88l4kAD0|y_$Fg5v^Q?~7?OONbgG-)4()2oI
zhvdiHddWWff_ix7S*T>c5tzi@KchMK;qG_41S;+3|HuxFn#rkb{clhE-s`LFMclZ;
zRFlH&or301I1+g?yy-{&Qt!!DC)fV}(0($`W{tQ!2g|j+cQf{{5B+of#oh(`T<__)
z%NZV-zF6F527kKJA-g|&>^FOul`!{u)QcY$@_JSM`C8V*%Kx2DPlkJnzc{~tfs2c|
zR_}k2I1?kiee3M||1=dx>v#9r{aN-&Nu&L#XeP_`qnv%y<fZiL5AWWp+4stk<!aNt
z`&_F0%pN;8Ez`ezEPn4h*LA+HKe)KCOt`Q^e!r+%-CL!K`#1c%&q;9j-(@#hAJ^ah
zF3kS=%36UQ(Tji7dzlt$o}R+`!SgiFeof88ny-GWd2~$TTacmt`iQ?LOq9<Z|6KiA
zux)D6<yoHL7dOw?`X{Ddu5)65(%F!w|9@?Zy|*_j{_uoLi;qudVqL{-IivmW-Igaj
zPGTAkrH+hGGs^pW=5suL%=PDH%9$-1N2_-H<TmnJA1wZ0(bQGdyX)iUy#4m{rR?h_
z!{#f;gJsOPckk7my-x1?CF`|6)^k1i_+5VA#T%mc65aNHnCaFu{ptMAcYfuxFR-lj
zuM<+fymjjb_t4)TnR4yquKwSo=pX*%@5}hl5*3>sU)#}HF>MwH-}i0v)+zC=pZxGn
z*sQ%bIlSNAkNW=NNA^TV{qoa3aUH!^Uw9w+X&C(PZ0?HvEW6VTk2@`8t^4C<VrXA>
z>$?4ZzuLVg<*V)9FG$_D^~sXyAEiQ{U48eyR$29(`h_3+BlZdwtPb;uU;k#`{>^#U
zo;(VS{}x`WTYddgi>&;;c;~OlOZHZ6)Lp%7-^(Z8Utcs}U|{*;I7^x16!_MUwP9;}
z{=5>nS~vZro93^L*Y#J1-p{(fdhWlC;j+Ju{y+Nt|LgBr_t)27TC-}!EdQ;q_fM@_
z(faQC_3W!pO-fV~e)9DTS1jn-o_+n~ugy>5cCFigjq7WY<LS)3F;@8(XZ(%YBN%QX
zQ}fj~_kVq*-Cs_tnpJ;S3;){w>h5a6{M(byX!5z)zWjPu{QjnItyktPn!WGo*O#@`
zH)ieor?#&9kKy_k$z`q2R=d3ZQnfo`$=0l|#b4h`My-$9=oRzpss3tV@xMh}Jg5Hu
zy1sk2Q;civ>OJO@o_?)dx6bMJ|JScLe(=AGzN-I7>uBk!TYuw^e5$Em?|JlOX~?|b
z;>z0Ve?R^E)g-&<uItyoZ;B1#)RZs&`WrF%>Y{l&nr+|LpK)C&6uw$$xy1ieS-t!}
zLLJY~2^Bpp{r&0EUeAPgMi%=77Vnq#YTT0SV#@e?zq_$Z)Rpb(GhE~LD22_E6R%eM
zIJsli)$c!9>NsC6lra#FQJ?fbdBgPI9}?G2Fc4gFe#v5oj(Zj+#`TAeChn3qeqtpt
zXaBPPf7$y#crKe9`=h4oEW^VclPf;n3Q{TbvgFa&5^&`0#hP%L7kp{SpB``L+FNh#
z_MheHteBrqf4+V@y?(}|jYW617lppq>HWfae&6oggf~Yr#FFF=zmG9dJ6@)`&|m%k
zL497u1v`$KZ2EWcW1Yej*6C&^9H;;976|jT{&Kffz4iO%l=BY_&E|do6DV*z*z|<n
zo7$S-ZDLFQ2#UNgtE}VjDG}g(ZnK|ty48#aDxxoT9lZSegX6Ii27Jw$U3N3|gO$8y
zyj;ZG{zGY9(3`4`$2qgyESmp{`S(3vSyQ%uE$7>+%<AcflGs{YN>b9~oI5oV66`zM
z7kPZmn!AX};lQ68C;LDB{QoBV{by;(*dJ@E_7pki#(aNz>HXh`^?D|SudiOdb(QOP
z)atm2iTZUd-XDU37HyN;Ec5h@%D?^UAC3OlJ=y;HaDngVEBp2fDqf$leX97$sk8pL
zElb{<Ruu9rDK+_%?EUHgrV9So4gaBhOlYU$&mHeXe_Xj(7x(VXjJ(b3<Nh3(7JpFk
zu=eI4j(+{Knx_OMf7u>CSkQCt-kLv~{(G-zQJmjo^f`I;8P!=n#(OI?MUQ7p6l#CU
zdTveg;=L8x*1ic2-z@I@zrAhtdhux&i&jUh{k0~3wb)+Qdrz0W+hdYc@v+z2Y{mZv
zU5%4uM4zu@{_)`G*C+OJ0`g5#vTKs<g||oTe|>JBlSs?Of7~lB6@2`8_@LFo+aLPp
zX`Kl9=l*^AuMd8Hvax&<6>YO)u1Ox{I-=i_d3pNT{hw!_+JCfipH<<%ADQ;*9Dlw#
zZkBkn-_x&USLBnaSsOa*W0rmr_;NwL==a9=pIvlnzja4nmvD|d77~}RIau(bJ6mm^
zQRC?!D}LOseLw4X@dTGUlTT(;|Kr#=VPV~Vft;Ps&tJXYrEcaVrrH1LTN?vY_Cd*~
zt=muk+y1()CU_cOg8aXeg_HC4a&}KyvOCiDWxf9ECx&$q%D=up-TUOz%c!5PpMBjQ
zq3<=_Tzz5s-taa1tJXjA6wS$fvMIgv^w+CDl;8dRd15n<c&)AQ``3TmibE3jY_?DR
zTQhzB^(J4IWXYF%lm0B3+W#Vc_0pHMVzFyt|E&|9UOoLp+O++=nM+Hq9aXqK^Y`Lu
zYvvzRjH~I&<bJnbZN=4E?QiRn=6(s!zP$X&*XC)}(<2oa7}(n!XDN5QVy!)y;Vp6g
zMsL~P`$u0tw(|e~H~!!6|NH*#kH5A*>TTq&qgU_Of4{Z!zRe=B_xq>cdbgvm?s}+O
z$(-I3hmJ^c1{MGF-~W&I--O+(>#zQfd*>hi?0RTvp-i{mrr7Z9C;n~9a99{qW_S91
z<X-6|pPudSn0#UT`j~xDB0kys_y2aQ-TG!WcV^t%@H0QvuYT#eZhzWo|Fm_HsryU2
z)c5P3wyjmlzw<R`_rGhq*L=OcaMJZ(JMX_1uhYJtE&lrM=YJbL?JsT5dNS*~)3m2?
z@3&Te%@Wxw`s>%<=v7+RufOOCi?98zdSU5~NBS3Y!^8GUswJmfj6dT(|9$M-8~JhZ
zX3;xzCjDJ;At=+<>u%??{%VC!qC2`@>{VZ1w{GP><ww@xldi45x>Wy=_KHoJJZ1ql
zQ?ITpzJ50%d$ZKstS_qn72QtmK9?2KWR~n{TIav~HRr4PVyVeLTtcsi`1toduucyC
zEO>2UOvsIk9#iVey4<^3bp0fm;`hBaaB^c#c)pGImrZ5fmsg=HYqJ(*pURj&`zxE^
zp<p+@2~G#&-|}t>%CoZDpID)De5KiB_KyAb-&=O<n)B})Pg|@3Z&m%t@BfVV)%9Kd
zG5ytqtl9N#=MVp0ZD$ww>HO|}zqwx?Q?1DKIsNIz8TD5Jsuz_4E-X3z!{gdfDdn&B
zI}7f-Y}xyk@mBi!$5Rc~A9PQ)u6X>4XWs80P4hM1oVoedqV}N9>HR^6qqqu}dF?rO
zMBwo66B{bd#LVknKP^9M@<p$ES5Hs=_U1be-<7srm)`zT(N(u+&DUT5pgXxgl+C~7
z`(dXmM$^>eXBKX9;fw!!<TuC9<Fi!0-9MOVGV6TK_V27qkJ!a+jm?a<oPKP}t?la*
zi*7$CyWn0bsFCUIVj28*E^Gds>0jk{U;Ll3Gxh%}fvj5|39WuAd|MB;2^H=(p0#`K
zhU6#n_F4a%7q7mY)!@c)ub$H{GHwb8$ZGj%U(qi7(Q8#J8y%jtMMsNIs9K(R#~gQ7
zxma%9_1||s{qCw%w^da0sFqH}#eYW*njPT&uGFM|>h~tLZ#wJj-}f+0UC8i8cav{n
zc>1c0<30QL2YBdyPn<f__Fs_k{=jwL@A=nWU%TPpcb?7bb;Z9)6c?{Qea?|(zxnDo
z|1BJP_c^Oqip}2nT>sEx`58CzR5w(;F=gFt@a1ft!Fr81Ydy<dWV};ney@MsS~Kg^
z_qf%2?XK;5ef4_B>ex&9GRcAs&4R26tHTz2di<~T(*-s;Pre5;6mxu6%#ck!X8Y>j
z!+4V}g-5<Y9u-dYcaMJ$|H&1<tbVV*!?c|XvNu~6^(dRnoGjQoVSl;$)GPmjU+(=q
zp*iXEe^#-c<esF;;94`GmfUBLP41>2zJHr#<_3!oX?M?@{t~x3YR!&*_v;x~m1+bN
zC-L56Uw4>EUY^x6;F)quNWj|<7ye!6U~g(YG;4Nq_8S3<QbzWmUkO34zP$f)RN5+I
z(MH2d{x=f(d6%^IEx3IuwU*7t`q|#o`stqh&y{|k_%mOBPl1xMjeJ$&zLnD+bKie;
zX!|kF;@>J3F5ebCZstDp<<IrYe?Qp$YZacAuXFu^$JNIRrcW&hTm3dZ{J{p#44Kq@
zRf!LI*Gu#F_TF5vcKW9`jo*y^&tACm)AUqPt+${_nt$HQ-nqU0p{#rF$fP+=+rQL*
z6ko6~_TFocnvWOwE*~!4<9AE$<=>*eCvU`TT_f;i%IkFVUsYU>m-q9^E&gt0dGAmB
z?p?Z`zfS(X-@APWZ}g>xM+Ryin-4PGzPZZCNiz8V$!0DScIk5r{oT!99(}){e7L6m
zPNL!c%IfSte?Mg9hMoF#kmXlf>C1rLRehTm%YWN^=Kt^W{}r1(oa;YIyw<P$d{L`s
zcW8FEPT}@gY3BOLU({3XYM*_w`)Ag_P1TtdI%*e6xq|LWZH_UUAj15<a5mqJ%8DKd
zX@k9=-#IcqnDU~;v-WF&>kR`T<=^#97B}TX-@cx1|6c8o>)g-Ar$4ot8-&RmeS7+G
z=JN8gtjVlxH&T=Dp8R}ReO1(hp4@j+%xtba-1+gFYf_wBQGIzyaE9WGO?CGl`5w2i
zNc~#+xZ7=|#rvHzt{+{TH@(+u!u6fi`%^SOuCJYNyjJOO?VnQt;aT_B=lzO(CdxTO
z(=Na_Kc>?6an1a`wGU=2n619QE{wl!Q}zGb-~RqrpEW&8^sO{Y@%1Yi7YgS+d!M(6
zZ}!bS0s^m`_ODP;U1@k`o_BV~ok#4|PyR)}NZel2^kDX}+8wg->kk(G`>XvYcgAO(
zNej0$HJ+c(J#*7)Py0BIod%CBt1qA4oRu^CYWC!ryqRAwiCxbQiZ6fEoAKegXNCIe
zbz$aZk@i1}s*3*KU#k=QqI&Tkw~DHK{r`^D%Z+XJuXE-9xA0Eg?7P=5|5sGVU-IYs
z;mZ7^Nk4w;UH|$#k=xpf+cf<5;zw~4o?hQC=EufU+jh_N!xABjcZ=Wr_E+yM{}%Ut
z!?gE%*Ih5V6neBuFMhW0i~oy^^)F@jTfLX-+vU$_TeW3E*Y+nu@%6FI;&BQpvnz|g
zvy|`GRZ?@9)cfw(UYFSK`-7yuz592yOZL@Y!4tEWercJhv}3RKq^bAC6cp9cw(Z|!
z{fbd$=09#JLy3cvR?hY`ns-fRqj&eT`-fRiBnUF*-KjM!elqv|nY%&X|5cxgVmU0v
z#mF8N@%i+<$~%k>vuu*quhfdQsjI(#W5F*LwO@~x6koa>_xoxQ%g1{fi=#OWS*EfH
zt1R7<6y#?3`})4)%N}s(d9*uCRC4|Ch^KjH^m%1Q7H93LdApD8e#0_*j%j&dyyc7K
ztJMzg`j}ttYgRsKrth2^41dk<^lW-{vsR(4dvU{QmBwjP_s;MA$SS&2qe%GmyBU`s
zIb68f`05XbtY4K%k;QNJ=m(4Vrk`-S|LWh*MLh@RGMg(IusXLaduQCC{^aHLWz)DB
zHkc}eG0d3Y!}#`RiVUOF%zxSvIte?2QqOC9oqX=7#l9p-R^pSpVQTwo=dP2#%FMHM
zr!Gt1eQNrPAD@lSPqI95x!2oJ&UdY^Oz!Vv?_BFIuG)OGRPN=!lQVmx96C)Eqc1)9
z8o2&=&i(D@CuRNK^)*j@`hzdY&QH58-PH)KpSSqrHl`Pa3R0RYK3m**bYJ4bl&y0Q
z-ab6}`QiC-Hrh}4Zoc4F+i01$#O(fz+f2==epjbEE?ITpbi-BGl5h>FRUafT6*UD<
zJ367e=th51^ujrFAMM-!;*EUb{)E>CH!Q5pG~e&~JEdUnj+76k+j#x66W$)#n&zT!
z(lzOQz)|DXcSX2mXWVG~TgumB^-D&=JK!xlqtcZLSt7!`Oa~ne5_}@KSUVgG4d&>G
zbF?%mDRjhKkJ)ah7V3R6>h!AD`!>y<dQoqO3}4rk`{GaRubS__EO_|qkL}yuFA%@*
zzvX)Uxmd@bsCt)~Hr=zI?B9Fqjn>!mbN8g}pC^7l;8&x8^6CJWfXJ)%2mj2iU0v$L
zS$0u7Z?)f>R?%6aj7Mz0nFp|asE+!%>e#aNb|Kd%OI`b6FD$7ccfWhvhM0JDo)tXl
zTB-#FjJ5X^Y!6*bwY#*^>iRBQm4fW1ed5X66@zA8V2y9xC-+~ak5Rt4?~G^5k)S-m
zQy<rgF3h>(XteA+??vUfDRt9x`eZE6e{5?gU{ny9p&`b_+18Zk5HUeaiM6RQU_pqE
zt8hz`V!#9!zpvBJhR4jh_cXF(YH6vL-%8C3cJlVutRBzG-BSJb>F50&{_Y3Y&kt=j
z*z(i;&ff6SYQv|$ChtD4UUTjj*Sz)%X>vO=cb#~u`iI-#d9#OndDM$P;-8{Eekh#d
zV}5x<f$g`SKW16m{?D29Bl-XG^?c?B-uWI~RVC~5^S@!`sr~8&9qkqjM!^X)DmgDJ
zEKxf(`G=eWgQLwP#<PY7m(;JBdzaJ{FPCrm^Zwlowz_Bkr<bRFK4AI#%35QuuC0gc
zdV<$+?v>-udKH(UJlB}T$|+fM*2gccNA})qJiw?TGGPtq0Lar08v=ApxLP|F7ECze
zCdjqEbj{tURmH2OPg<9HWzn*wana758M5rZ?=N|}`C;MV2Qe$W%93u1A6oJ=yJz?5
z-TUoT<2Q?1dVl@>;={S6?3-KV=Z2lVxuEXg&)6kZF=8<{4qwju|LOg@)B^w7@c7!B
zAJX;+{PGSx88A0wUsvpZ_JB8UKDW!o8noLlmCHLTDtbI1dqKlf;f6Kkya$gpGtHVj
z^A=O3_kBADb1%`jt&EWl89pESYW3>QRy8eeY~9qOU0x?%E4%QN#NrvtCuQ-quqMow
zh>ZMr?w620*L9cWvsZs)k2h;5V3ZM=;3Le<d8jF1!HpAUQmjo26IMicDRQ!SecxO0
zSnH<Irl;IzQ+6%beR}n}_Fr}rrT!nS`xk5@f5p!KL#F@cFaQ7Aef*nzn|r<Y|NEbk
ze&t3?{xkV-&h@~&x;P2_uX)0;|C4_{j9IqO|9)#&h+O7}?|0|iaTJx%o1{Kz!l$d8
zf3ttC`~GMd7wgf0%eETNd5hNFPt{&=<kj&P*IRx^vaXwa@cCi>kg{JooOjkv@_Llr
z_;2RCOIx_!KJq{7;rKr0_3m2-{Wbp0o7EKgX7_GiRoN@?Od;NR0zGewCp+Ddy3p}Z
z+Ubwwr$y5zZ_7z|z^EoNK}&?ImF0}Xh5#QGX-+Q{g^sz!XC80$*|S{cCa3qBsWMtm
zEu;?LxwL-%lRu{K{5D=Znf-jR#@l60SGPYszI&-n-tyo6=56b}PcG}7wl{WHbZ`By
zw-0MBZGSya{BFI~{Rh=YF6oKAIDNA;QgJ5Ry7mP&`VX%CGQXYad>~}eo)_Hz=PfK2
zEegNx(E2z?`oyOY<7@`@dev|7@vVE5{&pp4U#q--b4z;jxw?D%=G|}OIm18YQq4Zr
z5B|&J?{7XHo#3^u^V^|*i61<B<!8ITTf6c;Q`Y`!!D)OoZEF{+Pk3w3VZp$_zSnV<
z3MUt9?S+0pA-xtofvEo;Zm%yD?*DVY_W%9*)#c~EzQ1pE^#9k_8mrdY>p15=;?gou
zd8@s_%{=s6@UE!m@8?Fm3NPELl3%)Hjc?Zfb@6LLkF2j-=e@Gkc71^RdAFkX`~Gs=
zpWt-5M=^WiZl|Kvd#~<}oA%^sxFNUxYL#o9pKHHn1fR@!|7GvT*wT!R#b2hrx_bKl
zMxkF{w#G$m4BJ}#Y2n;!ZAV@HUHU)Ki|>cnzUxbO|C3YA{@S&Ek9)b@_PT%8F<)QD
zwE4ac|5SA)?a|2%%{uSpCrW2+{;#Pjv8X+5%Aws$oC>D(JYU^)H)*{_U%pd`wrg4W
z)dyRaF1q3F_D{Yk>%rAG@5|TQ)=pe<zxamJ!AZpxuTn$oOw)HiaecY&<F6gtPA&UT
zUpDhbLWjVEH$OJ0zIXiCZph(zIOd;^<=RvFXR>7HdcB)`|2nJny~(?G$?5G2@U1>t
zeCU6Z^ZD6x{~d`{*{9w1UA;$U<{v|eDX+@bE8bat%;#TAH;Wig;kTd@l93q)*Yz?-
zobiv7nm6x|;El=bJZ7`c1#xmpG-n)pXMXYCy2bi2^RGuGD*KAf=Zsu4pL2gR$HR30
z1sZQ{mPsD=$PYaJvG7qf_ZeH|m%k)=*#(%UHnEv5zWOdw(N&nQ|J&Q%7hKG|TR%E8
zzB^hH*nYEi!}{wYFCBbVet3Al{6}q_-P*Hud#{--4@}`%^!mm=tJfP8q{Uh6jKW;s
zaeQ8>H23d8wa0e5?oP5;Bak}lYchw{KlwQcduO-r^f2$6RyFC(PwnM5_*@RR|DT}d
zEPc=M#^Mgn7hBbGZlwQE(bw-jJ5fbAVL|a0^Ev6Ng)%WJ{pHuoT3@TTKXvR<ocK9I
zU*n+s>@q`#Yw}Xvvm=$dE3DGLT1s3p5Z1UU*f-JCVB)dsZ;WT_F+@%>;N$-pH1E->
zjjW2@eGF+oUmMi12%md#JzV|!Ef?eWlby0Z30ckTHcFX)>f`LM;gxpv_nf7zzw<lU
zeB<HTk~?wcv3vJV_*TV#SgGS8VVcOTnvq(&X{rA?IjwWG+)=hBuNKXJ{WdCak_eyv
zpPDZznbV`Ln_DbbnYQ-#Hx7>~l|$~H)=T@osP9er;9<n@zs~A($p1^<WHng)<9>$U
z+AI5%hxHTl^&YOL9ERoGr;M0SKd|69b%Ud0Ven2fAL;LcJ=0<@bWc9D|Kz9tGj=!}
zeWAbKRk2{_SK;079|u-nF}!MYp>2!MQ;+E1pZ@Es2F9+x;r9FU(Rb^2q)$nhuk<7B
zt9|*M$?7`O>$<+z?6h6~LQBET|H5+JRcuw-Zx69$e|;z6vamjJ_TKAXB=Yt3UtLq4
z-g==SY3}F5xc9qP{I7rU;%WRt_33}T?sxP}_<4Hrq<0F}ssgGvU;4$ydUEaEyLA=&
zUs!6jU)?FBfAZHaKM$`9QsJC>=bELJ1IiBk4l+vLYh1>f)u?bwR?&9ha^BnPyk!5B
z{S2&nTl6>kk)7IGF}=E~{YNElme(In^Qz%n#Imp9ghAW1!{rZ~HubdJDwprPdDwO_
zi~a1|D=SLY*w0stuivz9{)M)>DCXqi?~giNr6k#U3SWd>-yYq1WZ|)yW*3&Pe32jT
zkm)?pwwfhHETQ}T@;}>44BYjPUHmP%xo=C^q=kifGO2qK7~Ea=n{)2IRkLsUh4Pj*
z@w|k0)6W*Dn@tRuyMBd?=-KK2e|bN$x~6Hln<X^#>E!b#tLrB3m~J;+MPcfK*JnOf
zA64GmAECOJefNhC_hZ+4KG^c{gOkzu`&m^Les$lCbg$2tc++md|CU`n`MW31N|$ru
zJZ*89PpSOd7cS08i<7y8Hkh%`F*DLwRCu`GSkkU~zxwjHkLzm{jV@g^PuKdzyVGo%
zUZcc>=F%#5jk)3b<JMi%U46>GZdG&3eBteOjr^`_WaHU{otma=v>$WL=U0mS=RfZ|
z>khRyKjrr(mL8q_`#r1antk6pXT4c(dba9=N1W-lKjyl&$4`7xeCz#J=E3cI?>5z4
zf8^x+c=~0Q{rcJe*34h`zS3S&YkGA?*dG<^2%qzN4?Q!C{d1&L#P{~fcU9`&?XNG|
zzh1vOUPQCu)6+v~%YU;;-+5*{AuE2O_^Y*3+Lo-#|6^`u;(h%?a{W*1U7x2$R9vrJ
zKS_|kxcuqgyMbJhHQU3vz2_dgKRr&YZ`$>{4`%e&+}I=UsF-mmPW|1xzfDf5a-0Tx
z`Cm5a^KSX|owaR6Uj34eu+9Y~^S504neTU6G?Hsmz2IJpE{@toA3uCNR^vYT<9}Xn
zyEVtv>#qoF$3^;<zngrqb??1pS>Ke_ZwX&LegDGXu;u%r`0e96ObqQ})0h7dD}H7w
zc*|q^hUxD^xkYA7Gkury<M*k|^{aI~Q|{&NKd5^5W=xe!s_^yQ{{>GOdAh!0`ZbT4
z`R(rKEGow(bI)Z|>zGd2e)8MtEZtSE>a)vttb1SnF;?N0BKJqV<eXhKf&QKOF42=u
z?LJ$*udw!h$<lxCj^4fZ{`oKWtgZX&IZl?pzJ0{|>wZK2-0Hg*R33b9J>%qGch&Xo
zzsY}8BKParfA?h7Txo0nw9ZS@aqp204eizMzL&p}RP4UD?`HX{jb|0vTeXs9m)YuN
zA9%CZcIEt(^Bu3aT@I>!PgcKMCTzJg^Op48)pa||)|Qw5dMdg<W9yyot{-aB?#Jvb
zs`2yRFlqf}A;pjU4GavN?;U5UbZW8IFVqPa{I&ha`Xq6I)nB{6R<4bTc73($t5)@z
zIInH%rfY9~-}$%n-E#k5Q{VTxT;++cTmS3(>q~`YnpM+6UwyUvo*~%N^8fn%CCA-A
zmaX~u@zne4G5hVqENkYi*1x{1H1FB8wK2P+CZAF7yWI6-J<m&(Z(>K+vUsmJGr`$@
z?<-kxPMdGP*xLlNYp*W+ziRdMWmhX5tB(JTt6m?k`BP^0O}mr3FSLE<x}Sdh?3e2~
zyO-tOb<TKS(siVHdwIycy?5^I4Ed^i`fi;6?*C`khA+9fPXBB2&Z#kX{IkVh{a;@n
ze!tGQ>dT$@tgrFQU#<Vp|LVHS$=6bU{Z>C+6BB7Tx5j*0B74d6-%|oD7G+<oek{hd
zH11bRjR?o%h?m_Dr+>2c5T5ki_dVN{D+zO~c9cDPBKBy#+0A!9yvyU5pSC<#*c-Ej
zY45|?ntOJB-{`aGy-=LmGK=<;Hq(P8KUMr+Wfxwom*;o%XR!-UlKO^+%#%&nRvQZ@
z&Dhzd`1Rml$GYUO=9f#WCO2eWE>4py`fzd5ltg(R1G_b?%WJ>M3N`YZKAzJgW40wf
zp~dq4mgK<eJ34+$e0oRm{A<@uK8{A)zfb<Gzc^5C?mJaBu7mwmxmV8jp7^<8TWsOC
z<La)_9NFR<B6WL>n5ADkD7M@3rEV`cpEEV<x68r|&5+oDs++5K&HE*)thvZ+Vx~m?
zxhM0wW7mhS3A32ZFL-O~=kV2Y{?@1lZxS)f+`W5E?vwqumvE@eF??8HBly$p$Nr0D
zQ4hi&rao?dHnH%3MBJ=@QZux_wJ>_mtbL%6^Iy{T_xvN9SRa>`^SHUNo<8Uh&;M`D
zh5rZbA7{?BH=6Z8TTW?SQGSZc{eL=7lU0AT%cty?UdtJ9_ugU_{S}{G4sM?mq7?0L
ziAzcNbC1%q6`zjP=(~k=aOXZ|`6G5te(E*<<#+GT+<WwP_1nqcIrIJ+E@=7Vm~kiH
zN9}swq0n7Tarc-4uAFRr@nxZ%m#3emH{ZpFw^OVqWXbrP4=wv-vdsD1E6az^o1zv~
zzpe3PlW&>clpA_Pqb6wGyP8w>j!QeD6%N_C{<e+CdU9RbK5=0|XV(#S_xop7N^8dS
z_B=C_>^?F#m_?e&Y0u$Pcb&ejoxP@gXJJs%ckkc3dKJq4#x*6L4`JFi*Z95J{ZdPr
z+$Wt@{X5_2M7%H&uKAR#AJ|o|;UFSdcKt@l`Mr17{4)<)^WlGv_x?3DE?<w!y2!}v
zPCPbQ$xq<Qo(`73!Ruq=<TQ0Bakf~byk50peOL7AzqPDBl5@nJ)KU(e-B&ihZt<(_
zr)El-`ue1V=kTgj_q>0w@5Rr~Si>*<^#xaxbNzjFjIR8@r?fEos9fX{iIWe^X70FL
z*TlrXZ|?gSyZ)wz_*s7~u3Hzj`u_U()dklBT;~1^yVuqwEg+htHf1AoW6j(%1-EpL
z30-!QOaHp<u@|R%^6xe7y>qrN`g8BUyTI4ER(d{6?^k+P%zrLD%l4eBwn%$X$uyrN
z;m2Y(_%>Xc|CeQn?7<DybsF<dl*ugjFsKR<;1Aq<Jkav!zFm^FtY5Z0Gf4mFT4{9V
z{SJp&%ib&8U$y?|wY~RdE%3g3J?D4bgBfc!i(B=+Q~mxm{8{&t{jMM0{F79!Oq^qR
z!}0jEIkiu7jwnrM{MKBxFv;}iZBC=~f9uP;UiW?cTJGXm-d=kD#ocXd^rzfZS+h%8
zU;_Waqz7kIxYlHxc3Ab{W9Wn^oh>Duul8B=T)9?wGwnp!ubo<l`b+;u<TaEXKXPiF
z82@_V`e%n#SlE;A&i$bmWKj{Jf9GA*|F%!RS)wKiE1kY2D1U0*>aT|H^DVb%hW}7}
z7I5&})(s(9o?3-Rq$3)CFJEVT<*8Gbw65#B_w_yR9#_`h__RUw_}gm!=wokKva(HC
zQ_}g$PU!CRj$ZnqeNM`XC7Ya9JIqg6F+V}4X5XZDIsYer{<rwaTh7x(v+u_F?|-7W
zaHru++g%?v?s#VQ#Jm0KS9iTzTO>8!>Rrxud)UUiOUH+&&Y1PXl?s<_5<e7d`#X95
zFS<~0%H4j(!X10^4MRA%n~ZAL`2TM4m^SNT^tSEyq}ylrMjXC=x_HIXk1ZmtK35k!
z`MCIxO3C@Ne%~kGuRg45e>X_}{lBB4t9P4pseS&<!cwDp&gQthmVXIv;maeN*ZpsI
zsjUA0Blo6-pxu|5)7H=b^jH2O>)n6tJ{cmHZkycpU32)Q=KY`NlYeAd&AIXBuF$GU
zm)A>Ae*KH(>G_-U?Kah9R&r?9S8eCd<a|)(bs$9|yh(DU;jCnZrH>!^N2>%iUgclk
zdtmY7d%;<)x-9&grhh0;$eS)+%CNX#?c(3QtA70we#|*r>O^0@=*gGa4-U*Ua#<83
z`YLf%PX|9wbnByTjmD`ytJ2<|u6@<JpeHY0!sY$7dsgZZT;Hns9j7^MzW08U<F0;@
z;`#Oug;L+n>#eZ;W|sVIpOuPMJ;SujMc1F|Gz&C6kYmp%)Ut6ufBE%~+iJSe0t+4G
z<~&<|no0Pc!h>D?7j&1(r+iA;nId?1$$96CKW$#76tbF>OHO^UdP|`UWB(>k?afnX
z?)k$#^_9egrocZn?O*@%Jlj-Xn{@1niO*r@kZ+r&b94QewR_*+>hJH>&u1odT+8J4
zxq2bb{-pZqrw^Za+pF)&Ugq&(y4<&WN-_Iwe(VYTvYu~awSHarzW4r<4x0+>Uzk@X
zU~c^F?L%qX<?BigGH&p&zNt7Z;^geKVA-0SqjhgZ65F;0&z>e!xKncVJ(Jb(^K9l7
zg}jrwcyL?9x|`)MGrisJL`6+jZ}dsz@6$0W5p@!qX0&MjnWG!`b~*A_fBY2t>!Ey%
zlEth?!7NR&chBsbwKppB-9^cF+$n84k8yvydr++HZ_3B~ml2^456$eoe#|p+bI$*J
zG1)2)ro~O4tg2t&;;2`&{NawdJjq8t=eDH$<2>@>?!7m8bsU{}@5;0*-$kjOe3`k`
zQP?_5JLM8f|Mc1S>)SW2H98nB+rztNeHzQ!qMxeynulsuthD_;>2llcmG-y&^fl+|
z`M)VW%AsFA{U%qE)%9ODyA`(0KQ5zj?sv%Cl=~jp=O0XQWb5n^%QC+0w`{ikAy&J9
zZyDE3#opb0ueN+?e6N(u^e2kxRi7$1T@6YUPWrP^{O-H##k=bRLVU_AXS|X-U71~z
zVk!NSH~HE7g=aDkl-Jd5w7J%B_0#<aiw{OmyZi2VpX1K_@_)Hm{`aCpSz3?R?%Vz8
zkJnqC9c%BNZrZ)}`RQv}Q~Lh-9Jw8{$x(`9r~3Nv^<^95)^zQ^o4)_POzr)%rdPgy
zII0_UbNz*$IV(**-%S#WH?1xDc0^*|o4fCe=js#(Tz?(3k!5SXNBQ-&O@8m^`~8WM
z&-?v0Qe}1Q$Gu1V*8gtHSAIRUDo$<wkx#<+U*F#t6kpIXZQG%S33^&=Rxhku>+{S{
z2(6!gWFG?q`(ejfs+?Y|wP#;=*SOdJzyJ69>HjIafB*mY`)|bC)c^nW-M`FT{r`Wh
z?yu7qr@s3AW7faUb)l;kg{{A~FZ4Y}(`PO{@z!|P2ty$gA@(2fp{st(K6W|h_~lhG
zzYUM;?wa`T&GQxSQ&yC}yP^Ex(?rWj%Re|e7k#{%^)<=y{_dFFhg`1TzOBC2Lu20S
zYdbqvzs)`zEF@hW&*${WBGV@8#I}CTjB96R&9Ae5JZt&sM@|9@R1^{#-&r<%vFg-*
zwlnj%;`FOkK|gCIISR3|a@85xOz8i)@9uhKu4X6K1#Ijk4~qM0B9?Z~XKj_%N$t_%
zlb^vnyY@=;!|%fHVuc^F^f<I+J>p-^?!RF7h8R)XqJZCj78<y_%2sI_XT9a_ejEMt
z!?eBm&U;o})B3u8*5A0DRqD&0{EOQ6PUl&4!f&^{Zl_>>)7TSxk2fjG?7zh<&6W2i
zOk1?;X!5`1g*Un7t&}#Y9ctCH%)NUjOuM@4Xx`(V`bk_~$6`fNx9$n-I<h0$W{$v~
z7*6eH(dSwYm4CV_w6nZrXZ5%LtLMG?ySm);d#t`^{W{TqM}C^zf7`M#{<y;W?7bZC
zuW!57fA8+E%`c0d#yx-cxBBb%*Hibcjq-i6|LXbr>regmZdH{KIC%BY0X2`sCa)|O
zeJwn5Wq0TnYq?sB8K0c{6;^gl|MrJH{><VhD}<H^zgiMssD1TSfp=|b(XX#j-(OuV
zeI2{LeEGLkU;lBo1iTBs;Q3qs&8q8B_GNo*udlAG@kw8OeO>qy$?5yg=w7`4@MUS!
z%8<G!)=5GWiXL7U|9n{HIpceS>H}f_T(&MM{1ffxzsB`r%Ua)Mi>+tOco!rat3Ai+
zkl=eOy*D4-7|U0FO}hK*O<-Ni_u@-Bp~Y2S{>ePu=6!ts_jNjR3Z6|%iRbyaNL`d&
zU$<8L_eUqg$Upxat{+smeOKXcy#DN03XW$IyzA^`h1b3^`nzAUV`+`~pD7~Nf`=a-
zls5F9y>I=+>Pf4j_5@vBU$<{(=$id)KTKb<zd2~n{?y<d^JK%4eJqzA{Fve4C$wW>
zr;N18|2rky7iQd_^TCbnW$XXe-Fxrtn{oE6eUm1q#?;>*RthL?{bjQwh-;c<;(xu{
z(SH=HpO~~1Y%Kq?`}WUM8yE4}|2kPOf6md{P<-KGt<>cI|J7H0{k3Pha)E=0>fIk=
ztY)02A2hhP9?5P}*f3$js$-8|)qjW;6k+COs40v1I{A+fckS!1P4R|u`<smBR{z>N
z%RKzo_6I8#rY?v(KP5!SZTSnOr(a*mY&FyRo!qa(_H^mTUGMg2{C*o(qxiC-<%5^<
zB72R7t;HMuPRVi3(>aiTP+)uZsXd2g2N(U{`~HJ#n59|#ggu!nYtpt(4GnxaJ0>c=
zbfv;RAInw$RIB3j8`eM2{->|rX3w#p|II0%`%RzT{WxIzx?lM5*9VJtyr0_J%znhq
zdiu5#R_DVm`lSErU-N#l+T?wE#HNL%y^oC8`B#Ylj87&@c`N6P9rg!PKV52i`~TCi
zlK)pf{qtX~|1ti>-Ur9^qQ0N4>t4CX!g$(sqtx|(mToYdaOAb>Lfbf%^LAw}@t!}F
zBxjcy{fIu@)~B7bCfgt(K&WMZT=AOS77hQ8-Z0$U`?0|3u5ZnvO*)*?=J`Kck9o`M
z|Ia)1V%?#I@?EoDcTK*pSJ(CY_5CUB=l*`*w&%!v`zGPtyTqP9tYvhcy4}$I`_rDH
zcL(!4O()i`54ih%eea7K`A=<Y-<_GK;Uvw+kel4nx_0-2Wj-5gbuRx|)YP_hTPWX}
z)ibJ{KC*K&oAR%aFKP62U3;Q%^44YhCY)ZRJNXjR*{!dQoa)OzOx(A5ALq&!^Ox-3
z<1nGYVeJ9ee+PVYU7!9BPWJV>H_f>5zBPwybxoc1`|VpBv+edbSrpHI6MpPw*ZcJ`
ztIwt-Hvg^uaI;kLhQVtQr*&qP4tLz2ufOM!I)(B3zU<3?gx};HT$Y{6ylMRh&$H{}
z=X6^e@Bh}eGiHe-qyL&xqf~dPSEsM(Bu*;gR^DT~biH&S<6IT<-`Zy`-?ERKKK-i3
zj*0W$Y+Jix`M>I%1L2<cGyfMxN!&QJYL(2YtX#Kcxm@{YCZ2T<3%tmzwMsuDCT7js
zbkC^u8)s-KxjdA(o#^i4oEF_JwujB+W5~zoPfFkGT-Sf!d9k}H_EhcSp77GeF)Mb|
zZ79Dpuh7fLpj_+s2e(MY@2@^pa<u)*?-Y7}@4uny{$1ZM_FN7r`m6q6Uu*R`=3mY`
z_4{WX4`1A|A?0yM#-^iRB#)(jsXrokMd#|Ko6Xy7?>+6Gw&+6SbiWv>3uVVI{r*$?
zbmiL9jq6YE{qaKa?yIcHvpmg;{;4>rIPWQ}y3fIRWPjAE?(XY5oMqmx*d18<O~;+j
zM!ie-+V>aL{;#e-H~OOz8St0;)%`U=KVN@ZDIqQOAuxI4*2zZ8U%yhg<(qLl>iUYE
zoObqCt8N8~M8B(9yXNAz#?Zh2S?bs{Pv^hNvNXP|S-E**it_sywZ+d5-7hb<i`QRh
z6D6hPlUBLqxBd!;=raOpao4N;?=R@RGq0)g+%>)5k?{*9y`Qh&q0`CcCH15DtAN@2
zy+2o!r+$r_vvbu&^Hqv1v0qQ@Ix1}5aOv{m@)ydxMZOp7cTB%_QC#c8k?DF-`_(%W
zl>>h}3N$o$2sl(L|2-kfD^L^T`YT4djy>%7P0j>equ|a=&?P#@9cQU_X0a9(YIy(u
zvfauud*3hhUAwkcZM}N;*7e$N8-KseJ`(yr_W!G&_0iwf#_d|A{dIlMy3&<-@7vE6
zUJCW$c>R2G-i<q4OB0;JH&_2Z(0z?{QqzMh8BU8eg)^*6Hn3Y1-h1#p`tW(_TV|;z
zl%DtK?!NNb;mA{0_YVzIo^c3CyyW|@`X&3x!t*zB;uKG8V(3@6$^2p3^$`EsMMqCR
z-o&!&ckV)$;?>Dh_P)NZfA!pzsa-+;*XJE*kYjP$5fF8{LV;V3Yh6@m&zol-{Q5VQ
z=HwQp{5r}f&~`9-_3_uT#|_L*iG?jUQrW7qb%mH8Q|gK=L+1?X-Vcj%c<PN_nWp?T
zc*xw&Wg<}cV*0hk6D317oLV}&;?&YMuW4*A+(IO_Oh3C~qP>igr$JF=N5#GT8K2EA
ze-y3s@)1ki+4#ue`$CC7ot=!mI<l*Ud)AtHKQzcF;hJD0B;S*sbxUyhbhpln4{Ww^
zKK|JE*I)gA#Qh(F5>*;(=lCPclkeS$us-mB!KsCrfrpFtReeRfU%9Ggn{zvlxboy9
zCOHl&9o*+F68LAOd^~yFU2?g~($MHHQ7f<SkDt3z_-oPMqSgDR?o67?xiw~m<I$F>
z*@ja}6fe)c?EhRL!==ccb>XQt`<v!_5)Y?yiEK#f*^tx|*ijHT&3KY(`Duy8Ta+@4
zr)3`TtXj9@hWu&KD{?QrXETTMrTR*Ucpo;ebxPduEdEGghtK}K|5Mi%Sje=#UNY&L
z-s`JJ%1>{#RbLnS`kMdwpLy%IUax&U*U{pg+L^uIPk)F%aG1YBO_60j<MsLHTs?~7
zGxFlYT3+qV{#r6`{g=Jvr=PBkJyLb_ZfyL818t!P3~sh@9IM$HelmXb<HENke{LLF
zw6R$+rH<|KgO>EBFISd5ILYwV()7>kHOuWM?0UOm-qxbm|E!x6^(L)<6cD!Rm-5!F
z=k~^2ef58R?7zL?s^5bayZ*9SU8eNK?q<i)`EU8ZMtzyL*X7ssS3kqoPxFtPd~MbL
zkQbBx1#8<s<Y&}$-mNaJ7ZX?iQreE=<aF+~X#Lf@dJb=yd#dQ-Zq3b!bI-r4+%r{i
z@}rDnQFrexs8|(pu>7acj((@exb<>d&$zlA=2iM5KYg7<wfg;cQrgD%r*52?W-0$(
z{qg$W6PzrP<<e?gkMz&6liGXjd1_Xfgi)1V?AmvE(P8tMc*19E#k^+rn2}?l^?`rB
z*2i;?Yd`qJhB&_CVgLK1sJ)MYE%MT~tNS0VoTTKCBE#Ln5&m@6qs6EH{!pIV621DM
z!X)F}yA92f&FkbBx2yb`Jt@br?=stJX`}zo*DrZ7;mN<dg(>B$tve!a<ab1bo^|{G
zi0{ph&(Hs8$N0_LHE$AY`y>6QyTfh6LSNthu%JLJ_I~xM`9JfOIN2rI>l;rf1WH8h
zXLICn>b}<G@Iy_+Zgp@#@;W`Oz&#I=-@LG|uxm|XU}$4!U}$GG3~aK|V-rq@n-sSv
ztE2J$)&DQzw!RWyvd^$}PZRf=jNZv5U3|)U@z;xmZhn5neagtr|Igj`ffF*+B-9I*
zzrO9(_m{KqLrA*U*O1+}^Kbo>jLZ8he|@)r!y%_SQPG2MTplJDo<4rpm*<b`j8$**
zqyNtrS=JU`|E=p~ZT0`^=YLbne7xsIeAv*O_j_+bzG(G_IOPL@ZTneO#Mex0pBSu=
zvElf-1eU~1;}4BqiY+?RxJ<Y1UFm##?iHT-2knfu%d0N(Ug+|@y2;+5n{~sx)}udr
z^_Xjvf4H>#+2a2A%KZA-dHEk6rv6%)`Ty*ar)S^Ce-_)akJ~qNRrUKAfopsAhx>j%
z)qTxc|6RwbI{ib9yFNOtPY(HgT_!~Cp-#(Z|CyaLepY-q_-c=6VDNdF@Y;_$nnnL#
zzY=|Rv^(y{$ye*^9lEuq>puP8)^9$wY@_J=-CI2F?N)nPW8t_(Y74u`^%sjpR&#Hb
z*`h0ydSPLTP)nNb`S(@!hyLWvnyC2o$FKh_3p{_+E^QVwm6vR=|L|XO-H*Z*Zmu^T
zIqXkb9`KsEX5QYa-|Kz<1TX$}x>M1-sM0{<oSNS9g97dQ<C~hEuVUHo?cMJ;mb35H
z$?n@<-}WO-p|9y-=h6P(hVBmc`8O(FdcSSMPr2JOd;Xa;hq@kU4T(6$wQR*cj)#&b
zAAfrq6QCsWK1{n}@_lZ77WbUlUJV6ui+=rjfBj109&?}cUvk2RZXD9TrDt;gh{-hi
z@mX$PPZHzPe_vSycUt&At$z3Ti$<i4_rF~`RIi(t3Qzw2SA19KdrnR>d+pUHU;myw
z-E@7Jf=kxic@`Z0Pkug*Z?Zc3{)_O7<qbdP+>_v28~EUQnBBRFFU_0WCx7Do`|t0I
z9jcob{P-Zvvi?`p``!1hadU9oU-mxbrijQXzr>!0KR4bu^vmkHqNTR}n|VTWs~Tjs
z9KGAWtK($&{^X`MM%T*>PdctyOEKnetc{(%=rN~79KV<to0qV(0pEuy#@-K(N_u}Q
z>r^<GEzJ7uJY(N;&g%CaZ~Xpk-ltouuVf`2e5*fh-He6pM+%k79eOiUUq>pX&hI~K
z7iPC`^POLEer(%=b5$3Y%vCU!DlGc{iTjk>8y@R=*~`l~j(xW;-?lOQ$N%g*we>Id
zmcOrSsoJCeaOxjj17nTU$XzD?;@=idHsVc|Wk305f_c5-^#CKG;BbYe?q5F&w)b@(
z@1I(D{~uR+%BhowCHAZKl~w0|O`4qg&#zTd@pNml^zyymn;h5Ia4r2BetgFD)a1P%
zR&!eZJLG0&K4VVK0fB0sGq%xs|GysV_PQ>S9ava;^zj3>%I`0Gp3REXW_-+{bL0K#
zf4iiwxh>8+%Ren(>%CL2!=x_T=XB)j?7M56TiDy9wQ%;6<I9h~+tMVu{<y~0r$1I|
zx`ig$hdNI<?^yWg(f2PRQyV5eNOr0JqQho)@__rI!u!+u=im9aE%v{Don~cuM)dcB
z|F17q8FPO-WcYFN&$9yeSeBkj=Zb$fMg0Ek%Mwh?*>BtftTd|r=<+(;cRGFJt?2O+
zYYwx`y)|Wn@7@~?pY`KvUte!}y?3Ya>9_?gb0(+PcL~pmet&(=H<rnn^$!K3c$%0F
zeO#pOnHErK{AjWK+*-@RKi(G`?8|IkT~0r!pU@<z%@(?(VCCV?G%s0OMK)1irKFzi
zrSnSdxPER3<=vF}F{Ai)@rU;EQtr^j8DWOcD(jAQPSQwp`#pDub@Ebe?ySFD62;$F
zsUEtp_wS5NpU+$0I}>_RKw$H|F!ft=U)P;TWOfS*wENfgTej=Q-_&2<vo78J5i)6G
z+Rc^U&$TXQyMCnV?dg}7pIn|ee|_p-_4Nl0Gv?J-?`M_fy{Hx=`D)W&Dc<jsxh?L@
z@2u0~x$@`d`@$a`ZgvLci)wdtF|OGy{=7rYVX5*S2|vY~`mcAc`hUdN>G`}1mbL#s
z?bkMY|F+5K#qr(y0@*^}bMFl6@4viX;7Pvrr$2GUUD{iterY(&-uV2~R__-|A?w4Y
zT}*o48FY2k>#wKQN?v^(S-<>nxnA~`nxA$1tCza`Sbwtc_59sSF24V&81`b<+c<xN
z)%PFrmluepC;pY3W&gp)rPuF@R9ax_r<kp>PnY>VuH0q5s_k8pdHiLu*N2a>Nbitq
z+mofyKBYmbzet8LhEZVc`bTlz`<>(Whc=&|v?R>)>Au}A*<Yf%KE+pksJ%ali|Kab
z$8{z=7KaujL>XNZ)KHZ%FK_!3Id|cGn*xTW2QmyC0joAt8oxB|-Sqwge4$jL!-fk6
zVmz#UjY$habVMb2oDv1K&YW6Z_()SI?%k3#dwRX}BK|aA{}S)K>gV?lc3ZcHl;8cO
zz4iPZp|$dTS^jH&ty6H%{&Jw?goJmzozT~koM8VYYu4?b_jL`!nM41*&hYM6`CFh+
zbHm|0lW|GL%xMQ-7O^(zux!!wQ&AA(PyO!5^5=PycN71on%QBQtL|*gtKD7LZ})oN
zU4{0d7)9&wI*yIIPa4kXE&Y1;Uxn<3^|qg<e2C|ZkFU>(ER9_9;AAg<^b%3i7fPqI
zy^4aIuD-vv=IY)bFTO-g^pk#a>h$wpjNk=-Y9h*-OotmbT+k8a=5#s~u)sq}h@<CF
zUg>$wX}2O)bWNCZZ5_*l3j4!9`Csm@zv*7ydgbHQ;4;6dj=vX|e}1z(&hFXr$ID%w
zK7Vgt&R#$3VQ;zW`8oe)O}b*@)#x52s6YQ?$nEt8P0M@UOZq6i{*fm#@m{6Tg6tJ>
z8#q@Tx>GB!FU`o=-z{bAC34O1xv(%(h{u%c%Qv?x$vjAOvYyo5ZsFzkRb$)9We@#0
zgcw-Tq}B!W+p%AFy6Rqb+T2LF&4O>qy<LA-m)3HeQ24kd`=CowP{7*^rDf5^J7aG!
zH!w<xOwkaN;%!@)bRZ(bjf>NzF=0iAn~*@~!|?8PDXXr9&D2||YI;gf%a8eRJm0fR
zHCxtSF1d5JZfAbHYE}FHV7_UQe-guO&YtW2t(RAye&peUi;sWI3isW6>5l56nKI`O
zKYacF;hC0C_D5=^<ouiyZP(hpUR3g4IC-Ai(UbfyudL~xSle}3=KS*Q&N<6l!~MQn
z%zMGH-|qaON7dP@vuot9ZGCU}@zRf%stu2Qu2#4jFvKy}>&*-Jet7lH7n*h5pIWEp
zKM9BqYn7bg5vy_G`udHn-{K2|mcM-T>&#Wf%-gfr%k%TUfc-haM?{#H<zT>qh!tW|
ztWK?o8WCE8oGe~n*M9g~B07Cq_ugkSP0w_%<ZGLD&n$F7>7gvYU6nQbucT8~g)jX2
zq273fon-pcPj+9QY|XXjntew+^Z(^nGRNmR-k5k{*~;Z}swY*b?|vuzXy!@T`RwM=
z8|U9Q&z0slZh9j}-$yy-`L%F<%OgAgoeuB#{N(G0MJ-xlygAGKPc2YUm@p}WE4(T_
z&-(3(>*tTl7pz{yV^_nL!R&CS^^M~BjUQXqE%Pt&I_YxjOh!w-b<~b<RlChyFBdNR
z<>`4#{%8PbIqno46|PpMBMzGcd{mq{nH~yTuFs23mk#xuqqSt|nbUVqu2E_H7#sHa
zLu}lnZ<*`P&OW~8m+*_}Y>V?_ZfEaW^66Li(&*ol{`+&@TX$c}oN=4~m;ZJK)=%Q6
zMRVqS*(z|NVQ*MwPN@+8w-=K;zx-Qv%jmq^^k3{^W=db<)=$$pEhGLU>Mm#YTG76l
zKhi3X|2z9%UjIq`?0+BPPNX|aKE3s5!5Pla6)TwTa5ePXZ+#-_!kFvhe!4a~O|P-(
z!k4KnJdgS=ZdsiZo%%H~>Z8}Uxi5EV`re3+sh$4)Zv}G$qngMJH!&_&m&SwzGggRk
zu{Jd&ESREW&c)i2m@uJ5MNpXg+NU4iuO@v8eYz?pFYoE8mJKzP`#<hDf9$S*_@lKK
zf7buozu55R#Gt#`&n}<(_vfXrc<`_M*YgBhZXUn-CI57A*h0=a{+)ln|KG26ug*VX
z#SNR4_e9r!-*<nJ@Za|uFKgF3MebZ@A;Y3D!|%fVBRzqlK@!gUSQur6v==9Q2zc3h
z_gH0GA7B4>Gwwg<CyKn@woxH1XjAK%&`QsIG1eOmaXTEmvR}+|j-O)Ql;INiZpu`a
z3lpMslV*v?{K$K9N;b8!?1_I;r&~<GH2oW!-|kCz0A69O#Kqdw9I#-<2{Eo#mV*u}
z0<y%UTb<OZb}w9eP1C$gP%ZS$!%F3bS^q*;{;zJk_*bptM%?}-|Lco>br!s_X|Fwc
zr2g0WS8|N`Ey21kw=IA4|6x(}a;drV=C!`5nm)7epQ-%YT@R!dZN4Vbo1DJy@~Wwq
z!){uC>b~|?@Sl4R`;Y9~&)$39JU#1a<Lu><!GhPiUl#wYKXNE3B$Dm+sw)!RMr)VJ
zyweIfw{?5&mflxK0~OPx^lGEI<{WHi%<`+>w>z7o=}Wo0UQ^*~E~9PcC2GytHsJ?W
z-kN35+`z!Vb<S~?8fO=4(U+Fng4fr2x_tlp>r{T+`k4JOyCYUz?R!_c^Op9XQ=jil
z4~={F!F}J=$IXlfW3KHi)d>~*)3CtLc>1xhy<EKC59~PnamLy^EdQH6x3h+XC2@V^
zoF{hdgJL&N>VeEzZr^pd+mE!a(rsK?Dlb%?$adQ6+5JY92<0Vd?HxQ1Oc*1Uo_zbi
ztaZO{*v6V4-l9+byZ_r5zdn5R$$Hseo7@5_#3YhW9ez{$ou^0T&x1(kBhlL@e+W3@
zG1D=<|CDam>Hhqsx4-#2GEZ}4XPxk=Hfw3RXxGQ+BPAz)o$9s7;;>X>miyQg>z>CZ
z?m5Y1>#Ze87tj5)?>t!8a3EmGHH%X(dc7A}SiG#LpXYHRh)eO|mtXOx&b;PPP!Y&o
z@^x)>wAZJ&pS!yh5AHc|+9Bk<fc+K~y9~GgpOaUX?c@K%$H|Z+@vL>tfqsEQ{`G8!
zS3TKj<@_u2^+oxYaodzUCrwM3{+6xPVtt^on(?2i{!NStX}QdD@siUb;~z%vzWeU*
zOW~?RPRF|WCGQ?jk`{fm#VE~M$?W-!H@iPuy%M!rcgmshWaEin;Wd1P&y{|hntp5h
zhsRvr(&p1rzlz?EYUY=&N!WO_Eh-}C+~(tJgcFlb+9*lSIadAjOZmH9VOIYm{;$5O
z8{>X$wdTA_O@-6D-q+4=vcC6T@Ops9uXU|`CV`9>G$(F9WPa&i+>@ufU&ZZ@d-^w`
z<A2Zl`ir&yV|K^=jlaz5$nf}xQ@g3F=Or$?Z3cIz^)WjftT5rYAt2wE(ZUwBIZLDe
zXwJPa`U&ojF5mAyu-ZO<xmSZEf6$hX|99Wa+5KwXKejEx*XPvl+Uv}Hw`$YEs=3)a
z+4JwN{&T&4t>^35>uW<V)>lo=aOCde@G?q0zlm?#WrNaR>1LwL*<Wn`N9~VVeYBKo
zjr`hjgQwfeU*A3VR7FGRkz9;QH$(cq_`=J_zi0j8y|YA4%1L5@{S|xd4@|=A=7nFo
z7WL-K28riSiuX4ES+Mx*466;*DzWPWB3wE*ZIteL^+EgP@sFF{^&WfC!4(ivcKPTJ
zrPD7`LhIMXo_;NIZ++bU<*ztT?*8oBS|{<;s_WgXKNqI{tvzA+{f|J)3EP_J`7D7G
zE&u(|QY?GvYg%~O{r{x%^@Vc6d2b)4`S-S*-o4eA_1LlE4TsE(745wHMGl?qRXFmy
z_8RMjKDEX}kM4w|s73S2oV5P_>SF2rR~O~h|9bi{P4DTc4<ZU3@p>G^0*|lX+ZdoQ
zZxZ`trm1`64$SA_TKy`9&1PH3{==&@Yj|xq?}&YNVrja-aVcwx$oleY@qb;@W?3`~
z<z7D0{BTNLlvAAGhKb9s9y-17Zj1Zl`6s7yhKQMomu`19;I-#xKXUzXGjp11`-$zf
zZ+{+Uy#6qG{^NN^Yxddom)Nt+%J;reoN`84XO2kxltj%{+XU3&!wl+=UDipt$hh~j
z`(>+NwMU<wm@U2i!?BM~=4Pw&gn#5U`uERb`4+B^C+nre?iYAhT4dc@tb5_i-6p5t
z$zQg0P5N6Jv+C6DI;O=BG<V--T+zfN_gA%Q%|1qvyJ@Q7&#oV7`p?pQqa^)o+N%E%
zc4c-N5)(|SdyJetf-H|&zr6O-$vIkveSHs;ebA2b>0DeN(wL+!IaVu2pZR8CJv02<
zJ`rV(jaQFLda+#&v3R#8(Y)yIsa#>V>USOP)z1zJY~Qb6=vErCbP}(YLrzlHRD+Yr
z`72-Szr93iztr`Uh9UDG@kW`PzxnBZXp?NZsMy(;4}%t;wicN%+l6Plm%Ll*fr9)0
z<fh9t>)+P)%99PhVQ_2WzmrqCBV=0Azts6(J@+d5P+HtQ)ikyDkvF7{U4Gv6M<d~C
z(!w^ozdV-59!oe(w3pH6?=HE1<I*051G6jG*T0XfmYV-W?Qi~9S4qhLr`Vt2TPl<%
zXRs%#pVXed?xXA8w`S8kjznjFFMl0>XQ}z8|NCOqpPu|Eo89)lr;#!K?BZgNeLNSf
zre&yp=eZ`6Ic1B*C*hv?zgN$CddPnI`u)92*LI7%Kh5|2=CSv6f&BTgi8F4reE#~0
zt=;8!@VQ94O?CCHl}U0}+RDRbuJm3q|KIbe^@{QJvm<9r{~vH=N!zRqB~mj^JlX%B
zUotpK>>%Gn`QBu0L2kC!(lTdi52ol!>~>$W|5sc2(XTTy8XGpWe_N-2W_R{Kp<V|$
zg`+*X@;|hnw9gA;lk`5(yEwc?^6tsk@r$q4cl=nk_u%6D6DD|HwzK{2YUIQ7<D1H-
zm8C4L)f_Jkem&LaahbW^sra9i;Ou4l9j>IL|1>sy@x=XSvs>p=)iw8@{NPfQ=br1G
z^YEdV`qOXC*H5ID*sNIpV_kNlK*IN*Yxw!q^1DS-8{(Jze)??c>7Vsm3!L=qoZhc~
zRHDD@XjZJ=6MOT!`bRhpJwN`*A}=+zcHh*C_3v3Gbx54-kDu`A@$0i`-}b#RJO1~_
z1oueE{WoG7zyDqK_S~{HLR>n3PhBu*pU7ffvd_fIwocMp|CI5EMx&?I`h88x=cla`
zytuJJ?sHVudG`(0OV@b(k34Q-;2^wZd9)9+b;GfXr$rWLw7glXwKV&7P}G#KDekLJ
zs2(e>$}hbtUj6Sr)7jSbd*>gyH*MB~RlBeI6x94|a|m&L)cJ4qLDw*a70bM4|J!@~
zOcAs83X@9{#iq<MaIyUn9BbLyx`V~*@`3x$M4PT3S?s>fo+mf1wY$83>y@kZF-vyu
z+`2j?^!ls{dHWWP<x+Z@@~nrLnR|+iPb7bET_)x0`|$3~()8)y_B=gT+vasT{6Wk6
zx>Egsju!_4ixWZ_59?n0Z)$HW^y|rvM?9x>KR#pBnU{K#>rB_Si@9eWD=2HrJ^aM~
zwzyCK6R%{M$r*j!YHd#GgPtoNUo@TUC}H<+p5VO2UaMXwtFfjow(?vN^3>?H=c#Mk
zHcsa}aL4(}hf_aolkQX+Yvq1Eq(1B1+r!>_f4J}GJXsxmP@rjY%0<?@U;fQ!N&2pr
zwKgt#((8kcYj`Gfu8LTFP%&OSH;&Wd!i9VT{iAok&x`L^^`l&QrTXLlTR%%n&HCq9
z(AgUE?~Lch1ic$dZ>@^;zpqd?w=De7*ng6565H=<Dq=o*zvR3hZ)7e!oxS&TWucOV
zl||&V(-V(hU&ykyV#jjTYqd-fJ2!hSx@k~#?xVr77@>@M*}JVPuSxA&e7yMk4xM+g
zrmUY6vvOuUi=W`}ULtH`#cW=aGZW+Mf_OBh@9(<Bwl6&7VAGVts$ahdK9V_g>ZEDG
zmuZ@H${#w`{CGFDCvZi&+uG_Mr}oAw^ep#!#FEUSxorR1x}du%Oa8sA3oQP(>+jL(
z^+Dpblc!tS)@_>b|Eo%p4Yy9Lz5bIgoJaq-N&igNncuw0GTD9ke{lgn!%u0-{59QT
zUwuNu*3V~q7W^}!L;dRU>CfLEnId@a`|_C*i=K77U-EwU$DikPc4i*DI_v3)bgrh&
zcb;hc7fIUQA;w}c^Ih%6IXk93y_V{jTA6T8#^v`nohEkamCp`8{F%%0=-oQ+ck92;
zfBGdu<%37i?YQ`z#!DyoAN0@qw*Bi0iDn-6&f*<=gj>7%0>b967Z!6!eHz+R^`mWS
zr_j^A8~x%fzuLsq-#3llQ<9*=@kmbm>E-D>4Qj4t8h6+DMgQfuDk^@u`szQQ>+koy
z3vu69-4wfji3kVNhJ{XxE&ka4o6zaf#mZ#xaH?}#X-7fz;dWz5+Ydbp8E!b_@vUER
zx-52cL&20#0p<GBfl7;v6n`b%DqvvXE^wTs*13wc{6$=7nDUYAuU2(y%~vJt@BH=t
z|Fylp-}=q}|35za9%ucUJ?_nNlfUM@o|^jWmG<@ZCPyA`nf&`k#(@Ke^hBK<_Fr7E
zBjJ5q-p@tMuKFxZ7pf0R`q*Bcpdj;f@w!;=4>kK_tGgMcBzc4u+&O*ldBf+U$5Pk5
zGrm;$L)dSI#?skA7nyym4yRVW6JDY9hi_(iv-<5XW&7j)Myek9b@bQw+SPKeu5Z}L
zB=q`6!0uh4cghq`UlyHqG-?07f@Nkq|MC3*t&U0BbYrn-*8vvs*n=+4&c8QCM0OP2
z44kqrs7aa0L!g<Vxq07&o^ZWczDaK8cQq#m^%_e~6Me8mw)atw-sxaTG4EFbk;c2c
zS(4@IrL|I*v7eu&wJ6|&6<3CLXvJ~Wu8+HHOBCdr=2;oaA3F8<jb_Qj30^xpo88q#
z|Cp5CI&!=0eL<v?FXO+pcM2VjMpze2sB>~r+<f+cL5>WYdw9{7Z3SNwP0N3Yg&fye
zd26e~?E}lp_RGgsn!jeP+8Q738FXc}=;TZ0k3AeCGf%%`UBuA!h<Eqy|12A$A8m~N
zDWb4=O~apt!x9!hc-T)rcy=H|R{zYIS*wpTuPNM_`;Xb^TvkBBHjV|0G+U}$AMYrA
z(sIf+f@$>=nMg0U{YSG`9B^PYJbSx-`*QvEhDc`SCJirHjRc#(nOE0-UYWY{bLYg8
ztIsa-CO`ehp(n*18Xnaqq{Jb7it~i%mkm?TZkW*E@ZaiDwdIfU`i#x<y)OlYn{v6<
zt_<FPO|v#@UG}>A*04DLl17L9TrE?yPw(<M6VaMwwa;YV&e}qOiATCLRz14@QB)-T
zh5zn|_0zXzK8&m_`W3NWZ^!<teXl+XFPiEa#;ksbL2HrSQRA?W{nfkWy1yU!z-6h)
z=Vf#xy5!|#oB7As92FxrIva${y>oL#(*fT+pI=Pc-nwZInU>2L#-!ZX`pEO^hf3Q-
z|No8>p)RMhS%d!VwO@TC-qy%vk59xo{+Er*N*WKG2)w})7W`m>`gVr<A94Z(dH(!U
zS!{Tq?+72`;|psQj^Af;;JNF1gumg-$4h?~Zmdb|irN2P_mI~WAwGTf=8p^?gYwxL
zCkpOUs5Y2Ag-IttR<Gesi^U0MAsf4&SzYDKp_Lov@yO0xBM{>GV8_C_kv04N3#c31
ztuNiLqR7U!FGi`k>Ee5V<Xu0WSpG=Z`AU7Zi`)Kf)_R*hw)H2ypVoQH)cZo=4!eUU
zi!N3!cS^pich9Kr_xGb!``$NI?4EUD-s=6F9J}uAZIXTWv-Qf%2|{^i7SFJHI#=Ok
zR87*6H2Lf1N7<iG6Pj{J(RY920SyCYhO#FQmDCa@HUBHPkRNpLPqSOp;s2__U-v2W
zpYIV5Ik{DRd7Sm)wVCfcK77x*zH#f>aPd6X{OK#%EEjlLdv}MF9A+vv+@kd|_He0G
z#?@(}|0Yk|aQ$eMrf`qHZ&1c}r-+W3%0j6dlIKk?O5SG4xzzjGs{br=kJ$EBln8O}
zzI%7^dqy@X-RFE<uMc+2tq^4Uk^jK4o!7iUitj7;Z++gvg43@LG<;Yds4mJf*-eOh
zp>%Vtg4G2Xv4!mQ_AJ3OcN|~vCy-C!%*PK5NBQjjPpdGD2>m7~^nQ7Ht?JXqJ&WQv
z%KP@+iF*~Ccq>&_%}@Hn#iWag|E$-4JigCD`Be(vSH^mNlk2-2BO32)I-|V(@2Oa!
zTCSjH_rG<rI_e(m-=(+en$}L%Izb+R{_7p;jT`g1rfKV6W0ifa*>FcijAM;ID`U&*
z6f3J8KhDMMu@w|K(kP*JaDh1^V~m=5-~N9JdG9{XwbK8e)EU&YUGZRN%)amO7arxk
zuipQ`P50iL{5?WaEonC9Ulg{@Y`c?p^Y1CgbpQCKMN;!tKl1$i^x~the5VeyY?>eV
zePYG-rRN+IqB$cr?f)-$xoDPM{Xv~4LQh!(tp92KYu<2PbJm7Qzc^Aq=hzjk-{X)|
zTko?W?)6pIvrH3hxJw&Z{66S79MjgnUM43I9GuL-uVz}@Gi%bm<<6S(=e&Ni;`i|n
zy)v0!MSeaLkaW)4%ssdJ>NAt_!&T}@cNSI}M)Q3$DB2Ko*rfk|y{WuI|LNQDb5{E-
zTXlM|aXgQ-%_PZ-pDNDn-*i|>qn^cPi=>X>>3P~0v%j96|4{V$Av>-n(?@JsSEjt(
zV^YW$w%oyD|BL<iTmIz6-l}c;zA5IVbLaebYqYoAcsSK^^P|Ij_W#Q>*Vdg{y}l}l
zN518($C0Ef`&MxrowH-I@s`Jyr~h6`*kmku)ob_PchBB6%48|E-&cRXx_7SXS*{cb
zYp$$?U*>JT_Wwuy1QFA(M$<h$=S!^UDN~-8wausY)A{s`-&WpiT;H*N_k~^!Kkd`~
z*M2zuy*T@H(!Ku^Cfu3v{riu%X^Y><M-<I|;{4fS8i&iYd(o%AXih!-^QivE?oE|P
zTEyG>G+x`@lX%8sz2lbr8{z5GIKQ9vPkDC!V0zok-3(TzZtwr?CCfJV=P@2O{=hd+
zZ*1g}uTZwLkUM^+<*3cGCq2b_+XG^BPk6VjTc3ETu+?!|SL6L8trLBjayK+MK4@^f
zia+_aT=Zx5L6IklInO%VH@thl_xzv0M)?5K=Ya+d|B4)~dM<x`t!Xpy&|zECg8!UF
zHa1h-uhgGErn%E>{f~Pcr;nYe3VM(_weMGh&z;z<oA}gsJ1V=R8eUUTxuGQ*H)rzf
zNAb!kA{%@!sr<^nq0^?ZLt|}$)30yw2G<_)r0?}l>3o@^c7NeB$-BaQ-DOLDFIQR2
zHrHX@WoF~^Ym34Z%xjNsT@$qF+`g(UyS^n(y#JSDR*6%vsG7#B{dsa<<C<hDwCqG4
zm^JrC{12GB(_H_!Qa6w3n)RR4w^mv%`4aUvYtQtWX)WQ;S5<!hCw_5GSL&aMmqNT>
za6Dv|n4Z7)Q}~0;25shVY>r%EVa?rfrSR)I*)KiEs;ph4b~7GwjlVwoJp1eEPv1q=
za?9@i%JP_Nw_@DMj*53z_J*F9`&kz%G+8FmY2q4YF_#arzl47<m1WPES>C6l)%7<2
z{RvUYH13;a(=x;#olwx)6P4u`U8lY4>@4Q&%<#~y4R;D9rpjG>eB&0&j^6d{d!N2Z
z&|W5+)svT)-5x)+`svRWNsES{@JB^K2X8-%P%9B<TC-F6Z{&hQ#@8>K{ZUbxzC?e?
zyBIHh17+(S?Yh0yi%#v0Dysc|!hio85vfS2Nm<qQEY}U6B}(1=F=^7@NuJil&;9Rw
zt23+KQNAN&dU|!p>rZd3YGOBB^Pjz_D!xNl=gl3H?@J4uT!j5UZmQ4TS+)L<+G7<9
z5oKfh36sC=zcc^#^rqV@gkMWvtzC4*UF%3y!E!@)^;QoK3)Qx-Y@3cXeEiEe`*yyF
zg3ydzf+yejHT`~O<IpIa>fgPl_~@rUrL$*EG`v5zLn3@@V}J4F7v-XVckFST5VGc>
zO5xKLKaVE4vwcq1@5#)*vGDU>|7k}Plf&Yl%n0CL9A-WD7RRe~`D#Ktc7K0&$XBvT
z{k`(TmtW49TW{G_(SBU`jpPr%FSjcru3vxnd+*Obd%yp@d}fK#!D*9^X6>1OEB5*X
z7wO3AlPB$F*|tyfm#kTnuD)*aGR{xeXZ<_*>2B5QVw=71G;jJI|9ib?dPfFZ@zR(d
zO_uq6dp<}rA8D1EId3mFYoy7J^=hxy$Cu@tm|3?l``Bc?3RCgeWwWK^5}R6h@*EiE
zi-~SpGiBOD_bq}I-BBH1{PvxXx6PjX`1jI}<{93>IhW^5vlOl0Tr)p4`#Gn^#ih6Z
zE!5EqtyM_pf8<kDeb4`aYTJ39&a-!)e26bSlk)Qhm(sDrx9?2aFCX-|S@(Z&PP#0|
zhrdaGllCda%&e^Q>s+Vb6wg`yzr`-U@8v;-1mRZMI}Q)?J$ABfvSN*qX>WMoeny5#
zoUuXhW>!R(Yp_=LBH_QTVmzYH6WwnvVA>uVa4|FC?vAZ*`OgcKDaSrcblkzfz@y?g
zOP%u+YkBDlpNR9*kA&;y?X~w7jVaUr8+G*VN25aRYZ|jV&0F@LJ!p{;ah&^A`1Zsc
z=V==z$3<xLCTz{!J=@Ft1UuJ<0~b^G<ng{OQWl(1QhMyr!i^SZ+)~0DQy1rRcpLY{
z)U!PP!@Y3RWStuB#F-US%HQvPD0fZ&x<>b;*V8j|A{O#_T|8wg^B}VPP;v$nYea2?
zL$~<#ip0}AKI;}1{8^+~F2Fa3?~HEqihmVZznd;E`EY5CjF1acjl`W*aWgD`8tYH~
z@N^Pm^g&ybZ_2es=>iNa3=Sy@404QD_RDYv2rx0kBzgzBE}0*=vo_&<WAloswVNYd
zj<oOK|7Q5^@hMfMmwh=whQ|s%K5<Wy%@@A(`<fc-1q0qMd1uV+FTY)Mt@+QS?YXkD
z&o968RXhD)ha>Af^QGm5`p=H)mzl)rNf%mk{eBds{__~GmWyTZ{o=&RG(EjVuN*9@
zTwh)-*`fGo=OM0(zc&7hp1=F+x>xU>$=+DOT9NOO<3DT1|Gwt7-QOGEG@QBO%{5`2
z+`fnicY}Xo4pW-kS^AQC^&{toI7PAS7kE}Rr72cecB}ZG;KK|d7c5x0gydhHmRKWo
z=UrU<0e45;<vYx7ioN}9T;Kn~I=f_t69Y%XLIH+53ZY$`XV!3=N^mDNZ{(Zv@%`tz
z$8CZR>vuUcGCW)<+4Dmnckx43*#LuM>@T9&&64-==G6Txp2N_6;7Yw$|I?lJ`s@E*
z6)qS4n(=q{Z?*Uj0uTK8T4XH#ZK|kPz-D)-?bE6iGB0?RU6}VFS$|!if)A@r0Y?lw
zL;noX?JrGpJ=EnI8IQE8G({(5t+8*M{_tP-(-YdWoC0R;kDB*;ddH9cYis85a2_>M
z>gQzq-7l8DNAz#s=`+WaXB<p9G`CUuV6bC`qi5-4W1gwk1$3&KlAix+I`Xx7ozDu1
zEmd2#lss6f|5ryytitE&N{I(XypA1$lbXc6^3T0*?hO<$IW4tiPUxSrVNJTa>tw&z
z>#vUYzW!QMZ_Pc1oG_`)mZcBOm<5>*$wj-{8l-N~iaw*RG3Vgb<wCjTEi-&sTSPfC
zDj2lfIV&D8oa5oVvV!4N$SKXt`uk_?h5H<3LSG%{UZ1@C$g8c!hZZsX;dV@L4n5uI
zP~_n}an8euA@diw@-YP-ED&Kd;i|YCu};QzZmY|x-S<z+=|~$C{AuGhxOLN0J|ouo
zu<vr+)3pcrlC3A@>wk^oQmjl_<hsk;oT)%<$%X@|ySM+Yte8{sz|TH0m7ynd-T&H|
z-`Fn&Y_gX<B-bDM>TCJFe}`Gl7)qUN>6hl_o^wR;=*s^Ib0%0e2W(p2@bvxy;o5Ey
zvjPSiIUD}t4G*oBu2@uB`@QQOn<2-GXtm2Zzs)DVP!Q;vXJVtZ_?%0DGK0I<lxvlH
zzFX`FN#6g*!}Z;J`#&?^$2r$$wB4-zxBT+@z*NN!zYEd}=RcO4%H}6~y7AV&B<DEh
zKeaois&TAu+ZV0KVs3NK-$|*IBhpzY%T=gd(qfKnlVQi}I&asN?Pb&19+@8N=VPg!
zuJ<oz4|l=^o&}R*)D#vSwf)t7{rD8``|;uj6cW!fNV`;LG|GHCecZY|By?A|!j%+<
zwyo24+8z3M$9{2KW0P0wPTobI<L5-b3M~1U_Rs(Pi`+?!&gw?IZydYRKVE(%+M>Mf
z>HR$q4+?LT+Z!98ZF|4;)8o%sKWneXs6J*^zMyu!$>L@I{#3)6)A!y?`dLsnJM*28
zWsAtn`u0Wh|5p_LF}5qx*|LAu>x)I}^n&XCzur*&v0wkrw)4|}<_O9}JUxBBzoTfw
z5?OApxW6GD76OX}Oq16e2ezesP%-=}XgJ}4TZWKNPW-w1mG$3x%mNvuB)mDc>Y1Hb
z$yxoAcZ!63BGa-WNsGf;|Nq2pc<?|mq`6^bPuOM-3G?L|zy3DG)Fdi6*IhXEVbc9Q
z@`+BtdaiQX2P3YTgv^@Murg-@W2b!m;ilBz|Hb_uF7|!3E%3<sKY~-=ugdRGvhs|0
zJa78lQbFA<$8Wr7o&ES^ZRQ&HiJEc>H)cu|)!9vxdbqW3TFup#9IyKK`;Ohz*|qyi
z)cQzY<KP?KYTpkioJflM%(djKfW41n{nxi?C$?#we*g6@?;ZBEYkB#d89V{sQ#Q|F
z;9zK9X;~~|(7LnEeUIan&Hvv#VLp7qTJFB+aZg_fogJ(OQ*)D4CEeFXy?xPC<Wf}I
z&bfP)jnOXus}Y)}`)y=&s+T_e=J(D$b;h49-XEO8_vs&0++qGpdd2kmhnx27j8}eA
zTXi(XW#2~6RY!MU-KVudok{KpPuHgeQ8fmp?DRg*4~f%nPgAg&r`um(74c7+Yc6B|
zq#N2fI%~Z=Z08sLKPdP8t=&3XCSPCAcL!@{JxodYwc%#E?7!&?Mf$hj{aY9Grc-~C
z?`r!F!-B=^TkeN9H9uWseYNX@yKsZ_GWqoz*1c6`$xq+4qDDKs?CFY6{2%vRG3>qf
z!N`e?N%rENGKP)U<-07tzTh!i`0werww~Sdj(-eZ6MufquVu5(sF&1m9-NY{;`^({
zqi5>nbw|bR=NL%ao$xbIuy>si`%L3h_`l)}zY14Rs;vFr=8}5qp|rd2lz%6m<v%e^
zR(Sj+_P>ekw7A%seSSX@rYu%DAZ5%Y`s~!hSyLbNFSmEI*7vymF=d|jB!{;4E1e%(
zFWlN`^y#PFls`@)y4rXDEm&9JAo$Zm$Nc||d7s#-SGV`wH}}2M`tR$>Z|{~|emJM~
z>C*fO?FZvK|2V2myVAtG{Pn?-iRToQ3wjr_eU#&{`Ow$g@Wt1}$0+ZEwEmK)2}Ral
znv6==+FL4ZH8$>=TK%<?ONFa9@Ri6>y9ME<PU{4P*(UbpJ;+`4h&S*~Ei1>V*Rw9G
zuN6`#Ni%49u2?2hcWiOu<O&7FsOfBPBdmJ85AB-KGOxOWtDA@4-jg|e@<(a5XvSSx
zB83$%l`Tz9{gSrq5fy#QS$$=?$NXKZ^wkxb@7%Y%CR4co^sglsoZhDk2<>-!CeXBz
zL(HaYRd2?kV=Ct|1m-=HUR&w$jrZSP?>)jdQq64+`&{stXew~hZ^Ln2?JtfCIW&s`
zYEpmN*B3^-cC60$F#V(HntfScpDkBBcRqSn{q-Fxy4pt<FDzB}Dh>JPzhT~+?GB3k
zYt|i;ob^lj(S_6Bi=Hi+n08z$>m&DR=5MABFQukUNjaqIexqgQ-F=&!l$Cw{ui-x6
z!t7xz`Si-xP}?8#Z*y+_aox-~!fK|P_P>>WAtt5mn=+Oy398ZKV@)b|IHMiC!|qe<
z2@#`+*LxYh?OV;2!fYB8@x^A%E#bu*wv?xO3hQ@s#1|IU+_*9`tU_b$4E@5=H}mVx
zy6xV5dBvtn%$uX{cyoOSFk1hmY<_fj(wXL*ddU^vmmhJ@skh-fGB5f^#J2gyEIY1$
zNEY}%&FAe)^B-ALbGTA118e5HE`4Z}YvsJYfA4O-sLQb%oOgWvZ!k-Ed)y+A?|ar{
z$0nWpvwDl5X{q!1t@<mAtL!SSENP17*pW7m^ZmR}TTb7&H|0S)<K{B)3jz`y%)3|J
z3F=&snE&V0>9bSU&Hb-1!7K5fs_kU%a~|8~{s_`jpYX`6kgrnl*`6);vfn4U6@L*w
z+qw3Vu3DnsmRFN6`EJNbboG(Rv)Gw4<L?iB=U3vhuLhR-?mN~qU;b#KmB-3}OJ1pG
zU-SeRn7tQ%+aAFg`*`kQCHn>au9Y0YiMs>;<vcT=e1Ct~-@EIdyeo^Juu^;bhkKJ2
z7xe3T^}qh5fAYs>;oqvyRcCxW{>jBhJlJYZ=H&G!f6o4C@4ug8;(OI^$vt~>=dX*K
zJE3)xUeu!i_wrAd?LT==77AUjB=4ae{`h109=Q)Qem~o>;(ye&X=*3Cil1D6#QpGb
zjbG#C%+zzbf30eN97y2csj)wBntvmo*`H$z`)>wDW^kN-{dYlMtk9;vRrztVFG~8?
z7CaX(KRvx_wb|`|oC}w)y}7Gu{}Y`I0kPQMP0<GJwz6Mc{;pU564ZSCSiR*ruJqmF
zpZ-h}Fsqq&WT(X@%g0wg)z?m5lj(ITdw0-Js~!8^@4jXysgcw-$?xKtlCa&W_hXcG
zpM3j&@$)av33{(X>+clbzxy_6Tled!m3r^~2r?x^h%u(`VoYS`cE6(^cx1!BpBB3`
z&)9~4{29l-+53L8)U>j=|Myj$*6ua=IW^xP;9s-TtND_rbv!0qbh=YvI<>xrfq`q9
z<1F>gSFH65bG!sz|J}OKynJif#@O&nU#pHr)c=2Vm~H?1eN*p${~L2}?XNHM)~%24
z<8xb|ufvnj5wN_xoNYDx=L7qsxCHOjl-Hm8_Tzhxz~j$1*4fwbxUzru{qcSx|2qly
z%y5;?JgcVl%XPM0%6v9Q-=jKt|8d4XuJEnDo_-2=x=%Cy^}VP~OXFVntH1xX{nn#h
zRbOA$W`DhUYpeMG?@e-TwKGGMSG9&u7WkjNx73V>gO`!*w%C!cMjyk9cQB=}F5J9R
zcKPI@^UND{4mB-$wV8P(v-<2D*?rmiYY+V3YkPI9Oug;kX3fNHOLugKR4q7QUoW4e
z8Sj(*JwnX2hgqZU>#wYyoy!znHA$Ug@G&-xXSx$~p_*0vlTe`4oQ{3PlKBTGY}_?n
zdRqVEUhT>q&PN|CW$ru>w2p6<$cAe{=YG~i`~7A#(9k@jUBA1#`I`8L4M&UlxB~AK
z1T`;p^x&SOSMb-$uV&MNEe<b(g)-EgkE~x+_CxSsMBd8j$9JtfZE$(xi|Y%wepws-
zc<ZGvp{HL@v;JE5Q@k?4q_IKa_+s1lhq~7I+fP5`#(Rz@M5ya%S?8l$%=;M90?Vgz
zUy5{I=J38sW<xXg)!jB1xs@0l;?4X%TfO0Z-*AZSeB-wzO$$|jZDWi{WKb?X#L&7e
z;npLT)*jg!abX#SrNxFug4e|t%3Rs6uljzPYxixNCaWWfD<A&lUG?nw^y{4LIx8QX
z7JeOK`B%}N=@}Ehj)}$(lRb*c3d?$mpE8)%Y-M$ju+Iw-7f`VE>iD{9*C8c04!3m?
ztec-U==@*fVWi1)>40&6T4^LZm-YW2OdfpJKe8`R|24tw)Ug%kGIOhbOud@Y%EV^D
zakSMmps<6%JGzpG?+ioKep}(Gi~7$@c>YjJ_4uQ`#+!b&Y)tG-Hk`<2wV`Zjp-r)W
z?uFOJVx6|F4Qi5EbGV-=WhpPpzTmn#bp88l@vDpet^M`A^tIme#*3{T2Q3)elbKrN
zZ%%$-z_w%!_XK6}=1o6$ie?FNx~XJNT397fuXlK%*t`#$c@`R8JMdt3L!|%lQzv*L
z)V}||c!YbS;+=@>hduY23}bKBe+`RRx<CAH_RgZ~a`EpPTpK+Mj7@wG9e(8gFx)2f
zrM?pblj0ozmkmLS-pnpnXQ|q);Q4Vb+Xk1tsY`nGnM2BytsHJ|WG+2@U^A1oY0C8#
zwlc5R2uRcjyyUNV)-qX9`i;<|AH6Ccmw!{usxMz-+oIq#M?7Imi>Oe)<ZNEy`-k@H
zOL(jk31$rKtA4(c=gXGXJxg@cl@&itsqO2#ef-E{C$Fsr7bfhE@JX8Ewk_&v>aH(p
zOTV6ew>Rl;>UrC>{tsGi@2@jIV#EDxSxd$9gU65Sz2W@uoza8E;BuSVVYMcu*z4c8
z*2(H;{&`nAKcx9Dd&dR_0TpKf1_6N-35`nu7d9j;ir|p;zW9Ps-BG4ENtelS#X=z$
z&VZY7A~WS!Y|Z9>&55@Cc7)^6Z}}HT*U4QrSM@BpwMj^G)>pxYe?J>E*)G(Ne<=Iw
zYt`3}a%=(Zjc;sp&eyGu;1F<pcC;`)>O#t?ob^SkE4X*?Y*e+ryMB7tUEQKj7pEVM
zoM3h@tWRM7(YPaRFRH$%Zm+E}HqYa>-r@3np}|c-&H1}i-d%pM=$EnPrHcA?uVO(>
z-5FIy|E!!Eu5uVHG5__=g_l_-zfI}&_m3J|73=?>5(xiPQ+dWnyU^X}YPfUA|MKdt
zcel5D7cR57ynB1tlixfJ8{`;WnNo88KZ-ASSl-mdF5c+0Lu>EB3$sl3i(MDE+OB{7
zoyKj|TJZ~)Z++FYEPlANkR>Dg$x5A5RbPdRW+nW+|L{+d+vy8e_6oc>{Nc(JQH{s9
z-B#Y)WxJoH?M|lY{Wfnq!`jbEhSR^T{h_Mln!H(c_uO~NI(4F_i+0o<p7!I`|96Wo
z9aH8puNU0xDP~;z{loRNSHa3D_WxZ~%XWCajy+%fLt8Y)`4Qu%Uku3&3wd~D64_ZT
zCW&13eCmBIY4Qf?seCV5ZU^$IRK-{x)!sHsch1AS#RdZGKDTDoAAS~DWnfs6W4p`k
z;soPcc^ygVij6rL)_IFghAAo7Zuqd&zEeAJvF@3Ye@eeEXD)xH_k4fTqOi+z7+4}~
z7%DOtMEMzHR0=Ad-~8dkH-8@U)9v@Gv`&^S`N!#Z@XE}7ll(P%uJk!>-gAF%t<uh4
z#*M|7f4NrL-;dyJ$YXjS(_y@IKEwM}^W@cMXfW?k<T_>J(44d*AY{U713Oa*2~qDO
zEC#+I1$+*>Zd$xrqCC&>e#^P48nb=+pWe0qd}U=fLn21)Oa0cjaY{b|FHL)Y$aQT|
z`Xe<JsmvL!)91A*JN`MA7b>+euqkNo-T6KnPgmOfkNV)^RIjyjo7%gdwcKyHuV-95
zRj<y-Vp=Axq@_0f<K){H^>v@}u<hk8f6x(@!m(>-(SLQpU4ey9%=`8IGI)9BzvAqw
z`MvklhR8c5CV>aU?|c5Qym%<q_+r+ZuS%&4PXA4MbhN_jg6Yh{jrB8f<3C@&;JmD2
zyY08V>XY7GnYTBt^X~PYoi{h#eaaobd+OiRfA98lo}GG{|9$zNX_uaU>l0HI2sBVL
z%bTnH;Xpt_|69ew(sS3YN#wY?Nn@SUv_-c<WZ06ws55_5=xnXK{fqaz`N21q5z7mo
ze!S1~^z#Lt*yf0cXa8d#Kiu7YLft%?CyVozW=`wm(uwcZ#C-Qy=@@5PBow|sBVzM+
zW~m<@TkdYJU6=Vb%KD(7hV<?w8$8X_Rd#R6n_IO}f0A+kU(TvG4y(5w?wi6LT@Y8u
z#GzAl@6EQ|4pW~Qc3f)wGxK@iOrZ_Z3@;UyRe#71HqD5>bS{Z6R$hOaqR2Lb4|fe>
zB`VJr-qtE=I3l3pb=z>pr29p+GP;%>9%`N61NMGv+ETvKf<LymR6yi-mJ0XZ&%1Uw
zZ#r_|SUPhPJ5QU|i8cknPm{v0m_=HweR10K{->H&J<Zpj=Ci$t7Fo8?w)#ut(v3_z
z*1i3{`t?!S3*l+!-k<PQT+uZB>R*vpzs0nyPH)%w^eZWU(!mRI@Avv_T(D&KBc7Y9
z%<tFiD`Y#of4Xc%pncuL$b=)z)7j@S=O20BF5Z=_cK;vO>e{}37G@s@3;v2TtA8Jz
zd{4M)L%W(*fMjw*<`G`TzFkFCIhUF{KCS%|o~3#6XM|wFkH;$i*^K^%)C=lGzGW$@
zH<NaJK5<>`x28wtMMts@2pl}J^8kZ0tHJt^s}+rbyS@n4Utjhm>`xPa|9Z7ERm%2l
z<-3-dbsgot-tj!~rMLJ_-kr(n^Y%*d#>KBRYdt<o!LD(}<S)z2axDITo_^rQt%${S
zPd+`k{eAK`;nVg}W}MsR_x*nFF)i7^``wq1Y@K)C?9DgW!o0?DmVQI|lZi!(E<N~k
zN31I4^a|tGepilN_?gT6i>LBtVt2Cb{MITh^_MKKpT<r5A6vH<bne~suU~S$KKgCC
z>SV^>h-iO~6vgK%|2PggbtTyge0%>%&9(d2+dobsO5e*ppMFeonG;sv<`+}1bLIXP
zAJI9Xr}L`~%FhXYuWMR!`v2G08a=Z!>z}WmcVF=J)p*v`)BIyBI5uwLD6ctl;2gWv
zxmAz58-ImcJ~<y>d`!5;=5xWCNa?p%eal`mHCV7H{1I&VoWG$-!ze?qesKe%mPn6|
zD3@qcW5R+E9TRRgrh|?f0<=VgIzdM*x2OmzZ4I7hTpsl-&nd`td-3emSA`<}e0)=5
ze($*Z`}_7+%_c0})&Hr~=JvBYPcqH7E1%c1Km3z*cl9<^<)6ZWi6zs6#5yHEuAg1!
zxbAk$x8wi!v9auG7hBW+*-c+0TzGGk%cH9s=lo!>Z(ky#pU}Q?)9<Cfva8*-EROj2
zKRNEAAf<V9wRbP$3d4K*-t#_v+Ps=+ieAC;8(($B!-UR<*D$fm&xm~UcD91w>2oUt
zxaQ6^JXu@0G;7P+TPu!qx9VS?<9L8kO=KdEiZCD3L5Bko9->^VPR)S|SM&spwniTB
zO5Lbgx%Q4eS8{jPWan*<>-}$=+eI$g=e>3Me61^UieGeJe;2*$?Vf+)GfW;PmFK<x
zUiY)jEQC+|{r=}ixK=KHbx8JWO{JMll|fNk=p(joUSFR3316tFS+r;FlXR0g?EH%s
z-Oj4fYmqIo{C@iUwA3pb>%zQyYUcO<z2P_Q<(oAD=_lfE$M0Qd{@K2LOaHw;8Abo)
z9YZ2xnXY_v5!9HyVA|ZEf`vJ2t}5&m)UGM!c8Yz+2il3C5yr*a)0k*6K}Up}m2FAF
z3YT|P_ao2kURiM>ol~?T-aM-N#0zn|&I{k|ZZ8)5oBEl5N$fd~%?&et@LY`VC|?$u
zxO2siYd_0mBe~{9sopg1z4LnA<NA$9!~gdG;J*2QXD{>G!~d5C@z3;4nA(`3vt{@8
z&HAVLUNhNnyH`kG{y)D+PJR8R;7xDi-gSFzUOang`F(kzIcdQ!4qZR5ZqLL&<8Da<
z$DKtE9;^R7G8HV2jI^nq6FlqXrTI&fFDO>3)Tgxu?X6hO(ldwQ0i%Y<1RW7&R;Ggv
z8v=AxxJ3^&Caefh6X9k#w8&vbh*Ns4YO%rI!&PVF!d7(d3@;2Su*h>h_p|@f_Ll3P
zVo(2)-uj^~{+s8ky4U6B{?C4rf4c3w-f8U%V#oG2-VJ{h!Ikw!P3hk!)4EIb=kKmf
zoHTv*k=w67JURUA{>PcRA9+qxW?3g&`tRf3dM1chs6b2p#-?)RS4>Nb>m(kUb?<ui
zLgL|-(wf~O>z25_-+%L8_^bcjcT(Oxezfj>P{f0cRiA%z3upOm+3K_;An=3ViquF6
zSJj|B*E{_e$S`V&OvqBv=4Cq6upvQDMVOUok;94!T8_r^kDs6Q{k8S(IWt%7n!R(?
zl2xVx7aq-Scl%gkHIM(~tNLel_AS{z?S+ls+poM=d*y7t`q_CR?VH7R+?ggDqcZ*W
zCzYZ-;iepq1OA79wo~)5HP{+(!y@R)!rQZd{qx%GcBT8o(R#U4{k_#ZJ8y>{Yd^lY
z^!}3o-EW7lDtz0R5WglpTUlN9(?0eV{dW&*cZkneSN+THU0=yM2AjXjE^;WAUW(kA
zHOJ5^Y{HcFDT4Nqm#)RE(`asB)CigI#)Vsy>9E6w0Iy{3R;NZ`%eC4y`Lla`Urh^r
zHT!Dlv(l6ccXvEjdb4dl|F>D<S^e3^-G6Y{-icYgJK1A@)pc9dXNwE#Y}ntht7@KK
z^ks8ngK*X?Kh1@uvGbzYSn}(f%3uD!{HmsU%j%s^Z4>proV=Z3Wuy21%+_m7;-*32
z?#JV#A2W;SKiu_2ks)y3e#i6gxc?r0y~q3h%^(F~TPyj(+Vdx0K4kc2w}Ab2kM-R$
zox?KfOBcqv#(FM&*%`XLMEW=A?EIPkj6^j;I*s0~va`PS<3fSM$vaPaIs`R0yi}dh
zbh?(yNG104M!QL`&V*-X2~EqM!e%w`wg>YQ#j2^Rrf2xvc6;$dpqaBt!$Cr5`}}|J
zK2N*gw}PR1?<bKIu9BZ0q|=^Ce}3JT7bSRBe%-(GoU{ME{w852A2p4iJ0|8n^WJ4v
zx%)-Ze;h6TyL^B8cI%q?=DQaMZPn=Y?$mjecKf_jjE(nodtt$EpETbenAP-b2fNLM
zA8YpBe6;$sl<1sNn+rEN4`laVm~pV-65}h-{uQ1W$5|R&T&%SxU&I9!noj=zcYXb>
z@8#vCbpro?ZLeLoKkEJJ{k`#1U$4KuBI&o_)%D?P!&m>?`|F#({jUc1wz8%ja_&rw
z;oGaX>D`GqRncRy?qPv-hxD#3@3^?9SqP~ee-yZ1*DSk(f%Tvg1Ea#{1XI4a3U=>?
zkJ+yR*w$K@G&3a$=?dK8|F%t3kn!4a>67*c|M&f!qjI$C=$XY$jwx~<9{i5$FMb~J
zTx;inzP&N=ukQYcEd9D>*RH2Kv)=3fzj`~|yz%3j3v(u#%Z4;wyj`^N$g;x<uX#nX
z=Qq7~Td-_Vx=Gj6WQFOX?`Q8we(5`VgTzm<z3unsHoRpHoWA<;!K;7rHuv$Zc;)a;
zgo)w6(iOJ7s#DGWeDR<5>8oefm##Fcr6KY2mWb}0d)emrtkZMFCV8jn7KcUUo#IjG
z=H?I%;@WVuu~|2N!FGw2$Ldxrp8P|kTO*uRQNZyuXMw|o^(kRDg;rl)FY*0vlGBQZ
zP7nHhzB`}o3hg$r3|<kIVprbH*R(Ik_mZYfYx$l#<^^9TzyIXu@^jWI@2Rn&p`K2m
zFXAVP<ow`RFl|TABCVX+=PEpG{hlpJ-P&FB_y5Oz?@XF@Uk#1DB^T9oY{LU7jxveq
zdE9M0#a|vZT%021x&Pf)frUS>v3?F*9{a<b`+M0+Hi4E_E~&}PHU>+s=_E%l5WH2f
zP;`UA3l_O@m&w*&w2Z)$kWbi;&p2Tkbo&VZx5Rgse+IJk)%;tKki~f5UG4h7rdm#~
z87C}K;!K3wJbX?ybULMq8WnO2+Epowerx+$7@`>*eqH=)_A1$56Mx$7=s)>GFv)|-
zM*S6&gigT>$G5LTr+;R;)6?2Fe@d0qXTN<vcgOW_Es9=LQ~T4l=>ML1iWgi0pEP-B
zaqvD<d8S|g>fH*7!#~ovlUt=9IS8q`lt?eI;*bq^yGKFiLU4m&=Ry_z+f%l8<!$@_
z>(^IT+qy6R%k#S$7=&0>)Su#0Ti-dK^VuO2yLJE6{HODseRzCFlFp3)#$`L0R=hDe
zvoR#<k%_vN`aJ#@9)&DU?yZfd6Fef=|1C_3PIR-f^WRkPxadObLe;WN>2F47c^>~?
z_%~`@Xm<FkpLPD%HO-@rR@VQ`+ju&Ar`^)<hc)+)b6Rw9@LbunLR@o&kj4g?`%DZB
zAvvYy4k11+zq)rQFgVI&xR|POxC&Hw2rNrG_S4GVxs)-(PL*9op3#6O-|A6S18X~D
zsKW#1qE-f`M<*DIHQYsbjC%5Z95OgA&0wXms$yfUb?3Iv?r+}d?Eho0k($8fx$LOe
zF8;5|7N_^V|M6R@_xs;0g9(S`CA<oMn$DsxYH+TmU9tRXLRLkjd9wO}?Rili4Vxxy
zU-RoN%ZaJ_)1RKMUpO<&_p@j1YR~)ocWm<4RpQAJRe3O<xlh&Hf={z{pZUHn9osSn
zmMtj?Gi7Ab0&eWTchyPeCd1<IzFQXc&g=UmEs$8eqg|*-a;;;=mCX+`tFQh2FQDy_
zy<NIbRy5grsnRZ1lMa_9LMK{g7<*}Os{T0t@34?V=5%?xptT#f{r|P7^7v=J)`Ka3
z|G(%yY~rzsrL@wv=)-#be>{Jf&xm9*vKc6U5J=Sh!Ljik(=skryPP8*7#F{YyRWh7
z@7AX}7A?Z>cO-RcPtCf>xYna?32*(u@RoZT))Tv5>23Er9;XmFo4>_!l9SCHwem|w
zvx8FpUKHn@w?6;jl)q7*g`3XIyd(O-`Tbdw86P<Qh&_7X<uTDBYmeZ;eLY;d3r`FE
zi5EUpUjLCbqjvk6l+;#n_ib((F@;}0WhDDb{nGzm`*+dxvnSle)~air+R(3m{m`;^
z+Utav9qhzKcQhWJQ@dULd&4Hy23JNq4YzlCE5$9O84Wy6#=aG|I?$4^!IIT_x?#kc
zFqxP@h60^RJ<A794%1W^ct0r}kYi&$@1Dh1Qods$_uqp8UydCLuhM_DE-CEZ$+cNe
zBsHh52|oHEOO*TQ>~7}Z=E+;io{D5x`x+kZx-58Q_rYshI8M#}@xNq}rTV2k)6%D_
z<mi8x6Cd;K@9SM#{|MaKd34V$!2`#ioUW~zbNBp<_y2xO(BL=RUdNI!lS5CVC}p05
zYWvOvCyO7;=0r69@t$)|fA8_{L7s&lqxWZq`179rP}4X!bNROWM~<x7c{7{o>)8a+
zSqv<)|5=W!_uM_R!|j*5=l!nZMc1`OLf4;D$bEOS=l;%?AS?gJX%}xl{J`nSpmkQS
zGr!+1!l3fg!Fv|Y_xFEO>G`K|C;XhxzW=PptzyjhA`4RgzleEhcIT(X@rS3+PyN%@
zuAW<Nw|{Bp{V76XIW{)iwcF0_JS_UBVs}<#{dM7;9S8j0X#Kygz0*?b{l$CT4?<ZJ
zj;Kp<SK1veSbw~$*8Td#s(TyUEI1MmvT!%7e6M?ZM+n#F?KS*tCnH<`3stk#i@yJK
z=BG3BzK<7N7rwRg$a==Zc3}FxiaMpP@AZ09UuO3o+v2HSc=P#(<OQz}8K$4^&5F6A
zaDIp1+KYdG|I6d}c8*8(Z@gG+<aE7!zxt>jv(?+*ge^Gs-$ld5)7q(8eg7+Gi#(s$
z*L#*d)ShyDfm3m5zSfjI3P&!-ak8x2dqZ7xXN!nUXFy&?%~Ga*^9c`<be{YdvD!9!
z{@+%YYyNk(#{1-Q|8{+QPyCt0lu6DTPiM>NZvTFb!zx+f;uJkk>$R+YElm*t|CZgo
zWOn-N<rdaTZHXIiANHUAtoXqGGxyZ`2YvFMk8fJ-OY|08JNe`LfORPcPkd20WVW@x
zch<!(jq>^G=Wk8=IsInhy3JX?u5NV6n({;Y$=|sB2Q8E8Oq#rXQnv>;1-(tO-NJkO
zP=0~4%V}-HH|v_*I8VN9iLy-lV)E26^xX+I{z|)FA7YQqdd73U@7GJ$iM=dlcRQ=f
ztj|sevzpOdbYs0$<L{;UJ%*_>*I%q%qh-?1x<r?)vFfOS=-TzZhGCmOYp$91szV`z
z^TC7wKOTuBwy5j5OMl#ncG_Up+HkM_zQQ2}%^y$9Y>jx7tZy9qt8RXO=ZuO|99AWt
znD#UL-&1@hSM}+XUDFevSqFW5#?f6~nG&s9qo{su@01rOv#-9o?|t>f-d|tlwOFXP
zTU<E(+a>R1&3yT1`!-d*%oCbtKdmeNu<N|t_R?&;UHX@F-$khY@G|tj*ZruY?TBQ+
ztwn#Q<h|eMq^Pxj?$^m5Cpe1B<ZOA^>3BTouC$e&!H-Y%>i&I()!n`8RHRJo7FeW~
z208`&c+q_FXZOye1#d1*`c`n&WZ%p;tE=TgwH6CHPUcVT)Zu<t_34Lz?T*J?*IuMB
zx78|^9Js$sVa1gmZ~grTH72L1gch)8Co8Qxd9YJg(|qYGJ)_BwR{n8r+I#O$&R4yq
zf0bIqa^E+lT<<mX%P}i`+O2))zuo<?_~#u{;>#Z%41RT~%4YZNtFL!&=Gnw(T=M7F
zx<<KK>8JiYEm1D-be*=ZsB-%|>3R2K9xK=XzFS`6yh9~y`KS6dLPFezGiCgj-BGPs
z?0<emznJ1d*DJdUSEt&(6?u4z!)D&%=UD<4+kXEzK69?yo&6sZmaM+1occFA|J|SM
z%PN|q^Mt=%@83{3eZR0E-}nCfX$z8nED_#bz2j$2SS<5a*3?65A03T~P2s=boj1XL
zf&KX%T9%)+ZpbNSEDW5k(im?2>CC;0aw1i)_r&h|^kLt$Z+z?uCx5QV;NG!1<v^6&
z^x)>}(rrtV6Ae^Y+4ABdp4;|pmuQzf<+y52ck=Rc>^!Y2@9SL5&<d#TQtyrboVHhZ
zf9iC<^55_NhwJ`j>W#Hp+ob>B<@Eb-N2S6wE&JYYO1O3UlB<x?&V|9QZzKxawoiJ*
zx#=`}(bAV8^PO&TPh|OL{qWoUOWjF(eq4>6-ScSsd8Wtv@4nGzZ7r|fQ8-ukxt_hw
z$wLBpQdz#|W0tw7)%maR)2V2fAD!&d(zz?*<#hG8*C)&=7y6m-q1<7*chQ?EQf*=$
zGn<=?nI8X4woKffCc9QkFpP)Eq~YqlBqpEb4)SqAp3K}%TLrHOc6TJ)|03A+Eb8&Q
zmlb`xzCEg66aMDR`Z)d5p5L#;W&deyyJc7-aLp_2XQ1t>tY5GE9XmLb_}*9f{aKo1
z{`AQw(@GYB>-TQ13{+PBKI37=@2UL<SvZeG?tO4_MFbyf_~{2f0xl{XWHjKob=*1O
zNJSri<#e5+;b!sjFMhwx4BL3}$4^@}|8o1b73X8?WHT?;hs5o3x_kf0?$;&mldtVx
zD%AAT-e7{---I3^<_$qQIyLP6#;?63?Za3keEvx;Sp9&3fj84}mPVHrYf+W(rQ*81
zVSDSg>IwWmx3T=bOa1F#`;YuyJ#YWFedSmG-i?{OHukT!<^S+F)`@jYxlBdYU3Zt}
z#;##`$^5u&w%fWl_XQp_9)A3p@pvMStD<go#oX)zCu|IL1*dLOoi2AYu}`DL<aUzv
za^2d6%Y5e5|K(T{l()5R>!ZKNVz$J6aAp3G@bz(JJ#&JFly-T*icp=MJ7T9?2!Cp)
z*OH<hkgS{I&GyyjtIqBDs~$V5zbyJ1x7tmFy*VrUFmuxt9@dQb8)A{)M1N@9V_owx
zaoMaaE#(ho;mPV9GPxQlGgk$^FZKUEWebOfOd9LttTcwRP127~Gp?D&;<4^bOvTPA
zMT$`h{U^S?+nFULnK9?bpFpYp=0nQ^BzTiuy7p;hO<8b9ccXLEqUT2=yI7leKlVJG
z?<;U+uFLwl&hvs_OsL~;f6ibhwrHx6M%3rRue*K(RNSBT;hw7UzS1nFvwN*JOM9nX
zeqSdbC$Z<&YtO9@R$tulWX1tk>8Mp}Jw2VJd;_zRb~TtVmj3@L|AFIxm5hh12X8*J
zSlkC&yYFHB=i@en#;^4(UG-FW@|o_CTLOYCZcqO{bYe>6FMDgJ{VYjB<G}s}7sKzz
zHF<B*TDD)mjiI?Z?t?{AW8=zWBDH&d@K(lDcx1Hdhl)=uERBg`%eP~(u(lVQ^y%s>
z&yPK47;C@pQ+Sv0I;!N#8IgnPTQoDb3R#<9Q(nLD_+*p)1&i0P^VD$jz7b*E*Qv06
z8&_LZdY;8=PsN*`b)&xWSu{D#-E=MLV8c~?ooz*@AIz}hJYRe@WVyg+gN(3gwg3Of
zFgZ8~olln9Id@InOkM82^3SEKrygDF|2lrHZ=Lk_%f)L8l%-pyZT<f`wfjCxL3{0-
zhPu|xA=>)<T>C#XDRNBYQ0v?;vZ`KdXJYA(x^vYpa_4`)Q~&;J-7JBxE9+wR>{D`@
z+A0uG;8nR~=_5nu!}c{-*0y9gxwF}O+GgKZ|L8GOOW=4E<E@6xFZ-)ERDbzufARkF
zKeM+8R=>^K@#^@}cU7zQmj3!$b-msE_3G}oF*>`>8*ZAUGsW*))5WGWg@J39o##nl
z-u&*=Y{_%>I-b{K?zq%H{#qJ-(rMbOm!`ET3v1O^pL`wu`lkK<*YyDr1`}Rw+BD1J
z(X>B8okqdam2JYV^B;^kJmbP~!#5KT{CGE8dXGrO`)$v4Q&${v3wU+G=vws!BdK|J
z_Z}}+)xGq8XTZ!~s@jaMljnKtx#tmkQcLKgT-WtVyQQ0yl)3D@PF<fQb8n9PijU6B
zd-`2+-!~~SU4CPJ{92*H8$sUk4@)aPvu(K<EwI1Va!oa}>z}yz(?>V{%=eS-bUE8s
z7&$%o^xw&jl4sHaUY(dC*LWyx-bQWV)3c>h=Y?qXh&*iHW&UZo{QbY2RZFM+(-zqj
z|N44EyUg0@2mB9&eP;W?WMrXa^j$opd%9@X56+29CwG64`+B3tN@u44a}&GKEQ=4^
zQxcd{u3ih&diPIxQ&jRv`P0Ad_P$Wg?0s}b#<GIr=E=te`K$%)LJRvP6x1YEiyPH#
z`<k3=@$o=Gd-|%?OV2U%-nHMVd*iH>rOo;m=dHi~_)s5Xm$2`~jsJHf<Yj^vX1TA}
zEy(;evFFk^(?ib#&ODRaePpHft_#QPAG9oezq`MsdzbXxmmg-z{yx~F`ki-z-SqZ9
z!pS^~Sy$C3K6;!p+xKSgW33man!K*_-;|3vT$-_TjmXjR$yYR^KK=VU;c@DXjMwwd
z{gYk3r>xsF;>PE&c@58Jao4jboc}f1{r~3)?7_e0q&@W6G>dJg_TQ?N*{mAJ3l^+s
z?`Qt>Uy4aUis6@lkl^n7(Fd;ZGK4Xyxz{NQaD=prCZw<&TBxi2X#4bY0;~*sI2h(|
z_^9r%KP0*H)nwL%9~pnhEQ~N?TbRbc?!5i#eF0781u?vr&)l%@nW8aQQsWWNwyJv8
z$EO`$MOL5pdVFm1|JS#qZ?XKZJnrx!VdmqO1DnJi=~tg)J<vNxMMQsHq`-U2FMk7L
zPOYB&eECh|z3vO_F7zo*y1XLr^xrmJF+0tU@7i*b|EJ%7^Su6Y>5rJ}`s{BUe~Xy@
zoNt?ViDkJ}@5$h8T}thDTxEI_u5;?GnO737+7!sRD{$wQd)4pm+<V87ugl$kPKp18
z)x~QQZWvzLUvM?yxY2^C3$I^qm;8MF!C^zC3Gvf+{aY8capRqRHvjLNE7#mLvJaQk
zef7!v)WJ0~<AfXq!(M%_y|?#c$AQ2%>GxP19gav>C3~f9GGbJHom_BOn?rbmix(ry
zqK#~h=EoWx84qk%i4<77{zuaR85X8h_4)UcfBct?V{E$|U|P1tbFOZksK!m5{>7F5
z*6@C<`S^ZCotbSDk442?y_-9p>wgV-(4@lf@Iir8!-@!otm?;G_#TyQc@&%cw^PCH
zbMu??-ZhRdwO(caIy(KjzV5{tSF}$bbXQ|vR=e}(_JW7So^tZbZj@=n^KkTXAFtx8
z5*L5GKJ;CyLgV!tIV+Yfdm3f&>2bq>zoq_v??lh8l$jNtEwcSyNB9+{9+4Ft)A?u5
z{#4JjF=4H(qU?{f7}kc?@I-b4y@Vr`ZfwW@7)9JTBCj=h^BKX<({wlgyP13Z%)grj
z)5Rmha{V(6zPFrNYp!qNz_Wa>$A!m?8~$%!DEzQs<FxlNOV-V+_3wD}aoMU_=O?k=
zaq_Bhnpm1Pr<%L-jOSaontg}1m}Q4|2)2B#3A=yuW9rJF|2LHrCq_8hE&sah;ikt=
zGJbjY3#z}oq_24(a_OE+4|(H@?%g*O)vZt#KVlhWrO@sW_;h*EOpWOhPrkiBRS-Js
zX_}N_!=<YN@q9-getNFwum6&>?r;;=(o=JT?TqXe|9*YrkjFkNX|;!<?K%;msxy3>
z_fNk5RWa_`se@rhCEKzl)lQIS3iGMl{xRiq%vKgTljz=rY%M*eFa`UVU&;0A{gn|L
zT~igOui(F$Tv<{2HRa#?fFGOR1-^Xz>CofzVd1Ohf2n)_#cx;W`>Q8SU03NpT{WZi
z?DZz4-Mjbgt^U#&&~b71<0OfP?0X+Pz4}x$#QIyZO@e4ujX>v}e_On}Od4lAn4a|i
zVC4mc$t`k*{_Bsm=l$c}C3&`HuX&}t{Fm%kFK#Y;BT?|?r`E@wsH;s*K6&w%PCeMr
zay8j|-Xe3QJgXSb+0{7*LzkRqUSZO+YerVa3(YM%A}4!KVXAam`o}JXZGGOpOLq@9
z%@gw0&De1`Z0Xh=BJT}8);<uN{!r-h{%zHlxy14^olbGu-Zd8zVRfp}_}Tb0|JGW@
zKUcRXU1#dL@VM_@kzj8_`Q%;8R!N^e>@d%5m0!}%71M8jET7mA*HPW~KyBLBTaz8t
zf8J)^de8r^)3@f2cT)8_PCa{|cQS9Mv(f{}@OM`qDJY81neu;0pvpA$*u^(e9&Vnj
zEKvDa!+EK#>z=DSZo5{y*9n!@8BX7sw`!;9|1XNu-g6(bnRoiqLVukOf6>|5U+xr?
zdOm%d_&aF&I(aF(*=+y#c0^R%>?(*&lwNZ7R+X=FrsIZY%~$(<eA4xr+9G%TpMNLp
z*uy&<rM{81him4=#?M-sy!%UV=e4xNQ1;Cm1D|MVJox>4+l5`Xb}Zyc%4<kce|-Id
z#yP_U8`m8aa#|LWlcl)-{&Hn`S)Ih!JN-}1)YkC$RrRUrY_Gs0o@J4_D*m@J-nlW~
zf1lPDD7~R`Gt(dCthrN;sGfPghhxfg{+0^yPYb_G$C&<K7vD3fR7lkzq(bdgf9yj?
zmn_dug@-O|IOCzMqWeTrZw}Ab#C!H#2A}T-r)_09$F3+B(R*q4{e2&g@T|>P)-817
zRa3{`@BMjuTCUIi{!2ez@cg^3CtbPq5*mB&@6MA-ZDv~YHtEj$y9cGVEr0)EV`<?M
zt(nEeKU_?-Uw>UyAK$j&(0i+&wcF}hVve{=&j|~k)BW*g#T%ssPhY5c_xc)0{%yKw
zmbmxsxnt3>21^tk*U8?tzxVFUU!O~t%r9G)E!cO-Lvho4&4u@~-<(*IJ8AtV!^PbD
ztoyd;FHpIEYtr?Tw=YjW^Y^4v+mny$1H}~93KrikQn%s`iL^J1XSEEOd&IuRFZb`g
zn7Ott9BCX+A3du4(9kGQnqZw0m8IM?_vt34b?S~bXD3R_e1BWAsGswq(wRr`pQ8BA
zI$k})<@JlDsa5j9)sTA=xSH?F?f&%PhMtC+G~4z@+n1v2=WSTR{~(Kn^NybS$&6n*
zY~mL_p4hGB?s<N3&!7AM7yb>mUvxH>DKN_C^`42FPhUw(v-D72owe6DXx)d#RIXDV
zYuW7kqJHXLIsNXBQ2P6gaaI529q&F^(o^~Gvvl%Co$a|CySJp96wX=q`^StXW1scY
z8jgM6>vr<6&ZS*upJFCf{q9)u>HGfI`S%vN{8;>B@#~Xv;hvvn*OwnUVc@tnQ@T)f
z_q)%0YwA~g@%3%{Y25qYrqK8Ck)~*;81bjSO5U>kH(j*zmdfg-UF%N&-&^UCS-&^F
zR>Nbf*W~|ero?FL_ipWc6A)1I|HlsJ^BPC8U;l~z^sgdxnnvjAryK9yZ~P(5cP#v5
zk<eY;`4!?lOb!B-%wkr8+qCu7*Dd)Hu6?z!M&Q=I)mPU&dA<JRUm@KeF-MH|?adKj
z{dFL%lc!Tm!{c|?9<3r7Zwt|bKiwWAJ3nAx;IVR?rOD;RT7L4<BXLQgb-UMxPrlmx
z_0;mHca>NF`Tq}jUAubj*OjZLx{9CvsP%Vk_?q~&@q%$%f9<bdTPN}9SxnQ5pba@s
zuQv-#ueYrzJpQ9P`#+QV_S^sG*o8g)ab<r)GwU+73HImMF4RdFeq8-9TD6OZV<Ly;
zkBQfNT}spPY{M2VY5pBIx8f<gP}hbxNjGB_-ORY@9y{kD$BjlI>Fct3w~FUaKK^e*
z+bZ{+<y~qEZ`Q`ehihLwS)k6Gu;BUSz{rB_VvlYn3q4qH|CDRrCx<0mOuMYE+*;yN
z8fbTRe{;fx;J%-G_C?R*vlo)vwBpl&qKGbI$+M-*917tLDK5(8vz{C{p#0+e!#hf+
zd6Fx1l#aH&YUVg2AMd$F?~J`i*N+_n_Ws+o9Z&Q<sfoTM`EBpC1G_%5C`rUETf*wH
z@q5zC#Hwn$wP{<UOM^vkZ+Tug^Tox>OGA}TW-$m>yWLrA>vczEW?1ZOHD<<I2Np9n
z0lk>U=SH>PC%BwAV#8y@u5xYf<l_P-ALV~+`pH~dvy){-f~Zu&f-f?BZzs=8pZh?y
z`NUJE{C~5bz26h0z5k23$fo#L_g7zEefqU{-L-A@0#<Vs9ry1);rb?PPLt~l$Jh+D
z^II<aWduKEh!apLKTsYu<22ik<4!w^3vy5IH_$6?yez8Vuu}Dd`OB@7SyM6^ROTw$
zK3bQ^c~^-^ep-Ka_q!=~3mHV57(&%ulsDH*+sv=jAbEcK3<lNm$@-W5eQixX6g#(9
zSnZy^Yq_1bY=xrcW1ccT{r-X*R(6iB4_5?FTfb}H>x&Ub^S^4vuUmax>-7BVOTNC2
zY@H$2;u5Dl*EPQ`Lrr<>#K>2Tkw!B_=WMhNP~d#fu{EmH_`m!MQOA`WTU2I>UD>hd
z;QITK4|1B~*YL<+@3Visu0(Cg)^|UDPv`hQ>0@l@uRYWAJr1+AF!-I4DD<;3P267)
zVHP3SbbOJNgd|ItswIo!d4rAYJ#CsHe?#8QKWn&3e8%C2=EvL5E?OR>zI$_R&L+=k
zU9IPq+0~txH&dmeJziydX=(8<jxC-x_3ti5JT*8Y|De(?W>+oSVooX7%ZdjyUmU%3
zl5Luh?t)dVTKVhd@6m}^lc&5$DrdvL3Ag@whu5!-UoCGk>D%648<M8&jhOZRjKI^=
zp~rqS{#d{hxXi;Mcw(}woc`;@@_M(|KbJCfQy1PG$`N!_*<8B)MY-Chk1mVeZT;-`
zJM{3EMfI!77|w|+ypnY_nP@2ec$wqm7Rjgo^?%G;qrd3b#N+#{u6d<pN#B06&EoE)
zeTLj-`no#)Q9X|@|9a^dv$S%V_38|cyOVgscfP-~|D^WGlb=qO`j#r1>aTZ+?48~o
zvS$AL8%c*AE?15<H5LAMXNBOz@bnd0rXBJ^4;E#m_qodKGn%zqO0a*^t$vZSHSRZx
zl?&gV(dIS&F82G)38BN$%S0!=-F~O(ja9n~d)MaG$-fjz^{3`f-|PP8(*38uil!Yq
zmw#v4Y3|P_To!e&<IVB5Ihe??-Pm~E561<!e}6mZ^TUYSaz@*~>d*E?n#U!&=Osur
zKS?(}_eXBN{`U9p1pCY{<ZQKbZ<Fuz;{Pt}o+r__c&%KLz0=>|Z+qXKSpMWuQANER
zC-?5{Nk7_IEISHzZcIDfdTKvIgkz)kl9-ED5iV>SL=HMm4OI<#T)5?6#z)3xty_UH
z?B{-L?^P<?_v73ncG<UWxjuX!=WN_-Ro3)WQTrH6t+Cs3uE&dCf4@<c_u#-8366hK
z6B%CYzMXN<i#upfW5e%oxpO7QUlgo;Iz`EOg5zq%8t;C6UD506OJ#rDFx;|y#+Ouq
zmV+P4%Pd;0p4afRZZkbp;9T6gAii-`-S$J}MYGQ|25)@*;h;kA-CJ)LyyX0PxW7Iu
zBKc^-p2=pHX4aYC^;#%=%su}=f06ut3-9uKdt-&7`JYOdztWg^gs0Z3-l=+L-p?iV
zi^_f|vNt3s&iVN<RZw@Dxt-Yi$o78=TiHGJqV68|m|O3+f$Ll1^o`N4^k?%nJ>v5@
z{XqH7rmCg}A4Ua^WAFCdW31h3AajFHw&BVVc4i~_uf0-y(|<eoFsU_$RB%s@3e{Ng
zabY8yefYOa>;C=o*i^ew)BX5+_38iK9=oYLDM$L{yZ7bq+X93C%=@p(r{?c_ZMFF=
zVZBAtr{C@o68rA4?t0zsIUPbXZ#Qk9ko)e}yo3A=x97<%+P&jm{GJazf*z@L);sFn
z$}ib{YqHMFZ~2jaET1_{W@zhncjbNNsQ=gGroZ>@{Rn-*>I0rrI2LcJWw^v<`(Rer
zl4KRnzJvhX#U4dTd+(=P9p-e%I@*8r@50=B*Ec>|uxeW6n!7GHrUq(lU8tiKd})f*
zM-PYad*+YhuYbO-aN=OWUlZ?~c+-|&Zy)gZuHXIp|I5Gbk3KD$J89PCli&PTC_cVb
zdiT?jU#E73FF(om+VP^%<nl+=jSJ$_PQKpvE$ozti~GJW|L(eo7d?7@C-#21U6lGa
z^%Y@qNli~@E<CWvblyGBI<1{{COet0@G6#`wiPsbX7PyEFH`8mY1#f{IsZ%zwL43n
zetp~0WA*>n`rkei|LMI=4SW^K>BMq&`<vv;{=0Vfu3r6C{?a>6u6KXKoUf(ry(M^C
zo26~R;`j63@164K{Pca3-d{QNv;J_?>`lHt3+J5SnP>Twb?HRkv;Pwp%I#Nl;n=-5
zML_PkHowIGCqLt6^SPZ~`mlw?YR1i}e=794o|zj*eVBgSvMhATkIyg6JJiw}OSM$B
z%KP|z{vGXF>ESfxQI2$m`gHx;g*orue+k?DxI22fzJ2PJUtjY+RX^Q7`S5yy%=%?1
zv9jfp5AC|MaQ6g-JHPC<#BbYLcYW8UiqiXOCdK+2t_AnLIw%;k{p-{fm&8mgHdIb1
zdTn)3lg-k`;W9^E%$EC_9mWnP|9+0!z`BNE*_VTPTvNO3wp%19`7z78WKQFcVEy@!
z$$=-8yEcI@FmSV+wW`V=jYg(`&|}e)Rz<M|>d#f~wlDj?>}L`$FV9WcC%>)ao_9#Z
z>m29l4$P1}`Kw2*<Usb(SCP}toqZy&k`+?u#_N1W`iH8$LtEr0?yQ-1H9i}{57|d<
zeE)XGS4Wmh%d=YjCx5@a;lL&y)l<u!I%YqWYQJB7`|po6_qLYbul;^Msk86pw>|}H
z6{Y)^eA6DwuAW-Iw)}o%^}Z##vbQ^Ux&#H9gdYBVcJ^%H#>GXITr8@7EgZ@v&ldk{
zS(9ZrJ>-Pno`3AwLe1(=90lKr7=;@tr6-Ck_~Md3@3o1kn5xfYQ<H?1f<KJ+o!z8n
zT(!{fY|_IQ_JWJ23k5&!kAG1nDkV5&=}p~qlYM(7iv+#j^zpSNI%*&HSz7tIgGY?p
z^Io`WP$j?26NTI!g&my6_YQoF=G%AKRsPP_lLqVeUXe9zlAO!&*C61fO~Ap)0y#$=
zUgezZ_KbR@^yE;wDeLczg@wgQKF8J^tXaCKtJ2<P;>L6V3vHq2<@dxA1(klz5-)mt
zH|YM0_0ccam?@V>9JsNwewFw8-Jd!wPv5jzeZA;~^Q>~G-?um?E|U24{DY^@mir&#
zly*IgJiPPK`X@g=g$O1{KHfH6#j!Y9J$J|LFY}usOWNbqcb@wcV)Xa!)5%XvOkyMp
z*;1#4bF&|9f5q!^#MRJeJCEl<n=U;Y57pgT)~)kJj(pteBI4lGzT1CC``Yk%94;*0
z5&h-tVSH<r$X)omaE`}=UPC?3hkol<$#V7cZ@PMu(PP1)g)ijWb7Vi*o4SAc`nkd8
zy#@0*o1l+hgtxyIFTTINc5!&^$ufV@-{R^EryEOEZxy`2F)79SJByZ*XIt^yt$gZB
zF238dh@*4bRo~M#)3TV3JY8FEx*)W^O7@yA|EDQCuJ6&(XU|y@edOH6i%DEQGd|U}
z{4|>IN>uB+hm^B|13!;I(5I94U1xJn{G6`Gd|plNf&VMp$=%a+H%?dnrx(Y!r}OD!
zms19<b5dWf+%)C*9=*JO6T-E2+KR0_W&cZU$<|YU)cmr`?>Ft%{$76f+?lZLcRwjT
z4O=Ga`teO*=8p;OW(ksaJ|0?EvcO)w>lMp{4ws8J_Qm|vj;Qd6@wn*{#Vg5wxJ#-h
zfZ_OlgS=mR^WJ>EUA8|&$lc1x^7|f9BYE}mxTejkm#)@-_BS^E#a#9Eo-fuuwLkq!
zS?HkY<jb9ZABylWCIp;6`u}NdM)SwVEvt6ld;4_Lk4rza<wQiUuJ5hhvgvm6SLfoW
z_M_czS0DMff`NhmhvO{Gt}NF2h57oTJuz0h=4J)`f3LnieD!bfqyPW!{}=ySf9v~y
zp~?HIcGruosoPn8x9WSzKGoXO-lrs3TK0&}RWp6K;X=aoBSx~H3(iDa&V5kF<1Y7{
zr*LY<;W!E2dCU&t+&_1=%9U!b5h(~gZ=1j6%*Bld#6Ku5-20$ZNJcQ|j$yP#LFn=0
zR{4L9Unsfp+B2)1i(e_p;g!R+%`GSNKYlvRru~Q2BgFje;opL7v+IA_{68prr*K-L
z##`=pzm%WEt&b0Tb(Lw>cQ<{{daduTu9uc(i|&aj5$^KXcwaBEb7n(=D%<O1uL|j!
z%KEoQ*2XRsTBJPrm!N4hlly$eloz^MCl**7TIQl^SpLA~h4vi>20La&28IWG`~I%J
z>i9w8;XB^CnODCYEEjkp`<aJ<|A3SPgUkXw{qx>ucf7XL2|YRM(m~}Lhvlp|9gpt3
zD|bD<ltZpXl2evz(@~w>+>)1)bv7n%pD|Bw)k~H>pYpSZkKOQCf8tr++cS*UdRhLw
zOD)(c@o8W2aV~{V3y#*VEVFyg9vO7)zY)(1HNEDK%XOV&!uAUB?En2h^6ZTV6BK-z
z^HcT9-7Z^ymey#n|I2qPGdg-dZ!ep#(si?z#0@$do=q&#l|3X>qUhDQjDwAVC7<P=
zN7tiO;j#jYPd{FHsji~$U)<{p^Cpx{JgP3VCUkAx+VEc+KgZs*wfqpT$lqywh@;^@
zL&TD8`P<Jd{%7y7qM^<qamF^4nMZc1l+W5NT7N<3Y~LB737redE);H;oaUAE=VDCk
zQ5F%NpVjFv%<WdnZ8@6v&H4!A8X3cd{I4oE=uYOp>+Y~G#ksKk+s1v%Ui`W7kMBaw
z|JMg1x%fUOd-+@~C|-D^^NO6<I)yC0_U$$Y51jPuOmTPRSo&bPWuRsCn>vOqj8%Th
zPmWeB{jo-+<@)uvRWIwN?G#f9|8@1&{-ghPru;p`a8^RNsH%%eXZ?l-i#^%v9=esr
zTzCEdZ{MmfIxp+?iWlXr{U}rV+~B|j)+rtzG91?LOg<Utki1IJD}hI2+s`wLnqD`o
z;pA8P7XRdfg;1B0mrR5}!%?H71xjz55+)qmePHMEGBf8SR>Ob)-OuaCZ<O;bX@BK-
zI%ofX{neAIww~%<GjG4lac+(T?mA)l$0pp;*U#Tvp?{2Z_DA*oO@)ya+_xI{)bYEN
z*<A4t`84sMpSBaXW|Q%b0x`p!^i%71uG{F#z0x*&YnOl6)AezGt2Vv9W-9!crC?)Y
zhj4Q<n@Mnn#fHg071-to7#~#WZ)Ym}bNA#umz(VmBsOf!kbhA5!(%;HmH(!H3nb4R
zZBBW4^MM2hllt4ob5BQi=}t+QyZnGf#V!v?58KazS9)9SHNR|saeL{{{kQgSn&Yu?
z;uAZ`pqli3PsIWPYuT>eGCQq)slzb&#v!Mkp8SMe(;kML^jp6zs_68dnHhVVGNv<y
zyh_hz@QEq=8?w3j;+LNCDB(*VZR2fIPTFj@@q22?$Y}Yb%d?|c%3SEPVTK0Btj|sP
zE7v?}{xj|3v4p1|HCpBcIHvzoc<MP%dv5JXu9NQ)N(IG*L<JT_>$#Xp#C<>I`pTMP
zYW=;nhaZ0kJ#+8-A@zn8#!LUSP1#(SQ>9a~MXW6$JN@pzo>_4Q|JNQ~$~BQKLg`+S
zguP1A)lxg<tzXv}od~G9+q+FO_hJ9BuE3QKZ)fl=I~{OstDtnE247T`tHXT_fzY{3
z&+U3SUI_ootbCC?%izwd?0=E3``s2K*l$_0M*MWaW%Y7h@t22#@BZ5DrdZY^>Rq95
z<(7T?+VHjUVw~JZY`;A4WD4-Oy?f{T*Ef&e348Zd<mrFLc{~jY3ns^NPVXpu&G^VF
zo>5-R_udA9b$a)8behvnOt4U~>?=CksFbI`s5-&ava$Z%{3FWQX?8jL-r6mm)Nhsa
zK-oUMdifjuf=iDc6?c5N>9t<|PE;4q;ji}>#@ojp-qYs9@zjL>Mp5X6`^<Tha^j4<
zf8YOctTgGsdP!xY^5$QyeR>L?i}#D?m8(fvRUONFr}nP${ndE&RgQKy6b>|hzVqE<
zx3j~koohF{`&^lAA@O1Ex^o+LZ(I^|sfb(fo%|~ceub0Ww>y)Y4tQMmvrYA=_FT3j
zcm0nR=?BX$g?h~V7ypr)wJy4lTYt7k;WFD*&m{hAyze?^%Di{0n~bg=kPO|u?&qhI
zpQcuF?(De!;85Zh-txJ=y{S2+TDAYx=l?$vuCnLu{e_0*+uE3fVm6+iQ(niEFtdh9
zmGOdKFw2g0A7&mX;1a(q`jM|d(COG+m;N=Lj~Y@RCOpwF+*w-i`B{13X8*5TQvw)N
zg&0^?7#OmhXeei8xuJ4m>iiCo^Gl8F=l?&azyI7u?LPvE%J1HmaLJ$f)X!XcEGvhD
zYvUU^U!$56E39>Yg<f7!(|1KwD<ae)X@al&#Z8x=`l){QQ~aB2p|6?5eLw7SXkv<M
z_C|{UF=>~Hi~5Up)Li<+|L4q=aM{>U)z`uDuiu!c9eeis@s0l<pVWP`cqOyNmw&<f
z^VaU~O6t#kYV#1_-SIwcpX{l|s;S2wygymf#8w<__*N>$;^q4f&zAph%}@4r$<Pj(
zr04WiGX8I5M9-3Sz0G3Db}K*16h|Bmx+SP;{?zi*58vDWHP_Xty{%fC{q?=<^s685
zsxvN8e-JMA?mzSW3XTsq*tSS?$!N`Q`s483LSFsd<S=>dSu^;~u36ag)<aT(!9+2x
z=OT~C{g?8}ZEw3c<9|3Yrr)v6{%11(ROR;%AFn)8UzS+uexUPB{XHEEMcq&B74yW-
zsKsvk`pP^g@`HX<J<F<=?CNW_36tAD{eRW+zr5|t`}Him?^l^m_09C+;*d-CSNP=p
zy?%ZER@c*atIIOiM*U`$J(oPUKQqRvyn64u-^GvWE45?<PR-oBUvK84+!LwQD_=iO
zsV%QR%)RQ>j+1Z2wfA>UW(?q}h*CJQe)9F-Y(fd4Qy<^`#4RiHCh)KGqEIH5sdC~$
zuil-WUtFPL{&Zzaon6gWp6I>1SPFN~WwOY<Su5<#EBO6t#e`4Vzpom*mS4Ksw%tzs
zr!&V6U!VW4IK3tuS+R5dmbY(qvHq;z9CG-_oo$PMsP+nPPCIe1DZ4WL*<Qsb(`1cT
z9KLc!_Q}kWGi5r@jx9es>96Mwg_d``3mh+nCEt=e$t;u5`u5!`F6o^M!>u+>+OSGg
z@vdHUbiH)8=4MtILtQJEtE_@dOy9KJOEYW*EMGnR|9xV@x-*~F>#<G0*15W(X@|MR
zTAgqIc%FD<J$d$W)e~9$-471TS3CLp+s^v3E5ba-KkimJ*f^O#;OFTtFN`G?2v_qm
zMyZCJFbL^6RmveFKKV9x-iNz+7muFUdWcnQ`v02w>t8Oc?3piHp)WeO`Os6|>a>M&
zyAHgVaPQ-iqx&zt=s4`Gwsr33{V!4#ot=`W))=Y1<E>Zp3r)TH-|s?^sQ2=IhU4pZ
z-)GttYuI^vBR3aIZ(i-C3I7kW^Vv*f4C5(jeeLEOQ687maBY2vyVS4QhnXZ2<}Ul!
z_GfA;$IGgat&z$exo2<w5#aW)n3sR|f%^~s`u(qeC@ih5^S^d=kNBlt=}5NgD>#?e
zU$vPMP+ib?@VAU>#H4xNQR`i&mahKux4P@4miP4tRke5TuZvH<zq{Zf)2bb}7zE2r
zTY6$vACCX@?#s3}mNG0H?@BXTUOf~4U-gUS>9h%Dabm%8{~aS`7yjc<6>I)j$NpA+
z=laY45ArOoSZDaX_T{95p@(y`nF6Qq^V-!Y7nyEcv*fVRCefzV9IvF>gx7Fx{k{EV
zmEnvh2bN!|e*HMLU$e$p?uSliXItjKFo{|2!mO5pU*s~E3+0v_SbO?fZ}8pc67|z>
z6g!?|>quYu;pVd+T-qfZw=VzP!=>=x+~ze3&kvXsecT#VF77a0LZq?6?#e&^?M7i5
zxwmW|>e>XghnKDUASCrW-R|Me>mD=L)%`#DPI%%ZmBZW*&39H62Rbr++j{%M;;Ffh
zA2?k%4q0~X)brW<zqS6mur<8M@m0-IsW`T{LVMqbXMZhR)uq8{DJTD+ujsyGOV3ZU
zVDC2z7Q`jlmbU!Nx&P+ezO@%)->r#VZ1a2dwXH8^r2e1IAN$@y%g9NIi^E=wJBWjG
zqOALW(=AV0_O164I%xI7hkyAdwHjZ!=u(CCw(&n-Kh_Ty_}e7?_TbNC-`q{HbN2mB
zG+O&7E4F_9zlk6I1oX0fY~B$n@$a!`?8(FWPuclx3e`VIDJSj8oqnFxr07zx?j)Ig
zfv?j01fI@m>6!FIJzwX=`-4~6uB3*xUecSo^vkq`5|d7Uu)ROIfMfdpvUNZC_ptgF
ziJe=_UF$CR^Re1Wi@na1e$S}t6R;MZ<ty|#cf}mRJu2%AZXSEZ8Y3~+rKRo9?4W-q
zzfTTkxNIu(gZEzS>0Prk-^6{4{S$udr^|~Y{TkbwS4*wl&!TAcCGJPp*Y}$H<KEBT
zv;W>RQJzUG<#(CaU1Y9!!opg0P%M#MuK(>kfqh~c!mrJP|5>eH?(uN;;TsBvRtQ#>
zZS#N7Jk5H(e9R*0MVo&abX(2en)u;i;C!w36ZK5zA1V|wy=z@{?S$is{<SXUo|&pw
zmIyt#U;X8v!1e<wHRav$Eho<W^31;@R*~A3aX;@(oyqm>#~c~GZT+|ng7&#({*Se(
zKR0jp|3e0;0&Q0Xf3&}S99+3_<yI9QwjA9pzY-%|e^&oHeU_zi8H;_@!EgVM9$I3<
z&cbl%jI^BddG;Dvo50*PzwUeTW&C(P*Qu$_LQ0yq`8+%S;RmnJt&HX<DEbg3QzKj1
zA;p{?<gXgCyrpE3yy%aQS4);=E$Ta%_-OZj`G5@<K2)t)^=AH@Fs5(DcEyG{VQ)>u
zijU`H#WQt$ROOn%bwt7_W?9VaORF~@UpIxx_28Fj{};?JkJ}rsXm9`5Gj-Y7|2H(v
zYV0C2{(bkJ{wsImPW|(<Z{>+w%O@ntzG!rfZ`w3j;q9Z1dda(@Wt+SfzC3p%Z}#*5
zv&-yG-@U%bdbVZkl*FV``;5M7mjkP_--{Yuixa)r+pjM8SjCICCNCy_8n;r8bE40&
z8-FJ<$9`An?&9<)F*9+<JuYk}F>TVHfVc@w^;{f01tz^qJA5|k{Q5W1tn*NE>rK}A
zxjWTs{dzX0JTO1cJ&9+1mxz$ff~5!dn4B)1&&eYga>=9Y&5s`Aq_QNr3g)SeQ!-Kt
z&UogPzh0%=yzcPge~<WhLM0du9#1!!#XWC<alO)_V3}t;d#!fMu;;Wf)*D~${a~`Z
zeZBK0Mv1KqHO`B!<!0QO`>$zDt=<%~jpbpx)(a^7__6l))eYVP4~uG!<~?7jIpg}3
z+ne8Bo4;#gSnj=(&;S1t)^g*h+1+%=kg19F;Xkv>M)j3y?SC#7rAE}gs!h4}e}6xx
zl=$!IW<kF>&NmA;&EB%=@3Cfec8P2Hx{Z#PUPsL1bX%43s{Hob?^i_3ICh-%w)=Wg
z{d-`I*Tzayom2nS_v=0Xc>7m`=<yxBUk<-n+S2#F;DCaF<<Fx(Gb=PXAO7CE<NK;(
zQ^G!RX}@^hUC*d;Wr~U@CnuAm<AxXwH7?G!hYlMOGz0}(om31~xSY%1o4(Tc_~&17
z|MtGD*!%Ox+8;kctzQZ+c=!H&#ml#LCilgj_4@Vu-`O$$(Jb=?uh%Em)pyoQdoTKX
zVc(=%r=P_?=#}CNzcBlE_#fv#Ne7(o71hq?)=$loVBmVbxrDvC!04>r3kNy&6&y3(
zKDODrOXbKSv8c>|ZjD)i+yNDZP7mjutp4rL9JE=ZrzJaM759Q$J8x*pDm-A+5$W&{
zQfg&67_cEgM}(V;>1aX66em}Knfol~R=xfBdEM8!)n{7P+e@CWyBIkqD6T!<zI=9n
z|Kj~+k2`wzznR*aeS7}LkS)jhHZ|un@8h2SNaob0i%mAamWNJ}KWkU}xz%8A_1^hc
zmZe1BJhc4Kk;_lNR(`S+pP_X;spQ@ShOfU^^pC%Iy3RJhdcDS6R$s>ZPX!IF*v`LW
zko|X9mLcrqv{=Q^iCu}&_mnb{MM7j4bwVbnh;XtxIXP+sXo+#PGA(pi5#b@h&3VXC
zU`B+Npk%9)TJguOXM3_%mq)FAbT#~I-OiWmd^rz%u-$8?cW>S~{S%cd|L(r1&GzQ_
z>)7Kz+Pgir#{HT*=U~t6qGd{_zQ+GpSkD=4)!uhP=5xK>CU35U<8{8S^DFn}|9+Kx
zV!^G=LM{<68s_}__wT{#o9;=?3t|@4%f8`mjuN}5Kc&L)v%Vcy_{XlvExY*J4>DZ-
zb~Q9bb<x*N>5<Et0(myCns7jH%_}*}BQlIiGyl#J^GaAH^3nQQ<-6DC+7xv^uI;gG
z-}!UO57p4)h0m9Njk#^^GHLJA`}rZPkGF*`D!Hh?Ecej9f^Tl`;@>tOk&B&wH?iK@
z>9y3<{kL!bZ;Gv(@}){MZ@SMnGiG(YD^rTO*Ut=HUz9JD8R}B{_Pm<Kocg>Ksx2I{
z2O~mVpIeHFR3vry#w7+`@%sEpJ8;vCi3ZBgFS1!@KXJ4AYZV_}@3T+m^{T!buNNGb
z^odJ7_K(j`R(AWzmp`tavfcGuw_VR;P1IfWO{azLeLS*Z%NZtdWuwK%EEp9)-tvn$
zn6$)u=h>^D=2bByT^EjuJM@<|KjEfP;k<SKH$U03pYPEk{gU6uKK>1P8j^5mt?;db
zvHKN7oGof%)fR2|u>Nh8jp3ej^GbEQ<{!&dW=}h`&e-g{#(dpXlJA(yUFI@(vR~2n
zxZf!oV=-IdTeN%R2}YOgmlsX_=^TH1>aK53OaJ9ZE;@PV5~pUKim#-f*UZPfCxT@S
zEVfN|{H*Lb<IS!kGbXw)rt!SZV7JWL%3L>-lX=B9QO3A4XBmnoonqEb`ocF~YI7?C
z!@{JKu`ztz7k;ZZ_7*cO+S#+noH;JJ0p!U^Vp)+Zh2BmNJ!|oc?I7>Ht#{mSFo^lb
zuXuJ__0t4Kg`0oZYV1G!`#-~<|Hs;=_{?woaeIrkTZhS!k~7X;Hk~YboJZQkez9v@
zE$WWT2%S7vDP@w$j#m@E%#F{yf35su_wkc8ukz<J-2A7R`T9g}!qWA=iuTM)4S9Cd
zu&%Q{_vricsSD;Eo_Eat>aFiuDQfkFmul~SJGJR<%%rP3Uwl&OzFxi5xM5kU;IasH
zjs6RQ|L^SnewcszuSp%%xorILhu=2`-)g)ZVEyC3o5#$nJL=vUnI3cB+^<;pgz=1r
zvt!{cc1DdW9j}Cp1P(1Wm~cczgqxM+P{WQ8JwfSXJMQLvd)|9ByXxHRMLRzFo;f}J
z$MFrHzg-QDv)L^x^P@8P>BQ?h*XPNE_GfyVg>Br|thZ_M!2k<OpI?2Y`j@t3y6rj?
zIw@eD^3MdV6JmCGmrM3|et)MJv`9)JI*W<#g~`6JG50UCa5F~j)W5s>cU1w0+tqVF
zd1gh6m1I7PDp;ZY?#yg$uW9!t?-fW<+9B4}&ab`x?cp4@ZEgL#WcN03{Qe^=p~1ja
zz?8aRhXn%zU!CJDEv_!sT8`5h@l##@ZT%B^Rs87Z|My?refjV2?~|W@e=so5Ix=r<
zXsF<=gX*jAuCEYsJ}7v5aqW}Y_n9;j1d1NFPI~&i?qh_?f}rj#C;gZ6a*E%P+&k6U
zG0y#tnYz^f1Ve@xZf@m#rlpeG5A;cYSa2ZlvQBYQVA}s9f7`P;=BBqZRJb{nHX1Bj
z-c|mFHMg2=)0g`{*R5(^@|y2e)Yb407KfN+Di5eRCCHvxA$d%(=9bveLhh-3e@@0(
z=GiT~9`sfG|L=EB|9^cw|7+ttYrWVtKOHR&Jv`jRK8;zxW`m4(Gn@I0YChA~y_?P1
zKdFhHyWM!%(PLJ|V>VTN@!PTw^sRZz5}G(xzdVt=wfgUpDbk+~v5Iku`F*LhR9}62
zg~$xn#J2Bx+UGU6DlV`p7%Bhzd&S;;MyKhn=jNtM^#!gU-IjSx#ejJS*W>D!hntVM
z%AT8MHS^rcy4KCdg_yH$yn3q6_UX<uqx4Q`;gs@sH~mjFct19-kH50D=;eVwesUGe
zr;naKdh~=<#+U1=)|VKM^Sk!mcYhaG@Wg9N;Ct@rXS%yPwz$--t^drrD!AY{&vx0f
zU7xeR$M4}yWIWP1?P|Ho%gpa*jrk1zK0W<;&Hk?(5sYjc6Bt_sEKcz6WZ*rcFX*@V
z0Auo&@^#)z7T&$X`Tg~s)<63uefz&VZdavj!*&)MCZ2f4Ge%MeSAXAmVr{(rpZN_x
zR2&r>oEW+p;@aKxzwcG#Gc(^I#Kq{ke@f~0W$GK(tr0)W${M{s@SE((tJ!Hud14Qi
zehdEnVbR(b4;dyLZee+}N!sF7L3Quq<D$zMBMrjM<-c%!Udqtm@K2Rz(}7*LD<|LW
zvv3S-oXz}Y!qOyRr>+l8oifZk2mXpXzAtNBtN-Vb(^AH}Z2{5qc_ZH2{|VCGv$p!(
z&a7+tSNBK#t$taRAFi@B`D@nQT2F)i2E)L`{Zow<Uc|&P{azV!YTdeBFV<Fd?ejNK
zI`}}6gS9DkhTW-G2gDaBmEIHh*8V-~y4bpJ>#wh#_qS^LtLv{dZL7b&+Q0Rx@EI-E
z#*{)H4FjfUW@}`hGfz0^Au`F6vo3AT28($D-#B=ZmNa_m$u0HH_xg9u^PvwzveZPg
z=ifUIOH4WSOuk(AdiMMs{)XDCXFl0?<bSp8>I?U#b^eNb&|CGl`m1iO&hB+pPgXwJ
zf8}e1iatZgv7*O!_olKkZ~5R}a!7qk^<3?F%>9)GlLPB76+Kpw*ME9aWnb&p;Lv}|
zSF5dh8zc90m;M)4hai>%PC5!3zCWsX)AMdZz0UpzMYg;jEH+GEFSS5#@?qse83~hr
zSZ_69GVeJ0ha+Xdk+#N*N<|;+y#h_T6I0VQw4`g2g5`}vnF`HxFZ*<f8c*_gV3L%w
z>GF&P>vS(ZUM8D)X-3Vr<lI+lwD;sOzmF<Z-(9)H!+Va=JT<%fyZ0OqYzmB+`emc(
z3)4dGXHA^T-oMaQRA^l`algW)%f4q?%@j77msn^X=cxZHw)fipsA<3Vd3{}VVP<XI
z_b@$6W|oInXFq<d63IX5NbPgwZ{i9C6(U`%EHgxPnNRCpNI1)sr@Hh@pa75B=bf$|
zA7@TKuu6i-Bck=7&cY2P)?0QlKB+zMer-%1+x4X%Hy(W$@^O3r*;MY+46nI5r275u
zrbg*cWUStq8UFuZqWevo>4!CE?rg5#n|()MYPEaSAFZT|FQhXhZp@C!jpwzzzi<1I
znA3$l&vU0=o5qzPKQl|I=H45QsAcbeE<f<gWPjHaVd>~iPO%5$JJN4=3QXn{`+YRh
zRQ#9S?sxBE<XiS|DK5%;w2yUy<AXB-hT(f_JoDZ^sQn?~GUE^9M7yTyOYwCptREYj
z%@GhwV0^XyrR_F{73MlCPJi6%U3>V$o39qfuFkRi`9Vp-_W1=pD}7TX^Y*8=Ef$#;
z>htCZWCkx0+@W)#IB$uJ?7n5^o&6VIDd1E0ThUwICqAS6Nc4guhTe1a1=}}nV`4E>
zJM(O=vjNA8smq_R-B4M({r!*2Vh`n{xqj5#tj*K>{`_oBVaMA3n|&82_nf@PzkXrr
zS*K^(m9<|svd-U^FS(F4`a#33@H<+Y-dIfHsb|TJ3kdqPzqjG<j>(-|8yfbc)NFpZ
zA@QA{ZH}M0l(}PX#9Z4hiRD}3KYYvP-*Q6z@`jfl%8zVU`MmxjxV8U>vdTYskyF?5
zB&Lam-z_{W|HrB<|K|Q_U)`LOQ~STK|6#Dl_RL;B{nMNF%>T8uZ2hnOllGdw=jyZg
zuUYgX^+!x)yWXe2r`Qr3K6q&Fo3QJxWzqczFP)6r_ct?TpGuf`V8v7U@D1HEZ%mKO
z5&8S>gNe4FxPVVZK|<b&jW%+ROn>Oh&B$@lzW1B|go=?4f84@b2dmZi#9uWzbY1ST
ztvmCnZPiDO$0ry2I^9>F|E@@<PO5DF%ce<l5)S^*o#KDHdM8_TLc?^?Jvx`un@^`U
z7&_$HT<MzprK@`Xg9V-Y^HTZh9&;@`Yw%O8=JzKT!HKWBE$u%_s`~jx-3U3(=>INC
zbLwBGe=&Adznwczmd|?_=dG)n93r6cT=?HVU0K`vN7mMOy<U|5HUIV0c=H1@nJQ<y
zy;t9}zeB$K>{i}cC+2Xfd;N_%V*849X6--!*_w?7A}j29c5)dpak2$$;oT~h#(3($
zyER8M601KnPU<Z?vT1wY<p&Zxfs2~i=BEB#C?{suG*!H{iz(E$vH0$XsB0DlobFxf
z3BK$s3Q3o_JrjGseP7@wJJ;jl?s*f=6nK46n7V+`rS(GLw|E(WdHY$a3%<6o8@%Im
zd)GOcxth~{^(lu7opmc8H17TS)4sOgqe`C-*EIf``zzTw*4O?m|DmYy(f;DmSNW>C
zw*K*lwoRS9*FWB`b@}>-hu^*aW8JEMl6O|Cq|j_Xo1a&h4;=k)UP@=~l~p-wEJYrz
z>B@c3-)}tc{l;&0J9cKz`@N{*gThaqrGGQ{etgPX_hG?`8mXJ_ihewB)R`vQ$Mn;u
zC|iwz-$s5x`WLCc<&{@*Y*ZICW*mHc&WE#Rr^d2;HkWNLbh00st@*Ivm+05jNy3Nq
zuSa_7zpuGoe)r*gJDn%z|N2EOFZ}=Mr&-kN*__|YwS_ju&s*5W=yd<pDMy!xv}CK<
zLMOlOx{$D=G2!kn?>~80_Wy2K-ad`_nq}?%s4MY(rwn_;cwc8#e>Y$4`C{wj!yi)A
z8`emMWymQn`u}C?@(tI741F)p_ij?Vrux`5P18niW`+dUof?6;TJE|}re3sh|1<CJ
zJvqCMG~qo8;uX_g?bJ#${iU+~hqA`UZD$Khms`9)`ZZb3zVQubh0e=u+EZD?U(S_G
zdb>0}W7g#f+B<J^-LJiSCQ;IEjZpoSdoe#;e&o(w)wAuJq^`rULm3tPhXoc%ILW`>
z94xbBRgP%xfjVPpiATKCdN=>y&~WbUv8WUN_S;<z|8XAcGm3rR%f$Up^?SVJfoVH7
z{`!^l?N>?s{f#G<XB)D5w-lY$pRgypUQ_69o)ZuA?T8@$FH8TgW|>nt?QQ+!-*-0r
z7kb(l`}n}dS<LIA`uf-ZvR8jFpY2Xx@dN9q<a_52Z+W}FSmKNKYk`^vzt0_iaA?b|
z*<FnH^$Q=WSDat;LviohAG!+3_ZnurT=?{dUm5=kabxC?K7XUFpC%aooc@nn<>B(4
zL^aDYGZjG|&qFiB_1$tJm7>mxB|0o<EfWym<R`Y*-Kfp{rcC3<#JQ;*79W^}*k^gn
zXKhH0YW!}rW{XXYa=vjx#!mr(%+vfU@1LI_((Y5Uk8AbPFJGm^_~$dfH}K-|J@(Ca
z@29J#+FCQsf7(u%FJ+<byP@jgwa>+>oa&GFx$vl+Q2T7<HRIgWj-~%25_TS2|3Dx$
ztZJUmzw6htFRj*TyINTKpxNPO&#A&y<x?f?{0$!G^&EO;DlZUoLSmgf+s|S<E#Af*
zFU%&CWczPF^drt??%72hJhC3DYgq)3?pOc(k8@v~aP|Cur#?nW?3wYlu6m!>qx{pq
z|35NLl9j&yA+-D|M|aGO%OdBG{t~<8_3>Hu9vAKP*S{<^i;ryj$zwSo(Ol|K^z^Ww
zHSJ#|6tpe}w7)-@kZrRsNTf{TlWf!RBhsHfhTpIL^rP|Ap&T{AZ3|rtC-_cX|M_~u
z#M!pmj;s=qq0Tl>=KkToYoFe8_2gHc%%m)fnL%gvyf^#0Y_V=bqKV1Lg=)^ve=xq)
zKGN{QNJiquhrD9HjZfOAtNa#G5aIf8y*&7J{bfh1rJ+AxZ>jwwrCDKr_T~BYM!TYS
z?Yr>p{q@*1QI2V{lCMAgxWDi>=M<v`wm%<UuMvD^Qe9~ozG(0CuWO|uYinJD_LuK1
z-}|p+rrEY%r;o*po|y2M&7?2sx^MnX)~=?mX%8R!^X`7p7Bp?`go)oZ4mqU#VLq&_
zKc%V9ze%iKaG}`D#g6hizDIJ-Zd_>3rJw4!X@5%#hr-*YRTDp6dg5#B{qP_wi`uMr
zucBW)RXC@(_}$HQbAD*`?|Alh$HI@w^V$Dh@n06N{?d7lkuy{Ahe`H~RjW>S#{^~v
zm~ZG?dVbZ`csZSf<wyPrF4PRI%DYe=9?AQU-<dt~Elb=xzvxfys;)~vvYnrq@$19Q
zxat}^fmiFhzfP@u<tqB?Yu4v$Yj`gHpVARo{A7oS#LK$;NgSTlcHfS==BUqWj{0ez
zF_9<4*VmWZlJVCZ#aoMiZMZQZAS|!S-k|hppHyOv)y9hVyYf2CRJrEGcXu9MxS+FN
z{^<vaIal@KnOhW@1R6dZ=$W_d_z9UG>;bXT8K*CPXlMwZd(cL0#s5IYU$1$E6n?R;
zVqiXCb4YyZ42F71uKWxCX1%*^Yc<bxzxSR)>gOMHd{*7PU(l}7ST86||In6*ZLfc+
zh+LIb6l|}Gn<&NeJ#(qg?JcoSKF4^6cx<Yi6u0nDUY5?UIrR~-LRKAR7yqZ6>Pbje
zUA?|IEuJOy<OV16l`FUH?445H^fona<)x?VU5oyv7B4cHU+~xQzwiCldw(Ji3d&UP
zmSZh^u)^WG9oKKZ!&_ghU0%H;yph9(|Fswc19z$8EUm6pti?RtTkjYB`g-eVVD0L%
zwf{}l{@Nb({k3>~y!ZV-tDf(#fBOHSXpw)oow~^0Z+q+Z-nv?~YjV?sAQ_X?QuF@$
zAJT^oI@^So>+A0CDq3^s_)eC`aeS@44^tP$Z-0Jdo{C9F!aN1R1qZs>8qUQYdj8hJ
zz<kQb!|Ru`EfUvvtl9r(MRq=m59<o$s|UIMT%H{MZLR9}d)7~l>vw(a`Wy2%`m4`$
zujqrXSsuy!+569C$8O_5QHQH>izgIrj6IrvtD@pSgxBQNU(a8w%IIqOFaLb^ZvGF+
zt|nDWqdimn9&bMGwLdH_<W6vDZbOU4J=0a9zn8EZTn#MR&=>1GNqL{oKHL9)_OURu
zIGAv?ami0tw@7w-pEXUN|4HGsrrMPiALZUAr_JiKmQ83_aFuBv&$gyJoJ+pleLU~R
z?Gw!*jo-xYUa-o0G(ooGS<H#3<%g7=rhI?CA>mAGsP3)z@~6uWALLN9^N*hRK6w9&
z>Ui;w_Ui+du8!Lu_ctnW_Oj{M_ga}auXnk_Atex}`17pLf$8j%rGF-U_gOgIhNF$e
zz|Onqn%8=k9~LYudP&<Krz>qsXU=S2^latIK%K-h?|<*UBx>|Gp?1|&+xJuE2{fg1
z@kWWB<~bH|pY6_uPsXR$FiFTaW=nLhQCMmC^6#hT8tPUJ%Q|eYh}RsxbFXz{Kypab
z_u7-$_t(eXUthLYwXMhg;q^JJ4L)3_4<A&RyZMaj|6i;vaf%*}8B3PVk$v)5XJcJF
zlf;9VjC6}8KKA?nBlJbN7w<pF`?pH$?)osn(@w&7tFD*6W{sKd`tg`zbWX(swXSPN
zd+L{Vu8j}3e#0v-<$l6uf5u~rH^SGeC%9g93ktbcRK;WQa<?3(+?NU8j~3lJdjHj@
zsI7(CAP3cL^xtURn5QEm&2s$l0;jgOy?sZvSmsqo{a$yze!c!3Pi^(g$%`H*GCMAL
zspZSkaiiGYbH%^k>!N<YWT}4t_|e*;4)x<Y4Mqn)6gaRSxI4v^ZGln4%(R0EnVk*{
zhdLV^n3SagANa6%FV~geVwk$^^Z|VZjmCh+B_c^ZP8oX1M@(P!-f{2|7mwu0YhXLX
z#I)dxbV+sL84rc$JrY(GuM!-E&DfV;Zk+r1dVO%(zxgUwI(w%Yh;Ezld0p|Ovr)Vg
z<aTkk-QNFUj>X)r#?aR6si&t+41E2|kZXR6huKMQm!~t7s$y5{e#rAGlV?@bwhg{A
z(^#ZszIM8~y|ugYY^w6!FK0j0^^4bv$DiA4`)#ed(B6>Pt5LI;L@3p+e=z5|jovoD
z=Um4>eXLV4TweR0dFAroM+#QT6jl7b5`0nr$1DEzPUq_s&dPaj2yst}zt+=yJ$t`&
zrAM>K>52OcK7D)oZ@%OM6Q1o0te40y^HqI$&hgK-Ztr-G8^%J0i#(2>?mn#g$J$Wy
zw#w!Uf1bqm9=qt3R{MKP-Hy`goqPS(Uti7a5gj3OhU1~^`|7=48z%Fl2)w^HgX==u
zXHI61gVT4v`{OVB_rV+E>rF*B=NQPWaZ<D0q9C(h^l*0L%B^bMANutjoIkYO`2BeQ
zbM2a%%{R`!npyL8$}P^^`P09CI4CxMYU^(iqq+Y2zy5b`<le|ta86VA^#2w6OLzSf
z(z*RC`Gb?=?2k3}LG1OaU*>+9{$gj{-mZU!)sGz2^*9Q969XTpH?-&k?@ShW>fL*@
zhINA#bHc--gCR4N=jp0E+P!fNZ!;guhJ`FU>#it<y%9eA*qHO)>=)ME2OoqNDtwH7
zS8wq%=34*%Cd<7a>rQl*>^l}Mv3RcM$&dEavV78NWse^fx?}C*x#raW^{WqRs(pSE
zp853~S82_`q9>xv(`D|j2s=1&zC+3FPJ?Uv<nAU|h(9tqx@uPEnU*>pi^mb`zJ7Xe
z`tqOW<@Xl3U%$V)Zdb>|>Dyi}5N^2G%ElpbCEoVvm0-bNTh)6sg!!e|8)lreu2Xzo
zy-j23w~evSip15ckH7YCZel+Yv+e2aeam{!e-PnN5WF`l^Y)Lr3|*a!nG=l-Q%`4h
zEpFg)Fnbol;l3hKll8>WGs}-1xAlvDE&lM)h2*Dk(U)?(d6SE7Eq{HmVSDZOsNMR%
z1XBJ~>KC{36;%98xD)=0<?_$uuD`*1JpF&&UT<>w<os~$vz~naxy!O%MP+&W|6hH*
z=+_tjNva3lNc~DXsJV2ZkLTfMl?Uriz7wC)GD~6B61(N?k1W+!vL=6fct`4a)XEyM
z&IKA@RxkTd%J%l&Qjvon8vf^W@BRMorzuOc$%UDId(=)9-Sw<z{V?I~{lgkxf802i
zIP*t*OU#_L84dHEz5aUSn9uab%UgFmlWw#*AN*f;?r(jG?LVuN*Jl2o=V7?I>r<kz
z>c2qlJ|^K8?|*VuKHN0L_T;QbuT3-jL)*^CxNPR$n0Vm$jXm1ZTm1L`nRoyHrs)47
z(YMac@a;T)Fur_RM4D(^tyY~*=mRwo7kQa;Z|W_!`gmox^hi{%o<1+y^=d@V&gwh!
zI<NT4?J@DVagMFlKjip{9rd|SmUv{|%UapQ@UryZx%T?2r#4BXuV2sc(#LSJ;X=nx
zslS?v7);7fX3yI+S-GfB+(rG?(KfEcC5+O_^Pe8ptuf_i7p&tqz8T`naDQXUn^aD(
z>8&XtFC-=;pEvy>P_A;!yZ-b?R!z3!d<#8PGheS$QmQ=_|J<bUZE>94&RX8))wUvR
zs_tJUVh_2ytJ=M_v%VS?!2a5eL)`HH<$WAC<KpaRTuA;cc3egFu!?i9-QSbHpXzZM
zWV^7r@7MjYjN9$ns$={Crb}H_Uf<2SzuNyrZ`j^Ctph%V3y+`WKYGJe)#}?a&kyl?
zmWaPBzunjL+voDbb5Y^9jI<Wre^?##<bz&R{7jdOm^zN<hI^*AU*E5{^!$T4{mxVS
z*PStYbL)q$iO`pGwWoQ^pBgkL+rRjbBq=j<iK^}Yn!-gVH0)PiJSuT)-m&|;*D}<7
zJJa;S-8=Kyiln6vZ?PWLKYq>Q<cIAm-Iq&?gh@+Iz5GN|UN!!<(tXqIHy0ai)D}L;
z`uW~Jlbm}uma^3Hb6Gq(q>vvG7$m|q`}q=9qX(iI$HZ!@1>DTE8LfPUBv>|t%J62L
z-@jdRjttkoXkCM8YeoBiB#KWLT2py4X8R<=womu=PY&LD>+=1H3vI*JURv$*Vg34)
zbB?ub%RObyvL%GX`Tm_Ys8`%F-!jZBQ*+)yKIP?Kzxn&te~NpzS75DwjEMK`a=oaD
zcWRo_yLI#I-p%H^sImCcs_&CO7n>YXTIW^ZxWp>L<nzPz%RgqW{~Gjr+dHAvDeKhV
z%`Ez|T=0ye{Oa2`tS@mK$%*TD+;Z^yhaN{x=LN6Mm>hMy&7ErAzUIfzOzk*TtK`*;
zw*}|qud|u3T=$30l7mm1e03z^AKf;ox9QM4Uw1{S)oZK3cY}>}51HL$w7wtw#{4Ja
zfPdO@eKzI|k$O5jFa8N$WEBlt-FrdHAkL~|$=*+F%?fjFM3g5z5i^#2XI`$l@NeAS
zx<c8Y;QviZcjh<mtzA8T*Xs}a-mcBsXE=wc_2#zYw(D6UPWH3KPyaOGUww2_gm}=#
zX)ZrET7G@_Hoh{TDZ?~KcK6&H9@W+@e(~BL<wZZv+5gbvrFY}+_fH-=NUgfC<ZCF~
zMpY5F-2yfef8}hLd!}p*`m;)0LdH?3aO;GU3ijf-x98<oXx|RK;@4PrOkv)PY*P^i
z3HkOI>&X}V;@3pl+JF6Dd*=S>@9Q{PV(0On&u<rQz5G{n{-ld5SB7t@savr$YioK-
zY*I$>`<h6_{+gTDBrTKkZ0}58s&`padA@YKr`hZ^*@u@r?|9q1k4?4dU?anp;|6!E
z#PrQ>?`+CEQn#9S?otJgxy`$}!uK(-h%Jogx=`Ph&{31Rdzp9iO5t@+zD|EJ_x_8$
z-}atXpR)AtojEJ+Dt}juk2-l!U|IkCJZVNDaaI>*>oC=)?{@Frz4NmA&cqGVA|B-&
zT=gw#{q6${47?8<XK8btVy!p*C4BYO*Vp$~UtjN8`YL2n=g(<+S31w%(h22U6*I3R
z^nV-cueDYG|L^*qEnZY;UKHma|I<KFF~-(_Q*KW6|JO0c_s22C6x6uhz4s$9Qo+_*
zE?zIbQRi?z&!(d*A4S@yE`Rv-iD+4aVH5L?^uFfpYLED)@2_t=rFhny!>2>byz%1p
z3TegP4=?@T_~7lls3Ly#|MIwX>%$Ml2LJO9k6k0bwo3TE{i|;iYz`W5ZaCO#=o9OC
z>3ChBe(|L_Q&uz_;W(gtm_?QGwq#chBiE}$_etimI%^**2rcF|dYzhV_U_@0XPjE=
z4k+5p)1Cd{1)GCwsH^jxyZQVd!Zs}|IbeB)hs(g|k>E?#dGB=YNUdn;RoVT{M%<XS
zlwEU6ymHuWpClLl%Wo_HBnMVa{_-mN+M@Ljk_rk%zi{|&n&fD5n5F!_^3x4>jp~{I
zELg~>pk|)IxBTt5GSx!`x-3&WD%GF%zB4qc-uoc)K~0WkjY)w^z2urTAGX{tFiy5y
zE^s0IKm-T>r50{>3DvUWHC^{67Hkvq;EwRIuRkvE_a_&}!;SJnk^KhhtgTnWCTFc$
zdrN%wu3byQ|HiG2$c_Ix{dLUV>#HA$?alsLnLjuFz!atvdWNU-v<uzsGRvo5%RjE*
zTvu~HLgW<7B(_}6Vn+K}P50liB{C}b@d+A8K3?2hru&-x*7@`6)mJF8HK-nPYBb<f
zVzQbaU!$~mAKO!XwvbH|y$`OQ7HV10G^dGYlEBtuYU#R3!gd84Pfybp?YhsuLw3p2
zE5CJp<Ib`O)oY2z#7tKg*!8z6K5pL6t?wB3abAo1!WD2VPk__&n=Wtn*Y_+9yPvFH
zWm&Z{+a>P(LjI+%F3pO+zUqIBc4>a;tN6=j1&+5gWG(L$UYj2{{cP}+vK5Q>%{as8
z9@YDqoo#7uC;y(^zY>xhk1lP!KUI=}^<ZZ4`~F2*O9bB;tnb*d>+AdLJAWBQCH?<1
z-9FA=_)x4A<8M~o!oyosMCPtOw=;xgqPy>#UB8qrYTi(}Q+@e=*Uw$`TkMzY+5i7z
z_kE6lb-v*nWADr?ef=uRi<6b}LCiKTN!1A)&vqS5RC}{|$7c1(kN#+wEK#&8+B5NW
zA{%pgZHLF4?zg@9tqNAt|FSYR6jUj%bNZjpsK4*{+$Hrz+Y~weS$_I&xzB&`$xtcp
zLT;T&y~mR8TZyuBN<1v$OLYzZxW?q{k&H#DOQHk}XYB~OZSD}Y+x^}3)$<-oy{&#8
z_qTe<&m<iY@$x%6E4lyI{q<2?*4$}5>yLi-+Jfs*cZy0Mcd%8lr(at4_tgzWrH<zt
zesaWgRIgXK;=hHd;k9*uS7Oua1-(6Yd7n7>t^Tm&%U-2thYL5oQ<Xe6lew){cYW3)
zf4em+1j>vb*kwfqay$8`I<Eh2SZ6FyvOhz_zraLoiPgcYMR#21O?h@WjOWQQX^;Ba
z)3O<k;XVlh(o6TGHa}J@-?vn0?W$V_`4?Y*O>T<*`qH-czU~1_N3#vWkET~DD4x*?
zdn6MR#h|vJL%fE?!$C;6<?Qrs{eMe^RG)ah)_?tI$A_mEj}-pxnq~4U@~oX={oDhm
zq+O?<{wDt?SKJ}}r-<7@_i!hPUootetNyd7b1zokZEyB{Z$adA^OYU?2aaa;rT(o^
zaQJ5G>!F~0mor%Y8SkOmKkH8~yd|^$`HFmwPM4d_KOgzNl6d##@xALGulV11w8Q$|
z=7atI<v(6L`6jtmux7!2MN!t5UW*I=?Q5Tyb2IGl)Q-Dm{;u!0zIs{xWaS^h&eQ+b
zMjiD&;L5~UUi)?O{R5Q(Y(du>O%wzrcNPmgSf{zuxN(KB*lMx(3=UVeh9lBU0*+mG
zIR3UM@Ub&Hs-A0e$}Rq8v&&hnYSLDQE_sbA4F(R@1Nw?9SZ8#}i&jbOW)3V&?>s8w
z@nLeoD#zXJ=gxm~lC4a<uO}tdCgB~q!{T53#U0^AOiTxNsNDK!6XE(W{AYb_?V%Xw
zmut&cCwu(Pye?X}iqm3gV<%sw)<>=W=TBLFsdVa!@R`jhJror)-#$-Ndquy&5w{;)
zJdX|B&wW4tpT%bTef{ara~k&ll(S}ve|=Zs>+uO|&!n%6=#{=Ba3j_Kgnv+y#i!cv
z6(^%v1lFIfE@X{<^eB3(GH;do?qkOztIZZ%?~$K4r|IA1&<>yfAH0vvWWQY^v4!hI
zRN=d>sg*YB>sRlOn%@1v_g)#3ftlk(nb*t{${NGF?t1^eRr|nELY$51WcY`@sb7A|
z*cFt1oTwyQdQViqxx&<9#+(BTu@_mF$Q=|v9&X>DIZMMY_rK$(M!StC|7hgenxFdk
zykN?Utc8kgFO%LGs4VO*JS}*ws(%}IQRa#DdJ>QI#n!VFKlwW?`Hgt*2JZR0_l7)d
z^#Ao%!9$^R)ykkd-ClX(iU#jED$jA$PM>!3<Gt74OHY5b<B9*@t$pwN&7Esae*cNt
zl5cjs?EXss-CtRn_>yXxr7sw+TOm=LIHUZc#ovyiKT3aJ9!V%=v11qTpMGGGAlqXP
zI~zCA^_!BvHkQ1zIl8@kv(!q@`Ki(s3|vVEW-A_$v0$FL#!yYeV$lupqVSBEmsWjJ
z)0027`yK3Cd~8OA*Uvvqz5g5T+x_1lzcu0Hd-wk=*C(hdF!n1{B)raj7|*q;q9*X8
zBBP^#gU159Io^lkny0R<%Sej+CwsZLN!(U>MM|FC;-}?&d+N0mmu2kV*{go}k6>j)
zt#-on-3H0k8gJ&$y7S7WmuJuX_8oI)CA~FSAKI5@7n9|GUw(o2xy%I-%l28Y&-r6t
zdn&7CcAK_%#P@9z9%!zZa`S)n!<+w#l^3g+{%HIBYWl$+&$_+dI)CT?p0?TVjX>ap
z#=lqAK0W{Ex?`Yz-1^mVA1#{xzt!HmDsGwH`?x16)$2}AI#{{w^5X|9*6<zUm=LeN
zzbH+!nQ`e&9uo!o^_%C`Elb?`>Ge51i<%9;zp^&XJtP;){b#d+_nn8|?RCt3c6i<w
zUR3_fKz@rv-G%r=%a@%fKDxQ8b4PspWqto9r+wM)7khYa@cHy~`MNcR3k)8eV`E&J
zwU>=&o>=hT!|S4Y=lzR+?c~Jv;Qt}<CgvlPs?S?<zkRDd|1amdLz6aclj!8V7toN)
zzwG{|>iX-~uFQ#<m;27?_v_1F1t&_&iY3q8VBl=jQy{wUUE7Dowl6UgTiQ9r&m7PD
z`*HCtp%%xvCZ1(4>MNLAm7J%4S<lAuY;xCE9b^Ch?rY}F%@W_aM`DSnI`@<{t9|z0
zFEi8ZtiQ`P$92W;r_C>~Us)+W`P1F+RaR_vE;Y6C``!FM{V#Pf2#Ixiv;BCY&E$PC
zP6yt~Km8P2Fkdw%ufN_=k|#+nogrz8lYxN&@7GsNQOj$6DmQ&-nE3kb(<G;9TrGmR
zK5J)Q-dylkvuQcg7Ds=RJ%Y{57b5s<dHvRX`^@Vqy{gd9u<6`JWz%~;n>WtOsyhF5
z&Vi|4o^Lm?YpVObb^Te<m!>^(ktcLLmY>t!@ZpzoOp~^Lec&wKs<j<fofE5;Mn8S}
zI!kLF<MBf=|JzjT_vAX-`XB0_JFV0G_4OY`Z-0xKZfmU9S#dq>TJ4=XlTP1%C~SDr
z!M-r4T=MJkupL3Q*H>uYE-=m3ZnO3}JEuU7o4@H~f$^-DFJ@X?3Dh{~HnV4bQ+~PD
z{eQpimi}0id0vR)T9?@2G7;&SM&9qGcb<N|e)3!UBdM`>4n3dD`pa6zarf>uPD-by
zU!D8p-^-72`#%5MRTSSIk|-*vKmCu_#zO*S={j6H-KQQc-LTT({CkJFO>d0t*A~u<
zOA*tbx^it!^YpaPPi@WZhs>()H<ax1nd&!fM&Ig3Jr*KU4FjV%FN>}}m2*V-+`+~B
zYW53=S!;MCyqmjsZ_@FOx^vv7HOID}Ewo&<k4<s<**aN{S-(CmK3nDU<Kj{~{?GoU
zwTr&5+Oy=>`rm0=>z++Xm{I*oOWtMX(r%?zh0>%4KE}T#@AbX^XZ_$1gTc4r`MDY|
z#U{^-KmDSz{$W#F)<N!17pJd(ox19ubf<j&?2_%3<+ZsJtuwt>zP;}CHQ+ke67AEn
z{i=nFZ@+ea@;kir`o4W9f(?BB2`dQI8y|c0e_sF6EenN9d7is=oQb=$@qhdwWA&YF
zVV|ta)pV1-%VeKYIym8|XaB3%_lJ~^1Xl>|x~Vi*`#sN_<Ujva92viHKYCR0W}k!r
zXWFruJ!jlm|4x2qp{afJ(bXA!!Ut-1x^tb@h<ELDds%(LW~;o7^TFQ6r+1&Ke(rj@
zou5H8SpI^B>MF;D5wZ!pepv6G^!xq2=~)~thiY$c{Gs-KPHIija@!3(clK`kemAdq
z<@MyM<##5{S?`c@+TBTV(xmQf;_oDGt>Tfo(->XmeC%q?t3=MgXBTHqKK#xhBEtUT
zt%Pen<r|A`+NLl0G@;8fKV>$z-Nzu$huq0OIzGkz^q+IX{O9}0XO_mi39C`now@RP
z?Y;YJvTf^qZeP+YY4?+z;;MS4WyR0qYi21M7JN0!nf7$^g){qY>Tj3wb9MdEuivAU
zlepmR>($3<-Unw|Y`wD3Y3;^YJG0r$HZIY0bY~6z!ec$FuCDq-^oxK^DZlq06jxI@
zzR!RE*Y4^Y27C8KuYap=`fn!p>OVqrX8WFbSC#o@Ui{xhNAGGRY6#6OmHEEX(Z5<i
z$UA24D;2fU;y{kYkLwx>ZQ}X&esI#UHhpl>(lcI3H{@Vf@`IK3o92jD#p|~oY1+)<
zCoHAnAlvJ_b7kdo#bCkTf0K3}d-p_r_5Zv6r^D43tgY+3RQ~q!pDRDRKMFD#yx&**
zBjZflv*i&2ZpW1j8yFaP6&z=2cfDdQeGyf%?r*{St*dt0#?)q?{=c<*>-zsO|NsBb
zjuTxyC;tDpt>>2H{|<lWm-jzvf7IWs*sF8bR#vP(;M893%pfS}*f__z>tgPrv$K_J
zKbzEtNUfewFPnWJS*P)O#;1#7jkmuk1o3ceRCw{-t-LGZykBO6wt}zak(IA^i!S_Q
z)j#?0|5ZMg|BnT}3~Dr65c)U&{%eu#`)6kF+InS*dt-u-#I25crU^<ix#o|%o-fS(
zX|}psi%Xzq-o9`?n={cDXI=eexNnIKTU{BSJR9o*t6rYyC4Bij>|2h7ZV0Jcev~<J
z+rh=h4;0?Gz*>9b^zUgO?>}9+cH{i4(Eqt5`;8P`XDV%ye)2LZ<w4TU)IEGpz4iaO
zIV@kh`dPT}Nd^_U2P=MT-*nAGZ9<+Q&%bV&Vv|JGa80ZJXPbg|7JN4=4xGi{zVE=@
z2QQ~Pe7x$VYMd?`wY?#>@uQd7uOHRA*K1`Q{u(d*HGwZg|Ng8`wI?4mR3t}v`CZ9g
zx&QJOF0RG%GJo18r8a~&)nB;rb#K)*jd?6zlXq=h_vCBd?^V0>{wD2N(<Jiz2><lU
ze`jpGnfyWBU}acnYG}B}>@641I(PklxqR6?orLu6wKoJ72i>(75fwZeE_@(q58Dsk
zPuGv>E<bhb2=i&S0-FsIi*=HkMI>q}gYHEOS^O~GaAqHO;UDkK8n3K=tXQ~o-}+Y<
zB%}@$9C}*So?95kzB8?$dFvj<nBP}st^2nip^kY<@#ni*Rwu7$I_sxO6{SQ6%DfI;
z)twMr7vC!5<j12q!$7Pt;lo_<Ytk2sIh3Deo%#8|)BV-a`iMn)Q%}cjEpbixx1FDn
ziz9p||Bu+}N8#~P!qq;XwBN6&5&f-*Yr%54?dQu|L=u<qE?g<#)5IIEC!RWcpWI*3
zUF!3{RzIq_Q?6Y8>-y{1Z(CKrm&a{AeRuk0Nxfq%jlIsvo^5s!McIZKuO@7r^!+iH
z0h{vaXWEIrd<XX0P4e|K*luN1bNvV-cXa$a0dCd?-3v=s|7ETI_w(xgwMqMUs>2@t
zUU+-j+v0yaUte2#H*0TnjQjtERa?chTiT_6z3fu25I*+x-iO5=Itp^UjR#nF@PAy*
z$FQCKgVaSnv$KpZSxvP6d(_5WJ*jSc{&jfiGEP>0op1NUxIHIZ%!`Q+5s^D&w&K-l
z?}hWHsIIS{vByzgz}x+yr$ea078CBf`&VdP`fB{MqD%VOw6_<R$1Page2-cE&2z?o
zcAE+uoF063z9u{WWZ|mGTAX&vesNvlauK-qTIbp1!ugYLY}jyVDR<_YscWt<T-|bU
zdj8ot+4EdOZBKv7-S@8Q-uo|A@0+c*uD_;f8ug)n*~?dw>uoE_RQi7?s8#X()?`u5
z2~T5;UTjsl{^hZ=cOBiD4#n!RO5e!5;Bf1zK%#zW@$dQC?<?!4?6Jw1dgV;(E&KJy
z+cfw3p4<NE|6_rNYvd~Rtm3By>hGCw+5PZQN4Z@yPjNLV9D09#sghm#zda|k6q(=q
z98ED;lW}ZO+QEZnpI>G!{<rPiv6Gh`hDGvkD%$$Os>{y$?vBf<aX}aP)*1KM%}Ftw
zw?FD}-rMXK@}d>}T^FOgQ&t5XGUMNPiX%3=PjZEgnpsTg$!9Lg7j~4-5_c(3>G7T)
z`@cS}@5j5D+OH!P6$m`L#`MN5Yt7CIBUT0F<L|{11d3Tq8K0frCo2|HP$9?9KI2Xq
zL*Yl&0<8+UH~+tv9<!Q$^3UF7lGFMph(1iIt@F`mQG3U8Yg_Ea?q2RMZZYP^s?~eu
z9p`w=@&BvZ{L5R+e?F1kY#l!1`|*2zA@a4{x;Evf>Ub9aU-0wD2GQwv9~{bU-rx8n
zT<_)~aURoe&D-znye+(Y*Pji0_T8^gINTZXM{Cn^Jw8*#8CPpIe!19S6Y}oJ(%bdN
zCMFjjJEEKZJ)N;_W?r-XqKB{TAFEIISD&%aWy+TgeB!_N8C<bGXVW3#<hYk(&H9>v
zEV~V#qONbfCjH=a&;ot;8ndN|57t@QZ9Lj*{Qf}b&0W#VI{&zJ?j79y?r-(Kb?d{|
zM}IH=y1s5ltkBkVCY%Res6LVr?K+yY>%p?XCZmVN0uS2d{$|IBu(`Bw{N8)l^^%%z
z|NjdiQ?~ERSNiu$@Q(AlcYoh6$q+Me2#sJp$s3el6TyBm`%3zkr)->28~-;<Nq%rc
zRr^Yhw_pcn;gNU$<{#htevNQ{GK==~b%uFAvtJZiycC|mU?iV2@kmycG(+`SxA<3&
zW+x}~))*I@ZR(%hvx6=EhV$JA%M^|@Br8sT|H<t{yUF)&X2pM;&p-bbxJye>wddBx
z3e9<~^?#d$z6(6fjIr_6et6rr&*-JrqJ4sa3(s!(=}|4ZRfOa9%ezyiA93QhusXWD
z<c)9BotTd-n;lBd|45T@e|=x|U*GTlkHVI0o%-u*)z!(DSUxKKnZEmvlIGX)^1DBd
zJ~)~r(9pYO^%Y@G6Jd@6iy~GCB}{wivoO~_KJimi`hmNz1)k5>{Hv{8!_34Mbs)j+
z-^P1+Ozs{TwJ8V1nk=ik)vw=3Fp!=n!uIZ;mG}B5(k`DgO#GjImC`EsvpJyb>5R{g
z_V=#wJy;)>!RxpEk0<BTPyf}|U$a?vUuovNcX6fWOFNIB_ul`0_v!!Z_J3EHe*J~x
z^q+4(+`92U>esye^KZXedok|eenz!FVy&eOtsHGlRtF|=*l+mI__XDH!6Q@uSr=7~
z$M;IcNqylyHJLA2_NLYEDR1UK%I_;UaO(Vv{{5$)p6__p8Psz-e*cN^-EYc6%dgD*
z{>OE8*sSdJQ}XO5u{}2Qi?~!B(aLIgE!3cNFOyW$bS)OKQ)LUoCv3Vek$$n|?c5*s
zN7)`<-ma_nU-eSbAHgTQ*4g6EwsP)hvOWHHef(O_T{>ab-y0(Ye`F}v?ftgy{)GhR
zJ9eGCD+JdsFyYAg82e9W?U{_!uj{%WG-T*oOYxZ~*d|Q;`fOV8KaMB*-_Fe0b#Ch`
zz0XfyxCt0leLuOiz`4A-zv<Jdg&!w2J0&h-3qLdCm4?&PpB5hTBmZfXl&CvRb@$Qv
zW75fZaq|C*k?%MySsMB;eE0h!R(R&6!t4hJq-_cr{%3j2D^3)RY?inE`LEHj?|^pb
z*R+){^j>~QsZ~9F=Fg#ow=ar*y?U^oZ~5t-zIyZ76U!}B6gF{vdj0j?qtd_P`e(!H
z&-jL4+W#=dd58L+ZicfnG$-&|W|V~F_X#G=`+q!tkBfk`#>s;gtm;o+Ep4Bup~>pD
zk!jZzfysiW_(OLnoqc>I`GZg4<YVjRCYvUoG=0UrX~XquX};CsJ9G{H-oLvoqAX{T
z8t>8j`gipN6+U#lwCs~zR(i6(>S*gk_FJ}&1y>@c=}S+||6BBpWu>C*H@08=6<IYJ
z-UoJE{n{!R*OMhNamF&y!wK7zGZ_xuj^3s9J!z@D$F)~wT>oxf75<^&`DpbPuI`(~
zw|0KqqHh-DRQWEuXojt1YU8dO&(F^3%-0mq`n5ctKO!#U|Hsl@;z2S|7Y;crh<iG-
zJbZ(j-3r?qfgP*&UDVneS6iF%A;eq#bP3OG-gWXl5BDDL^E<Wj&x9ZPhhJ21-?Q?+
zZCJc;{Xg#Nou9Aj7Whxym7jU^eUr`HsX1@O_5QQC@oCPE*tFfSc;niwiu>Z#mu&i>
zsSpuSE3?tDE<5{d&8@tE&=1^3r)}mOh;VQH`0rQh!s)M=H`e^qV*K;}e5%Mshs>}Y
z=W^JCRi>2wHdtp?y~)roe$&C}62*odRc#!XJ}uu-?aVeQ+{<_RVh@q&6K-0c>u2Ws
zZZqe^_m6JTfpK*&!#NM7q??(n{xBypw5-Qb&Dbs|YsXT}Lf)UL59)uHhqZ1}ztOy*
zzJ9if{+A$TugcfoBo0YWcKI21|DHdKYx_pMd&`wt-WuEg+i7QdODyTZnbjx7)-Shl
zyVzS{ExBLEP0f4NhG@f`b%*?qIMlsrGC3h(V>7RF!xtONd8S9?0+l!v4`$Z4*QjY~
z{CId?$SB85=3(j4rSm>rF@LSMp^=G&<7LdYn48_s^K93jx^n7lE8AiN>*rST0s9L>
zW8{8+)JXYxRdZL?-lBIuk4^~Jl~!LZ@UtmNTlg*a;i;1*8U{bwV-_<#-rV)tj<~pE
zZ*r%0uiv-6b=sQw@hkQ1C+z>rw&`zOhuyaMM<-ir^A#l4|8W!D_~^{qo1b@tZ<6Y`
z_K@qu;!C%(|ID8$xMPpq>7sJkSAoSL*Z!t-H+63IV%oY&@PGDEM>)xk7N?E(#dg^k
zN_{)?{Lsas*Y)b(=S<SCQt(>IwEXNmwh1TRxExrutypWSc5!;uhZ-RXrFh}@KRc_J
z)UKRb7G!;Uf{5>r_=Ap*j%j=C@U&&As*em^U$-|rXwKYk{r~6RNbdjtX6pKbnm1mz
zi*0^AZMW<6tdi9c^1bd_g_X9PF)0EqyfX@ylw5yU@jHIj{FQ3A;v@EPaO(b<xhnhT
z*WGugi2Yd4Jm>n_heC}yW+vxyO1)$xLZx^$PCsKm++}9Jrqrj{XUh85&xGRHPL|Eg
zb`!bETUDliJuYw6^I+Bc+g~b5<}auaNfF)e(xA#1#;76!+AzuM<m|A*!$XLRrDdVO
zgb+0$&Q7PMf&~$I${f9~*FM=-IqUlK$m0IE?83=$MxRTAJHPt1cV4`mwtwyIm)93(
z#qsUipPO6J>v)Xs?q=temDW;T{a)o4cI|8Wu<-r%$Q8AUdaq(G{uS}Q=&@%v>+PeL
zFLS@&-}r^+{H>oiyk_10_OUi@%k$p<%=`J%6@Qd05W9W<&<z&OPp_T_seW8svO3i$
ze^E!|&XeVNhh+}lnRlyB-R;9gmQPlF*Ry-pHb}{ree`;|Q&&l8RpSa7MxBr;E+(Lj
zpNa||8cLk4Oo;&<J}QcwEl!D!77<zETrD2^QdWPz9%p**x#W7;!dJOh|GWzeiv9b&
zpmgru*jv`-;nT0}+J3CsuIspC@4E-Cf4?WZ{C9T2#Ao(@YUixHta#;%;`4dm*H2ad
z;b8ydWq#@Y%XbX-FRze0y*P6DuLW&ZN3C0P_r}_XTFtWAWBT6ohB*s|9DBn8Mz1-V
zbC|NzUf(^rV%ItQzAx|gs_#q5dv*Qk&Q~vgPO^W!?%nQ$6XJc-pH4kfXT)q#{CTz|
z2ls@Iy@DMwjB+#o&Y9*Hai}QuVW`@x^)J*l*GbP+Iyi@Q<%ana@~2PTy}0<c>niS&
zZC$_a@A)v-@4=L(R#H(rkF{TAW7{xekrv<SzuT%VE@%l7yyhtVsZ}kd{g3#?cX#JJ
zdv|87W9k#bwl@W-PtAX2t-Guo-z&a%3*)J4`&!n?sc)LAV!hPm`y^GvXTq(kPcVrr
zbmM$h`r@2~b4b%2p?LvUZnqb0wu|nU6g_xkx0UGH3~?3>zqJv!6t=Es5>lO8nxy3+
z_9&1eoMX<y-S^(|S1yqkVop2oF~Mr?!V|Y-e3-M3sR}ShBzA6fI^4jh0@@tv6|pmE
ziTCz$)6H>*j7&9~6_mE_v_0~9W#q<Wn{5~0?9TSPdve>G)oap%te<2Q@U!HIe<-<n
z$$XCgl7Ct6*1rC>e@nHy$4@g>ch{E(_5X7%`Jpn=K=d+K><2Hs&x*F2*6(=U^wVpf
zYAx$G@we7omzO;XI~$R=;-Jj*=GK2pnO~SkCsm!kZ(Ukz8fKXu8j*it*4apvIX})M
z%T!5py%F5ouzbx^uJvbk$Szu-p4gg@d*lw^YQ+mr8dW%+$OXy0W4zAzo++ce$y3!p
z$(%pNUx4Gz14flA6I4VwS(z3&tq9N(;bvuKOxTg2BhJm@vP57-%e#vA;h(1(KfY}9
zWA5GOlD5ltTU@R9tTgZN&d+5(OKZ;k=dODf`b+Np!DaPjZ(W|h{K2*=ar(A*pO@y8
z1s`qjx%2&mfwdj8pB6j&aqIPK9k}#aO83`B<*~)RI#b4+`}Im}p7`tLV#)VsA{bu%
zuKmv!x~{givaI{d1J;VW;tzdiKMU~E%Mm{R_~9-8HnVk=uirLpT63wYCVt6Pj^msu
zr-UbXT3-udn%#JSQBP!wimDWAOJl-<kPK07&Ld5U9CNY+xjOt-Jz8D&zpwK3{Nv~H
z*Nbb(`}@55T<bOEft69tgBK+hino90nta&x@!|LOrw=bP)iT&BZTug1&-%d6n$N#C
zFzxZ*&G7EGpRy10Le^ic-<lZ~{NH^!r%CmWaZ~H$&0M9?A`!bP<l3%JS74N%J?%$!
z^0kboQ`h_3ePbxRQ2fW+>yaoEcj)(jYdqtgaX**;+vv)^e8sDXX~`D*JvJ+Bxxx{)
z_zZIcqn=2ImWog-%R+|@6I8^wSX&+j8iZ&lakM%;3h0=kRGMviU*0C{@9yi({ZiMh
z?*8_dj19TB$aLY27X@!E^VMzt+|O<+*jzTja+hOJrtvr5rHv=IR+RlHJ@}{o%Y!TV
z79YMeua*pd%W(0wvQW*|S#sZ8w>Yjf-5<EwcV%GHoOJR14cfLL^P~zG@78|E`dX)V
z$UnQzL9%tV+@U3Q@p2rU(WO$x6ZeUKIVt*X-^Spe*ZP}ME{e4t$y#W<)0^=C0|S?^
z<18I+F4p>mhULHJS}&ZxspkLv_2oKSv!~Zy{T{c|v-b11?#KHiqiwEMeev`xeSP=4
z$-?uj>lb|7_js=Pzq%FT(%hC2GKq{kK2CUX=<|kSOL@Zkp9_`!@c8rLx@d(<W8dSQ
zxhK3k7-eKnlv-6fifmoh$HZd4Pmno6m5;}aw^CvI#v2-kbqzn1e4FB)_FuL_pnjKa
zeBinn<x^*F?tYS$<i7WE=S$ma-s5}2IUG(1X!u#^96tZij_c!do4s5yg{-Fomd1Wp
z{x7p%ezokMtFJCaJq&-hE_8JmH)Df>XiDDqK;}0Ozi-;Q`hU}d5RuNhJ)z~bcTcyZ
zhi_cap<{e-s>V*+=1&ics)Wwn{ivYQm~c1p?2|s>@Z;T1mv=c`6nHt=d8=jR#);Oy
zxtk_t3cZtX%lI6k^W@BB&BaD1W_D&CPF(O)p?+~%w~*L^Z!h+$9C>$g&efoYck+*P
zH?P`Q(kU-@zu~QXo2SCH3)eqvy7o%U@6B1w6sfE?&u_jIf5!Gz=Qh`?xeHmYF7R3>
z!{oZs^#0=yor+eW26Y{$zD#;6U%{>5QmFE^^XJ3;*TSr?%<x+0`|4^@%=G-$Xobaj
z?@!0vaCLgEq_`^M_Y&WStc98!TW(kO%s6ji|A|k)?0+&>!=0aLtwpDQL{5wTta-&r
z+w_#<@ui*LgI9R*Ozkr~zkIt_*4jIZu4h!==}!B$WpnC=gR>Mu)vHS**Y9{|_~+sD
z7x}l22AfZxZMR|0L3ZJ`=kvwc|5X2Ix0Bu);gq0oFhJs|P|Ri(zM5@67Vm!l==V|G
zDElz2jp3_>W3GRcUzztiGM-iGSn%@dFFH%g%h&KbYMAhfhcc$S9}HchTYAv0@`C*0
z+$f)WLR0Q2ni;HeIPmVriaqgniz998pI?<<DtP);%GzuC*JNf_MO1J;I>yJjz9{2J
z;$NeH*?D3Df_Vu$8o7&8B&NB0I%L?21|H^YRr$2u<88JHi@+TvC%x9kZoKOz)m#yC
znRLAUYJk+7ovT(J)R>YOx3kVtYTBFCdehBsaa^DMYvae*__b4a?fLY*Zm;U+)#dtA
z+(hL0KmW96Sjg<AEuv8(uUW$Ge(?J`z7u^845j*8WZw8}tleuJ-1BvMTeC{lJMQdR
z@9(mtMz8ifv)6pJ*0ZFPRwnTRTi(J8cB+r$3YhOakgIt5_=~*WrH_vv_S|1E*P&f{
z?T$(36Hh*w^`m}eVR=}h08i2~F|Ey#$=jDMfBviD!jo9tnp*blFAQSyran8zA>nhw
z`QJi8k=)4#qO3P&*7^zEVl*n@d^TzAL9+#aY8K5)oL&^@X7=Zeet%v6-Aj*tfmUto
zFu1CuzP@Pn`Bz_Ghraqh<2Fx9;=aa!#(5U(k78IS{65(bTFW_gpV3S^S$melq0O_8
z-xizD$A3`M&%<`tU$#a2qOKm{NDIjNv`U4y=)d2Dp462_V%PoUGJZIHE)8`veVVw-
zWzWvw6}zX+;@EjVQLaXyiRsOaxQiZd6psd2KfCNa`+ohxig45Tvvr-*OXB}W|FW&@
z-_dv~DZxWrwCnyfZP6_`lP~{?XP?JWEbzeg`6-_rF%N>x8#=k9!}AtM8we}-@<@j4
zxbr~$Zck!`d+&}yubR|^7ZJ*znPb>2nv8QKYNPVcaj-vVb}Kk=lyA{H<&2gZ$!pCG
z7uTPOlZj7$xbh%t<E9p7cCm9dGyWgC%_X<MMzMbV@l#v@!YW@2JX9C2y#K+$c@pF5
zopx4U1~G4^omR2`&#@uDT>t40pT_t*2XpUS+NG5F^+$UDgtgH!vn2Na+5aUXZ$aY2
z>kFcmCOq4p`Yb`VuJ@p3emQ4^&Yms%&lPELi~gDK{kLSRT5W8UPMG$6F*!M5)%UwU
zToIahf7j~Q%nP(QHbh>Dy)JT;H|S)@ewGVYCUu*J7M4zb=rvn>)!NW(P6bW2gOza)
z#E)G$^w6-Pu`*Ha5$k{EEkZ{a7|#f;VqhxJXO*lF_`=Py;8|$zRmLMW`<>3jw`PVU
z>X|xt?T^vFv|xJY4dYUmxYb`gRv(Jo(_<AEaMJx*-98UR+e)51|35C9dv&rUJ^#$}
zdbB^vs#1GaY4`33MULu21$FWQ?%6`$j#eF7&XmQnY{{o&<I@%P2}-<sSWhxc;^5r%
zk8@d~XW~Ou!;=+z=WlAeWV^#a{(QWuj(MCyZr=MpLR^Mj_WFk^_J@W(*?GhN*U?|s
z^=<8i?eyOnq^f^=e@*mkdDc(c`X>iE1g{5YHwnzN;bfHlI`xO{8Cx~pnf3bxuX@hc
zi{!g19k1AE{X72m-A}*vpN!J2EHAHLIq98e_CdR^OUvTt-2G7%Kd)Y)XWrg!yLWR}
z?3=)RMRixBoMg!k5&b4+r84F>9IAQz<qds~*XpV^%J}U6q<J{RZrSdyV%JlRyFSJK
z+8sWlX~&y+&-dO-lC5@)`<}+yS|B*nMEv4$tyz0k7a8bH``<Ln-CHH7GP0#m*~hH(
zd+qk7Equ(g%(;FB^(~ztH~X!D(`WvDJx7w`&+@!&jg9~1(s@kmtbM!WrDZ$zH!ivB
zE7@E>osBtB#L~t1n&-Q`$|Fo`a=UttyB(<Lzc|0_=HbW*%8S$G=UkuRQ8!09c=Gw;
znmU1%zPpW$yBG3LPO|4UTJ@BN|DmX#WP8C)<Nf-}BVJ#tx4rYP)y8*SeB1k~<^3;T
zUrdcF)<4Oq_{FsSgreXC6+h!Ej+>sn782e4@}!)U<_nQY8XA{;>t?8KQ_l14KR>TW
zY*yxy6L}^Jm46<$TJrvNofG>e{jXK0?k;7E{kVkZn|RLd{}Pom0#h~#B};tUZx$>w
zTlYv;&ScMvIzbi94<;8q%41$}Jv?pU|B#TDRo^<fvLyO5`9e2`%D=DheZY6}KvJ&s
zrJEb??0gvg*&;{(Qh4?HWw{^Ij?XghzIlFuvC7h8d(F>ZU-ZuNZq-rqHV=dPup|3Y
zCN>^+Nck49>h9MU=k`0re-_F#zCPpl#FKv&1KhrxS+*?y<ae=-UnkDj?ofS`%fCJP
z@IGE+j=GgU{&}b>9=PD}tcv-b?nmnfZ?0J`;|i(0aVh@ZM1wE3=cZ~)AKZUm`b^1(
zfQrR(70-9<O1r!1os(0$Wcq%i%?y=K8VpKTOg^#kO1$Ko`5p6knv+fHKHANU|GTQ|
z+>NR8_IM~ra?AVAR{AlI^MTve)s?k#j>#_6_&zzre#zIaciq4K{yecSB1OX9wx%ZU
z)*-Q{pQLI|Y&9{i*%DopfAa4my{Z|P3jgn)+hnh1Td6l!d_#}^^4<H>tJ0Y@a?%TV
zqK{|R`ETz!+_Yc|`}c#f!QuiZ{Fi(%@u*y(V7%5ThO4$d{Y6Z;&iA_QtKWZdQh(6B
zaj$pA()A(5r`PWn{H$i}G4o8Ls&c&eBaf9e#rGQd)vTot>=S2tQIcN1sdw_K2<CIm
zf!pp!Z@tDcQLtf6xbflrAC`G<6+Ez`_EKo_!q|`glBF5XeJ}K@o;s>4^3@@Rn@M29
z+HaR1bWEE1;?=nW3=D$H9B1itYq8dw_<oms5f}IB`m5tDd)LIQ*8lzKci73F|3kyq
z`<5Pwi{IIKJoAv^f&=}!2D-8h2Rpw^v;4Ydmhbfc0s=fDJMYCsw7g;xZ@jGc!B*Z>
z+)?4VUbx=&u2l^gO`GRxJ&1mG$J{GWoWG4FA@{HsV^?O$(|m_%6GHB3F>9*!>*$ET
z@O*!Qc};>Svtsva!56cRo>G%7Qhjwn{OteGKVqkS+%{N96c{jjHa%F%aehO9w$nWI
z(BlCwHs!xsxAc3xVEM1FeCpS;^{;jsE@X6`lV)GFZL`CisE0Xre=kk@vi^RfLBnSr
zy)WflRXa~VW{P2d!!8l#mvM)G{@R+=%nXb_7#SWg6g50AU;q32nLZ|E#@I_IN*A!C
z_uc(j_ULc9em3ix($^{>42{<w8Y!2jF)ViGF8UMlAcjeSgTa{hMTE+B;RA+uykB`|
ze#zNtV&%GWm-L<J{j;3a1O4_EJ>&gpxBgTD-|O68Z+6G-nKQ|9iM1e03un+zW0$J|
zlQ&#tZ7^%lVw<jbwYdGut>dShuW#2WdHa3u{H}NZKZbvHdC^z@!nEw|#`i{X(r<ip
zuRY4U{A;#z`m#iI&QgOWMwf=CTmFgab(e0JG4Gx06jFO(*Wc_5Q`28x>Qj9c9kc%G
zqZ0KB(`JJXC6)Sndze`G8a3y)pYd*ZedbA<K)}2b#&Zs72K!X3r|cI^s44n$<J97R
zjfpnOuZ&uvbR8Ca`Lm1ltpBspqj?RJ=Xc28kaYT|ayh=(FmHzhYeC{^QHwXvSU07A
zdLe#>A>!e~jfebhK0h*l-Rc0NL(G%%%c6d8Hyp_5(r<1kVq*|>@^!3W?YC&i;&DjJ
z|KVU9da$X3k2lbX)h{UMPmJT`sWJN>M2Hk0w^04~|5MGC{9AhAXFr<0F5YQd8n^Xs
zaahmRTbtEibTLWJb7+a*;vkyJVsypxRs9?7y`7;~&Z?Rg*IY{a{^GCd_4VGhCp(}1
z2{=4KKtRJncv7{4<)++y98SXD!X)<nbU%?{+rDp#;mZ2%po9?a2KFUtyeH<Ie4@e~
z)PCT@V?BnGCwng!6-e|vy%ZX9WBQ!+(?T*<KEHiKTmDLIVM6Wy45j1BI-#fUzxsQ9
zz3-Bx{+;i<r#&_Q_c+d`JchH`xxiLfzIWP$AZ8oYHTiwAmxanZr#F~B@N}=!C|+nW
zp+F^&bILK1liJ6%91c}aj`&#akg~p;`A0yc14E-g3X|fN#ZgrXj2xxA9>&dD`q$2?
z|I(f5ceYy}>tLRs&)TdVr`X`|Wy)5u&smWbDWPV1e&4q|;yz$KNvi(?_p>FgwU7VB
zG5%A3pki%w;buUXg_#4>wmb8ttDg)llM}!7({-(hpF`Q4mmZ(4Og7lnIXix;Yr1*J
zJ{P}-e<x)5s6Tu&k@qF%j};5|Y*P8OQsklj;`h4FthW>1_UFzE6!iXaU)b{W-$04W
z4*5P;lhT7!x?PTW@hmGT$a)lg`D}7q)-=iOpDM3aCH}m_zUD^L$wZfb4z{+jXFopt
zeCkljZu?KVQ9_?$V+%Rvey&k`SpVv3=atp>jgCf^mag2ByXegd0pa$dD_jD6D?eEu
zZ%obIAi*hj_xJQS)^8t4#k3hNiWe?f?;oD((CoCSt?rV>$?Suk-<}GViFO$uJntVA
zB>(w9r#wgLf=7;lj(R>3`m@^Ew3e4moLZG|_VGUJ{qGYm9bOz~^G9djv8}p)q>lKk
zc)b5#eEbm+>+>sWnf}VeuShxQ7ZIbJsK0#E;r}bDc+X#-mAdJ6>UmxE6O9Wa)AQzq
zt?IGq%>0(xo_cp>mQQ@%tN6%u<-g@BPh0)^ug!gZZu`$oimac7Z+R75n|u6=fOEjS
zNqYp+RO~oHBzSLxr<z6T{-5!}Juj$5O;v5W(wv{sFNO6?xf*}&J~UtL<8IM^-Iw&4
zI1I$g?`0XU(=qz`AXzRrX+L9<fX1Bv2Lf7bct1;R;M(J#w`K*ehjpmPk#>zE4^A<7
zY<HNvIBf;z5wqssGG-gzg>1NN8F`gWFW7?bUf}x4Oq}A`t4}TbAI5a{wdqgW{|mlO
zdz84LXS=B6Ub%qV4Yyt#vU_JPDEqzqfayW$WA*zUJ2Q6N_xY+}-}}Xa*=%<hV_~DQ
z_qBJ)%f2?R)-o^Z+8g05_BP_kHTFf!ZQ`$Qq%1r7`j@QT&(~K)xBn=ZuxRN*_o?d=
zJAL*)kodJ;<LTGZgN;mv?<QUBoWI0UE39ow@fMN4r~aCrH1~aT(wzIxn^&A(jlC9j
zMh`YjI3n8=`Bg{aaE$9K?p6G}QSFP;?!+F+uogX9_~cm9lnV#`=W1=fdiDFAeLFpy
zCv5t@W1{r7_f?0Y?^}p6Nk8A1_VoQm<~b}MHY{|N?qU}DDkQIR)5$@`_Q|R1{Ctbv
z>TXu8Yuc#5-ekkt%(eJfn<=|#K#AlBUFWsM?cdkQN<Z3i*P!?C#VCn+l82_AXMff9
zsYh<Fq07YC43BOlIDE_h;&xJAI>>kF6o&v&g<0Jv8YExtQJtb0r^s3uAOC!HzJ=e0
z-exwRIkt~NmKn$TYIJ<tIlK39nAgFjYrkFPeYiF2{*nD+Z>O1y<SpD)zt{C%q3o|F
zrFZ}2S)R22O1!mu+QtZtSJ|~Y%D7(5KGFVCEk$X;mIE`E9W}muzyI$48zp-rZf6#;
z?Y{kfR?+eBkYgw6{Jq}`Bs^*rJGe}{_xCn4ZL^$bQyz9^@;+1BQE0H$#+~iFifj0X
zw@eB13`@8i4NF=7T7*Aln&&QgS&`%Pwc9tc`<=QfOU-y~wk=&_yS0|jBm9WPwc3V1
z%g>tJ*M2(7BDg+d<Efk5{(OyJQ#Ze1lf;LjdX}Z9KR5hmTOhWBLqH_CO}ob7dwQ2e
z`)@9h5S@<u@0ed(#Z3Ce-cqwyL(8)v)8B%D?*rFL-q)+ol`Lg#;y?W1mrRD9kih&4
z^8fzzPw1Y)q5W!Budu|M8FilaJ1W+0v5F{~E~hdjVNaW#qNR*?z$=Bly1)PC3N$QN
zR%G~E_GcBZx6mtji=XZ1O#k;U3^etz+G?@%;q=0PhyUKOGTD2(#p}~Oi;Ae)d941g
z-|Jt$p)<APainV8Hs6E$E(w17y?D3x74^GcQkULdT_$PCY4`l&&8)M#d8E$C<Y-2-
z?Uyx~^`*7k)w$}*W$waDLZA3^R=@aiSDb&(-@|5auOEq5e)lV%>wRa(`~wc`KaMIa
z7vl4u&F}8;X@<h3y7G@3DwjrH^pW5?b)=-WK5&s+Q<{)46XUH#@BUv^S<iBI!ELuS
ze{5<Wi~LfE?mhEb;A2;2FAqEa<5aez@~wNGeB?-|^^=m&INqWh{@?NS{oMgG{g-4F
z2=iHF_{_hy_=&UF+)qvOcyiSAOrrGTpWe#+e=kF|qepV;s@2og*5vvf6e~5Zt?Jl!
zXPfEcJ7K40FMQH*&FZ8@v!i5BrD5|r&-oq(sv7an-6gMn`u(HMLS*077Yp_@96!Z>
zPC@nf#!0;%dcS}D5vi<ru;Bcg`BksXcemUxit&ou`Lfo0!IVjYjZTdH{~z*OXrzTZ
z+a;L(335@p$LsU&%M`&MM^@aK_`k-z`|RW=Kedj(nZ57AcC(hZUB?|+Kl=Ze*vrFL
zA3E)f1Z!mN^ba@B*z{eR>2P4Obk;%nU#ESic;p%SMu=qUH;GM)mjAig*|eMS?fz3O
z`?qnh6s(OiQ+*ydbBff>Y1d_#{Q0^qR^JJk@kM&}H<iWzHoe;uaJ0i*bGcG~!G&2r
z&WVK<eA$>d*H~rIi=Fm+U)YqGv#~xtbbj`9Cyj`nVp+k8qE~kQ`)hLc-+gFoqdCLt
zuxXWwp~j=YV>w@4Os}3+pI3k5qWDc21Gyi}xwbPO8=9@(_IKaZb;+0hZHwZS;h*oY
z>lX`)Y;pC^DZ2B*nZhRg(fnuiw>j(MZqdoV7TN~%e2+|+)3kl7`0KdnJ-63iOnPn-
zrnp@H>GTPww!fc#D*n*N389DdHZHjCuxI8P<Fq)bpY<1Y^c{QLYO=MmudP0_`sw%I
z8x$mN?0$RY2+JDhzcV5qe6#-Q{=UHf!M&L~D-Sca)k)4y_!N3ExuLx6^#6l$-`i}P
z^4`6#%>HnZLHTNNI?wvzE2qw^sQdrxVC#<haJisOi)ESCinKqJ)4%?yqCV-w8P15f
z-`AG-_HLT|rR9%+lQ@6H6$8a(GyJym@SL8lH}~nSwsnpyMi<Q=eF*Q8YgxYU@4|Q%
z-xa}m0!^YS_a}DU-C1wFl1-pa&6(e<u;|jn$?jz@J6XeDPd)NhX(y|<7Pn@p>_49?
zH{8FQten#1Rwo_0?SlM;{C%MtH`%Ikal8&a7r1m03-@-eB`f7tsf4#$uCiOT?#_yz
z+gQIS-#YK;u|;mxmviCMzgtY6WB-yjBW?R9`(?+%w4Vgu&^XTXaW7NHTfe}EjcZ+J
zd}P%NyBoaxgXGHnf9LqcY!$6P5~>lO|E{g{*Pf;Se_s`xx-NcQH23$kM)9DM8`6)C
z3im3Q^@t>I`Xc&ri8<@ugKj(*G*?b5eV@*L^=QTU>1Wrv7d>YZzql^n_`<=)hV>t|
z=Tz`HcKVe4)}H+J&)1)dQa&}>I_CF^V>ipV{#U={w?jprk>lKAr#cDch2P$up88LE
z{_-Cx3-5m29pI(Xvp=)<_nS)Vf=e|oK1eNj_=NBG8o{6T`k9UzD;*B+5a&K>v8+LA
zy9QI^qII6Pw?CQd`M~qYE3ULh8=}Kc6$XoIu8-QEFLmXR?8+CWzfxXE<a-7f#^%>Y
z&pKov`BPuPdDcw96NN2N0Zzwtn7<uYzw5GS$Kh2+u1^r0J1;iM_*nn7H@7EB>aZF%
ze?5GpZ?<6pPc`TDX<2_hRWA=_?kHC>Hr;#J&FRZO$-Q^XRNK#NkWHHBthHzMRE0FP
zhecvbu3kyM{=cg2&|_=E_XStqPx@(l!%y|fv5d<<eDt_ZubQfV>s!P7->UJB@4jcB
zI(vH7d&93W%xie-<T<}T+WskfeXu}9Cu7yi!@u{wIc4)e)p&-qRqZtc6)hL`Uyj)+
zUq2@@GWcDxY<20n9mL!oY_GoWXGF<D<8P}&3a3?G2}%rU=1jZTEW?tw_TRCjw7=f}
zCM9O?y>XV^L0h7Jp`6e6b=hG*oyzyly>I!hF3?rUf2HDDbD3`zlb;tJ66UZ>T_3RB
z?NhX#7RRpppQ=CU>AU=%_v?>X=d7fE+c$r&Q&;2gc@TH}>enR~64uu+>h+)gQ+NOE
zGUs>R`b$GRbOdMpTxG_#wbg!QL*#_0XY)UweBW`*=j4wG&!U>*8XA*@K28!ab+-Sx
zI`LKI;vMU^T|AN_DD%yx?ZtkN@0Yj^3T0gCd#l>D|5m_8F=oblT(z%tO``sUer0@N
zQPi~3ct>{ETz1c@$4ecrc1RYV6I);;;}RpS&v)hct(L{fUHs4Q@ZaAV5M2FTdYg6P
zhquj(Cgf?$hNVr?|8XiH&iO&vwSf74wg#-hU%l8pv~+fC%GzjVA^(c^hsHjg(xVSO
zg7w~BnU|+0`h;uZ>P&}pult_OK8sFPJfGw(7kho7{+&tpCk17*=}*7ub@GVhj?h1E
zmoAQeJ^k8tHi=jK=es}lpZfZW%j<C2`?@pvCqwfV>|a$`w6AdTH?HQ%-{b%I-rk=3
z`k>qAyJ62C9?E<D@ssYthST3ay}rp<KfQVR!w`qk<CEpT-%C-nQTr~>rF>^<d#srr
zi>RNmxc!P0DM@ku>1Q%Kj~!TUqxe#_s^b^`=Edc|Bfo9v;+SGzvG&<&oBC^ytp7e;
zb3J3SvE0rt%j*6Lhy=Aw4w&WiVS;+G>zZDL=nZ!(cy5aCSez#s=CgkK>nZIfT6g{*
z6Ptd1W%u$+9d&g%)2}~bjE%U;)nBskhH|M&(#juES_l7J`quJqZ`}ST>s|f!YVVGX
z`kUqNr0{2YdDS1wj>6I}5-*>}A1;aB`E`lzBlg&f+@XT)o)hFMN~HJctDW~-7=Kc2
z=~tE=8~pzAoxAMDw%198r}~XV{@shVg8E<faW}b%m{pXWxpZm&mMOnt{r_<@9kf&a
z;GtquS=Z1e&Na!Q_a=||dA7?RR==#_eOOQ@mE^QsPT6vr?AIk{=BfW{%6HxqDjNDH
z<lo*%)788Gvly&?zcE}twAAx3m&V%gRrlLka-_K&H9TEDf4Xe=PWf!Ez>`()8yFab
z5*=sha(l7XFSLwb>%Z#z_5c5N)~+p3TU~zsBiH-?@Bd$m|Ni~{-_>(CYrn0$e|5=@
zJ~IyvTMJ`eZqdq=CjRs4`Fr>to)L@t_TZ)Z#ize^o&E3rPQ!ZEZRUMZn@(BQsZ4n9
z$85=XSe#Y7Q-b$A+tfpiJ8H~ou2nwf%xF6Mu;I{l8<R(I73>+-_gD*Vh#a<KQW0+Z
zp<Q(Av0K;v#y`T{YB4HCpPuZGZ#(*M`j$URc@?>y+1Kqowod;3Z*9WQYgZ@#nppZ;
zwCbVyx}d*NPan9mS2k3Hb*Tp`JUlP&{fgb-;G|HQM+>`kxPuz5&N!0dx9iY)sgSLf
z9oiEAbB}w>TxNQxGx*~+>%-~%u`>^(yiiyvo!oKj>l(2V&0jZC7wrl<5OMf-*>|~_
z>f3&oHrhPM5)%2{yK2$Vr`3Vmz4<ruZGG@APJ8o(Ih)xH@7(x(jaex!EvtOKmhlqn
z=nY#ROsM93d#s5eU1gG){e$zG^S1JENQFu-(Ab&tTA@ED`)l5`#6LcaL1z~KTJ)RW
zUGtjt8;it!0`g0bnJcyy2i<givh3Dw*~yAgiJS8pvoEp#-(%LGy7=(sdvjkhe)q3^
za%0onAM-zF>`RgVwr|&8Cjk{MLFUKYOu0QD3|zE?-lQ`eJRSccdezn6;cu({ChguI
zH~s3lt?zpdaoL@$PyNil{_%y3%g?GfzaKdv@|dOc&5x4IKZiD^2DZgnI4}2cxZDxG
zhKD&Lah8#`6H8gdvE>1~mux;DF~PKH(eovnFFF3%spj=ra=utr4$JS`x6ag*UNw2>
zTJkDQfMJG_6N5TehmdaYSq`Bbj+dX0?_;oQpD0;t^5ydXbMh}H@2|YvUZb%5!0pCg
zGSfE2_ny|+S<k6qzo6-YdZf<J^#QB?Nm}gZ&^aTZw~D`E!#}<USL#Eib1qc%=ehb%
zYiF4D*VL%jSBu_7W?$FZ_g0}XiB-2yuc-PI<1uE2eWIp0NexN->S`e!uWyUi%zN;K
zdpUoa#3Mf0V+U=07<JfX&-lmxpUI!zmA$SzJa&E8*Y(#l&WBI#zNV0{keQv~WY3JI
zNEz`PQ&#HmOKrZwC03xJ%8(Qg7}9dr{FEuD*Qf1Q@9bXRJ4dQ6WBKN$In1*9jEfg0
z7Me2W2klLd{Sk4xWY2<!N!k~_zm0zy86$snwTE_@^|4<4|8Bwu<&OXSzT=?Mp_x0x
zs~7w|@?ggIf*yZ+PJuwn_EN*Eoj*L9*;67`YD|?|v8h)6QNz;2)M~Sn^W!-;1n3L1
z9#qWBy01R*y!XkEf+F!RyxCvthrYgBRrEh0jCFwze>X=`iI`{a6WhsU@pm{JZt!i~
z$1dZ&aSiv9?{gc%D|7`jjMW-Lt{y+^a?nvyM7FEt^wGn6=YQws=h}DVYx&*ct4psc
z<^L*W4L#6gF!{yQm2I~5r&0>5UwpZLXSaT5)s8!*hEX%xEKV8S;QenNklc3M!F0WV
zb=0=+N_!l3`Nm9twn^!=vWmTG4e#G`7yrw)e{@`|=wIu-JVZ5X+QMVEFDw@NwsL3P
z^{sys{|i^VUugR3$i1C@e-}9JWYdl^IMTz=^5xR?^J^`yT{tjf#o?asZnx{V$f-|j
zkiC?prm(@fNknSG0jZlBGmWdAcL}Ccv@bs!XO`HZp*5M;dsRHsj0pMGb<G=>9r$5?
zYu*95hu_W5J~gR+$Kd+Wdy!w<kFXmbE+#%|Jub_6ZNu3%uc`ywFCv~Xajaa%aXBK1
zqq#lY`FiIS#udK=ayX37t1j=%oa-KF_wW5To_|Fv&l$Mxe_bk&eQ`t3g(l_~nvee-
zpS5Y(&fZII=N829e*AjK<MO7XV)3_h4T4K+SO19(p1t75hueIWY#+A#Uhq0XUFcy)
zt=;NrGTHK?PcQe{-TgDm;M}JTK|0EsnvtB%!nRZ2OO@<ZS@N?W$^P)i`_WBTCd<UC
z%-8#6k~mMqd3RmAx0U7ke^F2WR`34$`f6-nROo?40-|^Cn-%CbF$sQu@W)2j`0oAa
z<MTLdSn>~iQ#n+@v~!17<NMxc2S0w*c-i^%mPd~y>%_RQm)B0+$?|MV|5g9=+pIk~
zmNruplMlC?$?*<LJ>T|5t~%L{ZJ&y1+liH4J0{2*9^x!4FZ%zDLqRuJe1>xWuh-R2
zGUJ!uyK}eP`i|Efx!9LgdHfBkvv%~D7|5{%>{Ncdh*|%CaxJs`!PUCC>QhBHd!7d-
zFG)Q*<^CFnueaF~9alb|ucg>Mz2oJMncG!oslKRNIsK4(#0#(aAKZMNb9n3QU*E(O
zc<k7Q$?bbeqEy_?+Cr4ds~<|+%W>6scg=d5sIRlXGIE0Q#u=VHHH&M%H}whcd6V_t
z?&GD`Jp1-7ygZ{$Z^7F4UynBBJ>U9R){T?L#9O1fY|ebGy{w)YsaZBW_4jyuesDY~
zXj?REWn;lV|J|t*6}CpLTr%k#r`xpFdHa8|DnI>nW9h6Xb^BCOZO$76ec!`>P(j#n
zfnQ+szDFC~>zNNp3otxrkP%^!OyF^zbZ~z2^2Wro)7PuMTpq3xpJ#bC*scB-%TjZW
zpj{JQ9DVUMV*Y-H=vua;51&NTeLh%!)H#RUsr~xDw3MemE%=M4yGZ#@?7aWR@_mhF
z3(JAVBD3Pn{A*^MnJ#spsQTqwro%BU<~LIoR95V<+i~{Ouhr8;b*d9h&-x^?Z*<hM
z^|F*X^#8`aDQ~1FOHUWDvvzJM<oI}iVM7J;scMZjUM-)=dox7DGm4&<@BDEoX_o|l
z*-Vx%=eFEiY4kcrD!O&{)M^<G{zEJ34sY8au;<NE?X({~<)<PGY9{%-J8JjAWlhSx
z#eX{A8C~S_e{`hMX$h~PijZsby5q7p4nBE(B2z2q*V@FtE!{K4OZaYQCd$paS7)Lu
zCKIn^7d`#^hqn@4sbP+vc-&|G33-{!q2Jf~Z&QqExlE_S<xK~BHg9V8G6_2H(NFYl
zv%Y-$$`)1UpyktJi}poGuRj|7;>lOds&k>gul?s=Ytvihy<YwKo~bW&<Bw#!pHWL^
zI&j&fMRWSW$ET|2*uFad{&V0A!I@jWE9`!(b-DWOqVHT>+vj||mfHF+evK%j(cvdL
zk<3|a|Mpy;!T5*av`pD5#Vi{U^<zTc-k3~Izao6e{3N%Hs@2|~i-KLYi3ix{U$F~d
z>V6ex@&4Z@zmHwja(yow-+#F4eKIjKa>u+91;LMusm*7uMY!zGcxlKmVZE(w?;Fuv
z6|RYFS|4V4GjcC*W%GY}=Lz3;(ffBZw4yH5@5y812sOR`NIdobn~K|)R<gzy|31@r
zFye&;Pxe9XfBV+6xCk+~Hv5FPziHoJDmuse<eU9j%Vbu*UjIarZ%6Eoq@1!GaXUTs
zU%vx8J#Ifd^zi7Rms2-O^|RRTbGQC5FZbu>6$NbzXB`NUdSeiHPG*VV_5lA^Pd}e?
z5H<7bOS-$nHF27LXWY9Qg;z`o?pqIk67X(scpk;Pt7%<Sb>qhs1qZL4FZylt<ALLu
zQ*zA?f|2|_=Q|r1rrWUZWMK<E;=NakxsSPL=|9tL?|I6eS-z2+yl-i3=W_4*o(&uF
z&wgrSf0)$Yy#Ao}gSf{{3P~TX{wQYC`}p5Q#x1>a!Pk>=zkOmpRIwIMsQ%VcaZW2c
zZgx)|>$^LF;UPS+y1%;am5Qmmx-GnsG`lOpjHhtHlWu=&ab1T0FaB?QcICUj-0~RN
zD=&^KG+x@$Uz`-WQ?Y8-hIemDjA9n+Z?R6v=D%}{?X#zZht9-`dhU-mBUjH^#dSEx
zR#aB>-0WuuSww<koP#R7{w_XHU~%Aq8dL9)l#hGj)Mq3-oK@oeKPaN!+(?>t%fWK4
z<YVDk9M=ymjpK@A+26n8T4|b_OGZl7r47%ba{n)N`7%v-9_P`cMd3ex)_;%NeeJYH
z_9MBCYwMqW->Yz=`%?UkPO({&@6Jr!aPV@Lw?=|8!_os%JDrLJZnSwXZGQf+dhgR|
z8!r8_f4`mghVT@x>+dBz{$)-uFu%V;MSS<6)BhthMB4?+U!(+lJiS8fd3y-kZQieJ
zt&&c!<#$&9JMGRd`)uv|x2KpTS=2vh8a~oZXfN?9Ez7Igod4g#!0xGGZc8S^+4bMQ
zKZ~{CdUf&pl0`3h?QNIoB^Yg45dI+|^Tyo92_9zG56)P2VO8XZr!S0)`t75054$ED
z|ML2cgqGy*4;>SZ@ZC_+Kie!n{rXuR@i|N_oi!7}?_90^7igLPF>1O+t9sp=OUXaB
zzCW~OJHx*P|5G_VXXhra{&d)FV$Hnx?+fNnesL_~kGZTw^cAVB0}oG~e`#6pO_}d&
zz{U^hiT9oKqN3KyZCrJ~`M>a4Z~N1~?3;W_*)C?ivh$n&T5}rPZ}Gd|BzrEc30t<S
z@JbkGy;{MxCzHPa=5_e7A;eo(X6KcKjf;C1)gNrVC9u0O*?z&{Jv~|f%nWLB^(|_q
zZ<gvbEv$`O<5J`-BU1Pw&u6animu;ltt6bDxN`rHY&N%=aN137$@!MBXWP#_I^7t}
z81zSEMqmM}_~Bb3d<p@FxK>|E<&sF$F6zAffGvG(l8I|su6|BoeA(Mq|2XHUR~ery
z-6oT=eX+0iY5Sv@GV_eLZ#WQqd*S+>Zt}PP*4~^ZT6~#H<caXB_6ttM%l~Z1S{fMh
zC2Q@UFW(yDZd<SHTK8b{)zacfuNz5QlHF&U?8?)$Pg%6l*^oI^UP8%vFXuzUhZ}Pb
z*T*LWzViMn`>SSil}h-Ft+v0KHV4MrDYzW$nYxxM`cEFG*&erT=Sq2aC)=KTRlGD*
zb$&}-qWv4Ohm4$0GR_uGou+v8(cApK4{}C-Lrd?ii&ek&O6INBe7DCseE&l?{7}(N
zP|j<(ZIXQdaYjPx`$HQyv%J;)_o;TNs7%;%xm?)=LFfM0M)F*Ju)FFv%X7^I$#1s2
z{{8#M$92E&Y<RKqfKt+_{s`*~9y!II#~)6pI3hO5E{id2tGVXGMJM)#b5Dy2Id1qb
zJ|y|3+nJW>+WhNR7nf}eyKwZD!=f#d7=M>4M+F_IZ~gP{Z*FyKsmr67-2YpSEoiIV
zcDd*H2?zH>p$^`^w(R!}R#J_h;NT{^YN!5fy#=bD`CjeXZpp%9)38$B?}FC1NrkUe
zYwFd~r#+m!YW?K50$TDd8{YHJ$WdGMnZ5K`VxC6km4;W>6+W0Hx$|w{+H$aSL-K;p
znf3=IQtBJoX4ZunImdnYG2ujvmT!1y%&+^`I3752v>jP0e=$&G<&n4c53CPd()r~O
zb8JV*v$dxx-(Qs58yj?e(Q~aL4%f8b*Ta&WnN(~`r*m$Ydyw;wRl%>#>mAj8OZ#4p
zcz)x@A$eK-^rxp<u2l6mN#B>r5xu+puJ+;wCx3i@IO8Dq>*q`LH#r(Fv)bbLwYBnd
z+$ola&xAb8csDLS{*{g8bnCJ9zgvH~Jll8m@Eq%d>tiHmF7K58BiwmVJ1ytkAM-83
zW{O#Re}12D)pY1X?UZ#BrhiQP^T_{wkca@s=bXz1hQft?Sx(^|;=5|%-~3qjLHpmN
zKYv@^zT)ySj(Cwh%egh|{IA>=PJ<gN5uXKa&7NHT%kBEK?4^RXQ>=IKC2rzdca1r5
z)6KfavsgnrB)3bK|2pzXHM71_(Xlah*`BMHIJPK$W4yFL_VlH9C(m>^MqmE7qsMV>
zuM7WXna?Zr4@GDw=rnaM)Ah<JKK^O0Uq0I-hjPXMG5<OK;@;ftN3QneAJA&3;}Ws$
z`p9$ia^}{mt=pU5M5b+!b%<h)2yxShjaYS7UrJ|r8`pN$c1fm5`}I!<vbKiqn9tpQ
zcH!hCeKYo?A5pe1LV~}3*)yqX<4;bt?4y&c#SVE#*?$YmeNlM&@4lQfs!U(j<lVXU
zEID{ZcGdcMH&kD9m)!gKUo`!<!{o>Hv7c)GH|d$B-mev$G4VwI2k$I9J6_Hvx65Vb
zJr_Exik3{A_F;YYgTA%blU@6NN(WtAxXz`jIc(NW#+tq_Hw6@WwygL+d#~-|I}(us
z_1-6fBNgs{pX6~$e3{U4LCJ(Wo`v;^I#W2d{Hm4Q<D?_{E&Eig(sT)(mp}Ut&#{}i
z^|s#2*%dyEPA)Op@#I6gWsPH3@~4zPR*%>XXBR(xB&Ygn)uz|+Qx@{QPd}q&w=Mhs
zj2-J#idUR^#n@N$#4#wz`?Hb0`ifis18t5i+!?+)Y176<>#wcvT>ZD|@;d*z>#Qu|
z*_nZhi{qQ}mUjduiJH}QJo?Y#-rJk|ckS;jF;=^;>`+o<<Iwq7#~Z&}V}grvoBNJU
zH9K1#DRc!LTlUDG=c0}E)T^}{zJJx6(!2k$Q)!&ormr`qIEmYS;eD{&M_k=)#sLPU
zf>O@L+mCG1Vva_|Y<*E1K7G~d7yGaNySDCC)u+TI^_N$FfB$;_wH;q;BTC9M`YOWj
zK7O%m>x6a7{g3c*r~F!9v7~-QvDuUS>A?>e7<kJaXX$olu@;5KI0?u9kGcE3`1<Oz
zucv4Iy}J73rH!>Zvc<K9+M%zno)1v@-goEade(>gnSMlwaqZrnRFId%&$IT#?t?oP
z{MX}pB=O<9TxQ=X^@Arwcnu64bDRHl^aPy@44dM9a7TZL?WX_D6>`oNOFx*j`cH4{
z+qk3j=*qaYuGfQ}mT{D>PM&|Y#prZ=T!+i#sabntPQL#CVE!I!pCfml@vROG6<yQp
zqNezJe>Pt`@9Ci8tvmk~TX*dUe*SCzyknDe743KYZ#;NH!q#Z{4z0&Jk*~_RQgwFP
z9Q1fGL8!TK_hNT5sSd~9BK2uATF+0@{Zw6Fq16}=akI$kg|*&}7#p_j%Qv>v-CuQn
zdoV*_F@Hhu=gX!|_ny4fdw7pGw)eWnCh6H<*=v}M<f5nbx7%(1`>9bQqW+-%^vLK<
zNqt+Tv!c|Z{zmzIEaqnw+`RPE?ci;vHSUGJSaiLqz^&@XBmZ5;`4-jw|Eu`-Zqzr;
z-{I4*F4>uXH~Ve;*V+UP#sbb087`8451egS#+&*fJ^nyTg13>9!T0&D`OB^PWrU==
z)J1Ym?ljo;Xk+A#cJ@6M%|*}t#O3QB7HT#K;Br)`xopy!_~Giw=m5>>H}95v9k|lu
zJEQLRgIljZOxn|2@4b~nV{MV7pz($o;lJPg3!a-~kRPZce6sfY`TM0W-W9H1KW+Wj
zm9{av<JN!<n7l6f<o5xOKaZGL=QeR~p6qynb+^MK#`gh2$GF6#4B`{NAG*Z6TSH<$
z>%qnQ9WUx1=VqIJcH#RNgWVrIPnOm`+8SPNZghRsm%A_jb-ayNe{o`oks+@v=d6TF
z1~=aMK5|T!)pU@KJ!do3rtqJozgYaW-1Xm%CCq%@+IFpifBiBx<DGI&*&JIRUkmr%
zeC+zm`1$VNGz;U4?)J45-Tkli<m9XM=Bp?B_{}(Dki&9pn?dHry(e;}N*GnoVDE37
z71_4OdB6JprY1(M`1+Vy`AlU+XZH!0mN`CL?jy%J{o_O9<jc8+ulK&=pXHDxe|x(1
zP1or>8+6zVTn_y^vfxgY(vO+8vgh7Cy4k#;A-P>HIEC3+^x`242H^sh1$hcavlu+s
zw@hFVSY(~hz_3E`kP}nTtQjZYu<o9~kyD;?;=m*Omm7<U%~u+-t1)h|n<8VzUB|qQ
zoBM*+7S5@snQM0ap7`YVSr^T3vYBVTYAtTeE?HprZsFm+T~anzR@~dqp;ve?fsvs>
zn92P?!^X<>M#axf88;L>2#_*baPG~FCFzn^;)H{ugJ0-oeD>gDzOP@{JYRlVtxMtq
ziI+7u^#!J${n5;lKI7t)?M8{4%hk^+h#vWJ>9_i0^$vqF-Iw|e5_e6UyWc)|H%D;(
z{_k(zi~L@+(e~f^5_hYsQF~_9H~E`xwa&;iT-eWNQ(#fux-ZLkPes=9xRx;Im8X4m
z-^{Cdx#DH+{RdypR)2E7(k|+JfbmC)Ni=gs;e)^`pZrL#Ofgg6d&f$A&(zIzFTU?^
z@Wr&~d+~KI91n}{Y|txY=Z|}}*T5^=bEBFkv%_q!tv{++R^6~%^Dj_uMnmcMwACf&
z*h{}2U*BdR{`yX?C2!b~T{RJ=FLV-piwe%|(-M|%t~he_bA++#gzATVWs+MH&bSs%
ziTZF!WAE!GIUkAX;?IS)Jpcb}Vxc_u$-32XbIm_`KPctOX8sZV!K#(LXZPKI`W1`X
z9T|S_-uLd?_Qo2)Wl8VyZ-oo}|Hi4j`l)aK!LU;)CI-8>TJmL|#0jt~^3T{WePW`o
zk4Htp+DCzV^Xxx&aIaQcnDXJk{|_6Z>z=9Sf1cPcmHkdeQ}z1elV97rYRr75v3Onn
z@cifGw&=AwIc+V*40`+L_}{m3WXW7w*xm7BW9eL%^vUbLoSI>@qMw6@^WdD6`%Df?
zuJU%)mKXfho{{tYgGHtKk9vhG2N``T*3?Mv<T(3oVUv?zlu!BhypY@b|4B?pc+mXd
zox-xi(!J5z_q*<{IpaQA-sw#6p%>2A?6w-;6I!~0!@(su?+-siM~$40K(V;UtIJ=T
zTMF_-6c&sA>|<E1*(6kOOGJCm@f*$?FP=@P@-kV}V5oex%(~Lc=Gn%_T+eNs4l!NX
zbgs?ta}4wPoTkTL;vbn<9%hVC$t`lQ{j>j9%bCWqWg<Cp`Hp|IqRKY?<8R545N6%9
zFD6xS!G+1zKPFE(`{{7tnMFU8^vyl?*|aAAIemD`>By;F*Bq_Z#g-j2+*mFAoh4Y$
z-+tfM!<%Mn?Mj?>d!7l~i$`<r%+7h1=KW!%s<7ksWX+~<&x_XgjqcWeI2$3qWZxl$
zm!_R{Zv@>ZW$y2I*Y{NZTxjIOPfp)!_I}y^>b~Iph8BxQ*1L~>FF*N!*@S(KfWnM|
zhE)oWO{0&nPoE_E)U?dl(t7fqW$PtX^u*nZq9+=J3$$~m^O=UdID0T4VcD}Qmwk5K
zeI-9b{YOY+LR+h;V3T~C_DsgBjv0;nPnWjr)q5{K`N*2^@Ab}JEc&YLbtNDFr5xQ7
z$hhaZgM9oe4!2vncbZt8VuWXPFFM%&kH>Tm_r)p4BVMV^zkWk`YJJ??y27P;XDW{W
zIW)WH^4^@h_%2`P#zRX?br{l?EYtJgYj1d{l=a8r{37Y=d>=Y$BUgL*=Uu<5ch3Cw
z-&Z=@7gt<xsH%FfU`~ode8%su32OT#=O+9S{`GjHpdM#&9ebtgd4`|1H#jAoSUFsz
z&hN3_YX7MF!Kw=O87)nc^S>ot*<s}}Bk$oC&zZ$dLG35=wk-6W>fJM+J9_e#l4!Yz
z>(4itnQ33xTcLC%xLeTs`(-unst2>H{L7F3@R{_p@{)^=dC-lV=%9jpqax>`$euk<
z{0#2B;^5fz?aHja%|8p$jP^Su+cN)1I#jLG-qL>Mx2o{JhjsRb%!_MlC#Yo3cd7s2
zxL5SWhW^Kb)~c&bxBr>9H6~`d{fxd}pE;kMx_wae$EV}wWwIi(cd8e2Sb11FGq>c&
zR<xG<^SA!ZIrZIXmDvvkoV@dYi<_MmeR4fv(wu#7=I#G0wB{9em$B3}w`?2r`q%uO
z(=L5n!`HX2@oBA$b&u+=x@DTHvU-{%O_)@F+Oy7!Vx8vOFJ>Ng=iv<nrR1p}*ghpm
zpZ&nRSCUQX@|XMjpUn7s&wtIF_4@0hk3TJxzQ1_p`m=lA$I3iaoe+4Kaes+_)o&A%
z399=#pZ@)(v03AL^>hv|MHyrBcfH?h1*&WM7j6xo&`^H;Q(b++u9baG=>k9hiOlDi
zm5_b0HPWH{vX*)QU!-0C-rw<@Ogt+U)-FH#Xx7H%r#Fk7_z};!+i~}q9htk<{_~5!
zB-!xbl=83O&1|cq8rqA$-!NeOzEpef=H<6@tj+l|YWf=+B_^dE4mc6;+i>^&#A$JD
zbvvguvgWQ^7F4<Bgm~|Ho6solKbb{=3f;F)Eby4*zPm{8rIF^#4t6V#1J|W*KbH9a
z;pl?~ufiYpD^+IbE`HU0XTxnHzH1@Rn&KKAwzk>c{i%N|KX3N4%BuZ)rnT5#+-c{(
z>c9H^AKvLPAFl0HKU!?~I4W<C_U^n?IqxqXM!zc=Rn~Ykvu8xlZSOfB?W7(tMgH+Z
z&AWf{p5ML^&>qGTxTq;Bp}|uo`qEKu1tz{G{byRn1}Cdu*gTy0R_3SKyVJqkY4MlC
zr3H?ehVsiDzL%r1;Fw;r#~&{K)M)v>S%>+X({yH}zJGJc!|8EHbo~d*!(RE<jI$oc
zEV+5M*doAvZD>Hm3zy|T)Av-~`cZahZJYU~UrSUcB)EM2D(aj$`B`gVmCS?-oA)eQ
z#ON2L^I2+DTb%IWW0Bb_n<n2@^_`>?8y$KsUfki9@KmlBY)_2mo-{rkp~oSUv2%s`
ztnSH@Q_nW_d{cM&bMw83vi_fgjvU#dpR!fgh1Pz*=O2Gnvj22;Z2#VOze@l7QC@c6
z``?r4zh3Q~^y2%jc1y+ha~WSJ-~0ddX0FVunR@r%2yh&jI`w#y-=`l;Iu4)XI-~B-
zzk1oQXujT`eJiqDlfC!KlrAzjYL*eVt0eZ=6HBFEuPmIp^zYn~Q#zX>IziKDo%?(R
z%gNsa1;US{J=^`u$?vV6T2)J?aoLY&2R42R{H9~S<54R|WamTUg{gdNj^9vt#ItG9
z|5N><+yA8gGH7HsJTx!MbH?V5=?cGD3cqdB433x<5wOTYZ*#KO#59!$cmByg{8Pri
zVV&`v`%Ab|jsD&am@8ym_Otzn<*RHvRyl#0>93Ov+I5!(ehPCqcC7l6jAiY*%*M#_
zvZtqSZ94eR^^Wg9pDouHr`}N1e7W#OZtVSM8{dEW^y{<W&7%KbZz%U)j+_0^E5?QG
z^}?cGG1~<UKTZC6@`}*EAH6(2XII|d^t1Znf`6^ct9Q)Y6mvEE&h`4<^;HfNW^I16
z|52sR{kcz1DQ`KcQ2i**`_R$F3Ku$qI>byP%BH8rH5G)<K4OvM!n7nfKW5Yan1xP`
zZpS@LyT1C=UfyeYYF6H6L*7eUADZaz%-eslG(2wW<kQCwO)c8n5u84?;P3nTCZ7*?
zuCGwM8}XQT&ZeGTc9#6_@?PqXzTN+AFeUwKZ0bit!wF5CQBs%NJEuSWShz7%*1hwu
za+JN##>sDXeQ<s`O(*xv*Bd^IIAWIXxp1}9T~<GClGG)xpgPIZF8>y7{I&Pj{?ga+
z>znsQ?cAjBHGSEt)oZ->-&-gx^ZK5MdH|z}NQZ|I7jM%-hYc5WRJhrg9tJG9p(V)0
z>X8`8IZr*>e}3Q3Z|~Pvmb5;7^(U_Iyrik8g|xj*e%{gTHMd*lp6C3q-#u_Gcjb%8
zWzY5(RlmC~|4HO(+u8reTa=~PZ*Ti6ld*2!dail@<?d`-6;ii=zx?2D`3bJO0vGgz
ze}5<Rigj-Ks!Ho-{pw#=$|J8y{X7|a<9Y7e=WotuvNSDQzVhXM0q3_b-t1!%FD##1
z`ovZ4AAe+x=qrXNEMfg@PHN`S7ZyEz)XeI^@}aqbQBGuvh7y-xQ)9q_86ILHtW1X-
z4MH?jxmXS@5w?A@_j<jYcEs7dbz4J^#hfX<FLA}K-8}c~zIpyaPyU<Vun9eF&)-~h
z{@9no&vzvLr0RK})Sq^q+fbhSkF~_I0vFk3e=ex~Q2G>NxZd+^yVW_}iyI>O>;y8D
zs#B(KesD>TB{6K9{f6)z+y6|ty0!HCmHb#IyT>>AZr&=nKk3#yulruH+UGtt6qHq2
zZFb#!eqrMA1-FmxZB6@e>yL8p6xle<teqm^xvtjMVXvO6DhF;hVVrl3olzrXf|n2%
zr&CL!!i)|zA(p1a4l5>T85-`s_I;mvan-%uyLW|Nn>sn>Ufilz1vYkTk1yZ)YVY|Y
zrB$D2|GS)*psYS!xAx^<ZvVR-r5}&K*0}v?z2+KYp0aOKtt`IW>AGP4anU8^`TEY2
z{sq76@%Z~}``@<Grs}4<OoH<ksX08Cv8Z%kygR;q{=Rn_OA3F!lI=^-*(v$|ke?i<
z^Yogj=l$&z{cIFg=O53UwXT0=jno$tzM3hib36l%&uR5reN6SoPSF+Xe(s9lW7L}Y
zcZ!%s%*v!zkL8x-bBk9!ZeZ#Xvp2eBcjTM8&C>gRKacN^`1>dA<M(To8NSUElk?>E
z&E8NqZTZ7>l7;yXzkPkN{>SE+Ti&1d$v@YAdB3+tJSF(z75^0%&h4o_WoPoHN-XbI
zwDqZOy~}o<^UKZ^3Z|4#KD+PU>I{Qj&9+|{?jD&}XErIoL4;XL^6^RIEi!7oVHsDy
z?z-x&8X3HNVSeP5h1V^b=M>1KFG&t}FIJX#Vmz-^LtInf;Ip02*GXMVQaY@0N}W0N
z6d$A3l?f^?T%Aph2{U3e#JJd)4h1U2Xb5w3Iwc9ra8Xk<-nU)!=gafQms>sW4N868
z`z&kgu7{<n{nB>zJaeZPzPR*@qkh`=qu&<9&)Dy`<>l6O%Qp$kxVi7U$GeB8Z%>!7
zt$&_)?*BE>Y1?<*>{H|wdVed(cmE+j$&M<wO1*D9Hd&f~)Bi8(s4^Fvy==-?k&m0s
z|J2}r{m7*E`PE04)c=2VVd~J>d~9Ll0gh`2qLv0GvomUiOi@u0<YYUjupz)ejF+Wp
zsly5vHB*6>BkF0t!q?09)z;R0ecUgbRlN4zp6}->r_Q{xbc^LKixRuV`QLm0<ezl;
zA^VB-mh0RfwI3yZc3J;&V0_!Yuk~kDpF!QRMQ`hCJ>tK<lneXwPA71`tFPjL8Fq8+
zV|$;kufDKH!1f}OYfJk&$Bi>t#KKIEe>>RxaChOKh9ycIS47J?1iDz?iJCV&U{ss=
zcdCR=%u1)D9?NZCdv1BXKu;{>*ul(S!uKr7^BMl~CZGJh?DoSN({p#ub$*+$x!;O+
zhggQ!o>kA)at`e{>EfYlT6+4FS?`Gj&c5sJ&7D#jTzvAvd5vSqS%!zxjE#G9RyI#N
z^L@^v<oRdTEh;WKRq<On<J|WPpC4FEEU;hv(RN1onIDV}Oz9_M9N2`ONVUjSXK|<n
zvF|HUVs?5RvNxK~NM35<)djy9uNLS(YTkZJylGO_tm@k*j{UjbWc`TsQ3<2;42A**
z2BA5Qv-G&TSnC%WMx2+KbbZ~meY(~E|Hu9HTED(jD@@?u-do@N&;Pamb?jH%-<WUz
z|3CR@Ss(XrZ`9hV_ggb99IF>u$Gr}()h^%^v?@3daHFl?hJP^|S9$gBS67d0X=RLK
z4Pxd#@c*OxCy&GDk6JqjII%l39-LF%Qs#KLQcLyz!;PArcOU9H2G{TiZr{T-NpkyT
z_wT#D3B~2zt`)z&Kkl!a!UOlX-Elwdl()XWx^JTO*Hhk6f2$v*?Am?7<>-T$W1H%n
zH#kW#|8Qt*>{?YcmCd|YWwOVab?dI5IWu#^%`CN>*0ZKc?6vuqa;085bxZR%tHZwq
zogPj}*kR0mOQXslfcM~A4dxXGq+F#BN*r}AdT&xwrr?oL*J^Q?O{(_xHJ9~&%2zXK
zOwd-`HK(cEz1r*jrvQ1OKN~Z`AEczVEGqK;(|2#C@t;j+C;bpT-s+TcO7_F!yD4XP
zbWM$)aq{ziG0tPtWd*iAJIZG6`RiHF4$oNMz%cnm+YfsM?YMc4^C0`IhV4HNn#`}Y
zVSAtXEbi?y%Tkg5hZ1@W9hOf%X;`(6?{oTZr{7y&U9Iwtuk|goUR$@)F+s1<U{P{M
zjq}ypM-7kEsmtm9xw7s3>4!I}%5SfDE5zL1C-(i+Y1y6cW?PxPXNm~O`xrfMd2wL$
zv^)9%K4(O)uqTGJ1^%#J*lByPm}lh^Rh0#=l6W^}2p?Ee&~Q0UJLgQaWv<;Sx%vAI
z<eu(-|B)r0pZ`UR$lMHOE9RNggOZlcDP7hcH;?a(;f((;#KiWou{|idx?x{rV9>{2
z54oK+(hEMirZ2v|yX*UvH%_xMr)~HxaHsZ|_%;sCsI_5R4Z6<wc1_6?*eLYZyk=6B
z_>^hure8hvwfk$P`P-zw@#@$AU;Hh0dj8!6O=oqL2!(6sX8r$lPEJ8EqP@JHZNaiT
zGT!YDh8tFgf3z!Kcb=(j!2>nt`1$+ZmHL+WY};A2x^A!Pwbz&K`M-Zzy`MQF{)mKz
z6~o@m2ljlHX?f<Gd~sG+aMO**4-Lnk7udV6(cJ6HxPF1-^q$CF2ZASNSG;(_7ntbF
zrc<$!@9i}~i#w;%<7KxlzOHi6cFH<a)h8+sP5;e#x0knD^!jSM>+7zsdwuoe{e4%T
zd_Dd3(7LSejVbEg>I~a{TwMM1lZryxW_2l#C)1lJ#cYs#Z^J(C<HmQ4IuYEjR(px>
zSXO@Vu8!LJms%TZvb8q!Zg>Bp`zN*f>(%_KHR0#>mi}9_K5Sps*Y(%-M@>FgS)3ey
z#K1wj;PgK>%K+N~4~89Q9)=uXS{2K{%<mYa%g8Bwc8P>ea{M(N$K*<f;6p1``8EVU
z(^2Iuj?P}i&i3fh;j?q>ICCXFykE>U+wrz%Xlhq@#sA(vpMB3glF9uR>1o5uz0hFN
zw&=Aj2e(-BJxpW&_%mJcpBeuv%?&M#erYNN=j+#`^7&k=sjQ#(*0iUhNx9+JGDZW{
z!WI1QgH)5R<mFpUeap7CNcS2`T~Bp@M7mboZ-M+8_uQyU2HRN!%Q!E*`V*07@_)}x
z=WmP`8CJ=7&Xs;Gq{yyyXLSkJG$$d^Z5<V2cip}_`~NHGGIWt$;gOnl?eX+a|L=0u
zU(|m3vwBy>?EPUo{O{Z|z0+gidroEBBu3$D1$PUV&ldTk&--HzfA{G(ODC+AUOGWE
zto!4JWaEN%NyEII-}Rr?nN_6D3ibY{aVPvFqnMWY#g+E|<TgC>iFE(G;fkxbR8N}L
z#;M9y8?v{q_kZlQU`yP^iu{$Y98xnr-}gHz<t^KHGv`!lO8S|M+jq1b=07ctKWunm
z3-|tZCAPnZk4QezP2Vu%rh@j#hg;ToYMXv@_b6fC&difl7k~Roei7f7bMAY8_iSj|
zp^^0>*y`FUsrQ%U=Kc@dbkN-CiQ&(B-cp{W{SOqU-r2a=A$RhF6DJnu)n%$2Tu@kl
zlDGJhwxV2OVDtU^ZjKVKmF(^}B?`Z`m{@dHrKUqTY||q}u@cGUj8Eh1x-Z|Y+VsJo
z?}L5EJtMY_Q3t%bCPXZ~U%rO5o=4tv$-4!LJL2wo9}_5;cy@)LGV31ht=D9o<~{qj
zr$@ra<m4nClkTp6ZTe5LCKrhRia-6oZQ+ss{oE&9{x+6xTRx}l*kil7qSOD0-Cv;2
zn|MUNSo`pO$zv|jkN-=Y$a;9zxM$*ONjq7M_Ph7%-cC$x?)vrK-C1PWl==2M-_@D~
zU)mn9_le+zjo&29w+Nc&E?~_yX{ukZ)W&z+R_N(hY4$I*v8qRu%~o*K+J~-`ZxUbl
z{(D4<&>tI*iepED)Z2vDX?zY7yIj}tGDKA1btKcB1C6p((i?65+ZW3p_YH0hOOa@Q
zJB3wX?Mv?UoOUmMxZPgZYUQf%I==2<(D_P_HmT2R-DmxesCvQwB5;Mcrod;h-@E?B
z_9gvQSd+Te>9)Hcd&$#9a$6TJip^0@`*!jB{}&&t7kfAUnEHNtP57_PcdPg7@0|Z&
z!XM{pb_Ib;_~rQ7r!=0B;}h7K9LJ&%k~lfZ{XbJeMWK+lr}j0UH4*}%OdB^=A5Chw
z!+7`O?i~%x4S7186HHtDD;#{>ESCjN4!h|ZE6;vVV9f^x0nP_;J_i_{NiFz!=cfX{
zPIuFek_oam^tW05Xs`dHW%^!9=uC{ph1H3l^(@y|+Sh$>Y%rMG_-pRW72j^U3CMrE
zqQk8f$Q*9}*0)>hfcuX6pKtH&-ecwSL`QAf83Qk#0+TIbhl1X_b-p-#Y{&D>EF!y(
z9nG^#d~@<z;pN`Y!{_7*Ht=lVEqFDre)8*k9g+Y2+h1O^`1NJOry0)|<@wsp>9GGI
zyVWDv<c8+<ysy7xr|*-#_W$!OBX{wiwJ!Gs9Ih55y(>9>v$a?DOq`!N`|&B;1Z|5m
zdXKo6Z<)!zR&uXjOZT&?Wv#V|FTOkY)wxWoTDxw~+7{mJ4wtWY%a~qSAu0RAbxl>>
zqAS*tMGsu(1t|nhd2Xj1b}b}i+k3t_E<%&#-pHMJzCZhv^E}u0_mqCF=<n{RV^&b$
zwBB1G&v(S`eS|z?F2~8H-II6xeek5<gNMP?+PJr7`r-vUqwXZAN)`5;*epEh;QEQT
z=3hQ2{rl#d3ZIP+naevXA22A$-eb&hH)+gJe)#?D@&gI^Gn?*5E^+*FiFfN&n|Twf
z^ZBnehP6rWW)4?+^*hLIA)Bh~8i@tYFGX06#Kv)OaT(6cOzE}j@{|8xJn4m$NsPCX
z#6gDRj;UKt&(6)<c0M4x<Y(~5FQIildnHP(_s#3jJzoD!SmbT(#%126@5Gx=bMsDe
zVxPvNaUy~z?CSDu+P0z2zuQ|2Rm`4hti4;2vaF|QZOHZ<n@hR>*XOE!`V;DHp`bl&
zK115;Tw6Ee=ayxA|IhYtem{#(xIDh+kH8koU$IKtbH3Mfe7KMic>7;E_sy68j^8@I
zQP9A}XXzi--G9^THl*pZZ1`bYZZ2Y6eBiU$2hAB$?JNQwcAJiU-l6e&@)Z^ff&E;k
z1^Ai%d`jStej3a8lb461E%FkxTk4wMjM0yF>}H&*@vynSldFnX#Cqlg>u-}PdZgZN
zo)vsJ-u~o|UrjF3Z3z;|J2D=2EuH@G%gW07FGX4(6_@TWb)MbAqWa;{)Ay;hZxiGB
zelES6By9i2RsHSVUmkn!t#fcbn%o<?dc#9Kn;D%s3rfGMwB{&pWGb7x_Q~H%S{_<{
zPh5A*Vit7xzQOV8Eos^J9P&HY{Mg^V{Q8fwol-&EpXU9xI?l*j@c&gln~3MCel})?
ztIvdg^@gkS)ygVH#mAP;Nw>(q7<1|Fe}28i@vi^>FMXNkv%kK?XEn<~k#f;R54tsC
zHx=JV`MWpAE={sz76<Rq;+CcQ+?+)P%fnta#hcvyyX{8JjvF19^cIO3toY=^_0vFp
zO7Vt_Ni#nC)tQ*vI0P))rulP&!!7S?e6t+)^SInxC-5lr-(AD%r7N8Ol|&m07atNY
z*wJTqV$IXkhG%~B42){+s_XerMhmCk|LCl6c15K4!c40}i{dICX>zw0-ngXjGV<R7
z_uZdPKEC(x@ot%g?QHiKWgIl$yMp0S#<DB}2QkzCiv7PunM@VcTia@XibObWpR(_r
zpTx4{W;5*$8TOjz?(54+Gkh`y-ujt_-M#<ga^aHtv^jdaW~6<&@2I&`Af-HidAVe@
zTi316Qx48wx~XST+T=>j$=B@#nx0yg*D!Ry`~AK4>$lb>-m02;D(XAR(xf*WDwCYN
z&hvAp#MCCfr5~0peA9C2-50IOQ~Re2u8y1RALqJ8wLZWjV&AW?pNnd*<`sLb6@1t(
zbNS!y74z-|KRj2k<AbHP3p>xr_&=@93%Bb3yOGWQu;GkkP2OML^}qgz9s3l=Usr$o
z!`T;W*41!sU;E`@-F}wvn#WK7{o)kda_2!qrL^zEn*j@^q~7~`x2`eqC-03U!wH>h
z_=A3*j=iL^VYb^((_r_N*-Sg_*7S8XJrQ=UGJ4f$xWVmWTl0gK4Xnb#*El?{o|0+W
z@?o0M{`slr6DBsA{Ez2g;!Bcn;uJdBkhFiJnE1}q58Rg&eX`n6E&8`_v&-wt@rvt@
z<$k=sRb^j?!6c0v^L=xtpINfxuU`9x%9wN(fwMw(v+U1DCp_61xzV#HOXTFO<b9Dp
zdc@QgS}l^-kPQB(+W%nl)$sNMIm_IGo4-6;=J-r__SCIfzryYu{$tv?=i-95YFcXN
z>l#wa4gWMWM|n<N5Vv`rn%8ydSr_B`ek_i^cD6d@o|&}VoP|}2nUkKpeKyB;NAmu&
z@jCN*ufDz#|3B>7!XHX6Rj<T<ubQ8(QI*ngP4cPXG?^CGEk++CRW47Nw6W()PR+8y
zt5!dR`)8lAP`_t?Gv#CW3+r&FIRbJGU0b8i7W|Yr+r`zg*v>at=hzb7Jg!5VU72>>
zs&}lp`26e{1NZqpa(>n)qH1?dyvjDiqIc)<Ytjm)TYf!$wsqyiul2TdGA_U7?)p55
zP5RDzMIo7G`==Bcbmk}S`Lii@)~wF0GV&&e|NAFQ*xiu7y+3<3|G6_EH|_7<eQ;c_
z{+pDdbI``aopI~ypZts0&zhnnXZf)wW&WdjCi~@GG2hoWiT&LE`1RA?!j-*B<{?KV
z4);a8db%a)x*O*ywuZ~wOK)|zd{MY{<Vg6NzgF_EI~OkTy7I1p*>ld7qJ`)9uB?#T
z_+?_SO;b+q3#Nta=YLKAyhHqcb?a^u=NoqatBZRSbHbNdGkN}aBrh{VZ0+I+1#d!K
zW~FI=`eLx{eA_{roNXVO%uZUq&COg8rW?Cdf5HhDHxW&ta)BqM_gO#9l~~ZJ@OZ{t
zmXEzJFWmEsW(awn)h8{?zCt<j_Xc(5RorXWMY0;qjlSPMW09p`(CWYMY#RSbo;FBJ
zee~>1S+ljpmVN#eXOx*&q(q8`mqs62VsZA1v&4>e7EAqKg4gcs>%Y3|+yAHc-?#;f
z71i1=VtvlR)+aq(n8W&abBo08u8mv&KkP}fOmb)|y--^!AYZg(-hxwq9ws%Y)UB{6
ztlv0UQQf1)?cSc-Wy+Nt7A@iyc|A?6`uhu}v|X;>P`fy<yLRb2rNnl-z}P3ht3SCY
zGSvS4^t0}#-I3zc-<nx>-!HS<srj-gNHwzK_wM<vrxP9*8s_W%{&<tiS^kjHv<tH7
zrxirkZmB*H-@@~L?|QDDEN3;@bJNcShjUH26a6=H$KD->jyEzqyDE0?+Rt<IW#3<y
zs`zoW(BgytE0gA|rRDoKtg$XpV_O=;8LM=n`oRC_H5Yxvi#4w<z4QOWiHU4o)lE`K
zTUM)VXxH;^KeI(IEdG`{|5T+k{!dZ;U6UVbOS@!VSnRQ3zqmQut@}1LvLBp%a-(}w
zZ~cmTWGM5){OuOK;tp{;8;x1FCNyrUm0#5T`=@sF#D#423)zm@bWaXm9<cZB+_2TJ
z+)PvYmmhwvk)X4_yL<lAsJdP?@!wXaU*g3lO8()!_n)(3?d`eqo22hO+PZM}`f~Nj
zYs2<0Xyo=fdVPhUkepZ1lev@6z4{n_N|(P>{`K@}N|KT{#O%yW(tb^U*Uc=xeviNW
z!O$;fOsBj*tM|4-S!?IL_)|wdII`_;mc3zG-*WQNq5cK7lik;^yzjNyo6Bj<I?>7-
zwQW*eJ~8rNF8-C8BgMASAn|O0nfD{1DWA=L@CFoT$9F&QP<Zk#*87!?(!s<e^T)ni
zv(|3@eoCYL#T(&2SL${tGEGk0s{Hp~`Pxf2|9;)*+QPN5I6I{^_Qdkw>#Ds<PDi}`
z&%ArvwI-DB<&=M_fqxh3SFQE(-~UrrO#35~)XC}V3#GDha@WVdzZ34c>4)8#31{!U
zve^Iq#opj6d5b=>RzE*}+4W@7j>4Drg~1_$NhKGnmR+%ae&(aZdD%%<eogkRnfJ6`
z&Pb>!Uch`(XkBfV%@Rf9r$056>`a8FgqLs2k1tnWnB#rc&!px3S1-NSC4SFCmS26e
z`o7iP#Pg-6{fp}bG-NBE{?nAX=`lSsDfy(tr#FX{%nBCYV_*=z;y6pMdlhS;xbEGB
zwJ}$-&Ry-l+P&|8nD*p<{bAB!|NmF*+NtNg`rQZnmKq)<KE?X(lRrCk<X9OQF6B*I
zD>jk2ETzv$Do!e0W=m~){K@YR9&BRM6gR#sdN`(4bB;mbmjcs2_2MdjJu;RZuX&?-
zYW^Fq(yFgDDoaoOyXyVx)!w4r?;nZpQ}W&;&&u4Ed6;39?KA%m^`$SH`JNuUWN;(q
z6sx9JdO{DofR*#K&f|$oY8nn|=5GBT^YLMThk3~nrUQ#b(mPHRl>C^Y%2)Bje)>h>
zgEx;aj(@1NJ63bk>=Z3i_T-Fq!@{+S0xAvDSROvMyWjqy?4)4rxp=0J89a=)!k%fZ
z<=O94nSF`<eqy-QO=l&yAG$TSg&tSeS;RK2n)bjzeBMv{+c#!Ad|qT#cxLuaJM+V9
z65MnID%kfgJ&^t5`h2bE`4N*O;^pIKsObsJ{r!CNzMo&4>tb}|`M-v`tl!#^!oqF;
z`Ns2W8=kpIJ#gV+I~B7$w(sbPppQBgJ=K3X*I&~P|GirK`rY;GuWPMx?5?e^GJk!Q
zMPu#Sf*p_N9F+<Dn|VC^cfxemDQo{;Pkzep(#QAXZ`7()L7UAURV5oHroS~lD9QF}
zmA=xe)dkmPm~M6NSGO=*y17E)EC21|++q$9PB(J`yG++|EI)ldVjr8ad*Al88785>
zA|x1B2#LfAwcp@oSbF1uz10IP(b+PEhbBh;SgiZTxpia2nje33l6tmjOC75#2#QqK
ztB-2xvTHh~Es_+!O!~Hp@ud}4#J#$vd^O=aP(Cy4z*LX0uH&yYEe+$HB9`wD|C?G~
za_Z>5ReOux#vYizK>m-y%DwA<RV;qU*d3v9QUCFjC;uM$|ET1@7T@H5F40+hgSI}K
z)Pu+}e$yv^{_lEU68`aP*WRR=g|fGvhV86d;%Tz``Kr>KeKU@92(+@9u<Y9(IZH!3
zsLfmB8dq7Jdk^nh?^@N(d=4pj>I^JtFE5nK74aMrod0(>n_}q8{cEQGd8=tHBVM1^
zIc>|lG`DcIbnj2cw`%U5w#D@7t4~q;z2mmtx~4H}^^WN?+h!ymSobnv#=B?Qb?ILu
zB(948E#dNI4;HXEHT|ws@e0RJr(eI*D*f52aMk7ad-eJ2f7yf{S^fQfw$@?Mb%JZ|
zy}G9Lb$@y7sZYiR$J3Thy|YsyHB4rH{5v@{(}^_)H<l&pHR+}D@YkQ8^-ARy%U-uB
z8#HSje1C?0DNbQ~d1C8+hnHQ)qxd}RJ6%5U1ujXMaby3rwd@bR&v`%nvV(&x#{{4I
z@_qM1_aAoJ@vtw<ZF2mjKY8&*M`X3^*DNgV`BmP)-qZ0-DCEfvy;G;(H0GbJbvy7X
zr}3_Zm!wfM^ZOs2i+cn+Cv*O|aqfQpXP1hDUw&>5(wv((ebcMe;XlQ8M@{?V{y#KR
zJ8a$6)mQ&bTdi~3BClrY51*r^2U*`s>lo-Y&N<rTa80G7`uu;#4(|n0pMv}Ddwl&B
z@oTHdzNCNhdrC?z9O4WP$ol<TespfdtFXGjK<(nP<W%V`jnX>#^O%31pKSEFaJgvT
z!q=S=vCpsG`L^~EW9iWxzu@&;C9n71mUcfjC#GHfhL_3hTcIiMtlnQ<UvB?FO;Nz>
z>XU?~&zx=L>q=x-xW4_7I?MgikMISzKArz~&ZExz<GGLBXZOE)xa?z@+xmYGRy1wA
z8@w&p^ZA+HAMb5h6W>)I_sH0H;qJElTPO6-hd11d+nVKXfBXxtR%HK%M)hOOKWko|
zHDVPoU774QD|*qyS@z5B<uY-zB|Yd^_vo}*G!JjghhtJ}(xU8gIlc(|v1v+Pvs))H
z<J|u9i#ay*`z`PO68E?I>#GCrQew6@aR}dR)%(Ml@<DAzMDP7}(LauEy+<rQXI|JV
z!2OKv;C7z;bxyrZa;!g(h-MZ{ugd2=<}qJdR`>d`-)z%f9N5<080lDH@|C}S=W9pV
zfL(!R3%Aw2d>|5fyk6&cM`r#!1$+Lm9x0X=opA^D9^}s!RQdJKDg4%so((#;8eAXp
z?Y|iRzP@Qri(sXVwYtE0`3rBX%#$8?KA5I^e_r5!^+wrP-VeNo_~m|B{3^(`n;^|O
zEhIl6(9!1djyU(EvqyrvPkXO;c$Z`Q@t(b>c=<L}=X$@t$D=X#{j&MLO1EFWI)B5;
z^73;t-{)OE@h#B%%YEmHGv)alHJ9%m@-zO#d~fwkgVbtYt`>Hl#8)MYY_m@*y9jvf
z$nZCR{o$jWhuK+&o=~?^pNh777c44em4aidrhO4Ey8lPuORu{;tH_kD)U5gWr+Et1
z1#B#guH64oH1+A<Kd!6t6Arw$*&tsioVYY$>e162+p8wBrp?R#bLO&fWMsiv;mx`_
z_C-F|UtcXhn|)w{1WRgp+5DITXPdfrEEXwGGtYkgT*O#UTsw}3iFMV@YaBdW%*kFp
zf(QA;j$YZ>Vc~ovP(hygfjYC?2flZ|?gwn;zs<Y6%+B@G4-xkJjyGf-*p?(2u<kr8
zx9rXlp$2)z0^`N2R`I_%SM_DV!Oh|a;|p>EH$6PB_`qQ)$F;=tP5y1Ia}0T$H@F?U
zS5O^0g(q*acIeKa%kd8yX0Zs)DdOOHV(}t1N-TU;`Mb+5UGh(UUthR2cjmmUQ3|R8
zDkgijEL3TmA-m)CXa4r&psNkV<&{zsCtQoZeCek?PoLzU^rR;{<Th2WyWE;+UY~98
zJzP@c;wdhXkkh9uxD6j}FEUws=l_B~Z%p2vzwy<_Uqx!>5f=xyuYBtle|*jBq#hff
z^6tAle|_lTeKV(VEGXQ<rL(BAvo@YD*#1ackw6anVvDS~G4ZB9$|twqf3V6(r7bx4
zc;2~Fy7%>uPuORAq2kkl|0^w``gGTSSgRAG7&x=gIk)-u(v@ZNpZ@8LV%c6%RlB^+
z!#&<H%G01JdEH`{po4$SPo)^_I`L1Y<Eh5JJvlk`$7{;VUkATh?|q;_;6ua-rTA-7
zQzxzd)ZwIbZD#Xgx2Dxf26lP>=X-ig=)J8ybES>L1|^j%66-jRD>3L^db9LPBk$qw
zOTO!9JzleDZn2WLzt)-KUR_V})Gw5Aue+}^|NZ{P<z>9j7WgNaH-53W{I|K$m_1UI
zcjn~<XD?<v=M|oKZAER3rQ(4jvlYK7dAz=~<($Ax{r^w@ztC-w{qwI;NOnb2fT77^
z=YY>40{q{WEnHaj!C`{j{x=>%y+WTfV$S_;m*ee>dcR2|u*IJtINtWx>DCbYCXN{9
zz=X68Pcg>D^Ih#;_5~h1<hVMi&9nS&O6QG5FFdb?2^bxbzx#gQS36Gs!bykz9E&oS
zwk@2b>{~N`->#+M>!hC^xb!&5`NuY!{B@mj!OzR)?@ZQz`lH~Cribd;<BwN5+qM_n
z*_mrxBe3o8?=!#mfBmc1`Ml%#Ud}cACTg302pwKtaDR=&ltYXf?cQn}PvW;WeQ#QQ
zSJJTTvfe$O{cjUrA7DJx(j$9v<KO<~hAo*1He4)wI0MUdI+{}B8Vk-hKXScc>fv9v
z`sjf_)(-P~UpTzxHM>^wGEUp^{Q|YQZ%a)dscyR=wsBV6tH+gl)7&S2Z}$AKefI?A
zhTkn44$Q9+{+TrY%@d7<Yd8JlP=1uQCjL%>ht#KvU79x8+ijk{NPK8F|MebI?Y&#N
zQx^Q@$kUFKyw`MM*0TCJP0C^abZU3zgx|?d{Iqvfbk}d`v%b!k&!25KT6eN*QeEe+
zgSXF}X$+fid{VmWrdvNHyuN>1bVowKdfGRmHBW-8_PuZpXOBM3rg8MtoTmnpF3PLF
z?@(&@T=%e3>94}X&$C|~{{7fYdRN~R`CI&+yDtiUIN}`k_0*K4_^03As<VD@*m6{F
zvUx^VfNh{~cIp2r-!`*w#`2pD0)01*Gg!>?d&beWX!(XK+hkm=-)t|etn<3MI9F`z
z>bA~=^9BC{w|3O8x2*s4x8z1mA73u_hTC#RdyIE}zFug-p?vh2_Rrc=@ocPzr+qvj
zz*2MLE>m3NG6}!889T%@-JF9emP$@BwTPE_dhq6^H}!uScQO~)wU|EqaOn0ud2jdc
zCK+-Y(%)3iDe+dae&a2bHs$_#<_mG{+LA&SQ(i95-V||!WxiD9<x`E5ALa>^2cJ2e
z<u&Or%hr3F3IfiD)VuvZ#(JjZqWHbPnSrx}fBAT9STS|&rjm(bVG?DYJZeY&8`*vO
z{qFGbJqH;NNv9VW{Am5i`1bCcEwLE}6YrScd&gnJ`1Z=}Gc(_QVLsU6GvR?|T#Vh)
z$|vt$nyldKDK}rM|F4YuO!FakzWl5S%0{anrT+AL^&?}2;_mr<ajh?GB9;4$D*iVZ
zz29o}b^SJ_?6oR~ci3qbF37N(*t!4Wf4`qUFMLk)caeyDn)%Idw$|zCcJEv#Em^fI
zWa4^#;oH08zRgU0_m1)YgIBi_bn<&umUV=&uJTU%-L~4rYiT+6WQLA=^Y=S4Pv+0v
z86tE`@tg$jvG!h;ybnDy?A=-aNlm;}%@_AIA>pvN{NE)43*Rt_KKaxo%zR0h??c00
zZWa5zmnR&V^ds%esqeL#+r$rks7ts#&DA-A|7ZEX!%JC>FF!utQ?k>{qF%qcY?Frg
z{mYtthYj3*UP-OH&-0y!<*8Og*{^$XcX!^}{X_Z}pTG;b_WP9&r{CDRCa*rzX~#4>
z3snILJCA_&#hDQ*d@S=P#QuA}D*MSFj_otP{#UfPp_Bh2_kLkt$9f-62Fd2;Aj9(k
z;frK57QDZ-$R&AGW0UIU{Mt1UCj`Bp@0nW0VY+WdMwjPy{o|g3&S_^q8co^xv+u7N
z%dIM|)|H2Mcq!|iNj<Pnu_<tS*8lVQYo6><i<_#$5Nv&tbM}l!YdY5ZoXF8ikJr5_
zpmolV?f(;nM@J*1mn5(~G!)-<KyBXp*!x-Kxhe9dw>v}2j{XQMC@ip_a7r@T`B%t-
zmOA4l`=%}wW?$REB)LrE;mxEw<>C*fURId+Wv`6cj`KXr{q~>ywoKhQ<3h4gr{%h9
zW;sy`=kL9A?%Q$r-BnZl>ig3~9{<}r#ojEzPxOv@#XspM>>&>8vqVmdebevip0?J*
zn6+l5-FKJ6$#G9y>fe6b8v1?KhZp<QTQ+xk{JlEO|Nf)u=gzy9$K1Ua{`BXht?zfH
zo&0rehr)p}9(%5c%0qj(R9nP3UI`nfN;#S)E&H%gOZWw^R+i|(tLpnsUZ}TXXIfz;
za693U>*S~ILd;#keGx2I)%^S?PCT(!XkGlYdk0xK&R1D|@>{<6VPLa)&x|zJ=1o0Q
ztUAh<f62PMc~0#oCy#T#n`RoF4ze<|ixhCbUtZsNbxB3sweP!<QWmrAW?kdImV<xt
zvELb%Cp?rs%-kb5dDg_ZdFuqW$fd~hXYD_{clO;C=hLHJzT;hcd+W{4==kGbjKXF5
zH*Wsh`o6q;QQMrvke8QN24A^h{9&7vM^ANzhjQ}%hobMe{&P>Td6Sc7F!S+F=euuK
zJUZ3A|55+C*qfyhFAj(8ToZrFVBVkl#av~lcst&8Pn&S<(w1}YTJ`UinKXT|dmoat
z-<ad>=A=iDFIA}s&ac@sVIpVvPyOFpBfRxrhws05wNybv{<WKnw9b(ayR=t_##J`#
z<a;TfA*GY7u|R2^*N+C<wW}T%Z%oX2AL(u;uvF>Uadyc`9XE3?ZRT}<nD^C<OSFyG
z_jJaYmwLB<R#q@SPjkF+bx9$QUSz#jj>w-D+g%NkLWZ+#3WeOBM|w{?**eu#@ZJlx
zg6)lw0o&LWe9ZcaJ5y&HKmPb;oA!gB+w&itP(0G#x+k#9D*f~ki<n13+Sh!mwBzr_
zeOVs+m#Nr5<Hej0zZ4#*iM;&zGxhJ(t@`?glke=+(Fu}2Fz4=VnMKOeZBAY)Fudc;
zdGA+Y=YsbFu97PkzA>DxcyYbvlb7xs@--hGXx@Ia@B5UDSN88uEmP}0`Eu`{1#PGP
zi`_aXqWYue@XNUUaSuAyiFvQTI$zI3!n<@$>J~r8zURl}ej8cVe{0$$5_9$O-Pr;E
zs{U^?af}zQa9DKzkD%+JUuh=pm29PMnP2o)|L6Ib=pOxGntA$yYsG8+eeg~DmbEXg
zkF#{%<arg*OKl3C&8d?Y678r9Up?7yy}`+b5}AH+Q>`bZvM>IBTq7!wpIo}{;o}(*
zvh4lF&&4*@h`!^u$zx9rK6p-y@xlc8?67~DJenWA8%8Dvo_t_`u;)jijYnZngMG@X
zZ8Hw^-S$6!A>Pb1a&rG9`>3t8sh)irU;7{Bs65*Dlc!nbtk)aod4IOPc>6wL{zmRg
z5lSB#C9JCQ#r&UnnhNBzW%}kVj+tLKm5F<^sg6I>UW?U!b7vYV)IN6OH|Ke^X34QR
zWqx~iO9sZ=5}DAOHP>^}`_TLMcJVE2`r>=j#`=zS?Wx1%owG~=PE9I0-yHO*;(Zb8
z)^!tuROEMN8kLw&`gZ#1|J$~;{wlGxV%q!q4ho8$^eW!qKmEAg&&uE{0_6$^d4lcB
zOI5V`f4|o+F#fXQ|M3sgV@}Wa&l4)>kF9^O$y-HwnP$(iU2_lnedu5dc~kdjOVHuJ
zuGY6gWAAD$ndJF5^TA@qs8`xc*__ro$SxMtEB{#Gw0>{SlE?Y0uDZ*9e<QK|b?=tA
zNR}SMobdTcO7C~S`#ND$|76xXvO6AKU6Na;9~*G!d}AEnpO_ogKkjT4cwCq@&Fai$
zY5r@%`GI$LpPbcKSf`bzV)Y?e;_k1j`ip+O&5Hk9eR9pNlPnvm4ze4nHC4Lbd&Kc(
zos!_mo+rQmbKW_Zt;XZ^{Tj#5&57#Q{pangO}oii9(VUb#-=|nb4qfGHXQjgXU8hb
z+%I~bCX1M_slQj*5To65BR6sR{_LViq5l2;mra$@_19~1?Op%h>DJ!pUtdqTbo_bs
zuYUVozqRTelWn&ATOYUUvHnG=b?>{s{%KM@KmBRH%FM|ubrW>6q8~6Y2)a7X(&s+K
zTEy}G!nAkuKYfhn{G)hl)3aT#udiPEerslJ{QiqU`s=%&b*?(9`{@T)f%D`$JOV0j
z1QiavW4rhvphPTYdr^$VuaB+M-?L0`l)myLuDWmT!*5POpXO<E-DH_$dzIBrDL1_7
z^uBc*(oRL8TcVEY_Pw!Nv-0ZetGm>{zG}9+y6WB4&f1ujwd)TzS7={0n&A+*!TtZi
z1rHbcS<bt0xMI=!z?&9N{>enlo_AJpiOemV_InGA7!B_0RKzWEy!GGi<5!_To;!;U
z-k8U8<YL+_@0I?{3XMDFi$ryEI59j3Vc^irJ@$U_?}jO%=g<ELH<;bBi#2hH>f?#a
z9^|l^UgPNMGHGpl#Z+4HH*w9Bvew9zodpNkFXpYwW4rCq8*$p>?)+shO3kAbTzfye
zNZgpOejxeRH056_zIY3NnY(FMXU=UE=TCRDV&86G-+VARDW1J<8pD@qZ#=E1A3l&W
zjlrRieTtCgbRh$?_3;-BUhQghpX9aiN&BB}{`#hc4?BB8R-c~ZuwstB`0ecZe$#vA
zH*zZeFR#_zc-s5w-Mp1jU)*GWul^e`>DfVtCeEFm;p>hnym)kc8b4<~A8%avzZ(BY
z%P-D*4{eZtZu;xoziQj9Q;&QpQq?X=i#hk>{}<`?+5a9CC!|mIJ?SMCoV(gck>PYr
zVG6%s*4Ds6BW+2=2V6Q&ZMI3B_mw#Iq0woDX1<POnTFA^h@%pV6FzSEpr6_kxX0b`
z>x>d-`M6bO*^_7P@~fG<BH%YmYxgovhSUxEY~Oh|=t)~}JTy6!p0U+^uA;17P#UY^
z{#h^VFYCrSyQvsFy6M~)?76Kz`cZ(|olQUMYd(1YPRp-vZ;Fq3nSM(0;j~$<FIr?@
z3nzIU>NO3o7wf*-UH)3U{_c@mSI_*~w_$1aA)SEJ1!bQr_ddz-P`o4kvFP^irw285
z{NM4u|6J5NF|J8lLs$L$^7BXY$rC(R&o{mMZ}+oC>Ef^VdxHK%?RU{Hy1M#gX~@?w
zY4JU6$G94-u9~Dbo$~345&gdG<I*{^u6=x<apJ<dJ-2u#8{Xgkw&8-`uXtgf>2ev~
zZPy>@y}Khnx9507>i5uJ9J_D5tzN&i_@Uj>|FQKfKW+X$vd#LNu-4Wjgv)Y6UViPA
z$uF;;|Fpr!^P5r*)4dhyQnP#aTh(!CK1g2W8`k5mz3=tKsP}t~>W=<fSGKPD*Xk8T
zpHg@2TsM73^6o5yC9Z<$CNm@s{y+WYYl~;b%riGmP4M<herK@m)P#t##>Ku{HXduQ
zvD5oy_cFp@+5?mQf18@iL~5tbbG~Qy$wgsd@(J}Te7Cf^4b9k%Jj1t5h*bJ&)V|Bf
zbc<?>;%41#*3+aHOjvS&+3@dQv6R)kRkI%{yw~6N^Jli`{<!z6f9mi1l`AU#OTTWp
zSKS32wQPAMC5KM7$LDtla6R-(Ja+Bq`olsOcUMfV{`Nr6UQRh#^oP#*+4IfIUw`nA
zc6Rp`FO|)F+Q&KTx=h$(_IvC5#Wf_RzVir6%$j(3THUhgl^Qw%b?WE;vn=CR{+t&4
zB)(r_ZHB9#56}L4i>;0Ch{tUDc2s*I%b9~`+Wy|ZW*5JA6_?ACt&+A;5Bs0Jw~i}v
zx4U+EYf$%sjMFb;{~ofu!SB?pv-mN`(jPBXKm5xUS;DeDRw?q+!gWjU?pI&5<JZ;V
zx{aZ$SHJrf9>C%vtQF5>6PUHy^9cU~M<I*UYwt^+p5uRJ`oPjyZ;zZ#!u1Zu0}Gzc
zu=LSWYr1j3;LF7CoBwWVmh;ei`fpa@7Gch~I-8tNM~-b>QN6{7@$`Y2WqU6j>|J&7
z!*><|_4QBwmHh2n&;DEd!B*}yDhaNir0vAI_shF;9BpL&aq;`*Ne5eQ|1=Gqu()pS
z)F+CuI_cssI7A;`PgR(3=0|C@l+%R}8@0%phs>H~7JLw0%<A}5xc|e(n4g8ax7<4L
z@O#fLjY;(meeXB?+~vH~vi^_gB$EpXrfq?xzx1aD@iy{AZRPwd;J()C-<6Z%1uXME
zm481d^7Tbv^4&??MQ8u`xA3<YHO=!r{)I#C`OnR3XHV|=xifs;;nQnx){6Aj?X6xD
zTK>(y{^0Z&j|+>QK4fT^WPM-sy<EeFJu1ovA4v-^ux)t2pxea2+|D4w-XO-oz%-+P
zeGWr`L&34N3W3wo<awo}yFCtHi{C83$tz+sJ7<Pt<xbZviF-eP+{nP--SjaaRaB;F
z`o?r~{g2;YiG5>DR!hyui(V%q!PnS$Q{E|~KYEb`hkC<NrXMALr1KmLI2JbC_+7zV
z`lC|y<fn=WUoTY5c`UrdVLo@GmyNN;@-HIVo_Ck!UB3HY|NY~n0EgEca_oM4<sJDa
zw9N1MS|;&*h6Tgki3-irE4iJRo@P!wwnp?xMOl=?BvS#Qp9;}OzEA&lph5RP@Atf=
zapuMUukYtCuX`59cly9vH9^m3i#DaMv7XX-y^d>L<a0gsjTbK|{uP(3c(}=;wk=J+
z;grArxtQAdd-k|o_0U|drGF_cE_1s2$><+-ewqB2ie|6jQp##}t58XJdh5@Mw&)d+
zhbH*dBnnBa^p?!(U$E};>i0~cTk6Zjzx8va?lL~C_@{LCcFXRF^JSuHuMOE|h5gyk
zcjxB0u%B*RS$b7|q0Q5F?p_~%e@&ds_sPHYdq3CcerP{9qi28YW6zb+3Hx4iR-aDJ
z44KO0t=n>i)67EhKy_Rj|C>m!Y2CI9+$VW{loUE~_xPqCb<8)-H@^RU$;{kghs@#I
zuf0WUv^QFr94_^-y5JBYF-5@f>76*gD7RBR4|)3L3I~^Je%zEAGQ&t>>G7L2@-55?
zUP)gT9=%lBocvGvqKJOF_UhLy&u%9wA3DE;`{?EU_K}lp?Z2}+3Hr0h?4L5>teLFa
zBeq|$C$F5Ve3-QOmR?3)#3pAO-d*M5KI%E*|DXQnWZ&k#CT@2s_tj&!`?~UVj9vdH
zCVfd-R{G1jb@B#H-TwhkKkoVPSMkNaJ5%54>8`H45wcY2=jZDlSANYoVc^AOkt@8w
z?$||xa|?9rGi(K|V$S3%cCIY5`~399$;0BQN*qsC=p4GX?qlnuo+l!4@1K2*tXunk
z{-^Jk7H?bNr+!5?i<5WN)J?yxyQ#ka5Gr!>YGcD&bBzrLYz`JRe@F-xll-OMW65`L
zeaQB<g<ETV@BOm;e~G*B_CAZVxf)mOrma45{`t%Je>Xx7x`^65oV_=^lU=3AP2<o0
zw>OF&KYo0pcE`z9pZC-G<C!In&vD#ce&vr-`u}@#eq7HL{nxZT_3W$vRnF(G{K)ch
z^;t3{Lqh0b{kOwM4y`P8TpSePv$(=OF=j#6miLW1lb6dh9se91S-AE4I^)b;lMdfV
zEYkT<tMu^Ilkepr$;}EE9j8VdIP5XUfAZ6M{#S3O>nD{z_}ez2IPB3L@q=nr<?GK2
zJ#hM!_wtx;;FAJ_x8lFe8e<Ah-#Ax1jd7tV*ZpbfPtUEtb458aTkHJ7^RMRbI(Q*I
zPEYEF@Gi$6YMJ$=%Rj{Vnaf>uR8~BC@n2QDmy~Crh-j{G=IpBqKRsv3EQ#XaoAfTR
z`8QXugnH7Px(4qB2Nh;EpO4;CJK^xRb(7Rne(n8m(?fuVFGJKksD&pg{{PXq3g^N~
zBeD0t{a>v4W0zhpXOm+&)mZCj%z0TMsn=8LdE8vsgDfVk<@<MEUaQo({K@@f_bw5c
zg@*Gd&guCkYLgUq;>I$?*Qeh!gdgoW`t7>GUY+A?Hc=5b9G`qIdv%aq`s~x23j%n|
zPZ%y#SofT9L(0m9$DS0;U-x05M4VCm*%ussqCYJc{LOw?%PzJkrqnUpc43TJ_~!L<
ztR%|frsmk13KfToPM5oPVeW&@uqpeW+t#-!PH?|H`}*#`RVRP0pWOZG%GZaN8UMxC
zCgmw@?RfNKeva+z58FzW*cxM^SQ%e8>h5%kn`snj-K=|6R;9-JZohiN5jC@wMsM0B
z@)l+W|Nq5%_@$qN)Q^t*pNARy_?y4;36#X;@ozA_)Skco`{e7>GxQ?cQlG{&KUw_k
z%%Otm^DCamdx=avVLIhB>-FN7d!y>>IM>~EvRkp9x%YM6yPr0f<E7`{lDro5>`VQT
z?U&CMEmao%?X2?C$H+$aye)^tq1ThYGBa7dS+6H<u{qS_QAMfI|A^#7zFGeoPsKD_
zMkUM&-JLMW?AkZ;_Un>;O<fliz8(-hIg>Y#C-zI?@dUT4KfbS;#b~_w@x#*&GrsZf
z5WbOGqj)TOW(&(fQ;Vkp=8vu}+I4i2=@z%Ql_rrJ*JL&9zL?O}x4X4MaUVmG(O(<0
zMFslP4$6oImhHH*TFdlr(fY`mO<N|my%3sd)|hGe=XK|bto<!F%2KwQzi&59b1v=^
z`SO4LfxtNZQf0}P5sLp0)=z)AU$U|56{EyycX?Ar?z^>l^V*nB`B*>wnX~HfmHy=n
zY{8BdOA4!&FAYu2h?Z;06u7e__1JllZs$NLm8Q?->vIe}k~;m3xv$85a9{GwVfQx)
zqmAWrfAz>p6z;edytP=7H}&Ygi*oxs|NfYqe6RY)zHjC3=e}-UA9OzbT>Q6>ffp}*
z<OvYC@F857%jbWKO6N9nCZ@dy52UVZOn$I&e}-}f%iqc$vdLQtn(a=XKCtB9{zAKa
zbE$QuUmwhKPF=R)mBO;<?fbo4eNt~M<m-ET_p<K$;@^1zjus6&1s7)Ce(?31`ukV6
zE?#80dC%Yfqsym_a~Di}T*j@lscuHp<%DJdL1vTWdP{cWWgmMCj{Np$Qw{j{yR#{3
zZukRj^X2->I$oI^c%EVZN`39SMQ#h6jwbd@yZ$8ZcG0Ax7KS%|-@d>8(Uu&m3+)na
z3H#4qIVIW7BN)BZz2$50M8EGb`vQg3Te)_Mrz~DA#cg7iJpc8Y<I!%W8#$V}Z8y%^
z^|gGZ%Z6|4hDSc$TC=>9HRYh&rS^u9i0CVwD#{_z^7nl6-X}-0bhPgL`1Y&EZT}xB
zN?hv4%iRlV`}bVvaM`}!Od_UA=-2lBldrA%{dSi3w=CNyU;l2{_pYAbWx8+T>JKs2
zpDMiT)UAb|URDtNssH*$8sq)-;ZARAPlhhAZ?Vo&JTAq*>H0>#;D?9ana>pdIa%vJ
zOU$ubcYmmE+ADnD@+U`Db(Pxlzfx^RLN#qOKWZLx4Ar*%+ud{b?t9_I+aIfm-Myi1
zJ+U)f*|xs?{ag0VN#WkrXLG*F&p-aZsp)RZhkKE4f3<aJKl*mqz9H&aS=p;8ToqT8
zc5e7(e6&?&-psB%D;samU-LAV94o$@`61i;o6`0=&aF3|#P5g_>zm)2;IlGz`?l*J
z`JdV(C=@04f4Z{!eme{M(&M&=?;K6qnRwvEvgozDE}7lhW2drl_jJAQD}Ee0Vxp5-
z{885IQLI(;6$QuLcaJ<R<cpp!6RN$awSHT~(bAi)Ji2j<W9H_1AE{gYCS`Bciskcp
z&WXys-^6k(G&DTUXqC^`rh=b|O)DmQq#j$faBb0Nm6z+K--o}x{yOMqi%R6z_v>%o
zS^Q7yOPQ^b?&pAycWVCZlZjV<y|>!G>RzA6{uzmU3-`?4;%mJi^0CY6C7rh><b_A=
znZdaCQbyMEkBjW){M>Bk`9gVCrumeI>t&lxJXtFL#(I{|^x{ep-tb$&`S;5nO_gbT
z_gLqfRC=50k^1=}yQcpr5<Wf0uQ$5Mp)LN2CZA)<=>t#sZv;H=64@uTB4vty`767p
zyJvQ+-ZsD9;EnsAU2n`XZEN;(8VavCsPyCy_w=u3%<FgQhAw$~`cvjZM`5ci+h5gR
zs($zWqeg7S`&Un{YG3`gGj!!;_2+-9%~m}9_eaFn_12}F{qc$3`@ZjKTW^;9ciui5
z_Y0oaFJydh`t<d2-OGv7bpO}KZPogkDABy{^Yi_J3diR4#mkxpoPSVxPf2v)u^HQr
z#?4o;bBHZEyxw|RWFCKn$NH0N?Xx*2y8f7#e6zw+UN0+DJZz7%(^Jm})706_CmYXI
z-6!c37yVac%cRxb^=o3+W?l>Qj=A#btn=>e$2Tn2@$fk-Zx_$Nz!&N`OTYUSYxyPd
z_%%WCS>J=MmTdGauU~uT)$#h+)%E|s?pnsbwru^aiB_wx?KA&cCAmI)UHsa*t-r4O
ztO;LRxAoTEU;CRks)=S;d=h+h@aLfx9)YQB>hJw*-p@LwGutV4kL1*Q?|DyptSG;6
zi&fi4|IM!VTkKX}J+yWC<=nMpJL|S4@8(}ywrPF%?scl!PyhPXt*Kl8>w0!{!3Gb@
zJ8xY7nO?t<P!Mx{cCtGA@k!G&?*DzK5umsC$HfFr-+c$KMMt-3i=-W7)hOJxJM)#>
z>8n+e%v*Rj$0=nPH)`m`%LN={bZB_keC+Ugr?mVZiARD$4wM935}Ex?H_F@m^w*{i
z6CwGBH(x0Kd3W!A0sq!KhC6=GBmGyWAMSYg;k?NH9x<on;$@%ie)@6ajqlu}okCI?
zVezuAr9TcPG`tSA+^rhf+q6jQn){9~50)-|>LEBYm+=5cLyHVg`5C^%b!!V6OA?yS
z{kX(<@Mw=iNY$e3PS?|iLhG*Wv)@{t9kW`@^5eN{;YX?kojTXQ`mp}i#DBhhW!h_&
z+11b4^&x+9htKs>MY7X3d+z-5TmNU^O`l1zk7xL{s@rVS7VWwdrkx@CsU_lQS({;N
zkz1#pH=D*wj)Lc`Y#K*)R6I=9No^L9cvBNS-+S%j)fG)03CkwPSKhQZrT<LhbW*cO
z(vHpQGdL&IckuEXTyB58n7P`vepglcZ=bV1Qg37bT3pSsVD@|B%Q4fK`NQNJ2QJ4L
z#r)G@O;BdD-@oQT^!ojab~K)tviI?Jt@60F7v^%sUtird>zZEZ^o9R9J_)Z`XkXFj
zXw2G{+LEq+@5P)6;jw?26xXs&eY>ViKp<Q6{hFgIB<n9|Uz<>CaJ}K}g6vnKcR#&$
z`H{Rof6mK8M>T)H(*JS$>#D7pdihcN!{54!RvlFpzIt)K$oek^Dcg9mSa<>wEfz+e
z{8Knf@aSu|dnFyQF>Ez^eBC%c+$&+~G+rDcQN?;tpSSYT_0LkzPZg!yUobbu*K5u6
z-nhS2)$60)JwE!aH~ix7(5Ff)mj7>aoH^Rx+x9fs^ZJq2oeO7e3BMfsYvbMEy0xM&
zHrA~?{9wVHz3eNTj%~Tz*4Zv~ZRNCS@&0`}#!LEF3VIkwzPr96O2WaMDNoAIwl{BY
z*o*!1Bm_GOq})!+%wx_!WF)Y4di25F{nq-oA{ROzHmYpTWqxBmtzSZ#P3PYZsqJUD
zc7ADazdc`g)uCUvW-YlP5?mnrXThFJ=L$MFdH*lcn}5w^zs=S{>x|%kJEFhyNb9y&
zJxXV&F8>wBv9C;j`rR*$^(K>i8NZxeDdw6JwnE?*mkita$E&SBA6EZkYg-oQvhZs1
ztgQcCTeLXs#W_W6Gw00|%Y9{MvPM#-w){#^&HFN*o!4Hc)>q9^>Rr8io&oEJkH(Y5
za=gSMFHi40o#)tiNPKx2pO8h~za^KvCj8s$BvfX&?3A5ryxzN)ywf>P2GmPGFq_0G
z7d5jdcR`|7teIDJ&f#r)vTo10b-+>a>#@3py?Zv*T-Uo{9@ixEzaUbN??+Sfn=Qdo
z1zM9PzAv1*ucNka&)S+Frn38{uuPs5_inbx=0GC{%fk#4qMts`O$cmd?-tp!GxHDU
zQyytOj)Gqs84{AF99SwR@P3_aU`^)f)`QIL%&o_mycAg~4?9X*{je_){bJt1yD0v}
zWp&-Uhi`iqUfNuc^q|Ggw(^p|+{#Nn$F;-GYuWEr<qx#g_L<T@=?<fCs{AkJUh{~&
zU(b8_i=;h&e>fx%d@`Bk-QvoS(ti!@@k-7|gLp0kr0lY<TNSflS{u)L9(G9y8})>%
zUwNip4qf56c-f=+R~)-_Y^tsv41M?bN!FQtA7|e>{p+5*+Jo8E{r6u8A2grdso1an
zW3h`bd+p=2>FeX0yiYBjzCwBGqz7d)B0l`tvqa#?^J_ms7Iw~@x@(pr!~JUZnn+8b
z<3DS|3*K(s`aP`3-EZw;q3AagGTo!DPqU9(_$Z*}OS#Xv&M8McS5EB^SvKp;f7S0H
zK92PtK4b`YK0Utj)9DXu45Sh>KBpe!%ez*;hD)*YlzZOC4u=AHy944p`4J8_-0iKb
zUFS-EczwJh^1)-P&4I72DlH{bF4R5pIm`7SAfL7Q<K_m2X#$5Cv^Wf=vuG`BS<F-~
zB-$R7yyMoPbRC8l3&rMUh(ve)(6Wo)S-R$X`5}2Zm&cC0-%S_R&dYJAKiGO%R;h5o
zfqcjH>n_EYOy!;XwJ9=9zV7#hr$4!m=+uY`o0@nEZDoC>oBhE(QnI{`Q}0h?eLTzN
zP>1$s&3c7v?$n67X8G=0W#aM8er9vvD#Q1m=gafo*PB@BmiqONc9=`^>!zJ16X&k)
zIkq5l{|;5PoeR_Fu30m6_vfiK6*)dX?X&yV%TDLMFC374{O-j%wRrXNpFZ#2zuGYS
zt5iViIZcj<=0TB*+$?XuWO>};(kA-K_0XReI(w3|)7zHyF#g&9m;HgyZ|1-04i9bL
z%{_hTd;iIg8?&DHxn0<{m7}Dx$~nDn`h)oyjZ>9<u63`OX%zcIbJj7A9P>=Q{lD9|
z{~cu5zVU~O*sn~-Je$yW{_CVyDla|#Pw?8;xc48Nw|$Jtzx&Ry|K6UbF&(}8_E<_w
zK5n?w{Y~&)oqF%qigIVOyI)?NKDqk+v<>fa^n}A7D9(T6e#x8thR<8AYv+W7I6Ajn
z?(aR)+$W!7nmt)*C+Cb8bC*PY(!X-*DC3kg!ABp3ICx~Q)VM27J8>=cf%*DHhq$<W
z^X!|7?0$GRKY4V2ub0x)yWw$Cww4hqrv#^d|E;W@W%28k{aN>&^VAl||JrnzeTPw{
z2wSz!pY`GL&6|w)YbJ~K&dfgH`JiOh*8V&5wuT4(GY`l<|4VrN#nt-n|Ng9>=W^}a
z{vRn*Bo8qh+aG-RhnkK+t#b30yRY<?o!cR@pI1;)%_bo=;=-@<f6Rg^TxZQm{`Sn{
zKg-00hQ1$O&;PLB=hsm;D|HQ_2?ipZJYDt=W>?StRsW}L@6-Pe1ujmPaXk3NrAc}H
z^g}IwGXGxE-M#l-oK?>6e@pi@l)O4!*z^CxgDK}Yq*NcOHP(OVZ`;*z;jzN0khR$h
zrP}O&)Ld$NARIJNjz4R|{G!uRTTWh|ACPvt%fw)MPY#FuAAza&yFdInR?g$OO=!_^
z{pH>pjpYvVihek>W$W(9*zYe+2d%E2V{G=-D|bunlfU0j8ZdY$ve?UCtJ(kM;h$=q
z(>>A0kFb56bNO(|sdY_1)b?0(FU)sdD<yeq>Nbw|<%d7GwzDsIYjXUL&~-ICJMP^*
zzxT)3^WOU!7VP}bBVj_T^sP7gVi~nu>$#pTKV-%#A?OfuW6F)+zeQhisB#!G39=ti
zpLcUfMd+MFH{MTId*vDLD^84<-Oh4BSLB8E0j~<iiR>!3r#^||<a3G=S9+IOvoEBq
zW1*L2%!K~!VrQ$&rTc~WQ}2F!ckA-~`zEQ%Zi&C;pVl9{seZx!Z^@zfTOAe)Cp<oA
zoOe6)Q5)M{)9#;@o9c3&ep`8}{RiXuw?}3_%hvk;S$x`T&0}X4x%@r-?@D8b^2$~r
zp6A<)rOIpBTW<0EJi&2v&krHnlPC8I?vB`1(a#c`URv?<?>m=gYOG2P>4EipR~KYB
zh-I~G+)%;S;=;AcBE{oOylsT=KklvRS2iSnoo=aXym|FOu@r88zS17^AKDA23Pw!1
z>==9N^+y+D!Bg1+_0PrrNA8N3R%GL`IV*ax=j)kk^&1aP^2!uByfm!Sc3Sac7P<b#
z{pMvd$Cx-ebLA5Koh9Pi7OuK~d(G)~{k0XBoR{zD{PV1&;7_>eoEN%Mms3`KP_j5W
z;qLkJr{W^p%2)H={WWXH_oO>2^_PF$-LS0A`_M1(&AU0iZdluP=H|u(MSaf$e#R5_
zCuIM%*z<9nwNzlC8MpDDJu!P659A-Tm{XW#ePmnr%D`z}seh_3UMm!APD$WBc12j?
zSkTFzfq&+d`(1dZ`aDyzD*I2e%)XL$a_ycCfAh5&P1-n*sz1%Go!`G?-_acIuyg-^
zxK_BYFFjm&qQ&2}u65qb(g}exUapr3aQY$jDC@*zKCNqM5{2!TAFhiE{?G5U`c+<-
z?SH|!zwgH_-~N2%M2Bx2=9=thXDV)fIAhwMSGzluPj$Vow6~4fy?3pR!`yr4e#g3Y
z%(ju*FUh@gb?~uiiu+F`pZY%ee=wu`?-L6D4V%l>I9=xcuMsxWSY*#@rmk!Gu8+GF
ze>QRbT`AP3Uv9tO!;wesD&LW9j8TU_w;f)8l<TXIPPvU{E{ny)sa7|CY9_V)oxO+c
znO)ZO%{$+TJl%e}^+;6ez1=#Y&i2|fEgp4naC;v6qb=}o(SkT#>9$XPHon=;EGpMJ
zfBdXH_AHTM$(27l_&<eDpWgXXyf?|GV%O2@;^L2d^eb$BMOS^^@UxJwxFGzrd7ZF$
zQ2M0)zi~0%aXSyY&FcAPXJF!U$0qeZ*R<K0SNqPty!`Zkjj3OJ{7$3xEB0k?x0eXo
zua~x|Kgd%qqV~JHyy<kv$`g~mS5~#I_x)K|lkLYQH8cI>W8ok1S`p=oj_E#6ZZ6)N
zKmFOCC9Imp0p))t&F0KnRyV0&-Hg0S`_rq8b&u^o`Ec+1NzS?{KXoPsb_l;vc>L{v
z{lAHR7k_4Fco*9ieSWe}uyV(q+DTmN*gb#l?A2}EzFfe?-DHN}-Zv55o@?K{skiv!
zyrKNb)Op{kZX9t~uu(?m<VSVcmXoqEPmVlOc;9&A$~*C=`*an4blv)2a$4wd-_1}@
zn>Dp}*T288ZO#0pUF%N&{{Q04AEA_|`^{hUX<c6Ll(pj2+3WwFn7uvuNoU1c3%$o1
z`FdtPTzQ)R!EZe~zr6BGA`#cu++Wdhux97`-0)R0^ODuBJ(~PdaC&L7`jXh^bGo<1
z{<`8gC%kHLVW8UaZ1GE1;ujrNwD~Xo{YBhf$&*R{T{W~1D^2TLa_N1>?|T+4ttV=9
zr?Yz>xH8xCxPrZ4`EBXQGYX7KpyQ9SA`TX*oUYz!KDDld;a2G0eU9EyvzyCjf6m$Z
zd5Y!v<nwda+~=-O*HWI)Ca{m`G{483Y`emummU&bT)_*E>bku66n7_ZvZ2S^H(VLq
zDsCRG4NOVgtKD|YIF#qHr2K!&zxk`yul}?7{{Nupo;S}6ew|xAFHLoE>EEkG*MFD)
zI#VOlc4wBy{)%T7tWLu7wB|YbToin$R>QEAY4UrvkN{&jRfEEtSDrgw$zWf8o4LGA
zYIm``^b3!xw`Aw{GTqqK@AN15e1u`dT#G*$3d%hX#XH|D@>1t`z^E|u?<@(OkV8e9
z=VIf&&AY`IaIUB*?J3)I-HWBA*?V=%-^@y=VP7v6`}3rH_42OwzJVX>PXB#k_Wx5O
zPnzJJP1l0HFtqvxxm{KI*pjk<JE2p%sgT7oKzqjdSL#2M*T2YL{ph+o^OQM1*4ZyI
z@?qDWFU!JZeOYZviKd;Gr;^a^%>`~u+-*(~{4=bN9pd;PBBbnLJaM7j%qbc-oSvUZ
z3H>lbXO?DQ;)YLkJ-qf+a@$(^-B-xI=VNc(#$1tEc1dii-`y=sqo-<XKf1hbvn9XJ
z367pX6W-|-jB+#oE)v(d(`j_i)3|iK6}#lAE0<%XA0EyPej~5!c}bJ$*7oG!n&p$%
zz3AWg_V&juJL8>h{5(6Mxs1{KdibkZ{oa*!|4-J~pUQl*PtP#+TT`jYVzwzy5BQ0<
ziEmDF=)Tc?H^b^Pn;ZM#f_Rxj@A(*0UcG$7apqrX>HqD$YuCNl5?C~yb*1L*#LPFl
z-p$LbtF7l!fA{j^Q(wQ7?Xl)%eOJ#_xn=*o@YyQmyVKeHMlOpvmU8n8s?R7?zj;%f
zap1~jj#KV(w<ZQ%Q{;L&No>mW^}#`3>~;$ZNVIu~$}nnNnV_N~)Y{UNup%HwgqxMg
z$zZ_=4L2^<7WJUY?<aSL&sdpRaqZWamAj|D^$c!&P+s_?=J6gIGk&qZZIf1YK0kVs
z^_6~mb}rlWnjim!^VRR09$)=+`?@&6)w}Zj|EGL3eK70Z{%hA=pFQ(``bI)S&b6ST
zG3C#<RhyrhmFT@*nxJ>sN`2qI>j%x(Z9aJZXt+b>0;k<fO;Lrf6Rv+dVJWa+ZRpCB
z+WnIzFmMNW^cpd4HL>IL<(SRDbSEf?(d7MpFO#TKcT)VOuQ{RoLN9vJf(MK$A``TP
zgjkvmI&Pe!BO=Jk;^<@$pd}*Aa!9Rs?LNEPSJPz=d2N_nEEv2h){kAe;Pk(z^V=SN
z>{C9gGC7oQ`QPf2vjHbh->S-;qWUj5@VVYi#~;P>u2;Lwc1`^IPW-#J)%#`x2l?OI
zPIB{n-T&ckS;^Oypv2D)cIj)@2A})Lx6U{F%9IUP-Xu5uxvo9=@2mbFdtyI5i4Bh5
z-d>(3H6`qH_s5(~pZvSlX@9ttv06p;=vr2eOeQ)0_uC}iw@ut|pl#-n2Zq|uPwu#<
zT<DPy{gt_aQB9;n!-<=fQ!#Ntgho3TtCM5Ef)EW+&K94$_id{0eqQ(ES;!V4pS-xx
zSDeY`cfa~$JO6IQ51Gx*e><9a>;hk3fARO-yt(GD7yVyTS7A~Ze=GMNr|#|h|0}qz
zus@8f?M*%QFXL`~++uYn>r?JqkL|wy+{|UN=DyVIb%DJl)k#UY$-jSBN=|6mC~;MN
z&qbv+Io1D{JF=raf_o!#m#@8e<j{ACcMH9LtIYf{ZQ)f#S1s2};}DYxQ<Y+Eydsux
zW$dtER1uk?A_{W2!-Rk$32t7Ng8?%FT8z0^jx+{z-uYeq^ZD`Uus^?cd(FBVWAw@Q
zoR+iW{U1dS=g!&hyT0So|Nky|yY2JdTHJ`p{Pf^*rNFOqmd}^0-|<P>a)rO((+?*e
z{Cu|8^sCx-(GM~w*KE9c*Dg3;uSaX2z^3Ji51HP*s(&B<^47bZ;Ya6di#)nqpHlG3
z_}c$~FZ1uuIQqKkb5z>PIqs<i%tqed7H2891T7M1N_upmD=LUd^TNCd77Ppmf{wEc
zc(_>0EA8$7JN^FpM)CFE&4sm+XI{szt*!qko*nyd)9R>KAur}Fd;IEoQEXrlUwrJp
zb)l=v@7`zJW4?gJ@?e1~4@a1Qys6eF#-(#Mg!jvOT;7mn&J-!Gu)ZbP%_u`_v7b$b
zsJ>Y|6UR*Ewj6=N=&5cieV#{JKC8C)9uh0n=rcWP;uc{e)6ZL$`$gn^zO(-0ynikG
zEOX26XWd^JdVkl~`1&>RtIOloOuD{%_4}>K5d|6(V)`>L3q2}}{*x_ZBO>9xXG?+h
z$9LuRJev-jZ9bEyKSf0&U*^+VqtxlLOBmReo-tsMx^!KJfn!p`0|u8V&ltYEp3(ld
z+PTK0JGRC5<H73(ewZ)_x$rUQ`uoT*UXfu4YG{+5c4F39M<vb4=7(}p{2gW0JPmSx
zPxH<DKGC+RHZMtF!Glv5oJs=PZp12NK5d-+?Z(-yXV`dk_`Xb^@$_!llqu3Y&h?Dk
zA6YBDT(_<%s6L%JVNHvNjmi8&d;dw+b?i#}dP>aVa&Jww!#3l4v#oy{#=18B-q^YN
zvy+*?gcJ@fr^5$&71r9xWq90QRGqoGv5NcD)@>ULR&Y32NzY+O><kD=KV8F+m@T`p
z^U2k3EQ^)qXLenEe@)M+`ft_x?vVIfSG_v!n|{`wrkNhv`&u*QkXBr1+3lkn^Ai8Z
zFXFo^UuXIJ-l4=Wk#-Mp$>Z#Q5>IBo<JKy=U4J$vVb#{xb2k1p4EsIp(dK8&%&tA5
zE&N*y-dyexv14=6_0u|d?9t_oXHEuYaji2rHNkzW(W5hm1D1zY*VIKny<^Do;lU;D
zd0tnqE~)L}RdK&NJ6LVrv6IPyP0YuqHLt3#n$R5a{7-)6U+;U;-)%m}#%xj#e|^24
z^N;)a>30q+J>$$Z{ZnlX_m7&KnjD+QD;xRz<sDyiztRhd`TIuMVLJ2XIK!TFhD(L3
z#CE+C;<|q|G-Sq^7dk6=7EIYMYpk<AI_`m+pZDMD!;-dDnT6ir?{>Y7UKJzoYR&#v
zk6j$~?mjc*QgpW!iF+aPueHQ?UGC+(HoG?$#jE^TTNk_B$kj>WN+iGN4HvFmF7k$U
z@dxIx205s0mGoZmSt>xccK#3f=KC9jZ=PO%Vg1Eb(~s0Un(uMj8ymK^{MMG(Ys+R$
z6Y|pjv}wZg$7<5kFSQf}n_Sz=-ub|HZc*CB(+}K#JH6WO_AdMK+tPBuil<M1UAedZ
zUEkf`>*5!CC$CUfRaEXjwy7lQ<lp_R-U;ps`zAiTHEm00iAv&^&$r&i*ekvHzTShu
zVEZ)}sm=DAo!;y}zxPYsMC+`VxBHE>J9R94(sjkwUlzV;_q~*(VD`)C8<GotPh%{Y
zdnYUZQf0~yhXvMtkxgH|=di99pDEuNx68z!<;j;XYBRn}S6_H0xBmR3(plS7qra~D
z`ugkotLv}scRK2s^6$TxapJtG?YdplZ8IF7IP5<an|bo@edkwgyUY~bx;`8^{-9-v
z;rWoSC)^V3PPlB{uOphHDe;BdA+zZ&YhS*dzrXIWACW&Ni|t!;ncZli$^OM56*K<+
z)HXjVpjYH3KjFl)HP<dy9NgBll)psz!TWi3YIp259bWo=!S6@QPycW+VgIqii9L5=
zoT;A3hs)Du{piS&T3>Ux;p@-iQ#7u{ZQNs2AE=RDy}rC|uf@91MmKr>rsh?bLOnJ1
zNO$vb#W2q}Gu7xp>-s#7@ACdF+cnp?|1WQ1dOT%!cmSX7S<`P4iJJsO**7?b3wR2f
zvle9TJ<PYD!Hj=jBZp={#HED$2Yt7EIVHhoa>P!|DesSXzIS>0ixrDM++}m(%(&(I
z$Fk8t{t(-<MK51^9ltueoK<)hr<i2;zek&=C;eD(Usq`I`SKHC3H?{kH@wW5uwJ5e
zssG_!Q@!NQ{{MgcOrAvfEpeOmj#E1${w_NDe|n6U@vqgpBg@wLzP|m~*|8xayz-Ow
z?!v-~3>}5OZ}0v-WNJ=4bMAlN5k>BZ5OHROj*Dw8cKa9bu73XBK18IYqhXt_;k~>5
zhJVs`ADrONYRJIg%p}3Iv*S=KThqZKCo)%OzdX&t&bop5^PDJsyBvu^an8TO`#DY}
zU-dD%YNh>0-9N?i)5hB8>-#3%+p3>@>V0bUS@nOMoSn}i>-Gy8b{xC#_22rF@8df*
zCrmImi9CG!;mf=g##L;E%M31wdr!I)pV8vKUy19Sm~gDuj<dylCR_Kk9bR45{69H)
z8z;*}<41cs6gQ^*&}#|EO44sVv~FEq?ytC}?a~?7r^igq()e`x<NO4hMVxIHKkrc6
z_w1<5&#bzT7`129uf7%*bFcmWL}8Qdz3&}vx<`~aFF$nXxOzjvUMb?&tA^Vm9ST#J
z8m6q0In8z@<y4UH2Vag2pZ{+(RTi?X$TQ5Jy?Rnd{GX<WODwqMKRjq$uQionzN^3d
z`6u7sAB<IUYo9ga&6N}T=P%0Y{Of-~Rr6SeLe`0J_gR^z_5UqA<$NUnyH?ur`wzA6
z-(BBi&oP5lw@gyoa({7R{T~CLsTUqAbG-cJ(dP61^Y6!bYi_QypSREb#E<!LZ{y<h
zD`xnwO^w*kQX78tVML<nQ;$O~T_1jKKDTdQ`Gw?pbuub#8p6uA`<lEwum0XHdy)O!
z$7Qpu+hxpePyhc&?V!y}{n*OW%=%8#&U$5Y9Za`tIA461$NDX6b>Mby(fD1w?rB-m
z|2v=MWwuLYog^jOqqVl;MMAVz(N>W!I#Jo_H-Z*R^{slb<WPQQU2~M$#htoEN`Lm>
z?`jHf?THK4&NyhGVEex3o^|)_{LN-er%bubXBoKMPbf2szpsCN>DTwycU}*&(o$&(
z+xvOph497uU*)XVDcQ6jLs+&#ao>-bcJ*f+Hhew!C1yX%vRU#hzT!_O)ouM&q`#;C
zPurcJ>X&@Ix@3j*-ghnAs2KQ2?C|W>33t>VOtn~QcUXG2?%l}T|9>)1oo$qHJe>Ku
z(eB-H?&C)f_0+^K@{}}x&{V9)@yC8*UD>rg_qa-YP5xNVuD&wqgx40SE*mdr@2CGy
zb4l*qsW5%jgURpjOPM8<o$B}ebA#`z+{zELUQF2OE@@GHR@cX%|Mb%pMe6kzqHJZB
z&hHnp;QPPglKtHnvFloQ<CebrBXGTZw{ZDc|LdOu1eg91x-Wn3c%Agr?kkH8kGnGe
zZ|68rFt=&(m6Y`F|EH|`e^5<U<kMG9yXI@s%XOM>o!@dzFz&V*=jnaT=6*rtcCwL*
z_33B+$j1F^v=K~KRq^+iF}cIM|AxV;&yzY;1(P2($Qf(i3u^kjC&R4%*rcaBN*-_S
zdTL`J-`i!ih3Bc?g9g9p|2PyY<~AQ-^87G=qP=Lxs}!3(eIJgwakGX_e0F8)pM8bx
zA}e_{_~gv>jvOicY#zLH$J1|Z4O)-7nmJMq-2AXSmWPG^LZHJJhE01ISlOj|ST-LK
zKbhEa;9he$$6U!4IhKuE)@42pxc>C(R7K^I5DA5(wT~Uym@6LDzrJg}drO1U68^30
z+U>s|kCHqsW|{qL$)2uh{NnBH28MUv3bAiZX4GEUnVh4!UENE@-}Xb&N7Iaj;jRMa
zN0<t3H}hT=yY%@=h{KsH7w#Td6{kP5+eDf_xBJslyPvg7r#bF26X2b3^8V9F7bh}$
zY4o;Ceza41dEL9}xt&}RyH{^DRG5EQ`{mbf9O{NUBl)*`xCP$auq!-M*Kb>j+1pp2
z3Y_;R^6j+0e^9P{)xxeeMO6{w<@$eS<ZSd5xN!Dl{O|JJr}^GqpT6ts%5{6%wra{Z
z?I_GD@7wuw`{Tzd@zd9}l)t`j5pNs2W!eRa<<mXvWjr><AG)&r&O#UE+%1JYzjvok
zfBm<XWqlQI_6z;on3f+;t+ehf`d>Sd$xwf(_>Jm$8MWV2VmNF6><W-^-JiSp&b8KM
z96X_Ys@2>A|Jf`b$bG1qDawDlvm(!$Pq(m_|8>y9&8sGR8=F}Fy11tLz#Wy{JJWL|
z6x9;v%ulh}k@?=kcG-(HK~vRv&y~x~7qH@rUb<&pcRWLxQQOHK3fy;}ORI%4aNM`3
zov84mzfu43R^umb9VX|l>f~EST<fTKuq5xr>)Wz_1w|IQKa+}Ruok#{DTBF$Z~G>W
zq}vvc1e?A;_<NJh{dapw(cO(AsneZHu6Zxgd=Ys4*D0YCuF&F{8{YGlM2OG!KB-l7
zT&m^JD%;8Pc~flaLQHOW`18z}csXT(Va$fjm$z@<xi2L6)SC2*w%gA0Uhx(Cp2WOQ
z=udd2^zG|smzw6SXwYP1lG(|Vy0G{`LEWE-#-jbsb3)duyffX!cJr^FLeuFI-}lc?
zeK28L-QD@k;k548ht-VpKW&)4`2*kI?fx64&Aj-gpKtA<nEktLE+j3xvM)^a=Y@vk
zQ|bCQy`t>q&Ti_tnJ6t_z2l461_Alq`<Cg+?*CDH?5ce551sblr|*~i-jn=ExaiLQ
z-}82^xF_8;`JK$ur#&(*7qrhW{>J5B{q4J5{O6Ao;#sCY6A-_9_bpraCr=L1zg#a)
zIkuRekF;<(H)rjdl0%xy9<<ombMmKEzkh#c)t~*kyZbhuc9rLT%n+4uwC3KKD}~)f
zTWUl_x+lC@&D<l?a65Cx**$AlUB9a)^zScIkgP-HVxOBKp?umxeitg^tg@K}GTpLt
z8}}<&z6<2~_fqH!-;sU4GK2p_3+?{+SI3T(VS~m}^FEt9yBXfZ$?C+53X5u~Pi3{)
z`7Y05ji_T}#m4}v89!P?1smiQW_cRdMVu0;^^J8~y;f@$&y|YL(;|+hI^=V-^5>fe
z?XsFyG0&PsX5Q(C1`QvNHmv3CIrjH-UY1jhsQ2>oM_ZVtm$m(Q7cF|Pf|=vWwl!}!
z#Mk``ns;05$F9ratM|<My8fV`a%9cwgLRe@XFRU?zj(2=CgbCU&i?+7H_f>{O?&tI
zJ^N!X%zE|q_VoL!SbzJ~tzQ3l&OxbrhebEO`}%jr$9adt_bTvBaFqR`y2bA}KW|*q
z<lnm|sLtH(u72qy@B8G(sXvT^lctGniv7}5`0szhlnIh=e>wV}k84^}IDhTQ`1>6_
z`)>KaIdHeOzBS|7tf}UObA1`tq<%ZSH|xPBVbjQ+J*D3dhl|ge|K3K_A^S8xmqq1!
zUCI06E$0O9zI*rX-Pcxy4)@N)KHI}pZw!~LS6CP8ZMHQ`vgy-)qgi$DoNgOV&%ZiN
zq4m*6tHPXr{#R6Hvbc%H&UnAe>t@NjTf0v`Gxc(6<t`We<m;XB&poNv^^u>5WSz6O
z6^G=6-k<Dke^s(&BQEqV;+$}Q_h0cP`v2GM-lz6v;{N*D3H|#H=B*Z;Zf5eM*i2#J
z_Y*Bw7w1PQIV|tG`#mrA<yqSfjp^@A<evQYJNwh_*Tn2K7HaQ4?Nt4B*oiqP%O$E`
z=uP(N@YAn<ag@dDmb2I$FOGbxQFJh8=Hq!E&dhr!vb)_fGobv^Ovfv}r?*X<@la3h
z;)A!nu@jkirf-w7*gs!>+R^BOYq7VZavuKP^zOaS66ubN;)&;<Zab}IeNAt#y6Tfp
ze^+1Gp15eM*T(nJ@z1|?UeA~tAGWh*w~IiB!}sc&rti0LIz{T+|LpaXYm(S8SLb)s
zB=Iex!d{ItnYh}+uh(R3llz|I%*VhWR_i#+pht_f^o38jZ%2H``mlBR{kwP1THhUZ
z|9buZ_qVQRi=RIK_t(3!wLOuqzpm8V8~2l$<#c~@{*5lj|IHpxXY;o42u*AE+bQUJ
z_Sg3&3x&^heAAZbiyz5rT)V78;*iZOjTJ2E5!<KgpW?sWuW?VN#^cz-d!O#8tKQK*
zVBgfU{pGh6de!&km>XVSe*P$Nnz^EmQEP`|!Nh;Fw>#e4?V@1tM!MaH_mAtrm(G88
zZtLBtTJ`?3k#LZ|H@8i0Tgalb<?3rzU;Q1i(dEyK_cK<%-*<JF+77qBS&`Sb_Hjns
z-o2-knYVr}-_3}3Gmc$!{XhLUpNv(2BGZFCCtc=jW&NdU^_W%7d_&E_)@|Y2TBBdh
z-r}))@`r+lPK*T(W`Uc&Z(h*HcOdDQ@<YL#Fl|%uVxxXlJKd?C%ZzeXEvi{^XT`K%
z3*CfNa$`@~FviSz+~jxPyX?4+<e$$I*=4x%7kpCQajcVjqWH1Kt7*MKdYzNMCG1?z
z?Z}qJ&bEdla&3R0RG-q`1<Xh1FVx(tAF=t-@f_ah()l@y|8lM|er?3Jeeo~u-|f;~
zd`I>vEOVI1e9TOUSGVieeH~fNGc|X%y1qT;&wXq6D&gdBZ!`J?-b6p;d2cQ8!=h+o
zOXIQT`~MX$t>2OQGyQYRY2I5$1vc(sJ>PfDXN^0X)sM}NwpUg}NuOG?!~5ou4yD8i
zih`|udl<i(FyH!A>fd2{jp^3R`}NZWlfTv-oGcZ`!x+NOAT?WoO?QFIJic5Nvm-4R
ztPg|-C6uh=+jCnu<Nb+~Szm*`SFHcK{@VVyzoojX|Ly&?|H!%RhqmY5@n-w%ebDLV
z{lAWhbKkw+{oE;Kr?Xr2qiu@!zc;ch<hXqIeZt3$UAA`^w}0hc={4D~=kna!+y0u(
zJ7}glGdwKnYO#@j-j7YrI+X=GMJHV2l)H7PSi+#7+-Xg5pZzth!@*M~%k#+Ol+<34
zwOjq<$cerB-G**=M5jGe&WX|FYwnnnKFzqFapA|CMUwR@(++P9mC{^rXZGqp?)r~P
ze0lGhPx&;tcc%IT&V(Z;1QL@TXRkkgFYSJehR6MiZ~goI7F}P;S^jeW)94=>tM4ze
z_Z6(Yx-Ry~=l}2bK3Ulo_w`Voqe<rc@8_Gca@Va|$v%x^)$7jdot8&AKJZ=J8_#PM
zU$!yy%l6mpR#$(0t=bs2bjhP@vEOySpPznI<k7{3_0o(STlO^s9{Oc*VW<4TR~-#G
zGPT?Z_UDS0^nJ5=>a|nACh0$y!-=J-zHeX84Bo$EW`+T;{`W(Af@kFtrX}3DqH(f;
z$J9nysEdvLfB)U(58j79n-dsY8r1MHHli$=L1X>VV$QGc`Lh(S)v$=Q>FDMB53s*y
zk#J;j#MgwPC0e_fy*RyFBL1zcDbjm?{)fMSifDjfoY;z%_0BiM9VF&2v0z!dg#U!{
z39G*mtN%vt`ug&1_Sd2#MS=3`JtuKlJgGW#UUehOw{=t2vV;hk{khq`{8QbIn1ziO
zSN279{eRlV<5t)yaeUom3)4-t?W_K%t3F=(-LrV*tFL*PRgU);Y0UasFkQ1t;^Wj)
z_uK6fkDYBTl)snN`@u{xls#~5e<A}TgPiiQnisaq5BwJmv^ZcoXXAsm?2-VU-i0eq
zWr$19H=WUw*4sDzYVPs4hP(MWPp6bz-5>g*I#A}9(yyi+4v&2nd<cxVAX}#^)$iRW
zaivw%w=MpYHIHz?L3VG0(qxa^dVwo8CSRT?Tsqv8$Dx<`AZgK`E%J%whg;9+pSC(H
zeSTwZ?L?*PSMEGHRO4>v<Lx!;@Q25_tT8Ro^`dh&1gy6F{A$JO-qPNhSuRswcfJna
z{mp;U%Uw%$PxIchx41*2ShrouA^Y>(!W`$LCphlye09HOe@bneYBlo%$7LTM`>fTn
zjZ^36brPR-ApFARx%<)=tT8ZESGZdC|Hm3XQ``R=H%r&AWsc^nzWd!UC#U70TT57e
z$(f$t5AS|8I?0sv-=SRe>eia=&(+gxtZsSOD$EhnnwXlcSe^B3Luyw(SHGQm?vG=t
z%#TBQUo2g`X0fN>?^kU<Ri-<K_dWMNo%~IVt*krv;Gd^EpQmxjT#}onaH{N|-L@xx
zwDwEevh7@Tr{5>y<ubm4Wru7$mu%sFDUxs?amy*`NliS*=7rwoeX>HfKj44I4!$En
z>i2hwx_ErzWG}K4IPrWhhwZ}C7xSK|WglEEeSG$tCpRQH{A-^mJfE5LdR1D_rK-4f
z8t+To1Fs)<zS=f%@mY4;!*2H{zh_&%##%~w!KsB?l{!9unfpz3eclVDXGSOgi+$O*
ze)rR-?{B|NwvSkUZGX_c)_*(8)<%@?F-r*25eVD0U*G||XPxyPflc2Y2yD&270=Zv
z5OC1I@g{$N3%7>Tgi`mc$!vLo^PjM+5!h;B%lG~N<8*NiZMj^BjoPcEUYIEe%|33o
z=e(U=&IJLMuO>hBO?DQ@zb*QF|9-{I4|A8Pgg*Yi;`R~GTGd*ScX}KD%bs;*{r2~Y
z=#8|+#tZ(GwlmI_KKpV0-_Y~TC+Znv0uO46@$A3fdnIoZ2j7}WnNHGs<Fj-NrIsz&
zv^%J@@sr_C)9tHj?|ps$Uvi)AnK$)`Ep}J5zE>w^oxEKa#awwN<#9v)-SW;ew{`~q
zDEhwsMZr1C$yYzc2!!hV{n>1g%)wjX@%H$Wk43NfZ)i9j&{q%Gci=WV$4&!|xa*r+
zJ~h6WV4BRCa4xX-n2V{M=sU^KtYs#f-m}er@?GymHctX~tg4i`(vio0v%{>P1nqEs
z)pg;+wLOX1=aqOojGAg&=PbV$*gf%1?Yzqh>*s#BRXhLK6s!4<Uf)!n|H0|q^56Qa
z*@_-|oX%e&-OVm$c;K;^^uI3~R!YoD^j{I~&$0I`uk)t4M!Ej~;~u{Bet0{hP`Br%
zR+k`S<^tb$f1b(jFSS^8WqR%E-S2j)?0scmp?v!8-&Y1I9P56+R}*lOxPMHMr^ab%
zi~z5c;EKh1++EVUEgBdPt0XmWp2^`~T08f#(b_(n1xFG!ZZp1DHux#VX)o7aGVj=t
z#sv}lT`yC55}xQx4T=hniFXdU??3B^CGQuuiEM1Edlq@UUmc?G+$3bdnX7yMS?t{;
zcd}8SOzwhS<kSCm@(VVfUdL|UnZkNN+T=EKPHsiQJ6)dRb{+=ezxV!pxU@%ix9Cza
zi=?F0ZFzcc=dA6?GcaR}cl-b8QMY7&`^my8KFLKKSI-AO&^s^CIR8c3W3$RCv5sE*
z-b0Ju{_6jKc*7mjs(zm(&e{8J<fs3WboF=ioqp)jubjNcKicn#bjjFkw*M~>UiIYo
z^!HWVldQEG@A;hiBKG5VysYoWV-r#Z&CfqwU2&jl#`VvRv)RHLzg@5Re70>@`n^9+
zT!+~7KA!GzzY)Kql{rT1?IorAr7xz}{EOdzc;^cDS5l{QHrUIwh?)x=z0_#pESKTx
zdqbrvK3(QVZ1wZMtEYee@Qs;y>#pYx(LJ)q6jGn%u}osyx~z6J^Yw%II+qMygzhs7
z?da9#oS5)1{3ibi)#WQ?>w_;lSl;)WW_N~9d)>MJtreD?%%8ZD8@|3=c>c7(&WArN
zQolS%`~F>S!>JALuXcVY68>AX`s0h%;`>K7bhO64FTY+_`s1FUe1o%=&kYHQ6+8ip
zl$lxfH<(ynsjq*2_7vZy1xpsb>h!;6YA2tN+frvH_`rS|hw>z$**C%)lA{7@vp?Ng
zQ{$A!!xz2TP1$$0vB3V976zK1RkQq$O%rTlYn5!Dz!C1?!nUqrx>WK~*GZ}0F5Y|7
z+wn~MQkzicf&Xh0BmCdpwF&eLZ2qu?|B-5rxqiv2Yvv2`Ef-$69edCJsgAndqPT}Y
zew=*w`*d>KMlGwB*mwVS9iAKVXyMZI+C{&=mH6EkbXmg}yY+)lSjL7=_1o`${3kPQ
zHmk(HfP;4TpKmxGuY1R|;oxoUxlbPUMKP*>{#N`&>B$s(ez|F_^Y*e^R<!Z{3T=yC
zoAR({S&X{;i{E?f#F!$k`3MQD`aVUiawenuuTAq+lnM?MRhouf$m_h9_pHxqyBqWD
zn&1C-I9u!Y?YsZ0DO>JM;hf#ePj7TN8DGc#f8p!41G57>Z|=@pW8-!9<MW$mq8(mz
z6c#9oH!TYFSshp*$CdQP$!%Xa_ji6#i?1dtJ%x;vG(;><sJDtJasJ=x_G^XB!?><K
zXJ?DNct^S4n$E>P`0sv-e!czwZJ&(Vnx%XbI8L9<o99{)EVZ{tJbCFnc9!keS@vYz
zkl+rU>$CrFoBnK>tqm(U%Qytz<ku{2)Mkr&zTw^3mMp8DGkeZ6IsRYxd)CS~wU@v0
zrX<xqcXeK{Vq5U{y*z90KWS#_UiI#}{#)L)Mq4=FS*>UJo}B#gQliem^E=;uF&6te
z>0jHao6Nh5ZkMl5mGaOyeqryTSH&iRF4~4Q{z*UN+Gh9%xPO-OjkuNkq(5*`-r=n0
z$G+W?4V3)%IO?InBukSGI+n|0s^{KfnS1${_uJER4trK9z2%>0p)7lPMp4Z6P)SP-
zm4hZGY1huXsjqwFX;r$D%WQS|bLY0hA!aJS;+p(d+Rh1&=ChxqAAi4@egBQ}iyt?J
zIo+K1IsB9K(uW-%0xr&3eB{urBCYf*lb`>Od-y-JNj#coioC+EkEgn}bW~*6PZ4T}
zz8?Ma+Vtx}GnSk%j5cYU{b-wC?W-sj9w{H$wmZLT&xP*w*Ztmiv^GZlqix3C<yPNW
zoMi8dujH)0e@W=3ZOPgRrWaJy7_*Ji-~Gw7zZ(%TrLZAn;c6Cbg&*4Nou{rmY*Ie`
z!>&=lpudw<=jpHS{L>dVgfGf)@|cj|bK;|V;o*bq&G~0KH}mJaZgjD=$(CQl)U;Zh
zc`bXxHtng#UsIcEvUMG%ZcO`Jn=P@wc+s|RYXk(QdCM7;=@dAopI)yKu^{t&i{7n+
zbJhy{JG>|CSX-LGrsHjgr$6wS&35>O+otLbuDhCUO*tMl<#_ed(~kwidv(e^J{^=O
z7muiaxW&KQk@L^N&yJ^EWTkC*m)4{u{dPFKX6iSW>Gkh?_J7#<p>a)C59@)6yE|+X
zv<hyxaK4@|tMcF5^-e@l`2QyE<5Lq3nMl3TV>l}%)47lT;m0zw7eD{7@BdnAd1d>b
z`Q=|Yc#>}%n0#RUcCNW$Ke=R=pSM{r9rxQL=8nhg#I~TBUCR$W=j~j@UbT9DrSFwb
zjc={~Gc$+Wp8vPbpGj!tv9lHJy!^JaTjV)wKi&Qowa2&6&^-5q)GX#nZx$u2v3*~k
zVk!Ts+GW1?lzHz%;+9yoY^{FpKJWi_y{B*H>7O|E{OGOBP|Xh?PWSD*@T6sZhqC(G
zn#M=r9NgPAc#<?FeEVnbxT+%`y5P@Ty?fK&`u%<1cR23H+bwy|;y(RvvJ`P&C|dT!
zReGCBON<rwjdh)3y>B{iy)930X<t!RZZJFk$#1_~3y+q6dRCLas*3x8{iecYlYf}~
z`djwrjL%Q|Iv=55k((ko@{Q{i(kC*MuFR=XDOzgXz2?&2?gtGG=Po$q?Dah{dE#UB
zi4!7@mT??Y-MCAB!`DSjd-LzUd#EgYY~}iiojE<ZmB%k$xRZ0h_R*YAzgSqzi_L$V
zOlbV|^ZyymHla1IIHUrF-=BQ<renv=vxbj$O3dByZ{DlG<qw5p*gvOA91>#nar*jy
zflX`3v<(khqS9i1@c*5V6FL7;*Xg-^Jl69I9@MOl{K0-UuXD|@_g%N-=C?ikzEif{
zaT2@oIk%VzU!~m_{&~Ut=x%mGq^tMK)w$=J6c;>ucQpFK*%k3JrdrcZNQE7}`dh(t
zp=n6b?Ec_?>BbxN`!DI9_;OP2)2~ohgR0Foisf$m^7(i+8o#bTsAzIz?c%m+x&Nbs
zdUyR%H9qL5{l7Au?|I4U?-RGJ=}z@rmF=@Tdsc0xMtunDomn@Ee{-LE7OQ^d+3u5}
zQXdyTDB%)0u=+~magVTrU8YxTtxlaS%{J$ny?f`M?h~udrPa)uuf8mL<@(K&Ute4y
z5TV0UIj_^lz%9yR=cO};1sC<sYM8O`_0KBq<S4eYx3X4P3--;+xgEZ0@|)j#1QH|^
zET^!Cp0M+l&$+PQ;&NkP($=<(0Xc?QlM3GMjNVpL`q}rZy3_j2()9}pf4%07?Cl9(
z@R`5QRww*U-11{@@6Qr1-_N)BEYoh`h^vXx>uNV=gsw4U>GL-ISx~j*@8Rc~Ys{Ng
zh<(qfdXXo7t}FJxyR4b_r&|?=e0)nT@*R4;P^WIW^pPb(kCesg7C-&@T=lryk$u+Q
zQIA_WmHRbJcys!;P1cIt)U<7CyVXC3L$=P``mee*Zf{$xZt$&QU*4m|mziRI*S7rP
z{byOYbLFp#hEuh*JGFe?e+~1laaz1jw6#tB6Vs+s87+olQ(fZI*k$<I^mg8NSaNK_
zOy%R#U!+ee<b5!;>~Hvt+=G(leF`Z7;aT6Ce`%-Qf6XMt`pD7uPHEthxo^MMZ2E7_
z$;rv4vMKNEayz|R&H3w%#3Cz_>i6EOUd|j)5S!Pyd`WV6;{nOiS6_8QI8LYijGZi(
z^?mREmD5EO7TSC=ylDUNT>T&OFYQU(Sx0+1Wb0p8v*e1;{`M)&x%s5oepZ<nz6}#a
z=84{u&=l}*)Sa_+&4h<X?%jFea^}Uns93fQoyUS+&FGqc-KE9pUCuN6sqNWedQ*Iw
z6jdkLvvbF7Rcn~uS(-Mtb>$^i7f;=_i*D-IM7`&Wvs!ia=}rG0Hd%wY?YB>Uzp_o#
z=6k;E`GY@$;y3Sb^lK6oi`P}zwn4Od{>I=-bB?k|p3seaUZ(AoBv{#T>+j;+OE&_J
z)~u8ByfE>+gLTx|UoCqh=T3Ief8H1tahL5`_|{sPlW#jWU(j6qBFJ5Q&H1%$`<ib#
zXnkEJGcQTJ`>oICR<B<jp~h?8n>6`+ouL(eF>7u8K}`e0U6UWQ<heM1KeYDWn|-fm
zG)(LN>c7Y=|MoJ4jq9$6-uGSb+O*cPw{hOG!)3dKx$@rcPQGV1!|J5@?YH5JXV%SV
znUi?$dS{|LOT()J<)?g(Iqi6z-MB?d<bJT<znSk>ZswjXbpGMno_&vdz4O1Vb-aDm
zV9BygZL=hNem;&bnPT;nrSzH3n)B1%?cDP(?$yQDPZawb0>7{S%j&tYbK3lvFOwFn
ze|Y*sW&I_klfR!lnDKnQ?38?`32*GUpS`?rJ9Bj>8_%qeyfvZuv2yz?+8&<JUr{U>
z&r@`zWbN6-3G0_|ah^GP`{oxZy={KG{PuqlReSl}UHtg5J7(6CF9hB*X4x{yE_;o_
znX~e1=DYWqwmgjbWEgK!UHIc%<N9TP-yIQfmzc7rC3=gpXP}z@rQ@m#{C$en6)E>j
z>b%wMCe1o)LD<C$<;-h;2%o)k@YSdOTJ~MG`^}c0e0NGE@?K89<NO*e6Om#;>5T6+
zdlXdS+l&sct-tn;ySm=Y-RRHDRwso6&Hu9XJHMzGz57?NeE)|N7cUh?r(N9>cgkS%
zrj4~BYt%CK=0#@qnpATLMO<0K7qaHgpT^WBN;wvjm5+vB(yrUJd(z9wx)oD@Z=TOT
z>3<xL^}P0jKjZexP2BbRLttCX>1kcA|9J{>Y}u91NZXrBe-rwg?Z15E#;ZH;zFD{X
z?xP=zx0$+i@$KH=6|~`d(agQIO%m*d&PfJ0{#$1zO3c((U$vj(b-ezr|B4J7PUgK$
z&gsiPZd5bH_s8B#X`e5rRg_&0ocHPR4Fi!+@vmYROv;MAXOP+6)LAm?Tl;RQ0}FIE
zINtWn<TbrxC}p<ohW(8{dwlo*ne`=VziUFiOXc1ykMIjy->+3~Q;g=!Ph+p1AN;od
z+Hd1myVUQuyY%e+=J#{f+t}}FqGdlA7&tdL&NAfjVy$0jS+;KW^`)o(@0X1G|NsB{
zd2f^cR=3vd|NqkZZ~XhO+52Wo{;H3A=eyuO#~-#Q3x8~wkth85VCww`^AG+u4F1@;
z@ARXb)yXy7_kPLkK6fF+%cAylLH6`3+Z5P4b!`g&IQ%*<*K#{w`1Mr5EmO1Rg~>lX
zRuF%jrDV^3>9-7lkHSJdIqt3~)7CNmxncT^)Dn&P*YvJF*}6YFsA{9=UcILJ4UW2v
zJ6|vwYyY?Se)jk6H72ib7VP={mwoALhJaj=`}$|6>9PFv6&90Wk*@o3`bWFezWv8J
z`OdyGKi^m0qW3@f2>ba-m4a=nq}J44S-4u~rR0?DyY|;iF$>+sl=bo5qtD6A_nqwj
z{uX-aFum^Af<5jQz0=%|NUKM#D`-%&c(QeA6HlAQ)rWW9ZaVJP=W#VTpzPV@SsH#8
zt3PgdHrqMk7O%t>zll+t@|)`~Ip2_vQMTQ^Y>RH?enXonk2n8QdL|O;u(EJrQ@rUD
zh5K#YdqeBDb1Egr99$Km{%g1ABZc4QpA09g4Ac`++m(|1qm$#J%K!B`=5ORasCF`3
zma}!>sJ?$)VfF3P;taxux_>^GoZWKz>h8GJ!OOF6yr282W~=V6J<~(IkKA`$y?>c}
zTjclCPyb0N@3!jo&Rn-P=?|ZW_U`g`e@?Py{7QB+@lE>>aZx$jmGueh((S&_;@Sd^
zJ~e#!_)fI-ciHdt(z(a=FYRitI^wo<zwP!Tm*#xgV4~TQe(I;2;_<Zm{%-HOzpgo=
z$#%f>mv~I=td^jS2hXQu{nt^8T^8Oan$?n4e%iOZ>y)32a@PBNf0nb_{}-D#erRa*
z*|+xB_Sbi|zrLbb{UYq$@w(M9OLu*(d)*Uu`uBsWf0Q_m1uO|Tv;XN1RqMUCYx@^G
z{gD>!eYSaZMSH#bik1snEBNP|c+C9u+g&&O*4NNqU#8!Ujg1Y~^?ZHxwP=uj>i>Ts
zeh;Ez_KQsPn4r}zXY~DoWSPK1=6QeahPoAPfBKO%{_3QLklsBjo=a922?Vd-nJhmc
z#)bKXAM?%`HUbr8|FYCNO?RpNTmCw%<7w}NDdC?t9G|v}O+xtf{j*Dq_iJCz(rG?>
zx<kvQij$AEOE)J?{lssznauZv`8v*R`NHw-#D8&p^V2Hkcka0O-n!26E7Rit_HX`c
z3&qVppZRtx{+-GK<<I@|-#unualQS=ob#@y4F4@yFmuDrNt0TC{0mcbHEi5@>6f(O
zhL!rye#}h0AYi@l*IR?i#A&}LR-G(wD=v;-w72SD-1;919}gXyt+t`PAfe#f9kb)T
z?*vMCEY2JKUnkwy)^x>AYHR<O*~&Jh+LwPs85G{Wd$jKSv4xQv-4BKwk~;Rzp<uQ`
z!`@jeQ|~S5o4@;?nTuWC_rJf_>`&P+=dt{rCW&O-Pj2yY{U6IRq+B20diD3CL8MgL
zCEtlJ<<bOfzn#-R*v{P@HNo=J;^NqP^()G%i#G4Swj^Hge%3p-uicmaTs<ZEqkL7`
zWR?Sm{+P<8G3<M|yWq=~j#-oPj~#2ezNVm*zpZJ*$z@W!%87@c*Lc3oo7FhQ^sL`k
zb-9Ez4z5cZBU*mM%vCt#yX=oul!{IN>*I^}%m078>}%cgKxZ2cgZj2p!9fyB*-wA!
z2`K*(yS(mUC8z$Z#;tMu$GQy<<o(mLQ~%d1d5GcoLB_bw>96BEET4RGKi6!i{YP-p
ztxta=_DB8OTl73w<@=r+lSJRYWO6cmHo5EeAMNYy)8cREix+IV7-Sf*>ePwoqzNTI
z&h?}#e1GvPIq~paW5xW<Y;5uSzC`^IDEH#nopXz05wpKT*LCGX4Ywq?!~Y+hRK%tJ
z%wBWDkMOqi$s!-FEMyYMcy|9k-`jid=ZTm87RfaH6PS@-`>!b3o@1i_@8^NPzCW0C
zRi$Xg-lISMSz0<C%8Od=Jn!az?odGs-!zqf!cp;m>azRx$1`Wn%)IBvSz+~|`JIdQ
zExrA=^`dwGtyf<?e@&cniSG|Z)6CKz0c)L2qEwDtaN+&TB69Ufv+P7}sU43Ke{?pp
z+}|T#SIGY4>Lk`r?oKTKFP^R|GK{I1`=9s9O}E`U@BeQSl9x}~V-dbQ$iO1=-yV??
zCTYc-Bbo8n<5#Tu8}T>dZ`C^gxP7W+KW3``xOH2<yzcRX2N!hr&%Y8d_jK^zO3!2W
z_p_KR;b6*2o&Np*;no>feyx1$V*m5~FCm`Hr2Pf1X6Xy(|5lf;)KBf_WP1K#>aJ4v
zKmUGQIG9yip)!sC(Pg{MXQRGLi$0h&?fbs!uc!8Wd${|1_rI+-RCMQD>i)1(-&^w8
z()crrECPgY8uljsE&Xlx)NGGQUWQ&=|36NCDcO4a2koZiTJL4w3TfJk?T=b-7oBzg
zk8$kP9q;$ot^3nb{%`%yuPrjxQ_g((Tlu<ZFJsO8`;$1lew+UhdRN42{^V|*w$Rk4
z=Rd{%Z`r+jm-d&O)yG7%xw4vmEoc-q;=8{fb%lL(fvBf$;(-$HEc>4J>)O{Pd0#v!
zkKV27H_!I>rSJ>M@-f@a$7?FT|M2y2N3ZH3H(`TMGH#!??dLVs|9mk^<5Q2#syuda
zy^EKtzTSPcE`0U%rqhexP5Jr$=ZD8{9{t;+ysT%9kzEA4g@cs*`LGib%9ZQc>wm<{
ziN;S_-QxFcHTORG`zs7S_x2uq_utTO;r<_4O=>~$%WnM<%4;~8^GB>w)%W!7eW4F`
zTe(hcf6yH~annza-+#<>@7GUt{m){PBEL<Zwe8Zp&)!wvA53?)NmM!`ASAK3QpMYE
zPo6-zar#OjZmD194lzILH!Cg){LbrLIlF#I?(g4II>k-)J4HX!w|}_nYWCO7EAHNZ
zeb4L4y<*otr?(s1^Zc9<^5*-2_+NVF&n_?jeEpn3@SMK?yzjn#k~~|_vRnJ5)P%Ys
zjVrqs%*xsATcKk2zWu?j_qQ?%%=W*we|T7-{lwhpZ6*6Z_|B=UzLfkWvaaLa>S_Dc
zmhNA<lS4b+;lT6hlcq#8tp9(Ich|#NXI2Qk`@Vg>5s&cn=|b_6hb10){r@ey-}A+;
z-4pizYWf%O<ATByqg~UKJtJ=NO?Nf@Blvi+@Xj!i`Hw5!ZZrR!eKXuQR_XKy&&54g
zIsRDh4qyKC`nhRE&VjElT7T%h*O4^2d$xpdq;}kA_g~7_*Tk>=Z(FanY|ql2ZM^X;
zS3V0?-`D^CXtl1^{)2H_vpx4FU5`+>_lwQv<ndX*Vs=iN#d&M;`LDa*eYC%Ld+X`X
zasMa1e;3cv_WaC#mY)SP&-$&tn5w=$d|l{^nepufuT9>~__KdWgx#In|M~LXPv$9#
zj{hsHnJatz>gsE|qjo=cJ=OXC|A+aL*2ZnDUOg#q>+86lC)<B=y{}&v^8H}oT=mOo
z@jA=CA7Ef$d+s>PuqTVP^pe=jV`XYD<NhX1pWU2vJ^uTzomXFNto?p#s-5}E=zYK6
ztDpb>|GxL#^&av6|5jhE+8eWX>ASTCoslP=vnPAlPk#RNPhpLc{b3gI$J0#TpKo2z
zyjXYj0;O3!UG4lwjb6Om_P=#i(aYcOzr-DiKKt_5p{1+ip8veQ{+iyjzx8z+>-O7+
z+sD1%TGaV^y<>G^g+t<t^c4NmH}2MjX}_=)+ACRlCHet}Z^Y8NXEyGSC-N>=I^Xv<
z*@t_tAyeC{)~gHqOjhat-DWLR!*@Hp|2wNuRkljZp6~|?RLrK#FPIqYF36_v*>C>e
z+8t_J%*zb?Gf%Ic{OHiMH3AB~hp$#O2c7!#qe4Y#hxSjsCjQ_EURDlUvF^SH%MPg=
zSLlhl9NWLv%%tphb@s>29cIFF3+L7EEI$wuuyBG<)0$~V`fpo2zEWFPq%?Kj3DdX#
zf9-m|B{E=Xan<Ihlb4>#k30G6`-{1IFIkG*3XUu(i+BCE)1Gyg`Q5#HotujKt^ImB
zbN^pG`j@w#|B>9e_p;ufo_y$Y%X&4d!0jQ+uNz*9eTuc!)0a$peO+_MPOW)c*X2*G
zo?5k=rD=U_9XI#f>U&4kcYp1v>bN##ukI~<&!}Hn#<EY}ZQ38PGW=rw<X5j=cdiXv
z`A=@emue=H2Rq7rlNweS-1_hI=J3+V$9ZahD&BqbqOAYoR+~LSd!{EnPdZq&du#C{
zE9IxVqxQ$GQri_@x6$=b@0YDVz1s@Jw>T|-aM-saZRX4O*P@>O`=fcbCnQYgjkSly
z!u8kAJYWA>ee3B*S`%YKCjI(;>7n`BnoB=lx;sh4OuL)kUbEvYZ;aw2)|j(GFRS(c
zaTd&65IS#@n$C?cU(a9v(mZ?m!J6EU`+l!XIOK7x{=-zq(qGJWQD3i2EG>@unzb(W
zi~p;;!nd}+Ze3fP{4ZJY=k>#}!h4e)F04}cf6#8~x9*ew<=t~`d{0eHmA@AIPjcSP
z?3XKA|FcA%l*rTiqrN0b)soTQsnveodc(pO{XbT-bXYB$Uzhbq;O6I(*1wL%e(7GV
z|9|xf>+QN#Uw?gnb#DJb#n8I%EHgJoa7gTDNeN>)_@4iKyfo|4Wz{YA+{X?(@3U|W
zk!-%awXN`B!)qS@lnagjAKrNIDum(g!G>GBLOOfSCax@y(XD^Xd~?OV9d%WIMe0f`
zP15xLSULm-hQBkGe6#u3@f#|Sn`KMmxEDJuzolanzq9@ET5gYPqTlb{*<YJIIZU+C
zwJNFc-5$e<yDU^H?rQ2xvlC1|%>1A4<F$axZJjoaGhVGuTN?jYd_U`t9|E^It{857
z@%zn&|HXeqCeOQI=3%(u+yMuHuYXK5%IyWCg$!Ix+L!s;*1Wh}^}+kt?4LykHtlB#
z+cM+ckFt+#kw>q`zhA#I`ftI{?5nS?ukN1p`lisGdD@vp9D&>IBAISXJpFISrL~(E
zq+Dv-=)_p^{>BUCuqLMEdKTd_I*b1|?OwF_-eL{*cnL|>_QS62(VGuioT&1j{QsFs
z<%AFC#ZT96y!-R@<J|YI8?LYZe*fW~N0+tYrzi+6{BofFAb;WJwV~P5Pwgz*ThqO6
z)$DimVe)EIw?4f2)FDw^QfQI+mzS#*r%2W9sbnhW@-JmpzfthZC;L!rb5O#U^nXgh
z?&nwiovK?GZ)^8NY5D&%?#pjz3-8{OTC#Na{Rus}ezu>lHixADd%tUK_4<APV{Bqe
z&1Xzm=db#5&hb=jW~C_$|5$j|ZTzS(hxOC{szv+bU#tsl%6{gkF_}5d^?Upb$B6$i
z`z^0EyBXjAeB+-@%%uCO4_7;1+|PgbPsBgbx1zoMR-*DUrukpj`_}9?fAu$hO;F|f
z>RsQw^Cs)Re(zwZ|6t0_Klis*U4QlJV@g#<`JB)S?Uy@WI#fJ3{Dp5fZ+hVR3(^Al
z`hWCyUD|Rq=rarB|Ib&ZwZ{M6Gym(=>w@Ql!k_LuVSoMZ_0GF?6VpF9-%ySZU$<oc
z`53D?^6TZ)-rxP6{`vlor&s20s{7y8tMhK(jd{Bn{|LP4ii+01`eW8rtKbQBoEAR|
ze%9r_{&4PX$V&F5{~s=F?0a9nb46bHPobaQ%W9UsUiLS7@BXcO<M%!N=yrVLtEo3s
zw6h=nI{o>|u7i{7e;tk65xzKEeb)b9R<3cc?oa4*@(}%d@<M$Qms4t9Qk0MU3CCmo
zUT;s9E1ZcvUAE<4qu`I5yDh%0soSd>+FDxv`uam>Yk5tUmF802uXKNeFWUHXeQw>x
zx^?mM^`FH|)}PW>#Qi#8x+d#gWx@Mv*6d%tlY7-)!BdZ3U(AZx612R|XI}56%D-|Z
zX7m5;tP|2VUAq5BkVI8=L0)*=uJ@;(sO`~muil*XCffA>hm#hY15aL?^-m@IZk5o-
z$^RbD`+qKU&z1ixgLm9u)Dr3N2;*vPRctf}(G%lhb#g4!Xi-z;>~KmFUboqQ-p-l(
z^Xqg2k7pK6clEgT_DSy5qt^Rwm;dEE{8nAxP04$I{<~+Kr+e2r{<<k2>m)mWt+x04
zCvz1&ejj=n)b-KKZ1%r@Ev0pG0){L0xtw2P@$mYI>lK&Y`ukse{8b{{UScbQ)2H_B
z_x*RWMz_U>RUB)pdwuh}dbR3ZIT@jy*6I~Pk$w`6+FnYnVY5tmUfeQdXVekt&=S((
zb$Xa^A;3$CtHo(y!3rNIVUCt2ZNFbDKUdu=|M7R%mX*(+tzVlKdNFEYmF(Z%*f|?N
zy!k5^|7E>e-RtiYb365F^q*SAeOmCnTKdxGDHq>0e7ds#PU)i!e|cQ~{x2@mo%&X1
zPTW<sNfP|JTBqVI-d^H3!?Ha1OT)8xja`xRqFa@P0wynTjAj*97IaP9z!A2D$=pW<
z6qa5>QY}r*0XtGOgt=OdIR@+qQ5N&PTlIP8eY?6_pG&4cS{|Ej^fGekRX;XA8A<aU
zZ@zt~mfMxP{pZhhq3!1iFF&<ABo`fT_gC`ALEVJ-%Sqd-ChXUj+|Bs%8BeuTxt9IK
zn$JrW4H~)5vgq)Li0%5kL1E%{39X<+iKr*eDh>g=9y+aNZeY|C>Bth{WMy&;STI9N
zlAHC=LWdO{8sc0nhm;IC&L|0U%vins_T#JX_uHH6J;^O!v+~%>m{iplDJNfE`t<2z
zyx#O_)g7~nUrpQl`6qAGE7_NKPfTB0|8((pUbzLUZiRk((_sAbgNy&ci?ccX-S?|r
znN~j8#p%=4FIAhT%kL0faw$HBBVbvLQr=5hSz}?7*7|}BUxkE8M<=pf5n=i=WeT5y
z1!$+Nhma7fle58&2on+RR;NZog%Ay8ftDuS;`K7|x7VKialI%zSnYFc+@~L_&VF5c
z<cH+e@3k_&o!2ivUOs72`fX3=0$Ya*RqJQ{l3m=qFm|I|@R9#a7k<6-c^LTIdY6Ct
zq)h(!$eXf1%YECUGk+vaWSMtr=ePY+kM_Bl-H2$o-4K>!dY^UgoNg&b30LcH3K~r&
z4WWt_3M;z4y2vo<iA>QD5^7~y=qM1PC(6Ux^e|yXfSQPKtCOQ4hmVpfXOCB6_CB@h
zztv~o_q{4qDh@vTzUtBLna`HGA1L@xwZ-zsxpk#iUi@nfd3-;-xWPJe-NrxD{#|rh
zy0Ez85BHbf=D(l*u#dAl5H<ho-#wF#{gy0Mw|(OF!L!ClpZl5mwS%@N?f-nP(|y+}
z<+}RkguDg!+P1_qeEYv!Y~tb%yZW4t_XdWnHC0ZuFu4<TsP)tVU5&mEe2f|*Q#6dY
zTG<W;?C{VN;Rd;0BS24tv&AV<VTOyFq2=u9@4vp^dH(v_d`p|>tv}~n{TR13midN7
zRqgr3m)<?PCvPJAe`ke_?~3pB$FpBc%Rg}3{^7ZGe`n0zxTXjGOtw>;kN%jpBkyv&
z%cmLP{i+K$rp5);>+vOCSz`TWPZ6uk3KrQX77uOmUVd-0SQEMM2@_YxwZ=&GK$8ct
zL7pv31DZIP8yFZ^Upvk+;^|_oUsxFK{i!Yc{q_BEf3vr~`uh57{`GZFcdEy|jau~k
zZS~6NukUuRijV7f61QsoRimFzCBF!}c-w2TJ*~={ShZH%T7CYn?NQrbzk9tkZ|VCh
z3*J_F$Ngyi_-o^>@2`&6USBQyPi9xtv4G3_r~jK`%29o&inlM~h|2P7)8-!*n|0Fh
zSl_-$M)CUp9?o2z%yO%KYt0v@rTVWQ^d$xMOffaC6TBo>>lD}!wc7YN$K97TqQ?*1
zFF2hv<I2JVrxY|~zhtiFyuSMCj?{?B*Eb%SF7<qW+}o(XQEMY+z5M_4xVGE<>vOz*
z=)SApvx}v@{PSPlqkhFPTkhUI8`^xTN%Ca#zdx!h7hctzDti8q+vohsS5IGtJxw<4
zI{P}bs8}~F{`IP@_1BE%|9Q~TqrPtMIj4B(u=gi?Z`sDK^{rpGH#~OD?x^=qC+lWC
zpDrK%N8O=VT8(Y_^)&83yL-G`lRj?G4$q#xIJAD9*4CQvU&oKvhyIyVbyV=x|3wFH
zsqAOnX)GfY!gp`K@U6Wz=ECu3FL}O>QQH{0TD<GW)^A(G?;kr|{_5+l?@vD6KbT(l
zVCtNsAI{(4N`Gjrx0&s7ru^*1dh07&xY>@^2;K`%<o_<}z0Z7S_3rRp*`LeZKS#Ie
zsfi!Gui(i)b$(Np?d96nANNMv?D$$OGc(6FH#hm;60eV9_hs%xJb3gYs_pDav+H5Y
zg61#!<y*1SamJgq5eI)Mtm$M^uJ(-jq4m~uTF9Npcb4pX@@wg|>$~?&e4R06{-d)|
z@9rP^bIFl!Ua_2L>vWldJDvX}eI~WcPu=z{Vi)K7*T?uhW`5lIP_5v8UZPsv{S8_t
z7F1;HQU1D)bNQFT=dE|+I86ed$o|mY>|$2zxMDfCl*|fK_M@T|%5xI#Pk(*4oqOA~
z5N3soU3P!I^{#uj^rfr*6TSCWgyVC+uD{T=<^NNi58tFLALO1$Y;by2y`OLWrPe>9
zMLo_hO)T9f{M#x1&s1xo@P~&I9?ffSY$*^47Fg{X{m=LL`GmvVsa0pMpO5{wrjFCh
zwB*nlftxo~Lm$SPEj23CoS9%_@yF!8{U@(z%k@XgE-M_ow>M(<>UB!W7p>2)k@_#I
zti!6Mb${0WGLe}-RCH>N7Buc$z0sQ0ET;0o{F3jkKMc0~xKn=p+@dbm>8-EKthAnN
zT+()9$&O=_FK+$+waJ^Myk6qh!JnUsAFuvtzi;#X_tG}if0HI%5A_$WU;QmfGoW74
zKX&@nn>PQC37m;AY+>K~Y5&404)qRwI~y6Q%GdvRbn0}K=0v-4(+}=xyZ>#htM1oi
zD`@CrI`#Ex)%Wa6Mc?l@*4Bsp5e$ygKWL~qQLTL{+kV5amg^@R4li!GoA#h*nLXd4
zKYSC+e`~-0;$a_F*JA%<q5q{gi-)(j9-CDCZq}EO1$#XbKVLtgcZ;WMlI8mA>#wi6
zzW$)Np54xR{qnfH)gPSPYd`&;yjT0r9p|YJZ5j6-)^$;4=h|EswC&&PgPhVy)w1*U
znW_Crsr&Qw3+I(7^#|Qd?lfMCnEJ<_P3U3fZl?5jz1N>^l$ig%@_f~;*WZr^{GamJ
zXwTE`v*C~GrS?Y5`d(@K|3v!!hjH(xYd_oaf7;ZawXd)KS1+%hW*&Zh_8f&>J8eYe
zwce#I*vr;)zW$)3;hEd)f8#%U?)~AFoGM|vY2)|8OVJ*8V=v0z_tn_6Ra3Iq^-W~)
z^O7A)<JQ0Y$MI~x_6Fy@=eYcY?@zfstKK$1rts0_`mcw-1;w4(di2%3*FRU>pZu}@
zZT1Gw30Lp@5fr?;`l7A<>wu5nWm{|$#is66{~fs5r#|hxq}=P><!3AFy@PXQ((*q%
zytXk;&LjM*VgAQx?dkVlRGQZ=n=*59NZcOfxIcTYtPj(^(X=$OdwqJDN71i`am$u`
znt4B5ebJ^pep@oG|IC{IA$DrBVVvb;BdfnvKjzD>@RgN)GSO;p+?%hTq{Q}4n3?fE
zUcA1|zxc4lek135FRS;nnD5uFdU5yH*UDFOUDPVJf3CctQyXO;d-m1W*Y{V?`~G_U
z)n5_)EFsfA?st+{!N9=U<~Yl!XBBJhg}!x8HUIy$>@UxH&T9YpYt8@vNB;i*w*Kn<
z|M6e-D>c^C-_QC~So)$r+;(k6+V)S5?s9T<XEvtI{?&cFc2Vs*QAd-E_uIeJ=%qFv
zdS+X@<Hd)F^|h;Syo-xHxk|VEmReBN-Cx%$TL0$9t#_J!O8w-Y+7_{m7u6Xr1m*uU
zyML(o`ibeb;;Ze$uU~w(rSp()RM(caJC8TUiQPY5W~CH8>1XKm(pPhBSK9oHmh1fg
zC2zm2@8<M>Tr6Ab-itq<A}>2@y3_u4lXX+;oO8VESIes_-WA#U>-($o*H?W!-Tn35
zq$;JqF}sxZ)}LCl_Ui7i@CiN1hVx$?b<`~iH<>cE;$`fs>-!^izgib{aer*_zvW*0
zuCFeAcYXE2mW_&H-uDcSAI{z5_v1{yeN$g!Ps)dtPK<l=tJc@;y<fk2-_^NF(^m3@
zzi55;>UwCcd3N`np!>q-)_K3a_SR8eZWZ&p>$`WyO?r2IRsPoGxmD}7sI}IuzMg%l
zYGa-LEY2v8n;)F--u<b1+*1BwmV0orrS%o_W$PXGR;Jm#+0Q1@*0|Ja<EQ_J4)r_a
zU)$zizwhVoxUcc^rtJT<bNy5G`M-AlIvBV1N7C8GotnO9uT5r?XxIN2&Ahbp;_9vO
ztlvKV%FkNy^#2d7RZBGzwB%fD<K+w={(AkvS+n@?)C!fBJ3%E?1$EZSGo(*ytL8c<
zr?7Lcf6%_O<Nj$Db4yO4L#I#c>MZ+zIb7-Vq_|1fL3=BHifbD5`A6*M@-WnvnHKaV
zqkHFay;!XXdlr|N({et4{$}>M)PI;5yZ3hH{Hy<?{z!!D3K}eBVGN!)!9d;c|CG*K
zuDq4G(X0Q2OV-rBeW%>_aNn+1^Fz9)KU%+M!@U({@ojAPO8lbCqE4?o{J-gK>Z#;3
zNp^YHtCc7A&pVlS*mdvHpUNh7PQEXq{|LI@-BI^9nSXWvzL+xq{a;J-HiWMBe0BVY
z=H*@QpDumS;xo@|ncbRwJ6X?MRFD#u)m*`*C8Ro|k!yvrPHB~L{=|=){Ui@D{<xu*
z!~f$0x5K<8g0h_nYyM3WFyOL~*vgz7)Dbc9&z1j=*4<osUi!~h`L2c6<7IT_-_jF*
z;TFFxcG33w)!{2=f2?eJKdp!B?DSv%UdMGz=D4!9_P*+^De_H1#oqf_mc}J3Cha+T
zch=dPIeGVA^!!cwTlMbGqwo)>DiihvMeuq45&yJh%j6oni{~%>-hHK^p7mbMrrrr#
z@3)@Xdt`l_{qryPgxBnF-`^RtK5c)J?DwZyp8s7>{So|LChzL8uk(J}`|`j?+!y!N
z@3s_g)2t6me%|_P$B&)YFYuhQt)Cw1eJ}q1dKTMz;jbRW*Oq68KfHN<!q-cQW?=zM
z;>!M9JhflvulwTe`9xRvU-ZqoC9BVPGJf0l(?fOkM-%k~_xDr$_6SDZTYr7|?=Mjk
z_KE(lbMi{oT;K95@1m+heR-Asj(gVYS>{C^yfOP`+1&dty!q!Xwmf*#d7n2+imSnq
zjI!|SH#uH>Z8@^f=Gr_l6+?}mzArXTTw1R-<GhgC%Gd8N-u_yrF|($Loke|v)aI!*
zkC(elzNjBtC#92;_4-+G?*67XmO-(1fA7C&DBrpJ-9g(5C2bS4w?Ee%+JE)xgN7D4
z{`=~O!^~%#^eOr|=~nc2QQy7qrXQ?ZGMlaFY0~xgho^1o*PB_}yKu^seT|yW&zD)<
z?-Y7!D4Fqf+1j&z7ugpJpa1{KF@4rQ|M}N93tYKizv$@QUk~s8R*L>JMKIzU#}a|e
zh}y^>S?iQnPyF^bHa50OFMh`4^wqMuzyHQeufNiHw^}pd+Ft2ZM?O9Mwy)~PPp>)g
zZ9iVSJ~ip+{`y4s-P)?D|5u-UB3Rz#E}XQ#^RN7puhmanr^Z&DTEF{<UFTn6O`Wel
zw*L{BIXnET0|Nu|Wye{@Jf~P||Fyh&wLa=^()+FN-Y-plTa~%=)ujK|zvkD!dR3Zy
zRdj#cgJVDQ9((`)*HYVd^7QpD?W)V)Y?=Qx+BdYI<=^^`T2F87tSqlzeX{)4*Ru67
zUOSWbEP0ug7xi@Z)z=3_Sv{Vtj$`4iJgPtAVz0XRnevVQoVPFi;C)i#pT=){mXLLg
zW&TS4quy<3TfbE+TvW0CU;O&GA5-5wy&AnWWnZ%J*Vk9eUu+C{ef`q+S9i}}`dYHn
z^Y!JZE&KL{m+IdUc{H_bDa*X|&bz&{ubx>O<MsX3-IE#5S?2YI_Fw(AzE&^PXRq(C
zt?#ezkFbwh9kIwZX5l^k>sp_0xhiw)ke_~iPu#osFum)mr>%<*pM5^wZR@YU>bu|W
zk63pt^wq)3*Vl)y4;3yi+w0wVv3k>|Xa0NtsQ9QRdVb^B@NbQs%#Q!NdYb149RL5I
zZ;e6^@4r9a?W)(;_nkU6Df-Rz>AT;5ZCac9BfG?EURv;ijc0^q{)gF3)6}gN@qQY2
zH1lnB_`kjC(_RSHe{j8EcsXXjtHJSyR!r44+Y<Xvxy>|b{v+^4Nj%Q_*Pf?mLN5G&
zu%)B?-W~bXqNT6dpReDbIO(X?zP-P`##VjLo?7+&_5JmG!x!z%{%Drw5V+?ttNh2n
zx!T<&vH!B!dNL#9TXrUgEil<W`D%Ub{RiE19wwareEss|e;YziXJ7K$Yj0Zhw9YQU
zVX=g)uZfct&y(Qw;oFM^gZ3S^`L%{k#!z2!|Bvpdjjz`I5#p?!9Y1Gg%?dt)P2I1x
zcD~!2dgtH9PyfH|tkan`%{k}hoav{64sY<jdVBXDf%zp1-bE|)h1qd*#ee-#JgrCQ
zm5o*Xn=6l<%#X?}_CM~(_(*lq^|kr)_RZOLHuCkSOC{?s%FMo4{W)L%{xdcAe^0;2
z{`&r;kJr9VeS@HNfK=@1?DhU%tG~p(^Ji(ft0vD97kB+>@9A03z7~I-8Z9*YKg;wD
z=EWbJa+l_vkJn9Zjn>?7WFbSEx<kZk)vwN{>shL`zuendUbi;f<=_9wN8F<9c9*{n
z<9O?%p!54fBFj9ce}3<hzfHNl{ndTDx}X<r)z8--n^3<fRlS|-`}#@Jf2+M;Pk%T$
zeewQxS;zK@?ss{AeYd#6%d^k#xnFbM{a=&&_P_n>!{%Rn<!xwqDTaM({mJU@_g{Ug
zG7q1(a&~=OmjCi~Ys#wLPOpD3W!?KF-&6km7xg;3_R_C|u7AQdxaZz_=2>*hnyFI1
zYFm1F>4QnL|Nc7qL*>b2%er-c%px;aTI=6kw>#>`e8KM#>o0V@&Dt9m8F*vTlP^I_
zWAA5&7eD^HIQvn?zWp3085meN9cLN$ykafQ@N)l}{Ot9S()!i%Yl9!}@BVTBpVi*f
zxOJMpcWpiW_1(qTpqB@ElRk65pYSv}e7)zVLeUpfV^#->#<#t>n^U;))kf~SzgL$h
z@17L(<-^)rTdTHAzxlB)N_BnsqT2e|QTroQ_7q)SX6?C8AS^WW^`&!LooyD^W`A8-
z74`9J(cY|9{ntVJc5PjMZTGwEw-Il%;_c#PO@sBXw45xutfTk*YxmZ!b>8cv)?M3|
zy6S^#q>F~k>e*k{&3YRfv^q>bdHak0aP6yo*R-Z)ZF+zD;gL`KS;}=H<c>dIt@dR7
ztXJ1|N5;L5eYEdt-|Lx0ldlDH#a%sqQ0{U_|D!Ean<ekYCtC8Q@3x;k&->FQ2eF0L
zVV(Nle(YI(!EBM8?9N8^OUvq6ukZVw{$O(Pir-&fe=1K~|FwJ9*LQK(W3Q~bdv@1R
zeRD1Sx+PM3#3xURTD7$O(f0pc(>^u^x?C0I>`|Tm$2hSk=Ih7VRYet@F^P<yp4tZl
zdRD%?p7r3>Bk|pnukBiOeMxQBk$HM|TZ)A){5Z(i^yA-~thp?|?H5h7m%eG`{D5bN
zqv0<u$DD!<>-wUb7$sUR{D0hWDf|)FfwRtb-+VH9C;wBr*lQ4`c=gpE;e4-eesx{X
zU(6^Et7`kq)%wrB{`&5%1<}tVJzw2lDHU`3V{i+r=$j21=h}H#m2ztYuQj+|JnG&4
zVBWzQ&OBMUCim{o+wW!3zTUR(Y2crY6K`xX(vbe(dfG?Q_T-=G_2s3nj>HSAUcEkH
zDzi1?hDl$-%X*$xyf@JmHasl<OMmyfJ=$gpRsLyfWtOb2KUe-QwXJdci~d_1ets-r
z)2iDW|M}@3#cy_6*EG%J^{?)aSbb1-=R~oIp;za;{yIx~@+s}<(=Xon8__j2`s=#u
zv1{u%wr8*U+Wjf^>l2ml(@!op`kz`;n!o$clGIh7^V7esTw_u5Z{NqJ{a?J+$9-u3
zI%VJ1$)D7JeaeiAtNw4d^ox(ye{RtWjyoJDNWToWYMb80lv@8|m5aoNIbH`&7$p5#
z%Db=Tk4Tw$dKuH<%~zfWO36<@sF>+1_vT5wR?H9onpeWNUU{2ke@l9}Y~^~lcRylf
zuXnC@smST+ESdhqQ0K;y?dps6r>|Rj|A+4L#j<aYmT@F!Uj3uG!MXftobbE1_8&aF
zt@mgBUw!(;*0>!W@n8S*Wc0))znpq4Su1h!mbU#rRQ9>~^vs`k{DKji#Mk-r3TsZs
zENEt}Ql8(W&$nPo(wiso(&_S1CtquQ2@CzUF?{v?MVqpt{wC~ntku|kw|4RSFMe<4
zUW)tj#N>~V=v~&t?{|KFUHAL)tLra%_S@FXnjPOH9n8P+V4Pl3RLtiX-_TcGS6_>1
zzP_lnZk_k^i>2mkR=a<c+5aQU=gR&IT`L$hL?)<&a<aNTRM-)rC(6az^bm3Y;RL6o
zK+bb@d%xemUZ+3b-fH^PV|&7O%-sF$eb%SWSbqM?x5MU}=lP{if3hpQbm7z3|6i4#
zns2_<_J7L*`%iPfxVqZ+XUpte+%FmM%lg6m^<FI%LH{ls)o#d7{UN6?|MvFoFwf(q
z59*4a{7IXY;%R>8!25zD57tLUmizwvc5J0{RJ&bN_<GX>E_PN$jwY=mYac79Tk^hW
z=HO$L6X{qX%Ej6AFkwN0jyg9hQ)0k^7%gM24yVL`1tCi1Yj@qR%f4@X`?H<pa$)VD
zeADMgbH8Z)lG|mR_v!5%lltj0dlw7nY%LHu_}gLKZ~2@5xWd14YUxfeuhHv2Tljyo
z^}5Wty1_?wzpYrBXm}-l%4Ej5GI!@3(C>UJxk0h2IU@D+{M|_pe8df=#qaoY=8^I7
zL*HXgHOdBVPW$uly`=T#t9KYw_Ie#mRJy=$FmtWJoC_Rlz$2|HqMW=gjtM)am`HH3
zIz4pM2+(ro>Hr1v-FxbN=5?#jpMHPacC}HV(dyer!`4R%FL*eA<#PYY%a@*7CwFnu
zpV+O>4n+PqBy{_qo+0}sJ$>o-|MutWy!xlh_WGJd#@YQZ_VU+SKV6@2!g^bA$sY#S
z+%0l*-|duY&VK6piI2I{bdgHA{M7r6v&FmLFee{hx$*9zii($oh6=`4wEQ@Q6$>V{
zs`h9o_%VHBXEYJ%=n|1?ZF=NzAjU|Po0aWRV26j6AZM#nVj$;SzhC!Dt^Vxz^5f*>
znCE_$E00@h_iEbo&MwI*t*UXacpZK7t-5G+r|BQwe*y`Q@@n$~+uj`yQ1GAHP*VEv
z-|U;KC+~GhzAkTkZqeSFr}}yu8}hgPRBk%RUmxZpDL&6UWb>C~#RD!aR)N9qtSzd$
z^s{>--l^U3KUOcg^7z5h{tU+J4;b`WMP~|qbeVg#Q7B6<gZl;_qngMB6;&Ztr{+S1
z3<Gh_)<XvacBJU3a<m*%G+5E2C1`v!E4^y&XVdp}`BmY%@zJ+?pY6OlYrXQZ&+lH8
z*|@hSKKSwPl3tvQ`&psC3V}w7f9)0Qoy2dwROEc~{DJaGORgg)<K{NGADX$j|1E>I
z%nzw}_2oOR{V>g%^z`@5f8R})Z8*PC<^2EImo96BC>hxqu%_8K@u%OPe<#6uPXdQ%
zK%!$PW62}6unw;hg}MaLxtSeaN?fd<6cG_1#?#7lIN(5xmIzlzQ)0l305ws8neUHz
zzJGdNxBU4&74527XQpp`wq87j=f8Wnn(xVvAM>|g{H|>z{&B<ksf)hGyWjq``(O2w
zh4I&0779PVa!A;uhf`*MO^PGqgNV6vpUs~7D{Aez{rq1WyV}djzm^3F$2`&g^RBr+
zR(g{9d;d;<e}+P{m65yTqratmIJM~Szvo_iLoZZ(nculj^o@1VItQ7*wE+x@sheB>
zOcoMYa#8tA5X<%48+@P>Sye>2Seg<O7A(+H;b~(!7_cG0M-}8RhZR0rBAl%*YE_?K
zpZQ%~zH5E|);nH%I@c^oSsSU^cmLIgSO4p0+TMS2&GE`VM)tzz>wio9JG-Uh(&c&)
z*5lJX8e{ISfBCJ9<#0{BJ*$B6wPQ*_wWk-$H2$3aH0<pzopt76VFKINH1BlI;|qHI
z?ezS5t!a-Ps=3eceLHBnRwe(P(Yl$+rSnvOFMK0=-t?PTID^>zPmC;_!JLwDNt@L|
z_B5$1a^z|}z`($~-Eo!)FBfa=LQCJ*_t)24_dU1w|C$Z{ueRAo)?chu`u_jh`v2Y^
zxt?b||My~D>HS6KMNz-j?XB8WRhb)8(3<d|{g9|v_=R`(v!|a8SG{lkI`q}~t0!$s
zqgS7P_00eNwY?Wq=6`UP6s%{@p8o3og?CpsUtjNO^>1m<*De26GM-v{w<|ozzV@sC
z*{gSd?fmui%Z9GGvF7s@`zHVMO|lH%r?=O)ZfE?;%j*08?Au$mGk$&eYW``z;`II|
z{ol3df4Il~)e-AMSMMo5{4)CMslC-(D{Zqs&F_6xvul6nnzFr;=l&>$+f_Z=qr7|i
zHLb53;yyPqoUSiic=~hHsl8c0E9VwXzIHNC`Rf0O{XG8s5ie(5);;?At~GP~&KFLR
zH9|qPtL`#;?T#&1U0=TW_uK5P^`Tkn&%cKFR$Yy)dirz6lf(AM-)@qs-{3R7CyF_2
z^8cRcRlXaw#h&d~`ojO}=+D=WSN5;hwJNP&x8HoKb4UEORo8dFUw^e~Z<aqx>Ak1>
z_Fv~ez4u?&lX)G-EA95*eg9+TxyARszIyz8{Z9VNzjnllAAfoG*Rj|Czb1Zt|LW?i
z_t*X&`7x)YU=7#D3k>sF3{Rf8(%pZ*^^4bm9K#1e8?~kfB`)|g@7w#&g74$H-k+T=
zb$!~;ja3itmwtU6?)z%1{_0q-tGkZ=@afv%bfcu``9Dr>wN%lH&2c|c-~K$cI9G1|
ztlyKb#|Q3P_ebER_1B*U)7#yCy_ep?A{m>ysfTCw5$@&}&!wj}>~;wFz4)T-?~lQ$
zRTY|@U7L^JFE3ADzbMbnV*Z3JOXS1v7gcSU<uNz>erfIfjuQf`a+jI;Jkr*d{#@+P
zeZlx-<mRf?mZ3!(|A)ydvOkuwz58#Ycb`D;hR^P6=5@Abea{Yh{Y$z1!K9uo5q@<V
zOL#BjdF3?A%vY01Yhk;%zHQHzq-3$K;;&E694`46S7rFO;Ll%APBzX9zrK{!NuSj-
z<ga|pK94i}naPBIoCcq~;`6WeT@QWGWg)(RYhH|nqxIr@{{Hocn7DWRtE$_l93Iwp
z?&^M1l}(f1U$<D=)AN1p>w|(hF2Cj12v@$&WtMRHzsU9Gk@fwL+W&E+3glLQ{SnuB
ze~tWFkM))j?;>mGGHGuux+>usXD#{NxAcipvclqjCl~dl?UmE|{Pmwf=hyQ0yZ=V5
z+aoaZF4NbDejB4pe{Oy*>t3}vD7}8xZ~Jy_!!FK?vEmojt^X41ovrt5OV<9lzoG~A
z-T&?So4sTA>Zr+Af6w|Kwcqvd(Z8H;CEs`MkGsd$7Nk|;!N9<h?l{Y&SBtgw`a;c1
zkL&iow2vx%`ZxREHT`RPEn@p8cfMb=@7mtbbN?fDM}3{#8KkuSPx_|S|F(wzx_+|s
zuWq>DuZ@?q|G54Cd3DKscY(Zgy@P5p{_BsaeBb{{^ljFP*;|uVEcM&AUe!DH>vyNA
zkEb1tuV3x?dU}25i{pKo$G@I_x4y~R{m-V#e|y_@Myy|UGV;~m_^+#E{;mA|`*q0Q
z$5S7z%iGzavt97@-6l!K{Dar)*3|fC9gV$zZhhVQSIh6XtdH@%UtVmlBG|E9HL|W~
z<yQOe<pnL>LE#rS#lH@H9hA5B?&+fYYXg?9ewj7<Sk<eEi6^tJzW#sJO85GD-`6W`
zL^ER&t&@!oFP--8`tIPnMTK0^Q<JB@dV1-k%iYJF-~89F;)_q)8TJ43^-Jpeum0Cv
z#`g6=dv!%*{r|1sw_i2$zb?MI?&=fK`>Vs(iTK}t{bJ+#+K8-4i=Te?+s_xW@yfN(
z{l6ytnH2ozs|+jGmpFg5fPx<mp*349UmScFAGIg{$=}+zt>xjhdXlzTdv;4PTictm
z>L=ESmkA`YOqR0w{>L~tyWslii~qyw*3~U%f4+Bful;{EIVsN_2@k*iv+rMD_wL_+
zTW3L$)&Bpyp9{#_+_`7}<hr!p>r0V4Zl7vc@-O~iyJx1)p<6ny|4&$PGDQAEeCx#Z
zyBQ69^=B<yGC%Q;kRX$X$%7>VHv8?C2At`v4_99o-{fU}L+m>LHLa~HHTRXi4_z^z
z`-SJd!06Isoe9zt>JPGfs_+UHTA*iOkhtKBzu2Zr>^*|fUd;a@&)Uavbnla|OG>;k
zqh|m8HK7mJG@Bp(csGCf_1gS-yHfYx+FSLt`d(#i*1EM>@BXHm?4KO<we<djju{^}
zvE6S9>RH_IErV%x$jep#Pq<y4bzyp$q{^j7f9l_ee~8_)?bg>H%1frluUmI*)wOl$
z)4kqi`J26a`M1gYtb5g_4?aJb{a@exw!{13%x6CPEDp@7>VJLu?zbY%dHer74>$Z4
zKkfHh;ibF3PM!Yu*XQfU*$);xudA&{yH{IvdV^EU|M=DWdGqGhz1;O#C_=XX>D~LO
z#}BfUPpDg0r@ri3%-ajn&JIie%r@dX{4?i%-1@-PvEIQ^`Ysh2t*(pWm#eL}t)I*u
zCBH@_{@T{&?a5Z&tKSI8UEM9e$o&0rH}QkDCYhIRtv%k@r~hlR{Pby`{#JGDm@EBs
zdG)`qUFJO^-Qp5~dB3hkm+RO5TmNl|*Y~~mCpa9iDyUss*F8Ob{Z!HAagtj57Q2M?
znC3j5EPqhG{PY)}{So$j-kr9^#cN;8@BaTfW{LK$f5GDV_v<^hHL3&(uG#-N_RD|M
zlw*694@Rzz$?SN)`fuLki&goWy}i|zA+Izhf6sbcX>b1fi_iM=@tTv~?NMD?GU@kp
zUGrye?f%y;+sov%ujYK*TGi99SACsw{pag9O<}xG`_q-?@x1+eXZkInqLtP6emvw@
zzxVvV(zVzAu0Hu<O?lj2-Qv5yG`+u`-f1huy(skc$+DmI*S}g$nKl0d0|T>-<1ABN
zFV><$dn2)@u|@04|Nqsx{^zxL{r|9a6?JRM4;;S#|Eq4{{4IOGtusIJx4QoOuj{K`
z@7neC{kw_TtJdrPYpD{f+sA#iY3r+9&vw5Gd%yl=)!vxbD?9#1fBkstn}7WOrSDdK
z{boJ$_=&xHTH_ANeVhOFvVD`>2}`c(-mb^}7E?~fGXD{J!jpVhxK4Hb!Mf#=Zw=hH
ze!G$8a<!=L|JTy><+sk4YM00D{kHFK(dyIxzn}iKQ+;*VtGi!yVuLC^<(kai81X#t
zm$vM`9WC?se_xw<GU_*H`L7?|-`}pi|7!Z$DqqgGNo%G2Bdgb!ZS>8)zF#*qq$}ij
z$?IJ^Uc_t-T_gVL^~=&&eeVa`w`9$%-*NNxrx(BcGPmY;ynm#m|Mewnd9BU79p&ch
zJ*U^quAl2Q_jLW5K-;h>`_}#0;&W1p{p+f)tGCv1{>#2w^EdU^mDH6&{t>Is2kv0!
zep2;ljrL3-VJD`0djtZ^*bW^2Fgr0iJ2|&BC#UuQ<KOSk?Bor1w3U0jZ`G;&k-x*9
zhEKm9RQ3L+-WwsK$aIx|SO0T*$M+fDU*xs^>(r0N_g_z$w=vtkcW+H-e0l!7n-+?*
zUSHQV%#W;}Z@+k=);U*=otLJ3W&Zk_{Z(SQ()HE0?(6c}FaO({H0%4-`17^@I!><N
z5T*8I`MLEc+{zR5XZK0|50j4!H@Xs8UvpASPh_Ij>Hk&Fr`}$Dee>Plf2FwhpMGBV
z-!|@YoqyQ7jrXp93e1jQed*VqS*!lU-27^$A1-ej+&A<1>$8dC|5bhGUkk4KljqO%
z(jawan;9=}cWOpn`me9)Pv4q;&MqylUn3~@q&dFQHsNnE>+_x8T&~rhoL7JK%6!4S
zBKvP|oT)4PC0zSN+Lq}}pPug5m3b*%docL>K|@vJ_lMHoz28!?I=&>fY)9CW^Rqs9
zXnem>vuEc0jO9Fk6L#+}y)v)+{$j)FfB*fDn{+W|_21KTZ(Z?UoEX>VwKw(G-2GwU
zbGkn-P0f0LFDmlgp6mBtuly_fCoE3=>(zG~%GN$TQu&&1$@f3=*4J&#uW8cz`uhHw
z@Kr*urmtF9THh75KkVJ!=&$cvLQj8+Ff0Fkse1DDLt0;3OzW&eUi~$HF(HP5fmz3K
zmT7MmYw<7f*!6W=>-PToo;^ML($%1Un^uSYjo#P!fBM(cTMHNVU)SFGwo3on>H^;@
zi?*>zJ9a%-A3kaJ>(#QS|A+s$)e*JRHuBZFl<)ulCQpjmU-NrC%k12*H`IHl+9#Tp
zv3!2&Fj>C;|EsI7{;w+eo3#4<(a?Wp-f{1|<0e(@+V|wsgdf+p#FlML`TPHG_18?j
zP2sElhqLV7y87#P{nuBsLtp1zo%`K<ZT$M8JF$0uxU2qNef8?<cwzmu;kx%vZPk}n
zUAgXmg#PuVTeG)Lnb)bkvrhTCMxV0i-PO&%uD|+Tzi$0MziTVPAF;bfc27yYKX2b@
z`S?YlRoeIG?dMS!clxD#cU|~LyS=I>Un~)=o^w=)?QmIL!0*EQ|HD=4{(k?x!TrVG
zbwSx7r-QsNg`4YEU75GT^R;Mf^~-I0BUAscsTDk%ZesQ;VbzJu`G@ab)!8ho8qM3x
zCg*CK@W4JUTDrVkJ$q`^mGCK(>R+>je&aa3skV;OdT*Wa?spdP+hUepnf|Cc!<g~o
zvYwf(VLAKOn+pG$vgCVS!rouz-t`M@>yiuvpE~U0sGS+$U$Fl5?Y~uTtG~WJo)xF(
zA78gKev#K&v1OlxBHbL1M%nCE*I&;PG@<O${0WU)ROdg+`uDZye#XYC^`5T|r(b`q
zuikFH>+gzd-4}O%sjz36eV^;=>V0Q&?<Y*|-}?Kz+rcHP{tE;tPn0%qdpFtfk9wr4
zwCd$QpQcXUP<!?K>(FVjdDVZOL>#>)%KrN5vFqHQ<4*0B=Rb65>B7sYDba8G-|XY)
zRnD0I>DWQ0zokv5bIR81oYP%D)%nyPp^ptA=UWwieUJZ`rm{r;)B5bF7k)A+`n|E=
z%d@`E+p9mrnRCkY?5}UuRGfMD>fLqz^Uwaa*tzb$zp1u<#-__Zd#2fIKUtDwn?GS~
z(7QKBo~@aG>yO)tujWlkKF9UmUs+imw==xS&ck$SrLJ<x?x+9n>;HRXvP9JWZS9Y>
zzfR9_{j&W3L2(<?pLzX1x2AaC`Sh*(YGK^Qw*R_&udXlK>-{|LUFZAXbDaJz%HOrS
z{OaBBrW{Yc{on6&Zc^1$tvB=je~SGjQu0T28u#6M3=B-S9A}yFcCi*!iQixE`}%67
z-TwF2m#V-2wejxVU$@>yyx(aW*RTKo->$bYxsg9KZVL4%oZKgx9lySA>#hB-j&?=w
zYstQ*^>lrB?7yv1@m5=ZYCI13e@*XN{G_Wa*Uz5zzrLI0PEB~$0qKi(EU)bkTfINF
z{MXg0qnFEnZ9gr%>gcU6|33ZudfobK;aAg#{0ohuzWO`wWVf2y|LS$<s;gORtG{-=
zEPWrg{-U8o@UN?{_S%2{b@bPUx*cv+Pk*T0@|Gxk>3{Uiv&7~^<JZ#bBUX#9UcEkM
zceL-TgmA5$ci-G!dHHKyXw}y%(>gVG>eW4Y8PT=#o%cKMS+B1LRee7#T6DMSYy6Y7
zv!hG+F1v3{`db=vx$27hqU%}DAMPvvniBL>)sy8-f6Bfi>C0A33~e`itG~o-^7&U^
zJN|VBRq2MVmS5=qOZCOP`ycHTDyMBvcq=pS+Wwf`f4<2jJ~rC3|Mlcq`xhAszk2oc
z)t9ipN%J1r-q0*<zNP$c>rL;zm)QsFX9eE;o9>d(UCY*^wOKNI>aV?pf^wX^$KBnu
zK6sX2T2#F(=HHRs{|$|uKe$UtDy9k;)K#5Y7gcla*O8jg>_sU@o^9>^n)UV6-qgF5
zOX5y8{{0>lx$^z4PZJil#iZ3w`#tHmyO+GJTm6BYzrUW|e_}RixBI*Qwj~l(vHf*t
zcg5?qMP*;T7XSUw(!^N7Kzl!CwUYmnUtQPew>^J+`n3OHyZ@HPRn3)N_J6%g;r=sf
zhb6!LS*BNeJAPtkQEYO?o4U2asZ$J7{hz<?efH7*Zj?&M^piZh;$6T0+beGIL&GE^
z`NR5Cd#5~lfAZ@OlS|wG3h!5ZpPc&Z)fvysX;D|jmd1U3Hvd-6Jmty{ZIjO^{!n>$
zT<=-j_u404{_l6`TDQ@n_0PS%zn=bj{rQ(g^Z%WB?zL6l6Shx|dz<uUNlMK3Uqat?
zKWTrrU0b)KXa9fo)z{Sa-nrx7GV5FX|G2-x{5M6i{T47VFt2i)W!AfjwSHk`xYyn9
zzy8clvaNqVegD39uRDJI{=YB&cbNUJ*w9rjzk}k}1y?;aU;RFFzW#st#q+-=Ze4%A
zS!UYj-$#XFV*KXsFq|FwvNry!QKxOp-<Zi)e;t&&ovY6CZf*SY4ZoM%KU_BPqAsh9
z%akqFTWicEZGW+BeEa|38rkoCcg>G|zkewE@#4@f(eGxfzP~y$J#HO;R!aS$kI$bk
zUiS6pm*sMo3ypqlymfzHWk&bb;)`MX!?mxix_b51zw^o`pDkNw{q^+ecW>jn|Hjxy
zb$qS<YHPYae)ZL_*VoDH4Sn_X)%|r5>sF@h-p^w(@or~s*3b9X4b@jqfA#fs=&Pqy
z%FI7jX7oS3+c~Xuzi0fSd(W=!UEOSdE%f!pdF#XDw%$_a-888_d-g|`)3wGrd#-r~
z{ao|#`&;{<il=6~riZSWrKEl~Q~!0yzkmBnO8+MPE!|zMzq<4L>-hEIrIStnDf|h0
zKmDThrnoEjHth}%ZK^R}x^PKVj$PG;WaA6I%m3`!{l~rd!k7Qv_g=I9`u_Fgt-V!8
zqgQ`$|Mg`b$EzZ~qQ4hpdi<a5yt|rZ?%m(|I<mhGny$Q*y{>X^(X5LSf4+X~`}M&i
zJb9tT{TEk%iOp93{)(d`Sp3u9<oz5Onz>bGseksr@4nk&V5K7JcR1JRgZtstYraOW
z3H?6ZeDTqrJ->HH?GIZW*75mP$XfgBlaCz}KPGf#_WqAQSGMTd${$wmO}~CR_dkou
z`8h%HHv78nulG&T+?Tggce1SYKi}2)=fdU6r#L3<-&a+)>racRP9N{@b^6DzWuGiQ
zb#}(89~zf3zFBkpty8kLxTTk|>_u<?^pkNexAH$;Rb-b^3|+ie?qsyd)NAU3FZQZ0
zPK;h7UKIagUHE#>2mLcDxBU_Q#M<L1!H|9A&(FOT$~TgG>z2RcPJaIXROiZa@A;?y
zyT>~$Sf7&ovm^Gtck8dG;omkN{rSqvn*YD8@4Eiqf0mSA?^c<Aa{d(Gxu4n-)zbgI
z-?K43@8+cE;qoFmIr<aq{zvRxe==?VxzOe6|2vd<Qx1IJ+xpSUSp0q6?zz5KTI3h+
z{*-CI>*|-aaeK=n-1f$GuD@vg^~$`igH^9TmH6!u{<$ehWZ&9@B0KNsv$Plrp0B(2
z-@bO*&yTnC%P0TZ>)V@}CVa8_YnPATU+<Usm%pF>_b>kYZ?~86O}bS=_ZMY<iMrm*
z$x~CG(|><${^a*up?`nxQT{Qj<!|=IgukLsw<fLs6eO$osp{#kEg#y}?*4gKW!-)T
z2G-M#v&?x<vDRJ{?D+2#aU^}~>!7IXrP(6WRjx*TnYZ=V&R<9G7yUZD|7z3MR|iXz
zUab6Eoqm6PU5-U}@%-st@BUW3d$%`ca_2hrt?AkEYwOmno|?U8yH%6Mqwj?uWqUSn
z=&!%u-@W_mAIlz-uRo43J(jEwt-n`&_upRC@A~WO*4M2R-uY+ROOIax@y2pr9{d#X
z+`9gq{LBS=UyHAAoBD6+l3SCn%T{gK;x$$O+PYW&<92q&tryaoYgzmBZ;4KO-u*qZ
zg}N`*hK6{3z5DBGY|-`Ap0Dq(i4*>}>2J~>^_nXiGPS;Do&H<wdFku7rR>vxZGUz5
z?)ttf4?12(y-nR1_eVIrchkwwdFAF=0>=X~mh<xN`Ecas*<U*&&11vAtcf|YI(+ri
z>?w2Z{@e8KZ~m{HKQxxa%j{me<H3RK4?S11PZ&gn?5yXhJ#+NT`d4m$1fD8v^IFjI
zmF14o?<r3^TAPHQ7U-;5q`!4~*4HEdwwkZL`}BYOl{=ip&iC7TjbuL-9{jY;{@Tk=
z#~+@2cx~BS@js#w-=x=C6yA82Q1?Fc$<OOYf?Og3Y_iq<J-N8bzT<FK+s7m4KOEh*
zpxu~D;{DyCmtOymCr@~x>;3X;&ioRQn|Bn`{dX$v>)=zq{r0`;-Q?}B&Pj%xS^cBm
zbzSKCf{9wD5}QIY4(rALJ6O_YRbV>X*!yH<oz0xGIpIgW?lcJhso(Ui_@D8o@@*T}
zOHWbw(v>z*sQpXH-?jZ`TPl_pPS5zUN$`v9@gHZE`Zw-BD6?mY(7)YLe_B0kXVo9c
z`0@DlGNbtW7xWb0XR7dp>95|oG-}$#s_Cn)Om6yp_tUH}$xD28_UfO%H(d(z39LFA
z$JNy||AW_$|4jTJHyv7LwVxw_qu{V!VfWL$Ey<Jqf8$!CP?y<y;&<x4V?Pe<FPph_
z>gxSR6X)LVu>F+meCyNtDS~d9hkvvkHu5{S&ox=$;{Oybiy8X`dxY&-`qd}rc<e1N
z<*)aS<P0)?KIwnd`iSK_Yf=&fL{}&W7d~#OU%hiR=bWND%s);xFjzPop0L=W;Y;?d
z8`mXba<4Eg|D0HIr0Z<e`F;66`8j@AJ$L=}N6qQmpR?;Z9-I93{cQNrZq?3{?|(H}
zZ?|UI{yzU=$btJ0_r6vCQNQO6OX?BJy{UpJcfWtL>6w<VY2ws8FZf{1^B-O-9P0PJ
zejneo!|S1N{xL?*-|Ed9Jm-~ddgI^wKx@YTYx`Bd*Kxj=c3kZ^C5m_MLen40YRiqD
zC#?Fp@q$0^!>wz(XY5zW$SJJj(v7kAGfiH!WJl@!*9PJCYZlcACfW*bx)%RwV!`x0
zQ#P+<9`{zaPm2D{;#Rx2EVw<h>O<&1P8GX`<F4jaQIGT9AM&60((~7?Z<}`*O%CQY
zKEtlTdx=XlX1CDK8`m$me0x~)T=GDn+WB7}{EAj>-jK2-X1|Hi4*QR6y8`0hD0;X4
z+im#y|DlwuU6T9Ma<g~7+avU1(u=Q;(%QNg9)CLh-~UY)3Lh?fBl_M(wJpL~qupxJ
z@9Rlhx3i~d{8LysRr%wk<9`G`|4Wa$_Irb$!>{Fw&;65n*Hm0Gt%Lobv|4HYqm7#m
z#V<USt^f7$mW{XKdMtMRJ^t?cMUBeq(_inBypa$bF~RxEgJYR*lO1=?_`Uk<XO;Dz
z!lZ)gK2`rM4muNm^4I6suUTIMG;S2GvShpvC-11|zajOfjY!y<ZvW~vWn#fR*)A@b
z-)=2leb8R?U|;k5I*zq^|2r0y_e)3G9sd-Zy=?#f<9Q`BAKweJey{%i)q2m;%X{|q
z$G(0NvDEib3xCt{2fxJPS*4d(+tzhYz5DB+{-O=jUw@KXal!M~-IweApM3rAUc6`j
zUx_Y-XWs98153*id2{U*GqQi*<=^$ebN7Qkp`}YpAFMC4_-nJA)A6u;e22=foo+sk
z-^=fpmOj;$zP|U}UU9B{SKoKediU!0A9vs7e0t{h_D0S6o89~T{q=pP6ek>u_Kx!m
zyruNyOVm2oqWCbaYl}GF{{MB#gMoo1!*P~*?<>|qq5BI>f6djt#`im7>DMCPYwNpT
zitc_JynfNnU(5Jc@87+8^**(&F0Y^bjoLB6BTlaF{Xt37eK&-!nZJJhwfJq!uejY|
z9s94o`W<()h%Y?r>+KKcioMtGRQz*m_pKfC`;Tp{+5ea8^Q9=`f6M&B&)OdpnfO=#
z>iV*^zx3<&t&jWbbMaosfqgr|SJ$oEy?*vJ{%d)U^uH!qU0=U+{Uk?kjdj(3Q*T{8
zTlzh2*ZU7s-+bTxp_hH~rDc^L@2{=f81`D~Z~WJc&M-~es<_a)m7U*z{aeGG-F~F9
zUNj_W<K18Tv&CJm3;&v!>a{j(qqpE{%hS(Z-(Bo?w(@nz`S~A<SEU@j?a&-_p*BYN
zYNMh2HSPJkzUHlu*8jgc^xxi3*OuMYFaC6c*YI)uqF?{^{_tBd`^JU7bN^Lxl7D>^
z5>&bL@TyvkVy3kA$89G00ur1R>&%1JynJ)kzvIUX^_zY_!W?6(w%2QYnQr>&_jC)#
z$$!K?|4GSsAe;JskE5klh0(Hv>qmY{Xv^)DJ@p~M$6Bb>^76t}83Mm*>$jhXmD+!?
z;BV<qPUqeW|JFv=yuK9vIgX?B(a&AnI`t9O0#Vn^{&{gM_9)4|v!lR4Of;S~;QsBh
z$?2yj*;IdR+AE~9|8>V(Q^(^x_BG!wu6wu6?_$jCBNH+N;uvmglsl^Hzka`yuV&)1
z_#a9(88Z7~XUvGSNcr0Q_0*pDN!-ihV)nj&vir+=!Jt16U+=$qxoSgPk^ZUMfA5tA
z{q%~NU>uhe?D=n-_1~mFty6vbU!A|%UVEYR;o|my$$z3}G(Fk>A&~2&VAbO952n4j
zVdmB*zn^86M0I7HVh8^<jn$JcR{6Vh?G?QGXyJ184G+axlmCjY`Y*n5%{>zZ9RX{W
zf=4soKD>BbU|&VN{`VtHn!nS3C4UmT@Uh)JZ}mS;KA*Yj@ogFcFDk_NGZIRse>tYO
zFIaNYqJ8oImfhG{E4KVoX!%3iIsbJ+r+?Z1sUm%kK8xi|1^?@>{<plzFWf9E`eRb@
zKC1+yBl{oi5}e@|{73a!fYpo_`&r^LjpG*Ss^4D!aMSKEpI?k?LjNpP=)GOn`Rmai
z*XplD^_HS5zyA+Y`EIQCv&Jma%2SDNe<JUlC!eohIr~HJ)oVZ9x23Pwe=hj?W%u><
zkChiNS+RAMUVOal=!1?QUHNt2rhl}&!NYYriI>0bYexSK4ej&#b{lQfC;$HcC1&Yo
zj=t{I*&prdmlf9^G<0X{U-o_ZAK#mQ?#90k`pG@xu6>h)a_#x{{+pIQ@eh^F<>n~t
zm0tMPd~LbMmiOUxLbujUiZ^`z_THiLJGbAz+FSj#KD6Z8);Nw%fsGI2zL?yvpDiQ!
z>(k52aXacbetA0nSo2}}jXPzN{;qG?Q*z>e&{IX_p56NUR~-$%q5SEY=!2g1o{iU^
zEcyG(ZS&shW#%sT|E;~Hec$VP=h>9qPqt*NJ}5t{=TGZ?^GaL$R~)4uHMXiR<SUQk
z{Jmc?Y<cZ3q02VzVc%4XR{!<lzbGg4Ejc~_v_8N^gp<|jprb{Ko(K<XOJl-{2rUt*
zPPT<kJ6zWJrtiPrvs_NtbGoO#)_N_eizXKP7XAORKYZ=p_-k8E|Ni@R-lTnEF8dVj
zpZdIh`RVI_&ua1g&zN#<USE9TI^oxgIP_n>m6;%OP_S;AmG(TAkIT<SmWs>#4GwxY
zb;hoVOUx?H_!#WkbK}o+#+xDWDt9*a$ArKBBDB$KqrAZ7d6#VGS}A^2i}SPJep#R;
zLvpLc`*#U8wl(&47fQn4yv#hY!L#mPf`Oml6ye7y54G=lIjov9;ZHa_qfW>~12rRF
zr?!LzAvz&~EpDst?tZm9x_r9kwM*;E&t47nE>&xLd+?K9`iqMBt{3F2s^))T*u(l_
zqIAm4FDG_$g?{oD<_-P)*Ob4pm|xkwPJ2IV-{DDn|L#zK7teimCTpDt^A+c^?LEJu
za*mumm~SYeRmjvZPvj+o_7eXJzZcd^>Vs8ho!=(!&ig*$(RW`4-tf&QRT)d})vamk
znrp48`#r8}OQ@KiaENa4x*u!OPvs~|`94p1wkfDuc-fDR%=MYg|0gVxyZ!yYaB2B(
zYk7k!JoWZ(PaHh#^Z9D(<UOsaeqrp48Z-a#O6XkJ*s{dC=xbhO_5(?SuSZSenO<Fg
zwd%md$<tQX#(DM!w60t-A=O~v>xb*tzjXR9dH&R_z?#^vR@tlS_lHL5*niA>wPNYg
zrS~%Gm^c2Iyk~K(dTFRY=LsPx3C1;3W=UmjKk@$Sz2@t7aW2=NXityb6EJ0STLI%$
zS5?sju{|dO9L%4IU(v`5+0k-yrR+z=UsAWU%vMWZ4N|yY*sf*RZ!-IF?qSIa;Wih;
zrdJE?ZT#v|gCCc6{|L%F%J=BSbgpbC;q8S2oO%{QhIgiTrWtSjYb2idr^@;2&-TT)
z^uKxf7Vy10c%b@t6Z@8^=X(@#W*%VFxiUpVl#A2pV8enL25KU#tc(s5LR5q~1)UTF
z4g~0kaLinn`u^@Z>$N}c#XkKi()GNb%ilC}!PDb+XUSKU?f-w-_RHIHg%6uGR@}4o
zme}aDw>2yER_~LoQr?g5^dGsX#-*R`Ao`^1;abb%ds%!<^JbqfE^IrU7sTssB(5zk
z@mldgooIEw>XdykvhzN)osZyo{POL;d-5i2C)F-9tg+5=C|Vf3`;ye{*I~7{LS18y
zW^r846XImBxWD*w_*>ntFV<vu*%kTh(&8z7qTdt{q^UILgM$ocY1Aq)9+pE#9X3ob
zP!Vcnb97u0p&`cA;xqTk{>YEf`^8Mpd^?+1vvtqu%u*AJww~wKZrjV^@AEtVVcsKt
z_1KYn^Pf&jh*JLi_5DV5{%3U(cjbNm-?$J}8un;fg6fgPSOrHNGk!(cV}BxUFIiRg
zkpKI`huY%o**oUVdg7lm=R<nQzVPZJS<jZPSCQ5(OKX;I2xt1U`}@=PU1#RKobZl+
z=b46z1+F*$OnbgQaR1I+590v->mQaU2rv}CeEW^BFMMmE+lgDNV;<{o+T8U0*QK)e
za_qfRR!%<6P{61XGU0-XbZbXr!h#45BQB0Z3msNW$PzR@de!q+>2;m6Ue`RbUgcIV
z$yzeaqFwE(%;f3+?!~7sJ@fzj?^@Y!u33NQ`&9qW*Qt#!IGgm^H~zZG2m6HUGMkhB
zIlq+>2;cm^!E)OQv*r_*ioWIr-u+oW!)My&8zsvNTkn57oVq$~-9$a@a51$%%k&RD
z&{(^&_TDNH@41^#t+J{V&9d2lA}&8c@5~7X<IZDueb<=8JCz4kc5dF6|Ln|Sdq%a0
zV{0tfjCw2c8zNUpc^0p*0A;xzDT9ntMMpiS>YngkYr&B9@b8w~<&&D6*H0*U<ZBzd
z{qLWg;;+VQa^IXhG4&UhSwl&~`DOQ(8H+Z~m!5hkSm)jnZyhPVKb`Yx9Ohc69(Xas
zP2Jmv<@nV{zBgx`Du~#c7j3xX*7Uvg2Y$-0yl5&Ka&pQ_)85N$R__nggn#*WsQZv$
z!D+vfOEadI9e+{rN1s7AMWDlEuZMP^fq`%{kLONSzM>7xGe7UvEZ|CSS^2UlYtj1P
z%c5D2L)ovMR4wA)E*rIS_v&qPviIy4YvXiM&g!nHzoo0tz#wSGbb!%3;cM+*h6fA`
ztf`K(Ecm!s%l4Un(+&T$zW#sxzrFYWfARWm`v2d5fxVyCmS27S>-%f@t?#FAegFUc
zzw6Jl-(Npj|8$c3TGiyQQ#UKD`&9K`y~FD<?~~7;Ilj+0aqMu%=3l$MiiMV+yes_t
zZ^U1Z`D^QTRtbhytvCI$HEz@QFs)z8m;MNP`OZ9BP~En0-}bep9}l_2KmN1l#?Qj$
zAfNpo>zrJavj0zd|DE;vl(@i^E`g`kMt|M->FVR1!TYZEO^ko?(lGkPj`_1AZI<kL
zQ*&qj-d#<SKHIK-xz{%%bW!<<y*B=@FIDaRbl>LQroTngukV?)_5H-yyO%>x9{XOm
zH}(Hl-H&!>_I}x0e*V_ek0$&7sLv5#{k@89Z^G`^6T*)aMeY5l_4mHc_47gTUPq>>
zCms(LUD@|z*6v-qXUF}m-hX|6#H#n#cK`MFeD(H3CzJQPuv@RVO=NOzR?ghh<Z6C@
z7fT_tZB^5YhuaT#zFcLuUbtt;gZ{bRL9ZKk>@<(rwd(J+rB7F{p0Tg<8<%=pA){SJ
za^TAL2VFJy<Gbbw<p$+S9KP~GzbWhaiKN+6^YxZpijUDgEH-nqM@s8~*Du`zBhnvz
z=~lnDR(sO5-Ql6CF<X58#oW#Kr`6Z#xK6=_|HDk~pH7L||LWI$G?)`>{otUqAK!w@
zKkL38G(4LAzT~uMoAPYyKcN@6lFb%s9A!T`VgApWe0h_3{y%dHuSo5?oBm_X|LxY=
zlb=lTUOoTGtM#tkUsvuCDPdOYHA?Jdto@;*GN)idYGakA{i17z{zVfu&ba?kuHz7s
z8CSM@+LKBD_9dBJzP`FH=-q$$yn8Bn-YS)5_OZRI|9rJFs{hp0-@*U%*B8H~uXe4w
znDjAnZ_%&tQf<%IH*M{oeq17Fp;F?Nc2H;UoBy+HDnkV1WG5Z5^7?owIXtZVo9vnt
zk5``!v)9~Fuyv|8?qZ)he{sPx;i3}XN`uwMjvfDFzWADlR%x^Iuix%ndjuxME*Fi|
zda~u)#=5xpy;a{$mrj4SbbZ)6pRez)tc+V{SES3Nd&cy?c>11lyFETNH+LwA&l7RU
zpS*iN=NY>quE;;${|)Av|4=e!ntc0H(TYF2rLW#snYY*W^n-5xBkDQ(%RS}x-MA!W
zF85=WG~+5smz)**C$uKZ3rK&}yuoLF?|lCGO=~)Tf1NgG(%;m&POCk~ZG0G*{|Mdv
zqx{t9!^!vYQyg#1oPS15F!MoetB1L&UC#FM<NvOH^M0fk@9t5@Y1kRN{s>2l@5aYE
zY;3;kH#@y^-8$Xsr{IS_dY=2Ge9tF`+n;>@y6Ms;NtxX}Q*YGV%l~M)HvEr@^Fkvo
zW`ndFKa}jAUwGnJ|DkU}skq`x_nX#V)z|+~&Ykk0Vd2JeNh?Bn8|C}k#Df_#WEPs-
zkPu>jIenx1p;^qUJ#HL+mAroL)8ut&+TlU9F%#CrPqOCt@J97Mhlhe1x0%Q!*E)lo
zpDJqKPQP>7S$DPm?)`V(cdySr&EN5Mi|P7@Qj#mon(y9!ef_u4_ggzX?)_I(3Aq0K
z@qxy*PCwjIHs+l#`EtEe?{-e^+*$vEM7yt=D|0Wd??1Ktx8dh!-A}$={dMqAQN>1?
zol)_EzHh7}>mRIH*lK({QQEn9j(@@>;i$*j2NjJvILq}9O3eIx`m5${b<O2<9Gw#;
z1^scoy<zR8I_-5EOD6qjn*96y*W$1V%Aa08{j2r+?(e$#xW7X2Gx};S*6;sw)R_Nm
z<nKxH5Bk18{Fz@;^8eA6XKC^Evm$0V${1a}-?n?#@9gY^?b9uJjZdBa_v`z!-LJRy
zrhNVX)V28O+vrEvbpPM_o*n+`a;4AvzxVDxw95&N{bdtc{-EpkmwCOh``-EZ`A<Jl
ze!q0p=Jm@|@0%~0A$Wm-f#r|mEQ>xZ)}o95xk^4o30;33bZ56vmyZAMutIyI>#ytn
z$Itz?{??~&8><$Dzm8v9xAoWctgo+Q*9G~lzqR+))u`qCE2r6Qs9XPQ|Nqt3S4-Ob
z{~vwQZ~OX~eQ_(Lzh1gpvgvKQ*0udnOS7teFVB88#pwMtf4whzzc^j9%nTB}zj|`j
zm4EBSUrdN^N}e#=fx*&viFobmiA<gP7tM=b?X24wFSh>I{<1B9t7aVkU%zg@Q`TRh
z{co$@e>L(idbNhz<<9G=wy}@JMEzHNnA)CJUvcVf+}^6UMH}y~68>9zYj4%zm8pV9
zel3r8z5b&AhT^$1Z%%&*-|O<~adRX8^P~g+pRN?zXwRK>_1wGXUy~f~9t-*+5V&OC
z|EIf^{=c_Vs0)~*TEM0ue^pBT{JPI=Z1o>}HQA*clX~~ZEsW24JpJ675dWg6jWMS_
zO0NxHA3rThL0k37)D~-*d9VNUUzsCu*s1B){wIgG>)L%Sf5Rdhb#|rnuBY#JT3$W5
zH!JD?t6ism?bGfP$Su6rD4_YHqVd_j#!I}Gl5N*o6BpcY>InC1{OTwh(`EmxZ<*QB
z(&yIoEj{u_e`FQ6NbUMt@MYHYYx)O8L)$g>TZ)=3O}<`qXJ5B?{hH7RlcwFCRHa(S
zWwYNS;dbaF&6iJ8zdpGaA74LV)4#8WFT43qbV*d!oT=sdeD#f)Pd>8r&95=uqt}1(
zN&i7{jYHK_g&)7R48Obiu&S{B>g(%6Hdk!&n`C0Q&`fh*@6Hb`57s0wnb<tFjhOp+
z<Ign)&ff}6o<8`YeCppHvALTM@8#z|<IODXc-@OrhtJ$+X<c22^v3nmPnXFGeJ$Vg
zxxDn$-uo{9*6Uth-P>gSHg)>@4>RS<Wd!E%HvIFbdoWk-R7}a|%T;EQ_1E`{gfIKP
z{_OL(r>AT8pYZwrMOW#^WVf^XHl3`x{?K~6^XXFxb)07J9-c108?&!}`iq{4=9A@F
z)WTcz0y;Mw-C;6cYNF~3;cMcCijR+3{V|zd{zKVb-pu9D-|X{-vw3Gc@>M)sqd!e5
z%vbqK{67Uj9{c{wH-9WNIx5`4a^dSU%d`D9K5YML#Pf_Y|Mv?X?>_wh@y@iL`CrO5
z{o9wlb$|HZs_83~l#V&vZ~l~K*c&MR{`=9d54w*{7N|RSZ%cgfd1>bPuWG0NUnJP8
z{^#ZqmVJ{xte@)W%=lD3+HiCHKW!tX89SY&6|Tr1lH~Q1;EcF7<F(QJee%2SC#e`}
zOgNq9D#cmp7g97UeT&;W|ET#_J8L9-jxU^@ao$fV&6VHt)Biewjf=Kz?>k$`#Blom
z3f+ag=YG2hWawB{{%4&ny(+c6*rSu{)^**zEW!4wxBtcbk1JlkJ^Y7?+L1`B=_@{&
zHmhrK?K<qedA-WN|5KI*r~OZn+x2k8*Sc%__2pK-E%w;8HgxGO{ny_gEV%J!g~<7|
zBXie%Xl<W-D>C^X=jmyt8i|j$uFHM@!SkXVkJaw?)vLbj>-zfY@uQwiObo~5=N;5{
zy>fl>>3Bo$>DT=nrI+}-SK7wCpW5M2B(eG<YsECln$uqzOV1pCwE60jd-Ly?F5lpq
z=+&~qy7tL2Klz9G_WhU5>wn+ds#~9?`cCon=@;IHw?CDy?p*zT-}|dizhwQ{vi<kM
z`!9q_U1!{1(X?h}+oIB|7wi8Y_S+*cy<E|=ek0%iv+4C$|GQUeh3}5Pzw6rGh<AJA
zkG;))U#gh*OOYcj=>MO6PS$Vs|9!x~z&6)$mL;DTYxyN%?Ok7AeO-5L$=#y)*R{S{
z*91NNwR`>l>$}vqzWSQHd)KaI)35E0p1W?{)g@c&uEh3zdzBY;m2+*0`q!k+g?D?(
z?>f)){u+MK{Kc$aaepK1{C>at%3^z;FaDbTwbIHI7QTwFQLk1Bt$s11`)^hDq^w#m
z&&Y*eU8nEb>fxlnd!1Tz*m2Ld7j2_9>aUMq8~VNcZq?QJ9ZE_?^|5zPXKZ>t@9EYR
z-EWir%$0a~ShGB6{pnTz!=L}Hk6x?(>U+(8;k&=??2ns&jce=mS9kV)dSC3GY_hZB
zl|aE6oBOXd*T!XCTlCI9Zau5SiH4~{x3VKArQT58ap3d=zW3{E@Bc{WkU9P9=ic=P
z&CJd{Q%js|?j^lCHgxsetE;D<+Vr++`TD5NU3JSg={2fn6s1p=4rdi|c4Udr<y~i0
z`L=3(8P}ry5=Zyy@l?5)zkGVlfBOIJC3`#8rt0L%_Ez0K`TAe(?vtNiEt_p37W?Z+
z)zyH0UvUlZihlvVvtHRwHJZEO@BN2q3P&F}NSs^wEV|FaadG^H5H}lZF?Kf-J`pzG
zmjAK6?XJfXS$NOvUt`c$c4skb8$-eWdwUKx+*LTZ;5`4kb9Vn<{jL$d?A>sDqW!Z4
zi<GT71n;aei<|#&r(j!f@I$%xQ{8v`wR_qwcl*EilOJc+Cn_D+Gv2df%6$J%I&zCT
z=X1WiT({MHee97eE;E739r3z%3lz)m{7^W5p1W_3uIK!nx#_7-uKJ2!Pw3|1dZC~9
zTmSvD$IR**-0f<%yxm!|Z%>$Wg>BY{>X;iEN4D-QHx?5Ls?>kb-QRh*P5<l4_`Bch
zC)qFh6?E_D_4S^`Rr=uzCG*!Ul?v+I$YsUV{#z|2t7bvFM(U|B-8*cSDSx&)Y;{@E
z{_Tfm?>E+{ZRbm>j&}GQI(#FXbJ|)*m4h3$3%H(XniM--+(z^M=3-~{=RXv$`JDFp
z>$0<Z@^;(s^{3)E|CF3wvHJe3s68U-Ij@R)`uWcN5%@UQB<9JdpT(bkK9p8TITYPe
z_3Dx5^S@2S-doI>jH9Y6SoMxnRZ3>02+Hr>_3WTp{C>@yPxf<GY6&Mj-2c^Q>$jpc
z>-(kuCt6AW-gi*3an_85p7ZNkziTvyGv8mh<^Q~$Y@FG@Rnm-qv&>eK_$&DO>g!8)
zR_5Qk%eU+>Q?K<uBclTLgoGnIcOQ&d)4o8Uy+y|~X=2~50t^35J9b-&KU`U6JA2Bn
z^`&p~-Sj1AulLc&T5mr8^4_T(nK#z&W+^tj+#Itu<yW0hw?fCu7dH+#)Rgo8DM|6_
z<bN$GuiR?E{ryJs&Y()4xbFe|oED{*n<Wa>zp-tI&9?Jj{>P)EGScS_!|%x!u9EB<
zo%}n#^ID%huJyssp_}#1vr^#|J5;Y8v5(p{eZx9|X(FAao0!zkaGQMF8FWVdr}yOJ
z2g9HC-8``~JAUCTb-ts=7T=qA@`iGod*u7D1@nuAG@AKl-0piYWz`Y(`iC8}ZRa+<
zb8~ap{CI`z55;wRey=o$aFJY`P;lxO+u8lH&vUCCb_w3}N#Fh9LzYj7-K`%kbGDrQ
z<h}o(c^^lokBWWD@A|^0vRqQ({AyXu2RmoT8sxWoe3jDJUa<5LvuRS}Io^{q*}g?C
z{<>2qHDt!CKC!x!kAJVv``~T7X2z)_r~lhHC$w)i{4TF~@(XiZ@1-Q8DqYXD+iUl7
zsq8#A>C~!#_LSv3oW7P-+pd26oPH>DOH^lF%J0p=?_z|)JvUX?yPmvN929&!n>|ft
z-O~5<-{d|Q==bb8=9+ZbL}})oo8QmujceJk@ye0o=d?2Ki25aUCr%Oz-}rt~;7TSL
z-j(7<gqkOBK2cfUzH%1p9YNns)pMs8&G=HHvg53BbH`l2`$9hYJ9|@RPQEO?clDIb
z()F)SEqWiSYu|FVZ^Fi=b4}lKoBU+c-bHO`-hTeiy)@|ue~Nw=Y-;Vw->!dbA-7s`
z_gAsW*Y%&h+xs(o_w855*T1-!74G!t^sC+9s-9f?=f6K(u|q=Ag{7qP?i0g#PwT_I
zBfme`Y(D$ul;i!uyW<ubAA6Ht{mW(N$N#HMvx2QlRZnJo+ON2#Cgne;uM6+wkn+j*
z?}w%Sy<6!T+xyOcwcg#mO-*ado<G$)6n)1sc;eUPg)GH0-tX1D;H`FQ;gtE&5BW-;
zzk9TO)A|^rU;oz!c;ESRCfRlR?xj^9=1-T}`}g+AC;#2ctFl)|EZO-!@9&dO-)~OW
z|NGi6{{2ON&+->LU1Fwx3SVR@e(d_Y>$_dO?EHHlR)2AqckSBi#kphl2hZ}Sb&mdf
z7#Nrh9cNkgWwDmmZ`Au+bNYJut0#Mc_RL!!^rQO!|L_0*#(%&6>iyQDb&lM1%h+Fs
zZVg)-?%#Xm`s>)${A-N$|3CT8VjEkus51PI^jW64w{(IFO*fjpnDt|7QoOod-PW)_
zNyd8dSO1qL-}yR8MX~w6%=){<KGx4yuYNlD+Nw`~>(y7EeKCEtK#9P)=#|T^C(hik
z`^u^-?*Atr|M&50(%<Z@>)sUKijRAzeoenKykmED%fGW-SHgwwzh-~U{;F(x>HE?y
z|HvAzJ#+kz++8QK|C>^N#OluQw=ui-hpX(KT6K5*Q<aLyzu)%8R;23-p1J$|mcHj}
z@z>!;)`!G2Z`Z1emy_bZV|(W}%il>MVMc|f#dq&!x{LpxwKrkE=*?Z8wz|I#|F62x
zwPU?&$-Js#GnfBA<Mw{~zs*-!O!MlWuaYd$JtXr{TIz(#*QbA`=88XF|6p>KE027W
zvAV3|slOsmRfRu;jvM*BK74&=jc~=8r&1I9)}QrNVf<TXy?X2Sy?drvXk1GC_4J3=
zkKFB>>osPv<-C3WwYci}jvr|{|0n&JaWnOc-{cE(!zbLdnHTr|>#Df@YqLuKChhpC
zzx%HE(w9Y(Px+<F>x!&eeZJeL;=-d}hKrp)$NrzKHU0XWF8`GMTl80-N;mrT$(wJh
zzW(m%DU;Q<hI>ETZU27uws(L2)>$roePPwgdOm)irx(QbwS}yne9rZ^#?OuWU-RtT
zoAqaV{hE0j!kg@lmCX8M-q$=m=-&SQ*LC*)TJ*K*`Xf*4x{N<F>-L09<d|X6GfTjG
z;-b<=8!s{)(7bQ;KkCnwbuaeUUvd7rK4ATRuK%u6V|Lx2(09PE;oqC(KUF7jJ^%0|
z&u`a?PszW&gmhM@Dls}2T)3S5s&r;&-A3;xci&(9xc%|gSO3%>75#d8dU>VIlX&O0
zcky2f{KLX$+*<xcFM0YE)w|oXl|Dv)@W{(Ft?aA%`||(w{hz{v;<N6quiL6VVY>h4
z>(3jfI!Jvu`@Z~m>+|v#le%|(aEe~fA6kDl^y=#A#lN(Jrq91qG<W~W)Bbn!AK9+2
z*}vYU(sz3H>x(x|e*Uk&DEmv?>Ts?1_sjkMUku)QR!uuRpi5ByqQ57j!OVX>Rt7UQ
zol>>dEnOGERrb4reQT>@L)pa4RqkckN5uI>c(azN$9IQcS#)p09^+4Mjm?huZP8og
z7b|>iQ|q?B)6&aVU3w8ebI;DKG?!;;$_9tB)w?V_&Q6$A#?Cxb(n|dCyn^D#et$L0
zx^ki&wPt7Tn$T3PQ_!O+zHZA>-gQ2ggE%)FNaYY{I=N)hm9FqzlQ>Tpya-5JzCyx5
z!_8~0bxXyWug`*RnA$a|EM2g=E%S$j<-E4J4XQi%C%8+lXbEwu+AZOKPE_>$t*^1M
zn+0A!WeKvpbaU!7vllNGF3l||i_f%)w4H0cqp(+Ee)$7~D-Vv86`F3C!)SD+M@x*0
zv!gNbKtzU^m59?qL(ZFbuHE@l^=kXxIPX(i6qh|MIazw(&+*BV|I~Z`p6q+;pWW^H
zl~>Bk3nv$*|NPt{*Y@&Pwf#)4;J340TNLWA`lq<PFl*t4TtA_b1-g&zn7TJ~I(f=3
z+H&7<=6dd+=W-Rdf>qjhghaGMo%4#zWW)~~I`qq|Gk$XBT=vLEv!;iJa~V7gTfXm1
z<Tcas8)=;}-~X%?tTOLhtpBV&_*>V+NgQzn8>TVKKh$1%qW5XVxkZyYEpGIA-H~B5
z5Sig2Cf&x=m~dc*ffx_#gw{X>9}Pi47LRA2`^w`gXFXF>nOU?{ZPnBzYK^ab>#voV
zWFM|6SNVDWckP+H^pz59XMTIew{<j}w*RYiNdBnM+uNaQpY~e3*nhPucD;;n@j{co
zvt%>2Z+)`#ef4*h_u^+1<0AxK6$ISXz9PyVdH?9IU4p4U_MYp!@TFQrRO0d8DHf?$
zu3DFdEix7rf2zH4s&?tgiHatB?riv8UEim=+QR)iZ_qVai|dV^2`{rxW<HAznl$Nc
z+r{)XzM^L>-k)C2y{2yK4hu#Jkr^6dT&=Cl4m%<Y%!F8*850)VP!SR8WLYG<fBDR*
z=j->ko}cSAZKj}cu$GSg!m#`Qs@S)l_`WF4KkRm#ZqfbSJAdu|k(Kk&<o~o>??s=P
z*w(L`F+a9S{G<Hz*_`XP*Y%1|`I*O>_vZi44BZvmxlgrc-fxP3(%P`MSYU=<&La7m
z{eH|-PTudmZe`nNYqB@=exGZ}S*17no<G+Kn|wMfwc`itpJk%uQgd&4m|6GD%?mu-
z8Df<7Q|&RwE~oB$YFwKWF9a{ScHMDS(XKtKe<ifGSj5OMDv3<d5aZ%(RZIZQ8i}y7
z9BNo_L&KDt)#ae0hKG~8se9MUpu8PHTf?Wjga)_j?cld%K9)b_|EAuv|LWXQzS_Q&
z+f%mwSy9x#{a>GKdp*x_a!q-E=I*4qlLh{D+}?j7p;hOe)c!#6|C#%z-u;zOpBd}Z
z&M-OmqExortNm;mQ>AAyd}3u!K6UZ#+qGHMJ;uz}7cKT&;lgYbu;q{b`ehgHEZzHh
z_C%)!!^Sx}roy)?WV;_9`d_&4u6=x!o$$#QXPyVXd_5zbQDE`c=@kbYzNYPL<DI^Y
zX;1Rvn6%`J6-x@fodHjg`H0A}IURD?us}ydnDbC$!is<{5w6yzri2wPDZ(7BkM1PD
zy%HFc_tZ0}bFyZUQi)yqmoBq9{TKF=&G*&t1TX5{bNuU0v$CVCU)QM>_1}H0R3M#y
z=U<`h7TILeT#vf{dRv}dDV=_o>F(cCj{18mFa3=3eEZvP&4exS^?x_<1mrL<*WOb&
za{kC4{c;h%*Gn&6%CKLrz<WO~>e8HV`})?4r537gIT7u{B(iJ!^xE~Ump}g6`t17W
z{{M4+Z>ZcK>ApdYt31BqR)f-q!q-n#(oQU^*j%<J`+E6dp`PC_SwBsf<hwf2KH2YY
z)#lT0nHw0jL}p|Ni-0|xpp(v}+SHh^;6#!#_uP=0djG7Ip(;K*!*{Pbz0_38eM0rx
zH(&Q3*x&fa<^T34fm^EnH-B)={C#9zf~EMS@BgEI=H}VCY+EgL>6+2$A7MX!>*^ls
ze|qCLQ?7OTk^_5vPl|qyulE(P{(hqJR$SY&8wx>%mwvgFc~*+I^cBzf`z@Z?<G<DI
z8Mh85^qvwePMx+|O581P?&kle{faL&-0!-gzvZstL*LA^e9D(^ef?-4zv{0qyL@<C
zl&ei@fm)v8fiKs(#2IXS*1C#j3ce5Tnp*ZJbXl{V|15R}1{N{LSyp^qtc7JQUw?g#
znYK4>@2^w-aqGk0mU^xgOFr<0WA_i$;N>so-hX{Xv3lL~{SmwWR&RZE`ft_Nc=`23
z7wz}Idr|ajd)!g|*VmhG{r;L$7I*T;)b#8Re@j2z7yh-$zxK}bTc0*@g~zS!`gpo-
z&Ak7wt{?fc>3&G)_g}BSo<8Zi`ir~u>i1t?i|k)Le|`PiSly~$*=w)vk9!*%=cLo~
zYjx|3r@KQIc>n17nps=)rE1^rs9AOK*Oz?#=3jp;?rPW8RdcK2!|V1<zP9A+)!1M6
zzfN`ew^ixM>uXCp!`5YOt@_%%Xn%ytPTiz;f0N_ePWtj*tGNDe)Am<?>Y5bgt{;tm
zeO1SIwfBR`yN>P;&gZ>$edVRS;^9x!tY(~keZAEF?DZq-Yc~D*@{d#b5%U54I<-qa
zybSOCAM}OUhtJsaGwR`4zK~6YkHc*D$L#<0&*zo(^}?OuG57YGRb78Avp#0^LDk!L
zrXQUsd_m@5+tOVhBQD+A<FvPb{foUydt6@@d=k8C{aXIryT4U?vv%y<bFAgh;^+JO
zH}E9!+n6xh2vkmy<v+W}VlwxQxy$Puqj!CYy1KUNd%AvopZ@7;{ljJVUO!Yk-j;S}
zcmJ!`Up$@UWLC-KeVSEq;`aZJ3+}%zymx=SXPe0V`W6nM+@*Yb5^nxX{<Twm!t--`
zgrx0XUop|W_uYQw!Y}Fjqn?H_t=PRKZu)h*_`e>Wn^{%n_qXQy9caki{irQ<I;&&-
zgGCd6Ix=2M@%;Wo!QtWC`O{y2xcOn`$;WjG2Yh346aQReIbu_rv44G&?xB;t`-6G?
zTV%O3_G{+H&RB1w@*yntuI<AcuK!NH+wY<%blB78byE5w@qH8HcTQiW8r~#hF2?@#
z$JvM<TKQ+QKJtC^{W53Q3Hv7fvIn=HoPCgd>wK%1QRS}>haOkEuc=CD&W_XWlPPW1
z|EDr(#c63tXM4W%hf8K`Ww|A{`i1=sWtEDwU(44e^>lCx8EW}`{2085bIxaWDZ}}*
zdu7V@@A1=)FY(*@diwu>hsh1ATh?${F8?p+*Bn!D{A-u%tEluZcDw#;F{;~Dus?kM
zy(MYe{(n)mx_RRGSK0lagk?JR3AcpBIp0ta>fpJ*JD_Ll!TTqFA3wx&zsWLUf1r5P
z;;&U-W4$MTssHH2F?qT1ulIkBZl2dNfA6kY@87Kv5}W%ueD$X4`d1$UeDA$~cXfYc
zWc~X60^xP<t{voDpT6$DdwNmTm1)k-;gSD@4`lLBlP-Sx^nYxG>G!yxy?5hpEOO%Q
zJ1*sO>aJ!KOX2!uwKr<qY?du$RGu|I&QBt1efyEzC5_9Cv)-M(l`-%8{_gjpCtvsk
z6@{-0U9;(5XYenb{cg>l1YbP;+Wqg<f2ZoN)nB`}_`S<_Z`va$@Z);ui%qNF71jK|
zd@>|s^%?hS(3v0K9A{bef%l*M57(ak@9K^7zrJRF&3c&^zb5>(XkhKXb^m9@{j`rQ
zzyEht;niQCl?t_HR=)0>EgiM<wVJ<v-!iM|WxKTNW%ibwzQ20b{ypI*Ufu9sD!+d9
zqpO$izYZ^YC4Khos#X7Ys9s!C;Qc@NdL8He^sN!IrN1&aG<R(NJN=+b<Btb3zGt34
zs3>1w8?f$0OW6M4t6490McPJfjakR9X7t_VSNz{vAKmZM?*|1|dCw`Ip}p~vecj%2
zQ>pd8Ht+lW{)2~8rR4SOm4C%{AJy02wL$f+_^<7+FBP9Xdu#63N9O&KW&iaPj)nbG
zfAY3^$&d3~DU0mGR==FSerf*oB|G!4me%c>eB{}lr&&p>cw1Nhw2%88x6V_0b=>Ox
zF-v#q?b(;F|N6n^Nj=4fEKA>9U3mSyeDD8)BMS3gALRSA!eIL=b%UNv!(uU}__)35
zYirl-42#+H>uRC){q?E;fAn(~yH!Nlh~HRv({`(F_AcG<hp%3jm^}GxP;%wqHIK&M
zcK$KiXRDXqQk^YTclqPO&R<une8YFYi;1oBk5~EhL_k{ppevgJcaN-F?dgK(+a<I9
zsCn_pc{u!1vH#FgReV|d&*}e7!adSeZ|e5`P&v-~<&*G!F_%j({!i^Z#26P*xyhnw
zEBiG6`PbL|TDrwe??0#5oaN6s;~#OG-!WNZ_Fu5iNPG{+?DMtj-^`m|TcI`o?koTI
z9BD~1WtRIZw%=6w^1Zh5)omWbp2q*@gIbPl`u~%|XzRZh%L^+v{n`@WY1FsOysBRI
z7>nAi_i-gF{|J3|oo@C1OVyv*_aC(M%9(dfztea9OVsIKKa@}0-(TBgop~v0vijG(
zRjSo%yS7&D71REb#xXDZ|HE^Vt`qJ^IGot{`stUDrai~(|2y62_~JhKn%39#*8=4>
z)>R$uD*pdenV-kAze#fc>Cee(_NPK0vtOv!FaOzn_Ii^MZ<Fx*xWs<@^&0zp?^dKt
z((1VwGu8S33t#X3=ihCbboI^tUpMRHO~duuPMD-#(E4Xl@sA^U5%bezJ<y1{R<Ccy
z_32kzqo#PDOA2vvFIrl)`D@o(4i8yJzVBIIuU2@atbeJ!>%+ygZ;yUny2!WXkMY7&
zr_LPSV_+Xwys^sj_eUnZfb&Kj2m7a8uKGMRc-#JOUP_16n~WY`Gpk$kH#JDOJZxj#
zUi~T84H($#iZ*<EKGRjcNz44v-{>zs+83t3-@Wg}{$JO3OJv8bSs(X)>pbuEQB(Hc
z`ud`Wvwq$ClaDuby^UG4`_0$Cn_hpqbU)?Q`Pb}E?mc_-_5Gb`PEX(caf>Q`Hh<ms
zNFCEDN58k;p7`Cj?SA;~1>LV>PrkSsHM@G+?@7@&X6ALi{vR&*VypRt+nX8~7}zWw
zXIb-|Vy$18d%{1wc5V6g|L+C=ZM_>c=|TJd|FQLxKmY$9dieF;`1`Nd@1MN%{mY2j
z?0s8Tum4(UoBj3lSEKONcUmgk*TxDxnc*Jc`gnirx9*^_^?g5={(ADwb!yckt?6H{
z?As6~IO$DC_Cv{6OFlN&q&x_jfA#*f-SJ^sS6^S>ANRL<Pr1bX)fYUgOX7cR`WyZ0
z>GY+Q>QU<r&CBbjADgUwP%&uh^w%51_J-|MczVtHVR_y9TUUc>_M7aqUlaOzr7cg@
z7uDASQD34ay_cGL(4hZD|KzQwcmA#Z+WmF)*6S?w6J|b`$obv;j_)<`e>+1q#`wg2
z_k1W=^jdY%O#N$%Y=fCUrYu*A`K#Zt;>RI}KaI>$U)Qr(m)(slt^G4~dS&USzn`wJ
zuiL9WJ7cfj$`9^!72(hJ{uh-$^4aLV{<>FA=N$JJxbJ=X+xXs|IcaOAUh#FDTXg-Z
z$KJS2f3tS}yS}sY-LBLBR~EkR;j0rr<5D4ImD2Zw?f8~?3nE;OE~?r8y<eH@gvAd1
z7XeLYBtkP=_S*k5&)k@9uf(dP%(wDUSM9wO%M#AiALRVJ@m%?j;-t10xi=i|X#ZzP
zkzrx~`95I2u7Ftm&t#sLFPE=VSvP%i>7&l`ccXagUEhhm+p+6z)sDWp`p$LNbc@1|
zl!PA*Z*o#@`zxIK^*yU#anP2FJWR{;_n1UF@g&Gsf4F0#SLc1PbN+EzTiY2kO7c#~
zpWJKxTR68_v}((-J*KOC0)xfB&ztZ1F8(Hau>4OWm39>;hn6V4prY%mubzB$|DNxU
z!@>)fz4h&VB^{(0?9I5l^UpVfQ~QrOrHL-)&|3O_`g)l+Cwm(k7TDSQZCP<mLMe{*
zgRf<UXm!oeV*&d+dlqEQot&!t`%TClTkiH_32);cOct8)_h<Rus`b55zy4+K|KRC2
zcYDeDTc_t%E!q9Idi}he;d}gc=IsBU`nOudJ@o85VY@2_RzwT#meuD!`9^>zZmGuG
z^9e<NzcwX{*u@3h7dliTb^7al{k`Fz{f_*<BiLR2`>y1E@u-5im;RrWInMn#RX|{~
z+K-vfCwG2Z8vE+kmezX@nm48@{+%<w`fAU1a~aFp>vJz%P;cL|zxvP4of|LgU!{L-
z9@od6(e}sMVloYvzRCZYc+~j(>xV7r*N--FSpP76dHTJ3RczAl>DP}<&K0!3_-o#d
zbtyh=E^p<3JaYE)-2N}xMg7BO|NKA4o75-&`Okg$L*w?!eXl<*aoblsr_a7^^2$5P
z;W7Jlt8Bye?X3S1KQ;fiYWUu=3H{uEtP_$Hl1yFpt#xLOyP@=O$5yGwJa!z<ljOS<
zzr|laSaP?q;rJw{_5FosKICQE?7yD0PC9CqRZ8KE1v(2_)<@0_F8pxj5z~bO?GtW)
z{Wxj!K1YGTUByevzs*XK$q@Lrul{CSl=&ZZa~a7x#fHS-y#<cXmY8h#vZ8(8!aLkA
z^nd)9_7O2q{iWE_|3~<22RrBMABQ6h+=F=nr|oN+*XUR=fg@sb_RE(sd)h7Xd!DcI
z7p+iQvns$(=wjXV<y&fF{{Fi9`?{kN!_3`Ud5#}HIAsb6{qnSvpSF2Pk=kRMedi^o
zY|`kPUOy#7V2}Oe&(dDvcXh8fd92GcUG??!2k&HU;jbIrqOUI4$1{OVU-Erh$mgq*
zHbqA4yD|Cyl|+L>dTqbUzS)>(_eH#NGA*r`tn?szkEZvhxYFo%ZHzmfDtJ%k2+IBM
zVo@&7ve(w+<?jzu-!=DzzJBAGp?1okBYd0t_g5Cp?$W_N4;$_k9a&Rb6*p<y&JS+U
zeR}V-BCI35pXpvIowhTh^y$0#Hx>rfnaWsX{<GRP{cT!^iSaC@_(!Sv+Wb@d&qUvU
zSo^+v*Pj`4?(Kij_51qkY4gATId;DHxl=Ol>iI3-_r)w-cjD}quk}$=!W6&nHe2O;
z@#(#(*Ldx8?u3O&|6aW>a_i~uuf!IsJw3P2#4cp&l>eQ#w@)`+r}aH){`Z%1`(B82
z3D*B+J$jm7z5dYMvlqT?Z2qP2_3){ln>N>89!)t{X4~Dr;MdYyyHEFj4Xe6;Z&UR7
z=TGNf-tSaB=bql)e8c+dX?Op<&)X^x{GYGj@4we2QGce!znHW((BO|m;i1>%*KeD?
zK51hYdTML5XZ=DwmR~!pUU%&J;<LK+)bx7{46K(NXIb~XVy$0TTdNl<-t)F@@2&4e
zx1QdbYrQ&V_4VCB_t)=UWwf{U+&T-*FJ9a0*O#sFtPint{q=5bRrW>BzgMrX-stuI
z`l_XGWq(BNb>12FD{k^XudOTfHq868`^Nhi<0W6e+Pi-Z6W`Twfr~l1`GNX>?V=~&
zzy6Nh?^1f-cJk-{U*E>x6PiAC|L^d>)%&h5+4_2?ZdUZ4Wsg0~e_2~kda{4%D^A_D
z^?hmuPxfzJucj2oT3UYTtlA>igr#v>*75;v(f4;|yRCj#wQSdxsEyv`xAuNCfBGW-
z!*u;Rf$d+vuB?rU4&NQsvGvNn^{Q)jN37WY`=7vRcdL(ql_FuF1F52R)@|*I{IT`L
z(pUP2*7?tkYTB>goAh4#&iBVJ6lNY=B_{3DKU>o0nV)WAD(jV3=ii-QF>lpB?oE1D
zGXuT1?)zG3De4w1nqj?l`s@3v^{y{_q<wvL=qmO7f99<(>hAmf`1LB8>EUfU6&DX0
z|8g^PliiV|o!w$8#ZtNQ`@7%L_V-L?zOE{_-!!do`=sW>x)~k3su_>m^MB1-+|WAz
z_n+F?<#*r4C#kRInlH(|zHagEMaLMK?q1-OkNh6>@xS=r=(}0#y8nN=TanMQp21|z
zqF1H=z7`x~u9u6DRl2upj#R~eos+KLxK2pkUHS3J9)X{0gzWRrA6)h3#QVhsYUVd?
zIx}9o7%jKL<6-)C|2N84KVC~{F0OR{X<Pfhz^T0crfvC$#PSEx|8_1o^5OiAg3j5|
z971QTI@Du-zyJ8;z^lyi`x~Zxm-0!xw|s8G>Gwamd5-R|;En8@S^56!x~IFouC&=B
zEEo5Pb&8l%nCk1l%fdfCaLLzju>QyCoh=u_!L{qV>^TXMFA=J{4*Y!{z$F)^^zMU)
zN8!;udk^M)<e&BEes%k&ne(6N_U&eQQkXpHNYV7h*FHOHm&Jbx)~WqoU7k`^{!~pO
ztZQP`o89Y{fBCoe`7|Xnw{;@R!&qZkex^GyEE25u_@#gI8SkWkDCY!MKc>fz<POUm
zo6mnq@u3Z?sixqs^_O-0kK3Q^bC{jO`)gW;VdFZ+nt63=+w{z}TT7(Wj`NA_d92U3
zZ2#&{RrU3=`)54)we`dNzR>xXs;B+8+uuGTcxi3+q78c5r^CZ_1iw%Jf41DE=AM83
zhnedZit{y`vHy5=TcVlGnl{6^Do)3Xrm{2DKfb^J?7yT;TU)-H*4dw0k1x0Xnta{A
zOSdmgzh-|^vCY3bQeR$m{(jxBWBt}sXlDukEcto+ZkO&3@=~oo7<+ry#k+zB81xsH
zt@Ermt$G@+=Vf16p3(b5>CC|m`|oe^zWg@!dtjCN{#QZI|13FOyJSbfrfsIK|M(x?
z3Ea3?R%T(NQPdx?_eC35ym@di_r<xBUwPFf=WPw0WbXT|y!zJO3Wtdc7p+Y=^Z2;D
zUY)S5)j{Qin*S%iuWzz_BA>W(ZS~hL`@CN22F84kd(&2ay=~0|rP?~1UHTo%cX6hs
zR;+z{=k)*T-6He$hR%C`tmMDzh9Cac8vF9~zrK2Y%S^WA@iO_jEfxvpC;uK4mz_NQ
zp=1Ett&fd`JUj+hbx%m}A5A{MaiZ`f`}%{dZ4!}{vVu}Bd*-ss+32iafBN;Sw`c!x
zEcP&0IT%-{{p06j{YB+gUylB*-{Ni`zCO&d>}QR_qh|fp|6e_tdN@jT{p}!~J*`t+
z?OQkNrHRauuD^bo-C@)J_{q=z?U{c45ogK0r#e-!IU85F7Nu5b&*Plhv*y70WmQI(
zf_@z}UwkyK^HR0otk<V|dPM##oYeL}=bKr?yTy-{C)*h$NKgL~w=?US-v4l+fBT$m
zjOL|i)?dH7X44EWnKgOW_g`Fk_m+L^TF)mYR=duB@p04+WY2i>BJV^+S<SpM;rFak
zS-Kmq>tEd;zFu^$OW9P<S*pelf4{zXdC7arMP+uGQ#|=6#oxK|U+uff-_@-z`UOkk
z<^O+C<NF_X`Fj0k+ta=m>#wU9e|@nt_ItRXX6No{&%TuJo*w%8>#0qa@7{k?`XtrB
zs3$U^M}(WLnK4-*B2Pq_mF=M8h5#2qPL3n1C+&Ry?&O_`#k2L6rW{!=%3EMp{x|!p
z{@EAv9qq#&{alc0w(mt%cf<#Mzq)<rc3e!p{X?#O`xKr1(muJ{cG~z)uUx$-^<%=7
zdU-$DBPJ_s{>j@vi<>!#<Jp5*>RuH#DVJA@-(<`6><ZSB^j49pczxVM;*!=^hC^|}
zH=nW|jsNlT$Ey=dnV(gqui@GMEO%b~Y0lo?P43Zc6K1uPT>dr1Kk)hS)Ax%Gw-(G<
z9CK-HKvd7oq$_v3;#8xK$8>s!>Ly%V``*fCa(%Egc)+(uLrjRX*)i$B3@<Suk*21E
zg(6Bq94&{G47Tsv{CNMo&b=vXi_5w8Xmzd#zEM~GYtHRIdVj@s+H2Z0t(|rD&BvBS
z{-Lv1{7h#Q|IBpz^800W{B9MWe(mzuP&9kfah@Nkf9EA}-d*e_ziyo<PxFMgR~Qc;
z{?oT?(eAp6I^J#b!lZVuTzMntq}&9#q=$QTEHZoiS4k)OzWm32?ELp?_Lpx&&lFra
zIw#)s@6Y8u8hl^RO22S>cz=1C_DZism!E3qcWhJJaG7<fl-t(z8-k_#^C~Q^_gv3!
znw56?Z}L%gMm>=k9->^V%}fb1K*#BFwlXFvMyQE$@ir|KnBk%1_Ik~=>c7P{QQ1@0
zZ8SY@J^j@BfOv+gKgz%IFI`%>q*rgX`Ca`F8;<Y(_VeP+zZ+~11y|a{IYo(nzhJoh
z_w)4ckI&tDmBpi<*}GewXI_B#={fg>ufG3k|E(}#o=|;x;$H=G?yE<pvYq+*u3~01
zPmYiHg#4e(&bR;bf3H9D^3kg+AH4RKz7~+2^R4LW*BJGBUa7mv`CsNvpLVT4b3KpT
z>{nNvLs&mQJl{0Y&psw)`}_a%zTDsKGDli!NmE#I)0DtZOAl>XFLh|?_KVJ^mnA%4
z)DoGZqarQEa;R}Zh>nO*E7Kx}2?1In60J@u<yFsP-j;4TsWM@A?@Xf~OWhsc-;cNW
z|9tV6Pam%5Z@cs~yT;@@`{7e_<rgm5Cb{bV&9}GX|6Hyz*}_?8#W*3N<Q0pO_mg{R
z_u98F?)?4lae%|OJI!Yu8vh;KeQ*2q10AzMlx?yiO_lDh{PpW|YQp?QyKe@3y>l%n
z?T&`u&)@t{h1ErOT;25O+V$n_t}%KX6Q0iL+vFp2?%me?=c4*IPl}LZ-|xTY#EE4W
z9&$&CbsK)4XX++byCqyHC^?gz(I8~PJ24^FR>s7D90L(<fwo426&@O*0&_Es=e_RA
zS#NvRt9{PhtW`^|thA6Z+jB9FzjM94$@iPTeqWwhKX>sX>!+`;t-X9;`O{NzXRX#n
zJId~^Z+g<Cr+vL^=^?jCw|{M7nz8HUp?$>-VkZy1;12P*zJ|4H-TOU0mx?{&U3Z$f
zY2281BuysA`BC`2+P|#P$)~5z(z?v$e(CzzhdEid#H(IKe{;FIJ6hwVi29D_ZLMeL
zxi_uiteW$v&~I9#-t;LhL4Gc#pvk0}|3o!(?g|FoH$OIY>ahl$R>fNjD*C4|?lk=L
zy=<28{#B<=uyP(XyJoVbmL*TipI0nXb)U)e83jJo*L9d3ibEnJjrXj-B&(r&@WUlO
zrO*9}K|a4HMvEHQMWvNl_iuBmI#+bh$5@K*^tvGL8pr7!jAt}Xcw9T6mn9gbA{gU*
z;>;}vFCUK!0%!9!G4?K&o>pusT%9@R&SBAXi~MO<lBKKu+H5}FeDHU;VDul&GX|^+
zLYr8B%r-vrfB)W#Yc{<zDSMcAGb_Qhn@uI~Thj5KoL@<Elp3yhXt3O2bnv}lyJs6u
zEOP^++RVRaqzobtCaZc*`(AaqG<d<U%}3WAvdMmAqw}}CT<-0%i7E4z`E<-@nKrjl
z-+=jwQ01n3CCO)d-yC__((tI-DK;r|#V^ip9bxkpx+_(%U&*Uwf4f@g^)u0eHMcG<
z*U^ZG%HO|avGtxM0elTA9m!FTUwmSA@mty9G|^;<+d0D-*^22`Lwc7A1SuF^RoFaN
z{g{H#bLKq^6Cxu|-#upF|2gJ)-x4mhoQ!=_ggS(TpD49uUwW+iTs8g|-+X87<JYQk
z3g*tboy1$gb<ClGS8C<sm)~pZ-Sg9zuaB3%(7CFavC!&7?1|#PjQ>CEDSi{D`6|S$
z?VQmLKL!St6vtUM{9LTH3-#+Z)~yR&w?FRPy7-7CTi;*(9n`%4+WsK(*Wy{;Ti?Cz
z|MuVYJ!iODe7NeW&Y%B(sr$aZ{wieo($!__%<n&6|2V_;(bRuC>-H93E1V(}_3yOo
zQqA{YHJ*6d*WWYC|F61j`>W@PJI}=5`K58lfAgn*<=b}bST%L-+S2RmR<GDS@5)N;
z|6kl!yI+4Tu7B%>eynHh8cW+C#@v_}ox7r*{*AnQ>s9pM?7ZvQ@vHT(o_~FHhWFcl
zOTH%U%~HO)^y~gNQ+M=-Pkuj5>5;8y@2`0~b&l27uMMpGugE5BQxyL?*qiIu%~O8w
zs%O_Z+f98`E4FGye{7j^;o}qcT~Doxo_{4g^?N*<H1DlDjjC5Nis}!y7-h^1`*Fzq
z&)1)aU!Lcmvex$B`k$Hx`9IPp?rVE`-!V%6huXb%i!`$tl{LoY**XRtQrB$eZH+7n
zKWg>$ZuWY?{qg<He9118-%qJ1ED`#DeLt^{wrAsn<58wR{{PWlR%|c9Ar*bOGUCRp
z|83j74&<y3Q1naeZJJ~G$NbdMNRFHKzpv}w|GDnp|ENFe$u&FwNlvZ3Cb`e&_lIcn
zci-P%y<D~B-uke;`il&HR$iK*5Wo1#ydv#x{ZF%1?5BO-@B04eq>?Eym*Tn}{Bh6d
z@qDrO*VbQ8>w2C)TIP36rj2_&%jG50-^^un)Ss{^IPUD}e}&nHJ)hKHoUz~J*d@0B
z2YcJPBQh=qR~C0Z{;ztyJ)<;d=dQ<0g#|};i|GH@&Frg^($>?c`t<QL^QRjR7nVQy
zaMAGTg6xQ9hW`(Ge(!$$!9UOB$Ki|Y2`Ap)SSRI~_(<d0#xvy)Po4c)+mYgx`@U|k
z>UX2BPya@3UG)E+VEfea4t<Z0%y#7)w|=;re4IBxCM6^OuXOyJS6kzM#7p^va&(4S
zgsQ0>d1zw){`&e4=dMP6oK^oizKxUT`>e;k#V@s0j!$r~kINGC`Qg7Ut?=vx>u^zb
z>7MjMG3?9gI7_-eF0nrQH05{n|5K_N>c5o#?ftd??45mE)tiL#z6M=yJ@q1&r&lC1
zCf09R!EgPa1v5A9JISNTdsg;eMC{bRPycUalfE5MsP;O(kH0yg;nF(kZL{aL{t7KO
zeWYD&Yk$IX)xQf@9)9rf<&E1TG^26mA*Sw`9j321R$jjGXKBfe$l7}0%80o$XZ#Tk
zO1AQS_g;N%Rg=-ZV40oS*SPnG)?ZtFa`%<pH4ZCY?%th!N-2NA$%pbe|5;KOoZJ^R
zUFQWi%f!o8v%CH)Ce<zd^lR0=|9_=*RaWjaw6w3y5$fFgVCIUyHTyZTZ0#)P?P+FA
z)r+axefR!C{$IlF3H@7)J?vS+vy0WIU%$Wp)u*bjQQ!AJF?{<qdDF(H>ulWj{qLT>
zJM@0dPMdx6H(KBA^M6<M$EM9KWqx5?d?hpUr`y+m>Pm*C{_}o!YtQDpEyh3ptjP^}
zEBd^D_xs&Z{iXFyMVWj4Cw{(uSZ{H{;k&nl*<Xk5Qg@2mdh7d(ot_<cw!R8T`TW1@
zY|QElt5z^DFkNz-Wz(<4TC*ayRKITD->Sc9lfmQ1SN%hOudiEM@ay}lyMhM}|N7>y
z|9btuU(@&Pj+f1Tzi;X5pWCLdpSEsO=#C5bWxwt3@D<tg^?IGaE7^nmC4u4VJzw2D
zEvjAq>uYV*cNe|Y*O!azi%>eXuzhzoXT1Kx`|0aXEVS6ikz>B-)t#7!41pQK43n(X
z*RTKkYx}F`*<Zs}S{O%$n_0%I#YcUq-<q^GVqN^w<d;=`5$&(ft$Qw+d&4e%jzhlR
zpQ)Du);&{O6|V9%zqD%Fj+^WD)^+^5a<H_}c75I2vbCFEZ=IU8HT<)F`L8ej(&cq~
zyIx&g^>n9Y=*QdB%Qn5=vHNfI*LBZ!Km8pqo>emW8mDdc*QCAKU)MeV=EfFk#3TD}
z>#wh=_v=erN>3jCZ~LG{{qI}-x~+BVMf~Ug+S>kWZ{_PRoxb|7w_Yvw&e(MQzv!2(
z>VhBTP3Cn@nbsY@$o78P8_Bo7zQ2w?nf3ACrZ*~)9{cCTH_4T$tUmPjR?*+U&|3BX
zmZFkzAO1hl`TVbHHb4LUNiV*>=zI5Z_n|tDw?7QIPnHUo$(-YJu(~QFm>&P<>n9Bb
z!`_2ipS+s6_j_@CQStj<`>wC%Z^`<-N7=Q@pJ#&Alqamr*N^Fyy*}sq?U;u-_vDMI
z=BsyCUs70CA2<Etr$0yb&i;D(Q@obyYpwlf4Kf+Ix<5_*{>xa|ZsYDFNxHIqPrhZc
znVo#S{$dR0{u4eqF85E`nz3!lH~Y9K@AZkwm;0Tx)<y5${Z>xzWn6Mr`<JN6oSiON
z%}S*lFMbyI`15~PQC%0TexYY;Sk+wDe|w(CANEwY*7K-0y<ojQVpCoHynP|Z`lGzg
zWJ_;c?-cX?OS<~rdo53YEzNp$eaXI0b)J9ro%r(Vpz8g?4X(RuQ@;J@EzhkL%9!{5
zuMo$5*TfCZ%Y7H^l@Igx|GxKhn#k{YuR_%IDs4GloOfTf@9OT(&!XSO^LI~TU|`wk
zILnsbi?x1XVcEL!Ti;(De=T16`u)|NzgNc=cz?Zq|G)a{Sp8S$uD`!_PyYL_*C*db
zWW9~}9r0IU{q?x5`fEaq>sjiAe{6cWZC^a+5A_f8{{$`l+O=+N(T`tS-?@hWNR7Pz
zTJ&?>roUM}Q@gJHuU~C%o2>F}f8f(oQni(DeEKH;J0G92yl2vnsp~$v+?(|N{F~JH
z?7LU5t-cyEYjxe)xK5FIzume&Uq8#duhTy~H1?;)rdj{vj_Ul|TdEztzHHNaw}QL>
zj9<hS+G^>SE4;U_n_jqYZ^WK?@%GC5AJr~>sUG*Y$~$5w=Q_0=J5EJ(f3Cc~J7WEn
zl{J6w>xb70?Y$^_e|6aEsTIk-Qs$RJ0&h=PoZc3)`ijfZyNq{IwCmSKe|=lE*FXHv
z>Fc+?M}2>N_131pSud|PZ+(Aleem^mLEHM`T+5!ixV8cY{*y%<b_Yc+zdADgZiV8D
z^z3iS<)1cujSR0{6Z)F{e29P6{q=QwRaaZ@W}Ulqf?fXi2z$=ws{#z1&J=$)-4M5R
z;(U=jQ@LQn_b1dsgEVe-Y8%{DerMzIWnZ5EuP;7(Uf*3`b6+=bQkAavqmwJmS^ATI
z8fWQ0*NgbL-T&$T7*mBkcHNo#4}5u9!?NF$N9>=l`b=%@)t7$PZuz@t<BvsNM#=6{
zJ{eEH{uSFVxXZ-iZT#Ms*Y>!tzS66|rv7BfrXLlTHrBX*sNxec`?G6e%YKu7{RguS
z7%cJs7VOU^ux#$nWr9`_CtF{9_5Y|Meez|w>)v12*LuF#x<6>roX)zK_+@(WdhwHl
z|7IzjacE{$I`Z^J?DRt)U)7$`UdiLuKL3BJ<srs7m;Z=M*gd(Va#-NWwB2j#4L#@n
zC}{q2{c_Yv{=H@Ty7Dj1S+!X2o@46d^6tOzwu^2OzY-qutKVApV7=e^eR6pLy#G1l
zx=)_{``z?p#&Z9If+mxmtlq7E<gxFuGFcwW9X}WJE<LtOf0KZ&z`Qy0?pJ!Ze)ak+
zxADmFW75W)FBB*9{I;o@^_ZW5;m^j43(mWJi>#UTNhr>D>od!<OTz41Dto46C3>8&
z-Jkr|iLL33%haF*{vuwU37^yAuNTWnPWu1R;z{@m*)9L7{%J|E_|9AZ^+i+FzisiK
z3RxCUmc5mt*fC+r<r~^7W#)ajGQaqa==)!~uMU-2zqtGQiojVZ4{g>5Et3vUt?=G(
zyj?j_n4SHPb+&U+8!LP8vE#0yrZXJeUEBY6y<~e7X*5m2=tJzGIeE%)3b#tkZd)DR
zID4iaN8hZS>8q+9t`bS)jyU@M^(j@Ig2cU+e<OcA^2~7NUoHB&{ZWcytoA?8Nw-$N
z+xka5*}Qb}_q!L?OyJoqxnlq9f9uZamb7)+?>{Keaft8m<0tIjeGjqhR-frJ^Nso*
zllS8LULQ0y+hFYLmAp4}@@&g$megY>?2EsfFFJZ_u4?QvNfuxJ*E){7KFiwv%f0r#
zZfouL$+!RQxijVO)AO&6uG_yQZolE&`MWO6zxZmSWlft*ZjJT#Nj#<RKWE05W}W+b
zS8lWI)PG0M9bIFqma+Uvze|np{hc$H|I)6QGe`NCCinGSUr!}f{r|_&Vx51w=<6dc
z|L3RwY+0}RJ@s$hI_;~EZBwUT|9>$uZc}dq0|V1l$62=hS**2x6?5wL-nxEkqJ8-8
zw^@I)*2Ra{tqc9Pt!`)BYW=IP{;7J*dp-UC>Z{_P|LqLduhqITZ|kd_o)0EZmycar
zwz<t;{=<FW&wqp6&R4uux}F_cE42RC!O+sxk2*c0j>g0X-3`BWwdmivy1n|XWh<{u
z+plu}hu*uZ7w-hGP}w$LJbiin@h?B-URrJEulmpX<j?>0S3erXY+D<vJ)_l@vo!ls
z{EKrOUs-1F?_Bl!m3!O!t@T&;cYZDUnRRu4WaygsHKFCFf9+}c^?&8>@Kqo0YGh5G
z{xV8P|Nr%s!C$XuovjwWdi{Ldx}Vuz^>O=y)K_gui+c5b>&ka~qnv8~Utir<8|z({
zB&+;C=A{0k|51Ol*0)&nJxRZNU0l5O{^~E^Pae^I<QslJ^i%k{&`Y+3YOh%3q+|XZ
z+H&=m(EKkRe`jyIxi>ER;qIs3{<%&+@AB>0ldi9;Hn_wYBzApp-kN*R_UGsNCL!Ce
zUy6>`eAu)7^xeDpS082Hi}Afa#r5@PvE#<llO7d{zJ9dn|8@1B<=LVY&JN0(#oB)U
zPkr`=?T@FiCrd<SZR9WU!1B1>`)fV4*UXRpy4t_V$-Pfbv&O7+Ug^))<45m#>)kmS
z-(xCrL1OabV`fWtZ{YKv|NHhN;prED{fRBv9JsXhep`;eHCGt($$GD~8SQ^x{Xd=g
z^|k)dcmKk8A3vUGZsM%Sy1rAEkFg-LY5qkCecL*l8UMrVor>1De@T0P<ocfQuSM^o
zPd?p$zsWn|hFxm-!m3U2b!Ye9{kQ9HjP<)cuIGKfN3VXR8(4EyXz9JjHBD^C)s`Ip
zl+zczdh&(olTVj_SN(o#|N46IOKa=)mhG+Uy7cqR;^h~%?SH=hoTaSU-B(}kvrM~x
zc*lO#`#-C9>SZn2S);vo=4Zz66H}*N?_Rvv>s!6;f8K~&Cts}d>{zq(gHzJ~xB1uC
zh5pym{OI9T8@hXb=&G-WqGOZ{rH-UFFlt<x;3Fd3+R2=-Bf&(KyS3?I!-@zEGcMj1
zM}Y}P)I_h{dG>Gnu3anlX+4YEx%<rP$reWMoxAFe*T%{_?3VYvSKqd7%D>%LJQ{yY
zi#mT~`H$(l_Qrib)Espwre>vH`l&>{Lk)3HzuNU#)tFl6FS+$f-TnMmQR!uTiyw3K
z$Q!ri22NzY^3vri`wH!tiCy3FHZPd%c%u5b^tS9RDISjN+GBHSj}$*mdAqFBuuuQt
zoj)bof%RvM_pB=vHrnEQae=U_HGAWt#e3iQshyrFF<Iq%zt4;(Gni^F6`m_Q;`{qa
z%z0svqf@I-Yq8eoZYW@s6PchU!qv)jsL}9+jyspGi>krC?BH3?dtVnVdEd5bUD~Sr
z$m!v#o(Hb|Kk(PSJ5cSdXZ!v|6}9){L<P(4@1I-s=J<7!fBRM~imp;vuJ=9p%?#@o
zaZ+--_j)=WzP+j3B>8V$a>@5UyQe%~tbEd5KgC9R&Lxf;M}?0DM|&^$Y_n1J_C{@E
z?~@;IPkQ6M?f9o9y6Fku&q?mz7XIV)9k1r!Q?BpP*N{)Vb^qk+^)BB}JZF@Za9)`3
zlJmZ`P4%TWb3%IaH)PM_x-nZ+N8yL?ZFWX2ksdD*VJ?<K4I33qB)D0f6&(!%v_ynD
zn-&WSxVVYAZJ%c!^|7mQsrS-r*`>lE@y?(3AN;ZZ)1%~F@1FdMvQzo~?|kmcL-%V;
z-v6KZ>;9vPs-oCgJEht9AHU?9YoY1NC*!d1|FSK*lJ(nawj{3p)%WXM)BfheRnfMO
z|98$1I>`1tK50q(?BkWoH_ixN|NhS1jWfDp%hd#5UvstI{~+I6tbTdC*4^BjNrg8n
zp06+8&KJT`bS1>cWzU&MweM1oS$+(9Y!?{UFS*HP(Z1B$R{s_i_dx5&H`7%2Z&l23
zytg7|^5&NEn-wctCrxs<FH(5Gs30;yON^_P=~%;t1v*l~oJ<cB7K&)7b8)&TDs&3Z
zDz5u4Q<v)XxazcNXX(VLdN0+Pr~RJo`}w-zz4*ER);{l_e_G<#i}TG}-kraA^U3#M
zmpjj9^G_9(YwTD3tnFY_Xr=#s@6laX8;?J=uRrwdSKr_A4tpin@E0pz{NLj$_AmU|
z$LH=3KV02?^L{|n13$Sp+cK}7(pvB9xF}&}MQ)tw-rpxBL!CA6+?Lt*Eqj7{+0~2X
z#?1n^|1P{2|7~a7;?q$tpC_J`FFeltR5YMslbhVShY10R>aFS(N44X=pV)JK>*c)x
zpYDLx+fP^~!Y#{maKeR%JTWegL(PE-Q=HPvqt46ReH8cHR`uOeeXUQcy;U|IvA@3R
z$GL5P>yK}Lu6F9@(|VrQU(JIq?(g55!hiMuk8dv)&Mf*r@!zZku0?y5iyynyZ4eZ1
zUff~uFHbCE;o2Qnj{P+;Em<(@@Lu_yQ~qwB^Pfp1cKesCe_6qgQetDy=6v*)dp9$4
ztLW?AeKF5XE!Mf)27SCA`0L%Lo|2fw|NEoA$Uc9h`!wTEeM<QG>)ajQV(Z%8ue+t~
zZSmYM`{$ZIYt^~eHa?78Y<+GHTSoatyAQ|s7&SzCG{m@gK^r3?49rA1TN)D<f{Ghf
z7bhX3!dIV<&$HDpt-i9W%ds`;l8L?Xx}D4VugGujee|Mo&watT+OJ-%I(J@38f;n5
z9)Eb<>FP4hj<!GXulbm?kGVXFiZGGhd#}CuXTyEJlt!sjU+o$fEqx^2r1+pJGwOb(
z|NmEicix;IxpDrpuQNY-F$m;_YHI1fQnvjy@9XOKnap*;?|<!Xma|~rd$4U?CmWwO
z`_p4zm>wUw!Tve^NVV#Rk8ghLa?F0Cy)S2a;?8a+)eWzD8sBVLIGy85XZ7SaM|~<n
zoKC%c0=l5GN5hqym+7FxhB-PCysd{0I&28=5#eTKNp#fc08P0xDH`nk^*-`^UB$<(
zSF`I}L#~CKTDH|w?(k<R?FaHR|G)mkzj^-l)%Hic?tJ{;|JT~}>zAkJ=g#_3u5Iu*
zCH{K+-&_X?wvJsZ+r{^XSawfW{7|7De^zt>`@b*y_i-M}Qs(<O=l-hZJqNbvhr4e(
zdO&#NluM;sI(~i5zx%G#QqAoA&YPW&A5_Rh$*;Wj<_O<sYwp_kW6QUG%1X2Lzo#wp
zsZad!>&Zp0;^l*16fk7`oD<4mP*f0<S#$b9tJs-et^MD+1X{H2Ph56+`IQUo3=B-w
zj<f9eyI5<l-sl&+zR*_n-=?4YO1pMi?f$>(Yu(j1^F{x^7X7vJmu}r!*Z==#t@C$z
znYUrv-c;}I>(T$0$Ndc|i`ShH^d&aFIQG}(MgODrN0#j@*S%>||Ecc(8SSG<dxeEx
zhr7g^U)r<%)~la>8mqS2&G*UJ<=bDhy7Rl!{Ua$a<G*^B#7sIHTXkjHn$Wf7CQ%!I
z?ebHW*{1q+)z{?x*B0%~`djoj>u=WI*tqqt*8Po({2#7$_4L*~(^b#D)?DWw_k4Z$
zTJF`WcDP1G|BdMU`m#3Z{qyOiySJ{o8ymK^Y^&<p65pz?H}YqdIxMv@^8YAabhW7K
zcZ7ZTwCk&{t?d@Adp_yu*He3AYNhr^t)BFJ*OrPs;p^7N)&6|Fh}YA9^7(knOYe7o
zzRbSh*r&^No;S{BpO4AkU$-^%<m*dn`TMS}s(ZHUgXiAgpRafAzx!+Buk8=|&R$$x
ztpEB`)vJ9<*Oy(2S@OlRPFm~mr}C^j)@gO?{B`fn{jurg?w8u@En2UX`27CS?|Mr4
z>hAR{R=(QYvESp8ZcnqfX#K*qr2bsE&P{&a{<HOq%j;MBzSwK9wDk4;#k;Ei3g4_>
z@ACd))!yn)lT%(j{rYNi=le;k&Ux=sm0t2C-224^@8VnH61kGQIRm~vJwIV)_Wi_3
z7m7+h?%qA+`;VCqQhT;leTuvPDf{Zm>_uCvzU*tYTDNC;LPoK}wi=1*oaO&7D((K$
zRCZ?i3zzsc!MB8C|2f<KUQ}8CU4QY^-IFhx{!m(d;ojQRxT=p_{&y=B)#|IhUh&z#
z==tA?_wUSWc~vuG_Zruv^})fv1VygL^~bOGEiHe&>PwhM{Qvn^`%h)xf76v5e?H!B
z`vC?9CQrv%cKxeZiwgJu4{Nz@TlM|b-K%TM!*9KQ6t-*s|NsB4{=U7x_TP4^saF3P
zIfQCH6(>)>zuxuNuC+;TlP>GrH$U_4-_=XEHa|J}{(tsYzZ&IGy`ZXBd#e&-EA($~
zKmGCU-1-&WEqPw^k4+FdDBgGE;LqfQ2V1WHI{RzS^!<;rCSThh{(jfjm3rPRDSL1I
z6+O1^+0M#WrS;eTudUl^`tx;|f8PGDtF{^i)_-*?@P0Y*(K^lStyN!(qE|)jzxt4S
z_ui|M!%zR(U%zg#ZE4-sTUWjY*8YDRv-8=7sfQ;`zP3N=op;Rsxcw2UFKT(a*8Tpe
zeQots-ON8*vo%)FzxK3BMd+92>Lu&EMX&4Ly8b$R*HNW#*1vZTmpop7#v|`quRVwG
z<eBwG|6i<+%W14U%(U+0_t*cb78t&ai2o`cRQ0uN?I->C)&JK2dRn%oYg5Yp2W^*X
zgsPs#Ir&azW0}@J_5bPcCu_sk*G$!MpK>g@`;*xByH$t9!m55gtSO7JiVaHnY4mE%
zn!3G3?{}<seSe|ddR}F%_>!+*?yZ-6y=vFz>$h!f@83Ogw?gIJbJof;+j~X-#;5Jc
zamu^+_4k6ieerIm;yFYv&bU?*X87#wr@u0tuR4xQ39s7cAE9I`ynL7c)!SeHw3nY;
z((`0{Sj$!KUq@B1-d|){KKb&WIY-=W)-0QC;%{4b`rn`HlP|xXd||qh-e1YPr(diS
zdi_@LeSQ4twCdB@SL@fT*#Gs~3jgxBjp1wSHg$YaUVrh!e%I!m@%y&NSM7{nvbH>K
zWBj^6bMYbv1_q`|$65CLr&vpmEX=E||6lw6|9#)PRqx7Gh3{Pdb;>(x)~nwyPOZ&;
zwm)v#tC`-ldZ8I}!@9M$7VrCbvtZ?~qxN51Z@t>7eLa5l^w-m0U#(xwRMBUudtiIn
ztbav1YvMmYp7z`}wq!%V%jh#=^8YrLIDJ1ErFy^i&i7XjuKCsc4YJw$G3#;dANN;R
zFMX}ry*2CllhsS>Uww|P+8h7+uCUFkX**_C+$o>pUnpg=J#Jan)#O!IcYnINRQBKc
znDtB7U)`5!8#C?o)!%=s@?-t`|E)>dzx4Iaye$3A6N}GW_!trs`^UBF>t+9<n0>x#
zv&3Ir?^s{qthVaq-`M*v_UbP#FVEPk^Q!9oz8As&HZA&p`N*%QKa`i7#n`Ll*PSoV
zH?#k`>TBh@<$Yh*pUphHlJk$>)~(?Wx@XOv^7Y#4YrFsavFkkcNI}K=^y$gh_U`_=
zQZKsuOSsYhSBc_vzyGLT6nk@m!&E!g<=4?)+M%oMT~9?%x;SgE=!x$2W*qa3zkjco
z^Ka{~t-nlNDl|^>Ym3Xf)*lqq^*OTJ_4@O5%lrRk?NLjv*Wr@j+q2{OzyER5{<~H*
zdh4&Rdlz$k^5NohcPndlevbRL^+<WuspU`SGCuw*_}}3ArC-M1IDW2g=Kh?KA18VD
zuT4NsX7#?S%eI8Cepm0-wJv(K>$(8<=huU;|8W$Y(7b0-{7K2~@6RUG{crNkx<BK4
z^7`_ucTc~r%~qeZGdnD_@~?1YZT*`ccHM$JtitzyDXRYU{(bVjUe5lfzrMd$KmTk0
zi}eN3;dTAfpRYS}|Eu24Pj&SiF26+1Upy6FyJ)U$xc=44k*s;0uUhu%e~kUL&pIo%
z^wyWy_2H|JBpEp{FfeyG&a&@+#adc)wOn;!sr_ov*FjMmer?oW?;8JFwCZ}+*H>Rx
zZr}asdtJ-F&gp4~%RPU;Tenv9b@%$Hzj1rxx7J_N-uLutwal(}olpO5|MS&`t+W2$
z$-^gh-&?q8lGv&e-37P)z38|3TKZyr*U{>5-+EzFt^K7hT>b9;*>~~QyLIcoR@TP7
zTbVNPom16Dm+PIP)%Rw7QL8`UUhCIy{dN7d7^$<DUKw9o9rg9~*Y2<HZ|aA>*jj#2
zwrW$>{<!sVZ=+Ys{=M~O`n#RsUe{0Vtq+OYoun4Wo@=n_<B5Im{Nv??OIKgbdOyu?
z>eJKf!|cOMifS4*A5Y%7VDE=(J3TAw%M}u$?zemo{w-6qewXpKV@FHHtJjAwdaAQ3
z>z#l2{_x%R*FRCtTz>z>`uw;(0uhQAkA1nIovP|^#&Ov(Q_b~H|HlYCFyW~_pLzRh
z{k8aY>#qexU4QjoXV-e~r^nj7j>vOP3_2Y3yYcD&J(U(G+mmV&`Xt0T?EY)axwu|*
z!}Au~{8v3S3O-fqo_OwmD9+Wjkufgf_F|7^`PcQ&&OiTM?Zxs;y}fboa$P5LtDV{~
z`NzN9lo$K<?Y@3xmA3GS`LjRO*M5DpLo<BltgM5aY0IyssFa9k^JxEZPqVx5{p*pc
zFZ;Sh?irl<ko9i*`5!N4{AV%kY4UrhTVHo3W}f$->!<t9g)TGI|KDWf?IrxLs_n&&
z`und}xLy67^7xq0d(Ehc?r)BNVLM*8?@?uS&$-&^*FVdpa+G*x&p7`1--TP3b_-VT
zH!m06r5k!-{hs|CumAN1RlRz@@AsdjvMp7dMT$O`Z+6xHy4z$~JKMG{(Qu)>J?Dg<
zx3>$v4y`@;!h6X_re(imGxpd2TeGREe)IOcHJ`Sh=ic!0^@oRx(|D%G$$G}WG2Uc<
z{fqLCr+;h8PNfB$<hgKn`VnuJx`v|qj?0_{rQ0@2p89I^>;F%+<n>OUKK<_4F+Yjt
z?8^Lidj#EHhL+W}9(DbHZBM|FC+WNFH}96d-Z|OFcxsode^~aude^FV)sqihmzT2d
zdiwFN1@{+^75n~uIqTUL&iyyoZhwn!znECh=~+$k|2Vu3tx^8e@mcnM+}^NdtHoE(
zKao}bbMxQ$XJ<DX+^<;saH-L@`5L<=-r4PcqB8T-llz}zzdqHy=P!KjaN(aH-G4c~
z@5aB@yuN1p<ST~%cir2wsI%vjp6c(}7iPVC|7`N))6-r(O8@_f>#;VYlE@St5pGtO
zLk=4sn2B<6g4*yQ8m8Q=PK^c=T+{@W%~#bfIh&Mn_jSasb4F7Qj|g5Ukz31t(9YfQ
zS8eD2{Ygh}Z8bBkj`Fq(N<BFHVJqw6n9#DO4evt^XffKUU#z(GOaJM=*Y9e+M%{^*
zKcD`BZ97NyJ6rW4kDPB`PNgxr+i|;BuWtN$UTRgLtj(|E^2hfHf88f@=7^l^stTFt
zk8{h9FFe2z@LTR7=gmyf`f$Hr<#AQLdv`ZJvM<}b^IQ5q`EUH2?y{PNI#ecZ?%Gjr
zUSyS_yL7db)`R<|C0qEOX1wd41KRXFLnDG)i8V3tK!l%|RBO{hhYcYrf}AWKAGyCq
zJ%4>v>-&)vi-Tu6eGM(GVZQw3|H&HpN74^WuGLE4Pw(Ft{dU{Ce|L=SdF)re-M)Ii
zvd_UMPapOFu{2?4GhHJ5dfVB~jJswv0Xi9c7FBLChc7St^gMIz<Z{6!-=BT_$7c0s
zbHerj7X1n0g6iJe8afv{`W*M#!IW_R+S{7va(%TQv>dp!nC#?o54r2swibj-zxuv>
z_tvD8`2TimkJ-dM(9Tqq>kMdJ>%e^B{QhlOVpC5}YyP2WcrxnR<?Cyg6ef4@XuIu;
zHrL-~ymVoi)umhC4}g2tJtEv~Ovf88<miY9v$8C5*eH@B$jNd@xqR#WzKT`*62rFY
zt^4lky{q*_G4JDVQYDqIBERnV68&^}z1Q;f_kPZu6Mg#E7oJ-0wFho~_wHl=xZ8AB
zV!P{G^G88%c2A%CKXuNjgG*-zCH8;Ve!uC@^3db+wccH=KfL1Q^yuvVnFn3}ikZz>
z!=NN$E%3*nF)drXvzW0V^N`Ad3A6Sd@OT?2;h`RPU{0nf({Igp>)XwCKb~{=ciE%e
z=AX9<U49t-YSQ)%E3OOn@UMUFuAyMPKSjWE_pHx1b=+PT?AX4^)BN-V3q~!G32q|X
ztV|CZHY)gNadCq7bw_BZf*C7XS_B1WotbZ!wJIsUbyLjDR+mR9er;X<KmOxotE$(p
zJ@W2P;d52j=liXH|8hO>wr<;<-1_%Nvi3_>cj)Pt{okH^f8Y0Ke}4Zw;yb(a_5O@a
zPZSq+FL?fG|J$q2X0aZ8$z6Y5{fpf6#S1?<e16mX?y=sCS-<sV^)=^SQU5IWZ^oaF
z%I`^;EvonT1afSX`Q3Q_(9!y`1cqs$nlGOVoATFOPmi%t-`sUn@zc&Pv!5od_NiFY
zv+nQe@?9m_YZq&-mo)S)R*!0w-Zy>D=BSfZ34DxdA~V!fq@|b+Hf+c+6H{VsS?IVS
zKu3hTrAe*g$5(U9SLx=Fl`BiSt|jGpE?O5MXTSc>x&9yWe>cCLy#BsR?xcx--mg2q
z_M`3D&)@y?i_Nrj-D5v<`M;a9fO+qZ<GVh-ew<%6$)#v<A?v2&wZ0V!2lvl>)bUE7
zNbhohh{#7<!Nbeb^Rqls>&vdm_Uh*^b-#bJV%^by4-9;BU;aIMXsdwv=X<w!uO)o0
z<@lHTV^vht3dcP&^44{0<Q-vica%T+W1pJAtP6JzFZdCy6dLiRMKVq6VR@U#mf|Vz
z9+fP!nR3T#|KcD+@1zsw`x*+s>l-6DTS0!kV4^0(dgzFwL4uA6SBsOQzyufH)7SN7
zgT=#lU70jhA}}s#$%E*{OYSedT(ke#|4E1M{nP)u_o~cEhwt&ZldE@?ddBY-n;%@K
zX{<i)=`V8y-vs|tOmo`qy3TI7zv0^FJARhM<^|>l(#>{u3Lf|Ja=6I;U{d(B4+4@0
zJr}F|5%>MfzjBA(^BasRPuZ?{T-LBIUBKMr`(499Gx)x8GILXQliMomr4rUn1wO}I
zHZS4*{z9Qqrz2?wD_{4f86UUs&ksKP`B%`My*5JUcpVK}+I)pgM4B8|U-`MFg?++`
z&mMCspDE{l+h74&m%B=ghqdWp;({3_(%dbF8WR?PPoi)-=x8uSLs03{x$19cPL`U!
zzBO~zO0U_WuUzKv-~99cMb&rzU)>+7&3{k4`Q-1yb8AI@zdSzwRc+q=&xd{oForL#
zj<2@qYEAjRs%zf#*ZYl*PyK)6&yBTn7V2Az-~T$*u{CGroZpF0&&B)PEDn#|dPRsK
zZc27~$e(Y@oLlnWil4O7PyVhN74I2WU9<h_zkNcI8+<&oud!Lq+t;|-?_(bOslVE{
zU(`u|yIB2??VS4=&Zq?jo9bgrw{J}7VmZ>|c(GnLa%ZQ2$=7=-^%hf#x7~_d+OkBo
zx$@4GKMx)-FfcJW&T<goVl5O^w3W{K?BQPj|JRQD+4q$HxUY@*zuNWJ(OB*Hldt|>
zUtOBK-}}|^?9d}RyTi1v{$3yUH|qV@LwojB*@tPr>)f^Kf4J(tr<dOC`TAX+H7=jU
z;<bD6&gzf%zWmRQcz^ZvEbn!pMLRlX|E+$uJDk7t=KcEh;VSd?)@;7@<;EQ!?O(fo
zUhVH(=c%`TTC4u+pu54|#c}@jKQ-4~*SmV|mu}Vr=aoYKaX)4%g|B+{D17zr$*<L3
zW?$bmecAq*RqMQ0Pr4}OJ!$@~r>^gQhre_B|2OLU>+7#B#r}F2x54v~#$(>v8Mout
zH~v4awR_s%h)D+>RVPOq1$V8R{59*q^tiou@7#Yl>GbK-rY$e8uZ~_{R{mDz!Mkji
zcdxH5KmPCflCLjzzyB<hfAZshP5b>vBGZ1_{3}}j;LS4s&q;bpzvn&Oe~_>GUi{zc
zBY%|FyJqHpzHxTj-dloGtM%%`f9;uG=cHD9_j_Do#fjFVzossW`@c}Y`^nwkVY|c1
zS|?7Z>)ol|WHYa8)pg18Ut9Kdi`V|j4tf{p_y6<-=Sx#hzWDVn^_`@?di`df-<$qF
zGP-G<lKz_Y>aSOOtDjB2rholv(Vgvg`tR*oB>U-yvewq8Ve8cwz1mw9v(ER){*zO6
zy_d&H*(_rEbu`vqec@GIVdq7;b!&eejI25-p{7~4w|4a%ZS{3a8yFZE|2occn4kr^
zAJM<Kkn8J0d&z(C@&BKPysY~AVDqmX|C#Pa?YtYGb!5)<v$2}9;=?A-e;U4a+PdYh
zVs^j!cJ<x$-5p=2Oj{Qt@Ow>o?f1G!md;~e?tUuGZqNEO`S<^q(O=86gYML?J9;)e
z>(jd(b?c57#jXo~b#>Jo`PIU|zW(~Muf;1%y>(@~%V`(!f9k8($L$Z(zS{LQ!9Q;I
z`)84!R^N|=o__VQ{MOOBt?K$)1xt?{z4bJ%<7w1U&|OER=la=;s;25))6Cy#vZyv@
z_3rhWA~z>rEcXqq+Tyxe@7n4!PWz|5{XKhk*O$o9>n6Ka?~YmWF=fi@s!Q+py*`;A
z`r=*aw~evKqPAzumHW5;NJ;jrNmaVxQ)aFdESNv-_kOq2Tg>+-DIS@<>(izDpRXUU
ztW}fxI<-nCuv~rh-K*cc-~El)RA2vZ(|Yy9dEetYHCDT?zqU7JXRXlg)k{mm<8&^5
zi$CAFzC$B-TYW;hK=x0y{U1fsQf4il;-1iEXI!~5_UJENw_hqj_cPY&uZjJ9<<I_g
z^6S>U`k#LH`f8@1$24B?|9iUob7}U24qv_}o;hJtb?@xk5kKSHuW9Qd_C@Vnw`Z^U
zucP-<rp?{^IY=h;^S_N@Pwzdco1_yQGvRNQe)#(Qd)d>k@1Au=x%XL|#2Q!5Q@g_d
zR*P;izqbDB^4;GwW}FXTU|_FyoaHFs#adJ)cI#KT_I15yTVLHxn)FZo_5c4T|Ng&!
zYE9YN|Je`!{o38}^<{0U>2<-+A~W)Xu3!0jtXyIFUr}wFy{o6cI&!b$|0Mr^+&MWs
zKd0TgA9r!{<)39s_V1l?Z1ozuxWX@823H*4t4(RzXOi4t{Y!Q_Z~3Qy_cM;`tT@^C
z(Q5MRUt3?e?Adzs{;HDG|6jO=Tr!>?ws)D%I{wss7vuI;f6cqTdQyAc`<L+|{;$qo
z)4R6hk;=2Ub@`$1uPl4@J?d}KeuL(J{;TqNmej5Ddb0YjA6IPoS-Y#Dzjof4$K7#4
za<A=yY}4g=|EGJpp0v`|nYaIY(XV6c!`^9^cI_>?{$TOp;=g%H+&5?LXGuNt=SyM9
zs^?Ed5<lGzk9{xY6SaHaFRvTtFaEw;l{bxJR`%CBOV=gkDV}YyT9=)d74^lacS{y?
zu)t>zKIWi{)ok{fKYum7xo=SNpLb@`-t{TJ?;n)kUabH8nz7V*9TlUCH%_ZbwLf^8
zV0XT=yZD7dy6tgxXLi*tqY(F#zjQtQ)z#f~cE2sXf23}&-K&4Qe{oDMnowETeD|E;
z+S1&)LO%Z|&Dr?v$W+HW)8<&57TsSZm{DhcBeUdF|LMOK&8$2tUO)J^V)dQUS?*__
zOgFtOd--3>9oHjOY%#At9Wi0wC3gPMln*^iT2vZl{rq{R_iam5sdKLWi3k6l+D~!U
zb}IcfZQ93E6Q`@M6RNs?X64?F^+C-a)24R6`ns2EQKn&p-H+-uPk+6Z^W$45%lXRS
z_!j=9>-Jp|&62c!{q>u`pFa{)1*+?hO_u+$(em5(`>Xc(ui7$2z&|l``olL{>YjE@
zRuN`jDxY%cQhj>0qp{j)o0LnrX<Mc^Bp&gWjaSP%vcX--QDD_%yOVGKMWm%g{C+h3
z`Wvx1wG%>W((B)vvOm1Nj%iN6qC&m<p0yI`;s4v08m1lcijrHhVWm%uXy4*XFRFiw
ztpB?D(eAj8N#PdrULT!!L!;=ut521}nVCWHE5y>Deoy&VZ{PjFEBa2c3UBa@D|cHe
zR7$*dF|Mr)?b~#Fc3zv1?2M_4C6-)Ow%YvdU}b&n!JK20)!x@^ebIk(SKWm*|2Ll6
zv-<56%VeYZiL0yEA6|R@<oDH&IDWqTo)>?&z_p+I<ZiJYe)~R1xISCFa#_u!8?Qt1
z%=Kq3z0ogp^G#Mr?Jv!jwXxwBi&EEz{k>@S|M<);sVkPhwsN=|8pE_|t$zGBwJicV
zeDAh=me*ajTW)E1&pgS8O?{_#eqJImS@f9J*XjFq1&b=)XI#r~|4m@UyqEv2=Bq6K
zAAQ%;KDO}1%e(LX&OVsNwL0Hc<6rRO$!p8o4Axc~>xlnwI9RcD<<w2TYBScje6(G=
zKkjepukBywb={l4{Kv;R(S4RS1}|P~T(nuP-xR&zm#)EYZo3!ubwPVrjgs#a{Vjd}
zS@4GRKjqV|&H{h>3xD1{{%QYv{pD}Fbl<O*FF47(`INotyz7rvPyT+LO~CGc)@jqX
zIk{|W+4dz*aZTRIy7%j@6agDOx%TsuA75AbbYimp-B-f*Vx}E*@iS!C+W*Akyz86&
zKf3JV@;=PyTPu0AG(~-9k^ing{<f)}jdy2EzrROuO_vo@hLe%%-}V<r`XzpzD?J=;
z{%8K|FBy6I=e~%<`<ES!Jnj>A^>rh6(CyFD;y<mJ{Jz=s-%+l0_43QzPp{UrmeY9|
z@a#^>&ig&>>#8|E+>hjBk#boS6fYU^<V$spN}B#RUS<xN*mcVlCbnIjw|&JajmB+{
zcc1<}|N1^9*Cq99a|L$qFMLvdt3EW&-b3Yh-X_lQmt6Ng{r%eUsfxMplE3|@BP$iB
z7Vi!S?7O_|$)Qt9uPuxIcAb41$CGk@!%HU~|Mj=h7xdo$yS%n{t0YV5H_0h~Qzcej
zzwi5R{lX;&1sml{o;|<zce!fDUZ)uIlLeRFd^K;n?abt=<CCKpdWmDJb*p&tn}gSH
zee;SE-M44qjJnvCXa4J7D4WeOu)Xg*v$y5p&bOL1!Fzw)vw1RmKKs{K6H9+=Om4j!
za^chc7gvSr7o9$`>x=S|579Ra>axC1+kQ}X)nneKiq$FpW&5IS9@os*n?KFKj{oiJ
z&Hjw{>(|$H-~J^(S8+x*Q~ZwU|9)FPXR(WnR^R`!>6Eg>58llE(^~E9+4eQ1Y?F5A
z{kc#5Y=o+APKs;y&Dxhiny!7n*H%P7Roq_l@7hhN)Bn}_uU^{p{`{)<*55h09MAqq
z>Etb)TYEjyM)!|VK<B4@`%TW6d}w?>`R@BA;dLDEt9LJb<z!rZ`;-4h{rd8M9DNV<
zZ~d-Hm@GBLtM(gr(c=rHr>9+;%hHl4F0nOb(+5X$d;N_U>`(MbE=>Fu{nOyeFYDC5
zHB-tb<==KaJZJa*@>id|e|?SHTP>1(`E<|w7@<tB&aL-<zJ7T0Q*P2<!SmNQRlR)w
zQ?0>6Xy<-$!COb{*=}p`f8Kk4^7R;Ry{H=RkNVT6@0ubozq~8^tLv+;FV{@Z{>hW%
zUHkKf>X&KXX0ES4pZ;!Q&+5H_)jw@zw%W#j>S@lLzO-P<m0zAYx9>(h47tBh_q6Y?
zN2{;Ludmx1ufND^t!%&FLe8kln*HiqjCUVkU|@aiILmQD7Hd&(veMsQJ8ylh+M2ib
z>#wi3zP{QUxAxc8s4tUN*OtWpi4WTTf4%4FxV<s%m%e^m?)P4|y3!;lN72ahm+P}F
zTb2IAeMw4xu5j&g(hu*$mzOhyuHW+W|J$Uco(YcESBENn_q;W8f7sg~rl*}>r&jHZ
z+Z#M#O4}o?cTx?Kw)Nuw|Ht>4WUsw5=hNLCVODo?&PQuDzLHnd@10p$sK2!MLs%SV
zUHykwY)e~z?q6KvcJ}_;S68DphJV&CTl?#KrOp5MS65%(A2xaEOTBNa{|2<3d>j8&
z+46pEli<#kJ67H?=H2dn=x<Gq_`KXCS*fi>dn4m(T&uqN-+J9W?fSa-Yg)U$7OkKB
zv*h33tL?$FHvD_}Kh*Q6r@PB9oBdJK-e2AQI$S>L*Zog<>-Ju~d-dL{Q#<;@Rp;$Z
zeg8`@_Fo{g?!EpbgStKP>spgOKb+%wO}l!v{Gx;E!eQ&e{{Q3fw6WP=CT+gI-0t<X
zz2ZxMJzb@>w)$(;&f2WK)zj}B*~;+m>B1=9d*><~k5&KLee;6e*7Dn$)APdrw9Xf>
z+gtR19Vbt=RQBuYn=2H*?c4V0?WbS1;p<|(ZNt05Z@qey((-%a`d7C^yx7iskP+It
z<L#PCi!Y@M8!oDMJ8<s1KmWy>7?qO?{=MJer1HP&ZFYT2{`7hLwiR9)PvdxarE3*_
z&XP)9Fva}+zXIp;%{AwP4sUW2uzzw^|DfmN$|^R#&ElUqxaLH~9ya)S>B|&D`>FQ-
z`BKu{l(tw+)r<QWz4i9<Pp|!-c5Y0mh%~&};PPwE_2;ay%jR}07eDaT|M%X^eg04X
z++O!LeD}$R*S~Ro*B6e{$u7P+rDD^AiT8JH_Fkc=+?le?TK?EfYlZ&gzr534drf@*
z$m-&B#*|Gw`TCRJd55ch-%<Df(ZuRq`<+yFs=pDEVx0F?&$&Esn@e-vf@z5_*fv)M
zOC0@nd*vpTUJhRSn=HrJ{I48Y;oSN7hVsth)7Lj&%lChC_u?s^!fByZ>a+j<*4}^R
zn73Zw?vK0DXJ6aBW|8&#eXlP5{#vbm|A(I7${_hozB_6=mNXncdg!sMX=m%EsSkb_
zHS9YgsB*vVca49@$@zC2bG2S=sw;~v>w2`Pz{`E-3DphBl|TM8fBZGU{F%(m9k$1W
zt-K#<W`4XldG}d`1not2KlAj@Htc%kEg$~=)AaVDWAQ)SA65VJc2A9s+VM-^_a31f
zWtW{#e<qtREPZ<CAw#9@|4mcxFFz<5UskEm5%$*N{vng)8*j_Loi8QyIPLzsP>~qP
zqLrIXFYa((aA@NVql+IWTEFysxqL^$(FwnlIc9IoUh{9q-SbbreLpHQV~<+WyuY_Q
z8o%tj_(kB(e7o;2_N`v>|JAeJFL}-y`}<G+{<rb%_kUfJy8l<Int0T+7*FowshN}a
zw|2tOh3zX=-BH(H_%re3AO9WxIebfY$=;g${W`~+`#FO4?sG1ctc$7P-XfV*?QArg
z)n}IE)<@N6trDhRN^mZ^JtM~aNAGc-WV_7%-+i0*er1wt6yC9agPYEkTV3(fuZ4=A
zzhATW-AAX^Z#hSA-@KsO5h?KJ?fH)>U-tgxpXT$ksJ`ymt;TtB3*67=<bD2JX`lW7
zpvVa+r`PJs*9ESxKa<|RIdA$Sp93pdQ%_t@(W<P!Zmwp$x?p11Z<%egUa5=jmp1-s
z6F)io*G^sey_`S#em}kSO(XGe`FXyzi=LR+iAk^8EEe-G>SXWIbw%r^s{EQ)E}i;&
z`kR)6-=rp0O<SqCeQ)cd6I<uSCv?OWi~s(*zSMtdNJL9wL)tr&s*XaQs_iN>Kku2X
zSQKSl`*q1J{U)dM<CX8$d4K*BK3~0UUDUSrL#cu%)-I~GzS^XI{NLH5-z$xN{SJ&O
z^Viv@FzMczpP%b5RL}BB_-XrD{q@!F9e?(BXz$;>^vOT{>K*HUX?xc%dU`MZ|8JKo
zlRp2nc_NViD*WWfCDp&y#LoD=FYfor-~U6)f9iY>->%;BZRJws{Vnfa{i)@={<?a7
zsc!a@7x$+vu9<)5r+)3-YrgjHP2)EIb>R?tY`bj!9!4FJ86LviqKA%6xR3xE_UAb0
zura_ul)I%xsrc=_&2@VohDsjtS{WFu>8pC&d1~$1kL%Cdv)<iVQ}9Rm;HMuZ_wpF+
zc7K|j{$A|Aho$);jomkY^<Nc{$aH(M=C#Y}EL*neP4(-1^y4`iC+JVDc~rk?#*0~B
zR`G|we=(cuw)K?R*=q_oRYDdl+r@n>{ij4p+LSADf4CNVUYNj>V8-j}cAjB}&%x`C
z&Cl=sjfnUCs=K#A%JPx!yLXHl$2=y4xY_Ti`hIiASNV3^JFowiu8^<M*Al<8sBd29
zsfq13>pPEM*7Arw<8*i5|EbqHmx`QNC|t8=1y8`M#kub;K4)$KdtQu-wUsepMnr*_
zNL$lEhYb^aRD}7M4o$RhQBmf4T6pj0qt}yOsm$aINr(<SuzR}5?aR}4)GFUT5%uvO
z&$q96lVWSN4#n%Sl^EnKsWcN@Zj*nzlqtaML)yFl5#eroAIsi-V;X$_Zt92fh*`_+
zqJRD~?$=5G_NMGgRbN=0eqc(u_Dr_uR}Ojh|K+dn{^XS`kk3ELoTS_I^w<(LORhr~
zzDAm#ev`^*FndDlj9}MKpR-?@@u>(&)lSl%ub8*)gO$+|q5skwKi&D}uW`GBCp|6y
z?Y?#AZ~cBJU3Yo6zxs^AkL%tZePI}W{4+~psfx?{SEhFMs}=hF*F4zUpto*PhkWl6
z!(YpufCiK&=%{eBG97GK5D_3I#o7!x@)k6*+^}JRpOQe!q327Jug|^r)Hmm~R#<Gn
zk(eEyOzd7-_CE{mzcTy(kM)0VofE%#^V|f{n~9ry_w$+hKi)7us<8Jv+y6DoWn;HS
z_s&ktJAdN9+%0-jKkb%(_;ttaiffY`^?o;|Z&*{?#cGm&k3r<&t0z4jf3oztfAybZ
zeQ~BLE??GzUv=9Hp(RdrmBzmWD`a-;{ZlKq*KT6wz5~}3Ud_mT!?g9;@|Tqfe`j6V
zchUTc_>$elr{;f~Q^T&v!;+}!x>(!6bn*(@Gg}Ql-DSwIa_iydIo{pBFznxX561};
zyYKz9(Y-Ngq3%iTig=3!1{=cP>a4QS_|$j+v|3L`L|KgKNWq2xKQS)WwuKHGLNp|V
zS&uAK<nYlw>wA8F?(C~RkJhEVubQ(gn@?qO<IlC9{-pnzH9tZ>Jbs;d@Xe{VQ?tan
z<oBiDPWZGx-tmOC{5>1@^zBz!PJe!XGw0#{2e;Mawzbt3#a}8g|5<+JU;Tyvu`}Px
zJPn?!Zv1>I^!Cl=PnO1+zG609eADQcMM!P*cE?LGulDc%y!ZFJ*t*G1*7rC0G++Ot
z|Mhjh-u>u}zmEU-<-axb$NUXTju;>C<$7=Bcu?}xVVNHx_xipu_dRSC(Nl`i{r2vl
z{*|%^!6gN|Z0<aJFMQ_B(n#NIpAE-PzcSR#j55tSe|O#08(q?lTzT^uChN?6UBB{l
z`Q3}`j2a>nd`!96m<}~;$S@F7Vr^wiQi#wH<rZ{$sL&}fxqsWed#0z)n`Uay4be(j
zHP@-YuJr@QC2RiTSN|&i{i=_hF{LWLRrAK8A1+>@;RUXT<N0|P21J%0eY#!0Bdh*f
z{X32OwGImZGaTmX#!I!hUcQ<a_34K1{uQron7W2k{k9W1=O+4)*|&Fx!qLAkE!^rS
zzS|*?&HZTc?T<SI@~>Ih&))q<e1i4g;NMo8Pv3vKOKe4q3TMaeb!+WB3q?(Cx~<A_
zWzY$=y!`jxck%S^2flmX;(l4aDdA^_e9Nbw$M>&Z^7-qJ9SxI;Km8U7b!t&MEnAb`
z=B2b$_{yW-g}<JtRe$@$4w`h)aN}ZaW-QpSz(j;w^sr)*MueXzH|t@>f)y_7o{R6R
zu{wFSrs!JX?9;Akn^Mndy($q-f4yJtg-vl&M)Y2<%X=E)Yv-FRD4Dszrel8Q=U>aG
zzG(H6>IxD4{&iv&kEWyJbkAdX@8{brFTbbt^L6!!4YTu_bEf@!nJ%5)|7i6RDZYP(
z2k*Vq<_ipwz0)^q|5Ve!6<n9Op46|K^PM5Fw3s39(qm3hd%2zVdhbPEZ+UUz<9`47
z>;E$Asw!Uo_Urso28Laec1Hd-lv%%Xe&ErU@r}Y3pS<oBPYG6h^E}$??7d%CM8jF{
z*Ix>83R!ca^poESeP-Q#8`p?;tE@Q84chiDG9gDqScvIh!-<FjQEtw|j1C(>(-N(1
z4+9psw2Aj-&p!9_^X_$ZyH{_y7vtl5YWfmCwt3fW9-3tMRYkvhEPwI05ce;u%NMMY
z9mL*G_<me=g7x9bFZ)^*IR1)TCtrNU+Ev5jqW$Pa-F()zglq10qKl5kR;9n%v$e6d
zXlh5;CR4^8I);s+k4sNSJ(WNFWq$GYDfRW*6V%R~WZ>S)kWu>odA$GmcE1O&**l*z
z&8s+E_jc*+6$--B{oXn?aG&7cxTSn&Ls-ndyK%4Q_n$Z<_@>f8{LhZ4@1mEFPgo{j
zTy!Ahy4&xs$2RX=emHbu*`{aTAIfDYzx(*2xq*R!b&KOHCxI@|);&*;HRYey*7*DE
z4ZmOdI`;0rO>e84<o;gU|NH&c;{R9w`R$AUy)XaW_0ZShSw}w?zf;-b{#Sf|r0A^G
zr+o6P3S47PzgY9?P}KL^ua+kLe-a++eSdxE)penzTFNQk{$Jg~o%%*+THoiY{qh{#
zk=r_>Cm)*oM&N~p!jF6Br(eIalz;iUb?bM3&3dx>U3+<rf9=}x^0M_}{_+2>CC07k
z@~n5yTN55|pgp5khcD}BhGN*>`&x&8YW=;g-?4gq#QyNBd+u?hF8AxM`Tly{)k(#7
z>zYclW6w^1wWjFqlkfjoRC>29c$72G^qbJ)DesF`1m9h>H#RWL`pdrcHSVRww>I9`
z7i+oqpwZ4}uYY}i<XL%>-#Kx<`u#7$H<rb$QkSs()_wIzMRLg3*YRt@tWO`*|9bWO
zuOqKZUkCj<8m2SH>CmUArk(F@xhFk-r)8?PBy&Tr=1K4O*Ir02y3-J`v1ZH1_(L;8
z0v78^n9V=VGdV_MdxcZ$G^cYrXEx28)O5O7sC@nWLo@Yu>R-&OS{twQ<=)!+X=*|n
zs=40PM*Hu5{bBCBZ_nd*lq{Bue7TF$`~5nLtGcK6zp7cBcI)x~BNEd(((}%=sIRK}
zKg;Cy&-!?lu+)aK*se`~e?EA(`t|D7?)6uHP2(3{GrwcU?AjEyB`0lf{Si2LBmL`-
znBK(N%U7FQEjF}XlXjV$WdCDVgY(q|8y>0L^Y8zt6lovAtNV}ht?7v`xe}#6)C^gl
zC2er2<E#CddMwJmxajhSo1Y#(bQHL_{?DhX{ky)uzUqDc{_DHnv&*L)ovdHR$Kvlc
z`QNXNcU3knD3IK<|9VRMMB@o1R&Rfs*KX{svJot-PGx(w{^E^ahU@;fblF|}^8VKM
zoYt3RU+*WWDFsEneX1Jpa`&Xk$0v&KOxmFMsfLl$Zoz_!69p5`n0fxcyy{)cA+hrM
zEdm)6qb<yWDz<;gs9iKI-hY?%{tNR?zW)0;=NwPzt(U2jFV|n)^?lXGo83$P7FDOb
zzWMs$j%kS+W!Km2UmLzkuIlNszg%kXoa(MuRGhw6q!PR$Ytn0zCxKt~DVMQK+Mp6z
z+hl$BM(NWfR-gWHZT0lwV0mWv)As5kqrc0{-^{7-Daqe{md{u3zP;wD{VhFW%RI_=
zKZ~0Xe*g8<D&v1G8-6IMm{@8RWY<PKDrNe7_N(XYJ!&EE4?n)O>BEthe@Q*_wk}To
zrM>E)WRqFZc_R+(DNP;AT=hSRZT+EVxnjD%H(zn|?l*CEzv{k~XIb2Cn`9x7vWLl=
z=e7Qx?O!6ow&lqP^Bry6z5ehS=`XvQKJj(0|K#@mhHGNC^PTVEzwD#FFU^zs9CbD_
zu<p<PLMFAJTLr4z?fy^uv~ls`kl4RpPAxn5TmD4FkCSuP-u)+NxZ=7I%Yz#t;bFqJ
z71_;9uk6%(Zt3-4-dRRvt`#<~l<q#ZvH8&FkeAM~mcQtcgunOxk77%9yyCyGuBz}S
zTk`U~3sXE+ui5uv|Gs}KTvi=aea7?Q;=7QI2O8d84~XQ9_{7{9H|gelEzOKOn=k(R
z{eI89-9LKHEZo_7?%uzhj)if5p33Qc$vgAlK)T}Xood1C_ooO<Yp=Srj^mWs%x{q?
zKmKz{8H9c`Q%m0W``-T-pWb?go}Rb2`uCF`*N<|^3(va9_~Dh}`6=;XKVP0wSkSvM
z^k14r{=Hv&?QdILKDB>?YxSps<#oR;mOgBGWuFoMU+wOTtMPN14Enh*hI;L7={x&p
z=l$9z|K99V4vCxoH*D3Hr&B^EJo)th^w)pu-z+V=zxn#dg&z`g)T>_JPggoNp?>AZ
z$a<-z_wK)$bol+IqFJ%C>`p$ZyE{Ade%6;wMbq6RGIxK!eS7=%$tO>~SbOW#UeW6{
z%X@AY$G_j&z1Q_?PV&vYQBQt-`qA%nf6u)6U)IdK(K6{<T<@oS?@yagUwBdV-K5{)
ze~Z@Zhkw#74}W$tW7gk<%TKKD{hD-9;oYWJuWA?=SlAqAIZar_TDvg+*Uq@TzjiuX
z9{QWTwQ7;r{{R0vr#<nH_nvxg{nh*b|NsAd`Q-QPtFNx=KVPSo-7FRN$INH`{Ua}<
z7wua2u3fWu-_#X>nt5-lSFf#}mZzGuJIwlx?<BTQp3GHytG}tVPEWXaKkLDXt-CHp
z?7FJJakArU(_W5K^Vdf9PuGm|iaz=4*Z=jWkI65Od%ybf!qZpdv!V}0Z7tsSy7XoK
zuCFipLbHEOpIZGWeCba8ts09e^PDpF=RZ+Y{IXy7eo>Wf=!JCx`%j<d=$>@#c+mCo
z(xTX?hxcE{y*hr@zIDX}X~PR4c0qUjLi8W$X<gkP9~)Hlwd(2*6;;dRsg+Cqzt}bX
z(2>*qvU*0h9w+b3|67?R>2`2VQ@-naJ^zTEY0st~ov^>RS2p26`VV!r1IKdYi<Z3B
z|NeiWo>tA*AE%Q49C`Mi<EP2V{2vdmHN5?Ix?NxW&7CcO#dP=U{@W`4`cjozq-}VO
z@CDT+Yqni|qOCZq^qImRKc}>F+SC6}jo+R#;rq{xT)%dGIM){Cvd@xh$&UJ}>FYHw
zmsvhvr?j{FYv#${{|tf!>p6_4y$iGOd8(UzGsVo=`8|`_UTNXl$Me3{*FO35w(67U
z#uIbp4)U+^%l)-$$LyH>uCKqo{+ji*>rLAxqmp+Y!bN}n`}Eze!upMR_1nGGpDr%j
z;1hdaSI7Kj&z=9fH&0F5_JaSL#OJ`KS@A1>vY#_3xN?5eN$pS3uCL!l@4vqEYx>%%
zZL5D9?szF&+qU#?;+Ng~-|VS5HvRhZbx->*Kh=zI&Cj^?Mo8HHwfg+fRK2H7`wqE3
z-*jP_ZJf$Ai^2=7(Hp)mzZjFZxpVUMC6Dxevna)WGktx(_WS)r!>F4|`*Xg8-{d{}
z{eD%|<-fDcGUhyHPrX#TPiu`icYydNVW(TiGPIl}+w+D0U!Q0DI`;dm$K|^pt55p<
zM(|9|me^LS$-0)ddwo}{o9uoQnD_48-)X@Ds~UcW9TiyP|2V_?)-+DGyD!5ocKko#
z8R)KVrqo<xb8cDcW7f$xE$#f}|K0qm<(+YeWtOzw;(0<F&)?qt)#dM}JNp+JdUD_W
zH6wWchT}5dKD?^Dby)1t)B4TV-}vj8MepBmGVW?Ue|hcP@7n{Mf*;iR+Ue%EK09^q
z?Yf!!FTB2A-=uS)Zgp(*1UECs>x*Y>yZ_qn_m`+Y5q;WA{xAM|XWx>neZLjDHotL9
z`+ND#tn8ba`}h6+qpxD5GJ8Lpon(HXnW+Ef%DSUBzIZL3R9e3&)Ba@juXRhezPfV!
zuB_Mp)pu7<u?pnx-rDufH*{*<-tYHb#Ya!gJ{h#!YuCx&|0J#VZO+{v$Kf7VWxqmY
z*4*z^N6)W*6XIF#61G3QZf)1~*H`P;`cJ-Je&l56C%yQ|<=ggy_AyU&oaHQVinaE8
zS<Cm=cfXg1ZS;NpdG+n6U8&z|@2;MjE&f{kwaA|Br(;~BzDE6dzia>1&KjX)=jB)9
z>&3gm<JP%UZTxlhkNe8JPc?s6n%vy7=5OlT`VDub-z+(9R(~yS;lDEr8Jyg?+rRJL
z_;K6!DZ!Gg^=sP?imZuywfd3vwPjyl{(o7sE^^)V?^bV4{@CZWv&5G#s$^~4_o#(;
zJ1w?mU5!{*^;N0HXWP2jCz3Un|I&4d&(>u3Hr}P0y-wuvuF%xfFaE`sled1;jB#HV
zzb-Ior@^r=(}OOWW__yKXumF~+R$xv-Jidi;fqe+nDw9c@9M8FO~YqjU-duk-JeMI
zV_xh5FD|TGefDJUdKT66-sybnFK$@9-u3m>s<=qos11F4#e=Hm>Yvc6aqwkf{%9s~
zoIN2W@ckFvWjkx5_sYb~T2zw0DRr_^rI*k5vRBjgCjG76yK9$P+4Ol!kNp1;KSzM|
zl<D2tjn|joQujRa=jw}}b$egDw%UAs_q+KY<(zH*J4JAOxcBkGhV>7%**-Vc7VHoA
zd@=Qr-I3F}=fg!y;_pA%;>PrTr{(UyV%qw@{!ivry>KPp@O#PpN&iK1YM*l^Ys5dB
z5Z04ttA2gaTk+*P=DS=k`lTXdeLmi2x5JX}e=qFWUZw6@Wp{du@v7@lb$_eg23@LC
zJ{4neZl~S<*LPKR?|UmAcK_YCiCORO{fRh}eoOmE{tcDqXT)E4?-x^2uPiQ#y;~)I
z^>yg0PgMu&_WtDMo3Z@#>aP`P#ntQkf?wZ_|M{vj`^nd-@2i{MT-*8C<oBk}7oJ&9
zK09CEe|5_D|0(nDKFWxmzgjd~@5xu&lt2H?zDHkODEebxPS)C>?CFBv-KX!JerNyF
z_w#qm{#zPu9>03Zx+PyT*R2C><6)L`oaH>>6>I6yKXVtF-mm|D|L@=bzm9(W|Nr}~
z>#xQCypCU6fB*mQ@7M47umAr)eEpjJ*LFwk-IupC<aOz-)6c%8dfBBGe!X6A__1Po
zt;eY&Q>N;MZN0MoP0*F;CEcM>*UxNySrdNt*WPz~r~gwoegA8F!mgK*d*?^hw0M_i
ztTC%PX7c=6@Ur-D&s%$AtG3rGeSdwg<*MngtuNyRQ~&?+pX~Mf{f_RiRlCDhg|2=U
zwR(Nj<ZG*X!q-RrE!v${z3XeyUfF-1m4>moN&6<Pi!<7(GBNe*`y-OU-{#C~d3x(%
z-1=WnpZ;1Sdh6-0tyyc$SF7%OdgsVvpMCu-+vCfxbUf2cdGc;Wr{3SHd_(!jy8paU
zY5QiE{_pC#nX+ho$kD4G+%N45@wb^;Sn%ax*$?Z7pC6?yidn^Y-@LrMwDgj;_v`!X
zJ<FdxUMyv~@`dnh)AKVGTPAJVz5iX1`FfZ4M~fbE+zae`eOG3OXJyKk`s6*kuCFeA
ze|3M%MD4#-?><UJhXlF1JU&>XuEXD<lkNAPbJm>|F+W#3MQ;n*czq+|&o3oPkL&i{
zzrW~e?ETs!Uy_u|R_w2z@KUVDZtn??A19ywuT8l3_uES6gqG#{UL|j<_O$i9czHSh
z-7Wi{x|ym!kF0-eZWJ4Q_22rdDxZEV3`*;1WQua}%sLeu7<)d=`sUrgi%RBfs`~P$
z<L|pQ@*915>?Xebed+${{X)8LUmCgYJ^xK1EXI~^@xR<%`~GE?Ec;>>6s3IjMc@6s
zQKwG-y&Jzabl!fx&ChQs^ZtoH;{W19)YtWz`Fh&>lXOG+-}#F_b`w7TOvR>X-}6fv
zSLE!?&$^`CTd%rMJ$swwYt`%1S808J{&c-%NJU@$-=zEZQ^c1&`{0v3wf9Aucl5WJ
z>lXc7_xr8r?zdTc#h2{AzCVoX$rt7IXQHhB7VW<Mch8!v-wHh?nuk~ZKT-6o=7-nw
z<<sSh;xqo<y{fu8e4TXFdS}UBr@Y@h3jet9L&X1efl}Y~7ez%q`J$@(>(n1X*<Ha)
zmVEnK`RB;pyAziFnb&Tpcm2hCeUYu<(Y6^r(;Us_y{mUpRXxe50vdAQW@UMpcpyVV
zO}UL(5wsFgl$+D(V4;VLi=c4z=le0=_vih1Usv{M(q>P?87r;2_OU0fvUfgdy6@qQ
z|1sq?e^j~mg;!*JyrNUsUmE@E{<_cadusG;g>~L>-?^@Tb4kM4cMn>te07=`zU$9o
zKEL(t&EWP096CA=gFAok_&D`t&gVyazl+V-CVKeNyZ&2M{&z0^m1jL{m=}99=tl$F
zMdk@P|L3KgnrnVx&dccz0%yWB@7{c8vGePnM|GEe-cU5Ys4l>@+k4ZysL7X4KKyzy
zu9|u61JU<dJC`Tu?X`B8a<+L^guqRYTAtm<WYf2*RQpd_Y5ea%0i%w{1RW7=)<Xvy
zHmuMP6Jc#xSZENUC&Jz8cF<wP9Hp?`)m3xf-`@Q(?q=o6$gqb~x9P4_Zag8}|I$it
z>)-cp9|v!`K0{3Bg5K$!%RL{lzq?-_vHL~alxAPmIUFzMuy4BF+<0GsfA!Q4TNghT
zI{rf{<Zgn%o&S8n^F$9nc=DKg!}ZUn!gD3eb8e;?Z!7LQpS_N~r)$2zbFJrg?`K|L
zpHt)a<8*!Galu(P_!e$s-#)=>>;3EB=j1%C(|qxzJ|+2Gt#D<>6~)_+(jT6--G1Uu
zzyF4~KNDEC#1-3oPF|J!XMysaUl+n-9S<;SiA?Ykk!EE&Xt3dpftXP5p@Rh*0<=^(
zTb)2_UeyG(tgqay{+)mO^SqN$Gb8mw&pyrbT&cH1W_Re%9rq2NJwF`ZU;gacZ8LMZ
z+WV_)b2{a9wtby)WnB;Nf=)Y*S0D9CziDrLw`ty<+YSGfg;cBF<gVPx=^y_<-}v>a
zYrm5N9p&P=S#Juz=)J74>zCutRrTK|R2~y7TEBnwxt=78^*_v}vOYaAse^GrbmQ40
zrKX#-bjs!4|GIy@-uU}o_y6bSzgzqD=InU)&2qnAO;Y(`a`oT+RaNPSGG!mkcxh^;
z(qvR<ReFLQbozphh-9nN!G;Y98WOy$hYmWdh|p4(Vs&Z`R7lVf=V@_D6x_M4;(6|$
z>id>QCKcY=axH1jUEdhLwk;2=`5K<zKT<2R-mF~Kox9fV`D6LTi(C|VnHT?`U$pWM
z&y@c^{>(A_xm<-eIil!|=-<bJ#kUu)$omzv+(oo~=k*n~`Pcq7>TrhFSAL4ES@3Yp
zf9vA7k25x}irW+yQ>^<&X8rDog16p0K6TvkPv1L%>pyJoo}YW?@B6gkwa0_fTCYZ)
zRBM!2Z?Jm->x|9?lB>4wn0RvCCxgO=vlWW}`nfDvWpZ9^{$qpAaBUrhMRy-R;Xila
z0i&A8gf>v59^SAaM8i~=)#;$aiU2nitrn+;g7aj`!{2|sJ@;#A`I={2cCAX~GYxxD
zUaxX@yZwd2v-cgAFJC<A`lmM&-hXFbIQ8_4#rNL0{NGgk_HX>7+U3R%ZvR+g_}*21
zF`M`;b&W6elkOF$B<D3b2j|Z?n;dudz@FI=)rBAWW_3qY<rr?gQM>5=ifZOVpA0H@
zuzZ#H>GtN$ddAQ%R+lTzU)j6pU{(Ik`d@dyy!I`$`&pjw^Jl|4eu?-;%%^1-1zL@p
zcW+=(ys}fvwNh?3BWONgLXIdG$DxM_2VxAQxFtc!G(^LdTaf9Yzz&ym%j?E9zbp2Y
zA9__?JXK|3mqmAh#eRu<72?IQ8S<BG>;shwU)0%1<!o+IjnzK%PfhsumIpSIHMJ-F
zYmAi&S`gq@rtzWlqWGWZj=J@M$<b|$F*9-;7Om+!8n!djYW24@PQT-0WIFvzj8|Te
z+czcdZnT`|vH4xRb9Y@VbdVKsVO_t`a_3F~iHP+RUrv!pNLAQ<{>#pzj4Sq~iJz`5
zXRzDq!1I6qe$F4c@l_{G0|M*VF8F%ijyf1OF|{pe(!J|De>V4SW^Q0q5t*VPD$L4s
z7<7rF6t^JL!9s%=9TBNcmO}+AT(ktGkDj~xyZV*i=k8V2`C6~E(u{+{3Kx8@_pa49
zyZb5Q<pb}VA2vJh4|rcA`DT*WZ2N2TA3c%kJD~S+ht9)a+rQ?&FVlIxrf+@u+F4)I
z9|Y%_ymK_@Z=37)Zr)EH-vg%Ct8O}NyvF*ns$`4Sr9Zy@hwt#-uhuI)rDpB>&Q|sb
zuSHGtOXZed`xaj~Rd{szi#YcCcDK5xF#Wi#GVLZyZ`G>oYP}PA?+;)13Si&lz@*Fb
z@#4N;CMzzPezn%%`n4_N_PM9~Ty;O)`XHKlZ6?R{soI5c6WJLU*bh3+auMWWEn2?*
z!oIgrYpdRGJ@sP!zW;H5tEWGG8&UuN|NE`UTMPeR-Tk*(a{8XRdf}zDYs0<odh2a{
zcYR-J-W;=>C*m>kuOhZze)8eh^?2d+>vO&b_MHAdd!1Ul-*cBZ^{?+cpT4dC^{ZXW
zXWw*9tuX7b((+%g_w9?`;4QD@P{U!``0viU{PpQCdh9rgnr=>XOA-0MTDB>B%flt-
zjFTIF|BGPT^Xc>dwB7HncDm~R(iSbhbv6Fh*Qzh~))|HUcb@MbY5Vuh;eytazpp=N
zFUwzl<e+uQ^9dP|&(44P(bjz__LuU7rTem%PQJb`dw<8<$a3}d@oQFH)3mMLTFUi1
zeBRaNOCy#%jV!F6apK70fA4-xy!%<dCh6_Pc+n%*R^RdW+52VhudRGfy?3k+GuQ4`
zo51_+-<ooT4bdz~#Y-x7tQD42U4PyG))!ZsS?m2@-(Ov~G1Oy5%J+XeoiAQ`oXk+k
z`dGKc@T1ry8J$_)(sEu?H@*MLS9s}VV0y=g&gJz#Uo!tVrFrg+vZCU&zTmPOCVUd7
zHBvWT)fLdIDw^Q6P%f+KSNx)fzfP^+KQH>-&Lvl)zpn528}Xvw`-txi<7bO!76qt1
zyt9A9eb4GoOEXON$9-pY<?+d|>^BXItuycKzw=Svp!ttUGEZQKr$g3-my7@H{mm2e
zq*6b=U2^%AA9>4`IsNwV$#RZeu(Culjs17Ug!~7c-0!8|?{8aFE|-zgJ?s6Z2~YPj
z%V&M^@?QT_fAW{`qqSG#*H^Czl=vI``p%QcNwefnekuKtb*|;&S&bvtTO#)Nf8#tM
z^l_<!m5XB9j!kb=|F7A2Pnxqz{N2II6$Y<r_L-fIKa^j~<y~oO{^9F1-lRVj0&dfv
zEcjTsN;qt3z+V1Si~q)JpZ=KlQ}0eBmm;sRd&a@?s}V(Ccrtdyyy)KVcf;cakHYof
z7t_Bqye<`t^jydzXnu=5Y4@~wI+C76HS_&muGfD$`C{~I5D1De5Zt}lW5GUy8%-;h
z|C2r@?i{%O{I4qaH}gVoH8Ht2EnUPr%lYv0j1wJ4BM<mA_ppE0lf7a3;&AuUOcj5{
z$Ep%~(r-dH^>G=9zkQ+btG@r`^Ye+u@4nZsnfL#U<Rbn}*DuyQ?cB0W?T+lp*uQNp
z$^9#)AHUh3Rv&z2*>q|9lkfZ=Mz8pNK1KEQhsV~-U+UXN_{#sEV|?KCE#7<2;xm4J
zp1%97{66ok>67nl&8yl}``2P2Px9h}9|T_?`20&_`pMt1j~AIQ{C)IK<kCO4*L|4I
z*BftF@_W~$!(Z9j4*Mnle|YrA|LZqfyqotH)%>5gY)xJK%lE6f?(=+AI2h!X;kohC
z|1y`Fc+FLY;g!PE&P{wd$Nb?AInh72t{-kc{jt97gWA$FX=|i*Jztx(%Js^T53j#-
z)rQCk?7Cr@vD)bM$Hm7QZ`W3&$rn|xZ2jc?<k!EW*UhZ&KU{OZZ*k3K`-`<(m;C#>
zPpaX2(SdJsHh1VLtgx2P{<qL-*&eSs^J_IeZcpib@nU^gtLyTbdG~%4NnF__@b60H
z{FXcB%-=7&AN^&tn8`N(b7AoMQg72mS0-xweRgMF{Lkn=hxM9RZ)Mj$d;GKBEdIA<
z!NHTO&ZvFc=NOX|GQafmlx0aL&C>-oZi@b29_ujYhxFs8jD34D6z2ZVy&_PNnXRL^
zt%G~U#NsGn*Za#Cbnf`^@V2FxfM<M(=gP|u4;FhD|C8|Zw%W_)(`7pC`Gw{DH{)*B
z{`CHI_iF8((1r2GzgWL4*|EP+a^Kq)sjc%Dih0jH_+a6S{r=A;bIvcG>nr-NZ{qw<
zvI?im>f5qg&nZ>E__MES-_)BtJi#ukn|}CZRC~@)J^AD7L!)G!EjC{!-+N#5IJ`N>
z`+c=Q;MUd!>oW?=dzJ|sd#}(F|DUKS@^epw!6!CmDM7i7Vr{c*CQXsQBXRG6<jLrj
zZ)#R<G`qhr$#1gi^Y0;(PQE(#>c_kL59YWfIjW2Al%Dy!`oX%H`_n6`-4|%qb}j5R
ziEx~eupvBs-oDFQZ@zx4ANt;Y^6x)imbdssKmGG?_mP}*(;w$^+xGQ^es1Z0@#@3W
zw|6GWcQ5;)CiH#r-c#`tLT09_KMmdSmdWApnw>5up8sW%TEACL>(0M}=X>OjyeQw<
zX(rE-u5A82x#ynRk(!pJi;q4_KK7?JI%ek@G0Cuxf!>pMR>$nxmG|WQ^GC0Ky#MIL
z&iY)?rb5)tW6uroFQ@<S2w5=o^V|N77w$)VQa5z?mbvWlAM=CjUzh)^2-~pn*H+tR
zrn{e?E!MQr|5T|T@>Dr~e%XVLSu=8$ZZG(IcgdbhhlR~6x3UyYe}DVIgjrjjKW00>
zr({R<{qmZH)<r>l2iYF(5q8~wQT_9ll{~xe9PRRAzWZ(7`6)_<PgxihzL>O#-mR&>
zs@<XK7RSMS&QVJLe_8nD>PfF-@88=!c|$<--Q};<_nxkQeRuWr(p&pq>;3*CD0%;V
z@yt4zH+$=sADQx9Z|ViJrT^l??Kb_t|8?@a-RI-aI`p%eechX>;T^gA{C{y)qxARn
zlX^`nwy%8{;B3f#FLs~di+DZnZ+`FdCzr1Y65qY!<l;K(eHY{RPmJ>W=oRPnGce)(
zKI!*)Z(NeX-*Hs$mbA?;f6jUD=lA;Xm*1m*UEh26_t`R`&hPW)t&My4J$$nD`iOng
zmwa*Gy-shzB-c0dnmE^WirU98FtDz0oaHi6i?#fc&?TO43+0b$pL{)K^7mWc+ikt4
zXW#$)|Ne`O)nC6~j7eKpkpAn+)Vj_~A-g91QvSN?bjYH`&UMz$>|2Ze$p6S&JNf(n
z<~84SH1acY{@>cDd!?m3X{vhp+0%Eqe($^<TWN1U{Up~zvz<RpH1EG(#PXK^`Ub7e
z0!Jywa@()&mEDfBga3bcUpRS5wUZ_L-MYIx=1Ko;-4;o8|MOp-eK2=UTCn?jch2d5
zKe(RuoTeGSs&4AKtB>?n&#H5Ed3VuT{rLZMb+@Hg|Es>9d~UyUjJE5ky~3+{-UZeE
z|F>R!RefkveB9*g`&6U9zPr9BeD|Dxf3(j1U0m_x)y1dhqrY@tu`KP1`=j|YZGY!<
z@6#WDf4y4ab7h^)_5Yl1?df;wf3duBub;lZrItU|bK2Gtul?ELXVyzz+JE+>-geu%
z?&%p5Lpw~lLeDI={i=NRZsM%(|9pS#xt{uS|D@mVwz9N5J0`jQScJ2gfpkVx$ijtp
znR{DUL)ogYaAe$WIHRD(He<>`$>QjZSAR?v<}5coxb<$Jr0xA*`BzI%hV{LA8@F`r
zwIy5YuI-MNUHvnC-Nfy$H@VJKepc+$`$EraqW00a?sAFfuL5E1->-@LsD69+@=wP9
zLbl8~TaM1Y|6t#w;7O7h5!bIDmN5TmzxT!5d;U|Um+Lk!j&9a43Wx|u&swo%L2J$H
zq6vA&>%2Wu*7h98nWdor<F(tVf?Id*&ks=j<E6cC+LGt1J9g*otXD0|`S41_N#YFq
zzRqQz=bcWjt&)zk+nb}5#wOGH{O8J^Ehfj4vp6oU|MH<hYxXU7o|^qO_0AfOxe1+t
zi{z)epZvT2$)`U)k<x0J@h2}ov3%S7Pf79Q&dr%D(?6`epmt&BD&^Z&)2{!&zP`fy
z@JI8y-Wls%l0T;Jykzg~{Ak<$liOa%ntuD0uRed<y0gD;zdPHx-tmv)4QIjscfS0-
zr74@IaLZV{T~=)7<eN+Xo_0N1GymwmeZm_p|NUR>@_K96qwxDS8|Ef_-u~(DKW^T3
zt$PW3MT%_C?6Xdrb&v5p&%Av<7r*-XSWT-+XN}!&mPc>*em8EN72j$4?%Yqi>UDeO
z#r)jHdu3{!<inR+*Kajn5w~MzQDD`sxEcG8{q;@x7{BjOn!-Vw=jPJ#_x$&Ny7_;T
zK$F#z{c0DD!;02__-MD^Fg5UxqpE7AZ~O9Zo-3}`{%<n6UH$$3gUPzRJQH2xf<9dN
zeD&(dv-*p#YZ-4zG!%bwob|x_B;WJz`#bpWTL1I6k4tWU`uf#=o#pu<JL}f3`z>A;
z&B@UB|LJ6&r0jnJg2z{;ZVEeX_&xZ?LdBjK#wy9;)yrJ2<#Xmo{L+sX)OezHWzFBX
zs(-rwlKrDE`k((1a<kmX<x1*m_i%%Tzot#KPk$D#`)|5cdgq_x)&HEmj;yH_Uo}~M
z&ZN4&GZ`sG`!~e5_}X?>l;4eT&g8VOe|k%;US1~UmEDys(gvp%eP6$Qhk;NF^ZQLc
z%i4c9Y}XY1Y5Vb$`12-5z3<&SdP7eBy6(C6X888{wb}c>SGpECojoKXd4#oL($lwU
z`7{1?#s58d@&41^Y4vOWpY^Eey!3lfZT;ku*JonhJ6{nzw@5TyPUz>ZHD*`YW<9K8
zQMu_>S(nA-lc?{$_SYs8%lCojKluDESo!tbA4%bB{g?i1*vX>0^}4U+#?w9G9|AAx
zeA+zq@ay}lS)W+Ht-ZMa^Ys(cZm!MVy8eSxRQ~&ddEfGPy4c(HKVJQ(S)y>$<NDOX
zL-9Ym-v6DXc>Kwq_=!K)C%uf%`c(D(n!o+OSnZeDtJhEZa;;_SkH#6Vw$1n{?0x-{
z?3Z`1H{E-aeRkQDc_$JdHO+rr_5N<Pm-k~+6Q@n}Pu{(IU-hM8)%CsE*ZDhE#NIFQ
zU;1n5>+*2bx_?iM_kVCLzw_0l_}Z#H_nwMgd+}>hNXY^h7pMJ;4X^&P5xN_7@O)Uu
zo&W2?85mef9A~)-da;&j3E#cG`rp=D|6c76*YDhA{=X}3^7Xyp{8!Ik*T25xYts6!
z)rYLER{ixZTeEK3(od7c>ipKn{T1KUJhM}zdB@koQf4|cUM6O06td6%^;gzNb<N-J
zs-IWYPxA|X_4VD=8%w9n{<>!W^$E>ulM@u~FLHACS|6FUE`P;=#DwFK@og$eQAMVv
z{Ig$wxKp~iv#R&>^3Wxjg86MJ`g-iep=M{2rConW{n;S(Puptmf89kt>(<vqec!b?
zUQ+O^?$yPquS7pxdb(^;n(zLFFPuFu`YLQWlYalX)c(BKUmJEWNaUHk&SlEo>b+Ms
z>DI6Oz4PRA*<ael*B6z)`WiAj=+^}CZk@gInlF;=|2&J=oVc_0>MgrU*3z>lR?mu$
zbd8#`<ZIpUG}pdY{_eeDTW{@ut!e8uHM?ZqUh(qM7dy*WzrJYQ{`%@Wv)t=To=u+p
zb^X<KuTFX2Pdn`u&2y&2u(adrkJaBEUeRB=ko)3|pWeGqe%$}<j8pWAvYop>uiWLH
z8mfA-^wrb2>1ULta73ND{pj_ZCY!x>YZTqr-mBktT+Sr^>sb-`8%EEL`1>_@8o0>c
zXx2QLA=CUq_T<<1(<=Xdi#VA2_uJF6YZsc<t$FvLZ*P`=)cWwhS<xKAy|WF<cTTF7
zckkt#9k?UPq|o4x{fYNEckG3v{}`#1T)g}5aNDEEf1KXa^@}!&`ODWFJMDL{P(b0Z
z@3W*ivCT}+4}3avZqb8fKAAG{d3DDR&)HI2@F$@De*K!&zn<?jdba<f>HQ?`87Kb<
zo^?3++g!1$dbeobGQBvpz5KP;J8t$)E6sVo!S&fZ*{%f@zk?3HH|KSB*7;T*)_%7B
z&-3_x-aDsX%)RyX*7xi{vCiB3H*D3MbYf45&!NI+b2@gLJPSMU{g!y7{px?Glw!XB
zP}=*>&Tq}X`l2PezrWP%ww(U)(a*2H-~C&9>*K%QKPP6ZpZ|4A>C6AbVC8wvF6@bX
z3rm%Q7t2W4U;Tc+(x&#;*29|wt!kSLpPurxTEG9o8GF}LS{MCe*O$M_zWZJE>V$xg
z{p+2o{zm-#73j>d%vEo$)xLuYJKr44(^7x_)5lG4(;q?3c{cHjEcEoBU*2d^HtBba
z^>_Eg4{qtTeXm}fa<ORbdwo!i|Hf{WC6~V@ar`cOeDT{))x!-8QBVH<S2In1pZ~9D
zMgG>u{|_hm+OFBZBvxeJezouKww}J9D$=i(zwftg^foi~`tsWEahdk({GaZAv-IMe
zeRpkbU)E3G^;x|9*3&coul}o^zZ<_d%hBJ@-#y{aB=0}sZ+rgE+`XDdAS1~Cf5tp(
zKHoVjkH>2@so$UDJt?|=zsu>e-*<L>ObJ`@&gY-=?%#?jr{7l9O<!lXKL4MvX!u=a
z!Mkzq6^jn-4VG`~_`QBJ-_iRg>nHb`nC#lSuWEY9pYtzeRsMBPHGW&9^|Z3?h4ata
zdHb4je!u^G{e|c3x^=%&tB+JqnD%}3_oq+4*G6jG)wH+&Uwb!7=wWfdnjfpeK0iC6
z^|fnKe6;DWJxx=8IIBHToxQ&-W%9r1{R|8oxsJ13CuXr06~>B}D%#fV4fWl6J#1t6
z`q0?V(*IveKi=neO^>rmd)K4gyLYb+R=KJ6p?z<@`}zleu4{dNFe&hP!UUVW@2>9C
ztrmT}-9me*QQ6)Qbw|C!Z+~j*Uw=LQY?NSlxMqFz`S{-7*M9UQzMo*1w*0l0jnL`T
zofq_XOW*d;H#nE!K0i9t>TG>{lkTzT+buftOaDLoZBdZ6t0bf=zLsn5Z4dcgq4dCK
z{6Dqq!tcNOZytWMB)#Uy_p&wX<3LxpCH?3={a0H5T$)|1-s}8Z&js|7!&dL#_H1JL
z+r_V+tao|OVZF7q>i45M;h(GC1lq56c{4rg>&;g&+^PSA?v|fCUEZbsXn)*_@@F%P
zi(bt0`ksAnmgVIY&4!=UKbGr!{MP^T_s@5;_R8*$n-Csz>gwzGwK2W-v-Muj`nhNS
znH-DL7X8_$-`vao_GgvokH6vP!}?0rO<l74Ys$a%Dt^;#EIbWT{R(rB1^n}$67~73
z+2ZW~lkeAaoIfa8$)9twKj$dVj`EcPPV=fJoL~4vyvH!~v2LLM*FE#>r}kV?|6cBJ
z#q?WZW%uoXsQ#&z3hPdDY%Oo@x#5&@+2Y}J=FTkFiuoZge;til8*chhcA*-p@OS-;
z2<JYTTg_%wyq|;iO;+yNSF*1w?Dhq%R~a!28*i%n&d%8I&U|{M@})Sd-%+vpmu08l
z%d=Us&33U=x?$?OPgVar1WP6u&uo4_?VA<fwa0N=r^~Azi1V+H5<GEL<=6icCRfvM
zIQMLNVms}P!V}e03$Mn`ZvUA*)26-c>}G4qWNj(Bee&)7>vbG|T0#W_!krg+|MKzZ
z`#tqtNKo9TTFcNI^~rl1PJ1~wd|l5fo9pg={p8184~wO?X&oqEZX?;q{j=ccj~6zq
zr~aPZQU0mCY~$s9%Mu<bKh-Vt{m@&tecK|J*>OLMKX`=KeyZE$HzVf4mruWUBzR~`
zn%(F45Nk8<=DmkTE_U{ZPnOt4xLpYI>VKTf#r$}m{ZISS#iz{`1((R%Y~hz#YRBy_
z@%@lksYwpU>HH)^yP28^2X9S#Cv)=26VV0FmKXmQa4$Z%{;b|rd#?@g*PBix^xZzz
z@nKzU)GH&ix_GUu57%raFUTtC*{t%vt?%I$TU)01jqDX)PyYV0RCz(*^C$DpE%oA2
zO-`G!<`J8znUfj!+5JyGDK9?~yChZOXl-p<mFvCG_G^!S+RqI-QGL(<{hff0cZ=9_
z4Hw5Pa4LA(>-hSQ`;IWDkd^c1?sHkVEc=~z8k6`g=YNs4e_wgO?61nZqwQ;Kzue7x
zap3jSy!ER-3SDe^yzidJ)mM_!xm#mbpEQ45RX^ur?Yf?r_g}j<mhP_!E-Zf?{k*hN
z{%TTus9G;ShcWw-mlnr#3LcpKyd?8o%W;{5slSZ#e_^*~(I)$&H<mbumK=V2(ZBVX
zD!-|QT)=_MjX4h)PObmCuIit3+59c$yLuFtt$!n&dq?BfKT-3^k8j^EU!HR*PVwB0
zWUp&A`#&qmOxr3}dp{*)((g&vIC*}2pHriI_uZ?!AMSIHJ$zq0)&AT*iPGOE=WSK`
z8@}FemAl=?ko>971$Bj6ADmIK^Y@Au)MWdd_w8NK6a}6Xv4nWbDK#ENCYJyHhU@+2
znmRjVYbgJY{l8;&+ty1@yHd|}Fs?RO{m;QyOBb>I7Z239ytHQhPy4FcNjHpQ;w}Ez
zP33HnNw~J)!omi}zbP}F{k;Dt^1jNdnfXWi+2yGjpEK8_8~4Q4>+jy>u=uRQC&M@?
z{l7oo*sfT-zP`Zy@xJ#L=6$-_xw3lq)Abs?_upqt`hSs+ck4%%I%z9Dfd@}#e{i^T
zZvV1%oUHq9CD*6R><eW%ym$4Rr}0U<RF&1A+`f2J=l$IOTVl-q-8$SJD|BK~)c>R9
zwr*$l9X$QFBI1sLq4-*bgE7YOfA+;zsEf(`X{y@Ca=ZQFpZ%6oO@bVrIZWJtZsTR~
zck9IB#qXw^S#(}8_HEFTsCviXm$zBW>$j}`5z~>mif86i9pS&}r6=qC+g1uVlpf=i
zv3L3U>-<%Z^ZqPe*I#)@is*~BJ@J(}Q~o1l16S!Y6JeXfb_=fF`Txgvr|s!^`*xpv
zc}sB4DY<?AveNQl3#NAzq-%v8_DK2g*XfEZ&*@`}*lg$BWRv{X{(YYvi^Vy=hrd~@
zejLhFWnwP+tnuT|mIMa_4uQ9mnjSvBvHjkT9`hyl)~??CN82pxw$z+ihtfX%-+M<`
zJgr#b^YI<$IQeJ2>v9kAzrSzO?mLHM_2R#u59qd6j_UrUzV4^Oet8yle>X$T3e}vr
zV@rS4s9uSg`BwcmU&u|)l56{AJl058^L~-cm^w%KV9^I5Puc66rv$0_T>Sr3>8$?Y
z+R!sw7Js+D;giXn|K!$&2`4H&Ypx}(@4Q|VQo8xSPB~X(^{2?To7bOym|v5hCcOLV
zHK8fCq0^qU-Q2%w<+j?lS01s+SFV5iv&er>lPUM}FIEX1sqE_vRCLAvw>@TVQ*z;B
zcv--6E&0&q|C7I09X6cSzxkiyf}Cl|?|+`!Kh+`2zdl^#`uDxsyiq-_5x){2>d)jp
zvAZ_+iR!ICF;{+he+bcw-EDOEQr_N3t^MonIw^7AI5oNT{kr1jeYWerW<?tI*F55%
zZtyO8?>}WJ2_HGv(3ww)CPaVTt$2R=Z-eD_89{d!R=vNQ^knt5-HIE$@^e3Tn&;p9
z^3&X3{k!Y5?n~4EZB9S_dd<{p{w1&0{5#7M+^7Cuz(>AbPT|cAkJBGtU%4La*;#wi
z;rUm)NB8#5yZrm=jK1GL)%;JG-oJFEbIsJtg4g~%5v~5t5`5%XW%k!({ol{mFXD{;
z^xyV)^M9Y##aAS?-|bsy|MHXg<n#6KkL2BDuD-F}?|%K~H06CiCrsnI`nvSA?7g+s
zmt2}&E?Ir{`z~kB>dV`IzQ4H3^?uY9(1sZ|5uw&r#)KUaL1IFzO$!qpLUcr=S)G~^
z7DVWYa!h&l^Yz=WpFh6Jw^^U-7pUeF7Pse}TD$1||K`U^(>v<!tzhrJt9P}gEq70?
z{A1qmcmKI%Tg0=ZQh#U}E<gYB&z%p?rpR+%kh+_c74+rmf|{@YLl@ai-sAbpR_auB
z<-4iBRn4~V+1uE^d4KPpngg8vg;Sn<s5}w2c%5x*aJ8CA$^Dw@oB!VBWPMw9H9P#F
zkJswu!Yfp{GktxQ{kZ>=ef{VEhJXC(WsRmX2%TYgA9;JZQ$xi(|EpQW=QR#myNPZs
zlwEi=>C%=p5l@;pPS15b0Gjl@AjaL=%;<0ev_puqr7>W^6&=utYzG4^T>3>#=l#il
zeD&FXIjwhFPX&DoR{fou*LHNpzvS<}hCd#@nB2YN-wKNdZ~i;}(2dVH(73PoVTI(y
zf=^!#9S(o!6BfVPW2?;ala&>|SN0!gUO%60)~2i#9LvAIie$cU_gKKp*M0ICQhfEV
znrjd5+`|8D^=ZF5>4_Kj`ui};#P#b%h=?D3VsPJpz2uqB_k}Bt>hU>FXzre9*TlXo
z)Bb1S`B%I7uLUf)w%z+d?A5S_YggD9KdpTEX#(H6_ve;$oSNk_sp2E+-KBby4r=%F
zX||>)Yk%%rPyq6=jtXaMOJl==4A8WGM`MyjfQ~5WsKg|VIZEyZ`?tP-|9)L|!!CVY
zpQ*it)59<PaQl%fKkv`oedq1gKW{$$^T0cgJ6p_8zS%ud;=m7C_c_->UwpZ@X}R{M
z&iS#oq#su;zB6^tq9>1U&S0tAe{-^R#fK?JKA!t(IzM?*`oiok&5GQkK0RlYa<X4f
zw)+0D)xm_7Wp0h9Tjd+p^yxRRpO<y7y!QLf_N)hcT-Ghz5#+gEfcy8(Pt6vKJ+kjG
z<Q7$)+O=thqMF^!^wjChn|g}^53LGdYWQuhUtd?>!m#4oj5PD0xe|w7+ZP{}WGYGQ
znwfFM(9>A9S~q_m4`h}<M?{*9RWad0f{uhRE7PKa4HNW4xmlSOIvPw#axXId{QUmo
z`_bpuOD&h2a9L|g->O%?<X`@Ye<45L_R9YY7H4$ZdZxwPU+}SZbLWZfl5@c}XA5sF
z`Pm}r)V{wo`1-p)`L*?z|7|Hc+5G=A*ZDtzwzbteq(itj<S$$+u<zct{xg<hC+|Is
zF4E?S_BqSH{Nr7R>~tY-fu@K5le&|G_ub#H@7?CJ*FWU1_f9eVw`fXb+N^2I|6W;F
zS+}q4kZ9E5oUflQw=6u@k$wNI%6X}VnavL$-+uV^hgJelj@O@`Q43u5-9Iwv%+7@=
zuOl~Zy0^;KC@eqZ0OTqL5pI@4hYLL-45UO@TN)Eqr0A${buuk<+%P4{ef6ndf4837
zUKx|`dd*|{)`01S7SgjHFSgMC*gog>_mJ;*pWcrC(=9(|)1Qe48ou8-KYM0g(f${5
z0{3qQ-U<KqA!hXt-U}H`ljGxGn7{wsSJm17%49<PytY{nof=;JYUp4{vknT)KP<TQ
zbXDQk>8>B|RBKtQix=-VOnN20W0k)0;+ob+rVr}7WPNQ9HkFq3xK*xS;-#&~6P3K&
zsL@x|^oZV>I}Y|raqnKfde|PkkJ+x{){4)KN;CiNoszLzr}gR7RQvP_uMK<kjp|-8
z9$-`gEkjajWjfTbBf?CIhxO1wM~eszQ7)FIL}v{jEkW&LrDxuiKil~D^>ImkvDkN0
zLyLSv9uz*ge6D=2^{##Eck!1$I5PR>zlBbluRYW1Ju&Y<x{J|H7m52fpRh7Nd+;TI
z|I`i-mh1{9Hn})s@9X>dpStf~(o=GwT)V}sc4AprZcNj^;}uJ88Ldo-e6p|BfBCkx
zes!yJ9J2OF&O5Sh!@Momt6mCE`O5iWoyO_{0sY&BANH|LSz@bQDR21W-~+LFHe6!C
zELz?<5e|nn&($Bah)LK0I{tIQn<yF9mZpRqAvz+0pi6{T1gNQUOg-iGF8g_J^}VcD
z%VlrfsoLhVH`et<{o;rZ*6))Smpy-|UhY2Y+n4`JeJtnty0lxrz5imf@X)k|gtbqe
zYx(WoHvdrV{--wSCk<JT?^AeH`~2^fe*M>%HlF^-#_02P+yD2Ux?a4fIVLRiSo24{
z#q4?2&V?<K^Jmp^H&=Q*Dk^-r$KRr}{O6&rf44u{vVKnv+CTU7xw<=gw|(EvO0M~<
zd~08v<jnmtd%tO&46^v4n!Pu*B4w86OTEcAx2Vm%Dfmr|kAZ<>s^cs-!7kR)OX9Js
zr@j{D_`Q35-2RxO=eB;*p1-#K*TIYX*Vcc%|2lTH{Qvv4D@DS#W>1;w{aX8U_@{4=
zYhqKiRAPdf>;xa~w#YpvbNBU+rzdK;UM_j!y6%6>>Qb+%I#cVm#%&c`we4fA@Qx2X
zVivQuG@tK3S$Fib<FUQj&L`~in&cRznc1^t-p0H!ete!cxNQHaXQy}hUyM0!yX=wv
zv=yz@yEjSRSML9P^3l3i3Y+$w<-Nc8p@^f|x187flfG=d@qWhcms*pThQAJ9H*Nj3
z{Xwz!*Lr9D`)Z%{XYUl}`>~CZFKgr8a<08!xAiX9ZRcHe>bt&n>Ay;idmH_=?nF*a
z=bMY)_Ap%)$#1r@33vIc{ozvi^}r(G)BoP-{jBuzJ?6ExG<8Z-qnE**nU88rmL+u9
z`|Rr$nOc7S+*;kUuP;?S?Th|Ar^n)u{mXg#%;ta5ZZ(?yp0V}wsR-9q%oT~9X6O4)
z@y-6!w9B>sKq}k2OU$PoXNIoy)GUwlmznOPkQc^TAl`9rzj)s=p@tuql2ul`)BTwq
zw=3>{)ZgsgQ{(6Dt@>zu-75J0<3rCb=|#2hy;_~y%wcg{ev0A8$AYnni~HW+$Sa&$
z{(0R`d-2b&Iu_5<iT>25yYIfK%@=-`-FI@=+J5dZ{1C2dklztxu%cAZbU~f6;D&0J
z?ScizntxB1cY5^rP5Z8b#&?00w(=i3la(LIC(M&87P@R^{b~6j(Z-!=tJ=5oOv%+s
zSoicx-0Ice!#};hzU<cLYkDVq&rhn@^`!gw!GI$L^{$uqC_3KE6Ia~aEW3mGukV`b
z7OUf{63^_inAR%d#q76V@j?an)E&OfB|9bRCOiI^b)dy++RykL`>Zd_tAvW=l;`R+
z{eCmkvi*jmL2%;cyNhSXw_ksJJ@t)F_1&BKc~gEW-zw6w5$w36mOA<1$qxw*lVcUr
zc!jRL)KNZ^SUAQ1MU46-Q@;I^4}DkJab$_!QS&J(WeHaIMStYZx;mxS^HBf0DBGWh
z_DsKft&8`&-I8N<QJ4P4%%A4>rIG#W$A?XMx6Czl3l}L(Jowx8dBExTYf?5W(Vzc6
z|C0S#Pp<gz<VU|UOk7wbnj)_WJ%6_|%wSK<{Fa}Juh(vhyTo~C-Z@RFKjA-Z`1bue
zKc!3YW1juRzlk;M>1xiKeXcFb;^RKP|GVJBecwaBEB)Wdy2b3Yyo`w0mn?<Dp1SAD
z(oc&X<SR=(e)Ey8!jva&Ia>oiR*9=6IWG~l+jZ<n1y8=io3Hh6j$7|5wCMi5c5CMQ
zucv(f_-`>@F;6~APUl^6Tu#=eVC4lWRR`{Q@;e+}qvpW><Vbyb>w@kBIcZ%lY{iaC
zM+=%PXkWH-*B<7?NA<2*Ar~L?eXrSB%{f){bGm!?X1Q;dU)A5Q68ic5k?ke!fTy~r
z;^(v!z2Xbyp0|%9Dyi+<*S`x(d8WQ8vn%~(zbb0FzWUeVyG75}hwTpg8$UbT`pqNN
zMKbamzO=Ni_%UJA25*~f(#DCC4DPchZt7vGUG}Qr3+Md4e~164n=QC5Bz17ZisIiz
zHuLJA+)}IQS3mSObCH*N`BE(bw}j)_PqL3MpQGhF<NucBUpIBe|CzgPX4kuG^I7Yx
zwMCEd%U`>5VZEM(Mz3<5@vBSyqW0gkn>_wcSjhC{@#pI=c_vED%lP`IdhgdOp||u)
zCf~194!qVg`I^AL_vtV5+uw2=F0@G4#bWd;HJW$b837-Ixwjg-jvP4D8P3|H@g!F0
zPW_kGIfbPREA!r6(=u(eWcK6XTi~Jl)?kzSh6jiBuOD}5w!XL0@XdzLg<D+tv+q4>
z;k}&nN#E(s(<ACU%FVMy|IS|g?aIfPFS9%=PkX3k2wki(P_lPf`SHSj=1{9mKkC)b
zy>={ICAR6qr|`P?L%LgkT|TsI^D|Y>iuW%B<z&h>Uiug(xqE)Td(?iX{S&WEs7^31
zIOVV~?~{v5)&E-G;5q}9w{zKr6S@Ui*zX>%Z~1+j-{r|V7WSWA{!czd-Ddg4)Vyoj
z-8XKVY6X9_E;Q`k^ZRdp$;98Ye$W3V*`K=4;(g2B>+>S2fAdP1OncIDbFY8kC4G-|
z|J%)F1;jUOiR0Yzx&Fo$t{v8io2NUdepiqBALHb&*Kz5VpB-aaVcFW1mvebbSseGb
zn|%>pKc_tN;X;w_$64;>|JU!Abo|Xy;{W|d))CX>HAVmbgm85~?iIh27k($v;AZOC
znv^wHRBFoicfPGYqZA#t`~AKz&!${{9;lkwSEH+(yq@{cIlE>F9y=TRHQ_uLPT$bt
z^=NH8elR5E!^c@WZL(hUm>zfHUlx$2=9FJ+cV)NwvAU<T{pWoRYW;XAPAMtn?DaoN
zM{mVuNxu@z{PIv@pN-ke*L?o|w@?23D_(v7!rEtXI(6$=nk!$-u@e?}KW{?ou~Sbz
zp4N4&<0;&HdctGzoqkaY@~?&RdC%?0Nh-atTby&(Z}xeYH`jd$?K|r;LwRQHzfHBz
z;y3z!U;0Zt=x=N!SIyns1rOe+o?gdu$mwGJr6<!18x-ztnNYGIt%d!&+MDwA$Bw*_
zGK%a7%j{fy?^2xS?(OQ=x4)>LuKsf6Pn+UPm4bCLmv;TmQT()hV@#yv_VW7LjXK>i
z^RD>+6Ev0;co5&=RwlSmruo8!jurn`{Ve>b(6MCl$BJzp*Y`4~{)>q=)SC2*C4I-O
zzy3$KIH#R`?|=R4szY|a{zukrt*nXU?V7HBQRwyL!=)<E(r-wJKH^*$SlRVrhWYmM
z@v^Fuwwtdl|D3*lZf5ap+j^zO+cVF{rw8POge9G>{;9P2`LUIAga!V6oN~M<v@3S<
zwALl_EboP7cFdXHv3tdydGp>cJm$#xeQ(v;!r$dCmAg8Q)C4``cKc=X&Hj?w$zS>Z
ze%4-ov%LDr?|ZAS?|XM+_Wc*(Irh(4Qu=@IJo4mYNoW4vL*l>0{;w}DEwAfrnb!4-
z$?k8^Hz#Ae3g4th_K|sii?&UFe|4s>_nm(X3>;4#XSq#W#adqdf7;GnRgbJ*y?gij
z-THlhFKpAgt}(A}YWBWs`=kD5?Tz@cKl|zXuSs#I5BqHPth}~kt&r~Rq#r_i``67q
z?jHaB&J@4zeeeE??GFE&weQFL>-?RQol^GduA9$w-1~n4%a0?Q_{#6C?A?;#%f8ut
z&s5eY+w87;e*JY)@z2B956ogew)`(B;<7&`cj2L`oxlBw#&kKkxF;$P#ow>BvDW3E
zH2tEDdG@!@+GSGpMSfy`avnF$nAmJC`1<Lr%+GD>tIixrIQMqmp0K^5r(|zG%Dbjn
z6{MQ$8=70UHg50PUz$gXl(V->2fnY}*Yi~TPuE|k<k&yCbr0I*I^Nl~C7%4b@X);d
zk}TDK1EX7Bt;`i(^0j;SzNe2=?iQ>Mw}|{7cF*f~{k+A#;okqx6>D#`zrXnF<<JwF
zDZ9R&GWz}Jn3UceFXpHJU+tCGKDn3w$4*_JSBqYU`m2k_?K*D1y7O;@&6LO=ZDPt9
zxr^l|$>_a2Gx2L&Ae+49dJDJGzZ{aM*IF+8CHA$HiNDnLmuiU6^)+!V*X>`2zt;L%
z^ls`673KNnSv4YQ&(7*Ty0)^XZPuslJ1gJJoSG`Nr*cii3Tv(_>L%&;FSs9^BE6V1
zinY3(e{p5KhtHLHyubPdjb^26O21=wWSiQI(}G`h{=VXt;F-Mm^w;++rmxz&-o1*S
z>or64dy#OB*Z;3cd!xS|uA8A^<SBl!Y(MYe@QXhzydP?5D=8)UepB;`zWeo}a+lqb
z@aIb({C?W@=9i7fkJ@*KKDa9GeW~&DvihraDGEn+XIx^J`=Z~!r=V3$<xx~Xo%i0Q
ze2?^OUtM0OTX>i^_~+r`3GN<q+m}wx|Iq39x7qKCfknst9ha5jXL|6-9}-%+^`yLj
zzo+b6*Rzto@pB{>7M}a{{^IVIynk6=v$nom^ljow&XQ`qqXEGiEqgcnoojj~tFu)}
zM(0OT*O4dYpN_xgS=6~+vu3{SE6#mlkNP>Td7OWDSSj+}KKX4MxBZLrUwl%qz_CTD
zY<Fk%&Btr`ul~$BIK8MM<%gcZvHp*i0xNTZT&7Q&P?!;)+~?YNc){h`i0do5S={Xy
z!=J5xzhQ0CkH@c<@pstD%w7HWis~*CeqFQv>$}v;e|?SmuJzsZ!>KK^K7GF}@%??+
zbE{`7h5oB(U-J97GaxOXWyi$-oN~@bIoKLw#r7U_WIOIAciya8{o8L1g9WJ`*KK>e
zK7CHetNs7Mdr?e4(YLQWr~lT@$yDC0eez-L3WX<uTJQJr8M;2amH+m{PQOq|<;?e=
ze$5OMHm(X^n)Yz@%bg!z>^9Gv5Z2fhY@WJw#nylR`o8DmGYbEI&Tqb`C+vReZ}r!e
ztir#3Z~b2We{o5dH-GM#gW{~2k6IZX@c4&ctvht5NuNWZHjnLUsqJ~08_~=S^Gn#j
zt6%Ir-PROvGOfggeTLbi`p^f5qS8;!SfH`C_w+^8>0c|~m`K_j|Le#x>2T`(#KXze
z@lPMNUaZUcYWm9RzSrIA;Ie&LpW`F#zOA{Md^IBfj-_3$#V?+U-3A*DR6n^H_tm!Q
zd)F8LT}OXy{Jb*g-Tqy#=c{j9GfiAD(Xr3Xj>985-OJc-e|b}+s^7vBi#P4!J9N6>
zrvGDQvpx2@>|Z{&`E<=>iu#k_QFb)#ca8Sbd&<w&SRFt0{}+p@&8nSsoLZM}yk=3|
zdvp6iO+F(XnJ4o$PAH%C?cPMw$@jizEWU56^3yZ+aMD(n{qk2<S%vz?uRX?JcfR?<
z-QQQ8r<%I^cK_M!XnJO1x^92d{Ka?vBy_H2>*W@YUi`G={_5~`;Y;7`4Zmd`yeld8
z-%J({3CTV4%wNo&6wp5V_`3IpCdJLa*q!oim8!vwIXmMIZw^|0;xDU~yU_o~;W{yo
zE$u=xPh2>1BWGz+!|VRnTQZmVxo`1%^`*HkLzk&%o|>er>w<k8el>q~^Z&TeeD`qX
zMG>YoPaiK$|Hfgn#{Ky5wXFUQ*WR0ozcJJ53gVjSzV`V4w$>%4Yuh6J@&5f*r1pn#
zriJz4nKG+?@L#Ld(}{75*>zSmeEFx^*-IDiU$H3eO8m;4mc1|P)xSP}I4Mx;R84y7
z%N1Qgk8*PNNC-|e=Kb_JO>K(Z-pIQ9cOPxEXERC)oGdx{*7U>bk~aA-Ce@rV75Z&k
z=kjaU{9C)jCmgsh;Bo5D&24P&{alP^wVgG5mD;}T$?j4;vqu71^<{c?7Duz+K3QNU
zkWnHnJ;Oli>O;Ro#ePPu*IV{0|JQBd**;NuXZzjv)8-cnJ^Q>N({_Gqmq?OT+hd)*
zbqtqyFFRZ*R&c8MFT62q??qenI^Psmv-e#G<!XLcg~%xPeyZ8IEdKj9;j5EePrm%Z
zU1gcsdBS#DxBs-7Z=E3=T{Zi+XNkX6nONjFZ<G8{xy(KlPlbt7jPG9m&fy#6_Pjjp
z-nls~ww;zUK6qW-_U47~#2vSIqxg@$3;elsalnk;Gw0_xEZ2XvMM!1lle4Q9Mp(ag
z4M=<}SaRXut(l$+|LmI|y6tc66c5?evuB+A^X$fpiNc{Dzi$7}B5vO?`}C*UCqKUY
ztmUr$UEeJ#Q(JaX&%d_jhe7B3pqyv_uSpy~7QeIDHhlR}tIoqrU3>j6xnGo)m08W%
zBYv49ef<N)>b>uu{M`B_y!6SR_=7z4^65`^fBOGQ>gl@;uWRpE|K4&?)9AvMyN~Yp
zaev>veO-#Pim$;Xf1`@9{abXrcfVJZwBP=9?&Le)ufGuE=>NTEMpoLnE&pEqURkor
z<4xK7S|`m3OZK095f-I?*Kex*>$@*68Jzf;_;pvi&5y$ur2b0OX3yF8kA<f<#58gC
zy?sBD3U%Im{qT2p>pg#|Fa7boOL;T7)7M{a+dZjETmRJgc%Lp=>+OgBDSrCV{&%f(
z&YZ<L^5;07{P=Xs;HRpj$B`N>r9YZ-F+Z*HQ%*PMimy~MPKh_wefGU7Wx<TuOUqxo
zK6{iexa#-b`jpyBHQT*@+t!D4Tw&jQ{pyXoeiw^=J^Zz$=>D2bf3L4QJgqBh``44-
zU+wc`lG%CvWW?@w@tdmuUtPGNeDd@EU)<M+pZxyowf_5cTR+-I=5>E&k}rE|Eu!`A
z^K-}ujJx0|*3yjly1jAh%gaqyi@uI~b$_L>{@<@wU;WirPk;3^Z{_5#uXgRbruXV!
z)YexgANp;1EovzFc)j;$f4iC$XBRiDEUKQcQ>*ge@kvtig!WD4nb-YFWbeO?A$z}n
z)V_XdXIRw1TN{5}*|#=s@3|taX*I?1>aYE8^w@U4X4!v#CX<xmwH@hhoHZw=Z#ZWn
zV9pf&>iL_=kJEeuUkiGe+sX$^$*}(aFQ0eeWbxi9NAm9oUP!LFcl_bLXh-XbkMsAx
zx>$Sn?w6hGUd@UQ+ZS5&Zqx7A7R&6+*4~f5K3%DA-QlFXuUmEh`F#C&^5)kHMTM8e
zwZ}NJF1~t~AG53eTA<usomW>wC%+T_pYIW}_gv}c*Y)RIzqI-M-G57{R(*Zh-dO$Q
z&Rs8;?mj*F`p)px*RR%b|0+m)BRG@UE8sw^+L3?VSF@^*{QerZI!x>OdX6hEZLfY;
zySRh@R}|Ca_m^9|YIelD+w|V2ms8TJb9$$9+)UYb*Y#(ujhIxa8>zbb?!T=%ySBR=
z_Pe0{R(*E#v?-_5WSds9_%Aiz(6nUF<35ha6Q+}|f0^qj#&Rk4rIq$K88gQRf4+og
znq1W`{iY^obmq(|4KLBQPM1%2zyGogT@w_wE2*yj+t!PvLThr*@<`9t6)ioy@t33W
zp0wGnuHFxK-%v39+0p;1Q@rlV&aXEoAND@<>EEOK2_{EwZ_av=*?aTw7n4U@c>cTj
z>g~KSf3~qu#Wt_ZfA_g=*d?x6rn6jSg8$+h3;*9&zyHhlWjNQhEf(nycXS=de%Z4B
z%bEF0Uhd50HTs%4NhWD^)UsB+o$5`Q)gMD_i;M2p>ulFwsJCbSA+^me)oacq$u%EI
zJlQOv_@=(+YVr-1r$>J9ZQI2hQWd0{a_HG!!;jsf%<}qwV$FT0{5xNwttPdrbpKM%
zfb-SgB!#4Yzy5f3#@#m-;k(ygcGg=M=lV1Gi0$q*H@?n3(<a`*cj!;;^qIcrJ5x&5
zZAtn2_AKw=lRFZ=RP}macl&A3Qhq~Kd(O1L582cA?_D01`ZksOUZd^Kn^$hh_<GHr
z=e>IMjG&L)&uY?Rf2t`+{!D)$_Svl}zT>=NTEeP326?mI2+zszTHYId_vYkt+YhVU
zn`(MC#&*#s`ET4`W+>c}(D<l7ci+LbM=q|JPxlu;oV}GHs3l53W5xZJ90}nCJ->Nw
zu6z8~qO0$q;JdrMM;{i-rKu%H#>@E4{JK4(@L1HoABW@4`<lNK-H@xDI%C$~AHUxD
z$G%w8wsZ51u%`;2RQfo$rzEs4zy0sfj^D3u#mX)BEsBnr&%L{C$Gv-o@j0sB(r<(}
z{ySN}zHG|rms?JV<Ua0BJZmxa`$4mfyLZ3;;vD;3wLV5UI%8&;xca1@{qZaU^(;J_
zSAYA#Ve0m8`kSx0J2JfWzZIQyZ98;q)u&%=?a8-<@4h{4xy{cgi(k*G^Y5(R7cV|#
zE6vuw`Ld<RYyaI_54S75-%@UEIaz+e;X-ds<99Xkybr$}xE)oqwLQSCynOkjca^$X
z|2Ds!+UA<I)7PTl>JP~o%?A4pTk^^zyb&;X#`pN|ob^SQ=IY)5ZMm>z^^2k`m$@tp
zasqALH-C5|P-!=Rxz_vBZyrVd*_BkyvZP4R;f3L~)lsi+aS3mp9PiR8D1VxlKl9zW
zWaqa1>eH_k`qdrcUAF$_N3Y&ZOYchxzxknO!(qezH|xRf;~JkGlJ)+4{gT<fd)x9B
zlemQN@?U#nwzXWJUMZh0k?o%M$7reDIp^lZYylJgt50_iD_VbM-I<E@E2VdxQhnHI
z_&)EFjl9F2u-W(CrLA5e$*g(pm)3jB*x#%kuXH1i<;)UxtbcBG;l9Gj_uVhvJSzH=
zeE&v#ujwhvQpd0T`=0nbePK1DTS>QFKg}vuKUOp0>EAhFYrNJy|IN#7<J`0ERk~@O
zNABBupB5QUtYa~mZrje$k~e>;Yi6zJYT-wA-=!CASkwFUr)%Dyy;iNBJW9?C)smMA
z!Y54c-Ft}nQ$=m<oYk$$@n1@Kc;h^@{#tcSjK6sL=KJ3JTmNtFj6ZW|^QG-NFN_ag
zxU=<>@=Npj#pf;Z#mjAM=UjF3G}Mx`yU&~V)wgDTyHJI^_t%?+ADKQ?gg@Mq_{ZS)
zIn9Nav?7167UaD7R^;VjpY4n1Ykj||@OGwtVw}O%urHpABZ{Z~yK9gTwpChh_d}OA
zHI0e~Kjen%&-^{bTgP^~>f#Xh3Ga8E*!BDM8y)TP^X)AQe<r!@aGtPzk3-<Zxk_(y
zL~kc9UCi}Su1dtgi`O;so#MnS-Suz2{y8zL-}0ZGjLWYl`{rzT?vRk)w!cp$YtJv<
z5ARLfcG#`|buOs+X>iZPGHJz!S^ISa&df?)_k6>C^~s-~g=W6A{o>oL7kab1pu79h
z{e6dyMJJTc?w_|TD5L&V>nWFQzxjOAi=WyBIUX_I`z?qsc)od)>>-sp&%U#BKlEqc
zS$%)e{*AW6zqX%#RQvsjx@6|U91A1!;&mzKSN@-VZvFa$Jjw^A)F-Ro``UI|O42j;
zbxYrK`;%|$=j%TSF|pUb^ycaNd0i$|-#ET|9oy1%*yVh4UE(X>>`%3Vfx%5x?|Pf7
zbMG9{^Y5SU-Yxe2?xh>byYFg$<6?F8oc-}X*SeBlMrW&Ieut;>U8}EOpL^%X`MC)P
z{NDY_s?h(lXCCMFZQWw+i@B!$dt`s!x!}q)M;AG^ee>h?oDhG$On}GG`2LSiX9_MJ
z?@Z#)U$*@3k+0jQbap0tmfh|1-+uq|g24HE^Q(V1-F><D?!5`hcY})W3L5>~y7vBa
zCzGz^gNy$D6nkHNe3ns5Nm{a^@}%n~Hq*Dhj@xSIKR>v7(eKw);qUZc3G8rsA6zT`
z=Way#{TmUU_SNTXQVrI0{!jX=AFldKzo2jF^!wGTMNhxIUo?-?dfMF6U!-sD-tcGE
zylZ+FrhQqqb=~W$zp7Izj2BJQGyk&I``6c`?VnS=u3NR$DtP->0R!*pyI(Odu;x3?
za-aALbg@p+_m8&!@Bja|_x}IbzjZsy|7%?juNPYG|NC$D{{Qd)|KxuEkTv;hf|vj7
zXRA`Hp1nT#@c$>BO*Q|os@<NoNa=g+>aR+Uk6gJQWCs^_I)(B5uHL4!S3G`g*}7>h
zqVv7J_@DoOZQZ)-tFHE4Tl%&B+U~bPa{lt0PqN(dfBxqQ@9`)($5-acFaP=Xt~<!*
z&~QkM_4DZ?Uu<`CbF@wWvq{o$QGB`A-77~z@BRJOE%Ec}|1kTQb=TLu?wq}J-S4|!
z{BITu?VI}Y{*l%H*2NvEQs-CYdOO$i{;n%?{@Ryj$FG)MBXlkB(t54-7hl6;+&%B@
z+!OS*c=7hypyJa<LPO(g*A`gZo4>PXQq(Td&+4AF%3l*>i%(e=&At}#c8`jB|KHR_
z+jp7mcc1#BF)*oHP<&&Z<j!}mq+hRg|NCo~f7tHzQM<zo<EzBQGVAC6<4oRed$w)q
z&(y!=rq5lEEPnc9BhRfl*=l?O>y8|`?|ic>+9~V9{-+;BPyLQt8M^c7-KwZ*yhlSg
zdEJCIRExJKEUG9z$Xl?f(EIfEL!S+vIZmA|EGO7Fvr272_OI`M1m;w(|2^fAmEA@D
zf3^Gk(xi?(`SPE8y1Y<n-n;!RQHgdYs?}_#4o>S;ShL?(^u6qKwd$Qe4@xfk+O58O
zzvGEYp?eQ*F*-gJIQr|Kf4ZKJp2y<_3&M}j+8zEku_d%7SX-(8;qMFDEfQ+;oc(w0
zVt%ajQQPRv<TsDPf8_Jj2F7=?v~75>S<P5|S=@phlLVYSWCb2ODK2MY{gRi|!@=)*
zDu{Pk;jh2noaZx{RUSY0y03o2evQ-TA7_YvH!<rE{`Aw%qWndF+>X!}!g+6bPA9LM
z^Y5U*m2@7qnDj&?2kU=N_w0Xj-LU+9xbC}8Rr~+ePL|!1$=cng@iBFK^}PP_r|V+=
zJmPixp;S>Kd1KzeCGTt9pS(F=t)72B<752#&~-tDq9?=em)0%}1s$_h`~80H6hAAM
z)wSQaYn&%eG`M`&r%deWBEI!4KW^{4YGo(zrQ&+J%>IZMKdj#@&ok4nv{SNwzhUBb
z%>}Dc|9-!DWXIo)N7jBz<Ooc9(|oHwBY37A-~P9$H+MYo+9dXS&#dM9onj{bT3hN_
z7gEU0arcM)lTZKOeEqZPSj+GI`@VZ-MBd)_`z@>K{dBo~hX3DhDm+kNz`m)g?e3$!
z*0oa}UfKG8)4g}zTidIwudWSUe0^$JYq^!s-EV=?`|1K&+Jzo$y`U7~_weHUeZNhY
zTcqEz+xE2eq(XvmTe9c8A8GUMtpCRCZDV`xxOm=ywllT!4qIRU)*Cz9Ww!2gULCu0
zP2ZU3R^(Of+k9qa{f%O){1snz?SH@T_Zz`(JL3(NzOS{;9=F;4I_`SyLdoLq_wAl-
zzOu1*SN+YEi64d1e%sc`6n=ca?^DIUOWM3sT0Q*s9S*!ABRlinlz+cjT6Yv4-)8-$
zaE7qr6$`Gr7uJbC`7pU=bI*IXN8XhY%l^K(Q+B*~>%XwiKPKfJ<#?*Ax$=I7pYr_u
zrcv)U{ykZ{(Er|gGhu_EDoxKliu-=El<%zOU38`_+xq*tFZbsN-F-h(*+--Fx#7I`
z>TSO!#L3Qo|9<Or4VU1OY_&T3SE9dfeouQmZ`OQ+>AsiWFTJFhU$bl9-nh?MZ$rON
ze!0G89{>M@j&sM}<eYr?e%=&Yfs6kqpS=3vY46u7r~mo;P1iMlapmi0U1Rp>jLv!9
zo3ADxPX4=R=Eq(8^taYu-|=U@T=d&t`|e(z{FXCIWOJ3y>d>Y|SHAhn|NH&?;`%hv
zY5&XX(>0@vCae|yyZX0#^H$sNK%2i4l8q)+FD|VY4zu_*mGjibNd2$tuEpLlP+t7%
z$A9T?%jMFk^|dFj#$MU`<kP1s3=Hgt9cOt6aj_QPy%X*cvnw<ydeW<Z){B4dkD5Kp
zdw1BMXYps(=E~oncl?9>(qGqiO_W?*oY4EsuI|OW(7#pMiAveof7hP=TAnp`q1Rv4
z>1WH<2CYA}C+KU;*Nh7}HNsP_e7^U)l>6Ml-aAv~Y5uBGT=%JR>le{E6O_~9a*lpF
zcYnEDCa;kGoz%lYn`%5Sf8LaO@a<EURC&(O*tWy9cV}%l#x-N=6ZfE5onJ!kurx;6
z+dtBOHe;U2|7YKtWIc1sbxREkpC7;ZpzUqid4Wf|V!PueFEtI@EPGwB^zOfP@vB=;
zzg~D@zxg`HyVrH^zVN;CZ)e%s^0>~>d-2QHXm}iswJxjFkNU0dy?(kY%g)**B3#GU
zt>rxRVr$m!)mw{Y)=d?9_UQVOubFawI(NiBuSwec#O=>%`wagtHz#a#eEnB9<G-!S
z$GXdZWmk3nt;&yB^0mt^Q*YgSw-vk1p8exwja+tZdyvDbLxHQ=F2%m~;Hh!_@%wG>
zPCfHGYzKZdH9xqWvFC}%;+eMHe-f7j$s|2?JN-p)`6<V^Yn{TXlSFMbPB|@^_Ez@q
z)uyfMOIPoEweI@gwI14~<vx2O)t(*Q7{$VNy!PhlId@977^X@@sC2a12dLaG+9thP
zz5DLWlrN>{j!d0l{l3%9D=z+PhoH1+CSQ}~reH;V$Kaexx)!%hd~DWz`?uF=TV`m&
z@mXIJ-4?Ad<}i``=fB#@*O7O7xcG{5?{}|UsUyPwdv#HuqlS4;)?sH6voOKTvqe3I
zulM{Zzk03ZYSs60|6fN1S6Sw(|Cv8mG(bf{^yS~Yk1otLB0U^gZd{UIHW=vleiOTQ
zH}%w?c%zrg>MuLRPdfQ@$=oKV)P)b3R>?KCEPgJ@T&Jp|Dz_s#&}p5hmxfsXYi$*Q
zFFf1MF7NDaOB9e_emms%{vAo5Cz(A~ab3&(L-%ptv<(v@`z`FAxSq6~`=|7<`0RaI
zQvLM-*FCz#jF+ydd$dyP_095{iyZIY@7KR_B%LKKMf_=GfX2Jq+x4>Nn3UU?_SOHH
z_wClCBK<2VOOBrWGwV@++wbG2{?Frmlagg7{o5yU>+F5+^S)dZND~WtypA_<{?EG3
zr>@Bx{4!o8*(d3gm_)ymv|RA$m*45~qw+Gv_UAY=W@W8;zGbJq&(=NjOmj6t&R@K>
zW7eJNXX~OP|F7t^%4Ay+STrm9Qpm!H*YDqoy}9zNxyt_T<p;Zu3U9hIMfGY(*ZsM{
zp0B5@`>lTda?``!**6{B*Ea6?oxUymq_D+5qutY<x8!zB%?UZuWAmp*RQuoq)4WaJ
zFJJ8L{XX|{{`B9k%U#auTOHkY=KK5Q>Qf$7|MY1UdbU-|a!&2c`4x*MZ5NY#Y!x==
zhwGv6eLpo;KH2(GJ#KZ}nlNwo?xUp~zjHT)Se^R#$>q!BZOn@v<Vn{iMt`o*Zb>`E
z9?-j{LL)woN3qlJXKE>*{-ma?UEJ?KUvC#)`M)*y&dg9}jZHmFyd6J}^~`WO@^8kk
z@Y=igy)zGbUYe@s5@r${^gJ=hAXTNe`9S>^_Rryy7JTJ<SDOFroutB_2{#iLJh~@;
zIj?cXzlR%tM!55wO65#k7a=ug{?V?o?RC?p{IENvW%%|MpOf8>l^pUyi%mC3m23)H
zw$yh1`S=CW0_S&`^@aY?TlDnnt4C_;=f6I={AXGD+rRu1cFvbOGS56x^6Y(s1eGv5
z73&o%o`wV*s(pQ={p90YbJU7;q#ilXUs2Yl&fliHzCCSe>{qb|6Z{?@T6uiIjnnU?
zdam~HG^fmXDzg8}*L1Ueoq`F+uT09=boHlA_lfK3Z`S>LvmtycZ(QSix!<qV_x<V+
zdcX6M^`3o`>X(<M#G3i@{<=Q@>FfHqe{pj?(pY8+eX7a-rOlE$=V*V;yg1h<-+!G8
z{kW&@`(d}b(3?{~?%vuLyIXJ1)9{|}pK9_$U&)x>c;)%<$UMGn26nt_1ByKOw*E+s
zoXzz2dXt?|3*VkkC*S_%=*g31JH&eD(<Lk8KWSD!EfQXQcKTfYEO75%jx7#ejx{MA
zKAV2l?OrHy`snke%JR9Yf3q)qH5Gi<_bKke|Fv%;=J~%ZYZYky5P#XY;_FVk{|mPL
zST$wVZ)N6pjFFRh)*svB`Pa6oPjO<VN7&Dl@0b6kT#KLj=gRTguHQvJwEHJDm)1V{
zzHg1-)$P}wFTS?l|KFSJ+jpkP2+pYte{K9PC{fuVefE;=tk*vD`(IvKX8oH(?Cr;E
zZ5MxfXzsI^IQ8y)^ZM!UviF~UHnG|z|HkHbzo(0SY>D5s?xD5%)W6)4yDzfu{mvQl
z@g@75bMg0U^CqM$JGUd(>)$TRq#v0#ZVRv4?=^Sp<QKn9zwfTEy8Nd{$h&6S)(j<u
zz4uw&r@h|u=F_hgGgD5^Kf=j*kJ-k3PE%FTZmkvZlN8$b*ZDtUyQ_UaW$wv)tFM=u
zaI^$-zI|%5_ol@r9;xqPM!%k>=B$mbtY7u-zkbNSz55rvFMV?CUGm$iCM^l|PU|}_
z4_b5^AD@|EJ7K1W@1)vX`=|MOuC@Qe??sws{GELK#r5*quc>?Q$LqgX_bYx<+M;Po
zYhA*57nRq}IsMEi_my|ej_m38i>`0_oAO|H+0UXwyS5g-5u1AJ^nB@6n}61%ZK{Zk
zuiRx`xA)igZF|qHsVsfr*;QG+HjIDu%YvUa{Y^#5JO4ZBpF8!__Wz^YqPOv1Uu{v=
z`@q1!Vd*%_W0DqY>5KTfy}!QxdcAt<yVpzKUthK??&aUB=eG9#`tujGDSv<5OZ&L@
zTi;#3|8MXA$>ASGUtfK7_4T^0*L&Ap-Ph+{EG0ARw6bqWoV|WX!{Tbm!sa(G8Qqy`
zAJ^>QGm9;^-tGGC<NdeMXSBZtZJD;!cJ(Bi&p!|Cm~;C;bA9Tch+itFx9yOc6eylJ
zN$ZW>V(vEo=Lze1N}8GG-tK#zcd&i>bl#(>4`%PUt#<!@bcDhA$0=sVHovLqTBdg2
zN4QSV?ELwfjI)V}-$l24&sg)L|N8zg&#0$m8<Y3%Q~tX8>x%v`YsI;L>qC|*n!oG1
zSzpiV|GmJ}Y|X|0pC^8BnX_SDY0UgZ-<O)$$E_{78~Qr@NVvJL?%fp0-Pd<0Y2SOh
z{>IXM^%thCj{EXItZc9Onjp`j@A~U@|E=01Fe_MEJ}fuNK74)Yznw)TdsP1DhE2Zm
z(tLer_L}+oU$YlQo&FO1zc9&2%8-vecVV{vl%I<q|6Te1l%nWgCy9-YzdmR`s<XNw
z{ynVk-FtnREv^1hYj<7!HGNTSRiOXVHqO&iwHV)v_bjUAcG&K?^GtEh+jrV~YRc~#
z3DmgA+?{YyPIlu8`CB`^w;TPM{*XC1S$$ctyhqfJnI5VNr;e;q-Mw<6{HD79-)lW<
zS6^Fw_0>z;s+H6CY<>Il;o)SbQt602LVusx?YAx2FlU*kW}Cm_Bdyi54vN0FlWMlw
z8eH6Yv1b48w$ptkwUxaR+pP7hr=GOS5xal-)$i8MVxbnR_yf`FdrV5RuZR^M$f-Dz
zvuU-%%Yed5@u`3O1CE_Lo?^=5`fqywwh1qnF5m3Zv(>x${N$r+Z*wG=2=|;)HJjHl
z^QTqym%2~?{3h+KTDf|be|X5BYyNCS;hO6!?Y7K2Y&6UA6jNv_XXN+)>P@>IteBG~
zbh*I)t(x3*?Z5AGUfAEx^l0?=<(szuUSLP<x5G?%XW#qr&oBH}wEkFL^(F;}bVcc>
z@AcJ`SKNQuGcBn0d7Xfg!DEw96ZwBKt{0PwyfzBPT<pL6ef2c6-XHQO6#hlL-#h<c
z<zuP$ysv5`v!>er5%h7KlW3}XBw<C*m$QBr%`)?MJN^3j@A>}ti`KV3Jvq%Ic~0R(
z=7cwu+f!dpNN96C^8eVksk|4`jeDex=UmBqG%2l>w{Y{TtFjt3l?G2<ta))*JaWsm
zn7b?bZiTlmUm1}2wn>Y{Bc$ocyv2{|zC<ThPu(K$Xv@)ir}@{MoK&y9?=;sjf3ICN
z8Ogufd~)B+4?KVT-(RPQ8y2gJzV9tqpT+j+-mlV+$F}^sVtnx4&D}2)Zv1Uk@2&sR
ze>Lmtbkh^t_bpN0{lWi&FV7*htjq!qRk2TTQWMWN7?_9s-B5p{^|*>!^1=Hu_f9@m
zy)4`IOXZ9A$I8!ZN;3YKu6_Aq|C`U<%`eXA*<Sc2EbzYOOtFSW>r8jcu==mp<0O~6
zCH?)DcDp2M_Ti5A>R)-M>$3YyT7M*HV*Evq`AwhmKD_!TzGeS$t|=O`N?CV2<lbKO
z<AOrn%BoivQ}w+?@82)7iWDq*{+6RTFfK_#f#c0orn0q;{@W&G-mdV{fA*nNW^Y2k
zFW>sf$A8_PI;Z>3O14>b(fNPl)fZo${L_Dl;q)WYlaANC{;t)p+*f^d^2x^?mbN9U
zK2-?6{&{ZOscCio??1Tm9^(1^PiE277A8OD)boKs^#%9sB9cAk|9KjJc%w<a*-!1O
zUAu2jE(w?Cl{&CD=^+30@ACg-%*84~=RHv8{Qr6C{F#X#HyC**b=uUgb-9^Q(9EiO
z^5IM2BX2)^TYuHKb{n&9utr~uw2ISpK_-j3&6mBt#3!{DRocwo_g}=EPkrSxo%54_
z)^zAUPJ4K8`hw%$TxTp4ckus{6@0A3Bq@~qU}L$@i;7=U`=wsh8|?5urT*l{iQSg+
zZzbH>tP_0lSH-`)Fl)(@NRHKu{-(ZIe}!ZBe3{cv-cLBSPrs){$hLQn>Z}J(=bpOt
z`p!~$;aNBG(+m$6Zcg7b-}Y&jzJ94^{|cj^YTNs#6pVkF7bh%xzh6f{!fJlp&D`C^
z>)KmNIxnUEHcMGJ@3{KjI?<;}fopH3*KWIQIMXJ4xvc%C6$QU$-khu%dZ9`p!8vnI
zdRt?Mv*zAvq4shM{cn?d9xJrfiTGG_Jic=*-6gy8=M29W6R${pWP0-J`eD`BD@y<S
z^e<l0skE)~pPYSra?QNoRa4x~Y`x*{?)g(ueO1|{Nz?r0f6NY>B(^uI?bQ2?pI?>U
zP&s^ZChO)6cY|XC3vb+P`jz=V@b%;6^WCr5cJ<zPd++!9hb!h)e>pn2d}+PIj>-_b
z-y3eSsMgvpDx0>^dW)HikXhpwb=MZ%<j|5SAAD-|-Tr#T;tE^*CbiU08{fY_Sp0GN
zw3eq&gY|aWa=bsAvGeYYxPpcog3cc=RP#OETgRI?|6=LS*Kd11iHM(%Z<U$;$};@3
zl9bQyKQq>!zL6DkZ{LFn|NrMZmEKU-(YM%l^`FG#Tp!O@8jc3Zcb}f_UT%Lw)zZHD
z_LS@Mf2Zl*{i^6w*Ro~8`PzSbbL|!7QW8@Bo=Cd5&;P7<&HTFPCtv>a$!@v#_{d`Y
z=KSE5k&|!gZN7eEv1Z?V+j0}mV=SWFi;M3lUy?Knc*`eoOS!tf>F!(k8`Jiy9MiJ>
z{r)t|?EBj|yQ1fOls$dQaq^bE7wtYo>i+m^(f?HM-TJ*#7azUf^W&~l?fS4)QHv|?
zdd22WaDBS8eA3M`@ypMrUr5}3ODw=S_V%3baz>N6f4SC&m;dX&yT01{{^}2&>$ZIQ
zX)W%wTm9z#>1riUpM3iN{qg4V`i1&x#=rJ#zZ<eXJoMW>(}?*x^S%FH3w<q`x7F9E
zK8AsTJJfNOr;rzGY3<3-(CeY43k`i=$KK)i<8{6E`~C3v`tSGr`F}mqyS6|4<&>5B
zYr<>e<CU&^RL?dvj$f;MJwvqeUZ|pBv-N?7#-@aMJAM|ZN+0+q{`I$yx@I>!=T&v-
z^7~BvHFEbCea+hY<of#9H8FeFguYn5bHT-lrzV=Fg=+uxZi-=If6e&e(ADVLU;FP`
zos+t0uVOcA%6adZ?Q0~M?20yTus^)y>9U%gKb5}Nwmn`mZPTNmnbO8@8Qw-bxuLf>
z#WF1-<D4U>h2FF;-!3+<+F7^b=;c#M+wCOgCF}mXAD<`bn)&rdg1?jPzpkT`um9h*
z_04?wzyJ33{j6J4svW+%+|>L1_1$rArSvwRn{srI@ti;N<ev*HyEN;6ZCm%U_nJ#W
z|0W&T_w)aQc3t`Q8)1n7v)h<I=Jg%(Qek;|<(`$Zt@-vI<wo7e3r*A3g?`VzqA~qJ
z_pcuhrbdfy*Z;p-=)TLJtFPl%yF8haE_J$gVY=T<xxZHzZ!OvTWpBN@$@I^judkk2
z|5-oWe51VS<Y%i7Hg0vkwV^&ub>%9r|E>}9qQtu8{Z$w0yT9J|XR21r-hX@VPrfMo
z{nypI7e(t=|G)oScT<)9iv<hZ&Zxatoz~3u{kUF=YuC(2zSSEhsJpL7Giu&AL*8?~
z=>i|#{9Vnz9{+b=EwfD}{M*j^YeGA(3dVo_w)fqd(0@Ct=e+qZb?%YTh8ycVxTSMG
zJKp~5xx!%I_a4E7_8oQmGqXfy)G;o}6RIs16gU%nOWx^TyQ(NlqmH7>BX{9~yboLM
z&cAYTPU<g}tz1%Ho!2!p?tAgK?dRI?*sBHU>3L})7Y^lp2-6dd)XezyrRm2y{jvmk
zpZUcPvpF?&)+M<zzDhaRJzw$Ux4ZR4Z+$cVhWJ)(+8g)QW3G9Jm9jL~6+Iu5nbQTY
zGEOl&5E8{Hp_Ws=@~1sd^@n>;CC`_faIy8yce4uJSMu+ZHLLi(<4mUSN<1=`v`w*U
zO|&>&^=G#JrOy{8+D55G7V91{xN!e_^|WS#gi4#=N1T?q<f>_A@NbN%E4kkFcj2x7
zM<T2yIh>o6`%FOUE1Te(j_R9L%&If)u0N!2TesjSvz?dMgwrZMueb|$-ryBx+;RE0
zf3e%NgVxQT971;$PVSrEyE5Qy^Sv5Nb;Fs<b~*GrJ$|Jm)U@%Z(uAnkrFS1*d2;=q
z+;@vd;&;qrYA=`H+F8*b7Jo!m^};p|o}SCkIx|~hmRT{^EN1Ik(Y*BQLXp#Z98x~-
znPcM}c%fRjT(-Etc3xYkn@gU&*ba$A`EM%o|F-U}QT)ZSf9>V_KP<S8A1v~I{j12}
z!SvlL9)4>*)BfMJzQ7^%%{=qZ{0-^VGmTv@b4I*on)XuU)WP!%tFM?XE(xkh_A(5p
z;;6Ujc5In{>C>~N$GzKAnKat64=65kyJ7icHKSdbe9xQG@(Z@5pK53C)`)77{hFnk
zr#0oo{^iZfHCMcv{33cMQ$c`>LSkIq-It+HckE(|Uf#2%<P|sH+Ubmc4s&EqtKGBH
z`HJVKOtyJj*ZtZ1z0E)>t;lZM<(TIaRaeyhZGFY{>dH&eYW3L6`)WN?vz_D*Om~XU
zm{OwmT%vVyo7`z*j@<SC@2jtluWf7TP0KU%m1W=X<5Qsa$%nBPm&$#&B{Qpr>@yJ)
zTjAxkjn^@^dBS7HfByGBB<6mepRkZmbaQlCGFxqG;8e{i-}Z@3`DOShdDcVC8)ipk
z><;v&cH6~2_UsQ()_ylP{K1?X=3*N1nRA5aI<!1Av%k0Jl78YVw#jVIIrhDqeEskY
zo|)aB>T4}R8qF)~8b7?#t%-bkEB#s1j@;=^r&BXNek`#1)Op~_(&YBTyS7Vy)oRb~
zm@!L3t!KuL=1I$c{oeaww#|>jF==J?S8U}k*vx;-C#5(o`nh1-m1Fbga2QD3`|Zel
zaQ-XNhsz>l|3xo2Q}gzB{CfBOZjJ}uulX}$-Cpsfn}j`e_Qr(lI{AB7mdVuHi7O8$
zK4w~fgE>9w`s1}%|IA!wFk$EXduuzJ^cidKUFbga&;Cl)J-sLFK|$<GodW(&6Wo1w
z&vEu86GB}M9qw2jp}#UZvR#{xw`RxYc|HIB`e|0=UjDz@>YM57(lTDvXz$X(edpuP
zE?&0xoN8Zg*pGb1lh3nP9?`#$CmSwYck%T@yZd(R->YI$R``4P+y6HVOIpRp!+)fu
zBFRTroGnrDQ|;;>6|0YNpUpTfoKYYbW-32#(u2G87t8NY|F6V*UoY|86y=R`?N_L{
z^s=9vw^uF5yS`m%`|m$FN9WEq=5~)hSfJXPdMWeY(iU<41zEPjzgkWv9G1*<?oq2*
zxbf$#X~$nQe@T5(RDIiPhO+3+GpRq69xrk_HQT!&p7qp?hAHkD)z6dn{oi=Sl1W+P
zj_QoW*e2o2O_rZuY5D&#f04H8m*Iq9<|yrThh=0WLzSP}uMfSt_~0Y^D=*GpE7leI
zJO7)oT1AEV#FKAN^|o;xwo+?iTy|jDt|=vp9^cDoO076B;p_f`^0}$6bk^@|_PiyY
zar|+_%PFbfmwldkKh@}Hd+gHZBL63Tvfsb&(uW5dW;>)Zs$UZ{*Z-flVE>Lvjc*gY
z7#Pc^9}WyyY1$RiyZPkT-_4BTmk*_hEIU2*)mizg5&BYGAsgG~vA%cftqs=~XYY#-
zxz3{Y@Yw1+gO@K4-!pKzCUjv@%9rqc`QEMTr9bP;pFOS2Ykk<>R^J4-vkf0V6qL$7
zPS;uQaQ1NTyl=ASk}L8P!lzH&b@0@U<^R*QdN}7gZvItrn60Ntdfs<s6Sk$|hl3vd
znP)#qA$q_4i#5WtzTC~2SiErg{X0TaBpxx(`{{n1Tkp*NU6&VkM_#@a?LPnSoe#ef
z7jw-ht=C)e=B)pZ;+_t^6*nIqnP<1;HrMB~%rowOW;`Jrl9Bqm>`dCO!#}5QP)X4W
z;*(09vDda%O7-#v`~GgWk`zN*?~cX&OMLF}|0{3tF|Cpkyi$|TV*lI1K=!%!{*?#i
zR||bMQ-3_iKzg0glvC6HxSL9d1wGxj{m{m{D}}{(-(F)g*ZxPrf|I^pzROh7mVS9v
zqtorh5+>W!w0!9_^YFEy*VlW#zRp=!{>!{D>reUQ^GgqhAJ)0@V2Y!xk$Zi5%kM@;
zZk_&^hnjXz-a3j2TjXEf_gwwD`lb9GaVOd{E+|h-``B8+_v+NYhdVN-Eq?vs?bm->
zH&5fdv{wGcJC#1ZGgDiq_}LwGWsy=oF#qn!OLyy4wAH)5CQQhSSsHK9Q&XT;-uHOj
zZ<oBl!Y{}2Ht2kr-e>l(;Q^1ugq1T=-)Qgk?>@X%;>GhLrQtUw8r_o&wm;EWxmZ``
zl;W$jr2(_`uc&{xcW=iE!Ku&pmcLh<x@e!|tGB|t<8I$5_Tcn*RPo&)O-NYpue|#4
zm#j;t{O|j4f`Rj$w(HGT(+!ernos`zrzUK0<=-~j+ikyRpZitN&9Xn-anilNe@?bd
zIrrqeOL+d9iTa0wFJ5r|XO(!k`1sF5OZM8HohP2@8-9A3pVb~iL)Kz<`=+TI&b<2<
z|LsGN?)xp-{L`X}H~6{lH~78Z&OcrCii(2CWT_1e@e@7^f2#ZU_hZ78wX+}lcU{|m
zw@LSH>gkPt=DqU2B(=ap?fvPRsrR$)t-d}zJoL@_O+309x|KH+cC*d%s%h!$e!4AF
zRekf#A1Pa`*DsnC7aVsm?Nyl3<9n00g{J(z|MhR>iT%7PpPzjDy>wdq^bgbf`M>Y`
zTJ9(=Kkv8b%_)1|`%hYb#sAR_<=uZHjzr~ew_UtFep#~h&uZt$w^#38%h%WXp7lRY
zbs_)r$)#ETRY%TW_fz`4_cwRR{mk`=M>QX3y4_oE8>X=T>%{8|OHbcAHA`vVPg{Pm
ze~W&<4~c6lH&tI)`)g~}^{u|5-|I8x{<6LQbMLp6zb&ml>iygOR%Yeh>oxvXCb7S=
z?>zrj8~^g4_#OrZE+fZTo|Cdz%g_ElQu4yzH~iMqQtg^or}kf6ecd){|Ld!-*DcMo
z+-EC#@4so4dhOqJv5%(SUngsHYUgyVa@pXkUccjQ&p#3l-JjT$I5Tv)Z2F_$DwkEZ
z$8}x4Z?j)t@Z+cWRZrizO}S%wIM;kLhugLKl`l-)YOeDx;`y(>cj_|Jwq>Q-QY~Au
zf7*oamQFaZ_^|bM_Nkrv{dT(!nQ=~UlUZM%*cWsB<JVKU+tYtH?I|v?;MiQWO>EWu
z=*&a+n4_2PSKb!2B>eO{-M%I<;fhqzP`&u9tF@<({P`zl@3$uQd(5iuuZ~Q*x_VB_
zybafCKU^;PckTbHlP}BvckTX}xH`0ahuXZuIoo8){N_|<ti7cyp8a|E%Bok>I%d7w
zdU$JQ@ae1PBzL|4#^aQ}df!UhtiRd&en-7qx&E5g?rF>8Pkx>LxVF7hqBeg{)6)a*
zx8z2)P5i%Ot-{hfPoyn8Wv_0j?%z^-YJbPPi&4?LCtY24Ez~sYZ_5r9HRerEd+g3m
zyQj0L;MJ{f`@czxSj5&AmH)PX%l%^Rq;>08UyYd`;vT=c^z!Lk!PfIlH9J4Adpx!G
zM5Ow~rT>^EQ;Lfj7khfvRoF~Arn*%8(hj{D8{?*$l;u7Xd#cW8a4OE~;^`ktZudM^
z{>A0<;gU2rcj+Y=#jvQR46~Vh-ge2)%M$Z`zwWrP?qbpN=W)~C&+?eMX5P$5O>T3(
z#UHNd_p&_bEB0%(+qY|**+g@fIrF5q-O$wHusdCKRh&P{aHgN$2WQ^A*=x_QKeXho
zq@RJS!j@@s*>5KMiS7S0p)Z;z*hl5I+<R3c=St}rUiwmfYPGVd?M#)dvR1cE57g(~
z$Y<NLdcn$w#3ko$t`%6feBT=7DHDE&-z#=qdZ6S}-A}Ox$20Dy3&bxzYAP7Ew{GjN
z>-DR}Yu{-XJY4Z^(<C{w`=2))pZX}VmT`lr=L4&qU;joqzHbUYu)ouSb>~Fm0QuS#
zN;w%DBQ8kw33DjaB%T%D*R5p1&m^9q9mURAlb=vA>ykp+4wr*B8oJ-c`tnRu51JJ4
zQTpZSgr@w%X<Tbk0s>n6>)j8h-tBXAV}J6wyzbrGi8^=YajU&pzw0EAw4FhX=o8m3
zkJtP8-~D5+aH6VyLF=|$kL6+UZXXX+$!<;9RZ%u+;Q^C~n}WALgddYyq1^mVOY{1*
zH){kZ?2#9e@_DYOE&k+3V8%}S`hXynC-)p5u84|1FmcEG&qY7ZWt1u!3q;<$pmoD6
zX}RO6qkLgn-alq$>R&u@<<X!0+wRuhw5!+V;i#MtdCZBaw=9AG@7-G7g!aW1`#EJ+
z@$C3Nt#uN=`t?^FoC<==R2Bq2fBJ5})?tTt)77VH#)-90WUgXK$aI$%`<n1tnpKU*
ze)~alxmpLi=l|__FFgIK)G<x|yGT|3-X|Z*uLN{#RtvhUyt3-fius!-Jy$vBzdp#3
zlf&9-%BT3+_QHkz54XfvS?ZSG=@qy#`Tx5p!IMuX&OfaBXN{>>WdGLf56^skA9PUn
z;r)+0zC4KR4`z;^Zq_X-e0}}$KXb0?7GKx9H&OfDw%=R=jSa0yWlvT-eO5istl~$m
zSgNXv`Wsz_!1W)@_s{RU$M(9eCY=3*gt!Lp%*hD{WiRfVbNivs3i(a)%JDzf7m2Yx
z;o(?z!PUw9-pBY4PP&udoqY5AnV=rug1BFkPikL@$>_A66t8}5&)klEpY1o7XB|?D
znEd@n8}}h`(YP%>nbY6$&&=i%m$H0Q`2PGiE?(Ka>z!8xE>vf+3zI!3trzq9NlpLG
z-%;yhR_y<`HEwHo+}gPAyCJ7M^sMLK`=JwI%*ns^jZwgh)}0|m`-7V*C(cTH_hjK7
zfvO98r!^FGHs8w-JDk$r;lJX?grX|(6&*9?DnI_TeZxwpcYjMm`4|6x#S*<<XWpxq
z+ltms?AbOWurtfHDTp=o;_{tac?J1?y3g!+W90Z#spsoA0lQXFvr`(it0NaLJjuzG
z@;m7A2LET*C7k|5Ybv~LJ9~*kERgGDyz}mjOX{C|x_*|E=iid=fu=Qrf400UaIfCY
zd)~hM$lbeDCfn9=ZOMzT_|foIKstQo`4~f${eL3MeLie3am(~;ZYq9ovwr?E^WKeb
z_3{oqo^(mu=3@He3=yO12it;rIqxm8{U<Kbs?{0dwP^Fc^8X7S9(UyYBci;>r({7u
z|F<LkslVk8h7}x0;gHeWCSmW%YsQ@A65D?`IFG}I=eL1;&BpGAN0*mYMse-G7r)TV
zraj%6wKAM7)XrjtY0howjqJVqcFo@4V0y~n-HQ*x&mHog&tJ-N%k8!!fAjepA;<I?
zmOT+GnD+5(TH^O^`G}rfThw=!rat*RIestqlg}?}^%u)p9#*LhHTV=+#d71o<ZVLr
z@ekwGpC`Y6w<Y&$n6$<Z3;A^tlluP^Yp0zHT)*#A_uVHSuWtx(*}mgWsd21tSs9O-
z(B1pNdzZ!Ep7H2olikJJm%jNYX@3$hn{b0mapGz36@J_%n?BxOs4piV=bygo@SPpT
zQ$=`>cWf+EGwt8})5bK4C$%ECGg<li>n|w{dDR<i9zXE7ZMAf*SXNesRLTi%&4urU
z%l}>073Ful7b=k|&N9)~arf=>7fvsJ<+3;Ncu7Ujl?|tqXU$w;`hM;y^W6#Gc(|7o
z3%+;}+ut{}ec>8|uESfxxvGx5zP4<CkmTu^-oLZkRUd!nQO$lMDaX31WWVXQ8}&Ix
zSN63yslHDxI>sHe!Cam-G_CD?(~}>&u1suPS}6A_^zqJ&Q*u6O{E<JaxxZ^1oGrPb
z>u)lX-+JCpi7NZzTc=+yz5Y<^y88RDKG&`(@Bck=Qi=O~@Ndfz?X$<V_o$x>b`5XO
zj5?_F^`m~lltXVGX-}WL?dJXsjoWy*m>d$XIQZrL{P=fvdBT%_d7G*^IA<H~s*I?+
zcX(O5tJA;x`p4td7v1sxX%qEb)4RX=mQLu4xxe)NpKBeTF2CO3S@)d%m!`HPA6WT<
zTj68-<B0FK6ZPJ{Wy{eyX=A>eYYInpLQeT_n|fV^l`B4&DIWafa3G3v?;ZQSXQ!<D
zz2TQuE~jkX)s}hIKMw8teCgZ0nmSc+A75#!b?^6n<68H7@B0d)%KV)tXXecRS>tl)
zj$qjIZQo?n<;=7H6nreSXk1^ixL$AhvDj0|g1Pf7KJ8rh=Jd(e`;-h4i=PQ8G`!>f
zG2L%o{K*eLv%EQ`e`Q(Z{+F@fv2Jbe&Lj?joxb*Lx1Edseu}TQ-x6ECY{Sghd+$#^
ztzG|mdi~l;i*<WupKtjj*`&~LS3kfj#r)lu=CB7X>+GkOM&?w0zMk^#-r?E1{lCs?
zkA8T1Q;kHGYFqiIib;Rlw%gtM`{B|2Pt~XEZyKnsd-!jEN8j(uyNq~luH>wKUXs6G
zT<*)i8qZI1U&5~C-hK1hb=gU6apBUs7t!C=yv;uQsq*L7`gD=S9rnK!lNYO>zNMS{
z>!Doe^u10Q&XP8^|LbbIq8_e(x_keHNkQ@JPjo%pZGYBf=~EV?w2y@zUD5N_M%q64
z_5Y)MAya?y|F5PuSDsjM<$nX{x+yI&F3H1;6D}n9i1KhYJxtg!K}$qvV$(v0&eoc`
zto8eh%VKr+aYm{NT?&1XrEm25N59kG|L^VmzvgGjC&q;zf11}@If-G0{OPCnRa)Ns
zj#~e^Z_NbB4(m7jR_VVxtIb(kb8B|`kw$5q9|xB{_Q^6Sv2eWcW_k5L^UNoYZ@nux
zYH{}OHP(I254SnV{fXu7Xxn^K@Y??M&As8wHlZii=RG|qWMf>jsQ7F8s_%>c=0E3t
z%6@*yNlQ)%kyqmPm;CR!r@!O<1NG1~D*fE?%<Cp^%~yL8p=$B^ab9reE2XDd@g+|?
zYWG}V2OZqwrot`PcEr&lz(*8x{+PptDNaHHy*4t{hl}_3OL0Wqo-A1t;_f`_|Ls5W
zA2X*v4eL8(e?2HQ?9qR<6_X9`>A$>n@LjCosRv8_thn>H_g0js&e&X)@%-!iKl{)B
zi<3@$Jy9a}opyBD-}Ixu4qqtbE#By*+s=Lev|f2^Yi=WJm2Kd)^0Mc<q#7I+M(aNL
z!WbZ+I&tQTr0k>T|Lp#Gd;XpO%zF-%2=Pu(ygN&=^|!i#^9r3t<~i%H2*1_hdiv_s
z{zYX9M^5yHY?aYjzvS$eh6ZCLr=|Ikt-NO<r|DKTH!#YGOz;sAYHfO$upmK0LYkLp
zVZy>2YHC6)M^vk{itkI#so1qQDO>DurG(d&&V{>P`x);1c>UOZ{hF)t(lPcS_1*bW
z?p1gGP0la){L5f<>(&6p>3c1Hgv|WI|5*Oe=XvY5SooP-UtJsh-}JG~m%4d;@)y5-
z*|))>Qe*FRt+Y9e(~lTGQdq}-`Ay3C1@rYZ9F}cxa6h3c@%QBazTLm=KkRG&k@3}d
z(pq7K0IpSE<r!L>JeosYe(zFBZki=0HGk8t`j;0g>#Sb>e}1Ir*}+H;SHI~?wQb*)
zdmr2NwcvC@0qE{KHxcPJrb7)I5^_p}SzQ_nR;1{-a!fsIx37HP_PDyz>1uDaEY>bf
zv|1_4SN!?^)r@tyKiLnwx1Fvh)+0T|_=3IWXPqq}uU74-FZ%LAJ#I<vE`uCfLz$er
z4|mOVzPRL?&6D@W``&JNn057c!2aKVYWCQ(|70=A+L$qgvH4H6SlHWus65j@b%kC+
z|NKsWu9vH{*=N7(!Hdo3SeGpNU2XU`KW1Uo^abx?atu#*F6_VeEjYUG?<BQsk?Rv{
z!kBJlRO?ThHN&#vS^1{~<^YCG@w0+e(&D<9Rxxm^bc;UyHerih&i8BGrr{HtCK*Lt
zbvZC)uTH}QMk&yVQ{1dh2OD-oXozvKH63i&I6+5*o7L%X!NwS+@Z0O_v)@0zUApdR
z!7aUzJ!@vD3#mIV-1q)&Vfn<D+M(ay)%>5k=)B&d_zRY&QkK|e|9e$^!E1YCEbk*B
z>5}7SS97NPv;27LW60;eSC_t@s<YqHYr_^P%USfRsw3c}lVv+++OOJ`Vly7Fs5~vL
zy}9rVUv_|KLi=LwuV*A!8CPiu=;|EU=Retb_U1JVOj|@h`lg23tbVtt>)wjj4xb}i
zE{X0s)UmDh#?4;yNxhW^)ITw>U*$|&W7D2nrB=MaX0wKP<_m)h-tQcGK9}}}w?2P=
zfxl#u_f?kkjq6t)mu)=2s30<-$C;ax>9E6w1P?JT)~43v9Ue;VJ#+FauYHcaa#e3>
ztm(<no!YCrFFe2R_Q(DDH=)AY?i=l$7rZ?FVAuYlo3FMt{5t#W|KA<+ram~b>7B7#
zUD@#><_T)`(Ki{hs!z|AF@ITCU0f3wC?j@b-wWop->LcczwUhBboarBW7Y0|njiMR
z`@8Q=W$o{;m)|UUvqQ3(LsE)mg+tlN_+|OeDo^Mh(b{+P%lYWfht|g)dAQG4?7Mh^
z_k$loc3<km{BO^C(%x3HZUJN7>vM<m|DSKIS@C!CpEH#&7;NtaFK)`pZr*))o@Z+*
z_pgl~cbY!woV7dqr`F2bHjD>A_Z=P+;g(`L>bN05OGKKB<xoM#870BcW8cGPy^S&3
zn-|>oDsa+{2Uq)?S8iK>{_pX|f8y77YP_;P&%Ud7-~0CtN9HZLy}O0u^S<2;->Nh3
zrmb>XsdZb`v~s<JJ=gmtA;zyozJCtyV-wDr{mOFh{0A@o@0gzX_s0E(B@)3Geuv-q
zb>3TVlicE;^FmMWjr!NlcW+B6=K+S@=lu1<?s0`ie%s^pd4F$p>APhNHnWA^S5BYr
zbt2_`!#ieU)@GX&(N3GQhNTfLOCrmk+CN^`?RdI0_;#;pajyrXUiq<yvo~>n{gL@}
zz0~TO?{Qm}eJc55rTr-O)-A7>2Mr1s7}&Ns&hirKVy$0T80#H-eLZO3sOg%R?S8xd
z@Bjb*-~Zo1_v1fjo%<iL@0!-ux_@R{^!CQ@-g;_zg#M`<+0M8X-qI>Fj?HNb`ai?;
z=DvW>Th_JzQY*A^axDB7|Iy-QbY^YQ{xJKvUFKg`EwYb{{kE=dxqoTV((o_pS1i1=
z*B=kC+bUSwyOOuMt)R2*)E=oNhwJ|`d0XFzS*KL^ragXX)vwx!lvO>Qvf5MqukHUE
zmAyY|b<~foRe$B9o_zcIskZvTv>VU&+t^Nin_uR_`+lzck}cjRZ(YwW-dZ18fA#$2
zhZ;LW`*~8k;^RZ#SM5|^bya`YmN`DtZGW!?t-0CXWRw@mA7oqpOW!w4Gwb&pr!DUn
zrC$3h72khqZ?DCn^n$p+x>?`;27dTk{b<S7cY0sLtfyaFb#2|O)vn#wzi3X`ThAx=
zJ^noN@khQ&UzArg78Tl>m%i9qGAZ=#lTWr)TXy}LF~?xjlYjRM?tIwA*y2&H?>41m
z{okO^I|Wz8?R#$3cW#^E{|_<gMSs5^+8gneb<g!hy}V4@{&EOUn>R7De81kBFO}tb
zbN0B3|KFeG`8!<JE1l0fa%TN+mQSaql;l)v82$arrFQ=6J&v8_^S|CRW~j5gzNm15
zL(=oWeOnJF<(GadZ0p+|H1+<^-UDx!)oC>@PB`unZ09eq{m{dxhr1I$e`mdR@&8H>
z_D>uN{~X!x%-;WSp)SL*hswT*GY=g4zw^Wwnf?C{{l4%tSlV&TX>Wer(vzQSP6vzs
zuk-u;rFq%}qoW7ETTaoF3=H^ZGH1gSQ_c@zo7$MGPQ@Ir3l&yaxQlg>*G|XJ?dE%a
z+Pz%QJxScn@3+?(t7mb}|5@*v&0M&}!07${;7^++JkCElbba~L#E3;d>aTyCes5LZ
zQO^BZGfqCbcKyRef9_As)e++V-gFiE|IuzsztvcAvF-B7r#0zn6BqtBFQ5MRifHn_
zzsqgyw|<h_*JRb1xKcd)_0{dFUUyedO1(bA`TqL(S&x&Gm{w%O?3@vM$o9|Dc)#}D
zFAv-QSAU*+VTQm~zJE1yxxU^h?=xXl|El+3e#Y}}?Y19I{@6cnf8gf(^Zk}RP4@pD
zwfbWHXTQ+HJ-;s7f4Z+AShZvF%jy4>I+m<5ew;s7_VvY_yhYU~-~MH1;<7e=`ll!E
z-I4P=R#WCb{dKBDBTaQ$i#3l_Ma7RfS1k6qtN&MTUtQ+^DBSwmy1rj`!}$-jMV~C%
zSbJD2jeoE4^%Vs*AFfY2w`hv3{r4NoR>r@+X?y#tV0GAB@rPL~-|sBbe!OJgGUlJJ
z-#T);aow8s-?jR7+)u6R>#r|aDZlH_tquNm)qC$o+dQ6<^O4cQ|EJCB7=h{2Z(iTF
z>BB3%r+*?VukbS8T6lbgo&Ob!#92As_QrvCCZrU6@`!mSt?_1lyp`0I{!7ZQCboR4
z-1dmSZTl3J#KhSdGBI~~{@zV=OD(bbsAhPYTmODR_tfl1JDc7#vFo!t|7n`%aQ(T|
z{uUnL`UgD+zs$0G_)$cA>5<a2zl5*HP3xG@v32&s+?NtZuk0%QR4tGb78M|I;&Ah|
zb+2BV+_p&HEAG&J>af{0=go4=_Hheq-@PxdIeK<|twd%^{$&=Qg>o<Td*p=c1COMy
zfBdq}z#zQyX)WVK_m6R08=dC1KAQ8!|5zH2c8hMoFTLj(30A6m3NM<<&hQUu4!m;i
z%4@!&J$6g&z4gy8nV=!IE8JlDlr5LeZ9T8QP%`EB``@;;l7|WpURA&KPk7^8`<5M6
zM)SY)$J@>n+anbF{qBj~T-qX0(o<(yzwy*dI`(4L8)cbg=kK0;mwdI?_x}vH^r`bF
z^Onm$;+^7G^x5FfTCW(><qK<;=H0t?qGjfmo?~g+Phu3S;{3iwZ&6)(GHz}7+PmDM
zz531G-__g8p57Fjv;WsW?$gs%7Cc(|{n?BYXC0n=d0+cwnZ|7mzH<A?U#)i8to%M#
z+xn}^tNM1%T)|%-t~~m6rz!4Be66R5QZq+HTFl-1NB%$PpY->Yh0pD2Iu~QN=iPTX
z{q=QyJ6D)S=|_p0_=Gj<YwvEl{OH~;QI-vUt>0_ki+P6c`4_6NsXqUS@eTQm|18UD
zjb}VPUNgTsYt=D-+rOfH<#*q0e;@vj<J}**ppJV_?`~H=vi|1BqO*1D|F}M74UkNI
z?CH=RS-1Q9S5~n!`DZG>UlqCEn|kl+{i!uo>-M^SinrCiU#`DU&of&zZ12;zwq^To
zZCn5K>)ML-7nzgRz143EJN~Wv`m}z%(EnL~zd7yw{e8vczR0f{|JUuGe_{K(kj?dr
z$}d^wbG%*hZ;8p5{dE`5ai0%a89!Ok_Wz&IR22pW)~}AUye6$;tzVe`>#O+x*VjO&
zO-6O?+S>ee^`p?ASO13gubX~<{i?NDanrRg%+$*dxBYu@jo#<$xB7T4E)VPbnms-H
z&Zn5|WlQ%*towWQ{ngba?kOMY)$h!`pY_x3dw7g_h4S9_lMmMZHuwAzvv_~#qNQ)E
zx8B}b%$c`8KWt<F)!%U)yS64Ts!rMaujlu_39s+@U;pyo@O@Rcs%Y2NQ){EQzFS{<
zdh5DY(NpU4|K8gF>fx{buP;X()L;Frtz46<K5uPYl)9|_s^9BUi{5!CcOR0!w(2)a
zOZEM>tnM|2lb?R6dcUZ;^XvCl|E9k>{#x%;PTrws_hKX5ch8k#tzL3&*&WS=t{t!c
z3NkV-v#_w@z7$_4{L?gJ{pIYhtF~r+E&5xPAFjXj^x`s?z|Lc}8~oda1Y>vFpDX;=
z(pVJZU1s36D=YiO6@fDor<&OqRs7yRjqAf|Pi=?O;lDVRX_WRDB~B4ee0Flt4M&ya
zf9(^sr6>LTF>lgN>sRZY)|-o~eoGE8+LU@ZSS0FjS#0#D^YMIkf4iRQB_veZAK$4s
zWz%2gA58+S+fL1X>Z!idzA4IVkxE<C;WKGb@t+RLWhsPn1b8=T8@^b4n0<dz-flA{
zTeGyK8}rL=*?Pa4_in@ePv46pl#Tc6O1G%VS)MDHdO-Zg-u&~YIJi#Vmh-c)&gru9
z6FNQr#@u(y6@DkhSMPBwufA`w*dY0d{t=y%FJCS4baLZ}`}6((%0Khw`AN^)_Imo9
z>wkn}KRsXdk(I~W(RoW%f0h5@!oA1kW9{PJcg&s?y|q4}x%{-Cj#;llvWkZH&kT#q
z8y<7sXzfpGxhNIF_P)|qWdECESw8*=+a|rQSu>wg@4<Y2jXYD=$8%e|9gg3cIM3W~
z@w?i8i@s}`aJ21h+o>ad>6)Fn-u^Pbv&W8ibnkubCm>hIs8}|0`rW(7XLN4=p}1JO
zXB$t_^XD<^1)uIweX;xLr0aWprFSoR+P?1g0*&N<?>OekdaqjMD*y7s){2vRj($4$
zg*`|kp`D+{<EWHr=#u%auifT(mQ>^!PJjBVciz9aHS@2&I$8GK`R}`5Tl6<orR>wW
z^77^O4cn*oR4$H}dB@USoVBrTXUK}Zk)ro&7hSJ?^CIBI_4Oy5=Y(Hhyx`_Bf$XA%
zyW7G(|64G#jWPWAk!N-s^-1sdJvF@1{Z{wY?+ZVTp6_+4$f%2cx>K+9_gk)aCwZ2;
zS9Kq<-urQzBVV)1uiQhjPOJVd)K^otH$Shwx-ZJGTYYN$l+!a5u7tOop4cV*IH@p%
z<0%KTg6GqFP5%y;&viZD$9>*dsAl@pCaZmiYri?gsxUAARFNNfUi_Gw*DI%c{_MV2
zJo@#U<EDNqbKITgJc&c<LHC>azxV8a7y0Vq?XOp+`JSD3ana{f>g6|7PW$$~yvNZh
zU^XFBkI$j}rdOExrp*!s0<sVFek<O69_X4_Z7|>d{7cm-ANGHH=CA&J^7&IMOdJ<>
zym>wMY5nb~a(gVc%db(qbM1w|vQnd!?>F=mxU8`-Fy1)J{cPR5ui{QhzK_0LNPi=x
z!lRgY%xIEaa^T-dP8^T^w&tCg@y%_+d-0z(wn3@qo}WHs=QFMQ>g%g@QJcKqt>1UZ
zZIbjcw^UuB>fNvJC(Nr{z2o@l;$DMW^KN~=k*{K5-?peGDfV08n%b3HZNE7NZ+ttk
z;bTi|OZw-Z1v~xwe|idM?7gL5CTc2aJm0kP;h*^0Pu;rbXPJKs+H3ynRGk0CgrC*H
z%gc_Ru{KNjnf-oS=+gV&Vy^7_ee%`Uhec1nZ=e3*&8y3&vTwZ+6kMtK*8T7IU;1Th
z?<Z_IBWQeae`WRF#NF?wY}*v;-=zHX^YrX{e_PJHUs7{@p>AaMeec+-7j5f*fBJ2)
zM$0Q%iRs46i~AE7y*qZw@bnD*`?cQF^Q+$&@4f!n=gZ{htLlzS<_R{_*mmjsYTG*N
z>HEt|*5C6s{9gP0?p>}aVSB~3e%c28p7iG4d)Hl((`&!0ertVyJ?i7u^>+U3SAYL4
zK3)1cZnaqa|8@2AKIfgBGx`2?y_f%E{L4%EJ~-yc<O=P6(<GObz`($+<v7b*=oD-1
z^@X~1>o5I}TU)m`Ze#qK^w%QSb-tIs*1x{K`(@Ssu*ui-ujyZ%{$g&RY0av`mTzoV
zDK6Qb@%H}fRa?8ap4ywLd)?@2{Hx=q>(+#?TXn|S`_hsoS?L?kS>}A%JoWLR)fayk
zISCuj@z`H}wtBu!&i-oo+EZ51Yr^k~w@kTPJJEK(dC<glJ7%w*R6A=?>bb3p<CYZ1
z{_7MCy&GG#F>coy#nrt(#gF`3w%_g0>HpuOY~#I-XWcb_z0$Pj>-6mStn1H}j!*yf
zC2Cq}x9q1i+ToMttxcMJo&W0j>#Kghjo-KQaM_C%?@woK*LQV`ayhaU@uqz14*Ie&
z<;(w{?@KFflinunkNcbY>uc4{xD7My_kVpdW9AIAl=HUfCcOSPRE|5j&5QoZV%YQO
zlmz#+1&7rqzm+%Dk?^_v=jqPy3re?iV}q)We%X8f)!qB6_lM0ofBYfeoZp=x8M|4E
zn;6dSFsk{#`~8ORv!>^lm`pz$9L8<(t4V10{-~)Hn}p}O{kHwjA;)xK+Zyvb_kY<g
z`MP?>)Rz{mscF|da;~oWXXcu#^6P5p^~55h!x#0ltIkJi{^mW;V(Zq~Td_Oi`TLEJ
z?!9;U8S&)bSAF%y*MFNH+0C?1-aN7Jj?J~asonE-Tyd>9+n@3G_FN|QlTUVS6W>r7
z5_fFnv`HJEe0^8BHtuMR>)E?+;y2IY=8atcrv2`dAN2?8%=j$2y3Vh*Tr~aU)7Rgm
zc3sl_aYJ)KOv>*I_gC%_o@6y+x=h51*c&OY*;Lt=Uf$ru|K;DEdquOSfAaLyuWj;u
zH(UKo{>3B9_q^|0l)hp0i>W2^Hq@>Cb^eyJc<+<nf2%*V@7wzB(6h-pc}!+=*nXZ6
z{Cn@Zsf&kkCI87U{x_l<-g@rm(5T{!7F84BnSb)(-zLk?-`4ZQ-jLq=vrwWwl68Zz
zN`1lt=l7ZS_zqpUZ_4X^<?3wpfPecA&kUbh#FseB{cn@({kzjof2;au^0jc*T;FQ>
z7rteM()NalN9L#heip@CK52<wmd*UF(~Ew6jk~A*_0_JnPj&zG{Pq9+I-z~+^y+tw
zJkPH0+r`<sy~%jG3a`rVc9tzlyjxaZ&3S8j_ceRkjr&edPUkybykVg1Zz%tFPcHkq
z49Wh<KkjTaNU*){sm-?k>}vJh^$Uak#!o5T(&yY+?_A7M^XSK)HFY7T3mohM67Syq
zCf6KrsZe91aguSB&^_Z{#uksvp1k-hUw-q5>yJzMo0s_JyfF$>xH3PxVAI*gS~Clk
zVx1a~?B6CmYVTj)JU7Q?M@YeT-KLBFRfPxjbLM5I?8sT1F#mP`(&H9Myf5W0Zk_yO
z>k((S^w**BH~C$D?XgfdoE~tx@c-Jv-NKs}DYCcCdw+`Ynn}ijgyVM~Z5Ha|GjC1z
z^E{fBTDW`u9q;zGy&t?+nTSX_p8Wbp>9<&3z`^a?O6B%PNZbn6*f_b=+vNX3!x%q*
zp-(P8iueAQs0ehI-&f!D)2eQb5RYdOw|iTa^ug~vo%RvRT70fDCSfwhyBNRf=yYxW
z%bn)GZs%dQq84VQ*4~&){|*W!&a9h%df#N%6a$xCGoIUJXzK*O`zj;w&q~EU(J$xW
zZs#EDpSK@Q{%Lob<5;V5tNz4*+j=db%+LC!Z~Po|F=Gk0;i2w=+@90dA8QycRy`yv
zyIFRzv)`BRS=}=8_1B-Xi}~bu`~39NOQvdU%aA@<6#G!t`-Y;(*YDM(m!3swx@M@&
znqZ)NXI|)}g;|YV@0YyZaIwY5zkl-CtXnDx5ADuuaOYj~zrOb5ilUjt@iQ|;f@Zb7
zXN~_`vn6fWbM=q2-^tiUJL`UK>GpL0S7b9mW<%?u+Rv?z7b%^utFK$v^t$`6<^Cq8
zt;Vy9W0Q5>x0mbsy1Zq3_c*1$!|0>p%uhd0URw30S~mLg<~M!2${zI0c$D_(>H9SQ
z+0Q=ozu5Xq-?Oy(Q}v=+?q^R#0`?{RPw3yaG3C$oLl+wNpTD%czQ2EICO0SlQrot!
z_p3z=?whl-#?AiwtL^u$e=XActM2_53Kgu2%AXs*dD1@VcenbrYSzyZbAF!p({|gQ
zrU(7y_K*Hu{dtUQ_cKe)9?{IaH@PQEQr5{Vu`K+(cjEqURg;7NPkLXz|7hO(^7@53
ze+1uGJ=&Xgv&Gr#`7d$No7=1QUET3@o&EYv96Vb~_61zj&z#?`JALw_@F4H<Cx2d-
zMak^*Pv6fMTl({j&GoFmZR>aaS*@L@e(HDkt0^-tpQ?0~eY#ZctH76cJaaT{_Nq17
z_Xa&n_MFuA<)Z3MgWd1XY^r&3`gP7`_dj!|{n`8Jy<nW)wiD(11Gn2+Y3{!mvHHU~
zrE9B-?*9E~Wv_nF>?e!7-Qw#P67OBU{h)vMj`K15R26qEt-YZnDs}vHVSUZ2Kf7f*
zgucyvUoZHVGt=-PKga*@w+YWze0`O*w&?TZ>J|Tj1e;~!%R)93E7|P5vNBNQdid!Z
ziV}Q#{+{D}-*|8L;+>Zn7}%aW&hno0ina7bY<c;muex<xRj=MXo84aYVg8A&|Lb`x
z%vMg`cP)Nx-NyKJLHET~W_liv^!`6rw`T96>&t&=m@(cC{qn(MM_K-kb#M0G{d2YM
z=;G^_uGSXHe%pF(>#vRKCim8sN2RC*3!nPEVU2fZ%)SF^yNVavzx}E9KeRYhL-B|?
z%UbbJtNj_5c*W-Xh~{g2i<>HaUwx%{7I)r{%2~%&$<6-d_x^2MVEi#JtH_Iiwpsq+
z9n<~gTFw0V|HfvmnZ3V0<L%pd`)nqAvF=TN`Y7+e>#D6@b?bi}Ra<rSb?EEM*=1|*
z?>wTtz0|kfwa7nw-xC?9ZCA6!Utj)M^jx_7^VNTXf_>Ffn#+X@_2S)A@9$b3A2s>f
zuK3P*zn@1~>DR9>ty}x)r&Xbfm~V^O^^j9#EvNS#lvvC^?Ptu9@46>nybSw0>BYXf
ztFOD??^?b7(e)McYo=Y;ly~y2v>lga&YC;3C+wG85NP_;MegN<Ci6#k&rkKQ7uM{4
z!#`c$WY4_H{GL+_`#6@K_LJGO_N?g4kJXwso$7}!q-QvtjuGTIB>VBl@+np3A4L~^
z{kD8>_>%n{>m&VU%9uYr_b=B_((a4mDzWq$yK|p<6l`_V7VdG|GV%GF%ewYAUrFp~
zuDffn>~7`PS3(;6*G<^hE<UnP^`}i|d}QTABU8(wN6ODX1x3I5)_uAlv~;`m;mrR#
znuQG?R#ZDIs&0~UJ|<t^zGW82v9gpx@6?CirBW-!F8vekf3sZV=VF%3Df+G18uS0V
zzt0xgclPn8U)RsaZN0T=t<0gdpZ@KWk>K2?efHq5+g6jrr+@mtBcMMep&`MQhczp5
z@%Cpg7kMxIAHB&*PSASA@oK{}f7@DizkdAt!qeSFUnaMH?UPoky%wgvb)uO}%u$Kg
z;fwa(NfcTl7QVau-hYh*!<Kum{w-}$+P8@P*<{llP3LDm5RaGJ?sEOb``F@7r_ZkM
zSm+-bCvjz&VID`^;&%d1kIxf4ocAx`_!G^gPrv>VyOC(z8@XnQn1{B#q46W;Uk4|@
z`~P0OzpO-i(ah?y_aA%;PX<_+``$0rFH8A){yXOzo-N02mj6hL66+B0in;%5(j_aa
zaGs}|;=sd*cR35UZv1{eZtwcvPv3v>TBr8s?w#4KXPp%sbU(B<|Nb?T$MR0`!Iyt@
zHKf_raP3h}-Zo`I()!b{zeqh%dUYdpX-4ALN3Qq2*Pm0Y$n~##|2{L%JOBUpyBF(>
zKc-%Ae$aA_Pd2ZU{f~d`qA%Mv)~$WFHe7en*CS~X799Rd7X_Z%QK>WkjgyDMvnDG$
zo`W(ajSao?Ou~L`IC6`#r2OjoX9BK|Ip;svqV%qaulKA4?<@7<SqdRv&fos$eYCRn
zzwK$hW%rL<wSV#T^5?a6%h<CXEZV)lYLnz4Rbij|VO$(rHhWtb)x`?utS=F_*gP}g
zx{&1W=!25)0uEG`$!NG9zP}|{!d5S?`&-wS?geovGwz9{{Q6VlCF^m?!!zJ)hP&ap
z%EZ}Elrq%IzrG99pMGhT<o1bN$9UFB{(16@d(-NG4J!|SYV$fPdR=<s->l!;4jtdg
zY<cfig}tows!J+g{;y23<y<d(^1;4cpMFfOt$zK_`@@RmyN|WJX1sN7oq@|woBf+&
z|9;!`R#wm7FYc(2iRvB6j<(&N2RQ>g|9(sPd*>F<Ie9I=O&iPGn7UpTnaG+{YVYbk
zKSg;4&#Wr5xCvgJucF)M*d2ZB;c_;7Z{_2M^4H}b$_lzGtWT=YFY0-0#xl$4wHg16
ze|LrMS-hClcP3}LUcE}G;h#x67R4O87qcqUR%mtSk50X{LVBO7%Kthv6^n1uy<ph0
zjf0alq@wdu{S~>5b8Q(Ov)S{z%sBMo#Nuy0H8*#Ohu_?(y=T7PueL4z#h-Om&DK~i
z+PVMugx6aP#H_2&|9zj-Ir-$DdGDjYC0abaws}LJTJrNI_5JhCZ1Hp4yfoyLciC6F
z)4OH%pM0pa`|a%;uT(DHZvAF8<Jy7`O$ANDyPv;VdSw5;lG>C{|NqAMUei)v{y+Qf
z!pMq!@6LEPC9bmTUfS(fd1~K5Ii_UR4E-zh=cM)C?eCc~g?I1zqU-Y?G;z$(je6An
zp}+R0&5QNtI9y}z#a`Kc?EdGtKaZ^UZohD;owet3wwP;Gz529A^|cdR@A)XdKl1zI
z?~I+-t`^^!sO|sLrsw;Yti0&A_iO)E{ixMDUO#8kZ=*IQ12fI%ymx>2e{pW>tLt8T
ziv7d_o9*>)L}uH+>wHsLe){zN>GzA`YrnN_jW2o|ySA`qpI(dg^yKyWiL>(eznUcc
zE%v^D+v(j;e;lfJzVqL<=<XHmr{8-1z7qbu-#KdYyFY##MOP}XYP}Zw<A&nB_vbdA
zJG=MlHCyl3({Ih#`TNbIshi&a2G8Q@m<V^Wfaa`qT#Q;-4mNB^&@ty~aZ7Z2w|$>s
zW#sC-m^GDe&K~tM)vA<PW?odQeK_~}o!`%9|7`MnJG=AlyQ^HHH&?8Au;*j+su^1(
z&n{do9UbE9bLIclk4`Gid#C={e|OFo_ODW1_2MsTx2<~2aNy6<^*PUGyLIo{YZm^C
zKZ>>f`u(~Z@i^NM(d!dLr>oq1{a&Kk;&fNcn-7ht+xcq!Or{)vy=v+|W1B@1#&=l!
z(|&t4ef_6d@~~#+T|LpWyWM|2%xf%Kyy@QlbL&o-Mowkk=Nt2W-W*MX;<a<zb5x}E
zl}g9IIr7NGBy;U7-&c>l&Q<$;HU9qPAlUP1B3x}OO{oh*bVRr%kEr;Sp4++ldey4a
zUfr{&M;z6>w(6ey+lu}d|6PCVZ;*;>-2b4qET?Mvp=W+|uTvJUT=wc>>Mir$+Pzbm
zQ(7N7db(@oP5bwF%K3d67Vm%N>#MV$s`=z>%K!S-jtd;FyxF(^-uLflU7f~#c>n6_
zJwk$B8Nxgzo954!bPN74CwZHJn8bqjyM7x^4^O}I_}U!tcKiCdJsYS0-R`Pd_S*E?
zl)aNP&tHsC%Afe`!kT-Me8;b;o|kOP6RD|;)tc8keeIm`{UMFD*0%ZjS}$fx@9=8+
z_;s(WL)5g*?52WU|NTCm`}#e~^Pey~qk%||Mwn1*J7dC*038i(PL_jC8xnLhxLJ>=
zc)gze<;v4i&+xFG$J%_Q{;Jm}{_2%nUO8>S&#U(KH}<l8Pn|z&cdhSfcOmiZ|EETM
zDxY;f;onM&lIXl;2H$?1jMQ!{OVAIt%wF1HI$hQD(wp6X8=lWsNnf#5kL8E|y!J0k
zT2?WCz9HHEZOeCwxJ_dI*QD3#vfpXSv((`55Wf2~v*K%QlX|FH@H{U5U|%b_HFGYR
z-&MJOcUI??DbF)n7u~oi_o{o=_S+_x%D5&5&17j2bUod>ve#aG#={j{Z!bRm>lErF
z9OO2;_wCd2j=EdSx#=YupBHLcIlPo+ZeY|CnXpWPyOn8Sib9A6XsZ69qrn3obAgUS
ziayi6$8TR5(sDIq=CPMutFCkEJNsnk-zuJG{YUs)yyvg=d-qSBB7Z<#@6px#`bllG
zSAU$UxG(R(BD>$OJoou-%w7ApGE=R0lF+J#U$a;9ZSPUN^h#Yl`~B7Xk)oxQFCNGE
zsoq_mHh25^I~knXYqZ!msP3-&+2%GYb=&g<_Cf|Wu72I=$<vEpEc$il<>`fjH#}}c
zd;MnZJ!Ly-MmO^XPu-14><Nc{6|MLw7TKRJle2#Qql1$fYq{sC=I2-}vM}&l|7zx%
z*pQgjcXG1|rV4Q%6G@w7xFvbkzr)VDdIsSM7NB)hIwG8{O^t~QA~e*5Sle0?CIsk+
zaJ4owCLD;-66csWE9A@C)MqZ;NqVbhdtIA0OWxwz?BhSS%y*Zs>)xaCPxyb|eU|I;
zVn5PiEYIx9-u;Vh@|O#fY%;bTj+mWcZgB1BUc-}h%QrGk`T09!tN-?oyFRT^eUOx4
z68H60`N7(siDyiw$W|UT*W$=tmB?}N<^IJXsVfquU48#$Z>{~$Zv`sb*CeX&*+)P7
zS@>ba(qm4((jT-Lt{5sgrg$(guH9PB`7`OWzWWXrmRBZSCd=;~Ui#y&?V3F~`kYMv
z=U;FTw&<9@X6Cs=lP#T>s_50Nof>whM6jLD;Q^zLNDnBc`yM)M63`J*W<7K?U`L3O
zYvw+?(pmdL_Ds?eT9r51^oV}rA786EJ32qj?%5yTTB~<skB#!@w^pY0(to4*OPl>?
zEuT7l)zRa}>v=yd4GoOn5phN3xyiztjSU*-x7T|<eXbfXr*gjA{_{443;$i&J74tT
zKexBn4^9jFYk&Jl&)k(W!gp@H$0;{IC+@CY?M?5h0IPW)xr2iq`p)n^YBE_@IXx`G
z-|yS||5Eavm1`DGmQmO;m0iI<`rM*<+49YMPN$d6GTi&R@>fl@&Gxwaay1PH|J~K*
z6qp%)^^{uo>PeGdP0cLZlCypHZId4%`)m~+Fsg)1IOD-B$aL7rAi$@co8^#7)wH|$
zJ!>mo>jlL<S+O)|x@l>Z%=9Ps%dXw2|I+;7(}k+c*Um>xRu@hFcK*~a+y5Dp{y1;{
zC9he})U^KJ>Z?9W^F9Bs`Eu+3ywA4-JAS1<`JGeOf5_9~rBG3&55tms`u{Gd?T^T@
zaNtc3@BF%8qHIgaqsv;qgj*hb-qI}4ZMV>B<|@k==ZCG2uJCH9N{Ov|JG1YU#<7>L
zxK01<;rq42kGGod#2&{AuJWG-_eEd46FzmVyh+x-Rxk7Q*00|}9CqveJjI|8pvda}
z^5yk^jDa8b<m^#ZtSdP7vYO|&9v=e(%OS^EKEhm}vogzdPsRU#7khPo%>J-xSNpGa
zfAyGpb#>XU-BIhqcTc{iy>(aT-=DU>zK7q?Ff)oRcp~mEUKF44X<psZQ}yBcS5M}M
zO=|mn@9(#*r&eC&-Zv>)y^l}#-RHZ%l_$p8#|a)X7kD4{KK=XlbFQDIQfn6-y|sJy
z&w^#kcfXh|dw<u3nMR!J^s<xIo?058@h*Jv{_iX{Upv07kK5_`(fl=Q=D)q=cedB+
zm&UJ){pxz(V{gp%boG+tP5*VZe^+eu+F4nr-h6xYqjeWcf9<{X^rCUGXS&kUFq7&3
z)u+FG!7F);?`L(T<ixpa!nD`Fo)O;l`)Boj*Xyr7XW#6dVxaH;^6TO7mN(z?OzPk7
z`yFF#F0CiQ=YRftrDys5jK81Ohe`{Uy439ruj>j3ee&U7v(wyro3FeII2o(_HR;)@
zgPigP5#Lj0YJ)b>z7K!$<-g^08~Im%XLrxr_P)OEkbA$(UrCj9-qWvd`hQ<ABP=w<
z_WtIp5}#7Jn*MIO_gl5$gwlcawXIL2jfJbr1D~(uyZu``MK-wnc>E^W_cwWEeG;C2
zt>>A$;Wo?pDZldX*M94~`)%*lU*GF>pVZybkIV7x7x=CG;nT%Cs^zKap>vt$aM`gk
z-mh8C_atZjth;i1Os2)Zxqbgl^!<Go+FzM|->JVh-tXVV-|B7q@BO~A`Rb*||E1T*
zy^9T9v;Tis&#_Yn6<O12#bP@qN6Q>BUhy~5R<rhg?X>rAzRD^&EdI6TY4rK}t=HDc
zy$^3PEWX&j<9?-WZ2cylqcy+QyC!c5TOT(mv&#MBrudR~9a&7}laH<Uovh&b^=nzs
zBZGTh{d(`d->EVE&3d#d`nZeW+*dJzvwH%y!yISYud*|*{l;;gS1IskSs?Fw_jhUr
zLbA^T&!3gs-#cB@eWG3e^!mjI&$X7XG_NaB-BP#SY4>&Y)<+e6%T8pK`t$k6X7k0~
zdd(6q-~T_!d#>_YA*+v<FJ60pu|9o8t^fbu1$V6X{=fJ1L~>y5uhz0#kCV3_TGJc<
z@<V7<?lLQt9Z!<QpR+AJ{(4_SUg>jRt|wovAF}X!DDhji=6~vjd%v$tf5~?Agx#+n
z=L`6<XP+$p)>2-#^vN#I)la%VPG|_6Vh|@GrhH@dS$TODwSzS#PY+ZaPMcU6dTN^0
zZ&|yw8kx-tqTdHvzCK)ZwN`X`k@Y=)?*DuHc3nN#7~5uNX}&>mjpe4o$=f0}z3Vuh
zq{hP*R{V29%+9}mX3hKSRXD5edZ3#5Y?aT3uOq9jpM3s%p-$|-wdId8w*H#9H)mc_
z>CPs>2O-{z)<)jawr07$z1o_4dg|V{C*SXx_iO$Bu$(P5%a#Y#E-WnLNc{5KdgnX8
zv)?$52JYTBXU=^6fA`mx=LdI-pIo#1{T9WkFW#G9iQmb|-+kt9$*Yr|Uw)*ln%mjm
zqmc7OWS-nl)#Nvq>eXM>r|!<*w0%|WJO_h+9lmNy+)}<>p8h6&R`2)w_IBshYCeB{
zaqs=UZ%_XC?VoHn|3zN7=(OKI!}t7+TkrC*=>N&*@3$Gf`zxSUUVf=wOT9d<|0>tY
zpsi1LZ0afh{7vu0r)NL*UAVC~eAn`Mn`S+_zRR}oL)_X^>z=<6IO~1qlii<P`!34)
zsIK&@JO5&z|3mpBlIoAPKY!1_z_QJ8md|7@*4pb@!uQwLZLGilv;N=D@MXJp+Qx=|
zSpWC?|NpDM|I!Z^4!SOS_3ka@YyPiUpG=uPA$d#PC;ylbrSNOZ)K%Yzq&oz~C?EPT
z<CA*5u*J)C%dXS2tQYq8{h0c`Z-3mj`(-<yUX{FQcYF0hR=dzPiKYJ@bacFb`)-lp
zMwYkB-$gm^`x`5L%5eGNR0*j?T@z-hI`_;@tQS~x>1|YgT*sxqRa+kIpVK7!yZ&qO
zr}x!U&!`(q*8jGxS#j+*=OKag*r@a8Jl}r_yZZI$7ylP?1+@Q0t;}AUe_iYA>aQzx
zvqCG)cUJv)`hVB=qLP)TzOLLG)3wu9ij%c|@zuLAcUDi?^Zj*e)YX`In|i)heNC#5
zSbeqY>#DCwe|@;VWO_c;yWF{7HA`Ln>x%gzx%Y$JzP~T2v0n7=Uy$+sM?0_QthBzq
z@B4p~<`-LC7r%PGvh!Vj)Q{G#zg2tXLmzIOzN`21MgLFr)&IK`{WF(|JpbGFa^0$)
z%ME+;0;~6($;;pHOr|JmV$lkVhJW9F=16}2z5DCx37hNpKk7XB<dgCI(>pDH2F&{y
zAA0@C?yp}u9!%;sJv04S_{uwO-}@8}h<{o2FHg4f^W^8>-)~Vp_TTp6_g;Sg-@D)E
z#jj*>cX8rbz5XC)?hCv9wtg9T@xQkId0IcmA*HtJ`>Q*Gw)bDR@A@)tZ)C>Iin$KX
z--L}NZm8r*Wq#gwC+DWa-|Byfd-f-_#J!u?_xo+>?%!{|9uL&0`(L;G?;oqV?cuA6
z-ij?0wc}`sG`4vferw0x|EDG5m+nj2rJr^0^}qj4H)7wUO#FV&U%uvo(S%UD^M~)3
z>)sN+`s&H_U$%9vCDW?@*cQj98*NmUa{QmzG{xl&ZwuSSX{@p0VU{bpzpn^C`SUNg
zzEwki@6lTR^2OiNCC>LuDc0am)lAG;ReqC|cfr*K-()m88ovH`=$2ZX^!HnG3tt@n
z$(O&I0%i6j>oHbu*l{B?wMgRQwY}&2j_tOUWfD$)(^Fi&LwWBznNPR0>$<L|7~ju}
zpXU7Tw@cdEpEt#(XT6!XJGLo$!uK@212eRFh4rp}x_n9NH;Yq{JOk(L4bDlcPV@17
zDRGu+JGb{x!HmE2<z*$=8^!t)dAC1yeE98K**lS^HQncSv-UixS@r&U^5bOAW<#sK
zfX$vZ9_9NtPGi<J+}Ki4c_qgtlYRI0%Ku4f$E-p%*1q3yM(Ne>_w%QyK2B{r_o<@l
zor%%)C%?b#J8j8cJ7=fo^8arvqj|POiSXpi{CBr+r>)ukHHPKytCnpF{=WRe^pn4L
zJu@q_ep5I@Os<>H(rw1sw|kHNo$!9&>0Pm#)UABisqe1)S^InU{<~jmzu)XM$<Y1Q
z_4oVz#L4qy-R%G0d}nKOJ?-xlx0>2{drao|-+$qgv_`BWmrc4=;NpzOaesf-?EUop
zmeAWT7n*k8+57g}_kI6kmi(W-w)$@T-0H8__oqq9*1pgCez|kbUU|p=_fzivKKbGQ
z_xrWo%4HLLm)Cy3pMAGu-}}0HeYbdf{o3#Md9Mp!oL*Jo*Zt3Dp0DxG-xv4Se!si(
z|AkM>%cmdt{Qce?zu9r~_mqEa+AF;JT2S`#->ct7Z%y73e0TMZSpV2-FXqO~{r;%B
zz9nMWmIVw9tVxcue1*MO%a1J7y%_s<efh2L+4t91W{dy-xA*_6lK*$B|6jfH$N#l>
z_C2rdudim^U4QQS?)UqzFIijVJ?YBJ$5LO_=T>dXkLnGxm^4}E$E>^Ji^}8PMof-;
z8!_!S%Z|5J)8p^F-*>`y`Mve2XSZfwJ$hqN)sn9-x-zZy$8Gt^^ZxF4`%0Vo-<@C7
z)?d@RIw@*v)YnJ*R~KJ1+8BEBiKxf_cYP)AUy2?H->1KN^40TKcgO9Ho2KSBb=CD<
z*SQ}3ULUdj{n{&wEX+BXJ$z;_-dc4u!fETTjd%WSTJ-<d_mG+PGj6lVq(?r|TJN5;
zZpBQ?yjQPYy;i%BJ>&Lt)&FaY-iRhM)t4AOY10h+{@_q$gdN`%Hbd?`CNqBNKiwyM
zcIzdz8sFn?ZQN>gEeBQC`-A4%ufD%J>DSXpHP!oP4Od9>SZtLSPmG>Dk7G_}o$%Hb
zFOTm^d&Kvg?*q%}3jRkt20!ap%{?2o^hd|*psOO=4uwo#E#FiTSUdG!V)xshC+#2Q
z?S4~d@ulJA)pOd%T`t*sU#}I6+qGx*XAT?tV=tb2O?Tq#tv#Rj*sZVXr!(W!GWo;%
zBtEoU-%-e6&;I*q#fA31`4{8%h($IP&S^hC<KgXVU7xD|I~tiae0O7x73{5krM%_$
zbiLC$>Mz2>zw7K={$b)Jw)Ep+!9o-2lnyrS_mEw&b9Kk`>-(d+;y7OP)alFW%)VQA
zt^eWbP0O!`DD%kwC|K7YxvFITpChMR+mD`R5jV-*^i$>hM=O7A@juVK<fX3vIp*fd
z&H9lwt@_Op`FD-SBmcW3S3j7aV8&t@>0x~%`hUk_8~NM*?><jT|5AB(<-PK|SzC+$
zZ75zny(pnQ&L&t;a-U>YoE3L>_>l)PM$5j%8b9~^7`?UqqGEpjm7^zr%bIhY`Fk(Q
z{{70hdw%P8#&2*^d2;FX$0Wri6E4Q>X1ROc)Ld#}zHN3;)mQya*T03eYs+7+)@jz1
zP~hJ<v1gy;<LkS#jRil6FJ3gic*Va&i63)rUu&7Fd-u!wKklK^#k6YFe5Nh`{A=I0
zeG1#N{QsNF8Ybzj%dC%C8$RpzD;DWHH#-jgWM6!KaZ{m5;!MS8re3+%Kh(ZD7pb-J
zO|O0Lz1m=($>jW|!icMi;o8c{`=Yk<PN^%diJt!A*ztp3lzQ(s70VyIfBx~8o0C7s
zzwKXIwI^I<af|-#6=4zi3ycn?1{q{@zh2|9j`Qz(spVU>LjOP0+`D7-M7}RuzpnFI
zx5iDzyXpI7c~<Y+*LI5qFX_7b{bq0UpGTp=@n3m`-0nYF?xVB5ZQB1=cN`;SD%Tg<
z7dT&4{#Q`9)Mx(GAB)vX|8A=O#=WfnhQwF9lb_%J6ZvpyhM7|IytX!v$8ldGv#;d*
zT`wc`VE45@cTer#TD56)@2vO_JsG(H@<yG`k%t-Qf0vK6+tl$m?f-*ml@(TdW}e#1
zSu$;}xOjf)9|grtZA|@V-_H|!wRip3uhYLS5373eslF`y-q(UF^SgY%J4@MpcIvsN
ztl8_8{HdmXL-&2>hlX6!Hr$Gl{>drX5_kO2b~a{*j##dGE8os5Qu3$&?Nq((`6~RY
z;l1DgymjBlMc-R1wmhh%?0e4PHEc}VBmV7QwE1h|`!696wtxNp_0CW3wo=<a;)(W(
zJt?ZG=kxEhnCX9WkMloUDf>NYP3+>otFy!VweOc1KYbtmy>L65>Fd~kj*{y8#it5?
z_x^nSw4sj9s?&@9?VtYZ^PSvz^SfP&)xSS=*OqKyy~^AF=hny4mDamH6<zu(`)GTF
z%E{^HG;8Yje=9S;@?vlF^>uqMroL)*{n_#U@$bK<`C>Vpem#`hcTMkF=#kT}?}uCI
zP0cO(Y5Qc?k!rWoXa5%MU|?X;b)4lpIg7RQh4<XG74HA*|NpQ56LtM{{M!2e_1`v@
zf4}v6Yw_0Zt;PTU$Nlw~zqX`2YJb+de}d~2s~<i6T9k7u&LD7oS?TMCjG?a%%~8<w
zuW(58?6yB2vSj&%rITm9mF@mo{B`yJX@9f+#y_&zeRBH6rE%+vuP=V(r?+lren<4b
zofTO-%j?$P+4bbd|NH-9&wJOeezNb^HU8;e-;|dh`H>v9H^Sh`*7EGT*9Fa+3@@#m
z>aBO8dVhJw&zhjGxiwQ`V`Ee9tqwo(UEMamMSGh6`9J&q&pG{c<@$*1`sbf@OZxTa
zM(@ji8Mn9S)z6IZ_!6(9;nl0}FSfT5jul$}{<o;sr}bY{-&bwD@>l!%$yfhZ%dah&
zn)>edD#f^shb#{{$$nqzd%gPNx&1rLPv75PK3Q$*=YU1oUEwcwYR%%%mr=fAk#Zs6
z?U~(gH>ZWa-}JsbWzUPhwSUaxzW+bFFK>(b`l&^;roE1ObZz&8?dt8%e>|#7WInLo
zqnnxgSr7C1e*&9iqAgNI51qev|HZnK&%f+@FnJMY#KX0Ii%!`4fB2g^f8F;uo$33Y
zpDW$=`fvT$?f0WgZT59*e*bf5b@cW9B@_Iv{#p9;?fSUyEXU=2vM)<tDXagjaBhB>
z<hxgr8UIgy{^$AfnzPHv@4_#7{`f3lll6Swbhdu-neg(Z?|-EAtFE-FpMBo{<HFON
zhxXnpT0ilVUCquFyF1kWJQ67tJT$|YlmCbEjo0f>l`yy1XbUJuZQAolto+FO-|-rM
zo~bJQY3ft3e!4oGo0EfgdVApd$M^o%bx-~E^s~-7m$;+4?^C|>zb?3_98`W`>8=0Y
zvbX<G)GxXD_^`&@yA=i1@BaE7zjv*$`{9%acMgi=JYCHv?HA{o&u}r@?SB=gk6@aY
zv1P@Ty`O5c-t|o}Ycby7r+c%&dq@5Lhe>wVpC;|^6)oF%h{<NAV)T~szmdPL=5^lt
zyLkWfo2*)|&Nt2YuxXFs-)Y}ZKV<m!`i6gHb5cw(`}_SGYqF)DCEwDI+CS^$%l$7y
zPyX5cx6j;~gJ+dp{Px%9!rg=J`pulW^<k5es#De9s_kokzu)(pC27l~cR#D&Z#WtE
ze#%Q<{&%&LzHeT9&CDqL&hPI}K?;uF?Kc$oY_`u|G+g)dinp^~e!pL}`eBn%<^L<{
zYNDYTc|q|B6OwP3Ur%T=EPwOnX~JZqncFm3oWJC*-u>q3`~F$#C*Qq(b-ecbyEUQH
zzQ5nQt4XHL{f2O*trYhc)!*U!Bdn$_-FyC9vGwC6o2QvYzu#Yc{gZ>a)k&NA*Ejz*
zQEPwmB(QPg-ug+uSxRp1-&tn=YtN)_-xuz8`nCV9n0}~LdClykMHWxq7-z~}fBfm=
z)AxI&zwiFNJF=2RFXq+VPiuD8zKXr|E$~wO)QG>lP3yL(?Ty-7H2I>$^Q*kObieF>
z{?UH*r%z`~v+w!yyomE)RGIltRKn+>rqMIcoRjMnixn>G2XW7kVwveT_1fzqt=|S8
zgx@{76%l>>i>axXPm9*0OR=YGT9a>id;G0DnBE)2@}auKT58>v#|fM*1q>Ns1<(C@
zR_@&_7Rq>OnNZ`+!qs0N1YXx|+Bczj-)f&5tM&YR_wQ^BIa8<fJ|XUkvu)Sw?cS$P
z@Xh90UHz!CvZMP%P=o?k3$x0(7|}w1-9}rENH13-PAdgX!Jb7~3w=$#+!i}DVQ$Zl
zkPV9>OL~ItRlK*+>{jq|t}=K~B(v$ErK{cwR?ADPd{UJ^u_>MS(LCkW{X^HvKFrwl
zZc%){vhI!!Ddse_W5!A9509&J9$-|O`A<1rEi_y;)%1<PuJF*5rH_1;RO;Ol{hH)?
z%kSW6ufFMNe*)7D4OMG0%dXs<-NVOy|DCOgR@VBt>IQ!U-%S;<a-1}AT4ZSXd$;Gi
z;`M$#v(eDfiG6t}Ykuy9siv&+%))#2ZrKxh;I)F*`g8ME_rzCx+q&#b?e`m(Gq&X#
zZ1TF9yge!Q&ey9C{a)F$mPH*&DS9{Wt=w<xz3a~>WlhYua%a=_!y(Zw|LRtonjgCN
z^J4AO^w>GI_WJH&WyhD-PY%5I-FJ=Z<?!_4i`EjCHNT$T@^060t?k}o;l8_<e|WDL
z8SzHLBw_hp_SrdnMr!lcr>;{Ke)nNvPl@6!ekOI_V+D*VS0<d1;B0MaY}_zGM?|ZY
z<EX=i5GD7j$M<8tUwb=q#j1xc2fLQc$yz?~;D<SrSKh2p`}_OgKb>7^di>w|j2Epn
z@GRM{>=bAHy+7#i)yvK=&R;R^x_*O|$ujQr#V4H4=AJnCU~YTnosb7*j#VGJ3-6lo
zCF$Ou{q^mP*R4NwCD}htzH)i?nHv4_%ze8zE%KgLJc;#4U(<zYCNBiuzuuiHz#<Y<
z={jA{LSOU0XG(g1<Ly${*=^mdGsGQKEzcO8-Ri&NHP`Q|0}>3y2Spj2mP}>*UijtT
zi}u}FjF$I88|zMO%m@uvyX?nMz$hm&K}&?Im6a)BK}3NHS1ZdwSAzvwVq6^_KOYr`
zzFx9l+;3;hs;O+DuJIEW9*eb!?=Jsv_1ZahwHIx-k6iMbVLye_T>Z<}6WsqdJbiB$
z{J1Fg@#5<F+SZrWE}8tX?Dw22``70_a9ni4rfmHO8^dXhJ#x4Izh4|$e8-kOc-|g|
z%um0>S{>h=+JD7NH}QV9eRKUZzvDS6daMHLp3mLPrmLQD(^b@FM$fnHjt$E_bq)sd
zzfogU6cVpnSGRxt%m1Bc79=oDSoZOGpLs~t-!%t*EwR}vcfL`$Z#~;Gqq4Yl6W1>E
zxVz88S!mha0Cv!R(>EeqtzC_Y3O6*|xLTQx1#-%5oj&{H(f4tt=azZ<Oxd+6#d~ka
zg(v&OHr}hqv;AK_ZLQN!&R=H5;_LRxYAr~A{A>The~&$K)f?xVuD)dbTmD?d{VS*3
zf4O9Q^ZZ)EW#`xbBkkZCp0CrgSVEllrLW)Iaw6@&#w-!G4_hbogtOPTDHY$pa_8Cd
ze|5nhu3T_WV`@;6mGWD>QnokuRNn0g5xPmsZf=_>ver~=t=qGY$=}Sfx9?A24`9}d
z>)iN+`37?&hw!%KaQpp-W<EAFXa3$&^vmksN}d9(j;-%LU3^>nR8rLKbY#}+8+?p9
zB2zSsgn5|`I&4^=V<Obawa{S&XpeE{5f$Il$Ir)<Ew}YOwbj(^%*5-q?i1c$vROT)
z`tRfRRGI0S0<UNHWJkGQ5kG!nZ_q-m?&E&PH;Ep<QCxSuuPtw-?RWe6%{7mgtv|83
zSML9|?M(TlZ|#~NetBW=w5Dj&i=+P{lfLhny7F+0ZckqJ%Sq?sgFmge<DW9?#Ba6T
z*1yY4W-%`Ow7b*0RyjiZ`QPcjzvsXC!nwn&VLHRY;M<LHJahjq-+lM;C5!9_T)%ex
zTHfoIe0p-9A=47~TZeDGalGNd>YI`B_m%LvN1cMb;Cbi{4Oeb9rlx`g5dmUCyiN}t
zb_A>fO;9$1W~<x^_gP+lxky{}YSy}?sv&Dn&CNfxr@vmi<<_>m_VUd>YyH<<erYc8
z(<$|9%@%>vkKcDUy;|}|WPjuDQ~M8Wf3^Gg!|&Ps>7v)>#m!ZnA8;Y$m95(8L(eTa
zUK{<H@}T4QN^9A}=?foL=!O)|N!upP5|RCPwPW`rxf5%@wy(MGXBxb!^THi#pV!H&
z|2S>a{~Y)D*XhrCffECjEH1v&Dm^~ed14M*==!=%eD-|%qisKxztvEXGjDwLyRJ)~
zlQCvRS<^RPmU~+`R>$ArV_;xwbDZTT+{Id&5nH#;GduRK_w;S+d|$`@+Zz6BNp;lU
z?3LO3uId)v7ru2p>+#>ULAB{`v%gl_M(i@pioN=0|Fu<rHcg)X>7YFC()>$nUVS~T
z{q_2y4R>#Cynk-%<WKwl$CjI3<<yD2`zL&VibA?S<L^8Bbsu=Y`MTZV(uO<Byqwoh
zShZ|7=g;zEdr#ZxOz08WXBGDBUCiQtwpH<w#p^@A-`c5qNBU2!VshsFsu?qH)czAY
z?pyPHmFCW86(Q5huReaUrfgkgZGFtLua$b=W4+JU^@=~A=(yl>;s3>Vznd<)`mr=y
zqx)}G^wRayfA9K~vhMlSHuKW#Yu3Noi&wp@kDIh^cemjd9`m`St8bR5J9Ue!vJv>M
zdh(>2@VCCJXT$H8zP==U`v29KsK|r~cl=d%*39qz@n@nXyGqHmnH=h;7A^OF_SpK#
z=TF`z->1YcT*~dZ<;4Gkw{P^%I?wF>Pyf#MTlO)3_TFD#w(gzpHLcTT@4lxd9%MhZ
zklR?Fb3=Lg<yEiu#=W)vC!~|Qc<-(Ezm>yy|0!jQ&6;(*_S@@P@s%ITbp@ZO{Ple(
zxk%9VDx2N+-I5Ou7l-}WE(coS#_XFP#HGG#-|rtY_j0~&dC9u2bz_pz#Z7GD3+xWx
zbX9!$uG7szK=Iy_xb-{abNcdUc~yR3m)ER2{Oh>*V<vgSZPP0MZ?ScoB;L~>S=V^w
zou;Fl^n3N&D|hG~v%UN6<g+JtSzc~B8qt}%q(D@{yzO7+m5A?2st0Eu_MH9fcT1oB
z1C`t#HTV9nxW2nXz5LVVqL^K_@h`rf{&gy;qEcms!?{^Dn<p<02)(<-O5xSJ{BJir
zAAV1<taN7lrZM5b+&%nKQ$^;*KRC4Xd3>I=a>5!r+4$yf-uqooRrlC!mpW^=SC;kT
z6VVgemo0Wh3C^7UXM*Bp!GCY3PD$N8`QqR2mPx$R`W-B8Oj`L-a9Z2tX)XI#s&2Yj
zotgGtw`0<$IbJ_rl^wsjzHF`j@3lXle0>@GU`5dMjjsYUPVeB9Utm5z|K8zAVvC%A
z@G+M^owxbOnFopcxS!3-3j48cqM*SAmu`lY|6=B8-(R-x*YBJA-pnp<<2L`Xj{oV5
zgS(b~`X&E2uU}LDWXz`8|6jGgp7amvelCCiiOgFCJ)MbLzs5<nDfv9$ZPfTN;f18g
z-`ie?{Lg>N6}Ac&^q5p%ezLgf^Ul5HWp!OPeNzLx(#k70-Y?7!WfRM}{_Es}-`6kl
zso&)NJiGk;M_F~z{Y4KZ*=xzIGt}6zboWa+{p;bSk~8*%pLVkT;y3wFb$-v;wqM(4
zR@UGDoBA_x-EN2OiA&zU?D}}H<;Pq7wUsSAY8ATqRhw^I&wiEp`EF{<p2P)<S(~Pm
zZF?caaoBU^>km7&vONnwWTpP?kIm`g?BKiKz5La~R{T@<E?*hbRv171jdp+Yfv`HI
zHs<w91d6I2)=Wyc@Z#6+)8DUgDnBZVnw&KMzq@wh@450X^UI3wy0&$&eO{(;@%g?@
z5x-*fZy2nWnLkUaef{Jc^0#*Q%-moq+2dV0{r4{MS<X2trN!?}?bqP^E1tc4{@DfQ
z6DF)LS~*SR=w54){r8>!F`Vr72;t9r*P2<;v*macU-2Io`DYiamkYdOpML#q(dqv;
zkDRMXIo9;2a{KBR^F!DD%(}4LqBDNay!pRt=GpG7TQ_C%fy1o(;(r$<%>1sq{N&xA
z^-YiNJ$-Qh)mq7K9OnBC%+5@@Yd!6C&$lS&b>X2~zi_U-wjz0_lYU+2P21Z1k4_Qq
z(>ZQL_dirpHo9kZ*DHSiVVf1sp06yH2V~T%FRF5_tbhOekI18vN0!r19sP0dzQ@kA
z_-DFvzjZw?l-pQ-qtSog`|Wq<c17-ZzdtGO7vqwdpMC`zp8NOyP29KlH4j>L@7`^a
zJH_tCD{u467uw(U{&Rk_`hNGbL&^rHa-N>`);mAdWTN4oz0W3b^u^R~jq}{}WY3@f
zX{tH@S~eGL+k5oIQdTDU=zCmWbnia-^?&#Kudn6=ZfKpW_x`>5)1Mk=vOb*5J%9b=
z&Hrk1ZJR9rAJ+f+_t)<GMKf7{?|paQqeRH>_vHKU_RE&@cM9JW>kX_w;ZZ4n?4xzj
zW_L-8{wMpzEBc<RzuznRQ@{EzZ`SU8&-M#UwHH3J(SNU;((hKOx;?J%>yygneVg^A
zYEu8doW%3*PtU$|sdiuX{;OiEbj7v5T4)JBeX=J><L%uf@%n}97W#Lc?!UhJqG{&8
zzZa{YtbWr{{%>c>pS|}dtuoub<JQ&EFfZlj^;Z}e*iJjn@|(PhwY;+ahg-bUtLO7O
z-(UaQ`~Uy{|F@3*|F2%PH}clTzm9W#K8kHsd95=){+|E(BmW%Se3c%{t=N0!?v(2c
z{V%V6{<ZzHho$`8yX!r(3_UHguC06e^vQR@M;woP|FGI*uFn5=&{Z<`OOEWFHv-R1
zS>B3Xo!it|IIq!j->3LqF>4-A(_~xu#qmZhC*_MS>8#)NC1&04b?RR&@2&}db>yd|
zP1T=y@_#*wzY6T!oV&{|s=m-7KkZwMx!|-G`CZQ@6zOZNmwg_v{{Pg7f8HVc*My%g
zcit@&*Jn57PyT)Hb^r9^*VnCc)zc4uF|Wg5QU9F!yIs$}sE6Mz4G-^9|N82s&-B-^
zCnGi&F5ANvvpK*`zx;iP+Z&Je+{cIiHUI8>y5Fh#-PZkaldiA6dQ$bqx8C-&u2U}m
zcNTn`B$8{@D-k=v@T2&0OWoRqKkM!^$p)X#lW_=r^6`kx^a+!%<#{S)y}$VWYVe62
z>wdXb)y*hK>{WS_eeCz=l)Jyx%WwR={WLLS?L7NKuFPe%%#Y#?j_gcc5oVjq`Xke>
zZ;My`zJrA_yB+5wHBOk~f4{bE#UXZqiXRgmu79}lNwv<gH<BmCE=*R_-r=_XzQ4&F
zpLv{mpDz9_s*Jx;;7}0N;oZU>A}45QXP>azm8GiU(wP(w?(aWZ_tyQmdQ1QOSJ%h3
zZr@lB@rpSwDPQq`XR?pkuaX^hFWETR*)PA?pXAkfzxsW66X%=U`M3Ggc3o?76FPrE
z;oNgsmN$2-HdWv5aOf*+a&oxu`?J!ccoJj3)4}}p0iTl2$X_o0eQJ4zciqMih0-Kb
zec_-V-bU}Q*GFVH-@Pv9()iy;b)(#h<xEp$_uS-)%UB>28YlYj$djK(xL=!9XC9j*
zVfp0qFCVeV76)sd&DWXFt3NsW(zJJ%U+xl9dtd#lOE7C?Qm2UFr;3*qiq>r&Hv8t6
zNwr_vy>DNK?Dy@(GtYPU{5I!}shX}4@y!0sEc53oQ|1Whi0wP4dcRWs*U^CJxV>t!
z|2EbOEDc=LC28gHvd!R-M*j-o`AqB?|DSLEtI+?Dlhc0xg12oS)V{HN?mIPk-~TI3
z%+6^R@qe4@o=O<8u}M!B%v!}_skfuJOT2pT_dJbr|K5E2{6DVkitGEmZ<g}hm(N<!
z?VUX7)wc?jeW!HxKVNTOX}7m_?!VyYsn=J&)SSM$WA*o_eN`_-uV08M6=v+&%3d&q
z=U^J!^(UTk(v=5AUwo)8S-S4`m;LV7S~FU<B$>#}w|{7)5|i-dG3Wf+Z(V=e*W?%T
zn0Y;yu-a|hWB7M<yUq(90lN>W*(|fC9A2{X=J$yGmf;`QzQ0)Yw`h<5?w7J!?^P>p
z@-MyO;Ohxy2{CI_$SRq}GG|)VV~zhCGmbhQ?-Spmx|_de@74a?Eo*+8Tou21d+E1*
zLIOu$MBnS{3BP~Q<<p;K6Jk|Z7ay)vbeMMYtm~IJ?jr?<<|KzKYMiUKg8x|N-{>x}
zAdceDj}za2OSZk=a{OM*9QU*D<60i>_TS7={d3p;Nl&(_widJ9J13U1NA5dc)eZTl
zEYX~&zfS)C>y2SA^Sa9?r|odbW_TR4#Ouqf!pJ=tM^5(DHz{o}yx>r`^PbsXhrT;K
zm!Ew9y|Lx)*|W3ri&mW5|Ip%W{SKq+x6~r-Y(GWt-2G`|{_&|n!;T%lUD(Wi=PvSf
zYfH#}zR=e9R?{hkKk>V7G=yv{GLKTy4SH_zU&e5;UirR*x>{@(cmGuve9JxS?620&
z3-i{@`npnmmD=v7kIG(3xV_(N6<NFPN=Ru`vQwiqSD!zBP3Zl-->P0lF4)rk^X~hd
z>%*4yXFu5;s<VIlbFCjQpV^AVUQg<IYGkZz{VVzI?{bbAJKxN`C!-k}vbc8NS%-!E
zT%XqTYs7uteC(d+y4w6tFaCw+hHd)VV!7s4yWFF=)$cCsKChngwd_c7Pcu(+|4;w;
zxo+uiBcAe3o8?hh&yw%{{`R}yEUk7`Zv;+w?%Vg(&MCQnRb}<}E7#AjSn1^Q_(Gq6
zUe!-K`+En4Z700@Z{sLjv!z}-Fk<(<@2lS|diQss&;R|M>z&J=>{<Hy<38!wOl$u=
z^Q@ml+Rax|@mL&GrTyl*|NX>s@7~W7|9*9?`0v;1yVhOMXw#hIpZhLPrv6NeQ|TJ^
z!x3#y@809|toR(eMK+`EP;IvAK1=0EPd<KC`kZBcVq(&}_x(qU@>-8xzGfW$;$P^K
zMHQuP^*VDtoct92%<I^G)A#F(Km2z5XY3#S>U8V-3s+vRQZHPwe801GZguXT?($Ec
zel0Bcz3`UZ+xWY2f0NeE|5~+Bv`(Xcva5vAn&S7TUA%AY{8||@=Xz`w#}bpQ`}>nv
zc7Cs8U|{>@ILlx76l>{`za{(akJhz*|GQfL|Nr|(b>g!{tIpId-yg9*V%_WO`@`4A
zEV&!~wYYiy>;G|oQ{(m)*S)^G-h11w(;xRc^vv0kKUF+##y;!!dLiBK#Lc3V3%B|2
z`0ZgF?Cso9AHRD3`rr@W*4EZ-z4f&y_TSd9<(?K(->3g_d!I33&S&oW3%WC!8f}jB
zIUdj17n|E!sLG(Ey;t>LUDRp*c=_+&WO}E^y?#04w4dvS^DR98pILr+75+1!Iq2{S
z`#pD7yo-DN(zg2l?Ah;}D*vBc{yD_W+3wr+Io*#pJ^Ii!r(-7L%-+-dcecO&a?EA2
z$-WMklb>#%{MPzi>-QX8mhNRqYSVaYcCMKoaK}fgu-eMHyldh)?U&;B<QJV=y?f2P
zNxt^878D-Ub>6!FiNxpiudhD&Ue@xx^GCGvt}vf@(feNQ-1$%GZ&c@l?pNF`ZgKuo
zYASX)d_VbMYnZ3H<}!ixPN_P7*M(?i1pV3i>iLOvvG?^JO*Bk-tXuH>)|TnJZhsf@
z3w;{1Wcv&6`#Rh9-p{&JX#3MQX71TdWgKVE9$mS7;otjZSEk?bGc$9Fi^yuYvb8{O
znNXtkx!CMapLlCG{;a>?C$vL8IdLHy!<Iz7hF<|U%P#ELe^7s0)Z`w)jT0vc=~^6)
zZa=0v@vqX4*`2)q;}&d8vv>ORw(7&ay6L;a*Z#`Xe-<tGddBn5vy+wDS@`!Qzs@O~
zo&B#dZTr~{zBjwB*FW04;>KiIANP+-?PRJRoQdV{jNi=jDC*jtnzl*(?{^6*e4Avo
zTy;udhnJ?gj{IC(o?UD|wyUP6D?B~@IdNum*6l}E`%IcYIrQ0`<kk&Uw>xCExY~Am
zrN`%+FUz$bt?*X9C4Tj^{(4`%d%41Dfr3?+<}&fh%;Zx&lD=TFEYFAd)6I|70(LT`
z)rZ{+a^#a0=KZtr*TX4S|L;E}r}}Gk{h1^4KX(>B50rEI7qsuw|49!81&(pV-aqGY
zulLvs!(|QX*ER=7IfSn7wZ4>`R3n|$>v`~T<;t3`f8_*T%1nE|Y@7P_K-)V8i_eHB
zJ>9(^U*Vv<-pb-bOZ$JX))QR3`A==;vfTfUQ7YRcxa)#A>=(7E7t8;zy}Af=J>aXo
z)zhM1FZtObGO@?W_{RNPEO%Ab))u|GU;9|^R*X6KqZJydUR58~pI8`KxHQq}a@36L
zEIGYh_P5nG{gv$tbSu1BG3$%*#y0{}AD>WPzd7IU;G9km+j}P}d#=7#Ybl$x!>de3
zWx|>*vKa}-W4sg2zVdzfm#^~m+ja@=8*^JL3~%jgo;9VA@vG+K%m3U@e*Lc}_$9t>
zh5!EG<wvgzufDeH_on}$J))*+(<fUhSMXG6?tTAYmVCBxjd!iMZKhD7jgxn{hWeGn
zHEK~4S^oY|{`pXU{n^>P97}#aU$g09@&2;PPqRPO{GR<I=a1^6?|Kur3(R`WC-(mR
z<yo_ulazOs9Dlo5(s=Ui|1<5~U*E0T`0vTC)~xmOr|(_g((2>I|FZm6cWYJ2r`5Xo
zyZA3nQvYAO`pxT2vEqBXZl2n|nNO{LcJ-~<NqS}%-YaZZ-l^~{`obp@lZ&r8;?>uN
zPQJE!@2w7PzL>U)|1a!aF1w*-Y0HM-+SKRk7b_`FvVW=<t-IRw{q<0>y}yp$y8c@0
z(ci3fKWn`1Ec={s_EVkoG+`zt^EaD&JWO_YpRls3QuT@S%l~5i_~E9ux9PXPB(Idx
z|IaZY&?agAuG9+)D=zEJ{V#f@K$qo)W&Tgw-CIs&&pMt}r7$Vsv-@vl{j){)!``cx
zpMJC=`jzodz1ZlDxAz|r`S8NIXP?{mAB%Q;c=7jsM7qSy_UqTbU!QPv!I||R<}Grb
zCg*YK{weM^Y^LrvuC%NTolt0d|LsR9o+o`?%zA-7!OMStQ4X)M-f7Ff?ECSS;~#~d
zeV3Oo`Bb&t>1*9y!ItmB_iJ`nb8@LZZ+-bkJ?~@5tP=`Gm6`WfeL6pXzrL>4<&&>p
z$4;r9ZdJA=FED!FO$m`w<CE{M`LEtI_gqus%E;O4Cm-6QdcWBIpn%&Y&+H@r;(mYG
zzW4oW|JHVvnfq(@HI)Z1>78UL+9#y><@DbD4m;NUegCk0^{OwWlP_<N4Sp`Llb1)Z
zUhmuVjm)v%C)`_fM@cL2adcavOU*au?BEqjhu>^Jyrs6fL-A%!Sj;z$_0^x=DIHXM
zsr_P4Q-0Z76UC=HJ)hTX|KfD~_Mds4`(K9i<?L@roW@f7chSztH}{{9uSr<&{magx
zDzo1)yERSbeLpG7CGuwbr>ygHEEWG+dEeW*Gwu8ax7bVF-gi&l6>9x{<f^*Y-iZBM
z-$n1cJ+V@JVotd8jHkDED801i+57$I#_x4;=3ifyOnCnK&D?u;F5UfFFYLVd;{I26
ze@pEQ3%xVb()L6DxBsiI&wui>(&qokyVruReixYk^#B6{%Vft{{*zy^mcQ`sU4MSo
zx^;WAJof)y{cdmZ{k7$`r)#gCEc)N^Q++>svd@3r8!DR*yfrOfp`m<f(bo^w4$tqe
z`DW>IJm@c<`}?x3cl&<UE%{V+J!oUtR^6+@am&2Azw-(Ry{vqG_kNni{oX98#kKuY
zmG{keIjsKD{^iot-F7d^J`2Tf4Xx`AuXy_9a_s7<x{kcb|9pB&ul>(BbH(rBuJ=!V
zPLE$HdSdqM>wB-Rx^mX#)Bm{daqpBqRn+bCy>9M3W#4k|!`3@a$KCVp6U?f3wkLIQ
zQAOPCa4Vs&(iZDi#wlIX-nG@rds0$C{j3Xf>pJgDm?*xpH(q^e%HMA-+w0fIuAZk|
zz4}Pho@rZk*Ozgz6sMcHo?0rtQ@MHZOMTb!M?FuMPv)Gar)F_Ve|CVmP3g`0fA4qg
zeX=&TO7n8||2bV|8BrPk!**0n>#y-xQ8{bo&)3_<_l3*f^`DV(dtt4hcZ~lL&#Zf%
zE^1;fZoiv6ZiVe||7XW}BD?ZX;mP>$hZFPO$4~A4b8L^*vL}D;AB@>j@JelFocjH$
zxx$j2{YOf8E}x$J`_+}!Y4Nq|)^xn<^=p~A{BCN1yY2T=IxffB8jmg97VVkxIlef#
zp<(|=SL1fq^LJlgS6{z>>#IK|J~G`qe>^?EVPcNVm)+NA%-*}JwXj=tdAm^Kz1EtP
z<8o_H+D_?P<IenYjpqL;f}P15_wBvf^)>rQ_R<&K@1{RG=Au)0(EjB6-%Zlz7K=aZ
zcs|{vX1RrD?Vf+V&S9(LkDZr`I}>xUYP-3yyxZP;dpCyfYk3-9T6*ovNxiwBn{0b_
z?!0BaKho`U?DxsHu7CS2P$}1Aztd|&<d(V{?e(?a&vpHsuthU`=N;pa8IJS*gx2#(
zrlhZYZ~cGr<n#ac#RmtSpYNLBJ$u$)&NcIm|0;d2{d_4~;W+D@*!%k1UvZo~?sB+q
z#g6LQChNm2Gw%NElJ5S#Cy;M>eXz7u8}m;uk<0_;7iBeB&GXkS|8wYR@(nfP_8_C@
zic|Iq%-*;AdB+#i50kDh{~fU|F3Rhd>8cy&mYt1XynD7tT7T==`}ZFf9kTO3nlSBE
zu!Q-kpYdPwR95MIy#IfN;+~rR=Gv|6CkRT^|7*BE*~~|trQYh{&Vt~o<V7kA8~VAM
zzW&;>eR*5I=<_#uJZx^pJ(uJJRzzN@IX=7o`L+Ge%{M-M%In;fw&H)pvHAb1{y8@D
zs5k!-`(9YQarUlzb5>8j`gP5Gk=@_V-&l0x-kX09W_^FS@>7>y==&)?Z(65r?F=a^
z;!>A(T<KHyo@JqU`KJvbS6*+K6LIF|vG#!4b2X(a4f@ZomfL<z|MB)+B~Q<tx8yR=
z{O|NSw$r%SkU!Y8!{+aIY2kbR>GfuuzeHVji4~exM%>~l$|`7#uRi;}{Ka0+diPh?
zmVbS!(S6U~H9mNG?f26+ZSE(v-us<*`hR4r?SJQ@PWA17y;puQ?2F}ZQd+p&_wY{6
z$3m;V+S}XpKYzd9e){+7koShN-`!5E&3b*|-<JJ%<JMpOUOoT)&G4PCeuI`)-dptV
z_uqu0d3&SVfB*HE`g&^BmkGZ5207FB>&EQdbXE2HJw^?YDH@`jtSpBdH6jW`!259|
zK)bJ6k0=|gu6p|RukHHsGj}?pW*Ubq=2QC>KjHbN52fG!e6-J>8|9o|^z*avgW{7{
zi(ghf{Z+{QgKbUUt4SM+-`+5C{BCa}Z|&l*za!s2qf2n9TvXqs_BVnKRa2WEZRNXj
zKQ{j0tvBzbzi8T=?Rnkjaxi_<{^_;qOBJ{{-5gfN-fFrZe*WHcm8G|?Jbt*xfA3AF
z_cbMLi?^InQaZGK){3czH>Lg(5LGD4;rS!p8MdaSVQ>7S4}0!)7&17C2<UCuChsxX
zc4@-{&_raG2q!BuW5NQ^oE#fdW0FFIh8kBZ*AasiE`6c9@1MV4z3a@Q9Zv<L)-Kho
z@je^zWoP|_$EWxG{<rk6NWtOyGc|S<);hZC{~zhkn{~0a(&8pl>HAILmuoJ%-H1BH
zP^K^`^}A<9rvabHd5e4NDyn|WdRw}(mFHOPojEBJ`)c=vY~$KD<I<6)yi9?$g%_4?
zRgt;X_ViOq<hoaStGDFv@OVf_yF}WRINkrb@cqm=*Vv9+W9ru|ozt8!t%}2O;u$60
zFaf=HB5eF`WEvm*F0W}3^YW+-6W*!v>UOFhS2N`BaUm|2mW7ERkMXp!GCFPuNE0z;
zb$KW>y?x!ke=X16tKK`k>!@LIpt|#cg3phwYvOHwyqxdHdyD_gU*SuCF38O7pX{4e
zuYdZ&0V$UkN4n#_&M`Ns`MFwUa;b;fodVW%#pl1=IdQq<fWqD~d6pk75f9>Lnojr;
zx;OH5<tp9>@e&T}cEm|7ztfnP-NpWUPuZ2r4|v0mh6imq@ksD!<P5!lKJnQ5;rFlK
zUcof6&a3sufknIZCORmq6&+n2!Xy(A#<V5j0z2qrY84S3R;Gmo3nIBxlv<e<I&7Gr
zC(7O8^)Al%{`&r{k4=`XpDC4GwDQPAD>?P=n;zZe-}=$i|NrNTW9v^a{ry<?o$Z^X
zdCjW5+m23&KI3M!E!f}mOv?UKy#YdTM{mD;diP*WAfK$tEzQYycZb(2Z@qb9O7o?f
zv%&KN!%u$-v{3O4-yUaBkj49JkLcm2+4l>IgrXLCrEGo8v7|cRKx9jrBJ;D#ty}-T
zzQgJHQSUfYc|qy!It85_`)*9GKW)VBzIg(p_oWFND|arpRK4P7vcMbcrzt8XGOR2Q
z9X13QhzN7C9CTdaqan)0+SVAz8F`-f^W#<P^J`PomT`th8)XNoHE(vmygfGW_qQ3p
zZa?q(bL+dAXJu8|)!129J0AVF`}_9)_vp{uS$%Ek>2f#LY~RBmRl>)`{p5w@-}i4t
zr0<tC_?m8bdQ)oU=bf?3!!NymI;~yT(>Eo{bbjs2Vs_Ec_)Rl3=IJmb>4Yg=;xfu`
zGGfy8_Tqi`Y?6oJ9+nByizhu{`~PnBnvlN_3+p%7R<2dGT>gnsb=fq@cHKk58jtjX
zy1qonFlvc(XasSyvN9$dh%gW_YGrc_1npE6?C_bsH@#mr)c4eD-Vjd98D6h+lONYU
z+RN|u!~Xw{&-Tk@Y98*$TPPoMdc7=fdx>av-}T6ke%5~l=lXxYeZOMO>aulyzu(@v
z=Tsu-ZFcOPbH(4dt)lr7J9xT-Gx}6C^OTeXH@zyf{bjAQ@$Twq+qvEo#NIzB-<NyN
z(Y1lCY|AlrUN6yZ4$;oLs#^T7>bL&j=I4DrucGGpy7+5`hrZ?Zyh>fsdeSTW%8n(1
zM^;!cYKU}b7;&|>G!=l(dFNtvI_M}7&?d^o>7||@8NFZDziv-<)t*^-H%l!a7n{5a
zDlz-m*SzETEA{l$nCy3N{wwpxzdieLiCp*6hj+HDznj0|<Ds788@1}zyWitnZ2rMI
zf6pDp)BGJgKmV@PK2(xqdi+TJ_Ma8Yg*LoB$6?&{=3d9m(;QnbpJd*3Qr!9U?l)V{
zojW~W;_mm-lJb%l&;QoWRC%-WY4GXUEhTfN-@LS@w5CDLvTXigL2I#slS!*KD$Q8S
z#Kohi+wg#afyv%+R)7c>YkBRz*1a76qjv9)S(Rk}AvCuB>-69M>;J#{|L<>o*xT%{
zb^HJS%@VjVk43r4Z)>f^l%D5Jg?5}rrXAe<-+b}rx211=b@uI5T|IwI{JQlQ7yZq%
z<0!g&XZb#X^WKUNQrn*{|8z{I`ft)((cfYAaqFTFTCeUn{``=np6~10h~Ah#VcNf9
zy+baYd{xr%C9UfG3;%HS*B6%Ad)G8`^1bDKnOn6sX@A)9q|K&PN8|SDy}G|<QF{1#
zr>$Wv-(QPX?FnBK6kByLLg_>D-Me0`S$~#a-1SlIX;PSJ?ZUlx7H!#){$)+wnwqJv
z#TA9Bw*J~^9dB^>`|s;r&sPhjDZleQyZ35F+>%ErPvYLjn%>$K_u<?hhjX&sOwD_{
zKXF$tT=f55qTodDDf4x@lr!h-J@!jH&}f1aGq20-mv=w@Zdv|)cYM?C`_-4%nC&{q
zdH1`$o45Mve^vY9zrOOixnb`2O7j=KbzAj{{`Fp}HhKN}e4LD5{GV@}cmFB=w4L&2
zg6`>=za*#sf8Y6I)?L}x!hPklcSh(h<c}9Ds*(<hP*mO@Kga3VdHL&;K28WZ|9t)4
z`wu1rmv2&g@@&$p3-OC|9NyJOM#gdK*B#kcw0_U!?KMw+e=U~JJ(83i`Z~}~Smd~_
z%>?^3i^Ny@g{dYfKi{G|Y2CFy&HA-x3+<A9e}zw4{Y5afaG&p^@+Ta=zSVl#PqtQg
zeS2RY;8}fd&$g9y+WJ2#BNio|eO>j%E%-0z3$LGru~ovNRrj(ddw-7o{OR9DUj_!|
zdycaLrf9JiT~%LbtG{k{#JVSQlU6O;UVeA=^wQU_md(DlYx<Itt8a$I-b&wlYID`r
z&|kY&Jnh<Brhh&3^_1x<f4=Me>u{XWe{XG*=8Z(tMD??EN(X0bF?-D6@i%`>)T_hc
z`xif7Uw<=UZe_E1a^l}8^^3pO?M$xC@^+r_wrbU?iTB=Rt-CNSe6`T8U3-)MW|?1D
zH#sR$Hek;F@V8m{al1BE{lBrUdeXK1Vb;^HYi&&yJ^fm|-c|76k5gqYqC9J}_C`fk
zN7wC*+w?b1IrP;bxvQ)GIx3t!VyVCGpy+8AKIXdrme;?ur@jwQa#8X5P}^zfeM_1(
z>foL2<>u={@9m9^&j^XM{rB(ty>ka!=FYwHw`noA(Y=og7haP1%=PG+nj*{Z#gcX9
zn-}!7Ofc7#eX5-K<!a}GoHdJCwbRaL#4Xv{wKwZsmhzXl2lpRU=%{u7`g;EfXHW~T
z?e3G$uUEZ(E4r!T?+w33e(|R*>~1la-}o$PyLtUf)#KugRj<~)^YP2xZy5Wv&805m
zvy0Ono&7HFey`VE9p5qKmUFV*`+eKrZk{aoZ~uBj`O*t%vZqx~m+cEyPUc?yE>3wR
z=enIHtA$sXKT!GCW0m-A;osw@&z`JjnU}BiiMu5JOj_e@@#+6Qoo+q<_1&~ro9=zR
zCA>EC`jt=<H~+~$*`n+3EH*hW`s(c8HtWsR*V`w*{6Fd3`;Q{)@BWW-YX0Q#XF~0?
zO8qX?%gfd46MCwBbAn&}XIX6Yq&ENHk7C_kx9o|I$M|pi@73GOtK?aGech)g|9!uv
zzi_QfF1(gfUH@Q#((li&|8~iqY)w{*Ee|cN@6xl1%|9Rg=(B>YykpPbx#gXL=fgg>
zv)4T|EPl>%?|Ag;D<5NpR$qI)zFz0L=-0KOw|*J4#$SKbU46JKHS3pLP<>kbyO-}@
zuKsh#xPJMunbzVReO?Rhxt%k*D3KfZ>9gU!|0k>eG6}t1l{`sfN>}Xnv-*psYDMWc
zUB9<)&ALtdwNJhe_g#4D<j;Di@~v+9-#HVxFMjDrj?TP(?Qe8t_{!?nm;YYt|LiVb
z|NUCaw_}?+w1qbO@jw6h*Van6cmMUvf9<W(uGm|$_j}~`>tX)Y*@u;zSFK;a;aFwq
zwbM(&RZe|>9s2#pMn2v*JN-_5ewtOOBVk;9r|MIO=B0a4b|=5yk1c%>E&Xlx?ukFb
zpMQ^xTYJXvuF9(H%lE^>W3&DKpKN@x_rv_(UrlCA_nKZgeg6Be>-=9S{=dcX`uX3i
z_ve?~pM3YtrmwGGpH^R6@krly{`1-&)9cp0j99wgWy;*eJAduo>Lnk1clFL!Go$xk
z*R)=9|7DGm_kR6p|L;05Ft9K=&I%OqVy*S?{<rsIt<m=*f9C(c*!6#Xjq<PWhhk*D
zt$$Ph>VN$ATi^f3n*aaf|I@Dicl`fb%c4ZyitpnH-^=}T_owUTFXpP-%rBVK<j=8B
z?~b%VP21k>M}Fi?W%u0p+M)j2t_ekV{#MI=I$j^{b@%(Lz4x<@t$)$_>ZWMt+th;>
zZT`ev)j!Rm*0M)>HB;^FBM)sK{_X4Z-XCTkHUDb=((<KoYb*cG37Q@CXM41}kWSY_
z;eR_lWA2IP<#AruWYw+OUi#PS%DnY=gYK^he=Yu1_|CjS?zcVX{wj%mDn9*pb@HzI
zN2U?Gr(eDI_x|1MTHjyatbe_3YV+6k*H``eRQdn<+M1d5|Gq~0zMh)eEqW|Yj_><v
zr?=-et~%bcXO&^gxud`K%=`bfH6uTM^|$rW>&r{^f~y|J?fNnQn%;Z;|Hu1Qnv`9V
zda7{qDd%zTdB2Tkd0moy604Mc_g207(R%%bwMqLYFZocUxBvP2xYg5YXScJeO!*Ki
z`Txx(j@upEcK!M4QYvKs_M}iydX)L(2m8OIZWgwgH+`RH$Be!`UoS~#{@(Djoc)xn
z_OuJGC+6HQe^LGCp7hVk`hPFx>8*>|D?KOa!`rWa{hQYFnRK~vGEZw&Zeeeo<|O`b
z^6@{qjh{8V?+3lgFtWPwD)B?Z^v5OFb%dYnPg|q%JLr!@P+v#)a%qRfCt~a$Ir7_y
z_1vC)bo$9Rp^Baj%^|fNr|Yt$x@J9DpY&_uuZ{ORC#U?hTb~iJX5Y7%=c_lZe=EMJ
zqRz9j`o@lonLe?>$2{gwnRWJC)kK#MzovxQH0@+PWi9@9-sRZ0*VXT@+Qhpl$mMU7
zR>bd!m4D~F|M+qH+<ITzFaH<qfBmB6a?`>~|2SqIvh!b_*|%F*L&{ENo22>aC*S`%
zy8OJ;sllbo?X5L$-OT-^<@yOd(f3Y2>)ZG02$%N~uiBO230=QmZCxg|XP5oeQyz<z
z|6SYr`E!$>Oorc|NAI6&%(<j+WX?Tx)qnj{e`#C4-*?e^PSawChMzV<-@e6tubD6Z
zODM1N`@MbnrS}(Ik}WG;w8ku>*5}oxjelqORLC*EiFpz~PiW)5R|1)~+?h#FoTneZ
zXlM8D*ZRY5nu}Yv%J2NY^6uIB>$f=ZY%vjFciD3!^joarpItUCTmGC$eZP@4+jOqm
zr|NxA)(6_|SpUxbOWW6&33DzlypgwGz3q0&oxP!@dj9PH_bq?EYwhklb(N1S-~WHK
z^nAH=UCVs+XZ!zJpQ_&W)BbGr_mx#!!p&Y!{{8>2^yly8Z{z;{>${j4dh+?@zjd3I
z?fd&@-k-TUx3AdHa9Mxa#h<TURhl}={fe6;yn1p}_k?-x|7`!Zwfz3<;Opx|T`sKj
zo~Nf89~YawJN)~vDF%NfpMIVC<a+}H1JfhNS%FisSZlvezWi(9#IpTy?{{s@XPw&p
z|M&Yvmj8c>?tb|E>+4^Clm5ou+Mab^z5dC!|A!<`7-s(aTfM(yeaw=t>Di&JTbcgt
z5&cwEq1ZIhYR!J$AELcC+r1B7I$i4D_rCA+T1VBV_g}qw)%iE8`~UaX(~q9iw-Pz`
zUHJR$PseL(zqRaY)$NO0`ZBNR`>msQKWk5|y?gcZ^@YBxzm?vv{noZOURYOX&Blne
zQ9D)FRq9XQyZ>pqqIvDrg)>91W<;yp&9c9~zH8R+z4?;@?|Ezw{(d4hW_xAn{?}J#
zTq@U#lWu$Ze&73jyT87lymN2d->T?GSNr1CHaOpTfBt^$_xroP#ztL<7Y@I*`D^9o
zihr*_`!Sq%pM3DM!Y<f*&b9WVckds)d$z{kuJ?DMUi`k_Z;mdSeer_-EV=r9zkjV-
z_x{4oc=2%OnK3IP_e_1zek!3U=(&AW$(B{`yf5CbbG?4Q_WSjdpYJaGv_`Ujg=^C4
zUopF<+y7EOu;ld&RmryBZ*Cr9IQ{F|F0Eb99aX>Ibl>}Eqe;YS^RKTrSJt*ozHgtS
z@a;eEbTQT#^`xVJTXl0TO+P5Iq|5te@s&rGoBZEidj75^l>gNGODpEZ3oftg{`S+!
zGk*2D&X@1v{A2d&wt8N-(%W}VIW=eci91~r*X!GQxJ>BT*Yx#C)c>P@Ui<1sGWm4#
z>#q72yZ`5@_w)Lf*b1Gw`Rb2FW`6Xt`@3(4?hK!=zUcph7N2QN)!#$zUbBz=8Jq7@
zwe$Dulds=TDK&~){od_f{C~s3+q_9@PcGcED=uFA{)9Q|RaVmXS6^4!e`mg5wTOsC
zg8AKbzu!EMKW6;(W$Cw7MMdd?xu^bCuY6XR_GOOi`Mch6O*$QuPX1nJ`g-Zb>!ISW
z_rG|^`X%CFi@$ZryzGbT_Y15xKee-bY3;(dpTpOd3r~M~ZoApGADXVVi}ziRviO=_
z7ap<waQyb4O>_4DUobnYzJ8t1uV2qU&i9S0nzH4lY0&{jC6OsAB7&?=je!OUS`yrB
zObZ<~1++wjS`IY^E(lR7UuRTYx8?3<<J)=WYp-p3mG>!ahv0?s+HL>km(P9`A9;JL
z*xu^%c{(<;4`f*_i~lLKUo88(QRchjYqr%G*BL!{WG?IbCHU#I>WFofb9a8>dVF@$
zN^!HlZ(apA^Bqq=vuEC}dfAt*x&Jg5+?abr;3>Pk{QQU)ah8SKGA*CaV~)FbL#uz?
zk3+f}Cz`!q^v-4(LrB`8V|jr}7gbHximVL!Ryp;Y&YOmt0R@a&B2zS!q*@L&CMu+8
zh;y+VYBpGKMMH?QmC4azMSzx|)heSsmG^$U`@AYD@5q{|Mn9SgWGv+iA76d<{onq=
zYpUfRrrUAb`fu6K_+$C+lKk%>_VJHi#_eDIGxf#4XGUi2&-GqTR@eT%{iNEKwyM`L
zes5LJuble#Vx!WLiY4>Py?^u1Iv!&l@HuLS#-Db5gE_r-QgRimrfhvQohQD{Z_)Z!
z+pAtPufBbn_g&os7SrVkP4g2?^j2;c5cBWgn-J(`0P?_u77=0CNP%W+Q&ZrADH_Hc
zbETfz{&;TnaqY*TsL86&ob@|Btz_bxAKA(C-CyzE&-J2A|BKK>|9SuTT~>U$skqbp
z@TuD7df)Jr|8BS2+drn5nc8ofn$o%N!H2@PE9#gI+&Xil*5s(~opT<K_Hb-2Jsf6r
z?(iAbE6eW$|H+)X^Zw+W^Xmn+*0BUjzV=#kEdQ<L#McFJtAF2(n>*o~?nnEZRRyYN
zE4#OA83h?T^eP&xnm&W)&G}6S9x!TzOh^%tW@S3uupvQ1O^CCpB~UR$seFFrx;yuZ
z-@fjvST5?(>3hmgDCEN1KYYIbp5MLi@}yesqwW7+q2Bw7O~s!6(a(Nxqj3GgN?Etn
z;Ty`sH*3#nKD2~!P2rwI&Rp3EN8}ePpU?c_!#(Sn3eSo|vU}fe4OqG>y~F%&;gy4W
z-D`7p$9^n0ICcKo6^E_`?sLz3cSTHZzwKA`UqARBc3m;osSN#c&T*c@ES^NkAeZby
zN!Is;th;q@c<g_{&L}tYpR|U~m9DaBGxla!`$-EeW=vL)U}KrQxRiZu>HHF3&5dhq
zgMJ!)^T|9MG3~I=B&+RV_cv%qW%PV0lP~Mr_V)eNu7jc1`VVe+&t0u2{d6YlCe^ih
z5n{#{iWo$=Z4PdmK3CId(xh1jT+VqnF+S8Q3Q3e$ez_$0&w>eBHx8?QJ~m_0!n4O*
zB=((td-qhZxoK%rRm|i$1>&5mH$J$#hrN=!GpzLHjN<t`hV|~XllLh+lQHHoZjn04
z-=3Rs{IF<HblBYQAN;fazEIVwtNs1%`+);C6Kn+ag+AI9Ncb_TU74aG#Kr2=l(1ld
zhB!AHQ)9w{2n|s#k*3yw1tCgFo7cy@U-j&J^^a$B!``K=3Q_m)V_v-a-SfVG=7;C=
z6|cG0w~AqH;IeP~-c<OMEX{Df&c5$L{LR1Y6JOZyU3k%C&G~bs;Ftdrq853FPE4p;
zX_+6pB*5VH<yhhCnfs@Ik(bPo>YZO_D8ckk&gUPyZ^5f2Pek7Zdo13+Xx0Tc`?I-u
z_AkFR=pPO#-<~z?s!f!2L4DSojIHeJR9z>n40)=u*mRA^UB@8Ln4<|6j0Pc7G*m(3
zPX?edLT*;@Sha^L7fVY^!h$PKX`4Lnms?&ppA(oG>w7X(IDESH!n4Qs_#Ia$(2HMl
zYtPYxcLls3^}e~gIQ~y%eBQ@-X5u1$J5Tw34-7t?%p&!hD{+on$?w%Af7gAE>Z~cd
zu=}@IpR66Tn8a~Khus^0$Is*ND`hdw$gwTYebS&2m8TNgqGYc8Ux}yIZrTz>k+k<J
z&G%RT=rx@`|L1Xzd$H#;zZbr37Gl=Ou}@JJ-Owkd7h2+ROMv^<cLk|;d<+as>5j94
zM7mh(J^aF7$Nt+}x3z3z*sJ$fCs*w+x^Ew@wl)0L-p}5z@0+hWu6KRc>T97VKUMUf
zjG6sQ<ISX>Iz=46+J!CbBX(Xh5A%=z`hNEQ&iPl*PmQUKSvB?PpZ)hIBq;v3+r?sG
z*mS-4*Qet}zqja~e0Q|Grnltk>q|LzL%%+k*f+6!|Ne_|KC`|EzWVxZ^~#9X$A!-w
z*I8*ATqwWV^W>-hmA2PjzgijjWv|(5@mI&QBc6XPuG<@P>b{`hYpuIgewAj6oV<&F
zU47j7C12^_CyBD^jD#2emtA_dqwh())mQHr*Eds|@^`5p%f3JDkjs&{CyNC9^saMS
z?o--u&ilputM^*23w{(el(F#VRJ$I%Yu`)J*x0Lg1@9mI99ncS=I{0QPa-SpcGjMZ
zFWYHfzgvB6b{a>~uX_Kx-zVQqc^8-W?oZubmeiW*fAyZLd+ja1Z+rIC-JgGNIBdUb
zUh`j|{cG%>NvDOiO|O1B^8Nm@f~NoSze?tY*EQMJhhL7hy|1$_|NZp&`+mPyZ{K}q
zs`q`HeS6DQ_BmZWd)MmUuiq?<?)%e?iauT0yKr4r-Otyv_PyoqdcSx7{i4&S&)?sE
zZ|?rolMmLful;_%bh}#U;q67BHj}=-y}s_%`x(0HYFqw(zh7ng{@4%Y_dR|t&p&s*
z^LL!J9<=EH`y=lKOI_X<=P%FOwfp_P*c1C}_IxepQ{VLI(?|2ycTa~}?0d3)`u^0_
zj(j(N+wc4RZqciEb&p<jpZi%^{eIWJAG1z6n*YsydQ0Hn-tXs6-`CG(@BCA~YDZ|<
z)?e3k)t}4`w_Z6lc3~yw=l|x59DhA6`urI*?pf$KD`?6p*7}8p-t}J3U&l*_>eg-j
z^)znnyRCKW!q>X|+HvdE--5r{J8iR9uT6UYY}fVO5!Tbsr5@ZD|2|T0{TJ0$|HJo(
zPY&(g`g-e+JF`~XUemrJ(9>)Be)40N|54Y!R(;W3vNiwu?#Sd_nYKymQdfOlw|i^d
z)0NeC<J^B9)1MynYmr6O-K%#Obbnj-Jp5PO(LC*|@2@`p)MD{EX!7fS@0Kb3aukye
ze?NVH+WM%!**kW=e6{J+)+q6!Yp++Y-`ZDL`Z~1s`nsUcq3Y`=$9@XU4!?BuyuRt>
z_|<oggs+x2J!IYa`Y6k~#?78urt5d^G*r69d3yJLv7gtMe$D&y>*S>O@#lo%r03TE
z`TFyOztZxKuNwPRu1$M6!S`UT*F5XGo#AU;Qr^t^TfXi0zdr}HWbVJdx_)xi*^S0A
z6U963`Ruyibz#v_tuMN&L2~<QHRs1nG=HI9H+z+L)Vd}A)xKK<dUK{--?=XD%dW_p
z$KgNn8CNWE63UI`u6|UR@+IuurYE~zpIm<b?UgCJel1wG-@>&%<C|}p?mP8K?=L=@
z^hzys`<2e{=wCN$uLrkI4cmYHD)-?l|Bm&FX4v`HU%&Hc!<EnfE!3q9-ydUqy+2)G
zzt-i$Mn~>Mz1<&t<&}~4SJPkGo~Bis6jB$l-c+3=KkNU+jQLgn{Oi5HJ=#~kEAIKa
z$IIud{Z&lPm5pC?de;)CdrHE~pL~k*y=A-p>h4vZa<5gcuRN~5e75`j(pA>;_9_WI
z4dnW}-qSNOHud+_D_>2Q>Xq-TtpB(E_PbZp|3~cEcYVh^z4g<wCrw+jTD@Gw_0g`~
zi+0cZz1yvJ=b4=~vpfI0*1hj|w)<TMqYh}dvR}YPrzM`#wXW=0`RvfN#9bnJ-H*&W
zzI)9RG%{Gd<5GES%F9EYLXB2GjVB1be;>Nsb$Q7LsUwCR_5bsOPi3uK=Baz!=nSu&
z{hXVgElDD3YD%SVYx2#XzpSvjqZ=zLu{(Kl-}zd>k|0&@KfG$%J}<sZXH5ESw1P$9
z-kSFO@ZRO$7*&&Z+;MFECzEhh?eEW9m&FR^Zoj&mVf_mG#*ae3IQ*Z-J!JPxTj>7#
z>+7of^QO(8t8(3`@6DPXt_k(uPNg)@$v3q1U<`cqc(T*t1a?M=ng611x`k|7qP2Wp
z``+3o4E%=wJs9}E#Fa4ccmAKiATRx2fx-0e_dm4?3=9ml`yC(unZUrn@LT`w|KmLj
z3=Fd2Z||Bvz58B-0R;XFKQQ?g0b()ydwc1B2?K-NopLajfq}V!fq}O<H$T6Ofq@|>
zw=5$CBys%IyXD7Ey<5S+7(ao5eQ5&&BLfJ5q#&3PLcvKiz5`qWD}$MVfk7fOKcyHX
zE6~u;z|P?R|382Ie+C9Nu9Bj}Yz78~n38O;TNvSPfjNPFW&lVZ3P!UZ)v*i=AonmC
zFfcHJ7!0h-a#J!B85kH8a#O%|Fu?5q`*&tQ=n=5#iWw<6MPL>K14CJ6N-B&ErCDQh
zGxO3I7#Kv#a=`{6J3}P}A}5nllnPP<(gTv`DJ{wY#Tp|61FLLtNm3321H<a#l45Yc
zGeA5!JFzU;5TpUhhcY1)lL4axg9ifx7=wfu`MknH^c};T6`b=^GV{_wVyIZ+|9=Js
z2IIuCWM@XF19JZ#GDxR89gv%+*eDU~>)_xJz`(%FAi%)D%zWZw(mO^5){Jv(r$LSs
zO3u$I%1h2kW?%$E1_p`Zk`i#dL7ldYfq{Dm0|V2V;*#PjBwLp;FodZyFmNj{FtF}n
zU|@a3z`%Ntfq})Gfq{jcfq{7&0|T2Y0|Rq00|ToN0|T=f0|V1b1_tJ81_ov?1_tJA
z1_q{W3=GUZ3=B+F3=B-i85o%K7#Ntv7#Ns9YU>#o7~e23Fur17VEoF!!1$Vhf$<ju
z1LJcB2F5=O42*Xf7#NQ+FfguTU|>AKz`(eZfq`)X0|Vn^1_s6+1_s7k3=E8i7#LVD
zFfg$1U|?XYVqoB0$-uy+!@$5*!@$6^nSp_`hk=1Jh=GBJg@J+hAOiz`2?K+G2Lpp3
z8v}ztA_D_YG6MsTAp--q69WSeF9QR=5d#BPH3I`LKLZ2T0|o{zJ_ZKvYz7975(Wky
zGX@6EXABH%y$lR2KNuL;yBHYQ4>B-ter8}`KhD6wb((>J>k<P4H%P4p0|VDA1_qu4
z1_s_-1_mBG1_u5=3=Dit3=G_r3=F(a7#Mg}7#O(385jgtF)#?FGBEH~GcX7(U|<lw
z!N4Hs$-uxD$-p2W%D^Dj$iTq4g@J+XH3I|ddj<y9E(QkX>kJGmybKJ?yBQc*vKbhd
z9T*sx4Hy`h?lCYhuVY|fJ<q_vlEc8jn#sVx%D}+Dwvd5=*_45SMT&ue`4<BNn=Jza
z>oo=jmJ9|4CJ?=qfr05J0|Qe%0|S#U0|Qen0|Rpp0|OHq0|OHy0|R?K)NasOG+Yb}
zOfwl6m}MCl*pD(Wu&!ZXV5wkW;3#BZ;F!t4!0{47vlc_bg~OJCfjg3cfy<16fo&H9
z1M5!)26jD&e^`z&FtC8c(ij+63>g^Mx)>PP&NDEu{bOKYna;q#<jlap{D^^p1>}xr
z3=B-5G*!yLz@SlFQk)D;t4z>z%mSsg7#J8}VmpdUlJlYJK;kPH7>b`UFm!)mU|7Pz
zz_2}qf#Jkb28Qc57#N-`Wng#{&%p2_o`I3Yk%3YEFau*?D+6Oo9RuT;WeiNr2N;-a
zf*6?6N*S2CWEhyXEMs7Lp2one%*enTyM}>z-gE}$S1AlEYFrE~c9{$;KK~h5l5aAw
z)HyS-%$Uu<vUVi{%f9anEZ3(nu)NJ-VCA!BVAVazz#931fwj7cfpw}j1MB8n46MgM
zxnHEXq!>Amf{L9@#VI8PAiuD<3<bczz*dJ=6EMR|X-Et)fNLg@m!KHrP4to<oe!=6
zpz0YI*jRAZ1Pr?y7+6FYM2~|Nqtpb&`K5VaQ3S!d8(I?x6hmqPm=rO!Ks~e;NJpy$
z!gC8uK-Cn;EU@1|3<gF}LCpYi9*Aq4S`4X1nHw4!lo%KoSQ;7{L>xpJ7$q1OQ$H{;
zx@5C9G&C>?cCa-xG%$ivHd+bJ$jZO~O8hLKT!B=AgTxuWGcYjRW?*1=$iTqxl!1Za
zB?AM)3kC*;4-5<p9~l@J-ZL;T++bi}c*elM@Ej`tnt_4g6;vHaA4vQ<0|Ub?1_p+k
zQ1d|Myn~tvQUlTtG9P5tT?PgQ5WWgE2gJU>z`$?~YW^Njg~`Ccu$h5@VJiaz1Bic)
zfq`KQ0|Uc0s5pq;&cML1pMik^M4x3~U^vLYz_5#ffnhmR?M4O$hJ6eS3?Q>X;vjQC
z_5Lvi28PoN3=AOlVFm_<6ATOtm!KHrhOZ0^48Ir{7``zufWz|-0|NsHgZu;X2gvUr
z3<{@v3=9nSp<(llfq~&a0|O(d^#jt&0cHPXU;u?R$ShDSGm0>PB9DRLFQ`Swz`!U7
zm1kvOVB}(8U<4`p%)kJSo9_@lNH4<=1_m$&#mQR+28PEB3=D4=7#JQw<Khkj1H%Ib
z1_qE`kefi^ex8AW0Tgzia0k(#)D8+OP?)WU`W+N^ptyO$z`y_sn~Mw#44}9HiGlQi
z!Uq(Nps)so8z`(mVy74w7(ijXn1O)-WG={lQ2c}96J!QR-63fFfz-`rU|{HEU|?9p
zz`!sc8n??B7#KD|+3TS3wgKu6P~3pr0df~8Y(a4YN~>p};r$aDk03XK<Ur~`X%<9-
z;t3Q6pg05h59B^jItA$gg&indKx#qiK<)<V0fj5b&meJF*n+|r6rP|o0g6X%1_nj}
zXq+)a;)dZjG`)b#2k8gp9gyEaY>-}%d7mI@101IyH-Y>G3M)`L0@(#}D=065nsK0b
z2Bkkx`T&W6+zhe<Bn}D(koo_hX%m#sKyCzKko_R@Kxq<`CQdRiFkFFRkpDq^P`rWi
z$pHoihP@073?Q|jI0dB%klI`vwI@5Y_EbZvJy#&ro@xvXxxEYwt+N>z=00Iy*r>?B
zaA-LL!=*k3hWmXC44*?782-IrV3cBIVDvi9z*wijz<6{a1LL1g1}4)O2BzjL2BvjM
z3``H#F)+*KGB8K5Ffh;bWng}?k%2{VCIgFQIs;4WF$R_j2?mx)Sqv=8(-~NH)G@GJ
z$YEf4&cndUevE-NIEI0>WDf&t-%JM9b&?FM2VOI<o_1$oU=S-!DFGF;e0hnvslg?w
zMX8`VKOZWZo|_7m1r@d^wLor4CcJL|k_2I^%$(v91_lPBl_mKgL$p#7!J0u?mcb)2
qFU6@SF*{YkK+jUoM8U|wz|zpr$k@Pufq|hVttb_}djT?$ZWsV8JmCER

literal 0
HcmV?d00001

diff --git a/tensorflow/contrib/ffmpeg/testdata/small_100.bmp b/tensorflow/contrib/ffmpeg/testdata/small_100.bmp
new file mode 100644
index 0000000000000000000000000000000000000000..61f53a2a21c933037f004d6ae4319dc6065fb886
GIT binary patch
literal 537654
zcmZ?rH8bI000A=w1_liV1_lEr1_lR41_nk336L-Y15_Tufsi@N?!Zy*^1E=9x8fcI
z6|8!Yx9T1Q<*&Yvj0)B~z(j>>AHq@5x<_zSy#6s9m2P|rM`fFy!O)AcO$b!J`6X6V
zvE>y6Rc?I^MlUP3zO35z3O}mZ@dk$8)$Bl^+MVxlp}Jik!0285?)Qyg)U@XV8p0<2
zsc8=gHShh56}9gF3PEiLzQIxZ!SBeZ<IoQXYCrt5{qWD$BR|`Y{%Sk&r{mZ^2<kfV
zzxVY2-m@UodFp@XssC+Y)O_-P%PA;oJM+Ko%>S0NAOvA|ocZ5z2CA~{)c@9#2sT8j
z^X&hwbN{=~|L+8&_6z^pFZ}N!5%pa7-*e%A|9LPv`@iqZ|K5`b)N|s0&x!wi$N%>p
zLq~nbLE><-|HS|P6aV`l2qJate;+o~bNoLVLQ~d%{67?d&4idd@g!7m;>rII)PL$f
z2%Y@j4?_@fI0-QXWH`iCC;s;y2O+2<!Ttgn4!7<&$O{Nxz;q$hpZwp4iYA==Kk*dQ
zJrhs<pL`n31&d5N^?$<Y{}a!^(7FGU&i|iq{{NKA|0iApp}uSX;b`jB|5LC2p9n^i
zF8}Wb$4Kv4kOzBDfP$;{#D6q|?2`UdFnhraP_RG(2S$RD-|7GTXE6|1!Gx1gkAp%3
zDa`v%{GV_V9bpv&1p?UW$!GpU(b@kKPeb*h84FJ{n2D<Q_+MnybL<ZUbsqkShLAHL
zgbmN05C$aM;v&&9JERbRkd3=PHtzn2%!Y_G?*7=Y>jNCs?|ctObvxeSMB6{qZU0cW
z^&K45Zg~qw)mvXzZ+%_0<rM@$*l2=PTV7OdehxuZo1el_<)$ZaRK4j51XXT$RKEUk
z`TEDDFjTVc5d;;leO$cuapCHR5LB@00XB56U=<3=U49#aa+cl7U3xQj>CK!K_cE8>
z&RB9Ieerd8Wt6k*4!knTU49o*8Rf0Gho?S*)J<qr61*~kFbLL1r5m4Ot&hq!y}(}|
z!D=N?eN?#>K|(4ftd$hFN<vgdH9Owns*GxPz6V!EAL@3|qdtNpct|d2KlmL|8Fd``
z0k4lhRT5q5qt3HbsE@kO|L;2osw;a>|L;bsjCxOkDkJ*WM}5bjRS$AG2B~ErRSc93
zE=%E+Q9rDng2=+_uc1;OO}PTCk0xLFKj9*%c|gVb2un=_shhxIh*=-?!K!AgwGU<`
zg|$AKa0*|21gVUAj{bu%pycttJ;(k)s-&J{f4YwRLPn4(36b4Un}v{E2`6#YL=bU$
z)<-o+l@X*?8aee5sA9s=Kf+oc6|8vxsfi#Y*7^wEQ7T^l7^$O#=o}$;l(1GzkjkiH
z%PZuX2*M_*KEhKaVb({G-VlU@bd-?U5D`c#sA<n9wDbw*LQ-$*{;%+K4q?DW;I&fw
z!S8K{ezYC>(Q^1l>ye+Whkv&n`2#_n$3f+M_v!!Lr$L<~c>f6AIf5`C{iCMSupSe*
z4+ZH-wVnj^kI=eEooD|;x=0=8KwTvA`bXX8K{Z<6x&J+9K%F>HR}NeyL3&0#C;oTC
zJ4Z+V_aY%sIgi}^f%J)xNn}|hHd^PX=Q#Fm6v%ue)B0h`AXQTTaZrB&)DeJnj?n5O
zxH6DAu$~UF{UeCI5M9V@hzL>_2}}P7saotm`5#m!p8}I-LH(mi7qRz`CSCnM@yh@H
zs~|Mt%KzSr|9j8>?>PtRTana1g1841EZ~X;&VclfAS9%2g0jKNkn1CO=LOOjAgN;N
zKLP5d!TU!OPC>g}a5LcyXzg?Cf6vi>C<xpkg4aQvhkruYkeUcWqSZ%`{D@4#^DI^d
zJZm@a{S43ka0a|9gp~pB95w9v0Ph^t?|hH5bF|}K%~lYCR6`IFUK!PHd5hK+!qGo^
zRk`^kw12eeSrrUbZhVT4Ha#h)M*k>p#T__;bdGYD-_BWfD-Vpoougani?63Gx`y08
z!qr8}S#byJcpaoZ%3pmSt&0TV(zHG*-~19*CB1-DLvRvaAK~aCq4bX+V|S2V5@y{5
zu9Du>?Rp2PcaTX)?Sm`=VZ+lQt||#@l15JaZ3n)=t0X-2QP+w89Vh;Go%-K#5`-W%
z3#1x?lf=|V5RGl8Ktp*QXQ1^FCF6CF`l#m&sIG+ckKi>Cgh5h$1TSYmCC@QL{ROHN
zU{w+o>m$$*)^X@~UGH(|xEZ`Af-pc<L+TOY$Lo;mAY?Yg98&5dNcRm?D-o)XKvmN9
z{}Zo+dP)74|M!Edr2bQ&YPSbm`GP!)G-`)+ylw(`cnw(!!tOcsAA%s&5S)ZmSXk>L
zPz)mVlJInt&}uG-8$gv2co-034m@GN8SrYT>+l~0!qy=|tA;=W5lB5Fw9JS#!?y1K
zid+q0)<np)52Vt8i$J(&6%cY|gjpZeY<mN#p=!3hL936DD<iD+5u}EKR7{XTys9l&
z>Lc`;2vRp;tB*?8KQ3AKsA$cDqBW0-);xj?))lUPh;6X$0c5ZaUK!=BxPx3DLB{Lg
z^-;=#D`@o*gbN?I19gs8+=JIgkXk8k)jfDkgvNl>L=Y0KJ}O%G2)SDXXT$qPr5m3k
zBci)VFDkZx5WGHuFe<j7)Kid33QWGK+6F?9nh1@AR7|MkTS$G>fLbe|aUqpa<L>v!
z^%0znmUNr<env~=XjuTQK5Byu+#UMe4n|!^|3T3SP%o+T#Q$#a6jCQ_N(fpXfoG1;
z=8xdrqK=dQ+fRThGDvj>(F>VH>N@wIy#5iSCIU@Ao&Dc^28>Sr?*dO4L24pM=cxPm
z|L$Xi5u}y@DL($c`veHVOP*d>O$4eMj)7)FAd2B6L>vnV>zaYeG$cbH6%Dj}22Tk=
zIzAIm{GWK@fB!Kg1nE^l>JeCd1W^nkkAbJakW9wn28b-IR_Z+tn<E38IpGAj5(L{j
z@%aDANS!0lfFZc|(GQ+TngFhkCS3YI=?Zk(sOQ>$G}L?Jf8Wjjeb@hYU->`b5~z#R
zcao_3s1IBXK`R2J>Jm~J^_>J&M%`et=M<<H+>11q1ge|B)f~K9flL>I!W`mjq*@6g
z2vLbfBKM>~QYZiSo%jba3OOb8BK3(7)eyW&0@p{73J94*>k~n899lMnaM7|VW?qJf
zz%x690k3`FlSB{^cx8mOK7v#_Xe6i(+V%#Stls*%Zrhu>ZEtF~zD7fk+6P|mRBd^M
zRue&_(E3NP%IJCJ#^;dPB4iRabM&-i-4n=oSMl11g{$vF5RPi-0i^CISaAmpLHb0H
ziY9yM?W`rYvX<P)Tzoxq@%0Rh{t-kOaz`l-Tp#7ExPx3Pp;be0E~F-ckdXQaPC`5a
zA>ow~G6T6XLSy5pn;?}DTJ2N0^(C4JxF&+Ec>q;LRomX6)kJVEa%BYXAJy-C+py~$
z8iLnB$P7qoL?a<77MVm#<dEhTL=ch-AY|*o@2#*=yS9VBAoUT11XW2VK&3r$eFW*|
zKvpk6Y9a^;5rM2>fK*A4Y6zKxbd(@nr1rC*DhV=P*Lfb_k_Jdms^i@McJNpqWW=ux
zRzG%uU5KcLj)Mm4x{m$tI*J8hucW|Q(CQ;lAqFYc!6dvIf-`W{0?6f9AFQ_pSBzO7
zq0~f>T4`|9N1$%Vng0{dfqFd?FGK62?yK1Aqy8KJd$0YUe3hE@5vbBR4XueFY{;k`
zgoM<3pi1Nf`l=O36o6t7TsJ|~U?nl@BhWMuY#9u^Hw0(EE2GXszu^dLy#uLz;3Ook
z!AVFqgp-hLi)*+8t!D(ObdX8p3J8sj+$Tb-gCJbwItb2&)HrC>5S$CCZXi7%NM!_3
z4-o-ZLoZ?E^9t}tUHSUQ$n{a>#;3@Y5z<Nq?DY|(-hq>lwFi(&3bgzHTp#5xzYRf<
z$|!Hyt?Z?@VU^LX%*EGJ=U+~lcPVMk1$YI7#z5{OK`J9yee@s?wBqhT{_2OQ2(<JL
zQWHT)cufRhKx!fgiChiA*=ThTGM5(h5oRw5UP&QWNwquQ*6etLwLXGZPssHVJSh@c
zB|%a<Jn=&qh^zswkKnZuWO)O8#a-v||DDJGw;%uCdGdefX%K2V{lDWZXjvU(qz+OU
zwVeImdJcpj{U%7Q1gVLTyGW4w2vVa#>LW-m2~r<*T>Rg05lh7csgFQisx#1;EXaDl
zu9N@a2&F#4Q5kg|#nEGe)JI4psB;9V6u^~H-?9Ham<Y#;J4j&&>d3%4N>CB-G*<U9
zP~QO}+j9)GaE^rf2vSZSMVh)g`o9ki!5j>)0U@n@=yE%7`8?q`s7E&O_<smO^p7At
zCNK%Aj86UU2iHfv7ytKPhE_ve*Z!lSo}2$c==%TeYyW$%{O>pqszE^g8jSEk8^MG3
zgL+RORscc<=pZCy?h=`VtoDT0N1$0eu#cc23<)135>h2WV-u_rA_5_?=mM{l!>oNE
z)excrI{2#t86En8wcddU!ZR9#0m*%E5|TF|H4dDO%s|fL@Qe>(AXhz*dIur`VWW|d
z+6O|y>m6i9_14!AE~Ij)hINZ-w!Es@@~U?0YY3wTwyX|bE5X&ndr7d$2sD3GvFTaG
zrf1;p&(qTNPs=wxFW(62B2^-h<?El8uYX#$?g<1Hu6<m%_Hn_Q2l=b-Lx;;?E3xxd
z+=Gn8<uAWou;LD+0xDd7yKwpKf@QZLC?C9_4m3=+<XYa+>-meX<uAULweWJ<-17<3
zkH<_tj8+Z7xdiK@yj5sb5~w~xn@fV!L=X~E6G2FL9fZt4u7J?k#MDP<9VN8>5o8b#
zJfj5dA3^FPG!ouRLSnqB-TAg21HmgJGzKI!qLIky7HdNbo&_KbNbYDm@C{NE!AZz;
z5~MzAJ@ON>_PgcCzxLz*+fRZJq&{jn1*($Zl@WpgUJ8!fKO$Hk!K);!^%0~p0;xF*
zo_+$)WI<}Wu9N>ePk>Oz@&8@NK;0t9$Q#z45mrG+cL~&)I)*Tz3p6+bZgPWb5Xf?f
z?xX*^k3wrDC<C>;1ywW<^^o!qG9m|ILr8Ev0_k22czrbC`2PvVp!E@`Rss*%A=iw(
zCqT=#k?W)GtN**N{_nW*zw;^xL29M$>;JoN{O<x+MridBWJv?mzu>VoXx|XVMy`+G
zRT88of{>Us5n6o&awNEO1g}A`R575~L|G1pzB&#ZJ5UqAD-(K-{)1FN5E5PubsqW+
zuZG$W{6rw!6GQM^24O(59-Kt3cd%AT@Jx-I*&&q<ghZ=!Fa;qM5QIdoZXj$piCm|^
z*^oM@dh<(oO$1>;YNCowFCYv^7YWiif{0XZep#{U1-MoM^^ef%BajrVGJ;e?rE4EU
zYNC=gk4ix`(Iap-1XLM8dnn-g2v%Q!)^g`9yOqEEHl!;AuZ9Yi-O5{X1BAe97(kWL
zvKxg<K`4LOjr4gJ;-((;YuyU3fY2Ba^Rt)V&0c;N)<3!j=^3GsVCOxA57|K&kS-E3
z38|GJB&1e?lgPCa@`^k7)Dmn#IDDrPh*7@zIUK<&BU}u4eFWaJ1lqWSRue(ENYxO0
z*#m9sqsG0T;i!54R|sl3@C}0CH4!qS{qWC@!+$ys|7kn&ujAPNHV8WLzXP@=0a5`q
zpZni@?tjx+5Q2z6`bRA%{<oX}O&+x!2a)h92~tl%dP(hPLE~?bUQ*k6&@QCb3;)|L
zLJ_2H>O2n`(}RrEb)NxEMZr5nkje;B6LlT?-+2Vf=p3jHI`Y2@jxa{>p!E?%99$)J
z9sdtOpy4}6j|nmlg;XD5_LaJif^3192B{OFBt$Qa1QpJx{VYhu1R}wuB8YMP|HNbe
zCmj6`sxpp)NJupS;vNH4A`_5Ec;~412(+$5b{kw4!stDM)UkrEPKPb10BuA%37ty<
ztpfzFxad9ef6~SOJ(vIYUH#vMiaM|S@4NyM?6~&7;~J=Q)N}oR-xW}N^+D#mVD%1K
zHH6HCjG;k9AaxKj*>wuEUZwLiDAhsw=G|xicY`;?^_~TFsUY=H-*JouZ?N@mkm?Ci
z*TC6u21FcE`#{)`3J9_~4o(uB5`tDt;93b%@1T+J3JAi0WJb)&2wwYOGKj2?(5fd$
zH3T8yRSy~iUi%<3Ak_+T6$2N62!iVx=%5^23R$pX(+dbHN1d`S-S85Eiq}0aUjMXc
z-IJ2F&x+SPEn52wgw{O)_lzDDz}7H82IU|t79f*E`QT~@(ldh8M-Z}L*{%GgH;a~m
zQ1S9xB`a<hFTYi^>}K)O8(DMC7tKDA)V(91WUf_QJ6bga=hCx2!nM=^UQfX*CI|z$
zdV*9*xT+zzI9h!KseLfX0j!T2_I!fmg~q*~A+=KT{;!Z)sRh<ig7=Tw5B-MJM{P&`
zLF%Jca2E-(AOSK!2d|8pU<(_XPyTNPFY`vLkJ`c2Q_BgY72sz;^-%|SVFSEAf>%lK
zc_oN~cJS(j4x|A(<em|zg9E8kz$8lD1h0&6Rajsxpqh)~`UqsmQBXe%<Q}kT-LR2E
zXjO$;89~PDAaxL=kc6<2>mx{I)PD?AF(KDP5H_Tu={x$LjQVKe$^Vm1fzagBpe?bJ
zPW_((Mv$FQkX7G(Xa7&W1R90w1y?=DRT88=>bwG~j5@FX?*XH}E1)&}`0FF&Q9E4q
z5oETg8@wGGQU~>%1Fa;7kdS?6@cAR;`l##hA4tuDNn%PtYaj5kgw8|15nS;41W0{E
zxH3BU9bO;7t073egGoXLE+BajxjuqaK=2K%5H2zaua6)-AP9+E`#>rkxCn%cOd?l9
z$W;%V4e92<$K2qx3?>6o@4%~@icK#-LvY|e5kwFo0wK#cg6f^(^-qh}KLrg*ZFmW)
ziPk-b)IJ4k9u=*AQndO>;hLu)1nwLatRYYz<%6e+kh@5E;K90*RrktQ->X=2zhuSj
z%()jb=A4i2KjdAz!8&(=MPi?lcZH~B5|>^$yaIwVvX|b5uyetkBUl&dUJhts1C+$p
zMS_mk;p!g|>?^@*A4~?M8iJFQ*GE`;NtIh)L#B`*B&==%9qd5-cwNJ;5AgH}XCS9u
zI2)4YA?Y8D#H^1X6%*RphStM>(dr{eAE)UwXtoH_KSHjRh^dd7!6!Vlf$JkkFRAVP
z|F-l0TQ5LS8+alKbZ)~r&|wacnWOGgpxGiwHPn3q+KcHr{=XAa1;DEnI0LyR!o`Lw
zh8S`TVJxVZge-dmG+qZ8wS(3>NV1UrQOD8$9dL7S)JG5#kb5ybN5Nx*kp2;v>^}yY
z6@rWt!b!w@5M+i3Y1*jg7|3mCX7(T{099Cr|MwjM84It%AS>&pp8XF&Q_lRK21bxt
zX~L=hlg@xnW`Ng6klqk-mDF<;w1%PU#{bUip!Icq*Fn=vUFV?D1nOXdmn<R}CqZj9
zyOBsp&j=y{?-@ZVBM=Gh3U!?Q-+AtTI~aAG2hC6Qp9k%OhSx`Ei{CJ7B1jJiQQaK=
zgHro|xyXZZ$aPTX;h*s33dn1=;Z+ZavHwTg{vRO5fp3^~5JUu$6OkK)4SPP-@BY}Z
z=M$XKwC58%JEJim6%d4kS2~z$wvj6!WHvFC5v1OM*GH9`UqVD69U=$`8fpXWDgq71
zm2Z3w8nWB;q6*d(LavM;wGU*rsCeCTP+bG=1Qmm8A_%u&^%DrnTlol5+C%1IO4d9m
zUHh<j^?f)hT6M1ob^=50idzM%@0P8-U$*vs)fy10U30f)^_`j(*NW$#&6#>Awrz7z
z*#f7u31*@7%1$|oR`Fu`A%aS-Y~of-JUR?4Dv)&!a1v4zK}h8K2(wm#I1)}msv%?&
zxdK9CLq_c&B(70A<XQ<{6G0fn)JNr;pTj#xsB=l6!8&+-gvP+^F;#7QRkPzw9SmVs
zOb`*IStP^(Adn=ASwq3Aq$Y5JN6rY4iV07B1R1!4thGa~k6MoXZ#@ngh-*FhpDOhc
zWJz8-cr8TR`Ts3QRZ{x}Pz?oHN(bqYK?dj`^%1DDfDNocJ0nQ71GK)tRR<x9BkKa!
zAaIjAVO<-zX^^S}RP`Y10<%GaM?jSkk@XSWC=lbwf3zxzjQR*t89^qtK%FemqzQWM
zGwH<tNhiT88Ne$RCZ7a#h^C(XKjQ)v&ARY^#yM!MH0jL$$!9@>byF^b#_M`vqjTNY
zKx1@}TB-LMs6Ohv{=e(S{|Pt$_ul~3N1($uPJt%%dQSd_Ab5S$1>QRVseK@9lIkN!
zWz+#aBnmQK2kL4a``>jKG2Me!(ID4BXl#@U2x+k#NF22eqGEm2xc4)p0)mtEyFWru
z1Na0mv}y>xkQ<)OaaBghH4daELKdmn_6E5h1n&yLYae*k17SdVK$s-F#(^`cQ0pVm
z=-hh98o4K+vAA_lO4ozN;tIi)5vWfD9+xXz^E7|eV+hJy`6y@kgPaxjz>_kdeWsB5
zsASCpWCW_A);=s*_o#Hkqw-BpO4r=WS$s8p=9%QFC!)J{dsnS?&Yx=>->c+ZA!MDw
zZyd|49>^f(z`$?Gz^TW;q|U&k%D|w&z#z%MAPKLDAPh)N1R+VPk05I;kn1A|8?7dS
za3M7jgoM{e5C(EJ1ZP94Av6+RD?x_r;3T9Pf^Z>hI0+uLqe*=Psg=OX@W`o;;8_4t
zA7LG`gOA!ls-)I~-<uDD4sGZ-_8&fr)OrjwCDd{XG};DPQ`dS9bbejKng0!E{x_Zk
zq2?3+o4_?!Gi?3{dAttNhk~q#fb^1D&w*+mXnl14f9v`G?dSiufjdW?;AIT0C;xXG
z2cgbm|2x6mfvzJU280W$JC1=mN+=_B;F<z4d35A|2POjdXgZI8rk|iB*nBK~E=d3A
z$p3C|_0e+_REdDdBmaBAf?bC}Q%E4|AohYu%<(!<1q7aog47+LdIYn7bQIK=={*7(
zg#%6cp!Rg&9h|<S&@D?KXTaP88TW(i1MNEnI@52$+5Z#Hf|hMhI{$ylh5wVzfzXr-
z|EFGrqA3^tPrLko=C%I|?tsv|+y7_Z03A#>_3Ho0*Z)rfA6VCa9aQ!7fd}P!u7egZ
zbYA=4c@4zqxbeROTpz*vM`$yBkR}9(JPBH*38{}DBxphnatsK#(**A#K{g!qodX>S
zh1oyqIR{$j*?khU^0WKc{|==32(8jV=7K63w0Z}L3ty-JV!)1eM6QV-Y{-&1(5WU!
zCqsfdN8tVuq$`A#g!GT_)JOH;1L)A|BX~6gXAn~#q18mll@Xo_2vQS4`a$rT2vYAL
ztxo{;j39!g>z_jUN0s1?5`1*76kHz_uXzfY6#{jRHasm@|0I9i<NS4x3)Vl$UGosq
z#ekIFg{$uut$9$o?m^l5htT8f*50dJd#`r=gZhn+8rDCkTYIl=^_}`Pck9>QE?s;v
zcjnQwN&BK2R=MR&vyN`k^DEVGOp`K>5Yq7Al(%P=FkukXW#Cd`V3A>9mSkWMWnkcE
zVBlk5;AUXpVqoB4U|@w*LvRvO6G2F-)JKqB9;6zAlaQJSPC{xTG!kAJp)rtaC9G8v
zr1lx2^%10E!c{B5Ya$2(QXhea>|nD<El2;i9S2PkLDqXis-%{)pvtJ>%>SlSAk=)~
ze=~SV1EfBJuWEqIA3^FP2nnf`poW0ENG<1}^-=qI&`A!ErQ@w9|F<9e-v+KeKz)!S
z|2q!-Z#xX_l7I%vAa%`QXw3rY)PSldq}~w0`l#bDtd4^WyMgNz=qhn=C4yY?L)p0N
zBlw&VgaIjgK@||{EE1#!K_>f;{s$rOs2#laflnks81TxdA3Ved(GKZnK@Px~a`yl9
z%OEuK8mNAmedGW1E1;UF_vHWn(@1L(kNxjE1*)y4UH(7o+W+~t|1Z4vf5F}Vvv2>O
zb?g81+n_3G^6meVZ~vco3)Be$ZG8nV@a_WFN+9wUs6Ohy0UEOFzYIFZspl+cSw4K_
zN#_aB2p*_=OG<sz4<4-R1rOGBo&ycj!TU$>`Uup?!ciZgb(HW_MxY7^dGYqaUy!N?
zd2|k~8iJp@1sS7j+yA3=-*-IK5Tr6{#@#uBbdfN7L-1+{!oXD-)$MqPRtG_PLufS-
zL=ax@z!}K34>B84??5Ue$jBR<EL{&;ybW5v4PK`W;g+m>QoQzY3Aj&Gvi2EhVrcEN
zg4Iv*);<E&L2DnutE8fJPm0%r&J?ZK@}g?Xi;7Lp%GN*3UvVpI;pNN)mony@O`d)t
zb^3|487Fh+oK2s0Jg$3BXwznovL(ie{p!K>G9D%TrtysGfeg}43__L+JcbM$x(uwU
z3`|N4jI!WPkT3&-Ft{dyR7S}45qs{^+h{0n*_}M_q2G`t4R8{?(C%LL^4qZGcF;qF
zAUz{Y5>ms`zdnMD)WOeUg&iw|b}Bfumjpfm56Xs&-9d)zAahAzHp+M%eBiDQyeSFZ
zErK&3i4%=PYgs|KkWo8G1%yUo)<=*LJoGNofuHS%ezzV5A>>|C^KsD11n6oy@Y?U@
zv;UjV{%<%1LQSVYy`-k&;9k=4|IOeoRXezbY6UNNfK)~h5>h3BYPU20Th9D%Jqzv`
zo&Dc=2GpDeZG8gof$BU8?%+VGq(jj9sO>PUR)W+=U=rHX0rz@9RTV_zk^dcskq}rV
zq=rIP-+2_=&w`L(6S^QhBSQ62H>~1=)IMk=s4_yWhCquMkg5_$g@Q~%`c@!ikfr0`
z-AvF00LP$bto0rTEvo|^j&$;W-)T@kXv#%Uy)*sl|C!hS&%FG9>UmHN+qmz4*~b4_
zOaCX%`X4j(f8>P!5&i!oC;m^K{XcKz|H>`@8}|KgJ_uUOHtEv;c{l$ry8C~@!~csO
z|DXHl|IB;;r-ISsyZ<NN{y+H+2=(9j-+$+S*G(|G0UEFCxe4kkbzcOnXMolhr$A?V
zz-y&0@MeK_a6bstwE@>nkiE<uCqNP1c@lIu6=<6qY^Pi2Y0w&m&U63K>Z9(npcQxB
z;Bz^^{UgvqP{?x7j>CT-Te*=*%*qH$9fVvZA?+uE*F+Eoqyi#XAHgdi2m_K0;Unje
zoCv9d;A9=rDcs0e8@Ucbn=nGIgCKPc8VRX!;DQh?G6}7HHodOg^tx>0tI`cGOE$eK
z-3(=yZ+Ka;@m2YTm*pE?macnVvi4c&y65E^UKX!^Tm(LUyA*sVciB47>D*=O9u=>B
zShVIr@#^~}YaW!Xc~rjkamBhv<!c}2t$mQc?qTt|2c;VxRc(G;v-N4s=Es#A9#pTs
zS+L+-*0e*hom&H|mpW!oHH~gH3~$sAuF(moGz_dT46LvWt2GI%(DlfbF^>__@ney5
zU=THB5HMii)L>vyW?+<LU=RmaJ%S7jJm4M>7no#YV1OW21_l-+#LmEguRcOPr2*1c
z%2`3b`UuyeJLLWm(%Imk72uHW5~ODYua6)Mv|0(73#o?SB)ATGgIphhrjWM1s@e7m
zxiZ4Vh9^<vbc@V}R7P+TlK#<1JoOQLVFRd(bl_*}0nm^gxJm-8ZD=|2ujR<U#-sn6
z!M!9%mDF|yR2iYwN6=Bb<NsS=qjs46Bly||<oc);Tp6{23n)_SBT$6`u16rX%mCI$
zkUkouVnVBrKvO*6`UO$}L28+P@Kzd#NH1)OI8-Hg^)*Bp<n&PpIr;4WY2cB!X_x-b
zxC$C`n|=jUE43Z@U%UH%@!J2X^FM}6eB;^n(6Q!$Rq0LrjC;CicZ@P$m}I}QD0t~!
z|081Zznta&E4KZw+4aBa5U5_5a{m9!EC1)*{lD<h{{>I~&wuiN_LKj!9{-<u|NrE>
z(7I{jz5f&M{qMOALcO>D_ud9=iR}Wf_wKp?I-;TH-2c9_p!%pAsaAsQUxSR)LFyf}
z`Un))Cqd^W!dG{Ls<w0gyTHpB;Qb@~^$}*3)N$|^T9t&qK57TA+(srL)exM7)JKR8
z(Z27<{Ub;ngiNB<NANlb&Hx|Mf*6E{XK+jgWNrvT5?LQXhTtG1v;qRJNr1B}HomIZ
z_zGShfvP0%s%%I#RJ!gt1eL6P2C0&Y);%g*`>+@?n*&~WP`dVE3Fxf32SwmXpR#q2
zsy9Bb-Tbm{%ZuubPs_HxDBkq6VD;Uc<u}vjpO2k(EVO5jSK~(O?3rp|Et0N<TxLlO
zn!yZ;-V72>48pbyBDM@-_6%aS3=&{c*or~O9Ml71QDb0IVPKGBV31~DkO3o61_luZ
z20;b}0dO@0sf-}wavb2%ISvK}P6kGpIF}@!lqOo|2*QQ*kI?EPNJlAq`EBGb5~Qz$
z**SuUfI3HO?-#5EO@!eZtSemm5N)Im&P5xkgK&whk2bw7M?w{w-#`)i^+|}%QPnn3
zHH1_Xq3oQ6Ew_X2OM;|22nm-$J=g(svl1jRqml5W4qC&r`%41`YTWY`iuQkRL`6*p
zzd=y*A<$7mpjrvsKZ2b84XKY>j{R>t0UEV~^lsY0og?J>sOc1F$gb%)h=h#YffnYS
z_}>a!@(tpG`$`}aPXBL%q4qN%Hl);ooVoz%mvtZc-*FJsE$RSwj@l3YLt}vEZVvwk
z&Ep*U-*xDJ`w;|!^n+0PN07eKk^k+7p~~7IjHCbC!3bpOQIHf=?-6jn30%`bN9rI`
zHsJBPo}-|7CCI7<NCgDne*`*105Sn|1T-rI8i+gkzyAmbLDtSe&b31(p?5if_wT^R
z_j-^2pLF*BjLZM0U;01g{Qp^3|1Z1)>dVZ$0upaO^uJ=u|AICD6KDJn>iq9g{?97+
zi*53M!|)%9UVo)szDwBu7q<E<YWZKx>c5!9e<_>)a_;}NL;ky0{EzMXpR@db$@>2_
z+x|E00S$_Fodg|5*LVH@<a__8Kl(ra`Ts?){;zobf5~%D4K?@C|LG6^PkR88opK*k
zG4<d1-*@YO&kgW4rHlXjE`ShZ6&z$wYu71IEz@%feSIAymLWYINZkaPLW1l(g6w|n
zIR~oXy3T`YrLJ?JA>59$|65Ois!mYH>iB=iarA9R5C@zRtbMu;{(_^9gTLAj{Om+R
z(DmEMJ6^%;=6&BGsAd0;mi@n)_x*%$oA-PL)kol3sd>*=1OlJMjoc1I&Xve)<jf1n
z;PpE`!coJHcMUt<K{`heHl$2|lkkeEdh5&Tt<ZIGHCtXlP&MctBIsTrNF4+jhy#((
zn&?g0#y6!KUPH*z4X=yWy@aEpwJ(a-y@ZgEy+V*Xi9i+5s(VGN?-s4TTfFW;@wx{E
zYws7Xdr-dVN%^KH^_!nmthrmT;9Tm|!<iG0hSzWQEnMu7GEG0AUd_Es&LWvxHGo^e
znL)^ufmf4(U73MZ0fZPOL0ukZX$BCI1Wf=jii78XM8NfqFav`ScodEgJS7An`N5SD
zgv|%;CGmhoL>L%l7+B;PIMf)VEjiQ!MT}!*oy+w?Iv^DgCJC8B!mN@Y^%0R(5@^V7
z%{{dG2vRY@Cy(F^cr^rPKq??K60JVM*+oLCjLJ8?hE_(K-@toD@D33s1JXr8BjJ?}
zgaJ=)pz&eY1%Hs_icBIWW;h!;wPRLB4ZFWUE2BMM>-T~Xa+L(BjG7L9gV#qbhkmsj
z`UUPH{cb+|xB2kj#>0PGj{R?hb&)XZqsEianyBgc|HfmW&J?I~1g?)DyJg|^5vYrF
z@_!2qLFS&iPJm{sA#3M4j)Ce3P-O(JkJ=CZZ$J1C4MFOrj)RE$2r`rhsg>FefyeqF
zbr2E>s*E7j&=JtMA4oB{GJ;e|ATIK--cjh}6t(Ij<T?n#o^ay-gcG1e2%stnd_n@G
z2Q>ZS{~4D+-IiHb|IfY#VoW*<8hfnR{6A~yr|4;K{5tR3RoyhqJ)@I+Q$FmBi2FGK
z=TqXg_rz`Q@tMEkGk?Wn{Ef%>8=ujCe&hdw#{VU({!2Lim-qN@koe!F^nduI|7i<B
z{i%vg|LgbtZ$9+D`3z`<N!Qi?6Yu<=`SAa|XaASJ_`m$s|0OR$Xzug>v!DK-`Q-nM
z$NwkY|3Bd_s6Ohw`M)20WYC05|0i7fKM{5v5~MN$RWY!`Q6ZHMa%I$c9CS<~Xg&x$
zpw|iRB|$VIkJ>@jTXvp<)<>XD7I-WdQYFDxFA!fJb%HAzQtKmdHT1K2-%m&;f>%bZ
z`@SP8qdi|C-6D8>gq$VO*pO@s&(t+L-hr;egItQY{R5;9!c`wZCW#;<xH5VH8Ih~m
z@}he4a|nV|Ly(yu_`(Cs`lxinYY0NFjEdI2fFsZnh1E|WXKt6SeOS5iY0cK>4Le`g
z?R;Im?M2z<r$y@@=B~IEH}kZA^A@wL*-AkTLQVyOmbuK@@eE2K4AQ;~{B{i7Rt&5L
z;OktK7#I}6ogHcLq>lu+>JbH#BH*e=2uuosdq6^9QUF{N@xy8*9`Hg1UIqpT5@2AJ
zWaUxd5z~`Vvy-t1)^<-a3oi6ZZw)S*liai>p>0=u>rU|Ts<+@I+QNpcWp|Jfc-1$k
ze?(G!l)nag9yp{Xf|Kx?2$=z|jF1_~RT5@>1QCG@)*+G5+ms;Vb#M}1AHipi;0)x7
z39T~1<U*T0;F_pv=ew$%(0&uzr4^9GjEk(_{jqlU=i1$$>-K!XM3Bk|PC{#?17DG=
zq~=54TMmORNP<^Mt%pIi5@?g`anP9w%_l);2Q{4m%^cO80xfK4I`h9CywI-kBxnk$
z@ffH|f=nmD$;MOv8&Caj0(Y_?f-T2EBYVxqK=Vx<;JyiD$O}4ibmV{Aq5rK=^sn_0
z0<|3c+j0<8p|l+WscAb1Dy!R${BJt~s>nc<3uG}iQcVQuD?#g{!=PRgRPiy;!ijc>
zdaxVdYC6E3BhVTN$T|nugb}D;1{Q&i5W?0}bb=?IdX9m*MSbwSKSw}o6($}79l`*q
zgL;obFY<$IL<Q|jJ@<e5#sAZ;fY9{w|7TwOKj-}asVDw-@BUxE{(s@T|H%{n`&RvP
z%==^-`%N?Om7MEmal1EsmOnX+KXV%XW7q$~sr{cz=RXe^@f-XXGygAS1VSQ4|Ah_z
zOBnr^G5arJ@n6Q~zk<tu{ow!h+5dy;{>M)LpRwqF{>K01JO7s+{a<<Vf7SW_b(jCQ
z-}v8i_y6Sk|7Se;zv$)v#jpM^dGmkqYfz_Y-n0L+o`U*N)9(JCa{K?p8z9to^?&!J
z|Gk&~_gn;ZYCs1}odV7HU{*trK2hiK{~gEv!K)-tQv^J_1gWsPPJwnCL3YJLR{lf!
zN9`v;J#^49R>%Lh9sAdE1UicZuZ-{y%0Xn&x<Zg%5}IK9fuE2SbS?XTfNLUXF9}i^
zHSYO}DcHQ{YZH7#0=&-%xdN))^AQcz?fL+zj35oss%>vD>m!u=Ai!sF!zYH|j4JT*
z07!iVxqApkLi<OcItXdI5Ty1gUH_(d?W^()ugf>QhRhQcu76&z?ipm4Q2yGdd22xX
zRg2ewj-G><P`UYK!<Oguo1fLLe^9pKX3>IkDU%MwHm~)_n_(Hzr0kR<tn1GtW5*zB
z$iT17z^n{HpapNz42)vno{j*x3j^uY@PIotoS=a<&?-1?Fqaox(Qw1Y+#qV;x_H5L
zlQ5_jV^w6}(P9ubWRS7t((@O!OHuPLHIDCgDVP&jyE49ed&;B(nbVJEOh1(|{Z!iY
zGnuo_!)qc4gZTOgvr57`b_c4AaMVY5DkkL02yG$>Qv2W`VO0|0`UrpBgjok6i$Lll
zh)BiuHx=7KgL{w^i(Dn)s)-=t^}9b}Rzt`l@X83n098o`zBa<Dq~=54AqZ3@fqO}i
zzEabnKP^Z8wI2K5b{yJE!c`wZW|80ncW`dQ$^Q)}vD8ORNB=iLDgto%-vwS;02wsH
zS|5QcqrcF#c98B2xZWY7J_3!=fmfA7XPu7z2QeUKf=Nh?10@gt?}Q_;Qz7DzIuzt=
z*cm>c$_QK^ftJRB*Un8k{(sVOP*(`l8-mqG6Hb86Ntg-l3Qax>x^1EL@c)`E|MQps
zOq%*Ju;qbE$vyM*i`o(AR05Anxt|tyIU#I)MbPpRugN1`lSdrJpLxu`^O*eSHT^GO
z1nL9{8~+zE`7dhrU)<uqxcPrEGf-v3r}JM>`@fjMe_>;ghy=Jw(hmP`oB7|n^?%fq
z|0&BrRZ`(WP<>Q;>3{tdP<_;K^Z$f9|EE0oKl{=Dc~Ac@cmb-67QF%u<;{KZf6jA|
z_>BAir{4KL>E{0l*FmWN3aCEnz5qIX7j(=N>@p)rEdyH80PfmA4(#nX_OIjEzqTWP
zAl&xjptTLq+6r7Zb({jN@rK;<*l`wWX4?tSNx$u=^%10(1h17Kht6SEF_2+8G;w%+
z1gU_~NVNK>dEa+Pl|*EHRJZE`q*j7gMm4)XKoDAegw{W*+xDgbe788HN`e#%5E5P!
z!5Gl-I&g;wdR=(sX3)k}NIil^Lh7U9wXY!cQR%vu#cQ6HtbJCv{(1gd(7qw~3Wti#
zZ>qMwtKRxLXW5;MMK>ZQo$zno=2*JIEMum2Y@du*1-of7t9B@ZtUGAS534={vns6b
zBL$x5kp}mAM8WllAh?eM8C!$T^{_E8ure^PgDW5|aFxUjo)Y2!uT0<siwl63E-*_n
zu*)*=sPV~LN*efRJH?p<<~t|UdFJ*-*3OG*U7gmyHGBHu;`yhG=A9~>dopj%se*ZD
z3Kv|?nSCyG+UfKeXCc)PoFrHuWi7jvwd@vr)DFUc)I^vhXclSpT~L*@8hUyIybi)-
zAor4RO)9}VO33vQ=8<>cDhXO2LC#`@_m7Y+@`J9pL#~yu_LY$PO~~x3ZJ=rhQW-%=
zNJpt^C;Bz$klF{6Bv>Co>Ly5K1R)#2^%1;Ef(+b&dPzw2QS;%yZQxNj(5eRTRwKv>
zLXcfZkh-bi6sVU}fAW9b2@tA>^_Ut?{0AXeebfl<7D1|^rlbE`A?V2eRxoNg44P+e
z0e5zq5B_gC2<ak0J4Y=C|H3*-NOcFKJ_0YjgY<47)fs3xH&VsX314vsu4~|ZBKV#r
z(9(#bpsp6AGlkF&uZB<=UEm%QqLMlc+M3h@o;K<`0z16)1b9d4Nl>3?!YR;dgqaur
z&$<NKh&t)S|K0=to45Thn)g4k_rG`bf4jV|#*x1?{XfXqzZJE3CSvqeQ2!I3&M!Wl
zUp$)s`LzEF==>Kk_|LBo>iY;7|K~OM&j&{QhX45u{|lJ@7c>J^I$~!370myO>;30e
z`NS;qibwgEnC^cuqyM6Y|3ytf9VKap|C)aP&9nZySN-?u`5!*>fBcsJxrhE2T>f8t
z?SKCD{{=VxSKs_!b>n~YjsNX;Kx1^%pZuTm;{Uu?pzhM*SN|8i_`l%U{{@f#&%XD6
z=I#H}Zv3Bk?SDV)Mx^eupxOj<<O29|d3a?6oofSaJqF#k)p7U_=!%o0pt=cCF~Ld5
zQU}O5Uh65)3?rm})B&!NT8{jI_m42^BS@75=@7w5NKMoRnhE;R3hM#E1tH?#d7>Y<
zNJwP_?;pXrAQ7bg5u^?RWy4(`AlVaA2f@keU7%Uu+MVwqO-*P9-|?Yh+q<eAAF6hI
zfQZ-acvAyj@&qP9cNIZ46Txbt*Hxf{xIuG9u!0D5_Ga1Ur)8T#8-+kM5qP!;vU&ls
za1Jst1nCo%uX|p(;bq;%XLTDvCvsPBeOa^hdCk`6bz7f;*21m2oj&b&bj@mq#0jQ8
zRdS}u!fJtBvL0-bHlR5m9t{RIB?eYm1_lXmjRP4~gRHWK^l>=B)eSQP10xtQGcdr}
za1kc38pw(`4se~t52}C|gh7V$XfR4xi|Gc)*`*o;lv^cq`4-O&t6!7YwLQB3K+?41
zsdLVy&%2N^?^4RVO9^v8D1FxXl<8+6D0$}D<e6uaW}HcyaRy!!Au}MgQV!~19c<u^
z<oYOY^<7ZKM9M9G1nVPs#e~U#RaMa2{UG%bgoGsq_}#MDt0W?8B|P;Jau=y~=X*#}
z$4b`h`UI(T@Q{$22ukkzjJ--a_!H7OYB}@^GK*Ay_<!Tk|4qk0hc<w&EQjqvf~@++
zRUb8;MAS!(;O-T?GD545K$X!!P<_;R5VRn9kk?0@ur0ChDhaY;0d#=&@&A)f{GWRI
z|E!Du=Uo0j=L%>PZsKXs7TUUP{|i_CNt*FBsN<<)$!+7bvznn76?{&JxSZy<KPF&y
z9fXV@3K%~W)c+)8@L9;<pP0#iaR@U0FJS`e=?EMD7c_y^NBoAMS_xDonSiP!aqwWB
zsLp>@nSTs?cNo~OGKjqu)c7x~52}v@!Bvu|)qh#1|5~yCt#bd{H~#nN`tQHwf6T`J
zNk{+ZU;3YW^?%;A{}o`=bRASB_1px_8BKi%8o`_Y3^a(h^!5KGufPi)9{-<x7gQxp
zx%q$6b<mASkV|8`&;AE(DFW9=kd6;%wg|S25Lyj^kMISbn)s*f$REi18OYoaq{jqW
zzyO<<Y&!uOuj@Dg9jqg@K7v$C?U3c8;8`Th-Vm;usAc~T<k|;~-LU&h6ByO+`V2z5
zKceMINKI6|>ph%-GFS&5nXlOPu6*m;if!*ewG#MpPf!K1?G=2*0;Dnm_k%!VaggP6
zRhynwZ+>33=|$<r=imnOGqm~$zIeNI-Sdi#uORhN$=YX?n_fdMfo<CQs(usbQ0{_N
z_fqFw2%m7sr+Kq^-aM7y7EX&y2032_K?epd3kD|01du8NgDeAsJOlXL1Monc2zX|P
zA2y)_S=a_ydd<YZz{CIrkU9v;0aFlJNM!`+CGjyZ2(k*QOR3oyxhA?rmini4rqwRU
z?A%Z~bx*~-6V(e(m(4#@Hvdfa{0rIhFJ#QWm^Sx((wqxPb1uZrJRdvreCmv|kjf}!
zI;a{-oPIiS`e|JCQPz@MnM-eHF1-z_gOF#Du+&Gm#_pg8lCOdu3=TeY0`1l$5EpVr
zA99ZgPhSb60s<Wd2ssiELV^wk-1xkB6BLzfegQ|NTV4`bCBgea<=b9kMOc^NK^jYN
z5|WaUNwmZd8VUf<AK{upLawBs_0it1jr+gXgHgkQ9}Ne7G#>ofc<^V_p<hj)n&@{k
zX#VJT%aK1#NB)5jxIThxCTctdI;)}OENE`1{v@J*RCD4#Xle<(JE{KU|61@Oheq&J
zQS&iap9tJDf{wEt0j+IoJN&=p;QuBtYB~77`N03C1E7itHj4zFGXnR6;9VFH1H1qa
zx}M=MXl@A7OM;B2ffRtXv>k?Ywf?ss0wHK03TccEqQ2`eXzd(iZ3Bdah{J0iP`3y?
ze*`)G2DB6byytY{S<rb1(=Pp=ef9s`>;LCm{6FLD|A~kHw`}`gw)B6><o^M+|82A0
z=tsO(_PH-@|5Diep@9B7e!aH>TK|M}{`2Yn7dHAYW%^&j_`itJe<6eag8CpNVEA9a
z5F{mP@?XOAzmz$M6gB`=E+PgXDM6$Ef`*_TlaMiZu+H*7kKTU{#d{3gCm1;T8Q6Lm
z#g2+;-x4?YFJ%leP{ar{peJMdUp?@@dE$TT(*KUt|E;_JJ5T!Wy7GV2zW<Sz|3_W<
zpLh*~@~-~RyZXQS+W)Gn|7)-QufGmj>D_<l|I7#f=RE$u?Aib2&;Kuc@PGcj|FduX
zpLyf|jO+iWT?HKk0Xh-lJm?IfzEhwbxV<MpcTqvEet`6nx{m&92Or@J-$?|ii6B)K
zWE8aH7-*FXylz779Kq`&h<3DI5~TJ4E&M+4vkSJ>2(*y_TYZGJ8bThFgRs%6A!IIe
z1rhkDF3kQBa&CpQAvqe}{zNVikk2B6jI=<iA;_*1Xzc@Dunp=2ZGsQlfcK|tep<Te
z3G^5r@Ph5~b<e9dzG~R~0)#d`Yuo;&dCSYP<#+OCo{I0<9a6c>J!hs%#zd2dMg@md
ze*F+OB^L$}GX`!gaJ?e}9(IE)E#L!JI`9EE$iNz8*o_6&(_sZOIKTsO5H>^#v=0cj
zEeKNkaDyj|Aj=Yj7#O4&_zZ+}{B#4W{EB8L^=!+Vaj0<q>C(mL^A?`ZU2qPxZFc^}
z)VUW^=3Gdebvbd?<zxt&eLi{i`NWxL6Jfm}NF9_k;|!7Y5w<D`R3Bju*+B;GFe@fV
z&j@k`1;&+0pvnkdNg>xq1#2FlRY?#od|nBuRwA=L!a8sV7o<vkgw{*K+EIe6XMop7
zm<)K8gsVP+S4r?%2~syT9QxC6=uiD&P#3A;*#CxO|LaeHYNDpoAQDm^LHbId$_TvR
z0m=m*D1@~>YCj5EhXAQfkn5x71JL@Y=>SqS1gSodM%6%-1$Y>(9kOH&Z+!%@1yVz`
z9|o_%gBS=N!2=m}1hinU8(cX;4TO#ObsPfqZXj2IL-wxrodhkto_Y~f4NblHfBNPB
z(=Pp=couZnY4z6sS@S<f^geVhy=s_zSS5JBxZ5U9vmML^n^<%Ya2g%u(!0T9a7)nO
zy@dHU8MFTiR{v$J|I6F`m$d-ZM?wbw1@xeZPyau^0jNHbF#j)O2}0r~|9Le3vn&7S
zRQ=DT4X%=4^^v&6e`#CL+6D&EOAHL#7#Qjp7@8Qkck!!S7S;jPN4(mgDoNDrzl7y~
z1+V`GQU6Wy{#%v&H>m$_+VbCY=6|ns|NT#bs-%Rg|KqRx&$;|R_X?;!s<`~W^5XxB
z3;!F>{ck!CUchkW|BM^|=imRo<S_^>eDHtX-T$+0f~uq$SN?<0CD8cfq;sGHBKl8(
zE{uTOiw6$9e~^7cklF-7Lh2(3yW{Y`j>G?u&k1P*-+|I{<WJkNf5__@AnMWTBlvU@
zWSKXldTKrJ6JGT|&RBqSk>G-G2BZQ)CedmiI2TeA)$jTYVvtxLA<q<})kp9usbb5k
za`5gU&@wsjZ8(tn2y%%7NDy(q!&A`0;LT5qH#~xz?NhzwZQb?{4ck97?)ccg>wW#E
z=fz8ICQLl$U9(Xyeu{{F0fR~?gSZQWpdACh1p|j61EU%PgA#bZj~uuMBn}>ngU|kO
zfma|v>KaJ>0<Ur4TTtOsLh#xO(pTaD&j&%2LFywO@Bp1CBex>El!>xgfNgkXO5K9O
zsrzado~l`Pp?u-#vLzP^=AFu(cP4w@ne=%V(&nB|nR5YNA0^JXm^kBN!i+NsGtR_M
zKOGP54Z-Um$Vgqnv{P|YPGl~=2}huA(c+s~OQ4s@W`ny(kd6|Zgv=kIk?_h0je#_y
z1UeWTUMs;F@X83n0C$ugKq@9k1%yO`?j<Y&UEmHn3=qzM^ofv3wEhvKqXh3F!TU$Z
z3`oNUlAPddNR@<~_K?|-{t<NA2;65!Jrx|^ay!US0Hm)3Z=u1Lo<bP#Y6#ANWUKmp
zpFs%RO9Cx#*#Euhzz+y&KKK)YAOm+zhyFkY?%Ixl2JRY<BI={2)1ZTcnofZiEu8+}
z07i{x{x_ZhttAF6W;p)8{@DNKqaf6F6f|1|>3u+oeMn6KCtJWXM<DVLsDA`qM~AwK
zyAxJxK+Qp_iJ&X)Al)L!8ayx?QWHVQ&O@N8rwhDy3A%b5+|Oz`^1lVL`3QE-1El7H
z+yK~h3{<*KJr62rXI%I{`!eW|(vE%qE7$zbnEgMZ?Z0FGJLBk2%HEHJ%%5=T-DXmH
z%B=j1L**lv+Gj46A3SQm1hoGP>-`tg|1V+qU&`pef(59Gk+c3UZ3F7Ih?#(TMxy$l
zMF{*l|G72(^J)DT)cr4L@L$pbv}{}4^1p=ke-7#Y3_@QSMSpN=f_g@L#{YSZ{&SoD
z7qR;<Z~LD|>pzFsE(V5K3=F0WtX2$sWkSl`LQ1zJH6L=QeiPCMEdm!d|1ahAU)}$|
zcG7>N%>Rak|4mE(>o@(kpY-2s+kelK|J|;DP{^hKVORdAUHPAS`G3~M|Cty5mz@1y
za1z`UISM)*eFFFt)*079=MF7@@_*T55M$mAP$v;GN;&oX|0(A{X!6<r6HfjI-4=1;
zU&oPO$fs&U?s|aKED#b>p+MM>#Sf764{$b!0Uqvy)I<=n=kT9y@FjYkhkmz%QQN^^
za0FS>0O=*c7tx{393ijehOm)qCFBL(jeEX=>LB>Cy4_zOxeT8B;9VgI1KyH^R6`K5
zax0dhlFBV_D>lD@Zgt!Y8h(LH4ZvoC-juF`T`IH@bie}SR3Fgc2^*hQY<}9X`)$kK
z_pSRr)NOxWx#>~C;)@a8JH1Ng8Ai7%xa4x^g!0O}GVmCIjynJizDa=RfJDF(H<0Dl
zkUk4!<PB1-Kxz|sO$4c8AS9%6;Q%j1K+^)@@`KkZK+F_mU=U$omS^BlVGuUr()18C
zi_s1!bIzF=-mo@x`l*s-H**(X&zyfbeg5T)1y|A*TuGgOId%Tk<hhsQXJ1U1b18Pt
z#n?F)6K7wHpLsrc_Qk}R=M!d}i=BEJg3xLuNZph$?G(H+f-{gOlHk=4660p((wpSf
zM>#8?19!OUqx{tmA(auf{t@>22wpM48IZcEaKqEW4NoDJ5rl+PNyya@G8<A2m2G+f
zVL-^rt*^_rzAD@L3b`hNu!*UUAZZ&yLTVofiM2ig?TrPu*&sOuUJbz+kbOy@%4px`
zdQ$46=EHyM4*jb?j8Y%9fY1K{RZOtT2!3-Pr~`EZdwm42gWwEMH3X}VKvPJN3I$Rb
zf!8EJIx-a2N1%<ghd~=@yTH{Dyh>_11e(BVKMdLr+kG4~*3@_E|AaH3F(}9Zag)x0
zdO)??|K~6NA3ODfSHlz2?DL8tM}%Cra$D|TG1|zWzK==kAfwu4PQBZFdLIQ0K8qUu
z6F2@ZZuno?6f`0yWBy;p^uN5te?`mxDmMQWZ2!xHD<ff(|Dxa_HDTTVd>Y_Rkjg&}
zm0w(He|WV2i<^P!BT@7J!fO8+g#I&dzF`pl#;pxnQzvNlpWo!afYpCV=l{x%|G6~&
zGxBd{V3^LpV9dZ^!oZNkz*)$^x|f0H2m}8Ec9pMOdjEyZ|4TUhSMm9;8uMQ#`M*}y
zf8E^w%4Pozn*XaU`|o)0zw^caE|>lXT>znkbN`dUl~K;+|K;E+so~iF>VyBA_Wx}?
z^uPJg*S4cS`cD6ydg=fCJO7tF{J-qM|3!EHFTDAG{&i5rGy~jGg4ajg$NzR6`vbYz
z0bVP09r*>VeZWTmLC&LtEO-D_J>d0ta0YTustf7<i0(tbyTOO1cEGA3(9#7Mn_zv^
zybpZv0xF4IA3=5-HSPY=4BK6fTqVJ?8>C8tWK7Wb`A*OR26%l`v*R74GJ<TDhva=^
z60)%oULQd!qq0peVCVUO)@eg76@skO1|2K65i|!>wdF<SmS+{4pXMyR9?-elD0{Ya
zNDHS;7K3UqgNPjin*jr(7I+_!GPn~2SqLY_z#s<R^#dQ2gVZCC+J_aq+6_|mpw&u{
zE)rUO1lI-W3NeD0F>o+2iE#?4amg6U8Mx}WCOSoz`{nkeG%w2N-CQ>NV9nA?m5a_7
zF1}W{_*&M2D_IM!q|LjO241KDUaoKjQXj?7231DUv(CrQ1R+R$6hHlJ+_W<g6hHlR
z+_X~((@sHZqQvQ^A?)nMH_=cgxQhg@l^|;yKwT$jcL`J<kzOT%`bU`c5xinT8p;E$
z#DmvL5C*uH1geh;*FJ{VJGdCQ>Z9UK&mmP(>6VwcP|4;OkO~MF2~Ub7)kpBu4q>3w
zN050{$QS}r`>k%zH#n-_`yB~^cF5wdk6^u|KP`v;G#~ukeDHU}A@Fn(<Uj`S4M$C<
zK<Cjx?v1TK^S|LNXlw^GymJ=RU1|i^M-4~*HyrujbmV{I;r}gQ1h0l5gLUxvBgiZg
zgoM;kaFw7A5x6(hdiXyCwSfoRAk#+ehe4Z?I>5tsa4j8&5UaI8+QI5Oz&$L`8gKBZ
z9pvcIiDyB_kWM)VIty;<+5c0{fUd^tJ@UVC>;Jq(|HE4TJLZ4W341T)a#z&y6|dnV
zHno?`%Fmb;-?OQFVpsXgqxqjl<3F$Fe;(ccf=2%Z^g$CtLWci^3_-(XlHhGYpha%x
z|K)7|tJ;F<BgiP6m>GESo5_D6z5l{m|9MsZb1A=MlYh!CdzW4IE|2mLDZ~GgW*{V`
z^`AxLCj-wzHt{zCx}X^)F;h@&C1v(s&g#FC)qf6!*KC3{3=G+f?DC*<j~TQXSPdDN
zA{jX18Q5oXDR1S~`Y#RMj-=u7Uo-5#Zq$E+g#SiK|JBm}8y5Xnn)=^<(|_X&|4lCb
zw><yf;@p4dbN^k=|MxxvLWyVp$DaIOdia0w!T&Y8epm1O)v)7T)2<JVd)~Dj{5;|0
z@0pkWFTC-8+1>xE?t##fyZ`6k04?X3diMW>Q~&!;{Odjbw+DPtIJ`1~oM;NU@dUCu
z4!M5>seK?M%9@8mzuRDQLy!@<u7kfI{Ugl!2+}Qr57|LR>L6qbc+nlC-huRqAS6Tt
zGGYKBk^4uGx(T^Hssr!%11)I)XTZ9hpCEY?(t5<Kk07HZm0MrK7dw@0c>_nKn_ojv
z$tKX*K4lwUm23i`;*BpMTZBNngjPQ+Ui-Lc^Rv=zFB^8gtlab{sc)Bc`eX&?OeR%t
zMqy*np`)OacBH_aAo%#30C*q{GK0ebp4Nd^E0C%OJ}U&NRv_bY5Gi=A1gW;*Bqw;z
z2(oci8npJCS&M<!ghA4oSv^q7xyUMcl6CQdpswvP(~qajKcBJmdgihlsY|b=EV-Jo
z;7U5EPjoe7!L^J9*V5--O`m@?ZQhj>B$PboDrDLye#XVbndd<W+)Ij`ein|9>n2DI
z1tGH*-GCr?t%Oz|!7HYWCATs_>l~o<QRY$*1FfTkIj;m8vLmfN%3qCg#1Obv0^dux
zmT;{^us$l;`~p{f1Q$oEkKol1oB?Sy;Tf-kBy0!?N$PMCxjw4f`3X6jz}fK1sBX_U
zq{`?!@+=Z0-$4fJnhyMcR6|H4s3wBcNA=(;ss6}6(B{??|3T;4f%`|zkO8{0pjn2d
z^PnoJ33q+e1nvhxY7I!GgQq@%?tul5yg{lSNM+P|7&HR~s$3vbLa@39Vl1@wLFxpx
zL$0m>Z*=ND@xS-f|Nb-oCtm<f06}^{Q!he$KsihPhxWd3thlL{dQ8%H7rXTe2Ca<@
zYHJykb~C6SVAsCGrFUPz;Iov~UrBRN*G9zfzbIr`HFyyMq&^bR2USL(wF<_dDoM}~
zv?N=~5;R++ZV#%Dq`(tH($@c_t^Z4!f=1_fl>akH{$>!l#lUxkf%hx}|9J+{SNv+f
zMZi^(tnPmfiN6edk6FcD^J@JUHv!dpJlddokX7+No7_tVp6g7!6$}j7EL=(qY;p_?
zI$-3_z!1W~(9bHjfm`Fhgehoytg7pO^^pJC5&v~!{_Dm4S5En_k@;V;=fCy3|3>FQ
z^^x_(|5g|NyIug*N1mttyB_-=e)zxt?*B;}z9(<|QL*iN@y1V(J8`PEKCj#Px_QsL
zmc8%04}F<_?*GE8p!#Up{r}7EgZfHyu7H*|OgQzw2i!S=R7Q}R2tq;z)*v+tq&|Yw
zP>`AkLL%2k$m$_|AINM``+=Vw2Y<mUAT$P~_JPzy5E3E+seK?d5rl+@Kyn0x#8V&D
z?*eT?uiXt=Vh5^;c7Cee@ez_$kxBS621sQD88d-TF+(b&@~v;nx4tdg@&;ZXm4a_s
zh15rIH8s0FHSGFOzw>>`))y&DZ}_xsRf+B8wM=D@c4y!+U|>*#orC}xkrRWRL@Erf
zjQGG4I-u(qVD%9@QdbC4A3>@i$btk28!|lvmxV~NgJyy_#JPo)xTTCll&ytygA{EN
zb$p8L;#z~t7bdrF%$a(me8Ktr<yQ(;T+Lo~Eo<r3l!cem7GF(TbUA4eXml=p{^hiJ
zS5oIf$<(=*Q|DezMnXxmufXf0gc;`&W}Hi!b18B5MMz~7Gwn>wv@>Y+5o8cAcIs(J
zWrR#-EV=<7yTh!TAl)UT;XBYWJO~$aF&^YR2XK`Ht&i|@kswn@pd*IBo0Y(I5OgjH
z%qFKk!a8IJ=_Nr(cm;&UDBB8M3J$55;MGw1w%3q42tq>kBUJ2sR{>rK3gJS?>Rs=V
z5u^@+kX5@sz!7A<H@q5xGmz^eWH!7$!qY`UUUAoQ=ua#71fj;me<AfzGh{~+_}uR%
z@KLLvZqa#YmDF<nf799jjVJ!s9sOT_7|}n1^nDr+Ko``3>ZU{gTaZu_czqpciQWGH
zEeD_oGM5BdBi9PPgaT6UKxTy?b3Kr?bC8+{LUtVf54uR=IOyhu38(%~1mD>`^Wy&n
z*Z$AF{D1oC|C5jZ@7VsoXyO0Z&j0Sk@AX4pOWI#$)wsr>c%DJ(HiP&L2C>JC@^9Fc
ze{!k)5!C)KX$&26Q?U3iXZByl7=*+P|BLGX7t#MOYVcnGjD(H<OIm`q{0JEQ7u5VO
zpz)ty@4tjOXjDzZ?Z1Y@e`V|cG8Q1DVErF7j%V<nU->_)<Rb>|D-2xg7+6;_ur6ib
zSivB4mQVG8q|tvxtN+q^;Q1rL2MmG_*yUgFtAF5E{>m=>mR<BH1Mgl&fkp<dY8DY!
z20nWRP89|gc{X-YHg-`47D)zHDF%jOF2yMVTL0uMKr>D1uKzUy{_BPQ*AM$|5dL31
z^1oKhfAO0C)(iftp8KzL`M=(^|GHN}Nc-}CqYM9aPyaVP@?U$`f3sEJ)n~tmn)4uI
z)w|*~&r8<6C|UEMeEs9{O;4(~y{Oy%vSH`z_I)2F9RE4>-2b`P{x7@tfAQ`Av#x-;
zNWI7ZLI&&F4}OQ#L=dtKT!}#HBk0Z`@K9Sb^1cHkHl#j+=z^$+?{9^#>qe`L@br%$
zyI&zS5rk|8pI-;5i6E;_A@i;93<YOE`b4$hb*+fXXvarTl?1MpKs#Q+TU@KRzJ~8?
zg`BHUwfRjIc($l=%iGee?~69Q$y@g<Z{4%p_0MzHKQG?+vUnrt5~9k@FKf2EYTWt0
zbI-?)-5>gPziZv}v~KB@tceH0tCkyuHc1*qf{r9*Q3bDy1KmRh9)*J(ae#aa1EgET
z1+Iozz)P|rn}gsar0Rjp9I=7dFhJJSLC#EooMkEs9_5o|;L~MLauc?SQwS<I%jk8l
zSQyf}F1l||%JidY^DpKsy<V{9LGk*>d8-~|ExVJq<W|n28=3R3X3ht7g&_T)^!Zm)
z=3Pmhe>D}%NSbpgY0jnOxtAe~q&b(MbrWPY-K@(A(=WtNKN~;&EU1eFnm>XrW{97D
zI%e9bm}#e?r=CPZ$Tbm!jlVvEbeF&#B}7dGsgE!h<7I<xJcpbwbQjFNi+t%h8XGcp
zM@D^wIfz%h9yECbnL>h)kSQeikR7Blf{<u6QR$YKkU9v`9D=an^%0~Rf{>7UrwUvd
zVb(`gJKqsgA3^d3bj=Tr@j7^YRKNE-besY8TsyQb5~MzYEVqN#M@>im)kDx>@PT#U
zJNuf?g6=bFJomr(0thvo|KD={Kj@0{lc4#d2H1FA{lWi@2T%~GdIDEK@G1#jAE8x7
zpepI$|2F7WS<ou(mV=-r+@MMdJdOuaaOi*2e$eW1$Vm?Ur~Xd_&jHQ20-Dj7bp>=b
zLfgLoWo!P&PyOIhaYrlekht3x4)di9Ix84d7BEPzXOQ2@pm2^!^)jc%O9A~4!iN7O
z%|W|LB~1P+TK`vb_^)F7U)ct{Ai?CnnC^ceP4J|T+J7GP|9qPNxwJr4lDPSQIlKQ-
zR{xom|1*e#5WCuc1^fRdLH`Z?|7$schU^q<{wv!4SG4&rY4TrK9kft^f%6gr!!ZVi
zMGOoJ85kBYuq<KVJ;9}TM^q0~AITd0=TrR8AozfR^$G*mWd`1B3|w~@n65A|tYu(W
z!N6PsLIREqeD*B78Vu}8tZX8zY$BkOTNuR{m`Zt+rwMBRm$&?{X#Zc`^}m|$f33j(
zx*?z{Nj3bxYWRPF(*I_&|0|#QuX7bqAL(BGZ*mD#C0U*PZ*ly;!<PRFQyyB?9PsJ8
zls5lX;i{))>mGsbn+D%9P!76i;A!=yC-vK2H12xSyyso_;qRcq&vVfF2sB%C2zuo=
z;uvl4`Zq|Q1ya92Y7<EJ9I~PY!iMzR;gw4Z_yTdrsdOD+cI&?HkX{cm2{8>K0<i@`
zLTVyN?E~)twZcwgfV9?X_kOM23q6q-QVk*JFGzh<v+GmME^xPK$H$uOAK(Zw?g2S$
zp>7B0&;xj_RI?3K8C8K#W-i_EvS`yAFnU$I`E}vu*CpHD)$IDzxc__m!S4;bKGbb{
zQ@r|N`mFPQjoYl!XDRtN@tLMDC<ZV{x-oE?F)(U?`$Tf!xgl|IeE~TL4no3@M}Um5
zae!xbFzX{Ga6JVXzJvFJ*uiVF1wa)XiwwK4wt$kOnnk#=U#@>vXH4_*{As%i79Fo#
zeW7;U^}6*ps@C4FT6?=>?fsIqpox;~<)BhMW67=D#W!;n-N;&SEekd%2dR?a^%0~)
zlsFr-VgXL3%(;{@=MrRfcjAnT@zc&j`a!W%PsdC-6*mopVyB*rg;hgQQ%*oo#H3>o
zl(yg+8p>F3Eo;$@3@}PtbR&K7O=JWawS%m|gI7av23i*hTnCX>A3@fK!)qm^fjdzD
z2)qFAQ6X&94$eTEOCqU0!mO1bbr7UJf{>67JMx?n=zt)MJ`uczf-@i;BKY<?2p7^h
zg0La=5qz0<-JUNsQ1q>44?3#d`yC<+$#IZ&VEz8@4X`D4$ZNkL^$}!KQsd#j2z2OQ
z%Tdq;NgZeYx1Rgoe&K%$7&V;-RY}bk|2LioZASuaN;>wx7JNWZ;}OuZfu=*KRT8*9
zf>uKZLH#C3-PCsQf6IPQ{nE7mfAc;NyJg>h5Q21?AcKD38meR8|L%kTdk%vx?}psb
zFyZ9?sptMrKLa{Ju4DWEisk>)CjR%W_+c3JUfSg*yY6)cg;NZ|XBqfUGVoks5WdYI
z`J7SlJ-6n6eqB&EN6hfQq!DPGO~T~Av^i)&f}-_*RT~gewgJ@+k|v-P+L8vKDoIf5
zKcD)4E|nh~O5b?Z|B4&_m$Ue<Z2Mo_;6H=NHwLa(41DkSHUDe5{I?DNZx-<1!0o@G
zJE*S&sv2znOBwv<l6}p<dYOT7Ed#?!28KQchF%8d9tO642JTI4^2hje|H;{c&g*8^
z{LdhKj)8Fx1M4&f<|z!!-3$zE3`}955jieHW<FDPVNG^nO=f;|1|Ajg+<^oGgAD_N
z0|WbVF6F}lx}ZgUYL5RkUH+?k{MYvRZ{YLa$nU=z7;zN*H<<BX>imC|oB!2r{#U*6
zU+u<!)$9K?ul?7!`d{_@fAx$1^^g6R+W23s?ig?CTIZUrVf`m^=AExvbGLZSz2Y^X
zQ=lQmQQ7*3kn3`a*FUY>^#MLc2kIFe_}0GvYby-39ry;R7a(;+>%Q;sm2XW4el#Mj
z`GmKW;3`3t2)F`*tm1}T>;<V`pzQr$K@4zz2Xea#L?yf*1nKxdv^2ub9EP;hAT2jY
z9)Xb6dp=h0fu0fxJqu{}=bBxgs&{^@-ubb1$LE^upCG7e>$|G0?<%*vt=jqyr9RsH
zvU>B&+O4naw!Nv@@(MKIx#d;Owl}pq-#737T7Tec<Du^z2R}FOc~`&jQSST;v7OtU
z^XHmHwn^D$@o9xHNVza@KsKn#gEy#3f`{fHD-+<g5B#(Q$T>XlY6!w$2iH3+;7K3k
z<9ay3%i*}e%k9J&SkxE<j2L8{`1FEhoieoiOO0dOT=J#|G^|UTb|iDbg`$-=OV&NA
z*z~Mm%_C6bf937GmA7+O+{#&gGi&Mf%q7>-AqZUHLo1{CH!|m4hoFpkS2E^ZNt}Bz
zaqh+BxffIBT}qmBArZ8t=wj;J%b;p#_QkY0mr`e6OrCi@X~wyv8RrtFor#@#I%?91
zm`TT?CmxNNbS!SliMXl9<E9>u0i&qNN24YmjhTEndeWiDNrxgP9z?5*(!jG+pt&kS
z^-&gdItf&t!s{al1G!d$tpF#se?&}uRImoUN+P~Kf_Ic43`k{!Iimz0uftUz!D}33
z2BbrTECQ)?h^dd@6;t)@&)}@|t$O!YY*iAd?MG^T1lkyT<X_9N{~&bqe+$yN4eb~H
zw_gS!NEZoGA2po$UwiU@{qg^G$No1S0U^jrHc+<+T=g^^{NK1Agc|mNNYL=zVbJhh
z>tSf$2RgyDA5<T;9r)jN;D75uFggV42SG;Urk?&k`{MuEmq2LRdC-dPvi1MtXMFIg
ze_)V$OwwZ;v(W+ul_d;v^B5%7GRSOXkUPPkdY)bLF~8w!0mGldCVxasKr7ZnjY0E4
z;zs|)jX(>tC5=EmBPlab&q&q&zq;LjWh>AEIaxDMWh7zzU(x_{K98`@e{Plk%(5RD
zB;T?rd=u3EFKZ6cBCPYDf&U`|^J50Vk75S@wO#)k`~KJS_^<E!-^c^hNl~=_uVD6{
zOW{8Q&n*UqlMD>Y85ou^F!nPrO<-W@1=U9!>lvjE@@W5&wfQe@`5#mt37utN*vY`q
z!@$r1-e*+9z!1d1;LpIO%fM;C#HYf<r^3jq$^f~_nL~j=Fpym)hFNBhgyBU|qyKW&
z|Fv8}NX;EoCFy$p*Y*6b<OM=3IsXl3{+GT0s*f~ogQ_HrTmLm~{nxqqU+emR&C8(r
z$o$-Y!xR5K7QA2xpD!8It&=*>tzu=uq{Hb8E)}f24Y`$|VC8KPd)2)X(4fuJ^35;n
zz}F}=?fKNa_jCLHugLXL>wakO19F=KWVHaKnt+U$VUm#A1VTcp5-1y~_5oEx5bl2H
zIjFR#k03px>YX2}c6@-<L?5cQet^_R5E61Ccje}n<r|+@ZF*4+9)D}z{juY~HxL5f
znclGHQ~BnXsmpE$Og!dLxmqJ}vY>MbgIYL)h!X>c83Usp1A{gLgF3h$q{6@eStbYR
z5J`bIuA-e62OpZ_0(WqbD<*F6KpZD{oi;?A6)eKfz$V2hq9LyBr0*PM9a<ieHzB5G
zVZns06>|?)%|BYU;AG?KE46EH*Q~i!vHpJ1%A46sZ)PpIku>j0+M*lT%b*BS8D%cH
zmbUn6+TyFp^RK3WyG2<Gu0d*|%z4){=3YymdpUjX<%BsG62QkSB+dq%w~z?h-wIl>
z09iqoI{PA|K1!T^Hg3wP=!wT8CmxTPaw>l6NeGIYaw2-tkr;3_1gVT7Cmo5Jco0$_
zg-<vDs(Rpc5SRh!qhXHSK^8nf#_J#?F{5_iE)ujdg0Rts>EK*YmA&c#oP<m#f!E!E
zI+Ea};Lt7-yjuikz%Sb^SpNi$AR~34DWvt#J+q}q=Qg0#N5z|-!w2pl{Ui9qC35Eo
z+MNd<>kV29iqu7dw74MYylTe>2p5w0v6Ao#2-dXw0ucdMM$ldoq~1X$tM`6~quTvH
z;i!K9&xQlP;HdH7Z^#;krbB-q8<E<M{cizZYYsl*60{=;a+FZpDbSEz=jHz$SHQK>
z<^T2P|2Limb&)^^*`4^`aP)sYxJqgSZ}w?{^_5x<{BH(VNsaqqdteWN&RYN-iEsqE
zlnyk|cNlbhbnp596V8DSrI~&SbScA}i~pyb{NK6#f6c1@S(E+;RlT)|f2ZVng-`1O
zgXDe&?qgtdlR@+`qvSU(rT>EJ|3$Sy$H9pi{TDU{AzlN}2|WBp|M?96^XY^3nM#|2
z7HNy?{}<B(F{DjF3$xW-{;S&kmo@t@YxG~v7_{I`+yr!nHjmnWPMOyXLJ!%b&az0H
zW|F)jp#Dfu_rJU~sH$QR{LH}em_hW5jLCl;7tlzZn%#d5$NxI6|25tIE86`R*Zj{Q
z@R@;gE2wA0)WX2j!ob|cz|zgY(#gQq&A_pkL2?VH8mJzTvG~uW_Md_OGy}sH28K!o
zhH?gmd<KRB(7JAhL<R<P(6%Es873|T22ME!ZUqKLUq-=LW{KSbswa3<KZ_gulQR9U
zZ1-Q+4TLn^|7*Jc*YNnS4kH=T{%cM9FLCL={LTN$cm6Bg{;vr}s<;2Ef~zOxYyXw6
z{8zm6-*nG^hWy<eR%ODjjoJaV7O`E<h07w__vFpFSiSOI(b8MR%WfC0zF)fjaq*_-
zWm{j>Z2wTd>vQ|T?`;RZL3&1z`U2F!*#qh;HShi03PF3mw!*F!YTWa^anJW=7;4@3
z6I$<p`#X@D2u6Z#g@CZ(M_|Dj&HKN$9Qf7@?nyySfLQ@wzYW<{1X=I{=^0h;`c||1
zd)1zARiLp2P$q)*k07UZgXfGOnXwi;Ojo!4Q{DDY;K~SEA3?@IKvPJ&J~ZzB*tF+U
z`~I(8hko=Q`rf(cW6S1eWlL{lPC4pdwc0YSTi&Hmz%Y(M&YOYXjDba)fkmBxNtJ;~
z5rmi}8CawkSR_EZct97@K~9b12lr(l_XNS`i6HCT;9NfN@!F7Ob>Krv8AQQ5UZokB
zl^9r67<lv;<Qy23Jw@$ObfQ~aOXi06?8=;XCV%Pe($x<kUEQJ;cM6x^&R=#Tf7y*(
zB$T)Gdfw9Oxl69)F1ePy<XX<s>)DI1=PbFAx#U{rl4}`@KnOBDlsq3)F{RGCo-+4Z
z%G_(obFL=OxtcWl3S_D%an3n7g6wjIkjXPaC(*(CM@cizB~Ck^F!fyA<TKF|P9{t~
z71etrw0nO<-=Wa{!{HN;hEF;YKIus0q$A-I4n_6tkM1YAK0;pMKumoE?<+wnBQz3T
z89^BE$_SMKxvm0qm;<iyI!Jw#zwR--GJ-H5RT5;y9h8KfBZOQZA<rv8DkIRIqs^dW
z?(kGbSnDH5Y6lHXgA+fbia{nJwGZ+D9YmyV&zJhWUy;V^K)DIICW5fxl@Xi)sgmmV
z|3t2mAoWrG!QYVj2vQTZ9fKda{J-uH=o(pQm2}~M*A-BG)N$p1%a#Almq8<Vjb}l7
zW*bj}Zli$INA=*%KA=&%gZ~>3f>tcF9Ri&Z)N}xJd=O|U0%RZ1QSd$8C;#`I0bR8-
z>B@fyntTy-!9>mWze#f+yH%glPS`B!yp&OYB7^J%2FV@<p@m?ygIWG4m-0P9t*0Wo
zzh%w-%bSBX{wSLNS1<z&!HFA#wv-AO{O8jB&!Z2jj0BAT3!8wDlnH2Gs-PyQK9V*C
zjl3y4{8x7PuVnpS&iuc;$$w>w|FTy9#f<;+fc6CaVBomUz;uy;a}NW@ZU*jSY_d0a
zHUG<5{g*cS&nWSWf%6520?29|m;aEJb!zsYDoND|RH<>v{AXZ(#=yKDJO|Xuz}yP0
zk661v^%2`b2C<DCs-XHv+8lH;1FSx3WMF7y;HYKbZDJLvWD+c6<8x(Tv;Y-ctkMi@
z(xCc?BSc6kPgLuyg6R!OBT!8$Zv`5u({o4EM`~`M`bgdVzna^BhOGZOGyY3m`LA#b
zOMRq%3)Ev$yAG<7Tu%QNobiOmu8PyTT;8Qn)+t}!w@Ev)&n{<SX!DMg>1Xm5Udvi?
zBWKyI{B@5@x4eYZN09yzsOo|BTp;yP^WM)OL89v;&_SlKSsq9o)C{^Z4|;1Jyc&Wt
zAax%^CDcZ6eFQmu4l*kQAt5ypgsj>99j!j9-S@e6A9xEnc-I~%e}M<<YPNp>RY_Yv
z)NK7wx#fM;RuH#g_m}4VU)v7-XgTn$6?_qG{m%Ers~^TrJ?&YuQ73+ih;tc(ULu2H
z5QBgt1Cu@jgC=;3kR}6@CU{Cn5p-q(s|slQDvK0&RtU0p6|$}Xvd>f)TqOyBkCKDj
z=>Q=iw*^5uN1)T;Kz%3<c?LmkHfd84T~9UpXd~Yo$CzrbjINlfIZ53cOXeOfUv|EB
z&DDyvx69YuDqMayf9Y*d6|>}8?&9mYi?8P_zM8Z6Y8Dt}FS(Wl=0XJF;u#CBWGuLn
zy5JHB&A$xULzFV_N)l*j?sD>+t0<Mx>?@F|qWD>7;0SX5Pr{5d$+OR=&OV<u=R(??
zOAwSa<6Kn#iKzY);XOx#+7AS^?g?t$<K4K`y>_Eh%?6*gUE!0C#LYYn>LN`(8aL@M
zxIO~82jVI$B)G<e)SH=$uY(cj00(&g2r``nAt8fx5E5Pu!5L^Z5rhk=hR{gV%IHBJ
z(m|}?g?10%-6A*xe|-e0l^`Uf8iJGX@jCcMHV6YUOb6d<0BJR$b&k;LBS@d9YS*V~
za7_fMUmz2#kQxWV2Aw_!UX1|ZA}fPPLFye$5~99#?{{<r?j<4DM~w%6L+T^Ywj=N<
z4UqF&kn5x7L;pYrFM#i)Xg~YE`{Mtut02^R`G4!>|BV;^*PjC&*8n=Y;W%imUBeO3
za=XT(;EvUC&<%YZC;xYz`rm!_fA9JKz2`uv=feN~%l{`_1fi)HK<D!`?f74~{D0!4
z|IQ`PGz0I5*c@k4Ji;Knje%_k1M^k}t}_fmR~W>evdX<>k^jM~0XlU@*bsChgQ)p`
z8QcGI_MnvmGUlLhHbEWG)=>_%|D5U|#Gwm5<-qvAfCXsDft2NcL5=^65`P#Z{_t!3
zSFro9@B81-=f9dGXb4Wp26Uo=jO~AUv;X{R|5?QzFtA@_U|j}YfiRVUVHyMLN@j^Y
z94f!$to|#SgU90dzVNDoCZ2ShK|L&GoBvAopxRB|`aie&e+JRppfwDvGZ+}BFt9d*
z5OWs;TQ37!I|F+M1N%}2iESL}pksw3&Hu9~{b%5Pz`$^sk@qN@&?yGd>kLwNSf!t_
zNIhkf*w4VahJneIfx()AK?1Y|iK&`TWv-C!KM7;d4HOb)AiwB%{WtXeujBDw!wocG
zsOt7#)$PBc$A2Zy{|r_CwO9R@y!l_|&VQM^Af$Tdzv`X;a$qEX`@aI1A$8-w&Xxb%
zoB!~8^)VQhDq5#1+onr9<jXl1DSMO~gtj>3&53N^nlbxA*~)tbtDlr^cv-vSBc#BG
z4#dHBl|n}6Q0gOatppKAQw-U}jV1`=euv+h2dReY4}6B8+I=69_kBYykAOG^vKj$G
zA~))4_I#@a4;|F*|5*h_)qB5I@BLb_?{nFnj}^N=l<#_9x$|QM=xFW_727}ubXV{E
z*tqL+^PaCA`+syF`aR|7p9u$lbnbXvv-)1qtTPb}n_N;S>H1ZR8%DD$dNYXGF|g=>
zmmkQ2NjdOjkSwSN#3I4KBmzQ=0t|2jx~dye^+1l5gI}TwS?vbd-zpF7@~|j_7~EP6
z?3xU``V4YT0><Iuj#-+)HP%^^0_vB?_3g@>d9+~Z<&u@xtJdAGUjLwQ&Ar?ew=$RB
zOkaFGb;-@tB{$O--A-S0J9F{%j78VdAZP*T*aXOo3}{IJX!r|M+JhHtgO1uxnRf}&
z8%mi6+Q*s*8jb@knFFnwgY<@GUx9XuU`x26_0i0;ap1#$5@()G22CNIPndBga^lg*
z{=<>|hogEAMfDyE>Dm|AzT2&Oi)+<px9TnKHCsLFxB52kbS++Co;u4YVX9ru!r;bj
zaXkm)CLE6KKNQowFR)=VmO2PrV?xCTL45>Y+W@JI3P4BOL3bfx)<^J)39YXLsf<8l
zci8$zplz_5UO<j(0PU>?_lyWuM)3LwGHq176Es6wwd+$Ac<ByuMFXiyaFNKhRXuoV
z5N2gm3#-D=E2F*NG5bgG`l#{nU&sySnDtT1;s5aZsO`-E_Ot)nFM;Z#wk!YJuKaJl
z1gesd_aZ@SBFHrsCqVU4_Zd(f)O-Ga|Hc24F8`l;?f=B9|JzReui5)QeZ>ct`m35L
zyZBvKGU!ZTknLv>Y-Zq`!XPx4L1H(f!eJiuo1zAfBn>~ynf;J41@!_z(>f;q#ZCW9
zfKN7%wFlKl5|*H4Y@!CBLwi80;57bosDX-iM*06t>i;=(|8pDv7q|bf;SSm~#31~g
zf%PQ=|7St1|7tG(jRXIi2mROg`mbsaLUQ(?Q{aR^OAG!nu%2gNILg2<pMha61H%*s
zhN%n;ix~vBGRl6IGW)M)^Iu5gKZE#RAuZ6{lZO3&HG9zVJSBV3NS%bqe+H@l4D4qa
z7!ETqO$F6QY%L6I;PE=PUT|g9!N9eGL3$^bCTQ1^tTo8ZtRl}Dcpr!=-jvaJAgKLT
zRR5!-{x3=WU$XkwIHeD<@&_?6*n$Rjn57w*sztOGiyDB|lZ%`Fm$LY;>iplp@4vA>
zs6GNMv2*#a;tH*gRDJ%lxBWNX1geZ=@BWttS4p_)BfV??r4Rp?3Z2QIkt1aiCvTZ7
zW}PK&lPzVRC*xQk=Tl{tGAXoUZ|#PskkbY*s}e|!0I7GH_kM2L^Qi?1VfI`g>XAuU
zO$3>K0*&lJL?E?K?Y@t2RJZpN2!RI;kWHxH^%=5AyM7mFSpt0I0W#TI4bBc#`+t`2
z`Bt&(Q~A!1CA;1i?|fIb^IgU64>fzf)bIb^eDHV6p+ArnfSreax9<C1v-wT>yz61@
z`)qQTDEK#VS>`aPhBF8_F)$l2Fz7NcYJ-s`1G6>*n;ICYg2v5Q6d9Nm7?{Ku7)8OA
zkuaz-ViIIv5(F_omv=*!D+q#?x-kedu*$LsYjet&39C7XXuC@q_^8-KtJ=n>J0<G4
zr)fARnFkd*B{T&WPEKxHRXlB1@!Vsz%P-Wfxl+IGTJ74KwQFyduenvc;#%?Y8^z0S
z<Sf6Lz3fK%;_GRPuB9x#k+S$k+QM6D3vWUC4k-(+q(Cd9E0FpKws#f2fe2I|ftMwK
z)^8`zy_^U><R>1yaTQeQfX_iqoOKyAMhC8p;%8h0p_!ob7UE}~jh}fIau{9Qv{O-&
zjs<t`cdy^<P`1jUY?XV}dXMT2P8Dlxi<es$th6ayWmCAyp>&;R{Z{+Z)rRp?WW4I7
zJ?qp$I!%(M+vO~@&0l1ZHP<Y4hGs-JwBA9kgOJ(S>Z45X0*v&<H#3&pKt_-qvTzc%
zRTgnJIHXU6OrrIS;9S%$5@fs%G>ZhQlAsewkm0+6HII<1AvhZ{WCtO!)<^Kp5qv!Z
ze9j21e+20nA)inOU$g+NjKHn0%AFq}2()wu+)IMz7c>T>4niZ5XO19j<cbMWogvpp
zkRdxreN?;uXB}+*sP4e8x&y!J5B`SKN3d%;p@Vf$?onuc)DGSr+kO^Q8TDNM-*F9u
zS}udCq-Jmzsqw`BhU1`(NT9PCj{dJd^uO*Ph@5ct|Fp}XlX*Ii{;yp9Kc?@wL+VWx
z&%OMb>ls95G4O6+;9keTb(lfqEVJ|zE~VE(n*SsX{tIaR7uE&M1PSYd?gkPt_|K#L
zpGy;T=9{PqXqS+p)qizcP^BPm{$JV{bO3^&F6jOR5rhAdhW|x%|MP2nXOn-&F872-
z`K5p^sN14w|DR3eKLg8s28LS<!XIS}K}ENb-+v=-F!K4Y<Mv<G5o9T=)JxF1Yu2R<
z4D%Qm+ZdSIK~q0$9SrO<83YzFNIelX11;(n*Z<Eb^<Plyzlt4bt+<lie<eFmt)yu6
zUtIS;gU~Mqj*SdVs~MOlfDlU`18YA6YY(V@#M#2Y*}}lJiB0JgpW%NQd(f685uN|c
z65kkvKk_U871M%^G#Z0W4HDM-&mjGmfnzS{Bvl3%1_oaSrga<&XSlUM`<}$D{!82a
zSM&UD5d2@u>%Xe&e--Ed%1)r+J8ifBf`0$yqW`lj{I7QSzw})YlDP9<;?94$yC5WW
z=RXMD{V#dvzu3M1Lbv`a-TW_h_P=ueHU{-PVciH3y-0DBXeskJ8Ou01>jW9cJPqF}
zv*c;1la99U`PzNpXXE~#^?ScJ?*0Kn;8`I^y#T3qApN8Ey`S5`ouigLpCIB85|XeX
z>Kk@{fpll!Tv$y9t&c$5y`LNRey#^~i$GOU&E5|+dq36eg<i!1=?6h-A_&>A^Haml
zPjx##f!1m709`j$zw0w-vI*SEsy_@Gj;=lUw|f8Y>I1)O4}q?y>^%8@;;H}rC;s;z
z{N1tZOU<f>sgq9yRjxOSovh$g!l@n0AnwG#ZUpWGX@I7ISkxI<R6vMLnSot}fkOd=
z*rgd*B|&{6Mo|Xn43QLw0lHd996ZY-23|fV3BKn}nStAsLB@^GFiyq2#5k(cBY$3K
z<NAn}P2p|ZBieU_b?l7oKaeo#X!^`Eg^RBhExBH{>Tdn!XO$Zsm9KkHw*Fz!>bp71
z?&L1No4xclX#Lot8=&o<i*9FwyF!o-5u_S|lPU9Wq|Cnose@AIUrhtA*3MXP4Z4#E
zyhbi#-lcTV;oH#dLXgd^Dc~)uiL)<4R%=7%iITwEiy+JApli8jUxX~-j+=HSqVGsZ
z*MXppeV+AO9m-Z&<}WtOoM)0g$0U7@aoTJsN}X?#w!kufsa?q`@2a)7IrDV`YPt2I
zg|#ANj1x7Tb4~p#4SmZsT=L{?l0}Uo!G}M9Dov~vQ`&+nkO~)x1nriEtpLBCzW6$%
zN`f;WT%=kFQ6FV3yMtUALD=v*2#tYMCDE!rLY_8)EM~w}A60IBjXY!rs-BQmGC=Ak
zNKFJ95P&Uafb^KKlJIFJIHPv==W5)j27J5;X!Lu}H^`zp$mj&5K?tdmu+~SAyZPY#
zBgBy1QScE!NB_4T1Kk4)I<etAsEgEm@jqm|t^vF;wibMFQ1zkzpyTfjgDxv?JNdus
z?El^ipgE)bH6I*nkIO_XVm9q#kS$|it771+We}XsAhm#9<+y<E1xdqK3Kkz_O#aH6
z{*yBNF9qI$Dh9rPLDUem<p;D8RUcF(L2hu6H3LoeXxRN%wf-+-0;-S1j6k;tffVcf
z=T-gBDEWgy^a+FTBL<-xj1u=CYX+n(|AWTF*d8)4-(!>hscZqNk90i#YdM2fAQ*Uo
z)@;jI{AX4C&%kq=f#D3OGGdqos*IRB7+BgE*gHY>5#Is^iATaF|5R;3N2jvL{^!^D
zuVe$Nk3buaY(b-TvS$BzmH#tve`a7^$H2IXfqe=C+hhjzi3}W*K(|G3^fPd_Fz~c9
z2<+g~x+G%$U(WHrnjOd&tWrN1gg)>n{F2ZCEyLi^2Mspzn}F7+vnl>(7GJ}_G@XGn
zhFvU^QSzv$?oC01|5BEq%1GAXzpBT7E#LpDuK$&tK_ioj4*xZrL5R=$zkbetiLL+j
zPJ=2VDR5<kTpvl^231DL^^xqA|5{~x8C0_QG=h26g9LO#M2#ZFj3XpWqeLvzWE}IP
zJ*v%9rpHY_(|zD)(}7=&`+tHequoCss}vx88A$)A39{~vP<;fci6A7TK0;!DYuNn_
zR2l90+PL>~!=6v@`lxo_M|gb%X~==Lje}27fOL*(w!W(aFSh{gW;*nv{qRrFhP4Zz
z>oX>t2km8OJqhZnx19oY<eN|Yui5i0Z{3TOxi@@T513^vlJst6GDv5T31;ARW?;2u
zU@>E0Hf3Nn0M$pVx(uwk3~ZVp#G%2!p#iFl*pxv>XRygJut|aHBWRTb**7E#>ezzn
zBQXXBDFzND22m4XO%DynG`sMc;G#Ksz1yqio@!ljwQcR4?oAInwmj<G`nYBDlZN#V
zs@LADUH_nQ^RtFcPixmd0-=ph%2(ejTyYDs4k2&(&75U-Ae9khiX?OK&1}^A2vRGh
zFS?z!@D`*pN|}EHG71MFA^V0PtF>Vys6K)pumGu#QeoR#Q)XXD0k2y~oP9BA&ZX$7
zr^EV=1a}_{>N*hAbs)I=U_i${pO#&ojoV$TH#$|UvnyF;nZML5XMsW5EThy}2FWuF
zQ)WVtaoTK?v;`JfOKgi*I#;fDFI%Y<*&}3{#319zDB;E`<HfD&FQ^^Lsp`+F=*=MI
z$RKC|uaA%!@VY4t*6G7jAEhq_)k?VPBgk|T=2ltEx(QMrWiLnBGYhGeAZzU)B)m_A
z%*b2&C~qx#FA35!f|Kwr5`=*^g;ccpMKSnXJJ8r2_zYFZ!iI`1po1D9^$~;wRYn*$
z!&Yp6Q?~70*|v9(`UrA&BxFVjYkdS4gw$e{J3m4O`Y=hfN~&`A*UH^rAuEU={Ufw0
z3DP;LJMgRK;P0A)u#@W`cOf152j7JRxs3wCtvU3s{wU}YinjCro6dn2*VUZ<Uv=_-
z-O2wo;K?J<S>MP1cbx+r)YE?Cf5D31&eiASeO57OcQWwgF|cPcurx5RHZt&R;7~Xs
zr1M?Y9CRWax6XeKFk;pD&!YXGMdv@O?tfOD|7_r;2=Mz-d9*>N#t9mLPIZ&B0Zo@E
zS%E4e&|(IQ|I%h4B&GkKTj4*0&;<sTLk#T88Ms$7aIIqz+Qy>z8gfh=m->GOp4SZQ
z&$v}U9T{ymP$xpo4zx5*&-uTq73iD<Ceg19?AsU^RxvPiGca_4R$sGBU|{QD;Ob%E
zoXQ|Dhe7<lpwTZSyZ_?G|5;@I^QnQB+o?H$hUt{-|0~#lt})`0{msDon1O8>=qLuR
z$qd|6Kzcc+f~q9;i40ub41E0zA{T}9-$>d0mvj2B;RM<S#VY-kf#(^6*b9F3pIrL?
zxxn=ipYeYI{r?>5{~2Z8G6+6m5WmbIf16YFr>Nn7VN=j?tWtLWrS1PKI{jC2294S&
zIe^A6W$peexqvRJU`qRMKKsA?MbL&L@jL&;@BA0N@n8HFbkt7b_J4`npq`P~z5jxD
z|MT7YFMi{{=(YbcMLQVeQw3DKgw%Y5G(1GKyu@@p#r3>|4Z@|&<3z18rR?)`B6`af
zUGLiet7-eE#@#;}c73nk^Q(T(ue#me>vlue&Vj3vZ;;+l<F3z*yFRz<{?ZI)fVjIq
zHh|Zsfey*p@ex#a?0nm}`+Xx=ylKxT2x{E@v2piD@Z!DCt@}SWgHhw&k9E61U7p&V
z?;Cb~XxR0kY1fC^t*`61zV6)rwd=sQz9YY;9RD}r@UN*S{`VjH*|hyb#oA{@%O4di
zeUQ2ES<IB1-u+koCfx{{cE^A6P2a8yR^@y3k`@ZNwK8a>F-V0m@VkQQAZ9ZLb}I%B
z69!IG1}-BI;?`#1)@I<;03mi2P&EYU6LBjsa4Imc%P=sBgOBkM2h}?a(x6!+7G(xb
z69yq$MwMVui*)_aPM55Cq4nFcr=BTYe4~Etqxy}n8#lkL-}1I*<E!#D&r4T5En4-U
zWX+?Jwa<!HKPg=GxM1a@f))4jmfeA%?4>s|7F~l>J&-+~pi34O!>(XhbUS0=tt>Et
zR6y`<5r{GWM(VulkU9u*nq1nvD{0^k5$KEs6vm~Dd6yt{Q~KP?5C*&hl|J`!(u{Md
z;Kd1%{fGTpc6d~8aI0MBT)Eb%VvS4r8mH1#wgpQqvKLq(A&bm;CaJT`(`K5d%`{D(
zVUjf6Flo9;>MY~b`Bpj0oJv=@R;+c&ou%Me#xCc`AYjbEug1Wy#wwt~!mrH4Db2tp
z!N@Gez#xoV4Z+!%^$}7>3B7-WwLZ#Nas$>yf^JZPFKmD_ApIju5>hciNJxDIUiA%H
zUk9m@FiA*#ghs*#?vUq_Ae9l?L{iD-7e!!Hf^oDRq&|X^kSeJXeDXZxlm<9SVSNOz
zn6NS+%Ot9Ie}>dYkSYnYK7w?VAY|44pVhE|yL#~QZ>(h9fnQYz{!}0QTYC&tCDokx
zUv=z%*-;RxJo3Nt2xzdb?JTIZnZ5q2Tk{2}@Ffho4GfYw4E(tad^rq!1q_144E){f
zGBYF$uBh5Rlr;V)X7HOw7gQxd>LV6ya5bd&pG^-`8F3l@7cu`YX8E7b;6H=>e+H3X
z3^M;kO+bx*H5br^Q+adHvN|Pe(5#J&Ip|0qUeJ<&w+sw>!S@8s1>erFh=FYtgXn!h
zy&sZh|Ah_ybBljt;(N=l0qPcMx&Bvm0QF5Y9RBM#{#P*l&m;ezf$bRs!y52*p<eLb
z)qVz!DWJ-TyBAca2+d=Vcpz%_PuU(?AMvPwmfNX1g3k0-um#meLVEuhMLsbw-Ul5D
z#J-$?cN$84#4(A1r<XxwDzm~}CHp@bKL1rcK~q;EdjA>4KQgf1XW+QSDES&P^TlWU
zpT`hXAMxt^7tsF0t@fQ)_ocA$dvW9cQf8oCw9>Z!A@z~09e7=bJ*cLZvi>h?_g~89
zzpVX##n%5JJ3w2F)NlTmxC^R}AaxLA774-@y9cV0_-;Y#BcaRx#d9|^$R_hBxbZ7{
z@~gV?tGaS4*>Ne_va5LWX$SEdCGr|23fUF=R<7$k^moe1|1JA|w;lM~eBfWx{=bm=
zsA>1N<~`pb^%1D*+4Z?$=cmS9pPP1mY6i1GgK6M;q<-gn5CYdnHQQd*?|cj5?gZ6J
zkfqd+`UpJq_py2J$A&#0>UO;YS4MB^c73P^S5;j{e)S*!+kgD;q!a(9p8nr=<X6kC
z4`nMK#7#WmQoc?rb_$<!4TDiOgI*?sNj|r82cu;jgKaZ|LmPu*JA-9CgINuORsn-b
zGJ|3QgM2iDY$StB5QBstgP;oopCbdWB?Ffks50U-WZ=|fU{_~gS3}GVvC4w3`DT#@
zS3^Reb#ZL645B*h3O3>fJ{qnm4zV?Uxiiw+HWp1iQoG=M+nNVGo1S%Vebc-BUE7wI
z%^ROpuYF#*`f16^M}^Do=Pv;vXuY%KZZ-_%F1-yA%w2jLvOfrU4KwH<>%})ArxRo>
zyaht@Z)Aa|ia>P`a-V44bx3^#sf<#<l@W4%ls@-z+MG-2b1!Gk2cgv27ZawPNuGHg
zBA7n+a`Mdc3DeHRPdy#leK4qFPjKg6x0($Wd5bKv7no<wH_Zki^Q`%h;W&fj8TtuR
z&C=)C7c6xsTxOHI$Si%1Mfxm@^jYSqAY_s{%Pf74Rn9Wo{FQE%>pZGA*rZP4HA@Cn
zMywhPY_cH4DhZl^1FeULEHC7N*Fne(to0GBuXHVA(Ty}Hf-U3#FXw<)OvrsD;`>L)
z6%(W~!aVE_vnGOwpw&bWE=l!K3HTaW$fzAw64G4)tpmq#V=UzUVt5w`(oq6&x4o;{
z@d3hxuJGCM0c)jGvFj5qR0$f&`&td|96_oiNEfMU&$o)b-yx_9ywDE5z6i3)sA}IY
zI06l)@Ba-Y5B#bI_n|-o?0bGR9sFB)@PFmO|7C~%mmd6Ia};zDQs-&V+PRK{{}X0D
zQwUtkpxMeGRscdGg$%+448m~?Lb0IE5pO<&<Y8{(yP#v=3_;b0DEPiqQRDw2djI(~
z|MKYk=hX#uf&}#c3+wzBH~6pW0zv}n{~6d{f))k{Ul-H|tw691{jcx-U(@oxs_B0@
z8_;aNvhjam<^SxWHyD_XGBEXn_mh@`ckwiVHv0%15z@XdV+lF|n?nk8vxB%kXvdJ6
z3;2K?yZ_ow|Ml$ui);L66Tij4dV+yv7HIVXQzvM;hjS9BHsS4M;Dyykl5b_qK<CuS
znu3<taVvt3xdUydwFNEGQ?&dqVep?*>OBMNEztZC$3_PJ8K5eOcQOOdBnIw&5aQUw
zAby%l=fAe^e~X0wIwAkH?Lb#fun0e6V7kh{xSxUV46F1LA&rkbhM@Y0Pyat3_ykuz
zUC?Zruraizmb3;{I&#*a1?qA(pu_8wZ9#*Fau%RNBpBTOi^u=xU-w_{?0?x?|K(u)
zAV}>aeg{-NLFSUg?)?|N`(NnBe}QZNC9nVIJ^f!ab{T_Y5|4rduc9Njq63eTBaebP
zlb9~ExH+$)qkv|Bh;9h4ZoImCWpu;->3e?9I{kmb!T$|AzSnL0TC?M4<KBO@dw$iz
z`bv#EzcuXm+OYEr7=h+=8g{>J*!{K%2{rF})36=1yB2)t!JGP>ZyR>J11*Hu{iSio
z$EIEHYPUiUKxo?WrgiuGzJuSU9RD-r_@BwgeosC0wSUk1?v0NdmffhCdnUYMxkYG)
zs#7ViN)Us%D+8YyXlxC1Eu9hryFG()G>3f`uS-9J^AraA2@G~U3>Iw+X3Y%dEez)M
z3}$tVh9wMIxeOX9po&SvlY!rbf!BtC(*o4T;WT03He}$?1MMLK83<WEC(ppF%D`vB
zpy0-!9w=&?t`^c@mop={X>-Q(6Gh8zR<D0lzwJfyt~d2yRJr+i$+|}cYaZmUz7Hw8
z3RXQRSoI)p#l3=64~kYjC|GeXciH`%rT4Oz+|5~f55mY@dK->%mfXr+3R)uvUdfDD
z&kWi7nZ4+C7Nkl7lWFsBAR|y^H1B!}_<T9Y7#*a`11HnxUWW8~vgTe&opCm9;<1qS
zJ%R0egFE+!b{`DwKIq%L-K$}Xd)0cE@-_Cw%j}Dn+2$`YO`m0wG21wOmTB5-lT;9C
znl{@qW439^bfc8%w)u-ZtJir}uX8S1YMnLLI%}?F#%$}%dDfZpEHdZW7c6xFBgd*W
zu5}yCliC=x{216IKnpyWc^Q~^7#KOg2g5Qmz(5-K_KLK5myqit$Q4VFTb5we5M=2f
zXtf5l>LW-;3Aq}AvvJi&xH?L?YaZsVc?hY7;3WR~2(60*pG$%>Fl!}vm4wWIA8iLY
z>9=(2+tRIX%eK9PFiN++h1XM{x(QY>RbZ=<a8*VXyFL+9A60-C7opWh6?=Zb>!YfD
zzo3;7c%HZVz%NLhRlDa$?VcZH`~H>f`&YdGfAw+DAwo^3|7WfFU{SS?$8{2edI^JA
z5rc3(gGe3-3FR~JW-;(ZGO!0RFsHJ}H*y%<6105Grw7^`Bw-Ai7U9wQ&mi}OLF^Tq
z!WS;B|2*J3z6JFEi|T>uBYC_3Vy56@c^K|7FzjIvy(wh@T195+3#v^F9R8cQfVu;6
zHvgry|1%2yXJ9$bz_1Hk8MT5dqe=#bW(MXC2F|_QDmNs}Ks$Z-6#lb{gQ^T|@aPuk
zR3E$lT8{tKE&lV${b%4g&%khifpI#hK4R@*;GDp~GZj>u@b@$D^)YZwV-TLtrtn44
z8dM)CTmR=*{m&r>x=TXE8gzV|toeUg)Boc7{{<AkF>u{y;MxpAg0mQeW-|!Q08N?k
zOkfb4${>1@SL>dH)qkUq|K{=kH3I%Cn*3*%`p>{~pMl{Ls6OI8$slr<L+&}N7N|br
z2VY_?Z1i6QyvbJ_+-;Pw{x1nekorj08dNny>LVF5&}LrV(Ek<<|K<1nH#q-a`UVJz
z-UZb=5K`hUqCSF%2;YR(M@$F)Gq_J+5Q>J>M;vmt+=>poiWZEbIt)Ta?9$fUD&BnR
ze(dUz%*vq*D#_9g^}coc+O~h6apC{Olm8p{{;S&dy>iF5s-53!c7Cnd`L$uk*ZS>W
z>UVs`S|2s;degM~ZS$@-&AZ;z?RW?76Md-J_6mCZ&enI3`lxyL`>sPjCZ7C1>BOH2
z$9}c%dS9~qPGrwPtK3Bzp&g=5rM$+e3`!vkVy+Av<_yec3{0jB%!UkXCZKb8xQsvx
zr1=aO1U(tm;<y}pM7*c5c+F<<n$6%omBDEOgJU0qeGiLW8?$XIn^`4;ULmMHl8a)M
z4`C4ZVc@d`)khr049t2AjOq-`nheZp3|zVlBF5Zmu8Q`F){)gdh0_z;*5pq=SiAIm
z(}p{p+aGuCc-gh{RnxYY^_!p7ZUHSnC|Y~JVD;U?)%OcmgO*+8uf7i%*nw0=c`NSa
zt+<!J{BHhoXf*_>i6C91oF%uAQ8s9F4pI|ANYG$i)}kBPi*G_k>L7Jc#==`^kO4YK
zw+Kw8%)JH~uY=b|SqrXZ%)0^~uS=bMK5_c#xQWLidk*^4Z*eSHX;-q!zH~K&v@cz0
zQ?$%7XCY`fE@!@V{v!LrC6?LqOfqI0rOkv?M#d>KjZ<ctrc5_Xm~5On!?AdoZ~Z3U
z`b{py%WSge+2+i*&6#hPv(PSQp>@te`+}uTd5c{OmpRw0bFNuun$W_a>IvTM%LT56
zSQ!`?!3b6xF)+aEBV<N8=!Of>B8ODam4BBZB&=dW?jONBN+1Sok;6?01F|UzdCx4Q
z0>VndYa(O@yjFrUAhC>0Li$Gp>m%eLyMpyk;G=e!wGw<@3BrK%n~=xxAeB+c)>m)@
z5rI@pkZP!8%bSudZy;>QI35}a=`NwwN|^OgC1`-|GlZ<(jov>h-}4Q$t`*ils@VG-
zQomH}|5*Xvf>E{qC!{{A*!K&LAZjWP{Hi?gt78Ap$^*Yj5Bx3J|EJ;r=yagTr~mgJ
z{a-lurA5jHZv6%Zp=1WWXa><F2B}mA$rJ|Bcn1DR2JTP>&R_->Uj`;`2JU4-dMCLI
z|Fi4;XV>}9XABxTV~~Bvz`uxrVG;wwBvzSqJZis$b^mkf{^!#B&!-3K2*_Ce7cu<L
zAo7fX;T!|Q9v<Zf@^+xbV=8w4m2Lm)di*!``!DbOU)1uykjgg(j#~^&GZ+~985l|#
z7%D)g957ZhuvRg!*D!Ex=2bZ>WdiCu@T!1LKo!#eujc$;6+G#qY71K4reOG=RqQVV
z$6nA02`m#qE8;kM8Myip^^xF22KJQ<lDqjeK?~}1+(45);s*cO6+jmo$$(FFkT3;R
zvVuDQ`Bi>02tH%rImp1ZpFwyX6bY?m;5)z|ev(D;m#EQyS)2drZXjRCTm2VT`OhNw
zi-GG31M^u1?%hm6hZw}pv&vp$QTZzXK2%)L5W2Qd%p9~LUCR2ul=XiR3lM_bwWMhN
zU&-RXq7`UYFN4E>HlP0t-TxIg{1?0QU-~*|R!IEDe}S7IBz)(;@SXo+cmIpt`7d-A
zge30#7r*^q^d^YZy!oGD%})lSd<FquK1EwTMO!vmb1p?|K3P*XQC&tM9Zo3|UPVWK
zWoKR`Up{3&1}R?#0cQq2PcgH?z_RUK>p#ps`G5M+|DF5(H*Nn}v-Nw;*6&r@zgF+~
zR<Yw##SZ9+a8*0sRPB6Ix8n_{-r4#ROunw(`nr0{+v+WE8+Uwe-SfTg@V|-2{!cmh
zd*-qK(~o?geBkrM?JwKb+$)-TD7b8~VL%nHP8frP69c;;1A`VAX)rKpF>vaD5SK0k
zw;t%IY5{!)2;$dd;MZgjuxC*XVYBVvcbmxIF$;t|XEC@<XK<a);4%%|UFv7F>t--*
zV$dvNP|RWwiDKaNXJB?=;PPhR_FxbTW>AXaG0s-<t+Py-3OVzwc<Hr<O^;i4zirw5
zwt45<hHbBEw!A7||FmS?lcF_`a#lY8qkB24K`3kG9Z0zZ?-S)NzY9T-HFEI!2vQAY
zFS(nw_)ga1JDH1aXM&d~WP^4OL5JqxH!Nf<x|X^4I;b)NAK(V+3xH1?O<8aabg<7n
z(8WWLc_PTN1mv11Yr(agMK?0$T}qm9CUN@dm`TS%JN5=NgXV)=%GWp)F0;s-XO_9Z
zJZqtO)_jZX1s2&0%rfU0r_Hv?U1VRd)UAAtbLk4JocWg7^UN|peIiKr$0B`}S?Uaf
z_(^7|Gh9oS2R3XDYTV*hy23thp?%JLhuj5rU}OU}#4&H7Tk$f_hE1+D>x^R>7?j<?
zryj9@t06{2Jp^MRS3__%*7_)Q{$+5L1U`QiI$j5=hG1iN=qIn@s+Dk!;6Z94v~>>f
znh3&x#5g9Iv-&~qnup-z@c?W82;NsBxV!;U4WW^kwNmkxmk?9}Tl|1rA3<s&NJoj7
znh3It0a719Y9CAztv;&U{k3fOS4fqFRx6e5{ayyIq`>_ncx6<v57Z|D^^6YwhSx`B
z2me(b0iDKBckF-W!e?4Z%NdmN7&t;0cw#|FFpfbWmVq;bfyJMJ*_VOQn}Nv(L^60V
z@JFyI?Uk{(%&rHTA!647&D1kUy=GuI3+~I*GcYtT2rPosN35El`iM*Gzn~$gJELIx
zpHJxvsMo-8RLbz33V89DvMp#l3UXGPl-++8jsFZnPr#;41lLF9(8`FV4pbkpRWopG
z<W@Q@VFap2_|^V1i2X;aj}$HcOKSaR5ctl(zK4NfD`+_!+hhjLK16-Q*9Yn!aj#`n
zIwENdT8XFc2_ogpLF?=IR6!@N3LAiGPhmaKN#Oh{Kba-oGKijH5I(^mxrIS`8-w&V
zM%f)q@)rek?~9v&)}PDUfp#9rTm9$P`p+f)pMmQ$1LtK1j!TS!2U$grLh2({)qjwx
zS;z>qkV67o8A(`zDkFG(BxMe|zfa!uzr5*xX%o;nv1~s7wF>^rulsMX|G)U<|B_ch
zOB6(}{pY^{LV~yc3*P!Kd<(rk62AUl`u=~_tN$4$-eXWsU=R)BQ?%ugw`P?wW0N)K
zmNH@!)nyUY=a4k!lCuMi<0*LaDf@9Mhq1^7GKdB+FxfINIB@9Zxs`0MUHxv-@&A)g
z{BPU;zi#`_@-3e#wtlYM`MF}pC+HfUoo_0(zpB~xs&2=d>a8!Ux4x|3`M!DY=eGSn
zIu8EoI{2r3-_N?u@A8-2&zX5Sy5o>l`Yg?`PGQSz7OfZtVFw0QBk+N5s-TnKm~<F8
zj2O5~7`Tl=Q#@R{pq>k~R?=kP(q!P)0wFFH25wabK3hnAB<L}j!EFWzdCp?+p2Ofd
z8^m^=!eG@68oM)XV$dsR&?#k5NMlgX6}G6<@a=QYUJ=u<H*>;?$^|!?*52#g@v?8<
zo8G;zJ9ocn+wrn~+l!jbPb=0vDqVfQ2t4ct>F;E%yqB~3LDtGUkopKjt^nPt1Ch;J
zaSvV@LDtAYCW+80qpZbuKvP2CB?_RiI9R0vsdvCvEr9m7!cT95h=Z;LpMNz8Tp7XV
ziBiG$5JBprv^kgJrkskIbRx3<XkhzppQdeI4O`r5HaM27v@cq2Td>qJXQ5g891zM}
zV4ODBBz+EKskU+2Y=h((7TF8DYc>Wo@9?Tw?@+YVI(LB;xZ`7!JrB|evPhq0m@wHa
zb%tB%ijd}QL5*9yD%Us_F0s#<Zx2>!n={`!XQ6H0V#mCN?j_5;8aKPut~ZNs2KSF3
zw+dpZj2IXgQs-WTAb4ejJaz}^86lI9g$*eSu3;j0#e{q+IA$*iULQdi7~Q2?kc~^=
zgIMpPEw_WzN|3Qx<XQ>RKSCqn6%+E(J9vGB)=R?QQNpZ`;G=nv&Jmo1*F+Eoa%BYX
z6IEd>2#2ithSW-!H4#h%w6d-YTn!=DMDQvJUMrRE`39+yK;)hu72wWM<^JDQ2mVwY
z_)~M>Pwm0K5O(GMzf}kRmG1jraqxfo>W})xI~g<z7&rqN1i~3aq8WsP7z6_t*qs^J
zoETUgK#0Yafz^$H&5MECpMklQQGOzm`WtS8uWaCjbL^V`Iko?@%B%scUgI%h;4xxg
z@#K(-7t(wpWBiU+2h<S|)C1i~2f9?q>A$r8f6&bf+)t(TK|5HL9YO2a<g7qT8WipS
z3mJgw7Y5#q;Ek(Q3=CD!kvi5o2G%+T_EHA65(bX-e5z-}3_<5G2y6Xk5dANp30jz~
z>;$S2RBS-&B1F~yGw^<3;Ml>yw26UpItX!3V&I+xS_mgN2{cm2IhBFu9Jl&QdAt9*
zKA^6dzSn;h@IiL`YM}ERg!Do65ueU~Zq5JPYX4c}f3nEFVU&8wDD#L#{wcfC3to+P
z+*+U|8IWF{x-GZ{(f-dN@t=wJ1q1sd2A;D_0v8!XPO?axWt6_lsr-~v0~8uUhM;jv
z8LR)GK{?C+a+aWNu%Mfjz_%=kS%3!a#4P>`8vkdo{m<d|pP}u)?27*ar~V6_`7eCs
zzsS}9!dL$bUHQ*{9fSn0{};Rt>J5qB{4WSb!gv0Q-2E?j<-gS3|7M5&Go-Es^*f|o
zI20UMWNcWZ&6%al_#{oZL=D(PbvY#r*rm+4WG$gc&V^Ueom0V&TPc7+!k2-=mVw=#
zMLF6$dPdEHyK|2JUvT<==kD(<+dfrpdRw{aZQb@yjk`bxUe)gfAHKTx^Q04hrk?&k
z`S`ECLti_$J}y~sF0gE|reC$NQ4Gi#0_F^Cx}a5R3{tSeb9lkm&xtUw$uO`hGq5Ux
zmKJboFz{$HaH)gthvHNL9XZRT#K5f#VsI-lKo-AoD>3leGpdF&*tT+eP6Sm$zVjHo
zW-&NTVzBRLu<K*6?PYM9#OOCiIANVi?oQpZLmq7xBBwvhUi7+j&DZAb|5|tcZ{Gc@
zY1hx1ZEwmqJ}+MX7=+e7E?V=baP`BYHIG1x;$S315K{hRt$GAPEAM5kyqC4&LDq@~
z@G>eJblVQN1Y33|2aG_whhW_zc+Utj@|Fhf^}tEU0tPS_wBrVHx&pLT0v)pe>J)&s
z7r_=Sq|OB$b_!W6mo(#CWdBjGhHdV(8(pi{+m)=aE?jC`yxcr@p;g{O`{HFT6{{Ue
zmYZkKGtZu9nKj=kdjY7U1l}}cm@-o@eu_oryuhaI5uN+|8aBI>t+dTwV3RlBK5xEV
z?mS4}$1HV*QQ{P{)EVw&D??kh2RChXDp_uwyTCGgo+WJ5&L($}ZQf#s{6%i1D<Jie
zX<QSmK7!oegr!P?R7R<DFCy1R@G1$pCW5omK%0L_u8)uh?jSW0gv3=Jfv=hc#U?yj
zvDQtHQ~~K3;i-?{RT6T2gjppakK;ku$Q>nkt%N*$hgLDchv^{o5n2}sQYArKk~==2
zRZNij2+~V}kg%EvGGs?leN+akm>{!ARR{i*?*3k~>qq5&(8{{(E#K9%H!^7EGl<4A
zNF{+PBcVtJp+FE4^kv|3V-WHMA^rdckx&Ma2nNYm2I*c=<E5N>pLq<wLFyx3{r?<V
z{~1J<fbLV^HUv!&GkY*_2QW)rl`?!IY66-i5jO#~`GpKYdyY6{-ht{Pz8A8Fp!Lj(
z_Mr7_^46f~dl{?$?5h76I6gD5tYBc61Fnp!L4$RSRiMg<wGLDnf$Jk7%?r|Ip#3;v
zpq;A!Ih8;|TZ#^#8^)EbLAUP-f_4nOW8mD$z_NvbdnN<#YzF>m47^hq_$Po6#}o#E
zIgGORrOkip`~5cy{cqy`U(fTuq6O$+RRQ(?{A&OCwEqj}{^!;H&!hEUKpWgk()iA&
z{!LKllZgITaibry7XKtIKsGAagH{R3nSc&}Wf%X?!2Oef?I8o}0|vhHEJByrr7!R)
zUFT7IA*}meKp)gC5;p_wGXhmcHlWH#-U>t_*GFO&|AkEci<<xE5B+aY_Fr!0f88Db
z1y6$zq&^b7{9oYGf4*yo`bhZte~}x|`Uuory7OP;I%p%3?8=V}R!t0o)(lcEpvnlT
zKH?P7XA#zB7t>>vFkzE4Ws@{zmonp!apF~U=Ti;lQ4VBR4C7D=XOInL;B^HZB_SH5
z=+luk_2Q%>|0f*!*L4_#dXE0@J@&ur1PDzy@vnXV*NSzIvKCzNYuaj%GF`|nhe6UE
zeCC@v_!2r9@QzVg@X|F&@D?612GGHwVhoJp44~8elo;5+$A_}3ftFsgt1z%Dfe>U3
zHk6bHt;FV0X5dq0;CF=7N1X2cpkX?<=?tDT8T{t4MXeIc+ND>2-hIZ4$YozrHvTWz
z`M>fI=q&n{L;qX%{BPL)zj^z=hAqFVHhnB#|E_fX)6(@%i`G3VTnoBhym;+n2r6Cs
zxNO~%vUN{N!IczfVrUiU@aL>mk1|#~098gv^%1<tf)sAqOK-#LBghyXyfVrJFX0At
zkrqKWv4ZO$P;~>kY5^n&nz%<k^*SAVL0;;7(4}~gkvd4v2y)j$!n8AfExVj6*IVW<
zHP2mWp1aUAd%j85Jd><>Mj3Og^B1{Ptah(n=Tx@RGIxPh-a@PF1=irX9_yTiW|{L0
zQ)cQXOts9K52=p=nzp)CtadD3;!w2MxoELd;UcHPB~FD)EHh@CBu|6%k0AAtOW8{M
z!X>u(i)`{1Lh39~WmLG#xoD|-*-G!GE$($2K>Z_4U+{Px1Nd%1to0G-wpoaeASA?(
zY4fh6gS`y)ICKgLQY8^nA0c;<z$ZC??mCC}l^_gAeFP^VaR=!fK}dMC!Wr<k2b@7_
zeS|r12dSYTwGSEzs*J#QK0<DOgxo<~0$zy+C*k!G<RU-hnIlM*gxN(xu8$x!6nwc|
z6}BaIrMo_r?fwiypkX>lFA3f=f~>Er01wlF`b1y`w3oE!N9peGn5cN?*V@DXJI;W1
z6}27y?^J)7$Elq`DV;$zmq9U;K{}a1B!)pKhCw)%K{1~}uZBUZmO;CYLAQa)xQ#)l
zmBFx!N$U*1`AsIh|11We;#yGezqH8@4#lTTLYWK<p^SVIjC>NHffP0c28L(`;e1x{
zRjlHxSY-BdDIH~&*~1{TiGgz-XdkH1T@?$^ayU7g|1wsf%1G7vzpU|pPQ^D2%;&&s
z>be-1%0Y;^f`P4?fvpmXSSuK~b_nU+k+B5L8Oa#@XA=9*CJWkit7r!r5|p<99rz%u
z_Md_GHE8~bc?SdU5(d7dpshyyvq2=^EC$xi3{ocqbwD+mMdW|0=>JB3|214dC*6ta
z{O8vMApvbrH6*C}Ur_fypAPttJzdZ#6B7EM1$c61pkr4-%d>4jXWvN~{1;UG&n*0(
zf%ho`=R*eG3k-Z08HBH}N#5dAcqX9!R!|ePE<)7gzoZ3dbWYmlzmzQq$=HDIK!@Cf
zE^i4Mx8$?>&u#vn!R0?+)PIJq|3XXt^X~gEa`ZpnDG=hh@SpF}f9`YtdCr3v0$2Wv
zT=_2w9>Ej4{$KFge<5&pN$~oAt=s>l&-|CI-oqdh%fPS6DPzVcW6daI%_3#RB4x!X
zWx_0?$0BOTB5KGWYRD*N#4e`8DWS(9Z7!(dAgSvssp~7C8Nj3J!zS;`BI^yBSK@a9
z9ZSydD(_Sk-?XcD^Sfz#zPGM=l-7MXu70aa>TK(vP7V7q5%p*mac>4z69xta1_oa6
zWj&B9dmwklaWXJ)!tPdpT_?xL4pPa)4?cuPjDcAiwBnjo7Q7u*hJhW-V3%j$kO%dQ
zxKtQ;)EIa)KuE})K`olWvW~&Ng~73hDP+D}!EVd$Ye7rjCT{tYf9QYdiT{<y|Cb;A
zUv=bv_0j(|`~R2i{9n59XW`nfWgC8!uK!-T?nCLi55?<Vg3$WMMe80xY9jcdWMIa_
ze9(#o(B2csJW<Z7N04q2sAshNLC$he|0sL;-OLpr1S!)Xl~LBRJDE#wLyA5K37OHz
zge>O<FPXzK=mhRJL3&16^KaxVgkIYURS8~A2RT0>8GLd=X!m}niZxcbiy+7{XQ2gn
zwKil;f_?EapN7rB?YrEn)>`K;vIbWlw)vorjBVaxi_Ce(;Qo<i*8HHB?a{plf?BqF
zRIRluT5Mam2!iYj7un`7u*jNgoI1leb%t~4ilCP55M_?V%WMmm*cL9aD_ROdh07d@
zSGX3h@UC3v)4bKIakFJ+4~IoG1B(~~11s_M5oF&kq*j6~WrtKl$Ry-6a%h-fjNL&7
z?jWI!t1`muE<xfBQWL>Q^z$6x)exM4TpvMtNszfD<h~NtfjhJn4p{q2#MDP<6;tu%
z*JxD|G4&Dh!iMslA0Y_RMXK2KiR}6aR3+{CQ3giEJHHlg|6H=`TlK-e9q0b{UHM;q
z@Ne9r2YN*t7;G9C6jB&OqZnioL6wnI5`$zSgH#f`aV@`fE4Ov0pkqImWe=xCH>+7E
zyG1v<<s?q)DeU?e`OR-K==|p}0j*)+)%wpW_lSY*6a!l<14AIFGU62nZC7WJ1093I
z5DMOf+Rwl+8H^@?wxcpMfqGILm({>ybjbCQu;zaTv4^1Bg&6x77&;l4E1(EoAF)<|
zY9*lq(&kSUY(e#rviW~*`TxvPpu1lsOhKn`OPheMLlRa4ZyMq|462g&mNM`!V-TDR
zMZ)tL#E**T-I2EfEk7^_{BIHpI=NKM33Nx2s1E3KBLQ%qNKgk<8A0kJAp_8T77|9F
z%1G7(G$bf*{$I@EKffWkTO|9Rf&VW9>t_b``wScp7z8dei(F@yy3MEfNJ#U&q`@~y
zL(pztNej?vtJ2_uUg7l-yh?)9N8INBh3)?f#r@Z<{Lef8zxc}koV)(>?f=hp41_q&
z{O3ITpYzOruCt)}i0{&Wp-caTFM|g0M6UklyZm3^Dri}q@Xh}!SN}82eac{42b#MV
z1RbFUsgGEtty!h5xui|mC5%|bj6n5~hyjDBA)BZ+o2WLcgbA0drI4DNgpQAxewesH
zxPWFjr&0i@J`(d}5b<K*a$sPzWZ-e((94vttK`y8XAtoLZ8c>uVqnk%uR~A+lNt=5
zn}7tsoge{l1;h?sUIiJnVPjxm2bWGv;9F2xLDd#BM6oafqa+A1OEa*@fICex;EG8O
zQ6I4>Ft8{vFv~OWIx%P`Ft~K`hs{>2IP5m#apLBGg@^x_90#GoBmZ*`{4dz|KY#E4
zqCNi$cKt8i@xOT6zsjwDOE&x{1Eb1KU&=RpD&6oJYkdT{a}BbV8?uHf54?&SUKwSr
zc#ys9J_s$p3#xiB>LW<Q8&VTNNJt5ZTpvM7PI$Qr;$qZCkR@{vHIQ@XAh&@h&O9I1
zcf_k<i%roocx7anvk+1x8KuoMOr7Cax+1uJS5)6Y@A^%k<=ll!>>;RNseQpxtLz0P
zsk4ldXISSf3~b&O*}X5Yc^ha7sbr~r@e=#uC3Z!N?F$##7cPPf%~@p4b*oq%)UrLa
zeV1qTI;WE5kX7OiCCfpmc!g8xDzA#Q0d<>wTekVNZg(k}F6EWWBBBP4aYk^Y!`3mt
zXh@$3jf7N2Y4bpfH^2+pk^4uWej=Xw2o#1$s~upQlAw$3AW@LM7<4UE){+|#l)V&m
zQB?NQn>ov%lSi=n=vMY}$n~<f!NYe@HbgLI#T_sL%^^anCrI4{AtCh<B;_FAorK&!
z%3J>gj?j8Wpy?#=tx2UDUY2fnS-265Ha#zbb&ep%fI~>gURvaiQt{^3#hYJ4`bQ;O
zphq{9K{g_Rk8yyk{f2jokQr6GK7qC=Ve2S?7k-0A?J9SDtK9XieE0Wq*ljNr-~|kj
z9#i4oZ-slm<?Z>JzxP}Ip6~g4zE|w|Rde89_lf^gF8-f#;(x)C$IjWS1uQBU1Vb5E
zJQ#R_8N^~4q!Jk9QWylH8ARe4oaQiuu4i!H!r;7t!DKOm#&ibF9tO2e2Bl60wO$7O
z`HYq;Idq@$8od%U0FA<NslH?oy2c>Xz`$I{z@-WrSAmT1bILNXOM`YCaLcj_Xmbkb
zGB7DIfaVLhz-dwt#OB<j>+%+~%@e$|Ox6xG;KnNVpF!XV1H)zpmKFx)X4n{AJp)@k
z1A93Lv9vI7buoxNkp?dzP<H`M8;KkI=T!L5tpqxMRm||em_BGaNl^Vim%?w5^El5k
z@b6*}*v%lcgF$2$gYYRv*((B?{}n7j(@#3?pvfa0@Bf-kpp|T*y8nf>p(Awy`k>8I
ze7gU6bwG$m`@fJ5=xz%U?f;@W|0Q+)i>UwSSN+c_{hwLn3j@y^2F^PS9Jd+xE-(mQ
zW)Qi{D*KE}>5GWYKVe-^MJ8?XU)BuNlTxyPc8DbG{)^i}Paub^^yahv&tv(YA>cn(
z=6{B%|5=v&XIS%}aovB0&Hov;|7Y0ypJCsBhQt4vkNsyp^`GVRe~xqiInVzWJpW(l
z!hf;L|HZHT=erI<QkVYAT=}oR<ST>Ud<H>#25w6RK?4>Eb50oxE?G-XX)7)nYaS_c
zZb>s%2_t4P113>@MiD(`F?AMk4OS5?W+4qGQ4LNReL-a_No^-t10NZEZ(bD#1`z`W
z0RskJT?RpO1}*~z$cdv2dZ4Zlvpxfp9s`R$1DhcOvleJs0;qE&54(H;(o14z0M$Y8
zT8RbRU}gi=L<~IOxg;Ub*d4P31G5Y>n>+)PG!v^F1FHfEu_=HWIjkz6i6=%i24)Qg
zZc_$nR|e|>#e}Jrtw%!_K8)Y=Bk$n<ic=s|a_oQc(f`HA{udnnUv%()@uB}E2mcrD
z`(Lu-U(vQdWn2CgZ~R@h;aB;_-zDommu~n{vf*R#`VXZWUKg!>QLye=!MbPpYoF$?
zeVV)WdCr<=d262Ku6~-o>S^A}CwVKLf)QvfckZ(Lxy$b7Eq?%E<bo@vtflwhD09hO
z2ufdiH*LusWCS_z2eP>pa^??=eJ5r9t+a(8GG+d)l=-(Z7F>fYqRRj;oXc5sBX7x#
z{G~SwmfgyjcPVkoN&lwpE~RU1^OicmkbUkFhrFeBIg24D`8bxW4C~w%)wkcLVWVT|
zGH@-k4Ahq?T54YmTIp?^Hd`-chEc|xfTpdH-TMMsw!2rab1q%xSiHokWT`{ZVz3ra
zC&(&qp+(Momx|Q^E!%_JcX`)uaw=QtP_o>qY^8nK3J@w?=}@-HqjH09-B#DuZQk8`
zJnNUsh175>*?<rFLtp6)sgclXA~+XPB|-W};A6?*RT8v5f>h9$BwBrxw&)tTN+PyC
z%3OLAg3ziYJoOQLUI|hYK}bk130@z;t0YqEqvCbXk?W&E@bTcFGle$2fK*0hn-FI^
zK>A9hTV6uyBg~Ex@?NB}ZSRn)AqX2Xg@kp~4%S`zQnBj`q&_O!^{s5zw@UCP*)rHV
zhqB#Y^LKyA+x0nP+xwhdpGyz@tvUI>^X&hY6aNd>y|Js_$?Mp_AQ8^M?#{sL3z`X%
zier#XVUSJ&RYpP)45A4pnOlq}yb>uq%@%c>!Eqgf-dqqen#y1_mBC;-gV9U|vt=wc
zYdCbD@|(O7Hv`p241AYBCnvC%GB6b|aI1mJ4bY&90t2Tk1BVO)yCfrz94ntDGmkoW
z)<7KG5@rCGJR%H?t5mF?$k>7guf!okSSFz3Od0qOgH~v>wlc7_F>o}35N8tuXA=Wg
z6{tR9ZDkOh%A@*O)$YH#3#fmj;RqU?<5mJyN&H&>Mf5=%odh)h^D6)2R{X;y|CmAK
z27~w^2FW8V3dcBAPxENp7dL(-Z}DH<;lH-qe;xP#TAu&aTtNruNg0C<ZWRVk8wu!w
zPEmkYM%-HeIW_)stNv$K{Kp~xpH=2RtHgf>p`Q$V-x#<)fsTM+f6l;l7qsC>_$q_M
zEjGF5{OTV>bpA;j|CccXT|p~v0kT5L0<?-7GCGG@9|_q0m+|~BTJm47_dn;#|6Hp;
zl@X{u+61nWcK&DB4X%<7{bxA#pYg<h*3<vl&;I8>3#yWYFZ>s|_@C?Yf8J~Vr7rzv
z*!Z6zY&ip8Jm{!uUTX$ET}Dw84rx%8#3^mXBWKGaWzHjI&MsxbDq+MbVZ<b=&n%|S
zET+!Dr^3Lcz`!BRz#|Vj+C^NCU&&5f%S}YvU0lzLN686vx(;aTr~(6%4yaP#w*zhG
z;k5*vz{6q8z+ntpuE3-T9;}lE)i0nc7$A25L3TkxdPb1Sh!tEVv4bZUgg{5fut+kn
zNHQ=<gUc=nFeweb%w39sQJR5Ui&NfC%s4{NyTCquVr1j$*f|%=w!W%52D;a{@(gHn
zuH@MN!owg`a0pZx<sbTAcmPx-A=gJGoBot+`ct;ySJ{SNCF?&IulrQI{sW{wDqi;z
zvp&jQ{S>V}DgbYHg;Ybh>LW-^l)2<?#$uG32vRF$EWQI0hn=*LJnwq)yz5DGuO-dB
zkuv{Q^1Pc6lrsNT)}kAbowGShZsjk%oweXv^31cL-TPgt)>#*V4)C$gTWnvj+%|8i
zQ^9fvFzHmV+^Jx>eeM!?eH7VqFsg6AZ{sGHij@u}plZmzcp0cWQvzCdXO=lnFJ*>#
z?n3{@EfHON16sCw*KcwuTkcrA#JO~tbLldNlBHm@+^%S;b^apP$~D35yTZHo`8RKK
zuU_X;vD%|%y>rzX=c+YM6>D6o)_GKJ@UGeH(z4a7YqxjfO5NmcDMN2i3^Fi-mwZEN
zAp{9IwH1?uR7|k?2-;DCaKWn$FDK7OL68&9QORrIx(RfKL+X<2sY|ZIJ4a~!qpT%2
zvSD2$NM)3{^d`JM%2<92jvzfFIGM8oI-rNyKf<h!AftA<>mKK>dkh(@gLjVdz_-ex
zb&-nKJuhDO9Mn<T^t=#?o)&I;T8Pw3f>c0|{t=uFS_cm9A7R!<$YXb9+dg1L6+1qa
zZTnEN?OoZ94`n+(K*sJMB)CfYQoi$Z#m=u4JHM7f(5^3~Fa%Lnx$|T7?$2#U{!Kjp
zf6AHv6&v3AHyn`k>SB<I0aZh+ZVZA!3{uey(lH<;70n=-z#x^#z!%HFAIDH~)cWv$
z=_CKSHvebn{mGDdmBH&UgWYxpo6QUk+Zi3VGCFQ$u-VCAvy(~h6NkwUUL(-G4-9;B
z7#P~P#H?5aO+j@Kt0V)b5F?i`Bc}u-rvwAL7zi<|fo>~fumnv#FxWGIZdlL;i?nK(
zACb2DFKzW-)a<{Q`F|P1|EyB~8TgJdFzy7MRth>7u8o1Kje)NYgt%%L*ee-WRx!vO
zf$XBy_W!Tq0Xl+P)bKx#>VIw(&=o~|TF~=axwZatY5wO@{l_BzgF*fSlj0{fRnTe*
zF#}NT02%AkaR*gNS`PnJtp3Xy{1?{(?b{X61#Ok$*9R?x<J0{ws0TtqI{$gq|MDn(
zVUu~sEb)v%_#uP9bq1cx4BQtOIL|TgoMjNa#2|QuQRF&<<b5{zr`)Q)Mf5-i-$|H&
zj_j7P2K9raZT^c|fsnZMe{t*oN>(5wYxQ5o@;{^Pe>SK849Wkw>i;t={m--UKgXv3
ztn2?XuLYxZ|C!hQXIlTCb<=;2ZU4D;{Ab(spMCd#mIMD;5B_I5@}K3{f1cC-1<wCh
zy8K_}?0>5T?-_y@F$h?LD+LJ#J~akjb!HJgHc3-f2@`fH6E0bEP8l-}X;ThqQ#MIs
zR!Kc(aUE7sb!K5zCLU=9RuKkf0R|?15Q9^KiC2+BSe;i&PfXcdLElx$&|O%`f`L~V
z-1iY>U{Pe?R0GvPpqo;`{UbIb24;N*Mjg<o9is|3t|9e{AOiy*c;Jo))=^?%U|<1H
zC-H;UGcZensvbsZu(K6FB)cX9pAmzqC!1lol5eq1?zG6R&1rMbmaM*Aw)Iur-cOZ>
zf7PG-UwPtx=@C#Jly~5N{{H`k2Vv;Y|3YwmRCwTj@&5mXdqLxMMO*)tZ2Sj8>;II1
z(AVO1Um&wcMQdL`>ZAO%PYc&PEm-{o(lg3k{WNFQll+y>z&#^S{|Hha<t={zL6C|G
z(m%>xc0YGH=zwm>Oho$9yATAahSHYYNdcpzC3g}R-%eV1H)-MBltp(E=ih{!w2(3X
zR?gykIg9V)EWT5)2!tw^-^rhQDSOJ{*tV?!rHfp%raLBd83t4-yBEki=g4|hn<Y<i
zE?nkRu++J5nRDSXm!g%f#j9M3mOJDvHcp#um@?C;bX8=}!RY=&eob3lDptExtad6}
z31L7M-C5-=G|ic3S+K~zaZ7mTo}jj!p0yiXDpop{E^{hd4o8lqD;!H#*cB~xt6Ce>
zwll12Z*coA-^MNO)$81=*SS@#b*ownVS84s_paIK(X`FCeYbDx2Ah)k%C0F4LWbbJ
zFdL#Cg0V1bBDe@>nhZ8%2dRnRBwA&ZJpVFkO$54qBV_^3`Y2<`Eyze6gamE)g4IVE
zu!HZg)<<wbc$EZM>Hx2gAPf)*-mQk*KO(z6DgY1b<!^YJzu_@S^%10(1fNlYFd&0?
z;ED-*eFUkbO18a&S4ohH2}XV@+wrM<$CvUQUrKg-0aZz`%BXbLmx^7V>JR?xI`zN%
z^#7_IpS(H_iTZXiD8?~xdo!@PfjUHDVGLs7AS4mVAQ=Th(lHF;aSVbH4BSzwK@07V
z|M$A|U;oB`<um{J*8OLg^`Eow6+_5b2B)2%D#>gWgZXL(tL+SSyCL-vkKun-t^c4!
zYYd(2!sbkTMxZm0StJ-Zg+PcyoPk3eR2eY|GO(z#2%8B=#tTTs^GW52D3tI^goE#)
ztCLXQB4!F&X(ns}t&f-`{xk3%1=UBKlNmTCGw^mY@O3c=HZlk_FmTs0@HH@q?iMt>
zuIl{XB<R1E-+v`H==pNoDxiI=EDHab<o+@${^HO84Ydgx{1?&%ArWKHs%#NskX?|o
zyH%b3Yq)|cBhbx#=Kp1k{!3{87t{PNq6<R&;7c(C4M0_ru)%+R@Qf0->TeFY4@?rT
zz?IQG2L5Z%`iT1igWx45;VUfSx47jVaw)&$*LWwY|6ke^bR?auC1{_loGs`~R2f@P
zWh83#U&022l&wMgc4aI<b6)Ju|79cp3%31Np7Ec3&3}%ypvs74-G7#K(E5lGGH|!%
zKf~JpjO+h1YykIww*6<=^PlnXfA*80%1GcmXypULq(=-clNp!-!F^I0&`LI5RR$h4
zMj;($F(XD1113>@RtY0ENn=n=1ZFUc>oAFFGYhLS2q-adN`O1C+zgC-pgVn7ML_o~
zuuFqFygUjF{HnY%Mv@wKQkwSSYIdNS4m1oa3SK*>%)q1z8n5Fp0UcP%q7UM-sDq9^
zV3G%4#~=(kF@^y$G{+6@6S081%xvH}1tHM71ra@N1uH%^HyM)<RksZ5m^zQbsY%`I
zOXeSKTyv>)+x^~s&zpC>YT5m|cF)I(?Qcr<eJ$DdHE-{)!UKPE_y5m3@W0>ytTH<I
zzv%G);v=BJx}t;si}w95*z>=1$A7f?sC2_ONPSei=3UX+7e#AdKt}2cRzHE)M>(sW
z<g9{^)IH5v_87X`XgR1pf>%bl%kF0_y_dc0KJrQicx40`vP)fjI~k0UmfT5Nawlc+
z{glP`Qy1S&TLL;wD0kWYg5{607u`*tcRhaE`RLx`{*Alsa~J8yPErr3mvkzWcF5&5
zif2>xXOQ<~kaK5{bLX|nHcpu6QnVaWAGsE<a4T8uR<hc;a2cdN(odT1RJtm>Yk$my
z!vQVZ-Ky3)m92!=N02JUp=7yb!6NITC4LQ?BfIy7cJA?O+Ui!d+O={OGIFU{?OM6U
zscfZt^}2wT?cv?~!n^nRHEs2%S&v*FdDN_j)JJX&TYTDf1$Aw8tzBUeRmq{{$;2ni
zz%0bTzya<TL$=3)>LhsWgULvpdpQ}5;MEYEku>iTT9t&YKDq+ArVu<{cO6VZN9{6D
z>mx|51R+tbt5|j;bJ-0@=O}Z<?aUPf=8ur~B|+9UVAe-DYac-nq@$F(?lGj71X*Q=
zys81-KSHjLz*Q1x774V}VbgPXp9ns$1nDakZF*I-=@q;_g3l#EDkB*AzH}>2RJ`>q
z8iMqdN_KuO+4;G6I|!BU_)@n0bLrOil{-JT?Elqw^8bvp|J(O`&6s@FFlHK?Rw@I(
z7Xzmo1CJ+zkUwY#x_AhKXb^)~AcJ@igLnuCNk%Zp$1zAJGYCX57}R*K{w@waDVpy*
zXgG-J@PDSm|M@rkXPEn6uJSvB|5*l;4GhNX8H{!?nD1rK`N?JkI*FQH^FIUM3<idJ
z4naM30bK?zNl+ccA<D=p#=s@Qz$whY;K;!0&A_)%L~W;-=06E-(DixpcA#TU8AM+(
zFq~izxeGbpTg(Ks?MTY_Kbt)GgdgT(4D1U)NN^&9;6%{IRlYt3-c=0JJ2+H+NSXat
zbOoJRs^JFOvLtW%Us&@$ufl&;i63lIkD0|Eu*=@(Rema{@l8zkx2QI#u8}tRFK+?b
z<)ml}I&D?c5p)>1h9l^D3ptbj5_<ndwLwT+_rI7SXvB}-;6FdOG7>TbEvo~q_tyBx
zrSzIz?lz0`4F<8348q44gbp!?9%T?c$sm53LHr7{%uP<kH-eg91a&}1I*1v9&L@|)
z0iS~kUPmWs_g~WPzo;z;K~^$I+WnWX{m<a=pTqY*L&<-^{{IZi|1+)n&#)eh)`99B
zP}Q^UKg;_646FY$tN?YL7*_meS`8Y<W7+thfBS!-UH|3w{FmJRUt`^0&i;F1(JL9W
zDj3)l!95Yk!dejqK4k_TWd<Ql1|b~=Asr?mH3k6{2x1jdVG~wm6;fjrQezR2XXcj!
zM;sTpnt%+cu`@96F|i6Wv5T{CN-}XuGjd2Va7wfCDey_C3rK0Q@k@eh7EW*>BM3Tu
zlu4O^QwvlvF+--7j6hRMjJlwcd6+c7Gl{a`zLhZO3L$<~0Zms)BR@&|1f8H_r@~p$
zU7Hgo9Zs8ZGIzzT@=Z@__Pwv#^R|B1o2sqPE4RI<-1eek`<sgGZwj}4DBkg@V9(c*
z13wE7{4F^Qs)^v0QNh7~g@-`(QPJW51&99UAN-%c?|=5L|3y3g7i{}mxam*9#^0qI
z{+6u&Q@Z|p@!GG&8{QSJdy}{3N#2?#kopM1E?E5xI&HM_325?YC8%P`TJZ?f^j&ce
zw5)D9Y&`>b@+fC1=%~89rT6kc=hQvOS^6Mj83?5=y`Q@De#)X-3G=QcEx3`g2(<7x
zYuTNG7555O+$&jeJ8#j|oY`lRCml}e+!0!{+A(daVOXoSSE;N`x~y5OkaiHisw<0>
z6}yBHldvx6N+woWP#b_#gF`REGQQKTc$rJ#66eBYuEi^yidQ)nt+X##YMZy%Bz+F3
zJ}O%o-n}no!r|cdU66{yrFyk<)hegT)ehw=A;_k5xlQSE&)N-<y$7Nv9SQ8%<=L>o
zwPuZ5)oS<ZH7?bwU8+~RRjqZcT;p1~#=Ux-U(?p$_Fa&XJ<r+=kZuv2^r+n6S+&u-
zW^+jE9>}nsea=)R{|bI1Uq%&s1|B7FM~NG>sFH!3fk6PX8iI@9sgH1Uh(O&UNF9Vm
zf-57?cpYS637+~W9ekk@bkq)9AEht5nX&v9uKEbMe?(z@gloJGtv<@%02;D`)JG5!
zGMWcxgQ_Hq`lxvGYqUxVPi0iP^*wTBgxpao-tnn;$ET7VU&?lUtK0v(_1OQub0E~R
z|5w87Yo=L?nJo$#1icxUY#7*`7zDf-gaa5v0zuV~Xb_AH0ByRL3<Gf`!WpC@L3@sP
zLzP-CRDmla;T!*1j{avm{-68IfBp;q4KMvyI{aU~=|4lnH3p}B45k|y^tUn?ZfDT?
z&SLPF-|#<++J6R~X`qu67^E3k6q$KtKvfc(FsMG_76lb+oc_{U1xhBzHLT7`>4FBt
zBrX1{yZyKD{jX;IUqIz8lh}P0<^RG)pc}e*v_R_+_|*TiOWkGQKF7eZ5QGFLG6+p#
z5T48+F@s5Audv}cDYO5oj-WGzKvj}6=!7?M-T$2Opvs6r_%nmZT?WBh3?kPUM6NMP
zJY$o6&8_%fKn--{f`I0K0UZz$(E%NHCk?&?P|5&Q8A<4YuG<sW1+m4Ap!Jae_&gt8
z9nj_@cGcgUim#ZY9y3c_XArv#t&c>GFo+*#l0MHYbAeUvHn++{VeM}c2EQeYL7Q8p
z%|Kg<AZK(-+kvh$!mN+PZ9w%APvC#8{Qn}8{>#nz&$tp)B{8f4En{F<1+If2A}hd^
z(JIiW9mmH192>z>i~lps{>RYwk)iG;L*@~NfcXsCRSc}b3=9t73W5vN?gU|B25tpV
zeFQq{Uz34PgMmi@)Y*Z|@o`Esfa(`n25xx<9%&F_7X)i(2KQN*z$6E#++Y%7WD{fL
zkYeGM1!-Xt0atna3@m&MjNIVjg#%3Tg6kR?&|n?29s`>x2(cJ2up2XQ>VuIc1G6%y
zCSnw4;8Eh0F|r8EE$&&-zxCSmz4s>Uf6>41dH=yreFr|a9{AL_=UwIQHznJiS8RS-
zy5UjjrYGfFo|SHSS-$Oc@s3ZWyS^0e{RXLziVprOK<*rYN9qa={wp~64^$a}>!Y0A
z{|mSO%ijvBj0!+28U7Tl{ROIwFzO>nWt6w(3A{eaS^fxuvQ|98tdAg5MOom=2(-ch
zTpwjGxu3n{e%g|IDU0qTEVz}l=uZ05yP);qs~+X9ew@GhVfx~m$+Itp_aF7H-(;P?
z$S84=QecyebCIxBCZ}F3gK7waqzi+f4TFd^1HT#Q9CB7w2GFuq$i6KBP-l}#kzF_3
zBCZ2oAAveb#jBi(S2+|e104;Ny}%@WjuW^(ik)~Qq+_>7&3aI^QUj@C*1A-$h0Oan
zRIYZYT<zPqC1%3mgy|<E`VV?HZE~+$=U%<Wy?Tu+SUpH1xITi6%X!vr@Tgf2>J1^)
zN6-qZVm)LEDX?j0IHbc>y~HMSl5tp}vRk5*rafqml1+-8Pl-oVQ%2PiUi)A&@YF{{
z_K!e!<{};a4X=!_)<=-D*5TuIpekt@sDA{hk08Ay$ao#nVBKxx`UtX;0dnRlq<;iq
zLwZS&6?fTd9zyy@Icpywcab3dBa|v>{qw^0&+|7tg><06<8@CVB&Y%cuegKkOM<Mv
zgR>#^4qE>Rtv&)zExjw*f`W>+yeo#)L<L*l7HoZ+zvWHA*0+$cyYihM!E3+2l<of7
zc;HXh$^TPN|8Lp#J$v?b+uW7XZcPl*;S6kc3`|xGoQ@2Fo($r?45Hqkb-AM63}Rjg
zB;o}Ul<)(Qk`WAIp$xpiEFOJk7ybvm0d1V*KKGyL<bVEa|D`X3CdAeD{b%U@!QgR*
z!E!Hy$tDKf)eM@O7__%AsC{D5`z2=hpI75Q1OGt=hP4a~X$%Z;44^R?Sq4@C5Mt0|
zU^igk-Kb=I9dh1|h#u%fP+5on8s4B|c{H8Dy(E$U3}SBt_5X_-fzH@rQU1@Q_@7hm
zGo#pR(2NY{DF%W441xz3M9#Cw-r!XMFSND>9k>eFf2v{ws#*9|{xgdG=MuliE^?bo
z^caWm5mupHO#Is!`3|!P9%B%>&m{JcLHHvG3I1Xh`_C-?pHucfhdk(zI)1hPBHEz!
z58?(OH;5bk7c&CgCo5n8Iw4lX@ISBCe@?aUEQ+5P<!&=b-(ZkF59$qxo?#F<%^-S?
zLHq)P)GaoJ2OLV@1hhcw=p>B(i<yA#UKh9gFJt##*73izJ?PRRX<Ja|NYd`VybTD6
z*!>r>`Oo0<pDq4BL+gLm>His)fze8Ey|Wxlt_F3BSl9h$TmN5l!+(+W|JB$2ms|2*
zYt{$h=9|(vTiC+qbGbA!=;tuV1TgSAGB9d_)}Db*ssvR+NEkd0#-#w7@!?Yj^_4g!
z7}!KX^#}_elw=lQU=s$7uX0L&qmK(boC2A8W`y;X*g-uYCSE2sK^9JN7EW<clY?E5
zfr*0ww0j70R3;O6wi=>^7t~i`RAgXOgx<T#smZ{p3GTZ|f@>~T@XQe}1FN*Mkw<vZ
zgua!>r=9pY>B!fb&2P%qJ}X`KvV6nq@(r&mH@&Ue{H|*AyUHzZE4P47Ew0-2sdV@E
zvOPbF_Wvr{|10;v|NKM$a}R=0-l6|_hhTl8L;v#*g19*c{^#ubpS9<I&bGgqn}6kQ
z{*}G)NA8ASxf^~JZunNP{%hfeclqny<gI&_yY^|$nkR4snLmQmLC7Se%*g?Fjxv|u
z&s=^VGA@^~_zt*kx}UKay2>tZ#l506pgm2XjZ4dJWzN5xJoN-<4O`7Br@T2fiG5oB
zH8M8IA|}y7x;`ArPMne!jDiNN0=kSmnhYFrpfOC)y$syo88^swEqL6rGB619>IT`y
zHn|opbtzl|9;{pGR=&=?V!eCCT9;DLjYXC@3tcK!hjr~um~uR<Yp-|xCeTDr?OM0m
zwV>`#?ONxWbxzf5ovPP5R;}@C+8W(|C~nfxsJ?@cfjCH|<5~l%lH97+!e^Zzqjr#r
z3DW0sty}{#8KS0qjcdhPuj)<y4co&y_l0$W){whZthOwgshQL%<&w&v>BXh#A!Zt)
z;GAv{T#i`{!A0Qp5$32JW_<)7zC#<hgLIVe_m3cp8IZ^8ptTaLK7v<CkopL9tsV3X
zhnyAnAaxUTxgB`^2!DM9ub6Vc*ZD!JBm@aw?Fp%j^433r)I<n!{qy|w&*61YA#B@h
z;l`J^>LbYf5x6D-owkZr8I^2#SGf6Y;pVr+Ti+IKc>}453bwv21WzQDY<pL}^JDG)
z?~TX)x1IjqeCS{5k_UDr8#(Q37(@dZ7;PDt?HTyJ86*OrNYocp-3WU!2!pqZh<Y->
z5kyeX9V`{hz~jvz8tp&%ro*lOPIvwbT>j5;{6FV~{{q+l%U%T`*17)~LJl%$uV+wS
z&7iV~L3J^M(rO0v4Gc=}SoMF48h~mLZk2273YQd(W^qb2fp*h_R#I^>F!C@ks4(-H
z3n(5{x46Tn2~sI-3OX)ZH{icX_<v1j(Cw=-hW{COe=#szWe|PMrwLlB&87LDRRwf%
zDUaeeKE+4ea`$;vE(@q%6V!StVfaSI_`foE+pN6pe|bC5Opvtke}47<ERz2j_<ykp
z-(nHG!6|Z-P4Ezl;7&%qZJ>isc@HsgUt{3C!NB*1QREY=#D6YXP-Vm^3tAw@sqkMw
z9a<lWf$KzZBhVE{qK2TO_JoW;%NRJ-{<A240v*jKeS<;r8iT|cP<<qJmO<<+gTzHf
z=_~At5BW4+2x@~?bW0e4R!T@&fKCCDvHLIQ@L$FOR3Aasazpw@@;0EmB!q1KOFIAO
zP5ZCW_@8Iqe}P4y3W#wvbj1SKmjCP<L6s51>i?|kz+-gN|1q?^Wyn3t5V3>7eky}v
zIfGy%1G_&1XqAWo1A`)H>K}5-0lch2Fa$wccsS%hgCx943?L)}nk3>BX5bKFU=v_q
z1}}bN;s*`vvx`CNBhZZi@ZF1$EkY2I6;vxR@G>w7Fmj2o@k_Gsi7;>pFt7@N`cMpP
z;PnZRc>zvv2TGcOK^DBEK?yXz#2^FiJqds}BEg*qnN1L8;M0-zt&8c~-@NN{*TG-)
zJ3g1JeNnXf8Kgd{-txX`>${rmAF6kMuHXN?a{up|L;s5Q|0+H7H}4>LDc#}!`G-NZ
zPd<2z4!*uF_uzj>eU!QTfBw$@nVWy*Z~c?K@kidq-}#&VlyCb}yy-{L#`k$^U+1lR
zmbdO1q%z80{Wu3SeD@48d6cv2N$%>W@G1#1UY8A?FiKx~H*@*@jAi#SmxDGUWi7o2
z8{&IZu=+{v^1G??uZB%H=G(s4u4IK~{ABsS79p2%HlqXvjW8zl5C$1f25~zEVJpyS
zu{_$Ko0u5n89)IAnaJY<*V-(wz=E%rW(0?!D34B{ZETZU@iMpKW$vY`yerqam4lYI
zc~x)ls9Xm+%DZrxYvr1tww+1SPC}M6c-L?8Y}f$m`_!#-uUqF*yWXXCJ)~ChuHO{g
zzALnIPe{jZ-^MMF%E+T;J-j}0ty}}ClAOv`Lb^h(m1~gakDz_1inXp4Ydxzr`qgjq
zYupypx+}DOS3uKN_lD(W`O^%ds+E27^!;)zBg$PfdV<Sl!D}B(23matsg;uFUQU^J
zh4}giUOj;qeuL(ZAiX3=?E@h*7hi`kvX<O{&mVypORr}xyOF-^CLDn}O-M`cz#Sz-
zeFU9bTmiam1k!FnCLt^Cve!J!M!lROAH3WSLc)7Y5C*6k0?Q&QDfrpo;NBBveFPDK
zoa6wiq&9;#A|X#CA=O8qS_w9n^cE}*>Mj*;ep?Kxm|hodehpfxx8-Hc_7Clceor|5
zzvuA3`VH^G+K;LQ^f9O;G4OgZusSiY+cWUEFbH@u2>UPyc`^ulF^G7A5TpWv)IN|t
z5wZwGT*8lm*^(h_oy)HOlGp#s-1^US;y?3A&{AQ(>;KiR{pa2LpCkD_gZW_wt*s22
zTNqRpFeuJtke$z<xR_Dp7QfL80j>YM>i_w)KuA>cznIp49<d1w45{Gah>3y0oPjT#
zTlJlY-ggdl(5f~e<Nped|1Bc^n}`3`a|cb|2&?~RV132FaD;*N9J}0CalQXS+Ms<y
z!rK3Zv_Qur@N0s&;s&5?qzV@Qm2E%>R2kWVhQdVj{|l=9=aBu+B>IYh{|STW5eAXN
zEaJPFM0c_Xf)E4$Ar{dS4AL*zRX_8og9Zb|4E~E5{O1NEF+I@IZfV{BvihKd9i)x^
z%Nv4_v@xj9C8i6ir-Ze?@u<9Km%q;_c@xw#l03^Gd744|B%|m>Cb3J*QujFIAM+{y
z7u5nC+8}1|U)1=&q&a8^PSWnbm;(q&Is6y51udl$xBf3;`CrWXKdTK0F@*o;D)`Sf
z^S|Vp|AO283vK_exb?sM=Kosj{wpo}uf5>ERR0(8x_j(-=XfL5GC0p*&?{$<OJ@*r
zV_>lWm+jKvN`W0#M&YNq8Q4WYEo~kp20ldw0cA!$1qNPuc0oB-ei<enNl?!Saw`q9
z5U7j9Ap{<O<AimQn6Q}+8O`HlVC84v5Mt$%;1*Ee<db9K5(llygU(h%^zkyVi86C5
zvhpf}j$B|6hPCq{vT%|S>?3~AvIlN$al@4O$~7HpAJ0AXqkH$)=FM-**S;!W`>Jf)
z_o`jLi}(L8J@~(9KL`~b04?V(I0W8pbOgMb?l6d52ww7CbPyz4eBgiaf&T@&|L5)e
zm%HO{?vB6tTmNOQ|B=7xPws|adF#LDt^Z!Q=|ld8clqm|=dOJU8?1YlvFri7GJ@1Z
znadw$E&=s@AS)2kmfTBUdOv6Rqx@A*E7v`*So^GM?UTwikE@p7%Aa#SrF&Ok#d5cd
zNtV&AhJh6-F4=MxQ36^%Jj(VQ^46RZ226sQ9Q=xm98!#|0u0QMdWRL9gCIRqc;Mh+
z@CoYryCyc<mMn3uTIEo+&bfA@d;M0InoXWH8$GHwI9IN9C|d<tevmTbbn47Ap*{P2
zTef*MZuV;2?A^H4vwpL8{U-1FP0(SthE47bn>-pfyMq}{)oWdA*16S!2IU}KA#i=P
z(xqahOWAVI>NSp~D_kqrc-C%!m;mV?fmBwm@~mFtRkPN+cAa<aI=_aE0nHoz8rQp5
zuW=|@YLhd|ENzN;`c&)enJ#6^y=&K{%(;XM;i`|4=Uz^lcL`UO1h0V57~qZ)=sa-9
zs_)FjklQ^#B;-tWNUa2_hG0wVGQs^L$VeT8g!hk-x=5h<2+~S{oa>&u{B9ojpk3tY
zq`Xy-!{Hy0R3AZBhC>#IBUecvHn{gxu<lvm`sa|=R3W&ADg<|$AS7}}3BpFMm>}IH
z&;?1bOOfC+N}wfnJ3fFG%N_XIbofWx!Cw`d-vqQDk_(-{pq0VE>&?LAz`*9h!0!W^
z9uo8fjm`;pGVpsa2)Kigpc{j*2ZIP0LFyercLqUs6cJ%hkcfaAi)ys;%#T4QK=qOI
z&Hrp?{_|Y=&vO-YUbOuA{|pnKGuWSIFxtnUx069_BZJaB2Km_x(sLPP<}t`#=G1w_
ztqQ6#1av_4k(kzhRm=af>I)eda$q^fno~T1PvfJo?l*2N&?+?v%l~Tb|BZwH8w7yr
zBL&O<Y*POjSY9%4Tx1Zw!!Gw#P~*R#76?fg{FgEUbvGo9K^sWrEkMWesDLXYWgF0h
zktF!)SZ;7-#31~SLEtF^-(v=m!;t!jMSM4_&@N7~eLS)kBsA{`8-flaQnUiqWwPe~
zrA$CqWAGY*j_#2#_%EaPU(Vpag#LeceI#M<UqlCV)B}(5FHXfbtg<f{ByNBZs4@cA
zM-rzPMK7{RUE`2{EU58DMDxFtK4>UX#tc*$$=QI$<)rLEm65pPe`&}65_bQ^tUy(g
z1bE1f$N9fv#D9gh|Ay25%WwKGzUn{2;{ObjelRq>V<>sRkanISa2JE~3I?O;463aR
zQdJB>=?rYq3=AHi#UTuu;C-f&;9WWF;Q9g<-uP&w`ba>TL0FAVSd~*ugHJ-0PePST
zM3I?S3Oe$}E(Thq&nXNV!~;zU!Ou^?G#xUp1X-=k#=ywU!YRqYE6X7u%g!&u!Y&H#
z89^3Buz_Zym?c?xl{p19S$I^yJCfjYTX2&Z!My-(22M>)>u5fWXa-gb29+p_*vUC_
zuC;If*s<?->+T=r+ke#U`(JY4f928t<wroM>?mlZcgeB;CCC1k9{XQ%6jT|3cETP7
zE&oQVk8*eX&D;4eebe`>4L=LE{m)(p>LQhH`&Yc>Pucdbg_}MUZhV;w-UbU=@4fzI
z-kN6x>t5undXl;PVaBot=}YftEPIf*@=4*E=lN?vs9@cT+|^G(+*MDr=HH0uJ?7i6
z-8yfneoVh|K(mZ<5wA%yn??|WqBnz_D`*->z=(m*fPq5=*8LF!FJ<Hh_jDjj4IxWV
z;K4$W!ONxQVIN&<U$)G>Zk<!j2G{z{?)6(e8n*e=Z}G0%>{`9vxpJ*X&H8|r?XeS&
zBuqIT+OyBEbvvX!@@m-PS-%<H2?DM026vR4Yu33pY;vwy=UBDIrFyMf?Rv<F9H^oJ
zt?LG@yK}Ex<qleX2da#G8@Kp1ZH26A09RP6-6~c>kW1New~CeSm8(3eS9(-~Dl*5?
zWo~6lTuT?bl`U~ATjE~1%DZ-5^6ZOngjq2q&AF5`=MubU1ZP0%BdjER;10q-u9d*s
zkw8^a8t7Q{YZ;562v#@U%vy2_S|5RHqV(lA(w9Tm*FowdWD>T*0Z|`8Y9i3^>?+Vv
zhwwTGGM5A)A+t!Bm-j(dgCmdNLD-Of6Ea!22DBw1f9=x(@ZN0@2_CzH*GCXWG3vk_
za(x6jbG3NeYXkz1(Uop}U$Ns;?XGXl2mW*&`rmWpfAjV)2~#dwrY+(!F9t0_U~>d@
zXSf^~M7=;r*o{HhjX}^6goGRzK*)(f2!;e5L2LnM&@DECE)4umP==rj0|>c+?#JW~
zl1o`FviHBj$^W9a|MP>-nGwA9pZD^AfouO|R{dv)yU3uoi$QA}gZe54rKJoC(-~x^
zFi1~jkeb3EvzkkH53lZT0fT?6TA)?moZ8^!cJlW@=PEFSF)&0iFwNwU+sdH<x{ps#
z544_H(GIkC6}tb_5p*mMyTX4)sehbGfA}^23uyco1P|+h&gans9YZ7vF61Rlz)Pl0
zL5K4wT7phVl`;bLYlJoabIJc_llsCS^aix?hwnUt@O}^y+sh!nk3swjhvGc}4N%8Q
z)bhWi?SCn&|DvX#g$jZ?puUWN7H9}jK>I(4tM{MR_`i?^sH&1M0u37qX#eL|`^ql&
zmP7g;o8%p4u?tM1=NUyWGl^YclDGvzQcv0CUvaDa6VV0jSCTRYU1%!@UP>ox2O6W3
zcm6N!3PQ3@pz2A+=D)1%e@UDFiZ1_^3;r9{{WqQXU!~!bY|e4ch|O%ylNj`B8Pv)c
zq_P-<V;OkE8Mu8II6N6xT^QKi82D@$IL#SY)WO{%Zg9axu#7@ezzymFaY-`p$ubG4
zFbk_P3#&5=t8)meatN!k3Meu0$}{pxG4e_=^T;uA%QEmtgVsrMi8Js>g4Rj0@PT_S
zkRdoUlObGYaLvfYz{JbMF3Q3w&c-dt&LhRiFU8C$0<LA4!OrGlU>9TMmt*6T1sB2)
zy_h7kfVie#OiN^Xzno1DgMb5rq>qAkT}=PQ_FaGKPW-Pq_CM$N|BS=`vyT2RI{Ux)
zGzuy_`M==A|GZ=WvycDJIPyQ^00?Cr{GYKOI-^v5=zrPX|K&UX*X;XWyYGL=*1v@t
zzGbiakhthU<g6PB^R6Y%zaBZ|Y}C|qDf4fE7St_&l)dDB(duU<YhP5XeNnsVZT+T~
z)$5)XFT0yH_e%1_V?hmDJ@OV<B}_65sFt(MkTZ!9)eaU_cjr@Z;FY%Ek}zfGR|R!W
zImAJmxfr<_KovbRc$Em&jwfb-;1}WKRIsrNEp#qh<lnT;vvI3O!xjkgso&yLzXfvN
zgh$PK-^MK=9lJw1cKfw%_ioza*|5p2c0B~S)~o|{g1}pcAOm;KRcqYqHh4B{a<ALq
zQ4K<n@jCZv&~iJ-I&esJ1F22?8@B{BZS}6*;8C^KtzxxH*-F>a#m>bG9SY{z=g+mv
zooAmn-ywg2OYsty@-=Ri(49(<+6`h6yfVVdfYe725?&dB4o#YSF%bzNS4@y<2#ti*
zN{Caz!B;7PkFEt@I0fz;-NITQK?dvKH4&s5f^(s>Mc@um?((~k$_TVcZ560Wg499C
zB<1x{(b{K4Ye5HzK<b@BaAgFaOM;BoK}bj)1X<VsVMF>yMVnrMM(4J_DcSy}Z0EbO
zo$tU00e!CB`@QAJzmB8-E4F@2nsw8@e4Ds;2ZKr?Xkv)dm4VL_v{YNdk3q}_)cp}~
z1y?IBpvp+VfkD6yOMN8Z0IG)goe-p;3#e8CiMW7{{L(ClS@cq5?|+%2|3z+rsv+(R
z|M{<gDkhF|{~4OUF}UpnRYocs7*v)sC@f}>o5~<F8B`yM_A>}BV31nLr1XJb|1YQB
ze^xEf;of`({{=N)GKgIhQf-se=@r*KqGWNNTN`v=xv0^91zXTfd8$tTHC;eQ^vIcl
zE@0)?01ePd7=q546W0fw(!&ovGL8>?To1n{=wu#2-T%V+|Hbq`2y&8-pyq!*wg0?I
z;E_794-EV-8TihF7C#8?1=UC5`<SH;aw^@IG<q*@4qDnRX$$I}h?;_?goO1#dyxdS
z|MO{p?vLP5{?DlbT9v_T{9oAezpOcEG*3_mB*iZGmQmt4yVPA4@mtJd7nsE^Fp6Gg
zmAuX@d7Dk<9%Nm&i0*&LB0YHvM13S@4_d>Z;PPL_4O$;b+5eZZ`7dGdpWo;|kN$rK
z=T{8o_ZZApFsRRDkgH@6D`en}Vc?Ep;EG~k3u9pQ2i<tc?E|WkxV;#-y%_jyLG=-I
zPceD*5eH~;hf{({K%PlR1+>FRK!t%%iG^R0nNI<NSOjF5`DIx76xaon7$NH<xx|_H
zq}fH}ScPOb1f>`_g}~!%hzq8XN=ZnQo)J9I!~yCJG4L?32(j^qa|_C_@k_AqiZO7C
zf!x3>1X@SIAr9Ks1Ugz1$qWP&bW0AqsI5y-m2dWJm!wJ3Zj}sLu?&XAx(SQ?X525@
z^S}Dq|N2}1E3f=7Isd=x-2aLTAXIklf9cu(C1?H@p8TJG98@1=9|u)Tg(v?Pfl=Op
z|M45YW^ekQyX9x$=AUUR-o?&)=F@xCrTL_N-4WZmJua<>!>67NnRLpsWUWs60=vpB
z(bFy#u6kOr@lDm{cctrImacl5v+zz#|1p>Hb>>+MHKHepyH>H8WP+}6QV3#@^JI{8
zVHCG#5VU0A)nx#!g;Qi;Rsi=0MZm#A<oGHgEQnySFbM1GyCvBc%<*en3$Ksd>o-H{
zBcG;i-i@IBLEiP7d>TMqA<u?Qkc!5wc0H&Xg4Ifp+QGR7G@S&gp?q7m`L=9>jQc@W
zK7bb5RjzWYS`8Vmb1GR1IZ(*6dYxaxX7Ab!UN!5TOIO$zF0sp-W0yC_I(w!~&Mey;
zXqDt%w%o09oofYX&#ZU-Cf~*_puMzp8(eEp`bTJ0QsT_>keUdMB%?lpRZmwzcN&7v
zUxn94X$!7k_Kz|kGe^ro{UgX29i#$+R7ROAZfC8$13}P{I&ggiuZ)PPkKh#(gn`*V
zf{4H`s(^QqK!bJbpFl?HAXQR6xJrW5LC7TL+6H*71g@msmTmh`v+GOa{-2$P{`MXJ
z-@X4&(Xz)OEeF)XCNb-zF^Kvz@VSA;;>3L!B>Wh}y+BC9jX}(XLEH(1#2greY(NvB
zBGwGTwqRt(AZ*7VWQ#z8_MobY-xfp)*nvn;W#j}hna718|CA+oD<$7)(A7Cy7yfgc
z{m*j|w5*O{%YTOOmkeen81y$Vs4Ztuo6DdylR<6@gY-lOsZIv5RtBMJ2Hr9T=`9Ru
z``NU9bLjl#(E1O$0!iy1hvr{y&2L;9U%9nG7sB%B{^!>DuW0sP*9mkw9B2Z_0(1z2
zsK$RmmH&bopi|%kb^r5ggSt_mo6U7V$7u6t{o~R4$EW@ugw#N_59Br?0oDHks{i@G
zh+pMDoAgg6@w*HH*BJOtG4P&b5ID>rc!WXZJd^AVE-lajagejVK}WWlgRUi#Gx;xJ
z0@^&Q=m1_}uKSHc={2kD3pTlzq6YsJZ9tbhaOr?fMrD=$#wzig6;>aC7I=$aVGzH~
zEOn1r<~5h{CvG)RH%in9bc2S3)qioz|6;cPr5yjuI{lZj|1atIU(W5nwA+6v*Z<-U
z|AlQp3+?y~|MTlzWRTd+Adt$y6wbhI145jZ44jq>?A8ptj*Nm1pi_7FY{8@z1D_QG
zuO$PwIfH-&1E(Pavm9t*1+-d#j7BvlXrTh91dD(?tB?v4zak62G7G;lE1v=zzamIb
zP=Q@ofmKkBkxz+LNDZ_Q7qm1&lz~Bnkwb=!SCLObk6%)sLs*r8U6KLRyk`TixWhlk
z!3gR!F>*6=ig5BvafvAMh$(Z5X>o{Za)@ZL3u`j)DS<XngHDfOzy<^brA*aqlB@#C
z?2@{@@}?Ul&X@D;<_wssl)28h?{4Cr|CQJN*WLbKb?blmbr34P{J-S#|GcyRi_ibB
zy7s^F>i>!>|68wvP}7C~X<NRy^`A1T-eyy^+p+$DN7F%vs+|@kTg{3#8|QDZDB0@P
zvM+k(<+S-113Gsb#!Zy<s?`c=bIDmyFzrOqj8n1ATYZX_*u?f|c~>dfqzUPS326Fp
zD!FsWSThJ4G4N?JaH}zK%7T4_c5*Dlb7aICL?aO-yR4e2w{^zEkj7QsO<TMgH-m=Y
z>Nk7WfvP0Ph@3~wddMUZcqPLo7x1hQq%v}^+u#OXYv*3Q#-j$h;?ARHtxv<ofR?Sn
z?K^xLHhR{s^Qc)1QcwjNh;uGo=2Ws2e7x#Bhx`Rjg^Qev7JF8#bT3=(TC&tWZ@z8L
zT$`L(mYLJ-a_2b}E^;Ye;#vZ}exhovTg?Xd+Kry|o84<S`qXdpso&;ayV<jTYr>3k
zaFjUnJbINh`(onki)i&x(ws}kog*|hT3-pmMXrw^RT6T21e-s?K3)f}k06y1q#A;g
zRH=_39jILJQU}N|9c0BFq)LL<L=XnNJ}LlDAz{uRK_-ui!3)A6OTi)45S%RB__B2C
z+nPP!8V>ww-2bzB=jXiTk9^w>szgj>FfL$_2w~v$WDxLT5D#P!_h%3TcZkG%7(_il
zNX!LPABj4EYaAQUbv8nlpejiS%oel(?Nfk|0@ffk&^pK#)WC++O1uuBjupGTka4#9
zlK&<f|1)g+&wmEgC*nN+pZ&~#uJiwePW@+?^OM2zHiP*|XnmwI8#GuaGl@Z>k3qDB
zL9&B^tC&HcQaR`~pY1gU;U~=MKe%;4T_i5;|6JPt`E`E@=>6u=0aY>FI{*3fLG_V_
zE$FCHevSW}a{t++|FcW|=a2?1w-(X_RYv^U|GB}44_qJdfwuf0>LXro1tg&UUkHo@
z)Ip2lAoUT0=xqkRE1>#_?-aB?62HW+^;E**zpCqh6-UtB6G~?PmCgSvTY`|B6=?KN
z!W>#3F-kvS;J?Knc!x#)r<5tETH)3I&!hC0LG%N&*i#nqXRH#pStV{WNnT@>xydT?
zfJgZ`ui94;-9KW6|D{a-OIv`>`H`{vuk89?C-}c{)PEJv|I!ZsrJeuFx<czCQM>=V
zX8(Eg{xeAYXW&`Sz_5UUF^qvBn1RijfytbK*_eUdoPo!IfzOEndJ3Ek1FsFJGU7G|
z9S6r{#=vLBz+u3^A`d!&7H6LVf2qX@>W{$cBW3|5Rsj`eJ|zZDX$DSdMqU|aemQnw
z1yGeFq{b$!&cvqz+StOt2W{B1NI;h7aR{q&il}jmDKPU%fR-^Z5ZT&iV_@KBU=?EF
zlV%oB;S|&2lQIyLH5ODf6;w237m@*Y95CH4#Kf&8Z5S?Z8ZBy>q~cMi9W}!ueW^zN
z7WuT*j4`YEGItpE-HqM#zx3+=+PhFxclUqoo&WW>L8$g7sA9_4^(S)mTaRhi^lJ8~
z<gQT3U7?%3MlWZrQO;`9{B>qUo2|;W+g9%KYC9e^<5I@52g!5J>7-6)H%VtPOyM=n
zlyE4KcP*E+FXS>vVo(WVkoRMda0fO2_$?W@^ch&SKx^Tc6u~P7#lTZ$Jm9fS4sd;q
z=_wkCh)QbMX-71Amd%HZeR<Yz_G$o49(mV+ihZ}54e%Kv&_X!yKpf;|B1m5evewSK
zegmXFa;^fM$>3hS#<y`(K+9I&#!a5Jpj}9yQ-dI9s}?S@$(wJLGuI+(wq?dli}V?m
z88hwk=DU?Hb17cpT(sCeZ@yjL9H*iM-c_r;t5$neuJWl~6Hvd&uNG7pL8?ekP!A4T
zAGz0VhF3-jGtR-!Er(Z3iQxJOLL%2n33DzWBjhRxQURfnc<LjxS)}wO*D{t~hxCsi
z>lrd(3+l3$gN|r`*F+FT=8C(Zi-VE+N03!^$n_MQ4Vh7bbci5z5Hbn6RBJVK!9(_%
zhmgt$UOmAXkX_q_YahcAWG)F(JwZr#l?1PqAY*ip^Hdvmfv#_E-}$+G<>T1S111U6
zMXd4|r2QGVEkKCh5>(3wT0=<zD_9kSBqCtN07B-V{c-$e3<Bm1{FYDz61M`e1*|~_
zm+^yZA^{{35l_%?AE%E+{UM>P|9RH`XV~&z=p<;c4m3u0@;}p=|DrqpGqiqWaJa`{
za*4rUEraSJ29=o%3R4+mdKo0U7{uxsWZM`x6B%?{wKo5EIQgF?;}?U~83xUt44OX$
zwEhX{{O1BA9vx5>!=Vitp%OR!FK!CD%#K0g4+Hl{2F~{kLLV8VesU=O;!^&@qw!5Z
z=a-P~KOx<Jko~HHx}aUFkg+?^r4O*V9wG4A+Jf4k%OLpGKe5WbU=+Q~Ab6gE{}dRV
zU=TUWqVP`I9CT2st{doLBYCU;3g)0IUlnXXJtHYg&@tT%N_QFLZ!y@PWC*^?5d4I}
z{sM#Yb0(v2au%S2Px+PqGm8FT5`M)j@|scn5wqkICh3P9iqAQfe+q)<n?UD>n}C)s
zNZ9?CwEr*R^j|&jzgqZz#i0K(p8tg%K)ob+m;Vy3AS7n}pHb}}tI9?OuIUWiE(~n;
z4BQ%^eI?v#pleTARYCi4cr6%sEf~0+L5Rx>bi@<4KLc+d1AiEUP$Yvy3<F;f1Ctg5
zgEF|R!#ZMytDu9d4Hf`hD#Rhf#H+}}ugol<!YZf$TJH_o)xrZRMY$xIh1FR^G(ZOm
ziRcNaddQoEgAx)0c;N(#I;hlTR|Cz$vnes~sj-Ua@Js2i@GF826av*s#5jo=<aj1Q
zCLU>aAr&5RO@0|Y5k+H3Wg`J8O$H`j3=eQJFpBakI*4m}2paec8wK)M=7>6#Duzz6
z$XRV!cgCdhxJ2$j@tgz39asGpe+XUiA#ll??A^c1j{VQx@il7s6R!#9Y?}`n7j4kZ
zS!IyD%rIxURpBa!@(tF-AmmWK!LeeaL**8S$}P@yJN-J3WG%Uox%j$w<x(bt7zS}0
zE>$mnWhZWVI}RCZc4;e484Fek69zsF(7j8n(%_mJas(e_tP;cdbQNId5>u71Pj$-d
z_pDyyUAqoaA3@eLK!)a!>m$#GO+L+AgW7ibG=NstfyoWt^&sR?vlcQI=Tx@brDCOP
z<toqGb<qA%)oSO`W!Aa#Ky^^|9LwxE7Flzxa^_p-&a=sz?@+YZy>gXr&03%8HC~l#
z+)7t?R;>1^UgurA!Mk>YU+o4^Wz?|QuL0C$gA62k)@(#Z@zc-35oX;KKl3~sL24yP
zcL}*NLSrM>O^I_aB3De1TPh$Va{ma<&RBFUZShrjl?1Jrz$boV_L3moCCI&3kQxUv
zUWcbX%2@%vRv2=*FqkBxK7y<QEm-pij-Vpo?h<%G4rn7SWGD|(9~EtS1v$;TX4j|c
zoga(WJPn_4(l~WKr*$cVYzPCp9RssD1BVR*k0k@2H3Odo6mgp}aG5Z0n=<fOf)Pv{
zu7KYRR3Gu0Fz}jy$_+@BgjOH%*)Z@yx=z-hdQHF@bn!H!8G}oE%IbGQTmK7f{m-)d
zKkt$MpuxIxpvs8p%zwU3{~4;EGg#eVFuDM$kJJ{U)JKv{puswx6!p}-R)_z`JpCWI
z^*=-OTL$glj5>dK)qiuV{$y4KjotC+fDSd~(*4h+3pxl$Ko4}W9h>5R2JycPg5Md0
zJ~D`YVUh+R7KL}5s-Jn(fADJj<kR}cuMMh<1av?v4Fq*S_d5vbfb<GzgVrGkgRjFA
z)c((}{)t)o8H30rM13T9ic$KCfX**9r~gLYAf)E_U&a2vuFHQz570$>Qf8pdO(3h)
zA1MX>SFQbTG2y@I<o}BG|GBLGG3b4gHvO+^|6fY)KbzEF2EiB1BCnYxpK!>%<WzdW
zukjAD!$`^$boPRT8EA)*wBvs{@H~;K|9=_Z|B_z+l>`1O`Ge{sX$Mfn#BK4PLFEqv
z*CPh@DWLnFIGq?coj{cl==4!VP<_Ov2A*ZmXW%kr;BjFR@nH}TXP1j(SIy+o$l=m1
z;?XVP(kWn&O9IzNO5jxi#20{&2_1IOrc?$I23AQ%9tB1|B}P6a&@mcJg5Vvg;A`@D
zq}j!_*~PV4MD<xj^)+l0%DUI(b!@VWXyTNz1@EzyXB0GG6fpqZf5#@rz#_xUr@$|z
z!!NGF&MyOAs7&N|6f3AV#3&5vn(@jr@hkF4sEf+$aqvkpfcC5*7vh`@3<AueCgPeN
ze7ZgY`hJXhi7bXG42A{l)>W)tGeo0S$Q2(q>Ab64ag;x0qd@X%vCNHfdFzylHYwz;
zlFMG9k_AHASu4y+H#<~sv#Z=<Te8-wa232ha<1BJU$NPta*J2%fr!cH3Rc~Vm~>3k
zIiG>wnnBQvLE3>))C9C4h+7MEc{+;%=(;5aad2QlcK*UQ1R*<nK-kO->|#us{-!Za
zP9;l0tE?M9%NU@=K6thWQXfGM^YL!l65Itn!^g80)OCUn$3Ye<IF>H6D_U%wzreO|
zk$v$J*UD8MHEW$qm)Ydax6GbnojcDucb<LWBFExoZWXIR3+Jj<L)LW%Hf#*2-{4!b
zKA?V6P~(>1rmcS9Iw-JVb0C-juaCUzHX~O?o;4ferk#nKb_Q#egw|t%_MV_Urt^uj
z&nL~f07nomgiM-qA$jh_<hd7<=3IbOPswvHrp&tpKmHEmPNmC`Dhbk8$^;)71X*YY
z-WUtIR1tLb{L<Tyz7m9l)KEwZzi&foB{VV{e2YC?5Ynr{By(2Z10%??@O!yy?n4ly
zu7T`~gOHGZQ{LJKd21g)swCvqdXT<S;riz#n_iV~cv-RWRn5j%t-C(=?fG1@>S0pX
zKD)G8%5J61YN4Rc3#SPKk2wRk83U&Ys4U_(WZ*Gk;4=Y{5H_z7103<2f$AL|GX@Yc
zVc;==O7Vc%kjjYPiUEYceI-8d>;r`4H(}s60bPH?Y0ki4!ypsQGVQ(Q?*H6-|FiD|
z9ZJKt^*`U4|GcL`_q%bN{x7`#KSTK&2FEK5=BF66_cEw#V^E&QAU}&iu8Tpwn?a(A
zfjO1IYL3Bt&^gi)C;#)d|7Qq)%wY9`LH{m;@m&V1Ck&b|8C0J#sD5P7`5|ibU)TUz
zj|jqUS>@6C&!PUGS?NEI!Y>Y)Z!FRun5Eux$-d#1d&@5OnMe5tzvf?1jRUTjK+EmG
zwGyNz64eJC4JV-epG)a2gZLu`kuwZJrx*l}F$f%G5WK^y_(sa;zn&ZD=5Zrm(7{An
zp8t(v|C=TL*NOfw>hhmK<0*sHcZT|(;tRp&?W_coi~qAF{b8`WB4zwv!x1#4#U%Nc
zf$t3i-y3Gh&s++>c-6rxEA&Czi^MHKhb@R%f!6FvIDqDf<h=eX`u-R9{Vxfwl~lw2
zE4cj^vHH(t@`FL<0kdc;14{`5n;8R#1%r?ts7UA1VgOY`8VuZe3~V+Goc0X-@eI=0
z42mrbTD=T<vlvVkFjy>SuvyJuy9Pv>bTLR3Fo4d=Rw24RVg}9KfF_%G7(ksMAyEH_
zMT&__fr($4fmac<d=9j<Kmv3@6{{ozj{=LBHm9@!my{)oh_RqaPHe-D=*c&dX5CMi
zcu_xL0gGxZ1G6;)n<Z$xSJV}>kcL$Qv;&t%g<slOSW1tTM*&>B5m`jDfNCXX&=y)o
z9yw+{1#W&Rj4l!zXm>5M61$QkgQ6FMya#BkOf`f>FICjOOvtTOA!M>b+<e{aHRcuD
zbo162=C9PvUaFV5L??Zbe%4Z>+!bbpYYeiNSrn}g>Ny$Kf5NAApMB+etCCgr6`*R!
zscM6B^+uQKE$(&O13C}IO}~({{HA}`9!?E+1`c%=UPU&3WmaA(&>`obt8DSCEgX;v
z2+dM(6Ghz0$TP<=XKqmKI=@EHVs9^Stpr)u?NI~TKm=K{4Vg;{?cC$vyv?(A17sP4
zW9bUpe9(1!Ryp%6vgTT6&9%v%XPGhEHfKKQ+VQHj?q$oJKv%>rvM*fXQnu2gYOQzG
zS|9K-hJdE6$Oy95&cAUB2sLc=2P41wEs(m&uVJ%q{U+bK&E7Q|A*5H$das)GkopK$
zl?1JWW}Zhdkm{qv*@#*xarXK6nP=hk5u%;~FT;aWPmsN|uv+O7xP}6e@SA5LwNmEd
z>)_2wpo=Qf7F|nUd=tDr{6^;Dn~>A(Ag4XRD=c`$l(h_c{Y3WiyU1+hHF(HtA+Xg-
z(E2E6HLOZn{Wxz8;@A+-PHpgE4)FRYe>HSu53)@OQWI5ddsn^lW5do*4Ld)Ttb7#N
zcig3DrJ{culU5vqkQ)P+B?E^!=z<u?H8C8<4D3cA#AN`w-HgWwdwm4y2Z1UhGf-s&
zsgHOJp==&AaIeY|Q6KSILZ`w(SJLP)uxT@Jn6WDOiC9*I*X&l_`rqutf0b+hId}hO
z=>N~K;6Ll(|J*14GamWRe(Jy2rvD7JpBTLEGT5AF&^y4Oxtl?A34_Xf2E|?m>2?Nz
zA~xk#$&OdfoBoR)`_HiOGegil&^9IWrwry#xx@df6#SQr`_JO}pF#gOgW_8T>96c6
z{{*%Fi|T`x$qDO&&L#!*mGnX5W@1|ZMKu1i%Y9}Le*;-Rz##UTMfwAq!WT~E?_6qs
z__Y2*M(u>ai?oGxLFR}WfVxYd`bgq2v_29(!615yN$jDp`X?o;|AwCbjeJ2!*XO^U
z?|;n@P<^Bs@t@E3KZEyw&65A3^Z!dN0iBY~u=YR4mj7Ja{+mtu&k*uX+yFGtCu#7X
zS?V97&<7^rkL<GF`Bb2%07;mE_AyD@f=*qKwg;cO;0T&0lJ)#A=LM>a#C`uud4rL|
ze<sjLN|zZJb~7+!GB9L;CW$yK82Gdp__UaKbQrm{8TgG@C9D}F{8&_@nT(nQ9VfE5
zEa&i8&FsFJ-FrKO-(H4*{S3i}7=jP+yUk$`&jp<Y$Do3Hg&U-Rg4`v<1{xUyol(id
zz#z!LC=9xul~ELQUL1!U6TdR3N@5dbU>0KHlxE|V=ate^HFA?PaOIRX;FPpr6*b|~
zP4&oLY?im#Jb#Nz=~lP$?T$&a_zf}`cpX4{f<#={RYL_eyctByLAQ!Ch%m58@ri2k
z@XNAtKrW<)IEEm}3Yu495o2JNWCbk?$J~)5D5UFS8&Pf?*`(%Es_0R!7usTxG}9q#
zp>5#?^XyeRS*vxjR_o@i(ac(5Ub^0_WSx1@T8rX!7RBpK3s##IuF=n0YFoY~dg`Uv
zY3DtgcH393cdXv%RK3lqdYenlCeS9G`W@a)yMud<CeFH?z3hff`AP;!3(z4FEE1sG
zBta)maDf+Q6ZFd<P{6DvrW>Fi)$L!k+OKgNWabF8umQYg8&VTNj#F@}0&Pcv>=<&d
zUguP{(!O|^S=L;$jM)}hb1k#yS>?>P&s%7px6rw0saxp^@2a(awHv&vR)bb7RIc%?
zUgulC2|fx3uZaSix53!~O<O^gQR5a!W#r$m6<!}fM)16AHbRhBHE2nmSIzp^sV8Hn
zo{XDz3WDONpGHOrGtMBR#F=N~VF*$g!AXcXoC~jy62M&~SbYTQG{M=J^-(JLP9@0V
zaI_;^GeBFGZoum)NR<Sso*)$y_~?e)kV}E#_tZmf9)Z_Q;PE=>!aU5%2rP0BJOBt<
zzXz$3vcNM+nQI;*q5GL@?&qunqqPsR*W3pk_^|GAG5EOe>aDNqcfN1h`l@`@{j6zc
z{i`<_M|Sg@rZdR8Gw_--u<C#irw#+B4g;4K2yttJ?lt4m1+|m8^}wVd12+u8J4bK^
zqyxog0_sLVdQy=3i^mA03n~j9+2ghbt=i(WW#G1Fko4v-NKy=+;#$2!Z`M28m4BHI
z{FlD@U-Z_0mXrS(X8&iH_J?83f3`jU+4lcuKk#2}?|+8L{~1E=GT0qw(BH|Ry_G?G
z9fR^R&|(I`b_UH!;#H5iw*P0_{GV^je}<X=8OpwZP~#s^)wbwA$EyFF%Rr}sGqnC^
zDEPz>_?E%q3WLU72EE4&+W#4JL02Ym8-OO2xWJdL^6CB;)&U*DAfWn(Q|>FP%xh+;
zm(0>{nWf*d$baBe`O2gAOHdnBmx&qu7d8AZZVW;KI{!J9UNA`BXBI!jD0+fH>=c9K
zS!T)CVmkknY(ZB@=(+#bb^EX7@n6U5znuGjNvHn|w!hhY{!2~$Z?XBm-roP3JN}#Q
z`meSAzvZHzD!orMqTce@U6eNXuW9#RRQEr#^nV7C@2rx4xK%*|hR{<NtUx^@3H$%z
z;A6T$M{_&=mv;Ft;r3t315^#k`u&&m`OjthpF!s#gV<>Xjw%L*A_i`M2A%*00XNVd
zWKMeq4m;3lI(}yc=_p3ER0fSE2CFFy?t2-6PB4UD0ilTN3^BJE;vX?2KV!&vz>xct
zJz_6|Od|t>DtP}92Y5|2xIGLl0%2Qw7{F)#Fz|!sgBbW37zMx#K~R+hIub#IfkBvs
zLz;zOkri~KgcxW?600}^hm@F_YgAUJe?k?flnI}N0l%ant44xp#00&>RT|Mtl!Io8
zy7Wj`S8-@(F^Ksya5yqBS}<@ra4AMf8D<G<MS!~A3?N&WIAz)Slz0WznAoI2-5;C_
z=b-L}QIMPX_`q5q$FcH(F31ouma>U<E|}?Gv(%|%xl_q<m%>HvB}+X^*Laq$aV}YH
zle@&VdZSO%4v&UyHYKZUOIO;ItgtO!X;-q!u4I)>(F*hY#oEc!-RrkzExM65|6*{@
zK9`zJuC<$;Yd1U9Y;vvH>Rz|qy<w+U^Pb@DgNZZG=PtYEShb2n&Xj>ckdcvxk&y>{
zj1nu%or8gf>_LW*EZ~#?=?L?KLX5*mKtIwjrq{b{xqH=m(6Z|4^{&;>eXK5JD<Q#X
zU%133Z=r4eBInW-p4IC-L6^X;u*jNgnm)@id!B9nB6tPlTeIG;b^~On4K&&Yo&$pS
zeLy2`4Vys=wi~wk)Nk=^*a|}6_1ho@xK9MCjKIAlNY4mTi$Uun@Vt^&&3cF%Jge71
z>LcVTDQ?;+<SGfmhSy4vdIy<=*Fg{lye5J$66RcpM;h6KS4PMT<cbNl@&R<&AEaV}
zkhtn2<SGer5)meux%4)?VuGw2hmf!tD9|zq82c`GfdsV9%2|CcYxRSSmG{!XC}Z`5
zw3YWVR)bLX+IzX{ALOokn795B<WRd}@YS)!Yab`iz3kt#T_=7Dk3~LcdWg%0f!z``
z4a8~8z-0(R@cIZ+8G%S}O~j+mz@rbUR&dov{IF_>&xC>B6ok+!CWuDJtP{UIgSfkZ
zL5iMVlV{=L+^NU&mOQN92&#-+Hh@Owq;LKgy8d7M&VP|J;A?bNf)+M_mU*xE&$bj)
zAF-x?VDP=hV7i|{V-thMY6it63~F=uop(ytJ@?!ApK}}NRyh8x|9Lk3m)-bZdHa9a
zP5<RK{}<ZwUwG?(!FB(ow*S}K_Fr}5f76-&C2IZ)1pQ|)dC8#qmqG3ugEDBp5}W3K
z4sB2ui4VM=R7Cf`gzkS4&HsWL-}%+PawvRall#CT4@PpI*c85Ssr`Y>D+%j^Y9&6c
z{~U_X86@wsNu6aDKgA$%hDG)wm%=AGlmDs?pk)kN&i}QX|EoHKswDM*|C~nu8H_(F
zr2SW4^j~h~e}?u~40#tBg10l6&Sa1-V&F+;VCZL%-5{y=U)2g!A8{!B=aBi&uM8S#
zlrRDvjw%DLjAZTqL+T?bd(gUW3CI5uj{ilRKvj|qxO2qk_@6=VKZD9m2FY^_%2OHS
zCo(9fGbpCAsYWx%1TpYBGqBq-@Hp@)gzy>_idk1N*v}LUUdxkkO(^XSQ|2?K%x4Ta
zubB(pGZcPdDE+}u^@+3XpF-g^2IUU$Vg_aK-cmO3s2!x7WP){t_`tOfA83q=fd@Qv
z#tH6Bae}q*LTV)@Hc4ha1$H47CO&BfPBGA69h(HVw3W7Xl%kOjqmT}#ur`mF9)n_}
zyi=8+OD~Ig1A}HEgH#fOXb6K~0O%MHUN;6dTLuO*1_lcTUN<q_Xa)0Beib_gc15s7
zTnvn296XAG;z}HRk_^loL=@soVAl$=@hb}|TPPZP*o9<!#FctxcZHNqbt|4@pEK1a
zZ=qxHGN=3n4!QFjauz$}EVj;BY?Hervj1qpv@>2!I~*(5J5;Q(D_iAIw#K1sjZ?)s
z`_k108FN%(Cb-vc%~^6QW8vkX?!Ayxbew87Il@Nj+#7a!Hti1TIsmDUyj!-2YCAE2
z>U40pfmXId!kGwv&{hfLlv)VM0A9QY9*O0U5s=iC({eJf2{8(+c2Aq&Rl3Zta+OQ@
z8mH1#_N6QBN><nvEwwFNVw=CnDrdfB_B_+{Sr%DyZSod6m#%QDSnXN8&b@MtJy?Nr
z=?agkwSf(rgPXR7wrme+-UdP8Z979-wg)$D4FYd^h4g&_nzs5iZS`#gZMlWiM<5cZ
zJ_2=!U}JQk0XlG(3Rh)>Tpz(JCgi#aQW?R?gc)bzU<g;01gVA~byMQ(^N`gJ5J5B&
zUM1nGkC3}aX$!7_5csrkNG}P}MS`$lB<%19&^ZsFi$yY)+=BO*AXO5iCPF4*H4&s%
zLavX%iyu}z$O0kI+47L1tFl%;&0h5kbZzn4=j9upS8RGwyZL$f+J{*)&IUDXwMv*G
z<xs+=9>yT%%D`d7z@o>%q6I?i>Y#oQj|ONim|Kehf_StUcyz!BToYl|N1)+2Lk1p0
z=u{EBK7v$60v4b>caW8M{NO5&+Z430LE4j9KUUhW*`atvNdJlKW%tWAzN*~vHg)^&
zs5Kw>*Z)^M_+Reff5j*NWgh%je*WL&DQFsqdDef<1>igQX8dQUd(TkynxXk8L*;vh
zq&o~jXBgZLGWeZj$i6Q;^*_s=|4h5UmDdi?S_qE)|5=XwXFd9#^%xi({?C5&Kld>(
zI{u&U)PMdX|9KAnSK9esVEKQRqW=tj_ZjpaGN|5QQ2WHB^Mf0*?MU}Ok1l9&IB5To
z-hY1m|2*2DdtJHJf3qupXP5uTD*K*Q;UkCg7f#jB-0EMs)qiuU{^V46#w_)KP3k<0
z1n68-Ho5COia(UhLC3VJIsDhK2O(9b|2m%k<y`->>ip->I?kkWkV7Mxf!Bk9K?6KT
zF9a^bLG$Je3}u3fvjlWNhkFa^gZ4lPX@L$D5;6QQVftUf3bcVp!v4Rc1L&{?N$}+n
z(st0gNyzEHu=9U@um9X`{~6qVGT44#us_aVf1KHFC4<3i27_h>n|@)p4hEGp20m8?
zZYKtzXaVCIPWQQj{!1CckFsQ3V<`B{Q1YFj{3k=@Z-$zG3=N<YzZu&8GxYr9nEzj=
z_Y;Hu6mb7Y0o;k;1d|-#>ISy?lz|Ie9dLlFCr&U4X+J|MFAlIz`59S6SombQM3g~y
zqH#&G@X9mu$T9G0Fz~1|u!=LVh_P`?3y7++NcoCr#WE;mg02r1iDnQ8XW;V&U8lw2
z$iVKv!0ODv;>5sc!oXm}z;4FCXUinx&!rO1DsBrt)|CS^<IgI>z$M8iA`d>IK^W{d
z<USpw>=tGa))&+X)N;)-jBIwyoa|h-#It6VXXSFIq6H2mOKl4mSrjcZ&0lO@xYV+E
zxlPduyOLE_xl7Em7J@GHDP5I0|8mB>O935woU7M6Rc&yp+Td8Z-m!AMbM;1tigiX=
z3zVWJcr<LuTXrjJ@ztR2y{@&JU1~NtK~VJ;CopoY+aA<)Ab$GUoMo58C+?Foa0S=h
zkc*!n<Dbmn@*m=62#M(zh&Xjgh%L<Eeh_#a2!j}CWrc__lcEEUak#8YhF(mgZT1BB
zvibgX%l+!sgtu=EZQC8#wB5C0t$opQtK3Cqne)st=b2~Cx5$}qmAB9~Z?R+1a*wLD
zehr(wsyF!7ZHCkukktjC3Z-FdVAJ-1#%&N3*t8viz{7F=&D;E&xA}o9BS?MZ3+^9z
z*KPK$+w5Dn8Pq3&jm|-;A+&B0WVQ%GdQ`22qnIftAPBiW!c`?9*GF+P&LCGw5H`FT
zf-sWiK$nH1)kF|3W_^@`am^pRN&;0z3!pPfu-XTc3$2eJ2NR_)zKPjaLObmNqhbQp
zN3c02M17RG{2`<=%31X!d)2e-RnH(tTIH>{7dPXacjH!rv{}-gbxa1S3=%#JoVE<i
zMhr}b3`{y8#H`7{tO=UD=G0{1RtF<+eZ-^9z^en=!Of?~AYhC@kh%s!3YaoLkf0f8
zwGW6iXAlG<0W$`EbI`UFJ`3>LZz~2#XLj9i6|WM<+}UwmJF@0ps#yJ?V)L8Y9UrT<
zzN^~$E`H<J@Re@`HvHE)^Iz%df066|*-riEz4%}E-hYv!{}~!SFqA!GU;dwW?SF>p
z{}~!TG1PrwsQJWG_JObBlVa@;gP#A2i~sAa`_HiTKf|W~%)9?{9Qe<%|3BBk|4fHK
zi185U{yx^jps6UXqyO0s{%1P`M!Wy>9{R6+=)cDP|4tkJ8!!B?p8KD{<~@V@X9lHr
zEGqxl)Ib|%`Sd}XXZiF%wGyA+e*t~))vn-G-$Gh{_|<=Ms(fWr{KTsGo>lQZliVjJ
zxlgPzkC;IHqw{Q1=b5A~v&-M$Q~sr7{$Ilhgw*Z+YdQW`cL5y=Az=QWLFqpO?<xj{
zR&c4V1l}zo2`-}fAdPbdh6)jtx%}Fon<j(|KnnsT4gbrUg0{QLSpJu_2hSNffp)xt
zw!7N>m$3)kXC!V18oLv6`Y-7CpVQ?(hx301_g@Uo-<Z5FGy7cO^W4H<v6$6mx^mK1
z(~MON`h}na28F#Cl(JRaCrC%GS5Dl{nt5HI<Pk%~Z>F063{Bu_sQo`f*MEjS@YLSa
z|3WMOyDa?=I?E0;d87g^*ttN{E}-clE?AWWsfO6WTsVnVA3-W90Z4ts$ScFdC(ACN
z#LB0@z@^Fny1Y*a6ckKCyh2Kh;$Dp6UZ84-Czt_(c)dVJ_;R~2a5^!tyMhp>4FiWY
zXg-wL2sEk0W6v%LLJVAxd)64hj^SqE66F$*;ue%<;}8Nbv4EG@tlCa-L7A=5WwYZd
z=lPc}^eLHVTQJWOyou7QYDH-0F3;L^mc`30ik3l;ZShL`($&^^OHI<}sYUj=SFO)m
zawB)~_2BOPPF3rit2esVg3fJ&R62HLYmBoODn|BuHf}9ibuW9#wcwt8?)6)pt2a7U
zZ*;ER>|C?et$qh&ye_EgK<w1hIm<44G_B`TFa>u%2g3peNat6Wg;$YB%2Zz0L(4AK
zE~wBor8&50VnqGo#Ev!by_@2DH^udCOX%Mb*S9CGZ%=g3!Lar{KJ{DNDnTcE+ZQc&
zC|==M0=k;qqh`HV^#;Fs&@zLd7Eq-V*t9*Uc}Gz5j)3MJL9M$2APlfbQ1cE%O$4cx
znzsdlD<epK1etz<kJrKLBVTZ><X681QW-%`Y=Df*LFyp*0G(&`I(TK|UbzNR8O2OF
zfvY}3n^%JOkKhbQO#~+)H4%hN0uR<9kJmwJA_xhuk01<qFA2hcFLHn}u7ay1&=EuM
zYn3o6CTM*GxhfrUa4TAsl(zH+9D%2tZX%P}%Wr2by9K%a4{{tUWDR-os>da(pOmk8
zTE6CK_3G!POCJ`_x)Rg6-!)^NwqJ{sMFxYcFKF2tt3LQrW>wH+D1#ih`7Ou5qQJna
z$iSumLL5pATq>X{iAMum8R;<a=`jczG6<S52$?bn88HYOGYFf3kdOrfI<jIAvI6bG
z69yl@AP8Q_EnvqW<;kfPDdS#Yoi^RKVQbRVQ`yV!mT!JpwB-%x>WZx|3b#Bj-t?|`
z(>sebKTTJDW83y$_UwPzgZ~*Oe_<&9&rtuLZ}NYJ+V2cu?-{(GFeE*boBUsS>wl>=
z{}~qjXP6H{j0^rVF8|ND{y*c!|4f_yGw=J)bNoN&DbTfjT*v=&pZL#p420MZgK8g!
z{h+(E+4lTr+4Y}gI|xDQFVL-~i~h6C{m(J~zk26?hNK5f)<+mLHZmyhU{Kw~sri&g
z=LL_>FFw6Le0t!DNe8^_!2q;k9JD;o0MudO(fZG>_MJ)b6NBt$2Fdpfl8+d~A23T@
zWtO_iAbFET;Q@~d=!`pMd(drts&=5`Ak>^eTUL41{xb-^V&u+XVDJQ&hLC0_18km}
z8C-HUiD;}6)%-854>~dkbPPE77Ig_*=(wDu(|<|V|KiS|^HHT7{zH(UBd8`~0CkK0
zGX(r(2>itm@suI*8AIYNhJ>38!6z8J4@jr%v~IcS+^~<uv7SLHkwG?v!Kg_gWQk<j
zeudm)48@N)>fSQ6{AXzY&(H&|gC_oGnDU=tD!5@V`#;Zy|3(Y{GZ;()PaZ+`A3-`h
zko$=s&3G1YhX`I1LHbIN`Upa@f$IWB1_n+>UKuViRnWx~JWA|BYApQ941CfIeA0|;
z+~B}r<KUBI5p@7>5by<ci}?H*K**PY&ll9S<n>_Sab@6j10fz)1`cP?&5r_Z3_MN@
z+_nsC77VJM4C>wtY_g!*4K#(s3hG?2^Ko#Au&@b$gPMVXK}bN^-l=erUG6-y-1#7s
zv%oZazG?PCgS0u?iBm(m_9f3g@71&oG$mBJ(zbM^RnansvQ<tMYitXb7^lxt3hi<!
zUz5M|X71wa;k^f4tJk~KZ1QN>=3c+myLqQi(+;<qO-31W)M6(1G;J#Z*GFOf2i+UD
zJJ)V@t=Z}Uu8BMvcX>AL_G>>7(sw9%?!~O7*X=4-vr6iL9SON;0~{nYBp|!PAUz;9
zu)PAHnIKkGMp09CZ667nSY@9g^Mp2+;yFQ$s{&d#hIVa>>)VqsVPEXTLvfQ1M@~2t
z+OsdT_h4x6!Ei8&>OB(PbugrTpMTR%pZYC+ji9?8Ak|H9+wS1D-9fFpLRxo)g4tk^
zy}@mJgIf0lG(+nnNUapmyu-g~yMNP8|E8UOjXV4rcLaj#qrj%^pv4JIppiQ0tWe_?
z(8&+5{t=`;@~+tkse|CP54`H}1~1I>sswFQ@~T|pS+N>|;PnwQ1G#R3)JO11rP%4G
zAqZX{#o_KK#m_t&JM(Njco`m~hC(JGwGw=BIBZQgXvH12DhZ`Nf>%k9*(C@GJ^5h~
zv{r)DM`&c~lIswZwiJ4|MaGgFkVz=`Iz4!OlmouHqF~j7;#H4}S3NFVc0X<QmC)9M
zR$0rmL;3`*N*I(PKzWeKh=IWXeB`nkX!$FH0{E&IS#W(M&%i7Xs*+gbK=l#3A_IpK
zs6GNMveE_ZYk<^80>+>!N!S=vC4nj?Q)nkh&>UJT37COSj1d5Lg+ONn*)d2s^Qnib
z+2>lvbcd8KNu6-8a>>=|b&u;dJ}ck+vUKC~{EaX3H@?i>@HltF<NS56^4GmGTK3s!
z*=N?R|Ah{Lia3V8_Y4*PK~+-5BZjaG4E8q|9Bwi=A7==^&Cu{oc<FzU9sgwx{8u{o
zpMTwdmQ^6cu>L>Cj{owv{;NOwFL()5CGnm9&w1=W=TT67#C;r5AF=NK&%E<L^S1wt
zTmCa{2hUF}1+RRldBc!-lfnBWgXMMx-E9mCD;b2AGYBqY5I)Z!c8Njk34`owM#bM;
zTHwkETru(JfNmfHRY`{b1@-^)>;31}`pu^Di%H=dtNaUQ*=Ou>H`!!wFi78KReZ>+
z3OWW{+5W$h-G3EZ(EO3Q3uv^BMfMv5`vV4+L|A=<R=%)-#+P{e<&C#W>i(B80G%r?
z4X%vj?f%O+{+D<8FXQ@O*6qKvI|#|SK&vEa$N!?P|Ha(?Gy42z_WREe{huN3KSTN#
zhU_0)W$)RFUNc0VV~9LwP<hUC((}N+Gr|FV3>x_i>Uo^Dz1s2X74wd(7M*6PdMVic
z4O$aT0L|PnO#9C;0~}m)KuaG5w*1$h_n$$3A~;<^=8qtg{+yt4o`DHGm;<SgIKT~i
zHgF$`6D$tDxd3u-G`P>gCCMeG%FM6I#HYe3qQNPm!N4!WARx=c&I1k?W;PyiMj=~9
z(7{vwpv&%sf*FKE7=!{r^G5>SpxTGu12mDu53ZQ_JV7F2J}k<SENT&o&ZSA+>vN~=
z5;JfI*GG`m7hr=yP6gLZ42+yI7GANHcDeJ+)28dE&9KN{Xqms*B5#pl`drQUsoIHC
zlV+UBS#ra_eYb7tO3<uO#Tv)TwJufboGaHlm94bQU7#4!>0Y%SQXd6(?)3zpXzkg!
z-KA!ePs^^r_PySX+pP<iY9&nZY1&q>>{j}MOW_j^`LylvY~JbJwA&k84f%H*^ljT8
z+;cR1;?dOkm$Q~$_wCpvrezNvuS2enh}>EQ@g^B0I|G9t3$LQ2l9`rifUQTeS6p3G
zUSCYZ!i=7^*;95E%-o+b{XoX_196l0$4uNC-w#4jefy*O_DA#|4DUM-+`T`zdq1Q;
zitITY*>gC&>tJx(UU)SW(!K|dg4=e7f)Thf+8xxo2ZH>YcEW2SSY?E$k9-@pL)OcI
zx>pUL%E$+{p9oYL)o+6Ij}S}dz^mrql@X*X<X*YP9aarNY9jZtm5|EFvtl)JHH6HD
zS4r^t2vQ|MDkEePYsCbqcOdmq>`c%)aEJ&p39pYJ46MDRlzErn{iD?RS0M|+A+--C
z38{}Dog?^=9ddn?vE&w{GD=-?9o$R0mAVAFhCFTQjr3(V!51olCX%vN-pN}5LM6-Z
z6)n9}Joi#^@1fwbrB<;$n$Ed`+M!J1R-iR<Y)YWIhEbA%K>|GfBL>>y0lJM&lz~y4
zfl(5KnB_pnyl^Wq@F;^iM_eilpw5vd19ZGjhe1G}fnOVhgbWyj4M80xVKW91V+K(Z
z22oQ65fcUxGX_Cp1|DMuAxj1sPbT#cKI=T~h;IARl@Wc1lILH}UwyxL{o{%)FG{z(
zEZzj#jSJd`3%>uNa?|V5b<cg)y!T%Hj%5|7j|1whZ2ix${Xg5j|AIUJb8h@Ez3M;D
z?Eee}Ul_d4GuU5ZaKFXi_lP0-JwwiW(0qE$e}=rT4Bh`Z=YvM-#7_Tre)r%0)qk1W
zAS8Mjv}=j=7^ttre*8b%vH#2m|1%u`b&(i${%76{>QAw6{Li-GzwEOAeAEBSRlQ;G
zKgggrgF&T_LA#wnt%*S_l|eLxK_m;bo>`=gL86O6bTg0A0Y3F7LfS9bH9=<(vFm`B
z<mvwB)&(8S%Bu^itw6^LY5xIT+p6%EN$xd++zTef*SuPw-jJdr=p=1f+yBboLwmT4
z{xitkXW%%=z~sxoV2WB;fOo-w1U(o8P73Kf<umv%VDw+o^uK})s2Wo9{;%NsUoH5*
zdeVQj#Q&g;u3n%8-qOzh`P}~tc>ZSy`OgsbpCSD}L*{>mqQ4Af{~4-&Gt~YAtu1W$
z#gTZDJLQZ^=heu0uR<nYS4~;LVA8~(SHtfyRV#UeYVk>pvNH_z?|A$Ef;vRg!5yL*
z;H8NR{xd8DRYnYpL5mm-*8XQOo&sCX0691gGWZ5*T|;Jw!1M2*(}Y-HwGyN<f^-HU
zBonBY#464utja8;!6vHBC2h(gs?RE*1Ugccl^=YsD+3#^5Tl?utEe@Da1iJQVbKr}
zDI5f@l7blc0~mM$!N`Y!#|Jcs$LGzU94%&9DCS-lKjC=QrWchPpSu;zV&F7}Ev`Tg
zb#?|eDGlE$pTc=&8M7_3=b7XzGRs?HUAWXdccEU&EcwuG&BUq6v(INQz3Jb%*P&vK
zYwbqJ8KI8V>m94t+m)>{&0Q!L*5y*OA!pgG)cKeFJNG)*ZnUr7=uorCwQ;*w+a8~e
zeXfn$9l%pao{ih{mfgu+b|Z4q5udg_ejWR~+V*(2@AdCG?B8`bu=j9K-;vm<C)4L&
zE?M&+dG>jAmm~&8d2pRh<X$Rd-{EIN=ApR2!6MASuD~E|$z|v-<(z34)!<w(GpK$=
zOwX>=sfQCLpGcenTKpU{<wWGf;}H{%MNT*wIpJhf|B0yn6Onz#BYKZT^?*=h_tD7i
zqv1VAL%R-#b{`DwJ_xCd!a5IxcI=0s;LiQQo%@42_62q93vAyT*uEFSfK)>e65Ki3
z1tB4QB3QQw)V+eNmxIg`LFStvBxL#tIz0qgzYRW571A?;kdP_~(m8_ni69JkP2^dz
z+O2d2avg-srm#MO58uUtyGY2@5S$IEhLA}}m6QzKJd5ZkfxAmrQs-ZV)J>Q*5kv&D
zK1y431Al##w)7Tcxj4K&%3pmyZ|wunB8SD-6K9_f?>gvIy2d1Kvb1Xzmu?K`+$t^;
z24-E*&C3i*;KMDY!8H-+*h$b9574Sw@LmrFF>qxh4LSppSq?OI!6wVVDi1>J$_!j;
z3|yKB#H-1`tI5Ep!@#c#T4Kke4;szm(FQFD2Q7rqWe_%HP;eJDjnVL}a!i{XP`e>@
z($S20mkO8PF5URFV#|y2&Ckm>KQG<#vSjlM$eFH?10qV-Jug}N%xBGepEd8fH~tsc
z_Mc(@f2O1Vd5`|*JN94d(0~5z|CKiWSKRR5Xx)F+h5s31-ZS{zX9#%05c!rN<T^v}
zHHP3@4F1;`N<Q%~{m-)VztZ*p*3bSM-}^6n=fCo!|4I-3%ijJke&fH;<^Q~A|Fa*5
z)kpjOGwcG5?=h_Z&oJ*l!=#@KRgV~wPcyh~XE0sPpgEaAxffI!Dc6JQBcVhF-ed-@
z1P10(2DS<YhB*u@ix^lBF$kS!5dX-i{FhbhKf4a-{5N=g#1F2L1oi$4fX)~C$)oX=
zL;WL{=4T;&P<<qC52}x3Y(Vvqy!(HC^Z#tB&p@YtGk7yF7{ba6@Yyw73Y@~44E*7I
z%GsRCcO;G9@Ed||&5*MK^{|xO|LceRH%|C(k@{aN6;vOohy0iG{x9PKLc%`(C4>I6
zC;sP2`_EAHpSkouL*0Ld#{Zy^x3>SRz5f~7{&T0C5y-sgJ>gE=;`h<B?pamt<#z06
zFlrF-ou!w)MYH^@LERO`rjG&>!5yO6{~6|hE2D+~8J2)Agk21}zFc72f1~yP8Eoc)
z$Lo|}eIm%(Iml`&$S@sdeFW(fL24ofNL>S}j~Ip71(cYDG`J-7Ii*Zk#SEaQq=I&R
zf(lGFULghnGiG5+29aO}iEvP51i9K?IEX<o2sC!b<<G$3%fRdmLaa^<(h(x&`6A}|
zX6f^)RzJ#EaIIv`qp*fm@}9-UZjs7np8UeH;LaMv4FaMHCU(gayz=K*<;=IrnQxZ2
z#4K+Kq<^HHI9=SgStn&i>fDQYEA9k>Cx*P5ws|*i_h{JST)WY!W`lF}daI&k3K2c7
zwHtGm-O5~i-M4+WYyD=IhHb8m+ufUYdbjWO?cDF)ywkOIvsKXw_xi25OK;~bzZo_8
zs9(pv;NHW&o%{W|4g~fb4eU7@)ORGf|7gsV6RC4AR&IP+wCbKuW;=s~8F<AA8!WX@
ziDnn$6ju|`ve$Kvu!$-T%<qV7n3pwiedeSc*;969&p4Pf^H9c&V;M7!B~Ce+IOSy2
zq+?)oB4Wa^i2f51;F<_h8HM*8jp{iDs);%ehj$(h?K&LNc_^goKyW9t4hm`C7t+2j
zxNUDpJE$6hR7L@9dr(mGE<%;j4#;pH<lG_1>8z09IA~1-o+^SanFH5HXfs5RYRIE%
zEppEYxf*gWTj^H10)mk1AY?YYK7ugdy`-3FryvMd7YWi`LaUG9TzF*!VIWsV$OCuS
zsw6~x1YhI;sfHjVSnyg3`0#Kz3F#$4dQ7R{k-D^H*VC3=Pg;5-Y3YrWrMFU+-b!0`
zFMav_%oTStmfrzgP`l__^31a#UHbwmR@tUbH}I<wGfCo5^Z@luICa3)kQ4*xL@!bB
z=~bXT7qB^T$ZA^w@LqTUP=B9M7_<+OQIvs6jDbmnfkljgMFND_q(CPCbILFvkQ@W2
z9B3$yLkV_Dm_7ri0ccK2(wRj&Lee2uJF45WVrArn16d0%=B&6=xb|V;rsoipxBdy_
zlvl`!uMiS)l3wQ8*O_ZyORxSfyY@fhf&bj6{_|h@FMRDk?}h)&$Nn?y26cHDmV>4$
z*e3s1-tgaK+kd8({|sSI7$ToD#Q$N4{=pFP0knar<p=lr|B9FY8$J0ib?3k6_5ZT>
z{wqBEul(e{>a+h!&;H9l{x5v{KgWgtjK}{o90m7}w*6;a_8&C#RP>4=^elt<ItG>5
z3`#v9q}t4&(!`=y#-N_hAnDB@<^h@$XESBs&}LxOU|>-JPrw;6u-GuL6|##rams8L
zQa!+{_DWRmlYlO0_>NQiKd1J89s>~KHvlb%;7|vx#}m;1FK+f<#`eFo&3|!l2S>*1
zzp&$f28(wL@>dx+^BEZ8K^L`42QtV-gYNQB%wmwwU=UitrnZMo<3FFle{Q4y0_LEl
zbc)XZ6+QoJ2md!n{BM}_Up?->Lim5VfdAs2{~3J#vxfX<Nd3={_n)B}GzSOj4YmJg
z=!T)b|18r%GeN@HCnXBb1x>k?w&Gptk_Rr$`}tfZG3eKd`pq@Y*{xS~#<cM=bH_KK
z>EK!$(kEI3u8EfYXIS~4VHN1~46Ysjb$9${@Lk6MTF;;is!%|aGLV%Fkhvj9eFPyv
z6$*G>4A&477pJ%yue26}kUEoyCYz83yRarZzZ~fFZ;-Lf3=CXM+@cJSb0qlPKsOGH
z1v7|;fYy5p1v2mjFmQT<u7hN8VqmocoyEs)&!ig5YLPGCTopC#RLX)YMJsRTEWWDY
zRjlNa>mE~P=AEjlX$(3NoRNc_OOQoKSJyhsE^EF^!BVHfMNWl_Ec2II<}bC*TWp=T
zST|w1xL2b=+N_*K*9unL3F|xLUcc3=X@^(iF3*OY&NW+|YqmPoY_h9ZryM)kzH)uu
zvO76TZiV!LIzC=4J0QrXZI@5$KA+Zop3OTQsy4VcZckfqt$fpy<arl@dk+Wq9uDa{
z64G}hsP|Y%|A~+Z$3iC_kC}2JWzL11)enj{J&*3(%AgSfn#2K(b&yvRL3*r0pe8G)
zCWnH9q-BD>f0<=muW!-9@WyS)y@wJeo=Bf@K6%#V#2FXkW}J(eb|!l2$%skEA|{;x
zp$W&}br7T`3hz4}4(=93^d5`oJr>>r?iO_(4(m7sLg3y|P{*F2jy;f?2vQjZw}Iw{
z0^9ZnwC;nVwp{^jXw?vCp2)X(yKnP$NbLjf4f!^JR&;|;w*%cC3pyCQ7PKS}bf^Ms
z_mEf3ddP|e&+2tvHR~Z25Ts88se>RST89WSOov<<xs|Q}SH6%c7)nBFrKm~AAPnTC
z4so!`C<cjvwPJ!)Kyfq9#LPGYnnIe1*hmYJf*j@msg)okq;7(Dk>Ct)=LmY%5Tt*E
zyhjO{4X=jK7|>d2@wJr2*HV{WL#vNcmfT5QdN*z9t@wGDgL@A;RIM{en<W*{B4Cll
zpb!K){Ef|ofk_j*C`%qZfG+_aorBawklF{b36U3EF+oHGKxZR^HpL5pI!%nCAT9%#
z%^(7*o>(OrSS3J+O_G6Kih)fUgjnT4J2=?YK&!z;ta;UZWo^>T!y4RjXT>yc$eMYo
za>dQ^b@wXP-!EGCsA%1zy!B7u2y!|Eq$Yxr;0rD@*S=0){Zek-f302rg)aV=y7^!5
z3g`p|rlbEE4*zG^`JZ72xX_#NpP~3AL*;w575~k4{1@r^&ye_@A^ZzN@COFJR}4x2
z7?%7O+V<b<#ee;W|HW>AknFwx@((~&5~My-eEMJF!GD1p|2fZtZain$3$E$r{by+U
z$dGi4A^0?d-Bt#(wG4){L4$Sry$sr&pvp)lg@Mb4fk7WU5(T*<Mi^9;gN~J!1Fswo
zU|@(~VCZIGn90C;l0oDKgY*|xrQb{{;Pdee{_`6C=QjYIAI_r<T8}57|6kPPznJ-d
zaf|;Vmj5MeK|6y)o&WQ?{$|jB%AnrMpwTB`o-1sWA#7PD<xtC~U%{-B%OJa1z~C5<
z-hV;k|9qzZMXW(f>6BeT^^vMC=!jJ1kpD8l|Hb|OOZohl^#0Eg{$C{dKWp)So~r*W
z&Hvd!tFr$y^!{g<07Dc1v(5a^FzLTY&MBFaOW`x`XRiL3vHY=b&k0`Vi3~bb;sNu`
z3-%k<p0{kh&ei`{avo^)2;)*vZwS;pSP8C-*8XQ$_n&FQf36+>4fp(Kh}zA-pa~8r
zR`75f<Pd4dbPQx3oE<Dqs6Jv9FtGF0xAkEX(PR)%W#Cq1;86ly2n*Uv3t8a7#lkDj
zz^@HD*ND%Rf!__hq`@0BSSK37AQZwN7|g)$%OD!eAsfb{7{#O-%V(G=<XYq3w6l2q
z<MhSXE7m{MOP$WZWy~OK!@#Y|z$nekF37;d!@$JJ%_qttq^oBWZlArtA!ngO{sPw$
zP-O(Ek8BH;8mG;b_G>lDm{YLqR>6up;r)j_8@GEjY=g}BxHs$sA@G_6owPajmFshs
z+{#&UD`L_K|IYnBZM%Hicl);Q_UqW=-*GUY^N?T1KG*uKZVlU#=3Hvt_n~mjz38c@
zBPN{)n{X_&|0r^O6yARrw0?N*rTle|Gp8SxbT4LLmt$ZMWdR)(3ken?cOh}G@QMpb
z87ONznz<zUM3qMuPtIvslGnGjeEPnUnJ3C-pURtYHfP%D<S8H&JLPQ5<TIcOXwr%B
ziJ)n$@QKI6Ae9lUPXwun!g`N|!D=GN=v+9sX9TK@Iu3<)><9OQK=l!%4uX@Qg$m%x
z$gg>iU-KSNrPHzlJ}w7dsIbky2~-*RG=YZZ{2I3**GHi3Mc^}6A(KR)OUY|CdR2pt
zNQG279#v~049E--q&|Xlh#+i8eFW(fxtFa(Mv!h1oQ#=t3=PFiIT1JI1cmhxS}!Sa
z=GjDyDhW~z!AVGc1R>#-5fbAv_@E)sARgpIRu~DoJRMdmU4;+XL1ZBn6Qn+ZjNPRz
zy^*>EbV+*V;_JDKZ{{qzkv`*WWY_+n+I5aOa}7f3r0g;Tv;!E#tQj~p7+91Um}J2x
z{6h*!NR7h{UTO=egCL7+xxig}hzPts0#`|(ItWs8fwsnjCe0Z{KvPT1k_;@;AjBxm
z07CK%EJ_T#dJK|Q3~K(OwrQ$Ct=1W{1Dbb$y6tl>XDzz}Ibb|%?W62<k0I3%q)$||
z?n%))X#c2S{S#1S1g?p)R=><%^U8kp7yH#;IQIURyZm4D&VQcE|5*<GXV~|j;mCi6
z1OJ(J{AbwqUv%YvzD57p8h$gx-e9PCrn2R~>hAvx9e)|(elmoAVhDf9)&8G<?|=ET
z|E*vB*S+(f``Ulr8~?@c{g-_3U;4p+xyS$Ip8c17`d{ktfAI(Z1+If^;XDWKC9VI@
zFzqi#<6D;6mpX+P1wyy-*mp7LRxrrMfL3bp+cK~ifm@+G;Grb=uo7mAlp9=0*)p)W
zGqBaMi1cvF9uw2JD5UyXT<eFh_J0wb|AP9U71{y@|AmeJi<p9{BoWj9!Y2QPEkLyr
zpT~bj>wgSZ7Z~)9GU)U&sJ4mvFH%q3!eDTKL2Vne+AluC|9pm^%kl(FLFyG;{wult
zS91Ta;QAlbQE~<?tCMp3&m8)nJN7?A*?*>b@RIBv*sAOa{~0ENmm)wG#m)TBz3@Nt
z%>SZ!CzPtM#?HP|wBcj+vWJmV&I`NsGiVgb1kSN5-e=x$#<A-f|CC>f%fM&otp-;^
ztD&bDF>LtHun~k<w*S{X`k$fvCIe`{k1D9r0i8Q42-+?LTJjA)SQ|2z!~`4AgAC=N
z1tuE<lYqFAiK31jr<evKp8^w?Bm=7ms3Q+bt9+n+pv(eHJdzATnk-_v3}TL;8?^+T
z8N@vqWCIzL!q}9eIaFeKRD%VzB7}9KgtQ~sjZ#e$CVRK<t>68zZ1dCP1()JFHiBv*
z$hb6UXD&AbXjTg{nZ?1tAT6WgZJD#sDtD1}{sIsxTxwmo)H)A@OtTlthjtrf%*$JH
zD}VXz@czT@joUq%cK9^!@oC=U-n7@PagRsy9=FC_W`(P4D>kOgyHd2~QS9`yejWS#
zy7qar?C|Z}>({j}u=`+O_d&m|1K_Em-C>iCWiP*-w&+^y^s^C@PK5R!4e2`))^{Yl
z|0o27_8*R&emZXM1<-P)C09%f7BC2zGcao~b1Q=e<e7NEXT3sBML-TxZcwIXS7Q(}
z<<Rg~bj&acZ*k9=8QQQpsrNwQl+#&ruO!Sk7dP!p_|&suQ_jRpITbbOL<|Ih*%1?t
zh4&u~?K>XYcRZ~BL|Fd`2pd`lfgKsvdn~l)C<Fy}9S#Q16NPjg2<rqP$P5vrPXt+?
z09io?VF$JD0<Gc(_laO@6?Q?Uhafc(q(cO%gBn4%jYGJQ`p5@-w=86&&a-ACXt_fD
zCQy9@o*_c6gCI2yr1pW65J7j?@;OLX2)QzHEm;mhF2&0r47AE9X3{ZY>Z9nXC*fTr
zto<W+eFR}Zj(5OZi3hKYFb}>%s)<0S3_-3<PXQgudMRn%6-afHJpU@>{BY#@2uTX0
z47o}I9fb$3kCNwJjGlTjwELh}!#3-jg=*nFk}jnz`U#*@efTXIShc_d^-|!PNDMqK
z2RV0z3%p(kQu{#G)56)@44~#EA8fD=QY#6;x<x{uL<^c)5&?CW7{nM@q!?Ib7?>m(
zSmZ#99E1$mRh-1FV{`%vZBzTg>Xs)@I9xFILjI!bMN4nxt$L8Z`eF8(hdFB>Wq}c-
zZi1Zb3OUmi!iJJ-p5(20lDX<d=BgJ~%ieoz`73krzt;W#d{_T7odj(T;)D#T9R_z{
z=Kg1>`@>NDk)h}<L;53z<g*Mt-!%9C7hL|IA>%(o><@<A|MGMGiy!@O{`9}W{r{Rb
z{&QUg)kmUtz*W+{|5Eq=%RB*fj>PW&7rXyo@CL{t-b?@aF8>!e@Sl74f3g1G450@Z
ztY<SQH8BXqGO!0RFxZ2Rb!9eWV9*5z5f6BL2|1giu_1dBH5nLmKm%@|<KX5nFs@?Y
zxW**<m__C{uj+puEzp8+Zf($BS^>lVf<_?3r~jYZ0EC!q{<As!=XSl#V0xOtU?Q*Q
zDvP3{hS~dg?ar~99fQ<F&??CUy*`q6{4ekLU(5-#vW_S2zf9hLw&wqwZJ^;eP#rV{
zG};MTn>`J@B5oFV0%+cUfn}f-Akro0RBErs&AD5&;bY#)Co$76DFn}9P|KGNnC(z{
zz`Ws%Q}=bD>3<cM{b$_(LX2zwGpz$JN8IqAVKb<;z`X50>yH0ANB%QR`vJNDi$RWo
zk%x&xn1M|KbmuteusC>q1PL@oa9~32!bYp&8NpQ*Hv@+h=z?;7MJ9el2IxJlLd-mJ
ze8S3H{PHY((ky(^(E3Qql|j~nQ8|=TJC;o=j!i3$S2KxUJ4Hk{T39DaSSN~0IZ(p6
zET#WY!kkOBJKq<rf0(iKx@K?<Xgq;Kg@IKOoG*C6%b4MFF+x&Wo|ZWat@9S!7A~?Y
zT5MCa3{oFi<t{PFTA&#})i8Z-=KQOrYad2T01d=}79q6m^K0Gb)q23I^?-NVe($#Z
z4%J(1$~VN#I9IXhMasM@0bK_|CL9UuJs8w?D5&ocWKb@+A2dDW+qoAc(tkK?(y_1!
z$AZB_cEP=eLwgU0feU;PW5SW>sVAdnoy}N&t8~MI=xN8y!)pu!%e;fLB@`^!xI`I1
z*I}@O8%_)i44f={vXaV1N~Zo+zUkg+t+5r;)7#e-PTWyE<7oMulLfQS<<2;rH1$m4
zl+!U&PQ^?)6*lEe@Whjm6OTtuJRUXSSQM-W1gQ-}dXGV>AqW`)?h1iAM14m>dk-Vm
zM8RE$gTV7dAzg^&3XsYOJV3V_QW-(2Ay7>O-cRJ$vctD|yFY00_I7YJw8OUn#DLU0
zpvnlmUJle1g4IFrYRD6OC4@)y1`wlm184^m`0P{6`pCU<4WwJ-UbzN*==U0zvXw5l
zJ4BE=$fbB097Rt&3PI=<6KtO>WYi8aixfNcBq+*|NO-LjGwl?fiV0pHL8>P>2_L(I
zFd&^HWHK3i94qMFiaD2(=72`+z_-(0PMUWGhAu%WBS^)R1gfyEq|UzzsfHjVc;nKw
z^!Zm)=3Gjhb~?J}U`X9&=iEh>G2I&8RWfD?YzkhWRc&mV;NduNa8q0mJov^3?g~K$
zM%ZC(Jaz^K$i8?81F}0FQZYelC0<Y?6Lg|7KbQd_;k_ggP&LOY4_d3hr^g`eB%~jv
z<X&PK*X>@iJi23d;<WQw^KaxXyI-^#)Y$>u*0lO@?&`-`s~%;88M&(;=dVIStMBK-
zP~OT%c`F}fuYQ`n`f2Qn*Rd;Ji*Ed_d-A`;qyHlJ{xg8@HsZPPpZ_xG23V0Z|AnUh
zXGnR@5cG^8<OM_U4TivL4Doju^Il7A{I9edJS|f8i(&46mJR>4ZvVG_^I!JLf0@hw
zg|C8;;PwB)H~)*>1=UL85B`hZ`!90mzvx}i8AJkC|1(|zA-x;_<xc!(vg=@AaAROF
z1|ddG21fAiV9@E-3gGUiAh>r4DNBhbWx$guzRWz)3__E66c#edUE<Na%d7ca#NZ$3
za6CiM)DpKo=-5>r_5Td&|Cx3EGa7tn(E7-rd6UoPjg0FLDd%qzCZOG~qI&;@!FOAT
z=>Hco1D%p8>jpaVPQe{?u!Dm0e|e|>eD43n{Qoml{O9fj4a6}_f~|#vtU!P)MVJp>
z5w{35_{P8bKlk$gvQ?Lq>aWMozEiT{W5K#-;Zx7*Brj#qFO>3`=u)=Zq4~7$gc~w5
ze`ziM&$<zGtSi&D|4iFJE960o<adJBgg{QiP&)aabHjfIqg+r8&L$x!rNbej$RVQ0
z!XpVzmhf<c9ApRCY6Kx6yOUVK1u0~C1Ng8wMgazPF;-p~1|De!c2EO}l~<WZT$@uw
z19WFAr!0ezA)k^Xk5LT2X*{<{B8OodqfRW7ZXAbhDz`xfk8u)@aT1?#0)wKTv|D9#
z+s=^ABLzzzlx}*Gyx<a}k_!WaA_u=RBZmyQg~kugGVt*(0XaQC%e>__g{!QJm)e%C
zur6I~QMAIcaJfy%Dyza(MwyEYGUg@BI9I&(Vf56KzMcC)eVn#~KJACR+YUmISL*@S
zhTRU;+oC3)DP8xxao6X#=@<OF4+i!ghV*@)Gemud0(%d7ckT7<+3(%H+plY%fA>K+
z0#!q>E>g&ZV-ZtMCC)jMx#UXmh6mXzZbbI&3hvkv-MC8DIfsEo8+5mef*XsvuZB;V
zQ`&^^>b1%3J7c={rA|4XGW$yEoU8G(u0~J07&{eI0fkLE5ego53!88(dgAfuiN|C5
zkHz#u*CastK9HISROx`bKM)4;h+KFN%Bs2G?t{VI2ZOo}1c7@-NOcfsh6u8O2vQ9N
zv}_M(*$%0LAhSZg&D-H)ao&wvy&JdsG;Q-~LK&9>ZC?d%AOh7p;NB2~;ZeN-g50at
zLy$-1I?pQTp>rOU>ktUGXaQ0mxq){TA$N!%T_nip9HcUGEm;n)j9iMBK`J8%iCiT`
zLDzL4W|1HhNs#&|YRU-+16O?nshi;S4zBtL-eZDyl#nYU2s;s6B|-W}$#X9w*GKUF
z5u`E#FKM^}t$o0!f~PIIk+R@g^4!aDGtNa!IOg5F)4pJ-PV6KFuX;X<T+pQt;%=a0
zv6*zhdxc~`9dpnf%>v*|1Q~yWjLt!3iXp9ZNHqizK_0JzR7pJG1|fLd12hT32RhS|
zSqQYAfmNJ=T^4i=w1^(Jf|aaksG(=3O=LrG!StBMby@v;^5>kdTz0c)`Gcb65AwhW
zdOg#sM>(q=WvzOY0Xn(pLCz{rr30yYK;)`>d2j@-kFr)h$y)UUQXi=t_-}jjzu1HS
zTvz@xocPai>A%30|Drel3!es0npgf|NPoo;^^PI(Ekocn2KO@z0cRPKA27}SFS-ug
z?4S9cee-|8z5h+0{Wp62U-AN|N)oyPt&aq+gT?^G?*13K`(OADXg!1A&HsYeK#1Ye
ze}?n_jqm<vXuih4;LgC{!N6nzx>$i#mw{CmbXo<wIs+@DJ_4T!054F9Wr%?X`Yae2
z>=+nI85n997&b77onlk^Af)$)L-#+I{(mk#5E9e@9S_c={hvYYKd3U&{={PRh~4xl
zpZP0Er?2wX|E0}Al@Xu%e;#$vbxXqLp!!J06;vO|yZx7U`>*KoU%?r)v|B#nKU>Rx
zp$Xtc8q;BG;UJ3;AT<$WQQUISG?3`V{{n0NE7o08th*LH{YJ@#kA>@>r!2bV*>H%#
zyi(A)!>MGabKALq$+u-^{nlFkpKa5Bo*n<$cKv7F3EH~FvJ1T0WH)G8KJ)JX^2h!w
z9RIHrJq=W+vrEdUTMEc%GH{7AfKH!ggLQ}?=@W945Er;91({KTq*cfq6GRq#;Ssn>
z0@X)6(hO`eEIjZVV`UjQWVqyPmCQmV>@q~HQ@KqNIgR4kjgt8+v&5{6rR>Y4919dZ
z%H>@OnbbqXol5;G*SJ<~O`mhUe9N<#nWq^ftr-~PS-9mvQ>CDT;2?WoA;XCb3=F)A
zhJg;H>+Fix*px1Js9bGVvCgJ!ja}Iq(1Aadn{0~LnCCBzpMJJ{<I~jnm;Jg9fR@8`
z9`Wxw>fd$Luk(m+$6=rLLmtihTpM<W^d3u@d#z^cyQJAyLM9vw1owRcyAK9*9SH0`
z=-av1w{x#|*Iv(#-9g~G39`C7u=lWk_rcKK!{Pl$K^+!w^${`URMga?iF3|mueedL
z_HO=?3(*ty#5S*SOrIcSm1r1P5l}ocv1L`^)cv&!PuDHFQZnOI#`II^(@w@tIU76W
zY{cYqp%YF+dO)EQkB3YE^~J)#U7^VSqtSgJ1gVLjRS&pphCCJr=?6jPhCqwAyFjbv
zLc0!!bR7r*S3^Nv2Le0y2ZC47A=gKcHF97QyrH#ayMOaGcufRhpw&l^S_!g#8`2em
zteS(=L2wdM6G2EwW#nG99)Er0R<Rn6JS$ei>mx`t<W{-@UK62JMg;4lh)Ktyrkp@V
zkd6|hZbCj*2y=H5<kUORkSy%TJ4p2u1773+Cm{>M<7c0Tpp-cmQoz?LfiAFs9e<Yq
zyHE*IA3^S|05AW(nm+$BGRj(XEpzeB%*8j8=3R-OeKBs@nb6+DzV$n8^H&-tOjh!5
zQnF6sQ}<()v;b}YVU>lgOb~z##6j{eWX%$Ugv<^@*bowu>mjugq&|X>kopKha)384
z@PoTrBA^jJW<>^GZ3bxve*F*`mpuLOM$63U-qkCE`wk?|IhVHR8t76d@D+ieYid{C
zgPaZlsgJV2l~K;BC!jNnR^H25{UC1@2tm${$Xk6EjBe+xzFn~DUcst+Ijf%Jta@Uz
z>a*+SKO&d^D?R+rbK^hr`Twj}{&QUaFMI{Gl~rufe}>fi41Nz70v<8=J!bHJzz}dB
zgxoJML_B2Z`YyiWKg0U}jGO-p9{jI$>%ZE=|B_e!^IrSUcl|%#wg3Fr|MT7gA^to6
z`C&DY<jwzL*Z#9#{m**&zu>w55?B9w9{SH<SP42Qj>U|D%@A~b59IVDRt@kvI%NjX
zEzc6*!AxT7Ayjpct)T4SwZu-`^0^G^ci4<y@M?oLDDmroHqVOb{}<E$FKz&;bcFT(
zi-6BwkTCi$XYpUr`oFBne_^fv9CH7eCI53MgRWJQwD_;+0IG%*+(8>)<=p>^xcwJ)
z{m&HiUm^q4Ct{oh?gTA_t%zF+uGyFW2OU7M0!*$1twZ47_@8(Ef3@!WQuUV;=iVz`
z{~>qv<FYMpvKBoQ@#*HWYj!Bv>DzuWYRWzRMV~F#{AJzopM4kTY(SPh|5^5d(H>A`
z#I^4~`|khZC;uy7{cqHNg@N6Nfk8@0%#2+`7M#W)M>Ig{8psMy5pc5#w&{RD4AdWI
zkOJ@2Rs?q{6~I-wEHi^38>0v#moO`zI0KI)=y+8wanOn1Jlc$+CafCak~Z1=Ho3f3
z*_>AS+%|<mPBqfrt;+swYC)Zf;oZuSz1Hy)IdtMg?W(*>Hdqy`&6sz)e9N=+CD()u
z0~i=27+EF2gR%VJ78*Nvs1UN`Pf*^-&!%{dL-_`WsturXd+N8kG;H%|+~v`@%d2sh
zOZ67J(sjwRE;sG@RJ#6YVD~})&iw(MM*_Q!2KF2e>^UCPdm^ysIHY3oYd_%Eejv2x
zC}=Ho|IzT?!;lg?sOvy5s8-q+(6uk9YhQ5p{=n}2UM)L<y7q<l9}by#IAZGYh$*Kc
z!QGjtNhd&CdnTR=={piN<y8Fai=biudFP_19SiN*<JqvmyM9AJ%hvGjeK8Xb$4ojF
zJ@F`X4rub3n8|0NCZ39%a1yc#4wH<Ua4Zr$O9dJmgb&2^Lf6PaIziz`yN4jF6vDd>
zLZ*kn<i603y~ulrz(aG$-6H6QR=<{Ael6fCsd<NA3kbo+=s*h<0vfmZH-L}ehLMmN
zB52PDvRb=(17t%h=yZkZ4PMn7KwY7#^~j`K<vO=Y*f~Fy>)b2WLJ(wsE84mRx6&2P
zrOP15sbnd<GJ<rAoQswsqo@f-q9z=H^pB9q$caazKr0WAMW9wmkl{Nx38|hSB=YLJ
z*y*R?^$}W?1h0wU3`ms(T820KJX(DOsiYFXH57P83DiG=)JiGyK;v?d$|!5mwX8+g
z(&k-C1?^qB0-D_HI^f#4-KJ!fM#3~1zYZR|3I^Q-2H5}xK?~504-B&4HQU1Ak`Uel
zf{brK7A!%I?_hzoJRvm^G6^XGAT3YKx`_=m2E`=Iz$VVXr^+T{ETrqLVjpW5Smv7A
z<zK!qp<`3()WbP*PGu~<nzQ_7&WbxZEAC{j0G*x(uaCgDV}Q=k%UJOM+%tNVx%?sc
z_;FBe1*))C+{;^WFK6X#FuIwu@@5`*4=<?NTJgws^%uWA|K;!fSGfP5?b3g)YoN-A
z<NAM*tN%fZ-^xBP1fOMaxyImdiNWR~gY6{-#|sReHyEOyFho9LX!$4#nPK1XU*W=k
zqeuT`Z~Pa#_+Q{AqCVoj@t^y~f1aBlBya<?vQG5sf6=S|*{=L&JpW(d?0<#3|Jmw~
zGqC!Cmd~+SFtF$|FzJGh@?+8fovOsF23jt{tOUM2M-;V|A)dzw9<74h)aj>S(!`+q
zh|Tx~5BPWo&_X-Y|1zfkB@99Jk+{Ks3FH6LX8)y4|I3>Hm$w8l1l0dCN&aWx`NY8c
znMEE{TS=Jzm$&<`;0{7^?x0?hD7Z>u3HdLR^Ph3nf2P^6`e?y_hQ<Fu%i&gl*CDI|
ztyE-O1FDP|*8dmS^j~D#f8!Z%<XdjU&b(E&>2t}ZR~6gdweJ06lCzrGyv90zi*NhI
z=&ASZH~qKW06G#7ULP^-`OgHdjM(=5=i2w5W6yt~Bmd<u{tw^slfkGQGzH15%)lWC
zo|NGLEpGr_y2rr)8X^_~*EJFhjA9JT5}>eUkzry|U}91L*F-8{QW>21MHm?PSQvyD
zxrEsTBpEnFnfRoc`DGY5L>ag=C3Ssd?DOQE3S~U2<h*NS{hH<cTNDGk)x#$m#7s7d
zo2nKwK|gh-YvycT!(;)=Qm5?YW;rXf7v3x1@+@!lU6<r0@X$9uxIW?m*PCqMHUng;
zi(B5v&$()|SK}_%`mG*KJ3zy6?R)*&4+L}`3hF%M-?rbQen-sYGnJcP)@*wdJ@qtb
z2(I@;2)G*Z?>g$+aTwA?^6fYbIejj$<Dg$FXs2gr_d$4l6bkAq9SrV02syhTpbJzt
zCCoUJI`?A4)Z-x&4~I-R1{$WDa4f9v2&8WUnRN=8a4c%tndEurlINXEn|n5D;=z~+
z2gAETx2eWX1eKQHp(N0<gs6$9&?+NH4+xoz>^~aWe-u<1^&NrsjKH-LWX(3TCW3T@
zz`I!?>*c`J5NNpqcw6gUNPPs^Llo2j+EoN!J{N$rhsd{iCvts+G-m{=k08gK`ZjC@
ztxTu`oqY;rfNLUn1%$N@g49Qlp*eVc1X{5Gu8iEu*SM9hL0-a*JT&K2vJ{RW)ewY4
zu8AOQNPPq+;Z+i(K7x$eftSRCDkf0f1RKYLNWm*6NRJ6JhzB9jW|U&5pN>XC@X83n
zfLBS$bIv8tIhP1V@pI0_&p8)6=X~rOQ11zRmD1(Jxloim=W5E_YYFqNCC<N|HvdNQ
zoU2JwPet||jBMNPU9r+Kb)rT{gQ80okA5hpyd8t62_v^6=rS?}$ixt2Z#$&wL0&Th
z8B&0VKzc^VByxQOVMD4d$ogAOa61r8GD?6>W017q()19qPSgx2Gl=VQEuQb!v^lD8
zf5Obu=?gAF`$R~02xc$44@Zz%DSgG=^c8nAm*3A^ejj|y_yb6t1=_B=;x>d07YA#(
zle79>;>wqC%btmD_^*HJzr^GJ!uS3&U;NK?<3I1s|NOWAD<A*QF!2vV@KXl&n+$f>
z8LY1`SeyYNvl9$9=NP<CGeq8GX#Fg<9JJbj<G_E@$N$Zq{1><g>R9n!|Ic#`RJCzm
z2a#OY{<B^E&wdRg$bI#{;AIeEzVe^p?0=Ix|0NFmXVA-GU@&1|(FHA3X4PO|(_~;#
z2USLl>I{tP49v=)twyX0;08SM*(}7@N8r;2K+}0F4E!N7ev=t&o->#|<kJ2xVEA9g
z`oFBre;NJ%Jo5j!Wd8Fh{1;XQAwiY@5X2?>pHbus1IG&n*82=R&o~wTOB()%*GEd8
z|E0Y^NZ9AUkoSLv@c&YU|2by<XMwDsn*;6^tpM#0f-i()*aTjLunAnbYy~Y!G+yyj
zsQpIR#H;0-zSZshRIvWZwDbRyW?x~qtuaVj8q{?$YSPV!ZT};-{b%3upLf@Pmc1YZ
zYB1~tomI=RA4;+v{?B{rzy9g}3?=&**zFk@3|JThSU?pJC%8t2w1~tQSY;VlwZT`G
z*fX$NGBDVJkqZNxHv@kZgJKSY<s=5b<qUBf8Laylm>d{DEA8b$t9O``KzF8t+G)}(
zdJ&qzwPIlt#KR}bMfAx>_A5kBQ%#tyoxDINWuZytB9qKTIvI0J3zwS2^f9Q0^B8BE
zL{HRCo|(J!cK-SY>C12UWOsu5Xzbt|0U2|Etgm1OPh)Y&Xt@Tp><ei-;Mug>w{5>)
z=b^x!qrp8#Lwb*e_ni#wIS#2`A}5?opLacO>bZ#i6OfWTu<LMO*WrN9Ll6|)a}=`A
z$gg!@K>LA!j(w0BmSAwf9^QM{zwe-L&wfZv6x@5*zjJ@&)RP%YZp6<x5jp8lD7dN$
z>)IXGwL7S5dqBrlpQeM}jR!oMcKEjM4es0<(RV0z!tv<7qY-^aAk|9Dq!UpSPr>^{
zkkb;P!N(?q_Z<%dxgWGk2)RCjY!|{L!+Q^h_d-YHK!b9yRSK}0XkS<-s9O}$z6V|n
zK@L?2Y=`!Yz{deWPgDqK2CbU&ZQcp0lE4SnA+MB!tXqKeh9IMJ5E8jQf(*)eRBwQf
z%fT6-3aDzWTh&?+1Kc@st$^-tb+1?psf-}?5oB}@J~Rgzor70Kj>Su`)<jN)OCShd
z6G0e6)<^K#3RiuEJg)?2BiBdBY)F*^*{PH`>rCRTGx4*|B+fY(JL^pBtTWNG&c@9J
zU800pA0^Gc5;fyO*pxFNy~n(o_c)cVGD)AK6WJx>S;c9Z$e<d?AYsSAsn5V5&%hu9
z9y)_u_{9yLU4;)IKx!XI83<uRRwzN(kkzwTYbA&vWWE@Dz9r~@eF+u`HEv}qDI-q}
z_cYs>YR`gc5%o)>JGW*`JCrr=eBPqVS&Oe`FS!OeG!OZ1rmUs+;3#AHos8vo(w5&z
zU3NQT*}aTq_aL<rqz;1AM_DVuw+Di2D@ea7YbCTkieLU*cE^9Si=g_5?<Qyfjpr6<
z)Q;!If39`^8FFthc-{tej;ybM>Lcs(3}&YoEWpTmH-pO&hUSlQ>p>@Yi=F##_2R$!
z)&G3wLG=;$RZ!K#1+J1fum0z}3aXFTu7W0~xUT#cxb$D>%72Cn{~0cV>LZ5IeW3es
z7;Kn1jhHwLL6s4cDyTAIR%2k&03lXY26lA@P8HA*7NA`okj674B8Vg*fe${YO(oMH
zX*rkMM+WmpoLZoj-ZIvp%7tI)KLgKa2G&;$obMR8-!brhVBq=6!2bh;c)l`ly<%W{
z#=!P~f&V49GJ1U^<NaU8`@gUc2r)+d7tj08I0?3_ZU(4l#Jmnv^)PJu&$tz|4uNU=
zf94(k8F&0=*#4h;=YP?||Lr#a7w@?n*mI$7=ilBl|1*}~nSSB_)N}t8!lp_GPVi|t
z9Xaua&#Eu6yZ;OB1Jy^&;42ZC5Bz67@ShWWd^+a=&<W~nhyOF}|8H{PzsQDPV&?e_
z3`T4WLad<16=Z1*7pOnZAkNII${=dZENRaq>d7q^#HW)fZ(AknKgBS4g;vdRk6BNB
zw*HSj_CN5@f1SA>3}R+6h=qVRG^v4B*RZLvOInz?rN(z|_AOs18#`SmXPHIO3agS;
zM!73Y3s#%wuQASEVN<xuu6T`6-XhD=m4=bs4Dx|ohUsd)&6<hR3s&6AU;iLt#z|S5
zC|KW(100c%Y6y~{m>C$jd8N%fs<uS;9*3MA7u0(!sP|YfxJnA`IUd{vTH74hanQeQ
zKV*4x5V+qI2;Nc@(0K?_D+P8P4(>S`*mc;ec@OA<*RI1+lTJiUIuYJ~G`Ra<NY{bz
z-opX?hkUyC`FHO1>(~>}1*&KQdk-egy_mK5a_qEY{_VTHo41E_><n(-;oG{&yLp3W
z{XWO?ZPul$ZOYa-SFUxfUhm(u6LdUj?_tOQUG&7`pn*Z~(LIq9PC{xT$N@f(8WWSm
ztcf5!Bgol4ka;4|(F)-DD7X`}jVPoYR1<}E><s}QItQtb(9TtWY#@SELjldZ{2F)o
zH|_Lp-0t1D9a0lPSIvQk<{-5XgoLczhSW`vc_K)M2wwZ3F`#u2xIS{NSPMs>&QT?(
zTLh_(TuN7=RYtC`<=jpsOC5?9I~Ff-C|V4ucO3E;AtSgTq#A;gMAk=;&Jpq+B}o4W
zGH?f(SAthV$PCCj2Q(61AH~c7RYnOjPsf9+q}W+!KwTv8)Dq+#rNr46K?@sZo==>9
zHgU?y=)S|gEn6JQm)qsc(v9xa^e&aMNfXlXW0J9B;8$f}l?JbogESx^>x&q`%`-^8
zKwAn65rmWQDhX2Qz)48A2+|>f9DT+E8ua6kWf0V7R<PsI3s-Q;)(on*NbGZ~SQ*l`
zJ*@v=!pzet3$DN_pqr5EYavxn=92qxl)d;t_TmSbV3fY}e)`h;sbB<QL&;^JZV~t@
z+dB{vdO16|Z<V+DdG4xb=F7jBFaN@D@W1%2|C0Cr3taopaRpQviCq5AbNWAX<A2cg
z;;wfY?5{IeT?V6b4Cbd9%ug{`oMy1x%AmK3q4mAu=KtKs|EoOsulC@-<YiEe!+rHX
zq&{N52JI8^!1_m=SN?Ne`OkIvKlhdYyjT8<-2U&j<u8L=I;e95x}eqwbk+oyAp^G&
z_&P!Z26lbWss_j`5~msis}i_{0AGIvF2jf-Amy+iX!_5-N1<pxL)do)$4Bhi{{>A!
z2dj$d{pXPQ%fR-Uf$=^A%OeJ!=M2K{SY`gP%KT%I`OPZ(hgJG3gV+ZKvDfT!pLsPw
zHy=UzM~WW*l{`U51Ic;+7Y8HGu>ayIp!Id26M_1{$NMY?FU$t7f&-m&vEx7UcF=+Z
zhFzd_aZG#ua~=7wci=zAtXB?Q7gN`Nn{fVr@uuf3yWh^g_CK=!j8s6cQ^nqhzRP}d
zo+hsUA-fH<rjc#mf6jydnGS;x$HD&`2miAl{LgXdKl>rjvFf19q)z|ms653WkO;0v
zG+>i5;C?!zC_A4LgMc-MtOtXBjcC?(hUUj&vwm@|{4cl>v{*uD-+!S?{}mqm=Q{tN
z>G*%K-TxUH?lQz|XRw~eVAH_sGr_WWdBLiCWm{i4HEvT(U94BM!K{3%amgn0vR!7S
zJ57qWo0aagtk_~xz1^mElSAV+%j|g!@<9x`NopaTmPKpR=HE(Lc0FX;ac&JaaQ_HW
zS2BV}6d_CdAR9}C8MqW2vgT(`Jq4+5K!?bIDx>3pT}Ok#2;4<F9N2L<sPjlr=Mn$5
zgF)?wLOPE?R?tDtor9C!ZTmf&c1KJ+m9^+b{PeToy`TjEVeNau+V^;O?DFZ{?bo);
zzkPR5(;ol&onftqW4ceK%()ad?MztbKL4ie-i-&GDz-ZnU$iecXOeQnD(jSM%~r3*
zUBR7uLqR8$9*zL7!Gp{(A=g2W&Jm<CitIZMImQQC`}7@&fFVdf2u?z)A#e{4bi7Z;
z!QhUA!Qk`df;;zuPgmFj**X;5wi{j_fexJmpRo|svM;D*A97{n4?cg}r)h^z(+=;(
ztzHdVAaxDoCL>4>2vQS4*pNC1LPGjPnDr5~4gz<DpzNx(po`8?>m$&*h4M8nrK=$8
zy5Zd-cufQug>x=i3_;-IIG|l2<od`Vf02FOLWlfCk^P5}5xioGm~bR=;!y|+hwd5%
z?Hz{KL=XmA{|H_m#UgD`LavXHH!i`&;eDl;8E4{WoQj`uDt5-HcyQ+kQXfG^^T6kX
zgAUe+ns6+*bH8`pX1l^=x+zm+Lt4aKidc*i7!<u3#2grS^ca{`!2{R4;EVvNi69fH
zkopL%CV~`v@R|t1fY(P5F4p>p9W-LkA;m1NBcfp|Z4qYRn{5-<?wda&sCq?w$JT^N
zhqLCKO`Cr)d&xD(DS5E7#KE@&F1eez<Zi~|dqmbp8Ot7k5V+q2+OoIecFyX1AhhBh
zs7hLTFMHYj%w>-f=HAqv{laF=Po|^)#cuu=zx$u>3TXa_=f;2GOW?+Q+9w9@>kQ7f
z8SJisDkIBFpvnj|WOtgu^aQ9fa^BA}?Z48N|KgYbt33i$Mp9Qm-6BYR#0~2c@qmZv
zAmeqQ%INBU{u`hwiT~Pv;amS1(l;=01hNacG4VPwNZ2z-Ixt8%GRe9y$-02pLRJiX
zW}uV$Kv%SDFtDqDDij9t>LYM_fPsg>Z?;L>WzNL^46e`M^^u_Fe<q>tpq>%$YZlSZ
zd`f@Cwf{>RfRKa%=&n66o&Um`pd}4ry8i`rQR*WVZ$y10>HD8E?7u+le}>}U47K1x
z80LWbN1&0q-Tzq*{O33fsxr9tf$Ag3Y6P|epnAmO)PI?E|D1cTgwB7~dE#&5fv@H3
z9&{c2oV)meLE2)yl$B8vu6j(nAF}AR<fi|MyZ&<>`p*rnjF=9CDkD&pbO>Y)JGe^X
zIPzcg;(zxY{~1D-GBD{gFsQIFa4~}por7#T5CWY;#h}f=YoS_oJbcYhi8+6im;Yy3
z@}C{D++q#rfI*Qfpsmmh2mY&``Y(Opf7qV?(v5c%6P7qN9!*<trC`;);!V$;o3`r|
ztkW;vXi>S{qH?=c<z9>O-By)*&C7P#*X^>c+2Pi{$FXU<U&Cg0%N!P?^q_{VzHJAS
zW?ze)e<6DAIaQ|w2GDt=yx?9Nqz{W+AAwd}NH~=z_Z|xG0}su09`Xl`!hu#SAlFC!
zZ3hF|4+VA{_G#W1+I2L%=QwEjTtDd0Ie2B{-*wo#ZNEq3u8`hi84IqZ%()cadpM|L
zUr6h&pynO!E!#X=w|h2j^=sSZRkg*ve4|_GW~ajS-pxA`W}M5IeIc?3G(cyZxk4p$
zv#j45)zGbug%=|yoQ#=#CVuM4=!r*>_X<Vyg9hv1eIiIr1R>#-5p;b*FSr&1kH|r4
zB1km^+B5{Ni9r1z@L6-9%BbTYqz4q(zB{M`Q5nH^5<z=L;4>Bin)ZMY=wb-aNF97L
z5xhPEt>s4DLj<XYAS9$3@~qhisfi#YWNrxFErM1+;K~Tz8v<2Em1`hem-00(<)9r#
zZWXIRl@Yi;0$nu=K86ln86geLf$Af0WwZ#=6>=(Ef>|Hg=Pg98j3WCFqt!>qH4%gj
zsi7d15nBHUt&4<QH$m8tc_j!LgLL9h)U;Dk(@sHZCCEW}kR=e15j;p`6f@&YBq(_t
z5ANLWQ@zG6cb;i{r;1;NyiGE{PB52(1L&p(HaT#s46<DaQjb9D8X`%^<Pp5i!sJ5C
zgsfx`0G&i9q$8+eCu$a<?vbSz(d3vt)w6tEc<Y|X38xdLUrLyLC2ip?<O5yPmfTBQ
zaxZnsO$bU`dMkbD-Snk*K?fFr2jw6`a}dGwrJxE6t<K6^dOLgh-Q4B3;V5U>t&Bz2
zQm0>x={sRobJ)E02*d23Vw?Xn9RJUK>%ZhJ(9{pxCD7!N?CJju)BiC9zh`iN%;0#1
z!R9>ZyaMyH4CW^otWGmn9b(Ym&S1Ght>U@h+W%^YKx^t0?|_Ejgsy;Qi#TEZBgl9i
z?^V!3JD#imd9H$X$Z}o%FL3og&&B`xH~%v%d&i(#${?G{X$V5RCdq;p>B1JN{3eM!
zx>3w3flM-Ppo^RYO&E9$82GdpxYa>5JE&ZR%raoc7o>TBDGu48EDqA1xY2mZ6W;v)
z3|`MUwf_s5fG$E3()!OR{+~hgFSp`<ab3^?1{t&e5~lyfjQ<N8g7(kyYyRic_%EOX
zs*HGaL0f3WEI?Q9DSG@@@&2#q2Rhta#`nLh-+#Wa|3Xp!8FK$JRD#cBm<?LJz_A6i
z5RU!ef05Jw6|VnRx$<B1=zq>5AOt!A<LG~dQ~!nb{I{I>)O6~@+|3`W5B{&-{IX=t
zqx5CBy*l^lq%8DlIT$kgUiiF+Ml1g~Z2vEI3}gb!A<!`z9N?OW^B}0l#C8x|-yQtV
zbNIj5d63l%?lTzJBN!MhKsU59sDb8y7_`9K7WBCS`Yf;f*FEu{q4zVxlwZ(IyWjyv
zu0x=$XP`T(cKsJR^<Ut`f7MI>8D@UsPv4`{ax-?t(}dM8($+n;Z{K57xYoLAn?=n5
zi<$$*l?N>9j@q`IG_N~s-MHVe{itK>LD$Zs!Ci+X13DQrQvI6td$phT@41k&_*(qD
zi;hVx46N#)o6wm=VAETW8Cyu*1l|oH>h6^_Cu+j6ppJdM9ft#ZjzgAngStpPppH^N
z=Mmr5Lq07BLwk-#O*|7h;dCfyWgY0mxuD)-kfTujyAJzw9rA47?^?IrwQhU#l=B%2
z@5D?#=iheFyJ@dS{jR{eUG60tw6oXf<g69UTrZNdNvz<gV)X^Xv|suuKOD1vTBm;2
z4*4zZ@|sukKfCgOwXiELC7&WDo(bs%ovIC4ybU>CJ7U5y$oUDNL*&3mn}P=JP`gFY
z%BTml%n!L5f>cJK;88?Sp9tI=g498fDWQ<I1HrBPA$y463l$(-@G9;-piWTZZhtre
z_lf)(LEBn=8@Bs4fQIS7gLNqVBS;+tx%d@Ag6bpio>q^V^&U0r-K*ERSFeNYY;}Y5
zfGXFxR<3cWSPeqpdIwSmK?T8gpu2!cXVAjA6)vT#oJ&?Zl`MBgTF4EliCl`8!TU$B
z$_QE?VXchp^A<+*9g65Xgj_2@s-%bsM<OO1fz(0BBwBrhRxu&hP>?zZnS|6t5E5&B
z6gBM>q&|YL43C|8HhTK$$f+lT`wx4x>~N{tV3xB;Gon+{qmtJ=kwHC>LCh931Oz&P
zQx-IC2HNriS*3tkb`dFpSq(u%Ae9jlxX;DTz#zgdpdzSZr)C+f?UQF6Q|FX3&8KWZ
zP{a1<&I6GXPRCEX7(3%)(%fsw^RGe<ECQVlvE*Lr;=8F!Zl)~0k-qG9`m)<;OYWpC
zxs$TwP8zH}N?&>xGFz0f4AkYxUUolg=>r5>b|+)Wt+WMKlIC29oqjrE(lNh|J<hco
z%`+D(hxRkLbeq*27Fz#be9M1^WB++>|ChQAt&g~`{}<f<pP}^)v_5jU46To>E<o!e
zivtX*YlTuS`^@_<x%I#0)&J^`{>xm4)<=-pB1oSIvY3JI8lpbpz6Khv<2(o2d8Bgg
zKSTCr2D3Ur@5$=^%?1%YI>AlaL5-Sz)v`|cBBn{)I+36a$YKtlYX|v_7=-i~1R<w1
zaDqpl*uZ0|$dSbeE7LGLwvcoCMHm<a7?U<x&3-Lh`k%q)C8sv%Bsv+B|DwA8`BgzZ
zAbHdO%9fxtb+YFFh4laP>-^`^_|KvCpIh}mr}BSJ)&D#i|2eh(3mE<vg&d~p@n6*k
zbi0I-|9?4uXniCQ@t+~{C+O0sKG5bP#<l-BxBh24@SpSWf6+7lwIBUAd-h-P;(z{A
z|9MaRXFdTM6O%vrU+Tzz*QFngC*O}-@+fc1r`oNrOV>WmTl*+s{uS5SUFP{~e5YKG
zT<}<T$#<L0p!!Ji<bSp!Sn4CTgP@5dj{X0+kNg+E@IPwxAED$eyh@P_ye^E4h71gP
z3>-E>ilI^o^D6fJaewsR=K6n7WmNl^p#yxj-W1SkK(0goB`<=`cxTuJs*HG#{#U;6
zpJCQ#;p_u?ZMVZ0-H%`OJYmIM&xyxus<%5g?Xzz_ZdG^4y!MEF>lsjG)O6gb<Cxck
za{*H>gv_{-z38?_{SF4zB+J}Yfqj>}+E3@LeUP#IR&vWSMkN=}HRznu;4TuR`^E)}
zM;`FrC~bB3N=SX=kF=0G1iX$e7*Zv59QAEI6gBZo?vne-v#y5qp78HF9Nc$2sP~wE
z*J0m|gFfvCJlprXH0`pl*l1m{)}wJx_=M9@6VC*99rdW+<y5)FK4*nP?n;Y_osNx%
ztS8^}UHaO8<v+ItU(K@r^EqFaa=9eobV0!K0iXFjJ}pq2L@(}+XXW>ZiD$yWJBdKI
zJb>?dfb3lj?K=)RIU%C|1Z2fRMDH>9NFAaw0@p|IItapm>>+|zM!~S#A0YJ+s4_yW
zk08|$goI2IAu6NB-LRV=KzHO}t&gzQM9>}(xDIlwS_`R+++jDgLVG~q`Uqb8K)OPp
z3aAWpy{uE|Dma4FN6sZHA@z|HY_<raGJ>9(>r}YNA%6j`n#ewHp?%)MsNO@!D7x=3
z9L4m5mV!g7CpZbIry#WwG6|`eaIJHI)Jl+=2ts15kD_6##1m$mNuGTkbiCoTQxOx6
z1a|KAtl0=UYcYO`l3%lgO&W(z7?Z3G1HT^l2rqtcIY!28IC7zf%tot#Am@O=>m$e%
zk|>*y2EUq<l4YntK&gFvi(Bq2|FXpa^_xOlwuf{bi0lU~n2(!wDt6}iggKW|=HE(R
z1geR^H!j`GSOOZdgWO#U=|Dk5GMC=YTyi&S>Al=#_j8xs&j+L2CHJ!y-btT#GiLI+
z;I6~|ZTr1zx7!u0vQD0^8``PpRViYdBdi<FAnwTEIniYKd(b&E8~?NI|1Wa(zxeh4
z{OA8Off2{*{|qH>Ku4syKVh)H%wT;UG+_jqGdj*-bBaN4HG}yU*`5~`%l|8#`fvaI
zzv4a69wJWg+z_Nbg7%EStL*r$g3bjJ0C$UcuYpGDxG(${zw_UA^%sVu)kc}CtqM2W
zm#%gwTVtEQ$Rus1PEd=KLy@3aDu-?iyRt8XgcAe55eNzCfR5Z`5d#-o%&@h=Ot9s_
zkYI=G`GVI+knR!_c$5!vv>-o2(rUY<?>IsA(F-nZP)|qN@V~SnXor!q)qi!{|Ekvi
z6-@uj8UL3u_%EUNUsMM)Ovk7BpHt;8r}9rOm0x`7pq)yP`bft8zmhNLG;g_p|5E<{
zr33!+hk_78&OfF~Q2&T!4(OCQmaU+L2kb}w3!ejBQ)uw`zs7A)|3&!Bf3DO2*-wB-
zzVrVL_x~4P`onwftEe?UYqor@*zi7k?Y;Dsw_>JWa%<dU-+nxJ_HEa>kNuXt(cbaj
z<REx}?%;pkegFCPgO)Kc?FZF*kclMFT+-qH0%!iKUIwj~V3_uXq4GRK{sD&OD?AHd
z>m2;Ad=J!$5WVqV^u~YTrT-Z+E-@rOVaR>QQ2U8t;(y+Q|0U0Yjv->$_Mi3Sf9B)=
z)vo?$+xnle@Pb&|)5yhllGi@V-}E$O`YD^nLoS`C>^lxSbRKc+J?lF0V%+?@2@CHR
zEPs%*_)g}`t8sm&;_5b7M07C-xEX|YM@_r#)pk60?bE7VpAskT=e5m-?s^6_5h3R@
zaDW#%Ko;HcF)+yTC^$s)9*gQf;oW=Or|*P+|4F~T6Jfm|6w-Sl7`(6nGINwQ|5nc8
zdyx}P`*$4lY261ozs?K1SqU;u70|NPr(vUU@fMSkt(py|^;$0|*B=rq+REH|UuD^U
z=_CK84*!=q_Fw$qf9<~i3`zg_EdDcS{pSTE7R~=Gn*Z5#{|lJ?w=cOG)czuT!pU&h
zS_Q~%Bgo=9(6Vsw5krvb3ABV8wxABQvJQTn0{DD6cy9=@UJf!t1X`+%+Bt&uhFbT7
zCWgTG;=$H$gU_7<kIR8NN8k<-G6OzRhrD*qvwpK@J#-;As4E0J>IY@j9BO^!R<#y#
z;v8gs0)zy0jw)6oDx=a>PGu`W2wWdQ`b5y#qLP)a#n3ITNF5^Rt|G|joJ-*%Cs0ka
z08$@8svbCLm%G3&cY$rreA}G)$dyr4?;&_)6y0~2lKKczJt5C7#h~u1h15zAGIHuk
z$dFysv{R5>O9?a1#7{pRJN0Db#G?Tndt7QZSY$8Iik+n7*DPdL%&8mBAnysfv5rF>
zbnXEt!9z9*A=fFCu(8%h5JAWZV631mu3QRClBPnMt}-@>nx1)v5v?}K6Kr$lyOu0>
ztJ&n+ywksZcX0Q9&^GN!C!(iaOq_E)W$_JA&uB5IK7v$0kSYmO4K2M5VP`D9lfLM7
z=AzrF^R6e)x*RhZ)Y$WG+Ur=c*(h^~dfarm&^|GbCT`nuE~7LCwI~KDPX=Kp1{OUA
zDHrL!Ywo+iOOChvXWIQ=;3Q~65dV450&Ip8{}~qkXDE8b5crP4<spN^6$YCNpfwD}
z#~3V*F<75u&|bmsc}#2OSBLfgtsnoldh}oV#(!~0Wpo9!l#cT%Xw;4iJZ;2t4OAKN
zfh!}yYv3Wf%b?B?)A9e(otK0fP6l?I3hX%L+q~1QW|L#_GQ;F)>b~`2HhH{8iA?HY
zjI!<wg4PV&dJF=_OrrV>{F<P#9R@CN&j`{>f`l`?0)i|cf$Tem6l;+EEZpEKi5IjS
zB6Ob1>MsH{{~7#VaqIjSGX5`T@?Xi~zq%a=sak{jM+&B(DoNfHBqe3|Ur6&mx7vRu
z*>4QuZy7{ib1427(gt0AByIa&$`w%`Ne6)HBfb!n`iONdXrB@5Hc)-Re&oN<S<rrA
z^;@9&Ncsw>CBStWR2lJ{1@%vDPW{)~@jqbE`{1RYsyBbC+VnAh<D;B)prtI)(=Ix8
zp7fY-!DY@PuSG8;m;F`Q2-<wae(*p40Z@GeS!>630JN|HS|uF;UC1kS;=l66|0ZYu
zYaaS<bLhYK#s8l7{=3}#FLm|5=mpSyBEr}HYw!KfQ2ms_>oh~^Glr`740HZVocS+!
z6ufD7+kduGp!!JZ;(w9D|Jlp0Np`*nnRh*H!^^C-4-yt%bLl+g)qf#+$^G~hj}z9s
ziCFR^X!1p?>Ydt2v!#Q&8FVul#DW-D%orH785ne=>`P;3-tz4{mA33&#f}dN6ZTnW
z&jhX6V~_?f`4$9|T;O&iJGjZg1zP0KrDa{PGJVd~@R^srdyo6|o$%{B0a;%c(0K&3
z0k-3?SK}V{n(Yz2C(`ELNS<}oul;~q{SL>Ptq#>&e46*T)@*gESZ80l+9YG9VrUz$
zZ=X-k*?{GrJQuvSo_@=5?gP66|84jGmpBMQg1f<4AmKlQ^J@l;{|uV{dB6x#A93h`
z?)>$ry%*B?3Ua7vWZ&_q{u7WTUdZ(k{FEU`eFRxh2im>_*+T>-;k_Zygc0~sJmd}$
zWTXx<D+K8QfhwbxeV~K5!4(j=Lj*Z)4th~Ir1n83A>AS{3EfWws(`@t5r_e<i69Kf
zBoU%Ys$K_LxeZ<~2Vp=CodYdSs8|hInc#%f8*(gJ=~TMPv2=xF=?V}7(jfxZL7=Ws
z(Q=m}#F;;k0Xq2f5QO2BzW`nz!K)z%15z2m>m$&Hot}em61jSc?mLV;jt5~w`bWsC
z9U#3VNCgx=@hF}>O33Ftz)u{Co_aD8hQRgF+4$)o6gw4k@<wp?LGPw5wk6B-6Q`(#
zcgcBH3EJfH8ALKDc{4~@FbEhhaL9p<%Lbhx%mLoJ3Ms+hbq6K260%;nfe`bVK#h4O
zeg<xNE^%!hWm^e-Per>}U9U`o&?cM2K8Nf%o+V4YtJVfKYz}DK6WqB!9CYE=DbNDK
zxffIBUrbwgIb-qF%q7>d7TwQWcrRno{ftHT)8^kwntd&C+Idhe=r|NqztgjLg>CvA
zi<myWpcXZcB2n{1KD}TT6;IIl5<-TcVw_!(fnAA#S&o50S2m<qXb)&~i0>e%>CUk7
zKiB^M!WaMZf*b0fDTCSn8HztJgnwpmdBEU&ox%1ZgV}KqvN*(Gd6+?G5nsYJ@iqU=
zPXD)k`(OP!=s;BdE1><YY*#=l>mZBkAVYRM*Z%Wf1tG!9paDI$EB`sK|2MhvUtq&e
z@6O9nGakiEyd2hj*1Ku9b<t|`)LCjFor+F{f<}q#iarby&I|%(4D7lLY^tCe+r@Ml
z_|+KLMZg20jNm3TWEP17JgURXzzEq^2`SScYxN-~<S@YcP~a-eu{Cn-N1n$248d<X
z4F2<g=a1Cw|7$t^SF`;uXZBy#^uN6Ie^tl-O7{Py%>N5(|7Vo^#vpi{fn^s1-!caN
z<qT5y__f{%8-wohlXCg5;03CNqyzp-27-`4(0_rT{|s6Gnae>-zS-x2&aYtI4y}*)
z&Vj0ah1>rXZvU6O2CB#TF8t>?_n+?qXc@fnrT@~W{u^!jufF_C^qMaT8-HeRdX>87
zNy5_m;d8EePdx88<x=GQyYWjOdo6eowBm!ruKx;${<9wX&vNiT&%yuP2SB$RG9UcU
zcnE}84uJ;r_)q*7I{jbr+<(zipeW`&4O%tFbMe2(mH)z*|Fd2E&wuN`%7y=oYyUH(
z-e5?%&d~N-YTbXW)Bkzb{b!i{pJm&B)-(SZPk_|(odYej<LQ4BH1}4@`Zu|29_Fum
z>_6p<&%~1neTRLkH|Pa7v1vzv4*CXNc%cHCt71_G@1c+YRVxglT=L!t(=NwNxfnR{
zbjp%@F*DBicJAf3&1BFBVUx3CV320zRR*0Q44NW@ba;6|Q+^DhBHEF0rHeDBo{#GW
z9R%dle#o<9uV=?zpO(X(jR!pI_qf;Y@~qqLRlg&7($V5YSF`7yi|E=NP`Apva;bG(
zx2i{}kYO~ZMi8&23xlv8gHmSb!WRm=|BGz|U5_Gr@IT`&(2XiwJ3)Owi3$H1QvS1;
z|7S4#&!zpJ16&#LX#Qta2O&X+hgy-p0^81qbzgzhK9KqJ$O(rcCmf3EJrdOmKH9YR
zaB%lQ$id*ClTN!1Ll$g<Nbo`hNKFLk6G2YihL9oP+wmYZ5oC-muzhbp8}wWSNPUF5
zY7V(R0_|q?so#c-yz935)Nk``*zQxm1+;`4yr&hiPys^1D<DXH1RAV^^^73Z5Ts`W
zKYtsvI01b8sY}^PNbTcTvJ!#7^^rsI3WwqqPGAICw*ab)VD%ANWdt9ZbIM=j1X|#|
zz&>w2q#A;dcHo{7yfU)QnGdgtAPj=_QF#C1@czT_S_#g8)JkFCDJS@-9i)<i_mT+K
zN0C!c#!Wi|L9tU$M@%>t(6P_6VUtbqQlpgV8d1F}{tc2YrP8)(0!GmS8XoNO_UvLh
zprzeh;B!tGAvcsk5<D$>Opsa$QctmhOHM)130>T(45IqX3iblJK4R7xO70~Z!7avd
z{pM*i9r71BRjhNb-R#q{3v_@<|KW%!Ct_!uiUY4{Pn>fue)^S|DHp>goDJ$Z>C<|^
zv1+SL;VPr_1$xm_mHay;oXUC4b2xP37*xVQXO9b5F|cVfFsU<u_AN>=Fw24v=tc}4
z20^dXIag(l{nxqnU-km{0D%dh)yN#j{&O4y4IDA-`_DN4KSR}bhL|4=J}($tZ-Tl=
zX2%&UPBPdWW6<5sV7}I<<(KZE{~mY$8$bRJng_Y^A5tH&gSQ>QYa$2(GJgcGk9cqY
z*E#c_Y0>NGnU6y!-wp0K<x;uZG-s)L+;pXYRw0K{KK(cbc|Qh08wM^@23Bp*epn_&
z22KqIAx#EuC2)NNX;DBn#xjCxd{9jUnQMTMkh2IN@e7FsMg|6O+kk<CL8l;i`D@{h
z{|uq;ISu~{TK-qJ|F7fnU)u>(A1PV=SF!o8X!~Em=D&jNe_@0F%+fy?gg-FIUY4}_
z;F|VN+vf?3{9|^dXM%?R<?Q}L>LV$C&|sYexJnWX2G>WK|1j$#)<ggK&VkNKg49Q{
zw?I{r;AK!{#CHKiDqs4qdgZ_S@&6`U{)aAm7rE+F-qyFdTi&Ftej2~zUgGknsjFXF
z*6)>wn<JUOU8U*}!`$z32mZ4i2A!nAdk9n|u^a+bNel=7gU~+E>=N%WP<<qI9#kK(
z9|ei9p8+)?M6Q5b!f_dNEr`m6|7w^1^UnOwQ1p~@?thIv|Jj!R2i=gk_&@jF|J;}U
zvz-1faN)o7?f+c8ulOdui(d9PWAlf+wU2VwJWOBnL?m(|1GfYChIcIn26fQ6PYfEM
z4Sg(H44`9(#K4_XQ3fVGew6^n@-2zeuYd;Jrd>#1d>z#E>fasHwJj*Ci-B7ktezLt
zz~GQ#V3!6lK*JjB3=CSVGG6K-eUWVk0(*{ow;l9o-{aQ03sf039`J75=UKnUyJ4qu
z#YU&XC7u<lqkH$|%sZdZx634{k5MI%LDma2@XKk;z@yF}q{|?i;?#GQcg25>#s4|i
z{^wo~x~+p@>wo4={~2cgWr+XJ;PQk)3p7&4rv9Hz{Xdube@-<}WyGrfU(WZPecu0&
z&PySkm*DkLWdGra{)6Ft2O@h8NA`efBFLepp#8SsiV3oB2(p_MvSbc2Lj<XV;3T9n
z3TX$OAOTvr-441j9Ibx@Uo{6ClmpdAunrOE;8Wzv2*QR`NsxsKkVAeTRS%p5)j_aB
zP~EH6!>b|q(gn!q9OQ%_NY4l|D@0^{1n(cgYa-{OrI0=myfOlnf%%IZLDNI?A@vcY
z8nVrq52=Z4vgg53c-Q{$uKke82u{LlCCt?hVSR_f`VK*ArEt`WDH66U98x!-)k>hc
z33bR0YxNX6^)v*<O*<1g@pxe8e!s?Tt`%#oi<TH>%+inRR|{=Z^{bY4DU`BJ7Bq?C
z)AnLja%7b;V-VG6;8A2?mjSQz<_DE#pl$n*vI{Z+1S!GDCLxs(oP?MInM>kkU>0ED
zkYW~4VwbTH)^L$B2~)94(DW`b4R0_{=yl4T;a<GTzj~8j<Bs6Ay<zQpLfdwSHtz^(
z*c#NZ-MeywOa3CO)EOp`9a{c%N{%^VrU^pY0qhE{pni~`0RxvR0}J?QO3=N2e6XPp
z7SJFS=#mS0(5kA`?N0mu^PTxGcLy}-1G;5z(jSI7puL%(ZI%1}bL{)iz4AXp+kb|X
zFAR|{89Z+?I9_BhKgwXRk3nYxgX2+#h8IE`|LfiQZ}9|lZJ+ox&}ns${X~4IBXyAa
zi2Dj?w-Fn-10{L|blIQBsxP|zH`9AB2UYBIEna1oGFv0GP28mvbPBX;5QCH>=&&Xp
zJq9ivP|t{2iGf`ibW1z03InGC1DhzgaD$w`1epqi)cBBdK_O?PbA#Ie@TL2ZC}04O
z0<kg(`6kY}t~%{ML-<<`qyM7z|MlGe>wEmyaR!aosk{F-4E(Pi_+Q%fKbO&e2E{K7
zN{7{gzl8TcOqlVidehtZX%}T&&$8-m6Eyg*VE<pn^}mAWe_4Mhk`Dw`Mu`4V-+y+b
z@jBK+|9Q{-7rh2r%_w;bbbqec4bV{)d>8(UT>LL|{=dwn{|eXsD_!|7aq7SB=Km_I
zenxKiox10L)1hCD2Y<G1eHhfVj$1DXw55pOjzPqk!K&43)>DNO|3&xyXFc+t1-t^B
z5mF`X`_Hrww8EU{C}=!Z;tXg$i0|}&&SM~D{OA75U->V60kjK@_X=nPRqQrM0mIt=
z43mE{bi89|c+D{J5BJXh(kDS~7P$Cd=-PjUC;x?4{AK8U<T3wM+}8gko8Dxtc$Bg3
zp?K^B(D_14x(uwEAO`5NRPgEapiwwA24Oo6(=^SncGrfT{#}PvA{Y5Jp9$$Z8#dv5
z!n|v7bFM_qIvqa!giF;T24!ys200<aXse9w_}&e{C0*c+f$->KVBpp=@<<P^+7Q*a
zJ+5_cRO8P0=B>fyD}A$PTSauJJLicT$8jmRamcv{X?e<;hDhpput*v+@N06(nv1H~
zun222^2o6YC^1N-Sv8+$m=EdzGA;hkvGTvz;{SZJ{|goWX9)YxqW_;k<G+x~e?eu?
zA{GuMP<_Oq`kzbxlce2$i-OaBZ4V=QPek;dfQ$t}Y9&a06y0+;vg;sxN(j^m>O2Ih
zhQKGCLh2yo86wE-c#u=(AS8622z*;Bgbf*~0}svZgpiQh2ewuLR0n~sXNBF42dRVL
zq<7s`2!agCK~DRD*F^Bn5wuD|8l!`(TY%I=kb}6P^-<|6$ixt2iGnjy=Lpg*f{^w_
z%N+`rITS8)1|yfE<sb(57&_#d2+{+BkPf*E9P<`B<Su|C`<(d@WS2b;8A0kJI0>(k
zB6|)-^@5h(L8>G;iKjk_m~bQvj3D(9@|Yf|GJ>6k7Xf3S)klyKJV<2(A!DYT0^L;A
zvDd$8yJy2Dhw_yU`3o%4XPCwJ=!P}x29&9J=Bqj-N?U|W8F~w8I&;YxGm2_4@F_5G
zNHH)$&IRIt4e4MOVo2G61SVv>4z2-mdIKA{Y5)%_u_}TVFpFEV$~&@a1__zON;wy(
z2h<wGOtws&W1h9pEOUWb`W%zgSw;y{wIX`eLwn?XTf`hoxXm)zb)p#*13<%BpiYE7
z_!=h2$r9q=rGt?3#vvs(<Thk>a63~}R?SO&$v2Oq{}t~2m%I)t041)12J4_Fd~O5H
zbTBOW&oBeLaVGaKL)<$Czk3XJ=NL>6GguyH2)bt2_Fru4f7K)ZO&|U@d-PxY+JDI#
zpjjav@S+7i@KFq$XuYJXp!$gQGU)IJ(JTL%5B=Bfy)IpQ+`DkQb>echkZuXLS{}<R
z2JILIMLz}^Pf&d%Vb36L$slCHAfOK#spHoM)kmPpNETcl@quR+Acvzef+G%6C2@nt
zb|7sGHgN5O77GlZg<AsNF7>;#=lo}g`oL-YU()%%p%<u1(slW-=lWmY|G#bQf9Kr)
zZUz5kUH>yEd{*>(A5i<hZrlHoHUElMKFOGW%P#%Bko941Eku1J<M&_24^$<|1^nj^
z1R;h@@OWL{f3`XQ8CL&i*$Nu3gVaYNSE2Qh_|5;~H$lUg0+&Gbk;ujW(icHhlKjR0
zGUxs~9{;bu^}j*;C6VlH8W{_elV=M#<uM4^flj3Z-H;^#K3vX;!F>|<X3)YC)+3-Q
ziTMa<q>lOEf2IS_`iT3;f8JyNMNa$|J^7#S^ndOXAZ0wK|I1wbFMR=YB_e!wPvOdc
zu~Yx~HbO6E;#v1!<PfMnf}FVlsgDeg{b%U7>$%`{(8@0r+diZ(y`Q}DPT=%Y;)XGx
z)((RVGrzI8qLYSEuxmhNLiUvO=5;kQ4wo;vTDs_R(%fqav#*$BZc>g|=-+uFa`L6{
zsb@2n--pykaZ~oVR4ww(no>XSM9r$R)ho}Ww9W=M7vY(R0TjQiDvTO&N}lx^{_SG6
zrJykZ2`>h2Yw-M(DyVnHqyg$ygT@Jkz}Zcdfm@YPL{D1FiA&Oukw=b0Sd~FG)2#k9
z!*uY)h*Q8fB$j<;$av1+{(-^z34=PQJ`z+0)khpk|JfD)Gb{XORRY~>ZyfR8tLd&^
z+r#jl<B<9YT;hTEw1WFbkopKx?|>?!t^<&s5tI$tNrbdy4$>il)H@IoUi-iqkkL6v
zZwOKoK}g8WM3CLAkn^`8B%~&S)Io64vt~1-KJo&eeF~|Mpmk8ydbi4Tpl(qmV!{Yg
z8M(lY;C3ln3BMJ>xnw0;ePmy>92waaF0(IMZU?T6;PsJn;W8)Cz4Iu$Tp`sEgtSAd
zh8%JiK&l}KiCh!eWzVzCnj6*$IwdI_+)IL!@LDOn_h5MML2%SS>n2Ej6o%AAf-Jp*
z^q3;RtHg2DN03?xPQu6WAPiW2bP{>xL&T(GkrR)G^&Rr>23=p`(Xhp_a;<&w67%di
zMoAO2qB>MVYGu4jq@6NEtP=$cgW1$QS>$aQB+M9vwLqgltm5GA5@gZ=vLg?(C_~N{
z1lgF{;UbVdv+SUDK8qLww=9EzDw~u(pO&k*v7d@#nwEQxs!N`VbFQLehKx<Jgjs~3
zexQK17l*PFi=-)N*)z8i1BX0l#USXSb8c{+g{)nNj7mY=2B`tz`#5+Q7)=a|mI@yI
zFMIL7>C69`cR?$xw4VNVc>r3r&Cv6ochP@=^`L{P8QOm{^#2E~{%!cjQ1po*>lH)6
zD~49kBro44&^g{3H~t$w{jYo-bfkm8HRy^3-YXymq&EZ^ql2un<GuzO5#qT5I!V?2
z?0?bOj~z1CSS2qr2=7<&Y!tW5=GKj8QVU>E@#9hp<k1M`(+cI$2xe0AXHf8Dkac2^
zc3_hQFZpIy1eL&`jVeM6Yyu1{eBe$H_`m_s4q3>`I>;IZ$QUd<z##zw=^rsNh`Tu?
zPconVpCR@=k2z>Tf{x374Tt|a4*!kZ{+k5-w+#PplKI~v_rFH|fBCfUHf;~Qr@qhM
z{3w6(qnwquGnQVD?0u#hxrtNjzk=g`Iaknms<NK{6@327`-6~h;D6!3{|ssWS&BjH
z>p158XIS;0c?+mMVmthw=Pc+vCE=U@g>U{BxdB2#-~*Hd&w;8W$xHtwFM$>|aGnAU
zgbE${&*nRcfx!_v?h5w@hyn4qC<C(&L)ymB!=PKM7!LgxJp7*@Jdwn;A9TPS%ijO2
z2mZ4i{LgaqKkKpo0;fQT`_zBV6aP8R{pY##pX<VZ-piniuJ|v5j&m2e`d|9`f3Zvd
z#V-68Irm=}(s8>4n)4F9_Fv}ie~X9z8Kys0p7hju)!(Z9KXW#|PFwvnaoGcxwgdWw
zt8FT`dbJ-;oOdU8)r*q#pYvC}3!nciWX|K**>?i_FL<;c_3J(D*l}93V6#j8VgIgk
z&TYs2CtVJiawc}}RmfCn^7K<N6AlNpZ%&zcRM$5R+}Hs57QBK5vUY_5lyMlv7#Ji#
z)e~r@36jXcZ4yR7US0)ZVKpv(c}6xd@DKqv1CKNVuZ*CQA&aOAw}>3Cm;!@nxn=rE
zmc;)Ik^h;3|1-G%XE6BBp!1(m<v)Yce-4HJ+{*uXl>c)p{byGE&!q65L;JCq<wx`E
z^F9qvLVHgIcb|Zlc;UUE-cV@wLC_I%-O!`9QR*Xb&j{Wp0`0&B&k#XsBIG_1qyh?R
z-2<tBAYCDNH3aJofmUuq&iR3_m-B4^%@Dz>A>X>K-Zh)y$g6sjSM?_En$3_p2+|?)
zgk1*V0p86Ds*k{T<e{yggD+ihFJ0wcy2`C&B^-ek%N4JHkd8&m;Rw<vf{>sN5x8>%
zt&Cvx5oC-GQVl^!c!vnWfK)^FIrEWgBHOIFa1;ivk02zxK7ueJdq9;DB!b|z5^_xh
zse>T1evq07jf7N85Hbq&AXW$$QWHT)to2d!)RU04;t^9$giJUR+`ZqweYaoJX3yGn
zZe>gC^XFP-Of-n^FpQ|v@Gn(%Nt3pTlQaq9*YXflw&Id8<`7k91a+50LE~wR+~65=
z$Xq!jFHo0+4CR5d4`^&$0Mz1Tm15x2WDqlCl6PTIbYqb708Il)IfAxL2^xcrlHk#1
zU{zrN4S-04*6cBWPF)7~dRW1E7E%*IOoq4?BEkap3ooc&pc#|1_JQEh|Ef3t%U}M_
zbKpPsG0<8sgERjb=l*BNe8SN5mwVNJ{&nEnODFti=mGVTMEm|rO#Cl0=fBFD|H9k<
z^KS-i!?b(%U+>X>rEC8M!BrAuMK}KXi2pk18a~bS-x(5CXar7?bMF$jE91~lXH}16
zQVC<x2;$a{l(9%wb<9z7%2l?{lCVkTH;&=g3FOiA6HxbJlC)s})kk8WiwjxA*!U&c
z1*8~2F$G!g4XKPEo`v*`Akl|bC4tBHgq`G_3bp(Hvu6J0wfHaV@n75dzpCwjZTtWF
z&i@Vk{%d>w=W+Zm?D5~Z>wnV9|85grc}#kfy7ET;=0}C=9^|dQn>6jcUeazJU1)tI
z@AhBD15_W$`GM*qkst!~k<jJ;LN`DNQXdIl1+8R&)JIa6|4Uu|&wt@R=c)hFr$L*H
z*!?CmFc^YT2q?xOF$*Cftr}6#G2KG3;;k1LPJu3$7dZ4^;1FnG1K0ll><2)L`B@Ku
zswAevpejk=)PJEfpu?azj{j#n1FDQT&x32FbN{){f$B=$bKuJ85~wl~Irm@eGHCIn
z*tP$XH$i<mk*ohr@BQcB@n5pzw%O7jxtrc(t$&%Y^nUV+M+uAWWUqamxB7X?!n;ZH
z??%tK7BTsHNdINGzN^mNm%KYqd3Buh>OAh!e9)+JkAB%sv%;;8H3zKg_M2AhbZOZW
zGUZI_f=d~TuO?4F<=3<|b=EPT#<dI_ir}h;1Kibxtek|O`ve&T03S%m02z^nNb$0=
zO7QV1a`MTsaZ9uCNV4)uG4RST@X3N|B{oq84k0#vNd|!;LHmUaHs2VmzcQHrU@-a0
zp!t_U;}3)Ke+I?>?DF6$N$Ed_{C@`NzicZ16`X&YM*R1#e;nBQGNk7uygmx;-Va95
zDk-%4U}(qwuujk=MDPj-!a!a%M@&tGRvAG$L~s&P6G2GW2|u7gIdCTkQ4>L`BuIVa
zS+x<;8-g6F;9jv7as)SKeFPbxgV#im*&;lZk!$e^cxB`W8>4e5TxMUe6x1OqSn80s
z*dcE*gpD~;2bm;-cZlq==RqnXTs0A-N($-Phm63n0Er7Q*?TY)31N1QAXO6DL=vP{
zf>c9TNl0Y`CQm>v^95ZiKjl;u2!XC!ikNZ&g5XQ<qNkpWnsNd(Mb&>ev}a#X=WhSz
zZQk{p+$&ev<}a|!m}!_WQ758JC7@2xy-?gXUBo1sTRVtb)tyz|ky+dfG(g0o%)knn
zT@nCyJ=noh;b<irq)fm=B3BK#ltIRtAO#BtDC@C{fNq6m7Xz(oW|w4O6#)+?f~R*u
zD+3|94^ja^?)QWoi3}N(gG^CDx&{z4AtX0=fs-6V!!6^T|9SWR7diwQL1JkC$WZ$o
z)CsEk&yf0w!TTeF=X-|2FUs@(OK<-#u>~|PC$brI8v_5v{~R0tb8Z5i<IT9^zr~&Z
zt}j7T;Sx7MGe?k97dT-n>-evL);4f~k78iG_Ma72XDQ$MFMa91Ow3#cgF+RvI4PYF
z7DYD}MK@MuSAJc8Ve<q<*F5dOYJ;!_)39cv;0A5~N=4UvC7X0f<2VW30B!|m1|Bu=
z8ZCYXHVHNXSpg|^ZV@?f$TEX!TTmAX5(4nL33E1(1>DS*<dL^jiG6OE^@ZE=znIH^
zEhkWYr0ED^C_DX^wD`}U`G?uzzwoO6HkbeFtpCA3>2|=JJFzQYmTtIFy5UCLv=?e|
z8~FA9D?9yHbpEgG`d`80zmnH~dB6XP0sn=9K!_prKT82<dnx;D&>a&jn?U^|_9LJb
zZo(HqcjF3O2X&4_Zu}R#23lRfc@|V7h+h6Lb_KMsf&JuvmSg|<FaGBWodrHu0n+~k
zdlw5KE~*`<u;8or>Hq9UK#1!Q=&T0T1OHj}|7X|-I`#*$){gJwf59{VdCvUjI`yCJ
z^nZ@C|9QcP{|x9<aPAYJ(LABE|3%LK=LI8?tN+Ds{1?3rLL%2dQ(dBW|MOh=&o%Y6
z@VqaHYu_fVdzZ25Nye%tk@N5RO}XSb>AKI<TdsY#+$Y?z@402!cEzymqJGPHoyM!W
zP1i&luS&MwWb3{rI_oLt`X3Ve{u^xnFFfn1WBWO$j`JDwuGDOL(X!=H-MTv!({`{b
zyMmiiLf~F7Bu*h!68u~*tXYzofrXEiPlAbCmVr^2flZVN)H#x86INy71rI4Ou&{6l
zGq9wI>rLR){Li5Qnj7L$|Ie%jLaa)lp*be`{~)CFhe7@)x5ZhNz-M-4C%v1mh4$?W
zpKu_e?@)N}!N}fyk-ht%^F*D8K!b9SeozPal(~+BLG1^isAC^wv0PyLUI+tn)?7$C
z;=mvLJtKHE1ZHf5R72oC5$FIq*oM}v;L2ztd<7k3-2!BE4njgl<iH&wNTmZIT`E?)
zR<41}5JAS}AeE6@#cIf+1vrValL*#1axGo~T{{O}(GBSnLG}}2t9|By*DB1n%bf>7
z4!QH}bLOJfL38Y~5y&=cHYP%@j6%Bh;jfRd)=iK~3Q{paY9i3V4zQ&T#MDQSQ+XpM
zorstOs*Iw*l@aKs`$@;bCLM!RNs!4V&`{o_V=<GDMol~%*?S<YYj0@#j)11k-c_re
z3K!XD&$LLHWD?b)9ayd5k+0~Op<opwW)LK-?#82F&n08bB%;m0E6>0#$-o4^BMef`
zW6c6s3J4N~Ae9%Sc@Jsk@qwx*&{zayDIlaXh1~LoT%Z8Yr;thtGH?MQ!BR{D44}1V
z;!Z(@YHR)*Z2Ql=`9IU9|DZ{tNq<0<QPUrW++Peq_Zh4oGB`YA2)N9U^pv6RKi9JV
za(n+P9spg<!?W%`)9U|h>p{CC1yB6<dIzeKByRkN*GG_@u-xEdf*|z~M1=L)e>QLj
zis#aQ-6#JUCfs0<j$x3B=8|y%ok}NY#vp0OuIeso94hOWr599f9@}ma*I^UeWg6D3
z<yWETny+A$Dxw!Dtl<r+kN7n~<8=%?3>;D{eA2?QT5{@!;F<^$6p-;NxZff1g*F=q
z31>mjYD3M<$}!LQZT|~7fT|%iyZ?~-NY3^@mnLW#gIx4~h3)?hj{o<+^j~`3bEm#@
zp$i`tt-V^b_G<j}m-68&`E>uOID_gVB^S_4le|0V$_Y8&{{jL31p`3~hMDvKGj#t4
z)kiD-GjIIQum`jdj_(X;X}uu$Zd~E(|AnuEPEX>y462NHF8oKUkN7V9=Z~5XI&78!
zK2(J@WeI^U(es#Vz8y3}#C`-+8F3u~4b!nf7TST=fwLY4^@#aTf$Agf)1dl@{mg&v
z(;x(|k9bf07dQ>tw2NFHiCp_Hat%7qC3FjP*Sy|NP?Z$E_;LK|*J&#rrLA}rzw~k7
z^sDX@u6j+r;W_D^_ml@7Qy+UweQeox)2jcLRsUn#NzV-Dezafx-}cb|pzHs2FaOs(
z|KIb%fB9t}wF<Voc3;Yze>G#?m4XH5%+jWS?~Rp$owN^$Ur3`1G6M^#oiQ^bc#N8f
zhf_#~M^qg&pv}O?z%IrntjZ;>$;cxI8XITkV_@QCU`-d*>EqM;&!q*bk2t}V5tAxt
zl89LWTp7v#XHogbt^Hp$=(&B)f1j4?J}uXSd-i~;B+vlee#qSuX!Q}OD+I2OpfyqZ
zfxx!?@MUsvMsVA1I0|aTx0@AQ0f7eSAX7z<HQSJy$h&SctY-w;)(WnSP-cictJXt~
zKZR6Ap5RF$NaqNC2Lz<vaRTpPMP9`X=@Yq>t%TP^5C&R(gj@|lmn*<W=L$jdM38Yg
zI0+t_gRGq+wmw3uiL5hbLQrrg=$a2WiB>&9?y*89k*lQ8-h+_JD75z=q&~tV;WZTU
z@&<S%1*wUUNl2}POh!*V9X<6lxa)KZ(wTy*2cIhi9mMl&-0D`d!LDS5b?yS=)EW9Q
zy=uWNYQ9ynE`?GysX`{PeA)r5O72Xu)(jG6&^5-OGXaFaeI-bZ0BLw*W(TZg1xbRC
zK{jx<0<BMmGav;i+(tMT!hk3P4@QA>aWOFRF))fSaLKYs8Z(LNGZ+*(O}oLe=|9UR
z=(<*h?Vts9TwDLMZTv61{6Fi={|q@F7`)FjxIbX<d(IH_j3Mk5L+X2mydMmu{~5|a
zlS*vs{)-;^Z}#}V{fGbRH$X?B3c~tFklF`QA3^4igfIUWz6_d961WbU4UxP5KV;v3
z2E!`QbTg+3Xu^&|o`Fx5K~#rR%|+ZKNX{`;FR)ZUyiqT#K|82k-LG23sX*E?UECm?
zSIw7K(UD!+nt@jXJY~Sg%%>!!ZpkX7BCl=5z%B+G%z>2HkS-FW8iI5WF~gq|v}%iY
zvX<K!S-bzTcK<bO|Et^lS9AQY<@#UN_CJ%-e+KghYOVj7_x$HN{9pCrf2m{tRc73k
z>bvYW`+WSG2NB($Bz-pX==@i5`mX{;@{XX2Nx|j6k{jp>BvHTr3<>|4v;Q-6{^tZA
z2gke~wC|Yf@PEFu|Aj7s#yJJSm60&`oF#!vpacDQFM;YK@vG4O5#uqCAb;!<@H%Qt
zUtowZTiF$^&_4NJ{U~TV68jO*z@6Y>(7-I$K2S%A?cjfoBcQ|BA$1UB7K!iFe?IV}
zlJMF85F~gGbZIa5`TyMKK@JeShL|PfzxH4B)_<|v{}rzMXIS}7Wy&k_<$p6)eMnyV
zI(E^m@Htn4C!F)`I^ooPE@0*zy_xq-=0D|~^-*g1KmMiv`Ir7@SoEKH<$osdfq2Y^
zLCqV*lm8j|J~HsRv1%nr8AmZmJAt;Cfo^MJ0C$cceq;f6ksy1VA+0sYEFok~EJO`>
zr3&b_K~@GvZcY(7Idx0WF+>dPjBFz8f(l$B%FO(7ECTWj9DEFn><o-SVwzP#8vg~<
z{&TDR=T!R7ruv^%<v)kge>R2xY)aoaRewsmpEHbo;$C&uzv*gF*8vdf*%{QcGo*WO
zXwSZo?!6!c9yJ10L!J8}=bb`GXnoXyvTPf&jR?GWdl%$NR=<`VzRla=Y`?~B;7eLT
zJ6j>ANkB+%@ZAuQ6S=(_ws<vcf%S<%6%eE!1m8*I0bZ^EIrtPpg2v@Aj#hxIl!K6H
zHIZ}a3e1|wsbo1Ext1)4EZasS;e8@-w+M8`0%U)yW8OkAn(vqgMGm?1APC$unhPf}
zyG7O+Ga*$Hq#A;g@R|t1Ag?}x)Jn*e5#}@#j*96x<nnrW6@y$8fxAnmAY{}O5Cc*l
zfd=;`fd=|wL3a(GfOMK7CL9Ur+UMQ4)w6n?Tg7Up`~?=N(=6h9j3V0%f~qyWOH`fG
z<*XB=%|e89z4??Z*`<xxMbsF0<QUim8JHkfSi)C!!Se#u8IY<6Qr$pEtX4p3B1orz
zgMpEQftibeOPoVQky%PtOxHol)L+vkOto@%%!W@4>;7|W15G7xAN$XD6m-Oy@V@`N
zJN`?q{V%uazsvUj3X}gc#C`+SM`5oRqTVq?JYopC!w__rA?_u^%>O)FKqqQCzWc9p
z_rJ>3|NPfL2y#j)W_=`b1-yOoDrotZ=(YdKPyVwP?qXoD11|<v1x<OdNQ0JS@ToE@
z*zsw53Y$gAIi^ZE<w!VWN!aIz+GGmo#c-;JvMacQPx&@t;ME1~=LOxS4XTgqed1V!
zR0L$SSp^lqqp6S<0=$a^2|Y-C1YyHNj|n{K#>2qWA!D&m+UCEC>wi@%P<^E7@?YES
zzl_a)7M1@De%}ozfa)WLo&UuT|2Mq*Kk(>(jX4kPdk#4C9yClj&F{2Q$Ou#)DLejG
zcKk07MzW6o6<q!cc!H`VhPeNXnV>Zx9Mk_ZEc*|skGPM3>LY>kIO`*^EB_^~fu@t#
zPl67$1JzK;s~8x};jYKa&|;7bV3_(`?+mCT#(D^JSFFe}Xnn*4S=ew0R3)(=2aVTp
zpZd>r5>y%Sp8PL(8dMpHock|w?!Um<|NLh_{WVB^#D4`;AMsrQMJ4YQP;XA~8fXeq
z;>>@YIiJ*~y-8mBI&<~M#AWxQ7u<-Sb1Qb%4cESlX7z{UyDqqI`oYj~pKtajrWya4
zW`Gt~gO2{#0y+?n@gV4U5QP)}8TvngC&)CxbE7h#`9V;91nKWXZk>P+4zPmP%W;CM
zCrBQIw80=NRoKAdtPBiX%zP4{{t;-k4<92NXbhZ{Uyg}S7E~W`@`L--0fH)}+=~CX
z75}p<fDp6Ne`auH#47)vOYN7e<$tTRw_as`1Ddb-H(d?v+>cxzg>>%??%EU7xf@a#
zLh2)E9Rwbx19gkQJtJ`Kvj>etR6yYR$hUbr1i@A*Ku<mOYS@N~wm=T~K|ALMZT+@8
z(v9Juny7Lehy>S1?v-oc!*P(B$h~Z(J8Wkwo;t{}co{OnRSm)WL0|@GPb;JXf-F&h
zR6r0Ctpc*onG3IpAhnNe)@*pyW1TV6Dt!h7S*A^gAXvyldr1WABgjqkkRB7fuLP-&
zu#(7?6mpdWVZ%F0a0YxeID`Qiri+>as(s*<5rh#n<rJjr6g~BHBp8KG1RX~o&~?D4
zZI4I87Ke&8wgpQqvKN>nPSJ_%(F$%-^s1C|$rZCn6*CFr*7W95wr7*KW)Rf@HR-v*
z8+<^sIgn{qcyR}*HIPZXg$I=-IYH~qcof*gw0Kn<B=o)IoznD!%A}$uX=N>9ocB+9
z2WTsb;MxB|SN{uK2HhsWbm%|xPH=Z=_J4-XA563U>mB>Au;o8P?JI`R&ma``pCRNg
zL+~qx4A2aS_<{d6FaMiA`7eAGgm}Rh7(q{^1GD+B{^tiT?dHA$nrINZ@}K|ue~t_P
zT^|0I*!q(}+MfZmXI~i9Hvz5U5Mp4JWZ;x#m(=4^Fy&RU5mI;L*Kpxecji=cVN-Tw
zl(YaH^u({tz@-M7&SR1UO$UJPmE&b#;8!(r;g!;15|HPTP-kG~0f!}g0RyC?ggmLl
z3@fQ1u>&dJ%9KqusM`Nma{z5SQnmT7<oaLL<3GFke+K#g42kbGmx50d-3MAY&2#F%
z(T)E~C;lt7-DmJz#GrDB#pr^V*?)O^P-UcO|6kPsgp}<5E8G7Ub@?yu_MajCKU)r{
zf5Zi@k62;-Bi_^hg)V^Rj0CSi=Z}Q0fQG_&E`ip$iGcU0@>~WTiNJ6iv=LQ2YZC*5
z71)FDlm_NuA;A3w<FcrgZ)A>w&b{G01Ugcc|KNY#{h<C4%YM-E29AUO*$;!x5#l=y
zs&@oV{^vgd+MpzS22>>pp9LLx$A9`i@0tJn=Rgx%e3wBr5!XfV!Go~#pxCed=eh|x
z;)7xSC;15vwdcHzS^O+`*4>~fH+=doN!IR@u0LRQ`oHg^{|x=_89HAwbpB^({>RY!
zpJ6KKv<i_U|5*=%CNT|;{AcKU3tlKL2U^3=C;%cs!_+KD{)6;+xxiA8bDwy?quCr_
z5y;+WxHuPR%9T@sl}`bb(inLd*@c+dL^!y?3mZ5@85ki|n46GdI=8}qeihL2IVJ_r
zNx5uF|5;TY3+TVo2s`eOcOP=GY*5#MK=3(okWH+hWeJ_TL6s47R%lO92jW&%_=p@*
zR|ruV`L!UrMUW~9QVsbuZ3A8X&;&YK0Wvq_QxEC{!D}Bl!@U+fJyZwk7Qs72kl{GU
z&>W&e1gVB9*Lhd1^{xWl&<YuvgV#rpng~KdM&}^+hC{~XoJ&{0dqc=Ikwei^2k<^t
zr@|$W4XvnsBE;Q!kebLbZ$6{~f{=)6C});!_Ds8+S$1HCZT3uu>^TnEbL_KbLy&F8
zOgOShp8-MEY182dtv-Tq;k8mw*FHFc)IrGgQE>NuVyYxaJw;^2gnX7bq)GxEK!l?{
zf(+t;x=vG0MZi$_#N(m;M}xZd`LyitYux5pv(dd|xkK&(yYy*h@qGqi^_u<_+Md}e
zPASSZ5t2rJB3h0-N|wA*ddvbU44e`S%tGLnHe}-xq@Y71k+KwRnT!lfJWSl8TvB>6
zIu2^Kk;YzWCgIg?nZ1UED?_KA72Wh-<sj%n9JyPd`iT7$Xo8zz?|;Vapf)P&vi}Ut
zpBU0EGxYv4IPza@*MH{H|DgIP<S#?OPlnhpLLH#>3~J~9JHGp`e+N_<iNMx)3tR=Q
zUVzk1nDr4K_^=?`TmKn?r-Mf7Kqr_9!X^a-KwG7l#29#$7=+XrgmuA4AGDfXP@jQM
z2Xr6<s|o|NJa`R*D0oRBKd6LZ03W%@BgZMG!N@PiDy+oDF9B}xLK?}CD1ek;$RuR6
z2hxFpoG(-=rM+C<@;|6bvi+}O4Xuw@O#U;d{%6Sero9}z5@64N&<%Qf|Erz(Z+iQ`
zX74iwjYbBk9b6XIq^$lcJO5X-2O&s(q-YDOkA$86i@E-1i2Ki){hy)ZALles|A=KB
z=tLUML;rbBf$Afn>!2kKko9$t`iS>3s6G<D3|d*ubNN5p+5Zg3{&QXYuTZ=jRv$qM
zQamY%6}(!*n!#_P@SgwjCqV1H+4leEI{-o~;3|m?yy%V<JaET*3RE8no&r@y0w@0q
zo&xofgwFgIJPShn=MeP~s9SXXKi_rGMl-fcpc@-GuK(w~^WX6Hf2A}3ZPxx%p7PXt
z(pBFn*Zn76_3S=xI`dZa{{P+&{s%t$FTCMDL(wIMs;>-nKN*_;f-0tE;1xC7L8pl8
z9Qe=B{0KZPC<Pi12h~SB;6>WVo`rOa_`$2Yg+R>`kTS?FGj8x2OAe4;(2j3zP?lsD
zW963v)kmOB91tl!HcoLyPBD-yH2mE7<x)6g|HA7dHl_b;O8-SoKI?e@cg%m_Tm2@m
z>#%>v!NAV_AOu^A09ugHxjU$HH;4hMlJ*3)??$eM;58Ar_Spr!793O)?ec5c1*w31
zo45NkZG#~1#;x#yILO2h<c>VA21IWNtv-TmBSNl^Ak`3r1oehsm63bJTCd7AUa++a
zkSYm6LTVq#d=R7#g0PWmA2c?kGO`Cx41q`F7J>Ug2ogL!1Uh^hbY&j?5jj{jWScz`
zf*{opgtSSYVVg12CVd9H_Q7O;n*or<0F;DONf0uyb1xb~tB;VYB=~d^qyj=C@z+O?
z?h<Gr1niIp$WeIkDhb*r0uR$c*`UREr{EnZ$kD8DZrG$_p!MOs2mLzs_%?5MtJ&aE
zvD&U^sZI7Av(#xOv0Ylh4Qf86@~(MOwuu79;R4!T>`IQHP2WN~47|$VyJZEygA9-{
zZO9lAq^%B3`n0DQ8JPH4g%x;J9OTV{R9$n-!fH)pI$d*SGSr;YS^1me6sQF*aTT<~
zh36FLOf&WapiA6DxBll?@n2%rf9~G@3~3)3{BAN#{bO+aztpDx4E0|aBL6e^|6(Y9
zBeL?p__6;M&p{U@$X^2$O1$9NZP3OUaD60r8Pp+ykdV5G=h}acOQ7zO?BoBT%N{c@
z*fM|)!QcavkY+8U*x>~&uxAqntt4a>0T(sGu(i!1;2mM`bsdmvaUhLhPEe7>Ar4xL
z$SJ|XBMF)>15KPkf}I&`3m2G#6i1MzHDu}mVlNK^L$;L094WK^N_L>r>g28eOS*v8
zeluA9XVCx8ko8+(DQE=(!!FR3;tZSr^Y8z!cJ05`^#2Uz+Zg0O30eJDbo{U4@?XvQ
zzcRRUr0M{=4oTJ?v{qce71Tds%KFdH{)>GosNiK>4LTl;<KTb()Bi;-fR+b{UI%T4
z6}$0Y_!_8@zz^O81gVdBul#2}2d$5k>y9!oIDnV*6L|}vC<B8AXYszs!~gjXfp7hT
z)kKiWi2VR)9XQ9q|J+Ca^Fyj3@cfa$DKPi+f06V5#V&%*9v3?gI`UoM!heA)|3z>9
z7rzKPzLlx}KilO0(#!s<ZTv5N@W0HV{|ZO`%N+PGy!pS}zW?e+{_CFluXg*t`W@)a
zj50_6GnAiUh`Pj({g|QfB}4sphPD4iAS*nV{MXq1pP}F~1A{Sm4WT%=(IO3=B?Q+j
z4BViIW)K6NJjp1^z$gm3N{mGUbT%@hFasmxY$(WzBQ|iw1iDyLSXL)6Itx@Ef$p$i
z1GiNm`@y-vQ;py;KxR)7`BWbH|AH$2Srz`X$p7cixhrAw)*|DCXXVYnu04TWdmtrS
zXvbdA+Jn}E!L6X1XF-?G!fpnKtd~QsjNs#Pkje;L0qulTN${EovoeBINuVwg5x3((
zDkHR=M39;Yw513(F6R!eh9H%ZS0(gDJj{6_cunM5vfQP3nJf4v2xo972+|Ki8=6Dz
z6Cqbd@R|s+cpJR46*NNx?+~Gn$Ysy8&YTXZiEJ~c+h^nJ4Oym4#YCjmN090Xt<!{7
zE5SQW$TbvleFSI2t06c8a>4_Ig!h#|U8G4T;B!n-;93dRIXVHUi6A6o6BGQ5)~G2b
z!X_Mv=sOhJy+5dJr%%IX-<q|a6)Qao=h>xCv5fD~3#rxdFIIBRRI-hcGzk{fa^X|9
z6_7V&6V+zsQvj{RV}z8PkOC93{s%mM!T_1tplPMV$iOPZCZfo#WG7|ltKg7s7+7Hx
z*=8QsEiv`E$5GI^G!nN!lhka7LC2SI9{<mG95hqUwCF!W-6w|BM<A5^jv?eeL*7-f
zwf~Jy{b!i+pCKC5$Y+}VUupY)-8=tnUV|=@Bc?v$1E0^pei3x%tk|9Z3=XxRJ9j|a
zXCOxyF@b9ePH>wRQb#~8%i{ree<1Y*FSsCrTpJ2mVF;=2Il#pWD|onupMg;T)JtU(
z232a%K_1939lSJy6ljp)9Z0Rj2^-`|7gCraV(?$y`oEmje;JGaq7I<Fv5YqV8BG5(
zr2mkb5AGR(yO0c<|8wmEo$|+3{*A$Q53BBf8JGWBUjNnI|7*B{sw53(&~gY_dr+0c
z<M^M?`9DL<UqpSh2(%@ZVF#!p5I6(ss6tjyi{Jc@Ss#gj2R}je(Q(i~nR?Sn1~y-?
zn~AKC#6Uez!5F!Y%OWR0*G4ey1r63AS4M36L9G$a!~c1YgS$oGO-cMGL0o}Tpvp+<
zDrg&&*m=-tdwl1hyJcDS{b!i_iy`v`gXakb&jSo0rx{AFGR*uXz5l=Z>Hh{7{u^HW
zZ+hv!`IY}l*Z(VA1vN<cZv0oj@SkDv4+ftj3}NROG9IwD{NdgGpLaKCZ3DxS|H|9`
zGxUD2%3N=qwLwO~oq<V{fk_sG82A{N1Q-~2L9;(BQlJx7StUUyK(a`Hsw5^6P?f~Q
z3$BtN`3|xa9CUFKGp~$CP$Ij45@^Xcrx+{03^=7hS}AM{3=E9StPHH)Vv6a!3eft9
zSL45;>o?=@|L*12{p;@scI^oO&kaHD1P3k5Zbj5bkjug0)eyW6f-|sHMmtHWkC2xu
zKq?@3ZwOu!c~ozJ>?eX$M$m2%cxVo?iW^=Td4kVafDFn(CW#;<d{7Qj4S|lBD_#b<
z2?Ej;f{(=^S3?j%<jM$ghd884g4ah_Ya+YsIne2$?3q>>)2uV6gKDMB>9(2EA)|AU
zaXAPH=@VI{PQ$E>EK{ap)<>WVb770_Ahi!BiCG^(M)Tkm6Py9Bn~=Lp@cIbOz*-+c
zPG^PGN)fPAhTsgy-G8yuPQ*?-5jFLA^t2ODH1#BS7wu`tvIo$u_Y;qT&NAvd0-C<;
z*aKSgP_-U(&2Hfm+pM|P$x{s@yFmxXxtGa0W(%7q^6Q7OtNXDjIxt9Cf)4*-Rb*h6
z1dl8U!g@@Q%MKuGk7-&Z@iB5rFo>!1s+kMx`YYSUGdNT!q|TSv_uu&DfBwh+8Ls?i
zIQpOA#DC7SpmkI7hyQ~P?@Il{;PaTl`5uGQT?X%a48hMBVy=qx{FmMfI*Ngz?jJ+{
zf9CC=b7p+s|F^yi8nNNO0b0hu5AG%Lz-EgeeIf|Se)T{5RnVn-%;)}lKL5`!`wjzx
zDhPqbSRj2Ic5r<JuUX(dO31Moko~Rjp*F|}8$=Cc*#>0YFQlS@R7nEh`Up~af#-%8
zxfno4=5m79XhCMiA*}+qIm`?Uc3fid+$x`xtp7`y{g*KLFJb*(*5SW^%YO!w{|qs&
z)n@+ZJ@B7p`+ue_pjkGFIsX|7pE6i{V6gbWZSr5z>A#u_6sbD@*Kh*OL&-S&mvQ*d
z?f##~<3B^fd#1d<46VP}CWFde=GCClQLY1^?WmHM|4ZBgb&eqQkq~&g4Y_~BclAHV
zd2s*e(toYa3k(b{;1ZRKlmsL#X_O$f=$FkAaIJdaKhr_bNllCg|1%y0-Bb&@B>f0z
z7K!u3f9_MD^9*@UgVu@*oQ2j$GFShLo&)Wc;yd$S^b%-+1n9_)@^1_UZx~V^GL$}H
zX!*=A@jKIE&@wr;{r`DSf>!RapZd>n`ajE=|6G?qcL8%=`7eJH)a_zu{=krMmZ9{G
z?5h8=d;T-b1D{E?2sB*Gu<gIxiT_$x{)?ro2AxE~t^&H1o>ho}O_+s8nuSN2fnAP)
zO%{Y$Wf)i`K!_O}<4lmA5oEv|lEvW5dwAG|l^8fBcqG)tlwg|zSVcgGO>uHD@$xaS
zMF?nCGpK%LQ2QoidqF4sl|$JPuZDB}?Rx{jqj15Udqdj~hO`}koSzWXx+fUhfpU-=
zhyvQ6lSJ^E2-Y)#E>wV5MvyT&$b=ERTLf7$hrDVIQXj!<B0QB5Wc32HGOAn$sfi#X
za*!1Z5E9-qf^>)=Bz%Sl(m8_EN01JYL(x+BxE!Pcf|CyUi)_INQUO6o$a$xbn~5MK
zq&|YLQh+d^l@WM95oozW=5(u!X`spowLY>=n{Jgl4PGB%t&+gQq`Q$x<QfXICW4E=
zt0c4$JV*tESsx+ymEd&~ygq_3K(z+=1UyLp2$_V`N)QrKC4slo9*dcJJZ9?gh{;Ez
zrXGi)DWDz`<ODoOmkM(J1GIXYa0GN_Z|A<C<{d%xn|!O*`jjkl&7E(TJjo=oLocXC
z)w5XHIZMhaQQQc0m=CX#9fzzX=oE2oMbO3v(6KhW;AT7|zd*`EQc4uk)N+D43+xgM
z;u@^-j-c%u+Nr@aE}7o`Z*uED+pYf$m;SS!15HT_UIh)mu&nscP;#Fk;5mcWLk5@o
z40bme94<4s-DC*Az)=2yd+mSjwf`9x{bybFpMCd#_1phlpZ_-jpPR~m1GI`8)B^(F
zKMq;l%?n<9#RH!BhMb$qb`f+&koC>~4CWQ!vu>2ZB`Tzk136fa9oEx<)DaL85=xMM
zC8SD%48cJ}AaxUbmlnKA;sb9JgS3+&hC?blcm)U14hc}mY6lhu21^DOUj~7DV)}nY
z4E_u2{}(j*FJk_m!{$GW#east2O{nN1rCD{`%dt7t-_}a-uoF0UozNz6R?EVM@kO=
zRh>ag8q^%1^^vS2s50Vq|IZNnh9T!SL-S9TiP-BSiCh0AZ-e_sS3#qx@cKvueC!hE
z1!#Sw-*=UP(F435gA?Kbyd*nl7LA3IfkBOdQJ*1rwdThEtVco9g-i!QRT5}b+W}C0
z#JV4}s)7CRfA(Xb`bgm1fBv(em34yWK=qO270|IoB4?rXk<fWijSSkYHu=Bs{Qt`9
zL3@Zi&i>as|6k=OXkHA|^*aozk~mKP=REVD{XA$L3uM)$_{IOS*Z-^R`VXp-8h=PE
z1$VvMzB0`E&$#kG!zR!$rs)3v%4h#ub)07qiezArW#!YBRMcZ&6K3O+W#yHD)<>Yq
zNQQw~jDblM6y4CuhzmTD36Ec9@C+KbO~NF|z#$G^P$vOymM}B0i*XApaf^ww2?{Z=
zMF{KEF{pp%HTe&!jI#fEHJtNqJRi`p4^kh2dPZ#rK$nBJ?S*evMLx*~exy%8GiYK6
zxh8_J!NYW*`UqYJ!5Q$H2tIQJnIVE#L-0-zq)LKwA$=kU3F#cU!KR8lD%ZJJAl5Cw
z&zghG3L!7yhF3%I=^;o>1nC*!s*m6uBKy3BHhBwe^B39YErj=m;0#D*1Uiu$wxJch
z4uWkU0<D+Jm<H-4rBAiZoQ^zO1nCf2rA~t&%ao}W$x|SLa1yyDg0S(|M*$ss;8jv!
z=U#ZN1n(cAF(B0wgoIZ^a0a9rLMB7|4~O<2My`n<Y-B-{T50mpC@=z5OjAxkDk(_U
z3A_#*ePLeQloK(Njzv#A9@&31tn)xn>mI-QE#5U7T}oHk<SsHxouL;yK|8!v*}qoS
zHDAmwL)bKqTQ`hL)r(2iiBZA?bh9?M5@=x<Xq-wATpFXTP{muWkSqx)x7isug&5e?
z7?_n9BIbsl2OY>PapyndrT;8fLC2;EUH-3n>p$nd{|qJn8T{`uxIbiYxyN90o5A8b
zgWVMdn@bEXr`XcoFf9Dfw*hn-1M418!`0yx=rC}p8~=H)gAn9&AP5Psk9e<uDmyN4
zl?1AuF8!B#^544s00V<Qcp6#|yeb>wNcbKrNNW~Sm_TYG$PzM0m4RGC!NnoV6_~-{
z2_D&F6aZDkpw1VhwT#>w!WD=R2MB`3g*d0n>F(y&{4c2epIhrcm*#&)t^aKL{}~)#
zh{gZcT=d^=`+ubw{~0pAFqr*dF#ODE^k2a8zm&y)1-t(W4xsgQO7@^@k`x{OD?0p_
zaQrXr@}JZ9Kda|IhNwGCc^??szOzjNbw$}${%79ypJzYl_&D*4;Ny30;;fJOuYp!F
zFdPT<G7P3ZVBiV`4Z(p{K|_25sh8mco;={rxF`doBm;v41FJj(mkQ`QT^4l)-XQDT
zO=?^I>+b%~yX(K;zW>bo!INTpz~uh_j0gTRANkLI0<=Sx7kn&(@Hx<CSYhz0Z-KL*
z6NH2=gN_7eSPGg2W7`7S*~EM5Kj+E+EJyxxp8*|tCvXO|bdzBpXwfFmY0!dp&U2tE
zmN+hfhU_>lf+id#u7ZxIV3_iqq4oho(IbYgUkoe%vv2&*xEoYCavTGl?rXaCH-k+V
z1BV`?pt-5NA85%Cg8&1Q5NMc=NenbF%OuFa4C(X2PjQE=afJj68VMf3;}&J$6$fon
zV-{lIQs5BM;1pJ3<(FgNjFYwOlkq;QANRng?x<hGaliJRe(gJh+joR?><nt%1sRkB
zZB}jD8Po<60kK;VC(A)99b^*T34$}QRY2RpOSqxS=O8^J$og&2xe9flovo1da>#WM
zas>oAV*x_CR;&e`J6EyRt$Ym>frsWGtL7jz5rl+vh@4ATK<Xf;lI0KvgoM;bkeUcW
z!h1t#>lPrrA^bIweGceEZb&s`pEVmY5C<V`kd|#*q)xL;pK6&t)hca@b^27~PLM_N
z6s*WBaS{YsBu|0VL1-krN($)MgB2lHNyxPlysrdjz^f-nl>{Nt>LUmjQY%5~BM1qp
ziI7RmS}AhUk;q9$AXQT2#G|mT5NJ~pm<`=R3ta*+2~-)yOga_=Vw{MZcp|#*XmroP
zh|Yb1P1}9zHhWgA2E|m)EVHBu1`$n~fz@grg^JEOiZ+SjM&Ux5u3U<?oYF>2BHGM+
zN}y|8m;^z^CSsL1WWy#}4x*gP3Ld8t;+D75T=*>Y?tkg~pc4QX&i&`O3965TulyH3
z^PgeHD~7Nq4DMGM>~AvIUT3hr1*(#4FEiL)W^g>tknoIQ&VOF;jW8Vh|J%LzZ}SXP
zTu594%}~Sddxeb7q18v+S3m_R@0I^j5B_ss{Ldg6#K52nE=wWx5xmlYFd#J%q>4c!
zAqvn0Azbi$Gy^-hO5z2DDw7~+IRt3ABR>OZM<nD>AV?L4CrJ6hSM79gNiJuU{?D!Y
zpG6t81BF597lZ0w2D7^i4$nac$rgTO2!G7rc$UHPH-pVTKJ)(~w*O_VLG_WmJ*YlX
zw1d`1%1)q8lc?Q)M%P~q4qq7}ZZYP*XK4A#IuSHH#kLYO^vAsqG)E(L0aPDJ;qD(n
z>LbRJ|G6*!H=XsAK_D7D2rLZq3}_;ZM^uwtK!t%_9)!3Q7<iNz_|zD9RY9jJ@u)HI
z8ZdC`FfceVD5NqJ-WFZ_pJB~^Zt$9aXqB`JTqW%VU6aIo1XRE9f=BAaE`sVKLGbZG
zppCJYKs63%QOBzP49h{+#WEiNpGR~GbkGo}B0CG34&wu_*km~Z+R(&v`akD65Mn+5
zpY{BIw)3Ediue`KO;3`0!Ij$l|H5nj^X&j3-sAtdj({%SlDz&u^5TDngmuiqmJBRf
zGHRwQe6rvu<^V5i5rWMpLe>y+gGU}9hh-tJ#Dj$ph-L&O1{OXRL1`u)5Mtm`VCGe3
z6Oae3zEdo7h}me7{?fVVOHj+{z~)o_9lQKHb_KWZ03q=C3E&E77pO97+ZoWj1B`aT
zyG7vc&raC59Awb~V$~d^Cc^9ofh(hJklqlSgskF*bcH~*Qq4xM>J7;C5u_^wC*fx-
zxK^wMRY~y5sC*4%k_dV29CCdGsfHk=18mWP1NZ_6_zV%~5+d*lI(UZ&&VW=#kggEC
zA7q;|AATnhgaJ8s4r_g6i?)*pRv%fVO@Y)$SgRq3AfzUOlhD`!pKbwRgO;6uBQCgY
zH@sqkGvL(|q)LK!m*5>GI0If2!5NUs2$_V`M93thCc-3<Yb6L9(o2HWM^JL&QKX*H
zF|_&!GIj@93lTTvRNNG3RTVYqM8t$+5q(EOx(+~A;<;9=wJTg^oi*1mVX|IimsV(-
zns=3~Q=zm~iimNnkd7a>sym0AEu**@186axI%vlYsJjH2*2mMd17#<2F(U(mvTDjq
zvE%=B@BQb$|DXBtfA+Kg`L6tzx&}gA3;#3Z++%Qk$l!b*bkUyuWzcoxHW#3z(;0^7
zrwsG|3vB_Nbf<FVzuT+-#@9i&kaHpJ)8~WD9C2L+l{V0kx{IL8M!7D6)`mqr{?CxV
z0bGj8fyd>Lx08UTl)>XFkopKhLh2f1HY5NbA`lXy22vkEW|6o+OCXr|!ShFuP=%}{
zf#@YjGBGe{u?ssfNNy6;KEk2)i&OnKtMY$V<^OETf7w<3F=%{c*8js{0J;c?&+xyn
z*?$=;5RwO9)hB2BU)~OMbDyH^e?>dcss>T#|NOT98QgC&xLjk1y1<zCgrWH(C%8W1
zSoxoQ`+q)geI#=3zxZ{~-FA@iI-#ro1+PHw)rIUM7P|&I-GcWNXg#FftTzn8so)(t
zYT%k(h=EO!fm@A<-<pBfl7YjRfk~HvMV*0J4RmoKqdEh-5d)tk1GgmuuQha^saTeF
z+$Q#x-%`{63$6v7<<7bFKlgUfISEX={xj?aA+{s`xle)@qF(^5d*M3`T9?j$_P^Lw
z(5i2Sb)XGs49h@Uhqw;^=Q#?Rf)u^^pZD5-_6wlxY(iH-hYqnG1yxl%CqYw4pi#SX
zpj}iPr$Joav!JCBk{A9<UHmV8^1r~L|IEApa~uFIEEl-|>NAR*`)_vdKf{cd3<53;
z3?{5R>Y&~$17tW3GVY2Tn~*6;WD#UGr2h*Vxdffsz{$<RC&bDx%)lqYBp?PFJrK~>
z4xQ>>xig^oXh8E(PzBV!Bd}vvV8^atlu07cNj^cXyTCOOs6K+%K5z!)`~=88RyYaS
zLj)lqi?_k#Hu%*KFb1eM1gUo*l@WvuC*c(koZ(iv4paewNtg095ad?A1~kWAx(dVq
zZ*GOuL2jihAPW^-VMi-KdP9!I%ixufec=-LJQ1YcaRi^G-~_%G55k7rNd#dd&l5pv
zA50Q*9|UN62)u^~vN!=wLS}^^Gen?jC~b;m>SU`l#Bdy>_JNaTus#to!!%(cqz*zO
z;Z+j68iF(Y+jqmOBsc?7H$g~veFR~^D<fnEyh=i@i6HEdzC$5>hcN3S<em}oI)}(f
zM<OO3hSx{PrxJn3?$B4|K{`s1{t={x0v|ebENa5B$o``deTRZN_V_h!0}c6Bt#v70
z=9D|nCVhrQY>z=`i@sllhI^rgQ;MQ>tei=(gr2Xch7+%X6{naM6TdQOAOh4~;sfu7
zfQ(ro=O#+n;BJ>nWbT@W%2&XrD1hrD&hw!9Na-GEu`xr-H-@M)po_>I?l9P22Hiz&
zbAiG7JcHFa5ZmbtQ|xnw`TzMgfX<gTfBxU@(SMx_p!$gWI_OjZ`0Wyqo)Nr0;=S^p
z<1%Opo9p6#@q7P`cK=}z4gqf+lEGFVL8>H3Cx{um3Kvq>pw&B&ss}EJ95RsECCF?M
zq@4^|*$XilStWk90{GsX84P0U8Du_isQqG7`Ol&DpGWgQpZ0%d{XeXR|G16+^PB${
zG6l^pNt%NO>*TEe%US=IwfQdxxklCo#Fer9&!GE@LE|ff-3126%M6j{8FC&mG<|^7
zN9-%0^%2jp|H5ZM^^p{8NdsiPH{_Ccczq;#6?DL<@P+?|bKf&a=Yr0);LrzM*TpUg
zx^<CPgMr(efy<16&5(gnhk;2Igcwv97?c@U4H!TtGJ&fkMkDYZWLE~60?=}}oX4Q$
zlv6?79JWoMbqJu_825ncBj$skT>!kN{|mrJ>P~^GB(bZY!@wEVfa{}G|5<i|HqUY$
z{?BsaKi?(LKpgiKXniDf33TQI%Rx|;#0IXC*v^9LBes+OnU8^n@>q}l=R5je@C4{a
z4Bq|!+4lYCIRZKzLg*Z*KH@(5U+2ny>BIk7tnwHbO*sX$!JCq}z-a@L0FdJmSE~cE
zhz>HEhqN4#g9lV42}?3?3o<ZBY1+qyHy#XcJm}YW7*ZbvwQUb*--%owf#-}s^^t$m
zc6faR=@a=j?}SuC-i_ORns#_MZiiGo$RxM|0v&dWbi^F!yi-U`<O#a^VUtJo2KYh+
zNR0y@mxJ_x+^g0@dO(<@OZgh7(p65St3Xu}>~=g*tyH$ssbo2%K7v<9X!Q}iGJ-5x
zfY(Q$0Uz*n;mDN{<aRs=>5#hs-YtSuL)KYyv7n6E))})w(?hU*tdPnGUK2qpqtt2e
z`p7bMGQkSSG+`n#LaT`&TzGvH(6$R1p$+Q=wxf5TAZz=PNl2}PfA|hkJt5aekXi||
zVuJLHkjaRNha)B)4x4y5e9{p}j|sZk0X(S$u9A)+lTcak<rk2%iNYry2cx556OM%T
zABI@%*RjW|X`3r}-?UrhI;X-VmKn1x5+)f&cI$=Isd|?xx@1Y)rAV4Z3+adOs=Bbt
z+p$R+GYD!k@F_E}N`c2BAg744foE6<jxr(VN)$FTXsC)E)PycMWpnsH!-fAmcmDI;
z{?By&KgXs2{5StAT?Mso8B+f;csyWmy1`(76*OmLdzr!R7=!g;2FsHS_U9R#PqHLD
zV_5oMV8?&MTmS8z|Chf7x|2fq8fcpg|5Z@EhH^#_`0iKGkuq2RvtIhob{W*9SG@OM
zHKG?(HiD)+c#(<}aCM1rHN>-!8W}QY1QCI^Z{bQIwG~7bA_x(IlvEHlM4UX*jDg3E
zL0~SA%0_m@hl1L#d9?rY>ilQZ`Oj_mU%>Fcu*rWx)Bi$dpe~Y#`F}}M5Rx(fFK_i<
z$@;&d)qfdlka`BWBMeF>7@W2-xb0$yJjRfFi=pWQ$Ao_j^Zs+L_|LKh)IZ`s`Cse;
zv_6u&4O(RfxwC`!>VIDF!Uo~1pxsHr;2wwI#s3CNelaMNF)->faF{T#DS)PjST#Un
zaeO8WJVp#0dZ2C*i!uX~BDnUKVqnn+)kj>m44gI$tm+I5@(c{ZTpVf)nv+>w*E4us
zV2HfWQ1zX+_dm~~{}Su|3vBx@wg+@6FxLUlay!0LpgAMnbN_|Tf{^%mP_4nR8oXY8
z6X=`=-h=;HcY@aJu^;-+bs999$A1}AH}PHh&vgM*sj(gT&wlbh2ly}|$Psy<i);7)
zXW9$W#jyK7+kyWahd>MVIFCW+tprZ}=R5JA<rL^(B8i4$4BUaBP9K{gc)Skla2)uo
zV9>55YX$~i28Iv@h8_m)8SKKRxy8>5DLfQbe8eC)iGd*-yrD@4d=e4(a%2W|QS1DW
zs&%2=`~6yW1h?)FZr$(Gw#NswKn`4jHSYo~S^#nP1hj#UQ-EAQ1i5<%xf((~=@ecQ
zAy-3iHmJ!8nixXd2LY>(HhENoY9&aA2vQ|MCWhQ9*SS=I5OPiAQohEuVlBKTg49PK
zvTUVe=?cgU5qzxzoPpUVf)CB%nk0gBlpy1B$Rt{2WRpG58g?O(HMkCflUC`oAaxL=
zR>D#fK~{06P6pLPDU&QxCRrs-u}T6>8-e;kNfXRq$TYDZ6PYCRfe-rI4krVecffnH
zAO>oE6wtN{txAITn;;D2nh3&%*GI?<<h~Mw4XKhsdk+Tp9)#2gm?XT@gta~bRY^#D
zX(2r(NKJ%%b2_BXf{3HlM-VBv;gGwQ0-AS#no{+fykRTxoH8a^#&?=V)am$_YPe=8
z+9k`HhKTF?iD^3XDO&R>m@<oMf%YzON`tpr@POA)LFPUfNC%L<28)`7uhvG;G5!qa
z{tG??RS=BlK=VF=xBrVA|IaY_2Se~{2KW064%a}lMK+fhY%YQ7Bio}4HfKQhz*_ES
zjD5_#`M>PZ|BkQzo7@FeMp9Qnm5~s*GU5YQKs?|T-JDlIEo2_Zme^~c`iSw|f4STL
z8P<MgV734k1%lvVfA}&6<O&7SkzoW){eY&`;57?G9O4~#FhFV?2ni8{7=>ALL1c*`
zkxkPA4{bFuuufuNJ<1?_nL+v&o9cfCwf`(yp!!JM;=hQ+e^JZ-q86YkNz4eeK3v-D
zzqHwZd5iy2#{c<r|1(JcV&LD(AbXg>aVvxCPKL;1%msHD8s8(=N4$GMi=QF&5$HxC
zSbf9~u8erW2Qk6xBc3z=1uy<LTlJqorHX+;hk?ZaR2i`;FtBMt>mxoB22Op@QaUzO
zaD5~R>Qlh$BVGpvZe0cjMbJ4I3=#|+MGSf~80?RNrU-pcF+^QwsCo|CWwz))|JMJU
z`$3595NORd_nH5E=Rvz-1<(8!JP8_AV^{@h$*}DJo%6u8546XYaS!MqM3$qV$_r8-
z@m>bqqr`m@bPy2BvHz^cL0A5<9R=Nu&U6s80SZ(ff!9&6ffx9Jj(Y_U;(-o#JqlU|
z&v^Vl>*@bNyZ$q{_k!vpW<Ah4I#4SMGd4LuYv)A5MO8ESm20K-JCzLg8Q7oEGJB_C
z@<!S4laj$lC9~~Za`SnFf*2Sb88{92Bpsv;qTQm~k~;RJ_a21QH33a~0-8XT5u`ry
zYuN#r9)k6QK&#_$)knS!+mX;#NR<SygWwEUWwaS~{`O{Y9R#U~AS84f5xhQvbc>uT
z*E&^zDj;~31nLUGx<!s9D?v3;=_<z(P-O(EkKlC>oB^+lAPh*)2)RCjR6TGKGD8F*
z;WZJw8iFt&JtIhE1gVeU)ewYXl|Bo(KC*-z@&j2c2O*)$<S^<ZP-T=j$t-CCS{-DP
z(1(WLH4&)PZ3c~|Lb|b#juLX`2(vPR*G-Vh2#qAKK7v$CkiHTc30d#}Cm~yw(6|sm
zI2k$dC>%vkJQ_XmD5Rc>n0z#R(h*3l1R*0P9fQ<IkPZ}>JOMg)sC%z}$1d-tEnbbA
zy=ynPRjhU`S!$az7qSvhBcw^erwXzXPt+og&oEqA*PmO}olDLZbm<b0I_R)a7Af$A
z3lF$i2w5ivDRnXPBQ6m~*N&4cd;T+Q0iA~_cI!X)_5Z9FK|6-{uKpKV`JbWSHG}6f
z2A78nj&~Sru0!XHY)>%Qo?vi1#Nd8{L2jC0=0mY#|8?&CH+uG8@hV8Kz;)0}0r!>v
zJYd8Hnd|^pMtooepbJK?f+niCZ~srZ`k%qN8N8-W1l%)X1XoGi;88pH5DG77JBffh
zgK`vuQ3iud5Ce-g=*9%lDh0S2WWO_l=bJEnjVX>Pf>js1rv@1<gdF1_4eCuWx-fG3
zFz`3=sm@?lInSnkiB0P_s3x-hFKPc@(Bi*<`F~;4|ANN<g$zJQ%;-P2`d<#M0}K-D
z7(}WW)Os15Rx)_3XNWw;QFxD`@dHo)KZg1LIafmaN04b6k!zryk>qXA5<B7R{{_Ky
zJ^xiu2SMn@f6$@m*Fi@H@m&6Ivf)32N+oE#jzNQgRgr;RnSsp+R5$UPGVqx&u;?%_
zXn=!G4t&d%3<H}f1E&Q8w-W=W189PYK^@!&7X%I5iMlhGEoX7w!eDxk!C()A<xw`D
z8;tdTq-Xx;TK8XY>wm5T|9KAm7drW0^bF{H2BFjcd5`>O+5I0hf3)d8+n)dYM?ojx
zG4BDL>BoHVKjX3g>}UQ9T>{N^iJk`Cw9bDHv}TX>5a=Ey?&F{(<&1|w_oqW{Vq)0~
zI^>*j7wDi`j$@#EHzZDh>NuXQ|HTi0PJb0V{9p0Re}<{w8C0?u80?rpH^7Qv#vcPi
z2$y7yfb35(WzZo%Tx$P0)&Fy8{O3{q&#U&IR|SN4)c^CS|L50v&ZqrM$@PF$?2Ul>
zqtRU#f;&MMKm@e!3~1j8IYbUZLTVyNs}fSy!Fxc+4Dh`V&>M*0m5yKi7G&gGw;2^d
zZfo5LK8SlW_+Gq?9@U%Nt2TnsdU&4*vW*BbNd&2V+%VQEz_+qu?GQo6=peNZ8VRY4
z>~XABz`CauK3imyJrA-<0WvoPxe*VvB@@;yg4IE@Y(PCD5NVw@6OL?BX26ko%5(@a
zOP*$yJjE<|ib>)mlSEJ*WR^6+B5{IwLceh$2pJ{x871_Y#Y2&4TrV<$#1Vvq)I<;x
zYkdT%p5WCGoPkyoLAZG8BRs=*$Qzf!`VNJ{uC##HO=u$U`UvZtfRKs_QY9hRN1(yI
zNk@?TP4M~%>6{|aeN2&)j)e6e4C&q*)VVvPZKr?Z7Qfnc?q$neOP1T^F0f3UY!uf6
zS&66NT`2FAp==v3V-_J};4h@+Dx_e|Eos6bq`}Co2s-Kkx_1e_aS2<F#3sh!-IKHa
z1H*dI5jWiD|BK!P)kiEBKy?b|1<;)l3@P^+yq<&VBikDc*4IGOM%L#UERQgl9$;`e
z%3!>OL2{yb|9{o1{|z64wjF_PfV~0Qa0Hnxf{ZD_@0H-W3X<Zx3Oaa=@5+CcEB}Qa
z{g>;%4qDa)DoI3Pl?dcq7x)}Bo1m(VflIW4PqRiyw_3t9<=DyE9{H?Fo-8cl;5ju&
zO#~+)RSblS=52@+njls#L?wiT^u-{`Afttl)9oa{LtbVK3>FLw$qWp|4BXooB#$tN
zK4Xx5$E5L}#|Tsz30i>aBVPUg+?xMcl>Rfyer6DS&LFmifo}<ea211c7lX@cP<<3}
zl)d0CL&JMeeKZeLAF*u%EfL{74w|Duu8%}+{1>_gI$MeVDrkldxjy2)^xt&je+G?O
za5qPpfklCVLxq9e7&LarWx&91%D`a&>L0PFGcYJIuqcCW5azOE;IRhPN8C;fd{zw1
z`V5THpnemB9s{E>gLDsr-FgO_6QKIYU=M@QJ_hd-3>i-uy8kmR`_HuFKkM%QJcs{_
zoc=Fy5wssx@C4`_c!tfOvng11{TDg`s*hOq{%1Z2I_QJpDCiO;0r2pn+MWN>7eVVE
z1kQt&c7v|=1J_5)M?m!v3wXSac@Jo_67wF=DX+Yz|Fa(j??0OHjiLE1L(ezR2JKb<
zdA9%O-SOYL>HvdW3V0D6wp~{&=@RN)B8q>-RY3I-o6>(a6%gW70#`}k`iN8IKZo*v
z5yLmi4!^DAZhGgv^s7DWUw6c}d1pY|9>3P@Ak?x0QW^QR?C@{e3BHmQIs^n>oB+Cb
z2woW>_lDp#5rlzU8Tr+3fmcT03TPwjlGY92BTqMaRBmvu+UQof!41rSS4NPP+wR~c
zbMQeq(3B8(dI-e8xQz&YzyiEO1X;O_Rue%MFhG_r*yb;S_l6)0<lYe4LT=;^5u`qX
z*F+Eoq(cOrErRrkKsAv~`Yd=&WSutCDivB8Stn1oPKH)S<|)&$)<@<E{gC<yQW+T~
z^dZ+l5VlEdk4bC~q$Yyg&;e;hK<Xn135r}u6oW}f^8++R*p7V*E%J;Kq&|X>kfjcA
z5_#;7___&FB_Y>G;r)jpg2>lfK<XeoB&2#mUipArF+sXZQIn2AltFH!1#Q#?SB?Q4
zd%T;syVq~_tl#QVwZWlyg>~LytDO19sWXh@d$qz^Gy<yR-3#R$GDXbdMf5{>HGFxL
zoY-V+m_!XhM?J91f{%LO1s`RATz*1IQB0DXMcR&g>U*7Spq0IhyFru7B3J&iU;EE^
z;XmKS|EwoKXH7H2JY?{F!r*=nG;?Hq5i~|;eVV~!H-q&d4x0lEe3Ka5mpbkHumAYJ
z?n7{+_bRAH<GS{r^C}2I=G-Bq0C>5b;5E>Bs$5t9GoAf!eD}Zj!T*ff37`^^L5Kl#
z@F`?j9poYa$V7l5ld3nj`$U<DMNDBE8G_a_B%fw3zRghmkfHE8L-0HXu|#m+N)TL^
zq4i=geNCPSWRwrmGeRC1gshQ(j4pyVH!)Z<NrW&+b~8x#GDvM=P};?)b)LiU8js;M
z4&6)4s=FDaH!_IzG6;7uNS1++L?MHE1B1&-2G4a2;YZnWZZXt<<mm&g_hw%SI_ZLA
z4`^+(z&X$ikvMq&k>qVqvq9(@Xo($Yq1|=RJ?4Tp{|nss&vFhl@n^XCKZABFc<@u3
zfk}mdQ=5Uqj)B{ef!~fnz@CB6j)C8vf!~RN&yhjE8HD&;7<il*xZN3eJsJ4CKnNnv
z@4~?2$RL^`VA9NBw}-)eD}&y45YpJhV6=z9?<#NhC+6P&QcFPBkxL!^FLWMsEQ7##
z&_UmfyTSWmw}4mH?f=icA9T|n`yo)3#0plaaQDCY^Z$w$|4Uu^FMSzwjw{d6|H3E!
z^Bw*VIb9rdCM|fY5!-?PlDq%&ZTJsb=Q;5|L(NZ6Z5Vr>A>$Q8(;tS};J(-f(8Lx)
z^DPFwZ1A~us^IRSCHNG?90sOF2EGTvs^7U4{_`sRXI1~tq6VsrKvk0Je|GSFMqJAO
zIdxwOn0!zTI%%19*R%Q%c$3jCzXs5hkZ<$$fEEz)Yu)bKvJFDQ%QtXM1UhOP&VY=?
z!AVFRgiIpWLB4gHVbu_*Lj>7O1g?qJ!w>d>55XZbAoY=3`5Jfdej<34<OCU%D_QAK
zvfQzBg=6Uohmz$G1h0l56_7*GQaA&`hObz#FI)mSQ~_QeL8>801q8bfVgZB!sd^yy
zhJ)Flt6CxJw_)U5jLs40J_zJ_BKV>O%ap0s$y2S8rdTI~7?3^@gfvN<WSlS&(i?)1
zriuL~;BJvoLZ4xLuVFl*PXwuk;3Pc0Ameoq5>gojH19yGk04x7tpuA_f_IT1H4%h_
z^p7B9aOd8j4(KMPz|Oq^NNflfxsrm+B|%8gToTgx;b{FPcr^rP5YthDS5ol02AKh=
ziQsh;vQ)%`BM|e$`VIwk?elNn?boskv<|0!3urA()ml(5JAbZC#x$FhKI7<CgP>}4
z&mwiFba|_IdDBn{JwHJe2TmCac418hE(HcQQPBPx&`G3_q7q&{W0u2W0qI?uoByk>
z2MyLSZ2!-C6m$a-+tvRJ=l*k@{m*dtKSSGphJ>dKeovwGk<|rIePnTh!SVou-BAYp
z%?y%rMJBupx$xiM$$zz5pc~0WVf7KDN`llx5E4=!K~^$wUir^*{=eCS{|uD}K}Q3D
z_P`2)R$_y0HDv%-sj#zdq#bnw$|OSONrf%ojNKuSvY)y5wm`!R){b`~)Bk$Rf2CY?
zRMk8jbU_lxi*Wx!_T7VosDUB%J7iZWq*j7VTCszt@K{0DNHL0nr(MiIrMh4<gJ3fQ
z=RyYFr3`$V8Thw=NS=)hTniXjW-+kUGO*V%h!ir27BNT`F=#e1xU6LI*~k!f1XLf@
zeZX2DiCzaC86<V*ztkO2eFVCd@9KX@eI$JIzu-;KRUItnK#RaZ^^tBnXd4%sIcPqJ
z&wxR|je*~lLCBFo)P+IF34{b)Ks6Dh-eI?8V7Fypb!1?10Fi7?3_SKA#P7nu=ghzo
z%pj4@V7!sRY6pY%7BJe%ptXg;U^|1=L5A>)43(c5=75fK=Qs*lnj(DZzrcCW$*T<8
zK~qR9d;hcS_z#*wItZ$gm=A(FO8jR)lZL8S|BD?1%{TEK19gJ9kNg)s^`HL;XgH30
zFKDk5^MU_t2fzme&Hm5O{Ei|2Aw%YEhU5pJvlA<RadiJ@TKJ!DEx6)a{a<wTf4MFH
zqgOoDtKMN{UnU^x!_J?`FP_OSJxfk&rLgLEN$tOU%AngK(CQ-=<^LRt|M@lki&%V7
z_55#}_0YZKwO_+=czp!f8RXx*Eudw)PxDsamTkT*+kBe0`Za<^;UIMoyy`(_AXh+0
ztL8v!wlO+H8{t(Fa(x6Dj`IMo)<&+0AVYI5NJDdONb^LX8KRPvAQD^|q18u_3J5|%
zM&uy9AqX2@2O-x-@R|rx8QJARt072b1li3Bu7;pvbg()I`O@&Y@SYK@bA+gmATvae
z{X~#?B1laHS+QUS9-sqNLkSb1BzSZVtv-TOMuzdd@CwKzw#PW88;&4R1Bow4F9}jZ
zK}m4@LaHHT5;AZHA(5*j%sD1Vtpp+Al@WZ`61*~kZ>oh<M#v;&MhUq-f`}k@lpwVd
zq{_fWLMkKpas|k2Q8>~B6Qlxy)Jh0)GIYubw6J^vXowHeR|@Jq=HGSLuVbHA%Px<m
zZLam3T`Sf)6tA!^Tw;|o-!f&YepEN)#wB^@Tyg7Eag%64y-+?iS7vD|22p*`_C_`-
z@R8So;Nc5M#l(nIEQ9+XDw*EPUJD!sFIw0D+I7gV<-ZvC7%}G4pj)azD|<WsGo(Ie
z@PEYMaht*M5`#UsKC(OsTF+p(lR;|<XX<^G<NqBV|5thdx)ly`;~x7pP$vssA3+A|
zc&`12h(P9#*v|bIzyIHF(`N>WNRU&RrNP5)kQ27y%dq(vL=72S`$VD^Gx+ad2;9Yx
zbe1{qCPUo|rtYr{)BZ3m{?D@NKl|GMvh)8lgl=VE(+ADwfTpuqVJi<ItE!N^PBIfR
zUI!;3hp)0SFtRhSaDg^4F$;j&0GtvGyfUo9N(_8@Y!Vg>(h(pep1~lR#vqi&AehP^
z6c0ke;S54y41!S%LeUJO2@K*%43fDFnzan>i&^|uF@zsw&AtimAF-|Y&$#tJq<_SJ
z7SunIy!~JLE~q{dzwux68fc_W5ZpO})JFn0pr`I=Ec?%(KZAkYg+aiVLEMf(z?^~8
zmVv{Dfy0u4&76VRh=J9Ffz^zG*#yL9&}U#)W?)hRcPLcAq#9^JJEIP0MFg`x=(r3<
zBL>+bal6S3CMy}V7cgk7VbEH~puUAcV=IIHAqLwE3}N?0%R#k+^e)iAo!Hs`VrM~<
zO2P+0=W;Wy`OmcWKi{tZLi<3=1^M=Y2J{3^{1>_OpYu3qLjm`3&`cBaPS9dJz61aH
z5BwK8{9pRWe}SF<xo7@osCvzi@qi)rDnslQhSWO@Rj(K({%2nCUtsHh={^6Yc0*4K
z6x#J);=q6V75|mG|I0i5XHa;^Bm0|Q;UBNue}0Akd@BF>RsZv-fp#%NZgk~P293_K
zss3kD{Lg7{SK8^lLHud^yjwo?2Ynk31vYIDXxtXsv^BJ8t55T0pXSYeO<Uo}w{Z)I
z-H7NDLCQMh+aKVS5qy0DgaO$%1h0L(!CP68>mW#<2z2r`_+TH<kv>Qxa_}W{kSYl>
zTLc=K1Fz+V_lzLjA_xiZ86np}_JvCz6%d4k56!{rBisB%HsEu9Z1SM@LBJ~`yW9o#
zIrAOBB)Ct6xEc>}{3+_~c#yqCkhODG=`$b~<e{%yNCRywf-Il2N}gnqIKe7;l2!5~
zlcWg{1gU_~Ncb=v<@J$&GpMBjX>UMCP{jlrp@mdQ5HhfRH-v#)D+P5xZV~~F?;+Pm
zSgRy>^#mEjgOJE|6NC+~kKhbQeFPz~)<=-aC>+~iL=Y)RWrU@31U~-(vI+t&8#U=z
z*u+zz6HbQq9}Vd{6415JzjJR;>n_lJTQ%$5E7y3IEOpAAXPY+FG_Kb;yiv=qLfs`>
z(JocqJWNc_S6IV|N70&7QkOwM1+@1BbgVIawF6}M3sNF;%2n=5+5cbW{C~BZ{~0&^
zXJ~%RF#R{nuK&!Z|1%u_&vE!a!><3J+t6~}feuaey362nnZf2PgY8++GH?Cu3|gBQ
zVy?L@`fq;gzuU|I@^?W8GC=Ahj_WA(5fAK?2F|OXDhYJ3>gE4@*Zxa9_|FhFoq@rO
zfmN1)1#+S?2Lq@!fy{z2Fz{*VxhIRqF5?fK#}KfSA@&$^-c5n(XH4B+`DgrRTKu1D
zHK;z~Soh!Y@PG52|D6kFf(NM>U<Z^jqOOR5c$`QQGC&8JDq;h>iVf5WV&-Dt<Y(d$
zV&)fR6Ov#RR^Sv@;g{7BQ8JWJGnLb^7Sr$$P;vrQMpEGnl35I5=?nrX4E)Iq{4pRT
z7|H-b;QB}?7Btc*p3R_I$>6d8R3C*M0o6x!A3%3`%=^!_0@QEi*aIrGA>(xtw?LJV
z)Sdt0H$X_}256*C_~w5Qx((_gv7i6Xc=EsGbXa}F<qoQhIP@79Ou)zhRK+l8fx1qh
zLojqe^$v(D2i`oZ46cm8JCZ<48q~mnsR353#lR56pj^RVzJ|eQIjAzyTnnv_G<Gv+
z?qM+5%iw;2A>|&ZL%j)9CGj5vEo_iH{$Ktq_>_hT9~hSYXIu;FF)@MnF>xLIFLnjA
zUy1W5XiE{>9?*Of$1c!TBax&3Id*~@NFARTvhOfNon{C=4XTfdUkUd7=U?()e#?J}
zz5j)G{1@K=s*ia0|Cc)RU+U0*hS^^k0<SUXZDx?W&!YZ<U*VsC;(tLU5aLq-RY^Q*
zpehMcAF(O^XHx`MMjHQRoImJ>{CCK|?N<5_xjqVM+ybwUpmh*Ztpw>GL24p+?SssK
zR6yQ!n;~@&GD)x+g7l0al@VyyDtId^WcLtcnH;1(g49G164W6oS?N%`0>nTXqjM-(
zZjW?ZE2IK~kXWlBNR<Syj3D(9+S)nfJ`v<%R!E-+Qt#j*G3z6E&j{2xN}X&0UdRnv
z%MGrHED}M>6(E(7Nzw%5nh0JE!5NVH$RG|hRfN_vf^?A}G3DC;>e#}2Nr;Y8^ES--
z2sAbetB*iyDPd!G(8ZQWTuAkV+<^ksSsl>fJLKvKUK2q&N)Qq<%>=1~FiCi21Ytlb
zBM6Bpf>udE>L5%q5_}Rkylw(9km@7IsYKwVchLGMr2j-n|B0Z9#{wrD4emMW*M7ji
zeYaN==n_De@-=pa%PeyiTBOf1h@Yev-liT<uk2nRVUsFm7R?WuT5@NRw_}wu1Km}}
ztIWVI3tA%$I<XCMB7u;kZ93Q1|58W)%ijGjcke&f?*9yZ?-;s%Ff9Miwf{fcZqPJ3
z@8191^Zql`d}m00%@BN>!Sw=z-AM-1y$pss87ww2#9n7u^Iz)3fAd%Wjh_A&ybU_1
zj`jL~wj0p-Bc5xZu_MSx9pvOX#%uo>!0W-dF8p_V^q*<*a|RY81_mWYHeLoM_>Iz#
zH4MTG+^P%~brMkv7(#Y11Z-i5J;#u97jzMA*LQ|#e?WIwuL7NB#<1-_<DUP#C;rRb
z{BL*UKSRo9@Klr}1E^kuY=MNVhez%nLWbIqr-2y208)RkgKuhp+-JcDx;d0p1XQ#0
z$T9FMFbJzKh-tFP8?h;ybE;W!X}R+0c?uf_OIbuo+NH?5WXag)aOy-di1{)Idof6+
zfRI=MgJ3j+NGJ#i2Qdf+fGQ?|SO&ou2H`jcu`CAVat4RREI#WQ!uK=hUS+6%%iZ^f
z0a71r{m-`dKijeY0^s|PByNG?OZF~kiJkb(|3Wwa3*Y)LdIyArZ~qs*4O;fVxc|Rs
z+eZeaUIs>M1{OO84sFmm44`WbHNo4vRKVe*!2qg;v_K?iZ>$yrvo>e}9<x3JlOAZ}
z5~D7-s?r5lM%o}{3_1*)Rt%~w491ff^p`W}EMZVz$DpyEL3J~O+7<@&oebK08H~>`
zINxE&{vbQ)zx1~Mayvmw;1o{&S6=a-A@e*#$uow!4-EC+m|FiZO#Uyt>_6mg3*KG-
zMGyTK-SwYm<A2qC|7ExRm!I>Wt@$HE#$AT!3k)$A7~(H8<lkrL`^UcOzxXatABubL
zf6hJs*?0YyJ@jAw%zx`c|HbG1=L-D6pmLgl`zC|PBL<0ojMAV>d)ZX~v#Eg)Xc3(n
zXz2pC3aE?3sr;Wq^&f}oKVh32D!wnxviI8-9rJG3>D#nBxM_1p^Oo?Yt>H~uLmPI5
zHtYg*g<#bXq*j8JY@qeqKK0wc2)dOOQXgThi6DI<cvlF*fcJ*r3`k7`J?s=bF$AfP
zAk~m#$x2A=<5IfH8OC-lS?LTJpaZYvhIEMFB;>Ro<jxWDBoStngj^HZ<Sn$$U4T{@
z*@BOogAdT*s(s*>hQk?<`UuiFg3lI#Y9jD;Mexc9#7G3yM9B3Kp2`Sb6B)$y7{sBB
z(HTZ{!VxUipeyZ>t0WK`9L11s5enH1TEp+#ydA!d1(N}(jF9_EkctWG2@mk@5~L1-
zkdRslQWHT$AZ$1Zsdtb`<oXCA4iSNq@QMkuGC~#s)kKh?Jf!*v(o2FYzzdsr3Q`#%
z*GIuUM?-rL26lq3`tonu?pd=Dv?IH81!#v__AKj^$!1Z_T7Ko~?l}q$sS4In;zogz
zdR~HRP6G0#>|#31paUT#n0aMDQ!|3rITLn?9{8_u8FU1V=+*xcC;y8c`_HfvG>y)%
z=|AJP|DeM#=ly4&^`D{dKSS<UhL|S|o>v&`Pcc{>V(`4kQ}WJu_J6)T|23}tw|M<u
z^#-)+VY>mUk09f6@NN<J)&IQU1E`s<gYK~4JP*3$$msHa2K#2{8ZZV(#la3<kN_TG
z<4{tv3>J@^FA_F~!GAMb%mId+yPTD;7`nc*O$D7b%Ch`F+lK$V+d)$jd?)`4p95`v
z@xJ)q`rv=_#BOkr%LrZr&jwyV4z6uL1iV&)GayIAKn?)s1g$e?;s@2;9O4XIl8n4E
ztbz(W66*Xidcq1u!fNIc+BQ<gZt@o1vQ~a7j-hI9i5ecsy56}aK_v#E)dr!}D()qG
zhH(s%0Sx?Zpc+Rqg+VHXK_rerESy0!j6pbvK`0PW9|^@Wh-Na#7c)34X7$~GQXkF#
z&%P3LJ}ui`&`AY+u=Nbmw?R<`xpe|E@+olrzu*l}=ZOD0sJp?i?LTwLeFo_^aEHho
zbO-~JDyWKK0WYg#)?{GU2Q9H<GX(8{<u+sBHe=v3V-T<ao!-D{3R;QBW&~Ov&SnG}
z&0{wMDdRE)jrFlxFbL!`Xm>GaFJ{nM#GtkoR3$;`BaK}Qn!6cvk1-gWWN<jj5PXB7
z?kU5}|4gg@iy!!}zUDte#6AYEJq-Q_83K+oL|tS^zsXSfnqlr=P`hH$f94IK^UN8h
zeqosLpP}J7L)sOF@S_ZVhZ&;IGvqzuYWpR${J-k1|FQ@EOYZs4x%)r&UeF=xVkiI0
z9Q@BP{~trbWd^gA3=;bpWREb)zUEZ@%%<?4O#yVYAUn7+Vp9Vz>IT<GoJ#*Wl>T!n
zgU-_sHTtg;`q3isuS4lc=ZZ7l4LiLXb_O?X4sO~EsgFYHcZSsO^ljXNh9H#=xEcag
zM&Kh9K%F3XH3VV6J4BG}tB?u^PQoi7_|P1@CW2H(kg+(o@-@!LTZdqEkW=vrNHqi}
z9l@O=cr|2SybOYnYa&SP1DP8_W<&Z!$YXSnn#ekL0lYqfteu0LJ_o6cAWP;T{Udm-
zWR*SxvjRe{i7eBmTOb|84Vf?k9k!4HUBV4n#SN*9AZzE06Z;|kAj9}xIKwc$*C4J3
zQW-(mkopK-0U<LCB0JFPBlM0ExIXe}-in6cN8MspLvRszeFSGf>LUn=wN^szB|+Fh
zUHc#m2#K{;!aB7Csk0ze5`=_QN$~mzlYv|xK^8E8MUF*IJPtviW9}v#10UvaJbL2M
zs0l}+CLV^M$O)jO>7X?jy@!K)4~K$LP}c#!HqdoT?zJ0T%GcNyEVav?V-!C@H>_Db
zpjO?hT-G61#yVBVFq%)tk3-FaNzxRA#GOE!@xzw8Zu>8D_P^$>|8fuii{AJzedoXC
ztN+S}|1-3H;F|hhZsmXGdH)&O-ZC`3XXyRK&<$GHz*P5}q2en;)d!BrprIAU?f+FT
z{kMDlU;Y7TE`$Brf3~Zz-A3T4B6xiSVqE<X+KY7QKj(SSv6*s@|J%0gV_+}`F9zTM
z9S{LJmH~2x4<7@wEQ4N=Oypd~h@A|9n;4=`F=XFjsC>iF_LE^MxW-%oasb0F(4KwX
zQ{XEP&Vp_(5IOr_^3s3pE&oA_?ih@~b4if9&IQ36Y9JT(A)j##7GV-&V3T6tkY(Uj
zVBk|_5Y=IlF=kURXI8f1&~W6|b`v!85j2SqwTO{%$W(SOQ1mWU3#d{KuGfrgRg3P_
zPUtg9n{Jge*FI;yS@JYB&w4K1WCk&R23|MNiZzh{28mP#@e~G;GzP(B(6o_2B(y#f
zie?awVGvDWkk4aqU(6P;h9PVZWA0^!y60>?Ul|tuXI=$bgwJ~LKil#DyyrmCBzgn1
z1xf4%h%Is*)DPvp4w_dIx%OZB#((iE|78w?hPedOuP}%<frsV{8JIO0m^2ue^+1Qz
z@mhdtA1*V{+HVeH1`cD;*+DG&(4G;fPXr#pW6}lfE#)u<9WKOS0+MAjV&F6dRaiXk
z46+dny3<)q<}qlj2BUQhS{oQNw}FuMRtCKt3`PeSERHd_oMH&O!<zR<sr$e7)V~ao
z2N^7uGdONwaNEb=eV8HWI78$)P<0b`nIZKqL;iDy)C&x;Cm4c`FnI4}@H_@uQl5O8
zq4_n#vi}0xL5J7z9r(|_2h?*H-S=Pe(0{AL|D~7w5{P@opu3fU<0^yj0~U$jY*K%j
zr2n(Zffm)XDE((v{LilXpG*Bex5j^Nbr9lI`OhT#pG)JbfZlI8-;2624;)JmyH%a=
ztJ~pUztgXNi+{sb|HdtF6x6Uas9`It_CfT3Ak~du{Wib)ZN7C|!3eZQ4zfxC(hq`@
z@CpdRAW#ieu7O-g<PN)`6|#HCsSLDO4z#qXbOod)f{^ga$f<Y*Xo|ULIc6OM8Ja`x
z6G3Vp$l`4X38{%7>lWa>AqWFr4MA!m$mUk$T}AMk2(21|)IpHhA_xiT8Cj%G1D(_j
z?h`>OBgoo0vlP%&kx|kFBiPU!WQ!|gMK^p`ky(77aeOZtBBnlq$0J(B1nL|$ZiDoK
z(JCVl7ko`TyjFrR{91Nk_L$HH@gS8^VEb-=FhU-(gRlcJZb*lVKq@0-5>n~FNl3i|
z;UdpHLBx^QKVViy$Rgls=m>-i?>`Db5fhF<7-18Rg-tjHs*ENaiJk<ik|HM@ikxr=
zvRf*o??@<2OK|tWpsoWUUHbysKvPTZRqK5!S9z2!bIF}$oifops@*uOS<kmz$uV2r
zHeSXmMnd0PLf@NB+D+ahUUk}Y_Z^`6NcqZtp7WqJS}ITfD?j<qvgAKQ=mCb%(`*y}
zOK$nkyW&5?#D5GOUqO=so&SaV|I5z&FR}Q)(ppfvonzmB^C$nUo`WVb#BcnE%o#y<
zzg`2)6T!#pKtp!m`bgl~e~xqig>Qhb`O``PFRO!GZv;6#j1AmB5*O8Q5(}QL5WA2e
zcneeHE{4n-tQF50TE4SP0QHPmmxI<*vF`ZKe(*o&uF`X$HC<wt{)=A*RYuZR{yU%f
zufE~8Y5WuhZU+Wt1r}Z{1|CfY9!+L$Meym|JfOOXO_YsKmQzrHM?{%hOp8xamrKTw
zU(t+L)kaLuRmRL$-ZDVOCREWeO3f`%$0uFOw@^Q*%rvseDz4oksmC^bqGje(yS!Pp
zMGG8C7rT@#_pDyyQ@O@6ZKjfIHIr&IgMcRkw+jQiGpI@uPGFEqXAsL^5J~~nM}kq%
z`UtYBK{$y)DwDx&5tGjfhOj*hSr-`Uo^wz9#jx-{&*uMZhd|T)B3J)Q-Ttri^uOwh
z|EkabD?k0O{0xMop8c16`d{hEfBA=?<!T}q{|oN;&oJ>FZ}J5OnNIKpyJie5TA(Tk
zQXg@efNCYs_99*I5S$ixW2^>f-z$SAcz{k5EUpbY;EvmjLBxSUzy`D(i9rL@!vblv
zVPJP+5Upo0n8l#I4pblMY+}&a1X|djv6Vr4D}(+{P<>>2n89QhgVg~B&%F$BmpF=U
zF@)`7uv!VKkGu{s_#S2OKLYCh_#9*ap(CIg%6T1w^Ew9iouEEc+-=^9uYz;`Yi|87
zvH!o&&i`yXK*MxA2mcEm`!BTnKjW(ZJX2pY6rEr&-_M}AkwNwqgZvvveZ(gJAJhYa
zE$3!e{m-ces**q(U{ygAMr?}zMU6rCPZ=gWvCMemT6x^P`lNsTPUQN?uVFI;1=eo~
z1YZaaseq7Yf<PiQn;{#8z$9XU9A-^~TnC}`h9EOU2ohEe!LP%E)JKq&3Gn*JsTfo%
zK}g4<<&H(mor+i37cB=NaJL9v2O(EOkZuvAGD2I$4X=+Nbr5`8E7r=$CVL*NX9QU_
zHxp9zSf)*fFyOThwCVwOh#=h}hzN3hWC7md3R*-5UbkSBG{F$Id=7HJ0&-0R*-wOA
z85za(fc9y|_2|cTLl9cG$RM%<g5dQLgaN6QAZ!rXu-Uh9i%;`b?<Nols)-<dC2)P@
z)3gmz6Cp_OGD}}D30n35+X&#-3Od*jYexxd-2|zF;DdM&E`&s`kB}=JNHqi}A<E!f
z%qj`8WeIDi3EDa8I}|nH2&7MhNkY0skopM14xex+e8Qp7{-dG&M<I1qDEKry2nksw
z?$fjl)K@B9Wt+FyCVQTF+D!eJUbWyBC7&`mk0L3j9Dege2Hh;R{51k={|j&UFMJTR
z?TDf43&Z5UyemPMZ!#x-V6Z*IptqmF>^OJbTeFq__4ohR*zuoz%YTlopba1#t3Zd6
zF|Ys6v=y`v!r|3_!`uHkul?r%@1o_t4%$V_2VS(mbq#dl5D#n@66E|ko?D=MV1usx
zXUN|NsuVyQjv(u(A){yDj*@JWl+Q$l@cj(IyBQ*nFr-~%sCdcH{)1uie}=iB5k<z0
zpoJDJ2SBqbyl4N5Uj<EzOJ4pjef7WGmH)DrK}T^iANw!0`9DMTMF#gt430eve#;nq
zmM}O^VlZxEP)lKuj%46>VG!|Tkn&_u4&u}f6)=hsF^iY9PLZ-tm3PTf^3GQeDA5eA
z(Tix*k7+eY>@rE2V4gnNJZF}5!93fNCH7^@oT}Ej*Kc-j-0sn|19C>0SNTSx_?c36
z)eH);pe;NcE}$xj!;yh2m_a;&K{As;G!-;qBoxaa5D8vl7senQ4qELXp3UH}fZJ~!
zL&SEjs+*$Iei?24Z@v#SEn;=(zv0gR>f8RSZ2qsl<G<#v{~Ejh>+Jupap=F=f&UVR
zK-Z+Ip8l_Q<-f({|E3rIYp(szQ2kas;Ut4%7ie7^n;8SEF6cl8W-ZXY-k@0|ZE(Ly
z8&vy%PWd(f4bX9zfO<@9=AcD)EG7(WW(>^647|1sQoall-k^owj3yxUpeZM9&>{y`
zS3c!r2Aw$!`tuodH-M1NItCEh#Gto@!C)t7d4tg|2BTdJR$Cd|4l+a?VF=mB;Jk^!
zWix~8UIx#D3|>bVypMvaArN~ngU4<LuMG?y>o{YMh-F@6=zgcN@xSoV|2+Gkw;hS@
z2kld{KKx%{#dnL|Gg|fgyeiMwWbc!-e9oqJg<0$ytHcjB$^U$cpsI&c4b&~-0F&(C
z?M0j#AaOS3{{n`e#4UbmMIN`xyzW+W(4+3Kf8$=?`rZBw+rgb3@R<p)$_P>=`POgp
z1|!ef4G`o}vmP=K2U(K<u8csPBkvkel?3Sop^@<V2yH|TUJb#A<{<a9LP*F>5bzzW
zh^vT-S2z|ecPLy2sfHl+kt3{UWM90@u4pL)L24z)xE#D1f=mxt=PrQshLB0j%E&%v
zzFqb_NS_FP;1B3PRFq>DKr6Q)br6JvS30;D=CIBYXqXPXoEvoH0<2pEADx5LM;7sY
z7T_ZnAeE76TrZ>!LM9<K5rl-)K4_$VL_4U`0k<x&Gr;u`^t7C&E#7d{xD|2<2xQqm
zg!F9&t^9}7S&%9TLP9Dg<XuZ>lS+`j5}pw}TwNzfrGrccckhQ(QV>CSeFR}()=Cf&
zNUa1XA$_ID{=<>|hr|1iVpc{G6OKXZAY>A`J_?_BJZ!=-NKF*dcPI>0uYq<j1$XWR
z?HO;_>RGkky=<jx$#UDQ*%qnOOyfKCBbzk?tEHTB_`;`z%)Q0E;y?F_|BUOwgZ91O
z8M?nRO!&`G@`Azs9D~Mo27`kPTDutx7cltlVW@i{w(LLO?*GCEK-U|A`X;OYvuya!
zcksXVz5f=E{%c?T&wT?_AMt>fF+kSWabJh_kI?EP*316|@BcU0@}Ge_3cR{o2sEXG
z!0cR-`jXxgbmNzRn$uB-7}73+ra~b10nG*7Jj=KdbmTSDzW*FY{_~sxZNHVg4yuws
z*Lhz8ZT1&A^`GT9==5pLL;v-T|F>NHPIK}-{l&ixmi)0_@ZNIPOYfdjnprCp{F(*r
zOE|QnICY|hOycAmvUL3`%pw|1BI`|~8Y~i9ZPU8!Gbg#^O?N4r<z6z^t!$BZ^>VlB
zm7aBLe3~~0v~P#BXZ$<&2Y`C12mD%gIpwcZ3-0AL%wrG_0d<Su^%1iV18*3EbT$Zy
zCNRM3Bf(Jc+V4~b@oWa0IlKWI7}Ac4wm#vX@P)nO4MV{dhNNQ*G5Z)owlM^+X9(HA
z5Vo2jehWkTE{3eV4B2}bGIui+9A+pv&QNlWq2?Mx^&^J*XAH$p84@mm>LcYI1|}m0
zE-TP%5t9}Jv_8@T4bX9#f@&X569z8u5FD>1=u8G)OVDx!Hgg6}O9pOh26l5$eI)M1
zAnpa)Hp}Y(8t7v;0{4})85nFBI6WDZCNUT<W-#0Ws*m(HGU#svRYs8d$Y?jHGBVr=
zs*jxafhwbreGERk8QixqxbKJ7M}8+j^^w~?2LD4K<hY!{VHtzpHU{6V46QHpc0=ch
znD&51Wrg?tXIb~3XUb!ShVvS=dvu!)dRLt_OxVn$bBj^&Je%ZC9{K<LO8<qxqjOy9
z(E12cC9$dg=TQI8sstLfleGG+<^A6(^SX1<1CP4Hp7lrk8us|s?*>;!+aRNDzV(~X
z5N3S@IcFPI>1^_<-sD-e9>xV7ssLeQt%e|TLs)8}a?k~NkU9uLLTVxi>4ba%1h_*4
zuZG|Z$VeTeCbBD9Y74#?0&8UinH$1i6G7@ANIwX|2314g)445CDx+DDJ`rRr4y_Y}
zSs$6FOobpwO$6#3fe-%yZFGfoiy$*ZMhVbyIrF$)bEK1hOkzPDBE#5j!`N>9=uRjj
zwp%y46N1ocB7?{d{fKt`h;~qy7IaE2X#baY-DdP!sbRBE!)8P^L_vLoRx3dU?jSuT
z2pdu>1$6F4M;(auc!8b!kx@|B0Z69_LPF{w2pQbDH>7JH1ci3*N1)z=$aN5ejXabG
z={JRgE2Buz`42}R!*mcbtnWxzA4mi;Mi({lFoY2a?i@u-1l3>gDhyJuLF%Kh2}eTu
z4h44~4D8$=)PBIPd5=&1Hn++RHl@q0ikCVSEwIj>DHAtCCT<2p!v(eF-$CmZR)G2%
z3~T;#Z22#||G&wF|9YqXD=hiXkbQ;0<T!)gK?dbb3{oo@6lO8#tzd9J#t?9xA?g}m
z-7luK|GBpQ*Sz}Q;>mx78~^36|L48&pYP^>Xnk}Yv@4VMI<#}d4L+g))K>zJ*YTeR
z9a1gi+z4u8gQ_7G*e)UPh9e#iS>I0P$i0l=yBVU-Fl63hD1FS({1H@F&i>D^0<=_^
zVJGOAO_t;Txz7FvT@-i&WT5C((2$+LS<ql0(+SWC04#_8GoSs>dHFy4>Hkc}{tF)c
zFLvs`=9&L0$N$T0{m;JOKhLcHER%i-Rh?moUC-ds!(iXe=iRRmyU-$cg-hiY@5*(q
z#VZ_&7JF2!_O4y$*RavAb$eh3XrDf43wGCm(B8x0y@w<EjzsnykL)`h(R<9dX}3k{
zLJ`MW28Cz_UJnKi7cg>TV0UF;a$;a}V-SgDkWOI`OJfjB1YMTLAH^UT0lI5jIE6tr
zkHLHzujdK|pH&PV%NR^%F{pMk=uKg;UCa=%i=p5WL)S}|8Q)nJ|KwlypKA*Uac={~
z0^`bG3=2Oo%>BeL@fkzWb%v<@4AvVM>^Ac_t!0pCW?-^rV6g?&N(|bddW6diGzY|I
z1*%^lhcoawGVnTpxV&}@ymky+*3d-@oK_56_Mnp)xSbfdoj`0Zd(hF}oHn41w49dU
zMR!IFBH03VQyEOxF&M05Fj&iAv>t>^HZquRXRz48V6~gUW-o*DZU(PI41wDjLU)5|
zCBGdEKKnq({|H0qDF)x&44&H=LUu6(?O<@3&0sr)!KRPFs)r$CxAms~3WxtI9{F#1
z{J-M*|F%=_Dpw!&tUlmfdnmN^U`WdW)5P1Nj=LG8o-;^2XOjo*X64lc%@DDvfG3aC
z{<CR-4qgDQXHfsotny#T{E33=CzF&T_W9@CYqomUL2rEUuiN8aw+C7u!D=G!+6~B6
zl6U<kc+beAX1#m$I{5koa5V(2kKnZrGJ|l32)2(Ex@r#fgdh0QILP4%kd+A#(xrGA
z8bTYI!yKK1S3r<{5a^0<a4!kMg^$sp)IJ%rLFX!D&9%&!4Z4OPeU=3nK~A58)IpFw
z5!RXrQuUybkl7+gO$0p+HF1(@!bEsA1h0u8<8qMt$Sk(U41Da5No)_K4uX)7I!Hgd
z6N2=jI*cNr$S|TE84*(-c{OfELy*o9goJdCASBwb9;AN+S|1KRZU|B(L6(R^NJxDI
zAtCh<<UU9Lj$Mda38_8`=-3y~u@AXQ!t6T1>m$ffMUW~9P9iUUfOMLIy7qzAA;8Xp
zfb^2!^%0~R3hzG(sfplZME}8v{)3Sd4#COLz5`+X2O;VqT`EX@1i!fnbbSVRokM8X
z;gHTl!EJkkTKBltZE&hy>s7VFsc3;(#v-elZIZKIDKGiLumrq(ZsmW5wg1^S{}(^}
z-|p6bpBMjwum9(p^qa%?0fY592IE5vx_cP(S2D=WV3423ptyiRaV|sVQ|5L5MfU$U
zxbxrS0jNGwx&^9?cyEB}Bc2<eBWw7<7nFn6Fo4&4L-rhj7BgJ^ulVpk!>niEqux}(
z%ibWz9WXL5u&@fK2sqT}$IoU6-3h9avhVO!ykuy6&)oNiVa9)!#h}WFWizNU;yC=D
z^UQz#OaDc#f{^G{P%laF+<(5)prf8yPlGOr6FB#u>&kz&3;$V9{%6<^I(3J6`+wnW
z|GC%w=Unt(WbJ?bz5fmN{rB7U-*Vw+)3$R$$*Z|NdicEijnWr+)$a;#-4)!pEud+0
zX#0+^j-BD1yFz>Rh4&qZ=sOhAcL)*-QJ`tQW6={%Mo%~y)PBIeWUYG01XiOw2C+Z}
zPVmeTyECXVVs~X=abaL~X5a_}O&&>SGe~4W>m#8^2H_|M;S|s|C8LQPE{hrL<}f%f
zWC+_MQgBUe(htj}|BZKo7O@*0`)_sbztv3;GQ15MBQ<^S-}C|KHaeqQ|4pugko(2|
zax4A|6g_7MKEa^e2fjDK5>y{C8ZvNLfTn@?tr_^O!CfH-&=4Htb_ot^1`cZmb_)<<
zGY9RKWi<nl>^2N+)(osRAjD=3W?L|D*)oW_GD>(b@H;aw=rb@zaGLiq*lcAmTFqd*
zfx&zWgT+<`%WVu+yBMr@GdLY)a6Ja<0r?$c@ZSomlKgiv`0Zfu-4CjdLQXP-oo4Xa
z#o)CAR38Oy2h~Sb6B*1q84Q~l{1$32`l@mIKktVBOcNe6G+t4vI;32E*t_;nK-2M%
zmIE#&8x(wwahR+F)ko4V*cJW@Y5(Wb`p=^Q>KU;p{by1Eoe0FD{GUw~R2lK<|5tST
ztRM2<vGAgM`E|%TI-mM2z71PJl~FCIf8<-Y)w^~B1bNl0hcF<dN6mT&a<5(oL7;<r
zs^EwDZ1Akw2&sc0wGV_1?*Sn*5aV*N8}VGg`&b>p3${Uv6JXasz`I3|+6S#Zg51yw
zsgEGng*z85Me7Pdri37)bC5km$QxSWwGX6Lg3J@4)I{K}kTtgY$UJSPB^W`vKag4p
z!nR1AW}Y$?LPF{tcx43Pg64_99U?Pu9b}p?(IkEXgoGdXV~Dg|!6*(?4M8d+NPT1+
z1L_$;DkFVZpGZHtQ!lCmf{^PVINKn+4Xr*xo(=S@+2~!r3DQ6EY6NwZd>UXCQv>Lx
zc&~;nkSYm6`ZR5W_oN`BcJPBcFxNR?)<=**Ja~PCsFG0XBS>EfR4YO1Cio~HXq9#6
zUU;`CxN|S0PZZL%55j<ukU9v`ErPHi<9o>U6l7&Wc<;do(3y3IgS!s~cOQf-nS+zz
zeFs3LU;jY}BeZ89q&$RISfRbpv-KeL5$vcvQ1>dd>u`9_(Qt4B-mhhccjH#~`pq^a
zE5u878}(mfTK1o5*?&-dv;@3MaTRF4ywaur&QCx{a_N7D#48N$cR^Q6=<jBbU(KMk
zmceifpZ02QjTH<=i<O$caPRxCaQVO9<NuoXpl1c}-vV`!cy5A5?F4TA7q|)9hy<^X
zAm^!aT=~y&<-g0({|pu#;N3kUpzaX^WVa~;1BacAeJN+;Zr0H4453FEQm-;pJ!WZn
z%h3N1v;cU~f5vtH8Fqm78L@${zY(|y?i^hOjns)<1|6HmeHt{K#Bl6C!wJxGJJCy^
zC4ao<KvyAv?%JOJpP~8<Xh5?3HADSJ>AC-<m;V=C_+M)Nf600O)u(^sufN9>y_vyj
z7K__dv54i46}y7F&V=<I3+p`=+P5#Pe?O=(FyRoS4FDk_J)?vPr($}Jc~);VNSY(+
zP{p7U#~|d-!0o}n;RZrn9t>Qtz7ne&1CK9gVS`*YsFx&^1RA&#j$;r^Vi3<@FzVy=
zU(FDEKp_7TU&niudH*F={a4!ZpLf@Po}JLs41_L#X39jbf(~U8zW!h2#(&XU|Hbe8
zm%Q^|=Kg=RlmD5P{^xD_&QS1_Blrx1^%e&Cd<Lc<1_oCKu0RG6U(gZ@$oW6KwxDr5
z=$w%$=x}c~BhV}oyD?}f9g6{|GsO&BbjPFzTARnL2Rh7w%bJ1NQO2miIdh3w%yb5$
z2@J*)8SIWTSRDqPvTJvV!R8`^?IqBqBz9Lo8<9L7GI-x&@Vvp`cMy#BG6d{r2t3RX
za*QGN7(>KC2H%|w{=1o?wlM^)V{o3uU^#)osFOjrg+XVHT+$<@<hu+u+hl{5*_WJv
z%<@Dxo(!)&YL|Xl#djx{`V$7(TMVM_86-ZjD*PAF23>~8qX{~)n?ns$4S|N~l>hT;
z{FijOt`+*ix?rbM`2o+`b$(5o{pxlgP~&><x;5Ul>wN2>m63Pt2Ctg+$OuvgK}ff%
zwQvNfeK1K}6%eE{g7$;JN1j5~Cm@sXaXARX5!MNU)JG7~xoD{q_{?nx3E?{AFM`hy
zq18mlYqpWAA>=v;QWGK15P`Zy;Bh&t^jVOjek{{w!n#E>k$Xds3J6{YK`J14?E_(e
zj^l>S6TxdD(}anTwF;1VB1mrtGD&0{kC-GfiRm+m=`)V*g&@PoZV1wk>eh?wf+ARD
zq#Fqunu83`=|{Bdg}1>GWY!NSS-Sz$5pLMzUI$`8*q#lWys*_rpjrtwpa-vypj8sl
z`<49KcOq9wAU1d<9;EjK=?y_@A_xhok09f9@X83%8-i3oSnDH5O#~+))e{;S-g_{#
zdq1Qyf{>w*-VK<9R6`LM^$|oRgoGFfxyT8^23Jx?!h4QJ^d5uMM<HGNKsVC1?Q*Hv
zpwf84ZuTSQ<>2~gDQHzN!(vdCBzEk-)vNzOpZ}{J|Id(ojlpI;gVq5CjeQJ8`&e8r
zf)-FIFXPo(#jdi1A>^pX%Kzf0{_8*bulopEAIaSNFL(#kMMAER_-_2?y8#-q;|AZ+
z!+8-@8Hqgj&)#u^fx#Q;>{D>1#KtdSU1$|MnImEsN5n3Mq)W`jkC~g_@b!IXnDL)=
z(SL^3|5-PI)-bRg1l@2gZ~;^qiGV93&|-$mpv$s3Pr~XW@P*x?m;MV}{Lgb1G|j|*
z_&?{W{|p5W8JxB;IB#bNKgp1Fhau|{L*hw><l_tpM;H<hFr*$~joSuV^I+Y@VBV$_
zIoqLpXISsCsEMZ{CmjN#BaxFplXQ^fD4_G+dyhr-9gpif5!$-nI(NBTKo`4F4ufns
z=!hO}a5cp3$-wQ&z~=*6+ra0=z~#-r;mIHn3#yW2au~$ZLFWaDCV=WAiA)BY8KU8v
z8DbCc=A371crG^KI}f-@;@bY7e-G&NY@w5&({;qJ{+GD%U-BmC{5sK_p!!Jo#(#<1
zpo3L8R{v*ce!!6ZfFa|qc+zdjl)E}fo1_93h?v$eNGE|8pMwua<+2A&4{=(8r;tn;
z*v-Jm7+N1eb{?@9fOf?~_Qo=RSAc^SLKreI88a}LS_k$Q&%Bb^e@Y{ICztJP2DkGJ
z?iU!`ZZUY?W%j(s<Z+k5^F9c9J!bHK!4UG8A?P8fGV<QX;I@^)V>?6G36|LN(E2Fk
z07JwPwzypk{;L_BW-?e!WYB8`A@$`9TB{gT=Q9|sQ3ze>SavF`<49=x;lT3!_Ni-S
z-8S)>En<|r%^-E1LF65zKH}H<FQorpK=(h77U&`gPL2PZ8vl9J|BLAV*A9DWneyGa
z;-Fjg5%2m9-t`-NYq!G@s4@cAN8YvTAiW{9`p6U3CxX;R?$zti>LcXZ2bm42eL!1T
z!PkOA7?8>c)DMDmh)P#L>K&{kq-O-_{=kp-u`gT#semANvqDyDLr8di1n(0;Y9a^;
z8I;3X2f?c$$e<jAgj78c5^~}kq&|Z5j4<mXcr|36G8Iw>A(Nox3gB*$aRT(tR+IP%
zCh-#>^%0~df{><hpabaOYZVM*yCKz(ada=F8ZwORHi+mlh=A2ZkSZyvLpQP=g0NOb
zdf{!5Nj)?YvI@n!W+P-s7M=z`3~-MLefSP?JrJxi!ZK<HtB<gDl;B&IK%FM=4JMGj
z5@cQph3wcB*s%+88XiO(QcsaoAE9-Kkb6cjHgp3Ks9OY^dkXJ8h>Rk84q`=+1r?C0
z5K;$)^+A?`gU#$Y8rFLZ)Hwq8k0SbxK#)uKIs3MgjD1gq=6_>Y{GS<eap$W4jLZLv
z?*8w7;lIPl|N2wzGK9@%P-|z<pTM9zhe2g7U&njf?f*H;-Y__9V^G=1V6c~A`bVh)
z|4knMw|V_v{@#DtJO736g3(RTsWn32;{qVH65p-=yf;Bh>DaD-2GayC{FglcU-0;U
z2GAOBW$>bIa7_dvl=<{Rc|$e{Mr>mUIm(cFjiK~8L(><AiGLYpgZpvoL3?l*cK&BN
z1geaLFM|w}zVct@>VN4g|D`U2j_%|=16sqteC$8(DNx-caQ?sWrT@I=|8s(`Cty4B
zU*X_?hADp-tamVIuV66S#9*_7!E7~y!BPgJRSYI;7%bN^*luKS+sI(Mg28GEgV_W&
z`$nbcxgky4lO`MupKvH*;$g@Ne9#mgXzl%x@cyHa+CQZ0fNS+e!^GJF4pj_l2@KMq
z4E$aUJnm4$?a9FF&A{i!AQ;HN7sSBr4;r=Ok6=>BW{^r{5J>?c{&WWMd=~fRGV%Kv
zVh(d;Ut*|x&eQjualwC{)&E(xf=<q1Iq{$4%zvJX{{<jJl-K@C-uN$l3$!s-`u2at
z%b@+Cpp%^no-<@VWk`9z5P5?k<~Bp>Bc{w}Y#Db!+kEVnGsv|w@JBMR`ZI9Yf)JMx
z1Gg~)mk|hY8ZvMiGH~fKaO!|=>tol2c99rO7(grQ?7*jF2QV-M@)^$wseBaH{NKCc
zmxRw-4x78|4*xlw{&PG2=XL%s;QXK834{ck|MR%~=X3in<OM>!UjG?v|1g+7V6c3`
z;C+KJ=sxHOps?eN(fgTV4zNXSX7F0f;5?PVx|czIH-p?R29f&=0=F4tcC)D*mbPAE
z5U|rKc9&V?HWk+c;uf3vRIhR?U1AXZ!XW$!gd~5kD*or!{?D)fU%=qMu<m~WEl_t!
zK>NRx>kX~Y7d9oE9V>T0?zRIJGPN5ZMUYoDhyiIWL)OT_ODf31Irs_NkUPOa+t0xV
z%E3EC$h8k-(-34`95U%%w$i<9C1ia9goLczMn2;dzJnFcfUHbFKIRm%QVw1dL3&1*
zRTAc%M34#yxiW%mBeKq#YneUI5`1B+b=F*qthr_xv&}MQn}V;#vr3-@seK^z5rl-)
zMDTGrI0I4>L8>A6Dg_7wd3p%BTLh|%!1v`rdP5Kr`5aV8pU5z#*D$6Ra@r4k6}Leo
zbX-m+szV3dEkf%P8Ah}lgtuWLc>H2As8t_<x=2X%5oB>Vavg-i#<nsXUOj;p>9y~K
z)I<;xtv*8TB0(wz%>EHteFR_30GUMsS4q$@I!Jv4>21L)BZ#r#UHc*RQFzyW2#V}E
z7}mKDQXfIIfQ>pB4C-XT`dM9vgS!qx=8r<V4?-r~Bl?aww4ZY8y}&p7lfc|B92>yL
ze69Y^F!MXpivJ9Y{xB5pXK?Ig5RGHt3}WEQW)LZ5;BDh~KcKt(zxUDq7EAv#q}*aK
zJ1m&=o@dj4r4#>c-~2ay0xAq;ZvPj$145#=|AP?t)Eeaai0#UM=8K@}gXheDtq1=Z
z8qR{(46B1@jzCor=oD`af2*iQ!H8`_k=q#(FR|x8U}*fz+54Yi)_;y=pgAM9t)LsL
zSq}W?JPEq_6f#=`sgI;CgKpb_TyM>C{6EKO(7>JOrT;<~L6sEGIZ%DXdKh$BgZhd8
z43mF@=G>L$Gw7^f&|3vM6jXf<gX&xm(wxhnyO6<PHiOYz28&4yru~doHDdk~{c1KQ
zPdo;xkD%)z!3#?uYfi%Yj)e9e_G#K-ma|CSzmo~lIT8y1)knM@pd}5UTFDDh9|?sq
z2!w#veshN~$YnCfW`IWR#L^f9G8iNZ*gRK)>Z8a5>{%BX>YnrV|6p1CpLY$YKH}Q_
zAGtmfy#^YJl(-J6k|b|}x=VtG|1&K5#!&fzA^Q<S`eTNqdkj%G86vJTL|kQvzQK@q
zjWhd!V$M~8=pzy?eGD2!3}W8kwF<@zJSL#Z2vQ|+8G$Mz4s8ZjFk&|bEq>s!XApE@
z5Dj6Hk7HJ@Gz(kfSA54I^_))RZ6*JUg09bm+@ABe{^$4jFW~lH$m745CjvogA`zee
zB0m2GeE)O0{bzRn#OV5g!Q(1}-))A-Qw&k3nPU$!gzshuSjpfzkHKjQgUd7qwL=Ul
z2N=X2G6>#b5ZlVYyM}?MltH|KLAilZrIA5)27}CW2LAmFB1af_-!SmMV-WhtApVU>
z?k}Gfs4@~T_%Estu9CF>3+nt=^trDW`NpAQyHnLp$Q7&5zE3q`2^?f)f@jr6uj);h
z#T0ZVsA@f=G6GE-RjhTdSPNMr2k!xa7!|9Lt073$<5ITLt#pMO(v%Qr&2}mDfK#;E
z2T}(ilaM+HPTC`l(Lw4XNVf=52SI8a$P5vDN(ep*2WQyk%(sS241wz)3nVtAJ~GRg
z4XJ$~RT8WYnqiR&MUdJDGCBugn<Y<yGvE~vW_@Ie+A}f%FQ0?dM36p_3HUf}NUa3v
z6G7@Dcuiy&)d^WU2bm|*iR#dX^@-qva(b|uNI$F<4Iz)gVzMDqgpghmlx*1K2}WpL
zBnTHie}w2FL3b)aH-Lbz42GUH1gV(NdraV!50DiOka`NjhLC>kJN?>sLfE+KBgira
zI2qEl56%t-Z?=V>`2eXeV7-_lknO8MT?fF+_CO2yAiG&1l?$XA!cRg}f`;I__Cc0a
zppl{72Sd6Jgmxbc?%WSyK&(^mxNbh<Im4{)EKC1e?fWmg@IQm!d<MIIk)SyYb{!1z
z!3@mipe>WE+MuHw7*rS}GW41*F|7fe4kx<hKSSdOhT^vjbN`F%`)~g6zv0{e^0)tU
z-uTaT6LesjFqqA8{Xg$55E8rrI#iAG=6{YGpv#XquKZ`X_@C?c|A^!N8MO1kl@K?W
zWB^_FrsT%rw@M%i+%rnJ$WZu#q52a;|9^&=pyd`!ke<=5|BQ$Kv!DFWe;#xfhUB&X
z65yVZ^p*c27r^Im9{bOF^gsKl{{ol)i(Ua$Mm%Rhrx|dZ25&?<^q=eGf8I_18L}@k
z=qzGTn8u(ui$QKWgY+y0**OfdGeL_qROc~hEoLxS2tvlw7!0Q{ST=C@P4%eS64Q4g
zs_$UTgu{@9j-e9{h4deU?@A8n*ym8RMm2n*h;1c<N&<su5NNQD--m(E8-#eh7<hab
zczr<!)(HoJP6`4YUKh&16Tl#r%pjM|AYH&9k;@>Q&mdjS;JQLAW<NvtK9<Z247Ja=
z`o1$Q{?EJaKihWD(SmG8{&Su9&wc(s|7B1$Bys(})Q$hrH~-6C`7eCxKj*^#3=Lly
zvY#<zJZ4CI#t`?IA>uAW_#KAG`wUSJ7-AnX#64n2c*GF*fHCSUL+~*Mr$r1}T?}GL
z3>+a0tey<)-VAKujvH$-1A8h1V+8|8Jp=c22I2V(!lxJ{&NE8g<57Jntp8WX<G-Ta
ze^I0Ve0u){4gU+8fUaT}xBoBU@L$^TzpV3rITsL8a{jOA^k2algk+umOFR6Rb^0%1
z`(McZzo6ZJZioLIw*OgN{xdoKXK=aAV0nSTb_s+1G6w6V{DHf9G@mfY-r$m*#K70g
zAYctzoz7+m+QZ9W1v-n5)0~0RoPjxzK_HGnU@42#dIrJQ3=*GMWdHN2gYJkIH2N=S
z^q=40KR39$q!D`4H0hZ`^;*Xo=sE=7nzg<)Ya#Ux@@3x;5%6NU4ZhVIe87vhAsbgA
zB&1J-+#v$hLFH>eB)BGmS3r>MtIptR$hBlSq&|X>@FfcH(K*P79HbuvVMA&k_!u30
zjhsXNB6uAHStbXmj3Ctz%DoS{3m|<WcpU^Ej)OBGRgwj`N&<D1Ks}?Grs*@xGG>}(
z%(O|L0YTPj(=pd7K&l=Hi98}_kvs)7ACx!=UL_$j;FS?*=>oWC1RtG4u8fRhx=q0d
z(kFtDhEeTsq!-nu7uBW{)utWU3PCy%Ejke`df^~s5Z+=C-l8AYjEwX`n;;0Y4nh|3
z0Iyns)<odFX6|(x-0L<#swB`vQsZVwO#~+)RgzaDbd@+{-JMU<4$y5uO^{k?8|Fq@
zNYw)(!SxY{0Xwo4Qu{ziV(KGEr2ty=+pz~y69pkLAR~K$;G3A>6%)ADI1mV4OdbGU
z!UrcIV}lTLh#}#%B-|ooMsVkT2ny=h2SI_I`+Zt=Y4_YVo%WPr&QJag|C!sLGnh_g
z;D}^k@MU1}1C8FWTQjg(fsT}6*JToS0!?0f&h(!Do?+>KrZxZh*ZpT+^q+abf3eN~
zmCpQke*0hh`G47)(E5n~I_Q{g)+_&+FM)Q&a)A$06SxkpjKDi&*)IKOx%!{?-hYAm
z!wd{o-~;7gw;D;<=m(VvL~Y{?UeAzlku&c;L+ux?ZqSXi91H(5t^d!t?LWiL|E&8#
zl@ZT5P(Mrb>VK*0|0S=1&S4Nc4?4Jj`#7jF;yn4E=lp-cEB{3<|K~sVpX>C0&QqYv
z1X+&%XFK|z<uIsrV`zUbQ}T*4@j9pf2?o3E44Nw#<mNI+PiK&s!JssoL45&(@e(ka
z#-P*BU|z=)G}F6wJ6e4d(tj|x??7<x;gCL11H-+3t6}n7Vb^*F%~S^QV9-hi0bd4w
z9}t36NtpGKF!;U+;RH~9q)^HrTf`t*$RJ(L=)MY6ABF8@N<YU?^OURaJL97NeCz+S
z@A%KX=RanBBzNb(>}^nG1iBWy{SQO&3x>=m4C#*;5}q=|K4OTx2da-E?lHtZW{A4S
z5P6p&>JCHXZHCA*3_(Zv0ypybuNQUhXESMFRn26OjAf8YW>U;zQ)=MVZ0FHjAf&%c
z$Z)T?*&zvo=aPmmMD;$28+;Kr{x78WM?e=eN5iiVs*i+B|BG3IDkDipXniE>462nB
zo&L)^fNp${2UkY$`bgN}zp%r9KIi|uPXF26|1&!NXK=p7V15o%8QCvmuwAa0e3n=1
zDFf#b28IF#hE&j|07e}KUJKB<Mxg6=&B6U^Z3YH621b7dhN%pqs~CjeqSZ%2#-NG`
zGIpmCdebcVxns>bwE75Ixm1B}{dNcUeIWfHh`3MHdLQuGIY=i6-Wx)zj36^XAQCnl
z2Va%|scu|Lmb(-$gOHGgaF7}Yjf7M{Xe4A@4tYKZQU^i0Kae^InS@;ZU=Kd#6jx;g
zse>SML!b>rumL)V2&5)5%K+6%Ht93q*W@ABN62*$WONQvADJXh0x`hr<<JM^z-#9q
zm61vO1mn1V5JFl$2k8w#dPbmyb5Wg;`Uu`9GKlPeR7Uy{tp<^x%19Sh6KO}bYJn>w
zoro6Qux4Ex)exjI(hF^Z&j4aFAe9js38{~et0c5q35^S>o<K_-z*}fx)f1>ng0tZr
zB@hFAu_JPQ1e#QWof+=ew#%nw2c&L-kb&)c1KRckb?k$b^T;Gv5VYbBQl~&jNbLYy
z7qJIC+_%@i9fbVa_Q23Czcwg>*|Hl_(GVn&T@KNUi-fGi@N7Tm)p3Zs=bp&a=lsk5
zi>&?++T6jP$iS`5z^V$m;(<kmg-d~fK~z@3eDk)mDsEK_95%ciFLhV^XIKbYNWi@Q
zKlk?kOgsOx?Edfk^uN=S|Kbn+3*G|NN1y}iuK#Dd{-6KOe}TK8fjj=2{{^pu?mpnV
z0-90cy!4;r;(zTM|0Rz8XHbd(A4J0nI?a_qkU>nJ!EqXI$V!H=gA7S$8FKG4)PDq>
zk~bA>?<&we3!pip1EBk<g}_%@%UlDUz#wxKG-m`^-OUcGj0C~U?F7Lib-ZUlXNqzh
z`_FnDgqV;0XV?dFJm-P`(ue*_?Ef!+;6KmW{|pm;F_hkC2t3B%w4K3b9fQ>x2Imb7
zzPmsNi3e<9h+E5;v)iTNpnvz7sNO@-eTO0Yk%RjV1orF$4Yc+i4sPA=SiDv<Yyz8c
zF6ed%NCkvk8G$-Vz6|_+p#G6y0E18<gJ=kYcr=4V45)u3lgS`o!k|>nAW_C3Rl(rC
zQaJ7aw0~6doTu+6%cB1R>;7}^0Ig@>Jp7;U1ZWUa_|kvLtN-P0{a3&BU;5mCw&njB
z+P*X7ePu{{%aHsCgyJ7EM8j&Ih<l(eQp_`k*yjwfuNV^EGQ>S+2!Fuf^_aosE`!rs
z2FEvS)}I)RUNR^=WK(~^rSXkd`zNm^XyG)k=6_zz|2#S%#HRyVB*3TfUr_tMpw@p;
zz5jxG|3wTzr|!sD{g=1>uW0{Y#TkUuT>h&%|5tPPui^Ax6^xXfK;nvy|K;ufOWXgK
zariF_M&h>rCGG!<+Wr@?_|Iwio5|>lpxYbykYB7C2N)Q-7#O4(7$g{&d0DtbSOi2E
zxcM141VM;dl7Ug2flYvcQ<#C>oI~E9N$e80{B0Jw|9l$%g$(}-8~qnB{m*X#>Lm#p
z{Z|jVW0v^Zxq7{GHE7e2Th&_cs@2|At0DCcd<f1BHY)_Fal9+n`Bbg<u3YC?z6OFo
zognxftl-57?xm~TOIN`wAT)+s=?d49<q!m^iI7Qn)ngCduL|eF84z*X{6+S83n85#
zNM(dv0m0eGJtKI11n(I^>L5rD2trzB%!DI&hsX>pXpuexf{;(%hTI1Msfm#5AW#Ja
zKIg{-b(I3B`x6gZr~sNF0y97r5V#rw)kjD*5%LN;NIwWt9~nn=8iDGdPWb4YL1Y^Q
zK{`RYVa<>}k#=~qb~vaSg4aawsz)!hNhi1g4PjP8a1pec2*Sl$A9;co;Nj~ufh!|;
zt>oRf6<!&k_mv=L4Z%7`$i=%~%MKqfLKX~Y+XF#??Ry~z((efB*aznB2Hj2zuFjyk
z+IE5Kpxr(#JG`5>dpB?QY1xH=pcZ*!OnZT9Ox%K?74N8CL-r;b+q-$YSJO88+U;H)
zheW16S6uW(c-4P~vMUTqB@A3K47|Fa`iM=AfmIfi!5O%;^<4KKy2+)Jz^$98GXKBo
z0`LO7)u0P&*f;-Y-UYhR#`XDst2_UN@B9~njnQ%5`Y-(CzwiUlN^g!!pd-G8Zh+42
z7P<kdn0T-KXTR{@<l%pY(t`|a4h)RejG*htIT-jg^t_UJf>#JetYwHl#R|G#?*m82
zFNP`q+2(=HBVyYG>KU=@{m*{%Kkw=PVpl<fRI=Cq%U%bqVUW1^pa0B%?h~M%5$n<a
zyk|jOC&5dgF*?4ppqsL}Pl7HCVm<z!1zZ_1>;<it7CHT2^TL0vi~o%-gOJ6E|EgR6
zTg>^d*7aYu?t^sAJMpIX%6-4prvKKP|KD=SXVnS!Jz9=>x1WH_<puZb3+~w$*t0L7
zdoN^tqi6M2qm+44?v0>j45C2{pwT%`1|ClkgU=h(MFQ1L;L1oS5L6in2SMv2$yf%-
zGzOVWQ0GXcl0mVaLAIK~X^BAeUWV{}p#D+yGoHSm%nMQKBi`ent{<d6lD+v~{?dQu
zZT}fMe=(FiWk`L)ko1xv@c{@y>LW;%6mgFs?g>N2Cx+~=44K~;Qa&=oJ_FT99uFDp
zZ!p-tV6cD1Zu6Pl=CiQLS7DQ{qQ?J)4gU-3f)J!W;?e>QeDP?37Ewd$BT>Eo!aATX
zlCS}&K9V$t)<=+fN6iIvE*x5Yr0n!x*%?$H$v7bDBT4)JQV!6*m8k1~0f+x0p6_{`
zUN9)`WMG}kz@^5(AO&i=F!3<3aDXQqIT%=Y85jf^KuL%T+|kx%5O81+I>)1M2f04t
zHv!d0LdO4vjQ^_z-Zo8m<y^hqp<)eWQ5<r8<XW*7QYE>9r-UGtk$c5jFEE1CL=e&q
zGA;+(G2~pb(ye4Aq&|Xlh0sVyO#~s4Ya+B>54`(>#(=MmgH%Hh5?%qJF(7r2eGceU
zAIO>8kje;N6Tzw>&~8?6S7;`r(y>UNVTy#z)271_T76^*J5d3x4uaQ2#^7sPO~KPc
z@M;KBF&V}7LFyx;*gg;&Tpt<5K{vF*8Ibx2+9!%=htx;#a~1R>THy#%CFw-8Kq@0@
z)kjz>Cb!!45CpH7APjhAgvNl5<KbE#4z8zA3Tnt*!`N=J0G&sKQ6Kp>Z-;b1u=ZRK
z7q2&hu8H(**y`E1)eAbMwavS6t5?Go*V>IPH5)wZH+zA%Q+YLP@vPtMUc1q)W`kSJ
z21p=4atbaIv>Ci+y=UzPP?cS^+Ov9%XZ0Gdnzi_mTkU$6>a{v$>m9ldbIkcHw(7s~
zn*R)8o57c{+A%XrfN$331zm5-z|O!X%)lzbEw0AEpde+NsIu_0#!}FD5&H^I?ZC7i
zbXbtd!~g0JK{W#3P0;FY>0AG0ZvR(3_h0<LfBwb)LCfJ9zktr)p8Q{6?SJW0|7Fko
z7ry#m;n9EJeg7HkXR_#(Flgp8h<h>!I5C*Dibt+s2;RmJcaS0PE@Sm8hR)v%6aO<T
z03R8#@jv5s(1Ab9hyDwk{V#U;ztlAlQo8wH;rf5^OaFz=g4Q;$9tG7$+^7Ew!j^V(
zp8@TJ<v9hqDVzJ~f6gPIN{wL)XgLJr#y8P(p!E+D=l;uF0-4Nx;y>rH|E!1pa~=jE
z*2Di<4}<Q`7CiZ1YwLg32~TVqj<|H3fh<|^>)!3xy*sFTZ$Rhn(C&kQ?R#x=mn#Hx
z@t7Ag$VGvw8y;^4ZZ8H7cLpv`5QEo;fe+R>5)NVz31$!t0|mZBJcCps=m1r@Tn4!^
z2Gv>ytxgbfUdk4+iy>+sL*`kAnnyhSU)dJ^7hd<DXV-th{r|a+fKHATI{RPZ@_&Vk
z|HY2~=UMrmq4Nhr_G^ZeCk%;?K`8DysJe-H$`Jj8A^al<h5TiW{x6jHpF8S5hu?n&
z*Z&N5{~4_Qvl#wo)cen+1wuSp|9Q1Rh+7lX34*ZUq=5E+ey#s}nxI`gTpAz+f;yme
z0s^|A#nr-w|0T@O>myZXP?e<W4C*L>*v_EJNZIMXf;|W+I{b$qIfwrcr0D!#(dECK
z<9{jp|00(E#f<;+>-=YsImN)Sje#MDfgv7z;;S430}H5n0!=+a7@Q0YBFy~iOrkr4
zR8O$T{O42qFK7Vj9|@WK7c~AaVDewk^uMz2ZIie+&eiK3E7y8dt#+?m<yE=Ht8xvb
z>OsE#8&U^(Rjl<aU*l7;)*IF{axY!wS-u9+8-g$(GeMAvAxMpbi-gPpxfU&TMcpoh
zT>HSQ9;^)b=p1By0%YqDygqWsoo5d^Rer904$AecHraEmvu48)yf*}|h9LEhMfwZ~
z145dmPJ=TbeIl#0DOPDyAm?vG`bUuZ2!1;r+68%#3J6jkLFyn#56Bo+2f=F}$W~Sq
z5_U_G0r+SI&;mK|A*isG+tB(5QW@!mgC>m7>LdI;BkiDi?Vx(BWh|r+#!5meBe&Z1
zkZK5xM5~P8TrY5a1S#_&Y)Ijc++%{Up&OTARR+AOf^<y0nznhvsw#*GgoO5eU_*JJ
zo)p*w_}#>ieZC&m8(eBY$hCUCYxR1!nhmbtjNnkQ#;IbBbLCp+%C!z<s~pN!*_W=g
zEnaS03_2Mee%KTqhJEoe$PJJ7g^M7_p=hyV@e(}9wq%)Q!6K!cr4C((*=K)}So7a>
z-+zYCO$^K~;NuZQK=lz5KNFWYyQmTavj8Kn3<K!4J}CwvGlq&&lG9!=%>U1^5;SDT
zvGu?5h5t4$|I6R}FLMpF^;P6Ps50U@`JZ9lUxvyX3<+l#B9AhJon#0-!4Q3hA@?3b
z_Y;N%KUt1}E=OQ2I>TVnC+*rT=h3I=UMcHbDj2*-EMhrB)Lw?PlMEHl`8$6wO#IId
z=^1VK&#>)3^G?vD5BDihWh8MGG(shD<-gq3|1y_BmmCS60c|ycY>XAU1geif`(Drd
zhtx;hCqbn**AY-P#IO-O?zbDXE0+H>xH7u<U*g<<(bNCg4*zF4_@DUzs7ew%{a@e|
zXu$yI@&5uR{<H4=FShV2L(?^zh9hoW=iv2`Zx`rT4}Z{(fPJpj8+Bu+3pi9UXr?kq
zgn$O+c)US15r;ddO5*ln;PD34M|`lw45A^R%1APbK{5ff8%Q>XL9u{AzMMg$fx&nZ
ztK$OJ@SW_DyBQ+)GUi-hsCvNB{gHLSf4-IfIktf=nc_bBpXb<r!88BGF8&uj0<N{2
zKQZLr164-Jj~Qa`gAlYPdcY8NpCRxyL%=Hrug?sgpBP+zFxY=)u>8tk{+Yq#KZ6OV
zGD5D8Al)A>jsKWc61+a*1Mf5C)&#Ai76y;k3F(76QxazX<!nIXb&&p%y6b;+S5Qr)
z><A*&T>dLMf)J!WlD7j@Nsx+3&f&kTJqXD;fa)X2Op}b`e+}3FO7{OXT|diNy^uCp
zz#`kpAm{|1Hev@~^$WR3ksDk$i8Ax6Gm2~%P(H>i{hwR;zkoipJ`ytd&u<K^kBnnM
z<8^rIBgieQkbV$^^e$fmsgEGl5S(-acZ(qPksH#89A<q4=@Y^0BNwD~amc$?vDQJz
zf{>~QG9QFYLV7^RH-p<{&w+65vgaVzN4P2@NY!JWHr*_B8YY5XkO#S~HF*kXSpw3~
z99lJmIQtYnJrvgu=?%g65WyKJbrARfI;aS^K7w?JAgBGnY9h#nR?u1n$P5vLgjYsd
zVNE)rAOxw8kO$}>br3R%wLXFfqSZ&PHS1hqYhd8D5}binF`-Q`dDd@6tBi2fN6>|N
z;MxRU8NnIIwU2+(cK;^Og@6HVpi|3yo59@1ZIC0%AvF=`;9u}oZjY+9kjqLSI}0F3
zPC-V4q18wJ0;j@7PKAr?^XA*;%(cy#3u(RBXV0|FoK6tgXU~Kno75?`X;UG{E`1sv
zWR*U{JY}kM<|4bEqYSfu@~--?yX`+i>@EgwUj_zy21Y&xRv~r)DOoiGaYb_m1~Dcs
zNe%&d@J>~J2H_~f@_md;{<E+8&%F7+;KBdaum2l8`Y&(=bS41T&Hn=T{|g-W&oJ#T
zXrw6MI)l$shQL=0{!bWuA2Ime2cd`?3@J~<I({>iJZCVN#-P#1U_FV!W-^1@B1Z4!
z4E`G!!gnyFUtueK#L)Z^bRrO>?%VpGaVKcHhv5)t@J;mEf9dO>o{_?p|MHhXMX}%+
z&`Ck;r$E=Kvz`U@zl5*;=e+Qr<2+QH{rG>56QFCd88(CWF0BULi@~t#Klic!0;m5A
zp8YR$4uk~GfCltf4*h323R?WYeHwH$E61t-%t!z8ANkL)>A!N%ZPB{3ZmkEsI*&qD
zut0i7{%!mH+xGi4>@m+=s^HVbWmdwVl*Aws0@_=|69C#?#O24p>C3?72V(OCF!1_-
zsw!ddNS#<XgH$wwWD<jH8iQORgK7nXPCJ8fAA|J_uGn3?g;yA24=_aRX3IIxQ2UUr
z^DXPVf4s{;H*0Y2hTc7=eB{6Q4)9&%wf`BC|1d;<VF>xp5b&4L<3EGje>SK8%=Z7;
zZT_=Z{%1G(&t~|a!{9%w?teCI5aQ7N&#v>IT^EEnb^deefCl2Yv_M@RZdhFd8J&Z6
zj<o*sYW)|``p>TknwsI$_|K~j;)14-v_a!_Li+#3P5w)n|ChA^b$L`={;RtFS9Jj)
z6-YJY@E<f%=L9AlL3Ng#{eNU651vSpb@&hJA~{26mp~Itj{n8&|4W1WS(4`e1vNi0
z2;O4gp2EOT&%odgO1%v73=E14415gyY7D~L`IU~b$$|Sv;3|^e1T=*tZ1P{s{J*mA
zUE|od&ea<oE7!YMu5zzj<z2DbyJEE`xcdXCi6ED*LPq3Vz;i>;dso4$6r9RdI+d*i
zUH=WfcNLPs9g#M%qAgB<b0OIu?LaxF!X=RTAP8xnw-8?YK(?&HNk~oPkh=iBPyt>a
zfh(gqHn0i^a!WWe2`O9PHIZcoVt@`_4MA2ZK<Xn4@NGoMwGyOHWC^a1K$Q_}$sBY<
z4!k-Je%2iPf;>n+2+}8lS3q!vVQe?Ne*`%M6>{7h<eVSKR1suQ&M=}Cv_=j*MhEW@
z>4igAEWqm{t*|Bt(g|(Sf=v}6*FF%oR$v_*xmK>hM3DLjjdZDA>r%ZIQb|GTCU|88
zXW*=r!1WQNR)Uc5UJ`o61m08&ses_qOvnuH#;wp=2|NS`R|)D~)q(DRgzQ#;tjTe&
zTmxC#=~%oBvMkd&f01SG0?X`q7Flym(`T8a%`{4xYM4C51cnR~CK|*~Fo>U^AJ?xR
z*N-di;E@Pnz$02eq8+^21T@nJ9cu-*D=^vmF}>Omo&2%W9Quwk%>KzZ?~ml7|Eztl
zMZJ0$*gQb>5xWS7ptQJx7Q3K21A{07yBGt9I0KUiD4R3bs%CHE+X$+UxOV+ly7b@V
z$$zzLpvp-4{(r#-;I2pS7tn#ykq;Om?lT0xVF-H75cC{WANfCI2z$Yh_KYF<F+=n@
z2BW16`coO~mVmb2n{+c+Okr@E$KbUZR3ByE5NvqM(*IXzF6iKH-gW<(cKv7B^Pk}m
zXz{klRnWCc(2=@ppejk^Jg81#KLuLF&3N)Z?^RHJ#0Rd7*v^7h(=i|W&vgp4XBIR@
zw*Wl)2R>zpVefywlc3Ip;8{?WBzP9IFpvExXmpO_BxofbL>KGv|4c_f*S#^!`5{<!
zQmFcrNBd!q_QQeg;90`X-JsJv!1a-P<yNhj8A5h744UZ-($S#$NGJ?ZC2{+MDkBKV
z3mdf)4g%c|D-j8*kEBx=6p9%%su@iBS)JxG_^*-7IVoCshaurGL&R=|l;aF#w|FOf
z;a>7zV&i{-L;uB2fa)X0Eug!f7`i?%<h*A{c+3#`4s`l~>n8@scMP`QK*;hBgXs?j
z<NplC|C#mvGwJ?k(E%Y)HKYTsjKK8~o6dg@ZBTv0rS+dv6VxZ-29uy(5_q<VSNlI7
zc!o$o7qozZUlUp-q18vi2B7*#+Ty>QElPc)0v@0Pb&J45bC4=Y(H>DBftKDmfEVDw
z>LXAW$r&`YB;)X39?TH82USUsPL`qPe@(~#+D@k>^mhm=mNRh1gYU~w2F)e$sxSy|
z=Tkh&Ed8Hf{lB38e<6eae8!;qNXYoVsM&udpF8@IFC8n_J5;Q5t61q)u@Y1nm92zy
zc_1~BQ|T)B4d9ULzd@I=7DJCsa41>sSPHu5&Y^e(h*1I>)37gI2CsL}81QPyp=c@Q
zxEy3A2u|98uU>`MK@f&j&V0yyte`Qv?0JwWA-n8(kWH-M%4nVqxD$k22SM1@88bmw
zwPwx6Um2N!D<BJ4&j?;0nWjvIAjsAsi<C(gDTu|}kje-&C<m*L;1{z(7|5j=wAX`L
z2SKVK$f4YNQ60$j5v(`Vsu$j37}07N0b;-_BS;+tTE+nG3V|vhSY-sNl7btkULQfK
zCsOMpXbn}n(Y<yfq$Yxs$W;=YjkX>SRLg+pm>>*jucrZeyE=&5u-T_!Gw2{ASU<?C
za*b=ra?sI*c?<1wK?%byXO?y5bjWgj8}K?wi^K`w74=<)QSJJX&AQ=@`r(awVGR%j
z=}|)JZoQxy-N0&{fGRvlH?Ueauo|LL%cm5Abo|QkBX$2uHNOhR;2ygPCz<E|V4nMf
zbM8O!)&CVr4}eYvU=agdKg7z<z{=0SzyrRdn}>mck%>)OTHb_#JIt!*I;a3#^Pl_h
ze~kzK74L&iPE&XQI!qgM_D<DHhLnd4=}#Eq9)MO6hu>!izt0eSg(3P5L)I&Xq(=-v
zml*;tF?bweFj>c7w2;AY0fXgY2D=puW=k1t*D$zlW$@n35POcN>^b|y|Ki|<d~6&4
zGwk`#eDFW#+5du9|I1znjSVVZ{jYcxbV!}(<^R0kX(Ntfph+U}OaEnW{O7s!pY6<l
z_Ot()PyS~-3OYJf;uvUv3v}e&ba4M@J!lGv?ZAJ*Q~!m}{1-k8LIP+0^Pc?AdIW^n
zPJ=G1<v9gflE-}rw2V-A;eUpzn^M`EHLDImR;`4zZ4YVN9@M%ksC8Fh+kT(MT^5<k
zWqiB2O-n(YBk^bkkq8FSNCv@B&~{h8AW+4`8_2*N1VX%i47`2}eEtlAL7<~kMWPub
zQW#`28C0qm^ja9KXD|e;Wym}sRCkxV?g2y65r)WpLd7>^Tfb>d`mZ<Zzs8jRV%`5a
zYJUo6{bvaM&0zn7!R9NI`F}>U|6C^jIgS6b>4ADc+}fbd4iC7q1DO?qR3dPaPXkn^
z@PlWFAj){O5H%6JK7vSrDkHuB0$>E0EfUlLO(F4Xfo6_`wf~Fif=Yg2@M1Pe3($C-
zg57^5m;cJ({*j6^sCrU#fDX+;W{VUZ|0{qejN~2vBO}NZlAHrb5ZZNe0j+b8vHve+
z`(Mfq)M=7(0EtN1gC>~7tp9Tx{O8kn$iR1;fuWXxp^Ab10E^632Fd?CY9KcY8T=P8
z0?i)@8vPeG`LF18UoZTPL&X}0iZw3fi(SeWdzP*AC|!YBAHfgsaf07-2fp6{vSk%{
zk1B)>sfHl64`ewUgbgPl4OB>#WCJQI7C^c@$Rwo8W0$)CQU_Us8CE&-t#al=rNH$O
ztPYw7XCSXtut=W)uYI8Rv?A3<Xmdu$ogfI?6t-5uG-awu@)QUO?Gq(UFiV<%ynY*0
z8O2Y4oHYm8&1wvu8-kFK#xuMV1gVmc>mW#12vQU2fsXfS)s2KANY6+gTn*u>k3iKB
zxa!dis@Di<fFsSIdQI^3kakeLE)3}e)?p&d&Jm^vq$WZpU8>hYx=3&m-bF%V!0RK>
z+5m8^<PIj`wGxB@ua6+>A0V|4a@Psc5Ato;3ON84a)b!v#8=N+(5eGhSXaoUc$r=9
z0*mxnCdt!GlBXCYPBKpHH;C(j?jw%r)s5)X4Qtg3Zqf*>Q}eG;@vBhuE>-d>QS>ZU
z@F-IBELQX=Rd6qnbt#l}DO7ODlXJ=;h~%Af<(+fooN}aX(`4+@AxO?46AzMf%oelD
zU~sCo=sL_i_Xo?opWO5R^Dg`^oUxffA)A3+k^!_%i4%O?0}FVy5<3F}gQIKI{KY#M
z%$q`&e`471pK%N5Itk_5psklu;A7Jmw)|r#dCZV{g(2o9L)c}8@SC8GE%DD8q8>5C
z-Itp1UuNomhTOLdrQgJQ{+rDCZ#3(_Y5O;hsFMsf%NPu2GZ-%dT@vcFiNSq4L%>0X
zxbqBU&p9T6W~q30fJW-rkN)Sm0IH85tGgAhfO|%l{_~#&%^9&C1yx2u7ye6M11)S|
zKMSgl*v^3JBc@}Z6Lr|O{AZZ*o1ynF!+h{0%SKRr#C{N58G-8~k#qkA&VZ(qSPnz$
zBc6-@`ObhA@-b}t&oK87L+KTU=v|^|>vZdmLF%KRmaRc8TZ7wn2et0<ZQAWnx=tf<
zI-h+lqjolfObn<p5{Y1d>?#rp1KnpN5CW>3_`?|Z!$36=pFgxp;tyjGiewN;0M$nt
zbqpq53@!_KVs{DF-cg<Sfv@o~L(Vy_><dE0H&}D;fsWI0+s|OX19ZBP(K-gL3m~NN
zkU{S`gYFLoy`Rjw|5@}v-5(Ym(6R)`$Qz`O!v!8>gI7k#3`jKusdtb`<SL0z2h=};
zkb*k@h4nxPGLa;x1FDRW>mv#C|1wtpA@z|GWU$T|JU|B?rc;Cu**Sw|kz`>LM(`>L
zGK&PQmFz*CCTT}d?IUCVU)l~-A4%E$mw+JK|Kc{FDoM)zzn;f`Rr?>x_Irf&*Dy(4
zW|O<lAn^~>ebWO?X@M3q7=gAR2^s%a@Vtk$K7z~yK}X?AK*Mo}Y6z)50*!kVgYLG2
z-*5+KK<XVxO@v(aKq??)5;7u(Rt>?qpo><)6%eEvf|HQm5TtViuZC=3XZt|vBk*E5
zl&%n@K7v$F@R|sD-2$WzGE1EX=?6hb$XW$RO#~UqL9UNX!K>!rBxtz;xPJtxgCHcl
zCNhrig;Yau65a`dFd*kDKuCCHq#FrZt^nx`>4r5!_O!zLL@3=NaDAi|46TgRgX`6U
zL6s4_KEkYnv;%74$faU6Cc><e;3AM(39~+eR7_}<5rSK@0a6XYNk}yWC(-I7w8<vV
z`pu9U%BOw{q&@;&wpFzrv>Ogw4Y?FAaV}cyQn<(|e}QApT$}V6R!LLL;wG5Jbb}5v
z4{O#7Zh-V+wEZj9yh>Ev3sl_;l-+WaT(gy&(&g-uWo?sWY!aoc;-#$<BrW5_&10m@
zV<b$YBuv7@jYA0{36n4>lQ2o+P+>h^VLe|FeLsRo$RLPE%ZEWD!LE5X>)an~^L}!#
z`7gBjKU?$)29ZS203Cw}0|R7^7_y$0k%56p+A`ffwws~yvf@h6N&u!K|FxchcE9pm
z`Y(3pKmVrx3>6O;!X7Y$KV%5F#o&JvG<6>Pj3NFpL&6h=DgRAR{|8koOaJrl`Y(Fm
zKl@G)65I(MIxBw#I#knc1B1gR2D6o*Q(HavGXx%I2s_1)affljKiM_^#gBu=20@n@
zT>;fNQddEjy~<wx&vza)s>TK$juQtjw}Y&*V?Ont<uqt;kLCD(iBllE7`lHj)V*ey
z^q*l4sE%XU{GVz6f3D-8IU}KS|AoPb@6>;;W1#gCtVcjcH}ISWomR@U^FROW{|q_T
z80=>=IM25#SntzxFtlxVXxr|fmhC|;+e6y-1vKw+DcfL_GEds8i`l4%K{kOwBpg)5
z2uFdcA)zn^L1YvGy52}I6x3bf4r1U5X5b2C5R73E&18@%V9@Shu$;yaw2344EPLA<
zmBs&s8a^;YoM3a=!C<zEL3SI1^cDv3%M7wN8RXtEsC?m20L|BPD*xwJ{m-KQpH<^O
zyB28Jja%bC4|qHcxzgd+{4WSb0vi7XG(bCs1i|OI@oRw=%khJ|QSh1z(w~Bp+}fZG
zM?A192~rtBNJw8vSogn(-hX~Q@M1R8|KgVarENjI8%5Xu3NHVZoc}Ak{8w@Nuj&pW
zm0UoZk3c;p7tr=tY1kGddFTI%&LCZ~j^JTB`~Na91nCV)*#8%|`!8V&LNa#$W$gaT
z+WwcZ`7eQt9R7>h{pU9N&!qXES?NEw)_*ah|Ki5~MNIw+8A10V$$Q+@3x8u*w#Ke(
zjdR%|m-5AK;A0aYwU0wFXdn)B!8iD_Zzu4js~{3Q90#d!kXISm7cN1b4??bV(9Y9_
z)Io3(GX4gygCNV~Y{8u%NPPsUeITdFK}bj)1YtwQ{~;u#CPH1pJrhy^S!RGP4Yx*}
zErQGw!K)$AB05-y$Q(902VFG>o>MSNoMZ%>ErPCF0I%f+RYUO2MDe|lE)t|G1h0(X
zbr57k4%9P3noWRAG(b-Kfovv1?h`?JMvyuPa<l@3ja(T)MwYa|RT8961R)_~O=vX{
zybeNUU{*#D5qJfJ#(>vHF4b$DtJXlOA!HI!2f<c`gG3-}r@=iVc$EZaAlFKeC3#-p
z^?1l6q=tgmN1%g5D%QG{uW>3_4r=`5%!S0YMd~z@_z9q{P-M4WSci5{qq<+Ms#m#^
zdx?Tep1gCej6;@~Rf>pNys&AUkZClZVK|>&Ft2V9w{`#oacKCmtNXC2d2^_Gv8#Bp
zE4#8PIuk@}N-i8qF6@fV4ANGNGS-YT)=aWC_z{Dg1A~MWgRr++_F~q#KNzNe;$8RO
z^xA*XtnCa8UZ4&UyS%)L2@9trXbCM8b1^U|s5liUuKcgG3Ur<}|Hc2x_d$ya1h4;R
zI`N;O{XIj%35L-7p!&$`3aCnof5DLOk|FUack_RZegE~3{^#5IpJOwqRsxN`Z2(Qj
zb8Ps}vEjeQ{QtZy-^ELxGbdhV@Y&B`v6{hfIfL~^2De=do_iVM&M>rpW!?H;>ePSP
zyZ>eGfQI9wuYne3OP&WUoZ~zNni~?l2wH0=ei=L&bml+nng5I@Kqsm4od3_g_di3&
zGls%@42|y?rv7JI0KSTS8)zLJ?<r7y1X|2+7Nn8;_<v6Dp$)7@{_~vv&wc7Y>z@A%
z%l<QzKV)!T%b?!O;IhD}V!MCqk&xD1koqXFd0S8`=!!$@+~um_6M1cF7*tXi#3C7l
z!x#j@K}awZ)F*<}M?&EYLgCQSIgtpEIBy83KH?4oT`(b@%^+LIpwr1<H=7}9mr(h2
z;YmN#mi}ige#~IGl0k7QgTfRBiH!`(dl;1Nv1&eL)cM1t`xjEPaI603REE|^Y?`1d
ziAx<(A3@e0Kq@A*`bY?Tm>Z;z1F4UAwV}N#cufRZ%)kvE`r`u+)(L=nMv%%#L?3ie
zHi*>wFQNxp$p+edYWiQy0#qN#fCtt<vp?X8AyxPPn%<x)N!b-tA1OOQSI|j;yFAkN
zph+Ug8U{#xByESKJ`%J2FA1)a;PsIlOgpI4WCN;<Bpm)rI{X*0{x4<<svpITq4kll
zF^DT{^k3fNu5Q?Cczxttw#W^<E)HHFK`JAAwCh%jmq98c2iUFOXm_nbx<$zK5xnZb
zWI(z+klF`MLaH0<tho>dgoM{VmazN5A?NwPNi*<`c(85}bUzWK9|T#b0J*0XxkCgw
zg&R^IA@_!m>mWEAQW+V6k6AEHm}m^Stu<ky5x6D-?PiTboUQ<=hLBe&K&l}K2^o}w
z%xgpHAlM8M=rRaMR|rl*svgLS1)b0)NPPrZH3ui*GenR&2$_T|SAf(<$h8lgt>s?>
zL6F)9KMARloU7J=Dj@Ls2M8Oo!U35?u9zTfP-RpLU0Ds;(E}mjl@S^PQXfHTA9#HP
zudux9HoJqnMQ-JwNg~k7OmIzPkv0XirZKKdFRD#1tU)87O2e;01>6skcg#|>OO>}t
zma&YJG>aB82;$T6<573xR&(W1a^zNY;8L_@m9b!!GJ_x{NmC|CV+Jt;22p(mQGF&+
zeMS*IqL8Q#gNQb0K@q<iloU{Bz=H&J8MxIMm<^TvYkB9t<zM)oVab2t9si}Xb}=w`
zFfcHSifXudXE3rUGccL(x|A`PmospAG1zt5FL}?f_CM>E|NNK#%RKlma{a&5-T#W`
z{xi(|3#xj;AArW_!fr4`U1zBL#xU(4!_>bFbN}=21s%e`vhhFj22e%7wBbMVM$l=f
zpwnz1$BC~1PsexuW2pYXTKAcu{0&3cSq94u47y7h^p`W3Z(#C2!H{=TWBPxU{r?S4
z|5v>VLPF<3M|N`@2kmkdyZm4L%74Mj|9Q@V`b3;({<9wk9r`VG>c7a5{|rs98RCvJ
z6uw~Z_|E`2<N-Wb$8r=@orzokuVgs=U+^?&WgYub(8@aY6QFetJjefYuK&-_`IW(c
z4}<aq2H7Glulb>MJ7U`oM0V^9Z`&T;wllJ0cVOdgx3UeoadSjmn;Fz|8N_2jJtNSF
z9C*SAGHoOf#2^#`LZacI871*(&}j_9;SBr{p!>@Cqd|jp5;>syNUwvzX%0iu5uwJ%
zLeu_>&HB%g|CGUE9fR~@2BlRD@~4;-&#=nB;#U2@p!$zV{XehTe;(C;Tq^&$RQ_|S
zf^MCI)H}Qy|KaruuQq7097G&K3TXZp&;%*qhxL!Rwf^&fl|dKLX@j}i|GB`3SLZ)3
zc-<YSf20RGq>e}bKd%93?2F&vKLiOGf+{1(`E`)}v%<#z#mqp9*@R90i<*KE<j@8=
zoBvW4|HZ68of<Kl|Khg)MeIRH)c(Ji!++2bL(U))RHuOV#zJO@Af&wge>pqQ7Fs!2
zRV8T)8oq<nO%ith#cV*El|(H6E82jNge7S8g1Fg#QIr27#-LT-0)`-G%eX$&3Vv@@
zyvDj@tz+3D5CWfN3faC2-9u5l475H0TqU^_FLN$h3a^ad^^tAi66CwTK^>+1MUXXe
zkh4s&u9Smxh#>V5goM;bHqhz@v_=lHGQl!?4jfrz&4wd*ZwTBWLiB?mOXeW84}?Un
zk05Mw@LF!j%|wuz2tp#Cu>cvJgUmZXHn)N<4F?a<K{`anas6-xWJ?ibPb;J{g7lBj
zx<!yW2tq>EZzI=7paCV+!?*Rq+jPTPAxJ;0Sr1kpLHa=uQYW|p(g}i-T0u2%q!U!5
z6I7!eSdEFa0;-S^WVIw3iM4uis$A_<xf;1{LavD*Y)Bo1g#_>Sf$aK$Z2W=LN^lZU
zD?!f31Fy$}3&Q7~JgPUiRjzX>UFA>+nk2H!m~D|h!z3A085zX&Xooke1=XqfRVaCt
z$h+ptIOWLLrHfl8i5W+U8bu1}hw$qJa;SMQD>^gD+Av64F-e#+ikmTr8Zrp#GVp7I
z5T6#Pj^I{h;8tbeQh_24B?e5yrGhTVsldP?&%h?jz%EA|Qf6RPWMI%_Pz`2izQ{G_
z6~nyW40C@e7ad~Yjb&h9WnmE)leS=BRxrt)Tfgl^)Us!GU8kIue{x^*oo(xX{(Ybu
zcEoRk>La;_|M_<RXQ;jn8l8)H$PjUhA?ON2(R=;X|25D3*E;`S<<fuQga27Jf^I2d
z-T<nSm^c1s*#ufR&9Lb|!v^qG<I6!?YkBAYXXyRUTKAc??z3UvfBl~S5;;#8-1dP^
zkdvOmV7`bUWIsdIC5EYQ88`kDJ`6g9f%DjZj^m)C3B<2}j_>9=13I;v13XLzxq(9L
z*nfu3rwqQ^7}75=RDI;>1s}({2xL3suK%pZp!Jc^IZ$OJa0*l(aUA>4egcGePyFXO
z4!U=Pq3jWZ<uV4bW)_u7tElx6jk}{;_C>Vs2yNLK+Oj>Wb5B6SF56ttG6qJY0tSgV
z2EGu`a2#(418*Scd{oF{Z*E@(E+5c}dcFWqw@5S+R3C{)GYEl^Kn&;zQ^{OVeWcyW
z=scGp<CJjI6Sl6;teu~PD&I5s9bu4P&LFppLGlQL^f5Mt*IX*^A@vcD>OX$)ifdkV
zl=_GdT-`usf*@54=&A+S{1I|}1R1R3)&h-dK~zFWQ1t|^jJUKxl@TxafFOQ-(A^Du
zdjFB@BS;A^Yy?uqtqrQJ_;tZEO?v-@jsJ_7f+{Rd&HwD`pp_C3DGs&&?Arf%4gd3-
z{ui?NFKh>@k3{VL3)_ND3W6-66SoH)90VGs1Fu+s*GKa9DD{!FJ)%Amw*}Ql!WRF9
zE&j_}fsnWbs6LV~|1WNaQXfe<-`5CyYgx1!QXe^#E`+R4fSg}ySF{weI03Z3uW*?|
zA*eFKtdAg-5u`q{&0ho>FacLa@MUt4U8|7Qamb_{_*_#+O$6!tz-l7M$^`WK2wn}r
z8Q^MYHiU!^%0Vh4i}V?gng~Kd>LBFa5Ts88pBsWt4?%iEkV{(O9U{;?ICxyn7_&D7
z?h}EkA@Hsu!x+RhdGMMDd2R?&`@l)$ZjnCt_Hg($5Rk*SAvF<%gzO=LkdT^4E4TqZ
zHw3ASAiW`YWu%F%8iH3$$d!?XUj_1+w{SM{sqC0+cW@WUwGw*12b=*J%7c)QWq9t@
zpe;*SswePPCTQ0Qc~}o=n=O3HEu_wZ)I@F-s~wA%+2${@2CZ0_VVE${D6U5@szWCf
zyf^_|6Dhjp$vS39+oecYCrX+{3+e?6==kwycyTE?F-u#s$(l1unJ|m%f%-B6stkO}
z3_J=9oU#lYG9bh$&A=hWz$U@KA_hXN;vhDsGy|tJXn_H%I9O1GfmsNIm;@P^_|Xxg
z0BF}LqYwjOB+9@b%)}tf$u7kZG0k??4Th;-8D{?$nfHyM>>Pt$Hu!W?QwFzA(M7){
zmi}W{@}F}fs0?RV_n&DWXn;%T#(%ym|0ThQsrNra^hJiiYYbtxL5t`TA2H1PCx7O@
z@}2*RxBhb<0#`a~{&Q^k&$;P8+j>xC1nnQK|Ie@%jMn@I^_A9xml@3b&(QUSq3$I^
z&v%9i{~0FyXKnq-5O9RSU^#=udIs-94Dr_(%AWE}{LjAnzvSNkG8g`<-2%-h34<>s
z=LOd(tcO9HScT5~7wP-V;IWp$eXDT6J%+{~j6MGuW`h=xGi>?Kw&y?p@&Cf$dnkm?
z{}(y~(!zcObS4AyanO1^&eNbhM?zEoGelovQ14>k&f_yHam!m1(7eO9VXI%$=Absv
z9yH(P?M@}@)FLJc*wiqnrZR{|GYCb1hUo;t8TbPjczhXnyg*m)akwzBJA<x;1zp1z
zz#tOFAQjFa8O9)<z#x&xAd$o%nam&!MjCZo&NCTOPH~mo;wgQ^ka~$T?Gi)yVFrcO
z4DzcOq)sx(on@AP%c1g#Q|Ujq%71P(5aLk>ksRv(xxfb|@M(f87x3Y7yx?&*$Y>jg
z1lK#rs}~@fjCeIbE9C?r!*}5AMFQHOEv}HEJV?C;SziY^k5tG2v_gR2^go};e?GJS
z+{U1VeB9cgh2k6<|Jl?*NI(~KsDiZFe<{=dQfB`pOhBveIMx5ND*k7X{LdiwpIPHS
zgV}#}hyScj2*hRgpUdvQsNH`td(fDh6nGhf6y!i5N6>lTGLE2w!KEFb>lq}$)s~zs
zXuFfN?SCno|5BF!B`yAoTY->>#eWeC(BPq{`G3gy3_^yWDI^7#$2tM;?F&~s7O#cu
z3Un!4?OL=3QXe7rgCKiXosh1&gAB?+NJupV?+`iUFLKOV2uF}zLXh4NgoKDdwhkfp
ziLAlLCP4Z`)>*UR42U=y39f*kgL06zF=#1W8uY+z%e3iM8PhD&r$T0kAf#F9471c3
z<|)&`cSB4=n<0YKM3CMPq$Yy&i6E!_7{yO8il1N@*KY``cR&*p;OmD#45YoQ(6fEO
zBKpytka;3V1q7~rK&PHUcBX*2?fT)J`r)01;hhFy9q`J?AiNDyA3;dH&=xcVseK@%
zR$!fWP(5TsPA9NVJD?U<p9r}ELS}3FS8DiGz>gV(S4L<IJoOQzuS9lz1gWPmYbD5%
zJh(WdSLI%{-nC+_bLk5E!X;KY^Fh0ZQl{#~_UT1+Xot6I1lFneRw;NCNjvAtI%bJk
zB@3Iy2%1C*8-(&``f;gwaH_hq%iA+ZSbzrNMD#%8X?)<yh(nfvRg!^4f`Lg4j6@hf
z8+?So+fD^QE3O%YKq5>c42;5{{S*uW;O#$rpas|ryr5Yg1|IN{K0IJHH&~nxrd<Fx
zgP(yxgn>znomB#4tVWuA%`S#%-x#KU7n%1>dg<TjB@Z3DjvLh;@R@!~V(CAz#eYBx
z(pG>f4vzhx%1GkMe}QYD1=Uhl!PnvCJYons!Qg*|A@mkQ%w2}kZ#+9ehc+nP{?C2@
zG(g9&`9Iqx5Mtl(pJO9vu#Ra1c)V^sxT=D%H-L;{-}hf|ALtrxksbfJ*8S&O`d@Ix
zf5VOcEqDI6oBdxf?<s@NVFr)=3=zi}vTrf8JZD(=pYIT;N|L??s*=ROx50881Fg^&
zyYQc3)>j6v^$ZS68KO>r>Z6A5tP?;3gX}B*Gwt}#bpTW!37rCUtc1>k`bYdHL6s59
zLC}pt?5F>8oB}N}XK47&;JAZ9q@01bK+dhjzkI8I({|s6t^Unh0$M=((cEh{nxrid
z^Jrqy%VUs^W)KVqA^tGXif$fX1}<;VrR40c3@i>{<ix<{0_r6R27@Xi=?Dht6wn@6
z=~PgCB$2=%9L=Cm#$eXN;J1Y#<`6^3K?ds;pvoxvID^b`2Dz0CQYRVZ&%x^>UNz7?
zr#$LV64J-v10R(D8DWF;bU@V*yjB9&N01Rb2nnf=_%uQF5p?)Y6SN2%QXfHjPrTZo
zszk`(zlafN2u{KRG~fmrbF%?e3Zl0E+4cW3EC1!t0M$pJT1o3ak1l9$thfoNlOk{R
zUk;3f4M8`wg6bow{|t)%x%B_D*n;ltV{`t`?gFZkSRDUz+JX?IJ_1!nj-XzWB>2Ep
z&@p$g%1GJ)Bo19-X9wErBx?hju>w^|NcEAZ<$qywXniDX`d`o(R2c~y{g-oksO|sG
zI(NB45oj*hsbslJ;cDlCRme3FWQhXgDhD_T8mk9i|7~Bm#3pYcWU2_#4}uKPL24Yw
zyoHeC;~a9K%ef)7k7M3KNM&RXTeJXK8i#yx0$S|@=Yp#t(9j&H_5lsi%rZ}#2}aXF
zBCz@hv}q`9ibdKK5CgWK$TVfTY07kSD4J>pUI1(YJ6Zu+?|@e<K<Xoi2!uqdkBnmb
zK-CaxeFU$445L8R5M+`FQXfI9o``nHDh0@GL{P!-PQ9=W{m^!JeFUj`AT^PGSSy4J
zCv}3Gw1XNUH4%h_^ofw`BX|!8!a%Ni;A~C*N@Rpw9}%pW;PsRnY)KxxK7ueHJtH)d
zz&IYbqXem#Tx-_B>mUdNA`V$N;Zm{M0lahpR2hNC=%78Lum<(OYBj%d1&=~mmn?C+
z6mhFK0poCfgCJhL06uLmZVh)%6(=@Ddp22fMhPPZ0aXy<RfP75*rgd*#nBP75Cf?1
zBM9mQv4}xsS;U}%Oki<H7l~O2x=oc8+-ZX7VijRvl^}rFBpKMGKyKiaW#E=);4oIO
z&k}7rr#JsO!_1!ybN(<c{?EShKll3oEGzyq%>B=}5<H}~{y)QE(8UenS3y@}v!D9U
zcM)_THbe7k2Jedu?q?aiPcsBuU`W2kJ{5F?smz`Kq9;LB7{eOS7#-*O|Lkl3bFKf+
zz2QF#7%^@7&$#hF!)DOBafaRh*$@BcKKEbX@_(`G|0QpMu9ua&`(N?~=*A+3WB(bB
z{O3IiLVSz<GgLie2sy*xxRb$Y7enY-#<G`^egFBE{g>PJU*zC_spJ3E&iq$D`(JI^
zR|b#e3`SEK+*UHgon$Gz!_fMhbqeT&aOTzj8F&5X2CrD)ISm@_<39)5PRo7lKg$6S
zVn6<$`!u*`x9UH0`W*(%c?`U14B`bk;S+;uxBJy^325Br-wZ-NP222CS87Mi;I*k@
zP|9EsiDKXlVc-vA;PnI5L>z9Q4iTFZxF)h?V6kOjv1VYk0d<u4ycono7-XXu6f!_a
zJ{{CK5(x!q6pUj~%wsU<W-y(=VARVXQ_k+TMn3Kcs6LWj#vplu0ahPzD1)jYNF4+@
z&j-?rfz&CG+61{iLOy!|vP6LsyaAS7^FK1;&;oU`xWGLu9_{~Jy8rnMK|^X1cK^j4
zKu7|7*r|m5e+m2lLQelhUH{8^{TFrmFKz)kr<Bv+KZibOnIDJte;$MXLS~@tLeh5s
zMJ++6^a$ww=TiUAruv^n8Kg|m;y;h$e_p5m98UjP?f<jc{AaQH&tmbP)$%{5<$oTV
z|ALMn#BUEeRaMC0zmNl{(<EvKTHP(}09srpZ40V{q-;R-k(@1*l(7OWHJ7saFJT2*
z<|k$WB0>El@O={^`u{}@{>wN%Q}_93mA%rwV4Yj(GS`x&_65uB3zkFbBgn`br0)ai
z2SIv3Xs2pJ#^oT@kRABI1bAHoXF%#5$N-%!_|gY^@Bya~(hfXf1fC2+94m)h0YTXC
znh3Ib$|`*Z<SslIiRc+Y>mYD_1UYLCHZBLMk4#ghn<P(z)Ho2*FkvDxf^5k*0C$Ms
zeIjGnP9j`$Ly(0E`q3zx@gdzJ2nnryz<Y@F!dt*g=0I~p`eCiey&-tLgUrwgZh|1~
zphif21X;159aImgjNqhZU^N<oS3qbC<jM%Ge*~|OkQwkg2$=z|k6bFC(@Bu}2x~_P
zQXk<WAvF#f38{=A$BjZNBfC=2o@?v;MOHcUL5C_NPtl9*(~WG`3Tx2_s!{c?RPrj8
zamkf(Oc%9D5;BeCGYsW72;$N8=h5`wRCDE2abj0;K-5PfnyjLl3<4?)yoz8X&%g^q
zJaP;?atvIO44e`m#3K#jLIk;GK_Z+IAXzR+5DAi%X5f`!;FAR*2v<O!SwxvZL>(Uz
zR$~xWV-iwh7FK5yR_74aWDvAr5U^%&>Jn``$GqS_>!SY*bN({S{Kc^7KkG`+-9SvM
zK&Qs>9{JC87F=bV10CkZavW42F)aPh5OajV>Hve?5eAnd44x+$ik@ok1g(M-xdK`~
z$FTA*<2umdYPL0?g#n!F{<E(EoeIvf;Xl)+|7_bp$53<Z25mtVz4%}3=6`<h@uxzU
z{|jD#UY{p${y+Pr{|u-9a~}N9ap1qkp8rbQ{;N*=&yf8XR3B+9U@%zB;D3mr@Ge8!
zCx&T17?%AP+VP)Z+II%qISi^T3>LE)+*dKgo?tJz&(QV<R39z+&${+M)9(Mg$3QFS
zcuxQ4I}1i9L5Hle9Rk&ipevKkfLGSd`OOe|ia~w~1Ahj)O09X)oZz}0zIB`ZLH(m`
zzKvU4tJj-k&6D=(V9?28kc<b7)PX9a5Ky(k<;}qE3hE89I-=A^j1~-xwxB9WBoI*_
zNhdRi#)5QlcrtK?F^DEHNS86ll!Hi?cm}HlYN@BegLO+m<8=z>q5UHjP>%*O%LA#2
zASC3NQ%J3ZwLap8t)SxoS2vv6|2eckRT8K6e;&R6y!xPJ36KLwL2CfaL8D%v;WDTH
zQqCYG=?GqI0A6=3;P9W%?!SO7==LHhYtTFqi}rs`ga42<egY<-6$>)<pcUOR4j{Ia
zIq1$l0ew&>Mac5MkmY|iYY<|z{?A|zIzSJ4jGpO#7L)(%=7=gu)b+nO7)d&T>O@I!
zl>}P6XA7#8(CQ;uYiNB08oRRrRYu~L|KatKupVeZpp^X+Rj>C}*()83Hn^28b17Z|
zs*DO(IG`S$0O|gKDyF=JkWLV!4uTB6K|~;I+no8xbr6JYmpu<sAKB*2hs+Q`_6;GE
z;K~SeN|<HFOw8H`xh8_FmxIg-!D^zJknuOjpd9>SR*SSL=Bbk*XDL8dDS$UcPBQ^j
zMxd>c_$wnweFPbxGekWr20k$aVSwtO*lx(CA-$*$NbQ46!fGOvwR5_l3yDAr72y3K
z2m`qqg0LYq5rhQyj2g9r8bO<0k&gP&45)=7SbYRqR~AsEfwW=)tpY;kYWP*C`IJG{
zu{f11N3MpD*~m2!ob6P$5_!!7ygou^KsrjeY9g%SXcd!l)f$)TwQf~w?TVIK=PxqP
zo@bdc+bng4al%C1sBYb`R?VPBwSX!WzY0aqVi}h_3A=O=t7H-LXd%-`A)`nEgK%Em
zAPx;*J}n<^bx(FB2SynyRw*-f84Fe^6J`k`&<Z*cJqBT21QO8(%_~7jVO<8eEJOrC
z3hRP2is>?7ASNk8CTUZ41uH_xhE3j@P0p50&X!%lhC|VoQ^}4|%8Wt6UC26%y?Bq_
ztjAJo{!6X~^^f>B{}<TwUt-sP;luwWPl8Uy<^*pPWjPG$JMbL-&s_bQ!D|<T@dgmG
z+RWg(kD>IX{@VYl=l=`u1&xd`EdI~9;y?2$(BVRyYe1C|=Q_}BdMq3NGj9FQvg1Gd
zLC`>%$kqQMS3#E|@m&G+R>ZIUSGxOO;Wp?l389Psd9MEFzX3V|g8%G)<|ClPy+LRG
zto<*%>_0=x7Y6SW4Ek#r%=a>c-C->G#8dy5A?FT*;X(%K8V2ni29v35E=w6=PO_Ff
zWN81vJ{45MvaSEmv=>zSaGVAmZO4BGbf7oy@&6o${zK{`zLTKtk_hO^hOZ2+n;C=~
z83beGY}#FmR|Pd}hipmnY1-jgztyf_jatMM9`h;&`6LFB7zUvzQ0Iszn1RQSffKDh
zvIE^=%4!4ZA~9Gpa5ypu2QbJ*Gbm(&#_PnRL3@$7+!#QeDL0T_E-walcLweN1_nn4
z!!FazL(uw2@-&P5MM!<brUG6y2i`gasfi#YWP}a6o<S3I)*Pe{#i{)tjvy!3v1|Y5
z()-V?|DVqYwERrW^uLVRe_3<TTn}iv$LznPIcOqA(gJkZtFRg9ydBW}L1v%@Y;rdL
zWvu@5n}cSKM9lvSoBbEl18r_%()rJ3@SoG_Kfl9&UdR6;9{)wbNXp~Clm}>p4YYc}
z;=iETe=b8%&BdYjpF<BcaK~%(pWg^{u`J|P2tgANo7eQepyhud8_+rj38(*(&i~~-
z{!6?37qR;fT8w88o@TNGoggG(2QpmR4zycV+UCEs4R|66OoA#StN+5_?wf!i=*n_o
z-TzW{FO=OsTIZ~CEZX2+vedO`u~WfvhrFe>`3t~k5%Rtv_%0#Hm>cN09@r=xyfT9H
zeIOMOa^DBCjR;a1*?<pKfcJ?&NBv~YwS}PUnbw)pEisTq#xxKDUq1w^i6Fh9nU-lY
zp%qZ-Wb@R?kbaPP>SQzUVLs3!eZZ$ESb)amrkNy8fgq#ANrvFl$sj!=c%KNF0k4ce
zb4GDJke(5w4ubcFAY*ipqZJ@^5ZZaC@X81>Mh98N4X%vfHxq&H4G(SC3u!|^p`bY<
zcr~OG+@u@Ss0$jY1Jy@}4pBfYT89W;6G0eS0aeIV51g&xSAmR>>mzu@gr{zT)Kf0t
z0}LTc@{nsK%<htFC1{B_WN|n$iM29<2tukPm+G~U<$RD8=C*}PtnwB@>Lb&X=|=Gr
zv?DroLR&P08bOtjcd5L4p_EgOgk8F*HE6s})FMXAELPMsM#w0fUq3{|AV^TxUqIWF
zN8OcI$&OFio=4G|L)M%{(wI%!ltb2>L)M&2-jYk+l3URNffTH`!3a&9N70%`(VAP?
zibn-X@~But5Wl*efTpvko(Cc1C93Bos_P@F>m#P`Evn}wqU$NB<;I{M#vtX%pq9@N
zG@GI6Hvht3yc_=uZTm017gQf{>;QFVKxY)}{LiohG^-+T96S~mdzQgw4TJUy2K6Nj
z#%n+~s|FooD0#uK0#xiX%m(juT>@gWEdS585>X$qZ3b0F9D6{Okp%b@Zoy0cxi9|b
zzY6NJFzf==7XskBjX*cLUj8p|^S|VEP?aQb@xQ=DP%Xx=^S|=8|7tt`yRP}K(f^+z
z@;-y}Ne1_$49O1}!jCiP&SlW)WiXz?U^t1vWGaLA4u<R-3~k@JW`M>XIXC@h*^j7?
z_)deaOoG%$tOx&d9Qn_C0(8hK+lv37m38Jz8F;D~#FKQrr}<WIg49Rg<2$yyRBbRy
zSs>=o!lYZkARPyq`Vou-jn{$pxB7ri0%UW6_K(<{7+CE=hz&es$70JM<PGW^DJL+<
zrGwTph(v%E<8in!usSj@xquLZ0|SEt1G5KcZHaE1dG-;|dIsrb4AN&=<S())yyH~;
z%%%cfu7*(`LDwWe?t{?&FR1sQPZzX!n^XHgmkzWm1gVcWbU>96zcFa(HDs_$#qPhV
z{eLC9{|Yw$WvxL6{zzJY)=2XjfClbZwf-}y|7TDIA!fz@9BThL4MFvVuo<W-5zzsi
z6vwLnpWF05kL`a^H&A`VY5QN;?Z1HYe-Wqu!jAt1&Hpp1|7Vc?!yxycLFGS-_J3Yu
zkf4Yq=!^wPoBxtFAcmMVs0I`={m*3tI{1i1??0n9sG8$8{V!k%>L1B_{#Wt=F$Ast
z3t9aa0WTVtasX9EQnvqPzz8&v1g?)Htp1C_>LY#w(8+s(+MxPK)$_Au)(ZQAb)d>9
ze}O~ZQir^yHhJ?Q^%3NV9ta6Pr3YRgL8>GO7cv$HuWOL|KJaP?(g}jAfp8%;k#)vw
z5XzhmsgFRl54a98PoD}wAQA8d5Rf%;kP$ifOc1CVLaL9<QYK;6M`oaV@j#W4al&Nd
zgvq$7A;`&@5YjNN-ypURg3uOk!>b_(1JXHy)JKpt+wf`#!hlsqZIEq5@cIb6p9nld
z1gng6f?9NfT0j+3XbWU?4l+Qe4X%uIgBrC08^EXz+986~N08bFnN;_$RL7VkLau}0
zY*p`4T=fyWZh}-mXe4qk39X}qStTJ?Lj>87DhWb5RYDJra4K7AmAB9;XTD|jJoB`f
zMu}65V*7L>I<>-D)PoyUe5;haD-^xTW!;LUopU7}vnA}(#ck8Xty9IUQiROn`Hf>B
zYa4`&BKh^hg!O_2bpi!-{UMzu9t|%69Y0WQr57NmA1GuPfItR8LIyz~L4zQKpkaWp
zVX&}au#icxuxSW{6gCZpAQ7_=QS&fy%Lx2P(ke#MDn`~SQO-I^)H(%(EaHSrqXjL(
zc#H!XH2oP=y%}^9S#1m10;kDkuH&e>Alv_xYx*C)+5Z_9fjT=3t3l&z^6UOH<X>Vi
zn#G_wkwK$_L8XO3a~gy30tTyH3=ub&Cj4ia^q*lm_#}kIpeY-Mh5uQX|7TtEpM4!@
zB?HSAP)|hQ3}~iF_%i4eAf5~Vxi0+Yy$q5CEvxGS>)rRC@7#Z$%OE6h^*{fW|NQ6w
zi(L6He*M4bRnYz*wu7MSCm1&W=UDKcq4OU@_DhD?+YFIc7~BprSgd3)UB+OrfI)2<
zgT{OYm)#8ckHq`_bFTa^vIW$?<39!(C*(Z|T2{w@@;}#M(CQ18ga6r&{1-hA+Cs}S
z;Xgyn1qP)q2DUs#wN#7PIgs<)y&AT8HEeUQ+i9M+Qa)%3yHyQ?awce5oj@dLGZB9n
z15YrhgTv+l8n0t_1(BeN$q7`~u-P-P+A;8YFe=6{Yo;-4rZLDTFi1o)i1;#ayD+dD
zGq4yiFq(o8vpxfp9s`>x=wfcY2BW+K3<?_<WY;oEUty8E&Z_W{OZ6Lv8mNB+=@W5j
zf+l@9b->H84M7t(TzdaGb)jdaa%e%TA$HCGEE@k=HUG2f{Abhs&t(8wZY^O28uC)M
z{jY5MU&HagvCn@4-~XC!|K%+}NY?bfxc+}p-T%Vc{{__lbIAW^k^0ZT_l<%5H3Qc>
z2G#$<_W!x<|8v;<=e7FJY4)Gf5rmkWJ~KOiVQ~M>;P{cj?k|Jme@0u-G2?u;{~2`t
zGbn?OGlFcGl>?szq~!2l$>G16<9{vJ|0;I>6|F&5%bEX|GzHxN$ffb0UG+bkG6*q9
z|7DQ)#i0FP*!90i@PEO8|Gb|6*`5D$+kudf4XEn`n(Bfqw6g&n-5_Q4AF>NczyNf_
z1mu=m$atN3=3?9Ym5}<#K6i<I?h>24`CzmVzC0UJ^+2u@f}Fq)sfN%xKJcms&OqMN
z3aNmsAzh)Yxe%^x)?BOfSyt&YtTSdpR>Xm-o{VXbY6zLMOrHu`p8y$!1J4bCsv+2M
z3T7#jK>Z`so)PH0Q^+ZEU=p%20ZJzJ!;j!LO6Y?ed5T6tIz$i>Pqzp@MhB^Skn1C)
zYN!pilL*=^(g|&XtlZWKZh$b5?`%arYYueiTyU*!Fm%-%WZi;pKrIM?PwzxK<Wt+f
z5)EnjRUjisHKgWKrsh)y8L3nCE>-m|g`7HwM#AeOtPFVd1nD9nlW5%~mkQ99C3sKA
zxoQnE8*B9hsf-}~BZz5^r7Pg|ky+{tqr@r3as7IcUE1NTp!uV~I`x2B&@ptrRZ3nZ
z3LZuBu6c4Uprhnu9kV5E)5NTjrR`FrY?EbdKuFOhQNcPv&MHR6B1+mkQrbLH#v%&D
zwTh9oj+L{GK_HtrS?gGkpiLY?&^AWSCQi;KPR1@)#x7RcHU`Xyk+B0IS^HQyhj=BI
zWCF-JRoNv?-6dPYHAmhxPu?|8(IrdXDMQ{ZMaDT%!ZBUKF`dCUok2aCK{boPsGPx~
zkHK#VL;4wp+D8oi-x+59XP5`BUz(mWSk7UPY-A9xXOOI9kgjG>>|;=!#9+Q%vg8Z%
z#QzN4{}}pyf~uss;F@Lre@5`w9m{IanmV>^pt=Bbmcm8QQK($!{&SoGS4PJ`D=0v^
z7JzzH3<p4WJwWOszRUjwu7YYJ5pb;}eEGlV1<-{@yoW(mlE8}p%(MS1PW;bX`<p4|
zIz!+YR<GR*cIz43wt-H=v)sU-vzWndBSYd9hDrZ~HvJbo@n7g9=%_kKeFW+p9sJL>
zAGE%X^VolpWB*yU{b#88z~H)*LAaWMvrxda#5HTBU*k5P`Ym1!+q@gMJCtwIh@U0s
z(#D`yz#yLrTFD>~$sia7-Fd|A%fRjlTFk)V2C9$PUBQ)+69c;gsNRu_6gJM|H!Wh*
z$z)JS1YO3*>BPWb4!&JZje$WAj5I+T#Fz{j7!(;aszCLT{CWoI)eKUXS><nVD1YJA
z_{pUKx-1WJV>pjCXn{9mK8Rf#ROzs2{AUDrWSG=Jb3>q-NbNtPGN@O=rUg1nN6_NG
zu;qUdE6})>wC#T-8&E}{?(kpF{lAXqe`SaNvS$CKP5w(8gDN9IO;F#5PxU{$+<ykq
zzYHRO7$p9%EBxoN_%H4LUpDZ+Ou&B`PtZgXlhscKi|-72_Zif#vjzTFFZ$0B{-41K
zblD$+^?yFw{{r@)^Ap6tNBw{{v4Z<D3U>cB-2Phx|F?_$ukQ(}85OPmL+T?TJ<yUo
zHkJQO^8Xpc|1$8tXJCHHApDBU=D&2*f7zJ-N^$??qyLM0fqG$l7XNw8LC4&Q+kzG|
zNLc-svidJ+39668%>N4*fEJ}g?ze^1M;2L2><U)7f{*-i$Xg1pk09M5NR<SseTc7*
zAk`40#zA9)1R=elthtsMv%!@Ss6GPC{-jT}gms9_(xxC+M&@8Cczpz^df@dDXsI^1
zPlTms1g?op6Z=gQ`=K=vcn=YzGJ;$NfmRI}#PvWBT1|vJMhEE-!AZ!@R(O465Duz+
zkSim|1|l#Cs*fP&DnP0sczpyp@CQPo)klyGt&mmRkQLpa$|#@~ehwSt#A&o@2*O3K
zi6CrvH3X@WRJ}_<#~0wJkKol4{`v@U)d+Y5&#`m`W_Jlv0ilstYa-;iCy%PN9#v~W
z)fxDT6Z^s?pbbY^bIsFcnx)P#Nt|R5+ovDXs}tF!5z?d)(xevDpc+`O>|d+oSEKAz
zuIN#!2tFKD#wAzQHBa8XP}VhH-Zfvzqe#iCM9Hf}*{fX1vrOK-5Po{9l6#4gdx?r?
znX*Tzl6#4wTQLO5yA`7$$T_P@-c?H8Rf=AfaHQ;22|-Gp6{>#KkjXxcz&czA+~Ean
zC4;C`^RH3$t5))^Q1&WUa4%7GFO+r8m2=3Fv`!T?O&2!LWVWpm^zLAYT`p3zN38Cq
zSlvyIfHe%Vl?*(I41#G4{4or|2@JAz4CXU9%AT1n{?E|+ouTOsL(dO}{@<YU;vgNR
z`Jh|QnOA}KF>&nqFK`KTB?QOC|IBAV2cI$=`_I1TKf{7Q3|;>jW`X)*tULZo-}*0n
z3AD(8@8W;S%b*D?_AQ|6Xaz2S`c1s&L8mbYoc}L!`9El>_I}W^K!(}}41ot3{Pr<K
zonZ()&Jc8#CGIAF>J#?ZI}AR@8JzYqgq~xo`!2Tezs6xuKS<=*f3f5L1rGja+Xq6d
zNB?u3`Y*osKf{v$+!^;7j8-!6r!oknsrk10m#y=z-R@nx-J^PwNA)Jd%oP#={oGbf
z3~G4{GARt=@t~SWI0|%nD&#b8o&W|;PY~j81C83TIf3Se*qs<e1Go+IWStu%ZED%I
zve^{88F<V=7a)Sx@UwyMm16_n!Og(Hzy>~Foq<6)%f4_2gVb&Y(QOPO*O+APu_*rH
z(frS$0Y;kt*)>4x*%(zo{SF4De++8B8MOX0=>BIg{Lf+z>NSYlfaYuXHU6_JfX;K{
z(FQGnmbCp3K~lE=K_`@g_ngYwfoEPULAytVO#XuwNE`j<)B;<i0qUx7tN-WM|HG^M
zOU&eswB<t;o0F1;`?$1M@aeB%*51INv6ew)1%vu(2K8kOvU3>XFL-VFFSz_a!<7FF
zx&IlWKQXxdXK??|ZVlRDByI6u#`3?i^?xO+|7te>HSPcFdw^;~4cq?;rvD`jK}bLk
zROfPNfcDaIDF0`Y{m-HD3sj+)fo73}JpW7h{}&7SFB|<|C*{9h+JBAc|1$pn#hw0(
zI)FCGO2GO@(pDfJ@#uqY(}1kCP;kDj<@?kkdx3S{V(<Xn0-M}L);S9y^Fh%2RdZ+B
z=gzdtnPHzh(<Wzz4Tv$@K6kcV_8f5a1iA+ttv3X!abVpaI2&?|4{Yx$Xy1@EqB5EV
zs*KX6gSHES>m5i}2*R~Un*w1#>mbOu9GC=EL!eoqDIf+?eFQpo8(a;UB~5_TL=Y0P
zW*bUk>k~n$Avg)|6Cu|`kf|c9H4&r&g6tuJuwi{7(7FY1Wz>pX6B&dy=><1H5TqJ{
zoU{Py5TPB!4L)E&C#YIGunJNKLHb0H8V8fq^{>=H?GtJFRcQE>Y50_>dzY$vmui4J
zN06FG#j8Zwvsl@)7_zR?p=c?*4#H&Ms+HjN5rl!<QG!=YXf+Wg7hX4E)<>?DYd~g}
zEC-$93$BuEz)KoH*McNZF-e?c6h8qHHM)`Qx{>WV5p5bFO<I9<8vdZ8Dl`MDH3BNN
zf~vKHYqh`!R_K6_umJ5B1|NP75)7$>bW!z!8+3!}4MLj?LYs8K8@qIa>h*#f;Bkb;
zfV4y4F|HTh21x@@GP2DusvSQvj_L%Vm`>xEPJ_rc{fJiGux2gru^S-w2UMwem8g3b
zD>~<jS>=dYW-}NUFc{@C7}YRXHZyoGVenYYpxVg5k;1?j#=soGz!S+J7{?%2&*ivC
ztmTVr_iu*U=b-wi`#VGTcZQyy&?;#*sAI*r0yJ|Zb?(2!El`!je&Ij+1yFs&diuZ6
z@&61{J}^}NXXpUcTx>i43!De7eB(L`S_dw0>OaG>-wZu(7-s!q*bQEfeHL^;kkCbt
z8j*AV#V-6;JMo`y!+(yZPYiMA7{X67q+aJpy2}uFhQa@wX!a}h=6~|l-xyNwF+^Ws
zD7?Wi<u}uo|3XLpiyr$gcnEa467z1*IpJKV{tNH?&oKE5L(oYE<yj1z@eC4qh7l8j
zDmQvmZSkz$>QTMPp?H-_%xqrQ78c`b2IXwfQf+vBBpL}i)*EuBH=iE^FL*yJmj?rf
z3#dNgac5JBm$0i=@oJZKXynk&VUToTV9^I}H{%6YK+Fsbj9>&gH;@%X$t2rD>Lckr
z3{rR46drM?{^Qm9&!Y05N$Ee6@_$Ap&|V$}rT@&DpdmPJ%l~37|HXX%O9g-mXBmIc
z9#R2Q(B@ed+5b$6py?#gmK-b4w2>sZG6EeF=Lk9!PTB@E`66fgU&i{sh#44}{uea*
z&#MbM)K1j!Kbz8j2D#S^A`cmOw=yuTVqj<o-R!`S%fOJ&z)-`$*3G~_nL%_WgWOyO
z$r%iG>#djmSJ?$ZhHL-J&;GAi^`A8kw4$5A0JK$B)(To5sapS6wf?Vc^Iy^GzmoZX
zWeZT(Oxyr;PXxa%$aY>`&?0TnQf<Tkyqf<R6hDHHF?dy;>3;^B|3Y5>rGx*gg#T9v
z{;%W<LW-`SJ1HQitx8+{m$v#ZU<|5{*um$`s<_|R3w&vrv(PGc5qN-ZzIDz*wED;{
zX9gTW>La_HS#~+IY_mXB5@enTQXe5#J;-d!4EPZWplS%bcXbv>5LO?7m&t+GCxA!f
zAhi#qD}+WOS4p6*5WF%<o`QT!IHVeakdT_lG_l_#p$}3ML02in_d>cw@Y)B?(2wng
zAUxF&_&x|wxd$nWAS9$jG5{AxklF{C)Cp|?qb5+51TNelH4%h_6mgLKM3DLjGA;)p
zVcUo*;q{SDKow>+1nK^0`BmtE5u6LDi6Er9cd43JiJDi53JgIiBjoxBUNPaXn;^9n
zq=tgrgaxULAS9^z0QZ&P42SZSm<Y30g3lH~Y9&bj2y&aAYsqq#;$=>SOYFh3NT3r7
zK?_<y+nhnmCgB^F<0lx$_v%M?YDcysBi-muNX+U-x9dl@>qd8iP*gW)g<e#*eoQYU
zZ9rxWA*6mxFPJgGFm9qg2GWb2pcgv<Q`|6aA{-efOayJLPMUz6M6t7zK^K}pm)XSi
z>qd1OM0V(fw`qsBss}ehdra<yvX0sEj@eQ+Y0|c7g2plIMyVXeX$%S>47`pE4EhWV
zx(r;F3?j}9^2w~KnGCvZ;%WOCnx8Y&KW1op&(Qjbq4PUK*AIrie+*MWEAALqfEH9s
zU;i(08?=C&`S5?ii~mJ0{g=4<Ukp^^{AcL@&(I1Q73AOlU+4m8XDjC!@Y?c?{~4x%
zo7odVhYJau{4Wna;#>a0f5}sz{t?@O|7=G<d!rOCfo6Xg=Kg1BeZf%ij-lWgL)2x^
z+CR^$LixW9r~EgX`k$%hKSSnQhT`|kz5f|k{pZ{MpLg$nkc~(Fi=Fr{weCMd%}dZ_
zOyX?}ykVRgg%&9@{i@bGRc>;r-r`WX$slu)nBQaui&_SaA_n<P&^>x0v7p7?B2f$y
zF;FBG#~>NOAR59T9K;|L$iU~#zzf=Q#H*hp>)ask+9YLN%pe=ez@x#yEYH9U+2G8A
zR1<+JA_fK)5G9%Hn7@-j>NtbkX$HAx4D!zz<litTyk(I6#vu2dRpCFIBIs~$5k1fn
zGBNA_Lbm?}tpBrF{%6qs#vu2KLGmes%m)VP_Y8918I=DpsQu?K{Lf_$n(JXV`Ojkb
zpUL1qtMPwEQ&6`>%;Ud&;D3Y2|7!mKwLC#Lx~khfl{UI5s<EC;asi`Y0Rw9a1A{eq
zbWxdsK^lA-ych$6Ap?UM16w?UP&Na94TE4EgIF7bXe)zEn_0~>kwc(+)fx8xXFd8~
z=)ix@?f)5O{AZ~A%MkgW!RsS~{eK4M|AKD+g+2aDdH$F4{V(GJS}&(+^IzEt)KO9}
z{jX#OLSi~!nIs-E2y`$o6oPIz;+Vo9a)UwkA%oNl2Fd3Pa@QH;Z!oCcW6*fOp!SME
z`#pokHwL{w3}*isEdKMDfR+LZ7=n)DW7PwNfOgmsllZe%dGjrE=h@`Vx6Yksoj1oi
zZw_P=E4ceJ7gGB`NO*l@n+>|y4syvIgoIpm2fFPpW2SWmXum4jB5gPqykHx&i4|N8
z&4l!fEK;X|PE<&nVg*Cc)!J#GJ*~*q5QGh?j8Z0nvN>p#0+<2L?~tAmybgjeKs8Z9
zA83*&4s^N#q|7ml?}b-Da0axng4IWmwF+<&-Y0^zz#$}ZO@!GSg4aignh5E(R!BP>
zTKj-&C`f$-se>RS^wd-Eu|HY?p!x_hG>2RtLFyfC|4MjG1g(#J%Cx{8B1laH?;Jtu
zBUR60&}|Ez#i}rbTn%AmLn<IN5?&t>tdcM*BU~czE|p8gYWK=D5ad#}(yep_^bkex
zF+n!4jn&p!b1l+mL00uc&U!LSnqZMK$s}n4G^v1-he^@|Ncyo%ooJal5wsluyki<Z
z@C#Wd14)yh3C;8w(AAsZGd&>tsgX&@4g&CYY{;f;C<z;do@JE|YPo@i8gfCS3ifzO
z$blpl=`+kyr-2TlPn~9-&~F+CI)Msu4ie~WD1Xo$8OpBtat>LF&Y(m21g!J;EOSIm
zGB`D3c_iE!*v%Li4M6i9Y+ek^?hL{?VyXKW%5E`~-)5+K4mwY}`6EN?XNGR@z}+m+
zk_OJB|3xqU7rg<x(uiR_XnaiU@_(_*{~1<;hUq|uY0m+j>chSJzvyMq-9}tzK{Xe{
za`4pBED*_i?7#FC(5@(k86O#DeP-JHUj%#r9mC%LjQc?qitPFSI%odN@Bc5o<Uil^
z{~}%g8A{(Vggs)gKF;8EL!$b>+JgU*lm9c+|7Ixv%+T<PVbULlRiOGv<{W79h-=<|
zhSW<88q*oLs~7~Mq^+7=3YPj-u5&2g;9RxYykMnD>@*&iP6mTg2Gx8}eI${{ARGg#
zk3_+zq)NszNXIkCB{9gxFvvtRNJlY9MS@Oa5D#IHixxC4khHH6wk+e+N@C!31sCZu
z;QEM@fq@e|UI(gzV3--ifYe9I7Z~(zbL)TS*8k3=^_|1uFQ37G(7qw1|C}oSMf5;R
zwMDJ|3z>tiBWKY3&!GN^LF7II`(+01>kLBA7?i(r>4R=Z<u&^+Z1bPr=0A_+e^#Ua
zV3pv3K3&kM2*R%axo!Wm8~kKYe#0Pgm4V|V1H)nlh8YYD<=|Stn}NX_d<Uxm1A`RU
z@sbP-G7Jo+3_LEN%1Ahefv1vzqm+T8f`O}wfvbeUZ3EvP(3!dnJ3(vpxpw>)KKNgK
z|9_=j|4o+sm+AShp7ozU=)Z{Pe{rAxiUI#sL;fpy{g-g~uVDUPP~$&`?0;s7|6Fqa
z8AQG_FrQ&y*v7z63UZ9Z5;3b|A};^c!u~5I{MXL>Z(Q}?yzakw^M8w$|3=mSwM+i1
zr2UtS{?Ft6pTX`wv-W=m70?kz9Gd@G_5O34{MU^*W}0x$GIyRu&RomvIaWDyt#W5u
z<<5pJ)t+OQJqJ|vWY2`uN7mWXZ9v_nSs=lz*^rtDPC{-Mf{>6y<REnr@_E|kY185L
z5qQ(k4CM84mZ_61Qzt{JBuIS(DLvpMq-=qc$n}wNVn3+xK^>rj-1}exADRQ#N8sje
zAG{7Si0eVChIAp#X7Euz$dwVK0zxApeIf{{8`i25+5)eGAY5=w1nL=S1%vKsg^-xz
za*#8Bkn1C8O$5H5NDI1+s8TDS5>g*QY9B~N1F4TRe99p8krucbf|HN|Iyecbh9EVO
zibs*Md!e#>p{i%GvU?$9*E2GSwN}Cu1RXF{vK(^wA%ui<k&sEJGAx}VV(KI0Ly92F
zCm?5r*yb;SxCAoAX`Q>kENiwI__$p&q!y7iOay-Hp+y#G+qF&h9GJ*li>$ej6BMCj
z)_kk%1(5ku$XY&2h)B*ttLz1s$Tn{=8gj^63`b5#r)<GbI6-4L7cGUK3<<eK9da(8
zS;kD0wCN^k)Ahjvk~)!HIuUK4zLI~Xf>()>N1>v7ft*X8ltY%3O{$bls+@I#m~jM`
zx*LO-DFc@osF)IoHO$z^P;!$Y{|ZCNU54r>4An0g8s0H<eq-qW!!Z9p*LKj-1%WG|
zWef}}|FP`&FLCC-$`w#u0BSSO1C7*ys-C6bTOKz57drM|>cW5C!=Szu!#q%(C3y0`
z<LUnl^-mbG?}1R_6^8sr3`_rOoCVcN44Xg~rgNPJT^_-A@;}$^|J-~23+@6PoyWM~
zKST3N2JeRqRu>q2e~K0Tmz(%swBtWR(|?B2zYLAvSmykf-uz!^)qk<N_Y5988Dwi1
zSdtkeVsv~axRtMSD_>_<y3VR_l~&S1asNIB>kbB;N(RMj2GCJIQ4AuXpbL-0B0-%a
zP|pZlCCMa!Y9)n42E`=Mx&`SN29;!PgM2~L0&e{*W;q`Q79-Hi5tA6WKH_3v<Yr*t
z098N;45^P2ZOZmB*xzOFc*f`dpU?lluqOzKIe@N$Wl{RirUE+jKu`~KJ{zw-=<*{r
zP0;8ZgXCWZ!B-42Um2|abG!a$@%_&p@Sol5Kbz-&9^e1mKL7cAKJs|I7Y=yD?{iPw
z^EiXSItJ+$2CiZTb{7T~NAUUz6$S=D2GChmJPZun3=B*Nr$Jd9psTW^V>Ddb7`3|?
zq^lTsiy1ic8CVM$xXKu~${EC4q$mFfI00H<&vX30%!&WP`~U0i|F5>=zv=4#s<Z!V
zRR8CU`NHY;pUeHfv>WJDRZ$Dj$>9vrUm3WbFtA+$A)&(zQl}a84>LGlXGnX(Q}au3
z+JDtm|Han+m)`VW?7)ASWB)}?|Cc!bU*Z~QYXIc@D8)<v<<9??Ir5)>`+tdP{~2;#
zGMIj6F!?5C`CifGms#p+tIYL~*&>^qSzrX3A+pPw4?^ILLXhglK5Mo;xB>!IH{ey-
z5D^Fo=@vnD20>eu=`%p5_kb@Of}CXvAt5bK2piHCg^*UM(=1b_TBS{aR7r3WUK2qW
z=%XC4&XIA#M0i&SUI)P$pj+@DqjN~>x8c<gVhjh|ErM4@y3w7G4iTgRg7kynq+WQN
zP6Rqau7DtG6(I8wkg+%j38`@)q%P7VkyapL{WgTH;a>wn0aZwAwktqgB=GV%NKFJG
z;q?)yN<yuSApIjXuM&8D1n(TFdKRmA6e+sqDZ3XayB8?B<{{Taa5kjO$3sH8OQ1t{
z2~|n<Wh*cd)+z};#OG468d4v@7nVDhu7I4i2zL_X#46BPc6sxy!CU97bLZK?kZsOf
zP(v$cE@T%2WL(-lZ@xqR0_d1PE~x4OZTBu*0^RzZx7fa5DI^3Swm5>X0COx_?f^bm
z7>#rUpMH-_x)d!(pyFkaYsCnXke(*wPBF;oC^mWXt#amCWX(2Cod!9}*C?(Blpw<!
z)dQ=w0xC89%2m8d<XrL;oU#?1vK8%;<*XAFY-1!$L&UVh*=0N!B$HHQmNKNBW=K87
zkbeVIC6zs4sCoe!yX*eJuo$!&f&cP<p=;pI(d-WlYyV50{jYH0Kf|(Lpk)jT!CRIV
zf@Yc+7J#}!3`_qrYz6Oinhz4>KmK3f#D9i}y$p7X848~;wEbi#d&yAzjG^P5_@4jL
zS3y_4GHm?Maq>U^ssD_7{<H7+FLLz1;;H|#$NsAv{x7lpzwWI60=54c+&?pzJYWcZ
z$58a2q47UQ7kDsm(tprYSK~X-1*|4Z8AOU1SU`i277=s2syF%8ZFQ;MVw$s5E^MlR
zM>~Ug3#dL)$O0XODiX>d9D=BiAVYSLE|Ov@gGwrsdIqCfI)h>ovqn0bb{4B<3WIVS
zgQzp8UkNJhMHrZPK$Q^_4|oa*TF`?jaQV+49@u@EJN_?&%OggopPXL*d7S=p+W+UX
z0M$nvYX7-3K%0ytOu<JGn1Bj%cCG)MO8=Rp|1(H@WLNpi>HJ^7>pzqCe?~9R;268t
zf1%+2qGA8#WB)5B{?|(Xsh9oNEaRD4#5GQXnG6gu3=9UK@izu#1_mYYh@1!m=*nPj
zu#?aTP<_Ouo?+<M&!p4Mpis{sS<fI)#=u_8z*!2akC^fqd^ee|2i=(k+L$)|4@3JK
zhJsrR@h2Jlj)TTg4No!{oM2G63qs-_Kouze2L|CU46@%@RY87~b^osz_1_@#ze(+X
zo7VrzlmBba{x7@ezw(Oz;_LoPZ3Jz;7C-o(?<i>5B;T3;0_Xk<odvDElsgZqkCadU
zmp}gB_3(d-ZU0qL|1%hTlC=4x?DoehbG=3STF9{p*4Z=R^^tAnJP?8|%m(*^GG^L>
z2jbxMku6vRLPF{wNPPr8#%DUD#(|Tdd+yRef{^+M`D|^-cnG9Ef>%Ru2D~c-XCPNe
z@bz+#`Up}18N;r_gL94HLv!GTSo$%&AcR``=tOluY9gJe4mg5O66r>Qj#fafjL_;M
z?eJFZaL}R!cy9=6eFUk7Acy=QpMa_ZK4}5Ip9oY9K`JBv8V%I?NYlRpGHnE@iQq$U
zkR=Mp^^vA`sit?S26(CnxekJl&Z&45!RsSPtpqKN!H2X%N@GkCQa+=Rpfv{Idr2Uj
zBTN#xqeLVdQ6+(w)xql{m$H@cD#;OiJu##J2VKHdvc$GziGA@B+rmY5MT;RMsD>(B
z1Yz43FR?3HY*)0{z8ItoqQ<sx3B&~W?Z);+%R#7k8N?%?wSuLqT*}wDl&*3vS&50<
z%GcmQ5RX7>Rq%0t5H=<WQRxW2^TnZLImkzaORV!3Ip;03NuOn%JQZ}&ZSoYuxPHB;
zHm$HG&G1&WkS3L&1_i$wMekBMk0M#OWNDW~d8Z61+hlpWSTWNO2JJNYkbZ`ky-e{3
z7&6W<WS?iqxy?}cfT8XUL&rCUrJ#$DxGsRs24~*$pJC2#hW4+Z+gca?XK4M#Fd4kE
zZXRsZZV9-rG#gw~O#jcY^grXF|DFf_GlU*wV42S4b(f*>KSKi;HT`Gk{LZ`{bi@$L
z;r|R9|Fa+Z&v)cM>+b);C;!V_0-gB)I-&0Df02v-6)%IXixr;xpCRKFgW)p<qvs5s
z|3%aOOLl?^I_~oS41o_Aw3jf5^)qm%Ffc_kD5q=sPPEEd;ZU^BHg~yh>~sO|iOhB_
z42BI1>SYY_>EM-h0iYQokx&M)aL{HV$tVV?Xa?Ch2E`Nxm2?J;3<k|i2DKc}HYL?;
z2IWj9rFaJE2nK#@21Z?2?*rU9V&uctKVoFy^K_YbMg%nH@|q#;Erb6(2G`FF&L0^p
z{xO*RV$lD=VEBu{<R630e@4gu93KC9UH`M$fewOWxBbuV@Ppmq2aCgNX8Tuc4%Zk=
zPH{M|(@MQySA5JW|B!Rp4yV%X?iK4jDmS>7&5!Bc;+Hp>flV7UY{<k9KBt!xOmc%s
zHrVtQ=)Pc(8qfeE56BCgzMxecJZTI9Sqwr241)O#Tp0`;X$-uX3|wgpocVGts~CbV
zGq|5%usXzGw2MJ~8-wBo28B%^q`Z+qc_V|`E(U`m4Ca>^+#WH6equ=b&s_eWr{O<m
z*MGkL|Ll|ggN~8!`OnbvpJ5{S600Tu8CHVsDQ4X9pXC5(7c={D(AY26Y0w=aJZC^1
zaKZDSDM*P+|M^b*7d;9>YLosmcpcFW*=`np$S!xDP4*lIFtW~>3PRa4twDz*%(Kp%
zXANGD4H<I-RYR}}2$^l0F%xtjbUNsA2gtY_goNBK1gVA~wGX5p1eqs-)Isn;d-Ig3
zmJkFUud_fpTEQY^5@bpUnS^wOKqS&tL}>RAL2e^52A{=k6x#<ukcGjZ$|$B6b*)0X
zUR0ZYG>C-siICTCL#iRjh#Z83^o$^E2nnf=KwDr!nxIt@cs>ZTPXwuqAg3#62GnW=
z)<NcpKvj}|4P=ZCayy=ue~qSJH3UI+3_*@oQ1h!$0k4jOtkwoC-bPwD2U)Iwtv6Jp
z;!z0c3ZapZ%1GJ00A3#<Gcc<pOc4k0&5ZEdN#G1fd5+w5BC;|9RROTI5XdzVgbf+B
zb1q%sQnnI4X$3lAwQQMv*)qpc5OOG43P<+EOB_m;B8x-Vj-|`MYL-J><y5v3$|zms
zRJsaz4$HM-ty|?f(3R*FYu$-J9$>`~5^~*!N7Y)->U9vdXZ1P=f-Kd5usy5SxmB%&
zEa8AGCWKT>_QlIQikG?MFLKG5=ae<aHfuKM-rIy8lY}0lgg%|94(-Tx&G1&OkUAB=
z3T2;6d5=^z&wNFfEH&2@dAnF<i((=7CI+9i4B<N%5|1*Zo@7Y6$dGlDq2?8+K3e%-
z`UI#l0#!-JLHkcZCx<tGWT<=3Q2mCX30(Co0FTto11)J_SPZU`Al;>%{{=7oXNWz?
zAXv+wv0E|m3#dM7{?E`3UjH!hKkLT-;@AGO9|Nr?XFK>`^z?tB6QD!9Irf3Bc@;Ph
zIuuCk@_)Gt|E13Sw><n`Ys-J>>i-Pxe;D+hF<5_Q2>Zkk{~J`=+n-~QoyNe?$iNWE
zzz!O46Ev+9^JtOv@0R!P6m_j<uxen?FJ(|GWl+dxkVs(Q3uEB*VGs;p5DEtEB$9{(
zRYp?L3^K6{N~sLWX$<P=(E12m8G$w{Nk=e<1~G7(gGM+RASb$m>m$&B7#nK$g9)^x
zfz!pR{gBLp|8k4|>&*YJIOV@=^?#9~|Dx&txnuuxh5zRa`!5vrUo`eVj}K_28N2;|
z78_`N#BB40!T2?U#$^VX;|z-18KgEam@d%Gxa?SV(mZFsebE-X!p)9FE1im0xs}fI
zty$`mGm(K)4_w#qfGbB%FbP?R&H`JA&IBH~V+M`XF@WnM24@C=5C-8~5aQ1TRY`m~
z47}M4JQ)n!=?pAcpdORndIpoN3>q82Xgh=M9tPv1T#lCoLmtV*zm?DXZ&3eVr|Z9N
z_kY#y|4JSIh1&iLwESmj1tEqu@T$umaAnjFUJkJYbaXu9j{nTNKzpmX!IcsB>Hj=u
zL5TnSe}N02%1Gz}XyK*k`Tw$K|BD^_uQBC6L-1LH$URoc#~|lvL&n@dl~K-AP-T=o
z(=vSy2tlt~1uafMu7e=`AV|*$vpzBh_k$qy5egf;aTQ)GAy-DAOYxGYSf)%xu8Ghp
zAP5&!2Z47ILDtJbDji5ogt(qHt{)<fRv8&Wx<z18H>w+XVhD7RXjB`71Pz8@>kUEb
z9S8}pkKj{9@cKwAtVJuhNeguRDRg--yiWvSKx!g530lnyzMfS(pcYaMY5G+o*F>6r
z)f&E_8%`k`S2e(uk%~_l<U|EXl?3T4A=g2WS_#rEf=m^udJ?FQ6kKvKZ|_B}m~gS}
z!S~c7-<Aa#!Gn{KVJN4P<#wf55atLTTm(`(AlFTh9t5P~fUqI^k|1n23DE-A4rjm>
zIF+q*tz6?!z7oDI7jjrDWWhOPA1?f~A_y09dLHN?zRGoOXs8NwupUSXl>w=J&`6J(
z^=Kj>Zsi(}s<rM_>)k5XIajVlMh<1G>`Is0mMnvImvR?aX3sNCpJkjn!#H)CVe%Bi
z#D4wwUftMkjfghQphoq8dJX?N74IrFk3uDvTp{}mG3Oj^mo|QnE{2c|tns@UVoot9
zU0^7A#?bVIdE<YXQ~&v{{bvUsvmka2bSV<3O6mho;xzvPZ9bX=u9c>PmNbC6PV+!L
zBZiIty>9$x=z7h-zLJ4^ErZKh-ojs?nyC9fLpQjjn*N`48|d<Mjw7J`ww!xGV}PLZ
z!+ZaOx?P+8b03AC!yt6-zsQCEoag@YT>P(e_CL?|{|pKL8BFgmX#Z!>2c6*wV#r=%
zVA;vQ+sm!COw6`Z*s6`iFq_3No7J$8SwEjat$;xxi$OM%K{6F|vmU1}1D_j%fCp#=
zop1o6N|K3XP)uY{PG(R{2lb4UGC-t44oH_sDCqQWMs)_zfEe-xBHZBdI`Etkv|3^Z
zFLE&TYu(4Y{y)pI|E%Dn*SWX-=i2(8ck6$iE&ql0fY%4^|F3ZAzuwaS4Ax5-R8}(R
zY-G^c#Gt>G!F&q{sV!lUo5P?nmqB?3gHfw#)?U!j#APep$`-kmEpjef;#{`Gu6UtK
z<#N~j2@Je?Aj_H9z%>!1e*~$In8E!cMvyE5bAXhw+H*?=GYHp%kXS2&P$Pq20|Q4n
z15+UbLk<H&5d%X3=#&iqV+=|67_y!*)cxjc|IgC#pLgPa_G$k)XZ&ZJ@}Ftyf97f6
zA=3%qXzc~v&&bdRUUCT`!S`W-B6RkDhUNd6*8gYS@t=Lyf6l|89uTN9I`g0R+<(6F
z|M@P1PV*4F4BAs5Z~=6zhtNsTfS%IaZw$$M4B}Q<q-_E%R{+flL5`0Djmtsm9njG|
zkS(jIq*eMX>x|hp=`+v}q=rHpsWXG!@&KuWAQcd_ACxlHDs`G!@)U?5X!j6!<+fGQ
z6sx2u773Fe$UJ@`1et>SK@bvN`(Ukr(CQ#aw+Q)S2>qyTC;~56(2ME-BY0)hid-KV
zg0E_Y*FoCht&o}sUJXGQ&<;^(3#c+8u|5KIk-+s4q#uMrg6|}P^o$^tks7!fQt>H+
z)JO0t2~zt&>LW-U1eqIB@hE~Yz{?dtlSIfpBL$aS2vT&-Q*g<Zcg`WBK7#j^AVXdd
z5<Z%TRwZGrkB|l7H4%gXuSbv>$aM;&3j-k`2RL9Q;acFbkZuTksX3$?f{^gq2a^Hm
zzo1t}m7snNM9LL*FCnOkf%S&a*qHT^Th&^3r25FE3Z+VNs#s%Rw!*Fyv|Pj<cK@Jd
z_B@L$(7cjG)@+N6X{M=@jgltl#q}6Qb(+NX=!dmx2Gr~Lm1}qvOM4WGIp;8#mNOU@
zGdRy@@L9p&w}&C>1ViR+hPn@ITmDO*{LgU#R3C|6{m*$4Jmj(rJU}-Oe7?|BSOa(h
zxE`4fokHR{{@-x_e+IpN28Q{J3Wpey-$>Vki>ZEak<|-20)lHds6Jvj0O}tJ9Qn_^
z`#;09FQ8*0rh#l>-48k|Pw32l;q(6m&iv;%`=9;Hf2pJYRnGr+Tn?_1)Il3aSxo*j
z=>2EXf5M=8OU!ViuEz<h#I*+DizQs^S&ecS<l-2_!WhI8!6=zQC=PV}E2l35mlJ4p
z88^5}5)1;ZZ4e6w^^)Y`Kvj}rI)hva2+5~1NT-ADCFc)dV0B|))?{E%0pDrG1HQY8
zp8+(N#K!=dsSyBQttbGVyAft!FtI6KBedl|=W0-W#I_3LBgQqLgUZ>~ftG6UZu`%*
z=fC*L|MuJdGlcGEFx<vqxsSnOAL!_Hr~M4}dl<A<FeuJv&|bixI*Y-m)i`4}=qSjN
z<*ua*T}u}_l`eKFT@0y@yi2Ds2pNNF8&G`&S@OpL?iN8hN02HBBoD@n450oIF9Vwc
zk5mYwOb4S(2ZKaA1AiR@dl>^yHG^b_kntjwz~dIFx8*87829~Gp9MnFGyluZ{?9-8
zKUd#>(4p&)c<lQRseKqH{)ZqCcOp1ROa#|cQ$Y7Kf|mWy1D!+2wEjQK_WxXa5%m%8
zSx|k%3vO)iU->U|1zI2RodX@m!GG+(&hq~ZdB=?s)>x!%f~-}5oFE5TlK`)ekQc0?
zv2D_4Sf@=#Mvz${$VxfL=p2NER5!>Zygsr_0bM@?U78E7j7$=t)sRKPBuF&`Cz0zQ
z2pdw-!23ZE21F2z#9kSJ`$u}v%BVv(vK0w|wo*dsAjnQ42wNutv<(D8LaHGM39pQF
zLR&Pz=K~=3k05;_I0^3-K~^k4CXC?SA`Rav<oXC+6TvGZNPVR2U8)Q|-UqUQ2z1mB
zY{539_JK?cK}coyLI?vsC<kXiDkDfuB=4Lf@0??wHy;;r0Iw5(Y??+R?F$##7eXg!
z9EuhrBgf(;m<Td@25s|W>oI|DlmM@MfDHE`S4I#vyncZ*AQcU+iV3a^&Oob=oGMpC
zDkEePQXe^0uZ1IsF5>GW%pMb{>H&|-L8RQP*MYi4um$aKlU*wi19zaYyUNw}Wh)?l
zhuk;@S^H;^GuJ$O4rC3Uaq2Yv#7PDTeFh1AkiL>maD!$*t*m#sv}Y-|T`_}BB7;gA
zvsDd;_e$P?bqrCbI4WK+ZT>HD5;Tb;c<#U0+5c>N!NYOOK>LXp)`4!65IhMwM+kIe
z>taw916tuQ7c@s8dG^0d>~#jlY6iijf^N4!8<eJk3n<76gef2scu)QpzY1DW%&_Y}
z`*CnZ23e)B2(&Vs`v|BS5;y~z>6W_sU-j~Tr3;|TVMWgTS3CWmX*p<^i^1TVurX-Y
zC%+MB<D{@D=$--<&u{8JKNY+Wu^X*qP|RfzhyfkMEf@+Sc>)-CycxJWL6=8>?tXP-
z;B;c(_GIAqWe^Eu5DSJ)CxO@6$;C0q#xh7Ig3bWviDY06Vqi#NV8~)%nZqcwidF0+
zm-IPK`6~iyw}rKzi0VG$(Rn6d@Jd+i5(CE;28IqfpIfq9{tK@H_5K((|A*Z94!J{$
zaSN!Mz_j~6-|_z*H~up;-(fJ_!r*#@-Tf$|<9-I$Jq!-pp*4}-VsL%bWSY4X(s*+z
zUg8QGY6dqy@>jZ)tP3hzz#wc39**Mz&l#~ZFtCEg48he8D!~j^CJA2n?ajcS&Y&@e
z!F)AC@Nv+&e>o3%T7U6O`!Bxmzu>a}A}jxME%?tg1KiA*3hn|=0Z#)>MIxtx+a%M$
zyUeD7Rl+Ny3E=34D4qdY*}||I6bP(4|8wmF9e&Gy3Uss-AEa}19@P2eyZm493V0_q
z_%si})1Xaiy6gTilpeK8U1*oN6kZeALYB`V%@D!6JZQZh(1LAn#|Kigz;73_NSy{<
zlK`FxLau}0Y_sGkklP<1B=Tx)2-_@ik||PG2*QR`KoAmA?|{g7(54K-*lx&?KFB2e
zPzCsi9E1VC9S>3sq1@0K(F(aH4{|3FqymDkSb*#*f{)0dty|Cu1r5+aPT|%<TG0)e
zCxV;?0I7GdlJN05NKFJ9n)9uKR7~({2;L6@RXyO!NEv*>52QX)Myia^>LX?MLPghn
z2!fuC=9;GnLnPHl_Ru2@AqVY3j(>-e@cPIWTrt@fE<&!79Euh@qSi`~wFI^$%WR65
z+LkOsL&)7FNPUDxLW*TH5?;{|!+=y?pn<+h#F2Vft0d>DHI7IKq7hVCRIEiIVVAbU
z`$v%K28|05gphD4$kr+d2{F^9d<}?ExyGdedZiR}P!T+#=3Kf0w0fj)31|Uc;UX*W
z0z6Attz?=y*(3#2E9u5|8%MS4hc#=4H!BC$O8YenyHzu2W-}<IFle?jSkDBV;gfrp
zVcmb}Gtl~o^T2<GW#F;6<)GSxZQp;13;!iA{ujISU-0aInd9J*v<0A9B)&tSnF0oZ
zE(VS!2K~bZsXrO|{<F>kmuWNpGt2}RVw*u1*9x2iE%s*E3o4pG%k%m{QcP?9a~}FH
zaPmL@>Hl2E{)=4%t-E8|@L%}gf5DUg`HuaUKk{E<?|+e`{|v@I`Sd{NZSv@Y>LXEe
z&|RY9HZLV?Ux_*HVA5T}pp?fTmIOM8n<s#Q+aJW>^99vM9N@DT*c}+y>_JBZakw#X
zdob{Lf-WoP^#e`xOT{tB#DH#(6^~~Sj%E;wXOK!~SLhPdnj)orSk>&Dn)xeDtGAl=
zU$h*)YB>B<xBnq;|5Mrdue#MUPMH%79MjlM4-2mXEhl3DU)9031+=S_efxj*?Vvsa
z>yH1-d;YWR{qJ?_zwFxo3^uzN%(gPv>|!w8%wV;V!F(Nq)_exF*$nziAoY=X_HM|y
zw{y{AXV8+HrJ%lW?sE6?jlty$8N?kybr5LrIb<0FJC6E@0n|C-G80wulX95hlyShK
z@vBSkf0McY9ajF=Ujag53;y#?|Ia)FgczrTqih<OoB>|DG6NJr3=nZh^#qZf0<My#
zfO|&$;Eph)KAHkn52=sVf-b&i+x4I40HQt;xB!}D6ubl)uLB);co|e530(q>*YTeO
z)knr#{xj5_vrb!No3X?;Yc6<b4zz<6c?b?t8Nqu!mar)y$b1k)98wcONJxzXsi7bw
zL<Bw%2Vt0?RyUCKtH^8QAe9cB1l2pR1qtW_adDvAA2919Je3io8iJ6zs53;6$_R23
z1f(+34Qqw$i-*)cx?!N@bC5$jwZb5qi9nM?kggDTwx~%bxCwGc9)yH;g}{f>Aoqsg
zwUUN^jk;eoq&|XFMw))rYTgx)Dhb2@-$?}N6G3Vs$nidqQ`aDs5#*>J_>wvN^^qKG
zfDY0dLL=pzvf*o_?Q-Yg>L}U3DkeAs(p|F7UjSOkmcIZ}J;6z|x(UvOtZK6<UTRak
z6f(*OA(5*mv~>|^6%%|V1H5j6tac!tgfBIR58NR$Abll=%GD0AY71H)A=N&R9+PY3
zDm3I)wc4!;TARSstbx}-5E1AY9k_D@5rLB~70~_~yh?)fl0eP_FS>xNc!$i{*%d9e
zhOJ$<$eL}EHXSlLVUaSyIKEpys#_<#Lp`KcEx1M_uuj+}6ST%mEtA1!ra;V2&ei`l
zPlFad@tyn6a_~RHjGv&nfF+;{EI7~n7rF7D?-FP-TIlkB#f$%W_JH>st@!V9?mt7y
zaR!D829?zeiMIsWz{S)wa0hDse}*NXsRV|7|7C9dXE+X8?=5>4w3~^!|GymgCPALV
z|M|}R=RN(O;S30Q?D@}NGl{`wzI4@HhGn362hJ1!dC&Y8tN+hn^F&bZKQH)ZI$nLy
z(QW*O|9SO5_inSRJY?WK#lT(7z*54%70AHh59$Z;crkE$GjMr<&P`=^Wni;oV6kCf
zb^z_%VY3I_aK!7zARfRV70DnU#~_i+AeIOk$l|MI;GNGPwt_+QI<x#kW|d!@TK^d}
z|Fh}+=QIFa3B+gcpU>bwk3MMGwxG#>2BrTDT<00M&Wld?Z?qP4t2@J%{|uW!)eysu
z|BO39US`_!pJ^ZHDiEQg|CO%(XGp%rpf#7lauWzyuV=7Y2bwtojnpk+P@BPO)@hlw
z*R^P=OW_i?q6Kb63tWpAx)v|AE}rL7vplqV4ug_2c%cF>Xm1pBxdkT!XcUp3fx#Gb
zE0b6@gLw}__$rq2Q(S%DMQ8r!S@>UQ`G1z>;12IXaOZR;c-6r)aQA0AxE?_xXM)=j
zGr-Cq6%(Z21nC4#0r!s}6Gr{u7>6vYn+dMS7J;?^Fm3|ftH^QiKhMeke5XMpbpjVb
zM@R`?23=eNx=ZUSm~r_(_l5u5CqYPm+kb|JGgfJHZ8H}@&Xj}HK9GJ8WE2ik<3LEr
z$^=LXhmer@AV|FfAtAL7gbiAj0PY8Y?{}DF3^^<XX`>Kw1%$>nN$3Mz!-}*Z0a^{k
z^}wqhc#VUH0qGFINk|<8A>lQVVMHqg8HBeWBgmzA@Ub{ZO$4173PYJ8f>cJJW!vC$
z#<YX#H3I7(m5~N`{Whpd0@p|II!H617P6Kb!hqCAkje;N4XJroX!%xYcvqlxiy)Pe
z3it?6&?%tc6XzgD{Xps?$haJYgzR#K^^ZV(B4}^OHCw?o8?7dicg~S>%0{b?kn1L7
zHoO`_X4rx&BS?K@i%})P>n3D|ZQ&Bz!X?&4AOxwKAoUTXhJujr`UsiffV#^G!i7{x
z@O5`^2BdC+bKwk#IJ`c>$^aGqm7vN9R3(8=I)c<k*vOSG6)VxIB-hGS$h8T)u5kr-
zd?5R(+$+~WR(&JOLfDZ08iWn8(W!E^L-|U`S`KJ03B1N0vc4S>wpKZF%`#>}+KpCe
zlPppum?cgE9f2O*q7w-|s4BEc+Ovm2uaH5xfx&Vb!<4U@r$LKydCq_e5Qe>=o)N>^
z|4c{!^IiZ~Mi)Sn(fk+wvu*`<oo4=L-u~a>(0>Nq2@FCrjpLsQw*MFH22UbDrj{0h
zDkJ9Y|AjAtZbxF+4=S!TZv1Cxd%~bNiJ{@6$m##`*FYOmK=skl|9rRpvs9d8VDMqk
z>XoU!$FSr-=w$EXpatPl&Howfp7Ck_=hpboq6WGnmERC_dYiDxe-6$64AM6k825oH
zBZeFXW-kUNPiQ}g-5pduLk{s~wPj$oW?--dAr@OueI(+`t{lUtlFXo(0IH9K<3RP1
zP$PrDA_j?73}QD}6do}t|727D%b*H6Kmc?BgyDaFLuh@(qYt`Nh+FSJlfqL5mXi#H
zFKo7hY9a<O$+i<z8A0kJ(9s7wKz`>x^k4PHf4#;38SFQKR;}A?Ww6}{8bec^1?m|Y
zEMicZ#%0}SowMJyXsJ{FB99Ufaw%HiR<g*ZWWIg*qJXmL4AM5RaXCH)1_=fZ6;?4L
zY14QUzY2$(HK`LX#xD7py5_&_hW|F}|0^#Ct@`6$@Sk%YxEh)bu6m||>zY~Mx(2x#
zf{e&P>L5sk1z}GIS6lFI(G>9L9HeIiAtCdLQ^5w#gpSu?t&fDRfT|>ZNKJI*zrZz6
zUy18H=-?^tlmGR${%5E?X_YqDI(?o+`Ya3ZT@R3|#}YOx1mQyJ9n+MlXb5sD3uMd<
zLPDp35+^~quznC)b%TowIabanegblRgfa~Tu8$xqw2`YJ{pe0KF62Z_2npXk1ZTk4
zDu7SCM5%@#Rg!*Kvwj%pc09;23y`@Xc((|$oEvmXCwK*&Cg_sZM)=$iygq`Q;S8;V
zAh)%GFK>m^L=aNluNsap>mv>C3P^nfuY=&dAr-F@&@t3Vm5~y78xg!`1h0u;l~KMT
zc)0?+K9YCIg4aioY6wokZW5Vmn====4nk&I=gvbzkctU3a0Ko;LApzD5>ifLl92ic
zlZ4btcu3^B37PFsvK+FI4_@g&7!W~Z5+d$girAcswLZcWgbW5kY6nyjR3BkhNst;1
zjRakE3*LnU8Zbn?S{GgifuvxaGDtTKVhD15<b+f&L&CwXXsJW~BG56j;Nd&-^vPyv
zlT4Fmm?TZtkDI6+1=@C}72aVI(PbIiFJ#jMT3r^i-tz$H$Znp~|2Z!G=eq$qypHc2
zXtf*1+5gO^{&Rw_kYL;cZUfH(?TzI=@Ly*7e+I+33@Hx;+QIGQ31D(MXebXfD7W^%
z<mvxfxBrVB2NlSi7yidw`Oo0c$H1V<z~d}cygqFGYl}<&ZLj}#y6|6f`(I7pF3_=i
zCavx>Ub5~1ZR+Pc`d{+Gf7!183=S{&wElCegLaY%>wr%3<kkK!p!JVU;S0OOZU%-$
z3=HuM3=!Z1b9@*WJQ$e08CYEz*xW#8rLsGN?vh}!Wni)f-EhQY4dR09BQ}j>21W1@
z=z?(!JUtA86B&3nGl=eHka)x(|B^xR7mFI`fNpLb&}|69#{b1lKnQX>D!cK2Zu9@r
z`u`a?9x>RQ^#k|#7&e3U2!ldtH>hjLya%*$o_qg)-h==74*!?E^xyNue}<St47#fr
zthO_lu4m9)4mucCZ7zfEd<LZ{vR;#%3wC&vEON@51G)FdG<UIa_CnRvW!l+m6tmVd
zXcsUD`Z4HbGX$*^ue&Nc^^ew~|I+J0H^uX9`OmQq)CK@8#h(RR133#EUo*knnc#lw
zEO31U>GeQ{;2<3(2pd@(K3E5-caSHHAZzL%D;Xw%NARY>>Z3*fL8nV?0PV45*$+Ae
zm-FO*&a>c)v@ZYWx(G^LTvz^cUHZ>+<v-6wPy%B={h$5xf9-Am85+-+q|Px-n~Qvc
z9Aq^D^7tF1*JF}A#RR<G05ax=AQLB<!4RYq1m{8+rU?^qRX|Yj#C~Hif?WMz1nwMx
zW`ZEwh2lUXav&~@fn4=K>Lz#(2$=!t2SG?k1q3G{tK(qH<XUthph!Es840z*dqa@=
z2u^ARgRV1x)JNdTs2+5lSWrEv9|Z3ZAsw>-s**s*N>@SZAY>9!8L9bJse#upX!usC
zc~^kO=)g6RhIfU!S2^-}IryqMNLL7%gmj1?B)BqyuA>93TX4-&a><1wMdutvmn=Af
z?C6J+@{U=^^${8yQYG2u%tfn=;9Pip1gV}NB)nF_WLW1f!mNzYM6lLMkg^vm37HW_
zu6nStG3z79ay~Q?QXe4;Lh2)I<Z_n^XqAM_fS7<xLaG&bT?R1<!3Ebr5H3U|q=tgC
zK@9MC9mETez9F2nFJ1;ZbSNKmHnLUDT<h%V*4fi7)909_fG#dHhzAW;fU+vMJ~D}0
zqT)A&!C|s!&wY{0po6<v&;MsR2O7H*ya-zD#&-HY>lx6!d+dk)GtB=7niX0CT6f33
z=fBv*zYKY|_*(z-wSw1QO#l~7v%#Zzi$O=HDqZ=naPdFyK2Yf-cK5%;gnJB30Sru*
z;6nzi7!+a{q82jrUSY`Ez+hIxte(QaV8s)=E*X3)pTP0|d`JIFUHmWI0qP$K=z!`Y
z4#oe%I{$gJ{xb-FXW+QQz<!&7a}5K-ECx_z6b$NKG59kuc`>m1g07R`@?zk22O}3y
zp9nOCWDlB1Vs~KR_h66;231Bf;S2(?AjI1Xt&ha_GDtoK)kpF_m{tF=Yy5}QN1~?x
zCCxxc$n?LE>3>es|6FGOmCgV2Dg9xPTCLRv+Ug6rsF(G?f40LQ#C-&`l0oPgXeNbW
zBWNItulF;9&n5<=^`Lv?b(b@!F9g*`S_>J}W-zEu*Ns}_T(H9_Z;pNTOvk+WcG+``
zvlm(yuQV>+Vqbs2sr!QGr0d~}?q_WN;I;9;`?~+SOaJRE`OmlLKie#Dm_xRMfv5IB
zCpgUocZ(q8Y;ZLM>G(_s*By{*2r>i*sf-|9BshC2I5sDOV-_+lHyJ!qHxb+m?gx|b
zeiKA$8n`L46122~W&eMU!=ULx-n0L?&;RGY{GadYf8Hzqxvzo{-!;(Y0=~=txz2-5
zsbW9<UvKAshURm|DYK1|XPKu=HwPUQIStYkg0D9~R65|wh?x2abV35gQUpl7V-!CD
z&VYA?KsSJc&o~7ga|%8`0dy4+r0RjyM`#rgo;nCpA3^FMI0>(YAh!=dDkI1OIq>2H
z<T?n-)(UHaS4JQPQa=bXI;S4os2<!1u8bNXSJfa_MxY7^+!cZ_Kow9x73jiN*kKEh
zY6wC?DkC-U>ILNb2vie!m4iswLELDS5v1yYlaLj3kVOj$E})?~Mc9f3Wu*EDR2jkQ
zBL(M75CWetqu`V+@0bNakn=ntBwBrBn={uYdk*~U9mokh5E5P`LFynIq(yoT`3rC%
z$ZjU<{6(OO3F$-#<SGf?tAfuu!5B*+OUfY?5E_ZK4#Fx3FUheoAY*_;)<;fd%dsKQ
z@EWAj!9zNN?}`E`0M|#LN~aQfA1<_RLhD9>t0X7bITaAE!B0MSDqP}}zrZPffn)w`
zyPO%;ki+`YXIQ09gRH^RkMGrw@70d&(~j*^kL*#8?2(xH$p6BB^_!rh5|~c>X9aJl
z=RO0vAC~{>e~x|s8D{(e)kiBpE9>}=|7V;3pP}(HLkoD1Qrmyf*2rG)pwMzq1tfR_
z)E)+P8CL(dI``l3=znhAL{JZiPmzH~UdldJ-6spY%|{bVN-{9;b8x9~7Vit%^I!7P
zf1%U=1&{uhKL1~$@;`(5D>1GAd`kZX6#h$V|L2hZ#=v%$fn_NJ^AZN;RtAP@28L(`
zhFAuccn0<)2F^Ip^-p{W415WoiiST1w7h}aA2fW&>dL_C2wHT<=L<UATP&7AEEaT2
z6i7`MgFr6>*G2~MeGF3fSQVbIDE;72`@^a6pI7I<sQ!Ox)BmDoAS7V+U%>1?zX=G5
zn*L{y|H;6$n%Dk;$=3h&2mVX!`!99izt+zGiW~mRF8$9j;U8ngTZZHt48A8A>~}Gk
zYycs{^$fbJ8MKx%D9>Y1nZ=+mjX|Q3L9JgPVXaiwc8>II66MDP>uyN2-skFmFER5k
z-@^YQ%l~sM|IfA*JRk<SNO2ChzccSY<HG+;i~ci$5vW_b09-dg#^@lMmmzFOUkTC!
zf{(%>*GG`yJ4mersc0aJy?c?WB*@}PNMC6Zc<c@`oiqhp-_7{Xx#U0RmjA;0{>z;F
zFMsC0&V~Pam;P&C{;z-izurv{(!Kp(@6LbSTmKF3{g=G{U;5^M>C2$yD+W9NGc=vm
zOIc`?vD7SehAF5@nhNO(K?dUBl@Vm{%_4b<X~IN!m&Xh?7H5i7`#`E2WDy7(eqs;k
zgl*Ve0H902<9g8UdVrj0igqVBa(x7^aUcxH&ER-Q$e{`l60$@ALP92nz_UWoItbn~
z0$l?EI(!?tPytryKx-c@&~gP(WdyE|KvPAKaXDOT6`;!%z^DB{=7}KH5QK!x7D1{Z
zC<*BkVbnybo<*vjMe1Or;!z0M)~e!B2uF~k=OA^E0&K;CvTGisGJ;k^&N<4?SrDY;
zlnFtKju~hOQWHT)GU_8p#e`fR!52kf)=H2==iv1bq<R7!A_}RYz_Uy6`Usf;u9Bej
z5u{dvlV}x_WAQRbS&d5^Yc&KH#H^1XBG|}f*eWCFu@7kKLG=!}LV=4zMBwMm!sm(L
z%h%xw&=`<j66EwG*OKL~CCee_vbq#5aw%NoR4~UOZx$pr9kS=xX3T`F3^xU@!2@li
zO`c+uGF>lWGDFm2#-`J(+d=gaXn6k$XeOHH?0<GJ5;*psaXWbF-OB&$`~M4_`p>)U
zKSTLThN3SFwf{k=5xiM)2559n^x}W1hu~!lOF^^wE;s%&WS#=exwBa@FpEiO+fChd
zBe-H3sCr`103Svz2j0;m!Qj|qx%i#X>HqvE{)?RWFM9GnW72;HtGD9X|GDKs^^v6J
ze+Kb)3=EeT80IrDECAO>jSQ^m4D6W<e3=YFxeOwi45C>KVi_PLk;)*J%pep7UIgv~
zTHyd1zH<hx2Iuz!-P0!+!N3&`x+sdLlR>B-ULUc@KSr*P#0)_7k+=n@KH@k1&ujdj
z*XTdD!GBiO{|vm(8Q7LHu&-b!ek42ZKSS?3hT3}!8CMx1jxu;|XRuktV6>dUd<}!`
z4p4n$yAxC&X)I$<UdSLjgF$i%gYrxU?S<@i%QfOpn6_TB>A7h<>7nC-_lEO+m@fLG
zu<*aa!vA~=|MSd8>g>z~jh8Y&dP9pql@Ze-a6JX7iQp>dplX5FN01&6q$YxpkeUe6
zZ-NZdO$HCWLdNC#VfE2u@B%!D7DzQT9XuX559A)kh5y+Xfe>hM=A!=$OaFn;lJCsR
ze=*E{!!YkX!@{==i{CRW|G==~Bh&idT-*P%90ILo5jg`&OD22&Gjv?mOIfIwvJg@q
z!7Cs%2BbcM)I<;xQZYf+znLaXgfL9vCqQ~V5H7N~F?dxrXo*5R=n@&oMGv6kwc|h+
zu<FNl8^9Ql`UtZE!t4`4x<9yTB1i=UAt4nIG6|`SAag_Du27S9cr%2d6$YK{h4h2a
zDkC@-(m#UKL>l1Ru_0X{2np&C!MZ|Fc0iSyUxk`)mAYTGiZ_U%=3AxWU7_k#uIg2;
zhQv_wECcn42-Qc()ewAhE2J_)o+nar$yIdDfsl|YN!d9IQVqdLw8}`qAste7VI?uE
zCrlB@m>wDluaeLhxc1&c*6Be=%uzmD=wb=bYG?RXT*yLMT(uHh9L|7OPw+uMcpZev
zK<OWqV4tFLE?tHdVVVZ7gRnACM)ts~yCKTZsv$TRvMK{|pdX}e0!=CvK`U$fLeRh+
z<jh#h>^Yz{;aQ;b&mpIvn`O<0>|HWVpJA9dTRFH_IC-&Q<3W`bZyb;QSG)dS=`yH3
zVmtGn<@|pR@PHogng7yfL1)d$9Q@BP^$SDMBZkJ042|CyI{z^&293^1-1;wZ>p$qg
zy3L@gUFEL*S6cg@!E`YLgA)UTB?F6!g;!Sfgf%jTzTkCYA`GnjppyhZdrmnSq!JCP
z4l=C#&$17+jg({8e@0Kxh1ycO|9KVub1MJm*Zj{S^%!))0s8_5_5}<)6B)QCFbK4O
zkZ=uyNG*e86$nXHGDuZ2$doWh6flUUFbKpm2!u26hk^FV@&tf-OngBgE^i0}dk6z}
z1Os0?gK!T6-#P}#oeZ)!85Qp`Dt}>B|G};STE!<~^k2dZbYX;$`G0<65E3@}&#LmD
zLF6n0*I@?M`3wxx8Q3PWYj0yP*v6o~mO*7LgT_V%wT;Z0TNw1WF`DdVu-wI9e}KX5
z0E72Y2H(RB-Uk_ik29p-W~h28-1|pj!GF1B|G5`}5Zh8vR|qr>1Rr^u4;}}Dbav)|
z+4DhF0JJx>0L-2b?iN8*LdNJI9Vp0RImp`k8L$cka$nUHFc(riLAphdiV4DosDV^c
z5D|!hu#?`oR{j@Q|6g<Uf0=pzrMjQ<)ZUaWyC9l(f;V%IP~Jh=%A<<4Ck<Q9Sae@@
zn0(uD${qWuw;iY7g&^}OH;pG;)tG))f8G<#1ux9je)Zh;ORoF8Udlq9r1_AwaLCmV
zW*vmJLxf!Epw&l^S_x7I8O8O$8IY<6R2jvh-1Ptvht)@&hOw|E3Q?Vgs0dOMK}h8K
z2vYl?k(kvGq%uM}f*V=^LApf{QY*LtQuSyDHEM&GD?qql64WPxjL|{rBlUn<b=c?}
zwDv)*j6hu>|4K-Gq~cuxsgE>#t5m(pRXoc;$g4yZd|NBzfCb3GK9JEl2&v)$?HQ?g
z6so!xsJa&@!^YwuT_MQa5Tq-FT>Buim7Q{wopRvSkdjj-W=*8vkdD7T!dgio3u0DJ
zcos!~cQY-t&Ru|6D?#R=D5;O&^$souW_<*&n2;He+5~w&6uh?uVIYx9ol3AEhzZCf
zq<%poAqqgJ)qz*>Ih3!o2k(A`ETw}hhF3#y269aVua6+U1N$9%=`(2cBlv<d%iMX$
z2o%)di)QTeS6XK;6-imb6ElY)ZV^M(K8D4w_z(Q&y$nJem;W=J1zq_neCfaN(f>UA
z{wp2-uYBS^?;_A17S5&r#W(+#Is0Gl{(q@E{}~Sb2hD5m1nq3)J^G(*;(vy!7X|^d
z83bb(7!();4H-lX7+4j-eIg-7c0o{;#3&3p5<#=TwEh^wd{BKPeHOfE+#XaPiKzb<
zRQWHg`=3YcKeOZ`29Apie9IYxR<lVjU=p9hAlAho-pwG@3_>!spex$tYC+dh$dxfj
z7c)p^Fo>ryh(t38MS^ZEhg=aW7|OsK!oU-ZQXlcIh1W+6il5;1k&wZE5u^Wn#vsIR
z{GVGNbhkJI&r1f5gA5Ei7#L=P=CD}$8Mvl0Xs!nx2c)!`L3SB~)KUiNWekcd7!0;E
zc%R{mxyzaVQnLQ1TJL|8+5e3f{Wn?sUuy~I$|tdT|3&A5_NB5TS0#`R&TLo(1gUW#
z_0c?V?E|Taz#S#V1^+?Uc0lSZWJBQf5xm}kbZ{Uf^1vNL!Ax)^1=)o(9W)BZvfw}0
zvj410!2^S<Kt~~QZU4`?4P0sVePPHt!w|BK!FxG_|7zx#ZG7nmB}$H~HC;98yJa=)
zk>{Kzz6)OXEqv*}=#}r{m;Os%`!9VRvg(7^@;4q!Ul}iYp+E15{Iq+L6K)vJdnDd+
zLO*qpPSSix?E~413Lzo=AUFvbcEcngl@6SQ^p8-s3xRt^$g2?`^$uEn1gVDLq+x6)
z_~On^(84*?%E%xRa<C7Sgw#HI;cXBG8c9TD)S?C6yQ%{kphH|sgj^qK1vY4)pgQDg
zNIjrd&A&zswucC<K2rCqhE5OpRiM>JkUkNpGV&~gteS%?PJqt`!5NSxb7~%i5Cj>5
z!&)CfY9-`42+}7~bk0$5%2sj&BaC4>NF9VsLgpH<lE_sLRyJfV39VK_>nkC1;g=Rc
z81RY-R7mE{hmi0IDU6QNBFOk2au*5C1{cN9J_3F=*7Xu_K}g*M-w$P13c4yCQrbgE
zGU_9E)dR0(K-Ca<rJQ3a=ujZ|*%ZhM;B1I?@X4{D#eq&G%i+}!+%XWhK}fjwA-8E*
z7A&&JUkC|X$l$6qxSn#%o9|k**t2+rOWtCuoMlES^Z4TCDHm^6n10`G>tD0;|IN<-
zm%j*FH3zzBV(WkI<Dk8+@>l;WT>Gzb{lEUB|8lp%%f)wrX0(}i|Chb`U;G$&aaaF;
z_7(pncY#Q@^fe5EVc@Ic<QN#Zz&#@t26h1k25|-sC5DJyDpS8RtozS@=)cOL{|xPa
z*o^+O82sl{{m-uaU()bDgZN7Z-qj5JYZ;VxGUyy)(Amx)zluR}CJ4#&GRX8Y$agZx
zbuh@cgOFSugG>#BTrq=e5rb4ZgG4HWcpPZXNH_}AT@nZZb(A<m7`Ve2cv={QyFn&P
zZfB6a$)t3TLE#IF`fm<x(1s&Hga1N?piQ_UX8*+u|Fg*dWDq>hz<!j0YY7A6YzC$t
z2F7j%hDOj#7E=p@@Kgr**$l?(8GMg1B;R4EdCk}RhjZqCfd!!M5BI|VEQ>)1)B{=!
z9%lpJatJ*V5K`$tDjN968>D`LkdPV&JVeH@08~dXE(DYC`Uo<+1X)K1U&a6#g@cgr
z%|w&IYq=rS5TpW{3#$7-_1Z$vgcQT#|4hsNGq3v3vKn+iAH(c_0<-_{&-$&^{(?8_
zG=u8`2ID>k{Z<Cceo4>i+R-Zwb2iym9q?>B;@fp1YU+jLc{ei`-G-pzHBYOyysFyr
zs(kayvQ01Y*FMc%{kUM|liXzw6BgbLo^rvd^RQm&daJa#W=S(GlBd9{9%Kgcv89lc
zOCd8s5E9bmfv(Mt?}gtk1gVc8vdA?NgblBB^kci><8qM6AV`-7%x;I*N60-OI2&B=
zK+aMCT>t^8kC0FGF$iyiEZBySdZ8_F1UW(hPC_;cK}bkN10lg(p+>F11~96FFIRxb
zs>Aw3kSQVPBoVAz1nLmMY9i<ikzWPm*dO?YR;;V$;B^o(1H4$S5MB+z8PL%==N!n0
z9K13DU4H`>gj7b5g$nTHa|+Ix@=h7ZNX{`G8OhkEVy%hr2qITZkP#usng_I632R3Q
za^el7GD0RH4R}bI39pZk8Q{7JS|34bB}iclua5{a;H5OY>cPrDtB-6;mO%=6Y~&KG
z{UeA8$RxZ%L1w_uKS%D1!RsJo23!|JCD!@~;yZ{V;Uk$425h$%=x`@+FhWn*aLk+U
zl)u0wZ?PNb)~D6>g{x(A*GgutWJp}ekiMRw?T*M=&?T)xNB*<#1>MBQeH?V5fY6Em
z66gNQUidF@8nnn6R8?&T^=>3C{O3Ii9!i=5T9wDX;=kJQ|GMY?XP^1+zu~KnXC7z=
z4FfaygaH;XDWqZ^DBSYQaM6Fhga1WO{O4c$pP~2#pVfZ`o&O9H|2b6ti|YPoU^vad
zFp)uQ3!BX)cH7I`R!12$HZv$JV33~8AlDB<3SA5eT?~qy3<@0#3JnZ$bqw+)3^IkF
z%1Af?)I}1E098i(;7KLkKyYOg&cNNwAk@Xcvx-4t8-v^}X65?~@}HU2ez9u&=h6jL
zMnd3CPkaX8%830T1LJ-MhCSfxStm0vv@kHVGjR0?Yp#~_J#Ul!)~fBl$CUr3^Z(l{
z`>(P5zv6=beDnVE&i&60MxYzpA(acH?tqYxE)RSl4pK2emLfnD&j!y2!RsTiN@!)Y
z5Y#yW)gzE93^K3=uZ$qWbdWJO__Dgm;FTwHz!N}_j@bPFObh>Wt^kd~32gk&zu`aI
zDo}j{I#ze;UxrzK*;f2#Xnw-rx{N`gk%7N}L9UX)pqs(0hu34CX5@0Sv@MP`hy1%P
zgipB?J>y!!>}zQY@8m4Km$&>u#l}~SyFT}y{NH!tU)`<`RomW_ZFy6;{(145=S8cZ
zr7eFPH~+Rr|4H4Fb>_*lFzX{kKM3gzZFn67uYlkTNY#T}AAvRv#q~fcAhg8^`mxyW
zT?JP|pbn89=rlRVxu&2_5ac``w0j@G6%eRCLhb>PQ6GWF;y|ml(dr{`byKeq*q|9u
zuNeSc$PJkr0!x7g=pcO}NYw)&A+-{;Lj+#Jtp;8-2U@}nz8C^t8G*LAg0IGd49!7$
zMv#6Ga!mwb<EoF4N9rI|4}?TiJs1n;K+ETlDkJ1-NX{``#y%B-Aa{#klGflgTKGwL
zeS};|VOC6V5v!c}Xb4s-q3>^kwBeDfBzR{EULQdikW~{-g^LIx$N~xYT0X4x5vCxd
zkcSlb4n<2LTri2{m_EniCD>GAuhbxQ7QDKFFpz5^csB|n1@8?ZGr%qZnGbgx+yM{<
zWS24AU-0{$Tnm@E7A}K?hGXdp+rlNzg)3Z(S2-3hvCN*MoxDgZX`yiGHp|&}w0Hft
zJp)=91*&nj|7Tba-aoq<w4|AF_kYf#pp&{m+m7b_1D*9f6?{0r2GCL7d?!Fx@=4wP
zuQ%y31G5YG&}@G2nWgLu{J9G0M;WI57eDY{>fC?Hv;W2G{xkT377}nMfiAX_H2*KC
zzJ-CImVv86(PtM^{7Z&_TMQ0o7!3C^D6L_TUdo_29kf<WaUz4t6b9vPP?e-q&mddH
zAeqJ>ngr?!v3r9~Y~XMMk=&jPyxt%L>Kwu9Bi<DZ;+q(xZ?GudV^aLYq5gwU`@f*x
ze<_py;)eg36kjuNUu9rf&A_mPfxVYOYz~9jdIs+!46&EQD&7iA|0})dzu@ZsLTmnW
ztpxXg7J!$)%?6kGkUc@O!6K0Q0$#JA)kN^l5o9t5q70&C&VS~){}~|_6SzKNfOL)^
zOY9bc2k{_P7(^GOBAWvmc4L5?-~?IK09g?Msq#SQJN;vr_K#uuKZYs48D{@ynDmdU
z;}>i5SLUj>OqFlgTHZ^~{O_{!zw565%4Js>R9hH$(;37Id96D6th+=#x-?_v*%z+$
zuH6~dzAvu-Sn`yUsnbp;O*@%B`&{Py%Q=g#Wi7s%GWUGU^pkNjPp2)unzj69-pbo~
zD{tp4znQcAX7=LiS&ObE&N=5)yB>0kwn@?iV_4M#=@UU}A7m0z>6pa!n8x**#P*oP
zf~JJvH4X{`X`QxlOt*0iq8|k55W%Y+2*VI#L=N5$LaT-#=lMWLNKFK(gV0EDpQstU
zr?puxw8;RB@ZZo1Ib{yg34+u+5E60^5!xsm{7?l*H3T6c{U9~iIX`eN<R%CR38{}D
zJtO3)B1nA%seRxiv>F0mBM#ovin4@T5wy8APr)S*(k+72L=Y0KK9YCLQglE<jv2^9
zb8>d6m<W88Jm_LyGzMl3g)9Q8r!2tNf?H?JMy`+GY<M*UXF%=&N2`*oa^}Owk#I4<
z71Kg^l>}iR*GG_A2~Ii{E`qZm3@6w*dyo<sD+#&W!4`H^AEXS2l<L?>lv89KV28>=
zDhP+7rI1<!!o?&Zw?{#&fK*Hn5+Z_?gx5H5(~w1w*$@Q~5w!XUZnA6PGPfeo0iI4}
zE3NVtf^GmTT;W`}!m$|C@3So2V3f6zC2cuZ&RT}blkC%;F>C{^)D=DhS_{Xz=Re1O
z(4h+aCqd_<ieCKBf8amE#E%R$e;E2ewH41EkgUL|{|pB~M-LfIyUrjU0UoSlVqg$c
zP_q$=-0s-%jA04r##s5w{~5RbX9)ezVDpYo<3F$Ze_ikYiuV5*m?tnWbPC$6@+f}D
zm;IF?<{5+gWd`%Z3~Cz~<X3{$POHpfP@T%4HVs-IDb_KF6*KTfGjN15FuOA_I5IF;
zGcedNFxoRPJ2G&2fa)WjaL}w4UmJsHFN5$pM(J$~vbWfjA27&&U{U_UB>#g!_8WuP
zD+aE63?la!<X-Tb-BtJds-OAZrS8Aa%>Us_{yQ%I@3iE<;)?$g^Zzr=`NuTpAH#f5
zhXu6W08-{dsu;-nw*~(h=YtTa-vOzKAS9%IK~@Rr>_B=?u=<E`Hmp8^)HO);5onbi
ze4!nhnczx_asGeih5uO>{byMSt`<QD)BR;w@Sk}Rs9VIb1$0vt!<7FF<&PPn&oX%L
zWpLRJI!(fT6GQMmhTt6x&MO%7dKu(uL02gW=ZZQ_RP>u`lD^WlVwYdTp5WGlLCw3u
z+V@3t?u+R?lrZIZ%8b()^DgBrxlywAe)X1TRa>8zZ+eow;#TIe8(B-QXDq&&y5LgM
z-1CVu&&EzY88!8|Yr`h%<f-QI6CvACAvF=&xE!P!f|EqnM~2{g?hIo=l@VHf1gV1{
zCnrGI$h8lg4IYsLjm4odKz$<cVW%yS(@h~Y5x4>Z9jAcA0M$g0yIBoFoAijd6c4#F
z((tc=*GF2|=7u135S)bcgCI*3AXO5mPXylCid+*x>LWE+&j_+s0kbl~sEHs)p`z4B
z;LZ`KK7!Oia1yODf(+J?R3FLOrl3_rXk17g1V3F0%mCd#0zLw9wsqEAtjH#N9uk@Z
zuc2TJgsg4Od@ur?4gs&W;8RY>)f4LRdJFCI;0XPEa!3V)i-go-$RvTWzJjHod&~=#
zBHveT59Y$fAzX+8<hsDIcp0Q3g9t)MhzJ@9sV0yGA){>&w?V}Vmw_%%hxO5*g5Z^P
zaEsuAa1qFAa0m&Jg|HzcXyX#-I@$%cuoH0|!3W`j4sXw2;8486rF@NH{&J19c~ZG+
z%sNjPuK3`8?7!uW|ArU-^PU17%F2HBKj-=X!YBSStozRZ9;{>8{$K1Q=tw&Llc4D*
ziBtcD{bw>TI503+F&btw<Qx^6_=;ifABJWBIrse+Ir(3C{eOmvUpxl?nbiKXD*hKS
z_^%uEpF#C514AZ*#x$nPXTnwA8FD`|BtB&bxXfUCfI(*!gW3`X)!87VIUBUSRHYfT
zo<S^}f!l|H-5z{ikTwHoZMg~qgBk;q76>tzgU+?%jb&3WXOQk^5Ncx(oXa4(5VXop
zcr$~{K?aqR3}zP@obED&-Dk`B%Gdi}cFupPr644*{6EiP@Mt8YmVwLxL24iH0t3)`
zYzP<9%>l1NK-5HVDI^AV*?G`D5%<jhTr>W2&ic;*J}C~=OM=ZJF)acOwJ|LO_d_99
zCBjIiMgN%>|L0o{TAa_l_&@u6@N{0^Cx)4y+1CB%KJZ`i+<)0y{}t~3SGf6K`ObgM
z^Zyx^{A0*^$l$gYbT5YCb_U%Q3@Y;(q*@uon;7Kl7?hhBG|Cv%N`x%3^g`Nfk~`gU
zCqvHJ^Q~AN-n2EceNS}fzUaQAiBnFe&c2wp<W~FMkFzfPUv%^T%nSePw>>Leb0>fK
zmGt>%Qs<qEpK&5?!oiTX?Y=c@or@M*rc4E$pODyV1iEOl57Hrm*FJCtO7|xY)H#B5
zk>Dgm0i-vCNkV!+klF`Q6M+ZiIt(Jw5pwMV+sE1pzXQTBqSYY01#;dgWQGV*AL)jH
zt_X*adZ8`)VXcsw2ucPw!0RLAQ+*&65Hbm=gOEw+jd=c*S^-s>Fj5nCArYJbx|<a|
zLj<XP;FXao>~saxK2fon7YISN6hTN8*iItM#oN$%B2b42dQlO$G6J2q0I!B1D;D6D
z5qw+@vVu;*J`)+q*`^~SS(`L81UdH%jYO-Jz||1wI$$tk7UW7SYw*>LSnDH*AgoHl
zS06#tz-uB1gQWThwVpz+o8Sx1;nfpz-wLx<LKcCrA(ak<gw!|?wtc};+q}iLd5a-j
zh!ltfR}her-=SzJN`-<d1yKW00O^`R*bowVf(*)rRDy*l>)s(&K#YQu@VX4tO)Fdi
zsdSuR8sS_x!xnsX2)v628Q8QhTx6ZM*sgGydFfh<vUSpVYlYL6GGuS&s6WoU=)LNZ
z|KeBvOW*j<cIH3h@&AIy{)?UdFLn68$lm{gNB&Em0o`-NcLH<|EJOcO2EGUe!D#L1
zsX245Iqd)Mehf5BC$#&&$VpHM$ui+TL+EuDjsFbFpko=tjsJ7%J!4?l%D`2j?6XU-
z;;UfASB9c*4B77)BJVIbA7e1y$e_6#R2iwxX3(C?pgxm9p`Jk?A9T4Ms83|Uz+eJ8
zrwDY*o+5brtr`QPCIf>hNL(~k-D3ue=3EBOS_Y;*P<>={Ny_JqV#<Gm;{S$?|E;_K
z8_oD{KKH-!qW?0p|MSiK&oli$<Bb1IkaKvz6%D8>1gV%H^$Wb41M8<i>KX_M>KQ?l
z!AY=Q#`*u5zzj46%-}IPt{Eux5$MVf$QU5Z5Kw)z>_5|r|BP!uh+!q@DkApf|Jj%S
z=UV)qeF3O4VxIk<q5m`U{Qq*t{%c+OFMkDuB(MFKxC+{XEPvy_{#DR^UjCl{46#=j
z%=Unm3u-O_)kj)W84PAKm`!CcpT=g@!Jt{rAnDB@=*%E#+YZko(zlO^X_qaD_1
zk~G~ucaeM5M!&`#!JYe~CmxTVdNOtP`OF2Eau#3BS$sKT{`vIzXVT`KPMmcrX6n(9
zw(ZWP%WQMzf{sN6Z)-IIU3uJtwK9TKG-xEe8Ul^lA&tmEY9dJOgGoZ_BS=jIAq|k~
zBjlP0!iF5A0I!U6BSDoBq(0Jxoi7KjdO+uF>w>E!9oV`B9q`enkXyoagX(pI>LDX?
zkopK-2Z3(E1E25%?+`)8;vl3t?3QrI{ooK1vMd2kLMtHf^bmp#?ioS$7NOQeCE(r=
zXlE<to>q8G1Ysz`R&;}h=1^)PNPUE>GJ^DpAS)IWA)|A+>m#(92*QP&TZoGUS3on6
zWsxt&vPho=Ii43%0pTEL;~LO|b(cUD7NjOZBa!<@$ZWgZ1rTJPw-8wz+pN=k@K_%*
zi9YTJ*`*7qeejd;ngUZ9M9LmE)(5E?;DY!Wn5H3%K=j(?FM?Db5H6$=gp%O9dLb1P
zRJIVM&LT<BwrBwY70k0QTHsPN&#7RJQ{f`#qQz!;OZ3v_h^8!5E?TcR^N!b{|5_LR
z>s|RTbRN`QVm|>Io#Q|IpZny0#$%uZh~y9bXGlBBpf{ht;34~>|595)n<6=n|7Si8
zI@3)3_<xoq|G7N>GwA&kQ2#He0h+iG*8R^Q{g8p-D1**A$&_adRbLruzB7QTr1uO7
zcNx4-Fj#G4&|eMe7O75W(CT4OX=9K~Vqo)UU^Zl6&;@mf7!<&Jodm%v%q789k}7C_
zE|VF9PMt~B83wau3_?u|`in)vPO;a&lb8VNiSWz?54(ZK$`}`cXJ^1{(1L^o;H7bp
zt`Ma5fz=Dpm2$9}2(5B~a*<^r+95R&oDE^X&7Ak23A#SvFT>2g40HZ6%mWR(aW4BW
zy#Bw~rvH*#|I6(7FTLl#>^=|zF?Rk}-v3{A|9^!;pxQ@t?|)DYwHS29o#MIwYIpz3
zUimL@5rjC;gZi^P=l}DZ|IdH+Ki7%>oLm1hO#R4^dWFG#8-w{`29pI0ma|x#7PC0d
zW3ZUaq*2Mh6U)F5%)k%;IxUbXh(Xw!LBxkaGMqsvfk7vO-Mm=9rBTACQ!aFZQsfl1
z_!*i>v$fM_>u1k5&Rbw!w8Xk(iCy_}*P3-M)oUH{7n!C^H%XjgoG=-(bOCa39Aw!x
zWObZDY@cCVKV%;(a%BXmi6He3RuZ!sGK}hk)IOM`L1Y^Qfv1E(qjQE4?S>KUun{>>
zR|s-!f>uO}Rs@Kv8`ca#I-#IyNH4TSCm4E=0_eI25VrwR>A(-%)(xuH39N(ENAQ{m
z!a(i{LD=xABFI=A<d$$qO{D2x38`@)l@Wxk2EL>fa#bswRProS_9};r&Oz34Lx$$y
zogg?v*}VXn4L@T6zK9OKr3f-l1SgRzBec#DuKEbMPXuQ}Dj*0cW1R{?kYl5eN#xoG
z8+#_~2*jD@X&?lzl^}H!oP<?Ri24XJ>jWW@D<fn!a+L&GPmV@f=gxyvMi3IMKEmV@
zQy)P(O~hAB$n^z;4KMQ{GgA;QM2cFZ1Gq|pm<+9nAY+N(+6PyCM5^u91@o*6=0U0?
z`=SL7pi2<uyB5!PDPH1Iyu`YArFHR2rNVWR8A};bS1{ylWSI1rf9rpNv!GTt_nH5k
zr$Chv+mZhaNB*;)`46g(=KPked#~K`m3!HLrM>?-kN;;q{GapUfBx&B#ymsfSqAk_
z3_9P1H2;g~{uk8#&m#AqLE;{R_+19?v&wlN7%IOo)c#<o`N>fHfidkdL+}L#$GxES
z)oL>s6eck!G%<)(Fz`n(Ft{-=XfiOUGcd@5Z>AFf?QdcbW?&FwU{C^gmrNM=oEV&@
zxOU$ah&sh!wv63#uXOB1hLXp;tzQ@>{AGb0TMa)Q4pKWT01wk4Pxrvt5Csqt(r<#W
zv62vTkV%L+kU1boO}Fqr=p;JuDhRgupgs=g;{RMr{<E+A&$r>f_?G{Y+y0Ag{Li=Q
zKhFvf;#~EgV<iZ2uKF*r<-f{-{~D+M8(jLYd-cEi`TtT!{xh!y)kmzG{tKP}UA!oA
z^}qOy|6+Ik3*Y=NdgH&)RnUp+d}sd4ocJ%e_rJ>G|4fY^c!KtU>Lau14B9;mO0^7P
zg$(=&p!!HCn@PHaO+KGTwUk*cj!89^K{=6GJCnnrM98U5+P_CBY_fLJT;r^zHl^#F
zs<*f`ZF8*M=v2Gjv1XlP)he6PrG^=^G!rIigm)<iwd%+88^%pEj_Wsxp8z?u2T~J3
zZgzm|4??bqAWIRj)<JMVw8{wH{lTn>AR-2lZQz;+GAjhCk09f3kP$fu38{}X!<#k3
zp*4|iSTlInDwg_4E4T?>8NnGkfpt28hy~k_nh4MFK9DL2QXfGkgEU|_5TTFD!R`%L
z_AZ6oMg%3n^%0~df{>7PbnvPN&VbZMkZK4*DuTO3@M;KoEjMz#1F4VT19T7uXqXOm
z1|6h-1X;QOuYDj4NbLhBA$1TkiChz5vLW>mTpY|rsgEGX#iNn%DhbS(38{b}Q&A8S
zB7#XGS4=k9bI@uc2p3!*%>$3=fqF?0K{$z7A0dl?4=z{$ThxbTXD+;Af-^`h&q-BJ
zd?o0RzsMmUebGCq))7?iP%saIoQf9ML(x3jf;lcl^IVGNITp-xESTw#H^(7wj&0sz
ztLz1m2{Y95mPz%VvEA@q=hT1Wi=e~0g}~=LGavuYbnHLtNzk3Mpwmb9{^vXas;vah
z{Wm%FpJx&HHdwVU{A&OCRH5g1vnl*#5dOd*{D58k3q#a9j)MOTmES<9;sZm$3x<??
z3}F`-T=p^;uVK)f#30+oAfCj)6VAY94(=I=gS$l>;PvE;;9FC985jgWdvY0+8Q7gz
zRN@(;wktN>X9zyZV7Gz6XB%tGF@};`%pLC-rv71`1zuteS&as%ARtTT(CQkrIt5bu
z%mc3(K<)=YCXyhskg5l+5pFm{1hP_Y(SOFJpeZ4~mH$Q7|JT~`Uvc$+jfLN&r@Rzt
zzaiFnORe>aQsV{1+LMa4Cyn}Um`}cIGwX@%?59qPU)j!kYB~F<#jGbfv!3eAda5|@
zo$jh%;=BLL9s19)8?-x&VJ(>44!XCO?Kr5-AaNda1Bmn$(7>JSng0^U|Jxq;ufFQP
zM&2(5vqKCb8yUD)FfcD>U|h(+y@Ww>6NBhs27!GH;>#H1*Rh(m^4aw%h0NBCUuIFT
z-mPwLK<~-OX_pe`-%ML_FL%wug7r`G*FDTz`yg-K{oJ+p@>btXUve#Z_W6hjM}k^+
zfkuGSW*R0;hTMe*sgEF}K^*7|A9#HPsfOSrr0PMdcaXX8ItZBonF)f7!okQ^z3>)D
zWdyn60hxr<M38<E_{h^%NUa24G6$X*0`-g_+lU}%`#?^XgI7Zk2E6t`tALOP=-{Js
zkeW!tzXrZ$8*-L{nqLLH?}NsGTpA9ohCp``6)SlaL%5Kd2s}VnjCR}{WJC@^LeAgD
z)ggk^J8%+G?_iS9nh3U*TMo9RNWmc;w0sU}!U$dsK^Vx@khE0_1i|YXq8T$RQfENA
zNakrX(P|<H7hDa^hLJNNbr3ERSFL27H5;;?9F4TffiHdlt%ZQd!s{bk40x>sUe31w
zLgKHOupT7=ucy!$Sj%}dH578~^A?g(ACY9KZT=#Mf_e7&b0Nqke=Zo!w#lCj8nP>x
z1F4Uk3g$T#%(Kl~>`=H&EqAG0>Rg7z1q@j$89Fa>ul&q?=)dS0&>_#<=l?Sw2O-|0
z|M`!DuG`}}`CsJHf6>d})1*DOG041P(Ecf*|6g41Key(82AO{h0&f{OpD~C$6fpS9
zkoXl;AC-M(DE-7x_>v*}F+;*FhQQMdPP-XQ)-q^J1RYGu7s<fn%fO%m?g0sc`$Q}Z
zpp8xpV3Lo4K?FQrr^3MPCTd#1lzc$6>@tJvb_VOU48GghVvjSF-(~Cm$T;Ia`#jJ(
z0ni8=q?&+`^TB<VIpE<tNaX?-hcF-{L<*u9vicfQiy_xg$b)_h!QHME{~1^PXIlNA
zeeHkF_5Ve7{FmDIUt#xu{#E}O+8!`u9%hK%z!1NMA$1!=<}Q)qL$WoeHCrzl_usaf
z_P}AzGp7ZwJeGa%UH8>v<p+mFZ|oPo)}Q-AW7<Qp$&W=QK4h5ujB6cer<mNu{~SmE
zvmW}-Z~%0=Aj9fk4D0?fZ21F1JO49m{Kqio4@2KOhSDbt>DM^?ZZX)LVAVXvtbT-D
zb}xh2HU^>9ASAGlLFfRp+EyO@eNtYtG$U79=WcN=-{;eEIHc!v_>>Frb8e(8x|6o#
zUiz}zxoaNguDPGH`d-eOyLs#G7p{9yyx~#Vx<{pJAC|4WmofW7Xv=o{yoDx-lZ?Rq
zBWN`Qsf^-!^kYDmq`<2oI0I{agk1H&*|2&C)a$`oA3-W3eee<m<jTk(yhRr_D5o3N
z3_4yLbpKtGR#+3b8iJCLnn(xKA%dQC3fsX78k$2Mozo1cMXQV;6%eHMf%JnQTZbTQ
z#Gwk{Bj!LS{J;j~K=*@#+3;!zy=MffjEWUKK~)m+ovrW=k%~tlq#A;d@ZJ!-KEgF3
zhuk@W*GKY>Sr9h<`Uq0>z)9pfNX9x9QU@WE<|&Aib>VEhbrYyQvPgxJkctV7gw#{8
z(YzV(nh3H?0?x%`pw&%i-6b?GWMq%X`Ut6>g6@HWS4Q|5kWoQQlB9B;DvBXX3L(8D
z2ni8^kZ4vw>PIxMVfq3h0#OEG+vUx%&7BSD0omovfg?zT4poyo&jEa#IOI@qr~C!>
znF}nEW@;qQ(@9?>)p*!^-UHQL|IN?**F5)M8+-w}#D)KwXaB38{;$3CKSTZ-2K9do
z3g7wE{|jn?=8Pna{xhijXApeP!1aWI|0RR!TULkv3|T+fivBZ{y<;eO!;tfYA@wdp
z#3csL!=StEjFy46#t0QMa5;e&D)509lf&+>L(qbtgCH2RKu5U>M2Xn8Gh|)jEWXX)
zw1dH79fR*qro^)h<@Xu8J~7Pr&kV10AXN;c%7C!p)e5{$Sp>R<g8@RqRYGbdNKFLq
z6G0Zjfk~#N|5;c3XITkC3@iULt@+Qh=0E4k|4JMF%dY;fxA3=M%RR36%?!2^8H^j4
zocbjLXXvD?wk+OcSFzo(e!EZCp`eM!!ls@|S$r*H`K{bF_bax)D%<|5bQ|axgu=Ct
z^H)92UUolW;oaDIw@fGBHJfrzW8Z(T2cV<2<xl<B-Sb~&^?!wl|M}|wOJ@9I34Y07
z^N_*dDucug2AMky@^2Uv-!Z8EWl;amrTU*q_B(_44F=&W4BXopIJPjz&*m^#!(%&<
z-(iZZ-*mnB6;=i7J?r-cbRG)pKM^<MT-t(b`73Uft$$dy{$cf&XKe>Rb{+lNcIbW0
zj;Ey?9u%*;SGeY0?uuKv%WkC2znC=TsAu&WlcWg-aXor*U2tR&*QFoZ2{}ap^AuCa
zngn>IgT_FvgOJ&J;ca^1ZIHPk$YrdM$_UaAf>%cRVW36Y`ryh4JR%1=Sq@eofv#l5
zJYxa#d^zL}5xg3L_lC3sYPI}p;0SUB9vTVVIs~tbyg{8KNCgBYOO(A!zzp~p9jH&F
z>{*0_iXj)aDti{g_7i~);)c{g5E4=oA(QYiI!KiS?-Ripkg5kxLhB&-0G(r&oI@rE
zfjdXgkvh9nIlEMNO(biRMzAuHvP_28LAV%@x(O}|uZbWGw3-OQHBXyqmO2AcADO33
zw@3%=Z-O%{(r1{bO~<T^;3AM($ua{v&<EjSl91{NQXk<Wk*cA&FgCP4Lf!@iDUh*}
zuo4(FHi*ndEC1=hh15I9B(i(Z91n31njo@zWHv+@R0LcH;i->obLN8wOXu0=%yr0}
z=a@Gibksx6Vwb$7#(66>lIJlbEn&@B$53;eVbWcWwZFx7|7Y0)ItH9|<u8WrI}CoO
zLH7Zv{AM=&FRJ%n+~B{M0jNr1)&9?*_@6=gD~Ix52Fw2f-k|!3Deo^s@oR>nSD^YR
z@g{@c2?odA45lj?H0Lr%S23`sfh!{|@V(xwh*}8BVg*ekG0QP9XoKn_`AjA6sT@VO
zgle8JxbBA3N6bm*7^)tD>Z9rZQ0gOi6$7b&AQcUyZi3W7ka-^n8?G3x3?j1RKkF*!
z2|S$Z{&TJW&$IqN-^Tx(>;5w>2aoUde_`nR2tuXT7=pJj7<Mx#Rx_wqGw3%n*miMv
zOjVCxW}3Ujp=zgp=ZTO>r($PcPF!#`ecA2&bq|X-J+0gOq2u_k_7lGw4}GoO`Mz@N
z>!P*K3RXYKT=gt-)ic+{Z!M-hV3_oPeeP?9X>S=?ZZo7`VTe1;;BuM4_z;84CI->f
z45GUjgm*9qTwoBp#vpK)LF_Su*gFQ%w+tfh7<isB2wi3nyUrqhm{aBir{P+D^UVzU
z%>s^7wPTjr=56<HJrdq~CT{wL<T+Q9=3GjdcO`wnrL4tQvld^?UUIE)&7I0EPb#)N
z%v*ImYw6XT<=1nT-^^Wct7!Gz{1vw{XP@zJ++>?Q6Rkc1S4N=vNH-dES^}g3f{>6&
zAH67)McT;qj($Wtr0PK?;ewF4AxP~5sfi$@L3j(KCW3c=;FXb1DCp)pNR^}!(gg1l
zK^TzANIR$z`FuI#`UtW_0n!_Sl77{Y$_T9vLgqs1BT%;peD<k|cLk*OfsxSq2r?)K
zCW{qODx)HJWdymiRmGzSQU}3F<Z1}ASWW?UUmm==fmB1##R=d_2bqml8G))HNM+=Z
z2{~y2G)xDnjBHa#t&fmT)`hU)RS$MX>SXiO$rfo-z-W?1+9Zp#DVFI|!CZ*o6!Y|{
zaAcM`4S6v+q;A3_;gu0G1G5f77J<}9kUhD$>LWC9E6`D*^DMKW2)RnaWTTYQuoZpK
zLOT~R2?bF`(%>GXvVf2fjmWhOel~1*256Na+-hqK=Rpjy%UxgzM#w6$vaP@y!N9(Q
z%+1Y1zK0QX(;xVbVAwe4Jgc0!HhJ@H^Fh_LZO(j1kXmQWwauAtmA%L^YoT7+9KEzT
z3OP$v3RbEVZ%`^+r<k%vB6_im&o&{aH6rFugv}mv=>F%@|IcawLcIF_dG!8sYyTHC
z{LgInpU?9@L-JSV^q&lQ&l$3xFeKe%2tC8#zKg+PHG{!q2IV#e(Ma&7S2+d-A!z-B
zLUA#$DKN-HF(@Z8NaQexWijaXvxV(osC>ZR`GFx|KZDgu2JgLGiI*9wo-lNNW|;a9
zw4@DQp}>1L$P3{hb<jd^&j@V}2r|>O04%i#G!4YC7|dSypAo!SiD%w_o_YVJ7XKHT
z{a<j>H}M&N_~!pt+4|r1*ngd^|Cu}&Fz}`@Fobg}lylhhaM<*S_;zR|&vUO@AJV=j
zy6<T6w6p1RFDB1Cn>g!K>b!G_vrZ+?Ig>Q!Lh{^;S<7x^FTb6$>`u<IJL$`AW-Pyz
zx)OxqmfQ@Rb5SepJcI5u29bjdQpXwOUofbDU{L)B+Vd;<mO=C-gWyF5u2T&B+ZY74
zGe|6FkXp&0IEPVXKCj7qQJdwmZu8Xqm#g^CQ}&xH=GrV^Un}L?Vwf`9rEH}~)mpd8
zb>4NGqgvL*wr`ASSQuVCC$yl?H>1rZtHrOR&!>2TWkRh(O0QSmbeG&Yo<$4Yt5$f{
ztnzJG7umBbb?U*;){Ryf(+uKzjAJ^DV>*rEx{Ttwj3Mi{!Dl8IM|B!Sc3>jNqBsZ%
z>FL00CFGh2d>eQhxEg}q39cX345^PGq;7a41nGn|z$+urK?>ltb2_0dTCkf~H9|lY
z6Qmk~kg&=KbeIq1OgVT>q!~~PIadKv2SLW=G{H3ya=imxI|p0T4PCkbKj8<wW?R{-
z9E7|}!Q*nEeh_$i2+C0OEW)UXij+JGl{|`&YNA5${#NL1t;mZOFc)$|E)a*`8xCQ}
zI%c6Ecx5E#kO^T!>LVGubU1?4N3ymlAOzlFgw`_xjnr8uNh2XCt3(Kb)IoSiNUa1{
zU=FU2VB|zdeS};YS)@%dOGB!X(xxNVM-Vn<eFPCfu8fe`$kh-U8&V(PBGKw2Fc(@G
zL5fgJ60Ksg1sBwqB&=k{QZYfQ7_7xUrXa*Ra76$r+X*rt^%15OXd2-Tw#b=pfpJU|
z#8Na8rUiNlBi4HuVOpS927~AF=7Vob2ZbuUKC;POVwML&^4UuyQ|AdK&J#^pD4(=S
zIc=Rz%rX7=lj>gI<(=O18vSR}{?DNLpGg-~83`Kym$dvZVF9X-xZVCU#J*=p{LGN~
zh#~y}L-ZvEpTnR71WcAQX!kM5HZTZ<F)+D-SJMfjR76mYAcv$euW7M_LmiKGKcDSH
z2LFv>X(!klUvRa&2GvLQ>#)~HkWLVUgj70^P7rL*DQJui()|HfOiW8a$G)+y{Li}b
zKg$YmO|;-Y!^}SnQ-6RKiZA=my$pOqYvXH%^otBJM;OBQGeqoRh&jNJdx@dpDMR~h
zhMWTo8ubjqSqv=EJc<>vfeYn>7we=hu&>zc*SIaHbyrCHzQ~@#;k^e#y7om*Iutkk
zc+AwJY4gu#FTI(!;!ghR`+2MGWi7dtw%}UI;%kWuE{DxJ9Wdpn&!po)GtR0coCMv<
z!LgM=_yB|a3r4Na3@U#aWWIvVpb&Zn>K2JzW0gC~t#Vqx@PLTv5m|@b%5DcW{CDXD
z@6(UiU=+PsKWe2;_%d;~776!O-Gu2D*^A6FW}BwZGESRqSGY_gsD?!^f<fMaLDY<a
zO9O-iv>0T}7-TFNxHTA9bQw4dnY5#<QzqNw&oxP(YMeI3G-H}e$pV*>1?H)f^+Bg7
zfa)X2soJ396u^~{A!=m=8*PKG$A(oykfm|0kopL%GJ@AfkaY;iq+VzfXk0G5Q75bc
zg3#(C_!0&6;6_Mg1nD2asv*!d5b%>u;q?)`Lj<XhAT<$WZV0U=f>cJ3t`LNT*GKT(
ztSaDTa>`!i;7(8pa!sV<1?m$a*GJ%Ls1Qa%>mzVYgs6|Oc8lQsBL$aSV(KFqcs~ft
zKvX~unQ*qWZ5ncYgj^FL&l5>oCqXJ8WKz;H0dmGWG6^{W9$o>VGg2m*rA&g?N9L&$
z;oT)T!wg&{nL%o$wCSL|Z>iHDB&0sVBq8+?8i`ybVX`3=lUc@0T$K^BxMenImy=EQ
z9E+^E7FlyKt0A}utjL89>%mHC(Ao)PHkvYc{{p$j!Kwz?5K`H8xeH*Uj_~0}C<8K^
zhAay)99|z;f)CAr2*OEhE=1Hv@C$+=hNF>o*>mi3=Gy1XwF8AM=*$g^tl4k}K%~G8
zfI0Sg3vIIJIc3gw%A9YXH{BtBhHcg|i`0eM5qnjGwu{<c6Sls>uK$PM{J(%H2r*dv
zXR-ay5OJS5<}pLkH4utA!Qiut!EQN&;VcHtRtBju23}9_aX_-*O-9g42tlzqu^Hwv
zIIm>z-OS*>mm%r|L(WB}y2oOz&l$4MGB~Yda9qLQznc{_UiXBd;}gTwf1tzMARU|~
z;A#b40WASvgs}`vE&<<BxB|5KlwrkxmR0}R*8JyM^Ph7yxHmNW7sIUY3`_pA?g1?_
z6u<mm^6G!hEB_TvgO2-V$h*Pdx}U*h9fSUC28}5U`jZ$`I~e538Tir}1Tq-J@)!gH
z806yA1FOsuI^A+723E}RuUs5hy)>w9O<2>0sLox{UAtrY4kk=GnzP_?{pM#APW+jE
z{{MuNzgzcxsM!3pVD;Ukd8gxNpNN`zG`MeHK<jb;mSalpyBS0}7(`Ywh^}IgS;3{g
zncHx+p!r5&ho#~!D`nm1D0t1+3!GsbHrFU_D(H06lt~sTlPpsE%#wQ`C;32js%nL`
z=tj01#ZNFxo?;Z+2f7pwTn;b@STZp1Gcd4%5#*#?R`AM3X7C`YC<B+Sluc@2@f`Q!
z`3^ZVtkWl3rcN|V>N8CQ9m)+Ei$jo6-FlH-1`$055k0V;QJYaDeD(*uK7x$7!D}MO
zzEn5~Ilu=}*Ffqc2pd8|Y9c5JJ<<m>C<m{MG=mz|gX`f4bIBZJEjPRxg7=Id^F)yP
z2(n56zK00Hfb1bcCgJ;uAPiOSQV0SslPgy7f+D0zBItw>tPTQIMv!$2NYxN@^#XED
zgxoEHOch~PL%4?KApIa@61fhNu}hb>O@kmAn`8t6Z!AKoiNH(gkgFjnt3*l51PDT|
zi6CsV#7Uq-;FG7I*F+Eza5^-Dk`v5QCcxP6N@}uc>SRcjWSTnJG-ax3%2Y^Ygq0++
zZbGiAkn1Kh@D-?V61fgSV?*jMNPUFAN`ll)kg^(HbfYmKvdAPv0kQ}t8={615?<h=
zF(8!%vLM73^Xz%%u<=RAX&4X^vO5goQ)Ch{3W`i(-Og+au8AO(5u`q{%$RKfUJ?lj
zcH6Ak))_M`!Hf27vgcW6&2`L}XP-95I%~3B?lk+H6*iemjT4R<#2!-fdoJU0m(T1!
zujzjdU2u=d=s$zue+JJB41qToqRufypJ#|T#^AMs!C@tX@f-%NHU{Z(20kxXeI$;k
zhQKUlHW5ah46V?KtdaXUVh%CHpJmUy&RF}1xAPrW<!uI+Wehr#8SIxa1ngl+Jj+o1
zl%eYjs6Ltrp3{L$7(seKkSb{@xb|5MItGDp&3}e9pac3qyG>Vsjz?fx@t<kQe}<{w
z7$&@DS@~b)=zrxi|D`Vem%Q*_;>v&V%m39b{x`hy-{{bPhMsQ>A?Fy3*D`2MWl-)1
z4LQoxGe{OO$X77Pl`$w)F{r0A2nRDryD%tuv*`x%n?*^x=4%Gm8pd|pWK44{S?X1}
z%DZ+$c;}wPsV6e$UdUQ>EqCe7eDI`9$@+&Co1f;axSF~2QqtVB;gb%z*X&b`n!~BL
zh*fKzsO}+g{Uf4A`{b;TE4m+4^Esv-vQs@|r(VQby@<8uQ47tZ78=D(HH@7Cy}d4>
z(>S3MvIbi}y3-_~53<NjE4)KDs@p1cT42rE;Kud7)hmrcYQ-&+7zB0MrOd2dqix(`
zWYz6?#Ppbib;Z;jwVhI}gG$XJ8g1h{OcQ${$Rwf1D6Y#e7P?baH?mz1bgDM!JZ-(m
zF5QSu{qS!6@NQV2s1>{j0aOz~L|}CgXoIRAbU7U2#2!dx1mCX;K0*PyC=OB$!AWRM
z6xN^>QU^Ip0WuZ`sgEG*<sg-jT2P%@5MmDzyaIysgWz1yQaYsN3Xq-=ygov%ebCsD
zItWfeY9dwdQWdWfFe=7h4JmmPD!Laay2A(MAbp}jMb~^>H4&s+1eqj4tB+*Cvqg}B
zIC=Og1#oW&YfS{Hk|5O(oJ3ScHp!6w5%KjA<lcD*X$roN9{Cm$NDYNd!fPKe1802%
zs)<rT6%+CG5qNVGbZ`&3_Q7N$*GCXpWD$7Xgv_vj9aj!ng$^l=v67JD8i|B<tdNUn
zG&ZCzfRK<n2vQdiLqaSfMkPcIa*YFFL;4_SPO{0KV+TeMSxAiou?s>%{0^_3;S5Nv
z4KWSE#zWd<&w(vFnG3FuKz&b0ePoe6(;|DOZT3v-%;~mZ22^FvRNK7i_IcCoa;Mql
z&9cdvVUarDB6YrI#5UECO(M>Bgq?1)n?GmJzt3R4lfmH-L%=SEz}*Z%J3u`hm(>iG
z3mEkKK<BCn27%5DXVhQ-ZGZ*WLkt`Y4BQM%*24N3oUsQLOK&pdJOH8mdkhuNStfmE
z=zGr>v64Z)fI+pI-Dw7c-$sVSBMjB|*}L8`O#8(!^Do2f-wg9W_ueur{|~xhaQ%PQ
z&HvfA{^#2EpJ&H^zU}|HHvebe@SkPvf2NiHxi^6j!_xl@Gk-Bm`O3ZZzxLJtlGpwV
zp8L;r9)Wny{pUXWUt&Ax*lnitn+%q#8B|&s6q^_n3m7!Y8BFTgY&+P^x)`+U8DtU|
z*t{4Rv=~4edo>t1j2I-H85F%4)FU|!lLf5v1a0yKY|F)6>ZE-;W&FCN0(#{_CaNaP
zHq2RWRl32xYO8boR<E`_eys;Q>UZhH&K7lRQSq6i8#>!Ga;{m_JoDID=CQMk<EI$M
zPcclGXb?ZaG`8O~w%;snf?3=I!-P%{itjdv>w+x6Hj3>tjOjIx>$ObiH;?NzitI3m
z=rWG(wN9TJ(y*>@?#YHVH@dbx>Dclpt?xio)0UdW7bfj|*1GC?`J9u*GmaNbJ(4nE
zUr56```p<^34M@y#4xhiC<?mw2QtnESth3!)d5<V9nlG{k2>|kJ0Z0XxH4)5PXj?F
zgW4eGz)A4(Z0z+BtX9HaAAzQdz}+J4pn4rB12hl^sfi$~6jTFiA(fF@P#wJ9L0&JX
z4ZfQd(iPG`s*E)Ksv$Lzx__lQ?8G^EH3X@8K=(etj{1SrLEzd4bXzM@WmH0BWdx~`
zz+|C3aWxU7K0;p04X=ja4CFdU4m>Ca+13iFh9D%o8j`X}g&;ijk(6aJ6u~BpkUK|`
zmI;uW2tq>YBT#2NaT26fLavFR?4$`MN$@@qN__;bp(dF_sv$TD?jJ#FCHx&F<oXD+
z(}cV{4^ju=B2Ck0n5NG#L+v-2Wx%&VK{rN03S?X)q|gSFgzF<n=?x)~>m#ss&_EUy
z8OU`ItgZpwOAfIenS|6w(Aoh!N{Q)NNHt`ZG20B(y8+cEcxoSLg#xSDAu261W?H1r
zfYkDknFZ(wYW7UC%<0x(gjOF}=S&4vM!C~$b7$J-&a}x~Y?HazC~?1j+#WgKXObQd
zxvgI_=-p$`+rXf`mcecXgWCpBW#qSo!F?Tr;|f;0xeO+g85I&4_yZW&^%xiw!L<<+
z1A`C?w=9FOkBC7gL&y%Pyh{ufZx|}yGE~1}?fS_-_a8&$Ee7)r27yEd)oKpsnG9ZQ
z8R8By)IMZxd&My8ALn9F$A^8>f9{?C1^53KIPjly=YNiE|G9R6>Ld2e|GBsP7vA$<
z?)ZO=i~n`6fROsR|Kj`pv#$niW#!-bU-metG7`V?U-Z_0!E68dE`!kp(EYjm>;5xL
z`@@z0kimT?gKi&#atni2C4*Tjt4%wDc@u+51%q%ps4`;nV&Jr7;Id*6wiVC_;?fCY
zQt@Mu4`fgXV$g_Y(NE&GC=z$66Z2@1@#|6ypQ08sQz3eqavX@1jhHMG(I*|=D;3xy
z<k=$S-KiQj#Vl@#W#V#+m<8t1^C9(-QQTz1*h%_v{rYjB%E&af-z28bG`8O`q0=C~
z16&zF>m$%X+2H!fEVjoYzRxtK+b|M@^rJg<BHE1Nx@<G2dX_E@ZCszwvCF4qnRn6r
zsK#~49b01>*9BKB_sE@Nl{UpJX@YKar%qG{XtPgbn^9B?a(x88O9-(`NH?MrQXlDs
zb-?N%$Z7<{4R_!Q2yt2hm<z3sAQccg39f4(H4YjH8G?h8;3*-L`&iNHBY3?7VL%qk
z!7s&w)JO2CBIr&c@NQN}O$4ci;3T9DQUf2efLsAVY9(dQVkHmAVmZ(%1xT#~ADvTl
zFMzPY6%eR%1ZT**=ELhFNIwXfg!G2sH4(1L2(21|a3Qr1a(x7^i69I}Wh7;j3a^hq
zD;6N5bMR^i#(<2@!AWuRI0%B=^9vzO;wQkXBnSgiPeDlJ`p5)aD;XvAnSjRYu+&QM
zIVN~bgo^<m#Dgp>hj5YEXe;#KRTaGc!dfvQ_n07T>x`Lb$Tn*>K?Esvv69GzKN1^M
z0oi5GBNAC>%_Xw7g48C+br3QeQu{zih-2)s=h%WT1cCPrAtpdbcnt!njLZntN02at
zS4p612yD1n#!PSnV3uk6EXZzXaP>6P4BQ@o1~|A1Gs>K8l0DZxYqo6$w9{mrJ<~dS
zrfuc|5XzctpEK1cWr0D`JdLPT3PFnmU8gdbv@n>oGnjWVIL&47UdG_PnjvTtL)322
z>S)Ue3<~88ykX#zf(#g#Ef_d`8Tf)21acVUs~H?OF(h4PsD8=N^ns!KJ;$Q|Qq$ft
zgsx$bj$_~rV9?0tckW{FoX4HKgT3Xh?5xj9%l@mZ_^+_wyUd)=V*O88YA>->o?<LH
z%29EQzy2(D{TZHyvpikb#il$IoAX+3$!CR4|Ba6P*FOJW=fZ!XBmbGV{s*0bu<k$8
zp8xzOL4$R&SN>~W1tH_}phJG-`o1w_U*_@N%wRf~L3t{JY(InO3<lw844jh~Sb7;)
zmojjyVBp#aTIVLXo<VRulR`I_#$*QdbS|SJIgbwg$eE@oOPq@~`8Dki?K~DW>3rhM
zYsqu(WGs4^w)lS1{M$+MZ^zHM5jEpN*pxHA9fxfzHXCIwQH`0d9Wz-wX0l;izd>vt
zq~6hw>(`6zgY<wPC&HOV_nO7@LH7O_#&&`c_}U=w(gL&C9@Cg^2x$=2Vh{zoZ9yxz
zNjI!nJ)l<6yF|sWT-(1wH?T_Aze3ZyM9#lLKA=)1pboScAh1q14AdbqjBJJ+iJ%wJ
z1VYe*vOzn8AgkRV{UF1Lb_4KsA@FJhNKFJGvD8OM3$sC|_voP9`VBc&4si)MtR@1@
z5Wy=TFryJt4Qavdzk~FQ;5Xl?!+JoF+6S40)JhN%A`YpHpgki$=yl<sV-~=b4q8nF
zsgJPMM36PxkopL@-hmIzL24g)*E~h{0(sXwdDlESms~m5Y&q9#2p72yg0LYq5rl-)
zNAT6%$P-48ss}`ZcM?GkTY%h@2U(>6A*Jn7KnONrByEebw+PGV9IP6Gc8)BQ;WZJ2
z0jZ3{&Ep_75rl-CY6z)`AS9$FLM9<K5ri~K>N85}15Y8rdrY9!4v>x#gfvN>f~zJ%
z7B@?shFk|BvmrGUW~~ITkKi>FoPk+GfeKQL`UqNDqKc!{P*`gsyn>)RIBc`#5{V$S
z1MwuJ4ubW1KvOmlHnOuJ&Vz_RWFaEfS)frhNKFJG;gu1b0k3y38IXzwR1<+ZM9>-s
zYkg#qKEot!x@kJ-OnA^N61a0}lsVfd6I2;N>LcsS=@uE&EHb89rOmZUn`@If$tH7>
zaq2>ov_%F<oAeSkXvM9Q^qtFWJApy3kwK@O!FC3N#|nn1T@t0&WUFq=r|o6*n9rtJ
z#2}u)r4%lzn<Q*pE^1oIVm?{YX9Yvrb<z3{4DDYTI=?ea`N6yVKU3XR28(_MzCciY
zq@E|{HIX57m3aOUhSGBkS%(?Y4lpF`VMyA^kg=a3{|Hm{1&QuE3X`Ac%z0(F@V)W!
zFE$(gYOVMxKlhd3^k@82pD;{%#L#`4Vd4{pg<lvp{uej|x?h7~*MEks{~5OYXV?tB
zePhXghUuRfTAwo{9AdCoz@XU4z+cY5SH>XN%pf;`L18hI+DcXh5aN{H%_0U~bj`bk
zfqNx`;Cf!|X`;sSbV8<BColFa-W=9&z`t%^SnJWq-cxat&c{u=oH+AZ!mOJKvu?)E
zxsfpUX3E058A~6gExD7n<WA<2`)LdBq|CnN-L%&<ZLUu26vMcF!#Ges$RwsuH>MZ4
zJ~EE!HjVBzjqZi){Q*}-osiig$Wc7TQJrvP5Y=KB-3l3DGl&EaokcY2MKqd#k$Gg3
zVQ`IRRI^586Xc{F-7ru!WEj~DsgFR@J&;w_;BBXnxgJP;1S3I%au9LI3IrI5xL^o&
zf*feSDy&K(P#<Z6PuPakKHxQSpmo|{ZarFMq#01J4n?(U{xuMUT;m|uJCGS7HQy@8
zS=^A7a+-eC$h(IiY<MRK&QSF(1?^Y$0*%hWJ4BFb2tq=ZZ7aGLqSZl=nh0JEL24gR
zWdy5_AZ+A12)=d>QW?oR=fG<s__!Q|0jh@JQ$?`)2vQlz*n>8=LMkI!TX1D$mxA0a
zg3J>^NVNJ0UI#%Kk`_rI1Y5u$X_)}8jKt03AaxKXiK{*`OzcNPMoAMOA|@%5Oj0Hp
zB~5@M6X>!B^hydore~5e2{F?I9^!-4L|92k1%yUgq)tN<fz(QvRgzWu46GFpnjoYY
zhm)|99CRuy@oczusxu&cA551ZTZAbK5rI@OkdT3}v60a6Oh|==Ou`S&g<LoSkpg#)
zK>M97z_kzLKzJ+Amgkuk>4>XIpciIk%!V*5)8|;E&4z_MND!jYCVd8|w#t|ZimdcG
zmZ`H8L%PL08@Md0S&T{<Z2EX2R<ZUzS6um@ZSG%&_U8=A#~9pJGI(rYh&;%UeTAX)
z9z*9#hM7OP7yjp30A6U_`;&3bf984rh2u9cC>1mC`Z4hNGiVgadrV??>0)>3VsLEb
z^zIi4pP`Vv#H4VoUG;YVuA@;?FN9COn6Thh=JNYlD<0&oew?@FN$T>4X)7M3t$C5S
z{BiV>=RvdY*-gG@-g8}i(tV4~|BY_^*SYjx@AQB9J^$5ae`Uxx#9-3RAXUh~mj*(t
z=?q*s48k1@qFoFUtJoDbu}GZdmcPQMa+6!}8jIXPM(KTw%1hWZR&pB8<+E7C?>JG)
zXO3*>B%PFn<^?O=>UX#|><sQX8aefJ%#3sCi?8RezF)cVX~T{;T?fAO9QxL`|69Yx
z7v(D-=PkIEzx;0M+{=kG&qq!??pnD<D{i7*Y@Z(F;?&qa$Q~aE39omIW4iUD!PQVy
zCwS3yyJ1v2q&|X>kR@;iksaU@vq3j{!}bTYfx9;CkmYb-L68V!pDAR_4YE8NA_A*l
zn&AxafLk+UD-eXO6WRi<lAwcfuu7*1Z6FSGq&9dgPAeGF4}$cGAS6fxGBE_HcVHxF
zh6sM1517#asgfX<gM-OhNCgBTA)Ou2oDpoV5Tp|XCn3kpY5G+|mTGJGRzVmjBzT?(
z(jihos)H0gLC5<*Dj+xs-#w)0UWkOCgL04x2uwo8<?<C=@*uSjGAZwz1KB_XVL)mk
zIj3w{@O~l%r)>D{A^3fH@LP(YwGz@D;&R~4t)RmeAX|#yq)nQ%RSKjIf|J-QBcv5{
zkPeZgWt_Nq3>p$Mi-w=9i_CymPmq2SGHH;|2dRB<k?_h0!hrOWFiA+|g;^g#wmYGb
zSgR+vAlCW_QY%47NC^xn@jwL~YKI6`Dndu|kPCL(tl6NB6?nJ_Qlt|}q8UOt7vV@8
zjzP8p*^!VM2ND914i21+$p9;!4I6fYkH4WaAa{;Hl;Nt6AhHmZn50G8Y>TwnuowYV
zTM)K&+H~u*>CiSt>MV<tnfggHb>gQf`cD$HZ(=ZMV{n_n(0E&Y{ePvM|D`tm*O>QT
zxbuxj!waRpZ!+`#t1SC3x#_>+uK!9q{_}yBz5QpH_?Khxe};*_7`*2&NM?a5BR+oy
zg)DZ1DhAbT2Av`X$5xTBS(-U(O)GZV*X(d>-tXUaG-C3(@aY$0=3L8Ib}whuqk?tM
z>i2%F*!87w^V_^FA5vF8k6-aRe#L9Q#qXSFJQkgBS9;oG=9Rx#R{aKD_LOso!Fwr#
z<zxoa9tMMUA+tUaiwSb(Go_5Dis~#B)mbR4vrkm-0Kf7LLDgF#TK7e@9|-GS5Hq+e
zWwTetZoh=n8ea2-EM{%uo-@_s=U5i5^Jv@^+IKu^@|nb0mot{!%3ON8aP7mK6?Y0&
z+$mY}pnUDa%Jq+H*F7y+dZ%c~z2c?!^OxT(S@S4w*`0(L=X@Kt=qFEu^obzzL6G_d
zLPBa8OtwK}n_*Ntr1mk4>NJQ1FMflaji4LZsu$H}9Mx$W(`_2lX%OCG7y+t}AS)1H
zJsr@w2l`>4r3jF<2k;6AjRC2HNUDz@OW<(TN8kks^+<gp(Dg&$s;3UzIch}Sy9yts
z1J^|L8vb>VY6wn3>LWGZDtHA1VL*CAYOqrj;I$9r;vonZ&W6-PXhU<#;L1qR6MDQ4
zq(cND!G|g!DkJce5NJ>iTn)h)`Eo9K^3J)C`Uq0vK&m7N8&V(1I%dHckje;o1)ZD&
z%4K<wDhaY~0p2Zw^o&3|TjAA^ZHkOdGDrk=1Q1#sBx#WZ?+QT}QdWuL=5er^2+|=!
zuZ+aaV?gy$JZK{T<TggrxL!Cij_)-|=!4u&f=n7D^pR90AuWDD=_SGIBM1ZDOG2xO
zkh$;*2tNa@dV+HyB_5<q1{b{W6%ObO<boQ%N>XJJ)e5Xmw9S|a=@DQ`VHF|OMr6Mb
z!-kwa2&qjlNwd@$W~nn^^$Tb?4tn}i`cz9alr|k6@^A(u3arv+*dQUeAgHO3KGhn0
z)`w;KRNFKV($AQnpE1EGeu7S9m#}XapGPZ0)@IYiZzK+aF7;#B^Pgeof6(FKJN`55
z`Ok6ybaOiIzW+R{{xeMd&oJRP!_5EuOaC*pykIbFXW&a=5C~=94q%XoVbRE8(28fb
z%9jgjHBO)9P`TQtb$d|PzK97&;%A&nn0Y#J&bgHN7c-V#%~^S~bkn2e10R|WeXQB@
zx_I-Gymj~U);`Kx`zUky!<0pLe5YPDuir0~vrag5xmeUZ{^02<VUx6CW?02fwTPW;
zojAuNYN}rNME!_KI$_gv!lr2k&Cv*)tr<L7J9Mg6_++i{$vP3e1~C(j5+|tyw{TdN
zvznEN`!*Y<&vmU_<z2rax^ru6_x6;DJEJ=`gf}hoESce&Kf$ADvR7Vzc=a6boId}8
zNnu5EB1-0k*Q`uz+Z^5sLeYJD(q<kHY1^)!I7ut2LpK&YB^28Y?)bFpM}rXLlmmF}
z1F3+(!*S5Nfea$sbR*j#OR^yhFbTRf72GWXEyf1dN6_o);OCKohUTDya)_=FT1|v}
zP!B{3wivqwd{`V}U7Sux9SDJYLy(yuSf2>g4}#o(2dQfyq!#$fAxIsh71*F11f3*;
ztdRp<NE86-1cABrngNh%2(*F@xf+5D$03i!!FR1fDkIFU5TpWvlCTBakje;hz$v5?
zgv^GBK&l}Kspt+}u#H;#fT|(n-9wP72SP$BAY>9UA_u97ASApp0@X)O8IXw~ImdL!
zS#uB)-Vc(qONG}*pm`$jY!Rpef>lW}Hp!5+3ZRn~z$+HuRg#otGH5u?B1r~};B}Cg
zS*)aG9IkGWm{~LgA>T6sncsxmQxB=3AbSvyN&Wa<gM>Z^iOj%NNkL|sFiA-7$tY<8
zq&|X^$Q2MSHgbK0rUqFGnGG)z(aK~<2MUcuQ$q}w$Z7(!iosPMA(zr{HheAyGKqty
zZpG?m_!>nt{~~jd4j!BdJAn{!8X-g$a!Mg2NFjRBE2H$O5LsL#q#pz!k=0mZ)JF!H
z6ZJDd+c3?Nr>MlumI<585VKUW{Q~=j{}KoPvmE-*bl^YdG0^$dtOx&d9{n$L_&?kB
z|DX$1+dnb%{bZQ^UvT+<hVt7C+KmiC8PNJjDvsB%l-(*{+^gOoWx7Mja`%Q!e(gI0
zJNJh59g3NHJa*cN*cqqdXPrrydp35~sfejZLna;!o_r{3*2$D5SJReVPh4;{ZtkV{
zxz}T7U-h4U-LCDJRL(lSghet5OJoz5>L<=MNSbFEH^m}$vRTY@^Vk_?@iWchXBx-O
zHIAKY7`4POYKckQJd?P2hH<kDVrLr0P6DBXi7LS@eD<|6zTKwT3(a#ETjtJlC|T-K
zG*2U}NyIUeLDiQ**qnh|pMg!Cfn9?^(n{PqhDX<*LB@`O%Y=c?f<fL#!Zyn&q0cU7
zmPPhVx60*i6)W`;CTd4_Lh2*^=uSu-q!$GmbA#Uj1QvuI4W}Q`4(5WYB>jk1a0LXa
zl5`{6A-AULM6~EcK$aOGNJxbOI`;rv4e5k}7Cb=uK5!{mb%UsAu!`seHzC(Ypb7}J
zKGF^Xb$>KrB<ffkXhaU)0|GM|Gy_2MMBp9}s2YOI0l})CI(5HVaJQ%q(iK7`)xhVP
zLTVp)Wd!N_z)4lFay9P?XdUEL4rf4Sg&<WAgoIQ#kO~O0d=4T4C*h-W%I=`uL-6_t
zG9@JEg5{o8<hdbuy(8z84XKZi>mWEAbjAW~ZU|H>!N%f{D<i}V5x6FTR7tYnZV{v=
zf{^e!2$>;nmVj0tL8gbq%wi#<bC7xmD=BIkMQVMd7uTa7-;1?Af^?nGDkjJ-Z#2>b
zX^shAF+muZBYS8fu%jVB$LN9B*v=<6Pnn8b86mUbt0N!`NO6irA{XXpQkYz1#RS>#
zngz~)^c3J+Tnu=jjcf(HenA#QV<S5lZU`m=5=3Be#Qj#VdIuz8kvao{kOV<JAgk1A
zplStNk3eb@2njX{qyQ@eqy;=QX9Ziw0y%QZDs2h^B~P+WnQW3V(KvmAS=uC%6j0N}
zAfZn#VTM7`D#b~6oHqZJJM>@a*njS0|Je_LF85(P_@D0(ctyed{|uA<FiiW$wEVx+
zivJ9S7Z}u<8HDl~1S1$Y0~q9@g^df;e9Ej7yFChK`Ij&Bsb1n+v(&d?O;GE`@UCsK
z6ZXVU-jg=#aQ=dm-Fu$Sz4&Y5(RUsDp4V-;o4N2@+Pu?=bIv5qJsUIicvwH^7$492
z&6Wj=^iroAB~CI*oMe{VW17@$n$%^I*r}J;kBoE^`gG#Eb>h2C<3Pwbq0=b712Pt2
zlF+9c*{<SOVGz|~p3q{O*5#esZ4^?(uWAE6gMy8Lff2m2of&+XITM)63cl=#34A;P
z8v}zd1G}7rZL~{fyJ=Fl5oi}`zfoL|VQe?}BvMcn1F35ub2^3*pbid%G>U|rga8tO
zjIil}FA0LwK5!Cp8IX23&R!2><PEahR2O_Ek6u^<2!$e5Ltqh5MFZ>dAXiL~eW`F+
zP<0brtBnn52G?o?)o4Kw_}V+jnF+A7wLyI!a3`o1`7}95eFPyPwGX^21mQxKC_pBL
z)KJfqgI7c7br9@C1++mq$PAI9XR(4u5d=Z%Bgo0ykeUcy4T08ayCZfFLHa(ZWFB~Q
z4ob>9=fV(lwg^%s$vI_1)+fO0AP~bTL)Ixn#xWg%>_MF%<Z1}QhV+9Vb&#}OlC)it
zlx?DvZ6eAlZfr--!FLkLSSCP_6l@i@xOp7BK9V$xMn)2*Q4p?#X_N$x`p6`<2U1T#
z)`g<=k0A9CCJCvJ;8hibi&hiCx$x;HNHqi}O~5P3(MYr^3Bm<cMyZq04v|3XF2T8w
z^40=&Y!Rd~!d$ZlDZO!tLu4r-F$-r%$!wW6-6|dBUU*EE5D|EFLxwY;^D^KG8hC(U
zFrf90dCGK11%yn(1;Gr^l1R&xsUVa(*%B4OLmrs{x63MZ8XQ3d!6)=urA>jGW05$)
zDtVGoTE9_RziH}3Fq&+XG(jn4j#|b7hWwr49hVpue&XHupJDfZmi_;k4uKHQf&Z*K
zLAzF&R{v*N`Ja0OxIU`9!C*RxL86pFD3XCEm_a;*Nhw~$Dn-@1R4b^)Afm}AvCArR
zl6}!UkLu;#b*uau*GBYgO`d+BaM9_e&3C%?J?}sKrsvS>rX3H9R$b2ppMd~6mZxW*
zN8@J8{Ka}HGxSoYYb8$Bi|x~o>o*3UZ(x$xX_DBfo6rZYgL>f%wED;}t{sxsjN^Ot
zqB_(9s<cAtbV4gN0!!I7-5D5U85npNKxguBF);8lFmN$|_WbfPFmQqn<YD9n*G+6-
zE*}GfAOnZIgl)7%Y_)z&t8P@YP6TMcO%Hqk0&-0Rua6+pJ&-C1KJo^ykMzP@;5Cs>
zL<_t!!mN+r6$)~F1gVukl@ZdJr8>dj>M5)NUgKaen!v+upm`r~2d55F8A0nENbLhA
zHG*m&^^s;^HKg8wpP2w!fdCzdtATZepbSV&1epecR6vluLJ&5*8bUC@r<+1*B6#-)
zodF)5gVZ&Ug>w)#q&|YwL=X~E2f^zjcx9vru8F`?Mfqssa-bD)(DUWs6%dpm=ad6R
z+3=bOvS<NbAIUgmLJ?9;BxRo}1+I)Bb3@405Tr7K^pB)$lOZ(`goO5qaMVQb%19Di
z8A+MPK@hxiBxV*1=@Y>#AW5@mNF9VsLf6=UhnFF%Zj58P(dr{etpp+U;(Fj+B+M!a
ztuiveHUkBjjWWborw0*1tBEkVkje;7LUu)=)k<(Ka$yKz!)qc419{CkoQ+%`L6qSk
z;q?NfzJQRp)I-Z&c&7kXEJJDvOj$G$OKgYJ!X1E>fouZ2+Xx8?I2Tn7v>Gx`nT{WU
zs-cvr7RggU4AjcVGIcWC3W5wsutQoGmZ_61U~4k1k|$XtPOwRuW|K4xVu(q~1XK8V
ziHWmq(-vqHZL(}WEI;j@-qN??TmLH^_%C$mKmY#!Y`Z{+d*^?aP5-&){$Z%U$KbJ?
zL93HNDuqELjzQRufyowhZ!C*CgMcxMv>m6WpR8??qE|U+pecTWY5EMu(q-QD8+@8K
zdpB+JYTg{&vnOHZ>4cf5<7b|YpLsT7(uvUiqi&7646>IP<t#GETcVpdMJHi0q~&H5
z+XuRgJE0GJ(hg`W&NyzOQS1cBx@-OTZvFUf^VojV=w3+fgPd_~8q;YO+hr2oCS{w$
zAY{(KAPK8$*ue}AaAm{;CV3ec*uZ@yCNRm!0J?&Z6;wm9ipbkW*u|6^MYR}4Hbd$i
zNKFJ8okOl`AeE7RSSw`X4|q8oXb286&IXwSg496}5eNwpfsevL81VW?2YdpLR!AKL
zfvP0%XdARv3Tpt(1c60h^FE+@2h<5dT9pl6^9DVl2Xqb(xc1QsssYtIfz^<TNi(oo
z9gHA?kopKx*C3Pd$_P^3VBfXsTLrIuAS<+yN%XD|d{7R#4)Q2f_5dy6hEzkyq!O%W
z1gVL@OSp?5BXZy)ee&U?f=eFcgdgNS5v&IUt$^TMa5V&}j6kHUBWUfMj6)`<D+E6C
z2ed2!sSbj7i{Ld8qz44AfWV9-Sz8d2u>p06Ak`3rl(b0zO&EbIBdkZyLFywJ%LFO&
zI0y-;kHFOstR^yxkN_h|^GFB-nG`n-7dH)u<Om2!e0>C|l^`V6S_v))seLd>NbQ43
z!aGoC49toNDv~(K1T;c21zrasGa%&{G6^aD@RMe#(+DcStZ|S_bxc`^2(lUo8`4c6
zvOdD>-$9JULqcqUknjo!IY2Pj_^KhZ<Z19q3RDe2I!7r};hh>d1H)J>*FHkjm?Tdz
zNuB~}V1N$ugw;oo{*g`6G^>OukXDCbVlQ~V+a%E7UgB)C_?cn}3uMxkG30M&tT@2X
z_kelfXNI-^S+;;GBaS`)1^54F+wz}t_AiFU2kgO{7)&NJ$Y(MLMl%R_GjO^vFsp&i
z8sgAq5HV+vc3@HQWYh|0)r)2@N@g}q<Fw5acCQfeu9ggFkPB~DjqB4%ovxQY(;#b}
zN!}vE?1k!y(=}7(x;5^KnsUmfY>jsOBzS!UU8)LRDP$Ns3A9!rc7jpt1jtR?df*%A
z%wqaLyMDl{tdZ*@tJofs@HR!eWFAdFCNXW$5F7(LsG<R#hQtQ0jJUu(CP;k*u8u(C
zc+B7`iHm`OQ^G9JC8^dpy44_}iOBj0a`Y&qK7!8yA=f@gi>^VJrJ~)J3a^axLYpAf
zkXA^YW^k=$a4o2>2dRBRpxqpJeS}&EA=gTf<p+rR2wLL=R%-@UYXnq5NR5ChH89c$
zr~)B>Q1=J9CW5db)ewY)48%d^gCPAQcr^r>DnhP?lp)tWz-u2c19a>*r0PMdeISC^
z7R$lwBjm0Sa(x6Df>U%wU%3ripMa=|(CQ!v7hDr%gGuP!5TF?%@HQgkItbDyf{-%y
zsgTM@+75gr1Z<UptZkBvO`?oVqO^5_lx?D<O@f4VyrfM61VQQ}w8{v1;hZ?EPXwun
zq|D<aL4BfFaFrA-Y8r)H`@q?dY6wn3Y9eG3QY%43Aa^zD$96+HN8tOjK>Z^~O@v9p
zt07DVWcUtFLe33FCQTFjO%waksv$TRTKiy*=7H-YNO^@!BG*dDZ1_SxA{me}6ODuv
z?^Ge-^$~W{z@u1@z60d=bVywZserI+LF#;2q=I@D5bvXTfo!f>@)TsfX5ho#;nfhR
zXOs$>+JlI|T@GhJf*rzz*FK>6#KcLUS}7S+^_V4103q<9RTjxpAV*i3CQO9vJAfn!
zvy=%&2|b31y@rXsy73eA5++I~%+kzTA=7+Rf7%U|#c%Z1ep6iiOK!z?iMbyc+io$&
zY+<mQ#-LivAP~pE=?7Y6$K}qz>Bb-zz^)$2svXOrm%wh8DPUhD=2fd0+MyFa%_M!E
zRnbPLs+}(NJ6!5_*fnmkuG?r_x>h%Txkma@x!Bpl0aKhCj<~lS(au_-6E{&WVKV3f
z?zn!)+>mKpw@EDY>>cEDcp$YB<V;b>{!`@o$TF_iCUJsO#zg1LN%rAo47@tvDu)H!
zAz}f~6LEmMOUz(#H~~3fn~Q;gPf*jtEw0uewA&yQbTBFSY|>W9^pFAQU<A;q2=Ftp
z^}^dA^$va#xjurjVN*Zoqir~9A01eI1Q~n-kwKt+sqnRMkm?CCKnEgG`$Ui`2~_Vu
zY9AyLbU<7{6{H^ose`oqYarDP;v6~ffpXB=2R!};Im1T<d;>T#2{}apvM3HtBKLQ2
zu_4DPU@lZZu7==i=iq%JST&RfIc*!#GlJDbIgs5$NMmuJt`MXef|KBy2vi?II!97=
z>G1jpQu`p2kX3W=%1GKSMZp$?<ZY6{C{Z3<A3>@lP_1N@h*k~BTE)v+#miX6fe~n)
z2+|=!?h`@kBT2JZFhUxX19y(#HIal_1iTu8FvLv4AbSvyNkhn<IPeG_ygq_34B~op
zW4j>;voeB<U{*$$BFHrnG8^8Lf)qP=NJtF@IXeQmCc?^wl*wkvQ{W5vAPh+Hh=)Wj
zduhRj^fDkl7cAz3%XUcV4Iz;`ZdlY~tnPq&9L_LLnF?p4F(3w_3F7H~g2EGgm?5NQ
zfsmLz9f$~ogakW;4XSjKCKxC7L+T?VSknR`4ylG9^^qy~q+gT7Nk;J?^~Q<)MhQJe
z2|Wg2q#Hj$KXH<D!Yt{8Sq$kbIr29$R32h&Im^&~jk)&@Yu7!7x=Revn;Gn<Gia1C
zh^I2}1TrvtFfiCMaJup6#7SCat9aE|#!s})n(J1&!mnv-Wbc8bsb^BApG%&7D|6w4
z%q4fTmflNQek*mw?UZHr5*FP`n145J?(MM2*PR=V7!|Hph@Yz+J3%)d<UG(x2(euz
zFa&AH>Bn}0tE3)K(=N8pAhr)u6G2FLePkNbX%X9F5!+)N*<=yd?vYT>AfOMfgLuFs
zq&|WS-?6}}A{c`eG_}OQ!>8uz7FTN+)?*mf13N1lv`PUoItQ<bz<W+1C%-{=o?<)t
zph+vZ0a5`WFR=y*qSi-{+5|KNhi%~jsP@4Wht)@*+6TNE0nPvozQMXekeUcWqSZ&x
znkb+ObZjZO#(`Hua0X;<2vQS)j_!f&SJm*VP=i%Q%HTWiRD8;maV$%KERe%h6G6lw
z^^p?zIy`tC1ep&)CgC*^cytcZCjzZcP;$vtbOu#GxGSS<Ij0;*?E`1XI%dl_WWf+@
zstA1Wsf=AZa!mwb!)qe&+)%QlO%k{of>cJy@-~Tb)(MC%l2trp6b`vh1g?f4l@VxM
z4y_uJG>e6FizG~Az?D%f^4t)lCW2Hwn539V7`Q$H?L>g=N5DpcFAk3FhSW#;u*wKh
zB|%7dHAFl^A3T%+SyGNh!s{b62Cj+;R6r$6gw#ryBxc<NEeT<RLXb)gjfAxMA>}_>
z;SAwI%56L(L>Vz8{I)6V)db9)5NDf%kFYRJMa<3+V+BDq_-h<cr30%eFvAl$=y1gm
zL>zf+79xV&BQ{K!XqYe&SsWsZOd2Kh8OHZQL?HE%abmAgLXR=H>tviT*(4Fvy)ufQ
zpb<YqKW)BB?kb(qO$sGDBy%_NMK54*?PAc+V~~wu;PYf)bztCk;Zcj0_Ndkj>oiH7
z<xsfHp>&;l%~qe5T>%~Y!uyZLO*<1e<6PW~a|v@Uq%63Sx%6hnk{c;YZzM0d9=rHP
z$gK0pi*Nh)ANOtFYgxQfFMf)4%tVv;380yy*dFlK2~e#BsgEF|4Nx+s*DwZtJ`Z$T
z5af6Sy{HcTux2ChQ9Sw)t<DK`48n%s>IO1T1ZILp=O7F1AY3@f2p-mB;Zd=6iz_t>
zZvmq=qwqH4h<0Pp(Qt^lA;_c;goKW^f!Dl2+2FNs&}t|QG(e{j(tw6QRZmd8E)s&Q
zZNpymfGZ=dpc)-y6j-N&R1JaViTrCoCuT!thyp<yf*|V?kZ%-%^n>6e_&C!#4gWew
zjRV;e1R0Totk6aohyzzZkRA|(g!GIc^$}#A2)<4m&VW=mm?T;Sq~uWqS(5;%jF9Uf
zWHzKeQgX>dLZF%ma;6-Fgs;|yEKYz-34!(v!6t^lM<`^;IiR6TusGssJmhgXcufQu
zqk}A#leSKRFyyS0;0RJf!RsJN%LGWR1X(hNTpvl9$4Hrj>K)V$QLMCCtdwbtlxdU{
zc*z{3N)k7Tgx5i0CSedZqz*zRAv+P!NTiAhR3GU^cOnqBDhX0CK}ce1B|P;JWJVZ`
zM5~)nxbYJ(t0A}u{_+#KEJbFUrA$I_VMQ^)Qk^^n@cIb5cGUU^<S_7|OxP7eE~r8!
zAt8WYJwy$>R)TYhVjzv6LTVFSB*FT~0M_?`)Ho0lx$g`~6%Y}u^%1yA0?j9C#Lv)(
zpCOSrPda5GU(zy;m<8-1Ga1~v84L>;q#_yE92o?>1+^0ttn$U2%T$9}^%JJr7cBK|
z*cse$Fueb0!i;m-i*M#Hzgx8CQR(`p6<c1^?tELf>s{Thcct527H)c)x&BG|+DGZj
z??=wO7~Ff<sb-UL%1lUo1UlO!wg)`O0J`}U(vXAJM6uvwc`)iD_~?XDT#rS352QXa
zj_GuYuVoN52G=)eqjQ+`5hM5(JWd7%P5}*jkA!mLh*ra}W~1;n!?0GvuvVk+HiK}`
znF)|_Iplf=$_Cd*$b)Z?4iQ>4q!H2pxfKY~AtI(ef-gVN46N2hLXi3kxlaVCeIV0A
zpe_>Z?r+Tc2r?f8BWu<DYH`&^C{se-r7B2A_o#t8M3Bk|)HCubSMe-^S3?j6W;Fy6
zf%JnQB%}gDBa!PMWHw~B2vQj-I_D}n=R$f$$VY9%>mymmEXXJvq)I})kO-wRlCjGG
z9cl``G#qlQoV0D4lx;F{H6(490_heZuav{AkL0YAWU*Za0k4lB)exM7^n;K|wE75C
z6PZMcn?&NNhQv(5M2$m5jYH9DA_x~>eFUqUAQ=r0327PPs-7TgT%q@WA$6A^WeysN
zQURf@*^38l-iORA;O{FzDk(%ogH({>6CAku2vi+lIud+BAV>=4FjCnB`a7t`8YfJ|
zG#@g01_@7GwGV!ANL^!|&~Kj553OIo@d#nVNz+7_EG*Iu!N-P}B=i}_cN@odn<Vsr
z?so%^-9bdm6T8e4yDSsCEfTsQsY^G0l5YGYjpzwVp<Qxb6>LTcEE+*vdJ&w4$^7P-
za=r~_sWUuk)`oQK3~1dL*|#fs^8U1Ghtj4WNS(GnX~LeUjxABm8xp&=XHVLnKkabl
z)Wf+mk0(w&mN4aL_WVovi?8L*IUCZv#Uf*xZY<16v0ad35DcT*AsGbGoQv+%kLm^?
z@CgZ!vwFaX^Rz)~B?ud`PTL`_fkDCwyu*ka-1lJxuj>X?I$#E*R)XBh$_-xR0G5)p
zvk9-!k7x(o<^Vpi$0)qTFs#`iw8<c}2~y*L2HZlSN0LHs0)ou@AdSKy*GCXGd<&{3
zQnv_FB|$0|-Jp6%?StG)(g~_Tu7)6Nt$->>Ed!tNL2v`Av;t7h=Yh-z!D^psI0Ldi
z0bCi?K>9(DRoRff4+<N6!ZxJpfy@d)sv8Ii-8AG`hFl*(>KzCPsdsRZkeUdxdkD@(
zu7l)U^56(kAAzf(Jj^->QWHTA+=kadkj2}Oz7H}98j-V02Td4(t0BmJ5Rg6*WC=I)
zR3Gqi1xUREsfplyA1SLuSulchjv)OYv;$7z=lsZ6CV+?IKz$;l$_RZkk%TFzTLkGG
zNts5$D<epKBxVvO0Yl=(p%AvXaVQ)?Dj?h>=;$h)s1D=|g`W+{fN&DhX+k6Q;-LGR
z;I$H*fmRb?a?$E0!?=EUWrWFqmYK){dbpQNz>83n>It=c#w?sE6oJf=;4}oIK7wr2
z!l@pcYaxMvM+-y&)~W|Cg<RL;s*lj32AK;9Tet|M#xaZUGm8f;iUyrb6%Set4G~8k
z!i9)HG#bYD8pQR$WMPFrxH1Bb+JS2*vxH6%it8|s?}Vt-iJPbu)2|gfNiDKZE38G{
zyHegcSIjDf-#k;nqsl08ihbTf^Xxe~2@?&|CTYcYD+kxh_*4iv=kVC3@mQrP1=JWu
zwQB@|Muems^TeGCC0&bU0vfgA`i;_NTIMbYZrU8uyu~tex>i(&essH0T$gbS=okb@
zl?2Hw$T<j98AWyLN4M)mwSf=k0o@IxAJK}uO9)aQ*~Qc`NZEiF&OsJ?Lsr%aGcbS=
zlL!N=I0LIB7)diQh%hk8f}{k^blh`vLR&x=-i5Vl2Q@-!A_xiT4ME1@AS9mpNHYj}
zVK;J31YtueAWRZcA0d<Q`bZ00(ZEUg6b~{3R6PY$f$sIjIKLEL4Z#_ZK9QPljhb%_
zq#pzsbAw!U2i+osG$jPDk08|$Iti|iAiIYk6Go7=+4!p=NCgBTk?SCNSJ3%#axQtY
z&bhMAx$yc3a^jq#bFP9D;+$>dY6w#M$b%0)m2t>K?Gt6f>mx|*BZW~PN!z7Bb``;o
zQ-IV#kYx!nHpx<`OB5jWkqmg#5d3T($fhA=5_H-Rczpu$?jhuTtdK!D2q|S6C20}~
zLGb!W%s2uvE(fWDBuv8Kbr3Q`HxhIj6&i_DJ%Q>YNKS#1kcON-tfK_UacCs|`UrA*
z31kohUK2qWkX{uUiCHVbMIdDkoP-o#ct}XWNCJsmml0y)Df4kj;i->|6DC6X5fBpM
zW>V`TNQHvMc1TT^I0>QvR2jujGEJBa=fWmsKy!2OiXJo=6+Z!)gwzzIlE@JRk4i(N
zZZbSVAq==0czpy}y#`syW*pxQss*6hLG_eTTo)Xf#dScCd3>iuLZ@+bi*8u6oO_A9
zcO_`P23#)~$F})bE-IbARm>s`yefbdeEI<c0|P6+w3ev69=P`b>7+1-XxgOoul28*
zVU*aZ9o1wI*JY75$s%O}sD1(85CmP!7702J0iJ90qB?ZI2qFT>anN2&L@W5ZI`D}-
z5nYCn-J0P&S`ob_ksS=mK@6-048nE{N?|-kY0_>DI^h$|(iYg~uXHQh;90rZziFpm
z;|}lI?Y{LpT}n2Xq%6>gn4}okE$!VZ?cJ&x(5MmI0_ojAYof3Q(4HWq+kzlHCg}P%
z#3mr<0&Ga_gGOqAr+$ztBgjx2eD()2<_1}Q0PhgNYa&qh$G;M}8bV?FSK_IU)ch(S
z2sHKMU7-v+t_Qk28@w_B(gVUImAp!npqGG`Kx!OFl?3Sop^@-&wjq00mE8;B3}x66
zbI1(H#1Nc>^n(<@N1nnr4S^Y;vlK9UM(}C~!hqL15C-x|r=X2RNF5^h=p3YH1R0cr
z_l6)0NM(dZN`VLEWG$0rEt4Vj5uAj~2Z2dwZwONLfOfE&MM#=QLXenwq!{j<M35OG
zP`Ai90#q3pM?h*K<k|<BEovAnY8VX37nmfhN<y!XAcrf0GcwYo61KVtQYC3ecR~<a
z7YR}sA(MFOBjoA{UNk`%5M7Xw8A#cMNg|hiXeB44kjF<7s)LXVa5C5^eGLq|AT<$$
zgt#0s3qyuQ=vpBCMDR)gNOfa^tv-TC!Aazr2*QTAic}Igjv(;}A>ra^5ekuoBm+o{
z!xylb#KNl{NT(alg)&TI+f8HJ&Eh(Yqg#!lnvJ7dOyj!M{A-l_YE%R3RRe2H5_&v}
zW_y>;leLd!U=Rb(*TB~gK(_oKZT4Xjb&tu<pS2^nZjMfLvqpHmW_YtUtWl>Q*{mPg
z3_b1uqdwA(Y}W=KPpS(dLG3`S^^rkDmu7g6Mp(D1SEYbyikefddvc#o(XxoTO>v!v
z(<hzDoOwBK-i^F@H;NYBDPDHBc-h_Zm5(b|KP_GQuyW<o!iBe!C!Y;z*=L=-R5zki
zE3{1~v`I6l9=Se()KCx-vNAzCxE9o@L0Vt{IgeB$unrTcBhBnUY9cN0QUopk8sz;!
zkopL{8iG_esP&Ns@&Vdl1&}of$RxBk1io<<R2lh{A@_-ty~<U*E8smK<jM$s#RH^1
zg7kpUY9CB4a(#qc@4&YX!5Q$X2T~b9xR8DjxB^1kKm<BV0n#CYR6S@Uqyj=FW$e<C
zZ)Jt_jNsJ}f&o5jPRbUvY7Vkc0j&apa3TF8NM$5xnE*NU6jBp`D<J5(3Xo-Tpb7|f
zo)4r-f~=8)EY%h<4Hp4#Vuf%a6%c%qNZd3W)FFaZMxsXH$hD8Saj2+aFhm4W6G2E&
zr#=F7WiTEFWK;n{!s;o|d5kbNdYuLDKoMjh*GCXGq!xpdkRk{VsRuqU0*%C6UJe(5
z)J?>fpZKaONRdt)38_;sNn*^0yB3dPh`%sNNL7Mf8G%=_fs6&u+CbDm>KCX=n5RH$
zU~@M{-~$KHtT2r4#){xI3myifW`RU5gbg7LW4j>?v|0%+2sx9@C=RlW4N?t3NyFGq
zaL)*IXd7fF9h?Lm6b!l|q1!yJ+bFV4)vs2`yGqfk(jc-UxOz!)_ZG9*Y6eMj@L_R+
z;0_T#1A_nq10T4@#K*uO%qXL85Kw9yT%qh<sNquvsdwNf9)S8d;h;5akee4E849zd
z2N3}c(18_$mtnVowSaaB8ANs%$8^hh)=0Tm2{>0t`L^gK&$BOC?^L_qy=jYA$ARF9
zCnKkykDPu!e(u%yxmOF;KPlP#B5TEi)Wx?UC}Yu$j72vx=Uzyjek!DKlUec<jgTgd
z5Kxr_nl=K@5J6TWKuAd62f4F@%!aQwfHNQ~4Im_9(#O9BG}H#(lnPn=209rVT(f{W
zJ6IXu5jo@<2g>%V0BsHe9~=h}L9U6Qb3@?yAP5_ITn^ITK_=C_O5h06Cqk}zR6UDT
zJ&RR5iXfy4c(pd9Cc;_=L24Xi5>oqMlBhKis80l`gP^2yrabsSIa#L+c`(9QEC*Vy
zAY+#Ssfi#Y=I$ZLas||(xddrzD3Y^IfFQ`GAutJ=FoIV`@cKx~ECGaI8(P7O6QC1A
zpaD8?hX`I1K^WrT$_P>)q1Qp+mD`}o2*!Yn&Pf`FN*afX8wP`rQJ^?B1mBMUXF&21
z8VRY43_v%bwCRE`K!H_8ZIJN?WRhStgscEk4Z%rxO$1>;3M)J$T1|w>h1Wz72Bdg|
zw71bnw6d2}E}qK57`)XNvogZ7{}<UbxJ!@)(b#C!1e^;wqXtsrppf8N1|ka~K?{}P
z)l)(r#C*8bkhRv3&_gD%Rz`-PsT{;TCdlfI<9m^-6{ENwNaVr=;VA(wg2sR-fFvQv
zdH^UJykicsqX(h}UK7C?rZL@+JL8PO_swYrHLCg7srlC#MKuRiEzX*_KDK3%OG2e-
zV3t=xX>ev;RBl5|ep6stxo2FyTVhFIPQ7btmqkRaesHa6Bxqqa{DuYSNeIv?26WFU
zBnLriA2<m=4h~X_fh(gH@WBY6^>3gGE~-<(vx3Dig~c$1->Fg~dXi<{O0W7|9?e_*
zyY@gTqo^4dqGw)An0GC4{`JficS|<E$XoLye(u%G<@Zw--AY?<Eo0G*yhT^D=3Pvk
zbl9zUiB3eDdT^s=P`y@g1Edau^nD;z45a%5se>S$9S8}zmjP1mAd`^l20}vWAWgq&
z2m{nV@~?r<@1WI0pjrk}*FZ?58V6GEKuHW)NIwYDCqnM{K-u7v6QFAnkcQ(RwGzBC
zQiUC!0I824vqJD{2r?FjwLU_wec;<y;S6|9gjxYX`$TfE^X1^HwV}Nsa2*6W;1qPo
zwqrVSeI#R-0Y{LUNX|B0)+P;7`#?y@N;wFLIw%LNjHIm-z!ea5`znl`2sz^vGF1dR
zQ2|mDK}hh43gF5p7TOzvEZD}ej}_iOf>%AF#-VUFoFNKdu3#99t3DDl41gebHH5~1
z<R)w+XsjEO`{20@nSoSKf%;HLOZD(?bb`vl=b<3=5grm!j=>c`xCBYGItY^sDS_eQ
z5H95;yn;Yu;5QjfF=*Z&T#sPtLQ@H^0w5lNs)tlJ;9WcraR>=f45^i%Dv@kKRs&Iq
zOd{7u;4PnBxJYof1+?bP2)vpFQe_Y%A&Ma+vSLUbL<|Yl71ar;ydW3Mf$r&!=`@aN
zH;QV}3v18<FIUhCtg}q$wM(61n=;uhd6Glw6o-@<u9<V4(`P!T&$LgOY!caF7TIPT
z)@%~qW**fE+1CR({0&mSV3P0(2rC0wy?%J3K}3^UKozrYID@L6kbQ<p@)XaCm7Wco
z0y}nvPdpki=~(cjqXGSg0w){|n|dmI+R5lyXXEExOkZ|0W%0G}X(!|6UWk}}Dss}1
zh>3^er=5tLa4325LBF~+`qAy`!Jx_rPkn@3<6yEOwGV_;Lq4Skd`<#rJF0&rE)r5J
zAy-CfK4r*UWHzMBgGNHuCqM*ojmSZ2A2<oAbd<r}A4n$%QXfI8Avg)IhTsgy8V1Pv
z1PF<|N&#N`ATglVKp^j9g|p#DD<JA4a7`o+zJUmSo)3rt-qs4Kl^~UooNb1jZH6r5
z&O4iA&{72O`fW%ZgiIo@Sb%hnq(EC)<KZOa;vqPRHZBL}BG*LF1#)H)QaCCjF_SQ1
zFoM@R@HN|zmD><f*f1EuFbWifArZp>5yJpcqd*7)PNG#qc({<d36g{G)JGV-Cs3_~
zR!Jdq(JCx>eMFD}uZi$82-Z*#1&~sgBoede#v_7OU%<J@S}<)#F8LvBOf?V@h%6?F
z;=8zhP^A;!3(*26A-X_Pas38yJ#cYk21E@q38^EHYajjCPCN*?=K`%GpgkJso=`*Z
z_8hb-23aMV6jm;BKON2n-}}=6zS#$~W&m0xMYV$#9z?Z(jvkF@1@)LBT0y5zhqmYh
zH|Yj7Y6sNo1hnV{wdn>n8-}(Ug}3PjHJV1Wnntu5hqao5H<=now1bD*+9Bg_klcYx
zVy%YYf{@)nphH~4n)SmQ4I`Tsy-Qj3BW1mc9rLDpR;=`^UFXxh)w^knbL~d=#;wjx
zTV0yB`Su(PnS3H*`l;wyXXEBxh?;pOde+&fnP=kWUWlA=I&#vH@Ck<k+IM<3Z1SyL
zZJ$3^J-A*is1ASs2(u1CzM}zB6G2FLeT1t5LKatpbaqflNX<eF3GD$Pt&vmmDgo^k
z0-t3HnHYjpJ@A?cZBPzg8NvHL@X81>E(f`c6+Su#se>RpTajxb$O1W}Y6yBaD`wRL
z5rM4MM&8g0?-@ZmL`a97f(}ox%aB6q7D<CTMCr0N>Ck(?Vf7Kb8j`e0f=m@j+9W|H
zhNP{NzyonOt072zBo5ldikKvV%@f5!DkBNg7zywqZAg72X&Mfxk05hH$Ru)o1n(9>
zsvcxg*f1E5Ae9lk>cPrDIyMAycnFAu*GjM(xY{5L&|(JA5m+tw>mxkfCFGh2D;u%~
z36sQCJ>jp6Fl8a-GcFQR#zNK=L)0Lb$&|2>OLAm3elw9(LfE*}LuAoN_-Gi!FStmE
zuSq45YYJ@aPFMwm#sKXJ#aflX1tF_tAY6EzLXZI&QpQvU5r?#<AXO5CZ4}vI6xji(
zO$@_3jUu{?BYO?QyLCf5bwWBc!W%Rr8r8#_H6mIxBAYZKn{=a^b)%Xgw*nc2LuP!S
zm60K62DTNs8iE{-4LX4be0v@8Wet!z2tq<uTti5`um*#OCS~su8JAr1*f!VfDNY%a
zEaN*>g6dR*>osEfO)_R%7B6?L-yAaGNW_#A(KAlP&N>r6=Un2v3kh@2Lr~1j(-Hj#
zBPSdR@7`luxX2`Ff@*N1dO$5?k_c=62zFZ#=#n6`S_x7Cp<NFI8IFVWdf+6yj{{l!
zhD;))z<W{gS3{5r2(AXgg_CHN4y3Mu)JK>kq~3vde9-D6NWBBEhTsh3ItVgShg=6i
z*vLb3(D@+Pl|zt92O<bzBNl8U--73u4w(#+0bl(9sgGnFvZU-XByH2B!3en~f{e&X
z+9XL>Ll2aLuqADhAoUTrCQ1a8p!x_}^;pG;TVc79RnjaPynzUG-YKLSf{f0In?yo(
z4@sItN`ZSulBVHU5oVtVvN8dgge+HpS4P5y!B{IG5yJps1AjQeS|7m$wZmJr!&@Ph
z5rotY17%4_)`K_m&=|<|5iT~?%1AG&4YMY~C4#GlLKDZVlCX+E%4$fN3}Hi*LD*PH
zTvY>BakQEO&P7&?X$!<X$Rws3hzPP8WHwe`Km<W&sl|a#R70*!ko7{?@YMy#%CNE_
zH3*D^4#1-Ie_^tqb3ZWa4u}Y-*#o|^2f`(igw#6_#oz^O&|6I*;*dHBnS|Cv;0}&K
zSO*w&8H9D|fss~ZlU8ICs4|LZQIBX;k7(41Y|@Erf~0)dj1Op@2*QR}K#+<CPC{;H
zKqlc*kZK4{Lh2)R{|Z6NBnCw<1}SF-VOs`ScP4`vIiCvc_(`^fOB}1$`*rLI>N^Cf
zk78z?hSW!i^DZRKzX&0NyY~1rZ%vwZTsE*t+Ou3MqD{@eMm?ZbBM{t0LRvwGRuf^?
zK3Hoa$m#+}Ed!~KAZ!SUToXar*s3AqAvk#L17W~x9|!|d2SM0ql@4-!1UbD2t^iUQ
z!FxcEP7rdh2f02%o(zK5M{+KpW91+f5PaFToKp^@GJ;JE<-jW)JPhb!Ii&gsUKzpb
zBT3tIDR37_!X{1HHeCubOlO@6sfi#xAV?jA%$BfDl(2?WLvS)4QW=462FFn!fe-nK
zhSfx%o)Ki<5QK#89s<`y5+-42^$}#vHs;VAq$Yx_k%N;W20`%pNKhX!G$(A}53h!V
z^!?zKk+6Y3_-bIth4oMpa`-%igjY;R48-Y+$hi!yO45mFf#h7cIFS_-WKFDgWIL{E
z2riCR?~u)f7tZ<^!$(+4UAQ2mxQ3IEDh5(JV3KfU#53R)K^RyyLZtAJ5Z~ca0I3}y
zvijh=4<Kwz60Q=$Ac6$dNAOk$Qmq84iS(m8Ar%Llgwz@kE`$vsAw3!hn-~(gCW5e$
z>mW$K9zsIu5hw|%k3fg&fYuclgtusgcW6g+YK3=bhJo(s)(!`a8|a3$Leev&RR*tF
zkQwm$1tO>&-mDb{JqZCWg_U6t(FD09NZGrTLDidq*O)=VmR;RP%r;pouv|ZJibK&-
z$LjUI?YklMQS6LUaWhZH&pMMd_k7a4OG)!CB`yS^fX>~TNmKlrw<v|RtA#YF1~)=h
zEI|4`@Ja{H1s?$ix~u^%1vw7^QrDo7kV*$mLS*4wI0IS}f!Ah3Y8gxtQpG?><Vpuw
zM9r%NwB!K1N&#Niz!{MG2&4eKKn}t{?ht_%&bb$=c@(L-7pj6c4MDm*m?XU3ft;|7
zOhU%r;3RxX2*My-4MDDcfbSTBU4#d&l9HwEk|e=M!X{PHHVt&hk4>r+5<;FEk_2D<
z02`MBRY0iq5uzpnRYvA9ko&<QB&-gCu9^dPh+-v7V?<4(KsP}^)-8ZX>d-162p3Wf
zA(Nm5+u##^Kn(EKA$T<eXNVXCL6DGPfFM#Ggj@j;tdAfy6y540w0a6&9}!a}A#Zyp
zyFP+w#9wY>me25B16nnKSrb7-(3BCw#k3St7p59C5v=}#)FuSyYv2mt42VYJNR;{r
zUNJ#x983~Y*FeM}Y=cP9j1OvULSSQpegw2nq#FsUL?EXeKumxQK|`BKdZ2}Hh~5pj
zbJPK;oAkq)4Z>SA!`d}M+cd-4HN)Dq!dl?<kycofb~va>2I&w%rhXt*4}?UkYqY@U
zk)jDAb0M36G=u6PJ5qH*>I@^BZIe2T!fVCNW4N^gB<xd7q8p7<XF%#Bzm7e@{f8r_
zoPg9vaWhXR%{`wu_hQn#OR;m$B`vxf+_O(Mti!v0gKA`#N>GDZNE6oj2wwZZ8Ssh;
zx;xdsQp2wTQsZcX+3@NHD+9U4fvkmtkjV85u2pc5`Uq0dK-drxQsY3x(MTm&9fY+S
zf{(vJY9df&<chfY4y`^ys*LjBH4(f5LS{hfAY>9=A3-}o;Mxb74cRV)Ai?z!WQ`o;
zHgHINBx#!_fm97asw7G46i9CfLP9Pi0uRc8DkCW?Q0GX(IuWv14$&<F*F<7w(U6)5
zaw{vi0z$5gK>Z`M`UrBC0;B?xG!2Jzjv#dq{A?dMSKK%h()WSaKFAD6H3TOi^$~J4
z1ZNBB`ynGp?SqTd32lKRNEZo`l^|IUxqpPrhV+bxB=sXe8w26B5~K`)v}}<{OmSG*
z1=<A<zX21%fE?9=mBdw+q7~!t5*Sk)mk4HML5v8*G)S$1NkU|iNlY~m5o9&UY+U}O
zmN-&HLtx<wOeOkmHCQ}?RvN%-A_xOgl@LvWR(?Vf5~LRcA+e@IT~MWiR3AZmMs1L0
zm_Do@q!r$v72cp5-UvC<0Fvy{Nd2&8$W~KQN$9}{;0-|#HfS++a4l$ILwJL3aE+jO
zBCB?&fMte8V1rreH0PqlE;So`T6P5Y9*mfD46Qy&oO>~D-uc9Zm%{oF%7u4&*Kg2@
z>sJn}R|{!U52yuAAq9YDh~TvkG6OUv1YRHquYDj4;_D;i$_R3g17snbl2-|GeFUjx
zAS7g8Duj(pLc}3GBS?LOOu`4^AY*Y5DYQNjyfOl9S#>Xj49$TD;_~EO^JI~bylbAE
zOD;0PRUaYOK@c`%#RBp5k(5oUv~8NCT?!b1x<!!s2tq<CAP6aGodT+ftWu<{p!E@C
zwYH>X9DK<fq-P{<l^|}FfVDmnH;;kTL=Y0WK7x$PftN0T`bQ$*!%oFbBH=X=q%wk3
zJs{FFM8Y%##6UXaM-;YD0l69yH4GLp2!fFC%1Bs05MKKT8~BSF_>1WK;X*pWO;{16
zhJsf{NDRc_1ElQ-A(87Nv<!=@K!6N-=!LgH$_*F^T{{7pu7Z%r6%!g8QUMW9LP|eO
zlKAo&xwNMi8&apBk-E{)p)I)EK(!>&-5;RqKVT<gz}1k)fCmUM+Tn`944l;vq-sDW
z!BrBz`Uq>)1MAwLUnB<+hiCzv9SS~m2bPKui3-w@K}(amVa@QW2blp$_WEJ1(DWS9
zs1*)AU?&W84G^XwU~{n7N02H8Qb|GBct}u91Uah*d`^OPV3nqSg@9QCn@*UpO}18W
zlWEFSr@}=pH5&pt_JmD10<Vu^XPkocj}qrzjGcQfap9$iiH9Wun_Vkc=_XE6^shy(
zk05mrsP+Nh7KFmUQ6Hgoh0y99WG<xMfsl~ONZGp-`FtMaN(a6Q4owg)4ykC+NVGKx
zkZK6I_Q7O>tD$_@%n_*ek#WhDamkf-&XIP`k#)|2R7SF3wyZPa1|mqM16d#kXG6G<
zng~Kd>LcV15rhrt5J3)4fYd|~Hn=iMk+e&autmKS9NamA)I<<c+A2lL5_H@g_|#L-
zHX?Xs1R9RBN)WY(L#~e?y&*UWsfOSrq&|YxL{VTyjJQd(h)E=JeIx;{aUfL>f&|w`
zkQpL439paf^$wgNtRD!PInoaV)kF{yykY^;4-(P$6V~%ZtB-{Bd?5%jT!2aHfRF!$
z)HNt1xWx!*H=>Ax$0Z;l5WNr*YZlfEZ-L}@%+do=0b!D8)exKunG3)}qE%JkA{0xJ
zNK845Uk#*g!moyIvPj;9)E8L1jHHHGSxC19nbeB{?T<#TbTHXE5iLl9h#@t2B<e<D
zABcn0IA9Vn&W5#lgDHrd@GwW!AoU330w6Hi3aOR!LR$>L2-@ob-@>35)&RL>TR*%J
z*L)D<R8k^qA9%%tsD^^-Axq#igKIT{YScq&RfDP_{UbrEO!a_z{p2aOg^QeOHu$vd
z3hFx)HtAUG^wV)O&cw|)lQic-+*}ZfnR_8-?uDqS$ECwMoJ*G(rA$@wty2qZ(hR8A
z@UMexS=E5tUI(syAPckA{3_rKwAu%mi(K_!vLV$FoP^Xzkn?$vYaICQR0soJ-9X0T
zkk=%jjlw}xs(|kpg4{F&=^w!>Ajl9LgbQIqNJxDIsdSJ@cm;&UK(2b=Y)I9EMnZ0a
zfYd%{XUah;BM1qphTtUZ4u~WvThPtmkjw8t_pw5HLy$ViG8wHVg7l9d7qg0+$3TYW
z#LZ(wE#g46l6f5X>{IB3k(e2DTu#gkbW1p7h6qA}t072b1R@~=bRZIP><{eZZAeW7
zs(MU<#o-9PtrdLK4_YS(UKyd+MBw@eUK62JLy)=&QWFX5`3mX!Kyv{+JHQx_8Vd9F
z6OGV12m)0}NOcgTCW4TVng~K77YoR2NPVOo-i%QtVd)>CRZpnp7-*D+7WEONxTaeT
zLS2g>jt7&7`UuauH8d$m2!pkwR8^3g2+}2itTaHXXi&yo(Zu1g3^5rkLUkfg`aZg0
zt>8KcS&-QJ2sw?x*_ic_PG}Rj8fu1-p!x_hPynfqAT<kQfdPcn38@3^oCT+ONZLjw
zA@vcwqQTEVu7)6oBxu2oiG$Qf8liQnLDj;R$*kI;yk@EJ`pB_*y;sW)czqN<^KAUg
zvq^I<#Lv4JKks7fyo>P*F2~J0Ef?ACRI=1GeTIT}4d_T!f6$F}kopKx^}tE^!UG5c
zxdMW)F{>W92xR*YybeNUV6BhPcBg_C8^HFfLh2(32^o=t^nK7s<ncH7-QOTV+;c;6
zE_vXiwxNS^XtfWVE8~<6LGU^VnSoXlLAWvwp!-<y*GKSb2(eJXHdzXU5+!Xw2PuGR
zBJh>0prJW%l?1AZQ0pV)EAS-2vqF-VagcrxT787v8<GGoRDe`Na1ye&2vh;V#^|8u
zoq`YcfmA~f5>y!(haxH?6VO?6kR{yk0XpP52+l_IiNG}xgo{}p!RsJohLElgWUdpF
zgzTQftdF!p>fi_>3#qw~^Cng{yl{Xp;Pnv(1GK^%vd$b%VisR;5j-^!Rw+n%h)H6W
zy|_fETX7(phRa6E#3A(&h(z@@qWXXbltE-0ysiN=2-NkExj%SaLp%f0ok3O)5rL5K
zfm}#Z!B4`6<KPU?t{-sk9&0+(fgNqF2dk36RS$G=DQLVdxJeH*h}{6HQy{kjA(LQ}
z!CIOiBqaSqNRSq^`5^EpoK|2RavcO=!)qeQQaAO$YH(##tsYnfsgDFLlbN+c1#Gf3
zLYfRyr&;CAcc@zD)3z(H_h9J6qcPJ@#?LwvJM(Pf+>7z^E+@>t5<Bl=!on*t(@%o>
zM@5TmvgasySE++KN1!u&AgdJMy&-k~N=O|9A>p+Tg5gu9?p=zEAe|aC5q0opRpd?(
z8XMBJ!Ahc4H;|eLLPDglovK}+?2dvIUGpGF!6g^MP;^BsOMuikaxS?L1~Mt*lr8I=
z1F44KB)l4eR658cqymDIkeUe636il(N3MF1*^ntA1PMOx6tXe_a+ZR$bpmK04ps+2
zCWF9ZbcvuVhb%!Gh#(yz8Ou2Ez9G<=+mQMQLc*${STVC$2pe7jiJC@1M&zJp`#`E8
zaBm1&6M@e=h15Zi!8&M71nCXII!B<btVoOK;I$7j15))MlR|m{Py}AI02!u3u7Kcd
zAzdE`!mNoP`xM~y5#%~Z_27C)?V}l7s~KDisgfWfkerB%#9AikhJhBrLY68KQy-!2
zfr3{?;C3~EmNUGtg)mUdWW?AJW|<FBi77rLL?Dg_lZbi;%!PXyAr3!V031ZfLuc6Q
zBS@_Q*%^vlr{H0ub%Y@%AghOn;IDdc$)a_CuqHWVK}cE!AH@KwQ^1u7bQV=Rs8Khl
z5wkvmTrj5-QU})yJr)~0o&e7>$Q2Fp8F8S?xKTI$VD9}<3#ifvs?i7n)ko?fwJL#C
zLRKk|`bagfUMFFqMb2D@s&$^t+x@%uhfFvUJ?%u?%+s+m&nAHSN0;O0U5=f1F>d~)
z*cqoJL)z@~=G*1YQ}V6?9nj}nr3O1J4s<~(?8*i;$a-uD7q&|Xvp!PuDpB_?g;YjZ
zNwf+G&V}@M&}t$`MT3=u)I^Zw2ap{@knuNU5?<rL8SoVd@cKx>B^SOZ4!HtCX2UBW
zGzPo^g4a9949I;$5E5P!K^Vw&5HcHH6G3K$AUjwg^^ufSJfupJvWl0qNCNeYtl*2~
z5~R%I(CQ<2Wh4b#K?kXQU?iwMLavDrm61s#ygmY5{Q$l$9K--uMxsWbrF58;5vU&o
zJKhIZeS}>5z}e6`2woZK1qkT+2*PHF;B^p$0jY-IBxLOkG6}7rf@(nZPH+SM0X<0e
z#6u#N1ITR1$R1{uq#52yB+`m#(*lk25U7tJ1s$A(mw{*lN~m0Tq1{fO+6S_!3VE$N
zCL6UjLAa2RIHKM`RSDfn0}l{L5P`cz@G1#WA3<cXlDMh~tl|(8Fr{GQX7B+t(3)yY
zWym7%Y6z09Al)KJp9Mm~Ya9?4JO>0~!1_nvY6#p30`+K6>LcAC(CMR~ln-uwLE2!D
zDhZN1;9VPSaD4>NJ`i!uz-rLl4M8=Ufz_Bu!@ma7A%d(f0FmHR;2<|Os0UU<I!dbk
zmEzVZ96F)=7U?R!HCl1~7CCe6tJZilZ}sWi9n^m?Y|^pFsV8G*oQa=(K7JmkK1!N*
zDQ3o*gz3lS!`d7(XFFxh(eSR)_N&$OtA-p^3c4~CR`0-v-5?D3XdCEoIatp~4ZQJ3
z1?h-vczpz^bl_aLN{Be5CW4TV`Uq<Ugj~^phTULm;lN{YD4if>_X0=-1g~@;T;#40
za>WF%kI)$K?hhIRUJXIkCm;@0z*xTxpDL2INduj63f?q?Tmd1o!9#PP3m{<C5a>7s
z2per5tE6QD{CZYN%LMRz5NK8i(p{3V1XWM)`UtW(LDDQ5QWHULg+QJg5(RB6ih=fw
zz$+FYY-k+>z8oBW#scEJQ%Ii(OoA#Sqz(~$wg@6FW)LK15G1M}h>1k>0#FcmDV>n6
z52W@%CgF7ul%eG%q~!(AN^k}w$AOz_HJ~gB&T+&r@YG7kQjpREULT<`h^&m@${^(%
z@g%(Xg)>lVB1q|tue7Ib4F#zqFl!%75r|4sNw_z`4CJ{v#0oU96zG;Za4-^{wSiX(
z$U|gsLGl>T^#h@x4hy8#fTR<WNl@j|fMqrZGUf(8;2M&c@z+Plxd4(EAXN-R98&K<
zNQej|`#>rh%sL49emYPEgmf<hh>KJoK~9HL53GjNN1VE0Jf<m%UX?lt6D@P+LFyy#
zj$J|hpdq`6DJLSQo{XM;I(E**xVe`S=3D@YOgJFnU$5rZV39CM%eO|`uU5mi3RDw$
zSAa;=`UqYPL3Rc~xX8UBNc{q@cT_!#A#6xp17SmI9Eb>HO#-B+10m6BBD6XPnG5L|
zK}g7;oT6(UoB^qTAoUT1M5~O@svZazt=>WA$~tCYT`vb2h(jhJJs`LUWONR*K7y>H
zgLH_bZPTP|QXy3j8VOmdja(l=DjhfpuZ-XfDd<fQG4PrQ!hrOKkZU5y43P+E;T&}B
zoUm~O1PK|1L+T^s^W`9H_!u3ex<Ml`2jwuUAyNH6lxhfED+%lRBUeChHnakQjnN6|
z`Uq%)DkFmR5quy5l7%(FgW-@t2{aO}0J-Xc-?xIA_d&%4S~Udcq7C1{#W8zQXetSE
zA)|Z{WsnjWlf<l>z(ZJwZUR9r@%06yCjwEAS^Hp$KvY6>A(N1)e@wq1i-0Q{l#V2_
zEb{4UFqM#62~<guSHB?ZLhJiLsvE2%a*qaD6NNQsh17wz|G<{IA+IigupzY%#6Yx^
z1{qaD7KHR@;K>y&8N&zJu#T`nDj*06t$N@$(*;&*!+JxId*~qj9XK1rK&yseTy_6S
zkZeE|xZ0{z_pgAgf|IaG<IoA?Gfh(QuFy#8H_e}8U$x4!Ws6Vy&fvZSVf_a~`wm1*
zJ{mgVP{@QM!TpDWyLb4uZc6UnBIT5=V3%MTT&m?)q3vI(<y)lzyQ@yarwkdvCxdX+
zN01r^LPF{tT%@vRF{A>5tbD^!4S{++keUTXB5zZL&IG}vP`H?Nkg{u@qH~UtORgf)
zI&FDp&^_Ra&N+}fSmm5E5y&YUQU_sHL&*Ijc)bJf3PI{4I0@+iLFyw238{l1B=Wc%
zWFM;(_$F4!X+QAF2vQ%x>mXzXyc&WtAoUTXHv}OeHIb-U1O!3uArdzYhujhlAtAMo
zgmDA}i5rDO5x55gse?qp+gh=nv4C6+!PzJ^5%Ojta0LXflHfIwpq?)T3F!J@MSR*`
z0!Rqbu0tjv?LA1F5VJmlWJb*SS$GDAGawZca!~+h>xDKUS2VcT8X*l>r66?*=_F*O
zL^DJqT7itrMXr?yRzPS9AhiZDB&1G(sD#u$kP%2+B&0rt=z`Qi$Rx7+G1;(M22mwJ
z)Ifp)nS|9NkmGR(51PT$Lsdd65%8oxL`ny#%LA!0FiCjAKxRPF54<wc32T7XK9G?&
zBre22NYw+WUm)f{*k~kZ;s&Y9k1PvM+aNXI-J|eI2QCGvr{LlmuzOLp1FN-Q-5f~e
zf`x=tK%hPmm<`<lgtYov-M<34K4R4j<~E8~^e9z~?$Ax2Vv;}Ku5zVg%{up{Ev|JN
zoNLy3wrq22-0V=Z!Mu36ZRsM@+*x*M{Tv37TpC_#E@@hR6&gNe$n_Dt0)o^^5H8wi
z8>IGukhp3exH!DtLGBqr1d;nbFgA494XMT<tv*5?l!H_{5E9ZGl6OYjd?)XirQnn;
z=bR}EzWEMRE8(t>Ad9ylw?e?HAvgoACPJ={Ag7)}Y9dgF2)vaQR3%}YbShz;C~lPi
zsgIEN4Z&+7c<m!@9tU2T0Gbbiupwv7q0~f><#T2ckclC*`Up}1A=gLHng~)QA=O8?
z&RBp~K#<Ku@KxN%^%2VAZCHH-s)=;{K_qy>h+t)eTp#gkc|yhqFiFS(j2eM;@Tv+@
zPr)-L8UwTDf>cb%`5nTBlogP*5bzoY&V}fLkf81rxMV{v<<QtzD=cI|cwq}E9^qVM
z2BdJtN@AAeXcZ6|7hacPH4Rx1Vlqqwc_a-V8=?!jCW5eG=0G=g;qo1_I94_!l;9*<
zXhXOVSqPgL5?Ws%MJ1$114#)4NXR%FHWD(S10qqo?P!f9NLtbis)r<CG!iWtgK8pJ
zHwQHA2CkLhGd{@G5c+itsFe|@%LA!#z<nRc`f5lu1g(RRccjAVBgj%Wb^l78fGRD&
z3Mrd3HjN+--6%!3V&jNbo5Vib%&AU!vmA;SI+ZSVEnaAqG0iT0ibZ0dZdkLbU$wkT
zmbgultVKA3v=sxlo}^x&c0h@`Pl2L$iGo+L1{gsqAUFxBbl@apbPmr63CN4I(Y6Rd
z>L4YLLPhri2!af?p^->65vcov*6Tr&g^b84x#YqTygq_gKyU_pEDn0SHteP$$OWu$
z5`NtSNW?J<-W!5fI+zSl1!R{ZW1k8+=Ld2p5u|4XsgEFgS3%W~bqZv34*6Pe(DFGe
z(BX2B9Yc`H2x;FCXr2hUGJ@1W@G42n40PnFq**MqPXwunz$Cmf5&>Th4!I9P%p^?8
zI6?}uTLil?9C<hntv-TRLvRLsl>&qzq89+El0aP{@Z1ozRsz>YplS$G8R_~%2Ia8!
zi6DY}+FlSOpzX!4<q1KUH4#JvUJao!;8_waBco+=I2TPBysUsP+e2f(OAp8bKL{7n
zRK{BLAy++E+3->qQi?+MGa{>mE5OQtmH&vI1%44c^#YptaQA@Mo`LEh`1*D{42bQJ
zvzw4fG{fQE#1uypfd>Ud9KJ{ZvOEB5m_wFKqe;OV50J=0CLuKrrU*nFjYMv=fY`yc
z+QGGu?WhnEQ~`knArm?v61H>=>u4LiWd%=@$RhCc2~XJIK{L?M8HfR{Qy}#STmgiO
zPGamch1NT;x(2kq8obU8az3`2Zxuv=nr{{4oHxj7H)Zcq6`wNDAe&!>oI@6iS^%S3
zu$Xm*if0+<!q)I+y{I<rxE_t@PK~e@8LtX{i&R#<Xa==l2Gt-2DF+5|TLuYp238dY
zRt*tNADzHbRj)kc`Uqb2AT!{zLh$+sZLbidHw3A1aFLL;*^rtDLc$jzVAe8l5qSNA
zhXJXKAVYKT+6TgrbIOMFiQttHNW?J<v@C&8eFUnQV9OOS>mV7sba<}^QXfGoBWb%7
zNPQ$@oeJMH1YyAIBgj}BWS9=TN&&Q82(o4yQW-%>=ss3ZeI#ZU3#pQjt06dB(hPC-
zDdws<5z}yC@CG9I5(P+ogwiJh_l%HhA_yB^AHgdi2m@04V3MF}2we4m7~uK{Ix7UL
zh9Fgvkd7Z(eFUj`FiBomO$4cru#75zTY6~K5S$Com1qpO0(h>6FyQ$ejiDA)2S><N
z5~Kn`BOyf@ToAb=gcql{81SAK#1MFo43~Pei7H5~fro@t5O5Mws?&~yxCC;T05S=&
z5z|u;aWoQB8C(Q8Z1J-p6%Zs;;UuIL0p~&(kQxWVhLFf(YoJO8wS$8q0<L=Cb&xjr
zI2}+O6jXyCVf7KHcLVOxpd~h~z&gmfH7E%_&kd4d(ULKU8(58=DUfO<$gWaM5^~@h
zG6`8Y4H1E`p(JeSHE7M7Uj<}&8=Qo!$JPp{Qu8TOaLVD)i(pg<WK;=cQVw8{^JY-+
zWl#=aPzz#E4`$E^XVHxj){Em&59d_&V^{RyQSswY@M4#7XArVw;4+ZV2~zXUSM$zS
z@G6G)k8qs}2dQx&YZ4$NL<CpuqvTPD+!azp+7kq?Yp^o#)H{%M+Gq+O^$u1NxekJ`
zA=5yh%E&Pb)cpZhNuYWM>8=M%E?Q*-S&9I!j9@1!q)6MPK-SAiTcyCeMUa{Z(mz5z
zc^g~}!I#V-_7K5pr8rRww9^$3V|0*eNCI><UJQ%`9j5?Ws{lIT2eMucQWHV$X@&10
z0-b|u916M_96T-uS+M}AeL%~$k(Y2A1qvAi3BgX`hSWsxp*gh52vh;VYa%cMS`&fC
z<wSLTMRh@E{y?fBIEma9g3S|o3TQ(Szm^9w;?s18j3l6u8Ua;m{xw(;yjcj!x!}AH
zVL);>CJB*+kjSNkYGAEuU@g2#f-@ks7^DRYuYm9{kn1w6Y|Q!yE&^e|i(;${Ncm4Z
ziB|HH#Kr0WG(kxJML(<+v~x2Qv5ON;nO0a6F}}mp1*v5qH4&ulAc@3UA3^FD<Vb{w
zV3Now1j>fhJCG^~Q3t`qK_Y>5kZB!=7C0M{>L3Ga(3%Lmni`TyA;}w_p5cRX=#^1G
z6^a_aYDi@S*<K30n!&FcOT7cW6cx058eHi>D<i)OP&EXuk}y}hfodi2ZXiwnN;RJ{
zY1?#8?NA1JA5b-<=*O-RDrk}@VVxo7lqcs^rsiKK=UygfnJHzODr1(YV3jIu94oFL
zDXtSPsue1!8?0@gsux_Y<zKAmT_WdEsP0*$>0LrheFRySfLRm4=70*pYqOz)a*(bN
zygov!gU~7$OfG2ofeYx$Z>%*ArXZx=K_em65S)b6N0{}FoMXB)c-Rem*F!3TMBUAb
zsENR5%0cQN$bGDkD_P-n5TqIc?HjU825le$uUmk0i{P_D;KkdZ6Mi6jS0N<0G6K~{
zBIck;B1i><Op2mZMxZ-eA(fFhZ1FZ^$sD96f{@5Za7)7KBlszE$n_EOy?AK#5n3H2
ztRD#9Oa!TqgmwMl49xlnUJVI>m&^&kM&uyfBDDGlttR5rbjMX6fkwZ<y(CDUMJAE+
zCNdjY4Kf>ECZI7O^$}*3gj^p%*tlk<Amtm@ItW=1QY?eIMZpbP!3`QA4G>wlIDQ7C
zWTrKVRxjXk60Vzc@KqV$^KH<~hjTG4f>$VzK{;F|Bd@fC)B>253z`Tdz91xgU=5-d
zPU-~K!4XstX%`Qu>Otay=6AqzJ&=`bkZK4t$Oc}h2A72}P-+>Z{thG!L#|qYR5V%v
zRaya6kopC3&jM83zXo3W!0QxHt%P(J1E@lQEvSZ6KxiTmt_HY<g0SJFl24hUcd3$h
zg^F*LT0oVOPnncMmat{Agk6SqV6|Q9B)8mIu6eVavS-+)Of-yWRr4z0H;!eKbYT&9
z;+AvdS90Z7_7qa{5zz<`(+U#M4&c-D6EzB#v5bSPnwIx0lJ_jatYwfzAT<$$goq>8
zL8xrZm2#l@AjpglX1#+Zf?3hv7eT9lAY6C_1ZSY^8$#+6!K)s$`bf?$Rn|5If*?2F
z;UXa`<)9?^U>``W1gVUWdqWU5tUdyb)IsVXOcJ>Qg0La`h9G?+ag%5WL&7u$az`Fy
zdPp2RRRkZJgD~KGh#)l)sB>f(4DAgeZ6gvh41gnet%Q6#9{eC~$O%8lB%&HZtm5_&
z*7Fh3@dhIw5gjOkS4Qyq2r@$i=@ubZLvS{vGUC&8hfJYjl8_#gs$Vs7<)Vg>ks;Yx
z14m~Ht{$y2(hLA~j^I@igaPRyLE5em5?&uc7?7e2jf7M##E=k;aFv)vF;)>;*F+HW
z;S~ggfz>4tL5O-Z5?8GOsed6FiL8hrp#&k}#zMGoQVYCG8eZK%D-J?O;y@x1LPE4)
zrV4G?@@XCL5FAJWxW<9hDFjGJZyh|^22qJp2Z85zpd#3kvASP1sB!`Kbl~?aK<X3?
z-zvzKQiuY0WdvbB>MVG*LMj8Y<ww=K6jC24dX+1Bm8%C<D|(koJ7z1o7h1-5dgjk`
z&6(+tKE*0|f?-s<yj!ubRXT@G7=yGcvzQ~BqzkWtvyiF>zp^K<qC2;|JEyEGgR}#K
zxD5lp34^$egiWfldx5-Xk+f?b@*)J(`UTSKf!@vl-jNCtK~@7fp%g|!Yajd_BK)-s
zR#`}0gNuY$J;)6B5eo3d2_Od2o#62L2+n|1OmcRq@G1#j6G1B->m*zZNF4<497$Rv
z!D=7q)eqw4ao}zddSwJzAP1?Kz-*Mx5u`GL_l(3%qTvX#ejBqUf{w+(?!^-acZ(pE
zkvM971R0BiS4Lun0g(C#UI8I9kgFj$8!|BjCBb8KLb~2Ux{%7q8(tZS=zum8!MZ|T
z@SYKPdI)87j$g|IUJbz+m^Bgd30JEAm2d>9q2TRCWCr1tuHfDlv@`&-RRXJ20;^Pm
zs?iY2xC(f51-kM)s0J<t-7gQGbbuF<L^9x|C?*3k&4EV3%VcB*T6I7k7qaRMK0bzQ
zJ{~sIF7U2TWT#?kf%kNfrLcw)N>E_T*XRb-V?~gT45VfuNJ2U<Xv#p<O+YQA?!ZNA
z1yn(nQfq@Z_@I#xS!4ysX$+YSNxdi}(h4_7MFUUL==Bk}I|HeGAYB^`@K_v#jlTkd
zT+9t2kyS!O;FS?%A%UuQshn?_jCYB&LoTOYG>>7Ns#m#2Sc^&01hcd$*4Z;1()ta;
z>dpQ0WlTbNqzxGu#Ti(HSUANwc*Hq*B{;YxIk+VmxWrlbqy^<P#FTXf6)Xhg%~`cP
zRJ}6Pz(aPbpe|COiU$a(x)-P-=6%p>A4p9Et8S22W#@x0umcUUq4jzczzg9Z3$U@0
zkd<%9Qn>0JG;w(CBjcD3L6BA4GDw{u!~h+*mVwkYD04$7cj1BNgCIR4S=$syWrV8&
zf=>*|StrR_B_bo_nh2>H0xeMhuTKD-xed7*4_pC3PgH=gA*;AyognB85qMk<Q4>MV
zK84grkggDfM6QXTY;ZqF+%Oo9M2!MP3<Dr%DL_eZ|47upU(7H-6uf%~xf+7A;dKz4
z0XppmykbE}#}8B`fzRT`tdE4iXDmQS0c}ryEe}Y=#HZ<jSs%gcAP9qB-Hl(}4QqV_
zuae-Ekt%GN5IoPKF;H?kOff_dUMxVkkikGmeFP`rwGyP#Q46SoFw_I9k=bxT(3vWb
z_At_360C(S+HfCyqzI)D23KupB9PJ^xiF_L8)81h&7_(P(F-R*o&t|XBAWxLk09pb
zCs903Tzv$oj3B)j<XQ$_y#t9aOcJ#R1W7HB>PZ7UbOsqfBa(#dB!w6PPjA>6Nc9n<
zu7S+;!0Q@h2B<3W22cHf>n0EvHZF(EhSx8U>PF3{3{3>9IHWRykg`6d(q6^<CW*`%
zp-k!_YTgwlNfUgkR)n-{axPwI7~UXgp1>?^&A_V4z`)PIz{&suj0_Bn3=B+QE*k>_
zI|Bm;0}~IkfE1^M0gtpXgQ~NlTMBCJ0~&1uRYtG_5FoV=GKty`BDOw4u4^#akopLj
z#9Hsb1>w~YoB^qfkV!~21n>CB*n_$}kjhBTE)`NAffmTwrok7;!5Q$H2>DuY%vuRD
zB?LL+6uB~zwSpcu2d;YHbr6&RUNVPrDIR3OHl#8F-x3aAt^k@Lf>b?V5>y|F8ApLp
zgt$p0bUzVz5goMl0r!a@yIJ9D<Ur?d!|Ee3gCIx`2wD??PxyiKk03P>gcLULhg3Rn
z281i5?+4jB1Sdh&kgh*y;T&x39Hc%H(((i&=m86m%1B7d3pN%9UC52UKH}8?RYv^k
zZjiO6Xe7LnQt_*RBZwe!CPijLWRXeig#fIoQVytuS4PMT$ZB&m61f_Jvmvc($jBZZ
z60JUhEWtu9LNVFM<um@098(rqFEMP03CQ&bel{*MA?6_OvVn*otB3R}kx5A4fT}a_
z_!uZCkk<)7*l;Oq3`i_MNcfTf2$vuU9qa}7mmxI>CP|<g0?+9n&+p($b;v0b-rQ36
zuY@$K;3*kWxxl$-Js>z2vY8Z_gzxCVWI)a%#T18#K-OJD7PF~&l_+@?D|i%&8ijLf
z_%ce{N|}WDXEb>x)z}54i)h#~aELQ7aDl2IVlfK?12==Xfx2s)rgy2bYd)k_f{@Ct
zd2j^l2Voh7!|Vh>`azI`5ilzqxCnB+gNrTelmXwe3hDAdNaPv^A_A><VErSw0y&Iv
zIY?y$In@WU8Ua=TB|$D@m9_(&BPV5>45^Q#t&@=JBY5qD$&j@Iow+SznE*$SlM^uO
zBS@tKAtlTrB+Vlw%pyR@6x0ubio+HyfXC$`!8H+-1aD=9)<<HnokU{bbLZeS5prcD
z4DK937cCeDL+T*p`UtXV2vP?jlgQN&bSDw=DsJ$&9Aw=Bv?kK=12Moy{Xph}z?G3F
zyc!bH@)Feagd=b@1getwG_j1%L3&048t#zJ(SX!PkkUXopb~<RD<hRa&`hpcK$WV0
zC0dn)%!O1*km3$cO$3ob-qH)dwHZ|KVk=$Y;_zY|QmPYA!Zi}fAl_t%N?gW5WRdG5
zcwGb60vY*K53GhTKxAMYNLfHF+~Zo93kHy7VSxq-d^8eIeFUv~uua@RdNgSD2&7_y
z*B6@pm5_=FQhgvu_@obvfou+19fXw&uX-Rg5xoBb;Ubd|l@Jj;H4&tKfz&m~2f;za
z(U!MC1mW9%)V)iUy-O543Z=|rxYT_ZB&`{QOqiri8F<tf7zDu`A7bkvm^u~)b`cgO
zYc-cxRrf+w_d<9DgvP*HA7QPDAgi+Bl@2BYSJk87lnq(=hD<{08X3oQ2m?;Ss~b22
zQXfg#r=lU`o)M(pfz&e4)8s&x4S}v40<YGFUdbwHlK|PZirf{#tbO1j@Ig6vH3VnC
z>m$g51jtraQBy>>2-FXP)I<;xTpvL?N)o1^8(P7YQM8ybs2>Ebh9Y1jN_`}56b`S6
zARQuP5^c>ka(x7?i6H08>HEPeBS`HdsOJms5TV_U2d##{RS#(49Hb_KkYGWq^%1-#
zf-vycN4y$t@SYK$nk%20D_YM8jjQBS21gJnNPa{kLG4EHI#sM$9;+avGD0RH6%+CZ
zA*2q16irx2aN!22bciR>icqZMONhc5TwFt@;Nb;6eg?eoCz65HaEKsQ64O|SYq2WU
zfK?w*HK6MZYCzYIf+yxc)p|fJypD#34C?5aW?(gZjR95$WHmLu>INK5_!NLw8X(sr
z$ZSN_1MAD6h`_2L=md~oHD+Ri?C8N1hloIG8BhX7AAc)V^MN8r<pQs&RJ}_zU?*Th
zq~Ii^eu0qiS_aOAbb=rxWc3<^4N<A;U83q;qT*jF>yamB7{DND!NjY<z^cH&zyhmW
zNTvl9J*~Zqbv?>-+)GtFiWFQB9U{=-Z{VAQlw5M*brW)}1gVU0Ejhqi4MC2KL#}Rc
zvE>{xAySZ9243mlVL()(RYp=6Q$mm`3DPZslaSg+(k4L)d`mdwE<DIx58&&E65%sM
z@Hz;>fY(G22BaF2GLMrqi<L2tlL0dz(?F7-yN4j_6F`$hqNY*s>sdkFBC`m{>NqGH
zat{%hgw;L~qDJ9Z5vZzy>}Ca@HHTac2^$7ut%)FqZ^P$<APjiDgUo;o$3dzd5q)1_
zJ<yUlVLcz{SRA<C0aZpY24p@6MuN5x32Au>YI;CwA7oNM!$UyB1Kch0;MH&kGe9f4
zA$=kU39pYJl@XtsE953eG!jzvs328FN<L)}LA06(&V|S#laTz5i&XNjfFR^5NjU(q
zd_vW~5;9bXTsooDO5lP{&A*2D$_TWr39JiJJVHxg%oz=&9s#a$9+x<z<cAzX0cYc3
z;4%l7I4+}zQIAp+L8>GQ>LbV~86+g(BxK<<G8b|R0fdBzL)ZusHiQd_XtV@_Qu|<5
zNwAv~AQM2KgRH@K`XHx6{A_pvhR^lDt~~|S4xkFc3wok8%1jXK3I^nw2vV(})jr4v
z>>$@c$ZSZhr0iLw;#I8TUn=R6#i`=Vz#zxKAi%)DNpd9w(;zLbYUNdA>szYhUaH|-
z2wsE$t&bol9zbdw2pd8|L?9&Ang}8&=afxSCkRsYppl>@a1NP}S_W4&gk1YTR>UET
zNZO@H+JTlOK<XekiBS!KM(4m)QUbVV1R~*;5u`psyY2x}`@q)7ffma_L?FE(NChNq
z7Ap-q$p<0==_0|aAxI|(-amrWM^F;ACW2H+kjhBdAQZwtCLt#+pw&c>$_TkW5=OlM
z0#Xe@NaQ*QnT=c@3F&&{ua98W5c~vGP<;fhj8N($NKFJG!POA7R)W+)e46f%Y6wp9
ztGmJLBjj8LVM9(V!Ig_4YS2hESZ#%=3!dL04N*83Qr94pke(5;2&8C&6jPWaL_K6V
zx~g9_E`(NLL5f5)QWLh!7_$oisk4Z$@^IA)5OGp#6NoMdiAyhJMhv0`ZEzAY76<Wy
zW<V{dLj<mdARSN81#p<PGkCo;a=j0!cW{x2Y6WR|07wwEmO(@xSQ(`HfS#O#w`UDW
zHn>P=a)R8X0A6nZ6+!RpltIMdeH;iEl5Qc5F(t1O2m`XN0GWhTF_5YU(u;u&yA`Q=
z7D1{ZNSy+yW#DxXoC^_waFJC)R$oK*`#{8%yo!~*OH^HRL`<SNrL7s*)EF3ez_k)1
ztezo?=9MtEbct8<%vW^Dl5@$Eb<ULsBS>8X8FPcPv2r0*4}^p-Fn}|V>m#&^24xcv
z>_8r9?SsUHtTcd(wjtLt_}TDE2R#0k3?bna5S$@t17EETx)vNVE(foSAPfoXcnCt>
zF$7w(4e1BLZXJq)R6}qQQu`p2kQ*N$cR)bc5K`PU9NHm*T*(R}Ar+9AF-D&Vwn_m$
zH-vl&H*|W)AXG#@1d6~%D?oZf&~Z5fe-WfXIZ-edGBk%qLi$9=B=VYV2pf4P5v(!-
z)j_a6kq4|YLNEk0+>sHa4ieDt;8%CYh4|Fm_|)8Z)IkW=8Z3oZF_2t|i-apfV<4+X
zu9e^!AB_R(a3L+0K(2=1Y+~vo72hfq-zr>{5oG@qvaE(b=<a5;X1oT*v^czCf)wV&
z*GHf(7}6*oyqbV8FzXbE`4Cw+iPbuYN@NotA`lX*7PPTA{PhvEj>cGBfU72gYz_rg
zJ>V(^odK?oATbCbAypEj4kD&Lf}|o`H4(}P8>~K3^R9p-OGsjau<?`dbgKkfOHiT&
zyMYd~K7!OikX{Ugt>RG#sgEF438Zp?R5~P)N*+au?u8I73Lb?D9)<e;WyZl(w*L9X
zuBpa$0ip^<;N^2f)kF|Qx)#B%0ojT!S&A-MGEO=0`Uq0LV3MF?-;i!?fYdv1We_g>
zvIcnn2(9YDteDUgKx!puO=Oo2Vt@zS@Yg|7_NkDr5L#~tUi-iqD5ofZHxNP2GKHLO
z3aN%5ce6rjA_xiT`yi8$`UqYDAv2JxA;__E;04>DRSJ+g2)tlB5<)_IL$DpJ5U!X>
zn7BzKa`y+phSf*#t`MXS5;6#atXKeTBZ5~(`XP|BeV{cFq;mwWfJmx_AXSo(E_635
zv?c<t+=kUb@D34l)f{Fu1Q9{5i6Cq|^%0~Rf|HQ?2-3QPv;dLwAXYZyk`zc41|hL#
zc6c5~W~lg-!MTv420}s#D>w-e#2oX7R8kOSkdh5TVv0b-Ng|QE16bMc4h4dNZ3{43
z`46cnkV$wor;R#Dh+NA+`a8(Fkl7HGkmc66OhZ-=Wuwjtq5Bc3mjta=z_kfvWj>ye
zgakg3B$k>5a%>LrDWi~@1*vy~ymA1O4ehgdm8-%KL<EzBD^T?+SNAGc^DKjqnCTq(
z^fpLEgDe7JBa@Jm+%QSV9v|Y!BJc@3puMOH9!2utRR>BgxoXZCI-YrkE(r`g8sPef
z2~yV(LkcL{T6m|byXPr8XURI{gOGDBs8({xl1D?}Y6w!};3DD6t>Fwv)dQ(tkV%L*
z8VOeb=?x*1keUcy-N0)cOc6+>BVn5iM-UNY5<Dme8l6Ks(G)Tn1i9-0LV~Ucw?quY
zffvYu_6k8Zt76{73hDbmNN@!dBLzA(AqK*R)I{(BI_TkY@LR%>>LW<c2(o?~Q4JY|
z3mb$&M(Q985DA_eg49Hip*c`x1nv;Q8OX=`KySnYua1Lsg~asy;7AmFz8o$RQU{6X
zcnfKHf)85&F<=YlKpi5;(gl7kL_Y{p0l`T~?SqGeub_i4-~)7M3`mZHkeGQBO$4nz
zg3F@SO7MCIKLfdjf)5hH8Ssh;q71UE0$0(8r_3Z(3caib_ZJY=PyloVAY_~njU>B1
zf-I|m)E)TiBZwO0nh4T=f%a~YY8Dg`urDB9!&1wDJ2(*a@cIZ>P?IVSk4MPv9!Spx
zLV~7yyem|Y5a>uZZ<HfO(dr<~ibmbL6uGW}w5%|dLDZ;tmZ^A_sUabF8iq3<bqyp(
zKuBZ}2pdj<huT0Vi;`L&DZ3ZIt13m;d`NvH@0u^`nh&XulwI@GT{3mO@{Qe+82Gfn
zbr1>l5wC)knP;-Pd!CY0rmR!Gj3a2s4zoUzbIyd<LGZc;nE{bS7J(dw0Iy<*VSxHU
zNWCGn`Utt|A*nt>u6-c2jD&R}L=avdA@_P9r)q;Pdaz0mHwT?52baZKA3<s#G!k6(
zfT|%}^%0^n!l;ix3%0?F7Q~IhC6ET|K(!CJK0;yOt&fmvBFHIokn?;XH4%gq)AJL>
zT@AtNAVjwaUI8I91VLL_5$7rptbicZ4M7sE8iI2nISxX?vm^$i7&O!B1tOKaiopo`
z$Oc3sd?+4&eS{_pDY&o-LZq;gka`NM6tW;>kvZthf27$8JXIB>=!P#vhRjvrCn4oL
zq*RBje1@~}Fd!R0kx97Km<(iPa5kng{33XK4Us~waWFfo$g;?6<j}^-hSWh25>m@R
z*vN{}8aArlrI4Bhq8`GAlaQ(hK|*R7@Ja(ndWNWkD1g*N${wZ49;I+Lh~ZTNu3U<g
zJ+Y!<uq-+Wu7-+WBu0JYnXlxT54rb8#WNp)z^5WWsv*dkau7A3%E&ce-lb69rBKEt
z4^$<&=F7Y0tGHxpc;p(mCNl7AgX<#_7TU2&nHjhyD!b(<IA_W^=E*tc$vEZ6IOTvU
zAkdYm>2l8L3eK4zSr9i9xyC_e!!N3X3*xMgz(*y(S0+F@K9Kcq$Q2MW8#EbYn+Dyi
zipBshRDkq-AcrSFNJw=9ziSmTPlP!E1mC*~uW`T(&^g=U=AgyckbV%j8iH<Lg^7TA
zN$@%dT>C)B;v~(W3+EvHAmr^r5H_SH5;utit%w7!Sb(4KBWfH9Jy!u-AHjP@C>0Q_
zN)pl!5jF^goOB8y1@%D(aYIJu@Yh7bdcKg#2s{c0Su6)Sb{ov~MP4a~Rv&>k5P??8
zAy-C_c_P@(R#2^krw4?*jR><og4Z~32Cu3MCW4GwLrAPy4=jkts*0XPU{tK=iO8go
zYDfi~wN+r_@bCe7tPExEQX;D@<njuWO?-U>(TiM!Vr4_hWH<>?0~syEPeQ9aziK?S
z52h5nT7fh0*Cx=jLy=Y;z@3Vf0d*C4{0-t4tjgemkXv{lDv{Zc(8H{r;Nn;r$gK<H
zx&~5HKtyn<fyhEOlOoqgkopA+iBum!=64{q5`=`*L|8~@GDf6u<em;V@guI`hE+iD
znh3%`+&~98>{{L}N8T+5vUXb8BTv~QPth$0guv^oA>ybc==1|Q=K?wB0vVS)8J9fB
z3IrvmbanR}^6Dc75d&SPI3?F?P>q9HAA$Gyq{%v^L2hIKb2DTevmg}>oRqRlm$FNj
zama))AS8q<iCT9+yFWN)f{-g92pd!lfvX!3!!`~5JZ+>?wc+&<WI=+obrQTE1ZP0T
z;=t7nXek1m0ht4WbHNPgfpU-oe83eDyfOkwLAI%inMGr*k0iiX;6W-QQPU_$9fV9G
z*GJGA2VM<<Cx#&XBgnX%C~S-lQY8uLhoB)r{XjH?Rue(GMMB`KA3&WWeLqO0BMd&w
z2h2v)N60l1gblBcz%>!%bOp#s3m_8S4?^o7;i``?D<Cuxcr^rJVAezs5lBr0AtAXB
z8;QuRIGT}g1)xew(Gyf#K}f8X5gtK!)dpuEmsgl<ti>QPg76|27Xwm6LuB#SL=ahc
z$xpCCfp@zgR;Ysy2~-cPhV;L%j19sE2~mgLAle}=#7`2h5>f|Yu0w<LWiUz17{Mh1
zsb3%@uKEZb>2L-_C6?O9s{~TfAV`l=RgY50d?-TDvlzn0tcf6%i=t<-f=3Z@9fZn;
z)Hqm3MR#bo2)Pb|)I<<c(Jcp26XC6o<X!XST=L|c3qS*FsP&PGOQxP{B7>kVxPQb9
zCW#~XHFX`L6kW35^^vq=mb7D*tWyT~%(ql|SbYSkm1G>#CGAoq?9!ziGLfq!NPh?3
z*?}<NqiUcE1@#68hyp45WH^Ffs|rz!Ai<Ry<RT#~q)jrs4uWhBf^Z?V53Di*Es&G6
zjDt-Efx17C3x>cXsCt6W1WB2LIzHe!2uuG6yi^<1{ee_Tl4jA!JtJf`as>ooL#Bk#
zNO*k&KY1InQcl<?5QGeaK`R!(9U|mvNKh{r(m4{;3x-rikk#6lH4(frg49Q#!+gMJ
zDZtlkgEtWQK&l=z5;7SC>jyz<A_$4ECW5U@fG(dyt8U<2NOc1z1=L&xz#CoRA`k|o
z_Q52PPppFEP;4YL>nVFeb1s|#$-MANO39;08L2XYR7sd5N%avP#aN}VmWRaDP!Kg(
z3wB72gP+8zk)R-?rhwE$1WC|Qe&C)CL>Gi4Xa&5YL{o{`*@09>AaU@)YnZhUvIwNk
z!W!CeLF5R6S1~H!AvHDFN;XV2$Soqwng~+AV3I2CC6M|EvbY*e0i-@saxXz3P&HJf
z-~mFAWDFr8^$UyytucV)2yg`iS_r4;4iW^D&^v(SUGpGy5Hbns5J9RTIEmChg5Ue2
z;F1gJ4S|*%IOU`DkCdG=^xcveL=3={5GnN$pN5Woq@oMxS~?l~92xr@DTho@T?0OE
zN6s-7jMAaoe_)3mh}))tsw8mF2+|>f)h{TgBB0er@G3?IW)8eElCn?6Qy)p$q(W*U
z$lVRdY~<PpQU@WEX!Q}e_Cc$RplcPd)<=*bJ8;zl*})1S5%m$g0)o^y@F6(psyXmj
z99jj0#zm`-Aj{;$jYEZv0)-3%K$VeUFlbN?qgy1T9|EbBkn1D(B}AAELGUrBMAk>J
z^>Uyk3TX8acq^-ifQCC-&j?Zt!AW@k2wnj}7?4^9lZ4ktm{kwAvJ)mk88?7kd=JZv
z1>j5vt&AYK6+a2dvyeOvCo$_IWD(@z2sDC+I*gBX-U>|%+8SL*3mg{-FDlU(kjjW|
zB)n>X)F}jOB8UPN@Z=Xn3));5+(0x2sB*zL0u;Vj8qPqjXdrA{0f;6J8F$5u5lj)x
z`Uq0jAd5rTSnDHr{Q~LB!?|z<tQsl;Ptbra`cd;J0;57Tk3uE)5=FOSNcx1=FJLYx
zc`LdXKoYj1dx5-rfvjtuylWm(E`ZiMxI{4PBRSV>IoE7?w;To6Yy@&aoQaKEALYt|
z7rDthgBE7Xf)~QcI_D}nW+*vj>bs^eh?<aGAMvPZ+eIijWhywN%Q$3#5xC>ym=398
zWE?@manMS~4%9i4v`c}ktcD-*CT*7lsaa$|)k-q5xQtCA9Ld@y$=bq?hJ$QSg>#c2
zXJ|v}EEyZnk#Ugod?2-tlufFnbqb_Df{;>{$smSx5>f>e4_me!59tlT>Z3$)s|0vu
z1nK+0Nk~lu=Zc%hz;3|<F~A)nNHr7-u9!fT5qvBTYkdT-aUhixgbP_I2Vq0@6G7OJ
z`Uuh|f>cHj60&ay(g}k0j0}Q6RgYdUv>MV27SIg>vGoE#wU53Z^7d5(8!RiR>yKO?
zq4k5%xUkB|3$=d)nls|lbm!Od;MMZr(RAkpGvE~vpC+_JB&6Xkq~R{8?j{I6h#P-3
z#0Tyc38=a9tGZxCyvj~k5q0Y$NKS{>L=XmY?L&|a?`pvrkSYnhh@u#?l!J&s$}v17
zyc9*NhTvSf)kF}B;I#&XtKwGyKaT?53xiCJA(N2$2(A|w18OGHCS^z!13!WQ&c$Rv
zmQX5t7Q+J*9&}h4knkslgc}I&AHnM*<O&Fl4X<wC3`l*1n*`mY0Feb(Mn!5K(E12e
zB_X;tpgs;bxk6`u;1!UpYaYS+2vh-q>nsRE&LtO80b!De%E&bv*ZxyE7ifJX=adbp
zj38?d6dcl(owE$wQW?d~NUo1Kl(cNak?SL9)dSnh1FCv#(<H$NB!%eOq$1Zll6Faw
zcF@WNa@Y=pl(9*KA8`Pwn51kI!5VSaM^d)QkTnTV(mGYzDn-&7gg_M#?2<dk?LtVE
zQ6h3>1gU)>E9D@hq$OzT2f5yXup#vkysm-QM<V9YBIeP^^%1-pf-@jB4!m0gVL&Px
zI0>zBASd}Ctx|x@2SG?k9fVB6>mz7oq#pvRjPwIR19adjNkBIUf<Tp#UI3y3f>lhA
z6BQse5rh<m9J>uZ(nmns3sMas)jo*bLl8m8`UK2=5Raxi9AVZ+keUcWLMkKVa}{v)
zjIdTl{HiWkD<fWICzMeIk32;*gr&!XrL72QTY^c@NIRq&f{@@W4-!OXpp`kOT=-NJ
z3In`|q6o7(gOr5GWgS*Fyex%RKm-|>r8?C`5aVOujt^S>f@vK{J<{rE<dxF6*l0Bl
zJRFe4AtP^aDR=-vY9&Yw1s5cmQ4DWUkjeljBv7|S3AEu9Isrs*x2ZC8OKBl&(HiJ-
zA2p9c=vmh8p!<Cw{e1{24_?Ct@A5!u76=I!gfrmc5C)tC)kL5Hw_F+5Y#DH$2wo*2
z*FKnR$Y}|%4X3$~!+hkNa-^KHrJS-M^^u%ox}r;#o?9BDq$T(iJ67;6ByiP4kYH2N
zwTn`6$^_lH0P7!t>K(fbDZ30w+jL3WbZMJRX`4(1n>0n+bP2n3aoaS=(rbC!czN4+
zHT!5a`)C!1NEL@jRr@ei`!IDdQnrs$w2hFri;=gBk+hAIw2hOpj+M5FgRDJ(^n)Nh
zCP;5c!8S$S2DK&vb0H&gkeUcm2Z65qwoHcAM4+w^wqtxC<8q+6AxM3M-0Oj}Ma-ii
zwGw1y0;E=g)I<;xUiF|c;A;{fRT8}R!DK+r@IfXaH59Za0-vG)sfpm*g+vX5A@vc2
zgw{mhbqfM|0fPF0keUcW!s{bw1q7ZR0<Tg4t(Sue>iP>n4mgF80$M&o;QJsTRSy~o
zJKYqtQjVDV2(n}jQU{@tXtP3?!*N(@A~h#|Fyd8lz>0X3?UCDW$fMs-Hn<T8&y_?l
zK+R5h_d+;Au7<F(Q7fZj<gy6DhLmQw>LW-Eg=`4c0u!zPQh#BR1gj_NDuCBD@cmYx
zx(2C716P2~Afi4}_9}-X<XQ&8hHPYpkPs1M5*~Vx5QB4ZF(AsYktodxNF?GSArXz0
zgw!&S&Jm>7gFNGjjSafl2T~<L@ArY+s^DIz2(Ex&)etzLy5>WYG`xNxo&m3kAPmg<
z2vXNz)<lrQdE}gO<eYL~YVs6Z^5mU!WgS745xhQ<wo8H3M@&*yB-ckQ3VQa@korgp
zTn9l`DZncuDZ31KeWYNMCT5cgsgJ~MlH_dS<Za_s?V=#{k*Z^qx?_Z<bCjlIgr;MJ
zs$-0*V~nChoPvF<lwG`(UA(kSoU~0G{A>hpKL|3K2dakTZIb0t7bie#BFOm(mdTLH
zNXjx9ULQf$!hyR&pn3<)g)$(u3}l_Qs6`wYfyUn;H4&t)fsl|oNWwG<vQAsf4Ad=x
zNC}%op;beorcscZ2ts1*5J86H;H%@1Ya$36dC8n8w)zNEG3kd0>jgs)yguUB_2<|1
z7tjlU)I<;x()SV2^?_DB81<2Wu8)9@A98&ppydNa+UR{E$c^D>^$~nB2+|wk(RAn5
zaD#M_AcxCA`ax(l5rhk=k1(qtxCo>^Lg@z~RYN?=_E-_}W^H)W4$gpQPe`^!CgD=>
zfex@NIGZEYN8llP^!^c~feInfDk<>bKV--SN`jAMfh;n|(m%ph5`xzF;Sz_`UuYy|
z^+cWsd36?OiU;XL0_2_xCL3OVLAD8FwGr9znDq#x*8>qpu8Gk4L6881D8ouZ6u?Qe
zc*I({kRb@!H;S#^0o5;%JLBN8;Eko&>m!ItNM(ddf~JZfiCNAi7mWeo%Dd*lr63}*
zpot#%Tu%;+3$0%udwd`#yg~98L<CAYWy^tjK-n^&(K(3TT+op|j#;2Z*Wg`3(oXqO
zj(O5{DGJV+dY&0fvNjA1kc;++-V)0wt!5dlV4I`>n%M!3p+VMKE7+ze*rq7irYPH`
zs@Wu|SS1+P1R2-_Iomrq**OL}nFTtT#W?H7IP0gmX{Nht<$5XQ`>2%qE0zT)RR$?m
z1uInrsnmq1mG~JJ`RJ#6nx%P|`Z?SAINBK5`WxE?DBH#>gS%Jqw(*d;Avx<LIdIhj
zs)itga$qi`9|SqE2XjOYa+WFDnr(131X(7BSf>rCcQ8pv&j>OT1h0LN4?BfaJqXe?
zTm*)odxgM@v>{uEFz<R0H3&sR(4LWDFsR-EAEY3x9|&hd`b20e<sh9PNY#VP7S#0@
z(hC614Ix!SkbyW95~*7RsfqZtJRyA|Sk(i%dkD@3FIw<~S3}4Qes$2*50GjIP9oPw
zkd+B2q`EVtzk`Rw+7IGYall#`@hICv@(YB7S3?K}cq{^v2O;$igbkYZ1$Uj`TySLs
zTK@pb?BMzcw5<<yxE@jgp^<3i7^G#3OhRfX$hlsSF+gMzULF$3K&y!$T;u|ro@_{M
zqU2SAS?|E-dLUH>q()E$-H%fW(M9A!0*FR561hr3lY(%G3`}HYXz@r!)dSH2OGwaR
zVOUB4jkdw-9S8$b#bA(#8|C2FD1h%)0PpbuZ|wmkQOFW&7nI5f($j$?X*dZHL?dAp
z5a<LR2wMhR#XwFofYdt*PT7ji@cpNm5J3=$R3AZfK}YADvO&9h9I~VwvZNgIBpq_4
zY?C4N5u2g|`0^uG1_t7n*oiBd`YYNc!|Nl+NE&4HOwlG;!8%F7EJn^GO2xoUS>M&t
z)WXu#!qZC6$JQ{yUOU=JFV$58xjrfnREE??H6dy>A!=m-#>Kt{8D8dT9;W^-_CAg_
zhBkhNHhxMraY{CEa$qEBog!hCj8-3kdP7!;kjhBPDpA5ZUfc>Y1cyd~Iz*6z<G|Gr
zyaGb0kHFJFprr_C^%3Y)ZE(*Btv(VliA3uPq18viCSk(hYgj>hg<$m&=<GN!8@etI
zyjmNrG7<$Z-WE0pMN~irLGbzrR1HD8Mf!n|rP?47{Xn!j2vRW#f=A&5bo?Oo5lRIF
zo+N^cK<Xf55`TRJIddDi8iKGP^$xrmf-@j9L@0F-@<Ijh;2UOTgsToh7N<&m1ka?n
z8K8aTvfyJ+AS7DtLy!wAe-N!)NC5>atU&!Qf`-5qU=)&AdRUMu2~JX2A3@4<B1w3c
z1XAhXAt46B)kC=Ck&sFRzIhqV5Qwa@Cv+1ioQsQr8Nx6T=mZcX;_#D@D8^1gFVsfV
zM;H|lsG>m<#8MwAxxgl9!1WPmvyTgChzvAJ25wd%kE%h^Hhi5AvJ_;fO#!_808tG&
zXUc$&H~`f(NDSoD4^X7Q^^vS&wyYy`o(Qsr0n}-N?froqh9GU1BJY%;<DSN$>;$@k
z9CRZ&!FeON0x?AcH)V%-Wruh<`$Sp01XYJPb*BVlrzk^*2y?pt19MM3ZBsR6U1L>Q
z6E(SD9iDIlfjmq0B3tfyXU;}f?rtxxULT$gf9|e8-U-26lR|l>hI3C1=bjwKJ2irT
zMmXp6FpmCUzTQBdMlbO?cacN~{X_?S9qV8n>tI=%Xjz+RF{@-TE6{upWEu!m2Z2ZD
zAXk3NSjNeKPLKmtLlCx<c?_HZsfjR2SiJ*T8i!sJ!TUjQ2IL4+3GiKbqGk~yrs2XS
zVeozsqB1gxg!F)f4TB+_BX}JIKWZD!fSe`=sgFeTL*NXwng~+kfJg&>=r$q)e@JBn
zsfHjVXsI@Q%{F*t0(5->r0;`T0YPdaNDl}^!UyQId{9T^Al)L+JQ2T^C;ZX}czwjL
z?gp6~g49HCl26SQ!iKC*z^sV`RbAi+UK1f#G`uPf{78sT*`6OfOov$kaVyzjBIKMR
z>ym|zz~d5VjY9|*k{@NAb5MseaMnkVMk$<xR~cvwT=fyGu%fg+Lhf6^+3?DUat2!Q
zP9zs=O+=(#;+5g5Q!vG0bqe}GD;{yQc!b3lw3|ab7g8O-Nm#vrR_UN{A&UWEBw|f9
z<W3(536X`=L=Y0Xq5!Erg3RVXlDDF39=s-kh$EA-&N;BIO}4aC24;OE>y#nyl%e39
z2|fM5DFeX(kIO;oBPj>y;oG1p34B5V<VXZiN69W7G!bK;s_mA_tKkm1cATvGNXb4<
z(J4jIDMj5WLB&2+(;`4Y-(5-5T3pURPFht?T2)U$!azyNUxOo9hbPOFIp2z-#(}ND
zg{#Yxv&Wkoxjvc_#x*63YjPOx<S^dpVI0#!+53Wcdjhy2^--LyPQ0D2mSrGjeFUnB
zAblcmeFPbWgH%1>NuoH&S~ye^`z;TUd+xy9BIr78c-4cI0jZCmRSdW$g7ko(q;Z6h
zQ8)xa>LUn=+z&!N(G*e>LC)}jjK9H2VLj+D9jF2VcY+WM1AoXeInd?c;C0%Nu{cO?
z2wwX@DkkWH1W08BUYP)?gM@TJH4dl_LY^A}*GK%C-XNp}s)P7I$IU?*kXa#kWyGfm
zx*!jH^@AIv(m^I66%ZPUTpvN^gW#2spsEXz^${PosUpmp2qFR<VF0(;;8_ff0jqjo
z8~2<+txa%t1`U`)vMnSRLr6%aBL^A`0aZ^BE-n(i9CJmqZy{w7q-esdrywHe+A&55
z(TY%b9fXTPVU>hgwqq3`z7m0`gct&06C@#u@sN;h%UBhl1s6ds?(ipE6G387*}VV~
z(L|7tY6y}-@*!0W5{amZAn6d2G9fE=AZw~+ok3|BE(_lTgk0A^*bu!CHk^cK7I+2$
zop=DPUqG`zkaMjORgY5!L<D>o0`z<yd8Z6HNAOh(cA)L3@=h6wjvxe?-+^onf{>7d
ztTo+I1$2GDw;;j$K147W1r+t|G#$e<971&*Lp1FI_03&04efMQH54SIRmFrA1o<?C
zxOBw$tfW}&<+(z%7{YZKQ;iw2EtyO07%H7un%$Y&z1iCRINJR<di>cY1anOeWuF|%
zJ|&D}N*KqCFxKgzEK@?5CI&Ke__DNmF{V0*r#ngNTlpB;_(A$d64r^3{XtSz@h~=M
z6b@1oK}g8@1k{=+2FwNB!72&8@D6c+53D`{^^BkukXeK%5<=_yfG&4{9j^_ogTNzl
z!p0H8#-N+QA&cTfjl!XOg)qkDAhi!@X&iV;2)PpkXNwsG!4aYw(hr1O4h}g`4njgE
zhD7uNAhi!@EgY=sfmBS8!8aitKWN1SzV`ugqz`&!r0oTt7=kk}>myj72viv%S3nRp
zd~pJD)q}}~R7S`oXrThQ<AdBeg7=L0l<n~#UL`wR2%g12!v)|maClyWGvM_Rs1*m^
ztOu@)av}K?GKc|D1Ig)-_9ukIRSn@1$1Ic}^%15xR0N{}!d*gw`d09Te{cq_iU}f4
zYW0M*kjEoPuyTPYz+(a#QUq%mxB~oviB}fAK7zy}WEK)K{|Jpij1~y8xPnVAQdb2s
z$Ofr*AZ#cp4{9Sp#6ct^9YRv3JotJAhzMqV1WChaB-&*_a9Ma}foBbZ^%3;=Yt-`)
zkn1DR2?=(opk1r>si52C!25t8>#jiu_}C_ExTXl|`7$u@5m5;tE8$nvx6^ixP_YS=
zHgFc!uojck7LZVt7nhP1l~5GoR}|uh)JHZl><$XtK^ma?D8+~&)10xy7E~X#da|^6
zv9|hvs-(U^j{YFdNg-^I`e<?}J7#^9VlS5FD4}oVqi^LSWf>u39R*pB4e0^F*wFe2
zT;qW19klugvIqf8#!8sRV6Bg!)e!77IYb2nTa5q}F^L4#I3{6`1qou1Y6wO``$S0l
zh9LD3a?c3DhSW#!nIK3V1mVJ~9!O;bIz&z{0C_|XQU@WE@IDb{eFUkSAoUTPgw#Y3
z5?&v{YacWQa#sk<2Hg)1?hv^{NJt$7At7BM%&G?>f?V|=vq9Amd@Cy{^$}(T#G`17
ziQrj`BnC8xf^#|~zrzRm<k0s0fyYDOWf4IJ(nJ8HF%0ctq3n}_AHd?4gR@>ErJjNp
zznBaK@cn4Slcd%TkTuK%Nyvg{Rp{ZL_!bbNIRMi~a1pGv2}BmF6q+EUcY|Dakj;j4
zMzE5Q(KZMPQ2?n~FiEH^Y!i^IOO`YeLQ@Yq4*`;Tok4wk$h~#=N$AKn(ntd2L^w&O
z3`ua8N6s-Fauyu4Pvn>-2b&LqthI)b;L{Qy_b)&c*r&p4C@H&i<kL!}Y*Mvc(u8#U
z7#R3T*nGsmz{JR`s-k9OVy2;`BP%5-DJCi@#3?DnDKE^WB*v>O!l5M0t}e>1CBbDT
z!)&F%9-_q%p~sMJ!jNapRA$du<I3LV&C=%0(&5e0<;&LV$2u{Hb7~0N)DX5Aq3kom
zIA(-!&j{h36w1;U$k64-)b7oY>mZuzAgXKWp>OR4ua876K+{9uDhX5*K~^R}XNAlX
zq|6c^BX8ge2-FFJEI+_Uf@gvt^Fb135s=|HOcK#8Lb}#L$T$>)zyoyfZV_lv9C&>K
zr1pW6pnJd#14Lj5QuUzRd<VJz4opI79LS&?goKE|N%+1YI0G^Y2Re>h2XW&A<cw2D
z?IWn^31>jqkkxUxNJu{jTn%}^M(RNI5v2A(BO#R$pPH)>4um`|hs?%RAMvX=K&FZy
z49w0ET786B6G22ss)pc-@zh7ME?JP00Wuf@pIt&L5O7sY)T)n=>nTO|LO2(#P=#<I
zg)ml<_?if!600G21PRteXbSKcg<lHdQ!0|AjI%*T*w9HxI>ALk(io&Bf{+lEXbB0<
zg{X(Li)5TLk?SKF*KA1t9hro*qvV})<eZ@QsG`+J3eK5u5zwim;I#_Sl?IMkQVyB$
z`bYwN)dHxe17SF1LXNz~RUb*&Wk3`_YADFcH)-268E|EUTpvkVr)W5(i0TJ2FbFU(
z5PfAiBO?ztuZXayps*+}zo4)Hzpwzmgdm5c5T~pl2dGLCW>*qsR}*2=6z4RRX0niD
z4bni=N2PWQ)h=wUUd*jt%$+{0oj$CP`e;fpE2KV}5z0P2m}`14*TfKJT=kKZWrVm@
z0_d_K=s~9N1#+-~IMaAZ(|F|i2(uazHvwI=3OO|nDvP@gf?w_cnJt2Ji{SN+s8Kkm
zGBOT@_l!U_5oA#uYJCLh5W(vs=u!mmln`cpB%&7px(E+^3m&8<f{^ff2P*?o89_)$
z9VDXT1F3!BB%}@!((=Mp0SRh)Lh2x761hHtER(}k6G5sZczuLi=|I^0%1)RFYkkCn
zV{{H)2k|M}!`Zw_c95zED+#Y+5Daj>gK{Bk+$1zpqb-C0uY!P-5AfoGNCwh$0CXq@
zGVZ5<d=40ltqAHLA(rgos^gHw2^Pp`r8=aRA)Z9jPLPXM-4NswG!RVzHvi%rNrMlj
zAum5b4m}7PDvNiV4boSF)F5yYY2*zwss<NFW<Zh+oP<lkMIbc}WFZYCQ6bkdP+6n|
zDFa^r2G<TgBM!9dN8Tk%&N&mZO9)=?Kp2pvacCrLeFEsF1{udxI07F`nyTP{jMC-o
zQXvRZ<3P?bg^&_<84{oks-WZJAhi;_K9UBV-jgn2lO|!4Dq)i<X`3!)m7?m9DrykQ
zz(~}19Sb9q02iB>0FSsJx40m;j1U(H3G&DY@_-m1E~l(8my8gnj1Z@iC<vKIGF!;9
zhp00|=`v-Qg8E10b_}&HY#rV#?Ox1%-VFUd3{(6WW&|_O2xgiQ%rrBEc~&UP^f1Qh
zVT=<)8Tx`4y8Id1eHaQI1q&So^)1~Dtv#eHqNFUMAbV6{hnGTD#6d|>(-=rignYj$
zyw^jp<AdBGg48~kBxGd*e2*$}eIx*>hTtSpeFUkDL<|EU)exKknF)fE@Qx2M15y)-
zAl>_bRvAGmAP}kL1+RuMYa;x05Y}o4QYG=LyYp&55MnbCa(x7;gCHbi<Eo&V6IKMD
z2|{DQm&rk<jR@96@M;LN_Tg5r!9<Yu9tsIr3j~>^g|Kmx&_N7vmWO0~5DA`#g4Adz
zWd*pN!X*MJTS%&p6kM|*x-dyd$qCuX2rs+9g3tj#{5>XkF)j~2V3T+ftvVpcg>0oE
zo`gG$NCw=SNDR_yaB!fZXn}+_yvBhuAaw_%UkshQ0gr~kMIZ_wByt6Tiw&R0fiR#c
z$|Vb)#*pW6pxa9!vd)>(P8kp+17^!QW<j)r&xnK8NAfONiq1KpDhYfu9i&5qRt>?q
zkg+&fr!;s?1YuBKA3;vqk+MletB)jX(!{J%l<ktmjlvn2_>m`vFxkvZEWBLYf_(hq
zg1q8_ykY`e!o2L#f}Ao!T+#yEl6;(!{OnQ!9MXaukjh9#h*MF7Ls5hSxjxD?1Jy@m
zwhVQy?A?Csoj$DnJ`5B6n5G6W%nV_H)JL;ISs?Y%jBut&VW9e`(~qIm3tAuPS-28e
zABmdAh?>SA*GI^;3>q7`x`9hUdO*lh$eV^B^%0^IWDqJ~7z{!Nq44?$dWf7s5TqJ{
zlaOi%Qole*<a!4y8+7ZCzMmk{_EqG{2vi3luiQpjr2y{`K^XAfkbs5<gbk@{@Ki$(
z1^nvnSnDH5?Sn>gD?3qGAHgdo%%M57-VmfpLaUE3Ya)mUN=<}Z2O+b;^${#Tf>y4=
z+3@-ZnSq@5Atec}0tZtZzGMQO0dDakHtNEr2_R)7CW)oibIAf}2k-EO^tm7;QbCPg
zRe_~Yiek*d8QNKZ9rOp*g4G=Cg2dNE5S65w4^xlN7x2)-^c_S5vN#Q{3{s(>k!Vqg
zRv$rRu?j+@;3T}(fG{B95H^GaRYuO4@R|tDfQ+6&>L5%Kp3dP36517V%0{k_;I)r}
zbEZ5v*~>a*$T?@qIHp5a%|TXWL+)+>&;CGGEI|4}P&RmO2z*!^=sr7$6oiBbf);fn
zsev473hoxAiGhypNfWh77qv<kv(6B)OjCAD7BdNBU>CrwgBZa3W4T#aML0Mld3ogo
z`Q!xoWcayc__?I`xg_~GB?P#ng?S|fA&6T-fJ;)4TT+l)T98v#m|IPRLtT{9Qj*0+
zhCNu7AzGI;%bX$KhPm9Hq1Kh9-J7k$hrP>(y~mGpVi5P#F#Z{lLNlX;=fsN4iWZs?
z!#*W~c|sUtUkF2&A497rLy0q2i8Gg>m5Y(JD`c0Dh<Pk{ye<yX6%sRxMO}&jsgJ<7
zufj&*AoD)(8VAmRS2_|VVUUFf5K_V<3?c%leUM4$Oc1;i1g?hQH4*f#2aK9X2z=T$
zq;o{DCPJ=pK=qNX4_pMf0)n4$iV(zBA3<syK}}Bxg49Qlx(1bm9=MHl0}*75j$6YG
zbn=h7E04M>T1^D0hTtT;bA;9p!Zi&9uZEC&Mvxi@lZ4knkTE(wMQaG+Rj|T^AlU?k
z1mBPX6@jq9BxHCTk}Ki$5rm6ILdp%aS_vYEMnXy(WD>Gh9SaHWf?<xvV5xU-^}=w}
zN03?xPQnUsc=ds=B!?8&s3cq?Q4GxfAbt@H=Mk_4;xAMeB76j?Vjy7%ndyUwK-5Fn
zm?UOR1c_GUctm5vYXP`8L<=4gQiEVBpu9c;b&(uF7XyK6AE#`2I2+bWf>cKE`Ur9-
z9i&Qv<O|5i8-xw5gJ5+NbO_Ei4N~<$)Idmx2xP?qL>aOOygm}OPKDG*qE_jU`bfeo
zf`LmItv+I6VB}z77vkm-7Z8-@=LaD^ZW(?a8GbHV0d5&V9#FSPkXuTKM@onfTqS{O
zA_)O5P$dQKH>rtmsEKeu*L$mh>Z43EhHMLl5*vmJ2Zjb0hGuuB4lmYDZ?;LnJhP%j
zmZZupNtK%yFE%xdcXBw><Zz}5VT=>QnY#TM+r1geTzN{IxeP3w4Xs=ttFlGRV@1rN
z19afZD28Bt1h0S~40tsJXF#eUOcGubL8~Fy5FE4z1iog7Uq6H&%s>c&>m&F%a&QJ{
zqY&&oZCt$}c+~@EAgUYWt`N3;L-4sFv_27}CW2H<{OX`Zai}#BXw5d(Y6vnY$EOCW
zfOynhxxk1AqdtOHLy(JycvW3+)kj!+J$%ackX=IDN_O!22(uc3i@@t62m@Cgge(rp
z7bqn7-eIT+gpEW(Y9%-cshHq77g{la`&JMsXnBHdV<2*u3eLt-6M;|kf|NoO)<+=i
z(Ao;J4+V5FG^E5vCXvg0%+eUY2(n&c*!XK7YRKZ8sexB3@Hzr6g2{lZ!ODQdGFB;w
zAVdKk5>oqMl8}UfoUTyV;FJc@h0;F)--Rmam?Z)3$VfTB_N9W=%Y#?GLFR)L!1WR6
zs04>BP{rg3B0++n>r>Md9YF{(A_u9JKuZxEvSdID5;Ei*vfv1O!4PQSoD}TzY{>kP
zvSYHMeWId$qOxPMvVEemeWH?mtdf1Kx<ib*LyU@jEF3A@#XyjfZM1@IjDl^9l5Mn-
zZM30FsJfLm8y73QK4M{D;AUfx65x;%;gJ&rRX|eU4v{oJyNm#bi~zeVc<BOYK^;<k
zBrU)zEx;?w&o0Z)t|G*yBE)7O%4jUX;w;PHqsAGn!w{p(kZ#P7Y08jq4npNN4D~K-
z6;2$L&YUeiLcO8VlOyGOf+X5}1#6tyTRk{hJvi!IIqO_Gt6ey1TsaGz`STok_03%j
zE!`k%4<I9Qs3iJ9rjTQ_A+-;jgx5aE3`mWGSrb7-L=A(nB1F}r9|9ka1Fcm6Zzh7Q
znuGL)Af%vyzaaQH1tIVf1;}I&gcQax76<7Wf%dEF`hd<jMJBbeoNFqeP1+I#aFqn@
z6G7@7tR$o#1gV1{Bt!&4@~XPP$LJtzNR@=yErN*PuaDq04lV|y0s@oPJm78-a;3wo
zV8tVEi4{TX9q{M@lntqUAR-Vp1_`f^;0#D^g|s0d<pZQXg0P`w2y6fZ!i5wyXrwgk
z5Eb-_3EV$|)Jkv?seS>?2teBT5E3$s17Sl+@Q5Fj4c<is9}+~aj4;@__$wd`S?qNj
zhOtm()TnWw3qNoe1*(rAl>%lh0}+A9Vx8r|Kf;E!eu1o5!zzVG5SmWF^#uwWIn6+1
zAtDeq5(%k~q#V<w96`4yNZ6%IIAmh2k03o7Xwr4alyk_0oPaIwm<6hdz*Q2Y4pMMP
zm9tBRu%Rj3AxqLWUCJg++BRJpi2=F6LBb|Y(l%Y(I#t{{6>`guf?WdS`UZL1cqO|8
zMca5qyBI~g7&ZH7Rl6u<J5Z$quZEOtqakcXyI4iLSO{C+DOkroKtw{Afti7egNdJ)
zT~vTw0<<Q93pDH|z%9+k38|03l@X{uk`Z7BRYpRf@j3|sE-4{CDIq>-0bUtFK6yb-
zS$=k9K~^;p4l`+XYXu%pWj23Jo@gD0NG*m$U4~?Rh71FS9216e8-`jZmU2h-N@va{
zFM-ZL@%~Wh?m+R*0Ff4N{&p{}CRf%9d*)&*hCFkITr-9wQ^rgi4g(8UV`~q{ec#A6
zk%&n&7=iW#2^&Yi59)!}N61wV8XHm*K}bmLgGQp&M35~)5E63TD(HGUjQR*#8R-W?
zY9a_JpzkMu+B3ph4PosHAy-Cx+FpDZl@VmS5U%<NT<<`7K$xUE);b7T5K`|zY9a{9
zr{)Up5Fs-lH4%h_)IMk=uF8lPcES%@H3Y7Auxus*i({#eu+~HnL1-NWseK?Mr1n8(
zLqw2BXkLb_ZpBMNYb8h-f|~@bbOMi-K+aLYLW1=oS6<Ls3A}a!Qu`p2(vDd&PT9!n
zA#CV^K-fGxyeNh<AVoBsgw<at^&LhXL~5NyzIwRBh-AS1MI-}rU=5-XUX{SPkd**%
zaY!Ws=OQz(N<jqilQQsrjZ+4wIslJ}LB_ryO&VFJG{gb}P_+VDv?gT-t&haORT6Y;
z4QWLjq?S>$O;EQ>RCCNwbIj0jOwo2oHgHNbbWXBxjx%?PHFJzJbBwcc4!3X&g^*?r
zVHQpi?oJ+Vj_!U=PQH$gzV`OM_V#`b4*rgg{*I1*4h|vq_96E6p^kQ;j&>0amQhaD
zu`ZUeE|v+-W(m$_$<F4<&gLo3CaEr_+3xz;?)tgzTKS$jxt?nI-WvJdYWd!3)!veo
zp5is$LN(q(^*+K4z9LQjB3<D!O`#eE&SIV#0;VDi8vINe{7mxvYzhMGGEl@JBftTv
zm7t^mhm-(^B)F=Q6X1{+<Wv)6(G+1f7h^CNWAKt=2+?4T)#pew=1DN)iZ|u-)8KKE
zVQ`jaaFJmMR0fUR6<RVDTQfJfGq-rNxB0U7h4N1h<DV4DH#3xPdN5Cik4U?>aFMfo
zhNWntF;BEEo2$BzhnA?Ssi%g84`ijBuvwIlX`~3+HdWJbVbc&1vrvdQ+9GY_ng~(>
zp^@;K2(1o+b0O6boP<<5(1AEu-v?6lz)8qF5gLhWWdfvn!df3e>L6qi*A7<rxEzFm
zwI+g$*YT^n^Qoa9?*plNu#((rE<Eb4+-fe|YA(oi5R?ty$_is}t2l6jx3wZq4Dle1
z%fSceAPjI71FCx9RT5kT%mCFsJc>|~8#YhGr)Z7bKY~;}SV{8gBN?Q+3C_k-Nr4Iz
z@FWy616HJf7DzzO`vQ&ofzO$wU3~<pm5@tITqQ81;Kf=W;SnUYs={LwRw;<9h$kUF
z#jhTs22NrshDf1J2!fUYfH#L?m4yhxNl3i|nYe+l(MU)wgGtIlY9dJDL8^~n2???8
z8hYlN9jHnIb&l-PG3z5p{iWcLDr=hrsgIQHQkCseHEa_!?Gp7J6Cm}Gg>#&RbAp9)
zf|YZ)wM&GxON5nkxS2zknL`+IeFUk7{2d(woSYyi+{p=qoE^fQ9b#N;V_a+#+-(xv
zZBpE=Qa!BG-L2EytTNm!GCeHwyo~a^j0(N=3cd9Td^8JuH1oaH3VhV7y(O!>Bx-$x
zYr&Nfq&{j65H0hR$+i=8RpBubWKiV+RY|hEEV8^T()?`F{A`e(5u_%P7T}f^;09Gj
z{Ok}BNPVOs%%&^B?I6$Us=^hm&y!*ynqeiJW+51B%<iwsU@gjE%Fkde46TnMwV5IH
zQI#V@i#L0B5Kmtys50sc<eKEiJ~fc5%TKJ;Q!vk7I?Y@-N{2OElgUF%)LU0V+1Op(
z+#53TCS)2ZWEu%M%m-5YKuNPuP?ZEe+7wdVK&l}K8<P}<O$LF^IK?q82d{wO3`k7`
zA#v45Xmt>rE2!rSuaB@+J!pcEdIz~Cg0LYSB1{rbH3U0I0kl{SvSbcSLaQXKbr3`l
zQsY2sA_xf)fsowFPFzZk@ZCc^N{$f3rEJed%K38ON(WRQ;b1^(B1n}4uaEEy&>`1K
zT(aiUwrS|d4t!e*-u@B14nk%~+od3LWx%`6A(a%Y^hk#@5S!V-Cr?7UQQ-OrbPgC=
zVFluXTf&g?1V(}uK7pi=25k^raDNJ|%7a!$&Y6&M5~3KwhLdQeF>;@a7&b&Z+%$p=
ztd$NPL4pS2QUGxnCW%!&L>ZbCdVK_2fd(C<g$1}Ac&-px5FWYk_{FSukVW9L{2&JS
z{2XYqA+A1>v`vHT@qyGz(oSj8PHE~6sTz)HhW1f<HsN;mF4i`VzSg?F*1E~I3aR!=
z#jdg?ZgN$gGIhQRt-dmCezG0@G9CUho&Mqw)Zr)A;Um=HBh=!{-|WNJ?#t8e%hTxt
zLhZgh9e%tJ)aJ+A=EvLW%hTq^+vCsQ<qINv{P`#P3rz|To*E!L#b0QOzX%u!P4O3+
z7|1svfOkRwZ=WA`pC9Lh0Pd-Q9FzUoCI_%g4q%xY$T1akP!D&15O<B2NRb1VmkNi4
z5Q91|g9aaiJTId>FQWo4ivlkTWL8KD+$Rzj;t&_&fYw2REE<9==3)#s(u|%84E|~i
z5jqU9hK#8u>{*sPu_inL>I_yg42HrC>Rb#;tPI**43-k?eo73Xnr!)Ij3w4=H4Y5T
z?x2lG?Ox2C-V8l{OcTMV$CtO=gCo&eF4kPiR87%TO;JI|TFuB&+$31iEEGB-huJfN
z9M@wSB5WE0S(^>-@1QY|57Wk9A3-Z1_(2Nb*&le{2Yz}FsA58rf>%9o2BZ!`CgBr9
zXtfV$st98&4pJpS4%vo~nAH$?vnuG!1jsE9Xe9FB8z!3%cF_amJReLFQtv=WczpyJ
zlmpEi!8$~|%1)4)2ufnBjELzU;jfR7Ya&R`2)W+jk+<ZMHOGq3D<j)9<lT8tHn@hu
z%0{k$FzX|Dl>{wMAgx?538|+rN#t?~g$*8Wfz?EyQ5tAngS9ep%!10o3Q2S>k}kAT
z7mZ6y9fYg~Ye|k~CNdXUFEMOnz0_r6t(_rOK%_vo#W`fc*^pWW60?wA3?z^tBFH2>
zmgSu?P~sH1enDb`sv&SU2gCr^N09SHL0ok4WEsa)Df?s)f=Qv(M^cWdQjV$W4yg*3
zaSD2#lIl*{I)>Uhh7NiPZYFAR7ShSK3I$G5#jdgy?ozcr@~yrypeo5%s?Arb(_b7^
z6Zwg?`G^p#kJ^2CT9Htv4^NjbF9c2S6P(~DIN48dq7VN>AHm6f!jt_3r}zuOY9ek(
zebnd2395`>_0eR1wyA*}eSus({v7c7$Wj<o8KKoj(!8v){Oph&Mq+~O;zAq>LR^Z%
z+}a}Sdg5G;3T&<_Tp`-b;d*TG#>`1(Y#HWUS(ZGZIxH^I40-|#n%oR(oS-U6hnvAf
zn8{z6AzYgy-;A-so~yxyvDuxa)q}Clk)hd@q1}@KvO~7hi>uX*Ezw#o&O+K&Pt(Cr
zN5jNf+uTjUG(^%YRKx^4PXwubFiF!8A(LQ8KM1WF5;qQojJzS&K@c`%H3GEu0k1|t
zW+N&fq)G?0Ob&LQHt2E(*u)UL4iW<I7(%Xt;A{cZHFC&R61?g`V_;T5a1n4#<N+qJ
z)JK@L59Hz@2np%_V6Bh%RbAi+QWHT)P8BCk6(@LQ1h0>vbr7s`1mdFBN031|Op;5{
z7J@LVA;`WV7-_|=07Z~K5u`GLu#u}Itd$X$thtPJDmnt+ZZB(-212$evbYhJ8Vb_e
z!ssX=S6gs4yqH2^fajN?GtS6t=!ul6a$tr$X!#T9NPqa6C=dgws|6_=ags>&5n8h!
zQUb$C$SgTzHi{PY1u<qKtARU=NCu?(z@r$V08V16giGP6kC1C0Og3ixqSZ<$6%DLf
zfeI3;k3cmMq&|YEMAi-#QL>Mfw~5j;wv<xQQkCLUk>s^kU~^UDiPmFIGUY0@VF0fd
zV{G+c?DApg3t*WR%sMTYbwVIVpFdk)08?K8(}W-v2%|56X<`7=1b;?M)a%dK>(AKj
z&)DV1&<$pE`!n|huuKGNfsnoa%zXhYeE}>G#)Kd?kQM%Hef}JM{-86od;HlTsLP)X
zfr7ZYg1BnDg^Haxebm`4LGwop8vG2ZybLNl49dJ9q{7Fb!pEQ{$e=F7pef3rCB|SV
z$zUSG;I7Evt;!Uu&623kk!H$~X~CUq!&qR?Qe@9t=_Zn3Cg>r@U@ppFz{jA;4nn4U
z431Kav6>8tx=h7Zpi>%Jy_nm)S*AqDZLPH5-x)YPQL`_YtHYP2&yTa!l_AttDa=k;
z)!a|n)JNPn6toq{BwEZkO41}!!Z-r5IS6ytD&*WAto0G3GJ=!HH4aWTxMze}A3-Y}
z-1QN>>VY%h3lfN_eIS()WWhGP-a%u4D<BWFeh@MjYfU7m?gsDvz$+u<`UqYJp)q)s
zvD87BV|1WuNXd~~(E-GO)kkP`5Ty3OBq8+<uYwJqqAl925WMPvGmtAF2pd;51Q$oI
zj9}}6K$VeA8lK8X))q9Zhh8PY>La)~y!IiM0U0MmB|+5_xEg{~Mlcd|Tt7$(UTq<*
ztiUW4AtKNk3cRZkxunI#hSx_#GBC??{36J84KZw7?xdEu3{npW(@eOmj6)`*mVtyS
zeiEV*N`fN`TGt@g9Y}1*{!mF<@D?6$or2syLKO%1jF9Rhh%AJZvQL$?OHs6o5j6@B
zmQ`lu7nK%ZQ4r;{k!5yP;fmH{PB7*uwqmGuVr%taZ1-aB@?q%pXPz3wGBt<=xjyO(
zV1iUPpt=dQGV1qZ==Woo0IrN6^%1NxV(j*3?)GPf)I<;xQW?SPBM=u<8KKul-F~c{
zFx2JG))Bzb?$2KBC0OLh;ibxICCZ>B$e_*#s*hB`l@X*$QsxEKM_Qtc`qE5Ba;(<M
zj80k{VFtWm2E0iIoEc{P*;c%{HvA>d9OZ7@)gEGX-jYQg%Gr)Gp~gZUYOHoLoMs}7
z)*=kfGR*PX45@}}g%%9ub_{jS3=OUfkostQqvwPur4C<~386fbf_OVUSwd}<LT!~)
z%zRbN{Ul7nBu&G`!SxZSG6L5}p#Bc{1_wy(10m7sBY5qD#=uztfjdW_@i*|9a`2i6
zS^=STg21<aLr&F(D}Yx($PCDg4`@*wxMu{}#0sf&FiCj51DOeebCEB<gH%S~TZf>l
zwef5nf>cb9+6P`438<m$W<{%gFu9oZ5vayN?hV1%+=>oR1Y0skYJJ43U<0prFd2~A
z2bqM|IB*81j2R}9v`NB>pl6uaCQI2SL+)WgCXr?Fu;H~5gn_k6f=D68`cP{k#BvM!
zWEn7mRYuT`610><s)-;X_)AD+S$GWvFMi>rE?SQOt^f}MQv2XhKxZk4Pw`j^@2bc-
zWXhv<8zH(N3P48+z=qTywFxAgkx5AWf{v=jw%i3Wcni^si-gxia0aAKf!@Dhn+6iM
zO969}B<)ZTBq2dsP|#6myCfO=WOe&6VQnW#5h+0)Ar%1z4G|_s1qKgw<|ti;1Y@=$
zE2c6#wni70Ru7JDZ;l=x&WU~?)Dyr0L6B}vw?8waN}Axu+V9KK@5j^+nnq!mh*lp3
zFm%HZL_MS?njFA18P>7t_hXuXsHa#a_%lO#TM+SHf5sk=nIQAK{Fu7^nY;WsI{nzI
zJOv>2k+m3uwh*X3(&T5*<Y&<4W6<GeFc4xe5@E2HW^k5g@KI(C)Zhrz;tJN`3D)NG
zSLce<W==Hb&bMYSwB@XK;b`{}==2xu4VIi3DL*k%zAs$5J47PaRxDVV!9xnPxGq_r
zA;o~90K74_%AT>@h9TFODbIvC&73F6m?KV)Ax4KG#gH-6jLpeZ)7?r>!ZbwOBv{HM
zOv)ro*f>hqI0`Zf2dRC~W`f{?PzJ{O1V}XmxxoQKLTemw?E^hQ4t%OMlx^TIYTysq
zwF;?mAZ+NSA*8EUAxq=XNC9o=QF4%;j*u2Q7b1>aB_Y>7kopKh!Yd$N4R;6|ZQ&ev
zWddZO0(5->ay100jF9_5$n_B~($E~dCW7{TzzgKi)@<WCU;*tgALMEX$C5de)eGQi
z2&E?CQ84FKv_PwpkZU3cn@iT52OHv+F+)aJD<espB;;BKxh8_Jk!A6)Aw>j{B&0M$
zR6uAI6Sy*hR7o=S&@Cb0x(VD%qM$y4l-Q6`mvRzPhSQED*@fWQI%0G|>LZ9si2EUI
z2nnx`;8h8vCW6KqjtU4<7Oox6097%NiUunQt$o0C5JUk~1f?>Pv`c~{Ge~^|ISWo2
zB$%XT7b>LXh+H2zD>1mMF-7X2)JM&3Y+YXLJwBWhd^slga&-AIK@g;y)8)t1<HykJ
z&j_iH`g|Gtd>JSDf$F0^Ur-kbQjfqZqwWBZ;$DA7NPRTPpK%hbV+E;qCP1Y?^%10#
z)#uCD54HufKI-!4sPYsjuxIm9WwjIm)kj)_3_3y#y21?lLJS7N45s1?7E%nZiVQv)
ztU;Qbff^j1N=(i&3^pPRwxSHdstmFE?Ahkbkou_Ai?7W`s5e-$Gf=$6N2nuEv?EZo
zz(F!vo5f#|AxwiY*OIl=k+;r`wb`4i#gn_vnYF}<GtY!M#gr@2h}}<)!BdhUNS+~F
zg~3us!PP<sxjqsyiWD-6M6Qn@vp={<NbQ403PY+J7)h``f>%J0`5=@U2S<GbACbeX
zk02Eg8i`y7A+sSh5#&S#NF9VsB2_?;Y6wmeRUh%Ig4S%qw-LdsA&@xI`UJ?)rpWaX
zt_?&Gad^E0-%kXol_013V9pT1MYutIA{%g(gmmDKq6HP}BWdd-tO#CDp;b3n>LZ&}
zDVtP?I5G)t#HK*BL&^%OkeEdrX(EvF*)d(pJ{4LMfkhzuC?TYTeX0aZ7G*pTJk~^A
z5lCrII}&u|fn7SLo6&5Pf~i3c69^kRR)#!ohQuZ<@a;iIhC(|pkfB{{q!c(sL6u>w
zrIE5th9o4=b>23~kO~O0`4rBUw@H=<NhQhHBr4m*NSXve>LWD)1}za5XL$w>6^3YC
zhD2k=JZq*BM~+%omO3}qHZRsrUyfcF>hWQRAc#o6FI&GaTaPbGw-56KP-Vn4!H0Pw
zxUU56<1qL8G4+DFQOr=%4|Gifq~4hjzyL>(TCCq6D$?%{I<RyCxPpUJbA7&y-M$Rn
zzM#cyz5dLdU{vePQ)tiNqt0L@%AmssT7+OM%wQ@CS{-LA&)}*8LbeKQR<g`$B5acE
z3}TE763h%*Tnr{6jDboFQQDkErc70~T&*rF%}z``t_)NC*e3WgObue`^Ji=KV5oBD
z%eP>T)M1Fw23^Tl?#kcnDOm5y+vv_)@5)`}$X8*{6K5_Ss>f%pDrqP$tgWDLqG=(c
z=d57l4jPm*43;(ymN5wtHwqUuKv|O@ZWs*e<`@Qu8wJ9Vm|*}EA>FJhsuv7GkZK62
z*8{I^5DdKl(B*fqBNQOwDAf?gSqeg+!{vO0^?Zc&y@d2UA@vdTkZo;05p6#qO&<`_
z@`BVvpejZidb$E~1%$>H)btR9-2;wX6G6_|#;lJ7)!hZv-38R#kdQ0#03Do-T>C)S
zkZT|WRU8Dt9U@3IghoPY9840q4#LBRS3PJ9cumBwU;{>0{II=6yowe)3g(bb5Ka=@
zGlGxQK^T~Wb(}J0SSurGYwUw~k~T@=U<9wNKn&{?N$V7dxVTLca+M?j9`1t|An;-Y
zQc0na5LqHgcri$%dbl!NrK<$E4nig&DzWz0AcZ_GDa^tjj|f`5Kz%N_n~CWF<lQ=u
zt_s9E5MSdVCGAp>W#JJa1HRrA!i9%676!HoNZK|HQU_s@a1+o{3xo@)hTx>EZ3=`9
zsgGoA5;5x|O<`t8eH5hws*eh6SxTKaYh0LXT|w8OcKUMk`EmBb$Q~bdNM+RP%Q3-^
z9j!i^=*u$6j}_M6VV(f1j36^ay|DVI-;V)O`}F%W^!YJN@CBiMe~=)Aoao0i!I!be
zo1w>>q1OjQc6u{(`7m_*GWPf}wfQi#`7l(tu@~4u>mwr(P;baun!!ef!A6e3PJzKi
znZZ#3)KOC90No?Y4!*rllz~B=9aJ9$DKkRqqY7(|W+$d*C#Fs(h8|aj9#4iYcZPNk
zhE6ZWY8U=0XTEeR-e^6>Bm;(YQ-)G&=5kxsG8?8+YsMl=wtO?@C}ZJBBOxaP6=y?L
zOFdg_1ABQxH$`I)$h?oFVW6~eFsL%ZsE<HZ61)OJWgy=uB%&JxLfGmWNR5Lak<@^$
zw8M7A17rdSQt!Y>Zy{Z8P-TQ#AAzbNNM)pnxP(Yh(-W=g!ODd#P5_;#z^CZ}sfOSr
zW_=`}=7y_2f>b&JYOW9l77~0R5n6qOwvQEF6A@YIpeaMEhTvR0^%1BtqF#L@WtE5@
zp;cItHVGIARC$TpB*2k`Z6X{&$_-Gp1)HdXh+rk9?NZRHGYD4_qmhiin8Io}q*{cR
zrD)|U(lRaNDe`1UDNREXHYr2>nh0VtxIU7xONVwhVATPn4nig&euRe)G6T}p#U!P`
zbr3u*h+@DT0I7rEuEMN|pdzp)4c0h?)IN|TDr1u@ZJi_sMheym3f2iSw(&By@yfRG
zlE$HuVhRGh;wl0R>cWgpa-jZElrBS}2}6+$ONA3xtqWV7D|?$KXQvNOpC5OxFV_S=
zc5ns6GQp1(j{1FBdi<EW!F?I<{1Id`4g+X<D3D=N00>PEWSAVxG%=8&4~!-Ru}lbJ
z=?!4$4FD~B08JJ7u}%c7=;od1$35AfW0D{H1b<N9s?VQoLLf(%FH4U<+k{}Ai6Oib
zLU|^H^7MxZ^@a$v1xVC;3dZU)c*`<)$uoG$LC;d~l40<YVQ`RPuoh>KWdmK>2RY`B
zk%57WfkBOn!9;>1ScNHChpWVdq1u|g-I<}&m7&gsxze7Y%7LN5jkVpAq1}_A(Sxhr
zjibm(Ak&gP(~2#_k~PnUBhQAT(2Bj#iapbuKhvDw-&EP(R9VZ&UdO~y+Q?7J&{xPX
zK-efy*dSckAY9xqSR6bVgf{pF;X->u;K~Rx?*kD5U1kTVT;L?Q*CVFq2S?DEAXt|N
zQ7eH}BCkn+uwk{24+aBKA3^4WAYCC+>myz*PhKrhZ3XTSf!1$>87P$zW=#Y+X#uHQ
z<OZEEQg`M9BVIKp6a*fugV##D;L3<!*$Gk~p;be0E~Gv}t7VY6@G1ru1G0h+lLQab
zK{`h^{0dfl@B`=+%t2KWoQuRjo-^W>G2@gq!HPI!jNu4>Wh7;l2+8bl5~DIofYdec
z$_R}Ckwqr47A$Z<cs~omfYe9ueilR-+VmDgkQh=LI`sgaj6h<8C(V=Kb*O}GqNH7t
z6nNDZyud_eU>5IiSxBi)Jc(Q*(1Hzj7#ag})&`;gUg<#UBRCh%kg!dLvq5z_Z1fDR
zGQt@h;ITMpU4z7y1oyI$NvIm6`Up7{LD=wQ31^_yM^e_YkorhUT#=tgOo<O#A9<=W
zMC*a-qe5%uG6#-Y7q%u(?k->6-T;A#fqZ>_-0=Dcw0gl8R4aihkw6a6qBzhhZkDMb
z9MeNMW`uHrE=vtzn-Rt`BaCHE6z9xvwkaXZ)5F=OM{!OLV+Ab{4`!Vl#yTY&blLZm
zVAd%?oFEj)H#vZJk{|mdKlT~n+|$FjriSrO2;}JYW9<*(oEXB}AH?1023p(J=)uz9
z&Rp)unPJ2bs?HFkf?gka%Ys%iScx$xaWhCUGq8XwBvu9n0VW1@ZU$3H&M<Y>XdSL{
zbLJWwjtM@j{oXA7p(0zWt#;Pi%}!L962>(lh^@t&x5b;c#6_skk-x~9Ki`2T-<Gr3
zk+;a2qsW>g+fp#wQZUFuEyPMg&&*lR%vshXK-MHc)FfEcB$&wh2vQB9llp<6kvDi{
zq!$3GZbWr`;Yb)(6JcdQ>LB=1ZCv|?z}+QKeI%gc4np9uIL!J8J|85g=?ST2Al)Je
z8$$AEdhlpMHWNW_28VZu;0$<u#HZ$lj3D(9zp5+0D(rAMv}y>(h3p=Ju2=xi7C~-n
z6;QSpfGyI7S3__H+HwVWO@yoV!4$`=i6A17`iM{78jLK_>LbVy93}}_!vI;aK(I36
zkTE7$AE7Cbu#U%y#H{1t2)RmvlpDm5QntzP@jysr1ShFgA4$R13QB@IV#wvHq#a6q
z1gV?gv*c)nt+X9zUJO%^G!bx50o4uY6(y!Dyw1X`P_T+XY8gxtQ!zvhvW@Vn2blqH
zAwU?AdIu69k~W}41rQOOBvKs&t#QC2$TblP8`iCcCm{*zL<n2L8q~FywoZa1QAkZB
zYn2FL$XO>z*~LrP#L8PGN*G5;ODOU4h^YxMXp1ttDl&MgGQ=7%B%3ki+cK0mu~xgX
z)O&KY`*C#z@^<)e_5|?u`SW-C@bm`=^alv^`0|2|Bk<wt@a1pv6>RhoXz&uM_Y`dK
z5NPlaX!7Q7^x~`c;;ZxIt@YrkcH^pWWCb00?aW^7#8&G9It05Fd>VGM3(M3nvAMBI
zot}J64j@!v%T;Q{o@v68pv@4b#t^8;;49DIBgfz)3qpRf41Tf<9?}f15)5v#Aml8^
z;Hto2A;VxO%Ag_0pvKRjz{8-(%b+dBZ6Gb6BPXUUD=I0(Dager#l)b<2|EABLxDL#
zn<34Rq1>FI$_jK!YPADHl|5H)kZgaLR)f2At(RbvzgSD4M4h)#sUvs3HEW?gQ;`F6
zkt0KaJwu@lLy;{*sU1_P9aDilTY)`Wyp2eby||5yzNMv(qKT`rnY*Y_peSr^2+|u8
zF$hAe$JP%7txo{Y2ccI+2LAB#6NK~vgduAWAhi!N8zQ3TE2QfKseLd>0bOr664C|L
zJD8^^2x)nOkhZ&!wmbaB2T%p1;VlS9NY@Xct&S7WfY(7F222*TmK$FCpfO-I5n|ak
zzp9IXnk!@`2(ot-rB4K}j9@cF$g30}JtMUG2(lCbP7<tuAe9T8M6QqEf|v|0d21dp
z;*$r}L=Y0xEdqCrxa3SRYa)mUv_6tG;l|cEf_IDH3`xrb{0NfS;Uw0|NCK%cg4aqA
zhJ<w@8iEuixJWVUM63we5aj+5E^}~+LrPIJ5?nnY#uy=$5ro8Au0o1mTvEi=N@UeX
znDq#L5#%x-nGLxS8<~X2BG*UYV1cgo$DA32gdsc-kRt_DQ-J$5kXV4Ifv{2P8d#r5
z!YUbpAhHrx$q)vJ#9beWTO~k}B_sjMSSH9?CBl)6WrDPQg1B`Iq&||BQsLziQ5R&;
zmt=QWX7JHqjyGaRvt)(TM^!G&RW8i+ZcL3HEY0pLoxWTXgM@l~c{{zhAO{gNIx$t*
zGnQI0q#7_J=`zG?GQ_Gg#H%yJsWHT<GejvfL?|<aDKdmBF+{5|B<nC_88hS<GZdIJ
z6k0HpSToc+F*UldG&(U&4iT9juhQcq*x@G9;U-e=B3R?dmu<$Dq{|Sh$`Gu~;I9Z8
zk@Jyd@RMiomt*ji0o6y|att2wp!&#Ffx%yk(MXIzk(EK1fk6a3PsGo_V5KVMZ>2BA
z!y>}Qz{d{iB1tiUHYhnsF?cJprRX!H8-XgLDl3LcJBBKIhC(yOG;M}tErvu5&`LRf
z8HNZ|hGYYVY;(qZYt{lg#zK3hLI;LI2T)}MuaEL<S@P{z<E(|_ZA2_Bbs+VTqKPY{
zK7x$Dp;be0E__Kgr27LWg^>0K;i`|2huzR>BBX8+qCOJP_7u<tji*5d<=`Z!GSc!C
zg4IW$ng~++z$+sSZ_qLZ4bWr|th#}+A*Y){`bQ8Fq5!gB8!{FLKKK-L{3)y&g7=J|
z^$~o{HrDzGa<nO&ge+8m*GGKdZV{w162N`lDW3WWvzEas!X<Bw6`|EfJhJAXYDmr$
zN@6?zhZDY*Th@dNX;2Pc`@k9O(nb&j$;-r(;#P53k*HN196@R#2#H*e;9*O`iXAjH
zaB)lqN+}1^j;RbG0xtb<m8%lAiOAhLahn84eFPhyL|Fh2>B=EY2HOsC52+-iR41N<
z)IN~O7f20)pTx4?8GIrKE?szB0`IOOhasdIg3R3@&*s1f(9juHDblD&$}(BXGFj3h
z3Bo`lk+nl=9|$R7orqZ<$$%SI;^uMUW(gnzZIeaJW96;lC5$5!r4&SXh4sZ4EaW-8
z)EIp=S;F-hql{P}=jY^DvE<vbmpgD*IPx^R33mBP^ae_H`-^va33qr3HM;Uv*|8N_
zapssX2WheTsWEyeGrKD>`Dt+bs&jd%uzM=AxvR3cDl_?LatG=O1ZnXFY4OGCamDI!
zWf?ORS+lizaI|}Ib$GK(3gw#-B|0gDe`2Umf3QG{k4Tf3P@#iNwv||fu1KI7ueUOr
zyF8PtJcFw|gNq!4vn+$N41*htZ7j`XEX^du&cqBJl!FsY970?IQgALa1A`GigRK;(
zY70?gC~@Ga_ZIK;7j5+xEO+BBb>XaY=Bsz%FS6oH)@E>%W^fW`aFSr~R%FOD22H4y
z*fN&bGM3sil-h%;q(TRVe0zpGJBDHh5Gt}|D7Irrv*k*&<uWukGBh{hH}d5-@|7?M
zkT3`U9m|7!Bm#^LuYlkT=-?Z;9|WmogmwLebo`LTA#CLO2vz|h)jp7(5x5S5)I<=H
zU&|9*6M6D$ftJ7tYI;EGBS9@U0Zms#Wu)ne=p1QyLuwfaiPqV{!F2<l>I1KO&>6h2
zaXI)*5WlJmv_1ll!a?dCOcF8#2bm9okdVrVU&)Ri+a^{>O$3nzAD#d@KLK(dE5U;l
z;Jb%#E|7!I3V~P3L3%yVnh0DaVI7O(1b2(L!Icr7`Up}JK}b;MHIKtZN?InMNr_s-
zKoDB31h0(H7~)n5$Ox^vA;?85^D(&uO+Y9BH^Pw%RXAJ1HW{rlf^!F3eFW*z;Hph9
z#o_f4dMyAOC4*>@v`&FAFzX|DC}CwlVg$L?z|TgjZs1%{^#QJ!;37~4j`cRU>LX}!
zHA@gPjhC=a6gG>Ivy786jZ&6Tk`NR(m0`11;_*^r^wD4r&|vUWV+d9U9Wk0@z>s6b
zQtrTA<;>UOF4W;I)*C3<A1pmFNP0q`6r^ry^pvP_5{xqwj5Op4*5eD%=0>iMJk(gd
zG}xULm~13Lqhxks3_;2a;TkMi#tg+a?2RrgZJwO19*jNy920~2CI<8M2MhEC@plGF
zw)u&bILqhRN`~tQ2deRUDzdo9FgU@GqcnpHxJq(YWC+&fuvh0(;bCB9U|@vTJs<`n
zvVa5+i?O=Aiz2h59BAiJq&9Pr9ap(Ce~lBzlqiKQ%^uU!v}>Jso7{!+&DoN*8A7!f
zBlTD#^cW)b8FS1S3aps0)<^mF3`JlB8No}n;Y_jSFfcPTFf-&c^x-q~5!d$@*GCzD
zL#~_P)exKkscRs8B6xiS;)2KJgmnBs2((Ds7x`RM%=(B=+mjEYGJ^Dvz*9dS@cIa{
zHXBkOL2d(w_J-8G1mFnPIl@u<z&8-V8IWZO=+zLUiv+HWpz}n?^$}zy2s$?e8=6C|
zk|324{0>%leFUj)aMedVpfh}IVU-c0K7v;_1R3PjN4P2@V(KGEH3TQc&0|PHQsyyI
zU@elCal|0BYD?TI4%dhuOdNfD4^g3D4E>QNg}Xiim$ZoWf$+*m9I==Xw1yB;_Ck6D
za5fr))cz5qPeD8h>9tAOCK8FHtrM^!SUrorS`r)-&{h3tZ1^}TvX^nyK2lck5F}|C
z2VvkRG1C$r^>D?QsSCM261PkeHjR_Fj+HWt&{dRFkr1#|W4BRdGnVJokzmsnVAbMh
zah78AR^m+3W6m_?s&e3{bKz<C;_C9{oe<0eUgX9-F@(E6SfDqMuf<oY+C?zLfHy#g
zGeDc$UyI96m(xp=)mekxRg2R=PDp`|NtB5}l!-x|hrw8a#a)3RK$9^`pP|5%sm_kE
z-JPr5g`v-zZJHnRoDj}Q0Sr@vnII>4SGY^17_<4Ruy`pldMPn@DKWS!fcivk@*sw*
z9D}PIgRdN@f{WDSi#8CjkzmkbV^HN_(BK9gUZ>0p8k$q)W-t?G@Ymrhag#4~S4cGA
z2~uPTQvsngebD$}hCV})C0kdRQnkBqq7g%)5krzO2qhXbr<k$j+b|c}u~s@Wl{qk!
z+A@^cF%;T^P>~H&kquJ`7?s&DmDw=mTeId{v*y|`=h`sa8SB{@>%poRUlDy@7~2n%
z0j+>wlR;3fuD`IZKcqf_kkHx(Rv#gEj*!@p86s~;R|uKp*8){QU=lQB2WCL~M}k^z
zVB{j8=?qz?Eu`TNMjk@2Y6#bnKG3yrsEZT8l@4Sq&J9-exME{~wzWbwu|jGh2#HxA
z!K)!S15y(~Rw+O#BS9rQK_xrnc^?=Xbk_r}4iT=322w-el7)*yCWDaI%fZL%@Ki>S
z`iM)$7=qxVb8rT{8iF%Os*J?Jl@X*qLe>Z`A21o1B?hE>1tH<Gu*xVNxif`}O<FYs
zuf@Ri5xh`EW{BG)iQ$+6gE<U3hYl}_G2H-{B8h>kJjW#tua9t5C}`qXD<f%Z^lAvM
z3#|?UhYb3Rp14&4-20HxGi)UG`UqF`gqd<6DzS>g1>xyO+$s)|qQuN%LC7>-+$=%d
zGD*ZN9#S9aDM@L`iJ8eW=!i0iGcyP=Fvv17sB$qlh%@*ob3y8(3Olws7oHAp&_cWZ
zAnr+_eEmV3{Xv|)fqWf4oK4;mRn7tdI-Fi=%$_PNUaG9#T5RrWOjhy?`XUTsEDY=n
zpq`5W1A_)HgP|CcrxHV;7E_KPs6J|NVD9kX?(t-4cVTF^XXtfhm>$Y9DS%<3FUtfU
z<|0Rtcs)jUIR<+%1_ucca+YCml4fv_WU!Y2Atz}D4@m|uX$Ds*25$uhS4DO!F$M!+
z24gV>U2z5jDF#zn784m}e;vMb7qv=nm0Ev|d^@RlJ+2fZ?o?x*3`2%&6UIUd)_gPO
zG$V#0JFa{Ou525&LMOg#8;%q+wpe|J6hnq|6NWqsh9VnKeN<}8P+$i_MAk>(O+cXf
z2wKaaRXW(X(8>i|@4$LJp!>d&#i49nAMnT<bayJ44VodsQ4JyLBk)8IXgxNh_7Tu@
z1tVvE4JU{QxH57V(ty@S;64#({0%ZI1gnD}H4cQt><59wA$1V9vNME%Msh2IjyZ+Y
zK9HISLP8Ih!?9QnvP1zg3I`z}^^t(0t$-rR89tcx5u}zuCgC*;eg?eW!Nq`#)Ztk#
zhg>~zz-l5+X(LY93=zEQL1wT?8A6blSu}oxT<?gR$4Ht*OM<naiR0ozN(o#fa(Mz{
zqm1t%RY{0>D|j6NT53gHHw|2ZfmUw73`Cd)u~)(-7CQp3C&X;4gAD}r5YeRI1vQZj
zY$Z8p*%&TyNZ(D;CIJ=**q1xPWzn1m=fV}BF_5citZY%F$_TV>0mpb6#3FboK`Iw1
zt9WD)WHu;ikr0*W(Fd=OM9ibXC|1-W4pcFj#VXjwNSTMJ8@cg_YlCYH&<Ga;1A{0d
zg8~nytrUZYDo3IrLz)>&g#$yqJ6oGKONTF8lPg1uJ425j1LzFZ5YX{K_3mO74*V`E
zY}V2Y=2A=+(#*y(YzC6dBJ60RXY5Lx44MMWK5`5ZT1+`6%mo(gjSlQxo&qh-JRR-=
zQzBJ5d?jWEvdj!*>Gx&m_GHL+;z=-OP*o69RS=U@5R*}slu?pJLkbFV^7683GTcfM
z>=sg>{jd%apu6)xm%7R_ILR?M$}%{~GC0dI1gbJc>T<>E3&rROW|<4;+elQ}a8_7z
zG&*tj_=@!hOZEjxbO(vFfi7|aEe<Sq5h`}z%d_IoHs{GUW63sSDY9lLvS!G)1))M4
z&=NZcS!&H#YR#B$&7N<~o@>pTYt4$>KN8gQ5!CZRtB>Fn3Qh*NK0=iR_j<H^1hi19
zA;{Wn<T?n#hK#~N`a#GfpC+g#f{>8f2V5Dsf;vZ<&X5{MP~8oT+y&Lq#@}4QvqGS{
z2DMf~)JKqm6kwxpp!M6R^%1B~1gV3NNj@b<IKo;VaVyzDI!CzbBgk+Z^2yuq`Uv^1
z2U=7{i1i8Jg$nSh2j1VoWI%dG`0FD`O#~tFS4Nn!@cIZ%ijw+B)FK8_DS%1B^$}QE
z9HeGJF4G9HNvMy+tz)3o(;%ylApJPBN(2uVQZHbVxU^ucgT$;8MXeIx2omxT5>juX
zt%s4aN+c8)psE~M4X*S9Q3I*1;3P;ftV4v>)B#sU(IV!tBIclulDK)CyiK%}d6=$+
zkE(%-yt1y4m;wVMHv=QLG&_qDAGe(}qCTp2W@`54YV&64^keVw=jiffn*hFPq9=f*
z)0eTzNwD0W&ry-tMux#kmeod{-Ata-Qi(@JoLg3ag_}=?lShh|ON4`!UzUYIjTf|1
zE=q?b-<++?j<>~`tIt=Y!(CuyhT;E9bGKEyEr{S=7|Am$f^BLrW1b^-x)qDPm4UsL
zfvJs=p@p7-nW2$|v5A$bnT@%rwHb(EW@2h;Y;US!q9tjkz-%D}+7Ih24O;NvDi5xV
z<Uo~?i#+Hqa$iM;043HCHO?e`?o=Z_$Vov>&OGfNg5Cb29X>*x0YYuQ{B8ci?ExZH
z?xK}$BBhRE`PKrNCQMnT%=s1!g;orCHXsD4i6G=4sgIB=9r)Z3To6(Lp^>E4N061+
z@R|t11(S&3I9N>txq--)SJ{~x1tI!G$XDROD<eMGQQMH(2SP&XAY>BKGlHBf2dRl5
zByxQOTP6p+cnJ9}JgmJT<h~EG0%UQlY^?PWW*vkq!X;x2sf=*-k08|$oD?;U!h?`2
z9Y{@ti-gZBfyxT-1QUcI0&TBCTCLG&r3W$>R@&fNT!EP9f|OC@k+8Z9qk(N5BaRJW
zwFPTCA5!KM*)77`vjFvv;Omyr7`W;YT;f>WFKLP4eXBT#N=Pk(hZMDdw*)}-J6cTy
z;fk9>O@o+;OhRHASp>p{q!u(1<W6u}f}}2F5=|w@ShHv;^BCk53t_`ktdx0-oK=jJ
zS%ijFpn#I40w;rpAe)OEgO3_Rh8buVQiD5Vs}Eb7FMEHOz>Qg%XZz!3M9EK!kewDM
z(i_TM>M5S%z^7wmqi13#XXGhs=pkj~DQoH@ZW<tE5iDgBEomL4VeHB!s;<t<U?Rm8
zr^1k~%aEZDLIs8ldHM_)x(s>73=Mv2DMnnWMoifj95s&2jcy#tjta5XQbHy{VisW{
zrr`pH!NSH7!p0FoM&Uw6;i5*7U=%KD6fS8LF02zMui-4C>Y$^bq#_}uC&8gF$!Vd$
z>ZHYQBFhfik;KNJz{X%B#_FcPAF0U|uP;z&##nB{-s#TO<HZX)z&wzzFOaL-kG<W8
zwbPHi-c7K^nJ?2?ILU-3&X_CCm@CPMDb<uU+kzv<k`pppRA|ju2<{w}T7ypf%d=+6
z0V7)@U0WkvNS_F@{Rg$DgLy&%LIgbQhCBoZWkc4*32AwuEP;b8%m$s|0~(Hl)Gv^p
z5rl+@KuCCH#H-;3L6F`MzlIAKIq_>aK|~;Z9LOG3@MI8VaRPJ&0=NPKcXPl;CBV6;
zH4&&jg7knO<8lxZA_5_i`$6#eAP@uGSAx_;JUEtcL&oSJbr6iS=7rsP2k#Jp88+NV
z=ba)~G!Qn{Dux(A<b&q8VN2%V`-zZiA51p>`iMiy5Dnp}j382wOf3c;rh{i~TnxBk
zF|%k$iGfxhp>fgbBd`|2^%1D>fsO5ewIHf0!dx8d)v;Da;?^;c(h`$|TZATsrzA&{
zf)w4jNEyontO#E3;H+3tJ&5K!I2ThpL<-H3m|P<3BY5~wg@IfXLD*=<!qcayX%svW
zLfDdK(GsRn;wF(2rcrWMpejk%K1@{IPK}>MLy*l~ks&~fA=waw@=QV7g^H~hiYytL
zT}A301#<NnDy<o6To~%z8H(IQGHkevEuD=mot4e~R4n`zEd1ro{iG~{CC!6`O+!S?
zLe-62xWzQI1Q<-Exf9hHk~JB!jTj0oSgWj=Dy<mvj2Y5(7y{%NT!k6jgc)1}8A2o(
z5>y%dbhu-!rG!m`g-n7VcLNC+gu;<9_%t42gHR#;5HbA_af46|Lmw?;KYL?SV|7g%
zH8C><0Yga!JuwCuRtA0s1~CQ(ITi*Z9tL|!jxaU$C@t<hV}^1Y_Eu-MR%bTIX-HkZ
zY+b%=9loH!$$B@zDo37lOTh#qu2>_^IAgA4W9Bq7wj4{&Tq~|TE0#PfmV8Twd`pI6
zONJ6FhCFMgJZmP%dIo~^5w1!Hk2rWyoF~@G2z;g-=tu-ey@QK{)JKq-2tq<?A8=*l
z0;-HOoFIK4to0GR-oeE{u8$x!4upi%L=X}p0<DQ4T_Ny15xl<xVkkQBDmozdh9GR@
zz7MPpLadsD48(yeAVh}<%mvj)kX=HM3J8rPwdz5xhTv>?eMIE?ZFuJh(m#UtgCGpF
z%7{bCP{K5dBqU`Tg@z#7;e`Mi15yE@35uCPyGsyR5pz(#39W>o5f`}zL}sJaN1~Qd
z5CnGyq_~EYaB&C&A&$Df9kV=#i$H23td)+8Wr7?Af~dr-M{tQioCNU~gpEvM>Vn9^
zorlH{wTMGQ(3NahHVcr_JOFop#If~jAZZ2~yco?Ph%6=vNsVw4(r^+ti4*}NQOhVn
z(=a8=P%cGVB>^Ta2`*1%248iCG((0QbI|tHVk?FcYql&yhDap_A5jKZAqFRY20IZ3
zA0-AGH4#r^MQP(8Ny7jkqi|v42*_zUkb6<NO~bfN!=#P9xRfl_#Q3zOh0`sC$~~28
zy~S(2#j8E|Ydi(=?Rj%;x#RS?LsS`!WI6RDS=B{&bftu(RBY4@Jw;7|g^dD*41$CW
zgM|$Jg$(_LjRL?Z5F##Q5Cl27R75{S)F4#OI80J6P|+knO4n0BQWv~C7CJS?!^x%1
z&0s9d9;C_;rOT9S0@{<?;Kb18!O-f#Q0K_d>JB=rwauNO)s3OffwjtpG0Q?Q!;C-0
zR5-;{IN6dn%|;;GhCkbeKhK&w*NUsak}cPaIoF&a*PJ2Kf+5G6Da(dA%ZAz3SliZE
zTU6IaRM$s9&r<+)PATTu2)HUB$gB{A#8n4D#9=G6(XU&DtVw{^P0*@G9kwh%!v%6W
z9HcUWut6m1Rd<l;34XH!T1^D0V&EjCu7QvcLFkAaxW<9^ci;?OB^wN62d|Gn3?(oZ
zebpSk^>W|}2&F!PY+}V#9}!a-amkv)>m6JS<oXCQHv}h<s~)Uu%-RPc!X>E>N8%=t
zcn~~a!x?Cmkrb>t!z>vfBEn`-!e)peK{2yvNUIfI6Tuk}^+b}8Vu}nBxeh{M!zv(1
zH3TQ&RzQktI0+YrFc9J-)<;t2aafUzc^n#oD1+A{D2|713PTozI1J(u2pgG1RtaH4
zWFeJ~h<Pl8fl0!p@YF}}-Z5ruLTo`MA&N0oB8x++6=V`qy@*MqC@eKXk}Gn;hSW#G
z<`F_>;ffX^f*LN`lH6JnT!GpQna-ju{=(I6Tuq+5^=@2Mjy$C{?73$A@fwW&G7LVF
z3=U!pj^Yev3VhzC%F@O`(#Amo2BDylG?QR4(-6q5+&pIC+@@jj=6>S(ZZ<k9=IZiM
zI;<&X{ADfzmF~h#{$lMR(j_iJIW}DJ20Rg(tTyVx_F7{0Ix5Zv8rr75I%a;LgGpia
zk+4w!q)Gyl0YZiWLIy!X;CoR)2lD8Li0Fq%=?6*Z2FU1p%jkRSnt7?}If=_^u<?j<
zu(I)TaqI9f7znTgszB?bLJNi_7lux6hDo73%d-?`#|riZu=Mz{ba^w?JF-<;Gh|r^
zW?KrSTZpAwh^5&GWI2cwI*ArKiIzAD7TNO`+i(_Ha}-#C>Z4Q>h9pCVWK)Jza|TO&
zbx3_AqT>y(k8o8zXyW+$LC~E+h-KK&a}wb74x9n0kI<_jaD4=-Wxz+3f+`?1h8lrE
zIe3)>sd11=NKFJGA>vf2k6_CZpljzKH4d(=tPoi^$t7=zr#>QB8R4pZ;Nl$8rVxZ&
z`#{)Os~$8#NWFuLgjYsflKRMb7C#%3$1zFBupWej)IJarSp>p{)I<;xOL>Ai(*&=l
zureS;C}<%BtbvVO#DUlrQQ~L_s*<SjK#&Pg_2jhn(X_*BD@+Dvfe#VES=vMEJLvLe
zcsSr<pt*z~7h=1ZSu8FGU`Rp32Ogo2&JJ?jK_pwkGzy{;IgLS53xtGN0ZB`ch7E)b
zAt5ymWLyr?YJ#vOOk>1Nq9JTZoh5A^DP<NRq3bEC<>sQN=B%q4Zmtw#qaJIm7Hh2*
zWv&)(qU@}#Vy~`Xpsa4Bs;QxAuBB}$Z|I?5<SB0)EN=`t$Xe1kRLnR+%s2vmH#ewa
zGL8^60Evqlg+uyHlE$IphQad2K_FxpAa58TXW%bq;4fw1FKZkmX%Zx6=r3XzAYvFG
zY8VWvYrt#~!vN?dbO!#y1|S8<Tu}pmF_;1&eP1DcUr_@;F++bjBTqRaPic8YR!(kX
zQ3hLih7dJ|1S5u0D~4JpmSzvo<$4`nysL8bdm>coyu`YL#5w~+dc62sojG#ALvTe7
zT*Z#u)h>ed9-<9iLJeL*jh=$_?)-HwJasNSg*IGSrp$p_T;3`yzG@7CT8xHTiWY_%
zl6oFu+O9&-Q{x~zgFs8;AiW+mQV=#1gr5O9ApsXDq~QU+NC>hz4xNP8O`!S++z;YW
zcST0rYA#Tu?!*m4kaOgS8H<C|K~Pe~fg8M`72X>{W`HJ%l<iSEN6J>b%2qszpuP{J
z0)p=tLS{e)<#-jW_!TYS2wwX@RxF?m$3eK@iV0dbamib9$eLqC>@sE$1g~+hGN5&k
zv?-UgDWvvclQCw4o&3WtWr9FR%ef(AaS#$$9Rv~Ql+Z&%kUWJ<;;)r3Wg(>jgoM;Y
z5E5Ag!WK1+f)ogFF1&=n9Kpj{4Iv9cN??cySV;3oF*F2Sral1m5uREAQvPF-khRK~
zBupjrlnh89;2|Ns3`_?ghZwRfd}SI&y#oq2O!pw017Snz4oJ;{Ov0-h$b=4ri(K_U
z*l<}07oJoIGLY*dNs~xP@PrZA5S02z#3T|_ahQZDnguDE1^HPRc$?}58_NY7%Y~aN
zM3^Z?nyH4HD0>=ec^PTj8kjg3o10s>nOnH2nE5Iidm-0HqDJAu2BD&01icXz%mxV>
zg^L;ng9O31FvuDP$bu^)c^D~e7$60zR{Y`h5%dgF)Y?Z#-w(Maf^Z>c!9nik7S!_*
z0_{Te71r~S)^`Ifv`|#$;Ni2DWpGpmozD=e&XA%Bnzza}0IjA-RcDBnWeAjIh)`jO
zQ~(`eS8mEsVaAYQ&X8^fI;5i1nYYSWpw3;S(ObCDTe#U<xY1Lv$wQ#YL!i`wFUyoU
zNSoV7mDNj`!CQqvS54MLS4BeCT|(C#vp&LK<Dkjns(K*e;GIG6CE1|m*#zq&NM!`+
z=0Iy7cr^rOaH=?Qsye_C<ls0`>mx2@doHA5I>-bNoP>?VA;#-?lq~VrNAQ{m!hlys
z{E8OH^%11@!6Z3k&B0ZYHHR#EWdx~!&`7M+5L^&feFWV)1g?+RB#l7`wt)yz4Z%rB
zy@N)wOBz5BT4ls3p(hGjzyn&k17{%DO>j1R1YX2694itx4M#%~W)Tu*5txca%_2n2
zB81JMNXRT4&K9wVM6Pj&VMEG5EF@ZG1m}WlB`oSm5l1e-DP%*c65@OcTEGmck05;z
zOkW_I1C@n%oM_)6*GJfDACMbR#nJb#K`IV#MSx{$64IW5EHHrdSuj;X)IdmxX}Cy8
z9fVAZn?#BkM?mx<r(RI?WE=slr@);WNPQt@5+-gM4mlANa(gM{Tu@nqU|EA;X`@gX
z<1i`X2r1(TaieevFewhpGLU;wA#*yoNJ-;R5Hbjs&<~O{43snslrRX8Gz8t0DrN{C
zh=Y-kdPmT}AC80#{2>*PFw)`JXy+aX>-mU)yGVk1KD=6<!Uq0QO8V^lGWLor?wULq
zCJcF2Y?XEl6}AiomMq0K?B!PMS-K1XDh$rz3_9El2K=B+OkPS15&8^qrc8xa4B(T=
zS?gTcJN$S%{CK<kxI2BhT0PksU6~3TdDG09owY=)m3Y);MYI(qWL1rnw9Lh{JV8ju
zTTI6raz>nxE@%%CT8)Fuh14=|68Gqw2U<-8;esn8cktFBXy*vtCxY)Af~-}5^m-t*
z5;6&?ec+W5rz)tM1M2BO&XhwUA!`!2lpL`lcy9<^2O%@yJtJ_32r>`{=@}uBN|roI
zmS6=SLF7sY)DME(1`Z}6QsC;w5?&Lbjm2@unSu~Tw+O8of^#8N4~MiVguy9e2C0LP
zNl@nq)<1&OM3CV)v|0wt)#s3cB6dkVc1b-hNka(YlrVrZI3)~V2)%y<uZG|ZcrHd`
zK-QoNn}neuNR5M58Ns;_Sxi#MEF4l3!5Pq62|V8f>0uE=;wq^iwH_u3Ev1PrpNUrw
zshEgY4^fF+gWzXFY5`b%fnEz>ng;2oh?z%<nngflkx59nA&WrRBIdEeW-+3sAOx`y
zjTAMFK@|Z_2trIlCedmcNCkma5F&+(gx58Y&NZA1VL;O)cy%;{4N0_+S_#62kdRR|
zaiefZwF0VHj6*>cigBo@aj3XSu!L!dq)9NON)k5;ls5{MH3*gjS4ooKh1L*_5EG!8
z1$_M+a!mvpaD%NnfQXBOZ|j!O4+0_ZiK7w*0pj}pkTEyV#E@YCqyoZKA3^#-@M;Kr
z5(45h9(a8upzAGc;4fhsqNHUmBBN<8$>65JUEw0o<R{hP%irP&Ix4o@fxFy_J<pIK
zLWkW`mcdqr!9pB#9E6JuL#PfztO;YG6+?w1Ypn|#=z>BQ<`yr`W_Ok*H|9oHraC8v
zTsy8bb7mK9Q3rKFeN`DF4S6Lka}^y+P-Ud$DWc^Gua96mRZ*77!D}C47?64g7YVy!
z)fH0d2!IFWARQt&3Atz$YkdS7Y6Fk65v-3Ol@Sk)%7{zJk(l}jR@WfvBS_7{tpFOh
z<5si)A?!=Itsu97Lr*RRO%hpBw?0Cycd)YArA#;>2<eOkPH7`>?SsBr8&VlTY9a^;
zsdu;}4IvB+Qjc9s2N{Wi^CL1TW)g-6K`J9;1>&aRaIU0jI2wXfOt|VJNM!^kh0P<7
z%QTAE;9eMJeMFI&u)SK4qMJezGM0tQ5KMPM3Ug%jm}-z~aHuS}iwb4K)<VO%5Hlep
zr2axS0m4SM5qZ`IA`UkS!~lmmq^7`CzmO^pF%3dOY8(h##3T~JMI#}#4<?CRA3^FA
zNc{rpycmZ{ngoL?Ba>iJqd*8-6uRpNy!Qv3N5oBn#X$-JM2(<G*f3ZaJm~|S`Y{NC
z%mhJtSjZ&g>}zBaauX1w4#G-8sv#lp6{zsaNJQUH2zJCMWY-Tw3ZfWtLV}d8gSeKh
zhNhmHx{ig8ij9FrpqWyzg-W=ELb!#3tDdrhrks|NjJ&v@f*`BB0E?*@gQF5hlpaI8
zF>{eELxnR#xdTIuD?_CNL#+!#vnx}xD^r&TLx&qfu04O61&5KAu923msHUB$rk$X=
zn~;XPh=v~sX?clgc?oNJih#Bbc|f)WA(QYG2&fF`LIqq^4@4ZcIS6_@0=yc6GvL(_
ze2pA*<W0?qAAG40q(g*ELWbJVNXRM$cr`?jf!rJ7R<h$#gj70k64d2^*GI68t2XfZ
z2*kjsk02v;klTeI^$~;(sfi%7LJ$%%H-y|p0(Fk$OgUuD;XNQYgH75L6QR^Wu!$ka
zHX=w@h#g!FK{`a>HQNU4(gv9I4y5)$CRxRGSjBbVBXaCw2sW}Ha%IFWrUS`S$Ru*z
z1ZTr*9L)Mi%p?r13^Fp0OhPn5bU{cF({N#vFhNrgg4aZF2E4Q($N&|9rl1Ka>?(1L
zD?)S)1`=KiAUg))7i1AQ8=@X<0uC+<VL)mS2ni8^*2u7$6e<Yjih!3^LX3rv$3pZX
zlMpR<?1i`)L_)$Gys{dt-hq#yVTxnc9mpbR)eRaKvSSn?2w_8dE)WwS^E<)@p`ZyJ
zgHU)i1ZPMXM<CZnpiAW-6%*_ZZphvrQR7fyLr`UeSsx+Ryg?Sffmb6S*F+FD<m_uW
zDW(VN;E00PDj-)zkZJ{)6vTG&8>A+JkdXRFP|p{v%U4j(N7leuM&HT6(9FQl%*H^&
zQdiZ-P}bW(##c|wQ%lHIPuW>VQD0qAQ&C!mmr+5G)m(zXMwU5RpCQSNt;C+8#+|v|
zgQ?kvZC$D1!VHzZ0N!q2u09{;HWyHRlx4$Xre|oPqc5syC#Gc&sgH!!eL)CZ6A5d2
z3WHWGps&hCt%Ka*!)pQ>*hl2hDj+m2yc&Wt;MEX(P!3WbK@QDEu7)6Nv}y>>MXP|g
z6&(m5*wqj4O+%1X3NR8{AA#x}1<?K=Se1mdR2x<w!K)kSSRDAOJ4j6gA>j%jl@Wvl
zO&B5FJ;W|+0z$Cm3ivA{Ht?!B$eILD#RRU8;MEYE0k4c8^${EDh#b5QLaU@8^%11H
zfmcAB5_%93TqJT$1YyG~7c>TRr=DRjgaIdoO~T-8NbLh5A+-{O4XKHcNl1N!ObVNT
zHm8G%HE@9jVL*x+WD+6{Cy}L4*cjuA5ZmDziDaOapAasj497}B6yPDzbU{jcXsrS6
z%V72;AtKP8CU{jdR2R5fK{gOs98!Z|m4*0S7&gy>>{^Hvy!Qby1nXFth)J{vc-<OE
zJFJfkawNE#L!PODl>g8k3er3yByHd)A+aW89139|lkilB%m7tAu=yR#`bZR9D?wHu
zKuPeb1IXAKE>gfK5QGea1wcE4K$AYuwb|gIHb|ccLc%K-2p2*k&;CHVMQ{=_Lj<l$
z{2(Nxeu0zVO2<bS^;|ee7fBR+qc>Q}M_AuWSl>$!j38_=T~Bd6F9BU|9!+-vZBHRx
zZ^$koF@0}QJud-mJ6;WINj^?-9(D&w(D~tEx(ty93>lUTdG-uBHlW*%dLoqCgJo;o
z`O6$Qs-2mtoEYP*L}M(3RrMTH^&EIK-NDF%R||T75M=!}ghZ=-AY6DIgq4BZ!6C?o
zjKx7}95j+w*$E;dsOH44;sB|SAS9$F;#P5h*FKmG<oXD?CgKI}UByB|w+`9BIziA1
z2$>7%@8BZ26|9ge8ZLQDNbQ46LdNTO6cAV7@yJ{9Dp)~!Mp(``l`(_Q2SFI1>IU9F
z0<V#StdxUSI(Qh!Lvw6W`tbS)QURfn@ahS|K(2|fRz_^1+Q<k}2O*Pa)exKu$@QQt
zX&ee+W0G*ipfP#lPzl)3A967QVI$W>5H>^{ULHaiC{+x2ngAjWCovU3MR3$dP+721
z#IQkyEx3TC76ZA0kbw8^AUzAbB&eu24ack?kR1jO7C3`=S%^xwMtI=BMKBmx>LXC)
zg1ibEymA0ESp!}SAYmK<QUFU8$ZTQ5U{T{xxClHUA-8rg(;KD;p85z{0fCpmA=nsm
zK#*DqG7|(L`3(aEi~{)$f<TEMw)_A-^@FxH8(t+LGa&T~q#6>@_k%Mabr3QMQ3)Yo
zXYY7}D;>}^K;Y^L%7D~ILb_mCSbZd9;4Na{Eu`lurVB!_Gkm<^=e!AOxryj{3h6rV
zY1+sN@k;S?IZ1<VwFuK?2-gE$LIJvm!iXVSn;}w_AxMrPOo1Up4s;W4vNnUSCQGnB
zm!g)9s-6R{mIt?n8;>TmJ`zy(g49IdBehXFM_6kgOhJNm5V8VDjf0hh47h=6B^3wE
z`UtZELKC5SeFR#QpkM{BjG%oV`1N*3Q$o<eH*kG~Tmf;)nn1WHq^voQyd`7@E03%>
zH~4^4ZW%M^8K<cA5u`HWz^IQ{krrv=s*l)EkMu#Vk03P;Cdn$U!!D@@=^w%CBU~LK
z<jROmR2#C-4w*!*n~>R%Q40Y`D-m~<1TP}sMG7WE7;FgWoGx6o2GQd15{y^|xU__f
z=MY0eN@)^Er1n3g2Ek84`aUF(;Lam1_h3{Rpv}(ku`*0q%s_+pQsMOxCIe~?xb}pu
z96+vOz#$Cn4T0AWAoq_jZG<EbNJ;}OkOnU^fHNSnxJZaDWD-)F5JegViW&t%Y8*&M
z2ALEy2olwY%0hAnL?x6B9+!itN3M4uY)EwjVMBUAu<<l#CrC)&Pf*VnQxLS84rzc+
zRM%HT2XWvVQe_0{F+pl8h#*9zxSp4|o|mw`hp@f}=rTJUKRyW3_Ttm_64G%N)OO<0
zauv|?6xDVT(y*77kQ5aXc2Z*X(-X|M<|}m+sc;r7bL20x<IgweNYLkwP-iw4Wik|I
z&=g_N5@k@96VcU>6VtGj(6Wb|mCdj2CZOTYuLU~82l<p9NQVd{1wB9;x(W_?MI3|;
zSq=vuc7rpp)-`ZJ$o;CwB)l2|F~Hp-$W7ny^#(lPh1u}hhf~Q84MAptu=Id%^^Cyc
zu=9LCXUf4V9dw4G1#)KxD;uS*0pGX^6EWtJGlsEEK@9Nd9HiF+uZG|ZXniDW0+|DX
zlbF>FtF#fMK7y>xhAV(@kxA745$IwF%sL271X3A+DkjwB3TU+wT4h91eFVw%{02dg
zh9ab5!bJ*!r+~2*BM@^S8sU`}gaPRykwjv(0*@dubr3-{xT*tC;f!1|V`HOLPnb0k
ze(yu-0=NUvoQhm`KvW_tK$eBDF>42??ZosS!EFYp`N%$n)klzO2${s%KY~OSBvrtx
zAvgmr3ui!7LRv=<HZqB|CW2HYa8g)55QM<fH&8*0`bAVfP!ys}KTr^iKw9)c=lOu<
zj9}YQ;gt?D1KPU*Zykd4eK4yYK|NmqT^~rTgiJ!}7YHe$<0GO2-dL&&s+9z^y&!cE
zY^M)M5F#R?<0Go;3$Kra^xPr!5s#J+pN^k^t`EFE;?;KJ)pirnaun8dP*+ivk(6>(
z<p?vAEU@7(vJ)(I;45|DE41XwHfD*@W)D+kv{T@<k>fUz=QLB~)>e}-)KQYqvX|6$
zfYe8j8V6nz@oBoFoEHZ^Bmq+I2&%im>l$PRq}PLI{0&|QAv2I`BFJ4rplS%dNSjc7
zgjN$lxR~`3kAgixL`;2zwE{vD1XVOB)eyMq0f`V*A3>L7OPj(gAT$QNx`8v$>LcU|
z2#t+aAF+aV4}sQlqt!vkTw>}Y%*u$Q`UsNKA?-ut5&#byQih<B5cOE=BS@`;m4ud)
z;4%=Ijb=F2xtJw3bw!~45%3xhWPf3@A@)M*FX);^;|R#~J%kj)R*67#kxYW?6fAWO
za=nAfMua?g`Ubfg0xhcsPt(9_8OYfM5+-4gM1_Zhq!u(1Ya)aQLhBdE>H-`jq-KHC
zAdq#}q6U8G2wWc_*Gdpw(7Fjymm!V3LFyn(60$rSQokVAJCOQFK*vu|*B=={>K90D
z0%1dHB^c=~q60^u+6tmZNXrYX050V%q#Yon9RR6`AUApoYPkq%x$tYd@oBm8Xt{D}
zI16dm^Q+p(Dj6uK8XIb98fj~pYAYLSDjI7j8>uU4tEp?Lswt^zDXVEqshP@ZS_o;_
z3TxW&XgTv}IrC|`@@ct3Y99!RIRpo(iI8g_JZ$9ZhA>;%2~-V%N96cb96;Bng6kmU
z-Vo$$1Y{978&dUfqjr%Xf{^|Z!D<MufWrC+QVl^mH4ru?36bTLHGz!FL3&1TK?nn_
z_QB-Bt06Q7T3v%y6G6BfQihQ2tL%~ntde@H;EhGdwGSE_UKwHbkFeH3tRh-)1j%2>
zBqU#Bl8~GZ5l1T@$lyZiBQz3T6QPNN#_e&85F&~}+&wb-s3Ez`2bbrV^$so(i1`p%
z2wRNe`Uq>tfEG-G>lA1;hqQhY646+7K?Fex!0Xu{^$~QM1~x|nshH4eA4s}F6NGa?
z27;GW!x@kih>L`nhDkzd6JhWW9Fz^NQy^n&BH$x;L=F7l4A5YmUVsP~3F!sE5hSNT
zM&U4PA50Nq>LURiKL`@k^@r9kNVn91D;ghojRW0qs^tk0ha3tAIg<yesshzV0-An&
z+Fra`o@n(EKe&s;tL-MN=^&!zsH$b5qG@idqiw9CZLX^dLOSYZ+G+;cItJQ08rlY$
zI)(~5)(Se-qS_9k+77(hE<9S!yqYc`1iR3IR|9%}DW4kRCOcwkA7Xk&D19GrCkRv-
zDT9_PKnCUbkWNT|S2~dT2vW=7A|X8^2nkU`YJJ2Dz2X5eLj)PDLzyQ+S}BKF-Ehg7
zLh2MuDNgXP8$=d#_cwTw2wopSDjkR#2pf|`t9{^HNJWD&Hv}4sgG)ge%;I|R$_P(=
zgj@$9vylhruvSKFqT2Avh*d-jnm1uX6OasxnVsPxg0LYATm=GLTm-x^5MGR+F)(W!
zhzO!sLoJ0M;+Q0wu>`r8CQwTRQb!PP8uI!>h+h08#AGxz5JBuDq)wq~H4P75>^6ee
zMZ?!ugDN9fO$4c5ApIkFS|Wx4I_w&Jlqfu1;b%b1grsW72|N%IB7#DK>m%6e0#N@*
z1bpxglmypAkorhOFF;t=A5tYjO#q*q0I7W-H4cOgCm|goI0>m`AY7DQ60CD1paa@i
z1W|@eg2w2yydc#LnDm6#M_3smI-tc20y^#jnttH=$dgygLsS!lAUD<tYkCNS7a{O!
zd2(rZacOw*sr&G%d2_0RxO|!(;AIJ*`5+E;R}OVo$od4xh#a4eJFm7Im!=1&hC8I*
zfz19O_l)4<Z+I#rtaT7(rGq8{t%D#vBM1rZA3^FJTqNYg9!On-Ov0CEqcPweAN&k%
zc{>P#*GDK75Ylo5kT}x1IINWpyvD&?x_~SNuZhs=Ajrvbkei0!H4YjBQn_G~@V*ZR
z(t$tV5jp6QKFF03ygp);)MFOcK}N`R5UaQji<mY9LFyx1H4$26MC<wpR1~0AEARph
zQu|<%a0P>efve!9TpUt3!xxniX&|INB1$n?^$}#98}jT7#4bG1fvgy$4BS5gRYnG(
zkZK6AT@=Rv8l)1zO5(45;Ifcziy%_73R-bs6M@t$keMA=4FWp<4O$aHY9*xlNLbfj
z7+i5cHR=Vx^+Ku`NDTsEV<jO9Af$k<4`k*D!~l=iK{`heSs`7}gb}3I10liH5WGqP
z)klJw9+1iiRQqUwM1(-yCFo!szqT6)Y54MM`0{Fb@MyZj>mxo*cjWqrThkl4KH^mO
z<kfKJ)qq~}&8gwWso@5xk02v*khv#L4R=lr&;}w%)q_Uzs=Dy0fsRe!S9O6@NrVUA
zR1sI&q18diT;%!)ddsS^J^pG4QW^2Wwg{0?A3-V|JS64y5xjqdQuWB1VBvyS&0%(e
zAT<$Wmk?St1m|MbL=X|=>V_3G9A^L_k;ReOm^BfkR>D;u;i-u*D<f7BEkqWB4NBn3
z$k4HLaH|qB_6{MDPdkB#z)2C<Ir;E%gCGM`py8;(uu2g$5KRGAL&z3{m$#6^(6Ek2
zA*+EagH%Q!^;GS@K<W-C39gUe7GY&TYYn6&&;)|Q2z(b0WX&`9rXDo)pwQM25z-IA
zTDd?3aV0RU;t(xx5~~_yK}f}fEF!EQh?&SiBKn}!Zt(g@NY`6LA5<%W7gzgWtyYk`
zLf936Yc6Cd$fz1<nVc4MH3GDj0WVQNu7e;Wau8X_c{`9w2VNh6&wqoSdjOO2g!FnK
zBt(~hwkMyaJGh$zV(@9YLvCb%ta$?);t7!z)N%tM@Dey^O{C_=uMRyI4x%1XA3@eF
zh=7h_fUa18%ncz|Mv$6_)Y^w%)dgPZAlEp^Y-sHRzD*Uv7Ep2oA!U03FydFT!$k1P
z2$=!frV2kl0m6W68UnLX*D&Czi6By_^^rB~`XOsjw+P%zf@~4uk~N3y65^3Fg(FB`
z338(lr1n8(LwiHu1LdI06=aOzT_LCl_~dQm^L((@O5myov~LKqS{qJc%mg9&M6iQ>
zAm_`0j+=wjN|1U7QU@`M>%kGLxE>fGdPb1`5vQm&C+ZF(<k2}yHYj7lY9jn>$k+wE
zu7NYCTOWZ6G*ZTANL7zkep1Lqt82*MV)jSyI|*9Xz-lpkiXr_J)Dbgu7b4X}kh%#z
zJ_a`#62ed}YJCK`JPs-aZbg6;6I~y{(;Hg#K?zq-KTv?U^bG0az^WKfO@yaDf^0#B
zR5T*`eh>yGiM$R0A`7pNKvfcI9R#n8Ablb{^%0~J0h2`4N8suSQt5zh_lER|AV>E=
z&W?lBGW;4Y0-CNu8lVaYvp#~1!okE{`P5v6zzA}21ElW*se_QuMj*95f>%bM9Yc^&
zIAteDrGtkARYppVcq$`)=r&d8J$H~Q2}fmQ3){PD3pzXjHV_A?ZU~aF3J7w*DGBuv
z{JwAGItZB!KiU*>f*hy<!l;j+H4)PJa+viIxH2+?*Fg{lWLX0K`iNZ&QW@z%>Lz09
zBS;;DOp;n3L24PINbpfwLV5wh`hk#HU<e7Vn6M3ufC?6j{uER_4spny4iZSxhk)RX
zZDJVc1vviV9b6_Om(K**@E#MX7Ewn%Mtvkis5T)c(1?t8NR0s6bb~E1KvEZ^SB`}Q
zA9)Sy-GGkYft!ZLK$))r@A`p=K<f)|s)p1`5E9u;F+D%Xwo_4EUkDpsF$tlr)`ln(
z(e{HQaUFkg9e*enymA}4Zi2Bv2Vx_$;e8wdZBIxI0w;yE&_%#fp0JBk5r>yTDjhKC
z4(aB=N$5-vcwsi=XgJifvfW`^P$ea(?gkmD6Hs##P;=u^cjZ=tUbhPA8Nm<dfino!
zN5~Zr8k-+)O$4cf;FT0;;Q?x;!=q>msdtb`NF4-U1&8Z;JMba|8&GFQ-i}w^4#a>i
zQ~(Pq*ud%?P>sWj!bQ|XT(XuJ$Q;r$;+8Rk^pD^hg%Au`6D}EJ1Q)z$0aW`)n?kA^
z5F1<*aZ4G2kpZ`q0hnRLEos6nX~HF83}+y(mqTSkkKN{w(#Kp52dQfy^$v`LS4Pki
zejukSK>A0hB&cV^E~dvWriXuY4tao%LqrQJBB~~WjHQEfE~Gv}&a=?$kI^Uv6-xSn
z_-h}^Wr-;s>8XZb1%##mQmudn?g-U%5S3IW4MHK8Uqd=aq6VQt`XPdP!N|*6An}MC
zh7b`5i7XChgQT!kF_7qnCK&K=87?;Bj2+mzYe+&xR7?i`$cYei&?wkINL@n=iB=y$
zxM(D#mO&;_IyFdD5@bgzwDy72N}4`m5QerNWXuiJMFKC(M&d$de<0ICAhv)uXb28U
zYCvlfaJ>WV86mU5wG8MyQb^5$jf5_HgLHQI)j@R%q>2Hrhy&dy1gUEv{T&z^RQo_g
zV7(!b2;>$atS4r}>myJ%2Rt7HshH4e96Veeq~SPtrGw0X*GHh52)q{+x!&Q0UG@!U
zKx!fgNlbmjBX13=hRCgtFlr)5-Govfp;bCiF1S7dRXX6oH)IBKeZ(mZsf>)kl@X#o
z;*u~%MyPcV^7-4)`UtHuf*hoPt3HApf(lua03jhY5u^tMTeqOgBBskG3PC7`{~%XS
zXhU;ol@YtJCIms|YKbACMTcGhXjLjW??XkvYyrIhv~dzpp+txJ2x%uH3B4*xbm8rh
zfd;Hd8E*t#3<^Fd0CEKn9un$1ytcz@1acVQTd<+AMoe!PtOj{qG)k2L?!Q3$I50L^
z4MGf8OwSL5bOXd-NL0sPRL37A2$6!3klqcXCKA^3MefKTS0c!4wAu%m3$KG<ofpt%
zA5evYH0H(!KKB5!ni@8t1F2aMBxF7aQURfopxOtf3(6J*EpG#jut82d09QBADhZhl
zsgfWfNZlFat_>O+bUXrNEgbabZlntuAoUJb5;VD^>H@E8AoUS4373K}xFL(N!SxZS
zVp4O092|$0gfGknF_avMtdF3T5lYVpdfOdr*v%SJ2Vs)nP7wOSY`7rgL^x2Dgw!(v
zcZ(1`ATSryMFKM*;-Eu&;FCmH8Qd~vT+*iS+6OWy2O%NzL7dV?5C)ut47<TeSRDlE
z8G%UH9pB*Fg^;Qw4oPDWLOSFJvjT$DL6DjSQm252=0KA~;LZ`WX9TK%#I@PMh)qn3
zO;j5sBBsM83SBz~sg+p7K=l!XgjYalwGT2EUK2qoAT*Mgnh2r>lG7o3_mFEM2ph9L
zg3b<rYb%HvT9A+?H|<DhEr48q;jIzi>)|02>jX(+PCtYA4(bxTwv$>7A@_qss8k<8
zYbA{S3o*41rW#1S0I!V1bORuL5QsQ338_s$i`I~;4CLAeD;uH<Qhy;UK&yt}TuA)_
zAqmz;f|{U796n7CK}}E8%E${c_y!>%>*XM|40!4XR3-6gVyTHBbr7Vp17Q=aj|9{}
zm64#j8)Sry4)qa_k|VSq1m2?xsf>7(?coei1%$R^2v#3Ks~br72bJVkbl``JAk|0k
zY6x<j5SR;InE<}}0W>89J6Ic1^}tseU}Zq+BXD(tsCU4%45Ytf36TQ#hCqEEuoQL%
zr;Hiod^u=M1RjWkS3__HybeNUz-uB11707&sv&(C1ABc0>F>Zu^s^M;H4&Tv8Eu1+
zNEZ^pD<g1y1g~yjH4&s@Lexjd^$spJS_Op0h0ef2_TS+r!56PU>LX~~gftot6$dYi
z<=6G+*Y$_iP&BTOh_B-yr8*5saDRbd1%#%6`1%Oya<DA~dND|9ptTrseTu|}nh6<O
zgXn^=!TW14!xo|*Ssb$lAw~pV3*cv<uN;7^UIUZJbqBH`5OH|*f!3pe$ihj`?jEGd
z3sTEKNQ~+O+U0@BLP$u@2tuOubl?>Yzm_MTCTK_vJevbyKr0$Z1q7*9;3Ry02Nwf8
z&W0F;L#>aXRS)vvJZS5#p`&f!D}f-L8e|eO$c9GpDm#HLutOuk_u0Wu%!V*PH+O?;
z7D#;rsdq4!WJAP}NzefcN_KcEBVI)Z$O<}MMF-H%AaGp+u6iI#<3J?%P6zM}ci?;O
zcoeN6B4849*8|qN23{+HCVD_C;^5T~lmV-8p!E^BCW5fR)ez`NANbim2nI?W1gVK2
zq^t=yWPlDlC4^iDAurnoS3?Gz(oh7ck6?8WyfV^<+_(xQ!M!AQ2_p~!9q9vKEQeAb
z!RsJo2BhPILPBdI4K^_iQ0GWY3yowI)kK~TViwh67S&=A)&x~UqMEEQ#38B)LLyr1
z!kQdl5?TR?Xs}~LB-KZtIb3j!Lri@Hsg<y(M6SgsW+Ru4v|vL@V4_HHd5%`y5abeN
zGOUgO9SZ<nPeOcs1UDa2G2!BZ#KG<-H4KTVebCe(S0ALY!8H-2w+^pUkQwl94yYOe
z&(uK9e}hyXSV@R>OetLT5oDz_gyhxofFnrdf=NPFvLRJQC}&^8t06c8S2Y9?hfMuI
z`bQ9w*7XsjW+9_K0v(hM?gSC6kGSET9eDQ#8v{}wfhr>fd)QbUs2YOTO5l12>0Tj}
z8&?U}N1QTd;JOJ~4Po?yAaxKj39XMHOSRFq5uw#bkXa$nz#Wci2(2;#PXj?`d^n_#
zD<A|Ld~`N^kj;Qo+=x>gTp39i!FooJDhX5{A=f@=Y!)%lqz_7EB&NeIt_`k$u-8Y>
z>IUhYA4q)!s*FUmpt7QxEbtl!Ha!HbjD$6@Rz~b18j$=*JP8>OL9S(>MGCmAK<*_$
z*%a3}$aZ0}p|&HPicDP*+~u>ckUkm$?PtNQ9$i*I-%k)$(TGxMbPKKI2lX$&Rv?Q*
z=2;*jkog@58$uHAeuzqt71#nFmpBz-N7o<Jh(hYuAg5kP&4O8*K*TXg<bDliZGtHR
zA5Y^)s*HHGuq-wZ1a*s`T_JdN17|??1YweJ1(50nJE`Hqui*mO0R)@(0j)QHu|eBW
zA#Ajs4q`qCfBy)4rz-OOL6Ghb{+oj!M`vT!K77iye9E@GN;VLZR|z`Z17n~p$p$S^
zfb@Wn_W^->Ly$EIu$l-mA_uCE;MEY9yd|8C#=tpJ2R_M%Pr;l|0k%I#9#kK}IziwX
zN7fv?707~9#)4bMf=AYpN7j-X#5P0R?|`v=719ZU>|KShk-I|NQbs(|h|NUc-jES8
ziK9LOkIqSgNNh7cpt=cC6M?6J40t3!h)YZljC8ofuprF!*lc22u*yga+&O|&NgD84
z2F?I=d_+OL9u9cL1nw5Gh^VoMsG-z2$n_DtlETKo%%ONhKpQ2%yUQW95`=`529Q=O
zv<`ywp&;ZCua7WmK$wUR<a%f%5_E$AL>5w3<0l35{RF^AKttA&L8=2{NRo<lh#Mfr
zVrqnlBa;yI@Cy+j-hq=4SvZ&A?WLGD5-9>rRN!R^5H^B@j4D%HA3^#!)T)o*^$3Ik
zuZ-Xo3Pc=E!fPVP6|0a<K!V@{vEf><GC<Qn>dt)X&X5`hMq;UtFe@Xp`UtJp!wXx7
z0Ga5491RCyL+ToWBqyw=1Fw}p^$Su3#G_~ptA^mUk{!H?0V}qH_JGK$k04bNghamQ
z4&3p9E<b==8w4GN1CO>rH>iTQtioy{NIwWvAAu_)Gfo+3eFUzFKot|D2L$OHVUm#A
z2XZ_D=5hrtNkeWalnMyBGUAjr!e1Xjsv9H{vH}57A3<h<z||0>GSWk?j39LoG6|m;
z0x?jA<}_GE)nN!R3J0l@FzX{05e+aRTpzIstFsBKqtsd8>IpU{3%VhN7&h#zdv6Q|
z;`Dn+Jp~~_O<70*LL5nCc{OO1fo`IK?dS)wA!RkB+$N3$b+V9a1pI9BJPt7vt6qo{
zoJ3wBKnxqJnfL_}H4Bb91+G9?&sS8}2l<##FdMYg4YLP?yq{D^%L~4Q0KAP9w5|YB
z0iltQ`UtZhgi8due*{?t2O;703mOA!ox%@35&_b|K_hXghlqnJBQ;k6b!Q~xD4_0$
z#Dz-ngZnuA;1zBVHe}?DSJ@eioM2oS18MC6qz;1AN02?I5E5R?pfMn_0?PKtyHK(2
z27=T$FcMTH@hjQzDOzLe`#^d?2oj<OPJ$<cP!`KUDji-0E7(do5Cd}cDntrSLMtO#
z=yj`*`UtW`2(3PXRYst$5V!(@)I<;xEC`+q;*c?hS4qeV6(IEyhm;|tx&iGog6<)L
zFWv^#N^k|>Y6!ka8+p7A)F(o!fWTD`Xum3m0Uo%6?pg&Ok^tHY#18Eiq0~pnbr6IN
z9+X3$8-fc$>K!Nv=>&m$K+u{9Ha7%cG6$}VG}u9%BWV8!Yo!CJjIff#)IrE<FsdPV
z#YCO@2woNu$-rOqVagI|EL<6;38*5VIta5qg49X`NpOV)t&dQRh28;5e?emEAY?W0
zDI0i*K^VBiA+p4i;0g$OG8>EyI_w&2eI%p>y`>aV2O*Pa)ewXWUipS)s7*k_9a0~`
z84#|ZCg`A1$W#s*iB=5}?C+peLvSvn-hr;oMp~5(sfi#Ya_z&f<^V=ckV*%Qgw!lh
z61*52Tp2l|S4PTM7G^{4ae&t~0<gp4V6_QioDDLw10gYG;UX9{6sSIeZBPYmJB4?Y
zkQc?l+29HYqzs9Hr9OgJIuHhCeT1ltQ0pVel59wQ1nCXID<e+GQUuVlZF5Nfh)dcW
zG9?7*2SLW=AS6TtLb6L6LC(;I)JI5rh={6>AWOBu^GAl5l@X*91STOX;y~39p85z<
z0ilubst1h$t%Ja|50p))K4KRJO&EdajMTtY5@N*y)~W}qAfC}V98#XhTgc&TTqTTv
zmJdk?mpMa49MUT!SQEh&z|ZhRuGH|dNvo>h+i-}i+Te8!@)9*T8zKwmf|d;s>f=xv
zaj=s@Asrbc61w0Gtzv@oU*OdToB<I>CLt9HTmgK94L+oXl>yO(M)GQa7Pi5L&Y;sb
zkSYdJgTPP2hF3QbS;#^-2pggut{xWyG^z&c`+#d7ob?g38iI9vpeMqC&%}oGebBl&
z{3;HDDh}{^2U!+UD?wMm!3O1cQBRG7Z$yPxKxhm|bpuxhS%(0xkNCioL_CV%LwjH=
zwBeNzH`1OUaD9YQ6QNBH@yT2AfmX^P2IwGl6NH4U!G_i~;CUZtj|RL&2v!q;M(3bv
zz^9l(Yag&0(6R*h-c_Xa3CQ&kqymDR)dTPT!0RK(cpbNt5tk&qH-yPR9Hf9&8F5O0
zM&#H;b-|Sp^!x+}8?!#*6x9VI&^dBwl@aJ<IoRkNw9-MYfY8{u>m&Fo1<d{tWJC^Y
z)q_<K(oiIxgjYZq3@_~U*Jyo&cLoh_)q~#o64pa>LMbihvDG8s@iAY_N(Z?DLSvKR
z4D9Jd8%z2D84f$Y8mT4%?H)z$`@q@Y{t;;N4y1B{)HRT51+4~wbCK0RRN^P$8X*kG
zf;VF7BeePjQu|<%Xv)yIkhR(1Y6#vT!d@T2D<k;EALv;Jux<`=bpvO^>m9fVgn_H?
z0}+RiNEHoYkPT8LL24pM<pN=YNziHp@L(NcypCJJhD+WWq7sb+S3~gr5qf<DscRr4
zuKEbxGeT4}gz6&>X;TP-*GK3Ky!8>gv=OVM0eln=xjup}(#BmMK{`he61hHt)H|po
zdjAM=S^~Hlf>cJjm^BeZ1i4#8ph_aQKEhh>;1VRhCW5HMsD|L(B`oC$sN@-u^%10(
zM34lp2Y^;OxCcVvBcc!n4XY&3(mR|dt$}83&?+NH{|FSz7_|v#=?jJ+R+ab#(W)T`
z7hH)Tj%$P0M}nHr6Rk1p6iD@fNkUZetGhuM5E9OXGa%vupz9c*N4i03A|7?<9jTZ#
z3q%C1LV<H3vX~@X0i^1|N+Q=syef8J<N&XL;0)vqK0M0y<kd%z3I$%{K)B%A1XSt3
zXLI1a8;BHEac%`0h%Af*)kl!74WzDtS3q3y)|d#g02_L$j{-`41gVA~)eR50a|G`n
zK?IRWL}esn2ATAM^pC*PL(p{#9Ma}s1g(rXV7(zuDHDSA5u|4XC*hS5yiWw_7GbTA
zAgkkGBxs#BT786C4S@#ekWP&Qo!A3ACjrE!PkjXM9HEWZ!Do(eRXte6A+-$gB&qcg
zzos`y2!H8>DGO>sV{7W;62~+FzX&dKaETMFgU}Sf`$vPJJ`w^g9)Q+6XyafIE~Gw!
zupuP=(4$-yQUSrYmx5~|$czv6`UtYR0A78-8JIN@L<Ft?a-{-%JPj8EQVk)KuoK>(
z!*pm>47`3pW5AW6?^i`Sp9gDY1Ui!kHUtN*dT`c9yebaJRT3HX5v2BkkITWiunNT%
zYrVsxh*KP{5zGMXSA|q1B-KaYDhae!0iqX9LMj~)SJoU-A3;RWNc{B?xEg{~M&^*7
z5rjmmh9D=)fk<i4f&|D4I>_n;cufRhAlFCmItZBoR{*PqAe)8^AT<$$g!GTFRz?F@
zA3?_AD6fxLh1B4P_?if&5;PuyJPr?I<5<E(<NAp55|7FzV62BntANnBL{~$Q&XG6T
zkpj4?MYuRI>fs?kus(v+LC7RrFYydmLc)<?v=HI~+FnQ%5TqJ{lY^o@f>cI4>aLiJ
zt06TGq)tJrjNmucVd{dY$3;R`XoIUEc!!Am`UqaXKp1?myM!QIWRhC_Bgj}BxH7VZ
zY!QMjhXW175!pWica+fQhL9^A$VuABBFJnC>myJd1i9Y<Lc;1JPH7`t^$~LIgUQCI
zhEVDw$lg_4H4#J{v`8Ct#}KGbM9==wXnjO{p*K(}k=G&;WD9_LcJPV@KJtm5fn!{h
zDuZK$`bWq$5rj=fI1i`#h*tx2Vm6ODsB?r_xghrYVDx&hDn?chsd})IG^&p<`$6z3
diSqggQrAH0BM5u6K7!OuqxBKIbHpm71_1OEJdFSV

literal 0
HcmV?d00001

diff --git a/tensorflow/contrib/framework/python/framework/graph_util.py b/tensorflow/contrib/framework/python/framework/graph_util.py
index 9ba9c77b92..a18ff2320d 100644
--- a/tensorflow/contrib/framework/python/framework/graph_util.py
+++ b/tensorflow/contrib/framework/python/framework/graph_util.py
@@ -24,12 +24,14 @@ import six
 # pylint: disable=unused-import
 from tensorflow.core.framework import graph_pb2
 from tensorflow.core.framework import node_def_pb2
+from tensorflow.python.framework import ops
 from tensorflow.python.framework.graph_util_impl import _assert_nodes_are_present
 from tensorflow.python.framework.graph_util_impl import _bfs_for_reachable_nodes
 from tensorflow.python.framework.graph_util_impl import _extract_graph_summary
 from tensorflow.python.framework.graph_util_impl import _node_name
 
-__all__ = ["fuse_op"]
+
+__all__ = ["fuse_op", "get_placeholders"]
 
 
 def fuse_op(graph_def, input_nodes, output_nodes, output_dtypes,
@@ -126,3 +128,27 @@ def fuse_op(graph_def, input_nodes, output_nodes, output_dtypes,
   out.library.CopyFrom(graph_def.library)
   out.versions.CopyFrom(graph_def.versions)
   return out
+
+
+def get_placeholders(graph):
+  """Get placeholders of a graph.
+
+  Args:
+    graph: A tf.Graph.
+  Returns:
+    A list contains all placeholders of given graph.
+
+  Raises:
+    TypeError: If `graph` is not a tensorflow graph.
+  """
+
+  if not isinstance(graph, ops.Graph):
+    raise TypeError("Input graph needs to be a Graph: %s" % graph)
+
+  # For each placeholder() call, there is a corresponding
+  # operation of type 'Placeholder' registered to the graph.
+  # The return value (a Tensor) of placeholder() is the
+  # first output of this operation in fact.
+  operations = graph.get_operations()
+  result = [i.outputs[0] for i in operations if i.type == "Placeholder"]
+  return result
diff --git a/tensorflow/contrib/framework/python/framework/graph_util_test.py b/tensorflow/contrib/framework/python/framework/graph_util_test.py
index 0c531fb290..b8a6d109e1 100644
--- a/tensorflow/contrib/framework/python/framework/graph_util_test.py
+++ b/tensorflow/contrib/framework/python/framework/graph_util_test.py
@@ -21,6 +21,9 @@ from tensorflow.contrib.framework.python.framework import graph_util
 from tensorflow.core.framework import graph_pb2
 from tensorflow.core.framework import node_def_pb2
 from tensorflow.core.framework import types_pb2
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import ops
+from tensorflow.python.ops import array_ops
 from tensorflow.python.platform import test
 
 
@@ -81,5 +84,16 @@ class GraphUtilTest(test.TestCase):
     self.assertEqual(fused_graph_def.node[4].name, 'E')
 
 
+class GetPlaceholdersTest(test.TestCase):
+
+  def test_get_placeholders(self):
+    with ops.Graph().as_default() as g:
+      placeholders = [array_ops.placeholder(dtypes.float32) for _ in range(5)]
+      results = graph_util.get_placeholders(g)
+      self.assertEqual(
+          sorted(placeholders, key=lambda x: x._id),  # pylint: disable=protected-access
+          sorted(results, key=lambda x: x._id))  # pylint: disable=protected-access
+
+
 if __name__ == '__main__':
   test.main()
diff --git a/tensorflow/contrib/gan/python/train.py b/tensorflow/contrib/gan/python/train.py
index ad2d5eb86c..e9443f766b 100644
--- a/tensorflow/contrib/gan/python/train.py
+++ b/tensorflow/contrib/gan/python/train.py
@@ -422,7 +422,7 @@ def gan_loss(
     ac_disc_loss = tfgan_losses.acgan_discriminator_loss(
         model, add_summaries=add_summaries)
     dis_loss += aux_cond_discriminator_weight * ac_disc_loss
-  # Gathers auxilliary losses.
+  # Gathers auxiliary losses.
   if model.generator_scope:
     gen_reg_loss = losses.get_regularization_loss(model.generator_scope.name)
   else:
diff --git a/tensorflow/contrib/layers/python/layers/layers.py b/tensorflow/contrib/layers/python/layers/layers.py
index f1debc8590..2d42875b46 100644
--- a/tensorflow/contrib/layers/python/layers/layers.py
+++ b/tensorflow/contrib/layers/python/layers/layers.py
@@ -2561,7 +2561,10 @@ def separable_convolution2d(
           regularizer=weights_regularizer,
           trainable=trainable,
           collections=weights_collections)
-      strides = [1, stride_h, stride_w, 1]
+      strides = [1, 1, stride_h,
+                 stride_w] if data_format.startswith('NC') else [
+                     1, stride_h, stride_w, 1
+                 ]
 
       outputs = nn.depthwise_conv2d(inputs, depthwise_weights, strides, padding,
                                     rate=utils.two_element_tuple(rate),
diff --git a/tensorflow/contrib/layers/python/layers/layers_test.py b/tensorflow/contrib/layers/python/layers/layers_test.py
index 27bd3172d6..ae64b75d93 100644
--- a/tensorflow/contrib/layers/python/layers/layers_test.py
+++ b/tensorflow/contrib/layers/python/layers/layers_test.py
@@ -3326,16 +3326,24 @@ class SeparableConv2dTest(test.TestCase):
           for model_variable in model_variables:
             self.assertEqual(trainable, model_variable in trainable_variables)
 
-  def testConvNCHW(self):
-    for num_filters, correct_output_filters in [(None, 6), (8, 8)]:
+  def testSepConvNCHW(self):
+    for num_filters, correct_output_filters in zip((None, 5), (6, 5)):
       with self.test_session():
-        batch, height, width = 4, 5, 6
+        batch, height, width = 4, 10, 12
+        kernel_dim, stride = 3, 2
         images = random_ops.random_uniform((batch, 3, height, width), seed=1)
         output = layers_lib.separable_conv2d(
-            images, num_filters, [3, 3], 2, padding='VALID', data_format='NCHW')
-        self.assertListEqual(
-            output.get_shape().as_list(), [batch, correct_output_filters,
-                                           height - 2, width - 2])
+            images,
+            num_outputs=num_filters,
+            kernel_size=[kernel_dim, kernel_dim],
+            depth_multiplier=2,
+            stride=stride,
+            padding='VALID',
+            data_format='NCHW')
+        self.assertListEqual(output.get_shape().as_list(), [
+            batch, correct_output_filters, (height - kernel_dim + 1) // stride,
+            (width - kernel_dim + 1) // stride
+        ])
 
 
 class ScaleGradientTests(test.TestCase):
diff --git a/tensorflow/contrib/lite/BUILD b/tensorflow/contrib/lite/BUILD
index 96a9e281ad..52460123cc 100644
--- a/tensorflow/contrib/lite/BUILD
+++ b/tensorflow/contrib/lite/BUILD
@@ -191,6 +191,9 @@ filegroup(
         exclude = [
             "**/METADATA",
             "**/OWNERS",
+            "downloads",
+            "examples",
+            "gen",
         ],
     ),
     visibility = ["//tensorflow:__subpackages__"],
diff --git a/tensorflow/contrib/lite/Makefile b/tensorflow/contrib/lite/Makefile
new file mode 100644
index 0000000000..78402727ab
--- /dev/null
+++ b/tensorflow/contrib/lite/Makefile
@@ -0,0 +1,147 @@
+
+# Find where we're running from, so we can store generated files here.
+ifeq ($(origin MAKEFILE_DIR), undefined)
+	MAKEFILE_DIR := $(shell dirname $(realpath $(lastword $(MAKEFILE_LIST))))
+endif
+
+# Try to figure out the host system
+HOST_OS :=
+ifeq ($(OS),Windows_NT)
+	HOST_OS = WINDOWS
+else
+	UNAME_S := $(shell uname -s)
+	ifeq ($(UNAME_S),Linux)
+	        HOST_OS := LINUX
+	endif
+	ifeq ($(UNAME_S),Darwin)
+		HOST_OS := OSX
+	endif
+endif
+
+ARCH := $(shell if [[ $(shell uname -m) =~ i[345678]86 ]]; then echo x86_32; else echo $(shell uname -m); fi)
+
+# Where compiled objects are stored.
+OBJDIR := $(MAKEFILE_DIR)/gen/obj/
+BINDIR := $(MAKEFILE_DIR)/gen/bin/
+LIBDIR := $(MAKEFILE_DIR)/gen/lib/
+GENDIR := $(MAKEFILE_DIR)/gen/obj/
+
+# Settings for the host compiler.
+CXX := $(CC_PREFIX) gcc
+CXXFLAGS := --std=c++11 -O3 -DNDEBUG
+CC := $(CC_PREFIX) gcc
+CFLAGS :=
+LDOPTS :=
+LDOPTS += -L/usr/local/lib
+ARFLAGS := -r
+
+INCLUDES := \
+-I. \
+-I$(MAKEFILE_DIR)/../../../ \
+-I$(MAKEFILE_DIR)/downloads/ \
+-I$(MAKEFILE_DIR)/downloads/eigen \
+-I$(MAKEFILE_DIR)/downloads/gemmlowp \
+-I$(MAKEFILE_DIR)/downloads/neon_2_sse \
+-I$(MAKEFILE_DIR)/downloads/farmhash/src \
+-I$(MAKEFILE_DIR)/downloads/flatbuffers/include \
+-I$(GENDIR)
+# This is at the end so any globally-installed frameworks like protobuf don't
+# override local versions in the source tree.
+INCLUDES += -I/usr/local/include
+
+LIBS := \
+-lstdc++ \
+-lpthread \
+-lm \
+-lz
+
+# If we're on Linux, also link in the dl library.
+ifeq ($(OS),LINUX)
+	LIBS += -ldl -lpthread
+endif
+
+include $(MAKEFILE_DIR)/ios_makefile.inc
+
+# This library is the main target for this makefile. It will contain a minimal
+# runtime that can be linked in to other programs.
+LIB_NAME := libtensorflow-lite.a
+LIB_PATH := $(LIBDIR)$(LIB_NAME)
+
+# A small example program that shows how to link against the library.
+BENCHMARK_PATH := $(BINDIR)benchmark_model
+
+BENCHMARK_SRCS := \
+tensorflow/contrib/lite/tools/benchmark_model.cc
+BENCHMARK_OBJS := $(addprefix $(OBJDIR), \
+$(patsubst %.cc,%.o,$(patsubst %.c,%.o,$(BENCHMARK_SRCS))))
+
+# What sources we want to compile, must be kept in sync with the main Bazel
+# build files.
+
+CORE_CC_ALL_SRCS := \
+$(wildcard tensorflow/contrib/lite/*.cc) \
+$(wildcard tensorflow/contrib/lite/kernels/*.cc) \
+$(wildcard tensorflow/contrib/lite/kernels/internal/*.cc) \
+$(wildcard tensorflow/contrib/lite/kernels/internal/optimized/*.cc) \
+$(wildcard tensorflow/contrib/lite/kernels/internal/reference/*.cc) \
+$(wildcard tensorflow/contrib/lite/*.c) \
+$(wildcard tensorflow/contrib/lite/kernels/*.c) \
+$(wildcard tensorflow/contrib/lite/kernels/internal/*.c) \
+$(wildcard tensorflow/contrib/lite/kernels/internal/optimized/*.c) \
+$(wildcard tensorflow/contrib/lite/kernels/internal/reference/*.c) \
+$(wildcard tensorflow/contrib/lite/downloads/farmhash/src/farmhash.cc)
+# Remove any duplicates.
+CORE_CC_ALL_SRCS := $(sort $(CORE_CC_ALL_SRCS))
+CORE_CC_EXCLUDE_SRCS := \
+$(wildcard tensorflow/contrib/lite/*test.cc) \
+$(wildcard tensorflow/contrib/lite/*/*test.cc) \
+$(wildcard tensorflow/contrib/lite/*/*/*test.cc) \
+$(wildcard tensorflow/contrib/lite/*/*/*/*test.cc) \
+$(wildcard tensorflow/contrib/lite/kernels/test_util.cc) \
+$(BENCHMARK_SRCS)
+# Filter out all the excluded files.
+TF_LITE_CC_SRCS := $(filter-out $(CORE_CC_EXCLUDE_SRCS), $(CORE_CC_ALL_SRCS))
+# File names of the intermediate files target compilation generates.
+TF_LITE_CC_OBJS := $(addprefix $(OBJDIR), \
+$(patsubst %.cc,%.o,$(patsubst %.c,%.o,$(TF_LITE_CC_SRCS))))
+LIB_OBJS := $(TF_LITE_CC_OBJS)
+
+# For normal manually-created TensorFlow C++ source files.
+$(OBJDIR)%.o: %.cc
+	@mkdir -p $(dir $@)
+	$(CXX) $(CXXFLAGS) $(INCLUDES) -c $< -o $@
+
+# For normal manually-created TensorFlow C++ source files.
+$(OBJDIR)%.o: %.c
+	@mkdir -p $(dir $@)
+	$(CC) $(CCFLAGS) $(INCLUDES) -c $< -o $@
+
+# The target that's compiled if there's no command-line arguments.
+all: $(LIB_PATH) $(BENCHMARK_PATH)
+
+# Gathers together all the objects we've compiled into a single '.a' archive.
+$(LIB_PATH): $(LIB_OBJS)
+	@mkdir -p $(dir $@)
+	$(AR) $(ARFLAGS) $(LIB_PATH) $(LIB_OBJS)
+
+$(BENCHMARK_PATH): $(BENCHMARK_OBJS) $(LIB_PATH)
+	@mkdir -p $(dir $@)
+	$(CXX) $(CXXFLAGS) $(INCLUDES) \
+	-o $(BENCHMARK_PATH) $(BENCHMARK_OBJS) \
+	$(LIBFLAGS) $(LIB_PATH) $(LDFLAGS) $(LIBS)
+
+# Gets rid of all generated files.
+clean:
+	rm -rf $(MAKEFILE_DIR)/gen
+
+# Gets rid of target files only, leaving the host alone. Also leaves the lib
+# directory untouched deliberately, so we can persist multiple architectures
+# across builds for iOS and Android.
+cleantarget:
+	rm -rf $(OBJDIR)
+	rm -rf $(BINDIR)
+
+$(DEPDIR)/%.d: ;
+.PRECIOUS: $(DEPDIR)/%.d
+
+-include $(patsubst %,$(DEPDIR)/%.d,$(basename $(TF_CC_SRCS)))
diff --git a/tensorflow/contrib/lite/README.md b/tensorflow/contrib/lite/README.md
index 385ccf4680..c7464bcc9d 100644
--- a/tensorflow/contrib/lite/README.md
+++ b/tensorflow/contrib/lite/README.md
@@ -1,5 +1,5 @@
 # TensorFlow Lite
-TensorFlow Lite is TensorFlow’s lightweight solution for mobile and embedded devices. It enables low-latency inference of on-device machine learning models with a small binary size and fast performance supporting hardware acceleration.
+TensorFlow Lite is TensorFlow's lightweight solution for mobile and embedded devices. It enables low-latency inference of on-device machine learning models with a small binary size and fast performance supporting hardware acceleration.
 
 TensorFlow Lite uses many techniques for achieving low latency like optimizing the kernels for specific mobile apps, pre-fused activations, quantized kernels that allow smaller and faster (fixed-point math) models, and in the future, leverage specialized machine learning hardware to get the best possible performance for a particular model on a particular device.
 
@@ -20,18 +20,18 @@ In the demo app, inference is done using the TensorFlow Lite Java API. The demo
 The  fastest path to trying the demo, is to download the pre-built binary
 [TfLiteCameraDemo.apk](https://storage.googleapis.com/download.tensorflow.org/deps/tflite/TfLiteCameraDemo.apk)
 
-Once the apk is installed, click the app icon to start the app. The first-time the app is opened, the app asks for runtime permissions to access the device camera. The demo app opens the back-camera of the device and recognizes the objects in the camera’s field of view. At the bottom of the image (or at the left of the image if the device is in landscape mode), it shows the latency of classification and the top three objects classified.
+Once the apk is installed, click the app icon to start the app. The first-time the app is opened, the app asks for runtime permissions to access the device camera. The demo app opens the back-camera of the device and recognizes the objects in the camera's field of view. At the bottom of the image (or at the left of the image if the device is in landscape mode), it shows the latency of classification and the top three objects classified.
 
 ## Building in Android Studio using TensorFlow Lite AAR from JCenter
 The simplest way to compile the demo app, and try out changes to the project code is to use AndroidStudio.
 
  - Install the latest version of Android Studio 3 as specified [here](https://developer.android.com/studio/index.html).
  - Make sure the Android SDK version is greater than 26 and NDK version is greater than 14 (in the Android Studio Settings).
- - Import the tensorflow/contrib/lite/java/demo directory as a new Android Studio project.
+ - Import the `tensorflow/contrib/lite/java/demo` directory as a new Android Studio project.
  - Click through installing all the Gradle extensions it requests.
  - Download the quantized Mobilenet TensorFlow Lite model from [here](https://storage.googleapis.com/download.tensorflow.org/models/tflite/mobilenet_v1_224_android_quant_2017_11_08.zip)
      - unzip and copy mobilenet_quant_v1_224.tflite to the assets directory:
-       tensorflow/contrib/lite/java/demo/app/src/main/assets/
+       `tensorflow/contrib/lite/java/demo/app/src/main/assets/`
  - Build and run the demo app
 
 ## Building TensorFlow Lite and the demo app from source
@@ -43,39 +43,45 @@ The simplest way to compile the demo app, and try out changes to the project cod
 ### Install Bazel
 If bazel is not installed on your system, install it now by following [these directions](https://bazel.build/versions/master/docs/install.html)
 
-NOTE: Bazel does not currently support building for Android on Windows. Full support for gradle/cmake builds is coming soon, but in the meantime Windows users should download the [prebuilt binary](https://storage.googleapis.com/download.tensorflow.org/deps/tflite/TfLiteCameraDemo.apk) instead.
+NOTE: Bazel does not fully support building Android on Windows yet. Full support for Gradle/CMake builds is coming soon, but in the meantime Windows users should download the [prebuilt binary](https://storage.googleapis.com/download.tensorflow.org/deps/tflite/TfLiteCameraDemo.apk) instead.
 
 ### Install Android NDK and SDK
 Bazel is the primary build system for TensorFlow. Bazel and the Android NDK and SDK must be installed on your system.
  - Install the latest version of Bazel as per the instructions on the [Bazel website](https://bazel.build/versions/master/docs/install.html)
- - The Android NDK is required to build the native (C/C++) TensorFlow code. The current recommended version is 14b, which may be found [here](https://developer.android.com/tools/revisions/build-tools.html).
- - The Android SDK and build tools may be obtained [here](https://developer.android.com/tools/revisions/build-tools.html), or alternatively as part of [Android Studio](https://developer.android.com/studio/index.html). Build tools API >= 23 is required to build the TensorFlow Android demo (though it will run on API >= 21 devices).
-
  - The Android NDK is required to build the native (C/C++) TensorFlow Lite code. The current recommended version is 14b, which can be found [here](https://developer.android.com/ndk/downloads/older_releases.html#ndk-14b-downloads).
-
  - The Android SDK and build tools may be obtained [here](https://developer.android.com/tools/revisions/build-tools.html), or alternatively as part of [Android Studio](https://developer.android.com/studio/index.html). Build tools API >= 23 is required to build the TF Android demo (though it will run on API >= 21 devices).
  - In the root of the TensorFlow repository update the `WORKSPACE` file with the `api_level` and location of the SDK and NDK. If you installed it with AndroidStudio the SDK path can be found in the SDK manager, and the default NDK path is:`{SDK path}/ndk-bundle.`
 
 ```
- Android_sdk_repository (
-   name = "androidsdk",
-   api_level = 23,
-   build_tools_version = "23.0.2",
-   path = "/home/xxxx/android-sdk-linux/", )
+android_sdk_repository (
+    name = "androidsdk",
+    api_level = 23,
+    build_tools_version = "23.0.2",
+    path = "/home/xxxx/android-sdk-linux/",
+)
 
 android_ndk_repository(
-  name="androidndk",
-  path="/home/xxxx/android-ndk-r10e/",
-  api_level=19)
-
+    name = "androidndk",
+    path = "/home/xxxx/android-ndk-r10e/",
+    api_level = 19,
+)
 ```
-Additional details on building with Android can be found [here](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/lite/java/demo/README.md)
+
+Additional details on building with Android can be found [here](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/lite/java/demo/README.md).
 
 ### Build the  source code
 Run bazel with the following command to build the demo.
 
 Build the demo app:
-bazel build --cxxopt='--std=c++11' //tensorflow/contrib/lite/java/demo/app/src/main:TfLiteCameraDemo
+
+```
+bazel build --cxxopt=--std=c++11 //tensorflow/contrib/lite/java/demo/app/src/main:TfLiteCameraDemo
+```
+
+### Note
+
+Currently, we only support building the Android demo app within a Python 2
+environment (due to a Bazel bug).
 
 ### More about the demo
 The demo is resizing each camera image frame to (224 width * 224 height) to match the  quantized Mobilenet model being used. The resized image is converted into a ByteBuffer row by row of size 1 * 224 * 224 * 3 bytes, where 1 is the number of images in a batch 224 * 224 is the width and height of the image 3 bytes represents three colors of a pixel. This demo uses the TensorFlow Lite Java inference API for models which take a single input and provide a single output. This outputs a two-dimensional array, with the first dimension being the category index and the second dimension being the confidence of classification. The Mobilenet model has 1001 unique categories and the app sorts the probabilities of all the categories and displays the top three. The Mobilenet quantized model is bundled within the assets directory of the app.
@@ -95,7 +101,7 @@ The demo is resizing each camera image frame to (224 width * 224 height) to matc
 
 [On Device Smart Reply](https://research.googleblog.com/2017/02/on-device-machine-intelligence.html)  is an on-device model which provides one-touch replies for an incoming text message by suggesting contextually relevant messages. The model is built specifically for memory constrained devices such as watches & phones and it has been successfully used to surface [Smart Replies on Android Wear](https://research.googleblog.com/2017/02/on-device-machine-intelligence.html). Note that this model only works on Android as of now.
 
-These pre-trained models can be downloaded from [here](models.md).
+These pre-trained models can be downloaded from [here](g3doc/models.md).
 
 ### Retrain Inception-V3 or MobileNet for a custom data set
 The above pre-trained models have been trained on the ImageNet data set, which consists of 1000 predefined classes. A model will need to be re-trained if these classes are not relevant or useful for a given use case. This technique is called transfer learning, which starts with a model that has been already trained on a problem and will then be retrained on a similar problem. Deep learning from scratch can take days, but transfer learning can be done fairly quickly. In order to do this, a developer will need to generate their custom data set labeled with the relevant classes.
@@ -104,7 +110,7 @@ The [TensorFlow for Poets](https://codelabs.developers.google.com/codelabs/tenso
 
 
 ### Train a custom model
-A developer may choose to train a custom model using Tensorflow. TensorFlow documentation has [several tutorials](https://www.tensorflow.org/tutorials/) for building and training models. If the user has written a model using TensorFlow’s Slim Framework the first step is to export this to a GraphDef file. This is necessary because Slim does not store the model structure outside the code, so to communicate with other parts of the framework it needs to be exported. Documentation for the export can be found [here](https://github.com/tensorflow/models/tree/master/research/slim#Export). The output of this step will be a .pb file for the custom model.
+A developer may choose to train a custom model using Tensorflow. TensorFlow documentation has [several tutorials](https://www.tensorflow.org/tutorials/) for building and training models. If the user has written a model using TensorFlow's Slim Framework the first step is to export this to a GraphDef file. This is necessary because Slim does not store the model structure outside the code, so to communicate with other parts of the framework it needs to be exported. Documentation for the export can be found [here](https://github.com/tensorflow/models/tree/master/research/slim#Export). The output of this step will be a .pb file for the custom model.
 
 TensorFlow Lite currently supports a subset of TensorFlow operators. Please refer to [this document](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/lite/g3doc/tf_ops_compatibility.md) for details of supported operators and their usage. This
 set will continue to expand in future releases of Tensorflow Lite.
@@ -128,9 +134,9 @@ Since we employ several formats, the following definitions may be useful:
  - TensorFlow lite model (.lite) - a serialized flatbuffer, containing TensorFlow lite operators and Tensors for the TensorFlow lite interpreter. This is most analogous to TensorFlow frozen GraphDefs.
 
 ### Freeze Graph
-To use this .pb GraphDef file within TensorFlow Lite, the application developer will need checkpoints containing trained weight parameters. The .pb contains only the structure of the graph. The process of merging the checkpoint values with the graph structure is known as “freezing” the graph.
+To use this .pb GraphDef file within TensorFlow Lite, the application developer will need checkpoints containing trained weight parameters. The .pb contains only the structure of the graph. The process of merging the checkpoint values with the graph structure is known as "freezing" the graph.
 
-The developer should know where the checkpoints folder is present or checkpoints can also be downloaded for a pre-trained model (Example: Here is a link to the [MobileNets](https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet_v1.md)
+The developer should know where the checkpoints folder is present or checkpoints can also be downloaded for a pre-trained model (Example: Here is a link to the [MobileNets](https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet_v1.md)).
 
 Graph freezing can be done using the command below (and modifying the arguments appropriately)
 
@@ -155,7 +161,7 @@ Here is a sample command line to convert the frozen Graphdef to '.lite' format f
 bazel build tensorflow/contrib/lite/toco:toco
 
 bazel-bin/tensorflow/contrib/lite/toco/toco -- \
-  --input_file=(pwd)/mobilenet_v1_1.0_224/frozen_graph.pb \
+  --input_file=$(pwd)/mobilenet_v1_1.0_224/frozen_graph.pb \
   --input_format=TENSORFLOW_GRAPHDEF  --output_format=TFLITE \
   --output_file=/tmp/mobilenet_v1_1.0_224.lite --inference_type=FLOAT \
   --input_type=FLOAT --input_arrays=input \
@@ -183,18 +189,18 @@ with tf.Session() as sess:
 ```
 For detailed instructions on how to use the Tensorflow Optimizing Converter, please see [here](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/lite/toco/g3doc/cmdline_examples.md).
 
-You may refer to the [Ops compatibility guide](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/lite/tf_ops_compatibility.md) for troubleshooting help. If that doesn’t help, please file an [issue](https://github.com/tensorflow/tensorflow/issues).
+You may refer to the [Ops compatibility guide](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/lite/g3doc/tf_ops_compatibility.md) for troubleshooting help. If that doesn't help, please file an [issue](https://github.com/tensorflow/tensorflow/issues).
 
 ## Step 3. Use the TensorFlow Lite model for inference in a mobile app
 
 After completion of Step 2 the developer should have a .lite model.
 
 ### For Android
-Because Android apps need to be written in Java, and core TensorFlow is in C++, a JNI library is provided to interface between the two. Its interface is aimed only at inference, so it provides the ability to load a graph, set up inputs, and run the model to calculate particular outputs. The full documentation for the set of methods can be seen [here](https://github.com/TensorFlow/TensorFlow/blob/master/TensorFlow/contrib/lite/g3doc/). The demo app is also open sourced on [github](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/lite/java/demo/app).
+Because Android apps need to be written in Java, and core TensorFlow is in C++, a JNI library is provided to interface between the two. Its interface is aimed only at inference, so it provides the ability to load a graph, set up inputs, and run the model to calculate particular outputs. The full documentation for the set of methods can be seen [here](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/contrib/lite/g3doc/). The demo app is also open sourced on [github](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/lite/java/demo/app).
 
-The [demo app] (https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/lite/java/demo/app)  uses this interface, so it’s a good place to look for example usage. You can also download the prebuilt binary [here](http://download.tensorflow.org/deps/tflite/TfLiteCameraDemo.apk).
+The [demo app](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/lite/java/demo/app)  uses this interface, so it's a good place to look for example usage. You can also download the prebuilt binary [here](http://download.tensorflow.org/deps/tflite/TfLiteCameraDemo.apk).
 
-Note that you’d need to follow instructions for installing TensorFlow on Android, setting up bazel and Android Studio outlined [here](https://www.tensorflow.org/mobile/android_build).
+Note that you'd need to follow instructions for installing TensorFlow on Android, setting up bazel and Android Studio outlined [here](https://www.tensorflow.org/mobile/android_build).
 
 ### For iOS
-Follow the documentation [here](https://github.com/TensorFlow/TensorFlow/blob/master/TensorFlow/contrib/lite/g3doc/ios.md) to get integrate a TFLite model into your app.
+Follow the documentation [here](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/lite/g3doc/ios.md) to get integrate a TFLite model into your app.
diff --git a/tensorflow/contrib/lite/build_ios_universal_lib.sh b/tensorflow/contrib/lite/build_ios_universal_lib.sh
new file mode 100755
index 0000000000..cbc96e6edd
--- /dev/null
+++ b/tensorflow/contrib/lite/build_ios_universal_lib.sh
@@ -0,0 +1,31 @@
+#!/bin/bash -x
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+set -e
+make -f tensorflow/contrib/lite/Makefile TARGET=IOS IOS_ARCH=x86_64 -j 8
+make -f tensorflow/contrib/lite/Makefile TARGET=IOS IOS_ARCH=i386 -j 8
+make -f tensorflow/contrib/lite/Makefile TARGET=IOS IOS_ARCH=armv7 -j 8
+make -f tensorflow/contrib/lite/Makefile TARGET=IOS IOS_ARCH=armv7s -j 8
+make -f tensorflow/contrib/lite/Makefile TARGET=IOS IOS_ARCH=arm64 -j 8
+
+lipo \
+tensorflow/contrib/lite/gen/lib/ios_x86_64/libtensorflow-lite.a \
+tensorflow/contrib/lite/gen/lib/ios_i386/libtensorflow-lite.a \
+tensorflow/contrib/lite/gen/lib/ios_armv7/libtensorflow-lite.a \
+tensorflow/contrib/lite/gen/lib/ios_armv7s/libtensorflow-lite.a \
+tensorflow/contrib/lite/gen/lib/ios_arm64/libtensorflow-lite.a \
+-create \
+-output tensorflow/contrib/lite/gen/lib/libtensorflow-lite.a
diff --git a/tensorflow/contrib/lite/download_dependencies.sh b/tensorflow/contrib/lite/download_dependencies.sh
new file mode 100755
index 0000000000..778d618361
--- /dev/null
+++ b/tensorflow/contrib/lite/download_dependencies.sh
@@ -0,0 +1,99 @@
+#!/bin/bash
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+set -e
+
+DOWNLOADS_DIR=tensorflow/contrib/lite/downloads
+BZL_FILE_PATH=tensorflow/workspace.bzl
+
+EIGEN_URL="$(grep -o 'http.*bitbucket.org/eigen/eigen/get/.*tar\.gz' "${BZL_FILE_PATH}" | grep -v bazel-mirror | head -n1)"
+GEMMLOWP_URL="$(grep -o 'https://mirror.bazel.build/github.com/google/gemmlowp/.*zip' "${BZL_FILE_PATH}" | head -n1)"
+GOOGLETEST_URL="https://github.com/google/googletest/archive/release-1.8.0.tar.gz"
+ABSL_URL="$(grep -o 'https://github.com/abseil/abseil-cpp/.*tar.gz' "${BZL_FILE_PATH}" | head -n1)"
+NEON_2_SSE_URL="https://github.com/intel/ARM_NEON_2_x86_SSE/archive/master.zip"
+FARMHASH_URL="https://mirror.bazel.build/github.com/google/farmhash/archive/816a4ae622e964763ca0862d9dbd19324a1eaf45.tar.gz"
+FLATBUFFERS_URL="https://github.com/google/flatbuffers/archive/master.zip"
+MODELS_URL="https://storage.googleapis.com/download.tensorflow.org/models/tflite/mobilenet_v1_1.0_224_ios_lite_float_2017_11_08.zip"
+QUANTIZED_MODELS_URL="https://storage.googleapis.com/download.tensorflow.org/models/tflite/mobilenet_v1_224_android_quant_2017_11_08.zip"
+
+# TODO(petewarden): Some new code in Eigen triggers a clang bug with iOS arm64,
+#                   so work around it by patching the source.
+replace_by_sed() {
+  local regex="${1}"
+  shift
+  # Detect the version of sed by the return value of "--version" flag. GNU-sed
+  # supports "--version" while BSD-sed doesn't.
+  if ! sed --version >/dev/null 2>&1; then
+    # BSD-sed.
+    sed -i '' -e "${regex}" "$@"
+  else
+    # GNU-sed.
+    sed -i -e "${regex}" "$@"
+  fi
+}
+
+download_and_extract() {
+  local usage="Usage: download_and_extract URL DIR"
+  local url="${1:?${usage}}"
+  local dir="${2:?${usage}}"
+  echo "downloading ${url}" >&2
+  mkdir -p "${dir}"
+  if [[ "${url}" == *gz ]]; then
+    curl -Ls "${url}" | tar -C "${dir}" --strip-components=1 -xz
+  elif [[ "${url}" == *zip ]]; then
+    tempdir=$(mktemp -d)
+    tempdir2=$(mktemp -d)
+
+    curl -L ${url} > ${tempdir}/zipped.zip
+    unzip ${tempdir}/zipped.zip -d ${tempdir2}
+
+    # If the zip file contains nested directories, extract the files from the
+    # inner directory.
+    if ls ${tempdir2}/*/* 1> /dev/null 2>&1; then
+      # unzip has no strip components, so unzip to a temp dir, and move the
+      # files we want from the tempdir to destination.
+      cp -R ${tempdir2}/*/* ${dir}/
+    else
+      cp -R ${tempdir2}/* ${dir}/
+    fi
+    rm -rf ${tempdir2} ${tempdir}
+  fi
+
+  # Delete any potential BUILD files, which would interfere with Bazel builds.
+  find "${dir}" -type f -name '*BUILD' -delete
+}
+
+download_and_extract "${EIGEN_URL}" "${DOWNLOADS_DIR}/eigen"
+download_and_extract "${GEMMLOWP_URL}" "${DOWNLOADS_DIR}/gemmlowp"
+download_and_extract "${GOOGLETEST_URL}" "${DOWNLOADS_DIR}/googletest"
+download_and_extract "${ABSL_URL}" "${DOWNLOADS_DIR}/absl"
+download_and_extract "${NEON_2_SSE_URL}" "${DOWNLOADS_DIR}/neon_2_sse"
+download_and_extract "${FARMHASH_URL}" "${DOWNLOADS_DIR}/farmhash"
+download_and_extract "${FLATBUFFERS_URL}" "${DOWNLOADS_DIR}/flatbuffers"
+download_and_extract "${MODELS_URL}" "${DOWNLOADS_DIR}/models"
+download_and_extract "${QUANTIZED_MODELS_URL}" "${DOWNLOADS_DIR}/quantized_models"
+
+replace_by_sed 's#static uint32x4_t p4ui_CONJ_XOR = vld1q_u32( conj_XOR_DATA );#static uint32x4_t p4ui_CONJ_XOR; // = vld1q_u32( conj_XOR_DATA ); - Removed by script#' \
+  "${DOWNLOADS_DIR}/eigen/Eigen/src/Core/arch/NEON/Complex.h"
+replace_by_sed 's#static uint32x2_t p2ui_CONJ_XOR = vld1_u32( conj_XOR_DATA );#static uint32x2_t p2ui_CONJ_XOR;// = vld1_u32( conj_XOR_DATA ); - Removed by scripts#' \
+  "${DOWNLOADS_DIR}/eigen/Eigen/src/Core/arch/NEON/Complex.h"
+replace_by_sed 's#static uint64x2_t p2ul_CONJ_XOR = vld1q_u64( p2ul_conj_XOR_DATA );#static uint64x2_t p2ul_CONJ_XOR;// = vld1q_u64( p2ul_conj_XOR_DATA ); - Removed by script#' \
+  "${DOWNLOADS_DIR}/eigen/Eigen/src/Core/arch/NEON/Complex.h"
+
+cp ${DOWNLOADS_DIR}/models/models/* tensorflow/contrib/lite/examples/ios/simple/data/
+cp ${DOWNLOADS_DIR}/quantized_models/* tensorflow/contrib/lite/examples/ios/camera/data/
+
+echo "download_dependencies.sh completed successfully." >&2
diff --git a/tensorflow/contrib/lite/examples/ios/camera/.gitignore b/tensorflow/contrib/lite/examples/ios/camera/.gitignore
new file mode 100644
index 0000000000..9e8962f4c6
--- /dev/null
+++ b/tensorflow/contrib/lite/examples/ios/camera/.gitignore
@@ -0,0 +1,2 @@
+/data/*.txt
+/data/*.tflite
diff --git a/tensorflow/contrib/lite/examples/ios/camera/CameraExampleAppDelegate.h b/tensorflow/contrib/lite/examples/ios/camera/CameraExampleAppDelegate.h
new file mode 100644
index 0000000000..55891c3ee1
--- /dev/null
+++ b/tensorflow/contrib/lite/examples/ios/camera/CameraExampleAppDelegate.h
@@ -0,0 +1,21 @@
+// Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#import <UIKit/UIKit.h>
+
+@interface CameraExampleAppDelegate : UIResponder<UIApplicationDelegate>
+
+@property(strong, nonatomic) UIWindow* window;
+
+@end
diff --git a/tensorflow/contrib/lite/examples/ios/camera/CameraExampleAppDelegate.m b/tensorflow/contrib/lite/examples/ios/camera/CameraExampleAppDelegate.m
new file mode 100644
index 0000000000..128266d53f
--- /dev/null
+++ b/tensorflow/contrib/lite/examples/ios/camera/CameraExampleAppDelegate.m
@@ -0,0 +1,44 @@
+// Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#import "CameraExampleAppDelegate.h"
+
+@implementation CameraExampleAppDelegate
+
+@synthesize window = _window;
+
+- (BOOL)application:(UIApplication *)application
+    didFinishLaunchingWithOptions:(NSDictionary *)launchOptions {
+  [self.window makeKeyAndVisible];
+  return YES;
+}
+
+- (void)applicationWillResignActive:(UIApplication *)application {
+  [[UIApplication sharedApplication] setIdleTimerDisabled:NO];
+}
+
+- (void)applicationDidEnterBackground:(UIApplication *)application {
+}
+
+- (void)applicationWillEnterForeground:(UIApplication *)application {
+}
+
+- (void)applicationDidBecomeActive:(UIApplication *)application {
+  [[UIApplication sharedApplication] setIdleTimerDisabled:YES];
+}
+
+- (void)applicationWillTerminate:(UIApplication *)application {
+}
+
+@end
diff --git a/tensorflow/contrib/lite/examples/ios/camera/CameraExampleViewController.h b/tensorflow/contrib/lite/examples/ios/camera/CameraExampleViewController.h
new file mode 100644
index 0000000000..fb5800e86d
--- /dev/null
+++ b/tensorflow/contrib/lite/examples/ios/camera/CameraExampleViewController.h
@@ -0,0 +1,48 @@
+// Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#import <AVFoundation/AVFoundation.h>
+#import <UIKit/UIKit.h>
+
+#include <vector>
+
+#include "tensorflow/contrib/lite/kernels/register.h"
+#include "tensorflow/contrib/lite/model.h"
+
+@interface CameraExampleViewController
+    : UIViewController<UIGestureRecognizerDelegate, AVCaptureVideoDataOutputSampleBufferDelegate> {
+  IBOutlet UIView* previewView;
+  AVCaptureVideoPreviewLayer* previewLayer;
+  AVCaptureVideoDataOutput* videoDataOutput;
+  dispatch_queue_t videoDataOutputQueue;
+  UIView* flashView;
+  BOOL isUsingFrontFacingCamera;
+  NSMutableDictionary* oldPredictionValues;
+  NSMutableArray* labelLayers;
+  AVCaptureSession* session;
+
+  std::vector<std::string> labels;
+  std::unique_ptr<tflite::FlatBufferModel> model;
+  tflite::ops::builtin::BuiltinOpResolver resolver;
+  std::unique_ptr<tflite::Interpreter> interpreter;
+
+  double total_latency;
+  int total_count;
+}
+@property(strong, nonatomic) CATextLayer* predictionTextLayer;
+
+- (IBAction)takePicture:(id)sender;
+- (IBAction)switchCameras:(id)sender;
+
+@end
diff --git a/tensorflow/contrib/lite/examples/ios/camera/CameraExampleViewController.mm b/tensorflow/contrib/lite/examples/ios/camera/CameraExampleViewController.mm
new file mode 100644
index 0000000000..ea398ad14e
--- /dev/null
+++ b/tensorflow/contrib/lite/examples/ios/camera/CameraExampleViewController.mm
@@ -0,0 +1,506 @@
+// Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#import "CameraExampleViewController.h"
+#import <AssertMacros.h>
+#import <AssetsLibrary/AssetsLibrary.h>
+#import <CoreImage/CoreImage.h>
+#import <ImageIO/ImageIO.h>
+
+#include <sys/time.h>
+#include <fstream>
+#include <iostream>
+#include <queue>
+
+#include "tensorflow/contrib/lite/kernels/register.h"
+#include "tensorflow/contrib/lite/model.h"
+#include "tensorflow/contrib/lite/string_util.h"
+#include "tensorflow/contrib/lite/tools/mutable_op_resolver.h"
+
+#define LOG(x) std::cerr
+
+// If you have your own model, modify this to the file name, and make sure
+// you've added the file to your app resources too.
+static NSString* model_file_name = @"mobilenet_quant_v1_224";
+static NSString* model_file_type = @"tflite";
+
+// If you have your own model, point this to the labels file.
+static NSString* labels_file_name = @"labels";
+static NSString* labels_file_type = @"txt";
+
+// These dimensions need to match those the model was trained with.
+static const int wanted_input_width = 224;
+static const int wanted_input_height = 224;
+static const int wanted_input_channels = 3;
+
+static NSString* FilePathForResourceName(NSString* name, NSString* extension) {
+  NSString* file_path = [[NSBundle mainBundle] pathForResource:name ofType:extension];
+  if (file_path == NULL) {
+    LOG(FATAL) << "Couldn't find '" << [name UTF8String] << "." << [extension UTF8String]
+               << "' in bundle.";
+  }
+  return file_path;
+}
+
+static void LoadLabels(NSString* file_name, NSString* file_type,
+                       std::vector<std::string>* label_strings) {
+  NSString* labels_path = FilePathForResourceName(file_name, file_type);
+  if (!labels_path) {
+    LOG(ERROR) << "Failed to find model proto at" << [file_name UTF8String]
+               << [file_type UTF8String];
+  }
+  std::ifstream t;
+  t.open([labels_path UTF8String]);
+  std::string line;
+  while (t) {
+    std::getline(t, line);
+    label_strings->push_back(line);
+  }
+  t.close();
+}
+
+// Returns the top N confidence values over threshold in the provided vector,
+// sorted by confidence in descending order.
+static void GetTopN(const uint8_t* prediction, const int prediction_size, const int num_results,
+                    const float threshold, std::vector<std::pair<float, int>>* top_results) {
+  // Will contain top N results in ascending order.
+  std::priority_queue<std::pair<float, int>, std::vector<std::pair<float, int>>,
+                      std::greater<std::pair<float, int>>>
+      top_result_pq;
+
+  const long count = prediction_size;
+  for (int i = 0; i < count; ++i) {
+    const float value = prediction[i] / 255.0;
+    // Only add it if it beats the threshold and has a chance at being in
+    // the top N.
+    if (value < threshold) {
+      continue;
+    }
+
+    top_result_pq.push(std::pair<float, int>(value, i));
+
+    // If at capacity, kick the smallest value out.
+    if (top_result_pq.size() > num_results) {
+      top_result_pq.pop();
+    }
+  }
+
+  // Copy to output vector and reverse into descending order.
+  while (!top_result_pq.empty()) {
+    top_results->push_back(top_result_pq.top());
+    top_result_pq.pop();
+  }
+  std::reverse(top_results->begin(), top_results->end());
+}
+
+@interface CameraExampleViewController (InternalMethods)
+- (void)setupAVCapture;
+- (void)teardownAVCapture;
+@end
+
+@implementation CameraExampleViewController
+
+- (void)setupAVCapture {
+  NSError* error = nil;
+
+  session = [AVCaptureSession new];
+  if ([[UIDevice currentDevice] userInterfaceIdiom] == UIUserInterfaceIdiomPhone)
+    [session setSessionPreset:AVCaptureSessionPreset640x480];
+  else
+    [session setSessionPreset:AVCaptureSessionPresetPhoto];
+
+  AVCaptureDevice* device = [AVCaptureDevice defaultDeviceWithMediaType:AVMediaTypeVideo];
+  AVCaptureDeviceInput* deviceInput =
+      [AVCaptureDeviceInput deviceInputWithDevice:device error:&error];
+  assert(error == nil);
+
+  if ([session canAddInput:deviceInput]) [session addInput:deviceInput];
+
+  videoDataOutput = [AVCaptureVideoDataOutput new];
+
+  NSDictionary* rgbOutputSettings =
+      [NSDictionary dictionaryWithObject:[NSNumber numberWithInt:kCMPixelFormat_32BGRA]
+                                  forKey:(id)kCVPixelBufferPixelFormatTypeKey];
+  [videoDataOutput setVideoSettings:rgbOutputSettings];
+  [videoDataOutput setAlwaysDiscardsLateVideoFrames:YES];
+  videoDataOutputQueue = dispatch_queue_create("VideoDataOutputQueue", DISPATCH_QUEUE_SERIAL);
+  [videoDataOutput setSampleBufferDelegate:self queue:videoDataOutputQueue];
+
+  if ([session canAddOutput:videoDataOutput]) [session addOutput:videoDataOutput];
+  [[videoDataOutput connectionWithMediaType:AVMediaTypeVideo] setEnabled:YES];
+
+  previewLayer = [[AVCaptureVideoPreviewLayer alloc] initWithSession:session];
+  [previewLayer setBackgroundColor:[[UIColor blackColor] CGColor]];
+  [previewLayer setVideoGravity:AVLayerVideoGravityResizeAspect];
+  CALayer* rootLayer = [previewView layer];
+  [rootLayer setMasksToBounds:YES];
+  [previewLayer setFrame:[rootLayer bounds]];
+  [rootLayer addSublayer:previewLayer];
+  [session startRunning];
+
+  if (error) {
+    NSString* title = [NSString stringWithFormat:@"Failed with error %d", (int)[error code]];
+    UIAlertController* alertController =
+        [UIAlertController alertControllerWithTitle:title
+                                            message:[error localizedDescription]
+                                     preferredStyle:UIAlertControllerStyleAlert];
+    UIAlertAction* dismiss =
+        [UIAlertAction actionWithTitle:@"Dismiss" style:UIAlertActionStyleDefault handler:nil];
+    [alertController addAction:dismiss];
+    [self presentViewController:alertController animated:YES completion:nil];
+    [self teardownAVCapture];
+  }
+}
+
+- (void)teardownAVCapture {
+  [previewLayer removeFromSuperlayer];
+}
+
+- (AVCaptureVideoOrientation)avOrientationForDeviceOrientation:
+    (UIDeviceOrientation)deviceOrientation {
+  AVCaptureVideoOrientation result = (AVCaptureVideoOrientation)(deviceOrientation);
+  if (deviceOrientation == UIDeviceOrientationLandscapeLeft)
+    result = AVCaptureVideoOrientationLandscapeRight;
+  else if (deviceOrientation == UIDeviceOrientationLandscapeRight)
+    result = AVCaptureVideoOrientationLandscapeLeft;
+  return result;
+}
+
+- (IBAction)takePicture:(id)sender {
+  if ([session isRunning]) {
+    [session stopRunning];
+    [sender setTitle:@"Continue" forState:UIControlStateNormal];
+
+    flashView = [[UIView alloc] initWithFrame:[previewView frame]];
+    [flashView setBackgroundColor:[UIColor whiteColor]];
+    [flashView setAlpha:0.f];
+    [[[self view] window] addSubview:flashView];
+
+    [UIView animateWithDuration:.2f
+        animations:^{
+          [flashView setAlpha:1.f];
+        }
+        completion:^(BOOL finished) {
+          [UIView animateWithDuration:.2f
+              animations:^{
+                [flashView setAlpha:0.f];
+              }
+              completion:^(BOOL finished) {
+                [flashView removeFromSuperview];
+                flashView = nil;
+              }];
+        }];
+
+  } else {
+    [session startRunning];
+    [sender setTitle:@"Freeze Frame" forState:UIControlStateNormal];
+  }
+}
+
+- (void)captureOutput:(AVCaptureOutput*)captureOutput
+    didOutputSampleBuffer:(CMSampleBufferRef)sampleBuffer
+           fromConnection:(AVCaptureConnection*)connection {
+  CVPixelBufferRef pixelBuffer = CMSampleBufferGetImageBuffer(sampleBuffer);
+  CFRetain(pixelBuffer);
+  [self runModelOnFrame:pixelBuffer];
+  CFRelease(pixelBuffer);
+}
+
+- (void)runModelOnFrame:(CVPixelBufferRef)pixelBuffer {
+  assert(pixelBuffer != NULL);
+
+  OSType sourcePixelFormat = CVPixelBufferGetPixelFormatType(pixelBuffer);
+  int doReverseChannels;
+  if (kCVPixelFormatType_32ARGB == sourcePixelFormat) {
+    doReverseChannels = 1;
+  } else if (kCVPixelFormatType_32BGRA == sourcePixelFormat) {
+    doReverseChannels = 0;
+  } else {
+    assert(false);  // Unknown source format
+  }
+
+  const int sourceRowBytes = (int)CVPixelBufferGetBytesPerRow(pixelBuffer);
+  const int image_width = (int)CVPixelBufferGetWidth(pixelBuffer);
+  const int fullHeight = (int)CVPixelBufferGetHeight(pixelBuffer);
+
+  CVPixelBufferLockFlags unlockFlags = kNilOptions;
+  CVPixelBufferLockBaseAddress(pixelBuffer, unlockFlags);
+
+  unsigned char* sourceBaseAddr = (unsigned char*)(CVPixelBufferGetBaseAddress(pixelBuffer));
+  int image_height;
+  unsigned char* sourceStartAddr;
+  if (fullHeight <= image_width) {
+    image_height = fullHeight;
+    sourceStartAddr = sourceBaseAddr;
+  } else {
+    image_height = image_width;
+    const int marginY = ((fullHeight - image_width) / 2);
+    sourceStartAddr = (sourceBaseAddr + (marginY * sourceRowBytes));
+  }
+  const int image_channels = 4;
+  assert(image_channels >= wanted_input_channels);
+  uint8_t* in = sourceStartAddr;
+
+  int input = interpreter->inputs()[0];
+
+  uint8_t* out = interpreter->typed_tensor<uint8_t>(input);
+  for (int y = 0; y < wanted_input_height; ++y) {
+    uint8_t* out_row = out + (y * wanted_input_width * wanted_input_channels);
+    for (int x = 0; x < wanted_input_width; ++x) {
+      const int in_x = (y * image_width) / wanted_input_width;
+      const int in_y = (x * image_height) / wanted_input_height;
+      uint8_t* in_pixel = in + (in_y * image_width * image_channels) + (in_x * image_channels);
+      uint8_t* out_pixel = out_row + (x * wanted_input_channels);
+      for (int c = 0; c < wanted_input_channels; ++c) {
+        out_pixel[c] = in_pixel[c];
+      }
+    }
+  }
+
+  double startTimestamp = [[NSDate new] timeIntervalSince1970];
+  if (interpreter->Invoke() != kTfLiteOk) {
+    LOG(FATAL) << "Failed to invoke!";
+  }
+  double endTimestamp = [[NSDate new] timeIntervalSince1970];
+  total_latency += (endTimestamp - startTimestamp);
+  total_count += 1;
+  NSLog(@"Time: %.4lf, avg: %.4lf, count: %d", endTimestamp - startTimestamp,
+        total_latency / total_count, total_count);
+
+  const int output_size = 1000;
+  const int kNumResults = 5;
+  const float kThreshold = 0.1f;
+
+  std::vector<std::pair<float, int>> top_results;
+
+  uint8_t* output = interpreter->typed_output_tensor<uint8_t>(0);
+  GetTopN(output, output_size, kNumResults, kThreshold, &top_results);
+
+  NSMutableDictionary* newValues = [NSMutableDictionary dictionary];
+  for (const auto& result : top_results) {
+    const float confidence = result.first;
+    const int index = result.second;
+    NSString* labelObject = [NSString stringWithUTF8String:labels[index].c_str()];
+    NSNumber* valueObject = [NSNumber numberWithFloat:confidence];
+    [newValues setObject:valueObject forKey:labelObject];
+  }
+  dispatch_async(dispatch_get_main_queue(), ^(void) {
+    [self setPredictionValues:newValues];
+  });
+
+  CVPixelBufferUnlockBaseAddress(pixelBuffer, unlockFlags);
+
+  CVPixelBufferUnlockBaseAddress(pixelBuffer, 0);
+}
+
+- (void)dealloc {
+  [self teardownAVCapture];
+}
+
+- (void)didReceiveMemoryWarning {
+  [super didReceiveMemoryWarning];
+}
+
+- (void)viewDidLoad {
+  [super viewDidLoad];
+  labelLayers = [[NSMutableArray alloc] init];
+  oldPredictionValues = [[NSMutableDictionary alloc] init];
+
+  NSString* graph_path = FilePathForResourceName(model_file_name, @"tflite");
+  model = tflite::FlatBufferModel::BuildFromFile([graph_path UTF8String]);
+  if (!model) {
+    LOG(FATAL) << "Failed to mmap model " << graph_path;
+  }
+  LOG(INFO) << "Loaded model " << graph_path;
+  model->error_reporter();
+  LOG(INFO) << "resolved reporter";
+
+  tflite::ops::builtin::BuiltinOpResolver resolver;
+  LoadLabels(labels_file_name, labels_file_type, &labels);
+
+  tflite::InterpreterBuilder(*model, resolver)(&interpreter);
+  if (!interpreter) {
+    LOG(FATAL) << "Failed to construct interpreter";
+  }
+  if (interpreter->AllocateTensors() != kTfLiteOk) {
+    LOG(FATAL) << "Failed to allocate tensors!";
+  }
+
+  [self setupAVCapture];
+}
+
+- (void)viewDidUnload {
+  [super viewDidUnload];
+}
+
+- (void)viewWillAppear:(BOOL)animated {
+  [super viewWillAppear:animated];
+}
+
+- (void)viewDidAppear:(BOOL)animated {
+  [super viewDidAppear:animated];
+}
+
+- (void)viewWillDisappear:(BOOL)animated {
+  [super viewWillDisappear:animated];
+}
+
+- (void)viewDidDisappear:(BOOL)animated {
+  [super viewDidDisappear:animated];
+}
+
+- (BOOL)shouldAutorotateToInterfaceOrientation:(UIInterfaceOrientation)interfaceOrientation {
+  return (interfaceOrientation == UIInterfaceOrientationPortrait);
+}
+
+- (BOOL)prefersStatusBarHidden {
+  return YES;
+}
+
+- (void)setPredictionValues:(NSDictionary*)newValues {
+  const float decayValue = 0.75f;
+  const float updateValue = 0.25f;
+  const float minimumThreshold = 0.01f;
+
+  NSMutableDictionary* decayedPredictionValues = [[NSMutableDictionary alloc] init];
+  for (NSString* label in oldPredictionValues) {
+    NSNumber* oldPredictionValueObject = [oldPredictionValues objectForKey:label];
+    const float oldPredictionValue = [oldPredictionValueObject floatValue];
+    const float decayedPredictionValue = (oldPredictionValue * decayValue);
+    if (decayedPredictionValue > minimumThreshold) {
+      NSNumber* decayedPredictionValueObject = [NSNumber numberWithFloat:decayedPredictionValue];
+      [decayedPredictionValues setObject:decayedPredictionValueObject forKey:label];
+    }
+  }
+  oldPredictionValues = decayedPredictionValues;
+
+  for (NSString* label in newValues) {
+    NSNumber* newPredictionValueObject = [newValues objectForKey:label];
+    NSNumber* oldPredictionValueObject = [oldPredictionValues objectForKey:label];
+    if (!oldPredictionValueObject) {
+      oldPredictionValueObject = [NSNumber numberWithFloat:0.0f];
+    }
+    const float newPredictionValue = [newPredictionValueObject floatValue];
+    const float oldPredictionValue = [oldPredictionValueObject floatValue];
+    const float updatedPredictionValue = (oldPredictionValue + (newPredictionValue * updateValue));
+    NSNumber* updatedPredictionValueObject = [NSNumber numberWithFloat:updatedPredictionValue];
+    [oldPredictionValues setObject:updatedPredictionValueObject forKey:label];
+  }
+  NSArray* candidateLabels = [NSMutableArray array];
+  for (NSString* label in oldPredictionValues) {
+    NSNumber* oldPredictionValueObject = [oldPredictionValues objectForKey:label];
+    const float oldPredictionValue = [oldPredictionValueObject floatValue];
+    if (oldPredictionValue > 0.05f) {
+      NSDictionary* entry = @{@"label" : label, @"value" : oldPredictionValueObject};
+      candidateLabels = [candidateLabels arrayByAddingObject:entry];
+    }
+  }
+  NSSortDescriptor* sort = [NSSortDescriptor sortDescriptorWithKey:@"value" ascending:NO];
+  NSArray* sortedLabels =
+      [candidateLabels sortedArrayUsingDescriptors:[NSArray arrayWithObject:sort]];
+
+  const float leftMargin = 10.0f;
+  const float topMargin = 10.0f;
+
+  const float valueWidth = 48.0f;
+  const float valueHeight = 18.0f;
+
+  const float labelWidth = 246.0f;
+  const float labelHeight = 18.0f;
+
+  const float labelMarginX = 5.0f;
+  const float labelMarginY = 5.0f;
+
+  [self removeAllLabelLayers];
+
+  int labelCount = 0;
+  for (NSDictionary* entry in sortedLabels) {
+    NSString* label = [entry objectForKey:@"label"];
+    NSNumber* valueObject = [entry objectForKey:@"value"];
+    const float value = [valueObject floatValue];
+    const float originY = topMargin + ((labelHeight + labelMarginY) * labelCount);
+    const int valuePercentage = (int)roundf(value * 100.0f);
+
+    const float valueOriginX = leftMargin;
+    NSString* valueText = [NSString stringWithFormat:@"%d%%", valuePercentage];
+
+    [self addLabelLayerWithText:valueText
+                        originX:valueOriginX
+                        originY:originY
+                          width:valueWidth
+                         height:valueHeight
+                      alignment:kCAAlignmentRight];
+
+    const float labelOriginX = (leftMargin + valueWidth + labelMarginX);
+
+    [self addLabelLayerWithText:[label capitalizedString]
+                        originX:labelOriginX
+                        originY:originY
+                          width:labelWidth
+                         height:labelHeight
+                      alignment:kCAAlignmentLeft];
+
+    labelCount += 1;
+    if (labelCount > 4) {
+      break;
+    }
+  }
+}
+
+- (void)removeAllLabelLayers {
+  for (CATextLayer* layer in labelLayers) {
+    [layer removeFromSuperlayer];
+  }
+  [labelLayers removeAllObjects];
+}
+
+- (void)addLabelLayerWithText:(NSString*)text
+                      originX:(float)originX
+                      originY:(float)originY
+                        width:(float)width
+                       height:(float)height
+                    alignment:(NSString*)alignment {
+  CFTypeRef font = (CFTypeRef) @"Menlo-Regular";
+  const float fontSize = 12.0;
+  const float marginSizeX = 5.0f;
+  const float marginSizeY = 2.0f;
+
+  const CGRect backgroundBounds = CGRectMake(originX, originY, width, height);
+  const CGRect textBounds = CGRectMake((originX + marginSizeX), (originY + marginSizeY),
+                                       (width - (marginSizeX * 2)), (height - (marginSizeY * 2)));
+
+  CATextLayer* background = [CATextLayer layer];
+  [background setBackgroundColor:[UIColor blackColor].CGColor];
+  [background setOpacity:0.5f];
+  [background setFrame:backgroundBounds];
+  background.cornerRadius = 5.0f;
+
+  [[self.view layer] addSublayer:background];
+  [labelLayers addObject:background];
+
+  CATextLayer* layer = [CATextLayer layer];
+  [layer setForegroundColor:[UIColor whiteColor].CGColor];
+  [layer setFrame:textBounds];
+  [layer setAlignmentMode:alignment];
+  [layer setWrapped:YES];
+  [layer setFont:font];
+  [layer setFontSize:fontSize];
+  layer.contentsScale = [[UIScreen mainScreen] scale];
+  [layer setString:text];
+
+  [[self.view layer] addSublayer:layer];
+  [labelLayers addObject:layer];
+}
+
+@end
diff --git a/tensorflow/contrib/lite/examples/ios/camera/Info.plist b/tensorflow/contrib/lite/examples/ios/camera/Info.plist
new file mode 100644
index 0000000000..f3d96bab16
--- /dev/null
+++ b/tensorflow/contrib/lite/examples/ios/camera/Info.plist
@@ -0,0 +1,44 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
+<plist version="1.0">
+<dict>
+	<key>CFBundleDevelopmentRegion</key>
+	<string>en</string>
+	<key>CFBundleDisplayName</key>
+	<string>tflite_camera_example</string>
+	<key>CFBundleExecutable</key>
+	<string>${EXECUTABLE_NAME}</string>
+	<key>CFBundleIdentifier</key>
+	<string>$(PRODUCT_BUNDLE_IDENTIFIER)</string>
+	<key>CFBundleInfoDictionaryVersion</key>
+	<string>6.0</string>
+	<key>CFBundleName</key>
+	<string>${PRODUCT_NAME}</string>
+	<key>CFBundlePackageType</key>
+	<string>APPL</string>
+	<key>CFBundleShortVersionString</key>
+	<string>1.0</string>
+	<key>CFBundleSignature</key>
+	<string>????</string>
+	<key>CFBundleVersion</key>
+	<string>1.0</string>
+	<key>LSRequiresIPhoneOS</key>
+	<true/>
+	<key>NSCameraUsageDescription</key>
+	<string>Capture images to detect object</string>
+	<key>UIMainStoryboardFile</key>
+	<string>MainStoryboard_iPhone</string>
+	<key>UIRequiresFullScreen</key>
+	<true/>
+	<key>UIStatusBarHidden</key>
+	<true/>
+	<key>UISupportedInterfaceOrientations</key>
+	<array>
+		<string>UIInterfaceOrientationPortrait</string>
+	</array>
+	<key>UISupportedInterfaceOrientations~ipad</key>
+	<array>
+		<string>UIInterfaceOrientationPortrait</string>
+	</array>
+</dict>
+</plist>
diff --git a/tensorflow/contrib/lite/examples/ios/camera/MainStoryboard_iPhone.storyboard b/tensorflow/contrib/lite/examples/ios/camera/MainStoryboard_iPhone.storyboard
new file mode 100644
index 0000000000..0f10a22e41
--- /dev/null
+++ b/tensorflow/contrib/lite/examples/ios/camera/MainStoryboard_iPhone.storyboard
@@ -0,0 +1,46 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<document type="com.apple.InterfaceBuilder3.CocoaTouch.Storyboard.XIB" version="3.0" toolsVersion="9531" systemVersion="15E65" targetRuntime="iOS.CocoaTouch" propertyAccessControl="none" initialViewController="2">
+    <dependencies>
+        <deployment identifier="iOS"/>
+        <plugIn identifier="com.apple.InterfaceBuilder.IBCocoaTouchPlugin" version="9529"/>
+    </dependencies>
+    <scenes>
+        <!--Camera Example View Controller-->
+        <scene sceneID="5">
+            <objects>
+                <viewController id="2" customClass="CameraExampleViewController" sceneMemberID="viewController">
+                    <view key="view" contentMode="scaleToFill" id="3">
+                        <rect key="frame" x="0.0" y="0.0" width="320" height="568"/>
+                        <autoresizingMask key="autoresizingMask" flexibleMaxX="YES" flexibleMaxY="YES"/>
+                        <subviews>
+                            <view contentMode="scaleToFill" id="12">
+                                <rect key="frame" x="0.0" y="0.0" width="320" height="522"/>
+                                <autoresizingMask key="autoresizingMask" widthSizable="YES" heightSizable="YES"/>
+                                <color key="backgroundColor" white="1" alpha="1" colorSpace="custom" customColorSpace="calibratedWhite"/>
+                                <gestureRecognizers/>
+                            </view>
+                            <button opaque="NO" contentMode="scaleToFill" contentHorizontalAlignment="center" contentVerticalAlignment="center" buttonType="roundedRect" lineBreakMode="middleTruncation" id="iD8-yH-eWH">
+                                <rect key="frame" x="0.0" y="454" width="320" height="33"/>
+                                <autoresizingMask key="autoresizingMask" flexibleMaxX="YES" flexibleMaxY="YES"/>
+                                <color key="backgroundColor" red="0.0" green="0.0" blue="0.0" alpha="1" colorSpace="calibratedRGB"/>
+                                <fontDescription key="fontDescription" name="Menlo-Regular" family="Menlo" pointSize="20"/>
+                                <state key="normal" title="Freeze Frame">
+                                    <color key="titleColor" white="1" alpha="1" colorSpace="calibratedWhite"/>
+                                    <color key="titleShadowColor" white="0.5" alpha="1" colorSpace="calibratedWhite"/>
+                                </state>
+                                <connections>
+                                    <action selector="takePicture:" destination="2" eventType="touchUpInside" id="BTy-7E-XUS"/>
+                                </connections>
+                            </button>
+                        </subviews>
+                        <color key="backgroundColor" red="0.0" green="0.0" blue="0.0" alpha="1" colorSpace="calibratedRGB"/>
+                    </view>
+                    <connections>
+                        <outlet property="previewView" destination="12" id="13"/>
+                    </connections>
+                </viewController>
+                <placeholder placeholderIdentifier="IBFirstResponder" id="4" sceneMemberID="firstResponder"/>
+            </objects>
+        </scene>
+    </scenes>
+</document>
diff --git a/tensorflow/contrib/lite/examples/ios/camera/Podfile b/tensorflow/contrib/lite/examples/ios/camera/Podfile
new file mode 100644
index 0000000000..4ae6fb6b94
--- /dev/null
+++ b/tensorflow/contrib/lite/examples/ios/camera/Podfile
@@ -0,0 +1,5 @@
+platform :ios, '8.0'
+inhibit_all_warnings!
+
+target 'tflite_camera_example'
+       pod 'TensorFlow-experimental'
diff --git a/tensorflow/contrib/lite/examples/ios/camera/data/.gitignore b/tensorflow/contrib/lite/examples/ios/camera/data/.gitignore
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/tensorflow/contrib/lite/examples/ios/camera/main.mm b/tensorflow/contrib/lite/examples/ios/camera/main.mm
new file mode 100644
index 0000000000..1a9e542f7c
--- /dev/null
+++ b/tensorflow/contrib/lite/examples/ios/camera/main.mm
@@ -0,0 +1,28 @@
+// Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#import <UIKit/UIKit.h>
+
+#import "CameraExampleAppDelegate.h"
+
+int main(int argc, char* argv[]) {
+  int retVal = 0;
+
+  @autoreleasepool {
+    retVal =
+        UIApplicationMain(argc, argv, nil, NSStringFromClass([CameraExampleAppDelegate class]));
+  }
+  return retVal;
+}
diff --git a/tensorflow/contrib/lite/examples/ios/camera/tflite_camera_example.xcodeproj/project.pbxproj b/tensorflow/contrib/lite/examples/ios/camera/tflite_camera_example.xcodeproj/project.pbxproj
new file mode 100644
index 0000000000..c98183276b
--- /dev/null
+++ b/tensorflow/contrib/lite/examples/ios/camera/tflite_camera_example.xcodeproj/project.pbxproj
@@ -0,0 +1,419 @@
+// !$*UTF8*$!
+{
+	archiveVersion = 1;
+	classes = {
+	};
+	objectVersion = 46;
+	objects = {
+
+/* Begin PBXBuildFile section */
+		1C3C9DCC1ED3AB4200B8B5FA /* main.mm in Sources */ = {isa = PBXBuildFile; fileRef = 1C3C9DCA1ED3AB4200B8B5FA /* main.mm */; };
+		1C99111C1ED3B0E600A6BFB9 /* MainStoryboard_iPhone.storyboard in Resources */ = {isa = PBXBuildFile; fileRef = 1C99111B1ED3B0E600A6BFB9 /* MainStoryboard_iPhone.storyboard */; };
+		1CA5EB931ED3ABFB00247A34 /* CoreMedia.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 1CA5EB921ED3ABFB00247A34 /* CoreMedia.framework */; };
+		1CB47D491ED3AD1700DF7666 /* AVFoundation.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 1CB47D481ED3AD1700DF7666 /* AVFoundation.framework */; };
+		1CDB2D491ED3A9CD007929E9 /* CameraExampleAppDelegate.m in Sources */ = {isa = PBXBuildFile; fileRef = 1CDB2D431ED3A9CD007929E9 /* CameraExampleAppDelegate.m */; };
+		1CDB2D4A1ED3A9CD007929E9 /* CameraExampleViewController.mm in Sources */ = {isa = PBXBuildFile; fileRef = 1CDB2D451ED3A9CD007929E9 /* CameraExampleViewController.mm */; };
+		1CDB2D4E1ED3AA35007929E9 /* Info.plist in Resources */ = {isa = PBXBuildFile; fileRef = 1CDB2D4D1ED3AA35007929E9 /* Info.plist */; };
+		54DC6C3C5F734F3A58069F0C /* libPods-tflite_camera_example.a in Frameworks */ = {isa = PBXBuildFile; fileRef = 3BA8BF92C84895BFE59D8236 /* libPods-tflite_camera_example.a */; };
+		AC1F82661FBA3CBD0052BA77 /* labels.txt in Resources */ = {isa = PBXBuildFile; fileRef = AC1F82641FBA3CBD0052BA77 /* labels.txt */; };
+		AC1F82691FBA3F930052BA77 /* libtensorflow-lite.a in Frameworks */ = {isa = PBXBuildFile; fileRef = AC1F82681FBA3F930052BA77 /* libtensorflow-lite.a */; };
+		ACA1A4CA1FBB6C28009B8D86 /* mobilenet_quant_v1_224.tflite in Resources */ = {isa = PBXBuildFile; fileRef = ACA1A4C91FBB6C28009B8D86 /* mobilenet_quant_v1_224.tflite */; };
+/* End PBXBuildFile section */
+
+/* Begin PBXFileReference section */
+		1C0D73481ECCC41B008C1DAB /* CoreImage.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = CoreImage.framework; path = System/Library/Frameworks/CoreImage.framework; sourceTree = SDKROOT; };
+		1C0D734A1ECCC460008C1DAB /* CoreGraphics.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = CoreGraphics.framework; path = System/Library/Frameworks/CoreGraphics.framework; sourceTree = SDKROOT; };
+		1C3C9DCA1ED3AB4200B8B5FA /* main.mm */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.objcpp; path = main.mm; sourceTree = "<group>"; };
+		1C564C0D1ED3A92E00087306 /* tflite_camera_example.app */ = {isa = PBXFileReference; explicitFileType = wrapper.application; includeInIndex = 0; path = tflite_camera_example.app; sourceTree = BUILT_PRODUCTS_DIR; };
+		1C99111B1ED3B0E600A6BFB9 /* MainStoryboard_iPhone.storyboard */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = file.storyboard; path = MainStoryboard_iPhone.storyboard; sourceTree = "<group>"; };
+		1CA45FFE1ECCC356002FA6A4 /* UIKit.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = UIKit.framework; path = System/Library/Frameworks/UIKit.framework; sourceTree = SDKROOT; };
+		1CA5EB921ED3ABFB00247A34 /* CoreMedia.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = CoreMedia.framework; path = System/Library/Frameworks/CoreMedia.framework; sourceTree = SDKROOT; };
+		1CB47D481ED3AD1700DF7666 /* AVFoundation.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = AVFoundation.framework; path = System/Library/Frameworks/AVFoundation.framework; sourceTree = SDKROOT; };
+		1CDB2D421ED3A9CD007929E9 /* CameraExampleAppDelegate.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = CameraExampleAppDelegate.h; sourceTree = "<group>"; };
+		1CDB2D431ED3A9CD007929E9 /* CameraExampleAppDelegate.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; path = CameraExampleAppDelegate.m; sourceTree = "<group>"; };
+		1CDB2D441ED3A9CD007929E9 /* CameraExampleViewController.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = CameraExampleViewController.h; sourceTree = "<group>"; };
+		1CDB2D451ED3A9CD007929E9 /* CameraExampleViewController.mm */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.objcpp; path = CameraExampleViewController.mm; sourceTree = "<group>"; };
+		1CDB2D4D1ED3AA35007929E9 /* Info.plist */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text.plist.xml; path = Info.plist; sourceTree = "<group>"; };
+		3BA8BF92C84895BFE59D8236 /* libPods-tflite_camera_example.a */ = {isa = PBXFileReference; explicitFileType = archive.ar; includeInIndex = 0; path = "libPods-tflite_camera_example.a"; sourceTree = BUILT_PRODUCTS_DIR; };
+		3BC5BE4BBD09374D3E98F082 /* Pods-tflite_camera_example.debug.xcconfig */ = {isa = PBXFileReference; includeInIndex = 1; lastKnownFileType = text.xcconfig; name = "Pods-tflite_camera_example.debug.xcconfig"; path = "Pods/Target Support Files/Pods-tflite_camera_example/Pods-tflite_camera_example.debug.xcconfig"; sourceTree = "<group>"; };
+		55ED318E8D29C8AFEF03DF1E /* Pods-tflite_camera_example.release.xcconfig */ = {isa = PBXFileReference; includeInIndex = 1; lastKnownFileType = text.xcconfig; name = "Pods-tflite_camera_example.release.xcconfig"; path = "Pods/Target Support Files/Pods-tflite_camera_example/Pods-tflite_camera_example.release.xcconfig"; sourceTree = "<group>"; };
+		AC1F82641FBA3CBD0052BA77 /* labels.txt */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; path = labels.txt; sourceTree = "<group>"; };
+		AC1F82681FBA3F930052BA77 /* libtensorflow-lite.a */ = {isa = PBXFileReference; lastKnownFileType = archive.ar; name = "libtensorflow-lite.a"; path = "../../../gen/lib/libtensorflow-lite.a"; sourceTree = "<group>"; };
+		ACA1A4C91FBB6C28009B8D86 /* mobilenet_quant_v1_224.tflite */ = {isa = PBXFileReference; lastKnownFileType = file; path = mobilenet_quant_v1_224.tflite; sourceTree = "<group>"; };
+/* End PBXFileReference section */
+
+/* Begin PBXFrameworksBuildPhase section */
+		1C564C0A1ED3A92E00087306 /* Frameworks */ = {
+			isa = PBXFrameworksBuildPhase;
+			buildActionMask = 2147483647;
+			files = (
+				AC1F82691FBA3F930052BA77 /* libtensorflow-lite.a in Frameworks */,
+				1CB47D491ED3AD1700DF7666 /* AVFoundation.framework in Frameworks */,
+				1CA5EB931ED3ABFB00247A34 /* CoreMedia.framework in Frameworks */,
+				54DC6C3C5F734F3A58069F0C /* libPods-tflite_camera_example.a in Frameworks */,
+			);
+			runOnlyForDeploymentPostprocessing = 0;
+		};
+/* End PBXFrameworksBuildPhase section */
+
+/* Begin PBXGroup section */
+		24D7686C331131624F4454A0 /* Frameworks */ = {
+			isa = PBXGroup;
+			children = (
+				AC1F82681FBA3F930052BA77 /* libtensorflow-lite.a */,
+				1CB47D481ED3AD1700DF7666 /* AVFoundation.framework */,
+				1CA5EB921ED3ABFB00247A34 /* CoreMedia.framework */,
+				1C0D734A1ECCC460008C1DAB /* CoreGraphics.framework */,
+				1C0D73481ECCC41B008C1DAB /* CoreImage.framework */,
+				1CA45FFE1ECCC356002FA6A4 /* UIKit.framework */,
+				3BA8BF92C84895BFE59D8236 /* libPods-tflite_camera_example.a */,
+			);
+			name = Frameworks;
+			sourceTree = "<group>";
+		};
+		3E9FC355632FB928EA23BEED /* Pods */ = {
+			isa = PBXGroup;
+			children = (
+				3BC5BE4BBD09374D3E98F082 /* Pods-tflite_camera_example.debug.xcconfig */,
+				55ED318E8D29C8AFEF03DF1E /* Pods-tflite_camera_example.release.xcconfig */,
+			);
+			name = Pods;
+			sourceTree = "<group>";
+		};
+		591157921CF4011C00C31E3A = {
+			isa = PBXGroup;
+			children = (
+				1C99111B1ED3B0E600A6BFB9 /* MainStoryboard_iPhone.storyboard */,
+				1C3C9DCA1ED3AB4200B8B5FA /* main.mm */,
+				1CDB2D4D1ED3AA35007929E9 /* Info.plist */,
+				1CDB2D421ED3A9CD007929E9 /* CameraExampleAppDelegate.h */,
+				1CDB2D431ED3A9CD007929E9 /* CameraExampleAppDelegate.m */,
+				1CDB2D441ED3A9CD007929E9 /* CameraExampleViewController.h */,
+				1CDB2D451ED3A9CD007929E9 /* CameraExampleViewController.mm */,
+				59A3CFF31CF4E68100C4259F /* data */,
+				5911579C1CF4011C00C31E3A /* Products */,
+				3E9FC355632FB928EA23BEED /* Pods */,
+				24D7686C331131624F4454A0 /* Frameworks */,
+			);
+			sourceTree = "<group>";
+		};
+		5911579C1CF4011C00C31E3A /* Products */ = {
+			isa = PBXGroup;
+			children = (
+				1C564C0D1ED3A92E00087306 /* tflite_camera_example.app */,
+			);
+			name = Products;
+			sourceTree = "<group>";
+		};
+		59A3CFF31CF4E68100C4259F /* data */ = {
+			isa = PBXGroup;
+			children = (
+				ACA1A4C91FBB6C28009B8D86 /* mobilenet_quant_v1_224.tflite */,
+				AC1F82641FBA3CBD0052BA77 /* labels.txt */,
+			);
+			path = data;
+			sourceTree = "<group>";
+		};
+/* End PBXGroup section */
+
+/* Begin PBXNativeTarget section */
+		1C564C0C1ED3A92E00087306 /* tflite_camera_example */ = {
+			isa = PBXNativeTarget;
+			buildConfigurationList = 1C564C351ED3A92E00087306 /* Build configuration list for PBXNativeTarget "tflite_camera_example" */;
+			buildPhases = (
+				66DAEAAEE9EF6550C3A061E0 /* [CP] Check Pods Manifest.lock */,
+				1C564C091ED3A92E00087306 /* Sources */,
+				1C564C0A1ED3A92E00087306 /* Frameworks */,
+				1C564C0B1ED3A92E00087306 /* Resources */,
+				00E875C3B066535AE6B77101 /* [CP] Embed Pods Frameworks */,
+				5C2D02120E3E5E09567AA946 /* [CP] Copy Pods Resources */,
+			);
+			buildRules = (
+			);
+			dependencies = (
+			);
+			name = tflite_camera_example;
+			productName = tflite_camera_example;
+			productReference = 1C564C0D1ED3A92E00087306 /* tflite_camera_example.app */;
+			productType = "com.apple.product-type.application";
+		};
+/* End PBXNativeTarget section */
+
+/* Begin PBXProject section */
+		591157931CF4011C00C31E3A /* Project object */ = {
+			isa = PBXProject;
+			attributes = {
+				LastSwiftUpdateCheck = 0830;
+				LastUpgradeCheck = 0830;
+				ORGANIZATIONNAME = Google;
+				TargetAttributes = {
+					1C564C0C1ED3A92E00087306 = {
+						CreatedOnToolsVersion = 8.3.2;
+						DevelopmentTeam = EQHXZ8M8AV;
+						ProvisioningStyle = Automatic;
+					};
+				};
+			};
+			buildConfigurationList = 591157961CF4011C00C31E3A /* Build configuration list for PBXProject "tflite_camera_example" */;
+			compatibilityVersion = "Xcode 3.2";
+			developmentRegion = English;
+			hasScannedForEncodings = 0;
+			knownRegions = (
+				en,
+				Base,
+			);
+			mainGroup = 591157921CF4011C00C31E3A;
+			productRefGroup = 5911579C1CF4011C00C31E3A /* Products */;
+			projectDirPath = "";
+			projectRoot = "";
+			targets = (
+				1C564C0C1ED3A92E00087306 /* tflite_camera_example */,
+			);
+		};
+/* End PBXProject section */
+
+/* Begin PBXResourcesBuildPhase section */
+		1C564C0B1ED3A92E00087306 /* Resources */ = {
+			isa = PBXResourcesBuildPhase;
+			buildActionMask = 2147483647;
+			files = (
+				ACA1A4CA1FBB6C28009B8D86 /* mobilenet_quant_v1_224.tflite in Resources */,
+				1C99111C1ED3B0E600A6BFB9 /* MainStoryboard_iPhone.storyboard in Resources */,
+				1CDB2D4E1ED3AA35007929E9 /* Info.plist in Resources */,
+				AC1F82661FBA3CBD0052BA77 /* labels.txt in Resources */,
+			);
+			runOnlyForDeploymentPostprocessing = 0;
+		};
+/* End PBXResourcesBuildPhase section */
+
+/* Begin PBXShellScriptBuildPhase section */
+		00E875C3B066535AE6B77101 /* [CP] Embed Pods Frameworks */ = {
+			isa = PBXShellScriptBuildPhase;
+			buildActionMask = 2147483647;
+			files = (
+			);
+			inputPaths = (
+			);
+			name = "[CP] Embed Pods Frameworks";
+			outputPaths = (
+			);
+			runOnlyForDeploymentPostprocessing = 0;
+			shellPath = /bin/sh;
+			shellScript = "\"${SRCROOT}/Pods/Target Support Files/Pods-tflite_camera_example/Pods-tflite_camera_example-frameworks.sh\"\n";
+			showEnvVarsInLog = 0;
+		};
+		5C2D02120E3E5E09567AA946 /* [CP] Copy Pods Resources */ = {
+			isa = PBXShellScriptBuildPhase;
+			buildActionMask = 2147483647;
+			files = (
+			);
+			inputPaths = (
+			);
+			name = "[CP] Copy Pods Resources";
+			outputPaths = (
+			);
+			runOnlyForDeploymentPostprocessing = 0;
+			shellPath = /bin/sh;
+			shellScript = "\"${SRCROOT}/Pods/Target Support Files/Pods-tflite_camera_example/Pods-tflite_camera_example-resources.sh\"\n";
+			showEnvVarsInLog = 0;
+		};
+		66DAEAAEE9EF6550C3A061E0 /* [CP] Check Pods Manifest.lock */ = {
+			isa = PBXShellScriptBuildPhase;
+			buildActionMask = 2147483647;
+			files = (
+			);
+			inputPaths = (
+				"${PODS_PODFILE_DIR_PATH}/Podfile.lock",
+				"${PODS_ROOT}/Manifest.lock",
+			);
+			name = "[CP] Check Pods Manifest.lock";
+			outputPaths = (
+				"$(DERIVED_FILE_DIR)/Pods-tflite_camera_example-checkManifestLockResult.txt",
+			);
+			runOnlyForDeploymentPostprocessing = 0;
+			shellPath = /bin/sh;
+			shellScript = "diff \"${PODS_PODFILE_DIR_PATH}/Podfile.lock\" \"${PODS_ROOT}/Manifest.lock\" > /dev/null\nif [ $? != 0 ] ; then\n    # print error to STDERR\n    echo \"error: The sandbox is not in sync with the Podfile.lock. Run 'pod install' or update your CocoaPods installation.\" >&2\n    exit 1\nfi\n# This output is used by Xcode 'outputs' to avoid re-running this script phase.\necho \"SUCCESS\" > \"${SCRIPT_OUTPUT_FILE_0}\"\n";
+			showEnvVarsInLog = 0;
+		};
+/* End PBXShellScriptBuildPhase section */
+
+/* Begin PBXSourcesBuildPhase section */
+		1C564C091ED3A92E00087306 /* Sources */ = {
+			isa = PBXSourcesBuildPhase;
+			buildActionMask = 2147483647;
+			files = (
+				1CDB2D4A1ED3A9CD007929E9 /* CameraExampleViewController.mm in Sources */,
+				1CDB2D491ED3A9CD007929E9 /* CameraExampleAppDelegate.m in Sources */,
+				1C3C9DCC1ED3AB4200B8B5FA /* main.mm in Sources */,
+			);
+			runOnlyForDeploymentPostprocessing = 0;
+		};
+/* End PBXSourcesBuildPhase section */
+
+/* Begin XCBuildConfiguration section */
+		1C564C361ED3A92E00087306 /* Debug */ = {
+			isa = XCBuildConfiguration;
+			baseConfigurationReference = 3BC5BE4BBD09374D3E98F082 /* Pods-tflite_camera_example.debug.xcconfig */;
+			buildSettings = {
+				ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon;
+				CLANG_ANALYZER_NONNULL = YES;
+				CLANG_ANALYZER_NUMBER_OBJECT_CONVERSION = YES_AGGRESSIVE;
+				CLANG_WARN_DOCUMENTATION_COMMENTS = YES;
+				DEVELOPMENT_TEAM = EQHXZ8M8AV;
+				INFOPLIST_FILE = Info.plist;
+				IPHONEOS_DEPLOYMENT_TARGET = 10.3;
+				LD_RUNPATH_SEARCH_PATHS = "$(inherited) @executable_path/Frameworks";
+				PRODUCT_BUNDLE_IDENTIFIER = "com.pf.tf-camera-example";
+				PRODUCT_NAME = "$(TARGET_NAME)";
+				SWIFT_ACTIVE_COMPILATION_CONDITIONS = DEBUG;
+				SWIFT_OPTIMIZATION_LEVEL = "-Onone";
+				SWIFT_VERSION = 3.0;
+			};
+			name = Debug;
+		};
+		1C564C371ED3A92E00087306 /* Release */ = {
+			isa = XCBuildConfiguration;
+			baseConfigurationReference = 55ED318E8D29C8AFEF03DF1E /* Pods-tflite_camera_example.release.xcconfig */;
+			buildSettings = {
+				ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon;
+				CLANG_ANALYZER_NONNULL = YES;
+				CLANG_ANALYZER_NUMBER_OBJECT_CONVERSION = YES_AGGRESSIVE;
+				CLANG_WARN_DOCUMENTATION_COMMENTS = YES;
+				DEVELOPMENT_TEAM = EQHXZ8M8AV;
+				INFOPLIST_FILE = Info.plist;
+				IPHONEOS_DEPLOYMENT_TARGET = 10.3;
+				LD_RUNPATH_SEARCH_PATHS = "$(inherited) @executable_path/Frameworks";
+				PRODUCT_BUNDLE_IDENTIFIER = "com.pf.tf-camera-example";
+				PRODUCT_NAME = "$(TARGET_NAME)";
+				SWIFT_OPTIMIZATION_LEVEL = "-Owholemodule";
+				SWIFT_VERSION = 3.0;
+			};
+			name = Release;
+		};
+		591157B01CF4011D00C31E3A /* Debug */ = {
+			isa = XCBuildConfiguration;
+			buildSettings = {
+				ALWAYS_SEARCH_USER_PATHS = NO;
+				CLANG_CXX_LANGUAGE_STANDARD = "gnu++0x";
+				CLANG_CXX_LIBRARY = "libc++";
+				CLANG_ENABLE_MODULES = YES;
+				CLANG_ENABLE_OBJC_ARC = YES;
+				CLANG_WARN_BOOL_CONVERSION = YES;
+				CLANG_WARN_CONSTANT_CONVERSION = YES;
+				CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR;
+				CLANG_WARN_EMPTY_BODY = YES;
+				CLANG_WARN_ENUM_CONVERSION = YES;
+				CLANG_WARN_INFINITE_RECURSION = YES;
+				CLANG_WARN_INT_CONVERSION = YES;
+				CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR;
+				CLANG_WARN_SUSPICIOUS_MOVE = YES;
+				CLANG_WARN_UNREACHABLE_CODE = YES;
+				CLANG_WARN__DUPLICATE_METHOD_MATCH = YES;
+				"CODE_SIGN_IDENTITY[sdk=iphoneos*]" = "iPhone Developer";
+				COPY_PHASE_STRIP = NO;
+				DEBUG_INFORMATION_FORMAT = dwarf;
+				ENABLE_STRICT_OBJC_MSGSEND = YES;
+				ENABLE_TESTABILITY = YES;
+				GCC_C_LANGUAGE_STANDARD = gnu99;
+				GCC_DYNAMIC_NO_PIC = NO;
+				GCC_NO_COMMON_BLOCKS = YES;
+				GCC_OPTIMIZATION_LEVEL = 0;
+				GCC_PREPROCESSOR_DEFINITIONS = (
+					"DEBUG=1",
+					"$(inherited)",
+				);
+				GCC_WARN_64_TO_32_BIT_CONVERSION = YES;
+				GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR;
+				GCC_WARN_UNDECLARED_SELECTOR = YES;
+				GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE;
+				GCC_WARN_UNUSED_FUNCTION = YES;
+				GCC_WARN_UNUSED_VARIABLE = YES;
+				HEADER_SEARCH_PATHS = (
+					"$(inherited)",
+					../../../../../../,
+					../../../downloads/flatbuffers/include/,
+					../../../downloads/eigen/,
+					../../../downloads/,
+				);
+				IPHONEOS_DEPLOYMENT_TARGET = 8.0;
+				LIBRARY_SEARCH_PATHS = ../../../gen/lib/;
+				MTL_ENABLE_DEBUG_INFO = YES;
+				ONLY_ACTIVE_ARCH = YES;
+				SDKROOT = iphoneos;
+				TARGETED_DEVICE_FAMILY = "1,2";
+			};
+			name = Debug;
+		};
+		591157B11CF4011D00C31E3A /* Release */ = {
+			isa = XCBuildConfiguration;
+			buildSettings = {
+				ALWAYS_SEARCH_USER_PATHS = NO;
+				CLANG_CXX_LANGUAGE_STANDARD = "gnu++0x";
+				CLANG_CXX_LIBRARY = "libc++";
+				CLANG_ENABLE_MODULES = YES;
+				CLANG_ENABLE_OBJC_ARC = YES;
+				CLANG_WARN_BOOL_CONVERSION = YES;
+				CLANG_WARN_CONSTANT_CONVERSION = YES;
+				CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR;
+				CLANG_WARN_EMPTY_BODY = YES;
+				CLANG_WARN_ENUM_CONVERSION = YES;
+				CLANG_WARN_INFINITE_RECURSION = YES;
+				CLANG_WARN_INT_CONVERSION = YES;
+				CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR;
+				CLANG_WARN_SUSPICIOUS_MOVE = YES;
+				CLANG_WARN_UNREACHABLE_CODE = YES;
+				CLANG_WARN__DUPLICATE_METHOD_MATCH = YES;
+				"CODE_SIGN_IDENTITY[sdk=iphoneos*]" = "iPhone Developer";
+				COPY_PHASE_STRIP = NO;
+				DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym";
+				ENABLE_NS_ASSERTIONS = NO;
+				ENABLE_STRICT_OBJC_MSGSEND = YES;
+				GCC_C_LANGUAGE_STANDARD = gnu99;
+				GCC_NO_COMMON_BLOCKS = YES;
+				GCC_WARN_64_TO_32_BIT_CONVERSION = YES;
+				GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR;
+				GCC_WARN_UNDECLARED_SELECTOR = YES;
+				GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE;
+				GCC_WARN_UNUSED_FUNCTION = YES;
+				GCC_WARN_UNUSED_VARIABLE = YES;
+				HEADER_SEARCH_PATHS = (
+					"$(inherited)",
+					../../../../../../,
+					../../../downloads/flatbuffers/include/,
+					../../../downloads/eigen/,
+					../../../downloads/,
+				);
+				IPHONEOS_DEPLOYMENT_TARGET = 8.0;
+				LIBRARY_SEARCH_PATHS = ../../../gen/lib/;
+				MTL_ENABLE_DEBUG_INFO = NO;
+				SDKROOT = iphoneos;
+				TARGETED_DEVICE_FAMILY = "1,2";
+				VALIDATE_PRODUCT = YES;
+			};
+			name = Release;
+		};
+/* End XCBuildConfiguration section */
+
+/* Begin XCConfigurationList section */
+		1C564C351ED3A92E00087306 /* Build configuration list for PBXNativeTarget "tflite_camera_example" */ = {
+			isa = XCConfigurationList;
+			buildConfigurations = (
+				1C564C361ED3A92E00087306 /* Debug */,
+				1C564C371ED3A92E00087306 /* Release */,
+			);
+			defaultConfigurationIsVisible = 0;
+			defaultConfigurationName = Release;
+		};
+		591157961CF4011C00C31E3A /* Build configuration list for PBXProject "tflite_camera_example" */ = {
+			isa = XCConfigurationList;
+			buildConfigurations = (
+				591157B01CF4011D00C31E3A /* Debug */,
+				591157B11CF4011D00C31E3A /* Release */,
+			);
+			defaultConfigurationIsVisible = 0;
+			defaultConfigurationName = Release;
+		};
+/* End XCConfigurationList section */
+	};
+	rootObject = 591157931CF4011C00C31E3A /* Project object */;
+}
diff --git a/tensorflow/contrib/lite/examples/ios/simple/AppDelegate.h b/tensorflow/contrib/lite/examples/ios/simple/AppDelegate.h
new file mode 100644
index 0000000000..94046d9728
--- /dev/null
+++ b/tensorflow/contrib/lite/examples/ios/simple/AppDelegate.h
@@ -0,0 +1,21 @@
+// Copyright 2015 Google Inc. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#import <UIKit/UIKit.h>
+
+@interface AppDelegate : UIResponder<UIApplicationDelegate>
+
+@property(strong, nonatomic) UIWindow *window;
+
+@end
diff --git a/tensorflow/contrib/lite/examples/ios/simple/AppDelegate.mm b/tensorflow/contrib/lite/examples/ios/simple/AppDelegate.mm
new file mode 100644
index 0000000000..fe26ceec42
--- /dev/null
+++ b/tensorflow/contrib/lite/examples/ios/simple/AppDelegate.mm
@@ -0,0 +1,47 @@
+// Copyright 2015 Google Inc. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#import "AppDelegate.h"
+
+#import "RunModelViewController.h"
+
+@implementation AppDelegate
+
+- (BOOL)application:(UIApplication *)application
+    didFinishLaunchingWithOptions:(NSDictionary *)launchOptions {
+  UITabBarController *bar = [[UITabBarController alloc] init];
+  [bar setViewControllers:@[ [[RunModelViewController alloc] init] ]];
+  bar.selectedIndex = 0;
+  self.window = [[UIWindow alloc] initWithFrame:[[UIScreen mainScreen] bounds]];
+  self.window.rootViewController = bar;
+  [self.window makeKeyAndVisible];
+  return YES;
+}
+
+- (void)applicationWillResignActive:(UIApplication *)application {
+}
+
+- (void)applicationDidEnterBackground:(UIApplication *)application {
+}
+
+- (void)applicationWillEnterForeground:(UIApplication *)application {
+}
+
+- (void)applicationDidBecomeActive:(UIApplication *)application {
+}
+
+- (void)applicationWillTerminate:(UIApplication *)application {
+}
+
+@end
diff --git a/tensorflow/contrib/lite/examples/ios/simple/Podfile b/tensorflow/contrib/lite/examples/ios/simple/Podfile
new file mode 100644
index 0000000000..1740ad6457
--- /dev/null
+++ b/tensorflow/contrib/lite/examples/ios/simple/Podfile
@@ -0,0 +1,5 @@
+platform :ios, '8.0'
+inhibit_all_warnings!
+
+target 'tf_simple_example'
+       pod 'TensorFlow-experimental'
diff --git a/tensorflow/contrib/lite/examples/ios/simple/RunModel-Info.plist b/tensorflow/contrib/lite/examples/ios/simple/RunModel-Info.plist
new file mode 100644
index 0000000000..1a3eaa8a2c
--- /dev/null
+++ b/tensorflow/contrib/lite/examples/ios/simple/RunModel-Info.plist
@@ -0,0 +1,47 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
+<plist version="1.0">
+<dict>
+	<key>CFBundleDevelopmentRegion</key>
+	<string>en</string>
+	<key>CFBundleDisplayName</key>
+	<string>tflite-simple-example</string>
+	<key>CFBundleExecutable</key>
+	<string>tf_simple_example</string>
+	<key>CFBundleIdentifier</key>
+	<string>$(PRODUCT_BUNDLE_IDENTIFIER)</string>
+	<key>CFBundleInfoDictionaryVersion</key>
+	<string>6.0</string>
+	<key>CFBundleName</key>
+	<string>ios-app</string>
+	<key>CFBundlePackageType</key>
+	<string>APPL</string>
+	<key>CFBundleShortVersionString</key>
+	<string>1.0</string>
+	<key>CFBundleSignature</key>
+	<string>????</string>
+	<key>CFBundleVersion</key>
+	<string>1.0</string>
+	<key>LSRequiresIPhoneOS</key>
+	<true/>
+	<key>UILaunchStoryboardName</key>
+	<string>RunModelViewController</string>
+	<key>UIRequiredDeviceCapabilities</key>
+	<array>
+		<string>armv7</string>
+	</array>
+	<key>UISupportedInterfaceOrientations</key>
+	<array>
+		<string>UIInterfaceOrientationPortrait</string>
+		<string>UIInterfaceOrientationLandscapeLeft</string>
+		<string>UIInterfaceOrientationLandscapeRight</string>
+	</array>
+	<key>UISupportedInterfaceOrientations~ipad</key>
+	<array>
+		<string>UIInterfaceOrientationPortrait</string>
+		<string>UIInterfaceOrientationPortraitUpsideDown</string>
+		<string>UIInterfaceOrientationLandscapeLeft</string>
+		<string>UIInterfaceOrientationLandscapeRight</string>
+	</array>
+</dict>
+</plist>
diff --git a/tensorflow/contrib/lite/examples/ios/simple/RunModelViewController.h b/tensorflow/contrib/lite/examples/ios/simple/RunModelViewController.h
new file mode 100644
index 0000000000..a4b358b4eb
--- /dev/null
+++ b/tensorflow/contrib/lite/examples/ios/simple/RunModelViewController.h
@@ -0,0 +1,24 @@
+// Copyright 2015 Google Inc. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#import <UIKit/UIKit.h>
+
+@interface RunModelViewController : UIViewController
+
+- (IBAction)getUrl:(id)sender;
+
+@property(weak, nonatomic) IBOutlet UITextView *urlContentTextView;
+@property(weak, nonatomic) IBOutlet UITextField *urlTextField;
+
+@end
diff --git a/tensorflow/contrib/lite/examples/ios/simple/RunModelViewController.mm b/tensorflow/contrib/lite/examples/ios/simple/RunModelViewController.mm
new file mode 100644
index 0000000000..0dafb1f61e
--- /dev/null
+++ b/tensorflow/contrib/lite/examples/ios/simple/RunModelViewController.mm
@@ -0,0 +1,221 @@
+// Copyright 2015 Google Inc. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#import "RunModelViewController.h"
+
+#include <pthread.h>
+#include <unistd.h>
+#include <fstream>
+#include <iostream>
+#include <queue>
+#include <sstream>
+#include <string>
+
+#include "tensorflow/contrib/lite/kernels/register.h"
+#include "tensorflow/contrib/lite/model.h"
+#include "tensorflow/contrib/lite/string_util.h"
+#include "tensorflow/contrib/lite/tools/mutable_op_resolver.h"
+
+#include "ios_image_load.h"
+
+#define LOG(x) std::cerr
+#define CHECK(x)                  \
+  if (!(x)) {                     \
+    LOG(ERROR) << #x << "failed"; \
+    exit(1);                      \
+  }
+
+NSString* RunInferenceOnImage();
+
+@interface RunModelViewController ()
+@end
+
+@implementation RunModelViewController {
+}
+
+- (IBAction)getUrl:(id)sender {
+  NSString* inference_result = RunInferenceOnImage();
+  self.urlContentTextView.text = inference_result;
+}
+
+@end
+
+// Returns the top N confidence values over threshold in the provided vector,
+// sorted by confidence in descending order.
+static void GetTopN(const float* prediction, const int prediction_size, const int num_results,
+                    const float threshold, std::vector<std::pair<float, int> >* top_results) {
+  // Will contain top N results in ascending order.
+  std::priority_queue<std::pair<float, int>, std::vector<std::pair<float, int> >,
+                      std::greater<std::pair<float, int> > >
+      top_result_pq;
+
+  const long count = prediction_size;
+  for (int i = 0; i < count; ++i) {
+    const float value = prediction[i];
+
+    // Only add it if it beats the threshold and has a chance at being in
+    // the top N.
+    if (value < threshold) {
+      continue;
+    }
+
+    top_result_pq.push(std::pair<float, int>(value, i));
+
+    // If at capacity, kick the smallest value out.
+    if (top_result_pq.size() > num_results) {
+      top_result_pq.pop();
+    }
+  }
+
+  // Copy to output vector and reverse into descending order.
+  while (!top_result_pq.empty()) {
+    top_results->push_back(top_result_pq.top());
+    top_result_pq.pop();
+  }
+  std::reverse(top_results->begin(), top_results->end());
+}
+
+NSString* FilePathForResourceName(NSString* name, NSString* extension) {
+  NSString* file_path = [[NSBundle mainBundle] pathForResource:name ofType:extension];
+  if (file_path == NULL) {
+    LOG(FATAL) << "Couldn't find '" << [name UTF8String] << "." << [extension UTF8String]
+               << "' in bundle.";
+  }
+  return file_path;
+}
+
+NSString* RunInferenceOnImage() {
+  std::string graph;
+  const int num_threads = 1;
+  std::string input_layer_type = "float";
+  std::vector<int> sizes = {1, 224, 224, 3};
+
+  NSString* graph_path = FilePathForResourceName(@"mobilenet_v1_1.0_224", @"tflite");
+
+  std::unique_ptr<tflite::FlatBufferModel> model(
+      tflite::FlatBufferModel::BuildFromFile([graph_path UTF8String]));
+  if (!model) {
+    LOG(FATAL) << "Failed to mmap model " << graph;
+  }
+  LOG(INFO) << "Loaded model " << graph;
+  model->error_reporter();
+  LOG(INFO) << "resolved reporter";
+
+#ifdef TFLITE_CUSTOM_OPS_HEADER
+  tflite::MutableOpResolver resolver;
+  RegisterSelectedOps(&resolver);
+#else
+  tflite::ops::builtin::BuiltinOpResolver resolver;
+#endif
+
+  std::unique_ptr<tflite::Interpreter> interpreter;
+  tflite::InterpreterBuilder(*model, resolver)(&interpreter);
+  if (!interpreter) {
+    LOG(FATAL) << "Failed to construct interpreter";
+  }
+
+  if (num_threads != -1) {
+    interpreter->SetNumThreads(num_threads);
+  }
+
+  int input = interpreter->inputs()[0];
+
+  if (input_layer_type != "string") {
+    interpreter->ResizeInputTensor(input, sizes);
+  }
+
+  if (interpreter->AllocateTensors() != kTfLiteOk) {
+    LOG(FATAL) << "Failed to allocate tensors!";
+  }
+
+  // Read the label list
+  NSString* labels_path = FilePathForResourceName(@"labels", @"txt");
+  std::vector<std::string> label_strings;
+  std::ifstream t;
+  t.open([labels_path UTF8String]);
+  std::string line;
+  while (t) {
+    std::getline(t, line);
+    label_strings.push_back(line);
+  }
+  t.close();
+
+  // Read the Grace Hopper image.
+  NSString* image_path = FilePathForResourceName(@"grace_hopper", @"jpg");
+  int image_width;
+  int image_height;
+  int image_channels;
+  std::vector<uint8_t> image_data =
+      LoadImageFromFile([image_path UTF8String], &image_width, &image_height, &image_channels);
+  const int wanted_width = 224;
+  const int wanted_height = 224;
+  const int wanted_channels = 3;
+  const float input_mean = 127.5f;
+  const float input_std = 127.5f;
+  assert(image_channels >= wanted_channels);
+  uint8_t* in = image_data.data();
+  float* out = interpreter->typed_tensor<float>(input);
+  for (int y = 0; y < wanted_height; ++y) {
+    const int in_y = (y * image_height) / wanted_height;
+    uint8_t* in_row = in + (in_y * image_width * image_channels);
+    float* out_row = out + (y * wanted_width * wanted_channels);
+    for (int x = 0; x < wanted_width; ++x) {
+      const int in_x = (x * image_width) / wanted_width;
+      uint8_t* in_pixel = in_row + (in_x * image_channels);
+      float* out_pixel = out_row + (x * wanted_channels);
+      for (int c = 0; c < wanted_channels; ++c) {
+        out_pixel[c] = (in_pixel[c] - input_mean) / input_std;
+      }
+    }
+  }
+
+  if (interpreter->Invoke() != kTfLiteOk) {
+    LOG(FATAL) << "Failed to invoke!";
+  }
+
+  float* output = interpreter->typed_output_tensor<float>(0);
+  const int output_size = 1000;
+  const int kNumResults = 5;
+  const float kThreshold = 0.1f;
+  std::vector<std::pair<float, int> > top_results;
+  GetTopN(output, output_size, kNumResults, kThreshold, &top_results);
+
+  std::stringstream ss;
+  ss.precision(3);
+  for (const auto& result : top_results) {
+    const float confidence = result.first;
+    const int index = result.second;
+
+    ss << index << " " << confidence << "  ";
+
+    // Write out the result as a string
+    if (index < label_strings.size()) {
+      // just for safety: theoretically, the output is under 1000 unless there
+      // is some numerical issues leading to a wrong prediction.
+      ss << label_strings[index];
+    } else {
+      ss << "Prediction: " << index;
+    }
+
+    ss << "\n";
+  }
+
+  LOG(INFO) << "Predictions: " << ss.str();
+
+  std::string predictions = ss.str();
+  NSString* result = @"";
+  result = [NSString stringWithFormat:@"%@ - %s", result, predictions.c_str()];
+
+  return result;
+}
diff --git a/tensorflow/contrib/lite/examples/ios/simple/RunModelViewController.xib b/tensorflow/contrib/lite/examples/ios/simple/RunModelViewController.xib
new file mode 100644
index 0000000000..93f334b985
--- /dev/null
+++ b/tensorflow/contrib/lite/examples/ios/simple/RunModelViewController.xib
@@ -0,0 +1,46 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<document type="com.apple.InterfaceBuilder3.CocoaTouch.XIB" version="3.0" toolsVersion="9531" systemVersion="15D21" targetRuntime="iOS.CocoaTouch" propertyAccessControl="none" useAutolayout="YES">
+    <dependencies>
+        <plugIn identifier="com.apple.InterfaceBuilder.IBCocoaTouchPlugin" version="9529"/>
+    </dependencies>
+    <objects>
+        <placeholder placeholderIdentifier="IBFilesOwner" id="-1" userLabel="File's Owner" customClass="RunModelViewController">
+            <connections>
+                <outlet property="urlContentTextView" destination="quY-AK-ZCn" id="YjW-BO-1Ta"/>
+                <outlet property="urlTextField" destination="hPw-q5-vh5" id="wmc-b6-2CV"/>
+                <outlet property="view" destination="1" id="iHm-Rr-4wj"/>
+            </connections>
+        </placeholder>
+        <placeholder placeholderIdentifier="IBFirstResponder" id="-2" customClass="UIResponder"/>
+        <view contentMode="scaleToFill" id="1">
+            <rect key="frame" x="0.0" y="0.0" width="320" height="568"/>
+            <autoresizingMask key="autoresizingMask" widthSizable="YES" heightSizable="YES"/>
+            <subviews>
+                <textView clipsSubviews="YES" contentMode="scaleToFill" fixedFrame="YES" editable="NO" text="The results of running the model will appear here." selectable="NO" translatesAutoresizingMaskIntoConstraints="NO" id="quY-AK-ZCn">
+                    <rect key="frame" x="40" y="99" width="240" height="168"/>
+                    <color key="backgroundColor" white="1" alpha="1" colorSpace="calibratedWhite"/>
+                    <fontDescription key="fontDescription" type="system" pointSize="14"/>
+                    <textInputTraits key="textInputTraits" autocapitalizationType="sentences"/>
+                </textView>
+                <button opaque="NO" contentMode="scaleToFill" fixedFrame="YES" contentHorizontalAlignment="center" contentVerticalAlignment="center" buttonType="roundedRect" lineBreakMode="middleTruncation" translatesAutoresizingMaskIntoConstraints="NO" id="AAC-Bk-PCC">
+                    <rect key="frame" x="76" y="37" width="168" height="30"/>
+                    <color key="backgroundColor" white="0.33333333333333331" alpha="1" colorSpace="calibratedWhite"/>
+                    <state key="normal" title="Run Model">
+                        <color key="titleShadowColor" white="0.5" alpha="1" colorSpace="calibratedWhite"/>
+                    </state>
+                    <connections>
+                        <action selector="getUrl:" destination="-1" eventType="touchUpInside" id="mdP-nK-k9T"/>
+                    </connections>
+                </button>
+            </subviews>
+            <color key="backgroundColor" red="0.78314738357315861" green="0.79869981749999996" blue="0.56305065858222869" alpha="1" colorSpace="calibratedRGB"/>
+        </view>
+        <textField opaque="NO" clipsSubviews="YES" contentMode="scaleToFill" contentHorizontalAlignment="left" contentVerticalAlignment="center" text="http://localhost:8080" borderStyle="roundedRect" placeholder="Enter URL" minimumFontSize="17" id="hPw-q5-vh5">
+            <rect key="frame" x="0.0" y="0.0" width="280" height="30"/>
+            <autoresizingMask key="autoresizingMask" flexibleMaxX="YES" flexibleMaxY="YES"/>
+            <fontDescription key="fontDescription" type="system" pointSize="14"/>
+            <textInputTraits key="textInputTraits"/>
+            <point key="canvasLocation" x="795" y="44"/>
+        </textField>
+    </objects>
+</document>
diff --git a/tensorflow/contrib/lite/examples/ios/simple/data/grace_hopper.jpg b/tensorflow/contrib/lite/examples/ios/simple/data/grace_hopper.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..d2a427810f679db537236c5430873a81a62ef412
GIT binary patch
literal 73746
zcmex=<NpH&0WUXCHwH!~Mur521O|rxZ$;w*GV)9Ei!<^I6pZx@7%ZGTeVN~K1*I0}
z=aiOY=I1dmFfa&!)G{zKfOIi{XpkxvUJ17d1^1l%q{JKrpUm`(lFYnx1;@PfoK%n=
z76t|eIj}lOA%r@|oRZ9v(iBt`%%8afiZb&`6x?zW(~B7x7&t)&fG{K2JuF;&&iMtE
zMIbxCY9LZzIduUMuf&4Hywu`U1yn;AxEL5g3K<v}m_Y7e{wnUApOast;E|Y<R+68W
zst}x70&+9RMh^!4v@`}rhMcrC1~#xRMySZbnX}*`Mhqa|M=>zKMNGjWx(E@lO+TPE
zg%l;`6{n>ZA=^~I!0;a{V15Cz7pv5NO5{K;V&PzOFU?Fz1$mW00PJQ4CI<#EW&npH
ziyUugkdH!sS!z)cR3Rt^A#ze|!8w`95Z#q<zk;00z!b;8z{JYHz`(=c&QQdV$dJsC
z%AmmD!H~~Tz)--D%1{Kc4XhZMfW-|cdRQ62<{(RBV}pGnDCp_ztPq-4l9-;JngUA>
zjL?)KBJPt|nOdaa>7wACnwMIXSdw3);FMSlN^l_mFoMnC5%o(gR|wC{OUW-URtU)`
z%}vTn%*<h65ZnlLZ2|)WLjeN=qXGj%9RmY{;8X?)21c|T<iX&<!0`Va1D|6`eo`t!
zN&`sm|Jw{L3_Khh9Go0HoSZy@++5s(qWnBO{Gt-V!lJ^$5`sKn0Hwfe41$rDo12%9
zSAdUCK!lHvPXt8pi9oanLMR?6g`pQL@c#gVAO}MMLjp6S5(ASUBeNjm|GNw<3~c|8
zFxoRPFtRd&0W^@A85o&ZSlQS)IJvkP(Ipufn3!Rb0t}2SjLb|dOswo|9IVVd#*7S1
zg3K(6LWZo4fr-LOg^fmRA`=&GbW&Ck4LX=)Y*N&8(V1O!Qt*d|ACrqs#TIR9R&$vg
zlHz)3afzAurO-#IpEkD~zFhkFvzz*qC9PYg9=Y=5%hIh!uRi@6mR8m_ZP~VC*PeYd
zcMnf5Z=b$=`|<0~zY8)jF)=c+uz=mf&c<pAc7mdz5Q}4AVxf{St5M@b5s(WG23`DM
ztdjJwsA-aOuxN6z$wyT-kOSD&Ob=ahnH;iMJjFHCtR(dj+zGf{ppJ0D|62?^%wRhO
z8SEK8AGbYlljYzo4jBR2-SKa=W9_Zgmp?Z*7CbmpWcn|ErLyTJm-q1B%g*_xH=X;H
zZ)JV3{%cEqos-8ly_vh?cV6v`XS?q9hn`J7ol#r9^;-4buYs@glTN;r53`yc{<81<
z%eP<e#|0kWwpZ%dhU>5YF1;xxwsC9t_RE|9ZCl4PK`pH0#{JGoxAelq;`~InJWYyx
znUcK2YPN#xIvMNi!?XW0xR%_H+x5>rGEH{sg<p-UPQTo+&vE6s&BD1;+^*gF&#)<c
z;w!`d3=i$j?d<xPS8Hh=azCsm=wR;JxeIKMt(wPp>+dw1>e^NJL;tm2R>}Ux&SAS)
zSwk&F(9%F|<3#x%cdolkl<a%_@8yl7f9KWhNDjWDx$9x(%d6opZry6mHBH*OZhLKh
zY0v%bFQUD!y`C9S>%TTdesx5;x!AU<nbRI!`o{JoGIie8o8h+mm*%d?)?Iov|F8Ip
zwO9T#tp3Zawr_Es=-lGhY`LOOF1P#Et-qO(t(9A5x#2&9-`k$-`*rt&|H>~7|9tq@
z+^JLk?Y{c<)s<a6Hml?32JAf&a#}XW_Q%fZHud%2YPw#3)%>fvCbjg}){UX<hHno`
zimCR?DCO@hIPLoRm`V!&X-%PL@!fOVcmDig{_t|_yoo2Py;t1h$p12}+Sc6mclPd|
z-S(H3iS{1J)HQf`NB*ha`q%sX{a@K~+kDCC+Po?D<+lF}n{VW8*nV}{_Q2SX+o`+l
zFNbfw@Smait3XwF)UT~qeuc67|2piu!}8zMh_9vGx1wK1+<N_2)z)?4nfqI>?mJcV
zpP~NpuC3YPN6aNzFRs(r`ux<J#BVmwm$7SZ)1K?j_HX{oGTBG*&fDHGF*h=vsjgpl
zTx-`_0Y`@3zJ;r*MHh<AZ7O&9HpN0%^N?5l{|gVLXI5Lq-r%v;-1KmAN=D4LtyivH
za(`yJ&Gz(ym-Ayceb~2NZel`SaeQI&#!oC)4EEal{&>4-z27yv+jD=}f03JX{#AXL
z@vnN1^{b<P{X00d-_145;%)K#io4&eUfKQDTYT@g{Ga;^f93x$on2<;C;L75X~?T}
z$Lp6D%h+}Iz2LKVzrVe-{-x*v!!wNwFa29_#g_f5-2ThE+}Oi%|1+$<UAb&d&8+s7
zlep(*|IPQiUL_XZ{Bqm-i{ZZW)-Ty;HTjNJxo6qS!s9zz@AJ&Q6T5!*-JOBVcN6B%
z+I>ah`Imb=$6h@Tt#&{1<kspm8FraQbItjG)n{oxTruIT{O=u>U*gM5*Y1`&ezBPE
zS!8s#43Ep!H@ekflk2wqP03}KDAk$o(7SBIU$e%Szp6dHUEO^9rSTW<CBOE5HD0o7
zdrGO&pUMCDHtQtF@B0?|%e_9h>cz%?YybM4$V@g^RhThdK<2L9-&0#QANm)k_3FG_
zR7&J@{<inCAN);!U2D2N-~Y^J(QT7*a-*YU{Gad0{S~!3a(-;pzBv1ZyQGu3jgIz}
ze!Z8tv2@2wwL4Mkl5f5?J5#mlulnbGtFPClU+rHd!TUAm5QB-|zIWBX`>s0cFZvts
z%4A1)Xy`oYWVuthng5pkXZW@1o7$WU>JfT2TMk^RZ+~0yu5tG4=~w5yS<`K#vu!&2
zzU#Y=3&!2|znfToA?u}URhF0-yW8$%wW;f6_Fr8So_#ana<OyFA-TJ?XNz7kn>@@Z
zD%um3TV+{u#^a=W)m8Ro^8~+%9NU_9BmV!z=T+~o=iJ+P>Enmb*{cn=UO670y5@HJ
zE!Mu;X@O~$t1qYS_%}^={)w-m=hxRq6jUAl`10V^HxlQ2<|JIus@*p``*&>l_PY~b
znC?&foBU<o^w0KwM^8juzrJs^RAjR3wqq8~vmTV~T$lMvFMHM;0fryihQ}Ed+~z3#
zyR^tV@7jZZ$Ff)E`kgJEIOWmvsx=8_RdI9u{>t6nJ3nLF{)NBfpZyJAn>X24%y`Mo
z1iei?)qU0sbFb~z{BnKwuYJqDo_qZG{^SX#3|Ez2zm>G>hU$_34C^lyeL2^CA>(C4
z$aV#uo;y`hwqYL*=RW#=-DB>$FZ2J2-an@`=jMtdam$LP|1+ODf7)Urk4uZ}pC3H<
z+dlO8`s=%H8*6W{2<(sD78)EElWn)z@V@VU?-&0j|1-=eshfSZAnx@$ld36OuSaNa
zJKpcZ`Q`1(cPAEKt-p|McKQ8(2Cpr#-J$uvZF8#&zx8?;@0@=wI<g|~Kf|T!t3C79
zPvS1LJ9qq|NAAwh2Up?~RS&;8qxiJ<uiE*jZ`W?izYJM1b7n+Ia-UL%c=V+Gk%!Km
zUbfBljq26*>u!gbJ#N3+bj<Mn^5536Uu6C>`2XWgxO&03Jkrl~ho$M>KE|sTBWrUD
ze}(K%&0F;9-_-p3zw|%f*d?=m&5oa@QXjNW*7@>FZQAkWO-A{P_h0{-<y6<s4w1Yv
z`-)-o#L%-lznj?XF~6+9U%up-$^PP*KYvfy>7KMsz<rhS`cKP+g_jyH+atT^XZ6R`
z+sbzK>iqWKG;!y?=`a4?{?G6_>&7~#jk!AOzceilz9Stwdp2M3>KPmF-z>Y9`=5cS
zU8+Xx`eC0wp_IHQ*{A<AxE1bGQZRNi6b+Dxzh@KlpJ7q|<%Vfjf*18L)m(ix)&Bpb
zL%$wRy==8U>hNl=Yg?y9T}umQTCwYL`06Qt)|ZC;n)-I}_5Td4-|C<JeLZ9U&)@9J
zGI@*J7puH!mYesydhX^I?;I;G@C)_#_vKl4DsB9E_xz`*C88bDRt)y<rymnOttfHq
zsrtJA4B~kUBBw?qyZKHP|F!F#X?t#|@=xBf>%Z$}z4|rzz~R#w6LMCr$)0^euiEYX
zoAov?Z?1cJ^Xro<CQCXK-W`5hTmMY&{fd}ctp@3tlapW1dt!K|Soz<ibI*+aOutsN
z{=>$zmuyxXa}ZtQlKS^enoa#<%Ne!VEAL*r*?l#;*5?a%X7&Ny;#=jBrDE5vZMq-Z
ze*fM5H{rML^qAi{;~tb%daicb{hd$V@s?j$WoFuQ<LvGK3<uYoGAbzkHMe4G?2ZQ;
zZ<_DhUohQrhG~-d=kmLCsdiiUKhXZq(DE<K|MhdG)2q%Nt4I~x_Ub@&?ESL)*|+yP
z%UN=qINWj~p1)q|-)0pbrBp#ZMPZk@>%Zs!TDS03dECS3A_Mts_AN%cJ=J$_w>ov}
z{G#?3j~+gLnsOlby7=C=IdAlCe_4I^-^_Ra8Ek{rzkbJ;eAMS~>9zfC+Y=h)=X|PN
zwJ$#CtgO2Bs;8GOow^^nUP4$ed2MoR^1S=G_8+Y4?O!hM@HnT?C#{hEXj^2Gq9$*K
z{Pe?%SLyD0Q&XN2`RYkk!mSSs)8$I?H~%?up?blWJAeF^_ZS(>-MjebF<JS@zqhmh
z+<$HUCF_h{JDXv@NTP58@9+Do{B38YRjO*{`Tst{qj5-4PD>?gX~chq|Cb+BrHbCa
zW>8opZ5ll_G~D2&!XCpn<?sHzdSkce>ZOmi*0XDU&a6GV<jb$c)tNkZo&So}yQh}E
z&9?Pl9({9T>fiL2?_b16X}-F;`O4AV3TapET_TS2^z?++=ij;iLp$8K^xBiyG}m|c
zOW&7H+w;Bp-1psoo+}y{2~-9Aj@<Zk9;dLZ>Wcpitk$1T7I$Yqakx{z)8BmU*O+f+
zYvxRsiu|IPzG>rv{LeZ4OSgZXDm1|%xnaG><-AoV&pbQ%`OL%{RptL)U#agtV{N=^
zT^%dG$la-z-=><)UvzxaotmyWfB9d0zx+OWu7kPaw5W(zGLOZA-pU+j&)qg#>iwcu
zYhtEfwOjLU;@5s=^ZMn%dAXH*Z(rP~)jqs`!`su>>UQsc^^N=0=~tG%M|Z1#$zKun
zIyNKy;*Ma?>|1~8zx-#IAO9lfKLg_?n;&OZ1^#UM7j2SQIQ_KV-tL2slM<5+ub+-`
zR(;PNUwHFBLtV6eY<9lquL)<eXGKoGn!ifR%5v)EU$yJ=|HyB?@}Hq)_fO7!t4i0e
ztl$3fxY{Sr{G0hFlCziGxwviZo3DD?tLxudzxgY_KK;Vix^rK9|C#K%UA6vNaHsd_
zjk|*VEM)f<m8V(mpL=UfdHoU9ZbA00=r5eljE^}u$?FGwyb_%oZgwl~)!M{q5r?84
z)K7X=J>C8Czn!IAiZjnhZe^7Il6o&VrK;>dL*D*hkDf=JtqJ+hptYUzl(kgmDZ^<G
z-+kP-t;DBE;&pu2<Kwrax9;Z;+a4YN^L3PCcFZN2;|<SjB(obIo+<dYbM<NOg?Y6B
zuk;tMG-o|uaQsyEtXt<ViZ$+AZc>-_qqgq;{V>DNEE6Spzic>aqrj9@_ToRo|10bN
z8G5_Bud#kvyRdP-?0WtFHybPOtb6?@CHmJs*7*Io>-%4A?ETMhadp>`CGpc;`3jPA
zxAqjDyv0!59P`}zbJ<p7{!^!CKVNrfqRL*W{|qbYLv>G!K6rBP7dLbF%?Y;|YUW=3
zHT7a?z59#$2lF3XmD~7FPkXV@>0M#2+wMl(|5kn2_AFD}wL1snZcE)S-`VA6RZ+p_
z_~6AZy}9kz>h{zouUj*x-R^Z1>rK^7w=&k$<@(+__MhS4=0B?Mzlz)Nbjn^|RNvLN
zzxVRpD}_ee4Cl=}^EH8e+BLH~$L&Ag4lav~t~ahdSrU-_H}^k-*p2#dE7iIF-)67d
z+P`ksi(21|jpxfQ9oxD7&h@Rc-~DG;tEYIb`cB-ZnD0W*EsuGsoSySse#vva<$n@t
zQ%}E^jefI!>vCz~u*<>Q4YyfT&-z{~t-o;oqV0cW_wCJ|re><9C}83kkvGpSbI!vx
z1=|biR+Y;1R23CTyvjVCeYcY@?RnSSmrwGN@9x^~doKO*udMRF0eg}xf2=c$yI-;D
zc-V!^>|FCZ^1EMN`}Hb)`9I++UN7Rd-e0W0(N0+3xB2kFqw;0%+kdWqE?0Tf)_lLb
z(weR_k>0nH`Db%fhpyQ-@rt3L_SwBXBGHK}`}ni_9+(SmTwWcjaM@~!3a>!j!oGqj
zoIMA7+9vh13jAmIfA!gahGT5O`L85=*B<=R6CbplkAKDY-?RVDny&vbY}e$|n;g`f
zC8XFJRL?j5-uL`nsD<zGPwppY&+O;RnaFk|CHbG$!`^4g4;RhO+uJ0W+dH3qNAc$X
zPTt@fAtpbRLX%%rY}@WUTlq<e;{B6FQdeHYN=vVrGwuGIJK{kHGVYvxbX#`!<>l_?
zm+x+Vxlohi$jltO)r*BS8D;N0aJ24TGS`2m#*Ld|kMEu7`u1z;4%^nVd<@MymnZ*S
zVK+1T&h=x@CS7K8X_gZC<=1&jpuxk;MDa@bHN6e5^LJm**sS4tPBcutf6?7}=NJF%
zvb2w|I_5p+^(N--d%1;QUI~3STNU#@H~Ej8sr@sZpNy{Oe<~;oXfUu{jne;KRLJ@9
zbWf;v$45T-X-j4+TkHL54UgQn_u$8^i|;VcxSp3C^?t$EICm|*!Xj4%h8qmqisw8@
zwUj*ZUQ<r9zE}PKO~?q9qEuLFQE_H|o<eedZb4#6W>RKOW=W+&W?ou8Xp|!qG&Tbo
za^h!jWJqDiXGj7MQw1<&Fyu3oFyu27gINU(3=F~qR46c*G3YTEfDDA#&Bo&u;Oedr
zSegj35@Z`Y0~2VJlo9*r5C<CwaImp)@Nlqm@bGbQaB%Sn@bdEU@(Kuohi~u??l5w4
zaBy;Qa&vNW^KtWX^9l0t@$o^2g`gCE1Hhw0OmR%CXrn`l|Bo;nV_;xpVFra1$Ri*O
z8USKrVP)q4F&L3$7#P7rLjs`TBNk>>7Is!vHWpT9Rz^W421RBTLq{RjKqa=s!bT(E
zi3^?BHy#X1DpD5taIwi)^x?-zi<~zd3QkrLyHwn4^61m##T+iGYKKEoricqNFflT*
zGP1I;v9qy(ox;c@$gF7SD8v$&SlFn<I&q_s@WO*mK}8}-i#916C!1V+*fdFn?Zctq
zVo}wPmz+)iqYTnK$p~a>ob>#zM$<-(UXB>1t-+k%Toy7&%4r4KCrf;ldE};b^Lwg7
z@@8|7@~H{|CbyO@_2lqUV&XhA*Xy0&uGM>ELvFl$Q<S?^SD%IXOv?_Y{oS)nOeU@i
z5?JXJ6el?4`&>Q?g-QO~w6{#&-nCZNJ#2o}!d@4qm<wtQe>Yy1yt6BWr`ToAgzY<(
znYz2@Fw8nL<Gdc*#HS3LTI;v6xc7cwHjH_$bAHC+PAyNC$HGB@o{q;Ss7$Qir&t)S
z@u{Kv_I;rZJGV5JOlJy9O>0xnf0UFQsXpy=myF}LXUWU|%-}5#x@&pLW5FE*u9qFp
zOqK`FnEb8UBV5#RmHvI5@7=Z2ei!W9p>feg(Q?`H<QAt-`TZsv^Yt^W>xEYC{dw`q
zy3%UjNzFSm>q-wC?#M1YW+^=H`_8K}>omjzgWK$0oWK9w>i5yF#~#Q0OFe!1!jtnY
zau1~rJX|uZ$BEgvza>2D)OL^8+t&6yxii;9J#hkyXQ7~`&4NZ7|LYQ8zG@zGmuYy{
zJ~eAu?Tl8zO_HL<)+r0O-FX+XGB31OJ6&+ir0yFVU)-C$>E&r{tMrVe9;+Fy8EVe#
zH(0Kzr*%E`mUZ2mJhORA?`<<TycX_tP}%dyw2s54H(U2@tgM~%y147$df_X*%Y&9W
zd~lB9nc$}P^;Orjw>7sG@+5uneOh}zW841S_H!c_p39if^Y-Jd^%<8lSvL6Y{rIfb
z&*agab(^~WCNDnv&!k%Ty9ocRzuu)=-<E899hDNc>xpTal+@%o1+x+l-U<%XKI-LX
zYkGaJt#jRtRZ+HUzuq`oqQ#=|?V-iz;FZ_5tq;4k;bLHavD^_x8NMRfjrYF%-EFqM
zGiaIi*Sp~pzP~IrGT?C1f2)yq@44BxJBlkVhVF{COHrOu*d8Jk8Mudum0i+BjPXCi
z>P4^Iz5ixa@v~=5{A0N7pGa-6X8V!d*Y~`&&AS^b9T8kMZJHtDQPK2=r)6AQ*hM%D
zl_KPRyxp4{9PSxfJgZM9Z(C1SPc>u2!zaawEuCkoT?+m)+<yI6X8z~rU#0&B|M>Wa
z`(ApmqQhYwqsNk_QgPfKiVre%EqBNW91~RxeV46ia;136)-|k0Q%#s>>D^{OeQxEw
zS+>^pmo6z}u8wo9=gR7Lcz3(&^Qm@A^+kH|%FVs9XOEn@D*Wb>?MD4`cGFgdPO0j;
zxqL!Zc;jL*A(OpK#fBI2f0keP9xI<6H}!7V)!M^nT@w@-8rT+g*Ls;<7m3d=^$A&0
zn90RcB>P;>W!=%M;V(=sZ3whkyyZ*V@4eTS{*w0EB_bSpR<T2kZ@K!dZr^v;<FnW3
z>ZOMEI<@Gum#uhz)pzaG)pz}7X9g|1({RW_?D_tnd*AmSc4V7tE*7k-pphA>Q8(ko
zhn?YiMKT#vVzT`%>#JP~E6<wC;}E)H>B)mCD%J`M7*~k=@n{jg`L?|&_I^i<X^K}v
zH?!NK?~Hv4seeCSwS3d5!ZPIq^WB?_YeTYDDQF&X*?g&k;fK?i!v}R$)-5~XBcQIV
zk>D_6bxBjmB*qyJ`51p#B-yN!UCnx+$)}N3z?1#$F}?}Ta|}6_Z+W1=V6f0CWEqQy
z9XG2?6th^?%!V2BPAGORa}MO#@$kS?K3z#>&6SV1IKLfG61u1$vs6S<O?{ut1L=de
zLYNgTITJQ~y0)ifqFs__<c~wKQTuc*+pptl6r0v5<1q7`>;z}eql`z{7=&;0UtW6n
z?kqj89fxOK%{rN?JkRJHSHls{^lt$^)e9Cf=sC~UxxT~s&r<iS=1B*f7?iiHt4L(v
z(RDoIHBasai^A0gql4CGd@3i;$~Hd!P=c}P0I%X9S0``fTg45f%9Hn8QZ<^W{GvTc
zU<!Ai!@A^Sa#G@*>Ve8LIKQzrm}))ZbXsA%fon<nl${(3OahMrSz0VQKZ`nd95&hz
z7$ADJ<NX;Gi>(V7oLE>3Bp&{~a3I+uzop~*0lg4)|B@*_2c6Ouy7Mdxf7O)UuBj}n
z+@Hh1<ng&iYf%d8L(UY(qh2yj1ucvdC3g7waiuT$*tje)WP$+8-vf^s_*p(WxL-(E
z9AmO|u~1*+Y5hlSCrh=oW=cw)IK}O9fYsA><rfF$W==Mjm7WiOBx+Akbl8!we3uXl
zXVasZ&n*}a#m=*IU|?NxLcusgVOQn)6j9aVnwktA<$l7mE{Qyt@BaPp#;0#4cL_7F
zWED!q^kubeEl|*S%wn=LwEE0PE$@Y_g)=n@A2GS~CMvLNPP5-=|K`vNhT!bR3Qiq?
zvh=5&uA)vYdo5k2Fl(-xXK3vl`=K?d$%&~(Y8m&D100vw?Kt1I?#L9V()si`Q{7KB
zSk_{hz@q%XS$3k2KdhGxSa3(g{{Mw1iS3@6r_`QKVBT^})WL71cPGcZi;|@!FC6-c
zDjU^rOHAczeQf#rlGXK19xIs|!dqnw;@@`OtCU-oy#KpPh}WKHBAz$PwT|>DUn=D6
zIdZ1qT@%-|`yKbvOwF18g)(cle(yM+7?tqMP~rSNp6r)r52m#<YusRp^=Rkp@oK!&
zr2XiRPLW|;Sf;7UJmqa_iL6V$O`U&Bl~0iSplV>4ulB+E8n2U=9nF|k-*qd_7G|}}
z@mj`ek$ku(MSF&~#KIGzYZW<I)-AEIlQ^Dm>8*oesDggWqf({S4o;I9`a*?&Z0tO)
z7}B+SR)o}h{i73|3Iin`OkZ&H9AkyXw8F_DybQixuN*{#EmdYq99ZqCvU1M3EHUk!
zfBadb_>-8N7}DF*B0qFa5fFaMI=}sA65FJL2TrZuM0%Q@vL0x-B-6|0;CkWo5tmRV
zi}{*%$#>Z0-f!jSUg9q)dPK+Y1k;q@nNvEZSnx*q{G4;`jMgG%8ztV$#j1agFZosV
zuiy7es`jgOJgHJvJNt};KK9!Cz7L;$b!zRc4-3}^zFY8MmBgO4e!3pd@1|b$*?rsW
zZP&?@ZgUNO_ZqWod6`_;UGBJTuH6j&Wp8uKHa(l)di3&#b2i)8+`4@rs9xl~tNxN#
zAEav9b~(=4Rd1G`v+kX4xpn=OsBN29UaYQ+xxiJ-o-A6o$=dE==<H{gV=T8Dt@SsX
z{84P(+NhRC`*+)Z)z4dHyDKhh&z_W}_kSew@akS#xW6=*RawsP?)Kw7sho$|w}vma
z{H|xd%P1oB!W@QYDt+Far-BW8vn28|wjSBKuDWzL)3sNOM_djFcs1I0OUnJQj?B25
z%bB?1a%AHfp(~6t{xk5c*!A0sE51zkFHh^9l<8r6pQpZhA2?SrvXAkd$Sm%C7Xmo@
zR36;*STLte<(qrP>*ZClI^TlJ6?w8_R-f`<NuC`m6SbmnmFdG%Epr?W^rsZs-#-19
z=jr$LXaBPOX9)dg6|1oK>3@dByWZ#Akh)#{cjeTQspdWmJIc4T_FdmlbLeI7&7Rb~
zw=?%N*oqff$Z-VOY`G*@nsvsa#rTMuvnT61mUgCHjm|gPtc4yq<({<Nw7#-4cG2Qc
z_f0p8^CKIt++U&Qnqb?o{Yu6J!E^uo!*;K|yteeJ`_;ngW|>bh!734(O2wmc=9(7f
zI~T3foV8Snf1hPg|IEXSv-uAFDtT{OZzR7b_~q<Xy92^sJYx(oW>=6evw6BD=HI-(
zOGBrse6^SqRBZUN_wd?Zrn4jW?7N$LGdHr`*Vgf?R$0)Zd*%Nb+HB76iM??rZ+dvO
zuho=X*?@g_!Y{8~RdcZSX-@9LyFa71t2eLu&+sa=iZ5`HUPho4*CK@p$A2eTU0u7v
zIJ9a_hULx!X^dJsRRUMVhO86Yn$^!c$@0!B$AioD4@*j?UEi6vop<xiH>>0BG`^Aw
zd;68kkbTAbO~+nk=$9Y)aaTXE>y+GOor=<m(>J0s<KJ#|3f{HAX@R?Gh>Xa*8>ch%
z7`86tKeE_!zSyzOyWH2UBt!pNm(>OZu3N?#cJ{E>Za0~E%X;HkvbAr<<d&D_=x11+
z`_!+SrB^ncsn{W)DDyvqjGNu!aCgymH_L*3Z{~OA78G2$&Z{uV&sgpRV<Uqer;doc
z=tEI2anU!;uF|`<%sQ;4SGH^ilfOazQOPS+Z3}ctZ%YTwIk?hCYzL#g_MXGDHFn9o
zHmb`y`faLzdtu6D^P>xx7V|K&hD%mBmK#p8kW^FdFB8zsd%IpDHa8$7OM7N>%f9nH
zUfyXQS{<S)f;P%AQCX`+(k5kITxEAjh%+QBD~R)0W%(&F4GvGmBZ9a28JBL(XEZq?
z%(==xt5apFqa%-V$@3elR(G_sy)4o>aQ?85ScBhk&Y4r*w@B-%>nsW1$G~9a+}&_x
zM%fxBuIZdT{~1yYl{_yw8Z^lKUMP_Aa`_aIhy8DtN$^~7IVPbH`S2#cu0kL4Lu2JV
zoKHDSURl&~ziI9|v5rAMhrv*N#RL`3pmv!DN*-rVE6;E$YHwaLML28ql){$QClgJK
z<8K9U^h$ZDaWC<z{V>_X<+6#MgpkX^*GkeYJ^g!Bm>QVaZyt@kw}B^1QQ_H=Nj|@=
z6}F1(kbJ@@QKw_nyi~wslG99$ha4+pmU=DXNxf0{p@D6I>luD|Aw|9E8b<7lD$@))
ze6Bo8=(X~ilg4!Zm_+L-ktGLOdS{7fl~$B<6v`Y73U=X2zt6bY;icli+MgZ=R~%vB
z<7Y^5v#2=0uAmaqqtbau$Wr7<^}?jZFP%FZm;_B`W=)!S<j@SZtS+uOOb6VW52Ux}
z2t1k`qh`B-;emI9V$8#*LWZVBD}vG`6Ox2xbvg;O`V_QpJtEE!uFrSFM{o*b*oOms
ze-aNRh-xeOm$=;Zcs^~S?-HgbR*nU#3+A_;zx0;-*@YLAqkJUS{GPH*kSTHg3NHRF
zs}!BWl~x=qIPIgsQvd(r6VomOL6ZrZ2Yj8fdi)Iwi&$h9>l84uq=fq#J54H-IO~(F
zoYp%n)xM-Ozs9pnAd-Jk!<19<;Xhe+&-4;lY*4buVae6RG6g}K>Z0b@1Hmk!Gqehu
z|1+FF*c~i3(Q4LYBb6qh1h-nlWfNIUvRGb4J~osPnHpBadAKDlxKQMTw&}kU>$^Yv
z=yOqF2y*FZoG!B{D(hEqkT|17=K*m=t+#HD{F{4H4A*_}-pJkIvFZMiBcEJ+jaeBq
zk2;=GbJ*J}k+ib&lwu-7>*+3rnJy2HTnS-TYi^pv^618s-6u9#cN%Qj=CPvM>AA$&
zO9u+3G!?|1IJs(<$|GUZ_Yp_4TH{n6PwxpW{?WaoO331@pT+Wg$-gV+F4?(CuH-?3
zUSN;F?rDyxcCsI>JsDmK9jLq1o!i+{m*{zAW|G618IC7?I-8X)>J&tnI;i<-6mTp#
z9QxuCm-#*~VSR~~5BWQi8<iGaIl5xqBeufZRw}l84PAE5l934WToq(zc;u#%$L!6<
ziWVwm=Q>zQmI?=EUE;N}XxQge_jvL7`W^onmTdpeP-XX0*52!%&3}fs`>QP8JzV~^
zTq;J6H+*qlg2g$-WU;*2DLXo9e-`fDAyu;bYQ-h?sWAc04jLRYIhLHhDSNw8@98_s
z&u72)79PFP`q}@!<XpQ`efQVcD#!+0=i_y7GD|h=ac|99C*-sI@J(sH8Owv#2SiBe
zT@1dOIjL16q?#r4^!k%KZ*uOxs+#;`+J6QqtNb&Ej(zznw%%~Xfn!Z|q8jyW7LO--
zJ_@nWtJHtjxAU{tVz%p<o@ex`S8ZTtoy1i8FXD;c<yDD%oNdW3`oe$L*1t}?U6lXQ
ztZJUkv|X;k8?+{|Eo$<r-zHG}?7~vEMU$Dgd}ux8%%of(yQ=t`+vDZG!fRi3ZPooI
zEj2waT{pc~XQ6-l%E+fvZoS`hHtOG#vO7~kqZb(j?K`~d&$ji>X)nB_doLf>+%$c~
zcOfR8u)-r%hgW2m>U}QW6;k<ytGi6b#N@+o|EGOFxBc0<Ysbs(y1B2ozXxw$cH!0*
z9j+{<s@S;2rkVC)T|R#b-F_at9IJWpule@3yJdT=qt_n$V6`+zHf*bx*q1w5#$Qcq
zy?W%9ddbYXy~^_1lEs?mrcTb!PxXFsd6EB~N56I+Ea1F3;k18Gu9@B3dsegl?#?@%
z^?UY~@Ex98W^Jm?YM(Y+{l~szE2b~|Rkc6p-PzkS<L({pO<o;)Kh}cFWX0vrSBiFO
z*}Qs^x7O6|t!;ka>rG$lPwk(+JL1;1=!>;66JP!7?c1?q=jyiYAsM;qGyg@``r7ka
z^enku@6EDdT`hab{S*79-+rq(ZxR39_ov-g|Lk9!?|Wk@S4pq>;<~%x#kJEalcV?j
zS#y6^(x3PDb@XB#-(}_=yYqd+9+MrmXBP)Ou-uwy?_3(VW95f>!+&m$dupD(n!E2!
z+`JiU(>J!WT{i!+I&fu(@UySpwO0AMpN!|}l*wEDO4~bC`|PoE{~7X@hfLeFHoM%a
zd5(B+W_Z%Jh1uyX;k)!jRbH=kTUeOvb=^7j)9vEdzQ@iN<VW7zb#E)z+uXJ87r$`d
zh+3bK@Y%HQKSN06*S{wo-}SyI>%Z&kz4zJM-)?=)^?j?B(aW>Nxi2?u{3`WpI_DA-
z!+T-HpZ@N1uj0?VpP6s6GUj$v%;haRmT5=Fh9@jLwMN4++q*X{D|@T?)n#XW+v304
zUEF%}PM-PumHCOefmgKe+4Va5uFuRiyu9+Psq9O$#Zs^0mOp)URcQ6aZrSsvLblzX
zbXPcg`=;9k+g8?BE~(nSer?$7?#ogoFKv64UHi|l>|Xw!??=^F)p@^@4$jJxHI7^M
zJmYGuH`ju?6{ZU;_jTVg33vWCf7z+|$yd9>1OB|Z@m5#Mdb#fEcUPxwxml2LWqHP}
z{!iPMy0=}q`d!+8mipq)@~d9l4%vUFO6})1hSgd@x?jIYZjYVaxyJsPozhc|=c<>*
zJe{7EoOi3dJ9WO_hV^cXU&fg1Jh!65xl!5q)(-i;*^BOPc{l$`#ItJ|OG|xyy^>je
z`~FsbYo=2h82<ElywSFoo9=H;dd2AZb#?Im^p`L08hez@de3^rHhyE!+;HoEx>|eo
z%{RUIoq2nFbl#!vJn8(fxz1VNdQF{Huf21f`E=l{*{A)d|5%(aKiS%U)z$h$-`8Bv
z_}RAVt>6-!e<*l5hwr5>mg#yIWpnomUW)bJ7M-`-aAmsRr^(uZD;<w)XlKo+m6~<q
zy78%JMVTex+xe&7KD)xy?A4d`v+S6*WKF!W^veEoC+_o=&dlDv=dhm5qg_*yq9je+
zP9%xnQV?R=delwRvOHOGdYE{1ZTzLUpxUj8_HLU?M1n*Y{A{Z6?Fo6Xq|icU=`mBy
zWve!6WG;6TS}L>m5Q9sAOhZZtYdhmaS0lB$e*&B_*DX)Tr+9L97G!97DNK@J5w&0l
z@l{FGP!Vvs>JTTXqq(iJMM36aM}jfSlstt(%k&oE6%*tFRl@oHJg}&2I<2$0p|?n5
zW{9HEqYXR-MtR#kQZD{3Iuumt5h8T__Pf@l>x8t552UOTW6PM?Yc(;XbA1UDpMp1M
z-_55t89B}}&Qs=GBG91ZbJD1(cisdJhNeS~arS=<7_OU&C}d{3ynSad$MB=Ws?~=*
zTF)3c-LL<?MqsIo3geumHVTXkk35)Lx>E1+@~}+ka+vLudY7dy)@z~94sQ*YCYepf
z&*GC*449@XT0RmtWER(CSTkkG`4pwc&7D?Giyqh{G&LSvBDp}by`*0vlwm>|V^D{i
zyo;);)DkV0_>2-m8BdD^6Wawu3)NC4e|44!n4V~NSaap0#LF5Btdbm7`U~=eYFN(V
z^yxXyQ6$7zB({pZaGIpkvBhrA>*8KHP5N}A?a5}Y^zOvQC$9om2p(7vw9_TU$hAT6
zAcv9dgr~|gorGHVaK?!1b9iZ{@LrjH<wwBssls{!i*(ifwoF>&Y2ez*6Kene(&OuV
zQqG>1lQuaOx=gW8+xj=|Wv0VV`Ow~~iOq95?S6Fal+Z9b{;Fs0gtm9vN~NErR;5h-
zag5EXJT(1*qqzQzT7jyR3wyR^&)<H>#4z}0zN1mWvqKu9CodRIV2~_xU%x$f(&?2}
zg}+N5o0_eai&I$AC(-e#?e*5Ux3`zZPFt%S^s4)=_oZy_IUl$T-{#MrxvOU5yrX}=
z-BFztqN>^(xH9Q==F!67rWxn*CLB$gC|&iJfB((gspj3c3-g_3TwVH9{!OoTO$)=~
z?}gnz&IVqV5}c$^ZhSFdj-$^Hy}}vSwp`>``e0J0!i=-&5+2^qcCKLXGP|AHd+lkV
z-=&lDmbx=NYhgdF6l{}py2!h_LNJXX$RYHBe~lN<1<jaa7KgJgl<qzK_O;2jrRrjq
zOVlP^a&D5)b$@J7(yAd8<1R5L)758{LYS7x%*;TWFvEyPoN5UPOFWK+25~-cIC$gR
zF(tM=XDgUZBi6k?(y7yOH8$9I&S%3(X9T`2v~<(dR(bpQP0uVVr){narUbNe%w3u6
z=*f9@Qi2=H$Br+`6Qh-wR&mNCp6BIz#xe7UlZJy*MU&loiQY2@IeX8XkX&E>uJ8H<
z*Y>3=xxB6mJovWzt!Dhk%GolBEz1>RHFXM%jei7Lt2^#m|97+Z6<=L;=i>}sS-%f6
z%hmrqvHzmue+Dx<P|GL&mFf@qy)x|u@vqDs=6XgxbSpmMkia6gYL!;)I)T4u*GkWQ
zoY@y2x2!28IIyFEb*J1-_p-xll7EEncz(8W>Ai!2QFkK*LWB)<svjp`x+Q-_x;W<1
z`LnD`QeM9Nc5Lel^|v8h){6@-Hny+SxjO4cOey~+;j@3#G@tD%d~g5s{5Q{!*Wc&<
zGkvc=-&8jESL%|6Ki6Jq-YEEZRc6K%mPx%_&qcrge!TL}jl$)bDQ_K2LaZ7!R$SuW
zrSqRb(=P1twx#)3=lbT!SN%2Jws+r#K-U&+z3dI^|9bDaGV?#f%iHJwGwk^1u6*Iy
z!k5$Ltv&s1=K3pBD~;k8^|GISKY9D^8%uY84c5B(Hc#=(8+E@&Pm;C@%H}%W|Ie^7
z{9m~8<NY@xB6G{OyeWHKRkm$Q+pG|cErz|ay;*Jn9josIY)@Pnw>Wd-f=S;eTws_v
zvv<0lrMc!F@r{x~7qzZ0ROfh5|Ig{3#k<|CJ>8iBJJv<t4*W9nOHkjAcPqE84*jxb
z#g(-$FXwvo7ss7dd@A{~%J=Zs;$ov|GalC4u3NYItKGHR(=u~y_qndh6yLS_ufOjR
zc9BbGqZXT3nk_fwF1qt|Vfx8AVlTDI=UlkB?3wAhtcOQcj~X7cl0TKLoi+XU)}ymm
z>|e1qck=QVcV1@42j1G7wdG22*izpG=5Ndv@8b<TH^Xnzx2*x44Vx_(Y?KaOyzBCB
z<|}L6kKf+ySnjZQy2#<mUarNiPDiJ&xb}JT+rKNeeTiA?Wtg``l}l}2$*G-ki?&po
z=Wf4zZ`<=|UMamd@8<gL=CJ3Ub3J5vXmHqio2;tyi&lTFyPCWC;_kG&+b^y6{F-6b
zP`9e2!|ifGv8}jC*5zfU-?m%J2bHW}zB%BI?l)<-tc)d>v*I*mt0W5}55CVXDYjhq
zpW(`NvENee`I{G~Z;3Wry42vop<m|$R(w47F!ZYF&hyW%%G@*Zdv|k{#m^0g<=?3n
zTC6(GcKhH{`S&V!x83f3Er01o?C!jE$AfCQyK?z1zO-F``K5N$w~*vm*Ed(6)Fj+3
zIi0u5?eQ`_ZK-!Vx2<+t`}p9MS6@qQudA-A>@BbQRa&{&@VfPjOr6_r?>tbsT=HOf
z(=Wb`mv_G^?pf})>}K9=_uwnP9=$z!X2m{#{|i#HzkhXiK7G57mAh0kw9WF1X|3X=
z{+Dv|mu`v)U%q2wf34TnT0sw?@3UrY5f2SFnQ-6yW>0swRG8wqTe<zO%(?b0%{_Yl
z%CF*}?{+KoE*$&Mpx0cl|JN?e?b@?@%N{2eC+{%6w6xgw%e>cB>#j&Vd#Ms~+4IYN
zyI4bibFsUsn_{Bx2i^VNyZGvA53{9WcdjkF@V%<#>axhdE8gdyURis7`Qsj&6@hQN
z7thVOSGcbJ;or%wt4>}wn}2xmzt1hDSNCps_4nP>sXMnVIlX*K0IyqnChv+5XQZ@3
zqUIkLeJ&E4u=7V|fui-kFS);#uadq}{q@_A)A^UUr*AR4AGCulSmdka347Cf^KG)u
z-c^_N&)U-9F{5l*OZa+$7cXxAP>#72sy+S4@AGbd`tQuIy%@i21%q|z-Rv~=8+Eg{
z{by+ZYGvPK<=E%_pW*ReFZE^HqjD5%cXt-gULlgbb_LsfS-rk(i}=p92LG7Zy5nti
z-D~GBw_7*sX{N}DZ27y)`pog#XA;l$x_-L%?&SKNH@x3#B>i@u_}YZ?lg&%fdhY$^
z{~3&|HnyZ|?7I4U+T27PE1Rz6dBzdzh0mU2?UK19cPQe)SJriBTbBynFO#~soLT3x
z=tJ4ftE|ip*Dv~YW!CN36a1%lXV{$l5SH<I!+(a(`A_>SE7J=T3&YNRo4B|zuOu|l
zVe(T&)ujhM?cB5OKZCN>ojRYq<^DR;+O)ULF|1fD`&q-o_=uc*n^fcCX|95tA)zs0
z+_M@v=CLkwO=P;Marma?Ia85d)dilxZ0o%5OyG2_Pf}XGjUk}mnXsZ?ok3%eqvRs3
z0}4V?0WXyej|SUs=2K`5T$UP?u<%FXt_c%XcxZ@<II#!?-hJZ0s46bBV6jV^Hp5h_
z=;?<Q(r-UzQJQn`XtBRU)(X#6Pc@wx7EL)IsPb#?gefTtPHU<OIFt#wPGmSR!z$eW
z@*4q$Fqh6#H7<XAm=-QQvcN*l#;~PN!_;Gn<ij4J%M&Iol4(4}*rV&bq&2X?NPhP*
zalN$;LXVtM4JN!*O0AIBt5N6n4tVA3)u^KW<pAR)Pop573$<zjtZ8j^$zPn8TryH_
zV39m;!7Gt|D7K^cO@jkRhnLQ^U`<7i37#h=iLxx$+1$jyP!Oni|9hg6<$?*02bvZN
zRva)?QaSi{nZnjk@dH}TQ%n}f?%gCOQ1{YilU{jVoKOQx@0Y7w0t@dhSU9~$!$bHI
zQ{{Hg59jxZ7})>6{CMlsP&J0I$r@*F=H%<=uMFO};CI)V701Ocvn=2{7P#}}y@~r?
zWpB7<5x9l(Rmh|XcIPwuT}%4?b8m~z*8O(cEO^3KEsJ8|nGrWnJXztpwEKuM+tm{D
z+xaWZb|;7K%G%m{MfQfe1as0&zNf*}tJ8h2pUaycvL(yo?$Tg&h5VG{>&r5OiyN*i
zik*1=*p7<HAr|w`?)@}db9Y9{>T5sT51rHewz!wU*FdGYf3wYxqlcfBuKn?<>$+&@
z6*-0_X%6a=M;#aLin-2Y?f>?stQ^zQZLg!YxqbeucZfmed8R}X|9*=X@tQ9;Ef2e-
zmsxeG@*S7T#PiK=>k3Vsc$VImRe$nY=H1$5f%dAWESQ{J-UUC6d8a6&GC$X+ezrBk
zEvw~Ur`(9n-eI!SVZw^bXKY)nG<qYqXq3&>Y}5Rfd|BRbSLk2=En(ZYbJpg(kGgqt
zy7|L@YO9myf0=)B?Xj<G_q-DS*Vk#)x>)VW@AF+bMGvJteILtk`rTe9pDjEsVDp~c
zh0C*juf{Oy@vz=1nCU37^~IHiu~%O6>Sro!m-(Qq@g+TYd&1B9%Ujv*=K5|Im411F
zh2gpFAy>nGtJsj*=;>i=Lf;n_KmTZzE#~%m$M&F38|f=StwP7e7czIMw>+79(NyTs
z2Pv-gGh9?wRqs#f)_07!bk}OyrAr(5mOc3EZ8Nzga;J02ysjldtG!n*T)2Y6_(&38
z#9^jJ6Bmc4iq@)P_h-ztE93pQRjf((9m86O2Nt%AZ;EO%HTv{rcC_#Hb<2L6#A|Y^
zN=QN9hS6fq4)^4r`GNbteQqe*m;Ng8N6d<c|GpF+JJV`(lu;&K<Mn?A-8t8D^0VD$
zttoEqIPiGM`M2);e~<rX_<Z|i8UMW!TkT(6r7eBY`%TZ@%WrNDzrLcR#YLU@$DWIQ
zm$*~zB`U27c~*OI>J=WId*%!t<<Iv!Pvtx;`BWn3u!r1Lwj^troZQ>BiSmVA?~mIb
z`TL+MQ}=XRwshZwm0{BZBdj{5zAj`bnh@of_3hZKg{(Y>G<lB;OUKP}%b%urUWaW3
z<70+8`5L{bwcV$G7JuhECa&<G!B4$r;iYY{p1W?`%L_RgExY#6gG&WrSJ%y-bZC>w
zEqk-Yb#`vn6F*+M@#g$@x9*fBnh^n6%5(1KtN6yu56QCnz2?Q2ZTEjw@n@Gy&&o5I
z^U|!<hWGF(i^`puhWF2`{2uY+K|#Wsg12r_?&UxC+>hKm>Fzt9D^-^dO3n?u_AOe%
zY}&0nbM;IwS6{2BIoDpT`=+I!`fql<;r-`*kym0fom$<kSL*dfpDHSy?5wr@@`}=s
zFH+I}8T^0NO%<D7T>16tN}2POuXdllHPv;ld+_pr9oKi1e|~LwWV-D;nT0;#ceI3J
z&wP3=Hoe$7cyr#8XXVoZQ+|Bczj%MziA;`X{}}{LtIoD=J5zo>a^K$8N3Wli{W{%S
zEg|N+JZ$CSs0E_0*{(e0DO>B~m2-Ld^bbel3-6WNK70PPdh1i~T}SGTR!?c0yy8WM
z{Iu1py`!ZLl?u7f<T#ivtG(uz@zNYC>#SGzX6{%UIX!e`^xccA1Gh$9-#68(&(x&h
zRcU5uTD$T4qZhU(9d1eF-90Z`KHK=*)6-k0%+2&)EIfbf+faomKc)s~^>IqxPQANz
z@6)YEzi!yOX<^{hJYAVb=R?;Rr>|)Dj#>G!PU5<`v!hDi(T9ist~imL^0A>nK{LQ|
zvqQ4;(ZGibS?&hg8eXyW-@f_jTJGK5<u`)v>m1!Wb?=I=wp!CgWPVpUY%reARp)X4
z%Yi$uE^WKJF3YNQ?bFunE4N-xOnwowaru=3b-!tQ`?9l4&-`cbGWn~xujKVF=j61<
zwduZJay7&@Jw1~){o?Wk`+c46>)*=BTd#ZbN=e?E{|qZ`U%jpWR@QO;lKSV`d%i}7
zSiZS*@#&=#k5{h~3hK&T?iH@GMZfUiv0klhPyQ`BBP?MUdE-s_-o`bz*R4DEe!Hn;
zc5$F&xo1;Up3RnZQb*+H-1po5XZx{Sy=6D@zFfY&-P(F}*V^KjL8(XW5)Lt+*DT($
z{n69DMLWgC^=2+M-gbFO&DE>Y+H>=kMS7m)SKXJtX>pKRRn111n^x2I&X0H}Yq%ro
z&aB^a??$iPw==`+(Y3jIb@j9F)T~?S`)AG7GcV3Bz9jPE{eOn<vCrN{Jug^s{8Dw?
zgVP45ntjEB7v0y^{KH!sHS5mvkn(5tF*$u-w?+jn$^Xu6_5DkP<BIEsE%SAqeP^w*
z-CsS+Aa2jTvX=>ymf8EsdOwuc?(!==%sOR@&EwC!)9k;uy>z>FYwqgI&`aM8n66ti
zd=LHYd;L{q(u!XqbE2NLWodZGv^j5>wr0`o&$=tG{obiQ^{d1;Ue#5yUh@sEFr9zY
zHL-k#h^m9sj@rQO{ePshV^`ahh@S79W;f||^tt}ryV~AX<zm-cr)ppNs<rX8M_uGs
zO~Dga3s-)7ysl*X*}scRGM1K?y!KxBb85!jh6mq0Gj8ph*Ko`z{#@Gj*M0sMY^DD*
zROU@LD_y<o+^tKEJJuH*+cMWn(s#}g)4oEhpZmD-x)!gLGL6%2`0V;@W|auvvPYNt
zWcOU;o|b#AD09bIl~*$!Te;?SP060Nd{b`utWaCCSd)M?AD-_p`88$Mh03rUO!Jm+
zp6!#%$d=IY@!Iigwa%AsMQ*$IYunn&6*+qKX3ehzx}p}|{G;Eu_Hw=1+Evrc_NL#w
zt8AMcJ?Yh!b#j)x*5<q>ZzETHdi|fFOyOPp=06^PZq)ur7riZ5GsE}N?c3^8Qdi#o
z+~s~oZ;tiU{;$<)YyWnHZ%Gd7m_5s4-~4&=KOXy@_$u1=+gyi(FYaAr{%W@5m9F}m
zJw>7U_m)eRX6@})k>qjH;_Nlssih~LYqfrLNMO<7&(X6Mzc_EZ+Jm*tgG1usQvo5@
zf<wu6OvSy=!*ZF^?!7j;*_m0udUl1UX}@}`{$H)ev-x)}O1Vg-h%Pw8DlzBrrQ5US
zC|f*L4W6j6<K)qo4M(zmI!x+0zDru=xaUdhf+-$>CEt8RzqPtHbfkDVG^Wh^>C$2#
z+@Tt-vmjHC*??)4pNfp5mfBUd)XE(Pj_z^k)P8s@fMI9q@s}UkCJ5|uwchaExg$eC
z^1R?ond*g{Q>QsBY42f@T03V7t5$V|bK4{jl@`Vrk12}A2A*8u8?6l*TrPWe6`rb5
z>`|C(pn9X3MO*KO)+L`4nMYO$-V>7YnJ`t6(@%CnSUjhSh$P=l2ZiapULI;L%Vphp
z6ohZ3%(F9SSMHlUX*%l>;Ug?9X^b(&no|}?IC?%lA|m<Gz)6y+UDcCkLtB!gjKTNA
zXO=KNosenFZL@%}!$}~`x_FB9j4hl~T<3BA{cu6VXPxclCdUjem!{`m9TYS_Zwq!3
z7M)^qK6tW{nxmXRmr%(gH7AC7k9nVSPSkFH*T($$KZDWwBYP4Z;%=_feAmh#|NqL9
zE#<wZ^QX?e>&56;9Px;$l{w(B#n!j=rd6j7&YS=0Xc>$A4ACR}dxYW-i_C5>7Fe=H
zc<FM+)o(ig&M^DT7aDT<uB>>T@J>&iqq{uSs!l)s&kzv)%C>9T(-M2r$1}Ma9GxX)
zov%rU9W8b{_iK0NtOqZz1O`j2{4;Y0?@L>Mt!q{b-aRusVfCM(rtCk%Rr9Tfx8A-`
zxYV`aiBybxbxZQr<l38#p(^)Op1kbkUVbL`bjDq?Q&V+c2d@lWuxloZmBnX|3(jem
zLcCvf%=2IMDrbT{rwX6tvAFvczpc*fOf`&l)2%tCwQ+er|For|z0w&yPnKM+NSwLz
z<-MN!zuHzyXXa#FR4W&4o51i}S>oMY-cP6cePYZDR~BW*vc0r4;Ym?ic<H{`Hi27Z
zxA!uqJUkV5Y^~m3$K*9{WnDMsZJG6U-_xn3tX5pdthei1tz5f$v(nj(o`LVoL?t4N
zUu`*giii95s<17wg$2(x_OP5e<(O0!oLeRn#l~m2{C?TQoj1a7uZs!VEOXXZBk-WI
zshq8(%HI{A=f{6_-Zi~yWn}xmNp~}EH#T<|mjta6T+6|mGR0(O;_t=9`@Z~r@0VA)
z?e?a6?VtYk(stsEcc1mjx=pHdT64kAn$!Ks;k1Z=MWJ_InY1Pes<IwtnG&}zAW(GI
zzL~numU}+aG|33tSvJe$th=O#mV4|^_e;C?wtX<~epNSD{PLw^bG%kWES_kwVrxv)
zf`?+aQl~$c_v<~LlJ`8{$7*kP^Bv(C8>Updx0@98wl6UM`-i*jve~zO=hvYeUJrWT
z^?T>}Eq!rydE~+~1vAQX7R}xF{@P#LO;>mGGOm%zxF{4np=ZLAn69t|lXEYYiPvm-
zt(nrNf7P1p{^e55n`RI0#0#vqXgs7ZugiURb6Bc_tzqE5*@qH#Sbx}M8T$3@xu=y;
zzuw+j9bQ>|X9ttN#&5&Rbzv60``(z|y8lYtYRZz5rUg9n#Eh%Hr2c2<o;7)OP;=d7
z_vM~-xxrjV7tbwKj`pv=lX+=!WaNz&?RmBvTYZ}Co_;@ec}>K}E7hF^46_!r{65Q(
z9PDwTAwd74Y@W>Ow9Efu>{BJyuZrEV-z)Cpd0(YuR+Vp0lGQVIF3j8!edo@(d%bSE
zR{foEY~4q{S(BJnY01U}pFL&}^T2D;I%XT&ll+q{E*`f%x%`fHg~>0gd7UdiA77lK
zY1X<|`}MCaFSoE?UUaLZyV-$PZ*AaIspV78eOcU5svh)C)_+InzCD$EJBzOJ1}rt%
zxX0GYmp57ZTe8om<-u&z_A_vvw0_vITd@B^xWwU2nr^EfMu^oMf3{3|^Xof)x2wwy
z-<6(bzH!!P*|(i>A0F7My$stFHUE|MH0$f9-u}9`-!t;<y{S`u&!_LKf4OGWG^5p1
z_iTOf>OVu&`qg2xv&_S9^f9b6)~IIrt*$X?s@vALD~_fXIM4d_Y3CAAzRWm#k<hyH
zXWQ<cv$(cm>h6i%p`nM`mzL|by|}gIdsv>Vh0a<%Ewh)uSJX=RE%A<iRh1vQeYs!i
zmz%4m&(r=Zd|6iG?CG+b-(o+!pRJqaczf^M?1I+?*CxNr+>tkJ_Jq$rOEvyZuV1<F
z%;WF3zx~~HdsExnop<hE<j}0TI`50$A&<Hldvos_ul-fMwCwNvW!t6&t#`e0cSlT^
z;+r+k=9b1dJkwtjeR}oJ+)qVjM&7-3W_LpF+@7?o%jEabi*t-UwGz%PzAUd}mAqWG
zL^Ahp`q$P4Vf&W*Em-PRGv)N`r6#-QJ6ArkegAa2OFmzzRGIFr_fn;6+m;y$#ch_j
z@7h~_^wjacKb~yemtLwK@|EqHUU}i=ZMiF7hOEqpxm7YdVDbFW@R(WGKJUKtW2f?}
zH+w5TeNSB(p0MrSozEUtduxyFz8es>X8(-sdQWQIP3Np#e&TIZ=4aP(-TU3Er_7m^
zd)f7#;8Lx*Q3s`YFE6)R*tco>i|=oLmtD_vxL7*%t$t|hudi8)u5CP@artA_p8Ls<
z`|_{q|J;1}YHqpi>C1oZZhgPHeb0u)9=A>3Wv`gEb;dQb1%Gzks<)aQ`K@x%iT@1q
zVs%cRUba1V_j<S4&N=UvJdUh+c;%HqhWVSFv7&cRKVSB<?9`MuX1BAmLODdv7QH&|
zbGZ5DspGHn4-0S3dbOzRa(4LbThhxXPx8zR>a4$N{qNQ1@WQ+6*7dD*T=d{YW{k7u
zw96+quKKDuO>9=~B_~ng8NN|HpHAtS+9gE9Z7?o)DciSV+O91bQl?ygd{-QLZup-;
zB;0KG+Etee%*$^V+{wKmlMxlR>e;31|K_iH_W4S(`00<wek@z`XUdzXiIa2x#CvYL
zv3=i$9b493+Rd8Xz3pyQ`j;!yR=<?W=g;-hSN}JA=AQG{ms#_B<=i)U@lGt<*0DPG
zuZiWeZx=PCr<*&99AE3#@}I%@a`Ar#``Ldsp8Hlm>-W4fuS&k{U7oXLW7g!%T-Ph5
zdg@oM1|B{B;ppA@wq=VGUQIPy-B`E4Y%;gJ%lVo2Hnp3smH+hT;<UFr&HtEezFesG
z+{<-&%&W6cub+->uX!cCZqY0Lpl5qeCQB#W>dO@~o6Q_0yHxIxbFI61Tu0u2hELr~
z%CqhL^EOMHo|d`Jz1xjdH`Cql%9X9ZW`DHEy80(@viGxemv_<L@8x$~UUaH2muuQ~
z_sc7;U$H9Lx#IcD{|vLv`s4&|f4Ab^FZ&hWmX-YbQ?=;OtC>sPGp^nG>Kr2WdZml?
zmFoWtm)6vqd)w*T1ZKv@h24`ZJ>csVdgt5LN!iR+ub-ZHnY-m}wcgLn#Q4h9FXQum
zxBB}%ymp^=?pg&~`|jv3`)0q||HCAITl}<I)eW(?VsF2lE?(Vt?~v=;^sJ?EJAF-K
zea{s$-(3A^-Z9fHKlfkB$=?6&?(-G8<<@qqURdAS#y0bd?o5w8I~puwUu6_$WtA-p
zTYBcavE|~)0k782*vluq_4?}a?Hih+vnviod5M;a+sJvYe`@XHrS<geOHa-4V;-{)
zy87SR^!~uBdc&M|<+|(E`+WI!KC^a5o~iHK$GXg{QKB=R?YMKLQa3qk@7hxSz<c#i
z{}j5UpUhjdI5qBb`M%AYa<${HNck16_|Fi`cKZtNrDK&b=c8u7GJj+Eap#Nv*;{fo
z%(Gt|4_Ul~F@N{%%b9h}XEbAs{npytnHq6p%bmM9b1Rc~zOA(_cezlkadH3McUd;!
zi{xUSh2)yInJt^|@A2nWT}4moiO0v2uU<X*_=fF<*{e=jt@X3LG&g@|$=|EH&!+Ed
z6{=O=^UCbk`DYd;ervUwr!B7Un{QK<oqG3e)%2XDJ0o_~B|ENkv0PVbF0jip)1t5J
zINK}zsnO>Cx$&#lUfp(n-=E{bTT2Zd#-Dw$satdJ*O}}6mwlg`^ZU-mxSLj|t6sdW
zn)X}u@!qHd%{~TV8|LqN5V_X<Y1Wlz$8H?reZOtR+>mLW|E74|nX}lt|Ezpl+>g^6
z-tITjl`8u!{d}?S)?}X@3#_!Z?6%`uDs%m-*}{X1cAM^)wk3<pr_1B-iv@Yn&%W*s
zE4}u%K5X}$`YV-H!aaBYy-BFQEEDtmYP^98i(<-(zP{!1)3wbO=Vwidt&-6D_^>E1
zdRA`0D-TCw=NbMbChRO>3t}veZD0Of{#A6Y`IUmwEcZ*R-l$zX^@;sf{?Dy$=RG4C
zO%8djw0rT|Evqmn(lmH3sHy+rfPkQ)C4-UE<`#xa9wA32Y&yhj()XzA>RJC50Y<9;
zNe!XzGm-=~IJ9k+&rwv@4rJ3xQJ-~?gR|gGWI}a<c?7Rr;5&;mzY{x{91jbtxG<)2
z^fYj6`uCYtq?lD(H`3{n2dA;oBQA|e4hGsDN1Ya`dD@+DnVB`gsro@f8uyX_rjQ3=
z96>2gkA(NKFiAcuzOX_yq~H|KEYTyH6F<!Il5x<RQIxcNzm7tig;KJqqslbpV~o9*
z7EHP#;lt|8mvP4PFw3MFE;`Q`&n&sp$=dFf94;Wh=@`iD!K|t9AT&&R@f2R80>?*6
zp-;nhXlew1o5RGRZL}~|CGt7Lfp(!(qv)f;5-gcAsZ*YI{cvee;7D7@G>5;ugi*<Z
zBY0osV;*P06Z6IE`{e`{yyTkYbE@xwaDR>l!~d&~MGvw}R9IB^E>BQ*=R_8kj8EtL
zCE7M{EjVe-^;@aBMC7wjh@_yx%i}@X-}lKq;(r+19{kd7+0qR)JNFgKxL&>+xFuSt
z)b?o4ujCm_&*e(>oz^M%?4Bi@r!~ud*;lcy<^_UF9j{IiIIO_5ra|IE{9V@B)iLw$
zP4)D=&djvp&l6+YIW|gZS-J|3d@dXRP87Jta%V!r(-jNeo>NQS86x7@`J*~Xpf2<K
z#H3uU<Gw-1&F02BO+KNx?!vsq&YN}h^41z&v<tF4`)ZnrP}D1zmqCmwolA<V60-eU
zJy;eTw@F@FGk@9huijZTI!nC!GWOpT*zJG%ZvH<*Rd!G19bfxHf?vi?Tl%@{>VF2`
zE=D~Ij%9D`Ew{xUcHQ<)Ha*~vKiAbiPMY!Nzb4+^_Vce^bN|aV47n*rrRhobt&1#o
zF+94ocWzUWVeC4Y)J)5(=F<z!i~73V%I#-LyWPKX+vD!GS!>o7=WySMaMaK8-FPtb
z%5t~e@^SN5`YxHi;qe6J{qK%lJ6v|hY+rWNSBc|ltFli-%~*KKG<@p(;;5zN4cD(|
zJupyHmk!#_&i37HYk!Dakzb#M(cev9)BRqjn&0LUWt_B~?~(Q9zSEayZ@XWf$GbLc
zufZip!>8=5!bY>E9AqhtpLA^X_t}jrb~Pkzn>9(wpY^fC`-nil!pdN~OE>P`_1OBx
zy6&^r+HCvKW}f8sn(M1;cw3eKGi;dMRe!{BiLmVYRlE9gUY@op{m!*$uGKo8nP-o;
zgjG#W+3;#_+veOkw|ZBd4{CK2)wO8nSas`WnYm`@O7$L@BAW*bb~HCW?$SE$_cJzd
z|MES{mwRtrw|-ZYp#R;1{2z;F-nqDA%lfcu55%r5@U`l-iK^Rn-0Nq+Bj2y$%a?FC
z2_6wT*`NN~pz8D8m)q~B#J^Umo87J@d;ZR;`pDH)v;H1gsh20I?XuYKkkp|V8HQ&n
zPc9!+uD!lR+I{7FiPpGVd6A2E7Ihs>U(GDCP%*fnH^zF(!<+LzE%f`l)TZ_K@9r;8
z-%AEQTl`gjHiM|c$F>G8U&mNa<(7qBS$#%NPs~%k&9JcC(rEfO*W7KdKBs6pp1l;@
zw?M>af_t4dM=RsPI}5~FIa(g=DQLVU{z9OC8QX-exyxG=1Xy=XczQsx%!wt--7{VD
zufUeh2`@dDJZQZa&li!mXoq*h-{lNK;=*Sxg)p^Nlso0RH+aYND!j^?qT-S+lD#DD
zanr#NEl$fxojr_kZI}Ktl<m8B-}C!-w?|9&D(3rK;g!lXSRJ)C+q7OJY4xmTuLTYa
zhc|CCR5uMdt186xRf2<k>WR85lY~4|yi1FXYQtMg4hpfe#He`{Hpo0xnCqfdcSymU
zt8xZ|Fayi+m(8y>9+|2u$@=z9o2Zf0b)}{yUcobd>|U@X<fsG7O3N8Xy!@PG9(X*y
zd7D*Y*Q`%>SbC;#Ff9wX#`MbF{M63Y@6NLeLs*24xjfZ4>~Lw{>fN4pCmnNxj%%#&
zJkm7zee(v9-9DGgnrc=nIm}dl(V%tNl=+~^oo8njF;3ckT3;u5+p{^RS2hJqou@Z_
zTg--oMHeik+Ib8cmv|SSTAuV`_P?dC=1zONEqbn3^>MFXq1(Po_3P}}Ubnq&(zjV#
zLNC9_NX@r@=I*-f{x0X0Rvz*1&TlQd8`xL(aC=-x_FT7J^8<Uo@@?DOfAz)7%l{d)
zZ1(5w&a<w*A9d)}*0o-}rCNOp11=ZO2o9FXJaf66<M7wG814xZ8y^@RnQ>ZAY07tX
zmswe}O4UD46)HWm?W^C(4Y}J(=WcjE#hlyydilakhtTSk7i`XyM!ieUv^@Li#g!M=
za{fB+D7kz4y_xs)sO!bPR;??(?OfUXa&PAR9gn4qo?GZGZd&rd=eI>cFBf;(&7YHB
zIo>(8F~nH!yY}u=>()jstc_}!e#N*_YwyXklatu4>EumQVEJ6A-1tyXkGp5Grqe06
z8a~fkiEBiKmRboE6_w@`7Iy}P2+TP1VS0+1`9qHI>FxWNPADxCcw?Y3DdssZ$L5mM
zbn}AsIh=M&d3`fZo++Bx@-tRkR7LS%s^xcA@g-jGeXltEGA*rUJf?p(L#x%;L8U8Q
zH70rUq0$YzrQ;*dKh?iizUr@YsgRS$3Sl9JgOSQzZBrg}RU1YWH*kmu_VmTLENEP6
z>ciOi_I+<ElS9bPs7H*Ef793|EO_9_B0R;`SVGiz!uLag%?>OlYmV>sQD9iEt{CIL
zDR_Pk69?xKEv=d9_xb8L0$5x;zl#d+O;z#qIUv-)&|v9sSn>Dg2Iq4s>p13bm#7FA
z2=sP0VAfAzQri@Fr}1$^o8r<10-Fz|Z~4!VBETdruwzckg+k#b=M<JhETO``6xkY<
zo+zAP_tSwPQ%i6~Q$p);i(;;+<v*ABu&}wV@+v<icZrd+&8wyC0JG(UDZ(cMyt4Qu
z9FiMiHoiF*;68(eq4RhBK7oUR4sRb?9$?DZbI@9Z$AVu%Yl&q@%fm@Covs)gmwC=A
zXkN_{6v)4gL2AyvdGoHuziH+FF)8+hVJi>+|7(v$tvOU0b~M$!&wtdZ!64z>FgxH>
zqWb(2v&Gz93`-seObRG-T)M>cq@mqmS&xT>`wp>8xl^h+<CMo@wwW#uKKqKK_sNLw
zQe3L0JXf8Q<&FHCBMYYJO?osl@q5w>LDR1n!dZkC^tx~|zK=<AV~SQ@vVQue<X0W9
zrmk^jTC#V?WB#lOQVi;S9D7eo=Kcw{K55f-IeX3@0X=q^;_h_M_h(+dp51*nDr?uo
zTpfW4>pcH6@PtNuUg@ECi6g*Z;*lRNQ?)Eb-8h626g)P%dfi#9v-GRMYG0$O@22Zk
zU*Em6Y}MCSQd7l^-qg*=V*0jofyFZRqYe?t4v*5((zqV&IDBupzocL7Yq2#G#b?YB
zX32JHSWxh;$LdL1sDYxo-HFqx9CqP6VioH*8D@)2oZt{>`$&6li(7|JCp$x^_3z|O
zf4Y|)>pk}JhUtT9>9!S*RUI9bI{a&*cU7-i^~Tn|HaSzfd%H-*@fjI<|HO`29tsz`
zu_ik1(+QKg@y_X=y;s@4<Nf<3<I45Slr1kCcEx;sR`Sy)v#NNl`MEjOnNinG*G?;&
z>aYJnPjK-HJ3CeG4%e9Lj}94bKF+E=>*(dJ5(gbut#RGEJ-GJD?6u2&*8laoHF06k
z_J0o-^G43tRI_$f^v;-uN7pTTRXdgJu0N80?{3c?+$PECXT^T%`LuVX2bTDn8!o=$
zuf1IHp7<ZbvtLvFeV@2$t)2CKaXjnw>6v@uubknZY1JQ?Rq(V_puwkCPg`VpvCU4G
z4tCBJ7L0v@B|m4~m34R&yYg7GhoZ~WgDXXrCoUG?c;@-=Xp)V29KUG2u940DS9$-`
zR8_<!7tV9$6WG8iJbQYW#e`5rPQ@V3&`3u8w`~&wCi%JUUFP6=lzHw!r`DKj!g1fu
zu2gV2;IK!*LP1wWLoI7Zfd?xe%W0zt49@c#0{ASMT+BR}&P$%+K0NF3n#}SU+j*)L
znc5=OEM#Ds6KK%naX^UkOu5rcEw6QH3^RYVFeuI1sNM3pMQsl|hlAqr4a)^@KV%hM
zJN?iqKSj?;2NailY_cwBnvv!wAz2}lm?`A>;h=ldVFq<Zo&FRZm*qD(IGfleHQH`)
zIdZr21phh#jy2DGdOjUvQ+4lAJZL$Q%Zb-wea;RCgNl<*hb&I>Ed04We2VC$FsVuN
zoV?ovxSC}AA`dD2d!fM@WKpIkAeuI5&Yr|p7ADR^LI<1}GT%z7s4zLEPtQ>hP-cGD
zvdO{V{60ow&Gr;c>kXWT887*yzCTv*AnMVB3w!rYI3w`*_ruE!Q>S15JK?~FlAP_&
zmTO&H(~@S=v41MVvxUZT2VOk4)e2sbb4#H473-TX_v%)>bP!s~F`>-x>I18r%RTK2
zyQl2=&+zcqF~yKL36F9wv8js1^JLPG$vMqoTEC&zz*%%h;w<5Lft!|P@$#rlv|~BM
z(xlQgceBu}!>0t;=A5cfd8o)ct3{HJwYZHz_?gSm!WnPcGD}YRC}}XcsT{GGu;7ov
z=}pXTib39Y-%eq03lUl%;u_fguAN6f=97%SAfJvC1E-OvtA+CxGk43%P60-lCkv*$
zi924)#K_0cG;@`o{8GLtyweRNW`2{<XwecjFh0ki%rLR3J;8-jfbm#ye2RrfnTATw
za!cnf&jpJ)LzNO3c|w#GL|H1AuuG^q&QMty^I=;2v!*_WCm}hVZ`Fhv9*Cv6rOR`$
zDKv<vr1@+V@Snl>bkaoQn;g@YEee>E=Ce?ErA)&Pzl1rd3_Q-yPm6qL<-71y(IQnx
z(V6-G^~a$dGbQ}*a?M+CSkr^&$ZwYgSMw%>b52_P==-6Bu&k5`EguE1{;p%;%;5E$
za<R~(V20wol!=}yu09K#r+<4a)U2Mx(ZHE_aAQ`<H_m6-t29^f=Wi>Va9FKv#Y6_a
zA5E*e9&{>cyzPu|2za*AsNkfRTSTJew@H->MjmcjZk9`Y0|dA?1o-To$n6jpu)w#b
z&|~H+hosLU%)2GFNqYEHGsG$$^I-a+W9`+nS#w_O1?eS89$F!NpIHRD)gGAi+?kgz
zu`{KkMVqN9KtR^e;^3-xnWr3Wl64saih>_YlrHV```VRXqwJF5d7@loj=wY0!w$u;
znk1%%a}{3oF}^OVnmXTi?fB0iaZ^)fg-}rZ!lDj2#-c@fGVBs_FL4?yHuB$Pc<-uH
zs~>w$bcKh*B!?LfCUrkQEj5+xePmDBtkho9%%-%K#s3*v>(7b^bTJ6a<*9Gd->>6p
zyyvQltkaAO?+#VY?)h<C{c6;IhT}!q{~7#Wn|`QYwe|j2fj<{7|IM3a$iG;9nSxdB
zKaRz=TrY(rkKTUPJ@4<Y?zmP>jSLpaNqb-2HS{QW`qqZ)s0`OX|Gs(CF6yO!6|WcA
zDzGr6ZvVT6U`4}*cP)mK)-fLwIkhXx<@uaUW9}^;6YXy*Xsui1$vmZ^!(D)9a({V>
z<Wx`n6vZX?m<q$ig_eA_JZ``HA>#^<P-n|ziyLo$+dg|3VAN_9{>al$-`fB5uKrx9
z=~i9KmQ8WCSXI7|=We`9#`dE-wQMGyTY3BHD$6KOCg+|{J=1ch=g(jEXvs;DiX~;M
z^ZGVz;a#S=LEmgTqXD1cro|g>W)yERT`Qa&#isC7a=z)cPHE+X+g*2VD`?A^zSX4d
zlH#haR(81;lYjAkYk26h@ZhOt%Ng%Ywq*;icJ@BNqAay!%7G7U+mn`kEsbLAVT|I^
z^p`yJ=gMhuzC-TJb>~Y2GS&G`&Xc>rf9u_hdua~>zNx?O`&f86ET8d#$;z1wN@{X~
z!oQq4lU`iav=ry4pFQi+l6v*jZ!!uut}!qiV%|Jqdj2Ny7q4fE?O1Z?w84ItGWRcw
zxxd&=$WUQ$S$dn}%TxuC4NY9OYbVYZ^{}`2CG@9v-E#SVvtw9vP6#+X)~Gh?Zr7VO
zRYK`(Vc!9(ux`e(Ngov^O^Ca3V)^Be!;L0~J(ayvE&VyVpLwga|J2qto_P7?>h^}W
zMh^@kPUd^Y>|r`+{Nb_e#Jjd$Q*Qe%=yB&v7v`PRo!P!`vj0?Be^Hr4JJ+*pJH82>
zc(ned^nse?O^o)85>t1j{hPdHn&?Rbk9YnXw<li_SQp?p=i@o$vg>KDcEw%jo7c``
z-Q!xcPJPDGV_U8ts{FU_Zn1-W)a=>Ig(iJJraUW3{mAhJ@4iV}y{XpP=$LQlu;aPN
z-p^0}nrGF{;Hl$Fs#ufJZfK>sUfA>;*W$AK&6aiCI#0X)Dv2d`B_-Pz-pc!SYuVJY
zL<0xS!$K?xVmnu_P<+rSb!GBVs~PH?u9v);EY}&u89Zxn)M~Z;JApy_@T(@~J!f+c
zXjNN$bLp6^+w|bVeI0EP`#Hx%H3XkuQWtEFmG#(Sl4+MVQEAhh&&MqOGCT~M$?iSt
z!L#W?916{rW$vjf?CpaR9xLAnILT77*Y;=HE|dQZXB#I^+t~eT);GZkD=rCa4VYxV
ziAyAisiJJMBA=6k(}J@V3k^CHd{4YQp=P+_6ce{wQ|M{6HG33`CK*mRXnaoY+DV_5
zsdv1*gx&PC7ATzYsw_XC*uruseO9o4`okth_eEz!W=!(q%;e(U%@}L(;V}b)W8pK-
zIR*kwD?5Ydzcp<U4PcN7uq^by&p#nxg<3=UVRnwBc8#lN((OK2lsPadB#DYLdpu<B
zWZ%4@AWooxLtMG*gFxCUC(B~yyF84`ohAq#&@>i$+faNZ-~{t6^|u1=;u#IPzaE;Q
zFj=Y4V$w_<1~<>RAdXN+)klx-v#bm`Kdbh0j=+VN%{u)(mTR?CWQ_IHRh_yj4lwh4
zVo?(LRWD*{v!KQ2Qr($H4hy#I-aebrj-ew%rRjm%sz<^W3C7`C*=NN!cO7b1*{IRI
z{69mdq67Q?3r~ONT`k@hFYRr%*4Juj6aO;Puez_It(J1Vy&N@Dcg1m!CRIc6`BQXq
z{Z{L$Prs=0(DGqV#J;=|=QVTgExf&Qt<)0kTghFYu1@&RAd*}D@?>G9#b4#y5uxe9
zleR8V(70rMu-!yx!3lxQ4|bemF1=E))Te4|tl7qeF<wjBRKB~uPXC#^^UCJB*Lzjx
zt~HxAZ+6cGUayKeiHq7B+K+ry+`6Z)FhD*+uX5#dj#4AeZ~?C~LN1<lnsvJtY~?eN
zdi-+Iswg{siOl2!SE5SR&+y%M@xJKMpG$s+U*gH;;GY<D^>fPJiQ0$0HH7kMw_oL1
z##zW9I$z^WlSlCZ8;{_LhjiNyKXP|g_|(9s@K9%xqhVVsL$Rm(`<4gnQ*@p`NO5ty
zo@G44h5eAuS{84Wr^k7OvZ{R&uZGTP&<NK!z3j<xp5kMM*{w%J)e?g>UCsZonYb)a
z?#gnQxi0n~i@$%A_7sawaXks|kYrz`W>W{A4?VMjjU*mUIP+Yz@Spk}^NGrB(;8bE
z*%WG~^jJK!IBCQue0V?KjgIY17QXd)H!c?@xC)r`OQy>*KdsyU%3NW?&eopiXV*#X
zztzR$aguS`VvYSZYt?_PJ&^s6ZMEIykTZ3{GVu}dzyA3@{PigJ%YTMfYs>fFOyfUu
z>EachOK;OJX3aae+9${>O_;O5W6z%pS?^0YvY35NGp#oKy{Nb4@=+028TL5Y{@)oj
zNBvyyA7P&IW0}FPkocg*nU@o5!zbJdxnbEoud$ceHfrGv?am2D`fnf8yKBEa=vIc+
zw<oH<Bchl72*^!5yKu*@*O%)*{kgJFFXN(SZm8CF*)vzom+570xVrAyw#k<9OU|D6
z*Jk)^x^tIVHM{4(O%G-_Oqu0iv1)~}r<d8fTJK+v=FRt;%-3+iz$J*?WcI&1(ce}F
z`t_P^D4MXs+OVfQ-&5zam06C{>{({kZYR?-OpGSvG|sRPsxQf#={n`MkEQ}kUhKMv
zzV3ruOer%GC4H>!NiR)tU{*>tnYU}xmsP(7+jLij7p{t7RFz`W{OabN(IOQU%wl}F
zCDrJTc+r9^CB2%&w`tRrZ#jtuHo1rt>J(qv$<;hd!ueLqdFSB!Z+qG<CT8d^`)C{F
ztRp1-YjxR)Od*Hn83#&FH+MS}N6nh5`nKD^TgJmO^kFlr7jO2??g_Fk3<0Tp4##$T
zI1~mn)fBaJP34;)cvgc`IPkVmSMG&aQ=<Z7xf85WGuCfk8am07^YDI;OJ`lVH*o98
z?~`8H%NW)8s$|vVZ^8RyUuP~|!%`sC@#er|RzWQr4by9Ht0$QGJ`&z;p|_oP<<d3!
zMj8fjhxrzl_<AZagdQ#Q2-Umg$R*=0++L#P*6>};a?96AM`L4-)CN4iq?hgO?6lxe
z?8avQ5}t)?IWBoM{|l5bwVJ@ISAC#(L(0Sjg%PE_nyW=sKDp&an_n(EQS7sjgJ-G4
zR}GUbTOS@c!E^9q!?~qfWTwV(R=q6b5<k9AVdtIMk@bGhLQ9$qILqe7MKt%6NENSI
zrIKrE72}zuB*PFWv0Y^9dOeAg)m<IFyH`n`YdpXZ(9Zf;qd4vR+4ZHK0(TrwOi;4=
z-g7Z;x8MC87uR~NHF%o0Y|kUHpm%+ldrL~am0w+Ww&3ydFcpmh$D`N!a8+^6RAqQS
zqqp?y#I;!;{n@8D1+G$Gda-u(rtalItG7K?`L5g+d-K|sHJVp57#TAoEd-};Qed2P
zwT+eMBF}SK<`vtjN;TQTmOQiV6Io@#TFl)yEtG3UZR4}6Ka!fGCa(xBwOZlLne`-?
z$zG&n?OD&{nWuN>Cm$9`iMlLq9`?ngD`{<1$*qp&nTux{es+&v)%w18$&z_j*evx9
zovprhi~VS+<2gx%isHRz`?fEc;HUk5PHC-erP{AuGVh{~X>Ul=VA*jpWv27N^8p#2
z3_KstI5=@|>s<b=(CaXrM~#79!Kn9Gz=>iRp5xa!L?WjbemurE?;P{pS%-`tF`Rfb
zfrCXqb0VjPtJi`6i}OdEm;+cvbo%T!FFdL~sjR84gCS$`lqtbF1yeYEOg$pmMFYIb
z9A~O}CJHI@-8@u~Ij7@L_RTpArl-16(iZcC>m<smGcI}JUN2F+q?z;K4dxw(W^zww
zuqbzC+!x9+X_L#Z;J+_aK1}kN_Cda-!^&YsLCl4uJ&C3ZP2~c(#6F&JU{K{b^DJbh
z;`B{o69g<oLf(EqC~}2`YekgD5fxpP2~3$57T@J-7))B39|p0%nJ{s+(q{kr{KWxV
ztXxu;79A^);@NWc##Xg`vKub&Jdc?!{N|8|=klon2_2h{|6LKl`2XUHnicCZOXqG|
zWcku|)4|H*i@*2XbuYb@S9ZHaXGdnf$;WdVcf~)wPkUKz_G+bh^fbLSZ#s{d@UX<6
zo*Oo4p;XfD>mlzf-4~W5dt98dZ|cwVO(kp1!dKkC7H|DGszLaM)DG9;XWxU(?wy<N
zf6Hdu(o<3oR|w7jHsh7JXvd;oXQb{O2;3+5bl&p2df$D2{d)Fy+1%r+UM0=m$F|9F
zbN7$VyL)%#?GE)gH?dc2hv=&NXN@z0p9bv{ytXgy%i8q;C2bF1U(Q?|c|O;wU2J(d
zv&^&|QSY2$T2yu~m23T};rX)N%!O-xKy|Fee63u!T&v29(w=$8m#K5^*tBEA?j4(U
za>m_#w>)s2hUhBGP@9L!GI~qr{79B<e$MVb+xl3k>{R1LFS^2Y5<<#!>gx}S?wk5E
zdT;*IsqZCnayeOJ{~X_z#q2*l=&_XIiS3_~Ew^kqzLL}NTE`N}E&V1_B)zvOvW76L
z$lmZxUiIhTEvx#)+g4fU-F3fuM^B(?$x0r1Njsa3r(UuKEOVAS>a#Iomw4!rxiY~A
zLxS4wXouPMU8tC_cWsCEib%nh<wBn=CUx;#6~5i$T)?4qan6nk$MOeBSDH8caPt-N
z2<v1LH`@I7Vo$JLaEHYy^^_YPL9KiiN5kajs7^F`S->*!x1Vsgqe=^VpNGqaQ&P{{
zMMGk)I!*3b#6Icq(se2mI2}?|%WRZhPZx^lROq#uWBPOR_oMHhR;GWgI<rFX;DW7>
zMbGXD(#Y)7a5+`|==Y}`q5oDEJzLhR%f~M3<>9LS-d*|X{eSx>+`sdmVWxDgZT;%4
z_x>I;KdWYwf8|!Zy+_eH7Hj!OY#MzW)rNKwM_)h7erdfzY|iNnQ-9IIOY?p#wrE&q
z%pj}X`2IhGN#KEyhadklyj}3DKCpb7+u>aMWvfna_P?@||8lxZE7STv;mzt*ak=b|
zyIrTQ_4=9-SnKrdQsIhHed7z013o^}yZBGzTFKvuJMT&yGUc|4x)bDj{@I&n&knyk
zdhy$pwX6nLzIFvIYfY&+cgb@48>hX6?*gN=rrm4`%w6Qo9p$&;!iASVS$^*OJgF-6
z)Up2zA`zKQ&zX4cs4izHO}+ooZQ8w!+gH4DjSli#_aX4$Iyu+l!4vmPOx~QfElW{z
z3)|&9hKkx1CyR}9_Xy5*nYQuQteEGM9ON>SYCmeOEP8lCWcjCCvvyx~PUh>qtg-Ry
z9Pat8W(G|wd{TnVMH$Mr2N%xFp2LyxHsg?X<)Y|ViBtV|UbI=R;B<Sf$5752+~f7V
z+?Ur}JeTiKsA*&W&8s|jbmj)-mN85SQFb`Nd|OI=)3dwGl55kxvCJ>{@MGa`&!f`2
zl3p$Lo?jvtt8jDWJ4I!?b6cxodRMOP>d#&_C*WvM@41Ig3w6`xYRMcp%FuI=$yoE=
zy{)GlGhC#mB|Nv#extr|Pl0}k9Jkb^;wc{<O6sMC?Vfs6EFt-}=xp8_A<Hy(FvWzd
z((n;7+p4F<<zgbWu1e4+$v?~C$;7y0GdJ=l-#d~&g>BN4x9z)~qklFsEuJ-L)-&bU
zl+yi|3RJdb?T}-va69^7=FV436kMkFm}>Q|O32>%*8A39Q>L}%Wx^7n>wF!2W_@}T
z<0j>#UXr8dDCOO;>|1lk#CdEc)~zR&Jd$X=!DMjIqr>c#XJv;u*E<vc6*0%=svZqo
z!L;VFXLQl#+F*5?iQNva{x#MG=HVu%+x2>UofRM66s<mc?9$OEyWU@2{jo|%&t>||
z6VhUCdJ(E&mbqP@JzX}M7iFxf_0p@VWbU(gBv9D<?`GYlp5&OrDw)}#;@c&j9$vXx
z$am}Z6<<GJRXwu&Zo=E1Y59AjKCZiW+adkPExrlDp(~QkPu7`t<NKzWaa%s!`8|2z
z^{gvfSNgtN?PIq5&|7;Shu?0y?q6TL^c-u<h5hd)F0)L0^=`+qlUFO2cKa_lt5SG%
znSyNf)sk0M3wNdM)o@<1LF>dxmLsOeCZ9a|W6HmzJH>Z)KPu%al@74&&175=k>+lw
zZRFz|adp{s@#RY%gw&|=&3msC{H^!&`u7>#clRCsDza)i+cgun>C=9;8a+3Tn_fF#
z$9-q%^uWk)>%8~dHeK6lbTMl7%4t{fcCO4c3VqD<I3{n^)X0{dX1A6HmzR6Zd+fHB
zt6|q%w`nitYr01Hf8O|?q0F-Fb#~y@Rnu1A^`HB;Q0msB3(xyR4Zlh*vtpIHwTjnh
zR@jmUeVNK;XK&5^mNb92z}-trgS;cdJ$Dq$l!|$kyZ(pGuE}rRx^<Vkt@tJRUgdFP
z<E4`Nf8N`-Ow}#E;~{A_`>u)J{NILG969fZv&!9gzE?H<Kf|Xss~x(vT&1f*vJ0{s
zF0WiNFX3&rtlpE`(}HrPQd><eLW=hKPrYMrkScqRd+DnETh-rL?AiK!xyOvjiZKm|
z_9cf7Pk7)U$zarSK_-T2#l&Fy6j46D6Dy2^CK+y(l$a%Lpceg=Cn2H5v$}FApM#;c
zq3V<;-JA<K*hST9?`}B2Ydm9y*4{q}Et2VWG76^5;*3gOKJz3F#&n$IJiVuUje^K2
z8MpKf0hy}|PZ$Cp|6X``7K;!!r)+a|qwukU1H6UILi_?NRgd51*t%qykjt+HFIndN
zN!YoMxm`kw*@VGm`3y<t?v$|XrYVQ`CO*{)OJO|3p~B#JV#yQ-X3vart(Oivnm8Xa
za;RuNeLByZf7YtSOIAPZFt~KNTV=-M`4S9y6E3ezQJv7Z)MSodgw8bCGp3411AHcN
zHbu3DvblPGXyCX!`HuB<(N>{iy_M@Y7J>G_UD}y@-{hCw*RxB#RxA&hX|_ON@*#aQ
z^Q5<HS1bCoGI#V#2GuVw^gPSG^sTPY5%rR)d6x^9Z~wB`+-kkE*B#psrM817Hmp5o
zT{Nje+%0<Yy4HzbURo`l?|5M1v#jItKet{?S}l9UI+bP3!T$_l$5_5faJ`y-M5)pF
znSSW4_^`cO_7tQ>^;Ikl$jZ{=oMu>X`(Lruv%A-J|DCpMQPq^nS?&+Im5=YvihjJz
zqPpXnKHE8qTc&#JX8O(M^_uE8D{-OUnwBc}r}2L8tl5sdvhFfknl9D2n1QSCoJDi~
zw(htMOLyGhHS=4&#^8)`piJ_m-);|`i?%Kg>veeclEp$MRpe8i*|(>=&v@M_$hS(!
zU8fm+C+fw{gcTAGqGSyg99vv|H#1kvbp7k4)>S%slX#4KeqC8-Yacr2%V)u}|MG6V
z+b+#&^(~Zh%c2LbS0%{0HYj<&nX%~B{W8PJi`H`P5zZFdXmLI0eR=e|=kbNRRI@EJ
zw@hM=;JhULP->P_Li6f_4h%=8^FPfLVoC1ld^p9`<8#@A{ZZ~!ot~~K6MJ_(zEYrC
z*>a98SWqy3W!PLZh2jps&l)op82{LCK;%Qm&ekx&?ieni$#FM&-zfcVOtUV!z0PcR
z|K&R@PqS}TG$|Ke{wu%d{rQZAE|(X`o{d_dF;lE#*Sjq`<y_XeRaczn9ora_vdP7;
zDDQLnhMfF8YovXr+^97(^Ga)~SW~p{)YP(rVg7uNLw39j3#yPQUv0*nH)plik#N&@
z9^tJ424@mQ+Zk55SspuFekc3g^{?g9O<VP3SMW5<;FS|xZ>(6Y{;sdtKP#xT<jA=f
z_qaNA&+k|vccriK(~6*fi%TATSs`^-*K*7A2QL=zT*_{IG_mGq)UhjfwYR=?&&XUJ
ztimOC@214FU;FIl*MF!#(_1(HZ|J`7zjcoP?7PnUcYT3w^2&xQM=F~S`tPYsnwjr?
zZOt{IUOk36Ogu~Pir4MWjemV0<nmcBuTYao>n@vKIcwOwH99&r#%L~gRB7pzWUW_O
zI}dC<?-J))YBp`3)+4Vu+<a4)ZhfQkB>L{joj-c-`@EelzwmGN%1u+fC)HXd2t*xX
z+WT(a8k4R2v&_EtFU^nL`f{h1f$6k2B^TOSW29ov#m4h3&-`6D)Ay>`@3fmQMOIDN
z%E+ZT@hOi_ip_qWFmvr)c{?(aIr-VW7JSUVd(JqoXx_7ofWoc=xz|3N-8ECsZt=|J
zuIhp--|781)zT39pP~F=j@X}r_wJm(91(HJDc}lkt-Da?v)}Pq+P9~2O%K(~SP>t2
z#N<-(ti5l3q-|{ZefCr<|I1~Mp7m<^ub6i5T|)N#rCaTFD`WoZUR`Axy_M57Sh8&9
zyV#xVvjg9_y?egas;m3H+v=m6Z>b%>ta!L({`4H#eUI1unzVSu+xT_+;-7J+P1{g#
zJs>l=zVo8l!ELemhvq%sAG*5S>C)ufoRwGfiZ*q1<^E?lwIt@PZeM@mp7^)R=H1-B
zbJ`Y7#;|v*D`TW;wv=p*I&Nz(HqEtYXO{4-MH|jecRKrf!o6pgD%ApB*X)1&D>pUv
z*zC8rzGp^#cMKJ0vp7=!O+DCf;iA39ftMA+6(1DNxp3_Die2x1{o2gFF5+}>;FdcE
zibs-vB&{%)J@=oXVBO(LuXD@VrLMlu`jvQWM!e_#+LKFr8&5r1a&=emR^gv!YmZuA
zp7#9W=A5eV^*N5U{1FYI`=;hPCGyXhk$7*{?u<)!58b_b<Kh{;OeUKXY`X%RH+=OE
z3|jN-MvT<4x$0L}dM&<nZ0D3?%h|g66yN>U&HO4W{&)GRwcCH?6e!*Mx_H*Tb)rlY
zPd&2#sU$r0#Gitm=}8N<!ya1wnY}6YKf}H7Z~M00KAQG#-{duiKPp&k=-+a=Z`zEP
zzeWGHel-hO8reGa_Q?PNPoI+787uBuwHiGS*|t2c<6o}QwO?!BCiRsabKNTK>^Al6
zhFtHgP03qc=oTF9&u~#Px-%zhsl??wnY}wNO;-(FyiE1g((cUA+%Taf3$)TUpEA1h
z%tT4lB|!D9VMOXxt$9;d=q_=Xn6`9UvS*KVyWy>s`*yBe@{;jfq2GtcI!V({_C$N>
zOxmoO_OH2m=bf9E?K)T8&C5{Tb!h4v86i#Swhy<w{f?bmdNticIe_&?_p2fm-dmd^
zzFkVPH0E_yefaN-R-Ty2iqO>UtIK2OEe>VKwORJk#7>=iYF3EvUbeS^LAqCGm6zO)
z$Pc+&T>bKGbcWWeK7}dY{Y1C>ed)XDxAfh!&xKBtJFbSDTfD<`!DqMACHLOt<(}BL
zTSUXqBiS<I@_z>XaNDSrr><8@itrx${PlP1(xXq=uikpOcdgjk`sJ%8bLcO9_3}w=
z$iI~Ao~4B=j=t?WH$x<5Gk0L|tov%ePX}Aw&)Bvqa{JAIrTz~S6IdFL9uCRP`1-cD
z=yK5f1zxPnDx)^3%y`;&cZGSidD-n!DYLkJyQ-tNhAg?|ecJWC>Q*iOZU15eCweVk
zck5EN{OV{E4~8W>TZA6XPfZGbbTm2qQF!`{B`>$?8dxo@^2s%i%Qfw0KEFDw)yQOz
z{$~l9wYmPgKOWuI@BPI*;?1G%sLLhC)L+|AFFHP9*J~4{&-E{l3*6j(>y1+1q9~un
z6YFFzZr^_A{n~Z0<=d{6t}R#`?KdgP-B?!iR=r-Xr`r7X4ZA1iKX=TXDs{BJa@ni7
zlgnP-J^k<g-RvuCy)H(l*oMzyntX0=NapmSS69}h>zlY{Zi{{Av*MScW-!CvcN&b!
z(`FR@IP8*VE@i#$X=2o)4_12<p4m;lv#!8u%MQLnm)r05=wAP7Hf8Ob+*=P$Ifpvk
z%wF-h^PAiJGrB9CX3tsqAo0)o_j1p^tyuQ*O5dfmSym>d_x8$_aG$yMtYS&dci&Z8
z&ss^(RZ&{NzuJc{+4oW2qr35`doC{9w0LLq)zr%xS5_T*bltEet86Lj*^sAxv3%Xz
zPHzv(jVjff5;8l|V=7N}KwwH|{-GcG_qz*bMJ#pqbx-eksqya6*5l6Mm&~TU-E~Ks
z>3LUXmW=16SfAHXsRyN3nD$Ovu{CtXb@3~!j*Av8+Op&WqtwsZbMf0(nO)J0x?Z?6
za`C0f4y;R68sGBHGi=;(J9yKMm3i0pdkbH#tSH+y<<iq^wi&N}rEA?i@n^}bDaHPA
zk7vJ%@w_=}$BV~0^PO4WEVF#Ed&<;HHD?u5HN|RQ_-9$@G|!SLbhZ6^QPXOcoTW!0
z<5DjXp&tv^7flL~^}fNMzR#+2^OO@8)g~RZ+ri?|e5_8+&|bMym%(CAUkp1(x^c{k
zmM0adLMx?sEQLH`#5bFAb8KN;@?b~Go(ex1l}(`>i5W8)DmouCFfLf3ZjkzrFL;F{
zuSW0=_3Ipl2NDECrYyhD!n~}a#j3c0t<sUzQG#KDSI<;Y2Mr&~GX+}3vw9jIv$P9N
zWOL~(7uvumWqE=jU<LcPgS>3X!V}*&sW}{P?tFYmfXhLHN#*1bX9>?DMa|W!55Fyt
z^<(i_$-02o^C)|kqndk=x&6%q78apK5vJhnJq#vn^B7LwXV}ubRJ_Gz<s{8xwf77J
z`6h(?yVStELTR1dQGtb7S<82cFo8~25Ki@R)7E2HwWjpny8FJ{9p8SNqP6;6t5r%u
z$+G1-h2M&Hd|J+<`dD)N?AGjKfy+bRnY=v|oT<gAP%NL4w==(H?a9BL;Rg!micAl2
zdh6vdfosNr%FDiN8G8&O_<|gakH2mW@`=oPs5~X()v2!rC*>9>TBcd6%T9l?_V3xK
z>1Cy>c7A!;HjQbinCJUGleBlj-#T9ZXHeO4dG(I#g${QWeyhry^SYatAF6R-*XboI
zV=i9(es1^8Us;dtoiAJ^{o1oat#Hcgxy$P}R&HJEw|8%uz^{a+1<i-|**w1X`^NoU
zliod>y!Cdl+Em64w<+pJj;+qSSMED)b$0G`|Aot{qP%B5y4rcX>F!pQm-Y+l&9oxc
z@bqoEvFgjWpK%8{7xAymi8WksR{dMDw+oAew#x?A%aJDvd+!I8`d$w<YrJ#ga<TL2
zz{O{T|Gu~s%wo=|)R{a@%`~#8B>22qs8#ss37(#lm#qBorbCN)g@#??d|_cBHbs}e
zA2}8Fu9%nDIrETu^@EJAAAL)nu|-Bo^%RMj2pM&~j|;ss>vdMDYX{@OgTX;_6}}(4
zdN(#PCwI;2hF^9wt`#!rB<J7SH9c*0rq|*uc9U7Ej>-meIQnFJudUNGZM?Aid3R6b
zQO+ytCdSzrTo7D%prKR0X0dv~#`W7{uWsIv6|(4y%C{4OHzl5~;F!{<Y_oXX-NIK1
zOAWXgKYg@#w6XeU)c4IL>!j>8&t_DpTQ0vWVz6*U_iCoS8!LY%ddGc_T2MCWT0((B
z(%!Xw<*S1~$#nfazvlXZf2*!ne;4}tTVwZ%h->u+ufIyU_Sx{)ezvuKKW0m>Yq|Ym
z<JRq3N2*%5%R(+N=*rK0#cw!$<>I$){m+8um3QSl<+<eZWSzQa=6{APGi%Yi)_D_C
z%jQPO2F+g99$1)V{gLBn=GK?fG(LBAzcW9^6?60WGCQ?P{~3yJua4U*AGDJFlIr4#
zTcf<YV}xg=`|D(E&z=7@b=l-Qx6Z}8Esk-U{PE!j_1Qm^s;vvl<63hauVij$GS%e2
zvgPj5Qf4Dlw}Zb<cQvmlYCAK9wN$`mt8(}(t*%>FdBuyChlH)KDt)u;`UTZ9yu5ma
zL2IARjJxN5RN#HI>)ERfdse!1|4w_c%42Snuk-qkm-Bxny-fC-n!T*5B}C;@6W>Hr
zyMP(Tu9t1S=s5KjcddH+teG!u`m^`Fn=DkmRBY+0l|fr2j~-le{Zi2ExL2pnPMh}Y
zWmm{e-=*s=U3zqFuKQb7J%e@nzU!{Mwo<(+AA024n$Y-wMLTwXyDWKo?ZmIVwW$Rn
zhdm1%q}IKel5yAJYRVm#3zkQXJJgoEew5X<Bxu<b)1EDRN)O9@SQDGdHR*|Oe9B{q
zaOvEyUh7OUwdG0;OP(95C#Z{MOIuxy37fW+^_I*EDWN<&!GfD+wwqp<PB;6yvUy3<
zQ65P}y~8~bJNfiFMPnWwE4*v=hc7evQuU-?Tcb|tD4z{@Y%^JZuh{aEr&8Otyto}=
z@=jx}|BhI{!h-z63J0#(SLsc+HQly5aH-crSzW(L=O5-JNqZgZ-5eWuCR-+T*+L#Y
z&(E9^t>;{xy!hbq<4<+i$FAI`oq^l7-&-k|Tk3P?az<d_Dwzu2z=WGCLl@_lG5fWf
zSOl$LD2@}WQQMfZAyw&_(dww@JEoh>mXp<Em*+Tn@5SxND5)N=q-ENgipEm{i`MDI
z8tYDZ^)0CP!umI5Q@&?U3tAm}Hdi1%wnxGGU8}If(OKKQa`_nSI!m3>K77wQ<Sp9Q
zvglXHyYEtS?yoFNW}LGzU}OD)e*urKGUz<m%3YGXYTMBhQ&0F!oGKDF*Gu)^)y(ZN
zftz-${&Rfk_7x#Jm+(2fKPoAjlX+Qb^YIlC2mN}L4ZKtTu^oHA((GA$?z$bzeXJf`
zkeYJK<y?M-?M1%panl~A`n~Lk4s48_&GA%`|DIsnb;-ApR+o2F-&wGHRmt=#0pE3}
z+>H(0ahGG&HPKYFNi3JA?kei>+Fi?aMr{5PzU@m&HwdiQu_VS^DsO4PwVnJI4)Lw`
znWpq0=A7Z=oZCWD&w^^JrZN_LYfiY7Zt$Gt+tH1a*YRu$nG!T5_`uW9s_Hz^InlYH
zCvEi#84XS;FW-D=(Uq->+!mEGt#f{)yq%jnedDsV8<#CjOG|IKEKz<t@2;=+El$O#
zi$}w<#jLj*<!v!OW){4v$>*urY~@!1CcX=~=PGb$RWB~~&s=q7$v2a2E1$7zo@s80
zczJt6ShmTw8I^@RMghr*@mYLZ^<23YY&pBaOZ`^YQcLdxJ9ixG)|xf#?Uk32TnT%<
z`xKN+vZm#=^RBybBgj;fl_z`7n^UXz?Ko2_ZZvu3(skG8syf}wnttfXXSd*9tsc1v
zPjr1P7AT*7$TV?Qw%w(LaaXsz;+nQf%ylV$=B#7UT6gYR`6(@)vsKpDfcL`9*5Bva
zLKX&oXPewvEi!>Gs=%{TVq@gnHJg*29V;)b==1eF{%)7<l#UgbW6nkUyo<Hnu~9BE
za4M%(?6O4*PkgT2{OP#M&fVrya^J;7J)gE~uIr}>mjvs1!glRl_~_p1S-fFab+4p6
z@cHn~?bW?|OSewtu4<pT!so*kQRV!lp8v|OXU(dAn$y1KsJ-8mSx3E0K2CC;Zy@O-
ztNUr4YN7f~UIvpSMh6v(@`DVG9V!W82^Ef97r3jZ*#<tImw3s;*+8YZWma?4r!JL;
zIv@TRwR_93XBrzYaHzK||J}l1(zNHWidub)5?6(S?febx%*MKY$|63K#9Y<7mOS7(
zz#Qb|pjG?9gO`D6d3(Y6+kBBitEwvw6;5!PS^lwsaj{Zb;lq9jmdp%24iBc}o_#B-
zgaQ;KBold^c-mGjQs+o8K56dJq<UOJv1FBhANQS2O<nEkT8mocH#sO)dihM^dX~U2
zlSf5u-SLA=>AbFyE6x_j-6)u06*93)P9Sx~6pgt*{WTa8o@$9$70Cz+v2;3_3Z)rO
zzv)rkz!el6V<W_7*|4y{`CS{M(=3yRCX@WPiD*9PjS&)IsQ-WE(b}*HQN5@6&OB&b
z%C$iGXmvE_<v_)rr_)*AH=p8eFbZgMOqCT<z7;BX!qZ8;$7)FfQ%q2{o}~)gnYq5)
z6Hi5+3Rta|<gwu4F`asDv1P&ydNKvUVsj_2$jaGm`*-W^Nn0;%T_?kFNroqMj@OC+
z6{loL9@Y~~K4;FXNofhWvsh=<{0)=xEY`KGaaDNmXq9@;!*kPTpW&Q%dczrK%|$z2
zGR$Odk~5fBD#Y+lxKFsZTS+-)S6j+%KI5>)$9&5**KBDy6`}KR!RG|OiHe6JU8c;6
zDOtd(ZsKZLUK1$s$BBDJ-^8$wTpm-BLsX3>Nft)TTV#1;o3}y1k>?WcXJ~Ll_WWna
zQ8M3@k!{#tJde3EMDje}jdQolYftB_I5jQwt}p-8l<Ff1VzN@74!Wo+OFnaXep7Oi
z@5MDz>R}uG+OC(~jorO>>N|;RcTTRWuKwIxs4Ex1+%}<O!JKPiLNY3h3VbIIsXP$9
z!=_r?o#`@3>2}_lEFl%X<)V&_tVy3g9a=nd_tj-m+;Z1if6RKg`tORxivwDphP>rG
zq;Iby{Ohbk?9yiw9409Ts{7x|Dc&14f8vumeppL^_vfUDv)?Aqd7E$2A;X~N_b1?Q
z<D%z_^H*$Kd}U$jE3pT?m(HsET^r?N{A_~nyZa2bO3lJ6c4)aBUTkLj>UQS7-8;6~
zFWixB>7gY&{fhr)i{t5MUxw{kadbUfUdy*%v-8zn{AW1jeQ{gK{L-kkZtU!QX)l8(
z2~5n`b?erJ=YgWOvKk%tLRiG+x(3fEc2oDbvG3l}G_jY8QrDR$a`#tsmj5kzVR8A&
zsU@O{N3>2A_MUye%4%{;s+H8*bz9x<UCzu*+F`iham%xtuB#q?mG-{0GqY6uU-TBH
z2aYGt9zQKwJ8|CDEYmnoou@2kWh}j?1sU$$R{Q={%;U00$C+YI)vR2yfNNLN;c0W9
z@=2#$cMakSam_Bjv@-C@%MESe#ZEhqcrHmZ<=-qd`|rA5sivt6ZvuI~2!%(VII#WA
z(M2oPWX(M^)j;>9>Dm>cz5)J*zi$bd+`emTD)H%o&Z<nEYOico6}f`0%R1XWF7GJY
zrMG?IuP%|B#V?wbzBR1w^@%<I;n=VCz)L1Vt7p0%$WrX7b(qIsu5tNo&rOF^EtB4K
z6Lv1EPQ{z6{Qm@IF5Yk}MC5MRm7SLglIJvT4k?@8CUxhokb2?4n#^xf(tYbxdQENz
zF8LSDJ?BD2QEU6HsCRc=1GDQd-Ck9+t?v5KW0%u5ZIfCNu-rdL%ap@^uf;i&TNipX
zx)$)sUQF7o6&9g4*DGF4o4auFtE{)*{_alOo~vz@d`)xRGSzc$yVi%i6J32Fb#at$
zRG&$M#F<4$AAUKIWwUh!_tsqwj7DA$m}@TjZrvii?tRd2&(gKqKba;)e+cp4H~(Qr
z+$v8Utry<?8t;ndS=U^xEisSQooc%~e{t58D_1PinkDX9F89=GIWcod(xpV*Oy(Ir
zCsUG>OkO;l#jJ9qW7D1DH?HDocUFDQTc)d(w~X5(AT9RzhNE6btaI*Zzxi@-x!03f
z-#VWj4)rR2EN=djahBwVnZ0Jew{6?1b|;u6Y<od<%zcxtPxH43iQafH>zu_8;mw6%
zfzxjvKAMt}`)jWA<yEE%j;r@PekgUjZ~jhE`S8qLrMt6a{kJJtO|;KkYv*z5c7Cf@
z$m^XC@2!wpwf)3%&aC)`zc+4QPQJ%CUr%ABe-5XnO3QN%&#jMRx@N!KHMQ@{%b0*b
zqeI_=?Re_8{Z<xRyFO^|f_<&#!rf<UJ{+9sdhS`n3MuViQRyy*C3*?R7QH%l@j;vE
z*@7UCLN0qA^}O!io`=2`2Yzskowj?e`wOvYQw4f+isv}#No_Lfy{y?jErWmCs!M%~
z{8xGi>{<Jtp-gn@)*Z`V8J-Vp_Kf^HS5+x{_k?4!bR3JGEoxDk?LYINow4W|W7VSS
zxk?+qNtBDub-gO-+8!;v-s9lg06D2SO5)c`-9@sa6{Nn+yYqLVVu7-A_ryuUTvDF#
zAKiAYtEijg*AUS7sGCpoRoF}SJB~ZT7N?y09?Ha3@^#9Q#Z3y2ihAc=yb_{6b5&J?
z-2!GqWBsjC>z3TkTW5A_UNhHap4t#Dlg5>H9hXafmdeHOu{>-)Dl}EIv2=%9+3kw0
zjhkOt7nkmRJ7r?Vj;rg6xR{xjORAnNWT}kdc{XF>ob=esOCy4JGuXaa@bMt`oD20g
zd!kBgogSU)apP>il4{9g^13)QdHsumyG2iL>Ni=3*W5hu_4bv=H#cl9%3XJ&{<DyH
zlgf-o*1HuiUEvJ%Tqdxrqx;Cemxr6$zLopgDn65#dWyqQ?|zMdhr@!2M<i5)UY>aV
z)!A<*k7(rrMkb~P{>1paB%WnUD?}%qVhGmReVM~pSZjt?0E1ZO(STYDCx)3x3p#zY
zI*ej1o^vdEX#e&UgOK8!>G!I8_>w$-%+?W*dr|O|b*4~DO5x)}(gu$HM`kSINnqTe
z<yHG3&CzM`oBs?Y0+(01DLHvQZh0smmhz;K)mnjZmDHUSmSy}ks;Zwg@3uU=A)3kj
zR*glF=ipM+-(}8>rVo4+B$;PD=DcI$^s$Ah@uT4V78M32uXVzQj~p;zwOE(^)v2k?
zePbY>Rl)mK2KoP2AGP_ZNX}oqPrh$uL&?d?f;kWO?Amyq?=pWD(}R6ag&)hxdh}&2
z;*^sRQV6NESy1fJ#5FJU?+1yc;>`9E{>3F%t^_khO|D?&E|{h*H2D*Eu)tZdtip3f
zj0$b9EO=7d3K<ns@A<_(Q<Y)4yf@i&)w}5xUyL+f6=v}rR-N^+ZKuc?m5&yOmc0?m
zGzq?wcyvjpfalZmhPS0I<#J>{WAhVYXzbwRbC3!)?ou@RaE^P{(ommvCUwr^o|=X_
zg^gOt7uZ5=7Hp9bs_{H7aeSpwOovO0pU=`8ixgKZunt*zy65l2&^I#_*BxMqJ0c*(
z#N)}elfg#erQ_;@MQvLa9*W<d(RD3jvT6Xk$#o^Jf~E@`1*{w@oehs4{%2@$m^4M=
z>8;`;2SOC)X$34Rv=n4?nI$r>-eXRTUCfng^GDJ?nZ;_2+<^iA*w>qctbO}BHf(9W
z<I<~HJ4_ca1zERyezDzlINk5tdfU&lyr;7$dZ)|W51RC6>5tbMRu$3~nybG(Uo_$G
z^QCXgqkmbqK0fQ{ViCL~jnOJB__vbY>{6@T+b=I(;8i%Qm*$e--kt1S@mA!<wHAHl
zfWH^_Z%dmUCNqK4-T3m7`%>4obZ(!vvaBjBRQubt`sH`$9OE>w_j9=H8&dtHc>9lJ
z?_Ggwt3yl|mNKj;HcVbyaI1Q+8WTeX@2bZJOH&_Q7Pu45oqcib3QrY1mQ;;(Pr{zN
zuiV{Vw$gmwty(p;`<v>z&g?A>xx#39c87dQX4aK2+Con+EZ@T}xqOq}yoIH!P2}AA
z&q;`Ot$n*Y^V+5V3}5*sHKf#kI8-UvGkf;kE7Q(BT5&OI_1%~3mTM9YUuN=qkS@5Z
z`_;vj9-FT$h<RS+%O1@&aozlX5=WLsu3eUSMOEUykEu|~nVH|*&gZRuIpMkF;yD$`
zE15oaluffSG~Bkm-fxYT(OPqp%9sx`{xclOoV87VTKTr~i;iu3Hu(mlUO{z~r@G})
zw|{4+#%Kj4JI@hWz%G2dC96F!{%t9<Y`*6K;a-EK7DpBq9{sxF?((u;o4_kt@wscV
z{;iwbrNz!JV`1v!^!L(Kr)^i_BUw80+BD7wH}ut6ovzv`H+SidCD&`GF*sGYO)t1p
zetPBI&=t=ns%iQ<)dln(NEh?^Gta9<@nF{OC(}D~Ue&GJzHR$Thh1~C`aVZpwwmSD
z|NM$P`=4d?Mr?;pzcORn`%-tu?#eaaBr{o=qudXlGU(dwp4)flJJ;-K5Bj!mEnlAP
z(<!wm_5QDfqaP;v-il(Lb1CNOQKqClN!!C--Fw!ZD&b_Wr@eJ{b#2U&XAI0Qd)l*j
zuUt_}INy^P9eyb)#*xpMYgVdcsTONSzOi4OV2M%1%&5{6Q+R!OHKd#U3`HHTU#gB<
zy2s#`lKmCabwL75>XxTBDX%TM)gt)b==+f+mm<Fj-g;F$YkTy3k37?rSLF6xF8P#U
z)0)9D`KA4v{j<-CS=%k%sNouQ?eN(XrH1^85odNUPp{CrxK8Sqf7+~;Gn$9}GrCH9
zd%40zzuoGaYA)z|+)P99aA(ht>pZ+CZ|{^hI%c_V>E(Y<He|`aGGeuyaaG>M!Mw`4
z*JbMpt#x{n9h7&OxG!G2N<MU}|B_YFN24Fl$<z5(9lLL?vBzh#JSSsL`xHLoWx8J#
z9wzE*=gwA(yvlPmY<K2%mIVUqoUc?EuHLuQZF0$_CuJ6A7Ogt6HE-S_@A*qluJ$lG
z<NvD1dez#sE|ZNck_?+BURc+gbT3W#)%2{GE5c1W?%Cc~)?Hfh++wkxmt(v6$&hub
z3x0H*xczeHcZN9(vrdYoNh#~ExObscZST1i{mUbE&A#<U_-Fk)SEH@H*3r9`O?0^K
zY3wXE>uT@R&2w&Um~LV6NaWS8$DU3%ub0La#`{`juRZ!{diuRqb(cwJw)bpX>A6cw
z_1;NOwzKaFX7Vt1T#GA+J8?WY^p@wdsx1sHS{kP}sqa*LlN)f~ckB5T$7W7AU}9L?
zSCZT$#W}%(andrwmwUzTol|?8ZfT!BwQiHm^|g2JpSit6DDJ-T_uk8m67wYaZzegK
zIxLHAzRwu)L|x%cpehGTP<or}#H*G{3=0`FG-v)wSU=lCF;(yC6un^MPM>|994cPP
ztlA7bMO!A-ojiW`%i)VfMg~_I1-z9zJ*twVc1->EnL)6Dk>#vo`|gcmK`y@%MOhj+
z-fB)^Ho72HWB9Rw)ga)3noU?hhw}7$1)fd}NjsOfsC!J3{d?hnXWyg+g6;dHR&nHK
zS=%I@V@xrbvq7!mfc`{2+Z`$ogvBPR>sS31xWve^=)*^Lj_R$PXDrGTV*EK6gpJi^
zGOIeyGvMV|q@Z|q-qau#bI-h>v&J5i5>~M2X&bm-6+AK5)5hm1594Z~{Q_Du{R$Rd
zzBFrRaHmHA<Ns@qP8vOU@aRrY*SsIvU8^QGOqUbf{))Hh`SS1Tw_*&B&2G9_`D0ev
z+()Lo3|7vnn#MOa?h^{?nso9}$Ck^BFBCH`X)5xmkTaAGSRwT$<^3|1WS5><VzE0<
zGW8^Gozo@TsLJs6!A8w`-X=qL;{z%T3q-jlcRB`6jDPdSkn^a^G3K5Wc2UM9QzjL%
zS|@c!p2(RY@N6AtbK!K+*43vt4pxRANM~$w;goz}_}!_Fg+0mR>O`iBV_PQ*3NMS$
z5IUNE)LL=r4LNq!L+RfS2hZ5zlVa@T*}iG_%U`|A{xf*Z&8nLkpeA?Lw=#LMQpELo
z9_IpHt9_aiHbwTQxV${kVl=Nt@!bsJ8Qg}qk6SuNanE6zc`lXBa^8t`k5@^Z<>>QG
zIOUL1qu8s`<n=9`Ps#C!M+@^IDcO!$X$7;AExHfqHP{CPPIQf5#2CIf%2afMu(oj2
zt!sN$+!B$<x}v?bK{R+~-~3B^S^X}%etTtl{pgl6mk%kIJykmCW}4Cc^ts2sIsX~L
z?qvSUWVo2z##QL?Dl~?5N9IWn&m~E>4W*Z^b+{m)<Z|?!j-yUU;hR2tt?0F}tA7i{
zJ6_l<vXo`hs<d6d^0#q+^!WCnTRU)y^lG-=*cT#4v)-<ZyP|%K^P9(yqfNUTw$w~`
z;h(dZf9d`&8{hcWs?E##`@Ax!^rZD(cDtn2_vGv){6D?wy{%cdv#qyW_pI{ESK2H7
zI4pRi?Cp1F-@~s{=JKlUeIl;$N5Spbi}2NceGgM3F0Xr)xbBj8;Pjcxd8-g_0D
z`C_hAsF8H^7pu4L8m_$I_Medu<>+~|yl{m~jPqBsiBHvq?BonypIu*hH||>8)m(wA
zUQ_0Trgq0EtIazcb!zgAU&|i$&5o<N-|km1%|hJMQC0V9zDaKZk9ym(&XhUHdaJ#o
zoYOK-JUm$3lV)1!opve8<%*inDYG2!RZg3ump+wxuv}nakm6*Omo9d<9$XcEsvFSf
zqm)$^Ryy^)`_9WnJ?au$R{RQQT{lx}$q6~ZNm)V59&t#`686tC_$pOvU>uph-EjZ3
zYYP~;{9NWNnt0Xv>|V9dT%}9jUh!V+=-u{u=ceA>odvNfmc0S$a~7?>TwT1q`}3|C
zze~;C8CzCn&;Qn8&&?7o8hT@+PD|Ofpxp1XWj9y-o4Dy#(5~#2A=R+~>(m2PeXMS;
zU!#3pWN+CzKK7|0smFAx`>v+$xLw!wa@R|1b_G5e?&?P?{xeLw9X&_pR{tfDsg7+-
zQa8W7+TG#hyGH1v&a}Jkd4KsUH9O|TB&Xf|H~Hn(T^&lv5)1E^EWLNt#X5iWtG3ou
ztxZCQeZsTzu9qFz_~dfQ?U$kxWuI@8YU9t@s&pr_Yvr}<d|#=#E0+cZF+7&?zPD0D
zbN}3)Y<|NS#>XOOxBpuAHvDq(e+EqkeT79@uC0INb7QC6EDZb?TIA`qUZZte|G8r;
zFW*YI{5orQ>ua4info?(Pp}mZFbc5XxGwwe+4O*zx&>CVw{D#`bE0knZ^Vwbv(@fy
z$<FN-T6{ol^`4V<_V13Cuh|w@>i%o$=AdU?;uE`Pi7N~D9eXiV_+WXj<(h($bF&ux
z-jerr=YsqG-T@5H<d+$*zSkfl!#vL`Bcm_yvi0kRjN*@zo1S%wdxam^y6Qv7!h*Ll
zNjsN|dtI4!VwabtjqlM|z6=eMiF1A@G5YL1y32w444Xu4VBf7hjWU4;T8k`&H*1_+
z5Tms^<?^k~+5#&%mpe%x+rSWcT4C`Hfh4Z&U%tIIjN5(S%0r`~$xFX&=-It&I)fn7
z%WpGIuiO$H$8fvNY^|C}8#`xF+vNWYZLCou+g5FtFe}yF-e&Y@>w#RW)RG4>eB2f%
z@5DM@kxtI*4t<-nRpsX7#}fK4Ut9SXEQvYgS8&z0i-BYRKC`fCVp}vedt8!wZf#WZ
zKrNwEx2TU#_hL$K>CcQEI*iOx*GvUOGUQzzO9xz%l2kSq)rvl_ewu9Lfh((*Gd+sh
zeCCK-rb*#j`AzPd9^U?0lO6DqHSYVf^xmg;e$MIM{QFYf(Iz>Y;|l4&b(lnJ)Z94#
zGjwsXDC)+#2Fn<FaLj0G%i6eLL0OQ?w{(tY4wLkIbmIlJX4E}t5k1am6UNASmZ8C&
zA+|+{jiV!Am&lU#>GJm(MB|I*JZx}KN_iBlXra0){h3UW=GTJ?k38M|kKbfG!qAk!
z>Lf5{_X9?TAV)S%-r^{+)Rj$4MfUrI=g2Hb5MfB0?2)OKpmM%UfZ=0f5oe-JKTjBA
znM2FzrgDxvh8+TXocehbeHhf3_t$5<ZDVxoc%$b~z2neK4slV=H*;8(++6&(@hzBh
zpO3xEfpLP?B$v<_MWJw21|74AnR;@zk7Qy^{J7W!|9;>xJ`^K-Ohhj?g4er$FKA=k
z_4{A@y>5S1JNHB?Ut^11^JhNB<c6S4J6BfaNP6Dex9RvLNsGex+p}zg-O9dvt>k9f
z@<2rKm(u3=W#6*3PyJph<#0FLYR;Q!F_%9btF}3wb!XAlj6Hjwt-N_(b$RflceTd(
zm!rM&^5tLMj}egeNf0U1pQahwsdqf6AxQUc_0B7M@7>Ld+p^Y)ZH2?*n|}=bOYQ{o
z9I%uLaQEViS2CLY*<1Wk`cB{9K?f6D7FfpWu^Fi>Tz>k9<orCrKH;dRD!p@;UEUF|
z{UT7cDQzLUhoE$jcX36MTkkF<wq=uc>Z#vke9Fn<$!7Fxk=K;FZi`p!G&OW&SSF+?
z@$SH~iXJw}=TfhqhFodYY?#+uc&Wh1?%WBdEQ3RJ0wN}hQ>P1f{a(mC&BnjQ(SXq<
zWWwX$>C6TOmgOJcwW}q!70BPJZqIluFyn&U8>xpg4{sG!R?ofl?tAvJ+mQx)rVGSO
zVzfB?ZSS_nW$SK*L@!}lUm+xuF591YV@=s^tN7&!rv0DgKZ^Ez+A(Kk%tpPirQ53`
zQc_EfyTAR-JJ(#SYrCJ!`H~CEb0T5{cZFJSZZ`^Pd1$+kF=Ac5Rj-y6SA(lh;yRmk
zSC7jHnlf;mQm&Y?F~;J!)qjQ$%YUT4GmHAM_+F3oy!2Z;CImh(`Rit0&amt33sWnP
zOU4lw@0c(<wAdVU=@(HlKNz3)Y~9o>A;DCMTK!*9r7M@u{u6TQ+QWe8XuE}4j~+1^
zJ<a5G6fXIi7@bpUUc*;5C+(8=?7LER3s=1g)d;HYULndZ>+tx)dGDUAZMKh=_8xWL
zyROvSV&j|h9uro2Hizw%eEB=D)MnAPkAEU$1%ml?SFvllzCUx!HLUHT>c4<X!mq!(
z?P`0O9l1ug+FQ=sQ)tdK6WPjZVY6Mk*J^}jzgx9f+P9DOpxu?9(LWYy>`a(uC9Wf9
z;hOfF-QHnW*>ta!SFW6qbjw}8j5{-nPhk>M>O;}KdH3XZ-w4=U&1&^lN`~dY<;LLI
z*L2>>i|lo|#Pd{@{h0QwEMJ?`B9lWS)>?P0lel&@c}m%i0^?tc#a{hqxX2&a=Vlfz
z;;36T;oA=0nu4d(TS9hqz5H@J@PX%9H$lt)43DO0JnwV8w(*0Bb??>Kd$0QL=RB%)
ze_4DzAlvhq;RUnTTGh2{xkMyiUS4*IgF(}$;LK6`rQ6=>*1Zj#wd#(=uY^a}R3_S8
ziPM!S+0v%G<y>%g+NFb;otoZUlef(%Zgp|0S@KA{KZWJ!4UtO=5-rXw`c{=17~P-1
zE-X~#v~_8I_{3YD9E-OpZOq=X@YIeU5<QEwvaT@-Ge)nhUL0M1`CF)WY*51r*$y66
z|IO2M%(j$>6xw{sxzg&Rta)uq_XS^16{b)ISI+xFp3j;~4KA#?c=P3riNCU(=4uMJ
zOv(t}r&v)?+#GU6fki^WCh5ZOv@I`}J!d<6{;G-p5uSwW+ZVqwo6U5kFz)kB!6jQ&
zmd!ThTf*U9WVJBFS%Jl9YxBIb>Shxhg?yB=gBr4<Op6t@J7kiiCrPc%Ii2=gQhgiO
zWx<t6D_0&b&)v1|=&n5*mw&k`yK44@2A;fcXAi!=bjizL^({mDN46Vt?rwXXd+O@7
z=_0&tJ7o13R(sBlk)3lSXxnY|3wu|mMlK0k8le3lM8(uSqg&?6<GzedcZ0M2T8~}X
zx78{&q?mD~(Y(Md+pA~3%t%x`m~Nshzs#WQx7M}So+rF^Y~T@iF3q+fn9-*(z`#Pz
zzDn~->#1NSt*`T5`4+2j7S0PkrX%K4{xWjq<>JyT;hD#@ubVn+*j~zxIJA_X=kG+l
zlUJ%s->yj3lXx}dCkMBZ(url)3+H96W=@J;^|JS=N>*i>RX}F`Hp6xAY^A*YMJ`+N
zS{^Z%?w(&Cbnn#U)z@^Fu3Yn}ZBos9n=MD&OFb^tW|hv<3gay(n|k3ALzwZ~jI$T*
zY}e1MyOTP3PVj#QG1i*yRd;8ev<Z>+-Yqrj<@!hSiw^JfGgkTCU&8R&c%|pzJ>Q+K
z)a;%4{h`ITxyL7{1c@-KvZ~(hJjUQM<G7H3gVU~t2^w6<%z@nRi#k{=T2(&22~cFs
z44cHqmHtLx&#Cqu1>c-}PVdm<niP{JB*8Jc>GAu9g0K~{6wmxPn5lI^#Zl(;{f`pD
zk$uJu%(n_olr7<PS;Zq%Q<Es`AYfDQ>tWXSlh&V`1Ef3^V?t)0f8{68HNiv6$<O&^
z>zOpw-+mm73=ft}os`PJu=0S)gn54+c(k>gpQ(P}Z@?oJ&XCi6$}$N;A(B%jOzJ$$
zC@^_ms>K@3MxBa}ZKo2<h1BjWNjzKJ7+`V6-;RkvxM^Y0H^mk{Wq~IuY1R+eg@ifJ
zcYbqX4PgpQQIIIsJL@iboFhP`EqG%5ZH)lN{}*01%?&d>_&}6H*>UHuv#DtfQ>89N
z@Ua=~%KEkJ>GtURVP(s07al2i-*dYA?CzpkFOyZ<zltn*z@e$bQ+!AJnb$gfy~kA*
z7hi3**;(`J-QJzGCzSTD_|Fg^(=~sU!5qKE69bffepXNZH+NU>!5+KbEAtn4N3imD
z9c7smQ_;tH%-ZYG+!<SMB+iLRyZkEGD(lj{?`6x~o(Q|8luZ;rxAg9doZsF#X5Y3>
zUA5gqSz(S(&@QLRuk%B`G@laLxg&9o-lElU%RUy|I^}C5b?`&m%#}S8+y1#qe>#}A
zJlA{56rFdG+nuAry#!Z0TI$F$DgDPHrOEG{3XaG;e0s^p>3hQdeadRi9!fPUQiOX-
zQl|<|;aauMscZAh4^0h1s|w$oVpA<-dCcw+GKop8`IR9Hb7!j20``W~6)p+$m0Ak9
z4Ch>a+4M-}k-@jW1uL0fnXX(R&MGO=5&I*)&_e5!_G<52r}d5(|FwJCe&YDlDyFW^
zmXf#29!<U#c<aVP1%J68PtFD1Y>`xn{blt1`r;Qmcdfeg>0IW`W0qR^Wv^aUdH<<+
z6WDc9R<||NWLw<HZ>N~AsPo-OomuLYn`!B?J|jb7(#Dm$j&AOFye-$3J$lu7jns6u
zNw$wQYWLi6S){e(S9b@i$D4-|`u2DIKb*dkwQbME_im~O^-MgcTU`En?AuH)=cc;T
zQql_~GNzr4;rw_|x$qTZ-g1$UcRhAf{xe)&ek<eI`g=-CuZDej7xB?W`=$ID=Cex=
zic}oc>&;$e>ik#zszOz0Oq>cEi>Tv@qkX@=Exfa8=hBFmmYWY2`5Xz|?&UgF?y>KZ
zJriX3dK0EHp0#ZG?AG$?R@HVVJw5FWTi;I6in~xC8kzU4S7GbJchBqv5-vT~@6xy%
zyg78!&Qh<buIWtcD!3kn%LHy*SNGv}Y3YvTUTZC1Xf0{wTJWLWzWS~?SC!o3kW9t{
zGy9os7Q5(r>H2GldVS;Ly=s#8OX@|{RE;dId+E%&5jDz&vU-!JP0(<jA+_Vt`!{`#
zm$$BpdiUlEH}f1v70=6k>odOXntig+%GrZqQIERhPp#?NSEY8mFWq+~x79H6ZARt<
zhibRNxL3T(ua|0T@fOxPpAVR?ZD(#!yS!3eOj_&cdA^)xM~$10nztq^Efrn%oSpsJ
zI`2+qhJ&Z=ejY2mvQMZmHRy;@>t3}>yo^1UU$$#zWL#>O6P%^@GWvF}FXNlSqimj&
z+*aKPV|q7t*`#kT>z2>8D&5X*z4gqK-rQ@)_vW)~`t~P!>t&uLS6w45&nY|=>XrWN
z)q1Jl$LI9CHK|;!TPou!V<ra*2EAIn?bU@3x7_?Qjddip2lja9H7@mNzqV&}fY!7v
zW+@3vQYEkbmEIguxqRIMpQuj{mL!WWW!bYY@o1^1pz-Q=E5gIreien8-Z`M<yD#;e
zj_+%w-2V)E6W2y`Ztcr({C6#I{eOmyZzUd_GX1Npyu#8W-e}#@9>WV;ThmQ<uT17F
z*(-VCqnfN$$n2l~CL7dtz3#oVxH~p~*~CbXiEn#~uT6KfmD0P$!FtXvMegJut7Nmw
zi*#OIV|%sL>x_iOiun(7@?<aXPF^`J>gKDE%$0!)#e$!;T)N7+JKOa^*xc*lEYtQ)
zj=CVo7~GfQGR5kC{<;UJ&Rtsa%2=VG_u|XkuDkP=M(I@FTBo>xpGUEGX?6~;wYA&E
z!?FrO?OT<0dT~41@2*h&$~5g^X4UziYpde|d}gvvZ!;9#ck-{>yKOJsKg;_pUeTGA
z?Roh9t7mQh8T`^Vm)&BUn&!DxibG`rL*U0`>9<X@qrJACRbTAgk@3U3vhq)itblLk
zWJN7kbMa2)+#9*K3m6MDq$a6YO7;KKII^>X(PWZita5jyhv9|`8kd$`SP;HKhI#3d
zM-S~@99r@0SD#LSu9oQOrj>`}mij)*{C-?8J!VSQWk$htWf9p;2bUf&T^{Ss%WGtF
zOt02E+_h(p=+m_ssw}$;uOz!}y<D~}ylfG}Q&uJ!L5n~3N2P>rbCsrLdcIqm>GXNC
z^%t>OBKvlxEz_D*_fMX?U%BqS{y~Yp>U+1gb|`LIa(oik&O3KD&vH9-s<D>oSKr)l
zaf@}l=~K+=;{RnF-Miw@e+It^j8mpcshi8xA7aV>@;myc*tH3H`G@tdS?;-g%52%!
z=~FA7B`O}hq{XP+VWA?a#ivoDut0W2K}671m6<c=EWRY8xM#C_=KM0*3l3ThD=qZD
z9b`&x`ON&H$){!WEPW0JR<~fG86J*?8qpJM>f5*$1{y32=*TGPQcGeGH<)$Sx}c$1
z$H_~E@i6nuBYihGm?l^susp!T7^Akj<CGvHQ;O;_ZjT99mnDbnov_Ste*K#X3>O#!
zRuu%==csKg3Y_>xKq_FxGoPT^xf4>vJPc$vFnDILOgv+q&ZB7AIceQGZoVm<@plpv
zWJ)IO5d8X(fmfZuT5#to4v!Xwy%SQzf{inetY8un32wioJw2g~IbcyjP^Fqd=Tiwa
zg{dc;ju!Y7pWY`nRfV5tO2q-gUCUM&#7hJ`$y;(Iux<@_4bR0JU-rE?yMIxAc;Kbw
z9&_VcbPDrdM^Du~w(MnY!LM@DxU8LLRm`#)w(l|zkM)Ya6tYc=W!vfrp+Yi?QiOuf
zikhcfayhgvbDE6OJ=>-7Gk4Ym)t#DlXUCu2ahf4}!(X4Oes|;T`W3ndD>Hf5sfZ-@
zPtxF5EZMLkL?O+{?nH%(&%(Hm7Rhs-TNbdG=rr&$JA_Q1l9ZGuxNTM&=VM<E%bkIH
zx!<2^iwe85?!|ShY^j7xEPanAHeUaxcDj~J_-c~A(B=OOM@4h@eS4m_dwF%*r&C*E
z7If;1%{#H<E8p+*E4O8(SM`Z}*AC*o*;wZH-BqAxah17o&xX{gL0cY9j#xCui>crQ
z!;BRjSEY=OO!c#!sC|3kt+<6;76Ody$E3e`vPJT6eCS;tY;jflfG?j*M<GYUs)=tr
zQmzPDF6%T6oM1cQd(u;hpBvaBQj)aQm_-)G7*7+(wA9)b7bH;kgKu`KSwpepUqjWs
z=Ar8@FlclYd#ro?Y2UNxRP!Ri$emi!#mhZ3RJ<7P&dIe(`+K|Ysqfjcwh${%jkM~=
z5f7Z5{EcsNO?sUrqP5^br%W$@%)>~Hy`g-+6Ji(G2k2IW&E%N1IQ^J*t?~||{|wp7
zvO}z1vgpchPl-6|m6xD<z9&AfwC{53nT?xr_ex}l6i-uF;km+7(`HfF6r=MNt?y<|
zdiL(|A*1_0)PJPD<Mx*LzF6$-(KAab1WHqaHEzW0sp~mc<)y|kaqXIjNS{>#4;D-Q
zdvW8s{g;Po-gPSuzwDcnFP8bNUbI5Q{8IGc)k1IhSd*>x-QAv-Upn=xk>n!3B-6*u
zv+v#(xZ+&#+?=8C*J7>v{~4+S53HN!Qyb`S6x4T}?Xu^Vi|hM2P8kJ-`YN^WE)jd0
z^El}6zg;&^FTFBxJwx)UnTLPaT#TyNxv_esZQI1<ioTDM4;)Wj9Km*(PeUd&m~);X
z-`DNAyJuvZbMda>dbvvFWBb$}N2l3F6=s<~-kG{H{f*|{mm5pVZu>8`jGg!B+0`Xa
zGG{QZ<81RWj(8Pqdt2*5toF)YMV_X4_K*51_m`a2UA3WhX>m~Zl(t<i{!H(?bYvBa
zqn5ThhGdp*zIFWLml(_74;J;a%<oR!eeLS)yj^8eZf!3+vn{qeG{Tnq?NWcgr3Ooy
zPh8kB$0hgW&hy2yoP*XYm&|ONZxFL+&#lKUcbv9l>RJdSYu)<Opc(F!S1?g2s!r&7
zVUPcftlOcnK2~gVZ7&xuxs~i#y34xDq;ZK`taoC7w#>8b?5)9bniq@buhKJ@J+@S>
z;To4tG3U_-w*E=Sf?r!*JQ=%e-kylGg_ePb{3CqD%)VFX7xup5ViDW?X+{IXs{J>G
zH@!RgYi~GLsX@*$n<THg?w1xT7#^gF8R(b&uG%v9SN~b2tjkNzH+CMI>l)9T?_aoa
z@t;$T4<>O2>|-kw&0D%<+0#YO3c`~%hONBZw?^#VtIcYj+$(o_uHX|C|GPiz=C4`L
zudQ%WH*|WVGO;w&SiCZM<&|w)Lw8K@`QZ0s!dKxr$1W{kD0Eo+D@-fYZ1>uPU#1Mq
zLdPerw44@bE5*MxV^!uA*Gtc(@^~`sD}|J9MNRg!F);s<eYI-O?pP-$=B{}^Zbo0=
zNnBmxq0n^iLE-mf%Pu{+ye#641oKvvNA9s#_v(m*n1@Yz_)+D8zD~%AEx&tPqo-ZE
z9NJgB#WG8bHCt-JwzM5@!~U)OlCgEy)O+bW6w4n9w|cE!@v_t|%a3#A0j5h{bqQ}v
zk}hs7&eOV->pn|pohIAd<SQxyGJ(5H*QjW%viNTG)##nRY|z%I#nYM@zcf3sD)=1J
z&ek;zQI=`>{mA;)y9JqVr&u25D0M%xy5qp^%^ZjK)-TWbbmgnyQO`*_i%x`Y*_v9k
zsB!TtuKTNY=5FR%$ZpVQl=0DL=c3w0ow212Uou&j9rblF5wGXCEMaCgf6c1jovRBO
z&U$inOFO=5zqssH759WadrsI?GM@Iy?z;2gqJ*BebKUKu%QiEMTQY<OF6EQ*PL_^a
z^xW1{xP>vp<aB4IQ^!)HIZT!T-<u2{%5^s$`x*A#zN;kv!yLKP_cN_@7PogyVGz-J
z{(GT*#-y32c&@B;DzcsMW@k&2+p`px%o9(!QXGYk350sC@7eICQ^ix&FLrLngezy5
z1Z3TJ2{6i<SooAZ=9nbaWgMjbTSb(G<;YX}8lhODCt-`0aq_SUi5AAF&7H8sX-<*B
zyO0SJj%DnVoxm`|#i3=grk4Q20}i7j9E_J{PuShQOGc_xFJp24CdLU2KPKMfNe(@=
z<g<MKhC{+C3+>*73qE8QSlx1D!2}hK!>q!G7#RdYbWSNY>bIq^F!=8i5Y~0pJEPj8
zWYIEd-jzcP94<oBxP*@;u3REG)A69n{uoW0EJY*B)I%~)W~F)kR&2Pa!}TFjN90Pc
zoPbCJ$ALu}6F+`z@QzwLZNZF!bBvQb1N~>;=aO8I&@LlXcOroC|D{KH#hpRk9Zs^3
zZ?kBfSf+VMD%H@S*v}x!O<hU#h}$K_6Ba+Giwb=^!WZ0dLPO}3y2=6u%?Ip4Ugbyh
z8AM!W%sf&!CA?g5;ab(BhBh4M)L33xc{O)PJ{8z-zGYHGf>K&j>j7VphDqO!<UZjy
z6rYo3F{$u<gzC9NiK<J!9gdqkVa^oqW8#LaKFdT!H2z-9dp@DX=W@g0ZGw|LzLy=E
zSn_Ggf~~u{lDy;urGCa&ytv0CHK%0rxs*=nZ0~x<1-tSdGab9{s9!MS#MBRh(>|>~
z>Lz=4;`t|^%xx!}OsmrO`SNyR_`5&;s^7zd%#|2T<4d@_gp~?gK5K+rnRkB4vEU{1
z=WH%n_9CZT?cxf1t!)Y)KlUy4TWY>jV&Nu*lL8<56n&2_31!N0+B{M4>Nel2+jp0$
z2j#8m;rPTByZ6FlE&C<s&u`wPRdSv!VhU4pn_!>F$^(-EzI{8e?7MB3BQxJsk3*Yx
zt}NeUb<DAsEBKi4=X~w5bn{iqw>*p3_Ecq8((j43Wu7YpR?L?(oL=<cc0JEV8^ztX
z?L9N%E-ahLW?&zueZyo@pYDu<{3!`vR<8<M%GWk8Bxb4Pnt5z3z2W{@lR}M@e=E6Z
z=V=QCn6A_iiGCF$$KCbU;DKbwHLlX_D(^c!m&E@294`L0TycTTh3!r<7BXo*c8leM
zB?Z?km+Uj-HHvnd-F2d&n(3U;s%bA7*UY?bY}oOkCn#jn`?N)CubhbriCr!}VaJmu
z*-Ph5^5*78<?UD!6WPf$bDHeO`BzNln0>pptiROUQ{d%<1M_~YFWFkX``yx|*=grP
zr%ibkwr8bkDp#(%(6f`KS8~cM3X2{mE_>7$`$F5(%S+c&WnSK1?$x)R{n`?F=HXfY
zFy}{?_DD-^S?92=cKQ<St?!ubPPJKjb8e81e6~+=-juUqLA&!U5=B<k$?H70eZ81}
z#dRjllWYGooH<h`;lI+UswC}LuXXIK$+<5D;$-3ro;?njIm!KI`oZr?<spl{wZ{gB
z37$9<!1i}pzuEdO)1Dn7&8wHZ%3^ukuOuufwpBjjSCyk*(4!f0EH3Uo+3~`v(?V9g
zT%8%Qw`&&P+!VFQDUJE*X}=zBZT=d>Jn`TOuhTKRD;I2<EE&3lNyebScoNTs2%Q}}
zR~Ej=GP!9a@%YRGoBP!v65)F)9=k70SnAX()^6C?XQX)cpcjj-&|!<|v8B2^j9lzV
z;%|%oGi;XH|H^fV*rX4~7O%AK<29V6ch}WXm#1i&i5|<D9Y?47v$k@@#Mf86nl7rj
zxADr~@I5axGbZ&L7WWCYg=A;zuevtNwUXnCheg(vREhfaD}#1xdazlnZd!0<VeB1c
zt+{T&%TF=S2*`3(nRl4s)}jcB(z0r)BNoBACK}gG&oRX%Flbs->#n%$e%ttR9p8nE
zOH7ha)oGkuTj=kZ&2#3Yd{AL;>De1gF8b`so1Nz9tS^(K%RYN$cX4E|>B=oz*H-cc
zbahF8`RvAf_JP<I?H%S<7X4>%Th!|+WBK=CUrs{&JJsG)rw<RfBcz-jb^kk)IQQU|
z{7$!hQ-icjk8-g48~onS!=*DT{#MRPtM1vMXN3|uL>A6dzb*J~%h}jXD>K3^Hcq+4
z&Yl%_<7n;c*q662F}_<BAUIQH&ZeA{_q%&-`?EIhSa)^%UstnO8{aca11zd~<i)0M
zu`(%4n)^DdBFs~y=*iWUJDyEWTc4BrbZyXK|CvWq95S6k>h$7u@_HFw^_uQ^xY8$S
zkxTi3m44j~rK$xA+A~6<JBoRJZpp0w&tPIVdseh5e~e&^pPb=~Hy?7Ha<}%i&)Iuw
z&e~Nr*KFNi&Q!`{5uVu2uz-Ox&}7!ab_Mn6j7sn11R6M3tV(j1NtG3z8^AERz{u4z
zom=F}Opm0BH4GOOJl;=w$}vTuUGWi<1M?>3z3j}qlU8cas(8rhrt)3*tMl{?79Znp
z-)CUx{Gi>R*ucGI%^jhl_T3D3+PJ%ZIEQrZs-0O|aDd}kQ<25O$8!`sojYfE3e4@~
zc*gPWT#RDD%s8Vnk9!s}^c<NM*`M>!DmifCkGdUhDJ?T5p5Z)kVuqK`fmqLZwTBp+
zMHsoe9@NcX*vNTYr0^7Xzo63O2cGX*m-$+13C#($aAsU#DEzQfeG?~Bwq}wl3sb<V
zd763`Ia*mvCQa64Xj{qM^DO;<m-LxMv76^#)|kBgl%Hx*TXF+;(?gve0|^hVWs#ka
z6umMGC#`(<kpB|PrV}3xM07U#r~GHw#Mpg#-dx?s%kBOoI$w%PZ(3IIn2W*w|K&$j
zQ4eBPwJ`17c`zxBPhy*dz`{44A3dtm7|Y)sGF!;eB-N~{k!sQ*W9(V>xW(t$oTBMD
zk5t$kG>oP*ir!<<N=glAIj5R7la(XIowZ}-6m{i3xf>JvtUU6MDpf0)ZCY_-j+^=v
z83)!BKDm1t&H~qcgq)Ypb7No?JbqZhLC9^U<N^n!yshsAo~U}T2Wgy;nap?N#;sdh
z7hh3y(he1F(e_r`IB%=#wVK78PnI034Po6CV7h0MwxP{Fxs1d06PsIXBJX_p&)~OI
z|6c#YZf?yT8!ro=<owUTV|q{|VTbcA%am0%>OCe4SyPHUj+9S2(6)eq$GxJ_E6pQj
z2cN)mt;CjvYZU`J1!}7IB)$lYw=ybHSeAW|&oJr5*^}3<pWCc=;ncp+1BJ(X!cHm(
zEUXqv@Nzfg-?;8|_NvmGA4Js|ZGyxzQ;+V=dGpKfx@)b{y5Lpm`wYyMF2BmhsCH=I
ziAjf^%{{tTt2cOs&X1l)x*H#D58kt>ZZ_Y#A4gwVd|dvZ=*_X|Y5U#WZm+!a?N+GD
z3I!&mm4U~mE@tLZ_F9)Mc5mr|tjH_6M|d91j9ML^c2~8eWt#2A^Iw*&v|GA4e%JPU
z)_L_SL#9hUw0zKHlzSt(G;-B~Gbe@j8I&!0o6z%Y)ugw*bJ^L#S1v!hWUhtehi|TX
zUR|?q+1$5JpIPqO43Rl8j8Z)lp1c#BS#aEP%hiQk4NR7Pu@jrKS6o`<pt<(Q#N*-H
zZ%Wl|el@GwmU;Uv8|#u)m$cNx-WL40{hvY9X6-+XMR&WWuUj2<P4~BWSajVNUDrFa
z4<=<>?O!eFHhWv_e$RC>)6Jb{UkWK)a(>IRpINmt_SLL-xyriS`F?&mgW2o)RJR~C
z*J)S2US1-#_;1v#S3B=Z+v;zY5xaJ6+rvVqbN6RGdbU{iC39nKI9u?uKYsoGrF-A5
z_bR!&?q-(IR4tz^TM{NR%{JLG&nR!X`?A)K!Z+Fl7n6Q%KIir2r}>rn@meo>S>3w3
z^i%V7y^8(cebx!iz8`igFDoiWQ%BqPPE_xwW${&dYb#2*@&oSrWIBkSx$xrR`|4cT
zdso-4xl+NpWwp+&r^2%T8AOT`UGBXJyp|Wa=8{RM)x>>mZGYI$&79n57qc@tHFNi*
zXIEGj9%VW!{<@3xZtK~#M~`kS%viQy%h{l!-}|d%vrDV<15AH)7af@}saL;WY_<4I
z|7R+5Rpo`(Z+bU3Drw!m0)Ykr58vt}v%A}`u$~X+yv!oxbd>4x%2-E<n8;vp_P;M(
z%I}6pnAolh&vba$p>WiG`rM3(MNgs^A9r4OWpASDy80!(@!<m8KDYk_GTxgVV%lHG
zpQyRyQMHZojNL2Vy|TZW)SF=X;K-M%re?p{({}!HndJ4jv)f+Rbb<TS%!k2CZ{5F~
zTyM6KPch=mEdLo-EcbJqSh**7k>J(XP=4)W6}~LRUUU7fZC$&aL*#Yg(E|2E2KMi|
zmp-qkTy}G3$*y$&=_V1`z3ROj`+S$qwptqdz;eZ<7%!p6DiWQwf@}8eOO)HTFHPq7
zVU<G~SFG9|ZMwARy6riIMst;>TES_t#c?zF3i(tr&Uzk^-!1r4B)gckPPekq$lUhs
zK2N#Yyh}C98;>dny}kGB)r!>Jo_tSSB_^L<YNztFF*^Th=$47+?b07V5VzP~vhBcW
ztDHOQZt)&uznDBR!tPT2k#6bjtWzUn^E|Js<m@WWSf6q1+FrY*<qgj-8|SLBfBN_N
zG553lW%p*Kuh_Le+=YS7W~#sWoo?UNetTU{FXf!Z`C-QLn#+3DvO83ze_d;re3Thl
zHvggg+anfVr~XJ>Ev~rY=IpgKOS6~mx{|vkdfU6k_nR`k#Cc}FG=F2c`n-p{{&D|V
zR-$W<ZoMj4nj5$^)8bReGRGs&4_QQe=<w*wnj|~tcxLahvsbq77A*|wKAUB#TA1OG
zYt{3VRnR-jd+L9Nx7#P#@~(6C)qJ!ieg51nmzUM@ofCBkayp?eXW%i@p`FjLeQREs
z_1?MGzollqw0T)9lD+Kb&MU8s^8Pa%mu#!xoZNW$Zu{N7NtsJ7ul3<gx*+V;u3pq6
z#t^V8tb)~HbGsyqN};4Dzl4H@%3p~!OiY=KUIHs`>z(i@WoHd=RPc0=-SFsI(}fOW
z&IA4&tSLP0nFq=aFdWEIXKrj!V{_<SX4}omB;Q(mEPa!J%&nG&hrc%p9N;izs;(($
zDOER~V3MvQ#J6nal$PKBW;n1;tCPFZ!W0mo^YPnLhUR7`p5louQk;rAIvZR*%7}4V
zoNln;S-{xRuqfR=MQst&<0*Sr^0|vn`sVDj(WgSku+?^_sE@UR5O=_tWyUQ#X37Z(
zJTl-k2`~tp!(ejC-M}D*y`$~K^NIee++;i)O^sa+e?Rb4W3imjx5EMlVjQ_D1dre5
zW7!$<smH#gg+bgSX8QI+TrAH#lPa7fOrGpHETiZ+uR&#Bisu>)hW}R{^{x2Ty7Y;~
z#)XR<JQp%8@)K&<;mz=L=0k>Q6AFbq6(h7bjTk02Sbm(6dzB;CziQIrmJJ=9<)W%d
z;Y<RL7P^<XysQ-ry|`eD35$_r!|L~)(lZ#-W<2b_EVxl9&DqX&V)Dy<Q}66*TWu?5
zx$Tr`cuC7fW(kEWNwc}kUi7N9U$u(5<N3C?Ra(&Bvofe~{`<a-!3)1->#qHx*=suE
z(sMhnn2G0B8U9;5<Dr_x7XA9`5<6{XZufYwqP$kUH7P4-`m9G4?tY(?0@pHzcOH^h
zGFM1zQPDA#>VSua{V6WY-QCg8PNl!<>3HiK%-6Dj;pPb?A5r#wQV$kIBsHsaGFJxw
zU8f}RaK%E+>D!bdzt!-hPj2|w>UktIGiwru1DEKV#(+r+9{)~cnx!=HAY*9fX5qRL
zzbJP#KDLAf-{vU3mGOG|_@*F}u&U#2q4g<0w0<k;EBn}K&2?U?cQ-yW*>kpK?VM!w
z#phRFF<<i0?n&2sr>phPFX=g4Drvr@Tzx6!!mI3USKOwrN|ms6tzTOIab@2VvwQn}
zlS3axe{Z<&^mJu|;hETg%bTkH=qXhDl&pQU+E#i4*IZVoZ|+KW%4;{>y0+Bny3beB
zNtzqAY$o_A=p=rbT~su=D}Tqjpq`w{C5zoQzcY{K^$*y=Ie|<4D(~O)m*whr;;-j!
zovKi}_X$Ht;?WzPYr0b9w$-*R+!k&6-f60y0n3MFheh9>F1ur1J-K+t^{=b^{d_VW
z95bJ~@%H@X)@v>IuFm#f+4*aVZr+XU+owBS-2L@#Kz8&SZ?CED*8esh)DoNjBj?ck
z$DfZHzSGmIz8CXdw6aiTlF!M)&*g_bjgNCUD88SwRC>+Fw-J4}1HD&Y{l0l-{p7OW
zcZ+<#=}gT0S)6fu-JX}H{N1;I%~>_s{?ye#{vXO)YF;e){&w2dS*mWU_B=6Vy7=Dk
zq+QSk>t||b{D0N$J}@mrUiSU7*x7p9xZc!B1qK@}kTGgK@k9Du-5Jl1v3ke1?RzE4
z+qXM^o9(`b>sV&-W}k?OSOdD&eYKU_1TBfrOUfj@_nF?;ThCiJYsswz!cJSO{HGi@
zU#<UdO|CZEw$)9qvKuZ>Q*!vw7qWO;QGe$3<~1R^R{i!m_C14BJY$u~RdyjkmHBA~
zGZT)pD@U)eJewFgk&7>VmihNyKK=#n+J|=3-ARu3_jWg87kkAe$ds99@lZIZ)_vP9
zDebcxr><O_`Prmd$I*ZDS-psLu@TpnTuBT_D>*Hf`K@-wi_n=qmsdE4cQI@_s<gMz
z{fb@&Lqot>F3tqQryl#R9$S&VJJsq%@VNzrRz=PXuUYg~&eVAL_v4QB+qXvso=UmG
zJ<-BVdTqDmmW9R-LYY<sOI^7fYIy3*<f}7svhSbbd}bu$Uz2+K#`G&rHH(9%dd;a`
z<Yl(*8mEM>fzV^=Xzi+5Tw96-ZnF4#A5*m6d2!mU$5*c|wokh~>sF2ibEnD#fu?4C
zi91&pT{B&}fNe|B`Xk}?Id;dUFZbOwWm@w=M%RwlSHEUSDYH$m7F8CR_w07skE5<C
zQyCMJU){Am$nBUBl(I+Q=(^e7Q4(y6j;bz5R7#h+yRSL3;pY~nlC6deYAbFAE_?L+
zN<is^Rbsv;T#FMv8%sxi$>{eGw|#eYjiT|V2KJe!W=VfrwDa?<ySL;0zI?kMvh?WD
zvqz5}W0kv=lQ;iCuCL&QUGdquXYYUITKp+@4mYDvslx*AH<!D_|1(VBx0~-bsd8@E
z&r^k;Zks0Ob6pkQ_4T04z1&+{b5l0fJ{MsR7d86kb4ZaZiNm=s_CjZ5k{WwqsKE|v
zgH{IyC6&huihKU9FTGRd-B_O(erZL3<?@+Rqe3!QoPWA=(~q71j+DmDe$BgoMU=)|
zv)K&}uibYjf9bTkd(OB1XI<HUhFKQ3-6m{OVDR8*mk3I3p4~7z*7UKmeqpberP21b
z!k2$r%XQtd7r&~KFg>tpVZ7nQ`ArU&N}jN>2(7f)JMrkZe;Z#}^O~%PvwAeacyZCx
zS!oUIHdQ_U#JB0))<4s`?Ms#QQRBpjhq24AEQs+d`l|W(V|MR`C%5KtUi>yQHL38p
z(UX8lt%eg_LryptGu9m4oN=V?G}GLu@Q~=j+`y?XEwr;P6;yL_H_tw`;nST`S%(Qy
zPlMYpickIC!zk72pYhGXsY1kcJ~N-u5eD~?Jqk(_P0Q0bIh<1FNL@Z0qQi88*?6M%
z^j$*V%O)P;b~oZ&f0cnjXEU>ik5SW%mSaKAk2M^Nr7AoV50;#ZXg%<I0b`p>-<2z!
zhg%HRxeDJF7WI7OveC)qVb`KWCjWg5QZ0%lhg$Vye;i;a+9~RJ$oY_*o5akd8BSW3
z3?C0kNlIz3czkorTGILUG)EDGn^aK6svl0DeI(Ue7z|cOFt}AN`c{5OPeDy}QI@})
zz?X><%=2OtclO-cvAk#ogRqYi)9QPo;)g`^Qm?L9l{8Z+5!8dc`lyKI*=8RWu}AHa
zqHpFg94gRNu~0hU$+IMpah_3##muMDrh?KPA;u^C^?o|b)VS42Bnt^GXJuAp_;<NY
zV1~Y)?Zwcuj#&}A%$I9@<$ECY@IQmMLtXNQ$Y$Bsxtq6b+>*<kmDrQ)z<I>o^ry$M
zU*hLxO;=Pob4JNMDBG{SqT{IL=~5H7)rDS~ug==NdT#Ub#FQyPRrAwoH@m*wUFjjW
zDDl4d>RnYGMvltI)K+KRx+bbIt-Y$(VWY2kXjbl{qIkJCdg`r(@rw$r&Nv#ano(fb
zRdavOy28YJW?i>a56>*Pbj<R!%MMY64}T07T6IoiEaYlTS~@$H-@WvZm$|~LrClm=
zk)0F!CHN1_sb0{j82QaPj-6N0_;8;=uu$ld8Xos4=UL@WgnV>pdEnw2w^wX+d;rVL
zJQa!L{M>`HyLV*dL~q>Ml&)9)X7#+xqrqFw9}4oZQG9TQXM@MJ-KC2ztX=lJ;7XI>
z>v!d%A%>cpn+ug#(~2%N&tFxTyWBIsN+qtUc!gPS*V?SvOOsCZJx$fh)vuh+`0q=?
zoj*s`yBDmk-EwQ|(o+(@*Ckz@)E2Cpar~{FruIiB?<r>{dD@4VObQW-@i=mH|I{sy
zw`Esd+bL7$zVhDA!gD6Ap$2E}=G_gy68*UI&iYp&c_lMkr7{mY+nUX|?6E+x;*dns
zsuigg?na^EyYyNVIo`I2guARrn(|yd()0A*7KH*O$CL^cCAFxV-=FtOI4)`a(4rbs
zdq_~j<HUqiJ3}QV)?L9on<SQR=WuM9c0x`2+oVj3V8sKIiXB@PGN?AZHHu)^U9+Ni
z_VwR^3M{GT(|?Kbl<%K;^(f!OJ$sa=O^$kA=<DEpTdwA3e8K6u-8b(BEY7l9E8sbA
z)s(E@jE{Q#K6%x)^Mk*9y>@no{^D|<%PXE$&TwR%D*1io-rnjYbEP>_Rnu-w?pdt*
zE%MO6h`6fRrGGbOgvCg=o~df=b$)PSXP$`U3)2Njs~ir8%ctuU-|f2gd$Cp5_ryC=
z%r}GmSBaRuu|K`${LTR0_~Mh#ugc2pb&R_2!11Vb!{jbCp+0|A)iQr~=gr4gZ)U%d
zp>6tO6@x?GLd&8Lf?cm%-#?sHR$#cPkWW8@>j86j!DsnzF%y|JgIRV6pE;l+(!%n4
zL)Ycyjir)nR@9m+9$8T&!`<>YKx*CE1(Pl~nRHH^wxpZ&tX!;r=4Hi0mE|+8&JK_|
z5PjxB%qIbXIY~E6J2}_#nr2nad7cs0u=$aY!y4&o|Cn~mg`6^*CiI+l-8GNV;lh$h
zJO18_6Wp+U!L6S2jAv~<80Y+x*m$aQVu0_HY}t%&rUG+41QJ@dJiRM9?c15cWDgf(
zrXr5^mp#`nvWOh<i<s&CqS<xvRlUs6Ojn7f;8$I9{x!W=($iwHc56?x-r>g**RDOi
z%KXYlh*OtgYG}`|HJ7dh&pNT&M#)R6)@`?1^O@EC2^}i~IiIBlIxb*dGFh!r#sAH-
z-nFfJv%X5Y%~0B!uBtrQjc>*LBGWBe+h*5#o%G#uE6esAM{mB_irruG?nhtvx3uc4
z=xeqvla7C(f`7LCGdG{N#5(Nm$rV2~^|>C+<BKhD?Yneq>fFh?D|vMjJRhZ`RX=X4
z|H)YRpW)qZ+1dV|)@EN<-nKU2{C|d}vLBxaxjClayeXId^J=Z%=XLKgI_v$nZr?U9
zPxtGc`F|_kxaNB5@|n!8o_*!{jE1mep%kVA>{D7i6%C$G@KnhRP-$<F`_k~H;Xsd)
zYf_NNOzVzcMyG^5IUHTi2){kX)cJ~Qvp<L8amx?(n+_-zX1-$){rg&|T}y*k(}XL<
z`eU1^$1m%L2YghnxlCX)R9(bubm-TAhU_)kZ@=Ep4SvL<lI6eU(3<%VyVqU*Ec<W4
z;=Q-ox6J>0b`?ASanBdm)o-QEKPV!SX_4Z!NYX*|z=BUJos?e0@8p^NE+sWjI4-3;
zcYEpW?NxQ_t`_<|bkNK8w>JE|@mAdVM>#25OFHL>imGfp7#VnF71tJ-SEdr;u|`o3
zr!9DPUSo4^ZSCHh45zR6p6k$Dw$bb0{<0nMpI`MBJUzP0H@Dt#UQOCQ_TW{ot_1I4
zNGohwBGgjm|6AE}^Vhhud+TDQ_AlRU|8Le#-WB!CQ_rrLUuNFVxSqE_VabogjtPcK
zO?86T7tTGpMry(3Gc5@_PVeJl30JffQcRww=DfqnH@G}A<4uE8ae<0=i@`kCW5Fj*
zskjJDS}#!0Cb06RLXD4MFK3@n$m5$VK3c*G%a5@!>=aPfPyE}X#5qqxV4fyJd(w{P
z4&^R~^GXZ!B854Y&GhigvKA7ayYxh;fVDpd8$;--r^*a&44MW!*F#QD`Ww`$?$a`9
z!KvC7$3jnQqooH<vkEX=;cPcbn0ct7xzGFd;T8>s|JNQ(Qsh&4b;VzpLAd3&K-uBf
z7i;&r2dv^KRf>2}7|0``-t}DSn#lssz>7Tv3TMtqacLe&KPL0+(VVoRH*GroZ&XEN
zt_WV;7pS;xpR;aJ{3@%d25WWwZakf@(f{PW!jBEdza6t$s%ak7e4y^`$F_LkID5xc
z@;uc&?w5)urZG;*$z5T3Ad7Q3-yJ1((~<^}<o<V^zJFWiN=;p__u}@YMVEdZ{VH`=
zH~$v*>c1~^kIQWKC_2^Fz20nA^|ZSugtm$ndkFW*mKQrUZwXPb+*B+jvf^mc-Rtru
zTJP4^UB9eXE3LG4ZJpb!t<&Xw_rJLG`~I1lV*AdRrH1G43r}r%^5m{o#`agmn-gy}
z1(mMKe>0nF_g<%_R)aGkJ667VddN_|Flx(d+oLHB3no~)D9o3!eyAm?utG>MCHI)6
zvhKZjrT51Rw_dbT+~MEH@R;vMIE$w85gFSFmYd!=T6lM`25U^5`%Lq9(}Hic0-P$X
zDl>`%9vQeNcKo~He}R+d)1<DjI3`A3Coxw?rwTa%PPr3xYqc($ebci^o6s~}ZNmkX
zrNKwV7}&BlF`x1Hr7$(}j5YJ`h1v~SlNg0!b}wMj(^|-;@Ow{!M1_gQgNg(55-sMx
zYe@<BoAS8RAbRO3p50B~4=9K#73WJ7P2ii{=jiZ8HNh&?PgzAiM5(i5%EU1DLlTnK
zrAMP}Po4WT@p4#iRcOS6w6z5#)#4dzmU29;;Fo-Czs@E%@0r<VnXV8yj=8IE%k94%
z|0?%(>1C$Z3w_pCyQp8@v~=R>W3A@Tp8M{5bZy>$hHT%wHJruGPT30<?CmSR>zmtk
z?&xmwf~^TGRlN)XT&um#zvz2foa<h=rFL&w_VUoB{h6LyS_<Z;RBn5mcgIS9Wp>W@
z#(NFi*|CCKbx%fxvg{L^Yg%{K<KW57vm;hrdHEq<ZC}XdNU^GeTkgNo)OEVleCAS1
zwUU;KzVWiI;Ev-msgh6M?pVUX@QQKmB#(q9(}T~NPpX_0-&xorXl}KiN&ER?nWvYQ
z7P-7TEpct{*?i@$Ws?tgl-K0GdntAHii3oZY!gew<!j5Az6pLFwo*ZpUGbrqOY@1|
zvpP!ypD?a${3VnnRQ$f@x0l^qF|*YUax?rzH8!r_ze;nG&sn!wxf!9Vrx*^@-nbE5
z!FYzrC(Oj7FD_7cQe~fra&d&*hh5P+1|bJ#OuIC*_|O6)mKLskf`V(VhP`B&t1^>k
z$7!3zW=GmBk4%``8j{*Eq3cD&SJn@V(#^*-RTzqOtS)<MZMwuM+1S#O7#~*sTC82*
zx_3|(3+IAU$_tB2qaL-L-_##mxI5_j4k67AGp|)We7XHJe|CjR_@vb(u8T7_{@c69
z>f&18btm7J-k$w*WkhDFb=lurDOFqdAC!&jRm}HX!^ptoWb#-0^0nJ?x82S)?bx9G
z%KFUrnfn%>zg4|+aZR-9EFR;&cBykGu7?#_hVM(+P@&T?=h@b!Uu7hHn4a%A!Pc<O
z{$a$&{Q>X#?wjvDT~>3ywm59x(t~}4Uv9Ol8Beoy{w!Z&ZSI=)J+~s&{i4|yu7%el
zekIzs?6a9M$w8%KlICrxUy}3Ul;=EE(3!wv!IAaI^HA)*6S0>#4V#we#S0nCS)iaU
zdAebKi-MP&<SC|AyZgU8GA3MMFR*BSXn*5Kk6K~?v&lS$BgGSpBm4d{yy=X3>&t5_
z?0T{|?n+yWz>)@$$(m^v5-JMrz6A%>J6kSsF)a7b+34wbsne5bt3b1z#zw`4CXdjN
zIp;t8I5?4a$_E7xzWd#08B9!GURsy){AtL+3o)Emyq(iFNvL@~W?=RXTzEF-9LtYt
z<<f0m!~QdDWxKRxPw@)Vpx*kkNd?cI=YQB`a-eNRWFOZIM++uHjdvU&k5{S+^*oo_
zv90CNysV|)PCrXNkZrEJa~-R_*|mjPeWyw->wiy7*z`JY)}D#G{~C9Gx@&NG=I@7@
z=RY1-7MeCET;x=x<I{``jo7X=8@nH#-myAl>Ai40+wDsw@4kJf`(AR@xvg4nU8D4~
zkM~V_QGB`~#BqA!ISUP?nYmtmU+&%4brZVIw9JbwcH-U*uj3Zp{?c6fYnSizr6tT)
z_ijy%{?D*e{P1-4mc@Q|xl)g2OPpFB$9si|g}ujARMas<C2>xO+4&i>Loc`59=f-*
z)jc`TH!HqlN9Kx)e++Y%MztQ6jJvluJE(wlYUU2x7Ma7#HZC&mnVNK2bBa2DC%@C)
z1q>FC54fbcGgUwFjF=c8Y_ybf63>%1sV6Q*S*_ZKoK#%$7FfFPIm_8H>8#IzCa0Na
z-DlR<um&*Bv#9X+tsrnSX>nM=!e8xEQ=3xuC^{Jbzy5q~<+k44E3{u`@T{_RV>H^i
zQ)zAUm-aOGOpjOp8BD)zpH*UR<!jSo_(FZjaSfZz-^!0(Q;$%;%yjzVXEQUeJ?g6@
z9<P|GrmA@NhOkDWR)O#_?wO*CSNvHZ)%wE7iRqg{-*(S+1<Xf7t$#1>|Ff{Vw)&Y@
z#uMAa-(8nKYY*I=`tsY`Z(B{`-YvZ{dzoME?)o_&ii&q;^dAp?b4+Y?;o_i@u2k#w
zbxARK(?!eXwmrO3wkzw@b(3?`V*~<T6g<o2-*o@0^4y6&GO7)clYdJnI%FKm*1!Fx
z!{hQ(qaa6%c}#&zRW@l%nO(Yqf3~LNv%Q;6an5OoJgpjcLUDzcC6|EDhrc}v*A`ht
zyi{1{;K!t3vtZR$567dT2@{@MY~>7b>^#I|x|m00`FH2e%1qXut@-754>9tb^;1mr
zHZ%~>Gj3j@B$8gK>Yvi1t}$ta#vD}!Mz5*IZ!$az(3m2x@z}C)(jlo*nczJK7-Bvj
zQTTSqhr#0EV{SQ(c_$p0X8BxUJeJMCtT5TL%(<OG{kVw%qbd_?jMo__6|q(!Cg)=k
z94`Z0PHFn6vzV;Ze9BePP}pdsmcrDa{*Y7Dtx`^tfu*TPg<qdPM^-^0zqoy4kkF({
zOcTu&H4F}MPk5!$Xle5J_FV?vAeM=4d}o;dUVMG8Z^zOdU;56Mde7XdJ43uN<I=Mq
zf^selk`28lt^YHWl<%K-*X`DA+v|QYHdc?0KHIxP)pKE}DdXYG)v^C9H2WJhp8DOI
zJMH!qmhW<cD_;GZaC_J0T(jxD7vm=FyY#X#D{$rH89TPWz5HpP*|e?9JsTo!aJF`M
zX|A$qTk=p<W0y(gt6tv2iF;OV%GdFp#qlg8f6`W&RhJ5n)X$tRlfLWj)W}uSbh#FK
z&RYKAc))AB$=hGm?!PQQxz;g0y@ch)`%U#v?N5FC&mewSW^v~3TC1-*=4w*r+t!t8
zeUV#roikHvVwA(R`&(t^xwhwpMw*rVxHGj%^i<%F3BT6pWjCLjB)+XeG4QeIuW#qg
zOJ15RSkktLll9Yo2H{D1p)V|7y~+(&$nxYo9=_pN-`2@5LTamK&2sL{Tl%a>*;77k
z-uf<u`8&2Q3El1(G`GrE&evt*o#2W4pLv{F_G;PWXD+8tFihbK{`balXMm}^*Txkw
zT00lb_ME@$is~s<b2(qbxj{2UBA2ec8hDP8sZy%Z^R&N7)ZHLusVj&7Iv+}U#Kmi*
zz|b}St=qe!>y~H9Jc(*h2vK2}yK&{_O{GT@eV7_n_oY=iyVslKnODEE4&5><>w1+@
zuclCphg8%>TfZ{ij+3jFY)<*I_`;RE?P~2y^Zh2Sua3Mockgb-D=}ZQo%p+~=AVAo
zV$Hj7_kP~hyY95j`s#9+f#a6R55wj^7uiprpI$DeXX|`Lm;augqwbQI|7P~jUuN{R
z?)Kvqo4O;I-IzIUCmj_kvYfLoG@BzOdD{dA1`Zj;tq}}9e6=@^C32`rnER*hHNO+8
zxaH;6^VNNfYgv!qmAcrp)k6M!+;Y=(YpcTUp7gu(Z`n`d6`fx5KbY)1W^qbMWM1vX
z9k1qIcwhbP>8{<}tKJujTV+LEvR0L95-4K{5y;|wdBNkk=YmOH4Jis5!cET>Xn35y
zJ;kC|S3xeQ`$l);+cuX(p-|Q$zk4?nyEwUZE(P?Qm?(GoA&0L4=S%~JXIplzV4BLc
z<5^|xgm1E@76KkzY7?$%8mu&aYY@SxBG8tWt&t>mh24AE{QZI~4GXNN9(6pTbUv_c
zi_)UZBQqB{&$+&J39FyN%ta~bLXixmJLY&wh^QqB$ykcbTe$R6A)`xIOGI*KaktY0
zk6BMmrYPLuIl&b!bK(r208eE&Uy=vQV$TWcO$Xk@{JSti{yxX59Xbb9*I70me%0c0
zK#MJ@K89I{FX7F^VmnU#kIpAmk2y3e%<$e7AkpwjMKe-O!a2ol(gVJ!;gUi#RTz{~
zygnDa{n*s&uBj#F;H~pONMM=p(J(=$rw5muS^4c01Eag(>M2?i0u~($ySp>b+BL9r
z+ns%P4=bKIepc|y-BZu%d6HYMtUnqsxxZY3)ns)?_67!DM+@)v9E+1jujw^qt?CWm
z^GeqKy4mcdVK?)7bLW0!>?!)^>pUU5-#EtqSH59(vA1}DYgu-GPl37e;R!-7eWsk>
zyv-~+KH%ML+rX$x58q|le9^W&x8&oDVn6xPkDKlrdv_nLO{<=h^09g0%`Um4NB(kh
zoDFVcSSQ%JId9tSdw1Onw=u3=;jMKkz}Kpm<5ix|Kl?*3&dgFvtUW0`Ei~fFiOYWd
zj<?*Tmj3n<G3C>o|Mn&qqx}C1n@icQyO-b2)6MqTyHu?s`P}zwqB-ldZeGosen~Im
zO7Il9V7VE^rK|n7Je1k}EZrxBb@$b&w!R7f8CLjjZWHvEdK`4)Q>C=t;gagW(<T!h
zd0t!^=(N&Pz5Z6m2^lwwj^YE8s_Q!VRGFTv*zdp3^nBEePw7rST?2a0z3MP7j+%K^
zz-{s~wom5!mhW8Yv3Bdszp^`yWG<<^XD=J{WAei7hMVSIFOC1ZcX2n<qN@s<y6d|Z
z=7jn!4SMw4Dz7qBWopTpfa&vs-hCJMymTi#>i4lu7SEYY)(aXOG%WlziiEBzdK`GB
z(wZn>IPrwe2f@Su4wbDQ&(8QfzRl&(GOOn^^N|A<b2c}HJXUV@(n@Dwbj+{6QYi2?
z_`<RbP6-j?2nnC<o<|KhC(Lq~#TsHUx$6)^ft=<f2gQW}JWfeMVj6po?{HE!4VfXp
z;UUNoe_Mmq)U&$BVx~ahq-y>arl*QEZW8Ah*af<R0+y}1sW3<9vhwtAht&Jr@A5Er
zH%KJvFB9rvGkDmqG=X8>M9srN9_bUyIQ+N|39tou3OAe*n`k<{=I=^gPsJ$@T0NI^
zt(arL)1e`_$BmmsvA{mlKTmyQ;;t!LuO0~Z*9drJhHyE)^<bz_bLa~-VrdFzo)cN6
zUA}hXSH1f?UNg;}qj5b$L0A0bscRp)&Rj_lU|KuFMbK1u*-~}SgMC~ziF@X6nNpCw
z=UeXL!yfkB4Z0eWEZm;WWL>0rS8_-1^<~?BFa2}SC%EFlvm(*|3_MY9v$Rcii+1d3
z*STfH6R_*|qPe>o_vf7X&+xrFZK;<6gWgK>zsiR;*I%mtH~skO+7_pdbx)KncAO5_
zT2{7fInVXWHwxaa`nmOn+=Hnin^wzk$NFqPbG*4Rduy^ZchiNln#L<uX+8XRewI#b
z^u=<~e7@h8uguDOC7_>k#UyMN*BL!Gt~1$lUuFHvwg1`sJXfxJ`-)rjSMp=Bf))1N
zyUZi7CA8w|f`4M}W--~Ts*_et+a+6iW~0W+lb3cbos}xIwwLSrl-FXO4W_eF?8G+6
z>~vcpzU|5lSNFZg=5p~B2`bP3IY;KkOZFw-)Yx8b-CLQdx<0dLos2Tq=fs<9Pv8A3
zT)g{=;H|hfEfZyK%zkw5&EYNLW>@PjFHdyYp1SY8-;7-2z(cczIueWTMs2)uZ40k|
z_0Gyjv40&eych5I&v4h)-`V%JKyDeo;=ShJpI-x`e>Jb3I{(FghG*8(7VfCI{PNwL
zJ%u-|au1gzUJP6MHbsYd>RZOFy<556<~RG_>sx<uZ{dcoYrEHdn{_MulK<5kSD&tR
zJ^t4GQQ{A+&7bc6nDxx;w@mE%l3V#MX-__@YV2^#^*8&cbjDa9fyYPUT5-c0<(Yq;
zy|RB{s~&b~;^p#PyT7cl+&1&ss)v7+tU{d{E8M5s6@68>w`<AG+k32TXUk1^khPQh
z?!y*O-crj0j0`S*Hb#w01Rh&1`dz<CZa&9^x92-QDzUK$tNyyi$jc$)wb1$O?;{3H
zP6`J-XSpn4x^Q%&Pm2GW4HtcF4rMmHox>!>a!AtW!>_~x${pF=(Jh}(zi#SN44A+)
zMTmKu_-s9`#0KVY#;2~rhDwYk788~qw4HEQL!?-}|4QN=$5UywH=G?*KZ`B!?BP1Y
zBjBj<TwHSJog>dYPby9}n#s&;{N<p;SuUpf5(Ak_S9yY3nI<_%rm~oMIRqMoxk<0E
z?vUi;e#kw6X<4%3;e8UUfxB9knn<u(oMN<!;da@`<Qm*{`p<?L?6=(cbuNfb$`WpA
zUCQk=q0y5?>(NSv;|vRuI9M!iG_=;=YtUl+DAug5E_htdKrM8Ymk(!Cp!GFI2akY9
z)hCSE0vd`v7|sWI6uO=eZChY`!b7-4Y}%ynI~tX`49gD*tNxDXF;LX-P`A6mF!7v9
zQ&_r9q6yoM7OQ8SHpUa4bzJ$m`SdZazR&ks)7g||JXF;lALYNuTh+F`Q}~c~pFo_N
zvll}zM{BW`TL)i}Z=eJB?Z@0}t6NUjpT2wc+V7>Yo0hD&e>uu)mv5C~%KWVPE7$)>
zUb;fhsij8wri|99Ps{G^h>pJ;7<6m3R?B(MaFc&(pN^Mi-wa%{*5g&G+tx34)VO1q
zXF1j0cv<kyGP<zx;N^o)lApNsdcUtet`X|g-Ne$$=r-xC`YClT(}!RFvK9ZoxW{DC
zis?Ctc?V){UfEhU*OZYfm`PbTp!uf6cK5(+w~URARw*Sb|307ht39>u<PN*Lr(Wr6
zd;U}mnz$_6yXQ}h^4`*O2c3&<#4l{PYm$|Hf@^`q!_>ay(=vi*ANS{9luj-#(OhM8
z?pCnBr;yd^mA9;#Q?#c|Z=A8r(?4&<5#}vk4(UQ6T5mXYB_G}wJd@<(rPUc8B%Zp<
zS7*|#_O`7aPOB$8=AG;%Y>?ugX3*VoK+CNl${~1?ryAFz<$JO^MSE3OxhZAr@h@_o
z7UlI=^EAsLK|XzVKbJ>>#qoPKT%GLnAVg({)**%!F(Lvpp0}{F3c0WtGl^+*?U?$$
zWzw$%fgMv%Fz`1tFkIo&5?LS++rXltP_lp{sNjI?-Y4}rssZi%YYvt<UJ9JREcujU
zQa}%vi0p)htc2znoz0pDXR&Z7cnHt5C_kvF(<W!Y;K;CY)?*!o(;jE<E{piik*OQ7
z$XT#OQ^<j<VB)XD21iAYBL^7P*)G;e<z`Z;SZ3g*DfHoO1IrUV4S^bkL#%u=*VP?n
zib+vuGG*A=s@}q&sMFPFsLXR=X2KI6K25=fm<ODb9v}1(Q)5}Ok%L3oCUN273;!8Z
z%Y)-HS0)CYa<^tsIDhJmean9ak<wktzAoP+KBu3&`yeWx?cxy=p<BIt7VS%|R^EFS
zZ1^(FL_p^9Q$_1ZLJ`3WHDCOinb0rcr@;D4foXG}^yl<vuiu`noBmR}NiWLR_IUB4
z`sv;A`8gl|Z41@zfAU9u=2>4Qj+=i@{ygfPygEAZWq9z;yEC@DTQ7Uo?dBu5*(^t^
z+y40eixn@jkN$eOuRi#Bc-+<Pi%x}yU3)e=@G2|Y&aWyBS44T;R`oag6#Gg=-Q2pz
zaE3+U`M1uEVGA^GsQ*?mitb+f{H)!cUDaRnug;Qb)z~yug~Mi-#B~;JDXYKXSHg9w
z^UT*C+MKmw&8_L7-7)-Wd+iS|Gg-K638V1={l+PpbJTX4)m{iOH*&k`(6fy3fPlsK
zn!xE>-tS&_aL0wa-?H1I?ggrF^&WgX(d5;J)mcGB#fBHxOQo({($ioZFA+77%Xbq;
zWA%d-tE#R)J%1*4_quJL8n^A<yV?Eqnvi+t;~GouJ3jyM(*Dz{8vd1KYBt_m^38v<
zJ<Z+y*7QGve{Ig&kh|R;KW+x_o0|8zZ+>%pO_lYUHCE?Sw%=KEaqHcm<=a-RTNqI}
z<NfZBm&%Ww&<uF1`Z{mX(#x@ywY#pAO~1S>+ChWmO1U;e++{hNwLxpErfOfaS{+)w
zDx~Oqbk}Z)*ZXa)W=`L|biZ%(6~3j3Kd(3VNUf8fa$NoHmH!MM+!gd9Ie#adkn^nQ
zN|uyUIKW$c)ga}h&yJmnLJiC_KTct1ys@K?{g^;|(T*n~PlT8R^-g{~ZlM_D>g2<-
z`+>*9rUbu$1Fj1utXRVO(ZJbBC`92GUsYmq&!GgPhJX_Zd<&}mB?Q`;56lbfQS00u
zrtnCuXpR`Wz|IGLs{RR0i<vm4S?Ec!Oz}w0GCg?ts1t+T4E@N?g%g8S&ui#~38*zW
zUSihyt!VkKSbFWTAfq6!@(zYX35$gsdxSD)RWCeZ8Nshn?8)3QU$pHN!)_+Ska@PB
zT4WA!trKF(v|MTX;fMl5h6>YyEVkMUF)|N=PkDwtY&e_H&fuuvl@;L6!RiqZdd$7$
zpfeBKG=V-H#bv%~+$&$Iws`pKI{$E6%rLF}g^&^7w|NJTad$TySs1`oFirCiV=u4P
zq>!LM)&6Z#`#CgL8hLddzVNd{W663MyA{1h1iJ0)cL{I?Og!Xb&UYs9z{hXp&MHfG
z)>u4cbLG?fd!eDy=|hWv1<N6+B`c;aaWOu}#dbndh3TPE%N%RPeF;&<3L?kc7zDYz
zy+md`)cM%A|Kjxh>t3!ldc+&;^gCVgKSNukr~S{ol)?}5Ip-VzU5>o&fn<5*7MoK~
zn;zYftV^|j#`bM*-+zX;$JWWbEYCjWHBV#tM5)6ccFpCU?WZW*v(R~hTI;0bS<B^K
z1YTVI>yJ-}{r^k5CU(jx8NQxZu<4MI@q6Pr$>HoV4ea|YuO4`L*;u@|=z^}##4wBW
z;2X1fuPwYJb*k&x)__NaefKMvXD{(hVSnG}|5<QXp6-zbg>$-F?w?s6>gcIIUF~zk
zqKPZc70p`C%%`X8;K*g!=urE)<ARnXtLsC~fNxJtdzV{OYBi<sYs^`|u*&Pah=7>G
zgvA1z<}j%&Kegik^Bk$?yM1mg_f!?pbZywPt8j+qL=RQ36_FnDt2yM-8Cr{uYb0^1
zFgOXRt(ft##blA_oHnM)L(?XhdVH|=(G+rIS+e^?3zN!91;*q1nA7+fO(YM_aBAQG
zx*=p<P66{bfdhI~I~^FgzB{M+mlZTJTQ(c>6&e*XrMWaK-oMSk*6YG5v%LMXMa(7+
z0|!3a&n*Rf2RKE~)+p*s@ZbL3fl;#cDU)MRx&>Qyqr$QYtKYV?tP<cjF!h1YBt`|v
zvleTnF$l1%IBMt<#4dWAL&2w^<h#Dij$}Tq2VARK3qqZI7#O%4&$2P-s7m?SzFI8E
zEGe)c#kk3>V)7vg?uPQIp(pusP8?>D4>32j=vnwc-DIh%-3x|WIo7Sd>&v(7S>mBv
zk-X*Ebpek4gg4@E@0h)=UiK%~y-p$U-f?#KZYGhi6{_O=lE=Lleh&Jb`l8F4?`G)z
zt9oDA@0v`=T`?(h$CmoPM`P`$|7YO;x&On%%XhrrT;9&~^mFmE&1-t=CcmtDS#A}*
z?xDXF^P}wxKGc6_%UvD(mtWQX+`n+fPupfYTw?NACHa`~@)gg}aB&Nvn5D&S9t>S>
z3{Q_S@szJv5hdynclf5=3AKfBiDy<69Zg9{YJDQGVy2(ai?XH9f>Ic{HZX8EFbXIf
zO<Sar$*1eQ<h+c1N{3hDGS7pB>Jk%|G<a&L@waF(_-k_87#y}-qr}PJ=dt7=hlu7g
zCg)vzE6X&TS=rQ@VvqQD#+`JXzi`*3{|t+^M@|2hE4m`)vdOx)b3FJNbj@2PXI(6x
z{_b`9QPCwqVXjNEz8zv^ZajF9lR?ts@ic);iNQ*r-aY&GYVX(Ef1|rsty>@YpCR+S
z>N(^83_6Y-Gu|)>PhF>5;bf?y__#$><Jklbg(UTdhlJ0tO^V{~>N{Cm&~t%tNu%kR
z!zGWR-`I#-l<_LG#@}$dQpLd((|y!U!cnWhPxjJd##1jRaXxNi682>GP%WOGV#u=4
z#;9RZkVoN3+tmgWx|V6NEbF#=Q6#YBnVMq5thX#OZYl}sw-i<=`&{T`Y8O4I$Y9d2
zbIP0s<{hins5PH*Idy`^(@rLFfy{&F)?tdPIt*<mO4<u>9xJx};BLKShT6iL2Oj=d
zxOC~JU)G8p3MNhKjMx=#-k!sklA(S^xTo{u<$qr|CzbR#D1UeQbi#l0wu%Fc#j`k^
zI_Jq)vzRo*ersn6b3fvwC)HVS_~F5jbu;}q3d{K1Q<tPTdHYK`7(Oex(DO4e*iLKh
z6m}uOBMS2-ENFaqmSKg;GRw{_4Z)Ht+FEUXIdHKRJ+Lof61gI5bDrgpSYJ@EpGJ(w
zVFB9>vx*``BxN3YI<>OMrX;ND)?mnb(6MgHN#lPfTo0Z&s=Ao*-i9=WBBck5T*Y$R
zCV6Cg9^N1)XTakXae=Rbd7|w+0kPEOXB%cYcP-F9ZE&fPztD4uN#pYAvtM>>5PF#<
z>B;f-QL=vCp4Hp#-F_$b=c>-VQx7}3q$+Pc{uXmD;t02i#iH$^Z$Gp?oi*vAW?A#`
zT{0<`eKTG)u3D8*{{Cuo-Sl7WEB`ZizN}kv^*_Vp<yWuF|5?wY@m&7;ow}I1IhMyB
zZna-#`Dt>@`zz-5+AB=2>~9k({Z~_bIsMG9z0dTg{*JEU|9^RxP^rvvjkXof?6;?+
z%;e-L?ulBKzAIbiNy!=QO(immC)XTP-Cou!G2z{X8Sde$&v<CJmtRnx_~*pR)#clk
z_RUg#wLkOZ7c<>W`zt!`mTa`=uk?<bxoY`U2G+wjA9FOUTCJ(RPw4k%!7a~QIyI-)
z70lwf%-EqW`jDeRP+h^lY9jMP7jDg|(`*zNIz=2f<po`sHG%`Wgo<s1q>n7GD|pt*
z-e-4$g@svFVx6QT%e;H{IXE~S57s0+xWq80>i|=>cXt%a2gcM}8#Wzj@-v=ZB6(t-
z#SE6p2?<4QOaU>QCP}Vj<tu76@>CRA$>1%)!nxw#3x=ZaXTCczo^YBY&dA~E$Lds`
z&M}4iVT+Ezteyl0$;KHHItpu5FUwbP`aEe+G~o7X+T%3y@l76<kf|%u()<cM6ta9$
zc8augEZlc-eh%BIm_WaP2EUM?E;XC#g}x#a?DK9E1YBC6Y2*{ca#d7Rv^ufDqCc~J
z%Of_g_Kg+JC1>3YEWe)<@|Iu4pq^yX5F=H<wWFLN$$YD2yAY$%;hu*E3mJBweK09<
zGDnuelq+n?+qd85adO}8nZdHCv0sok(}iILW7?wFBNCI@EOw?UDjLo?$riK2`9hP_
z*QcLNW|{`GT;*jr^xWdaq7z#gxZll*tukMrU8>vudX-JU(xBBVZx>B^ZrUpIXJ_fB
z{|sGPzr(H<@5w8>9lPQA&bp=#S1+-fy8mZ*8O&F%Q#E_ZHt%XLlNHgyU)HXT&<xDy
zHTrl+QR`{UX@zM<b*x;cpRleCYrlR+PK496!Ph1^<NSnZRkP)<UhQ}$pxAfd!X$wg
zy{voZWUW83cj@HRvraQD%x^5<IK1zt;pQt_SG%=_2kxq_*Lw2VbiG`-<^I%~x4#AN
z$cPJUT)8isyMy<`M}aHh8CPZ>W<FNk+xqT&eSG#EjalYl{U)!2idwYRUW)COlexa}
zzI3d_pG^lED%;|2?5}xfGWWW;Z=&4#mupRJ7C62%X|#zKVc&Q*AYe|1!r2NhHOEy}
zoJ%*coH(JEyk&+<^V1m?6;FAEjEyWN8K2_d_$D^hh(q0=!a>PxYSyafHVV&bm$*1j
zmpm;oq1mALjQ|e^r=~{s6k%1CMa$)GlpJJOFrD-F!gVv5&)#BD<i6teQs@k4xpVWe
z1zitT_h)u=bIh2aHRGgpBHJs6RRT#hT+Tk6TRj$X@jSK2EU;klY+Ct1fuWIu#V2W5
z?2*Fumq!l>T$Px2CP;;O%hr9N1-w~1dhT5Cu5bNy>v~PVL%&6zn!GeFacUGBTx|B3
zE-<NWks(V5S1X&G0S~|8jRo3=`JT20GE229xe13HWav0ND}Kq!3@z`Jg-$;X2F;uh
z^3uYKk>iB+reMuS6J`Z@H3l?zEoj@Z<$+km^S0t985K_9iN=#2a|ce^xt4DNQ%X;J
zvX|Vk19j8(G@d<p#6&~6-Oj#CivOl^3rP8{(1oY&T<8CASfDvXRD+$-#`8e^%nJut
z=6GCklb9)E(qrExV8j)^LDu3l2di3#M^O5a157Fj0_7~z?B6>v&x{IkOA<5UxbUdY
zl5>X4>J<%)S`G#b97lTm6lx3nCS7ote$=3uMIre<=hIsgTFO)%Cp?-U(=b6Oh^_eB
zp;rdYMh1LSSgit17J9fWDE2qVQ**1GaI=Xc^v#=x96W7zujX!zc9^AgWqHWoxgoQI
zX4wA9wi0n_bZ}fAKKoIt@XvKq{>tuOyCb@AeQsUv^8Z(M?{1u*l5};RL#ia7R<Op=
zH9p68S-xuz4(8h+(NyTLcgAs{*Sj8V4~{ke$DJ9R@$dwv{IkA0Kc_GxPdH{FA9U4R
z)_K>J*!ZuZ5i6A>O1y817hco-;u<sEyX3WU5C_X5zH>J?g4=hA@7`75o^Mun<65F{
zr=I4Kb5bjw`Y7JBy&&Dw`RnfohKUYujpbc5&o@0}y<Avm@i%}`;Z(W>gX4{b3T25L
zOZwa;Q+Q-G+6DF*z4_R@(uwcr-yQ~L8#UV#8v-mWeV$KhVicGdGBbtiLni}ocHE;&
zoS#~+blVg(aCqp2aB`gUoDiR5w`^{n$&`(Ly%h^3e<wD@Zl1D0z@(2wu*E`DDkxq^
zCO{%<hoWMIUcr)3;T;?d3SM3+3kw}4e0+99*44Q`%|O(p<3N_{>3xhU)7Bi|XjWJ?
z??*C&hB)I*K6i$7>>d+re>f<bOze3+smy_ahf6Q5fJ0@~#17#%4PHA{(m3Xqhy*xB
z`tvY0J(rqXDAnySfx%OVL3;UY&WCT>eFH1BPDwEa^<EKWI3g~r^0xhqhFUP|9#Q*b
z1{MWgg~ivBccg9j)~Up_qMb)fIXl$&@HW+K8L1`S4B`@#PB>XUa$~GrBolKbAdrD6
z!#aJ6I)h-Pljf{)pW}`%_?lPm3R}MCT}j8L1isDNtAt}7-RjT$6p%i1{zuvG3MaC3
zALRav{Iay#!Mu8x-qP~2W0$utd3ffOD&HnM@zAG#OlIBRF4uHhH##@xn34edMJe6>
z6q7r*zIIKWXqd(9;W#Dy_LVuamWnkBEOcsUlud6l*zr{K?e)T~x8LTKOTU^ockNp9
z@E!GR!L09;-+O!8UH+MzxbyDTP9K3)3w|%|`t$TvzJKoam%G#drT!Eb|K%BWVOdzO
zdHwTHsr_G)JSK&NTFU+26Z8J}-g{pw=52i&9PV|pS3@&MNNBN#>BO~+EVDY58a=ft
zj;!VI2zdO>E!$K>v1rkh>^6x+o1_q>yM+>WgM%E60u}8HN>*^B-7k34t|Y|i#3Xq8
z5T}b)hzH-r8$N1EW-Hhdw3;6&{)+2dveMH+&49y=iRo3yJ2@r>-{2rd+Z|349*PGm
zo^zN6)&$8&`%IAYHR$rn(lTK1KFcs&PAjDG5F@kaRppk8t9hL@|J^hAwo2&AoQCsT
z6{b4&3)ttq$Y9%&w4?0sl1UQDxBu>);5<uYo+RVf1L_{EK08hp8g=p{7Wtb<xdnH`
zpQ*N3m;9=I`UFqT_7($%G&PIFIXup@miYziS<cYsa6GbTD*H{o3{3-*B_~;LPG($q
zljYLV)D~_9iO{o-XM~M5%x=$^X*=PQ+C$6!EzLg~j!8+W@o9AT#ZG7wJd!x$F~?3X
zFINuF)|Lwot}CdRuoiGN2GytKHTY`@pIH@q;6bYklT@EF(@I6no{#NW3Yl4gPq_{3
zR&X&$NKRNRtijHtYU#<*z}(XGIxeAha@(pc3?d7U)q6CXTV;04klo{Sz~%^#$O83F
z;~0emr5TTw>!he`Fqn8;l>NIykbj;g`;KoeZUUE{Oj~5x-SS&)M<T<zd2Dq*gq{WV
zT#{j!VexoEz}^i9Vm{4N=~30oNpx^*Z<Ac))Sy-W|LV?tzM2mu`mMU2OY)a3P@CD$
zadhRgOh%RTTgkJ7SGfetb@}vc4_o1L>sxQvmlqaXXxf^-M~yvD-RALy?N?j(PCZ-H
z5VSaKnMG!H&*O7BpPHN|i12;vI=0T}$IRJNK6JfJP;z+q`(S(4QDYasKMyR<xw)mx
zJ0Zky)-&=$ubRWm!lP*$?DRC1J6jfTa_WCe<(}~<RP`7G=Ow958;3}T0}N>_e-c?j
zRph=lUEv6;c*Sxm{Wb%a$E>Dp8NvJ#g|n9DG@Oc2-tZ;wZbF&vtlqC{SEiP|UK{o7
zT<gJ%wUIA_KWB!PthAgjQu6TA`bF<Ewk^(C5tiz(Xu-DaOD=I7shc-<zxF;^gE_M<
z?t2sxn^EZ$Jc&=hAz|{A#tw-ok?u>r|7VzTmw#(+c=gh=Yqx4wd!?;D{VFCf%lzS2
z`yhwnKRr)B@6x<##lAR>DKh5rHT9QwcQ7y<>Dn@%)9S^cxhjPVCSUD1$)d2jiP=VZ
z{eOo44Eis<`u^;mdu*bH!&eR4MX_6E6?MpeDm%2$JNDgHn=e<kr8@spyS7y5L*mvs
zv;91}TOJD7tPs~c9<cUP)L+ic_ts{m|Gcs)_}j0vN2XWz1(v4n|MWWIIy=ALT8I6A
zqjWq%--}%faME!3$jy1GBcM^0UHA_7j-$ICKb;<VA))-m%ur7Isd1f?I4iGd@1A#c
z=UdU+xx1q6E^W*xj(9SI+icqE_&)}(_;fRpJehnt)#3z%C6{GSXfZByjl0ib<W%_g
z`6h`bpXRO=Gc6w8XH?<T@(khDFA?`iTGIH%|7F9>2F|mo3=OU8Sj9c#9v_pFVLp9=
zFTlg8^Lu~!&5jQr=KM~Wxllz(;}C<quXA(9Q;B(ZoCJl~8A6tHY1I}oc(QvmEfo~B
z7E()_60DH^TgBu^j#<~YX}i}>&3@&xKI7Dyzpmy@(F-PVPkxtm{g2_xx$B+fn3@a%
zn+jzoK5flpNm|G?)pc6oUAwri+}mUOZiH6dji|qPEpqY#X3yszFa2k*mS@>D&3WOi
zoPZ$N%{?bBzcu%*H8ivn+#b-iQM^wjYOzQ{{5z4inL<2=o^$;!Naaf9uk4GyYU}rE
zeJOXj&S|gmROXN~Q<THf^JV>=Caf(l_@EKMSL(ExH|Lto_uYA0x1T-rzIc~r(92)V
zg}TcwE}Lrjt=23vC;!*s(lwEOha<mgvt3zp(sRKlljXNV|4FR0_E1oh%UttYDbQ~!
zcdw2W!=w<&1ocUYJqm@|%BpW<!Z=i80#oL@EIlR>vO|00(+nE{FO@LcKE*{Dk0&_n
zn5vrIV=`fvzt*CU4mOG6pQcToc;3Qcszu@%gR3q3zNi_t`ycILWmx3h^!&CE(-kMq
z0#2m{>yUE=-JZ&)90OWD9GQCf-4O@I=vhK(OsR|_k3!}!Jmy?$aYAe6l*1YY8gB%=
zB0dN(cv+k%m~mF2cB=8W=S;B&SmosJa!inVY{DS$j(v-|iB-eS5bHA*oz6bzc}|{H
z&W&F7{dU;}bFsW7`Wi;BZU@gk@3!#I@}KS6lR4Cve&5dPx9EsPd)P9L{|x3S(p&WV
zg(sRuL{FL4tL(A1xYPEDg7AkX%c$Z?Bh?gk&Tq$_|GRRpL;b?CYTKynF9&4}_f0(Q
z*t)R(>T*^2?oC`vyg$jT|0VoQs>mV3hd<}!+d0Zh-yQKeGr!91@xSZ$of;iibu_KD
z{}Zs@D!T4=*|vQ1ZH=>YqmEtO`q#~(>X!Ewv5mKGtL>lt?(j<KInm!9?|Qn^HMMKu
zGXLefgw!*?dGF(7ES!1br1kH_<@;*88RwL3dlhsvWa5lz7KLpOdi9*<?%%~So#$C$
z%Z@o06on3)4HK4F=IrpJEq0zj$I~mF)7C8g`(df-tf>><8ci|1<;Hzy14GT_%z2V)
z+8H@yglA}moVWSm>>044v(rO0CZr|QD8`3Lpk!97l+9wBMBb=U1`nB4lUP<BW13SO
zV<gmcg<+!o?g!lr96Z5t@;(B5aheu+>sy{|df%?p+0g3{wRwJhi3yYA64~IOFcE8?
z%NxTy@Aqgh{J*w+(u!80z@AS_M1<dZ9(i{n!Kj1VZRNZd&n~={vRk{uY_I9A>*v3`
zTgIR3vV8udKGvnGGp0P=TytjUm#V}sjdRZ))rw5Gy6R0mOW_tL=Lu7^xH@ho2ARG(
zQ?oo$^<wxf7AH0?t`+{vSk49iQrKDFt}>rP-Ak;2$v54;vir<oH?<7+w-#<`Svwxi
zY<j@bVf?-Cgv8n632$1M=FIV!7tqrfq9Oje#pQ$Fe}=n4W)s(a3rx8mdT7S9ydsW7
zAHR>6!%Sbk6<M!mYZZ1|cJ5Z+!_g<gE-#T?G$Z4At-tHp%ikAoip={a-dCz!n`^Q%
zXx)Qf{>jmPn@cZy_2n-~eU>eM^5b#Km$vUNXRItv+_fio+q!Q%!dKlt^)<r8{+j*E
z;G*E_ta)B`^Nn{ddY<j~t<~&5gZ~}Pdr|i+zh+u}e0Wu}sypyduXo|Hcg?0nfmcq=
z|IaXc@zl=OJ|E6&2~Rq4<!i6w)#%v1n;UGW@7sHn`_&c4jNE8%@3voa&bv)~y>0rx
zw2oy)`<As^^s0Ik&T0zuXB4-+yrOJh&$b<_>v#TV2z#Y<@ZIK3l?m&fUuC|$ZDFoQ
z)fQdTp!CnBzv4wB7Dis)u{7VU_oir`)U~en8NAu6<ex2IcG8~9tGa9DL&x<k_XBTj
zXMDMF%8ch%SS8}vn!Ls4hT2aJ|GY9IdFS2NW@^jbo_56NJ>r%OWe8pN{a23dta~O~
z=T%I|o8lWaHS4B<-Hzo!585Zb3S06lB>PU>&7ePP?Yiy6KiL^)=C4iGxLnJ;cHPof
zp;y*bXY9EuWV2_hR%ms-u-zG6501I74bLx+o0#f$;?j%)wM6%a9@(!A1=9}*PHFs5
zVt4P+WWC(1?ZGdjHqEuly5N1(z;8L*l(g-0qe_d+CqL+%Uv)U`+vWNNA3ZLAiMSN;
z=oQm!H~Z6z3tnB_Yi|Ac$@3DOfPib$H}tx+o8JgEGF<*Dci!<8+pgb?vtYR7E8W&t
z70OY-m*wyCpTTJUak1#Vhvs^1eKjkqW!uEUpl6m}v-Wz=dVeDGOY{qK=PyM@%V#M?
zJC@xKeJi55ZLU@otJFf7XI*`|3741q>g)QSUH<BI`_3<1a_RTVs^9kgTXw(RhpXpV
z@&U!Inok2;e$LcC`t32BBPZwMzZ38M>R-O5s_W#p?G8pO!f)R(m3yVH=1{vTZ?cx@
zo{fjr>uTqIpDq^8krjQVl&iHhdS%O&g&*1+R|#r(?y6G_Z{lE0Ka{xm`0s?4%)F(F
zjE_0f3Zyy|=KWcCp<~M{V+COYKMx@e;oq(zW#=_i7gqW-F}2M6aiFk(S?}JJLkz+J
zLc*r(`&2Siez$Cl$Yu_gR5{<!&KIj^<Fg}`S5e5LQ|9Tczn7A3etKOdogcs*KL1zk
z$$gEHssZ(i$G4w3|G29|B7KFD@a;Js$FD37?BBL>*;aG*%PUmRUH3eEbZy(Uxt^{U
zuYB#hSoF-YL1@OrrjC1e;xm?1Zu<6HsrS<D)#kP9PM0k*+4gEi@$7GLmtS5@|8On)
zw&d(*FJA-{2o*XAJX);LbYknVHCMle{rgh7SM<tDuj11Zt1jJ^yIu8r-4kEy?6O^Z
zkNODqNG-kn;n=lP_w)ZctltwCv^4bH)8(Ou4~CzX%AbAv>)gJ(iI>HsvZddOPyaW2
z>94Z+zpCOE+dm4)UbXu_gXV;(sU1(BcAhln-Zi`RY4)ij?00rwVe~SPS5@%;V<yph
z<mUxWQ|C11C}#ghSD$rUnQydCbDG3I{jJ=AGiGV9a&lQG9kzJuXB8tT-SWKsX3GVa
zgEj%IDjiXs3Ok+T_48|(q#86POmr}?_;B_@01Km{V~9_&hFHNL(E}&dUN|tkjVRE(
zQNyFIGQrF9R&fK%!zJ2jlNcUb9w<;~c$3=5Ea4)~^k$kv|C^Ra7j=XUTU>h>Rz0a$
z-=n9nI9RB5=K+?p;+{TfhMg;-ukBc&5s;`KIj7~%<@Zf1LpL5WnlmM7cD|P2)DYe?
zlYEw{bi5K?a9ZifhjUibPrsjD9rmAL=fBD2qS*mr@7FnNNhR!DzwCVbY>RDQx1ZYA
zdt=+~tly=*{*hl#Yl+8PE7}sc>dW)$8EOYV90^%H>CBZM;h{eYmwKElIsCEIR5sf8
z%FW9qt9beHjwT)7c#zxM@$n&T*R|DA{XAFHmFhY+MZTS4)xFZ}SM<@lUoUg7j9%+K
z*{aI>tX<f~U!^<Ge9rp)vGej?+b!>+*M~*Mb^S71SpUoLS6Abeqg%sG=e#}cT~hhi
zLqB5f&ZA!QN@iKt&3$z(@p#!b&wVy2Prj5)oBzy|cL~#UJLfm+gcuI;F1xs5vQmo0
zvD2wH%DV3@xMB8c|H_rlg=;>>p4+uzU9v{#ijtR?m(QIte|3m%bNWql+0$9ucchkc
z@4Av1n*Zumuh*=(O9Y($PTQq3o$u4^Hr^H1GhcKo|9kwnW_xMWT=Nx~bEeGt+stAj
zkml9vGI2`ouT{-+Kh@oRv+7jN@;$$%N_}VB`8MjyUaLgW_g9|n_pZN`>i)-Y@|Kcw
z7fU~Lm)*$I46VDHomn2$yZ@E8saD`hhv2epdqS+DLqA=W`g^Q*@vp+$scE6BQ+?0u
zTj;j*sQa(dt%mD$injdB)2>`3Q*FF^SK%@dzx7*AzqVVP_a^yB;H1e@)aFf@lC?kQ
z$>t!@V0IqEV~kfgC7<zm%APO$S$T2Fw%ezs-A-<eGAnuaGVJT?>92IHHm-<Vdh%P+
z+TCA{``wPckh^!na<9F;bFZ~sUGZ?;R<WJGyzhKpw%4yq@$`1<qTM1dxi@Y3&mbc9
z{I|Px=ga9k&G-4JimGPD9$#!ZXY;4WdIp_*&W(1NPj8DK-k39=cg5re_2{m$&TVG9
z>%G1lt=)I+)$9B#v3r6xtdWk|9M&4ew?gvNe}=98XN9)bnX4?Ayn3yT;PRO-YUb&-
z*F9PASSqZ4<>>~MOolA(DG&Q5J-n~+Ha760tn!u{vv=oi_g;D5s59!j@RCoK{?ERB
z+GVNtpCR2n|K4|N>9n0&bo2FeUu-=dn5`M2%Xm)1UvFN9PJHX{fW0hN(sxZgf8yJm
z)7w&WckGplyYbHSfspzGi$j-B`AvQMV25Ut#7<3N$G%Dlt0=*<^H@2$yV@q$2`%Pn
zlQ`Vq;CYzS<D2uOD$kjU1)k?MB&PKSF;-7Gpy<K!xaU7ZiAs^UN>ky71`*DuP0th$
zsyLi;<<q>+%OH~Yv3jyZz|%Y{8}&Y!L<jc&*S9rWJ`}imEVAP8ivJ8cyWDpvu6dXA
zsC&XUcVp+ZE6>(t#}ws;CG@3-u6cMz>Ok$gzRVRY)A#pF?5;Vt<7@dVvjgpQ^X@jf
zxxTADzjf(GkE~86C+|&GVpkO=p5NblY<EjZljDTndnbFSPhq&u<mIEqI&WSEuR^Dg
zfRM<u>f(l?_@X1H+SxM?EO^M!#B!=fe3RG`4lX999}Z`Y{BKJnE+{OU8tZD<sd3Hh
z>&vi=t1oTW^Qw7WXXorcEO?evi>Z~lMb%N2`Jh3A80*XZM(?=0SLHrRnJyMOYpTDo
zQdGUc+Sy)vw`!TsF4bOix#G6endPt9&O4vA_x6r&bq_Aqdhwqj_`HZx!{VpY-#2WQ
zIW?2N#HzpA%I)^GySgcN&8GQp4!QR10x!$MGv7ki2j2KoxH&zh<RZuK2I)rDsqI^4
z`DFBYxF34hr>j&D%BN~Lal-pXyMyo2bGN^?n?2FGRn3IO=~;h4+4_>4++FjNcWS#A
zEmoRbk>SVnq>sU;=i_7h6ej7OIeX{tlb(?G##Y<EXk(>UxXG&x4>WgJ{O)YOwrhKc
z9_OuRJx@FPjTtUZkf~4Y=@KuWb|Uv()<sppE30<Se<KmyzO%$u&O45ImMQaGBaSJi
z2hU1vc^Q!2HRJQ+^0!Zy2XE(|zD(+s)Vx=_7HiIpzLKM1&ehAV*2bCoG&@W-AkV!_
z<I&9r3s&_M^xM7PuCx1I{<?@OqS=;DWSl2t{bw*+aoc#k)vSrUxsPLi+*tn7FlNV^
z-(6Srw3e?7JXd`|;mUJyYyLfnW?MuyNnK*lHtJOT{h(8znMr(`l-KizTtzB!CtTRI
zn_7;G{Vj+#61^-ezz|u$k=|puq9sAK=aLkoXr`_)W625iecUlotF8v?ede$*b7$;4
z6elocr?<$G6YYU~e7c_#D!xBuv6<XAc_J&j<ZmUD5IvWGm4^h1|1)&5d{DG-mU>k>
zfAZ|t4*Oo7ZepDKw(Fa6US>bP-}?UyVP>_tSL}Mdwyfp1Wy|8Xw3uuEG^dDnbJ4C_
z<=VNIF5aJ?^D(fU*Wu)w`a9L!vtDevc4u~NanSR?FRNvJ*;2NAe=K^p#Cy-hw0@@Z
zm#<6v#<snBU$!fkJt}LJ%+uo6#*SBgWMh_x&CD#@@pQVl|E`@Yi@xerS~+iGp1O9z
z4{7Px7yqVB)>*OhR(4&k8xxoLgE*=F!%{BAp4ly*e+30ZhxmR~tP8ESO0qp3FiXa5
z=>sDt#rApLM@3F=W=Le}oiOD9d*|DxM|;kF>r^T@n3SsA+@Px=Xu!b9_rgd|BJ&LY
zI~C^u^RP)C2V^Xg+Eg?=RFAYyR5{I~<gLIpRfh4>V$S*+-QtOASzQJ#0TV8ovND~R
ztl`Jm>8sKtnzDE9NAVd4O4eKpp220iVYc$BNKMbCMQjUNeg^XVPM7A+-TJjTD}1Vr
z#1Vz5k#bUtjAk{hei`&yrh582?XpF;e+hQyx`eH`w)a*<%{j(%ng$&Q=grufyJg$&
zvbKf~mtIcvTB+W0n9s2IWww}E+l8J>9Lqkat%%>P?Em4CV96ct*6tgez9mwIGfY?Z
zC{I53)qd)Ut;YlIem2{6{k`q-;>Ke!m7%#)zVEoYW~J^7(S41t<>uZp_nK<X-5!0>
z>}l_cU!j5Cw;o*RQRlc@`kx{8%eMF1qNCq#@7uNN*3);XR-q}E-Bz1w&tRYV(d*Tz
z^Q(6T6{v+xJ6oVKv#M(Lzbly$-!-`cdYn$Guf8j-esW5>Mqt97+8MIma?|B}-c<Fx
zv$!)Q(RO8Jok)aLev!%czuOB|r&fD4X9d+}z1y;O=^8$fIp1aZ+^XgaC+A)c$=~+&
z^BQyA!?}C6Z`&iic&%2**7ZM|wg$XXoYVE_TVL3*x~rve>&|9|zAM|F`}NeydF_#R
zbpCyHcauMJ%G}iadPr{ArhE5J-CKIsd{#eSs@K)bp!#L!^_K=7j$Zk5<?FAO$qUcS
zS!%7<wPWc&=97i<uGiYEbC>^U%$&7*_wDcd=I^{?w)fPn)~%OUTVB=CR!D!jI(TBX
z<<3{eQ}>*0e{*%+x4EY?cHA{Pd+gMuE4Mc-JsK@_cG;op=5dP`rk{D|8<?<R*K+^G
zIl6kU%$7!L@4A+^;`rWQuP*ZEdaW%!aOwJ<>@T-}=3HMi{pn%5!tS|;j_2Oav}{_M
z^m<m_iuhByvyO*WT#EJ&>sq<=&-+gS`D>TS{483c_x9MMxyyD7Yql=+{P^!oOkQ}5
z$cDek@mufziZ}eux8SerQ{PQnx4z{v7kQg~g@28!R8wEu<tuW}0;_M`>g}5LaL1KY
z#r)E(_k(KhnqA&p8(r0V?UO{Lzo*R6Uo)4O>B{T29<?ghTOoJ4dDpMfThE2Q?T8Ld
z$PL?L?)`G(p8I{KwTVI7zvfN&%IWaieBt9o_h+6yvgP2>+1t||$CsY|x+MDK^s`T+
zXNP1S(%vP|6S#BNtG!zy;sSRpEjo2P->mJ<?GQfGnXBe6U9f#hd+m<Z_v6hz|MFaN
ztM$vb+ul(R&nTE}WfQYq_2kokhBl+`*SEZ{Ufrl5z3V~i!;oWBm%P+ti;9V<=ALZx
zxRrgI@V05<dZ%9h2@3hupMBuV>$17)LR57xZIm!w=2V+LA^OFYNhh|q{7%<6wRE@9
z?X17%izBWs-yIo$dD*R$$$oRK&a81ie0tTKeP0qTbFML2sZ-MN_!#%CM-LaSDm6X8
zu%wsa^iAQdrMGrUZ#<`Kvc>#!#l@#L=F7dA)pJc(uXXd9?YDD}C+%OoJ#gomQ{`b-
z=LT;3${Zmlm}Pl*V%%N%K1=bSyBF?Q^_Sk!UAc41dcD=xmiw)l!|E=s$UZx0deL!@
zgv%HI#(WB>3e%e=f42U8pKr{>u!XJ)>sDNT`R#Fj*V^OS<*K6pOgT3nsk*u}eY&<`
zX-NAUmz)dBf<sRR9B#ZMYIvZq?SWe7F&TcH2@Nj)R@^9RULtJ5<iRqvpkdPtu2iEU
zI{~j1!Y9<zQ_LJ5&hq48;?OoxDLcc*By+Z&X^~LyW{bq5;lUeRIPbGhSu`mqcGG``
zk^>F)Tx!CHif8>^=;+k&VtUC!mzMhf7q;wNG?iEVjJxD9TPKH!!MBCN^{Wr3IZpV{
zk-gNGWloCnT<7)EWc?(sO+9-&<Xu0T&niu`imKe~;9JU0mr@vyoXb$18R>Sox?=Ud
zyim`l=B1r;E=L}|DfLq{C?LVE<AuBGoxarLkte@x5ZKWYxk|zILQ~><gAEz;8`Y07
zadNKbRNp1Zu)^n*hnGlq!vywYY*S^5{ftjB9#G$Wj6tSxDL<<dN4rUbS6hSoj8?@8
zKPlA?1uY?U76AvgY+LWcj0~*a&M`9*RX-e;JfItMkh8x&hv(-+@fkgrxC>%7$bMk(
zQt4FC_&&qIN2v3-{jC578|yPdGZ^MhIN%u5qB-Y$4a4Ob@wa~`UYdAW$JB{2T;xn2
z6N8i^gDKa)7oL4Whj$4^3W<C_WRV%b#{G8#!y73^-bXSbf;P_-mk2ei@SHlILt%=F
z@QR0D53pLWPC3tE!eHsa-S}yaqM+t;g<y%|5J`d9#=O|Jf=e763@UoUmVOLu8lKHc
z)kWo-m?oZ{F}+41L14PhF@q0bE{hHuvZ@=X-exUiN#H7+Vj#0;1=FLe3Xc*KC#swf
zl~|+c@!eyY?S#n=PQB&ZvI;#C7#<m_7JonV)@9D}oBRw(3;~n%ID!O()E{x}e9XYO
zVv(mx&u!_6elxN=q(olNxjp54zE0v4wul9Dnj9-R8q=6Pj3%v7n;asO^5|7#)aG|t
zA?pKH->cd2>B-H9OBIqXc`B}2F)8?6gTOZtRUw(^Q=G=r=C6}-s%#WtZl31C7bje3
zFv;bKil4EK?Sz=iD<(B}v9ueur6f*#dy0{<l(q0#WQ(V!p~4k`CKU#b?lz7lfh?v+
zoa~khKDMr0Wh%q@czTNo!<7J56%h^o%goG6p9ZS%aWe-9g&M2Htejq>t7ZIj&W4Vj
zNxj9Mx9%L<$<($dja@Ivc>kIhbE(|Wz`oj7CC7FKrFzP4a^zP$puk}`f0xoj$*@(s
zq;x*Lzj^5Dajrd?E3>!!Y~LFhyggOA+MU~M>z30qLsq1}>JF8=DSth*XLbLqTF=PV
zsk&>LnO9zUn(7*rpBlFEYn`(HEX^*KFD;>r!Mhw4vs7c7WL?tlFb0d>dHC_2m$_b4
z&vL22FSjTBXRtoGy58{Wt{>ZX&Ypd5<=uR(uIoIDuD@97({PAwmXFBH&Ldm*T<kM{
z{Yh}ki<J(R{`taEj{?px?U-P(Tq3F?OH+s`c4GGC+><Adi~W7G$!_JjmSm3jSv^6X
z>Wf19cWk`<^?6A2jf=NeURk0Mv^eN$>z7&ai&ids{<;2TaQ?%#Qu)|Y`=Bs!>E5i8
z6H8xaZcl5Q#-7di=E{c!M{9O81v7*&aV>JZ;$iVpwyP^jsi<^m$yytq&aJWQrXHMq
zsy6+~m$e#^W^ecPa<y$P4bHyzeTV6)=QhPfrW@^pO0Nfh)#WnT-Ze$;7W=2aQgt&k
z9hIW|G|yc5p|r==amgPwZ^Z_N--^4sJD;j3UbVR_A?53)ud(gYpNNesJ<V0_rmVPM
zxbL>r{gvNV=`K!;n`*Xc_j*o~S>LTT-*}~0ytV#x@y=+8=>DLuY%k+Z?R|TqG<N5f
z+i6k9w)-z$x<-ZN>Yr^t7l%DcKcM6oVDs+EB#oBV8Rvvbdd0Z<`TjWW+Hx;Dw!kc0
z)^-2gu(hR8Pxfui>RTE5D~hrBWZ_fhX?Z=xd-7Udnl69pbfI%j;uI^-N2Z<}-wzyN
z3fCz-r1)v=-z%9h?}9wFKYONj&ior+l6Xa1QH#;B$;tZKi<V!nSZ*tJOus1Mz0%Z^
zW%9S*+T4EoTm6stdA4ukpPK(}VO{<7{|s@@x7@H^@qJ@@aYDY#>Ffi??p$!_7df{r
zCD^mq!>XUDz@u`4(Q)CHzZVXOpW>XyB-PcmBvdpju;|G<@7j}=Cm)7)3)-a2kSO$#
zvTT_3{GUpz5d%xxGq+%aM=5H%Jf;gCOyw12a%t)|nQ}&jA?pf<%BN<-A5(bh-Y79m
zHV{ilWpZ7ydPTC$?nMa-^In>7<6F}>jn#0{L5Eq5Tuaoio3zA4UE8@tGPzN914Fo-
z-eVQkM5n-~Z=Bvt2h9y!+_LG}y{&9_HXgjyV02>MiK}-)3s)VRZ&+YF&B8T$iTStN
z7B}w}UoM}rt1xd};QKwrVROyI8Ppaq%RG7Ap7~;@O?lemki!<6&zG+=)z4ccx$|n4
z#dD+2#|$q#f820*m(26&mR|*m#YJs6HReBRley0GY$xNCl4;&MKEF4=$9Y0U@6Od=
z4n1a-(=v(rJd7uc8#*Q~_lRNnyFsSn>0yrIg3yV^t^qR-6vP`%)-zz}h!b)--(xZ1
zQOqOt8DS6Sn6q3F-SOe@_G3KDKPr7^oMOem8RIplaiaEpj^yL}w*`cFsujqDaR|F>
z$Z6DdFu1ueElIUI|E6U@p^O+q@hr!ju9cS$-)3nNJi4@7_3s4+KdwTH`oe}n2lWOC
z$19dwYm~g_n7KDSVOn)sf`Q|K-<r+MMprn3@9n*NA+{@1;sINLu*vkd77P>q&A*r&
zA){@2?yF1;H@9G8FE8(vTfOCLUs=aZf3<vJ&*k&do&g2H2KL*;R_Zy6KAywrYj{Ms
zMX}@6tbQi-9*aqR7J8B=4znpdzs=Ibl(blmO`^sBNCkfm1IzIis~VQ4{d@*SHY_J4
z)(JGYayRiGvR<$(%V37{@y+{~SRECrKR6!}@H{<xV*tM(1BbO&kNTSy4%f4xEJu{y
zy1Y{5)6`!jXe3iT*`k!mjp>opwMkDMzcLuNx2SLPp5w66=%!c21jkt}yMA^Qc+Rmn
zGR;YN#-8Ur=ULl|7TOp+o_?LLxOqzpE7z+zKb$!frbzU!afwUj7W6PuNalBFS+r%%
z-?<xwn2I$D&hO$Z2;xwF_e8--jc-C^5933=84Ay=9|?UtvSg$GDzU9bl8a{KaK^ZN
zm8snQK!9t9r3!~nSPYw?WPygk(e&GVS;DFeXI8{zXfP@sywUWbc~i1QRMZ7EpAU*E
z!lnjA43mFvHjoM4aQ<aegXL1L6qXAsKL~ie6#i&2MU%mnQ`LByT7jolO6LKcsfi8K
zc_R~e6vP>ALLB&%-z_=RcfZD(d8MC5@RkEi>O1@u=FU8L&M%J9O~5q4rN845kGRdw
z%PJEXt`tf|Nl95ZG$hTQ)yg4}BEGpt@)cjky!e}M8dqg8$=&cfDbC7Nz~kX4v&Tt2
z|JFHiEwR&GiSJwFSZ*sAA1j#iXl0YA<L_i<38Tvsr8=g?S8E6>a4MWt3-59YYL?%W
z$b4#9%BNOVrvD5jirju24neJjk*VDN+H)2#bai*yhnVaXt+7#<Y}D0oqKAh;>0BuD
zl$9)}Sp)0^8d7^U2kR8Hu2=beSo7f=3yXEjS})ID)_wfl0hWA;4`+nuxEnuZwN?tB
zmT}TiCAqy_qQlCw>+x?1C)u-4L+3Fwa_(K|wqP-j;_7>xN0x|YbuwFi`}cxHHbKU*
z(3M?hI)~R<s~@TQcX|03S2nUbGVed?ba+ZO>mi3%s%l=fkA+rnEYqFcaHjd|VV{!)
zPM#`LT#e`7ZC?4H=?%|?3Co)f`zJN1wWPB;)UWs(^;I@PuRc@Jx%kTBKIPpU0U`YV
zFFl`i?~>f%rKc7i()(>6aQo`bbs2G%S(~PM?*1@i(H_oKJeLni7KdD37_ni(mODJ3
zw7(oTGrcSKrFve9vUPCrZ>^P6!z!hiR`JGID=Mv;w4!k(m$TnYHM<{P9%{uwg>y>I
z^9ia8xOo)*TBxbZGN*&(%qc;?B!f`bzaIn{r|jUr*T%@nc;LxOrM(mQg}?d<wFIU2
zZ{nC3sA9m-#8lG27_j%l1f`if1lzB3I(4ql*s#<-^B4mMpSG&Yj6*_#cJd)i%Od*o
zWNasRyG)A|RZQh!Rq{N-e_O)!6wCRfso$KtTN)2|=$MK$w==fvNnjM%aYoO8+v6FN
zJM&!&Bb7yJMN=d*Ei9|8c^EVmuWX)i`G{9G!-<nc%&#IWd=yzlmS5zRIm*YiVv`4h
zUa;z!ZyydSS~oqI@suklQ13*LqT{=1wG(*UWxRLn5@O-L(;#}}kW$K$Bj+l47R>Yz
z)pF{);b5@flne81mL&%*HT^Da=sJ<ML!HB`UHORUuLS~4J6cp`PH73yX}K&Z&*3}k
zFZ1t2mLTS~sc+N|Nw74;DpWJRY5Sud=+m<F_7_L4uIjdcY2IyG7cCrE-p=P_P&7TU
z!gfQ9&#6f}4ji_7Q20x+;jP!YDI0<tg(of1Y*82Pe9XM@l<n&;PC@Ph)-4Yh7MI9z
zn^inZ)SJ+l{)UNjhW3;*hGGeck9!_6aH*W+)+m}9*q6!8B)W2frsbT3l?u#lT%C<=
z4H`^L;nVq84rY65Tv07*XqgvNq%C9`z$&%G_-&g)Q`d6Nf0x=Ot(egha(GF{KH;W?
zZcB9~I2ih8d#Gr8Y95nw;9P#J{)R&4gqY6~9OVX+ctV(`1u#Tia&*|^r=gI3B$4IJ
zR7N-32`$f<3Lf(ED6U$-!)Ci$!$Uwc$WKy&!9#(IzrV&}OM_54Pf8F2-vsF{3r^Dm
z=X)MZ+me;Hz=4B-&#7AVkko?(wQCNX5)yD(W~BYqfl=e}J*USiZBtx7`-XpX)VMtH
z%)}1|b)G$QP-{Kz7n#U#Ohj0h&DHp0yHb;dj~1)8fyDuJhPPHtpA}Um6?{L;^g*Mo
z_JV@U1;gpH?X(OHINmtSa+hFWxXRTmtZ3n~Y_Wk@`hA9`*0ri4EUZUUAK&IY&h*ux
z#j-%4xihFkgo){FsF+m21RD_+`|7(4Z99+qO-Na?T>hqF>!D;z4Hf5k@he{1oT_X*
zE;?h1dxN&&lxv0JGuS+qZ*?!xRardu^eTs$JYp=@_l7)ek(>fLZ`yK!2ZPMQun2{3
zjvUe7rv*(Ej;U}u>NRN(dyyk!z`Ov9G{*xzAKREXm(S4lpH|#9)g_df=b^QT(Q&yS
z#w_a#nOhiGCaG*_<qmix%n~ydG;)6VIiFfk@ZXKCJ1y!aKJQ{VmuAfAchb+mJY02?
zu+qbK5eGBF3g6o<U*VrG_WNJ-FJDdBrHr2ceq{K_q~7FrYhIS}h=JYC;L7ABA}X7W
zCEOi8CfQzg`OxBVde1x?#Y}F6V#i~Q6C8>a`BM}d?L?1DIB@#qoiuKGtFlw&k|&d}
zKR-v3poSpVLqn#Dhx%DgDhhl@)m%D`#VC3#bMJWDz*MtphYbHdfxxF0=`E~U?T33b
z`&URX6c%r3J+LBZ=7AXuJw8t@9r>$0oPrw;%+OrH*deU>NU=fGkyZ7hnm}XOk_lx@
zG1WE-XYYu&u`(Svm(don=66Lh1BZ~BVG!3hCzXT{Mk5~P#vsNA)(fpoCrQrv(7+-i
zBCPt*kSnF<hlGk&RuCTxPx=&<iJDD^81>ei(_T_mtIh1yWMKS;QSpgyke6G)aRvVp
z0|CC-E!}CWIaV@APFTsY>U_(x2{9(SJ&$OFwlFvpxy(GrX(XX@WaISjhaHU1_ehFN
z;#}px^n3XpFF`H?wHR?j&dt*Tx&->>GfoR&;5;&?SpF{inTiELb6AwP^n{KizUy{(
z&Jp{4?TuDmOYrTSFT0+epBxkS;F#g@H(AB8fpr|Mw^!X!wX87=eXpK(d#5VLvzzO0
zS$VJA)!4S}^p-pK0@?l7ok|G`?5+)0wT&+iNr=y_?z<Xs>s{H@oYz|&mq*Q>;U!{K
z%JQFqr)qiTTEBOfTKC;9cG|GwzTfO9?%BR)GAkDSit2xP?{(2#lfPy!FP{HkWb!vY
zefz!nPFr=w@}CsvN%^veRAwgcH+d;C>%!C3+2_x{`k86%ZvLM^>#Fqj=5Mo=4+c#;
zd!^#>-7B9pbUj%!=A1dBAkfN`lq|D2bAPVht?lB0PPatGma>NO#_JndPk;Y8^Rm>2
zOHUO<WZdghN_9{AX$2nj43${tc*sxa;DWX-8yV*`ygmM+v!o%QFCZx1$3Wy%WK&n_
zLm?C8wADIxJ7zMnovoND+{NL@m9q07SDH;gvW<ep)S?CUB`ZD{gfMv)xJ#Z{Rrv3P
zOQS)HKsR@g=z(<x)8ZuN{a|pJ)}yksQ%AFvCDW-zQa;7-9Jj&d!zwPn9yIZ1oxc?(
zaGXbEPH{W0Qn1Ekp)<|_O7k>q=LvYK3f{eJzz{BZ=7FC<Oh>rA{@aHfJ9mh3wba+>
zIy)#VJTt{8V8bz<TPqc(*(A(WS*R5#QuvTLYF?zwf$1recX5_Y7dBusjy&w3q*ls!
z(8>GT?|}G#z2|B@r|;N#bb-mMGW99Woa~CmUCG>mGg<t+cJPD+Fm&>3ajiUZR?SIH
z;7X{AQtgI0FLgTKHVB-o7XI!eEHFWxRraukE9Z;}$D5WgePWxeAj-ln;BVlRCMYJw
zAmDQ$W#^ebONSZHRF4HIvpODC6h5oDjG2Sa@pgxC%K?36hly8pn$#;wOc-ABR4^?0
zFhM}(X<&oPH%Atklt#z5r#T#i7G73VefprwZU&>FgyJ0bf};%F%7;{z2d-Ta6Q#w(
zT{16n!c2!2QyJ;TNtT?EBD0*GgcUYVkV$-3Ja27*Qv++LOUNOqc?SyqPLgKmjpPY+
zm}sXaCGMf&B+7DnPuW2Q4}}n?7NsBJj#iGGmOeiI#~7HVel?W|sXNUjGFfQC0_Oi$
zp0n^oaSFx#XSm@uzkJ2A+3Z4Z?Oxp8fBN<^XS@3nm);${zcQmhb3vxMNW{^!`G(;O
zU)v`67Q9oLy}GNmYv-&#Oq2U$5|}#s3*R?2#z^_sD6k8@nUiZXn`L*3NaJyh;7Kbi
zmKF3FY5(?36urdU)H?CJMKWhmF^?PHuS6Sx2Wl%+UJ7iEV3s?u`j&$Ho}F5cZ!)l)
z=P|ZC;N<kk`Z>GN1orp85(JiZ&X8ELQJ_6hF-4R|PELS7U-AHxM)N)QT|A#P++rt)
z8pUyQJPQ6che0SnuxH8gyL=2jT1qD#^5*krPLXe2`k5<0<js}tKL!VcLX4*GmvB9G
zbBc^Q#{==x6F#;SPBX|jpzu(|a+z|I6AQZx=l63g`&8b%Z#H=z%iR}npyq(plvf<Z
z%D)r)6uBf%a-6@fp&O@s*3^Gp0h{orr-$!yFg&=!_+IkIp{hj_x;A}iDUL4_aMQh#
zJ}GIQ%ms#7cipFSo|a5L#ymx&sCm9<`)4Kr?dMB6Erb6=)UNQmHs_MIPB<eQgDGc`
z(1O#PK7}pL(M>IZhxvthrHbaiKP7vrPj|+T76y}_73Y$4_o~+XuH9a|eaVBVCbJG~
z*?Vl(_8FPVQ+u=j`o_$gy=cW#zZ*AlO7&jG{hBtVx8(Axi<wRjSE#kK8{Yb=Hka?}
zAE`xsAt7r{8D;9oMlV`hv~qd7Irqm&evzwG!=2aNC{=sCAtLDB+nn6_+kQQ|yKSo0
z{wt|}-7=SkKD~2i%C*^BW#eY=wVImsE9$X&?b<7Q-Cy0aYP;`yewpTl&5O(3)_Sc=
z3^%)RdBxQEr{4MhHT!(jyms}r-4*-GBcg5cb62`v@7=ye^QP^th>L#9U#`5fce+){
zl{Md9{bvY28GL@?M)UlIeouF&#_oI88uUE(+?|Zee|^<Ivt3&L>({j}wX(U-Rxf%y
z_sf3<{cB$}FF!dYwoU6~sMSmN8-0r=mupQ5y6u1Io5Xzsja~aLZ+}<L$Ga!0<kf94
zwyR4@r0Qp_EB@=XZrS(US1t!nK7Cr|k6Ffpw2n;^gDp0fdfuO%pX*+p8(vjvZ}jNN
z{HUen2I{MB&B|^IUjAo(+-%uDH`eY9pWf5ix13K}$5m4^>7{$9^YpXRUVl2cN;T@i
zZDE(otXe)#tXqU8D;7@p^>8y=vE|{NJF7ztr!4lI+8t;ttj=;ywn10x>Eup^lg?c-
zb2S`O=7mT+<T=f$^FyzIN2R5Jp?Uh&M8QsHn-6UkI}4_Hvz}J<wViOp^hl~HmwS#S
zLx8%1`dyYK(?z+AJY;_qG%#^+G5hS0bnaWZ<diyZz>@i!I9yAf-k5QK!Clj(Ad{=x
zsNhJMqlKu`DybG8H#P1l44d;DKDJGDJR+Z?*s$fyPLIdG7rc}di8F3_>UU%cyUy<q
z4U7wXg<0OeZF=@ty{PeywE~Z7TkM6HolQIE-C#%(T9UM<(1VeWsaTcA{t>hAF)5x?
zeA*0cQxDqCJXEx5s`F8C8%0^G;@>O39`@3|%5Z3pNW)J5yNt$i0_~N|7Rr*z3><!`
zOs#SEIUG+c(3r=ylSAnIfsd`H=h?llVd7IYX(%}BpQCcVUGj{dfNRR%c?NTuy=0v&
z`DJ(*<dQm;JXCon?_%=N^MT^4OHqOrLi;lg6-Y4goH_8eeF4ujHJLp=8y|Cd2ups5
V$n-nGruxf%n}}6*JLn|in*a~IGLHZN

literal 0
HcmV?d00001

diff --git a/tensorflow/contrib/lite/examples/ios/simple/ios_image_load.h b/tensorflow/contrib/lite/examples/ios/simple/ios_image_load.h
new file mode 100644
index 0000000000..98934ce41d
--- /dev/null
+++ b/tensorflow/contrib/lite/examples/ios/simple/ios_image_load.h
@@ -0,0 +1,23 @@
+// Copyright 2015 Google Inc. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef TENSORFLOW_EXAMPLES_IOS_IOS_IMAGE_LOAD_H_
+#define TENSORFLOW_EXAMPLES_IOS_IOS_IMAGE_LOAD_H_
+
+#include <vector>
+
+std::vector<uint8_t> LoadImageFromFile(const char* file_name, int* out_width,
+                                       int* out_height, int* out_channels);
+
+#endif  // TENSORFLOW_EXAMPLES_IOS_IOS_IMAGE_LOAD_H_
diff --git a/tensorflow/contrib/lite/examples/ios/simple/ios_image_load.mm b/tensorflow/contrib/lite/examples/ios/simple/ios_image_load.mm
new file mode 100644
index 0000000000..cb19377d7e
--- /dev/null
+++ b/tensorflow/contrib/lite/examples/ios/simple/ios_image_load.mm
@@ -0,0 +1,80 @@
+// Copyright 2015 Google Inc. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "ios_image_load.h"
+
+#include <assert.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#import <CoreImage/CoreImage.h>
+#import <ImageIO/ImageIO.h>
+
+std::vector<uint8_t> LoadImageFromFile(const char* file_name, int* out_width, int* out_height,
+                                       int* out_channels) {
+  FILE* file_handle = fopen(file_name, "rb");
+  fseek(file_handle, 0, SEEK_END);
+  const size_t bytes_in_file = ftell(file_handle);
+  fseek(file_handle, 0, SEEK_SET);
+  std::vector<uint8_t> file_data(bytes_in_file);
+  fread(file_data.data(), 1, bytes_in_file, file_handle);
+  fclose(file_handle);
+  CFDataRef file_data_ref =
+      CFDataCreateWithBytesNoCopy(NULL, file_data.data(), bytes_in_file, kCFAllocatorNull);
+  CGDataProviderRef image_provider = CGDataProviderCreateWithCFData(file_data_ref);
+
+  const char* suffix = strrchr(file_name, '.');
+  if (!suffix || suffix == file_name) {
+    suffix = "";
+  }
+  CGImageRef image;
+  if (strcasecmp(suffix, ".png") == 0) {
+    image = CGImageCreateWithPNGDataProvider(image_provider, NULL, true, kCGRenderingIntentDefault);
+  } else if ((strcasecmp(suffix, ".jpg") == 0) || (strcasecmp(suffix, ".jpeg") == 0)) {
+    image =
+        CGImageCreateWithJPEGDataProvider(image_provider, NULL, true, kCGRenderingIntentDefault);
+  } else {
+    CFRelease(image_provider);
+    CFRelease(file_data_ref);
+    fprintf(stderr, "Unknown suffix for file '%s'\n", file_name);
+    *out_width = 0;
+    *out_height = 0;
+    *out_channels = 0;
+    return std::vector<uint8_t>();
+  }
+
+  const int width = (int)CGImageGetWidth(image);
+  const int height = (int)CGImageGetHeight(image);
+  const int channels = 4;
+  CGColorSpaceRef color_space = CGColorSpaceCreateDeviceRGB();
+  const int bytes_per_row = (width * channels);
+  const int bytes_in_image = (bytes_per_row * height);
+  std::vector<uint8_t> result(bytes_in_image);
+  const int bits_per_component = 8;
+  CGContextRef context =
+      CGBitmapContextCreate(result.data(), width, height, bits_per_component, bytes_per_row,
+                            color_space, kCGImageAlphaPremultipliedLast | kCGBitmapByteOrder32Big);
+  CGColorSpaceRelease(color_space);
+  CGContextDrawImage(context, CGRectMake(0, 0, width, height), image);
+  CGContextRelease(context);
+  CFRelease(image);
+  CFRelease(image_provider);
+  CFRelease(file_data_ref);
+
+  *out_width = width;
+  *out_height = height;
+  *out_channels = channels;
+  return result;
+}
diff --git a/tensorflow/contrib/lite/examples/ios/simple/main.mm b/tensorflow/contrib/lite/examples/ios/simple/main.mm
new file mode 100644
index 0000000000..05cb55ddd7
--- /dev/null
+++ b/tensorflow/contrib/lite/examples/ios/simple/main.mm
@@ -0,0 +1,22 @@
+// Copyright 2015 Google Inc. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#import <UIKit/UIKit.h>
+
+int main(int argc, char *argv[]) {
+  @autoreleasepool {
+    NSString *delegateClassName = @"AppDelegate";
+    return UIApplicationMain(argc, argv, nil, delegateClassName);
+  }
+}
diff --git a/tensorflow/contrib/lite/examples/ios/simple/simple.xcodeproj/project.pbxproj b/tensorflow/contrib/lite/examples/ios/simple/simple.xcodeproj/project.pbxproj
new file mode 100644
index 0000000000..9277c230b8
--- /dev/null
+++ b/tensorflow/contrib/lite/examples/ios/simple/simple.xcodeproj/project.pbxproj
@@ -0,0 +1,359 @@
+// !$*UTF8*$!
+{
+	archiveVersion = 1;
+	classes = {
+	};
+	objectVersion = 46;
+	objects = {
+
+/* Begin PBXBuildFile section */
+		1C0D734B1ECCC460008C1DAB /* CoreGraphics.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 1C0D734A1ECCC460008C1DAB /* CoreGraphics.framework */; };
+		1CA45FFF1ECCC356002FA6A4 /* UIKit.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 1CA45FFE1ECCC356002FA6A4 /* UIKit.framework */; };
+		594C14AE1FB8F9B500EE8BFE /* libtensorflow-lite.a in Frameworks */ = {isa = PBXBuildFile; fileRef = 594C14AD1FB8F9B500EE8BFE /* libtensorflow-lite.a */; };
+		594C14B11FB9037100EE8BFE /* labels.txt in Resources */ = {isa = PBXBuildFile; fileRef = 594C14AF1FB9037100EE8BFE /* labels.txt */; };
+		594C14B21FB9037100EE8BFE /* mobilenet_v1_1.0_224.tflite in Resources */ = {isa = PBXBuildFile; fileRef = 594C14B01FB9037100EE8BFE /* mobilenet_v1_1.0_224.tflite */; };
+		59A3D0011CF4E68100C4259F /* AppDelegate.mm in Sources */ = {isa = PBXBuildFile; fileRef = 59A3CFF21CF4E68100C4259F /* AppDelegate.mm */; };
+		59A3D0031CF4E68100C4259F /* grace_hopper.jpg in Resources */ = {isa = PBXBuildFile; fileRef = 59A3CFF51CF4E68100C4259F /* grace_hopper.jpg */; };
+		59A3D0081CF4E68100C4259F /* ios_image_load.mm in Sources */ = {isa = PBXBuildFile; fileRef = 59A3CFFB1CF4E68100C4259F /* ios_image_load.mm */; };
+		59A3D0091CF4E68100C4259F /* main.mm in Sources */ = {isa = PBXBuildFile; fileRef = 59A3CFFC1CF4E68100C4259F /* main.mm */; };
+		59A3D00B1CF4E68100C4259F /* RunModelViewController.mm in Sources */ = {isa = PBXBuildFile; fileRef = 59A3CFFF1CF4E68100C4259F /* RunModelViewController.mm */; };
+		59A3D00C1CF4E68100C4259F /* RunModelViewController.xib in Resources */ = {isa = PBXBuildFile; fileRef = 59A3D0001CF4E68100C4259F /* RunModelViewController.xib */; };
+/* End PBXBuildFile section */
+
+/* Begin PBXFileReference section */
+		1C0D73481ECCC41B008C1DAB /* CoreImage.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = CoreImage.framework; path = System/Library/Frameworks/CoreImage.framework; sourceTree = SDKROOT; };
+		1C0D734A1ECCC460008C1DAB /* CoreGraphics.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = CoreGraphics.framework; path = System/Library/Frameworks/CoreGraphics.framework; sourceTree = SDKROOT; };
+		1CA45FFE1ECCC356002FA6A4 /* UIKit.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = UIKit.framework; path = System/Library/Frameworks/UIKit.framework; sourceTree = SDKROOT; };
+		5911579B1CF4011C00C31E3A /* tf_simple_example.app */ = {isa = PBXFileReference; explicitFileType = wrapper.application; includeInIndex = 0; path = tf_simple_example.app; sourceTree = BUILT_PRODUCTS_DIR; };
+		594C14AD1FB8F9B500EE8BFE /* libtensorflow-lite.a */ = {isa = PBXFileReference; lastKnownFileType = archive.ar; name = "libtensorflow-lite.a"; path = "../../../gen/lib/libtensorflow-lite.a"; sourceTree = "<group>"; };
+		594C14AF1FB9037100EE8BFE /* labels.txt */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; path = labels.txt; sourceTree = "<group>"; };
+		594C14B01FB9037100EE8BFE /* mobilenet_v1_1.0_224.tflite */ = {isa = PBXFileReference; lastKnownFileType = file; path = mobilenet_v1_1.0_224.tflite; sourceTree = "<group>"; };
+		59A3CFF11CF4E68100C4259F /* AppDelegate.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = AppDelegate.h; sourceTree = "<group>"; };
+		59A3CFF21CF4E68100C4259F /* AppDelegate.mm */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.objcpp; path = AppDelegate.mm; sourceTree = "<group>"; };
+		59A3CFF51CF4E68100C4259F /* grace_hopper.jpg */ = {isa = PBXFileReference; lastKnownFileType = image.jpeg; path = grace_hopper.jpg; sourceTree = "<group>"; };
+		59A3CFFA1CF4E68100C4259F /* ios_image_load.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = ios_image_load.h; sourceTree = "<group>"; };
+		59A3CFFB1CF4E68100C4259F /* ios_image_load.mm */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.objcpp; path = ios_image_load.mm; sourceTree = "<group>"; };
+		59A3CFFC1CF4E68100C4259F /* main.mm */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.objcpp; path = main.mm; sourceTree = "<group>"; };
+		59A3CFFD1CF4E68100C4259F /* RunModel-Info.plist */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text.plist.xml; path = "RunModel-Info.plist"; sourceTree = "<group>"; };
+		59A3CFFE1CF4E68100C4259F /* RunModelViewController.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = RunModelViewController.h; sourceTree = "<group>"; };
+		59A3CFFF1CF4E68100C4259F /* RunModelViewController.mm */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.objcpp; path = RunModelViewController.mm; sourceTree = "<group>"; };
+		59A3D0001CF4E68100C4259F /* RunModelViewController.xib */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = file.xib; path = RunModelViewController.xib; sourceTree = "<group>"; };
+		73DBC33C5DD9A526EE6D1EF2 /* libPods-tf_simple_example.a */ = {isa = PBXFileReference; explicitFileType = archive.ar; includeInIndex = 0; path = "libPods-tf_simple_example.a"; sourceTree = BUILT_PRODUCTS_DIR; };
+/* End PBXFileReference section */
+
+/* Begin PBXFrameworksBuildPhase section */
+		591157981CF4011C00C31E3A /* Frameworks */ = {
+			isa = PBXFrameworksBuildPhase;
+			buildActionMask = 2147483647;
+			files = (
+				594C14AE1FB8F9B500EE8BFE /* libtensorflow-lite.a in Frameworks */,
+				1C0D734B1ECCC460008C1DAB /* CoreGraphics.framework in Frameworks */,
+				1CA45FFF1ECCC356002FA6A4 /* UIKit.framework in Frameworks */,
+			);
+			runOnlyForDeploymentPostprocessing = 0;
+		};
+/* End PBXFrameworksBuildPhase section */
+
+/* Begin PBXGroup section */
+		24D7686C331131624F4454A0 /* Frameworks */ = {
+			isa = PBXGroup;
+			children = (
+				594C14AD1FB8F9B500EE8BFE /* libtensorflow-lite.a */,
+				1C0D734A1ECCC460008C1DAB /* CoreGraphics.framework */,
+				1C0D73481ECCC41B008C1DAB /* CoreImage.framework */,
+				1CA45FFE1ECCC356002FA6A4 /* UIKit.framework */,
+				73DBC33C5DD9A526EE6D1EF2 /* libPods-tf_simple_example.a */,
+			);
+			name = Frameworks;
+			sourceTree = "<group>";
+		};
+		591157921CF4011C00C31E3A = {
+			isa = PBXGroup;
+			children = (
+				59A3CFF11CF4E68100C4259F /* AppDelegate.h */,
+				59A3CFF21CF4E68100C4259F /* AppDelegate.mm */,
+				59A3CFF31CF4E68100C4259F /* data */,
+				59A3CFFA1CF4E68100C4259F /* ios_image_load.h */,
+				59A3CFFB1CF4E68100C4259F /* ios_image_load.mm */,
+				59A3CFFC1CF4E68100C4259F /* main.mm */,
+				59A3CFFD1CF4E68100C4259F /* RunModel-Info.plist */,
+				59A3CFFE1CF4E68100C4259F /* RunModelViewController.h */,
+				59A3CFFF1CF4E68100C4259F /* RunModelViewController.mm */,
+				59A3D0001CF4E68100C4259F /* RunModelViewController.xib */,
+				5911579C1CF4011C00C31E3A /* Products */,
+				24D7686C331131624F4454A0 /* Frameworks */,
+			);
+			sourceTree = "<group>";
+		};
+		5911579C1CF4011C00C31E3A /* Products */ = {
+			isa = PBXGroup;
+			children = (
+				5911579B1CF4011C00C31E3A /* tf_simple_example.app */,
+			);
+			name = Products;
+			sourceTree = "<group>";
+		};
+		59A3CFF31CF4E68100C4259F /* data */ = {
+			isa = PBXGroup;
+			children = (
+				59A3CFF51CF4E68100C4259F /* grace_hopper.jpg */,
+				594C14AF1FB9037100EE8BFE /* labels.txt */,
+				594C14B01FB9037100EE8BFE /* mobilenet_v1_1.0_224.tflite */,
+			);
+			path = data;
+			sourceTree = "<group>";
+		};
+/* End PBXGroup section */
+
+/* Begin PBXNativeTarget section */
+		5911579A1CF4011C00C31E3A /* tf_simple_example */ = {
+			isa = PBXNativeTarget;
+			buildConfigurationList = 591157B21CF4011D00C31E3A /* Build configuration list for PBXNativeTarget "tf_simple_example" */;
+			buildPhases = (
+				591157971CF4011C00C31E3A /* Sources */,
+				591157981CF4011C00C31E3A /* Frameworks */,
+				591157991CF4011C00C31E3A /* Resources */,
+			);
+			buildRules = (
+			);
+			dependencies = (
+			);
+			name = tf_simple_example;
+			productName = tf_ios_makefile_example;
+			productReference = 5911579B1CF4011C00C31E3A /* tf_simple_example.app */;
+			productType = "com.apple.product-type.application";
+		};
+/* End PBXNativeTarget section */
+
+/* Begin PBXProject section */
+		591157931CF4011C00C31E3A /* Project object */ = {
+			isa = PBXProject;
+			attributes = {
+				LastUpgradeCheck = 0830;
+				ORGANIZATIONNAME = Google;
+				TargetAttributes = {
+					5911579A1CF4011C00C31E3A = {
+						CreatedOnToolsVersion = 7.2;
+						DevelopmentTeam = EQHXZ8M8AV;
+						ProvisioningStyle = Manual;
+					};
+				};
+			};
+			buildConfigurationList = 591157961CF4011C00C31E3A /* Build configuration list for PBXProject "simple" */;
+			compatibilityVersion = "Xcode 3.2";
+			developmentRegion = English;
+			hasScannedForEncodings = 0;
+			knownRegions = (
+				en,
+				Base,
+			);
+			mainGroup = 591157921CF4011C00C31E3A;
+			productRefGroup = 5911579C1CF4011C00C31E3A /* Products */;
+			projectDirPath = "";
+			projectRoot = "";
+			targets = (
+				5911579A1CF4011C00C31E3A /* tf_simple_example */,
+			);
+		};
+/* End PBXProject section */
+
+/* Begin PBXResourcesBuildPhase section */
+		591157991CF4011C00C31E3A /* Resources */ = {
+			isa = PBXResourcesBuildPhase;
+			buildActionMask = 2147483647;
+			files = (
+				59A3D00C1CF4E68100C4259F /* RunModelViewController.xib in Resources */,
+				594C14B11FB9037100EE8BFE /* labels.txt in Resources */,
+				59A3D0031CF4E68100C4259F /* grace_hopper.jpg in Resources */,
+				594C14B21FB9037100EE8BFE /* mobilenet_v1_1.0_224.tflite in Resources */,
+			);
+			runOnlyForDeploymentPostprocessing = 0;
+		};
+/* End PBXResourcesBuildPhase section */
+
+/* Begin PBXSourcesBuildPhase section */
+		591157971CF4011C00C31E3A /* Sources */ = {
+			isa = PBXSourcesBuildPhase;
+			buildActionMask = 2147483647;
+			files = (
+				59A3D0091CF4E68100C4259F /* main.mm in Sources */,
+				59A3D0011CF4E68100C4259F /* AppDelegate.mm in Sources */,
+				59A3D00B1CF4E68100C4259F /* RunModelViewController.mm in Sources */,
+				59A3D0081CF4E68100C4259F /* ios_image_load.mm in Sources */,
+			);
+			runOnlyForDeploymentPostprocessing = 0;
+		};
+/* End PBXSourcesBuildPhase section */
+
+/* Begin XCBuildConfiguration section */
+		591157B01CF4011D00C31E3A /* Debug */ = {
+			isa = XCBuildConfiguration;
+			buildSettings = {
+				ALWAYS_SEARCH_USER_PATHS = NO;
+				CLANG_CXX_LANGUAGE_STANDARD = "gnu++0x";
+				CLANG_CXX_LIBRARY = "libc++";
+				CLANG_ENABLE_MODULES = YES;
+				CLANG_ENABLE_OBJC_ARC = YES;
+				CLANG_WARN_BOOL_CONVERSION = YES;
+				CLANG_WARN_CONSTANT_CONVERSION = YES;
+				CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR;
+				CLANG_WARN_EMPTY_BODY = YES;
+				CLANG_WARN_ENUM_CONVERSION = YES;
+				CLANG_WARN_INFINITE_RECURSION = YES;
+				CLANG_WARN_INT_CONVERSION = YES;
+				CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR;
+				CLANG_WARN_SUSPICIOUS_MOVE = YES;
+				CLANG_WARN_UNREACHABLE_CODE = YES;
+				CLANG_WARN__DUPLICATE_METHOD_MATCH = YES;
+				"CODE_SIGN_IDENTITY[sdk=iphoneos*]" = "iPhone Developer";
+				COPY_PHASE_STRIP = NO;
+				DEBUG_INFORMATION_FORMAT = dwarf;
+				ENABLE_STRICT_OBJC_MSGSEND = YES;
+				ENABLE_TESTABILITY = YES;
+				GCC_C_LANGUAGE_STANDARD = gnu99;
+				GCC_DYNAMIC_NO_PIC = NO;
+				GCC_NO_COMMON_BLOCKS = YES;
+				GCC_OPTIMIZATION_LEVEL = 0;
+				GCC_PREPROCESSOR_DEFINITIONS = (
+					"DEBUG=1",
+					"$(inherited)",
+				);
+				GCC_WARN_64_TO_32_BIT_CONVERSION = YES;
+				GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR;
+				GCC_WARN_UNDECLARED_SELECTOR = YES;
+				GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE;
+				GCC_WARN_UNUSED_FUNCTION = YES;
+				GCC_WARN_UNUSED_VARIABLE = YES;
+				IPHONEOS_DEPLOYMENT_TARGET = 8.0;
+				MTL_ENABLE_DEBUG_INFO = YES;
+				ONLY_ACTIVE_ARCH = YES;
+				SDKROOT = iphoneos;
+				TARGETED_DEVICE_FAMILY = "1,2";
+			};
+			name = Debug;
+		};
+		591157B11CF4011D00C31E3A /* Release */ = {
+			isa = XCBuildConfiguration;
+			buildSettings = {
+				ALWAYS_SEARCH_USER_PATHS = NO;
+				CLANG_CXX_LANGUAGE_STANDARD = "gnu++0x";
+				CLANG_CXX_LIBRARY = "libc++";
+				CLANG_ENABLE_MODULES = YES;
+				CLANG_ENABLE_OBJC_ARC = YES;
+				CLANG_WARN_BOOL_CONVERSION = YES;
+				CLANG_WARN_CONSTANT_CONVERSION = YES;
+				CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR;
+				CLANG_WARN_EMPTY_BODY = YES;
+				CLANG_WARN_ENUM_CONVERSION = YES;
+				CLANG_WARN_INFINITE_RECURSION = YES;
+				CLANG_WARN_INT_CONVERSION = YES;
+				CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR;
+				CLANG_WARN_SUSPICIOUS_MOVE = YES;
+				CLANG_WARN_UNREACHABLE_CODE = YES;
+				CLANG_WARN__DUPLICATE_METHOD_MATCH = YES;
+				"CODE_SIGN_IDENTITY[sdk=iphoneos*]" = "iPhone Developer";
+				COPY_PHASE_STRIP = NO;
+				DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym";
+				ENABLE_NS_ASSERTIONS = NO;
+				ENABLE_STRICT_OBJC_MSGSEND = YES;
+				GCC_C_LANGUAGE_STANDARD = gnu99;
+				GCC_NO_COMMON_BLOCKS = YES;
+				GCC_WARN_64_TO_32_BIT_CONVERSION = YES;
+				GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR;
+				GCC_WARN_UNDECLARED_SELECTOR = YES;
+				GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE;
+				GCC_WARN_UNUSED_FUNCTION = YES;
+				GCC_WARN_UNUSED_VARIABLE = YES;
+				IPHONEOS_DEPLOYMENT_TARGET = 8.0;
+				MTL_ENABLE_DEBUG_INFO = NO;
+				SDKROOT = iphoneos;
+				TARGETED_DEVICE_FAMILY = "1,2";
+				VALIDATE_PRODUCT = YES;
+			};
+			name = Release;
+		};
+		591157B31CF4011D00C31E3A /* Debug */ = {
+			isa = XCBuildConfiguration;
+			buildSettings = {
+				CLANG_DEBUG_INFORMATION_LEVEL = default;
+				CODE_SIGN_IDENTITY = "iPhone Developer";
+				DEVELOPMENT_TEAM = EQHXZ8M8AV;
+				ENABLE_BITCODE = NO;
+				GCC_ENABLE_CPP_EXCEPTIONS = YES;
+				GCC_ENABLE_CPP_RTTI = YES;
+				HEADER_SEARCH_PATHS = (
+					"$(inherited)",
+					../../../../../../,
+					../../../downloads/flatbuffers/include/,
+					../../../downloads/eigen/,
+					../../../downloads/,
+				);
+				INFOPLIST_FILE = "$(SRCROOT)/RunModel-Info.plist";
+				IPHONEOS_DEPLOYMENT_TARGET = 9.2;
+				LD_RUNPATH_SEARCH_PATHS = "$(inherited) @executable_path/Frameworks";
+				LIBRARY_SEARCH_PATHS = ../../../gen/lib/;
+				OTHER_CPLUSPLUSFLAGS = "$(OTHER_CFLAGS)";
+				OTHER_LDFLAGS = "$(inherited)";
+				PRODUCT_BUNDLE_IDENTIFIER = "com.google.tflite-simple-example";
+				PRODUCT_NAME = "$(TARGET_NAME)";
+				PROVISIONING_PROFILE = "1072bd47-ff19-4e5f-8107-d912748f83f1";
+				PROVISIONING_PROFILE_SPECIFIER = "Google Development";
+				SEPARATE_STRIP = NO;
+			};
+			name = Debug;
+		};
+		591157B41CF4011D00C31E3A /* Release */ = {
+			isa = XCBuildConfiguration;
+			buildSettings = {
+				CLANG_DEBUG_INFORMATION_LEVEL = default;
+				CODE_SIGN_IDENTITY = "iPhone Developer";
+				DEVELOPMENT_TEAM = "";
+				ENABLE_BITCODE = NO;
+				GCC_ENABLE_CPP_EXCEPTIONS = YES;
+				GCC_ENABLE_CPP_RTTI = YES;
+				HEADER_SEARCH_PATHS = (
+					"$(inherited)",
+					../../../../../../,
+					../../../downloads/flatbuffers/include/,
+					../../../downloads/eigen/,
+					../../../downloads/,
+				);
+				INFOPLIST_FILE = "$(SRCROOT)/RunModel-Info.plist";
+				IPHONEOS_DEPLOYMENT_TARGET = 9.2;
+				LD_RUNPATH_SEARCH_PATHS = "$(inherited) @executable_path/Frameworks";
+				LIBRARY_SEARCH_PATHS = ../../../gen/lib/;
+				ONLY_ACTIVE_ARCH = YES;
+				OTHER_CPLUSPLUSFLAGS = "$(OTHER_CFLAGS)";
+				OTHER_LDFLAGS = "$(inherited)";
+				PRODUCT_BUNDLE_IDENTIFIER = "com.google.tflite-simple-example";
+				PRODUCT_NAME = "$(TARGET_NAME)";
+				PROVISIONING_PROFILE_SPECIFIER = "";
+				SEPARATE_STRIP = NO;
+			};
+			name = Release;
+		};
+/* End XCBuildConfiguration section */
+
+/* Begin XCConfigurationList section */
+		591157961CF4011C00C31E3A /* Build configuration list for PBXProject "simple" */ = {
+			isa = XCConfigurationList;
+			buildConfigurations = (
+				591157B01CF4011D00C31E3A /* Debug */,
+				591157B11CF4011D00C31E3A /* Release */,
+			);
+			defaultConfigurationIsVisible = 0;
+			defaultConfigurationName = Release;
+		};
+		591157B21CF4011D00C31E3A /* Build configuration list for PBXNativeTarget "tf_simple_example" */ = {
+			isa = XCConfigurationList;
+			buildConfigurations = (
+				591157B31CF4011D00C31E3A /* Debug */,
+				591157B41CF4011D00C31E3A /* Release */,
+			);
+			defaultConfigurationIsVisible = 0;
+			defaultConfigurationName = Release;
+		};
+/* End XCConfigurationList section */
+	};
+	rootObject = 591157931CF4011C00C31E3A /* Project object */;
+}
diff --git a/tensorflow/contrib/lite/g3doc/apis.md b/tensorflow/contrib/lite/g3doc/apis.md
index e8f5566f11..fe208e47d1 100644
--- a/tensorflow/contrib/lite/g3doc/apis.md
+++ b/tensorflow/contrib/lite/g3doc/apis.md
@@ -267,7 +267,7 @@ try (Interpreter interpreter = new Interpreter(file_of_a_tensorflowlite_model))
 The `Interpreter.java` class drives model inference with TensorFlow Lite. In
 most of the cases, this is the only class an app developer will need.
 
-#### Initializing an `Interpreter` Mith a Model Mile
+#### Initializing an `Interpreter` With a Model File
 
 The `Interpreter` can be initialized with a model file using the constructor:
 
diff --git a/tensorflow/contrib/lite/g3doc/tf_ops_compatibility.md b/tensorflow/contrib/lite/g3doc/tf_ops_compatibility.md
index 121c4c2c95..9ade04eb8c 100644
--- a/tensorflow/contrib/lite/g3doc/tf_ops_compatibility.md
+++ b/tensorflow/contrib/lite/g3doc/tf_ops_compatibility.md
@@ -54,7 +54,7 @@ counterparts:
 *   [tf.sigmoid](https://www.tensorflow.org/api_docs/python/tf/sigmoid)
 *   [tf.space_to_depth](https://www.tensorflow.org/api_docs/python/tf/space_to_depth)
 
-## Straighforward Conversions, Constant-Folding and Fusing
+## Straightforward Conversions, Constant-Folding and Fusing
 
 A number of TensorFlow operations can be processed by TensorFlow Lite even
 though they have no direct equivalent. This is the case for operations that can
diff --git a/tensorflow/contrib/lite/ios_makefile.inc b/tensorflow/contrib/lite/ios_makefile.inc
new file mode 100644
index 0000000000..345ed26212
--- /dev/null
+++ b/tensorflow/contrib/lite/ios_makefile.inc
@@ -0,0 +1,31 @@
+#Settings for iOS.
+ifeq($(TARGET), IOS) BUILD_FOR_IOS_SIMULATOR
+    : = false ifeq($(IOS_ARCH), x86_64) BUILD_FOR_IOS_SIMULATOR
+    : = true endif ifeq($(IOS_ARCH), i386) BUILD_FOR_IOS_SIMULATOR
+    : = true endif ifeq($(BUILD_FOR_IOS_SIMULATOR), true) IPHONEOS_PLATFORM
+    : = $(shell xcrun-- sdk iphonesimulator-- show - sdk - platform -
+          path) IPHONEOS_SYSROOT
+    : = $(shell xcrun-- sdk iphonesimulator-- show - sdk -
+          path) else IPHONEOS_PLATFORM
+    : = $(shell xcrun-- sdk iphoneos-- show - sdk - platform -
+          path) IPHONEOS_SYSROOT
+    : = $(shell xcrun-- sdk iphoneos-- show - sdk - path) endif IOS_SDK_VERSION
+    : = $(shell xcrun-- sdk iphoneos-- show - sdk - version) MIN_SDK_VERSION
+    : = 9.0
+#Override IOS_ARCH with armv7, armv7s, arm64, i386, or x86_64.
+      IOS_ARCH
+    : = x86_64 CXXFLAGS
+      += -miphoneos - version
+         - min = $(MIN_SDK_VERSION) - DGEMMLOWP_ALLOW_SLOW_SCALAR_FALLBACK
+                 - fembed - bitcode - Wno - c++ 11 - narrowing - mno - thumb
+                 - fno - exceptions
+                 - isysroot ${IPHONEOS_SYSROOT} - arch $(IOS_ARCH) - O3 CCFLAGS
+      += -miphoneos - version
+         - min = $(MIN_SDK_VERSION) - fembed - bitcode - mno - thumb
+                 - isysroot ${IPHONEOS_SYSROOT} - arch $(IOS_ARCH) -
+                 O3 LDFLAGS
+    : = -fembed - bitcode - miphoneos - version
+        - min = ${MIN_SDK_VERSION} - arch $(IOS_ARCH) OBJDIR
+    : = $(OBJDIR) ios_$(IOS_ARCH) / LIBDIR
+    : = $(LIBDIR) ios_$(IOS_ARCH) / BINDIR
+    : = $(BINDIR) ios_$(IOS_ARCH) / DEPDIR : = $(DEPDIR) ios_$(IOS_ARCH) / endif
diff --git a/tensorflow/contrib/lite/java/demo/app/build.gradle b/tensorflow/contrib/lite/java/demo/app/build.gradle
index e1470fe717..b76eaad8bb 100644
--- a/tensorflow/contrib/lite/java/demo/app/build.gradle
+++ b/tensorflow/contrib/lite/java/demo/app/build.gradle
@@ -36,8 +36,8 @@ android {
 }
 
 repositories {
-    flatDir {
-        dirs 'libs'
+    maven {
+        url 'https://google.bintray.com/tensorflow'
     }
 }
 
diff --git a/tensorflow/contrib/lite/models/testdata/g3doc/README.md b/tensorflow/contrib/lite/models/testdata/g3doc/README.md
index 83760e420f..46b24248f0 100644
--- a/tensorflow/contrib/lite/models/testdata/g3doc/README.md
+++ b/tensorflow/contrib/lite/models/testdata/g3doc/README.md
@@ -86,25 +86,34 @@ same input.
 
 ### Models:
 
-[Speech hotword model (Svdf rank=1)] (https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/lite/models/testdata/speech_hotword_model_rank1.tflite)
+[Speech hotword model (Svdf
+rank=1)](https://storage.googleapis.com/download.tensorflow.org/models/tflite/speech_hotword_model_rank1_2017_11_14.tflite)
 
-[Speech hotword model (Svdf rank=2)] (https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/lite/models/testdata/speech_hotword_model_rank2.tflite)
+[Speech hotword model (Svdf
+rank=2)](https://storage.googleapis.com/download.tensorflow.org/models/tflite/speech_hotword_model_rank2_2017_11_14.tflite)
 
-[Speaker-id model] (https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/lite/models/testdata/speech_speakerid_model.tflite)
+[Speaker-id
+model](https://storage.googleapis.com/download.tensorflow.org/models/tflite/speech_speakerid_model_2017_11_14.tflite)
 
-[TTS model] (https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/lite/models/testdata/speech_tts_model.tflite)
+[TTS
+model](https://storage.googleapis.com/download.tensorflow.org/models/tflite/speech_tts_model_2017_11_14.tflite)
 
-[ASR AM model] (https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/lite/models/testdata/speech_terse_am_model.tflite)
+[ASR AM
+model](https://storage.googleapis.com/download.tensorflow.org/models/tflite/speech_terse_am_model_2017_11_14.tflite)
 
 ### Test benches
 
-[Speech hotword model test] (https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/lite/models/speech_hotword_model_test.cc)
+[Speech hotword model
+test](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/lite/models/speech_hotword_model_test.cc)
 
-[Speaker-id model test] (https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/lite/models/speech_speakerid_model_test.cc)
+[Speaker-id model
+test](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/lite/models/speech_speakerid_model_test.cc)
 
-[TTS model test] (https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/lite/models/speech_tts_model_test.cc)
+[TTS model
+test](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/lite/models/speech_tts_model_test.cc)
 
-[ASR AM model test] (https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/lite/models/speech_terse_am_model_test.cc)
+[ASR AM model
+test](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/lite/models/speech_terse_am_model_test.cc)
 
 ## Android Support
 The models have been tested on Android phones, using the following tests:
@@ -112,5 +121,3 @@ The models have been tested on Android phones, using the following tests:
 [Hotword] (https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/lite/android/BUILD?rcl=172930882&l=25)
 
 [Speaker-id] (https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/lite/android/BUILD?rcl=172930882&l=36)
-
-
diff --git a/tensorflow/contrib/lite/nnapi/NeuralNetworksShim.h b/tensorflow/contrib/lite/nnapi/NeuralNetworksShim.h
index 5d06165772..bdb5e01538 100644
--- a/tensorflow/contrib/lite/nnapi/NeuralNetworksShim.h
+++ b/tensorflow/contrib/lite/nnapi/NeuralNetworksShim.h
@@ -1454,9 +1454,9 @@ inline int ANeuralNetworksModel_finish(ANeuralNetworksModel* model) {
  * {@link ANeuralNetworksExecution_setOutputFromMemory} and
  * {@link ANeuralNetworksExecution_setOperandValue}.
  *
- * To build a model that can accomodate inputs of various sizes, as you may want
- * to do for a CNN, set the size of the dimensions that will vary at run time to
- * 0. If you do so, provide the full dimensions when calling
+ * To build a model that can accommodate inputs of various sizes, as you may
+ * want to do for a CNN, set the size of the dimensions that will vary at run
+ * time to 0. If you do so, provide the full dimensions when calling
  * {@link ANeuralNetworksExecution_setInput} or {@link
  * ANeuralNetworksExecution_setInputFromMemory}.
  *
diff --git a/tensorflow/contrib/lite/schema/upgrade_schema_test.py b/tensorflow/contrib/lite/schema/upgrade_schema_test.py
index 754400e888..b5002e6f75 100644
--- a/tensorflow/contrib/lite/schema/upgrade_schema_test.py
+++ b/tensorflow/contrib/lite/schema/upgrade_schema_test.py
@@ -252,7 +252,7 @@ def JsonDumpAndFlush(data, fp):
 
 class TestSchemaUpgrade(test_util.TensorFlowTestCase):
 
-  def testNonExistantFile(self):
+  def testNonExistentFile(self):
     converter = upgrade_schema_lib.Converter()
     non_existent = tempfile.mktemp(suffix=".json")
     with self.assertRaisesRegexp(IOError, "No such file or directory"):
diff --git a/tensorflow/contrib/lite/testing/BUILD b/tensorflow/contrib/lite/testing/BUILD
index 5e40a13d3c..ecddb4b807 100644
--- a/tensorflow/contrib/lite/testing/BUILD
+++ b/tensorflow/contrib/lite/testing/BUILD
@@ -187,6 +187,7 @@ tf_cc_test(
     srcs = ["generated_examples_zip_test.cc"],
     data = [":optest"],
     shard_count = 10,
+    tags = ["no_oss"],
     deps = [
         ":parse_testdata_lib",
         "//tensorflow/contrib/lite:builtin_op_data",
diff --git a/tensorflow/contrib/lite/testing/parse_testdata.cc b/tensorflow/contrib/lite/testing/parse_testdata.cc
index 2b67052cad..d745ed2715 100644
--- a/tensorflow/contrib/lite/testing/parse_testdata.cc
+++ b/tensorflow/contrib/lite/testing/parse_testdata.cc
@@ -232,7 +232,7 @@ TfLiteStatus CheckOutputs(tflite::Interpreter* interpreter,
 //   invoke {
 //     id: xyz
 //     input: 1,2,1,1,1,2,3,4
-//     ouput: 4,5,6
+//     output: 4,5,6
 //   }
 class Invoke : public Message {
  public:
diff --git a/tensorflow/contrib/lite/testing/test_runner.h b/tensorflow/contrib/lite/testing/test_runner.h
index 04ee4d9f7d..f4b26949b5 100644
--- a/tensorflow/contrib/lite/testing/test_runner.h
+++ b/tensorflow/contrib/lite/testing/test_runner.h
@@ -63,7 +63,7 @@ class TestRunner {
   // Run the model.
   virtual void Invoke() = 0;
 
-  // Verify that the contents of all ouputs conform to the existing
+  // Verify that the contents of all outputs conform to the existing
   // expectations. Return true if there are no expectations or they are all
   // satisfied.
   virtual bool CheckResults() = 0;
diff --git a/tensorflow/contrib/lite/toco/model.h b/tensorflow/contrib/lite/toco/model.h
index f2fce2b249..04b0813523 100644
--- a/tensorflow/contrib/lite/toco/model.h
+++ b/tensorflow/contrib/lite/toco/model.h
@@ -129,7 +129,7 @@ enum class AxesOrder {
 // The type of the scalars in an array.
 // Note that that does not by itself tell whether the values in the array are
 // real (are literally interpreted as real numbers) or quantized (only acquire
-// a meaning as real numbers in conjuction with QuantizationParams).
+// a meaning as real numbers in conjunction with QuantizationParams).
 //
 // In practice though:
 //   float values are always real
diff --git a/tensorflow/contrib/lite/tools/benchmark_model.cc b/tensorflow/contrib/lite/tools/benchmark_model.cc
new file mode 100644
index 0000000000..ef43f64131
--- /dev/null
+++ b/tensorflow/contrib/lite/tools/benchmark_model.cc
@@ -0,0 +1,95 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include <cstdarg>
+#include <cstdlib>
+#include <iostream>
+#include <memory>
+#include <string>
+#include <unordered_set>
+#include <vector>
+
+#include "tensorflow/contrib/lite/kernels/register.h"
+#include "tensorflow/contrib/lite/model.h"
+#include "tensorflow/contrib/lite/string_util.h"
+#include "tensorflow/contrib/lite/tools/mutable_op_resolver.h"
+
+#ifdef TFLITE_CUSTOM_OPS_HEADER
+void RegisterSelectedOps(::tflite::MutableOpResolver* resolver);
+#endif
+
+#define LOG(x) std::cerr
+#define CHECK(x)                  \
+  if (!(x)) {                     \
+    LOG(ERROR) << #x << "failed"; \
+    exit(1);                      \
+  }
+
+namespace tensorflow {
+namespace benchmark_tflite_model {
+
+std::unique_ptr<tflite::FlatBufferModel> model;
+std::unique_ptr<tflite::Interpreter> interpreter;
+
+void InitImpl(const std::string& graph, const std::vector<int>& sizes,
+              const std::string& input_layer_type, int num_threads) {
+  CHECK(graph.c_str());
+
+  model = tflite::FlatBufferModel::BuildFromFile(graph.c_str());
+  if (!model) {
+    LOG(FATAL) << "Failed to mmap model " << graph;
+  }
+  LOG(INFO) << "Loaded model " << graph;
+  model->error_reporter();
+  LOG(INFO) << "resolved reporter";
+
+#ifdef TFLITE_CUSTOM_OPS_HEADER
+  tflite::MutableOpResolver resolver;
+  RegisterSelectedOps(&resolver);
+#else
+  tflite::ops::builtin::BuiltinOpResolver resolver;
+#endif
+
+  tflite::InterpreterBuilder(*model, resolver)(&interpreter);
+  if (!interpreter) {
+    LOG(FATAL) << "Failed to construct interpreter";
+  }
+
+  if (num_threads != -1) {
+    interpreter->SetNumThreads(num_threads);
+  }
+
+  int input = interpreter->inputs()[0];
+
+  if (input_layer_type != "string") {
+    interpreter->ResizeInputTensor(input, sizes);
+  }
+
+  if (interpreter->AllocateTensors() != kTfLiteOk) {
+    LOG(FATAL) << "Failed to allocate tensors!";
+  }
+}
+
+int Main(int argc, char** argv) {
+  InitImpl("", {}, "", 1);
+  return 0;
+}
+
+}  // namespace benchmark_tflite_model
+}  // namespace tensorflow
+
+int main(int argc, char** argv) {
+  return tensorflow::benchmark_tflite_model::Main(argc, argv);
+}
diff --git a/tensorflow/contrib/lite/tools/mutable_op_resolver.h b/tensorflow/contrib/lite/tools/mutable_op_resolver.h
index cc1a8e27e6..be60cf476d 100644
--- a/tensorflow/contrib/lite/tools/mutable_op_resolver.h
+++ b/tensorflow/contrib/lite/tools/mutable_op_resolver.h
@@ -19,6 +19,16 @@ limitations under the License.
 #include "tensorflow/contrib/lite/context.h"
 #include "tensorflow/contrib/lite/model.h"
 
+// Needed to resolve unordered_set hash on older compilers.
+namespace std {
+template <>
+struct hash<tflite::BuiltinOperator> {
+  size_t operator()(const tflite::BuiltinOperator& op) const {
+    return std::hash<int>()(op);
+  }
+};
+}  // namespace std
+
 namespace tflite {
 
 // An OpResolver that is mutable, also used as the op in gen_op_registration.
diff --git a/tensorflow/contrib/mpi/BUILD b/tensorflow/contrib/mpi/BUILD
index 20ceef5004..d9d55faf50 100644
--- a/tensorflow/contrib/mpi/BUILD
+++ b/tensorflow/contrib/mpi/BUILD
@@ -72,6 +72,7 @@ cc_library(
         "//tensorflow/core:worker_proto_cc",
         "//tensorflow/core/distributed_runtime:base_rendezvous_mgr",
         "//tensorflow/core/distributed_runtime:session_mgr",
+        "//tensorflow/core/distributed_runtime:tensor_coding",
         "//tensorflow/core/distributed_runtime:worker_env",
         "//third_party/mpi",
     ],
diff --git a/tensorflow/contrib/nn/python/ops/cross_entropy.py b/tensorflow/contrib/nn/python/ops/cross_entropy.py
index 61c1d1c6d9..5045f2c957 100644
--- a/tensorflow/contrib/nn/python/ops/cross_entropy.py
+++ b/tensorflow/contrib/nn/python/ops/cross_entropy.py
@@ -116,7 +116,7 @@ def deprecated_flipped_sparse_softmax_cross_entropy_with_logits(logits,
 
   Raises:
     ValueError: If logits are scalars (need to have rank >= 1) or if the rank
-      of the labels is not equal to the rank of the labels minus one.
+      of the labels is not equal to the rank of the logits minus one.
   """
   return nn.sparse_softmax_cross_entropy_with_logits(
       labels=labels, logits=logits, name=name)
diff --git a/tensorflow/contrib/nn/python/ops/sampling_ops.py b/tensorflow/contrib/nn/python/ops/sampling_ops.py
index 2ae529e015..98749cff7e 100644
--- a/tensorflow/contrib/nn/python/ops/sampling_ops.py
+++ b/tensorflow/contrib/nn/python/ops/sampling_ops.py
@@ -34,7 +34,7 @@ def _rank_resample(weights, biases, inputs, sampled_values, num_resampled,
 
       log(sum_j exp((w_i * x_j + b_i) / resampling_temperature))
 
-  where w_i, b_i are the weight and bias of the i-th class, repsectively,
+  where w_i, b_i are the weight and bias of the i-th class, respectively,
   and j ranges over the rows of `inputs`. For efficiency, we rearrange the
   computation to
 
diff --git a/tensorflow/contrib/rnn/python/ops/rnn_cell.py b/tensorflow/contrib/rnn/python/ops/rnn_cell.py
index 289359e5ec..9685b58392 100644
--- a/tensorflow/contrib/rnn/python/ops/rnn_cell.py
+++ b/tensorflow/contrib/rnn/python/ops/rnn_cell.py
@@ -114,7 +114,6 @@ class CoupledInputForgetGateLSTMCell(rnn_cell_impl.RNNCell):
 
   The class uses optional peep-hole connections, and an optional projection
   layer.
-
   Layer normalization implementation is based on:
 
     https://arxiv.org/abs/1607.06450.
diff --git a/tensorflow/contrib/slim/README.md b/tensorflow/contrib/slim/README.md
index f7a85557ca..dc92ae0c85 100644
--- a/tensorflow/contrib/slim/README.md
+++ b/tensorflow/contrib/slim/README.md
@@ -441,7 +441,8 @@ module. Consider the simple case where we want to train the VGG network:
 
 ```python
 import tensorflow as tf
-vgg = tf.contrib.slim.nets.vgg
+import tensorflow.contrib.slim.nets as nets
+vgg = nets.vgg
 
 # Load the images and labels.
 images, labels = ...
@@ -559,9 +560,10 @@ examine the following sample of training the VGG network:
 
 ```python
 import tensorflow as tf
+import tensorflow.contrib.slim.nets as nets
 
 slim = tf.contrib.slim
-vgg = tf.contrib.slim.nets.vgg
+vgg = nets.vgg
 
 ...
 
@@ -809,9 +811,10 @@ Putting it all together:
 
 ```python
 import tensorflow as tf
+import tensorflow.contrib.slim.nets as nets
 
 slim = tf.contrib.slim
-vgg = tf.contrib.slim.nets.vgg
+vgg = nets.vgg
 
 
 # Load the data
diff --git a/tensorflow/contrib/slim/python/slim/evaluation.py b/tensorflow/contrib/slim/python/slim/evaluation.py
index cdb720b36b..3caf4e02da 100644
--- a/tensorflow/contrib/slim/python/slim/evaluation.py
+++ b/tensorflow/contrib/slim/python/slim/evaluation.py
@@ -34,7 +34,7 @@ the metrics and finally call the `evaluation` method:
       "mse": slim.metrics.mean_squared_error(predictions, labels),
   })
 
-  inital_op = tf.group(
+  initial_op = tf.group(
       tf.global_variables_initializer(),
       tf.local_variables_initializer())
 
@@ -42,7 +42,7 @@ the metrics and finally call the `evaluation` method:
     metric_values = slim.evaluation(
         sess,
         num_evals=1,
-        inital_op=initial_op,
+        initial_op=initial_op,
         eval_op=names_to_updates.values(),
         final_op=name_to_values.values())
 
diff --git a/tensorflow/contrib/summary/BUILD b/tensorflow/contrib/summary/BUILD
index 45d6454526..f34291c203 100644
--- a/tensorflow/contrib/summary/BUILD
+++ b/tensorflow/contrib/summary/BUILD
@@ -25,7 +25,6 @@ py_test(
     srcs_version = "PY2AND3",
     deps = [
         ":summary_ops",
-        ":summary_test_internal",
         ":summary_test_util",
         "//tensorflow/python:array_ops",
         "//tensorflow/python:errors",
@@ -46,7 +45,6 @@ py_test(
     srcs_version = "PY2AND3",
     deps = [
         ":summary_ops",
-        ":summary_test_internal",
         ":summary_test_util",
         "//tensorflow/core:protos_all_py",
         "//tensorflow/python:array_ops",
@@ -119,15 +117,3 @@ py_library(
         "//tensorflow/python:platform",
     ],
 )
-
-py_library(
-    name = "summary_test_internal",
-    testonly = 1,
-    srcs = ["summary_test_internal.py"],
-    srcs_version = "PY2AND3",
-    visibility = ["//visibility:private"],
-    deps = [
-        "//tensorflow/python:lib",
-        "//tensorflow/python:platform",
-    ],
-)
diff --git a/tensorflow/contrib/summary/summary_ops_graph_test.py b/tensorflow/contrib/summary/summary_ops_graph_test.py
index fe55bf93e2..703adb7b46 100644
--- a/tensorflow/contrib/summary/summary_ops_graph_test.py
+++ b/tensorflow/contrib/summary/summary_ops_graph_test.py
@@ -21,7 +21,6 @@ import tempfile
 import six
 
 from tensorflow.contrib.summary import summary_ops
-from tensorflow.contrib.summary import summary_test_internal
 from tensorflow.contrib.summary import summary_test_util
 from tensorflow.core.framework import graph_pb2
 from tensorflow.core.framework import node_def_pb2
@@ -33,10 +32,10 @@ from tensorflow.python.ops import control_flow_ops
 from tensorflow.python.platform import test
 from tensorflow.python.training import training_util
 
-get_all = summary_test_internal.get_all
+get_all = summary_test_util.get_all
 
 
-class DbTest(summary_test_internal.SummaryDbTest):
+class DbTest(summary_test_util.SummaryDbTest):
 
   def testGraphPassedToGraph_isForbiddenForThineOwnSafety(self):
     with self.assertRaises(TypeError):
diff --git a/tensorflow/contrib/summary/summary_ops_test.py b/tensorflow/contrib/summary/summary_ops_test.py
index 3fe421a7e9..54433deb28 100644
--- a/tensorflow/contrib/summary/summary_ops_test.py
+++ b/tensorflow/contrib/summary/summary_ops_test.py
@@ -21,7 +21,6 @@ import tempfile
 import six
 
 from tensorflow.contrib.summary import summary_ops
-from tensorflow.contrib.summary import summary_test_internal
 from tensorflow.contrib.summary import summary_test_util
 from tensorflow.core.framework import graph_pb2
 from tensorflow.core.framework import node_def_pb2
@@ -35,8 +34,8 @@ from tensorflow.python.ops import state_ops
 from tensorflow.python.platform import gfile
 from tensorflow.python.training import training_util
 
-get_all = summary_test_internal.get_all
-get_one = summary_test_internal.get_one
+get_all = summary_test_util.get_all
+get_one = summary_test_util.get_one
 
 
 class TargetTest(test_util.TensorFlowTestCase):
@@ -137,7 +136,7 @@ class TargetTest(test_util.TensorFlowTestCase):
       self.assertEqual(3, get_total())
 
 
-class DbTest(summary_test_internal.SummaryDbTest):
+class DbTest(summary_test_util.SummaryDbTest):
 
   def testIntegerSummaries(self):
     step = training_util.create_global_step()
diff --git a/tensorflow/contrib/summary/summary_test_util.py b/tensorflow/contrib/summary/summary_test_util.py
index 794c5b8bab..915820e05b 100644
--- a/tensorflow/contrib/summary/summary_test_util.py
+++ b/tensorflow/contrib/summary/summary_test_util.py
@@ -19,13 +19,38 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
+import functools
 import os
+import sqlite3
 
+from tensorflow.contrib.summary import summary_ops
 from tensorflow.core.util import event_pb2
+from tensorflow.python.framework import test_util
 from tensorflow.python.lib.io import tf_record
 from tensorflow.python.platform import gfile
 
 
+class SummaryDbTest(test_util.TensorFlowTestCase):
+  """Helper for summary database testing."""
+
+  def setUp(self):
+    super(SummaryDbTest, self).setUp()
+    self.db_path = os.path.join(self.get_temp_dir(), 'DbTest.sqlite')
+    if os.path.exists(self.db_path):
+      os.unlink(self.db_path)
+    self.db = sqlite3.connect(self.db_path)
+    self.create_summary_db_writer = functools.partial(
+        summary_ops.create_summary_db_writer,
+        db_uri=self.db_path,
+        experiment_name='experiment',
+        run_name='run',
+        user_name='user')
+
+  def tearDown(self):
+    self.db.close()
+    super(SummaryDbTest, self).tearDown()
+
+
 def events_from_file(filepath):
   """Returns all events in a single event file.
 
@@ -58,5 +83,17 @@ def events_from_logdir(logdir):
   """
   assert gfile.Exists(logdir)
   files = gfile.ListDirectory(logdir)
-  assert len(files) == 1, "Found not exactly one file in logdir: %s" % files
+  assert len(files) == 1, 'Found not exactly one file in logdir: %s' % files
   return events_from_file(os.path.join(logdir, files[0]))
+
+
+def get_one(db, q, *p):
+  return db.execute(q, p).fetchone()[0]
+
+
+def get_all(db, q, *p):
+  return unroll(db.execute(q, p).fetchall())
+
+
+def unroll(list_of_tuples):
+  return sum(list_of_tuples, ())
diff --git a/tensorflow/contrib/tensor_forest/hybrid/python/models/decisions_to_data_then_nn_test.py b/tensorflow/contrib/tensor_forest/hybrid/python/models/decisions_to_data_then_nn_test.py
index cccf444db8..a56beeeb2c 100644
--- a/tensorflow/contrib/tensor_forest/hybrid/python/models/decisions_to_data_then_nn_test.py
+++ b/tensorflow/contrib/tensor_forest/hybrid/python/models/decisions_to_data_then_nn_test.py
@@ -80,7 +80,7 @@ class DecisionsToDataThenNNTest(test_util.TensorFlowTestCase):
         isinstance(self.params.num_trees, tensor_forest.ForestHParams))
 
     with variable_scope.variable_scope(
-        "DecisionsToDataThenNNTest_testContructionPollution"):
+        "DecisionsToDataThenNNTest_testConstructionPollution"):
       graph_builder = decisions_to_data_then_nn.DecisionsToDataThenNN(
           self.params)
 
@@ -95,7 +95,7 @@ class DecisionsToDataThenNNTest(test_util.TensorFlowTestCase):
          for _ in range(100)])
 
     with variable_scope.variable_scope(
-        "DecisionsToDataThenNNTest_testInferenceContruction"):
+        "DecisionsToDataThenNNTest_testInferenceConstruction"):
       graph_builder = decisions_to_data_then_nn.DecisionsToDataThenNN(
           self.params)
       graph = graph_builder.inference_graph(data, None)
@@ -111,7 +111,7 @@ class DecisionsToDataThenNNTest(test_util.TensorFlowTestCase):
     labels = [1 for _ in range(100)]
 
     with variable_scope.variable_scope(
-        "DecisionsToDataThenNNTest_testTrainingContruction"):
+        "DecisionsToDataThenNNTest_testTrainingConstruction"):
       graph_builder = decisions_to_data_then_nn.DecisionsToDataThenNN(
           self.params)
       graph = graph_builder.training_graph(data, labels, None)
diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD
index a1d61a7932..fce0663aa5 100644
--- a/tensorflow/core/BUILD
+++ b/tensorflow/core/BUILD
@@ -455,6 +455,7 @@ tf_cuda_library(
         "util/mirror_pad_mode.h",
         "util/padding.h",
         "util/port.h",
+        "util/ptr_util.h",
         "util/reffed_status_callback.h",
         "util/saved_tensor_slice_util.h",
         "util/sparse/group_iterator.h",
@@ -493,6 +494,11 @@ cc_library(
     ],
 )
 
+cc_library(
+    name = "ptr_util",
+    hdrs = ["util/ptr_util.h"],
+)
+
 cc_library(
     name = "reader_base",
     srcs = ["framework/reader_base.cc"],
diff --git a/tensorflow/core/graph/graph.h b/tensorflow/core/graph/graph.h
index 223dd12f8f..b620127d90 100644
--- a/tensorflow/core/graph/graph.h
+++ b/tensorflow/core/graph/graph.h
@@ -455,7 +455,6 @@ class Graph {
   // the corresponding NodeDef to reflect the change.
   // REQUIRES: The control edge must exist.
   void RemoveControlEdge(const Edge* e);
-
   // Updates the input to a node.  The existing edge to `dst` is removed and an
   // edge from `new_src` to `dst` is created. The NodeDef associated with `dst`
   // is also updated.
diff --git a/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc b/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc
index 6861a51795..efe8ac05a3 100644
--- a/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc
+++ b/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc
@@ -1068,7 +1068,7 @@ Status ArithmeticOptimizer::SimplifyArithmeticOps() {
       if (simplified_node != nullptr) {
         nodes_to_simplify.PushBack(simplified_node);
       }
-      // When `node` is simplifed to another node rather than in-place, the
+      // When `node` is simplified to another node rather than in-place, the
       // consumers of `node` are already redirected to `simplified_tensor`.
       // Re-push the consumers into `nodes_to_simplify` for further
       // optimizations.
diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD
index 9279514e6b..dcffb28513 100644
--- a/tensorflow/core/kernels/BUILD
+++ b/tensorflow/core/kernels/BUILD
@@ -2583,8 +2583,13 @@ tf_kernel_library(
 
 tf_kernel_library(
     name = "batch_matmul_op",
+    srcs = [] + if_mkl([
+        "mkl_batch_matmul_op.cc",
+    ]),
     prefix = "batch_matmul_op",
-    deps = MATH_DEPS,
+    deps = MATH_DEPS + if_mkl([
+        "//third_party/mkl:intel_binary_blob",
+    ]),
 )
 
 tf_kernel_library(
@@ -6325,11 +6330,11 @@ cc_library(
     srcs = ["summary_interface.cc"],
     hdrs = ["summary_interface.h"],
     deps = [
-        "//tensorflow/compiler/xla:util",
         "//tensorflow/core:framework",
         "//tensorflow/core:lib",
         "//tensorflow/core:lib_internal",
         "//tensorflow/core:protos_all_cc",
+        "//tensorflow/core:ptr_util",
     ],
 )
 
diff --git a/tensorflow/core/kernels/batch_matmul_op_complex.cc b/tensorflow/core/kernels/batch_matmul_op_complex.cc
index a58ec02726..96216764fd 100644
--- a/tensorflow/core/kernels/batch_matmul_op_complex.cc
+++ b/tensorflow/core/kernels/batch_matmul_op_complex.cc
@@ -17,8 +17,10 @@ limitations under the License.
 
 namespace tensorflow {
 
+#if !defined(INTEL_MKL)
 TF_CALL_complex64(REGISTER_BATCH_MATMUL_CPU);
 TF_CALL_complex128(REGISTER_BATCH_MATMUL_CPU);
+#endif
 
 #if GOOGLE_CUDA
 TF_CALL_complex64(REGISTER_BATCH_MATMUL_GPU);
diff --git a/tensorflow/core/kernels/batch_matmul_op_real.cc b/tensorflow/core/kernels/batch_matmul_op_real.cc
index 1900ed8e31..8d155ca62b 100644
--- a/tensorflow/core/kernels/batch_matmul_op_real.cc
+++ b/tensorflow/core/kernels/batch_matmul_op_real.cc
@@ -17,8 +17,10 @@ limitations under the License.
 
 namespace tensorflow {
 
+#if !defined(INTEL_MKL)
 TF_CALL_float(REGISTER_BATCH_MATMUL_CPU);
 TF_CALL_double(REGISTER_BATCH_MATMUL_CPU);
+#endif
 TF_CALL_half(REGISTER_BATCH_MATMUL_CPU);
 TF_CALL_int32(REGISTER_BATCH_MATMUL_CPU);
 
diff --git a/tensorflow/core/kernels/cwise_op_asinh.cc b/tensorflow/core/kernels/cwise_op_asinh.cc
index a7673afd0b..822d72e068 100644
--- a/tensorflow/core/kernels/cwise_op_asinh.cc
+++ b/tensorflow/core/kernels/cwise_op_asinh.cc
@@ -4,7 +4,7 @@ Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
 You may obtain a copy of the License at
 
-    http://www.apache.org/licenses/LICENSE-2.0
+  http://www.apache.org/licenses/LICENSE-2.0
 
 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
diff --git a/tensorflow/core/kernels/decode_bmp_op.cc b/tensorflow/core/kernels/decode_bmp_op.cc
index 086369a9f1..6d9fdfcf33 100644
--- a/tensorflow/core/kernels/decode_bmp_op.cc
+++ b/tensorflow/core/kernels/decode_bmp_op.cc
@@ -34,8 +34,10 @@ class DecodeBmpOp : public OpKernel {
   explicit DecodeBmpOp(OpKernelConstruction* context) : OpKernel(context) {
     OP_REQUIRES_OK(context, context->GetAttr("channels", &channels_));
     OP_REQUIRES(
-        context, channels_ == 0 || channels_ == 3 || channels_ == 4,
-        errors::InvalidArgument("channels must be 0, 3 or 4, got ", channels_));
+        context,
+        channels_ == 0 || channels_ == 1 || channels_ == 3 || channels_ == 4,
+        errors::InvalidArgument("channels must be 0, 1, 3 or 4, got ",
+                                channels_));
   }
 
   void Compute(OpKernelContext* context) override {
@@ -66,11 +68,11 @@ class DecodeBmpOp : public OpKernel {
       channels_ = bpp / 8;
     }
 
-    // Current implementation only supports 3 or 4 channel
+    // Current implementation only supports 1, 3 or 4 channel
     // bitmaps.
-    OP_REQUIRES(context, (channels_ == 3 || channels_ == 4),
+    OP_REQUIRES(context, (channels_ == 1 || channels_ == 3 || channels_ == 4),
                 errors::InvalidArgument(
-                    "Number of channels must be 3 or 4, was ", channels_));
+                    "Number of channels must be 1, 3 or 4, was ", channels_));
 
     // if height is negative, data layout is top down
     // otherwise, it's bottom up
@@ -117,6 +119,9 @@ uint8* DecodeBmpOp::Decode(const uint8* input, uint8* const output,
       dst_pos = (i * width + j) * channels;
 
       switch (channels) {
+        case 1:
+          output[dst_pos] = input[src_pos];
+          break;
         case 3:
           // BGR -> RGB
           output[dst_pos] = input[src_pos + 2];
diff --git a/tensorflow/core/kernels/dynamic_partition_op_test.cc b/tensorflow/core/kernels/dynamic_partition_op_test.cc
index 0e8fbc0a67..9a7ed0af21 100644
--- a/tensorflow/core/kernels/dynamic_partition_op_test.cc
+++ b/tensorflow/core/kernels/dynamic_partition_op_test.cc
@@ -16,6 +16,7 @@ limitations under the License.
 #include <functional>
 #include <memory>
 
+#include "tensorflow/core/common_runtime/kernel_benchmark_testlib.h"
 #include "tensorflow/core/framework/allocator.h"
 #include "tensorflow/core/framework/fake_input.h"
 #include "tensorflow/core/framework/node_def_builder.h"
@@ -23,10 +24,14 @@ limitations under the License.
 #include "tensorflow/core/framework/tensor.h"
 #include "tensorflow/core/framework/types.h"
 #include "tensorflow/core/framework/types.pb.h"
+#include "tensorflow/core/graph/node_builder.h"
+#include "tensorflow/core/graph/testlib.h"
 #include "tensorflow/core/kernels/ops_testutil.h"
 #include "tensorflow/core/kernels/ops_util.h"
 #include "tensorflow/core/lib/core/status_test_util.h"
+#include "tensorflow/core/lib/random/simple_philox.h"
 #include "tensorflow/core/platform/test.h"
+#include "tensorflow/core/platform/test_benchmark.h"
 
 namespace tensorflow {
 namespace {
@@ -153,5 +158,58 @@ TEST_F(DynamicPartitionOpTest, Error_IndexOutOfRange) {
       << s;
 }
 
+Node* DynamicPartitionNode(Graph* g, Node* in0, Node* in1, int num_partitions) {
+  Node* ret;
+  TF_CHECK_OK(NodeBuilder(g->NewName("n"), "DynamicPartition")
+                  .Input(in0)
+                  .Input(in1)
+                  .Attr("num_partitions", num_partitions)
+                  .Finalize(g, &ret));
+  return ret;
+}
+
+template <typename T>
+static Graph* DynamicPartition(int num_partitions, int dim) {
+  Graph* g = new Graph(OpRegistry::Global());
+  // Always use a 128MB buffer.
+  const int kRows = ((128 << 20) / sizeof(T)) / dim;
+  Tensor data(DataTypeToEnum<T>::value, TensorShape({kRows, dim}));
+  data.flat<T>().setRandom();
+
+  random::PhiloxRandom philox(301, 17);
+  random::SimplePhilox rnd(&philox);
+  Tensor partitions(DT_INT32, TensorShape({kRows}));
+  for (int i = 0; i < kRows; i++) {
+    partitions.flat<int32>()(i) = rnd.Uniform(num_partitions);
+  }
+  DynamicPartitionNode(g, test::graph::Constant(g, data),
+                       test::graph::Constant(g, partitions), num_partitions);
+  return g;
+}
+
+#define BM_DYNAMIC_PARTITION(DEVICE, T, num)                            \
+  static void BM_##DEVICE##_dynpart_##T##_##num(int iters, int dim) {   \
+    const int64 items = ((128 << 20) / sizeof(T));                      \
+    const int64 tot = static_cast<int64>(iters) * items;                \
+    testing::ItemsProcessed(tot);                                       \
+    testing::UseRealTime();                                             \
+    test::Benchmark(#DEVICE, DynamicPartition<T>(num, dim)).Run(iters); \
+  }                                                                     \
+  BENCHMARK(BM_##DEVICE##_dynpart_##T##_##num)->Arg(1)->Arg(256)
+
+BM_DYNAMIC_PARTITION(cpu, float, 2);
+BM_DYNAMIC_PARTITION(cpu, float, 100);
+BM_DYNAMIC_PARTITION(cpu, double, 2);
+BM_DYNAMIC_PARTITION(cpu, double, 100);
+BM_DYNAMIC_PARTITION(cpu, complex64, 2);
+BM_DYNAMIC_PARTITION(cpu, complex64, 100);
+
+BM_DYNAMIC_PARTITION(gpu, float, 2);
+BM_DYNAMIC_PARTITION(gpu, float, 100);
+BM_DYNAMIC_PARTITION(gpu, double, 2);
+BM_DYNAMIC_PARTITION(gpu, double, 100);
+BM_DYNAMIC_PARTITION(gpu, complex64, 2);
+BM_DYNAMIC_PARTITION(gpu, complex64, 100);
+
 }  // namespace
 }  // namespace tensorflow
diff --git a/tensorflow/core/kernels/mkl_batch_matmul_op.cc b/tensorflow/core/kernels/mkl_batch_matmul_op.cc
new file mode 100644
index 0000000000..d9713075be
--- /dev/null
+++ b/tensorflow/core/kernels/mkl_batch_matmul_op.cc
@@ -0,0 +1,238 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+// See docs in ../ops/math_ops.cc.
+
+// This file uses MKL CBLAS batched xGEMM for acceleration of TF Batch
+// Matrix-Matrix Multiplication (MatMul) operations.
+// We currently register this kernel only for MKL supported data
+// types (float, double, complex64, complex128). The macro INTEL_MKL is defined
+// by the build system only when MKL is chosen as an option at configure stage
+// and when it is undefined at build time, this file becomes an empty
+// compilation unit
+
+#define EIGEN_USE_THREADS
+
+#if defined(INTEL_MKL)
+#include <vector>
+#include "mkl_cblas.h"
+#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
+#include "tensorflow/core/framework/numeric_types.h"
+#include "tensorflow/core/framework/op.h"
+#include "tensorflow/core/framework/op_kernel.h"
+#include "tensorflow/core/framework/register_types.h"
+#include "tensorflow/core/framework/tensor.h"
+#include "tensorflow/core/framework/tensor_shape.h"
+#include "tensorflow/core/framework/type_traits.h"
+#include "tensorflow/core/framework/types.h"
+#include "tensorflow/core/kernels/fill_functor.h"
+#include "tensorflow/core/platform/logging.h"
+#include "tensorflow/core/platform/types.h"
+
+#define MKL_Complex8 tensorflow::complex64
+#define MKL_Complex16 tensorflow::complex128
+
+namespace tensorflow {
+
+typedef Eigen::ThreadPoolDevice CPUDevice;
+
+template <typename Device, typename Scalar>
+class BatchMatMulMkl : public OpKernel {
+ public:
+  explicit BatchMatMulMkl(OpKernelConstruction *context) : OpKernel(context) {
+    OP_REQUIRES_OK(context, context->GetAttr("adj_x", &adj_x_));
+    OP_REQUIRES_OK(context, context->GetAttr("adj_y", &adj_y_));
+  }
+
+  virtual ~BatchMatMulMkl() {}
+
+  void Compute(OpKernelContext *ctx) override {
+    const Tensor &lhs = ctx->input(0);
+    const Tensor &rhs = ctx->input(1);
+    OP_REQUIRES(ctx, lhs.dims() == rhs.dims(),
+                errors::InvalidArgument("lhs and rhs has different ndims: ",
+                                        lhs.shape().DebugString(), " vs. ",
+                                        rhs.shape().DebugString()));
+    const int ndims = lhs.dims();
+    OP_REQUIRES(
+        ctx, ndims >= 2,
+        errors::InvalidArgument("lhs and rhs ndims must be >= 2: ", ndims));
+    TensorShape out_shape;
+    for (int i = 0; i < ndims - 2; ++i) {
+      OP_REQUIRES(ctx, lhs.dim_size(i) == rhs.dim_size(i),
+                  errors::InvalidArgument(
+                      "lhs.dim(", i, ") and rhs.dim(", i,
+                      ") must be the same: ", lhs.shape().DebugString(), " vs ",
+                      rhs.shape().DebugString()));
+      out_shape.AddDim(lhs.dim_size(i));
+    }
+    auto batch_size = (ndims == 2) ? 1 : out_shape.num_elements();
+    auto lhs_rows = lhs.dim_size(ndims - 2);
+    auto lhs_cols = lhs.dim_size(ndims - 1);
+    auto rhs_rows = rhs.dim_size(ndims - 2);
+    auto rhs_cols = rhs.dim_size(ndims - 1);
+    if (adj_x_) std::swap(lhs_rows, lhs_cols);
+    if (adj_y_) std::swap(rhs_rows, rhs_cols);
+    OP_REQUIRES(ctx, lhs_cols == rhs_rows,
+                errors::InvalidArgument(
+                    "lhs mismatch rhs shape: ", lhs_cols, " vs. ", rhs_rows,
+                    ": ", lhs.shape().DebugString(), " ",
+                    rhs.shape().DebugString(), " ", adj_x_, " ", adj_y_));
+    out_shape.AddDim(lhs_rows);
+    out_shape.AddDim(rhs_cols);
+    Tensor *out = nullptr;
+    OP_REQUIRES_OK(ctx, ctx->allocate_output(0, out_shape, &out));
+    if (out->NumElements() == 0) {
+      return;
+    }
+    if (lhs.NumElements() == 0 || rhs.NumElements() == 0) {
+      functor::SetZeroFunctor<Device, Scalar> f;
+      f(ctx->eigen_device<Device>(), out->flat<Scalar>());
+      return;
+    }
+
+    auto rhs_reshaped = rhs.template flat_inner_dims<Scalar, 3>();
+    auto lhs_reshaped = lhs.template flat_inner_dims<Scalar, 3>();
+    auto out_reshaped = out->template flat_inner_dims<Scalar, 3>();
+    const uint64 M = lhs_reshaped.dimension(adj_x_ ? 2 : 1);
+    const uint64 K = lhs_reshaped.dimension(adj_x_ ? 1 : 2);
+    const uint64 N = rhs_reshaped.dimension(adj_y_ ? 1 : 2);
+
+    std::vector<MKL_INT> m_array(batch_size, M);
+    std::vector<MKL_INT> n_array(batch_size, N);
+    std::vector<MKL_INT> k_array(batch_size, K);
+    std::vector<MKL_INT> lda_array(batch_size, adj_x_ ? M : K);
+    std::vector<MKL_INT> ldb_array(batch_size, adj_y_ ? K : N);
+    std::vector<MKL_INT> ldc_array(batch_size, N);
+    std::vector<MKL_INT> group_size(1, batch_size);
+    std::vector<const Scalar *> a_array;
+    std::vector<const Scalar *> b_array;
+    std::vector<Scalar *> c_array;
+    a_array.reserve(batch_size);
+    b_array.reserve(batch_size);
+    c_array.reserve(batch_size);
+    for (int64 i = 0; i < batch_size; i++) {
+      a_array.push_back(&lhs_reshaped(i, 0, 0));
+      b_array.push_back(&rhs_reshaped(i, 0, 0));
+      c_array.push_back(&out_reshaped(i, 0, 0));
+    }
+
+    MklCblasGemmBatch(CblasRowMajor, adj_x_, adj_y_, &m_array[0], &n_array[0],
+                      &k_array[0], &a_array[0], &lda_array[0], &b_array[0],
+                      &ldb_array[0], &c_array[0], &ldc_array[0], 1,
+                      &group_size[0]);
+  }
+
+ private:
+  bool adj_x_;
+  bool adj_y_;
+
+  void MklCblasGemmBatch(const CBLAS_LAYOUT Layout, const bool TransA,
+                         const bool TransB, const MKL_INT *M_Array,
+                         const MKL_INT *N_Array, const MKL_INT *K_Array,
+                         const float **A_Array, const MKL_INT *lda_Array,
+                         const float **B_Array, const MKL_INT *ldb_Array,
+                         float **C_Array, const MKL_INT *ldc_Array,
+                         const MKL_INT group_count, const MKL_INT *group_size) {
+    std::vector<CBLAS_TRANSPOSE> TransA_Array(
+        group_size[0], TransA ? CblasTrans : CblasNoTrans);
+    std::vector<CBLAS_TRANSPOSE> TransB_Array(
+        group_size[0], TransB ? CblasTrans : CblasNoTrans);
+    std::vector<float> alpha_Array(group_size[0], 1.0);
+    std::vector<float> beta_Array(group_size[0], 0.0);
+    cblas_sgemm_batch(Layout, &TransA_Array[0], &TransB_Array[0], M_Array,
+                      N_Array, K_Array, &alpha_Array[0], A_Array, lda_Array,
+                      B_Array, ldb_Array, &beta_Array[0], C_Array, ldc_Array,
+                      group_count, group_size);
+  }
+
+  void MklCblasGemmBatch(const CBLAS_LAYOUT Layout, const bool TransA,
+                         const bool TransB, const MKL_INT *M_Array,
+                         const MKL_INT *N_Array, const MKL_INT *K_Array,
+                         const double **A_Array, const MKL_INT *lda_Array,
+                         const double **B_Array, const MKL_INT *ldb_Array,
+                         double **C_Array, const MKL_INT *ldc_Array,
+                         const MKL_INT group_count, const MKL_INT *group_size) {
+    std::vector<CBLAS_TRANSPOSE> TransA_array(
+        group_size[0], TransA ? CblasTrans : CblasNoTrans);
+    std::vector<CBLAS_TRANSPOSE> TransB_array(
+        group_size[0], TransB ? CblasTrans : CblasNoTrans);
+    std::vector<double> alpha_Array(group_size[0], 1.0);
+    std::vector<double> beta_Array(group_size[0], 0.0);
+    cblas_dgemm_batch(Layout, &TransA_array[0], &TransB_array[0], M_Array,
+                      N_Array, K_Array, &alpha_Array[0], A_Array, lda_Array,
+                      B_Array, ldb_Array, &beta_Array[0], C_Array, ldc_Array,
+                      group_count, group_size);
+  }
+
+  void MklCblasGemmBatch(const CBLAS_LAYOUT Layout, const bool TransA,
+                         const bool TransB, const MKL_INT *M_Array,
+                         const MKL_INT *N_Array, const MKL_INT *K_Array,
+                         const MKL_Complex8 **A_Array, const MKL_INT *lda_Array,
+                         const MKL_Complex8 **B_Array, const MKL_INT *ldb_Array,
+                         MKL_Complex8 **C_Array, const MKL_INT *ldc_Array,
+                         const MKL_INT group_count, const MKL_INT *group_size) {
+    std::vector<CBLAS_TRANSPOSE> TransA_array(
+        group_size[0], TransA ? CblasConjTrans : CblasNoTrans);
+    std::vector<CBLAS_TRANSPOSE> TransB_array(
+        group_size[0], TransB ? CblasConjTrans : CblasNoTrans);
+    std::vector<MKL_Complex8> alpha_Array(group_size[0], {1.0f, 0.0f});
+    std::vector<MKL_Complex8> beta_Array(group_size[0], {0.0f, 0.0f});
+    cblas_cgemm_batch(
+        Layout, &TransA_array[0], &TransB_array[0], M_Array, N_Array, K_Array,
+        static_cast<const void *>(&alpha_Array[0]),
+        reinterpret_cast<const void **>(A_Array), lda_Array,
+        reinterpret_cast<const void **>(B_Array), ldb_Array,
+        static_cast<const void *>(&beta_Array[0]),
+        reinterpret_cast<void **>(C_Array), ldc_Array, group_count, group_size);
+  }
+
+  void MklCblasGemmBatch(const CBLAS_LAYOUT Layout, const bool TransA,
+                         const bool TransB, const MKL_INT *M_Array,
+                         const MKL_INT *N_Array, const MKL_INT *K_Array,
+                         const MKL_Complex16 **A_Array,
+                         const MKL_INT *lda_Array,
+                         const MKL_Complex16 **B_Array,
+                         const MKL_INT *ldb_Array, MKL_Complex16 **C_Array,
+                         const MKL_INT *ldc_Array, const MKL_INT group_count,
+                         const MKL_INT *group_size) {
+    std::vector<CBLAS_TRANSPOSE> TransA_array(
+        group_size[0], TransA ? CblasConjTrans : CblasNoTrans);
+    std::vector<CBLAS_TRANSPOSE> TransB_array(
+        group_size[0], TransB ? CblasConjTrans : CblasNoTrans);
+    std::vector<MKL_Complex16> alpha_Array(group_size[0], {1.0f, 0.0f});
+    std::vector<MKL_Complex16> beta_Array(group_size[0], {0.0f, 0.0f});
+    cblas_zgemm_batch(
+        Layout, &TransA_array[0], &TransB_array[0], M_Array, N_Array, K_Array,
+        static_cast<const void *>(&alpha_Array[0]),
+        reinterpret_cast<const void **>(A_Array), lda_Array,
+        reinterpret_cast<const void **>(B_Array), ldb_Array,
+        static_cast<const void *>(&beta_Array[0]),
+        reinterpret_cast<void **>(C_Array), ldc_Array, group_count, group_size);
+  }
+};
+
+#define REGISTER_BATCH_MATMUL_MKL(TYPE)                                 \
+  REGISTER_KERNEL_BUILDER(                                              \
+      Name("BatchMatMul").Device(DEVICE_CPU).TypeConstraint<TYPE>("T"), \
+      BatchMatMulMkl<CPUDevice, TYPE>)
+
+TF_CALL_float(REGISTER_BATCH_MATMUL_MKL);
+TF_CALL_double(REGISTER_BATCH_MATMUL_MKL);
+TF_CALL_complex64(REGISTER_BATCH_MATMUL_MKL);
+TF_CALL_complex128(REGISTER_BATCH_MATMUL_MKL);
+
+}  // end namespace tensorflow
+#endif
diff --git a/tensorflow/core/kernels/prefetch_dataset_op.cc b/tensorflow/core/kernels/prefetch_dataset_op.cc
index 1a6b7e078e..b02269f525 100644
--- a/tensorflow/core/kernels/prefetch_dataset_op.cc
+++ b/tensorflow/core/kernels/prefetch_dataset_op.cc
@@ -37,6 +37,8 @@ class PrefetchDatasetOp : public UnaryDatasetOpKernel {
     int64 buffer_size;
     OP_REQUIRES_OK(
         ctx, ParseScalarArgument<int64>(ctx, "buffer_size", &buffer_size));
+    OP_REQUIRES(ctx, buffer_size > 0,
+                errors::InvalidArgument("buffer_size must be > 0"));
 
     *output = new Dataset(ctx, input, buffer_size);
   }
diff --git a/tensorflow/core/kernels/summary_interface.cc b/tensorflow/core/kernels/summary_interface.cc
index ad28d77ffd..97c0c2c099 100644
--- a/tensorflow/core/kernels/summary_interface.cc
+++ b/tensorflow/core/kernels/summary_interface.cc
@@ -16,7 +16,6 @@ limitations under the License.
 
 #include <utility>
 
-#include "tensorflow/compiler/xla/ptr_util.h"
 #include "tensorflow/core/framework/graph.pb.h"
 #include "tensorflow/core/framework/op_kernel.h"
 #include "tensorflow/core/framework/resource_mgr.h"
@@ -28,6 +27,7 @@ limitations under the License.
 #include "tensorflow/core/lib/png/png_io.h"
 #include "tensorflow/core/lib/wav/wav_io.h"
 #include "tensorflow/core/util/events_writer.h"
+#include "tensorflow/core/util/ptr_util.h"
 
 namespace tensorflow {
 namespace {
@@ -229,7 +229,7 @@ class SummaryWriterImpl : public SummaryWriterInterface {
     }
     mutex_lock ml(mu_);
     events_writer_ =
-        xla::MakeUnique<EventsWriter>(io::JoinPath(logdir, "events"));
+        tensorflow::MakeUnique<EventsWriter>(io::JoinPath(logdir, "events"));
     if (!events_writer_->InitWithSuffix(filename_suffix)) {
       return errors::Unknown("Could not initialize events writer.");
     }
diff --git a/tensorflow/core/lib/io/path.cc b/tensorflow/core/lib/io/path.cc
index d93dd0296e..83f15e134d 100644
--- a/tensorflow/core/lib/io/path.cc
+++ b/tensorflow/core/lib/io/path.cc
@@ -14,8 +14,22 @@ limitations under the License.
 ==============================================================================*/
 
 #include "tensorflow/core/lib/io/path.h"
+
+#include <errno.h>
+#include <fcntl.h>
+#include <stdlib.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#if !defined(PLATFORM_WINDOWS)
+#include <unistd.h>
+#endif
+
+#include <vector>
+
 #include "tensorflow/core/lib/strings/scanner.h"
+#include "tensorflow/core/lib/strings/str_util.h"
 #include "tensorflow/core/lib/strings/strcat.h"
+#include "tensorflow/core/platform/env.h"
 
 namespace tensorflow {
 namespace io {
@@ -60,8 +74,7 @@ std::pair<StringPiece, StringPiece> SplitPath(StringPiece uri) {
 
   auto pos = path.rfind('/');
 #ifdef PLATFORM_WINDOWS
-  if (pos == StringPiece::npos)
-    pos = path.rfind('\\');
+  if (pos == StringPiece::npos) pos = path.rfind('\\');
 #endif
   // Handle the case with no '/' in 'path'.
   if (pos == StringPiece::npos)
@@ -112,7 +125,7 @@ StringPiece Extension(StringPiece path) {
 
 string CleanPath(StringPiece unclean_path) {
   string path = unclean_path.ToString();
-  const char *src = path.c_str();
+  const char* src = path.c_str();
   string::iterator dst = path.begin();
 
   // Check for absolute path and determine initial backtrack limit.
@@ -229,5 +242,52 @@ string CreateURI(StringPiece scheme, StringPiece host, StringPiece path) {
   return strings::StrCat(scheme, "://", host, path);
 }
 
+// Returns a unique number every time it is called.
+int64 UniqueId() {
+  static mutex mu(LINKER_INITIALIZED);
+  static int64 id = 0;
+  mutex_lock l(mu);
+  return ++id;
+}
+
+string GetTempFilename(const string& extension) {
+#if defined(PLATFORM_WINDOWS) || defined(__ANDROID__)
+  LOG(FATAL) << "GetTempFilename is not implemented in this platform.";
+#else
+  for (const char* dir : std::vector<const char*>(
+           {getenv("TEST_TMPDIR"), getenv("TMPDIR"), getenv("TMP"), "/tmp"})) {
+    if (!dir || !dir[0]) {
+      continue;
+    }
+    struct stat statbuf;
+    if (!stat(dir, &statbuf) && S_ISDIR(statbuf.st_mode)) {
+      // UniqueId is added here because mkstemps is not as thread safe as it
+      // looks. https://github.com/tensorflow/tensorflow/issues/5804 shows
+      // the problem.
+      string tmp_filepath;
+      int fd;
+      if (extension.length()) {
+        tmp_filepath = io::JoinPath(
+            dir, strings::StrCat("tmp_file_tensorflow_", UniqueId(), "_XXXXXX.",
+                                 extension));
+        fd = mkstemps(&tmp_filepath[0], extension.length() + 1);
+      } else {
+        tmp_filepath = io::JoinPath(
+            dir,
+            strings::StrCat("tmp_file_tensorflow_", UniqueId(), "_XXXXXX"));
+        fd = mkstemp(&tmp_filepath[0]);
+      }
+      if (fd < 0) {
+        LOG(FATAL) << "Failed to create temp file.";
+      } else {
+        close(fd);
+        return tmp_filepath;
+      }
+    }
+  }
+  LOG(FATAL) << "No temp directory found.";
+#endif
+}
+
 }  // namespace io
 }  // namespace tensorflow
diff --git a/tensorflow/core/lib/io/path.h b/tensorflow/core/lib/io/path.h
index 8d02baa5bb..47bb2b998d 100644
--- a/tensorflow/core/lib/io/path.h
+++ b/tensorflow/core/lib/io/path.h
@@ -89,6 +89,9 @@ void ParseURI(tensorflow::StringPiece uri, tensorflow::StringPiece* scheme,
 string CreateURI(tensorflow::StringPiece scheme, tensorflow::StringPiece host,
                  tensorflow::StringPiece path);
 
+// Creates a temporary file name with an extension.
+string GetTempFilename(const string& extension);
+
 }  // namespace io
 }  // namespace tensorflow
 
diff --git a/tensorflow/core/ops/math_ops.cc b/tensorflow/core/ops/math_ops.cc
index d7afd02df6..ceda11663a 100644
--- a/tensorflow/core/ops/math_ops.cc
+++ b/tensorflow/core/ops/math_ops.cc
@@ -2333,11 +2333,25 @@ REGISTER_OP("Cross")
     .Input("b: T")
     .Output("product: T")
     .Attr("T: realnumbertype")
-    // TODO(cwhipkey): implement these shape inference constraints here:
-    // * Both inputs have the same shape.
-    // * Input rank >= 1.
-    // * input_shape[-1] == 3.
-    .SetShapeFn(shape_inference::UnchangedShape)
+    .SetShapeFn([](InferenceContext* c) {
+      ShapeHandle a_shape;
+      ShapeHandle b_shape;
+      // * Input rank >= 1.
+      TF_RETURN_IF_ERROR(c->WithRankAtLeast(c->input(0), 1, &a_shape));
+      TF_RETURN_IF_ERROR(c->WithRankAtLeast(c->input(1), 1, &b_shape));
+
+      // * Both inputs have the same shape.
+      TF_RETURN_IF_ERROR(c->Merge(a_shape, b_shape, &a_shape));
+
+      // * input_shape[-1] == 3.
+      if (c->RankKnown(a_shape)) {
+        int rank = c->Rank(a_shape);
+        auto dim = c->Dim(a_shape, rank - 1);
+        TF_RETURN_IF_ERROR(c->WithValue(dim, 3, &dim));
+      }
+      c->set_output(0, a_shape);
+      return Status::OK();
+    })
     .Doc(R"doc(
 Compute the pairwise cross product.
 
diff --git a/tensorflow/core/ops/math_ops_test.cc b/tensorflow/core/ops/math_ops_test.cc
index 28f9969de5..3dfa776d26 100644
--- a/tensorflow/core/ops/math_ops_test.cc
+++ b/tensorflow/core/ops/math_ops_test.cc
@@ -515,4 +515,15 @@ TEST(MathOpstest, RequantizationRange_ShapeFn) {
   INFER_ERROR("must be rank 0", op, "?;?;[2]");
 }
 
+TEST(MathOpsTest, Cross_ShapeFn) {
+  ShapeInferenceTestOp op("Cross");
+
+  INFER_ERROR("Shape must be at least rank 1 but is rank 0", op, "[];[]");
+  INFER_ERROR("Dimension 0 in both shapes must be equal, but", op, "[3];[5]");
+  INFER_ERROR("Dimension must be 3 but", op, "[3,5];[3,5]");
+
+  INFER_OK(op, "?;?", "?");
+  INFER_OK(op, "[?];[?]", "in0");
+  INFER_OK(op, "[1,?,3];[?,?,?]", "in0");
+}
 }  // end namespace tensorflow
diff --git a/tensorflow/core/platform/default/build_config_root.bzl b/tensorflow/core/platform/default/build_config_root.bzl
index c63fb28ff9..6e98f12114 100644
--- a/tensorflow/core/platform/default/build_config_root.bzl
+++ b/tensorflow/core/platform/default/build_config_root.bzl
@@ -10,7 +10,9 @@ def tf_sycl_tests_tags():
 
 def tf_additional_plugin_deps():
   return select({
-      "//tensorflow:with_xla_support": ["//tensorflow/compiler/jit"],
+      str(Label("//tensorflow:with_xla_support")): [
+          str(Label("//tensorflow/compiler/jit"))
+      ],
       "//conditions:default": [],
   })
 
@@ -19,37 +21,37 @@ def tf_additional_xla_deps_py():
 
 def tf_additional_license_deps():
   return select({
-      "//tensorflow:with_xla_support": ["@llvm//:LICENSE.TXT"],
+      str(Label("//tensorflow:with_xla_support")): ["@llvm//:LICENSE.TXT"],
       "//conditions:default": [],
   })
 
 def tf_additional_verbs_deps():
   return select({
-      "//tensorflow:with_verbs_support": [
-          "//tensorflow/contrib/verbs:verbs_server_lib",
-          "//tensorflow/contrib/verbs:grpc_verbs_client",
+      str(Label("//tensorflow:with_verbs_support")): [
+          str(Label("//tensorflow/contrib/verbs:verbs_server_lib")),
+          str(Label("//tensorflow/contrib/verbs:grpc_verbs_client")),
       ],
       "//conditions:default": [],
   })
 
 def tf_additional_mpi_deps():
   return select({
-      "//tensorflow:with_mpi_support": [
-          "//tensorflow/contrib/mpi:mpi_server_lib",
+      str(Label("//tensorflow:with_mpi_support")): [
+          str(Label("//tensorflow/contrib/mpi:mpi_server_lib")),
       ],
       "//conditions:default": [],
   })
 
 def tf_additional_gdr_deps():
   return select({
-      "//tensorflow:with_gdr_support": [
-          "//tensorflow/contrib/gdr:gdr_server_lib",
+      str(Label("//tensorflow:with_gdr_support")): [
+          str(Label("//tensorflow/contrib/gdr:gdr_server_lib")),
       ],
       "//conditions:default": [],
   })
 
 def if_static(extra_deps, otherwise=[]):
   return select({
-      "//tensorflow:framework_shared_object": otherwise,
+      str(Label("//tensorflow:framework_shared_object")): otherwise,
       "//conditions:default": extra_deps,
   })
diff --git a/tensorflow/core/util/ptr_util.h b/tensorflow/core/util/ptr_util.h
new file mode 100644
index 0000000000..f902b3ffa1
--- /dev/null
+++ b/tensorflow/core/util/ptr_util.h
@@ -0,0 +1,80 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_CORE_UTIL_PTR_UTIL_H_
+#define TENSORFLOW_CORE_UTIL_PTR_UTIL_H_
+
+// Utility functions for pointers.
+
+#include <stddef.h>
+
+#include <memory>
+#include <type_traits>
+#include <utility>
+
+namespace tensorflow {
+
+namespace helper {
+
+// Trait to select overloads and return types for MakeUnique.
+template <typename T>
+struct MakeUniqueResult {
+  using scalar = std::unique_ptr<T>;
+};
+template <typename T>
+struct MakeUniqueResult<T[]> {
+  using array = std::unique_ptr<T[]>;
+};
+template <typename T, size_t N>
+struct MakeUniqueResult<T[N]> {
+  using invalid = void;
+};
+
+}  // namespace helper
+
+// Transfers ownership of a raw pointer to a std::unique_ptr of deduced type.
+// Example:
+//   X* NewX(int, int);
+//   auto x = WrapUnique(NewX(1, 2));  // 'x' is std::unique_ptr<X>.
+//
+// WrapUnique is useful for capturing the output of a raw pointer factory.
+// However, prefer 'MakeUnique<T>(args...) over 'WrapUnique(new T(args...))'.
+//   auto x = WrapUnique(new X(1, 2));  // works, but nonideal.
+//   auto x = MakeUnique<X>(1, 2);  // safer, standard, avoids raw 'new'.
+//
+// Note: Cannot wrap pointers to array of unknown bound (i.e. U(*)[]).
+template <typename T>
+std::unique_ptr<T> WrapUnique(T* ptr) {
+  static_assert(!std::is_array<T>::value || std::extent<T>::value != 0,
+                "types T[0] or T[] are unsupported");
+  return std::unique_ptr<T>(ptr);
+}
+
+template <typename T, typename... Args>
+typename helper::MakeUniqueResult<T>::scalar MakeUnique(Args&&... args) {
+  return std::unique_ptr<T>(new T(std::forward<Args>(args)...));
+}
+
+// Overload for array of unknown bound.
+// The allocation of arrays needs to use the array form of new,
+// and cannot take element constructor arguments.
+template <typename T>
+typename helper::MakeUniqueResult<T>::array MakeUnique(size_t n) {
+  return std::unique_ptr<T>(new typename std::remove_extent<T>::type[n]());
+}
+
+}  // namespace tensorflow
+
+#endif  // TENSORFLOW_CORE_UTIL_PTR_UTIL_H_
diff --git a/tensorflow/docs_src/extend/adding_an_op.md b/tensorflow/docs_src/extend/adding_an_op.md
index 15d6d77f5e..c52279b212 100644
--- a/tensorflow/docs_src/extend/adding_an_op.md
+++ b/tensorflow/docs_src/extend/adding_an_op.md
@@ -341,9 +341,9 @@ Assuming you have `g++` installed, here is the sequence of commands you can use
 to compile your op into a dynamic library.
 
 ```bash
-TF_INC=$(python -c 'import tensorflow as tf; print(tf.sysconfig.get_include())')
-TF_LIB=$(python -c 'import tensorflow as tf; print(tf.sysconfig.get_lib())')
-g++ -std=c++11 -shared zero_out.cc -o zero_out.so -fPIC -I$TF_INC -I$TF_INC/external/nsync/public -L$TF_LIB -ltensorflow_framework -O2
+TF_CFLAGS=( $(python -c 'import tensorflow as tf; print(" ".join(tf.sysconfig.get_compile_flags()))') )
+TF_LFLAGS=( $(python -c 'import tensorflow as tf; print(" ".join(tf.sysconfig.get_link_flags()))') )
+g++ -std=c++11 -shared zero_out.cc -o zero_out.so -fPIC ${TF_CFLAGS[@]} ${TF_LFLAGS[@]} -O2
 ```
 
 On Mac OS X, the additional flag "-undefined dynamic_lookup" is required when
@@ -451,17 +451,17 @@ Now that you know how to build a basic (and somewhat restricted) op and
 implementation, we'll look at some of the more complicated things you will
 typically need to build into your op. This includes:
 
-*   [Conditional checks and validation](#conditional_checks_and_validation)
-*   [Op registration](#op_registration)
+*   [Conditional checks and validation](#conditional-checks-and-validation)
+*   [Op registration](#op-registration)
     *   [Attrs](#attrs)
-    *   [Attr types](#attr_types)
+    *   [Attr types](#attr-types)
     *   [Polymorphism](#polymorphism)
-    *   [Inputs and outputs](#inputs_and_outputs)
-    *   [Backwards compatibility](#backwards_compatibility)
-*   [GPU support](#gpu_support)
-    *   [Compiling the kernel for the GPU device](#compiling_the_kernel_for_the_gpu_device)
-*   [Implement the gradient in Python](#implement_the_gradient_in_python)
-*   [Shape functions in C++](#shape_functions_in_c)
+    *   [Inputs and outputs](#inputs-and-outputs)
+    *   [Backwards compatibility](#backwards-compatibility)
+*   [GPU support](#gpu-support)
+    *   [Compiling the kernel for the GPU device](#compiling-the-kernel-for-the-gpu-device)
+*   [Implement the gradient in Python](#implement-the-gradient-in-python)
+*   [Shape functions in C++](#shape-functions-in-c)
 
 ### Conditional checks and validation
 
@@ -1228,10 +1228,10 @@ into a single dynamically loadable library:
 
 ```bash
 nvcc -std=c++11 -c -o cuda_op_kernel.cu.o cuda_op_kernel.cu.cc \
--I $TF_INC -I$TF_INC/external/nsync/public -D GOOGLE_CUDA=1 -x cu -Xcompiler -fPIC
+  ${TF_CFLAGS[@]} -D GOOGLE_CUDA=1 -x cu -Xcompiler -fPIC
 
 g++ -std=c++11 -shared -o cuda_op_kernel.so cuda_op_kernel.cc \
-cuda_op_kernel.cu.o -I $TF_INC -I$TF_INC/external/nsync/public -fPIC -lcudart -L$TF_LIB -ltensorflow_framework
+  cuda_op_kernel.cu.o ${TF_CFLAGS[@]} -fPIC -lcudart ${TF_LFLAGS[@]}
 ```
 
 `cuda_op_kernel.so` produced above can be loaded as usual in Python, using the
diff --git a/tensorflow/docs_src/get_started/input_fn.md b/tensorflow/docs_src/get_started/input_fn.md
index 0db5c6143a..f0dcdc47ff 100644
--- a/tensorflow/docs_src/get_started/input_fn.md
+++ b/tensorflow/docs_src/get_started/input_fn.md
@@ -211,8 +211,8 @@ def get_input_fn_from_numpy(data_set, num_epochs=None, shuffle=True):
 ### A Neural Network Model for Boston House Values
 
 In the remainder of this tutorial, you'll write an input function for
-preprocessing a subset of Boston housing data pulled from the [UCI Housing Data
-Set](https://archive.ics.uci.edu/ml/datasets/Housing) and use it to feed data to
+preprocessing a subset of Boston housing data pulled from the UCI Housing Data
+Set and use it to feed data to
 a neural network regressor for predicting median house values.
 
 The [Boston CSV data sets](#setup) you'll use to train your neural network
diff --git a/tensorflow/docs_src/install/install_windows.md b/tensorflow/docs_src/install/install_windows.md
index 63742828b0..8d0eb7966f 100644
--- a/tensorflow/docs_src/install/install_windows.md
+++ b/tensorflow/docs_src/install/install_windows.md
@@ -84,7 +84,7 @@ install it now:
   * [Python 3.5.x 64-bit from python.org](https://www.python.org/downloads/release/python-352/)
   * [Python 3.6.x 64-bit from python.org](https://www.python.org/downloads/release/python-362/)
 
--TensorFlow supports Python 3.5.x and 3.6.x on Windows.
+TensorFlow supports Python 3.5.x and 3.6.x on Windows.
 Note that Python 3 comes with the pip3 package manager, which is the
 program you'll use to install TensorFlow.
 
@@ -98,7 +98,6 @@ To install the GPU version of TensorFlow, enter the following command:
 
 <pre>C:\> <b>pip3 install --upgrade tensorflow-gpu</b></pre>
 
-
 ## Installing with Anaconda
 
 **The Anaconda installation is community supported, not officially supported.**
@@ -219,6 +218,11 @@ ImportError: cannot import name 'descriptor'</pre>
   </td>
 </tr>
 
+<tr>
+  <td><a href="https://stackoverflow.com/q/38896424">38896424</a></td>
+  <td>
+  <pre>Could not find a version that satisfies the requirement tensorflow</pre>
+  </td>
+</tr>
 
 </table>
-
diff --git a/tensorflow/docs_src/mobile/ios_build.md b/tensorflow/docs_src/mobile/ios_build.md
index a04655052f..4c84a1214a 100644
--- a/tensorflow/docs_src/mobile/ios_build.md
+++ b/tensorflow/docs_src/mobile/ios_build.md
@@ -98,7 +98,7 @@ There are three demo applications for iOS, all defined in Xcode projects inside
 
 ## Building the TensorFlow iOS libraries from source
 
-While Cocapods is the quickest and easiest way of getting started, you sometimes
+While Cocoapods is the quickest and easiest way of getting started, you sometimes
 need more flexibility to determine which parts of TensorFlow your app should be
 shipped with. For such cases, you can build the iOS libraries from the
 sources. [This
diff --git a/tensorflow/docs_src/mobile/mobile_intro.md b/tensorflow/docs_src/mobile/mobile_intro.md
index 3a002c4da2..17dbf1c3e6 100644
--- a/tensorflow/docs_src/mobile/mobile_intro.md
+++ b/tensorflow/docs_src/mobile/mobile_intro.md
@@ -156,7 +156,7 @@ easy cases on device.
 Doing on-device computation can also signal when it's time to switch to working
 on the cloud. A good example of this is hotword detection in speech. Since
 devices are able to constantly listen out for the keywords, this then triggers a
-lot of traffic to cloud-based speech recognition once one is recognised. Without
+lot of traffic to cloud-based speech recognition once one is recognized. Without
 the on-device component, the whole application wouldn’t be feasible, and this
 pattern exists across several other applications as well. Recognizing that some
 sensor input is interesting enough for further processing makes a lot of
diff --git a/tensorflow/docs_src/mobile/optimizing.md b/tensorflow/docs_src/mobile/optimizing.md
index d9e8875c38..44cacff5db 100644
--- a/tensorflow/docs_src/mobile/optimizing.md
+++ b/tensorflow/docs_src/mobile/optimizing.md
@@ -115,7 +115,7 @@ If you look at the resulting file size, you should see that it’s about a quart
 of the original at 23MB.
 
 Another transform is `round_weights`, which doesn't make the file smaller, but it
-makes the file compressable to about the same size as when `quantize_weights` is
+makes the file compressible to about the same size as when `quantize_weights` is
 used. This is particularly useful for mobile development, taking advantage of
 the fact that app bundles are compressed before they’re downloaded by consumers.
 
diff --git a/tensorflow/docs_src/performance/xla/operation_semantics.md b/tensorflow/docs_src/performance/xla/operation_semantics.md
index 4333f94486..a49973d550 100644
--- a/tensorflow/docs_src/performance/xla/operation_semantics.md
+++ b/tensorflow/docs_src/performance/xla/operation_semantics.md
@@ -776,7 +776,7 @@ The output type is a tuple of three ComputationDataHandles:
 | `batch_var`  | `ComputationDataHandle` | 1 dimensional array (\\(\sigma^2\\)) |
 
 The `batch_mean` and `batch_var` are moments calculated across the batch and
-spatial dimensions using the formulars above.
+spatial dimensions using the formulas above.
 
 ## BatchNormInference
 
diff --git a/tensorflow/docs_src/tutorials/image_recognition.md b/tensorflow/docs_src/tutorials/image_recognition.md
index df13eabead..32257f87d6 100644
--- a/tensorflow/docs_src/tutorials/image_recognition.md
+++ b/tensorflow/docs_src/tutorials/image_recognition.md
@@ -5,7 +5,7 @@ tell apart a lion and a jaguar, read a sign, or recognize a human's face.
 But these are actually hard problems to solve with a computer: they only
 seem easy because our brains are incredibly good at understanding images.
 
-In the last few years the field of machine learning has made tremendous
+In the last few years, the field of machine learning has made tremendous
 progress on addressing these difficult problems. In particular, we've
 found that a kind of model called a deep
 [convolutional neural network](https://colah.github.io/posts/2014-07-Conv-Nets-Modular/)
diff --git a/tensorflow/examples/how_tos/reading_data/convert_to_records.py b/tensorflow/examples/how_tos/reading_data/convert_to_records.py
index d14c1f7c86..c89e839563 100644
--- a/tensorflow/examples/how_tos/reading_data/convert_to_records.py
+++ b/tensorflow/examples/how_tos/reading_data/convert_to_records.py
@@ -52,17 +52,19 @@ def convert_to(data_set, name):
 
   filename = os.path.join(FLAGS.directory, name + '.tfrecords')
   print('Writing', filename)
-  writer = tf.python_io.TFRecordWriter(filename)
-  for index in range(num_examples):
-    image_raw = images[index].tostring()
-    example = tf.train.Example(features=tf.train.Features(feature={
-        'height': _int64_feature(rows),
-        'width': _int64_feature(cols),
-        'depth': _int64_feature(depth),
-        'label': _int64_feature(int(labels[index])),
-        'image_raw': _bytes_feature(image_raw)}))
-    writer.write(example.SerializeToString())
-  writer.close()
+  with tf.python_io.TFRecordWriter(filename) as writer:
+    for index in range(num_examples):
+      image_raw = images[index].tostring()
+      example = tf.train.Example(
+          features=tf.train.Features(
+              feature={
+                  'height': _int64_feature(rows),
+                  'width': _int64_feature(cols),
+                  'depth': _int64_feature(depth),
+                  'label': _int64_feature(int(labels[index])),
+                  'image_raw': _bytes_feature(image_raw)
+              }))
+      writer.write(example.SerializeToString())
 
 
 def main(unused_argv):
diff --git a/tensorflow/examples/speech_commands/input_data.py b/tensorflow/examples/speech_commands/input_data.py
index 6d75fbb92b..751652b330 100644
--- a/tensorflow/examples/speech_commands/input_data.py
+++ b/tensorflow/examples/speech_commands/input_data.py
@@ -240,7 +240,8 @@ class AudioProcessor(object):
     # Look through all the subfolders to find audio samples
     search_path = os.path.join(self.data_dir, '*', '*.wav')
     for wav_path in gfile.Glob(search_path):
-      word = re.search('.*/([^/]+)/.*.wav', wav_path).group(1).lower()
+      _, word = os.path.split(os.path.dirname(wav_path))
+      word = word.lower()
       # Treat the '_background_noise_' folder as a special case, since we expect
       # it to contain long audio samples we mix in to improve training.
       if word == BACKGROUND_NOISE_DIR_NAME:
diff --git a/tensorflow/examples/speech_commands/train.py b/tensorflow/examples/speech_commands/train.py
index a54bcbdb32..f5bf04305a 100644
--- a/tensorflow/examples/speech_commands/train.py
+++ b/tensorflow/examples/speech_commands/train.py
@@ -156,7 +156,8 @@ def main(_):
   predicted_indices = tf.argmax(logits, 1)
   expected_indices = tf.argmax(ground_truth_input, 1)
   correct_prediction = tf.equal(predicted_indices, expected_indices)
-  confusion_matrix = tf.confusion_matrix(expected_indices, predicted_indices)
+  confusion_matrix = tf.confusion_matrix(
+      expected_indices, predicted_indices, num_classes=label_count)
   evaluation_step = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
   tf.summary.scalar('accuracy', evaluation_step)
 
diff --git a/tensorflow/examples/udacity/1_notmnist.ipynb b/tensorflow/examples/udacity/1_notmnist.ipynb
index 39674e1aa4..dffe5d37c6 100644
--- a/tensorflow/examples/udacity/1_notmnist.ipynb
+++ b/tensorflow/examples/udacity/1_notmnist.ipynb
@@ -46,13 +46,13 @@
         "# These are all the modules we'll be using later. Make sure you can import them\n",
         "# before proceeding further.\n",
         "from __future__ import print_function\n",
+        "import imageio\n",
         "import matplotlib.pyplot as plt\n",
         "import numpy as np\n",
         "import os\n",
         "import sys\n",
         "import tarfile\n",
         "from IPython.display import display, Image\n",
-        "from scipy import ndimage\n",
         "from sklearn.linear_model import LogisticRegression\n",
         "from six.moves.urllib.request import urlretrieve\n",
         "from six.moves import cPickle as pickle\n",
@@ -325,13 +325,13 @@
         "  for image in image_files:\n",
         "    image_file = os.path.join(folder, image)\n",
         "    try:\n",
-        "      image_data = (ndimage.imread(image_file).astype(float) - \n",
+        "      image_data = (imageio.imread(image_file).astype(float) - \n",
         "                    pixel_depth / 2) / pixel_depth\n",
         "      if image_data.shape != (image_size, image_size):\n",
         "        raise Exception('Unexpected image shape: %s' % str(image_data.shape))\n",
         "      dataset[num_images, :, :] = image_data\n",
         "      num_images = num_images + 1\n",
-        "    except IOError as e:\n",
+        "    except (IOError, ValueError) as e:\n",
         "      print('Could not read:', image_file, ':', e, '- it\\'s ok, skipping.')\n",
         "    \n",
         "  dataset = dataset[0:num_images, :, :]\n",
diff --git a/tensorflow/java/src/main/java/org/tensorflow/OperationBuilder.java b/tensorflow/java/src/main/java/org/tensorflow/OperationBuilder.java
index 9a1b7592b3..a24150484e 100644
--- a/tensorflow/java/src/main/java/org/tensorflow/OperationBuilder.java
+++ b/tensorflow/java/src/main/java/org/tensorflow/OperationBuilder.java
@@ -265,6 +265,36 @@ public final class OperationBuilder {
     return this;
   }
 
+  public OperationBuilder setAttr(String name, Shape[] value) {
+    int[] numDimensions = new int[value.length];
+    int totalNumDimensions = 0;
+    for (int idx = 0; idx < value.length; ++idx) {
+      int n = value[idx].numDimensions();
+      numDimensions[idx] = n;
+      if (n > 0) {
+        totalNumDimensions += n;
+      }
+    }
+    // Flatten the shapes into a single array to avoid too much overhead in the
+    // native part
+    long[] shapes = new long[totalNumDimensions];
+    int shapeIdx = 0;
+    for (Shape shape : value) {
+      if (shape.numDimensions() > 0) {
+        for (long dim : shape.asArray()) {
+          shapes[shapeIdx++] = dim;
+        }
+      }
+    }
+    Graph.Reference r = graph.ref();
+    try {
+      setAttrShapeList(unsafeNativeHandle, name, shapes, numDimensions);
+    } finally {
+      r.close();
+    }
+    return this;
+  }
+
   public OperationBuilder setAttr(String name, String[] value) {
     Charset utf8 = Charset.forName("UTF-8");
     Object[] objects = new Object[value.length];
@@ -297,8 +327,6 @@ public final class OperationBuilder {
 
   // The names of all the setAttr* family functions below correspond to the C library types, not the
   // Java library types. Roughly, setAttrFoo calls the TensorFlow C library function: TF_SetAttrFoo.
-  // TODO(ashankar):
-  // - setAttrShapeList: Which would take in a long[][]
 
   private static native void setAttrString(long handle, String name, byte[] value);
 
@@ -324,5 +352,8 @@ public final class OperationBuilder {
 
   private static native void setAttrShape(long handle, String name, long[] shape, int numDims);
 
+  private static native void setAttrShapeList(
+      long handle, String name, long[] shapes, int[] numDims);
+
   private static native void setAttrStringList(long handle, String name, Object[] value);
 }
diff --git a/tensorflow/java/src/main/native/operation_builder_jni.cc b/tensorflow/java/src/main/native/operation_builder_jni.cc
index e03be7b110..55d214a7c4 100644
--- a/tensorflow/java/src/main/native/operation_builder_jni.cc
+++ b/tensorflow/java/src/main/native/operation_builder_jni.cc
@@ -15,6 +15,7 @@ limitations under the License.
 
 #include "tensorflow/java/src/main/native/operation_builder_jni.h"
 
+#include <cstring>
 #include <memory>
 #include "tensorflow/c/c_api.h"
 #include "tensorflow/java/src/main/native/exception_jni.h"
@@ -262,6 +263,41 @@ JNIEXPORT void JNICALL Java_org_tensorflow_OperationBuilder_setAttrShape(
   env->ReleaseStringUTFChars(name, cname);
 }
 
+JNIEXPORT void JNICALL Java_org_tensorflow_OperationBuilder_setAttrShapeList(
+    JNIEnv* env, jclass clazz, jlong handle, jstring name, jlongArray shapes,
+    jintArray num_dims) {
+  TF_OperationDescription* d = requireHandle(env, handle);
+  if (d == nullptr) return;
+  std::unique_ptr<int64_t[]> cshapes;
+  std::unique_ptr<int64_t* []> cdims;
+  std::unique_ptr<int[]> cnum_dims;
+  const int num_dims_length = env->GetArrayLength(num_dims);
+  if (num_dims_length > 0) {
+    const int shapes_length = env->GetArrayLength(shapes);
+    cshapes.reset(new int64_t[shapes_length]);
+    cdims.reset(new int64_t*[num_dims_length]);
+    cnum_dims.reset(new int[num_dims_length]);
+    jlong* shapes_elems =
+        static_cast<jlong*>(env->GetPrimitiveArrayCritical(shapes, nullptr));
+    std::memcpy(cshapes.get(), shapes_elems, shapes_length << 3);
+    env->ReleasePrimitiveArrayCritical(shapes, shapes_elems, JNI_ABORT);
+    int64_t* cshapes_ptr = cshapes.get();
+    jint* num_dims_elems =
+        static_cast<jint*>(env->GetPrimitiveArrayCritical(num_dims, nullptr));
+    for (int i = 0; i < num_dims_length; ++i) {
+      cnum_dims[i] = static_cast<int>(num_dims_elems[i]);
+      cdims[i] = cshapes_ptr;
+      if (cnum_dims[i] > 0) {
+        cshapes_ptr += cnum_dims[i];
+      }
+    }
+    env->ReleasePrimitiveArrayCritical(num_dims, num_dims_elems, JNI_ABORT);
+  }
+  const char* cname = env->GetStringUTFChars(name, nullptr);
+  TF_SetAttrShapeList(d, cname, cdims.get(), cnum_dims.get(), num_dims_length);
+  env->ReleaseStringUTFChars(name, cname);
+}
+
 JNIEXPORT void JNICALL Java_org_tensorflow_OperationBuilder_setAttrStringList(
     JNIEnv* env, jclass object, jlong handle, jstring name,
     jobjectArray values) {
diff --git a/tensorflow/java/src/main/native/operation_builder_jni.h b/tensorflow/java/src/main/native/operation_builder_jni.h
index 2e72bd68da..cf0abe4829 100644
--- a/tensorflow/java/src/main/native/operation_builder_jni.h
+++ b/tensorflow/java/src/main/native/operation_builder_jni.h
@@ -169,6 +169,14 @@ JNIEXPORT void JNICALL Java_org_tensorflow_OperationBuilder_setAttrTensorList(
 JNIEXPORT void JNICALL Java_org_tensorflow_OperationBuilder_setAttrShape(
     JNIEnv *, jclass, jlong, jstring, jlongArray, jint);
 
+/*
+ * Class:     org_tensorflow_OperationBuilder
+ * Method:    setAttrShapeList
+ * Signature: (JLjava/lang/String;[J[I)V
+ */
+JNIEXPORT void JNICALL Java_org_tensorflow_OperationBuilder_setAttrShapeList(
+    JNIEnv *, jclass, jlong, jstring, jlongArray, jintArray);
+
 /*
  * Class:     org_tensorflow_OperationBuilder
  * Method:    setAttrStringList
diff --git a/tensorflow/java/src/test/java/org/tensorflow/OperationBuilderTest.java b/tensorflow/java/src/test/java/org/tensorflow/OperationBuilderTest.java
index 6dc233987b..0a4a8cf4e3 100644
--- a/tensorflow/java/src/test/java/org/tensorflow/OperationBuilderTest.java
+++ b/tensorflow/java/src/test/java/org/tensorflow/OperationBuilderTest.java
@@ -148,6 +148,19 @@ public class OperationBuilderTest {
     }
   }
 
+  @Test
+  public void setAttrShapeList() {
+    // Those shapes match tensors ones, so no exception is thrown
+    testSetAttrShapeList(new Shape[] {Shape.make(2, 2), Shape.make(2, 2, 2)});
+    try {
+      // Those shapes do not match tensors ones, exception is thrown
+      testSetAttrShapeList(new Shape[] {Shape.make(2, 2), Shape.make(2, 2, 2, 2)});
+      fail("Shapes are incompatible and an exception was expected");
+    } catch (IllegalArgumentException e) {
+      // expected
+    }
+  }
+
   @Test
   public void addControlInput() {
     try (Graph g = new Graph();
@@ -175,6 +188,30 @@ public class OperationBuilderTest {
     }
   }
 
+  private static void testSetAttrShapeList(Shape[] shapes) {
+    try (Graph g = new Graph();
+        Session s = new Session(g)) {
+      int[][] matrix = new int[][] {{0, 0}, {0, 0}};
+      Output<?> queue =
+          g.opBuilder("FIFOQueue", "queue")
+              .setAttr("component_types", new DataType[] {DataType.INT32, DataType.INT32})
+              .setAttr("shapes", shapes)
+              .build()
+              .output(0);
+      assertTrue(hasNode(g, "queue"));
+      Output<Integer> c1 = TestUtil.constant(g, "const1", matrix);
+      Output<Integer> c2 = TestUtil.constant(g, "const2", new int[][][] {matrix, matrix});
+      Operation enqueue =
+          g.opBuilder("QueueEnqueue", "enqueue")
+              .addInput(queue)
+              .addInputList(new Output<?>[] {c1, c2})
+              .build();
+      assertTrue(hasNode(g, "enqueue"));
+
+      s.runner().addTarget(enqueue).run();
+    }
+  }
+
   private static boolean hasNode(Graph g, String name) {
     return g.operation(name) != null;
   }
diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD
index 5e7a6c0b59..12d81c4383 100644
--- a/tensorflow/python/BUILD
+++ b/tensorflow/python/BUILD
@@ -3341,6 +3341,7 @@ py_test(
     tags = [
         "no_gpu",
         "no_oss",
+        "no_pip",
         "no_pip_gpu",
         "notap",
     ],
@@ -3387,6 +3388,7 @@ py_test(
     srcs_version = "PY2AND3",
     tags = [
         "no_gpu",
+        "no_windows",
     ],
     deps = [
         ":array_ops",
diff --git a/tensorflow/python/data/util/nest.py b/tensorflow/python/data/util/nest.py
index 2f89c006d2..2455395635 100644
--- a/tensorflow/python/data/util/nest.py
+++ b/tensorflow/python/data/util/nest.py
@@ -376,6 +376,16 @@ def assert_shallow_structure(shallow_tree, input_tree, check_types=True):
           "structure has length %s, while shallow structure has length %s."
           % (len(input_tree), len(shallow_tree)))
 
+    if check_types and isinstance(shallow_tree, dict):
+      if set(input_tree) != set(shallow_tree):
+        raise ValueError(
+            "The two structures don't have the same keys. Input "
+            "structure has keys %s, while shallow structure has keys %s." %
+            (list(_six.iterkeys(input_tree)),
+             list(_six.iterkeys(shallow_tree))))
+      input_tree = list(_six.iteritems(input_tree))
+      shallow_tree = list(_six.iteritems(shallow_tree))
+
     for shallow_branch, input_branch in zip(shallow_tree, input_tree):
       assert_shallow_structure(shallow_branch, input_branch,
                                check_types=check_types)
diff --git a/tensorflow/python/data/util/nest_test.py b/tensorflow/python/data/util/nest_test.py
index 0bd0a5f443..90dd7dfe77 100644
--- a/tensorflow/python/data/util/nest_test.py
+++ b/tensorflow/python/data/util/nest_test.py
@@ -268,6 +268,15 @@ class NestTest(test.TestCase):
       nest.assert_shallow_structure(inp_ab2, inp_ab1)
     nest.assert_shallow_structure(inp_ab2, inp_ab1, check_types=False)
 
+    inp_ab1 = {"a": (1, 1), "b": {"c": (2, 2)}}
+    inp_ab2 = {"a": (1, 1), "b": {"d": (2, 2)}}
+    expected_message = (
+        r"The two structures don't have the same keys. Input "
+        r"structure has keys \['c'\], while shallow structure has "
+        r"keys \['d'\].")
+    with self.assertRaisesRegexp(ValueError, expected_message):
+      nest.assert_shallow_structure(inp_ab2, inp_ab1)
+
   def testFlattenUpTo(self):
     input_tree = (((2, 2), (3, 3)), ((4, 9), (5, 5)))
     shallow_tree = ((True, True), (False, True))
diff --git a/tensorflow/python/estimator/export/export.py b/tensorflow/python/estimator/export/export.py
index 31e9933c6f..3b295a7e35 100644
--- a/tensorflow/python/estimator/export/export.py
+++ b/tensorflow/python/estimator/export/export.py
@@ -57,7 +57,7 @@ class ServingInputReceiver(collections.namedtuple(
       groups of receiver tensors, each of which may be a `Tensor` or a dict of
       string to `Tensor`.  These named receiver tensor alternatives generate
       additional serving signatures, which may be used to feed inputs at
-      different points within the input reciever subgraph.  A typical usage is
+      different points within the input receiver subgraph.  A typical usage is
       to allow feeding raw feature `Tensor`s *downstream* of the
       tf.parse_example() op.  Defaults to None.
   """
diff --git a/tensorflow/python/estimator/training_test.py b/tensorflow/python/estimator/training_test.py
index 1862e325e2..17d018aa88 100644
--- a/tensorflow/python/estimator/training_test.py
+++ b/tensorflow/python/estimator/training_test.py
@@ -1016,7 +1016,7 @@ class TrainingExecutorRunEvaluatorTest(test.TestCase):
                is_the_final_export):
       del export_path, checkpoint_path, eval_result
       estimator.times_export_was_called += 1
-      # final_export is happend at the end.
+      # final_export is happened at the end.
       self.assertEqual(0, estimator.times_final_export_was_called)
       if is_the_final_export:
         estimator.times_final_export_was_called += 1
@@ -1361,7 +1361,7 @@ class TrainingExecutorRunLocalTest(test.TestCase):
                is_the_final_export):
       del export_path, checkpoint_path, eval_result
       estimator.times_export_was_called += 1
-      # final_export is happend at the end.
+      # final_export is happened at the end.
       self.assertEqual(0, estimator.times_final_export_was_called)
       if is_the_final_export:
         estimator.times_final_export_was_called += 1
diff --git a/tensorflow/python/keras/BUILD b/tensorflow/python/keras/BUILD
index e4992afbca..d9391dd6c5 100644
--- a/tensorflow/python/keras/BUILD
+++ b/tensorflow/python/keras/BUILD
@@ -556,6 +556,7 @@ py_test(
     srcs = ["_impl/keras/utils/data_utils_test.py"],
     srcs_version = "PY2AND3",
     tags = [
+        "no_windows",
         "noasan",  # times out
         "notsan",
     ],
diff --git a/tensorflow/python/keras/_impl/keras/backend.py b/tensorflow/python/keras/_impl/keras/backend.py
index b029e5161f..ec7a5dcffd 100644
--- a/tensorflow/python/keras/_impl/keras/backend.py
+++ b/tensorflow/python/keras/_impl/keras/backend.py
@@ -2487,7 +2487,7 @@ class Function(object):
   """Runs a computation graph.
 
   It's possible to pass arguments to `tf.Session.run()` via `session_kwargs`.
-  In particular additonal operations via `fetches` argument and additional
+  In particular additional operations via `fetches` argument and additional
   tensor substitutions via `feed_dict` arguments. Note that given
   substitutions are merged with substitutions from `inputs`. Even though
   `feed_dict` is passed once in the constructor (called in `model.compile()`)
diff --git a/tensorflow/python/keras/_impl/keras/callbacks.py b/tensorflow/python/keras/_impl/keras/callbacks.py
index 40a996a03f..16109b52b3 100644
--- a/tensorflow/python/keras/_impl/keras/callbacks.py
+++ b/tensorflow/python/keras/_impl/keras/callbacks.py
@@ -768,7 +768,7 @@ class TensorBoard(Callback):
       self.writer.add_summary(summary, epoch)
     self.writer.flush()
 
-  def on_train_end(self, _):
+  def on_train_end(self, logs=None):
     self.writer.close()
 
 
diff --git a/tensorflow/python/keras/_impl/keras/callbacks_test.py b/tensorflow/python/keras/_impl/keras/callbacks_test.py
index 97a650a992..79dfcd1bb6 100644
--- a/tensorflow/python/keras/_impl/keras/callbacks_test.py
+++ b/tensorflow/python/keras/_impl/keras/callbacks_test.py
@@ -19,16 +19,18 @@ from __future__ import division
 from __future__ import print_function
 
 import csv
-import multiprocessing
 import os
 import re
 import shutil
+import threading
+import unittest
 
 import numpy as np
 
 from tensorflow.python.keras._impl import keras
 from tensorflow.python.keras._impl.keras import testing_utils
 from tensorflow.python.platform import test
+from tensorflow.python.summary.writer import writer_cache
 
 try:
   import h5py  # pylint:disable=g-import-not-at-top
@@ -498,7 +500,10 @@ class KerasCallbacksTest(test.TestCase):
       values = []
       with open(fp) as f:
         for x in csv.reader(f):
-          values.append(x)
+          # In windows, due to \r\n line ends we may end up reading empty lines
+          # after each line. Skip empty lines.
+          if x:
+            values.append(x)
       assert 'nan' in values[-1], 'The last epoch was not logged.'
 
   def test_TerminateOnNaN(self):
@@ -678,23 +683,41 @@ class KerasCallbacksTest(test.TestCase):
             batch_size=5)]
 
       # fit w/o validation data should raise ValueError if histogram_freq > 0
+      cbs = callbacks_factory(histogram_freq=1)
       with self.assertRaises(ValueError):
-        model.fit(x_train, y_train, batch_size=BATCH_SIZE,
-                  callbacks=callbacks_factory(histogram_freq=1), epochs=3)
+        model.fit(
+            x_train, y_train, batch_size=BATCH_SIZE, callbacks=cbs, epochs=3)
+
+      for cb in cbs:
+        cb.on_train_end()
 
       # fit generator without validation data should raise ValueError if
       # histogram_freq > 0
+      cbs = callbacks_factory(histogram_freq=1)
       with self.assertRaises(ValueError):
-        model.fit_generator(data_generator(True), len(x_train), epochs=2,
-                            callbacks=callbacks_factory(histogram_freq=1))
+        model.fit_generator(
+            data_generator(True), len(x_train), epochs=2, callbacks=cbs)
+
+      for cb in cbs:
+        cb.on_train_end()
 
       # fit generator with validation data generator should raise ValueError if
       # histogram_freq > 0
+      cbs = callbacks_factory(histogram_freq=1)
       with self.assertRaises(ValueError):
-        model.fit_generator(data_generator(True), len(x_train), epochs=2,
-                            validation_data=data_generator(False),
-                            validation_steps=1,
-                            callbacks=callbacks_factory(histogram_freq=1))
+        model.fit_generator(
+            data_generator(True),
+            len(x_train),
+            epochs=2,
+            validation_data=data_generator(False),
+            validation_steps=1,
+            callbacks=cbs)
+
+      for cb in cbs:
+        cb.on_train_end()
+
+      # Make sure file writer cache is clear to avoid failures during cleanup.
+      writer_cache.FileWriterCache.clear()
 
   def test_TensorBoard_multi_input_output(self):
     np.random.seed(1337)
@@ -767,6 +790,9 @@ class KerasCallbacksTest(test.TestCase):
                           callbacks=callbacks_factory(histogram_freq=1))
       assert os.path.isdir(filepath)
 
+  @unittest.skipIf(
+      os.name == 'nt',
+      'use_multiprocessing=True does not work on windows properly.')
   def test_LambdaCallback(self):
     with self.test_session():
       np.random.seed(1337)
@@ -789,14 +815,15 @@ class KerasCallbacksTest(test.TestCase):
 
       # Start an arbitrary process that should run during model
       # training and be terminated after training has completed.
+      e = threading.Event()
+
       def target():
-        while True:
-          pass
+        e.wait()
 
-      p = multiprocessing.Process(target=target)
-      p.start()
+      t = threading.Thread(target=target)
+      t.start()
       cleanup_callback = keras.callbacks.LambdaCallback(
-          on_train_end=lambda logs: p.terminate())
+          on_train_end=lambda logs: e.set())
 
       cbks = [cleanup_callback]
       model.fit(
@@ -807,8 +834,8 @@ class KerasCallbacksTest(test.TestCase):
           callbacks=cbks,
           epochs=5,
           verbose=0)
-      p.join()
-      assert not p.is_alive()
+      t.join()
+      assert not t.is_alive()
 
   def test_TensorBoard_with_ReduceLROnPlateau(self):
     with self.test_session():
diff --git a/tensorflow/python/keras/_impl/keras/engine/training_test.py b/tensorflow/python/keras/_impl/keras/engine/training_test.py
index e2a06e8e77..17a26f978e 100644
--- a/tensorflow/python/keras/_impl/keras/engine/training_test.py
+++ b/tensorflow/python/keras/_impl/keras/engine/training_test.py
@@ -18,6 +18,9 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
+import os
+import unittest
+
 import numpy as np
 
 from tensorflow.python.keras._impl import keras
@@ -783,6 +786,9 @@ class TestDynamicTrainability(test.TestCase):
 
 class TestGeneratorMethods(test.TestCase):
 
+  @unittest.skipIf(
+      os.name == 'nt',
+      'use_multiprocessing=True does not work on windows properly.')
   def test_generator_methods(self):
     arr_data = np.random.random((50, 2))
     arr_labels = np.random.random((50,))
diff --git a/tensorflow/python/keras/_impl/keras/estimator_test.py b/tensorflow/python/keras/_impl/keras/estimator_test.py
index 1144aa3152..a7ea3b48a3 100644
--- a/tensorflow/python/keras/_impl/keras/estimator_test.py
+++ b/tensorflow/python/keras/_impl/keras/estimator_test.py
@@ -33,6 +33,7 @@ from tensorflow.python.keras._impl.keras import testing_utils
 from tensorflow.python.keras._impl.keras.applications import mobilenet
 from tensorflow.python.platform import gfile
 from tensorflow.python.platform import test
+from tensorflow.python.summary.writer import writer_cache
 
 
 try:
@@ -132,6 +133,8 @@ class TestKerasEstimator(test_util.TensorFlowTestCase):
         tf_random_seed=_RANDOM_SEED, model_dir=self._base_dir)
 
   def tearDown(self):
+    # Make sure nothing is stuck in limbo.
+    writer_cache.FileWriterCache.clear()
     if os.path.isdir(self._base_dir):
       gfile.DeleteRecursively(self._base_dir)
 
@@ -153,6 +156,8 @@ class TestKerasEstimator(test_util.TensorFlowTestCase):
         est_keras.train(input_fn=train_input_fn, steps=_TRAIN_SIZE / 16)
         after_eval_results = est_keras.evaluate(input_fn=eval_input_fn, steps=1)
         self.assertLess(after_eval_results['loss'], before_eval_results['loss'])
+
+      writer_cache.FileWriterCache.clear()
       gfile.DeleteRecursively(self._config.model_dir)
 
   def test_evaluate(self):
diff --git a/tensorflow/python/keras/_impl/keras/models_test.py b/tensorflow/python/keras/_impl/keras/models_test.py
index 86acac4604..61938066b9 100644
--- a/tensorflow/python/keras/_impl/keras/models_test.py
+++ b/tensorflow/python/keras/_impl/keras/models_test.py
@@ -54,10 +54,11 @@ class TestModelSaving(test.TestCase):
       model.train_on_batch(x, y)
 
       out = model.predict(x)
-      _, fname = tempfile.mkstemp('.h5')
+      fd, fname = tempfile.mkstemp('.h5')
       keras.models.save_model(model, fname)
 
       new_model = keras.models.load_model(fname)
+      os.close(fd)
       os.remove(fname)
 
       out2 = new_model.predict(x)
@@ -95,13 +96,14 @@ class TestModelSaving(test.TestCase):
       model.train_on_batch(x, y)
 
       out = model.predict(x)
-      _, fname = tempfile.mkstemp('.h5')
+      fd, fname = tempfile.mkstemp('.h5')
       keras.models.save_model(model, fname)
 
       model = keras.models.load_model(
           fname,
           custom_objects={'CustomOp': CustomOp,
                           'custom_loss': custom_loss})
+      os.close(fd)
       os.remove(fname)
 
       out2 = model.predict(x)
@@ -125,10 +127,11 @@ class TestModelSaving(test.TestCase):
       model.train_on_batch(x, y)
 
       out = model.predict(x)
-      _, fname = tempfile.mkstemp('.h5')
+      fd, fname = tempfile.mkstemp('.h5')
       keras.models.save_model(model, fname)
 
       model = keras.models.load_model(fname)
+      os.close(fd)
       os.remove(fname)
 
       out2 = model.predict(x)
@@ -144,9 +147,10 @@ class TestModelSaving(test.TestCase):
       model.add(keras.layers.Dense(3))
       model.compile(loss='mse', optimizer='sgd', metrics=['acc'])
 
-      _, fname = tempfile.mkstemp('.h5')
+      fd, fname = tempfile.mkstemp('.h5')
       keras.models.save_model(model, fname)
       model = keras.models.load_model(fname)
+      os.close(fd)
       os.remove(fname)
 
   def test_saving_with_tf_optimizer(self):
@@ -161,9 +165,10 @@ class TestModelSaving(test.TestCase):
                     optimizer=training_module.AdadeltaOptimizer(0.1),
                     metrics=['acc'])
 
-      _, fname = tempfile.mkstemp('.h5')
+      fd, fname = tempfile.mkstemp('.h5')
       keras.models.save_model(model, fname)
       model = keras.models.load_model(fname)
+      os.close(fd)
       os.remove(fname)
 
   def test_saving_right_after_compilation(self):
@@ -177,9 +182,10 @@ class TestModelSaving(test.TestCase):
       model.compile(loss='mse', optimizer='sgd', metrics=['acc'])
       model.model._make_train_function()
 
-      _, fname = tempfile.mkstemp('.h5')
+      fd, fname = tempfile.mkstemp('.h5')
       keras.models.save_model(model, fname)
       model = keras.models.load_model(fname)
+      os.close(fd)
       os.remove(fname)
 
   def test_saving_lambda_numpy_array_arguments(self):
@@ -194,10 +200,11 @@ class TestModelSaving(test.TestCase):
     model = keras.models.Model(inputs, output)
     model.compile(loss='mse', optimizer='sgd', metrics=['acc'])
 
-    _, fname = tempfile.mkstemp('.h5')
+    fd, fname = tempfile.mkstemp('.h5')
     keras.models.save_model(model, fname)
 
     model = keras.models.load_model(fname)
+    os.close(fd)
     os.remove(fname)
 
     self.assertAllClose(mean, model.layers[1].arguments['mu'])
diff --git a/tensorflow/python/keras/_impl/keras/utils/data_utils_test.py b/tensorflow/python/keras/_impl/keras/utils/data_utils_test.py
index 14b2f08442..47c5b4cff0 100644
--- a/tensorflow/python/keras/_impl/keras/utils/data_utils_test.py
+++ b/tensorflow/python/keras/_impl/keras/utils/data_utils_test.py
@@ -22,6 +22,7 @@ from itertools import cycle
 import os
 import tarfile
 import threading
+import unittest
 import zipfile
 
 import numpy as np
@@ -164,6 +165,9 @@ class TestEnqueuers(test.TestCase):
     self.assertEqual(len(set(acc) - set(range(100))), 0)
     enqueuer.stop()
 
+  @unittest.skipIf(
+      os.name == 'nt',
+      'use_multiprocessing=True does not work on windows properly.')
   def test_generator_enqueuer_processes(self):
     enqueuer = keras.utils.data_utils.GeneratorEnqueuer(
         create_generator_from_sequence_pcs(TestSequence([3, 200, 200, 3])),
@@ -185,6 +189,9 @@ class TestEnqueuers(test.TestCase):
     with self.assertRaises(StopIteration):
       next(gen_output)
 
+  @unittest.skipIf(
+      os.name == 'nt',
+      'use_multiprocessing=True does not work on windows properly.')
   def test_generator_enqueuer_fail_processes(self):
     enqueuer = keras.utils.data_utils.GeneratorEnqueuer(
         create_generator_from_sequence_pcs(FaultSequence()),
diff --git a/tensorflow/python/keras/_impl/keras/utils/io_utils.py b/tensorflow/python/keras/_impl/keras/utils/io_utils.py
index 1c8299c27d..a8fc18c17a 100644
--- a/tensorflow/python/keras/_impl/keras/utils/io_utils.py
+++ b/tensorflow/python/keras/_impl/keras/utils/io_utils.py
@@ -63,11 +63,11 @@ class HDF5Matrix(object):
                         'HDF5 and h5py installed.')
 
     if datapath not in list(self.refs.keys()):
-      f = h5py.File(datapath)
-      self.refs[datapath] = f
+      self._f = h5py.File(datapath)
+      self.refs[datapath] = self._f
     else:
-      f = self.refs[datapath]
-    self.data = f[dataset]
+      self._f = self.refs[datapath]
+    self.data = self._f[dataset]
     self.start = start
     if end is None:
       self.end = self.data.shape[0]
@@ -78,6 +78,9 @@ class HDF5Matrix(object):
   def __len__(self):
     return self.end - self.start
 
+  def __del__(self):
+    self._f.close()
+
   def __getitem__(self, key):
     if isinstance(key, slice):
       start, stop = key.start, key.stop
diff --git a/tensorflow/python/keras/_impl/keras/utils/vis_utils.py b/tensorflow/python/keras/_impl/keras/utils/vis_utils.py
index ce2faf2d96..d56c4484ce 100644
--- a/tensorflow/python/keras/_impl/keras/utils/vis_utils.py
+++ b/tensorflow/python/keras/_impl/keras/utils/vis_utils.py
@@ -120,7 +120,7 @@ def model_to_dot(model, show_shapes=False, show_layer_names=True, rankdir='TB'):
     layer_id = str(id(layer))
     for i, node in enumerate(layer._inbound_nodes):  # pylint: disable=protected-access
       node_key = layer.name + '_ib-' + str(i)
-      if node_key in model.container_nodes:
+      if node_key in model._network_nodes:  # pylint: disable=protected-access
         for inbound_layer in node.inbound_layers:
           inbound_layer_id = str(id(inbound_layer))
           layer_id = str(id(layer))
diff --git a/tensorflow/python/kernel_tests/BUILD b/tensorflow/python/kernel_tests/BUILD
index f15b3baabe..f6721de32a 100644
--- a/tensorflow/python/kernel_tests/BUILD
+++ b/tensorflow/python/kernel_tests/BUILD
@@ -2362,7 +2362,7 @@ cuda_py_test(
 
 cuda_py_test(
     name = "slice_op_test",
-    size = "medium",
+    size = "large",
     srcs = ["slice_op_test.py"],
     additional_deps = [
         "//third_party/py/numpy",
@@ -2942,6 +2942,20 @@ tf_py_test(
     ],
 )
 
+tf_py_test(
+    name = "prefetch_dataset_op_test",
+    size = "small",
+    srcs = ["prefetch_dataset_op_test.py"],
+    additional_deps = [
+        "//tensorflow/python:array_ops",
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:dataset_ops_gen",
+        "//tensorflow/python:dtypes",
+        "//tensorflow/python:errors",
+        "//tensorflow/python/data/ops:dataset_ops",
+    ],
+)
+
 tf_py_test(
     name = "range_dataset_op_test",
     size = "small",
diff --git a/tensorflow/python/kernel_tests/decode_bmp_op_test.py b/tensorflow/python/kernel_tests/decode_bmp_op_test.py
index 783492a6f2..35f8f76991 100644
--- a/tensorflow/python/kernel_tests/decode_bmp_op_test.py
+++ b/tensorflow/python/kernel_tests/decode_bmp_op_test.py
@@ -64,6 +64,81 @@ class DecodeBmpOpTest(test.TestCase):
       decoded = decode.eval()
       self.assertAllEqual(decoded, img_bytes)
 
+  def testGrayscale(self):
+    img_bytes = [[[255], [0]], [[255], [0]]]
+    encoded_bytes = [
+        0x42,
+        0x40,
+        0x3d,
+        0,
+        0,
+        0,
+        0,
+        0,
+        0,
+        0,
+        0x36,
+        0,
+        0,
+        0,
+        0x28,
+        0,
+        0,
+        0,
+        0x2,
+        0,
+        0,
+        0,
+        0x2,
+        0,
+        0,
+        0,
+        0x1,
+        0,
+        0x8,
+        0,
+        0,
+        0,
+        0,
+        0,
+        0x10,
+        0,
+        0,
+        0,
+        0x13,
+        0xb,
+        0,
+        0,
+        0x13,
+        0xb,
+        0,
+        0,
+        0,
+        0,
+        0,
+        0,
+        0,
+        0,
+        0,
+        0,
+        0xff,
+        0,
+        0,
+        0,
+        0xff,
+        0,
+        0,
+        0,
+    ]
+
+    byte_string = bytes(bytearray(encoded_bytes))
+    img_in = constant_op.constant(byte_string, dtype=dtypes.string)
+    decode = image_ops.decode_bmp(img_in)
+
+    with self.test_session():
+      decoded = decode.eval()
+      self.assertAllEqual(decoded, img_bytes)
+
 
 if __name__ == "__main__":
   test.main()
diff --git a/tensorflow/python/kernel_tests/prefetch_dataset_op_test.py b/tensorflow/python/kernel_tests/prefetch_dataset_op_test.py
new file mode 100644
index 0000000000..646324cb95
--- /dev/null
+++ b/tensorflow/python/kernel_tests/prefetch_dataset_op_test.py
@@ -0,0 +1,59 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Test PrefetchDataset."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from tensorflow.python.data.ops import dataset_ops
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import errors
+from tensorflow.python.ops import array_ops
+from tensorflow.python.platform import test
+
+
+class PrefetchDatasetTest(test.TestCase):
+
+  def testBufferSize(self):
+    buffer_size = array_ops.placeholder(dtypes.int64, shape=[])
+    iterator = dataset_ops.Dataset.range(10).prefetch(
+        buffer_size=buffer_size).make_initializable_iterator()
+    init_op = iterator.initializer
+    get_next = iterator.get_next()
+
+    with self.test_session() as sess:
+      sess.run(init_op, feed_dict={buffer_size: 5})
+      for m in range(10):
+        self.assertEqual(m, sess.run(get_next))
+      with self.assertRaises(errors.OutOfRangeError):
+        sess.run(get_next)
+
+  def testInvalidBufferSize(self):
+    buffer_size = array_ops.placeholder(dtypes.int64, shape=[])
+    iterator = dataset_ops.Dataset.range(10).prefetch(
+        buffer_size=buffer_size).make_initializable_iterator()
+    init_op = iterator.initializer
+
+    with self.assertRaisesRegexp(errors.InvalidArgumentError, "buffer_size"):
+      with self.test_session() as sess:
+        sess.run(init_op, feed_dict={buffer_size: 0})
+
+    with self.assertRaisesRegexp(errors.InvalidArgumentError, "buffer_size"):
+      with self.test_session() as sess:
+        sess.run(init_op, feed_dict={buffer_size: -5})
+
+
+if __name__ == "__main__":
+  test.main()
diff --git a/tensorflow/python/layers/convolutional.py b/tensorflow/python/layers/convolutional.py
index 8c327d7e27..fbb13bb72c 100644
--- a/tensorflow/python/layers/convolutional.py
+++ b/tensorflow/python/layers/convolutional.py
@@ -920,6 +920,7 @@ class SeparableConv2D(Conv2D):
         trainable=trainable,
         name=name,
         **kwargs)
+    self.data_format = data_format
     self.depth_multiplier = depth_multiplier
     self.depthwise_initializer = depthwise_initializer
     self.pointwise_initializer = pointwise_initializer
@@ -1231,9 +1232,8 @@ class Conv2DTranspose(Conv2D):
 
   def build(self, input_shape):
     if len(input_shape) != 4:
-      raise ValueError('Inputs should have rank ' +
-                       str(4) +
-                       'Received input shape:', str(input_shape))
+      raise ValueError('Inputs should have rank 4. Received input shape: ' +
+                       str(input_shape))
     if self.data_format == 'channels_first':
       channel_axis = 1
     else:
diff --git a/tensorflow/python/ops/math_ops_test.py b/tensorflow/python/ops/math_ops_test.py
index 81a7cf28bb..bd26ff6696 100644
--- a/tensorflow/python/ops/math_ops_test.py
+++ b/tensorflow/python/ops/math_ops_test.py
@@ -61,7 +61,7 @@ class ReduceTest(test_util.TensorFlowTestCase):
   @test_util.run_in_graph_and_eager_modes()
   def testReduceInvalidAxis(self):
     if context.in_eager_mode():
-      # The shape check is in run a graph contruction time. In eager mode,
+      # The shape check is in run a graph construction time. In eager mode,
       # it misses the check, magically return result given wrong shape.
       return
     x = np.array([[1, 2, 3], [4, 5, 6]], dtype=np.int32)
diff --git a/tensorflow/python/ops/variable_scope.py b/tensorflow/python/ops/variable_scope.py
index ae2d46a2b7..3643861a16 100644
--- a/tensorflow/python/ops/variable_scope.py
+++ b/tensorflow/python/ops/variable_scope.py
@@ -1697,7 +1697,7 @@ class variable_scope(object):  # pylint: disable=invalid-name
   v1 = foo()  # Creates v.
   v2 = foo()  # Gets the same, existing v.
   assert v1 == v2
-
+  ```
 
   Basic example of sharing a variable with reuse=True:
 
diff --git a/tensorflow/python/ops/variables.py b/tensorflow/python/ops/variables.py
index a1e4305de1..e0748d87e2 100644
--- a/tensorflow/python/ops/variables.py
+++ b/tensorflow/python/ops/variables.py
@@ -200,7 +200,7 @@ class Variable(object):
 
     @compatibility(eager)
     `tf.Variable` is not compatible with eager execution.  Use
-    `tfe.Variable` instead which is compatable with both eager execution
+    `tfe.Variable` instead which is compatible with both eager execution
     and graph construction.  See [the TensorFlow Eager Execution
     guide](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/eager/python/g3doc/guide.md#variables-and-optimizers)
     for details on how variables work in eager execution.
@@ -1064,7 +1064,7 @@ class PartitionedVariable(object):
   """A container for partitioned `Variable` objects.
 
   @compatibility(eager) `tf.PartitionedVariable` is not compatible with
-  eager execution.  Use `tfe.Variable` instead which is compatable
+  eager execution.  Use `tfe.Variable` instead which is compatible
   with both eager execution and graph construction.  See [the
   TensorFlow Eager Execution
   guide](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/eager/python/g3doc/guide.md#variables-and-optimizers)
diff --git a/tensorflow/python/profiler/model_analyzer_test.py b/tensorflow/python/profiler/model_analyzer_test.py
index c39d0fa5b1..ccfb9aac53 100644
--- a/tensorflow/python/profiler/model_analyzer_test.py
+++ b/tensorflow/python/profiler/model_analyzer_test.py
@@ -65,7 +65,7 @@ class PrintModelAnalysisTest(test.TestCase):
                          '  ScalarW (1, 1/1 params)\n',
                          f.read())
 
-  def testSelectEverthingDetail(self):
+  def testSelectEverythingDetail(self):
     ops.reset_default_graph()
     dev = '/device:GPU:0' if test.is_gpu_available() else '/device:CPU:0'
     outfile = os.path.join(test.get_temp_dir(), 'dump')
diff --git a/tensorflow/python/training/saver_test.py b/tensorflow/python/training/saver_test.py
index 98ac197204..74ee1e5fa8 100644
--- a/tensorflow/python/training/saver_test.py
+++ b/tensorflow/python/training/saver_test.py
@@ -726,6 +726,8 @@ class SaverTest(test.TestCase):
 
 class SaveRestoreShardedTest(test.TestCase):
 
+  _WRITE_VERSION = saver_pb2.SaverDef.V1
+
   def _get_test_dir(self, dirname):
     test_dir = os.path.join(self.get_temp_dir(), dirname)
     gfile.MakeDirs(test_dir)
@@ -751,6 +753,7 @@ class SaveRestoreShardedTest(test.TestCase):
               "t0": t0.saveable,
               "t1": t1.saveable
           },
+          write_version=self._WRITE_VERSION,
           sharded=True)
       variables.global_variables_initializer().run()
       t0.insert("k1", 30.0).run()
@@ -771,7 +774,13 @@ class SaveRestoreShardedTest(test.TestCase):
         with sess.graph.device("/cpu:0"):
           v0 = variables.Variable(111, name="v0")
           t0 = saver_test_utils.CheckpointedOp(name="t0")
-        save = saver_module.Saver({"v0": v0, "t0": t0.saveable}, sharded=True)
+        save = saver_module.Saver(
+            {
+                "v0": v0,
+                "t0": t0.saveable
+            },
+            write_version=self._WRITE_VERSION,
+            sharded=True)
         variables.global_variables_initializer().run()
         t0.insert("k11", 33.0).run()
         self.assertEqual(111, v0.eval())
@@ -789,7 +798,13 @@ class SaveRestoreShardedTest(test.TestCase):
         with sess.graph.device("/cpu:0"):
           v1 = variables.Variable(222)
           t1 = saver_test_utils.CheckpointedOp(name="t1")
-        save = saver_module.Saver({"v1": v1, "t1": t1.saveable}, sharded=True)
+        save = saver_module.Saver(
+            {
+                "v1": v1,
+                "t1": t1.saveable
+            },
+            write_version=self._WRITE_VERSION,
+            sharded=True)
         variables.global_variables_initializer().run()
         t1.insert("k22", 44.0).run()
         self.assertEqual(222, v1.eval())
@@ -817,6 +832,7 @@ class SaveRestoreShardedTest(test.TestCase):
               "t0": t0.saveable,
               "t1": t1.saveable
           },
+          write_version=self._WRITE_VERSION,
           sharded=True)
       variables.global_variables_initializer().run()
       t0.insert("k11", 33.0).run()
@@ -982,6 +998,10 @@ class SaveRestoreShardedTest(test.TestCase):
     self._testPartitionedVariables(use_resource=True)
 
 
+class SaveRestoreShardedTestV2(SaveRestoreShardedTest):
+  _WRITE_VERSION = saver_pb2.SaverDef.V2
+
+
 class MaxToKeepTest(test.TestCase):
 
   def _get_test_dir(self, dirname):
diff --git a/tensorflow/python/training/sync_replicas_optimizer.py b/tensorflow/python/training/sync_replicas_optimizer.py
index 2a97d45daa..b52d101a21 100644
--- a/tensorflow/python/training/sync_replicas_optimizer.py
+++ b/tensorflow/python/training/sync_replicas_optimizer.py
@@ -99,7 +99,7 @@ class SyncReplicasOptimizer(optimizer.Optimizer):
   # Note that if you want to have 2 backup replicas, you can change
   # total_num_replicas=52 and make sure this number matches how many physical
   # replicas you started in your job.
-  opt = tf.SyncReplicasOptimizer(opt, replicas_to_aggregate=50,
+  opt = tf.train.SyncReplicasOptimizer(opt, replicas_to_aggregate=50,
                                  total_num_replicas=50)
 
   # Some models have startup_delays to help stabilize the model but when using
diff --git a/tensorflow/python/util/nest.py b/tensorflow/python/util/nest.py
index 75f482e5a8..5c066e2bef 100644
--- a/tensorflow/python/util/nest.py
+++ b/tensorflow/python/util/nest.py
@@ -116,7 +116,7 @@ def flatten(nest):
   used instead. The same convention is followed in `pack_sequence_as`. This
   correctly repacks dicts and `OrderedDict`s after they have been flattened,
   and also allows flattening an `OrderedDict` and then repacking it back using
-  a correponding plain dict, or vice-versa.
+  a corresponding plain dict, or vice-versa.
   Dictionaries with non-sortable keys cannot be flattened.
 
   Users must not modify any collections used in `nest` while this function is
@@ -296,7 +296,7 @@ def pack_sequence_as(structure, flat_sequence):
   keys is used instead. The same convention is followed in `flatten`.
   This correctly repacks dicts and `OrderedDict`s after they have been
   flattened, and also allows flattening an `OrderedDict` and then repacking it
-  back using a correponding plain dict, or vice-versa.
+  back using a corresponding plain dict, or vice-versa.
   Dictionaries with non-sortable keys cannot be flattened.
 
   Args:
@@ -452,6 +452,17 @@ def assert_shallow_structure(shallow_tree, input_tree, check_types=True):
           "structure has length %s, while shallow structure has length %s."
           % (len(input_tree), len(shallow_tree)))
 
+    if check_types and isinstance(shallow_tree, dict):
+      if set(input_tree) != set(shallow_tree):
+        raise ValueError(
+            "The two structures don't have the same keys. Input "
+            "structure has keys %s, while shallow structure has keys %s." %
+            (list(_six.iterkeys(input_tree)),
+             list(_six.iterkeys(shallow_tree))))
+
+      input_tree = list(_six.iteritems(input_tree))
+      shallow_tree = list(_six.iteritems(shallow_tree))
+
     for shallow_branch, input_branch in zip(shallow_tree, input_tree):
       assert_shallow_structure(shallow_branch, input_branch,
                                check_types=check_types)
diff --git a/tensorflow/python/util/nest_test.py b/tensorflow/python/util/nest_test.py
index c4020f4f3c..3d9e9f9684 100644
--- a/tensorflow/python/util/nest_test.py
+++ b/tensorflow/python/util/nest_test.py
@@ -385,6 +385,16 @@ class NestTest(test.TestCase):
       nest.assert_shallow_structure(inp_ab2, inp_ab1)
     nest.assert_shallow_structure(inp_ab2, inp_ab1, check_types=False)
 
+    inp_ab1 = {"a": (1, 1), "b": {"c": (2, 2)}}
+    inp_ab2 = {"a": (1, 1), "b": {"d": (2, 2)}}
+    expected_message = (
+        r"The two structures don't have the same keys. Input "
+        r"structure has keys \['c'\], while shallow structure has "
+        r"keys \['d'\].")
+
+    with self.assertRaisesRegexp(ValueError, expected_message):
+      nest.assert_shallow_structure(inp_ab2, inp_ab1)
+
   def testFlattenUpTo(self):
     # Shallow tree ends at scalar.
     input_tree = [[[2, 2], [3, 3]], [[4, 9], [5, 5]]]
@@ -429,8 +439,7 @@ class NestTest(test.TestCase):
     input_tree_flattened_as_shallow_tree = nest.flatten_up_to(shallow_tree,
                                                               input_tree)
     self.assertEqual(input_tree_flattened_as_shallow_tree, [0, 1, 2, 3, 4])
-    shallow_tree = collections.OrderedDict([("a", 0),
-                                            ("b", {"d": 3, "e": 1})])
+    shallow_tree = collections.OrderedDict([("a", 0), ("c", {"d": 3, "e": 1})])
     input_tree_flattened_as_shallow_tree = nest.flatten_up_to(shallow_tree,
                                                               input_tree)
     self.assertEqual(input_tree_flattened_as_shallow_tree,
diff --git a/tensorflow/tools/api/golden/tensorflow.keras.callbacks.-tensor-board.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.callbacks.-tensor-board.pbtxt
index 6620a9d308..7de4008c45 100644
--- a/tensorflow/tools/api/golden/tensorflow.keras.callbacks.-tensor-board.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.keras.callbacks.-tensor-board.pbtxt
@@ -29,7 +29,7 @@ tf_class {
   }
   member_method {
     name: "on_train_end"
-    argspec: "args=[\'self\', \'_\'], varargs=None, keywords=None, defaults=None"
+    argspec: "args=[\'self\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
   member_method {
     name: "set_model"
diff --git a/tensorflow/tools/benchmark/benchmark_model.cc b/tensorflow/tools/benchmark/benchmark_model.cc
index 2d59299da4..9809ad52de 100644
--- a/tensorflow/tools/benchmark/benchmark_model.cc
+++ b/tensorflow/tools/benchmark/benchmark_model.cc
@@ -622,7 +622,7 @@ int Main(int argc, char** argv) {
     RecordBenchmarkEntry(output_prefix, benchmark_name, "meta-first-inference",
                          warmup_runs, warmup_time_us / 1000000.0);
 
-    // Time from starting to intialize TF to getting the first result back.
+    // Time from starting to initialize TF to getting the first result back.
     // This also assumes that only one warmup run is performed.
     RecordBenchmarkEntry(
         output_prefix, benchmark_name, "meta-init-plus-first-inference", 1,
diff --git a/tensorflow/tools/ci_build/builds/test_user_ops.sh b/tensorflow/tools/ci_build/builds/test_user_ops.sh
index 4f1c61b8e9..358f82ac5d 100755
--- a/tensorflow/tools/ci_build/builds/test_user_ops.sh
+++ b/tensorflow/tools/ci_build/builds/test_user_ops.sh
@@ -76,17 +76,17 @@ echo "PYTHON_BIN_PATH: ${PYTHON_BIN_PATH}"
 
 pushd "${TMP_DIR}"
 
-# Obtain paths include and lib paths to the TensorFlow installation
-TF_INC=$("${PYTHON_BIN_PATH}" \
-         -c 'import tensorflow as tf; print(tf.sysconfig.get_include())')
-TF_LIB=$("${PYTHON_BIN_PATH}" \
-         -c 'import tensorflow as tf; print(tf.sysconfig.get_lib())')
-
-if [[ -z "${TF_INC}" ]]; then
-  die "FAILED to determine TensorFlow include path"
+# Obtain compilation and linking flags
+TF_CFLAGS=( $("${PYTHON_BIN_PATH}" \
+	      -c 'import tensorflow as tf; print(" ".join(tf.sysconfig.get_compile_flags()))') )
+TF_LFLAGS=( $("${PYTHON_BIN_PATH}" \
+	      -c 'import tensorflow as tf; print(" ".join(tf.sysconfig.get_link_flags()))') )
+
+if [[ -z "${TF_CFLAGS}" || -z "${TF_LFLAGS}" ]]; then
+  die "FAILED to determine TensorFlow compilation or linking flags"
 else
-  echo "TensorFlow include path: ${TF_INC}"
-  TF_INCLUDE_PATH="-I${TF_INC} -I${TF_INC}/external/nsync/public"
+  echo "TensorFlow compile flags: ${TF_CFLAGS[@]}"
+  echo "TensorFlow link flags: ${TF_LFLAGS[@]}"
 fi
 
 # Check g++ availability
@@ -145,7 +145,7 @@ if [[ ${IS_GPU} == "0" ]]; then
 
   "${GPP_BIN}" -std=c++11 ${EXTRA_GPP_FLAGS} \
     -shared "${SRC_FILE}" -o "${USER_OP_SO}" \
-    -fPIC ${TF_INCLUDE_PATH} -L "${TF_LIB}" -ltensorflow_framework  || \
+    -fPIC ${TF_CFLAGS[@]} ${TF_LFLAGS[@]}  || \
     die "g++ compilation of ${SRC_FILE} FAILED"
 
 else
@@ -184,7 +184,7 @@ else
   OP_KERNEL_O=$(echo "${OP_KERNEL_CC}" | sed -e 's/\.cc/\.o/')
   "${NVCC_BIN}" -std=c++11 \
       -c -o "${OP_KERNEL_O}" "${OP_KERNEL_CU}" \
-      ${TF_INCLUDE_PATH} -D GOOGLE_CUDA=1 -x cu -Xcompiler -fPIC || \
+      ${TF_CFLAGS[@]} -D GOOGLE_CUDA=1 -x cu -Xcompiler -fPIC || \
       die "nvcc compilation of ${OP_KERNEL_CC} FAILED"
 
   CUDA_LIB_DIR="/usr/local/cuda/lib64"
@@ -203,8 +203,8 @@ else
   USER_OP_SO="add_one.so"
   "${GPP_BIN}" -std=c++11 ${EXTRA_GPP_FLAGS} \
       -shared -o "${USER_OP_SO}" "${OP_KERNEL_CC}" \
-      "${OP_KERNEL_O}" ${TF_INCLUDE_PATH} -L "${CUDA_LIB_DIR}" -L "${TF_LIB}" \
-      -fPIC -lcudart -ltensorflow_framework || \
+      "${OP_KERNEL_O}" ${TF_CFLAGS[@]} -L "${CUDA_LIB_DIR}" ${TF_LFLAGS[@]} \
+      -fPIC -lcudart || \
       die "g++ compilation of ${OP_KERNEL_CC}" FAILED
 fi
 
diff --git a/tensorflow/tools/ci_build/ci_parameterized_build.sh b/tensorflow/tools/ci_build/ci_parameterized_build.sh
index c27f4953e3..2217b110e3 100755
--- a/tensorflow/tools/ci_build/ci_parameterized_build.sh
+++ b/tensorflow/tools/ci_build/ci_parameterized_build.sh
@@ -546,8 +546,8 @@ echo ""
 
 TMP_DIR=""
 DOCKERFILE_FLAG=""
-if [[ "${TF_BUILD_PYTHON_VERSION}" == "python3.5" ] ||
-  ["${TF_BUILD_PYTHON_VERSION}" == "python3.6" ]]; then
+if [[ "${TF_BUILD_PYTHON_VERSION}" == "python3.5" ]] ||
+  [[ "${TF_BUILD_PYTHON_VERSION}" == "python3.6" ]]; then
   # Modify Dockerfile for Python3.5 | Python3.6 build
   TMP_DIR=$(mktemp -d)
   echo "Docker build will occur in temporary directory: ${TMP_DIR}"
diff --git a/tensorflow/tools/ci_build/windows/cpu/cmake/run_build.bat b/tensorflow/tools/ci_build/windows/cpu/cmake/run_build.bat
index 6e600e2dcf..56bff07774 100644
--- a/tensorflow/tools/ci_build/windows/cpu/cmake/run_build.bat
+++ b/tensorflow/tools/ci_build/windows/cpu/cmake/run_build.bat
@@ -37,4 +37,4 @@ SET MSBUILD_EXE="C:\Program Files (x86)\MSBuild\14.0\Bin\msbuild.exe"
 %CMAKE_EXE% %CMAKE_DIR% -A x64 -DSWIG_EXECUTABLE=%SWIG_EXE% -DPYTHON_EXECUTABLE=%PY_EXE% -DCMAKE_BUILD_TYPE=Release -DPYTHON_LIBRARIES=%PY_LIB% -Dtensorflow_BUILD_PYTHON_TESTS=%BUILD_PYTHON_TESTS% -Dtensorflow_BUILD_CC_TESTS=%BUILD_CC_TESTS% -Dtensorflow_TF_NIGHTLY=%TF_NIGHTLY%
 
 :: Run msbuild in the resulting VS project files to build a pip package.
-%MSBUILD_EXE% /p:Configuration=Release /maxcpucount:32 /verbosity:minimal tf_python_build_pip_package.vcxproj
\ No newline at end of file
+%MSBUILD_EXE% /p:Configuration=Release /maxcpucount:32 tf_python_build_pip_package.vcxproj
diff --git a/tensorflow/tools/ci_build/windows/cpu/pip/build_tf_windows.sh b/tensorflow/tools/ci_build/windows/cpu/pip/build_tf_windows.sh
index f6e3d2e6c7..8520ca898f 100644
--- a/tensorflow/tools/ci_build/windows/cpu/pip/build_tf_windows.sh
+++ b/tensorflow/tools/ci_build/windows/cpu/pip/build_tf_windows.sh
@@ -64,7 +64,7 @@ reinstall_tensorflow_pip ${PIP_NAME}
 # https://github.com/tensorflow/tensorflow/issues/12844 is fixed.
 bazel test -c opt $BUILD_OPTS -k --test_output=errors \
   --define=no_tensorflow_py_deps=true --test_lang_filters=py \
-  --test_tag_filters=-no_pip,-no_windows \
-  --build_tag_filters=-no_pip,-no_windows --build_tests_only \
+  --test_tag_filters=-no_pip,-no_windows,-no_oss \
+  --build_tag_filters=-no_pip,-no_windows,-no_oss --build_tests_only \
   --test_env=TF_SAVER_LENIENT_NAMES=True \
   //${PY_TEST_DIR}/tensorflow/python/...
diff --git a/tensorflow/tools/ci_build/windows/gpu/cmake/run_build.bat b/tensorflow/tools/ci_build/windows/gpu/cmake/run_build.bat
index 44d8252a7a..832943ad6c 100644
--- a/tensorflow/tools/ci_build/windows/gpu/cmake/run_build.bat
+++ b/tensorflow/tools/ci_build/windows/gpu/cmake/run_build.bat
@@ -38,4 +38,4 @@ SET MSBUILD_EXE="C:\Program Files (x86)\MSBuild\14.0\Bin\msbuild.exe"
 %CMAKE_EXE% %CMAKE_DIR% -A x64 -DSWIG_EXECUTABLE=%SWIG_EXE% -DPYTHON_EXECUTABLE=%PY_EXE% -DCMAKE_BUILD_TYPE=Release -DPYTHON_LIBRARIES=%PY_LIB% -Dtensorflow_BUILD_PYTHON_TESTS=%BUILD_PYTHON_TESTS% -Dtensorflow_BUILD_CC_TESTS=%BUILD_CC_TESTS% -Dtensorflow_ENABLE_GPU=ON -DCUDNN_HOME=%CUDNN_HOME% -Dtensorflow_TF_NIGHTLY=%TF_NIGHTLY%
 
 :: Run msbuild in the resulting VS project files to build a pip package.
-%MSBUILD_EXE% /p:Configuration=Release /maxcpucount:32 /verbosity:minimal tf_python_build_pip_package.vcxproj
+%MSBUILD_EXE% /p:Configuration=Release /maxcpucount:32 tf_python_build_pip_package.vcxproj
diff --git a/tensorflow/tools/ci_build/windows/gpu/pip/build_tf_windows.sh b/tensorflow/tools/ci_build/windows/gpu/pip/build_tf_windows.sh
index 25d327c818..47ca42d642 100644
--- a/tensorflow/tools/ci_build/windows/gpu/pip/build_tf_windows.sh
+++ b/tensorflow/tools/ci_build/windows/gpu/pip/build_tf_windows.sh
@@ -65,7 +65,7 @@ reinstall_tensorflow_pip ${PIP_NAME}
 # https://github.com/tensorflow/tensorflow/issues/12844 is fixed.
 bazel test -c opt $BUILD_OPTS -k --test_output=errors \
   --define=no_tensorflow_py_deps=true --test_lang_filters=py \
-  --test_tag_filters=-no_pip,-no_windows,-no_windows_gpu,-no_gpu,-no_pip_gpu \
-  --build_tag_filters=-no_pip,-no_windows,-no_windows_gpu,-no_gpu,-no_pip_gpu \
+  --test_tag_filters=-no_pip,-no_windows,-no_windows_gpu,-no_gpu,-no_pip_gpu,no_oss \
+  --build_tag_filters=-no_pip,-no_windows,-no_windows_gpu,-no_gpu,-no_pip_gpu,no_oss \
   --test_env=TF_SAVER_LENIENT_NAMES=True \
   --local_test_jobs=1 --build_tests_only //${PY_TEST_DIR}/tensorflow/python/...
diff --git a/tensorflow/tools/dist_test/python/census_widendeep.py b/tensorflow/tools/dist_test/python/census_widendeep.py
index 3a55781496..8feb5386e9 100644
--- a/tensorflow/tools/dist_test/python/census_widendeep.py
+++ b/tensorflow/tools/dist_test/python/census_widendeep.py
@@ -263,8 +263,7 @@ if __name__ == "__main__":
       "--data_dir",
       type=str,
       default="/tmp/census-data",
-      help="Directory for storing the cesnsus data"
-  )
+      help="Directory for storing the census data")
   parser.add_argument(
       "--model_dir",
       type=str,
diff --git a/tensorflow/tools/docker/Dockerfile.devel b/tensorflow/tools/docker/Dockerfile.devel
index 1a0145b078..3525c7524f 100644
--- a/tensorflow/tools/docker/Dockerfile.devel
+++ b/tensorflow/tools/docker/Dockerfile.devel
@@ -101,4 +101,3 @@ EXPOSE 6006
 EXPOSE 8888
 
 WORKDIR /root
-CMD ["/bin/bash"]
diff --git a/tensorflow/tools/docker/Dockerfile.devel-cpu-mkl b/tensorflow/tools/docker/Dockerfile.devel-cpu-mkl
new file mode 100644
index 0000000000..8180e5e7fb
--- /dev/null
+++ b/tensorflow/tools/docker/Dockerfile.devel-cpu-mkl
@@ -0,0 +1,85 @@
+FROM tensorflow/tensorflow:latest-devel
+
+LABEL maintainer="Clayne Robison<clayne.b.robison@intel.com>"
+
+# These arguments are parameterized. Use --build-args to override.
+ARG TF_BRANCH=r1.4
+ARG WHL_DIR=/whl
+
+RUN apt-get update && apt-get install -y --no-install-recommends \
+        golang \
+        vim \
+        emacs \
+        && \
+    apt-get clean && \
+    rm -rf /var/lib/apt/lists/*
+
+RUN pip --no-cache-dir install --upgrade \
+        pip setuptools
+
+RUN pip --no-cache-dir install wheel 
+
+# Download and build TensorFlow.
+WORKDIR /
+RUN rm -rf tensorflow && \
+    git clone https://github.com/tensorflow/tensorflow.git && \
+    cd tensorflow && \
+    git checkout ${TF_BRANCH}
+WORKDIR /tensorflow
+
+# Configure the build for CPU with MKL by accepting default build options and
+# setting library locations
+ENV CI_BUILD_PYTHON=python \
+   LD_LIBRARY_PATH=${LD_LIBRARY_PATH} \
+    PYTHON_BIN_PATH=/usr/bin/python \
+    PYTHON_LIB_PATH=/usr/local/lib/python2.7/dist-packages \
+    CC_OPT_FLAGS='-march=native' \
+    TF_NEED_JEMALLOC=0 \
+    TF_NEED_GCP=0 \
+    TF_NEED_CUDA=0 \
+    TF_NEED_HDFS=0 \
+    TF_NEED_S3=0 \
+    TF_NEED_OPENCL=0 \
+    TF_NEED_GDR=0 \
+    TF_ENABLE_XLA=0 \
+    TF_NEED_VERBS=0 \
+    TF_NEED_MPI=0
+RUN ./configure
+
+# Build and Install TensorFlow.
+# The 'mkl' option builds with Intel(R) Math Kernel Library (MKL), which detects
+# the platform it is currently running on and takes appropriately optimized 
+# paths. The -march=native option is for code that is not in MKL, and assumes
+# this container will be run on the same architecture on which it is built.
+RUN LD_LIBRARY_PATH=${LD_LIBRARY_PATH} \
+    bazel build --config=mkl \
+                --config="opt" \
+                --copt="-march=native" \
+                --copt="-O3" \
+                //tensorflow/tools/pip_package:build_pip_package && \
+    mkdir ${WHL_DIR} && \
+    bazel-bin/tensorflow/tools/pip_package/build_pip_package ${WHL_DIR}
+
+# Clean up Bazel cache when done, but leave the whl.
+# This will upgrade the default Tensorflow version with the Intel MKL version
+RUN pip --no-cache-dir install --upgrade ${WHL_DIR}/tensorflow-*.whl && \
+    rm -rf /root/.cache
+
+WORKDIR /root
+
+#add welcome message with instructions
+
+RUN echo '[ ! -z "$TERM" -a -r /etc/motd ] && cat /etc/issue && cat /etc/motd' \
+	>> /etc/bash.bashrc \
+	; echo "\
+||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||\n\
+|								\n\
+| Docker container running Ubuntu				\n\
+| with TensorFlow ${TF_BRANCH} optimized for CPU		\n\
+| with Intel(R) MKL						\n\
+|								\n\
+||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||\n\
+\n "\
+	> /etc/motd
+
+CMD ["/bin/bash"]
diff --git a/tensorflow/tools/docker/Dockerfile.devel-gpu b/tensorflow/tools/docker/Dockerfile.devel-gpu
index 21a44ee404..041f45971b 100644
--- a/tensorflow/tools/docker/Dockerfile.devel-gpu
+++ b/tensorflow/tools/docker/Dockerfile.devel-gpu
@@ -102,5 +102,3 @@ WORKDIR /root
 EXPOSE 6006
 # IPython
 EXPOSE 8888
-
-RUN ["/bin/bash"]
diff --git a/tensorflow/tools/docker/Dockerfile.devel-gpu-cuda9-cudnn7 b/tensorflow/tools/docker/Dockerfile.devel-gpu-cuda9-cudnn7
index 9bcc3925a8..3bedc8cf34 100644
--- a/tensorflow/tools/docker/Dockerfile.devel-gpu-cuda9-cudnn7
+++ b/tensorflow/tools/docker/Dockerfile.devel-gpu-cuda9-cudnn7
@@ -113,5 +113,3 @@ WORKDIR /root
 EXPOSE 6006
 # IPython
 EXPOSE 8888
-
-RUN ["/bin/bash"]
diff --git a/tensorflow/tools/docker/notebooks/2_getting_started.ipynb b/tensorflow/tools/docker/notebooks/2_getting_started.ipynb
index e171b439fe..b0963ebc3f 100644
--- a/tensorflow/tools/docker/notebooks/2_getting_started.ipynb
+++ b/tensorflow/tools/docker/notebooks/2_getting_started.ipynb
@@ -159,7 +159,7 @@
         "X = np.array([np.linspace(-2, 4, num_examples), np.linspace(-6, 6, num_examples)])\n",
         "X += np.random.randn(2, num_examples)\n",
         "x, y = X\n",
-        "x_with_bias = np.array([(1., a) for a in x]).astype(np.float32)\n",
+        "bias_with_x = np.array([(1., a) for a in x]).astype(np.float32)\n",
         "\n",
         "losses = []\n",
         "training_steps = 50\n",
@@ -167,7 +167,7 @@
         "\n",
         "with tf.Session() as sess:\n",
         "    # Set up all the tensors, variables, and operations.\n",
-        "    input = tf.constant(x_with_bias)\n",
+        "    input = tf.constant(bias_with_x)\n",
         "    target = tf.constant(np.transpose([y]).astype(np.float32))\n",
         "    weights = tf.Variable(tf.random_normal([2, 1], 0, 0.1))\n",
         "\n",
@@ -583,7 +583,7 @@
         "# Split into x and y\n",
         "x, y = X\n",
         "# Add the bias node which always has a value of 1\n",
-        "x_with_bias = np.array([(1., a) for a in x]).astype(np.float32)\n",
+        "bias_with_x = np.array([(1., a) for a in x]).astype(np.float32)\n",
         "\n",
         "# Keep track of the loss at each iteration so we can chart it later\n",
         "losses = []\n",
@@ -598,7 +598,7 @@
         "with tf.Session() as sess:\n",
         "    # Set up all the tensors.\n",
         "    # Our input layer is the x value and the bias node.\n",
-        "    input = tf.constant(x_with_bias)\n",
+        "    input = tf.constant(bias_with_x)\n",
         "    # Our target is the y values. They need to be massaged to the right shape.\n",
         "    target = tf.constant(np.transpose([y]).astype(np.float32))\n",
         "    # Weights are a variable. They change every time through the loop.\n",
@@ -621,7 +621,7 @@
         "    loss = tf.nn.l2_loss(yerror)\n",
         "\n",
         "    # Perform gradient descent. \n",
-        "    # This essentially just updates weights, like weights += grads * learning_rate\n",
+        "    # This essentially just updates weights, like weights -= grads * learning_rate\n",
         "    # using the partial derivative of the loss with respect to the\n",
         "    # weights. It's the direction we want to go to move toward lower error.\n",
         "    update_weights = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss)\n",
@@ -743,7 +743,7 @@
         "with tf.Session() as sess:\n",
         "    # Set up all the tensors.\n",
         "    # The input is the x values with the bias appended on to each x.\n",
-        "    input = tf.constant(x_with_bias)\n",
+        "    input = tf.constant(bias_with_x)\n",
         "    # We're trying to find the best fit for the target y values.\n",
         "    target = tf.constant(np.transpose([y]).astype(np.float32))\n",
         "    # Let's set up the weights randomly\n",
diff --git a/tensorflow/tools/docker/notebooks/3_mnist_from_scratch.ipynb b/tensorflow/tools/docker/notebooks/3_mnist_from_scratch.ipynb
index 614a19c178..5585ebdcd3 100644
--- a/tensorflow/tools/docker/notebooks/3_mnist_from_scratch.ipynb
+++ b/tensorflow/tools/docker/notebooks/3_mnist_from_scratch.ipynb
@@ -135,6 +135,8 @@
     "from six.moves.urllib.request import urlretrieve\n",
     "\n",
     "SOURCE_URL = 'https://storage.googleapis.com/cvdf-datasets/mnist/'\n",
+    "#SOURCE_URL = 'http://yann.lecun.com/exdb/mnist/'\n",
+    "# for those who have no access to google storage, use lecun's repo please\n",
     "WORK_DIRECTORY = \"/tmp/mnist-data\"\n",
     "\n",
     "def maybe_download(filename):\n",
diff --git a/tensorflow/tools/pip_package/pip_smoke_test.py b/tensorflow/tools/pip_package/pip_smoke_test.py
index 3677aaa886..cc46dd5162 100644
--- a/tensorflow/tools/pip_package/pip_smoke_test.py
+++ b/tensorflow/tools/pip_package/pip_smoke_test.py
@@ -66,9 +66,6 @@ BLACKLIST = [
     "//tensorflow/contrib/timeseries/examples:data/period_trend.csv",  # pylint:disable=line-too-long
     "//tensorflow/contrib/timeseries/python/timeseries:test_utils",
     "//tensorflow/contrib/timeseries/python/timeseries/state_space_models:test_utils",  # pylint:disable=line-too-long
-
-    # TODO(yifeif): Remove when py_library(testonly=1) is ignored.
-    "//tensorflow/contrib/summary:summary_test_internal",
 ]
 
 
diff --git a/tensorflow/tools/pip_package/setup.py b/tensorflow/tools/pip_package/setup.py
index a493c6f2aa..3852b251d9 100644
--- a/tensorflow/tools/pip_package/setup.py
+++ b/tensorflow/tools/pip_package/setup.py
@@ -33,11 +33,16 @@ _VERSION = '1.4.0'
 
 REQUIRED_PACKAGES = [
     'absl-py',
-    'enum34 >= 1.1.6',
+    # weakref.finalize introduced in Python 3.4
+    'backports.weakref >= 1.0rc1; python_version < "3.4"',
+    # enum module introduced in Python 3.4
+    'enum34 >= 1.1.6; python_version < "3.4"',
+    # Needed for unittest.mock in Python 2
+    'mock >= 2.0.0; python_version < "3.0"',
     'numpy >= 1.12.1',
     'six >= 1.10.0',
     'protobuf >= 3.4.0',
-    'tensorflow-tensorboard >= 0.4.0rc1, < 0.5.0',
+    'tensorflow-tensorboard',
 ]
 
 project_name = 'tensorflow'
@@ -52,20 +57,14 @@ if sys.version_info.major == 3:
   REQUIRED_PACKAGES.append('wheel >= 0.26')
 else:
   REQUIRED_PACKAGES.append('wheel')
-  # mock comes with unittest.mock for python3, need to install for python2
-  REQUIRED_PACKAGES.append('mock >= 2.0.0')
 
-# remove tensorboard from tf-nightly packages
+# tf-nightly should depend on tb-nightly
 if 'tf_nightly' in project_name:
-  for package in REQUIRED_PACKAGES:
-    if 'tensorflow-tensorboard' in package:
-      REQUIRED_PACKAGES.remove(package)
+  for i, pkg in enumerate(REQUIRED_PACKAGES):
+    if 'tensorboard' in pkg:
+      REQUIRED_PACKAGES[i] = 'tb-nightly >= 1.5.0a0, < 1.6.0a0'
       break
 
-# weakref.finalize was introduced in Python 3.4
-if sys.version_info < (3, 4):
-  REQUIRED_PACKAGES.append('backports.weakref >= 1.0rc1')
-
 # pylint: disable=line-too-long
 CONSOLE_SCRIPTS = [
     'freeze_graph = tensorflow.python.tools.freeze_graph:main',
@@ -76,13 +75,13 @@ CONSOLE_SCRIPTS = [
     # is now declared by the tensorboard pip package. If we remove the
     # TensorBoard command, pip will inappropriately remove it during install,
     # even though the command is not removed, just moved to a different wheel.
-    'tensorboard = tensorboard.main:main',
+    'tensorboard = tensorboard.main:run_main',
 ]
 # pylint: enable=line-too-long
 
 # remove the tensorboard console script if building tf_nightly
 if 'tf_nightly' in project_name:
-  CONSOLE_SCRIPTS.remove('tensorboard = tensorboard.main:main')
+  CONSOLE_SCRIPTS.remove('tensorboard = tensorboard.main:run_main')
 
 TEST_PACKAGES = [
     'scipy >= 0.15.1',
diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl
index 68d663acfc..b61012f71e 100644
--- a/tensorflow/workspace.bzl
+++ b/tensorflow/workspace.bzl
@@ -80,7 +80,7 @@ def _apply_patch(repo_ctx, patch_file):
     bazel_sh = _get_env_var(repo_ctx, "BAZEL_SH")
     if not bazel_sh:
       fail("BAZEL_SH environment variable is not set")
-    cmd = [bazel_sh, "-c", " ".join(cmd)]
+    cmd = [bazel_sh, "-l", "-c", " ".join(cmd)]
   _execute_and_check_ret_code(repo_ctx, cmd)
 
 # Download the repository and apply a patch to its root
diff --git a/third_party/flatbuffers/flatbuffers.BUILD b/third_party/flatbuffers/flatbuffers.BUILD
index e1563103c8..0a76adcf91 100644
--- a/third_party/flatbuffers/flatbuffers.BUILD
+++ b/third_party/flatbuffers/flatbuffers.BUILD
@@ -6,8 +6,11 @@ licenses(["notice"])  # Apache 2.0
 
 FLATBUFFERS_COPTS = [
     "-fexceptions",
-    "-Wno-implicit-fallthrough",
-]
+] + select({
+    "@bazel_tools//src:windows": [],
+    "@bazel_tools//src:windows_msvc": [],
+    "//conditions:default": ["-Wno-implicit-fallthrough"],
+})
 
 # Public flatc library to compile flatbuffer files at runtime.
 cc_library(
diff --git a/third_party/mkl/build_defs.bzl b/third_party/mkl/build_defs.bzl
index f637873f14..8b73ddabdd 100644
--- a/third_party/mkl/build_defs.bzl
+++ b/third_party/mkl/build_defs.bzl
@@ -20,7 +20,7 @@ def if_mkl(if_true, if_false = []):
 
     """
     return select({
-        "//third_party/mkl:using_mkl": if_true,
+        str(Label("//third_party/mkl:using_mkl")): if_true,
         "//conditions:default": if_false
     })
 
diff --git a/third_party/nccl.BUILD b/third_party/nccl.BUILD
index 3a2a3afe46..b2b8e18824 100644
--- a/third_party/nccl.BUILD
+++ b/third_party/nccl.BUILD
@@ -55,7 +55,7 @@ cc_library(
         ],
         "@org_tensorflow//tensorflow:ios": [],
         "@org_tensorflow//tensorflow:windows": [
-            "ws2_32.lib",
+            "-DEFAULTLIB:ws2_32.lib",
         ],
         "//conditions:default": [
             "-lrt",
diff --git a/third_party/py/python_configure.bzl b/third_party/py/python_configure.bzl
index bbc07905fc..c16eb3a12a 100644
--- a/third_party/py/python_configure.bzl
+++ b/third_party/py/python_configure.bzl
@@ -1,11 +1,8 @@
-# -*- Python -*-
 """Repository rule for Python autoconfiguration.
 
 `python_configure` depends on the following environment variables:
 
-  * `NUMPY_INCLUDE_PATH`: Location of Numpy libraries.
   * `PYTHON_BIN_PATH`: location of python binary.
-  * `PYTHON_INCLUDE_PATH`: Location of python binaries.
   * `PYTHON_LIB_PATH`: Location of python libraries.
 """
 
@@ -23,32 +20,13 @@ def _tpl(repository_ctx, tpl, substitutions={}, out=None):
       substitutions)
 
 
-def _python_configure_warning(msg):
-  """Output warning message during auto configuration."""
-  yellow = "\033[1;33m"
-  no_color = "\033[0m"
-  print("%sPython Configuration Warning:%s %s" % (yellow, no_color, msg))
-
-
-def _python_configure_fail(msg):
+def _fail(msg):
   """Output failure message when auto configuration fails."""
   red = "\033[0;31m"
   no_color = "\033[0m"
   fail("%sPython Configuration Error:%s %s\n" % (red, no_color, msg))
 
 
-def _get_env_var(repository_ctx, name, default = None, enable_warning = True):
-  """Find an environment variable in system path."""
-  if name in repository_ctx.os.environ:
-    return repository_ctx.os.environ[name]
-  if default != None:
-    if enable_warning:
-      _python_configure_warning(
-          "'%s' environment variable is not set, using '%s' as default" % (name, default))
-    return default
-  _python_configure_fail("'%s' environment variable is not set" % name)
-
-
 def _is_windows(repository_ctx):
   """Returns true if the host operating system is windows."""
   os_name = repository_ctx.os.name.lower()
@@ -73,11 +51,10 @@ def _execute(repository_ctx, cmdline, error_msg=None, error_details=None,
   """
   result = repository_ctx.execute(cmdline)
   if result.stderr or not (empty_stdout_fine or result.stdout):
-    _python_configure_fail(
-        "\n".join([
-            error_msg.strip() if error_msg else "Repository command failed",
-            result.stderr.strip(),
-            error_details if error_details else ""]))
+    _fail("\n".join([
+        error_msg.strip() if error_msg else "Repository command failed",
+        result.stderr.strip(),
+        error_details if error_details else ""]))
   return result
 
 
@@ -163,21 +140,23 @@ def _symlink_genrule_for_dir(repository_ctx, src_dir, dest_dir, genrule_name,
 
 def _get_python_bin(repository_ctx):
   """Gets the python bin path."""
-  python_bin = _get_env_var(repository_ctx, _PYTHON_BIN_PATH,
-                            None, False)
+  python_bin = repository_ctx.os.environ.get(_PYTHON_BIN_PATH)
   if python_bin != None:
     return python_bin
   python_bin_path = repository_ctx.which("python")
   if python_bin_path != None:
     return str(python_bin_path)
-  path = _get_env_var(repository_ctx, "PATH")
-  _python_configure_fail("Cannot find python in PATH, please make sure " +
-      "python is installed and add its directory in PATH, or set the " +
-      "environment variable PYTHON_BIN_PATH.\nPATH=%s" % (path))
+  _fail("Cannot find python in PATH, please make sure " +
+        "python is installed and add its directory in PATH, or --define " +
+        "%s='/something/else'.\nPATH=%s" % (
+            _PYTHON_BIN_PATH, repository_ctx.os.environ.get("PATH", "")))
 
 
 def _get_python_lib(repository_ctx, python_bin):
   """Gets the python lib path."""
+  python_lib = repository_ctx.os.environ.get(_PYTHON_LIB_PATH)
+  if python_lib != None:
+    return python_lib
   print_lib = ("<<END\n" +
       "from __future__ import print_function\n" +
       "import site\n" +
@@ -214,7 +193,7 @@ def _check_python_lib(repository_ctx, python_lib):
   cmd = 'test -d "%s" -a -x "%s"' % (python_lib, python_lib)
   result = repository_ctx.execute(["bash", "-c", cmd])
   if result.return_code == 1:
-    _python_configure_fail("Invalid python library path:  %s" % python_lib)
+    _fail("Invalid python library path: %s" % python_lib)
 
 
 def _check_python_bin(repository_ctx, python_bin):
@@ -222,33 +201,36 @@ def _check_python_bin(repository_ctx, python_bin):
   cmd =  '[[ -x "%s" ]] && [[ ! -d "%s" ]]' % (python_bin, python_bin)
   result = repository_ctx.execute(["bash", "-c", cmd])
   if result.return_code == 1:
-    _python_configure_fail(
-        "PYTHON_BIN_PATH is not executable.  Is it the python binary?")
+    _fail("--define %s='%s' is not executable. Is it the python binary?" % (
+        _PYTHON_BIN_PATH, python_bin))
 
 
 def _get_python_include(repository_ctx, python_bin):
   """Gets the python include path."""
-  result = _execute(repository_ctx,
-                    [python_bin, "-c",
-                     'from __future__ import print_function;' +
-                     'from distutils import sysconfig;' +
-                     'print(sysconfig.get_python_inc())'],
-                    error_msg="Problem getting python include path.",
-                    error_details=("Is the Python binary path set up right? " +
-                                   "(See ./configure or PYTHON_BIN_PATH.) " +
-                                   "Is distutils installed?"))
+  result = _execute(
+      repository_ctx,
+      [python_bin, "-c",
+       'from __future__ import print_function;' +
+       'from distutils import sysconfig;' +
+       'print(sysconfig.get_python_inc())'],
+      error_msg="Problem getting python include path.",
+      error_details=("Is the Python binary path set up right? " +
+                     "(See ./configure or " + _PYTHON_BIN_PATH + ".) " +
+                     "Is distutils installed?"))
   return result.stdout.splitlines()[0]
 
 
 def _get_python_import_lib_name(repository_ctx, python_bin):
   """Get Python import library name (pythonXY.lib) on Windows."""
-  result = _execute(repository_ctx,
-                    [python_bin, "-c",
-                     'import sys;' +
-                     'print("python" + str(sys.version_info[0]) + str(sys.version_info[1]) + ".lib")'],
-                    error_msg="Problem getting python import library.",
-                    error_details=("Is the Python binary path set up right? " +
-                                   "(See ./configure or PYTHON_BIN_PATH.) "))
+  result = _execute(
+      repository_ctx,
+      [python_bin, "-c",
+       'import sys;' +
+       'print("python" + str(sys.version_info[0]) + ' +
+       '      str(sys.version_info[1]) + ".lib")'],
+      error_msg="Problem getting python import library.",
+      error_details=("Is the Python binary path set up right? " +
+                     "(See ./configure or " + _PYTHON_BIN_PATH + ".) "))
   return result.stdout.splitlines()[0]
 
 
@@ -267,8 +249,7 @@ def _create_local_python_repository(repository_ctx):
   """Creates the repository containing files set up to build with Python."""
   python_bin = _get_python_bin(repository_ctx)
   _check_python_bin(repository_ctx, python_bin)
-  python_lib = _get_env_var(repository_ctx, _PYTHON_LIB_PATH,
-                              _get_python_lib(repository_ctx, python_bin))
+  python_lib = _get_python_lib(repository_ctx, python_bin)
   _check_python_lib(repository_ctx, python_lib)
   python_include = _get_python_include(repository_ctx, python_bin)
   numpy_include = _get_numpy_include(repository_ctx, python_bin) + '/numpy'
-- 
GitLab


From c57796f366a0545a04424caeff1b27bbd629f8f0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E7=94=B0=E4=BC=A0=E6=AD=A6?= <dev@goodow.com>
Date: Fri, 1 Dec 2017 08:58:17 +0800
Subject: [PATCH 0486/1225] Update input_fn.md (#14992)

---
 tensorflow/docs_src/get_started/input_fn.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/docs_src/get_started/input_fn.md b/tensorflow/docs_src/get_started/input_fn.md
index f0dcdc47ff..24bfdbdd2e 100644
--- a/tensorflow/docs_src/get_started/input_fn.md
+++ b/tensorflow/docs_src/get_started/input_fn.md
@@ -292,7 +292,7 @@ prediction_set = pd.read_csv("boston_predict.csv", skipinitialspace=True,
 Next, create a list of `FeatureColumn`s for the input data, which formally
 specify the set of features to use for training. Because all features in the
 housing data set contain continuous values, you can create their
-`FeatureColumn`s using the `tf.contrib.layers.real_valued_column()` function:
+`FeatureColumn`s using the `tf.feature_column.numeric_column()` function:
 
 ```python
 feature_cols = [tf.feature_column.numeric_column(k) for k in FEATURES]
-- 
GitLab


From 7ab54c4c48f35a4107e6170cefe5c93245595601 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 30 Nov 2017 17:37:21 -0800
Subject: [PATCH 0487/1225] Support compressed TensorProto format in constant
 folding for types iny16, int8, uint8, and bool, in addition to float ,double,
 int32, and int64, which were already supported.

Add unit test for all types.

PiperOrigin-RevId: 177533200
---
 .../grappler/optimizers/constant_folding.cc   | 24 +++++--
 .../optimizers/constant_folding_test.cc       | 63 ++++++++++++++++++-
 2 files changed, 80 insertions(+), 7 deletions(-)

diff --git a/tensorflow/core/grappler/optimizers/constant_folding.cc b/tensorflow/core/grappler/optimizers/constant_folding.cc
index cf913d6f48..e0f39c2931 100644
--- a/tensorflow/core/grappler/optimizers/constant_folding.cc
+++ b/tensorflow/core/grappler/optimizers/constant_folding.cc
@@ -657,9 +657,9 @@ bool ConstantFolding::IsFoldable(const NodeDef& node) const {
 
 namespace {
 
-#define SET_TENSOR_VAL_CASE(DTYPE, TYPE)           \
+#define SET_TENSOR_VAL_CASE(DTYPE, TYPE, NAME)     \
   case DTYPE:                                      \
-    t->add_##TYPE##_val(static_cast<TYPE>(value)); \
+    t->add_##NAME##_val(static_cast<TYPE>(value)); \
     break;
 
 Status CreateConstantTensorAttrValue(DataType type, double value,
@@ -668,10 +668,14 @@ Status CreateConstantTensorAttrValue(DataType type, double value,
   TensorProto* t = attr_tensor->mutable_tensor();
   *t->mutable_tensor_shape() = shape;
   switch (type) {
-    SET_TENSOR_VAL_CASE(DT_FLOAT, float);
-    SET_TENSOR_VAL_CASE(DT_DOUBLE, double);
-    SET_TENSOR_VAL_CASE(DT_INT64, int64);
-    SET_TENSOR_VAL_CASE(DT_INT32, int);
+    SET_TENSOR_VAL_CASE(DT_FLOAT, float, float);
+    SET_TENSOR_VAL_CASE(DT_DOUBLE, double, double);
+    SET_TENSOR_VAL_CASE(DT_INT64, int64, int64);
+    SET_TENSOR_VAL_CASE(DT_INT32, int32, int);
+    SET_TENSOR_VAL_CASE(DT_INT16, int32, int);
+    SET_TENSOR_VAL_CASE(DT_INT8, int32, int);
+    SET_TENSOR_VAL_CASE(DT_UINT8, int32, int);
+    SET_TENSOR_VAL_CASE(DT_BOOL, bool, bool);
     default:
       return errors::InvalidArgument("Unsupported type: ", type);
   }
@@ -721,6 +725,14 @@ NodeDef ConstantFolding::CreateNodeDef(const string& name,
       POPULATE_TENSOR_PROTO(tensor, t, int64, int64)
     } else if (tensor->dtype() == DT_INT32) {
       POPULATE_TENSOR_PROTO(tensor, t, int32, int)
+    } else if (tensor->dtype() == DT_INT16) {
+      POPULATE_TENSOR_PROTO(tensor, t, int16, int)
+    } else if (tensor->dtype() == DT_INT8) {
+      POPULATE_TENSOR_PROTO(tensor, t, int8, int)
+    } else if (tensor->dtype() == DT_UINT8) {
+      POPULATE_TENSOR_PROTO(tensor, t, uint8, int)
+    } else if (tensor->dtype() == DT_BOOL) {
+      POPULATE_TENSOR_PROTO(tensor, t, bool, bool)
     }
   }
   if (optimized) {
diff --git a/tensorflow/core/grappler/optimizers/constant_folding_test.cc b/tensorflow/core/grappler/optimizers/constant_folding_test.cc
index c72ed96520..32a691d3ee 100644
--- a/tensorflow/core/grappler/optimizers/constant_folding_test.cc
+++ b/tensorflow/core/grappler/optimizers/constant_folding_test.cc
@@ -173,11 +173,70 @@ TEST_F(ConstantFoldingTest, NeutralElement) {
   }
 }
 
+TEST_F(ConstantFoldingTest, CreateConstNodes) {
+  tensorflow::Scope s = tensorflow::Scope::NewRootScope();
+
+#define MAKE_TEST_GRAPH(TYPE)                                               \
+  Output TYPE##_const =                                                     \
+      ops::Const(s.WithOpName(#TYPE "_const"), static_cast<TYPE>(10), {5}); \
+  Output TYPE##_mul =                                                       \
+      ops::Mul(s.WithOpName(#TYPE "_mul"), TYPE##_const, TYPE##_const);     \
+  Output TYPE##_id = ops::Identity(s.WithOpName(#TYPE "_id"), TYPE##_mul)
+
+  MAKE_TEST_GRAPH(float);
+  MAKE_TEST_GRAPH(double);
+  MAKE_TEST_GRAPH(int64);
+  MAKE_TEST_GRAPH(int32);
+  MAKE_TEST_GRAPH(int16);
+  MAKE_TEST_GRAPH(int8);
+  MAKE_TEST_GRAPH(uint8);
+#undef MAKE_TEST_GRAPH
+
+  Output bool_const = ops::Const(s.WithOpName("bool_const"), true, {5});
+  Output bool_and =
+      ops::LogicalAnd(s.WithOpName("bool_and"), bool_const, bool_const);
+  Output bool_id = ops::Identity(s.WithOpName("bool_id"), bool_and);
+
+  GrapplerItem item;
+  TF_CHECK_OK(s.ToGraphDef(&item.graph));
+  ConstantFolding fold(nullptr /* cpu_device */);
+  GraphDef output;
+  Status status = fold.Optimize(nullptr, item, &output);
+  TF_EXPECT_OK(status);
+
+  EXPECT_EQ(24, output.node_size());
+  for (const NodeDef& node : output.node()) {
+#define CHECK_RESULT(TYPE, FIELD)                                             \
+  if (node.name() == #TYPE "_mul") {                                          \
+    EXPECT_EQ(5,                                                              \
+              node.attr().at("value").tensor().tensor_shape().dim(0).size()); \
+    EXPECT_EQ(1, node.attr().at("value").tensor().FIELD##_val_size());        \
+    EXPECT_EQ(10 * 10, node.attr().at("value").tensor().FIELD##_val(0));      \
+  }
+
+    CHECK_RESULT(float, float);
+    CHECK_RESULT(double, double);
+    CHECK_RESULT(int64, int64);
+    CHECK_RESULT(int32, int);
+    CHECK_RESULT(int16, int);
+    CHECK_RESULT(int8, int);
+    CHECK_RESULT(uint8, int);
+#undef CHECK_RESULT
+
+    if (node.name() == "bool_and") {
+      EXPECT_EQ(5,
+                node.attr().at("value").tensor().tensor_shape().dim(0).size());
+      EXPECT_EQ(1, node.attr().at("value").tensor().bool_val_size());
+      EXPECT_EQ(true && true, node.attr().at("value").tensor().bool_val(0));
+    }
+  }
+}
+
 TEST_F(ConstantFoldingTest, FoldingNodeWithTwoOutputs) {
   // Build a simple graph with a few trivially prunable ops.
   tensorflow::Scope s = tensorflow::Scope::NewRootScope();
 
-  Output a = ops::Const(s.WithOpName("a"), 10, {3});
+  Output a = ops::Const(s.WithOpName("a"), 10, {5});
   auto b = ops::Unique(s.WithOpName("b"), {a});
   Output c = ops::Identity(s.WithOpName("c"), {b.y});
   Output d = ops::Identity(s.WithOpName("d"), {b.idx});
@@ -1059,3 +1118,5 @@ TEST_F(ConstantFoldingTest, MaterializeReductionIndices) {
 }  // namespace
 }  // namespace grappler
 }  // namespace tensorflow
+
+//  LocalWords:  NewRootScope
-- 
GitLab


From 1a89cf58c021ef176c624b4070ee8422303e29a2 Mon Sep 17 00:00:00 2001
From: Max Galkin <maxgalkin@google.com>
Date: Thu, 30 Nov 2017 18:01:03 -0800
Subject: [PATCH 0488/1225] Output unknown dimension root nodes with
 --vmodule=graph_properties=2

PiperOrigin-RevId: 177535370
---
 .../core/grappler/costs/graph_properties.cc   | 77 +++++++++++++++++++
 1 file changed, 77 insertions(+)

diff --git a/tensorflow/core/grappler/costs/graph_properties.cc b/tensorflow/core/grappler/costs/graph_properties.cc
index fbc52e9bd1..ec44d11bdd 100644
--- a/tensorflow/core/grappler/costs/graph_properties.cc
+++ b/tensorflow/core/grappler/costs/graph_properties.cc
@@ -265,6 +265,79 @@ bool IsEnterWithQueue(const Node& node) {
   return false;
 }
 
+bool HasAnyUnknownDimensions(const TensorShapeProto& proto) {
+  if (proto.unknown_rank()) {
+    return true;
+  }
+  for (const auto& dim : proto.dim()) {
+    if (dim.size() < 0) {
+      return true;
+    }
+  }
+  return false;
+}
+
+void VerboseLogUnknownDimensionSources(
+    const Graph& graph,
+    const std::map<string, std::vector<OpInfo::TensorProperties>>&
+        input_properties_map,
+    const std::map<string, std::vector<OpInfo::TensorProperties>>&
+        output_properties_map) {
+  if (!VLOG_IS_ON(2)) {
+    return;
+  }
+
+  VLOG(2) << "Nodes with known inputs, but with unknown output dimensions:";
+
+  // Find all nodes in the graph for which we
+  // do not have any unknown dimensions in their inputs, but
+  // we have some unknown dimensions in their outputs.
+  for (const Node* const node : graph.nodes()) {
+    if (node->num_outputs() == 0) {
+      continue;
+    }
+
+    const auto& input_properties = input_properties_map.at(node->name());
+    const auto& output_properties = output_properties_map.at(node->name());
+
+    bool has_unknown_inputs = false;
+    for (int i = 0; i < node->num_inputs(); ++i) {
+      if (HasAnyUnknownDimensions(input_properties[i].shape())) {
+        has_unknown_inputs = true;
+        break;
+      }
+    }
+
+    if (has_unknown_inputs) {
+      continue;
+    }
+
+    for (int i = 0; i < node->num_outputs(); ++i) {
+      if (HasAnyUnknownDimensions(output_properties[i].shape())) {
+        string inputs = "input_shapes=[";
+        for (int i = 0; i < node->num_inputs(); ++i) {
+          inputs +=
+              PartialTensorShape::DebugString(input_properties[i].shape());
+        }
+        inputs += "]";
+
+        string outputs = "output_shapes=[";
+        for (int i = 0; i < node->num_outputs(); ++i) {
+          outputs +=
+              PartialTensorShape::DebugString(output_properties[i].shape());
+        }
+        outputs += "]";
+
+        VLOG(2) << "Node: " << node->name() << ", Op: " << node->def().op()
+                << ", " << inputs << ", " << outputs;
+
+        // don't log again for this node
+        break;
+      }
+    }
+  }
+}
+
 }  // namespace
 
 // Queue of nodes to process. Nodes can be enqueued in any order, but will be
@@ -1000,6 +1073,10 @@ Status GraphProperties::InferStatically(bool assume_valid_feeds) {
     }
   }
 
+  // Help trace the unknown dimensions to their origins.
+  VerboseLogUnknownDimensionSources(graph, input_properties_,
+                                    output_properties_);
+
   return Status::OK();
 }
 
-- 
GitLab


From 6e16af86658cd27b466c7c3ba270338b8f95f184 Mon Sep 17 00:00:00 2001
From: Peter Hawkins <phawkins@google.com>
Date: Thu, 30 Nov 2017 18:24:27 -0800
Subject: [PATCH 0489/1225] Register more ops with bfloat16 types.

PiperOrigin-RevId: 177537667
---
 .../compiler/tf2xla/kernels/matmul_op.cc      |   5 +-
 .../compiler/tf2xla/kernels/scan_ops.cc       |  13 +-
 .../contrib/tpu/ops/cross_replica_ops.cc      |   2 +-
 tensorflow/core/framework/numeric_types.h     |   6 +-
 .../core/framework/op_def_builder_test.cc     |  15 ++-
 tensorflow/core/framework/types.cc            |  24 ++--
 tensorflow/core/ops/array_ops.cc              |  32 +++--
 tensorflow/core/ops/math_ops.cc               | 119 +++++++++---------
 tensorflow/core/ops/nn_ops.cc                 |  66 +++++-----
 tensorflow/core/ops/random_ops.cc             |   8 +-
 10 files changed, 150 insertions(+), 140 deletions(-)

diff --git a/tensorflow/compiler/tf2xla/kernels/matmul_op.cc b/tensorflow/compiler/tf2xla/kernels/matmul_op.cc
index a62d233526..644abd5905 100644
--- a/tensorflow/compiler/tf2xla/kernels/matmul_op.cc
+++ b/tensorflow/compiler/tf2xla/kernels/matmul_op.cc
@@ -85,10 +85,7 @@ class SparseMatMulOp : public MatMulOp {
   ~SparseMatMulOp() override = default;
 };
 
-REGISTER_XLA_OP(Name("SparseMatMul")
-                    .TypeConstraint("Ta", kFloatTypes)
-                    .TypeConstraint("Tb", kFloatTypes),
-                SparseMatMulOp);
+REGISTER_XLA_OP(Name("SparseMatMul"), SparseMatMulOp);
 
 }  // namespace
 }  // namespace tensorflow
diff --git a/tensorflow/compiler/tf2xla/kernels/scan_ops.cc b/tensorflow/compiler/tf2xla/kernels/scan_ops.cc
index 3cc9d14411..650f8c7dc8 100644
--- a/tensorflow/compiler/tf2xla/kernels/scan_ops.cc
+++ b/tensorflow/compiler/tf2xla/kernels/scan_ops.cc
@@ -35,6 +35,11 @@ limitations under the License.
 namespace tensorflow {
 namespace {
 
+// TODO(phawkins): implement double-sized windowed reductions in XLA and remove
+// the type constraint.
+constexpr std::array<DataType, 3> kScanOpTypes = {
+    {DT_HALF, DT_BFLOAT16, DT_FLOAT}};
+
 class ScanOp : public XlaOpKernel {
  public:
   ScanOp(OpKernelConstruction* ctx, bool sum) : XlaOpKernel(ctx), sum_(sum) {
@@ -124,17 +129,13 @@ class CumsumOp : public ScanOp {
  public:
   explicit CumsumOp(OpKernelConstruction* ctx) : ScanOp(ctx, /*sum=*/true) {}
 };
-// TODO(phawkins): implement non-float windowed reductions in XLA and remove the
-// type constraint.
-REGISTER_XLA_OP(Name("Cumsum").TypeConstraint("T", DT_FLOAT), CumsumOp);
+REGISTER_XLA_OP(Name("Cumsum").TypeConstraint("T", kScanOpTypes), CumsumOp);
 
 class CumprodOp : public ScanOp {
  public:
   explicit CumprodOp(OpKernelConstruction* ctx) : ScanOp(ctx, /*sum=*/false) {}
 };
-// TODO(phawkins): implement non-float windowed reductions in XLA and remove the
-// type constraint.
-REGISTER_XLA_OP(Name("Cumprod").TypeConstraint("T", DT_FLOAT), CumprodOp);
+REGISTER_XLA_OP(Name("Cumprod").TypeConstraint("T", kScanOpTypes), CumprodOp);
 
 }  // anonymous namespace
 }  // namespace tensorflow
diff --git a/tensorflow/contrib/tpu/ops/cross_replica_ops.cc b/tensorflow/contrib/tpu/ops/cross_replica_ops.cc
index cbbd19800e..d389050e67 100644
--- a/tensorflow/contrib/tpu/ops/cross_replica_ops.cc
+++ b/tensorflow/contrib/tpu/ops/cross_replica_ops.cc
@@ -22,7 +22,7 @@ namespace tensorflow {
 REGISTER_OP("CrossReplicaSum")
     .Input("input: T")
     .Output("output: T")
-    .Attr("T: {float}")
+    .Attr("T: {bfloat16, float}")
     .SetShapeFn(shape_inference::UnchangedShape)
     .Doc(R"doc(
 An Op to sum inputs across replicated TPU instances. Each
diff --git a/tensorflow/core/framework/numeric_types.h b/tensorflow/core/framework/numeric_types.h
index 29cac26244..bdd5af064b 100644
--- a/tensorflow/core/framework/numeric_types.h
+++ b/tensorflow/core/framework/numeric_types.h
@@ -58,7 +58,7 @@ struct bfloat16 {
   explicit EIGEN_DEVICE_FUNC bfloat16(const T& val)
       : bfloat16(static_cast<float>(val)) {}
 
-  EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(float) const {
+  EIGEN_DEVICE_FUNC explicit operator float() const {
     float result;
 
     uint16_t* q = reinterpret_cast<uint16_t*>(&result);
@@ -89,6 +89,10 @@ struct bfloat16 {
     return static_cast<int>(float(*this));
   }
 
+  EIGEN_DEVICE_FUNC explicit operator long() const {
+    return static_cast<long>(float(*this));
+  }
+
   EIGEN_DEVICE_FUNC explicit operator char() const {
     return static_cast<char>(float(*this));
   }
diff --git a/tensorflow/core/framework/op_def_builder_test.cc b/tensorflow/core/framework/op_def_builder_test.cc
index c1511ebe34..9b24e3aa00 100644
--- a/tensorflow/core/framework/op_def_builder_test.cc
+++ b/tensorflow/core/framework/op_def_builder_test.cc
@@ -124,22 +124,23 @@ TEST_F(OpDefBuilderTest, AttrWithRestrictions) {
       "attr: { name: 'a' type: 'type' allowed_values { list { type: "
       "[DT_HALF, DT_FLOAT, DT_DOUBLE, DT_INT64, DT_INT32, DT_UINT8, DT_INT16, "
       "DT_UINT16, DT_INT8, DT_COMPLEX64, DT_COMPLEX128, DT_QINT8, DT_QUINT8, "
-      "DT_QINT32, DT_UINT32, DT_UINT64] } } }");
+      "DT_QINT32, DT_UINT32, DT_UINT64, DT_BFLOAT16] } } }");
   ExpectSuccess(
       b().Attr("a:{numbertype, variant}"),
       "attr: { name: 'a' type: 'type' allowed_values { list { type: "
       "[DT_HALF, DT_FLOAT, DT_DOUBLE, DT_INT64, DT_INT32, DT_UINT8, DT_INT16, "
       "DT_UINT16, DT_INT8, DT_COMPLEX64, DT_COMPLEX128, DT_QINT8, DT_QUINT8, "
-      "DT_QINT32, DT_UINT32, DT_UINT64, DT_VARIANT] } } }");
+      "DT_QINT32, DT_UINT32, DT_UINT64, DT_BFLOAT16, DT_VARIANT] } } }");
   ExpectSuccess(b().Attr("a:realnumbertype"),
                 "attr: { name: 'a' type: 'type' allowed_values { list { type: "
                 "[DT_HALF, DT_FLOAT, DT_DOUBLE, DT_INT64, DT_INT32, DT_UINT8, "
-                "DT_INT16, DT_UINT16, DT_INT8, DT_UINT32, DT_UINT64] } } }");
+                "DT_INT16, DT_UINT16, DT_INT8, DT_UINT32, DT_UINT64, "
+                "DT_BFLOAT16] } } }");
   ExpectSuccess(b().Attr("a:{realnumbertype,  variant , string, }"),
                 "attr: { name: 'a' type: 'type' allowed_values { list { type: "
                 "[DT_HALF, DT_FLOAT, DT_DOUBLE, DT_INT64, DT_INT32, DT_UINT8, "
                 "DT_INT16, DT_UINT16, DT_INT8, DT_UINT32, DT_UINT64, "
-                "DT_VARIANT, DT_STRING] } } }");
+                "DT_BFLOAT16, DT_VARIANT, DT_STRING] } } }");
   ExpectSuccess(b().Attr("a:quantizedtype"),
                 "attr: { name: 'a' type: 'type' allowed_values { list { type: "
                 "[DT_QINT8, DT_QUINT8, DT_QINT32, DT_QINT16, DT_QUINT16]} } }");
@@ -216,12 +217,14 @@ TEST_F(OpDefBuilderTest, AttrListOfRestricted) {
       b().Attr("a:list(realnumbertype)"),
       "attr: { name: 'a' type: 'list(type)' allowed_values { list { type: "
       "[DT_FLOAT, DT_DOUBLE, DT_INT64, DT_INT32, DT_UINT8, DT_INT16, "
-      "DT_UINT16, DT_INT8, DT_HALF, DT_UINT32, DT_UINT64] } } }");
+      "DT_UINT16, DT_INT8, DT_HALF, DT_BFLOAT16, DT_UINT32, DT_UINT64"
+      "] } } }");
   ExpectSuccess(
       b().Attr("a:list({realnumbertype, variant})"),
       "attr: { name: 'a' type: 'list(type)' allowed_values { list { type: "
       "[DT_FLOAT, DT_DOUBLE, DT_INT64, DT_INT32, DT_UINT8, DT_INT16, "
-      "DT_UINT16, DT_INT8, DT_HALF, DT_UINT32, DT_UINT64, DT_VARIANT] } } }");
+      "DT_UINT16, DT_INT8, DT_HALF, DT_BFLOAT16, DT_UINT32, DT_UINT64, "
+      "DT_VARIANT] } } }");
   ExpectSuccess(
       b().Attr("a:list(quantizedtype)"),
       "attr: { name: 'a' type: 'list(type)' allowed_values { list { type: "
diff --git a/tensorflow/core/framework/types.cc b/tensorflow/core/framework/types.cc
index faae19585d..48849f9dda 100644
--- a/tensorflow/core/framework/types.cc
+++ b/tensorflow/core/framework/types.cc
@@ -206,18 +206,18 @@ string DataTypeSliceString(const DataTypeSlice types) {
 }
 
 DataTypeVector AllTypes() {
-  return {DT_FLOAT,   DT_DOUBLE, DT_INT32,  DT_UINT8,     DT_INT16,
-          DT_UINT16,  DT_INT8,   DT_STRING, DT_COMPLEX64, DT_COMPLEX128,
-          DT_INT64,   DT_BOOL,   DT_QINT8,  DT_QUINT8,    DT_QINT16,
-          DT_QUINT16, DT_QINT32, DT_HALF,   DT_RESOURCE,  DT_VARIANT,
-          DT_UINT32,  DT_UINT64};
+  return {DT_FLOAT,   DT_DOUBLE, DT_INT32,   DT_UINT8,     DT_INT16,
+          DT_UINT16,  DT_INT8,   DT_STRING,  DT_COMPLEX64, DT_COMPLEX128,
+          DT_INT64,   DT_BOOL,   DT_QINT8,   DT_QUINT8,    DT_QINT16,
+          DT_QUINT16, DT_QINT32, DT_HALF,    DT_RESOURCE,  DT_VARIANT,
+          DT_UINT32,  DT_UINT64, DT_BFLOAT16};
 }
 
 #if !defined(IS_MOBILE_PLATFORM) || defined(SUPPORT_SELECTIVE_REGISTRATION)
 
 DataTypeVector RealNumberTypes() {
-  return {DT_FLOAT, DT_DOUBLE, DT_INT32, DT_INT64,  DT_UINT8, DT_INT16,
-          DT_INT8,  DT_UINT16, DT_HALF,  DT_UINT32, DT_UINT64};
+  return {DT_FLOAT, DT_DOUBLE, DT_INT32, DT_INT64,  DT_UINT8,  DT_INT16,
+          DT_INT8,  DT_UINT16, DT_HALF,  DT_UINT32, DT_UINT64, DT_BFLOAT16};
 }
 
 DataTypeVector QuantizedTypes() {
@@ -227,14 +227,14 @@ DataTypeVector QuantizedTypes() {
 DataTypeVector RealAndQuantizedTypes() {
   return {DT_FLOAT,  DT_DOUBLE,  DT_INT32,  DT_INT64, DT_UINT8,
           DT_UINT16, DT_UINT16,  DT_INT8,   DT_QINT8, DT_QUINT8,
-          DT_QINT16, DT_QUINT16, DT_QINT32, DT_HALF};
+          DT_QINT16, DT_QUINT16, DT_QINT32, DT_HALF,  DT_BFLOAT16};
 }
 
 DataTypeVector NumberTypes() {
-  return {DT_FLOAT,     DT_DOUBLE,     DT_INT64,  DT_INT32,
-          DT_UINT8,     DT_UINT16,     DT_INT16,  DT_INT8,
-          DT_COMPLEX64, DT_COMPLEX128, DT_QINT8,  DT_QUINT8,
-          DT_QINT32,    DT_HALF,       DT_UINT32, DT_UINT64};
+  return {DT_FLOAT,  DT_DOUBLE,  DT_INT64,  DT_INT32,     DT_UINT8,
+          DT_UINT16, DT_INT16,   DT_INT8,   DT_COMPLEX64, DT_COMPLEX128,
+          DT_QINT8,  DT_QUINT8,  DT_QINT32, DT_HALF,      DT_UINT32,
+          DT_UINT64, DT_BFLOAT16};
 }
 
 #elif defined(__ANDROID_TYPES_FULL__)
diff --git a/tensorflow/core/ops/array_ops.cc b/tensorflow/core/ops/array_ops.cc
index 9fa6423d59..6f4ea09206 100644
--- a/tensorflow/core/ops/array_ops.cc
+++ b/tensorflow/core/ops/array_ops.cc
@@ -724,8 +724,8 @@ REGISTER_OP("OnesLike")
     .Input("x: T")
     .Output("y: T")
     .Attr(
-        "T: {float, double, int8, uint8, int16, uint16, int32, int64, "
-        "complex64, complex128, bool}")
+        "T: {bfloat16, float, double, int8, uint8, int16, uint16, int32, "
+        "int64, complex64, complex128, bool}")
     .SetShapeFn(shape_inference::UnchangedShape)
     .Doc(R"doc(
 Returns a tensor of ones with the same shape and type as x.
@@ -738,7 +738,7 @@ y: a tensor of the same shape and type as x but filled with ones.
 REGISTER_OP("Diag")
     .Input("diagonal: T")
     .Output("output: T")
-    .Attr("T: {float, double, int32, int64, complex64, complex128}")
+    .Attr("T: {bfloat16, float, double, int32, int64, complex64, complex128}")
     .SetShapeFn([](InferenceContext* c) {
       ShapeHandle in = c->input(0);
       TF_RETURN_IF_ERROR(c->WithRankAtLeast(in, 1, &in));
@@ -776,7 +776,7 @@ diagonal: Rank k tensor where k is at most 1.
 REGISTER_OP("DiagPart")
     .Input("input: T")
     .Output("diagonal: T")
-    .Attr("T: {float, double, int32, int64, complex64, complex128}")
+    .Attr("T: {bfloat16, float, double, int32, int64, complex64, complex128}")
     .SetShapeFn([](InferenceContext* c) {
       ShapeHandle in = c->input(0);
       if (!c->RankKnown(in)) {
@@ -1059,9 +1059,8 @@ REGISTER_OP("Reverse")
     .Input("dims: bool")
     .Output("output: T")
     .Attr(
-        "T: {uint8, int8, uint16, int16, int32, int64, bool, half, float, "
-        "double, complex64, "
-        "complex128, string}")
+        "T: {uint8, int8, uint16, int16, int32, int64, bool, half, "
+        "float, double, complex64, complex128, string}")
     .SetShapeFn([](InferenceContext* c) {
       ShapeHandle input = c->input(0);
       ShapeHandle dims;
@@ -1137,9 +1136,8 @@ REGISTER_OP("ReverseV2")
     .Output("output: T")
     .Attr("Tidx: {int32, int64} = DT_INT32")
     .Attr(
-        "T: {uint8, int8, uint16, int16, int32, int64, bool, half, float, "
-        "double, complex64, "
-        "complex128, string}")
+        "T: {uint8, int8, uint16, int16, int32, int64, bool, half, bfloat16, "
+        "float, double, complex64, complex128, string}")
     .SetShapeFn([](InferenceContext* c) {
       ShapeHandle input = c->input(0);
       ShapeHandle axis;
@@ -1834,7 +1832,7 @@ this operation.
 REGISTER_OP("CheckNumerics")
     .Input("tensor: T")
     .Output("output: T")
-    .Attr("T: {half, float, double}")
+    .Attr("T: {half, bfloat16, float, double}")
     .Attr("message: string")
     .SetShapeFn(shape_inference::UnchangedShape)
     .Doc(R"doc(
@@ -4565,12 +4563,12 @@ REGISTER_OP("Bitcast")
     .Output("output: type")
     // All supported dtypes are listed here to include qint16 and quint16.
     .Attr(
-        "T: {float, double, int64, int32, uint8, uint16, int8, int16,"
+        "T: {bfloat16, float, double, int64, int32, uint8, uint16, int8, int16,"
         " complex64, complex128, qint8, quint8, qint16, quint16, qint32,"
         " half}")
     .Attr(
-        "type: {float, double, int64, int32, uint8, uint16, int8, int16,"
-        " complex64, complex128, qint8, quint8, qint16, quint16, qint32,"
+        "type: {bfloat16, float, double, int64, int32, uint8, uint16, int8, "
+        "int16, complex64, complex128, qint8, quint8, qint16, quint16, qint32,"
         " half}")
     .SetShapeFn([](InferenceContext* c) {
       ShapeHandle input = c->input(0);
@@ -4782,7 +4780,7 @@ REGISTER_OP("QuantizeAndDequantize")
     .Attr("input_min: float = 0")
     .Attr("input_max: float = 0")
     .Output("output: T")
-    .Attr("T: {float, double}")
+    .Attr("T: {bfloat16, float, double}")
     .SetShapeFn(shape_inference::UnchangedShape)
     .Deprecated(22, "Replaced by QuantizeAndDequantizeV2")
     .Doc(R"doc(
@@ -4798,7 +4796,7 @@ REGISTER_OP("QuantizeAndDequantizeV2")
     .Attr("num_bits: int = 8")
     .Attr("range_given: bool = false")
     .Output("output: T")
-    .Attr("T: {float, double}")
+    .Attr("T: {bfloat16, float, double}")
     .SetShapeFn([](InferenceContext* c) {
       ShapeHandle unused;
       TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 0, &unused));
@@ -4877,7 +4875,7 @@ REGISTER_OP("QuantizeAndDequantizeV3")
     .Attr("signed_input: bool = true")
     .Attr("range_given: bool = true")
     .Output("output: T")
-    .Attr("T: {float, double}")
+    .Attr("T: {bfloat16, float, double}")
     .SetShapeFn([](InferenceContext* c) {
       ShapeHandle unused;
       TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 0, &unused));
diff --git a/tensorflow/core/ops/math_ops.cc b/tensorflow/core/ops/math_ops.cc
index ceda11663a..45ebfa203b 100644
--- a/tensorflow/core/ops/math_ops.cc
+++ b/tensorflow/core/ops/math_ops.cc
@@ -85,7 +85,7 @@ REGISTER_OP("BatchMatMul")
     .Input("x: T")
     .Input("y: T")
     .Output("output: T")
-    .Attr("T: {half, float, double, int32, complex64, complex128}")
+    .Attr("T: {half, bfloat16, float, double, int32, complex64, complex128}")
     .Attr("adj_x: bool = false")
     .Attr("adj_y: bool = false")
     .SetShapeFn([](InferenceContext* c) {
@@ -184,7 +184,7 @@ _HostCast requires its input and produces its output in host memory.
 REGISTER_OP("Abs")
     .Input("x: T")
     .Output("y: T")
-    .Attr("T: {half, float, double, int32, int64}")
+    .Attr("T: {half, bfloat16, float, double, int32, int64}")
     .SetShapeFn(shape_inference::UnchangedShape)
     .Doc(R"doc(
 Computes the absolute value of a tensor.
@@ -210,29 +210,31 @@ value is computed as \\( \sqrt{a^2 + b^2}\\).
 )doc");
 
 // Declares cwise unary operations signature: 't -> 't
-#define UNARY()                                                              \
-  Input("x: T")                                                              \
-      .Output("y: T")                                                        \
-      .Attr("T: {half, float, double, int32, int64, complex64, complex128}") \
+#define UNARY()                                                          \
+  Input("x: T")                                                          \
+      .Output("y: T")                                                    \
+      .Attr(                                                             \
+          "T: {half, bfloat16, float, double, int32, int64, complex64, " \
+          "complex128}")                                                 \
       .SetShapeFn(shape_inference::UnchangedShape)
 
-#define UNARY_REAL()                    \
-  Input("x: T")                         \
-      .Output("y: T")                   \
-      .Attr("T: {half, float, double}") \
+#define UNARY_REAL()                              \
+  Input("x: T")                                   \
+      .Output("y: T")                             \
+      .Attr("T: {half, bfloat16, float, double}") \
       .SetShapeFn(shape_inference::UnchangedShape)
 
-#define UNARY_COMPLEX()                                        \
-  Input("x: T")                                                \
-      .Output("y: T")                                          \
-      .Attr("T: {half, float, double, complex64, complex128}") \
+#define UNARY_COMPLEX()                                                  \
+  Input("x: T")                                                          \
+      .Output("y: T")                                                    \
+      .Attr("T: {half, bfloat16, float, double, complex64, complex128}") \
       .SetShapeFn(shape_inference::UnchangedShape)
 
-#define UNARY_GRADIENT_COMPLEX()                               \
-  Input("y: T")                                                \
-      .Input("dy: T")                                          \
-      .Output("z: T")                                          \
-      .Attr("T: {half, float, double, complex64, complex128}") \
+#define UNARY_GRADIENT_COMPLEX()                                         \
+  Input("y: T")                                                          \
+      .Input("dy: T")                                                    \
+      .Output("z: T")                                                    \
+      .Attr("T: {half, bfloat16, float, double, complex64, complex128}") \
       .SetShapeFn(shape_inference::UnchangedShape)
 
 REGISTER_OP("Neg")
@@ -481,7 +483,7 @@ Computes atan of x element-wise.
 REGISTER_OP("IsNan")
     .Input("x: T")
     .Output("y: bool")
-    .Attr("T: {half, float, double}")
+    .Attr("T: {half, bfloat16, float, double}")
     .SetShapeFn(shape_inference::UnchangedShape)
     .Doc(R"doc(
 Returns which elements of x are NaN.
@@ -494,7 +496,7 @@ Equivalent to np.isnan
 REGISTER_OP("IsInf")
     .Input("x: T")
     .Output("y: bool")
-    .Attr("T: {half, float, double}")
+    .Attr("T: {half, bfloat16, float, double}")
     .SetShapeFn(shape_inference::UnchangedShape)
     .Doc(R"doc(
 Returns which elements of x are Inf.
@@ -507,7 +509,7 @@ Equivalent to np.isinf
 REGISTER_OP("IsFinite")
     .Input("x: T")
     .Output("y: bool")
-    .Attr("T: {half, float, double}")
+    .Attr("T: {half, bfloat16, float, double}")
     .SetShapeFn(shape_inference::UnchangedShape)
     .Doc(R"doc(
 Returns which elements of x are finite.
@@ -520,7 +522,9 @@ Equivalent to np.isfinite
 REGISTER_OP("Sign")
     .Input("x: T")
     .Output("y: T")
-    .Attr("T: {half, float, double, int32, int64, complex64, complex128}")
+    .Attr(
+        "T: {half, bfloat16, float, double, int32, int64, complex64, "
+        "complex128}")
     .SetShapeFn(shape_inference::UnchangedShape)
     .Doc(R"doc(
 Returns an element-wise indication of the sign of a number.
@@ -533,7 +537,7 @@ For complex numbers, `y = sign(x) = x / |x|` if `x != 0`, otherwise `y = 0`.
 REGISTER_OP("Floor")
     .Input("x: T")
     .Output("y: T")
-    .Attr("T: {half, float, double}")
+    .Attr("T: {half, bfloat16, float, double}")
     .SetShapeFn(shape_inference::UnchangedShape)
     .Doc(R"doc(
 Returns element-wise largest integer not greater than x.
@@ -542,7 +546,7 @@ Returns element-wise largest integer not greater than x.
 REGISTER_OP("Ceil")
     .Input("x: T")
     .Output("y: T")
-    .Attr("T: {half, float, double}")
+    .Attr("T: {half, bfloat16, float, double}")
     .SetShapeFn(shape_inference::UnchangedShape)
     .Doc(R"doc(
 Returns element-wise smallest integer in not less than x.
@@ -551,7 +555,7 @@ Returns element-wise smallest integer in not less than x.
 REGISTER_OP("Rint")
     .Input("x: T")
     .Output("y: T")
-    .Attr("T: {float, double}")
+    .Attr("T: {bfloat16, float, double}")
     .SetShapeFn(shape_inference::UnchangedShape)
     .Doc(R"doc(
 Returns element-wise integer closest to x.
@@ -569,22 +573,23 @@ rint([-1.7, -1.5, -0.2, 0.2, 1.5, 1.7, 2.0]) ==> [-2., -2., -0., 0., 2., 2., 2.]
 
 // Declares cwise binary operations signature: 't, 't -> 't.
 
-#define BINARY_MORE()                                                       \
-  Input("x: T").Input("y: T").Output("z: T").Attr(                          \
-      "T: {half, float, double, uint8, int8, uint16, int16, int32, int64, " \
-      "complex64, complex128}")
+#define BINARY_MORE()                                                          \
+  Input("x: T").Input("y: T").Output("z: T").Attr(                             \
+      "T: {half, bfloat16, float, double, uint8, int8, uint16, int16, int32, " \
+      "int64, complex64, complex128}")
 
-#define BINARY_FEWER()                             \
-  Input("x: T").Input("y: T").Output("z: T").Attr( \
-      "T: {half, float, double, int32, int64, complex64, complex128}")
+#define BINARY_FEWER()                                               \
+  Input("x: T").Input("y: T").Output("z: T").Attr(                   \
+      "T: {half, bfloat16, float, double, int32, int64, complex64, " \
+      "complex128}")
 
 REGISTER_OP("Add")
     .Input("x: T")
     .Input("y: T")
     .Output("z: T")
     .Attr(
-        "T: {half, float, double, uint8, int8, int16, int32, int64, complex64, "
-        "complex128, string}")
+        "T: {half, bfloat16, float, double, uint8, int8, int16, int32, int64, "
+        "complex64, complex128, string}")
     .SetShapeFn(shape_inference::BroadcastBinaryOpShapeFn)
     .Doc(R"doc(
 Returns x + y element-wise.
@@ -600,8 +605,8 @@ REGISTER_OP("AddV2")
     .Input("y: T")
     .Output("z: T")
     .Attr(
-        "T: {half, float, double, uint8, int8, int16, int32, int64, complex64, "
-        "complex128}")
+        "T: {half, bfloat16, float, double, uint8, int8, int16, int32, int64, "
+        "complex64, complex128}")
     .SetShapeFn(shape_inference::BroadcastBinaryOpShapeFn)
     .SetIsAggregate()
     .SetIsCommutative()
@@ -757,7 +762,7 @@ REGISTER_OP("Maximum")
     .Input("x: T")
     .Input("y: T")
     .Output("z: T")
-    .Attr("T: {half, float, double, int32, int64}")
+    .Attr("T: {half, bfloat16, float, double, int32, int64}")
     .SetIsCommutative()
     .SetShapeFn(shape_inference::BroadcastBinaryOpShapeFn)
     .Doc(R"doc(
@@ -788,7 +793,7 @@ REGISTER_OP("Minimum")
     .Input("x: T")
     .Input("y: T")
     .Output("z: T")
-    .Attr("T: {half, float, double, int32, int64}")
+    .Attr("T: {half, bfloat16, float, double, int32, int64}")
     .SetIsCommutative()
     .SetShapeFn(shape_inference::BroadcastBinaryOpShapeFn)
     .Doc(R"doc(
@@ -802,7 +807,7 @@ REGISTER_OP("Mod")
     .Input("x: T")
     .Input("y: T")
     .Output("z: T")
-    .Attr("T: {int32, int64, float, double}")
+    .Attr("T: {int32, int64, bfloat16, float, double}")
     .SetShapeFn(shape_inference::BroadcastBinaryOpShapeFn)
     .Doc(R"doc(
 Returns element-wise remainder of division. This emulates C semantics in that
@@ -817,7 +822,7 @@ REGISTER_OP("FloorMod")
     .Input("x: T")
     .Input("y: T")
     .Output("z: T")
-    .Attr("T: {int32, int64, float, double}")
+    .Attr("T: {int32, int64, bfloat16, float, double}")
     .SetShapeFn(shape_inference::BroadcastBinaryOpShapeFn)
     .Doc(R"doc(
 Returns element-wise remainder of division. When `x < 0` xor `y < 0` is
@@ -832,7 +837,7 @@ REGISTER_OP("TruncateMod")
     .Input("x: T")
     .Input("y: T")
     .Output("z: T")
-    .Attr("T: {int32, int64, float, double}")
+    .Attr("T: {int32, int64, bfloat16, float, double}")
     .SetShapeFn(shape_inference::BroadcastBinaryOpShapeFn)
     .Doc(R"doc(
 Returns element-wise remainder of division. This emulates C semantics in that
@@ -847,7 +852,9 @@ REGISTER_OP("Pow")
     .Input("x: T")
     .Input("y: T")
     .Output("z: T")
-    .Attr("T: {half, float, double, int32, int64, complex64, complex128}")
+    .Attr(
+        "T: {half, bfloat16, float, double, int32, int64, complex64, "
+        "complex128}")
     .SetShapeFn(shape_inference::BroadcastBinaryOpShapeFn)
     .Doc(R"doc(
 Computes the power of one value to another.
@@ -946,7 +953,7 @@ REGISTER_OP("Atan2")
     .Input("y: T")
     .Input("x: T")
     .Output("z: T")
-    .Attr("T: {float, double}")
+    .Attr("T: {bfloat16, float, double}")
     .SetShapeFn(shape_inference::BroadcastBinaryOpShapeFn)
     .Doc(R"doc(
 Computes arctangent of `y/x` element-wise, respecting signs of the arguments.
@@ -1064,15 +1071,15 @@ Returns the truth value of (x >= y) element-wise.
 
 // --------------------------------------------------------------------------
 
-#define EQUALITY_COMPARISON()                                           \
-  Input("x: T")                                                         \
-      .Input("y: T")                                                    \
-      .Output("z: bool")                                                \
-      .SetIsCommutative()                                               \
-      .Attr(                                                            \
-          "T: {half, float, double, uint8, int8, int16, int32, int64, " \
-          "complex64, "                                                 \
-          "quint8, qint8, qint32, string, bool, complex128}")           \
+#define EQUALITY_COMPARISON()                                              \
+  Input("x: T")                                                            \
+      .Input("y: T")                                                       \
+      .Output("z: bool")                                                   \
+      .SetIsCommutative()                                                  \
+      .Attr(                                                               \
+          "T: {half, bfloat16, float, double, uint8, int8, int16, int32, " \
+          "int64, complex64, quint8, qint8, qint32, string, bool, "        \
+          "complex128}")                                                   \
       .SetShapeFn(shape_inference::BroadcastBinaryOpShapeFn)
 
 REGISTER_OP("Equal")
@@ -1291,7 +1298,7 @@ REGISTER_OP("MatMul")
     .Output("product: T")
     .Attr("transpose_a: bool = false")
     .Attr("transpose_b: bool = false")
-    .Attr("T: {half, float, double, int32, complex64, complex128}")
+    .Attr("T: {half, bfloat16, float, double, int32, complex64, complex128}")
     .SetShapeFn(shape_inference::MatMulShape)
     .Doc(R"doc(
 Multiply the matrix "a" by the matrix "b".
@@ -2105,7 +2112,7 @@ REGISTER_OP("Range")
     .Input("limit: Tidx")
     .Input("delta: Tidx")
     .Output("output: Tidx")
-    .Attr("Tidx: {float, double, int32, int64} = DT_INT32")
+    .Attr("Tidx: {bfloat16, float, double, int32, int64} = DT_INT32")
     .SetShapeFn([](InferenceContext* c) {
       ShapeHandle unused;
       TF_RETURN_WITH_CONTEXT_IF_ERROR(c->WithRank(c->input(0), 0, &unused),
@@ -2160,7 +2167,7 @@ REGISTER_OP("LinSpace")
     .Input("stop: T")
     .Input("num: Tidx")
     .Output("output: T")
-    .Attr("T: {float, double}")
+    .Attr("T: {bfloat16, float, double}")
     .Attr("Tidx: {int32, int64} = DT_INT32")
     .SetShapeFn([](InferenceContext* c) {
       ShapeHandle unused;
diff --git a/tensorflow/core/ops/nn_ops.cc b/tensorflow/core/ops/nn_ops.cc
index 59c4642e4d..102de94787 100644
--- a/tensorflow/core/ops/nn_ops.cc
+++ b/tensorflow/core/ops/nn_ops.cc
@@ -73,7 +73,7 @@ REGISTER_OP("AvgPool")
     .Attr("strides: list(int) >= 4")
     .Attr(GetPaddingAttrString())
     .Attr(GetConvnetDataFormatAttrString())
-    .Attr("T: {half, float, double}")
+    .Attr("T: {half, bfloat16, float, double}")
     .SetShapeFn(shape_inference::AvgPoolShape)
     .Doc(R"doc(
 Performs average pooling on the input.
@@ -101,7 +101,7 @@ REGISTER_OP("AvgPoolGrad")
     .Attr("strides: list(int) >= 4")
     .Attr(GetPaddingAttrString())
     .Attr(GetConvnetDataFormatAttrString())
-    .Attr("T: {half, float, double}")
+    .Attr("T: {half, bfloat16, float, double}")
     .SetShapeFn([](InferenceContext* c) {
       ShapeHandle s;
       TF_RETURN_IF_ERROR(c->MakeShapeFromShapeTensor(0, &s));
@@ -300,7 +300,7 @@ REGISTER_OP("FusedBatchNormV2")
     .Output("batch_variance: U")
     .Output("reserve_space_1: U")
     .Output("reserve_space_2: U")
-    .Attr("T: {half, float}")
+    .Attr("T: {half, bfloat16, float}")
     .Attr("U: {float}")
     .Attr("epsilon: float = 0.0001")
     .Attr("data_format: string = 'NHWC'")
@@ -393,7 +393,7 @@ REGISTER_OP("FusedBatchNormGradV2")
     .Output("offset_backprop: U")
     .Output("reserve_space_3: U")
     .Output("reserve_space_4: U")
-    .Attr("T: {half, float}")
+    .Attr("T: {half, bfloat16, float}")
     .Attr("U: {float}")
     .Attr("epsilon: float = 0.0001")
     .Attr("data_format: string = 'NHWC'")
@@ -508,7 +508,7 @@ REGISTER_OP("Conv2D")
     .Input("input: T")
     .Input("filter: T")
     .Output("output: T")
-    .Attr("T: {half, float}")
+    .Attr("T: {half, bfloat16, float}")
     .Attr("strides: list(int)")
     .Attr("use_cudnn_on_gpu: bool = true")
     .Attr(GetPaddingAttrString())
@@ -567,7 +567,7 @@ REGISTER_OP("Conv2DBackpropInput")
     .Input("filter: T")
     .Input("out_backprop: T")
     .Output("output: T")
-    .Attr("T: {half, float}")
+    .Attr("T: {half, bfloat16, float}")
     .Attr("strides: list(int)")
     .Attr("use_cudnn_on_gpu: bool = true")
     .Attr(GetPaddingAttrString())
@@ -615,7 +615,7 @@ REGISTER_OP("Conv2DBackpropFilter")
     .Input("filter_sizes: int32")
     .Input("out_backprop: T")
     .Output("output: T")
-    .Attr("T: {half, float}")
+    .Attr("T: {half, bfloat16, float}")
     .Attr("strides: list(int)")
     .Attr("use_cudnn_on_gpu: bool = true")
     .Attr(GetPaddingAttrString())
@@ -837,7 +837,7 @@ REGISTER_OP("DepthwiseConv2dNative")
     .Input("input: T")
     .Input("filter: T")
     .Output("output: T")
-    .Attr("T: {half, float, double}")
+    .Attr("T: {half, bfloat16, float, double}")
     .Attr("strides: list(int)")
     .Attr(GetPaddingAttrString())
     .Attr(GetConvnetDataFormatAttrString())
@@ -884,7 +884,7 @@ REGISTER_OP("DepthwiseConv2dNativeBackpropInput")
     .Input("filter: T")
     .Input("out_backprop: T")
     .Output("output: T")
-    .Attr("T: {float, double}")
+    .Attr("T: {bfloat16, float, double}")
     .Attr("strides: list(int)")
     .Attr(GetPaddingAttrString())
     .Attr(GetConvnetDataFormatAttrString())
@@ -932,7 +932,7 @@ REGISTER_OP("DepthwiseConv2dNativeBackpropFilter")
     .Input("filter_sizes: int32")
     .Input("out_backprop: T")
     .Output("output: T")
-    .Attr("T: {float, double}")
+    .Attr("T: {bfloat16, float, double}")
     .Attr("strides: list(int)")
     .Attr(GetPaddingAttrString())
     .Attr(GetConvnetDataFormatAttrString())
@@ -980,7 +980,7 @@ REGISTER_OP("Conv3D")
     .Input("input: T")
     .Input("filter: T")
     .Output("output: T")
-    .Attr("T: {half, float, double}")
+    .Attr("T: {half, bfloat16, float, double}")
     .Attr("strides: list(int) >= 5")
     .Attr(GetPaddingAttrString())
     .Attr(GetConvnet3dDataFormatAttrString())
@@ -1073,7 +1073,7 @@ REGISTER_OP("Conv3DBackpropInputV2")
     .Input("filter: T")
     .Input("out_backprop: T")
     .Output("output: T")
-    .Attr("T: {half, float, double}")
+    .Attr("T: {half, bfloat16, float, double}")
     .Attr("strides: list(int) >= 5")
     .Attr(GetPaddingAttrString())
     .Attr(GetConvnet3dDataFormatAttrString())
@@ -1116,7 +1116,7 @@ REGISTER_OP("Conv3DBackpropFilterV2")
     .Input("filter_sizes: int32")
     .Input("out_backprop: T")
     .Output("output: T")
-    .Attr("T: {half, float, double}")
+    .Attr("T: {half, bfloat16, float, double}")
     .Attr("strides: list(int) >= 5")
     .Attr(GetPaddingAttrString())
     .Attr(GetConvnet3dDataFormatAttrString())
@@ -1163,7 +1163,7 @@ REGISTER_OP("AvgPool3D")
     .Attr("strides: list(int) >= 5")
     .Attr(GetPaddingAttrString())
     .Attr(GetConvnet3dDataFormatAttrString())
-    .Attr("T: {float, double}")
+    .Attr("T: {bfloat16, float, double}")
     .SetShapeFn(shape_inference::Pool3DShape)
     .Doc(R"doc(
 Performs 3D average pooling on the input.
@@ -1190,7 +1190,7 @@ REGISTER_OP("AvgPool3DGrad")
     .Attr("strides: list(int) >= 5")
     .Attr(GetPaddingAttrString())
     .Attr(GetConvnet3dDataFormatAttrString())
-    .Attr("T: {float, double}")
+    .Attr("T: {bfloat16, float, double}")
     .SetShapeFn([](InferenceContext* c) {
       ShapeHandle s;
       TF_RETURN_IF_ERROR(c->MakeShapeFromShapeTensor(0, &s));
@@ -1225,7 +1225,7 @@ REGISTER_OP("MaxPool3D")
     .Attr("strides: list(int) >= 5")
     .Attr(GetPaddingAttrString())
     .Attr(GetConvnet3dDataFormatAttrString())
-    .Attr("T: {float}")
+    .Attr("T: {bfloat16, float}")
     .SetShapeFn(shape_inference::Pool3DShape)
     .Doc(R"doc(
 Performs 3D max pooling on the input.
@@ -1253,8 +1253,8 @@ REGISTER_OP("MaxPool3DGrad")
     .Attr("strides: list(int) >= 5")
     .Attr(GetPaddingAttrString())
     .Attr(GetConvnet3dDataFormatAttrString())
-    .Attr("T: {float} = DT_FLOAT")
-    .Attr("TInput: {float} = DT_FLOAT")
+    .Attr("T: {bfloat16, float} = DT_FLOAT")
+    .Attr("TInput: {bfloat16, float} = DT_FLOAT")
     .SetShapeFn([](InferenceContext* c) {
       return UnchangedShapeWithRank(c, 5);
     })
@@ -1319,7 +1319,7 @@ data_format: The data format of the input and output data. With the
 REGISTER_OP("L2Loss")
     .Input("t: T")
     .Output("output: T")
-    .Attr("T: {half, float, double}")
+    .Attr("T: {half, bfloat16, float, double}")
     .SetShapeFn(shape_inference::ScalarShape)
     .Doc(R"doc(
 L2 Loss.
@@ -1341,7 +1341,7 @@ REGISTER_OP("LRN")
     .Attr("bias: float = 1.0")
     .Attr("alpha: float = 1.0")
     .Attr("beta: float = 0.5")
-    .Attr("T: {float, half} = DT_FLOAT")
+    .Attr("T: {half, bfloat16, float} = DT_FLOAT")
     .SetShapeFn([](InferenceContext* c) {
       return UnchangedShapeWithRank(c, 4);
     })
@@ -1376,7 +1376,7 @@ REGISTER_OP("LRNGrad")
     .Attr("bias: float = 1.0")
     .Attr("alpha: float = 1.0")
     .Attr("beta: float = 0.5")
-    .Attr("T: {float, half} = DT_FLOAT")
+    .Attr("T: {half, bfloat16, float} = DT_FLOAT")
     .SetShapeFn([](InferenceContext* c) {
       ShapeHandle s;
       TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 4, &s));  // input_grads
@@ -1402,8 +1402,8 @@ output: The gradients for LRN.
 
 REGISTER_OP("MaxPool")
     .Attr(
-        "T: {float, double, int32, int64, uint8, int16, int8, uint16, "
-        "half, qint8} = DT_FLOAT")
+        "T: {half, bfloat16, float, double, int32, int64, uint8, int16, int8, "
+        "uint16, qint8} = DT_FLOAT")
     .Attr("ksize: list(int) >= 4")
     .Attr("strides: list(int) >= 4")
     .Attr(GetPaddingAttrString())
@@ -1429,8 +1429,8 @@ output: The max pooled output tensor.
 
 REGISTER_OP("MaxPoolV2")
     .Attr(
-        "T: {float, double, int32, int64, uint8, int16, int8, uint16, "
-        "half, qint8} = DT_FLOAT")
+        "T: {half, bfloat16, float, double, int32, int64, uint8, int16, int8, "
+        "uint16, qint8} = DT_FLOAT")
     .Attr(GetPaddingAttrString())
     .Attr("data_format: {'NHWC', 'NCHW', 'NCHW_VECT_C'} = 'NHWC'")
     .Input("input: T")
@@ -1913,7 +1913,7 @@ backprops: The gradients:
 REGISTER_OP("Elu")
     .Input("features: T")
     .Output("activations: T")
-    .Attr("T: {half, float, double}")
+    .Attr("T: {half, bfloat16, float, double}")
     .SetShapeFn(shape_inference::UnchangedShape)
     .Doc(R"doc(
 Computes exponential linear: `exp(features) - 1` if < 0, `features` otherwise.
@@ -1926,7 +1926,7 @@ REGISTER_OP("EluGrad")
     .Input("gradients: T")
     .Input("outputs: T")
     .Output("backprops: T")
-    .Attr("T: {half, float, double}")
+    .Attr("T: {half, bfloat16, float, double}")
     .SetShapeFn(shape_inference::MergeBothInputsShapeFn)
     .Doc(R"doc(
 Computes gradients for the exponential linear (Elu) operation.
@@ -1940,7 +1940,7 @@ backprops: The gradients: `gradients * (outputs + 1)` if outputs < 0,
 REGISTER_OP("Selu")
     .Input("features: T")
     .Output("activations: T")
-    .Attr("T: {half, float, double}")
+    .Attr("T: {half, bfloat16, float, double}")
     .SetShapeFn(shape_inference::UnchangedShape)
     .Doc(R"doc(
 Computes scaled exponential linear: `scale * alpha * (exp(features) - 1)`
@@ -1953,7 +1953,7 @@ REGISTER_OP("SeluGrad")
     .Input("gradients: T")
     .Input("outputs: T")
     .Output("backprops: T")
-    .Attr("T: {half, float, double}")
+    .Attr("T: {half, bfloat16, float, double}")
     .SetShapeFn(shape_inference::MergeBothInputsShapeFn)
     .Doc(R"doc(
 Computes gradients for the scaled exponential linear (Selu) operation.
@@ -2015,7 +2015,7 @@ backprops: The gradients: `gradients / (1 + abs(features)) ** 2`.
 REGISTER_OP("Softmax")
     .Input("logits: T")
     .Output("softmax: T")
-    .Attr("T: {half, float, double}")
+    .Attr("T: {half, bfloat16, float, double}")
     .SetShapeFn([](InferenceContext* c) {
       return shape_inference::UnchangedShapeWithRankAtLeast(c, 1);
     })
@@ -2035,7 +2035,7 @@ softmax: Same shape as `logits`.
 REGISTER_OP("LogSoftmax")
     .Input("logits: T")
     .Output("logsoftmax: T")
-    .Attr("T: {half, float, double}")
+    .Attr("T: {half, bfloat16, float, double}")
     .SetShapeFn([](InferenceContext* c) {
       return shape_inference::UnchangedShapeWithRankAtLeast(c, 1);
     })
@@ -2057,7 +2057,7 @@ REGISTER_OP("SoftmaxCrossEntropyWithLogits")
     .Input("labels: T")
     .Output("loss: T")
     .Output("backprop: T")
-    .Attr("T: {half, float, double}")
+    .Attr("T: {half, bfloat16, float, double}")
     .SetShapeFn([](InferenceContext* c) {
       ShapeHandle input;
       TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 2, &input));
@@ -2086,7 +2086,7 @@ REGISTER_OP("SparseSoftmaxCrossEntropyWithLogits")
     .Input("labels: Tlabels")
     .Output("loss: T")
     .Output("backprop: T")
-    .Attr("T: {half, float, double}")
+    .Attr("T: {half, bfloat16, float, double}")
     .Attr("Tlabels: {int32, int64} = DT_INT64")
     .SetShapeFn([](InferenceContext* c) {
       ShapeHandle features;
diff --git a/tensorflow/core/ops/random_ops.cc b/tensorflow/core/ops/random_ops.cc
index 5a436fb93e..31d9c82e53 100644
--- a/tensorflow/core/ops/random_ops.cc
+++ b/tensorflow/core/ops/random_ops.cc
@@ -29,7 +29,7 @@ REGISTER_OP("RandomUniform")
     .Output("output: dtype")
     .Attr("seed: int = 0")
     .Attr("seed2: int = 0")
-    .Attr("dtype: {half,float,double}")
+    .Attr("dtype: {half,bfloat16,float,double}")
     .Attr("T: {int32, int64}")
     .SetShapeFn(shape_inference::RandomShape)
     .Doc(R"doc(
@@ -87,7 +87,7 @@ REGISTER_OP("RandomStandardNormal")
     .Output("output: dtype")
     .Attr("seed: int = 0")
     .Attr("seed2: int = 0")
-    .Attr("dtype: {half,float,double}")
+    .Attr("dtype: {half,bfloat16,float,double}")
     .Attr("T: {int32, int64}")
     .SetShapeFn(shape_inference::RandomShape)
     .Doc(R"doc(
@@ -115,7 +115,7 @@ REGISTER_OP("ParameterizedTruncatedNormal")
     .Output("output: dtype")
     .Attr("seed: int = 0")
     .Attr("seed2: int = 0")
-    .Attr("dtype: {half,float,double}")
+    .Attr("dtype: {half,bfloat16,float,double}")
     .Attr("T: {int32, int64}")
     .SetShapeFn(shape_inference::RandomShape)
     .Doc(R"doc(
@@ -145,7 +145,7 @@ REGISTER_OP("TruncatedNormal")
     .Output("output: dtype")
     .Attr("seed: int = 0")
     .Attr("seed2: int = 0")
-    .Attr("dtype: {half,float,double}")
+    .Attr("dtype: {half,bfloat16,float,double}")
     .Attr("T: {int32, int64}")
     .SetShapeFn(shape_inference::RandomShape)
     .Doc(R"doc(
-- 
GitLab


From e361bf18a3c71a1ec9985a478c419c04852a61d3 Mon Sep 17 00:00:00 2001
From: Koan-Sin Tan <koansin.tan@gmail.com>
Date: Fri, 1 Dec 2017 10:31:22 +0800
Subject: [PATCH 0490/1225] add link to decode_bmp

---
 tensorflow/docs_src/api_guides/python/image.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tensorflow/docs_src/api_guides/python/image.md b/tensorflow/docs_src/api_guides/python/image.md
index a2c8c3c3c9..051e4547ee 100644
--- a/tensorflow/docs_src/api_guides/python/image.md
+++ b/tensorflow/docs_src/api_guides/python/image.md
@@ -19,6 +19,7 @@ Note: The PNG encode and decode Ops support RGBA, but the conversions Ops
 presently only support RGB, HSV, and GrayScale. Presently, the alpha channel has
 to be stripped from the image and re-attached using slicing ops.
 
+*   @{tf.image.decode_bmp}
 *   @{tf.image.decode_gif}
 *   @{tf.image.decode_jpeg}
 *   @{tf.image.encode_jpeg}
-- 
GitLab


From 87e2f20c8b4f2ece313584c7c3c5588ee6ae5ece Mon Sep 17 00:00:00 2001
From: Yao Zhang <yaozhang@google.com>
Date: Thu, 30 Nov 2017 18:56:57 -0800
Subject: [PATCH 0491/1225] Automated g4 rollback of changelist 177505909

PiperOrigin-RevId: 177540002
---
 .../grappler/optimizers/layout_optimizer.cc   |  88 ++++++++----
 .../optimizers/layout_optimizer_test.cc       | 128 ++++++++++++++++++
 2 files changed, 190 insertions(+), 26 deletions(-)

diff --git a/tensorflow/core/grappler/optimizers/layout_optimizer.cc b/tensorflow/core/grappler/optimizers/layout_optimizer.cc
index ef4b015295..cb8411ba5e 100644
--- a/tensorflow/core/grappler/optimizers/layout_optimizer.cc
+++ b/tensorflow/core/grappler/optimizers/layout_optimizer.cc
@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
+#include <deque>
 #include <unordered_set>
 
 #include "tensorflow/core/framework/attr_value.pb.h"
@@ -761,24 +762,52 @@ class AgnosticNodeProcessor : public NodeProcessor {
 
   bool IsNodeAfterNCHWToNHWC() const {
     std::set<string> ops_format_agnostic = GetOpsFormatAgnostic();
-    auto node = node_map_->GetNode(node_->name());
-    while (node->input_size() > 0) {
-      int data_input_pos = 0;
-      if (IsConcatV1(*node) || IsSplit(*node)) {
-        data_input_pos = 1;
-      }
-      node = node_map_->GetNode(node->input(data_input_pos));
-      if (IsNodeNCHWToNHWC(node->name())) {
+    std::deque<NodeDef*> queue;
+    auto first_node_pos = DataInputPos(*node_);
+    for (const auto& pos : first_node_pos) {
+      auto input_node = node_map_->GetNode(node_->input(pos));
+      queue.push_back(input_node);
+    }
+    // The code will exit this while loop in one iteration in most cases, as the
+    // graph is already topologically sorted.
+    while (!queue.empty()) {
+      NodeDef* current_node = queue.front();
+      queue.pop_front();
+      if (IsNodeNCHWToNHWC(current_node->name())) {
         return true;
       }
-      bool connected =
-          ops_format_agnostic.find(node->op()) != ops_format_agnostic.end();
-      if (!connected) {
-        return false;
+      // We only continue searching if the path is connected through
+      // format-agnostic nodes.
+      if (ops_format_agnostic.find(current_node->op()) !=
+          ops_format_agnostic.end()) {
+        auto current_node_pos = DataInputPos(*current_node);
+        for (const auto& pos : current_node_pos) {
+          auto input_node = node_map_->GetNode(current_node->input(pos));
+          queue.push_back(input_node);
+        }
       }
     }
     return false;
   }
+
+ private:
+  std::vector<int> DataInputPos(const NodeDef& node) const {
+    std::vector<int> pos;
+    if (IsSplit(node)) {
+      return {1};
+    }
+    if (IsConcatV1(node)) {
+      return {1};
+    }
+    if (IsAdd(node) || IsMul(node) || IsRealDiv(node) ||
+        IsSquaredDifference(node) || IsSub(node)) {
+      return {0, 1};
+    }
+    if (node.input_size() > 0 && !IsControlInput(node.input(0))) {
+      return {0};
+    }
+    return {};
+  }
 };
 
 class AddNProcessor : public AgnosticNodeProcessor {
@@ -801,42 +830,49 @@ class BinaryOpProcessor : public AgnosticNodeProcessor {
  public:
   explicit BinaryOpProcessor(const OptimizeContext& opt_cxt)
       : AgnosticNodeProcessor(opt_cxt) {
-    is_4d_with_vector_ = Is4DOperateWithVector();
+    is_4d_with_vector_ = IsNDOperateWithMD(4, 1);
   }
 
  protected:
   bool ShouldProcess() const override {
+    // TODO(yaozhang): Support IsNDOperateWithMD(1, 4): first input is a vector
+    // and the second input is a 4D tensor; and update CustomizedProcessing()
+    // accordingly.
     return !MustPreserve() && IsDimsFour(*node_) && HasOutputs() &&
            IsNodeAfterNCHWToNHWC() &&
-           (Is4DOperateWithND(4) || Is4DOperateWithScalar() ||
-            Is4DOperateWithVector()) &&
+           (IsNDOperateWithMD(4, 0) || IsNDOperateWithMD(4, 1) ||
+            IsNDOperateWithMD(4, 4) || IsNDOperateWithMD(0, 4)) &&
            IsOnGPU();
   }
 
   std::vector<int> GetInputPos() const override {
-    std::vector<int> input_pos = {0};
-    if (Is4DOperateWithND(4)) {
+    std::vector<int> input_pos;
+    auto input0 = node_map_->GetNode(node_->input(0));
+    auto input1 = node_map_->GetNode(node_->input(1));
+    if (IsDimsFour(*input0)) {
+      input_pos.push_back(0);
+    }
+    if (IsDimsFour(*input1)) {
       input_pos.push_back(1);
     }
     return input_pos;
   }
 
-  bool Is4DOperateWithND(int n) const {
+  bool IsDimsFour(const NodeDef& node) const {
+    return NodeProcessor::IsDimsFour(node) || IsNodeNCHWToNHWC(node.name());
+  }
+
+  bool IsNDOperateWithMD(int n, int m) const {
     auto input0 = node_map_->GetNode(node_->input(0));
     auto input1 = node_map_->GetNode(node_->input(1));
     if (input0 && input1) {
-      return (IsDimsFour(*input0) || IsNodeNCHWToNHWC(input0->name())) &&
-             ((n == 4)
-                  ? (IsDimsFour(*input1) || IsNodeNCHWToNHWC(input1->name()))
-                  : IsDimsN(*input1, n));
+      bool input0_is_n = (n == 4) ? IsDimsFour(*input0) : IsDimsN(*input0, n);
+      bool input1_is_m = (m == 4) ? IsDimsFour(*input1) : IsDimsN(*input1, m);
+      return input0_is_n && input1_is_m;
     }
     return false;
   }
 
-  bool Is4DOperateWithScalar() const { return Is4DOperateWithND(0); }
-
-  bool Is4DOperateWithVector() const { return Is4DOperateWithND(1); }
-
   NodeDef* AddNodeShapeConst(const string& name, int num_channels) {
     NodeDef* node = graph_->add_node();
     node_map_->AddNode(name, node);
diff --git a/tensorflow/core/grappler/optimizers/layout_optimizer_test.cc b/tensorflow/core/grappler/optimizers/layout_optimizer_test.cc
index e8f7b8ac3c..363b4c3fd8 100644
--- a/tensorflow/core/grappler/optimizers/layout_optimizer_test.cc
+++ b/tensorflow/core/grappler/optimizers/layout_optimizer_test.cc
@@ -298,6 +298,39 @@ TEST_F(LayoutOptimizerTest, Connectivity) {
   EXPECT_EQ(node_i2_output->input(0), "i1");
 }
 
+TEST_F(LayoutOptimizerTest, ConnectivityBinaryOpWithInputScalarAnd4D) {
+  tensorflow::Scope s = tensorflow::Scope::NewRootScope();
+  auto conv = SimpleConv2D(&s, 3, 2, "VALID");
+  auto i1 = ops::Identity(s.WithOpName("i1"), conv);
+  auto i2 = ops::Identity(s.WithOpName("i2"), i1);
+  auto scalar_sub = ops::Const(s.WithOpName("scalar_sub"), 3.0f, {});
+  auto sub = ops::Sub(s.WithOpName("sub"), scalar_sub, i2);
+  auto i3 = ops::Identity(s.WithOpName("i3"), sub);
+  auto i4 = ops::Identity(s.WithOpName("i4"), i3);
+  auto i5 = ops::Identity(s.WithOpName("i5"), i4);
+  auto scalar_mul = ops::Const(s.WithOpName("scalar_mul"), 3.0f, {});
+  auto mul = ops::Mul(s.WithOpName("mul"), scalar_mul, i5);
+  auto i6 = ops::Identity(s.WithOpName("i6"), mul);
+  GrapplerItem item;
+  TF_CHECK_OK(s.ToGraphDef(&item.graph));
+  // Make the graph not in topological order to test the handling of multi-hop
+  // connectivity (here we say two nodes are connected if all nodes in the
+  // middle are layout agnostic). If the graph is already in topological order,
+  // the problem is easier, where layout optimizer only needs to check
+  // single-hop connectivity.
+  NodeMap node_map_original(&item.graph);
+  auto node_i1 = node_map_original.GetNode("i1");
+  auto node_mul = node_map_original.GetNode("mul");
+  node_mul->Swap(node_i1);
+  LayoutOptimizer optimizer;
+  GraphDef output;
+  Status status = optimizer.Optimize(virtual_cluster_.get(), item, &output);
+  NodeMap node_map_output(&output);
+  auto mul_node = node_map_output.GetNode("mul");
+  EXPECT_EQ(mul_node->input(0), "scalar_mul");
+  EXPECT_EQ(mul_node->input(1), "i5");
+}
+
 TEST_F(LayoutOptimizerTest, PreserveFetch) {
   tensorflow::Scope s = tensorflow::Scope::NewRootScope();
   auto conv = SimpleConv2D(&s, 3, 2, "VALID");
@@ -571,6 +604,101 @@ TEST_F(LayoutOptimizerTest, Sum) {
   */
 }
 
+TEST_F(LayoutOptimizerTest, MulScalarAnd4D) {
+  tensorflow::Scope s = tensorflow::Scope::NewRootScope();
+  auto conv = SimpleConv2D(&s, 3, 2, "VALID");
+  auto scalar = ops::Const(s.WithOpName("scalar"), 3.0f, {});
+  auto mul = ops::Mul(s.WithOpName("mul"), scalar, conv);
+  auto o = ops::Identity(s.WithOpName("o"), mul);
+  GrapplerItem item;
+  TF_CHECK_OK(s.ToGraphDef(&item.graph));
+  LayoutOptimizer optimizer;
+  GraphDef output;
+  Status status = optimizer.Optimize(virtual_cluster_.get(), item, &output);
+  NodeMap node_map(&output);
+  auto mul_node = node_map.GetNode("mul");
+  EXPECT_EQ(mul_node->input(0), "scalar");
+  EXPECT_EQ(mul_node->input(1), "Conv2D");
+}
+
+TEST_F(LayoutOptimizerTest, Mul4DAndScalar) {
+  tensorflow::Scope s = tensorflow::Scope::NewRootScope();
+  auto conv = SimpleConv2D(&s, 3, 2, "VALID");
+  auto scalar = ops::Const(s.WithOpName("scalar"), 3.0f, {});
+  auto mul = ops::Mul(s.WithOpName("mul"), conv, scalar);
+  auto o = ops::Identity(s.WithOpName("o"), mul);
+  GrapplerItem item;
+  TF_CHECK_OK(s.ToGraphDef(&item.graph));
+  LayoutOptimizer optimizer;
+  GraphDef output;
+  Status status = optimizer.Optimize(virtual_cluster_.get(), item, &output);
+  NodeMap node_map(&output);
+  auto mul_node = node_map.GetNode("mul");
+  EXPECT_EQ(mul_node->input(0), "Conv2D");
+  EXPECT_EQ(mul_node->input(1), "scalar");
+}
+
+TEST_F(LayoutOptimizerTest, Mul4DAnd4D) {
+  tensorflow::Scope s = tensorflow::Scope::NewRootScope();
+  auto conv = SimpleConv2D(&s, 3, 2, "VALID");
+  auto i = ops::Identity(s.WithOpName("i"), conv);
+  auto mul = ops::Mul(s.WithOpName("mul"), conv, i);
+  auto o = ops::Identity(s.WithOpName("o"), mul);
+  GrapplerItem item;
+  TF_CHECK_OK(s.ToGraphDef(&item.graph));
+  LayoutOptimizer optimizer;
+  GraphDef output;
+  Status status = optimizer.Optimize(virtual_cluster_.get(), item, &output);
+  NodeMap node_map(&output);
+  auto mul_node = node_map.GetNode("mul");
+  EXPECT_EQ(mul_node->input(0), "Conv2D");
+  EXPECT_EQ(mul_node->input(1), "i");
+}
+
+TEST_F(LayoutOptimizerTest, Mul4DAndVector) {
+  tensorflow::Scope s = tensorflow::Scope::NewRootScope();
+  auto conv = SimpleConv2D(&s, 3, 2, "VALID");
+  auto vector = ops::Const(s.WithOpName("vector"), {3.0f, 7.0f}, {2});
+  auto mul = ops::Mul(s.WithOpName("mul"), conv, vector);
+  auto o = ops::Identity(s.WithOpName("o"), mul);
+  GrapplerItem item;
+  TF_CHECK_OK(s.ToGraphDef(&item.graph));
+  LayoutOptimizer optimizer;
+  GraphDef output;
+  Status status = optimizer.Optimize(virtual_cluster_.get(), item, &output);
+  NodeMap node_map(&output);
+  auto mul_node = node_map.GetNode("mul");
+  EXPECT_EQ(mul_node->input(0), "Conv2D");
+  EXPECT_EQ(mul_node->input(1), "LayoutOptimizerReshapeNHWCToNCHW-mul-vector");
+  auto mul_const = node_map.GetNode("LayoutOptimizerReshapeConst-mul-vector");
+  Tensor tensor;
+  EXPECT_TRUE(
+      tensor.FromProto(mul_const->mutable_attr()->at({"value"}).tensor()));
+  Tensor tensor_expected(DT_INT32, {4});
+  test::FillValues<int>(&tensor_expected, {1, 2, 1, 1});
+  test::ExpectTensorEqual<int>(tensor_expected, tensor);
+}
+
+TEST_F(LayoutOptimizerTest, MulVectorAnd4D) {
+  tensorflow::Scope s = tensorflow::Scope::NewRootScope();
+  auto conv = SimpleConv2D(&s, 3, 2, "VALID");
+  auto vector = ops::Const(s.WithOpName("vector"), {3.0f, 7.0f}, {2});
+  auto mul = ops::Mul(s.WithOpName("mul"), vector, conv);
+  auto o = ops::Identity(s.WithOpName("o"), mul);
+  GrapplerItem item;
+  TF_CHECK_OK(s.ToGraphDef(&item.graph));
+  LayoutOptimizer optimizer;
+  GraphDef output;
+  Status status = optimizer.Optimize(virtual_cluster_.get(), item, &output);
+  NodeMap node_map(&output);
+  auto mul_node = node_map.GetNode("mul");
+  // TODO(yaozhang): Support vector as the first input and 4d tensor as the
+  // second input for BinaryOpProcessor.
+  EXPECT_EQ(mul_node->input(0), "vector");
+  EXPECT_EQ(mul_node->input(1),
+            "LayoutOptimizerTransposeNCHWToNHWC-Conv2D-mul-1");
+}
+
 }  // namespace
 }  // namespace grappler
 }  // namespace tensorflow
-- 
GitLab


From 79ad4a423f3e9031eb841a164372cc7476cc112a Mon Sep 17 00:00:00 2001
From: Olivia Nordquist <nolivia@google.com>
Date: Thu, 30 Nov 2017 19:46:05 -0800
Subject: [PATCH 0492/1225] enabling Tensor._set_shape() to work with the C API

PiperOrigin-RevId: 177543170
---
 tensorflow/python/client/tf_session.i         | 43 ++++++++++++++
 tensorflow/python/client/tf_session_helper.cc | 19 +++++++
 tensorflow/python/client/tf_session_helper.h  | 14 +++++
 tensorflow/python/framework/ops.py            | 57 +++++++++++++------
 4 files changed, 117 insertions(+), 16 deletions(-)

diff --git a/tensorflow/python/client/tf_session.i b/tensorflow/python/client/tf_session.i
index 5fa1a7e8fc..d471a39b69 100644
--- a/tensorflow/python/client/tf_session.i
+++ b/tensorflow/python/client/tf_session.i
@@ -532,6 +532,49 @@ def TF_Reset(target, containers=None, config=None):
 %unignore TF_GraphGetTensorShapeHelper;
 %ignore TF_GraphGetTensorShape;
 
+// We use TF_GraphSetTensorShape_wrapper instead of
+// TF_GraphSetTensorShape
+%ignore TF_GraphSetTensorShape;
+%unignore tensorflow;
+%unignore TF_GraphSetTensorShape_wrapper;
+
+// $input is a Python list of ints to a vector<int> for TF_GraphSetTensorShape_wrapper
+%typemap(in) (const std::vector<int64_t>& dims)
+    (std::vector<int64_t> dims_local){
+  if ($input != Py_None) {
+    if (!PyList_Check($input)) {
+      SWIG_exception_fail(SWIG_TypeError, tensorflow::strings::Printf(
+              "$symname: expected list but got %s ", Py_TYPE($input)->tp_name).c_str());
+    }
+    size_t size = PyList_Size($input);
+    for (int i = 0; i < size; ++i) {
+      PyObject* item = PyList_GetItem($input, i);
+      dims_local.push_back(PyInt_AsLong(item));
+    }
+    $1 = &dims_local;
+  } else {
+    $1 = nullptr;
+  }
+}
+
+// We use TF_GraphGetTensorShape_wrapper instead of
+// TF_GraphGetTensorShape
+%ignore TF_GraphGetTensorShape;
+%unignore tensorflow;
+%unignore TF_GraphGetTensorShape_wrapper;
+
+// Build a Python list of ints and return it.
+%typemap(out) std::vector<int64_t> tensorflow::TF_GraphGetTensorShape_wrapper {
+  $result = PyList_New($1.size());
+  if (!$result) {
+    SWIG_exception_fail(SWIG_MemoryError, "$symname: couldn't create list");
+  }
+
+  for (size_t i = 0; i < $1.size(); ++i) {
+    PyList_SET_ITEM($result, i, PyInt_FromLong($1[i]));
+  }
+}
+
 %include "tensorflow/python/client/tf_session_helper.h"
 
 %unignoreall
diff --git a/tensorflow/python/client/tf_session_helper.cc b/tensorflow/python/client/tf_session_helper.cc
index ad982e5dd8..e4bf09a0ca 100644
--- a/tensorflow/python/client/tf_session_helper.cc
+++ b/tensorflow/python/client/tf_session_helper.cc
@@ -407,4 +407,23 @@ TF_Function* TF_GraphToFunction_wrapper(
                             opts, description, out_status);
 }
 
+void TF_GraphSetTensorShape_wrapper(TF_Graph* graph, TF_Output output,
+                                    const std::vector<int64_t>& dims,
+                                    bool unknown_shape, TF_Status* status) {
+  if (unknown_shape) {
+    TF_GraphSetTensorShape(graph, output, nullptr, -1, status);
+    return;
+  }
+  TF_GraphSetTensorShape(graph, output, dims.data(), dims.size(), status);
+}
+
+std::vector<int64_t> TF_GraphGetTensorShape_wrapper(TF_Graph* graph,
+                                                    TF_Output output,
+                                                    int num_dims,
+                                                    TF_Status* status) {
+  std::vector<int64_t> dims(num_dims);
+  TF_GraphGetTensorShape(graph, output, dims.data(), num_dims, status);
+  return dims;
+}
+
 }  // namespace tensorflow
diff --git a/tensorflow/python/client/tf_session_helper.h b/tensorflow/python/client/tf_session_helper.h
index 6ed08d3a58..bb7171db31 100644
--- a/tensorflow/python/client/tf_session_helper.h
+++ b/tensorflow/python/client/tf_session_helper.h
@@ -168,6 +168,20 @@ TF_Function* TF_GraphToFunction_wrapper(
     const std::vector<TF_Output>& inputs, const std::vector<TF_Output>& outputs,
     const NameVector& output_names, const TF_FunctionOptions* opts,
     const char* description, TF_Status* out_status);
+
+// Set the shape of output. If unknown is true, `num_dims` must be set to
+// -1 and `dims` is set to nullptr.
+void TF_GraphSetTensorShape_wrapper(TF_Graph* graph, TF_Output output,
+                                    const std::vector<int64_t>& dims,
+                                    bool unknown_shape, TF_Status* status);
+
+// Return the shape of output. `num_dims` should be the output of
+// TF_GraphGetTensorNumDims. If `num_dims = -1`, this should not be called.
+std::vector<int64_t> TF_GraphGetTensorShape_wrapper(TF_Graph* graph,
+                                                    TF_Output output,
+                                                    int num_dims,
+                                                    TF_Status* status);
+
 }  // namespace tensorflow
 
 #endif  // TENSORFLOW_PYTHON_CLIENT_TF_SESSION_HELPER_H_
diff --git a/tensorflow/python/framework/ops.py b/tensorflow/python/framework/ops.py
index 5f945ac133..13e6426447 100644
--- a/tensorflow/python/framework/ops.py
+++ b/tensorflow/python/framework/ops.py
@@ -374,6 +374,19 @@ class Tensor(_TensorLike):
       A `TensorShape` representing the shape of this tensor.
 
     """
+    if _USE_C_API:
+      graph = self._op._graph._c_graph  # pylint: disable=protected-access
+      with errors.raise_exception_on_not_ok_status() as status:
+        num_dims = c_api.TF_GraphGetTensorNumDims(graph, self._as_tf_output(),
+                                                  status)
+      if num_dims == -1:
+        dim_list = None
+      else:
+        with errors.raise_exception_on_not_ok_status() as status:
+          dim_list = c_api.TF_GraphGetTensorShape_wrapper(
+              graph, self._as_tf_output(), num_dims, status)
+        dim_list = [None if i == -1 else i for i in dim_list]
+      return tensor_shape.TensorShape(dim_list)
     return self._shape
 
   def __iter__(self):
@@ -393,8 +406,8 @@ class Tensor(_TensorLike):
       yield self[i]
 
   def _shape_as_list(self):
-    if self._shape.ndims is not None:
-      return [dim.value for dim in self._shape.dims]
+    if self.shape.ndims is not None:
+      return [dim.value for dim in self.shape.dims]
     else:
       return None
 
@@ -410,7 +423,7 @@ class Tensor(_TensorLike):
     Returns:
       Integer rank or None
     """
-    return self._shape.ndims
+    return self.shape.ndims
 
   def get_shape(self):
     """Alias of Tensor.shape."""
@@ -441,14 +454,35 @@ class Tensor(_TensorLike):
     ```
 
     Args:
-      shape: A `TensorShape` representing the shape of this tensor.
+      shape: A `TensorShape` representing the shape of this tensor, a
+      `TensorShapeProto`, a list, a tuple, or None.
 
     Raises:
       ValueError: If `shape` is not compatible with the current shape of
         this tensor.
     """
-    # TODO(skyewm): call C API
-    self._shape = self._shape.merge_with(shape)
+    if not _USE_C_API:
+      self._shape = self._shape.merge_with(shape)  # pylint: disable=protected-access
+      return
+    if not isinstance(shape, tensor_shape.TensorShape):
+      shape = tensor_shape.TensorShape(shape)
+    dim_list = []
+    if shape.dims is None:
+      unknown_shape = True
+    else:
+      unknown_shape = False
+      for dim in shape.dims:
+        if dim.value is None:
+          dim_list.append(-1)
+        else:
+          dim_list.append(dim.value)
+    with errors.raise_exception_on_not_ok_status() as status:
+      c_api.TF_GraphSetTensorShape_wrapper(
+          self._op._graph._c_graph,  # pylint: disable=protected-access
+          self._as_tf_output(),
+          dim_list,
+          unknown_shape,
+          status)
 
   @property
   def value_index(self):
@@ -4521,15 +4555,11 @@ def control_dependencies(control_inputs):
   See @{tf.Graph.control_dependencies}
   for more details.
 
-  When eager execution is enabled, any callable object in the `control_inputs`
-  list will be called.
-
   Args:
     control_inputs: A list of `Operation` or `Tensor` objects which
       must be executed or computed before running the operations
       defined in the context.  Can also be `None` to clear the control
-      dependencies. If eager execution is enabled, any callable object in the
-      `control_inputs` list will be called.
+      dependencies.
 
   Returns:
    A context manager that specifies control dependencies for all
@@ -4538,11 +4568,6 @@ def control_dependencies(control_inputs):
   if context.in_graph_mode():
     return get_default_graph().control_dependencies(control_inputs)
   else:
-    if control_inputs:
-      # Excute any pending callables.
-      for control in control_inputs:
-        if callable(control):
-          control()
     return _NullContextmanager()
 
 
-- 
GitLab


From 6968ff07225ad88928922bc83e5522d4515cf963 Mon Sep 17 00:00:00 2001
From: Yao Zhang <yaozhang@google.com>
Date: Thu, 30 Nov 2017 20:21:19 -0800
Subject: [PATCH 0493/1225] Disable tuning for now. Re-enable when
 measurement-based estimator is ready.

PiperOrigin-RevId: 177545499
---
 .../core/grappler/optimizers/layout_optimizer.cc    | 13 +++----------
 1 file changed, 3 insertions(+), 10 deletions(-)

diff --git a/tensorflow/core/grappler/optimizers/layout_optimizer.cc b/tensorflow/core/grappler/optimizers/layout_optimizer.cc
index cb8411ba5e..e9436638f0 100644
--- a/tensorflow/core/grappler/optimizers/layout_optimizer.cc
+++ b/tensorflow/core/grappler/optimizers/layout_optimizer.cc
@@ -1623,20 +1623,13 @@ Status LayoutOptimizer::Optimize(Cluster* cluster, const GrapplerItem& item,
   }
 
   TuningConfig config;
-  config.no_gemm = false;
+  config.no_gemm = true;
+  // TODO(yaozhang): Enable tuning with various TuningConfig choices wtih
+  // the measurement-based estimator.
   status = Tune(item, graph_properties, config, output);
-  // This is based on an empirical observation that if the introduced Transpose
-  // nodes is more than 30, not using GEMM implementation would result in better
-  // performance.
-  if (status.ok() && GetNumTranspose(*output) > 30) {
-    config.no_gemm = true;
-    status = Tune(item, graph_properties, config, output);
-  }
-
   if (!status.ok()) {
     *output = item.graph;
   }
-
   return status;
 }
 
-- 
GitLab


From 1ec61fafe13e5edce6e45d5a67e960efb9df618a Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 30 Nov 2017 20:30:18 -0800
Subject: [PATCH 0494/1225] Use latest nsync in tensorflow.  Latest nsync
 builds with bazel on FreeBSD.

PiperOrigin-RevId: 177545934
---
 tensorflow/contrib/cmake/external/nsync.cmake         | 2 +-
 tensorflow/contrib/cmake/patches/nsync/CMakeLists.txt | 9 +++++++++
 tensorflow/workspace.bzl                              | 8 ++++----
 3 files changed, 14 insertions(+), 5 deletions(-)

diff --git a/tensorflow/contrib/cmake/external/nsync.cmake b/tensorflow/contrib/cmake/external/nsync.cmake
index 155c91cb97..0508006047 100644
--- a/tensorflow/contrib/cmake/external/nsync.cmake
+++ b/tensorflow/contrib/cmake/external/nsync.cmake
@@ -16,7 +16,7 @@ include (ExternalProject)
 
 set(nsync_INCLUDE_DIR ${CMAKE_CURRENT_BINARY_DIR}/external/nsync/public)
 set(nsync_URL https://github.com/google/nsync)
-set(nsync_TAG 93815892dddafe9146a5f7e7042281d59d0f4323)
+set(nsync_TAG 8502189abfa44c249c01c2cad64e6ed660a9a668)
 set(nsync_BUILD ${CMAKE_CURRENT_BINARY_DIR}/nsync/src/nsync)
 set(nsync_INSTALL ${CMAKE_CURRENT_BINARY_DIR}/nsync/install)
 
diff --git a/tensorflow/contrib/cmake/patches/nsync/CMakeLists.txt b/tensorflow/contrib/cmake/patches/nsync/CMakeLists.txt
index 594c2492d4..aaae18a313 100644
--- a/tensorflow/contrib/cmake/patches/nsync/CMakeLists.txt
+++ b/tensorflow/contrib/cmake/patches/nsync/CMakeLists.txt
@@ -158,12 +158,21 @@ if (NOT "${NSYNC_LANGUAGE}X" STREQUAL "c++11X")
   elseif ("${CMAKE_SYSTEM_NAME}X" STREQUAL "NetBSDX")
     include_directories ("${PROJECT_SOURCE_DIR}/platform/netbsd")
     set (NSYNC_POSIX ON)
+    set (NSYNC_OS_EXTRA_SRC
+      "platform/posix/src/nsync_semaphore_mutex.c"
+    )
   elseif ("${CMAKE_SYSTEM_NAME}X" STREQUAL "FreeBSDX")
     include_directories ("${PROJECT_SOURCE_DIR}/platform/freebsd")
     set (NSYNC_POSIX ON)
+    set (NSYNC_OS_EXTRA_SRC
+      "platform/posix/src/nsync_semaphore_mutex.c"
+    )
   elseif ("${CMAKE_SYSTEM_NAME}X" STREQUAL "OpenBSDX")
     include_directories ("${PROJECT_SOURCE_DIR}/platform/openbsd")
     set (NSYNC_POSIX ON)
+    set (NSYNC_OS_EXTRA_SRC
+      "platform/posix/src/nsync_semaphore_mutex.c"
+    )
   endif ()
 endif ()
 
diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl
index b61012f71e..25e036e24c 100644
--- a/tensorflow/workspace.bzl
+++ b/tensorflow/workspace.bzl
@@ -416,11 +416,11 @@ def tf_workspace(path_prefix="", tf_repo_name=""):
   native.http_archive(
       name = "nsync",
       urls = [
-          "https://mirror.bazel.build/github.com/google/nsync/archive/93815892dddafe9146a5f7e7042281d59d0f4323.tar.gz",
-          "https://github.com/google/nsync/archive/93815892dddafe9146a5f7e7042281d59d0f4323.tar.gz",
+          "https://mirror.bazel.build/github.com/google/nsync/archive/8502189abfa44c249c01c2cad64e6ed660a9a668.tar.gz",
+          "https://github.com/google/nsync/archive/8502189abfa44c249c01c2cad64e6ed660a9a668.tar.gz",
       ],
-      sha256 = "e3bd4555415ace511338fc27e595351738eea4e9006f1612b76c82914770716b",
-      strip_prefix = "nsync-93815892dddafe9146a5f7e7042281d59d0f4323",
+      sha256 = "51f81ff4202bbb820cdbedc061bd2eb6765f2b5c06489e7a8694bedac329e8f8",
+      strip_prefix = "nsync-8502189abfa44c249c01c2cad64e6ed660a9a668",
   )
 
   native.http_archive(
-- 
GitLab


From 6eec9c2ea33f3b86012cb0ea2aeb9e49e65bc716 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 30 Nov 2017 21:08:05 -0800
Subject: [PATCH 0495/1225] [XLA] Hlo parser: support rng and reduce-precision.
 Also simplify the lexer by regarding several things as identifier.

PiperOrigin-RevId: 177548483
---
 .../compiler/xla/service/hlo_instruction.cc   | 30 +++++++
 .../compiler/xla/service/hlo_instruction.h    |  5 +-
 tensorflow/compiler/xla/tools/parser/BUILD    |  2 +-
 .../compiler/xla/tools/parser/hlo_lexer.cc    | 32 ++------
 .../compiler/xla/tools/parser/hlo_lexer.h     | 14 +---
 .../compiler/xla/tools/parser/hlo_parser.cc   | 81 +++++++++++++++++--
 .../xla/tools/parser/hlo_parser_test.cc       | 25 ++++++
 .../compiler/xla/tools/parser/hlo_token.h     |  6 +-
 8 files changed, 149 insertions(+), 46 deletions(-)

diff --git a/tensorflow/compiler/xla/service/hlo_instruction.cc b/tensorflow/compiler/xla/service/hlo_instruction.cc
index b4bac18bcd..45825c7c76 100644
--- a/tensorflow/compiler/xla/service/hlo_instruction.cc
+++ b/tensorflow/compiler/xla/service/hlo_instruction.cc
@@ -2060,6 +2060,14 @@ std::vector<string> HloInstruction::ExtraAttributesToString() const {
     extra.push_back(
         StrCat("outfeed_config=\"", CEscape(outfeed_config_), "\""));
   }
+  if (opcode() == HloOpcode::kRng) {
+    extra.push_back(
+        StrCat("distribution=", RandomDistributionToString(distribution_)));
+  }
+  if (opcode() == HloOpcode::kReducePrecision) {
+    extra.push_back(StrCat("exponent_bits=", exponent_bits_));
+    extra.push_back(StrCat("mantissa_bits=", mantissa_bits_));
+  }
   return extra;
 }
 
@@ -3029,6 +3037,28 @@ string OpMetadataToString(const OpMetadata& metadata) {
   return Join(result, " ");
 }
 
+string RandomDistributionToString(const RandomDistribution& distribution) {
+  return tensorflow::str_util::Lowercase(RandomDistribution_Name(distribution));
+}
+
+StatusOr<RandomDistribution> StringToRandomDistribution(const string& name) {
+  static std::unordered_map<string, RandomDistribution>* map = [] {
+    static auto* map = new std::unordered_map<string, RandomDistribution>;
+    for (int i = 0; i < RandomDistribution_ARRAYSIZE; i++) {
+      if (RandomDistribution_IsValid(i)) {
+        auto value = static_cast<RandomDistribution>(i);
+        (*map)[RandomDistributionToString(value)] = value;
+      }
+    }
+    return map;
+  }();
+  auto found = map->find(tensorflow::str_util::Lowercase(name));
+  if (found == map->end()) {
+    return InvalidArgument("Unknown distribution");
+  }
+  return found->second;
+}
+
 std::ostream& operator<<(std::ostream& os, HloInstruction::FusionKind kind) {
   return os << ToString(kind);
 }
diff --git a/tensorflow/compiler/xla/service/hlo_instruction.h b/tensorflow/compiler/xla/service/hlo_instruction.h
index 768c027a42..088902e2a7 100644
--- a/tensorflow/compiler/xla/service/hlo_instruction.h
+++ b/tensorflow/compiler/xla/service/hlo_instruction.h
@@ -1285,9 +1285,12 @@ string ToString(HloInstruction::FusionKind kind);
 StatusOr<HloInstruction::FusionKind> StringToFusionKind(
     const string& kind_name);
 
-// Custom stringification functions for protos that live inside HloInstruction.
+// Custom (de)stringification functions for protos that live inside
+// HloInstruction.
 string PaddingConfigToString(const PaddingConfig& padding);
 string OpMetadataToString(const OpMetadata& metadata);
+string RandomDistributionToString(const RandomDistribution& distribution);
+StatusOr<RandomDistribution> StringToRandomDistribution(const string& name);
 
 std::ostream& operator<<(std::ostream& os, HloInstruction::FusionKind kind);
 
diff --git a/tensorflow/compiler/xla/tools/parser/BUILD b/tensorflow/compiler/xla/tools/parser/BUILD
index ce936af6c3..97aacf6b39 100644
--- a/tensorflow/compiler/xla/tools/parser/BUILD
+++ b/tensorflow/compiler/xla/tools/parser/BUILD
@@ -34,9 +34,9 @@ cc_library(
     deps = [
         "//tensorflow/compiler/xla:shape_util",
         "//tensorflow/compiler/xla:statusor",
+        "//tensorflow/compiler/xla:types",
         "//tensorflow/compiler/xla:util",
         "//tensorflow/compiler/xla:xla_data_proto",
-        "//tensorflow/compiler/xla/service:hlo",
         "//tensorflow/core:lib",
         "//tensorflow/core:regexp_internal",
     ],
diff --git a/tensorflow/compiler/xla/tools/parser/hlo_lexer.cc b/tensorflow/compiler/xla/tools/parser/hlo_lexer.cc
index 56744440db..04247594ed 100644
--- a/tensorflow/compiler/xla/tools/parser/hlo_lexer.cc
+++ b/tensorflow/compiler/xla/tools/parser/hlo_lexer.cc
@@ -17,7 +17,6 @@ limitations under the License.
 
 #include <unordered_map>
 
-#include "tensorflow/compiler/xla/service/hlo_instruction.h"
 #include "tensorflow/compiler/xla/shape_util.h"
 #include "tensorflow/compiler/xla/statusor.h"
 #include "tensorflow/compiler/xla/util.h"
@@ -153,15 +152,15 @@ TokKind HloLexer::LexToken() {
   }
 }
 
-// Lex a shape, name, keyword, opcode, attribute name, or the dim labels
-// pattern.
+// Lex a shape, name, keyword, attribute name, the dim labels pattern, and
+// other identifiers.
 //
 // shape    ::= ([a-zA-Z0-9_]*[0-9]*)\[([0-9,]*)\](?:\s*{([0-9,]*)})?
 // name     ::= [a-zA-Z_][a-zA-Z0-9_.-]*:
 // keyword  ::= HloModule, ENTRY, ...
-// opcode   ::= add, greater-than, ...
 // attribute_name ::= condition, body, dimensions, ...
 // dim_labels_pattern ::= [0-9bf]{2,}_[0-9io]{2,}->[0-9bf]{2,}
+// identifiers ::= other cases that match [a-zA-Z_][a-zA-Z0-9_.-]*
 TokKind HloLexer::LexIdentifier() {
   {
     auto consumable = RegexpStringPieceFromPointers(token_start_, buf_.end());
@@ -220,20 +219,6 @@ TokKind HloLexer::LexIdentifier() {
 
 #undef KEYWORD
 
-  // See if this is an opcode.
-  auto opcode = StringToHloOpcode(identifier.ToString());
-  if (opcode.ok()) {
-    opcode_val_ = opcode.ValueOrDie();
-    return TokKind::kOpcode;
-  }
-
-  // See if this is an fusion kind.
-  auto kind = xla::StringToFusionKind(identifier.ToString());
-  if (kind.ok()) {
-    fusion_kind_val_ = kind.ValueOrDie();
-    return TokKind::kFusionKind;
-  }
-
   {
     auto consumable = RegexpStringPieceFromPointers(token_start_, buf_.end());
     static LazyRE2 dim_labels_pattern = {
@@ -244,8 +229,9 @@ TokKind HloLexer::LexIdentifier() {
       return TokKind::kDimLabels;
     }
   }
-  current_ptr_ = token_start_ + 1;
-  return TokKind::kError;
+
+  str_val_ = identifier.ToString();
+  return TokKind::kIdent;
 }
 
 // Lex names after a % character.
@@ -428,14 +414,12 @@ string TokKindToString(TokKind kind) {
       return "kDxD";
     case TokKind::kPad:
       return "kPad";
+    case TokKind::kIdent:
+      return "kIdent";
     case TokKind::kString:
       return "kString";
     case TokKind::kShape:
       return "kShape";
-    case TokKind::kOpcode:
-      return "kOpcode";
-    case TokKind::kFusionKind:
-      return "kFusionKind";
     case TokKind::kInt:
       return "kInt";
     case TokKind::kDecimal:
diff --git a/tensorflow/compiler/xla/tools/parser/hlo_lexer.h b/tensorflow/compiler/xla/tools/parser/hlo_lexer.h
index 5c9d1bf391..9daf6a11d3 100644
--- a/tensorflow/compiler/xla/tools/parser/hlo_lexer.h
+++ b/tensorflow/compiler/xla/tools/parser/hlo_lexer.h
@@ -18,9 +18,8 @@ limitations under the License.
 
 #include <string>
 
-#include "tensorflow/compiler/xla/service/hlo_instruction.h"
-#include "tensorflow/compiler/xla/service/hlo_opcode.h"
 #include "tensorflow/compiler/xla/tools/parser/hlo_token.h"
+#include "tensorflow/compiler/xla/types.h"
 #include "tensorflow/compiler/xla/xla_data.pb.h"
 #include "tensorflow/core/lib/core/stringpiece.h"
 #include "tensorflow/core/platform/logging.h"
@@ -48,6 +47,7 @@ class HloLexer {
       case TokKind::kDxD:
       case TokKind::kPad:
       case TokKind::kString:
+      case TokKind::kIdent:
         return str_val_;
       default:
         LOG(FATAL) << "This token does not have string value";
@@ -57,14 +57,6 @@ class HloLexer {
     CHECK(GetKind() == TokKind::kShape);
     return shape_val_;
   }
-  HloOpcode GetOpcodeVal() const {
-    CHECK(GetKind() == TokKind::kOpcode);
-    return opcode_val_;
-  }
-  HloInstruction::FusionKind GetFusionKindVal() const {
-    CHECK(GetKind() == TokKind::kFusionKind);
-    return fusion_kind_val_;
-  }
   int64 GetInt64Val() const {
     CHECK(GetKind() == TokKind::kInt);
     return int64_val_;
@@ -114,8 +106,6 @@ class HloLexer {
   TokKind current_kind_;
   string str_val_;
   Shape shape_val_;
-  HloOpcode opcode_val_;
-  HloInstruction::FusionKind fusion_kind_val_;
   int64 int64_val_;
   double decimal_val_;
 };
diff --git a/tensorflow/compiler/xla/tools/parser/hlo_parser.cc b/tensorflow/compiler/xla/tools/parser/hlo_parser.cc
index 47979ec6f3..ddc1e69951 100644
--- a/tensorflow/compiler/xla/tools/parser/hlo_parser.cc
+++ b/tensorflow/compiler/xla/tools/parser/hlo_parser.cc
@@ -16,6 +16,7 @@ limitations under the License.
 #include "tensorflow/compiler/xla/tools/parser/hlo_parser.h"
 
 #include "tensorflow/compiler/xla/literal_util.h"
+#include "tensorflow/compiler/xla/service/hlo_opcode.h"
 #include "tensorflow/compiler/xla/shape_util.h"
 #include "tensorflow/compiler/xla/util.h"
 #include "tensorflow/core/lib/gtl/map_util.h"
@@ -104,6 +105,7 @@ class HloParser {
     kPaddingConfig,
     kMetadata,
     kFusionKind,
+    kDistribution,
   };
 
   struct AttrConfig {
@@ -174,6 +176,7 @@ class HloParser {
   bool ParseShape(Shape* result);
   bool ParseOpcode(HloOpcode* result);
   bool ParseFusionKind(HloInstruction::FusionKind* result);
+  bool ParseRandomDistribution(RandomDistribution* result);
   bool ParseInt64(int64* result);
   bool ParseDouble(double* result);
   bool ParseBool(bool* result);
@@ -816,10 +819,36 @@ bool HloParser::ParseInstruction(HloComputation::Builder* builder,
           shape, operands[0], config ? *config : ""));
       break;
     }
+    case HloOpcode::kRng: {
+      optional<RandomDistribution> distribution;
+      attrs["distribution"] = {/*required=*/true, AttrTy::kDistribution,
+                               &distribution};
+      if (!ParseOperands(&operands) || !ParseAttributes(attrs)) {
+        return false;
+      }
+      instruction = builder->AddInstruction(
+          HloInstruction::CreateRng(shape, *distribution, operands));
+      break;
+    }
+    case HloOpcode::kReducePrecision: {
+      optional<int64> exponent_bits;
+      optional<int64> mantissa_bits;
+      attrs["exponent_bits"] = {/*required=*/true, AttrTy::kInt64,
+                                &exponent_bits};
+      attrs["mantissa_bits"] = {/*required=*/true, AttrTy::kInt64,
+                                &mantissa_bits};
+      if (!ParseOperands(&operands, /*expected_size=*/1) ||
+          !ParseAttributes(attrs)) {
+        return false;
+      }
+      instruction =
+          builder->AddInstruction(HloInstruction::CreateReducePrecision(
+              shape, operands[0], static_cast<int>(*exponent_bits),
+              static_cast<int>(*mantissa_bits)));
+      break;
+    }
     case HloOpcode::kConditional:
     case HloOpcode::kCustomCall:
-    case HloOpcode::kReducePrecision:
-    case HloOpcode::kRng:
     case HloOpcode::kTrace:
       return TokenError(StrCat("parsing not yet implemented for op: ",
                                HloOpcodeString(opcode)));
@@ -1548,6 +1577,15 @@ bool HloParser::ParseAttributeHelper(
         static_cast<optional<OpMetadata>*>(attr_out_ptr)->emplace(result);
         return true;
       }
+      case AttrTy::kDistribution: {
+        RandomDistribution result;
+        if (!ParseRandomDistribution(&result)) {
+          return false;
+        }
+        static_cast<optional<RandomDistribution>*>(attr_out_ptr)
+            ->emplace(result);
+        return true;
+      }
     }
   }();
   if (!success) {
@@ -2024,20 +2062,51 @@ bool HloParser::ParseMetadata(OpMetadata* metadata) {
 
 bool HloParser::ParseOpcode(HloOpcode* result) {
   VLOG(1) << "ParseOpcode";
-  if (lexer_.GetKind() != TokKind::kOpcode) {
+  if (lexer_.GetKind() != TokKind::kIdent) {
     return TokenError("expects opcode");
   }
-  *result = lexer_.GetOpcodeVal();
+  string val = lexer_.GetStrVal();
+  auto status_or_result = StringToHloOpcode(val);
+  if (!status_or_result.ok()) {
+    return TokenError(
+        Printf("expects opcode but sees: %s, error: %s", val.c_str(),
+               status_or_result.status().error_message().c_str()));
+  }
+  *result = status_or_result.ValueOrDie();
   lexer_.Lex();
   return true;
 }
 
 bool HloParser::ParseFusionKind(HloInstruction::FusionKind* result) {
   VLOG(1) << "ParseFusionKind";
-  if (lexer_.GetKind() != TokKind::kFusionKind) {
+  if (lexer_.GetKind() != TokKind::kIdent) {
     return TokenError("expects fusion kind");
   }
-  *result = lexer_.GetFusionKindVal();
+  string val = lexer_.GetStrVal();
+  auto status_or_result = StringToFusionKind(val);
+  if (!status_or_result.ok()) {
+    return TokenError(
+        Printf("expects fusion kind but sees: %s, error: %s", val.c_str(),
+               status_or_result.status().error_message().c_str()));
+  }
+  *result = status_or_result.ValueOrDie();
+  lexer_.Lex();
+  return true;
+}
+
+bool HloParser::ParseRandomDistribution(RandomDistribution* result) {
+  VLOG(1) << "ParseRandomDistribution";
+  if (lexer_.GetKind() != TokKind::kIdent) {
+    return TokenError("expects random distribution");
+  }
+  string val = lexer_.GetStrVal();
+  auto status_or_result = StringToRandomDistribution(val);
+  if (!status_or_result.ok()) {
+    return TokenError(
+        Printf("expects random distribution but sees: %s, error: %s",
+               val.c_str(), status_or_result.status().error_message().c_str()));
+  }
+  *result = status_or_result.ValueOrDie();
   lexer_.Lex();
   return true;
 }
diff --git a/tensorflow/compiler/xla/tools/parser/hlo_parser_test.cc b/tensorflow/compiler/xla/tools/parser/hlo_parser_test.cc
index 90cdb87a1e..69d48d65bc 100644
--- a/tensorflow/compiler/xla/tools/parser/hlo_parser_test.cc
+++ b/tensorflow/compiler/xla/tools/parser/hlo_parser_test.cc
@@ -654,6 +654,31 @@ ENTRY %InfeedToOutfeed () -> (u32[3], pred[]) {
   %outfeed.1 = () outfeed((u32[3]{0}, pred[]) %infeed.1)
 }
 
+)"
+},
+// Rng
+{
+"Rng",
+R"(HloModule rng_module:
+
+ENTRY %Rng () -> f32[8] {
+  %constant = f32[] constant(0)
+  %constant.1 = f32[] constant(1)
+  ROOT %rng = f32[8]{0} rng(f32[] %constant, f32[] %constant.1), distribution=rng_uniform
+}
+
+)"
+},
+// Reduce precision
+{
+"ReducePrevison",
+R"(HloModule reduce_precision:
+
+ENTRY %ReducePrecision () -> f32[1] {
+  %constant = f32[1]{0} constant({3.14159})
+  ROOT %reduce-precision = f32[1]{0} reduce-precision(f32[1]{0} %constant), exponent_bits=8, mantissa_bits=10
+}
+
 )"
 }
   });
diff --git a/tensorflow/compiler/xla/tools/parser/hlo_token.h b/tensorflow/compiler/xla/tools/parser/hlo_token.h
index 07e48804d0..7928bee5c2 100644
--- a/tensorflow/compiler/xla/tools/parser/hlo_token.h
+++ b/tensorflow/compiler/xla/tools/parser/hlo_token.h
@@ -18,6 +18,9 @@ limitations under the License.
 
 #include <string>
 
+#include "tensorflow/compiler/xla/types.h"
+#include "tensorflow/core/platform/types.h"
+
 namespace xla {
 namespace tools {
 
@@ -60,10 +63,9 @@ enum class TokKind {
   kDimLabels,      // [0-9bf]{2,}_[0-9io]{2,}->[0-9bf]{2,}
   kDxD,            // [0-9]+(x[0-9]+)+
   kPad,            // [0-9]+_[0-9]+(_[0-9]+)?(x[0-9]+_[0-9]+(_[0-9]+)?)*
+  kIdent,          // other identifiers
   kString,         // "abcd\"\n"
   kShape,          // f32[2,3]{1,0}
-  kOpcode,         // add
-  kFusionKind,     // kLoop, kOutput, ...
   kInt,            // 42
   kDecimal,        // 4.2
 };
-- 
GitLab


From cae852a32ee8ef86d4a58512c1177359c5bfd465 Mon Sep 17 00:00:00 2001
From: Justine Tunney <jart@google.com>
Date: Thu, 30 Nov 2017 21:40:48 -0800
Subject: [PATCH 0496/1225] Change bazel-mirror to mirror.bazel (#15007)

---
 tensorflow/contrib/makefile/download_dependencies.sh | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tensorflow/contrib/makefile/download_dependencies.sh b/tensorflow/contrib/makefile/download_dependencies.sh
index 19e25ad767..904118e2d9 100755
--- a/tensorflow/contrib/makefile/download_dependencies.sh
+++ b/tensorflow/contrib/makefile/download_dependencies.sh
@@ -26,13 +26,13 @@ if [ ! -f $BZL_FILE_PATH ]; then
   exit 1;
 fi
 
-EIGEN_URL="$(grep -o 'http.*bitbucket.org/eigen/eigen/get/.*tar\.gz' "${BZL_FILE_PATH}" | grep -v bazel-mirror | head -n1)"
+EIGEN_URL="$(grep -o 'http.*bitbucket.org/eigen/eigen/get/.*tar\.gz' "${BZL_FILE_PATH}" | grep -v mirror.bazel | head -n1)"
 GEMMLOWP_URL="$(grep -o 'https://mirror.bazel.build/github.com/google/gemmlowp/.*zip' "${BZL_FILE_PATH}" | head -n1)"
 GOOGLETEST_URL="https://github.com/google/googletest/archive/release-1.8.0.tar.gz"
 NSYNC_URL="$(grep -o 'https://mirror.bazel.build/github.com/google/nsync/.*tar\.gz' "${BZL_FILE_PATH}" | head -n1)"
 PROTOBUF_URL="$(grep -o 'https://mirror.bazel.build/github.com/google/protobuf/.*tar\.gz' "${BZL_FILE_PATH}" | head -n1)"
 RE2_URL="$(grep -o 'https://mirror.bazel.build/github.com/google/re2/.*tar\.gz' "${BZL_FILE_PATH}" | head -n1)"
-FFT2D_URL="$(grep -o 'http.*fft\.tgz' "${BZL_FILE_PATH}" | grep -v bazel-mirror | head -n1)"
+FFT2D_URL="$(grep -o 'http.*fft\.tgz' "${BZL_FILE_PATH}" | grep -v mirror.bazel | head -n1)"
 DOUBLE_CONVERSION_URL="$(grep -o "https.*google/double-conversion.*\.zip" "${BZL_FILE_PATH}" | head -n1)"
 ABSL_URL="$(grep -o 'https://github.com/abseil/abseil-cpp/.*tar.gz' "${BZL_FILE_PATH}" | head -n1)"
 
-- 
GitLab


From e747fc911f0dc6f1bf0b9c0ac0b57ad1a704c542 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 30 Nov 2017 23:20:35 -0800
Subject: [PATCH 0497/1225] Add additional linkopts argument to
 tf_custom_op_library.

PiperOrigin-RevId: 177555877
---
 tensorflow/tensorflow.bzl | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tensorflow/tensorflow.bzl b/tensorflow/tensorflow.bzl
index 76ef59484f..709a2d46e1 100644
--- a/tensorflow/tensorflow.bzl
+++ b/tensorflow/tensorflow.bzl
@@ -1197,7 +1197,7 @@ check_deps = rule(
 
 # Helper to build a dynamic library (.so) from the sources containing
 # implementations of custom ops and kernels.
-def tf_custom_op_library(name, srcs=[], gpu_srcs=[], deps=[]):
+def tf_custom_op_library(name, srcs=[], gpu_srcs=[], deps=[], linkopts=[]):
   cuda_deps = [
       clean_dep("//tensorflow/core:stream_executor_headers_lib"),
       "@local_config_cuda//cuda:cuda_headers",
@@ -1226,7 +1226,7 @@ def tf_custom_op_library(name, srcs=[], gpu_srcs=[], deps=[]):
       deps=deps + if_cuda(cuda_deps),
       data=[name + "_check_deps"],
       copts=tf_copts(),
-      linkopts=select({
+      linkopts=linkopts + select({
           "//conditions:default": [
               "-lm",
           ],
-- 
GitLab


From 370e521762f3cbd558a7e56992e3b062236b626f Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 30 Nov 2017 23:46:38 -0800
Subject: [PATCH 0498/1225] Adds a fisher block for fully connected recurrent
 layers.

`FullyConnectedSeriesFB` uses an approximation to the Fisher information
matrix designed for RNNs. This CL also adds support for dtypes
other than `float32` to `fisher_factors`.

PiperOrigin-RevId: 177558080
---
 .../python/kernel_tests/fisher_blocks_test.py |  54 ++-
 .../kernel_tests/fisher_factors_test.py       | 117 +++++-
 tensorflow/contrib/kfac/python/ops/BUILD      |   1 +
 .../contrib/kfac/python/ops/fisher_blocks.py  | 210 +++++++++-
 .../contrib/kfac/python/ops/fisher_factors.py | 383 ++++++++++++++++--
 tensorflow/contrib/kfac/python/ops/utils.py   |  31 +-
 6 files changed, 751 insertions(+), 45 deletions(-)

diff --git a/tensorflow/contrib/kfac/python/kernel_tests/fisher_blocks_test.py b/tensorflow/contrib/kfac/python/kernel_tests/fisher_blocks_test.py
index 5f2b5c6cac..bdc950a4e6 100644
--- a/tensorflow/contrib/kfac/python/kernel_tests/fisher_blocks_test.py
+++ b/tensorflow/contrib/kfac/python/kernel_tests/fisher_blocks_test.py
@@ -301,8 +301,7 @@ class FullyConnectedDiagonalFB(test.TestCase):
     multiply_result_big, multiply_inverse_result_big = self.runFisherBlockOps(
         self.w, [self.inputs], [self.outputs], [self.output_grads])
     multiply_result_small, multiply_inverse_result_small = (
-        self.runFisherBlockOps(self.w,
-                               np.split(self.inputs, 2),
+        self.runFisherBlockOps(self.w, np.split(self.inputs, 2),
                                np.split(self.outputs, 2),
                                np.split(self.output_grads, 2)))
 
@@ -584,8 +583,7 @@ class ConvDiagonalFBTest(test.TestCase):
     multiply_result_big, multiply_inverse_result_big = self.runFisherBlockOps(
         self.w, [self.inputs], [self.outputs], [self.output_grads])
     multiply_result_small, multiply_inverse_result_small = (
-        self.runFisherBlockOps(self.w,
-                               np.split(self.inputs, 2),
+        self.runFisherBlockOps(self.w, np.split(self.inputs, 2),
                                np.split(self.outputs, 2),
                                np.split(self.output_grads, 2)))
 
@@ -608,8 +606,9 @@ class ConvDiagonalFBTest(test.TestCase):
         self.kernel_size, self.kernel_size, self.input_channels + 1,
         self.output_channels
     ])
-    expected_result = (expected_result[:, :, 0:-1, :], np.reshape(
-        expected_result[:, :, -1, :], [self.output_channels]))
+    expected_result = (expected_result[:, :, 0:-1, :],
+                       np.reshape(expected_result[:, :, -1, :],
+                                  [self.output_channels]))
 
     self.assertEqual(len(result), 2)
     self.assertAllClose(expected_result[0], result[0])
@@ -692,8 +691,8 @@ class ConvKFCBasicFBTest(test.TestCase):
       sess.run(block._input_factor.make_inverse_update_ops())
       sess.run(block._output_factor.make_inverse_update_ops())
 
-      vector = (np.arange(1, 15).reshape(7, 2).astype(np.float32), np.arange(
-          2, 4).reshape(2, 1).astype(np.float32))
+      vector = (np.arange(1, 15).reshape(7, 2).astype(np.float32),
+                np.arange(2, 4).reshape(2, 1).astype(np.float32))
       output = block.multiply_inverse((array_ops.constant(vector[0]),
                                        array_ops.constant(vector[1])))
 
@@ -776,11 +775,50 @@ class ConvKFCBasicFBTest(test.TestCase):
       self.assertAllClose(output_flat, explicit)
 
 
+class FullyConnectedSeriesFBTest(test.TestCase):
+
+  def testFullyConnectedSeriesFBInit(self):
+    with ops.Graph().as_default():
+      random_seed.set_random_seed(200)
+      inputs = array_ops.constant([1., 2.])
+      outputs = array_ops.constant([3., 4.])
+      block = fb.FullyConnectedSeriesFB(
+          lc.LayerCollection(), inputs=[inputs], outputs=[outputs])
+      self.assertAllEqual([outputs], block.tensors_to_compute_grads())
+
+  def testInstantiateFactorsHasBias(self):
+    with ops.Graph().as_default():
+      random_seed.set_random_seed(200)
+      inputs = array_ops.constant([[1., 2.], [3., 4.]])
+      outputs = array_ops.constant([[3., 4.], [5., 6.]])
+      block = fb.FullyConnectedSeriesFB(
+          lc.LayerCollection(),
+          inputs=[inputs],
+          outputs=[outputs],
+          has_bias=True)
+      grads = outputs**2
+      block.instantiate_factors(((grads,),), 0.5)
+
+  def testInstantiateFactorsNoBias(self):
+    with ops.Graph().as_default():
+      random_seed.set_random_seed(200)
+      inputs = array_ops.constant([[1., 2.], [3., 4.]])
+      outputs = array_ops.constant([[3., 4.], [5., 6.]])
+      block = fb.FullyConnectedSeriesFB(
+          lc.LayerCollection(),
+          inputs=[inputs],
+          outputs=[outputs],
+          has_bias=False)
+      grads = outputs**2
+      block.instantiate_factors(((grads,),), 0.5)
+
+
 def as_tensors(tensor_or_tuple):
   """Converts a potentially nested tuple of np.array to Tensors."""
   if isinstance(tensor_or_tuple, (tuple, list)):
     return tuple(as_tensors(t) for t in tensor_or_tuple)
   return ops.convert_to_tensor(tensor_or_tuple)
 
+
 if __name__ == '__main__':
   test.main()
diff --git a/tensorflow/contrib/kfac/python/kernel_tests/fisher_factors_test.py b/tensorflow/contrib/kfac/python/kernel_tests/fisher_factors_test.py
index 5e2ce5a309..f4a017fc77 100644
--- a/tensorflow/contrib/kfac/python/kernel_tests/fisher_factors_test.py
+++ b/tensorflow/contrib/kfac/python/kernel_tests/fisher_factors_test.py
@@ -67,6 +67,10 @@ class FisherFactorTestingDummy(ff.FisherFactor):
   def _num_sources(self):
     return 1
 
+  @property
+  def _dtype(self):
+    return dtypes.float32
+
   def _compute_new_cov(self):
     raise NotImplementedError
 
@@ -94,6 +98,10 @@ class InverseProvidingFactorTestingDummy(ff.InverseProvidingFactor):
   def _num_sources(self):
     return 1
 
+  @property
+  def _dtype(self):
+    return dtypes.float32
+
   def _compute_new_cov(self):
     raise NotImplementedError
 
@@ -121,7 +129,7 @@ class NumericalUtilsTest(test.TestCase):
 
       normalizer = 10.
       x = npr.randn(100, 3)
-      cov = ff._compute_cov(array_ops.constant(x), normalizer)
+      cov = ff._compute_cov(array_ops.constant(x), normalizer=normalizer)
       np_cov = np.dot(x.T, x) / normalizer
 
       self.assertAllClose(sess.run(cov), np_cov)
@@ -267,13 +275,13 @@ class InverseProvidingFactorTest(test.TestCase):
       for i in range(1, ff.EIGENVALUE_DECOMPOSITION_THRESHOLD + 1):
         factor.register_damped_inverse(1. / i)
       ops = factor.make_inverse_update_ops()
-      self.assertEqual(ff.EIGENVALUE_DECOMPOSITION_THRESHOLD, len(ops))
+      self.assertEqual(1, len(ops))
 
       sess.run(tf_variables.global_variables_initializer())
       new_invs = []
+      sess.run(ops)
       for i in range(1, ff.EIGENVALUE_DECOMPOSITION_THRESHOLD + 1):
         # The inverse op will assign the damped inverse of cov to the inv var.
-        sess.run(ops[i - 1])
         new_invs.append(sess.run(factor._inverses_by_damping[1. / i]))
       # We want to see that the new invs are all different from each other.
       for i in range(len(new_invs)):
@@ -331,6 +339,16 @@ class FullFactorTest(test.TestCase):
       factor = ff.FullFactor((tensor,), 32)
       self.assertEqual([6, 6], factor.get_cov().get_shape().as_list())
 
+  def testFullFactorInitFloat64(self):
+    with tf_ops.Graph().as_default():
+      dtype = dtypes.float64_ref
+      random_seed.set_random_seed(200)
+      tensor = array_ops.ones((2, 3), dtype=dtype, name='a/b/c')
+      factor = ff.FullFactor((tensor,), 32)
+      cov = factor.get_cov()
+      self.assertEqual(cov.dtype, dtype)
+      self.assertEqual([6, 6], cov.get_shape().as_list())
+
   def testMakeCovarianceUpdateOp(self):
     with tf_ops.Graph().as_default(), self.test_session() as sess:
       random_seed.set_random_seed(200)
@@ -351,6 +369,16 @@ class NaiveDiagonalFactorTest(test.TestCase):
       factor = ff.NaiveDiagonalFactor((tensor,), 32)
       self.assertEqual([6, 1], factor.get_cov().get_shape().as_list())
 
+  def testNaiveDiagonalFactorInitFloat64(self):
+    with tf_ops.Graph().as_default():
+      dtype = dtypes.float64_ref
+      random_seed.set_random_seed(200)
+      tensor = array_ops.ones((2, 3), dtype=dtype, name='a/b/c')
+      factor = ff.NaiveDiagonalFactor((tensor,), 32)
+      cov = factor.get_cov()
+      self.assertEqual(cov.dtype, dtype)
+      self.assertEqual([6, 1], cov.get_shape().as_list())
+
   def testMakeCovarianceUpdateOp(self):
     with tf_ops.Graph().as_default(), self.test_session() as sess:
       random_seed.set_random_seed(200)
@@ -364,18 +392,25 @@ class NaiveDiagonalFactorTest(test.TestCase):
 
 class FullyConnectedKroneckerFactorTest(test.TestCase):
 
-  def _testFullyConnectedKroneckerFactorInit(self, has_bias, final_shape):
+  def _testFullyConnectedKroneckerFactorInit(self,
+                                             has_bias,
+                                             final_shape,
+                                             dtype=dtypes.float32_ref):
     with tf_ops.Graph().as_default():
       random_seed.set_random_seed(200)
-      tensor = array_ops.ones((2, 3), name='a/b/c')
+      tensor = array_ops.ones((2, 3), dtype=dtype, name='a/b/c')
       factor = ff.FullyConnectedKroneckerFactor((tensor,), has_bias=has_bias)
-      self.assertEqual(final_shape, factor.get_cov().get_shape().as_list())
+      cov = factor.get_cov()
+      self.assertEqual(cov.dtype, dtype)
+      self.assertEqual(final_shape, cov.get_shape().as_list())
 
   def testFullyConnectedKroneckerFactorInitNoBias(self):
-    self._testFullyConnectedKroneckerFactorInit(False, [3, 3])
+    for dtype in (dtypes.float32_ref, dtypes.float64_ref):
+      self._testFullyConnectedKroneckerFactorInit(False, [3, 3], dtype=dtype)
 
   def testFullyConnectedKroneckerFactorInitWithBias(self):
-    self._testFullyConnectedKroneckerFactorInit(True, [4, 4])
+    for dtype in (dtypes.float32_ref, dtypes.float64_ref):
+      self._testFullyConnectedKroneckerFactorInit(True, [4, 4], dtype=dtype)
 
   def testMakeCovarianceUpdateOpWithBias(self):
     with tf_ops.Graph().as_default(), self.test_session() as sess:
@@ -418,6 +453,18 @@ class ConvInputKroneckerFactorTest(test.TestCase):
       self.assertEqual([1 * 2 * 3 + 1, 1 * 2 * 3 + 1],
                        factor.get_cov().get_shape().as_list())
 
+  def testConvInputKroneckerFactorInitFloat64(self):
+    with tf_ops.Graph().as_default():
+      dtype = dtypes.float64_ref
+      random_seed.set_random_seed(200)
+      tensor = array_ops.ones((2, 3), dtype=dtype, name='a/b/c')
+      factor = ff.ConvInputKroneckerFactor(
+          tensor, (1, 2, 3, 4), 3, 2, has_bias=True)
+      cov = factor.get_cov()
+      self.assertEqual(cov.dtype, dtype)
+      self.assertEqual([1 * 2 * 3 + 1, 1 * 2 * 3 + 1],
+                       cov.get_shape().as_list())
+
   def testMakeCovarianceUpdateOpWithBias(self):
     with tf_ops.Graph().as_default(), self.test_session() as sess:
       random_seed.set_random_seed(200)
@@ -453,6 +500,16 @@ class ConvOutputKroneckerFactorTest(test.TestCase):
       factor = ff.ConvOutputKroneckerFactor((tensor,))
       self.assertEqual([5, 5], factor.get_cov().get_shape().as_list())
 
+  def testConvOutputKroneckerFactorInitFloat64(self):
+    with tf_ops.Graph().as_default():
+      dtype = dtypes.float64_ref
+      random_seed.set_random_seed(200)
+      tensor = array_ops.ones((2, 3, 4, 5), dtype=dtype, name='a/b/c')
+      factor = ff.ConvOutputKroneckerFactor((tensor,))
+      cov = factor.get_cov()
+      self.assertEqual(cov.dtype, dtype)
+      self.assertEqual([5, 5], cov.get_shape().as_list())
+
   def testConvOutputKroneckerFactorInitNotEnoughDims(self):
     with tf_ops.Graph().as_default():
       random_seed.set_random_seed(200)
@@ -471,5 +528,49 @@ class ConvOutputKroneckerFactorTest(test.TestCase):
       self.assertAllClose([[43, 46.5], [46.5, 51.5]], new_cov)
 
 
+class FullyConnectedMultiKFTest(test.TestCase):
+
+  def testFullyConnectedMultiKFInit(self):
+    with tf_ops.Graph().as_default():
+      random_seed.set_random_seed(200)
+      tensor = array_ops.ones((2, 3), name='a/b/c')
+      tensor_list = [tensor]
+      factor = ff.FullyConnectedMultiKF((tensor_list,), has_bias=False)
+      self.assertEqual([3, 3], factor.get_cov().get_shape().as_list())
+
+  def testFullyConnectedMultiKFInitFloat64(self):
+    with tf_ops.Graph().as_default():
+      dtype = dtypes.float64_ref
+      random_seed.set_random_seed(200)
+      tensor = array_ops.ones((2, 3), dtype=dtype, name='a/b/c')
+      tensor_list = [tensor]
+      factor = ff.FullyConnectedMultiKF((tensor_list,), has_bias=False)
+      cov = factor.get_cov()
+      self.assertEqual(cov.dtype, dtype)
+      self.assertEqual([3, 3], cov.get_shape().as_list())
+
+  def testMakeCovarianceUpdateOpWithBias(self):
+    with tf_ops.Graph().as_default(), self.test_session() as sess:
+      random_seed.set_random_seed(200)
+      tensor = array_ops.constant([[1., 2.], [3., 4.]], name='a/b/c')
+      tensor_list = [tensor]
+      factor = ff.FullyConnectedMultiKF((tensor_list,), has_bias=True)
+
+      sess.run(tf_variables.global_variables_initializer())
+      new_cov = sess.run(factor.make_covariance_update_op(.5))
+      self.assertAllClose([[3, 3.5, 1], [3.5, 5.5, 1.5], [1, 1.5, 1]], new_cov)
+
+  def testMakeCovarianceUpdateOpNoBias(self):
+    with tf_ops.Graph().as_default(), self.test_session() as sess:
+      random_seed.set_random_seed(200)
+      tensor = array_ops.constant([[1., 2.], [3., 4.]], name='a/b/c')
+      tensor_list = [tensor]
+      factor = ff.FullyConnectedMultiKF((tensor_list,))
+
+      sess.run(tf_variables.global_variables_initializer())
+      new_cov = sess.run(factor.make_covariance_update_op(.5))
+      self.assertAllClose([[3, 3.5], [3.5, 5.5]], new_cov)
+
+
 if __name__ == '__main__':
   test.main()
diff --git a/tensorflow/contrib/kfac/python/ops/BUILD b/tensorflow/contrib/kfac/python/ops/BUILD
index b2272a4cee..3d731c7bc2 100644
--- a/tensorflow/contrib/kfac/python/ops/BUILD
+++ b/tensorflow/contrib/kfac/python/ops/BUILD
@@ -38,6 +38,7 @@ py_library(
         ":utils",
         "//tensorflow/python:array_ops",
         "//tensorflow/python:framework_ops",
+        "//tensorflow/python:init_ops",
         "//tensorflow/python:linalg_ops",
         "//tensorflow/python:math_ops",
         "//tensorflow/python:special_math_ops",
diff --git a/tensorflow/contrib/kfac/python/ops/fisher_blocks.py b/tensorflow/contrib/kfac/python/ops/fisher_blocks.py
index e822a1213a..cf734d56ad 100644
--- a/tensorflow/contrib/kfac/python/ops/fisher_blocks.py
+++ b/tensorflow/contrib/kfac/python/ops/fisher_blocks.py
@@ -38,6 +38,7 @@ from __future__ import division
 from __future__ import print_function
 
 import abc
+import enum  # pylint: disable=g-bad-import-order
 
 import six
 
@@ -153,7 +154,7 @@ class FullFB(FisherBlock):
     self._factor.register_damped_inverse(damping)
 
   def multiply_inverse(self, vector):
-    inverse = self._factor.get_inverse(self._damping)
+    inverse = self._factor.get_damped_inverse(self._damping)
     out_flat = math_ops.matmul(inverse, utils.tensors_to_column(vector))
     return utils.column_to_tensors(vector, out_flat)
 
@@ -411,7 +412,7 @@ class ConvDiagonalFB(FisherBlock):
         (self._strides[1] * self._strides[2]))
 
     if NORMALIZE_DAMPING_POWER:
-      damping /= self._num_locations ** NORMALIZE_DAMPING_POWER
+      damping /= self._num_locations**NORMALIZE_DAMPING_POWER
     self._damping = damping
 
     self._factor = self._layer_collection.make_or_get_factor(
@@ -487,8 +488,9 @@ class KroneckerProductFB(FisherBlock):
     return 1.0
 
   def multiply_inverse(self, vector):
-    left_factor_inv = self._input_factor.get_inverse(self._input_damping)
-    right_factor_inv = self._output_factor.get_inverse(self._output_damping)
+    left_factor_inv = self._input_factor.get_damped_inverse(self._input_damping)
+    right_factor_inv = self._output_factor.get_damped_inverse(
+        self._output_damping)
     reshaped_vector = utils.layer_params_to_mat2d(vector)
     reshaped_out = math_ops.matmul(left_factor_inv,
                                    math_ops.matmul(reshaped_vector,
@@ -720,3 +722,203 @@ def _concat_along_batch_dim(tensor_list):
 def _num_conv_locations(input_shape, strides):
   """Returns the number of locations a Conv kernel is applied to."""
   return input_shape[1] * input_shape[2] // (strides[1] * strides[2])
+
+
+class SeriesFBApproximation(enum.IntEnum):
+  """See FullyConnectedSeriesFB.__init__ for description and usage."""
+  option1 = 1
+  option2 = 2
+
+
+class FullyConnectedSeriesFB(FisherBlock):
+  """FisherBlock for fully-connected RNN cells.
+
+  See the following preprint for details:
+    https://openreview.net/pdf?id=HyMTkQZAb
+
+  See the end of the appendix of the paper for a pseudo-code of the
+  algorithm being implemented by multiply_inverse here.  Note that we are
+  using pre-computed versions of certain matrix-matrix products to speed
+  things up.  This is explicitly explained wherever it is done.
+  """
+
+  def __init__(self,
+               layer_collection,
+               inputs,
+               outputs,
+               has_bias=False,
+               option=SeriesFBApproximation.option2):
+    """Constructs a new `FullyConnectedSeriesFB`.
+
+    Args:
+      layer_collection: The collection of all layers in the K-FAC approximate
+        Fisher information matrix to which this FisherBlock belongs.
+      inputs: List of tensors of shape [batch_size, input_size].
+        Inputs to the layer.
+      outputs: List of tensors of shape [batch_size, input_size].
+        Outputs of the layer (before activations).
+      has_bias: Whether the layer includes a bias parameter.
+      option: A `SeriesFBApproximation` specifying the simplifying assumption
+        to be used in this block. `option1` approximates the cross-covariance
+        over time as a symmetric matrix, while `option2` makes
+        the assumption that training sequences are infinitely long. See section
+        3.5 of the paper for more details.
+    """
+
+    assert len(inputs) == len(outputs)
+    # We need to make sure inputs and outputs are tuples and not lists so that
+    # they get hashed by layer_collection.make_or_get_factor properly.
+    self._inputs = tuple(inputs)
+    self._outputs = tuple(outputs)
+    self._has_bias = has_bias
+    self._num_timesteps = len(inputs)
+    self._option = option
+
+    super(FullyConnectedSeriesFB, self).__init__(layer_collection)
+
+  @property
+  def num_registered_minibatches(self):
+    # TODO(b/69411207): Add support for registering additional minibatches.
+    return 1
+
+  def instantiate_factors(self, grads_list, damping):
+
+    self._input_factor = self._layer_collection.make_or_get_factor(
+        fisher_factors.FullyConnectedMultiKF, ((self._inputs,), self._has_bias))
+
+    self._output_factor = self._layer_collection.make_or_get_factor(
+        fisher_factors.FullyConnectedMultiKF, (grads_list,))
+
+    damping /= self._num_timesteps**NORMALIZE_DAMPING_POWER
+
+    pi = utils.compute_pi(self._input_factor.get_cov(),
+                          self._output_factor.get_cov())
+
+    self._damping_input = (damping**0.5) * pi
+    self._damping_output = (damping**0.5) / pi
+
+    if self._option == SeriesFBApproximation.option1:
+      self._input_factor.register_option1quants(self._damping_input)
+      self._output_factor.register_option1quants(self._damping_output)
+    elif self._option == SeriesFBApproximation.option2:
+      self._input_factor.register_option2quants(self._damping_input)
+      self._output_factor.register_option2quants(self._damping_output)
+    else:
+      raise ValueError(
+          "Unrecognized FullyConnectedSeriesFB approximation: {}".format(
+              self._option))
+
+  def multiply_inverse(self, vector):
+    # pylint: disable=invalid-name
+
+    Z = utils.layer_params_to_mat2d(vector)
+
+    # Derivations were done for "batch_dim==1" case so we need to convert to
+    # that orientation:
+    Z = array_ops.transpose(Z)
+
+    if self._option == SeriesFBApproximation.option1:
+
+      # Note that L_A = A0^(-1/2) * U_A and L_G = G0^(-1/2) * U_G.
+      L_A, psi_A = self._input_factor.get_option1quants(self._damping_input)
+      L_G, psi_G = self._output_factor.get_option1quants(self._damping_output)
+
+      def gamma(x):
+        # We are assuming that each case has the same number of time-steps.
+        # If this stops being the case one shouldn't simply replace this T
+        # with its average value.  Instead, one needs to go back to the
+        # definition of the gamma function from the paper.
+        T = self._num_timesteps
+        return (1 - x)**2 / (T * (1 - x**2) - 2 * x * (1 - x**T))
+
+      # Y = gamma( psi_G*psi_A^T ) (computed element-wise)
+      # Even though Y is Z-independent we are recomputing it from the psi's
+      # each since Y depends on both A and G quantities, and it is relatively
+      # cheap to compute.
+      Y = gamma(array_ops.reshape(psi_G, [int(psi_G.shape[0]), -1]) * psi_A)
+
+      # Z = L_G^T * Z * L_A
+      # This is equivalent to the following computation from the original
+      # pseudo-code:
+      # Z = G0^(-1/2) * Z * A0^(-1/2)
+      # Z = U_G^T * Z * U_A
+      Z = math_ops.matmul(L_G, math_ops.matmul(Z, L_A), transpose_a=True)
+
+      # Z = Z .* Y
+      Z *= Y
+
+      # Z = L_G * Z * L_A^T
+      # This is equivalent to the following computation from the original
+      # pseudo-code:
+      # Z = U_G * Z * U_A^T
+      # Z = G0^(-1/2) * Z * A0^(-1/2)
+      Z = math_ops.matmul(L_G, math_ops.matmul(Z, L_A, transpose_b=True))
+
+    elif self._option == SeriesFBApproximation.option2:
+
+      # Note that P_A = A_1^T * A_0^(-1) and P_G = G_1^T * G_0^(-1),
+      # and K_A = A_0^(-1/2) * E_A and K_G = G_0^(-1/2) * E_G.
+      P_A, K_A, mu_A = self._input_factor.get_option2quants(self._damping_input)
+      P_G, K_G, mu_G = self._output_factor.get_option2quants(
+          self._damping_output)
+
+      # Our approach differs superficially from the pseudo-code in the paper
+      # in order to reduce the total number of matrix-matrix multiplies.
+      # In particular, the first three computations in the pseudo code are
+      # Z = G0^(-1/2) * Z * A0^(-1/2)
+      # Z = Z - hPsi_G^T * Z * hPsi_A
+      # Z = E_G^T * Z * E_A
+      # Noting that hPsi = C0^(-1/2) * C1 * C0^(-1/2), so that
+      # C0^(-1/2) * hPsi = C0^(-1) * C1 * C0^(-1/2) = P^T * C0^(-1/2)
+      # the entire computation can be written as
+      # Z = E_G^T * (G0^(-1/2) * Z * A0^(-1/2)
+      #     - hPsi_G^T * G0^(-1/2) * Z * A0^(-1/2) * hPsi_A) * E_A
+      #   = E_G^T * (G0^(-1/2) * Z * A0^(-1/2)
+      #     - G0^(-1/2) * P_G * Z * P_A^T * A0^(-1/2)) * E_A
+      #   = E_G^T * G0^(-1/2) * Z * A0^(-1/2) * E_A
+      #     -  E_G^T* G0^(-1/2) * P_G * Z * P_A^T * A0^(-1/2) * E_A
+      #   = K_G^T * Z * K_A  -  K_G^T * P_G * Z * P_A^T * K_A
+      # This final expression is computed by the following two lines:
+      # Z = Z - P_G * Z * P_A^T
+      Z -= math_ops.matmul(P_G, math_ops.matmul(Z, P_A, transpose_b=True))
+      # Z = K_G^T * Z * K_A
+      Z = math_ops.matmul(K_G, math_ops.matmul(Z, K_A), transpose_a=True)
+
+      # Z = Z ./ (1*1^T - mu_G*mu_A^T)
+      # Be careful with the outer product.  We don't want to accidentally
+      # make it an inner-product instead.
+      tmp = 1.0 - array_ops.reshape(mu_G, [int(mu_G.shape[0]), -1]) * mu_A
+      # Prevent some numerical issues by setting 0 eigs to 1.0
+      tmp += 1.0 * array_ops.cast(math_ops.equal(tmp, 0.0), dtype=tmp.dtype)
+      Z /= tmp
+
+      # We now perform the transpose/reverse version of the operations
+      # derived above, whose derivation from the original pseudo-code is
+      # analgous.
+      # Z = K_G * Z * K_A^T
+      Z = math_ops.matmul(K_G, math_ops.matmul(Z, K_A, transpose_b=True))
+
+      # Z = Z - P_G^T * Z * P_A
+      Z -= math_ops.matmul(P_G, math_ops.matmul(Z, P_A), transpose_a=True)
+
+      # Z = normalize (1/E[T]) * Z
+      # Note that this normalization is done because we compute the statistics
+      # by averaging, not summing, over time. (And the gradient is presumably
+      # summed over time, not averaged, and thus their scales are different.)
+      Z /= array_ops.cast(self._num_timesteps, Z.dtype)
+
+    # Convert back to the "batch_dim==0" orientation.
+    Z = array_ops.transpose(Z)
+
+    return utils.mat2d_to_layer_params(vector, Z)
+
+    # pylint: enable=invalid-name
+
+  def multiply(self, vector):
+    raise NotImplementedError
+
+  def tensors_to_compute_grads(self):
+    return self._outputs
+
+  def num_inputs(self):
+    return len(self._inputs)
diff --git a/tensorflow/contrib/kfac/python/ops/fisher_factors.py b/tensorflow/contrib/kfac/python/ops/fisher_factors.py
index 6c1dd0ae40..ff8636785a 100644
--- a/tensorflow/contrib/kfac/python/ops/fisher_factors.py
+++ b/tensorflow/contrib/kfac/python/ops/fisher_factors.py
@@ -27,6 +27,8 @@ import six
 from tensorflow.contrib.kfac.python.ops import utils
 from tensorflow.python.framework import ops as tf_ops
 from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import control_flow_ops
+from tensorflow.python.ops import init_ops
 from tensorflow.python.ops import linalg_ops
 from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import special_math_ops
@@ -101,7 +103,7 @@ def diagonal_covariance_initializer(shape, dtype, partition_info):  # pylint: di
   return array_ops.ones(shape, dtype)
 
 
-def _compute_cov(tensor, normalizer=None):
+def _compute_cov(tensor, tensor_right=None, normalizer=None):
   """Compute the empirical second moment of the rows of a 2D Tensor.
 
   This function is meant to be applied to random matrices for which the true row
@@ -109,6 +111,8 @@ def _compute_cov(tensor, normalizer=None):
 
   Args:
     tensor: A 2D Tensor.
+    tensor_right: An optional 2D Tensor. If provided, this function computes
+      the matrix product tensor^T * tensor_right instead of tensor^T * tensor.
     normalizer: optional scalar for the estimator (by default, the normalizer is
         the number of rows of tensor).
 
@@ -117,9 +121,14 @@ def _compute_cov(tensor, normalizer=None):
   """
   if normalizer is None:
     normalizer = array_ops.shape(tensor)[0]
-  cov = (math_ops.matmul(tensor, tensor, transpose_a=True) / math_ops.cast(
-      normalizer, tensor.dtype))
-  return (cov + array_ops.transpose(cov)) / math_ops.cast(2, cov.dtype)
+  if tensor_right is None:
+    cov = (
+        math_ops.matmul(tensor, tensor, transpose_a=True) / math_ops.cast(
+            normalizer, tensor.dtype))
+    return (cov + array_ops.transpose(cov)) / math_ops.cast(2.0, cov.dtype)
+  else:
+    return (math_ops.matmul(tensor, tensor_right, transpose_a=True) /
+            math_ops.cast(normalizer, tensor.dtype))
 
 
 def _append_homog(tensor):
@@ -135,7 +144,7 @@ def _append_homog(tensor):
   rank = len(tensor.shape.as_list())
   shape = array_ops.concat([array_ops.shape(tensor)[:-1], [1]], axis=0)
   ones = array_ops.ones(shape, dtype=tensor.dtype)
-  return array_ops.concat([tensor, ones], axis=rank-1)
+  return array_ops.concat([tensor, ones], axis=rank - 1)
 
 
 def scope_string_from_params(params):
@@ -173,8 +182,8 @@ def scope_string_from_params(params):
     elif isinstance(param, (tf_ops.Tensor, variables.Variable)):
       name_parts.append(scope_string_from_name(param))
     else:
-      raise ValueError(
-          "Encountered an unsupported param type {}".format(type(param)))
+      raise ValueError("Encountered an unsupported param type {}".format(
+          type(param)))
   return "_".join(name_parts)
 
 
@@ -225,6 +234,10 @@ class FisherFactor(object):
     """
     pass
 
+  @abc.abstractproperty
+  def _dtype(self):
+    pass
+
   @property
   def _cov_initializer(self):
     return covariance_initializer
@@ -236,7 +249,8 @@ class FisherFactor(object):
           "cov",
           initializer=self._cov_initializer,
           shape=self._cov_shape,
-          trainable=False)
+          trainable=False,
+          dtype=self._dtype)
 
   @abc.abstractmethod
   def _compute_new_cov(self, idx=0):
@@ -273,6 +287,13 @@ class InverseProvidingFactor(FisherFactor):
   _cov_shape properties.
   """
 
+  # TODO(b/69108481): This class (and its subclasses) should be refactored to
+  # serve the matrix quantities it computes as both (potentially stale)
+  # variables, updated by the inverse update ops, and fresh values stored in
+  # tensors that recomputed once every session.run() call.  Currently matpower
+  # and damp_inverse have the former behavior, while eigendecomposition has
+  # the latter.
+
   def __init__(self):
     self._inverses_by_damping = {}
     self._matpower_by_exp_and_damping = {}
@@ -293,7 +314,8 @@ class InverseProvidingFactor(FisherFactor):
             "inv_damp{}".format(damping_string),
             initializer=inverse_initializer,
             shape=self._cov_shape,
-            trainable=False)
+            trainable=False,
+            dtype=self._dtype)
       self._inverses_by_damping[damping] = inv
 
   def register_matpower(self, exp, damping):
@@ -311,7 +333,8 @@ class InverseProvidingFactor(FisherFactor):
             "matpower_exp{}_damp{}".format(exp_string, damping_string),
             initializer=inverse_initializer,
             shape=self._cov_shape,
-            trainable=False)
+            trainable=False,
+            dtype=self._dtype)
       self._matpower_by_exp_and_damping[(exp, damping)] = matpower
 
   def register_eigendecomp(self):
@@ -325,8 +348,9 @@ class InverseProvidingFactor(FisherFactor):
 
     num_inverses = len(self._inverses_by_damping)
     matrix_power_registered = bool(self._matpower_by_exp_and_damping)
-    use_eig = (self._eigendecomp or matrix_power_registered or
-               num_inverses >= EIGENVALUE_DECOMPOSITION_THRESHOLD)
+    use_eig = (
+        self._eigendecomp or matrix_power_registered or
+        num_inverses >= EIGENVALUE_DECOMPOSITION_THRESHOLD)
 
     if use_eig:
       self.register_eigendecomp()  # ensures self._eigendecomp is set
@@ -347,21 +371,30 @@ class InverseProvidingFactor(FisherFactor):
       for (exp, damping), matpower in self._matpower_by_exp_and_damping.items():
         ops.append(
             matpower.assign(
-                math_ops.matmul(eigenvectors * (clipped_eigenvalues + damping)**
-                                exp, array_ops.transpose(eigenvectors))))
+                math_ops.matmul(eigenvectors *
+                                (clipped_eigenvalues + damping)**exp,
+                                array_ops.transpose(eigenvectors))))
+      # These ops share computation and should be run on a single device.
+      ops = [control_flow_ops.group(ops)]
     else:
       for damping, inv in self._inverses_by_damping.items():
         ops.append(inv.assign(utils.posdef_inv(self._cov, damping)))
 
     return ops
 
-  def get_inverse(self, damping):
+  def get_damped_inverse(self, damping):
     return self._inverses_by_damping[damping]
 
   def get_matpower(self, exp, damping):
+    # Note that this function returns a variable which gets updated by the
+    # inverse ops.  It may be stale / inconsistent with the latest value of
+    # get_cov().
     return self._matpower_by_exp_and_damping[(exp, damping)]
 
   def get_eigendecomp(self):
+    # Unlike get_inverse and get_matpower this doesn't retrieve a stored
+    # variable, but instead always computes a fresh version from the current
+    # value of get_cov().
     return self._eigendecomp
 
 
@@ -402,6 +435,10 @@ class FullFactor(InverseProvidingFactor):
   def _num_sources(self):
     return len(self._params_grads_flat)
 
+  @property
+  def _dtype(self):
+    return self._params_grads_flat[0].dtype
+
   def _compute_new_cov(self, idx=0):
     # This will be a very basic rank 1 estimate
     with _maybe_colocate_with(self._params_grads_flat[idx],
@@ -458,6 +495,10 @@ class NaiveDiagonalFactor(DiagonalFactor):
   def _num_sources(self):
     return len(self._params_grads)
 
+  @property
+  def _dtype(self):
+    return self._params_grads[0].dtype
+
   def _compute_new_cov(self, idx=0):
     with _maybe_colocate_with(self._params_grads[idx],
                               self._colocate_cov_ops_with_inputs):
@@ -497,8 +538,8 @@ class FullyConnectedDiagonalFactor(DiagonalFactor):
     self._outputs_grads = outputs_grads
     self._colocate_cov_ops_with_inputs = colocate_cov_ops_with_inputs
     self._batch_size = array_ops.shape(inputs)[0]
-    self._orig_tensors_name = scope_string_from_params((inputs,) +
-                                                       tuple(outputs_grads))
+    self._orig_tensors_name = scope_string_from_params(
+        (inputs,) + tuple(outputs_grads))
 
     # Note that we precompute the required operations on the inputs since the
     # inputs don't change with the 'idx' argument to _compute_new_cov.  (Only
@@ -522,6 +563,10 @@ class FullyConnectedDiagonalFactor(DiagonalFactor):
   def _num_sources(self):
     return len(self._outputs_grads)
 
+  @property
+  def _dtype(self):
+    return self._outputs_grads[0].dtype
+
   def _compute_new_cov(self, idx=0):
     # The well-known special formula that uses the fact that the entry-wise
     # square of an outer product is the outer-product of the entry-wise squares.
@@ -572,8 +617,8 @@ class ConvDiagonalFactor(DiagonalFactor):
     self._outputs_grads = outputs_grads
     self._colocate_cov_ops_with_inputs = colocate_cov_ops_with_inputs
 
-    self._orig_tensors_name = scope_string_from_name((inputs,)
-                                                     + tuple(outputs_grads))
+    self._orig_tensors_name = scope_string_from_name(
+        (inputs,) + tuple(outputs_grads))
 
     # Note that we precompute the required operations on the inputs since the
     # inputs don't change with the 'idx' argument to _compute_new_cov.  (Only
@@ -604,13 +649,19 @@ class ConvDiagonalFactor(DiagonalFactor):
   @property
   def _cov_shape(self):
     filter_height, filter_width, in_channels, out_channels = self._filter_shape
-    return [filter_height * filter_width * in_channels + self._has_bias,
-            out_channels]
+    return [
+        filter_height * filter_width * in_channels + self._has_bias,
+        out_channels
+    ]
 
   @property
   def _num_sources(self):
     return len(self._outputs_grads)
 
+  @property
+  def _dtype(self):
+    return self._outputs_grads[0].dtype
+
   def _compute_new_cov(self, idx=0):
     with _maybe_colocate_with(self._outputs_grads[idx],
                               self._colocate_cov_ops_with_inputs):
@@ -644,8 +695,7 @@ class FullyConnectedKroneckerFactor(InverseProvidingFactor):
     Args:
       tensors: List of Tensors of shape [batch_size, n]. Represents either a
         layer's inputs or its output's gradients.
-      has_bias: bool. If True, assume this factor is for the layer's inputs and
-        append '1' to each row.
+      has_bias: bool. If True, append '1' to each row.
       colocate_cov_ops_with_inputs: Whether to colocate cov_update ops with
           their inputs.
     """
@@ -670,6 +720,10 @@ class FullyConnectedKroneckerFactor(InverseProvidingFactor):
   def _num_sources(self):
     return len(self._tensors)
 
+  @property
+  def _dtype(self):
+    return self._tensors[0].dtype
+
   def _compute_new_cov(self, idx=0):
     with _maybe_colocate_with(self._tensors[idx],
                               self._colocate_cov_ops_with_inputs):
@@ -735,6 +789,10 @@ class ConvInputKroneckerFactor(InverseProvidingFactor):
   def _num_sources(self):
     return 1
 
+  @property
+  def _dtype(self):
+    return self._inputs.dtype
+
   def _compute_new_cov(self, idx=0):
     if idx != 0:
       raise ValueError("ConvInputKroneckerFactor only supports idx = 0")
@@ -799,9 +857,288 @@ class ConvOutputKroneckerFactor(InverseProvidingFactor):
   def _num_sources(self):
     return len(self._outputs_grads)
 
+  @property
+  def _dtype(self):
+    return self._outputs_grads[0].dtype
+
   def _compute_new_cov(self, idx=0):
     with _maybe_colocate_with(self._outputs_grads[idx],
                               self._colocate_cov_ops_with_inputs):
       reshaped_tensor = array_ops.reshape(self._outputs_grads[idx],
                                           [-1, self._out_channels])
       return _compute_cov(reshaped_tensor)
+
+
+class FullyConnectedMultiKF(InverseProvidingFactor):
+  """Kronecker factor for a fully connected recurrent layer."""
+
+  def __init__(self,
+               tensor_lists,
+               has_bias=False,
+               colocate_cov_ops_with_inputs=False):
+    """Constructs a new `FullyConnectedMultiKF`.
+
+    Args:
+      tensor_lists: List of lists of  Tensors of shape [batch_size, n]. Layer
+        inputs at each timestep.
+      has_bias: bool. If True, assume this factor is for the layer's inputs and
+        append '1' to each row.
+      colocate_cov_ops_with_inputs: Whether to colocate cov_update ops with
+        their inputs.
+    """
+
+    self._orig_tensors_name = scope_string_from_params(tensor_lists)
+    self._batch_size = array_ops.shape(tensor_lists[0][0])[0]
+    self._num_timesteps = len(tensor_lists[0])
+
+    tensors = tuple(
+        array_ops.concat(tensor_list, 0) for tensor_list in tensor_lists)
+    if has_bias:
+      tensors = tuple(_append_homog(tensor) for tensor in tensors)
+    self._tensors = tensors
+
+    self._cov_dt1 = None
+    self._option1quants_by_damping = {}
+    self._option2quants_by_damping = {}
+    self._colocate_cov_ops_with_inputs = colocate_cov_ops_with_inputs
+
+    super(FullyConnectedMultiKF, self).__init__()
+
+  @property
+  def _var_scope(self):
+    return "ff_fc_multi/" + self._orig_tensors_name
+
+  @property
+  def _num_sources(self):
+    return len(self._tensors)
+
+  @property
+  def _dtype(self):
+    return self._tensors[0].dtype
+
+  def make_covariance_update_op(self, ema_decay):
+    with _maybe_colocate_with(self._tensors,
+                              self._colocate_cov_ops_with_inputs):
+      op = super(FullyConnectedMultiKF,
+                 self).make_covariance_update_op(ema_decay)
+
+      if self._cov_dt1 is not None:
+        new_cov_dt1 = math_ops.add_n(
+            tuple(
+                self._compute_new_cov_dt1(idx)
+                for idx in range(self._num_sources)))
+        op2 = moving_averages.assign_moving_average(
+            self._cov_dt1, new_cov_dt1, ema_decay, zero_debias=ZERO_DEBIAS)
+
+        # TODO(b/69112164):
+        # It's important that _cov and _cov_dt1 remain consistent with each
+        # other while the inverse ops are happening. How can we ensure this?
+        # We will need to add explicit synchronization for this to
+        # work with asynchronous training.
+        op = control_flow_ops.group(op, op2)
+
+    return op
+
+  def _compute_new_cov(self, idx=0):
+    tensor = self._tensors[idx]
+    normalizer = self._num_timesteps * self._batch_size
+    return _compute_cov(tensor, normalizer=normalizer)
+
+  def _compute_new_cov_dt1(self, idx=0):
+    tensor = self._tensors[idx]
+    normalizer = self._num_timesteps * self._batch_size
+    tensor_present = tensor[:-self._batch_size, :]
+    tensor_future = tensor[self._batch_size:, :]
+    return _compute_cov(
+        tensor_future, tensor_right=tensor_present, normalizer=normalizer)
+
+  @property
+  def _cov_shape(self):
+    size = self._tensors[0].shape[1]
+    return [size, size]
+
+  @property
+  def _vec_shape(self):
+    size = self._tensors[0].shape[1]
+    return [size]
+
+  def get_option1quants(self, damping):
+    return self._option1quants_by_damping[damping]
+
+  def get_option2quants(self, damping):
+    return self._option2quants_by_damping[damping]
+
+  def get_cov_dt1(self):
+    assert self._cov_dt1 is not None
+    return self._cov_dt1
+
+  def register_cov_dt1(self):
+    """Create a variable representing temporal cross-covariance.
+
+    This is technically the second moment, not covariance, since it's
+    not mean subtracted.
+    """
+    if self._cov_dt1 is None:
+      with variable_scope.variable_scope(self._var_scope):
+        self._cov_dt1 = variable_scope.get_variable(
+            "cov_dt1",
+            initializer=self._cov_initializer,
+            shape=self._cov_shape,
+            trainable=False,
+            dtype=self._dtype)
+
+  def register_option1quants(self, damping):
+
+    self.register_eigendecomp()
+    self.register_cov_dt1()
+
+    if damping not in self._option1quants_by_damping:
+      # It's questionable as to whether we should initialize with stuff like
+      # this at all.  Ideally these values should never be used until they are
+      # updated at least once.
+      damping_string = scalar_or_tensor_to_string(damping)
+      with variable_scope.variable_scope(self._var_scope):
+        Lmat = variable_scope.get_variable(  # pylint: disable=invalid-name
+            "Lmat_damp{}".format(damping_string),
+            initializer=inverse_initializer,
+            shape=self._cov_shape,
+            trainable=False,
+            dtype=self._dtype)
+        psi = variable_scope.get_variable(
+            "psi_damp{}".format(damping_string),
+            initializer=init_ops.ones_initializer,
+            shape=self._vec_shape,
+            trainable=False,
+            dtype=self._dtype)
+
+      self._option1quants_by_damping[damping] = (Lmat, psi)
+
+  def register_option2quants(self, damping):
+
+    self.register_eigendecomp()
+    self.register_cov_dt1()
+
+    if damping not in self._option2quants_by_damping:
+      # It's questionable as to whether we should initialize with stuff like
+      # this at all.  Ideally these values should never be used until they are
+      # updated at least once.
+      damping_string = scalar_or_tensor_to_string(damping)
+      with variable_scope.variable_scope(self._var_scope):
+        Pmat = variable_scope.get_variable(  # pylint: disable=invalid-name
+            "Lmat_damp{}".format(damping_string),
+            initializer=inverse_initializer,
+            shape=self._cov_shape,
+            trainable=False,
+            dtype=self._dtype)
+        Kmat = variable_scope.get_variable(  # pylint: disable=invalid-name
+            "Kmat_damp{}".format(damping_string),
+            initializer=inverse_initializer,
+            shape=self._cov_shape,
+            trainable=False,
+            dtype=self._dtype)
+        mu = variable_scope.get_variable(
+            "mu_damp{}".format(damping_string),
+            initializer=init_ops.ones_initializer,
+            shape=self._vec_shape,
+            trainable=False,
+            dtype=self._dtype)
+
+      self._option2quants_by_damping[damping] = (Pmat, Kmat, mu)
+
+  def make_inverse_updates_ops(self):
+    """Create and return update ops corresponding to registered computations."""
+    # TODO(b/69918258): Add correctness tests for this method.
+    # pylint: disable=invalid-name
+
+    ops = super(FullyConnectedMultiKF, self).make_inverse_update_ops()
+
+    if (len(self._option1quants_by_damping) +
+        len(self._option2quants_by_damping)):
+
+      # Note that C0 and C1 are stand-ins for A0 and A1, or G0 and G1, from
+      # the pseudo-code in the original paper.  Because the computations for
+      # the A and G case are essentially the same they can both be performed by
+      # the same class (this one).
+
+      C1 = self.get_cov_dt1()
+
+      # Get the eigendecomposition of C0  (= self.get_cov())
+      eigen_e, eigen_V = self.get_eigendecomp()
+
+      # TODO(b/69678661): Note, there is an implicit assumption here that C1
+      # and C0 (as represented here by its eigen-decomp) are consistent.  This
+      # could fail to be the case if self._cov and self._cov_dt1 are not updated
+      # consistently, or are somehow read between or during the cov updates.
+      # Can this possibly happen?  Is there a way to prevent it?
+
+      for damping, (Lmat_var,
+                    psi_var) in self._option1quants_by_damping.items():
+
+        invsqrtC0 = math_ops.matmul(
+            eigen_V * (eigen_e + damping)**(-0.5), eigen_V, transpose_b=True)
+
+        # Might need to enforce symmetry lost due to numerical issues.
+        invsqrtC0 = (invsqrtC0 + array_ops.transpose(invsqrtC0)) / 2.0
+
+        # The following line imposses the symmetry assumed by "Option 1" on C1.
+        # Stangely the code can work okay with this line commented out,
+        # depending on how psd_eig is defined.  I'm not sure why.
+        C1 = (C1 + array_ops.transpose(C1)) / 2.0
+
+        # hPsi = C0^(-1/2) * C1 * C0^(-1/2)  (hPsi means \hat{Psi})
+        hPsi = math_ops.matmul(math_ops.matmul(invsqrtC0, C1), invsqrtC0)
+
+        # Compute the decomposition U*diag(psi)*U^T = hPsi
+        psi, U = utils.psd_eig(hPsi)
+
+        # L = C0^(-1/2) * U
+        Lmat = math_ops.matmul(invsqrtC0, U)
+
+        ops.append(Lmat_var.assign(Lmat))
+        ops.append(psi_var.assign(psi))
+
+      for damping, (Pmat_var, Kmat_var,
+                    mu_var) in self._option2quants_by_damping.items():
+
+        # compute C0^(-1/2)
+        invsqrtC0 = math_ops.matmul(
+            eigen_V * (eigen_e + damping)**(-0.5), eigen_V, transpose_b=True)
+
+        # Might need to enforce symmetry lost due to numerical issues.
+        invsqrtC0 = (invsqrtC0 + array_ops.transpose(invsqrtC0)) / 2.0
+
+        # Compute the product C0^(-1/2) * C1
+        invsqrtC0C1 = math_ops.matmul(invsqrtC0, C1)
+
+        # hPsi = C0^(-1/2) * C1 * C0^(-1/2)  (hPsi means \hat{Psi})
+        hPsi = math_ops.matmul(invsqrtC0C1, invsqrtC0)
+
+        # Compute the decomposition E*diag(mu)*E^T = hPsi^T * hPsi
+        # Note that we using the notation mu instead of "m" for the eigenvalues.
+        # Instead of computing the product hPsi^T * hPsi and then doing an
+        # eigen-decomposition of this we just compute the SVD of hPsi and then
+        # square the singular values to get the eigenvalues. For a justification
+        # of this approach, see:
+        # https://en.wikipedia.org/wiki/Singular-value_decomposition#Relation_to_eigenvalue_decomposition
+        sqrtmu, _, E = linalg_ops.svd(hPsi)
+        mu = math_ops.square(sqrtmu)
+
+        # Mathematically, the eigenvalues should not should not exceed 1.0, but
+        # due to numerical issues, or possible issues with inconsistent
+        # values of C1 and (the eigen-decomposition of) C0 they might. So
+        # we enforce this condition.
+        mu = math_ops.minimum(mu, 1.0)
+
+        # P = (C0^(-1/2) * C1)^T * C0^(-1/2) = C_1^T * C_0^(-1)
+        Pmat = math_ops.matmul(invsqrtC0C1, invsqrtC0, transpose_a=True)
+
+        # K = C_0^(-1/2) * E
+        Kmat = math_ops.matmul(invsqrtC0, E)
+
+        ops.append(Pmat_var.assign(Pmat))
+        ops.append(Kmat_var.assign(Kmat))
+        ops.append(mu_var.assign(mu))
+
+    return [control_flow_ops.group(ops)]
+
+    # pylint: enable=invalid-name
diff --git a/tensorflow/contrib/kfac/python/ops/utils.py b/tensorflow/contrib/kfac/python/ops/utils.py
index d5461c9f2e..035f080fdb 100644
--- a/tensorflow/contrib/kfac/python/ops/utils.py
+++ b/tensorflow/contrib/kfac/python/ops/utils.py
@@ -30,6 +30,7 @@ from tensorflow.python.ops import random_ops
 
 # Method used for inverting matrices.
 POSDEF_INV_METHOD = "cholesky"
+POSDEF_EIG_METHOD = "self_adjoint"
 
 
 def set_global_constants(posdef_inv_method=None):
@@ -187,7 +188,7 @@ def posdef_inv(tensor, damping):
   """Computes the inverse of tensor + damping * identity."""
   identity = linalg_ops.eye(tensor.shape.as_list()[0], dtype=tensor.dtype)
   damping = math_ops.cast(damping, dtype=tensor.dtype)
-  return posdef_inv_funcs[POSDEF_INV_METHOD](tensor, identity, damping)
+  return posdef_inv_functions[POSDEF_INV_METHOD](tensor, identity, damping)
 
 
 def posdef_inv_matrix_inverse(tensor, identity, damping):
@@ -209,13 +210,39 @@ def posdef_inv_eig(tensor, identity, damping):
       eigenvectors / eigenvalues, eigenvectors, transpose_b=True)
 
 
-posdef_inv_funcs = {
+posdef_inv_functions = {
     "matrix_inverse": posdef_inv_matrix_inverse,
     "cholesky": posdef_inv_cholesky,
     "eig": posdef_inv_eig,
 }
 
 
+def posdef_eig(mat):
+  """Computes the eigendecomposition of a positive semidefinite matrix."""
+  return posdef_eig_functions[POSDEF_EIG_METHOD](mat)
+
+
+def posdef_eig_svd(mat):
+  """Computes the singular values and left singular vectors of a matrix."""
+  evals, evecs, _ = linalg_ops.svd(mat)
+
+  return evals, evecs
+
+
+def posdef_eig_self_adjoint(mat):
+  """Computes eigendecomposition using self_adjoint_eig."""
+  evals, evecs = linalg_ops.self_adjoint_eig(mat)
+  evals = math_ops.abs(evals)  # Should be equivalent to svd approach.
+
+  return evals, evecs
+
+
+posdef_eig_functions = {
+    "self_adjoint": posdef_eig_self_adjoint,
+    "svd": posdef_eig_svd,
+}
+
+
 class SubGraph(object):
   """Defines a subgraph given by all the dependencies of a given set of outputs.
   """
-- 
GitLab


From 6b6244c40197b34f49bb50aa52efb082380d4637 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 30 Nov 2017 23:58:26 -0800
Subject: [PATCH 0499/1225] Build demo app for SmartReply

PiperOrigin-RevId: 177559103
---
 tensorflow/contrib/lite/build_def.bzl         |   5 +-
 .../contrib/lite/models/smartreply/BUILD      |  85 ++++++++++++
 .../demo/app/src/main/AndroidManifest.xml     |  38 ++++++
 .../models/smartreply/demo/app/src/main/BUILD |  65 +++++++++
 .../smartreply/demo/app/src/main/assets/BUILD |  15 ++
 .../app/src/main/assets/backoff_response.txt  |  16 +++
 .../android/smartreply/MainActivity.java      |  99 ++++++++++++++
 .../android/smartreply/SmartReply.java        |  44 ++++++
 .../android/smartreply/SmartReplyClient.java  | 129 ++++++++++++++++++
 .../app/src/main/res/layout/main_activity.xml |  44 ++++++
 .../demo/app/src/main/smartreply_jni.cc       | 129 ++++++++++++++++++
 .../models/smartreply/ops/extract_feature.cc  |   9 +-
 .../lite/models/smartreply/ops/normalize.cc   |   7 +-
 .../lite/models/smartreply/predictor.cc       |  21 +--
 .../lite/models/smartreply/predictor.h        |  12 +-
 .../lite/models/smartreply/predictor_test.cc  |   9 +-
 tensorflow/contrib/lite/tools/BUILD           |   1 +
 .../lite/tools/gen_op_registration_main.cc    |  48 +++++--
 .../contrib/lite/tools/mutable_op_resolver.h  |   2 +-
 tensorflow/workspace.bzl                      |  18 ++-
 third_party/tflite_smartreply.BUILD           |  13 ++
 21 files changed, 758 insertions(+), 51 deletions(-)
 create mode 100644 tensorflow/contrib/lite/models/smartreply/demo/app/src/main/AndroidManifest.xml
 create mode 100644 tensorflow/contrib/lite/models/smartreply/demo/app/src/main/BUILD
 create mode 100644 tensorflow/contrib/lite/models/smartreply/demo/app/src/main/assets/BUILD
 create mode 100644 tensorflow/contrib/lite/models/smartreply/demo/app/src/main/assets/backoff_response.txt
 create mode 100644 tensorflow/contrib/lite/models/smartreply/demo/app/src/main/java/com/example/android/smartreply/MainActivity.java
 create mode 100644 tensorflow/contrib/lite/models/smartreply/demo/app/src/main/java/com/example/android/smartreply/SmartReply.java
 create mode 100644 tensorflow/contrib/lite/models/smartreply/demo/app/src/main/java/com/example/android/smartreply/SmartReplyClient.java
 create mode 100644 tensorflow/contrib/lite/models/smartreply/demo/app/src/main/res/layout/main_activity.xml
 create mode 100644 tensorflow/contrib/lite/models/smartreply/demo/app/src/main/smartreply_jni.cc
 create mode 100644 third_party/tflite_smartreply.BUILD

diff --git a/tensorflow/contrib/lite/build_def.bzl b/tensorflow/contrib/lite/build_def.bzl
index e3c9cdd99b..5813b3de4d 100644
--- a/tensorflow/contrib/lite/build_def.bzl
+++ b/tensorflow/contrib/lite/build_def.bzl
@@ -223,11 +223,12 @@ def gen_selected_ops(name, model):
   """
   out = name + "_registration.cc"
   tool = "//tensorflow/contrib/lite/tools:generate_op_registrations"
+  tflite_path = "//tensorflow/contrib/lite"
   native.genrule(
       name = name,
       srcs = [model],
       outs = [out],
-      cmd = ("$(location %s) --input_model=$(location %s) --output_registration=$(location %s)")
-      % (tool, model, out),
+      cmd = ("$(location %s) --input_model=$(location %s) --output_registration=$(location %s) --tflite_path=%s")
+      % (tool, model, out, tflite_path[2:]),
       tools = [tool],
   )
diff --git a/tensorflow/contrib/lite/models/smartreply/BUILD b/tensorflow/contrib/lite/models/smartreply/BUILD
index fbdf19f205..733c3f4c7f 100644
--- a/tensorflow/contrib/lite/models/smartreply/BUILD
+++ b/tensorflow/contrib/lite/models/smartreply/BUILD
@@ -1,7 +1,92 @@
 package(default_visibility = ["//visibility:public"])
 
+load("//tensorflow/contrib/lite:build_def.bzl", "tflite_copts", "gen_selected_ops")
+
 licenses(["notice"])  # Apache 2.0
 
+gen_selected_ops(
+    name = "smartreply_ops",
+    model = "@tflite_smartreply//:smartreply.tflite",
+)
+
+cc_library(
+    name = "custom_ops",
+    srcs = [
+        "ops/extract_feature.cc",
+        "ops/normalize.cc",
+        "ops/predict.cc",
+        ":smartreply_ops",
+    ],
+    copts = tflite_copts(),
+    deps = [
+        "//tensorflow/contrib/lite:framework",
+        "//tensorflow/contrib/lite:string_util",
+        "//tensorflow/contrib/lite/kernels:builtin_ops",
+        "//tensorflow/contrib/lite/tools:mutable_op_resolver",
+        "@com_google_absl//absl/strings",
+        "@com_googlesource_code_re2//:re2",
+        "@farmhash_archive//:farmhash",
+    ],
+)
+
+cc_library(
+    name = "predictor_lib",
+    srcs = ["predictor.cc"],
+    hdrs = ["predictor.h"],
+    copts = tflite_copts(),
+    deps = [
+        ":custom_ops",
+        "//tensorflow/contrib/lite:framework",
+        "//tensorflow/contrib/lite:string_util",
+        "//tensorflow/contrib/lite/kernels:builtin_ops",
+        "//tensorflow/contrib/lite/tools:mutable_op_resolver",
+        "@com_google_absl//absl/strings",
+        "@com_googlesource_code_re2//:re2",
+    ],
+)
+
+cc_test(
+    name = "extract_feature_op_test",
+    size = "small",
+    srcs = ["ops/extract_feature_test.cc"],
+    deps = [
+        ":custom_ops",
+        "//tensorflow/contrib/lite:framework",
+        "//tensorflow/contrib/lite/kernels:builtin_ops",
+        "//tensorflow/contrib/lite/kernels:test_util",
+        "@com_google_googletest//:gtest",
+        "@farmhash_archive//:farmhash",
+    ],
+)
+
+cc_test(
+    name = "normalize_op_test",
+    size = "small",
+    srcs = ["ops/normalize_test.cc"],
+    deps = [
+        ":custom_ops",
+        "//tensorflow/contrib/lite:framework",
+        "//tensorflow/contrib/lite:string_util",
+        "//tensorflow/contrib/lite/kernels:builtin_ops",
+        "//tensorflow/contrib/lite/kernels:test_util",
+        "@com_google_googletest//:gtest",
+    ],
+)
+
+cc_test(
+    name = "predict_op_test",
+    size = "small",
+    srcs = ["ops/predict_test.cc"],
+    deps = [
+        ":custom_ops",
+        "//tensorflow/contrib/lite:framework",
+        "//tensorflow/contrib/lite:string_util",
+        "//tensorflow/contrib/lite/kernels:builtin_ops",
+        "//tensorflow/contrib/lite/kernels:test_util",
+        "@com_google_googletest//:gtest",
+    ],
+)
+
 filegroup(
     name = "all_files",
     srcs = glob(
diff --git a/tensorflow/contrib/lite/models/smartreply/demo/app/src/main/AndroidManifest.xml b/tensorflow/contrib/lite/models/smartreply/demo/app/src/main/AndroidManifest.xml
new file mode 100644
index 0000000000..75ed9432c8
--- /dev/null
+++ b/tensorflow/contrib/lite/models/smartreply/demo/app/src/main/AndroidManifest.xml
@@ -0,0 +1,38 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+ Copyright 2017 The Android Open Source Project
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<manifest xmlns:android="http://schemas.android.com/apk/res/android"
+  package="com.example.android.smartreply" >
+
+  <uses-sdk
+      android:minSdkVersion="15"
+      android:targetSdkVersion="24" />
+
+  <application android:label="TfLite SmartReply Demo">
+    <activity
+        android:name="com.example.android.smartreply.MainActivity"
+        android:configChanges="orientation|keyboardHidden|screenSize"
+        android:windowSoftInputMode="stateUnchanged|adjustPan"
+        android:label="TfLite SmartReply Demo"
+        android:screenOrientation="portrait" >
+      <intent-filter>
+        <action android:name="android.intent.action.MAIN" />
+        <category android:name="android.intent.category.LAUNCHER" />
+      </intent-filter>
+    </activity>
+  </application>
+
+</manifest>
diff --git a/tensorflow/contrib/lite/models/smartreply/demo/app/src/main/BUILD b/tensorflow/contrib/lite/models/smartreply/demo/app/src/main/BUILD
new file mode 100644
index 0000000000..f8767b443a
--- /dev/null
+++ b/tensorflow/contrib/lite/models/smartreply/demo/app/src/main/BUILD
@@ -0,0 +1,65 @@
+package(default_visibility = ["//visibility:public"])
+
+licenses(["notice"])  # Apache 2.0
+
+load(
+    "//tensorflow/contrib/lite:build_def.bzl",
+    "tflite_copts",
+    "tflite_jni_binary",
+)
+
+filegroup(
+    name = "assets",
+    srcs = [
+        "@tflite_smartreply//:model_files",
+    ],
+)
+
+android_binary(
+    name = "SmartReplyDemo",
+    srcs = glob(["java/**/*.java"]),
+    assets = [":assets"],
+    assets_dir = "",
+    custom_package = "com.example.android.smartreply",
+    manifest = "AndroidManifest.xml",
+    nocompress_extensions = [
+        ".tflite",
+    ],
+    resource_files = glob(["res/**"]),
+    tags = ["manual"],
+    deps = [
+        ":smartreply_runtime",
+        "@androidsdk//com.android.support:support-v13-25.2.0",
+        "@androidsdk//com.android.support:support-v4-25.2.0",
+    ],
+)
+
+cc_library(
+    name = "smartreply_runtime",
+    srcs = ["libsmartreply_jni.so"],
+    visibility = ["//visibility:public"],
+)
+
+tflite_jni_binary(
+    name = "libsmartreply_jni.so",
+    deps = [
+        ":smartreply_jni_lib",
+    ],
+)
+
+cc_library(
+    name = "smartreply_jni_lib",
+    srcs = [
+        "smartreply_jni.cc",
+    ],
+    copts = tflite_copts(),
+    linkopts = [
+        "-lm",
+        "-ldl",
+    ],
+    deps = [
+        "//tensorflow/contrib/lite:framework",
+        "//tensorflow/contrib/lite/models/smartreply:predictor_lib",
+    ],
+    alwayslink = 1,
+)
diff --git a/tensorflow/contrib/lite/models/smartreply/demo/app/src/main/assets/BUILD b/tensorflow/contrib/lite/models/smartreply/demo/app/src/main/assets/BUILD
new file mode 100644
index 0000000000..3c882ffc43
--- /dev/null
+++ b/tensorflow/contrib/lite/models/smartreply/demo/app/src/main/assets/BUILD
@@ -0,0 +1,15 @@
+package(default_visibility = ["//visibility:public"])
+
+licenses(["notice"])  # Apache 2.0
+
+exports_files(glob(["*"]))
+
+filegroup(
+    name = "assets_files",
+    srcs = glob(
+        ["**/*"],
+        exclude = [
+            "BUILD",
+        ],
+    ),
+)
diff --git a/tensorflow/contrib/lite/models/smartreply/demo/app/src/main/assets/backoff_response.txt b/tensorflow/contrib/lite/models/smartreply/demo/app/src/main/assets/backoff_response.txt
new file mode 100644
index 0000000000..a0a5b46b5f
--- /dev/null
+++ b/tensorflow/contrib/lite/models/smartreply/demo/app/src/main/assets/backoff_response.txt
@@ -0,0 +1,16 @@
+Ok
+Yes
+No
+👍
+☺
+😟
+❤️
+Lol
+Thanks
+Got it
+Done
+Nice
+I don't know
+What?
+Why?
+What's up?
diff --git a/tensorflow/contrib/lite/models/smartreply/demo/app/src/main/java/com/example/android/smartreply/MainActivity.java b/tensorflow/contrib/lite/models/smartreply/demo/app/src/main/java/com/example/android/smartreply/MainActivity.java
new file mode 100644
index 0000000000..02fec9ae5e
--- /dev/null
+++ b/tensorflow/contrib/lite/models/smartreply/demo/app/src/main/java/com/example/android/smartreply/MainActivity.java
@@ -0,0 +1,99 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+package com.example.android.smartreply;
+
+import android.app.Activity;
+import android.os.Bundle;
+import android.os.Handler;
+import android.util.Log;
+import android.view.View;
+import android.widget.Button;
+import android.widget.EditText;
+import android.widget.TextView;
+
+/**
+ * The main (and only) activity of this demo app. Displays a text box which updates as messages are
+ * received.
+ */
+public class MainActivity extends Activity {
+  private static final String TAG = "SmartReplyDemo";
+  private SmartReplyClient client;
+
+  private Button sendButton;
+  private TextView messageTextView;
+  private EditText messageInput;
+
+  private Handler handler;
+
+  @Override
+  protected void onCreate(Bundle savedInstanceState) {
+    super.onCreate(savedInstanceState);
+    Log.v(TAG, "onCreate");
+    setContentView(R.layout.main_activity);
+
+    client = new SmartReplyClient(getApplicationContext());
+    handler = new Handler();
+
+    sendButton = (Button) findViewById(R.id.send_button);
+    sendButton.setOnClickListener(
+        (View v) -> {
+          send(messageInput.getText().toString());
+        });
+
+    messageTextView = (TextView) findViewById(R.id.message_text);
+    messageInput = (EditText) findViewById(R.id.message_input);
+  }
+
+  @Override
+  protected void onStart() {
+    super.onStart();
+    Log.v(TAG, "onStart");
+    handler.post(
+        () -> {
+          client.loadModel();
+        });
+  }
+
+  @Override
+  protected void onStop() {
+    super.onStop();
+    Log.v(TAG, "onStop");
+    handler.post(
+        () -> {
+          client.unloadModel();
+        });
+  }
+
+  private void send(final String message) {
+    handler.post(
+        () -> {
+          messageTextView.append("Input: " + message + "\n");
+
+          SmartReply[] ans = client.predict(new String[] {message});
+          for (SmartReply reply : ans) {
+            appendMessage("Reply: " + reply.getText());
+          }
+          appendMessage("------");
+        });
+  }
+
+  private void appendMessage(final String message) {
+    handler.post(
+        () -> {
+          messageTextView.append(message + "\n");
+        });
+  }
+}
diff --git a/tensorflow/contrib/lite/models/smartreply/demo/app/src/main/java/com/example/android/smartreply/SmartReply.java b/tensorflow/contrib/lite/models/smartreply/demo/app/src/main/java/com/example/android/smartreply/SmartReply.java
new file mode 100644
index 0000000000..3357fd17c1
--- /dev/null
+++ b/tensorflow/contrib/lite/models/smartreply/demo/app/src/main/java/com/example/android/smartreply/SmartReply.java
@@ -0,0 +1,44 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+package com.example.android.smartreply;
+
+import android.support.annotation.Keep;
+
+/**
+ * SmartReply contains predicted message, and confidence.
+ *
+ * <p>NOTE: this class used by JNI, class name and constructor should not be obfuscated.
+ */
+@Keep
+public class SmartReply {
+
+  private final String text;
+  private final float score;
+
+  @Keep
+  public SmartReply(String text, float score) {
+    this.text = text;
+    this.score = score;
+  }
+
+  public String getText() {
+    return text;
+  }
+
+  public float getScore() {
+    return score;
+  }
+}
diff --git a/tensorflow/contrib/lite/models/smartreply/demo/app/src/main/java/com/example/android/smartreply/SmartReplyClient.java b/tensorflow/contrib/lite/models/smartreply/demo/app/src/main/java/com/example/android/smartreply/SmartReplyClient.java
new file mode 100644
index 0000000000..d5b1ac0ffb
--- /dev/null
+++ b/tensorflow/contrib/lite/models/smartreply/demo/app/src/main/java/com/example/android/smartreply/SmartReplyClient.java
@@ -0,0 +1,129 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+package com.example.android.smartreply;
+
+import android.content.Context;
+import android.content.res.AssetFileDescriptor;
+import android.support.annotation.Keep;
+import android.support.annotation.WorkerThread;
+import android.util.Log;
+import java.io.BufferedReader;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.nio.MappedByteBuffer;
+import java.nio.channels.FileChannel;
+import java.util.ArrayList;
+import java.util.List;
+
+/** Interface to load TfLite model and provide predictions. */
+public class SmartReplyClient implements AutoCloseable {
+  private static final String TAG = "SmartReplyDemo";
+  private static final String MODEL_PATH = "smartreply.tflite";
+  private static final String BACKOFF_PATH = "backoff_response.txt";
+  private static final String JNI_LIB = "smartreply_jni";
+
+  private final Context context;
+  private long storage;
+  private MappedByteBuffer model;
+
+  private volatile boolean isLibraryLoaded;
+
+  public SmartReplyClient(Context context) {
+    this.context = context;
+  }
+
+  public boolean isLoaded() {
+    return storage != 0;
+  }
+
+  @WorkerThread
+  public synchronized void loadModel() {
+    if (!isLibraryLoaded) {
+      System.loadLibrary(JNI_LIB);
+      isLibraryLoaded = true;
+    }
+
+    try {
+      model = loadModelFile();
+      String[] backoff = loadBackoffList();
+      storage = loadJNI(model, backoff);
+    } catch (IOException e) {
+      Log.e(TAG, "Fail to load model", e);
+      return;
+    }
+  }
+
+  @WorkerThread
+  public synchronized SmartReply[] predict(String[] input) {
+    if (storage != 0) {
+      return predictJNI(storage, input);
+    } else {
+      return new SmartReply[] {};
+    }
+  }
+
+  @WorkerThread
+  public synchronized void unloadModel() {
+    close();
+  }
+
+  @Override
+  public synchronized void close() {
+    if (storage != 0) {
+      unloadJNI(storage);
+      storage = 0;
+    }
+  }
+
+  private MappedByteBuffer loadModelFile() throws IOException {
+    AssetFileDescriptor fileDescriptor = context.getAssets().openFd(MODEL_PATH);
+    FileInputStream inputStream = new FileInputStream(fileDescriptor.getFileDescriptor());
+    try {
+      FileChannel fileChannel = inputStream.getChannel();
+      long startOffset = fileDescriptor.getStartOffset();
+      long declaredLength = fileDescriptor.getDeclaredLength();
+      return fileChannel.map(FileChannel.MapMode.READ_ONLY, startOffset, declaredLength);
+    } finally {
+      inputStream.close();
+    }
+  }
+
+  private String[] loadBackoffList() throws IOException {
+    List<String> labelList = new ArrayList<String>();
+    BufferedReader reader =
+        new BufferedReader(new InputStreamReader(context.getAssets().open(BACKOFF_PATH)));
+    String line;
+    while ((line = reader.readLine()) != null) {
+      if (!line.isEmpty()) {
+        labelList.add(line);
+      }
+    }
+    reader.close();
+    String[] ans = new String[labelList.size()];
+    labelList.toArray(ans);
+    return ans;
+  }
+
+  @Keep
+  private native long loadJNI(MappedByteBuffer buffer, String[] backoff);
+
+  @Keep
+  private native SmartReply[] predictJNI(long storage, String[] text);
+
+  @Keep
+  private native void unloadJNI(long storage);
+}
diff --git a/tensorflow/contrib/lite/models/smartreply/demo/app/src/main/res/layout/main_activity.xml b/tensorflow/contrib/lite/models/smartreply/demo/app/src/main/res/layout/main_activity.xml
new file mode 100644
index 0000000000..23b4cadc00
--- /dev/null
+++ b/tensorflow/contrib/lite/models/smartreply/demo/app/src/main/res/layout/main_activity.xml
@@ -0,0 +1,44 @@
+<LinearLayout xmlns:android="http://schemas.android.com/apk/res/android"
+    xmlns:tools="http://schemas.android.com/tools"
+    android:layout_width="match_parent"
+    android:layout_height="match_parent"
+    android:orientation="vertical">
+
+    <LinearLayout
+        android:layout_width="fill_parent"
+        android:layout_height="0dp"
+        android:padding="5dip"
+        android:layout_weight="3">
+
+        <TextView
+            android:id="@+id/message_text"
+            android:layout_width="fill_parent"
+            android:layout_height="fill_parent"
+            android:scrollbars="vertical"
+            android:gravity="bottom"/>
+    </LinearLayout>
+
+    <LinearLayout
+        android:layout_width="fill_parent"
+        android:layout_height="0dp"
+        android:padding="5dip"
+        android:layout_weight="1">
+
+        <EditText
+            android:id="@+id/message_input"
+            android:layout_width="0dp"
+            android:layout_height="fill_parent"
+            android:layout_weight="6"
+            android:scrollbars="vertical"
+            android:hint="Enter Text"
+            android:gravity="top"
+            android:inputType="text"/>
+        <Button
+            android:id="@+id/send_button"
+            android:layout_width="0dp"
+            android:layout_height="fill_parent"
+            android:layout_weight="2"
+            android:text="Send" />
+    </LinearLayout>
+
+</LinearLayout>
diff --git a/tensorflow/contrib/lite/models/smartreply/demo/app/src/main/smartreply_jni.cc b/tensorflow/contrib/lite/models/smartreply/demo/app/src/main/smartreply_jni.cc
new file mode 100644
index 0000000000..f158cc511a
--- /dev/null
+++ b/tensorflow/contrib/lite/models/smartreply/demo/app/src/main/smartreply_jni.cc
@@ -0,0 +1,129 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include <jni.h>
+#include <utility>
+#include <vector>
+
+#include "tensorflow/contrib/lite/model.h"
+#include "tensorflow/contrib/lite/models/smartreply/predictor.h"
+
+const char kIllegalStateException[] = "java/lang/IllegalStateException";
+
+using tflite::custom::smartreply::GetSegmentPredictions;
+using tflite::custom::smartreply::PredictorResponse;
+
+template <typename T>
+T CheckNotNull(JNIEnv* env, T&& t) {
+  if (t == nullptr) {
+    env->ThrowNew(env->FindClass(kIllegalStateException), "");
+    return nullptr;
+  }
+  return std::forward<T>(t);
+}
+
+std::vector<std::string> jniStringArrayToVector(JNIEnv* env,
+                                                jobjectArray string_array) {
+  int count = env->GetArrayLength(string_array);
+  std::vector<std::string> result;
+  for (int i = 0; i < count; i++) {
+    auto jstr =
+        reinterpret_cast<jstring>(env->GetObjectArrayElement(string_array, i));
+    const char* raw_str = env->GetStringUTFChars(jstr, JNI_FALSE);
+    result.emplace_back(std::string(raw_str));
+    env->ReleaseStringUTFChars(jstr, raw_str);
+  }
+  return result;
+}
+
+struct JNIStorage {
+  std::vector<std::string> backoff_list;
+  std::unique_ptr<::tflite::FlatBufferModel> model;
+};
+
+extern "C" JNIEXPORT jlong JNICALL
+Java_com_example_android_smartreply_SmartReplyClient_loadJNI(
+    JNIEnv* env, jobject thiz, jobject model_buffer,
+    jobjectArray backoff_list) {
+  const char* buf =
+      static_cast<char*>(env->GetDirectBufferAddress(model_buffer));
+  jlong capacity = env->GetDirectBufferCapacity(model_buffer);
+
+  JNIStorage* storage = new JNIStorage;
+  storage->model = tflite::FlatBufferModel::BuildFromBuffer(
+      buf, static_cast<size_t>(capacity));
+  storage->backoff_list = jniStringArrayToVector(env, backoff_list);
+
+  if (!storage->model) {
+    delete storage;
+    env->ThrowNew(env->FindClass(kIllegalStateException), "");
+    return 0;
+  }
+  return reinterpret_cast<jlong>(storage);
+}
+
+extern "C" JNIEXPORT jobjectArray JNICALL
+Java_com_example_android_smartreply_SmartReplyClient_predictJNI(
+    JNIEnv* env, jobject /*thiz*/, jlong storage_ptr, jobjectArray input_text) {
+  // Predict
+  if (storage_ptr == 0) {
+    return nullptr;
+  }
+  JNIStorage* storage = reinterpret_cast<JNIStorage*>(storage_ptr);
+  if (storage == nullptr) {
+    return nullptr;
+  }
+  std::vector<PredictorResponse> responses;
+  GetSegmentPredictions(jniStringArrayToVector(env, input_text),
+                        *storage->model, {storage->backoff_list}, &responses);
+
+  // Create a SmartReply[] to return back to Java
+  jclass smart_reply_class = CheckNotNull(
+      env, env->FindClass("com/example/android/smartreply/SmartReply"));
+  if (env->ExceptionCheck()) {
+    return nullptr;
+  }
+  jmethodID smart_reply_ctor = CheckNotNull(
+      env,
+      env->GetMethodID(smart_reply_class, "<init>", "(Ljava/lang/String;F)V"));
+  if (env->ExceptionCheck()) {
+    return nullptr;
+  }
+  jobjectArray array = CheckNotNull(
+      env, env->NewObjectArray(responses.size(), smart_reply_class, nullptr));
+  if (env->ExceptionCheck()) {
+    return nullptr;
+  }
+  for (int i = 0; i < responses.size(); i++) {
+    jstring text =
+        CheckNotNull(env, env->NewStringUTF(responses[i].GetText().data()));
+    if (env->ExceptionCheck()) {
+      return nullptr;
+    }
+    jobject reply = env->NewObject(smart_reply_class, smart_reply_ctor, text,
+                                   responses[i].GetScore());
+    env->SetObjectArrayElement(array, i, reply);
+  }
+  return array;
+}
+
+extern "C" JNIEXPORT void JNICALL
+Java_com_example_android_smartreply_SmartReplyClient_unloadJNI(
+    JNIEnv* env, jobject thiz, jlong storage_ptr) {
+  if (storage_ptr != 0) {
+    JNIStorage* storage = reinterpret_cast<JNIStorage*>(storage_ptr);
+    delete storage;
+  }
+}
diff --git a/tensorflow/contrib/lite/models/smartreply/ops/extract_feature.cc b/tensorflow/contrib/lite/models/smartreply/ops/extract_feature.cc
index 1c422b659a..f97a6486d6 100644
--- a/tensorflow/contrib/lite/models/smartreply/ops/extract_feature.cc
+++ b/tensorflow/contrib/lite/models/smartreply/ops/extract_feature.cc
@@ -23,7 +23,7 @@ limitations under the License.
 
 #include <algorithm>
 #include <map>
-#include "re2/re2.h"
+
 #include "tensorflow/contrib/lite/context.h"
 #include "tensorflow/contrib/lite/kernels/kernel_util.h"
 #include "tensorflow/contrib/lite/string_util.h"
@@ -81,7 +81,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
   TfLiteTensor* label = GetOutput(context, node, 0);
   TfLiteTensor* weight = GetOutput(context, node, 1);
 
-  std::map<int64, int> feature_id_counts;
+  std::map<int64_t, int> feature_id_counts;
   for (int i = 0; i < num_strings; i++) {
     // Use fingerprint of feature name as id.
     auto strref = tflite::GetString(input, i);
@@ -91,10 +91,9 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
       continue;
     }
 
-    int64 feature_id =
+    int64_t feature_id =
         ::util::Fingerprint64(strref.str, strref.len) % kMaxDimension;
-
-    label->data.i32[i] = static_cast<int32>(feature_id);
+    label->data.i32[i] = static_cast<int32_t>(feature_id);
     weight->data.f[i] =
         std::count(strref.str, strref.str + strref.len, ' ') + 1;
   }
diff --git a/tensorflow/contrib/lite/models/smartreply/ops/normalize.cc b/tensorflow/contrib/lite/models/smartreply/ops/normalize.cc
index d0dc2a35a7..c55ac9f52f 100644
--- a/tensorflow/contrib/lite/models/smartreply/ops/normalize.cc
+++ b/tensorflow/contrib/lite/models/smartreply/ops/normalize.cc
@@ -21,7 +21,10 @@ limitations under the License.
 // Output:
 //     Output[0]: Normalized sentence. string[1]
 //
-#include "absl/strings/ascii.h"
+
+#include <algorithm>
+#include <string>
+
 #include "absl/strings/str_cat.h"
 #include "absl/strings/strip.h"
 #include "re2/re2.h"
@@ -50,7 +53,7 @@ const std::map<string, string>* kRegexTransforms =
 
 static const char kStartToken[] = "<S>";
 static const char kEndToken[] = "<E>";
-static const int32 kMaxInputChars = 300;
+static const int32_t kMaxInputChars = 300;
 
 TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
   tflite::StringRef input = tflite::GetString(GetInput(context, node, 0), 0);
diff --git a/tensorflow/contrib/lite/models/smartreply/predictor.cc b/tensorflow/contrib/lite/models/smartreply/predictor.cc
index a28222213e..6da5cc8eec 100644
--- a/tensorflow/contrib/lite/models/smartreply/predictor.cc
+++ b/tensorflow/contrib/lite/models/smartreply/predictor.cc
@@ -30,7 +30,7 @@ namespace custom {
 namespace smartreply {
 
 // Split sentence into segments (using punctuation).
-std::vector<string> SplitSentence(const string& input) {
+std::vector<std::string> SplitSentence(const std::string& input) {
   string result(input);
 
   RE2::GlobalReplace(&result, "([?.!,])+", " \\1");
@@ -38,12 +38,13 @@ std::vector<string> SplitSentence(const string& input) {
   RE2::GlobalReplace(&result, "[ ]+", " ");
   RE2::GlobalReplace(&result, "\t+$", "");
 
-  return strings::Split(result, '\t');
+  return absl::StrSplit(result, '\t');
 }
 
 // Predict with TfLite model.
-void ExecuteTfLite(const string& sentence, ::tflite::Interpreter* interpreter,
-                   std::map<string, float>* response_map) {
+void ExecuteTfLite(const std::string& sentence,
+                   ::tflite::Interpreter* interpreter,
+                   std::map<std::string, float>* response_map) {
   {
     TfLiteTensor* input = interpreter->tensor(interpreter->inputs()[0]);
     tflite::DynamicBuffer buf;
@@ -67,8 +68,8 @@ void ExecuteTfLite(const string& sentence, ::tflite::Interpreter* interpreter,
 }
 
 void GetSegmentPredictions(
-    const std::vector<string>& input, const ::tflite::FlatBufferModel& model,
-    const SmartReplyConfig& config,
+    const std::vector<std::string>& input,
+    const ::tflite::FlatBufferModel& model, const SmartReplyConfig& config,
     std::vector<PredictorResponse>* predictor_responses) {
   // Initialize interpreter
   std::unique_ptr<::tflite::Interpreter> interpreter;
@@ -82,10 +83,10 @@ void GetSegmentPredictions(
   }
 
   // Execute Tflite Model
-  std::map<string, float> response_map;
-  std::vector<string> sentences;
-  for (const string& str : input) {
-    std::vector<string> splitted_str = SplitSentence(str);
+  std::map<std::string, float> response_map;
+  std::vector<std::string> sentences;
+  for (const std::string& str : input) {
+    std::vector<std::string> splitted_str = SplitSentence(str);
     sentences.insert(sentences.end(), splitted_str.begin(), splitted_str.end());
   }
   for (const auto& sentence : sentences) {
diff --git a/tensorflow/contrib/lite/models/smartreply/predictor.h b/tensorflow/contrib/lite/models/smartreply/predictor.h
index 3b9a2b32e1..d17323a3f9 100644
--- a/tensorflow/contrib/lite/models/smartreply/predictor.h
+++ b/tensorflow/contrib/lite/models/smartreply/predictor.h
@@ -34,7 +34,7 @@ struct SmartReplyConfig;
 // With a given string as input, predict the response with a Tflite model.
 // When config.backoff_response is not empty, predictor_responses will be filled
 // with messagees from backoff response.
-void GetSegmentPredictions(const std::vector<string>& input,
+void GetSegmentPredictions(const std::vector<std::string>& input,
                            const ::tflite::FlatBufferModel& model,
                            const SmartReplyConfig& config,
                            std::vector<PredictorResponse>* predictor_responses);
@@ -43,17 +43,17 @@ void GetSegmentPredictions(const std::vector<string>& input,
 // It includes messages, and confidence.
 class PredictorResponse {
  public:
-  PredictorResponse(const string& response_text, float score) {
+  PredictorResponse(const std::string& response_text, float score) {
     response_text_ = response_text;
     prediction_score_ = score;
   }
 
   // Accessor methods.
-  const string& GetText() const { return response_text_; }
+  const std::string& GetText() const { return response_text_; }
   float GetScore() const { return prediction_score_; }
 
  private:
-  string response_text_ = "";
+  std::string response_text_ = "";
   float prediction_score_ = 0.0;
 };
 
@@ -65,9 +65,9 @@ struct SmartReplyConfig {
   float backoff_confidence;
   // Backoff responses are used when predicted responses cannot fulfill the
   // list.
-  const std::vector<string>& backoff_responses;
+  const std::vector<std::string>& backoff_responses;
 
-  SmartReplyConfig(std::vector<string> backoff_responses)
+  SmartReplyConfig(std::vector<std::string> backoff_responses)
       : num_response(kDefaultNumResponse),
         backoff_confidence(kDefaultBackoffConfidence),
         backoff_responses(backoff_responses) {}
diff --git a/tensorflow/contrib/lite/models/smartreply/predictor_test.cc b/tensorflow/contrib/lite/models/smartreply/predictor_test.cc
index 2fa9923bc9..97d3c650e2 100644
--- a/tensorflow/contrib/lite/models/smartreply/predictor_test.cc
+++ b/tensorflow/contrib/lite/models/smartreply/predictor_test.cc
@@ -18,12 +18,12 @@ limitations under the License.
 #include <fstream>
 #include <unordered_set>
 
-#include "base/logging.h"
 #include <gmock/gmock.h>
 #include <gtest/gtest.h>
 #include "absl/strings/str_cat.h"
 #include "absl/strings/str_split.h"
 #include "tensorflow/contrib/lite/models/test_utils.h"
+#include "tensorflow/contrib/lite/string_util.h"
 
 namespace tflite {
 namespace custom {
@@ -65,7 +65,6 @@ TEST_F(PredictorTest, GetSegmentPredictions) {
 
   float max = 0;
   for (const auto &item : predictions) {
-    LOG(INFO) << "Response: " << item.GetText();
     if (item.GetScore() > max) {
       max = item.GetScore();
     }
@@ -86,7 +85,6 @@ TEST_F(PredictorTest, TestTwoSentences) {
 
   float max = 0;
   for (const auto &item : predictions) {
-    LOG(INFO) << "Response: " << item.GetText();
     if (item.GetScore() > max) {
       max = item.GetScore();
     }
@@ -119,7 +117,7 @@ TEST_F(PredictorTest, BatchTest) {
   string line;
   std::ifstream fin(StrCat(TestDataPath(), "/", kSamples));
   while (std::getline(fin, line)) {
-    const std::vector<string> &fields = strings::Split(line, '\t');
+    const std::vector<string> fields = absl::StrSplit(line, '\t');
     if (fields.empty()) {
       continue;
     }
@@ -139,9 +137,8 @@ TEST_F(PredictorTest, BatchTest) {
                                   fields.begin() + 1, fields.end())));
   }
 
-  LOG(INFO) << "Responses: " << total_responses << " / " << total_items;
-  LOG(INFO) << "Triggers: " << total_triggers << " / " << total_items;
   EXPECT_EQ(total_triggers, total_items);
+  EXPECT_GE(total_responses, total_triggers);
 }
 
 }  // namespace
diff --git a/tensorflow/contrib/lite/tools/BUILD b/tensorflow/contrib/lite/tools/BUILD
index 21b32d8434..751682215b 100644
--- a/tensorflow/contrib/lite/tools/BUILD
+++ b/tensorflow/contrib/lite/tools/BUILD
@@ -13,6 +13,7 @@ tf_cc_binary(
         "//tensorflow/contrib/lite/tools:gen_op_registration",
         "//tensorflow/core:framework_internal",
         "//tensorflow/core:lib",
+        "@com_google_absl//absl/strings",
     ],
 )
 
diff --git a/tensorflow/contrib/lite/tools/gen_op_registration_main.cc b/tensorflow/contrib/lite/tools/gen_op_registration_main.cc
index 1b28b8bcd9..17b514c916 100644
--- a/tensorflow/contrib/lite/tools/gen_op_registration_main.cc
+++ b/tensorflow/contrib/lite/tools/gen_op_registration_main.cc
@@ -13,30 +13,50 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
+#include <cassert>
 #include <fstream>
+#include <map>
 #include <sstream>
 #include <string>
 #include <vector>
 
+#include "absl/strings/strip.h"
 #include "tensorflow/contrib/lite/tools/gen_op_registration.h"
 #include "tensorflow/core/platform/init_main.h"
 #include "tensorflow/core/util/command_line_flags.h"
 
+const char kInputModelFlag[] = "input_model";
+const char kOutputRegistrationFlag[] = "output_registration";
+const char kTfLitePathFlag[] = "tflite_path";
+
 using tensorflow::Flag;
 using tensorflow::Flags;
 using tensorflow::string;
 
+void ParseFlagAndInit(int argc, char** argv, string* input_model,
+                      string* output_registration, string* tflite_path) {
+  std::vector<tensorflow::Flag> flag_list = {
+      Flag(kInputModelFlag, input_model, "path to the tflite model"),
+      Flag(kOutputRegistrationFlag, output_registration,
+           "filename for generated registration code"),
+      Flag(kTfLitePathFlag, tflite_path, "Path to tensorflow lite dir"),
+  };
+
+  Flags::Parse(&argc, argv, flag_list);
+  tensorflow::port::InitMain(argv[0], &argc, &argv);
+}
+
 namespace {
 
-void GenerateFileContent(const string& filename,
+void GenerateFileContent(const std::string& tflite_path,
+                         const std::string& filename,
                          const std::vector<string>& builtin_ops,
                          const std::vector<string>& custom_ops) {
   std::ofstream fout(filename);
 
-  fout << "#include "
-          "\"third_party/tensorflow/contrib/lite/model.h\"\n";
-  fout << "#include "
-          "\"third_party/tensorflow/contrib/lite/tools/mutable_op_resolver.h\"\n";
+  fout << "#include \"" << tflite_path << "/model.h\"\n";
+  fout << "#include \"" << tflite_path << "/tools/mutable_op_resolver.h\"\n";
+
   fout << "namespace tflite {\n";
   fout << "namespace ops {\n";
   if (!builtin_ops.empty()) {
@@ -78,22 +98,20 @@ void GenerateFileContent(const string& filename,
 int main(int argc, char** argv) {
   string input_model;
   string output_registration;
-  std::vector<tensorflow::Flag> flag_list = {
-      Flag("input_model", &input_model, "path to the tflite model"),
-      Flag("output_registration", &output_registration,
-           "filename for generated registration code"),
-  };
-  Flags::Parse(&argc, argv, flag_list);
+  string tflite_path;
+  ParseFlagAndInit(argc, argv, &input_model, &output_registration,
+                   &tflite_path);
 
-  tensorflow::port::InitMain(argv[0], &argc, &argv);
   std::vector<string> builtin_ops;
   std::vector<string> custom_ops;
-
   std::ifstream fin(input_model);
   std::stringstream content;
   content << fin.rdbuf();
-  const ::tflite::Model* model = ::tflite::GetModel(content.str().data());
+  // Need to store content data first, otherwise, it won't work in bazel.
+  string content_str = content.str();
+  const ::tflite::Model* model = ::tflite::GetModel(content_str.data());
   ::tflite::ReadOpsFromModel(model, &builtin_ops, &custom_ops);
-  GenerateFileContent(output_registration, builtin_ops, custom_ops);
+  GenerateFileContent(tflite_path, output_registration, builtin_ops,
+                      custom_ops);
   return 0;
 }
diff --git a/tensorflow/contrib/lite/tools/mutable_op_resolver.h b/tensorflow/contrib/lite/tools/mutable_op_resolver.h
index be60cf476d..906553da57 100644
--- a/tensorflow/contrib/lite/tools/mutable_op_resolver.h
+++ b/tensorflow/contrib/lite/tools/mutable_op_resolver.h
@@ -46,7 +46,7 @@ class MutableOpResolver : public OpResolver {
   void AddCustom(const char* name, TfLiteRegistration* registration);
 
  private:
-  std::map<tflite::BuiltinOperator, TfLiteRegistration*> builtins_;
+  std::map<int, TfLiteRegistration*> builtins_;
   std::map<std::string, TfLiteRegistration*> custom_ops_;
 };
 
diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl
index 25e036e24c..11f9aa2259 100644
--- a/tensorflow/workspace.bzl
+++ b/tensorflow/workspace.bzl
@@ -207,11 +207,12 @@ def tf_workspace(path_prefix="", tf_repo_name=""):
   native.http_archive(
       name = "com_googlesource_code_re2",
       urls = [
-          "https://mirror.bazel.build/github.com/google/re2/archive/b94b7cd42e9f02673cd748c1ac1d16db4052514c.tar.gz",
-          "https://github.com/google/re2/archive/b94b7cd42e9f02673cd748c1ac1d16db4052514c.tar.gz",
+          "https://mirror.bazel.build/github.com/google/re2/archive/26cd968b735e227361c9703683266f01e5df7857.tar.gz",
+          "https://github.com/google/re2/archive/26cd968b735e227361c9703683266f01e5df7857.tar.gz",
+
       ],
-      sha256 = "bd63550101e056427c9e7ff12a408c1c8b74e9803f393ca916b2926fc2c4906f",
-      strip_prefix = "re2-b94b7cd42e9f02673cd748c1ac1d16db4052514c",
+      sha256 = "e57eeb837ac40b5be37b2c6197438766e73343ffb32368efea793dfd8b28653b",
+      strip_prefix = "re2-26cd968b735e227361c9703683266f01e5df7857",
   )
 
   native.http_archive(
@@ -800,3 +801,12 @@ def tf_workspace(path_prefix="", tf_repo_name=""):
           "https://storage.googleapis.com/download.tensorflow.org/models/tflite/mobilenet_v1_224_android_quant_2017_11_08.zip",
       ],
   )
+
+  native.new_http_archive(
+      name = "tflite_smartreply",
+      build_file = str(Label("//third_party:tflite_smartreply.BUILD")),
+      sha256 = "8980151b85a87a9c1a3bb1ed4748119e4a85abd3cb5744d83da4d4bd0fbeef7c",
+      urls = [
+          "https://storage.googleapis.com/download.tensorflow.org/models/tflite/smartreply_1.0_2017_11_01.zip"
+      ],
+  )
diff --git a/third_party/tflite_smartreply.BUILD b/third_party/tflite_smartreply.BUILD
new file mode 100644
index 0000000000..75663eff48
--- /dev/null
+++ b/third_party/tflite_smartreply.BUILD
@@ -0,0 +1,13 @@
+package(default_visibility = ["//visibility:public"])
+
+licenses(["notice"])  # Apache 2.0
+
+filegroup(
+    name = "model_files",
+    srcs = glob(
+        ["**/*"],
+        exclude = [
+            "BUILD",
+        ],
+    ),
+)
-- 
GitLab


From 32cbeaaaa4a1fe20b21e6a98068b175ce3922600 Mon Sep 17 00:00:00 2001
From: Shanqing Cai <cais@google.com>
Date: Fri, 1 Dec 2017 07:10:42 -0800
Subject: [PATCH 0500/1225] TFE: Remove contrib/eager/python/sumary_writer.py

Use tf.contrib.summary.create_file_writer, instead.

PiperOrigin-RevId: 177588097
---
 tensorflow/contrib/eager/python/BUILD         |  31 ---
 .../contrib/eager/python/summary_writer.py    | 242 ------------------
 .../eager/python/summary_writer_test.py       | 150 -----------
 tensorflow/contrib/summary/BUILD              |   5 +-
 tensorflow/tools/pip_package/BUILD            |   1 -
 5 files changed, 1 insertion(+), 428 deletions(-)
 delete mode 100644 tensorflow/contrib/eager/python/summary_writer.py
 delete mode 100644 tensorflow/contrib/eager/python/summary_writer_test.py

diff --git a/tensorflow/contrib/eager/python/BUILD b/tensorflow/contrib/eager/python/BUILD
index 55d768044b..6e9bb87d58 100644
--- a/tensorflow/contrib/eager/python/BUILD
+++ b/tensorflow/contrib/eager/python/BUILD
@@ -103,37 +103,6 @@ cuda_py_test(
     ],
 )
 
-py_library(
-    name = "summary_writer",
-    srcs = ["summary_writer.py"],
-    srcs_version = "PY2AND3",
-    deps = [
-        "//tensorflow/contrib/summary:gen_summary_ops",
-        "//tensorflow/python:constant_op",
-        "//tensorflow/python:dtypes",
-        "//tensorflow/python:framework_ops",
-        "//tensorflow/python:init_ops",
-        "//tensorflow/python:resource_variable_ops",
-        "//tensorflow/python:state_ops",
-        "//tensorflow/python:summary_op_util",
-        "//tensorflow/python:variable_scope",
-        "//tensorflow/python/eager:context",
-    ],
-)
-
-cuda_py_test(
-    name = "summary_writer_test",
-    srcs = ["summary_writer_test.py"],
-    additional_deps = [
-        ":summary_writer",
-        "//third_party/py/numpy",
-        "//tensorflow/core:protos_all_py",
-        "//tensorflow/python:constant_op",
-        "//tensorflow/python/eager:context",
-        "//tensorflow/python/eager:test",
-    ],
-)
-
 py_library(
     name = "metrics",
     srcs = [
diff --git a/tensorflow/contrib/eager/python/summary_writer.py b/tensorflow/contrib/eager/python/summary_writer.py
deleted file mode 100644
index 5d8c41b545..0000000000
--- a/tensorflow/contrib/eager/python/summary_writer.py
+++ /dev/null
@@ -1,242 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""TensorBoard Summary Writer for TensorFlow Eager Execution."""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import uuid
-
-from tensorflow.contrib.summary import gen_summary_ops
-from tensorflow.python.eager import context
-from tensorflow.python.framework import constant_op
-from tensorflow.python.framework import dtypes
-from tensorflow.python.framework import ops
-from tensorflow.python.ops import init_ops
-from tensorflow.python.ops import resource_variable_ops
-from tensorflow.python.ops import state_ops
-from tensorflow.python.ops import summary_op_util
-from tensorflow.python.ops import variable_scope
-
-
-def _maybe_cpu(v):
-  if isinstance(v, (ops.EagerTensor, ops.Tensor)):
-    return v.cpu()
-  else:
-    return v
-
-
-def _summary_writer_function(name, tensor, function, family=None):
-  def record():
-    with summary_op_util.summary_scope(
-        name, family, values=[tensor]) as (tag, scope):
-      function(tag, scope)
-      return True
-  return record
-
-
-class SummaryWriter(object):
-  """Writes summaries for TensorBoard, compatible with eager execution.
-
-  This class is the supported way of writing TensorBoard summaries under
-  eager execution.
-  """
-
-  _CPU_DEVICE = "cpu:0"
-
-  def __init__(self,
-               logdir,
-               max_queue=10,
-               flush_secs=120,
-               filename_suffix=""):
-    """Summary writer for TensorBoard, compatible with eager execution.
-
-    If necessary, multiple instances of `SummaryWriter` can be created, with
-    distinct `logdir`s and `name`s. Each `SummaryWriter` instance will retain
-    its independent `global_step` counter and data writing destination.
-
-    Example:
-    ```python
-    writer = tfe.SummaryWriter("my_model")
-
-    # ... Code that sets up the model and data batches ...
-
-    for _ in xrange(train_iters):
-      loss = model.train_batch(batch)
-      writer.scalar("loss", loss)
-      writer.step()
-    ```
-
-    Args:
-      logdir: Directory in which summary files will be written.
-      max_queue: Number of summary items to buffer before flushing to
-        filesystem. If 0, summaries will be flushed immediately.
-      flush_secs: Number of secondsbetween forced commits to disk.
-      filename_suffix: Suffix of the event protobuf files in which the summary
-        data are stored.
-
-    Raises:
-      ValueError: If this constructor is called not under eager execution.
-    """
-    # TODO(apassos, ashankar): Make this class and the underlying
-    # contrib.summary_ops compatible with graph model and remove this check.
-    if not context.in_eager_mode():
-      raise ValueError(
-          "Use of SummaryWriter is currently supported only with eager "
-          "execution enabled. File an issue at "
-          "https://github.com/tensorflow/tensorflow/issues/new to express "
-          "interest in fixing this.")
-
-    # TODO(cais): Consider adding name keyword argument, which if None or empty,
-    # will register the global global_step that training_util.get_global_step()
-    # can find.
-    with context.device(self._CPU_DEVICE):
-      self._name = uuid.uuid4().hex
-      self._global_step = 0
-      self._global_step_tensor = variable_scope.get_variable(
-          "global_step/summary_writer/" + self._name,
-          shape=[], dtype=dtypes.int64,
-          initializer=init_ops.zeros_initializer())
-      self._global_step_dirty = False
-      self._resource = gen_summary_ops.summary_writer(shared_name=self._name)
-      gen_summary_ops.create_summary_file_writer(
-          self._resource, logdir, max_queue, flush_secs, filename_suffix)
-      # Delete the resource when this object is deleted
-      self._resource_deleter = resource_variable_ops.EagerResourceDeleter(
-          handle=self._resource, handle_device=self._CPU_DEVICE)
-
-  def step(self):
-    """Increment the global step counter of this SummaryWriter instance."""
-    self._global_step += 1
-    self._global_step_dirty = True
-
-  @property
-  def global_step(self):
-    """Obtain the current global_step value of this SummaryWriter instance.
-
-    Returns:
-      An `int` representing the current value of the global_step of this
-       `SummaryWriter` instance.
-    """
-    return self._global_step
-
-  def _update_global_step_tensor(self):
-    with context.device(self._CPU_DEVICE):
-      if self._global_step_dirty:
-        self._global_step_dirty = False
-        return state_ops.assign(self._global_step_tensor, self._global_step)
-      else:
-        return self._global_step_tensor
-
-  def generic(self, name, tensor, metadata, family=None):
-    """Write a generic-type summary.
-
-    Args:
-      name: A name for the generated node. Will also serve as the series name in
-        TensorBoard.
-      tensor: A `Tensor` or compatible value type containing the value of the
-        summary.
-      metadata: Metadata about the summary.
-      family: Optional; if provided, used as the prefix of the summary tag name,
-        which controls the tab name used for display on Tensorboard.
-    """
-    with context.device(self._CPU_DEVICE):
-      with summary_op_util.summary_scope(
-          name, family, values=[tensor]) as (tag, scope):
-        gen_summary_ops.write_summary(
-            self._resource,
-            self._update_global_step_tensor(),
-            _maybe_cpu(tensor),
-            tag,
-            _maybe_cpu(metadata),
-            name=scope)
-
-  def scalar(self, name, tensor, family=None):
-    """Write a scalar summary.
-
-    Args:
-      name: A name for the generated node. Will also serve as the series name in
-        TensorBoard.
-      tensor: A real numeric `Tensor` or compatible value type containing a
-        single value.
-      family: Optional; if provided, used as the prefix of the summary tag name,
-        which controls the tab name used for display on Tensorboard.
-
-    Returns:
-      A summary writer function for scalars.
-    """
-    with context.device(self._CPU_DEVICE):
-      with summary_op_util.summary_scope(
-          name, family, values=[tensor]) as (tag, scope):
-        gen_summary_ops.write_scalar_summary(
-            self._resource, self._update_global_step_tensor(),
-            tag, _maybe_cpu(tensor), name=scope)
-
-  def histogram(self, name, tensor, family=None):
-    """Write a histogram summary.
-
-    Args:
-      name: A name for the generated node. Will also serve as a series name in
-        TensorBoard.
-      tensor: A real numeric `Tensor` or compatible value type. Any shape.
-        Values to use to build the histogram.
-      family: Optional; if provided, used as the prefix of the summary tag name,
-        which controls the tab name used for display on Tensorboard.
-    """
-    with context.device(self._CPU_DEVICE):
-      with summary_op_util.summary_scope(
-          name, family, values=[tensor]) as (tag, scope):
-        gen_summary_ops.write_histogram_summary(
-            self._resource, self._update_global_step_tensor(),
-            tag, _maybe_cpu(tensor), name=scope)
-
-  def image(self, name, tensor, bad_color=None, max_images=3, family=None):
-    """Write an image summary."""
-    with context.device(self._CPU_DEVICE):
-      if bad_color is None:
-        bad_color_ = constant_op.constant([255, 0, 0, 255], dtype=dtypes.uint8)
-      with summary_op_util.summary_scope(
-          name, family, values=[tensor]) as (tag, scope):
-        gen_summary_ops.write_image_summary(
-            self._resource, self._update_global_step_tensor(),
-            tag, _maybe_cpu(tensor), bad_color_, max_images,
-            name=scope)
-
-  def audio(self, name, tensor, sample_rate, max_outputs, family=None):
-    """Write an audio summary.
-
-    Args:
-      name: A name for the generated node. Will also serve as a series name in
-        TensorBoard.
-      tensor: A 3-D `float32` `Tensor` of shape `[batch_size, frames, channels]`
-        or a 2-D `float32` `Tensor` of shape `[batch_size, frames]`, or
-        compatible value type.
-      sample_rate: A Scalar `float32` `Tensor` indicating the sample rate of the
-        signal in hertz.
-      max_outputs: Max number of batch elements to generate audio for.
-      family: Optional; if provided, used as the prefix of the summary tag name,
-        which controls the tab name used for display on Tensorboard.
-    """
-    with context.device(self._CPU_DEVICE):
-      with summary_op_util.summary_scope(
-          name, family, values=[tensor]) as (tag, scope):
-        gen_summary_ops.write_audio_summary(
-            self._resource, self._update_global_step_tensor(),
-            tag,
-            _maybe_cpu(tensor),
-            sample_rate=_maybe_cpu(sample_rate),
-            max_outputs=max_outputs,
-            name=scope)
diff --git a/tensorflow/contrib/eager/python/summary_writer_test.py b/tensorflow/contrib/eager/python/summary_writer_test.py
deleted file mode 100644
index 5ebb36d04f..0000000000
--- a/tensorflow/contrib/eager/python/summary_writer_test.py
+++ /dev/null
@@ -1,150 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Unit tests for eager execution SummaryWriter."""
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import os
-import shutil
-import tempfile
-
-import numpy as np
-
-from tensorflow.contrib.eager.python import summary_writer
-from tensorflow.core.util import event_pb2
-from tensorflow.python.eager import context
-from tensorflow.python.eager import test
-from tensorflow.python.framework import constant_op
-from tensorflow.python.framework import dtypes
-from tensorflow.python.lib.io import tf_record
-from tensorflow.python.platform import gfile
-
-
-class SummaryWriterTest(test.TestCase):
-
-  def setUp(self):
-    super(SummaryWriterTest, self).setUp()
-    self._test_device = "gpu:0" if context.num_gpus() else "cpu:0"
-    self._tmp_logdir = tempfile.mkdtemp()
-    with context.device(self._test_device):
-      # Use max_queue=0 so that summaries are immediately flushed to filesystem,
-      # making testing easier.
-      self._writer = summary_writer.SummaryWriter(self._tmp_logdir, max_queue=0)
-
-  def tearDown(self):
-    if os.path.isdir(self._tmp_logdir):
-      shutil.rmtree(self._tmp_logdir)
-    super(SummaryWriterTest, self).tearDown()
-
-  def _readLastEvent(self, logdir=None):
-    if not logdir:
-      logdir = self._tmp_logdir
-    files = [f for f in gfile.ListDirectory(logdir)
-             if not gfile.IsDirectory(os.path.join(logdir, f))]
-    file_path = os.path.join(logdir, files[0])
-    records = list(tf_record.tf_record_iterator(file_path))
-    event = event_pb2.Event()
-    event.ParseFromString(records[-1])
-    return event
-
-  def testGlobalStep(self):
-    with context.device(self._test_device):
-      orig_step = self._writer.global_step
-      self._writer.step()
-      self.assertEqual(orig_step + 1, self._writer.global_step)
-      self.assertEqual(orig_step + 1, self._writer.global_step)
-      self._writer.step()
-      self._writer.step()
-      self.assertEqual(orig_step + 3, self._writer.global_step)
-
-  def testGenericSummary(self):
-    with context.device(self._test_device):
-      x = constant_op.constant(1337.0)
-      with context.device("cpu:0"):
-        metadata = constant_op.constant("foo")
-      self._writer.generic("x", x, metadata)
-      event = self._readLastEvent()
-      self.assertEqual("x", event.summary.value[0].tag)
-
-  def testScalarSummary(self):
-    with context.device(self._test_device):
-      x = constant_op.constant(1337.0)
-      self._writer.scalar("x", x)
-      event = self._readLastEvent()
-      self.assertTrue("x", event.summary.value[0].tag)
-      self.assertEqual(1337.0, event.summary.value[0].simple_value)
-
-  def testHistogramSummary(self):
-    with context.device(self._test_device):
-      y = constant_op.constant([1.0, 3.0, 3.0, 7.0])
-      self._writer.histogram("y", y)
-      event = self._readLastEvent()
-      self.assertEqual("y", event.summary.value[0].tag)
-      self.assertTrue(event.summary.value[0].histo)
-
-  def testImageSummary(self):
-    with context.device(self._test_device):
-      a = constant_op.constant([[10.0, 20.0], [-20.0, -10.0]])
-      self._writer.histogram("image1", a)
-      event = self._readLastEvent()
-      self.assertEqual("image1", event.summary.value[0].tag)
-      self.assertTrue(event.summary.value[0].image)
-
-  def testAudioSummary(self):
-    with context.device(self._test_device):
-      w = constant_op.constant(np.random.rand(3, 10, 2), dtype=dtypes.float32)
-      fs = constant_op.constant(44100.0, dtype=dtypes.float32)
-      max_outputs = 1
-      self._writer.audio("audio1", w, fs, max_outputs)
-      event = self._readLastEvent()
-      self.assertTrue(event.summary.value[0].audio)
-
-  def testTwoSummaryWritersGlobalStepsWorkWithoutCrosstalk(self):
-    tmp_logdir2 = os.path.join(self._tmp_logdir, "_writer2_")
-    writer2 = summary_writer.SummaryWriter(tmp_logdir2, max_queue=0)
-
-    self.assertEqual(0, writer2.global_step)
-    self._writer.step()
-    self.assertEqual(0, writer2.global_step)
-    writer2.step()
-    writer2.step()
-    writer2.step()
-    self.assertEqual(3, writer2.global_step)
-
-    x = constant_op.constant(1337.0)
-    writer_orig_step = self._writer.global_step
-    self._writer.step()
-    self._writer.scalar("x", x)
-
-    event = self._readLastEvent()
-    self.assertEqual(writer_orig_step + 1, event.step)
-
-    writer2.scalar("x", x)
-    event = self._readLastEvent(tmp_logdir2)
-    self.assertEqual(3, event.step)
-
-    self._writer.step()
-    self._writer.scalar("x", x)
-
-    event = self._readLastEvent()
-    self.assertEqual(writer_orig_step + 2, event.step)
-
-
-# TODO(cais): Add performance benchmark for SummaryWriter.
-
-
-if __name__ == "__main__":
-  test.main()
diff --git a/tensorflow/contrib/summary/BUILD b/tensorflow/contrib/summary/BUILD
index f34291c203..5ee5f1ae76 100644
--- a/tensorflow/contrib/summary/BUILD
+++ b/tensorflow/contrib/summary/BUILD
@@ -13,10 +13,7 @@ load(
 tf_gen_op_wrapper_py(
     name = "gen_summary_ops",
     out = "gen_summary_ops.py",
-    visibility = ["//tensorflow:internal"],
-    deps = [
-        "//tensorflow/core:summary_ops_op_lib",
-    ],
+    deps = ["//tensorflow/core:summary_ops_op_lib"],
 )
 
 py_test(
diff --git a/tensorflow/tools/pip_package/BUILD b/tensorflow/tools/pip_package/BUILD
index e3cbd67721..48fc4c91be 100644
--- a/tensorflow/tools/pip_package/BUILD
+++ b/tensorflow/tools/pip_package/BUILD
@@ -156,7 +156,6 @@ sh_binary(
             "//tensorflow/contrib/data/python/ops:prefetching_py",
             "//tensorflow/contrib/eager/python/examples:examples_pip",
             "//tensorflow/contrib/eager/python:evaluator",
-            "//tensorflow/contrib/eager/python:summary_writer",
             "//tensorflow/contrib/gan:gan",
             "//tensorflow/contrib/graph_editor:graph_editor_pip",
             "//tensorflow/contrib/keras:keras",
-- 
GitLab


From 4d8277747afc62fd3959fc249545f237e5f0ca80 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 1 Dec 2017 08:27:27 -0800
Subject: [PATCH 0501/1225] 1) Make `tensor_pool` support a list of
 input_values, so we can store (generated_input, generated_data) tuples in the
 pool. 2) Rename `tensor_pool` function to `random_tensor_pool` so the
 function name does not collide with the pkg name.

PiperOrigin-RevId: 177594443
---
 tensorflow/contrib/gan/BUILD                  | 14 ++--
 .../contrib/gan/python/features/__init__.py   |  3 +
 .../{tensor_pool.py => random_tensor_pool.py} |  6 +-
 ...ool_impl.py => random_tensor_pool_impl.py} | 64 ++++++++++++-------
 ...ool_test.py => random_tensor_pool_test.py} | 28 ++++++--
 5 files changed, 75 insertions(+), 40 deletions(-)
 rename tensorflow/contrib/gan/python/features/python/{tensor_pool.py => random_tensor_pool.py} (86%)
 rename tensorflow/contrib/gan/python/features/python/{tensor_pool_impl.py => random_tensor_pool_impl.py} (70%)
 rename tensorflow/contrib/gan/python/features/python/{tensor_pool_test.py => random_tensor_pool_test.py} (75%)

diff --git a/tensorflow/contrib/gan/BUILD b/tensorflow/contrib/gan/BUILD
index abe4665caa..a2e6fa51f1 100644
--- a/tensorflow/contrib/gan/BUILD
+++ b/tensorflow/contrib/gan/BUILD
@@ -116,7 +116,7 @@ py_library(
     deps = [
         ":clip_weights",
         ":conditioning_utils",
-        ":tensor_pool",
+        ":random_tensor_pool",
         ":virtual_batchnorm",
         "//tensorflow/python:util",
     ],
@@ -221,10 +221,10 @@ py_test(
 )
 
 py_library(
-    name = "tensor_pool",
+    name = "random_tensor_pool",
     srcs = [
-        "python/features/python/tensor_pool.py",
-        "python/features/python/tensor_pool_impl.py",
+        "python/features/python/random_tensor_pool.py",
+        "python/features/python/random_tensor_pool_impl.py",
     ],
     srcs_version = "PY2AND3",
     deps = [
@@ -239,11 +239,11 @@ py_library(
 )
 
 py_test(
-    name = "tensor_pool_test",
-    srcs = ["python/features/python/tensor_pool_test.py"],
+    name = "random_tensor_pool_test",
+    srcs = ["python/features/python/random_tensor_pool_test.py"],
     srcs_version = "PY2AND3",
     deps = [
-        ":tensor_pool",
+        ":random_tensor_pool",
         "//tensorflow/python:array_ops",
         "//tensorflow/python:client_testlib",
         "//tensorflow/python:dtypes",
diff --git a/tensorflow/contrib/gan/python/features/__init__.py b/tensorflow/contrib/gan/python/features/__init__.py
index 6d0972f8db..50d0bfa17c 100644
--- a/tensorflow/contrib/gan/python/features/__init__.py
+++ b/tensorflow/contrib/gan/python/features/__init__.py
@@ -22,10 +22,12 @@ from __future__ import print_function
 # pylint: disable=unused-import,wildcard-import
 from tensorflow.contrib.gan.python.features.python import clip_weights
 from tensorflow.contrib.gan.python.features.python import conditioning_utils
+from tensorflow.contrib.gan.python.features.python import random_tensor_pool
 from tensorflow.contrib.gan.python.features.python import virtual_batchnorm
 
 from tensorflow.contrib.gan.python.features.python.clip_weights import *
 from tensorflow.contrib.gan.python.features.python.conditioning_utils import *
+from tensorflow.contrib.gan.python.features.python.random_tensor_pool import *
 from tensorflow.contrib.gan.python.features.python.virtual_batchnorm import *
 # pylint: enable=unused-import,wildcard-import
 
@@ -33,5 +35,6 @@ from tensorflow.python.util.all_util import remove_undocumented
 
 _allowed_symbols = clip_weights.__all__
 _allowed_symbols += conditioning_utils.__all__
+_allowed_symbols += random_tensor_pool.__all__
 _allowed_symbols += virtual_batchnorm.__all__
 remove_undocumented(__name__, _allowed_symbols)
diff --git a/tensorflow/contrib/gan/python/features/python/tensor_pool.py b/tensorflow/contrib/gan/python/features/python/random_tensor_pool.py
similarity index 86%
rename from tensorflow/contrib/gan/python/features/python/tensor_pool.py
rename to tensorflow/contrib/gan/python/features/python/random_tensor_pool.py
index 0bd2fa3db9..ca904971fa 100644
--- a/tensorflow/contrib/gan/python/features/python/tensor_pool.py
+++ b/tensorflow/contrib/gan/python/features/python/random_tensor_pool.py
@@ -25,11 +25,11 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-from tensorflow.contrib.gan.python.features.python import tensor_pool_impl
+from tensorflow.contrib.gan.python.features.python import random_tensor_pool_impl
 # pylint: disable=wildcard-import
-from tensorflow.contrib.gan.python.features.python.tensor_pool_impl import *
+from tensorflow.contrib.gan.python.features.python.random_tensor_pool_impl import *
 # pylint: enable=wildcard-import
 from tensorflow.python.util.all_util import remove_undocumented
 
-__all__ = tensor_pool_impl.__all__
+__all__ = random_tensor_pool_impl.__all__
 remove_undocumented(__name__, __all__)
diff --git a/tensorflow/contrib/gan/python/features/python/tensor_pool_impl.py b/tensorflow/contrib/gan/python/features/python/random_tensor_pool_impl.py
similarity index 70%
rename from tensorflow/contrib/gan/python/features/python/tensor_pool_impl.py
rename to tensorflow/contrib/gan/python/features/python/random_tensor_pool_impl.py
index 79318a69d2..9d733b6ff9 100644
--- a/tensorflow/contrib/gan/python/features/python/tensor_pool_impl.py
+++ b/tensorflow/contrib/gan/python/features/python/random_tensor_pool_impl.py
@@ -42,7 +42,13 @@ __all__ = [
 ]
 
 
-def tensor_pool(input_value,
+def _to_tuple(x):
+  if isinstance(x, (list, tuple)):
+    return tuple(x)
+  return (x,)
+
+
+def tensor_pool(input_values,
                 pool_size,
                 pooling_probability=0.5,
                 name='tensor_pool'):
@@ -57,15 +63,17 @@ def tensor_pool(input_value,
   `pool_size` = 0 or `pooling_probability` = 0.
 
   Args:
-    input_value: A `Tensor` from which to read values to be pooled.
+    input_values: A `Tensor`, or a list or tuple of `Tensor`s from which to read
+      values to be pooled.
     pool_size: An integer specifying the maximum size of the pool.
     pooling_probability: A float `Tensor` specifying the probability of getting
       a value from the pool, as opposed to just the current input.
     name: A string prefix for the name scope for all tensorflow ops.
 
   Returns:
-    A `Tensor` which is with given probability either the `input_value` or a
-    randomly chosen sample that was previously inserted in the pool.
+    A `Tensor`, or a list or tuple of `Tensor`s (according to the type ofx
+    `input_values`) which is with given probability either the `input_values` or
+    a randomly chosen sample that was previously inserted in the pool.
 
   Raises:
     ValueError: If `pool_size` is negative.
@@ -74,45 +82,53 @@ def tensor_pool(input_value,
   if pool_size < 0:
     raise ValueError('`pool_size` is negative.')
   elif pool_size == 0:
-    return input_value
+    return input_values
+
+  original_input_values = input_values
+  input_values = _to_tuple(input_values)
 
-  with ops.name_scope('{}_pool_queue'.format(name),
-                      values=[input_value, pooling_probability]):
+  with ops.name_scope(
+      '{}_pool_queue'.format(name),
+      values=input_values + (pooling_probability,)):
     pool_queue = data_flow_ops.RandomShuffleQueue(
         capacity=pool_size,
         min_after_dequeue=0,
-        dtypes=[input_value.dtype],
+        dtypes=[v.dtype for v in input_values],
         shapes=None)
 
     # In pseudeo code this code does the following:
     # if not pool_full:
-    #   enqueue(input_value)
-    #   return input_value
+    #   enqueue(input_values)
+    #   return input_values
     # else
-    #   dequeue_value = dequeue_random_sample()
-    #   enqueue(input_value)
+    #   dequeue_values = dequeue_random_sample()
+    #   enqueue(input_values)
     #   if rand() < pooling_probability:
-    #     return dequeue_value
+    #     return dequeue_values
     #   else
-    #     return input_value
+    #     return input_values
 
     def _get_input_value_pooled():
-      enqueue_op = pool_queue.enqueue(input_value)
+      enqueue_op = pool_queue.enqueue(input_values)
       with ops.control_dependencies([enqueue_op]):
-        return array_ops.identity(input_value)
+        return tuple(array_ops.identity(v) for v in input_values)
 
     def _get_random_pool_value_and_enqueue_input():
-      dequeue_value = pool_queue.dequeue()
-      with ops.control_dependencies([dequeue_value]):
-        enqueue_op = pool_queue.enqueue(input_value)
+      dequeue_values = _to_tuple(pool_queue.dequeue())
+      with ops.control_dependencies(dequeue_values):
+        enqueue_op = pool_queue.enqueue(input_values)
         with ops.control_dependencies([enqueue_op]):
           prob = random_ops.random_uniform(
               (), dtype=dtypes.float32) < pooling_probability
-          return control_flow_ops.cond(prob, lambda: dequeue_value,
-                                       lambda: input_value)
+          return control_flow_ops.cond(prob, lambda: dequeue_values,
+                                       lambda: input_values)
 
-    output_value = control_flow_ops.cond(
+    output_values = _to_tuple(control_flow_ops.cond(
         pool_queue.size() < pool_size, _get_input_value_pooled,
-        _get_random_pool_value_and_enqueue_input)
+        _get_random_pool_value_and_enqueue_input))
 
-  return output_value
+  if isinstance(original_input_values, list):
+    return list(output_values)
+  elif isinstance(original_input_values, tuple):
+    return output_values
+  return output_values[0]
diff --git a/tensorflow/contrib/gan/python/features/python/tensor_pool_test.py b/tensorflow/contrib/gan/python/features/python/random_tensor_pool_test.py
similarity index 75%
rename from tensorflow/contrib/gan/python/features/python/tensor_pool_test.py
rename to tensorflow/contrib/gan/python/features/python/random_tensor_pool_test.py
index 49b77bb3fc..cef3a87ab3 100644
--- a/tensorflow/contrib/gan/python/features/python/tensor_pool_test.py
+++ b/tensorflow/contrib/gan/python/features/python/random_tensor_pool_test.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""Tests for tf.contrib.gan.python.features.tensor_pool."""
+"""Tests for tf.contrib.gan.python.features.random_tensor_pool."""
 
 from __future__ import absolute_import
 from __future__ import division
@@ -20,7 +20,7 @@ from __future__ import print_function
 
 import numpy as np
 
-from tensorflow.contrib.gan.python.features.python import tensor_pool_impl as tensor_pool
+from tensorflow.contrib.gan.python.features.python.random_tensor_pool_impl import tensor_pool
 from tensorflow.python.framework import dtypes
 from tensorflow.python.ops import array_ops
 from tensorflow.python.platform import test
@@ -32,7 +32,7 @@ class TensorPoolTest(test.TestCase):
     """Checks that `input_value` can have unknown shape."""
     input_value = array_ops.placeholder(
         dtype=dtypes.int32, shape=[None, None, 3])
-    output_value = tensor_pool.tensor_pool(input_value, pool_size=10)
+    output_value = tensor_pool(input_value, pool_size=10)
 
     with self.test_session(use_gpu=True) as session:
       for i in range(10):
@@ -43,7 +43,7 @@ class TensorPoolTest(test.TestCase):
   def test_pool_sequence(self):
     """Checks that values are pooled and returned maximally twice."""
     input_value = array_ops.placeholder(dtype=dtypes.int32, shape=[])
-    output_value = tensor_pool.tensor_pool(input_value, pool_size=10)
+    output_value = tensor_pool(input_value, pool_size=10)
 
     with self.test_session(use_gpu=True) as session:
       outs = []
@@ -59,7 +59,7 @@ class TensorPoolTest(test.TestCase):
   def test_never_pool(self):
     """Checks that setting `pooling_probability` to zero works."""
     input_value = array_ops.placeholder(dtype=dtypes.int32, shape=[])
-    output_value = tensor_pool.tensor_pool(
+    output_value = tensor_pool(
         input_value, pool_size=10, pooling_probability=0.0)
 
     with self.test_session(use_gpu=True) as session:
@@ -72,7 +72,7 @@ class TensorPoolTest(test.TestCase):
     input_value = array_ops.placeholder(dtype=dtypes.int32, shape=[])
     pool_size = 10
     pooling_probability = 0.2
-    output_value = tensor_pool.tensor_pool(
+    output_value = tensor_pool(
         input_value,
         pool_size=pool_size,
         pooling_probability=pooling_probability)
@@ -89,6 +89,22 @@ class TensorPoolTest(test.TestCase):
           1 - pooling_probability,
           atol=0.03)
 
+  def test_input_values_tuple(self):
+    """Checks that `input_values` can be a tuple."""
+    input_values = (array_ops.placeholder(dtype=dtypes.int32, shape=[]),
+                    array_ops.placeholder(dtype=dtypes.int32, shape=[]))
+    output_values = tensor_pool(input_values, pool_size=3)
+    self.assertEqual(len(output_values), len(input_values))
+
+    with self.test_session(use_gpu=True) as session:
+      for i in range(10):
+        outs = session.run(output_values, {
+            input_values[0]: i,
+            input_values[1]: i + 1
+        })
+        self.assertEqual(len(outs), len(input_values))
+        self.assertEqual(outs[1] - outs[0], 1)
+
 
 if __name__ == '__main__':
   test.main()
-- 
GitLab


From 18e98b3522a38554349c4c4da71f9e9d14447e1c Mon Sep 17 00:00:00 2001
From: Derek Murray <mrry@google.com>
Date: Fri, 1 Dec 2017 08:40:57 -0800
Subject: [PATCH 0502/1225] Split out a "graph" library from "core_cpu" to
 allow finer-grained dependencies.

This is a step towards supporting custom Dataset ops built as external
libraries.

PiperOrigin-RevId: 177595688
---
 tensorflow/compiler/aot/BUILD             |  1 +
 tensorflow/compiler/jit/BUILD             |  1 +
 tensorflow/compiler/tf2xla/BUILD          |  2 +
 tensorflow/core/BUILD                     | 52 ++++++++++++++++++-----
 tensorflow/core/debug/BUILD               |  2 +
 tensorflow/core/distributed_runtime/BUILD |  3 ++
 tensorflow/core/grappler/costs/BUILD      |  3 +-
 tensorflow/core/kernels/BUILD             |  2 +-
 tensorflow/tools/graph_transforms/BUILD   |  1 +
 9 files changed, 54 insertions(+), 13 deletions(-)

diff --git a/tensorflow/compiler/aot/BUILD b/tensorflow/compiler/aot/BUILD
index a9a6ea8431..767e3686a8 100644
--- a/tensorflow/compiler/aot/BUILD
+++ b/tensorflow/compiler/aot/BUILD
@@ -111,6 +111,7 @@ cc_library(
         "//tensorflow/core:core_cpu_internal",
         "//tensorflow/core:framework",
         "//tensorflow/core:framework_internal",
+        "//tensorflow/core:graph",
         "//tensorflow/core:lib",
         "//tensorflow/core:protos_all_cc",
     ],
diff --git a/tensorflow/compiler/jit/BUILD b/tensorflow/compiler/jit/BUILD
index bf7d9cf14d..026a1bf879 100644
--- a/tensorflow/compiler/jit/BUILD
+++ b/tensorflow/compiler/jit/BUILD
@@ -251,6 +251,7 @@ cc_library(
         "//tensorflow/core:core_cpu",
         "//tensorflow/core:core_cpu_internal",
         "//tensorflow/core:framework",
+        "//tensorflow/core:graph",
         "//tensorflow/core:lib",
         "//tensorflow/core:lib_internal",
         "//tensorflow/core:protos_all_cc",
diff --git a/tensorflow/compiler/tf2xla/BUILD b/tensorflow/compiler/tf2xla/BUILD
index 5a81438b1c..4cb2b99f9f 100644
--- a/tensorflow/compiler/tf2xla/BUILD
+++ b/tensorflow/compiler/tf2xla/BUILD
@@ -215,6 +215,7 @@ cc_library(
         "//tensorflow/core:core_cpu_internal",
         "//tensorflow/core:framework",
         "//tensorflow/core:framework_internal",
+        "//tensorflow/core:graph",
         "//tensorflow/core:lib",
         "//tensorflow/core:protos_all_cc",
     ],
@@ -400,6 +401,7 @@ cc_library(
         "//tensorflow/core:core_cpu",
         "//tensorflow/core:core_cpu_internal",
         "//tensorflow/core:framework",
+        "//tensorflow/core:graph",
         "//tensorflow/core:lib",
     ],
 )
diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD
index fce0663aa5..4b5f67baad 100644
--- a/tensorflow/core/BUILD
+++ b/tensorflow/core/BUILD
@@ -1853,11 +1853,13 @@ cc_library(
     deps = ["//tensorflow/core/platform/default/build_config:protos_cc"],
 )
 
-CORE_CPU_BASE_HDRS = [
-    "common_runtime/device.h",
-    "common_runtime/graph_runner.h",
-    "common_runtime/shape_refiner.h",
-    "framework/versions.h",
+# Library containing all of the graph construction code that is
+# independent of the runtime.
+#
+# TODO(mrry): Refactor graph_constructor.cc so that it does not depend on code
+# in "common_runtime/", and then the entire "graph/" directory can be included
+# in this library.
+GRAPH_HDRS = [
     "graph/algorithm.h",
     "graph/colors.h",
     "graph/control_flow.h",
@@ -1865,7 +1867,7 @@ CORE_CPU_BASE_HDRS = [
     "graph/default_device.h",
     "graph/edgeset.h",
     "graph/graph.h",
-    "graph/graph_constructor.h",
+    "graph/graph_constructor.h",  # NOTE(mrry): Don't include the .cc since it depends on common_runtime.
     "graph/graph_def_builder.h",
     "graph/graph_partition.h",
     "graph/mkl_layout_pass.h",
@@ -1881,16 +1883,12 @@ CORE_CPU_BASE_HDRS = [
 ]
 
 tf_cuda_library(
-    name = "core_cpu_base",
+    name = "graph",
     srcs = [
-        "common_runtime/shape_refiner.cc",
-        "common_runtime/shape_refiner.h",
-        "framework/versions.h",
         "graph/algorithm.cc",
         "graph/colors.cc",
         "graph/control_flow.cc",
         "graph/costmodel.cc",
-        "graph/graph_constructor.cc",
         "graph/graph_def_builder.cc",
         "graph/graph_partition.cc",
         "graph/node_builder.cc",
@@ -1898,6 +1896,33 @@ tf_cuda_library(
         "graph/subgraph.cc",
         "graph/tensor_id.cc",
         "graph/validate.cc",
+    ],
+    hdrs = GRAPH_HDRS,
+    deps = [
+        ":framework",
+        ":framework_internal",
+        ":lib",
+        ":lib_internal",
+        ":proto_text",
+        ":protos_all_cc",
+        "//third_party/eigen3",
+    ],
+)
+
+CORE_CPU_BASE_HDRS = GRAPH_HDRS + [
+    "common_runtime/device.h",
+    "common_runtime/graph_runner.h",
+    "common_runtime/shape_refiner.h",
+    "framework/versions.h",
+]
+
+tf_cuda_library(
+    name = "core_cpu_base",
+    srcs = [
+        "common_runtime/shape_refiner.cc",
+        "common_runtime/shape_refiner.h",
+        "framework/versions.h",
+        "graph/graph_constructor.cc",  # Depends on common_runtime.
         "public/session.h",
         "public/session_options.h",
         "public/version.h",
@@ -1905,6 +1930,7 @@ tf_cuda_library(
     hdrs = CORE_CPU_BASE_HDRS,
     copts = tf_copts(),
     deps = [
+        ":graph",
         ":framework",
         ":framework_internal",
         ":lib",
@@ -2008,6 +2034,7 @@ tf_cuda_library(
     hdrs = CORE_CPU_LIB_HEADERS,
     copts = tf_copts(),
     deps = [
+        ":graph",
         ":framework",
         ":framework_internal",
         ":lib",
@@ -2049,6 +2076,7 @@ tf_cuda_library(
         ":function_ops_op_lib",
         ":functional_grad",
         ":functional_ops_op_lib",
+        ":graph",
         ":lib",
         ":lib_internal",
         ":proto_text",
@@ -2094,6 +2122,7 @@ tf_cuda_library(
         ":core_cpu_internal",
         ":device_tracer",
         ":framework",
+        ":graph",
         ":lib",
         ":lib_internal",
         ":proto_text",
@@ -2176,6 +2205,7 @@ tf_cuda_library(
         ":framework_internal",
         ":gpu_init_impl",
         ":gpu_lib",
+        ":graph",
         ":lib",
         ":lib_internal",
         ":protos_all_cc",
diff --git a/tensorflow/core/debug/BUILD b/tensorflow/core/debug/BUILD
index 6d796768de..108dc59919 100644
--- a/tensorflow/core/debug/BUILD
+++ b/tensorflow/core/debug/BUILD
@@ -123,6 +123,7 @@ tf_cuda_library(
     deps = [
         "//tensorflow/core:core_cpu_internal",
         "//tensorflow/core:framework",
+        "//tensorflow/core:graph",
         "//tensorflow/core:lib",
         "//tensorflow/core:lib_internal",
         "//tensorflow/core:proto_text",
@@ -144,6 +145,7 @@ tf_cuda_library(
         ":debugger_event_metadata_proto_cc",
         "//tensorflow/core:core_cpu_internal",
         "//tensorflow/core:framework",
+        "//tensorflow/core:graph",
         "//tensorflow/core:lib",
         "//tensorflow/core:lib_internal",
         "//tensorflow/core:proto_text",
diff --git a/tensorflow/core/distributed_runtime/BUILD b/tensorflow/core/distributed_runtime/BUILD
index 29164bbffe..19122e3b74 100644
--- a/tensorflow/core/distributed_runtime/BUILD
+++ b/tensorflow/core/distributed_runtime/BUILD
@@ -335,6 +335,7 @@ cc_library(
         "//tensorflow/core:core_cpu",
         "//tensorflow/core:core_cpu_internal",
         "//tensorflow/core:framework",
+        "//tensorflow/core:graph",
         "//tensorflow/core:lib",
         "//tensorflow/core:lib_internal",
         "//tensorflow/core:master_proto_cc",
@@ -372,6 +373,7 @@ cc_library(
     deps = [
         "//tensorflow/core:core_cpu_internal",
         "//tensorflow/core:framework",
+        "//tensorflow/core:graph",
         "//tensorflow/core:tensorflow_opensource",
     ],
 )
@@ -415,6 +417,7 @@ cc_library(
         ":worker_env",
         "//tensorflow/core:core_cpu_internal",
         "//tensorflow/core:framework",
+        "//tensorflow/core:graph",
         "//tensorflow/core:lib",
         "//tensorflow/core:lib_internal",
         "//tensorflow/core:protos_all_cc",
diff --git a/tensorflow/core/grappler/costs/BUILD b/tensorflow/core/grappler/costs/BUILD
index f1edbbb602..d6ce72639c 100644
--- a/tensorflow/core/grappler/costs/BUILD
+++ b/tensorflow/core/grappler/costs/BUILD
@@ -133,8 +133,8 @@ tf_cuda_library(
     visibility = ["//visibility:public"],
     deps = [
         ":op_performance_data_cc",
-        "//tensorflow/core:core_cpu_base",
         "//tensorflow/core:framework",
+        "//tensorflow/core:graph",
         "//tensorflow/core:lib",
         "//tensorflow/core:lib_proto_parsing",
         "//tensorflow/core:protos_all_cc",
@@ -307,6 +307,7 @@ cc_library(
         ":virtual_placer",
         ":virtual_scheduler",
         "//tensorflow/core:core_cpu_base",
+        "//tensorflow/core:graph",
         "//tensorflow/core:lib",
         "//tensorflow/core:protos_all_cc",
         "//tensorflow/core/grappler:grappler_item",
diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD
index dcffb28513..8d87915658 100644
--- a/tensorflow/core/kernels/BUILD
+++ b/tensorflow/core/kernels/BUILD
@@ -5837,8 +5837,8 @@ cc_library(
     srcs = ["dataset.cc"],
     hdrs = ["dataset.h"],
     deps = [
-        "//tensorflow/core:core_cpu",
         "//tensorflow/core:framework",
+        "//tensorflow/core:graph",
         "//tensorflow/core:lib",
         "//tensorflow/core:lib_internal",
         "//tensorflow/core:protos_all_cc",
diff --git a/tensorflow/tools/graph_transforms/BUILD b/tensorflow/tools/graph_transforms/BUILD
index 9216008600..58489b28c8 100644
--- a/tensorflow/tools/graph_transforms/BUILD
+++ b/tensorflow/tools/graph_transforms/BUILD
@@ -128,6 +128,7 @@ cc_library(
         "//tensorflow/core:core_cpu_internal",
         "//tensorflow/core:framework",
         "//tensorflow/core:framework_internal",
+        "//tensorflow/core:graph",
         "//tensorflow/core:lib",
         "//tensorflow/core:protos_all_cc",
         "//tensorflow/core:tensorflow",
-- 
GitLab


From 74c5ff451a532e47ae1ba5a4f5b3aef77f84c180 Mon Sep 17 00:00:00 2001
From: Brennan Saeta <saeta@google.com>
Date: Fri, 1 Dec 2017 08:49:20 -0800
Subject: [PATCH 0503/1225] Store and log request URI when requests hang.

PiperOrigin-RevId: 177596564
---
 tensorflow/core/platform/cloud/curl_http_request.cc | 7 ++++---
 tensorflow/core/platform/cloud/curl_http_request.h  | 3 +++
 2 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/tensorflow/core/platform/cloud/curl_http_request.cc b/tensorflow/core/platform/cloud/curl_http_request.cc
index d01734ba3a..4581a0870a 100644
--- a/tensorflow/core/platform/cloud/curl_http_request.cc
+++ b/tensorflow/core/platform/cloud/curl_http_request.cc
@@ -195,6 +195,7 @@ Status CurlHttpRequest::SetUri(const string& uri) {
   TF_RETURN_IF_ERROR(CheckInitialized());
   TF_RETURN_IF_ERROR(CheckNotSent());
   is_uri_set_ = true;
+  uri_ = uri;
   libcurl_->curl_easy_setopt(curl_, CURLOPT_URL, uri.c_str());
   return Status::OK();
 }
@@ -530,9 +531,9 @@ int CurlHttpRequest::ProgressCallback(void* this_object, curl_off_t dltotal,
 
   if (now - that->last_progress_timestamp_ > kInactivityTimeoutSeconds) {
     LOG(ERROR) << "The transmission  of request " << this_object
-               << " has been stuck at " << current_progress << " of "
-               << dltotal + ultotal << " bytes for "
-               << now - that->last_progress_timestamp_
+               << " (URI: " << that->uri_ << ") has been stuck at "
+               << current_progress << " of " << dltotal + ultotal
+               << " bytes for " << now - that->last_progress_timestamp_
                << " seconds and will be aborted.";
     return 1;  // Will abort the request.
   }
diff --git a/tensorflow/core/platform/cloud/curl_http_request.h b/tensorflow/core/platform/cloud/curl_http_request.h
index 2396593d6d..9e5ae61016 100644
--- a/tensorflow/core/platform/cloud/curl_http_request.h
+++ b/tensorflow/core/platform/cloud/curl_http_request.h
@@ -168,6 +168,9 @@ class CurlHttpRequest : public HttpRequest {
   bool is_method_set_ = false;
   bool is_sent_ = false;
 
+  // Store the URI to help disambiguate requests when errors occur.
+  string uri_;
+
   TF_DISALLOW_COPY_AND_ASSIGN(CurlHttpRequest);
 };
 
-- 
GitLab


From 2c78d7bfaf3158df22401c03fc4de2cb99526d4f Mon Sep 17 00:00:00 2001
From: "Joshua V. Dillon" <jvdillon@google.com>
Date: Fri, 1 Dec 2017 10:12:55 -0800
Subject: [PATCH 0504/1225] Add `cross_entropy` and `kl_divergence` to
 `tf.distributions.Distribution`.

PiperOrigin-RevId: 177605983
---
 .../distributions/kullback_leibler_test.py    |  58 ++++++---
 .../python/ops/distributions/distribution.py  | 110 +++++++++++++++---
 .../ops/distributions/kullback_leibler.py     |  32 +++++
 .../tensorflow.distributions.-bernoulli.pbtxt |   8 ++
 .../tensorflow.distributions.-beta.pbtxt      |   8 ++
 ...ensorflow.distributions.-categorical.pbtxt |   8 ++
 ...distributions.-dirichlet-multinomial.pbtxt |   8 ++
 .../tensorflow.distributions.-dirichlet.pbtxt |   8 ++
 ...nsorflow.distributions.-distribution.pbtxt |   8 ++
 ...ensorflow.distributions.-exponential.pbtxt |   8 ++
 .../tensorflow.distributions.-gamma.pbtxt     |   8 ++
 .../tensorflow.distributions.-laplace.pbtxt   |   8 ++
 ...ensorflow.distributions.-multinomial.pbtxt |   8 ++
 .../tensorflow.distributions.-normal.pbtxt    |   8 ++
 .../tensorflow.distributions.-student-t.pbtxt |   8 ++
 .../tensorflow.distributions.-uniform.pbtxt   |   8 ++
 16 files changed, 273 insertions(+), 31 deletions(-)

diff --git a/tensorflow/python/kernel_tests/distributions/kullback_leibler_test.py b/tensorflow/python/kernel_tests/distributions/kullback_leibler_test.py
index b1d8da7716..d0fa1fe989 100644
--- a/tensorflow/python/kernel_tests/distributions/kullback_leibler_test.py
+++ b/tensorflow/python/kernel_tests/distributions/kullback_leibler_test.py
@@ -59,13 +59,21 @@ class KLTest(test.TestCase):
     # pylint: disable=unused-argument,unused-variable
 
     with self.test_session():
-      a = MyDistException(loc=0.0, scale=1.0)
+      a = MyDistException(loc=0.0, scale=1.0, allow_nan_stats=False)
       kl = kullback_leibler.kl_divergence(a, a, allow_nan_stats=False)
       with self.assertRaisesOpError(
           "KL calculation between .* and .* returned NaN values"):
         kl.eval()
+      with self.assertRaisesOpError(
+          "KL calculation between .* and .* returned NaN values"):
+        a.kl_divergence(a).eval()
+      a = MyDistException(loc=0.0, scale=1.0, allow_nan_stats=True)
       kl_ok = kullback_leibler.kl_divergence(a, a)
       self.assertAllEqual([float("nan")], kl_ok.eval())
+      self_kl_ok = a.kl_divergence(a)
+      self.assertAllEqual([float("nan")], self_kl_ok.eval())
+      cross_ok = a.cross_entropy(a)
+      self.assertAllEqual([float("nan")], cross_ok.eval())
 
   def testRegistrationFailures(self):
 
@@ -86,16 +94,22 @@ class KLTest(test.TestCase):
     for (k, v) in _DIVERGENCES.items():
       self.assertEqual(v, _registered_kl(*k))
 
-  def testIndirectRegistration(self):
+  def _testIndirectRegistration(self, fn):
 
     class Sub1(normal.Normal):
-      pass
+
+      def entropy(self):
+        return ""
 
     class Sub2(normal.Normal):
-      pass
+
+      def entropy(self):
+        return ""
 
     class Sub11(Sub1):
-      pass
+
+      def entropy(self):
+        return ""
 
     # pylint: disable=unused-argument,unused-variable
     @kullback_leibler.RegisterKL(Sub1, Sub1)
@@ -116,16 +130,30 @@ class KLTest(test.TestCase):
     sub2 = Sub2(loc=0.0, scale=1.0)
     sub11 = Sub11(loc=0.0, scale=1.0)
 
-    self.assertEqual("sub1-1", kullback_leibler.kl_divergence(sub1, sub1))
-    self.assertEqual("sub1-2", kullback_leibler.kl_divergence(sub1, sub2))
-    self.assertEqual("sub2-1", kullback_leibler.kl_divergence(sub2, sub1))
-    self.assertEqual("sub1-1", kullback_leibler.kl_divergence(sub11, sub11))
-    self.assertEqual("sub1-1", kullback_leibler.kl_divergence(sub11, sub1))
-    self.assertEqual("sub1-2", kullback_leibler.kl_divergence(sub11, sub2))
-    self.assertEqual("sub1-1", kullback_leibler.kl_divergence(sub11, sub1))
-    self.assertEqual("sub1-2", kullback_leibler.kl_divergence(sub11, sub2))
-    self.assertEqual("sub2-1", kullback_leibler.kl_divergence(sub2, sub11))
-    self.assertEqual("sub1-1", kullback_leibler.kl_divergence(sub1, sub11))
+    self.assertEqual("sub1-1", fn(sub1, sub1))
+    self.assertEqual("sub1-2", fn(sub1, sub2))
+    self.assertEqual("sub2-1", fn(sub2, sub1))
+    self.assertEqual("sub1-1", fn(sub11, sub11))
+    self.assertEqual("sub1-1", fn(sub11, sub1))
+    self.assertEqual("sub1-2", fn(sub11, sub2))
+    self.assertEqual("sub1-1", fn(sub11, sub1))
+    self.assertEqual("sub1-2", fn(sub11, sub2))
+    self.assertEqual("sub2-1", fn(sub2, sub11))
+    self.assertEqual("sub1-1", fn(sub1, sub11))
+
+  def testIndirectRegistrationKLFun(self):
+    self._testIndirectRegistration(kullback_leibler.kl_divergence)
+
+  def testIndirectRegistrationKLSelf(self):
+    self._testIndirectRegistration(
+        lambda p, q: p.kl_divergence(q))
+
+  def testIndirectRegistrationCrossEntropy(self):
+    self._testIndirectRegistration(
+        lambda p, q: p.cross_entropy(q))
+
+  def testFunctionCrossEntropy(self):
+    self._testIndirectRegistration(kullback_leibler.cross_entropy)
 
 
 if __name__ == "__main__":
diff --git a/tensorflow/python/ops/distributions/distribution.py b/tensorflow/python/ops/distributions/distribution.py
index 22687a093a..2d4c3509bc 100644
--- a/tensorflow/python/ops/distributions/distribution.py
+++ b/tensorflow/python/ops/distributions/distribution.py
@@ -31,6 +31,7 @@ from tensorflow.python.framework import tensor_shape
 from tensorflow.python.framework import tensor_util
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import math_ops
+from tensorflow.python.ops.distributions import kullback_leibler
 from tensorflow.python.ops.distributions import util
 from tensorflow.python.util import tf_inspect
 
@@ -43,10 +44,26 @@ __all__ = [
 ]
 
 _DISTRIBUTION_PUBLIC_METHOD_WRAPPERS = [
-    "batch_shape_tensor", "batch_shape", "event_shape_tensor", "event_shape",
-    "sample", "log_prob", "prob", "log_cdf", "cdf", "log_survival_function",
-    "survival_function", "entropy", "mean", "variance", "stddev", "mode",
-    "covariance"]
+    "batch_shape",
+    "batch_shape_tensor",
+    "cdf",
+    "covariance",
+    "cross_entropy",
+    "entropy",
+    "event_shape",
+    "event_shape_tensor",
+    "kl_divergence",
+    "log_cdf",
+    "log_prob",
+    "log_survival_function",
+    "mean",
+    "mode",
+    "prob",
+    "sample",
+    "stddev",
+    "survival_function",
+    "variance",
+]
 
 
 @six.add_metaclass(abc.ABCMeta)
@@ -608,7 +625,7 @@ class Distribution(_BaseDistribution):
     """Indicates that `event_shape == []`.
 
     Args:
-      name: The name to give this op.
+      name: Python `str` prepended to names of ops created by this function.
 
     Returns:
       is_scalar_event: `bool` scalar `Tensor`.
@@ -622,7 +639,7 @@ class Distribution(_BaseDistribution):
     """Indicates that `batch_shape == []`.
 
     Args:
-      name: The name to give this op.
+      name: Python `str` prepended to names of ops created by this function.
 
     Returns:
       is_scalar_batch: `bool` scalar `Tensor`.
@@ -683,7 +700,7 @@ class Distribution(_BaseDistribution):
 
     Args:
       value: `float` or `double` `Tensor`.
-      name: The name to give this op.
+      name: Python `str` prepended to names of ops created by this function.
 
     Returns:
       log_prob: a `Tensor` of shape `sample_shape(x) + self.batch_shape` with
@@ -710,7 +727,7 @@ class Distribution(_BaseDistribution):
 
     Args:
       value: `float` or `double` `Tensor`.
-      name: The name to give this op.
+      name: Python `str` prepended to names of ops created by this function.
 
     Returns:
       prob: a `Tensor` of shape `sample_shape(x) + self.batch_shape` with
@@ -747,7 +764,7 @@ class Distribution(_BaseDistribution):
 
     Args:
       value: `float` or `double` `Tensor`.
-      name: The name to give this op.
+      name: Python `str` prepended to names of ops created by this function.
 
     Returns:
       logcdf: a `Tensor` of shape `sample_shape(x) + self.batch_shape` with
@@ -780,7 +797,7 @@ class Distribution(_BaseDistribution):
 
     Args:
       value: `float` or `double` `Tensor`.
-      name: The name to give this op.
+      name: Python `str` prepended to names of ops created by this function.
 
     Returns:
       cdf: a `Tensor` of shape `sample_shape(x) + self.batch_shape` with
@@ -818,7 +835,7 @@ class Distribution(_BaseDistribution):
 
     Args:
       value: `float` or `double` `Tensor`.
-      name: The name to give this op.
+      name: Python `str` prepended to names of ops created by this function.
 
     Returns:
       `Tensor` of shape `sample_shape(x) + self.batch_shape` with values of type
@@ -853,7 +870,7 @@ class Distribution(_BaseDistribution):
 
     Args:
       value: `float` or `double` `Tensor`.
-      name: The name to give this op.
+      name: Python `str` prepended to names of ops created by this function.
 
     Returns:
       `Tensor` of shape `sample_shape(x) + self.batch_shape` with values of type
@@ -899,7 +916,7 @@ class Distribution(_BaseDistribution):
 
     Args:
       value: `float` or `double` `Tensor`.
-      name: The name to give this op.
+      name: Python `str` prepended to names of ops created by this function.
 
     Returns:
       quantile: a `Tensor` of shape `sample_shape(x) + self.batch_shape` with
@@ -923,7 +940,7 @@ class Distribution(_BaseDistribution):
     denotes expectation, and `Var.shape = batch_shape + event_shape`.
 
     Args:
-      name: The name to give this op.
+      name: Python `str` prepended to names of ops created by this function.
 
     Returns:
       variance: Floating-point `Tensor` with shape identical to
@@ -954,7 +971,7 @@ class Distribution(_BaseDistribution):
     denotes expectation, and `stddev.shape = batch_shape + event_shape`.
 
     Args:
-      name: The name to give this op.
+      name: Python `str` prepended to names of ops created by this function.
 
     Returns:
       stddev: Floating-point `Tensor` with shape identical to
@@ -1002,7 +1019,7 @@ class Distribution(_BaseDistribution):
     length-`k'` vector.
 
     Args:
-      name: The name to give this op.
+      name: Python `str` prepended to names of ops created by this function.
 
     Returns:
       covariance: Floating-point `Tensor` with shape `[B1, ..., Bn, k', k']`
@@ -1020,6 +1037,67 @@ class Distribution(_BaseDistribution):
     with self._name_scope(name):
       return self._mode()
 
+  def _cross_entropy(self, other):
+    return kullback_leibler.cross_entropy(
+        self, other, allow_nan_stats=self.allow_nan_stats)
+
+  def cross_entropy(self, other, name="cross_entropy"):
+    """Computes the (Shannon) cross entropy.
+
+    Denote this distribution (`self`) by `P` and the `other` distribution by
+    `Q`. Assuming `P, Q` are absolutely continuous with respect to
+    one another and permit densities `p(x) dr(x)` and `q(x) dr(x)`, (Shanon)
+    cross entropy is defined as:
+
+    ```none
+    H[P, Q] = E_p[-log q(X)] = -int_F p(x) log q(x) dr(x)
+    ```
+
+    where `F` denotes the support of the random variable `X ~ P`.
+
+    Args:
+      other: `tf.distributions.Distribution` instance.
+      name: Python `str` prepended to names of ops created by this function.
+
+    Returns:
+      cross_entropy: `self.dtype` `Tensor` with shape `[B1, ..., Bn]`
+        representing `n` different calculations of (Shanon) cross entropy.
+    """
+    with self._name_scope(name):
+      return self._cross_entropy(other)
+
+  def _kl_divergence(self, other):
+    return kullback_leibler.kl_divergence(
+        self, other, allow_nan_stats=self.allow_nan_stats)
+
+  def kl_divergence(self, other, name="kl_divergence"):
+    """Computes the Kullback--Leibler divergence.
+
+    Denote this distribution (`self`) by `p` and the `other` distribution by
+    `q`. Assuming `p, q` are absolutely continuous with respect to reference
+    measure `r`, (Shanon) cross entropy is defined as:
+
+    ```none
+    KL[p, q] = E_p[log(p(X)/q(X))]
+             = -int_F p(x) log q(x) dr(x) + int_F p(x) log p(x) dr(x)
+             = H[p, q] - H[p]
+    ```
+
+    where `F` denotes the support of the random variable `X ~ p`, `H[., .]`
+    denotes (Shanon) cross entropy, and `H[.]` denotes (Shanon) entropy.
+
+    Args:
+      other: `tf.distributions.Distribution` instance.
+      name: Python `str` prepended to names of ops created by this function.
+
+    Returns:
+      kl_divergence: `self.dtype` `Tensor` with shape `[B1, ..., Bn]`
+        representing `n` different calculations of the Kullback-Leibler
+        divergence.
+    """
+    with self._name_scope(name):
+      return self._kl_divergence(other)
+
   @contextlib.contextmanager
   def _name_scope(self, name=None, values=None):
     """Helper function to standardize op scope."""
diff --git a/tensorflow/python/ops/distributions/kullback_leibler.py b/tensorflow/python/ops/distributions/kullback_leibler.py
index a6ab581cc2..829b9611cf 100644
--- a/tensorflow/python/ops/distributions/kullback_leibler.py
+++ b/tensorflow/python/ops/distributions/kullback_leibler.py
@@ -110,6 +110,38 @@ def kl_divergence(distribution_a, distribution_b,
       return array_ops.identity(kl_t, name="checked_kl")
 
 
+def cross_entropy(ref, other,
+                  allow_nan_stats=True, name=None):
+  """Computes the (Shannon) cross entropy.
+
+  Denote two distributions by `P` (`ref`) and `Q` (`other`). Assuming `P, Q`
+  are absolutely continuous with respect to one another and permit densities
+  `p(x) dr(x)` and `q(x) dr(x)`, (Shanon) cross entropy is defined as:
+
+  ```none
+  H[P, Q] = E_p[-log q(X)] = -int_F p(x) log q(x) dr(x)
+  ```
+
+  where `F` denotes the support of the random variable `X ~ P`.
+
+  Args:
+    ref: `tf.distributions.Distribution` instance.
+    other: `tf.distributions.Distribution` instance.
+    allow_nan_stats: Python `bool`, default `True`. When `True`,
+      statistics (e.g., mean, mode, variance) use the value "`NaN`" to
+      indicate the result is undefined. When `False`, an exception is raised
+      if one or more of the statistic's batch members are undefined.
+    name: Python `str` prepended to names of ops created by this function.
+
+  Returns:
+    cross_entropy: `ref.dtype` `Tensor` with shape `[B1, ..., Bn]`
+      representing `n` different calculations of (Shanon) cross entropy.
+  """
+  with ops.name_scope(name, "cross_entropy"):
+    return ref.entropy() + kl_divergence(
+        ref, other, allow_nan_stats=allow_nan_stats)
+
+
 class RegisterKL(object):
   """Decorator to register a KL divergence implementation function.
 
diff --git a/tensorflow/tools/api/golden/tensorflow.distributions.-bernoulli.pbtxt b/tensorflow/tools/api/golden/tensorflow.distributions.-bernoulli.pbtxt
index cfe09345ac..ca96f4eaec 100644
--- a/tensorflow/tools/api/golden/tensorflow.distributions.-bernoulli.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.distributions.-bernoulli.pbtxt
@@ -64,6 +64,10 @@ tf_class {
     name: "covariance"
     argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'covariance\'], "
   }
+  member_method {
+    name: "cross_entropy"
+    argspec: "args=[\'self\', \'other\', \'name\'], varargs=None, keywords=None, defaults=[\'cross_entropy\'], "
+  }
   member_method {
     name: "entropy"
     argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'entropy\'], "
@@ -80,6 +84,10 @@ tf_class {
     name: "is_scalar_event"
     argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'is_scalar_event\'], "
   }
+  member_method {
+    name: "kl_divergence"
+    argspec: "args=[\'self\', \'other\', \'name\'], varargs=None, keywords=None, defaults=[\'kl_divergence\'], "
+  }
   member_method {
     name: "log_cdf"
     argspec: "args=[\'self\', \'value\', \'name\'], varargs=None, keywords=None, defaults=[\'log_cdf\'], "
diff --git a/tensorflow/tools/api/golden/tensorflow.distributions.-beta.pbtxt b/tensorflow/tools/api/golden/tensorflow.distributions.-beta.pbtxt
index 2e6578bae1..d0508acd9f 100644
--- a/tensorflow/tools/api/golden/tensorflow.distributions.-beta.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.distributions.-beta.pbtxt
@@ -68,6 +68,10 @@ tf_class {
     name: "covariance"
     argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'covariance\'], "
   }
+  member_method {
+    name: "cross_entropy"
+    argspec: "args=[\'self\', \'other\', \'name\'], varargs=None, keywords=None, defaults=[\'cross_entropy\'], "
+  }
   member_method {
     name: "entropy"
     argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'entropy\'], "
@@ -84,6 +88,10 @@ tf_class {
     name: "is_scalar_event"
     argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'is_scalar_event\'], "
   }
+  member_method {
+    name: "kl_divergence"
+    argspec: "args=[\'self\', \'other\', \'name\'], varargs=None, keywords=None, defaults=[\'kl_divergence\'], "
+  }
   member_method {
     name: "log_cdf"
     argspec: "args=[\'self\', \'value\', \'name\'], varargs=None, keywords=None, defaults=[\'log_cdf\'], "
diff --git a/tensorflow/tools/api/golden/tensorflow.distributions.-categorical.pbtxt b/tensorflow/tools/api/golden/tensorflow.distributions.-categorical.pbtxt
index d42b0e82e4..ff0fbb56cd 100644
--- a/tensorflow/tools/api/golden/tensorflow.distributions.-categorical.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.distributions.-categorical.pbtxt
@@ -68,6 +68,10 @@ tf_class {
     name: "covariance"
     argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'covariance\'], "
   }
+  member_method {
+    name: "cross_entropy"
+    argspec: "args=[\'self\', \'other\', \'name\'], varargs=None, keywords=None, defaults=[\'cross_entropy\'], "
+  }
   member_method {
     name: "entropy"
     argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'entropy\'], "
@@ -84,6 +88,10 @@ tf_class {
     name: "is_scalar_event"
     argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'is_scalar_event\'], "
   }
+  member_method {
+    name: "kl_divergence"
+    argspec: "args=[\'self\', \'other\', \'name\'], varargs=None, keywords=None, defaults=[\'kl_divergence\'], "
+  }
   member_method {
     name: "log_cdf"
     argspec: "args=[\'self\', \'value\', \'name\'], varargs=None, keywords=None, defaults=[\'log_cdf\'], "
diff --git a/tensorflow/tools/api/golden/tensorflow.distributions.-dirichlet-multinomial.pbtxt b/tensorflow/tools/api/golden/tensorflow.distributions.-dirichlet-multinomial.pbtxt
index 710164743e..d75e4a2f88 100644
--- a/tensorflow/tools/api/golden/tensorflow.distributions.-dirichlet-multinomial.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.distributions.-dirichlet-multinomial.pbtxt
@@ -68,6 +68,10 @@ tf_class {
     name: "covariance"
     argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'covariance\'], "
   }
+  member_method {
+    name: "cross_entropy"
+    argspec: "args=[\'self\', \'other\', \'name\'], varargs=None, keywords=None, defaults=[\'cross_entropy\'], "
+  }
   member_method {
     name: "entropy"
     argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'entropy\'], "
@@ -84,6 +88,10 @@ tf_class {
     name: "is_scalar_event"
     argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'is_scalar_event\'], "
   }
+  member_method {
+    name: "kl_divergence"
+    argspec: "args=[\'self\', \'other\', \'name\'], varargs=None, keywords=None, defaults=[\'kl_divergence\'], "
+  }
   member_method {
     name: "log_cdf"
     argspec: "args=[\'self\', \'value\', \'name\'], varargs=None, keywords=None, defaults=[\'log_cdf\'], "
diff --git a/tensorflow/tools/api/golden/tensorflow.distributions.-dirichlet.pbtxt b/tensorflow/tools/api/golden/tensorflow.distributions.-dirichlet.pbtxt
index 6cc361672e..b838b9ae21 100644
--- a/tensorflow/tools/api/golden/tensorflow.distributions.-dirichlet.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.distributions.-dirichlet.pbtxt
@@ -64,6 +64,10 @@ tf_class {
     name: "covariance"
     argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'covariance\'], "
   }
+  member_method {
+    name: "cross_entropy"
+    argspec: "args=[\'self\', \'other\', \'name\'], varargs=None, keywords=None, defaults=[\'cross_entropy\'], "
+  }
   member_method {
     name: "entropy"
     argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'entropy\'], "
@@ -80,6 +84,10 @@ tf_class {
     name: "is_scalar_event"
     argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'is_scalar_event\'], "
   }
+  member_method {
+    name: "kl_divergence"
+    argspec: "args=[\'self\', \'other\', \'name\'], varargs=None, keywords=None, defaults=[\'kl_divergence\'], "
+  }
   member_method {
     name: "log_cdf"
     argspec: "args=[\'self\', \'value\', \'name\'], varargs=None, keywords=None, defaults=[\'log_cdf\'], "
diff --git a/tensorflow/tools/api/golden/tensorflow.distributions.-distribution.pbtxt b/tensorflow/tools/api/golden/tensorflow.distributions.-distribution.pbtxt
index 40ad07d1be..6f06b7d50d 100644
--- a/tensorflow/tools/api/golden/tensorflow.distributions.-distribution.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.distributions.-distribution.pbtxt
@@ -55,6 +55,10 @@ tf_class {
     name: "covariance"
     argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'covariance\'], "
   }
+  member_method {
+    name: "cross_entropy"
+    argspec: "args=[\'self\', \'other\', \'name\'], varargs=None, keywords=None, defaults=[\'cross_entropy\'], "
+  }
   member_method {
     name: "entropy"
     argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'entropy\'], "
@@ -71,6 +75,10 @@ tf_class {
     name: "is_scalar_event"
     argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'is_scalar_event\'], "
   }
+  member_method {
+    name: "kl_divergence"
+    argspec: "args=[\'self\', \'other\', \'name\'], varargs=None, keywords=None, defaults=[\'kl_divergence\'], "
+  }
   member_method {
     name: "log_cdf"
     argspec: "args=[\'self\', \'value\', \'name\'], varargs=None, keywords=None, defaults=[\'log_cdf\'], "
diff --git a/tensorflow/tools/api/golden/tensorflow.distributions.-exponential.pbtxt b/tensorflow/tools/api/golden/tensorflow.distributions.-exponential.pbtxt
index 8f34d25fea..d34f9cde5d 100644
--- a/tensorflow/tools/api/golden/tensorflow.distributions.-exponential.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.distributions.-exponential.pbtxt
@@ -65,6 +65,10 @@ tf_class {
     name: "covariance"
     argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'covariance\'], "
   }
+  member_method {
+    name: "cross_entropy"
+    argspec: "args=[\'self\', \'other\', \'name\'], varargs=None, keywords=None, defaults=[\'cross_entropy\'], "
+  }
   member_method {
     name: "entropy"
     argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'entropy\'], "
@@ -81,6 +85,10 @@ tf_class {
     name: "is_scalar_event"
     argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'is_scalar_event\'], "
   }
+  member_method {
+    name: "kl_divergence"
+    argspec: "args=[\'self\', \'other\', \'name\'], varargs=None, keywords=None, defaults=[\'kl_divergence\'], "
+  }
   member_method {
     name: "log_cdf"
     argspec: "args=[\'self\', \'value\', \'name\'], varargs=None, keywords=None, defaults=[\'log_cdf\'], "
diff --git a/tensorflow/tools/api/golden/tensorflow.distributions.-gamma.pbtxt b/tensorflow/tools/api/golden/tensorflow.distributions.-gamma.pbtxt
index 0ae88fba3b..df268b8d99 100644
--- a/tensorflow/tools/api/golden/tensorflow.distributions.-gamma.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.distributions.-gamma.pbtxt
@@ -64,6 +64,10 @@ tf_class {
     name: "covariance"
     argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'covariance\'], "
   }
+  member_method {
+    name: "cross_entropy"
+    argspec: "args=[\'self\', \'other\', \'name\'], varargs=None, keywords=None, defaults=[\'cross_entropy\'], "
+  }
   member_method {
     name: "entropy"
     argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'entropy\'], "
@@ -80,6 +84,10 @@ tf_class {
     name: "is_scalar_event"
     argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'is_scalar_event\'], "
   }
+  member_method {
+    name: "kl_divergence"
+    argspec: "args=[\'self\', \'other\', \'name\'], varargs=None, keywords=None, defaults=[\'kl_divergence\'], "
+  }
   member_method {
     name: "log_cdf"
     argspec: "args=[\'self\', \'value\', \'name\'], varargs=None, keywords=None, defaults=[\'log_cdf\'], "
diff --git a/tensorflow/tools/api/golden/tensorflow.distributions.-laplace.pbtxt b/tensorflow/tools/api/golden/tensorflow.distributions.-laplace.pbtxt
index e7cd595e94..303dcb4ed3 100644
--- a/tensorflow/tools/api/golden/tensorflow.distributions.-laplace.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.distributions.-laplace.pbtxt
@@ -64,6 +64,10 @@ tf_class {
     name: "covariance"
     argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'covariance\'], "
   }
+  member_method {
+    name: "cross_entropy"
+    argspec: "args=[\'self\', \'other\', \'name\'], varargs=None, keywords=None, defaults=[\'cross_entropy\'], "
+  }
   member_method {
     name: "entropy"
     argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'entropy\'], "
@@ -80,6 +84,10 @@ tf_class {
     name: "is_scalar_event"
     argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'is_scalar_event\'], "
   }
+  member_method {
+    name: "kl_divergence"
+    argspec: "args=[\'self\', \'other\', \'name\'], varargs=None, keywords=None, defaults=[\'kl_divergence\'], "
+  }
   member_method {
     name: "log_cdf"
     argspec: "args=[\'self\', \'value\', \'name\'], varargs=None, keywords=None, defaults=[\'log_cdf\'], "
diff --git a/tensorflow/tools/api/golden/tensorflow.distributions.-multinomial.pbtxt b/tensorflow/tools/api/golden/tensorflow.distributions.-multinomial.pbtxt
index 7a4a16ff83..ecda8acb15 100644
--- a/tensorflow/tools/api/golden/tensorflow.distributions.-multinomial.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.distributions.-multinomial.pbtxt
@@ -68,6 +68,10 @@ tf_class {
     name: "covariance"
     argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'covariance\'], "
   }
+  member_method {
+    name: "cross_entropy"
+    argspec: "args=[\'self\', \'other\', \'name\'], varargs=None, keywords=None, defaults=[\'cross_entropy\'], "
+  }
   member_method {
     name: "entropy"
     argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'entropy\'], "
@@ -84,6 +88,10 @@ tf_class {
     name: "is_scalar_event"
     argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'is_scalar_event\'], "
   }
+  member_method {
+    name: "kl_divergence"
+    argspec: "args=[\'self\', \'other\', \'name\'], varargs=None, keywords=None, defaults=[\'kl_divergence\'], "
+  }
   member_method {
     name: "log_cdf"
     argspec: "args=[\'self\', \'value\', \'name\'], varargs=None, keywords=None, defaults=[\'log_cdf\'], "
diff --git a/tensorflow/tools/api/golden/tensorflow.distributions.-normal.pbtxt b/tensorflow/tools/api/golden/tensorflow.distributions.-normal.pbtxt
index 14c8c34cc2..92b9eeea22 100644
--- a/tensorflow/tools/api/golden/tensorflow.distributions.-normal.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.distributions.-normal.pbtxt
@@ -64,6 +64,10 @@ tf_class {
     name: "covariance"
     argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'covariance\'], "
   }
+  member_method {
+    name: "cross_entropy"
+    argspec: "args=[\'self\', \'other\', \'name\'], varargs=None, keywords=None, defaults=[\'cross_entropy\'], "
+  }
   member_method {
     name: "entropy"
     argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'entropy\'], "
@@ -80,6 +84,10 @@ tf_class {
     name: "is_scalar_event"
     argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'is_scalar_event\'], "
   }
+  member_method {
+    name: "kl_divergence"
+    argspec: "args=[\'self\', \'other\', \'name\'], varargs=None, keywords=None, defaults=[\'kl_divergence\'], "
+  }
   member_method {
     name: "log_cdf"
     argspec: "args=[\'self\', \'value\', \'name\'], varargs=None, keywords=None, defaults=[\'log_cdf\'], "
diff --git a/tensorflow/tools/api/golden/tensorflow.distributions.-student-t.pbtxt b/tensorflow/tools/api/golden/tensorflow.distributions.-student-t.pbtxt
index 30db6d3f35..9aa7f9a634 100644
--- a/tensorflow/tools/api/golden/tensorflow.distributions.-student-t.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.distributions.-student-t.pbtxt
@@ -68,6 +68,10 @@ tf_class {
     name: "covariance"
     argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'covariance\'], "
   }
+  member_method {
+    name: "cross_entropy"
+    argspec: "args=[\'self\', \'other\', \'name\'], varargs=None, keywords=None, defaults=[\'cross_entropy\'], "
+  }
   member_method {
     name: "entropy"
     argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'entropy\'], "
@@ -84,6 +88,10 @@ tf_class {
     name: "is_scalar_event"
     argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'is_scalar_event\'], "
   }
+  member_method {
+    name: "kl_divergence"
+    argspec: "args=[\'self\', \'other\', \'name\'], varargs=None, keywords=None, defaults=[\'kl_divergence\'], "
+  }
   member_method {
     name: "log_cdf"
     argspec: "args=[\'self\', \'value\', \'name\'], varargs=None, keywords=None, defaults=[\'log_cdf\'], "
diff --git a/tensorflow/tools/api/golden/tensorflow.distributions.-uniform.pbtxt b/tensorflow/tools/api/golden/tensorflow.distributions.-uniform.pbtxt
index 46cbdf225f..d1b9d30696 100644
--- a/tensorflow/tools/api/golden/tensorflow.distributions.-uniform.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.distributions.-uniform.pbtxt
@@ -64,6 +64,10 @@ tf_class {
     name: "covariance"
     argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'covariance\'], "
   }
+  member_method {
+    name: "cross_entropy"
+    argspec: "args=[\'self\', \'other\', \'name\'], varargs=None, keywords=None, defaults=[\'cross_entropy\'], "
+  }
   member_method {
     name: "entropy"
     argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'entropy\'], "
@@ -80,6 +84,10 @@ tf_class {
     name: "is_scalar_event"
     argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'is_scalar_event\'], "
   }
+  member_method {
+    name: "kl_divergence"
+    argspec: "args=[\'self\', \'other\', \'name\'], varargs=None, keywords=None, defaults=[\'kl_divergence\'], "
+  }
   member_method {
     name: "log_cdf"
     argspec: "args=[\'self\', \'value\', \'name\'], varargs=None, keywords=None, defaults=[\'log_cdf\'], "
-- 
GitLab


From cccddc83d527caaeefc86577c234e5dfd13b4979 Mon Sep 17 00:00:00 2001
From: Allen Lavoie <allenl@google.com>
Date: Fri, 1 Dec 2017 10:15:11 -0800
Subject: [PATCH 0505/1225] Seed the time series LSTM example unit test.

PiperOrigin-RevId: 177606245
---
 tensorflow/contrib/timeseries/examples/BUILD        |  1 +
 tensorflow/contrib/timeseries/examples/lstm.py      |  5 +++--
 tensorflow/contrib/timeseries/examples/lstm_test.py | 11 ++++++++++-
 3 files changed, 14 insertions(+), 3 deletions(-)

diff --git a/tensorflow/contrib/timeseries/examples/BUILD b/tensorflow/contrib/timeseries/examples/BUILD
index 755b0657e9..bb86ecb220 100644
--- a/tensorflow/contrib/timeseries/examples/BUILD
+++ b/tensorflow/contrib/timeseries/examples/BUILD
@@ -103,6 +103,7 @@ py_test(
     deps = [
         ":lstm",
         "//tensorflow/python:client_testlib",
+        "//tensorflow/python/estimator:estimator_py",
     ],
 )
 
diff --git a/tensorflow/contrib/timeseries/examples/lstm.py b/tensorflow/contrib/timeseries/examples/lstm.py
index 3ba823f638..c7193cef69 100644
--- a/tensorflow/contrib/timeseries/examples/lstm.py
+++ b/tensorflow/contrib/timeseries/examples/lstm.py
@@ -165,12 +165,13 @@ class _LSTMModel(ts_model.SequentialTimeSeriesModel):
         "Exogenous inputs are not implemented for this example.")
 
 
-def train_and_predict(csv_file_name=_DATA_FILE, training_steps=200):
+def train_and_predict(
+    csv_file_name=_DATA_FILE, training_steps=200, estimator_config=None):
   """Train and predict using a custom time series model."""
   # Construct an Estimator from our LSTM model.
   estimator = ts_estimators.TimeSeriesRegressor(
       model=_LSTMModel(num_features=5, num_units=128),
-      optimizer=tf.train.AdamOptimizer(0.001))
+      optimizer=tf.train.AdamOptimizer(0.001), config=estimator_config)
   reader = tf.contrib.timeseries.CSVReader(
       csv_file_name,
       column_names=((tf.contrib.timeseries.TrainEvalFeatures.TIMES,)
diff --git a/tensorflow/contrib/timeseries/examples/lstm_test.py b/tensorflow/contrib/timeseries/examples/lstm_test.py
index 56daa1e10d..3cace56726 100644
--- a/tensorflow/contrib/timeseries/examples/lstm_test.py
+++ b/tensorflow/contrib/timeseries/examples/lstm_test.py
@@ -20,14 +20,23 @@ from __future__ import print_function
 
 from tensorflow.contrib.timeseries.examples import lstm
 
+from tensorflow.python.estimator import estimator_lib
 from tensorflow.python.platform import test
 
 
+class _SeedRunConfig(estimator_lib.RunConfig):
+
+  @property
+  def tf_random_seed(self):
+    return 3
+
+
 class LSTMExampleTest(test.TestCase):
 
   def test_periodicity_learned(self):
     (observed_times, observed_values,
-     all_times, predicted_values) = lstm.train_and_predict(training_steps=100)
+     all_times, predicted_values) = lstm.train_and_predict(
+         training_steps=100, estimator_config=_SeedRunConfig())
     self.assertAllEqual([100], observed_times.shape)
     self.assertAllEqual([100, 5], observed_values.shape)
     self.assertAllEqual([200], all_times.shape)
-- 
GitLab


From 48ce41e160c2dcd8fde82db5b4a2a44a5f36894f Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 1 Dec 2017 10:45:02 -0800
Subject: [PATCH 0506/1225] Moved compute_pi from utils.py to fisher_blocks.py
 and added global constant that controls the type of pi-based damping
 adjustment to use.  Current options are "tracenorm", which is what we were
 already doing and is the default, and "off".

PiperOrigin-RevId: 177610677
---
 .../python/kernel_tests/fisher_blocks_test.py | 15 +++++
 .../kfac/python/kernel_tests/utils_test.py    | 12 ----
 .../contrib/kfac/python/ops/fisher_blocks.py  | 60 +++++++++++++++----
 tensorflow/contrib/kfac/python/ops/utils.py   | 22 -------
 .../contrib/kfac/python/ops/utils_lib.py      |  1 -
 5 files changed, 64 insertions(+), 46 deletions(-)

diff --git a/tensorflow/contrib/kfac/python/kernel_tests/fisher_blocks_test.py b/tensorflow/contrib/kfac/python/kernel_tests/fisher_blocks_test.py
index bdc950a4e6..2d9b28185c 100644
--- a/tensorflow/contrib/kfac/python/kernel_tests/fisher_blocks_test.py
+++ b/tensorflow/contrib/kfac/python/kernel_tests/fisher_blocks_test.py
@@ -40,6 +40,21 @@ def _make_psd(dim):
   return array_ops.constant(mat)
 
 
+class UtilsTest(test.TestCase):
+
+  def testComputePiTracenorm(self):
+    with ops.Graph().as_default(), self.test_session() as sess:
+      random_seed.set_random_seed(200)
+      left_factor = array_ops.diag([1., 2., 0., 1.])
+      right_factor = array_ops.ones([2., 2.])
+
+      # pi is the sqrt of the left trace norm divided by the right trace norm
+      pi = fb._compute_pi_tracenorm(left_factor, right_factor)
+
+      pi_val = sess.run(pi)
+      self.assertEqual(1., pi_val)
+
+
 class FullFBTest(test.TestCase):
 
   def testFullFBInitSingleTensor(self):
diff --git a/tensorflow/contrib/kfac/python/kernel_tests/utils_test.py b/tensorflow/contrib/kfac/python/kernel_tests/utils_test.py
index 55fe38e3e9..d255a6e716 100644
--- a/tensorflow/contrib/kfac/python/kernel_tests/utils_test.py
+++ b/tensorflow/contrib/kfac/python/kernel_tests/utils_test.py
@@ -222,18 +222,6 @@ class UtilsTest(test.TestCase):
       self.assertAllClose(b, np.array([4., 5.]))
       self.assertAllClose(c, np.array([[6.], [7.], [8.], [9.]]))
 
-  def testComputePi(self):
-    with ops.Graph().as_default(), self.test_session() as sess:
-      random_seed.set_random_seed(200)
-      left_factor = array_ops.diag([1., 2., 0., 1.])
-      right_factor = array_ops.ones([2., 2.])
-
-      # pi is the sqrt of the left trace norm divided by the right trace norm
-      pi = utils.compute_pi(left_factor, right_factor)
-
-      pi_val = sess.run(pi)
-      self.assertEqual(1., pi_val)
-
   def testPosDefInvCholesky(self):
     with ops.Graph().as_default(), self.test_session() as sess:
       random_seed.set_random_seed(200)
diff --git a/tensorflow/contrib/kfac/python/ops/fisher_blocks.py b/tensorflow/contrib/kfac/python/ops/fisher_blocks.py
index cf734d56ad..cdae7ddc21 100644
--- a/tensorflow/contrib/kfac/python/ops/fisher_blocks.py
+++ b/tensorflow/contrib/kfac/python/ops/fisher_blocks.py
@@ -53,14 +53,54 @@ from tensorflow.python.ops import math_ops
 #   damping /= num_replications ** NORMALIZE_DAMPING_POWER
 NORMALIZE_DAMPING_POWER = 1.0
 
+# Methods for adjusting damping for FisherBlocks. See
+# _compute_pi_adjusted_damping() for details.
+PI_OFF_NAME = "off"
+PI_TRACENORM_NAME = "tracenorm"
+PI_TYPE = PI_TRACENORM_NAME
 
-def set_global_constants(normalize_damping_power=None):
+
+def set_global_constants(normalize_damping_power=None, pi_type=None):
   """Sets various global constants used by the classes in this module."""
   global NORMALIZE_DAMPING_POWER
+  global PI_TYPE
 
   if normalize_damping_power is not None:
     NORMALIZE_DAMPING_POWER = normalize_damping_power
 
+  if pi_type is not None:
+    PI_TYPE = pi_type
+
+
+def _compute_pi_tracenorm(left_cov, right_cov):
+  """Computes the scalar constant pi for Tikhonov regularization/damping.
+
+  pi = sqrt( (trace(A) / dim(A)) / (trace(B) / dim(B)) )
+  See section 6.3 of https://arxiv.org/pdf/1503.05671.pdf for details.
+
+  Args:
+    left_cov: The left Kronecker factor "covariance".
+    right_cov: The right Kronecker factor "covariance".
+
+  Returns:
+    The computed scalar constant pi for these Kronecker Factors (as a Tensor).
+  """
+  # Instead of dividing by the dim of the norm, we multiply by the dim of the
+  # other norm. This works out the same in the ratio.
+  left_norm = math_ops.trace(left_cov) * right_cov.shape.as_list()[0]
+  right_norm = math_ops.trace(right_cov) * left_cov.shape.as_list()[0]
+  return math_ops.sqrt(left_norm / right_norm)
+
+
+def _compute_pi_adjusted_damping(left_cov, right_cov, damping):
+
+  if PI_TYPE == PI_TRACENORM_NAME:
+    pi = _compute_pi_tracenorm(left_cov, right_cov)
+    return (damping * pi, damping / pi)
+
+  elif PI_TYPE == PI_OFF_NAME:
+    return (damping, damping)
+
 
 @six.add_metaclass(abc.ABCMeta)
 class FisherBlock(object):
@@ -466,11 +506,10 @@ class KroneckerProductFB(FisherBlock):
     Args:
       damping: The base damping factor (float or Tensor) for the damped inverse.
     """
-    pi = utils.compute_pi(self._input_factor.get_cov(),
-                          self._output_factor.get_cov())
-
-    self._input_damping = (damping**0.5) * pi
-    self._output_damping = (damping**0.5) / pi
+    self._input_damping, self._output_damping = _compute_pi_adjusted_damping(
+        self._input_factor.get_cov(),
+        self._output_factor.get_cov(),
+        damping**0.5)
 
     self._input_factor.register_damped_inverse(self._input_damping)
     self._output_factor.register_damped_inverse(self._output_damping)
@@ -791,11 +830,10 @@ class FullyConnectedSeriesFB(FisherBlock):
 
     damping /= self._num_timesteps**NORMALIZE_DAMPING_POWER
 
-    pi = utils.compute_pi(self._input_factor.get_cov(),
-                          self._output_factor.get_cov())
-
-    self._damping_input = (damping**0.5) * pi
-    self._damping_output = (damping**0.5) / pi
+    self._damping_input, self._damping_output = _compute_pi_adjusted_damping(
+        self._input_factor.get_cov(),
+        self._output_factor.get_cov(),
+        damping**0.5)
 
     if self._option == SeriesFBApproximation.option1:
       self._input_factor.register_option1quants(self._damping_input)
diff --git a/tensorflow/contrib/kfac/python/ops/utils.py b/tensorflow/contrib/kfac/python/ops/utils.py
index 035f080fdb..cec018e406 100644
--- a/tensorflow/contrib/kfac/python/ops/utils.py
+++ b/tensorflow/contrib/kfac/python/ops/utils.py
@@ -162,28 +162,6 @@ def mat2d_to_layer_params(vector_template, mat2d):
     return array_ops.reshape(mat2d, vector_template.shape)
 
 
-def compute_pi(left_factor, right_factor):
-  """Computes the scalar constant pi for Tikhonov regularization/damping.
-
-  pi = sqrt( (trace(A) / dim(A)) / (trace(B) / dim(B)) )
-  See section 6.3 of https://arxiv.org/pdf/1503.05671.pdf for details.
-
-  Args:
-    left_factor: The left Kronecker factor Tensor.
-    right_factor: The right Kronecker factor Tensor.
-
-  Returns:
-    The computed scalar constant pi for these Kronecker Factors (as a Tensor).
-  """
-  # Instead of dividing by the dim of the norm, we multiply by the dim of the
-  # other norm. This works out the same in the ratio.
-  left_norm = math_ops.trace(left_factor) * right_factor.get_shape().as_list()[
-      0]
-  right_norm = math_ops.trace(right_factor) * left_factor.get_shape().as_list()[
-      0]
-  return math_ops.sqrt(left_norm / right_norm)
-
-
 def posdef_inv(tensor, damping):
   """Computes the inverse of tensor + damping * identity."""
   identity = linalg_ops.eye(tensor.shape.as_list()[0], dtype=tensor.dtype)
diff --git a/tensorflow/contrib/kfac/python/ops/utils_lib.py b/tensorflow/contrib/kfac/python/ops/utils_lib.py
index 9df07d69aa..8903c90fbc 100644
--- a/tensorflow/contrib/kfac/python/ops/utils_lib.py
+++ b/tensorflow/contrib/kfac/python/ops/utils_lib.py
@@ -30,7 +30,6 @@ _allowed_symbols = [
     "kronecker_product",
     "layer_params_to_mat2d",
     "mat2d_to_layer_params",
-    "compute_pi",
     "posdef_inv",
     "posdef_inv_matrix_inverse",
     "posdef_inv_cholesky",
-- 
GitLab


From 70b01d57e9032279e2d38cb83b8455336a731162 Mon Sep 17 00:00:00 2001
From: Nick Desaulniers <ndesaulniers@google.com>
Date: Fri, 1 Dec 2017 10:51:12 -0800
Subject: [PATCH 0507/1225] Remove stale comment.

PiperOrigin-RevId: 177611737
---
 tensorflow/contrib/lite/toco/toco_tooling.cc | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/tensorflow/contrib/lite/toco/toco_tooling.cc b/tensorflow/contrib/lite/toco/toco_tooling.cc
index ca092b2d72..1d1d767518 100644
--- a/tensorflow/contrib/lite/toco/toco_tooling.cc
+++ b/tensorflow/contrib/lite/toco/toco_tooling.cc
@@ -202,11 +202,7 @@ void Transform(const TocoFlags& toco_flags, Model* model) {
     // See the doc for --reorder_across_fake_quant: that flag is needed to
     // support some existing models, e.g. WordLens, that have FakeQuant
     // nodes in the wrong places.
-    // We currently unconditionally enable that behavior when the output
-    // format is DarwiNN because the DarwiNN test code does not make it
-    // easy to pass a new toco flag. Once that is resolved on the DarwiNN
-    // tests side, the special-casing of DarwiNN here can go away.
-    // TODO(benoitjacob): so drop it when we can.
+    // TODO(benoitjacob): drop special casing when we can.
     if ((quantize_output && toco_flags.reorder_across_fake_quant())) {
       transformations.Add(new DropFakeQuant);
     }
-- 
GitLab


From 00791693e4d32bed92fcfadf09da321c9f548bab Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 1 Dec 2017 10:51:25 -0800
Subject: [PATCH 0508/1225] Internal Change

PiperOrigin-RevId: 177611775
---
 tensorflow/contrib/lite/BUILD                 |  4 +++
 tensorflow/contrib/lite/context_test.cc       |  3 +-
 tensorflow/contrib/lite/kernels/BUILD         |  1 +
 .../contrib/lite/kernels/activations_test.cc  |  2 +-
 tensorflow/contrib/lite/kernels/add_test.cc   |  3 +-
 .../contrib/lite/kernels/basic_rnn_test.cc    |  2 +-
 .../lite/kernels/concatenation_test.cc        |  2 +-
 tensorflow/contrib/lite/kernels/conv_test.cc  |  2 +-
 .../lite/kernels/depthwise_conv_test.cc       |  2 +-
 .../kernels/embedding_lookup_sparse_test.cc   |  4 +--
 .../lite/kernels/embedding_lookup_test.cc     |  2 +-
 .../lite/kernels/fully_connected_test.cc      |  3 +-
 .../lite/kernels/hashtable_lookup_test.cc     |  2 +-
 .../contrib/lite/kernels/l2norm_test.cc       |  2 +-
 .../lite/kernels/local_response_norm_test.cc  |  2 +-
 .../lite/kernels/lsh_projection_test.cc       |  2 +-
 tensorflow/contrib/lite/kernels/lstm_test.cc  |  3 +-
 tensorflow/contrib/lite/kernels/mul_test.cc   |  3 +-
 .../lite/kernels/optional_tensor_test.cc      |  3 +-
 .../contrib/lite/kernels/pooling_test.cc      |  2 +-
 .../contrib/lite/kernels/reshape_test.cc      |  3 +-
 .../lite/kernels/resize_bilinear_test.cc      |  2 +-
 .../contrib/lite/kernels/skip_gram_test.cc    |  2 +-
 .../contrib/lite/kernels/softmax_test.cc      |  3 +-
 .../lite/kernels/space_to_depth_test.cc       |  3 +-
 tensorflow/contrib/lite/kernels/svdf_test.cc  |  2 +-
 tensorflow/contrib/lite/kernels/test_util.h   |  7 +----
 tensorflow/contrib/lite/model_test.cc         |  3 +-
 .../contrib/lite/simple_memory_arena_test.cc  |  3 +-
 tensorflow/contrib/lite/string_util_test.cc   |  3 +-
 tensorflow/contrib/lite/testing/BUILD         |  6 ++++
 tensorflow/contrib/lite/testing/util.h        | 28 +++++++++++++++++++
 32 files changed, 71 insertions(+), 43 deletions(-)
 create mode 100644 tensorflow/contrib/lite/testing/util.h

diff --git a/tensorflow/contrib/lite/BUILD b/tensorflow/contrib/lite/BUILD
index 52460123cc..3f1b0be1a7 100644
--- a/tensorflow/contrib/lite/BUILD
+++ b/tensorflow/contrib/lite/BUILD
@@ -111,6 +111,7 @@ cc_test(
     deps = [
         ":framework",
         ":string_util",
+        "//tensorflow/contrib/lite/testing:util",
         "@com_google_googletest//:gtest",
     ],
 )
@@ -134,6 +135,7 @@ cc_test(
     srcs = ["simple_memory_arena_test.cc"],
     deps = [
         ":framework",
+        "//tensorflow/contrib/lite/testing:util",
         "@com_google_googletest//:gtest",
     ],
 )
@@ -152,6 +154,7 @@ cc_test(
     ],
     deps = [
         ":framework",
+        "//tensorflow/contrib/lite/testing:util",
         "@com_google_googletest//:gtest",
     ],
 )
@@ -163,6 +166,7 @@ cc_test(
     srcs = ["context_test.cc"],
     deps = [
         ":framework",
+        "//tensorflow/contrib/lite/testing:util",
         "@com_google_googletest//:gtest",
     ],
 )
diff --git a/tensorflow/contrib/lite/context_test.cc b/tensorflow/contrib/lite/context_test.cc
index d0a104f43d..20d6f69a25 100644
--- a/tensorflow/contrib/lite/context_test.cc
+++ b/tensorflow/contrib/lite/context_test.cc
@@ -15,6 +15,7 @@ limitations under the License.
 
 #include "tensorflow/contrib/lite/context.h"
 #include <gtest/gtest.h>
+#include "tensorflow/contrib/lite/testing/util.h"
 
 namespace tflite {
 
@@ -68,7 +69,7 @@ TEST(IntArray, TestIntArrayEqual) {
 }  // namespace tflite
 
 int main(int argc, char** argv) {
-  // On Linux, add: tflite::LogToStderr();
+  ::tflite::LogToStderr();
   ::testing::InitGoogleTest(&argc, argv);
   return RUN_ALL_TESTS();
 }
diff --git a/tensorflow/contrib/lite/kernels/BUILD b/tensorflow/contrib/lite/kernels/BUILD
index bbbfa3e741..ad76e90606 100644
--- a/tensorflow/contrib/lite/kernels/BUILD
+++ b/tensorflow/contrib/lite/kernels/BUILD
@@ -32,6 +32,7 @@ cc_library(
         "//tensorflow/contrib/lite:framework",
         "//tensorflow/contrib/lite:schema_fbs_version",
         "//tensorflow/contrib/lite:string_util",
+        "//tensorflow/contrib/lite/testing:util",
         "//tensorflow/core:lib",
         "@com_google_googletest//:gtest",
     ],
diff --git a/tensorflow/contrib/lite/kernels/activations_test.cc b/tensorflow/contrib/lite/kernels/activations_test.cc
index f10aee7017..33ca56e745 100644
--- a/tensorflow/contrib/lite/kernels/activations_test.cc
+++ b/tensorflow/contrib/lite/kernels/activations_test.cc
@@ -317,7 +317,7 @@ TEST(QuantizedActivationsOpTest, Softmax2D) {
 }  // namespace tflite
 
 int main(int argc, char** argv) {
-  // On Linux, add: tflite::LogToStderr();
+  ::tflite::LogToStderr();
   ::testing::InitGoogleTest(&argc, argv);
   return RUN_ALL_TESTS();
 }
diff --git a/tensorflow/contrib/lite/kernels/add_test.cc b/tensorflow/contrib/lite/kernels/add_test.cc
index 8e12a837c4..ddf45bb576 100644
--- a/tensorflow/contrib/lite/kernels/add_test.cc
+++ b/tensorflow/contrib/lite/kernels/add_test.cc
@@ -164,8 +164,7 @@ TEST(QuantizedAddOpModel, QuantizedVariousInputShapes) {
 }  // namespace
 }  // namespace tflite
 int main(int argc, char** argv) {
-  // On Linux, add: tflite::LogToStderr();
-  tflite::LogToStderr();
+  ::tflite::LogToStderr();
   ::testing::InitGoogleTest(&argc, argv);
   return RUN_ALL_TESTS();
 }
diff --git a/tensorflow/contrib/lite/kernels/basic_rnn_test.cc b/tensorflow/contrib/lite/kernels/basic_rnn_test.cc
index dfa75655bc..5ecccb985e 100644
--- a/tensorflow/contrib/lite/kernels/basic_rnn_test.cc
+++ b/tensorflow/contrib/lite/kernels/basic_rnn_test.cc
@@ -261,7 +261,7 @@ TEST(FullyConnectedOpTest, BlackBoxTest) {
 }  // namespace tflite
 
 int main(int argc, char** argv) {
-  // On Linux, add: tflite::LogToStderr();
+  ::tflite::LogToStderr();
   ::testing::InitGoogleTest(&argc, argv);
   return RUN_ALL_TESTS();
 }
diff --git a/tensorflow/contrib/lite/kernels/concatenation_test.cc b/tensorflow/contrib/lite/kernels/concatenation_test.cc
index 94e5b2acdc..499856a93c 100644
--- a/tensorflow/contrib/lite/kernels/concatenation_test.cc
+++ b/tensorflow/contrib/lite/kernels/concatenation_test.cc
@@ -156,7 +156,7 @@ TEST(ConcatenationOpTest, FourInputsQuantized) {
 }  // namespace tflite
 
 int main(int argc, char** argv) {
-  // On Linux, add: tflite::LogToStderr();
+  ::tflite::LogToStderr();
   ::testing::InitGoogleTest(&argc, argv);
   return RUN_ALL_TESTS();
 }
diff --git a/tensorflow/contrib/lite/kernels/conv_test.cc b/tensorflow/contrib/lite/kernels/conv_test.cc
index 18d7a31d59..1d0a81c313 100644
--- a/tensorflow/contrib/lite/kernels/conv_test.cc
+++ b/tensorflow/contrib/lite/kernels/conv_test.cc
@@ -434,7 +434,7 @@ TEST(ConvolutionOpTest, SimpleTestQuantizedWithAnisotropicStrides) {
 }  // namespace tflite
 
 int main(int argc, char** argv) {
-  // On Linux, add: tflite::LogToStderr();
+  ::tflite::LogToStderr();
   ::testing::InitGoogleTest(&argc, argv);
   return RUN_ALL_TESTS();
 }
diff --git a/tensorflow/contrib/lite/kernels/depthwise_conv_test.cc b/tensorflow/contrib/lite/kernels/depthwise_conv_test.cc
index 39227b2811..1439c8bce1 100644
--- a/tensorflow/contrib/lite/kernels/depthwise_conv_test.cc
+++ b/tensorflow/contrib/lite/kernels/depthwise_conv_test.cc
@@ -180,7 +180,7 @@ TEST(QuantizedDepthwiseConvolutionOpTest, SimpleTestQuantized) {
 }  // namespace tflite
 
 int main(int argc, char** argv) {
-  // On Linux, add: tflite::LogToStderr();
+  ::tflite::LogToStderr();
   ::testing::InitGoogleTest(&argc, argv);
   return RUN_ALL_TESTS();
 }
diff --git a/tensorflow/contrib/lite/kernels/embedding_lookup_sparse_test.cc b/tensorflow/contrib/lite/kernels/embedding_lookup_sparse_test.cc
index 69d9c5cc7d..dcdc5fffad 100644
--- a/tensorflow/contrib/lite/kernels/embedding_lookup_sparse_test.cc
+++ b/tensorflow/contrib/lite/kernels/embedding_lookup_sparse_test.cc
@@ -158,9 +158,7 @@ TEST(EmbeddingLookupOpTest, Indices3DTest) {
 }  // namespace tflite
 
 int main(int argc, char** argv) {
-#ifdef OS_LINUX
-  tflite::LogToStderr();
-#endif
+  ::tflite::LogToStderr();
   ::testing::InitGoogleTest(&argc, argv);
   return RUN_ALL_TESTS();
 }
diff --git a/tensorflow/contrib/lite/kernels/embedding_lookup_test.cc b/tensorflow/contrib/lite/kernels/embedding_lookup_test.cc
index 8c030b0677..9b501878f1 100644
--- a/tensorflow/contrib/lite/kernels/embedding_lookup_test.cc
+++ b/tensorflow/contrib/lite/kernels/embedding_lookup_test.cc
@@ -88,7 +88,7 @@ TEST(EmbeddingLookupOpTest, SimpleTest) {
 }  // namespace tflite
 
 int main(int argc, char** argv) {
-  // On Linux, add: tflite::LogToStderr();
+  ::tflite::LogToStderr();
   ::testing::InitGoogleTest(&argc, argv);
   return RUN_ALL_TESTS();
 }
diff --git a/tensorflow/contrib/lite/kernels/fully_connected_test.cc b/tensorflow/contrib/lite/kernels/fully_connected_test.cc
index 112e3f1ba0..a0f766c4f4 100644
--- a/tensorflow/contrib/lite/kernels/fully_connected_test.cc
+++ b/tensorflow/contrib/lite/kernels/fully_connected_test.cc
@@ -370,8 +370,7 @@ TEST(FullyConnectedOpTest, BlackBoxTest) {
 }  // namespace tflite
 
 int main(int argc, char** argv) {
-  // On Linux, add: tflite::LogToStderr();
-  tflite::LogToStderr();
+  ::tflite::LogToStderr();
   ::testing::InitGoogleTest(&argc, argv);
   return RUN_ALL_TESTS();
 }
diff --git a/tensorflow/contrib/lite/kernels/hashtable_lookup_test.cc b/tensorflow/contrib/lite/kernels/hashtable_lookup_test.cc
index 916a23225e..cb6038f900 100644
--- a/tensorflow/contrib/lite/kernels/hashtable_lookup_test.cc
+++ b/tensorflow/contrib/lite/kernels/hashtable_lookup_test.cc
@@ -170,7 +170,7 @@ TEST(HashtableLookupOpTest, TestString) {
 }  // namespace tflite
 
 int main(int argc, char** argv) {
-  // On Linux, add: tflite::LogToStderr();
+  ::tflite::LogToStderr();
   ::testing::InitGoogleTest(&argc, argv);
   return RUN_ALL_TESTS();
 }
diff --git a/tensorflow/contrib/lite/kernels/l2norm_test.cc b/tensorflow/contrib/lite/kernels/l2norm_test.cc
index b1db89b8bd..30e103f330 100644
--- a/tensorflow/contrib/lite/kernels/l2norm_test.cc
+++ b/tensorflow/contrib/lite/kernels/l2norm_test.cc
@@ -57,7 +57,7 @@ TEST(L2NormOpTest, SimpleTest) {
 }  // namespace tflite
 
 int main(int argc, char** argv) {
-  // On Linux, add: tflite::LogToStderr();
+  ::tflite::LogToStderr();
   ::testing::InitGoogleTest(&argc, argv);
   return RUN_ALL_TESTS();
 }
diff --git a/tensorflow/contrib/lite/kernels/local_response_norm_test.cc b/tensorflow/contrib/lite/kernels/local_response_norm_test.cc
index 63a8b0a3d0..d75ce258a0 100644
--- a/tensorflow/contrib/lite/kernels/local_response_norm_test.cc
+++ b/tensorflow/contrib/lite/kernels/local_response_norm_test.cc
@@ -95,7 +95,7 @@ TEST(LocalResponseNormOpTest, SmallRadius) {
 }  // namespace tflite
 
 int main(int argc, char** argv) {
-  // On Linux, add: tflite::LogToStderr();
+  ::tflite::LogToStderr();
   ::testing::InitGoogleTest(&argc, argv);
   return RUN_ALL_TESTS();
 }
diff --git a/tensorflow/contrib/lite/kernels/lsh_projection_test.cc b/tensorflow/contrib/lite/kernels/lsh_projection_test.cc
index 1011927848..414d728dfc 100644
--- a/tensorflow/contrib/lite/kernels/lsh_projection_test.cc
+++ b/tensorflow/contrib/lite/kernels/lsh_projection_test.cc
@@ -117,7 +117,7 @@ TEST(LSHProjectionOpTest2, Sparse3DInputs) {
 }  // namespace tflite
 
 int main(int argc, char** argv) {
-  // On Linux, add: tflite::LogToStderr();
+  ::tflite::LogToStderr();
   ::testing::InitGoogleTest(&argc, argv);
   return RUN_ALL_TESTS();
 }
diff --git a/tensorflow/contrib/lite/kernels/lstm_test.cc b/tensorflow/contrib/lite/kernels/lstm_test.cc
index be4c7ddbf8..c068286b0d 100644
--- a/tensorflow/contrib/lite/kernels/lstm_test.cc
+++ b/tensorflow/contrib/lite/kernels/lstm_test.cc
@@ -1081,8 +1081,7 @@ TEST(LSTMOpTest, BlackBoxTestWithPeepholeWithProjectionNoClipping) {
 }  // namespace tflite
 
 int main(int argc, char** argv) {
-  // On Linux, add: tflite::LogToStderr();
-  tflite::LogToStderr();
+  ::tflite::LogToStderr();
   ::testing::InitGoogleTest(&argc, argv);
   return RUN_ALL_TESTS();
 }
diff --git a/tensorflow/contrib/lite/kernels/mul_test.cc b/tensorflow/contrib/lite/kernels/mul_test.cc
index 4b858e1f39..4255cfe18a 100644
--- a/tensorflow/contrib/lite/kernels/mul_test.cc
+++ b/tensorflow/contrib/lite/kernels/mul_test.cc
@@ -120,8 +120,7 @@ TEST(QuantizedMulOpTest, NoActivation) {
 }  // namespace tflite
 
 int main(int argc, char** argv) {
-  // On Linux, add: tflite::LogToStderr();
-  tflite::LogToStderr();
+  ::tflite::LogToStderr();
   ::testing::InitGoogleTest(&argc, argv);
   return RUN_ALL_TESTS();
 }
diff --git a/tensorflow/contrib/lite/kernels/optional_tensor_test.cc b/tensorflow/contrib/lite/kernels/optional_tensor_test.cc
index 8e9cc07656..17166715ca 100644
--- a/tensorflow/contrib/lite/kernels/optional_tensor_test.cc
+++ b/tensorflow/contrib/lite/kernels/optional_tensor_test.cc
@@ -334,8 +334,7 @@ TEST(LSTMOpTest, BlackBoxTestWithCifgWithPeepholeNoProjectionNoClipping) {
 }  // namespace tflite
 
 int main(int argc, char** argv) {
-  // On Linux, add: tflite::LogToStderr();
-  tflite::LogToStderr();
+  ::tflite::LogToStderr();
   ::testing::InitGoogleTest(&argc, argv);
   return RUN_ALL_TESTS();
 }
diff --git a/tensorflow/contrib/lite/kernels/pooling_test.cc b/tensorflow/contrib/lite/kernels/pooling_test.cc
index e1b51ec7d5..01c91b2ba9 100644
--- a/tensorflow/contrib/lite/kernels/pooling_test.cc
+++ b/tensorflow/contrib/lite/kernels/pooling_test.cc
@@ -155,7 +155,7 @@ TEST(FloatPoolingOpTest, L2Pool) {
 }  // namespace tflite
 
 int main(int argc, char** argv) {
-  // On Linux, add: tflite::LogToStderr();
+  ::tflite::LogToStderr();
   ::testing::InitGoogleTest(&argc, argv);
   return RUN_ALL_TESTS();
 }
diff --git a/tensorflow/contrib/lite/kernels/reshape_test.cc b/tensorflow/contrib/lite/kernels/reshape_test.cc
index 59ce7d5648..0fbcf6e6aa 100644
--- a/tensorflow/contrib/lite/kernels/reshape_test.cc
+++ b/tensorflow/contrib/lite/kernels/reshape_test.cc
@@ -83,8 +83,7 @@ TEST(ReshapeOpTest, WithStretchDimension) {
 }  // namespace tflite
 
 int main(int argc, char** argv) {
-  // On Linux, add: tflite::LogToStderr();
-  tflite::LogToStderr();
+  ::tflite::LogToStderr();
   ::testing::InitGoogleTest(&argc, argv);
   return RUN_ALL_TESTS();
 }
diff --git a/tensorflow/contrib/lite/kernels/resize_bilinear_test.cc b/tensorflow/contrib/lite/kernels/resize_bilinear_test.cc
index 0257c0b557..314a71e210 100644
--- a/tensorflow/contrib/lite/kernels/resize_bilinear_test.cc
+++ b/tensorflow/contrib/lite/kernels/resize_bilinear_test.cc
@@ -111,7 +111,7 @@ TEST(ResizeBilinearOpTest, ThreeDimensionalResize) {
 }  // namespace tflite
 
 int main(int argc, char** argv) {
-  // On Linux, add: tflite::LogToStderr();
+  ::tflite::LogToStderr();
   ::testing::InitGoogleTest(&argc, argv);
   return RUN_ALL_TESTS();
 }
diff --git a/tensorflow/contrib/lite/kernels/skip_gram_test.cc b/tensorflow/contrib/lite/kernels/skip_gram_test.cc
index e7f6bc904b..185b64cb44 100644
--- a/tensorflow/contrib/lite/kernels/skip_gram_test.cc
+++ b/tensorflow/contrib/lite/kernels/skip_gram_test.cc
@@ -251,7 +251,7 @@ TEST(SkipGramTest, TestInputWithExtraSpace) {
 }  // namespace tflite
 
 int main(int argc, char** argv) {
-  // On Linux, add: tflite::LogToStderr();
+  ::tflite::LogToStderr();
   ::testing::InitGoogleTest(&argc, argv);
   return RUN_ALL_TESTS();
 }
diff --git a/tensorflow/contrib/lite/kernels/softmax_test.cc b/tensorflow/contrib/lite/kernels/softmax_test.cc
index ec8ec03b0d..6c5338ff0f 100644
--- a/tensorflow/contrib/lite/kernels/softmax_test.cc
+++ b/tensorflow/contrib/lite/kernels/softmax_test.cc
@@ -136,8 +136,7 @@ TEST(SoftmaxOpTest, CompareWithTFminiBetaNotEq1) {
 }  // namespace tflite
 
 int main(int argc, char** argv) {
-  // On Linux, add: tflite::LogToStderr();
-  tflite::LogToStderr();
+  ::tflite::LogToStderr();
   ::testing::InitGoogleTest(&argc, argv);
   return RUN_ALL_TESTS();
 }
diff --git a/tensorflow/contrib/lite/kernels/space_to_depth_test.cc b/tensorflow/contrib/lite/kernels/space_to_depth_test.cc
index 911f08a92c..997f354861 100644
--- a/tensorflow/contrib/lite/kernels/space_to_depth_test.cc
+++ b/tensorflow/contrib/lite/kernels/space_to_depth_test.cc
@@ -95,8 +95,7 @@ TEST(SpaceToDepthOpModel, Int64) {
 }  // namespace tflite
 
 int main(int argc, char** argv) {
-  // On Linux, add: tflite::LogToStderr();
-  tflite::LogToStderr();
+  ::tflite::LogToStderr();
   ::testing::InitGoogleTest(&argc, argv);
   return RUN_ALL_TESTS();
 }
diff --git a/tensorflow/contrib/lite/kernels/svdf_test.cc b/tensorflow/contrib/lite/kernels/svdf_test.cc
index d956025e9d..4de2ceaf05 100644
--- a/tensorflow/contrib/lite/kernels/svdf_test.cc
+++ b/tensorflow/contrib/lite/kernels/svdf_test.cc
@@ -306,7 +306,7 @@ TEST(SVDFOpTest, BlackBoxTestRank2) {
 }  // namespace tflite
 
 int main(int argc, char** argv) {
-  // On Linux, add: tflite::LogToStderr();
+  ::tflite::LogToStderr();
   ::testing::InitGoogleTest(&argc, argv);
   return RUN_ALL_TESTS();
 }
diff --git a/tensorflow/contrib/lite/kernels/test_util.h b/tensorflow/contrib/lite/kernels/test_util.h
index e68e494661..adcdeddbfc 100644
--- a/tensorflow/contrib/lite/kernels/test_util.h
+++ b/tensorflow/contrib/lite/kernels/test_util.h
@@ -24,16 +24,11 @@ limitations under the License.
 #include "tensorflow/contrib/lite/kernels/register.h"
 #include "tensorflow/contrib/lite/model.h"
 #include "tensorflow/contrib/lite/string_util.h"
+#include "tensorflow/contrib/lite/testing/util.h"
 #include "tensorflow/core/platform/logging.h"
 
 namespace tflite {
 
-inline void LogToStderr() {
-#ifdef PLATFORM_GOOGLE
-  FLAGS_logtostderr = true;
-#endif
-}
-
 // A gmock matcher that check that elements of a float vector match to a given
 // tolerance.
 std::vector<::testing::Matcher<float>> ArrayFloatNear(
diff --git a/tensorflow/contrib/lite/model_test.cc b/tensorflow/contrib/lite/model_test.cc
index 6104386642..83a5150a46 100644
--- a/tensorflow/contrib/lite/model_test.cc
+++ b/tensorflow/contrib/lite/model_test.cc
@@ -26,6 +26,7 @@ limitations under the License.
 
 #include <gtest/gtest.h>
 #include "tensorflow/contrib/lite/error_reporter.h"
+#include "tensorflow/contrib/lite/testing/util.h"
 
 // Comparison for TfLiteRegistration. Since TfLiteRegistration is a C object,
 // we must declare this in global namespace, so argument-dependent operator
@@ -261,7 +262,7 @@ TEST(BasicFlatBufferModel, TestBuildModelFromCorruptedData) {
 }  // namespace tflite
 
 int main(int argc, char** argv) {
-  // On Linux, add: tflite::LogToStderr();
+  ::tflite::LogToStderr();
   ::testing::InitGoogleTest(&argc, argv);
   return RUN_ALL_TESTS();
 }
diff --git a/tensorflow/contrib/lite/simple_memory_arena_test.cc b/tensorflow/contrib/lite/simple_memory_arena_test.cc
index ac676092c6..4444f642eb 100644
--- a/tensorflow/contrib/lite/simple_memory_arena_test.cc
+++ b/tensorflow/contrib/lite/simple_memory_arena_test.cc
@@ -16,6 +16,7 @@ limitations under the License.
 
 #include <gmock/gmock.h>
 #include <gtest/gtest.h>
+#include "tensorflow/contrib/lite/testing/util.h"
 
 namespace tflite {
 namespace {
@@ -85,7 +86,7 @@ TEST(SimpleMemoryArenaTest, TestAfterClear) {
 }  // namespace tflite
 
 int main(int argc, char** argv) {
-  // On Linux, add: tflite::LogToStderr();
+  ::tflite::LogToStderr();
   ::testing::InitGoogleTest(&argc, argv);
   return RUN_ALL_TESTS();
 }
diff --git a/tensorflow/contrib/lite/string_util_test.cc b/tensorflow/contrib/lite/string_util_test.cc
index 5c351638dc..d53fec7512 100644
--- a/tensorflow/contrib/lite/string_util_test.cc
+++ b/tensorflow/contrib/lite/string_util_test.cc
@@ -17,6 +17,7 @@ limitations under the License.
 #include <gtest/gtest.h>
 #include "tensorflow/contrib/lite/context.h"
 #include "tensorflow/contrib/lite/interpreter.h"
+#include "tensorflow/contrib/lite/testing/util.h"
 
 namespace tflite {
 
@@ -111,7 +112,7 @@ TEST(StringUtil, TestEmptyList) {
 }  // namespace tflite
 
 int main(int argc, char** argv) {
-  // On Linux, add: tflite::LogToStderr();
+  ::tflite::LogToStderr();
   ::testing::InitGoogleTest(&argc, argv);
   return RUN_ALL_TESTS();
 }
diff --git a/tensorflow/contrib/lite/testing/BUILD b/tensorflow/contrib/lite/testing/BUILD
index ecddb4b807..3ff65dd381 100644
--- a/tensorflow/contrib/lite/testing/BUILD
+++ b/tensorflow/contrib/lite/testing/BUILD
@@ -160,6 +160,12 @@ cc_library(
     ],
 )
 
+cc_library(
+    name = "util",
+    testonly = 1,
+    hdrs = ["util.h"],
+)
+
 cc_test(
     name = "test_runner_test",
     srcs = ["test_runner_test.cc"],
diff --git a/tensorflow/contrib/lite/testing/util.h b/tensorflow/contrib/lite/testing/util.h
new file mode 100644
index 0000000000..4d4304f022
--- /dev/null
+++ b/tensorflow/contrib/lite/testing/util.h
@@ -0,0 +1,28 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_TESTING_UTIL_H_
+#define THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_TESTING_UTIL_H_
+
+namespace tflite {
+
+inline void LogToStderr() {
+#ifdef PLATFORM_GOOGLE
+  FLAGS_logtostderr = true;
+#endif
+}
+
+}  // namespace tflite
+
+#endif  // THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_TESTING_UTIL_H_
-- 
GitLab


From 11749434e3eb04eee058a43a931a27bdee4916df Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 1 Dec 2017 10:58:17 -0800
Subject: [PATCH 0509/1225] Make TopologicalSort return an error status if the
 sorting fails.

PiperOrigin-RevId: 177612830
---
 .../grappler/optimizers/memory_optimizer.cc   |  2 +-
 .../grappler/optimizers/meta_optimizer.cc     |  2 +-
 tensorflow/core/grappler/utils/BUILD          |  1 +
 .../core/grappler/utils/topological_sort.cc   | 25 +++++++++++--------
 .../core/grappler/utils/topological_sort.h    |  3 ++-
 .../grappler/utils/topological_sort_test.cc   |  9 ++++---
 6 files changed, 24 insertions(+), 18 deletions(-)

diff --git a/tensorflow/core/grappler/optimizers/memory_optimizer.cc b/tensorflow/core/grappler/optimizers/memory_optimizer.cc
index a2a2680c4f..1420fdb6fe 100644
--- a/tensorflow/core/grappler/optimizers/memory_optimizer.cc
+++ b/tensorflow/core/grappler/optimizers/memory_optimizer.cc
@@ -419,7 +419,7 @@ void RecomputationRewritingPass(RewriterConfig::MemOptType optimization_level,
   // We don't use the results of this topological sort until later, but this
   // call invalidates all NodeDef pointers, so it needs to be done before we
   // start collecting those.
-  TopologicalSort(graph);
+  TF_CHECK_OK(TopologicalSort(graph));
   NodeMap node_map(graph);
   std::vector<RecomputedSubGraph> recomputed_subgraphs;
   // Do not recompute nodes which are fed, since the recomputed node would not
diff --git a/tensorflow/core/grappler/optimizers/meta_optimizer.cc b/tensorflow/core/grappler/optimizers/meta_optimizer.cc
index d2df8cacb7..49bdc44462 100644
--- a/tensorflow/core/grappler/optimizers/meta_optimizer.cc
+++ b/tensorflow/core/grappler/optimizers/meta_optimizer.cc
@@ -160,7 +160,7 @@ Status MetaOptimizer::Optimize(Cluster* cluster, const GrapplerItem& item,
   }
 
   if (already_optimized) {
-    TopologicalSort(optimized_graph);
+    TF_RETURN_IF_ERROR(TopologicalSort(optimized_graph));
     // Make sure that the optimizers preserved the graph version and library.
     DCHECK_GE(optimized_graph->library().function_size(),
               item.graph.library().function_size());
diff --git a/tensorflow/core/grappler/utils/BUILD b/tensorflow/core/grappler/utils/BUILD
index 21243833ac..534f7a063f 100644
--- a/tensorflow/core/grappler/utils/BUILD
+++ b/tensorflow/core/grappler/utils/BUILD
@@ -53,6 +53,7 @@ cc_library(
     hdrs = ["topological_sort.h"],
     visibility = ["//visibility:public"],
     deps = [
+        "//tensorflow/core:lib",
         "//tensorflow/core:lib_internal",
         "//tensorflow/core:protos_all_cc",
         "//tensorflow/core/grappler:op_types",
diff --git a/tensorflow/core/grappler/utils/topological_sort.cc b/tensorflow/core/grappler/utils/topological_sort.cc
index 77d4702d21..d87f43a498 100644
--- a/tensorflow/core/grappler/utils/topological_sort.cc
+++ b/tensorflow/core/grappler/utils/topological_sort.cc
@@ -19,13 +19,14 @@ limitations under the License.
 #include "tensorflow/core/framework/node_def.pb.h"
 #include "tensorflow/core/grappler/op_types.h"
 #include "tensorflow/core/grappler/utils.h"
+#include "tensorflow/core/lib/core/status.h"
 
 namespace tensorflow {
 namespace grappler {
 
 // Kahn's algorithm is implemented.
 // For details, see https://en.wikipedia.org/wiki/Topological_sorting
-void TopologicalSort(GraphDef* graph) {
+Status TopologicalSort(GraphDef* graph) {
   OutputMap output_map(graph);
   std::vector<NodeDef*> ready_nodes;
   ready_nodes.reserve(graph->node_size());
@@ -63,17 +64,19 @@ void TopologicalSort(GraphDef* graph) {
     front++;
   }
 
-  if (back == graph->node_size()) {
-    GraphDef new_graph;
-    new_graph.mutable_node()->Reserve(graph->node_size());
-    for (int i = 0; i < graph->node_size(); i++) {
-      auto new_node = new_graph.add_node();
-      new_node->Swap(ready_nodes[i]);
-    }
-    graph->mutable_node()->Swap(new_graph.mutable_node());
-  } else {
-    LOG(ERROR) << "The graph couldn't be sorted in topological order.";
+  if (back != graph->node_size()) {
+    return errors::InvalidArgument(
+        "The graph couldn't be sorted in topological order.");
+  }
+
+  GraphDef new_graph;
+  new_graph.mutable_node()->Reserve(graph->node_size());
+  for (int i = 0; i < graph->node_size(); i++) {
+    auto new_node = new_graph.add_node();
+    new_node->Swap(ready_nodes[i]);
   }
+  graph->mutable_node()->Swap(new_graph.mutable_node());
+  return Status::OK();
 }
 
 }  // namespace grappler
diff --git a/tensorflow/core/grappler/utils/topological_sort.h b/tensorflow/core/grappler/utils/topological_sort.h
index d4d8034ef5..f2c9bbfa4e 100644
--- a/tensorflow/core/grappler/utils/topological_sort.h
+++ b/tensorflow/core/grappler/utils/topological_sort.h
@@ -17,12 +17,13 @@ limitations under the License.
 #define THIRD_PARTY_TENSORFLOW_CORE_GRAPPLER_UTILS_TOPOLOGICAL_SORT_H_
 
 #include "tensorflow/core/framework/graph.pb.h"
+#include "tensorflow/core/lib/core/status.h"
 
 namespace tensorflow {
 namespace grappler {
 
 // Sort a graph in topological order.
-void TopologicalSort(GraphDef* graph);
+Status TopologicalSort(GraphDef* graph);
 
 }  // namespace grappler
 }  // namespace tensorflow
diff --git a/tensorflow/core/grappler/utils/topological_sort_test.cc b/tensorflow/core/grappler/utils/topological_sort_test.cc
index dc99cb1052..ba0fe0155a 100644
--- a/tensorflow/core/grappler/utils/topological_sort_test.cc
+++ b/tensorflow/core/grappler/utils/topological_sort_test.cc
@@ -15,6 +15,7 @@ limitations under the License.
 
 #include "tensorflow/core/grappler/utils/topological_sort.h"
 #include "tensorflow/core/framework/node_def.pb.h"
+#include "tensorflow/core/lib/core/status_test_util.h"
 #include "tensorflow/core/platform/protobuf.h"
 #include "tensorflow/core/platform/test.h"
 
@@ -51,7 +52,7 @@ TEST_F(TopologicalSortTest, NoLoop) {
   *graph.add_node() = CreateNode("5", {});
   *graph.add_node() = CreateNode("4", {});
 
-  TopologicalSort(&graph);
+  TF_EXPECT_OK(TopologicalSort(&graph));
   std::vector<string> order = {"5", "4", "2", "0", "3", "1"};
   for (int i = 0; i < order.size(); i++) {
     EXPECT_EQ(graph.node(i).name(), order[i]);
@@ -67,7 +68,7 @@ TEST_F(TopologicalSortTest, WithLoop) {
   *graph.add_node() = CreateNode("5", "NextIteration", {"4"});
   *graph.add_node() = CreateNode("1", {});
 
-  TopologicalSort(&graph);
+  TF_EXPECT_OK(TopologicalSort(&graph));
   std::vector<string> order = {"1", "2", "3", "4", "5"};
   for (int i = 0; i < order.size(); i++) {
     EXPECT_EQ(graph.node(i).name(), order[i]);
@@ -82,7 +83,7 @@ TEST_F(TopologicalSortTest, WithIllegalLoop) {
   *graph.add_node() = CreateNode("3", {"2"});
   *graph.add_node() = CreateNode("1", {});
 
-  TopologicalSort(&graph);
+  EXPECT_FALSE(TopologicalSort(&graph).ok());
   std::vector<string> order = {"2", "3", "1"};
   for (int i = 0; i < order.size(); i++) {
     EXPECT_EQ(graph.node(i).name(), order[i]);
@@ -94,7 +95,7 @@ TEST_F(TopologicalSortTest, DuplicatedInputs) {
   *graph.add_node() = CreateNode("2", {"1", "1"});
   *graph.add_node() = CreateNode("1", {});
 
-  TopologicalSort(&graph);
+  TF_EXPECT_OK(TopologicalSort(&graph));
   std::vector<string> order = {"1", "2"};
   for (int i = 0; i < order.size(); i++) {
     EXPECT_EQ(graph.node(i).name(), order[i]);
-- 
GitLab


From d8e98c8753e3d89e6d4e989e52e910495c4f64ac Mon Sep 17 00:00:00 2001
From: Brennan Saeta <saeta@google.com>
Date: Fri, 1 Dec 2017 11:08:51 -0800
Subject: [PATCH 0510/1225] Customize request timeouts for GCS filesystem.

PiperOrigin-RevId: 177614538
---
 .../core/platform/cloud/curl_http_request.cc  |  29 +-
 .../core/platform/cloud/curl_http_request.h   |  12 +
 .../core/platform/cloud/gcs_dns_cache_test.cc |   5 +
 .../core/platform/cloud/gcs_file_system.cc    |  65 +-
 .../core/platform/cloud/gcs_file_system.h     |  42 +-
 .../platform/cloud/gcs_file_system_test.cc    | 649 +++++++++++-------
 tensorflow/core/platform/cloud/http_request.h |  10 +
 .../core/platform/cloud/http_request_fake.h   |  23 +-
 8 files changed, 572 insertions(+), 263 deletions(-)

diff --git a/tensorflow/core/platform/cloud/curl_http_request.cc b/tensorflow/core/platform/cloud/curl_http_request.cc
index 4581a0870a..f7fbfe971e 100644
--- a/tensorflow/core/platform/cloud/curl_http_request.cc
+++ b/tensorflow/core/platform/cloud/curl_http_request.cc
@@ -29,16 +29,6 @@ namespace {
 // Set to 1 to enable verbose debug output from curl.
 constexpr uint64 kVerboseOutput = 0;
 
-// Timeout for the whole request. Set only to prevent hanging indefinitely.
-constexpr uint32 kRequestTimeoutSeconds = 3600;  // 1 hour
-
-// Timeout for the connection phase.
-constexpr uint32 kConnectTimeoutSeconds = 120;  // 2 minutes
-
-// The maximum period of request inactivity, after which the request
-// is terminated.
-constexpr uint64 kInactivityTimeoutSeconds = 60;  // 1 minute
-
 // Proxy to the real libcurl implementation.
 class LibCurlProxy : public LibCurl {
  public:
@@ -161,9 +151,6 @@ Status CurlHttpRequest::Init() {
       strings::StrCat("TensorFlow/", TF_VERSION_STRING).c_str());
   // Do not use signals for timeouts - does not work in multi-threaded programs.
   libcurl_->curl_easy_setopt(curl_, CURLOPT_NOSIGNAL, 1L);
-  libcurl_->curl_easy_setopt(curl_, CURLOPT_TIMEOUT, kRequestTimeoutSeconds);
-  libcurl_->curl_easy_setopt(curl_, CURLOPT_CONNECTTIMEOUT,
-                             kConnectTimeoutSeconds);
   libcurl_->curl_easy_setopt(curl_, CURLOPT_HTTP_VERSION,
                              CURL_HTTP_VERSION_2_0);
 
@@ -336,6 +323,16 @@ Status CurlHttpRequest::SetResultBuffer(std::vector<char>* out_buffer) {
   return Status::OK();
 }
 
+Status CurlHttpRequest::SetTimeouts(uint32 connection, uint32 inactivity,
+                                    uint32 total) {
+  TF_RETURN_IF_ERROR(CheckInitialized());
+  TF_RETURN_IF_ERROR(CheckNotSent());
+  connect_timeout_secs_ = connection;
+  inactivity_timeout_secs_ = inactivity;
+  request_timeout_secs_ = total;
+  return Status::OK();
+}
+
 size_t CurlHttpRequest::WriteCallback(const void* ptr, size_t size,
                                       size_t nmemb, void* this_object) {
   CHECK(ptr);
@@ -399,6 +396,10 @@ Status CurlHttpRequest::Send() {
   libcurl_->curl_easy_setopt(curl_, CURLOPT_HEADERFUNCTION,
                              &CurlHttpRequest::HeaderCallback);
 
+  libcurl_->curl_easy_setopt(curl_, CURLOPT_TIMEOUT, request_timeout_secs_);
+  libcurl_->curl_easy_setopt(curl_, CURLOPT_CONNECTTIMEOUT,
+                             connect_timeout_secs_);
+
   char error_buffer[CURL_ERROR_SIZE] = {0};
   libcurl_->curl_easy_setopt(curl_, CURLOPT_ERRORBUFFER, error_buffer);
 
@@ -529,7 +530,7 @@ int CurlHttpRequest::ProgressCallback(void* this_object, curl_off_t dltotal,
     return 0;
   }
 
-  if (now - that->last_progress_timestamp_ > kInactivityTimeoutSeconds) {
+  if (now - that->last_progress_timestamp_ > that->inactivity_timeout_secs_) {
     LOG(ERROR) << "The transmission  of request " << this_object
                << " (URI: " << that->uri_ << ") has been stuck at "
                << current_progress << " of " << dltotal + ultotal
diff --git a/tensorflow/core/platform/cloud/curl_http_request.h b/tensorflow/core/platform/cloud/curl_http_request.h
index 9e5ae61016..cf249450df 100644
--- a/tensorflow/core/platform/cloud/curl_http_request.h
+++ b/tensorflow/core/platform/cloud/curl_http_request.h
@@ -120,6 +120,9 @@ class CurlHttpRequest : public HttpRequest {
   // Url encodes str and returns a new string.
   string EscapeString(const string& str) override;
 
+  Status SetTimeouts(uint32 connection, uint32 inactivity,
+                     uint32 total) override;
+
  private:
   /// A write callback in the form which can be accepted by libcurl.
   static size_t WriteCallback(const void* ptr, size_t size, size_t nmemb,
@@ -162,6 +165,15 @@ class CurlHttpRequest : public HttpRequest {
   // The last progress in terms of bytes transmitted.
   curl_off_t last_progress_bytes_ = 0;
 
+  // The maximum period of request inactivity.
+  uint32 inactivity_timeout_secs_ = 60;  // 1 minute
+
+  // Timeout for the connection phase.
+  uint32 connect_timeout_secs_ = 120;  // 2 minutes
+
+  // Tiemout for the whole request. Set only to prevent hanging indefinitely.
+  uint32 request_timeout_secs_ = 3600;  // 1 hour
+
   // Members to enforce the usage flow.
   bool is_initialized_ = false;
   bool is_uri_set_ = false;
diff --git a/tensorflow/core/platform/cloud/gcs_dns_cache_test.cc b/tensorflow/core/platform/cloud/gcs_dns_cache_test.cc
index 8d1a108f30..266879ddf5 100644
--- a/tensorflow/core/platform/cloud/gcs_dns_cache_test.cc
+++ b/tensorflow/core/platform/cloud/gcs_dns_cache_test.cc
@@ -64,6 +64,11 @@ class TestHttpRequest : public HttpRequest {
   Status Send() override { return Status::OK(); }
   string EscapeString(const string& str) override { return ""; }
 
+  Status SetTimeouts(uint32 connection, uint32 inactivity,
+                     uint32 total) override {
+    return Status::OK();
+  }
+
   std::map<string, string> resolve_overrides_;
 };
 
diff --git a/tensorflow/core/platform/cloud/gcs_file_system.cc b/tensorflow/core/platform/cloud/gcs_file_system.cc
index 45e9b05092..ab82643ad5 100644
--- a/tensorflow/core/platform/cloud/gcs_file_system.cc
+++ b/tensorflow/core/platform/cloud/gcs_file_system.cc
@@ -94,6 +94,20 @@ const FileStatistics DIRECTORY_STAT(0, 0, true);
 // variable to a positive integer describing the frequency used to refresh the
 // userspace DNS cache.
 constexpr char kResolveCacheSecs[] = "GCS_RESOLVE_REFRESH_SECS";
+// The environment variable to configure the http request's connection timeout.
+constexpr char kRequestConnectionTimeout[] =
+    "GCS_REQUEST_CONNECTION_TIMEOUT_SECS";
+// The environment varaible to configure the http request's idle timeout.
+constexpr char kRequestIdleTimeout[] = "GCS_REQUEST_IDLE_TIMEOUT_SECS";
+// The environment variable to configure the overall request timeout for
+// metadata requests.
+constexpr char kMetadataRequestTimeout[] = "GCS_METADATA_REQUEST_TIMEOUT_SECS";
+// The environment variable to configure the overall request timeout for
+// block reads requests.
+constexpr char kReadRequestTimeout[] = "GCS_READ_REQUEST_TIMEOUT_SECS";
+// The environment variable to configure the overall request timeout for
+// upload requests.
+constexpr char kWriteRequestTimeout[] = "GCS_WRITE_REQUEST_TIMEOUT_SECS";
 
 Status GetTmpFilename(string* filename) {
   if (!filename) {
@@ -283,12 +297,14 @@ class GcsWritableFile : public WritableFile {
   GcsWritableFile(const string& bucket, const string& object,
                   AuthProvider* auth_provider,
                   HttpRequest::Factory* http_request_factory,
+                  GcsFileSystem::TimeoutConfig* timeouts,
                   std::function<void()> file_cache_erase,
                   int64 initial_retry_delay_usec)
       : bucket_(bucket),
         object_(object),
         auth_provider_(auth_provider),
         http_request_factory_(http_request_factory),
+        timeouts_(timeouts),
         file_cache_erase_(std::move(file_cache_erase)),
         sync_needed_(true),
         initial_retry_delay_usec_(initial_retry_delay_usec) {
@@ -307,12 +323,14 @@ class GcsWritableFile : public WritableFile {
                   AuthProvider* auth_provider,
                   const string& tmp_content_filename,
                   HttpRequest::Factory* http_request_factory,
+                  GcsFileSystem::TimeoutConfig* timeouts,
                   std::function<void()> file_cache_erase,
                   int64 initial_retry_delay_usec)
       : bucket_(bucket),
         object_(object),
         auth_provider_(auth_provider),
         http_request_factory_(http_request_factory),
+        timeouts_(timeouts),
         file_cache_erase_(std::move(file_cache_erase)),
         sync_needed_(true),
         initial_retry_delay_usec_(initial_retry_delay_usec) {
@@ -446,6 +464,8 @@ class GcsWritableFile : public WritableFile {
                                           std::to_string(file_size)));
     TF_RETURN_IF_ERROR(request->SetPostEmptyBody());
     TF_RETURN_IF_ERROR(request->SetResultBuffer(&output_buffer));
+    TF_RETURN_IF_ERROR(request->SetTimeouts(timeouts_->connect, timeouts_->idle,
+                                            timeouts_->metadata));
     TF_RETURN_WITH_CONTEXT_IF_ERROR(
         request->Send(), " when initiating an upload to ", GetGcsPath());
     *session_uri = request->GetResponseHeader("Location");
@@ -477,6 +497,8 @@ class GcsWritableFile : public WritableFile {
     TF_RETURN_IF_ERROR(request->Init());
     TF_RETURN_IF_ERROR(request->SetUri(session_uri));
     TF_RETURN_IF_ERROR(request->AddAuthBearerHeader(auth_token));
+    TF_RETURN_IF_ERROR(request->SetTimeouts(timeouts_->connect, timeouts_->idle,
+                                            timeouts_->metadata));
     TF_RETURN_IF_ERROR(request->AddHeader(
         "Content-Range", strings::StrCat("bytes */", file_size)));
     TF_RETURN_IF_ERROR(request->SetPutEmptyBody());
@@ -531,6 +553,9 @@ class GcsWritableFile : public WritableFile {
           "Content-Range", strings::StrCat("bytes ", start_offset, "-",
                                            file_size - 1, "/", file_size)));
     }
+    TF_RETURN_IF_ERROR(request->SetTimeouts(timeouts_->connect, timeouts_->idle,
+                                            timeouts_->write));
+
     TF_RETURN_IF_ERROR(
         request->SetPutFromFile(tmp_content_filename_, start_offset));
     TF_RETURN_WITH_CONTEXT_IF_ERROR(request->Send(), " when uploading ",
@@ -550,6 +575,7 @@ class GcsWritableFile : public WritableFile {
   string tmp_content_filename_;
   std::ofstream outfile_;
   HttpRequest::Factory* http_request_factory_;
+  GcsFileSystem::TimeoutConfig* timeouts_;
   std::function<void()> file_cache_erase_;
   bool sync_needed_;  // whether there is buffered data that needs to be synced
   int64 initial_retry_delay_usec_;
@@ -635,6 +661,25 @@ GcsFileSystem::GcsFileSystem()
                 &resolve_frequency_secs)) {
     dns_cache_.reset(new GcsDnsCache(resolve_frequency_secs));
   }
+  // Apply the overrides for request timeouts
+  uint32 timeout_value;
+  if (GetEnvVar(kRequestConnectionTimeout, strings::safe_strtou32,
+                &timeout_value)) {
+    timeouts_.connect = timeout_value;
+  }
+  if (GetEnvVar(kRequestIdleTimeout, strings::safe_strtou32, &timeout_value)) {
+    timeouts_.idle = timeout_value;
+  }
+  if (GetEnvVar(kMetadataRequestTimeout, strings::safe_strtou32,
+                &timeout_value)) {
+    timeouts_.metadata = timeout_value;
+  }
+  if (GetEnvVar(kReadRequestTimeout, strings::safe_strtou32, &timeout_value)) {
+    timeouts_.read = timeout_value;
+  }
+  if (GetEnvVar(kWriteRequestTimeout, strings::safe_strtou32, &timeout_value)) {
+    timeouts_.write = timeout_value;
+  }
 }
 
 GcsFileSystem::GcsFileSystem(
@@ -643,7 +688,8 @@ GcsFileSystem::GcsFileSystem(
     size_t block_size, size_t max_bytes, uint64 max_staleness,
     uint64 stat_cache_max_age, size_t stat_cache_max_entries,
     uint64 matching_paths_cache_max_age,
-    size_t matching_paths_cache_max_entries, int64 initial_retry_delay_usec)
+    size_t matching_paths_cache_max_entries, int64 initial_retry_delay_usec,
+    TimeoutConfig timeouts)
     : auth_provider_(std::move(auth_provider)),
       http_request_factory_(std::move(http_request_factory)),
       file_block_cache_(
@@ -651,6 +697,7 @@ GcsFileSystem::GcsFileSystem(
       stat_cache_(new StatCache(stat_cache_max_age, stat_cache_max_entries)),
       matching_paths_cache_(new MatchingPathsCache(
           matching_paths_cache_max_age, matching_paths_cache_max_entries)),
+      timeouts_(timeouts),
       initial_retry_delay_usec_(initial_retry_delay_usec) {}
 
 Status GcsFileSystem::NewRandomAccessFile(
@@ -689,6 +736,8 @@ Status GcsFileSystem::LoadBufferFromGCS(const string& filename, size_t offset,
   TF_RETURN_IF_ERROR(request->AddAuthBearerHeader(auth_token));
   TF_RETURN_IF_ERROR(request->SetRange(offset, offset + n - 1));
   TF_RETURN_IF_ERROR(request->SetResultBuffer(out));
+  TF_RETURN_IF_ERROR(
+      request->SetTimeouts(timeouts_.connect, timeouts_.idle, timeouts_.read));
 
   if (dns_cache_) {
     TF_RETURN_IF_ERROR(dns_cache_->AnnotateRequest(request.get()));
@@ -723,7 +772,7 @@ Status GcsFileSystem::NewWritableFile(const string& fname,
   TF_RETURN_IF_ERROR(ParseGcsPath(fname, false, &bucket, &object));
   result->reset(new GcsWritableFile(
       bucket, object, auth_provider_.get(), http_request_factory_.get(),
-      [this, fname]() { file_block_cache_->RemoveFile(fname); },
+      &timeouts_, [this, fname]() { file_block_cache_->RemoveFile(fname); },
       initial_retry_delay_usec_));
   return Status::OK();
 }
@@ -764,7 +813,7 @@ Status GcsFileSystem::NewAppendableFile(const string& fname,
   TF_RETURN_IF_ERROR(ParseGcsPath(fname, false, &bucket, &object));
   result->reset(new GcsWritableFile(
       bucket, object, auth_provider_.get(), old_content_filename,
-      http_request_factory_.get(),
+      http_request_factory_.get(), &timeouts_,
       [this, fname]() { file_block_cache_->RemoveFile(fname); },
       initial_retry_delay_usec_));
   return Status::OK();
@@ -852,6 +901,8 @@ Status GcsFileSystem::StatForObject(const string& fname, const string& bucket,
             "?fields=size%2Cupdated")));
         TF_RETURN_IF_ERROR(request->AddAuthBearerHeader(auth_token));
         TF_RETURN_IF_ERROR(request->SetResultBuffer(&output_buffer));
+        TF_RETURN_IF_ERROR(request->SetTimeouts(
+            timeouts_.connect, timeouts_.idle, timeouts_.metadata));
 
         if (dns_cache_) {
           TF_RETURN_IF_ERROR(dns_cache_->AnnotateRequest(request.get()));
@@ -902,6 +953,8 @@ Status GcsFileSystem::BucketExists(const string& bucket, bool* result) {
   TF_RETURN_IF_ERROR(
       request->SetUri(strings::StrCat(kGcsUriBase, "b/", bucket)));
   TF_RETURN_IF_ERROR(request->AddAuthBearerHeader(auth_token));
+  TF_RETURN_IF_ERROR(request->SetTimeouts(timeouts_.connect, timeouts_.idle,
+                                          timeouts_.metadata));
   const Status status = request->Send();
   switch (status.code()) {
     case errors::Code::OK:
@@ -1033,6 +1086,8 @@ Status GcsFileSystem::GetChildrenBounded(const string& dirname,
     TF_RETURN_IF_ERROR(request->SetUri(uri));
     TF_RETURN_IF_ERROR(request->AddAuthBearerHeader(auth_token));
     TF_RETURN_IF_ERROR(request->SetResultBuffer(&output_buffer));
+    TF_RETURN_IF_ERROR(request->SetTimeouts(timeouts_.connect, timeouts_.idle,
+                                            timeouts_.metadata));
 
     if (dns_cache_) {
       TF_RETURN_IF_ERROR(dns_cache_->AnnotateRequest(request.get()));
@@ -1157,6 +1212,8 @@ Status GcsFileSystem::DeleteFile(const string& fname) {
   TF_RETURN_IF_ERROR(request->SetUri(strings::StrCat(
       kGcsUriBase, "b/", bucket, "/o/", request->EscapeString(object))));
   TF_RETURN_IF_ERROR(request->AddAuthBearerHeader(auth_token));
+  TF_RETURN_IF_ERROR(request->SetTimeouts(timeouts_.connect, timeouts_.idle,
+                                          timeouts_.metadata));
   TF_RETURN_IF_ERROR(request->SetDeleteRequest());
   TF_RETURN_WITH_CONTEXT_IF_ERROR(request->Send(), " when deleting ", fname);
   file_block_cache_->RemoveFile(fname);
@@ -1251,6 +1308,8 @@ Status GcsFileSystem::RenameObject(const string& src, const string& target) {
       request->EscapeString(target_object))));
   TF_RETURN_IF_ERROR(request->AddAuthBearerHeader(auth_token));
   TF_RETURN_IF_ERROR(request->SetPostEmptyBody());
+  TF_RETURN_IF_ERROR(request->SetTimeouts(timeouts_.connect, timeouts_.idle,
+                                          timeouts_.metadata));
   std::vector<char> output_buffer;
   TF_RETURN_IF_ERROR(request->SetResultBuffer(&output_buffer));
   TF_RETURN_WITH_CONTEXT_IF_ERROR(request->Send(), " when renaming ", src,
diff --git a/tensorflow/core/platform/cloud/gcs_file_system.h b/tensorflow/core/platform/cloud/gcs_file_system.h
index 4b4853c838..7cfcebd5c9 100644
--- a/tensorflow/core/platform/cloud/gcs_file_system.h
+++ b/tensorflow/core/platform/cloud/gcs_file_system.h
@@ -35,6 +35,8 @@ namespace tensorflow {
 /// which adds retry logic to GCS operations.
 class GcsFileSystem : public FileSystem {
  public:
+  struct TimeoutConfig;
+
   GcsFileSystem();
   GcsFileSystem(std::unique_ptr<AuthProvider> auth_provider,
                 std::unique_ptr<HttpRequest::Factory> http_request_factory,
@@ -42,7 +44,7 @@ class GcsFileSystem : public FileSystem {
                 uint64 stat_cache_max_age, size_t stat_cache_max_entries,
                 uint64 matching_paths_cache_max_age,
                 size_t matching_paths_cache_max_entries,
-                int64 initial_retry_delay_usec);
+                int64 initial_retry_delay_usec, TimeoutConfig timeouts);
 
   Status NewRandomAccessFile(
       const string& filename,
@@ -87,6 +89,7 @@ class GcsFileSystem : public FileSystem {
   size_t block_size() const { return file_block_cache_->block_size(); }
   size_t max_bytes() const { return file_block_cache_->max_bytes(); }
   uint64 max_staleness() const { return file_block_cache_->max_staleness(); }
+  TimeoutConfig timeouts() const { return timeouts_; }
 
   uint64 stat_cache_max_age() const { return stat_cache_->max_age(); }
   size_t stat_cache_max_entries() const { return stat_cache_->max_entries(); }
@@ -98,6 +101,41 @@ class GcsFileSystem : public FileSystem {
     return matching_paths_cache_->max_entries();
   }
 
+  /// Structure containing the information for timeouts related to accessing the
+  /// GCS APIs.
+  ///
+  /// All values are in seconds.
+  struct TimeoutConfig {
+    // The request connection timeout. If a connection cannot be established
+    // within `connect` seconds, abort the request.
+    uint32 connect = 120;  // 2 minutes
+
+    // The request idle timeout. If a request has seen no activity in `idle`
+    // seconds, abort the request.
+    uint32 idle = 60;  // 1 minute
+
+    // The maximum total time a metadata request can take. If a request has not
+    // completed within `metadata` seconds, the request is aborted.
+    uint32 metadata = 3600;  // 1 hour
+
+    // The maximum total time a block read request can take. If a request has
+    // not completed within `read` seconds, the request is aborted.
+    uint32 read = 3600;  // 1 hour
+
+    // The maximum total time an upload request can take. If a request has not
+    // completed within `write` seconds, the request is aborted.
+    uint32 write = 3600;  // 1 hour
+
+    TimeoutConfig() {}
+    TimeoutConfig(uint32 connect, uint32 idle, uint32 metadata, uint32 read,
+                  uint32 write)
+        : connect(connect),
+          idle(idle),
+          metadata(metadata),
+          read(read),
+          write(write) {}
+  };
+
  private:
   /// \brief Checks if the bucket exists. Returns OK if the check succeeded.
   ///
@@ -150,6 +188,8 @@ class GcsFileSystem : public FileSystem {
   using MatchingPathsCache = ExpiringLRUCache<std::vector<string>>;
   std::unique_ptr<MatchingPathsCache> matching_paths_cache_;
 
+  TimeoutConfig timeouts_;
+
   /// The initial delay for exponential backoffs when retrying failed calls.
   const int64 initial_retry_delay_usec_ = 1000000L;
 
diff --git a/tensorflow/core/platform/cloud/gcs_file_system_test.cc b/tensorflow/core/platform/cloud/gcs_file_system_test.cc
index 7614ec4d7f..01f4fd8688 100644
--- a/tensorflow/core/platform/cloud/gcs_file_system_test.cc
+++ b/tensorflow/core/platform/cloud/gcs_file_system_test.cc
@@ -22,6 +22,8 @@ limitations under the License.
 namespace tensorflow {
 namespace {
 
+static GcsFileSystem::TimeoutConfig kTestTimeoutConfig(5, 1, 10, 20, 30);
+
 class FakeAuthProvider : public AuthProvider {
  public:
   Status GetToken(string* token) override {
@@ -35,12 +37,14 @@ TEST(GcsFileSystemTest, NewRandomAccessFile_NoBlockCache) {
       {new FakeHttpRequest(
            "Uri: https://storage.googleapis.com/bucket/random_access.txt\n"
            "Auth Token: fake_token\n"
-           "Range: 0-5\n",
+           "Range: 0-5\n"
+           "Timeouts: 5 1 20\n",
            "012345"),
        new FakeHttpRequest(
            "Uri: https://storage.googleapis.com/bucket/random_access.txt\n"
            "Auth Token: fake_token\n"
-           "Range: 6-11\n",
+           "Range: 6-11\n"
+           "Timeouts: 5 1 20\n",
            "6789")});
   GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
                    std::unique_ptr<HttpRequest::Factory>(
@@ -49,7 +53,7 @@ TEST(GcsFileSystemTest, NewRandomAccessFile_NoBlockCache) {
                    0 /* stat cache max age */, 0 /* stat cache max entries */,
                    0 /* matching paths cache max age */,
                    0 /* matching paths cache max entries */,
-                   0 /* initial retry delay */);
+                   0 /* initial retry delay */, kTestTimeoutConfig);
 
   std::unique_ptr<RandomAccessFile> file;
   TF_EXPECT_OK(fs.NewRandomAccessFile("gs://bucket/random_access.txt", &file));
@@ -73,12 +77,14 @@ TEST(GcsFileSystemTest, NewRandomAccessFile_NoBlockCache_differentN) {
       {new FakeHttpRequest(
            "Uri: https://storage.googleapis.com/bucket/random_access.txt\n"
            "Auth Token: fake_token\n"
-           "Range: 0-2\n",
+           "Range: 0-2\n"
+           "Timeouts: 5 1 20\n",
            "012"),
        new FakeHttpRequest(
            "Uri: https://storage.googleapis.com/bucket/random_access.txt\n"
            "Auth Token: fake_token\n"
-           "Range: 3-12\n",
+           "Range: 3-12\n"
+           "Timeouts: 5 1 20\n",
            "3456789")});
   GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
                    std::unique_ptr<HttpRequest::Factory>(
@@ -87,7 +93,7 @@ TEST(GcsFileSystemTest, NewRandomAccessFile_NoBlockCache_differentN) {
                    0 /* stat cache max age */, 0 /* stat cache max entries */,
                    0 /* matching paths cache max age */,
                    0 /* matching paths cache max entries */,
-                   0 /* initial retry delay */);
+                   0 /* initial retry delay */, kTestTimeoutConfig);
 
   std::unique_ptr<RandomAccessFile> file;
   TF_EXPECT_OK(fs.NewRandomAccessFile("gs://bucket/random_access.txt", &file));
@@ -116,26 +122,30 @@ TEST(GcsFileSystemTest, NewRandomAccessFile_WithBlockCache) {
       {new FakeHttpRequest(
            "Uri: https://storage.googleapis.com/bucket/random_access.txt\n"
            "Auth Token: fake_token\n"
-           "Range: 0-8\n",
+           "Range: 0-8\n"
+           "Timeouts: 5 1 20\n",
            "012345678"),
        new FakeHttpRequest(
            "Uri: https://storage.googleapis.com/bucket/random_access.txt\n"
            "Auth Token: fake_token\n"
-           "Range: 9-17\n",
+           "Range: 9-17\n"
+           "Timeouts: 5 1 20\n",
            "9abcde"),
        new FakeHttpRequest(
            "Uri: https://storage.googleapis.com/bucket/random_access.txt\n"
            "Auth Token: fake_token\n"
-           "Range: 18-26\n",
+           "Range: 18-26\n"
+           "Timeouts: 5 1 20\n",
            "")});
-  GcsFileSystem fs(
-      std::unique_ptr<AuthProvider>(new FakeAuthProvider),
-      std::unique_ptr<HttpRequest::Factory>(
-          new FakeHttpRequestFactory(&requests)),
-      9 /* block size */, 18 /* max bytes */, 0 /* max staleness */,
-      0 /* stat cache max age */, 0 /* stat cache max entries */,
-      0 /* matching paths cache max age */,
-      0 /* matching paths cache max entries */, 0 /* initial retry delay */);
+  GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
+                   std::unique_ptr<HttpRequest::Factory>(
+                       new FakeHttpRequestFactory(&requests)),
+                   9 /* block size */, 18 /* max bytes */,
+                   0 /* max staleness */, 0 /* stat cache max age */,
+                   0 /* stat cache max entries */,
+                   0 /* matching paths cache max age */,
+                   0 /* matching paths cache max entries */,
+                   0 /* initial retry delay */, kTestTimeoutConfig);
 
   char scratch[100];
   StringPiece result;
@@ -191,20 +201,23 @@ TEST(GcsFileSystemTest, NewRandomAccessFile_WithBlockCache_MaxStaleness) {
   std::vector<HttpRequest*> requests(
       {new FakeHttpRequest("Uri: https://storage.googleapis.com/bucket/object\n"
                            "Auth Token: fake_token\n"
-                           "Range: 0-7\n",
+                           "Range: 0-7\n"
+                           "Timeouts: 5 1 20\n",
                            "01234567"),
        new FakeHttpRequest("Uri: https://storage.googleapis.com/bucket/object\n"
                            "Auth Token: fake_token\n"
-                           "Range: 8-15\n",
+                           "Range: 8-15\n"
+                           "Timeouts: 5 1 20\n",
                            "89abcdef")});
-  GcsFileSystem fs(
-      std::unique_ptr<AuthProvider>(new FakeAuthProvider),
-      std::unique_ptr<HttpRequest::Factory>(
-          new FakeHttpRequestFactory(&requests)),
-      8 /* block size */, 16 /* max bytes */, 3600 /* max staleness */,
-      0 /* stat cache max age */, 0 /* stat cache max entries */,
-      0 /* matching paths cache max age */,
-      0 /* matching paths cache max entries */, 0 /* initial retry delay */);
+  GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
+                   std::unique_ptr<HttpRequest::Factory>(
+                       new FakeHttpRequestFactory(&requests)),
+                   8 /* block size */, 16 /* max bytes */,
+                   3600 /* max staleness */, 0 /* stat cache max age */,
+                   0 /* stat cache max entries */,
+                   0 /* matching paths cache max age */,
+                   0 /* matching paths cache max entries */,
+                   0 /* initial retry delay */, kTestTimeoutConfig);
   char scratch[100];
   StringPiece result;
   // There should only be two HTTP requests issued to GCS even though we iterate
@@ -238,14 +251,15 @@ TEST(GcsFileSystemTest, NewRandomAccessFile_WithBlockCache_MaxStaleness) {
 
 TEST(GcsFileSystemTest, NewRandomAccessFile_NoObjectName) {
   std::vector<HttpRequest*> requests;
-  GcsFileSystem fs(
-      std::unique_ptr<AuthProvider>(new FakeAuthProvider),
-      std::unique_ptr<HttpRequest::Factory>(
-          new FakeHttpRequestFactory(&requests)),
-      0 /* read ahead bytes */, 0 /* max bytes */, 0 /* max staleness */,
-      0 /* stat cache max age */, 0 /* stat cache max entries */,
-      0 /* matching paths cache max age */,
-      0 /* matching paths cache max entries */, 0 /* initial retry delay */);
+  GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
+                   std::unique_ptr<HttpRequest::Factory>(
+                       new FakeHttpRequestFactory(&requests)),
+                   0 /* read ahead bytes */, 0 /* max bytes */,
+                   0 /* max staleness */, 0 /* stat cache max age */,
+                   0 /* stat cache max entries */,
+                   0 /* matching paths cache max age */,
+                   0 /* matching paths cache max entries */,
+                   0 /* initial retry delay */, kTestTimeoutConfig);
 
   std::unique_ptr<RandomAccessFile> file;
   EXPECT_EQ(errors::Code::INVALID_ARGUMENT,
@@ -257,24 +271,28 @@ TEST(GcsFileSystemTest, NewWritableFile) {
       {new FakeHttpRequest(
            "Uri: https://storage.googleapis.com/bucket/path%2Fwriteable\n"
            "Auth Token: fake_token\n"
-           "Range: 0-7\n",
+           "Range: 0-7\n"
+           "Timeouts: 5 1 20\n",
            "01234567"),
        new FakeHttpRequest(
            "Uri: https://www.googleapis.com/upload/storage/v1/b/bucket/o?"
            "uploadType=resumable&name=path%2Fwriteable\n"
            "Auth Token: fake_token\n"
            "Header X-Upload-Content-Length: 17\n"
-           "Post: yes\n",
+           "Post: yes\n"
+           "Timeouts: 5 1 10\n",
            "", {{"Location", "https://custom/upload/location"}}),
        new FakeHttpRequest("Uri: https://custom/upload/location\n"
                            "Auth Token: fake_token\n"
                            "Header Content-Range: bytes 0-16/17\n"
+                           "Timeouts: 5 1 30\n"
                            "Put body: content1,content2\n",
                            ""),
        new FakeHttpRequest(
            "Uri: https://storage.googleapis.com/bucket/path%2Fwriteable\n"
            "Auth Token: fake_token\n"
-           "Range: 0-7\n",
+           "Range: 0-7\n"
+           "Timeouts: 5 1 20\n",
            "01234567")});
   GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
                    std::unique_ptr<HttpRequest::Factory>(
@@ -283,7 +301,7 @@ TEST(GcsFileSystemTest, NewWritableFile) {
                    0 /* stat cache max age */, 0 /* stat cache max entries */,
                    0 /* matching paths cache max age */,
                    0 /* matching paths cache max entries */,
-                   0 /* initial retry delay */);
+                   0 /* initial retry delay */, kTestTimeoutConfig);
 
   // Read from the file first, to fill the block cache.
   std::unique_ptr<RandomAccessFile> rfile;
@@ -315,33 +333,39 @@ TEST(GcsFileSystemTest, NewWritableFile_ResumeUploadSucceeds) {
            "uploadType=resumable&name=path%2Fwriteable.txt\n"
            "Auth Token: fake_token\n"
            "Header X-Upload-Content-Length: 17\n"
-           "Post: yes\n",
+           "Post: yes\n"
+           "Timeouts: 5 1 10\n",
            "", {{"Location", "https://custom/upload/location"}}),
        new FakeHttpRequest("Uri: https://custom/upload/location\n"
                            "Auth Token: fake_token\n"
                            "Header Content-Range: bytes 0-16/17\n"
+                           "Timeouts: 5 1 30\n"
                            "Put body: content1,content2\n",
                            "", errors::Unavailable("503"), 503),
        new FakeHttpRequest("Uri: https://custom/upload/location\n"
                            "Auth Token: fake_token\n"
+                           "Timeouts: 5 1 10\n"
                            "Header Content-Range: bytes */17\n"
                            "Put: yes\n",
                            "", errors::FailedPrecondition("308"), nullptr,
-                           {{"Range", "0-10"}}, 308),
+                           {{"Range", "0-10"}}, 308, {}),
        new FakeHttpRequest("Uri: https://custom/upload/location\n"
                            "Auth Token: fake_token\n"
                            "Header Content-Range: bytes 11-16/17\n"
+                           "Timeouts: 5 1 30\n"
                            "Put body: ntent2\n",
                            "", errors::Unavailable("503"), 503),
        new FakeHttpRequest("Uri: https://custom/upload/location\n"
                            "Auth Token: fake_token\n"
+                           "Timeouts: 5 1 10\n"
                            "Header Content-Range: bytes */17\n"
                            "Put: yes\n",
                            "", errors::FailedPrecondition("308"), nullptr,
-                           {{"Range", "bytes=0-12"}}, 308),
+                           {{"Range", "bytes=0-12"}}, 308, {}),
        new FakeHttpRequest("Uri: https://custom/upload/location\n"
                            "Auth Token: fake_token\n"
                            "Header Content-Range: bytes 13-16/17\n"
+                           "Timeouts: 5 1 30\n"
                            "Put body: ent2\n",
                            "")});
   GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
@@ -351,7 +375,7 @@ TEST(GcsFileSystemTest, NewWritableFile_ResumeUploadSucceeds) {
                    0 /* stat cache max age */, 0 /* stat cache max entries */,
                    0 /* matching paths cache max age */,
                    0 /* matching paths cache max entries */,
-                   0 /* initial retry delay */);
+                   0 /* initial retry delay */, kTestTimeoutConfig);
 
   std::unique_ptr<WritableFile> file;
   TF_EXPECT_OK(fs.NewWritableFile("gs://bucket/path/writeable.txt", &file));
@@ -369,38 +393,44 @@ TEST(GcsFileSystemTest, NewWritableFile_ResumeUploadSucceedsOnGetStatus) {
       {new FakeHttpRequest(
            "Uri: https://storage.googleapis.com/bucket/path%2Fwriteable\n"
            "Auth Token: fake_token\n"
-           "Range: 0-7\n",
+           "Range: 0-7\n"
+           "Timeouts: 5 1 20\n",
            "01234567"),
        new FakeHttpRequest(
            "Uri: https://www.googleapis.com/upload/storage/v1/b/bucket/o?"
            "uploadType=resumable&name=path%2Fwriteable\n"
            "Auth Token: fake_token\n"
            "Header X-Upload-Content-Length: 17\n"
-           "Post: yes\n",
+           "Post: yes\n"
+           "Timeouts: 5 1 10\n",
            "", {{"Location", "https://custom/upload/location"}}),
        new FakeHttpRequest("Uri: https://custom/upload/location\n"
                            "Auth Token: fake_token\n"
                            "Header Content-Range: bytes 0-16/17\n"
+                           "Timeouts: 5 1 30\n"
                            "Put body: content1,content2\n",
                            "", errors::Unavailable("503"), 503),
        new FakeHttpRequest("Uri: https://custom/upload/location\n"
                            "Auth Token: fake_token\n"
+                           "Timeouts: 5 1 10\n"
                            "Header Content-Range: bytes */17\n"
                            "Put: yes\n",
-                           "", Status::OK(), nullptr, {}, 201),
+                           "", Status::OK(), nullptr, {}, 201, {}),
        new FakeHttpRequest(
            "Uri: https://storage.googleapis.com/bucket/path%2Fwriteable\n"
            "Auth Token: fake_token\n"
-           "Range: 0-7\n",
+           "Range: 0-7\n"
+           "Timeouts: 5 1 20\n",
            "01234567")});
-  GcsFileSystem fs(
-      std::unique_ptr<AuthProvider>(new FakeAuthProvider),
-      std::unique_ptr<HttpRequest::Factory>(
-          new FakeHttpRequestFactory(&requests)),
-      8 /* block size */, 8 /* max bytes */, 3600 /* max staleness */,
-      0 /* stat cache max age */, 0 /* stat cache max entries */,
-      0 /* matching paths cache max age */,
-      0 /* matching paths cache max entries */, 0 /* initial retry delay */);
+  GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
+                   std::unique_ptr<HttpRequest::Factory>(
+                       new FakeHttpRequestFactory(&requests)),
+                   8 /* block size */, 8 /* max bytes */,
+                   3600 /* max staleness */, 0 /* stat cache max age */,
+                   0 /* stat cache max entries */,
+                   0 /* matching paths cache max age */,
+                   0 /* matching paths cache max entries */,
+                   0 /* initial retry delay */, kTestTimeoutConfig);
   // Pull the file's first block into the cache. This will trigger the first
   // HTTP request to GCS.
   std::unique_ptr<RandomAccessFile> rfile;
@@ -434,25 +464,29 @@ TEST(GcsFileSystemTest, NewWritableFile_ResumeUploadAllAttemptsFail) {
            "uploadType=resumable&name=path%2Fwriteable.txt\n"
            "Auth Token: fake_token\n"
            "Header X-Upload-Content-Length: 17\n"
-           "Post: yes\n",
+           "Post: yes\n"
+           "Timeouts: 5 1 10\n",
            "", {{"Location", "https://custom/upload/location"}}),
        new FakeHttpRequest("Uri: https://custom/upload/location\n"
                            "Auth Token: fake_token\n"
                            "Header Content-Range: bytes 0-16/17\n"
+                           "Timeouts: 5 1 30\n"
                            "Put body: content1,content2\n",
                            "", errors::Unavailable("503"), 503)});
   for (int i = 0; i < 10; i++) {
     requests.emplace_back(new FakeHttpRequest(
         "Uri: https://custom/upload/location\n"
         "Auth Token: fake_token\n"
+        "Timeouts: 5 1 10\n"
         "Header Content-Range: bytes */17\n"
         "Put: yes\n",
         "", errors::FailedPrecondition("important HTTP error 308"), nullptr,
-        {{"Range", "0-10"}}, 308));
+        {{"Range", "0-10"}}, 308, {}));
     requests.emplace_back(new FakeHttpRequest(
         "Uri: https://custom/upload/location\n"
         "Auth Token: fake_token\n"
         "Header Content-Range: bytes 11-16/17\n"
+        "Timeouts: 5 1 30\n"
         "Put body: ntent2\n",
         "", errors::Unavailable("important HTTP error 503"), 503));
   }
@@ -463,12 +497,14 @@ TEST(GcsFileSystemTest, NewWritableFile_ResumeUploadAllAttemptsFail) {
       "uploadType=resumable&name=path%2Fwriteable.txt\n"
       "Auth Token: fake_token\n"
       "Header X-Upload-Content-Length: 17\n"
-      "Post: yes\n",
+      "Post: yes\n"
+      "Timeouts: 5 1 10\n",
       "", {{"Location", "https://custom/upload/location"}}));
   requests.emplace_back(
       new FakeHttpRequest("Uri: https://custom/upload/location\n"
                           "Auth Token: fake_token\n"
                           "Header Content-Range: bytes 0-16/17\n"
+                          "Timeouts: 5 1 30\n"
                           "Put body: content1,content2\n",
                           ""));
   GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
@@ -478,7 +514,7 @@ TEST(GcsFileSystemTest, NewWritableFile_ResumeUploadAllAttemptsFail) {
                    0 /* stat cache max age */, 0 /* stat cache max entries */,
                    0 /* matching paths cache max age */,
                    0 /* matching paths cache max entries */,
-                   2 /* initial retry delay */);
+                   2 /* initial retry delay */, kTestTimeoutConfig);
 
   std::unique_ptr<WritableFile> file;
   TF_EXPECT_OK(fs.NewWritableFile("gs://bucket/path/writeable.txt", &file));
@@ -500,11 +536,13 @@ TEST(GcsFileSystemTest, NewWritableFile_UploadReturns410) {
            "uploadType=resumable&name=path%2Fwriteable.txt\n"
            "Auth Token: fake_token\n"
            "Header X-Upload-Content-Length: 17\n"
-           "Post: yes\n",
+           "Post: yes\n"
+           "Timeouts: 5 1 10\n",
            "", {{"Location", "https://custom/upload/location"}}),
        new FakeHttpRequest("Uri: https://custom/upload/location\n"
                            "Auth Token: fake_token\n"
                            "Header Content-Range: bytes 0-16/17\n"
+                           "Timeouts: 5 1 30\n"
                            "Put body: content1,content2\n",
                            "", errors::NotFound("important HTTP error 410"),
                            410),
@@ -515,11 +553,13 @@ TEST(GcsFileSystemTest, NewWritableFile_UploadReturns410) {
            "uploadType=resumable&name=path%2Fwriteable.txt\n"
            "Auth Token: fake_token\n"
            "Header X-Upload-Content-Length: 17\n"
-           "Post: yes\n",
+           "Post: yes\n"
+           "Timeouts: 5 1 10\n",
            "", {{"Location", "https://custom/upload/location"}}),
        new FakeHttpRequest("Uri: https://custom/upload/location\n"
                            "Auth Token: fake_token\n"
                            "Header Content-Range: bytes 0-16/17\n"
+                           "Timeouts: 5 1 30\n"
                            "Put body: content1,content2\n",
                            "")});
   GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
@@ -529,7 +569,7 @@ TEST(GcsFileSystemTest, NewWritableFile_UploadReturns410) {
                    0 /* stat cache max age */, 0 /* stat cache max entries */,
                    0 /* matching paths cache max age */,
                    0 /* matching paths cache max entries */,
-                   0 /* initial retry delay */);
+                   0 /* initial retry delay */, kTestTimeoutConfig);
 
   std::unique_ptr<WritableFile> file;
   TF_EXPECT_OK(fs.NewWritableFile("gs://bucket/path/writeable.txt", &file));
@@ -558,7 +598,7 @@ TEST(GcsFileSystemTest, NewWritableFile_NoObjectName) {
                    0 /* stat cache max age */, 0 /* stat cache max entries */,
                    0 /* matching paths cache max age */,
                    0 /* matching paths cache max entries */,
-                   0 /* initial retry delay */);
+                   0 /* initial retry delay */, kTestTimeoutConfig);
 
   std::unique_ptr<WritableFile> file;
   EXPECT_EQ(errors::Code::INVALID_ARGUMENT,
@@ -570,33 +610,38 @@ TEST(GcsFileSystemTest, NewAppendableFile) {
       {new FakeHttpRequest(
            "Uri: https://storage.googleapis.com/bucket/path%2Fappendable\n"
            "Auth Token: fake_token\n"
-           "Range: 0-31\n",
+           "Range: 0-31\n"
+           "Timeouts: 5 1 20\n",
            "content1,"),
        new FakeHttpRequest(
            "Uri: https://www.googleapis.com/upload/storage/v1/b/bucket/o?"
            "uploadType=resumable&name=path%2Fappendable\n"
            "Auth Token: fake_token\n"
            "Header X-Upload-Content-Length: 17\n"
-           "Post: yes\n",
+           "Post: yes\n"
+           "Timeouts: 5 1 10\n",
            "", {{"Location", "https://custom/upload/location"}}),
        new FakeHttpRequest("Uri: https://custom/upload/location\n"
                            "Auth Token: fake_token\n"
                            "Header Content-Range: bytes 0-16/17\n"
+                           "Timeouts: 5 1 30\n"
                            "Put body: content1,content2\n",
                            ""),
        new FakeHttpRequest(
            "Uri: https://storage.googleapis.com/bucket/path%2Fappendable\n"
            "Auth Token: fake_token\n"
-           "Range: 0-31\n",
+           "Range: 0-31\n"
+           "Timeouts: 5 1 20\n",
            "01234567")});
-  GcsFileSystem fs(
-      std::unique_ptr<AuthProvider>(new FakeAuthProvider),
-      std::unique_ptr<HttpRequest::Factory>(
-          new FakeHttpRequestFactory(&requests)),
-      32 /* block size */, 32 /* max bytes */, 0 /* max staleness */,
-      0 /* stat cache max age */, 0 /* stat cache max entries */,
-      0 /* matching paths cache max age */,
-      0 /* matching paths cache max entries */, 0 /* initial retry delay */);
+  GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
+                   std::unique_ptr<HttpRequest::Factory>(
+                       new FakeHttpRequestFactory(&requests)),
+                   32 /* block size */, 32 /* max bytes */,
+                   0 /* max staleness */, 0 /* stat cache max age */,
+                   0 /* stat cache max entries */,
+                   0 /* matching paths cache max age */,
+                   0 /* matching paths cache max entries */,
+                   0 /* initial retry delay */, kTestTimeoutConfig);
 
   // Create an appendable file. This should read the file from GCS, and pull its
   // contents into the block cache.
@@ -629,7 +674,7 @@ TEST(GcsFileSystemTest, NewAppendableFile_NoObjectName) {
                    0 /* stat cache max age */, 0 /* stat cache max entries */,
                    0 /* matching paths cache max age */,
                    0 /* matching paths cache max entries */,
-                   0 /* initial retry delay */);
+                   0 /* initial retry delay */, kTestTimeoutConfig);
 
   std::unique_ptr<WritableFile> file;
   EXPECT_EQ(errors::Code::INVALID_ARGUMENT,
@@ -642,7 +687,8 @@ TEST(GcsFileSystemTest, NewReadOnlyMemoryRegionFromFile) {
       {new FakeHttpRequest(
            "Uri: https://www.googleapis.com/storage/v1/b/bucket/o/"
            "path%2Frandom_access.txt?fields=size%2Cupdated\n"
-           "Auth Token: fake_token\n",
+           "Auth Token: fake_token\n"
+           "Timeouts: 5 1 10\n",
            strings::StrCat("{\"size\": \"", content.size(),
                            "\", \"updated\": \"2016-04-29T23:15:24.896Z\"}")),
        new FakeHttpRequest(
@@ -650,7 +696,7 @@ TEST(GcsFileSystemTest, NewReadOnlyMemoryRegionFromFile) {
                            "path%2Frandom_access.txt\n"
                            "Auth Token: fake_token\n"
                            "Range: 0-",
-                           content.size() - 1, "\n"),
+                           content.size() - 1, "\n", "Timeouts: 5 1 20\n"),
            content)});
   GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
                    std::unique_ptr<HttpRequest::Factory>(
@@ -659,7 +705,7 @@ TEST(GcsFileSystemTest, NewReadOnlyMemoryRegionFromFile) {
                    0 /* stat cache max age */, 0 /* stat cache max entries */,
                    0 /* matching paths cache max age */,
                    0 /* matching paths cache max entries */,
-                   0 /* initial retry delay */);
+                   0 /* initial retry delay */, kTestTimeoutConfig);
 
   std::unique_ptr<ReadOnlyMemoryRegion> region;
   TF_EXPECT_OK(fs.NewReadOnlyMemoryRegionFromFile(
@@ -678,7 +724,7 @@ TEST(GcsFileSystemTest, NewReadOnlyMemoryRegionFromFile_NoObjectName) {
                    0 /* stat cache max age */, 0 /* stat cache max entries */,
                    0 /* matching paths cache max age */,
                    0 /* matching paths cache max entries */,
-                   0 /* initial retry delay */);
+                   0 /* initial retry delay */, kTestTimeoutConfig);
 
   std::unique_ptr<ReadOnlyMemoryRegion> region;
   EXPECT_EQ(errors::Code::INVALID_ARGUMENT,
@@ -689,7 +735,8 @@ TEST(GcsFileSystemTest, FileExists_YesAsObject) {
   std::vector<HttpRequest*> requests({new FakeHttpRequest(
       "Uri: https://www.googleapis.com/storage/v1/b/bucket/o/"
       "path%2Ffile1.txt?fields=size%2Cupdated\n"
-      "Auth Token: fake_token\n",
+      "Auth Token: fake_token\n"
+      "Timeouts: 5 1 10\n",
       strings::StrCat("{\"size\": \"1010\","
                       "\"updated\": \"2016-04-29T23:15:24.896Z\"}"))});
   GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
@@ -699,7 +746,7 @@ TEST(GcsFileSystemTest, FileExists_YesAsObject) {
                    0 /* stat cache max age */, 0 /* stat cache max entries */,
                    0 /* matching paths cache max age */,
                    0 /* matching paths cache max entries */,
-                   0 /* initial retry delay */);
+                   0 /* initial retry delay */, kTestTimeoutConfig);
 
   TF_EXPECT_OK(fs.FileExists("gs://bucket/path/file1.txt"));
 }
@@ -709,13 +756,15 @@ TEST(GcsFileSystemTest, FileExists_YesAsFolder) {
       {new FakeHttpRequest(
            "Uri: https://www.googleapis.com/storage/v1/b/bucket/o/"
            "path%2Fsubfolder?fields=size%2Cupdated\n"
-           "Auth Token: fake_token\n",
+           "Auth Token: fake_token\n"
+           "Timeouts: 5 1 10\n",
            "", errors::NotFound("404"), 404),
        new FakeHttpRequest(
            "Uri: https://www.googleapis.com/storage/v1/b/bucket/o?"
            "fields=items%2Fname%2CnextPageToken&prefix=path%2Fsubfolder%2F"
            "&maxResults=1\n"
-           "Auth Token: fake_token\n",
+           "Auth Token: fake_token\n"
+           "Timeouts: 5 1 10\n",
            "{\"items\": [ "
            "  { \"name\": \"path/subfolder/\" }]}")});
   GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
@@ -725,7 +774,7 @@ TEST(GcsFileSystemTest, FileExists_YesAsFolder) {
                    0 /* stat cache max age */, 0 /* stat cache max entries */,
                    0 /* matching paths cache max age */,
                    0 /* matching paths cache max entries */,
-                   0 /* initial retry delay */);
+                   0 /* initial retry delay */, kTestTimeoutConfig);
 
   TF_EXPECT_OK(fs.FileExists("gs://bucket/path/subfolder"));
 }
@@ -734,11 +783,13 @@ TEST(GcsFileSystemTest, FileExists_YesAsBucket) {
   std::vector<HttpRequest*> requests(
       {new FakeHttpRequest(
            "Uri: https://www.googleapis.com/storage/v1/b/bucket1\n"
-           "Auth Token: fake_token\n",
+           "Auth Token: fake_token\n"
+           "Timeouts: 5 1 10\n",
            "{\"size\": \"100\"}"),
        new FakeHttpRequest(
            "Uri: https://www.googleapis.com/storage/v1/b/bucket1\n"
-           "Auth Token: fake_token\n",
+           "Auth Token: fake_token\n"
+           "Timeouts: 5 1 10\n",
            "{\"size\": \"100\"}")});
   GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
                    std::unique_ptr<HttpRequest::Factory>(
@@ -747,7 +798,7 @@ TEST(GcsFileSystemTest, FileExists_YesAsBucket) {
                    0 /* stat cache max age */, 0 /* stat cache max entries */,
                    0 /* matching paths cache max age */,
                    0 /* matching paths cache max entries */,
-                   0 /* initial retry delay */);
+                   0 /* initial retry delay */, kTestTimeoutConfig);
 
   TF_EXPECT_OK(fs.FileExists("gs://bucket1"));
   TF_EXPECT_OK(fs.FileExists("gs://bucket1/"));
@@ -758,13 +809,15 @@ TEST(GcsFileSystemTest, FileExists_NotAsObjectOrFolder) {
       {new FakeHttpRequest(
            "Uri: https://www.googleapis.com/storage/v1/b/bucket/o/"
            "path%2Ffile1.txt?fields=size%2Cupdated\n"
-           "Auth Token: fake_token\n",
+           "Auth Token: fake_token\n"
+           "Timeouts: 5 1 10\n",
            "", errors::NotFound("404"), 404),
        new FakeHttpRequest(
            "Uri: https://www.googleapis.com/storage/v1/b/bucket/o?"
            "fields=items%2Fname%2CnextPageToken&prefix=path%2Ffile1.txt%2F"
            "&maxResults=1\n"
-           "Auth Token: fake_token\n",
+           "Auth Token: fake_token\n"
+           "Timeouts: 5 1 10\n",
            "{\"items\": []}")});
   GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
                    std::unique_ptr<HttpRequest::Factory>(
@@ -773,7 +826,7 @@ TEST(GcsFileSystemTest, FileExists_NotAsObjectOrFolder) {
                    0 /* stat cache max age */, 0 /* stat cache max entries */,
                    0 /* matching paths cache max age */,
                    0 /* matching paths cache max entries */,
-                   0 /* initial retry delay */);
+                   0 /* initial retry delay */, kTestTimeoutConfig);
 
   EXPECT_EQ(errors::Code::NOT_FOUND,
             fs.FileExists("gs://bucket/path/file1.txt").code());
@@ -783,11 +836,13 @@ TEST(GcsFileSystemTest, FileExists_NotAsBucket) {
   std::vector<HttpRequest*> requests(
       {new FakeHttpRequest(
            "Uri: https://www.googleapis.com/storage/v1/b/bucket2\n"
-           "Auth Token: fake_token\n",
+           "Auth Token: fake_token\n"
+           "Timeouts: 5 1 10\n",
            "", errors::NotFound("404"), 404),
        new FakeHttpRequest(
            "Uri: https://www.googleapis.com/storage/v1/b/bucket2\n"
-           "Auth Token: fake_token\n",
+           "Auth Token: fake_token\n"
+           "Timeouts: 5 1 10\n",
            "", errors::NotFound("404"), 404)});
   GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
                    std::unique_ptr<HttpRequest::Factory>(
@@ -796,7 +851,7 @@ TEST(GcsFileSystemTest, FileExists_NotAsBucket) {
                    0 /* stat cache max age */, 0 /* stat cache max entries */,
                    0 /* matching paths cache max age */,
                    0 /* matching paths cache max entries */,
-                   0 /* initial retry delay */);
+                   0 /* initial retry delay */, kTestTimeoutConfig);
   EXPECT_EQ(errors::Code::INVALID_ARGUMENT,
             fs.FileExists("gs://bucket2/").code());
   EXPECT_EQ(errors::Code::INVALID_ARGUMENT,
@@ -808,29 +863,33 @@ TEST(GcsFileSystemTest, FileExists_StatCache) {
       {new FakeHttpRequest(
            "Uri: https://www.googleapis.com/storage/v1/b/bucket/o/"
            "path%2Ffile1.txt?fields=size%2Cupdated\n"
-           "Auth Token: fake_token\n",
+           "Auth Token: fake_token\n"
+           "Timeouts: 5 1 10\n",
            strings::StrCat("{\"size\": \"1010\","
                            "\"updated\": \"2016-04-29T23:15:24.896Z\"}")),
        new FakeHttpRequest(
            "Uri: https://www.googleapis.com/storage/v1/b/bucket/o/"
            "path%2Fsubfolder?fields=size%2Cupdated\n"
-           "Auth Token: fake_token\n",
+           "Auth Token: fake_token\n"
+           "Timeouts: 5 1 10\n",
            "", errors::NotFound("404"), 404),
        new FakeHttpRequest(
            "Uri: https://www.googleapis.com/storage/v1/b/bucket/o?"
            "fields=items%2Fname%2CnextPageToken&prefix=path%2Fsubfolder%2F"
            "&maxResults=1\n"
-           "Auth Token: fake_token\n",
+           "Auth Token: fake_token\n"
+           "Timeouts: 5 1 10\n",
            "{\"items\": [ "
            "  { \"name\": \"path/subfolder/\" }]}")});
-  GcsFileSystem fs(
-      std::unique_ptr<AuthProvider>(new FakeAuthProvider),
-      std::unique_ptr<HttpRequest::Factory>(
-          new FakeHttpRequestFactory(&requests)),
-      0 /* block size */, 0 /* max bytes */, 0 /* max staleness */,
-      3600 /* stat cache max age */, 0 /* stat cache max entries */,
-      0 /* matching paths cache max age */,
-      0 /* matching paths cache max entries */, 0 /* initial retry delay */);
+  GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
+                   std::unique_ptr<HttpRequest::Factory>(
+                       new FakeHttpRequestFactory(&requests)),
+                   0 /* block size */, 0 /* max bytes */, 0 /* max staleness */,
+                   3600 /* stat cache max age */,
+                   0 /* stat cache max entries */,
+                   0 /* matching paths cache max age */,
+                   0 /* matching paths cache max entries */,
+                   0 /* initial retry delay */, kTestTimeoutConfig);
 
   // The stat cache will ensure that repeated lookups don't trigger additional
   // HTTP requests.
@@ -845,7 +904,8 @@ TEST(GcsFileSystemTest, GetChildren_NoItems) {
       "Uri: https://www.googleapis.com/storage/v1/b/bucket/o?"
       "fields=items%2Fname%2Cprefixes%2CnextPageToken&delimiter=%2F&prefix="
       "path%2F\n"
-      "Auth Token: fake_token\n",
+      "Auth Token: fake_token\n"
+      "Timeouts: 5 1 10\n",
       "{\"prefixes\": [\"path/subpath/\"]}")});
   GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
                    std::unique_ptr<HttpRequest::Factory>(
@@ -854,7 +914,7 @@ TEST(GcsFileSystemTest, GetChildren_NoItems) {
                    0 /* stat cache max age */, 0 /* stat cache max entries */,
                    0 /* matching paths cache max age */,
                    0 /* matching paths cache max entries */,
-                   0 /* initial retry delay */);
+                   0 /* initial retry delay */, kTestTimeoutConfig);
 
   std::vector<string> children;
   TF_EXPECT_OK(fs.GetChildren("gs://bucket/path/", &children));
@@ -867,7 +927,8 @@ TEST(GcsFileSystemTest, GetChildren_ThreeFiles) {
       "Uri: https://www.googleapis.com/storage/v1/b/bucket/o?"
       "fields=items%2Fname%2Cprefixes%2CnextPageToken&delimiter=%2F&prefix="
       "path%2F\n"
-      "Auth Token: fake_token\n",
+      "Auth Token: fake_token\n"
+      "Timeouts: 5 1 10\n",
       "{\"items\": [ "
       "  { \"name\": \"path/file1.txt\" },"
       "  { \"name\": \"path/file3.txt\" }],"
@@ -879,7 +940,7 @@ TEST(GcsFileSystemTest, GetChildren_ThreeFiles) {
                    0 /* stat cache max age */, 0 /* stat cache max entries */,
                    0 /* matching paths cache max age */,
                    0 /* matching paths cache max entries */,
-                   0 /* initial retry delay */);
+                   0 /* initial retry delay */, kTestTimeoutConfig);
 
   std::vector<string> children;
   TF_EXPECT_OK(fs.GetChildren("gs://bucket/path/", &children));
@@ -893,7 +954,8 @@ TEST(GcsFileSystemTest, GetChildren_SelfDirectoryMarker) {
       "Uri: https://www.googleapis.com/storage/v1/b/bucket/o?"
       "fields=items%2Fname%2Cprefixes%2CnextPageToken&delimiter=%2F&prefix="
       "path%2F\n"
-      "Auth Token: fake_token\n",
+      "Auth Token: fake_token\n"
+      "Timeouts: 5 1 10\n",
       "{\"items\": [ "
       "  { \"name\": \"path/\" },"
       "  { \"name\": \"path/file3.txt\" }],"
@@ -905,7 +967,7 @@ TEST(GcsFileSystemTest, GetChildren_SelfDirectoryMarker) {
                    0 /* stat cache max age */, 0 /* stat cache max entries */,
                    0 /* matching paths cache max age */,
                    0 /* matching paths cache max entries */,
-                   0 /* initial retry delay */);
+                   0 /* initial retry delay */, kTestTimeoutConfig);
 
   std::vector<string> children;
   TF_EXPECT_OK(fs.GetChildren("gs://bucket/path/", &children));
@@ -918,7 +980,8 @@ TEST(GcsFileSystemTest, GetChildren_ThreeFiles_NoSlash) {
       "Uri: https://www.googleapis.com/storage/v1/b/bucket/o?"
       "fields=items%2Fname%2Cprefixes%2CnextPageToken&delimiter=%2F&prefix="
       "path%2F\n"
-      "Auth Token: fake_token\n",
+      "Auth Token: fake_token\n"
+      "Timeouts: 5 1 10\n",
       "{\"items\": [ "
       "  { \"name\": \"path/file1.txt\" },"
       "  { \"name\": \"path/file3.txt\" }],"
@@ -930,7 +993,7 @@ TEST(GcsFileSystemTest, GetChildren_ThreeFiles_NoSlash) {
                    0 /* stat cache max age */, 0 /* stat cache max entries */,
                    0 /* matching paths cache max age */,
                    0 /* matching paths cache max entries */,
-                   0 /* initial retry delay */);
+                   0 /* initial retry delay*/, kTestTimeoutConfig);
 
   std::vector<string> children;
   TF_EXPECT_OK(fs.GetChildren("gs://bucket/path", &children));
@@ -943,7 +1006,8 @@ TEST(GcsFileSystemTest, GetChildren_Root) {
   std::vector<HttpRequest*> requests({new FakeHttpRequest(
       "Uri: https://www.googleapis.com/storage/v1/b/bucket-a-b-c/o?"
       "fields=items%2Fname%2Cprefixes%2CnextPageToken&delimiter=%2F\n"
-      "Auth Token: fake_token\n",
+      "Auth Token: fake_token\n"
+      "Timeouts: 5 1 10\n",
       "{}")});
   GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
                    std::unique_ptr<HttpRequest::Factory>(
@@ -952,7 +1016,7 @@ TEST(GcsFileSystemTest, GetChildren_Root) {
                    0 /* stat cache max age */, 0 /* stat cache max entries */,
                    0 /* matching paths cache max age */,
                    0 /* matching paths cache max entries */,
-                   0 /* initial retry delay */);
+                   0 /* initial retry delay*/, kTestTimeoutConfig);
 
   std::vector<string> children;
   TF_EXPECT_OK(fs.GetChildren("gs://bucket-a-b-c", &children));
@@ -965,7 +1029,8 @@ TEST(GcsFileSystemTest, GetChildren_Empty) {
       "Uri: https://www.googleapis.com/storage/v1/b/bucket/o?"
       "fields=items%2Fname%2Cprefixes%2CnextPageToken&delimiter=%2F&prefix="
       "path%2F\n"
-      "Auth Token: fake_token\n",
+      "Auth Token: fake_token\n"
+      "Timeouts: 5 1 10\n",
       "{}")});
   GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
                    std::unique_ptr<HttpRequest::Factory>(
@@ -974,7 +1039,7 @@ TEST(GcsFileSystemTest, GetChildren_Empty) {
                    0 /* stat cache max age */, 0 /* stat cache max entries */,
                    0 /* matching paths cache max age */,
                    0 /* matching paths cache max entries */,
-                   0 /* initial retry delay */);
+                   0 /* initial retry delay*/, kTestTimeoutConfig);
 
   std::vector<string> children;
   TF_EXPECT_OK(fs.GetChildren("gs://bucket/path/", &children));
@@ -988,7 +1053,8 @@ TEST(GcsFileSystemTest, GetChildren_Pagination) {
            "Uri: https://www.googleapis.com/storage/v1/b/bucket/o?"
            "fields=items%2Fname%2Cprefixes%2CnextPageToken&delimiter=%2F&"
            "prefix=path%2F\n"
-           "Auth Token: fake_token\n",
+           "Auth Token: fake_token\n"
+           "Timeouts: 5 1 10\n",
            "{\"nextPageToken\": \"ABCD==\", "
            "\"items\": [ "
            "  { \"name\": \"path/file1.txt\" },"
@@ -999,7 +1065,8 @@ TEST(GcsFileSystemTest, GetChildren_Pagination) {
            "fields=items%2Fname%2Cprefixes%2CnextPageToken&delimiter=%2F&"
            "prefix=path%2F"
            "&pageToken=ABCD==\n"
-           "Auth Token: fake_token\n",
+           "Auth Token: fake_token\n"
+           "Timeouts: 5 1 10\n",
            "{\"items\": [ "
            "  { \"name\": \"path/file4.txt\" },"
            "  { \"name\": \"path/file5.txt\" }]}")});
@@ -1011,7 +1078,7 @@ TEST(GcsFileSystemTest, GetChildren_Pagination) {
                    0 /* stat cache max age */, 0 /* stat cache max entries */,
                    0 /* matching paths cache max age */,
                    0 /* matching paths cache max entries */,
-                   0 /* initial retry delay */);
+                   0 /* initial retry delay*/, kTestTimeoutConfig);
 
   std::vector<string> children;
   TF_EXPECT_OK(fs.GetChildren("gs://bucket/path", &children));
@@ -1025,7 +1092,8 @@ TEST(GcsFileSystemTest, GetMatchingPaths_NoWildcard) {
   std::vector<HttpRequest*> requests({new FakeHttpRequest(
       "Uri: https://www.googleapis.com/storage/v1/b/bucket/o?"
       "fields=items%2Fname%2CnextPageToken&prefix=path%2Fsubpath%2F\n"
-      "Auth Token: fake_token\n",
+      "Auth Token: fake_token\n"
+      "Timeouts: 5 1 10\n",
       "{\"items\": [ "
       "  { \"name\": \"path/subpath/file2.txt\" }]}")});
   GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
@@ -1035,7 +1103,7 @@ TEST(GcsFileSystemTest, GetMatchingPaths_NoWildcard) {
                    0 /* stat cache max age */, 0 /* stat cache max entries */,
                    0 /* matching paths cache max age */,
                    0 /* matching paths cache max entries */,
-                   0 /* initial retry delay */);
+                   0 /* initial retry delay*/, kTestTimeoutConfig);
 
   std::vector<string> result;
   TF_EXPECT_OK(
@@ -1048,7 +1116,8 @@ TEST(GcsFileSystemTest, GetMatchingPaths_BucketAndWildcard) {
   std::vector<HttpRequest*> requests({new FakeHttpRequest(
       "Uri: https://www.googleapis.com/storage/v1/b/bucket/o?"
       "fields=items%2Fname%2CnextPageToken\n"
-      "Auth Token: fake_token\n",
+      "Auth Token: fake_token\n"
+      "Timeouts: 5 1 10\n",
       "{\"items\": [ "
       "  { \"name\": \"path/file1.txt\" },"
       "  { \"name\": \"path/subpath/file2.txt\" },"
@@ -1060,7 +1129,7 @@ TEST(GcsFileSystemTest, GetMatchingPaths_BucketAndWildcard) {
                    0 /* stat cache max age */, 0 /* stat cache max entries */,
                    0 /* matching paths cache max age */,
                    0 /* matching paths cache max entries */,
-                   0 /* initial retry delay */);
+                   0 /* initial retry delay*/, kTestTimeoutConfig);
 
   std::vector<string> result;
   TF_EXPECT_OK(fs.GetMatchingPaths("gs://bucket/*/*", &result));
@@ -1074,7 +1143,8 @@ TEST(GcsFileSystemTest, GetMatchingPaths_FolderAndWildcard_Matches) {
   std::vector<HttpRequest*> requests({new FakeHttpRequest(
       "Uri: https://www.googleapis.com/storage/v1/b/bucket/o?"
       "fields=items%2Fname%2CnextPageToken&prefix=path%2F\n"
-      "Auth Token: fake_token\n",
+      "Auth Token: fake_token\n"
+      "Timeouts: 5 1 10\n",
       "{\"items\": [ "
       "  { \"name\": \"path/file1.txt\" },"
       "  { \"name\": \"path/subpath/file2.txt\" },"
@@ -1086,7 +1156,7 @@ TEST(GcsFileSystemTest, GetMatchingPaths_FolderAndWildcard_Matches) {
                    0 /* stat cache max age */, 0 /* stat cache max entries */,
                    0 /* matching paths cache max age */,
                    0 /* matching paths cache max entries */,
-                   0 /* initial retry delay */);
+                   0 /* initial retry delay*/, kTestTimeoutConfig);
 
   std::vector<string> result;
   TF_EXPECT_OK(fs.GetMatchingPaths("gs://bucket/path/*/file2.txt", &result));
@@ -1098,7 +1168,8 @@ TEST(GcsFileSystemTest, GetMatchingPaths_SelfDirectoryMarker) {
   std::vector<HttpRequest*> requests({new FakeHttpRequest(
       "Uri: https://www.googleapis.com/storage/v1/b/bucket/o?"
       "fields=items%2Fname%2CnextPageToken&prefix=path%2F\n"
-      "Auth Token: fake_token\n",
+      "Auth Token: fake_token\n"
+      "Timeouts: 5 1 10\n",
       "{\"items\": [ "
       "  { \"name\": \"path/\" },"
       "  { \"name\": \"path/file3.txt\" }]}")});
@@ -1109,7 +1180,7 @@ TEST(GcsFileSystemTest, GetMatchingPaths_SelfDirectoryMarker) {
                    0 /* stat cache max age */, 0 /* stat cache max entries */,
                    0 /* matching paths cache max age */,
                    0 /* matching paths cache max entries */,
-                   0 /* initial retry delay */);
+                   0 /* initial retry delay*/, kTestTimeoutConfig);
 
   std::vector<string> result;
   TF_EXPECT_OK(fs.GetMatchingPaths("gs://bucket/path/*", &result));
@@ -1120,7 +1191,8 @@ TEST(GcsFileSystemTest, GetMatchingPaths_FolderAndWildcard_NoMatches) {
   std::vector<HttpRequest*> requests({new FakeHttpRequest(
       "Uri: https://www.googleapis.com/storage/v1/b/bucket/o?"
       "fields=items%2Fname%2CnextPageToken&prefix=path%2F\n"
-      "Auth Token: fake_token\n",
+      "Auth Token: fake_token\n"
+      "Timeouts: 5 1 10\n",
       "{\"items\": [ "
       "  { \"name\": \"path/file1.txt\" },"
       "  { \"name\": \"path/subpath/file2.txt\" },"
@@ -1132,7 +1204,7 @@ TEST(GcsFileSystemTest, GetMatchingPaths_FolderAndWildcard_NoMatches) {
                    0 /* stat cache max age */, 0 /* stat cache max entries */,
                    0 /* matching paths cache max age */,
                    0 /* matching paths cache max entries */,
-                   0 /* initial retry delay */);
+                   0 /* initial retry delay*/, kTestTimeoutConfig);
 
   std::vector<string> result;
   TF_EXPECT_OK(fs.GetMatchingPaths("gs://bucket/path/*/file3.txt", &result));
@@ -1148,7 +1220,7 @@ TEST(GcsFileSystemTest, GetMatchingPaths_OnlyWildcard) {
                    0 /* stat cache max age */, 0 /* stat cache max entries */,
                    0 /* matching paths cache max age */,
                    0 /* matching paths cache max entries */,
-                   0 /* initial retry delay */);
+                   0 /* initial retry delay*/, kTestTimeoutConfig);
 
   std::vector<string> result;
   EXPECT_EQ(errors::Code::INVALID_ARGUMENT,
@@ -1160,13 +1232,15 @@ TEST(GcsFileSystemTest, GetMatchingPaths_Cache) {
       {new FakeHttpRequest(
            "Uri: https://www.googleapis.com/storage/v1/b/bucket/o?"
            "fields=items%2Fname%2CnextPageToken&prefix=path%2Fsubpath%2F\n"
-           "Auth Token: fake_token\n",
+           "Auth Token: fake_token\n"
+           "Timeouts: 5 1 10\n",
            "{\"items\": [ "
            "  { \"name\": \"path/subpath/file2.txt\" }]}"),
        new FakeHttpRequest(
            "Uri: https://www.googleapis.com/storage/v1/b/bucket/o?"
            "fields=items%2Fname%2CnextPageToken\n"
-           "Auth Token: fake_token\n",
+           "Auth Token: fake_token\n"
+           "Timeouts: 5 1 10\n",
            "{\"items\": [ "
            "  { \"name\": \"path/file1.txt\" },"
            "  { \"name\": \"path/subpath/file2.txt\" },"
@@ -1178,7 +1252,7 @@ TEST(GcsFileSystemTest, GetMatchingPaths_Cache) {
                    0 /* stat cache max age */, 0 /* stat cache max entries */,
                    3600 /* matching paths cache max age */,
                    0 /* matching paths cache max entries */,
-                   0 /* initial retry delay */);
+                   0 /* initial retry delay*/, kTestTimeoutConfig);
 
   // Repeated calls to fs.GetMatchingPaths on these patterns should not lead to
   // any additional HTTP requests to GCS.
@@ -1201,26 +1275,30 @@ TEST(GcsFileSystemTest, DeleteFile) {
       {new FakeHttpRequest(
            "Uri: https://storage.googleapis.com/bucket/path%2Ffile1.txt\n"
            "Auth Token: fake_token\n"
-           "Range: 0-15\n",
+           "Range: 0-15\n"
+           "Timeouts: 5 1 20\n",
            "01234567"),
        new FakeHttpRequest("Uri: https://www.googleapis.com/storage/v1/b"
                            "/bucket/o/path%2Ffile1.txt\n"
                            "Auth Token: fake_token\n"
+                           "Timeouts: 5 1 10\n"
                            "Delete: yes\n",
                            ""),
        new FakeHttpRequest(
            "Uri: https://storage.googleapis.com/bucket/path%2Ffile1.txt\n"
            "Auth Token: fake_token\n"
-           "Range: 0-15\n",
+           "Range: 0-15\n"
+           "Timeouts: 5 1 20\n",
            "76543210")});
-  GcsFileSystem fs(
-      std::unique_ptr<AuthProvider>(new FakeAuthProvider),
-      std::unique_ptr<HttpRequest::Factory>(
-          new FakeHttpRequestFactory(&requests)),
-      16 /* block size */, 16 /* max bytes */, 0 /* max staleness */,
-      0 /* stat cache max age */, 0 /* stat cache max entries */,
-      0 /* matching paths cache max age */,
-      0 /* matching paths cache max entries */, 0 /* initial retry delay */);
+  GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
+                   std::unique_ptr<HttpRequest::Factory>(
+                       new FakeHttpRequestFactory(&requests)),
+                   16 /* block size */, 16 /* max bytes */,
+                   0 /* max staleness */, 0 /* stat cache max age */,
+                   0 /* stat cache max entries */,
+                   0 /* matching paths cache max age */,
+                   0 /* matching paths cache max entries */,
+                   0 /* initial retry delay*/, kTestTimeoutConfig);
 
   // Do an initial read of the file to load its contents into the block cache.
   char scratch[100];
@@ -1246,7 +1324,7 @@ TEST(GcsFileSystemTest, DeleteFile_NoObjectName) {
                    0 /* stat cache max age */, 0 /* stat cache max entries */,
                    0 /* matching paths cache max age */,
                    0 /* matching paths cache max entries */,
-                   0 /* initial retry delay */);
+                   0 /* initial retry delay*/, kTestTimeoutConfig);
 
   EXPECT_EQ(errors::Code::INVALID_ARGUMENT,
             fs.DeleteFile("gs://bucket/").code());
@@ -1256,7 +1334,8 @@ TEST(GcsFileSystemTest, DeleteDir_Empty) {
   std::vector<HttpRequest*> requests({new FakeHttpRequest(
       "Uri: https://www.googleapis.com/storage/v1/b/bucket/o?"
       "fields=items%2Fname%2CnextPageToken&prefix=path%2F&maxResults=2\n"
-      "Auth Token: fake_token\n",
+      "Auth Token: fake_token\n"
+      "Timeouts: 5 1 10\n",
       "{}")});
   GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
                    std::unique_ptr<HttpRequest::Factory>(
@@ -1265,7 +1344,7 @@ TEST(GcsFileSystemTest, DeleteDir_Empty) {
                    0 /* stat cache max age */, 0 /* stat cache max entries */,
                    0 /* matching paths cache max age */,
                    0 /* matching paths cache max entries */,
-                   0 /* initial retry delay */);
+                   0 /* initial retry delay*/, kTestTimeoutConfig);
 
   TF_EXPECT_OK(fs.DeleteDir("gs://bucket/path/"));
 }
@@ -1275,12 +1354,14 @@ TEST(GcsFileSystemTest, DeleteDir_OnlyDirMarkerLeft) {
       {new FakeHttpRequest(
            "Uri: https://www.googleapis.com/storage/v1/b/bucket/o?"
            "fields=items%2Fname%2CnextPageToken&prefix=path%2F&maxResults=2\n"
-           "Auth Token: fake_token\n",
+           "Auth Token: fake_token\n"
+           "Timeouts: 5 1 10\n",
            "{\"items\": [ "
            "  { \"name\": \"path/\" }]}"),
        new FakeHttpRequest("Uri: https://www.googleapis.com/storage/v1/b"
                            "/bucket/o/path%2F\n"
                            "Auth Token: fake_token\n"
+                           "Timeouts: 5 1 10\n"
                            "Delete: yes\n",
                            "")});
   GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
@@ -1290,7 +1371,7 @@ TEST(GcsFileSystemTest, DeleteDir_OnlyDirMarkerLeft) {
                    0 /* stat cache max age */, 0 /* stat cache max entries */,
                    0 /* matching paths cache max age */,
                    0 /* matching paths cache max entries */,
-                   0 /* initial retry delay */);
+                   0 /* initial retry delay*/, kTestTimeoutConfig);
 
   TF_EXPECT_OK(fs.DeleteDir("gs://bucket/path/"));
 }
@@ -1298,7 +1379,8 @@ TEST(GcsFileSystemTest, DeleteDir_OnlyDirMarkerLeft) {
 TEST(GcsFileSystemTest, DeleteDir_BucketOnly) {
   std::vector<HttpRequest*> requests({new FakeHttpRequest(
       "Uri: https://www.googleapis.com/storage/v1/b/bucket/o?fields=items%2F"
-      "name%2CnextPageToken&maxResults=2\nAuth Token: fake_token\n",
+      "name%2CnextPageToken&maxResults=2\nAuth Token: fake_token\n"
+      "Timeouts: 5 1 10\n",
       "{}")});
   GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
                    std::unique_ptr<HttpRequest::Factory>(
@@ -1307,7 +1389,7 @@ TEST(GcsFileSystemTest, DeleteDir_BucketOnly) {
                    0 /* stat cache max age */, 0 /* stat cache max entries */,
                    0 /* matching paths cache max age */,
                    0 /* matching paths cache max entries */,
-                   0 /* initial retry delay */);
+                   0 /* initial retry delay*/, kTestTimeoutConfig);
 
   TF_EXPECT_OK(fs.DeleteDir("gs://bucket"));
 }
@@ -1316,7 +1398,8 @@ TEST(GcsFileSystemTest, DeleteDir_NonEmpty) {
   std::vector<HttpRequest*> requests({new FakeHttpRequest(
       "Uri: https://www.googleapis.com/storage/v1/b/bucket/o?"
       "fields=items%2Fname%2CnextPageToken&prefix=path%2F&maxResults=2\n"
-      "Auth Token: fake_token\n",
+      "Auth Token: fake_token\n"
+      "Timeouts: 5 1 10\n",
       "{\"items\": [ "
       "  { \"name\": \"path/file1.txt\" }]}")});
   GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
@@ -1326,7 +1409,7 @@ TEST(GcsFileSystemTest, DeleteDir_NonEmpty) {
                    0 /* stat cache max age */, 0 /* stat cache max entries */,
                    0 /* matching paths cache max age */,
                    0 /* matching paths cache max entries */,
-                   0 /* initial retry delay */);
+                   0 /* initial retry delay*/, kTestTimeoutConfig);
 
   EXPECT_EQ(error::Code::FAILED_PRECONDITION,
             fs.DeleteDir("gs://bucket/path/").code());
@@ -1336,7 +1419,8 @@ TEST(GcsFileSystemTest, GetFileSize) {
   std::vector<HttpRequest*> requests({new FakeHttpRequest(
       "Uri: https://www.googleapis.com/storage/v1/b/bucket/o/"
       "file.txt?fields=size%2Cupdated\n"
-      "Auth Token: fake_token\n",
+      "Auth Token: fake_token\n"
+      "Timeouts: 5 1 10\n",
       strings::StrCat("{\"size\": \"1010\","
                       "\"updated\": \"2016-04-29T23:15:24.896Z\"}"))});
   GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
@@ -1346,7 +1430,7 @@ TEST(GcsFileSystemTest, GetFileSize) {
                    0 /* stat cache max age */, 0 /* stat cache max entries */,
                    0 /* matching paths cache max age */,
                    0 /* matching paths cache max entries */,
-                   0 /* initial retry delay */);
+                   0 /* initial retry delay*/, kTestTimeoutConfig);
 
   uint64 size;
   TF_EXPECT_OK(fs.GetFileSize("gs://bucket/file.txt", &size));
@@ -1362,7 +1446,7 @@ TEST(GcsFileSystemTest, GetFileSize_NoObjectName) {
                    0 /* stat cache max age */, 0 /* stat cache max entries */,
                    0 /* matching paths cache max age */,
                    0 /* matching paths cache max entries */,
-                   0 /* initial retry delay */);
+                   0 /* initial retry delay*/, kTestTimeoutConfig);
 
   uint64 size;
   EXPECT_EQ(errors::Code::INVALID_ARGUMENT,
@@ -1376,14 +1460,16 @@ TEST(GcsFileSystemTest, RenameFile_Folder) {
            "Uri: https://www.googleapis.com/storage/v1/b/bucket/o?"
            "fields=items%2Fname%2CnextPageToken&prefix=path1%2F"
            "&maxResults=1\n"
-           "Auth Token: fake_token\n",
+           "Auth Token: fake_token\n"
+           "Timeouts: 5 1 10\n",
            "{\"items\": [ "
            "  { \"name\": \"path1/subfolder/file1.txt\" }]}"),
        // Requesting the full list of files in the folder.
        new FakeHttpRequest(
            "Uri: https://www.googleapis.com/storage/v1/b/bucket/o?"
            "fields=items%2Fname%2CnextPageToken&prefix=path1%2F\n"
-           "Auth Token: fake_token\n",
+           "Auth Token: fake_token\n"
+           "Timeouts: 5 1 10\n",
            "{\"items\": [ "
            "  { \"name\": \"path1/\" },"  // A directory marker.
            "  { \"name\": \"path1/subfolder/file1.txt\" },"
@@ -1393,13 +1479,15 @@ TEST(GcsFileSystemTest, RenameFile_Folder) {
            "Uri: https://www.googleapis.com/storage/v1/b/bucket/o/"
            "path1%2F/rewriteTo/b/bucket/o/path2%2F\n"
            "Auth Token: fake_token\n"
-           "Post: yes\n",
+           "Post: yes\n"
+           "Timeouts: 5 1 10\n",
            "{\"done\": true}"),
        // Deleting the original directory marker.
        new FakeHttpRequest(
            "Uri: https://www.googleapis.com/storage/v1/b/bucket/o/"
            "path1%2F\n"
            "Auth Token: fake_token\n"
+           "Timeouts: 5 1 10\n"
            "Delete: yes\n",
            ""),
        // Copying the first file.
@@ -1408,13 +1496,15 @@ TEST(GcsFileSystemTest, RenameFile_Folder) {
            "path1%2Fsubfolder%2Ffile1.txt/rewriteTo/b/bucket/o/"
            "path2%2Fsubfolder%2Ffile1.txt\n"
            "Auth Token: fake_token\n"
-           "Post: yes\n",
+           "Post: yes\n"
+           "Timeouts: 5 1 10\n",
            "{\"done\": true}"),
        // Deleting the first original file.
        new FakeHttpRequest(
            "Uri: https://www.googleapis.com/storage/v1/b/bucket/o/"
            "path1%2Fsubfolder%2Ffile1.txt\n"
            "Auth Token: fake_token\n"
+           "Timeouts: 5 1 10\n"
            "Delete: yes\n",
            ""),
        // Copying the second file.
@@ -1422,13 +1512,15 @@ TEST(GcsFileSystemTest, RenameFile_Folder) {
            "Uri: https://www.googleapis.com/storage/v1/b/bucket/o/"
            "path1%2Ffile2.txt/rewriteTo/b/bucket/o/path2%2Ffile2.txt\n"
            "Auth Token: fake_token\n"
-           "Post: yes\n",
+           "Post: yes\n"
+           "Timeouts: 5 1 10\n",
            "{\"done\": true}"),
        // Deleting the second original file.
        new FakeHttpRequest(
            "Uri: https://www.googleapis.com/storage/v1/b/bucket/o/"
            "path1%2Ffile2.txt\n"
            "Auth Token: fake_token\n"
+           "Timeouts: 5 1 10\n"
            "Delete: yes\n",
            "")});
   GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
@@ -1438,7 +1530,7 @@ TEST(GcsFileSystemTest, RenameFile_Folder) {
                    0 /* stat cache max age */, 0 /* stat cache max entries */,
                    0 /* matching paths cache max age */,
                    0 /* matching paths cache max entries */,
-                   0 /* initial retry delay */);
+                   0 /* initial retry delay*/, kTestTimeoutConfig);
 
   TF_EXPECT_OK(fs.RenameFile("gs://bucket/path1", "gs://bucket/path2/"));
 }
@@ -1448,25 +1540,29 @@ TEST(GcsFileSystemTest, RenameFile_Object) {
       {new FakeHttpRequest(
            "Uri: https://storage.googleapis.com/bucket/path%2Fsrc.txt\n"
            "Auth Token: fake_token\n"
-           "Range: 0-15\n",
+           "Range: 0-15\n"
+           "Timeouts: 5 1 20\n",
            "01234567"),
        new FakeHttpRequest(
            "Uri: https://storage.googleapis.com/bucket/path%2Fdst.txt\n"
            "Auth Token: fake_token\n"
-           "Range: 0-15\n",
+           "Range: 0-15\n"
+           "Timeouts: 5 1 20\n",
            "76543210"),
        // IsDirectory is checking whether there are children objects.
        new FakeHttpRequest(
            "Uri: https://www.googleapis.com/storage/v1/b/bucket/o?"
            "fields=items%2Fname%2CnextPageToken&prefix=path%2Fsrc.txt%2F"
            "&maxResults=1\n"
-           "Auth Token: fake_token\n",
+           "Auth Token: fake_token\n"
+           "Timeouts: 5 1 10\n",
            "{}"),
        // IsDirectory is checking if the path exists as an object.
        new FakeHttpRequest(
            "Uri: https://www.googleapis.com/storage/v1/b/bucket/o/"
            "path%2Fsrc.txt?fields=size%2Cupdated\n"
-           "Auth Token: fake_token\n",
+           "Auth Token: fake_token\n"
+           "Timeouts: 5 1 10\n",
            strings::StrCat("{\"size\": \"1010\","
                            "\"updated\": \"2016-04-29T23:15:24.896Z\"}")),
        // Copying to the new location.
@@ -1474,33 +1570,38 @@ TEST(GcsFileSystemTest, RenameFile_Object) {
            "Uri: https://www.googleapis.com/storage/v1/b/bucket/o/"
            "path%2Fsrc.txt/rewriteTo/b/bucket/o/path%2Fdst.txt\n"
            "Auth Token: fake_token\n"
-           "Post: yes\n",
+           "Post: yes\n"
+           "Timeouts: 5 1 10\n",
            "{\"done\": true}"),
        // Deleting the original file.
        new FakeHttpRequest(
            "Uri: https://www.googleapis.com/storage/v1/b/bucket/o/"
            "path%2Fsrc.txt\n"
            "Auth Token: fake_token\n"
+           "Timeouts: 5 1 10\n"
            "Delete: yes\n",
            ""),
        new FakeHttpRequest(
            "Uri: https://storage.googleapis.com/bucket/path%2Fsrc.txt\n"
            "Auth Token: fake_token\n"
-           "Range: 0-15\n",
+           "Range: 0-15\n"
+           "Timeouts: 5 1 20\n",
            "89abcdef"),
        new FakeHttpRequest(
            "Uri: https://storage.googleapis.com/bucket/path%2Fdst.txt\n"
            "Auth Token: fake_token\n"
-           "Range: 0-15\n",
+           "Range: 0-15\n"
+           "Timeouts: 5 1 20\n",
            "fedcba98")});
-  GcsFileSystem fs(
-      std::unique_ptr<AuthProvider>(new FakeAuthProvider),
-      std::unique_ptr<HttpRequest::Factory>(
-          new FakeHttpRequestFactory(&requests)),
-      16 /* block size */, 64 /* max bytes */, 0 /* max staleness */,
-      0 /* stat cache max age */, 0 /* stat cache max entries */,
-      0 /* matching paths cache max age */,
-      0 /* matching paths cache max entries */, 0 /* initial retry delay */);
+  GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
+                   std::unique_ptr<HttpRequest::Factory>(
+                       new FakeHttpRequestFactory(&requests)),
+                   16 /* block size */, 64 /* max bytes */,
+                   0 /* max staleness */, 0 /* stat cache max age */,
+                   0 /* stat cache max entries */,
+                   0 /* matching paths cache max age */,
+                   0 /* matching paths cache max entries */,
+                   0 /* initial retry delay*/, kTestTimeoutConfig);
   // Do an initial read of the source and destination files to load their
   // contents into the block cache.
   char scratch[100];
@@ -1531,13 +1632,15 @@ TEST(GcsFileSystemTest, RenameFile_Object_DeletionRetried) {
            "Uri: https://www.googleapis.com/storage/v1/b/bucket/o?"
            "fields=items%2Fname%2CnextPageToken&prefix=path%2Fsrc.txt%2F"
            "&maxResults=1\n"
-           "Auth Token: fake_token\n",
+           "Auth Token: fake_token\n"
+           "Timeouts: 5 1 10\n",
            "{}"),
        // IsDirectory is checking if the path exists as an object.
        new FakeHttpRequest(
            "Uri: https://www.googleapis.com/storage/v1/b/bucket/o/"
            "path%2Fsrc.txt?fields=size%2Cupdated\n"
-           "Auth Token: fake_token\n",
+           "Auth Token: fake_token\n"
+           "Timeouts: 5 1 10\n",
            strings::StrCat("{\"size\": \"1010\","
                            "\"updated\": \"2016-04-29T23:15:24.896Z\"}")),
        // Copying to the new location.
@@ -1545,13 +1648,15 @@ TEST(GcsFileSystemTest, RenameFile_Object_DeletionRetried) {
            "Uri: https://www.googleapis.com/storage/v1/b/bucket/o/"
            "path%2Fsrc.txt/rewriteTo/b/bucket/o/path%2Fdst.txt\n"
            "Auth Token: fake_token\n"
-           "Post: yes\n",
+           "Post: yes\n"
+           "Timeouts: 5 1 10\n",
            "{\"done\": true}"),
        // Deleting the original file - the deletion returns a failure.
        new FakeHttpRequest(
            "Uri: https://www.googleapis.com/storage/v1/b/bucket/o/"
            "path%2Fsrc.txt\n"
            "Auth Token: fake_token\n"
+           "Timeouts: 5 1 10\n"
            "Delete: yes\n",
            "", errors::Unavailable("503"), 503),
        // Deleting the original file again - the deletion returns NOT_FOUND.
@@ -1559,6 +1664,7 @@ TEST(GcsFileSystemTest, RenameFile_Object_DeletionRetried) {
            "Uri: https://www.googleapis.com/storage/v1/b/bucket/o/"
            "path%2Fsrc.txt\n"
            "Auth Token: fake_token\n"
+           "Timeouts: 5 1 10\n"
            "Delete: yes\n",
            "", errors::NotFound("404"), 404)});
   GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
@@ -1568,7 +1674,7 @@ TEST(GcsFileSystemTest, RenameFile_Object_DeletionRetried) {
                    0 /* stat cache max age */, 0 /* stat cache max entries */,
                    0 /* matching paths cache max age */,
                    0 /* matching paths cache max entries */,
-                   0 /* initial retry delay */);
+                   0 /* initial retry delay*/, kTestTimeoutConfig);
 
   TF_EXPECT_OK(
       fs.RenameFile("gs://bucket/path/src.txt", "gs://bucket/path/dst.txt"));
@@ -1582,13 +1688,15 @@ TEST(GcsFileSystemTest, RenameFile_Object_Incomplete) {
            "Uri: https://www.googleapis.com/storage/v1/b/bucket/o?"
            "fields=items%2Fname%2CnextPageToken&prefix=path%2Fsrc.txt%2F"
            "&maxResults=1\n"
-           "Auth Token: fake_token\n",
+           "Auth Token: fake_token\n"
+           "Timeouts: 5 1 10\n",
            "{}"),
        // IsDirectory is checking if the path exists as an object.
        new FakeHttpRequest(
            "Uri: https://www.googleapis.com/storage/v1/b/bucket/o/"
            "path%2Fsrc.txt?fields=size%2Cupdated\n"
-           "Auth Token: fake_token\n",
+           "Auth Token: fake_token\n"
+           "Timeouts: 5 1 10\n",
            strings::StrCat("{\"size\": \"1010\","
                            "\"updated\": \"2016-04-29T23:15:24.896Z\"}")),
        // Copying to the new location.
@@ -1596,7 +1704,8 @@ TEST(GcsFileSystemTest, RenameFile_Object_Incomplete) {
            "Uri: https://www.googleapis.com/storage/v1/b/bucket/o/"
            "path%2Fsrc.txt/rewriteTo/b/bucket/o/path%2Fdst.txt\n"
            "Auth Token: fake_token\n"
-           "Post: yes\n",
+           "Post: yes\n"
+           "Timeouts: 5 1 10\n",
            "{\"done\": false}")});
   GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
                    std::unique_ptr<HttpRequest::Factory>(
@@ -1605,7 +1714,7 @@ TEST(GcsFileSystemTest, RenameFile_Object_Incomplete) {
                    0 /* stat cache max age */, 0 /* stat cache max entries */,
                    0 /* matching paths cache max age */,
                    0 /* matching paths cache max entries */,
-                   0 /* initial retry delay */);
+                   0 /* initial retry delay*/, kTestTimeoutConfig);
 
   EXPECT_EQ(
       errors::Code::UNIMPLEMENTED,
@@ -1617,7 +1726,8 @@ TEST(GcsFileSystemTest, Stat_Object) {
   std::vector<HttpRequest*> requests({new FakeHttpRequest(
       "Uri: https://www.googleapis.com/storage/v1/b/bucket/o/"
       "file.txt?fields=size%2Cupdated\n"
-      "Auth Token: fake_token\n",
+      "Auth Token: fake_token\n"
+      "Timeouts: 5 1 10\n",
       strings::StrCat("{\"size\": \"1010\","
                       "\"updated\": \"2016-04-29T23:15:24.896Z\"}"))});
   GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
@@ -1627,7 +1737,7 @@ TEST(GcsFileSystemTest, Stat_Object) {
                    0 /* stat cache max age */, 0 /* stat cache max entries */,
                    0 /* matching paths cache max age */,
                    0 /* matching paths cache max entries */,
-                   0 /* initial retry delay */);
+                   0 /* initial retry delay*/, kTestTimeoutConfig);
 
   FileStatistics stat;
   TF_EXPECT_OK(fs.Stat("gs://bucket/file.txt", &stat));
@@ -1641,13 +1751,15 @@ TEST(GcsFileSystemTest, Stat_Folder) {
       {new FakeHttpRequest(
            "Uri: https://www.googleapis.com/storage/v1/b/bucket/o/"
            "subfolder?fields=size%2Cupdated\n"
-           "Auth Token: fake_token\n",
+           "Auth Token: fake_token\n"
+           "Timeouts: 5 1 10\n",
            "", errors::NotFound("404"), 404),
        new FakeHttpRequest(
            "Uri: https://www.googleapis.com/storage/v1/b/bucket/o?"
            "fields=items%2Fname%2CnextPageToken&prefix=subfolder%2F"
            "&maxResults=1\n"
-           "Auth Token: fake_token\n",
+           "Auth Token: fake_token\n"
+           "Timeouts: 5 1 10\n",
            "{\"items\": [ "
            "  { \"name\": \"subfolder/\" }]}")});
   GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
@@ -1657,7 +1769,7 @@ TEST(GcsFileSystemTest, Stat_Folder) {
                    0 /* stat cache max age */, 0 /* stat cache max entries */,
                    0 /* matching paths cache max age */,
                    0 /* matching paths cache max entries */,
-                   0 /* initial retry delay */);
+                   0 /* initial retry delay*/, kTestTimeoutConfig);
 
   FileStatistics stat;
   TF_EXPECT_OK(fs.Stat("gs://bucket/subfolder", &stat));
@@ -1671,13 +1783,15 @@ TEST(GcsFileSystemTest, Stat_ObjectOrFolderNotFound) {
       {new FakeHttpRequest(
            "Uri: https://www.googleapis.com/storage/v1/b/bucket/o/"
            "path?fields=size%2Cupdated\n"
-           "Auth Token: fake_token\n",
+           "Auth Token: fake_token\n"
+           "Timeouts: 5 1 10\n",
            "", errors::NotFound("404"), 404),
        new FakeHttpRequest(
            "Uri: https://www.googleapis.com/storage/v1/b/bucket/o?"
            "fields=items%2Fname%2CnextPageToken&prefix=path%2F"
            "&maxResults=1\n"
-           "Auth Token: fake_token\n",
+           "Auth Token: fake_token\n"
+           "Timeouts: 5 1 10\n",
            "{}")});
   GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
                    std::unique_ptr<HttpRequest::Factory>(
@@ -1686,7 +1800,7 @@ TEST(GcsFileSystemTest, Stat_ObjectOrFolderNotFound) {
                    0 /* stat cache max age */, 0 /* stat cache max entries */,
                    0 /* matching paths cache max age */,
                    0 /* matching paths cache max entries */,
-                   0 /* initial retry delay */);
+                   0 /* initial retry delay*/, kTestTimeoutConfig);
 
   FileStatistics stat;
   EXPECT_EQ(error::Code::NOT_FOUND, fs.Stat("gs://bucket/path", &stat).code());
@@ -1695,7 +1809,8 @@ TEST(GcsFileSystemTest, Stat_ObjectOrFolderNotFound) {
 TEST(GcsFileSystemTest, Stat_Bucket) {
   std::vector<HttpRequest*> requests({new FakeHttpRequest(
       "Uri: https://www.googleapis.com/storage/v1/b/bucket\n"
-      "Auth Token: fake_token\n",
+      "Auth Token: fake_token\n"
+      "Timeouts: 5 1 10\n",
       "{}")});
   GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
                    std::unique_ptr<HttpRequest::Factory>(
@@ -1704,7 +1819,7 @@ TEST(GcsFileSystemTest, Stat_Bucket) {
                    0 /* stat cache max age */, 0 /* stat cache max entries */,
                    0 /* matching paths cache max age */,
                    0 /* matching paths cache max entries */,
-                   0 /* initial retry delay */);
+                   0 /* initial retry delay*/, kTestTimeoutConfig);
 
   FileStatistics stat;
   TF_EXPECT_OK(fs.Stat("gs://bucket/", &stat));
@@ -1716,7 +1831,8 @@ TEST(GcsFileSystemTest, Stat_Bucket) {
 TEST(GcsFileSystemTest, Stat_BucketNotFound) {
   std::vector<HttpRequest*> requests({new FakeHttpRequest(
       "Uri: https://www.googleapis.com/storage/v1/b/bucket\n"
-      "Auth Token: fake_token\n",
+      "Auth Token: fake_token\n"
+      "Timeouts: 5 1 10\n",
       "", errors::NotFound("404"), 404)});
   GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
                    std::unique_ptr<HttpRequest::Factory>(
@@ -1725,7 +1841,7 @@ TEST(GcsFileSystemTest, Stat_BucketNotFound) {
                    0 /* stat cache max age */, 0 /* stat cache max entries */,
                    0 /* matching paths cache max age */,
                    0 /* matching paths cache max entries */,
-                   0 /* initial retry delay */);
+                   0 /* initial retry delay*/, kTestTimeoutConfig);
 
   FileStatistics stat;
   EXPECT_EQ(error::Code::NOT_FOUND, fs.Stat("gs://bucket/", &stat).code());
@@ -1736,29 +1852,33 @@ TEST(GcsFileSystemTest, Stat_Cache) {
       {new FakeHttpRequest(
            "Uri: https://www.googleapis.com/storage/v1/b/bucket/o/"
            "file.txt?fields=size%2Cupdated\n"
-           "Auth Token: fake_token\n",
+           "Auth Token: fake_token\n"
+           "Timeouts: 5 1 10\n",
            strings::StrCat("{\"size\": \"1010\","
                            "\"updated\": \"2016-04-29T23:15:24.896Z\"}")),
        new FakeHttpRequest(
            "Uri: https://www.googleapis.com/storage/v1/b/bucket/o/"
            "subfolder?fields=size%2Cupdated\n"
-           "Auth Token: fake_token\n",
+           "Auth Token: fake_token\n"
+           "Timeouts: 5 1 10\n",
            "", errors::NotFound("404"), 404),
        new FakeHttpRequest(
            "Uri: https://www.googleapis.com/storage/v1/b/bucket/o?"
            "fields=items%2Fname%2CnextPageToken&prefix=subfolder%2F"
            "&maxResults=1\n"
-           "Auth Token: fake_token\n",
+           "Auth Token: fake_token\n"
+           "Timeouts: 5 1 10\n",
            "{\"items\": [ "
            "  { \"name\": \"subfolder/\" }]}")});
-  GcsFileSystem fs(
-      std::unique_ptr<AuthProvider>(new FakeAuthProvider),
-      std::unique_ptr<HttpRequest::Factory>(
-          new FakeHttpRequestFactory(&requests)),
-      0 /* block size */, 0 /* max bytes */, 0 /* max staleness */,
-      3600 /* stat cache max age */, 0 /* stat cache max entries */,
-      0 /* matching paths cache max age */,
-      0 /* matching paths cache max entries */, 0 /* initial retry delay */);
+  GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
+                   std::unique_ptr<HttpRequest::Factory>(
+                       new FakeHttpRequestFactory(&requests)),
+                   0 /* block size */, 0 /* max bytes */, 0 /* max staleness */,
+                   3600 /* stat cache max age */,
+                   0 /* stat cache max entries */,
+                   0 /* matching paths cache max age */,
+                   0 /* matching paths cache max entries */,
+                   0 /* initial retry delay*/, kTestTimeoutConfig);
 
   // Repeated calls to fs.Stat on these paths should not lead to any additional
   // HTTP requests to GCS.
@@ -1781,12 +1901,14 @@ TEST(GcsFileSystemTest, IsDirectory_NotFound) {
            "Uri: https://www.googleapis.com/storage/v1/b/bucket/o?"
            "fields=items%2Fname%2CnextPageToken&prefix=file.txt%2F"
            "&maxResults=1\n"
-           "Auth Token: fake_token\n",
+           "Auth Token: fake_token\n"
+           "Timeouts: 5 1 10\n",
            "{}"),
        new FakeHttpRequest(
            "Uri: https://www.googleapis.com/storage/v1/b/bucket/o/"
            "file.txt?fields=size%2Cupdated\n"
-           "Auth Token: fake_token\n",
+           "Auth Token: fake_token\n"
+           "Timeouts: 5 1 10\n",
            "", errors::NotFound("404"), 404)});
   GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
                    std::unique_ptr<HttpRequest::Factory>(
@@ -1795,7 +1917,7 @@ TEST(GcsFileSystemTest, IsDirectory_NotFound) {
                    0 /* stat cache max age */, 0 /* stat cache max entries */,
                    0 /* matching paths cache max age */,
                    0 /* matching paths cache max entries */,
-                   0 /* initial retry delay */);
+                   0 /* initial retry delay*/, kTestTimeoutConfig);
 
   EXPECT_EQ(error::Code::NOT_FOUND,
             fs.IsDirectory("gs://bucket/file.txt").code());
@@ -1807,12 +1929,14 @@ TEST(GcsFileSystemTest, IsDirectory_NotDirectoryButObject) {
            "Uri: https://www.googleapis.com/storage/v1/b/bucket/o?"
            "fields=items%2Fname%2CnextPageToken&prefix=file.txt%2F"
            "&maxResults=1\n"
-           "Auth Token: fake_token\n",
+           "Auth Token: fake_token\n"
+           "Timeouts: 5 1 10\n",
            "{}"),
        new FakeHttpRequest(
            "Uri: https://www.googleapis.com/storage/v1/b/bucket/o/"
            "file.txt?fields=size%2Cupdated\n"
-           "Auth Token: fake_token\n",
+           "Auth Token: fake_token\n"
+           "Timeouts: 5 1 10\n",
            strings::StrCat("{\"size\": \"1010\","
                            "\"updated\": \"2016-04-29T23:15:24.896Z\"}"))});
   GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
@@ -1822,7 +1946,7 @@ TEST(GcsFileSystemTest, IsDirectory_NotDirectoryButObject) {
                    0 /* stat cache max age */, 0 /* stat cache max entries */,
                    0 /* matching paths cache max age */,
                    0 /* matching paths cache max entries */,
-                   0 /* initial retry delay */);
+                   0 /* initial retry delay*/, kTestTimeoutConfig);
 
   EXPECT_EQ(error::Code::FAILED_PRECONDITION,
             fs.IsDirectory("gs://bucket/file.txt").code());
@@ -1834,13 +1958,15 @@ TEST(GcsFileSystemTest, IsDirectory_Yes) {
            "Uri: https://www.googleapis.com/storage/v1/b/bucket/o?"
            "fields=items%2Fname%2CnextPageToken&prefix=subfolder%2F"
            "&maxResults=1\n"
-           "Auth Token: fake_token\n",
+           "Auth Token: fake_token\n"
+           "Timeouts: 5 1 10\n",
            "{\"items\": [{\"name\": \"subfolder/\"}]}"),
        new FakeHttpRequest(
            "Uri: https://www.googleapis.com/storage/v1/b/bucket/o?"
            "fields=items%2Fname%2CnextPageToken&prefix=subfolder%2F"
            "&maxResults=1\n"
-           "Auth Token: fake_token\n",
+           "Auth Token: fake_token\n"
+           "Timeouts: 5 1 10\n",
            "{\"items\": [{\"name\": \"subfolder/\"}]}")});
   GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
                    std::unique_ptr<HttpRequest::Factory>(
@@ -1849,7 +1975,7 @@ TEST(GcsFileSystemTest, IsDirectory_Yes) {
                    0 /* stat cache max age */, 0 /* stat cache max entries */,
                    0 /* matching paths cache max age */,
                    0 /* matching paths cache max entries */,
-                   0 /* initial retry delay */);
+                   0 /* initial retry delay*/, kTestTimeoutConfig);
 
   TF_EXPECT_OK(fs.IsDirectory("gs://bucket/subfolder"));
   TF_EXPECT_OK(fs.IsDirectory("gs://bucket/subfolder/"));
@@ -1859,11 +1985,13 @@ TEST(GcsFileSystemTest, IsDirectory_Bucket) {
   std::vector<HttpRequest*> requests(
       {new FakeHttpRequest(
            "Uri: https://www.googleapis.com/storage/v1/b/bucket\n"
-           "Auth Token: fake_token\n",
+           "Auth Token: fake_token\n"
+           "Timeouts: 5 1 10\n",
            "{}"),
        new FakeHttpRequest(
            "Uri: https://www.googleapis.com/storage/v1/b/bucket\n"
-           "Auth Token: fake_token\n",
+           "Auth Token: fake_token\n"
+           "Timeouts: 5 1 10\n",
            "{}")});
   GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
                    std::unique_ptr<HttpRequest::Factory>(
@@ -1872,7 +2000,7 @@ TEST(GcsFileSystemTest, IsDirectory_Bucket) {
                    0 /* stat cache max age */, 0 /* stat cache max entries */,
                    0 /* matching paths cache max age */,
                    0 /* matching paths cache max entries */,
-                   0 /* initial retry delay */);
+                   0 /* initial retry delay*/, kTestTimeoutConfig);
 
   TF_EXPECT_OK(fs.IsDirectory("gs://bucket"));
   TF_EXPECT_OK(fs.IsDirectory("gs://bucket/"));
@@ -1881,7 +2009,8 @@ TEST(GcsFileSystemTest, IsDirectory_Bucket) {
 TEST(GcsFileSystemTest, IsDirectory_BucketNotFound) {
   std::vector<HttpRequest*> requests({new FakeHttpRequest(
       "Uri: https://www.googleapis.com/storage/v1/b/bucket\n"
-      "Auth Token: fake_token\n",
+      "Auth Token: fake_token\n"
+      "Timeouts: 5 1 10\n",
       "", errors::NotFound("404"), 404)});
   GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
                    std::unique_ptr<HttpRequest::Factory>(
@@ -1890,7 +2019,7 @@ TEST(GcsFileSystemTest, IsDirectory_BucketNotFound) {
                    0 /* stat cache max age */, 0 /* stat cache max entries */,
                    0 /* matching paths cache max age */,
                    0 /* matching paths cache max entries */,
-                   0 /* initial retry delay */);
+                   0 /* initial retry delay*/, kTestTimeoutConfig);
 
   EXPECT_EQ(error::Code::NOT_FOUND, fs.IsDirectory("gs://bucket/").code());
 }
@@ -1902,10 +2031,12 @@ TEST(GcsFileSystemTest, CreateDir_Folder) {
            "uploadType=resumable&name=subpath%2F\n"
            "Auth Token: fake_token\n"
            "Header X-Upload-Content-Length: 0\n"
-           "Post: yes\n",
+           "Post: yes\n"
+           "Timeouts: 5 1 10\n",
            "", {{"Location", "https://custom/upload/location"}}),
        new FakeHttpRequest("Uri: https://custom/upload/location\n"
                            "Auth Token: fake_token\n"
+                           "Timeouts: 5 1 30\n"
                            "Put body: \n",
                            ""),
        new FakeHttpRequest(
@@ -1913,10 +2044,12 @@ TEST(GcsFileSystemTest, CreateDir_Folder) {
            "uploadType=resumable&name=subpath%2F\n"
            "Auth Token: fake_token\n"
            "Header X-Upload-Content-Length: 0\n"
-           "Post: yes\n",
+           "Post: yes\n"
+           "Timeouts: 5 1 10\n",
            "", {{"Location", "https://custom/upload/location"}}),
        new FakeHttpRequest("Uri: https://custom/upload/location\n"
                            "Auth Token: fake_token\n"
+                           "Timeouts: 5 1 30\n"
                            "Put body: \n",
                            "")});
   GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
@@ -1926,7 +2059,7 @@ TEST(GcsFileSystemTest, CreateDir_Folder) {
                    0 /* stat cache max age */, 0 /* stat cache max entries */,
                    0 /* matching paths cache max age */,
                    0 /* matching paths cache max entries */,
-                   0 /* initial retry delay */);
+                   0 /* initial retry delay*/, kTestTimeoutConfig);
 
   TF_EXPECT_OK(fs.CreateDir("gs://bucket/subpath"));
   TF_EXPECT_OK(fs.CreateDir("gs://bucket/subpath/"));
@@ -1936,11 +2069,13 @@ TEST(GcsFileSystemTest, CreateDir_Bucket) {
   std::vector<HttpRequest*> requests(
       {new FakeHttpRequest(
            "Uri: https://www.googleapis.com/storage/v1/b/bucket\n"
-           "Auth Token: fake_token\n",
+           "Auth Token: fake_token\n"
+           "Timeouts: 5 1 10\n",
            ""),
        new FakeHttpRequest(
            "Uri: https://www.googleapis.com/storage/v1/b/bucket\n"
-           "Auth Token: fake_token\n",
+           "Auth Token: fake_token\n"
+           "Timeouts: 5 1 10\n",
            "")});
   GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
                    std::unique_ptr<HttpRequest::Factory>(
@@ -1949,7 +2084,7 @@ TEST(GcsFileSystemTest, CreateDir_Bucket) {
                    0 /* stat cache max age */, 0 /* stat cache max entries */,
                    0 /* matching paths cache max age */,
                    0 /* matching paths cache max entries */,
-                   0 /* initial retry delay */);
+                   0 /* initial retry delay*/, kTestTimeoutConfig);
 
   TF_EXPECT_OK(fs.CreateDir("gs://bucket/"));
   TF_EXPECT_OK(fs.CreateDir("gs://bucket"));
@@ -1962,14 +2097,16 @@ TEST(GcsFileSystemTest, DeleteRecursively_Ok) {
            "Uri: https://www.googleapis.com/storage/v1/b/bucket/o?"
            "fields=items%2Fname%2CnextPageToken&prefix=path%2F"
            "&maxResults=1\n"
-           "Auth Token: fake_token\n",
+           "Auth Token: fake_token\n"
+           "Timeouts: 5 1 10\n",
            "{\"items\": [ "
            "  { \"name\": \"path/file1.txt\" }]}"),
        // GetChildren recursively.
        new FakeHttpRequest(
            "Uri: https://www.googleapis.com/storage/v1/b/bucket/o?"
            "fields=items%2Fname%2CnextPageToken&prefix=path%2F\n"
-           "Auth Token: fake_token\n",
+           "Auth Token: fake_token\n"
+           "Timeouts: 5 1 10\n",
            "{\"items\": [ "
            "  { \"name\": \"path/\" },"  // The current directory's marker.
            "  { \"name\": \"path/file1.txt\" },"
@@ -1979,30 +2116,35 @@ TEST(GcsFileSystemTest, DeleteRecursively_Ok) {
        new FakeHttpRequest("Uri: https://www.googleapis.com/storage/v1/b"
                            "/bucket/o/path%2F\n"
                            "Auth Token: fake_token\n"
+                           "Timeouts: 5 1 10\n"
                            "Delete: yes\n",
                            ""),
        // Delete the object - fails and will be retried.
        new FakeHttpRequest("Uri: https://www.googleapis.com/storage/v1/b"
                            "/bucket/o/path%2Ffile1.txt\n"
                            "Auth Token: fake_token\n"
+                           "Timeouts: 5 1 10\n"
                            "Delete: yes\n",
                            "", errors::Unavailable("500"), 500),
        // Delete the object again.
        new FakeHttpRequest("Uri: https://www.googleapis.com/storage/v1/b"
                            "/bucket/o/path%2Ffile1.txt\n"
                            "Auth Token: fake_token\n"
+                           "Timeouts: 5 1 10\n"
                            "Delete: yes\n",
                            ""),
        // Delete the object.
        new FakeHttpRequest("Uri: https://www.googleapis.com/storage/v1/b"
                            "/bucket/o/path%2Fsubpath%2Ffile2.txt\n"
                            "Auth Token: fake_token\n"
+                           "Timeouts: 5 1 10\n"
                            "Delete: yes\n",
                            ""),
        // Delete the object.
        new FakeHttpRequest("Uri: https://www.googleapis.com/storage/v1/b"
                            "/bucket/o/path%2Ffile3.txt\n"
                            "Auth Token: fake_token\n"
+                           "Timeouts: 5 1 10\n"
                            "Delete: yes\n",
                            "")});
   GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
@@ -2012,7 +2154,7 @@ TEST(GcsFileSystemTest, DeleteRecursively_Ok) {
                    0 /* stat cache max age */, 0 /* stat cache max entries */,
                    0 /* matching paths cache max age */,
                    0 /* matching paths cache max entries */,
-                   0 /* initial retry delay */);
+                   0 /* initial retry delay*/, kTestTimeoutConfig);
 
   int64 undeleted_files, undeleted_dirs;
   TF_EXPECT_OK(fs.DeleteRecursively("gs://bucket/path", &undeleted_files,
@@ -2028,14 +2170,16 @@ TEST(GcsFileSystemTest, DeleteRecursively_DeletionErrors) {
            "Uri: https://www.googleapis.com/storage/v1/b/bucket/o?"
            "fields=items%2Fname%2CnextPageToken&prefix=path%2F"
            "&maxResults=1\n"
-           "Auth Token: fake_token\n",
+           "Auth Token: fake_token\n"
+           "Timeouts: 5 1 10\n",
            "{\"items\": [ "
            "  { \"name\": \"path/file1.txt\" }]}"),
        // Calling GetChildren recursively.
        new FakeHttpRequest(
            "Uri: https://www.googleapis.com/storage/v1/b/bucket/o?"
            "fields=items%2Fname%2CnextPageToken&prefix=path%2F\n"
-           "Auth Token: fake_token\n",
+           "Auth Token: fake_token\n"
+           "Timeouts: 5 1 10\n",
            "{\"items\": [ "
            "  { \"name\": \"path/file1.txt\" },"
            "  { \"name\": \"path/subpath/\" },"
@@ -2045,12 +2189,14 @@ TEST(GcsFileSystemTest, DeleteRecursively_DeletionErrors) {
        new FakeHttpRequest("Uri: https://www.googleapis.com/storage/v1/b"
                            "/bucket/o/path%2Ffile1.txt\n"
                            "Auth Token: fake_token\n"
+                           "Timeouts: 5 1 10\n"
                            "Delete: yes\n",
                            ""),
        // Deleting the directory marker gs://bucket/path/ - fails with 404.
        new FakeHttpRequest("Uri: https://www.googleapis.com/storage/v1/b"
                            "/bucket/o/path%2Fsubpath%2F\n"
                            "Auth Token: fake_token\n"
+                           "Timeouts: 5 1 10\n"
                            "Delete: yes\n",
                            "", errors::NotFound("404"), 404),
        // Checking if gs://bucket/path/subpath/ is a folder - it is.
@@ -2058,19 +2204,22 @@ TEST(GcsFileSystemTest, DeleteRecursively_DeletionErrors) {
            "Uri: https://www.googleapis.com/storage/v1/b/bucket/o?"
            "fields=items%2Fname%2CnextPageToken&prefix=path%2Fsubpath%2F"
            "&maxResults=1\n"
-           "Auth Token: fake_token\n",
+           "Auth Token: fake_token\n"
+           "Timeouts: 5 1 10\n",
            strings::StrCat("{\"items\": [ "
                            "    { \"name\": \"path/subpath/\" }]}")),
        // Deleting the object gs://bucket/path/subpath/file2.txt
        new FakeHttpRequest("Uri: https://www.googleapis.com/storage/v1/b"
                            "/bucket/o/path%2Fsubpath%2Ffile2.txt\n"
                            "Auth Token: fake_token\n"
+                           "Timeouts: 5 1 10\n"
                            "Delete: yes\n",
                            ""),
        // Deleting the object s://bucket/path/file3.txt - fails with 404.
        new FakeHttpRequest("Uri: https://www.googleapis.com/storage/v1/b"
                            "/bucket/o/path%2Ffile3.txt\n"
                            "Auth Token: fake_token\n"
+                           "Timeouts: 5 1 10\n"
                            "Delete: yes\n",
                            "", errors::NotFound("404"), 404),
        // Checking if gs://bucket/path/file3.txt/ is a folder - it's not.
@@ -2078,13 +2227,15 @@ TEST(GcsFileSystemTest, DeleteRecursively_DeletionErrors) {
            "Uri: https://www.googleapis.com/storage/v1/b/bucket/o?"
            "fields=items%2Fname%2CnextPageToken&prefix=path%2Ffile3.txt%2F"
            "&maxResults=1\n"
-           "Auth Token: fake_token\n",
+           "Auth Token: fake_token\n"
+           "Timeouts: 5 1 10\n",
            "{}"),
        // Checking if gs://bucket/path/file3.txt is an object - fails with 404.
        new FakeHttpRequest(
            "Uri: https://www.googleapis.com/storage/v1/b/bucket/o/"
            "path%2Ffile3.txt?fields=size%2Cupdated\n"
-           "Auth Token: fake_token\n",
+           "Auth Token: fake_token\n"
+           "Timeouts: 5 1 10\n",
            "", errors::NotFound("404"), 404)});
 
   GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
@@ -2094,7 +2245,7 @@ TEST(GcsFileSystemTest, DeleteRecursively_DeletionErrors) {
                    0 /* stat cache max age */, 0 /* stat cache max entries */,
                    0 /* matching paths cache max age */,
                    0 /* matching paths cache max entries */,
-                   0 /* initial retry delay */);
+                   0 /* initial retry delay*/, kTestTimeoutConfig);
 
   int64 undeleted_files, undeleted_dirs;
   TF_EXPECT_OK(fs.DeleteRecursively("gs://bucket/path", &undeleted_files,
@@ -2110,13 +2261,15 @@ TEST(GcsFileSystemTest, DeleteRecursively_NotAFolder) {
            "Uri: https://www.googleapis.com/storage/v1/b/bucket/o?"
            "fields=items%2Fname%2CnextPageToken&prefix=path%2F"
            "&maxResults=1\n"
-           "Auth Token: fake_token\n",
+           "Auth Token: fake_token\n"
+           "Timeouts: 5 1 10\n",
            "{}"),
        // IsDirectory is checking if the path exists as an object.
        new FakeHttpRequest(
            "Uri: https://www.googleapis.com/storage/v1/b/bucket/o/"
            "path?fields=size%2Cupdated\n"
-           "Auth Token: fake_token\n",
+           "Auth Token: fake_token\n"
+           "Timeouts: 5 1 10\n",
            "", errors::NotFound("404"), 404)});
   GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
                    std::unique_ptr<HttpRequest::Factory>(
@@ -2125,7 +2278,7 @@ TEST(GcsFileSystemTest, DeleteRecursively_NotAFolder) {
                    0 /* stat cache max age */, 0 /* stat cache max entries */,
                    0 /* matching paths cache max age */,
                    0 /* matching paths cache max entries */,
-                   0 /* initial retry delay */);
+                   0 /* initial retry delay*/, kTestTimeoutConfig);
 
   int64 undeleted_files, undeleted_dirs;
   EXPECT_EQ(error::Code::NOT_FOUND,
@@ -2142,6 +2295,11 @@ TEST(GcsFileSystemTest, OverrideCacheParameters) {
   EXPECT_EQ(128 * 1024 * 1024, fs1.block_size());
   EXPECT_EQ(2 * fs1.block_size(), fs1.max_bytes());
   EXPECT_EQ(0, fs1.max_staleness());
+  EXPECT_EQ(120, fs1.timeouts().connect);
+  EXPECT_EQ(60, fs1.timeouts().idle);
+  EXPECT_EQ(3600, fs1.timeouts().metadata);
+  EXPECT_EQ(3600, fs1.timeouts().read);
+  EXPECT_EQ(3600, fs1.timeouts().write);
 
   // Verify legacy readahead buffer override sets block size.
   setenv("GCS_READAHEAD_BUFFER_SIZE_BYTES", "123456789", 1);
@@ -2167,6 +2325,19 @@ TEST(GcsFileSystemTest, OverrideCacheParameters) {
   EXPECT_EQ(32, fs4.stat_cache_max_entries());
   EXPECT_EQ(30, fs4.matching_paths_cache_max_age());
   EXPECT_EQ(64, fs4.matching_paths_cache_max_entries());
+
+  // Verify timeout overrides.
+  setenv("GCS_REQUEST_CONNECTION_TIMEOUT_SECS", "10", 1);
+  setenv("GCS_REQUEST_IDLE_TIMEOUT_SECS", "5", 1);
+  setenv("GCS_METADATA_REQUEST_TIMEOUT_SECS", "20", 1);
+  setenv("GCS_READ_REQUEST_TIMEOUT_SECS", "30", 1);
+  setenv("GCS_WRITE_REQUEST_TIMEOUT_SECS", "40", 1);
+  GcsFileSystem fs5;
+  EXPECT_EQ(10, fs5.timeouts().connect);
+  EXPECT_EQ(5, fs5.timeouts().idle);
+  EXPECT_EQ(20, fs5.timeouts().metadata);
+  EXPECT_EQ(30, fs5.timeouts().read);
+  EXPECT_EQ(40, fs5.timeouts().write);
 }
 
 }  // namespace
diff --git a/tensorflow/core/platform/cloud/http_request.h b/tensorflow/core/platform/cloud/http_request.h
index 02d9e9054a..95a436c622 100644
--- a/tensorflow/core/platform/cloud/http_request.h
+++ b/tensorflow/core/platform/cloud/http_request.h
@@ -118,6 +118,16 @@ class HttpRequest {
   // Url encodes str and returns a new string.
   virtual string EscapeString(const string& str) = 0;
 
+  /// \brief Set timeouts for this request.
+  ///
+  /// The connection parameter controls how long we should wait for the
+  /// connection to be established. The inactivity parameter controls how long
+  /// we should wait between additional responses from the server. Finally the
+  /// total parameter controls the maximum total connection time to prevent
+  /// hanging indefinitely.
+  virtual Status SetTimeouts(uint32 connection, uint32 inactivity,
+                             uint32 total) = 0;
+
   TF_DISALLOW_COPY_AND_ASSIGN(HttpRequest);
 };
 
diff --git a/tensorflow/core/platform/cloud/http_request_fake.h b/tensorflow/core/platform/cloud/http_request_fake.h
index bfe04f6363..22398d739c 100644
--- a/tensorflow/core/platform/cloud/http_request_fake.h
+++ b/tensorflow/core/platform/cloud/http_request_fake.h
@@ -37,13 +37,14 @@ class FakeHttpRequest : public CurlHttpRequest {
  public:
   /// Return the response for the given request.
   FakeHttpRequest(const string& request, const string& response)
-      : FakeHttpRequest(request, response, Status::OK(), nullptr, {}, 200) {}
+      : FakeHttpRequest(request, response, Status::OK(), nullptr, {}, 200, {}) {
+  }
 
   /// Return the response with headers for the given request.
   FakeHttpRequest(const string& request, const string& response,
                   const std::map<string, string>& response_headers)
       : FakeHttpRequest(request, response, Status::OK(), nullptr,
-                        response_headers, 200) {}
+                        response_headers, 200, {}) {}
 
   /// \brief Return the response for the request and capture the POST body.
   ///
@@ -51,13 +52,13 @@ class FakeHttpRequest : public CurlHttpRequest {
   FakeHttpRequest(const string& request, const string& response,
                   string* captured_post_body)
       : FakeHttpRequest(request, response, Status::OK(), captured_post_body, {},
-                        200) {}
+                        200, {}) {}
 
   /// \brief Return the response and the status for the given request.
   FakeHttpRequest(const string& request, const string& response,
                   Status response_status, uint64 response_code)
       : FakeHttpRequest(request, response, response_status, nullptr, {},
-                        response_code) {}
+                        response_code, {}) {}
 
   /// \brief Return the response and the status for the given request
   ///  and capture the POST body.
@@ -66,13 +67,15 @@ class FakeHttpRequest : public CurlHttpRequest {
   FakeHttpRequest(const string& request, const string& response,
                   Status response_status, string* captured_post_body,
                   const std::map<string, string>& response_headers,
-                  uint64 response_code)
+                  uint64 response_code,
+                  absl::optional<std::tuple<uint32, uint32, uint32>> timeouts)
       : expected_request_(request),
         response_(response),
         response_status_(response_status),
         captured_post_body_(captured_post_body),
         response_headers_(response_headers),
-        response_code_(response_code) {}
+        response_code_(response_code),
+        timeouts_(timeouts) {}
 
   Status Init() override { return Status::OK(); }
   Status SetUri(const string& uri) override {
@@ -160,6 +163,13 @@ class FakeHttpRequest : public CurlHttpRequest {
 
   virtual uint64 GetResponseCode() const override { return response_code_; }
 
+  Status SetTimeouts(uint32 connection, uint32 inactivity,
+                     uint32 total) override {
+    actual_request_ += strings::StrCat("Timeouts: ", connection, " ",
+                                       inactivity, " ", total, "\n");
+    return Status::OK();
+  }
+
  private:
   std::vector<char>* buffer_ = nullptr;
   string expected_request_;
@@ -169,6 +179,7 @@ class FakeHttpRequest : public CurlHttpRequest {
   string* captured_post_body_ = nullptr;
   std::map<string, string> response_headers_;
   uint64 response_code_ = 0;
+  absl::optional<std::tuple<uint32, uint32, uint32>> timeouts_;
 };
 
 /// Fake HttpRequest factory for testing.
-- 
GitLab


From 3aae2248bfef01990d219fc9434ae6eb2eb054c2 Mon Sep 17 00:00:00 2001
From: Akshay Agrawal <akshayka@google.com>
Date: Fri, 1 Dec 2017 11:11:38 -0800
Subject: [PATCH 0511/1225] Add `dtype` keyword argument to
 `_EagerTensorBase`'s  `__array__` method.

This change makes `__array__` conform to numpy's API.

PiperOrigin-RevId: 177614929
---
 tensorflow/python/eager/tensor_test.py | 5 +++++
 tensorflow/python/framework/ops.py     | 4 ++--
 2 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/tensorflow/python/eager/tensor_test.py b/tensorflow/python/eager/tensor_test.py
index 7a4593ec46..727f80efb4 100644
--- a/tensorflow/python/eager/tensor_test.py
+++ b/tensorflow/python/eager/tensor_test.py
@@ -106,6 +106,11 @@ class TFETensorTest(test_util.TensorFlowTestCase):
     t = _create_tensor(n)
     self.assertAllEqual([[1, 2], [3, 4]], t)
 
+  def testNumpyArrayDtype(self):
+    tensor = constant_op.constant([1.0, 2.0, 3.0])
+    numpy_tensor = np.asarray(tensor, dtype=np.int32)
+    self.assertAllEqual(numpy_tensor, [1, 2, 3])
+
   def testCopy(self):
     t = constant_op.constant(1.0)
     tt = copy.copy(t)
diff --git a/tensorflow/python/framework/ops.py b/tensorflow/python/framework/ops.py
index 13e6426447..52c1c7d26c 100644
--- a/tensorflow/python/framework/ops.py
+++ b/tensorflow/python/framework/ops.py
@@ -671,8 +671,8 @@ class _EagerTensorBase(Tensor):
   def __float__(self):
     return float(self.numpy())
 
-  def __array__(self):
-    return np.array(self.numpy())
+  def __array__(self, dtype=None):
+    return np.array(self.numpy(), dtype=dtype)
 
   def __format__(self, format_spec):
     return self.numpy().__format__(format_spec)
-- 
GitLab


From 1a4d634a04915518788e14a6c96bcff4803e0882 Mon Sep 17 00:00:00 2001
From: DONGGEON LIM <ooqwe486@gmail.com>
Date: Sat, 2 Dec 2017 04:20:20 +0900
Subject: [PATCH 0512/1225] Fix deprecated function (#15020)

---
 tensorflow/examples/speech_commands/train.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/examples/speech_commands/train.py b/tensorflow/examples/speech_commands/train.py
index f46d5e59b4..a4141b4917 100644
--- a/tensorflow/examples/speech_commands/train.py
+++ b/tensorflow/examples/speech_commands/train.py
@@ -160,7 +160,7 @@ def main(_):
   evaluation_step = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
   tf.summary.scalar('accuracy', evaluation_step)
 
-  global_step = tf.contrib.framework.get_or_create_global_step()
+  global_step = tf.train.get_or_create_global_step()
   increment_global_step = tf.assign(global_step, global_step + 1)
 
   saver = tf.train.Saver(tf.global_variables())
-- 
GitLab


From 1fa94d5a5da6849e78ee3b86cb7a5fd6e2d8ba10 Mon Sep 17 00:00:00 2001
From: Alan Du <alan.h.du@gmail.com>
Date: Fri, 1 Dec 2017 14:20:52 -0500
Subject: [PATCH 0513/1225] Only install enum34 on Python <3.4 versions
 (#15042)

Python 3.6 sometimes has issues with enum34 because the standard library
relies on enum features not in enum34 (see
https://bitbucket.org/stoneleaf/enum34/issues/19/enum34-isnt-compatible-with-python-36
for more details).
---
 tensorflow/tools/pip_package/setup.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tensorflow/tools/pip_package/setup.py b/tensorflow/tools/pip_package/setup.py
index c18f20910a..0aa16acd7d 100644
--- a/tensorflow/tools/pip_package/setup.py
+++ b/tensorflow/tools/pip_package/setup.py
@@ -33,7 +33,6 @@ _VERSION = '1.4.0'
 
 REQUIRED_PACKAGES = [
     'absl-py',
-    'enum34 >= 1.1.6',
     'numpy >= 1.12.1',
     'six >= 1.10.0',
     'protobuf >= 3.4.0',
@@ -62,9 +61,10 @@ if 'tf_nightly' in project_name:
       REQUIRED_PACKAGES[i] = 'tb-nightly >= 1.5.0a0, < 1.6.0a0'
       break
 
-# weakref.finalize was introduced in Python 3.4
+# weakref.finalize and enum were introduced in Python 3.4
 if sys.version_info < (3, 4):
   REQUIRED_PACKAGES.append('backports.weakref >= 1.0rc1')
+  REQUIRED_PACKAGES.append('enum34 >= 1.1.6')
 
 # pylint: disable=line-too-long
 CONSOLE_SCRIPTS = [
-- 
GitLab


From 76b3531362775f1f8e0ca18581a1fb6c089831c5 Mon Sep 17 00:00:00 2001
From: Jeff <jwh0118@me.com>
Date: Fri, 1 Dec 2017 11:21:21 -0800
Subject: [PATCH 0514/1225] PeriodicResample Op: Fixes #9369 (clean history)
 (#14339)

* redoing PR

* removed periodic_resample from core_kernel

* added back the missing keras line in CMake build
---
 tensorflow/contrib/BUILD                      |   1 +
 tensorflow/contrib/__init__.py                |   1 +
 tensorflow/contrib/cmake/tf_core_ops.cmake    |   1 +
 tensorflow/contrib/cmake/tf_python.cmake      |  21 ++
 tensorflow/contrib/cmake/tf_tests.cmake       |   2 +
 tensorflow/contrib/periodic_resample/BUILD    | 114 +++++++++
 .../contrib/periodic_resample/__init__.py     |  27 +++
 .../kernels/periodic_resample_op.cc           |  26 ++
 .../kernels/periodic_resample_op.h            | 223 ++++++++++++++++++
 .../periodic_resample/ops/array_ops.cc        |  89 +++++++
 .../periodic_resample/python/__init__.py      |  20 ++
 .../kernel_tests/periodic_resample_op_test.py | 101 ++++++++
 .../python/ops/periodic_resample_op.py        |  30 +++
 13 files changed, 656 insertions(+)
 create mode 100644 tensorflow/contrib/periodic_resample/BUILD
 create mode 100644 tensorflow/contrib/periodic_resample/__init__.py
 create mode 100644 tensorflow/contrib/periodic_resample/kernels/periodic_resample_op.cc
 create mode 100644 tensorflow/contrib/periodic_resample/kernels/periodic_resample_op.h
 create mode 100644 tensorflow/contrib/periodic_resample/ops/array_ops.cc
 create mode 100644 tensorflow/contrib/periodic_resample/python/__init__.py
 create mode 100644 tensorflow/contrib/periodic_resample/python/kernel_tests/periodic_resample_op_test.py
 create mode 100644 tensorflow/contrib/periodic_resample/python/ops/periodic_resample_op.py

diff --git a/tensorflow/contrib/BUILD b/tensorflow/contrib/BUILD
index b7ade95115..61f7821519 100644
--- a/tensorflow/contrib/BUILD
+++ b/tensorflow/contrib/BUILD
@@ -64,6 +64,7 @@ py_library(
         "//tensorflow/contrib/nearest_neighbor:nearest_neighbor_py",
         "//tensorflow/contrib/nn:nn_py",
         "//tensorflow/contrib/opt:opt_py",
+        "//tensorflow/contrib/periodic_resample:init_py",
         "//tensorflow/contrib/predictor",
         "//tensorflow/contrib/quantization:quantization_py",
         "//tensorflow/contrib/quantize:quantize_graph",
diff --git a/tensorflow/contrib/__init__.py b/tensorflow/contrib/__init__.py
index 1eda1abfcf..08247c6b38 100644
--- a/tensorflow/contrib/__init__.py
+++ b/tensorflow/contrib/__init__.py
@@ -55,6 +55,7 @@ from tensorflow.contrib import model_pruning
 from tensorflow.contrib import nccl
 from tensorflow.contrib import nn
 from tensorflow.contrib import opt
+from tensorflow.contrib import periodic_resample
 from tensorflow.contrib import predictor
 from tensorflow.contrib import quantization
 from tensorflow.contrib import quantize
diff --git a/tensorflow/contrib/cmake/tf_core_ops.cmake b/tensorflow/contrib/cmake/tf_core_ops.cmake
index 4a61ed7a35..e8c2cd3473 100644
--- a/tensorflow/contrib/cmake/tf_core_ops.cmake
+++ b/tensorflow/contrib/cmake/tf_core_ops.cmake
@@ -92,6 +92,7 @@ GENERATE_CONTRIB_OP_LIBRARY(image_sirds "${tensorflow_source_dir}/tensorflow/con
 GENERATE_CONTRIB_OP_LIBRARY(layers_sparse_feature_cross "${tensorflow_source_dir}/tensorflow/contrib/layers/ops/sparse_feature_cross_op.cc")
 GENERATE_CONTRIB_OP_LIBRARY(memory_stats "${tensorflow_source_dir}/tensorflow/contrib/memory_stats/ops/memory_stats_ops.cc")
 GENERATE_CONTRIB_OP_LIBRARY(nccl "${tensorflow_source_dir}/tensorflow/contrib/nccl/ops/nccl_ops.cc")
+GENERATE_CONTRIB_OP_LIBRARY(periodic_resample "${tensorflow_source_dir}/tensorflow/contrib/periodic_resample/ops/array_ops.cc")
 GENERATE_CONTRIB_OP_LIBRARY(nearest_neighbor "${tensorflow_source_dir}/tensorflow/contrib/nearest_neighbor/ops/nearest_neighbor_ops.cc")
 GENERATE_CONTRIB_OP_LIBRARY(resampler "${tensorflow_source_dir}/tensorflow/contrib/resampler/ops/resampler_ops.cc")
 GENERATE_CONTRIB_OP_LIBRARY(rnn_gru "${tensorflow_source_dir}/tensorflow/contrib/rnn/ops/gru_ops.cc")
diff --git a/tensorflow/contrib/cmake/tf_python.cmake b/tensorflow/contrib/cmake/tf_python.cmake
index 0128946e45..5e15a972d6 100755
--- a/tensorflow/contrib/cmake/tf_python.cmake
+++ b/tensorflow/contrib/cmake/tf_python.cmake
@@ -562,6 +562,10 @@ add_python_module("tensorflow/contrib/pi_examples")
 add_python_module("tensorflow/contrib/pi_examples/camera")
 add_python_module("tensorflow/contrib/pi_examples/label_image")
 add_python_module("tensorflow/contrib/pi_examples/label_image/data")
+add_python_module("tensorflow/contrib/periodic_resample")
+add_python_module("tensorflow/contrib/periodic_resample/python")
+add_python_module("tensorflow/contrib/periodic_resample/python/ops")
+add_python_module("tensorflow/contrib/periodic_resample/python/kernel_tests")
 add_python_module("tensorflow/contrib/predictor")
 add_python_module("tensorflow/contrib/quantization")
 add_python_module("tensorflow/contrib/quantization/python")
@@ -817,6 +821,9 @@ GENERATE_PYTHON_OP_LIB("contrib_memory_stats_ops"
   DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/tf_python/tensorflow/contrib/memory_stats/ops/gen_memory_stats_ops.py)
 GENERATE_PYTHON_OP_LIB("contrib_nccl_ops"
   DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/tf_python/tensorflow/contrib/nccl/ops/gen_nccl_ops.py)
+GENERATE_PYTHON_OP_LIB("contrib_periodic_resample_ops"
+  DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/tf_python/tensorflow/contrib/periodic_resample/python/ops/gen_periodic_resample_op.py)
+
 GENERATE_PYTHON_OP_LIB("contrib_nearest_neighbor_ops"
   DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/tf_python/tensorflow/contrib/nearest_neighbor/ops/gen_nearest_neighbor_ops.py)
 GENERATE_PYTHON_OP_LIB("contrib_resampler_ops"
@@ -1015,6 +1022,20 @@ target_link_libraries(pywrap_tensorflow_internal PRIVATE
 )
 
 if(WIN32)
+
+    # include contrib/periodic_resample as .so
+    #
+    set(tf_periodic_resample_srcs
+       "${tensorflow_source_dir}/tensorflow/contrib/periodic_resample/kernels/periodic_resample_op.cc"
+       "${tensorflow_source_dir}/tensorflow/contrib/periodic_resample/kernels/periodic_resample_op.h"
+       "${tensorflow_source_dir}/tensorflow/contrib/periodic_resample/ops/array_ops.cc"
+    )
+
+    AddUserOps(TARGET _periodic_resample_op
+        SOURCES "${tf_periodic_resample_srcs}"
+        DEPENDS pywrap_tensorflow_internal tf_python_ops
+        DISTCOPY ${CMAKE_CURRENT_BINARY_DIR}/tf_python/tensorflow/contrib/periodic_resample/python/ops/)
+
     # include contrib/nearest_neighbor as .so
     #
     set(tf_nearest_neighbor_srcs
diff --git a/tensorflow/contrib/cmake/tf_tests.cmake b/tensorflow/contrib/cmake/tf_tests.cmake
index 18b71d1f9a..7884631d6d 100644
--- a/tensorflow/contrib/cmake/tf_tests.cmake
+++ b/tensorflow/contrib/cmake/tf_tests.cmake
@@ -154,6 +154,7 @@ if (tensorflow_BUILD_PYTHON_TESTS)
     "${tensorflow_source_dir}/tensorflow/contrib/factorization/*_test.py"
     "${tensorflow_source_dir}/tensorflow/contrib/image/*_test.py"
     "${tensorflow_source_dir}/tensorflow/python/keras/_impl/keras/*_test.py"
+    "${tensorflow_source_dir}/tensorflow/contrib/periodic_resample/python/kernel_tests/*_test.py"
     "${tensorflow_source_dir}/tensorflow/contrib/nearest_neighbor/python/kernel_tests/*_test.py"
     "${tensorflow_source_dir}/tensorflow/contrib/seq2seq/python/kernel_tests/*_test.py"
     "${tensorflow_source_dir}/tensorflow/contrib/stateless/python/kernel_tests/*_test.py"
@@ -224,6 +225,7 @@ if (tensorflow_BUILD_PYTHON_TESTS)
       # Numerical issues, calculations off.
       "${tensorflow_source_dir}/tensorflow/python/kernel_tests/concat_op_test.py"
       "${tensorflow_source_dir}/tensorflow/contrib/factorization/python/ops/wals_test.py"
+      "${tensorflow_source_dir}/tensorflow/contrib/periodic_resample/python/kernel_tests/periodic_resample_op_test.py"
       "${tensorflow_source_dir}/tensorflow/python/keras/_impl/keras/utils/data_utils_test.py"
       # Float division by zero
       "${tensorflow_source_dir}/tensorflow/python/kernel_tests/benchmark_test.py"
diff --git a/tensorflow/contrib/periodic_resample/BUILD b/tensorflow/contrib/periodic_resample/BUILD
new file mode 100644
index 0000000000..25d700f120
--- /dev/null
+++ b/tensorflow/contrib/periodic_resample/BUILD
@@ -0,0 +1,114 @@
+licenses(["notice"])  # Apache 2.0
+
+exports_files(["LICENSE"])
+
+package(default_visibility = ["//visibility:public"])
+
+load(
+    "//tensorflow:tensorflow.bzl",
+    "tf_gen_op_libs",
+    "tf_py_test",
+    "tf_custom_op_library",
+    "tf_custom_op_py_library",
+    "tf_gen_op_wrapper_py",
+)
+
+cc_library(
+    name = "all_ops",
+    srcs = [":custom_op_sources"],
+    hdrs = [":custom_op_headers"],
+    deps = [
+        "//tensorflow/core:framework_headers_lib",
+        "//third_party/eigen3",
+        "@protobuf_archive//:protobuf_headers",
+    ],
+    alwayslink = 1,
+)
+
+tf_custom_op_library(
+    name = "python/ops/_periodic_resample_op.so",
+    srcs = [
+        ":custom_op_headers",
+        ":custom_op_sources",
+    ],
+)
+
+tf_gen_op_libs(
+    op_lib_names = ["array_ops"],
+)
+
+tf_gen_op_wrapper_py(
+    name = "gen_periodic_resample_op_py",
+    out = "python/ops/gen_periodic_resample_op.py",
+    deps = [":array_ops_op_lib"],
+)
+
+tf_custom_op_py_library(
+    name = "periodic_resample_op_py",
+    srcs = ["python/ops/periodic_resample_op.py"],
+    dso = ["python/ops/_periodic_resample_op.so"],
+    kernels = [
+        ":array_ops_op_lib",
+    ],
+    srcs_version = "PY2AND3",
+    deps = [
+        ":gen_periodic_resample_op_py",
+        "//tensorflow/core:protos_all_py",
+        "//tensorflow/python:framework_for_generated_wrappers",
+    ],
+)
+
+py_library(
+    name = "init_py",
+    srcs = [
+        "__init__.py",
+        "python/__init__.py",
+    ],
+    srcs_version = "PY2AND3",
+    deps = [
+        ":periodic_resample_op_py",
+    ],
+)
+
+# py_library(
+#     name = "periodic_resample_op_py",
+#     srcs = ["python/ops/periodic_resample_op.py"],
+#     data = ["python/ops/_periodic_resample_op.so"],
+#     srcs_version = "PY2AND3",
+# )
+
+filegroup(
+    name = "all_files",
+    srcs = glob(
+        ["**/*"],
+        exclude = [
+            "**/METADATA",
+            "**/OWNERS",
+        ],
+    ),
+    visibility = ["//tensorflow:__subpackages__"],
+)
+
+filegroup(
+    name = "custom_op_sources",
+    srcs = glob(
+        [
+            "ops/*.cc",
+            "kernels/*.cc",
+        ],
+        exclude = [
+            "ops/*_test.cc",
+            "kernels/*_test.cc",
+        ],
+    ),
+)
+
+filegroup(
+    name = "custom_op_headers",
+    srcs = glob(
+        [
+            "kernels/*.h",
+            "ops/*.h",
+        ],
+    ),
+)
diff --git a/tensorflow/contrib/periodic_resample/__init__.py b/tensorflow/contrib/periodic_resample/__init__.py
new file mode 100644
index 0000000000..fde9091b88
--- /dev/null
+++ b/tensorflow/contrib/periodic_resample/__init__.py
@@ -0,0 +1,27 @@
+# =============================================================================
+# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# =============================================================================
+
+"""Custom op used by periodic_resample."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from tensorflow.contrib.periodic_resample.python.ops.periodic_resample_op import periodic_resample
+from tensorflow.python.util.all_util import remove_undocumented
+
+_allowed_symbols = ["periodic_resample"]
+
+remove_undocumented(__name__, _allowed_symbols)
diff --git a/tensorflow/contrib/periodic_resample/kernels/periodic_resample_op.cc b/tensorflow/contrib/periodic_resample/kernels/periodic_resample_op.cc
new file mode 100644
index 0000000000..9cee405cef
--- /dev/null
+++ b/tensorflow/contrib/periodic_resample/kernels/periodic_resample_op.cc
@@ -0,0 +1,26 @@
+// =============================================================================
+// Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+// =============================================================================
+
+#include "tensorflow/core/framework/register_types.h"
+#include "tensorflow/contrib/periodic_resample/kernels/periodic_resample_op.h"
+
+namespace tensorflow {
+
+REGISTER_KERNEL_BUILDER(Name("PeriodicResample")
+                            .Device(DEVICE_CPU),
+                        PeriodicResampleOp);
+
+}  // namespace tensorflow
diff --git a/tensorflow/contrib/periodic_resample/kernels/periodic_resample_op.h b/tensorflow/contrib/periodic_resample/kernels/periodic_resample_op.h
new file mode 100644
index 0000000000..72e355deb7
--- /dev/null
+++ b/tensorflow/contrib/periodic_resample/kernels/periodic_resample_op.h
@@ -0,0 +1,223 @@
+// =============================================================================
+// Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+// =============================================================================
+
+#ifndef TENSORFLOW_KERNELS_PERIODICRESAMPLE_OP_H_
+#define TENSORFLOW_KERNELS_PERIODICRESAMPLE_OP_H_
+
+#include <cmath>
+#include <type_traits>
+#include <vector>
+#include "tensorflow/core/lib/core/status.h"
+#include "tensorflow/core/framework/op.h"
+#include "tensorflow/core/framework/op_kernel.h"
+#include "tensorflow/core/framework/shape_inference.h"
+#include "tensorflow/core/framework/tensor_shape.h"
+
+namespace {
+
+  template <class IndexVecT, class IndexT>
+  IndexT compute_input_index(
+      IndexVecT* target_dimensions, const IndexT& output_index,
+      const IndexVecT& original_dimensions, const int& adjustable_dimension,
+      const std::vector<tensorflow::int64>& dimension_ceiling,
+      const std::vector<tensorflow::int64>& cumulative_dimensions,
+      IndexT* result,
+      std::vector<IndexT>* output_indices,
+      const int& rank) {
+
+    *result = 0;
+    output_indices->clear();
+
+    // un-rasterize the output index
+    auto last_reduced_i = output_index;
+    for (auto r = rank - 1; r >= 0; --r) {
+      (*output_indices)[r] = last_reduced_i % (*target_dimensions)[r];
+      last_reduced_i = (last_reduced_i - (*output_indices)[r]) / (*target_dimensions)[r];
+    }
+
+    // rasterize the input index
+    IndexT last_index_factor = 1;
+    for (auto r = rank - 1; r >= 0; --r) {
+      IndexT index = 0;
+      if (r != adjustable_dimension)
+        index = (*output_indices)[r] / dimension_ceiling[r];
+      else {
+        for (int qi = 0; qi < rank; ++qi) {
+          if (qi == adjustable_dimension) continue;
+          index += cumulative_dimensions[qi] * ((*output_indices)[qi] % dimension_ceiling[qi]);
+        }
+        index *= (*target_dimensions)[adjustable_dimension];
+        index += (*output_indices)[r];
+      }
+      *result += last_index_factor * index;
+      last_index_factor *= original_dimensions[r];
+    }
+
+    return *result;
+  }
+
+  template <class InputDataT, class IndexVecT> // both types are needed here b/c IndexVecT and InputDataT are not related
+  void fill_periodic_tensor(tensorflow::OpKernelContext* context,
+                            const IndexVecT& desired_shape,
+                            const tensorflow::Tensor& input_tensor) {
+    // input is a strided array (last index is fastest, C-ordered)
+    auto input = input_tensor.flat<InputDataT>();
+    const int rank = input_tensor.dims();
+    const auto original_size = input.size();
+    // original and target dimensions
+    std::vector<tensorflow::int64> original_dimensions(rank),
+                                   target_dimensions(rank);
+    tensorflow::int64 total_size(input_tensor.NumElements()),
+                      new_sliced_size(1);
+    // factors by which original_dimensions increases/decreases w.r.t. target_dimensions
+    std::vector<tensorflow::int64> dimension_ceiling(rank),
+                                   cumulative_dimensions(rank);
+    // index of adjustable dimension
+    int adjustable_dimension;
+    tensorflow::TensorShape output_shape;
+
+    // requires that the rank of the input tensor and length of the desired shape
+    // are equal
+    OP_REQUIRES(context, rank == desired_shape.size(),
+                tensorflow::errors::InvalidArgument(
+                    "periodic_resample expects the rank of the input tensor, ",
+                    rank, ", to be the same as the length of the desired shape, ",
+                    desired_shape.size(), "."));
+
+    bool found = false;
+    for (int i = 0; i < rank; ++i) {
+      // if (desired_shape(i) < 1) {
+      if (desired_shape[i] < 1) {
+        // only one index can be adjustable
+        OP_REQUIRES(context, !found,
+                    tensorflow::errors::InvalidArgument(
+                        "periodic_resample expects only "
+                        "one index to be marked as adjustable."));
+        adjustable_dimension = i;
+        found = true;
+      } else {
+        // target_dimensions[i] = desired_shape(i);
+        target_dimensions[i] = desired_shape[i];
+        new_sliced_size *= target_dimensions[i];
+      }
+    }
+    // at least one index needs to be adjustable
+    OP_REQUIRES(context, found, tensorflow::errors::InvalidArgument(
+                                    "periodic_resample expects at least "
+                                    "one index to be marked as adjustable."));
+
+    int count = 0;
+    for (const auto dim_info : input_tensor.shape()) {
+      original_dimensions[count] = dim_info.size;
+      ++count;
+    }
+
+    target_dimensions[adjustable_dimension] = total_size / new_sliced_size;
+
+    count = 0;
+    for (const auto dim_info : input_tensor.shape()) {
+      dimension_ceiling[count] = tensorflow::int64(
+        std::ceil(float(target_dimensions[count]) / float(original_dimensions[count]))
+      );
+      if (count == 0)
+        cumulative_dimensions[count] = 1;
+      else
+        cumulative_dimensions[count] = cumulative_dimensions[count - 1] * dimension_ceiling[count - 1];
+      ++count;
+    }
+
+    // ensure that the new dimension is greater than zero
+    OP_REQUIRES(context, target_dimensions[adjustable_dimension] > 0,
+                tensorflow::errors::InvalidArgument(
+                    "periodic_resample found that the "
+                    "adjustable dimension, ",
+                    adjustable_dimension,
+                    ", isn't greater than zero, ",
+                    target_dimensions[adjustable_dimension], "."));
+    for (int i = 0; i < rank; ++i) {
+      output_shape.AddDim(target_dimensions[i]);
+    }
+    const auto new_size = new_sliced_size * target_dimensions[adjustable_dimension];
+
+    // Create an output tensor and attach it to the current context
+    tensorflow::Tensor* output_tensor = nullptr;
+    OP_REQUIRES_OK(context,
+                   context->allocate_output(0, output_shape, &output_tensor));
+    auto output = output_tensor->flat<InputDataT>();
+
+    // memory is allocated for these variables outside the inner loop for
+    // efficiency (although, I could create a separate class scope for
+    // this purpose instead)
+    tensorflow::int64 result = 0;
+    std::vector<tensorflow::int64> output_indices(target_dimensions.size());
+
+    // Fill output tensor with periodically resampled input tensor values
+    for (tensorflow::int64 output_index = 0;
+         output_index < new_size; ++output_index) {
+      output(output_index) = input(
+          compute_input_index(&target_dimensions, output_index,
+                              original_dimensions, adjustable_dimension,
+                              dimension_ceiling, cumulative_dimensions,
+                              &result, &output_indices, rank));
+    }
+  }
+
+  void create_output_tensor(tensorflow::OpKernelContext* context,
+                            const tensorflow::Tensor& input_tensor,
+                            const tensorflow::DataType& input_tensor_type,
+                            const tensorflow::PartialTensorShape& desired_shape_tensor) {
+    auto desired_shape = desired_shape_tensor.dim_sizes();
+
+    // obligatory type switch
+    switch (input_tensor_type) {
+      case tensorflow::DataTypeToEnum<float>::value:
+        fill_periodic_tensor<float>(context, desired_shape, input_tensor);
+      case tensorflow::DataTypeToEnum<double>::value:
+        fill_periodic_tensor<double>(context, desired_shape, input_tensor);
+      case tensorflow::DataTypeToEnum<tensorflow::int32>::value:
+        fill_periodic_tensor<tensorflow::int32>(context, desired_shape, input_tensor);
+      case tensorflow::DataTypeToEnum<tensorflow::int64>::value:
+        fill_periodic_tensor<tensorflow::int64>(context, desired_shape, input_tensor);
+      default:
+        ;
+    }
+  }
+
+}  // namespace
+
+
+class PeriodicResampleOp : public tensorflow::OpKernel {
+ public:
+  explicit PeriodicResampleOp(tensorflow::OpKernelConstruction* context)
+      : tensorflow::OpKernel(context) {
+    // Get the desired shape
+    OP_REQUIRES_OK(context,
+                   context->GetAttr("shape", &desired_shape));
+  }
+
+  void Compute(tensorflow::OpKernelContext* context) override {
+    // Grab the input tensor
+    const tensorflow::Tensor& input_tensor = context->input(0);
+    const tensorflow::DataType input_tensor_type = context->input_dtype(0);
+
+    create_output_tensor(context, input_tensor, input_tensor_type, desired_shape);
+  }
+
+private:
+  tensorflow::PartialTensorShape desired_shape;
+};
+
+#endif  // TENSORFLOW_KERNELS_PERIODICRESAMPLE_OP_H_
diff --git a/tensorflow/contrib/periodic_resample/ops/array_ops.cc b/tensorflow/contrib/periodic_resample/ops/array_ops.cc
new file mode 100644
index 0000000000..498799764f
--- /dev/null
+++ b/tensorflow/contrib/periodic_resample/ops/array_ops.cc
@@ -0,0 +1,89 @@
+// =============================================================================
+// Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+// =============================================================================
+
+#include "tensorflow/core/framework/op.h"
+#include "tensorflow/core/framework/op_kernel.h"
+#include "tensorflow/core/framework/shape_inference.h"
+#include "tensorflow/core/framework/common_shape_fns.h"
+
+
+using namespace tensorflow;
+
+REGISTER_OP("PeriodicResample")
+    .Attr("T: numbertype")
+    .Input("values: T")
+    .Attr("shape: shape")
+    .Output("output: T")
+    .SetShapeFn(shape_inference::ExplicitShape)
+    .Doc(R"doc(
+Periodically resample elements of a tensor to conform to `shape`.
+
+This function implements a slightly more generic version of the subpixel
+convolutions found in this [paper](https://arxiv.org/abs/1609.05158).
+
+The formula for computing the elements in the `output` tensor is as follows:
+  `T` = `values` tensor of rank `R`
+  `S` = desired `shape` of output tensor (vector of length `R`)
+  `P` = `output` tensor of rank `R`
+  \((T_1,\ldots,T_R)\) = shape(`T`)
+  \([S_1,\ldots,S_q,\ldots,S_R]\) = elements of vector `S`
+
+  A single element in `S` is left unspecified (denoted \(S_q=-1\)).
+  Let \(f_i\) denote the (possibly non-integer) factor that relates the original
+  dimension to the desired dimensions, \(S_i=f_i T_i\), for \(i\neq q\) where
+  \(f_i>0\).
+  Define the following:
+    \(g_i=\lceil f_i\rceil\)
+    \(t=\prod_i T_i\)
+    \(s=\prod_{i\neq q} S_i\)
+  \(S_q\) can then be defined as by \(S_q=\lfloor t/s\rfloor\).
+  The elements of the resulting tensor are defined as
+  \(P_{s_1,\ldots,s_R}=T_{h_1,\ldots,h_q,\ldots,h_R}\).
+  The \(h_i\) (\(i\neq q\)) are defined by \(h_i=\lfloor s_i/g_i\rfloor\).
+  \(h_q=S_q\sum_{j\neq q}^{q-1}G_j \mathrm{mod}(s_j,g_j) + s_q\), where
+  \(G_j=\prod_{i}^{j-1}g_i\) (\(G_0=1\)).
+
+One drawback of this method is that whenever the output dimensions are slightly
+less than integer multiples of the input dimensions, many of the tensor elements
+are repeated in an inefficient way. This is resolved by specifying that all
+desired dimensions are integer multiples of the input tensor.
+
+For example:
+
+```prettyprint
+`input` is [[ 0  1  2  3]
+            [ 4  5  6  7]
+            [ 8  9 10 11]]
+
+tf.periodic_resample(input, [6, None]) ==> [[ 0  1]
+                                            [ 2  3]
+                                            [ 4  5]
+                                            [ 6  7]
+                                            [ 8  9]
+                                            [10 11]]
+```
+
+values: The tensor of rank `R` to periodic_resample
+shape: A 1-D tensor representing the desired shape of the output tensor.
+  Exactly one element of this tensor must have the value `None` which represents
+  that this dimension of `values` can be adjusted downward in order to
+  accomodate increases in other dimensions. The specified sizes of the
+  non-adjustable dimensions must by at least as large as in the `values` tensor.
+output: Periodically resampled tensor that has dimensions specified as in
+  `shape` except that the dimension specified as `None` will be minimally
+  decreased as necessary.
+
+)doc");
diff --git a/tensorflow/contrib/periodic_resample/python/__init__.py b/tensorflow/contrib/periodic_resample/python/__init__.py
new file mode 100644
index 0000000000..36aeeb8da2
--- /dev/null
+++ b/tensorflow/contrib/periodic_resample/python/__init__.py
@@ -0,0 +1,20 @@
+
+# =============================================================================
+# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# =============================================================================
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
diff --git a/tensorflow/contrib/periodic_resample/python/kernel_tests/periodic_resample_op_test.py b/tensorflow/contrib/periodic_resample/python/kernel_tests/periodic_resample_op_test.py
new file mode 100644
index 0000000000..1d727870f6
--- /dev/null
+++ b/tensorflow/contrib/periodic_resample/python/kernel_tests/periodic_resample_op_test.py
@@ -0,0 +1,101 @@
+# =============================================================================
+# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# =============================================================================
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import numpy
+import tensorflow
+from tensorflow.contrib.periodic_resample import periodic_resample
+from tensorflow.python.framework import test_util
+from tensorflow.python.ops import variables
+from tensorflow.python.platform import googletest
+
+
+class PeriodicResampleTest(test_util.TensorFlowTestCase):
+
+  def testPeriodicResampleBasic2D(self):
+
+    input_tensor = numpy.arange(12).reshape((3, 4))
+    desired_shape = numpy.array([6, None])
+    output_tensor = input_tensor.reshape((6, 2))
+
+    with self.test_session():
+      variables.global_variables_initializer().run()
+      result = periodic_resample(input_tensor, desired_shape).eval()
+      self.assertAllEqual(result, output_tensor)
+
+  def testPeriodicResampleTruncatedBasic2D(self):
+
+    input_tensor = numpy.arange(12).reshape((3, 4))
+    desired_shape = numpy.array([5, None])
+    output_tensor = input_tensor.reshape((6, 2))[:-1]
+
+    with self.test_session():
+      variables.global_variables_initializer().run()
+      result = periodic_resample(input_tensor, desired_shape).eval()
+      self.assertAllEqual(result, output_tensor)
+
+  def testPeriodicResampleBasic3D(self):
+
+    input_tensor = numpy.arange(2*2*4).reshape((2, 2, 4))
+    desired_shape = numpy.array([4, 4, None])
+    output_tensor = numpy.array([[[0], [2], [4], [6]],
+                                 [[1], [3], [5], [7]],
+                                 [[8], [10], [12], [14]],
+                                 [[9], [11], [13], [15]]])
+
+    # NOTE: output_tensor != input_tensor.reshape((4, 4, -1))
+    with self.test_session():
+      variables.global_variables_initializer().run()
+      result = periodic_resample(input_tensor, desired_shape).eval()
+      # input_tensor[0, 0, 0] == result[0, 0, 0]
+      # input_tensor[0, 0, 1] == result[1, 0, 0]
+      # input_tensor[0, 0, 2] == result[0, 1, 0]
+      # input_tensor[0, 0, 3] == result[1, 1, 0]
+      self.assertAllEqual(result, output_tensor)
+
+  def testPeriodicResampleBasic4D(self):
+
+    input_tensor = numpy.arange(2*2*2*8).reshape((2, 2, 2, 8))
+    desired_shape = numpy.array([4, 4, 4, None])
+    output_tensor = numpy.array([[[[0], [4], [8], [12]],
+                                  [[2], [6], [10], [14]],
+                                  [[16], [20], [24], [28]],
+                                  [[18], [22], [26], [30]]],
+                                 [[[1], [5], [9], [13]],
+                                  [[3], [7], [11], [15]],
+                                  [[17], [21], [25], [29]],
+                                  [[19], [23], [27], [31]]],
+                                 [[[32], [36], [40], [44]],
+                                  [[34], [38], [42], [46]],
+                                  [[48], [52], [56], [60]],
+                                  [[50], [54], [58], [62]]],
+                                 [[[33], [37], [41], [45]],
+                                  [[35], [39], [43], [47]],
+                                  [[49], [53], [57], [61]],
+                                  [[51], [55], [59], [63]]]])
+
+    # NOTE: output_tensor != input_tensor.reshape((4, 4, 4, -1))
+    with self.test_session():
+      variables.global_variables_initializer().run()
+      result = periodic_resample(input_tensor, desired_shape).eval()
+      self.assertAllEqual(result, output_tensor)
+
+
+if __name__ == "__main__":
+  googletest.main()
diff --git a/tensorflow/contrib/periodic_resample/python/ops/periodic_resample_op.py b/tensorflow/contrib/periodic_resample/python/ops/periodic_resample_op.py
new file mode 100644
index 0000000000..6a09f70f44
--- /dev/null
+++ b/tensorflow/contrib/periodic_resample/python/ops/periodic_resample_op.py
@@ -0,0 +1,30 @@
+# =============================================================================
+# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# =============================================================================
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+
+from tensorflow.contrib.periodic_resample.python.ops import gen_periodic_resample_op
+
+from tensorflow.contrib.periodic_resample.python.ops.gen_periodic_resample_op import periodic_resample
+
+from tensorflow.contrib.util import loader
+from tensorflow.python.platform import resource_loader
+
+_periodic_resample_op = loader.load_op_library(
+    resource_loader.get_path_to_datafile('_periodic_resample_op.so'))
-- 
GitLab


From 393ca061f4f2d07fa57ac77f71eede531279a8bb Mon Sep 17 00:00:00 2001
From: Skye Wanderman-Milne <skyewm@google.com>
Date: Fri, 1 Dec 2017 11:17:48 -0800
Subject: [PATCH 0515/1225] Make 'producer_op_list' argument to
 import_graph_def work with C API.

This changes exposes a minor functional change: unknown attrs cause an
import error with the C API enabled, but only cause an error when the
graph is run in the original Python code (the error will occur even if
the problematic op isn't being run). It seems OK to fail faster in
error cases.

PiperOrigin-RevId: 177615841
---
 tensorflow/python/framework/importer.py      | 52 +++++++++++++-------
 tensorflow/python/framework/importer_test.py | 31 +++++++-----
 2 files changed, 51 insertions(+), 32 deletions(-)

diff --git a/tensorflow/python/framework/importer.py b/tensorflow/python/framework/importer.py
index ada8c30fab..7fd7991523 100644
--- a/tensorflow/python/framework/importer.py
+++ b/tensorflow/python/framework/importer.py
@@ -193,6 +193,36 @@ def _FindAttrInOpDef(attr_name, op_def):
   return None
 
 
+def _RemoveDefaultAttrs(op_dict, producer_op_list, graph_def):
+  """Removes unknown default attrs according to `producer_op_list`.
+
+  Removes any unknown attrs in `graph_def` (i.e. attrs that do not appear in
+  the OpDefs in `op_dict`) that have a default value in `producer_op_list`.
+
+  Args:
+    op_dict: dict mapping operation name to OpDef.
+    producer_op_list: OpList proto.
+    graph_def: GraphDef proto
+  """
+  producer_op_dict = {op.name: op for op in producer_op_list.op}
+  for node in graph_def.node:
+    # Remove any default attr values that aren't in op_def.
+    if node.op in producer_op_dict:
+      op_def = op_dict[node.op]
+      producer_op_def = producer_op_dict[node.op]
+      # We make a copy of node.attr to iterate through since we may modify
+      # node.attr inside the loop.
+      for key in list(node.attr):
+        if _FindAttrInOpDef(key, op_def) is None:
+          # No attr_def in consumer, look in producer.
+          attr_def = _FindAttrInOpDef(key, producer_op_def)
+          if (attr_def and attr_def.HasField('default_value') and
+              node.attr[key] == attr_def.default_value):
+            # Unknown attr had default value in producer, delete it so it can be
+            # understood by consumer.
+            del node.attr[key]
+
+
 def _ConvertInputMapValues(name, input_map):
   """Ensures all input map values are tensors.
 
@@ -396,10 +426,9 @@ def import_graph_def(graph_def, input_map=None, return_elements=None,
 
   op_dict = op_def_registry.get_registered_ops()
 
-  if producer_op_list is None:
-    producer_op_dict = None
-  else:
-    producer_op_dict = {op.name: op for op in producer_op_list.op}
+  if producer_op_list is not None:
+    # TODO(skyewm): make a copy of graph_def so we're not mutating the argument?
+    _RemoveDefaultAttrs(op_dict, producer_op_list, graph_def)
 
   graph = ops.get_default_graph()
 
@@ -489,21 +518,6 @@ def import_graph_def(graph_def, input_map=None, return_elements=None,
             value = node.attr[key]
             if value is None or value.WhichOneof('value') is None:
               node.attr[key].CopyFrom(attr_def.default_value)
-        if producer_op_dict:
-          # Remove any default attr values that aren't in op_def.
-          if node.op in producer_op_dict:
-            producer_op_def = producer_op_dict[node.op]
-            # We make a copy of node.attr to iterate through since we
-            # may modify node.attr inside the loop.
-            for key in list(node.attr):
-              if _FindAttrInOpDef(key, op_def) is None:
-                # No attr_def in consumer, look in producer.
-                attr_def = _FindAttrInOpDef(key, producer_op_def)
-                if (attr_def and attr_def.HasField('default_value') and
-                    node.attr[key] == attr_def.default_value):
-                  # Unknown attr had default value in producer, delete it
-                  # so it can be understood by consumer.
-                  del node.attr[key]
 
         output_types = _OutputTypes(node, op_dict)
         name_to_op[node.name] = g.create_op(
diff --git a/tensorflow/python/framework/importer_test.py b/tensorflow/python/framework/importer_test.py
index 4a215abd2e..5d7d3fe5e3 100644
--- a/tensorflow/python/framework/importer_test.py
+++ b/tensorflow/python/framework/importer_test.py
@@ -1056,8 +1056,6 @@ class ImportGraphDefTest(test.TestCase):
       self.assertEqual(123.0, a[0].get_attr("default_float"))
 
   def testDefaultAttrsRemoved(self):
-    if ops._USE_C_API: return  # TODO(skyewm): make this work with C API
-
     producer_op_list = op_def_pb2.OpList()
     text_format.Merge("""
       op {
@@ -1074,19 +1072,26 @@ class ImportGraphDefTest(test.TestCase):
           """),
           return_elements=["A"],
           producer_op_list=producer_op_list)
-      with self.assertRaisesRegexp(ValueError, "No attr named 'default_int'"):
+      if ops._USE_C_API:
+        error_msg = "Operation 'import/A' has no attr named 'default_int'."
+      else:
+        error_msg = "No attr named 'default_int'"
+      with self.assertRaisesRegexp(ValueError, error_msg):
         a[0].get_attr("default_int")
 
-    # Attr only in producer_op_list with non-default value is preserved.
-    with ops.Graph().as_default():
-      a = importer.import_graph_def(
-          self._MakeGraphDef("""
-          node { name: 'A' op: 'OpWithFutureDefaultAttr'
-                 attr { key: 'default_int' value { i: 987 } } }
-          """),
-          return_elements=["A"],
-          producer_op_list=producer_op_list)
-      self.assertEqual(987, a[0].get_attr("default_int"))
+    # Unknown attrs cannot be imported using C API. This test will eventually be
+    # deleted.
+    if not ops._USE_C_API:
+      # Attr only in producer_op_list with non-default value is preserved.
+      with ops.Graph().as_default():
+        a = importer.import_graph_def(
+            self._MakeGraphDef("""
+            node { name: 'A' op: 'OpWithFutureDefaultAttr'
+                   attr { key: 'default_int' value { i: 987 } } }
+            """),
+            return_elements=["A"],
+            producer_op_list=producer_op_list)
+        self.assertEqual(987, a[0].get_attr("default_int"))
 
   def testFunctions(self):
     if ops._USE_C_API: return  # TODO(skyewm): make this work with C API
-- 
GitLab


From 788e2ccded3fb547539831e2fe07695d274b2270 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 1 Dec 2017 11:24:25 -0800
Subject: [PATCH 0516/1225] Adds -latomic as linkopts to support uses of ISO
 C++11 <atomic>.

PiperOrigin-RevId: 177616799
---
 tensorflow/contrib/lite/build_def.bzl | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tensorflow/contrib/lite/build_def.bzl b/tensorflow/contrib/lite/build_def.bzl
index 5813b3de4d..d1fcdce70a 100644
--- a/tensorflow/contrib/lite/build_def.bzl
+++ b/tensorflow/contrib/lite/build_def.bzl
@@ -89,6 +89,7 @@ def tflite_jni_linkopts():
   return tflite_jni_linkopts_unstripped() + select({
       "//tensorflow:android": [
           "-s",  # Omit symbol table.
+          "-latomic",  # Required for some uses of ISO C++11 <atomic> in x86.
       ],
       "//conditions:default": [],
   })
-- 
GitLab


From 2d3ea2e921278966b67830c6c6a747213928df3d Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 1 Dec 2017 11:37:07 -0800
Subject: [PATCH 0517/1225] [TF:XLA] Remove unused class XlaLocalRuntimeContext
 and related member variables.

This additional parameter to computations was only needed for the CPU backend, but
it is not used anymore. The thread pool it holds is already passed via xla::ExecutableRunOptions. Also, remove unused members from XlaCompiler::CompilationResult.

PiperOrigin-RevId: 177618693
---
 tensorflow/compiler/aot/BUILD                 |  1 -
 tensorflow/compiler/aot/codegen.cc            | 25 +--------
 tensorflow/compiler/aot/codegen_test.cc       |  2 -
 tensorflow/compiler/aot/codegen_test_h.golden | 11 ++--
 tensorflow/compiler/aot/compile.cc            |  5 +-
 tensorflow/compiler/aot/compile.h             |  1 -
 tensorflow/compiler/aot/runtime_test.cc       |  1 -
 tensorflow/compiler/aot/tfcompile.bzl         |  2 -
 tensorflow/compiler/jit/kernels/BUILD         |  1 -
 .../compiler/jit/kernels/xla_launch_op.cc     | 20 -------
 .../compiler/jit/xla_compilation_cache.cc     |  5 --
 tensorflow/compiler/tf2xla/BUILD              |  8 ---
 tensorflow/compiler/tf2xla/tf2xla.cc          | 10 +---
 tensorflow/compiler/tf2xla/tf2xla.h           |  6 +-
 tensorflow/compiler/tf2xla/tf2xla_test.cc     |  5 +-
 .../tf2xla/xla_compiled_cpu_function.cc       |  5 --
 .../tf2xla/xla_compiled_cpu_function.h        | 20 ++-----
 tensorflow/compiler/tf2xla/xla_compiler.cc    |  7 ---
 tensorflow/compiler/tf2xla/xla_compiler.h     | 12 +---
 tensorflow/compiler/tf2xla/xla_context.cc     | 18 ------
 tensorflow/compiler/tf2xla/xla_context.h      | 12 ----
 .../tf2xla/xla_jit_compiled_cpu_function.cc   | 30 +++-------
 .../tf2xla/xla_local_runtime_context.h        | 55 -------------------
 23 files changed, 28 insertions(+), 234 deletions(-)
 delete mode 100644 tensorflow/compiler/tf2xla/xla_local_runtime_context.h

diff --git a/tensorflow/compiler/aot/BUILD b/tensorflow/compiler/aot/BUILD
index 767e3686a8..5740c040e3 100644
--- a/tensorflow/compiler/aot/BUILD
+++ b/tensorflow/compiler/aot/BUILD
@@ -24,7 +24,6 @@ tf_cc_test(
     srcs = ["runtime_test.cc"],
     deps = [
         ":runtime",
-        "//tensorflow/compiler/tf2xla:xla_local_runtime_context",
         "//tensorflow/core:framework",
         "//tensorflow/core:test",
         "//tensorflow/core:test_main",
diff --git a/tensorflow/compiler/aot/codegen.cc b/tensorflow/compiler/aot/codegen.cc
index 28ac40df18..53da2881b6 100644
--- a/tensorflow/compiler/aot/codegen.cc
+++ b/tensorflow/compiler/aot/codegen.cc
@@ -101,21 +101,8 @@ Status ComputeArgSizes(const CompileResult& compile_result,
                        std::vector<int64>* arg_sizes) {
   const xla::ProgramShape& ps = compile_result.program_shape;
   for (int i = 0; i < ps.parameters_size(); ++i) {
-    if (i == ps.parameters_size() - 1 && compile_result.has_context_arg) {
-      // If the compiled function needs a XlaLocalRuntimeContext* arg, it's
-      // always last, and must be represented as an opaque type.
-      const xla::PrimitiveType type = ps.parameters(i).element_type();
-      if (type != xla::OPAQUE) {
-        return errors::InvalidArgument(
-            "expected final context arg to be opaque, but got type: ",
-            xla::PrimitiveType_Name(type), ", from program shape: ",
-            xla::ShapeUtil::HumanString(ps));
-      }
-      arg_sizes->push_back(-1);
-    } else {
-      arg_sizes->push_back(xla::ShapeUtil::ByteSizeOf(
-          ps.parameters(i), compile_result.pointer_size));
-    }
+    arg_sizes->push_back(xla::ShapeUtil::ByteSizeOf(
+        ps.parameters(i), compile_result.pointer_size));
   }
   return Status::OK();
 }
@@ -165,11 +152,6 @@ string RewriteWithName(const string& name, string code,
 Status GenArgMethods(const tf2xla::Config& config, const xla::ProgramShape& ps,
                      const CompileResult& compile_result, string* methods) {
   size_t num_args = ps.parameters_size();
-  if (compile_result.has_context_arg) {
-    // If the compiled function needs a XlaLocalRuntimeContext* arg, it's
-    // always last, and is set in the class constructor.
-    num_args--;
-  }
   if (config.feed_size() != num_args) {
     return errors::InvalidArgument("mismatch between feed_size(",
                                    config.feed_size(), ") and num_args(",
@@ -474,7 +456,6 @@ class {{CLASS}} : public tensorflow::XlaCompiledCpuFunction {
       data->temp_sizes = TempSizes();
       data->num_temps = kNumTemps;
       data->result_index = kResultIndex;
-      data->requires_runtime_context = {{HAS_CONTEXT_ARG}};
       data->arg_names = StaticArgNames();
       data->result_names = StaticResultNames();
       data->program_shape = StaticProgramShape();
@@ -560,8 +541,6 @@ class {{CLASS}} : public tensorflow::XlaCompiledCpuFunction {
       {"{{ARG_SIZES}}", str_util::Join(arg_sizes, ", ")},
       {"{{CLASS}}", opts.class_name},
       {"{{ENTRY}}", compile_result.entry_point},
-      {"{{HAS_CONTEXT_ARG}}",
-       compile_result.has_context_arg ? "true" : "false"},
       {"{{INCLUDE_XLA_DATA_PROTO}}", include_xla_data_proto},
       {"{{METHODS_ARG}}\n", methods_arg},
       {"{{METHODS_RESULT}}\n", methods_result},
diff --git a/tensorflow/compiler/aot/codegen_test.cc b/tensorflow/compiler/aot/codegen_test.cc
index 0f6114666f..75026c57c0 100644
--- a/tensorflow/compiler/aot/codegen_test.cc
+++ b/tensorflow/compiler/aot/codegen_test.cc
@@ -145,11 +145,9 @@ TEST(GenerateHeader, Golden) {
       {
           xla::ShapeUtil::MakeShape(xla::F32, {1, 2}),
           xla::ShapeUtil::MakeShape(xla::S64, {3, 4}),
-          xla::ShapeUtil::MakeOpaqueShape(),
       },
       xla::ShapeUtil::MakeTupleShape(
           {xla::ShapeUtil::MakeShape(xla::U32, {5, 6})}));
-  compile_result.has_context_arg = true;
   compile_result.entry_point = "entry_point";
   compile_result.pointer_size = 8;
   string header;
diff --git a/tensorflow/compiler/aot/codegen_test_h.golden b/tensorflow/compiler/aot/codegen_test_h.golden
index cf01bee325..35e50433d6 100644
--- a/tensorflow/compiler/aot/codegen_test_h.golden
+++ b/tensorflow/compiler/aot/codegen_test_h.golden
@@ -48,7 +48,7 @@ namespace bar {
 //   is guaranteed that no thread may call a non-const method.
 //
 // The logical function signature is:
-//   ((unknown): f32[1,2], (unknown): s64[3,4], (unknown): opaque[]) -> (u32[5,6])
+//   ((unknown): f32[1,2], (unknown): s64[3,4]) -> (u32[5,6])
 //
 // Memory stats:
 //   arg bytes total:    104
@@ -58,11 +58,11 @@ namespace bar {
 class MyClass : public tensorflow::XlaCompiledCpuFunction {
  public:
   // Number of input arguments for the compiled computation.
-  static constexpr size_t kNumArgs = 3;
+  static constexpr size_t kNumArgs = 2;
 
   // Byte size of each argument buffer. There are kNumArgs entries.
   static const intptr_t* ArgSizes() {
-    static constexpr intptr_t kArgSizes[kNumArgs] = {8, 96, -1};
+    static constexpr intptr_t kArgSizes[kNumArgs] = {8, 96};
     return kArgSizes;
   }
 
@@ -77,7 +77,6 @@ class MyClass : public tensorflow::XlaCompiledCpuFunction {
       data->temp_sizes = TempSizes();
       data->num_temps = kNumTemps;
       data->result_index = kResultIndex;
-      data->requires_runtime_context = true;
       data->arg_names = StaticArgNames();
       data->result_names = StaticResultNames();
       data->program_shape = StaticProgramShape();
@@ -236,8 +235,8 @@ class MyClass : public tensorflow::XlaCompiledCpuFunction {
   // Shape of the args and results.
   static const xla::ProgramShape* StaticProgramShape() {
     static const xla::ProgramShape* kShape = []() {
-      static const char kProto[] = {10,12,16,11,26,2,1,2,42,4,10,2,1,0,10,12,16,5,26,2,3,4,42,4,10,2,1,0,10,2,16,14,18,16,16,13,34,12,16,8,26,2,5,6,42,4,10,2,1,0};
-      static constexpr int kProtoSize = 50;
+      static const char kProto[] = {10,12,16,11,26,2,1,2,42,4,10,2,1,0,10,12,16,5,26,2,3,4,42,4,10,2,1,0,18,16,16,13,34,12,16,8,26,2,5,6,42,4,10,2,1,0};
+      static constexpr int kProtoSize = 46;
       xla::ProgramShape* shape = new xla::ProgramShape;
       shape->ParseFromArray(kProto, kProtoSize);
       return shape;
diff --git a/tensorflow/compiler/aot/compile.cc b/tensorflow/compiler/aot/compile.cc
index 2b8cc6024c..c87f2b75df 100644
--- a/tensorflow/compiler/aot/compile.cc
+++ b/tensorflow/compiler/aot/compile.cc
@@ -94,9 +94,8 @@ Status CompileGraph(const GraphDef& graph_def, const tf2xla::Config& config,
       xla::ClientLibrary::GetOrCreateCompileOnlyClient(cpu_platform)
           .ValueOrDie();
   xla::Computation computation;
-  TF_RETURN_IF_ERROR(ConvertGraphDefToXla(graph_def, config, client,
-                                          &computation,
-                                          &compile_result->has_context_arg));
+  TF_RETURN_IF_ERROR(
+      ConvertGraphDefToXla(graph_def, config, client, &computation));
   if (!flags.out_session_module.empty()) {
     TF_ASSIGN_OR_RETURN(std::unique_ptr<xla::SessionModule> module,
                         computation.Snapshot());
diff --git a/tensorflow/compiler/aot/compile.h b/tensorflow/compiler/aot/compile.h
index 965c296081..e03c5b1aa7 100644
--- a/tensorflow/compiler/aot/compile.h
+++ b/tensorflow/compiler/aot/compile.h
@@ -34,7 +34,6 @@ struct CompileResult {
   // Contains object file and meta-info.
   std::unique_ptr<xla::cpu::CpuAotCompilationResult> aot;
   xla::ProgramShape program_shape;  // Static shape of args and results.
-  bool has_context_arg = false;     // Is last arg XlaLocalRuntimeContext?
   string entry_point;               // Name of generated function.
   int pointer_size = 0;             // Size of a pointer in bytes.
 };
diff --git a/tensorflow/compiler/aot/runtime_test.cc b/tensorflow/compiler/aot/runtime_test.cc
index ac79c278c1..6d603a02eb 100644
--- a/tensorflow/compiler/aot/runtime_test.cc
+++ b/tensorflow/compiler/aot/runtime_test.cc
@@ -15,7 +15,6 @@ limitations under the License.
 
 #include "tensorflow/compiler/aot/runtime.h"
 
-#include "tensorflow/compiler/tf2xla/xla_local_runtime_context.h"
 #include "tensorflow/core/framework/allocator.h"
 #include "tensorflow/core/platform/test.h"
 
diff --git a/tensorflow/compiler/aot/tfcompile.bzl b/tensorflow/compiler/aot/tfcompile.bzl
index 6c385af3b3..542451ed2d 100644
--- a/tensorflow/compiler/aot/tfcompile.bzl
+++ b/tensorflow/compiler/aot/tfcompile.bzl
@@ -267,7 +267,6 @@ def tf_library(name, graph, config,
         srcs=[test_file],
         deps=[
             ":" + name,
-            "@org_tensorflow//tensorflow/compiler/tf2xla:xla_local_runtime_context",
             "@org_tensorflow//tensorflow/compiler/aot:runtime",
             "@org_tensorflow//tensorflow/compiler/aot:tf_library_test_main",
             "@org_tensorflow//tensorflow/compiler/xla:executable_run_options",
@@ -313,7 +312,6 @@ def tf_library(name, graph, config,
         linkopts = if_android(["-pie", "-s"]),
         deps=[
             ":" + name,
-            "@org_tensorflow//tensorflow/compiler/tf2xla:xla_local_runtime_context",
             "@org_tensorflow//tensorflow/compiler/aot:benchmark",
             "@org_tensorflow//tensorflow/compiler/aot:runtime",
             "@org_tensorflow//tensorflow/compiler/xla:executable_run_options",
diff --git a/tensorflow/compiler/jit/kernels/BUILD b/tensorflow/compiler/jit/kernels/BUILD
index 459a582e15..9bea566331 100644
--- a/tensorflow/compiler/jit/kernels/BUILD
+++ b/tensorflow/compiler/jit/kernels/BUILD
@@ -16,7 +16,6 @@ cc_library(
         "//tensorflow/compiler/jit:xla_device",
         "//tensorflow/compiler/tf2xla:common",
         "//tensorflow/compiler/tf2xla:xla_compiler",
-        "//tensorflow/compiler/tf2xla:xla_local_runtime_context",
         "//tensorflow/compiler/xla:statusor",
         "//tensorflow/compiler/xla/client:client_library",
         "//tensorflow/compiler/xla/client:local_client",
diff --git a/tensorflow/compiler/jit/kernels/xla_launch_op.cc b/tensorflow/compiler/jit/kernels/xla_launch_op.cc
index e481796d9e..54f60fae5e 100644
--- a/tensorflow/compiler/jit/kernels/xla_launch_op.cc
+++ b/tensorflow/compiler/jit/kernels/xla_launch_op.cc
@@ -19,7 +19,6 @@ limitations under the License.
 #include "tensorflow/compiler/jit/xla_device.h"
 #include "tensorflow/compiler/tf2xla/shape_util.h"
 #include "tensorflow/compiler/tf2xla/xla_compiler.h"
-#include "tensorflow/compiler/tf2xla/xla_local_runtime_context.h"
 #include "tensorflow/compiler/tf2xla/xla_op_registry.h"
 #include "tensorflow/compiler/xla/client/client_library.h"
 #include "tensorflow/compiler/xla/client/local_client.h"
@@ -267,7 +266,6 @@ void XlaLocalLaunchOp::Compute(OpKernelContext* ctx) {
 
   // Builds an XLA allocator for the device.
   XlaAllocator xla_allocator(client->platform(), ctx);
-  XlaLocalRuntimeContext local_runtime_context;
 
   std::unique_ptr<xla::ShapedBuffer> output;
   // Build xla::ShapedBuffers that point directly to the Tensor buffers.
@@ -300,18 +298,6 @@ void XlaLocalLaunchOp::Compute(OpKernelContext* ctx) {
     OP_REQUIRES_OK(ctx, xla_allocator.RegisterArgument(t));
   }
 
-  // Make the final parameter point at local_runtime_context.
-  if (kernel->requires_runtime_context) {
-    gpu::DeviceMemoryBase local_runtime_context_dmem(
-        &local_runtime_context, sizeof(local_runtime_context));
-    arg_buffers.push_back(
-        xla::ShapedBuffer::MakeArrayShapedBuffer(
-            xla::ShapeUtil::MakeOpaqueShape(), client->platform(),
-            client->default_device_ordinal(), local_runtime_context_dmem)
-            .ConsumeValueOrDie());
-    arg_ptrs.push_back(arg_buffers.back().get());
-  }
-
   // Execute the computation.
   VLOG(2) << "Executing computation.";
   xla::ExecutableRunOptions run_options;
@@ -323,12 +309,6 @@ void XlaLocalLaunchOp::Compute(OpKernelContext* ctx) {
   auto run_result = executable->Run(arg_ptrs, run_options);
   OP_REQUIRES(ctx, run_result.ok(), run_result.status());
 
-  if (local_runtime_context.error) {
-    ctx->CtxFailure(errors::InvalidArgument("Compiled kernel returned error: ",
-                                            local_runtime_context.error_msg));
-    return;
-  }
-
   output = run_result.ConsumeValueOrDie()->release();
   auto elapsed = env->NowMicros() - start_time;
   VLOG(2) << "Elapsed time: " << elapsed << "us";
diff --git a/tensorflow/compiler/jit/xla_compilation_cache.cc b/tensorflow/compiler/jit/xla_compilation_cache.cc
index bc2eccd277..3717c2cc24 100644
--- a/tensorflow/compiler/jit/xla_compilation_cache.cc
+++ b/tensorflow/compiler/jit/xla_compilation_cache.cc
@@ -214,17 +214,12 @@ Status XlaCompilationCache::BuildExecutable(
     const XlaCompiler::CompilationResult& result,
     std::unique_ptr<xla::LocalExecutable>* executable) {
   VLOG(2) << "Compiling to local executable";
-  xla::Shape opaque_shape = xla::ShapeUtil::MakeOpaqueShape();
 
   std::vector<const xla::Shape*> argument_layouts(
       result.xla_input_shapes.size());
   for (int i = 0; i < result.xla_input_shapes.size(); ++i) {
     argument_layouts[i] = &result.xla_input_shapes[i];
   }
-  if (result.requires_runtime_context) {
-    // The final arg is the XlaLocalRuntimeContext*.
-    argument_layouts.push_back(&opaque_shape);
-  }
   xla::ExecutableBuildOptions build_options;
   build_options.set_device_ordinal(client_->default_device_ordinal());
   build_options.set_result_layout(result.xla_output_shape);
diff --git a/tensorflow/compiler/tf2xla/BUILD b/tensorflow/compiler/tf2xla/BUILD
index 4cb2b99f9f..dc6d826a3a 100644
--- a/tensorflow/compiler/tf2xla/BUILD
+++ b/tensorflow/compiler/tf2xla/BUILD
@@ -67,7 +67,6 @@ cc_library(
         # Keep dependencies to a minimum here; this library is used in every AOT
         # binary produced by tfcompile.
         "//tensorflow/compiler/aot:runtime",
-        "//tensorflow/compiler/tf2xla:xla_local_runtime_context",
         "//tensorflow/compiler/xla:executable_run_options",
         "//tensorflow/core:framework_lite",
     ],
@@ -358,13 +357,6 @@ tf_cc_test(
     ],
 )
 
-cc_library(
-    name = "xla_local_runtime_context",
-    hdrs = ["xla_local_runtime_context.h"],
-    visibility = ["//visibility:public"],
-    deps = ["//tensorflow/core:framework_lite"],
-)
-
 cc_library(
     name = "dump_graph",
     srcs = [
diff --git a/tensorflow/compiler/tf2xla/tf2xla.cc b/tensorflow/compiler/tf2xla/tf2xla.cc
index a14c93a2b9..906f229043 100644
--- a/tensorflow/compiler/tf2xla/tf2xla.cc
+++ b/tensorflow/compiler/tf2xla/tf2xla.cc
@@ -253,8 +253,7 @@ Status CreateXlaArgs(const Graph& graph,
 // Converts the TensorFlow graph into an XLA computation, by executing the
 // graph symbolically, with each op building up the XLA HLO.
 Status ConvertGraphToXla(std::unique_ptr<Graph> graph, xla::Client* client,
-                         xla::Computation* computation,
-                         bool* requires_runtime_context) {
+                         xla::Computation* computation) {
   XlaOpRegistry::RegisterCompilationKernels();
   for (Node* node : graph->nodes()) {
     node->set_assigned_device_name(
@@ -277,7 +276,6 @@ Status ConvertGraphToXla(std::unique_ptr<Graph> graph, xla::Client* client,
   TF_RETURN_IF_ERROR(compiler.CompileGraph(XlaCompiler::CompileOptions(),
                                            "tfcompile", std::move(graph),
                                            xla_args, &result));
-  *requires_runtime_context = result.requires_runtime_context;
   *computation = std::move(*result.computation);
 
   int num_const_results = 0;
@@ -352,12 +350,10 @@ Status InitGraph(const GraphDef& graph_def, const tf2xla::Config& config,
 
 Status ConvertGraphDefToXla(const GraphDef& graph_def,
                             const tf2xla::Config& config, xla::Client* client,
-                            xla::Computation* computation,
-                            bool* requires_runtime_context) {
+                            xla::Computation* computation) {
   std::unique_ptr<Graph> graph;
   TF_RETURN_IF_ERROR(InitGraph(graph_def, config, &graph));
-  TF_RETURN_IF_ERROR(ConvertGraphToXla(std::move(graph), client, computation,
-                                       requires_runtime_context));
+  TF_RETURN_IF_ERROR(ConvertGraphToXla(std::move(graph), client, computation));
   return Status::OK();
 }
 
diff --git a/tensorflow/compiler/tf2xla/tf2xla.h b/tensorflow/compiler/tf2xla/tf2xla.h
index ab99beebf7..473c431b12 100644
--- a/tensorflow/compiler/tf2xla/tf2xla.h
+++ b/tensorflow/compiler/tf2xla/tf2xla.h
@@ -30,13 +30,9 @@ namespace tensorflow {
 //
 // The computation is built in the context of the given `client`, which may
 // subsequently be used to compile or execute the computation.
-//
-// If `requires_runtime_context` is filled with true, this indicates the last
-// argument of the computation is XlaLocalRuntimeContext*.
 Status ConvertGraphDefToXla(const GraphDef& graph_def,
                             const tf2xla::Config& config, xla::Client* client,
-                            xla::Computation* computation,
-                            bool* requires_runtime_context);
+                            xla::Computation* computation);
 
 }  // namespace tensorflow
 
diff --git a/tensorflow/compiler/tf2xla/tf2xla_test.cc b/tensorflow/compiler/tf2xla/tf2xla_test.cc
index ecd15652fe..a9978e697b 100644
--- a/tensorflow/compiler/tf2xla/tf2xla_test.cc
+++ b/tensorflow/compiler/tf2xla/tf2xla_test.cc
@@ -70,10 +70,7 @@ TEST(ConvertGraphDefToXla, Sum) {
 
   xla::LocalClient* client = xla::ClientLibrary::LocalClientOrDie();
   xla::Computation computation;
-  bool requires_runtime_context;
-  TF_EXPECT_OK(ConvertGraphDefToXla(graph_def, config, client, &computation,
-                                    &requires_runtime_context));
-  ASSERT_FALSE(requires_runtime_context);
+  TF_EXPECT_OK(ConvertGraphDefToXla(graph_def, config, client, &computation));
 
   // Set up arguments.
   auto x_literal = xla::Literal::CreateR0<int32>(10);
diff --git a/tensorflow/compiler/tf2xla/xla_compiled_cpu_function.cc b/tensorflow/compiler/tf2xla/xla_compiled_cpu_function.cc
index 43d0e17c2c..79da701fd2 100644
--- a/tensorflow/compiler/tf2xla/xla_compiled_cpu_function.cc
+++ b/tensorflow/compiler/tf2xla/xla_compiled_cpu_function.cc
@@ -40,11 +40,6 @@ XlaCompiledCpuFunction::XlaCompiledCpuFunction(const StaticData& static_data,
       static_data.temp_sizes, static_data.num_temps, temps_,
       /*annotate_initialized=*/true);
 
-  // The runtime context is always the last arg, if it is required.
-  if (static_data.requires_runtime_context) {
-    args_[static_data.num_args - 1] = &context_;
-  }
-
   // If Hlo profiling is enabled the generated code expects an appropriately
   // sized buffer to be passed in as the last argument.  If Hlo profiling is
   // disabled the last function argument is still present in the function
diff --git a/tensorflow/compiler/tf2xla/xla_compiled_cpu_function.h b/tensorflow/compiler/tf2xla/xla_compiled_cpu_function.h
index 3c4314d498..e0ae3ed9a8 100644
--- a/tensorflow/compiler/tf2xla/xla_compiled_cpu_function.h
+++ b/tensorflow/compiler/tf2xla/xla_compiled_cpu_function.h
@@ -19,7 +19,6 @@ limitations under the License.
 #include <cassert>
 #include <string>
 
-#include "tensorflow/compiler/tf2xla/xla_local_runtime_context.h"
 #include "tensorflow/compiler/xla/executable_run_options.h"
 #include "tensorflow/core/platform/types.h"
 
@@ -70,9 +69,6 @@ class XlaCompiledCpuFunction {
     // The 0-based index of the result tuple, in the temp buffers.
     size_t result_index = 0;
 
-    // Is the final arg XlaLocalRuntimeContext?
-    bool requires_runtime_context = false;
-
     // [Optional] Arrays of arg and result names. These are arrays of C-style
     // strings, where the array is terminated by nullptr.
     const char** arg_names = nullptr;
@@ -111,21 +107,22 @@ class XlaCompiledCpuFunction {
   // Sets the intra-op thread pool used to run individual ops concurrently.
   void set_thread_pool(const Eigen::ThreadPoolDevice* pool) {
     run_options_.set_intra_op_thread_pool(pool);
-    context_.thread_pool = pool;
   }
 
   // Runs the computation, with inputs read from arg buffers, and outputs
   // written to result buffers. Returns true on success and false on failure.
   bool Run() {
-    context_.error = false;
-    context_.error_msg.clear();
     raw_function_(temps_[result_index_], &run_options_,
                   const_cast<const void**>(args_), temps_, profile_counters_);
-    return !context_.error;
+    return true;
   }
 
   // Returns the error message from the previous failed Run call.
-  const string& error_msg() const { return context_.error_msg; }
+  //
+  // TODO(fschneider): For now this always returns an empty string because there
+  // is no support for error reporting in XLA. Remove this once all callers are
+  // updated.
+  string error_msg() const { return {}; }
 
   // ------------------------------
   // Arg methods for managing input buffers. Buffers are in row-major order.
@@ -148,10 +145,6 @@ class XlaCompiledCpuFunction {
   // tensorflow::tfcompile::runtime::kAlign. If possible, use the functions in
   // tensorflow/compiler/aot/runtime.h to ensure correct alignment.
   //
-  // If StaticData.requires_runtime_context==true, the final argument is an
-  // XlaLocalRuntimeContext, which is managed internally by this class, and
-  // should not be changed.
-  //
   // Aliasing of argument and result buffers is not allowed, and results in
   // undefined behavior.
   void set_arg_data(size_t index, void* data) { args_[index] = data; }
@@ -236,7 +229,6 @@ class XlaCompiledCpuFunction {
 
   // Options and context passed to the compiled function.
   xla::ExecutableRunOptions run_options_;
-  tensorflow::XlaLocalRuntimeContext context_;
 
   // Optional metadata.
   const char** arg_names_ = nullptr;
diff --git a/tensorflow/compiler/tf2xla/xla_compiler.cc b/tensorflow/compiler/tf2xla/xla_compiler.cc
index 48cebdf74c..4c01e67321 100644
--- a/tensorflow/compiler/tf2xla/xla_compiler.cc
+++ b/tensorflow/compiler/tf2xla/xla_compiler.cc
@@ -543,8 +543,6 @@ Status XlaCompiler::CompileGraph(const XlaCompiler::CompileOptions& options,
                      options.resolve_compile_time_constants);
   core::ScopedUnref context_unref(context);
 
-  result->tuple_arg = options.use_tuple_arg;
-
   std::vector<XlaExpression> arg_expressions;
   std::vector<int> arg_cores;
   TF_RETURN_IF_ERROR(BuildArguments(
@@ -564,11 +562,6 @@ Status XlaCompiler::CompileGraph(const XlaCompiler::CompileOptions& options,
       result->computation.get(), &num_computation_outputs,
       &num_nonconst_outputs, &result->resource_updates));
 
-  result->requires_runtime_context = context->has_context_parameter();
-
-  // Tuple arguments and runtime context parameters are incompatible.
-  TF_RET_CHECK(!(options.use_tuple_arg && result->requires_runtime_context));
-
   VLOG(2) << "Outputs: total: " << context->retvals().size()
           << " nonconstant: " << num_nonconst_outputs;
   result->outputs.resize(context->retvals().size());
diff --git a/tensorflow/compiler/tf2xla/xla_compiler.h b/tensorflow/compiler/tf2xla/xla_compiler.h
index ac7d4cfb12..380e24e96b 100644
--- a/tensorflow/compiler/tf2xla/xla_compiler.h
+++ b/tensorflow/compiler/tf2xla/xla_compiler.h
@@ -54,8 +54,6 @@ namespace tensorflow {
 //   +---------------------+-----------------------------------------+
 // Within each block, the arguments are arranged by the _Arg index from which
 // they were derived.
-// If `Options::requires_runtime_context` is true, then an additional runtime
-// context argument is passed as a final argument.
 //
 // The run-time outputs of the XLA computation are arranged in the following
 // order:
@@ -191,16 +189,9 @@ class XlaCompiler {
     // original arguments, and are not necessarily in the same order.)
     std::vector<int> input_mapping;
 
-    // Does the computation require the local runtime context to be passed as
-    // the last argument?
-    bool requires_runtime_context = false;
-
     // Input shapes of the computation.
     std::vector<xla::Shape> xla_input_shapes;
 
-    // Should the arguments be packed into a single tuple?
-    bool tuple_arg;
-
     // Output shape in XLA format. The output shape is always a tuple.
     xla::Shape xla_output_shape;
 
@@ -232,8 +223,7 @@ class XlaCompiler {
     int graph_def_version = TF_GRAPH_DEF_VERSION;
 
     // If 'allow_cpu_custom_calls' is true, kernels may make use of CustomCall()
-    // for CPU; additionally, an optional XlaLocalRuntimeContext* may be passed
-    // to the computation.
+    // for CPU.
     bool allow_cpu_custom_calls = false;
 
     // If not nullptr, populate_resource_manager is called with the
diff --git a/tensorflow/compiler/tf2xla/xla_context.cc b/tensorflow/compiler/tf2xla/xla_context.cc
index 78e770c62b..5d19dd353f 100644
--- a/tensorflow/compiler/tf2xla/xla_context.cc
+++ b/tensorflow/compiler/tf2xla/xla_context.cc
@@ -70,24 +70,6 @@ XlaContext::XlaContext(XlaCompiler* compiler, xla::ComputationBuilder* builder,
       allow_cpu_custom_calls_(allow_cpu_custom_calls),
       resolve_compile_time_constants_(resolve_compile_time_constants) {}
 
-const xla::ComputationDataHandle&
-XlaContext::GetOrCreateRuntimeContextParameter() {
-  CHECK(allow_cpu_custom_calls_);
-  if (has_context_parameter_) return context_parameter_;
-  has_context_parameter_ = true;
-
-  // Allocate the next available parameter for the context parameter.
-  int num_parameters = 0;
-  for (const XlaExpression& arg : args_) {
-    if (!arg.has_constant_value()) {
-      ++num_parameters;
-    }
-  }
-  context_parameter_ = builder_->Parameter(
-      num_parameters, xla::ShapeUtil::MakeOpaqueShape(), "tf_context");
-  return context_parameter_;
-}
-
 string XlaContext::DebugString() { return "TLA JIT context"; }
 
 // This is called by the Retval Op to associate a computed value
diff --git a/tensorflow/compiler/tf2xla/xla_context.h b/tensorflow/compiler/tf2xla/xla_context.h
index 55d2995987..ebd758d154 100644
--- a/tensorflow/compiler/tf2xla/xla_context.h
+++ b/tensorflow/compiler/tf2xla/xla_context.h
@@ -56,15 +56,10 @@ class XlaContext : public ResourceBase {
   xla::ComputationBuilder* builder();
 
   bool allow_cpu_custom_calls() const { return allow_cpu_custom_calls_; }
-  bool has_context_parameter() const { return has_context_parameter_; }
 
   const std::vector<XlaExpression>& args() const { return args_; }
   void set_args(std::vector<XlaExpression> args);
 
-  // Get the runtime context parameter, adding one if it does not already exist.
-  // Dies if not compiling a local executable.
-  const xla::ComputationDataHandle& GetOrCreateRuntimeContextParameter();
-
   const std::vector<XlaExpression>& retvals() { return retvals_; }
 
   // This is called by the Retval Op to associate a computed value
@@ -124,13 +119,6 @@ class XlaContext : public ResourceBase {
   // run-time computation outptus.
   const bool resolve_compile_time_constants_;
 
-  // When 'has_context_parameter_' is true, this is the computation handle
-  // for an additional final parameter to the computation, through which will be
-  // passed a XlaLocalRuntimeContext* at runtime. Created on demand by
-  // GetOrCreateRuntimeContextParameter().
-  bool has_context_parameter_ = false;
-  xla::ComputationDataHandle context_parameter_;
-
   // Arguments to the Tensorflow graph, indexed by _Arg index.
   // Includes both compile-time constant arguments and runtime parameters.
   std::vector<XlaExpression> args_;
diff --git a/tensorflow/compiler/tf2xla/xla_jit_compiled_cpu_function.cc b/tensorflow/compiler/tf2xla/xla_jit_compiled_cpu_function.cc
index f727f20464..584417bc72 100644
--- a/tensorflow/compiler/tf2xla/xla_jit_compiled_cpu_function.cc
+++ b/tensorflow/compiler/tf2xla/xla_jit_compiled_cpu_function.cc
@@ -37,27 +37,14 @@ namespace {
 
 // Returns a vector of positional argument buffer sizes.
 xla::StatusOr<std::vector<intptr_t>> ComputeArgSizes(
-    const xla::ProgramShape& program_shape, bool requires_runtime_context) {
+    const xla::ProgramShape& program_shape) {
   std::vector<intptr_t> arg_sizes;
   const size_t num_args = program_shape.parameters_size();
   arg_sizes.reserve(num_args);
   for (int i = 0; i < num_args; ++i) {
     const xla::Shape& arg_shape = program_shape.parameters(i);
-    if (i == num_args - 1 && requires_runtime_context) {
-      // If the compiled function needs an XlaLocalRuntimeContext* arg, it's
-      // always last, and must be represented as an opaque type.
-      const xla::PrimitiveType type = arg_shape.element_type();
-      if (type != xla::OPAQUE) {
-        return errors::InvalidArgument(
-            "expected final context arg to be opaque, but got type: ",
-            xla::PrimitiveType_Name(type), ", from program shape: ",
-            xla::ShapeUtil::HumanString(program_shape));
-      }
-      arg_sizes.push_back(-1);
-    } else {
-      constexpr size_t kPointerSize = sizeof(void*);
-      arg_sizes.push_back(xla::ShapeUtil::ByteSizeOf(arg_shape, kPointerSize));
-    }
+    constexpr size_t kPointerSize = sizeof(void*);
+    arg_sizes.push_back(xla::ShapeUtil::ByteSizeOf(arg_shape, kPointerSize));
   }
   return std::move(arg_sizes);
 }
@@ -129,9 +116,8 @@ XlaJitCompiledCpuFunction::Compile(
   TF_ASSIGN_OR_RETURN(xla::LocalClient * client,
                       xla::ClientLibrary::GetOrCreateLocalClient());
   xla::Computation computation;
-  bool requires_runtime_context;
-  TF_RETURN_IF_ERROR(tensorflow::ConvertGraphDefToXla(
-      graph_def, config, client, &computation, &requires_runtime_context));
+  TF_RETURN_IF_ERROR(tensorflow::ConvertGraphDefToXla(graph_def, config, client,
+                                                      &computation));
 
   // Get and verify the program shape.
   TF_ASSIGN_OR_RETURN(std::unique_ptr<xla::ProgramShape> program_shape,
@@ -167,9 +153,8 @@ XlaJitCompiledCpuFunction::Compile(
       cpu_executable->buffer_assignment();
 
   // Compute buffer sizes and the result index, needed to run the raw function.
-  TF_ASSIGN_OR_RETURN(
-      std::vector<intptr_t> arg_sizes,
-      ComputeArgSizes(*program_shape, requires_runtime_context));
+  TF_ASSIGN_OR_RETURN(std::vector<intptr_t> arg_sizes,
+                      ComputeArgSizes(*program_shape));
   TF_ASSIGN_OR_RETURN(std::vector<intptr_t> temp_sizes,
                       ComputeTempSizes(buffer_assignment));
   TF_ASSIGN_OR_RETURN(size_t result_index,
@@ -188,7 +173,6 @@ XlaJitCompiledCpuFunction::Compile(
   jit->static_data_.temp_sizes = jit->temp_sizes_.data();
   jit->static_data_.num_temps = jit->temp_sizes_.size();
   jit->static_data_.result_index = result_index;
-  jit->static_data_.requires_runtime_context = requires_runtime_context;
   // Optional metadata is collected and set below.
   CollectNames(config.feed(), &jit->nonempty_arg_names_, &jit->arg_names_);
   CollectNames(config.fetch(), &jit->nonempty_result_names_,
diff --git a/tensorflow/compiler/tf2xla/xla_local_runtime_context.h b/tensorflow/compiler/tf2xla/xla_local_runtime_context.h
deleted file mode 100644
index dca420d6ee..0000000000
--- a/tensorflow/compiler/tf2xla/xla_local_runtime_context.h
+++ /dev/null
@@ -1,55 +0,0 @@
-/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-#ifndef TENSORFLOW_COMPILER_TF2XLA_XLA_LOCAL_RUNTIME_CONTEXT_H_
-#define TENSORFLOW_COMPILER_TF2XLA_XLA_LOCAL_RUNTIME_CONTEXT_H_
-
-#include "tensorflow/core/platform/macros.h"
-#include "tensorflow/core/platform/types.h"
-
-// Forward-declare the ThreadPoolDevice so that it can be ignored unless it's
-// actually used.  E.g. some ahead-of-time compiled computations don't need a
-// thread pool.
-namespace Eigen {
-struct ThreadPoolDevice;
-}
-
-namespace tensorflow {
-
-// An instance of this class is passed to each call from tensorflow into a
-// compiled XLA computation. See xla_launch_ops.cc.
-struct XlaLocalRuntimeContext {
- public:
-  XlaLocalRuntimeContext() {}
-
-  // Kernels implemented using custom call ops set this if they encounter an
-  // error. The error is checked after the entire XLA computation is
-  // complete.
-  //
-  // error+error_msg are used instead of Status to reduce the binary size
-  // overhead for ahead-of-time compiled binaries.
-  bool error = false;
-  string error_msg;
-
-  // Kernels that need a thread pool can get it from here.
-  const Eigen::ThreadPoolDevice* thread_pool = nullptr;
-
- private:
-  TF_DISALLOW_COPY_AND_ASSIGN(XlaLocalRuntimeContext);
-};
-
-}  // namespace tensorflow
-
-#endif  // TENSORFLOW_COMPILER_TF2XLA_XLA_LOCAL_RUNTIME_CONTEXT_H_
-- 
GitLab


From 8167f3587f3953e191dffca388cbdd4a837aa231 Mon Sep 17 00:00:00 2001
From: Gunhan Gulsoy <gunan@google.com>
Date: Fri, 1 Dec 2017 11:42:36 -0800
Subject: [PATCH 0518/1225] Automated g4 rollback of changelist 177614538

PiperOrigin-RevId: 177619402
---
 .../core/platform/cloud/curl_http_request.cc  |  29 +-
 .../core/platform/cloud/curl_http_request.h   |  12 -
 .../core/platform/cloud/gcs_dns_cache_test.cc |   5 -
 .../core/platform/cloud/gcs_file_system.cc    |  65 +-
 .../core/platform/cloud/gcs_file_system.h     |  42 +-
 .../platform/cloud/gcs_file_system_test.cc    | 649 +++++++-----------
 tensorflow/core/platform/cloud/http_request.h |  10 -
 .../core/platform/cloud/http_request_fake.h   |  23 +-
 8 files changed, 263 insertions(+), 572 deletions(-)

diff --git a/tensorflow/core/platform/cloud/curl_http_request.cc b/tensorflow/core/platform/cloud/curl_http_request.cc
index f7fbfe971e..4581a0870a 100644
--- a/tensorflow/core/platform/cloud/curl_http_request.cc
+++ b/tensorflow/core/platform/cloud/curl_http_request.cc
@@ -29,6 +29,16 @@ namespace {
 // Set to 1 to enable verbose debug output from curl.
 constexpr uint64 kVerboseOutput = 0;
 
+// Timeout for the whole request. Set only to prevent hanging indefinitely.
+constexpr uint32 kRequestTimeoutSeconds = 3600;  // 1 hour
+
+// Timeout for the connection phase.
+constexpr uint32 kConnectTimeoutSeconds = 120;  // 2 minutes
+
+// The maximum period of request inactivity, after which the request
+// is terminated.
+constexpr uint64 kInactivityTimeoutSeconds = 60;  // 1 minute
+
 // Proxy to the real libcurl implementation.
 class LibCurlProxy : public LibCurl {
  public:
@@ -151,6 +161,9 @@ Status CurlHttpRequest::Init() {
       strings::StrCat("TensorFlow/", TF_VERSION_STRING).c_str());
   // Do not use signals for timeouts - does not work in multi-threaded programs.
   libcurl_->curl_easy_setopt(curl_, CURLOPT_NOSIGNAL, 1L);
+  libcurl_->curl_easy_setopt(curl_, CURLOPT_TIMEOUT, kRequestTimeoutSeconds);
+  libcurl_->curl_easy_setopt(curl_, CURLOPT_CONNECTTIMEOUT,
+                             kConnectTimeoutSeconds);
   libcurl_->curl_easy_setopt(curl_, CURLOPT_HTTP_VERSION,
                              CURL_HTTP_VERSION_2_0);
 
@@ -323,16 +336,6 @@ Status CurlHttpRequest::SetResultBuffer(std::vector<char>* out_buffer) {
   return Status::OK();
 }
 
-Status CurlHttpRequest::SetTimeouts(uint32 connection, uint32 inactivity,
-                                    uint32 total) {
-  TF_RETURN_IF_ERROR(CheckInitialized());
-  TF_RETURN_IF_ERROR(CheckNotSent());
-  connect_timeout_secs_ = connection;
-  inactivity_timeout_secs_ = inactivity;
-  request_timeout_secs_ = total;
-  return Status::OK();
-}
-
 size_t CurlHttpRequest::WriteCallback(const void* ptr, size_t size,
                                       size_t nmemb, void* this_object) {
   CHECK(ptr);
@@ -396,10 +399,6 @@ Status CurlHttpRequest::Send() {
   libcurl_->curl_easy_setopt(curl_, CURLOPT_HEADERFUNCTION,
                              &CurlHttpRequest::HeaderCallback);
 
-  libcurl_->curl_easy_setopt(curl_, CURLOPT_TIMEOUT, request_timeout_secs_);
-  libcurl_->curl_easy_setopt(curl_, CURLOPT_CONNECTTIMEOUT,
-                             connect_timeout_secs_);
-
   char error_buffer[CURL_ERROR_SIZE] = {0};
   libcurl_->curl_easy_setopt(curl_, CURLOPT_ERRORBUFFER, error_buffer);
 
@@ -530,7 +529,7 @@ int CurlHttpRequest::ProgressCallback(void* this_object, curl_off_t dltotal,
     return 0;
   }
 
-  if (now - that->last_progress_timestamp_ > that->inactivity_timeout_secs_) {
+  if (now - that->last_progress_timestamp_ > kInactivityTimeoutSeconds) {
     LOG(ERROR) << "The transmission  of request " << this_object
                << " (URI: " << that->uri_ << ") has been stuck at "
                << current_progress << " of " << dltotal + ultotal
diff --git a/tensorflow/core/platform/cloud/curl_http_request.h b/tensorflow/core/platform/cloud/curl_http_request.h
index cf249450df..9e5ae61016 100644
--- a/tensorflow/core/platform/cloud/curl_http_request.h
+++ b/tensorflow/core/platform/cloud/curl_http_request.h
@@ -120,9 +120,6 @@ class CurlHttpRequest : public HttpRequest {
   // Url encodes str and returns a new string.
   string EscapeString(const string& str) override;
 
-  Status SetTimeouts(uint32 connection, uint32 inactivity,
-                     uint32 total) override;
-
  private:
   /// A write callback in the form which can be accepted by libcurl.
   static size_t WriteCallback(const void* ptr, size_t size, size_t nmemb,
@@ -165,15 +162,6 @@ class CurlHttpRequest : public HttpRequest {
   // The last progress in terms of bytes transmitted.
   curl_off_t last_progress_bytes_ = 0;
 
-  // The maximum period of request inactivity.
-  uint32 inactivity_timeout_secs_ = 60;  // 1 minute
-
-  // Timeout for the connection phase.
-  uint32 connect_timeout_secs_ = 120;  // 2 minutes
-
-  // Tiemout for the whole request. Set only to prevent hanging indefinitely.
-  uint32 request_timeout_secs_ = 3600;  // 1 hour
-
   // Members to enforce the usage flow.
   bool is_initialized_ = false;
   bool is_uri_set_ = false;
diff --git a/tensorflow/core/platform/cloud/gcs_dns_cache_test.cc b/tensorflow/core/platform/cloud/gcs_dns_cache_test.cc
index 266879ddf5..8d1a108f30 100644
--- a/tensorflow/core/platform/cloud/gcs_dns_cache_test.cc
+++ b/tensorflow/core/platform/cloud/gcs_dns_cache_test.cc
@@ -64,11 +64,6 @@ class TestHttpRequest : public HttpRequest {
   Status Send() override { return Status::OK(); }
   string EscapeString(const string& str) override { return ""; }
 
-  Status SetTimeouts(uint32 connection, uint32 inactivity,
-                     uint32 total) override {
-    return Status::OK();
-  }
-
   std::map<string, string> resolve_overrides_;
 };
 
diff --git a/tensorflow/core/platform/cloud/gcs_file_system.cc b/tensorflow/core/platform/cloud/gcs_file_system.cc
index ab82643ad5..45e9b05092 100644
--- a/tensorflow/core/platform/cloud/gcs_file_system.cc
+++ b/tensorflow/core/platform/cloud/gcs_file_system.cc
@@ -94,20 +94,6 @@ const FileStatistics DIRECTORY_STAT(0, 0, true);
 // variable to a positive integer describing the frequency used to refresh the
 // userspace DNS cache.
 constexpr char kResolveCacheSecs[] = "GCS_RESOLVE_REFRESH_SECS";
-// The environment variable to configure the http request's connection timeout.
-constexpr char kRequestConnectionTimeout[] =
-    "GCS_REQUEST_CONNECTION_TIMEOUT_SECS";
-// The environment varaible to configure the http request's idle timeout.
-constexpr char kRequestIdleTimeout[] = "GCS_REQUEST_IDLE_TIMEOUT_SECS";
-// The environment variable to configure the overall request timeout for
-// metadata requests.
-constexpr char kMetadataRequestTimeout[] = "GCS_METADATA_REQUEST_TIMEOUT_SECS";
-// The environment variable to configure the overall request timeout for
-// block reads requests.
-constexpr char kReadRequestTimeout[] = "GCS_READ_REQUEST_TIMEOUT_SECS";
-// The environment variable to configure the overall request timeout for
-// upload requests.
-constexpr char kWriteRequestTimeout[] = "GCS_WRITE_REQUEST_TIMEOUT_SECS";
 
 Status GetTmpFilename(string* filename) {
   if (!filename) {
@@ -297,14 +283,12 @@ class GcsWritableFile : public WritableFile {
   GcsWritableFile(const string& bucket, const string& object,
                   AuthProvider* auth_provider,
                   HttpRequest::Factory* http_request_factory,
-                  GcsFileSystem::TimeoutConfig* timeouts,
                   std::function<void()> file_cache_erase,
                   int64 initial_retry_delay_usec)
       : bucket_(bucket),
         object_(object),
         auth_provider_(auth_provider),
         http_request_factory_(http_request_factory),
-        timeouts_(timeouts),
         file_cache_erase_(std::move(file_cache_erase)),
         sync_needed_(true),
         initial_retry_delay_usec_(initial_retry_delay_usec) {
@@ -323,14 +307,12 @@ class GcsWritableFile : public WritableFile {
                   AuthProvider* auth_provider,
                   const string& tmp_content_filename,
                   HttpRequest::Factory* http_request_factory,
-                  GcsFileSystem::TimeoutConfig* timeouts,
                   std::function<void()> file_cache_erase,
                   int64 initial_retry_delay_usec)
       : bucket_(bucket),
         object_(object),
         auth_provider_(auth_provider),
         http_request_factory_(http_request_factory),
-        timeouts_(timeouts),
         file_cache_erase_(std::move(file_cache_erase)),
         sync_needed_(true),
         initial_retry_delay_usec_(initial_retry_delay_usec) {
@@ -464,8 +446,6 @@ class GcsWritableFile : public WritableFile {
                                           std::to_string(file_size)));
     TF_RETURN_IF_ERROR(request->SetPostEmptyBody());
     TF_RETURN_IF_ERROR(request->SetResultBuffer(&output_buffer));
-    TF_RETURN_IF_ERROR(request->SetTimeouts(timeouts_->connect, timeouts_->idle,
-                                            timeouts_->metadata));
     TF_RETURN_WITH_CONTEXT_IF_ERROR(
         request->Send(), " when initiating an upload to ", GetGcsPath());
     *session_uri = request->GetResponseHeader("Location");
@@ -497,8 +477,6 @@ class GcsWritableFile : public WritableFile {
     TF_RETURN_IF_ERROR(request->Init());
     TF_RETURN_IF_ERROR(request->SetUri(session_uri));
     TF_RETURN_IF_ERROR(request->AddAuthBearerHeader(auth_token));
-    TF_RETURN_IF_ERROR(request->SetTimeouts(timeouts_->connect, timeouts_->idle,
-                                            timeouts_->metadata));
     TF_RETURN_IF_ERROR(request->AddHeader(
         "Content-Range", strings::StrCat("bytes */", file_size)));
     TF_RETURN_IF_ERROR(request->SetPutEmptyBody());
@@ -553,9 +531,6 @@ class GcsWritableFile : public WritableFile {
           "Content-Range", strings::StrCat("bytes ", start_offset, "-",
                                            file_size - 1, "/", file_size)));
     }
-    TF_RETURN_IF_ERROR(request->SetTimeouts(timeouts_->connect, timeouts_->idle,
-                                            timeouts_->write));
-
     TF_RETURN_IF_ERROR(
         request->SetPutFromFile(tmp_content_filename_, start_offset));
     TF_RETURN_WITH_CONTEXT_IF_ERROR(request->Send(), " when uploading ",
@@ -575,7 +550,6 @@ class GcsWritableFile : public WritableFile {
   string tmp_content_filename_;
   std::ofstream outfile_;
   HttpRequest::Factory* http_request_factory_;
-  GcsFileSystem::TimeoutConfig* timeouts_;
   std::function<void()> file_cache_erase_;
   bool sync_needed_;  // whether there is buffered data that needs to be synced
   int64 initial_retry_delay_usec_;
@@ -661,25 +635,6 @@ GcsFileSystem::GcsFileSystem()
                 &resolve_frequency_secs)) {
     dns_cache_.reset(new GcsDnsCache(resolve_frequency_secs));
   }
-  // Apply the overrides for request timeouts
-  uint32 timeout_value;
-  if (GetEnvVar(kRequestConnectionTimeout, strings::safe_strtou32,
-                &timeout_value)) {
-    timeouts_.connect = timeout_value;
-  }
-  if (GetEnvVar(kRequestIdleTimeout, strings::safe_strtou32, &timeout_value)) {
-    timeouts_.idle = timeout_value;
-  }
-  if (GetEnvVar(kMetadataRequestTimeout, strings::safe_strtou32,
-                &timeout_value)) {
-    timeouts_.metadata = timeout_value;
-  }
-  if (GetEnvVar(kReadRequestTimeout, strings::safe_strtou32, &timeout_value)) {
-    timeouts_.read = timeout_value;
-  }
-  if (GetEnvVar(kWriteRequestTimeout, strings::safe_strtou32, &timeout_value)) {
-    timeouts_.write = timeout_value;
-  }
 }
 
 GcsFileSystem::GcsFileSystem(
@@ -688,8 +643,7 @@ GcsFileSystem::GcsFileSystem(
     size_t block_size, size_t max_bytes, uint64 max_staleness,
     uint64 stat_cache_max_age, size_t stat_cache_max_entries,
     uint64 matching_paths_cache_max_age,
-    size_t matching_paths_cache_max_entries, int64 initial_retry_delay_usec,
-    TimeoutConfig timeouts)
+    size_t matching_paths_cache_max_entries, int64 initial_retry_delay_usec)
     : auth_provider_(std::move(auth_provider)),
       http_request_factory_(std::move(http_request_factory)),
       file_block_cache_(
@@ -697,7 +651,6 @@ GcsFileSystem::GcsFileSystem(
       stat_cache_(new StatCache(stat_cache_max_age, stat_cache_max_entries)),
       matching_paths_cache_(new MatchingPathsCache(
           matching_paths_cache_max_age, matching_paths_cache_max_entries)),
-      timeouts_(timeouts),
       initial_retry_delay_usec_(initial_retry_delay_usec) {}
 
 Status GcsFileSystem::NewRandomAccessFile(
@@ -736,8 +689,6 @@ Status GcsFileSystem::LoadBufferFromGCS(const string& filename, size_t offset,
   TF_RETURN_IF_ERROR(request->AddAuthBearerHeader(auth_token));
   TF_RETURN_IF_ERROR(request->SetRange(offset, offset + n - 1));
   TF_RETURN_IF_ERROR(request->SetResultBuffer(out));
-  TF_RETURN_IF_ERROR(
-      request->SetTimeouts(timeouts_.connect, timeouts_.idle, timeouts_.read));
 
   if (dns_cache_) {
     TF_RETURN_IF_ERROR(dns_cache_->AnnotateRequest(request.get()));
@@ -772,7 +723,7 @@ Status GcsFileSystem::NewWritableFile(const string& fname,
   TF_RETURN_IF_ERROR(ParseGcsPath(fname, false, &bucket, &object));
   result->reset(new GcsWritableFile(
       bucket, object, auth_provider_.get(), http_request_factory_.get(),
-      &timeouts_, [this, fname]() { file_block_cache_->RemoveFile(fname); },
+      [this, fname]() { file_block_cache_->RemoveFile(fname); },
       initial_retry_delay_usec_));
   return Status::OK();
 }
@@ -813,7 +764,7 @@ Status GcsFileSystem::NewAppendableFile(const string& fname,
   TF_RETURN_IF_ERROR(ParseGcsPath(fname, false, &bucket, &object));
   result->reset(new GcsWritableFile(
       bucket, object, auth_provider_.get(), old_content_filename,
-      http_request_factory_.get(), &timeouts_,
+      http_request_factory_.get(),
       [this, fname]() { file_block_cache_->RemoveFile(fname); },
       initial_retry_delay_usec_));
   return Status::OK();
@@ -901,8 +852,6 @@ Status GcsFileSystem::StatForObject(const string& fname, const string& bucket,
             "?fields=size%2Cupdated")));
         TF_RETURN_IF_ERROR(request->AddAuthBearerHeader(auth_token));
         TF_RETURN_IF_ERROR(request->SetResultBuffer(&output_buffer));
-        TF_RETURN_IF_ERROR(request->SetTimeouts(
-            timeouts_.connect, timeouts_.idle, timeouts_.metadata));
 
         if (dns_cache_) {
           TF_RETURN_IF_ERROR(dns_cache_->AnnotateRequest(request.get()));
@@ -953,8 +902,6 @@ Status GcsFileSystem::BucketExists(const string& bucket, bool* result) {
   TF_RETURN_IF_ERROR(
       request->SetUri(strings::StrCat(kGcsUriBase, "b/", bucket)));
   TF_RETURN_IF_ERROR(request->AddAuthBearerHeader(auth_token));
-  TF_RETURN_IF_ERROR(request->SetTimeouts(timeouts_.connect, timeouts_.idle,
-                                          timeouts_.metadata));
   const Status status = request->Send();
   switch (status.code()) {
     case errors::Code::OK:
@@ -1086,8 +1033,6 @@ Status GcsFileSystem::GetChildrenBounded(const string& dirname,
     TF_RETURN_IF_ERROR(request->SetUri(uri));
     TF_RETURN_IF_ERROR(request->AddAuthBearerHeader(auth_token));
     TF_RETURN_IF_ERROR(request->SetResultBuffer(&output_buffer));
-    TF_RETURN_IF_ERROR(request->SetTimeouts(timeouts_.connect, timeouts_.idle,
-                                            timeouts_.metadata));
 
     if (dns_cache_) {
       TF_RETURN_IF_ERROR(dns_cache_->AnnotateRequest(request.get()));
@@ -1212,8 +1157,6 @@ Status GcsFileSystem::DeleteFile(const string& fname) {
   TF_RETURN_IF_ERROR(request->SetUri(strings::StrCat(
       kGcsUriBase, "b/", bucket, "/o/", request->EscapeString(object))));
   TF_RETURN_IF_ERROR(request->AddAuthBearerHeader(auth_token));
-  TF_RETURN_IF_ERROR(request->SetTimeouts(timeouts_.connect, timeouts_.idle,
-                                          timeouts_.metadata));
   TF_RETURN_IF_ERROR(request->SetDeleteRequest());
   TF_RETURN_WITH_CONTEXT_IF_ERROR(request->Send(), " when deleting ", fname);
   file_block_cache_->RemoveFile(fname);
@@ -1308,8 +1251,6 @@ Status GcsFileSystem::RenameObject(const string& src, const string& target) {
       request->EscapeString(target_object))));
   TF_RETURN_IF_ERROR(request->AddAuthBearerHeader(auth_token));
   TF_RETURN_IF_ERROR(request->SetPostEmptyBody());
-  TF_RETURN_IF_ERROR(request->SetTimeouts(timeouts_.connect, timeouts_.idle,
-                                          timeouts_.metadata));
   std::vector<char> output_buffer;
   TF_RETURN_IF_ERROR(request->SetResultBuffer(&output_buffer));
   TF_RETURN_WITH_CONTEXT_IF_ERROR(request->Send(), " when renaming ", src,
diff --git a/tensorflow/core/platform/cloud/gcs_file_system.h b/tensorflow/core/platform/cloud/gcs_file_system.h
index 7cfcebd5c9..4b4853c838 100644
--- a/tensorflow/core/platform/cloud/gcs_file_system.h
+++ b/tensorflow/core/platform/cloud/gcs_file_system.h
@@ -35,8 +35,6 @@ namespace tensorflow {
 /// which adds retry logic to GCS operations.
 class GcsFileSystem : public FileSystem {
  public:
-  struct TimeoutConfig;
-
   GcsFileSystem();
   GcsFileSystem(std::unique_ptr<AuthProvider> auth_provider,
                 std::unique_ptr<HttpRequest::Factory> http_request_factory,
@@ -44,7 +42,7 @@ class GcsFileSystem : public FileSystem {
                 uint64 stat_cache_max_age, size_t stat_cache_max_entries,
                 uint64 matching_paths_cache_max_age,
                 size_t matching_paths_cache_max_entries,
-                int64 initial_retry_delay_usec, TimeoutConfig timeouts);
+                int64 initial_retry_delay_usec);
 
   Status NewRandomAccessFile(
       const string& filename,
@@ -89,7 +87,6 @@ class GcsFileSystem : public FileSystem {
   size_t block_size() const { return file_block_cache_->block_size(); }
   size_t max_bytes() const { return file_block_cache_->max_bytes(); }
   uint64 max_staleness() const { return file_block_cache_->max_staleness(); }
-  TimeoutConfig timeouts() const { return timeouts_; }
 
   uint64 stat_cache_max_age() const { return stat_cache_->max_age(); }
   size_t stat_cache_max_entries() const { return stat_cache_->max_entries(); }
@@ -101,41 +98,6 @@ class GcsFileSystem : public FileSystem {
     return matching_paths_cache_->max_entries();
   }
 
-  /// Structure containing the information for timeouts related to accessing the
-  /// GCS APIs.
-  ///
-  /// All values are in seconds.
-  struct TimeoutConfig {
-    // The request connection timeout. If a connection cannot be established
-    // within `connect` seconds, abort the request.
-    uint32 connect = 120;  // 2 minutes
-
-    // The request idle timeout. If a request has seen no activity in `idle`
-    // seconds, abort the request.
-    uint32 idle = 60;  // 1 minute
-
-    // The maximum total time a metadata request can take. If a request has not
-    // completed within `metadata` seconds, the request is aborted.
-    uint32 metadata = 3600;  // 1 hour
-
-    // The maximum total time a block read request can take. If a request has
-    // not completed within `read` seconds, the request is aborted.
-    uint32 read = 3600;  // 1 hour
-
-    // The maximum total time an upload request can take. If a request has not
-    // completed within `write` seconds, the request is aborted.
-    uint32 write = 3600;  // 1 hour
-
-    TimeoutConfig() {}
-    TimeoutConfig(uint32 connect, uint32 idle, uint32 metadata, uint32 read,
-                  uint32 write)
-        : connect(connect),
-          idle(idle),
-          metadata(metadata),
-          read(read),
-          write(write) {}
-  };
-
  private:
   /// \brief Checks if the bucket exists. Returns OK if the check succeeded.
   ///
@@ -188,8 +150,6 @@ class GcsFileSystem : public FileSystem {
   using MatchingPathsCache = ExpiringLRUCache<std::vector<string>>;
   std::unique_ptr<MatchingPathsCache> matching_paths_cache_;
 
-  TimeoutConfig timeouts_;
-
   /// The initial delay for exponential backoffs when retrying failed calls.
   const int64 initial_retry_delay_usec_ = 1000000L;
 
diff --git a/tensorflow/core/platform/cloud/gcs_file_system_test.cc b/tensorflow/core/platform/cloud/gcs_file_system_test.cc
index 01f4fd8688..7614ec4d7f 100644
--- a/tensorflow/core/platform/cloud/gcs_file_system_test.cc
+++ b/tensorflow/core/platform/cloud/gcs_file_system_test.cc
@@ -22,8 +22,6 @@ limitations under the License.
 namespace tensorflow {
 namespace {
 
-static GcsFileSystem::TimeoutConfig kTestTimeoutConfig(5, 1, 10, 20, 30);
-
 class FakeAuthProvider : public AuthProvider {
  public:
   Status GetToken(string* token) override {
@@ -37,14 +35,12 @@ TEST(GcsFileSystemTest, NewRandomAccessFile_NoBlockCache) {
       {new FakeHttpRequest(
            "Uri: https://storage.googleapis.com/bucket/random_access.txt\n"
            "Auth Token: fake_token\n"
-           "Range: 0-5\n"
-           "Timeouts: 5 1 20\n",
+           "Range: 0-5\n",
            "012345"),
        new FakeHttpRequest(
            "Uri: https://storage.googleapis.com/bucket/random_access.txt\n"
            "Auth Token: fake_token\n"
-           "Range: 6-11\n"
-           "Timeouts: 5 1 20\n",
+           "Range: 6-11\n",
            "6789")});
   GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
                    std::unique_ptr<HttpRequest::Factory>(
@@ -53,7 +49,7 @@ TEST(GcsFileSystemTest, NewRandomAccessFile_NoBlockCache) {
                    0 /* stat cache max age */, 0 /* stat cache max entries */,
                    0 /* matching paths cache max age */,
                    0 /* matching paths cache max entries */,
-                   0 /* initial retry delay */, kTestTimeoutConfig);
+                   0 /* initial retry delay */);
 
   std::unique_ptr<RandomAccessFile> file;
   TF_EXPECT_OK(fs.NewRandomAccessFile("gs://bucket/random_access.txt", &file));
@@ -77,14 +73,12 @@ TEST(GcsFileSystemTest, NewRandomAccessFile_NoBlockCache_differentN) {
       {new FakeHttpRequest(
            "Uri: https://storage.googleapis.com/bucket/random_access.txt\n"
            "Auth Token: fake_token\n"
-           "Range: 0-2\n"
-           "Timeouts: 5 1 20\n",
+           "Range: 0-2\n",
            "012"),
        new FakeHttpRequest(
            "Uri: https://storage.googleapis.com/bucket/random_access.txt\n"
            "Auth Token: fake_token\n"
-           "Range: 3-12\n"
-           "Timeouts: 5 1 20\n",
+           "Range: 3-12\n",
            "3456789")});
   GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
                    std::unique_ptr<HttpRequest::Factory>(
@@ -93,7 +87,7 @@ TEST(GcsFileSystemTest, NewRandomAccessFile_NoBlockCache_differentN) {
                    0 /* stat cache max age */, 0 /* stat cache max entries */,
                    0 /* matching paths cache max age */,
                    0 /* matching paths cache max entries */,
-                   0 /* initial retry delay */, kTestTimeoutConfig);
+                   0 /* initial retry delay */);
 
   std::unique_ptr<RandomAccessFile> file;
   TF_EXPECT_OK(fs.NewRandomAccessFile("gs://bucket/random_access.txt", &file));
@@ -122,30 +116,26 @@ TEST(GcsFileSystemTest, NewRandomAccessFile_WithBlockCache) {
       {new FakeHttpRequest(
            "Uri: https://storage.googleapis.com/bucket/random_access.txt\n"
            "Auth Token: fake_token\n"
-           "Range: 0-8\n"
-           "Timeouts: 5 1 20\n",
+           "Range: 0-8\n",
            "012345678"),
        new FakeHttpRequest(
            "Uri: https://storage.googleapis.com/bucket/random_access.txt\n"
            "Auth Token: fake_token\n"
-           "Range: 9-17\n"
-           "Timeouts: 5 1 20\n",
+           "Range: 9-17\n",
            "9abcde"),
        new FakeHttpRequest(
            "Uri: https://storage.googleapis.com/bucket/random_access.txt\n"
            "Auth Token: fake_token\n"
-           "Range: 18-26\n"
-           "Timeouts: 5 1 20\n",
+           "Range: 18-26\n",
            "")});
-  GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
-                   std::unique_ptr<HttpRequest::Factory>(
-                       new FakeHttpRequestFactory(&requests)),
-                   9 /* block size */, 18 /* max bytes */,
-                   0 /* max staleness */, 0 /* stat cache max age */,
-                   0 /* stat cache max entries */,
-                   0 /* matching paths cache max age */,
-                   0 /* matching paths cache max entries */,
-                   0 /* initial retry delay */, kTestTimeoutConfig);
+  GcsFileSystem fs(
+      std::unique_ptr<AuthProvider>(new FakeAuthProvider),
+      std::unique_ptr<HttpRequest::Factory>(
+          new FakeHttpRequestFactory(&requests)),
+      9 /* block size */, 18 /* max bytes */, 0 /* max staleness */,
+      0 /* stat cache max age */, 0 /* stat cache max entries */,
+      0 /* matching paths cache max age */,
+      0 /* matching paths cache max entries */, 0 /* initial retry delay */);
 
   char scratch[100];
   StringPiece result;
@@ -201,23 +191,20 @@ TEST(GcsFileSystemTest, NewRandomAccessFile_WithBlockCache_MaxStaleness) {
   std::vector<HttpRequest*> requests(
       {new FakeHttpRequest("Uri: https://storage.googleapis.com/bucket/object\n"
                            "Auth Token: fake_token\n"
-                           "Range: 0-7\n"
-                           "Timeouts: 5 1 20\n",
+                           "Range: 0-7\n",
                            "01234567"),
        new FakeHttpRequest("Uri: https://storage.googleapis.com/bucket/object\n"
                            "Auth Token: fake_token\n"
-                           "Range: 8-15\n"
-                           "Timeouts: 5 1 20\n",
+                           "Range: 8-15\n",
                            "89abcdef")});
-  GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
-                   std::unique_ptr<HttpRequest::Factory>(
-                       new FakeHttpRequestFactory(&requests)),
-                   8 /* block size */, 16 /* max bytes */,
-                   3600 /* max staleness */, 0 /* stat cache max age */,
-                   0 /* stat cache max entries */,
-                   0 /* matching paths cache max age */,
-                   0 /* matching paths cache max entries */,
-                   0 /* initial retry delay */, kTestTimeoutConfig);
+  GcsFileSystem fs(
+      std::unique_ptr<AuthProvider>(new FakeAuthProvider),
+      std::unique_ptr<HttpRequest::Factory>(
+          new FakeHttpRequestFactory(&requests)),
+      8 /* block size */, 16 /* max bytes */, 3600 /* max staleness */,
+      0 /* stat cache max age */, 0 /* stat cache max entries */,
+      0 /* matching paths cache max age */,
+      0 /* matching paths cache max entries */, 0 /* initial retry delay */);
   char scratch[100];
   StringPiece result;
   // There should only be two HTTP requests issued to GCS even though we iterate
@@ -251,15 +238,14 @@ TEST(GcsFileSystemTest, NewRandomAccessFile_WithBlockCache_MaxStaleness) {
 
 TEST(GcsFileSystemTest, NewRandomAccessFile_NoObjectName) {
   std::vector<HttpRequest*> requests;
-  GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
-                   std::unique_ptr<HttpRequest::Factory>(
-                       new FakeHttpRequestFactory(&requests)),
-                   0 /* read ahead bytes */, 0 /* max bytes */,
-                   0 /* max staleness */, 0 /* stat cache max age */,
-                   0 /* stat cache max entries */,
-                   0 /* matching paths cache max age */,
-                   0 /* matching paths cache max entries */,
-                   0 /* initial retry delay */, kTestTimeoutConfig);
+  GcsFileSystem fs(
+      std::unique_ptr<AuthProvider>(new FakeAuthProvider),
+      std::unique_ptr<HttpRequest::Factory>(
+          new FakeHttpRequestFactory(&requests)),
+      0 /* read ahead bytes */, 0 /* max bytes */, 0 /* max staleness */,
+      0 /* stat cache max age */, 0 /* stat cache max entries */,
+      0 /* matching paths cache max age */,
+      0 /* matching paths cache max entries */, 0 /* initial retry delay */);
 
   std::unique_ptr<RandomAccessFile> file;
   EXPECT_EQ(errors::Code::INVALID_ARGUMENT,
@@ -271,28 +257,24 @@ TEST(GcsFileSystemTest, NewWritableFile) {
       {new FakeHttpRequest(
            "Uri: https://storage.googleapis.com/bucket/path%2Fwriteable\n"
            "Auth Token: fake_token\n"
-           "Range: 0-7\n"
-           "Timeouts: 5 1 20\n",
+           "Range: 0-7\n",
            "01234567"),
        new FakeHttpRequest(
            "Uri: https://www.googleapis.com/upload/storage/v1/b/bucket/o?"
            "uploadType=resumable&name=path%2Fwriteable\n"
            "Auth Token: fake_token\n"
            "Header X-Upload-Content-Length: 17\n"
-           "Post: yes\n"
-           "Timeouts: 5 1 10\n",
+           "Post: yes\n",
            "", {{"Location", "https://custom/upload/location"}}),
        new FakeHttpRequest("Uri: https://custom/upload/location\n"
                            "Auth Token: fake_token\n"
                            "Header Content-Range: bytes 0-16/17\n"
-                           "Timeouts: 5 1 30\n"
                            "Put body: content1,content2\n",
                            ""),
        new FakeHttpRequest(
            "Uri: https://storage.googleapis.com/bucket/path%2Fwriteable\n"
            "Auth Token: fake_token\n"
-           "Range: 0-7\n"
-           "Timeouts: 5 1 20\n",
+           "Range: 0-7\n",
            "01234567")});
   GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
                    std::unique_ptr<HttpRequest::Factory>(
@@ -301,7 +283,7 @@ TEST(GcsFileSystemTest, NewWritableFile) {
                    0 /* stat cache max age */, 0 /* stat cache max entries */,
                    0 /* matching paths cache max age */,
                    0 /* matching paths cache max entries */,
-                   0 /* initial retry delay */, kTestTimeoutConfig);
+                   0 /* initial retry delay */);
 
   // Read from the file first, to fill the block cache.
   std::unique_ptr<RandomAccessFile> rfile;
@@ -333,39 +315,33 @@ TEST(GcsFileSystemTest, NewWritableFile_ResumeUploadSucceeds) {
            "uploadType=resumable&name=path%2Fwriteable.txt\n"
            "Auth Token: fake_token\n"
            "Header X-Upload-Content-Length: 17\n"
-           "Post: yes\n"
-           "Timeouts: 5 1 10\n",
+           "Post: yes\n",
            "", {{"Location", "https://custom/upload/location"}}),
        new FakeHttpRequest("Uri: https://custom/upload/location\n"
                            "Auth Token: fake_token\n"
                            "Header Content-Range: bytes 0-16/17\n"
-                           "Timeouts: 5 1 30\n"
                            "Put body: content1,content2\n",
                            "", errors::Unavailable("503"), 503),
        new FakeHttpRequest("Uri: https://custom/upload/location\n"
                            "Auth Token: fake_token\n"
-                           "Timeouts: 5 1 10\n"
                            "Header Content-Range: bytes */17\n"
                            "Put: yes\n",
                            "", errors::FailedPrecondition("308"), nullptr,
-                           {{"Range", "0-10"}}, 308, {}),
+                           {{"Range", "0-10"}}, 308),
        new FakeHttpRequest("Uri: https://custom/upload/location\n"
                            "Auth Token: fake_token\n"
                            "Header Content-Range: bytes 11-16/17\n"
-                           "Timeouts: 5 1 30\n"
                            "Put body: ntent2\n",
                            "", errors::Unavailable("503"), 503),
        new FakeHttpRequest("Uri: https://custom/upload/location\n"
                            "Auth Token: fake_token\n"
-                           "Timeouts: 5 1 10\n"
                            "Header Content-Range: bytes */17\n"
                            "Put: yes\n",
                            "", errors::FailedPrecondition("308"), nullptr,
-                           {{"Range", "bytes=0-12"}}, 308, {}),
+                           {{"Range", "bytes=0-12"}}, 308),
        new FakeHttpRequest("Uri: https://custom/upload/location\n"
                            "Auth Token: fake_token\n"
                            "Header Content-Range: bytes 13-16/17\n"
-                           "Timeouts: 5 1 30\n"
                            "Put body: ent2\n",
                            "")});
   GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
@@ -375,7 +351,7 @@ TEST(GcsFileSystemTest, NewWritableFile_ResumeUploadSucceeds) {
                    0 /* stat cache max age */, 0 /* stat cache max entries */,
                    0 /* matching paths cache max age */,
                    0 /* matching paths cache max entries */,
-                   0 /* initial retry delay */, kTestTimeoutConfig);
+                   0 /* initial retry delay */);
 
   std::unique_ptr<WritableFile> file;
   TF_EXPECT_OK(fs.NewWritableFile("gs://bucket/path/writeable.txt", &file));
@@ -393,44 +369,38 @@ TEST(GcsFileSystemTest, NewWritableFile_ResumeUploadSucceedsOnGetStatus) {
       {new FakeHttpRequest(
            "Uri: https://storage.googleapis.com/bucket/path%2Fwriteable\n"
            "Auth Token: fake_token\n"
-           "Range: 0-7\n"
-           "Timeouts: 5 1 20\n",
+           "Range: 0-7\n",
            "01234567"),
        new FakeHttpRequest(
            "Uri: https://www.googleapis.com/upload/storage/v1/b/bucket/o?"
            "uploadType=resumable&name=path%2Fwriteable\n"
            "Auth Token: fake_token\n"
            "Header X-Upload-Content-Length: 17\n"
-           "Post: yes\n"
-           "Timeouts: 5 1 10\n",
+           "Post: yes\n",
            "", {{"Location", "https://custom/upload/location"}}),
        new FakeHttpRequest("Uri: https://custom/upload/location\n"
                            "Auth Token: fake_token\n"
                            "Header Content-Range: bytes 0-16/17\n"
-                           "Timeouts: 5 1 30\n"
                            "Put body: content1,content2\n",
                            "", errors::Unavailable("503"), 503),
        new FakeHttpRequest("Uri: https://custom/upload/location\n"
                            "Auth Token: fake_token\n"
-                           "Timeouts: 5 1 10\n"
                            "Header Content-Range: bytes */17\n"
                            "Put: yes\n",
-                           "", Status::OK(), nullptr, {}, 201, {}),
+                           "", Status::OK(), nullptr, {}, 201),
        new FakeHttpRequest(
            "Uri: https://storage.googleapis.com/bucket/path%2Fwriteable\n"
            "Auth Token: fake_token\n"
-           "Range: 0-7\n"
-           "Timeouts: 5 1 20\n",
+           "Range: 0-7\n",
            "01234567")});
-  GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
-                   std::unique_ptr<HttpRequest::Factory>(
-                       new FakeHttpRequestFactory(&requests)),
-                   8 /* block size */, 8 /* max bytes */,
-                   3600 /* max staleness */, 0 /* stat cache max age */,
-                   0 /* stat cache max entries */,
-                   0 /* matching paths cache max age */,
-                   0 /* matching paths cache max entries */,
-                   0 /* initial retry delay */, kTestTimeoutConfig);
+  GcsFileSystem fs(
+      std::unique_ptr<AuthProvider>(new FakeAuthProvider),
+      std::unique_ptr<HttpRequest::Factory>(
+          new FakeHttpRequestFactory(&requests)),
+      8 /* block size */, 8 /* max bytes */, 3600 /* max staleness */,
+      0 /* stat cache max age */, 0 /* stat cache max entries */,
+      0 /* matching paths cache max age */,
+      0 /* matching paths cache max entries */, 0 /* initial retry delay */);
   // Pull the file's first block into the cache. This will trigger the first
   // HTTP request to GCS.
   std::unique_ptr<RandomAccessFile> rfile;
@@ -464,29 +434,25 @@ TEST(GcsFileSystemTest, NewWritableFile_ResumeUploadAllAttemptsFail) {
            "uploadType=resumable&name=path%2Fwriteable.txt\n"
            "Auth Token: fake_token\n"
            "Header X-Upload-Content-Length: 17\n"
-           "Post: yes\n"
-           "Timeouts: 5 1 10\n",
+           "Post: yes\n",
            "", {{"Location", "https://custom/upload/location"}}),
        new FakeHttpRequest("Uri: https://custom/upload/location\n"
                            "Auth Token: fake_token\n"
                            "Header Content-Range: bytes 0-16/17\n"
-                           "Timeouts: 5 1 30\n"
                            "Put body: content1,content2\n",
                            "", errors::Unavailable("503"), 503)});
   for (int i = 0; i < 10; i++) {
     requests.emplace_back(new FakeHttpRequest(
         "Uri: https://custom/upload/location\n"
         "Auth Token: fake_token\n"
-        "Timeouts: 5 1 10\n"
         "Header Content-Range: bytes */17\n"
         "Put: yes\n",
         "", errors::FailedPrecondition("important HTTP error 308"), nullptr,
-        {{"Range", "0-10"}}, 308, {}));
+        {{"Range", "0-10"}}, 308));
     requests.emplace_back(new FakeHttpRequest(
         "Uri: https://custom/upload/location\n"
         "Auth Token: fake_token\n"
         "Header Content-Range: bytes 11-16/17\n"
-        "Timeouts: 5 1 30\n"
         "Put body: ntent2\n",
         "", errors::Unavailable("important HTTP error 503"), 503));
   }
@@ -497,14 +463,12 @@ TEST(GcsFileSystemTest, NewWritableFile_ResumeUploadAllAttemptsFail) {
       "uploadType=resumable&name=path%2Fwriteable.txt\n"
       "Auth Token: fake_token\n"
       "Header X-Upload-Content-Length: 17\n"
-      "Post: yes\n"
-      "Timeouts: 5 1 10\n",
+      "Post: yes\n",
       "", {{"Location", "https://custom/upload/location"}}));
   requests.emplace_back(
       new FakeHttpRequest("Uri: https://custom/upload/location\n"
                           "Auth Token: fake_token\n"
                           "Header Content-Range: bytes 0-16/17\n"
-                          "Timeouts: 5 1 30\n"
                           "Put body: content1,content2\n",
                           ""));
   GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
@@ -514,7 +478,7 @@ TEST(GcsFileSystemTest, NewWritableFile_ResumeUploadAllAttemptsFail) {
                    0 /* stat cache max age */, 0 /* stat cache max entries */,
                    0 /* matching paths cache max age */,
                    0 /* matching paths cache max entries */,
-                   2 /* initial retry delay */, kTestTimeoutConfig);
+                   2 /* initial retry delay */);
 
   std::unique_ptr<WritableFile> file;
   TF_EXPECT_OK(fs.NewWritableFile("gs://bucket/path/writeable.txt", &file));
@@ -536,13 +500,11 @@ TEST(GcsFileSystemTest, NewWritableFile_UploadReturns410) {
            "uploadType=resumable&name=path%2Fwriteable.txt\n"
            "Auth Token: fake_token\n"
            "Header X-Upload-Content-Length: 17\n"
-           "Post: yes\n"
-           "Timeouts: 5 1 10\n",
+           "Post: yes\n",
            "", {{"Location", "https://custom/upload/location"}}),
        new FakeHttpRequest("Uri: https://custom/upload/location\n"
                            "Auth Token: fake_token\n"
                            "Header Content-Range: bytes 0-16/17\n"
-                           "Timeouts: 5 1 30\n"
                            "Put body: content1,content2\n",
                            "", errors::NotFound("important HTTP error 410"),
                            410),
@@ -553,13 +515,11 @@ TEST(GcsFileSystemTest, NewWritableFile_UploadReturns410) {
            "uploadType=resumable&name=path%2Fwriteable.txt\n"
            "Auth Token: fake_token\n"
            "Header X-Upload-Content-Length: 17\n"
-           "Post: yes\n"
-           "Timeouts: 5 1 10\n",
+           "Post: yes\n",
            "", {{"Location", "https://custom/upload/location"}}),
        new FakeHttpRequest("Uri: https://custom/upload/location\n"
                            "Auth Token: fake_token\n"
                            "Header Content-Range: bytes 0-16/17\n"
-                           "Timeouts: 5 1 30\n"
                            "Put body: content1,content2\n",
                            "")});
   GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
@@ -569,7 +529,7 @@ TEST(GcsFileSystemTest, NewWritableFile_UploadReturns410) {
                    0 /* stat cache max age */, 0 /* stat cache max entries */,
                    0 /* matching paths cache max age */,
                    0 /* matching paths cache max entries */,
-                   0 /* initial retry delay */, kTestTimeoutConfig);
+                   0 /* initial retry delay */);
 
   std::unique_ptr<WritableFile> file;
   TF_EXPECT_OK(fs.NewWritableFile("gs://bucket/path/writeable.txt", &file));
@@ -598,7 +558,7 @@ TEST(GcsFileSystemTest, NewWritableFile_NoObjectName) {
                    0 /* stat cache max age */, 0 /* stat cache max entries */,
                    0 /* matching paths cache max age */,
                    0 /* matching paths cache max entries */,
-                   0 /* initial retry delay */, kTestTimeoutConfig);
+                   0 /* initial retry delay */);
 
   std::unique_ptr<WritableFile> file;
   EXPECT_EQ(errors::Code::INVALID_ARGUMENT,
@@ -610,38 +570,33 @@ TEST(GcsFileSystemTest, NewAppendableFile) {
       {new FakeHttpRequest(
            "Uri: https://storage.googleapis.com/bucket/path%2Fappendable\n"
            "Auth Token: fake_token\n"
-           "Range: 0-31\n"
-           "Timeouts: 5 1 20\n",
+           "Range: 0-31\n",
            "content1,"),
        new FakeHttpRequest(
            "Uri: https://www.googleapis.com/upload/storage/v1/b/bucket/o?"
            "uploadType=resumable&name=path%2Fappendable\n"
            "Auth Token: fake_token\n"
            "Header X-Upload-Content-Length: 17\n"
-           "Post: yes\n"
-           "Timeouts: 5 1 10\n",
+           "Post: yes\n",
            "", {{"Location", "https://custom/upload/location"}}),
        new FakeHttpRequest("Uri: https://custom/upload/location\n"
                            "Auth Token: fake_token\n"
                            "Header Content-Range: bytes 0-16/17\n"
-                           "Timeouts: 5 1 30\n"
                            "Put body: content1,content2\n",
                            ""),
        new FakeHttpRequest(
            "Uri: https://storage.googleapis.com/bucket/path%2Fappendable\n"
            "Auth Token: fake_token\n"
-           "Range: 0-31\n"
-           "Timeouts: 5 1 20\n",
+           "Range: 0-31\n",
            "01234567")});
-  GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
-                   std::unique_ptr<HttpRequest::Factory>(
-                       new FakeHttpRequestFactory(&requests)),
-                   32 /* block size */, 32 /* max bytes */,
-                   0 /* max staleness */, 0 /* stat cache max age */,
-                   0 /* stat cache max entries */,
-                   0 /* matching paths cache max age */,
-                   0 /* matching paths cache max entries */,
-                   0 /* initial retry delay */, kTestTimeoutConfig);
+  GcsFileSystem fs(
+      std::unique_ptr<AuthProvider>(new FakeAuthProvider),
+      std::unique_ptr<HttpRequest::Factory>(
+          new FakeHttpRequestFactory(&requests)),
+      32 /* block size */, 32 /* max bytes */, 0 /* max staleness */,
+      0 /* stat cache max age */, 0 /* stat cache max entries */,
+      0 /* matching paths cache max age */,
+      0 /* matching paths cache max entries */, 0 /* initial retry delay */);
 
   // Create an appendable file. This should read the file from GCS, and pull its
   // contents into the block cache.
@@ -674,7 +629,7 @@ TEST(GcsFileSystemTest, NewAppendableFile_NoObjectName) {
                    0 /* stat cache max age */, 0 /* stat cache max entries */,
                    0 /* matching paths cache max age */,
                    0 /* matching paths cache max entries */,
-                   0 /* initial retry delay */, kTestTimeoutConfig);
+                   0 /* initial retry delay */);
 
   std::unique_ptr<WritableFile> file;
   EXPECT_EQ(errors::Code::INVALID_ARGUMENT,
@@ -687,8 +642,7 @@ TEST(GcsFileSystemTest, NewReadOnlyMemoryRegionFromFile) {
       {new FakeHttpRequest(
            "Uri: https://www.googleapis.com/storage/v1/b/bucket/o/"
            "path%2Frandom_access.txt?fields=size%2Cupdated\n"
-           "Auth Token: fake_token\n"
-           "Timeouts: 5 1 10\n",
+           "Auth Token: fake_token\n",
            strings::StrCat("{\"size\": \"", content.size(),
                            "\", \"updated\": \"2016-04-29T23:15:24.896Z\"}")),
        new FakeHttpRequest(
@@ -696,7 +650,7 @@ TEST(GcsFileSystemTest, NewReadOnlyMemoryRegionFromFile) {
                            "path%2Frandom_access.txt\n"
                            "Auth Token: fake_token\n"
                            "Range: 0-",
-                           content.size() - 1, "\n", "Timeouts: 5 1 20\n"),
+                           content.size() - 1, "\n"),
            content)});
   GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
                    std::unique_ptr<HttpRequest::Factory>(
@@ -705,7 +659,7 @@ TEST(GcsFileSystemTest, NewReadOnlyMemoryRegionFromFile) {
                    0 /* stat cache max age */, 0 /* stat cache max entries */,
                    0 /* matching paths cache max age */,
                    0 /* matching paths cache max entries */,
-                   0 /* initial retry delay */, kTestTimeoutConfig);
+                   0 /* initial retry delay */);
 
   std::unique_ptr<ReadOnlyMemoryRegion> region;
   TF_EXPECT_OK(fs.NewReadOnlyMemoryRegionFromFile(
@@ -724,7 +678,7 @@ TEST(GcsFileSystemTest, NewReadOnlyMemoryRegionFromFile_NoObjectName) {
                    0 /* stat cache max age */, 0 /* stat cache max entries */,
                    0 /* matching paths cache max age */,
                    0 /* matching paths cache max entries */,
-                   0 /* initial retry delay */, kTestTimeoutConfig);
+                   0 /* initial retry delay */);
 
   std::unique_ptr<ReadOnlyMemoryRegion> region;
   EXPECT_EQ(errors::Code::INVALID_ARGUMENT,
@@ -735,8 +689,7 @@ TEST(GcsFileSystemTest, FileExists_YesAsObject) {
   std::vector<HttpRequest*> requests({new FakeHttpRequest(
       "Uri: https://www.googleapis.com/storage/v1/b/bucket/o/"
       "path%2Ffile1.txt?fields=size%2Cupdated\n"
-      "Auth Token: fake_token\n"
-      "Timeouts: 5 1 10\n",
+      "Auth Token: fake_token\n",
       strings::StrCat("{\"size\": \"1010\","
                       "\"updated\": \"2016-04-29T23:15:24.896Z\"}"))});
   GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
@@ -746,7 +699,7 @@ TEST(GcsFileSystemTest, FileExists_YesAsObject) {
                    0 /* stat cache max age */, 0 /* stat cache max entries */,
                    0 /* matching paths cache max age */,
                    0 /* matching paths cache max entries */,
-                   0 /* initial retry delay */, kTestTimeoutConfig);
+                   0 /* initial retry delay */);
 
   TF_EXPECT_OK(fs.FileExists("gs://bucket/path/file1.txt"));
 }
@@ -756,15 +709,13 @@ TEST(GcsFileSystemTest, FileExists_YesAsFolder) {
       {new FakeHttpRequest(
            "Uri: https://www.googleapis.com/storage/v1/b/bucket/o/"
            "path%2Fsubfolder?fields=size%2Cupdated\n"
-           "Auth Token: fake_token\n"
-           "Timeouts: 5 1 10\n",
+           "Auth Token: fake_token\n",
            "", errors::NotFound("404"), 404),
        new FakeHttpRequest(
            "Uri: https://www.googleapis.com/storage/v1/b/bucket/o?"
            "fields=items%2Fname%2CnextPageToken&prefix=path%2Fsubfolder%2F"
            "&maxResults=1\n"
-           "Auth Token: fake_token\n"
-           "Timeouts: 5 1 10\n",
+           "Auth Token: fake_token\n",
            "{\"items\": [ "
            "  { \"name\": \"path/subfolder/\" }]}")});
   GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
@@ -774,7 +725,7 @@ TEST(GcsFileSystemTest, FileExists_YesAsFolder) {
                    0 /* stat cache max age */, 0 /* stat cache max entries */,
                    0 /* matching paths cache max age */,
                    0 /* matching paths cache max entries */,
-                   0 /* initial retry delay */, kTestTimeoutConfig);
+                   0 /* initial retry delay */);
 
   TF_EXPECT_OK(fs.FileExists("gs://bucket/path/subfolder"));
 }
@@ -783,13 +734,11 @@ TEST(GcsFileSystemTest, FileExists_YesAsBucket) {
   std::vector<HttpRequest*> requests(
       {new FakeHttpRequest(
            "Uri: https://www.googleapis.com/storage/v1/b/bucket1\n"
-           "Auth Token: fake_token\n"
-           "Timeouts: 5 1 10\n",
+           "Auth Token: fake_token\n",
            "{\"size\": \"100\"}"),
        new FakeHttpRequest(
            "Uri: https://www.googleapis.com/storage/v1/b/bucket1\n"
-           "Auth Token: fake_token\n"
-           "Timeouts: 5 1 10\n",
+           "Auth Token: fake_token\n",
            "{\"size\": \"100\"}")});
   GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
                    std::unique_ptr<HttpRequest::Factory>(
@@ -798,7 +747,7 @@ TEST(GcsFileSystemTest, FileExists_YesAsBucket) {
                    0 /* stat cache max age */, 0 /* stat cache max entries */,
                    0 /* matching paths cache max age */,
                    0 /* matching paths cache max entries */,
-                   0 /* initial retry delay */, kTestTimeoutConfig);
+                   0 /* initial retry delay */);
 
   TF_EXPECT_OK(fs.FileExists("gs://bucket1"));
   TF_EXPECT_OK(fs.FileExists("gs://bucket1/"));
@@ -809,15 +758,13 @@ TEST(GcsFileSystemTest, FileExists_NotAsObjectOrFolder) {
       {new FakeHttpRequest(
            "Uri: https://www.googleapis.com/storage/v1/b/bucket/o/"
            "path%2Ffile1.txt?fields=size%2Cupdated\n"
-           "Auth Token: fake_token\n"
-           "Timeouts: 5 1 10\n",
+           "Auth Token: fake_token\n",
            "", errors::NotFound("404"), 404),
        new FakeHttpRequest(
            "Uri: https://www.googleapis.com/storage/v1/b/bucket/o?"
            "fields=items%2Fname%2CnextPageToken&prefix=path%2Ffile1.txt%2F"
            "&maxResults=1\n"
-           "Auth Token: fake_token\n"
-           "Timeouts: 5 1 10\n",
+           "Auth Token: fake_token\n",
            "{\"items\": []}")});
   GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
                    std::unique_ptr<HttpRequest::Factory>(
@@ -826,7 +773,7 @@ TEST(GcsFileSystemTest, FileExists_NotAsObjectOrFolder) {
                    0 /* stat cache max age */, 0 /* stat cache max entries */,
                    0 /* matching paths cache max age */,
                    0 /* matching paths cache max entries */,
-                   0 /* initial retry delay */, kTestTimeoutConfig);
+                   0 /* initial retry delay */);
 
   EXPECT_EQ(errors::Code::NOT_FOUND,
             fs.FileExists("gs://bucket/path/file1.txt").code());
@@ -836,13 +783,11 @@ TEST(GcsFileSystemTest, FileExists_NotAsBucket) {
   std::vector<HttpRequest*> requests(
       {new FakeHttpRequest(
            "Uri: https://www.googleapis.com/storage/v1/b/bucket2\n"
-           "Auth Token: fake_token\n"
-           "Timeouts: 5 1 10\n",
+           "Auth Token: fake_token\n",
            "", errors::NotFound("404"), 404),
        new FakeHttpRequest(
            "Uri: https://www.googleapis.com/storage/v1/b/bucket2\n"
-           "Auth Token: fake_token\n"
-           "Timeouts: 5 1 10\n",
+           "Auth Token: fake_token\n",
            "", errors::NotFound("404"), 404)});
   GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
                    std::unique_ptr<HttpRequest::Factory>(
@@ -851,7 +796,7 @@ TEST(GcsFileSystemTest, FileExists_NotAsBucket) {
                    0 /* stat cache max age */, 0 /* stat cache max entries */,
                    0 /* matching paths cache max age */,
                    0 /* matching paths cache max entries */,
-                   0 /* initial retry delay */, kTestTimeoutConfig);
+                   0 /* initial retry delay */);
   EXPECT_EQ(errors::Code::INVALID_ARGUMENT,
             fs.FileExists("gs://bucket2/").code());
   EXPECT_EQ(errors::Code::INVALID_ARGUMENT,
@@ -863,33 +808,29 @@ TEST(GcsFileSystemTest, FileExists_StatCache) {
       {new FakeHttpRequest(
            "Uri: https://www.googleapis.com/storage/v1/b/bucket/o/"
            "path%2Ffile1.txt?fields=size%2Cupdated\n"
-           "Auth Token: fake_token\n"
-           "Timeouts: 5 1 10\n",
+           "Auth Token: fake_token\n",
            strings::StrCat("{\"size\": \"1010\","
                            "\"updated\": \"2016-04-29T23:15:24.896Z\"}")),
        new FakeHttpRequest(
            "Uri: https://www.googleapis.com/storage/v1/b/bucket/o/"
            "path%2Fsubfolder?fields=size%2Cupdated\n"
-           "Auth Token: fake_token\n"
-           "Timeouts: 5 1 10\n",
+           "Auth Token: fake_token\n",
            "", errors::NotFound("404"), 404),
        new FakeHttpRequest(
            "Uri: https://www.googleapis.com/storage/v1/b/bucket/o?"
            "fields=items%2Fname%2CnextPageToken&prefix=path%2Fsubfolder%2F"
            "&maxResults=1\n"
-           "Auth Token: fake_token\n"
-           "Timeouts: 5 1 10\n",
+           "Auth Token: fake_token\n",
            "{\"items\": [ "
            "  { \"name\": \"path/subfolder/\" }]}")});
-  GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
-                   std::unique_ptr<HttpRequest::Factory>(
-                       new FakeHttpRequestFactory(&requests)),
-                   0 /* block size */, 0 /* max bytes */, 0 /* max staleness */,
-                   3600 /* stat cache max age */,
-                   0 /* stat cache max entries */,
-                   0 /* matching paths cache max age */,
-                   0 /* matching paths cache max entries */,
-                   0 /* initial retry delay */, kTestTimeoutConfig);
+  GcsFileSystem fs(
+      std::unique_ptr<AuthProvider>(new FakeAuthProvider),
+      std::unique_ptr<HttpRequest::Factory>(
+          new FakeHttpRequestFactory(&requests)),
+      0 /* block size */, 0 /* max bytes */, 0 /* max staleness */,
+      3600 /* stat cache max age */, 0 /* stat cache max entries */,
+      0 /* matching paths cache max age */,
+      0 /* matching paths cache max entries */, 0 /* initial retry delay */);
 
   // The stat cache will ensure that repeated lookups don't trigger additional
   // HTTP requests.
@@ -904,8 +845,7 @@ TEST(GcsFileSystemTest, GetChildren_NoItems) {
       "Uri: https://www.googleapis.com/storage/v1/b/bucket/o?"
       "fields=items%2Fname%2Cprefixes%2CnextPageToken&delimiter=%2F&prefix="
       "path%2F\n"
-      "Auth Token: fake_token\n"
-      "Timeouts: 5 1 10\n",
+      "Auth Token: fake_token\n",
       "{\"prefixes\": [\"path/subpath/\"]}")});
   GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
                    std::unique_ptr<HttpRequest::Factory>(
@@ -914,7 +854,7 @@ TEST(GcsFileSystemTest, GetChildren_NoItems) {
                    0 /* stat cache max age */, 0 /* stat cache max entries */,
                    0 /* matching paths cache max age */,
                    0 /* matching paths cache max entries */,
-                   0 /* initial retry delay */, kTestTimeoutConfig);
+                   0 /* initial retry delay */);
 
   std::vector<string> children;
   TF_EXPECT_OK(fs.GetChildren("gs://bucket/path/", &children));
@@ -927,8 +867,7 @@ TEST(GcsFileSystemTest, GetChildren_ThreeFiles) {
       "Uri: https://www.googleapis.com/storage/v1/b/bucket/o?"
       "fields=items%2Fname%2Cprefixes%2CnextPageToken&delimiter=%2F&prefix="
       "path%2F\n"
-      "Auth Token: fake_token\n"
-      "Timeouts: 5 1 10\n",
+      "Auth Token: fake_token\n",
       "{\"items\": [ "
       "  { \"name\": \"path/file1.txt\" },"
       "  { \"name\": \"path/file3.txt\" }],"
@@ -940,7 +879,7 @@ TEST(GcsFileSystemTest, GetChildren_ThreeFiles) {
                    0 /* stat cache max age */, 0 /* stat cache max entries */,
                    0 /* matching paths cache max age */,
                    0 /* matching paths cache max entries */,
-                   0 /* initial retry delay */, kTestTimeoutConfig);
+                   0 /* initial retry delay */);
 
   std::vector<string> children;
   TF_EXPECT_OK(fs.GetChildren("gs://bucket/path/", &children));
@@ -954,8 +893,7 @@ TEST(GcsFileSystemTest, GetChildren_SelfDirectoryMarker) {
       "Uri: https://www.googleapis.com/storage/v1/b/bucket/o?"
       "fields=items%2Fname%2Cprefixes%2CnextPageToken&delimiter=%2F&prefix="
       "path%2F\n"
-      "Auth Token: fake_token\n"
-      "Timeouts: 5 1 10\n",
+      "Auth Token: fake_token\n",
       "{\"items\": [ "
       "  { \"name\": \"path/\" },"
       "  { \"name\": \"path/file3.txt\" }],"
@@ -967,7 +905,7 @@ TEST(GcsFileSystemTest, GetChildren_SelfDirectoryMarker) {
                    0 /* stat cache max age */, 0 /* stat cache max entries */,
                    0 /* matching paths cache max age */,
                    0 /* matching paths cache max entries */,
-                   0 /* initial retry delay */, kTestTimeoutConfig);
+                   0 /* initial retry delay */);
 
   std::vector<string> children;
   TF_EXPECT_OK(fs.GetChildren("gs://bucket/path/", &children));
@@ -980,8 +918,7 @@ TEST(GcsFileSystemTest, GetChildren_ThreeFiles_NoSlash) {
       "Uri: https://www.googleapis.com/storage/v1/b/bucket/o?"
       "fields=items%2Fname%2Cprefixes%2CnextPageToken&delimiter=%2F&prefix="
       "path%2F\n"
-      "Auth Token: fake_token\n"
-      "Timeouts: 5 1 10\n",
+      "Auth Token: fake_token\n",
       "{\"items\": [ "
       "  { \"name\": \"path/file1.txt\" },"
       "  { \"name\": \"path/file3.txt\" }],"
@@ -993,7 +930,7 @@ TEST(GcsFileSystemTest, GetChildren_ThreeFiles_NoSlash) {
                    0 /* stat cache max age */, 0 /* stat cache max entries */,
                    0 /* matching paths cache max age */,
                    0 /* matching paths cache max entries */,
-                   0 /* initial retry delay*/, kTestTimeoutConfig);
+                   0 /* initial retry delay */);
 
   std::vector<string> children;
   TF_EXPECT_OK(fs.GetChildren("gs://bucket/path", &children));
@@ -1006,8 +943,7 @@ TEST(GcsFileSystemTest, GetChildren_Root) {
   std::vector<HttpRequest*> requests({new FakeHttpRequest(
       "Uri: https://www.googleapis.com/storage/v1/b/bucket-a-b-c/o?"
       "fields=items%2Fname%2Cprefixes%2CnextPageToken&delimiter=%2F\n"
-      "Auth Token: fake_token\n"
-      "Timeouts: 5 1 10\n",
+      "Auth Token: fake_token\n",
       "{}")});
   GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
                    std::unique_ptr<HttpRequest::Factory>(
@@ -1016,7 +952,7 @@ TEST(GcsFileSystemTest, GetChildren_Root) {
                    0 /* stat cache max age */, 0 /* stat cache max entries */,
                    0 /* matching paths cache max age */,
                    0 /* matching paths cache max entries */,
-                   0 /* initial retry delay*/, kTestTimeoutConfig);
+                   0 /* initial retry delay */);
 
   std::vector<string> children;
   TF_EXPECT_OK(fs.GetChildren("gs://bucket-a-b-c", &children));
@@ -1029,8 +965,7 @@ TEST(GcsFileSystemTest, GetChildren_Empty) {
       "Uri: https://www.googleapis.com/storage/v1/b/bucket/o?"
       "fields=items%2Fname%2Cprefixes%2CnextPageToken&delimiter=%2F&prefix="
       "path%2F\n"
-      "Auth Token: fake_token\n"
-      "Timeouts: 5 1 10\n",
+      "Auth Token: fake_token\n",
       "{}")});
   GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
                    std::unique_ptr<HttpRequest::Factory>(
@@ -1039,7 +974,7 @@ TEST(GcsFileSystemTest, GetChildren_Empty) {
                    0 /* stat cache max age */, 0 /* stat cache max entries */,
                    0 /* matching paths cache max age */,
                    0 /* matching paths cache max entries */,
-                   0 /* initial retry delay*/, kTestTimeoutConfig);
+                   0 /* initial retry delay */);
 
   std::vector<string> children;
   TF_EXPECT_OK(fs.GetChildren("gs://bucket/path/", &children));
@@ -1053,8 +988,7 @@ TEST(GcsFileSystemTest, GetChildren_Pagination) {
            "Uri: https://www.googleapis.com/storage/v1/b/bucket/o?"
            "fields=items%2Fname%2Cprefixes%2CnextPageToken&delimiter=%2F&"
            "prefix=path%2F\n"
-           "Auth Token: fake_token\n"
-           "Timeouts: 5 1 10\n",
+           "Auth Token: fake_token\n",
            "{\"nextPageToken\": \"ABCD==\", "
            "\"items\": [ "
            "  { \"name\": \"path/file1.txt\" },"
@@ -1065,8 +999,7 @@ TEST(GcsFileSystemTest, GetChildren_Pagination) {
            "fields=items%2Fname%2Cprefixes%2CnextPageToken&delimiter=%2F&"
            "prefix=path%2F"
            "&pageToken=ABCD==\n"
-           "Auth Token: fake_token\n"
-           "Timeouts: 5 1 10\n",
+           "Auth Token: fake_token\n",
            "{\"items\": [ "
            "  { \"name\": \"path/file4.txt\" },"
            "  { \"name\": \"path/file5.txt\" }]}")});
@@ -1078,7 +1011,7 @@ TEST(GcsFileSystemTest, GetChildren_Pagination) {
                    0 /* stat cache max age */, 0 /* stat cache max entries */,
                    0 /* matching paths cache max age */,
                    0 /* matching paths cache max entries */,
-                   0 /* initial retry delay*/, kTestTimeoutConfig);
+                   0 /* initial retry delay */);
 
   std::vector<string> children;
   TF_EXPECT_OK(fs.GetChildren("gs://bucket/path", &children));
@@ -1092,8 +1025,7 @@ TEST(GcsFileSystemTest, GetMatchingPaths_NoWildcard) {
   std::vector<HttpRequest*> requests({new FakeHttpRequest(
       "Uri: https://www.googleapis.com/storage/v1/b/bucket/o?"
       "fields=items%2Fname%2CnextPageToken&prefix=path%2Fsubpath%2F\n"
-      "Auth Token: fake_token\n"
-      "Timeouts: 5 1 10\n",
+      "Auth Token: fake_token\n",
       "{\"items\": [ "
       "  { \"name\": \"path/subpath/file2.txt\" }]}")});
   GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
@@ -1103,7 +1035,7 @@ TEST(GcsFileSystemTest, GetMatchingPaths_NoWildcard) {
                    0 /* stat cache max age */, 0 /* stat cache max entries */,
                    0 /* matching paths cache max age */,
                    0 /* matching paths cache max entries */,
-                   0 /* initial retry delay*/, kTestTimeoutConfig);
+                   0 /* initial retry delay */);
 
   std::vector<string> result;
   TF_EXPECT_OK(
@@ -1116,8 +1048,7 @@ TEST(GcsFileSystemTest, GetMatchingPaths_BucketAndWildcard) {
   std::vector<HttpRequest*> requests({new FakeHttpRequest(
       "Uri: https://www.googleapis.com/storage/v1/b/bucket/o?"
       "fields=items%2Fname%2CnextPageToken\n"
-      "Auth Token: fake_token\n"
-      "Timeouts: 5 1 10\n",
+      "Auth Token: fake_token\n",
       "{\"items\": [ "
       "  { \"name\": \"path/file1.txt\" },"
       "  { \"name\": \"path/subpath/file2.txt\" },"
@@ -1129,7 +1060,7 @@ TEST(GcsFileSystemTest, GetMatchingPaths_BucketAndWildcard) {
                    0 /* stat cache max age */, 0 /* stat cache max entries */,
                    0 /* matching paths cache max age */,
                    0 /* matching paths cache max entries */,
-                   0 /* initial retry delay*/, kTestTimeoutConfig);
+                   0 /* initial retry delay */);
 
   std::vector<string> result;
   TF_EXPECT_OK(fs.GetMatchingPaths("gs://bucket/*/*", &result));
@@ -1143,8 +1074,7 @@ TEST(GcsFileSystemTest, GetMatchingPaths_FolderAndWildcard_Matches) {
   std::vector<HttpRequest*> requests({new FakeHttpRequest(
       "Uri: https://www.googleapis.com/storage/v1/b/bucket/o?"
       "fields=items%2Fname%2CnextPageToken&prefix=path%2F\n"
-      "Auth Token: fake_token\n"
-      "Timeouts: 5 1 10\n",
+      "Auth Token: fake_token\n",
       "{\"items\": [ "
       "  { \"name\": \"path/file1.txt\" },"
       "  { \"name\": \"path/subpath/file2.txt\" },"
@@ -1156,7 +1086,7 @@ TEST(GcsFileSystemTest, GetMatchingPaths_FolderAndWildcard_Matches) {
                    0 /* stat cache max age */, 0 /* stat cache max entries */,
                    0 /* matching paths cache max age */,
                    0 /* matching paths cache max entries */,
-                   0 /* initial retry delay*/, kTestTimeoutConfig);
+                   0 /* initial retry delay */);
 
   std::vector<string> result;
   TF_EXPECT_OK(fs.GetMatchingPaths("gs://bucket/path/*/file2.txt", &result));
@@ -1168,8 +1098,7 @@ TEST(GcsFileSystemTest, GetMatchingPaths_SelfDirectoryMarker) {
   std::vector<HttpRequest*> requests({new FakeHttpRequest(
       "Uri: https://www.googleapis.com/storage/v1/b/bucket/o?"
       "fields=items%2Fname%2CnextPageToken&prefix=path%2F\n"
-      "Auth Token: fake_token\n"
-      "Timeouts: 5 1 10\n",
+      "Auth Token: fake_token\n",
       "{\"items\": [ "
       "  { \"name\": \"path/\" },"
       "  { \"name\": \"path/file3.txt\" }]}")});
@@ -1180,7 +1109,7 @@ TEST(GcsFileSystemTest, GetMatchingPaths_SelfDirectoryMarker) {
                    0 /* stat cache max age */, 0 /* stat cache max entries */,
                    0 /* matching paths cache max age */,
                    0 /* matching paths cache max entries */,
-                   0 /* initial retry delay*/, kTestTimeoutConfig);
+                   0 /* initial retry delay */);
 
   std::vector<string> result;
   TF_EXPECT_OK(fs.GetMatchingPaths("gs://bucket/path/*", &result));
@@ -1191,8 +1120,7 @@ TEST(GcsFileSystemTest, GetMatchingPaths_FolderAndWildcard_NoMatches) {
   std::vector<HttpRequest*> requests({new FakeHttpRequest(
       "Uri: https://www.googleapis.com/storage/v1/b/bucket/o?"
       "fields=items%2Fname%2CnextPageToken&prefix=path%2F\n"
-      "Auth Token: fake_token\n"
-      "Timeouts: 5 1 10\n",
+      "Auth Token: fake_token\n",
       "{\"items\": [ "
       "  { \"name\": \"path/file1.txt\" },"
       "  { \"name\": \"path/subpath/file2.txt\" },"
@@ -1204,7 +1132,7 @@ TEST(GcsFileSystemTest, GetMatchingPaths_FolderAndWildcard_NoMatches) {
                    0 /* stat cache max age */, 0 /* stat cache max entries */,
                    0 /* matching paths cache max age */,
                    0 /* matching paths cache max entries */,
-                   0 /* initial retry delay*/, kTestTimeoutConfig);
+                   0 /* initial retry delay */);
 
   std::vector<string> result;
   TF_EXPECT_OK(fs.GetMatchingPaths("gs://bucket/path/*/file3.txt", &result));
@@ -1220,7 +1148,7 @@ TEST(GcsFileSystemTest, GetMatchingPaths_OnlyWildcard) {
                    0 /* stat cache max age */, 0 /* stat cache max entries */,
                    0 /* matching paths cache max age */,
                    0 /* matching paths cache max entries */,
-                   0 /* initial retry delay*/, kTestTimeoutConfig);
+                   0 /* initial retry delay */);
 
   std::vector<string> result;
   EXPECT_EQ(errors::Code::INVALID_ARGUMENT,
@@ -1232,15 +1160,13 @@ TEST(GcsFileSystemTest, GetMatchingPaths_Cache) {
       {new FakeHttpRequest(
            "Uri: https://www.googleapis.com/storage/v1/b/bucket/o?"
            "fields=items%2Fname%2CnextPageToken&prefix=path%2Fsubpath%2F\n"
-           "Auth Token: fake_token\n"
-           "Timeouts: 5 1 10\n",
+           "Auth Token: fake_token\n",
            "{\"items\": [ "
            "  { \"name\": \"path/subpath/file2.txt\" }]}"),
        new FakeHttpRequest(
            "Uri: https://www.googleapis.com/storage/v1/b/bucket/o?"
            "fields=items%2Fname%2CnextPageToken\n"
-           "Auth Token: fake_token\n"
-           "Timeouts: 5 1 10\n",
+           "Auth Token: fake_token\n",
            "{\"items\": [ "
            "  { \"name\": \"path/file1.txt\" },"
            "  { \"name\": \"path/subpath/file2.txt\" },"
@@ -1252,7 +1178,7 @@ TEST(GcsFileSystemTest, GetMatchingPaths_Cache) {
                    0 /* stat cache max age */, 0 /* stat cache max entries */,
                    3600 /* matching paths cache max age */,
                    0 /* matching paths cache max entries */,
-                   0 /* initial retry delay*/, kTestTimeoutConfig);
+                   0 /* initial retry delay */);
 
   // Repeated calls to fs.GetMatchingPaths on these patterns should not lead to
   // any additional HTTP requests to GCS.
@@ -1275,30 +1201,26 @@ TEST(GcsFileSystemTest, DeleteFile) {
       {new FakeHttpRequest(
            "Uri: https://storage.googleapis.com/bucket/path%2Ffile1.txt\n"
            "Auth Token: fake_token\n"
-           "Range: 0-15\n"
-           "Timeouts: 5 1 20\n",
+           "Range: 0-15\n",
            "01234567"),
        new FakeHttpRequest("Uri: https://www.googleapis.com/storage/v1/b"
                            "/bucket/o/path%2Ffile1.txt\n"
                            "Auth Token: fake_token\n"
-                           "Timeouts: 5 1 10\n"
                            "Delete: yes\n",
                            ""),
        new FakeHttpRequest(
            "Uri: https://storage.googleapis.com/bucket/path%2Ffile1.txt\n"
            "Auth Token: fake_token\n"
-           "Range: 0-15\n"
-           "Timeouts: 5 1 20\n",
+           "Range: 0-15\n",
            "76543210")});
-  GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
-                   std::unique_ptr<HttpRequest::Factory>(
-                       new FakeHttpRequestFactory(&requests)),
-                   16 /* block size */, 16 /* max bytes */,
-                   0 /* max staleness */, 0 /* stat cache max age */,
-                   0 /* stat cache max entries */,
-                   0 /* matching paths cache max age */,
-                   0 /* matching paths cache max entries */,
-                   0 /* initial retry delay*/, kTestTimeoutConfig);
+  GcsFileSystem fs(
+      std::unique_ptr<AuthProvider>(new FakeAuthProvider),
+      std::unique_ptr<HttpRequest::Factory>(
+          new FakeHttpRequestFactory(&requests)),
+      16 /* block size */, 16 /* max bytes */, 0 /* max staleness */,
+      0 /* stat cache max age */, 0 /* stat cache max entries */,
+      0 /* matching paths cache max age */,
+      0 /* matching paths cache max entries */, 0 /* initial retry delay */);
 
   // Do an initial read of the file to load its contents into the block cache.
   char scratch[100];
@@ -1324,7 +1246,7 @@ TEST(GcsFileSystemTest, DeleteFile_NoObjectName) {
                    0 /* stat cache max age */, 0 /* stat cache max entries */,
                    0 /* matching paths cache max age */,
                    0 /* matching paths cache max entries */,
-                   0 /* initial retry delay*/, kTestTimeoutConfig);
+                   0 /* initial retry delay */);
 
   EXPECT_EQ(errors::Code::INVALID_ARGUMENT,
             fs.DeleteFile("gs://bucket/").code());
@@ -1334,8 +1256,7 @@ TEST(GcsFileSystemTest, DeleteDir_Empty) {
   std::vector<HttpRequest*> requests({new FakeHttpRequest(
       "Uri: https://www.googleapis.com/storage/v1/b/bucket/o?"
       "fields=items%2Fname%2CnextPageToken&prefix=path%2F&maxResults=2\n"
-      "Auth Token: fake_token\n"
-      "Timeouts: 5 1 10\n",
+      "Auth Token: fake_token\n",
       "{}")});
   GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
                    std::unique_ptr<HttpRequest::Factory>(
@@ -1344,7 +1265,7 @@ TEST(GcsFileSystemTest, DeleteDir_Empty) {
                    0 /* stat cache max age */, 0 /* stat cache max entries */,
                    0 /* matching paths cache max age */,
                    0 /* matching paths cache max entries */,
-                   0 /* initial retry delay*/, kTestTimeoutConfig);
+                   0 /* initial retry delay */);
 
   TF_EXPECT_OK(fs.DeleteDir("gs://bucket/path/"));
 }
@@ -1354,14 +1275,12 @@ TEST(GcsFileSystemTest, DeleteDir_OnlyDirMarkerLeft) {
       {new FakeHttpRequest(
            "Uri: https://www.googleapis.com/storage/v1/b/bucket/o?"
            "fields=items%2Fname%2CnextPageToken&prefix=path%2F&maxResults=2\n"
-           "Auth Token: fake_token\n"
-           "Timeouts: 5 1 10\n",
+           "Auth Token: fake_token\n",
            "{\"items\": [ "
            "  { \"name\": \"path/\" }]}"),
        new FakeHttpRequest("Uri: https://www.googleapis.com/storage/v1/b"
                            "/bucket/o/path%2F\n"
                            "Auth Token: fake_token\n"
-                           "Timeouts: 5 1 10\n"
                            "Delete: yes\n",
                            "")});
   GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
@@ -1371,7 +1290,7 @@ TEST(GcsFileSystemTest, DeleteDir_OnlyDirMarkerLeft) {
                    0 /* stat cache max age */, 0 /* stat cache max entries */,
                    0 /* matching paths cache max age */,
                    0 /* matching paths cache max entries */,
-                   0 /* initial retry delay*/, kTestTimeoutConfig);
+                   0 /* initial retry delay */);
 
   TF_EXPECT_OK(fs.DeleteDir("gs://bucket/path/"));
 }
@@ -1379,8 +1298,7 @@ TEST(GcsFileSystemTest, DeleteDir_OnlyDirMarkerLeft) {
 TEST(GcsFileSystemTest, DeleteDir_BucketOnly) {
   std::vector<HttpRequest*> requests({new FakeHttpRequest(
       "Uri: https://www.googleapis.com/storage/v1/b/bucket/o?fields=items%2F"
-      "name%2CnextPageToken&maxResults=2\nAuth Token: fake_token\n"
-      "Timeouts: 5 1 10\n",
+      "name%2CnextPageToken&maxResults=2\nAuth Token: fake_token\n",
       "{}")});
   GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
                    std::unique_ptr<HttpRequest::Factory>(
@@ -1389,7 +1307,7 @@ TEST(GcsFileSystemTest, DeleteDir_BucketOnly) {
                    0 /* stat cache max age */, 0 /* stat cache max entries */,
                    0 /* matching paths cache max age */,
                    0 /* matching paths cache max entries */,
-                   0 /* initial retry delay*/, kTestTimeoutConfig);
+                   0 /* initial retry delay */);
 
   TF_EXPECT_OK(fs.DeleteDir("gs://bucket"));
 }
@@ -1398,8 +1316,7 @@ TEST(GcsFileSystemTest, DeleteDir_NonEmpty) {
   std::vector<HttpRequest*> requests({new FakeHttpRequest(
       "Uri: https://www.googleapis.com/storage/v1/b/bucket/o?"
       "fields=items%2Fname%2CnextPageToken&prefix=path%2F&maxResults=2\n"
-      "Auth Token: fake_token\n"
-      "Timeouts: 5 1 10\n",
+      "Auth Token: fake_token\n",
       "{\"items\": [ "
       "  { \"name\": \"path/file1.txt\" }]}")});
   GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
@@ -1409,7 +1326,7 @@ TEST(GcsFileSystemTest, DeleteDir_NonEmpty) {
                    0 /* stat cache max age */, 0 /* stat cache max entries */,
                    0 /* matching paths cache max age */,
                    0 /* matching paths cache max entries */,
-                   0 /* initial retry delay*/, kTestTimeoutConfig);
+                   0 /* initial retry delay */);
 
   EXPECT_EQ(error::Code::FAILED_PRECONDITION,
             fs.DeleteDir("gs://bucket/path/").code());
@@ -1419,8 +1336,7 @@ TEST(GcsFileSystemTest, GetFileSize) {
   std::vector<HttpRequest*> requests({new FakeHttpRequest(
       "Uri: https://www.googleapis.com/storage/v1/b/bucket/o/"
       "file.txt?fields=size%2Cupdated\n"
-      "Auth Token: fake_token\n"
-      "Timeouts: 5 1 10\n",
+      "Auth Token: fake_token\n",
       strings::StrCat("{\"size\": \"1010\","
                       "\"updated\": \"2016-04-29T23:15:24.896Z\"}"))});
   GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
@@ -1430,7 +1346,7 @@ TEST(GcsFileSystemTest, GetFileSize) {
                    0 /* stat cache max age */, 0 /* stat cache max entries */,
                    0 /* matching paths cache max age */,
                    0 /* matching paths cache max entries */,
-                   0 /* initial retry delay*/, kTestTimeoutConfig);
+                   0 /* initial retry delay */);
 
   uint64 size;
   TF_EXPECT_OK(fs.GetFileSize("gs://bucket/file.txt", &size));
@@ -1446,7 +1362,7 @@ TEST(GcsFileSystemTest, GetFileSize_NoObjectName) {
                    0 /* stat cache max age */, 0 /* stat cache max entries */,
                    0 /* matching paths cache max age */,
                    0 /* matching paths cache max entries */,
-                   0 /* initial retry delay*/, kTestTimeoutConfig);
+                   0 /* initial retry delay */);
 
   uint64 size;
   EXPECT_EQ(errors::Code::INVALID_ARGUMENT,
@@ -1460,16 +1376,14 @@ TEST(GcsFileSystemTest, RenameFile_Folder) {
            "Uri: https://www.googleapis.com/storage/v1/b/bucket/o?"
            "fields=items%2Fname%2CnextPageToken&prefix=path1%2F"
            "&maxResults=1\n"
-           "Auth Token: fake_token\n"
-           "Timeouts: 5 1 10\n",
+           "Auth Token: fake_token\n",
            "{\"items\": [ "
            "  { \"name\": \"path1/subfolder/file1.txt\" }]}"),
        // Requesting the full list of files in the folder.
        new FakeHttpRequest(
            "Uri: https://www.googleapis.com/storage/v1/b/bucket/o?"
            "fields=items%2Fname%2CnextPageToken&prefix=path1%2F\n"
-           "Auth Token: fake_token\n"
-           "Timeouts: 5 1 10\n",
+           "Auth Token: fake_token\n",
            "{\"items\": [ "
            "  { \"name\": \"path1/\" },"  // A directory marker.
            "  { \"name\": \"path1/subfolder/file1.txt\" },"
@@ -1479,15 +1393,13 @@ TEST(GcsFileSystemTest, RenameFile_Folder) {
            "Uri: https://www.googleapis.com/storage/v1/b/bucket/o/"
            "path1%2F/rewriteTo/b/bucket/o/path2%2F\n"
            "Auth Token: fake_token\n"
-           "Post: yes\n"
-           "Timeouts: 5 1 10\n",
+           "Post: yes\n",
            "{\"done\": true}"),
        // Deleting the original directory marker.
        new FakeHttpRequest(
            "Uri: https://www.googleapis.com/storage/v1/b/bucket/o/"
            "path1%2F\n"
            "Auth Token: fake_token\n"
-           "Timeouts: 5 1 10\n"
            "Delete: yes\n",
            ""),
        // Copying the first file.
@@ -1496,15 +1408,13 @@ TEST(GcsFileSystemTest, RenameFile_Folder) {
            "path1%2Fsubfolder%2Ffile1.txt/rewriteTo/b/bucket/o/"
            "path2%2Fsubfolder%2Ffile1.txt\n"
            "Auth Token: fake_token\n"
-           "Post: yes\n"
-           "Timeouts: 5 1 10\n",
+           "Post: yes\n",
            "{\"done\": true}"),
        // Deleting the first original file.
        new FakeHttpRequest(
            "Uri: https://www.googleapis.com/storage/v1/b/bucket/o/"
            "path1%2Fsubfolder%2Ffile1.txt\n"
            "Auth Token: fake_token\n"
-           "Timeouts: 5 1 10\n"
            "Delete: yes\n",
            ""),
        // Copying the second file.
@@ -1512,15 +1422,13 @@ TEST(GcsFileSystemTest, RenameFile_Folder) {
            "Uri: https://www.googleapis.com/storage/v1/b/bucket/o/"
            "path1%2Ffile2.txt/rewriteTo/b/bucket/o/path2%2Ffile2.txt\n"
            "Auth Token: fake_token\n"
-           "Post: yes\n"
-           "Timeouts: 5 1 10\n",
+           "Post: yes\n",
            "{\"done\": true}"),
        // Deleting the second original file.
        new FakeHttpRequest(
            "Uri: https://www.googleapis.com/storage/v1/b/bucket/o/"
            "path1%2Ffile2.txt\n"
            "Auth Token: fake_token\n"
-           "Timeouts: 5 1 10\n"
            "Delete: yes\n",
            "")});
   GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
@@ -1530,7 +1438,7 @@ TEST(GcsFileSystemTest, RenameFile_Folder) {
                    0 /* stat cache max age */, 0 /* stat cache max entries */,
                    0 /* matching paths cache max age */,
                    0 /* matching paths cache max entries */,
-                   0 /* initial retry delay*/, kTestTimeoutConfig);
+                   0 /* initial retry delay */);
 
   TF_EXPECT_OK(fs.RenameFile("gs://bucket/path1", "gs://bucket/path2/"));
 }
@@ -1540,29 +1448,25 @@ TEST(GcsFileSystemTest, RenameFile_Object) {
       {new FakeHttpRequest(
            "Uri: https://storage.googleapis.com/bucket/path%2Fsrc.txt\n"
            "Auth Token: fake_token\n"
-           "Range: 0-15\n"
-           "Timeouts: 5 1 20\n",
+           "Range: 0-15\n",
            "01234567"),
        new FakeHttpRequest(
            "Uri: https://storage.googleapis.com/bucket/path%2Fdst.txt\n"
            "Auth Token: fake_token\n"
-           "Range: 0-15\n"
-           "Timeouts: 5 1 20\n",
+           "Range: 0-15\n",
            "76543210"),
        // IsDirectory is checking whether there are children objects.
        new FakeHttpRequest(
            "Uri: https://www.googleapis.com/storage/v1/b/bucket/o?"
            "fields=items%2Fname%2CnextPageToken&prefix=path%2Fsrc.txt%2F"
            "&maxResults=1\n"
-           "Auth Token: fake_token\n"
-           "Timeouts: 5 1 10\n",
+           "Auth Token: fake_token\n",
            "{}"),
        // IsDirectory is checking if the path exists as an object.
        new FakeHttpRequest(
            "Uri: https://www.googleapis.com/storage/v1/b/bucket/o/"
            "path%2Fsrc.txt?fields=size%2Cupdated\n"
-           "Auth Token: fake_token\n"
-           "Timeouts: 5 1 10\n",
+           "Auth Token: fake_token\n",
            strings::StrCat("{\"size\": \"1010\","
                            "\"updated\": \"2016-04-29T23:15:24.896Z\"}")),
        // Copying to the new location.
@@ -1570,38 +1474,33 @@ TEST(GcsFileSystemTest, RenameFile_Object) {
            "Uri: https://www.googleapis.com/storage/v1/b/bucket/o/"
            "path%2Fsrc.txt/rewriteTo/b/bucket/o/path%2Fdst.txt\n"
            "Auth Token: fake_token\n"
-           "Post: yes\n"
-           "Timeouts: 5 1 10\n",
+           "Post: yes\n",
            "{\"done\": true}"),
        // Deleting the original file.
        new FakeHttpRequest(
            "Uri: https://www.googleapis.com/storage/v1/b/bucket/o/"
            "path%2Fsrc.txt\n"
            "Auth Token: fake_token\n"
-           "Timeouts: 5 1 10\n"
            "Delete: yes\n",
            ""),
        new FakeHttpRequest(
            "Uri: https://storage.googleapis.com/bucket/path%2Fsrc.txt\n"
            "Auth Token: fake_token\n"
-           "Range: 0-15\n"
-           "Timeouts: 5 1 20\n",
+           "Range: 0-15\n",
            "89abcdef"),
        new FakeHttpRequest(
            "Uri: https://storage.googleapis.com/bucket/path%2Fdst.txt\n"
            "Auth Token: fake_token\n"
-           "Range: 0-15\n"
-           "Timeouts: 5 1 20\n",
+           "Range: 0-15\n",
            "fedcba98")});
-  GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
-                   std::unique_ptr<HttpRequest::Factory>(
-                       new FakeHttpRequestFactory(&requests)),
-                   16 /* block size */, 64 /* max bytes */,
-                   0 /* max staleness */, 0 /* stat cache max age */,
-                   0 /* stat cache max entries */,
-                   0 /* matching paths cache max age */,
-                   0 /* matching paths cache max entries */,
-                   0 /* initial retry delay*/, kTestTimeoutConfig);
+  GcsFileSystem fs(
+      std::unique_ptr<AuthProvider>(new FakeAuthProvider),
+      std::unique_ptr<HttpRequest::Factory>(
+          new FakeHttpRequestFactory(&requests)),
+      16 /* block size */, 64 /* max bytes */, 0 /* max staleness */,
+      0 /* stat cache max age */, 0 /* stat cache max entries */,
+      0 /* matching paths cache max age */,
+      0 /* matching paths cache max entries */, 0 /* initial retry delay */);
   // Do an initial read of the source and destination files to load their
   // contents into the block cache.
   char scratch[100];
@@ -1632,15 +1531,13 @@ TEST(GcsFileSystemTest, RenameFile_Object_DeletionRetried) {
            "Uri: https://www.googleapis.com/storage/v1/b/bucket/o?"
            "fields=items%2Fname%2CnextPageToken&prefix=path%2Fsrc.txt%2F"
            "&maxResults=1\n"
-           "Auth Token: fake_token\n"
-           "Timeouts: 5 1 10\n",
+           "Auth Token: fake_token\n",
            "{}"),
        // IsDirectory is checking if the path exists as an object.
        new FakeHttpRequest(
            "Uri: https://www.googleapis.com/storage/v1/b/bucket/o/"
            "path%2Fsrc.txt?fields=size%2Cupdated\n"
-           "Auth Token: fake_token\n"
-           "Timeouts: 5 1 10\n",
+           "Auth Token: fake_token\n",
            strings::StrCat("{\"size\": \"1010\","
                            "\"updated\": \"2016-04-29T23:15:24.896Z\"}")),
        // Copying to the new location.
@@ -1648,15 +1545,13 @@ TEST(GcsFileSystemTest, RenameFile_Object_DeletionRetried) {
            "Uri: https://www.googleapis.com/storage/v1/b/bucket/o/"
            "path%2Fsrc.txt/rewriteTo/b/bucket/o/path%2Fdst.txt\n"
            "Auth Token: fake_token\n"
-           "Post: yes\n"
-           "Timeouts: 5 1 10\n",
+           "Post: yes\n",
            "{\"done\": true}"),
        // Deleting the original file - the deletion returns a failure.
        new FakeHttpRequest(
            "Uri: https://www.googleapis.com/storage/v1/b/bucket/o/"
            "path%2Fsrc.txt\n"
            "Auth Token: fake_token\n"
-           "Timeouts: 5 1 10\n"
            "Delete: yes\n",
            "", errors::Unavailable("503"), 503),
        // Deleting the original file again - the deletion returns NOT_FOUND.
@@ -1664,7 +1559,6 @@ TEST(GcsFileSystemTest, RenameFile_Object_DeletionRetried) {
            "Uri: https://www.googleapis.com/storage/v1/b/bucket/o/"
            "path%2Fsrc.txt\n"
            "Auth Token: fake_token\n"
-           "Timeouts: 5 1 10\n"
            "Delete: yes\n",
            "", errors::NotFound("404"), 404)});
   GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
@@ -1674,7 +1568,7 @@ TEST(GcsFileSystemTest, RenameFile_Object_DeletionRetried) {
                    0 /* stat cache max age */, 0 /* stat cache max entries */,
                    0 /* matching paths cache max age */,
                    0 /* matching paths cache max entries */,
-                   0 /* initial retry delay*/, kTestTimeoutConfig);
+                   0 /* initial retry delay */);
 
   TF_EXPECT_OK(
       fs.RenameFile("gs://bucket/path/src.txt", "gs://bucket/path/dst.txt"));
@@ -1688,15 +1582,13 @@ TEST(GcsFileSystemTest, RenameFile_Object_Incomplete) {
            "Uri: https://www.googleapis.com/storage/v1/b/bucket/o?"
            "fields=items%2Fname%2CnextPageToken&prefix=path%2Fsrc.txt%2F"
            "&maxResults=1\n"
-           "Auth Token: fake_token\n"
-           "Timeouts: 5 1 10\n",
+           "Auth Token: fake_token\n",
            "{}"),
        // IsDirectory is checking if the path exists as an object.
        new FakeHttpRequest(
            "Uri: https://www.googleapis.com/storage/v1/b/bucket/o/"
            "path%2Fsrc.txt?fields=size%2Cupdated\n"
-           "Auth Token: fake_token\n"
-           "Timeouts: 5 1 10\n",
+           "Auth Token: fake_token\n",
            strings::StrCat("{\"size\": \"1010\","
                            "\"updated\": \"2016-04-29T23:15:24.896Z\"}")),
        // Copying to the new location.
@@ -1704,8 +1596,7 @@ TEST(GcsFileSystemTest, RenameFile_Object_Incomplete) {
            "Uri: https://www.googleapis.com/storage/v1/b/bucket/o/"
            "path%2Fsrc.txt/rewriteTo/b/bucket/o/path%2Fdst.txt\n"
            "Auth Token: fake_token\n"
-           "Post: yes\n"
-           "Timeouts: 5 1 10\n",
+           "Post: yes\n",
            "{\"done\": false}")});
   GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
                    std::unique_ptr<HttpRequest::Factory>(
@@ -1714,7 +1605,7 @@ TEST(GcsFileSystemTest, RenameFile_Object_Incomplete) {
                    0 /* stat cache max age */, 0 /* stat cache max entries */,
                    0 /* matching paths cache max age */,
                    0 /* matching paths cache max entries */,
-                   0 /* initial retry delay*/, kTestTimeoutConfig);
+                   0 /* initial retry delay */);
 
   EXPECT_EQ(
       errors::Code::UNIMPLEMENTED,
@@ -1726,8 +1617,7 @@ TEST(GcsFileSystemTest, Stat_Object) {
   std::vector<HttpRequest*> requests({new FakeHttpRequest(
       "Uri: https://www.googleapis.com/storage/v1/b/bucket/o/"
       "file.txt?fields=size%2Cupdated\n"
-      "Auth Token: fake_token\n"
-      "Timeouts: 5 1 10\n",
+      "Auth Token: fake_token\n",
       strings::StrCat("{\"size\": \"1010\","
                       "\"updated\": \"2016-04-29T23:15:24.896Z\"}"))});
   GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
@@ -1737,7 +1627,7 @@ TEST(GcsFileSystemTest, Stat_Object) {
                    0 /* stat cache max age */, 0 /* stat cache max entries */,
                    0 /* matching paths cache max age */,
                    0 /* matching paths cache max entries */,
-                   0 /* initial retry delay*/, kTestTimeoutConfig);
+                   0 /* initial retry delay */);
 
   FileStatistics stat;
   TF_EXPECT_OK(fs.Stat("gs://bucket/file.txt", &stat));
@@ -1751,15 +1641,13 @@ TEST(GcsFileSystemTest, Stat_Folder) {
       {new FakeHttpRequest(
            "Uri: https://www.googleapis.com/storage/v1/b/bucket/o/"
            "subfolder?fields=size%2Cupdated\n"
-           "Auth Token: fake_token\n"
-           "Timeouts: 5 1 10\n",
+           "Auth Token: fake_token\n",
            "", errors::NotFound("404"), 404),
        new FakeHttpRequest(
            "Uri: https://www.googleapis.com/storage/v1/b/bucket/o?"
            "fields=items%2Fname%2CnextPageToken&prefix=subfolder%2F"
            "&maxResults=1\n"
-           "Auth Token: fake_token\n"
-           "Timeouts: 5 1 10\n",
+           "Auth Token: fake_token\n",
            "{\"items\": [ "
            "  { \"name\": \"subfolder/\" }]}")});
   GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
@@ -1769,7 +1657,7 @@ TEST(GcsFileSystemTest, Stat_Folder) {
                    0 /* stat cache max age */, 0 /* stat cache max entries */,
                    0 /* matching paths cache max age */,
                    0 /* matching paths cache max entries */,
-                   0 /* initial retry delay*/, kTestTimeoutConfig);
+                   0 /* initial retry delay */);
 
   FileStatistics stat;
   TF_EXPECT_OK(fs.Stat("gs://bucket/subfolder", &stat));
@@ -1783,15 +1671,13 @@ TEST(GcsFileSystemTest, Stat_ObjectOrFolderNotFound) {
       {new FakeHttpRequest(
            "Uri: https://www.googleapis.com/storage/v1/b/bucket/o/"
            "path?fields=size%2Cupdated\n"
-           "Auth Token: fake_token\n"
-           "Timeouts: 5 1 10\n",
+           "Auth Token: fake_token\n",
            "", errors::NotFound("404"), 404),
        new FakeHttpRequest(
            "Uri: https://www.googleapis.com/storage/v1/b/bucket/o?"
            "fields=items%2Fname%2CnextPageToken&prefix=path%2F"
            "&maxResults=1\n"
-           "Auth Token: fake_token\n"
-           "Timeouts: 5 1 10\n",
+           "Auth Token: fake_token\n",
            "{}")});
   GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
                    std::unique_ptr<HttpRequest::Factory>(
@@ -1800,7 +1686,7 @@ TEST(GcsFileSystemTest, Stat_ObjectOrFolderNotFound) {
                    0 /* stat cache max age */, 0 /* stat cache max entries */,
                    0 /* matching paths cache max age */,
                    0 /* matching paths cache max entries */,
-                   0 /* initial retry delay*/, kTestTimeoutConfig);
+                   0 /* initial retry delay */);
 
   FileStatistics stat;
   EXPECT_EQ(error::Code::NOT_FOUND, fs.Stat("gs://bucket/path", &stat).code());
@@ -1809,8 +1695,7 @@ TEST(GcsFileSystemTest, Stat_ObjectOrFolderNotFound) {
 TEST(GcsFileSystemTest, Stat_Bucket) {
   std::vector<HttpRequest*> requests({new FakeHttpRequest(
       "Uri: https://www.googleapis.com/storage/v1/b/bucket\n"
-      "Auth Token: fake_token\n"
-      "Timeouts: 5 1 10\n",
+      "Auth Token: fake_token\n",
       "{}")});
   GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
                    std::unique_ptr<HttpRequest::Factory>(
@@ -1819,7 +1704,7 @@ TEST(GcsFileSystemTest, Stat_Bucket) {
                    0 /* stat cache max age */, 0 /* stat cache max entries */,
                    0 /* matching paths cache max age */,
                    0 /* matching paths cache max entries */,
-                   0 /* initial retry delay*/, kTestTimeoutConfig);
+                   0 /* initial retry delay */);
 
   FileStatistics stat;
   TF_EXPECT_OK(fs.Stat("gs://bucket/", &stat));
@@ -1831,8 +1716,7 @@ TEST(GcsFileSystemTest, Stat_Bucket) {
 TEST(GcsFileSystemTest, Stat_BucketNotFound) {
   std::vector<HttpRequest*> requests({new FakeHttpRequest(
       "Uri: https://www.googleapis.com/storage/v1/b/bucket\n"
-      "Auth Token: fake_token\n"
-      "Timeouts: 5 1 10\n",
+      "Auth Token: fake_token\n",
       "", errors::NotFound("404"), 404)});
   GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
                    std::unique_ptr<HttpRequest::Factory>(
@@ -1841,7 +1725,7 @@ TEST(GcsFileSystemTest, Stat_BucketNotFound) {
                    0 /* stat cache max age */, 0 /* stat cache max entries */,
                    0 /* matching paths cache max age */,
                    0 /* matching paths cache max entries */,
-                   0 /* initial retry delay*/, kTestTimeoutConfig);
+                   0 /* initial retry delay */);
 
   FileStatistics stat;
   EXPECT_EQ(error::Code::NOT_FOUND, fs.Stat("gs://bucket/", &stat).code());
@@ -1852,33 +1736,29 @@ TEST(GcsFileSystemTest, Stat_Cache) {
       {new FakeHttpRequest(
            "Uri: https://www.googleapis.com/storage/v1/b/bucket/o/"
            "file.txt?fields=size%2Cupdated\n"
-           "Auth Token: fake_token\n"
-           "Timeouts: 5 1 10\n",
+           "Auth Token: fake_token\n",
            strings::StrCat("{\"size\": \"1010\","
                            "\"updated\": \"2016-04-29T23:15:24.896Z\"}")),
        new FakeHttpRequest(
            "Uri: https://www.googleapis.com/storage/v1/b/bucket/o/"
            "subfolder?fields=size%2Cupdated\n"
-           "Auth Token: fake_token\n"
-           "Timeouts: 5 1 10\n",
+           "Auth Token: fake_token\n",
            "", errors::NotFound("404"), 404),
        new FakeHttpRequest(
            "Uri: https://www.googleapis.com/storage/v1/b/bucket/o?"
            "fields=items%2Fname%2CnextPageToken&prefix=subfolder%2F"
            "&maxResults=1\n"
-           "Auth Token: fake_token\n"
-           "Timeouts: 5 1 10\n",
+           "Auth Token: fake_token\n",
            "{\"items\": [ "
            "  { \"name\": \"subfolder/\" }]}")});
-  GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
-                   std::unique_ptr<HttpRequest::Factory>(
-                       new FakeHttpRequestFactory(&requests)),
-                   0 /* block size */, 0 /* max bytes */, 0 /* max staleness */,
-                   3600 /* stat cache max age */,
-                   0 /* stat cache max entries */,
-                   0 /* matching paths cache max age */,
-                   0 /* matching paths cache max entries */,
-                   0 /* initial retry delay*/, kTestTimeoutConfig);
+  GcsFileSystem fs(
+      std::unique_ptr<AuthProvider>(new FakeAuthProvider),
+      std::unique_ptr<HttpRequest::Factory>(
+          new FakeHttpRequestFactory(&requests)),
+      0 /* block size */, 0 /* max bytes */, 0 /* max staleness */,
+      3600 /* stat cache max age */, 0 /* stat cache max entries */,
+      0 /* matching paths cache max age */,
+      0 /* matching paths cache max entries */, 0 /* initial retry delay */);
 
   // Repeated calls to fs.Stat on these paths should not lead to any additional
   // HTTP requests to GCS.
@@ -1901,14 +1781,12 @@ TEST(GcsFileSystemTest, IsDirectory_NotFound) {
            "Uri: https://www.googleapis.com/storage/v1/b/bucket/o?"
            "fields=items%2Fname%2CnextPageToken&prefix=file.txt%2F"
            "&maxResults=1\n"
-           "Auth Token: fake_token\n"
-           "Timeouts: 5 1 10\n",
+           "Auth Token: fake_token\n",
            "{}"),
        new FakeHttpRequest(
            "Uri: https://www.googleapis.com/storage/v1/b/bucket/o/"
            "file.txt?fields=size%2Cupdated\n"
-           "Auth Token: fake_token\n"
-           "Timeouts: 5 1 10\n",
+           "Auth Token: fake_token\n",
            "", errors::NotFound("404"), 404)});
   GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
                    std::unique_ptr<HttpRequest::Factory>(
@@ -1917,7 +1795,7 @@ TEST(GcsFileSystemTest, IsDirectory_NotFound) {
                    0 /* stat cache max age */, 0 /* stat cache max entries */,
                    0 /* matching paths cache max age */,
                    0 /* matching paths cache max entries */,
-                   0 /* initial retry delay*/, kTestTimeoutConfig);
+                   0 /* initial retry delay */);
 
   EXPECT_EQ(error::Code::NOT_FOUND,
             fs.IsDirectory("gs://bucket/file.txt").code());
@@ -1929,14 +1807,12 @@ TEST(GcsFileSystemTest, IsDirectory_NotDirectoryButObject) {
            "Uri: https://www.googleapis.com/storage/v1/b/bucket/o?"
            "fields=items%2Fname%2CnextPageToken&prefix=file.txt%2F"
            "&maxResults=1\n"
-           "Auth Token: fake_token\n"
-           "Timeouts: 5 1 10\n",
+           "Auth Token: fake_token\n",
            "{}"),
        new FakeHttpRequest(
            "Uri: https://www.googleapis.com/storage/v1/b/bucket/o/"
            "file.txt?fields=size%2Cupdated\n"
-           "Auth Token: fake_token\n"
-           "Timeouts: 5 1 10\n",
+           "Auth Token: fake_token\n",
            strings::StrCat("{\"size\": \"1010\","
                            "\"updated\": \"2016-04-29T23:15:24.896Z\"}"))});
   GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
@@ -1946,7 +1822,7 @@ TEST(GcsFileSystemTest, IsDirectory_NotDirectoryButObject) {
                    0 /* stat cache max age */, 0 /* stat cache max entries */,
                    0 /* matching paths cache max age */,
                    0 /* matching paths cache max entries */,
-                   0 /* initial retry delay*/, kTestTimeoutConfig);
+                   0 /* initial retry delay */);
 
   EXPECT_EQ(error::Code::FAILED_PRECONDITION,
             fs.IsDirectory("gs://bucket/file.txt").code());
@@ -1958,15 +1834,13 @@ TEST(GcsFileSystemTest, IsDirectory_Yes) {
            "Uri: https://www.googleapis.com/storage/v1/b/bucket/o?"
            "fields=items%2Fname%2CnextPageToken&prefix=subfolder%2F"
            "&maxResults=1\n"
-           "Auth Token: fake_token\n"
-           "Timeouts: 5 1 10\n",
+           "Auth Token: fake_token\n",
            "{\"items\": [{\"name\": \"subfolder/\"}]}"),
        new FakeHttpRequest(
            "Uri: https://www.googleapis.com/storage/v1/b/bucket/o?"
            "fields=items%2Fname%2CnextPageToken&prefix=subfolder%2F"
            "&maxResults=1\n"
-           "Auth Token: fake_token\n"
-           "Timeouts: 5 1 10\n",
+           "Auth Token: fake_token\n",
            "{\"items\": [{\"name\": \"subfolder/\"}]}")});
   GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
                    std::unique_ptr<HttpRequest::Factory>(
@@ -1975,7 +1849,7 @@ TEST(GcsFileSystemTest, IsDirectory_Yes) {
                    0 /* stat cache max age */, 0 /* stat cache max entries */,
                    0 /* matching paths cache max age */,
                    0 /* matching paths cache max entries */,
-                   0 /* initial retry delay*/, kTestTimeoutConfig);
+                   0 /* initial retry delay */);
 
   TF_EXPECT_OK(fs.IsDirectory("gs://bucket/subfolder"));
   TF_EXPECT_OK(fs.IsDirectory("gs://bucket/subfolder/"));
@@ -1985,13 +1859,11 @@ TEST(GcsFileSystemTest, IsDirectory_Bucket) {
   std::vector<HttpRequest*> requests(
       {new FakeHttpRequest(
            "Uri: https://www.googleapis.com/storage/v1/b/bucket\n"
-           "Auth Token: fake_token\n"
-           "Timeouts: 5 1 10\n",
+           "Auth Token: fake_token\n",
            "{}"),
        new FakeHttpRequest(
            "Uri: https://www.googleapis.com/storage/v1/b/bucket\n"
-           "Auth Token: fake_token\n"
-           "Timeouts: 5 1 10\n",
+           "Auth Token: fake_token\n",
            "{}")});
   GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
                    std::unique_ptr<HttpRequest::Factory>(
@@ -2000,7 +1872,7 @@ TEST(GcsFileSystemTest, IsDirectory_Bucket) {
                    0 /* stat cache max age */, 0 /* stat cache max entries */,
                    0 /* matching paths cache max age */,
                    0 /* matching paths cache max entries */,
-                   0 /* initial retry delay*/, kTestTimeoutConfig);
+                   0 /* initial retry delay */);
 
   TF_EXPECT_OK(fs.IsDirectory("gs://bucket"));
   TF_EXPECT_OK(fs.IsDirectory("gs://bucket/"));
@@ -2009,8 +1881,7 @@ TEST(GcsFileSystemTest, IsDirectory_Bucket) {
 TEST(GcsFileSystemTest, IsDirectory_BucketNotFound) {
   std::vector<HttpRequest*> requests({new FakeHttpRequest(
       "Uri: https://www.googleapis.com/storage/v1/b/bucket\n"
-      "Auth Token: fake_token\n"
-      "Timeouts: 5 1 10\n",
+      "Auth Token: fake_token\n",
       "", errors::NotFound("404"), 404)});
   GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
                    std::unique_ptr<HttpRequest::Factory>(
@@ -2019,7 +1890,7 @@ TEST(GcsFileSystemTest, IsDirectory_BucketNotFound) {
                    0 /* stat cache max age */, 0 /* stat cache max entries */,
                    0 /* matching paths cache max age */,
                    0 /* matching paths cache max entries */,
-                   0 /* initial retry delay*/, kTestTimeoutConfig);
+                   0 /* initial retry delay */);
 
   EXPECT_EQ(error::Code::NOT_FOUND, fs.IsDirectory("gs://bucket/").code());
 }
@@ -2031,12 +1902,10 @@ TEST(GcsFileSystemTest, CreateDir_Folder) {
            "uploadType=resumable&name=subpath%2F\n"
            "Auth Token: fake_token\n"
            "Header X-Upload-Content-Length: 0\n"
-           "Post: yes\n"
-           "Timeouts: 5 1 10\n",
+           "Post: yes\n",
            "", {{"Location", "https://custom/upload/location"}}),
        new FakeHttpRequest("Uri: https://custom/upload/location\n"
                            "Auth Token: fake_token\n"
-                           "Timeouts: 5 1 30\n"
                            "Put body: \n",
                            ""),
        new FakeHttpRequest(
@@ -2044,12 +1913,10 @@ TEST(GcsFileSystemTest, CreateDir_Folder) {
            "uploadType=resumable&name=subpath%2F\n"
            "Auth Token: fake_token\n"
            "Header X-Upload-Content-Length: 0\n"
-           "Post: yes\n"
-           "Timeouts: 5 1 10\n",
+           "Post: yes\n",
            "", {{"Location", "https://custom/upload/location"}}),
        new FakeHttpRequest("Uri: https://custom/upload/location\n"
                            "Auth Token: fake_token\n"
-                           "Timeouts: 5 1 30\n"
                            "Put body: \n",
                            "")});
   GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
@@ -2059,7 +1926,7 @@ TEST(GcsFileSystemTest, CreateDir_Folder) {
                    0 /* stat cache max age */, 0 /* stat cache max entries */,
                    0 /* matching paths cache max age */,
                    0 /* matching paths cache max entries */,
-                   0 /* initial retry delay*/, kTestTimeoutConfig);
+                   0 /* initial retry delay */);
 
   TF_EXPECT_OK(fs.CreateDir("gs://bucket/subpath"));
   TF_EXPECT_OK(fs.CreateDir("gs://bucket/subpath/"));
@@ -2069,13 +1936,11 @@ TEST(GcsFileSystemTest, CreateDir_Bucket) {
   std::vector<HttpRequest*> requests(
       {new FakeHttpRequest(
            "Uri: https://www.googleapis.com/storage/v1/b/bucket\n"
-           "Auth Token: fake_token\n"
-           "Timeouts: 5 1 10\n",
+           "Auth Token: fake_token\n",
            ""),
        new FakeHttpRequest(
            "Uri: https://www.googleapis.com/storage/v1/b/bucket\n"
-           "Auth Token: fake_token\n"
-           "Timeouts: 5 1 10\n",
+           "Auth Token: fake_token\n",
            "")});
   GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
                    std::unique_ptr<HttpRequest::Factory>(
@@ -2084,7 +1949,7 @@ TEST(GcsFileSystemTest, CreateDir_Bucket) {
                    0 /* stat cache max age */, 0 /* stat cache max entries */,
                    0 /* matching paths cache max age */,
                    0 /* matching paths cache max entries */,
-                   0 /* initial retry delay*/, kTestTimeoutConfig);
+                   0 /* initial retry delay */);
 
   TF_EXPECT_OK(fs.CreateDir("gs://bucket/"));
   TF_EXPECT_OK(fs.CreateDir("gs://bucket"));
@@ -2097,16 +1962,14 @@ TEST(GcsFileSystemTest, DeleteRecursively_Ok) {
            "Uri: https://www.googleapis.com/storage/v1/b/bucket/o?"
            "fields=items%2Fname%2CnextPageToken&prefix=path%2F"
            "&maxResults=1\n"
-           "Auth Token: fake_token\n"
-           "Timeouts: 5 1 10\n",
+           "Auth Token: fake_token\n",
            "{\"items\": [ "
            "  { \"name\": \"path/file1.txt\" }]}"),
        // GetChildren recursively.
        new FakeHttpRequest(
            "Uri: https://www.googleapis.com/storage/v1/b/bucket/o?"
            "fields=items%2Fname%2CnextPageToken&prefix=path%2F\n"
-           "Auth Token: fake_token\n"
-           "Timeouts: 5 1 10\n",
+           "Auth Token: fake_token\n",
            "{\"items\": [ "
            "  { \"name\": \"path/\" },"  // The current directory's marker.
            "  { \"name\": \"path/file1.txt\" },"
@@ -2116,35 +1979,30 @@ TEST(GcsFileSystemTest, DeleteRecursively_Ok) {
        new FakeHttpRequest("Uri: https://www.googleapis.com/storage/v1/b"
                            "/bucket/o/path%2F\n"
                            "Auth Token: fake_token\n"
-                           "Timeouts: 5 1 10\n"
                            "Delete: yes\n",
                            ""),
        // Delete the object - fails and will be retried.
        new FakeHttpRequest("Uri: https://www.googleapis.com/storage/v1/b"
                            "/bucket/o/path%2Ffile1.txt\n"
                            "Auth Token: fake_token\n"
-                           "Timeouts: 5 1 10\n"
                            "Delete: yes\n",
                            "", errors::Unavailable("500"), 500),
        // Delete the object again.
        new FakeHttpRequest("Uri: https://www.googleapis.com/storage/v1/b"
                            "/bucket/o/path%2Ffile1.txt\n"
                            "Auth Token: fake_token\n"
-                           "Timeouts: 5 1 10\n"
                            "Delete: yes\n",
                            ""),
        // Delete the object.
        new FakeHttpRequest("Uri: https://www.googleapis.com/storage/v1/b"
                            "/bucket/o/path%2Fsubpath%2Ffile2.txt\n"
                            "Auth Token: fake_token\n"
-                           "Timeouts: 5 1 10\n"
                            "Delete: yes\n",
                            ""),
        // Delete the object.
        new FakeHttpRequest("Uri: https://www.googleapis.com/storage/v1/b"
                            "/bucket/o/path%2Ffile3.txt\n"
                            "Auth Token: fake_token\n"
-                           "Timeouts: 5 1 10\n"
                            "Delete: yes\n",
                            "")});
   GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
@@ -2154,7 +2012,7 @@ TEST(GcsFileSystemTest, DeleteRecursively_Ok) {
                    0 /* stat cache max age */, 0 /* stat cache max entries */,
                    0 /* matching paths cache max age */,
                    0 /* matching paths cache max entries */,
-                   0 /* initial retry delay*/, kTestTimeoutConfig);
+                   0 /* initial retry delay */);
 
   int64 undeleted_files, undeleted_dirs;
   TF_EXPECT_OK(fs.DeleteRecursively("gs://bucket/path", &undeleted_files,
@@ -2170,16 +2028,14 @@ TEST(GcsFileSystemTest, DeleteRecursively_DeletionErrors) {
            "Uri: https://www.googleapis.com/storage/v1/b/bucket/o?"
            "fields=items%2Fname%2CnextPageToken&prefix=path%2F"
            "&maxResults=1\n"
-           "Auth Token: fake_token\n"
-           "Timeouts: 5 1 10\n",
+           "Auth Token: fake_token\n",
            "{\"items\": [ "
            "  { \"name\": \"path/file1.txt\" }]}"),
        // Calling GetChildren recursively.
        new FakeHttpRequest(
            "Uri: https://www.googleapis.com/storage/v1/b/bucket/o?"
            "fields=items%2Fname%2CnextPageToken&prefix=path%2F\n"
-           "Auth Token: fake_token\n"
-           "Timeouts: 5 1 10\n",
+           "Auth Token: fake_token\n",
            "{\"items\": [ "
            "  { \"name\": \"path/file1.txt\" },"
            "  { \"name\": \"path/subpath/\" },"
@@ -2189,14 +2045,12 @@ TEST(GcsFileSystemTest, DeleteRecursively_DeletionErrors) {
        new FakeHttpRequest("Uri: https://www.googleapis.com/storage/v1/b"
                            "/bucket/o/path%2Ffile1.txt\n"
                            "Auth Token: fake_token\n"
-                           "Timeouts: 5 1 10\n"
                            "Delete: yes\n",
                            ""),
        // Deleting the directory marker gs://bucket/path/ - fails with 404.
        new FakeHttpRequest("Uri: https://www.googleapis.com/storage/v1/b"
                            "/bucket/o/path%2Fsubpath%2F\n"
                            "Auth Token: fake_token\n"
-                           "Timeouts: 5 1 10\n"
                            "Delete: yes\n",
                            "", errors::NotFound("404"), 404),
        // Checking if gs://bucket/path/subpath/ is a folder - it is.
@@ -2204,22 +2058,19 @@ TEST(GcsFileSystemTest, DeleteRecursively_DeletionErrors) {
            "Uri: https://www.googleapis.com/storage/v1/b/bucket/o?"
            "fields=items%2Fname%2CnextPageToken&prefix=path%2Fsubpath%2F"
            "&maxResults=1\n"
-           "Auth Token: fake_token\n"
-           "Timeouts: 5 1 10\n",
+           "Auth Token: fake_token\n",
            strings::StrCat("{\"items\": [ "
                            "    { \"name\": \"path/subpath/\" }]}")),
        // Deleting the object gs://bucket/path/subpath/file2.txt
        new FakeHttpRequest("Uri: https://www.googleapis.com/storage/v1/b"
                            "/bucket/o/path%2Fsubpath%2Ffile2.txt\n"
                            "Auth Token: fake_token\n"
-                           "Timeouts: 5 1 10\n"
                            "Delete: yes\n",
                            ""),
        // Deleting the object s://bucket/path/file3.txt - fails with 404.
        new FakeHttpRequest("Uri: https://www.googleapis.com/storage/v1/b"
                            "/bucket/o/path%2Ffile3.txt\n"
                            "Auth Token: fake_token\n"
-                           "Timeouts: 5 1 10\n"
                            "Delete: yes\n",
                            "", errors::NotFound("404"), 404),
        // Checking if gs://bucket/path/file3.txt/ is a folder - it's not.
@@ -2227,15 +2078,13 @@ TEST(GcsFileSystemTest, DeleteRecursively_DeletionErrors) {
            "Uri: https://www.googleapis.com/storage/v1/b/bucket/o?"
            "fields=items%2Fname%2CnextPageToken&prefix=path%2Ffile3.txt%2F"
            "&maxResults=1\n"
-           "Auth Token: fake_token\n"
-           "Timeouts: 5 1 10\n",
+           "Auth Token: fake_token\n",
            "{}"),
        // Checking if gs://bucket/path/file3.txt is an object - fails with 404.
        new FakeHttpRequest(
            "Uri: https://www.googleapis.com/storage/v1/b/bucket/o/"
            "path%2Ffile3.txt?fields=size%2Cupdated\n"
-           "Auth Token: fake_token\n"
-           "Timeouts: 5 1 10\n",
+           "Auth Token: fake_token\n",
            "", errors::NotFound("404"), 404)});
 
   GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
@@ -2245,7 +2094,7 @@ TEST(GcsFileSystemTest, DeleteRecursively_DeletionErrors) {
                    0 /* stat cache max age */, 0 /* stat cache max entries */,
                    0 /* matching paths cache max age */,
                    0 /* matching paths cache max entries */,
-                   0 /* initial retry delay*/, kTestTimeoutConfig);
+                   0 /* initial retry delay */);
 
   int64 undeleted_files, undeleted_dirs;
   TF_EXPECT_OK(fs.DeleteRecursively("gs://bucket/path", &undeleted_files,
@@ -2261,15 +2110,13 @@ TEST(GcsFileSystemTest, DeleteRecursively_NotAFolder) {
            "Uri: https://www.googleapis.com/storage/v1/b/bucket/o?"
            "fields=items%2Fname%2CnextPageToken&prefix=path%2F"
            "&maxResults=1\n"
-           "Auth Token: fake_token\n"
-           "Timeouts: 5 1 10\n",
+           "Auth Token: fake_token\n",
            "{}"),
        // IsDirectory is checking if the path exists as an object.
        new FakeHttpRequest(
            "Uri: https://www.googleapis.com/storage/v1/b/bucket/o/"
            "path?fields=size%2Cupdated\n"
-           "Auth Token: fake_token\n"
-           "Timeouts: 5 1 10\n",
+           "Auth Token: fake_token\n",
            "", errors::NotFound("404"), 404)});
   GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
                    std::unique_ptr<HttpRequest::Factory>(
@@ -2278,7 +2125,7 @@ TEST(GcsFileSystemTest, DeleteRecursively_NotAFolder) {
                    0 /* stat cache max age */, 0 /* stat cache max entries */,
                    0 /* matching paths cache max age */,
                    0 /* matching paths cache max entries */,
-                   0 /* initial retry delay*/, kTestTimeoutConfig);
+                   0 /* initial retry delay */);
 
   int64 undeleted_files, undeleted_dirs;
   EXPECT_EQ(error::Code::NOT_FOUND,
@@ -2295,11 +2142,6 @@ TEST(GcsFileSystemTest, OverrideCacheParameters) {
   EXPECT_EQ(128 * 1024 * 1024, fs1.block_size());
   EXPECT_EQ(2 * fs1.block_size(), fs1.max_bytes());
   EXPECT_EQ(0, fs1.max_staleness());
-  EXPECT_EQ(120, fs1.timeouts().connect);
-  EXPECT_EQ(60, fs1.timeouts().idle);
-  EXPECT_EQ(3600, fs1.timeouts().metadata);
-  EXPECT_EQ(3600, fs1.timeouts().read);
-  EXPECT_EQ(3600, fs1.timeouts().write);
 
   // Verify legacy readahead buffer override sets block size.
   setenv("GCS_READAHEAD_BUFFER_SIZE_BYTES", "123456789", 1);
@@ -2325,19 +2167,6 @@ TEST(GcsFileSystemTest, OverrideCacheParameters) {
   EXPECT_EQ(32, fs4.stat_cache_max_entries());
   EXPECT_EQ(30, fs4.matching_paths_cache_max_age());
   EXPECT_EQ(64, fs4.matching_paths_cache_max_entries());
-
-  // Verify timeout overrides.
-  setenv("GCS_REQUEST_CONNECTION_TIMEOUT_SECS", "10", 1);
-  setenv("GCS_REQUEST_IDLE_TIMEOUT_SECS", "5", 1);
-  setenv("GCS_METADATA_REQUEST_TIMEOUT_SECS", "20", 1);
-  setenv("GCS_READ_REQUEST_TIMEOUT_SECS", "30", 1);
-  setenv("GCS_WRITE_REQUEST_TIMEOUT_SECS", "40", 1);
-  GcsFileSystem fs5;
-  EXPECT_EQ(10, fs5.timeouts().connect);
-  EXPECT_EQ(5, fs5.timeouts().idle);
-  EXPECT_EQ(20, fs5.timeouts().metadata);
-  EXPECT_EQ(30, fs5.timeouts().read);
-  EXPECT_EQ(40, fs5.timeouts().write);
 }
 
 }  // namespace
diff --git a/tensorflow/core/platform/cloud/http_request.h b/tensorflow/core/platform/cloud/http_request.h
index 95a436c622..02d9e9054a 100644
--- a/tensorflow/core/platform/cloud/http_request.h
+++ b/tensorflow/core/platform/cloud/http_request.h
@@ -118,16 +118,6 @@ class HttpRequest {
   // Url encodes str and returns a new string.
   virtual string EscapeString(const string& str) = 0;
 
-  /// \brief Set timeouts for this request.
-  ///
-  /// The connection parameter controls how long we should wait for the
-  /// connection to be established. The inactivity parameter controls how long
-  /// we should wait between additional responses from the server. Finally the
-  /// total parameter controls the maximum total connection time to prevent
-  /// hanging indefinitely.
-  virtual Status SetTimeouts(uint32 connection, uint32 inactivity,
-                             uint32 total) = 0;
-
   TF_DISALLOW_COPY_AND_ASSIGN(HttpRequest);
 };
 
diff --git a/tensorflow/core/platform/cloud/http_request_fake.h b/tensorflow/core/platform/cloud/http_request_fake.h
index 22398d739c..bfe04f6363 100644
--- a/tensorflow/core/platform/cloud/http_request_fake.h
+++ b/tensorflow/core/platform/cloud/http_request_fake.h
@@ -37,14 +37,13 @@ class FakeHttpRequest : public CurlHttpRequest {
  public:
   /// Return the response for the given request.
   FakeHttpRequest(const string& request, const string& response)
-      : FakeHttpRequest(request, response, Status::OK(), nullptr, {}, 200, {}) {
-  }
+      : FakeHttpRequest(request, response, Status::OK(), nullptr, {}, 200) {}
 
   /// Return the response with headers for the given request.
   FakeHttpRequest(const string& request, const string& response,
                   const std::map<string, string>& response_headers)
       : FakeHttpRequest(request, response, Status::OK(), nullptr,
-                        response_headers, 200, {}) {}
+                        response_headers, 200) {}
 
   /// \brief Return the response for the request and capture the POST body.
   ///
@@ -52,13 +51,13 @@ class FakeHttpRequest : public CurlHttpRequest {
   FakeHttpRequest(const string& request, const string& response,
                   string* captured_post_body)
       : FakeHttpRequest(request, response, Status::OK(), captured_post_body, {},
-                        200, {}) {}
+                        200) {}
 
   /// \brief Return the response and the status for the given request.
   FakeHttpRequest(const string& request, const string& response,
                   Status response_status, uint64 response_code)
       : FakeHttpRequest(request, response, response_status, nullptr, {},
-                        response_code, {}) {}
+                        response_code) {}
 
   /// \brief Return the response and the status for the given request
   ///  and capture the POST body.
@@ -67,15 +66,13 @@ class FakeHttpRequest : public CurlHttpRequest {
   FakeHttpRequest(const string& request, const string& response,
                   Status response_status, string* captured_post_body,
                   const std::map<string, string>& response_headers,
-                  uint64 response_code,
-                  absl::optional<std::tuple<uint32, uint32, uint32>> timeouts)
+                  uint64 response_code)
       : expected_request_(request),
         response_(response),
         response_status_(response_status),
         captured_post_body_(captured_post_body),
         response_headers_(response_headers),
-        response_code_(response_code),
-        timeouts_(timeouts) {}
+        response_code_(response_code) {}
 
   Status Init() override { return Status::OK(); }
   Status SetUri(const string& uri) override {
@@ -163,13 +160,6 @@ class FakeHttpRequest : public CurlHttpRequest {
 
   virtual uint64 GetResponseCode() const override { return response_code_; }
 
-  Status SetTimeouts(uint32 connection, uint32 inactivity,
-                     uint32 total) override {
-    actual_request_ += strings::StrCat("Timeouts: ", connection, " ",
-                                       inactivity, " ", total, "\n");
-    return Status::OK();
-  }
-
  private:
   std::vector<char>* buffer_ = nullptr;
   string expected_request_;
@@ -179,7 +169,6 @@ class FakeHttpRequest : public CurlHttpRequest {
   string* captured_post_body_ = nullptr;
   std::map<string, string> response_headers_;
   uint64 response_code_ = 0;
-  absl::optional<std::tuple<uint32, uint32, uint32>> timeouts_;
 };
 
 /// Fake HttpRequest factory for testing.
-- 
GitLab


From cffd79f4b102c2082cbcc258abf7ed06df8c141c Mon Sep 17 00:00:00 2001
From: Sourabh Bajaj <1517779+sb2nov@users.noreply.github.com>
Date: Fri, 1 Dec 2017 12:22:16 -0800
Subject: [PATCH 0519/1225] Revert "Arbitrary dim for slice (#11140)" (#15025)

This reverts commit 8011eda4b70faac6025c6b0553c3d95474adb5fe.
---
 tensorflow/core/kernels/slice_op.cc           | 116 ++++++++++--------
 tensorflow/core/kernels/slice_op.h            | 108 +++-------------
 tensorflow/core/kernels/slice_op_gpu.cu.cc    |  56 ---------
 .../core/kernels/strided_slice_op_impl.h      |  23 ++--
 .../core/kernels/strided_slice_op_test.cc     |  49 --------
 .../python/kernel_tests/slice_op_test.py      |  25 +---
 6 files changed, 98 insertions(+), 279 deletions(-)

diff --git a/tensorflow/core/kernels/slice_op.cc b/tensorflow/core/kernels/slice_op.cc
index 28a379774b..d46701749b 100644
--- a/tensorflow/core/kernels/slice_op.cc
+++ b/tensorflow/core/kernels/slice_op.cc
@@ -190,25 +190,41 @@ class SliceOp : public OpKernel {
         }
         return;
       }
-#define HANDLE_DIM(NDIM)                                              \
-  if (input_dims == NDIM) {                                           \
-    functor::Slice<Device, T, NDIM>()(                                \
-        context->eigen_device<Device>(), result, input, begin, size); \
-    return;                                                           \
+#define HANDLE_DIM(NDIM)                            \
+  if (input_dims == NDIM) {                         \
+    HandleCase<NDIM>(context, begin, size, result); \
+    return;                                         \
   }
+
       HANDLE_DIM(1);
       HANDLE_DIM(2);
       HANDLE_DIM(3);
       HANDLE_DIM(4);
       HANDLE_DIM(5);
       HANDLE_DIM(6);
+      HANDLE_DIM(7);
 
 #undef HANDLE_DIM
 
-      // handle cases which dim >= 7
-      functor::Slice<Device, T, 7>()(
-          context->eigen_device<Device>(), result, input, begin, size);
+      OP_REQUIRES(context, false, errors::Unimplemented(
+                                      "SliceOp : Unhandled input dimensions"));
+    }
+  }
+
+ private:
+  template <int NDIM>
+  void HandleCase(OpKernelContext* context, const gtl::ArraySlice<int64>& begin,
+                  const gtl::ArraySlice<int64>& size, Tensor* result) {
+    Eigen::DSizes<Eigen::DenseIndex, NDIM> indices;
+    Eigen::DSizes<Eigen::DenseIndex, NDIM> sizes;
+    for (int i = 0; i < NDIM; ++i) {
+      indices[i] = begin[i];
+      sizes[i] = size[i];
     }
+
+    functor::Slice<Device, T, NDIM>()(
+        context->eigen_device<Device>(), result->tensor<T, NDIM>(),
+        context->input(0).tensor<T, NDIM>(), indices, sizes);
   }
 };
 
@@ -248,16 +264,11 @@ class MklSliceOp : public OpKernel {
         }
         return;
       }
-      // Special case for handling 4-D tensor slice.
-      if (input_dims == 4) {
-        HandleCase4D(context, begin, size, result);
-      } else {
-#define HANDLE_DIM(NDIM)                                                  \
-      if (input_dims == NDIM) {                                           \
-        functor::Slice<Device, T, NDIM>()(                                \
-            context->eigen_device<Device>(), result, input, begin, size); \
-            return;                                                       \
-      }
+#define HANDLE_DIM(NDIM)                            \
+  if (input_dims == NDIM) {                         \
+    HandleCase<NDIM>(context, begin, size, result); \
+    return;                                         \
+  }
 
       HANDLE_DIM(1);
       HANDLE_DIM(2);
@@ -265,13 +276,12 @@ class MklSliceOp : public OpKernel {
       HANDLE_DIM(4);
       HANDLE_DIM(5);
       HANDLE_DIM(6);
+      HANDLE_DIM(7);
 
 #undef HANDLE_DIM
 
-        // handle cases which dim >= 7
-        functor::Slice<Device, T, 7>()(
-          context->eigen_device<Device>(), result, input, begin, size);
-      }
+      OP_REQUIRES(context, false, errors::Unimplemented(
+                                      "SliceOp : Unhandled input dimensions"));
     }
   }
 
@@ -318,7 +328,8 @@ class MklSliceOp : public OpKernel {
     return false;
   }
 
-  void HandleCase4D(OpKernelContext* context,
+  template <int NDIM>
+  void HandleCase(OpKernelContext* context,
                   const gtl::ArraySlice<int64>& begin,
                   const gtl::ArraySlice<int64>& size, Tensor* result) {
     int slice_dim = -1;
@@ -327,7 +338,8 @@ class MklSliceOp : public OpKernel {
     // differs from the input tensor in only 1 out of 4 dimensions.
     // This case arises in the context of Slice of 4-D tensor in NHWC or NCHW
     // format over channel dimension.
-    if (DoesSliceShapeDifferInOnly1D(in_shape, begin, size, &slice_dim)) {
+    if (NDIM == 4 &&
+        DoesSliceShapeDifferInOnly1D(in_shape, begin, size, &slice_dim)) {
         size_t in_strides[4] = { (size_t) in_shape.dim_size(1) *
                                           in_shape.dim_size(2) *
                                           in_shape.dim_size(3),
@@ -391,8 +403,16 @@ class MklSliceOp : public OpKernel {
         // slice_dim is not 1 or 3, then we fallback to Eigen implementation.
     }
 
-    functor::Slice<Device, T, 4>()(
-        context->eigen_device<Device>(), result, context->input(0), begin, size);
+    Eigen::DSizes<Eigen::DenseIndex, NDIM> indices;
+    Eigen::DSizes<Eigen::DenseIndex, NDIM> sizes;
+    for (int i = 0; i < NDIM; ++i) {
+      indices[i] = begin[i];
+      sizes[i] = size[i];
+    }
+
+    functor::Slice<Device, T, NDIM>()(
+        context->eigen_device<Device>(), result->tensor<T, NDIM>(),
+        context->input(0).tensor<T, NDIM>(), indices, sizes);
   }
 };
 #endif
@@ -400,13 +420,13 @@ class MklSliceOp : public OpKernel {
 // Forward declarations of the functor specializations for declared in the
 // sharded source files.
 namespace functor {
-#define DECLARE_CPU_SPEC(T, NDIM)                        \
-  template <>                                            \
-  void Slice<CPUDevice, T, NDIM>::operator()(            \
-      const CPUDevice& d, Tensor* output,                \
-      const Tensor& input,                               \
-      const gtl::ArraySlice<int64>& slice_indices,       \
-      const gtl::ArraySlice<int64>& slice_sizes);        \
+#define DECLARE_CPU_SPEC(T, NDIM)                                  \
+  template <>                                                      \
+  void Slice<CPUDevice, T, NDIM>::operator()(                      \
+      const CPUDevice& d, typename TTypes<T, NDIM>::Tensor output, \
+      typename TTypes<T, NDIM>::ConstTensor input,                 \
+      const Eigen::DSizes<Eigen::DenseIndex, NDIM>& indices,       \
+      const Eigen::DSizes<Eigen::DenseIndex, NDIM>& sizes);        \
   extern template struct Slice<CPUDevice, T, NDIM>;
 
 #define DECLARE_FOR_N(T)  \
@@ -456,14 +476,13 @@ REGISTER_SLICE(bfloat16);
 #if GOOGLE_CUDA
 // Forward declarations of the functor specializations for GPU.
 namespace functor {
-#define DECLARE_GPU_SPEC(T, NDIM)                        \
-  template <>                                            \
-  void Slice<GPUDevice, T, NDIM>::operator()(            \
-      const GPUDevice& d,                                \
-      Tensor* output,                                    \
-      const Tensor& input,                               \
-      const gtl::ArraySlice<int64>& slice_indices,       \
-      const gtl::ArraySlice<int64>& slice_sizes);        \
+#define DECLARE_GPU_SPEC(T, NDIM)                                  \
+  template <>                                                      \
+  void Slice<GPUDevice, T, NDIM>::operator()(                      \
+      const GPUDevice& d, typename TTypes<T, NDIM>::Tensor output, \
+      typename TTypes<T, NDIM>::ConstTensor input,                 \
+      const Eigen::DSizes<Eigen::DenseIndex, NDIM>& indices,       \
+      const Eigen::DSizes<Eigen::DenseIndex, NDIM>& sizes);        \
   extern template struct Slice<GPUDevice, T, NDIM>;
 
 #define DECLARE_FOR_N(T)  \
@@ -517,14 +536,13 @@ REGISTER_KERNEL_BUILDER(Name("Slice")
 #ifdef TENSORFLOW_USE_SYCL
 // Forward declarations of the functor specializations for SYCL.
 namespace functor {
-#define DECLARE_SYCL_SPEC(T, NDIM)                       \
-  template <>                                            \
-  void Slice<SYCLDevice, T, NDIM>::operator()(           \
-      const SYCLDevice& d,                               \
-      Tensor* output,                                    \
-      const Tensor& input,                               \
-      const gtl::ArraySlice<int64>& slice_indices,       \
-      const gtl::ArraySlice<int64>& slice_sizes);        \
+#define DECLARE_SYCL_SPEC(T, NDIM)                                 \
+  template <>                                                      \
+  void Slice<SYCLDevice, T, NDIM>::operator()(                     \
+      const SYCLDevice& d, typename TTypes<T, NDIM>::Tensor output,\
+      typename TTypes<T, NDIM>::ConstTensor input,                 \
+      const Eigen::DSizes<Eigen::DenseIndex, NDIM>& indices,       \
+      const Eigen::DSizes<Eigen::DenseIndex, NDIM>& sizes);        \
   extern template struct Slice<SYCLDevice, T, NDIM>;
 
 #define DECLARE_FOR_N(T)   \
diff --git a/tensorflow/core/kernels/slice_op.h b/tensorflow/core/kernels/slice_op.h
index 5fd6ce4067..0362a02133 100644
--- a/tensorflow/core/kernels/slice_op.h
+++ b/tensorflow/core/kernels/slice_op.h
@@ -19,104 +19,32 @@ limitations under the License.
 // Functor definition for SliceOp, must be compilable by nvcc.
 
 #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
-#include "tensorflow/core/framework/tensor.h"
 #include "tensorflow/core/framework/tensor_types.h"
-#include "tensorflow/core/kernels/ops_util.h"
 
 namespace tensorflow {
+namespace functor {
 
-namespace internal {
-
-template <typename Device, typename T>
-void SliceSimple(const Device& d, Tensor* out, const Tensor& in,
-                 const gtl::ArraySlice<int64>& slice_indices);
-template <typename Device, typename T>
-void SliceSimpleGpu(const Device& d, Tensor* out, const Tensor& in,
-                 const gtl::ArraySlice<int64>& slice_indices);
-
-template <typename Device, typename T>
-void SliceSimple(const Device& d, Tensor* out, const Tensor& in,
-                 const gtl::ArraySlice<int64>& slice_indices) {
-  const int ndims = in.dims();
-  const int64 nelem = out->NumElements();
-  const gtl::InlinedVector<int64, 8> in_strides = ComputeStride<int64>(in.shape());
-  const gtl::InlinedVector<int64, 8> out_strides = ComputeStride<int64>(out->shape());
-  const T* p = in.flat<T>().data();
-  T* q = out->flat<T>().data();
-
-  std::vector<int64> i_idx(nelem, 0);
-  std::vector<int64> t(nelem, 0);
-
-  for (int64 o_idx = 0; o_idx < nelem; ++o_idx) {
-    t[o_idx] = o_idx;
-  }
-  for (int i = 0; i < ndims; ++i) {
-    int64 n = (nelem + 7) / 8;
-    int64 o_idx = 0;
-    switch (nelem % 8) {
-#define CALC_INPUT_IDX                                                            \
-  i_idx[o_idx] += (t[o_idx] / out_strides[i] + slice_indices[i]) * in_strides[i]; \
-  t[o_idx] %= out_strides[i];                                                     \
-  ++o_idx;
-      case 0: do { CALC_INPUT_IDX;
-      case 7:      CALC_INPUT_IDX;
-      case 6:      CALC_INPUT_IDX;
-      case 5:      CALC_INPUT_IDX;
-      case 4:      CALC_INPUT_IDX;
-      case 3:      CALC_INPUT_IDX;
-      case 2:      CALC_INPUT_IDX;
-      case 1:      CALC_INPUT_IDX;
-#undef CALC_INPUT_IDX
-              } while (--n > 0);
-    }
-  }
-  for (int64 o_idx = 0; o_idx < nelem; ++o_idx) {
-    q[o_idx] = p[i_idx[o_idx]];
-  }
-}
 
 template <typename Device, typename T, int NDIMS>
-void SliceUsingEigen(const Device& d, Tensor* out, const Tensor& in,
-                 const gtl::ArraySlice<int64>& slice_indices,
-                 const gtl::ArraySlice<int64>& slice_sizes) {
-  auto input = in.tensor<T, NDIMS>();
-  auto output = out->tensor<T, NDIMS>();
-  Eigen::DSizes<int, NDIMS> indices;
-  for (int i = 0; i < NDIMS; ++i) {
-    indices[i] = slice_indices[i];
-  }
-  Eigen::DSizes<int, NDIMS> sizes;
-  for (int i = 0; i < NDIMS; ++i) {
-    sizes[i] = slice_sizes[i];
-  }
-  const bool use_64bit = input.size() > Eigen::NumTraits<int>::highest();
-  if (!use_64bit &&
-      Eigen::internal::is_same<Device, Eigen::GpuDevice>::value) {
-    To32Bit(output).device(d) = To32Bit(input).slice(indices, sizes);
-  } else {
-    output.device(d) = input.slice(indices, sizes);
-  }
-}
-
-} // namespace internal
-
-namespace functor {
-
-// Template parameter NDIM is not neccesary here. The aim of keeping it
-// is to compile struct slice separately which minimizes the compiling time.
-template <typename Device, typename T, int NDIM>
 struct Slice {
-  void operator()(const Device& d, Tensor* out, const Tensor& in,
-                  const gtl::ArraySlice<int64>& slice_indices,
-                  const gtl::ArraySlice<int64>& slice_sizes) {
-    if (in.dims() == NDIM) {
-        internal::SliceUsingEigen<Device, T, NDIM>(d, out, in, slice_indices, slice_sizes);
+  void operator()(const Device& d, typename TTypes<T, NDIMS>::Tensor output,
+                  typename TTypes<T, NDIMS>::ConstTensor input,
+                  const Eigen::DSizes<Eigen::DenseIndex, NDIMS>& slice_indices,
+                  const Eigen::DSizes<Eigen::DenseIndex, NDIMS>& slice_sizes) {
+    bool use_64bit = (input.size() > Eigen::NumTraits<int>::highest());
+    if (!use_64bit &&
+        Eigen::internal::is_same<Device, Eigen::GpuDevice>::value) {
+      Eigen::DSizes<int, NDIMS> indices;
+      for (int i = 0; i < NDIMS; ++i) {
+        indices[i] = slice_indices[i];
+      }
+      Eigen::DSizes<int, NDIMS> sizes;
+      for (int i = 0; i < NDIMS; ++i) {
+        sizes[i] = slice_sizes[i];
+      }
+      To32Bit(output).device(d) = To32Bit(input).slice(indices, sizes);
     } else {
-        if (Eigen::internal::is_same<Device, Eigen::GpuDevice>::value) {
-          internal::SliceSimpleGpu<Device, T>(d, out, in, slice_indices);
-        } else {
-          internal::SliceSimple<Device, T>(d, out, in, slice_indices);
-        }
+      output.device(d) = input.slice(slice_indices, slice_sizes);
     }
   }
 };
diff --git a/tensorflow/core/kernels/slice_op_gpu.cu.cc b/tensorflow/core/kernels/slice_op_gpu.cu.cc
index 3039b3d777..a301986f2f 100644
--- a/tensorflow/core/kernels/slice_op_gpu.cu.cc
+++ b/tensorflow/core/kernels/slice_op_gpu.cu.cc
@@ -21,65 +21,9 @@ limitations under the License.
 
 #include "tensorflow/core/framework/register_types.h"
 #include "tensorflow/core/framework/tensor_types.h"
-#include "tensorflow/core/kernels/ops_util.h"
 #include "tensorflow/core/platform/types.h"
-#include "tensorflow/core/util/cuda_kernel_helper.h"
 
 namespace tensorflow {
-namespace internal {
-
-template <typename T>
-__global__ void SliceKernel(int nthreads, const T* src, const int32* buf,
-                            const int32 ndims, T* dst) {
-  const int32* in_strides = buf;
-  const int32* out_strides = buf + ndims;
-  const int32* slice_indices = buf + ndims * 2;
-  CUDA_1D_KERNEL_LOOP(o_idx, nthreads) {
-    int32 i_idx = 0;
-    int32 t = o_idx;
-    for (int i = 0; i < ndims; ++i) {
-      i_idx += (t / out_strides[i] + slice_indices[i]) * in_strides[i];
-      t %= out_strides[i];
-    }
-    dst[o_idx] = ldg(src + i_idx);
-  }
-}
-
-template <typename Device, typename T>
-void SliceSimpleGpu(const Device& d, Tensor* out, const Tensor& in,
-                 const gtl::ArraySlice<int64>& slice_indices) {
-  // Ensures we can use 32-bit index.
-  const int64 in_nelem = in.NumElements();
-  CHECK_LT(in_nelem, kint32max) << "Tensor too large to transpose on GPU";
-  const int64 out_nelem = out->NumElements();
-  CHECK_LT(out_nelem, kint32max) << "Tensor too large to transpose on GPU";
-  // Pack strides and slice indices sizes into one buffer.
-  const int32 ndims = in.dims();
-  gtl::InlinedVector<int32, 24> host_buf(ndims * 3);
-  gtl::InlinedVector<int32, 8> in_strides = ComputeStride<int32>(in.shape());
-  gtl::InlinedVector<int32, 8> out_strides = ComputeStride<int32>(out->shape());
-  for (int i = 0; i < ndims; ++i) {
-    host_buf[i] = in_strides[i];
-    host_buf[ndims + i] = out_strides[i];
-    host_buf[ndims * 2 + i] = slice_indices[i];
-  }
-  auto num_bytes = sizeof(int64) * host_buf.size();
-  auto dev_buf = d.allocate(num_bytes);
-  // NOTE: host_buf is not allocated by CudaHostAllocator, and
-  // therefore we are doing a sync copy effectively.
-  d.memcpyHostToDevice(dev_buf, host_buf.data(), num_bytes);
-  // Launch kernel to q[...] = p[...].
-  const T* p = in.flat<T>().data();
-  T* q = out->flat<T>().data();
-  CudaLaunchConfig cfg = GetCudaLaunchConfig(out_nelem, d);
-  SliceKernel<<<cfg.block_count, cfg.thread_per_block, 0, d.stream()>>>(
-      cfg.virtual_thread_count, p, reinterpret_cast<const int32*>(dev_buf),
-      ndims, q);
-  // Safe to deallocate immediately after the kernel launch.
-  d.deallocate(dev_buf);
-}
-
-} // namespace internal
 
 typedef Eigen::GpuDevice GPUDevice;
 
diff --git a/tensorflow/core/kernels/strided_slice_op_impl.h b/tensorflow/core/kernels/strided_slice_op_impl.h
index 7d42887426..de65147572 100644
--- a/tensorflow/core/kernels/strided_slice_op_impl.h
+++ b/tensorflow/core/kernels/strided_slice_op_impl.h
@@ -84,16 +84,16 @@ void HandleStridedSliceCase(OpKernelContext* context,
 
   gtl::InlinedVector<int64, 4> processing_dims = processing_shape.dim_sizes();
   if (is_simple_slice) {
-    gtl::InlinedVector<int64, 4> sizes(begin.size());
+    Eigen::DSizes<Eigen::DenseIndex, NDIM> begin_di;
+    Eigen::DSizes<Eigen::DenseIndex, NDIM> sizes_di;
     for (int i = 0; i < NDIM; ++i) {
-      sizes[i] = end[i] - begin[i];
+      begin_di[i] = begin[i];
+      sizes_di[i] = end[i] - begin[i];
     }
-    const TensorShape final_shape = result->shape();
-    CHECK(result->CopyFrom(*result, processing_shape));
-    const Tensor input = context->input(0);
-    functor::Slice<Device, T, NDIM>()(
-        context->eigen_device<Device>(), result, input, begin, sizes);
-    CHECK(result->CopyFrom(*result, final_shape));
+    functor::Slice<Device, Proxy, NDIM>()(
+        context->eigen_device<Device>(),
+        result->bit_casted_shaped<Proxy, NDIM>(processing_dims),
+        context->input(0).bit_casted_tensor<Proxy, NDIM>(), begin_di, sizes_di);
   } else {
     Eigen::DSizes<Eigen::DenseIndex, NDIM> begin_di;
     Eigen::DSizes<Eigen::DenseIndex, NDIM> end_di;
@@ -196,9 +196,10 @@ class HandleStridedSliceAssignCase<Device, T, 0> {
   extern template struct StridedSlice<GPUDevice, T, NDIM>;         \
   template <>                                                      \
   void Slice<GPUDevice, T, NDIM>::operator()(                      \
-      const GPUDevice& d, Tensor* output, const Tensor& input,     \
-      const gtl::ArraySlice<int64>& slice_indices,                 \
-      const gtl::ArraySlice<int64>& slice_sizes);                  \
+      const GPUDevice& d, typename TTypes<T, NDIM>::Tensor output, \
+      typename TTypes<T, NDIM>::ConstTensor input,                 \
+      const Eigen::DSizes<Eigen::DenseIndex, NDIM>& indices,       \
+      const Eigen::DSizes<Eigen::DenseIndex, NDIM>& sizes);        \
   extern template struct Slice<GPUDevice, T, NDIM>;                \
   template <>                                                      \
   void StridedSliceGrad<GPUDevice, T, NDIM>::operator()(           \
diff --git a/tensorflow/core/kernels/strided_slice_op_test.cc b/tensorflow/core/kernels/strided_slice_op_test.cc
index 78bb15463c..281ca0f58f 100644
--- a/tensorflow/core/kernels/strided_slice_op_test.cc
+++ b/tensorflow/core/kernels/strided_slice_op_test.cc
@@ -76,69 +76,20 @@ static void SliceHelper(int iters, int size) {
   testing::UseRealTime();
 }
 
-template <typename T>
-static void Dim8SliceHelper(int iters, int size) {
-  testing::StopTiming();
-  Graph* g = new Graph(OpRegistry::Global());
-  DataType dt = DataTypeToEnum<T>::v();
-  int kDim = 100;
-  int kMaxSize = 15000;
-  CHECK_LT(size, kMaxSize);
-
-  Tensor begin(DT_INT32, TensorShape({8}));
-  begin.flat<int32>()(10) = 10;
-  for (int i = 1; i < 7; ++i) {
-    begin.flat<int32>()(i) = 0;
-  }
-  begin.flat<int32>()(7) = 10;
-
-  Tensor end(DT_INT32, TensorShape({8}));
-  end.flat<int32>()(0) = 10 + kDim;
-  for (int i = 1; i < 7; ++i) {
-    end.flat<int32>()(i) = 1;
-  }
-  end.flat<int32>()(7) = 10 + size;
-
-  Tensor strides(DT_INT32, TensorShape({8}));
-  for (int i = 0; i < 8; ++i) {
-    strides.flat<int32>()(i) = 1;
-  }
-
-  Tensor input(dt, TensorShape({2*kDim, 1, 1, 1, 1, 1, 1, kMaxSize}));
-  input.flat<T>().setRandom();
-
-  Node* node;
-  TF_CHECK_OK(NodeBuilder(g->NewName("n"), "StridedSlice")
-                  .Input(test::graph::Constant(g, input))
-                  .Input(test::graph::Constant(g, begin))
-                  .Input(test::graph::Constant(g, end))
-                  .Input(test::graph::Constant(g, strides))
-                  .Attr("T", dt)
-                  .Finalize(g, &node));
-
-  testing::BytesProcessed(static_cast<int64>(iters) * kDim * size * sizeof(T));
-  testing::StartTiming();
-  test::Benchmark("cpu", g).Run(iters);
-  testing::UseRealTime();
-}
-
 static void BM_SliceFloat(int iters, int dim2) {
   SliceHelper<float>(iters, dim2);
-  Dim8SliceHelper<float>(iters, dim2);
 }
 
 BENCHMARK(BM_SliceFloat)->Arg(100)->Arg(1000)->Arg(10000);
 
 static void BM_SliceComplex64(int iters, int dim2) {
   SliceHelper<std::complex<float>>(iters, dim2);
-  Dim8SliceHelper<std::complex<float>>(iters, dim2);
 }
 
 BENCHMARK(BM_SliceComplex64)->Arg(100)->Arg(1000)->Arg(10000);
 
 static void BM_SliceBFloat16(int iters, int dim2) {
   SliceHelper<bfloat16>(iters, dim2);
-  Dim8SliceHelper<bfloat16>(iters, dim2);
 }
 
 BENCHMARK(BM_SliceBFloat16)->Arg(100)->Arg(1000)->Arg(10000);
diff --git a/tensorflow/python/kernel_tests/slice_op_test.py b/tensorflow/python/kernel_tests/slice_op_test.py
index 6cdc7872f9..051a25080b 100644
--- a/tensorflow/python/kernel_tests/slice_op_test.py
+++ b/tensorflow/python/kernel_tests/slice_op_test.py
@@ -217,30 +217,6 @@ class SliceTest(test.TestCase):
     self.assertEqual(expected_val.shape, slice_t.get_shape())
     self.assertEqual(expected_val.shape, slice2_t.get_shape())
 
-  def testRandomHighRank(self):
-    # Random dims of rank 8
-    input_shape = np.random.randint(0, 20, size=8)
-    inp = np.random.rand(*input_shape).astype("f")
-    with self.test_session(use_gpu=True) as sess:
-      a = constant_op.constant(
-          [float(x) for x in inp.ravel(order="C")],
-          shape=input_shape,
-          dtype=dtypes.float32)
-      indices = [0 if x == 0 else np.random.randint(x) for x in input_shape]
-      sizes = [
-          np.random.randint(0, input_shape[i] - indices[i] + 1)
-          for i in range(8)
-      ]
-      slice_t = array_ops.slice(a, indices, sizes)
-      slice_val = sess.run(slice_t)
-
-    expected_val = inp[indices[0]:indices[0] + sizes[0], indices[1]:indices[1] + sizes[
-      1], indices[2]:indices[2] + sizes[2], indices[3]:indices[3] + sizes[3], indices[
-        4]:indices[4] + sizes[4], indices[5]:indices[5] + sizes[5], indices[6]:indices[
-          6] + sizes[6], indices[7]:indices[7] + sizes[7]]
-    self.assertAllEqual(slice_val, expected_val)
-    self.assertEqual(expected_val.shape, slice_t.get_shape())
-
   def testPartialShapeInference(self):
     z = array_ops.zeros((1, 2, 3))
     self.assertAllEqual(z.get_shape().as_list(), [1, 2, 3])
@@ -251,6 +227,7 @@ class SliceTest(test.TestCase):
     m2 = array_ops.slice(z, [0, 0, 0], [constant_op.constant(1) + 0, 2, -1])
     self.assertAllEqual(m2.get_shape().as_list(), [None, 2, None])
 
+
   def _testGradientSlice(self, input_shape, slice_begin, slice_size):
     with self.test_session(use_gpu=True):
       num_inputs = np.prod(input_shape)
-- 
GitLab


From 23e2f1524f8c7389e933ee1734429cb9df4176df Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 1 Dec 2017 12:22:30 -0800
Subject: [PATCH 0520/1225]    [tpu:profiler] Add host-independent and
 host-dependent job info to the run environment in tf_op_stats.proto.

PiperOrigin-RevId: 177624829
---
 .../contrib/tpu/profiler/tf_op_stats.proto    | 27 +++++++++++++++++--
 1 file changed, 25 insertions(+), 2 deletions(-)

diff --git a/tensorflow/contrib/tpu/profiler/tf_op_stats.proto b/tensorflow/contrib/tpu/profiler/tf_op_stats.proto
index 6943ff5f47..e4a2b530c7 100644
--- a/tensorflow/contrib/tpu/profiler/tf_op_stats.proto
+++ b/tensorflow/contrib/tpu/profiler/tf_op_stats.proto
@@ -114,6 +114,26 @@ message HloExtraInfoMapResult {
   map<string, HloExtraInfoResult> hlo_extrainfo_map = 1;
 }
 
+// Result proto for host-independent job information.
+message HostIndependentJobInfoResult {
+  // The change-list number of this build.
+  optional string change_list = 1;
+  // The time of this build.
+  optional string build_time = 2;
+  // The target of this build.
+  optional string build_target = 3;
+}
+
+// Result proto for host-dependent job information.
+message HostDependentJobInfoResult {
+  // This ID of the host where the job was run on.
+  optional string host_id = 1;
+  // The command line used to run the job.
+  optional string command_line = 2;
+  // The start time of the job on this host.
+  optional string start_time = 3;
+}
+
 // Result proto for RunEnvironment (the run environment of a profiling session).
 message RunEnvironmentResult {
   // Number of hosts used.
@@ -124,8 +144,11 @@ message RunEnvironmentResult {
   optional int32 tpu_core_count = 3;
   // The per-TPU-core batch size.
   optional int32 per_core_batch_size = 4;
-  // Job information including build target and command line.
-  optional string job_info = 5;
+  reserved 5;  // was job_info.
+  // Host-independent job information.
+  optional HostIndependentJobInfoResult host_independent_job_info = 6;
+  // Host-dependent job information.
+  repeated HostDependentJobInfoResult host_dependent_job_info = 7;
 }
 
 // Result proto for TfStatsHelper.
-- 
GitLab


From 487558cf40f85539f740959dd54a4f5eee8e0560 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 1 Dec 2017 12:25:32 -0800
Subject: [PATCH 0521/1225] Log the error status for failed CUDA
 EnablePeerAccess.

This would help debug issues like this:
#14759

PiperOrigin-RevId: 177625164
---
 tensorflow/core/common_runtime/gpu/gpu_device.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/core/common_runtime/gpu/gpu_device.cc b/tensorflow/core/common_runtime/gpu/gpu_device.cc
index eff169640f..0fcea8ffd4 100644
--- a/tensorflow/core/common_runtime/gpu/gpu_device.cc
+++ b/tensorflow/core/common_runtime/gpu/gpu_device.cc
@@ -949,7 +949,7 @@ Status EnablePeerAccess(gpu::Platform* platform,
         if (!status.ok()) {
           LOG(WARNING)
               << "Unable to enable peer access between device ordinals "
-              << i_gpu_id << " and " << j_gpu_id;
+              << i_gpu_id << " and " << j_gpu_id << ", status: " << status;
         } else {
           ++enabled_peer_count;
         }
-- 
GitLab


From e0a668270742718a595ca9620727986de26781af Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 1 Dec 2017 12:32:10 -0800
Subject: [PATCH 0522/1225] Update the comment in speech_hotword_model_test.

PiperOrigin-RevId: 177625918
---
 tensorflow/contrib/lite/models/speech_hotword_model_test.cc | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tensorflow/contrib/lite/models/speech_hotword_model_test.cc b/tensorflow/contrib/lite/models/speech_hotword_model_test.cc
index 0b8266447a..f69cae8d2c 100644
--- a/tensorflow/contrib/lite/models/speech_hotword_model_test.cc
+++ b/tensorflow/contrib/lite/models/speech_hotword_model_test.cc
@@ -73,8 +73,8 @@ void RunTest(int model_input_tensor, int svdf_layer_state_tensor,
   float* output_ptr = interpreter->tensor(model_output_tensor)->data.f;
 
   // The first layer (SVDF) input size is 40 (speech_input_size). Each speech
-  // input frames for this model is 1280 floats, which can be fed to input in a
-  // sequence of size 32 (input_sequence_size).
+  // input frames for this model is 1600 floats, which can be fed to input in a
+  // sequence of size 40 (input_sequence_size).
   for (int i = 0; i < TestInputSize(input_frames); i++) {
     int frame_ptr = 0;
     for (int s = 0; s < input_sequence_size; s++) {
-- 
GitLab


From b836557fb1a5bcc9cc4cb57b77d017c5815dea43 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 1 Dec 2017 13:00:31 -0800
Subject: [PATCH 0523/1225] Corrects documentation about what the mel
 processing expects as input.

PiperOrigin-RevId: 177629372
---
 tensorflow/core/kernels/mfcc.h                | 9 +++++----
 tensorflow/core/kernels/mfcc_mel_filterbank.h | 7 ++++---
 2 files changed, 9 insertions(+), 7 deletions(-)

diff --git a/tensorflow/core/kernels/mfcc.h b/tensorflow/core/kernels/mfcc.h
index c39f104990..0d5d9fb90f 100644
--- a/tensorflow/core/kernels/mfcc.h
+++ b/tensorflow/core/kernels/mfcc.h
@@ -33,10 +33,11 @@ class Mfcc {
   bool Initialize(int input_length,
                   double input_sample_rate);
 
-  // Input is a single magnitude spectrogram frame. The input spectrum
-  // is filtered into bands using a triangular mel filterbank and a
-  // discrete cosine transform (DCT) of the values is taken. Output is
-  // populated with the lowest dct_coefficient_count of these values.
+  // Input is a single squared-magnitude spectrogram frame. The input spectrum
+  // is converted to linear magnitude and weighted into bands using a
+  // triangular mel filterbank, and a discrete cosine transform (DCT) of the
+  // values is taken. Output is populated with the lowest dct_coefficient_count
+  // of these values.
   void Compute(const std::vector<double>& spectrogram_frame,
                std::vector<double>* output) const;
 
diff --git a/tensorflow/core/kernels/mfcc_mel_filterbank.h b/tensorflow/core/kernels/mfcc_mel_filterbank.h
index 33ea1bdb5b..a766a20cbc 100644
--- a/tensorflow/core/kernels/mfcc_mel_filterbank.h
+++ b/tensorflow/core/kernels/mfcc_mel_filterbank.h
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-// Basic class for applying a mel-scale filterbank to an input.
+// Basic class for applying a mel-scale mapping to a power spectrum.
 
 #ifndef THIRD_PARTY_TENSORFLOW_CORE_KERNELS_MFCC_MEL_FILTERBANK_H_
 #define THIRD_PARTY_TENSORFLOW_CORE_KERNELS_MFCC_MEL_FILTERBANK_H_
@@ -32,8 +32,9 @@ class MfccMelFilterbank {
                   double lower_frequency_limit,
                   double upper_frequency_limit);
 
-  // Takes a magnitude spectrogram slice as input, computes a
-  // traingular mel filterbank and places the result in output.
+  // Takes a squared-magnitude spectrogram slice as input, computes a
+  // triangular-mel-weighted linear-magnitude filterbank, and places the result
+  // in output.
   void Compute(const std::vector<double>& input,
                std::vector<double>* output) const;
 
-- 
GitLab


From e88855650435327899917afb6723db03a3d5469f Mon Sep 17 00:00:00 2001
From: Nick Felt <nickfelt@google.com>
Date: Fri, 1 Dec 2017 13:14:21 -0800
Subject: [PATCH 0524/1225] Remove non-exposed copy of old SummaryWriter

RELNOTES: N/A
PiperOrigin-RevId: 177631104
---
 tensorflow/python/summary/summary_iterator.py | 323 +-----------------
 1 file changed, 1 insertion(+), 322 deletions(-)

diff --git a/tensorflow/python/summary/summary_iterator.py b/tensorflow/python/summary/summary_iterator.py
index 301f560d41..6969c4cf15 100644
--- a/tensorflow/python/summary/summary_iterator.py
+++ b/tensorflow/python/summary/summary_iterator.py
@@ -13,301 +13,14 @@
 # limitations under the License.
 # ==============================================================================
 
-"""Reads Summaries from and writes Summaries to event files."""
+"""Provides a method for reading events from an event file via an iterator."""
 
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-import os.path
-import threading
-import time
-
-import six
-
-from tensorflow.core.framework import graph_pb2
-from tensorflow.core.framework import summary_pb2
 from tensorflow.core.util import event_pb2
-from tensorflow.python import pywrap_tensorflow
-from tensorflow.python.framework import ops
 from tensorflow.python.lib.io import tf_record
-from tensorflow.python.platform import gfile
-from tensorflow.python.platform import tf_logging as logging
-from tensorflow.python.util import compat
-
-
-class SummaryWriter(object):
-  """Writes `Summary` protocol buffers to event files.
-
-  The `SummaryWriter` class provides a mechanism to create an event file in a
-  given directory and add summaries and events to it. The class updates the
-  file contents asynchronously. This allows a training program to call methods
-  to add data to the file directly from the training loop, without slowing down
-  training.
-  """
-
-  def __init__(self, logdir, graph=None, max_queue=10, flush_secs=120,
-               graph_def=None):
-    """Creates a `SummaryWriter` and an event file.
-
-    On construction the summary writer creates a new event file in `logdir`.
-    This event file will contain `Event` protocol buffers constructed when you
-    call one of the following functions: `add_summary()`, `add_session_log()`,
-    `add_event()`, or `add_graph()`.
-
-    If you pass a `Graph` to the constructor it is added to
-    the event file. (This is equivalent to calling `add_graph()` later).
-
-    TensorBoard will pick the graph from the file and display it graphically so
-    you can interactively explore the graph you built. You will usually pass
-    the graph from the session in which you launched it:
-
-    ```python
-    ...create a graph...
-    # Launch the graph in a session.
-    sess = tf.Session()
-    # Create a summary writer, add the 'graph' to the event file.
-    writer = tf.summary.FileWriter(<some-directory>, sess.graph)
-    ```
-
-    The other arguments to the constructor control the asynchronous writes to
-    the event file:
-
-    *  `flush_secs`: How often, in seconds, to flush the added summaries
-       and events to disk.
-    *  `max_queue`: Maximum number of summaries or events pending to be
-       written to disk before one of the 'add' calls block.
-
-    Args:
-      logdir: A string. Directory where event file will be written.
-      graph: A `Graph` object, such as `sess.graph`.
-      max_queue: Integer. Size of the queue for pending events and summaries.
-      flush_secs: Number. How often, in seconds, to flush the
-        pending events and summaries to disk.
-      graph_def: DEPRECATED: Use the `graph` argument instead.
-    """
-    self._logdir = logdir
-    if not gfile.IsDirectory(self._logdir):
-      gfile.MakeDirs(self._logdir)
-    self._event_queue = six.moves.queue.Queue(max_queue)
-    self._ev_writer = pywrap_tensorflow.EventsWriter(
-        compat.as_bytes(os.path.join(self._logdir, "events")))
-    self._closed = False
-    self._worker = _EventLoggerThread(self._event_queue, self._ev_writer,
-                                      flush_secs)
-    # For storing used tags for session.run() outputs.
-    self._session_run_tags = {}
-    self._worker.start()
-    if graph is not None or graph_def is not None:
-      # Calling it with both graph and graph_def for backward compatibility.
-      self.add_graph(graph=graph, graph_def=graph_def)
-
-  def get_logdir(self):
-    """Returns the directory where event file will be written."""
-    return self._logdir
-
-  def reopen(self):
-    """Reopens the summary writer.
-
-    Can be called after `close()` to add more events in the same directory.
-    The events will go into a new events file.
-
-    Does nothing if the summary writer was not closed.
-    """
-    if self._closed:
-      self._closed = False
-
-  def add_summary(self, summary, global_step=None):
-    """Adds a `Summary` protocol buffer to the event file.
-
-    This method wraps the provided summary in an `Event` protocol buffer
-    and adds it to the event file.
-
-    You can pass the result of evaluating any summary op, using
-    @{tf.Session.run} or
-    @{tf.Tensor.eval}, to this
-    function. Alternatively, you can pass a `tf.Summary` protocol
-    buffer that you populate with your own data. The latter is
-    commonly done to report evaluation results in event files.
-
-    Args:
-      summary: A `Summary` protocol buffer, optionally serialized as a string.
-      global_step: Number. Optional global step value to record with the
-        summary.
-    """
-    if isinstance(summary, bytes):
-      summ = summary_pb2.Summary()
-      summ.ParseFromString(summary)
-      summary = summ
-    event = event_pb2.Event(wall_time=time.time(), summary=summary)
-    if global_step is not None:
-      event.step = int(global_step)
-    self.add_event(event)
-
-  def add_session_log(self, session_log, global_step=None):
-    """Adds a `SessionLog` protocol buffer to the event file.
-
-    This method wraps the provided session in an `Event` protocol buffer
-    and adds it to the event file.
-
-    Args:
-      session_log: A `SessionLog` protocol buffer.
-      global_step: Number. Optional global step value to record with the
-        summary.
-    """
-    event = event_pb2.Event(wall_time=time.time(), session_log=session_log)
-    if global_step is not None:
-      event.step = int(global_step)
-    self.add_event(event)
-
-  def add_event(self, event):
-    """Adds an event to the event file.
-
-    Args:
-      event: An `Event` protocol buffer.
-    """
-    if not self._closed:
-      self._event_queue.put(event)
-
-  def _add_graph_def(self, graph_def, global_step=None):
-    graph_bytes = graph_def.SerializeToString()
-    event = event_pb2.Event(wall_time=time.time(), graph_def=graph_bytes)
-    if global_step is not None:
-      event.step = int(global_step)
-    self._event_queue.put(event)
-
-  def add_graph(self, graph, global_step=None, graph_def=None):
-    """Adds a `Graph` to the event file.
-
-    The graph described by the protocol buffer will be displayed by
-    TensorBoard. Most users pass a graph in the constructor instead.
-
-    Args:
-      graph: A `Graph` object, such as `sess.graph`.
-      global_step: Number. Optional global step counter to record with the
-        graph.
-      graph_def: DEPRECATED. Use the `graph` parameter instead.
-
-    Raises:
-      ValueError: If both graph and graph_def are passed to the method.
-    """
-
-    if graph is not None and graph_def is not None:
-      raise ValueError("Please pass only graph, or graph_def (deprecated), "
-                       "but not both.")
-
-    if isinstance(graph, ops.Graph) or isinstance(graph_def, ops.Graph):
-      # The user passed a `Graph`.
-
-      # Check if the user passed it via the graph or the graph_def argument and
-      # correct for that.
-      if not isinstance(graph, ops.Graph):
-        logging.warning("When passing a `Graph` object, please use the `graph`"
-                        " named argument instead of `graph_def`.")
-        graph = graph_def
-
-      # Serialize the graph with additional info.
-      true_graph_def = graph.as_graph_def(add_shapes=True)
-    elif (isinstance(graph, graph_pb2.GraphDef)
-          or isinstance(graph_def, graph_pb2.GraphDef)):
-      # The user passed a `GraphDef`.
-      logging.warning("Passing a `GraphDef` to the SummaryWriter is deprecated."
-                      " Pass a `Graph` object instead, such as `sess.graph`.")
-
-      # Check if the user passed it via the graph or the graph_def argument and
-      # correct for that.
-      if isinstance(graph, graph_pb2.GraphDef):
-        true_graph_def = graph
-      else:
-        true_graph_def = graph_def
-
-    else:
-      # The user passed neither `Graph`, nor `GraphDef`.
-      raise TypeError("The passed graph must be an instance of `Graph` "
-                      "or the deprecated `GraphDef`")
-    # Finally, add the graph_def to the summary writer.
-    self._add_graph_def(true_graph_def, global_step)
-
-  def add_run_metadata(self, run_metadata, tag, global_step=None):
-    """Adds a metadata information for a single session.run() call.
-
-    Args:
-      run_metadata: A `RunMetadata` protobuf object.
-      tag: The tag name for this metadata.
-      global_step: Number. Optional global step counter to record with the
-        StepStats.
-
-    Raises:
-      ValueError: If the provided tag was already used for this type of event.
-    """
-    if tag in self._session_run_tags:
-      raise ValueError("The provided tag was already used for this event type")
-    self._session_run_tags[tag] = True
-
-    tagged_metadata = event_pb2.TaggedRunMetadata()
-    tagged_metadata.tag = tag
-    # Store the `RunMetadata` object as bytes in order to have postponed
-    # (lazy) deserialization when used later.
-    tagged_metadata.run_metadata = run_metadata.SerializeToString()
-    event = event_pb2.Event(wall_time=time.time(),
-                            tagged_run_metadata=tagged_metadata)
-    if global_step is not None:
-      event.step = int(global_step)
-    self._event_queue.put(event)
-
-  def flush(self):
-    """Flushes the event file to disk.
-
-    Call this method to make sure that all pending events have been written to
-    disk.
-    """
-    self._event_queue.join()
-    self._ev_writer.Flush()
-
-  def close(self):
-    """Flushes the event file to disk and close the file.
-
-    Call this method when you do not need the summary writer anymore.
-    """
-    self.flush()
-    self._ev_writer.Close()
-    self._closed = True
-
-
-class _EventLoggerThread(threading.Thread):
-  """Thread that logs events."""
-
-  def __init__(self, queue, ev_writer, flush_secs):
-    """Creates an _EventLoggerThread.
-
-    Args:
-      queue: A Queue from which to dequeue events.
-      ev_writer: An event writer. Used to log brain events for
-       the visualizer.
-      flush_secs: How often, in seconds, to flush the
-        pending file to disk.
-    """
-    threading.Thread.__init__(self)
-    self.daemon = True
-    self._queue = queue
-    self._ev_writer = ev_writer
-    self._flush_secs = flush_secs
-    # The first event will be flushed immediately.
-    self._next_event_flush_time = 0
-
-  def run(self):
-    while True:
-      event = self._queue.get()
-      try:
-        self._ev_writer.WriteEvent(event)
-        # Flush the event writer every so often.
-        now = time.time()
-        if now > self._next_event_flush_time:
-          self._ev_writer.Flush()
-          # Do it again in two minutes.
-          self._next_event_flush_time = now + self._flush_secs
-      finally:
-        self._queue.task_done()
 
 
 def summary_iterator(path):
@@ -352,37 +65,3 @@ def summary_iterator(path):
   # pylint: enable=line-too-long
   for r in tf_record.tf_record_iterator(path):
     yield event_pb2.Event.FromString(r)
-
-
-class SummaryWriterCache(object):
-  """Cache for summary writers.
-
-  This class caches summary writers, one per directory.
-  """
-  # Cache, keyed by directory.
-  _cache = {}
-
-  # Lock protecting _SUMMARY_WRITERS.
-  _lock = threading.RLock()
-
-  @staticmethod
-  def clear():
-    """Clear cached summary writers. Currently only used for unit tests."""
-    with SummaryWriterCache._lock:
-      SummaryWriterCache._cache = {}
-
-  @staticmethod
-  def get(logdir):
-    """Returns the SummaryWriter for the specified directory.
-
-    Args:
-      logdir: str, name of the directory.
-
-    Returns:
-      A `SummaryWriter`.
-    """
-    with SummaryWriterCache._lock:
-      if logdir not in SummaryWriterCache._cache:
-        SummaryWriterCache._cache[logdir] = SummaryWriter(
-            logdir, graph=ops.get_default_graph())
-      return SummaryWriterCache._cache[logdir]
-- 
GitLab


From 71066d78d163bad92baf539cde54d167d758305e Mon Sep 17 00:00:00 2001
From: "Joshua V. Dillon" <jvdillon@google.com>
Date: Fri, 1 Dec 2017 13:36:03 -0800
Subject: [PATCH 0525/1225] Add `tf.contrib.distributions.Autoregressive`.

PiperOrigin-RevId: 177633858
---
 tensorflow/contrib/distributions/BUILD        |  13 ++
 tensorflow/contrib/distributions/__init__.py  |   2 +
 .../kernel_tests/autoregressive_test.py       |  94 ++++++++
 .../python/ops/autoregressive.py              | 208 ++++++++++++++++++
 tensorflow/python/ops/distributions/util.py   |   1 +
 5 files changed, 318 insertions(+)
 create mode 100644 tensorflow/contrib/distributions/python/kernel_tests/autoregressive_test.py
 create mode 100644 tensorflow/contrib/distributions/python/ops/autoregressive.py

diff --git a/tensorflow/contrib/distributions/BUILD b/tensorflow/contrib/distributions/BUILD
index 145b9495ff..c5bd91484e 100644
--- a/tensorflow/contrib/distributions/BUILD
+++ b/tensorflow/contrib/distributions/BUILD
@@ -127,6 +127,19 @@ cuda_py_test(
     tags = ["no_pip"],
 )
 
+cuda_py_test(
+    name = "autoregressive_test",
+    size = "small",
+    srcs = ["python/kernel_tests/autoregressive_test.py"],
+    additional_deps = [
+        ":distributions_py",
+        "//third_party/py/numpy",
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:framework_for_generated_wrappers",
+        "//tensorflow/python:platform_test",
+    ],
+)
+
 cuda_py_test(
     name = "binomial_test",
     size = "small",
diff --git a/tensorflow/contrib/distributions/__init__.py b/tensorflow/contrib/distributions/__init__.py
index 0d12d83893..a8cf40c52e 100644
--- a/tensorflow/contrib/distributions/__init__.py
+++ b/tensorflow/contrib/distributions/__init__.py
@@ -23,6 +23,7 @@ from __future__ import print_function
 # pylint: disable=unused-import,wildcard-import,line-too-long,g-importing-member
 
 from tensorflow.contrib.distributions.python.ops import bijectors
+from tensorflow.contrib.distributions.python.ops.autoregressive import *
 from tensorflow.contrib.distributions.python.ops.binomial import *
 from tensorflow.contrib.distributions.python.ops.cauchy import *
 from tensorflow.contrib.distributions.python.ops.chi2 import *
@@ -91,6 +92,7 @@ _allowed_symbols = [
     'NOT_REPARAMETERIZED',
     'ReparameterizationType',
     'Distribution',
+    'Autoregressive',
     'Binomial',
     'Bernoulli',
     'BernoulliWithSigmoidProbs',
diff --git a/tensorflow/contrib/distributions/python/kernel_tests/autoregressive_test.py b/tensorflow/contrib/distributions/python/kernel_tests/autoregressive_test.py
new file mode 100644
index 0000000000..b625093fb7
--- /dev/null
+++ b/tensorflow/contrib/distributions/python/kernel_tests/autoregressive_test.py
@@ -0,0 +1,94 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import numpy as np
+
+from tensorflow.contrib.distributions.python.ops import autoregressive as autoregressive_lib
+from tensorflow.contrib.distributions.python.ops import independent as independent_lib
+from tensorflow.contrib.distributions.python.ops import test_util
+from tensorflow.contrib.distributions.python.ops.bijectors.affine import Affine
+from tensorflow.contrib.distributions.python.ops.bijectors.masked_autoregressive import MaskedAutoregressiveFlow
+from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import math_ops
+from tensorflow.python.ops.distributions import normal as normal_lib
+from tensorflow.python.ops.distributions import transformed_distribution as transformed_distribution_lib
+from tensorflow.python.ops.distributions import util as distribution_util
+from tensorflow.python.platform import test
+
+
+class AutogressiveTest(test_util.VectorDistributionTestHelpers, test.TestCase):
+  """Tests the Autoregressive distribution."""
+
+  def setUp(self):
+    self._rng = np.random.RandomState(42)
+
+  def _random_scale_tril(self, event_size):
+    n = np.int32(event_size * (event_size + 1) // 2)
+    p = 2. * self._rng.random_sample(n).astype(np.float32) - 1.
+    return distribution_util.fill_triangular(0.25 * p)
+
+  def _normal_fn(self, affine_bijector):
+    def _fn(samples):
+      scale = math_ops.exp(affine_bijector.forward(samples))
+      return independent_lib.Independent(
+          normal_lib.Normal(loc=0., scale=scale, validate_args=True),
+          reinterpreted_batch_ndims=1)
+    return _fn
+
+  def testSampleAndLogProbConsistency(self):
+    batch_shape = []
+    event_size = 2
+    with self.test_session() as sess:
+      batch_event_shape = np.concatenate([batch_shape, [event_size]], axis=0)
+      sample0 = array_ops.zeros(batch_event_shape)
+      affine = Affine(scale_tril=self._random_scale_tril(event_size))
+      ar = autoregressive_lib.Autoregressive(
+          self._normal_fn(affine), sample0, validate_args=True)
+      self.run_test_sample_consistent_log_prob(
+          sess.run, ar, radius=1., center=0., rtol=0.01)
+
+  def testCompareToBijector(self):
+    """Demonstrates equivalence between TD, Bijector approach and AR dist."""
+    sample_shape = [4, 5]
+    batch_shape = []
+    event_size = 2
+    with self.test_session() as sess:
+      batch_event_shape = np.concatenate([batch_shape, [event_size]], axis=0)
+      sample0 = array_ops.zeros(batch_event_shape)
+      affine = Affine(scale_tril=self._random_scale_tril(event_size))
+      ar = autoregressive_lib.Autoregressive(
+          self._normal_fn(affine), sample0, validate_args=True)
+      ar_flow = MaskedAutoregressiveFlow(
+          is_constant_jacobian=True,
+          shift_and_log_scale_fn=lambda x: [None, affine.forward(x)],
+          validate_args=True)
+      td = transformed_distribution_lib.TransformedDistribution(
+          distribution=normal_lib.Normal(loc=0., scale=1.),
+          bijector=ar_flow,
+          event_shape=[event_size],
+          batch_shape=batch_shape,
+          validate_args=True)
+      x_shape = np.concatenate(
+          [sample_shape, batch_shape, [event_size]], axis=0)
+      x = 2. * self._rng.random_sample(x_shape).astype(np.float32) - 1.
+      td_log_prob_, ar_log_prob_ = sess.run([td.log_prob(x), ar.log_prob(x)])
+      self.assertAllClose(td_log_prob_, ar_log_prob_, atol=0., rtol=1e-6)
+
+
+if __name__ == "__main__":
+  test.main()
diff --git a/tensorflow/contrib/distributions/python/ops/autoregressive.py b/tensorflow/contrib/distributions/python/ops/autoregressive.py
new file mode 100644
index 0000000000..852298bf33
--- /dev/null
+++ b/tensorflow/contrib/distributions/python/ops/autoregressive.py
@@ -0,0 +1,208 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""The Autoregressive distribution."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import numpy as np
+
+from tensorflow.python.framework import ops
+from tensorflow.python.ops.distributions import distribution as distribution_lib
+from tensorflow.python.ops.distributions import util as distribution_util
+
+
+class Autoregressive(distribution_lib.Distribution):
+  """Autoregressive distributions.
+
+  The Autoregressive distribution enables learning (often) richer multivariate
+  distributions by repeatedly applying a [diffeomorphic](
+  https://en.wikipedia.org/wiki/Diffeomorphism) transformation (such as
+  implemented by `Bijector`s). Regarding terminology,
+
+    "Autoregressive models decompose the joint density as a product of
+    conditionals, and model each conditional in turn. Normalizing flows
+    transform a base density (e.g. a standard Gaussian) into the target density
+    by an invertible transformation with tractable Jacobian." [1]
+
+  In other words, the "autoregressive property" is equivalent to the
+  decomposition, `p(x) = prod{ p(x[i] | x[0:i]) : i=0, ..., d }`. The provided
+  `shift_and_log_scale_fn`, `masked_autoregressive_default_template`, achieves
+  this property by zeroing out weights in its `masked_dense` layers.
+
+  Practically speaking the autoregressive property means that there exists a
+  permutation of the event coordinates such that each coordinate is a
+  diffeomorphic function of only preceding coordinates. [2]
+
+  #### Mathematical Details
+
+  The probability function is,
+
+  ```none
+  prob(x; fn, n) = fn(x).prob(x)
+  ```
+
+  And a sample is generated by,
+
+  ```none
+  x = fn(...fn(fn(x0).sample()).sample()).sample()
+  ```
+
+  where the ellipses (`...`) represent `n-2` composed calls to `fn`, `fn`
+  constructs a `tf.distributions.Distribution`-like instance, and `x0` is a
+  fixed initializing `Tensor`.
+
+  #### Examples
+
+  ```python
+  tfd = tf.contrib.distributions
+
+  def normal_fn(self, event_size):
+    n = event_size * (event_size + 1) / 2
+    p = tf.Variable(tfd.Normal(loc=0., scale=1.).sample(n))
+    affine = tfd.bijectors.Affine(
+        scale_tril=tfd.fill_triangular(0.25 * p))
+    def _fn(samples):
+      scale = math_ops.exp(affine.forward(samples)).eval()
+      return independent_lib.Independent(
+          normal_lib.Normal(loc=0., scale=scale, validate_args=True),
+          reinterpreted_batch_ndims=1)
+    return _fn
+
+  batch_and_event_shape = [3, 2, 4]
+  sample0 = array_ops.zeros(batch_and_event_shape)
+  ar = autoregressive_lib.Autoregressive(
+      self._normal_fn(batch_and_event_shape[-1]), sample0)
+  x = ar.sample([6, 5])
+  # ==> x.shape = [6, 5, 3, 2, 4]
+  prob_x = ar.prob(x)
+  # ==> x.shape = [6, 5, 3, 2]
+
+  ```
+
+  [1]: "Masked Autoregressive Flow for Density Estimation."
+       George Papamakarios, Theo Pavlakou, Iain Murray. Arxiv. 2017.
+       https://arxiv.org/abs/1705.07057
+
+  [2]: "Conditional Image Generation with PixelCNN Decoders."
+       Aaron van den Oord, Nal Kalchbrenner, Oriol Vinyals, Lasse Espeholt, Alex
+       Graves, Koray Kavukcuoglu. Arxiv, 2016.
+       https://arxiv.org/abs/1606.05328
+  """
+
+  def __init__(self,
+               distribution_fn,
+               sample0=None,
+               num_steps=None,
+               validate_args=False,
+               allow_nan_stats=True,
+               name="Autoregressive"):
+    """Construct an `Autoregressive` distribution.
+
+    Args:
+      distribution_fn: Python `callable` which constructs a
+        `tf.distributions.Distribution`-like instance from a `Tensor` (e.g.,
+        `sample0`). The function must respect the "autoregressive property",
+        i.e., there exists a permutation of event such that each coordinate is a
+        diffeomorphic function of on preceding coordinates.
+      sample0: Initial input to `distribution_fn`; used to
+        build the distribution in `__init__` which in turn specifies this
+        distribution's properties, e.g., `event_shape`, `batch_shape`, `dtype`.
+        If unspecified, then `distribution_fn` should be default constructable.
+      num_steps: Number of times `distribution_fn` is composed from samples,
+        e.g., `num_steps=2` implies
+        `distribution_fn(distribution_fn(sample0).sample(n)).sample()`.
+      validate_args: Python `bool`.  Whether to validate input with asserts.
+        If `validate_args` is `False`, and the inputs are invalid,
+        correct behavior is not guaranteed.
+      allow_nan_stats: Python `bool`, default `True`. When `True`, statistics
+        (e.g., mean, mode, variance) use the value "`NaN`" to indicate the
+        result is undefined. When `False`, an exception is raised if one or
+        more of the statistic's batch members are undefined.
+      name: Python `str` name prefixed to Ops created by this class.
+        Default value: "Autoregressive".
+
+    Raises:
+      ValueError: if `num_steps` and
+        `distribution_fn(sample0).event_shape.num_elements()` are both `None`.
+      ValueError: if `num_steps < 1`.
+    """
+    parameters = locals()
+    with ops.name_scope(name):
+      self._distribution_fn = distribution_fn
+      self._sample0 = sample0
+      self._distribution0 = (distribution_fn() if sample0 is None
+                             else distribution_fn(sample0))
+      if num_steps is None:
+        num_steps = self._distribution0.event_shape.num_elements()
+        if num_steps is None:
+          raise ValueError("distribution_fn must generate a distribution "
+                           "with fully known `event_shape`.")
+      if num_steps < 1:
+        raise ValueError("num_steps ({}) must be at least 1.".format(num_steps))
+      self._num_steps = num_steps
+    super(Autoregressive, self).__init__(
+        dtype=self._distribution0.dtype,
+        reparameterization_type=self._distribution0.reparameterization_type,
+        validate_args=validate_args,
+        allow_nan_stats=allow_nan_stats,
+        parameters=parameters,
+        graph_parents=self._distribution0._graph_parents,  # pylint: disable=protected-access
+        name=name)
+
+  @property
+  def distribution_fn(self):
+    return self._distribution_fn
+
+  @property
+  def sample0(self):
+    return self._sample0
+
+  @property
+  def num_steps(self):
+    return self._num_steps
+
+  @property
+  def distribution0(self):
+    return self._distribution0
+
+  def _batch_shape(self):
+    return self.distribution0.batch_shape
+
+  def _batch_shape_tensor(self):
+    return self.distribution0.batch_shape_tensor()
+
+  def _event_shape(self):
+    return self.distribution0.event_shape
+
+  def _event_shape_tensor(self):
+    return self.distribution0.event_shape_tensor()
+
+  def _sample_n(self, n, seed=None):
+    if seed is None:
+      seed = distribution_util.gen_new_seed(
+          seed=np.random.randint(2**32 - 1),
+          salt="autoregressive")
+    samples = self.distribution0.sample(n, seed=seed)
+    for _ in range(self._num_steps):
+      samples = self.distribution_fn(samples).sample(seed=seed)
+    return samples
+
+  def _log_prob(self, value):
+    return self.distribution_fn(value).log_prob(value)
+
+  def _prob(self, value):
+    return self.distribution_fn(value).prob(value)
diff --git a/tensorflow/python/ops/distributions/util.py b/tensorflow/python/ops/distributions/util.py
index 41b86f7940..28c74bf981 100644
--- a/tensorflow/python/ops/distributions/util.py
+++ b/tensorflow/python/ops/distributions/util.py
@@ -751,6 +751,7 @@ def fill_triangular(x, upper=False, name=None):
   """
 
   with ops.name_scope(name, "fill_triangular", values=[x]):
+    x = ops.convert_to_tensor(x, name="x")
     if x.shape.with_rank_at_least(1)[-1].value is not None:
       # Formula derived by solving for n: m = n(n+1)/2.
       m = np.int32(x.shape[-1].value)
-- 
GitLab


From ae10f63e2fc76faf5835a660043c328d891c41f0 Mon Sep 17 00:00:00 2001
From: Gunhan Gulsoy <gunan@google.com>
Date: Fri, 1 Dec 2017 13:37:01 -0800
Subject: [PATCH 0526/1225] Increase tolerance in conv_ops_test to avoid
 flakes.

PiperOrigin-RevId: 177633993
---
 tensorflow/python/kernel_tests/conv_ops_test.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/python/kernel_tests/conv_ops_test.py b/tensorflow/python/kernel_tests/conv_ops_test.py
index bf7245a2ae..a85134c288 100644
--- a/tensorflow/python/kernel_tests/conv_ops_test.py
+++ b/tensorflow/python/kernel_tests/conv_ops_test.py
@@ -311,7 +311,7 @@ class Conv2DTest(test.TestCase):
           print("expected = ", e_value)
           print("actual = ", c_value)
           self.assertAllClose(
-              e_value.flatten(), c_value.flatten(), atol=tolerance, rtol=1e-6)
+              e_value.flatten(), c_value.flatten(), atol=tolerance, rtol=1e-4)
 
   def _VerifyValues(self, tensor_in_sizes, filter_in_sizes, strides, padding,
                     expected):
-- 
GitLab


From ed9163acfd510c26c49201ec9e360e20a2625ca8 Mon Sep 17 00:00:00 2001
From: Shanqing Cai <cais@google.com>
Date: Fri, 1 Dec 2017 13:56:10 -0800
Subject: [PATCH 0527/1225] TF Eager: Add SPINN model example for
 dynamic/recursive NN.

PiperOrigin-RevId: 177636427
---
 tensorflow/contrib/eager/README.md            |   3 +
 .../contrib/eager/python/examples/BUILD       |   1 +
 .../contrib/eager/python/examples/spinn/BUILD |  41 +
 .../eager/python/examples/spinn/README.md     |  13 +
 .../eager/python/examples/spinn/data.py       | 350 +++++++++
 .../eager/python/examples/spinn/data_test.py  | 243 ++++++
 .../eager/python/examples/spinn/spinn_test.py | 409 ++++++++++
 third_party/examples/eager/spinn/BUILD        |  14 +
 third_party/examples/eager/spinn/LICENSE      |  29 +
 third_party/examples/eager/spinn/README.md    |  54 ++
 third_party/examples/eager/spinn/spinn.py     | 732 ++++++++++++++++++
 11 files changed, 1889 insertions(+)
 create mode 100644 tensorflow/contrib/eager/python/examples/spinn/BUILD
 create mode 100644 tensorflow/contrib/eager/python/examples/spinn/README.md
 create mode 100644 tensorflow/contrib/eager/python/examples/spinn/data.py
 create mode 100644 tensorflow/contrib/eager/python/examples/spinn/data_test.py
 create mode 100644 tensorflow/contrib/eager/python/examples/spinn/spinn_test.py
 create mode 100644 third_party/examples/eager/spinn/BUILD
 create mode 100644 third_party/examples/eager/spinn/LICENSE
 create mode 100644 third_party/examples/eager/spinn/README.md
 create mode 100644 third_party/examples/eager/spinn/spinn.py

diff --git a/tensorflow/contrib/eager/README.md b/tensorflow/contrib/eager/README.md
index dcc370cd00..09242ee47d 100644
--- a/tensorflow/contrib/eager/README.md
+++ b/tensorflow/contrib/eager/README.md
@@ -76,3 +76,6 @@ For an introduction to eager execution in TensorFlow, see:
 ## Changelog
 
 - 2017/10/31: Initial preview release.
+- 2017/12/01: Example of dynamic neural network:
+  [SPINN: Stack-augmented Parser-Interpreter Neural Network](https://arxiv.org/abs/1603.06021).
+  See [README.md](python/examples/spinn/README.md) for details.
diff --git a/tensorflow/contrib/eager/python/examples/BUILD b/tensorflow/contrib/eager/python/examples/BUILD
index aa21a6ab99..6aef010a21 100644
--- a/tensorflow/contrib/eager/python/examples/BUILD
+++ b/tensorflow/contrib/eager/python/examples/BUILD
@@ -11,5 +11,6 @@ py_library(
         "//tensorflow/contrib/eager/python/examples/resnet50",
         "//tensorflow/contrib/eager/python/examples/rnn_colorbot",
         "//tensorflow/contrib/eager/python/examples/rnn_ptb",
+        "//tensorflow/contrib/eager/python/examples/spinn:data",
     ],
 )
diff --git a/tensorflow/contrib/eager/python/examples/spinn/BUILD b/tensorflow/contrib/eager/python/examples/spinn/BUILD
new file mode 100644
index 0000000000..0263d21325
--- /dev/null
+++ b/tensorflow/contrib/eager/python/examples/spinn/BUILD
@@ -0,0 +1,41 @@
+licenses(["notice"])  # Apache 2.0
+
+package(default_visibility = ["//tensorflow:internal"])
+
+load("//tensorflow:tensorflow.bzl", "cuda_py_test")
+load("//tensorflow:tensorflow.bzl", "py_test")
+
+py_library(
+    name = "data",
+    srcs = ["data.py"],
+    srcs_version = "PY2AND3",
+    visibility = ["//visibility:public"],
+    deps = ["//third_party/py/numpy"],
+)
+
+py_test(
+    name = "data_test",
+    size = "small",
+    srcs = ["data_test.py"],
+    srcs_version = "PY2AND3",
+    deps = [
+        ":data",
+        "//tensorflow:tensorflow_py",
+    ],
+)
+
+cuda_py_test(
+    name = "spinn_test",
+    size = "medium",
+    srcs = ["spinn_test.py"],
+    additional_deps = [
+        ":data",
+        "//third_party/examples/eager/spinn",
+        "//third_party/py/numpy",
+        "//tensorflow:tensorflow_py",
+        "//tensorflow/contrib/summary:summary_test_util",
+        "//tensorflow/python/eager:test",
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:framework_test_lib",
+    ],
+)
diff --git a/tensorflow/contrib/eager/python/examples/spinn/README.md b/tensorflow/contrib/eager/python/examples/spinn/README.md
new file mode 100644
index 0000000000..eb0637df47
--- /dev/null
+++ b/tensorflow/contrib/eager/python/examples/spinn/README.md
@@ -0,0 +1,13 @@
+# SPINN: Dynamic neural network with TensorFlow eager execution
+
+This directory contains files supporting the
+[spinn.py model in third_party/examples/eager/spinn/](../../../../../../third_party/examples/eager/spinn/spinn.py),
+including
+
+- `data.py`: Utility library for loading and preprocessing the SNLI and GloVe
+  data.
+- `data_test.py` and `spinn_test.py`: Unit tests for the data and model modules.
+
+See the [README.md in third_party/examples/eager/spinn/](../../../../../../third_party/examples/eager/spinn/README.md)
+for detailed background, license and usage information regarding the SPINN code.
+
diff --git a/tensorflow/contrib/eager/python/examples/spinn/data.py b/tensorflow/contrib/eager/python/examples/spinn/data.py
new file mode 100644
index 0000000000..a6e046320f
--- /dev/null
+++ b/tensorflow/contrib/eager/python/examples/spinn/data.py
@@ -0,0 +1,350 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Utilities of SNLI data and GloVe word vectors for SPINN model.
+
+See more details about the SNLI data set at:
+  https://nlp.stanford.edu/projects/snli/
+
+See more details about the GloVe pretrained word embeddings at:
+  https://nlp.stanford.edu/projects/glove/
+"""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import glob
+import math
+import os
+import random
+
+import numpy as np
+
+POSSIBLE_LABELS = ("entailment", "contradiction", "neutral")
+
+UNK_CODE = 0   # Code for unknown word tokens.
+PAD_CODE = 1   # Code for padding tokens.
+
+SHIFT_CODE = 3
+REDUCE_CODE = 2
+
+WORD_VECTOR_LEN = 300  # Embedding dimensions.
+
+LEFT_PAREN = "("
+RIGHT_PAREN = ")"
+PARENTHESES = (LEFT_PAREN, RIGHT_PAREN)
+
+
+def get_non_parenthesis_words(items):
+  """Get the non-parenthesis items from a SNLI parsed sentence.
+
+  Args:
+    items: Data items from a parsed SNLI setence, with parentheses. E.g.,
+      ["(", "Man", "(", "(", "(", "(", "(", "wearing", "pass", ")", ...
+
+  Returns:
+    A list of non-parenthis word items, all converted to lower case. E.g.,
+      ["man", "wearing", "pass", ...
+  """
+  return [x.lower() for x in items if x not in PARENTHESES and x]
+
+
+def get_shift_reduce(items):
+  """Obtain shift-reduce vector from a list of items from the SNLI data.
+
+  Args:
+    items: Data items as a list of str, e.g.,
+       ["(", "Man", "(", "(", "(", "(", "(", "wearing", "pass", ")", ...
+
+  Returns:
+    A list of shift-reduce transitions, encoded as `SHIFT_CODE` for shift and
+      `REDUCE_CODE` for reduce. See code above for the values of `SHIFT_CODE`
+      and `REDUCE_CODE`.
+  """
+  trans = []
+  for item in items:
+    if item == LEFT_PAREN:
+      continue
+    elif item == RIGHT_PAREN:
+      trans.append(REDUCE_CODE)
+    else:
+      trans.append(SHIFT_CODE)
+  return trans
+
+
+def pad_and_reverse_word_ids(sentences):
+  """Pad a list of sentences to the common maximum length + 1.
+
+  Args:
+    sentences: A list of sentences as a list of list of integers. Each integer
+      is a word ID. Each list of integer corresponds to one sentence.
+
+  Returns:
+    A numpy.ndarray of shape (num_sentences, max_length + 1), wherein max_length
+      is the maximum sentence length (in # of words). Each sentence is reversed
+      and then padded with an extra one at head, as required by the model.
+  """
+  max_len = max(len(sent) for sent in sentences)
+  for sent in sentences:
+    if len(sent) < max_len:
+      sent.extend([PAD_CODE] * (max_len - len(sent)))
+  # Reverse in time order and pad an extra one.
+  sentences = np.fliplr(np.array(sentences, dtype=np.int64))
+  sentences = np.concatenate(
+      [np.ones([sentences.shape[0], 1], dtype=np.int64), sentences], axis=1)
+  return sentences
+
+
+def pad_transitions(sentences_transitions):
+  """Pad a list of shift-reduce transitions to the maximum length."""
+  max_len = max(len(transitions) for transitions in sentences_transitions)
+  for transitions in sentences_transitions:
+    if len(transitions) < max_len:
+      transitions.extend([PAD_CODE] * (max_len - len(transitions)))
+  return np.array(sentences_transitions, dtype=np.int64)
+
+
+def load_vocabulary(data_root):
+  """Load vocabulary from SNLI data files.
+
+  Args:
+    data_root: Root directory of the data. It is assumed that the SNLI data
+      files have been downloaded and extracted to the "snli/snli_1.0"
+      subdirectory of it.
+
+  Returns:
+    Vocabulary as a set of strings.
+
+  Raises:
+    ValueError: If SNLI data files cannot be found.
+  """
+  snli_path = os.path.join(data_root, "snli")
+  snli_glob_pattern = os.path.join(snli_path, "snli_1.0/snli_1.0_*.txt")
+  file_names = glob.glob(snli_glob_pattern)
+  if not file_names:
+    raise ValueError(
+        "Cannot find SNLI data files at %s. "
+        "Please download and extract SNLI data first." % snli_glob_pattern)
+
+  print("Loading vocabulary...")
+  vocab = set()
+  for file_name in file_names:
+    with open(os.path.join(snli_path, file_name), "rt") as f:
+      for i, line in enumerate(f):
+        if i == 0:
+          continue
+        items = line.split("\t")
+        premise_words = get_non_parenthesis_words(items[1].split(" "))
+        hypothesis_words = get_non_parenthesis_words(items[2].split(" "))
+        vocab.update(premise_words)
+        vocab.update(hypothesis_words)
+  return vocab
+
+
+def load_word_vectors(data_root, vocab):
+  """Load GloVe word vectors for words present in the vocabulary.
+
+  Args:
+    data_root: Data root directory. It is assumed that the GloVe file
+     has been downloaded and extracted at the "glove/" subdirectory of it.
+    vocab: A `set` of words, representing the vocabulary.
+
+  Returns:
+    1. word2index: A dict from lower-case word to row index in the embedding
+       matrix, i.e, `embed` below.
+    2. embed: The embedding matrix as a float32 numpy array. Its shape is
+       [vocabulary_size, WORD_VECTOR_LEN]. vocabulary_size is len(vocab).
+       WORD_VECTOR_LEN is the embedding dimension (300).
+
+  Raises:
+    ValueError: If GloVe embedding file cannot be found.
+  """
+  glove_path = os.path.join(data_root, "glove/glove.42B.300d.txt")
+  if not os.path.isfile(glove_path):
+    raise ValueError(
+        "Cannot find GloVe embedding file at %s. "
+        "Please download and extract GloVe embeddings first." % glove_path)
+
+  print("Loading word vectors...")
+
+  word2index = dict()
+  embed = []
+
+  embed.append([0] * WORD_VECTOR_LEN)  # <unk>
+  embed.append([0] * WORD_VECTOR_LEN)  # <pad>
+  word2index["<unk>"] = UNK_CODE
+  word2index["<pad>"] = PAD_CODE
+
+  with open(glove_path, "rt") as f:
+    for line in f:
+      items = line.split(" ")
+      word = items[0]
+      if word in vocab and word not in word2index:
+        word2index[word] = len(embed)
+        vector = np.array([float(item) for item in items[1:]])
+        assert (WORD_VECTOR_LEN,) == vector.shape
+        embed.append(vector)
+  embed = np.array(embed, dtype=np.float32)
+  return word2index, embed
+
+
+def calculate_bins(length2count, min_bin_size):
+  """Cacluate bin boundaries given a histogram of lengths and mininum bin size.
+
+  Args:
+    length2count: A `dict` mapping length to sentence count.
+    min_bin_size: Minimum bin size in terms of total number of sentence pairs
+      in the bin.
+
+  Returns:
+    A `list` representing the right bin boundaries, starting from the inclusive
+    right boundary of the first bin. For example, if the output is
+      [10, 20, 35],
+    it means there are three bins: [1, 10], [11, 20] and [21, 35].
+  """
+  bounds = []
+  lengths = sorted(length2count.keys())
+  cum_count = 0
+  for length in lengths:
+    cum_count += length2count[length]
+    if cum_count >= min_bin_size:
+      bounds.append(length)
+      cum_count = 0
+  if bounds[-1] != lengths[-1]:
+    bounds.append(lengths[-1])
+  return bounds
+
+
+class SnliData(object):
+  """A split of SNLI data."""
+
+  def __init__(self, data_file, word2index, sentence_len_limit=-1):
+    """SnliData constructor.
+
+    Args:
+      data_file: Full path to the data file, e.g.,
+        "/tmp/spinn-data/snli/snli_1.0/snli_1.0.train.txt"
+      word2index: A dict from lower-case word to row index in the embedding
+        matrix (see `load_word_vectors()` for details).
+      sentence_len_limit: Maximum allowed sentence length (# of words).
+        A value of <= 0 means unlimited. Sentences longer than this limit
+        are currently discarded, not truncated.
+    """
+
+    self._labels = []
+    self._premises = []
+    self._premise_transitions = []
+    self._hypotheses = []
+    self._hypothesis_transitions = []
+
+    with open(data_file, "rt") as f:
+      for i, line in enumerate(f):
+        if i == 0:
+          # Skip header line.
+          continue
+        items = line.split("\t")
+        if items[0] not in POSSIBLE_LABELS:
+          continue
+
+        premise_items = items[1].split(" ")
+        hypothesis_items = items[2].split(" ")
+        premise_words = get_non_parenthesis_words(premise_items)
+        hypothesis_words = get_non_parenthesis_words(hypothesis_items)
+
+        if (sentence_len_limit > 0 and
+            (len(premise_words) > sentence_len_limit or
+             len(hypothesis_words) > sentence_len_limit)):
+          # TODO(cais): Maybe truncate; do not discard.
+          continue
+
+        premise_ids = [
+            word2index.get(word, UNK_CODE) for word in premise_words]
+        hypothesis_ids = [
+            word2index.get(word, UNK_CODE) for word in hypothesis_words]
+
+        self._premises.append(premise_ids)
+        self._hypotheses.append(hypothesis_ids)
+        self._premise_transitions.append(get_shift_reduce(premise_items))
+        self._hypothesis_transitions.append(get_shift_reduce(hypothesis_items))
+        assert (len(self._premise_transitions[-1]) ==
+                2 * len(premise_words) - 1)
+        assert (len(self._hypothesis_transitions[-1]) ==
+                2 * len(hypothesis_words) - 1)
+
+        self._labels.append(POSSIBLE_LABELS.index(items[0]) + 1)
+
+    assert len(self._labels) == len(self._premises)
+    assert len(self._labels) == len(self._hypotheses)
+    assert len(self._labels) == len(self._premise_transitions)
+    assert len(self._labels) == len(self._hypothesis_transitions)
+
+  def num_batches(self, batch_size):
+    """Calculate number of batches given batch size."""
+    return int(math.ceil(len(self._labels) / batch_size))
+
+  def get_generator(self, batch_size):
+    """Obtain a generator for batched data.
+
+    All examples of this SnliData object are randomly shuffled, sorted
+    according to the maximum sentence length of the premise and hypothesis
+    sentences in the pair, and batched.
+
+    Args:
+      batch_size: Desired batch size.
+
+    Returns:
+      A generator for data batches. The generator yields a 5-tuple:
+        label: An array of the shape (batch_size,).
+        premise: An array of the shape (max_premise_len, batch_size), wherein
+          max_premise_len is the maximum length of the (padded) premise
+          sentence in the batch.
+        premise_transitions: An array of the shape (2 * max_premise_len -3,
+          batch_size).
+        hypothesis: Same as `premise`, but for hypothesis sentences.
+        hypothesis_transitions: Same as `premise_transitions`, but for
+          hypothesis sentences.
+      All the elements of the 5-tuple have dtype `int64`.
+    """
+    # Randomly shuffle examples.
+    zipped = list(zip(
+        self._labels, self._premises, self._premise_transitions,
+        self._hypotheses, self._hypothesis_transitions))
+    random.shuffle(zipped)
+    # Then sort the examples by maximum of the premise and hypothesis sentence
+    # lengths in the pair. During training, the batches are expected to be
+    # shuffled. So it is okay to leave them sorted by max length here.
+    (labels, premises, premise_transitions, hypotheses,
+     hypothesis_transitions) = zip(
+         *sorted(zipped, key=lambda x: max(len(x[1]), len(x[3]))))
+
+    def _generator():
+      begin = 0
+      while begin < len(labels):
+        # The sorting above and the batching here makes sure that sentences of
+        # similar max lengths are batched together, minimizing the inefficiency
+        # due to uneven max lengths. The sentences are batched differently in
+        # each call to get_generator() due to the shuffling before sotring
+        # above. The pad_and_reverse_word_ids() and pad_transitions() functions
+        # take care of any remaning unevenness of the max sentence lengths.
+        end = min(begin + batch_size, len(labels))
+        # Transpose, because the SPINN model requires time-major, instead of
+        # batch-major.
+        yield (labels[begin:end],
+               pad_and_reverse_word_ids(premises[begin:end]).T,
+               pad_transitions(premise_transitions[begin:end]).T,
+               pad_and_reverse_word_ids(hypotheses[begin:end]).T,
+               pad_transitions(hypothesis_transitions[begin:end]).T)
+        begin = end
+    return _generator
diff --git a/tensorflow/contrib/eager/python/examples/spinn/data_test.py b/tensorflow/contrib/eager/python/examples/spinn/data_test.py
new file mode 100644
index 0000000000..e4f0b37c50
--- /dev/null
+++ b/tensorflow/contrib/eager/python/examples/spinn/data_test.py
@@ -0,0 +1,243 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Unit tests for SPINN data module."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import os
+import shutil
+import tempfile
+
+import tensorflow as tf
+
+from tensorflow.contrib.eager.python.examples.spinn import data
+
+
+class DataTest(tf.test.TestCase):
+
+  def setUp(self):
+    super(DataTest, self).setUp()
+    self._temp_data_dir = tempfile.mkdtemp()
+
+  def tearDown(self):
+    shutil.rmtree(self._temp_data_dir)
+    super(DataTest, self).tearDown()
+
+  def testGenNonParenthesisWords(self):
+    seq_with_parse = (
+        "( Man ( ( ( ( ( wearing pass ) ( on ( a lanyard ) ) ) and "
+        ") ( standing ( in ( ( a crowd ) ( of people ) ) ) ) ) . ) )")
+    self.assertEqual(
+        ["man", "wearing", "pass", "on", "a", "lanyard", "and", "standing",
+         "in", "a", "crowd", "of", "people", "."],
+        data.get_non_parenthesis_words(seq_with_parse.split(" ")))
+
+  def testGetShiftReduce(self):
+    seq_with_parse = (
+        "( Man ( ( ( ( ( wearing pass ) ( on ( a lanyard ) ) ) and "
+        ") ( standing ( in ( ( a crowd ) ( of people ) ) ) ) ) . ) )")
+    self.assertEqual(
+        [3, 3, 3, 2, 3, 3, 3, 2, 2, 2, 3, 2, 3, 3, 3, 3, 2, 3, 3, 2, 2, 2, 2, 2,
+         3, 2, 2], data.get_shift_reduce(seq_with_parse.split(" ")))
+
+  def testPadAndReverseWordIds(self):
+    id_sequences = [[0, 2, 3, 4, 5],
+                    [6, 7, 8],
+                    [9, 10, 11, 12, 13, 14, 15, 16]]
+    self.assertAllClose(
+        [[1, 1, 1, 1, 5, 4, 3, 2, 0],
+         [1, 1, 1, 1, 1, 1, 8, 7, 6],
+         [1, 16, 15, 14, 13, 12, 11, 10, 9]],
+        data.pad_and_reverse_word_ids(id_sequences))
+
+  def testPadTransitions(self):
+    unpadded = [[3, 3, 3, 2, 2, 2, 2],
+                [3, 3, 2, 2, 2]]
+    self.assertAllClose(
+        [[3, 3, 3, 2, 2, 2, 2],
+         [3, 3, 2, 2, 2, 1, 1]],
+        data.pad_transitions(unpadded))
+
+  def testCalculateBins(self):
+    length2count = {
+        1: 10,
+        2: 15,
+        3: 25,
+        4: 40,
+        5: 35,
+        6: 10}
+    self.assertEqual([2, 3, 4, 5, 6],
+                     data.calculate_bins(length2count, 20))
+    self.assertEqual([3, 4, 6], data.calculate_bins(length2count, 40))
+    self.assertEqual([4, 6], data.calculate_bins(length2count, 60))
+
+  def testLoadVoacbulary(self):
+    snli_1_0_dir = os.path.join(self._temp_data_dir, "snli/snli_1.0")
+    fake_train_file = os.path.join(snli_1_0_dir, "snli_1.0_train.txt")
+    fake_dev_file = os.path.join(snli_1_0_dir, "snli_1.0_dev.txt")
+    os.makedirs(snli_1_0_dir)
+
+    with open(fake_train_file, "wt") as f:
+      f.write("gold_label\tsentence1_binary_parse\tsentence2_binary_parse\t"
+              "sentence1_parse\tsentence2_parse\tsentence1\tsentence2\t"
+              "captionID\tpairID\tlabel1\tlabel2\tlabel3\tlabel4\tlabel5\n")
+      f.write("neutral\t( ( Foo bar ) . )\t( ( foo baz ) . )\t"
+              "DummySentence1Parse\tDummySentence2Parse\t"
+              "Foo bar.\tfoo baz.\t"
+              "4705552913.jpg#2\t4705552913.jpg#2r1n\t"
+              "neutral\tentailment\tneutral\tneutral\tneutral\n")
+    with open(fake_dev_file, "wt") as f:
+      f.write("gold_label\tsentence1_binary_parse\tsentence2_binary_parse\t"
+              "sentence1_parse\tsentence2_parse\tsentence1\tsentence2\t"
+              "captionID\tpairID\tlabel1\tlabel2\tlabel3\tlabel4\tlabel5\n")
+      f.write("neutral\t( ( Quux quuz ) ? )\t( ( Corge grault ) ! )\t"
+              "DummySentence1Parse\tDummySentence2Parse\t"
+              "Quux quuz?\t.Corge grault!\t"
+              "4705552913.jpg#2\t4705552913.jpg#2r1n\t"
+              "neutral\tentailment\tneutral\tneutral\tneutral\n")
+
+    vocab = data.load_vocabulary(self._temp_data_dir)
+    self.assertSetEqual(
+        {".", "?", "!", "foo", "bar", "baz", "quux", "quuz", "corge", "grault"},
+        vocab)
+
+  def testLoadVoacbularyWithoutFileRaisesError(self):
+    with self.assertRaisesRegexp(ValueError, "Cannot find SNLI data files at"):
+      data.load_vocabulary(self._temp_data_dir)
+
+    os.makedirs(os.path.join(self._temp_data_dir, "snli"))
+    with self.assertRaisesRegexp(ValueError, "Cannot find SNLI data files at"):
+      data.load_vocabulary(self._temp_data_dir)
+
+    os.makedirs(os.path.join(self._temp_data_dir, "snli/snli_1.0"))
+    with self.assertRaisesRegexp(ValueError, "Cannot find SNLI data files at"):
+      data.load_vocabulary(self._temp_data_dir)
+
+  def testLoadWordVectors(self):
+    glove_dir = os.path.join(self._temp_data_dir, "glove")
+    os.makedirs(glove_dir)
+    glove_file = os.path.join(glove_dir, "glove.42B.300d.txt")
+
+    words = [".", ",", "foo", "bar", "baz"]
+    with open(glove_file, "wt") as f:
+      for i, word in enumerate(words):
+        f.write("%s " % word)
+        for j in range(data.WORD_VECTOR_LEN):
+          f.write("%.5f" % (i * 0.1))
+          if j < data.WORD_VECTOR_LEN - 1:
+            f.write(" ")
+          else:
+            f.write("\n")
+
+    vocab = {"foo", "bar", "baz", "qux", "."}
+    # Notice that "qux" is not present in `words`.
+    word2index, embed = data.load_word_vectors(self._temp_data_dir, vocab)
+
+    self.assertEqual(6, len(word2index))
+    self.assertEqual(0, word2index["<unk>"])
+    self.assertEqual(1, word2index["<pad>"])
+    self.assertEqual(2, word2index["."])
+    self.assertEqual(3, word2index["foo"])
+    self.assertEqual(4, word2index["bar"])
+    self.assertEqual(5, word2index["baz"])
+    self.assertEqual((6, data.WORD_VECTOR_LEN), embed.shape)
+    self.assertAllClose([0.0] * data.WORD_VECTOR_LEN, embed[0, :])
+    self.assertAllClose([0.0] * data.WORD_VECTOR_LEN, embed[1, :])
+    self.assertAllClose([0.0] * data.WORD_VECTOR_LEN, embed[2, :])
+    self.assertAllClose([0.2] * data.WORD_VECTOR_LEN, embed[3, :])
+    self.assertAllClose([0.3] * data.WORD_VECTOR_LEN, embed[4, :])
+    self.assertAllClose([0.4] * data.WORD_VECTOR_LEN, embed[5, :])
+
+  def testLoadWordVectorsWithoutFileRaisesError(self):
+    vocab = {"foo", "bar", "baz", "qux", "."}
+    with self.assertRaisesRegexp(
+        ValueError, "Cannot find GloVe embedding file at"):
+      data.load_word_vectors(self._temp_data_dir, vocab)
+
+    os.makedirs(os.path.join(self._temp_data_dir, "glove"))
+    with self.assertRaisesRegexp(
+        ValueError, "Cannot find GloVe embedding file at"):
+      data.load_word_vectors(self._temp_data_dir, vocab)
+
+  def testSnliData(self):
+    """Unit test for SnliData objects."""
+    snli_1_0_dir = os.path.join(self._temp_data_dir, "snli/snli_1.0")
+    fake_train_file = os.path.join(snli_1_0_dir, "snli_1.0_train.txt")
+    os.makedirs(snli_1_0_dir)
+
+    # Four sentences in total.
+    with open(fake_train_file, "wt") as f:
+      f.write("gold_label\tsentence1_binary_parse\tsentence2_binary_parse\t"
+              "sentence1_parse\tsentence2_parse\tsentence1\tsentence2\t"
+              "captionID\tpairID\tlabel1\tlabel2\tlabel3\tlabel4\tlabel5\n")
+      f.write("neutral\t( ( Foo bar ) . )\t( ( foo . )\t"
+              "DummySentence1Parse\tDummySentence2Parse\t"
+              "Foo bar.\tfoo baz.\t"
+              "4705552913.jpg#2\t4705552913.jpg#2r1n\t"
+              "neutral\tentailment\tneutral\tneutral\tneutral\n")
+      f.write("contradiction\t( ( Bar foo ) . )\t( ( baz . )\t"
+              "DummySentence1Parse\tDummySentence2Parse\t"
+              "Foo bar.\tfoo baz.\t"
+              "4705552913.jpg#2\t4705552913.jpg#2r1n\t"
+              "neutral\tentailment\tneutral\tneutral\tneutral\n")
+      f.write("entailment\t( ( Quux quuz ) . )\t( ( grault . )\t"
+              "DummySentence1Parse\tDummySentence2Parse\t"
+              "Foo bar.\tfoo baz.\t"
+              "4705552913.jpg#2\t4705552913.jpg#2r1n\t"
+              "neutral\tentailment\tneutral\tneutral\tneutral\n")
+      f.write("entailment\t( ( Quuz quux ) . )\t( ( garply . )\t"
+              "DummySentence1Parse\tDummySentence2Parse\t"
+              "Foo bar.\tfoo baz.\t"
+              "4705552913.jpg#2\t4705552913.jpg#2r1n\t"
+              "neutral\tentailment\tneutral\tneutral\tneutral\n")
+
+    glove_dir = os.path.join(self._temp_data_dir, "glove")
+    os.makedirs(glove_dir)
+    glove_file = os.path.join(glove_dir, "glove.42B.300d.txt")
+
+    words = [".", "foo", "bar", "baz", "quux", "quuz", "grault", "garply"]
+    with open(glove_file, "wt") as f:
+      for i, word in enumerate(words):
+        f.write("%s " % word)
+        for j in range(data.WORD_VECTOR_LEN):
+          f.write("%.5f" % (i * 0.1))
+          if j < data.WORD_VECTOR_LEN - 1:
+            f.write(" ")
+          else:
+            f.write("\n")
+
+    vocab = data.load_vocabulary(self._temp_data_dir)
+    word2index, _ = data.load_word_vectors(self._temp_data_dir, vocab)
+
+    train_data = data.SnliData(fake_train_file, word2index)
+    self.assertEqual(4, train_data.num_batches(1))
+    self.assertEqual(2, train_data.num_batches(2))
+    self.assertEqual(2, train_data.num_batches(3))
+    self.assertEqual(1, train_data.num_batches(4))
+
+    generator = train_data.get_generator(2)()
+    for i in range(2):
+      label, prem, prem_trans, hypo, hypo_trans = next(generator)
+      self.assertEqual(2, len(label))
+      self.assertEqual((4, 2), prem.shape)
+      self.assertEqual((5, 2), prem_trans.shape)
+      self.assertEqual((3, 2), hypo.shape)
+      self.assertEqual((3, 2), hypo_trans.shape)
+
+
+if __name__ == "__main__":
+  tf.test.main()
diff --git a/tensorflow/contrib/eager/python/examples/spinn/spinn_test.py b/tensorflow/contrib/eager/python/examples/spinn/spinn_test.py
new file mode 100644
index 0000000000..84e25cf81a
--- /dev/null
+++ b/tensorflow/contrib/eager/python/examples/spinn/spinn_test.py
@@ -0,0 +1,409 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import collections
+import gc
+import glob
+import os
+import shutil
+import tempfile
+import time
+
+import numpy as np
+import tensorflow as tf
+
+# pylint: disable=g-bad-import-order
+import tensorflow.contrib.eager as tfe
+from tensorflow.contrib.eager.python.examples.spinn import data
+from third_party.examples.eager.spinn import spinn
+from tensorflow.contrib.summary import summary_test_util
+from tensorflow.python.eager import test
+from tensorflow.python.framework import test_util
+# pylint: enable=g-bad-import-order
+
+
+def _generate_synthetic_snli_data_batch(sequence_length,
+                                        batch_size,
+                                        vocab_size):
+  """Generate a fake batch of SNLI data for testing."""
+  with tf.device("cpu:0"):
+    labels = tf.random_uniform([batch_size], minval=1, maxval=4, dtype=tf.int64)
+    prem = tf.random_uniform(
+        (sequence_length, batch_size), maxval=vocab_size, dtype=tf.int64)
+    prem_trans = tf.constant(np.array(
+        [[3, 3, 2, 3, 3, 3, 2, 2, 2, 3, 3, 3,
+          2, 3, 3, 2, 2, 3, 3, 3, 2, 2, 2, 2,
+          3, 2, 2]] * batch_size, dtype=np.int64).T)
+    hypo = tf.random_uniform(
+        (sequence_length, batch_size), maxval=vocab_size, dtype=tf.int64)
+    hypo_trans = tf.constant(np.array(
+        [[3, 3, 2, 3, 3, 3, 2, 2, 2, 3, 3, 3,
+          2, 3, 3, 2, 2, 3, 3, 3, 2, 2, 2, 2,
+          3, 2, 2]] * batch_size, dtype=np.int64).T)
+  if tfe.num_gpus():
+    labels = labels.gpu()
+    prem = prem.gpu()
+    prem_trans = prem_trans.gpu()
+    hypo = hypo.gpu()
+    hypo_trans = hypo_trans.gpu()
+  return labels, prem, prem_trans, hypo, hypo_trans
+
+
+def _test_spinn_config(d_embed, d_out, logdir=None):
+  config_tuple = collections.namedtuple(
+      "Config", ["d_hidden", "d_proj", "d_tracker", "predict",
+                 "embed_dropout", "mlp_dropout", "n_mlp_layers", "d_mlp",
+                 "d_out", "projection", "lr", "batch_size", "epochs",
+                 "force_cpu", "logdir", "log_every", "dev_every", "save_every",
+                 "lr_decay_every", "lr_decay_by"])
+  return config_tuple(
+      d_hidden=d_embed,
+      d_proj=d_embed * 2,
+      d_tracker=8,
+      predict=False,
+      embed_dropout=0.1,
+      mlp_dropout=0.1,
+      n_mlp_layers=2,
+      d_mlp=32,
+      d_out=d_out,
+      projection=True,
+      lr=2e-2,
+      batch_size=2,
+      epochs=10,
+      force_cpu=False,
+      logdir=logdir,
+      log_every=1,
+      dev_every=2,
+      save_every=2,
+      lr_decay_every=1,
+      lr_decay_by=0.75)
+
+
+class SpinnTest(test_util.TensorFlowTestCase):
+
+  def setUp(self):
+    super(SpinnTest, self).setUp()
+    self._test_device = "gpu:0" if tfe.num_gpus() else "cpu:0"
+    self._temp_data_dir = tempfile.mkdtemp()
+
+  def tearDown(self):
+    shutil.rmtree(self._temp_data_dir)
+    super(SpinnTest, self).tearDown()
+
+  def testBundle(self):
+    with tf.device(self._test_device):
+      lstm_iter = [np.array([[0, 1], [2, 3]], dtype=np.float32),
+                   np.array([[0, -1], [-2, -3]], dtype=np.float32),
+                   np.array([[0, 2], [4, 6]], dtype=np.float32),
+                   np.array([[0, -2], [-4, -6]], dtype=np.float32)]
+      out = spinn._bundle(lstm_iter)
+
+      self.assertEqual(2, len(out))
+      self.assertEqual(tf.float32, out[0].dtype)
+      self.assertEqual(tf.float32, out[1].dtype)
+      self.assertAllEqual(np.array([[0, 2, 0, -2, 0, 4, 0, -4]]).T,
+                          out[0].numpy())
+      self.assertAllEqual(np.array([[1, 3, -1, -3, 2, 6, -2, -6]]).T,
+                          out[1].numpy())
+
+  def testUnbunbdle(self):
+    with tf.device(self._test_device):
+      state = [np.array([[0, 1, 2], [3, 4, 5]], dtype=np.float32),
+               np.array([[0, -1, -2], [-3, -4, -5]], dtype=np.float32)]
+      out = spinn._unbundle(state)
+
+      self.assertEqual(2, len(out))
+      self.assertEqual(tf.float32, out[0].dtype)
+      self.assertEqual(tf.float32, out[1].dtype)
+      self.assertAllEqual(np.array([[0, 1, 2, 0, -1, -2]]),
+                          out[0].numpy())
+      self.assertAllEqual(np.array([[3, 4, 5, -3, -4, -5]]),
+                          out[1].numpy())
+
+  def testReducer(self):
+    with tf.device(self._test_device):
+      batch_size = 3
+      size = 10
+      tracker_size = 8
+      reducer = spinn.Reducer(size, tracker_size=tracker_size)
+
+      left_in = []
+      right_in = []
+      tracking = []
+      for _ in range(batch_size):
+        left_in.append(tf.random_normal((1, size * 2)))
+        right_in.append(tf.random_normal((1, size * 2)))
+        tracking.append(tf.random_normal((1, tracker_size * 2)))
+
+      out = reducer(left_in, right_in, tracking=tracking)
+      self.assertEqual(batch_size, len(out))
+      self.assertEqual(tf.float32, out[0].dtype)
+      self.assertEqual((1, size * 2), out[0].shape)
+
+  def testReduceTreeLSTM(self):
+    with tf.device(self._test_device):
+      size = 10
+      tracker_size = 8
+      reducer = spinn.Reducer(size, tracker_size=tracker_size)
+
+      lstm_in = np.array([[0, 1, 2, 3, 4, 5, 6, 7, 8, 9],
+                          [0, -1, -2, -3, -4, -5, -6, -7, -8, -9]],
+                         dtype=np.float32)
+      c1 = np.array([[0, 1], [2, 3]], dtype=np.float32)
+      c2 = np.array([[0, -1], [-2, -3]], dtype=np.float32)
+
+      h, c = reducer._tree_lstm(c1, c2, lstm_in)
+      self.assertEqual(tf.float32, h.dtype)
+      self.assertEqual(tf.float32, c.dtype)
+      self.assertEqual((2, 2), h.shape)
+      self.assertEqual((2, 2), c.shape)
+
+  def testTracker(self):
+    with tf.device(self._test_device):
+      batch_size = 2
+      size = 10
+      tracker_size = 8
+      buffer_length = 18
+      stack_size = 3
+
+      tracker = spinn.Tracker(tracker_size, False)
+      tracker.reset_state()
+
+      # Create dummy inputs for testing.
+      bufs = []
+      buf = []
+      for _ in range(buffer_length):
+        buf.append(tf.random_normal((batch_size, size * 2)))
+      bufs.append(buf)
+      self.assertEqual(1, len(bufs))
+      self.assertEqual(buffer_length, len(bufs[0]))
+      self.assertEqual((batch_size, size * 2), bufs[0][0].shape)
+
+      stacks = []
+      stack = []
+      for _ in range(stack_size):
+        stack.append(tf.random_normal((batch_size, size * 2)))
+      stacks.append(stack)
+      self.assertEqual(1, len(stacks))
+      self.assertEqual(3, len(stacks[0]))
+      self.assertEqual((batch_size, size * 2), stacks[0][0].shape)
+
+      for _ in range(2):
+        out1, out2 = tracker(bufs, stacks)
+        self.assertIsNone(out2)
+        self.assertEqual(batch_size, len(out1))
+        self.assertEqual(tf.float32, out1[0].dtype)
+        self.assertEqual((1, tracker_size * 2), out1[0].shape)
+
+        self.assertEqual(tf.float32, tracker.state.c.dtype)
+        self.assertEqual((batch_size, tracker_size), tracker.state.c.shape)
+        self.assertEqual(tf.float32, tracker.state.h.dtype)
+        self.assertEqual((batch_size, tracker_size), tracker.state.h.shape)
+
+  def testSPINN(self):
+    with tf.device(self._test_device):
+      embedding_dims = 10
+      d_tracker = 8
+      sequence_length = 15
+      num_transitions = 27
+
+      config_tuple = collections.namedtuple(
+          "Config", ["d_hidden", "d_proj", "d_tracker", "predict"])
+      config = config_tuple(
+          embedding_dims, embedding_dims * 2, d_tracker, False)
+      s = spinn.SPINN(config)
+
+      # Create some fake data.
+      buffers = tf.random_normal((sequence_length, 1, config.d_proj))
+      transitions = tf.constant(
+          [[3], [3], [2], [3], [3], [3], [2], [2], [2], [3], [3], [3],
+           [2], [3], [3], [2], [2], [3], [3], [3], [2], [2], [2], [2],
+           [3], [2], [2]], dtype=tf.int64)
+      self.assertEqual(tf.int64, transitions.dtype)
+      self.assertEqual((num_transitions, 1), transitions.shape)
+
+      out = s(buffers, transitions, training=True)
+      self.assertEqual(tf.float32, out.dtype)
+      self.assertEqual((1, embedding_dims), out.shape)
+
+  def testSNLIClassifierAndTrainer(self):
+    with tf.device(self._test_device):
+      vocab_size = 40
+      batch_size = 2
+      d_embed = 10
+      sequence_length = 15
+      d_out = 4
+
+      config = _test_spinn_config(d_embed, d_out)
+
+      # Create fake embedding matrix.
+      embed = tf.random_normal((vocab_size, d_embed))
+
+      model = spinn.SNLIClassifier(config, embed)
+      trainer = spinn.SNLIClassifierTrainer(model, config.lr)
+
+      (labels, prem, prem_trans, hypo,
+       hypo_trans) = _generate_synthetic_snli_data_batch(sequence_length,
+                                                         batch_size,
+                                                         vocab_size)
+
+      # Invoke model under non-training mode.
+      logits = model(prem, prem_trans, hypo, hypo_trans, training=False)
+      self.assertEqual(tf.float32, logits.dtype)
+      self.assertEqual((batch_size, d_out), logits.shape)
+
+      # Invoke model under training model.
+      logits = model(prem, prem_trans, hypo, hypo_trans, training=True)
+      self.assertEqual(tf.float32, logits.dtype)
+      self.assertEqual((batch_size, d_out), logits.shape)
+
+      # Calculate loss.
+      loss1 = trainer.loss(labels, logits)
+      self.assertEqual(tf.float32, loss1.dtype)
+      self.assertEqual((), loss1.shape)
+
+      loss2, logits = trainer.train_batch(
+          labels, prem, prem_trans, hypo, hypo_trans)
+      self.assertEqual(tf.float32, loss2.dtype)
+      self.assertEqual((), loss2.shape)
+      self.assertEqual(tf.float32, logits.dtype)
+      self.assertEqual((batch_size, d_out), logits.shape)
+      # Training on the batch should have led to a change in the loss value.
+      self.assertNotEqual(loss1.numpy(), loss2.numpy())
+
+  def testTrainSpinn(self):
+    """Test with fake toy SNLI data and GloVe vectors."""
+
+    # 1. Create and load a fake SNLI data file and a fake GloVe embedding file.
+    snli_1_0_dir = os.path.join(self._temp_data_dir, "snli/snli_1.0")
+    fake_train_file = os.path.join(snli_1_0_dir, "snli_1.0_train.txt")
+    os.makedirs(snli_1_0_dir)
+
+    # Four sentences in total.
+    with open(fake_train_file, "wt") as f:
+      f.write("gold_label\tsentence1_binary_parse\tsentence2_binary_parse\t"
+              "sentence1_parse\tsentence2_parse\tsentence1\tsentence2\t"
+              "captionID\tpairID\tlabel1\tlabel2\tlabel3\tlabel4\tlabel5\n")
+      f.write("neutral\t( ( Foo bar ) . )\t( ( foo . )\t"
+              "DummySentence1Parse\tDummySentence2Parse\t"
+              "Foo bar.\tfoo baz.\t"
+              "4705552913.jpg#2\t4705552913.jpg#2r1n\t"
+              "neutral\tentailment\tneutral\tneutral\tneutral\n")
+      f.write("contradiction\t( ( Bar foo ) . )\t( ( baz . )\t"
+              "DummySentence1Parse\tDummySentence2Parse\t"
+              "Foo bar.\tfoo baz.\t"
+              "4705552913.jpg#2\t4705552913.jpg#2r1n\t"
+              "neutral\tentailment\tneutral\tneutral\tneutral\n")
+      f.write("entailment\t( ( Quux quuz ) . )\t( ( grault . )\t"
+              "DummySentence1Parse\tDummySentence2Parse\t"
+              "Foo bar.\tfoo baz.\t"
+              "4705552913.jpg#2\t4705552913.jpg#2r1n\t"
+              "neutral\tentailment\tneutral\tneutral\tneutral\n")
+      f.write("entailment\t( ( Quuz quux ) . )\t( ( garply . )\t"
+              "DummySentence1Parse\tDummySentence2Parse\t"
+              "Foo bar.\tfoo baz.\t"
+              "4705552913.jpg#2\t4705552913.jpg#2r1n\t"
+              "neutral\tentailment\tneutral\tneutral\tneutral\n")
+
+    glove_dir = os.path.join(self._temp_data_dir, "glove")
+    os.makedirs(glove_dir)
+    glove_file = os.path.join(glove_dir, "glove.42B.300d.txt")
+
+    words = [".", "foo", "bar", "baz", "quux", "quuz", "grault", "garply"]
+    with open(glove_file, "wt") as f:
+      for i, word in enumerate(words):
+        f.write("%s " % word)
+        for j in range(data.WORD_VECTOR_LEN):
+          f.write("%.5f" % (i * 0.1))
+          if j < data.WORD_VECTOR_LEN - 1:
+            f.write(" ")
+          else:
+            f.write("\n")
+
+    vocab = data.load_vocabulary(self._temp_data_dir)
+    word2index, embed = data.load_word_vectors(self._temp_data_dir, vocab)
+
+    train_data = data.SnliData(fake_train_file, word2index)
+    dev_data = data.SnliData(fake_train_file, word2index)
+    test_data = data.SnliData(fake_train_file, word2index)
+    print(embed)
+
+    # 2. Create a fake config.
+    config = _test_spinn_config(
+        data.WORD_VECTOR_LEN, 4,
+        logdir=os.path.join(self._temp_data_dir, "logdir"))
+
+    # 3. Test training of a SPINN model.
+    spinn.train_spinn(embed, train_data, dev_data, test_data, config)
+
+    # 4. Load train loss values from the summary files and verify that they
+    #    decrease with training.
+    summary_file = glob.glob(os.path.join(config.logdir, "events.out.*"))[0]
+    events = summary_test_util.events_from_file(summary_file)
+    train_losses = [event.summary.value[0].simple_value for event in events
+                    if event.summary.value
+                    and event.summary.value[0].tag == "train/loss"]
+    self.assertEqual(config.epochs, len(train_losses))
+    self.assertLess(train_losses[-1], train_losses[0])
+
+
+class EagerSpinnSNLIClassifierBenchmark(test.Benchmark):
+
+  def benchmarkEagerSpinnSNLIClassifier(self):
+    test_device = "gpu:0" if tfe.num_gpus() else "cpu:0"
+    with tf.device(test_device):
+      burn_in_iterations = 2
+      benchmark_iterations = 10
+
+      vocab_size = 1000
+      batch_size = 128
+      sequence_length = 15
+      d_embed = 200
+      d_out = 4
+
+      embed = tf.random_normal((vocab_size, d_embed))
+
+      config = _test_spinn_config(d_embed, d_out)
+      model = spinn.SNLIClassifier(config, embed)
+      trainer = spinn.SNLIClassifierTrainer(model, config.lr)
+
+      (labels, prem, prem_trans, hypo,
+       hypo_trans) = _generate_synthetic_snli_data_batch(sequence_length,
+                                                         batch_size,
+                                                         vocab_size)
+
+      for _ in range(burn_in_iterations):
+        trainer.train_batch(labels, prem, prem_trans, hypo, hypo_trans)
+
+      gc.collect()
+      start_time = time.time()
+      for _ in xrange(benchmark_iterations):
+        trainer.train_batch(labels, prem, prem_trans, hypo, hypo_trans)
+      wall_time = time.time() - start_time
+      # Named "examples"_per_sec to conform with other benchmarks.
+      extras = {"examples_per_sec": benchmark_iterations / wall_time}
+      self.report_benchmark(
+          name="Eager_SPINN_SNLIClassifier_Benchmark",
+          iters=benchmark_iterations,
+          wall_time=wall_time,
+          extras=extras)
+
+
+if __name__ == "__main__":
+  test.main()
diff --git a/third_party/examples/eager/spinn/BUILD b/third_party/examples/eager/spinn/BUILD
new file mode 100644
index 0000000000..0e39d4696f
--- /dev/null
+++ b/third_party/examples/eager/spinn/BUILD
@@ -0,0 +1,14 @@
+licenses(["notice"])  # 3-clause BSD.
+
+py_binary(
+    name = "spinn",
+    srcs = ["spinn.py"],
+    srcs_version = "PY2AND3",
+    visibility = ["//visibility:public"],
+    deps = [
+        "//tensorflow:tensorflow_py",
+        "//tensorflow/contrib/eager/python:tfe",
+        "//tensorflow/contrib/eager/python/examples/spinn:data",
+        "@six_archive//:six",
+    ],
+)
diff --git a/third_party/examples/eager/spinn/LICENSE b/third_party/examples/eager/spinn/LICENSE
new file mode 100644
index 0000000000..09d493bf1f
--- /dev/null
+++ b/third_party/examples/eager/spinn/LICENSE
@@ -0,0 +1,29 @@
+BSD 3-Clause License
+
+Copyright (c) 2017, 
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+* Redistributions of source code must retain the above copyright notice, this
+  list of conditions and the following disclaimer.
+
+* Redistributions in binary form must reproduce the above copyright notice,
+  this list of conditions and the following disclaimer in the documentation
+  and/or other materials provided with the distribution.
+
+* Neither the name of the copyright holder nor the names of its
+  contributors may be used to endorse or promote products derived from
+  this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/third_party/examples/eager/spinn/README.md b/third_party/examples/eager/spinn/README.md
new file mode 100644
index 0000000000..c00d8d9015
--- /dev/null
+++ b/third_party/examples/eager/spinn/README.md
@@ -0,0 +1,54 @@
+# SPINN with TensorFlow eager execution
+
+SPINN, or Stack-Augmented Parser-Interpreter Neural Network, is a recursive
+neural network that utilizes syntactic parse information for natural language
+understanding.
+
+SPINN was originally described by:
+Bowman, S.R., Gauthier, J., Rastogi A., Gupta, R., Manning, C.D., & Potts, C.
+  (2016). A Fast Unified Model for Parsing and Sentence Understanding.
+  https://arxiv.org/abs/1603.06021
+
+Our implementation is based on @jekbradbury's PyTorch implementation at:
+https://github.com/jekbradbury/examples/blob/spinn/snli/spinn.py,
+
+which was released under the BSD 3-Clause License at:
+https://github.com/jekbradbury/examples/blob/spinn/LICENSE
+
+##  Content
+
+Python source file(s):
+- `spinn.py`: Model definition and training routines written with TensorFlow
+  eager execution idioms.
+
+## To run
+
+- Make sure you have installed the latest `tf-nightly` or `tf-nightly-gpu` pip
+  package of TensorFlow in order to access the eager execution feature.
+
+- Download and extract the raw SNLI data and GloVe embedding vectors.
+  For example:
+
+  ```bash
+  curl -fSsL https://nlp.stanford.edu/projects/snli/snli_1.0.zip --create-dirs -o /tmp/spinn-data/snli/snli_1.0.zip
+  unzip -d /tmp/spinn-data/snli /tmp/spinn-data/snli/snli_1.0.zip
+  curl -fSsL http://nlp.stanford.edu/data/glove.42B.300d.zip --create-dirs -o /tmp/spinn-data/glove/glove.42B.300d.zip
+  unzip -d /tmp/spinn-data/glove /tmp/spinn-data/glove/glove.42B.300d.zip
+  ```
+
+- Train model. E.g.,
+
+  ```bash
+  python spinn.py --data_root /tmp/spinn-data --logdir /tmp/spinn-logs
+  ```
+
+  During training, model checkpoints and TensorBoard summaries will be written
+  periodically to the directory specified with the `--logdir` flag.
+  The training script will reload a saved checkpoint from the directory if it
+  can find one there.
+
+  To view the summaries with TensorBoard:
+
+  ```bash
+  tensorboard --logdir /tmp/spinn-logs
+  ```
diff --git a/third_party/examples/eager/spinn/spinn.py b/third_party/examples/eager/spinn/spinn.py
new file mode 100644
index 0000000000..963ac0e65b
--- /dev/null
+++ b/third_party/examples/eager/spinn/spinn.py
@@ -0,0 +1,732 @@
+r"""Implementation of SPINN in TensorFlow eager execution.
+
+SPINN: Stack-Augmented Parser-Interpreter Neural Network.
+
+Ths file contains model definition and code for training the model.
+
+The model definition is based on PyTorch implementation at:
+  https://github.com/jekbradbury/examples/tree/spinn/snli
+
+which was released under a BSD 3-Clause License at:
+https://github.com/jekbradbury/examples/blob/spinn/LICENSE:
+
+Copyright (c) 2017,
+All rights reserved.
+
+See ./LICENSE for more details.
+
+Instructions for use:
+* See `README.md` for details on how to prepare the SNLI and GloVe data.
+* Suppose you have prepared the data at "/tmp/spinn-data", use the folloing
+  command to train the model:
+
+  ```bash
+  python spinn.py --data_root /tmp/spinn-data --logdir /tmp/spinn-logs
+  ```
+
+  Checkpoints and TensorBoard summaries will be written to "/tmp/spinn-logs".
+
+References:
+* Bowman, S.R., Gauthier, J., Rastogi A., Gupta, R., Manning, C.D., & Potts, C.
+  (2016). A Fast Unified Model for Parsing and Sentence Understanding.
+  https://arxiv.org/abs/1603.06021
+* Bradbury, J. (2017). Recursive Neural Networks with PyTorch.
+  https://devblogs.nvidia.com/parallelforall/recursive-neural-networks-pytorch/
+"""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import argparse
+import itertools
+import os
+import sys
+import time
+
+from six.moves import xrange  # pylint: disable=redefined-builtin
+import tensorflow as tf
+
+import tensorflow.contrib.eager as tfe
+from tensorflow.contrib.eager.python.examples.spinn import data
+
+
+def _bundle(lstm_iter):
+  """Concatenate a list of Tensors along 1st axis and split result into two.
+
+  Args:
+    lstm_iter: A `list` of `N` dense `Tensor`s, each of which has the shape
+      (R, 2 * M).
+
+  Returns:
+    A `list` of two dense `Tensor`s, each of which has the shape (N * R, M).
+  """
+  return tf.split(tf.concat(lstm_iter, 0), 2, axis=1)
+
+
+def _unbundle(state):
+  """Concatenate a list of Tensors along 2nd axis and split result.
+
+  This is the inverse of `_bundle`.
+
+  Args:
+    state: A `list` of two dense `Tensor`s, each of which has the shape (R, M).
+
+  Returns:
+    A `list` of `R` dense `Tensors`, each of which has the shape (1, 2 * M).
+  """
+  return tf.split(tf.concat(state, 1), state[0].shape[0], axis=0)
+
+
+class Reducer(tfe.Network):
+  """A module that applies reduce operation on left and right vectors."""
+
+  def __init__(self, size, tracker_size=None):
+    super(Reducer, self).__init__()
+    self.left = self.track_layer(tf.layers.Dense(5 * size, activation=None))
+    self.right = self.track_layer(
+        tf.layers.Dense(5 * size, activation=None, use_bias=False))
+    if tracker_size is not None:
+      self.track = self.track_layer(
+          tf.layers.Dense(5 * size, activation=None, use_bias=False))
+    else:
+      self.track = None
+
+  def call(self, left_in, right_in, tracking=None):
+    """Invoke forward pass of the Reduce module.
+
+    This method feeds a linear combination of `left_in`, `right_in` and
+    `tracking` into a Tree LSTM and returns the output of the Tree LSTM.
+
+    Args:
+      left_in: A list of length L. Each item is a dense `Tensor` with
+        the shape (1, n_dims). n_dims is the size of the embedding vector.
+      right_in: A list of the same length as `left_in`. Each item should have
+        the same shape as the items of `left_in`.
+      tracking: Optional list of the same length as `left_in`. Each item is a
+        dense `Tensor` with shape (1, tracker_size * 2). tracker_size is the
+        size of the Tracker's state vector.
+
+    Returns:
+      Output: A list of length batch_size. Each item has the shape (1, n_dims).
+    """
+    left, right = _bundle(left_in), _bundle(right_in)
+    lstm_in = self.left(left[0]) + self.right(right[0])
+    if self.track and tracking:
+      lstm_in += self.track(_bundle(tracking)[0])
+    return _unbundle(self._tree_lstm(left[1], right[1], lstm_in))
+
+  def _tree_lstm(self, c1, c2, lstm_in):
+    a, i, f1, f2, o = tf.split(lstm_in, 5, axis=1)
+    c = tf.tanh(a) * tf.sigmoid(i) + tf.sigmoid(f1) * c1 + tf.sigmoid(f2) * c2
+    h = tf.sigmoid(o) * tf.tanh(c)
+    return h, c
+
+
+class Tracker(tfe.Network):
+  """A module that tracks the history of the sentence with an LSTM."""
+
+  def __init__(self, tracker_size, predict):
+    """Constructor of Tracker.
+
+    Args:
+      tracker_size: Number of dimensions of the underlying `LSTMCell`.
+      predict: (`bool`) Whether prediction mode is enabled.
+    """
+    super(Tracker, self).__init__()
+    self._rnn = self.track_layer(tf.nn.rnn_cell.LSTMCell(tracker_size))
+    self._state_size = tracker_size
+    if predict:
+      self._transition = self.track_layer(tf.layers.Dense(4))
+    else:
+      self._transition = None
+
+  def reset_state(self):
+    self.state = None
+
+  def call(self, bufs, stacks):
+    """Invoke the forward pass of the Tracker module.
+
+    This method feeds the concatenation of the top two elements of the stacks
+    into an LSTM cell and returns the resultant state of the LSTM cell.
+
+    Args:
+      bufs: A `list` of length batch_size. Each item is a `list` of
+        max_sequence_len (maximum sequence length of the batch). Each item
+        of the nested list is a dense `Tensor` of shape (1, d_proj), where
+        d_proj is the size of the word embedding vector or the size of the
+        vector space that the word embedding vector is projected to.
+      stacks: A `list` of size batch_size. Each item is a `list` of
+        variable length corresponding to the current height of the stack.
+        Each item of the nested list is a dense `Tensor` of shape (1, d_proj).
+
+    Returns:
+      1. A list of length batch_size. Each item is a dense `Tensor` of shape
+        (1, d_tracker * 2).
+      2.  If under predict mode, result of applying a Dense layer on the
+        first state vector of the RNN. Else, `None`.
+    """
+    buf = _bundle([buf[-1] for buf in bufs])[0]
+    stack1 = _bundle([stack[-1] for stack in stacks])[0]
+    stack2 = _bundle([stack[-2] for stack in stacks])[0]
+    x = tf.concat([buf, stack1, stack2], 1)
+    if self.state is None:
+      batch_size = int(x.shape[0])
+      zeros = tf.zeros((batch_size, self._state_size), dtype=tf.float32)
+      self.state = [zeros, zeros]
+    _, self.state = self._rnn(x, self.state)
+    unbundled = _unbundle(self.state)
+    if self._transition:
+      return unbundled, self._transition(self.state[0])
+    else:
+      return unbundled, None
+
+
+class SPINN(tfe.Network):
+  """Stack-augmented Parser-Interpreter Neural Network.
+
+  See https://arxiv.org/abs/1603.06021 for more details.
+  """
+
+  def __init__(self, config):
+    """Constructor of SPINN.
+
+    Args:
+      config: A `namedtupled` with the following attributes.
+        d_proj - (`int`) number of dimensions of the vector space to project the
+          word embeddings to.
+        d_tracker - (`int`) number of dimensions of the Tracker's state vector.
+        d_hidden - (`int`) number of the dimensions of the hidden state, for the
+          Reducer module.
+        n_mlp_layers - (`int`) number of multi-layer perceptron layers to use to
+          convert the output of the `Feature` module to logits.
+        predict - (`bool`) Whether the Tracker will enabled predictions.
+    """
+    super(SPINN, self).__init__()
+    self.config = config
+    self.reducer = self.track_layer(Reducer(config.d_hidden, config.d_tracker))
+    if config.d_tracker is not None:
+      self.tracker = self.track_layer(Tracker(config.d_tracker, config.predict))
+    else:
+      self.tracker = None
+
+  def call(self, buffers, transitions, training=False):
+    """Invoke the forward pass of the SPINN model.
+
+    Args:
+      buffers: Dense `Tensor` of shape
+        (max_sequence_len, batch_size, config.d_proj).
+      transitions: Dense `Tensor` with integer values that represent the parse
+        trees of the sentences. A value of 2 indicates "reduce"; a value of 3
+        indicates "shift". Shape: (max_sequence_len * 2 - 3, batch_size).
+      training: Whether the invocation is under training mode.
+
+    Returns:
+      Output `Tensor` of shape (batch_size, config.d_embed).
+    """
+    max_sequence_len, batch_size, d_proj = (int(x) for x in buffers.shape)
+
+    # Split the buffers into left and right word items and put the initial
+    # items in a stack.
+    splitted = tf.split(
+        tf.reshape(tf.transpose(buffers, [1, 0, 2]), [-1, d_proj]),
+        max_sequence_len * batch_size, axis=0)
+    buffers = [splitted[k:k + max_sequence_len]
+               for k in xrange(0, len(splitted), max_sequence_len)]
+    stacks = [[buf[0], buf[0]] for buf in buffers]
+
+    if self.tracker:
+      # Reset tracker state for new batch.
+      self.tracker.reset_state()
+
+    num_transitions = transitions.shape[0]
+
+    # Iterate through transitions and perform the appropriate stack-pop, reduce
+    # and stack-push operations.
+    transitions = transitions.numpy()
+    for i in xrange(num_transitions):
+      trans = transitions[i]
+      if self.tracker:
+        # Invoke tracker to obtain the current tracker states for the sentences.
+        tracker_states, trans_hypothesis = self.tracker(buffers, stacks)
+        if trans_hypothesis:
+          trans = tf.argmax(trans_hypothesis, axis=-1)
+      else:
+        tracker_states = itertools.repeat(None)
+      lefts, rights, trackings = [], [], []
+      for transition, buf, stack, tracking in zip(
+          trans, buffers, stacks, tracker_states):
+        if int(transition) == 3:  # Shift.
+          stack.append(buf.pop())
+        elif int(transition) == 2:  # Reduce.
+          rights.append(stack.pop())
+          lefts.append(stack.pop())
+          trackings.append(tracking)
+
+      if rights:
+        reducer_output = self.reducer(lefts, rights, trackings)
+        reduced = iter(reducer_output)
+
+        for transition, stack in zip(trans, stacks):
+          if int(transition) == 2:  # Reduce.
+            stack.append(next(reduced))
+    return _bundle([stack.pop() for stack in stacks])[0]
+
+
+class SNLIClassifier(tfe.Network):
+  """SNLI Classifier Model.
+
+  A model aimed at solving the SNLI (Standford Natural Language Inference)
+  task, using the SPINN model from above. For details of the task, see:
+    https://nlp.stanford.edu/projects/snli/
+  """
+
+  def __init__(self, config, embed):
+    """Constructor of SNLICLassifier.
+
+    Args:
+      config: A namedtuple containing required configurations for the model. It
+        needs to have the following attributes.
+        projection - (`bool`) whether the word vectors are to be projected onto
+          another vector space (of `d_proj` dimensions).
+        d_proj - (`int`) number of dimensions of the vector space to project the
+          word embeddings to.
+        embed_dropout - (`float`) dropout rate for the word embedding vectors.
+        n_mlp_layers - (`int`) number of multi-layer perceptron (MLP) layers to
+          use to convert the output of the `Feature` module to logits.
+        mlp_dropout - (`float`) dropout rate of the MLP layers.
+        d_out - (`int`) number of dimensions of the final output of the MLP
+          layers.
+        lr - (`float`) learning rate.
+      embed: A embedding matrix of shape (vocab_size, d_embed).
+    """
+    super(SNLIClassifier, self).__init__()
+    self.config = config
+    self.embed = tf.constant(embed)
+
+    self.projection = self.track_layer(tf.layers.Dense(config.d_proj))
+    self.embed_bn = self.track_layer(tf.layers.BatchNormalization())
+    self.embed_dropout = self.track_layer(
+        tf.layers.Dropout(rate=config.embed_dropout))
+    self.encoder = self.track_layer(SPINN(config))
+
+    self.feature_bn = self.track_layer(tf.layers.BatchNormalization())
+    self.feature_dropout = self.track_layer(
+        tf.layers.Dropout(rate=config.mlp_dropout))
+
+    self.mlp_dense = []
+    self.mlp_bn = []
+    self.mlp_dropout = []
+    for _ in xrange(config.n_mlp_layers):
+      self.mlp_dense.append(self.track_layer(tf.layers.Dense(config.d_mlp)))
+      self.mlp_bn.append(
+          self.track_layer(tf.layers.BatchNormalization()))
+      self.mlp_dropout.append(
+          self.track_layer(tf.layers.Dropout(rate=config.mlp_dropout)))
+    self.mlp_output = self.track_layer(tf.layers.Dense(
+        config.d_out,
+        kernel_initializer=tf.random_uniform_initializer(minval=-5e-3,
+                                                         maxval=5e-3)))
+
+  def call(self,
+           premise,
+           premise_transition,
+           hypothesis,
+           hypothesis_transition,
+           training=False):
+    """Invoke the forward pass the SNLIClassifier model.
+
+    Args:
+      premise: The word indices of the premise sentences, with shape
+        (max_prem_seq_len, batch_size).
+      premise_transition: The transitions for the premise sentences, with shape
+        (max_prem_seq_len * 2 - 3, batch_size).
+      hypothesis: The word indices of the hypothesis sentences, with shape
+        (max_hypo_seq_len, batch_size).
+      hypothesis_transition: The transitions for the hypothesis sentences, with
+        shape (max_hypo_seq_len * 2 - 3, batch_size).
+      training: Whether the invocation is under training mode.
+
+    Returns:
+      The logits, as a dense `Tensor` of shape (batch_size, d_out), where d_out
+      is the size of the output vector.
+    """
+    # Perform embedding lookup on the premise and hypothesis inputs, which have
+    # the word-index format.
+    premise_embed = tf.nn.embedding_lookup(self.embed, premise)
+    hypothesis_embed = tf.nn.embedding_lookup(self.embed, hypothesis)
+
+    if self.config.projection:
+      # Project the embedding vectors to another vector space.
+      premise_embed = self.projection(premise_embed)
+      hypothesis_embed = self.projection(hypothesis_embed)
+
+    # Perform batch normalization and dropout on the possibly projected word
+    # vectors.
+    premise_embed = self.embed_bn(premise_embed, training=training)
+    hypothesis_embed = self.embed_bn(hypothesis_embed, training=training)
+    premise_embed = self.embed_dropout(premise_embed, training=training)
+    hypothesis_embed = self.embed_dropout(hypothesis_embed, training=training)
+
+    # Run the batch-normalized and dropout-processed word vectors through the
+    # SPINN encoder.
+    premise = self.encoder(premise_embed, premise_transition,
+                           training=training)
+    hypothesis = self.encoder(hypothesis_embed, hypothesis_transition,
+                              training=training)
+
+    # Combine encoder outputs for premises and hypotheses into logits.
+    # Then apply batch normalization and dropuout on the logits.
+    logits = tf.concat(
+        [premise, hypothesis, premise - hypothesis, premise * hypothesis], 1)
+    logits = self.feature_dropout(
+        self.feature_bn(logits, training=training), training=training)
+
+    # Apply the multi-layer perceptron on the logits.
+    for dense, bn, dropout in zip(
+        self.mlp_dense, self.mlp_bn, self.mlp_dropout):
+      logits = tf.nn.elu(dense(logits))
+      logits = dropout(bn(logits, training=training), training=training)
+    logits = self.mlp_output(logits)
+    return logits
+
+
+class SNLIClassifierTrainer(object):
+  """A class that coordinates the training of an SNLIClassifier."""
+
+  def __init__(self, snli_classifier, lr):
+    """Constructor of SNLIClassifierTrainer.
+
+    Args:
+      snli_classifier: An instance of `SNLIClassifier`.
+      lr: Learning rate.
+    """
+    self._model = snli_classifier
+    # Create a custom learning rate Variable for the RMSProp optimizer, because
+    # the learning rate needs to be manually decayed later (see
+    # decay_learning_rate()).
+    self._learning_rate = tfe.Variable(lr, name="learning_rate")
+    self._optimizer = tf.train.RMSPropOptimizer(self._learning_rate,
+                                                epsilon=1e-6)
+
+  def loss(self, labels, logits):
+    """Calculate the loss given a batch of data.
+
+    Args:
+      labels: The truth labels, with shape (batch_size,).
+      logits: The logits output from the forward pass of the SNLIClassifier
+        model, with shape (batch_size, d_out), where d_out is the output
+        dimension size of the SNLIClassifier.
+
+    Returns:
+      The loss value, as a scalar `Tensor`.
+    """
+    return tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(
+        labels=labels, logits=logits))
+
+  def train_batch(self,
+                  labels,
+                  premise,
+                  premise_transition,
+                  hypothesis,
+                  hypothesis_transition):
+    """Train model on batch of data.
+
+    Args:
+      labels: The truth labels, with shape (batch_size,).
+      premise: The word indices of the premise sentences, with shape
+        (max_prem_seq_len, batch_size).
+      premise_transition: The transitions for the premise sentences, with shape
+        (max_prem_seq_len * 2 - 3, batch_size).
+      hypothesis: The word indices of the hypothesis sentences, with shape
+        (max_hypo_seq_len, batch_size).
+      hypothesis_transition: The transitions for the hypothesis sentences, with
+        shape (max_hypo_seq_len * 2 - 3, batch_size).
+
+    Returns:
+      1. loss value as a scalar `Tensor`.
+      2. logits as a dense `Tensor` of shape (batch_size, d_out), where d_out is
+        the output dimension size of the SNLIClassifier.
+    """
+    with tfe.GradientTape() as tape:
+      tape.watch(self._model.variables)
+      logits = self._model(premise,
+                           premise_transition,
+                           hypothesis,
+                           hypothesis_transition,
+                           training=True)
+      loss = self.loss(labels, logits)
+    gradients = tape.gradient(loss, self._model.variables)
+    self._optimizer.apply_gradients(zip(gradients, self._model.variables),
+                                    global_step=tf.train.get_global_step())
+    return loss, logits
+
+  def decay_learning_rate(self, decay_by):
+    """Decay learning rate of the optimizer by factor decay_by."""
+    self._learning_rate.assign(self._learning_rate * decay_by)
+    print("Decayed learning rate of optimizer to: %s" %
+          self._learning_rate.numpy())
+
+  @property
+  def learning_rate(self):
+    return self._learning_rate
+
+
+def _batch_n_correct(logits, label):
+  """Calculate number of correct predictions in a batch.
+
+  Args:
+    logits: A logits Tensor of shape `(batch_size, num_categories)` and dtype
+      `float32`.
+    label: A labels Tensor of shape `(batch_size,)` and dtype `int64`
+
+  Returns:
+    Number of correct predictions.
+  """
+  return tf.reduce_sum(
+      tf.cast((tf.equal(
+          tf.argmax(logits, axis=1), label)), tf.float32)).numpy()
+
+
+def _evaluate_on_dataset(snli_data, batch_size, model, trainer, use_gpu):
+  """Run evaluation on a dataset.
+
+  Args:
+    snli_data: The `data.SnliData` to use in this evaluation.
+    batch_size: The batch size to use during this evaluation.
+    model: An instance of `SNLIClassifier` to evaluate.
+    trainer: An instance of `SNLIClassifierTrainer to use for this
+      evaluation.
+    use_gpu: Whether GPU is being used.
+
+  Returns:
+    1. Average loss across all examples of the dataset.
+    2. Average accuracy rate across all examples of the dataset.
+  """
+  mean_loss = tfe.metrics.Mean()
+  accuracy = tfe.metrics.Accuracy()
+  for label, prem, prem_trans, hypo, hypo_trans in _get_dataset_iterator(
+      snli_data, batch_size):
+    if use_gpu:
+      label, prem, hypo = label.gpu(), prem.gpu(), hypo.gpu()
+    logits = model(prem, prem_trans, hypo, hypo_trans, training=False)
+    loss_val = trainer.loss(label, logits)
+    batch_size = tf.shape(label)[0]
+    mean_loss(loss_val, weights=batch_size.gpu() if use_gpu else batch_size)
+    accuracy(tf.argmax(logits, axis=1), label)
+  return mean_loss.result().numpy(), accuracy.result().numpy()
+
+
+def _get_dataset_iterator(snli_data, batch_size):
+  """Get a data iterator for a split of SNLI data.
+
+  Args:
+    snli_data: A `data.SnliData` object.
+    batch_size: The desired batch size.
+
+  Returns:
+    A dataset iterator.
+  """
+  with tf.device("/device:CPU:0"):
+    # Some tf.data ops, such as ShuffleDataset, are available only on CPU.
+    dataset = tf.data.Dataset.from_generator(
+        snli_data.get_generator(batch_size),
+        (tf.int64, tf.int64, tf.int64, tf.int64, tf.int64))
+    dataset = dataset.shuffle(snli_data.num_batches(batch_size))
+    return tfe.Iterator(dataset)
+
+
+def train_spinn(embed, train_data, dev_data, test_data, config):
+  """Train a SPINN model.
+
+  Args:
+    embed: The embedding matrix as a float32 numpy array with shape
+      [vocabulary_size, word_vector_len]. word_vector_len is the length of a
+      word embedding vector.
+    train_data: An instance of `data.SnliData`, for the train split.
+    dev_data: Same as above, for the dev split.
+    test_data: Same as above, for the test split.
+    config: A configuration object. See the argument to this Python binary for
+      details.
+
+  Returns:
+    1. Final loss value on the test split.
+    2. Final fraction of correct classifications on the test split.
+  """
+  use_gpu = tfe.num_gpus() > 0 and not config.force_cpu
+  device = "gpu:0" if use_gpu else "cpu:0"
+  print("Using device: %s" % device)
+
+  log_header = (
+      "  Time Epoch Iteration Progress    (%Epoch)   Loss   Dev/Loss"
+      "     Accuracy  Dev/Accuracy")
+  log_template = (
+      "{:>6.0f} {:>5.0f} {:>9.0f} {:>5.0f}/{:<5.0f} {:>7.0f}% {:>8.6f} {} "
+      "{:12.4f} {}")
+  dev_log_template = (
+      "{:>6.0f} {:>5.0f} {:>9.0f} {:>5.0f}/{:<5.0f} {:>7.0f}% {:>8.6f} "
+      "{:8.6f} {:12.4f} {:12.4f}")
+
+  summary_writer = tf.contrib.summary.create_summary_file_writer(
+      config.logdir, flush_millis=10000)
+  train_len = train_data.num_batches(config.batch_size)
+  with tf.device(device), \
+       tfe.restore_variables_on_create(
+           tf.train.latest_checkpoint(config.logdir)), \
+       summary_writer.as_default(), \
+       tf.contrib.summary.always_record_summaries():
+    model = SNLIClassifier(config, embed)
+    global_step = tf.train.get_or_create_global_step()
+    trainer = SNLIClassifierTrainer(model, config.lr)
+
+    start = time.time()
+    iterations = 0
+    mean_loss = tfe.metrics.Mean()
+    accuracy = tfe.metrics.Accuracy()
+    print(log_header)
+    for epoch in xrange(config.epochs):
+      batch_idx = 0
+      for label, prem, prem_trans, hypo, hypo_trans in _get_dataset_iterator(
+          train_data, config.batch_size):
+        if use_gpu:
+          label, prem, hypo = label.gpu(), prem.gpu(), hypo.gpu()
+          # prem_trans and hypo_trans are used for dynamic control flow and can
+          # remain on CPU. Same in _evaluate_on_dataset().
+
+        iterations += 1
+        batch_train_loss, batch_train_logits = trainer.train_batch(
+            label, prem, prem_trans, hypo, hypo_trans)
+        batch_size = tf.shape(label)[0]
+        mean_loss(batch_train_loss.numpy(),
+                  weights=batch_size.gpu() if use_gpu else batch_size)
+        accuracy(tf.argmax(batch_train_logits, axis=1), label)
+
+        if iterations % config.save_every == 0:
+          all_variables = (
+              model.variables + [trainer.learning_rate] + [global_step])
+          saver = tfe.Saver(all_variables)
+          saver.save(os.path.join(config.logdir, "ckpt"),
+                     global_step=global_step)
+
+        if iterations % config.dev_every == 0:
+          dev_loss, dev_frac_correct = _evaluate_on_dataset(
+              dev_data, config.batch_size, model, trainer, use_gpu)
+          print(dev_log_template.format(
+              time.time() - start,
+              epoch, iterations, 1 + batch_idx, train_len,
+              100.0 * (1 + batch_idx) / train_len,
+              mean_loss.result(), dev_loss,
+              accuracy.result() * 100.0, dev_frac_correct * 100.0))
+          tf.contrib.summary.scalar("dev/loss", dev_loss)
+          tf.contrib.summary.scalar("dev/accuracy", dev_frac_correct)
+        elif iterations % config.log_every == 0:
+          mean_loss_val = mean_loss.result()
+          accuracy_val = accuracy.result()
+          print(log_template.format(
+              time.time() - start,
+              epoch, iterations, 1 + batch_idx, train_len,
+              100.0 * (1 + batch_idx) / train_len,
+              mean_loss_val, " " * 8, accuracy_val * 100.0, " " * 12))
+          tf.contrib.summary.scalar("train/loss", mean_loss_val)
+          tf.contrib.summary.scalar("train/accuracy", accuracy_val)
+          # Reset metrics.
+          mean_loss = tfe.metrics.Mean()
+          accuracy = tfe.metrics.Accuracy()
+
+        batch_idx += 1
+      if (epoch + 1) % config.lr_decay_every == 0:
+        trainer.decay_learning_rate(config.lr_decay_by)
+
+    test_loss, test_frac_correct = _evaluate_on_dataset(
+        test_data, config.batch_size, model, trainer, use_gpu)
+    print("Final test loss: %g; accuracy: %g%%" %
+          (test_loss, test_frac_correct * 100.0))
+
+
+def main(_):
+  config = FLAGS
+
+  # Load embedding vectors.
+  vocab = data.load_vocabulary(FLAGS.data_root)
+  word2index, embed = data.load_word_vectors(FLAGS.data_root, vocab)
+
+  print("Loading train, dev and test data...")
+  train_data = data.SnliData(
+      os.path.join(FLAGS.data_root, "snli/snli_1.0/snli_1.0_train.txt"),
+      word2index, sentence_len_limit=FLAGS.sentence_len_limit)
+  dev_data = data.SnliData(
+      os.path.join(FLAGS.data_root, "snli/snli_1.0/snli_1.0_dev.txt"),
+      word2index, sentence_len_limit=FLAGS.sentence_len_limit)
+  test_data = data.SnliData(
+      os.path.join(FLAGS.data_root, "snli/snli_1.0/snli_1.0_test.txt"),
+      word2index, sentence_len_limit=FLAGS.sentence_len_limit)
+
+  train_spinn(embed, train_data, dev_data, test_data, config)
+
+
+if __name__ == "__main__":
+  parser = argparse.ArgumentParser(
+      description=
+      "TensorFlow eager implementation of the SPINN SNLI classifier.")
+  parser.add_argument("--data_root", type=str, default="/tmp/spinn-data",
+                      help="Root directory in which the training data and "
+                      "embedding matrix are found. See README.md for how to "
+                      "generate such a directory.")
+  parser.add_argument("--sentence_len_limit", type=int, default=-1,
+                      help="Maximum allowed sentence length (# of words). "
+                      "The default of -1 means unlimited.")
+  parser.add_argument("--logdir", type=str, default="/tmp/spinn-logs",
+                      help="Directory in which summaries will be written for "
+                      "TensorBoard.")
+  parser.add_argument("--epochs", type=int, default=50,
+                      help="Number of epochs to train.")
+  parser.add_argument("--batch_size", type=int, default=128,
+                      help="Batch size to use during training.")
+  parser.add_argument("--d_proj", type=int, default=600,
+                      help="Dimensions to project the word embedding vectors "
+                      "to.")
+  parser.add_argument("--d_hidden", type=int, default=300,
+                      help="Size of the hidden layer of the Tracker.")
+  parser.add_argument("--d_out", type=int, default=4,
+                      help="Output dimensions of the SNLIClassifier.")
+  parser.add_argument("--d_mlp", type=int, default=1024,
+                      help="Size of each layer of the multi-layer perceptron "
+                      "of the SNLICLassifier.")
+  parser.add_argument("--n_mlp_layers", type=int, default=2,
+                      help="Number of layers in the multi-layer perceptron "
+                      "of the SNLICLassifier.")
+  parser.add_argument("--d_tracker", type=int, default=64,
+                      help="Size of the tracker LSTM.")
+  parser.add_argument("--log_every", type=int, default=50,
+                      help="Print log and write TensorBoard summary every _ "
+                      "training batches.")
+  parser.add_argument("--lr", type=float, default=2e-3,
+                      help="Initial learning rate.")
+  parser.add_argument("--lr_decay_by", type=float, default=0.75,
+                      help="The ratio to multiply the learning rate by every "
+                      "time the learning rate is decayed.")
+  parser.add_argument("--lr_decay_every", type=float, default=1,
+                      help="Decay the learning rate every _ epoch(s).")
+  parser.add_argument("--dev_every", type=int, default=1000,
+                      help="Run evaluation on the dev split every _ training "
+                      "batches.")
+  parser.add_argument("--save_every", type=int, default=1000,
+                      help="Save checkpoint every _ training batches.")
+  parser.add_argument("--embed_dropout", type=float, default=0.08,
+                      help="Word embedding dropout rate.")
+  parser.add_argument("--mlp_dropout", type=float, default=0.07,
+                      help="SNLIClassifier multi-layer perceptron dropout "
+                      "rate.")
+  parser.add_argument("--no-projection", action="store_false",
+                      dest="projection",
+                      help="Whether word embedding vectors are projected to "
+                      "another set of vectors (see d_proj).")
+  parser.add_argument("--predict_transitions", action="store_true",
+                      dest="predict",
+                      help="Whether the Tracker will perform prediction.")
+  parser.add_argument("--force_cpu", action="store_true", dest="force_cpu",
+                      help="Force use CPU-only regardless of whether a GPU is "
+                      "available.")
+  FLAGS, unparsed = parser.parse_known_args()
+
+  tfe.run(main=main, argv=[sys.argv[0]] + unparsed)
-- 
GitLab


From 5b420f6bb29cd0c3796dd2d7cb2aa4bf0adf620f Mon Sep 17 00:00:00 2001
From: Rohan Jain <rohanj@google.com>
Date: Fri, 1 Dec 2017 14:18:39 -0800
Subject: [PATCH 0528/1225] Adds a ReleaseHandle method to the
 FunctionLibraryRuntime interface that allows for releasing the state
 associated with the handle.

Also simplifies the state owned by the FunctionLibraryRuntimeImpl. Instead of having a vector of ref counted Item objects and a separate vector of function bodies, we merge it into one object that holds the entire instantiated state for the function.

PiperOrigin-RevId: 177639560
---
 tensorflow/core/common_runtime/function.cc    | 62 ++++++++++++-------
 .../core/common_runtime/function_test.cc      | 20 ++++--
 .../process_function_library_runtime.cc       | 60 +++++++++++++++---
 .../process_function_library_runtime.h        | 14 ++++-
 .../process_function_library_runtime_test.cc  | 16 +++++
 tensorflow/core/framework/function.h          |  3 +
 6 files changed, 138 insertions(+), 37 deletions(-)

diff --git a/tensorflow/core/common_runtime/function.cc b/tensorflow/core/common_runtime/function.cc
index 23d0f331c5..4c87c922c2 100644
--- a/tensorflow/core/common_runtime/function.cc
+++ b/tensorflow/core/common_runtime/function.cc
@@ -153,6 +153,8 @@ class FunctionLibraryRuntimeImpl : public FunctionLibraryRuntime {
   Status Instantiate(const string& function_name, AttrSlice attrs,
                      Handle* handle) override;
 
+  Status ReleaseHandle(Handle handle) override;
+
   const FunctionBody* GetFunctionBody(Handle handle) override;
 
   Status CreateKernel(const NodeDef& ndef, OpKernel** kernel) override;
@@ -190,18 +192,21 @@ class FunctionLibraryRuntimeImpl : public FunctionLibraryRuntime {
 
   mutable mutex mu_;
 
-  // func_graphs_ never shrinks or reorders its members.
-  std::vector<FunctionBody*> func_graphs_ GUARDED_BY(mu_);
+  int next_handle_ GUARDED_BY(mu_);
 
   // The instantiated and transformed function is encoded as a Graph
   // object, and an executor is created for the graph.
   struct Item : public core::RefCounted {
     const Graph* graph = nullptr;  // Owned by exec.
+    FunctionBody* func_graph = nullptr;
     Executor* exec = nullptr;
 
-    ~Item() override { delete this->exec; }
+    ~Item() override {
+      delete this->func_graph;
+      delete this->exec;
+    }
   };
-  std::vector<Item*> items_;
+  std::unordered_map<Handle, Item*> items_ GUARDED_BY(mu_);
 
   ProcessFunctionLibraryRuntime* parent_ = nullptr;  // not owned.
 
@@ -236,6 +241,7 @@ FunctionLibraryRuntimeImpl::FunctionLibraryRuntimeImpl(
       device_name_(device_ == nullptr
                        ? ProcessFunctionLibraryRuntime::kDefaultFLRDevice
                        : device_->name()),
+      next_handle_(0),
       parent_(parent) {
   get_func_sig_ = [this](const string& op, const OpDef** sig) {
     return lib_def_->LookUpOpDef(op, sig);
@@ -246,9 +252,9 @@ FunctionLibraryRuntimeImpl::FunctionLibraryRuntimeImpl(
 }
 
 FunctionLibraryRuntimeImpl::~FunctionLibraryRuntimeImpl() {
-  for (FunctionBody* p : func_graphs_) delete p;
-  for (Item* item : items_)
-    if (item) item->Unref();
+  for (auto item : items_) {
+    if (item.second) item.second->Unref();
+  }
 }
 
 // An asynchronous op kernel which executes an instantiated function
@@ -309,9 +315,8 @@ const FunctionBody* FunctionLibraryRuntimeImpl::GetFunctionBody(Handle h) {
   }
 
   mutex_lock l(mu_);
-  CHECK_LE(0, local_handle);
-  CHECK_LT(local_handle, func_graphs_.size());
-  return func_graphs_[local_handle];
+  CHECK_EQ(1, items_.count(local_handle));
+  return items_[local_handle]->func_graph;
 }
 
 Status FunctionLibraryRuntimeImpl::CreateKernel(const NodeDef& ndef,
@@ -478,14 +483,32 @@ Status FunctionLibraryRuntimeImpl::Instantiate(const string& function_name,
     if (*handle != kInvalidHandle) {
       delete fbody;
     } else {
-      *handle = parent_->AddHandle(key, device_name_, func_graphs_.size());
-      func_graphs_.push_back(fbody);
-      items_.resize(func_graphs_.size());
+      *handle = parent_->AddHandle(key, device_name_, next_handle_);
+      Item* item = new Item;
+      item->func_graph = fbody;
+      items_.insert({next_handle_, item});
+      next_handle_++;
     }
   }
   return Status::OK();
 }
 
+Status FunctionLibraryRuntimeImpl::ReleaseHandle(Handle handle) {
+  if (!parent_->IsInstantiatedOnDevice(device_name_, handle)) {
+    return parent_->ReleaseHandle(handle);
+  }
+
+  LocalHandle h = parent_->GetHandleOnDevice(device_name_, handle);
+  mutex_lock l(mu_);
+  CHECK_EQ(1, items_.count(h));
+  Item* item = items_[h];
+  if (item->Unref()) {
+    items_.erase(h);
+    TF_RETURN_IF_ERROR(parent_->RemoveHandle(handle));
+  }
+  return Status::OK();
+}
+
 void DumpGraph(StringPiece label, const Graph* g) {
   // TODO(zhifengc): Change Graph to record #nodes.
   VLOG(1) << "Graph " << label << " #nodes " << g->num_nodes() << " #edges "
@@ -529,7 +552,6 @@ Status FunctionLibraryRuntimeImpl::CreateItem(Handle handle, Item** item) {
   Executor* exec;
   TF_RETURN_IF_ERROR(NewLocalExecutor(params, g.release(), &exec));
 
-  *item = new Item;
   (*item)->graph = graph;
   (*item)->exec = exec;
   return Status::OK();
@@ -539,13 +561,12 @@ Status FunctionLibraryRuntimeImpl::GetOrCreateItem(Handle handle, Item** item) {
   LocalHandle local_handle = parent_->GetHandleOnDevice(device_name_, handle);
   {
     mutex_lock l(mu_);
-    if (local_handle >= items_.size()) {
+    if (items_.count(local_handle) == 0) {
       return errors::NotFound("Function handle ", handle,
                               " is not valid. Likely an internal error.");
     }
     *item = items_[local_handle];
-    if (*item != nullptr) {
-      (*item)->Ref();
+    if ((*item)->exec != nullptr) {
       return Status::OK();
     }
   }
@@ -556,9 +577,8 @@ Status FunctionLibraryRuntimeImpl::GetOrCreateItem(Handle handle, Item** item) {
   {
     mutex_lock l(mu_);
     if (items_[local_handle] == nullptr) {
-      // Install *item in items_.
-      items_[local_handle] = *item;
-      (*item)->Ref();
+      // Insert *item in items_.
+      items_.insert({local_handle, *item});
     }
   }
   return Status::OK();
@@ -617,7 +637,6 @@ void FunctionLibraryRuntimeImpl::RunRemote(const Options& opts, Handle handle,
             *exec_args, [item, frame, rets, done, source_device, target_device,
                          target_incarnation, rendezvous, device_context,
                          remote_args, exec_args](const Status& status) {
-              item->Unref();
               Status s = status;
               if (s.ok()) {
                 s = frame->ConsumeRetvals(rets);
@@ -701,7 +720,6 @@ void FunctionLibraryRuntimeImpl::Run(const Options& opts, Handle handle,
       *exec_args,
       // Done callback.
       [item, frame, rets, done, exec_args](const Status& status) {
-        item->Unref();
         Status s = status;
         if (s.ok()) {
           s = frame->ConsumeRetvals(rets);
diff --git a/tensorflow/core/common_runtime/function_test.cc b/tensorflow/core/common_runtime/function_test.cc
index d183bf7c97..575af566d5 100644
--- a/tensorflow/core/common_runtime/function_test.cc
+++ b/tensorflow/core/common_runtime/function_test.cc
@@ -207,7 +207,19 @@ class FunctionLibraryRuntimeTest : public ::testing::Test {
       return status;
     }
     FunctionLibraryRuntime::Options opts;
-    return Run(flr, handle, opts, args, std::move(rets));
+    status = Run(flr, handle, opts, args, rets);
+    if (!status.ok()) return status;
+
+    // Release the handle and try running again. It should not succeed.
+    status = flr->ReleaseHandle(handle);
+    if (!status.ok()) return status;
+
+    Status status2 = Run(flr, handle, opts, args, std::move(rets));
+    EXPECT_TRUE(errors::IsInvalidArgument(status2));
+    EXPECT_TRUE(
+        StringPiece(status2.error_message()).contains("remote execution."));
+
+    return status;
   }
 
   std::unique_ptr<Graph> GetFuncBody(FunctionLibraryRuntime* flr,
@@ -498,7 +510,7 @@ TEST_F(FunctionLibraryRuntimeTest, OptimizeGraph) {
     Scope s = Scope::NewRootScope();
     auto x = ops::_Arg(s.WithOpName("x"), DT_FLOAT, 0);
     auto x4_x2_scale = ops::Const<float>(
-        s.WithOpName("x4/x2/scale/_12__cf__2")
+        s.WithOpName("x4/x2/scale/_12__cf__3")
             .WithDevice("/job:localhost/replica:0/task:0/device:CPU:0"),
         2.0f);
     auto x4_x2_y = ops::Mul(s.WithOpName("x4/x2/y"), x, x4_x2_scale);
@@ -694,13 +706,13 @@ TEST_F(FunctionLibraryRuntimeTest, Gradient_XTimesTwo) {
     auto x = ops::_Arg(s.WithOpName("x"), DT_FLOAT, 0);
     auto func0 = ops::_Arg(s.WithOpName("Func/_0"), DT_FLOAT, 1);
     auto scale = ops::Const(
-        s.WithOpName("scale/_5__cf__6")
+        s.WithOpName("scale/_5__cf__7")
             .WithDevice("/job:localhost/replica:0/task:0/device:CPU:0"),
         2.0f);
     auto func1_gx = ops::Mul(s.WithOpName("Func/_1/gx"), func0, scale);
     auto func1_sx = ops::Shape(s.WithOpName("Func/_1/sx"), x);
     auto const0 = ops::Const(
-        s.WithOpName("Func/_1/sy/_6__cf__7")
+        s.WithOpName("Func/_1/sy/_6__cf__8")
             .WithDevice("/job:localhost/replica:0/task:0/device:CPU:0"),
         0, {0});
     auto func1_rx = ops::internal::BroadcastGradientArgs(
diff --git a/tensorflow/core/common_runtime/process_function_library_runtime.cc b/tensorflow/core/common_runtime/process_function_library_runtime.cc
index 142ff2339b..53a14121d4 100644
--- a/tensorflow/core/common_runtime/process_function_library_runtime.cc
+++ b/tensorflow/core/common_runtime/process_function_library_runtime.cc
@@ -30,7 +30,10 @@ ProcessFunctionLibraryRuntime::ProcessFunctionLibraryRuntime(
     const FunctionLibraryDefinition* lib_def,
     const OptimizerOptions& optimizer_options,
     DistributedFunctionLibraryRuntime* parent)
-    : device_mgr_(device_mgr), lib_def_(lib_def), parent_(parent) {
+    : device_mgr_(device_mgr),
+      lib_def_(lib_def),
+      next_handle_(0),
+      parent_(parent) {
   if (device_mgr == nullptr) {
     flr_map_[nullptr] =
         NewFunctionLibraryRuntime(nullptr, env, nullptr, graph_def_version,
@@ -50,7 +53,10 @@ ProcessFunctionLibraryRuntime::ProcessFunctionLibraryRuntime(
     const OptimizerOptions& optimizer_options,
     CustomKernelCreator custom_kernel_creator,
     DistributedFunctionLibraryRuntime* parent)
-    : device_mgr_(device_mgr), lib_def_(lib_def), parent_(parent) {
+    : device_mgr_(device_mgr),
+      lib_def_(lib_def),
+      next_handle_(0),
+      parent_(parent) {
   if (device_mgr == nullptr) {
     flr_map_[nullptr] = NewFunctionLibraryRuntime(
         nullptr, env, nullptr, graph_def_version, lib_def, optimizer_options,
@@ -185,30 +191,38 @@ FunctionLibraryRuntime::Handle ProcessFunctionLibraryRuntime::AddHandle(
   FunctionLibraryRuntime::Handle h =
       gtl::FindWithDefault(table_, function_key, kInvalidHandle);
   if (h != kInvalidHandle) {
-    return h;
+    if (function_data_.count(h) != 0) return h;
   }
-  h = function_data_.size();
-  function_data_.emplace_back(device_name, local_handle);
+  h = next_handle_;
+  function_data_.insert({h, FunctionData(device_name, local_handle)});
   table_[function_key] = h;
+  next_handle_++;
   return h;
 }
 
 FunctionLibraryRuntime::Handle ProcessFunctionLibraryRuntime::GetHandle(
     const string& function_key) const {
   mutex_lock l(mu_);
-  return gtl::FindWithDefault(table_, function_key, kInvalidHandle);
+  FunctionLibraryRuntime::Handle h =
+      gtl::FindWithDefault(table_, function_key, kInvalidHandle);
+  if (h != kInvalidHandle) {
+    if (function_data_.count(h) == 0) return kInvalidHandle;
+  }
+  return h;
 }
 
 bool ProcessFunctionLibraryRuntime::IsInstantiatedOnDevice(
     const string& device_name, FunctionLibraryRuntime::Handle handle) {
-  return GetHandleOnDevice(device_name, handle) != -1;
+  return GetHandleOnDevice(device_name, handle) != kInvalidHandle;
 }
 
 FunctionLibraryRuntime::LocalHandle
 ProcessFunctionLibraryRuntime::GetHandleOnDevice(
     const string& device_name, FunctionLibraryRuntime::Handle handle) {
   mutex_lock l(mu_);
-  CHECK_LE(handle, function_data_.size());
+  if (function_data_.count(handle) == 0) {
+    return kInvalidLocalHandle;
+  }
   const FunctionData& function_data = function_data_[handle];
   if (function_data.target_device != device_name) {
     return kInvalidLocalHandle;
@@ -219,7 +233,7 @@ ProcessFunctionLibraryRuntime::GetHandleOnDevice(
 string ProcessFunctionLibraryRuntime::GetDeviceName(
     FunctionLibraryRuntime::Handle handle) {
   mutex_lock l(mu_);
-  CHECK_LE(handle, function_data_.size());
+  CHECK_EQ(1, function_data_.count(handle));
   const FunctionData& function_data = function_data_[handle];
   return function_data.target_device;
 }
@@ -245,6 +259,29 @@ Status ProcessFunctionLibraryRuntime::Instantiate(
   return Status::OK();
 }
 
+Status ProcessFunctionLibraryRuntime::RemoveHandle(
+    FunctionLibraryRuntime::Handle handle) {
+  mutex_lock l(mu_);
+  function_data_.erase(handle);
+  return Status::OK();
+}
+
+Status ProcessFunctionLibraryRuntime::ReleaseHandle(
+    FunctionLibraryRuntime::Handle handle) {
+  FunctionLibraryRuntime* flr = nullptr;
+  string target_device;
+  {
+    mutex_lock l(mu_);
+    CHECK_EQ(1, function_data_.count(handle));
+    target_device = function_data_[handle].target_device;
+  }
+  flr = GetFLR(target_device);
+  if (flr != nullptr) {
+    return flr->ReleaseHandle(handle);
+  }
+  return errors::InvalidArgument("Handle not found: ", handle);
+}
+
 void ProcessFunctionLibraryRuntime::Run(
     const FunctionLibraryRuntime::Options& opts,
     FunctionLibraryRuntime::Handle handle, gtl::ArraySlice<Tensor> args,
@@ -261,7 +298,10 @@ void ProcessFunctionLibraryRuntime::Run(
   FunctionLibraryRuntime::LocalHandle local_handle;
   {
     mutex_lock l(mu_);
-    CHECK_LE(handle, function_data_.size());
+    if (function_data_.count(handle) == 0) {
+      done(errors::NotFound("Handle: ", handle, " not found."));
+      return;
+    }
     target_device = function_data_[handle].target_device;
     local_handle = function_data_[handle].local_handle;
   }
diff --git a/tensorflow/core/common_runtime/process_function_library_runtime.h b/tensorflow/core/common_runtime/process_function_library_runtime.h
index a267bc3601..3aa7b87286 100644
--- a/tensorflow/core/common_runtime/process_function_library_runtime.h
+++ b/tensorflow/core/common_runtime/process_function_library_runtime.h
@@ -123,6 +123,12 @@ class ProcessFunctionLibraryRuntime {
   Status Instantiate(const string& function_name, AttrSlice attrs,
                      FunctionLibraryRuntime::Handle* handle);
 
+  // Delegates to the local FLR that owns state corresponding to `handle` and
+  // tells it to release it. If the `handle` isnt' needed at all, the local FLR
+  // might call RemoveHandle on this to get rid of the state owned by the Proc
+  // FLR.
+  Status ReleaseHandle(FunctionLibraryRuntime::Handle handle);
+
   // Runs the function with given `handle`. Function could have been
   // instantiated on any device. More details in framework/function.h
   void Run(const FunctionLibraryRuntime::Options& opts,
@@ -140,6 +146,9 @@ class ProcessFunctionLibraryRuntime {
   // of the device where the function is registered.
   string GetDeviceName(FunctionLibraryRuntime::Handle handle);
 
+  // Removes handle from the state owned by this object.
+  Status RemoveHandle(FunctionLibraryRuntime::Handle handle);
+
   friend class FunctionLibraryRuntimeImpl;
 
   mutable mutex mu_;
@@ -151,6 +160,7 @@ class ProcessFunctionLibraryRuntime {
     FunctionData(const string& target_device,
                  FunctionLibraryRuntime::LocalHandle local_handle)
         : target_device(target_device), local_handle(local_handle) {}
+    FunctionData() : FunctionData("", -1) {}
   };
 
   const DeviceMgr* const device_mgr_;
@@ -158,8 +168,10 @@ class ProcessFunctionLibraryRuntime {
   // Holds all the function invocations here.
   std::unordered_map<string, FunctionLibraryRuntime::Handle> table_
       GUARDED_BY(mu_);
-  std::vector<FunctionData> function_data_ GUARDED_BY(mu_);
+  std::unordered_map<FunctionLibraryRuntime::Handle, FunctionData>
+      function_data_ GUARDED_BY(mu_);
   std::unordered_map<Device*, std::unique_ptr<FunctionLibraryRuntime>> flr_map_;
+  int next_handle_ GUARDED_BY(mu_);
   DistributedFunctionLibraryRuntime* const parent_;
 };
 
diff --git a/tensorflow/core/common_runtime/process_function_library_runtime_test.cc b/tensorflow/core/common_runtime/process_function_library_runtime_test.cc
index 6bc8f980c7..270e46dfe9 100644
--- a/tensorflow/core/common_runtime/process_function_library_runtime_test.cc
+++ b/tensorflow/core/common_runtime/process_function_library_runtime_test.cc
@@ -82,6 +82,22 @@ class ProcessFunctionLibraryRuntimeTest : public ::testing::Test {
 
     EXPECT_GE(call_count, 1);  // Test runner is used.
 
+    // Release the handle and then try running the function. It shouldn't
+    // succeed.
+    status = proc_flr_->ReleaseHandle(handle);
+    if (!status.ok()) {
+      return status;
+    }
+    Notification done2;
+    proc_flr_->Run(opts, handle, args, &out,
+                   [&status, &done2](const Status& s) {
+                     status = s;
+                     done2.Notify();
+                   });
+    done2.WaitForNotification();
+    EXPECT_TRUE(errors::IsNotFound(status));
+    EXPECT_TRUE(StringPiece(status.error_message()).contains("not found."));
+
     return Status::OK();
   }
 
diff --git a/tensorflow/core/framework/function.h b/tensorflow/core/framework/function.h
index 305b140a44..d3d6358362 100644
--- a/tensorflow/core/framework/function.h
+++ b/tensorflow/core/framework/function.h
@@ -408,6 +408,9 @@ class FunctionLibraryRuntime {
   virtual Status Instantiate(const string& function_name, AttrSlice attrs,
                              Handle* handle) = 0;
 
+  // Releases state associated with the handle.
+  virtual Status ReleaseHandle(Handle handle) = 0;
+
   // Returns the function body for the instantiated function given its
   // handle 'h'. Returns nullptr if "h" is not found.
   //
-- 
GitLab


From 10f77231b005c76b5a771243e18384b4b66be325 Mon Sep 17 00:00:00 2001
From: Gunhan Gulsoy <gunan@google.com>
Date: Fri, 1 Dec 2017 14:28:29 -0800
Subject: [PATCH 0529/1225] Automated g4 rollback of changelist 177633858

PiperOrigin-RevId: 177640956
---
 tensorflow/contrib/distributions/BUILD        |  13 --
 tensorflow/contrib/distributions/__init__.py  |   2 -
 .../kernel_tests/autoregressive_test.py       |  94 --------
 .../python/ops/autoregressive.py              | 208 ------------------
 tensorflow/python/ops/distributions/util.py   |   1 -
 5 files changed, 318 deletions(-)
 delete mode 100644 tensorflow/contrib/distributions/python/kernel_tests/autoregressive_test.py
 delete mode 100644 tensorflow/contrib/distributions/python/ops/autoregressive.py

diff --git a/tensorflow/contrib/distributions/BUILD b/tensorflow/contrib/distributions/BUILD
index c5bd91484e..145b9495ff 100644
--- a/tensorflow/contrib/distributions/BUILD
+++ b/tensorflow/contrib/distributions/BUILD
@@ -127,19 +127,6 @@ cuda_py_test(
     tags = ["no_pip"],
 )
 
-cuda_py_test(
-    name = "autoregressive_test",
-    size = "small",
-    srcs = ["python/kernel_tests/autoregressive_test.py"],
-    additional_deps = [
-        ":distributions_py",
-        "//third_party/py/numpy",
-        "//tensorflow/python:client_testlib",
-        "//tensorflow/python:framework_for_generated_wrappers",
-        "//tensorflow/python:platform_test",
-    ],
-)
-
 cuda_py_test(
     name = "binomial_test",
     size = "small",
diff --git a/tensorflow/contrib/distributions/__init__.py b/tensorflow/contrib/distributions/__init__.py
index a8cf40c52e..0d12d83893 100644
--- a/tensorflow/contrib/distributions/__init__.py
+++ b/tensorflow/contrib/distributions/__init__.py
@@ -23,7 +23,6 @@ from __future__ import print_function
 # pylint: disable=unused-import,wildcard-import,line-too-long,g-importing-member
 
 from tensorflow.contrib.distributions.python.ops import bijectors
-from tensorflow.contrib.distributions.python.ops.autoregressive import *
 from tensorflow.contrib.distributions.python.ops.binomial import *
 from tensorflow.contrib.distributions.python.ops.cauchy import *
 from tensorflow.contrib.distributions.python.ops.chi2 import *
@@ -92,7 +91,6 @@ _allowed_symbols = [
     'NOT_REPARAMETERIZED',
     'ReparameterizationType',
     'Distribution',
-    'Autoregressive',
     'Binomial',
     'Bernoulli',
     'BernoulliWithSigmoidProbs',
diff --git a/tensorflow/contrib/distributions/python/kernel_tests/autoregressive_test.py b/tensorflow/contrib/distributions/python/kernel_tests/autoregressive_test.py
deleted file mode 100644
index b625093fb7..0000000000
--- a/tensorflow/contrib/distributions/python/kernel_tests/autoregressive_test.py
+++ /dev/null
@@ -1,94 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import numpy as np
-
-from tensorflow.contrib.distributions.python.ops import autoregressive as autoregressive_lib
-from tensorflow.contrib.distributions.python.ops import independent as independent_lib
-from tensorflow.contrib.distributions.python.ops import test_util
-from tensorflow.contrib.distributions.python.ops.bijectors.affine import Affine
-from tensorflow.contrib.distributions.python.ops.bijectors.masked_autoregressive import MaskedAutoregressiveFlow
-from tensorflow.python.ops import array_ops
-from tensorflow.python.ops import math_ops
-from tensorflow.python.ops.distributions import normal as normal_lib
-from tensorflow.python.ops.distributions import transformed_distribution as transformed_distribution_lib
-from tensorflow.python.ops.distributions import util as distribution_util
-from tensorflow.python.platform import test
-
-
-class AutogressiveTest(test_util.VectorDistributionTestHelpers, test.TestCase):
-  """Tests the Autoregressive distribution."""
-
-  def setUp(self):
-    self._rng = np.random.RandomState(42)
-
-  def _random_scale_tril(self, event_size):
-    n = np.int32(event_size * (event_size + 1) // 2)
-    p = 2. * self._rng.random_sample(n).astype(np.float32) - 1.
-    return distribution_util.fill_triangular(0.25 * p)
-
-  def _normal_fn(self, affine_bijector):
-    def _fn(samples):
-      scale = math_ops.exp(affine_bijector.forward(samples))
-      return independent_lib.Independent(
-          normal_lib.Normal(loc=0., scale=scale, validate_args=True),
-          reinterpreted_batch_ndims=1)
-    return _fn
-
-  def testSampleAndLogProbConsistency(self):
-    batch_shape = []
-    event_size = 2
-    with self.test_session() as sess:
-      batch_event_shape = np.concatenate([batch_shape, [event_size]], axis=0)
-      sample0 = array_ops.zeros(batch_event_shape)
-      affine = Affine(scale_tril=self._random_scale_tril(event_size))
-      ar = autoregressive_lib.Autoregressive(
-          self._normal_fn(affine), sample0, validate_args=True)
-      self.run_test_sample_consistent_log_prob(
-          sess.run, ar, radius=1., center=0., rtol=0.01)
-
-  def testCompareToBijector(self):
-    """Demonstrates equivalence between TD, Bijector approach and AR dist."""
-    sample_shape = [4, 5]
-    batch_shape = []
-    event_size = 2
-    with self.test_session() as sess:
-      batch_event_shape = np.concatenate([batch_shape, [event_size]], axis=0)
-      sample0 = array_ops.zeros(batch_event_shape)
-      affine = Affine(scale_tril=self._random_scale_tril(event_size))
-      ar = autoregressive_lib.Autoregressive(
-          self._normal_fn(affine), sample0, validate_args=True)
-      ar_flow = MaskedAutoregressiveFlow(
-          is_constant_jacobian=True,
-          shift_and_log_scale_fn=lambda x: [None, affine.forward(x)],
-          validate_args=True)
-      td = transformed_distribution_lib.TransformedDistribution(
-          distribution=normal_lib.Normal(loc=0., scale=1.),
-          bijector=ar_flow,
-          event_shape=[event_size],
-          batch_shape=batch_shape,
-          validate_args=True)
-      x_shape = np.concatenate(
-          [sample_shape, batch_shape, [event_size]], axis=0)
-      x = 2. * self._rng.random_sample(x_shape).astype(np.float32) - 1.
-      td_log_prob_, ar_log_prob_ = sess.run([td.log_prob(x), ar.log_prob(x)])
-      self.assertAllClose(td_log_prob_, ar_log_prob_, atol=0., rtol=1e-6)
-
-
-if __name__ == "__main__":
-  test.main()
diff --git a/tensorflow/contrib/distributions/python/ops/autoregressive.py b/tensorflow/contrib/distributions/python/ops/autoregressive.py
deleted file mode 100644
index 852298bf33..0000000000
--- a/tensorflow/contrib/distributions/python/ops/autoregressive.py
+++ /dev/null
@@ -1,208 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""The Autoregressive distribution."""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import numpy as np
-
-from tensorflow.python.framework import ops
-from tensorflow.python.ops.distributions import distribution as distribution_lib
-from tensorflow.python.ops.distributions import util as distribution_util
-
-
-class Autoregressive(distribution_lib.Distribution):
-  """Autoregressive distributions.
-
-  The Autoregressive distribution enables learning (often) richer multivariate
-  distributions by repeatedly applying a [diffeomorphic](
-  https://en.wikipedia.org/wiki/Diffeomorphism) transformation (such as
-  implemented by `Bijector`s). Regarding terminology,
-
-    "Autoregressive models decompose the joint density as a product of
-    conditionals, and model each conditional in turn. Normalizing flows
-    transform a base density (e.g. a standard Gaussian) into the target density
-    by an invertible transformation with tractable Jacobian." [1]
-
-  In other words, the "autoregressive property" is equivalent to the
-  decomposition, `p(x) = prod{ p(x[i] | x[0:i]) : i=0, ..., d }`. The provided
-  `shift_and_log_scale_fn`, `masked_autoregressive_default_template`, achieves
-  this property by zeroing out weights in its `masked_dense` layers.
-
-  Practically speaking the autoregressive property means that there exists a
-  permutation of the event coordinates such that each coordinate is a
-  diffeomorphic function of only preceding coordinates. [2]
-
-  #### Mathematical Details
-
-  The probability function is,
-
-  ```none
-  prob(x; fn, n) = fn(x).prob(x)
-  ```
-
-  And a sample is generated by,
-
-  ```none
-  x = fn(...fn(fn(x0).sample()).sample()).sample()
-  ```
-
-  where the ellipses (`...`) represent `n-2` composed calls to `fn`, `fn`
-  constructs a `tf.distributions.Distribution`-like instance, and `x0` is a
-  fixed initializing `Tensor`.
-
-  #### Examples
-
-  ```python
-  tfd = tf.contrib.distributions
-
-  def normal_fn(self, event_size):
-    n = event_size * (event_size + 1) / 2
-    p = tf.Variable(tfd.Normal(loc=0., scale=1.).sample(n))
-    affine = tfd.bijectors.Affine(
-        scale_tril=tfd.fill_triangular(0.25 * p))
-    def _fn(samples):
-      scale = math_ops.exp(affine.forward(samples)).eval()
-      return independent_lib.Independent(
-          normal_lib.Normal(loc=0., scale=scale, validate_args=True),
-          reinterpreted_batch_ndims=1)
-    return _fn
-
-  batch_and_event_shape = [3, 2, 4]
-  sample0 = array_ops.zeros(batch_and_event_shape)
-  ar = autoregressive_lib.Autoregressive(
-      self._normal_fn(batch_and_event_shape[-1]), sample0)
-  x = ar.sample([6, 5])
-  # ==> x.shape = [6, 5, 3, 2, 4]
-  prob_x = ar.prob(x)
-  # ==> x.shape = [6, 5, 3, 2]
-
-  ```
-
-  [1]: "Masked Autoregressive Flow for Density Estimation."
-       George Papamakarios, Theo Pavlakou, Iain Murray. Arxiv. 2017.
-       https://arxiv.org/abs/1705.07057
-
-  [2]: "Conditional Image Generation with PixelCNN Decoders."
-       Aaron van den Oord, Nal Kalchbrenner, Oriol Vinyals, Lasse Espeholt, Alex
-       Graves, Koray Kavukcuoglu. Arxiv, 2016.
-       https://arxiv.org/abs/1606.05328
-  """
-
-  def __init__(self,
-               distribution_fn,
-               sample0=None,
-               num_steps=None,
-               validate_args=False,
-               allow_nan_stats=True,
-               name="Autoregressive"):
-    """Construct an `Autoregressive` distribution.
-
-    Args:
-      distribution_fn: Python `callable` which constructs a
-        `tf.distributions.Distribution`-like instance from a `Tensor` (e.g.,
-        `sample0`). The function must respect the "autoregressive property",
-        i.e., there exists a permutation of event such that each coordinate is a
-        diffeomorphic function of on preceding coordinates.
-      sample0: Initial input to `distribution_fn`; used to
-        build the distribution in `__init__` which in turn specifies this
-        distribution's properties, e.g., `event_shape`, `batch_shape`, `dtype`.
-        If unspecified, then `distribution_fn` should be default constructable.
-      num_steps: Number of times `distribution_fn` is composed from samples,
-        e.g., `num_steps=2` implies
-        `distribution_fn(distribution_fn(sample0).sample(n)).sample()`.
-      validate_args: Python `bool`.  Whether to validate input with asserts.
-        If `validate_args` is `False`, and the inputs are invalid,
-        correct behavior is not guaranteed.
-      allow_nan_stats: Python `bool`, default `True`. When `True`, statistics
-        (e.g., mean, mode, variance) use the value "`NaN`" to indicate the
-        result is undefined. When `False`, an exception is raised if one or
-        more of the statistic's batch members are undefined.
-      name: Python `str` name prefixed to Ops created by this class.
-        Default value: "Autoregressive".
-
-    Raises:
-      ValueError: if `num_steps` and
-        `distribution_fn(sample0).event_shape.num_elements()` are both `None`.
-      ValueError: if `num_steps < 1`.
-    """
-    parameters = locals()
-    with ops.name_scope(name):
-      self._distribution_fn = distribution_fn
-      self._sample0 = sample0
-      self._distribution0 = (distribution_fn() if sample0 is None
-                             else distribution_fn(sample0))
-      if num_steps is None:
-        num_steps = self._distribution0.event_shape.num_elements()
-        if num_steps is None:
-          raise ValueError("distribution_fn must generate a distribution "
-                           "with fully known `event_shape`.")
-      if num_steps < 1:
-        raise ValueError("num_steps ({}) must be at least 1.".format(num_steps))
-      self._num_steps = num_steps
-    super(Autoregressive, self).__init__(
-        dtype=self._distribution0.dtype,
-        reparameterization_type=self._distribution0.reparameterization_type,
-        validate_args=validate_args,
-        allow_nan_stats=allow_nan_stats,
-        parameters=parameters,
-        graph_parents=self._distribution0._graph_parents,  # pylint: disable=protected-access
-        name=name)
-
-  @property
-  def distribution_fn(self):
-    return self._distribution_fn
-
-  @property
-  def sample0(self):
-    return self._sample0
-
-  @property
-  def num_steps(self):
-    return self._num_steps
-
-  @property
-  def distribution0(self):
-    return self._distribution0
-
-  def _batch_shape(self):
-    return self.distribution0.batch_shape
-
-  def _batch_shape_tensor(self):
-    return self.distribution0.batch_shape_tensor()
-
-  def _event_shape(self):
-    return self.distribution0.event_shape
-
-  def _event_shape_tensor(self):
-    return self.distribution0.event_shape_tensor()
-
-  def _sample_n(self, n, seed=None):
-    if seed is None:
-      seed = distribution_util.gen_new_seed(
-          seed=np.random.randint(2**32 - 1),
-          salt="autoregressive")
-    samples = self.distribution0.sample(n, seed=seed)
-    for _ in range(self._num_steps):
-      samples = self.distribution_fn(samples).sample(seed=seed)
-    return samples
-
-  def _log_prob(self, value):
-    return self.distribution_fn(value).log_prob(value)
-
-  def _prob(self, value):
-    return self.distribution_fn(value).prob(value)
diff --git a/tensorflow/python/ops/distributions/util.py b/tensorflow/python/ops/distributions/util.py
index 28c74bf981..41b86f7940 100644
--- a/tensorflow/python/ops/distributions/util.py
+++ b/tensorflow/python/ops/distributions/util.py
@@ -751,7 +751,6 @@ def fill_triangular(x, upper=False, name=None):
   """
 
   with ops.name_scope(name, "fill_triangular", values=[x]):
-    x = ops.convert_to_tensor(x, name="x")
     if x.shape.with_rank_at_least(1)[-1].value is not None:
       # Formula derived by solving for n: m = n(n+1)/2.
       m = np.int32(x.shape[-1].value)
-- 
GitLab


From d0ae1064ed0bb4bd1aed00afd4235f4dd5c853f0 Mon Sep 17 00:00:00 2001
From: Max Galkin <maxgalkin@google.com>
Date: Fri, 1 Dec 2017 14:48:56 -0800
Subject: [PATCH 0530/1225] Prefix inaccurate costs with "~" in
 VirtualScheduler verbose log.

Fix some inaccurate estimates exposed by this approach:
- propagate the inaccuracy flag when merging device stats;
- estimate Const as no-op;
- estimate RandomUniform, Relu and Softmax as element-wise;
- consider estimates accurate for known element-wise ops in op_level_cost_estimator.

PiperOrigin-RevId: 177643976
---
 .../costs/analytical_cost_estimator_test.cc   | 10 ++++--
 .../grappler/costs/op_level_cost_estimator.cc | 32 +++++++++++++------
 .../grappler/costs/op_level_cost_estimator.h  |  8 ++---
 .../costs/op_level_cost_estimator_test.cc     |  6 ++--
 .../core/grappler/costs/virtual_scheduler.cc  | 32 ++++++++++++++-----
 .../core/grappler/costs/virtual_scheduler.h   |  5 ++-
 .../optimizers/static_schedule_test.cc        | 32 +++++++++----------
 7 files changed, 82 insertions(+), 43 deletions(-)

diff --git a/tensorflow/core/grappler/costs/analytical_cost_estimator_test.cc b/tensorflow/core/grappler/costs/analytical_cost_estimator_test.cc
index d1f3e36aa8..1c2c171383 100644
--- a/tensorflow/core/grappler/costs/analytical_cost_estimator_test.cc
+++ b/tensorflow/core/grappler/costs/analytical_cost_estimator_test.cc
@@ -102,8 +102,14 @@ TEST_F(AnalyticalCostEstimatorTest, SimpleTest) {
   Costs summary;
   TF_ASSERT_OK(estimator.PredictCosts(item.graph, &cost_graph, &summary));
 
-  EXPECT_EQ(Costs::NanoSeconds(9156), summary.execution_time);
-  EXPECT_FALSE(summary.inaccurate);
+  EXPECT_EQ(Costs::NanoSeconds(9150), summary.execution_time);
+
+  // Make this estimate accurate:
+  // TODO(http://b/70031255): Accurate estimator for RandomUniform op needed
+  // TODO(http://b/70031363): Accurate estimator for Softmax needed
+  //
+  // Change to EXPECT_FALSE when the above TODOs are done:
+  EXPECT_TRUE(summary.inaccurate);
 }
 
 }  // end namespace grappler
diff --git a/tensorflow/core/grappler/costs/op_level_cost_estimator.cc b/tensorflow/core/grappler/costs/op_level_cost_estimator.cc
index b1e04ceec8..1c278a1030 100644
--- a/tensorflow/core/grappler/costs/op_level_cost_estimator.cc
+++ b/tensorflow/core/grappler/costs/op_level_cost_estimator.cc
@@ -25,6 +25,7 @@ namespace tensorflow {
 namespace grappler {
 
 constexpr int kOpsPerMac = 2;
+constexpr char kConst[] = "Const";
 constexpr char kConv2d[] = "Conv2D";
 constexpr char kConv2dBackpropFilter[] = "Conv2DBackpropFilter";
 constexpr char kConv2dBackpropInput[] = "Conv2DBackpropInput";
@@ -167,6 +168,7 @@ OpLevelCostEstimator::OpLevelCostEstimator() {
       {kReshape, wrap(&OpLevelCostEstimator::PredictNoOp)},
       {kRecv, wrap(&OpLevelCostEstimator::PredictNoOp)},
       {kSend, wrap(&OpLevelCostEstimator::PredictNoOp)},
+      {kConst, wrap(&OpLevelCostEstimator::PredictNoOp)},
       {kVariable, wrap(&OpLevelCostEstimator::PredictNoOp)},
       {kVariableV2, wrap(&OpLevelCostEstimator::PredictNoOp)},
       {kBatchMatMul, wrap(&OpLevelCostEstimator::PredictBatchMatMul)},
@@ -221,6 +223,8 @@ OpLevelCostEstimator::OpLevelCostEstimator() {
                      Eigen::internal::scalar_square_op<float>>::Cost},
       {"Tanh", Eigen::internal::functor_traits<
                    Eigen::internal::scalar_tanh_op<float>>::Cost},
+      {"Relu", Eigen::internal::functor_traits<
+                   Eigen::internal::scalar_max_op<float>>::Cost},
       {"Sigmoid", Eigen::internal::functor_traits<
                       Eigen::internal::scalar_sigmoid_op<float>>::Cost},
       {"Sign", Eigen::internal::functor_traits<
@@ -283,8 +287,10 @@ Costs OpLevelCostEstimator::PredictCosts(const OpContext& op_context) const {
     if (elementwise_ops_.find(op_features.op()) != elementwise_ops_.end()) {
       return PredictCwiseOp(op_context);
     }
-    VLOG(1) << "Missing implementation for op: " << op_features.op();
-    return DummyExecutionTime(op_context);
+
+    VLOG(1) << "Missing accurate estimator for op: " << op_features.op();
+
+    return PredictCostOfAnUnknownOp(op_context);
   }
 
   std::function<Costs(const OpContext&)> estimator = it->second;
@@ -366,19 +372,27 @@ Costs OpLevelCostEstimator::PredictCwiseOp(const OpContext& op_context) const {
   }
 
   int op_cost = 1;
+  bool is_known_elementwise_op = false;
   auto it = elementwise_ops_.find(op_features.op());
   if (it != elementwise_ops_.end()) {
     op_cost = it->second;
+    is_known_elementwise_op = true;
+  } else {
+    LOG(WARNING) << "Not a cwise op: " << op_features.op();
   }
+
   Costs costs = PredictOpCountBasedCost(op_count * op_cost, op_features);
-  costs.inaccurate = found_unknown_shapes;
+  if (found_unknown_shapes || !is_known_elementwise_op) {
+    costs.inaccurate = true;
+  }
   return costs;
 }
 
-Costs OpLevelCostEstimator::DummyExecutionTime(
+Costs OpLevelCostEstimator::PredictCostOfAnUnknownOp(
     const OpContext& op_context) const {
-  // Use CwiseOp time as an estimation
-  auto costs = PredictCwiseOp(op_context);
+  // Don't assume the operation is cwise, return cost based on input/output size
+  // and admit that it is inaccurate...
+  auto costs = PredictOpCountBasedCost(0, op_context.op_info);
   costs.inaccurate = true;
   return costs;
 }
@@ -391,11 +405,11 @@ Costs OpLevelCostEstimator::PredictOpCountBasedCost(
           << " Execution Time (ns):" << compute_cost.count();
 
   bool found_unknown_shapes = false;
-  double total_input_size =
+  const double total_input_size =
       CalculateInputSize(op_features, &found_unknown_shapes);
-  double total_output_size =
+  const double total_output_size =
       CalculateOutputSize(op_features, &found_unknown_shapes);
-  double total_io_size = total_input_size + total_output_size;
+  const double total_io_size = total_input_size + total_output_size;
 
   Costs::NanoSeconds memory_cost(
       std::ceil(total_io_size / device_perf.gb_per_sec));
diff --git a/tensorflow/core/grappler/costs/op_level_cost_estimator.h b/tensorflow/core/grappler/costs/op_level_cost_estimator.h
index 3a8385dd73..c6f23ee0aa 100644
--- a/tensorflow/core/grappler/costs/op_level_cost_estimator.h
+++ b/tensorflow/core/grappler/costs/op_level_cost_estimator.h
@@ -45,11 +45,11 @@ class OpLevelCostEstimator {
   // Returns basic device performance info.
   virtual DeviceInfo GetDeviceInfo(const DeviceProperties& device) const;
 
-  // For operations for which we haven't yet built estimates, returns a dummy
-  // value based on input size.
-  Costs DummyExecutionTime(const OpContext& op_context) const;
+  // Predict cost of an op for which no accurate estimator is defined.
+  Costs PredictCostOfAnUnknownOp(const OpContext& op_context) const;
 
-  // Naive cost estimate based on operations divided by device ops/sec.
+  // Naive cost estimate based on operations divided by device ops/sec,
+  // and input/output tensor sizes.
   Costs PredictOpCountBasedCost(double operations,
                                 const OpInfo& op_features) const;
 
diff --git a/tensorflow/core/grappler/costs/op_level_cost_estimator_test.cc b/tensorflow/core/grappler/costs/op_level_cost_estimator_test.cc
index f19be4a0ee..60fc783472 100644
--- a/tensorflow/core/grappler/costs/op_level_cost_estimator_test.cc
+++ b/tensorflow/core/grappler/costs/op_level_cost_estimator_test.cc
@@ -167,8 +167,8 @@ class OpLevelCostEstimatorTest : public ::testing::Test {
 TEST_F(OpLevelCostEstimatorTest, DummyExecutionTime) {
   auto cost = PredictCosts(DescribeOp("Dummy", 1000, 1));
   EXPECT_EQ(Costs::Duration(2000), cost.memory_time);
-  EXPECT_EQ(Costs::Duration(200), cost.compute_time);
-  EXPECT_EQ(Costs::Duration(2200), cost.execution_time);
+  EXPECT_EQ(Costs::Duration(0), cost.compute_time);
+  EXPECT_EQ(Costs::Duration(2000), cost.execution_time);
   EXPECT_TRUE(cost.inaccurate);
 }
 
@@ -176,7 +176,7 @@ TEST_F(OpLevelCostEstimatorTest, ExecutionTimeSumOrMax) {
   SetComputeMemoryOverlap(true);
   auto cost = PredictCosts(DescribeOp("Dummy", 1000, 1));
   EXPECT_EQ(Costs::Duration(2000), cost.memory_time);
-  EXPECT_EQ(Costs::Duration(200), cost.compute_time);
+  EXPECT_EQ(Costs::Duration(0), cost.compute_time);
   EXPECT_EQ(Costs::Duration(2000), cost.execution_time);  // max(2000, 200)
   EXPECT_TRUE(cost.inaccurate);
   SetComputeMemoryOverlap(false);  // Set it back to default.
diff --git a/tensorflow/core/grappler/costs/virtual_scheduler.cc b/tensorflow/core/grappler/costs/virtual_scheduler.cc
index 6640de668d..1554aeb3c0 100644
--- a/tensorflow/core/grappler/costs/virtual_scheduler.cc
+++ b/tensorflow/core/grappler/costs/virtual_scheduler.cc
@@ -43,6 +43,9 @@ Costs CombineCosts(const Costs& left, const Costs& right) {
 
   Costs result = left;
   result.execution_time += right.execution_time;
+  if (right.inaccurate) {
+    result.inaccurate = true;
+  }
   if (right.max_memory != kMemoryUnknown) {
     result.max_memory += right.max_memory;
   }
@@ -538,7 +541,8 @@ bool VirtualScheduler::MarkCurrNodeExecuted(const Costs& node_costs) {
   string node_description = GetOpDescription(op_context.op_info);
   op_counts_[node_description] += 1;
   op_costs_[node_description] =
-      node_costs.execution_time.asMicroSeconds().count();
+      std::make_pair(node_costs.execution_time.asMicroSeconds().count(),
+                     !node_costs.inaccurate);
 
   auto& op_cost = FindOrCreateZero(op_name, &op_to_cost_);
   op_cost = CombineCosts(op_cost, node_costs);
@@ -647,8 +651,10 @@ Costs VirtualScheduler::Summary() const {
   for (const auto& op_cost_pair : op_to_cost_) {
     const auto& op = op_cost_pair.first;
     const auto& cost = op_cost_pair.second.execution_time.count();
+    const bool is_op_cost_accurate = !op_cost_pair.second.inaccurate;
     if (cost) {  // Skip printing out zero-cost ops.
-      VLOG(1) << " + " << op << " : " << cost;
+      VLOG(1) << " + " << op << " : " << (is_op_cost_accurate ? "" : "~")
+              << cost;
     }
   }
 
@@ -699,10 +705,16 @@ Costs VirtualScheduler::Summary() const {
           CalculateOutputSize(node_map_.at(node).output_properties, port);
     }
     Costs::NanoSeconds total_compute_time_ns;
+    bool is_total_cost_accurate = true;
     for (const auto& op_cost_pair : state.op_to_cost) {
       const auto& op = op_cost_pair.first;
       const auto& cost = op_cost_pair.second.execution_time.count();
       total_compute_time_ns += op_cost_pair.second.execution_time;
+      const bool is_op_cost_accurate = !op_cost_pair.second.inaccurate;
+      if (!is_op_cost_accurate) {
+        is_total_cost_accurate = false;
+      }
+
       int64 op_mem_usage = 0;
       auto it = op_to_memory.find(op);
       if (it != op_to_memory.end()) {
@@ -714,9 +726,9 @@ Costs VirtualScheduler::Summary() const {
                                : 0.0;
       if (cost || mem_usage_percent > 1.0) {
         // Print out only non-zero cost ops or ops with > 1% memory usage.
-        VLOG(1) << " + " << op << " : " << cost << " ("
-                << strings::HumanReadableNumBytes(op_mem_usage) << " ["
-                << mem_usage_percent << "%] "
+        VLOG(1) << " + " << op << " : " << (is_op_cost_accurate ? "" : "~")
+                << cost << " (" << strings::HumanReadableNumBytes(op_mem_usage)
+                << " [" << mem_usage_percent << "%] "
                 << (persisent_ops.count(op) > 0 ? ": persistent op)" : ")");
       }
     }
@@ -725,8 +737,9 @@ Costs VirtualScheduler::Summary() const {
     if (wall_time_ns.count() > 0) {
       utilization = total_compute_time_ns.count() * 100 / wall_time_ns.count();
     }
-    VLOG(1) << "Device = " << name
-            << ", total_compute_time_ns = " << total_compute_time_ns.count()
+    VLOG(1) << "Device = " << name << ", total_compute_time_ns = "
+            << (is_total_cost_accurate ? "" : "~")
+            << total_compute_time_ns.count()
             << ", utilization = " << utilization << "%";
 
     if (critical_path_costs.execution_time <= state.GetCurrTime()) {
@@ -738,8 +751,11 @@ Costs VirtualScheduler::Summary() const {
     // Also log the op description and their corresponding counts.
     VLOG(2) << "Node description, counts, cost:";
     for (const auto& item : op_counts_) {
+      int cost;
+      bool is_cost_accurate;
+      std::tie(cost, is_cost_accurate) = op_costs_.at(item.first);
       VLOG(2) << "Node: " << item.first << ", Count: " << item.second
-              << ", Individual Cost: " << op_costs_.at(item.first);
+              << ", Individual Cost: " << (is_cost_accurate ? "" : "~") << cost;
     }
   }
 
diff --git a/tensorflow/core/grappler/costs/virtual_scheduler.h b/tensorflow/core/grappler/costs/virtual_scheduler.h
index c74d80c2be..3018e3509a 100644
--- a/tensorflow/core/grappler/costs/virtual_scheduler.h
+++ b/tensorflow/core/grappler/costs/virtual_scheduler.h
@@ -330,7 +330,10 @@ class VirtualScheduler {
 
   // Stats:
   std::map<string, int> op_counts_;  // Op counts with key with input shape.
-  std::map<string, int> op_costs_;   // Individual op costs (with input shapes).
+  // Individual op costs (with input shapes).
+  // Boolean field for whether the cost is accurate.
+  std::map<string, std::pair<int, bool>> op_costs_;
+
   Costs graph_costs_;                // Graph cost.
   std::map<string, Costs> op_to_cost_;  // Per-op cost.
 
diff --git a/tensorflow/core/grappler/optimizers/static_schedule_test.cc b/tensorflow/core/grappler/optimizers/static_schedule_test.cc
index 5de5933587..08580d9284 100644
--- a/tensorflow/core/grappler/optimizers/static_schedule_test.cc
+++ b/tensorflow/core/grappler/optimizers/static_schedule_test.cc
@@ -64,17 +64,17 @@ TEST_F(StaticScheduleTest, BasicGraph) {
     if (time.first->name() == "Const/Const") {
       EXPECT_EQ(Costs::NanoSeconds(1), time.second);
     } else if (time.first->name() == "x") {
-      EXPECT_EQ(Costs::NanoSeconds(250002), time.second);
+      EXPECT_EQ(Costs::NanoSeconds(250001), time.second);
     } else if (time.first->name() == "Square") {
-      EXPECT_EQ(Costs::NanoSeconds(1500005), time.second);
+      EXPECT_EQ(Costs::NanoSeconds(1500004), time.second);
     } else if (time.first->name() == "Square_1") {
-      EXPECT_EQ(Costs::NanoSeconds(2750008), time.second);
+      EXPECT_EQ(Costs::NanoSeconds(2750007), time.second);
     } else if (time.first->name() == "Square_2") {
-      EXPECT_EQ(Costs::NanoSeconds(4000011), time.second);
+      EXPECT_EQ(Costs::NanoSeconds(4000010), time.second);
     } else if (time.first->name() == "Square_3") {
-      EXPECT_EQ(Costs::NanoSeconds(5250014), time.second);
+      EXPECT_EQ(Costs::NanoSeconds(5250013), time.second);
     } else if (time.first->name() == "y") {
-      EXPECT_EQ(Costs::NanoSeconds(6500017), time.second);
+      EXPECT_EQ(Costs::NanoSeconds(6500013), time.second);
     }
   }
 }
@@ -110,13 +110,13 @@ TEST_F(StaticScheduleTest, BasicGraphWithCtrlDependencies) {
     if (time.first->name() == "a") {
       EXPECT_EQ(Costs::NanoSeconds(1), time.second);
     } else if (time.first->name() == "b") {
-      EXPECT_EQ(Costs::NanoSeconds(12500026), time.second);
+      EXPECT_EQ(Costs::NanoSeconds(12500001), time.second);
     } else if (time.first->name() == "c") {
-      EXPECT_EQ(Costs::NanoSeconds(12500027), time.second);
+      EXPECT_EQ(Costs::NanoSeconds(12500002), time.second);
     } else if (time.first->name() == "d") {
-      EXPECT_EQ(Costs::NanoSeconds(12500028), time.second);
+      EXPECT_EQ(Costs::NanoSeconds(12500003), time.second);
     } else if (time.first->name() == "e") {
-      EXPECT_EQ(Costs::NanoSeconds(25000053), time.second);
+      EXPECT_EQ(Costs::NanoSeconds(25000003), time.second);
     }
   }
 }
@@ -142,17 +142,17 @@ TEST_F(StaticScheduleTest, RequiredTimes) {
 
   for (auto time : required_times) {
     if (time.first->name() == "Const/Const") {
-      EXPECT_EQ(Costs::NanoSeconds(-6500016), time.second);
+      EXPECT_EQ(Costs::NanoSeconds(-6500012), time.second);
     } else if (time.first->name() == "x") {
-      EXPECT_EQ(Costs::NanoSeconds(-6250015), time.second);
+      EXPECT_EQ(Costs::NanoSeconds(-6250012), time.second);
     } else if (time.first->name() == "Square") {
-      EXPECT_EQ(Costs::NanoSeconds(-5000012), time.second);
+      EXPECT_EQ(Costs::NanoSeconds(-5000009), time.second);
     } else if (time.first->name() == "Square_1") {
-      EXPECT_EQ(Costs::NanoSeconds(-3750009), time.second);
+      EXPECT_EQ(Costs::NanoSeconds(-3750006), time.second);
     } else if (time.first->name() == "Square_2") {
-      EXPECT_EQ(Costs::NanoSeconds(-2500006), time.second);
+      EXPECT_EQ(Costs::NanoSeconds(-2500003), time.second);
     } else if (time.first->name() == "Square_3") {
-      EXPECT_EQ(Costs::NanoSeconds(-1250003), time.second);
+      EXPECT_EQ(Costs::NanoSeconds(-1250000), time.second);
     } else if (time.first->name() == "y") {
       EXPECT_EQ(Costs::NanoSeconds(0), time.second);
     }
-- 
GitLab


From ab0c520c7f58386e0141a3c515acea19033410a6 Mon Sep 17 00:00:00 2001
From: Yuanzhong Xu <yuanzx@google.com>
Date: Fri, 1 Dec 2017 15:16:33 -0800
Subject: [PATCH 0531/1225] Add BF16 tests for reshape.

PiperOrigin-RevId: 177647906
---
 tensorflow/compiler/xla/tests/reshape_test.cc | 854 +++++++++++-------
 1 file changed, 518 insertions(+), 336 deletions(-)

diff --git a/tensorflow/compiler/xla/tests/reshape_test.cc b/tensorflow/compiler/xla/tests/reshape_test.cc
index d235b9a158..6286a89748 100644
--- a/tensorflow/compiler/xla/tests/reshape_test.cc
+++ b/tensorflow/compiler/xla/tests/reshape_test.cc
@@ -41,326 +41,467 @@ limitations under the License.
 namespace xla {
 namespace {
 
-class ReshapeTest : public ClientLibraryTestBase {
+// Use a bool parameter to indicate whether to use bfloat16.
+class ReshapeTest : public ::testing::WithParamInterface<bool>,
+                    public ClientLibraryTestBase {
  public:
+  ReshapeTest() { set_use_bfloat16(GetParam()); }
+
   ErrorSpec zero_error_spec_{0.0};
 };
 
 // Collapses 2-dimensional pseudo-scalar (single-element array) to 1 dimension.
-XLA_TEST_F(ReshapeTest, CollapseTrivial1x1) {
+XLA_TEST_P(ReshapeTest, CollapseTrivial1x1) {
   ComputationBuilder builder(client_, TestName());
-  auto a = builder.ConstantR2<float>({{1.0}});
-  builder.Collapse(/*operand=*/a, /*dimensions=*/{0, 1});
-
-  ComputeAndCompareR1<float>(&builder, {1.0f}, {}, zero_error_spec_);
+  Array2D<float> input_array(1, 1);
+  input_array.Fill(1.0f);
+  auto input_literal = Literal::CreateR2FromArray2D(input_array);
+  ComputationDataHandle parameter;
+  auto input = CreateParameterAndTransferLiteral(0, *input_literal, "parameter",
+                                                 &builder, &parameter);
+  builder.Collapse(/*operand=*/parameter, /*dimensions=*/{0, 1});
+
+  auto expected_literal = Literal::CreateR1<float>({1.0f});
+  ComputeAndCompareLiteral(&builder, *expected_literal, {input.get()},
+                           zero_error_spec_);
 }
 
-XLA_TEST_F(ReshapeTest, CollapseTrivialR1EmptyDims) {
+XLA_TEST_P(ReshapeTest, CollapseTrivialR1EmptyDims) {
   ComputationBuilder builder(client_, TestName());
-  auto a = builder.ConstantR1<float>({1.0});
-  builder.Collapse(/*operand=*/a, /*dimensions=*/{});
-
-  ComputeAndCompareR1<float>(&builder, {1.0f}, {}, zero_error_spec_);
+  auto input_literal = Literal::CreateR1<float>({1.0f});
+  ComputationDataHandle parameter;
+  auto input = CreateParameterAndTransferLiteral(0, *input_literal, "parameter",
+                                                 &builder, &parameter);
+  builder.Collapse(/*operand=*/parameter, /*dimensions=*/{});
+
+  auto expected_literal = Literal::CreateR1<float>({1.0f});
+  ComputeAndCompareLiteral(&builder, *expected_literal, {input.get()},
+                           zero_error_spec_);
 }
 
-XLA_TEST_F(ReshapeTest, CollapseTrivialR1OnlyDim) {
+XLA_TEST_P(ReshapeTest, CollapseTrivialR1OnlyDim) {
   ComputationBuilder builder(client_, TestName());
-  auto a = builder.ConstantR1<float>({1.0});
-  builder.Collapse(/*operand=*/a, /*dimensions=*/{0});
-
-  ComputeAndCompareR1<float>(&builder, {1.0f}, {}, zero_error_spec_);
+  auto input_literal = Literal::CreateR1<float>({1.0f});
+  ComputationDataHandle parameter;
+  auto input = CreateParameterAndTransferLiteral(0, *input_literal, "parameter",
+                                                 &builder, &parameter);
+  builder.Collapse(/*operand=*/parameter, /*dimensions=*/{0});
+
+  auto expected_literal = Literal::CreateR1<float>({1.0f});
+  ComputeAndCompareLiteral(&builder, *expected_literal, {input.get()},
+                           zero_error_spec_);
 }
 
 // Collapses 2-dimensional pseudo-scalar (single-element array) to scalar.
-XLA_TEST_F(ReshapeTest, SingleElementArrayToScalar) {
+XLA_TEST_P(ReshapeTest, SingleElementArrayToScalar) {
   ComputationBuilder builder(client_, TestName());
-  auto a = builder.ConstantR2<float>({{1.0}});
-  auto reshape =
-      builder.Reshape(/*operand=*/a, /*dimensions=*/{0, 1}, /*new_sizes=*/{});
+  Array2D<float> input_array(1, 1);
+  input_array.Fill(1.0f);
+  auto input_literal = Literal::CreateR2FromArray2D(input_array);
+  ComputationDataHandle parameter;
+  auto input = CreateParameterAndTransferLiteral(0, *input_literal, "parameter",
+                                                 &builder, &parameter);
+  auto reshape = builder.Reshape(/*operand=*/parameter, /*dimensions=*/{0, 1},
+                                 /*new_sizes=*/{});
   auto new_shape = builder.GetShape(reshape).ConsumeValueOrDie();
 
-  ComputeAndCompareR0<float>(&builder, 1.0f, {}, zero_error_spec_);
+  auto expected_literal = Literal::CreateR0<float>(1.0f);
+  ComputeAndCompareLiteral(&builder, *expected_literal, {input.get()},
+                           zero_error_spec_);
 }
 
-XLA_TEST_F(ReshapeTest, ScalarToSingleElementArray) {
+XLA_TEST_P(ReshapeTest, ScalarToSingleElementArray) {
   ComputationBuilder builder(client_, TestName());
 
   std::unique_ptr<Literal> param0_literal = Literal::CreateR0<float>(1.0f);
-  std::unique_ptr<GlobalData> param0_data =
-      client_->TransferToServer(*param0_literal).ConsumeValueOrDie();
-
-  auto a = builder.Parameter(0, ShapeUtil::MakeShape(F32, {}), "param0");
-  a = builder.Neg(a);
+  ComputationDataHandle parameter;
+  auto input = CreateParameterAndTransferLiteral(0, *param0_literal, "param0",
+                                                 &builder, &parameter);
+  auto a = builder.Neg(parameter);
   auto reshape =
       builder.Reshape(/*operand=*/a, /*dimensions=*/{}, /*new_sizes=*/{1});
 
-  ComputeAndCompareR1<float>(&builder, {-1.0f}, {param0_data.get()},
-                             zero_error_spec_);
+  auto expected_literal = Literal::CreateR1<float>({-1.0f});
+  ComputeAndCompareLiteral(&builder, *expected_literal, {input.get()},
+                           zero_error_spec_);
 }
 
-XLA_TEST_F(ReshapeTest, Trivial0x3) {
+// TODO(b/29185393): Make this work with the GPU backend. The GPU backend
+// does not handle zero-sized shapes correctly. Failed last on 2017-11-30
+// with an incorrect result rank.
+XLA_TEST_P(ReshapeTest, DISABLED_ON_GPU(Trivial0x3)) {
   ComputationBuilder builder(client_, TestName());
-  auto a = builder.ConstantR2FromArray2D<float>(Array2D<float>(0, 3));
-  auto result = builder.Collapse(/*operand=*/a, /*dimensions=*/{0, 1});
-
-  ComputeAndCompareR1<float>(&builder, {}, {}, zero_error_spec_);
+  Array2D<float> input_array(0, 3);
+  auto input_literal = Literal::CreateR2FromArray2D(input_array);
+  ComputationDataHandle parameter;
+  auto input = CreateParameterAndTransferLiteral(0, *input_literal, "input",
+                                                 &builder, &parameter);
+  builder.Collapse(/*operand=*/parameter, /*dimensions=*/{0, 1});
+  auto expected_literal = Literal::CreateR1<float>({});
+  ComputeAndCompareLiteral(&builder, *expected_literal, {input.get()},
+                           zero_error_spec_);
 }
 
 // TODO(b/29185393): Make this work with the GPU backend. The GPU backend
 // does not handle zero-sized shapes correctly. Failed last on 2017-05-15
 // with an incorrect result rank.
-XLA_TEST_F(ReshapeTest, DISABLED_ON_GPU(Trivial0x3WithParameter)) {
+XLA_TEST_P(ReshapeTest, DISABLED_ON_GPU(Trivial0x3WithParameter)) {
   ComputationBuilder builder(client_, TestName());
 
   std::unique_ptr<Literal> param0_literal =
       Literal::CreateR2FromArray2D<float>(Array2D<float>(0, 3));
-  std::unique_ptr<GlobalData> param0_data =
-      client_->TransferToServer(*param0_literal).ConsumeValueOrDie();
-
-  auto a = builder.Parameter(0, ShapeUtil::MakeShape(F32, {0, 3}), "param0");
-  auto result = builder.Collapse(/*operand=*/a, /*dimensions=*/{0, 1});
-
-  ComputeAndCompareR1<float>(&builder, {}, {param0_data.get()},
-                             zero_error_spec_);
+  ComputationDataHandle parameter;
+  auto input = CreateParameterAndTransferLiteral(0, *param0_literal, "param0",
+                                                 &builder, &parameter);
+  builder.Collapse(/*operand=*/parameter, /*dimensions=*/{0, 1});
+  auto expected_literal = Literal::CreateR1<float>({});
+  ComputeAndCompareLiteral(&builder, *expected_literal, {input.get()},
+                           zero_error_spec_);
 }
 
-XLA_TEST_F(ReshapeTest, Trivial3x0) {
+// TODO(b/29185393): Make this work with the GPU backend. The GPU backend
+// does not handle zero-sized shapes correctly. Failed last on 2017-11-30
+// with an incorrect result rank.
+XLA_TEST_P(ReshapeTest, DISABLED_ON_GPU(Trivial3x0)) {
   ComputationBuilder builder(client_, TestName());
-  auto a = builder.ConstantR2FromArray2D<float>(Array2D<float>(3, 0));
-  auto result = builder.Collapse(/*operand=*/a, /*dimensions=*/{0, 1});
-
-  ComputeAndCompareR1<float>(&builder, {}, {}, zero_error_spec_);
+  Array2D<float> input_array(3, 0);
+  auto input_literal = Literal::CreateR2FromArray2D(input_array);
+  ComputationDataHandle parameter;
+  auto input = CreateParameterAndTransferLiteral(0, *input_literal, "input",
+                                                 &builder, &parameter);
+  builder.Collapse(/*operand=*/parameter, /*dimensions=*/{0, 1});
+  auto expected_literal = Literal::CreateR1<float>({});
+  ComputeAndCompareLiteral(&builder, *expected_literal, {input.get()},
+                           zero_error_spec_);
 }
 
 // Collapses a 2-dimensional row vector to 1 dimension.
-XLA_TEST_F(ReshapeTest, Trivial1x3) {
+XLA_TEST_P(ReshapeTest, Trivial1x3) {
   ComputationBuilder builder(client_, TestName());
-  auto a = builder.ConstantR2<float>({{1.0f, 2.0f, 3.0f}});
-  auto result = builder.Collapse(/*operand=*/a, /*dimensions=*/{0, 1});
-
-  ComputeAndCompareR1<float>(&builder, {1.0f, 2.0f, 3.0f}, {},
-                             zero_error_spec_);
+  auto input_literal = Literal::CreateR2<float>({{1.0f, 2.0f, 3.0f}});
+  ComputationDataHandle parameter;
+  auto input = CreateParameterAndTransferLiteral(0, *input_literal, "input",
+                                                 &builder, &parameter);
+  builder.Collapse(/*operand=*/parameter, /*dimensions=*/{0, 1});
+  auto expected_literal = Literal::CreateR1<float>({1.0f, 2.0f, 3.0f});
+  ComputeAndCompareLiteral(&builder, *expected_literal, {input.get()},
+                           zero_error_spec_);
 }
 
 // Collapses a 2-dimensional column vector to 1 dimension.
-XLA_TEST_F(ReshapeTest, Trivial3x1) {
+XLA_TEST_P(ReshapeTest, Trivial3x1) {
   ComputationBuilder builder(client_, TestName());
-  auto a = builder.ConstantR2<float>({{1.0f}, {2.0f}, {3.0f}});
-  auto result = builder.Collapse(/*operand=*/a, /*dimensions=*/{0, 1});
-
-  ComputeAndCompareR1<float>(&builder, {1.0f, 2.0f, 3.0f}, {},
-                             zero_error_spec_);
+  auto input_literal = Literal::CreateR2<float>({{1.0f}, {2.0f}, {3.0f}});
+  ComputationDataHandle parameter;
+  auto input = CreateParameterAndTransferLiteral(0, *input_literal, "input",
+                                                 &builder, &parameter);
+  builder.Collapse(/*operand=*/parameter, /*dimensions=*/{0, 1});
+  auto expected_literal = Literal::CreateR1<float>({1.0f, 2.0f, 3.0f});
+  ComputeAndCompareLiteral(&builder, *expected_literal, {input.get()},
+                           zero_error_spec_);
 }
 
+// TODO(b/29185393): Make this work with the GPU backend. The GPU backend
+// does not handle zero-sized shapes correctly. Failed last on 2017-11-30
+// with an incorrect result rank.
+//
 // Splits an empty vector into an empty matrix.
-XLA_TEST_F(ReshapeTest, R1ToR2_0_To_2x0) {
+XLA_TEST_P(ReshapeTest, DISABLED_ON_GPU(R1ToR2_0_To_2x0)) {
   ComputationBuilder builder(client_, TestName());
-  auto a = builder.ConstantR1<float>({});
-  auto result =
-      builder.Reshape(/*operand=*/a, /*dimensions=*/{0}, /*new_sizes=*/{2, 0});
-  ComputeAndCompareR2<float>(&builder, Array2D<float>(2, 0), {},
-                             zero_error_spec_);
+  auto input_literal = Literal::CreateR1<float>({});
+  ComputationDataHandle parameter;
+  auto input = CreateParameterAndTransferLiteral(0, *input_literal, "input",
+                                                 &builder, &parameter);
+  builder.Reshape(/*operand=*/parameter, /*dimensions=*/{0},
+                  /*new_sizes=*/{2, 0});
+  auto expected_literal = Literal::CreateR2<float>({{}, {}});
+  ComputeAndCompareLiteral(&builder, *expected_literal, {input.get()},
+                           zero_error_spec_);
 }
 
 // Splits a vector into a matrix.
-XLA_TEST_F(ReshapeTest, R1ToR2_6_To_2x3) {
+XLA_TEST_P(ReshapeTest, R1ToR2_6_To_2x3) {
   ComputationBuilder builder(client_, TestName());
-  auto a = builder.ConstantR1<float>({1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f});
-  auto result =
-      builder.Reshape(/*operand=*/a, /*dimensions=*/{0}, /*new_sizes=*/{2, 3});
-  Array2D<float> expected_2x3({{1.0f, 2.0f, 3.0f}, {4.0f, 5.0f, 6.0f}});
-  ComputeAndCompareR2<float>(&builder, expected_2x3, {}, zero_error_spec_);
+  auto input_literal =
+      Literal::CreateR1<float>({1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f});
+  ComputationDataHandle parameter;
+  auto input = CreateParameterAndTransferLiteral(0, *input_literal, "input",
+                                                 &builder, &parameter);
+  builder.Reshape(/*operand=*/parameter, /*dimensions=*/{0},
+                  /*new_sizes=*/{2, 3});
+  auto expected_literal =
+      Literal::CreateR2<float>({{1.0f, 2.0f, 3.0f}, {4.0f, 5.0f, 6.0f}});
+  ComputeAndCompareLiteral(&builder, *expected_literal, {input.get()},
+                           zero_error_spec_);
 }
 
+// TODO(b/29185393): Make this work with the GPU backend. The GPU backend
+// does not handle zero-sized shapes correctly. Failed last on 2017-11-30
+// with an incorrect result rank.
+//
 // Transposes a 2x0 array to a 0x2 array.
-XLA_TEST_F(ReshapeTest, Reshape0x2To2x0) {
+XLA_TEST_P(ReshapeTest, DISABLED_ON_GPU(Reshape0x2To2x0)) {
   ComputationBuilder builder(client_, TestName());
-  auto a = builder.ConstantR2FromArray2D<float>(Array2D<float>(0, 2));
-  auto result = builder.Reshape(/*operand=*/a, /*dimensions=*/{0, 1},
-                                /*new_sizes=*/{2, 0});
-
-  ComputeAndCompareR2<float>(&builder, Array2D<float>(2, 0), {},
-                             zero_error_spec_);
+  auto input_literal = Literal::CreateFromArray(Array2D<float>(0, 2));
+  ComputationDataHandle parameter;
+  auto input = CreateParameterAndTransferLiteral(0, *input_literal, "input",
+                                                 &builder, &parameter);
+  builder.Reshape(/*operand=*/parameter, /*dimensions=*/{0, 1},
+                  /*new_sizes=*/{2, 0});
+  auto expected_literal = Literal::CreateR2<float>({{}, {}});
+  ComputeAndCompareLiteral(&builder, *expected_literal, {input.get()},
+                           zero_error_spec_);
 }
 
 // Transposes a 2-dimensional row vector to a column vector.
-XLA_TEST_F(ReshapeTest, ReshapeRowToCol) {
+XLA_TEST_P(ReshapeTest, ReshapeRowToCol) {
   ComputationBuilder builder(client_, TestName());
   auto simple = MakeLinspaceArray2D(1.0f, 3.0f, 1, 3);
-  auto a = builder.ConstantR2FromArray2D<float>(*simple);
-  auto result = builder.Reshape(/*operand=*/a, /*dimensions=*/{0, 1},
-                                /*new_sizes=*/{3, 1});
+  auto input_literal = Literal::CreateFromArray(*simple);
+  ComputationDataHandle parameter;
+  auto input = CreateParameterAndTransferLiteral(0, *input_literal, "input",
+                                                 &builder, &parameter);
+  builder.Reshape(/*operand=*/parameter, /*dimensions=*/{0, 1},
+                  /*new_sizes=*/{3, 1});
 
   auto expected = ReferenceUtil::TransposeArray2D(*simple);
-  ComputeAndCompareR2<float>(&builder, *expected, {}, zero_error_spec_);
+  auto expected_literal = Literal::CreateFromArray(*expected);
+  ComputeAndCompareLiteral(&builder, *expected_literal, {input.get()},
+                           zero_error_spec_);
 }
 
 // Transposes a 2-dimensional array.
-XLA_TEST_F(ReshapeTest, TransposeAsReshape) {
+XLA_TEST_P(ReshapeTest, TransposeAsReshape) {
   ComputationBuilder builder(client_, TestName());
   auto a4x3 = MakeLinspaceArray2D(1.0f, 12.0f, 4, 3);
-  auto a = builder.ConstantR2FromArray2D<float>(*a4x3);
-  auto result = builder.Reshape(/*operand=*/a, /*dimensions=*/{1, 0},
-                                /*new_sizes=*/{3, 4});
-
-  auto expected3x4 = ReferenceUtil::TransposeArray2D(*a4x3);
-  ComputeAndCompareR2<float>(&builder, *expected3x4, {}, zero_error_spec_);
+  auto input_literal = Literal::CreateFromArray(*a4x3);
+  ComputationDataHandle parameter;
+  auto input = CreateParameterAndTransferLiteral(0, *input_literal, "input",
+                                                 &builder, &parameter);
+  builder.Reshape(/*operand=*/parameter, /*dimensions=*/{1, 0},
+                  /*new_sizes=*/{3, 4});
+
+  auto expected = ReferenceUtil::TransposeArray2D(*a4x3);
+  auto expected_literal = Literal::CreateFromArray(*expected);
+  ComputeAndCompareLiteral(&builder, *expected_literal, {input.get()},
+                           zero_error_spec_);
 }
 
+// TODO(b/29185393): Make this work with the GPU backend. The GPU backend
+// does not handle zero-sized shapes correctly. Failed last on 2017-11-30
+// with an incorrect result rank.
+//
 // Transposes a 0x4 array with ComputationBuilder::Trans.
-XLA_TEST_F(ReshapeTest, Transpose0x4) {
+XLA_TEST_P(ReshapeTest, DISABLED_ON_GPU(Transpose0x4)) {
   ComputationBuilder builder(client_, TestName());
-  auto a = builder.ConstantR2FromArray2D<float>(Array2D<float>(0, 4));
-  auto result = builder.Transpose(a, {1, 0});
-
-  ComputeAndCompareR2<float>(&builder, Array2D<float>(4, 0), {},
-                             zero_error_spec_);
+  auto input_literal = Literal::CreateFromArray(Array2D<float>(0, 4));
+  ComputationDataHandle parameter;
+  auto input = CreateParameterAndTransferLiteral(0, *input_literal, "input",
+                                                 &builder, &parameter);
+  builder.Transpose(parameter, {1, 0});
+  auto expected_literal = Literal::CreateR2<float>({{}, {}, {}, {}});
+  ComputeAndCompareLiteral(&builder, *expected_literal, {input.get()},
+                           zero_error_spec_);
 }
 
 // Transposes a 2-dimensional array with ComputationBuilder::Trans.
-XLA_TEST_F(ReshapeTest, Transpose4x3) {
+XLA_TEST_P(ReshapeTest, Transpose4x3) {
   ComputationBuilder builder(client_, TestName());
   auto a4x3 = MakeLinspaceArray2D(1.0f, 12.0f, 4, 3);
-  auto a = builder.ConstantR2FromArray2D<float>(*a4x3);
-  auto result = builder.Transpose(a, {1, 0});
-
-  auto expected3x4 = ReferenceUtil::TransposeArray2D(*a4x3);
-  ComputeAndCompareR2<float>(&builder, *expected3x4, {}, zero_error_spec_);
+  auto input_literal = Literal::CreateFromArray(*a4x3);
+  ComputationDataHandle parameter;
+  auto input = CreateParameterAndTransferLiteral(0, *input_literal, "input",
+                                                 &builder, &parameter);
+  builder.Transpose(parameter, {1, 0});
+
+  auto expected = ReferenceUtil::TransposeArray2D(*a4x3);
+  auto expected_literal = Literal::CreateFromArray(*expected);
+  ComputeAndCompareLiteral(&builder, *expected_literal, {input.get()},
+                           zero_error_spec_);
 }
 
+// TODO(b/29185393): Make this work with the GPU backend. The GPU backend
+// does not handle zero-sized shapes correctly. Failed last on 2017-11-30
+// with an incorrect result rank.
+//
 // Reshapes an empty 2-dimensional array with dimensions that are not just a
 // rearrangement of the originals (split), but no reordering (no shuffle).
-XLA_TEST_F(ReshapeTest, ReshapeSplitNoShuffleZeroElements) {
+XLA_TEST_P(ReshapeTest, DISABLED_ON_GPU(ReshapeSplitNoShuffleZeroElements)) {
   ComputationBuilder builder(client_, TestName());
-  auto a = builder.ConstantR2FromArray2D<float>(Array2D<float>(6, 0));
-  auto result = builder.Reshape(/*operand=*/a, /*dimensions=*/{0, 1},
-                                /*new_sizes=*/{2, 3, 0, 0});
-
-  ComputeAndCompareR4<float>(&builder, Array4D<float>(2, 3, 0, 0), {},
-                             zero_error_spec_);
+  auto input_literal = Literal::CreateFromArray(Array2D<float>(6, 0));
+  ComputationDataHandle parameter;
+  auto input = CreateParameterAndTransferLiteral(0, *input_literal, "input",
+                                                 &builder, &parameter);
+  builder.Reshape(/*operand=*/parameter, /*dimensions=*/{0, 1},
+                  /*new_sizes=*/{2, 3, 0, 0});
+  auto expected_literal = Literal::CreateFromArray(Array4D<float>(2, 3, 0, 0));
+  ComputeAndCompareLiteral(&builder, *expected_literal, {input.get()},
+                           zero_error_spec_);
 }
 
-XLA_TEST_F(ReshapeTest, ReshapeR4ToR2ZeroElements) {
+// TODO(b/29185393): Make this work with the GPU backend. The GPU backend
+// does not handle zero-sized shapes correctly. Failed last on 2017-11-30
+// with an incorrect result rank.
+XLA_TEST_P(ReshapeTest, DISABLED_ON_GPU(ReshapeR4ToR2ZeroElements)) {
   ComputationBuilder builder(client_, TestName());
-  auto a = builder.ConstantR4FromArray4D<float>(Array4D<float>(2, 3, 4, 0));
-  auto result = builder.Reshape(/*operand=*/a, /*dimensions=*/{0, 1, 2, 3},
-                                /*new_sizes=*/{24, 0});
-
-  ComputeAndCompareR2<float>(&builder, Array2D<float>(24, 0), {},
-                             zero_error_spec_);
+  auto input_literal = Literal::CreateFromArray(Array4D<float>(2, 3, 4, 0));
+  ComputationDataHandle parameter;
+  auto input = CreateParameterAndTransferLiteral(0, *input_literal, "input",
+                                                 &builder, &parameter);
+  builder.Reshape(/*operand=*/parameter, /*dimensions=*/{0, 1, 2, 3},
+                  /*new_sizes=*/{24, 0});
+  auto expected_literal = Literal::CreateFromArray(Array2D<float>(24, 0));
+  ComputeAndCompareLiteral(&builder, *expected_literal, {input.get()},
+                           zero_error_spec_);
 }
 
 // Reshapes a 2-dimensional array with dimensions that are not just a
 // rearrangement of the originals (split), but no reordering (no shuffle).
-XLA_TEST_F(ReshapeTest, ReshapeSplitNoShuffle) {
+XLA_TEST_P(ReshapeTest, ReshapeSplitNoShuffle) {
   ComputationBuilder builder(client_, TestName());
   auto a4x3 = MakeLinspaceArray2D(1.0f, 12.0f, 4, 3);
-  auto a = builder.ConstantR2FromArray2D<float>(*a4x3);
-  auto result = builder.Reshape(/*operand=*/a, /*dimensions=*/{0, 1},
-                                /*new_sizes=*/{2, 6});
-
-  auto expected2x6 = MakeLinspaceArray2D(1.0f, 12.0f, 2, 6);
-  ComputeAndCompareR2<float>(&builder, *expected2x6, {}, zero_error_spec_);
+  auto input_literal = Literal::CreateFromArray(*a4x3);
+  ComputationDataHandle parameter;
+  auto input = CreateParameterAndTransferLiteral(0, *input_literal, "input",
+                                                 &builder, &parameter);
+  builder.Reshape(/*operand=*/parameter, /*dimensions=*/{0, 1},
+                  /*new_sizes=*/{2, 6});
+
+  auto expected = MakeLinspaceArray2D(1.0f, 12.0f, 2, 6);
+  auto expected_literal = Literal::CreateFromArray(*expected);
+  ComputeAndCompareLiteral(&builder, *expected_literal, {input.get()},
+                           zero_error_spec_);
 }
 
-// Reshapes a 2-dimensional array with dimensions that are not just a
-// rearrangement of the originals (split), and reorder the input (shuffle).
-XLA_TEST_F(ReshapeTest, ReshapeSplitAndShuffleZeroElements) {
+// TODO(b/29185393): Make this work with the GPU backend. The GPU backend
+// does not handle zero-sized shapes correctly. Failed last on 2017-11-30
+// with an incorrect result rank.
+//
+XLA_TEST_P(ReshapeTest, DISABLED_ON_GPU(ReshapeSplitAndShuffleZeroElements)) {
   ComputationBuilder builder(client_, TestName());
-  auto a = builder.ConstantR2FromArray2D<float>(Array2D<float>(0, 6));
-  auto result = builder.Reshape(/*operand=*/a, /*dimensions=*/{1, 0},
-                                /*new_sizes=*/{3, 0});
-
-  ComputeAndCompareR2<float>(&builder, Array2D<float>(3, 0), {},
-                             zero_error_spec_);
+  auto input_literal = Literal::CreateFromArray(Array2D<float>(0, 6));
+  ComputationDataHandle parameter;
+  auto input = CreateParameterAndTransferLiteral(0, *input_literal, "input",
+                                                 &builder, &parameter);
+  builder.Reshape(/*operand=*/parameter, /*dimensions=*/{1, 0},
+                  /*new_sizes=*/{3, 0});
+  auto expected_literal = Literal::CreateFromArray(Array2D<float>(3, 0));
+  ComputeAndCompareLiteral(&builder, *expected_literal, {input.get()},
+                           zero_error_spec_);
 }
 
 // Reshapes a 2-dimensional array with dimensions that are not just a
 // rearrangement of the originals (split), and reorder the input (shuffle).
-XLA_TEST_F(ReshapeTest, ReshapeSplitAndShuffle) {
+XLA_TEST_P(ReshapeTest, ReshapeSplitAndShuffle) {
   ComputationBuilder builder(client_, TestName());
   auto a4x3 = MakeLinspaceArray2D(1.0f, 12.0f, 4, 3);
-  auto a = builder.ConstantR2FromArray2D<float>(*a4x3);
-  auto result = builder.Reshape(/*operand=*/a, /*dimensions=*/{1, 0},
-                                /*new_sizes=*/{2, 6});
-
-  Array2D<float> expected2x6({{1.0f, 4.0f, 7.0f, 10.0f, 2.0f, 5.0f},
-                              {8.0f, 11.0f, 3.0f, 6.0f, 9.0f, 12.0f}});
-  ComputeAndCompareR2<float>(&builder, expected2x6, {}, zero_error_spec_);
+  auto input_literal = Literal::CreateFromArray(*a4x3);
+  ComputationDataHandle parameter;
+  auto input = CreateParameterAndTransferLiteral(0, *input_literal, "input",
+                                                 &builder, &parameter);
+  builder.Reshape(/*operand=*/parameter, /*dimensions=*/{1, 0},
+                  /*new_sizes=*/{2, 6});
+  Array2D<float> expected({{1.0f, 4.0f, 7.0f, 10.0f, 2.0f, 5.0f},
+                           {8.0f, 11.0f, 3.0f, 6.0f, 9.0f, 12.0f}});
+  auto expected_literal = Literal::CreateFromArray(expected);
+  ComputeAndCompareLiteral(&builder, *expected_literal, {input.get()},
+                           zero_error_spec_);
 }
 
 // The following tests use the same input 3D array; they test the examples we
 // show for the Reshape operation in the operation_semantics document.
 // TODO(b/34503277): find a way to show this code in the documentation without
 // duplication on the TF documentation server.
-Array3D<int> v_array_for_doc_R3_tests({{{10, 11, 12}, {15, 16, 17}},
-                                       {{20, 21, 22}, {25, 26, 27}},
-                                       {{30, 31, 32}, {35, 36, 37}},
-                                       {{40, 41, 42}, {45, 46, 47}}});
-
-XLA_TEST_F(ReshapeTest, DocR3_R1_Collapse_012) {
-  ComputationBuilder builder(client_, TestName());
-  auto v = builder.ConstantR3FromArray3D<int>(v_array_for_doc_R3_tests);
-  auto result = builder.Reshape(/*operand=*/v, /*dimensions=*/{0, 1, 2},
-                                /*new_sizes=*/{24});
-  ComputeAndCompareR1<int>(&builder,
-                           {10, 11, 12, 15, 16, 17, 20, 21, 22, 25, 26, 27,
-                            30, 31, 32, 35, 36, 37, 40, 41, 42, 45, 46, 47},
-                           {});
-}
-
-XLA_TEST_F(ReshapeTest, DocR3_R2_Collapse_012_Refine_83) {
-  ComputationBuilder builder(client_, TestName());
-  auto v = builder.ConstantR3FromArray3D<int>(v_array_for_doc_R3_tests);
-  auto result = builder.Reshape(/*operand=*/v, /*dimensions=*/{0, 1, 2},
-                                /*new_sizes=*/{8, 3});
-  Array2D<int> expected({{10, 11, 12},
-                         {15, 16, 17},
-                         {20, 21, 22},
-                         {25, 26, 27},
-                         {30, 31, 32},
-                         {35, 36, 37},
-                         {40, 41, 42},
-                         {45, 46, 47}});
-  ComputeAndCompareR2<int>(&builder, expected, {});
-}
-
-XLA_TEST_F(ReshapeTest, DocR3_R1_Collapse_120) {
-  ComputationBuilder builder(client_, TestName());
-  auto v = builder.ConstantR3FromArray3D<int>(v_array_for_doc_R3_tests);
-  auto result = builder.Reshape(/*operand=*/v, /*dimensions=*/{1, 2, 0},
-                                /*new_sizes=*/{24});
-  ComputeAndCompareR1<int>(&builder,
-                           {10, 20, 30, 40, 11, 21, 31, 41, 12, 22, 32, 42,
-                            15, 25, 35, 45, 16, 26, 36, 46, 17, 27, 37, 47},
-                           {});
-}
-
-XLA_TEST_F(ReshapeTest, DocR3_R2_Collapse_120_Refine_83) {
-  ComputationBuilder builder(client_, TestName());
-  auto v = builder.ConstantR3FromArray3D<int>(v_array_for_doc_R3_tests);
-  auto result = builder.Reshape(/*operand=*/v, /*dimensions=*/{1, 2, 0},
-                                /*new_sizes=*/{8, 3});
-  Array2D<int> expected({{10, 20, 30},
-                         {40, 11, 21},
-                         {31, 41, 12},
-                         {22, 32, 42},
-                         {15, 25, 35},
-                         {45, 16, 26},
-                         {36, 46, 17},
-                         {27, 37, 47}});
-  ComputeAndCompareR2<int>(&builder, expected, {});
-}
-
-XLA_TEST_F(ReshapeTest, DocR3_R3_Collapse_120_Refine_262) {
-  ComputationBuilder builder(client_, TestName());
-  auto v = builder.ConstantR3FromArray3D<int>(v_array_for_doc_R3_tests);
-  auto result = builder.Reshape(/*operand=*/v, /*dimensions=*/{1, 2, 0},
-                                /*new_sizes=*/{2, 6, 2});
-  Array3D<int> expected(
+static Array3D<float> ArrayForDocR3Tests() {
+  return Array3D<float>({{{10, 11, 12}, {15, 16, 17}},
+                         {{20, 21, 22}, {25, 26, 27}},
+                         {{30, 31, 32}, {35, 36, 37}},
+                         {{40, 41, 42}, {45, 46, 47}}});
+}
+
+XLA_TEST_P(ReshapeTest, DocR3_R1_Collapse_012) {
+  ComputationBuilder builder(client_, TestName());
+  auto input_literal = Literal::CreateFromArray(ArrayForDocR3Tests());
+  ComputationDataHandle parameter;
+  auto input = CreateParameterAndTransferLiteral(0, *input_literal, "input",
+                                                 &builder, &parameter);
+  builder.Reshape(/*operand=*/parameter, /*dimensions=*/{0, 1, 2},
+                  /*new_sizes=*/{24});
+  auto expected_literal = Literal::CreateR1<float>(
+      {10, 11, 12, 15, 16, 17, 20, 21, 22, 25, 26, 27,
+       30, 31, 32, 35, 36, 37, 40, 41, 42, 45, 46, 47});
+  ComputeAndCompareLiteral(&builder, *expected_literal, {input.get()},
+                           zero_error_spec_);
+}
+
+XLA_TEST_P(ReshapeTest, DocR3_R2_Collapse_012_Refine_83) {
+  ComputationBuilder builder(client_, TestName());
+  auto input_literal = Literal::CreateFromArray(ArrayForDocR3Tests());
+  ComputationDataHandle parameter;
+  auto input = CreateParameterAndTransferLiteral(0, *input_literal, "input",
+                                                 &builder, &parameter);
+  builder.Reshape(/*operand=*/parameter, /*dimensions=*/{0, 1, 2},
+                  /*new_sizes=*/{8, 3});
+  auto expected_literal = Literal::CreateR2<float>({{10, 11, 12},
+                                                    {15, 16, 17},
+                                                    {20, 21, 22},
+                                                    {25, 26, 27},
+                                                    {30, 31, 32},
+                                                    {35, 36, 37},
+                                                    {40, 41, 42},
+                                                    {45, 46, 47}});
+  ComputeAndCompareLiteral(&builder, *expected_literal, {input.get()},
+                           zero_error_spec_);
+}
+
+XLA_TEST_P(ReshapeTest, DocR3_R1_Collapse_120) {
+  ComputationBuilder builder(client_, TestName());
+  auto input_literal = Literal::CreateFromArray(ArrayForDocR3Tests());
+  ComputationDataHandle parameter;
+  auto input = CreateParameterAndTransferLiteral(0, *input_literal, "input",
+                                                 &builder, &parameter);
+  builder.Reshape(/*operand=*/parameter, /*dimensions=*/{1, 2, 0},
+                  /*new_sizes=*/{24});
+  auto expected_literal = Literal::CreateR1<float>(
+      {10, 20, 30, 40, 11, 21, 31, 41, 12, 22, 32, 42,
+       15, 25, 35, 45, 16, 26, 36, 46, 17, 27, 37, 47});
+  ComputeAndCompareLiteral(&builder, *expected_literal, {input.get()},
+                           zero_error_spec_);
+}
+
+XLA_TEST_P(ReshapeTest, DocR3_R2_Collapse_120_Refine_83) {
+  ComputationBuilder builder(client_, TestName());
+  auto input_literal = Literal::CreateFromArray(ArrayForDocR3Tests());
+  ComputationDataHandle parameter;
+  auto input = CreateParameterAndTransferLiteral(0, *input_literal, "input",
+                                                 &builder, &parameter);
+  builder.Reshape(/*operand=*/parameter, /*dimensions=*/{1, 2, 0},
+                  /*new_sizes=*/{8, 3});
+  auto expected_literal = Literal::CreateR2<float>({{10, 20, 30},
+                                                    {40, 11, 21},
+                                                    {31, 41, 12},
+                                                    {22, 32, 42},
+                                                    {15, 25, 35},
+                                                    {45, 16, 26},
+                                                    {36, 46, 17},
+                                                    {27, 37, 47}});
+  ComputeAndCompareLiteral(&builder, *expected_literal, {input.get()},
+                           zero_error_spec_);
+}
+
+XLA_TEST_P(ReshapeTest, DocR3_R3_Collapse_120_Refine_262) {
+  ComputationBuilder builder(client_, TestName());
+  auto input_literal = Literal::CreateFromArray(ArrayForDocR3Tests());
+  ComputationDataHandle parameter;
+  auto input = CreateParameterAndTransferLiteral(0, *input_literal, "input",
+                                                 &builder, &parameter);
+  builder.Reshape(/*operand=*/parameter, /*dimensions=*/{1, 2, 0},
+                  /*new_sizes=*/{2, 6, 2});
+  auto expected_literal = Literal::CreateR3<float>(
       {{{10, 20}, {30, 40}, {11, 21}, {31, 41}, {12, 22}, {32, 42}},
        {{15, 25}, {35, 45}, {16, 26}, {36, 46}, {17, 27}, {37, 47}}});
-  ComputeAndCompareR3<int>(&builder, expected, {});
+  ComputeAndCompareLiteral(&builder, *expected_literal, {input.get()},
+                           zero_error_spec_);
 }
 
 // Collapses the low dimensions of a 4D tensor to get a 2D matrix, without
@@ -378,23 +519,26 @@ XLA_TEST_F(ReshapeTest, DocR3_R3_Collapse_120_Refine_262) {
 // Then we collapse Z be collapsed so we just end up with planes:
 //
 // 1 2 3 4 5 6 1 2 3 4 5 6
-XLA_TEST_F(ReshapeTest, FullyConnectedCollapse) {
+XLA_TEST_P(ReshapeTest, FullyConnectedCollapse) {
   ComputationBuilder builder(client_, TestName());
   Array4D<float> t2x2x2x3(2, 2, 2, 3);
   auto filler2x3 = MakeLinspaceArray2D(1.0f, 6.0f, 2, 3);
   t2x2x2x3.FillWithYX(*filler2x3);
-  auto a = builder.ConstantR4FromArray4D<float>(t2x2x2x3);
-  auto result = builder.Collapse(/*operand=*/a, /*dimensions=*/{1, 2, 3});
-
-  Array2D<float> expected2x12(
+  auto input_literal = Literal::CreateFromArray(t2x2x2x3);
+  ComputationDataHandle parameter;
+  auto input = CreateParameterAndTransferLiteral(0, *input_literal, "input",
+                                                 &builder, &parameter);
+  builder.Collapse(/*operand=*/parameter, /*dimensions=*/{1, 2, 3});
+  auto expected_literal = Literal::CreateR2<float>(
       {{1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f},
        {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f,
         6.0f}});
-  ComputeAndCompareR2<float>(&builder, expected2x12, {}, zero_error_spec_);
+  ComputeAndCompareLiteral(&builder, *expected_literal, {input.get()},
+                           zero_error_spec_);
 }
 
 // As above, but uses reshape directly.
-XLA_TEST_F(ReshapeTest, FullyConnectedCollapseDesugared) {
+XLA_TEST_P(ReshapeTest, FullyConnectedCollapseDesugared) {
   ComputationBuilder builder(client_, TestName());
   Array4D<float> t(2, 1, 2, 2);
   t(0, 0, 0, 0) = 0;
@@ -405,52 +549,67 @@ XLA_TEST_F(ReshapeTest, FullyConnectedCollapseDesugared) {
   t(1, 0, 0, 1) = 5;
   t(1, 0, 1, 0) = 6;
   t(1, 0, 1, 1) = 7;
-  auto a = builder.ConstantR4FromArray4D<float>(t);
-  auto result = builder.Reshape(/*operand=*/a, /*dimensions=*/{0, 1, 2, 3},
-                                /*new_sizes=*/{2, 4});
-
-  Array2D<float> expected({{0, 1, 2, 3}, {4, 5, 6, 7}});
-  ComputeAndCompareR2<float>(&builder, expected, {}, zero_error_spec_);
+  auto input_literal = Literal::CreateFromArray(t);
+  ComputationDataHandle parameter;
+  auto input = CreateParameterAndTransferLiteral(0, *input_literal, "input",
+                                                 &builder, &parameter);
+  builder.Reshape(/*operand=*/parameter, /*dimensions=*/{0, 1, 2, 3},
+                  /*new_sizes=*/{2, 4});
+
+  auto expected_literal =
+      Literal::CreateR2<float>({{0, 1, 2, 3}, {4, 5, 6, 7}});
+  ComputeAndCompareLiteral(&builder, *expected_literal, {input.get()},
+                           zero_error_spec_);
 }
 
 // Reshape various ranks to a scalar.
-XLA_TEST_F(ReshapeTest, ToScalar) {
+XLA_TEST_P(ReshapeTest, ToScalar) {
   for (int rank = 0; rank < 8; ++rank) {
     ComputationBuilder b(client_, TestName());
-    auto input = Literal::CreateR1<float>({83.0f});
+    auto input_literal = Literal::CreateR1<float>({83.0f});
     std::vector<int64> ones(rank, 1);  // this is {1, ..., 1}.
     std::vector<int64> dimensions(rank);
     std::iota(dimensions.begin(), dimensions.end(), 0);
-    *input->mutable_shape() = ShapeUtil::MakeShape(F32, ones);
-    b.Reshape(b.ConstantLiteral(*input), dimensions, {});
+    *input_literal->mutable_shape() = ShapeUtil::MakeShape(F32, ones);
 
-    ComputeAndCompareR0<float>(&b, 83.0f, {}, zero_error_spec_);
+    ComputationDataHandle parameter;
+    auto input = CreateParameterAndTransferLiteral(0, *input_literal, "input",
+                                                   &b, &parameter);
+    b.Reshape(parameter, dimensions, {});
+
+    auto expected_literal = Literal::CreateR0<float>(83.0f);
+    ComputeAndCompareLiteral(&b, *expected_literal, {input.get()},
+                             zero_error_spec_);
   }
 }
 
-XLA_TEST_F(ReshapeTest, BadDimensions) {
+XLA_TEST_P(ReshapeTest, BadDimensions) {
   ComputationBuilder b(client_, TestName());
-  b.Reshape(b.ConstantR1<int32>({1}), {}, {});
+  auto input_literal = Literal::CreateR1<float>({1.0f});
+  ComputationDataHandle parameter;
+  auto input = CreateParameterAndTransferLiteral(0, *input_literal, "input", &b,
+                                                 &parameter);
+  b.Reshape(parameter, {}, {});
   EXPECT_THAT(
       ExecuteToString(&b, {}),
       ::testing::HasSubstr("not a permutation of the operand dimensions"));
 }
 
-XLA_TEST_F(ReshapeTest, BadNewSizes) {
+XLA_TEST_P(ReshapeTest, BadNewSizes) {
   ComputationBuilder b(client_, TestName());
-  b.Reshape(b.ConstantR1<int32>({1, 2}), {1}, {});
+  auto input_literal = Literal::CreateR1<float>({1.0f, 2.0f});
+  ComputationDataHandle parameter;
+  auto input = CreateParameterAndTransferLiteral(0, *input_literal, "input", &b,
+                                                 &parameter);
+  b.Reshape(parameter, {1}, {});
   EXPECT_THAT(ExecuteToString(&b, {}),
               ::testing::HasSubstr("mismatched element counts"));
 }
 
-XLA_TEST_F(ReshapeTest, R4Dim0MinorLayoutToR2Dim0MajorLayout) {
-  const Shape parameter_shape = ShapeUtil::MakeShape(F32, {2, 2, 2, 2});
+XLA_TEST_P(ReshapeTest, R4Dim0MinorLayoutToR2Dim0MajorLayout) {
   ComputationBuilder builder(client_, TestName());
-  auto a = builder.Parameter(0, parameter_shape, "a");
-  builder.Reshape(a, /*dimensions=*/{0, 1, 2, 3}, /*new_sizes=*/{2, 8});
-
   // clang-format off
-  auto literal = Literal::CreateR4FromArray4DWithLayout(Array4D<float>{
+  auto input_literal = Literal::CreateR4FromArray4DWithLayout(Array4D<float>{
     {
       {
         {0, 1},
@@ -474,8 +633,12 @@ XLA_TEST_F(ReshapeTest, R4Dim0MinorLayoutToR2Dim0MajorLayout) {
   },
        LayoutUtil::MakeLayout({0, 1, 2, 3}));
   // clang-format on
-  std::unique_ptr<GlobalData> input =
-      client_->TransferToServer(*literal).ConsumeValueOrDie();
+  ComputationDataHandle parameter;
+  auto input = CreateParameterAndTransferLiteral(0, *input_literal, "input",
+                                                 &builder, &parameter);
+
+  builder.Reshape(parameter, /*dimensions=*/{0, 1, 2, 3}, /*new_sizes=*/{2, 8});
+
   Array2D<float> expected_array({
       {0, 1, 2, 3, 100, 101, 102, 103},
       {222, 333, 444, 555, 666, 777, 888, 999},
@@ -484,72 +647,75 @@ XLA_TEST_F(ReshapeTest, R4Dim0MinorLayoutToR2Dim0MajorLayout) {
   Computation computation = builder.Build().ConsumeValueOrDie();
   ExecutionOptions execution_options = execution_options_;
   *execution_options.mutable_shape_with_output_layout() =
-      ShapeUtil::MakeShapeWithLayout(F32, {2, 8}, {1, 0});
+      ShapeUtil::MakeShapeWithLayout(use_bfloat16() ? BF16 : F32, {2, 8},
+                                     {1, 0});
   std::unique_ptr<Literal> actual =
       client_
           ->ExecuteAndTransfer(computation, {input.get()}, &execution_options)
           .ConsumeValueOrDie();
   std::unique_ptr<Literal> expected =
       Literal::CreateR2FromArray2D<float>(expected_array);
+  if (use_bfloat16()) {
+    expected = LiteralTestUtil::ConvertF32ToBF16(*expected);
+  }
   LiteralTestUtil::ExpectEqual(*expected, *actual);
 }
 
-XLA_TEST_F(ReshapeTest, R2ToR4_3x8_To_3x2x1x4) {
-  std::unique_ptr<Literal> input = Literal::CreateR2<float>({
+XLA_TEST_P(ReshapeTest, R2ToR4_3x8_To_3x2x1x4) {
+  ComputationBuilder builder(client_, TestName());
+  std::unique_ptr<Literal> input_literal = Literal::CreateR2<float>({
       {0, 1, 2, 3, 4, 5, 6, 7},
       {100, 101, 102, 103, 104, 105, 106, 107},
       {200, 201, 202, 203, 204, 205, 206, 207},
   });
-  std::unique_ptr<GlobalData> input_data =
-      client_->TransferToServer(*input).ConsumeValueOrDie();
-
-  ComputationBuilder builder(client_, TestName());
-  auto a = builder.Parameter(0, input->shape(), "a");
-  builder.Reshape(a, /*dimensions=*/{0, 1}, /*new_sizes=*/{3, 2, 1, 4});
+  ComputationDataHandle parameter;
+  auto input = CreateParameterAndTransferLiteral(0, *input_literal, "input",
+                                                 &builder, &parameter);
+  builder.Reshape(parameter, /*dimensions=*/{0, 1}, /*new_sizes=*/{3, 2, 1, 4});
 
   // clang-format off
-  Array4D<float> expected = {
+  auto expected_literal = Literal::CreateR4<float>({
     {{{0, 1, 2, 3}},
      {{4, 5, 6, 7}}},
     {{{100, 101, 102, 103}},
      {{104, 105, 106, 107}}},
     {{{200, 201, 202, 203}},
      {{204, 205, 206, 207}}}
-  };
+  });
   // clang-format on
-  ComputeAndCompareR4<float>(&builder, expected, {input_data.get()},
-                             zero_error_spec_);
+  ComputeAndCompareLiteral(&builder, *expected_literal, {input.get()},
+                           zero_error_spec_);
 }
 
 // Tests R2->R4 reshape with the reshape dimensions {1, 0}.
-XLA_TEST_F(ReshapeTest, R2ToR4_3x8_To_3x2x1x4_Dimensions_10) {
-  std::unique_ptr<Literal> input = Literal::CreateR2<float>({
+XLA_TEST_P(ReshapeTest, R2ToR4_3x8_To_3x2x1x4_Dimensions_10) {
+  ComputationBuilder builder(client_, TestName());
+  std::unique_ptr<Literal> input_literal = Literal::CreateR2<float>({
       {0, 1, 2, 3, 4, 5, 6, 7},
       {100, 101, 102, 103, 104, 105, 106, 107},
       {200, 201, 202, 203, 204, 205, 206, 207},
   });
-  std::unique_ptr<GlobalData> input_data =
-      client_->TransferToServer(*input).ConsumeValueOrDie();
-
-  ComputationBuilder builder(client_, TestName());
-  auto a = builder.Parameter(0, input->shape(), "a");
-  builder.Reshape(a, /*dimensions=*/{1, 0}, /*new_sizes=*/{3, 2, 1, 4});
+  ComputationDataHandle parameter;
+  auto input = CreateParameterAndTransferLiteral(0, *input_literal, "input",
+                                                 &builder, &parameter);
+  builder.Reshape(parameter, /*dimensions=*/{1, 0}, /*new_sizes=*/{3, 2, 1, 4});
 
   // clang-format off
-  Array4D<float> expected = {
+  auto expected_literal = Literal::CreateR4<float>({
     {{{0, 100, 200, 1}},
      {{101, 201, 2, 102}}},
     {{{202, 3, 103, 203}},
      {{4, 104, 204, 5}}},
     {{{105, 205, 6, 106}},
      {{206, 7, 107, 207}}}
-  };
+  });
   // clang-format on
-  ComputeAndCompareR4<float>(&builder, expected, {input_data.get()},
-                             zero_error_spec_);
+  ComputeAndCompareLiteral(&builder, *expected_literal, {input.get()},
+                           zero_error_spec_);
 }
 
-XLA_TEST_F(ReshapeTest, R4ToR2_2x1x1x1_To_2x1) {
+XLA_TEST_P(ReshapeTest, R4ToR2_2x1x1x1_To_2x1) {
+  ComputationBuilder builder(client_, TestName());
   std::mt19937 rng;
   std::uniform_real_distribution<float> distribution;
   Array4D<float> input(2, 1, 1, 1);
@@ -559,12 +725,10 @@ XLA_TEST_F(ReshapeTest, R4ToR2_2x1x1x1_To_2x1) {
   std::unique_ptr<Literal> input_literal =
       Literal::CreateR4FromArray4DWithLayout(
           input, LayoutUtil::MakeLayout({3, 2, 1, 0}));
-  std::unique_ptr<GlobalData> input_data =
-      client_->TransferToServer(*input_literal).ConsumeValueOrDie();
-
-  ComputationBuilder builder(client_, TestName());
-  auto a = builder.Parameter(0, input_literal->shape(), "a");
-  builder.Reshape(a, /*dimensions=*/{0, 1, 2, 3}, /*new_sizes=*/{2, 1});
+  ComputationDataHandle parameter;
+  auto input_data = CreateParameterAndTransferLiteral(
+      0, *input_literal, "input", &builder, &parameter);
+  builder.Reshape(parameter, /*dimensions=*/{0, 1, 2, 3}, /*new_sizes=*/{2, 1});
 
   std::unique_ptr<Literal> expected =
       LiteralTestUtil::Reshape({2, 1}, {1, 0}, *input_literal);
@@ -572,7 +736,8 @@ XLA_TEST_F(ReshapeTest, R4ToR2_2x1x1x1_To_2x1) {
                            zero_error_spec_);
 }
 
-XLA_TEST_F(ReshapeTest, R4ToR2_2x1x4x1_To_4x2) {
+XLA_TEST_P(ReshapeTest, R4ToR2_2x1x4x1_To_4x2) {
+  ComputationBuilder builder(client_, TestName());
   std::mt19937 rng;
   std::uniform_real_distribution<float> distribution;
   Array4D<float> input(2, 1, 4, 1);
@@ -582,12 +747,10 @@ XLA_TEST_F(ReshapeTest, R4ToR2_2x1x4x1_To_4x2) {
   std::unique_ptr<Literal> input_literal =
       Literal::CreateR4FromArray4DWithLayout(
           input, LayoutUtil::MakeLayout({3, 2, 1, 0}));
-  std::unique_ptr<GlobalData> input_data =
-      client_->TransferToServer(*input_literal).ConsumeValueOrDie();
-
-  ComputationBuilder builder(client_, TestName());
-  auto a = builder.Parameter(0, input_literal->shape(), "a");
-  builder.Reshape(a, /*dimensions=*/{0, 1, 2, 3}, /*new_sizes=*/{4, 2});
+  ComputationDataHandle parameter;
+  auto input_data = CreateParameterAndTransferLiteral(
+      0, *input_literal, "input", &builder, &parameter);
+  builder.Reshape(parameter, /*dimensions=*/{0, 1, 2, 3}, /*new_sizes=*/{4, 2});
 
   std::unique_ptr<Literal> expected =
       LiteralTestUtil::Reshape({4, 2}, {1, 0}, *input_literal);
@@ -596,7 +759,8 @@ XLA_TEST_F(ReshapeTest, R4ToR2_2x1x4x1_To_4x2) {
 }
 
 // Tests R4->R2 reshape with the reshape dimensions {0, 2, 1, 3}.
-XLA_TEST_F(ReshapeTest, R4ToR2_5x10x2x3_To_5x60_Dimensions_0213) {
+XLA_TEST_P(ReshapeTest, R4ToR2_5x10x2x3_To_5x60_Dimensions_0213) {
+  ComputationBuilder builder(client_, TestName());
   std::mt19937 rng;
   std::uniform_real_distribution<float> distribution;
   Array4D<float> input(5, 10, 2, 3);
@@ -606,12 +770,11 @@ XLA_TEST_F(ReshapeTest, R4ToR2_5x10x2x3_To_5x60_Dimensions_0213) {
   std::unique_ptr<Literal> input_literal =
       Literal::CreateR4FromArray4DWithLayout(
           input, LayoutUtil::MakeLayout({3, 2, 1, 0}));
-  std::unique_ptr<GlobalData> input_data =
-      client_->TransferToServer(*input_literal).ConsumeValueOrDie();
-
-  ComputationBuilder builder(client_, TestName());
-  auto a = builder.Parameter(0, input_literal->shape(), "a");
-  builder.Reshape(a, /*dimensions=*/{0, 2, 1, 3}, /*new_sizes=*/{5, 60});
+  ComputationDataHandle parameter;
+  auto input_data = CreateParameterAndTransferLiteral(
+      0, *input_literal, "input", &builder, &parameter);
+  builder.Reshape(parameter, /*dimensions=*/{0, 2, 1, 3},
+                  /*new_sizes=*/{5, 60});
 
   Array2D<float> expected_array(5, 60);
   input.Each([&](tensorflow::gtl::ArraySlice<int64> indices, float* cell) {
@@ -619,10 +782,12 @@ XLA_TEST_F(ReshapeTest, R4ToR2_5x10x2x3_To_5x60_Dimensions_0213) {
         *cell;
   });
   auto expected = Literal::CreateR2FromArray2D(expected_array);
-  ComputeAndCompareLiteral(&builder, *expected, {input_data.get()});
+  ComputeAndCompareLiteral(&builder, *expected, {input_data.get()},
+                           zero_error_spec_);
 }
 
-XLA_TEST_F(ReshapeTest, NoopReshape) {
+XLA_TEST_P(ReshapeTest, NoopReshape) {
+  ComputationBuilder builder(client_, TestName());
   std::mt19937 rng;
   std::uniform_real_distribution<float> distribution;
   Array4D<float> input_array(2, 3, 5, 7);
@@ -632,18 +797,17 @@ XLA_TEST_F(ReshapeTest, NoopReshape) {
   std::unique_ptr<Literal> input_literal =
       Literal::CreateR4FromArray4DWithLayout(
           input_array, LayoutUtil::MakeLayout({1, 2, 3, 0}));
-  std::unique_ptr<GlobalData> input_data =
-      client_->TransferToServer(*input_literal).ConsumeValueOrDie();
-
-  ComputationBuilder builder(client_, TestName());
-  auto input = builder.Parameter(0, input_literal->shape(), "input");
-  builder.Reshape(input, /*dimensions=*/{3, 0, 1, 2},
+  ComputationDataHandle parameter;
+  auto input_data = CreateParameterAndTransferLiteral(
+      0, *input_literal, "input", &builder, &parameter);
+  builder.Reshape(parameter, /*dimensions=*/{3, 0, 1, 2},
                   /*new_sizes=*/{7, 2, 3, 5});
   Computation computation = builder.Build().ConsumeValueOrDie();
 
   ExecutionOptions execution_options = execution_options_;
   *execution_options.mutable_shape_with_output_layout() =
-      ShapeUtil::MakeShapeWithLayout(F32, {7, 2, 3, 5}, {2, 3, 0, 1});
+      ShapeUtil::MakeShapeWithLayout(use_bfloat16() ? BF16 : F32, {7, 2, 3, 5},
+                                     {2, 3, 0, 1});
   std::unique_ptr<Literal> output_literal =
       client_
           ->ExecuteAndTransfer(computation, {input_data.get()},
@@ -652,35 +816,45 @@ XLA_TEST_F(ReshapeTest, NoopReshape) {
 
   // Since the reshape is a no-op, verify that it does not change the underlying
   // data.
-  EXPECT_EQ(tensorflow::gtl::ArraySlice<float>(input_literal->f32s()),
-            tensorflow::gtl::ArraySlice<float>(output_literal->f32s()));
+  if (use_bfloat16()) {
+    auto expected = LiteralTestUtil::ConvertF32ToBF16(*input_literal);
+    EXPECT_EQ(tensorflow::gtl::ArraySlice<bfloat16>(expected->bf16s()),
+              tensorflow::gtl::ArraySlice<bfloat16>(output_literal->bf16s()));
+  } else {
+    EXPECT_EQ(tensorflow::gtl::ArraySlice<float>(input_literal->f32s()),
+              tensorflow::gtl::ArraySlice<float>(output_literal->f32s()));
+  }
 }
 
-XLA_TEST_F(ReshapeTest, R4ToR4Reshape_Trivial) {
-  auto literal_1x2x3x4 = Literal::CreateR4(
+XLA_TEST_P(ReshapeTest, R4ToR4Reshape_Trivial) {
+  ComputationBuilder builder(client_, TestName());
+  auto literal_1x2x3x4 = Literal::CreateR4<float>(
       {{{{1, 2, 3, 4}, {5, 6, 7, 8}, {9, 10, 11, 12}},
         {{13, 14, 15, 16}, {17, 18, 19, 20}, {21, 22, 23, 24}}}});
 
-  ComputationBuilder builder(client_, TestName());
-  auto input = builder.ConstantLiteral(*literal_1x2x3x4);
-  builder.Reshape(input, /*dimensions=*/{0, 1, 2, 3},
+  ComputationDataHandle parameter;
+  auto input = CreateParameterAndTransferLiteral(0, *literal_1x2x3x4, "input",
+                                                 &builder, &parameter);
+  builder.Reshape(parameter, /*dimensions=*/{0, 1, 2, 3},
                   /*new_sizes=*/{1, 2, 3, 4});
 
-  ComputeAndCompareLiteral(&builder, *literal_1x2x3x4, {});
+  ComputeAndCompareLiteral(&builder, *literal_1x2x3x4, {input.get()});
 }
 
-XLA_TEST_F(ReshapeTest, R4ToR4Reshape) {
-  auto literal_1x2x3x4 = Literal::CreateR4(
+XLA_TEST_P(ReshapeTest, R4ToR4Reshape) {
+  auto literal_1x2x3x4 = Literal::CreateR4<float>(
       {{{{1, 2, 3, 4}, {5, 6, 7, 8}, {9, 10, 11, 12}},
         {{13, 14, 15, 16}, {17, 18, 19, 20}, {21, 22, 23, 24}}}});
 
   ComputationBuilder builder(client_, TestName());
-  auto input = builder.ConstantLiteral(*literal_1x2x3x4);
-  builder.Reshape(input, /*dimensions=*/{1, 3, 2, 0},
+  ComputationDataHandle parameter;
+  auto input = CreateParameterAndTransferLiteral(0, *literal_1x2x3x4, "input",
+                                                 &builder, &parameter);
+  builder.Reshape(parameter, /*dimensions=*/{1, 3, 2, 0},
                   /*new_sizes=*/{2, 4, 3, 1});
 
   // clang-format off
-  auto expected_2x4x3x1 = Literal::CreateR4(
+  auto expected_2x4x3x1 = Literal::CreateR4<float>(
       {{{{1}, {5}, {9}},
         {{2}, {6}, {10}},
         {{3}, {7}, {11}},
@@ -691,10 +865,10 @@ XLA_TEST_F(ReshapeTest, R4ToR4Reshape) {
         {{16}, {20}, {24}}}});
   // clang-format on
 
-  ComputeAndCompareLiteral(&builder, *expected_2x4x3x1, {});
+  ComputeAndCompareLiteral(&builder, *expected_2x4x3x1, {input.get()});
 }
 
-XLA_TEST_F(ReshapeTest, R4TwoMinorTransposeSimple) {
+XLA_TEST_P(ReshapeTest, R4TwoMinorTransposeSimple) {
   std::mt19937 rng;
   std::uniform_real_distribution<float> distribution;
   std::vector<int64> bounds = {2, 2, 2, 2};
@@ -706,12 +880,12 @@ XLA_TEST_F(ReshapeTest, R4TwoMinorTransposeSimple) {
   std::unique_ptr<Literal> input_literal =
       Literal::CreateR4FromArray4DWithLayout(
           input, LayoutUtil::MakeLayout({3, 2, 1, 0}));
-  std::unique_ptr<GlobalData> input_data =
-      client_->TransferToServer(*input_literal).ConsumeValueOrDie();
-
   ComputationBuilder builder(client_, TestName());
-  auto a = builder.Parameter(0, input_literal->shape(), "a");
-  builder.Reshape(a, /*dimensions=*/{0, 1, 3, 2}, /*new_sizes=*/new_bounds);
+  ComputationDataHandle parameter;
+  auto input_data = CreateParameterAndTransferLiteral(
+      0, *input_literal, "input", &builder, &parameter);
+  builder.Reshape(parameter, /*dimensions=*/{0, 1, 3, 2},
+                  /*new_sizes=*/new_bounds);
 
   std::unique_ptr<Literal> expected =
       LiteralTestUtil::Reshape(new_bounds, {2, 3, 1, 0}, *input_literal)
@@ -723,7 +897,7 @@ XLA_TEST_F(ReshapeTest, R4TwoMinorTransposeSimple) {
                            zero_error_spec_, &expected->shape());
 }
 
-XLA_TEST_F(ReshapeTest, R4TwoMinorTransposeMajorFirstEffectiveR2) {
+XLA_TEST_P(ReshapeTest, R4TwoMinorTransposeMajorFirstEffectiveR2) {
   std::mt19937 rng;
   std::uniform_real_distribution<float> distribution;
   std::vector<int64> bounds = {1, 1, 250, 300};
@@ -735,12 +909,12 @@ XLA_TEST_F(ReshapeTest, R4TwoMinorTransposeMajorFirstEffectiveR2) {
   std::unique_ptr<Literal> input_literal =
       Literal::CreateR4FromArray4DWithLayout(
           input, LayoutUtil::MakeLayout({3, 2, 1, 0}));
-  std::unique_ptr<GlobalData> input_data =
-      client_->TransferToServer(*input_literal).ConsumeValueOrDie();
-
   ComputationBuilder builder(client_, TestName());
-  auto a = builder.Parameter(0, input_literal->shape(), "a");
-  builder.Reshape(a, /*dimensions=*/{0, 1, 3, 2}, /*new_sizes=*/new_bounds);
+  ComputationDataHandle parameter;
+  auto input_data = CreateParameterAndTransferLiteral(
+      0, *input_literal, "input", &builder, &parameter);
+  builder.Reshape(parameter, /*dimensions=*/{0, 1, 3, 2},
+                  /*new_sizes=*/new_bounds);
 
   std::unique_ptr<Literal> expected =
       LiteralTestUtil::Reshape(new_bounds, {2, 3, 1, 0}, *input_literal)
@@ -752,7 +926,7 @@ XLA_TEST_F(ReshapeTest, R4TwoMinorTransposeMajorFirstEffectiveR2) {
                            zero_error_spec_, &expected->shape());
 }
 
-XLA_TEST_F(ReshapeTest, R4TwoMinorTransposeMajorFirstMinorEffectiveR1) {
+XLA_TEST_P(ReshapeTest, R4TwoMinorTransposeMajorFirstMinorEffectiveR1) {
   std::mt19937 rng;
   std::uniform_real_distribution<float> distribution;
   std::vector<int64> bounds = {5, 5, 1, 10};
@@ -764,12 +938,12 @@ XLA_TEST_F(ReshapeTest, R4TwoMinorTransposeMajorFirstMinorEffectiveR1) {
   std::unique_ptr<Literal> input_literal =
       Literal::CreateR4FromArray4DWithLayout(
           input, LayoutUtil::MakeLayout({3, 2, 1, 0}));
-  std::unique_ptr<GlobalData> input_data =
-      client_->TransferToServer(*input_literal).ConsumeValueOrDie();
-
   ComputationBuilder builder(client_, TestName());
-  auto a = builder.Parameter(0, input_literal->shape(), "a");
-  builder.Reshape(a, /*dimensions=*/{0, 1, 3, 2}, /*new_sizes=*/new_bounds);
+  ComputationDataHandle parameter;
+  auto input_data = CreateParameterAndTransferLiteral(
+      0, *input_literal, "input", &builder, &parameter);
+  builder.Reshape(parameter, /*dimensions=*/{0, 1, 3, 2},
+                  /*new_sizes=*/new_bounds);
 
   std::unique_ptr<Literal> expected =
       LiteralTestUtil::Reshape(new_bounds, {2, 3, 1, 0}, *input_literal)
@@ -781,7 +955,7 @@ XLA_TEST_F(ReshapeTest, R4TwoMinorTransposeMajorFirstMinorEffectiveR1) {
                            zero_error_spec_, &expected->shape());
 }
 
-XLA_TEST_F(ReshapeTest, R4TwoMinorTransposeMajorFirstMinorEffectiveR1InR2) {
+XLA_TEST_P(ReshapeTest, R4TwoMinorTransposeMajorFirstMinorEffectiveR1InR2) {
   std::mt19937 rng;
   std::uniform_real_distribution<float> distribution;
   // This happens in NN-Builder MNIST.
@@ -794,12 +968,12 @@ XLA_TEST_F(ReshapeTest, R4TwoMinorTransposeMajorFirstMinorEffectiveR1InR2) {
   std::unique_ptr<Literal> input_literal =
       Literal::CreateR4FromArray4DWithLayout(
           input, LayoutUtil::MakeLayout({3, 2, 1, 0}));
-  std::unique_ptr<GlobalData> input_data =
-      client_->TransferToServer(*input_literal).ConsumeValueOrDie();
-
   ComputationBuilder builder(client_, TestName());
-  auto a = builder.Parameter(0, input_literal->shape(), "a");
-  builder.Reshape(a, /*dimensions=*/{0, 1, 3, 2}, /*new_sizes=*/new_bounds);
+  ComputationDataHandle parameter;
+  auto input_data = CreateParameterAndTransferLiteral(
+      0, *input_literal, "input", &builder, &parameter);
+  builder.Reshape(parameter, /*dimensions=*/{0, 1, 3, 2},
+                  /*new_sizes=*/new_bounds);
 
   std::unique_ptr<Literal> expected =
       LiteralTestUtil::Reshape(new_bounds, {2, 3, 1, 0}, *input_literal)
@@ -811,7 +985,7 @@ XLA_TEST_F(ReshapeTest, R4TwoMinorTransposeMajorFirstMinorEffectiveR1InR2) {
                            zero_error_spec_, &expected->shape());
 }
 
-XLA_TEST_F(ReshapeTest, R4TwoMinorTransposeTrivialR2) {
+XLA_TEST_P(ReshapeTest, R4TwoMinorTransposeTrivialR2) {
   std::mt19937 rng;
   std::uniform_real_distribution<float> distribution;
   std::vector<int64> bounds = {3, 3, 1, 3};
@@ -823,12 +997,12 @@ XLA_TEST_F(ReshapeTest, R4TwoMinorTransposeTrivialR2) {
   std::unique_ptr<Literal> input_literal =
       Literal::CreateR4FromArray4DWithLayout(
           input, LayoutUtil::MakeLayout({0, 1, 2, 3}));
-  std::unique_ptr<GlobalData> input_data =
-      client_->TransferToServer(*input_literal).ConsumeValueOrDie();
-
   ComputationBuilder builder(client_, TestName());
-  auto a = builder.Parameter(0, input_literal->shape(), "a");
-  builder.Reshape(a, /*dimensions=*/{1, 0, 2, 3}, /*new_sizes=*/new_bounds);
+  ComputationDataHandle parameter;
+  auto input_data = CreateParameterAndTransferLiteral(
+      0, *input_literal, "input", &builder, &parameter);
+  builder.Reshape(parameter, /*dimensions=*/{1, 0, 2, 3},
+                  /*new_sizes=*/new_bounds);
 
   std::unique_ptr<Literal> expected =
       LiteralTestUtil::Reshape(new_bounds, {1, 0, 2, 3}, *input_literal)
@@ -840,5 +1014,13 @@ XLA_TEST_F(ReshapeTest, R4TwoMinorTransposeTrivialR2) {
                            zero_error_spec_, &expected->shape());
 }
 
+#if defined(XLA_TEST_BACKEND_CPU) || defined(XLA_TEST_BACKEND_CPU_PARALLEL) || \
+    defined(XLA_TEST_BACKEND_GPU)
+INSTANTIATE_TEST_CASE_P(ReshapeTestInstance, ReshapeTest,
+                        ::testing::ValuesIn(std::vector<bool>{false}));
+#else
+INSTANTIATE_TEST_CASE_P(ReshapeTestInstance, ReshapeTest, ::testing::Bool());
+#endif
+
 }  // namespace
 }  // namespace xla
-- 
GitLab


From 232db28aad1d9377965cdd38a9e3d886c8eddc9a Mon Sep 17 00:00:00 2001
From: Sourabh Bajaj <sourabhbajaj@google.com>
Date: Fri, 1 Dec 2017 15:51:00 -0800
Subject: [PATCH 0532/1225] Fix long stride op issue

---
 tensorflow/core/kernels/strided_slice_op_impl.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tensorflow/core/kernels/strided_slice_op_impl.h b/tensorflow/core/kernels/strided_slice_op_impl.h
index de65147572..afe3a051e6 100644
--- a/tensorflow/core/kernels/strided_slice_op_impl.h
+++ b/tensorflow/core/kernels/strided_slice_op_impl.h
@@ -284,6 +284,7 @@ TF_CALL_GPU_NUMBER_TYPES(DECLARE_FOR_N_GPU);
 TF_CALL_complex64(DECLARE_FOR_N_GPU);
 TF_CALL_complex128(DECLARE_FOR_N_GPU);
 DECLARE_FOR_N_GPU(int32);
+DECLARE_FOR_N_GPU(int64);
 #endif  // END GOOGLE_CUDA
 
 TF_CALL_ALL_TYPES(DECLARE_FOR_N_CPU);
@@ -299,6 +300,7 @@ DECLARE_FOR_N_CPU(bfloat16);
 TF_CALL_SYCL_PROXY_TYPES(PREVENT_FOR_N_SYCL);
 TF_CALL_GPU_NUMBER_TYPES_NO_HALF(DECLARE_FOR_N_SYCL);
 DECLARE_FOR_N_SYCL(int32);
+DECLARE_FOR_N_SYCL(int64);
 
 #undef DECLARE_FOR_N_SYCL
 #endif // TENSORFLOW_USE_SYCL
-- 
GitLab


From de1ee126c094c840668d5e794e347159be66b23c Mon Sep 17 00:00:00 2001
From: Justine Tunney <jart@google.com>
Date: Fri, 1 Dec 2017 15:48:30 -0800
Subject: [PATCH 0533/1225] Rename create_summary_file_writer to
 create_file_writer

PiperOrigin-RevId: 177651937
---
 tensorflow/contrib/eager/python/evaluator.py  |  2 +-
 .../linear_regression/linear_regression.py    |  2 +-
 .../eager/python/examples/mnist/mnist.py      |  4 +--
 .../examples/resnet50/resnet50_graph_test.py  |  2 +-
 .../python/examples/resnet50/resnet50_test.py |  2 +-
 .../examples/rnn_colorbot/rnn_colorbot.py     |  4 +--
 .../contrib/eager/python/g3doc/guide.md       |  2 +-
 .../contrib/eager/python/metrics_test.py      |  2 +-
 tensorflow/contrib/summary/summary.py         |  3 +-
 tensorflow/contrib/summary/summary_ops.py     | 35 ++++++++++++-------
 .../contrib/summary/summary_ops_graph_test.py |  6 ++--
 .../contrib/summary/summary_ops_test.py       | 30 ++++++++--------
 .../contrib/summary/summary_test_internal.py  |  4 +--
 .../contrib/summary/summary_test_util.py      |  4 +--
 third_party/examples/eager/spinn/spinn.py     |  2 +-
 15 files changed, 57 insertions(+), 47 deletions(-)

diff --git a/tensorflow/contrib/eager/python/evaluator.py b/tensorflow/contrib/eager/python/evaluator.py
index bd0ab02ecf..3faaeef590 100644
--- a/tensorflow/contrib/eager/python/evaluator.py
+++ b/tensorflow/contrib/eager/python/evaluator.py
@@ -110,7 +110,7 @@ class Evaluator(object):
         return self._all_metric_results()
     else:
       def f():
-        with summary_ops.create_summary_file_writer(
+        with summary_ops.create_file_writer(
             summary_logdir).as_default(), summary_ops.always_record_summaries():
           return self._all_metric_results()
       if context.in_eager_mode():
diff --git a/tensorflow/contrib/eager/python/examples/linear_regression/linear_regression.py b/tensorflow/contrib/eager/python/examples/linear_regression/linear_regression.py
index d0130ebd11..7bc5007c56 100644
--- a/tensorflow/contrib/eager/python/examples/linear_regression/linear_regression.py
+++ b/tensorflow/contrib/eager/python/examples/linear_regression/linear_regression.py
@@ -85,7 +85,7 @@ def fit(model, dataset, optimizer, verbose=False, logdir=None):
   if logdir:
     # Support for TensorBoard summaries. Once training has started, use:
     #   tensorboard --logdir=<logdir>
-    summary_writer = tf.contrib.summary.create_summary_file_writer(logdir)
+    summary_writer = tf.contrib.summary.create_file_writer(logdir)
 
   # Training loop.
   for i, (xs, ys) in enumerate(tfe.Iterator(dataset)):
diff --git a/tensorflow/contrib/eager/python/examples/mnist/mnist.py b/tensorflow/contrib/eager/python/examples/mnist/mnist.py
index bfb7d5a900..bb121c7704 100644
--- a/tensorflow/contrib/eager/python/examples/mnist/mnist.py
+++ b/tensorflow/contrib/eager/python/examples/mnist/mnist.py
@@ -190,9 +190,9 @@ def main(_):
   else:
     train_dir = None
     test_dir = None
-  summary_writer = tf.contrib.summary.create_summary_file_writer(
+  summary_writer = tf.contrib.summary.create_file_writer(
       train_dir, flush_millis=10000)
-  test_summary_writer = tf.contrib.summary.create_summary_file_writer(
+  test_summary_writer = tf.contrib.summary.create_file_writer(
       test_dir, flush_millis=10000, name='test')
   checkpoint_prefix = os.path.join(FLAGS.checkpoint_dir, 'ckpt')
 
diff --git a/tensorflow/contrib/eager/python/examples/resnet50/resnet50_graph_test.py b/tensorflow/contrib/eager/python/examples/resnet50/resnet50_graph_test.py
index 14c82c87a7..23317886e7 100644
--- a/tensorflow/contrib/eager/python/examples/resnet50/resnet50_graph_test.py
+++ b/tensorflow/contrib/eager/python/examples/resnet50/resnet50_graph_test.py
@@ -73,7 +73,7 @@ class ResNet50GraphTest(tf.test.TestCase):
       tf.train.get_or_create_global_step()
       logdir = tempfile.mkdtemp()
       with tf.contrib.summary.always_record_summaries():
-        with tf.contrib.summary.create_summary_file_writer(
+        with tf.contrib.summary.create_file_writer(
             logdir, max_queue=0,
             name='t0').as_default():
           model = resnet50.ResNet50(data_format())
diff --git a/tensorflow/contrib/eager/python/examples/resnet50/resnet50_test.py b/tensorflow/contrib/eager/python/examples/resnet50/resnet50_test.py
index 582f4837c6..d8d8644dde 100644
--- a/tensorflow/contrib/eager/python/examples/resnet50/resnet50_test.py
+++ b/tensorflow/contrib/eager/python/examples/resnet50/resnet50_test.py
@@ -95,7 +95,7 @@ class ResNet50Test(tf.test.TestCase):
     model = resnet50.ResNet50(data_format)
     tf.train.get_or_create_global_step()
     logdir = tempfile.mkdtemp()
-    with tf.contrib.summary.create_summary_file_writer(
+    with tf.contrib.summary.create_file_writer(
         logdir, max_queue=0,
         name='t0').as_default(), tf.contrib.summary.always_record_summaries():
       with tf.device(device):
diff --git a/tensorflow/contrib/eager/python/examples/rnn_colorbot/rnn_colorbot.py b/tensorflow/contrib/eager/python/examples/rnn_colorbot/rnn_colorbot.py
index 609cbd2877..40919f2d4c 100644
--- a/tensorflow/contrib/eager/python/examples/rnn_colorbot/rnn_colorbot.py
+++ b/tensorflow/contrib/eager/python/examples/rnn_colorbot/rnn_colorbot.py
@@ -247,9 +247,9 @@ def main(_):
 
   log_dir = os.path.join(FLAGS.dir, "summaries")
   tf.gfile.MakeDirs(log_dir)
-  train_summary_writer = tf.contrib.summary.create_summary_file_writer(
+  train_summary_writer = tf.contrib.summary.create_file_writer(
       os.path.join(log_dir, "train"), flush_millis=10000)
-  test_summary_writer = tf.contrib.summary.create_summary_file_writer(
+  test_summary_writer = tf.contrib.summary.create_file_writer(
       os.path.join(log_dir, "eval"), flush_millis=10000, name="eval")
 
   with tf.device(device):
diff --git a/tensorflow/contrib/eager/python/g3doc/guide.md b/tensorflow/contrib/eager/python/g3doc/guide.md
index 147b7047f4..0095ffa0db 100644
--- a/tensorflow/contrib/eager/python/g3doc/guide.md
+++ b/tensorflow/contrib/eager/python/g3doc/guide.md
@@ -757,7 +757,7 @@ For example, to record summaries once every 100 global steps, use:
 
 ```python
 tf.train.get_or_create_global_step()  # Ensuring the global step variable exists
-writer = tf.contrib.summary.create_summary_file_writer(logdir)
+writer = tf.contrib.summary.create_file_writer(logdir)
 
 for _ in range(iterations):
   with writer.as_default():
diff --git a/tensorflow/contrib/eager/python/metrics_test.py b/tensorflow/contrib/eager/python/metrics_test.py
index 96eb1b4f2a..1055f4563c 100644
--- a/tensorflow/contrib/eager/python/metrics_test.py
+++ b/tensorflow/contrib/eager/python/metrics_test.py
@@ -67,7 +67,7 @@ class MetricsTest(test.TestCase):
     m([1, 10, 100])
     training_util.get_or_create_global_step()
     logdir = tempfile.mkdtemp()
-    with summary_ops.create_summary_file_writer(
+    with summary_ops.create_file_writer(
         logdir, max_queue=0,
         name="t0").as_default(), summary_ops.always_record_summaries():
       m.result()  # As a side-effect will write summaries.
diff --git a/tensorflow/contrib/summary/summary.py b/tensorflow/contrib/summary/summary.py
index 9e6af5232f..7d3b8b7437 100644
--- a/tensorflow/contrib/summary/summary.py
+++ b/tensorflow/contrib/summary/summary.py
@@ -28,7 +28,8 @@ from __future__ import print_function
 from tensorflow.contrib.summary.summary_ops import all_summary_ops
 from tensorflow.contrib.summary.summary_ops import always_record_summaries
 from tensorflow.contrib.summary.summary_ops import audio
-from tensorflow.contrib.summary.summary_ops import create_summary_db_writer
+from tensorflow.contrib.summary.summary_ops import create_db_writer
+from tensorflow.contrib.summary.summary_ops import create_file_writer
 from tensorflow.contrib.summary.summary_ops import create_summary_file_writer
 from tensorflow.contrib.summary.summary_ops import eval_dir
 from tensorflow.contrib.summary.summary_ops import flush
diff --git a/tensorflow/contrib/summary/summary_ops.py b/tensorflow/contrib/summary/summary_ops.py
index de6f2cd79f..4556162bfe 100644
--- a/tensorflow/contrib/summary/summary_ops.py
+++ b/tensorflow/contrib/summary/summary_ops.py
@@ -38,9 +38,11 @@ from tensorflow.python.ops import control_flow_ops
 from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import resource_variable_ops
 from tensorflow.python.ops import summary_op_util
+from tensorflow.python.platform import tf_logging as logging
 from tensorflow.python.training import training_util
 from tensorflow.python.util import tf_contextlib
 
+
 # Name for a collection which is expected to have at most a single boolean
 # Tensor. If this tensor is True the summary ops will record summaries.
 _SHOULD_RECORD_SUMMARIES_NAME = "ShouldRecordSummaries"
@@ -102,8 +104,8 @@ class SummaryWriter(object):
   """Encapsulates a stateful summary writer resource.
 
   See also:
-  - @{tf.contrib.summary.create_summary_file_writer}
-  - @{tf.contrib.summary.create_summary_db_writer}
+  - @{tf.contrib.summary.create_file_writer}
+  - @{tf.contrib.summary.create_db_writer}
   """
 
   def  __init__(self, resource):
@@ -169,11 +171,11 @@ def initialize(
     session.run(_graph(x, 0), feed_dict={x: data})
 
 
-def create_summary_file_writer(logdir,
-                               max_queue=None,
-                               flush_millis=None,
-                               filename_suffix=None,
-                               name=None):
+def create_file_writer(logdir,
+                       max_queue=None,
+                       flush_millis=None,
+                       filename_suffix=None,
+                       name=None):
   """Creates a summary file writer in the current context.
 
   Args:
@@ -210,11 +212,11 @@ def create_summary_file_writer(logdir,
         filename_suffix=filename_suffix)
 
 
-def create_summary_db_writer(db_uri,
-                             experiment_name=None,
-                             run_name=None,
-                             user_name=None,
-                             name=None):
+def create_db_writer(db_uri,
+                     experiment_name=None,
+                     run_name=None,
+                     user_name=None,
+                     name=None):
   """Creates a summary database writer in the current context.
 
   This can be used to write tensors from the execution graph directly
@@ -498,7 +500,7 @@ _graph = graph  # for functions with a graph parameter
 def import_event(tensor, name=None):
   """Writes a @{tf.Event} binary proto.
 
-  When using create_summary_db_writer(), this can be used alongside
+  When using create_db_writer(), this can be used alongside
   @{tf.TFRecordReader} to load event logs into the database. Please
   note that this is lower level than the other summary functions and
   will ignore any conditions set by methods like
@@ -542,6 +544,13 @@ def eval_dir(model_dir, name=None):
   return os.path.join(model_dir, "eval" if not name else "eval_" + name)
 
 
+def create_summary_file_writer(*args, **kwargs):
+  """Please use @{tf.contrib.summary.create_file_writer}."""
+  logging.warning("Deprecation Warning: create_summary_file_writer was renamed "
+                  "to create_file_writer")
+  return create_file_writer(*args, **kwargs)
+
+
 def _serialize_graph(arbitrary_graph):
   if isinstance(arbitrary_graph, ops.Graph):
     return arbitrary_graph.as_graph_def(add_shapes=True).SerializeToString()
diff --git a/tensorflow/contrib/summary/summary_ops_graph_test.py b/tensorflow/contrib/summary/summary_ops_graph_test.py
index 703adb7b46..f8da790188 100644
--- a/tensorflow/contrib/summary/summary_ops_graph_test.py
+++ b/tensorflow/contrib/summary/summary_ops_graph_test.py
@@ -48,7 +48,7 @@ class DbTest(summary_test_util.SummaryDbTest):
     name = 'hi'
     graph = graph_pb2.GraphDef(node=(node_def_pb2.NodeDef(name=name),))
     with self.test_session():
-      with self.create_summary_db_writer().as_default():
+      with self.create_db_writer().as_default():
         summary_ops.initialize(graph=graph)
     six.assertCountEqual(self, [name],
                          get_all(self.db, 'SELECT node_name FROM Nodes'))
@@ -57,7 +57,7 @@ class DbTest(summary_test_util.SummaryDbTest):
     with ops.Graph().as_default(), self.test_session():
       training_util.get_or_create_global_step()
       logdir = tempfile.mkdtemp()
-      with summary_ops.create_summary_file_writer(
+      with summary_ops.create_file_writer(
           logdir, max_queue=0,
           name='t2').as_default(), summary_ops.always_record_summaries():
         summary_ops.initialize()
@@ -78,7 +78,7 @@ class DbTest(summary_test_util.SummaryDbTest):
     with ops.Graph().as_default(), self.test_session():
       training_util.get_or_create_global_step()
       logdir = tempfile.mkdtemp()
-      with summary_ops.create_summary_file_writer(
+      with summary_ops.create_file_writer(
           logdir, max_queue=0,
           name='t2').as_default(), summary_ops.always_record_summaries():
         summary_ops.initialize()
diff --git a/tensorflow/contrib/summary/summary_ops_test.py b/tensorflow/contrib/summary/summary_ops_test.py
index 54433deb28..0b8e0b967c 100644
--- a/tensorflow/contrib/summary/summary_ops_test.py
+++ b/tensorflow/contrib/summary/summary_ops_test.py
@@ -44,7 +44,7 @@ class TargetTest(test_util.TensorFlowTestCase):
     logdir = '/tmp/apath/that/doesnt/exist'
     self.assertFalse(gfile.Exists(logdir))
     with self.assertRaises(errors.NotFoundError):
-      summary_ops.create_summary_file_writer(logdir, max_queue=0, name='t0')
+      summary_ops.create_file_writer(logdir, max_queue=0, name='t0')
 
   def testShouldRecordSummary(self):
     self.assertFalse(summary_ops.should_record_summaries())
@@ -54,7 +54,7 @@ class TargetTest(test_util.TensorFlowTestCase):
   def testSummaryOps(self):
     training_util.get_or_create_global_step()
     logdir = tempfile.mkdtemp()
-    with summary_ops.create_summary_file_writer(
+    with summary_ops.create_file_writer(
         logdir, max_queue=0,
         name='t0').as_default(), summary_ops.always_record_summaries():
       summary_ops.generic('tensor', 1, '')
@@ -69,7 +69,7 @@ class TargetTest(test_util.TensorFlowTestCase):
   def testDefunSummarys(self):
     training_util.get_or_create_global_step()
     logdir = tempfile.mkdtemp()
-    with summary_ops.create_summary_file_writer(
+    with summary_ops.create_file_writer(
         logdir, max_queue=0,
         name='t1').as_default(), summary_ops.always_record_summaries():
 
@@ -85,7 +85,7 @@ class TargetTest(test_util.TensorFlowTestCase):
   def testSummaryName(self):
     training_util.get_or_create_global_step()
     logdir = tempfile.mkdtemp()
-    with summary_ops.create_summary_file_writer(
+    with summary_ops.create_file_writer(
         logdir, max_queue=0,
         name='t2').as_default(), summary_ops.always_record_summaries():
 
@@ -98,7 +98,7 @@ class TargetTest(test_util.TensorFlowTestCase):
   def testSummaryGlobalStep(self):
     step = training_util.get_or_create_global_step()
     logdir = tempfile.mkdtemp()
-    with summary_ops.create_summary_file_writer(
+    with summary_ops.create_file_writer(
         logdir, max_queue=0,
         name='t2').as_default(), summary_ops.always_record_summaries():
 
@@ -110,7 +110,7 @@ class TargetTest(test_util.TensorFlowTestCase):
 
   def testMaxQueue(self):
     logs = tempfile.mkdtemp()
-    with summary_ops.create_summary_file_writer(
+    with summary_ops.create_file_writer(
         logs, max_queue=2, flush_millis=999999,
         name='lol').as_default(), summary_ops.always_record_summaries():
       get_total = lambda: len(summary_test_util.events_from_logdir(logs))
@@ -123,7 +123,7 @@ class TargetTest(test_util.TensorFlowTestCase):
 
   def testFlush(self):
     logs = tempfile.mkdtemp()
-    with summary_ops.create_summary_file_writer(
+    with summary_ops.create_file_writer(
         logs, max_queue=999999, flush_millis=999999,
         name='lol').as_default(), summary_ops.always_record_summaries():
       get_total = lambda: len(summary_test_util.events_from_logdir(logs))
@@ -150,7 +150,7 @@ class DbTest(summary_test_util.SummaryDbTest):
       return sum_
 
     with summary_ops.always_record_summaries():
-      with self.create_summary_db_writer().as_default():
+      with self.create_db_writer().as_default():
         self.assertEqual(5, adder(int64(2), int64(3)).numpy())
 
     six.assertCountEqual(self, [1, 1, 1],
@@ -162,7 +162,7 @@ class DbTest(summary_test_util.SummaryDbTest):
     sum_id = get_one(self.db, 'SELECT tag_id FROM Tags WHERE tag_name = "sum"')
 
     with summary_ops.always_record_summaries():
-      with self.create_summary_db_writer().as_default():
+      with self.create_db_writer().as_default():
         self.assertEqual(9, adder(int64(4), int64(5)).numpy())
 
     six.assertCountEqual(self, [1, 1, 1, 2, 2, 2],
@@ -185,26 +185,26 @@ class DbTest(summary_test_util.SummaryDbTest):
 
   def testBadExperimentName(self):
     with self.assertRaises(ValueError):
-      self.create_summary_db_writer(experiment_name='\0')
+      self.create_db_writer(experiment_name='\0')
 
   def testBadRunName(self):
     with self.assertRaises(ValueError):
-      self.create_summary_db_writer(run_name='\0')
+      self.create_db_writer(run_name='\0')
 
   def testBadUserName(self):
     with self.assertRaises(ValueError):
-      self.create_summary_db_writer(user_name='-hi')
+      self.create_db_writer(user_name='-hi')
     with self.assertRaises(ValueError):
-      self.create_summary_db_writer(user_name='hi-')
+      self.create_db_writer(user_name='hi-')
     with self.assertRaises(ValueError):
-      self.create_summary_db_writer(user_name='@')
+      self.create_db_writer(user_name='@')
 
   def testGraphSummary(self):
     training_util.get_or_create_global_step()
     name = 'hi'
     graph = graph_pb2.GraphDef(node=(node_def_pb2.NodeDef(name=name),))
     with summary_ops.always_record_summaries():
-      with self.create_summary_db_writer().as_default():
+      with self.create_db_writer().as_default():
         summary_ops.graph(graph)
     six.assertCountEqual(self, [name],
                          get_all(self.db, 'SELECT node_name FROM Nodes'))
diff --git a/tensorflow/contrib/summary/summary_test_internal.py b/tensorflow/contrib/summary/summary_test_internal.py
index 54233f2f50..80f60ae401 100644
--- a/tensorflow/contrib/summary/summary_test_internal.py
+++ b/tensorflow/contrib/summary/summary_test_internal.py
@@ -35,8 +35,8 @@ class SummaryDbTest(test_util.TensorFlowTestCase):
     if os.path.exists(self.db_path):
       os.unlink(self.db_path)
     self.db = sqlite3.connect(self.db_path)
-    self.create_summary_db_writer = functools.partial(
-        summary_ops.create_summary_db_writer,
+    self.create_db_writer = functools.partial(
+        summary_ops.create_db_writer,
         db_uri=self.db_path,
         experiment_name='experiment',
         run_name='run',
diff --git a/tensorflow/contrib/summary/summary_test_util.py b/tensorflow/contrib/summary/summary_test_util.py
index 915820e05b..bda57e6a0c 100644
--- a/tensorflow/contrib/summary/summary_test_util.py
+++ b/tensorflow/contrib/summary/summary_test_util.py
@@ -39,8 +39,8 @@ class SummaryDbTest(test_util.TensorFlowTestCase):
     if os.path.exists(self.db_path):
       os.unlink(self.db_path)
     self.db = sqlite3.connect(self.db_path)
-    self.create_summary_db_writer = functools.partial(
-        summary_ops.create_summary_db_writer,
+    self.create_db_writer = functools.partial(
+        summary_ops.create_db_writer,
         db_uri=self.db_path,
         experiment_name='experiment',
         run_name='run',
diff --git a/third_party/examples/eager/spinn/spinn.py b/third_party/examples/eager/spinn/spinn.py
index 963ac0e65b..a2fa18eeb1 100644
--- a/third_party/examples/eager/spinn/spinn.py
+++ b/third_party/examples/eager/spinn/spinn.py
@@ -567,7 +567,7 @@ def train_spinn(embed, train_data, dev_data, test_data, config):
       "{:>6.0f} {:>5.0f} {:>9.0f} {:>5.0f}/{:<5.0f} {:>7.0f}% {:>8.6f} "
       "{:8.6f} {:12.4f} {:12.4f}")
 
-  summary_writer = tf.contrib.summary.create_summary_file_writer(
+  summary_writer = tf.contrib.summary.create_file_writer(
       config.logdir, flush_millis=10000)
   train_len = train_data.num_batches(config.batch_size)
   with tf.device(device), \
-- 
GitLab


From 9232ef1bd38a6fbec2a62c2dcf373dcf0c01b6cb Mon Sep 17 00:00:00 2001
From: Yifei Feng <yifeif@google.com>
Date: Fri, 1 Dec 2017 15:53:44 -0800
Subject: [PATCH 0534/1225] closes #14981

PiperOrigin-RevId: 177652538
---
 tensorflow/workspace.bzl | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl
index 11f9aa2259..44c7a514d1 100644
--- a/tensorflow/workspace.bzl
+++ b/tensorflow/workspace.bzl
@@ -505,11 +505,11 @@ def tf_workspace(path_prefix="", tf_repo_name=""):
   native.http_archive(
       name = "grpc",
       urls = [
-          "https://mirror.bazel.build/github.com/grpc/grpc/archive/54e8f37e537794c2d814c1604c1282125f64f093.tar.gz",
-          "https://github.com/grpc/grpc/archive/54e8f37e537794c2d814c1604c1282125f64f093.tar.gz",
+          "https://mirror.bazel.build/github.com/grpc/grpc/archive/f836c7e941beb003289dc6e9a58a6e47f5caa5f0.tar.gz",
+          "https://github.com/grpc/grpc/archive/f836c7e941beb003289dc6e9a58a6e47f5caa5f0.tar.gz",
       ],
-      sha256 = "c2166b6d96daddf72fe45b2c594210c65ca17ec3c1b2e12089159a9529edb5e4",
-      strip_prefix = "grpc-54e8f37e537794c2d814c1604c1282125f64f093",
+      sha256 = "676425fc19e0290443b21f1804e5d1096456b6512b349606e3eae8e63299e6ee",
+      strip_prefix = "grpc-f836c7e941beb003289dc6e9a58a6e47f5caa5f0",
   )
 
   # gRPC wants the existence of a cares dependence but its contents are not
-- 
GitLab


From efbdc15b280374607895ab0ada467de4a0512e0c Mon Sep 17 00:00:00 2001
From: Justine Tunney <jart@google.com>
Date: Fri, 1 Dec 2017 16:02:57 -0800
Subject: [PATCH 0535/1225] Introduce tf_http_archive (#15018)

This new repository rule consolidates patched_http_archive,
temp_workaround_http_archive, http_archive, and new_http_archive.

The following behaviors have been introduced:

- A delete attribute that can rm -rf certain repo content after extraction
- Helpful error messages when mirroring requirements aren't followed
---
 .../contrib/makefile/download_dependencies.sh |   2 +-
 tensorflow/workspace.bzl                      | 385 +++++++-----------
 third_party/aws.BUILD                         |  16 +-
 third_party/curl.BUILD                        |  38 +-
 third_party/gif.BUILD                         |   2 +-
 third_party/jemalloc.BUILD                    |  10 +-
 third_party/jpeg/jpeg.BUILD                   |   2 +-
 third_party/nccl.BUILD                        |   8 +-
 third_party/repo.bzl                          | 103 +++++
 third_party/snappy.BUILD                      |   4 +-
 10 files changed, 286 insertions(+), 284 deletions(-)
 create mode 100644 third_party/repo.bzl

diff --git a/tensorflow/contrib/makefile/download_dependencies.sh b/tensorflow/contrib/makefile/download_dependencies.sh
index 904118e2d9..675ab24289 100755
--- a/tensorflow/contrib/makefile/download_dependencies.sh
+++ b/tensorflow/contrib/makefile/download_dependencies.sh
@@ -33,7 +33,7 @@ NSYNC_URL="$(grep -o 'https://mirror.bazel.build/github.com/google/nsync/.*tar\.
 PROTOBUF_URL="$(grep -o 'https://mirror.bazel.build/github.com/google/protobuf/.*tar\.gz' "${BZL_FILE_PATH}" | head -n1)"
 RE2_URL="$(grep -o 'https://mirror.bazel.build/github.com/google/re2/.*tar\.gz' "${BZL_FILE_PATH}" | head -n1)"
 FFT2D_URL="$(grep -o 'http.*fft\.tgz' "${BZL_FILE_PATH}" | grep -v mirror.bazel | head -n1)"
-DOUBLE_CONVERSION_URL="$(grep -o "https.*google/double-conversion.*\.zip" "${BZL_FILE_PATH}" | head -n1)"
+DOUBLE_CONVERSION_URL="$(grep -o "https.*google/double-conversion.*\.tar.gz" "${BZL_FILE_PATH}" | grep -v mirror.bazel | head -n1)"
 ABSL_URL="$(grep -o 'https://github.com/abseil/abseil-cpp/.*tar.gz' "${BZL_FILE_PATH}" | head -n1)"
 
 # TODO(petewarden): Some new code in Eigen triggers a clang bug with iOS arm64,
diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl
index 6b13271002..43c8de5303 100644
--- a/tensorflow/workspace.bzl
+++ b/tensorflow/workspace.bzl
@@ -1,40 +1,21 @@
 # TensorFlow external dependencies that can be loaded in WORKSPACE files.
 
 load("//third_party/gpus:cuda_configure.bzl", "cuda_configure")
-
-load("//third_party/sycl:sycl_configure.bzl", "sycl_configure")
 load("//third_party/mkl:build_defs.bzl", "mkl_repository")
-load(
-    "@io_bazel_rules_closure//closure/private:java_import_external.bzl",
-    "java_import_external",
-)
-load("@io_bazel_rules_closure//closure:defs.bzl", "filegroup_external")
 load("//third_party/py:python_configure.bzl", "python_configure")
-load(
-    "//third_party/toolchains/cpus/arm:arm_compiler_configure.bzl",
-    "arm_compiler_configure",
-)
-
-def _is_windows(repository_ctx):
-  """Returns true if the host operating system is windows."""
-  return repository_ctx.os.name.lower().find("windows") != -1
-
-def _get_env_var(repository_ctx, name):
-  """Find an environment variable."""
-  if name in repository_ctx.os.environ:
-    return repository_ctx.os.environ[name]
-  else:
-    return None
+load("//third_party/sycl:sycl_configure.bzl", "sycl_configure")
+load("//third_party/toolchains/cpus/arm:arm_compiler_configure.bzl", "arm_compiler_configure")
+load("//third_party:repo.bzl", "tf_http_archive")
+load("@io_bazel_rules_closure//closure/private:java_import_external.bzl", "java_import_external")
+load("@io_bazel_rules_closure//closure:defs.bzl", "filegroup_external")
 
 # Parse the bazel version string from `native.bazel_version`.
 def _parse_bazel_version(bazel_version):
   # Remove commit from version.
   version = bazel_version.split(" ", 1)[0]
-
   # Split into (release, date) parts and only return the release
   # as a tuple of integers.
   parts = version.split("-", 1)
-
   # Turn "release" into a tuple of strings
   version_tuple = ()
   for number in parts[0].split("."):
@@ -57,79 +38,6 @@ def check_version(bazel_version):
       fail("\nCurrent Bazel version is {}, expected at least {}\n".format(
           native.bazel_version, bazel_version))
 
-def _repos_are_siblings():
-  return Label("@foo//bar").workspace_root.startswith("../")
-
-# Temporary workaround to support including TensorFlow as a submodule until this
-# use-case is supported in the next Bazel release.
-def _temp_workaround_http_archive_impl(repo_ctx):
-  repo_ctx.template("BUILD", repo_ctx.attr.build_file, {
-      "%prefix%": ".." if _repos_are_siblings() else "external",
-      "%ws%": repo_ctx.attr.repository
-  }, False)
-  repo_ctx.download_and_extract(repo_ctx.attr.urls, "", repo_ctx.attr.sha256,
-                                "", repo_ctx.attr.strip_prefix)
-  if repo_ctx.attr.patch_file != None:
-    _apply_patch(repo_ctx, repo_ctx.attr.patch_file)
-
-temp_workaround_http_archive = repository_rule(
-    attrs = {
-        "build_file": attr.label(),
-        "repository": attr.string(),
-        "patch_file": attr.label(default = None),
-        "urls": attr.string_list(default = []),
-        "sha256": attr.string(default = ""),
-        "strip_prefix": attr.string(default = ""),
-    },
-    implementation = _temp_workaround_http_archive_impl,
-)
-
-# Executes specified command with arguments and calls 'fail' if it exited with
-# non-zero code
-def _execute_and_check_ret_code(repo_ctx, cmd_and_args):
-  result = repo_ctx.execute(cmd_and_args, timeout=10)
-  if result.return_code != 0:
-    fail(("Non-zero return code({1}) when executing '{0}':\n" + "Stdout: {2}\n"
-          + "Stderr: {3}").format(" ".join(cmd_and_args), result.return_code,
-                                  result.stdout, result.stderr))
-
-# Apply a patch_file to the repository root directory
-# Runs 'patch -p1'
-def _apply_patch(repo_ctx, patch_file):
-  # Don't check patch on Windows, because patch is only available under bash.
-  if not _is_windows(repo_ctx) and not repo_ctx.which("patch"):
-    fail("patch command is not found, please install it")
-
-  cmd = [
-      "patch", "-p1", "-d", repo_ctx.path("."), "-i", repo_ctx.path(patch_file)
-  ]
-  if _is_windows(repo_ctx):
-    bazel_sh = _get_env_var(repo_ctx, "BAZEL_SH")
-    if not bazel_sh:
-      fail("BAZEL_SH environment variable is not set")
-    cmd = [bazel_sh, "-l", "-c", " ".join(cmd)]
-  _execute_and_check_ret_code(repo_ctx, cmd)
-
-# Download the repository and apply a patch to its root
-def _patched_http_archive_impl(repo_ctx):
-  repo_ctx.download_and_extract(
-      repo_ctx.attr.urls,
-      sha256=repo_ctx.attr.sha256,
-      stripPrefix=repo_ctx.attr.strip_prefix)
-  _apply_patch(repo_ctx, repo_ctx.attr.patch_file)
-
-patched_http_archive = repository_rule(
-    attrs = {
-        "patch_file": attr.label(),
-        "build_file": attr.label(),
-        "repository": attr.string(),
-        "urls": attr.string_list(default = []),
-        "sha256": attr.string(default = ""),
-        "strip_prefix": attr.string(default = ""),
-    },
-    implementation = _patched_http_archive_impl,
-)
-
 # If TensorFlow is linked as a submodule.
 # path_prefix is no longer used.
 # tf_repo_name is thought to be under consideration.
@@ -157,14 +65,13 @@ def tf_workspace(path_prefix="", tf_repo_name=""):
       sha256 = "57ba56c4c243f403ff78f417ff854ef50b9eddf4a610a917b7c95e7fa8553a4b",
       strip_prefix = "mklml_lnx_2018.0.20170720",
       build_file = str(Label("//third_party/mkl:mkl.BUILD")),
-      repository = tf_repo_name,
   )
 
   if path_prefix:
     print("path_prefix was specified to tf_workspace but is no longer used " +
           "and will be removed in the future.")
 
-  native.new_http_archive(
+  tf_http_archive(
       name = "mkl_dnn",
       urls = [
           "https://mirror.bazel.build/github.com/01org/mkl-dnn/archive/b01e3a55a07be62172e713bcd2644c5176360212.tar.gz",
@@ -175,7 +82,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""):
       build_file = str(Label("//third_party/mkl_dnn:mkldnn.BUILD")),
   )
 
-  native.http_archive(
+  tf_http_archive(
       name = "com_google_absl",
       urls = [
           "https://mirror.bazel.build/github.com/abseil/abseil-cpp/archive/cc4bed2d74f7c8717e31f9579214ab52a9c9c610.tar.gz",
@@ -185,7 +92,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""):
      strip_prefix = "abseil-cpp-cc4bed2d74f7c8717e31f9579214ab52a9c9c610",
   )
 
-  native.new_http_archive(
+  tf_http_archive(
       name = "eigen_archive",
       urls = [
           "https://mirror.bazel.build/bitbucket.org/eigen/eigen/get/429aa5254200.tar.gz",
@@ -196,18 +103,20 @@ def tf_workspace(path_prefix="", tf_repo_name=""):
       build_file = str(Label("//third_party:eigen.BUILD")),
   )
 
-  native.new_http_archive(
+  tf_http_archive(
       name = "arm_compiler",
-      build_file = str(Label("//:arm_compiler.BUILD")),
       sha256 = "970285762565c7890c6c087d262b0a18286e7d0384f13a37786d8521773bc969",
       strip_prefix = "tools-0e906ebc527eab1cdbf7adabff5b474da9562e9f/arm-bcm2708/arm-rpi-4.9.3-linux-gnueabihf",
       urls = [
           "https://mirror.bazel.build/github.com/raspberrypi/tools/archive/0e906ebc527eab1cdbf7adabff5b474da9562e9f.tar.gz",
+          # Please uncomment me, when the next upgrade happens. Then
+          # remove the whitelist entry in third_party/repo.bzl.
           # "https://github.com/raspberrypi/tools/archive/0e906ebc527eab1cdbf7adabff5b474da9562e9f.tar.gz",
       ],
+      build_file = str(Label("//:arm_compiler.BUILD")),
   )
 
-  native.new_http_archive(
+  tf_http_archive(
       name = "libxsmm_archive",
       urls = [
           "https://mirror.bazel.build/github.com/hfp/libxsmm/archive/1.8.1.tar.gz",
@@ -218,15 +127,12 @@ def tf_workspace(path_prefix="", tf_repo_name=""):
       build_file = str(Label("//third_party:libxsmm.BUILD")),
   )
 
-  native.bind(
-      name = "xsmm_avx",
-      actual = "@libxsmm_archive//third_party:xsmm_avx",
-  )
-
-  native.new_http_archive(
+  tf_http_archive(
       name = "ortools_archive",
       urls = [
           "https://mirror.bazel.build/github.com/google/or-tools/archive/253f7955c6a1fd805408fba2e42ac6d45b312d15.tar.gz",
+          # Please uncomment me, when the next upgrade happens. Then
+          # remove the whitelist entry in third_party/repo.bzl.
           # "https://github.com/google/or-tools/archive/253f7955c6a1fd805408fba2e42ac6d45b312d15.tar.gz",
       ],
       sha256 = "932075525642b04ac6f1b50589f1df5cd72ec2f448b721fd32234cf183f0e755",
@@ -234,7 +140,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""):
       build_file = str(Label("//third_party:ortools.BUILD")),
   )
 
-  native.http_archive(
+  tf_http_archive(
       name = "com_googlesource_code_re2",
       urls = [
           "https://mirror.bazel.build/github.com/google/re2/archive/b94b7cd42e9f02673cd748c1ac1d16db4052514c.tar.gz",
@@ -244,7 +150,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""):
       strip_prefix = "re2-b94b7cd42e9f02673cd748c1ac1d16db4052514c",
   )
 
-  native.http_archive(
+  tf_http_archive(
       name = "gemmlowp",
       urls = [
           "https://mirror.bazel.build/github.com/google/gemmlowp/archive/010bb3e71a26ca1d0884a167081d092b43563996.zip",
@@ -254,7 +160,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""):
       strip_prefix = "gemmlowp-010bb3e71a26ca1d0884a167081d092b43563996",
   )
 
-  native.new_http_archive(
+  tf_http_archive(
       name = "farmhash_archive",
       urls = [
           "https://mirror.bazel.build/github.com/google/farmhash/archive/816a4ae622e964763ca0862d9dbd19324a1eaf45.tar.gz",
@@ -265,12 +171,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""):
       build_file = str(Label("//third_party:farmhash.BUILD")),
   )
 
-  native.bind(
-      name = "farmhash",
-      actual = "@farmhash//:farmhash",
-  )
-
-  native.new_http_archive(
+  tf_http_archive(
       name = "highwayhash",
       urls = [
           "https://mirror.bazel.build/github.com/google/highwayhash/archive/dfcb97ca4fe9277bf9dc1802dd979b071896453b.tar.gz",
@@ -281,7 +182,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""):
       build_file = str(Label("//third_party:highwayhash.BUILD")),
   )
 
-  native.new_http_archive(
+  tf_http_archive(
       name = "nasm",
       urls = [
           "https://mirror.bazel.build/www.nasm.us/pub/nasm/releasebuilds/2.12.02/nasm-2.12.02.tar.bz2",
@@ -292,7 +193,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""):
       build_file = str(Label("//third_party:nasm.BUILD")),
   )
 
-  temp_workaround_http_archive(
+  tf_http_archive(
       name = "jpeg",
       urls = [
           "https://mirror.bazel.build/github.com/libjpeg-turbo/libjpeg-turbo/archive/1.5.1.tar.gz",
@@ -301,10 +202,9 @@ def tf_workspace(path_prefix="", tf_repo_name=""):
       sha256 = "c15a9607892113946379ccea3ca8b85018301b200754f209453ab21674268e77",
       strip_prefix = "libjpeg-turbo-1.5.1",
       build_file = str(Label("//third_party/jpeg:jpeg.BUILD")),
-      repository = tf_repo_name,
   )
 
-  native.new_http_archive(
+  tf_http_archive(
       name = "png_archive",
       urls = [
           "https://mirror.bazel.build/github.com/glennrp/libpng/archive/v1.2.53.tar.gz",
@@ -315,7 +215,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""):
       build_file = str(Label("//third_party:png.BUILD")),
   )
 
-  native.new_http_archive(
+  tf_http_archive(
       name = "sqlite_archive",
       urls = [
           "https://mirror.bazel.build/www.sqlite.org/2017/sqlite-amalgamation-3200000.zip",
@@ -323,10 +223,10 @@ def tf_workspace(path_prefix="", tf_repo_name=""):
       ],
       sha256 = "208780b3616f9de0aeb50822b7a8f5482f6515193859e91ed61637be6ad74fd4",
       strip_prefix = "sqlite-amalgamation-3200000",
-      build_file = str(Label("//third_party:sqlite.BUILD"))
+      build_file = str(Label("//third_party:sqlite.BUILD")),
   )
 
-  native.new_http_archive(
+  tf_http_archive(
       name = "gif_archive",
       urls = [
           "https://mirror.bazel.build/ufpr.dl.sourceforge.net/project/giflib/giflib-5.1.4.tar.gz",
@@ -337,7 +237,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""):
       build_file = str(Label("//third_party:gif.BUILD")),
   )
 
-  native.new_http_archive(
+  tf_http_archive(
       name = "six_archive",
       urls = [
           "https://mirror.bazel.build/pypi.python.org/packages/source/s/six/six-1.10.0.tar.gz",
@@ -348,7 +248,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""):
       build_file = str(Label("//third_party:six.BUILD")),
   )
 
-  native.http_archive(
+  tf_http_archive(
       name = "absl_py",
       urls = [
           "https://mirror.bazel.build/github.com/abseil/abseil-py/archive/231e3870b976c1dc61dce1749138661d21556028.tar.gz",
@@ -358,7 +258,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""):
       strip_prefix = "abseil-py-231e3870b976c1dc61dce1749138661d21556028",
   )
 
-  native.new_http_archive(
+  tf_http_archive(
       name = "org_python_pypi_backports_weakref",
       urls = [
           "https://mirror.bazel.build/pypi.python.org/packages/bc/cc/3cdb0a02e7e96f6c70bd971bc8a90b8463fda83e264fa9c5c1c98ceabd81/backports.weakref-1.0rc1.tar.gz",
@@ -369,7 +269,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""):
       build_file = str(Label("//third_party:backports_weakref.BUILD")),
   )
 
-  native.new_http_archive(
+  tf_http_archive(
       name = "com_github_andreif_codegen",
       urls = [
           "https://mirror.bazel.build/github.com/andreif/codegen/archive/1.0.tar.gz",
@@ -391,12 +291,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""):
       },
   )
 
-  native.bind(
-      name = "six",
-      actual = "@six_archive//:six",
-  )
-
-  patched_http_archive(
+  tf_http_archive(
       name = "protobuf_archive",
       urls = [
           "https://mirror.bazel.build/github.com/google/protobuf/archive/b04e5cba356212e4e8c66c61bbe0c3a20537c5b9.tar.gz",
@@ -411,20 +306,10 @@ def tf_workspace(path_prefix="", tf_repo_name=""):
       patch_file = str(Label("//third_party/protobuf:add_noinlines.patch")),
   )
 
-  native.bind(
-      name = "protobuf",
-      actual = "@protobuf_archive//:protobuf",
-  )
-
-  native.bind(
-      name = "protobuf_headers",
-      actual = "@protobuf_archive//:protobuf_headers",
-  )
-
   # We need to import the protobuf library under the names com_google_protobuf
   # and com_google_protobuf_cc to enable proto_library support in bazel.
   # Unfortunately there is no way to alias http_archives at the moment.
-  native.http_archive(
+  tf_http_archive(
       name = "com_google_protobuf",
       urls = [
           "https://mirror.bazel.build/github.com/google/protobuf/archive/b04e5cba356212e4e8c66c61bbe0c3a20537c5b9.tar.gz",
@@ -434,7 +319,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""):
       strip_prefix = "protobuf-b04e5cba356212e4e8c66c61bbe0c3a20537c5b9",
   )
 
-  native.http_archive(
+  tf_http_archive(
       name = "com_google_protobuf_cc",
       urls = [
           "https://mirror.bazel.build/github.com/google/protobuf/archive/b04e5cba356212e4e8c66c61bbe0c3a20537c5b9.tar.gz",
@@ -444,7 +329,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""):
       strip_prefix = "protobuf-b04e5cba356212e4e8c66c61bbe0c3a20537c5b9",
   )
 
-  native.http_archive(
+  tf_http_archive(
       name = "nsync",
       urls = [
           "https://mirror.bazel.build/github.com/google/nsync/archive/93815892dddafe9146a5f7e7042281d59d0f4323.tar.gz",
@@ -454,7 +339,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""):
       strip_prefix = "nsync-93815892dddafe9146a5f7e7042281d59d0f4323",
   )
 
-  native.http_archive(
+  tf_http_archive(
       name = "com_google_googletest",
       urls = [
           "https://mirror.bazel.build/github.com/google/googletest/archive/9816b96a6ddc0430671693df90192bbee57108b6.zip",
@@ -464,7 +349,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""):
       strip_prefix = "googletest-9816b96a6ddc0430671693df90192bbee57108b6",
   )
 
-  native.http_archive(
+  tf_http_archive(
       name = "com_github_gflags_gflags",
       urls = [
           "https://mirror.bazel.build/github.com/gflags/gflags/archive/f8a0efe03aa69b3336d8e228b37d4ccb17324b88.tar.gz",
@@ -474,12 +359,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""):
       strip_prefix = "gflags-f8a0efe03aa69b3336d8e228b37d4ccb17324b88",
   )
 
-  native.bind(
-      name = "python_headers",
-      actual = str(Label("//util/python:python_headers")),
-  )
-
-  native.new_http_archive(
+  tf_http_archive(
       name = "pcre",
       sha256 = "ccdf7e788769838f8285b3ee672ed573358202305ee361cfec7a4a4fb005bbc7",
       urls = [
@@ -490,7 +370,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""):
       build_file = str(Label("//third_party:pcre.BUILD")),
   )
 
-  native.new_http_archive(
+  tf_http_archive(
       name = "swig",
       sha256 = "58a475dbbd4a4d7075e5fe86d4e54c9edde39847cdb96a3053d87cb64a23a453",
       urls = [
@@ -502,7 +382,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""):
       build_file = str(Label("//third_party:swig.BUILD")),
   )
 
-  temp_workaround_http_archive(
+  tf_http_archive(
       name = "curl",
       sha256 = "ff3e80c1ca6a068428726cd7dd19037a47cc538ce58ef61c59587191039b2ca6",
       urls = [
@@ -511,29 +391,9 @@ def tf_workspace(path_prefix="", tf_repo_name=""):
       ],
       strip_prefix = "curl-7.49.1",
       build_file = str(Label("//third_party:curl.BUILD")),
-      repository = tf_repo_name
-  )
-
-  # grpc expects //external:protobuf_clib and //external:protobuf_compiler
-  # to point to the protobuf's compiler library.
-  native.bind(
-      name = "protobuf_clib",
-      actual = "@protobuf_archive//:protoc_lib",
-  )
-
-  native.bind(
-      name = "libssl",
-      actual = "@boringssl//:ssl",
   )
 
-  # gRPC has includes directly from their third_party path for nanopb, so we
-  # must depend on their version of it.
-  native.bind(
-      name = "nanopb",
-      actual = "@grpc//third_party/nanopb:nanopb",
-  )
-
-  native.http_archive(
+  tf_http_archive(
       name = "grpc",
       urls = [
           "https://mirror.bazel.build/github.com/grpc/grpc/archive/54e8f37e537794c2d814c1604c1282125f64f093.tar.gz",
@@ -543,26 +403,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""):
       strip_prefix = "grpc-54e8f37e537794c2d814c1604c1282125f64f093",
   )
 
-  # gRPC wants the existence of a cares dependence but its contents are not
-  # actually important since we have set GRPC_ARES=0 in tools/bazel.rc
-  native.bind(
-      name = "cares",
-      actual = "@grpc//third_party/nanopb:nanopb",
-  )
-
-  # protobuf expects //external:grpc_cpp_plugin to point to grpc's
-  # C++ plugin code generator.
-  native.bind(
-      name = "grpc_cpp_plugin",
-      actual = "@grpc//:grpc_cpp_plugin",
-  )
-
-  native.bind(
-      name = "grpc_lib",
-      actual = "@grpc//:grpc++_unsecure",
-  )
-
-  native.new_http_archive(
+  tf_http_archive(
       name = "linenoise",
       sha256 = "7f51f45887a3d31b4ce4fa5965210a5e64637ceac12720cfce7954d6a2e812f7",
       urls = [
@@ -575,7 +416,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""):
 
   # TODO(phawkins): currently, this rule uses an unofficial LLVM mirror.
   # Switch to an official source of snapshots if/when possible.
-  temp_workaround_http_archive(
+  tf_http_archive(
       name = "llvm",
       urls = [
           "https://mirror.bazel.build/github.com/llvm-mirror/llvm/archive/8d26b8bee4d8e7230870a600bc968c7ee8cf6f67.tar.gz",
@@ -584,10 +425,9 @@ def tf_workspace(path_prefix="", tf_repo_name=""):
       sha256 = "ff5ddbe5af5e264426c8d489e7fddfc5ad7e0975f19cefe9db8c0a5d0faeb23e",
       strip_prefix = "llvm-8d26b8bee4d8e7230870a600bc968c7ee8cf6f67",
       build_file = str(Label("//third_party/llvm:llvm.BUILD")),
-      repository = tf_repo_name,
   )
 
-  native.new_http_archive(
+  tf_http_archive(
       name = "lmdb",
       urls = [
           "https://mirror.bazel.build/github.com/LMDB/lmdb/archive/LMDB_0.9.19.tar.gz",
@@ -598,7 +438,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""):
       build_file = str(Label("//third_party:lmdb.BUILD")),
   )
 
-  native.new_http_archive(
+  tf_http_archive(
       name = "jsoncpp_git",
       urls = [
           "https://mirror.bazel.build/github.com/open-source-parsers/jsoncpp/archive/11086dd6a7eba04289944367ca82cea71299ed70.tar.gz",
@@ -609,12 +449,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""):
       build_file = str(Label("//third_party:jsoncpp.BUILD")),
   )
 
-  native.bind(
-      name = "jsoncpp",
-      actual = "@jsoncpp_git//:jsoncpp",
-  )
-
-  native.http_archive(
+  tf_http_archive(
       name = "boringssl",
       urls = [
           "https://mirror.bazel.build/github.com/google/boringssl/archive/a0fb951d2a26a8ee746b52f3ba81ab011a0af778.tar.gz",
@@ -624,7 +459,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""):
       strip_prefix = "boringssl-a0fb951d2a26a8ee746b52f3ba81ab011a0af778",
   )
 
-  native.new_http_archive(
+  tf_http_archive(
       name = "zlib_archive",
       urls = [
           "https://mirror.bazel.build/zlib.net/zlib-1.2.8.tar.gz",
@@ -635,12 +470,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""):
       build_file = str(Label("//third_party:zlib.BUILD")),
   )
 
-  native.bind(
-      name = "zlib",
-      actual = "@zlib_archive//:zlib",
-  )
-
-  native.new_http_archive(
+  tf_http_archive(
       name = "fft2d",
       urls = [
           "https://mirror.bazel.build/www.kurims.kyoto-u.ac.jp/~ooura/fft.tgz",
@@ -650,7 +480,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""):
       build_file = str(Label("//third_party/fft2d:fft2d.BUILD")),
   )
 
-  temp_workaround_http_archive(
+  tf_http_archive(
       name = "snappy",
       urls = [
           "https://mirror.bazel.build/github.com/google/snappy/archive/1.1.4.tar.gz",
@@ -659,10 +489,9 @@ def tf_workspace(path_prefix="", tf_repo_name=""):
       sha256 = "2f7504c73d85bac842e893340333be8cb8561710642fc9562fccdd9d2c3fcc94",
       strip_prefix = "snappy-1.1.4",
       build_file = str(Label("//third_party:snappy.BUILD")),
-      repository = tf_repo_name,
   )
 
-  temp_workaround_http_archive(
+  tf_http_archive(
       name = "nccl_archive",
       urls = [
           "https://mirror.bazel.build/github.com/nvidia/nccl/archive/03d856977ecbaac87e598c0c4bafca96761b9ac7.tar.gz",
@@ -671,10 +500,9 @@ def tf_workspace(path_prefix="", tf_repo_name=""):
       sha256 = "2ca86fb6179ecbff789cc67c836139c1bbc0324ed8c04643405a30bf26325176",
       strip_prefix = "nccl-03d856977ecbaac87e598c0c4bafca96761b9ac7",
       build_file = str(Label("//third_party:nccl.BUILD")),
-      repository = tf_repo_name,
   )
 
-  temp_workaround_http_archive(
+  tf_http_archive(
       name = "aws",
       urls = [
           "https://mirror.bazel.build/github.com/aws/aws-sdk-cpp/archive/1.0.90.tar.gz",
@@ -683,7 +511,6 @@ def tf_workspace(path_prefix="", tf_repo_name=""):
       sha256 = "f599b57aec4f03ad696044dd430b2d201864113937353adc346f53ad47991319",
       strip_prefix = "aws-sdk-cpp-1.0.90",
       build_file = str(Label("//third_party:aws.BUILD")),
-      repository = tf_repo_name
   )
 
   java_import_external(
@@ -711,7 +538,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""):
       testonly_ = True,
   )
 
-  temp_workaround_http_archive(
+  tf_http_archive(
       name = "jemalloc",
       urls = [
           "https://mirror.bazel.build/github.com/jemalloc/jemalloc/archive/4.4.0.tar.gz",
@@ -720,7 +547,6 @@ def tf_workspace(path_prefix="", tf_repo_name=""):
       sha256 = "3c8f25c02e806c3ce0ab5fb7da1817f89fc9732709024e2a81b6b82f7cc792a8",
       strip_prefix = "jemalloc-4.4.0",
       build_file = str(Label("//third_party:jemalloc.BUILD")),
-      repository = tf_repo_name,
   )
 
   java_import_external(
@@ -758,7 +584,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""):
       licenses = ["notice"],  # Apache 2.0
   )
 
-  native.new_http_archive(
+  tf_http_archive(
       name = "com_google_pprof",
       urls = [
           "https://mirror.bazel.build/github.com/google/pprof/archive/c0fb62ec88c411cc91194465e54db2632845b650.tar.gz",
@@ -769,7 +595,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""):
       build_file = str(Label("//third_party:pprof.BUILD")),
   )
 
-  native.new_http_archive(
+  tf_http_archive(
       name = "cub_archive",
       urls = [
           "https://mirror.bazel.build/github.com/NVlabs/cub/archive/1.7.4.zip",
@@ -780,12 +606,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""):
       build_file = str(Label("//third_party:cub.BUILD")),
   )
 
-  native.bind(
-      name = "cub",
-      actual = "@cub_archive//:cub",
-  )
-
-  native.new_http_archive(
+  tf_http_archive(
       name = "cython",
       sha256 = "6dcd30b5ceb887b2b965ee7ceb82ea3acb5f0642fe2206c7636b45acea4798e5",
       urls = [
@@ -794,9 +615,10 @@ def tf_workspace(path_prefix="", tf_repo_name=""):
       ],
       strip_prefix = "cython-3732784c45cfb040a5b0936951d196f83a12ea17",
       build_file = str(Label("//third_party:cython.BUILD")),
+      delete = ["BUILD.bazel"],
   )
 
-  native.http_archive(
+  tf_http_archive(
       name = "bazel_toolchains",
       urls = [
           "https://mirror.bazel.build/github.com/bazelbuild/bazel-toolchains/archive/af4681c3d19f063f090222ec3d04108c4e0ca255.tar.gz",
@@ -806,7 +628,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""):
       strip_prefix = "bazel-toolchains-af4681c3d19f063f090222ec3d04108c4e0ca255",
   )
 
-  native.new_http_archive(
+  tf_http_archive(
       name = "arm_neon_2_x86_sse",
       sha256 = "c8d90aa4357f8079d427e87a6f4c493da1fa4140aee926c05902d7ec1533d9a5",
       strip_prefix = "ARM_NEON_2_x86_SSE-0f77d9d182265259b135dad949230ecbf1a2633d",
@@ -817,32 +639,109 @@ def tf_workspace(path_prefix="", tf_repo_name=""):
       build_file = str(Label("//third_party:arm_neon_2_x86_sse.BUILD")),
   )
 
-  native.new_http_archive(
+  tf_http_archive(
       name = "flatbuffers",
-      build_file = str(Label("//third_party/flatbuffers:flatbuffers.BUILD")),
       strip_prefix = "flatbuffers-971a68110e4fc1bace10fcb6deeb189e7e1a34ce",
       sha256 = "874088d2ee0d9f8524191f77209556415f03dd44e156276edf19e5b90ceb5f55",
       urls = [
           "https://mirror.bazel.build/github.com/google/flatbuffers/archive/971a68110e4fc1bace10fcb6deeb189e7e1a34ce.tar.gz",
           "https://github.com/google/flatbuffers/archive/971a68110e4fc1bace10fcb6deeb189e7e1a34ce.tar.gz",
       ],
+      build_file = str(Label("//third_party/flatbuffers:flatbuffers.BUILD")),
   )
 
-  native.http_archive(
+  tf_http_archive(
       name = "double_conversion",
       urls = [
-          "https://github.com/google/double-conversion/archive/5664746c5e64dc265e7fbc1a890a6698e6ad0ebb.zip",
+          "https://mirror.bazel.build/github.com/google/double-conversion/archive/5664746c5e64dc265e7fbc1a890a6698e6ad0ebb.tar.gz",
+          "https://github.com/google/double-conversion/archive/5664746c5e64dc265e7fbc1a890a6698e6ad0ebb.tar.gz",
       ],
-      sha256 = "a0c49fb3cc8d34b2230d278a115f1bb266bcfcaae10400b84dc2a3b7dc2c8bc6",
+      sha256 = "ce651ba63faa55f86333f50bdd58a574327ca1565a65b875b11f5132c7c72bb6",
       strip_prefix = "double-conversion-5664746c5e64dc265e7fbc1a890a6698e6ad0ebb",
   )
-  
-  native.new_http_archive(
+
+  tf_http_archive(
       name = "tflite_mobilenet",
-      build_file = str(Label("//third_party:tflite_mobilenet.BUILD")),
       sha256 = "23f814d1c076bdf03715dfb6cab3713aa4fbdf040fd5448c43196bd2e97a4c1b",
       urls = [
           "https://mirror.bazel.build/storage.googleapis.com/download.tensorflow.org/models/tflite/mobilenet_v1_224_android_quant_2017_11_08.zip",
           "https://storage.googleapis.com/download.tensorflow.org/models/tflite/mobilenet_v1_224_android_quant_2017_11_08.zip",
       ],
+      build_file = str(Label("//third_party:tflite_mobilenet.BUILD")),
+  )
+
+  ##############################################################################
+  # BIND DEFINITIONS
+  #
+  # Please do not add bind() definitions unless we have no other choice.
+  # If that ends up being the case, please leave a comment explaining
+  # why we can't depend on the canonical build target.
+
+  # gRPC wants a cares dependency but its contents is not actually
+  # important since we have set GRPC_ARES=0 in tools/bazel.rc
+  native.bind(
+      name = "cares",
+      actual = "@grpc//third_party/nanopb:nanopb",
+  )
+
+  # Needed by Protobuf
+  native.bind(
+      name = "grpc_cpp_plugin",
+      actual = "@grpc//:grpc_cpp_plugin",
+  )
+
+  # gRPC has three empty C++ functions which it wants the user to define
+  # at build time. https://github.com/grpc/grpc/issues/13590
+  native.bind(
+      name = "grpc_lib",
+      actual = "@grpc//:grpc++_unsecure",
+  )
+
+  # Needed by gRPC
+  native.bind(
+      name = "libssl",
+      actual = "@boringssl//:ssl",
+  )
+
+  # Needed by gRPC
+  native.bind(
+      name = "nanopb",
+      actual = "@grpc//third_party/nanopb:nanopb",
+  )
+
+  # Needed by gRPC
+  native.bind(
+      name = "protobuf",
+      actual = "@protobuf_archive//:protobuf",
+  )
+
+  # gRPC expects //external:protobuf_clib and //external:protobuf_compiler
+  # to point to Protobuf's compiler library.
+  native.bind(
+      name = "protobuf_clib",
+      actual = "@protobuf_archive//:protoc_lib",
+  )
+
+  # Needed by gRPC
+  native.bind(
+      name = "protobuf_headers",
+      actual = "@protobuf_archive//:protobuf_headers",
+  )
+
+  # Needed by Protobuf
+  native.bind(
+      name = "python_headers",
+      actual = str(Label("//util/python:python_headers")),
+  )
+
+  # Needed by Protobuf
+  native.bind(
+      name = "six",
+      actual = "@six_archive//:six",
+  )
+
+  # Needed by gRPC
+  native.bind(
+      name = "zlib",
+      actual = "@zlib_archive//:zlib",
   )
diff --git a/third_party/aws.BUILD b/third_party/aws.BUILD
index bc9e37ffb3..bf5310aa16 100644
--- a/third_party/aws.BUILD
+++ b/third_party/aws.BUILD
@@ -7,21 +7,21 @@ licenses(["notice"])  # Apache 2.0
 
 exports_files(["LICENSE"])
 
-load("@%ws%//third_party:common.bzl", "template_rule")
+load("@org_tensorflow//third_party:common.bzl", "template_rule")
 
 cc_library(
     name = "aws",
     srcs = select({
-        "@%ws%//tensorflow:linux_x86_64": glob([
+        "@org_tensorflow//tensorflow:linux_x86_64": glob([
             "aws-cpp-sdk-core/source/platform/linux-shared/*.cpp",
         ]),
-        "@%ws%//tensorflow:darwin": glob([
+        "@org_tensorflow//tensorflow:darwin": glob([
             "aws-cpp-sdk-core/source/platform/linux-shared/*.cpp",
         ]),
-        "@%ws%//tensorflow:linux_ppc64le": glob([
+        "@org_tensorflow//tensorflow:linux_ppc64le": glob([
             "aws-cpp-sdk-core/source/platform/linux-shared/*.cpp",
         ]),
-        "@%ws%//tensorflow:raspberry_pi_armeabi": glob([
+        "@org_tensorflow//tensorflow:raspberry_pi_armeabi": glob([
             "aws-cpp-sdk-core/source/platform/linux-shared/*.cpp",
         ]),
         "//conditions:default": [],
@@ -53,17 +53,17 @@ cc_library(
         "aws-cpp-sdk-core/include/aws/core/SDKConfig.h",
     ],
     defines = select({
-        "@%ws%//tensorflow:linux_x86_64": [
+        "@org_tensorflow//tensorflow:linux_x86_64": [
             "PLATFORM_LINUX",
             "ENABLE_CURL_CLIENT",
             "ENABLE_NO_ENCRYPTION",
         ],
-        "@%ws%//tensorflow:darwin": [
+        "@org_tensorflow//tensorflow:darwin": [
             "PLATFORM_APPLE",
             "ENABLE_CURL_CLIENT",
             "ENABLE_NO_ENCRYPTION",
         ],
-        "@%ws%//tensorflow:linux_ppc64le": [
+        "@org_tensorflow//tensorflow:linux_ppc64le": [
             "PLATFORM_LINUX",
             "ENABLE_CURL_CLIENT",
             "ENABLE_NO_ENCRYPTION",
diff --git a/third_party/curl.BUILD b/third_party/curl.BUILD
index 805a30d262..0f6c75a210 100644
--- a/third_party/curl.BUILD
+++ b/third_party/curl.BUILD
@@ -224,14 +224,14 @@ cc_library(
         "lib/wildcard.h",
         "lib/x509asn1.h",
     ] + select({
-        "@%ws%//tensorflow:darwin": [
+        "@org_tensorflow//tensorflow:darwin": [
             "lib/vtls/darwinssl.c",
         ],
-        "@%ws%//tensorflow:ios": [
+        "@org_tensorflow//tensorflow:ios": [
             "lib/vtls/darwinssl.c",
         ],
-        "@%ws%//tensorflow:windows": CURL_WIN_SRCS,
-        "@%ws%//tensorflow:windows_msvc": CURL_WIN_SRCS,
+        "@org_tensorflow//tensorflow:windows": CURL_WIN_SRCS,
+        "@org_tensorflow//tensorflow:windows_msvc": CURL_WIN_SRCS,
         "//conditions:default": [
             "lib/vtls/openssl.c",
         ],
@@ -248,8 +248,8 @@ cc_library(
         "include/curl/typecheck-gcc.h",
     ],
     copts = select({
-        "@%ws%//tensorflow:windows": CURL_WIN_COPTS,
-        "@%ws%//tensorflow:windows_msvc": CURL_WIN_COPTS,
+        "@org_tensorflow//tensorflow:windows": CURL_WIN_COPTS,
+        "@org_tensorflow//tensorflow:windows_msvc": CURL_WIN_COPTS,
         "//conditions:default": [
             "-I%prefix%/curl/lib",
             "-D_GNU_SOURCE",
@@ -261,14 +261,14 @@ cc_library(
             "-Wno-string-plus-int",
         ],
     }) + select({
-        "@%ws%//tensorflow:darwin": [
+        "@org_tensorflow//tensorflow:darwin": [
             "-fno-constant-cfstrings",
         ],
-        "@%ws%//tensorflow:windows": [
+        "@org_tensorflow//tensorflow:windows": [
             # See curl.h for discussion of write size and Windows
             "/DCURL_MAX_WRITE_SIZE=16384",
         ],
-        "@%ws%//tensorflow:windows_msvc": [
+        "@org_tensorflow//tensorflow:windows_msvc": [
             # See curl.h for discussion of write size and Windows
             "/DCURL_MAX_WRITE_SIZE=16384",
         ],
@@ -278,20 +278,20 @@ cc_library(
     }),
     includes = ["include"],
     linkopts = select({
-        "@%ws%//tensorflow:android": [
+        "@org_tensorflow//tensorflow:android": [
             "-pie",
         ],
-        "@%ws%//tensorflow:darwin": [
+        "@org_tensorflow//tensorflow:darwin": [
             "-Wl,-framework",
             "-Wl,CoreFoundation",
             "-Wl,-framework",
             "-Wl,Security",
         ],
-        "@%ws%//tensorflow:ios": [],
-        "@%ws%//tensorflow:windows": [
+        "@org_tensorflow//tensorflow:ios": [],
+        "@org_tensorflow//tensorflow:windows": [
             "-Wl,ws2_32.lib",
         ],
-        "@%ws%//tensorflow:windows_msvc": [
+        "@org_tensorflow//tensorflow:windows_msvc": [
             "-Wl,ws2_32.lib",
         ],
         "//conditions:default": [
@@ -302,9 +302,9 @@ cc_library(
     deps = [
         "@zlib_archive//:zlib",
     ] + select({
-        "@%ws%//tensorflow:ios": [],
-        "@%ws%//tensorflow:windows": [],
-        "@%ws%//tensorflow:windows_msvc": [],
+        "@org_tensorflow//tensorflow:ios": [],
+        "@org_tensorflow//tensorflow:windows": [],
+        "@org_tensorflow//tensorflow:windows_msvc": [],
         "//conditions:default": [
             "@boringssl//:ssl",
         ],
@@ -406,8 +406,8 @@ cc_binary(
         "src/tool_xattr.h",
     ],
     copts = select({
-        "@%ws%//tensorflow:windows": CURL_BIN_WIN_COPTS,
-        "@%ws%//tensorflow:windows_msvc": CURL_BIN_WIN_COPTS,
+        "@org_tensorflow//tensorflow:windows": CURL_BIN_WIN_COPTS,
+        "@org_tensorflow//tensorflow:windows_msvc": CURL_BIN_WIN_COPTS,
         "//conditions:default": [
             "-I%prefix%/curl/lib",
             "-D_GNU_SOURCE",
diff --git a/third_party/gif.BUILD b/third_party/gif.BUILD
index 27808a9d64..78fbd6c0e0 100644
--- a/third_party/gif.BUILD
+++ b/third_party/gif.BUILD
@@ -21,7 +21,7 @@ cc_library(
     ],
     hdrs = ["lib/gif_lib.h"],
     defines = select({
-        #"@%ws%//tensorflow:android": [
+        #"@org_tensorflow//tensorflow:android": [
         ":android": [
             "S_IREAD=S_IRUSR",
             "S_IWRITE=S_IWUSR",
diff --git a/third_party/jemalloc.BUILD b/third_party/jemalloc.BUILD
index a2addf2c66..1b0829b8fe 100644
--- a/third_party/jemalloc.BUILD
+++ b/third_party/jemalloc.BUILD
@@ -5,7 +5,7 @@ licenses(["notice"])  # BSD
 
 exports_files(["COPYING"])
 
-load("@%ws%//third_party:common.bzl", "template_rule")
+load("@org_tensorflow//third_party:common.bzl", "template_rule")
 
 cc_library(
     name = "jemalloc_headers",
@@ -97,10 +97,10 @@ cc_library(
     includes = ["include"],
     # pthread_atfork() is called for PPC.
     linkopts = select({
-        "@%ws%//tensorflow:linux_ppc64le": [
+        "@org_tensorflow//tensorflow:linux_ppc64le": [
             "-lpthread",
         ],
-        "@%ws%//tensorflow:linux_x86_64": [
+        "@org_tensorflow//tensorflow:linux_x86_64": [
             "-lpthread",
         ],
         "//conditions:default": [
@@ -208,8 +208,8 @@ genrule(
     name = "size_classes_h",
     outs = ["include/jemalloc/internal/size_classes.h"],
     cmd = select({
-        "@%ws%//tensorflow:linux_ppc64le": "$(location :size_classes_sh) \"3 4\" 3 16 2 >$@",
-        "@%ws%//tensorflow:linux_x86_64": "$(location :size_classes_sh) \"3 4\" 3 12 2 >$@",
+        "@org_tensorflow//tensorflow:linux_ppc64le": "$(location :size_classes_sh) \"3 4\" 3 16 2 >$@",
+        "@org_tensorflow//tensorflow:linux_x86_64": "$(location :size_classes_sh) \"3 4\" 3 12 2 >$@",
         "//conditions:default": "$(location :size_classes_sh) \"3 4\" 3 12 2 >$@",
     }),
     tools = [":size_classes_sh"],
diff --git a/third_party/jpeg/jpeg.BUILD b/third_party/jpeg/jpeg.BUILD
index f6078052ec..e431f19382 100644
--- a/third_party/jpeg/jpeg.BUILD
+++ b/third_party/jpeg/jpeg.BUILD
@@ -5,7 +5,7 @@ licenses(["notice"])  # custom notice-style license, see LICENSE.md
 
 exports_files(["LICENSE.md"])
 
-load("@%ws%//third_party:common.bzl", "template_rule")
+load("@org_tensorflow//third_party:common.bzl", "template_rule")
 
 libjpegturbo_nocopts = "-[W]error"
 
diff --git a/third_party/nccl.BUILD b/third_party/nccl.BUILD
index 8c7b9bdbe9..b2b8e18824 100644
--- a/third_party/nccl.BUILD
+++ b/third_party/nccl.BUILD
@@ -44,17 +44,17 @@ cc_library(
         "-O3",
     ] + cuda_default_copts(),
     linkopts = select({
-        "@%ws%//tensorflow:android": [
+        "@org_tensorflow//tensorflow:android": [
             "-pie",
         ],
-        "@%ws%//tensorflow:darwin": [
+        "@org_tensorflow//tensorflow:darwin": [
             "-Wl,-framework",
             "-Wl,CoreFoundation",
             "-Wl,-framework",
             "-Wl,Security",
         ],
-        "@%ws%//tensorflow:ios": [],
-        "@%ws%//tensorflow:windows": [
+        "@org_tensorflow//tensorflow:ios": [],
+        "@org_tensorflow//tensorflow:windows": [
             "-DEFAULTLIB:ws2_32.lib",
         ],
         "//conditions:default": [
diff --git a/third_party/repo.bzl b/third_party/repo.bzl
new file mode 100644
index 0000000000..d6e5dfced0
--- /dev/null
+++ b/third_party/repo.bzl
@@ -0,0 +1,103 @@
+# Copyright 2017 The TensorFlow Authors. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Utilities for defining TensorFlow Bazel dependencies."""
+
+_SINGLE_URL_WHITELIST = depset([
+    "arm_compiler",
+    "ortools_archive",
+])
+
+def _is_windows(ctx):
+  return ctx.os.name.lower().find("windows") != -1
+
+def _get_env_var(ctx, name):
+  if name in ctx.os.environ:
+    return ctx.os.environ[name]
+  else:
+    return None
+
+# Executes specified command with arguments and calls 'fail' if it exited with
+# non-zero code
+def _execute_and_check_ret_code(repo_ctx, cmd_and_args):
+  result = repo_ctx.execute(cmd_and_args, timeout=10)
+  if result.return_code != 0:
+    fail(("Non-zero return code({1}) when executing '{0}':\n" + "Stdout: {2}\n"
+          + "Stderr: {3}").format(" ".join(cmd_and_args), result.return_code,
+                                  result.stdout, result.stderr))
+
+def _repos_are_siblings():
+  return Label("@foo//bar").workspace_root.startswith("../")
+
+# Apply a patch_file to the repository root directory
+# Runs 'patch -p1'
+def _apply_patch(ctx, patch_file):
+  # Don't check patch on Windows, because patch is only available under bash.
+  if not _is_windows(ctx) and not ctx.which("patch"):
+    fail("patch command is not found, please install it")
+  cmd = ["patch", "-p1", "-d", ctx.path("."), "-i", ctx.path(patch_file)]
+  if _is_windows(ctx):
+    bazel_sh = _get_env_var(ctx, "BAZEL_SH")
+    if not bazel_sh:
+      fail("BAZEL_SH environment variable is not set")
+    cmd = [bazel_sh, "-c", " ".join(cmd)]
+  _execute_and_check_ret_code(ctx, cmd)
+
+def _apply_delete(ctx, paths):
+  for path in paths:
+    if path.startswith("/"):
+      fail("refusing to rm -rf path starting with '/': " + path)
+    if ".." in path:
+      fail("refusing to rm -rf path containing '..': " + path)
+  _execute_and_check_ret_code(
+      ctx, ["rm", "-rf"] + [ctx.path(path) for path in paths])
+
+def _tf_http_archive(ctx):
+  if ("mirror.bazel.build" not in ctx.attr.urls[0] or
+      (len(ctx.attr.urls) < 2 and
+       ctx.attr.name not in _SINGLE_URL_WHITELIST)):
+    fail("tf_http_archive(urls) must have redundant URLs. The Bazel Mirror " +
+         "URL must come first. Please note mirroring happens after merge")
+  ctx.download_and_extract(
+      ctx.attr.urls,
+      "",
+      ctx.attr.sha256,
+      ctx.attr.type,
+      ctx.attr.strip_prefix)
+  if ctx.attr.delete:
+    _apply_delete(ctx, ctx.attr.delete)
+  if ctx.attr.patch_file != None:
+    _apply_patch(ctx, ctx.attr.patch_file)
+  if ctx.attr.build_file != None:
+    ctx.template("BUILD", ctx.attr.build_file, {
+        "%prefix%": ".." if _repos_are_siblings() else "external",
+    }, False)
+
+tf_http_archive = repository_rule(
+    implementation=_tf_http_archive,
+    attrs={
+        "sha256": attr.string(mandatory=True),
+        "urls": attr.string_list(mandatory=True, allow_empty=False),
+        "strip_prefix": attr.string(),
+        "type": attr.string(),
+        "delete": attr.string_list(),
+        "patch_file": attr.label(),
+        "build_file": attr.label(),
+    })
+"""Downloads and creates Bazel repos for dependencies.
+
+This is a swappable replacement for both http_archive() and
+new_http_archive() that offers some additional features. It also helps
+ensure best practices are followed.
+"""
diff --git a/third_party/snappy.BUILD b/third_party/snappy.BUILD
index 9c00b7068a..fd48ed8941 100644
--- a/third_party/snappy.BUILD
+++ b/third_party/snappy.BUILD
@@ -50,8 +50,8 @@ genrule(
            "-e 's/@ac_cv_have_stddef_h@/1/g' " +
            "-e 's/@ac_cv_have_stdint_h@/1/g' " +
            select({
-               "@%ws%//tensorflow:windows": "-e 's/@ac_cv_have_sys_uio_h@/0/g' ",
-               "@%ws%//tensorflow:windows_msvc": "-e 's/@ac_cv_have_sys_uio_h@/0/g' ",
+               "@org_tensorflow//tensorflow:windows": "-e 's/@ac_cv_have_sys_uio_h@/0/g' ",
+               "@org_tensorflow//tensorflow:windows_msvc": "-e 's/@ac_cv_have_sys_uio_h@/0/g' ",
                "//conditions:default": "-e 's/@ac_cv_have_sys_uio_h@/1/g' ",
            }) +
            "-e 's/@SNAPPY_MAJOR@/1/g' " +
-- 
GitLab


From 2270ecc169025c7fa33edd09700abcd72a777373 Mon Sep 17 00:00:00 2001
From: Yao Zhang <yaozhang@google.com>
Date: Fri, 1 Dec 2017 15:59:44 -0800
Subject: [PATCH 0536/1225] Remove SliceProcessorConcatOffset, which is not
 robust as it modifies nodes which could be used elsewhere in the graph;
 SliceProcessorConcatOffset is a historical implementation anyway, and the
 same functionality could be provided by more recently developed
 SliceProcessor and SliceProcessorConst.

PiperOrigin-RevId: 177653218
---
 .../grappler/optimizers/layout_optimizer.cc   | 66 ++---------------
 .../optimizers/layout_optimizer_test.cc       | 72 ++++++++++++++++++-
 2 files changed, 76 insertions(+), 62 deletions(-)

diff --git a/tensorflow/core/grappler/optimizers/layout_optimizer.cc b/tensorflow/core/grappler/optimizers/layout_optimizer.cc
index e9436638f0..36e5047d61 100644
--- a/tensorflow/core/grappler/optimizers/layout_optimizer.cc
+++ b/tensorflow/core/grappler/optimizers/layout_optimizer.cc
@@ -1136,7 +1136,7 @@ class SliceProcessor : public AgnosticNodeProcessor {
       string node_name =
           AddPrefixToNodeName(base_name, kPermVecNHWCToNCHW, "-");
       TF_RETURN_IF_ERROR(HasAttribute(*node_, "Index"));
-      AddNodePermVec(node_name, node_->input(i),
+      AddNodePermVec(node_name, node_->input(i), node_->device(),
                      node_->attr().at("Index").type(), true);
       node_map_->UpdateOutput(node_->input(i), node_->name(), node_name);
       node_map_->AddOutput(node_name, node_->name());
@@ -1194,10 +1194,12 @@ class SliceProcessor : public AgnosticNodeProcessor {
   }
 
   void AddNodePermVec(const string& node_name, const string& input_name,
-                      DataType data_type, bool NHWCToNCHW) {
+                      const string& device, DataType data_type,
+                      bool NHWCToNCHW) {
     NodeDef* node = graph_->add_node();
     node_map_->AddNode(node_name, node);
     node->set_name(node_name);
+    node->set_device(device);
     *node->add_input() = input_name;
     *node->add_input() = NHWCToNCHW ? GetOrAddNodePermNHWCToNCHW()
                                     : GetOrAddNodePermNCHWToNHWC();
@@ -1215,10 +1217,6 @@ class SliceProcessor : public AgnosticNodeProcessor {
     AttrValue attr_type_params;
     attr_type_params.set_type(data_type);
     node->mutable_attr()->insert({"Tparams", attr_type_params});
-
-    AttrValue attr_validate;
-    attr_validate.set_b(true);
-    node->mutable_attr()->insert({"validate_indices", attr_validate});
   }
 };
 
@@ -1240,58 +1238,6 @@ class SliceProcessorConst : public AgnosticNodeProcessor {
   }
 };
 
-// Specialized SliceProcessor, used if the second input is ConcatOffset. An
-// example use case is in the gradient computation of Concat for InceptionV3.
-class SliceProcessorConcatOffset : public AgnosticNodeProcessor {
- public:
-  explicit SliceProcessorConcatOffset(const OptimizeContext& opt_cxt)
-      : AgnosticNodeProcessor(opt_cxt) {}
-
- protected:
-  Status CustomizedProcessing() override {
-    auto maybe_concatoffset_node =
-        node_map_->GetNode(NodeName(node_->input(1)));
-    if (IsConcatOffset(*maybe_concatoffset_node)) {
-      auto maybe_axis_node =
-          node_map_->GetNode(maybe_concatoffset_node->input(0));
-      NodeDef* axis_node;
-      if (IsConstant(*maybe_axis_node)) {
-        axis_node = maybe_axis_node;
-        // A FloorMod node might be added between ConcatOffset and the concat
-        // dimension const node to handle a negative dimension index -1, meaning
-        // the last dimension, which is consistent with the python's notation
-        // for negative index.
-      } else if (IsFloorMod(*maybe_axis_node)) {
-        axis_node = node_map_->GetNode(maybe_axis_node->input(0));
-      } else {
-        return Status(error::INVALID_ARGUMENT,
-                      strings::StrCat("Expect either Const or FloorMod for the "
-                                      "input 1 of ConcatOffset"));
-      }
-      // Need to process if the channel is at dimension 3, which indicates the
-      // NHWC format is being used. As multiple Slice nodes may share the same
-      // ConcatOffset node, the NHWC to NCHW conversion may have already
-      // been performed when processing other Slice nodes.
-      TF_RETURN_IF_ERROR(HasAttribute(*axis_node, "value"));
-      int concat_dim = axis_node->attr().at("value").tensor().int_val(0);
-      if (concat_dim == -1 || concat_dim == 3) {
-        // Update the dimension order for shape input nodes. Note that the input
-        // 2 of Slice also shares one of the shape nodes.
-        for (int i = 1; i < maybe_concatoffset_node->input_size(); i++) {
-          auto shape_node =
-              node_map_->GetNode(maybe_concatoffset_node->input(i));
-          TF_RETURN_IF_ERROR(UpdateAttrValue(shape_node));
-        }
-        // Set the channel dimension to 1, as we have converted the vector
-        // element order from NHWC to NCHW.
-        axis_node->mutable_attr()->at("value").mutable_tensor()->set_int_val(0,
-                                                                             1);
-      }
-    }
-    return Status::OK();
-  }
-};
-
 class SqueezeProcessor : public AgnosticNodeProcessor {
  public:
   explicit SqueezeProcessor(const OptimizeContext& opt_cxt)
@@ -1496,9 +1442,7 @@ class DataLayoutOptimizer : GraphProcessor {
           } else if (IsSlice(*node)) {
             auto input1 = node_map_->GetNode(NodeName(node->input(1)));
             auto input2 = node_map_->GetNode(NodeName(node->input(2)));
-            if (IsConcatOffset(*input1)) {
-              node_processor.reset(new SliceProcessorConcatOffset(opt_cxt));
-            } else if (IsConstant(*input1) && IsConstant(*input2)) {
+            if (IsConstant(*input1) && IsConstant(*input2)) {
               node_processor.reset(new SliceProcessorConst(opt_cxt));
             } else {
               node_processor.reset(new SliceProcessor(opt_cxt));
diff --git a/tensorflow/core/grappler/optimizers/layout_optimizer_test.cc b/tensorflow/core/grappler/optimizers/layout_optimizer_test.cc
index 363b4c3fd8..0b906485e7 100644
--- a/tensorflow/core/grappler/optimizers/layout_optimizer_test.cc
+++ b/tensorflow/core/grappler/optimizers/layout_optimizer_test.cc
@@ -44,7 +44,7 @@ class LayoutOptimizerTest : public ::testing::Test {
 
   Output SimpleConv2D(tensorflow::Scope* s, int input_size, int filter_size,
                       const string& padding, const string& device) {
-    int batch_size = 128;
+    int batch_size = 8;
     int input_height = input_size;
     int input_width = input_size;
     int input_depth = 3;
@@ -699,6 +699,76 @@ TEST_F(LayoutOptimizerTest, MulVectorAnd4D) {
             "LayoutOptimizerTransposeNCHWToNHWC-Conv2D-mul-1");
 }
 
+TEST_F(LayoutOptimizerTest, SliceConst) {
+  tensorflow::Scope s = tensorflow::Scope::NewRootScope();
+  auto conv = SimpleConv2D(&s, 5, 2, "VALID");
+  auto begin = ops::Const(s.WithOpName("begin"), {0, 2, 3, 1}, {4});
+  auto size = ops::Const(s.WithOpName("size"), {4, 1, 2, 4}, {4});
+  auto slice = ops::Slice(s.WithOpName("slice"), conv, begin, size);
+  auto o = ops::Identity(s.WithOpName("o"), slice);
+  GrapplerItem item;
+  TF_CHECK_OK(s.ToGraphDef(&item.graph));
+  LayoutOptimizer optimizer;
+  GraphDef output;
+  Status status = optimizer.Optimize(virtual_cluster_.get(), item, &output);
+  NodeMap node_map(&output);
+  auto slice_node = node_map.GetNode("slice");
+  EXPECT_EQ(slice_node->input(0), "Conv2D");
+  EXPECT_EQ(slice_node->input(1), "LayoutOptimizer-slice-begin");
+  EXPECT_EQ(slice_node->input(2), "LayoutOptimizer-slice-size");
+
+  auto begin_const = node_map.GetNode("LayoutOptimizer-slice-begin");
+  Tensor begin_tensor;
+  EXPECT_TRUE(begin_tensor.FromProto(
+      begin_const->mutable_attr()->at({"value"}).tensor()));
+  Tensor begin_tensor_expected(DT_INT32, {4});
+  test::FillValues<int>(&begin_tensor_expected, {0, 1, 2, 3});
+  test::ExpectTensorEqual<int>(begin_tensor_expected, begin_tensor);
+
+  auto size_const = node_map.GetNode("LayoutOptimizer-slice-size");
+  Tensor size_tensor;
+  EXPECT_TRUE(size_tensor.FromProto(
+      size_const->mutable_attr()->at({"value"}).tensor()));
+  Tensor size_tensor_expected(DT_INT32, {4});
+  test::FillValues<int>(&size_tensor_expected, {4, 4, 1, 2});
+  test::ExpectTensorEqual<int>(size_tensor_expected, size_tensor);
+}
+
+TEST_F(LayoutOptimizerTest, SliceNonConst) {
+  tensorflow::Scope s = tensorflow::Scope::NewRootScope();
+  auto conv = SimpleConv2D(&s, 5, 2, "VALID");
+  auto begin = ops::Const(s.WithOpName("begin"), {0, 2, 3, 1}, {4});
+  auto ibegin = ops::Identity(s.WithOpName("ibegin"), begin);
+  auto size = ops::Const(s.WithOpName("size"), {4, 1, 2, 4}, {4});
+  auto isize = ops::Identity(s.WithOpName("isize"), size);
+  auto slice = ops::Slice(s.WithOpName("slice"), conv, ibegin, isize);
+  auto o = ops::Identity(s.WithOpName("o"), slice);
+  GrapplerItem item;
+  TF_CHECK_OK(s.ToGraphDef(&item.graph));
+  LayoutOptimizer optimizer;
+  GraphDef output;
+  Status status = optimizer.Optimize(virtual_cluster_.get(), item, &output);
+  NodeMap node_map(&output);
+  auto slice_node = node_map.GetNode("slice");
+  EXPECT_EQ(slice_node->input(0), "Conv2D");
+  EXPECT_EQ(slice_node->input(1),
+            "LayoutOptimizerPermVecNHWCToNCHW-slice-input1");
+  EXPECT_EQ(slice_node->input(2),
+            "LayoutOptimizerPermVecNHWCToNCHW-slice-input2");
+
+  auto perm1 =
+      node_map.GetNode("LayoutOptimizerPermVecNHWCToNCHW-slice-input1");
+  EXPECT_EQ(perm1->input(0), "ibegin");
+  EXPECT_EQ(perm1->input(1), "LayoutOptimizerPermConstNHWCToNCHW");
+  EXPECT_EQ(perm1->input(2), "LayoutOptimizerGatherAxisConst");
+
+  auto perm2 =
+      node_map.GetNode("LayoutOptimizerPermVecNHWCToNCHW-slice-input2");
+  EXPECT_EQ(perm2->input(0), "isize");
+  EXPECT_EQ(perm2->input(1), "LayoutOptimizerPermConstNHWCToNCHW");
+  EXPECT_EQ(perm2->input(2), "LayoutOptimizerGatherAxisConst");
+}
+
 }  // namespace
 }  // namespace grappler
 }  // namespace tensorflow
-- 
GitLab


From 59ae88dac4399a8719aebe1b90f87f61fd1fd7e5 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 1 Dec 2017 16:17:04 -0800
Subject: [PATCH 0537/1225] Eliminate matrix multiplication with zeros.

PiperOrigin-RevId: 177655417
---
 tensorflow/core/grappler/op_types.cc          |  6 ++++
 tensorflow/core/grappler/op_types.h           |  1 +
 .../grappler/optimizers/constant_folding.cc   | 30 ++++++++++++------
 .../optimizers/constant_folding_test.cc       | 31 ++++++++++++-------
 4 files changed, 48 insertions(+), 20 deletions(-)

diff --git a/tensorflow/core/grappler/op_types.cc b/tensorflow/core/grappler/op_types.cc
index 15fcaa857e..571975aca1 100644
--- a/tensorflow/core/grappler/op_types.cc
+++ b/tensorflow/core/grappler/op_types.cc
@@ -90,6 +90,12 @@ bool IsIdentity(const NodeDef& node) {
   return op == "Identity" || op == "RefIdentity";
 }
 
+bool IsMatMul(const NodeDef& node) {
+  const auto op = node.op();
+  return op == "MatMul" || op == "BatchMatMul" || op == "QuantizedMatMul" ||
+         op == "SparseMatMul";
+}
+
 bool IsMerge(const NodeDef& node) {
   const auto op = node.op();
   return op == "Merge" || op == "RefMerge";
diff --git a/tensorflow/core/grappler/op_types.h b/tensorflow/core/grappler/op_types.h
index b1d81448af..47dd2c7faf 100644
--- a/tensorflow/core/grappler/op_types.h
+++ b/tensorflow/core/grappler/op_types.h
@@ -43,6 +43,7 @@ bool IsFusedBatchNormGradV1(const NodeDef& node);
 bool IsIdentity(const NodeDef& node);
 bool IsMerge(const NodeDef& node);
 bool IsMul(const NodeDef& node);
+bool IsMatMul(const NodeDef& node);
 bool IsNextIteration(const NodeDef& node);
 bool IsPad(const NodeDef& node);
 bool IsNoOp(const NodeDef& node);
diff --git a/tensorflow/core/grappler/optimizers/constant_folding.cc b/tensorflow/core/grappler/optimizers/constant_folding.cc
index e0f39c2931..84f3cc9df7 100644
--- a/tensorflow/core/grappler/optimizers/constant_folding.cc
+++ b/tensorflow/core/grappler/optimizers/constant_folding.cc
@@ -1317,9 +1317,11 @@ Status ConstantFolding::SimplifyGraph(GraphDef* output,
 
     // Simplify multiplication by ones or zeros, and addition of zeros.
     bool is_mul = IsMul(node);
+    bool is_matmul = IsMatMul(node);
     bool is_add = IsAdd(node);
     if (opt_level_ == RewriterConfig::AGGRESSIVE && use_shape_info &&
-        (is_mul || is_add) && properties.HasInputProperties(node.name()) &&
+        (is_mul || is_matmul || is_add) &&
+        properties.HasInputProperties(node.name()) &&
         properties.HasOutputProperties(node.name())) {
       const NodeDef* x = node_map_->GetNode(node.input(0));
       const NodeDef* y = node_map_->GetNode(node.input(1));
@@ -1335,24 +1337,34 @@ Status ConstantFolding::SimplifyGraph(GraphDef* output,
       // Simplify multiplication by or addition of zeros.
       const bool x_is_zero = IsZeros(*x);
       const bool x_matches_output_shape = ShapesEqual(output_shape, x_shape);
-      if (x_is_zero && x_matches_output_shape) {
-        // 0 * y = 0 or 0 + y = y.
-        ReplaceAddOrMulWithIdentity(is_mul ? 0 : 1, &node);
+      if (x_is_zero) {
+        if ((is_mul && x_matches_output_shape) || is_matmul) {
+          // 0 * y = 0
+          ReplaceAddOrMulWithIdentity(0, &node);
+        } else {
+          // 0 + y = y.
+          ReplaceAddOrMulWithIdentity(1, &node);
+        }
         continue;
       }
       const TensorShapeProto& y_shape =
           properties.GetInputProperties(node.name())[1].shape();
       const bool y_is_zero = IsZeros(*y);
       const bool y_matches_output_shape = ShapesEqual(output_shape, y_shape);
-      if (y_is_zero && y_matches_output_shape) {
-        // x * 0 = 0 or x + 0 = x.
-        ReplaceAddOrMulWithIdentity(is_mul ? 1 : 0, &node);
+      if (y_is_zero) {
+        if ((is_mul && y_matches_output_shape) || is_matmul) {
+          // x * 0 = 0
+          ReplaceAddOrMulWithIdentity(1, &node);
+        } else {
+          // x + 0 = y.
+          ReplaceAddOrMulWithIdentity(0, &node);
+        }
         continue;
       }
 
       if (is_mul) {
-        // Simplify multiplication by zeros where the output shape does not
-        // match the shape of the zero input.
+        // Simplify scalar multiplication by zeros where, due to broadcasting,
+        // the output shape does not match the shape of the zero input.
         if (x_is_zero || y_is_zero) {
           TF_RETURN_IF_ERROR(
               ReplaceAddOrMulWithConstant(0, output_shape, &node));
diff --git a/tensorflow/core/grappler/optimizers/constant_folding_test.cc b/tensorflow/core/grappler/optimizers/constant_folding_test.cc
index 32a691d3ee..a17ec733ea 100644
--- a/tensorflow/core/grappler/optimizers/constant_folding_test.cc
+++ b/tensorflow/core/grappler/optimizers/constant_folding_test.cc
@@ -81,26 +81,27 @@ TEST_F(ConstantFoldingTest, NeutralElement) {
   for (bool use_const : {true, false}) {
     tensorflow::Scope s = tensorflow::Scope::NewRootScope();
     Output x = ops::Placeholder(s.WithOpName("x"), DT_FLOAT,
-                                ops::Placeholder::Shape(TensorShape({1, 2})));
+                                ops::Placeholder::Shape(TensorShape({2, 2})));
     Output y = ops::Placeholder(s.WithOpName("y"), DT_FLOAT,
-                                ops::Placeholder::Shape(TensorShape({1, 2})));
-    Output zeros =
-        !use_const ? ops::ZerosLike(s.WithOpName("zeros"), x)
-                   : ops::Const(s.WithOpName("zeros"), {0.0f, 0.0f}, {1, 2});
+                                ops::Placeholder::Shape(TensorShape({2, 2})));
+    Output zeros = !use_const ? ops::ZerosLike(s.WithOpName("zeros"), x)
+                              : ops::Const(s.WithOpName("zeros"), 0.0f, {2, 2});
     Output zeros_broadcast =
-        ops::Const(s.WithOpName("zeros_broadcast"), {0.0f}, {1, 1});
-    Output ones = !use_const
-                      ? ops::OnesLike(s.WithOpName("ones"), x)
-                      : ops::Const(s.WithOpName("ones"), {1.0f, 1.0f}, {1, 2});
+        ops::Const(s.WithOpName("zeros_broadcast"), 0.0f, {1, 1});
+    Output ones = !use_const ? ops::OnesLike(s.WithOpName("ones"), x)
+                             : ops::Const(s.WithOpName("ones"), 1.0f, {2, 2});
     Output mul1 = ops::Mul(s.WithOpName("mul1"), x, zeros);
     Output mul2 = ops::Mul(s.WithOpName("mul2"), zeros, y);
     Output mul3 = ops::Mul(s.WithOpName("mul3"), x, ones);
     Output mul4 = ops::Mul(s.WithOpName("mul4"), ones, y);
     Output mul5 = ops::Mul(s.WithOpName("mul1"), x, zeros_broadcast);
     Output mul6 = ops::Mul(s.WithOpName("mul2"), zeros_broadcast, y);
+    Output matmul1 = ops::MatMul(s.WithOpName("matmul1"), x, zeros);
+    Output matmul2 = ops::MatMul(s.WithOpName("matmul2"), zeros, y);
     Output add1 = ops::Add(s.WithOpName("add1"), x, zeros);
     Output add2 = ops::Add(s.WithOpName("add2"), zeros, y);
-    Output addn = ops::AddN(s, {mul1, mul2, mul3, mul4, add1, add2});
+    Output addn =
+        ops::AddN(s, {mul1, mul2, mul3, mul4, matmul1, matmul2, add1, add2});
     GrapplerItem item;
     TF_CHECK_OK(s.ToGraphDef(&item.graph));
 
@@ -110,7 +111,7 @@ TEST_F(ConstantFoldingTest, NeutralElement) {
     Status status = optimizer.Optimize(nullptr, item, &output);
     TF_EXPECT_OK(status);
 
-    EXPECT_EQ(14, output.node_size());
+    EXPECT_EQ(16, output.node_size());
     for (int i = 0; i < output.node_size(); ++i) {
       const NodeDef& node = output.node(i);
       const string& name = node.name();
@@ -132,6 +133,14 @@ TEST_F(ConstantFoldingTest, NeutralElement) {
           EXPECT_EQ("zeros", node.input(0));
           EXPECT_EQ("^y", node.input(1));
         }
+      } else if (name == "matmul1") {
+        EXPECT_EQ("Identity", node.op());
+        EXPECT_EQ("zeros", node.input(0));
+        EXPECT_EQ("^x", node.input(1));
+      } else if (name == "matmul2") {
+        EXPECT_EQ("Identity", node.op());
+        EXPECT_EQ("zeros", node.input(0));
+        EXPECT_EQ("^y", node.input(1));
       } else if (name == "mul3") {
         EXPECT_EQ("Identity", node.op());
         EXPECT_EQ("x", node.input(0));
-- 
GitLab


From 1ee6d7ccbcc20ac3051fd69d7377306e49f5b6dd Mon Sep 17 00:00:00 2001
From: Amit Patankar <amitpatankar@google.com>
Date: Fri, 1 Dec 2017 16:22:22 -0800
Subject: [PATCH 0538/1225] Fixing the python 3.6 build error.

PiperOrigin-RevId: 177655994
---
 tensorflow/tools/ci_build/builds/pip.sh       | 19 +++++----
 .../install/install_python3.6_pip_packages.sh | 40 +++++++++----------
 2 files changed, 31 insertions(+), 28 deletions(-)

diff --git a/tensorflow/tools/ci_build/builds/pip.sh b/tensorflow/tools/ci_build/builds/pip.sh
index 552df1434e..a37cf226f9 100755
--- a/tensorflow/tools/ci_build/builds/pip.sh
+++ b/tensorflow/tools/ci_build/builds/pip.sh
@@ -296,15 +296,20 @@ create_activate_virtualenv_and_install_tensorflow() {
     die "FAILED to create virtualenv directory: ${VIRTUALENV_DIR}"
   fi
 
-  # Verify that virtualenv exists
-  if [[ -z $(which virtualenv) ]]; then
-    die "FAILED: virtualenv not available on path"
+  if [[ ${PYTHON_BIN_PATH} == *"python3.6"* ]]; then
+    "${PYTHON_BIN_PATH}" -m venv "${VIRTUALENV_FLAGS}" \
+      "${VIRTUALENV_DIR}" || \
+      die "FAILED: Unable to create virtualenv"
+  else
+    # Verify that virtualenv exists
+    if [[ -z $(which virtualenv) ]]; then
+      die "FAILED: virtualenv not available on path"
+    fi
+    virtualenv ${VIRTUALENV_FLAGS} \
+      -p "${PYTHON_BIN_PATH}" "${VIRTUALENV_DIR}" || \
+      die "FAILED: Unable to create virtualenv"
   fi
 
-  virtualenv ${VIRTUALENV_FLAGS} \
-    -p "${PYTHON_BIN_PATH}" "${VIRTUALENV_DIR}" || \
-    die "FAILED: Unable to create virtualenv"
-
   source "${VIRTUALENV_DIR}/bin/activate" || \
     die "FAILED: Unable to activate virtualenv in ${VIRTUALENV_DIR}"
 
diff --git a/tensorflow/tools/ci_build/install/install_python3.6_pip_packages.sh b/tensorflow/tools/ci_build/install/install_python3.6_pip_packages.sh
index c354aaa154..ec7d9bf195 100755
--- a/tensorflow/tools/ci_build/install/install_python3.6_pip_packages.sh
+++ b/tensorflow/tools/ci_build/install/install_python3.6_pip_packages.sh
@@ -26,25 +26,23 @@ apt-get update
 
 set -e
 # Install Python 3.6 and dev library
-apt-get install -y --no-install-recommends python3.6 libpython3.6-dev
-
-# Install pip3.6
-set +e
-pip35_version=$(pip3.6 --version | grep "python 3.6")
-if [[ -z $pip35_version ]]; then
-  set -e
-  wget -q https://bootstrap.pypa.io/get-pip.py
-  python3.6 get-pip.py
-  rm -f get-pip.py
-fi
+wget https://www.python.org/ftp/python/3.6.1/Python-3.6.1.tar.xz
+tar xvf Python-3.6.1.tar.xz
+cd Python-3.6.1
+
+./configure
+make altinstall
+pip3.6 -V
+which pip3.6
+ln -s /usr/local/bin/pip3.6 /usr/local/bin/pip3
 
 set -e
 # Install six.
-pip3.6 install --upgrade absl-py
-pip3.6 install --upgrade six==1.10.0
+pip3 install --upgrade absl-py
+pip3 install --upgrade six==1.10.0
 
 # Install protobuf.
-pip3.6 install --upgrade protobuf==3.3.0
+pip3 install --upgrade protobuf==3.3.0
 
 # Remove obsolete version of six, which can sometimes confuse virtualenv.
 rm -rf /usr/lib/python3/dist-packages/six*
@@ -54,22 +52,22 @@ rm -rf /usr/lib/python3/dist-packages/six*
 # numpy needs to be installed from source to fix segfaults. See:
 # https://github.com/tensorflow/tensorflow/issues/6968
 # This workaround isn't needed for Ubuntu 16.04 or later.
-pip3.6 install --no-binary=:all: --upgrade numpy==1.12.0
+pip3 install --no-binary=:all: --upgrade numpy==1.12.0
 
-pip3.6 install scipy==0.18.1
+pip3 install scipy==0.18.1
 
-pip3.6 install scikit-learn==0.18.1
+pip3 install scikit-learn==0.18.1
 
 # pandas required by `inflow`
 pip3 install pandas==0.19.2
 
 # Install recent-enough version of wheel for Python 3.6 wheel builds
-pip3.6 install wheel==0.29.0
+pip3 install wheel==0.29.0
 
-pip3.6 install portpicker
+pip3 install portpicker
 
-pip3.6 install werkzeug
+pip3 install werkzeug
 
-pip3.6 install grpcio
+pip3 install grpcio
 
 # LINT.ThenChange(//tensorflow/tools/ci_build/install/install_python3.5_pip_packages.sh)
-- 
GitLab


From da105bfabc311840024b40d484dd1cd234697e23 Mon Sep 17 00:00:00 2001
From: Vinu Rajashekhar <vinuraja@google.com>
Date: Fri, 1 Dec 2017 16:24:27 -0800
Subject: [PATCH 0539/1225] Adds a GuaranteeConstOp.

- Acts as indicator for the TF runtime to make possible optimizations by treating the input tensor as a constant.

PiperOrigin-RevId: 177656212
---
 .../base_api/api_def_GuaranteeConst.pbtxt     | 12 +++
 tensorflow/core/kernels/BUILD                 | 26 +++++++
 tensorflow/core/kernels/guarantee_const_op.cc | 47 ++++++++++++
 .../core/kernels/guarantee_const_op_test.cc   | 75 +++++++++++++++++++
 tensorflow/core/ops/array_ops.cc              | 20 +++++
 tensorflow/core/ops/array_ops_test.cc         |  7 ++
 .../python/kernel_tests/array_ops_test.py     | 34 +++++++++
 tensorflow/python/ops/array_ops.py            |  1 +
 tensorflow/tools/api/golden/tensorflow.pbtxt  |  4 +
 9 files changed, 226 insertions(+)
 create mode 100644 tensorflow/core/api_def/base_api/api_def_GuaranteeConst.pbtxt
 create mode 100644 tensorflow/core/kernels/guarantee_const_op.cc
 create mode 100644 tensorflow/core/kernels/guarantee_const_op_test.cc

diff --git a/tensorflow/core/api_def/base_api/api_def_GuaranteeConst.pbtxt b/tensorflow/core/api_def/base_api/api_def_GuaranteeConst.pbtxt
new file mode 100644
index 0000000000..b2a2e1aaef
--- /dev/null
+++ b/tensorflow/core/api_def/base_api/api_def_GuaranteeConst.pbtxt
@@ -0,0 +1,12 @@
+op {
+  graph_op_name: "GuaranteeConst"
+  summary: "Gives a guarantee to the TF runtime that the input tensor is a constant."
+  description: <<END
+The runtime is then free to make optimizations based on this.
+
+Only accepts value typed tensors as inputs and rejects resource variable handles
+as input.
+
+Returns the input tensor without modification.
+END
+}
diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD
index 8d87915658..a46fbbfc8e 100644
--- a/tensorflow/core/kernels/BUILD
+++ b/tensorflow/core/kernels/BUILD
@@ -589,6 +589,7 @@ cc_library(
         ":extract_image_patches_op",
         ":gather_nd_op",
         ":gather_op",
+        ":guarantee_const_op",
         ":identity_n_op",
         ":identity_op",
         ":inplace_ops",
@@ -635,6 +636,12 @@ tf_kernel_library(
     deps = ARRAY_DEPS,
 )
 
+tf_kernel_library(
+    name = "guarantee_const_op",
+    prefix = "guarantee_const_op",
+    deps = ARRAY_DEPS,
+)
+
 tf_kernel_library(
     name = "constant_op",
     prefix = "constant_op",
@@ -1193,6 +1200,25 @@ tf_cuda_cc_test(
     ],
 )
 
+tf_cc_test(
+    name = "guarantee_const_op_test",
+    size = "small",
+    srcs = ["guarantee_const_op_test.cc"],
+    deps = [
+        ":guarantee_const_op",
+        ":ops_testutil",
+        ":ops_util",
+        ":variable_ops",
+        "//tensorflow/core:core_cpu",
+        "//tensorflow/core:framework",
+        "//tensorflow/core:lib",
+        "//tensorflow/core:protos_all_cc",
+        "//tensorflow/core:test",
+        "//tensorflow/core:test_main",
+        "//tensorflow/core:testlib",
+    ],
+)
+
 tf_cc_test(
     name = "identity_op_test",
     size = "small",
diff --git a/tensorflow/core/kernels/guarantee_const_op.cc b/tensorflow/core/kernels/guarantee_const_op.cc
new file mode 100644
index 0000000000..de3a2a1148
--- /dev/null
+++ b/tensorflow/core/kernels/guarantee_const_op.cc
@@ -0,0 +1,47 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/core/framework/op_kernel.h"
+#include "tensorflow/core/lib/core/errors.h"
+
+namespace tensorflow {
+namespace {
+
+// Refer to the Op description for detailed comments.
+class GuaranteeConstOp : public OpKernel {
+ public:
+  explicit GuaranteeConstOp(OpKernelConstruction* ctx) : OpKernel(ctx) {}
+
+  void Compute(OpKernelContext* ctx) override {
+    const DataType input_dtype = ctx->input_dtype(0);
+    OP_REQUIRES(ctx, input_dtype != DT_RESOURCE,
+                errors::InvalidArgument(
+                    "Input tensor cannot be a resource variable handle."));
+    const Tensor& input_tensor = ctx->input(0);
+    Tensor* output = nullptr;
+    if (!ctx->forward_input_to_output_with_shape(0, 0, input_tensor.shape(),
+                                                 &output)) {
+      ctx->set_output(0, input_tensor);
+    }
+  }
+
+  bool IsExpensive() override { return false; }
+};
+
+REGISTER_KERNEL_BUILDER(Name("GuaranteeConst").Device(DEVICE_CPU),
+                        GuaranteeConstOp);
+
+}  // namespace
+}  // namespace tensorflow
diff --git a/tensorflow/core/kernels/guarantee_const_op_test.cc b/tensorflow/core/kernels/guarantee_const_op_test.cc
new file mode 100644
index 0000000000..01461fbb8c
--- /dev/null
+++ b/tensorflow/core/kernels/guarantee_const_op_test.cc
@@ -0,0 +1,75 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/core/framework/fake_input.h"
+#include "tensorflow/core/framework/node_def_builder.h"
+#include "tensorflow/core/framework/tensor.h"
+#include "tensorflow/core/framework/tensor_testutil.h"
+#include "tensorflow/core/framework/types.h"
+#include "tensorflow/core/kernels/ops_testutil.h"
+#include "tensorflow/core/kernels/ops_util.h"
+#include "tensorflow/core/kernels/variable_ops.h"
+#include "tensorflow/core/lib/strings/strcat.h"
+#include "tensorflow/core/platform/test.h"
+
+namespace tensorflow {
+namespace {
+
+class GuaranteeConstOpTest : public OpsTestBase {
+ protected:
+  Status Init(DataType input_type) {
+    TF_CHECK_OK(NodeDefBuilder("op", "GuaranteeConst")
+                    .Input(FakeInput(input_type))
+                    .Finalize(node_def()));
+    return InitOp();
+  }
+};
+
+TEST_F(GuaranteeConstOpTest, Int32Success_6) {
+  TF_ASSERT_OK(Init(DT_INT32));
+  AddInputFromArray<int32>(TensorShape({6}), {1, 2, 3, 4, 5, 6});
+  TF_ASSERT_OK(RunOpKernel());
+  Tensor expected(allocator(), DT_INT32, TensorShape({6}));
+  test::FillValues<int32>(&expected, {1, 2, 3, 4, 5, 6});
+  test::ExpectTensorEqual<int32>(expected, *GetOutput(0));
+}
+
+TEST_F(GuaranteeConstOpTest, Int32Success_2_3) {
+  TF_ASSERT_OK(Init(DT_INT32));
+  AddInputFromArray<int32>(TensorShape({2, 3}), {1, 2, 3, 4, 5, 6});
+  TF_ASSERT_OK(RunOpKernel());
+  Tensor expected(allocator(), DT_INT32, TensorShape({2, 3}));
+  test::FillValues<int32>(&expected, {1, 2, 3, 4, 5, 6});
+  test::ExpectTensorEqual<int32>(expected, *GetOutput(0));
+}
+
+TEST_F(GuaranteeConstOpTest, StringSuccess) {
+  TF_ASSERT_OK(Init(DT_STRING));
+  AddInputFromArray<string>(TensorShape({6}), {"A", "b", "C", "d", "E", "f"});
+  TF_ASSERT_OK(RunOpKernel());
+  Tensor expected(allocator(), DT_STRING, TensorShape({6}));
+  test::FillValues<string>(&expected, {"A", "b", "C", "d", "E", "f"});
+  test::ExpectTensorEqual<string>(expected, *GetOutput(0));
+}
+
+TEST_F(GuaranteeConstOpTest, ResourceInputError) {
+  TF_ASSERT_OK(Init(DT_RESOURCE));
+  AddResourceInput("", "resource", new Var(DT_INT32));
+  const auto status = RunOpKernel();
+  ASSERT_EQ(error::INVALID_ARGUMENT, status.code());
+}
+
+}  // namespace
+}  // namespace tensorflow
diff --git a/tensorflow/core/ops/array_ops.cc b/tensorflow/core/ops/array_ops.cc
index 6f4ea09206..36d27ea110 100644
--- a/tensorflow/core/ops/array_ops.cc
+++ b/tensorflow/core/ops/array_ops.cc
@@ -706,6 +706,26 @@ memory_region_name: Name of readonly memory region used by the tensor, see
   NewReadOnlyMemoryRegionFromFile in tensorflow::Env.
 )doc");
 
+REGISTER_OP("GuaranteeConst")
+    .Input("input: T")
+    .Output("output: T")
+    .Attr("T: type")
+    .SetShapeFn([](shape_inference::InferenceContext* c) {
+      return UnchangedShape(c);
+    })
+    // We don't want this to be optimized away.
+    .SetIsStateful()
+    .Doc(R"(
+Gives a guarantee to the TF runtime that the input tensor is a constant.
+
+The runtime is then free to make optimizations based on this.
+
+Only accepts value typed tensors as inputs and rejects resource variable handles
+as input.
+
+Returns the input tensor without modification.
+)");
+
 // --------------------------------------------------------------------------
 REGISTER_OP("ZerosLike")
     .Input("x: T")
diff --git a/tensorflow/core/ops/array_ops_test.cc b/tensorflow/core/ops/array_ops_test.cc
index 94eb120175..e010ecda8e 100644
--- a/tensorflow/core/ops/array_ops_test.cc
+++ b/tensorflow/core/ops/array_ops_test.cc
@@ -158,6 +158,13 @@ TEST(ArrayOpsTest, UnchangedShapes_ShapeFn) {
   INFER_OK(op, "[1,2,?,4,5];?;?", "in0");
 }
 
+TEST(ArrayOpsTest, GuaranteeConst_ShapeFn) {
+  ShapeInferenceTestOp op("GuaranteeConst");
+  INFER_OK(op, "?", "in0");
+  INFER_OK(op, "[]", "in0");
+  INFER_OK(op, "[1,2,?,4,5]", "in0");
+}
+
 TEST(ArrayOpsTest, Identity_ShapeFnHandles) {
   const char* op_name = "Identity";
   ShapeInferenceTestOp op(op_name);
diff --git a/tensorflow/python/kernel_tests/array_ops_test.py b/tensorflow/python/kernel_tests/array_ops_test.py
index 1bf2b70c1b..6d649b1cac 100644
--- a/tensorflow/python/kernel_tests/array_ops_test.py
+++ b/tensorflow/python/kernel_tests/array_ops_test.py
@@ -34,9 +34,11 @@ from tensorflow.python.framework import test_ops
 from tensorflow.python.framework import test_util
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import gradients_impl
+from tensorflow.python.ops import init_ops
 from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import resource_variable_ops
 from tensorflow.python.ops import state_ops
+from tensorflow.python.ops import variable_scope
 from tensorflow.python.ops import variables
 from tensorflow.python.platform import test as test_lib
 
@@ -1090,5 +1092,37 @@ class InvertPermutationTest(test_util.TensorFlowTestCase):
         self.assertAllEqual(y.eval(), [2, 4, 3, 0, 1])
 
 
+class GuaranteeConstOpTest(test_util.TensorFlowTestCase):
+
+  def testSimple(self):
+    with self.test_session():
+      a = array_ops.constant(10)
+      guarantee_a = array_ops.guarantee_const(a)
+      self.assertEqual(10, guarantee_a.eval())
+
+  def testVariables(self):
+    with self.test_session() as sess:
+      for use_resource in [False, True]:
+        a = variable_scope.get_variable(
+            "var_{}".format(use_resource), [],
+            initializer=init_ops.constant_initializer(10.0),
+            use_resource=use_resource)
+        guarantee_a = array_ops.guarantee_const(a)
+        sess.run(variables.global_variables_initializer())
+        self.assertEqual(10.0, guarantee_a.eval())
+
+  def testResourceRejection(self):
+    with self.test_session() as sess:
+      a = variable_scope.get_variable(
+          "resource_var", [],
+          initializer=init_ops.constant_initializer(10.0),
+          use_resource=True)
+      guarantee_a = array_ops.guarantee_const(a.handle)
+      sess.run(variables.global_variables_initializer())
+      with self.assertRaisesWithPredicateMatch(errors.InvalidArgumentError,
+                                               "cannot be a resource variable"):
+        guarantee_a.eval()
+
+
 if __name__ == "__main__":
   test_lib.main()
diff --git a/tensorflow/python/ops/array_ops.py b/tensorflow/python/ops/array_ops.py
index 38eff54c69..23aa74c027 100644
--- a/tensorflow/python/ops/array_ops.py
+++ b/tensorflow/python/ops/array_ops.py
@@ -70,6 +70,7 @@ See the @{$python/array_ops} guide.
 @@quantize_v2
 @@quantized_concat
 @@setdiff1d
+@@guarantee_const
 @@fake_quant_with_min_max_args
 @@fake_quant_with_min_max_args_gradient
 @@fake_quant_with_min_max_vars
diff --git a/tensorflow/tools/api/golden/tensorflow.pbtxt b/tensorflow/tools/api/golden/tensorflow.pbtxt
index 57573d5024..e79f2a56f5 100644
--- a/tensorflow/tools/api/golden/tensorflow.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.pbtxt
@@ -1140,6 +1140,10 @@ tf_module {
     name: "group"
     argspec: "args=[], varargs=inputs, keywords=kwargs, defaults=None"
   }
+  member_method {
+    name: "guarantee_const"
+    argspec: "args=[\'input\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
   member_method {
     name: "hessians"
     argspec: "args=[\'ys\', \'xs\', \'name\', \'colocate_gradients_with_ops\', \'gate_gradients\', \'aggregation_method\'], varargs=None, keywords=None, defaults=[\'hessians\', \'False\', \'False\', \'None\'], "
-- 
GitLab


From 8b2f6ceeb02f5fe8b0d63a41e3119127c76907b6 Mon Sep 17 00:00:00 2001
From: Brennan Saeta <saeta@google.com>
Date: Fri, 1 Dec 2017 16:32:49 -0800
Subject: [PATCH 0540/1225] Log curl request state when requests hang.

PiperOrigin-RevId: 177657178
---
 .../core/platform/cloud/curl_http_request.cc  | 31 ++++++++++++++++++-
 .../core/platform/cloud/curl_http_request.h   |  2 ++
 .../platform/cloud/curl_http_request_test.cc  |  4 +++
 3 files changed, 36 insertions(+), 1 deletion(-)

diff --git a/tensorflow/core/platform/cloud/curl_http_request.cc b/tensorflow/core/platform/cloud/curl_http_request.cc
index 4581a0870a..6575ee8c97 100644
--- a/tensorflow/core/platform/cloud/curl_http_request.cc
+++ b/tensorflow/core/platform/cloud/curl_http_request.cc
@@ -117,6 +117,10 @@ class LibCurlProxy : public LibCurl {
   }
 
   void curl_free(void* p) override { ::curl_free(p); }
+
+  const char* curl_easy_strerror(CURLcode errornum) override {
+    return ::curl_easy_strerror(errornum);
+  }
 };
 }  // namespace
 
@@ -530,11 +534,36 @@ int CurlHttpRequest::ProgressCallback(void* this_object, curl_off_t dltotal,
   }
 
   if (now - that->last_progress_timestamp_ > kInactivityTimeoutSeconds) {
+    double lookup_time = -1;
+    const auto lookup_time_status = that->libcurl_->curl_easy_getinfo(
+        that->curl_, CURLINFO_NAMELOOKUP_TIME, &lookup_time);
+
+    double connect_time = -1;
+    const auto connect_time_status = that->libcurl_->curl_easy_getinfo(
+        that->curl_, CURLINFO_CONNECT_TIME, &connect_time);
+
+    double pretransfer_time = -1;
+    const auto pretransfer_time_status = that->libcurl_->curl_easy_getinfo(
+        that->curl_, CURLINFO_PRETRANSFER_TIME, &pretransfer_time);
+
+    double starttransfer_time = -1;
+    const auto starttransfer_time_status = that->libcurl_->curl_easy_getinfo(
+        that->curl_, CURLINFO_PRETRANSFER_TIME, &starttransfer_time);
+
     LOG(ERROR) << "The transmission  of request " << this_object
                << " (URI: " << that->uri_ << ") has been stuck at "
                << current_progress << " of " << dltotal + ultotal
                << " bytes for " << now - that->last_progress_timestamp_
-               << " seconds and will be aborted.";
+               << " seconds and will be aborted. CURL timing information: "
+               << "lookup time: " << lookup_time << " ("
+               << that->libcurl_->curl_easy_strerror(lookup_time_status)
+               << "), connect time: " << connect_time << " ("
+               << that->libcurl_->curl_easy_strerror(connect_time_status)
+               << "), pre-transfer time: " << pretransfer_time << " ("
+               << that->libcurl_->curl_easy_strerror(pretransfer_time_status)
+               << "), start-transfer time: " << starttransfer_time << " ("
+               << that->libcurl_->curl_easy_strerror(starttransfer_time_status)
+               << ")";
     return 1;  // Will abort the request.
   }
 
diff --git a/tensorflow/core/platform/cloud/curl_http_request.h b/tensorflow/core/platform/cloud/curl_http_request.h
index 9e5ae61016..b2a5870cf7 100644
--- a/tensorflow/core/platform/cloud/curl_http_request.h
+++ b/tensorflow/core/platform/cloud/curl_http_request.h
@@ -208,6 +208,8 @@ class LibCurl {
   virtual void curl_slist_free_all(curl_slist* list) = 0;
   virtual char* curl_easy_escape(CURL* curl, const char* str, int length) = 0;
   virtual void curl_free(void* p) = 0;
+
+  virtual const char* curl_easy_strerror(CURLcode errornum) = 0;
 };
 
 }  // namespace tensorflow
diff --git a/tensorflow/core/platform/cloud/curl_http_request_test.cc b/tensorflow/core/platform/cloud/curl_http_request_test.cc
index d476a1a4db..2d3e46edaf 100644
--- a/tensorflow/core/platform/cloud/curl_http_request_test.cc
+++ b/tensorflow/core/platform/cloud/curl_http_request_test.cc
@@ -219,6 +219,10 @@ class FakeLibCurl : public LibCurl {
   }
   void curl_free(void* p) override { port::Free(p); }
 
+  const char* curl_easy_strerror(CURLcode errornum) override {
+    return "<unimplemented>";
+  }
+
   // Variables defining the behavior of this fake.
   string response_content_;
   uint64 response_code_;
-- 
GitLab


From dbc1db8ba28cb25e4b902b790e4a0fef4d097090 Mon Sep 17 00:00:00 2001
From: Gunhan Gulsoy <gunan@google.com>
Date: Fri, 1 Dec 2017 16:35:38 -0800
Subject: [PATCH 0541/1225] Increase tolerance in gdbt_batch_test.

PiperOrigin-RevId: 177657498
---
 .../python/training/functions/gbdt_batch_test.py            | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/tensorflow/contrib/boosted_trees/python/training/functions/gbdt_batch_test.py b/tensorflow/contrib/boosted_trees/python/training/functions/gbdt_batch_test.py
index 16e24d97dd..dba51d4f52 100644
--- a/tensorflow/contrib/boosted_trees/python/training/functions/gbdt_batch_test.py
+++ b/tensorflow/contrib/boosted_trees/python/training/functions/gbdt_batch_test.py
@@ -912,8 +912,10 @@ class GbdtTest(test_util.TensorFlowTestCase):
       self.assertEqual(1,
                        len(output.trees[0].nodes[2].leaf.sparse_vector.index))
       self.assertEqual(3, output.trees[0].nodes[2].leaf.sparse_vector.index[0])
-      self.assertAlmostEqual(
-          0.893284678459, output.trees[0].nodes[2].leaf.sparse_vector.value[0])
+      self.assertAllClose(
+          0.893284678459,
+          output.trees[0].nodes[2].leaf.sparse_vector.value[0],
+          atol=1e-4, rtol=1e-4)
 
 
 if __name__ == "__main__":
-- 
GitLab


From 4173ac695eb7b41fc1885b08ed52904dbf2278c3 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 1 Dec 2017 16:36:37 -0800
Subject: [PATCH 0542/1225] Bugfix: Initialize template call in
 `tf.contrib.distributions.bijectors.masked_autoregressive` by first feeding
 zeros.

PiperOrigin-RevId: 177657607
---
 .../python/ops/bijectors/masked_autoregressive_impl.py       | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/tensorflow/contrib/distributions/python/ops/bijectors/masked_autoregressive_impl.py b/tensorflow/contrib/distributions/python/ops/bijectors/masked_autoregressive_impl.py
index f51c48d2dd..06c7c61ec3 100644
--- a/tensorflow/contrib/distributions/python/ops/bijectors/masked_autoregressive_impl.py
+++ b/tensorflow/contrib/distributions/python/ops/bijectors/masked_autoregressive_impl.py
@@ -212,6 +212,9 @@ class MaskedAutoregressiveFlow(bijector_lib.Bijector):
 
   def _forward(self, x):
     event_size = array_ops.shape(x)[-1]
+    y0 = array_ops.zeros_like(x, name="y0")
+    # call the template once to ensure creation
+    _ = self._shift_and_log_scale_fn(y0)
     def _loop_body(index, y0):
       """While-loop body for autoregression calculation."""
       # Set caching device to avoid re-getting the tf.Variable for every while
@@ -230,7 +233,7 @@ class MaskedAutoregressiveFlow(bijector_lib.Bijector):
     _, y = control_flow_ops.while_loop(
         cond=lambda index, _: index < event_size,
         body=_loop_body,
-        loop_vars=[0, array_ops.zeros_like(x, name="y0")])
+        loop_vars=[0, y0])
     return y
 
   def _inverse(self, y):
-- 
GitLab


From e30b0babce133631b19de1fd7bacc84c884d6f55 Mon Sep 17 00:00:00 2001
From: Anna R <annarev@google.com>
Date: Fri, 1 Dec 2017 16:38:52 -0800
Subject: [PATCH 0543/1225] Adding tf_export decorators to public API functions
 defined in swig files.

PiperOrigin-RevId: 177657932
---
 .../python/training/quantize_training.i       |  3 ++
 tensorflow/python/util/py_checkpoint_reader.i |  2 ++
 tensorflow/python/util/stat_summarizer.i      | 31 ++++++++++++++-----
 tensorflow/tools/api/generator/BUILD          |  7 ++++-
 .../tools/api/generator/create_python_api.py  |  3 +-
 5 files changed, 37 insertions(+), 9 deletions(-)

diff --git a/tensorflow/python/training/quantize_training.i b/tensorflow/python/training/quantize_training.i
index 40c6076973..17ffcd6e07 100644
--- a/tensorflow/python/training/quantize_training.i
+++ b/tensorflow/python/training/quantize_training.i
@@ -65,6 +65,9 @@ def do_quantize_training_on_graphdef(input_graph, num_bits):
 
   graph.ParseFromString(result_graph_string)
   return graph
+
+do_quantize_training_on_graphdef._tf_api_names = [
+    'train.do_quantize_training_on_graphdef']
 %}
 
 %unignoreall
diff --git a/tensorflow/python/util/py_checkpoint_reader.i b/tensorflow/python/util/py_checkpoint_reader.i
index 0cd095d9d9..8004898cbc 100644
--- a/tensorflow/python/util/py_checkpoint_reader.i
+++ b/tensorflow/python/util/py_checkpoint_reader.i
@@ -164,6 +164,8 @@ def NewCheckpointReader(filepattern):
   with errors.raise_exception_on_not_ok_status() as status:
     from tensorflow.python.util import compat
     return CheckpointReader(compat.as_bytes(filepattern), status)
+
+NewCheckpointReader._tf_api_names = ['train.NewCheckpointReader']
 %}
 
 %include "tensorflow/c/checkpoint_reader.h"
diff --git a/tensorflow/python/util/stat_summarizer.i b/tensorflow/python/util/stat_summarizer.i
index 8073919587..6aeaa0e31b 100644
--- a/tensorflow/python/util/stat_summarizer.i
+++ b/tensorflow/python/util/stat_summarizer.i
@@ -27,8 +27,8 @@ limitations under the License.
 
 %ignoreall
 
-%unignore NewStatSummarizer;
-%unignore DeleteStatSummarizer;
+%unignore _NewStatSummarizer;
+%unignore _DeleteStatSummarizer;
 %unignore tensorflow;
 %unignore tensorflow::StatSummarizer;
 %unignore tensorflow::StatSummarizer::StatSummarizer;
@@ -43,21 +43,20 @@ limitations under the License.
 
 // TODO(ashankar): Remove the unused argument from the API.
 %{
-tensorflow::StatSummarizer* NewStatSummarizer(
+tensorflow::StatSummarizer* _NewStatSummarizer(
       const string& unused) {
   return new tensorflow::StatSummarizer(tensorflow::StatSummarizerOptions());
 }
 %}
 
-
 %{
-void DeleteStatSummarizer(tensorflow::StatSummarizer* ss) {
+void _DeleteStatSummarizer(tensorflow::StatSummarizer* ss) {
   delete ss;
 }
 %}
 
-tensorflow::StatSummarizer* NewStatSummarizer(const string& unused);
-void DeleteStatSummarizer(tensorflow::StatSummarizer* ss);
+tensorflow::StatSummarizer* _NewStatSummarizer(const string& unused);
+void _DeleteStatSummarizer(tensorflow::StatSummarizer* ss);
 
 %extend tensorflow::StatSummarizer {
   void ProcessStepStatsStr(const string& step_stats_str) {
@@ -77,3 +76,21 @@ void DeleteStatSummarizer(tensorflow::StatSummarizer* ss);
 
 %include "tensorflow/core/util/stat_summarizer.h"
 %unignoreall
+
+%insert("python") %{
+
+# Wrapping NewStatSummarizer and DeletStatSummarizer because
+# SWIG-generated functions are built-in functions and do not support
+# setting _tf_api_names attribute.
+
+def NewStatSummarizer(unused):
+  return _NewStatSummarizer(unused)
+
+def DeleteStatSummarizer(stat_summarizer):
+  _DeleteStatSummarizer(stat_summarizer)
+
+NewStatSummarizer._tf_api_names = ["contrib.stat_summarizer.NewStatSummarizer"]
+DeleteStatSummarizer._tf_api_names = [
+    "contrib.stat_summarizer.DeleteStatSummarizer"]
+StatSummarizer._tf_api_names = ["contrib.stat_summarizer.StatSummarizer"]
+%}
diff --git a/tensorflow/tools/api/generator/BUILD b/tensorflow/tools/api/generator/BUILD
index 3896a21b99..064668a865 100644
--- a/tensorflow/tools/api/generator/BUILD
+++ b/tensorflow/tools/api/generator/BUILD
@@ -41,7 +41,12 @@ genrule(
     # every module exported using tf_export. For e.g. if an op is decorated with
     # @tf_export('module1.module2', 'module3'). Then, outs should include
     # api/module1/module2/__init__.py and api/module3/__init__.py.
-    outs = ["api/__init__.py"],
+    outs = [
+        "api/__init__.py",
+        "api/contrib/__init__.py",
+        "api/contrib/stat_summarizer/__init__.py",
+        "api/train/__init__.py",
+    ],
     cmd = "$(location create_python_api) $(OUTS)",
     tools = ["create_python_api"],
 )
diff --git a/tensorflow/tools/api/generator/create_python_api.py b/tensorflow/tools/api/generator/create_python_api.py
index 5f1286aaf6..aab856b723 100644
--- a/tensorflow/tools/api/generator/create_python_api.py
+++ b/tensorflow/tools/api/generator/create_python_api.py
@@ -107,7 +107,8 @@ def get_api_imports():
   # Import all required modules in their parent modules.
   # For e.g. if we import 'tf.foo.bar.Value'. Then, we also
   # import 'bar' in 'tf.foo'.
-  for dest_module in module_imports.keys():
+  dest_modules = set(module_imports.keys())
+  for dest_module in dest_modules:
     dest_module_split = dest_module.split('.')
     for dest_submodule_index in range(1, len(dest_module_split)):
       dest_submodule = '.'.join(dest_module_split[:dest_submodule_index])
-- 
GitLab


From 7248c3ec2c87648fec732e17f3e749d12d113abe Mon Sep 17 00:00:00 2001
From: Austin Anderson <angerson@google.com>
Date: Fri, 1 Dec 2017 17:09:53 -0800
Subject: [PATCH 0544/1225] Small reformatting of tensorflow.bzl

PiperOrigin-RevId: 177661127
---
 tensorflow/tensorflow.bzl | 275 ++++++++++++++++----------------------
 1 file changed, 115 insertions(+), 160 deletions(-)

diff --git a/tensorflow/tensorflow.bzl b/tensorflow/tensorflow.bzl
index 709a2d46e1..0015eb0094 100644
--- a/tensorflow/tensorflow.bzl
+++ b/tensorflow/tensorflow.bzl
@@ -1,6 +1,5 @@
 # -*- Python -*-
 
-
 # Return the options to use for a C++ library or binary build.
 # Uses the ":optmode" config_setting to pick the options.
 load(
@@ -8,38 +7,35 @@ load(
     "tf_cuda_tests_tags",
     "tf_sycl_tests_tags",
     "tf_additional_xla_deps_py",
-    "if_static",)
+    "if_static",
+)
 load(
     "@local_config_cuda//cuda:build_defs.bzl",
     "if_cuda",
-    "cuda_default_copts",)
-
+    "cuda_default_copts",
+)
 load(
     "//third_party/mkl:build_defs.bzl",
-    "if_mkl",)
-
+    "if_mkl",
+)
 def register_extension_info(**kwargs):
     pass
 
-
 # Given a source file, generate a test name.
 # i.e. "common_runtime/direct_session_test.cc" becomes
 #      "common_runtime_direct_session_test"
 def src_to_test_name(src):
   return src.replace("/", "_").split(".")[0]
 
-
 def full_path(relative_paths):
   return [PACKAGE_NAME + "/" + relative for relative in relative_paths]
 
-
 # List of proto files for android builds
 def tf_android_core_proto_sources(core_proto_sources_relative):
   return [
       "//tensorflow/core:" + p for p in core_proto_sources_relative
   ]
 
-
 # Returns the list of pb.h and proto.h headers that are generated for
 # tf_android_core_proto_sources().
 def tf_android_core_proto_headers(core_proto_sources_relative):
@@ -51,13 +47,11 @@ def tf_android_core_proto_headers(core_proto_sources_relative):
       for p in core_proto_sources_relative
   ])
 
-
 # Sanitize a dependency so that it works correctly from code that includes
 # TensorFlow as a submodule.
 def clean_dep(dep):
   return str(Label(dep))
 
-
 def if_android_x86(a):
   return select({
       clean_dep("//tensorflow:android_x86"): a,
@@ -65,35 +59,30 @@ def if_android_x86(a):
       "//conditions:default": [],
   })
 
-
 def if_android_arm(a):
   return select({
       clean_dep("//tensorflow:android_arm"): a,
       "//conditions:default": [],
   })
 
-
 def if_android_arm64(a):
   return select({
       clean_dep("//tensorflow:android_arm64"): a,
       "//conditions:default": [],
   })
 
-
 def if_android_mips(a):
   return select({
       clean_dep("//tensorflow:android_mips"): a,
       "//conditions:default": [],
   })
 
-
 def if_not_android(a):
   return select({
       clean_dep("//tensorflow:android"): [],
       "//conditions:default": a,
   })
 
-
 def if_not_android_mips_and_mips64(a):
   return select({
       clean_dep("//tensorflow:android_mips"): [],
@@ -101,21 +90,18 @@ def if_not_android_mips_and_mips64(a):
       "//conditions:default": a,
   })
 
-
 def if_android(a):
   return select({
       clean_dep("//tensorflow:android"): a,
       "//conditions:default": [],
   })
 
-
 def if_ios(a):
   return select({
       clean_dep("//tensorflow:ios"): a,
       "//conditions:default": [],
   })
 
-
 def if_mobile(a):
   return select({
       clean_dep("//tensorflow:android"): a,
@@ -123,7 +109,6 @@ def if_mobile(a):
       "//conditions:default": [],
   })
 
-
 def if_not_mobile(a):
   return select({
       clean_dep("//tensorflow:android"): [],
@@ -131,7 +116,6 @@ def if_not_mobile(a):
       "//conditions:default": a,
   })
 
-
 def if_not_windows(a):
   return select({
       clean_dep("//tensorflow:windows"): [],
@@ -139,7 +123,6 @@ def if_not_windows(a):
       "//conditions:default": a,
   })
 
-
 def if_linux_x86_64(a):
   return select({
       clean_dep("//tensorflow:linux_x86_64"): a,
@@ -161,8 +144,10 @@ WIN_COPTS = [
     "/DTENSORFLOW_USE_EIGEN_THREADPOOL",
     "/DEIGEN_AVOID_STL_ARRAY",
     "/Iexternal/gemmlowp",
-    "/wd4018", # -Wno-sign-compare
-    "/U_HAS_EXCEPTIONS", "/D_HAS_EXCEPTIONS=1", "/EHsc", # -fno-exceptions
+    "/wd4018",  # -Wno-sign-compare
+    "/U_HAS_EXCEPTIONS",
+    "/D_HAS_EXCEPTIONS=1",
+    "/EHsc",  # -fno-exceptions
     "/DNOGDI",
 ]
 
@@ -200,7 +185,6 @@ def tf_copts(android_optimization_level_override="-O2"):
             "//conditions:default": ["-pthread"]
       }))
 
-
 def tf_opts_nortti_if_android():
   return if_android([
       "-fno-rtti",
@@ -208,10 +192,8 @@ def tf_opts_nortti_if_android():
       "-DGOOGLE_PROTOBUF_NO_STATIC_INITIALIZER",
   ])
 
-
 # LINT.ThenChange(//tensorflow/contrib/android/cmake/CMakeLists.txt)
 
-
 # Given a list of "op_lib_names" (a list of files in the ops directory
 # without their .cc extensions), generate a library for that file.
 def tf_gen_op_libs(op_lib_names, deps=None):
@@ -229,13 +211,11 @@ def tf_gen_op_libs(op_lib_names, deps=None):
         alwayslink=1,
         linkstatic=1,)
 
-
 def _make_search_paths(prefix, levels_to_root):
   return ",".join(
       ["-rpath,%s/%s" % (prefix, "/".join([".."] * search_level))
        for search_level in range(levels_to_root + 1)])
 
-
 def _rpath_linkopts(name):
   # Search parent directories up to the TensorFlow root directory for shared
   # object dependencies, even if this op shared object is deeply nested
@@ -254,7 +234,6 @@ def _rpath_linkopts(name):
       ],
   })
 
-
 # Bazel-generated shared objects which must be linked into TensorFlow binaries
 # to define symbols from //tensorflow/core:framework and //tensorflow/core:lib.
 def tf_binary_additional_srcs():
@@ -264,7 +243,6 @@ def tf_binary_additional_srcs():
           clean_dep("//tensorflow:libtensorflow_framework.so"),
       ])
 
-
 def tf_cc_shared_object(
     name,
     srcs=[],
@@ -287,9 +265,9 @@ def tf_cc_shared_object(
       **kwargs)
 
 register_extension_info(
-    extension_name="tf_cc_shared_object",
-    label_regex_for_dep="{extension_name}")
-
+    extension_name = "tf_cc_shared_object",
+    label_regex_for_dep = "{extension_name}",
+)
 
 # Links in the framework shared object
 # (//third_party/tensorflow:libtensorflow_framework.so) when not building
@@ -312,9 +290,9 @@ def tf_cc_binary(name,
       **kwargs)
 
 register_extension_info(
-    extension_name="tf_cc_binary",
-    label_regex_for_dep="{extension_name}.*")
-
+    extension_name = "tf_cc_binary",
+    label_regex_for_dep = "{extension_name}.*",
+)
 
 def tf_gen_op_wrapper_cc(name,
                          out_ops_file,
@@ -368,7 +346,6 @@ def tf_gen_op_wrapper_cc(name,
            "$(location :" + out_ops_file + ".cc) " + override_arg + " " +
            str(include_internal_ops) + " " + api_def_args_str))
 
-
 # Given a list of "op_lib_names" (a list of files in the ops directory
 # without their .cc extensions), generate individual C++ .cc and .h
 # files for each of the ops files mentioned, and then generate a
@@ -461,7 +438,6 @@ def tf_gen_op_wrappers_cc(name,
       alwayslink=1,
       visibility=[clean_dep("//tensorflow:internal")])
 
-
 # Generates a Python library target wrapping the ops registered in "deps".
 #
 # Args:
@@ -554,7 +530,6 @@ def tf_gen_op_wrapper_py(name,
           clean_dep("//tensorflow/python:framework_for_generated_wrappers_v2"),
       ],)
 
-
 # Define a bazel macro that creates cc_test for tensorflow.
 #
 # Links in the framework shared object
@@ -597,9 +572,9 @@ def tf_cc_test(name,
       **kwargs)
 
 register_extension_info(
-    extension_name="tf_cc_test",
-    label_regex_for_dep="{extension_name}.*")
-
+    extension_name = "tf_cc_test",
+    label_regex_for_dep = "{extension_name}.*",
+)
 
 # Part of the testing workflow requires a distinguishable name for the build
 # rules that involve a GPU, even if otherwise identical to the base rule.
@@ -624,9 +599,9 @@ def tf_cc_test_gpu(name,
       args=args)
 
 register_extension_info(
-    extension_name="tf_cc_test_gpu",
-    label_regex_for_dep="{extension_name}")
-
+    extension_name = "tf_cc_test_gpu",
+    label_regex_for_dep = "{extension_name}",
+)
 
 def tf_cuda_cc_test(name,
                     srcs=[],
@@ -668,9 +643,9 @@ def tf_cuda_cc_test(name,
       args=args)
 
 register_extension_info(
-    extension_name="tf_cuda_cc_test",
-    label_regex_for_dep="{extension_name}")
-
+    extension_name = "tf_cuda_cc_test",
+    label_regex_for_dep = "{extension_name}",
+)
 
 def tf_cuda_only_cc_test(name,
                     srcs=[],
@@ -702,9 +677,9 @@ def tf_cuda_only_cc_test(name,
       tags=tags + tf_cuda_tests_tags())
 
 register_extension_info(
-    extension_name="tf_cuda_only_cc_test",
-    label_regex_for_dep="{extension_name}_gpu")
-
+    extension_name = "tf_cuda_only_cc_test",
+    label_regex_for_dep = "{extension_name}_gpu",
+)
 
 # Create a cc_test for each of the tensorflow tests listed in "tests"
 def tf_cc_tests(srcs,
@@ -728,7 +703,6 @@ def tf_cc_tests(srcs,
         linkopts=linkopts,
         nocopts=nocopts)
 
-
 def tf_cc_test_mkl(srcs,
                    deps,
                    name="",
@@ -738,7 +712,6 @@ def tf_cc_test_mkl(srcs,
                    args=None):
   if_mkl(tf_cc_tests(srcs, deps, name, linkstatic=linkstatic, tags=tags, size=size, args=args, nocopts="-fno-exceptions"))
 
-
 def tf_cc_tests_gpu(srcs,
                     deps,
                     name="",
@@ -748,7 +721,6 @@ def tf_cc_tests_gpu(srcs,
                     args=None):
   tf_cc_tests(srcs, deps, linkstatic, tags=tags, size=size, args=args)
 
-
 def tf_cuda_cc_tests(srcs,
                      deps,
                      name="",
@@ -781,9 +753,9 @@ def tf_java_test(name,
       **kwargs)
 
 register_extension_info(
-    extension_name="tf_java_test",
-    label_regex_for_dep="{extension_name}")
-
+    extension_name = "tf_java_test",
+    label_regex_for_dep = "{extension_name}",
+)
 
 def _cuda_copts():
   """Gets the appropriate set of copts for (maybe) CUDA compilation.
@@ -803,10 +775,8 @@ def _cuda_copts():
       ]),
   })
 
-
 # Build defs for TensorFlow kernels
 
-
 # When this target is built using --config=cuda, a cc_library is built
 # that passes -DGOOGLE_CUDA=1 and '-x cuda', linking in additional
 # libraries needed by GPU kernels.
@@ -830,9 +800,9 @@ def tf_gpu_kernel_library(srcs,
       **kwargs)
 
 register_extension_info(
-    extension_name="tf_gpu_kernel_library",
-    label_regex_for_dep="{extension_name}")
-
+    extension_name = "tf_gpu_kernel_library",
+    label_regex_for_dep = "{extension_name}",
+)
 
 def tf_cuda_library(deps=None, cuda_deps=None, copts=None, **kwargs):
   """Generate a cc_library with a conditional set of CUDA dependencies.
@@ -866,10 +836,9 @@ def tf_cuda_library(deps=None, cuda_deps=None, copts=None, **kwargs):
       **kwargs)
 
 register_extension_info(
-    extension_name="tf_cuda_library",
-    label_regex_for_dep="{extension_name}")
-
-
+    extension_name = "tf_cuda_library",
+    label_regex_for_dep = "{extension_name}",
+)
 
 def tf_kernel_library(name,
                       prefix=None,
@@ -940,9 +909,9 @@ def tf_kernel_library(name,
       **kwargs)
 
 register_extension_info(
-    extension_name="tf_kernel_library",
-    label_regex_for_dep="{extension_name}(_gpu)?")
-
+    extension_name = "tf_kernel_library",
+    label_regex_for_dep = "{extension_name}(_gpu)?",
+)
 
 def tf_mkl_kernel_library(name,
                           prefix=None,
@@ -981,9 +950,9 @@ def tf_mkl_kernel_library(name,
       ))
 
 register_extension_info(
-    extension_name="tf_mkl_kernel_library",
-    label_regex_for_dep="{extension_name}")
-
+    extension_name = "tf_mkl_kernel_library",
+    label_regex_for_dep = "{extension_name}",
+)
 
 # Bazel rules for building swig files.
 def _py_wrap_cc_impl(ctx):
@@ -1017,44 +986,41 @@ def _py_wrap_cc_impl(ctx):
       progress_message="SWIGing " + src.path)
   return struct(files=depset(outputs))
 
-
 _py_wrap_cc = rule(
-    attrs={
-        "srcs":
-            attr.label_list(
-                mandatory=True,
-                allow_files=True,),
-        "swig_includes":
-            attr.label_list(
-                cfg="data",
-                allow_files=True,),
-        "deps":
-            attr.label_list(
-                allow_files=True,
-                providers=["cc"],),
-        "toolchain_deps":
-            attr.label_list(
-                allow_files=True,),
-        "module_name":
-            attr.string(mandatory=True),
-        "py_module_name":
-            attr.string(mandatory=True),
-        "_swig":
-            attr.label(
-                default=Label("@swig//:swig"),
-                executable=True,
-                cfg="host",),
-        "_swiglib":
-            attr.label(
-                default=Label("@swig//:templates"),
-                allow_files=True,),
+    attrs = {
+        "srcs": attr.label_list(
+            mandatory = True,
+            allow_files = True,
+        ),
+        "swig_includes": attr.label_list(
+            cfg = "data",
+            allow_files = True,
+        ),
+        "deps": attr.label_list(
+            allow_files = True,
+            providers = ["cc"],
+        ),
+        "toolchain_deps": attr.label_list(
+            allow_files = True,
+        ),
+        "module_name": attr.string(mandatory = True),
+        "py_module_name": attr.string(mandatory = True),
+        "_swig": attr.label(
+            default = Label("@swig//:swig"),
+            executable = True,
+            cfg = "host",
+        ),
+        "_swiglib": attr.label(
+            default = Label("@swig//:templates"),
+            allow_files = True,
+        ),
     },
-    outputs={
+    outputs = {
         "cc_out": "%{module_name}.cc",
         "py_out": "%{py_module_name}.py",
     },
-    implementation=_py_wrap_cc_impl,)
-
+    implementation = _py_wrap_cc_impl,
+)
 
 def _get_repository_roots(ctx, files):
   """Returns abnormal root directories under which files reside.
@@ -1085,7 +1051,6 @@ def _get_repository_roots(ctx, files):
       result[root] -= 1
   return [k for v, k in sorted([(v, k) for k, v in result.items()])]
 
-
 # Bazel rule for collecting the header files that a target depends on.
 def _transitive_hdrs_impl(ctx):
   outputs = depset()
@@ -1093,21 +1058,20 @@ def _transitive_hdrs_impl(ctx):
     outputs += dep.cc.transitive_headers
   return struct(files=outputs)
 
-
 _transitive_hdrs = rule(
-    attrs={
+    attrs = {
         "deps": attr.label_list(
-            allow_files=True,
-            providers=["cc"],),
+            allow_files = True,
+            providers = ["cc"],
+        ),
     },
-    implementation=_transitive_hdrs_impl,)
-
+    implementation = _transitive_hdrs_impl,
+)
 
 def transitive_hdrs(name, deps=[], **kwargs):
   _transitive_hdrs(name=name + "_gather", deps=deps)
   native.filegroup(name=name, srcs=[":" + name + "_gather"])
 
-
 # Create a header only library that includes all the headers exported by
 # the libraries in deps.
 def cc_header_only_library(name, deps=[], includes=[], **kwargs):
@@ -1133,7 +1097,6 @@ def cc_header_only_library(name, deps=[], includes=[], **kwargs):
                     includes=includes,
                     **kwargs)
 
-
 def tf_custom_op_library_additional_deps():
   return [
       "@protobuf_archive//:protobuf_headers",
@@ -1142,7 +1105,6 @@ def tf_custom_op_library_additional_deps():
       clean_dep("//tensorflow/core:framework_headers_lib"),
   ]
 
-
 # Traverse the dependency graph along the "deps" attribute of the
 # target and return a struct with one field called 'tf_collected_deps'.
 # tf_collected_deps will be the union of the deps of the current target
@@ -1156,16 +1118,15 @@ def _collect_deps_aspect_impl(target, ctx):
         alldeps = alldeps | dep.tf_collected_deps
   return struct(tf_collected_deps=alldeps)
 
-
 collect_deps_aspect = aspect(
-    implementation=_collect_deps_aspect_impl, attr_aspects=["deps"])
-
+    attr_aspects = ["deps"],
+    implementation = _collect_deps_aspect_impl,
+)
 
 def _dep_label(dep):
   label = dep.label
   return label.package + ":" + label.name
 
-
 # This rule checks that the transitive dependencies of targets listed
 # in the 'deps' attribute don't depend on the targets listed in
 # the 'disallowed_deps' attribute.
@@ -1182,18 +1143,20 @@ def _check_deps_impl(ctx):
                   disallowed_dep))
   return struct()
 
-
 check_deps = rule(
     _check_deps_impl,
-    attrs={
-        "deps":
-            attr.label_list(
-                aspects=[collect_deps_aspect], mandatory=True,
-                allow_files=True),
-        "disallowed_deps":
-            attr.label_list(mandatory=True, allow_files=True)
-    },)
-
+    attrs = {
+        "deps": attr.label_list(
+            aspects = [collect_deps_aspect],
+            mandatory = True,
+            allow_files = True,
+        ),
+        "disallowed_deps": attr.label_list(
+            mandatory = True,
+            allow_files = True,
+        ),
+    },
+)
 
 # Helper to build a dynamic library (.so) from the sources containing
 # implementations of custom ops and kernels.
@@ -1234,9 +1197,9 @@ def tf_custom_op_library(name, srcs=[], gpu_srcs=[], deps=[], linkopts=[]):
       }),)
 
 register_extension_info(
-    extension_name="tf_custom_op_library",
-    label_regex_for_dep="{extension_name}")
-
+    extension_name = "tf_custom_op_library",
+    label_regex_for_dep = "{extension_name}",
+)
 
 def tf_custom_op_py_library(name,
                             srcs=[],
@@ -1255,18 +1218,16 @@ def tf_custom_op_py_library(name,
       deps=deps,)
 
 register_extension_info(
-    extension_name="tf_custom_op_py_library",
-    label_regex_for_dep="{extension_name}")
-
+    extension_name = "tf_custom_op_py_library",
+    label_regex_for_dep = "{extension_name}",
+)
 
 def tf_extension_linkopts():
   return []  # No extension link opts
 
-
 def tf_extension_copts():
   return []  # No extension c opts
 
-
 def tf_py_wrap_cc(name,
                              srcs,
                              swig_includes=[],
@@ -1334,7 +1295,6 @@ def tf_py_wrap_cc(name,
           "//conditions:default": [":" + cc_library_name],
       }))
 
-
 def py_test(deps=[], **kwargs):
   native.py_test(
       deps=select({
@@ -1344,9 +1304,9 @@ def py_test(deps=[], **kwargs):
       **kwargs)
 
 register_extension_info(
-    extension_name="py_test",
-    label_regex_for_dep="{extension_name}")
-
+    extension_name = "py_test",
+    label_regex_for_dep = "{extension_name}",
+)
 
 def tf_py_test(name,
                srcs,
@@ -1382,9 +1342,9 @@ def tf_py_test(name,
       srcs_version="PY2AND3")
 
 register_extension_info(
-    extension_name="tf_py_test",
-    label_regex_map={"additional_deps": "deps:{extension_name}"})
-
+    extension_name = "tf_py_test",
+    label_regex_map = {"additional_deps": "deps:{extension_name}"},
+)
 
 def cuda_py_test(name,
                  srcs,
@@ -1412,9 +1372,9 @@ def cuda_py_test(name,
       xla_enabled=xla_enabled)
 
 register_extension_info(
-    extension_name="cuda_py_test",
-    label_regex_map={"additional_deps": "additional_deps:{extension_name}"})
-
+    extension_name = "cuda_py_test",
+    label_regex_map = {"additional_deps": "additional_deps:{extension_name}"},
+)
 
 def sycl_py_test(name,
                  srcs,
@@ -1442,9 +1402,9 @@ def sycl_py_test(name,
       xla_enabled=xla_enabled)
 
 register_extension_info(
-    extension_name="sycl_py_test",
-    label_regex_map={"additional_deps": "additional_deps:{extension_name}"})
-
+    extension_name = "sycl_py_test",
+    label_regex_map = {"additional_deps": "additional_deps:{extension_name}"},
+)
 
 def py_tests(name,
              srcs,
@@ -1470,7 +1430,6 @@ def py_tests(name,
         additional_deps=additional_deps,
         xla_enabled=xla_enabled)
 
-
 def cuda_py_tests(name,
                   srcs,
                   size="medium",
@@ -1492,7 +1451,6 @@ def cuda_py_tests(name,
       prefix=prefix,
       xla_enabled=xla_enabled)
 
-
 # Creates a genrule named <name> for running tools/proto_text's generator to
 # make the proto_text functions, for the protos passed in <srcs>.
 #
@@ -1515,12 +1473,10 @@ def tf_generate_proto_text_sources(name, srcs_relative_dir, srcs):
       ],)
   return struct(hdrs=out_hdrs, srcs=out_srcs)
 
-
 def tf_genrule_cmd_append_to_srcs(to_append):
   return ("cat $(SRCS) > $(@) && " + "echo >> $(@) && " + "echo " + to_append +
           " >> $(@)")
 
-
 def tf_version_info_genrule():
   native.genrule(
       name="version_info_gen",
@@ -1535,7 +1491,6 @@ def tf_version_info_genrule():
       local=1,
       tools=[clean_dep("//tensorflow/tools/git:gen_git_source.py")],)
 
-
 def tf_py_build_info_genrule():
   native.genrule(
       name="py_build_info_gen",
@@ -1545,7 +1500,6 @@ def tf_py_build_info_genrule():
       local=1,
       tools=[clean_dep("//tensorflow/tools/build_info:gen_build_info.py")],)
 
-
 def cc_library_with_android_deps(deps,
                                  android_deps=[],
                                  common_deps=[],
@@ -1554,5 +1508,6 @@ def cc_library_with_android_deps(deps,
   native.cc_library(deps=deps, **kwargs)
 
 register_extension_info(
-    extension_name="cc_library_with_android_deps",
-    label_regex_for_dep="{extension_name}")
+    extension_name = "cc_library_with_android_deps",
+    label_regex_for_dep = "{extension_name}",
+)
-- 
GitLab


From 508abd1ea3850da656d706ffa7f7ebe09a4979d9 Mon Sep 17 00:00:00 2001
From: Alexandre Passos <apassos@google.com>
Date: Fri, 1 Dec 2017 17:28:06 -0800
Subject: [PATCH 0545/1225] None gradients should trigger stopping traversal of
 the backward graph in tape gradients.

PiperOrigin-RevId: 177662732
---
 tensorflow/c/eager/tape.h                | 25 ++++++++++++++++--------
 tensorflow/python/eager/backprop_test.py | 13 ++++++++++++
 2 files changed, 30 insertions(+), 8 deletions(-)

diff --git a/tensorflow/c/eager/tape.h b/tensorflow/c/eager/tape.h
index 191e9c3413..20ed037c52 100644
--- a/tensorflow/c/eager/tape.h
+++ b/tensorflow/c/eager/tape.h
@@ -491,6 +491,7 @@ Status GradientTape<Gradient, BackwardFunction>::ComputeGradient(
     state.op_tape.erase(op_it);
     std::vector<Gradient*> out_gradients;
     out_gradients.reserve(trace.output_tensor_info.size());
+    bool any_gradient_nonzero = false;
     for (int i = 0; i < trace.output_tensor_info.size(); ++i) {
       const int64 id = trace.output_tensor_info[i].id;
       auto grad_it = gradients.find(id);
@@ -506,6 +507,7 @@ Status GradientTape<Gradient, BackwardFunction>::ComputeGradient(
                            trace.output_tensor_info[i].dtype));
         }
       } else {
+        any_gradient_nonzero = true;
         out_gradients.push_back(vspace.AggregateGradients(grad_it->second));
         if (sources_set.find(grad_it->first) == sources_set.end()) {
           gradients.erase(grad_it);
@@ -513,14 +515,21 @@ Status GradientTape<Gradient, BackwardFunction>::ComputeGradient(
       }
     }
     std::vector<Gradient*> in_gradients;
-    Status s = vspace.CallBackwardFunction(trace.backward_function,
-                                           out_gradients, &in_gradients);
-    if (!persistent_) {
-      vspace.ReleaseBackwardFunction(trace.backward_function);
-    }
-    if (!s.ok()) {
-      cleanup();
-      return s;
+    if (any_gradient_nonzero) {
+      Status s = vspace.CallBackwardFunction(trace.backward_function,
+                                             out_gradients, &in_gradients);
+      if (!persistent_) {
+        vspace.ReleaseBackwardFunction(trace.backward_function);
+      }
+      if (!s.ok()) {
+        cleanup();
+        return s;
+      }
+    } else {
+      in_gradients.resize(trace.input_tensor_id.size());
+      if (!persistent_) {
+        vspace.ReleaseBackwardFunction(trace.backward_function);
+      }
     }
     VLOG(1) << "Got " << in_gradients.size() << " in_gradients for "
             << trace.input_tensor_id.size() << " sources";
diff --git a/tensorflow/python/eager/backprop_test.py b/tensorflow/python/eager/backprop_test.py
index 9816dd022e..90c0e47ff9 100644
--- a/tensorflow/python/eager/backprop_test.py
+++ b/tensorflow/python/eager/backprop_test.py
@@ -214,6 +214,19 @@ class BackpropTest(test.TestCase):
 
     self.assertAllEqual(gradgrad(constant_op.constant(0.0))[0], 1.0)
 
+  def testStopGradient(self):
+    grad = backprop.gradients_function(
+        lambda x: array_ops.stop_gradient(math_ops.argmax(x)))
+    self.assertAllEqual(grad([0.0])[0], None)
+
+  def testArgmax(self):
+    def argmax(x):
+      i = math_ops.argmax(x)
+      return array_ops.stop_gradient(i)
+
+    grad = backprop.gradients_function(argmax)
+    self.assertAllEqual(grad([0.0])[0], None)
+
   def testGPU(self):
     if not context.context().num_gpus():
       self.skipTest('No GPUs found')
-- 
GitLab


From 72b4acc3ba121af7680a49b9519d4d4470c6a1f2 Mon Sep 17 00:00:00 2001
From: Shanqing Cai <cais@google.com>
Date: Fri, 1 Dec 2017 17:33:02 -0800
Subject: [PATCH 0546/1225] tfdbg: Add TensorBoardDebugWrapperSession and
 TensorBoardDebugHook

Also, relax the requirment on the grpc debug server address argument
in GprcDebugWrapperSession and GrpcDebugHook. They now accept addresses
with or without the "grpc://" prefix. As a result,
TensorBoardDebugWrapperSession and TensorBoardDebugHook accept both
formats, too.

PiperOrigin-RevId: 177663167
---
 tensorflow/python/debug/__init__.py           |  4 ++
 .../debug/lib/session_debug_grpc_test.py      | 72 +++++++++++++++++--
 .../python/debug/wrappers/grpc_wrapper.py     | 41 +++++++++--
 tensorflow/python/debug/wrappers/hooks.py     | 52 +++++++++-----
 4 files changed, 143 insertions(+), 26 deletions(-)

diff --git a/tensorflow/python/debug/__init__.py b/tensorflow/python/debug/__init__.py
index 821350ee90..34da44b60d 100644
--- a/tensorflow/python/debug/__init__.py
+++ b/tensorflow/python/debug/__init__.py
@@ -30,6 +30,8 @@ See the @{$python/tfdbg} guide.
 @@GrpcDebugWrapperSession
 @@LocalCLIDebugHook
 @@LocalCLIDebugWrapperSession
+@@TensorBoardDebugHook
+@@TensorBoardDebugWrapperSession
 @@WatchOptions
 
 @@reconstruct_non_debug_graph_def
@@ -60,9 +62,11 @@ from tensorflow.python.debug.lib.debug_utils import watch_graph_with_blacklists
 from tensorflow.python.debug.wrappers.dumping_wrapper import DumpingDebugWrapperSession
 from tensorflow.python.debug.wrappers.framework import WatchOptions
 from tensorflow.python.debug.wrappers.grpc_wrapper import GrpcDebugWrapperSession
+from tensorflow.python.debug.wrappers.grpc_wrapper import TensorBoardDebugWrapperSession
 from tensorflow.python.debug.wrappers.hooks import DumpingDebugHook
 from tensorflow.python.debug.wrappers.hooks import GrpcDebugHook
 from tensorflow.python.debug.wrappers.hooks import LocalCLIDebugHook
+from tensorflow.python.debug.wrappers.hooks import TensorBoardDebugHook
 from tensorflow.python.debug.wrappers.local_cli_wrapper import LocalCLIDebugWrapperSession
 
 from tensorflow.python.util import all_util as _all_util
diff --git a/tensorflow/python/debug/lib/session_debug_grpc_test.py b/tensorflow/python/debug/lib/session_debug_grpc_test.py
index e1ddd4ee64..99781bd9d9 100644
--- a/tensorflow/python/debug/lib/session_debug_grpc_test.py
+++ b/tensorflow/python/debug/lib/session_debug_grpc_test.py
@@ -248,10 +248,24 @@ class SessionDebugGrpcTest(session_debug_testlib.SessionDebugTestBase):
     self.assertEqual(
         14, len(dump.get_tensors("v/read", 0, "DebugNumericSummary")[0]))
 
-  def testConstructGrpcDebugHookWithGrpcInUrlRaisesValueError(self):
-    """Tests that the hook raises an error if the URL starts with grpc://."""
-    with self.assertRaises(ValueError):
-      hooks.GrpcDebugHook(["grpc://foo:42"])
+  def testTensorBoardDebugHooWorks(self):
+    u = variables.Variable(2.1, name="u")
+    v = variables.Variable(20.0, name="v")
+    w = math_ops.multiply(u, v, name="w")
+
+    sess = session.Session(config=no_rewrite_session_config())
+    sess.run(u.initializer)
+    sess.run(v.initializer)
+
+    grpc_debug_hook = hooks.TensorBoardDebugHook(
+        ["localhost:%d" % self._server_port])
+    sess = monitored_session._HookedSession(sess, [grpc_debug_hook])
+
+    self.assertAllClose(42.0, sess.run(w))
+
+  def testConstructGrpcDebugHookWithOrWithouGrpcInUrlWorks(self):
+    hooks.GrpcDebugHook(["grpc://foo:42424"])
+    hooks.GrpcDebugHook(["foo:42424"])
 
 
 class LargeGraphAndLargeTensorsDebugTest(test_util.TensorFlowTestCase):
@@ -684,6 +698,56 @@ class SessionDebugGrpcGatingTest(test_util.TensorFlowTestCase):
           # to disable the breakpoint at delta:0:DebugIdentity.
           self.assertSetEqual(set(), self._server_1.breakpoints)
 
+  def testTensorBoardDebuggerWrapperToggleBreakpointsWorks(self):
+    with session.Session(config=no_rewrite_session_config()) as sess:
+      v_1 = variables.Variable(50.0, name="v_1")
+      v_2 = variables.Variable(-50.0, name="v_2")
+      delta_1 = constant_op.constant(5.0, name="delta_1")
+      delta_2 = constant_op.constant(-5.0, name="delta_2")
+      inc_v_1 = state_ops.assign_add(v_1, delta_1, name="inc_v_1")
+      inc_v_2 = state_ops.assign_add(v_2, delta_2, name="inc_v_2")
+
+      sess.run([v_1.initializer, v_2.initializer])
+
+      # The TensorBoardDebugWrapperSession should add a DebugIdentity debug op
+      # with attribute gated_grpc=True for every tensor in the graph.
+      sess = grpc_wrapper.TensorBoardDebugWrapperSession(
+          sess, self._debug_server_url_1)
+
+      for i in xrange(4):
+        self._server_1.clear_data()
+
+        if i in (0, 2):
+          # Enable breakpoint at delta_[1,2]:0:DebugIdentity in runs 0 and 2.
+          self._server_1.request_watch(
+              "delta_1", 0, "DebugIdentity", breakpoint=True)
+          self._server_1.request_watch(
+              "delta_2", 0, "DebugIdentity", breakpoint=True)
+        else:
+          # Disable the breakpoint in runs 1 and 3.
+          self._server_1.request_unwatch("delta_1", 0, "DebugIdentity")
+          self._server_1.request_unwatch("delta_2", 0, "DebugIdentity")
+
+        output = sess.run([inc_v_1, inc_v_2])
+        self.assertAllClose([50.0 + 5.0 * (i + 1), -50 - 5.0 * (i + 1)], output)
+
+        if i in (0, 2):
+          # During runs 0 and 2, the server should have received the published
+          # debug tensor delta:0:DebugIdentity. The breakpoint should have been
+          # unblocked by EventReply reponses from the server.
+          self.assertAllClose(
+              [5.0],
+              self._server_1.debug_tensor_values["delta_1:0:DebugIdentity"])
+          self.assertAllClose(
+              [-5.0],
+              self._server_1.debug_tensor_values["delta_2:0:DebugIdentity"])
+          # After the runs, the server should have properly registered the
+          # breakpoints.
+        else:
+          # After the end of runs 1 and 3, the server has received the requests
+          # to disable the breakpoint at delta:0:DebugIdentity.
+          self.assertSetEqual(set(), self._server_1.breakpoints)
+
   def testGetGrpcDebugWatchesReturnsCorrectAnswer(self):
     with session.Session() as sess:
       v = variables.Variable(50.0, name="v")
diff --git a/tensorflow/python/debug/wrappers/grpc_wrapper.py b/tensorflow/python/debug/wrappers/grpc_wrapper.py
index 4062016607..16b2018b41 100644
--- a/tensorflow/python/debug/wrappers/grpc_wrapper.py
+++ b/tensorflow/python/debug/wrappers/grpc_wrapper.py
@@ -38,7 +38,7 @@ class GrpcDebugWrapperSession(framework.NonInteractiveDebugWrapperSession):
       sess: The TensorFlow `Session` object being wrapped.
       grpc_debug_server_addresses: (`str` or `list` of `str`) Single or a list
         of the gRPC debug server addresses, in the format of
-        <host:port>, without the "grpc://" prefix. For example:
+        <host:port>, with or without the "grpc://" prefix. For example:
           "localhost:7000",
           ["localhost:7000", "192.168.0.2:8000"]
       watch_fn: (`Callable`) A Callable that can be used to define per-run
@@ -62,8 +62,7 @@ class GrpcDebugWrapperSession(framework.NonInteractiveDebugWrapperSession):
 
     if isinstance(grpc_debug_server_addresses, str):
       self._grpc_debug_server_urls = [
-          self._GRPC_URL_PREFIX + grpc_debug_server_addresses
-      ]
+          self._normalize_grpc_url(grpc_debug_server_addresses)]
     elif isinstance(grpc_debug_server_addresses, list):
       self._grpc_debug_server_urls = []
       for address in grpc_debug_server_addresses:
@@ -71,7 +70,7 @@ class GrpcDebugWrapperSession(framework.NonInteractiveDebugWrapperSession):
           raise TypeError(
               "Expected type str in list grpc_debug_server_addresses, "
               "received type %s" % type(address))
-        self._grpc_debug_server_urls.append(self._GRPC_URL_PREFIX + address)
+        self._grpc_debug_server_urls.append(self._normalize_grpc_url(address))
     else:
       raise TypeError(
           "Expected type str or list in grpc_debug_server_addresses, "
@@ -93,3 +92,37 @@ class GrpcDebugWrapperSession(framework.NonInteractiveDebugWrapperSession):
     """
 
     return self._grpc_debug_server_urls
+
+  def _normalize_grpc_url(self, address):
+    return (self._GRPC_URL_PREFIX + address
+            if not address.startswith(self._GRPC_URL_PREFIX) else address)
+
+
+class TensorBoardDebugWrapperSession(GrpcDebugWrapperSession):
+  """A tfdbg Session wrapper that can be used with TensroBoard Debugger Plugin.
+
+  This wrapper is the same as `GrpcDebugWrapperSession`, except that it uses a
+    predefined `watch_fn` that
+    1) uses `DebugIdentity` debug ops with the `gated_grpc` attribute set to
+        `True` to allow the interactive enabling and disabling of tensor
+       breakpoints.
+    2) watches all tensors in the graph.
+  This saves the need for the user to define a `watch_fn`.
+  """
+
+  def __init__(self,
+               sess,
+               grpc_debug_server_addresses,
+               thread_name_filter=None,
+               log_usage=True):
+    def _gated_grpc_watch_fn(fetches, feeds):
+      del fetches, feeds  # Unused.
+      return framework.WatchOptions(
+          debug_ops=["DebugIdentity(gated_grpc=true)"])
+
+    super(TensorBoardDebugWrapperSession, self).__init__(
+        sess,
+        grpc_debug_server_addresses,
+        watch_fn=_gated_grpc_watch_fn,
+        thread_name_filter=thread_name_filter,
+        log_usage=log_usage)
diff --git a/tensorflow/python/debug/wrappers/hooks.py b/tensorflow/python/debug/wrappers/hooks.py
index 4efa97973e..4306699624 100644
--- a/tensorflow/python/debug/wrappers/hooks.py
+++ b/tensorflow/python/debug/wrappers/hooks.py
@@ -27,9 +27,6 @@ from tensorflow.python.debug.wrappers import grpc_wrapper
 from tensorflow.python.debug.wrappers import local_cli_wrapper
 from tensorflow.python.training import session_run_hook
 
-# The prefix for GRPC endpoint URLs.
-_GRPC_ENDPOINT_PREFIX = "grpc://"
-
 
 class LocalCLIDebugHook(session_run_hook.SessionRunHook):
   """Command-line-interface debugger hook.
@@ -249,8 +246,8 @@ class GrpcDebugHook(session_run_hook.SessionRunHook):
 
     Args:
       grpc_debug_server_addresses: (`list` of `str`) A list of the gRPC debug
-        server addresses, in the format of <host:port>, without the "grpc://"
-        prefix. For example: ["localhost:7000", "192.168.0.2:8000"]
+        server addresses, in the format of <host:port>, with or without the
+        "grpc://" prefix. For example: ["localhost:7000", "192.168.0.2:8000"]
       watch_fn: A function that allows for customizing which ops to watch at
         which specific steps. See doc of
         `dumping_wrapper.DumpingDebugWrapperSession.__init__` for details.
@@ -258,23 +255,14 @@ class GrpcDebugHook(session_run_hook.SessionRunHook):
         wrapper session will be active. See doc of `BaseDebugWrapperSession` for
         more details.
       log_usage: (bool) Whether usage is to be logged.
-
-    Raises:
-      ValueError: if any debugger server addresses start with grpc://.
     """
-
-    for address in grpc_debug_server_addresses:
-      if address.startswith(_GRPC_ENDPOINT_PREFIX):
-        raise ValueError(
-            ("Debug server address %r starts with %r. It should not because "
-             "the hook already automatically adds the prefix.") % (
-                 address, _GRPC_ENDPOINT_PREFIX))
-
-    # A wrapper session responsible for GRPC communication.
     self._grpc_debug_wrapper_session = None
     self._thread_name_filter = thread_name_filter
+    self._grpc_debug_server_addresses = (
+        grpc_debug_server_addresses
+        if isinstance(grpc_debug_server_addresses, list)
+        else [grpc_debug_server_addresses])
 
-    self._grpc_debug_server_addresses = grpc_debug_server_addresses
     self._watch_fn = watch_fn
     self._log_usage = log_usage
 
@@ -315,3 +303,31 @@ class GrpcDebugHook(session_run_hook.SessionRunHook):
 
     return session_run_hook.SessionRunArgs(
         None, feed_dict=None, options=run_options)
+
+
+class TensorBoardDebugHook(GrpcDebugHook):
+  """A tfdbg hook that can be used with TensorBoard Debugger Plugin.
+
+  This hook is the same as `GrpcDebugHook`, except that it uses a predefined
+    `watch_fn` that
+    1) uses `DebugIdentity` debug ops with the `gated_grpc` attribute set to
+        `True`, to allow the interactive enabling and disabling of tensor
+       breakpoints.
+    2) watches all tensors in the graph.
+  This saves the need for the user to define a `watch_fn`.
+  """
+
+  def __init__(self,
+               grpc_debug_server_addresses,
+               thread_name_filter=None,
+               log_usage=True):
+    def _gated_grpc_watch_fn(fetches, feeds):
+      del fetches, feeds  # Unused.
+      return framework.WatchOptions(
+          debug_ops=["DebugIdentity(gated_grpc=true)"])
+
+    super(TensorBoardDebugHook, self).__init__(
+        grpc_debug_server_addresses,
+        watch_fn=_gated_grpc_watch_fn,
+        thread_name_filter=thread_name_filter,
+        log_usage=log_usage)
-- 
GitLab


From 728896dc77cfe37057400bc5e48795bcd9811e1d Mon Sep 17 00:00:00 2001
From: Sourabh Bajaj <sourabhbajaj@google.com>
Date: Fri, 1 Dec 2017 17:38:19 -0800
Subject: [PATCH 0547/1225] Fix broken test

---
 tensorflow/contrib/data/python/kernel_tests/BUILD | 1 +
 tensorflow/python/keras/BUILD                     | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/tensorflow/contrib/data/python/kernel_tests/BUILD b/tensorflow/contrib/data/python/kernel_tests/BUILD
index 4112de31c1..0e6ab1afe5 100644
--- a/tensorflow/contrib/data/python/kernel_tests/BUILD
+++ b/tensorflow/contrib/data/python/kernel_tests/BUILD
@@ -422,6 +422,7 @@ py_test(
     size = "medium",
     srcs = ["shuffle_dataset_op_test.py"],
     srcs_version = "PY2AND3",
+    tags = ["no_pip"],
     deps = [
         ":dataset_serialization_test",
         "//tensorflow/contrib/data/python/ops:dataset_ops",
diff --git a/tensorflow/python/keras/BUILD b/tensorflow/python/keras/BUILD
index 5959659a40..4a60b7835e 100755
--- a/tensorflow/python/keras/BUILD
+++ b/tensorflow/python/keras/BUILD
@@ -553,7 +553,7 @@ py_test(
 
 py_test(
     name = "data_utils_test",
-    size = "small",
+    size = "medium",
     srcs = ["_impl/keras/utils/data_utils_test.py"],
     srcs_version = "PY2AND3",
     tags = [
-- 
GitLab


From 5edb6fe8bcce4a9070baef862fba097b807ffbd8 Mon Sep 17 00:00:00 2001
From: Yao Zhang <yaozhang@google.com>
Date: Fri, 1 Dec 2017 18:38:09 -0800
Subject: [PATCH 0548/1225] Share code of SplitProcessor and ConcatProcessor.

PiperOrigin-RevId: 177667607
---
 .../grappler/optimizers/layout_optimizer.cc   | 111 ++++++------------
 .../optimizers/layout_optimizer_test.cc       |  89 ++++++++++++--
 2 files changed, 112 insertions(+), 88 deletions(-)

diff --git a/tensorflow/core/grappler/optimizers/layout_optimizer.cc b/tensorflow/core/grappler/optimizers/layout_optimizer.cc
index 36e5047d61..96144abbe8 100644
--- a/tensorflow/core/grappler/optimizers/layout_optimizer.cc
+++ b/tensorflow/core/grappler/optimizers/layout_optimizer.cc
@@ -36,8 +36,7 @@ namespace tensorflow {
 namespace grappler {
 namespace {
 
-const char kConcatConst[] = "LayoutOptimizerConcatConst";
-const char kSplitConst[] = "LayoutOptimizerSplitConst";
+const char kDimConst[] = "LayoutOptimizerDimConst";
 const char kPermNHWCToNCHW[] = "LayoutOptimizerPermConstNHWCToNCHW";
 const char kPermNCHWToNHWC[] = "LayoutOptimizerPermConstNCHWToNHWC";
 const char kGatherAxisConst[] = "LayoutOptimizerGatherAxisConst";
@@ -954,8 +953,7 @@ class ConcatProcessor : public AgnosticNodeProcessor {
 
  protected:
   bool ShouldProcess() const override {
-    return !MustPreserve() && IsDimsFour(*node_) && HasOutputs() &&
-           IsNodeAfterNCHWToNHWC() && IsAlongDimC() && IsOnGPU();
+    return AgnosticNodeProcessor::ShouldProcess() && DimSupported();
   }
 
   std::vector<int> GetInputPos() const override {
@@ -970,40 +968,51 @@ class ConcatProcessor : public AgnosticNodeProcessor {
   }
 
   Status CustomizedProcessing() override {
-    string concat_const_name = AddNodeConcatConst()->name();
-    node_map_->AddOutput(concat_const_name, node_->name());
-    *node_->mutable_input(axis_node_pos_) = concat_const_name;
+    string dim_const_name = AddNodeDimConst()->name();
+    node_map_->AddOutput(dim_const_name, node_->name());
+    *node_->mutable_input(axis_node_pos_) = dim_const_name;
     return Status::OK();
   }
 
-  bool IsAlongDimC() const {
-    auto axis_node = node_map_->GetNode(node_->input(axis_node_pos_));
-    if (!IsConstant(*axis_node)) {
+  int axis_node_pos_;
+
+ private:
+  bool DimSupported() const {
+    auto dim_node = node_map_->GetNode(node_->input(axis_node_pos_));
+    // TODO(yaozhang): Support non-constant axis node.
+    if (!IsConstant(*dim_node)) {
       return false;
     }
-    if (axis_node->attr().find("value") != axis_node->attr().end()) {
-      auto tensor = axis_node->attr().at({"value"}).tensor();
+    if (HasAttribute(*dim_node, "value").ok()) {
+      auto tensor = dim_node->attr().at({"value"}).tensor();
       if (tensor.tensor_shape().dim_size() == 0 && tensor.int_val_size() == 1) {
-        return tensor.int_val(0) == 3;
+        if (tensor.int_val(0) < 4 && tensor.int_val(0) >= -4) {
+          return true;
+        }
       }
     }
     return false;
   }
 
-  int axis_node_pos_;
-
- private:
-  NodeDef* AddNodeConcatConst() {
-    auto axis_node = node_map_->GetNode(node_->input(axis_node_pos_));
+  NodeDef* AddNodeDimConst() {
+    auto dim_node = node_map_->GetNode(node_->input(axis_node_pos_));
+    auto tensor = dim_node->attr().at({"value"}).tensor();
+    int value = tensor.int_val(0);
+    value = (value >= 0) ? value : value + 4;
+    if (value == 1 || value == 2) {
+      value = value + 1;
+    } else if (value == 3) {
+      value = 1;
+    }
     // We created a copy of the node, so that we don't modify the original node,
     // which might be used elsewhere. Note that this copy also copies the
     // control dependency input in the case this node is inside a loop,
     // to ensure added_node is in the same frame with node_.
-    auto added_node = graph_->add_node();
-    *added_node = *axis_node;
-    added_node->set_name(strings::StrCat(kConcatConst, "-", node_->name()));
-    added_node->mutable_attr()->at({"value"}).mutable_tensor()->set_int_val(0,
-                                                                            1);
+    NodeDef* added_node = graph_->add_node();
+    *added_node = *dim_node;
+    added_node->set_name(strings::StrCat(kDimConst, "-", node_->name()));
+    added_node->mutable_attr()->at({"value"}).mutable_tensor()->set_int_val(
+        0, value);
     return added_node;
   }
 };
@@ -1039,16 +1048,14 @@ class PadProcessor : public AgnosticNodeProcessor {
   }
 };
 
-class SplitProcessor : public AgnosticNodeProcessor {
+class SplitProcessor : public ConcatProcessor {
  public:
   explicit SplitProcessor(const OptimizeContext& opt_cxt)
-      : AgnosticNodeProcessor(opt_cxt) {}
-
- protected:
-  bool ShouldProcess() const override {
-    return AgnosticNodeProcessor::ShouldProcess() && SplitSupported();
+      : ConcatProcessor(opt_cxt) {
+    axis_node_pos_ = 0;
   }
 
+ protected:
   std::vector<int> GetInputPos() const override {
     std::vector<int> input_pos = {1};
     return input_pos;
@@ -1063,52 +1070,6 @@ class SplitProcessor : public AgnosticNodeProcessor {
     }
     return output_pos;
   }
-
-  Status CustomizedProcessing() override {
-    string split_const_name = AddNodeSplitConst()->name();
-    node_map_->AddOutput(split_const_name, node_->name());
-    *node_->mutable_input(0) = split_const_name;
-    return Status::OK();
-  }
-
- private:
-  bool SplitSupported() const {
-    auto dim_node = node_map_->GetNode(node_->input(0));
-    if (!IsConstant(*dim_node)) {
-      return false;
-    }
-    if (HasAttribute(*dim_node, "value").ok()) {
-      auto tensor = dim_node->attr().at({"value"}).tensor();
-      if (tensor.tensor_shape().dim_size() == 0 && tensor.int_val_size() == 1) {
-        if (tensor.int_val(0) < 4 && tensor.int_val(0) >= -4) {
-          return true;
-        }
-      }
-    }
-    return false;
-  }
-
-  NodeDef* AddNodeSplitConst() {
-    auto dim_node = node_map_->GetNode(node_->input(0));
-    auto tensor = dim_node->attr().at({"value"}).tensor();
-    int value = tensor.int_val(0);
-    value = (value >= 0) ? value : value + 4;
-    if (value == 1 || value == 2) {
-      value = value + 1;
-    } else if (value == 3) {
-      value = 1;
-    }
-    // We created a copy of the node, so that we don't modify the original node,
-    // which might be used elsewhere. Note that this copy also copies the
-    // control dependency input in the case this node is inside a loop,
-    // to ensure added_node is in the same frame with node_.
-    NodeDef* added_node = graph_->add_node();
-    *added_node = *dim_node;
-    added_node->set_name(strings::StrCat(kSplitConst, "-", node_->name()));
-    added_node->mutable_attr()->at({"value"}).mutable_tensor()->set_int_val(
-        0, value);
-    return added_node;
-  }
 };
 
 class ReluGradProcessor : public AgnosticNodeProcessor {
diff --git a/tensorflow/core/grappler/optimizers/layout_optimizer_test.cc b/tensorflow/core/grappler/optimizers/layout_optimizer_test.cc
index 0b906485e7..5ad2e25392 100644
--- a/tensorflow/core/grappler/optimizers/layout_optimizer_test.cc
+++ b/tensorflow/core/grappler/optimizers/layout_optimizer_test.cc
@@ -445,9 +445,9 @@ TEST_F(LayoutOptimizerTest, SplitDimC) {
   Status status = optimizer.Optimize(virtual_cluster_.get(), item, &output);
   NodeMap node_map(&output);
   auto split_node = node_map.GetNode("split");
-  EXPECT_EQ(split_node->input(0), "LayoutOptimizerSplitConst-split");
+  EXPECT_EQ(split_node->input(0), "LayoutOptimizerDimConst-split");
   EXPECT_EQ(split_node->input(1), "Conv2D");
-  auto split_const = node_map.GetNode("LayoutOptimizerSplitConst-split");
+  auto split_const = node_map.GetNode("LayoutOptimizerDimConst-split");
   EXPECT_EQ(split_const->op(), "Const");
   EXPECT_EQ(split_const->attr().at({"value"}).tensor().int_val(0), 1);
 }
@@ -465,9 +465,9 @@ TEST_F(LayoutOptimizerTest, SplitDimH) {
   Status status = optimizer.Optimize(virtual_cluster_.get(), item, &output);
   NodeMap node_map(&output);
   auto split_node = node_map.GetNode("split");
-  EXPECT_EQ(split_node->input(0), "LayoutOptimizerSplitConst-split");
+  EXPECT_EQ(split_node->input(0), "LayoutOptimizerDimConst-split");
   EXPECT_EQ(split_node->input(1), "Conv2D");
-  auto split_const = node_map.GetNode("LayoutOptimizerSplitConst-split");
+  auto split_const = node_map.GetNode("LayoutOptimizerDimConst-split");
   EXPECT_EQ(split_const->op(), "Const");
   EXPECT_EQ(split_const->attr().at({"value"}).tensor().int_val(0), 2);
 }
@@ -485,9 +485,9 @@ TEST_F(LayoutOptimizerTest, SplitDimW) {
   Status status = optimizer.Optimize(virtual_cluster_.get(), item, &output);
   NodeMap node_map(&output);
   auto split_node = node_map.GetNode("split");
-  EXPECT_EQ(split_node->input(0), "LayoutOptimizerSplitConst-split");
+  EXPECT_EQ(split_node->input(0), "LayoutOptimizerDimConst-split");
   EXPECT_EQ(split_node->input(1), "Conv2D");
-  auto split_const = node_map.GetNode("LayoutOptimizerSplitConst-split");
+  auto split_const = node_map.GetNode("LayoutOptimizerDimConst-split");
   EXPECT_EQ(split_const->op(), "Const");
   EXPECT_EQ(split_const->attr().at({"value"}).tensor().int_val(0), 3);
 }
@@ -505,9 +505,9 @@ TEST_F(LayoutOptimizerTest, SplitDimN) {
   Status status = optimizer.Optimize(virtual_cluster_.get(), item, &output);
   NodeMap node_map(&output);
   auto split_node = node_map.GetNode("split");
-  EXPECT_EQ(split_node->input(0), "LayoutOptimizerSplitConst-split");
+  EXPECT_EQ(split_node->input(0), "LayoutOptimizerDimConst-split");
   EXPECT_EQ(split_node->input(1), "Conv2D");
-  auto split_const = node_map.GetNode("LayoutOptimizerSplitConst-split");
+  auto split_const = node_map.GetNode("LayoutOptimizerDimConst-split");
   EXPECT_EQ(split_const->op(), "Const");
   EXPECT_EQ(split_const->attr().at({"value"}).tensor().int_val(0), 0);
 }
@@ -549,12 +549,75 @@ TEST_F(LayoutOptimizerTest, SplitSamePortToMultipleInputsOfSameNode) {
   EXPECT_EQ(concat_node->input(0), "split:1");
   EXPECT_EQ(concat_node->input(1), "split:1");
   EXPECT_EQ(concat_node->input(2), "split:1");
-  EXPECT_EQ(concat_node->input(3), "LayoutOptimizerConcatConst-concat");
-  auto concat_dim = node_map.GetNode("LayoutOptimizerConcatConst-concat");
+  EXPECT_EQ(concat_node->input(3), "LayoutOptimizerDimConst-concat");
+  auto concat_dim = node_map.GetNode("LayoutOptimizerDimConst-concat");
   EXPECT_EQ(concat_dim->attr().at({"value"}).tensor().int_val(0), 1);
 }
 
-TEST_F(LayoutOptimizerTest, Concat) {
+TEST_F(LayoutOptimizerTest, ConcatDimH) {
+  tensorflow::Scope s = tensorflow::Scope::NewRootScope();
+  auto conv = SimpleConv2D(&s, 3, 2, "VALID");
+  auto axis = ops::Const(s.WithOpName("axis"), 1);
+  auto split = ops::Split(s.WithOpName("split"), axis, conv, 2);
+  auto concat = ops::Concat(s.WithOpName("concat"), {split[0], split[1]}, axis);
+  auto o = ops::Identity(s.WithOpName("o"), concat);
+  GrapplerItem item;
+  TF_CHECK_OK(s.ToGraphDef(&item.graph));
+  LayoutOptimizer optimizer;
+  GraphDef output;
+  Status status = optimizer.Optimize(virtual_cluster_.get(), item, &output);
+  NodeMap node_map(&output);
+  auto concat_node = node_map.GetNode("concat");
+  EXPECT_EQ(concat_node->input(0), "split");
+  EXPECT_EQ(concat_node->input(1), "split:1");
+  EXPECT_EQ(concat_node->input(2), "LayoutOptimizerDimConst-concat");
+  auto concat_dim = node_map.GetNode("LayoutOptimizerDimConst-concat");
+  EXPECT_EQ(concat_dim->attr().at({"value"}).tensor().int_val(0), 2);
+}
+
+TEST_F(LayoutOptimizerTest, ConcatDimW) {
+  tensorflow::Scope s = tensorflow::Scope::NewRootScope();
+  auto conv = SimpleConv2D(&s, 3, 2, "VALID");
+  auto axis = ops::Const(s.WithOpName("axis"), 2);
+  auto split = ops::Split(s.WithOpName("split"), axis, conv, 2);
+  auto concat = ops::Concat(s.WithOpName("concat"), {split[0], split[1]}, axis);
+  auto o = ops::Identity(s.WithOpName("o"), concat);
+  GrapplerItem item;
+  TF_CHECK_OK(s.ToGraphDef(&item.graph));
+  LayoutOptimizer optimizer;
+  GraphDef output;
+  Status status = optimizer.Optimize(virtual_cluster_.get(), item, &output);
+  NodeMap node_map(&output);
+  auto concat_node = node_map.GetNode("concat");
+  EXPECT_EQ(concat_node->input(0), "split");
+  EXPECT_EQ(concat_node->input(1), "split:1");
+  EXPECT_EQ(concat_node->input(2), "LayoutOptimizerDimConst-concat");
+  auto concat_dim = node_map.GetNode("LayoutOptimizerDimConst-concat");
+  EXPECT_EQ(concat_dim->attr().at({"value"}).tensor().int_val(0), 3);
+}
+
+TEST_F(LayoutOptimizerTest, ConcatDimN) {
+  tensorflow::Scope s = tensorflow::Scope::NewRootScope();
+  auto conv = SimpleConv2D(&s, 3, 2, "VALID");
+  auto axis = ops::Const(s.WithOpName("axis"), 0);
+  auto split = ops::Split(s.WithOpName("split"), axis, conv, 2);
+  auto concat = ops::Concat(s.WithOpName("concat"), {split[0], split[1]}, axis);
+  auto o = ops::Identity(s.WithOpName("o"), concat);
+  GrapplerItem item;
+  TF_CHECK_OK(s.ToGraphDef(&item.graph));
+  LayoutOptimizer optimizer;
+  GraphDef output;
+  Status status = optimizer.Optimize(virtual_cluster_.get(), item, &output);
+  NodeMap node_map(&output);
+  auto concat_node = node_map.GetNode("concat");
+  EXPECT_EQ(concat_node->input(0), "split");
+  EXPECT_EQ(concat_node->input(1), "split:1");
+  EXPECT_EQ(concat_node->input(2), "LayoutOptimizerDimConst-concat");
+  auto concat_dim = node_map.GetNode("LayoutOptimizerDimConst-concat");
+  EXPECT_EQ(concat_dim->attr().at({"value"}).tensor().int_val(0), 0);
+}
+
+TEST_F(LayoutOptimizerTest, ConcatDimC) {
   tensorflow::Scope s = tensorflow::Scope::NewRootScope();
   auto conv = SimpleConv2D(&s, 3, 2, "VALID");
   auto axis = ops::Const(s.WithOpName("axis"), 3);
@@ -570,8 +633,8 @@ TEST_F(LayoutOptimizerTest, Concat) {
   auto concat_node = node_map.GetNode("concat");
   EXPECT_EQ(concat_node->input(0), "split");
   EXPECT_EQ(concat_node->input(1), "split:1");
-  EXPECT_EQ(concat_node->input(2), "LayoutOptimizerConcatConst-concat");
-  auto concat_dim = node_map.GetNode("LayoutOptimizerConcatConst-concat");
+  EXPECT_EQ(concat_node->input(2), "LayoutOptimizerDimConst-concat");
+  auto concat_dim = node_map.GetNode("LayoutOptimizerDimConst-concat");
   EXPECT_EQ(concat_dim->attr().at({"value"}).tensor().int_val(0), 1);
 }
 
-- 
GitLab


From 01f097d789e88c58cfc16d5052e2bb83f6412ef3 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 1 Dec 2017 19:38:36 -0800
Subject: [PATCH 0549/1225] Automated g4 rollback of changelist 177204171

PiperOrigin-RevId: 177670554
---
 tensorflow/compiler/xla/service/hlo_verifier.cc | 10 +++-------
 1 file changed, 3 insertions(+), 7 deletions(-)

diff --git a/tensorflow/compiler/xla/service/hlo_verifier.cc b/tensorflow/compiler/xla/service/hlo_verifier.cc
index ea7775b18a..4d02846cf7 100644
--- a/tensorflow/compiler/xla/service/hlo_verifier.cc
+++ b/tensorflow/compiler/xla/service/hlo_verifier.cc
@@ -147,13 +147,9 @@ class ShapeVerifier : public DfsHloVisitor {
   }
 
   Status HandleBitcast(HloInstruction* bitcast) override {
-    // Bitcasts that are not the root of a computation can be any shape.
-    // Bitcasts that are the root of a computation must have the same shape
-    // byte size as their operand.
-    if (bitcast->parent()->root_instruction() == bitcast) {
-      TF_RET_CHECK(shape_size_fn_(bitcast->shape()) ==
-                   shape_size_fn_(bitcast->operand(0)->shape()));
-    }
+    // Bitcasts can be any shape, as long as the size matches the operand size.
+    TF_RET_CHECK(shape_size_fn_(bitcast->shape()) ==
+                 shape_size_fn_(bitcast->operand(0)->shape()));
     return tensorflow::Status::OK();
   }
 
-- 
GitLab


From 7d82dbb42744a21ff05924e973e57a68465f3347 Mon Sep 17 00:00:00 2001
From: Igor Saprykin <isaprykin@google.com>
Date: Fri, 1 Dec 2017 19:44:35 -0800
Subject: [PATCH 0550/1225] Fix a replicate_model_fn_test that is dependent on
 the number of hardware GPUs.

It has been causing failures when run on a machine with 4 GPUs.

PiperOrigin-RevId: 177670759
---
 .../estimator/python/estimator/replicate_model_fn_test.py       | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/contrib/estimator/python/estimator/replicate_model_fn_test.py b/tensorflow/contrib/estimator/python/estimator/replicate_model_fn_test.py
index 662021853d..91e4b9ba7d 100644
--- a/tensorflow/contrib/estimator/python/estimator/replicate_model_fn_test.py
+++ b/tensorflow/contrib/estimator/python/estimator/replicate_model_fn_test.py
@@ -288,7 +288,7 @@ class ReplicateModelTest(test_util.TensorFlowTestCase):
 
     with self.test_session() as session:
       replicated_model_fn = replicate_model_fn.replicate_model_fn(
-          self.model_fn, self.optimizer_fn)
+          self.model_fn, self.optimizer_fn, devices=['/gpu:0'])
       estimator_spec = replicated_model_fn(
           features, labels, model_fn_lib.ModeKeys.TRAIN, self.params)
       session.run(variables.global_variables_initializer())
-- 
GitLab


From 9c6b7080693661508b8df0c718aec35efef02e4d Mon Sep 17 00:00:00 2001
From: Sourabh Bajaj <sourabhbajaj@google.com>
Date: Fri, 1 Dec 2017 23:26:45 -0800
Subject: [PATCH 0551/1225] Disable the lite test

---
 tensorflow/contrib/lite/python/BUILD | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tensorflow/contrib/lite/python/BUILD b/tensorflow/contrib/lite/python/BUILD
index 89e8693490..3d6a3ec0fd 100644
--- a/tensorflow/contrib/lite/python/BUILD
+++ b/tensorflow/contrib/lite/python/BUILD
@@ -24,6 +24,7 @@ py_test(
     name = "lite_test",
     srcs = ["lite_test.py"],
     srcs_version = "PY2AND3",
+    tags = ["no_oss"],
     deps = [
         ":lite",
         "//tensorflow/python:array_ops",
-- 
GitLab


From 48b31e31cd3c3f5b2d30ea9c30c178d3b017454d Mon Sep 17 00:00:00 2001
From: Yu-Cheng Ling <ycling@google.com>
Date: Sat, 2 Dec 2017 01:39:55 -0800
Subject: [PATCH 0552/1225] Document iOS demo app in TF Lite Readme (#15008)

* Add a seciton for iOS camera demo app.

* Minor format fix.

* Correction: Project file -> workspace file.

* Improve the error message when camera fails.

* Update README.md
---
 tensorflow/contrib/lite/README.md               | 17 ++++++++++++++---
 .../ios/camera/CameraExampleViewController.mm   |  6 +++++-
 2 files changed, 19 insertions(+), 4 deletions(-)

diff --git a/tensorflow/contrib/lite/README.md b/tensorflow/contrib/lite/README.md
index c7464bcc9d..fc9144d5fc 100644
--- a/tensorflow/contrib/lite/README.md
+++ b/tensorflow/contrib/lite/README.md
@@ -4,7 +4,7 @@ TensorFlow Lite is TensorFlow's lightweight solution for mobile and embedded dev
 TensorFlow Lite uses many techniques for achieving low latency like optimizing the kernels for specific mobile apps, pre-fused activations, quantized kernels that allow smaller and faster (fixed-point math) models, and in the future, leverage specialized machine learning hardware to get the best possible performance for a particular model on a particular device.
 
 ![image](g3doc/TFLite-Architecture.jpg)
-# Getting Started with a Demo App
+# Getting Started with an Android Demo App
 
 This section contains an example application using TensorFlow Lite for Android devices. The demo is a sample camera app that classifies images continuously using a quantized Mobilenet model. A device running Android 5.0 ( API 21) or higher is required to run the demo.
 
@@ -17,7 +17,7 @@ There are 3 ways to get the demo app to your device
 In the demo app, inference is done using the TensorFlow Lite Java API. The demo app classifies frames in real-time, displaying the top most probable classifications. It also displays the time taken to detect the object.
 
 ## Downloading the pre-built binary
-The  fastest path to trying the demo, is to download the pre-built binary
+The fastest path to trying the demo, is to download the pre-built binary
 [TfLiteCameraDemo.apk](https://storage.googleapis.com/download.tensorflow.org/deps/tflite/TfLiteCameraDemo.apk)
 
 Once the apk is installed, click the app icon to start the app. The first-time the app is opened, the app asks for runtime permissions to access the device camera. The demo app opens the back-camera of the device and recognizes the objects in the camera's field of view. At the bottom of the image (or at the left of the image if the device is in landscape mode), it shows the latency of classification and the top three objects classified.
@@ -69,7 +69,7 @@ android_ndk_repository(
 
 Additional details on building with Android can be found [here](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/lite/java/demo/README.md).
 
-### Build the  source code
+### Build the source code
 Run bazel with the following command to build the demo.
 
 Build the demo app:
@@ -86,6 +86,17 @@ environment (due to a Bazel bug).
 ### More about the demo
 The demo is resizing each camera image frame to (224 width * 224 height) to match the  quantized Mobilenet model being used. The resized image is converted into a ByteBuffer row by row of size 1 * 224 * 224 * 3 bytes, where 1 is the number of images in a batch 224 * 224 is the width and height of the image 3 bytes represents three colors of a pixel. This demo uses the TensorFlow Lite Java inference API for models which take a single input and provide a single output. This outputs a two-dimensional array, with the first dimension being the category index and the second dimension being the confidence of classification. The Mobilenet model has 1001 unique categories and the app sorts the probabilities of all the categories and displays the top three. The Mobilenet quantized model is bundled within the assets directory of the app.
 
+# iOS Demo App
+
+Similar to the Android demo app, there's an iOS camera app that uses exactly the same model (224 * 224 quantized Mobilenet).
+
+This demo app requires a camera so it doesn't work with simulators. It need to be executed on a real iOS device. Follow the instructions to build and run the demo app:
+
+1.   Follow the Building section [here](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/contrib/lite/g3doc/ios.md#building) to build the universal iOS library for TensorFlow Lite.
+1.   Install [CocoaPods](https://cocoapods.org/) if it wasn't installed yet: `sudo gem install cocoapods`.
+1.   Run `pod install` in `tensorflow/contrib/lite/examples/ios/camera` to generate the workspace file.
+1.   Open the project by running `open tflite_camera_example.xcworkspace`, and build the app in XCode.
+
 # TensorFlow Lite Quick Start
 
 ## Step 1. Decide which GraphDef to use
diff --git a/tensorflow/contrib/lite/examples/ios/camera/CameraExampleViewController.mm b/tensorflow/contrib/lite/examples/ios/camera/CameraExampleViewController.mm
index ea398ad14e..10f31bb6f1 100644
--- a/tensorflow/contrib/lite/examples/ios/camera/CameraExampleViewController.mm
+++ b/tensorflow/contrib/lite/examples/ios/camera/CameraExampleViewController.mm
@@ -123,7 +123,11 @@ static void GetTopN(const uint8_t* prediction, const int prediction_size, const
   AVCaptureDevice* device = [AVCaptureDevice defaultDeviceWithMediaType:AVMediaTypeVideo];
   AVCaptureDeviceInput* deviceInput =
       [AVCaptureDeviceInput deviceInputWithDevice:device error:&error];
-  assert(error == nil);
+
+  if (error != nil) {
+    NSLog(@"Failed to initialize AVCaptureDeviceInput. Note: This app doesn't work with simulator");
+    assert(NO);
+  }
 
   if ([session canAddInput:deviceInput]) [session addInput:deviceInput];
 
-- 
GitLab


From 87eef3be40c7024f088844bea73a9a7c56c8547a Mon Sep 17 00:00:00 2001
From: Alan Yee <alyee@ucsd.edu>
Date: Sat, 2 Dec 2017 07:44:24 -0800
Subject: [PATCH 0553/1225] Update estimator.py (#14256)

Replace contrib metrics with core metrics
---
 tensorflow/contrib/learn/python/learn/estimators/estimator.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tensorflow/contrib/learn/python/learn/estimators/estimator.py b/tensorflow/contrib/learn/python/learn/estimators/estimator.py
index 788d2d0b1a..05ed8b3409 100644
--- a/tensorflow/contrib/learn/python/learn/estimators/estimator.py
+++ b/tensorflow/contrib/learn/python/learn/estimators/estimator.py
@@ -30,7 +30,6 @@ import six
 
 from google.protobuf import message
 from tensorflow.contrib import layers
-from tensorflow.contrib import metrics as metrics_lib
 from tensorflow.contrib.framework import deprecated
 from tensorflow.contrib.framework import deprecated_args
 from tensorflow.contrib.framework import list_variables
@@ -60,6 +59,7 @@ from tensorflow.python.framework import sparse_tensor
 from tensorflow.python.framework import tensor_util
 from tensorflow.python.ops import control_flow_ops
 from tensorflow.python.ops import lookup_ops
+from tensorflow.python.ops import metrics as metrics_lib
 from tensorflow.python.ops import resources
 from tensorflow.python.ops import variables
 from tensorflow.python.platform import gfile
@@ -1230,7 +1230,7 @@ class Estimator(BaseEstimator):
 
     if metric_key.MetricKey.LOSS not in model_fn_ops.eval_metric_ops:
       model_fn_ops.eval_metric_ops[metric_key.MetricKey.LOSS] = (
-          metrics_lib.streaming_mean(model_fn_ops.loss))
+          metrics_lib.mean(model_fn_ops.loss))
     return model_fn_ops
 
   def _get_predict_ops(self, features):
-- 
GitLab


From c9db2486ead95f000395af14919f0ae8f508e429 Mon Sep 17 00:00:00 2001
From: ManHyuk <manhyuk@kw.ac.kr>
Date: Sun, 3 Dec 2017 00:44:44 +0900
Subject: [PATCH 0554/1225] Fix typo (#14269)

* Fix typo : specified

* Fix typo

* Fix typo

* Fix typo
---
 tensorflow/compiler/xla/service/hlo_instruction.h | 2 +-
 tensorflow/core/graph/graph_partition.cc          | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/tensorflow/compiler/xla/service/hlo_instruction.h b/tensorflow/compiler/xla/service/hlo_instruction.h
index 768c027a42..90dc6a3158 100644
--- a/tensorflow/compiler/xla/service/hlo_instruction.h
+++ b/tensorflow/compiler/xla/service/hlo_instruction.h
@@ -427,7 +427,7 @@ class HloInstruction {
   Status RemoveControlDependencyTo(HloInstruction* instruction);
 
   // Returns the set of control predecessors (successors) of this
-  // instruction. Control predecessors (sucessors) must execute before (after)
+  // instruction. Control predecessors (successors) must execute before (after)
   // the current instruction.
   const std::vector<HloInstruction*>& control_predecessors() const {
     return control_predecessors_;
diff --git a/tensorflow/core/graph/graph_partition.cc b/tensorflow/core/graph/graph_partition.cc
index 1924c05d3d..add80eda23 100644
--- a/tensorflow/core/graph/graph_partition.cc
+++ b/tensorflow/core/graph/graph_partition.cc
@@ -1152,7 +1152,7 @@ Status Partition(const PartitionOptions& opts, Graph* g,
     // Add control edges from 'ref_control_inputs' to 'ref_recvs'.
     // NOTE(yuanbyu): Adding these control edges should not introduce
     // deadlocks. 'dst' has implicit "read" nodes that, when we split
-    // across devices, are made explicit; Retargettig the dependencies
+    // across devices, are made explicit; Retargeting the dependencies
     // to 'dst' to those nodes would not introduce cycles if there isn't
     // one before the transformation.
     // NOTE(yuanbyu): This may impact performance because it defers the
-- 
GitLab


From 04e93d09455b1fd849ca0706d7b6825a11a45c5c Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Sat, 2 Dec 2017 15:20:25 -0800
Subject: [PATCH 0555/1225] Fix a bug in CreateItem of
 FunctionLibraryRuntimeImpl.

PiperOrigin-RevId: 177710771
---
 tensorflow/core/common_runtime/function.cc | 23 +++++++++++-----------
 1 file changed, 11 insertions(+), 12 deletions(-)

diff --git a/tensorflow/core/common_runtime/function.cc b/tensorflow/core/common_runtime/function.cc
index 4c87c922c2..3328125dc9 100644
--- a/tensorflow/core/common_runtime/function.cc
+++ b/tensorflow/core/common_runtime/function.cc
@@ -552,8 +552,16 @@ Status FunctionLibraryRuntimeImpl::CreateItem(Handle handle, Item** item) {
   Executor* exec;
   TF_RETURN_IF_ERROR(NewLocalExecutor(params, g.release(), &exec));
 
-  (*item)->graph = graph;
-  (*item)->exec = exec;
+  {
+    // Guard item since it is already inserted in items_.
+    mutex_lock l(mu_);
+    if ((*item)->exec) {
+      delete exec;
+    } else {
+      (*item)->graph = graph;
+      (*item)->exec = exec;
+    }
+  }
   return Status::OK();
 }
 
@@ -572,16 +580,7 @@ Status FunctionLibraryRuntimeImpl::GetOrCreateItem(Handle handle, Item** item) {
   }
   // NOTE: We need to call CreateItem out of mu_ because creating an
   // executor needs to call CreateKernel.
-  TF_RETURN_IF_ERROR(CreateItem(handle, item));
-
-  {
-    mutex_lock l(mu_);
-    if (items_[local_handle] == nullptr) {
-      // Insert *item in items_.
-      items_.insert({local_handle, *item});
-    }
-  }
-  return Status::OK();
+  return CreateItem(handle, item);
 }
 
 void FunctionLibraryRuntimeImpl::RunRemote(const Options& opts, Handle handle,
-- 
GitLab


From a19a5bc3ea05c666936d5823da256f868c38362f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Yan=20Facai=20=28=E9=A2=9C=E5=8F=91=E6=89=8D=29?=
 <facai.yan@gmail.com>
Date: Sun, 3 Dec 2017 10:49:34 +0800
Subject: [PATCH 0556/1225] DOC: underline that tf.Print behaves like
 tf.identity

---
 tensorflow/python/ops/logging_ops.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tensorflow/python/ops/logging_ops.py b/tensorflow/python/ops/logging_ops.py
index 08e3f83a0b..51ab2aec22 100644
--- a/tensorflow/python/ops/logging_ops.py
+++ b/tensorflow/python/ops/logging_ops.py
@@ -39,8 +39,8 @@ def Print(input_, data, message=None, first_n=None, summarize=None,
           name=None):
   """Prints a list of tensors.
 
-  This is an identity op with the side effect of printing `data` when
-  evaluating.
+  This is an identity op (behaves like `tf.identity`) with the side effect
+  of printing `data` when evaluating.
 
   Note: This op prints to the standard error. It is not currently compatible
     with jupyter notebook (printing to the notebook *server's* output, not into
@@ -57,7 +57,7 @@ def Print(input_, data, message=None, first_n=None, summarize=None,
     name: A name for the operation (optional).
 
   Returns:
-    Same tensor as `input_`.
+    A `Tensor`. Has the same type and contents as `input_`.
   """
   return gen_logging_ops._print(input_, data, message, first_n, summarize, name)
 
-- 
GitLab


From 2c385166a4c9eb16d7aca2a7335e96569c59d124 Mon Sep 17 00:00:00 2001
From: Gunhan Gulsoy <gunan@google.com>
Date: Sat, 2 Dec 2017 23:46:41 -0800
Subject: [PATCH 0557/1225] Increase the size of sequence_dataset_op_test.

PiperOrigin-RevId: 177725822
---
 tensorflow/contrib/data/python/kernel_tests/BUILD | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/contrib/data/python/kernel_tests/BUILD b/tensorflow/contrib/data/python/kernel_tests/BUILD
index ffb5655c3e..15cc529de4 100644
--- a/tensorflow/contrib/data/python/kernel_tests/BUILD
+++ b/tensorflow/contrib/data/python/kernel_tests/BUILD
@@ -388,7 +388,7 @@ py_test(
 
 py_test(
     name = "sequence_dataset_op_test",
-    size = "small",
+    size = "medium",
     srcs = ["sequence_dataset_op_test.py"],
     srcs_version = "PY2AND3",
     tags = ["no_pip"],
-- 
GitLab


From 7bb11f6a66e50f542d1e29d0dd54eda102972b87 Mon Sep 17 00:00:00 2001
From: Utkarsh Upadhyay <musically.ut@gmail.com>
Date: Sun, 3 Dec 2017 15:12:29 +0100
Subject: [PATCH 0558/1225] Fix documentation for inverse_time_decay.

---
 tensorflow/python/training/learning_rate_decay.py | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/tensorflow/python/training/learning_rate_decay.py b/tensorflow/python/training/learning_rate_decay.py
index 802b930b0e..f0c28e7b89 100644
--- a/tensorflow/python/training/learning_rate_decay.py
+++ b/tensorflow/python/training/learning_rate_decay.py
@@ -362,7 +362,13 @@ def inverse_time_decay(learning_rate, global_step, decay_steps, decay_rate,
   The function returns the decayed learning rate.  It is computed as:
 
   ```python
-  decayed_learning_rate = learning_rate / (1 + decay_rate * t)
+  decayed_learning_rate = learning_rate / (1 + decay_rate * global_step / decay_step)
+  ```
+
+  or, if `staircase` is `True`, as:
+
+  ```python
+  decayed_learning_rate = learning_rate / (1 + decay_rate * floor(global_step / decay_step))
   ```
 
   Example: decay 1/t with a rate of 0.5:
@@ -371,8 +377,9 @@ def inverse_time_decay(learning_rate, global_step, decay_steps, decay_rate,
   ...
   global_step = tf.Variable(0, trainable=False)
   learning_rate = 0.1
-  k = 0.5
-  learning_rate = tf.train.inverse_time_decay(learning_rate, global_step, k)
+  decay_steps = 1.0
+  decay_rate = 0.5
+  learning_rate = tf.train.inverse_time_decay(learning_rate, global_step, decay_steps, decay_rate)
 
   # Passing global_step to minimize() will increment it at each step.
   learning_step = (
-- 
GitLab


From 0fee74eadea48baec80a979763eb19096d148026 Mon Sep 17 00:00:00 2001
From: Robin Richtsfeld <robin.richtsfeld@gmail.com>
Date: Sun, 3 Dec 2017 15:30:15 +0100
Subject: [PATCH 0559/1225] [CMake] Extract list of python modules (#14877)

* [CMake] Extract list of python modules

* [CMake] Extract lists of python protos

* [CMake] Add missing python protos

* [CMake] Prune out test code from PIP package

* [CMake] Add missing python modules
---
 tensorflow/contrib/cmake/python_modules.txt   | 447 +++++++++++++++
 tensorflow/contrib/cmake/python_protos.txt    |  19 +
 tensorflow/contrib/cmake/python_protos_cc.txt |   5 +
 tensorflow/contrib/cmake/tf_python.cmake      | 522 ++----------------
 4 files changed, 505 insertions(+), 488 deletions(-)
 create mode 100644 tensorflow/contrib/cmake/python_modules.txt
 create mode 100644 tensorflow/contrib/cmake/python_protos.txt
 create mode 100644 tensorflow/contrib/cmake/python_protos_cc.txt

diff --git a/tensorflow/contrib/cmake/python_modules.txt b/tensorflow/contrib/cmake/python_modules.txt
new file mode 100644
index 0000000000..28710e3ab8
--- /dev/null
+++ b/tensorflow/contrib/cmake/python_modules.txt
@@ -0,0 +1,447 @@
+tensorflow
+tensorflow/core
+tensorflow/core/example
+tensorflow/core/framework
+tensorflow/core/lib
+tensorflow/core/lib/core
+tensorflow/core/protobuf
+tensorflow/core/util
+tensorflow/examples
+tensorflow/examples/tutorials
+tensorflow/examples/tutorials/mnist
+tensorflow/python
+tensorflow/python/client
+tensorflow/python/data
+tensorflow/python/data/ops
+tensorflow/python/data/util
+tensorflow/python/debug
+tensorflow/python/debug/cli
+tensorflow/python/debug/examples
+tensorflow/python/debug/lib
+tensorflow/python/debug/wrappers
+tensorflow/python/eager
+tensorflow/python/estimator
+tensorflow/python/estimator/canned
+tensorflow/python/estimator/export
+tensorflow/python/estimator/inputs
+tensorflow/python/estimator/inputs/queues
+tensorflow/python/feature_column
+tensorflow/python/framework
+tensorflow/python/grappler
+tensorflow/python/keras
+tensorflow/python/keras/activations
+tensorflow/python/keras/applications
+tensorflow/python/keras/applications/inception_resnet_v2
+tensorflow/python/keras/applications/inception_v3
+tensorflow/python/keras/applications/mobilenet
+tensorflow/python/keras/applications/resnet50
+tensorflow/python/keras/applications/vgg16
+tensorflow/python/keras/applications/vgg19
+tensorflow/python/keras/applications/xception
+tensorflow/python/keras/backend
+tensorflow/python/keras/callbacks
+tensorflow/python/keras/constraints
+tensorflow/python/keras/datasets
+tensorflow/python/keras/datasets/boston_housing
+tensorflow/python/keras/datasets/cifar10
+tensorflow/python/keras/datasets/cifar100
+tensorflow/python/keras/datasets/fashion_mnist
+tensorflow/python/keras/datasets/imdb
+tensorflow/python/keras/datasets/mnist
+tensorflow/python/keras/datasets/reuters
+tensorflow/python/keras/estimator
+tensorflow/python/keras/initializers
+tensorflow/python/keras/layers
+tensorflow/python/keras/losses
+tensorflow/python/keras/metrics
+tensorflow/python/keras/models
+tensorflow/python/keras/optimizers
+tensorflow/python/keras/preprocessing
+tensorflow/python/keras/preprocessing/image
+tensorflow/python/keras/preprocessing/sequence
+tensorflow/python/keras/preprocessing/text
+tensorflow/python/keras/regularizers
+tensorflow/python/keras/utils
+tensorflow/python/keras/wrappers
+tensorflow/python/keras/wrappers/scikit_learn
+tensorflow/python/keras/_impl
+tensorflow/python/keras/_impl/keras
+tensorflow/python/keras/_impl/keras/applications
+tensorflow/python/keras/_impl/keras/datasets
+tensorflow/python/keras/_impl/keras/engine
+tensorflow/python/keras/_impl/keras/layers
+tensorflow/python/keras/_impl/keras/preprocessing
+tensorflow/python/keras/_impl/keras/utils
+tensorflow/python/keras/_impl/keras/wrappers
+tensorflow/python/kernel_tests
+tensorflow/python/kernel_tests/distributions
+tensorflow/python/kernel_tests/linalg
+tensorflow/python/kernel_tests/random
+tensorflow/python/layers
+tensorflow/python/lib
+tensorflow/python/lib/core
+tensorflow/python/lib/io
+tensorflow/python/ops
+tensorflow/python/ops/distributions
+tensorflow/python/ops/linalg
+tensorflow/python/ops/losses
+tensorflow/python/platform
+tensorflow/python/platform/default
+tensorflow/python/platform/summary
+tensorflow/python/profiler/
+tensorflow/python/profiler/internal
+tensorflow/python/saved_model
+tensorflow/python/summary
+tensorflow/python/summary/writer
+tensorflow/python/tools
+tensorflow/python/training
+tensorflow/python/user_ops
+tensorflow/python/util
+tensorflow/python/util/protobuf
+tensorflow/tools
+tensorflow/tools/graph_transforms
+tensorflow/contrib
+tensorflow/contrib/all_reduce
+tensorflow/contrib/all_reduce/python
+tensorflow/contrib/android
+tensorflow/contrib/android/java
+tensorflow/contrib/android/java/org
+tensorflow/contrib/android/java/org/tensorflow
+tensorflow/contrib/android/java/org/tensorflow/contrib
+tensorflow/contrib/android/java/org/tensorflow/contrib/android
+tensorflow/contrib/android/jni
+tensorflow/contrib/batching
+tensorflow/contrib/batching/kernels
+tensorflow/contrib/batching/python
+tensorflow/contrib/batching/python/ops
+tensorflow/contrib/bayesflow
+tensorflow/contrib/bayesflow/examples
+tensorflow/contrib/bayesflow/examples/reinforce_simple
+tensorflow/contrib/bayesflow/python
+tensorflow/contrib/bayesflow/python/ops
+tensorflow/contrib/boosted_trees
+tensorflow/contrib/boosted_trees/estimator_batch
+tensorflow/contrib/boosted_trees/kernels
+tensorflow/contrib/boosted_trees/ops
+tensorflow/contrib/boosted_trees/proto
+tensorflow/contrib/boosted_trees/python
+tensorflow/contrib/boosted_trees/python/ops
+tensorflow/contrib/cloud
+tensorflow/contrib/cloud/kernels
+tensorflow/contrib/cloud/ops
+tensorflow/contrib/cloud/python
+tensorflow/contrib/cloud/python/ops
+tensorflow/contrib/cluster_resolver
+tensorflow/contrib/cluster_resolver/python
+tensorflow/contrib/cluster_resolver/python/training
+tensorflow/contrib/compiler
+tensorflow/contrib/copy_graph
+tensorflow/contrib/copy_graph/python
+tensorflow/contrib/copy_graph/python/util
+tensorflow/contrib/crf
+tensorflow/contrib/crf/python
+tensorflow/contrib/crf/python/ops
+tensorflow/contrib/cudnn_rnn
+tensorflow/contrib/cudnn_rnn/kernels
+tensorflow/contrib/cudnn_rnn/ops
+tensorflow/contrib/cudnn_rnn/python
+tensorflow/contrib/cudnn_rnn/python/layers
+tensorflow/contrib/cudnn_rnn/python/ops
+tensorflow/contrib/data
+tensorflow/contrib/data/kernels
+tensorflow/contrib/data/python
+tensorflow/contrib/data/python/ops
+tensorflow/contrib/decision_trees
+tensorflow/contrib/decision_trees/proto
+tensorflow/contrib/deprecated
+tensorflow/contrib/distributions
+tensorflow/contrib/distributions/python
+tensorflow/contrib/distributions/python/ops
+tensorflow/contrib/distributions/python/ops/bijectors
+tensorflow/contrib/eager
+tensorflow/contrib/eager/python
+tensorflow/contrib/estimator
+tensorflow/contrib/estimator/python
+tensorflow/contrib/estimator/python/estimator
+tensorflow/contrib/factorization
+tensorflow/contrib/factorization/examples
+tensorflow/contrib/factorization/kernels
+tensorflow/contrib/factorization/ops
+tensorflow/contrib/factorization/python
+tensorflow/contrib/factorization/python/ops
+tensorflow/contrib/ffmpeg
+tensorflow/contrib/ffmpeg/default
+tensorflow/contrib/framework
+tensorflow/contrib/framework/kernels
+tensorflow/contrib/framework/ops
+tensorflow/contrib/framework/python
+tensorflow/contrib/framework/python/framework
+tensorflow/contrib/framework/python/ops
+tensorflow/contrib/fused_conv
+tensorflow/contrib/fused_conv/kernels
+tensorflow/contrib/fused_conv/python
+tensorflow/contrib/fused_conv/python/ops
+tensorflow/contrib/gan
+tensorflow/contrib/gan/python
+tensorflow/contrib/gan/python/estimator
+tensorflow/contrib/gan/python/estimator/python
+tensorflow/contrib/gan/python/eval
+tensorflow/contrib/gan/python/eval/python
+tensorflow/contrib/gan/python/features
+tensorflow/contrib/gan/python/features/python
+tensorflow/contrib/gan/python/losses
+tensorflow/contrib/gan/python/losses/python
+tensorflow/contrib/graph_editor
+tensorflow/contrib/graph_editor/examples
+tensorflow/contrib/grid_rnn
+tensorflow/contrib/grid_rnn/python
+tensorflow/contrib/grid_rnn/python/ops
+tensorflow/contrib/hooks
+tensorflow/contrib/hooks/python
+tensorflow/contrib/image
+tensorflow/contrib/image/kernels
+tensorflow/contrib/image/ops
+tensorflow/contrib/image/python
+tensorflow/contrib/image/python/ops
+tensorflow/contrib/input_pipeline
+tensorflow/contrib/input_pipeline/kernels
+tensorflow/contrib/input_pipeline/ops
+tensorflow/contrib/input_pipeline/python
+tensorflow/contrib/input_pipeline/python/ops
+tensorflow/contrib/integrate
+tensorflow/contrib/integrate/python
+tensorflow/contrib/integrate/python/ops
+tensorflow/contrib/ios_examples
+tensorflow/contrib/ios_examples/benchmark
+tensorflow/contrib/ios_examples/benchmark/benchmark.xcodeproj
+tensorflow/contrib/ios_examples/benchmark/data
+tensorflow/contrib/ios_examples/camera
+tensorflow/contrib/ios_examples/camera/camera_example.xcodeproj
+tensorflow/contrib/ios_examples/camera/en.lproj
+tensorflow/contrib/ios_examples/simple
+tensorflow/contrib/ios_examples/simple/data
+tensorflow/contrib/ios_examples/simple/tf_ios_makefile_example.xcodeproj
+tensorflow/contrib/keras
+tensorflow/contrib/keras/api
+tensorflow/contrib/keras/api/keras
+tensorflow/contrib/keras/api/keras/activations
+tensorflow/contrib/keras/api/keras/applications
+tensorflow/contrib/keras/api/keras/applications/inception_v3
+tensorflow/contrib/keras/api/keras/applications/mobilenet
+tensorflow/contrib/keras/api/keras/applications/resnet50
+tensorflow/contrib/keras/api/keras/applications/vgg16
+tensorflow/contrib/keras/api/keras/applications/vgg19
+tensorflow/contrib/keras/api/keras/applications/xception
+tensorflow/contrib/keras/api/keras/backend
+tensorflow/contrib/keras/api/keras/callbacks
+tensorflow/contrib/keras/api/keras/constraints
+tensorflow/contrib/keras/api/keras/datasets
+tensorflow/contrib/keras/api/keras/datasets/boston_housing
+tensorflow/contrib/keras/api/keras/datasets/cifar10
+tensorflow/contrib/keras/api/keras/datasets/cifar100
+tensorflow/contrib/keras/api/keras/datasets/imdb
+tensorflow/contrib/keras/api/keras/datasets/mnist
+tensorflow/contrib/keras/api/keras/datasets/reuters
+tensorflow/contrib/keras/api/keras/initializers
+tensorflow/contrib/keras/api/keras/layers
+tensorflow/contrib/keras/api/keras/losses
+tensorflow/contrib/keras/api/keras/metrics
+tensorflow/contrib/keras/api/keras/models
+tensorflow/contrib/keras/api/keras/optimizers
+tensorflow/contrib/keras/api/keras/preprocessing
+tensorflow/contrib/keras/api/keras/preprocessing/image
+tensorflow/contrib/keras/api/keras/preprocessing/sequence
+tensorflow/contrib/keras/api/keras/preprocessing/text
+tensorflow/contrib/keras/api/keras/regularizers
+tensorflow/contrib/keras/api/keras/utils
+tensorflow/contrib/keras/api/keras/wrappers
+tensorflow/contrib/keras/api/keras/wrappers/scikit_learn
+tensorflow/contrib/kernel_methods
+tensorflow/contrib/kernel_methods/python
+tensorflow/contrib/kernel_methods/python/mappers
+tensorflow/contrib/kfac
+tensorflow/contrib/kfac/examples
+tensorflow/contrib/kfac/python
+tensorflow/contrib/kfac/python/ops
+tensorflow/contrib/labeled_tensor
+tensorflow/contrib/labeled_tensor/python
+tensorflow/contrib/labeled_tensor/python/ops
+tensorflow/contrib/layers
+tensorflow/contrib/layers/kernels
+tensorflow/contrib/layers/ops
+tensorflow/contrib/layers/python
+tensorflow/contrib/layers/python/layers
+tensorflow/contrib/layers/python/ops
+tensorflow/contrib/learn
+tensorflow/contrib/learn/python
+tensorflow/contrib/learn/python/learn
+tensorflow/contrib/learn/python/learn/dataframe
+tensorflow/contrib/learn/python/learn/dataframe/queues
+tensorflow/contrib/learn/python/learn/dataframe/transforms
+tensorflow/contrib/learn/python/learn/datasets
+tensorflow/contrib/learn/python/learn/datasets/data
+tensorflow/contrib/learn/python/learn/estimators
+tensorflow/contrib/learn/python/learn/learn_io
+tensorflow/contrib/learn/python/learn/ops
+tensorflow/contrib/learn/python/learn/preprocessing
+tensorflow/contrib/learn/python/learn/utils
+tensorflow/contrib/legacy_seq2seq
+tensorflow/contrib/legacy_seq2seq/python
+tensorflow/contrib/legacy_seq2seq/python/ops
+tensorflow/contrib/linalg
+tensorflow/contrib/linalg/python
+tensorflow/contrib/linalg/python/ops
+tensorflow/contrib/linear_optimizer
+tensorflow/contrib/linear_optimizer/kernels
+tensorflow/contrib/linear_optimizer/kernels/g3doc
+tensorflow/contrib/linear_optimizer/python
+tensorflow/contrib/linear_optimizer/python/ops
+tensorflow/contrib/lookup
+tensorflow/contrib/losses
+tensorflow/contrib/losses/python
+tensorflow/contrib/losses/python/losses
+tensorflow/contrib/losses/python/metric_learning
+tensorflow/contrib/makefile
+tensorflow/contrib/memory_stats
+tensorflow/contrib/memory_stats/kernels
+tensorflow/contrib/memory_stats/ops
+tensorflow/contrib/memory_stats/python
+tensorflow/contrib/memory_stats/python/ops
+tensorflow/contrib/meta_graph_transform
+tensorflow/contrib/metrics
+tensorflow/contrib/metrics/ops
+tensorflow/contrib/metrics/python
+tensorflow/contrib/metrics/python/metrics
+tensorflow/contrib/metrics/python/ops
+tensorflow/contrib/model_pruning
+tensorflow/contrib/model_pruning/examples
+tensorflow/contrib/model_pruning/examples/cifar10
+tensorflow/contrib/model_pruning/python
+tensorflow/contrib/model_pruning/python/layers
+tensorflow/contrib/nccl
+tensorflow/contrib/nccl/kernels
+tensorflow/contrib/nccl/ops
+tensorflow/contrib/nccl/python
+tensorflow/contrib/nccl/python/ops
+tensorflow/contrib/ndlstm
+tensorflow/contrib/ndlstm/python
+tensorflow/contrib/nearest_neighbor/kernels
+tensorflow/contrib/nearest_neighbor/ops
+tensorflow/contrib/nearest_neighbor/python
+tensorflow/contrib/nearest_neighbor/python/ops
+tensorflow/contrib/nn
+tensorflow/contrib/nn/python
+tensorflow/contrib/nn/python/ops
+tensorflow/contrib/opt
+tensorflow/contrib/opt/python
+tensorflow/contrib/opt/python/training
+tensorflow/contrib/pi_examples
+tensorflow/contrib/pi_examples/camera
+tensorflow/contrib/pi_examples/label_image
+tensorflow/contrib/pi_examples/label_image/data
+tensorflow/contrib/periodic_resample
+tensorflow/contrib/periodic_resample/python
+tensorflow/contrib/periodic_resample/python/kernels
+tensorflow/contrib/periodic_resample/python/ops
+tensorflow/contrib/predictor
+tensorflow/contrib/quantization
+tensorflow/contrib/quantization/python
+tensorflow/contrib/quantize
+tensorflow/contrib/quantize/python
+tensorflow/contrib/receptive_field
+tensorflow/contrib/receptive_field/python
+tensorflow/contrib/reduce_slice_ops
+tensorflow/contrib/reduce_slice_ops/kernels
+tensorflow/contrib/reduce_slice_ops/ops
+tensorflow/contrib/reduce_slice_ops/python
+tensorflow/contrib/reduce_slice_ops/python/ops
+tensorflow/contrib/remote_fused_graph/pylib
+tensorflow/contrib/remote_fused_graph/pylib/python
+tensorflow/contrib/remote_fused_graph/pylib/python/ops
+tensorflow/contrib/resampler
+tensorflow/contrib/resampler/kernels
+tensorflow/contrib/resampler/ops
+tensorflow/contrib/resampler/python
+tensorflow/contrib/resampler/python/ops
+tensorflow/contrib/rnn
+tensorflow/contrib/rnn/kernels
+tensorflow/contrib/rnn/ops
+tensorflow/contrib/rnn/python
+tensorflow/contrib/rnn/python/ops
+tensorflow/contrib/saved_model
+tensorflow/contrib/saved_model/python
+tensorflow/contrib/saved_model/python/saved_model
+tensorflow/contrib/seq2seq
+tensorflow/contrib/seq2seq/kernels
+tensorflow/contrib/seq2seq/ops
+tensorflow/contrib/seq2seq/python
+tensorflow/contrib/seq2seq/python/ops
+tensorflow/contrib/session_bundle
+tensorflow/contrib/session_bundle/example
+tensorflow/contrib/signal
+tensorflow/contrib/signal/python
+tensorflow/contrib/signal/python/ops
+tensorflow/contrib/slim
+tensorflow/contrib/slim/python
+tensorflow/contrib/slim/python/slim
+tensorflow/contrib/slim/python/slim/data
+tensorflow/contrib/slim/python/slim/nets
+tensorflow/contrib/solvers
+tensorflow/contrib/solvers/python
+tensorflow/contrib/solvers/python/ops
+tensorflow/contrib/sparsemax
+tensorflow/contrib/sparsemax/python
+tensorflow/contrib/sparsemax/python/ops
+tensorflow/contrib/specs
+tensorflow/contrib/specs/python
+tensorflow/contrib/staging
+tensorflow/contrib/stat_summarizer
+tensorflow/contrib/stat_summarizer/python
+tensorflow/contrib/stateless
+tensorflow/contrib/stateless/python
+tensorflow/contrib/summary
+tensorflow/contrib/tensorboard
+tensorflow/contrib/tensorboard/plugins
+tensorflow/contrib/tensorboard/plugins/projector
+tensorflow/contrib/tensor_forest
+tensorflow/contrib/tensor_forest/client
+tensorflow/contrib/tensor_forest/core
+tensorflow/contrib/tensor_forest/core/ops
+tensorflow/contrib/tensor_forest/data
+tensorflow/contrib/tensor_forest/hybrid
+tensorflow/contrib/tensor_forest/hybrid/core
+tensorflow/contrib/tensor_forest/hybrid/core/ops
+tensorflow/contrib/tensor_forest/hybrid/ops
+tensorflow/contrib/tensor_forest/hybrid/python
+tensorflow/contrib/tensor_forest/hybrid/python/layers
+tensorflow/contrib/tensor_forest/hybrid/python/models
+tensorflow/contrib/tensor_forest/hybrid/python/ops
+tensorflow/contrib/tensor_forest/kernels
+tensorflow/contrib/tensor_forest/python
+tensorflow/contrib/tensor_forest/python/ops
+tensorflow/contrib/testing
+tensorflow/contrib/testing/python
+tensorflow/contrib/testing/python/framework
+tensorflow/contrib/text
+tensorflow/contrib/text/kernels
+tensorflow/contrib/text/ops
+tensorflow/contrib/text/python
+tensorflow/contrib/text/python/ops
+tensorflow/contrib/tfprof
+tensorflow/contrib/timeseries
+tensorflow/contrib/timeseries/examples
+tensorflow/contrib/timeseries/examples/data
+tensorflow/contrib/timeseries/python
+tensorflow/contrib/timeseries/python/timeseries
+tensorflow/contrib/timeseries/python/timeseries/state_space_models
+tensorflow/contrib/tpu
+tensorflow/contrib/tpu/ops
+tensorflow/contrib/tpu/profiler
+tensorflow/contrib/tpu/python
+tensorflow/contrib/tpu/python/ops
+tensorflow/contrib/tpu/python/profiler
+tensorflow/contrib/tpu/python/tpu
+tensorflow/contrib/training
+tensorflow/contrib/training/python
+tensorflow/contrib/training/python/training
+tensorflow/contrib/util
diff --git a/tensorflow/contrib/cmake/python_protos.txt b/tensorflow/contrib/cmake/python_protos.txt
new file mode 100644
index 0000000000..8a9c406d8b
--- /dev/null
+++ b/tensorflow/contrib/cmake/python_protos.txt
@@ -0,0 +1,19 @@
+tensorflow/core
+tensorflow/core/profiler
+tensorflow/python
+tensorflow/contrib/boosted_trees/proto
+tensorflow/contrib/cloud/kernels
+tensorflow/contrib/decision_trees/proto
+tensorflow/contrib/gdr
+tensorflow/contrib/lite/toco
+tensorflow/contrib/mpi
+tensorflow/contrib/mpi_collectives
+tensorflow/contrib/session_bundle
+tensorflow/contrib/tensor_forest/proto
+tensorflow/contrib/tensorboard/graph_explorer/proto
+tensorflow/contrib/tensorboard/plugins/projector
+tensorflow/contrib/tensorboard/plugins/trace
+tensorflow/contrib/tpu/proto
+tensorflow/contrib/tpu/profiler
+tensorflow/contrib/training/python/training
+tensorflow/contrib/verbs
diff --git a/tensorflow/contrib/cmake/python_protos_cc.txt b/tensorflow/contrib/cmake/python_protos_cc.txt
new file mode 100644
index 0000000000..d4a257b25c
--- /dev/null
+++ b/tensorflow/contrib/cmake/python_protos_cc.txt
@@ -0,0 +1,5 @@
+tensorflow/core/profiler
+tensorflow/python
+tensorflow/contrib/session_bundle
+tensorflow/contrib/tensorboard
+tensorflow/contrib/training
diff --git a/tensorflow/contrib/cmake/tf_python.cmake b/tensorflow/contrib/cmake/tf_python.cmake
index 800143e0db..9d54380f86 100755
--- a/tensorflow/contrib/cmake/tf_python.cmake
+++ b/tensorflow/contrib/cmake/tf_python.cmake
@@ -120,33 +120,34 @@ function(RELATIVE_PROTOBUF_GENERATE_CPP SRCS HDRS ROOT_DIR)
   set(${HDRS} ${${HDRS}} PARENT_SCOPE)
 endfunction()
 
-file(GLOB_RECURSE tf_protos_python_srcs RELATIVE ${tensorflow_source_dir}
-    "${tensorflow_source_dir}/tensorflow/core/*.proto"
-    "${tensorflow_source_dir}/tensorflow/core/profiler/*.proto"
-    "${tensorflow_source_dir}/tensorflow/python/*.proto"
-    "${tensorflow_source_dir}/tensorflow/contrib/boosted_trees/proto/*.proto"
-    "${tensorflow_source_dir}/tensorflow/contrib/decision_trees/proto/*.proto"
-    "${tensorflow_source_dir}/tensorflow/contrib/session_bundle/*.proto"
-    "${tensorflow_source_dir}/tensorflow/contrib/tensor_forest/proto/*.proto"
-    "${tensorflow_source_dir}/tensorflow/contrib/tensorboard/*.proto"
-    "${tensorflow_source_dir}/tensorflow/contrib/tpu/proto/*.proto"
-    "${tensorflow_source_dir}/tensorflow/contrib/tpu/profiler/*.proto"
-    "${tensorflow_source_dir}/tensorflow/contrib/training/*.proto"
-)
+FILE(READ python_protos.txt python_protos)
+# Convert file contents into a CMake list (where each element in the list is one line of the file)
+STRING(REGEX REPLACE ";" "\\\\;" python_protos "${python_protos}")
+STRING(REGEX REPLACE "\n" ";" python_protos "${python_protos}")
+
+foreach(python_proto ${python_protos})
+  file(GLOB_RECURSE tf_python_protos_src RELATIVE ${tensorflow_source_dir}
+      "${tensorflow_source_dir}/${python_proto}/*.proto"
+  )
+  list(APPEND tf_python_protos_srcs ${tf_python_protos_src})
+endforeach(python_proto)
+
 RELATIVE_PROTOBUF_GENERATE_PYTHON(
-    ${tensorflow_source_dir} PYTHON_PROTO_GENFILES ${tf_protos_python_srcs}
+    ${tensorflow_source_dir} PYTHON_PROTO_GENFILES ${tf_python_protos_srcs}
 )
 
-# NOTE(mrry): Avoid regenerating the tensorflow/core protos because this
-# can cause benign-but-failing-on-Windows-due-to-file-locking conflicts
-# when two rules attempt to generate the same file.
-file(GLOB_RECURSE tf_python_protos_cc_srcs RELATIVE ${tensorflow_source_dir}
-    "${tensorflow_source_dir}/tensorflow/core/profiler/*.proto"
-    "${tensorflow_source_dir}/tensorflow/python/*.proto"
-    "${tensorflow_source_dir}/tensorflow/contrib/session_bundle/*.proto"
-    "${tensorflow_source_dir}/tensorflow/contrib/tensorboard/*.proto"
-    "${tensorflow_source_dir}/tensorflow/contrib/training/*.proto"
-)
+FILE(READ python_protos_cc.txt python_protos_cc)
+# Convert file contents into a CMake list (where each element in the list is one line of the file)
+STRING(REGEX REPLACE ";" "\\\\;" python_protos_cc "${python_protos_cc}")
+STRING(REGEX REPLACE "\n" ";" python_protos_cc "${python_protos_cc}")
+
+foreach(python_proto_cc ${python_protos_cc})
+  file(GLOB_RECURSE tf_python_protos_cc_src RELATIVE ${tensorflow_source_dir}
+      "${tensorflow_source_dir}/${python_proto_cc}/*.proto"
+  )
+  list(APPEND tf_python_protos_cc_srcs ${tf_python_protos_cc_src})
+endforeach(python_proto_cc)
+
 RELATIVE_PROTOBUF_GENERATE_CPP(PROTO_SRCS PROTO_HDRS
     ${tensorflow_source_dir} ${tf_python_protos_cc_srcs}
 )
@@ -192,315 +193,15 @@ function(add_python_module MODULE_NAME)
     endif()
 endfunction()
 
-add_python_module("tensorflow")
-add_python_module("tensorflow/core")
-add_python_module("tensorflow/core/example")
-add_python_module("tensorflow/core/framework")
-add_python_module("tensorflow/core/lib")
-add_python_module("tensorflow/core/lib/core")
-add_python_module("tensorflow/core/protobuf")
-add_python_module("tensorflow/core/util")
-add_python_module("tensorflow/examples")
-add_python_module("tensorflow/examples/tutorials")
-add_python_module("tensorflow/examples/tutorials/mnist")
-add_python_module("tensorflow/python")
-add_python_module("tensorflow/python/client")
-add_python_module("tensorflow/python/data")
-add_python_module("tensorflow/python/data/ops")
-add_python_module("tensorflow/python/data/util")
-add_python_module("tensorflow/python/debug")
-add_python_module("tensorflow/python/debug/cli")
-add_python_module("tensorflow/python/debug/examples")
-add_python_module("tensorflow/python/debug/lib")
-add_python_module("tensorflow/python/debug/wrappers")
-add_python_module("tensorflow/python/eager")
-add_python_module("tensorflow/python/estimator")
-add_python_module("tensorflow/python/estimator/canned")
-add_python_module("tensorflow/python/estimator/export")
-add_python_module("tensorflow/python/estimator/inputs")
-add_python_module("tensorflow/python/estimator/inputs/queues")
-add_python_module("tensorflow/python/feature_column")
-add_python_module("tensorflow/python/framework")
-add_python_module("tensorflow/python/grappler")
-add_python_module("tensorflow/python/keras")
-add_python_module("tensorflow/python/keras/activations")
-add_python_module("tensorflow/python/keras/applications")
-add_python_module("tensorflow/python/keras/applications/inception_resnet_v2")
-add_python_module("tensorflow/python/keras/applications/inception_v3")
-add_python_module("tensorflow/python/keras/applications/mobilenet")
-add_python_module("tensorflow/python/keras/applications/resnet50")
-add_python_module("tensorflow/python/keras/applications/vgg16")
-add_python_module("tensorflow/python/keras/applications/vgg19")
-add_python_module("tensorflow/python/keras/applications/xception")
-add_python_module("tensorflow/python/keras/backend")
-add_python_module("tensorflow/python/keras/callbacks")
-add_python_module("tensorflow/python/keras/constraints")
-add_python_module("tensorflow/python/keras/datasets")
-add_python_module("tensorflow/python/keras/datasets/boston_housing")
-add_python_module("tensorflow/python/keras/datasets/cifar10")
-add_python_module("tensorflow/python/keras/datasets/cifar100")
-add_python_module("tensorflow/python/keras/datasets/fashion_mnist")
-add_python_module("tensorflow/python/keras/datasets/imdb")
-add_python_module("tensorflow/python/keras/datasets/mnist")
-add_python_module("tensorflow/python/keras/datasets/reuters")
-add_python_module("tensorflow/python/keras/estimator")
-add_python_module("tensorflow/python/keras/initializers")
-add_python_module("tensorflow/python/keras/layers")
-add_python_module("tensorflow/python/keras/losses")
-add_python_module("tensorflow/python/keras/metrics")
-add_python_module("tensorflow/python/keras/models")
-add_python_module("tensorflow/python/keras/optimizers")
-add_python_module("tensorflow/python/keras/preprocessing")
-add_python_module("tensorflow/python/keras/preprocessing/image")
-add_python_module("tensorflow/python/keras/preprocessing/sequence")
-add_python_module("tensorflow/python/keras/preprocessing/text")
-add_python_module("tensorflow/python/keras/regularizers")
-add_python_module("tensorflow/python/keras/utils")
-add_python_module("tensorflow/python/keras/wrappers")
-add_python_module("tensorflow/python/keras/wrappers/scikit_learn")
-add_python_module("tensorflow/python/keras/_impl")
-add_python_module("tensorflow/python/keras/_impl/keras")
-add_python_module("tensorflow/python/keras/_impl/keras/applications")
-add_python_module("tensorflow/python/keras/_impl/keras/datasets")
-add_python_module("tensorflow/python/keras/_impl/keras/engine")
-add_python_module("tensorflow/python/keras/_impl/keras/layers")
-add_python_module("tensorflow/python/keras/_impl/keras/preprocessing")
-add_python_module("tensorflow/python/keras/_impl/keras/utils")
-add_python_module("tensorflow/python/keras/_impl/keras/wrappers")
-add_python_module("tensorflow/python/kernel_tests")
-add_python_module("tensorflow/python/kernel_tests/distributions")
-add_python_module("tensorflow/python/kernel_tests/linalg")
-add_python_module("tensorflow/python/layers")
-add_python_module("tensorflow/python/lib")
-add_python_module("tensorflow/python/lib/core")
-add_python_module("tensorflow/python/lib/io")
-add_python_module("tensorflow/python/ops")
-add_python_module("tensorflow/python/ops/distributions")
-add_python_module("tensorflow/python/ops/linalg")
-add_python_module("tensorflow/python/ops/losses")
-add_python_module("tensorflow/python/platform")
-add_python_module("tensorflow/python/platform/default")
-add_python_module("tensorflow/python/platform/summary")
-add_python_module("tensorflow/python/profiler/")
-add_python_module("tensorflow/python/profiler/internal")
-add_python_module("tensorflow/python/saved_model")
-add_python_module("tensorflow/python/summary")
-add_python_module("tensorflow/python/summary/writer")
-add_python_module("tensorflow/python/tools")
-add_python_module("tensorflow/python/training")
-add_python_module("tensorflow/python/user_ops")
-add_python_module("tensorflow/python/util")
-add_python_module("tensorflow/python/util/protobuf")
-add_python_module("tensorflow/tools")
-add_python_module("tensorflow/tools/graph_transforms")
-add_python_module("tensorflow/contrib")
-add_python_module("tensorflow/contrib/all_reduce")
-add_python_module("tensorflow/contrib/all_reduce/python")
-add_python_module("tensorflow/contrib/android")
-add_python_module("tensorflow/contrib/android/java")
-add_python_module("tensorflow/contrib/android/java/org")
-add_python_module("tensorflow/contrib/android/java/org/tensorflow")
-add_python_module("tensorflow/contrib/android/java/org/tensorflow/contrib")
-add_python_module("tensorflow/contrib/android/java/org/tensorflow/contrib/android")
-add_python_module("tensorflow/contrib/android/jni")
-add_python_module("tensorflow/contrib/bayesflow")
-add_python_module("tensorflow/contrib/bayesflow/examples")
-add_python_module("tensorflow/contrib/bayesflow/examples/reinforce_simple")
-add_python_module("tensorflow/contrib/bayesflow/python")
-add_python_module("tensorflow/contrib/bayesflow/python/kernel_tests")
-add_python_module("tensorflow/contrib/bayesflow/python/ops")
-add_python_module("tensorflow/contrib/boosted_trees")
-add_python_module("tensorflow/contrib/boosted_trees/estimator_batch")
-add_python_module("tensorflow/contrib/boosted_trees/ops")
-add_python_module("tensorflow/contrib/boosted_trees/proto")
-add_python_module("tensorflow/contrib/boosted_trees/python")
-add_python_module("tensorflow/contrib/boosted_trees/python/kernel_tests")
-add_python_module("tensorflow/contrib/boosted_trees/python/ops")
-add_python_module("tensorflow/contrib/cloud")
-add_python_module("tensorflow/contrib/cloud/kernels")
-add_python_module("tensorflow/contrib/cloud/ops")
-add_python_module("tensorflow/contrib/cloud/python")
-add_python_module("tensorflow/contrib/cloud/python/ops")
-add_python_module("tensorflow/contrib/cluster_resolver")
-add_python_module("tensorflow/contrib/cluster_resolver/python")
-add_python_module("tensorflow/contrib/cluster_resolver/python/training")
-add_python_module("tensorflow/contrib/compiler")
-add_python_module("tensorflow/contrib/copy_graph")
-add_python_module("tensorflow/contrib/copy_graph/python")
-add_python_module("tensorflow/contrib/copy_graph/python/util")
-add_python_module("tensorflow/contrib/crf")
-add_python_module("tensorflow/contrib/crf/python")
-add_python_module("tensorflow/contrib/crf/python/kernel_tests")
-add_python_module("tensorflow/contrib/crf/python/ops")
-add_python_module("tensorflow/contrib/cudnn_rnn")
-add_python_module("tensorflow/contrib/cudnn_rnn/kernels")
-add_python_module("tensorflow/contrib/cudnn_rnn/ops")
-add_python_module("tensorflow/contrib/cudnn_rnn/python")
-add_python_module("tensorflow/contrib/cudnn_rnn/python/kernel_tests")
-add_python_module("tensorflow/contrib/cudnn_rnn/python/layers")
-add_python_module("tensorflow/contrib/cudnn_rnn/python/ops")
-add_python_module("tensorflow/contrib/data")
-add_python_module("tensorflow/contrib/data/python")
-add_python_module("tensorflow/contrib/data/python/kernel_tests")
-add_python_module("tensorflow/contrib/data/python/ops")
-add_python_module("tensorflow/contrib/decision_trees")
-add_python_module("tensorflow/contrib/decision_trees/proto")
-add_python_module("tensorflow/contrib/deprecated")
-add_python_module("tensorflow/contrib/distributions")
-add_python_module("tensorflow/contrib/distributions/python")
-add_python_module("tensorflow/contrib/distributions/python/kernel_tests")
-add_python_module("tensorflow/contrib/distributions/python/ops")
-add_python_module("tensorflow/contrib/distributions/python/ops/bijectors")
-add_python_module("tensorflow/contrib/eager")
-add_python_module("tensorflow/contrib/eager/python")
-add_python_module("tensorflow/contrib/estimator")
-add_python_module("tensorflow/contrib/estimator/python")
-add_python_module("tensorflow/contrib/estimator/python/estimator")
-add_python_module("tensorflow/contrib/factorization")
-add_python_module("tensorflow/contrib/factorization/examples")
-add_python_module("tensorflow/contrib/factorization/kernels")
-add_python_module("tensorflow/contrib/factorization/ops")
-add_python_module("tensorflow/contrib/factorization/python")
-add_python_module("tensorflow/contrib/factorization/python/kernel_tests")
-add_python_module("tensorflow/contrib/factorization/python/ops")
-add_python_module("tensorflow/contrib/ffmpeg")
-add_python_module("tensorflow/contrib/ffmpeg/default")
-add_python_module("tensorflow/contrib/ffmpeg/testdata")
-add_python_module("tensorflow/contrib/framework")
-add_python_module("tensorflow/contrib/framework/kernels")
-add_python_module("tensorflow/contrib/framework/ops")
-add_python_module("tensorflow/contrib/framework/python")
-add_python_module("tensorflow/contrib/framework/python/framework")
-add_python_module("tensorflow/contrib/framework/python/ops")
-add_python_module("tensorflow/contrib/gan")
-add_python_module("tensorflow/contrib/gan/python")
-add_python_module("tensorflow/contrib/gan/python/eval")
-add_python_module("tensorflow/contrib/gan/python/eval/python")
-add_python_module("tensorflow/contrib/gan/python/features")
-add_python_module("tensorflow/contrib/gan/python/features/python")
-add_python_module("tensorflow/contrib/gan/python/estimator")
-add_python_module("tensorflow/contrib/gan/python/estimator/python")
-add_python_module("tensorflow/contrib/gan/python/losses")
-add_python_module("tensorflow/contrib/gan/python/losses/python")
-add_python_module("tensorflow/contrib/graph_editor")
-add_python_module("tensorflow/contrib/graph_editor/examples")
-add_python_module("tensorflow/contrib/graph_editor/tests")
-add_python_module("tensorflow/contrib/grid_rnn")
-add_python_module("tensorflow/contrib/grid_rnn/python")
-add_python_module("tensorflow/contrib/grid_rnn/python/kernel_tests")
-add_python_module("tensorflow/contrib/grid_rnn/python/ops")
-add_python_module("tensorflow/contrib/hooks")
-add_python_module("tensorflow/contrib/image")
-add_python_module("tensorflow/contrib/image/ops")
-add_python_module("tensorflow/contrib/image/python")
-add_python_module("tensorflow/contrib/image/python/ops")
-add_python_module("tensorflow/contrib/input_pipeline")
-add_python_module("tensorflow/contrib/input_pipeline/ops")
-add_python_module("tensorflow/contrib/input_pipeline/python")
-add_python_module("tensorflow/contrib/input_pipeline/python/ops")
-add_python_module("tensorflow/contrib/integrate")
-add_python_module("tensorflow/contrib/integrate/python")
-add_python_module("tensorflow/contrib/integrate/python/ops")
-add_python_module("tensorflow/contrib/ios_examples")
-add_python_module("tensorflow/contrib/ios_examples/benchmark")
-add_python_module("tensorflow/contrib/ios_examples/benchmark/benchmark.xcodeproj")
-add_python_module("tensorflow/contrib/ios_examples/benchmark/data")
-add_python_module("tensorflow/contrib/ios_examples/camera")
-add_python_module("tensorflow/contrib/ios_examples/camera/camera_example.xcodeproj")
-add_python_module("tensorflow/contrib/ios_examples/camera/en.lproj")
-add_python_module("tensorflow/contrib/ios_examples/simple")
-add_python_module("tensorflow/contrib/ios_examples/simple/data")
-add_python_module("tensorflow/contrib/ios_examples/simple/tf_ios_makefile_example.xcodeproj")
-add_python_module("tensorflow/contrib/keras")
-add_python_module("tensorflow/contrib/keras/api")
-add_python_module("tensorflow/contrib/keras/api/keras")
-add_python_module("tensorflow/contrib/keras/api/keras/activations")
-add_python_module("tensorflow/contrib/keras/api/keras/applications")
-add_python_module("tensorflow/contrib/keras/api/keras/applications/inception_v3")
-add_python_module("tensorflow/contrib/keras/api/keras/applications/mobilenet")
-add_python_module("tensorflow/contrib/keras/api/keras/applications/resnet50")
-add_python_module("tensorflow/contrib/keras/api/keras/applications/vgg16")
-add_python_module("tensorflow/contrib/keras/api/keras/applications/vgg19")
-add_python_module("tensorflow/contrib/keras/api/keras/applications/xception")
-add_python_module("tensorflow/contrib/keras/api/keras/backend")
-add_python_module("tensorflow/contrib/keras/api/keras/callbacks")
-add_python_module("tensorflow/contrib/keras/api/keras/constraints")
-add_python_module("tensorflow/contrib/keras/api/keras/datasets")
-add_python_module("tensorflow/contrib/keras/api/keras/datasets/boston_housing")
-add_python_module("tensorflow/contrib/keras/api/keras/datasets/cifar10")
-add_python_module("tensorflow/contrib/keras/api/keras/datasets/cifar100")
-add_python_module("tensorflow/contrib/keras/api/keras/datasets/imdb")
-add_python_module("tensorflow/contrib/keras/api/keras/datasets/mnist")
-add_python_module("tensorflow/contrib/keras/api/keras/datasets/reuters")
-add_python_module("tensorflow/contrib/keras/api/keras/initializers")
-add_python_module("tensorflow/contrib/keras/api/keras/layers")
-add_python_module("tensorflow/contrib/keras/api/keras/losses")
-add_python_module("tensorflow/contrib/keras/api/keras/metrics")
-add_python_module("tensorflow/contrib/keras/api/keras/models")
-add_python_module("tensorflow/contrib/keras/api/keras/optimizers")
-add_python_module("tensorflow/contrib/keras/api/keras/preprocessing")
-add_python_module("tensorflow/contrib/keras/api/keras/preprocessing/image")
-add_python_module("tensorflow/contrib/keras/api/keras/preprocessing/sequence")
-add_python_module("tensorflow/contrib/keras/api/keras/preprocessing/text")
-add_python_module("tensorflow/contrib/keras/api/keras/regularizers")
-add_python_module("tensorflow/contrib/keras/api/keras/utils")
-add_python_module("tensorflow/contrib/keras/api/keras/wrappers")
-add_python_module("tensorflow/contrib/keras/api/keras/wrappers/scikit_learn")
-add_python_module("tensorflow/contrib/keras/python")
-add_python_module("tensorflow/contrib/keras/python/keras")
-add_python_module("tensorflow/contrib/keras/python/keras/applications")
-add_python_module("tensorflow/contrib/keras/python/keras/datasets")
-add_python_module("tensorflow/contrib/keras/python/keras/engine")
-add_python_module("tensorflow/contrib/keras/python/keras/layers")
-add_python_module("tensorflow/contrib/keras/python/keras/preprocessing")
-add_python_module("tensorflow/contrib/keras/python/keras/utils")
-add_python_module("tensorflow/contrib/keras/python/keras/wrappers")
-add_python_module("tensorflow/contrib/kernel_methods")
-add_python_module("tensorflow/contrib/kernel_methods/python")
-add_python_module("tensorflow/contrib/kernel_methods/python/mappers")
-add_python_module("tensorflow/contrib/kfac")
-add_python_module("tensorflow/contrib/kfac/examples")
-add_python_module("tensorflow/contrib/kfac/python")
-add_python_module("tensorflow/contrib/kfac/python/ops")
-add_python_module("tensorflow/contrib/labeled_tensor")
-add_python_module("tensorflow/contrib/labeled_tensor/python")
-add_python_module("tensorflow/contrib/labeled_tensor/python/ops")
-add_python_module("tensorflow/contrib/layers")
-add_python_module("tensorflow/contrib/layers/kernels")
-add_python_module("tensorflow/contrib/layers/ops")
-add_python_module("tensorflow/contrib/layers/python")
-add_python_module("tensorflow/contrib/layers/python/kernel_tests")
-add_python_module("tensorflow/contrib/layers/python/layers")
-add_python_module("tensorflow/contrib/layers/python/ops")
-add_python_module("tensorflow/contrib/learn")
-add_python_module("tensorflow/contrib/learn/python")
-add_python_module("tensorflow/contrib/learn/python/learn")
-add_python_module("tensorflow/contrib/learn/python/learn/dataframe")
-add_python_module("tensorflow/contrib/learn/python/learn/dataframe/queues")
-add_python_module("tensorflow/contrib/learn/python/learn/dataframe/transforms")
-add_python_module("tensorflow/contrib/learn/python/learn/datasets")
-add_python_module("tensorflow/contrib/learn/python/learn/datasets/data")
-add_python_module("tensorflow/contrib/learn/python/learn/estimators")
-add_python_module("tensorflow/contrib/learn/python/learn/learn_io")
-add_python_module("tensorflow/contrib/learn/python/learn/ops")
-add_python_module("tensorflow/contrib/learn/python/learn/preprocessing")
-add_python_module("tensorflow/contrib/learn/python/learn/preprocessing/tests")
-add_python_module("tensorflow/contrib/learn/python/learn/tests")
-add_python_module("tensorflow/contrib/learn/python/learn/tests/dataframe")
-add_python_module("tensorflow/contrib/learn/python/learn/utils")
-add_python_module("tensorflow/contrib/legacy_seq2seq")
-add_python_module("tensorflow/contrib/legacy_seq2seq/python")
-add_python_module("tensorflow/contrib/legacy_seq2seq/python/ops")
-add_python_module("tensorflow/contrib/linalg")
-add_python_module("tensorflow/contrib/linalg/python")
-add_python_module("tensorflow/contrib/linalg/python/ops")
-add_python_module("tensorflow/contrib/linalg/python/kernel_tests")
-add_python_module("tensorflow/contrib/linear_optimizer")
-add_python_module("tensorflow/contrib/linear_optimizer/kernels")
-add_python_module("tensorflow/contrib/linear_optimizer/kernels/g3doc")
-add_python_module("tensorflow/contrib/linear_optimizer/python")
-add_python_module("tensorflow/contrib/linear_optimizer/python/kernel_tests")
-add_python_module("tensorflow/contrib/linear_optimizer/python/ops")
+FILE(READ python_modules.txt python_modules)
+# Convert file contents into a CMake list (where each element in the list is one line of the file)
+STRING(REGEX REPLACE ";" "\\\\;" python_modules "${python_modules}")
+STRING(REGEX REPLACE "\n" ";" python_modules "${python_modules}")
+
+foreach(python_module ${python_modules})
+  add_python_module(${python_module})
+endforeach(python_module)
+
 add_custom_command(TARGET tf_python_touchup_modules PRE_BUILD
     COMMAND ${CMAKE_COMMAND} -E make_directory
     "${CMAKE_CURRENT_BINARY_DIR}/tf_python/tensorflow/contrib/lite")
@@ -514,161 +215,6 @@ add_custom_command(
     TARGET tf_python_copy_scripts_to_destination PRE_BUILD
     COMMAND ${CMAKE_COMMAND} -E touch
     ${CMAKE_CURRENT_BINARY_DIR}/tf_python/tensorflow/contrib/lite/python/lite.py)
-add_python_module("tensorflow/contrib/lookup")
-add_python_module("tensorflow/contrib/losses")
-add_python_module("tensorflow/contrib/losses/python")
-add_python_module("tensorflow/contrib/losses/python/losses")
-add_python_module("tensorflow/contrib/losses/python/metric_learning")
-add_python_module("tensorflow/contrib/makefile")
-add_python_module("tensorflow/contrib/makefile/test")
-add_python_module("tensorflow/contrib/memory_stats")
-add_python_module("tensorflow/contrib/memory_stats/kernels")
-add_python_module("tensorflow/contrib/memory_stats/ops")
-add_python_module("tensorflow/contrib/memory_stats/python")
-add_python_module("tensorflow/contrib/memory_stats/python/kernel_tests")
-add_python_module("tensorflow/contrib/memory_stats/python/ops")
-add_python_module("tensorflow/contrib/meta_graph_transform")
-add_python_module("tensorflow/contrib/metrics")
-add_python_module("tensorflow/contrib/metrics/kernels")
-add_python_module("tensorflow/contrib/metrics/ops")
-add_python_module("tensorflow/contrib/metrics/python")
-add_python_module("tensorflow/contrib/metrics/python/kernel_tests")
-add_python_module("tensorflow/contrib/metrics/python/metrics")
-add_python_module("tensorflow/contrib/metrics/python/ops")
-add_python_module("tensorflow/contrib/model_pruning")
-add_python_module("tensorflow/contrib/model_pruning/examples")
-add_python_module("tensorflow/contrib/model_pruning/examples/cifar10")
-add_python_module("tensorflow/contrib/model_pruning/python")
-add_python_module("tensorflow/contrib/model_pruning/python/layers")
-add_python_module("tensorflow/contrib/ndlstm")
-add_python_module("tensorflow/contrib/ndlstm/python")
-add_python_module("tensorflow/contrib/nn")
-add_python_module("tensorflow/contrib/nn/python")
-add_python_module("tensorflow/contrib/nn/python/ops")
-add_python_module("tensorflow/contrib/nccl")
-add_python_module("tensorflow/contrib/nccl/kernels")
-add_python_module("tensorflow/contrib/nccl/ops")
-add_python_module("tensorflow/contrib/nccl/python")
-add_python_module("tensorflow/contrib/nccl/python/ops")
-add_python_module("tensorflow/contrib/nearest_neighbor/kernels")
-add_python_module("tensorflow/contrib/nearest_neighbor/ops")
-add_python_module("tensorflow/contrib/nearest_neighbor/python")
-add_python_module("tensorflow/contrib/nearest_neighbor/python/kernel_tests")
-add_python_module("tensorflow/contrib/nearest_neighbor/python/ops")
-add_python_module("tensorflow/contrib/opt")
-add_python_module("tensorflow/contrib/opt/python")
-add_python_module("tensorflow/contrib/opt/python/training")
-add_python_module("tensorflow/contrib/pi_examples")
-add_python_module("tensorflow/contrib/pi_examples/camera")
-add_python_module("tensorflow/contrib/pi_examples/label_image")
-add_python_module("tensorflow/contrib/pi_examples/label_image/data")
-add_python_module("tensorflow/contrib/periodic_resample")
-add_python_module("tensorflow/contrib/periodic_resample/python")
-add_python_module("tensorflow/contrib/periodic_resample/python/ops")
-add_python_module("tensorflow/contrib/periodic_resample/python/kernel_tests")
-add_python_module("tensorflow/contrib/predictor")
-add_python_module("tensorflow/contrib/quantization")
-add_python_module("tensorflow/contrib/quantization/python")
-add_python_module("tensorflow/contrib/quantize")
-add_python_module("tensorflow/contrib/quantize/python")
-add_python_module("tensorflow/contrib/remote_fused_graph/pylib")
-add_python_module("tensorflow/contrib/remote_fused_graph/pylib/python")
-add_python_module("tensorflow/contrib/remote_fused_graph/pylib/python/ops")
-add_python_module("tensorflow/contrib/resampler")
-add_python_module("tensorflow/contrib/resampler/kernels")
-add_python_module("tensorflow/contrib/resampler/ops")
-add_python_module("tensorflow/contrib/resampler/python")
-add_python_module("tensorflow/contrib/resampler/python/ops")
-add_python_module("tensorflow/contrib/rnn")
-add_python_module("tensorflow/contrib/rnn/kernels")
-add_python_module("tensorflow/contrib/rnn/ops")
-add_python_module("tensorflow/contrib/rnn/python")
-add_python_module("tensorflow/contrib/rnn/python/kernel_tests")
-add_python_module("tensorflow/contrib/rnn/python/ops")
-add_python_module("tensorflow/contrib/saved_model")
-add_python_module("tensorflow/contrib/saved_model/python")
-add_python_module("tensorflow/contrib/saved_model/python/saved_model")
-add_python_module("tensorflow/contrib/seq2seq")
-add_python_module("tensorflow/contrib/seq2seq/kernels")
-add_python_module("tensorflow/contrib/seq2seq/ops")
-add_python_module("tensorflow/contrib/seq2seq/python")
-add_python_module("tensorflow/contrib/seq2seq/python/kernel_tests")
-add_python_module("tensorflow/contrib/seq2seq/python/ops")
-add_python_module("tensorflow/contrib/session_bundle")
-add_python_module("tensorflow/contrib/session_bundle/example")
-add_python_module("tensorflow/contrib/session_bundle/testdata")
-add_python_module("tensorflow/contrib/signal")
-add_python_module("tensorflow/contrib/signal/python")
-add_python_module("tensorflow/contrib/signal/python/ops")
-add_python_module("tensorflow/contrib/slim")
-add_python_module("tensorflow/contrib/slim/python")
-add_python_module("tensorflow/contrib/slim/python/slim")
-add_python_module("tensorflow/contrib/slim/python/slim/data")
-add_python_module("tensorflow/contrib/slim/python/slim/nets")
-add_python_module("tensorflow/contrib/solvers")
-add_python_module("tensorflow/contrib/solvers/python")
-add_python_module("tensorflow/contrib/solvers/python/ops")
-add_python_module("tensorflow/contrib/sparsemax")
-add_python_module("tensorflow/contrib/sparsemax/python")
-add_python_module("tensorflow/contrib/sparsemax/python/ops")
-add_python_module("tensorflow/contrib/specs")
-add_python_module("tensorflow/contrib/specs/python")
-add_python_module("tensorflow/contrib/staging")
-add_python_module("tensorflow/contrib/stat_summarizer")
-add_python_module("tensorflow/contrib/stateless")
-add_python_module("tensorflow/contrib/tensorboard")
-add_python_module("tensorflow/contrib/tensorboard/plugins")
-add_python_module("tensorflow/contrib/tensorboard/plugins/projector")
-add_python_module("tensorflow/contrib/tensor_forest")
-add_python_module("tensorflow/contrib/tensor_forest/client")
-add_python_module("tensorflow/contrib/tensor_forest/core")
-add_python_module("tensorflow/contrib/tensor_forest/core/ops")
-add_python_module("tensorflow/contrib/tensor_forest/data")
-add_python_module("tensorflow/contrib/tensor_forest/hybrid")
-add_python_module("tensorflow/contrib/tensor_forest/hybrid/core")
-add_python_module("tensorflow/contrib/tensor_forest/hybrid/core/ops")
-add_python_module("tensorflow/contrib/tensor_forest/hybrid/ops")
-add_python_module("tensorflow/contrib/tensor_forest/hybrid/python")
-add_python_module("tensorflow/contrib/tensor_forest/hybrid/python/kernel_tests")
-add_python_module("tensorflow/contrib/tensor_forest/hybrid/python/layers")
-add_python_module("tensorflow/contrib/tensor_forest/hybrid/python/models")
-add_python_module("tensorflow/contrib/tensor_forest/hybrid/python/ops")
-add_python_module("tensorflow/contrib/tensor_forest/python")
-add_python_module("tensorflow/contrib/tensor_forest/python/kernel_tests")
-add_python_module("tensorflow/contrib/tensor_forest/python/ops")
-add_python_module("tensorflow/contrib/testing")
-add_python_module("tensorflow/contrib/testing/python")
-add_python_module("tensorflow/contrib/testing/python/framework")
-add_python_module("tensorflow/contrib/text")
-add_python_module("tensorflow/contrib/text/kernels")
-add_python_module("tensorflow/contrib/text/ops")
-add_python_module("tensorflow/contrib/text/python")
-add_python_module("tensorflow/contrib/text/python/ops")
-add_python_module("tensorflow/contrib/tfprof")
-add_python_module("tensorflow/contrib/timeseries")
-add_python_module("tensorflow/contrib/timeseries/examples")
-add_python_module("tensorflow/contrib/timeseries/examples/data")
-add_python_module("tensorflow/contrib/timeseries/python")
-add_python_module("tensorflow/contrib/timeseries/python/timeseries")
-add_python_module("tensorflow/contrib/timeseries/python/timeseries/state_space_models")
-add_python_module("tensorflow/contrib/tpu")
-add_python_module("tensorflow/contrib/tpu/ops")
-add_python_module("tensorflow/contrib/tpu/profiler")
-add_python_module("tensorflow/contrib/tpu/python")
-add_python_module("tensorflow/contrib/tpu/python/ops")
-add_python_module("tensorflow/contrib/tpu/python/profiler")
-add_python_module("tensorflow/contrib/tpu/python/tpu")
-add_python_module("tensorflow/contrib/training")
-add_python_module("tensorflow/contrib/training/python")
-add_python_module("tensorflow/contrib/training/python/training")
-add_python_module("tensorflow/contrib/util")
-add_python_module("tensorflow/contrib/reduce_slice_ops")
-add_python_module("tensorflow/contrib/reduce_slice_ops/kernels")
-add_python_module("tensorflow/contrib/reduce_slice_ops/ops")
-add_python_module("tensorflow/contrib/reduce_slice_ops/python")
-add_python_module("tensorflow/contrib/reduce_slice_ops/python/kernel_tests")
-add_python_module("tensorflow/contrib/reduce_slice_ops/python/ops")
-add_python_module("tensorflow/contrib/summary")
 
 # Generate the tensorflow.python.platform.build_info module.
 set(BUILD_INFO_PY "${CMAKE_CURRENT_BINARY_DIR}/tf_python/tensorflow/python/platform/build_info.py")
-- 
GitLab


From 48892001b48d628a00a535d4cc19d9d7b5fcac11 Mon Sep 17 00:00:00 2001
From: Yong Tang <yong.tang.github@outlook.com>
Date: Sun, 3 Dec 2017 06:30:40 -0800
Subject: [PATCH 0560/1225] Add uint32 and uint64 support for
 `bitwise_and/or/xor` (#14883)

* Add uint32 and uint64 support for `bitwise_and/or/xor`

In `tensorflow/core/ops/bitwise_ops.cc`, uint32 and uint64
have been enabled for bitwise operations `and/or/xor/left_shift/right_shift`.
However, the kernels of `and/or/xor` have no support of uint32
and uint64. This is in comparision to `left_shift/right_shift` which
have the uint32/uint64 support, and, is tested in `bitwise_ops_test.py`.

This fix adds uint32 and uint64 to bitwise `and/or/xor` kernels and
adds relevant test cases in `bitwise_ops_test.py`, to bring `and/or/xor`
as `left_shift/right_shift`.

Signed-off-by: Yong Tang <yong.tang.github@outlook.com>

* Add uint32 and uint64 support for bitwise_and

Signed-off-by: Yong Tang <yong.tang.github@outlook.com>

* Add uint32 and uint64 support for bitwise_or

Signed-off-by: Yong Tang <yong.tang.github@outlook.com>

* Add uint32 and uint64 support for bitwise_xor

Signed-off-by: Yong Tang <yong.tang.github@outlook.com>

* Register GPU functor for bitwise_and, bitwise_or, bitwise_xor

Signed-off-by: Yong Tang <yong.tang.github@outlook.com>

* Sanitize with clang-format -i --style=Google

Signed-off-by: Yong Tang <yong.tang.github@outlook.com>
---
 tensorflow/core/kernels/cwise_op_bitwise_and.cc        | 10 ++++++----
 tensorflow/core/kernels/cwise_op_bitwise_or.cc         | 10 ++++++----
 tensorflow/core/kernels/cwise_op_bitwise_xor.cc        | 10 ++++++----
 tensorflow/core/kernels/cwise_op_gpu_bitwise_and.cu.cc |  3 ++-
 tensorflow/core/kernels/cwise_op_gpu_bitwise_or.cu.cc  |  3 ++-
 tensorflow/core/kernels/cwise_op_gpu_bitwise_xor.cu.cc |  3 ++-
 tensorflow/python/ops/bitwise_ops_test.py              |  2 +-
 7 files changed, 25 insertions(+), 16 deletions(-)

diff --git a/tensorflow/core/kernels/cwise_op_bitwise_and.cc b/tensorflow/core/kernels/cwise_op_bitwise_and.cc
index 017a2182dc..5a6cf4bad1 100644
--- a/tensorflow/core/kernels/cwise_op_bitwise_and.cc
+++ b/tensorflow/core/kernels/cwise_op_bitwise_and.cc
@@ -16,8 +16,8 @@ limitations under the License.
 #include "tensorflow/core/kernels/cwise_ops_common.h"
 
 namespace tensorflow {
-REGISTER6(BinaryOp, CPU, "BitwiseAnd", functor::bitwise_and, int8, int16, int32,
-          int64, uint8, uint16);
+REGISTER8(BinaryOp, CPU, "BitwiseAnd", functor::bitwise_and, int8, int16, int32,
+          int64, uint8, uint16, uint32, uint64);
 
 #if TENSORFLOW_USE_SYCL
 #define REGISTER_SYCL_KERNEL(TYPE)                                      \
@@ -30,13 +30,15 @@ REGISTER_SYCL_KERNEL(int32);
 REGISTER_SYCL_KERNEL(int64);
 REGISTER_SYCL_KERNEL(uint8);
 REGISTER_SYCL_KERNEL(uint16);
+REGISTER_SYCL_KERNEL(uint32);
+REGISTER_SYCL_KERNEL(uint64);
 #undef REGISTER_SYCL_KERNEL
 
 #endif  // TENSORFLOW_USE_SYCL
 
 #if GOOGLE_CUDA
-REGISTER6(BinaryOp, GPU, "BitwiseAnd", functor::bitwise_and, int8, int16, int32,
-          int64, uint8, uint16);
+REGISTER8(BinaryOp, GPU, "BitwiseAnd", functor::bitwise_and, int8, int16, int32,
+          int64, uint8, uint16, uint32, uint64);
 #endif  // GOOGLE_CUDA
 
 }  // namespace tensorflow
diff --git a/tensorflow/core/kernels/cwise_op_bitwise_or.cc b/tensorflow/core/kernels/cwise_op_bitwise_or.cc
index 36f45fe92d..201a10198a 100644
--- a/tensorflow/core/kernels/cwise_op_bitwise_or.cc
+++ b/tensorflow/core/kernels/cwise_op_bitwise_or.cc
@@ -16,8 +16,8 @@ limitations under the License.
 #include "tensorflow/core/kernels/cwise_ops_common.h"
 
 namespace tensorflow {
-REGISTER6(BinaryOp, CPU, "BitwiseOr", functor::bitwise_or, int8, int16, int32,
-          int64, uint8, uint16);
+REGISTER8(BinaryOp, CPU, "BitwiseOr", functor::bitwise_or, int8, int16, int32,
+          int64, uint8, uint16, uint32, uint64);
 
 #if TENSORFLOW_USE_SYCL
 #define REGISTER_SYCL_KERNEL(TYPE)                                     \
@@ -30,13 +30,15 @@ REGISTER_SYCL_KERNEL(int32);
 REGISTER_SYCL_KERNEL(int64);
 REGISTER_SYCL_KERNEL(uint8);
 REGISTER_SYCL_KERNEL(uint16);
+REGISTER_SYCL_KERNEL(uint32);
+REGISTER_SYCL_KERNEL(uint64);
 #undef REGISTER_SYCL_KERNEL
 
 #endif  // TENSORFLOW_USE_SYCL
 
 #if GOOGLE_CUDA
-REGISTER6(BinaryOp, GPU, "BitwiseOr", functor::bitwise_or, int8, int16, int32,
-          int64, uint8, uint16);
+REGISTER8(BinaryOp, GPU, "BitwiseOr", functor::bitwise_or, int8, int16, int32,
+          int64, uint8, uint16, uint32, uint64);
 #endif  // GOOGLE_CUDA
 
 }  // namespace tensorflow
diff --git a/tensorflow/core/kernels/cwise_op_bitwise_xor.cc b/tensorflow/core/kernels/cwise_op_bitwise_xor.cc
index 36432d851d..2a7cd26995 100644
--- a/tensorflow/core/kernels/cwise_op_bitwise_xor.cc
+++ b/tensorflow/core/kernels/cwise_op_bitwise_xor.cc
@@ -16,8 +16,8 @@ limitations under the License.
 #include "tensorflow/core/kernels/cwise_ops_common.h"
 
 namespace tensorflow {
-REGISTER6(BinaryOp, CPU, "BitwiseXor", functor::bitwise_xor, int8, int16, int32,
-          int64, uint8, uint16);
+REGISTER8(BinaryOp, CPU, "BitwiseXor", functor::bitwise_xor, int8, int16, int32,
+          int64, uint8, uint16, uint32, uint64);
 
 #if TENSORFLOW_USE_SYCL
 #define REGISTER_SYCL_KERNEL(TYPE)                                      \
@@ -30,13 +30,15 @@ REGISTER_SYCL_KERNEL(int32);
 REGISTER_SYCL_KERNEL(int64);
 REGISTER_SYCL_KERNEL(uint8);
 REGISTER_SYCL_KERNEL(uint16);
+REGISTER_SYCL_KERNEL(uint32);
+REGISTER_SYCL_KERNEL(uint64);
 #undef REGISTER_SYCL_KERNEL
 
 #endif  // TENSORFLOW_USE_SYCL
 
 #if GOOGLE_CUDA
-REGISTER6(BinaryOp, GPU, "BitwiseXor", functor::bitwise_xor, int8, int16, int32,
-          int64, uint8, uint16);
+REGISTER8(BinaryOp, GPU, "BitwiseXor", functor::bitwise_xor, int8, int16, int32,
+          int64, uint8, uint16, uint32, uint64);
 #endif  // GOOGLE_CUDA
 
 }  // namespace tensorflow
diff --git a/tensorflow/core/kernels/cwise_op_gpu_bitwise_and.cu.cc b/tensorflow/core/kernels/cwise_op_gpu_bitwise_and.cu.cc
index 27f973c90d..3fbf69c114 100644
--- a/tensorflow/core/kernels/cwise_op_gpu_bitwise_and.cu.cc
+++ b/tensorflow/core/kernels/cwise_op_gpu_bitwise_and.cu.cc
@@ -19,7 +19,8 @@ limitations under the License.
 
 namespace tensorflow {
 namespace functor {
-DEFINE_BINARY6(bitwise_and, int8, int16, int32, int64, uint8, uint16);
+DEFINE_BINARY8(bitwise_and, int8, int16, int32, int64, uint8, uint16, uint32,
+               uint64);
 }  // namespace functor
 }  // namespace tensorflow
 
diff --git a/tensorflow/core/kernels/cwise_op_gpu_bitwise_or.cu.cc b/tensorflow/core/kernels/cwise_op_gpu_bitwise_or.cu.cc
index a34c3a52cd..8bcb82266a 100644
--- a/tensorflow/core/kernels/cwise_op_gpu_bitwise_or.cu.cc
+++ b/tensorflow/core/kernels/cwise_op_gpu_bitwise_or.cu.cc
@@ -19,7 +19,8 @@ limitations under the License.
 
 namespace tensorflow {
 namespace functor {
-DEFINE_BINARY6(bitwise_or, int8, int16, int32, int64, uint8, uint16);
+DEFINE_BINARY8(bitwise_or, int8, int16, int32, int64, uint8, uint16, uint32,
+               uint64);
 }  // namespace functor
 }  // namespace tensorflow
 
diff --git a/tensorflow/core/kernels/cwise_op_gpu_bitwise_xor.cu.cc b/tensorflow/core/kernels/cwise_op_gpu_bitwise_xor.cu.cc
index a4531ab7c6..e62a87aba4 100644
--- a/tensorflow/core/kernels/cwise_op_gpu_bitwise_xor.cu.cc
+++ b/tensorflow/core/kernels/cwise_op_gpu_bitwise_xor.cu.cc
@@ -19,7 +19,8 @@ limitations under the License.
 
 namespace tensorflow {
 namespace functor {
-DEFINE_BINARY6(bitwise_xor, int8, int16, int32, int64, uint8, uint16);
+DEFINE_BINARY8(bitwise_xor, int8, int16, int32, int64, uint8, uint16, uint32,
+               uint64);
 }  // namespace functor
 }  // namespace tensorflow
 
diff --git a/tensorflow/python/ops/bitwise_ops_test.py b/tensorflow/python/ops/bitwise_ops_test.py
index fa1b219b17..75eb100a90 100644
--- a/tensorflow/python/ops/bitwise_ops_test.py
+++ b/tensorflow/python/ops/bitwise_ops_test.py
@@ -36,7 +36,7 @@ class BitwiseOpTest(test_util.TensorFlowTestCase):
 
   def testBinaryOps(self):
     dtype_list = [dtypes.int8, dtypes.int16, dtypes.int32, dtypes.int64,
-                  dtypes.uint8, dtypes.uint16]
+                  dtypes.uint8, dtypes.uint16, dtypes.uint32, dtypes.uint64]
 
     with self.test_session(use_gpu=True) as sess:
       for dtype in dtype_list:
-- 
GitLab


From 2013b330b3c8bf79a4fbab7c49b8fef30411b6d0 Mon Sep 17 00:00:00 2001
From: Alan Lee <secsilm@outlook.com>
Date: Mon, 4 Dec 2017 09:25:04 +0800
Subject: [PATCH 0561/1225] DOC: Fix documentation for dataset.md

The code `image = tf.decode_jpeg(parsed["image_data"])` in 738 lines is incorrect. It should be `tf.image.decode_jpeg` instead of `tf.decode_jpeg`.
---
 tensorflow/docs_src/programmers_guide/datasets.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/docs_src/programmers_guide/datasets.md b/tensorflow/docs_src/programmers_guide/datasets.md
index c54b399c3a..308cbad376 100644
--- a/tensorflow/docs_src/programmers_guide/datasets.md
+++ b/tensorflow/docs_src/programmers_guide/datasets.md
@@ -735,7 +735,7 @@ def dataset_input_fn():
     parsed = tf.parse_single_example(record, keys_to_features)
 
     # Perform additional preprocessing on the parsed data.
-    image = tf.decode_jpeg(parsed["image_data"])
+    image = tf.image.decode_jpeg(parsed["image_data"])
     image = tf.reshape(image, [299, 299, 1])
     label = tf.cast(parsed["label"], tf.int32)
 
-- 
GitLab


From a22c8219581a6bd597c92b51c5dbe7db706e3100 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Sun, 3 Dec 2017 17:50:50 -0800
Subject: [PATCH 0562/1225] Update the pruning library to handle graphs which
 has both partitioned and non-partitioned variables

PiperOrigin-RevId: 177761638
---
 .../contrib/model_pruning/python/pruning.py   | 23 ++++++++-----------
 .../model_pruning/python/pruning_test.py      |  2 +-
 2 files changed, 10 insertions(+), 15 deletions(-)

diff --git a/tensorflow/contrib/model_pruning/python/pruning.py b/tensorflow/contrib/model_pruning/python/pruning.py
index 42d91a71fd..39eb79daf0 100644
--- a/tensorflow/contrib/model_pruning/python/pruning.py
+++ b/tensorflow/contrib/model_pruning/python/pruning.py
@@ -74,6 +74,7 @@ from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import nn_impl
 from tensorflow.python.ops import state_ops
 from tensorflow.python.ops import variable_scope
+from tensorflow.python.ops import variables
 from tensorflow.python.platform import tf_logging as logging
 from tensorflow.python.summary import summary
 from tensorflow.python.training import training_util
@@ -341,11 +342,7 @@ def get_pruning_hparams():
 
 class Pruning(object):
 
-  def __init__(self,
-               spec=None,
-               global_step=None,
-               sparsity=None,
-               partitioner=None):
+  def __init__(self, spec=None, global_step=None, sparsity=None):
     """Set up the specification for model pruning.
 
     If a spec is provided, the sparsity is set up based on the sparsity_function
@@ -358,8 +355,6 @@ class Pruning(object):
       global_step: A tensorflow variable that is used while setting up the
         sparsity function
       sparsity: A tensorflow scalar variable storing the sparsity
-      partitioner: The tensorflow partitioner function used to distribute
-        parameters across shards
     """
     # Pruning specification
     self._spec = spec if spec else get_pruning_hparams()
@@ -373,9 +368,6 @@ class Pruning(object):
     # Built using self._setup_sparsity() or provided externally
     self._sparsity = sparsity if sparsity else self._setup_sparsity()
 
-    # Stores the partitioner function uses to partition variables across tasks/
-    self._partitioner = partitioner
-
     # List of tensorflow assignments ops for new masks and thresholds
     self._assign_ops = []
 
@@ -509,8 +501,10 @@ class Pruning(object):
 
     for index, mask in enumerate(masks):
       threshold = thresholds[index]
-      weight = weights[index] if self._partitioner is None else weights[
-          index].as_tensor()
+      weight = weights[index]
+      is_partitioned = isinstance(weight, variables.PartitionedVariable)
+      if is_partitioned:
+        weight = weight.as_tensor()
 
       if self._spec.do_not_prune:
         if self._exists_in_do_not_prune_list(mask.name):
@@ -518,9 +512,10 @@ class Pruning(object):
 
       new_threshold, new_mask = self._update_mask(weight, threshold)
       self._assign_ops.append(_variable_assign(threshold, new_threshold))
+
       self._assign_ops.append(
-          _variable_assign(mask, new_mask) if self._partitioner is None else
-          _partitioned_variable_assign(mask, new_mask))
+          _partitioned_variable_assign(mask, new_mask)
+          if is_partitioned else _variable_assign(mask, new_mask))
 
   def mask_update_op(self):
     with ops.name_scope(self._spec.name):
diff --git a/tensorflow/contrib/model_pruning/python/pruning_test.py b/tensorflow/contrib/model_pruning/python/pruning_test.py
index c23fd649ce..34b4584f49 100644
--- a/tensorflow/contrib/model_pruning/python/pruning_test.py
+++ b/tensorflow/contrib/model_pruning/python/pruning_test.py
@@ -120,7 +120,7 @@ class PruningTest(test.TestCase):
             "weights", initializer=math_ops.linspace(1.0, 100.0, 100))
         masked_weights = pruning.apply_mask(
             weights, scope=variable_scope.get_variable_scope())
-      p = pruning.Pruning(sparsity=sparsity, partitioner=partitioner)
+      p = pruning.Pruning(sparsity=sparsity)
       p._spec.threshold_decay = 0.0
       mask_update_op = p.mask_update_op()
       variables.global_variables_initializer().run()
-- 
GitLab


From 73dca81809561d10bc3acf7e90f5a9dc08ad05d1 Mon Sep 17 00:00:00 2001
From: Yong Tang <yong.tang.github@outlook.com>
Date: Sun, 3 Dec 2017 18:36:38 -0800
Subject: [PATCH 0563/1225] Add S3 to the list of implemented file systems in
 doc (#15080)

This fix adds S3 to the list of implemented file systems
in doc.

Signed-off-by: Yong Tang <yong.tang.github@outlook.com>
---
 tensorflow/docs_src/extend/add_filesys.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tensorflow/docs_src/extend/add_filesys.md b/tensorflow/docs_src/extend/add_filesys.md
index 44ba198998..f0591b7b7d 100644
--- a/tensorflow/docs_src/extend/add_filesys.md
+++ b/tensorflow/docs_src/extend/add_filesys.md
@@ -35,6 +35,7 @@ Note that TensorFlow already includes many filesystem implementations, such as:
 
 *   HDFS - the Hadoop File System
 *   GCS - Google Cloud Storage filesystem
+*   S3 - Amazon Simple Storage Service filesystem
 *   A "memory-mapped-file" filesystem
 
 The rest of this guide describes how to implement a custom filesystem.
-- 
GitLab


From 440d84579a86964fca80af5c0d8715824a8e660d Mon Sep 17 00:00:00 2001
From: Yong Tang <yong.tang.github@outlook.com>
Date: Sun, 3 Dec 2017 18:38:18 -0800
Subject: [PATCH 0564/1225] Update AWS C++ SDK to 1.3.15 (#15067)

This fix tries to address the issue raised in 15066
where AWS C++ SDK version was not high enough to support
ECS.

This fix updates AWS C++ SDK to 1.3.15.

This fix fixes 15066.

Signed-off-by: Yong Tang <yong.tang.github@outlook.com>
---
 tensorflow/workspace.bzl | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl
index 567ebf7955..c241a0f82d 100644
--- a/tensorflow/workspace.bzl
+++ b/tensorflow/workspace.bzl
@@ -505,11 +505,11 @@ def tf_workspace(path_prefix="", tf_repo_name=""):
   tf_http_archive(
       name = "aws",
       urls = [
-          "https://mirror.bazel.build/github.com/aws/aws-sdk-cpp/archive/1.0.90.tar.gz",
-          "https://github.com/aws/aws-sdk-cpp/archive/1.0.90.tar.gz",
+          "https://mirror.bazel.build/github.com/aws/aws-sdk-cpp/archive/1.3.15.tar.gz",
+          "https://github.com/aws/aws-sdk-cpp/archive/1.3.15.tar.gz",
       ],
-      sha256 = "f599b57aec4f03ad696044dd430b2d201864113937353adc346f53ad47991319",
-      strip_prefix = "aws-sdk-cpp-1.0.90",
+      sha256 = "b888d8ce5fc10254c3dd6c9020c7764dd53cf39cf011249d0b4deda895de1b7c",
+      strip_prefix = "aws-sdk-cpp-1.3.15",
       build_file = str(Label("//third_party:aws.BUILD")),
   )
 
-- 
GitLab


From ed2e8c227e77d591b868ecd6d1c8d59ddae3b3d9 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Sun, 3 Dec 2017 19:59:51 -0800
Subject: [PATCH 0565/1225] Generalize TFGAN's classifier metrics.

1) Separate out input batching from computation.
2) Support fetching multiple Tensors in one call.

PiperOrigin-RevId: 177766643
---
 .../eval/python/classifier_metrics_impl.py    | 127 ++++++++++++++----
 .../eval/python/classifier_metrics_test.py    |  17 +++
 2 files changed, 118 insertions(+), 26 deletions(-)

diff --git a/tensorflow/contrib/gan/python/eval/python/classifier_metrics_impl.py b/tensorflow/contrib/gan/python/eval/python/classifier_metrics_impl.py
index bb65f05b5a..82293b575a 100644
--- a/tensorflow/contrib/gan/python/eval/python/classifier_metrics_impl.py
+++ b/tensorflow/contrib/gan/python/eval/python/classifier_metrics_impl.py
@@ -57,8 +57,10 @@ __all__ = [
     'run_inception',
     'inception_score',
     'classifier_score',
+    'classifier_score_from_logits',
     'frechet_inception_distance',
     'frechet_classifier_distance',
+    'frechet_classifier_distance_from_activations',
     'INCEPTION_DEFAULT_IMAGE_SIZE',
 ]
 
@@ -222,13 +224,13 @@ def run_inception(images,
     image_size: Required image width and height. See unit tests for the default
       values.
     input_tensor: Name of input Tensor.
-    output_tensor: Name of output Tensor. This function will compute activations
-      at the specified layer. Examples include INCEPTION_V3_OUTPUT and
-      INCEPTION_V3_FINAL_POOL which would result in this function computing
+    output_tensor: Name or list of output Tensors. This function will compute
+      activations at the specified layer. Examples include INCEPTION_V3_OUTPUT
+      and INCEPTION_V3_FINAL_POOL which would result in this function computing
       the final logits or the penultimate pooling layer.
 
   Returns:
-    Logits.
+    Tensor or Tensors corresponding to computed `output_tensor`.
 
   Raises:
     ValueError: If images are not the correct size.
@@ -244,8 +246,14 @@ def run_inception(images,
 
   activations = run_image_classifier(images, graph_def, input_tensor,
                                      output_tensor)
-  if array_ops.rank(activations) != 2:
-    activations = layers.flatten(activations)
+  if isinstance(activations, list):
+    for i, activation in enumerate(activations):
+      if array_ops.rank(activation) != 2:
+        activations[i] = layers.flatten(activation)
+  else:
+    if array_ops.rank(activations) != 2:
+      activations = layers.flatten(activations)
+
   return activations
 
 
@@ -257,23 +265,26 @@ def run_image_classifier(tensor, graph_def, input_tensor,
     tensor: An Input tensor.
     graph_def: A GraphDef proto.
     input_tensor: Name of input tensor in graph def.
-    output_tensor: Name of output tensor in graph def.
+    output_tensor: A tensor name or list of tensor names in graph def.
     scope: Name scope for classifier.
 
   Returns:
-    Classifier output. Shape depends on the classifier used, but is often
-    [batch, classes].
+    Classifier output if `output_tensor` is a string, or a list of outputs if
+    `output_tensor` is a list.
 
   Raises:
-    ValueError: If `image_size` is not `None`, and `tensor` are not the correct
-      size.
+    ValueError: If `input_tensor` or `output_tensor` aren't in the graph_def.
   """
   input_map = {input_tensor: tensor}
-  return_elements = [output_tensor]
-  classifier_output = importer.import_graph_def(
-      graph_def, input_map, return_elements, name=scope)[0]
+  is_singleton = isinstance(output_tensor, str)
+  if is_singleton:
+    output_tensor = [output_tensor]
+  classifier_outputs = importer.import_graph_def(
+      graph_def, input_map, output_tensor, name=scope)
+  if is_singleton:
+    classifier_outputs = classifier_outputs[0]
 
-  return classifier_output
+  return classifier_outputs
 
 
 def classifier_score(images, classifier_fn, num_batches=1):
@@ -312,6 +323,30 @@ def classifier_score(images, classifier_fn, num_batches=1):
       swap_memory=True,
       name='RunClassifier')
   logits = array_ops.concat(array_ops.unstack(logits), 0)
+
+  return classifier_score_from_logits(logits)
+
+
+def classifier_score_from_logits(logits):
+  """Classifier score for evaluating a conditional generative model.
+
+  This is based on the Inception Score, but for an arbitrary classifier.
+
+  This technique is described in detail in https://arxiv.org/abs/1606.03498. In
+  summary, this function calculates
+
+  exp( E[ KL(p(y|x) || p(y)) ] )
+
+  which captures how different the network's classification prediction is from
+  the prior distribution over classes.
+
+  Args:
+    logits: A 2D Tensor of logits.
+
+  Returns:
+    The classifier score. A floating-point scalar of the same type as the output
+    of `logits`.
+  """
   logits.shape.assert_has_rank(2)
 
   # Use maximum precision for best results.
@@ -436,31 +471,71 @@ def frechet_classifier_distance(real_images,
       swap_memory=True,
       name='RunClassifier')
 
-  activations_dtype = activations.dtype
   # Split the activations by the real and generated images.
   real_a, gen_a = array_ops.split(activations, [num_batches, num_batches], 0)
 
   # Ensure the activations have the right shapes.
   real_a = array_ops.concat(array_ops.unstack(real_a), 0)
   gen_a = array_ops.concat(array_ops.unstack(gen_a), 0)
-  if activations_dtype != dtypes.float64:
-    real_a = math_ops.to_double(real_a)
-    gen_a = math_ops.to_double(gen_a)
 
-  real_a.shape.assert_has_rank(2)
-  gen_a.shape.assert_has_rank(2)
+  return frechet_classifier_distance_from_activations(real_a, gen_a)
+
+
+def frechet_classifier_distance_from_activations(
+    real_activations, generated_activations):
+  """Classifier distance for evaluating a generative model.
+
+  This is based on the Frechet Inception distance, but for an arbitrary
+  classifier.
+
+  This technique is described in detail in https://arxiv.org/abs/1706.08500.
+  Given two Gaussian distribution with means m and m_w and covariance matrices
+  C and C_w, this function calcuates
+
+  |m - m_w|^2 + Tr(C + C_w - 2(C * C_w)^(1/2))
+
+  which captures how different the distributions of real images and generated
+  images (or more accurately, their visual features) are. Note that unlike the
+  Inception score, this is a true distance and utilizes information about real
+  world images.
+
+  Note that when computed using sample means and sample covariance matrices,
+  Frechet distance is biased. It is more biased for small sample sizes. (e.g.
+  even if the two distributions are the same, for a small sample size, the
+  expected Frechet distance is large). It is important to use the same
+  sample size to compute frechet classifier distance when comparing two
+  generative models.
+
+  Args:
+    real_activations: Real images to use to compute Frechet Inception distance.
+    generated_activations: Generated images to use to compute Frechet Inception
+      distance.
+
+  Returns:
+    The Frechet Inception distance. A floating-point scalar of the same type
+    as the output of the activations.
+  """
+  real_activations.shape.assert_has_rank(2)
+  generated_activations.shape.assert_has_rank(2)
+
+  activations_dtype = real_activations.dtype
+  if activations_dtype != dtypes.float64:
+    real_activations = math_ops.to_double(real_activations)
+    generated_activations = math_ops.to_double(generated_activations)
 
   # Compute mean and covariance matrices of activations.
-  m = math_ops.reduce_mean(real_a, 0)
-  m_v = math_ops.reduce_mean(gen_a, 0)
-  num_examples = math_ops.to_double(array_ops.shape(real_a)[0])
+  m = math_ops.reduce_mean(real_activations, 0)
+  m_v = math_ops.reduce_mean(generated_activations, 0)
+  num_examples = math_ops.to_double(array_ops.shape(real_activations)[0])
 
   # sigma = (1 / (n - 1)) * (X - mu) (X - mu)^T
+  real_centered = real_activations - m
   sigma = math_ops.matmul(
-      real_a - m, real_a - m, transpose_a=True) / (num_examples - 1)
+      real_centered, real_centered, transpose_a=True) / (num_examples - 1)
 
+  gen_centered = generated_activations - m_v
   sigma_v = math_ops.matmul(
-      gen_a - m_v, gen_a - m_v, transpose_a=True) / (num_examples - 1)
+      gen_centered, gen_centered, transpose_a=True) / (num_examples - 1)
 
   # Find the Tr(sqrt(sigma sigma_v)) component of FID
   sqrt_trace_component = trace_sqrt_product(sigma, sigma_v)
diff --git a/tensorflow/contrib/gan/python/eval/python/classifier_metrics_test.py b/tensorflow/contrib/gan/python/eval/python/classifier_metrics_test.py
index 92e0a99574..1e18c699ba 100644
--- a/tensorflow/contrib/gan/python/eval/python/classifier_metrics_test.py
+++ b/tensorflow/contrib/gan/python/eval/python/classifier_metrics_test.py
@@ -190,6 +190,23 @@ class ClassifierMetricsTest(test.TestCase):
     # Check that none of the model variables are trainable.
     self.assertListEqual([], variables.trainable_variables())
 
+  def test_run_inception_multiple_outputs(self):
+    """Test `run_inception` graph construction with multiple outputs."""
+    batch_size = 3
+    img = array_ops.ones([batch_size, 299, 299, 3])
+    logits, pool = _run_with_mock(
+        classifier_metrics.run_inception, img,
+        output_tensor=[classifier_metrics.INCEPTION_OUTPUT,
+                       classifier_metrics.INCEPTION_FINAL_POOL])
+
+    self.assertTrue(isinstance(logits, ops.Tensor))
+    self.assertTrue(isinstance(pool, ops.Tensor))
+    logits.shape.assert_is_compatible_with([batch_size, 1001])
+    pool.shape.assert_is_compatible_with([batch_size, 2048])
+
+    # Check that none of the model variables are trainable.
+    self.assertListEqual([], variables.trainable_variables())
+
   def test_inception_score_graph(self):
     """Test `inception_score` graph construction."""
     score = _run_with_mock(classifier_metrics.inception_score,
-- 
GitLab


From 22daa9bb307988920c27c4f8232ce9d0144eca84 Mon Sep 17 00:00:00 2001
From: Yong Tang <yong.tang.github@outlook.com>
Date: Mon, 4 Dec 2017 06:07:36 +0000
Subject: [PATCH 0566/1225] Update docs for `tf.contrib.losses` -> `tf.losses`

This fix updates the docs in `extend/estimators.md`
and changes the reference from `tf.contrib.losses`
to `tf.losses`, as the former has been deprecated.

Signed-off-by: Yong Tang <yong.tang.github@outlook.com>
---
 tensorflow/docs_src/extend/estimators.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tensorflow/docs_src/extend/estimators.md b/tensorflow/docs_src/extend/estimators.md
index 7e6507c584..96fc9fae47 100644
--- a/tensorflow/docs_src/extend/estimators.md
+++ b/tensorflow/docs_src/extend/estimators.md
@@ -515,7 +515,7 @@ using `mean_squared_error()` (in bold):
   loss = tf.losses.mean_squared_error(labels, predictions)</strong>
   ...</code></pre>
 
-See the @{$python/contrib.losses$API guide} for a
+See the @{tf.losses$API guide} for a
 full list of loss functions and more details on supported arguments and usage.
 
 Supplementary metrics for evaluation can be added to an `eval_metric_ops` dict.
@@ -694,5 +694,5 @@ For additional reference materials on building `Estimator`s, see the following
 sections of the API guides:
 
 *   @{$python/contrib.layers$Layers}
-*   @{$python/contrib.losses$Losses}
+*   @{tf.losses$Losses}
 *   @{$python/contrib.layers#optimization$Optimization}
-- 
GitLab


From dd788dbbfa544c1ea4768940ac4300c22bb7e88e Mon Sep 17 00:00:00 2001
From: Yong Tang <yong.tang.github@outlook.com>
Date: Sun, 3 Dec 2017 22:25:40 -0800
Subject: [PATCH 0567/1225] Fix a BUILD file bug in
 `tensorflow/contrib/cloud/BUILD` (#15023)

In `tensorflow/contrib/cloud`, invoking `bigquery_reader_ops_test` will fail.
The error is caused by the the fact that `bigquery_reader_ops_test`
depends on ":bigquery_reader_ops_op_lib" and ":bigquery_reader_ops".

However, bigquery_reader_ops_test is in python, ":bigquery_reader_ops_op_lib" and ":bigquery_reader_ops"
are cc libraries. So they shouldn't be the dependencies of bigquery_reader_ops_test.

This fix removes the above two dependencies so that `bigquery_reader_ops_test` could
run successfully.

Below is the full error message before this PR.

```
ubuntu@ubuntu:~/tensorflow$ bazel test -s --config=opt //tensorflow/contrib/cloud:bigquery_reader_ops_test
..........
WARNING: /home/ubuntu/tensorflow/tensorflow/core/BUILD:1815:1: in includes attribute of cc_library rule //tensorflow/core:framework_headers_lib: '../../external/nsync/public' resolves to 'external/nsync/public' not below the relative path of its package 'tensorflow/core'. This will be an error in the future. Since this rule was created by the macro 'cc_header_only_library', the error might have been caused by the macro implementation in /home/ubuntu/tensorflow/tensorflow/tensorflow.bzl:1127:30
ERROR: /home/ubuntu/tensorflow/tensorflow/contrib/cloud/BUILD:58:1: in deps attribute of py_test rule //tensorflow/contrib/cloud:bigquery_reader_ops_test: '//tensorflow/contrib/cloud:bigquery_reader_ops_op_lib' does not have mandatory providers: 'py'. Since this rule was created by the macro 'tf_py_test', the error might have been caused by the macro implementation in /home/ubuntu/tensorflow/tensorflow/tensorflow.bzl:1368:12
ERROR: /home/ubuntu/tensorflow/tensorflow/contrib/cloud/BUILD:58:1: in deps attribute of py_test rule //tensorflow/contrib/cloud:bigquery_reader_ops_test: '//tensorflow/contrib/cloud/kernels:bigquery_reader_ops' does not have mandatory providers: 'py'. Since this rule was created by the macro 'tf_py_test', the error might have been caused by the macro implementation in /home/ubuntu/tensorflow/tensorflow/tensorflow.bzl:1368:12
ERROR: Analysis of target '//tensorflow/contrib/cloud:bigquery_reader_ops_test' failed; build aborted: Analysis of target '//tensorflow/contrib/cloud:bigquery_reader_ops_test' failed; build aborted
INFO: Elapsed time: 10.083s
FAILED: Build did NOT complete successfully (105 packages loaded)
ERROR: Couldn't start the build. Unable to run tests
ubuntu@ubuntu:~/tensorflow$
```

Signed-off-by: Yong Tang <yong.tang.github@outlook.com>
---
 tensorflow/contrib/cloud/BUILD | 2 --
 1 file changed, 2 deletions(-)

diff --git a/tensorflow/contrib/cloud/BUILD b/tensorflow/contrib/cloud/BUILD
index aa8f5ed12b..fe8bd072af 100644
--- a/tensorflow/contrib/cloud/BUILD
+++ b/tensorflow/contrib/cloud/BUILD
@@ -60,9 +60,7 @@ tf_py_test(
     size = "small",
     srcs = ["python/ops/bigquery_reader_ops_test.py"],
     additional_deps = [
-        ":bigquery_reader_ops_op_lib",
         ":cloud_py",
-        "//tensorflow/contrib/cloud/kernels:bigquery_reader_ops",
         "//tensorflow/core:protos_all_py",
         "//tensorflow/python:array_ops",
         "//tensorflow/python:client_testlib",
-- 
GitLab


From 88e63eb7099e2cd82e942f4b3867a9fedffb3d85 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 4 Dec 2017 00:57:14 -0800
Subject: [PATCH 0568/1225] Make gather_nd & scatter_nd handle rank 6 and 7
 indices.

PiperOrigin-RevId: 177781215
---
 tensorflow/contrib/makefile/tf_op_files.txt   |  4 ++
 tensorflow/core/kernels/BUILD                 |  4 ++
 tensorflow/core/kernels/gather_nd_op.cc       |  8 +++-
 .../core/kernels/gather_nd_op_cpu_impl_6.cc   | 18 +++++++++
 .../core/kernels/gather_nd_op_cpu_impl_7.cc   | 18 +++++++++
 .../core/kernels/gather_nd_op_gpu.cu.cc       |  4 +-
 tensorflow/core/kernels/scatter_nd_op.cc      |  6 ++-
 .../core/kernels/scatter_nd_op_cpu_impl_6.cc  | 18 +++++++++
 .../core/kernels/scatter_nd_op_cpu_impl_7.cc  | 19 +++++++++
 .../core/kernels/scatter_nd_op_gpu.cu.cc      |  4 +-
 .../python/kernel_tests/gather_nd_op_test.py  | 29 ++++++++++++++
 .../kernel_tests/scatter_nd_ops_test.py       | 39 ++++++++++++++++++-
 12 files changed, 165 insertions(+), 6 deletions(-)
 create mode 100644 tensorflow/core/kernels/gather_nd_op_cpu_impl_6.cc
 create mode 100644 tensorflow/core/kernels/gather_nd_op_cpu_impl_7.cc
 create mode 100644 tensorflow/core/kernels/scatter_nd_op_cpu_impl_6.cc
 create mode 100644 tensorflow/core/kernels/scatter_nd_op_cpu_impl_7.cc

diff --git a/tensorflow/contrib/makefile/tf_op_files.txt b/tensorflow/contrib/makefile/tf_op_files.txt
index ff612f1fdf..9fc9aeb785 100644
--- a/tensorflow/contrib/makefile/tf_op_files.txt
+++ b/tensorflow/contrib/makefile/tf_op_files.txt
@@ -68,6 +68,8 @@ tensorflow/core/kernels/scatter_nd_op_cpu_impl_2.cc
 tensorflow/core/kernels/scatter_nd_op_cpu_impl_3.cc
 tensorflow/core/kernels/scatter_nd_op_cpu_impl_4.cc
 tensorflow/core/kernels/scatter_nd_op_cpu_impl_5.cc
+tensorflow/core/kernels/scatter_nd_op_cpu_impl_6.cc
+tensorflow/core/kernels/scatter_nd_op_cpu_impl_7.cc
 tensorflow/core/kernels/scatter_nd_op.cc
 tensorflow/core/kernels/save_restore_tensor.cc
 tensorflow/core/kernels/save_restore_v2_ops.cc
@@ -132,6 +134,8 @@ tensorflow/core/kernels/gather_nd_op_cpu_impl_2.cc
 tensorflow/core/kernels/gather_nd_op_cpu_impl_3.cc
 tensorflow/core/kernels/gather_nd_op_cpu_impl_4.cc
 tensorflow/core/kernels/gather_nd_op_cpu_impl_5.cc
+tensorflow/core/kernels/gather_nd_op_cpu_impl_6.cc
+tensorflow/core/kernels/gather_nd_op_cpu_impl_7.cc
 tensorflow/core/kernels/fused_batch_norm_op.cc
 tensorflow/core/kernels/function_ops.cc
 tensorflow/core/kernels/fill_functor.cc
diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD
index a46fbbfc8e..c8359b4480 100644
--- a/tensorflow/core/kernels/BUILD
+++ b/tensorflow/core/kernels/BUILD
@@ -3940,6 +3940,8 @@ tf_kernel_library(
         "scatter_nd_op_cpu_impl_3.cc",
         "scatter_nd_op_cpu_impl_4.cc",
         "scatter_nd_op_cpu_impl_5.cc",
+        "scatter_nd_op_cpu_impl_6.cc",
+        "scatter_nd_op_cpu_impl_7.cc",
     ],
     hdrs = [
         "scatter_nd_op.h",
@@ -4512,6 +4514,8 @@ filegroup(
         "gather_nd_op_cpu_impl_3.cc",
         "gather_nd_op_cpu_impl_4.cc",
         "gather_nd_op_cpu_impl_5.cc",
+        "gather_nd_op_cpu_impl_6.cc",
+        "gather_nd_op_cpu_impl_7.cc",
         "gather_op.cc",
         "identity_n_op.cc",
         "identity_n_op.h",
diff --git a/tensorflow/core/kernels/gather_nd_op.cc b/tensorflow/core/kernels/gather_nd_op.cc
index 5dc74d720a..7e5a9e1ec5 100644
--- a/tensorflow/core/kernels/gather_nd_op.cc
+++ b/tensorflow/core/kernels/gather_nd_op.cc
@@ -176,10 +176,12 @@ Status DoGatherNd(OpKernelContext* c, const Tensor& params,
       PARAMS_CASE(3);
       PARAMS_CASE(4);
       PARAMS_CASE(5);
+      PARAMS_CASE(6);
+      PARAMS_CASE(7);
 #undef PARAMS_CASE
       default:
         return errors::InvalidArgument(
-            "Only indices.shape[-1] values between 1 and 5 "
+            "Only indices.shape[-1] values between 1 and 7 "
             "are currently supported.  Requested rank: ",
             indices_nd);
     }
@@ -218,7 +220,9 @@ namespace functor {
   DECLARE_GPU_SPECS_INDEX_NDIM(T, Index, 2); \
   DECLARE_GPU_SPECS_INDEX_NDIM(T, Index, 3); \
   DECLARE_GPU_SPECS_INDEX_NDIM(T, Index, 4); \
-  DECLARE_GPU_SPECS_INDEX_NDIM(T, Index, 5);
+  DECLARE_GPU_SPECS_INDEX_NDIM(T, Index, 5); \
+  DECLARE_GPU_SPECS_INDEX_NDIM(T, Index, 6); \
+  DECLARE_GPU_SPECS_INDEX_NDIM(T, Index, 7);
 
 #define DECLARE_GPU_SPECS(T)         \
   DECLARE_GPU_SPECS_INDEX(T, int32); \
diff --git a/tensorflow/core/kernels/gather_nd_op_cpu_impl_6.cc b/tensorflow/core/kernels/gather_nd_op_cpu_impl_6.cc
new file mode 100644
index 0000000000..2aec872448
--- /dev/null
+++ b/tensorflow/core/kernels/gather_nd_op_cpu_impl_6.cc
@@ -0,0 +1,18 @@
+/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#define CPU_PROVIDED_IXDIM 6
+#include "tensorflow/core/kernels/gather_nd_op_cpu_impl.h"
+#undef CPU_PROVIDED_IXDIM
diff --git a/tensorflow/core/kernels/gather_nd_op_cpu_impl_7.cc b/tensorflow/core/kernels/gather_nd_op_cpu_impl_7.cc
new file mode 100644
index 0000000000..9222cb0769
--- /dev/null
+++ b/tensorflow/core/kernels/gather_nd_op_cpu_impl_7.cc
@@ -0,0 +1,18 @@
+/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#define CPU_PROVIDED_IXDIM 7
+#include "tensorflow/core/kernels/gather_nd_op_cpu_impl.h"
+#undef CPU_PROVIDED_IXDIM
diff --git a/tensorflow/core/kernels/gather_nd_op_gpu.cu.cc b/tensorflow/core/kernels/gather_nd_op_gpu.cu.cc
index ed5240c20a..b03efc684f 100644
--- a/tensorflow/core/kernels/gather_nd_op_gpu.cu.cc
+++ b/tensorflow/core/kernels/gather_nd_op_gpu.cu.cc
@@ -111,7 +111,9 @@ struct GatherNdSlice<GPUDevice, T, Index, IXDIM> {
   DEFINE_GPU_SPECS_INDEX_NDIM(T, Index, 2); \
   DEFINE_GPU_SPECS_INDEX_NDIM(T, Index, 3); \
   DEFINE_GPU_SPECS_INDEX_NDIM(T, Index, 4); \
-  DEFINE_GPU_SPECS_INDEX_NDIM(T, Index, 5);
+  DEFINE_GPU_SPECS_INDEX_NDIM(T, Index, 5); \
+  DEFINE_GPU_SPECS_INDEX_NDIM(T, Index, 6); \
+  DEFINE_GPU_SPECS_INDEX_NDIM(T, Index, 7);
 
 #define DEFINE_GPU_SPECS(T)         \
   DEFINE_GPU_SPECS_INDEX(T, int32); \
diff --git a/tensorflow/core/kernels/scatter_nd_op.cc b/tensorflow/core/kernels/scatter_nd_op.cc
index 98c0181afb..3a95dd1773 100644
--- a/tensorflow/core/kernels/scatter_nd_op.cc
+++ b/tensorflow/core/kernels/scatter_nd_op.cc
@@ -487,6 +487,8 @@ Status DoScatterNd(OpKernelContext* c, const Tensor& indices,
       PARAMS_CASE(3);
       PARAMS_CASE(4);
       PARAMS_CASE(5);
+      PARAMS_CASE(6);
+      PARAMS_CASE(7);
 #undef PARAMS_CASE
       default:
         return errors::InvalidArgument(
@@ -525,7 +527,9 @@ namespace functor {
   DECLARE_GPU_SPECS_INDEX_OP_IXDIM(T, Index, op, 2); \
   DECLARE_GPU_SPECS_INDEX_OP_IXDIM(T, Index, op, 3); \
   DECLARE_GPU_SPECS_INDEX_OP_IXDIM(T, Index, op, 4); \
-  DECLARE_GPU_SPECS_INDEX_OP_IXDIM(T, Index, op, 5);
+  DECLARE_GPU_SPECS_INDEX_OP_IXDIM(T, Index, op, 5); \
+  DECLARE_GPU_SPECS_INDEX_OP_IXDIM(T, Index, op, 6); \
+  DECLARE_GPU_SPECS_INDEX_OP_IXDIM(T, Index, op, 7);
 
 #define DECLARE_GPU_SPECS_INDEX(T, Index)                                \
   DECLARE_GPU_SPECS_INDEX_OP(T, Index, scatter_nd_op::UpdateOp::ASSIGN); \
diff --git a/tensorflow/core/kernels/scatter_nd_op_cpu_impl_6.cc b/tensorflow/core/kernels/scatter_nd_op_cpu_impl_6.cc
new file mode 100644
index 0000000000..d98412e255
--- /dev/null
+++ b/tensorflow/core/kernels/scatter_nd_op_cpu_impl_6.cc
@@ -0,0 +1,18 @@
+/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#define CPU_PROVIDED_IXDIM 6
+#include "tensorflow/core/kernels/scatter_nd_op_cpu_impl.h"
+#undef CPU_PROVIDED_IXDIM
diff --git a/tensorflow/core/kernels/scatter_nd_op_cpu_impl_7.cc b/tensorflow/core/kernels/scatter_nd_op_cpu_impl_7.cc
new file mode 100644
index 0000000000..a008b55603
--- /dev/null
+++ b/tensorflow/core/kernels/scatter_nd_op_cpu_impl_7.cc
@@ -0,0 +1,19 @@
+
+/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#define CPU_PROVIDED_IXDIM 7
+#include "tensorflow/core/kernels/scatter_nd_op_cpu_impl.h"
+#undef CPU_PROVIDED_IXDIM
diff --git a/tensorflow/core/kernels/scatter_nd_op_gpu.cu.cc b/tensorflow/core/kernels/scatter_nd_op_gpu.cu.cc
index 0eb3cf32dd..31f74671ca 100644
--- a/tensorflow/core/kernels/scatter_nd_op_gpu.cu.cc
+++ b/tensorflow/core/kernels/scatter_nd_op_gpu.cu.cc
@@ -136,7 +136,9 @@ struct ScatterNdFunctor<GPUDevice, T, Index, op, IXDIM> {
   DECLARE_GPU_SPECS_INDEX_OP_IXDIM(T, Index, op, 2); \
   DECLARE_GPU_SPECS_INDEX_OP_IXDIM(T, Index, op, 3); \
   DECLARE_GPU_SPECS_INDEX_OP_IXDIM(T, Index, op, 4); \
-  DECLARE_GPU_SPECS_INDEX_OP_IXDIM(T, Index, op, 5);
+  DECLARE_GPU_SPECS_INDEX_OP_IXDIM(T, Index, op, 5); \
+  DECLARE_GPU_SPECS_INDEX_OP_IXDIM(T, Index, op, 6); \
+  DECLARE_GPU_SPECS_INDEX_OP_IXDIM(T, Index, op, 7);
 
 #define DECLARE_GPU_SPECS_INDEX(T, Index)                                \
   DECLARE_GPU_SPECS_INDEX_OP(T, Index, scatter_nd_op::UpdateOp::ASSIGN); \
diff --git a/tensorflow/python/kernel_tests/gather_nd_op_test.py b/tensorflow/python/kernel_tests/gather_nd_op_test.py
index 5109ed98c9..91ebe8de99 100644
--- a/tensorflow/python/kernel_tests/gather_nd_op_test.py
+++ b/tensorflow/python/kernel_tests/gather_nd_op_test.py
@@ -255,6 +255,35 @@ class GatherNdTest(test.TestCase):
     with self.test_session(use_gpu=True):
       self.assertAllEqual(expected_grads, grads.eval())
 
+  def testGradientsRank7Elements(self):
+    # Shape [1,1,2,1,1,2,2]
+    indices = constant_op.constant(
+        [[[
+            [[[[0, 0, 0, 0, 0, 1], [0, 0, 1, 0, 0, 0]]]],
+            [[[[0, 0, 0, 0, 0, 0], [0, 0, 1, 0, 0, 1]]]]
+        ]]],
+        dtype=dtypes.int32)
+    inputs = constant_op.constant(
+        [[[
+            [[[[1, 3], [5, 7]]]],
+            [[[[2, 4], [6, 8]]]]
+        ]]], dtype=dtypes.float64)
+    outputs = array_ops.gather_nd(inputs, indices)
+
+    grad_vals = constant_op.constant(
+        [[[
+            [[[[1, 2], [3, 4]]]],
+            [[[[5, 6], [7, 8]]]]
+        ]]], dtype=dtypes.float64)
+    grads = gradients_impl.gradients([outputs], [inputs], [grad_vals])[0]
+    expected_grads = np.array(
+        [[[
+            [[[[5, 6], [1, 2]]]],
+            [[[[3, 4], [7, 8]]]]
+        ]]], dtype=np.float64)
+    with self.test_session(use_gpu=True):
+      self.assertAllEqual(expected_grads, grads.eval())
+
   def testGradientsInt64Indices(self):
     indices = constant_op.constant(
         [[[0, 1], [1, 0]], [[0, 0], [1, 1]]], dtype=dtypes.int64)
diff --git a/tensorflow/python/kernel_tests/scatter_nd_ops_test.py b/tensorflow/python/kernel_tests/scatter_nd_ops_test.py
index d7bde04230..9f57949515 100644
--- a/tensorflow/python/kernel_tests/scatter_nd_ops_test.py
+++ b/tensorflow/python/kernel_tests/scatter_nd_ops_test.py
@@ -350,7 +350,7 @@ class StatefulScatterNdTest(test.TestCase):
         indices = np.array([2, 0, 5])
         op(ref, indices, updates).eval()
 
-        # Indicies out of range should not fail.
+        # Indices out of range should not fail.
         indices = np.array([-1, 0, 5])
         op(ref, indices, updates).eval()
         indices = np.array([2, 0, 6])
@@ -502,6 +502,43 @@ class ScatterNdTest(test.TestCase):
       if self.non_aliasing_add_test:
         self.assertAllEqual(expected_input_grad, input_grad.eval())
 
+  def testGradientsRank7SliceUpdate(self):
+    indices = constant_op.constant(
+        [[[
+            [[[[0, 0, 0, 0, 0, 1], [0, 0, 1, 0, 0, 0]]]],
+            [[[[0, 0, 0, 0, 0, 0], [0, 0, 1, 0, 0, 1]]]]
+        ]]], dtype=dtypes.int32)
+    updates = constant_op.constant(
+        [[[
+            [[[[5, 6], [2, 4]]]],
+            [[[[1, 3], [6, 8]]]]
+        ]]], dtype=dtypes.float64)
+    shape = constant_op.constant([1, 1, 2, 1, 1, 2, 2], dtype=dtypes.int32)
+    input_ = array_ops.zeros(shape, dtype=dtypes.float64)
+    outputs = self.scatter_nd(indices, updates, shape, input_)
+
+    grad_vals = constant_op.constant(
+        [[[
+            [[[[1, 2], [3, 4]]]],
+            [[[[5, 6], [7, 8]]]]
+        ]]], dtype=dtypes.float64)
+    updates_grad, input_grad = gradients_impl.gradients(
+        [outputs], [updates, input_], [grad_vals])
+    expected_updates_grad = np.array(
+        [[[
+            [[[[3, 4], [5, 6]]]],
+            [[[[1, 2], [7, 8]]]]
+        ]]], dtype=np.float64)
+    expected_input_grad = np.array(
+        [[[
+            [[[[1, 2], [3, 4]]]],
+            [[[[5, 6], [7, 8]]]]
+        ]]], dtype=np.float64)
+    with self.test_session():
+      self.assertAllEqual(expected_updates_grad, updates_grad.eval())
+      if self.non_aliasing_add_test:
+        self.assertAllEqual(expected_input_grad, input_grad.eval())
+
   def testScatterNdRepatedIndicesAdd(self):
     indices = array_ops.zeros([100000, 1], dtypes.int32)
     values = np.random.randn(100000)
-- 
GitLab


From c02cfb040d2609d605b909b81f4419e948e1560d Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 4 Dec 2017 01:32:12 -0800
Subject: [PATCH 0569/1225] Add an argument for additional linkopts for
 py_wrappers rule.

PiperOrigin-RevId: 177784085
---
 tensorflow/tensorflow.bzl | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/tensorflow/tensorflow.bzl b/tensorflow/tensorflow.bzl
index 0015eb0094..d194b37700 100644
--- a/tensorflow/tensorflow.bzl
+++ b/tensorflow/tensorflow.bzl
@@ -458,6 +458,8 @@ def tf_gen_op_wrappers_cc(name,
 #     "name" arg)
 #   op_whitelist: if not empty, only op names in this list will be wrapped. It
 #     is invalid to specify both "hidden" and "op_whitelist".
+#   cc_linkopts: Optional linkopts to be added to tf_cc_binary that contains the
+#     specified ops.
 def tf_gen_op_wrapper_py(name,
                          out=None,
                          hidden=None,
@@ -466,7 +468,8 @@ def tf_gen_op_wrapper_py(name,
                          require_shape_functions=False,
                          hidden_file=None,
                          generated_target_name=None,
-                         op_whitelist=[]):
+                         op_whitelist=[],
+                         cc_linkopts=[]):
   if (hidden or hidden_file) and op_whitelist:
     fail('Cannot pass specify both hidden and op_whitelist.')
 
@@ -476,7 +479,7 @@ def tf_gen_op_wrapper_py(name,
     deps = [str(Label("//tensorflow/core:" + name + "_op_lib"))]
   tf_cc_binary(
       name=tool_name,
-      linkopts=["-lm"],
+      linkopts=["-lm"] + cc_linkopts,
       copts=tf_copts(),
       linkstatic=1,  # Faster to link this one-time-use binary dynamically
       deps=([
-- 
GitLab


From 540e86701e077b6b537ee839be296dc3a6cd167a Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 4 Dec 2017 05:09:52 -0800
Subject: [PATCH 0570/1225] Wrappers for CUDA 9 warp-synchronous intrinsics.

PiperOrigin-RevId: 177799252
---
 .../kernels/reduce_slice_ops_gpu.cu.cc        |  11 +-
 tensorflow/core/BUILD                         |   7 +
 tensorflow/core/kernels/bias_op_gpu.cu.cc     |  18 +-
 .../core/kernels/depthwise_conv_op_gpu.cu.cc  |  11 +-
 .../core/kernels/scatter_nd_op_gpu.cu.cc      |  21 +
 tensorflow/core/kernels/svd_op_gpu.cu.cc      |   4 +-
 tensorflow/core/util/cuda_device_functions.h  | 418 +++++++++
 tensorflow/core/util/cuda_kernel_helper.h     | 837 ++----------------
 .../core/util/cuda_kernel_helper_test.cu.cc   |  12 +-
 tensorflow/core/util/cuda_launch_config.h     | 284 ++++++
 10 files changed, 851 insertions(+), 772 deletions(-)
 create mode 100644 tensorflow/core/util/cuda_device_functions.h
 create mode 100644 tensorflow/core/util/cuda_launch_config.h

diff --git a/tensorflow/contrib/reduce_slice_ops/kernels/reduce_slice_ops_gpu.cu.cc b/tensorflow/contrib/reduce_slice_ops/kernels/reduce_slice_ops_gpu.cu.cc
index 8e6870fadd..501cddb8c8 100644
--- a/tensorflow/contrib/reduce_slice_ops/kernels/reduce_slice_ops_gpu.cu.cc
+++ b/tensorflow/contrib/reduce_slice_ops/kernels/reduce_slice_ops_gpu.cu.cc
@@ -34,9 +34,9 @@ namespace functor {
   __global__ void ReduceSliceDeviceKernel##reduceop(                           \
       Cuda3DLaunchConfig config, Index indices_width, Index bound,             \
       const T begin, const Index *indices, const T *input, T *out) {           \
-    CUDA_AXIS_KERNEL_LOOP(x, config.virtual_thread_count, x) {                 \
-      CUDA_AXIS_KERNEL_LOOP(y, config.virtual_thread_count, y) {               \
-        CUDA_AXIS_KERNEL_LOOP(z, config.virtual_thread_count, z) {             \
+    CUDA_AXIS_KERNEL_LOOP(x, config.virtual_thread_count.x, X) {               \
+      CUDA_AXIS_KERNEL_LOOP(y, config.virtual_thread_count.y, Y) {             \
+        CUDA_AXIS_KERNEL_LOOP(z, config.virtual_thread_count.z, Z) {           \
           Index outidx = x * config.virtual_thread_count.y *                   \
                              config.virtual_thread_count.z +                   \
                          y * config.virtual_thread_count.z + z;                \
@@ -68,8 +68,9 @@ namespace functor {
       if (sizex * sizey * sizez == 0) {                                        \
         return;                                                                \
       }                                                                        \
-      Cuda3DLaunchConfig config = GetCuda3DLaunchConfig(sizex, sizey, sizez, d,\
-          ReduceSliceDeviceKernel##reduceop<T, Index>, 0, 0);                  \
+      Cuda3DLaunchConfig config = GetCuda3DLaunchConfig(                       \
+          sizex, sizey, sizez, d, ReduceSliceDeviceKernel##reduceop<T, Index>, \
+          0, 0);                                                               \
                                                                                \
       ReduceSliceDeviceKernel##reduceop<T, Index>                              \
           <<<config.block_count, config.thread_per_block, 0, d.stream()>>>(    \
diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD
index 4b5f67baad..d77021c3ee 100644
--- a/tensorflow/core/BUILD
+++ b/tensorflow/core/BUILD
@@ -1847,6 +1847,13 @@ cc_library(
     ],
 )
 
+tf_cuda_library(
+    name = "cuda_device_functions",
+    hdrs = ["util/cuda_device_functions.h"],
+    visibility = ["//visibility:public"],
+    deps = [":framework_lite"],
+)
+
 # TODO(josh11b): Is this needed, or can we just use ":protos_all_cc"?
 cc_library(
     name = "protos_cc",
diff --git a/tensorflow/core/kernels/bias_op_gpu.cu.cc b/tensorflow/core/kernels/bias_op_gpu.cu.cc
index 42f3db1d79..f9a207208a 100644
--- a/tensorflow/core/kernels/bias_op_gpu.cu.cc
+++ b/tensorflow/core/kernels/bias_op_gpu.cu.cc
@@ -173,19 +173,13 @@ __global__ void BiasGradNCHW_SharedAtomics(const T* output_backprop,
   // Accumulate the results in the shared memory into the first element.
   // No syncthreads is needed since this is only in the same warp.
   int32 thread_index = threadIdx.x;
-  if (thread_index < 16) {
-    s_data[thread_index] += s_data[thread_index + 16];
-    __syncwarp(0xFFFF);
-    if (thread_index < 8) s_data[thread_index] += s_data[thread_index + 8];
-    __syncwarp(0xFF);
-    if (thread_index < 4) s_data[thread_index] += s_data[thread_index + 4];
-    __syncwarp(0xF);
-    if (thread_index < 2) s_data[thread_index] += s_data[thread_index + 2];
-    __syncwarp(0x3);
+  if (thread_index < 32) {
+    AccT data = s_data[thread_index];
+    for (int32 offset = warpSize / 2; offset > 0; offset /= 2) {
+      data += CudaShuffleDownSync(kCudaWarpAll, data, offset);
+    }
     if (thread_index == 0) {
-      T val = T(s_data[0] + s_data[1]);
-      // The first thread writes out the accumulated result to global location.
-      CudaAtomicAdd(bias_backprop + bias_index, val);
+      CudaAtomicAdd(bias_backprop + bias_index, T(data));
     }
   }
 }
diff --git a/tensorflow/core/kernels/depthwise_conv_op_gpu.cu.cc b/tensorflow/core/kernels/depthwise_conv_op_gpu.cu.cc
index 903aac5d68..de0bf84c8b 100644
--- a/tensorflow/core/kernels/depthwise_conv_op_gpu.cu.cc
+++ b/tensorflow/core/kernels/depthwise_conv_op_gpu.cu.cc
@@ -34,6 +34,7 @@ limitations under the License.
 
 namespace tensorflow {
 
+typedef Eigen::GpuDevice GPUDevice;
 using Eigen::GpuDevice;
 
 // Returns whether depthwise convolution forward or backward input pass can be
@@ -1028,7 +1029,7 @@ __device__ __forceinline__ T WarpSumReduce(T val) {
   int zeros = sub_warp * kWidth;
   unsigned mask = ((1UL << kWidth) - 1) << zeros;
   for (int delta = kWidth / 2; delta > 0; delta /= 2) {
-    val += CudaShuffleXor(mask, val, delta);
+    val += CudaShuffleXorSync(mask, val, delta);
   }
   return val;
 }
@@ -1145,7 +1146,7 @@ __launch_bounds__(1024, 2) void DepthwiseConv2dBackpropFilterGPUKernelNHWCSmall(
 
     // Note: the condition to reach this is uniform across the entire block.
     __syncthreads();
-    unsigned active_threads = CudaBallot(CUDA_WARP_ALL, depth_in_range);
+    unsigned active_threads = CudaBallotSync(kCudaWarpAll, depth_in_range);
 
     if (depth_in_range) {
       const T* const out_ptr = inout_offset + output;
@@ -1159,7 +1160,7 @@ __launch_bounds__(1024, 2) void DepthwiseConv2dBackpropFilterGPUKernelNHWCSmall(
           T val = out1 * tile_ptr[0] + out2 * tile_ptr[tile_offset];
           // Warp-accumulate pixels of the same depth and write to accumulator.
           for (int delta = 16; delta >= kBlockSlices; delta /= 2) {
-            val += CudaShuffleDown(active_threads, val, delta);
+            val += CudaShuffleDownSync(active_threads, val, delta);
           }
           if (!(thread_idx & 32 - kBlockSlices) /* lane_idx < kBlockSlices */) {
             *accum_ptr = val;
@@ -1399,7 +1400,7 @@ __launch_bounds__(1024, 2) void DepthwiseConv2dBackpropFilterGPUKernelNCHWSmall(
 
     // Note: the condition to reach this is uniform across the entire block.
     __syncthreads();
-    unsigned active_threads = CudaBallot(CUDA_WARP_ALL, slice_in_range);
+    unsigned active_threads = CudaBallotSync(kCudaWarpAll, slice_in_range);
 
     if (slice_in_range) {
       const T* const out_ptr = inout_offset + output;
@@ -1413,7 +1414,7 @@ __launch_bounds__(1024, 2) void DepthwiseConv2dBackpropFilterGPUKernelNCHWSmall(
           T val = out1 * tile_ptr[0] + out2 * tile_ptr[tile_offset];
           // Warp-accumulate pixels of the same depth and write to accumulator.
           for (int delta = 16 / kBlockSlices; delta > 0; delta /= 2) {
-            val += CudaShuffleDown(active_threads, val, delta);
+            val += CudaShuffleDownSync(active_threads, val, delta);
           }
           if (!(thread_idx & 32 / kBlockSlices - 1)) {
             *accum_ptr = val;
diff --git a/tensorflow/core/kernels/scatter_nd_op_gpu.cu.cc b/tensorflow/core/kernels/scatter_nd_op_gpu.cu.cc
index 31f74671ca..a3c21edc15 100644
--- a/tensorflow/core/kernels/scatter_nd_op_gpu.cu.cc
+++ b/tensorflow/core/kernels/scatter_nd_op_gpu.cu.cc
@@ -55,6 +55,27 @@ struct LeftUpdate<T, scatter_nd_op::UpdateOp::SUB> {
   }
 };
 
+// Specializations for std::complex, updating real and imaginary part
+// individually. Even though this is not an atomic op anymore, it is safe
+// because there is only one type of op per kernel.
+template <typename T>
+struct LeftUpdate<std::complex<T>, scatter_nd_op::UpdateOp::ADD> {
+  EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC void operator()(
+      std::complex<T>* out, const std::complex<T>& val) {
+    T* ptr = reinterpret_cast<T*>(out);
+    CudaAtomicAdd(ptr, val.real());
+    CudaAtomicAdd(ptr, val.imag());
+  }
+};
+
+template <typename T>
+struct LeftUpdate<std::complex<T>, scatter_nd_op::UpdateOp::SUB> {
+  EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC void operator()(
+      std::complex<T>* out, const std::complex<T>& val) {
+    LeftUpdate<std::complex<T>, scatter_nd_op::UpdateOp::ADD>()(out, -val);
+  }
+};
+
 }  // namespace
 
 template <typename T, typename Index, scatter_nd_op::UpdateOp op, int IXDIM>
diff --git a/tensorflow/core/kernels/svd_op_gpu.cu.cc b/tensorflow/core/kernels/svd_op_gpu.cu.cc
index dedc2da60b..8c3a58b108 100644
--- a/tensorflow/core/kernels/svd_op_gpu.cu.cc
+++ b/tensorflow/core/kernels/svd_op_gpu.cu.cc
@@ -63,8 +63,8 @@ __global__ void ComputeValueOfVKernel(Cuda2DLaunchConfig config, int64 m,
                                       int64 ldu, const Scalar* M,
                                       const Scalar* U, const Scalar* S,
                                       Scalar* V) {
-  CUDA_AXIS_KERNEL_LOOP(batch, config.virtual_thread_count, x) {
-    CUDA_AXIS_KERNEL_LOOP(i, config.virtual_thread_count, y) {
+  CUDA_AXIS_KERNEL_LOOP(batch, config.virtual_thread_count.x, X) {
+    CUDA_AXIS_KERNEL_LOOP(i, config.virtual_thread_count.y, Y) {
       Scalar v = M[i + m * batch] * U[ldu * (i + m * batch)] * S[batch];
       CudaAtomicAdd(V + batch, v);
     }
diff --git a/tensorflow/core/util/cuda_device_functions.h b/tensorflow/core/util/cuda_device_functions.h
new file mode 100644
index 0000000000..973a43d78f
--- /dev/null
+++ b/tensorflow/core/util/cuda_device_functions.h
@@ -0,0 +1,418 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_CORE_UTIL_CUDA_DEVICE_FUNCTIONS_H_
+#define TENSORFLOW_CORE_UTIL_CUDA_DEVICE_FUNCTIONS_H_
+
+/**
+ * Wrappers and helpers for CUDA device code.
+ *
+ * Wraps the warp-cooperative intrinsics introduced in CUDA 9 to provide
+ * backwards compatibility, see go/volta-porting for details.
+ * Provides atomic operations on types that aren't natively supported.
+ */
+
+#if GOOGLE_CUDA
+
+#include <algorithm>
+#include <complex>
+#include "cuda/include/cuda.h"
+#include "cuda/include/device_functions.h"
+#include "tensorflow/core/platform/types.h"
+
+#if __CUDACC_VER_MAJOR__ >= 9
+#include "cuda/include/cuda_fp16.h"
+#elif __CUDACC_VER__ >= 7050
+#include "cuda/include/cuda_fp16.h"
+#else
+#endif
+
+namespace tensorflow {
+
+namespace detail {
+
+// Helper for range-based for loop using 'delta' increments.
+// Usage: see CudaGridRange?() functions below.
+template <typename T>
+class CudaGridRange {
+  struct Iterator {
+    __device__ Iterator(T index, T delta) : index_(index), delta_(delta) {}
+    __device__ T operator*() const { return index_; }
+    __device__ Iterator& operator++() {
+      index_ += delta_;
+      return *this;
+    }
+    __device__ bool operator!=(const Iterator& other) const {
+      bool greater = index_ > other.index_;
+      bool less = index_ < other.index_;
+      // Anything past an end iterator (delta_ == 0) is equal.
+      // In range-based for loops, this optimizes to 'return less'.
+      if (!other.delta_) {
+        return less;
+      }
+      if (!delta_) {
+        return greater;
+      }
+      return less || greater;
+    }
+
+   private:
+    T index_;
+    const T delta_;
+  };
+
+ public:
+  __device__ CudaGridRange(T begin, T delta, T end)
+      : begin_(begin), delta_(delta), end_(end) {}
+
+  __device__ Iterator begin() const { return Iterator{begin_, delta_}; }
+  __device__ Iterator end() const { return Iterator{end_, 0}; }
+
+ private:
+  T begin_;
+  T delta_;
+  T end_;
+};
+
+}  // namespace detail
+
+// Helper to visit indices in the range 0 <= i < count, using the x-coordinate
+// of the global thread index. That is, each index i is visited by all threads
+// with the same x-coordinate.
+// Usage: for(int i : CudaGridRangeX(count)) { visit(i); }
+template <typename T>
+__device__ detail::CudaGridRange<T> CudaGridRangeX(T count) {
+  return detail::CudaGridRange<T>(blockIdx.x * blockDim.x + threadIdx.x,
+                                  gridDim.x * blockDim.x, count);
+}
+
+// Helper to visit indices in the range 0 <= i < count using the y-coordinate.
+// Usage: for(int i : CudaGridRangeY(count)) { visit(i); }
+template <typename T>
+__device__ detail::CudaGridRange<T> CudaGridRangeY(T count) {
+  return detail::CudaGridRange<T>(blockIdx.y * blockDim.y + threadIdx.y,
+                                  gridDim.y * blockDim.y, count);
+}
+
+// Helper to visit indices in the range 0 <= i < count using the z-coordinate.
+// Usage: for(int i : CudaGridRangeZ(count)) { visit(i); }
+template <typename T>
+__device__ detail::CudaGridRange<T> CudaGridRangeZ(T count) {
+  return detail::CudaGridRange<T>(blockIdx.z * blockDim.z + threadIdx.z,
+                                  gridDim.z * blockDim.z, count);
+}
+
+// Mask for all 32 threads in a warp.
+const unsigned kCudaWarpAll = 0xffffffff;
+
+// On sm_6x and earlier, verifies that all bits in mask corresponding to active
+// threads of the warp are set. It does not verify the converse (bits of
+// inactive threads are not set), because all syncs are unblocked when a thread
+// exits the kernel, but the ballot of inactive (including exited) threads
+// returns 0.
+__device__ inline void CudaVerifySyncMask(unsigned mask) {
+#if __CUDA_ARCH__ < 700
+  assert(0 == (__ballot(1) & ~mask));  // Active threads must have mask bit set.
+#endif
+}
+
+// For all *_sync wrappers below, it is illegal to synchronize threads from
+// different program locations, because that is not supported before sm_70.
+// Code that requires sm_70 (and CUDA 9) may use the intrinsic directly.
+
+// Wrapper for __syncwarp.
+__device__ inline void CudaSyncWarp(unsigned mask = kCudaWarpAll) {
+  CudaVerifySyncMask(mask);
+#if CUDA_VERSION >= 9000
+  __syncwarp(mask);
+#endif
+}
+
+// Wrapper for __ballot_sync.
+__device__ inline unsigned CudaBallotSync(unsigned mask, int pred) {
+  CudaVerifySyncMask(mask);
+#if CUDA_VERSION >= 9000
+  return __ballot_sync(mask, pred);
+#else
+  return __ballot(pred);
+#endif
+}
+
+// Wrapper for __any_sync.
+__device__ inline int CudaAnySync(unsigned mask, int pred) {
+  CudaVerifySyncMask(mask);
+#if CUDA_VERSION >= 9000
+  return __any_sync(mask, pred);
+#else
+  return __any(pred);
+#endif
+}
+
+// Wrapper for __all_sync.
+__device__ inline int CudaAllSync(unsigned mask, int pred) {
+  CudaVerifySyncMask(mask);
+#if CUDA_VERSION >= 9000
+  return __all_sync(mask, pred);
+#else
+  return __all(pred);
+#endif
+}
+
+// Wrapper for __shfl_sync.
+template <typename T>
+__device__ T CudaShuffleSync(unsigned mask, T value, int src_lane,
+                             int width = warpSize) {
+  CudaVerifySyncMask(mask);
+#if CUDA_VERSION >= 9000
+  return __shfl_sync(mask, value, src_lane, width);
+#else
+  return __shfl(value, src_lane, width);
+#endif
+}
+
+// Variant of the (undocumented) version from the CUDA SDK, but using unsigned
+// instead of float for lo and hi (which is incorrect with ftz, for example).
+// See b/69446944.
+__device__ inline double CudaShuffleSync(unsigned mask, double value,
+                                         int src_lane, int width = warpSize) {
+  unsigned lo, hi;
+  asm volatile("mov.b64 {%0,%1}, %2;" : "=r"(lo), "=r"(hi) : "d"(value));
+  hi = CudaShuffleSync(mask, hi, src_lane, width);
+  lo = CudaShuffleSync(mask, lo, src_lane, width);
+  asm volatile("mov.b64 %0, {%1,%2};" : "=d"(value) : "r"(lo), "r"(hi));
+  return value;
+}
+
+// Wrapper for __shfl_up_sync.
+template <typename T>
+__device__ inline T CudaShuffleUpSync(unsigned mask, T value, int delta,
+                                      int width = warpSize) {
+  CudaVerifySyncMask(mask);
+#if CUDA_VERSION >= 9000
+  return __shfl_up_sync(mask, value, delta, width);
+#else
+  return __shfl_up(value, delta, width);
+#endif
+}
+
+// Variant of the (undocumented) version from the CUDA SDK, but using unsigned
+// instead of float for lo and hi (which is incorrect with ftz, for example).
+// See b/69446944.
+__device__ inline double CudaShuffleUpSync(unsigned mask, double value,
+                                           int delta, int width = warpSize) {
+  unsigned lo, hi;
+  asm volatile("mov.b64 {%0,%1}, %2;" : "=r"(lo), "=r"(hi) : "d"(value));
+  hi = CudaShuffleUpSync(mask, hi, delta, width);
+  lo = CudaShuffleUpSync(mask, lo, delta, width);
+  asm volatile("mov.b64 %0, {%1,%2};" : "=d"(value) : "r"(lo), "r"(hi));
+  return value;
+}
+
+// Wrapper for __shfl_down_sync.
+template <typename T>
+__device__ inline T CudaShuffleDownSync(unsigned mask, T value, int delta,
+                                        int width = warpSize) {
+  CudaVerifySyncMask(mask);
+#if CUDA_VERSION >= 9000
+  return __shfl_down_sync(mask, value, delta, width);
+#else
+  return __shfl_down(value, delta, width);
+#endif
+}
+
+// Variant of the (undocumented) version from the CUDA SDK, but using unsigned
+// instead of float for lo and hi (which is incorrect with ftz, for example).
+// See b/69446944.
+__device__ inline double CudaShuffleDownSync(unsigned mask, double value,
+                                             int delta, int width = warpSize) {
+  unsigned lo, hi;
+  asm volatile("mov.b64 {%0,%1}, %2;" : "=r"(lo), "=r"(hi) : "d"(value));
+  hi = CudaShuffleDownSync(mask, hi, delta, width);
+  lo = CudaShuffleDownSync(mask, lo, delta, width);
+  asm volatile("mov.b64 %0, {%1,%2};" : "=d"(value) : "r"(lo), "r"(hi));
+  return value;
+}
+
+// Wrapper for __shfl_xor_sync.
+template <typename T>
+__device__ T CudaShuffleXorSync(unsigned mask, T value, int lane_mask,
+                                int width = warpSize) {
+  CudaVerifySyncMask(mask);
+#if CUDA_VERSION >= 9000
+  return __shfl_xor_sync(mask, value, lane_mask, width);
+#else
+  return __shfl_xor(value, lane_mask, width);
+#endif
+}
+
+// Variant of the (undocumented) version from the CUDA SDK, but using unsigned
+// instead of float for lo and hi (which is incorrect with ftz, for example).
+// See b/69446944.
+__device__ inline double CudaShuffleXorSync(unsigned mask, double value,
+                                            int lane_mask,
+                                            int width = warpSize) {
+  unsigned lo, hi;
+  asm volatile("mov.b64 {%0,%1}, %2;" : "=r"(lo), "=r"(hi) : "d"(value));
+  hi = CudaShuffleXorSync(mask, hi, lane_mask, width);
+  lo = CudaShuffleXorSync(mask, lo, lane_mask, width);
+  asm volatile("mov.b64 %0, {%1,%2};" : "=d"(value) : "r"(lo), "r"(hi));
+  return value;
+}
+
+// Wrapper for __ldg.
+template <typename T>
+__host__ __device__ T CudaLdg(const T* address) {
+#if __CUDA_ARCH__ >= 350
+  return __ldg(address);
+#else
+  return *address;
+#endif
+}
+
+__host__ __device__ inline bool CudaLdg(const bool* address) {
+  return CudaLdg(reinterpret_cast<const char*>(address)) != 0;
+}
+
+__host__ __device__ inline std::complex<float> CudaLdg(
+    const std::complex<float>* address) {
+#if __CUDA_ARCH__ >= 350
+  float2 mem = __ldg(reinterpret_cast<const float2*>(address));
+  return std::complex<float>(mem.x, mem.y);
+#else
+  return *address;
+#endif
+}
+
+__host__ __device__ inline std::complex<double> CudaLdg(
+    const std::complex<double>* address) {
+#if __CUDA_ARCH__ >= 350
+  double2 mem = __ldg(reinterpret_cast<const double2*>(address));
+  return std::complex<double>(mem.x, mem.y);
+#else
+  return *address;
+#endif
+}
+
+// Zeroes count elements starting at ptr using all threads of a 1-D grid.
+// Note: this function does not synchronize, and therefore the memory range is
+// not guaranteed to be zero until the next kernel launch.
+template <typename T>
+__global__ void SetZero(const int count, T* ptr) {
+  // Check that the grid is one dimensional and index doesn't overflow.
+  assert(blockDim.y == 1 && blockDim.z == 1);
+  assert(blockDim.x * gridDim.x / blockDim.x == gridDim.x);
+  for (int i : CudaGridRangeX(count)) {
+    ptr[i] = T(0);
+  }
+}
+
+namespace detail {
+// Helper function for atomic accumulation implemented as CAS.
+template <typename T, typename F>
+__device__ T CudaAtomicCasHelper(T* ptr, F accumulate) {
+  T old = *ptr;
+  T assumed;
+  do {
+    assumed = old;
+    old = atomicCAS(ptr, assumed, accumulate(assumed));
+  } while (assumed != old);
+  return old;
+}
+
+// Overload for floating point (using integer comparison to handle NaN
+// correctly).
+template <typename F>
+__device__ float CudaAtomicCasHelper(float* ptr, F accumulate) {
+  return __float_as_int(
+      CudaAtomicCasHelper(reinterpret_cast<int32*>(ptr), [accumulate](int32 a) {
+        return __float_as_int(accumulate(__int_as_float(a)));
+      }));
+}
+template <typename F>
+__device__ double CudaAtomicCasHelper(double* ptr, F accumulate) {
+  return __longlong_as_double(CudaAtomicCasHelper(
+      reinterpret_cast<tensorflow::uint64*>(ptr),
+      [accumulate](tensorflow::uint64 a) {
+        return __double_as_longlong(accumulate(__longlong_as_double(a)));
+      }));
+}
+}  // namespace detail
+
+// CUDA provides atomic ops, but not for all types.  We provide wrappers
+// for some ops and provide implementation for all reasonable types.
+
+template <typename T>
+__device__ T CudaAtomicAdd(T* ptr, T value) {
+  return atomicAdd(ptr, value);
+}
+#if __CUDA_ARCH__ < 600
+__device__ inline double CudaAtomicAdd(double* ptr, double value) {
+  return detail::CudaAtomicCasHelper(ptr,
+                                     [value](double a) { return a + value; });
+}
+#elif __clang__
+// Clang cannot compile __nvvm_atom_add_gen_d builtin yet, use inline PTX.
+// see https://reviews.llvm.org/D39638
+__device__ inline double CudaAtomicAdd(double* ptr, double value) {
+  double result;
+  asm volatile("atom.add.f64 %0, [%1], %2;"
+               : "=d"(result)
+               : "l"(ptr), "d"(value)
+               : "memory");
+  return result;
+}
+#endif
+
+template <typename T>
+__device__ T CudaAtomicSub(T* ptr, T value) {
+  return atomicSub(ptr, value);
+}
+// Specializations of substraction which add the negative value.
+__device__ inline float CudaAtomicSub(float* ptr, float value) {
+  return CudaAtomicAdd(ptr, -value);
+}
+__device__ inline double CudaAtomicSub(double* ptr, double value) {
+  return CudaAtomicAdd(ptr, -value);
+}
+__device__ inline tensorflow::uint64 CudaAtomicSub(tensorflow::uint64* ptr,
+                                                   tensorflow::uint64 value) {
+  return CudaAtomicAdd(ptr, -value);
+}
+
+template <typename T>
+__device__ T CudaAtomicMax(T* ptr, T value) {
+  return atomicMax(ptr, value);
+}
+#if __CUDA_ARCH__ < 320
+__device__ inline tensorflow::uint64 CudaAtomicMax(tensorflow::uint64* ptr,
+                                                   tensorflow::uint64 value) {
+  return detail::CudaAtomicCasHelper(
+      ptr, [value](tensorflow::uint64 a) { return max(a, value); });
+}
+#endif
+
+template <typename T>
+__device__ inline T CudaAtomicMul(T* ptr, T value) {
+  return detail::CudaAtomicCasHelper(ptr, [value](T a) { return a * value; });
+}
+template <typename T>
+__device__ inline T CudaAtomicDiv(T* ptr, T value) {
+  return detail::CudaAtomicCasHelper(ptr, [value](T a) { return a / value; });
+}
+
+}  // namespace tensorflow
+
+#endif  // GOOGLE_CUDA
+#endif  // TENSORFLOW_CORE_UTIL_CUDA_KERNEL_HELPER_H_
diff --git a/tensorflow/core/util/cuda_kernel_helper.h b/tensorflow/core/util/cuda_kernel_helper.h
index cf11f419a4..b71218d73c 100644
--- a/tensorflow/core/util/cuda_kernel_helper.h
+++ b/tensorflow/core/util/cuda_kernel_helper.h
@@ -18,299 +18,125 @@ limitations under the License.
 
 #if GOOGLE_CUDA
 
-#include <algorithm>
+#include "tensorflow/core/util/cuda_device_functions.h"
+#include "tensorflow/core/util/cuda_launch_config.h"
 
-#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
-#include "cuda/include/cuda.h"
-#include "tensorflow/core/framework/op_kernel.h"
-#include "tensorflow/core/platform/logging.h"
-#include "tensorflow/core/platform/stream_executor.h"
-#include "tensorflow/core/platform/types.h"
+// Deprecated, use 'for(int i : CudaGridRangeX(n))' instead.
+#define CUDA_1D_KERNEL_LOOP(i, n) \
+  for (int i : ::tensorflow::CudaGridRangeX<int>(n))
+// Deprecated, use 'for(int i : CudaGridRange?(n))' instead.
+#define CUDA_AXIS_KERNEL_LOOP(i, n, axis) \
+  for (int i : ::tensorflow::CudaGridRange##axis<int>(n))
 
-// Mask for all 32 threads in a warp.
-#define CUDA_WARP_ALL 0xFFFFFFFF
-
-#if defined(CUDA_VERSION) && CUDA_VERSION < 9000
-// CUDA 9.0 introduces a new, light-weight barrier synchronization primitive
-// that operates at the warp-scope. This is required to ensure visibility of
-// reads/writes among threads that can make indepenent progress on Volta.
-// For previous CUDA versions these synchronizations not necessary, and we
-// define an empty function as a convenience for backward compatibility.
-__device__ inline void __syncwarp(unsigned mask = CUDA_WARP_ALL) {}
-
-// CUDA 9.0 deprecates the warp-intrinsic functions (shfl, ballot, etc.) in
-// favor of synchronizing versions. These ensure that all warp lanes specified
-// in mask execute the intrinsic in convergence. Here we provide legacy mappings
-// to the less-verbose routines provided in previous versions of CUDA.
-#define __ballot_sync(mask, predicate) __ballot(predicate)
-#define __shfl_sync(mask, val, srcLane, width) __shfl(val, srcLane, width)
-#define __shfl_down_sync(mask, val, delta, width) __shfl_down(val, delta, width)
-#define __shfl_up_sync(mask, val, delta, width) __shfl_up(val, delta, width)
-#define __shfl_xor_sync(mask, val, laneMask, width) \
-  __shfl_xor(val, laneMask, width)
-#endif
-
-// Usage of GetCudaLaunchConfig, GetCuda2DLaunchConfig, and
-// GetCuda3DLaunchConfig:
-//
-// There are two versions of GetCudaLaunchConfig and GetCuda2DLaunchConfig, one
-// version uses heuristics without any knowledge of the device kernel, the other
-// version uses cudaOccupancyMaxPotentialBlockSize to determine the theoretical
-// launch parameters that maximize occupancy. Currently, only the maximum
-// occupancy version of GetCuda3DLaunchConfig is available.
-//
-// For large number of work elements, the convention is that each kernel would
-// iterate through its assigned range. The return value of GetCudaLaunchConfig
-// is struct CudaLaunchConfig, which contains all the information needed for the
-// kernel launch, including: virtual number of threads, the number of threads
-// per block and number of threads per block used inside <<< >>> of a kernel
-// launch. GetCuda2DLaunchConfig and GetCuda3DLaunchConfig does the same thing
-// as CudaLaunchConfig. The only difference is the dimension. The macros
-// CUDA_1D_KERNEL_LOOP and CUDA_AXIS_KERNEL_LOOP might be used to do inner loop.
-//
-/* Sample code:
-
-__global__ void MyKernel1D(CudaLaunchConfig config, other_args...) {
-  CUDA_1D_KERNEL_LOOP(x, config.virtual_thread_count) {
-    do_your_job_here;
-  }
+namespace tensorflow {
+template <typename T>
+__host__ __device__ inline T ldg(const T* ptr) {
+  return CudaLdg(ptr);
 }
 
-__global__ void MyKernel2D(Cuda2DLaunchConfig config, other_args...) {
-  CUDA_AXIS_KERNEL_LOOP(x, config.virtual_thread_count, x) {
-    CUDA_AXIS_KERNEL_LOOP(y, config.virtual_thread_count, y) {
-      do_your_job_here;
-    }
-  }
+template <typename T>
+__host__ __device__ inline const T& tf_min(const T& x, const T& y) {
+  return x < y ? x : y;
 }
 
-__global__ void MyKernel3D(Cuda3DLaunchConfig config, other_args...) {
-  CUDA_AXIS_KERNEL_LOOP(x, config.virtual_thread_count, x) {
-    CUDA_AXIS_KERNEL_LOOP(y, config.virtual_thread_count, y) {
-      CUDA_AXIS_KERNEL_LOOP(z, config.virtual_thread_count, z) {
-        do_your_job_here;
-      }
-    }
-  }
+template <typename T>
+__host__ __device__ inline const T& tf_max(const T& x, const T& y) {
+  return x < y ? y : x;
 }
 
-void MyDriverFunc(const GPUDevice &d) {
-  // use heuristics
-  CudaLaunchConfig cfg1 = GetCudaLaunchConfig(10240, d);
-  MyKernel1D <<<config.block_count,
-                config.thread_per_block, 0, d.stream()>>> (cfg1, other_args...);
-  Cuda2DLaunchConfig cfg2 = GetCuda2DLaunchConfig(10240, 10240, d);
-  MyKernel2D <<<config.block_count,
-                config.thread_per_block, 0, d.stream()>>> (cfg2, other_args...);
-  Cuda3DLaunchConfig cfg3 = GetCuda3DLaunchConfig(4096, 4096, 100, d);
-  MyKernel3D <<<config.block_count,
-                config.thread_per_block, 0, d.stream()>>> (cfg3, other_args...);
-
-  // maximize occupancy
-  CudaLaunchConfig cfg4 = GetCudaLaunchConfig(10240, d, MyKernel1D, 0, 0 );
-  MyKernel1D <<<config.block_count,
-                config.thread_per_block, 0, d.stream()>>> (cfg4, other_args...);
-  Cuda2DLaunchConfig cfg5 = GetCuda2DLaunchConfig(10240, 10240, d,
-                                                  MyKernel1D, 0, 0);
-  MyKernel2D <<<config.block_count,
-                config.thread_per_block, 0, d.stream()>>> (cfg5, other_args...);
-  Cuda3DLaunchConfig cfg6 = GetCuda3DLaunchConfig(4096, 4096, 100, d,
-                                                  MyKernel1D, 0, 0);
-  MyKernel3D <<<config.block_count,
-                config.thread_per_block, 0, d.stream()>>> (cfg6, other_args...);
+// Overloads of the above functions for float and double.
+__host__ __device__ inline float tf_min(float x, float y) {
+  return fminf(x, y);
 }
-
-// See the test for this for more example:
-//
-https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/util/cuda_kernel_helper_test.cu.cc
-
-*/
-
-#define CUDA_1D_KERNEL_LOOP(i, n)                            \
-  for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < n; \
-       i += blockDim.x * gridDim.x)
-
-#define CUDA_AXIS_KERNEL_LOOP(i, n, axis)                                  \
-  for (int i = blockIdx.axis * blockDim.axis + threadIdx.axis; i < n.axis; \
-       i += blockDim.axis * gridDim.axis)
-
-#define DIV_UP(a, b) (((a) + (b)-1) / (b))
-
-namespace tensorflow {
-
-typedef Eigen::GpuDevice GPUDevice;
-
-struct CudaLaunchConfig {
-  // Logical number of thread that works on the elements. If each logical
-  // thread works on exactly a single element, this is the same as the working
-  // element count.
-  int virtual_thread_count = -1;
-  // Number of threads per block.
-  int thread_per_block = -1;
-  // Number of blocks for Cuda kernel launch.
-  int block_count = -1;
-};
-
-// Calculate the Cuda launch config we should use for a kernel launch.
-// This is assuming the kernel is quite simple and will largely be
-// memory-limited.
-// REQUIRES: work_element_count > 0.
-inline CudaLaunchConfig GetCudaLaunchConfig(int work_element_count,
-                                            const GPUDevice& d) {
-  CHECK_GT(work_element_count, 0);
-  CudaLaunchConfig config;
-  const int virtual_thread_count = work_element_count;
-  const int physical_thread_count = std::min(
-      d.getNumCudaMultiProcessors() * d.maxCudaThreadsPerMultiProcessor(),
-      virtual_thread_count);
-  const int thread_per_block = std::min(1024, d.maxCudaThreadsPerBlock());
-  const int block_count =
-      std::min(DIV_UP(physical_thread_count, thread_per_block),
-               d.getNumCudaMultiProcessors());
-
-  config.virtual_thread_count = virtual_thread_count;
-  config.thread_per_block = thread_per_block;
-  config.block_count = block_count;
-  return config;
+__host__ __device__ inline double tf_min(double x, double y) {
+  return fmin(x, y);
 }
-
-// Calculate the Cuda launch config we should use for a kernel launch. This
-// variant takes the resource limits of func into account to maximize occupancy.
-// REQUIRES: work_element_count > 0.
-template <typename DeviceFunc>
-inline CudaLaunchConfig GetCudaLaunchConfig(int work_element_count,
-                                            const GPUDevice& d, DeviceFunc func,
-                                            size_t dynamic_shared_memory_size,
-                                            int block_size_limit) {
-  CHECK_GT(work_element_count, 0);
-  CudaLaunchConfig config;
-  int block_count = 0;
-  int thread_per_block = 0;
-
-  cudaError_t err = cudaOccupancyMaxPotentialBlockSize(
-      &block_count, &thread_per_block, func, dynamic_shared_memory_size,
-      block_size_limit);
-  CHECK_EQ(err, cudaSuccess);
-
-  block_count =
-      std::min(block_count, DIV_UP(work_element_count, thread_per_block));
-
-  config.virtual_thread_count = work_element_count;
-  config.thread_per_block = thread_per_block;
-  config.block_count = block_count;
-  return config;
+__host__ __device__ inline float tf_max(float x, float y) {
+  return fmaxf(x, y);
+}
+__host__ __device__ inline double tf_max(double x, double y) {
+  return fmax(x, y);
 }
 
-struct Cuda2DLaunchConfig {
-  dim3 virtual_thread_count = dim3(0, 0, 0);
-  dim3 thread_per_block = dim3(0, 0, 0);
-  dim3 block_count = dim3(0, 0, 0);
-};
-
-inline Cuda2DLaunchConfig GetCuda2DLaunchConfig(int xdim, int ydim,
-                                                const GPUDevice& d) {
-  Cuda2DLaunchConfig config;
-
-  if (xdim <= 0 || ydim <= 0) {
-    return config;
-  }
-
-  const int kThreadsPerBlock = 256;
-  int block_cols = std::min(xdim, kThreadsPerBlock);
-  // ok to round down here and just do more loops in the kernel
-  int block_rows = std::max(kThreadsPerBlock / block_cols, 1);
-
-  const int physical_thread_count =
-      d.getNumCudaMultiProcessors() * d.maxCudaThreadsPerMultiProcessor();
-
-  const int max_blocks = std::max(physical_thread_count / kThreadsPerBlock, 1);
-
-  config.virtual_thread_count = dim3(xdim, ydim, 1);
-  config.thread_per_block = dim3(block_cols, block_rows, 1);
-
-  int grid_x = std::min(DIV_UP(xdim, block_cols), max_blocks);
+__device__ inline Eigen::half CudaShuffleSync(unsigned mask, Eigen::half value,
+                                              int src_lane,
+                                              int width = warpSize) {
+  return Eigen::half(
+      CudaShuffleSync(mask, static_cast<uint16>(value), src_lane, width));
+}
 
-  config.block_count = dim3(
-      grid_x, std::min(max_blocks / grid_x, std::max(ydim / block_rows, 1)), 1);
-  return config;
+__device__ EIGEN_ALWAYS_INLINE Eigen::half CudaShuffleUpSync(
+    unsigned mask, Eigen::half value, int delta, int width = warpSize) {
+  return Eigen::half(
+      CudaShuffleUpSync(mask, static_cast<uint16>(value), delta, width));
 }
 
-// Calculate the Cuda 2D and 3D launch config we should use for a kernel launch.
-// This variant takes the resource limits of func into account to maximize
-// occupancy.
-using Cuda3DLaunchConfig = Cuda2DLaunchConfig;
+__device__ EIGEN_ALWAYS_INLINE Eigen::half CudaShuffleDownSync(
+    unsigned mask, Eigen::half value, int delta, int width = warpSize) {
+  return Eigen::half(
+      CudaShuffleDownSync(mask, static_cast<uint16>(value), delta, width));
+}
 
-template <typename DeviceFunc>
-inline Cuda3DLaunchConfig GetCuda3DLaunchConfig(
-    int xdim, int ydim, int zdim, const GPUDevice& d, DeviceFunc func,
-    size_t dynamic_shared_memory_size, int block_size_limit) {
-  Cuda3DLaunchConfig config;
+__device__ EIGEN_ALWAYS_INLINE Eigen::half CudaShuffleXorSync(
+    unsigned mask, Eigen::half value, int lane_mask, int width = warpSize) {
+  return Eigen::half(
+      CudaShuffleXorSync(mask, static_cast<uint16>(value), lane_mask, width));
+}
 
-  if (xdim <= 0 || ydim <= 0 || zdim <= 0) {
-    return config;
+namespace detail {
+// Overload of above function for half. Note that we don't have
+// atomicCAS() for anything less than 32 bits, so we need to include the
+// other 16 bits in the operation.
+//
+// This version is going to be very slow
+// under high concurrency, since most threads will be spinning on failing
+// their compare-and-swap tests. (The fact that we get false sharing on the
+// neighboring fp16 makes this even worse.) If you are doing a large reduction,
+// you are much better off with doing the intermediate steps in fp32 and then
+// switching to fp16 as late as you can in the calculations.
+//
+// Note: Assumes little endian.
+template <typename F>
+__device__ Eigen::half CudaAtomicCasHelper(Eigen::half* ptr, F accumulate) {
+  namespace half_impl = Eigen::half_impl;
+  intptr_t intptr = reinterpret_cast<intptr_t>(ptr);
+  if (intptr & 0x3) {
+    assert(!(intptr & 0x1));
+    // The half is in the second part of the uint32 (upper 16 bits).
+    uint32* address = reinterpret_cast<uint32*>(intptr - 2);
+    uint32 result = CudaAtomicCasHelper(address, [accumulate](uint32 a) {
+      Eigen::half acc = accumulate(
+          half_impl::__half_raw{static_cast<unsigned short>(a >> 16)});
+      uint32_t upper = static_cast<half_impl::__half_raw>(acc).x;
+      return (upper << 16) | (a & 0xffff);
+    });
+    return half_impl::__half_raw{static_cast<uint16>(result >> 16)};
+  } else {
+    // The half is in the first part of the uint32 (lower 16 bits).
+    uint32* address = reinterpret_cast<uint32*>(intptr);
+    uint32 result = CudaAtomicCasHelper(address, [accumulate](uint32 a) {
+      Eigen::half acc = accumulate(
+          half_impl::__half_raw{static_cast<unsigned short>(a & 0xffff)});
+      uint32_t lower = static_cast<half_impl::__half_raw>(acc).x;
+      return (a & 0xffff0000) | lower;
+    });
+    return half_impl::__half_raw{static_cast<uint16>(result & 0xffff)};
   }
-
-  int dev;
-  cudaGetDevice(&dev);
-  cudaDeviceProp deviceProp;
-  cudaGetDeviceProperties(&deviceProp, dev);
-  int xthreadlimit = deviceProp.maxThreadsDim[0];
-  int ythreadlimit = deviceProp.maxThreadsDim[1];
-  int zthreadlimit = deviceProp.maxThreadsDim[2];
-  int xgridlimit = deviceProp.maxGridSize[0];
-  int ygridlimit = deviceProp.maxGridSize[1];
-  int zgridlimit = deviceProp.maxGridSize[2];
-
-  int block_count = 0;
-  int thread_per_block = 0;
-  cudaError_t err = cudaOccupancyMaxPotentialBlockSize(
-      &block_count, &thread_per_block, func, dynamic_shared_memory_size,
-      block_size_limit);
-  CHECK_EQ(err, cudaSuccess);
-
-#define MIN3(a, b, c) std::min((a), std::min((b), (c)))
-  int threadsx = MIN3(xdim, thread_per_block, xthreadlimit);
-  int threadsy =
-      MIN3(ydim, std::max(thread_per_block / threadsx, 1), ythreadlimit);
-  int threadsz =
-      MIN3(zdim, std::max(thread_per_block / (threadsx * threadsy), 1),
-           zthreadlimit);
-
-  int blocksx = MIN3(block_count, DIV_UP(xdim, threadsx), xgridlimit);
-  int blocksy =
-      MIN3(DIV_UP(block_count, blocksx), DIV_UP(ydim, threadsy), ygridlimit);
-  int blocksz = MIN3(DIV_UP(block_count, (blocksx * blocksy)),
-                     DIV_UP(zdim, threadsz), zgridlimit);
-#undef MIN3
-
-  config.virtual_thread_count = dim3(xdim, ydim, zdim);
-  config.thread_per_block = dim3(threadsx, threadsy, threadsz);
-  config.block_count = dim3(blocksx, blocksy, blocksz);
-  return config;
 }
+}  // namespace detail
 
-template <typename DeviceFunc>
-inline Cuda2DLaunchConfig GetCuda2DLaunchConfig(
-    int xdim, int ydim, const GPUDevice& d, DeviceFunc func,
-    size_t dynamic_shared_memory_size, int block_size_limit) {
-  return GetCuda3DLaunchConfig(xdim, ydim, 1, d, func,
-                               dynamic_shared_memory_size, block_size_limit);
+__device__ inline Eigen::half CudaAtomicAdd(Eigen::half* ptr,
+                                            Eigen::half value) {
+  return detail::CudaAtomicCasHelper(
+      ptr, [value](Eigen::half a) { return a + value; });
 }
-
-// Returns a raw reference to the current cuda stream.  Required by a
-// number of kernel calls (for which StreamInterface* does not work), i.e.
-// CUB and certain cublas primitives.
-inline const cudaStream_t& GetCudaStream(OpKernelContext* context) {
-  const cudaStream_t* ptr = CHECK_NOTNULL(
-      reinterpret_cast<const cudaStream_t*>(context->op_device_context()
-                                                ->stream()
-                                                ->implementation()
-                                                ->CudaStreamMemberHack()));
-  return *ptr;
+__device__ inline Eigen::half CudaAtomicSub(Eigen::half* ptr,
+                                            Eigen::half value) {
+  return detail::CudaAtomicCasHelper(
+      ptr, [value](Eigen::half a) { return a - value; });
 }
 
 namespace cuda_helper {
-
 template <typename IntType>
 __device__ IntType upper_bound(IntType* first, IntType count, IntType val) {
   IntType* orig = first;
@@ -330,481 +156,8 @@ __device__ IntType upper_bound(IntType* first, IntType count, IntType val) {
 
   return first - orig;
 }
-
 }  // namespace cuda_helper
-
-template <typename T>
-__device__ __host__ inline T ldg(const T* address) {
-#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 350
-  return __ldg(address);
-#else
-  return *address;
-#endif
-}
-
-template <>
-__device__ __host__ inline std::complex<float> ldg(
-    const std::complex<float>* address) {
-#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 350
-  float2 mem = __ldg(reinterpret_cast<const float2*>(address));
-  return std::complex<float>(mem.x, mem.y);
-#else
-  return *address;
-#endif
-}
-
-template <>
-__device__ __host__ inline std::complex<double> ldg(
-    const std::complex<double>* address) {
-#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 350
-  double2 mem = __ldg(reinterpret_cast<const double2*>(address));
-  return std::complex<double>(mem.x, mem.y);
-#else
-  return *address;
-#endif
-}
-
-template <>
-__device__ __host__ inline Eigen::half ldg(const Eigen::half* address) {
-#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 350
-  return Eigen::half_impl::raw_uint16_to_half(
-      __ldg(reinterpret_cast<const uint16_t*>(address)));
-#else
-  return *address;
-#endif
-}
-
-template <>
-__device__ __host__ inline bool ldg(const bool* address) {
-#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 350
-  return *reinterpret_cast<const bool*>(
-      __ldg(reinterpret_cast<const char*>(address)));
-#else
-  return *address;
-#endif
-}
-
-// CUDA provides atomic ops, but not for all types.  We provide wrappers
-// for some ops and provide implementation for all reasonable types.
-#define CUDA_ATOMIC_WRAPPER(op, T) \
-  __device__ __forceinline__ T CudaAtomic##op(T* address, T val)
-
-#define USE_CUDA_ATOMIC(op, T) \
-  CUDA_ATOMIC_WRAPPER(op, T) { return atomic##op(address, val); }
-
-// For atomicAdd.
-USE_CUDA_ATOMIC(Add, int32);
-USE_CUDA_ATOMIC(Add, uint32);
-USE_CUDA_ATOMIC(Add, uint64);
-USE_CUDA_ATOMIC(Add, float);
-
-// For atomicMax.
-USE_CUDA_ATOMIC(Max, int32);
-USE_CUDA_ATOMIC(Max, uint32);
-#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 350
-USE_CUDA_ATOMIC(Max, uint64);
-#else
-// The uint64 overload of atomicMax() is only available for __CUDA_ARCH__ >=
-// 350.  If not satisfied, we provide a custom implementation using atomicCAS().
-CUDA_ATOMIC_WRAPPER(Max, uint64) {
-  uint64* address_as_ull = reinterpret_cast<uint64*>(address);
-  uint64 old = *address_as_ull, assumed;
-
-  do {
-    assumed = old;
-    old = atomicCAS(address_as_ull, assumed, max(val, assumed));
-  } while (assumed != old);
-
-  return old;
-}
-#endif
-
-// Custom implementation of atomicAdd for double.
-// This implementation is copied from CUDA manual.
-CUDA_ATOMIC_WRAPPER(Add, double) {
-  uint64* address_as_ull = reinterpret_cast<uint64*>(address);
-  uint64 old = *address_as_ull, assumed;
-
-  do {
-    assumed = old;
-    old = atomicCAS(address_as_ull, assumed,
-                    __double_as_longlong(val + __longlong_as_double(assumed)));
-
-    // Note: uses integer comparison to avoid hang in case of NaN
-  } while (assumed != old);
-
-  return __longlong_as_double(old);
-}
-
-// Custom implementation of atomicAdd for std::complex<float>.
-// This implementation performs to atomic additions on the components.
-CUDA_ATOMIC_WRAPPER(Add, std::complex<float>) {
-#if defined(__CUDA_ARCH__)
-#if __CUDA_ARCH__ >= 350
-  float2* addr_as_float2 = reinterpret_cast<float2*>(address);
-  float2* val_as_float2 = reinterpret_cast<float2*>(&val);
-  CudaAtomicAdd(&(addr_as_float2->x), val_as_float2->x);
-  CudaAtomicAdd(&(addr_as_float2->y), val_as_float2->y);
-#else
-  static_assert(sizeof(std::complex<float>) == 2 * sizeof(float),
-                "Unable to compile CudaAtomicAdd for complex64 because "
-                "sizeof(complex64) != 2*sizeof(float32)");
-  float* addr_as_float = reinterpret_cast<float*>(address);
-  float* val_as_float = reinterpret_cast<float*>(&val);
-  CudaAtomicAdd(addr_as_float, *val_as_float);
-  CudaAtomicAdd(addr_as_float + 1, *(val_as_float + 1));
-#endif
-#endif
-  return *address;
-}
-
-// Custom implementation of atomicAdd for std::complex<double>.
-// This implementation performs to atomic additions on the components
-// using the double atomic wrapper above.
-CUDA_ATOMIC_WRAPPER(Add, complex128) {
-#if defined(__CUDA_ARCH__)
-#if __CUDA_ARCH__ >= 350
-  double2* addr_as_double2 = reinterpret_cast<double2*>(address);
-  double2* val_as_double2 = reinterpret_cast<double2*>(&val);
-  CudaAtomicAdd(&(addr_as_double2->x), val_as_double2->x);
-  CudaAtomicAdd(&(addr_as_double2->y), val_as_double2->y);
-#else
-  static_assert(sizeof(std::complex<double>) == 2 * sizeof(double),
-                "Unable to compile CudaAtomicAdd for complex128 because "
-                "sizeof(complex128) != 2*sizeof(float64)");
-  double* addr_as_double = reinterpret_cast<double*>(address);
-  double* val_as_double = reinterpret_cast<double*>(&val);
-  CudaAtomicAdd(addr_as_double, *val_as_double);
-  CudaAtomicAdd(addr_as_double + 1, *(val_as_double + 1));
-#endif
-#endif
-  return *address;
-}
-
-// Helper functions for CudaAtomicAdd(half*, half), below.
-//
-// Note that if __CUDA_ARCH__ >= 530, we could probably use __hadd2()
-// for a more efficient implementation, assuming that adding -0.0
-// will never harm the neighboring value. In this version, we take special
-// care to guarantee the bits of the untouched value are unchanged.
-inline __device__ uint32 add_to_low_half(uint32 val, float x) {
-  Eigen::half low_half;
-  low_half.x = static_cast<uint16>(val & 0xffffu);
-  low_half = static_cast<Eigen::half>(static_cast<float>(low_half) + x);
-  return (val & 0xffff0000u) | low_half.x;
-}
-
-inline __device__ uint32 add_to_high_half(uint32 val, float x) {
-  Eigen::half high_half;
-  high_half.x = static_cast<uint16>(val >> 16);
-  high_half = static_cast<Eigen::half>(static_cast<float>(high_half) + x);
-  return (val & 0xffffu) | (high_half.x << 16);
-}
-
-// Custom implementation of atomicAdd for half. Note that we don't have
-// atomicCAS() for anything less than 32 bits, so we need to include the
-// other 16 bits in the operation.
-//
-// Unlike the other atomic adds, this version is going to be very slow
-// under high concurrency, since most threads will be spinning on failing
-// their compare-and-swap tests. (The fact that we get false sharing on the
-// neighboring fp16 makes this even worse.) If you are doing a large reduction,
-// you are much better off with doing the intermediate steps in fp32 and then
-// switching to fp16 as late as you can in the calculations.
-//
-// Note: Assumes little endian.
-CUDA_ATOMIC_WRAPPER(Add, Eigen::half) {
-  float val_as_float(val);
-  intptr_t address_int = reinterpret_cast<intptr_t>(address);
-  if ((address_int & 0x2) == 0) {
-    // The half is in the first part of the uint32 (lower 16 bits).
-    uint32* address_as_uint32 = reinterpret_cast<uint32*>(address);
-    assert(((intptr_t)address_as_uint32 & 0x3) == 0);
-    uint32 old = *address_as_uint32, assumed;
-
-    do {
-      assumed = old;
-      old = atomicCAS(address_as_uint32, assumed,
-                      add_to_low_half(assumed, val_as_float));
-
-      // Note: uses integer comparison to avoid hang in case of NaN
-    } while (assumed != old);
-
-    Eigen::half ret;
-    ret.x = old & 0xffffu;
-    return ret;
-  } else {
-    // The half is in the second part of the uint32 (upper 16 bits).
-    uint32* address_as_uint32 = reinterpret_cast<uint32*>(address_int - 2);
-    assert(((intptr_t)address_as_uint32 & 0x3) == 0);
-    uint32 old = *address_as_uint32, assumed;
-
-    do {
-      assumed = old;
-      old = atomicCAS(address_as_uint32, assumed,
-                      add_to_high_half(assumed, val_as_float));
-
-      // Note: uses integer comparison to avoid hang in case of NaN
-    } while (assumed != old);
-
-    Eigen::half ret;
-    ret.x = old >> 16;
-    return ret;
-  }
-}
-
-template <typename T>
-__global__ void SetZero(const int nthreads, T* bottom_diff) {
-  CUDA_1D_KERNEL_LOOP(index, nthreads) { *(bottom_diff + index) = T(0); }
-}
-
-// For atomicSub.
-
-// Custom implementation for sub by just negating the value.
-#define WRAPPED_ATOMIC_SUB(T) \
-  CUDA_ATOMIC_WRAPPER(Sub, T) { return CudaAtomicAdd(address, -val); }
-
-WRAPPED_ATOMIC_SUB(uint64);
-WRAPPED_ATOMIC_SUB(int32);
-WRAPPED_ATOMIC_SUB(uint32);
-WRAPPED_ATOMIC_SUB(Eigen::half);
-WRAPPED_ATOMIC_SUB(float);
-WRAPPED_ATOMIC_SUB(double);
-
-CUDA_ATOMIC_WRAPPER(Sub, complex64) {
-  const std::complex<float> Tneg(-val.real(), -val.imag());
-  return CudaAtomicAdd(address, Tneg);
-}
-
-CUDA_ATOMIC_WRAPPER(Sub, complex128) {
-  const std::complex<double> Tneg(-val.real(), -val.imag());
-  return CudaAtomicAdd(address, Tneg);
-}
-
-#undef WRAPPED_ATOMIC_SUB
-
-// For atomicMul.
-CUDA_ATOMIC_WRAPPER(Mul, int32) {
-  int32 old = *address, assumed;
-  do {
-    assumed = old;
-    old = atomicCAS(address, assumed, val * assumed);
-  } while (assumed != old);
-  return old;
-}
-
-CUDA_ATOMIC_WRAPPER(Mul, uint32) {
-  uint32 old = *address, assumed;
-  do {
-    assumed = old;
-    old = atomicCAS(address, assumed, val * assumed);
-  } while (assumed != old);
-  return old;
-}
-
-CUDA_ATOMIC_WRAPPER(Mul, uint64) {
-  uint64 old = *address, assumed;
-  do {
-    assumed = old;
-    old = atomicCAS(address, assumed, val * assumed);
-  } while (assumed != old);
-  return old;
-}
-
-CUDA_ATOMIC_WRAPPER(Mul, float) {
-  int32* address_as_int = reinterpret_cast<int32*>(address);
-  int32 old = *address_as_int, assumed;
-  do {
-    assumed = old;
-    old = atomicCAS(address_as_int, assumed,
-                    __float_as_int(val * __int_as_float(assumed)));
-  } while (assumed != old);
-  return __int_as_float(old);
-}
-
-CUDA_ATOMIC_WRAPPER(Mul, double) {
-  uint64* address_as_ull = reinterpret_cast<uint64*>(address);
-  uint64 old = *address_as_ull, assumed;
-  do {
-    assumed = old;
-    old = atomicCAS(address_as_ull, assumed,
-                    __double_as_longlong(val * __longlong_as_double(assumed)));
-  } while (assumed != old);
-  return __longlong_as_double(old);
-}
-
-// For atomicDiv.
-CUDA_ATOMIC_WRAPPER(Div, int32) {
-  int32 old = *address, assumed;
-  do {
-    assumed = old;
-    old = atomicCAS(address, assumed, assumed / val);
-  } while (assumed != old);
-  return old;
-}
-
-CUDA_ATOMIC_WRAPPER(Div, uint32) {
-  uint32 old = *address, assumed;
-  do {
-    assumed = old;
-    old = atomicCAS(address, assumed, assumed / val);
-  } while (assumed != old);
-  return old;
-}
-
-CUDA_ATOMIC_WRAPPER(Div, uint64) {
-  uint64 old = *address, assumed;
-  do {
-    assumed = old;
-    old = atomicCAS(address, assumed, assumed / val);
-  } while (assumed != old);
-  return old;
-}
-
-CUDA_ATOMIC_WRAPPER(Div, float) {
-  int32* address_as_int = reinterpret_cast<int32*>(address);
-  int32 old = *address_as_int, assumed;
-  do {
-    assumed = old;
-    old = atomicCAS(address_as_int, assumed,
-                    __float_as_int(__int_as_float(assumed) / val));
-  } while (assumed != old);
-  return __int_as_float(old);
-}
-
-CUDA_ATOMIC_WRAPPER(Div, double) {
-  uint64* address_as_ull = reinterpret_cast<uint64*>(address);
-  uint64 old = *address_as_ull, assumed;
-  do {
-    assumed = old;
-    old = atomicCAS(address_as_ull, assumed,
-                    __double_as_longlong(__longlong_as_double(assumed) / val));
-  } while (assumed != old);
-  return __longlong_as_double(old);
-}
-
-#undef USE_CUDA_ATOMIC
-#undef CUDA_ATOMIC_WRAPPER
-
-template <typename T>
-EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE T tf_min(const T& x, const T& y) {
-  return x > y ? y : x;
-}
-
-template <typename T>
-EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE T tf_max(const T& x, const T& y) {
-  return x < y ? y : x;
-}
-
-__device__ EIGEN_ALWAYS_INLINE unsigned CudaBallot(unsigned mask,
-                                                   int predicate) {
-  return __ballot_sync(mask, predicate);
-}
-
-template <typename T>
-__device__ EIGEN_ALWAYS_INLINE T CudaShuffle(unsigned mask, T value,
-                                             int srcLane,
-                                             int width = warpSize) {
-  return __shfl_sync(mask, value, srcLane, width);
-}
-
-// Variant of the (undocumented) version from the CUDA SDK, but using unsigned
-// instead of float for lo and hi (which is incorrect with ftz, for example).
-// A bug has been filed with NVIDIA and will be fixed in the next CUDA release.
-// TODO(csigg): remove when the bug is fixed in the next CUDA release.
-__device__ EIGEN_ALWAYS_INLINE double CudaShuffle(unsigned mask, double value,
-                                                  int srcLane,
-                                                  int width = warpSize) {
-  unsigned lo, hi;
-  asm volatile("mov.b64 {%0,%1}, %2;" : "=r"(lo), "=r"(hi) : "d"(value));
-  hi = __shfl_sync(mask, hi, srcLane, width);
-  lo = __shfl_sync(mask, lo, srcLane, width);
-  asm volatile("mov.b64 %0, {%1,%2};" : "=d"(value) : "r"(lo), "r"(hi));
-  return value;
-}
-
-template <typename T>
-__device__ EIGEN_ALWAYS_INLINE T CudaShuffleUp(unsigned mask, T value,
-                                               int delta,
-                                               int width = warpSize) {
-  return __shfl_up_sync(mask, value, delta, width);
-}
-
-// Variant of the (undocumented) version from the CUDA SDK, but using unsigned
-// instead of float for lo and hi (which is incorrect with ftz, for example).
-// A bug has been filed with NVIDIA and will be fixed in the next CUDA release.
-// TODO(csigg): remove when the bug is fixed in the next CUDA release.
-__device__ EIGEN_ALWAYS_INLINE double CudaShuffleUp(unsigned mask, double value,
-                                                    int delta,
-                                                    int width = warpSize) {
-  unsigned lo, hi;
-  asm volatile("mov.b64 {%0,%1}, %2;" : "=r"(lo), "=r"(hi) : "d"(value));
-  hi = __shfl_up_sync(mask, hi, delta, width);
-  lo = __shfl_up_sync(mask, lo, delta, width);
-  asm volatile("mov.b64 %0, {%1,%2};" : "=d"(value) : "r"(lo), "r"(hi));
-  return value;
-}
-
-template <typename T>
-__device__ EIGEN_ALWAYS_INLINE T CudaShuffleDown(unsigned mask, T value,
-                                                 int delta,
-                                                 int width = warpSize) {
-  return __shfl_down_sync(mask, value, delta, width);
-}
-
-__device__ EIGEN_ALWAYS_INLINE Eigen::half CudaShuffleDown(
-    unsigned mask, Eigen::half value, int delta, int width = warpSize) {
-  return Eigen::half(
-      __shfl_down_sync(mask, static_cast<uint16>(value), delta, width));
-}
-
-// Variant of the (undocumented) version from the CUDA SDK, but using unsigned
-// instead of float for lo and hi (which is incorrect with ftz, for example).
-// A bug has been filed with NVIDIA and will be fixed in the next CUDA release.
-// TODO(csigg): remove when the bug is fixed in the next CUDA release.
-__device__ EIGEN_ALWAYS_INLINE double CudaShuffleDown(unsigned mask,
-                                                      double value, int delta,
-                                                      int width = warpSize) {
-  unsigned lo, hi;
-  asm volatile("mov.b64 {%0,%1}, %2;" : "=r"(lo), "=r"(hi) : "d"(value));
-  hi = __shfl_down_sync(mask, hi, delta, width);
-  lo = __shfl_down_sync(mask, lo, delta, width);
-  asm volatile("mov.b64 %0, {%1,%2};" : "=d"(value) : "r"(lo), "r"(hi));
-  return value;
-}
-
-template <typename T>
-__device__ EIGEN_ALWAYS_INLINE T CudaShuffleXor(unsigned mask, T value,
-                                                int laneMask,
-                                                int width = warpSize) {
-  return __shfl_xor_sync(mask, value, laneMask, width);
-}
-
-__device__ EIGEN_ALWAYS_INLINE Eigen::half CudaShuffleXor(
-    unsigned mask, Eigen::half value, int laneMask, int width = warpSize) {
-  return Eigen::half(
-      __shfl_xor_sync(mask, static_cast<uint16>(value), laneMask, width));
-}
-
-// Variant of the (undocumented) version from the CUDA SDK, but using unsigned
-// instead of float for lo and hi (which is incorrect with ftz, for example).
-// A bug has been filed with NVIDIA and will be fixed in the next CUDA release.
-// TODO(csigg): remove when the bug is fixed in the next CUDA release.
-__device__ EIGEN_ALWAYS_INLINE double CudaShuffleXor(unsigned mask,
-                                                     double value, int laneMask,
-                                                     int width = warpSize) {
-  unsigned lo, hi;
-  asm volatile("mov.b64 {%0,%1}, %2;" : "=r"(lo), "=r"(hi) : "d"(value));
-  hi = __shfl_xor_sync(mask, hi, laneMask, width);
-  lo = __shfl_xor_sync(mask, lo, laneMask, width);
-  asm volatile("mov.b64 %0, {%1,%2};" : "=d"(value) : "r"(lo), "r"(hi));
-  return value;
-}
-
 }  // namespace tensorflow
 
-#undef DIV_UP
-
 #endif  // GOOGLE_CUDA
-
 #endif  // TENSORFLOW_CORE_UTIL_CUDA_KERNEL_HELPER_H_
diff --git a/tensorflow/core/util/cuda_kernel_helper_test.cu.cc b/tensorflow/core/util/cuda_kernel_helper_test.cu.cc
index 6991554eff..4eb1558e58 100644
--- a/tensorflow/core/util/cuda_kernel_helper_test.cu.cc
+++ b/tensorflow/core/util/cuda_kernel_helper_test.cu.cc
@@ -52,11 +52,11 @@ __global__ void Count1D(CudaLaunchConfig config, int bufsize, int* outbuf) {
   }
 }
 __global__ void Count2D(Cuda2DLaunchConfig config, int bufsize, int* outbuf) {
-  CUDA_AXIS_KERNEL_LOOP(x, config.virtual_thread_count, x) {
+  CUDA_AXIS_KERNEL_LOOP(x, config.virtual_thread_count.x, X) {
     if (x < 0) {  // x might overflow when testing extreme case
       break;
     }
-    CUDA_AXIS_KERNEL_LOOP(y, config.virtual_thread_count, y) {
+    CUDA_AXIS_KERNEL_LOOP(y, config.virtual_thread_count.y, Y) {
       if (y < 0) {  // y might overflow when testing extreme case
         break;
       }
@@ -66,15 +66,15 @@ __global__ void Count2D(Cuda2DLaunchConfig config, int bufsize, int* outbuf) {
   }
 }
 __global__ void Count3D(Cuda3DLaunchConfig config, int bufsize, int* outbuf) {
-  CUDA_AXIS_KERNEL_LOOP(x, config.virtual_thread_count, x) {
+  CUDA_AXIS_KERNEL_LOOP(x, config.virtual_thread_count.x, X) {
     if (x < 0) {  // x might overflow when testing extreme case
       break;
     }
-    CUDA_AXIS_KERNEL_LOOP(y, config.virtual_thread_count, y) {
+    CUDA_AXIS_KERNEL_LOOP(y, config.virtual_thread_count.y, Y) {
       if (y < 0) {  // y might overflow when testing extreme case
         break;
       }
-      CUDA_AXIS_KERNEL_LOOP(z, config.virtual_thread_count, z) {
+      CUDA_AXIS_KERNEL_LOOP(z, config.virtual_thread_count.z, Z) {
         if (z < 0) {  // z might overflow when testing extreme case
           break;
         }
@@ -94,7 +94,7 @@ class CudaLaunchConfigTest : public ::testing::Test {
   const int bufsize = 1024;
   int* outbuf = nullptr;
   Eigen::CudaStreamDevice stream;
-  GPUDevice d = GPUDevice(&stream);
+  Eigen::GpuDevice d = Eigen::GpuDevice(&stream);
 
   virtual void SetUp() {
     cudaError_t err = cudaMallocManaged(&outbuf, sizeof(int) * bufsize);
diff --git a/tensorflow/core/util/cuda_launch_config.h b/tensorflow/core/util/cuda_launch_config.h
new file mode 100644
index 0000000000..3ea33ee6cf
--- /dev/null
+++ b/tensorflow/core/util/cuda_launch_config.h
@@ -0,0 +1,284 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_CORE_UTIL_CUDA_LAUNCH_CONFIG_H_
+#define TENSORFLOW_CORE_UTIL_CUDA_LAUNCH_CONFIG_H_
+
+#if GOOGLE_CUDA
+
+#include <algorithm>
+
+#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
+#include "cuda/include/cuda.h"
+#include "tensorflow/core/framework/op_kernel.h"
+#include "tensorflow/core/platform/logging.h"
+#include "tensorflow/core/platform/stream_executor.h"
+#include "tensorflow/core/platform/types.h"
+
+// Usage of GetCudaLaunchConfig, GetCuda2DLaunchConfig, and
+// GetCuda3DLaunchConfig:
+//
+// There are two versions of GetCudaLaunchConfig and GetCuda2DLaunchConfig, one
+// version uses heuristics without any knowledge of the device kernel, the other
+// version uses cudaOccupancyMaxPotentialBlockSize to determine the theoretical
+// launch parameters that maximize occupancy. Currently, only the maximum
+// occupancy version of GetCuda3DLaunchConfig is available.
+//
+// For large number of work elements, the convention is that each kernel would
+// iterate through its assigned range. The return value of GetCudaLaunchConfig
+// is struct CudaLaunchConfig, which contains all the information needed for the
+// kernel launch, including: virtual number of threads, the number of threads
+// per block and number of threads per block used inside <<< >>> of a kernel
+// launch. GetCuda2DLaunchConfig and GetCuda3DLaunchConfig does the same thing
+// as CudaLaunchConfig. The only difference is the dimension. The macros
+// CUDA_1D_KERNEL_LOOP and CUDA_AXIS_KERNEL_LOOP might be used to do inner loop.
+//
+/* Sample code:
+
+__global__ void MyKernel1D(CudaLaunchConfig config, other_args...) {
+  CUDA_1D_KERNEL_LOOP(x, config.virtual_thread_count) {
+    do_your_job_here;
+  }
+}
+
+__global__ void MyKernel2D(Cuda2DLaunchConfig config, other_args...) {
+  CUDA_AXIS_KERNEL_LOOP(x, config.virtual_thread_count, x) {
+    CUDA_AXIS_KERNEL_LOOP(y, config.virtual_thread_count, y) {
+      do_your_job_here;
+    }
+  }
+}
+
+__global__ void MyKernel3D(Cuda3DLaunchConfig config, other_args...) {
+  CUDA_AXIS_KERNEL_LOOP(x, config.virtual_thread_count, x) {
+    CUDA_AXIS_KERNEL_LOOP(y, config.virtual_thread_count, y) {
+      CUDA_AXIS_KERNEL_LOOP(z, config.virtual_thread_count, z) {
+        do_your_job_here;
+      }
+    }
+  }
+}
+
+void MyDriverFunc(const Eigen::GpuDevice &d) {
+  // use heuristics
+  CudaLaunchConfig cfg1 = GetCudaLaunchConfig(10240, d);
+  MyKernel1D <<<config.block_count,
+                config.thread_per_block, 0, d.stream()>>> (cfg1, other_args...);
+  Cuda2DLaunchConfig cfg2 = GetCuda2DLaunchConfig(10240, 10240, d);
+  MyKernel2D <<<config.block_count,
+                config.thread_per_block, 0, d.stream()>>> (cfg2, other_args...);
+  Cuda3DLaunchConfig cfg3 = GetCuda3DLaunchConfig(4096, 4096, 100, d);
+  MyKernel3D <<<config.block_count,
+                config.thread_per_block, 0, d.stream()>>> (cfg3, other_args...);
+
+  // maximize occupancy
+  CudaLaunchConfig cfg4 = GetCudaLaunchConfig(10240, d, MyKernel1D, 0, 0 );
+  MyKernel1D <<<config.block_count,
+                config.thread_per_block, 0, d.stream()>>> (cfg4, other_args...);
+  Cuda2DLaunchConfig cfg5 = GetCuda2DLaunchConfig(10240, 10240, d,
+                                                  MyKernel1D, 0, 0);
+  MyKernel2D <<<config.block_count,
+                config.thread_per_block, 0, d.stream()>>> (cfg5, other_args...);
+  Cuda3DLaunchConfig cfg6 = GetCuda3DLaunchConfig(4096, 4096, 100, d,
+                                                  MyKernel1D, 0, 0);
+  MyKernel3D <<<config.block_count,
+                config.thread_per_block, 0, d.stream()>>> (cfg6, other_args...);
+}
+
+// See the test for this for more example:
+//
+https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/util/cuda_kernel_helper_test.cu.cc
+
+*/
+
+namespace tensorflow {
+
+inline int DivUp(int a, int b) { return (a + b - 1) / b; }
+
+struct CudaLaunchConfig {
+  // Logical number of thread that works on the elements. If each logical
+  // thread works on exactly a single element, this is the same as the working
+  // element count.
+  int virtual_thread_count = -1;
+  // Number of threads per block.
+  int thread_per_block = -1;
+  // Number of blocks for Cuda kernel launch.
+  int block_count = -1;
+};
+
+// Calculate the Cuda launch config we should use for a kernel launch.
+// This is assuming the kernel is quite simple and will largely be
+// memory-limited.
+// REQUIRES: work_element_count > 0.
+inline CudaLaunchConfig GetCudaLaunchConfig(int work_element_count,
+                                            const Eigen::GpuDevice& d) {
+  CHECK_GT(work_element_count, 0);
+  CudaLaunchConfig config;
+  const int virtual_thread_count = work_element_count;
+  const int physical_thread_count = std::min(
+      d.getNumCudaMultiProcessors() * d.maxCudaThreadsPerMultiProcessor(),
+      virtual_thread_count);
+  const int thread_per_block = std::min(1024, d.maxCudaThreadsPerBlock());
+  const int block_count =
+      std::min(DivUp(physical_thread_count, thread_per_block),
+               d.getNumCudaMultiProcessors());
+
+  config.virtual_thread_count = virtual_thread_count;
+  config.thread_per_block = thread_per_block;
+  config.block_count = block_count;
+  return config;
+}
+
+// Calculate the Cuda launch config we should use for a kernel launch. This
+// variant takes the resource limits of func into account to maximize occupancy.
+// REQUIRES: work_element_count > 0.
+template <typename DeviceFunc>
+inline CudaLaunchConfig GetCudaLaunchConfig(int work_element_count,
+                                            const Eigen::GpuDevice& d,
+                                            DeviceFunc func,
+                                            size_t dynamic_shared_memory_size,
+                                            int block_size_limit) {
+  CHECK_GT(work_element_count, 0);
+  CudaLaunchConfig config;
+  int block_count = 0;
+  int thread_per_block = 0;
+
+  cudaError_t err = cudaOccupancyMaxPotentialBlockSize(
+      &block_count, &thread_per_block, func, dynamic_shared_memory_size,
+      block_size_limit);
+  CHECK_EQ(err, cudaSuccess);
+
+  block_count =
+      std::min(block_count, DivUp(work_element_count, thread_per_block));
+
+  config.virtual_thread_count = work_element_count;
+  config.thread_per_block = thread_per_block;
+  config.block_count = block_count;
+  return config;
+}
+
+struct Cuda2DLaunchConfig {
+  dim3 virtual_thread_count = dim3(0, 0, 0);
+  dim3 thread_per_block = dim3(0, 0, 0);
+  dim3 block_count = dim3(0, 0, 0);
+};
+
+inline Cuda2DLaunchConfig GetCuda2DLaunchConfig(int xdim, int ydim,
+                                                const Eigen::GpuDevice& d) {
+  Cuda2DLaunchConfig config;
+
+  if (xdim <= 0 || ydim <= 0) {
+    return config;
+  }
+
+  const int kThreadsPerBlock = 256;
+  int block_cols = std::min(xdim, kThreadsPerBlock);
+  // ok to round down here and just do more loops in the kernel
+  int block_rows = std::max(kThreadsPerBlock / block_cols, 1);
+
+  const int physical_thread_count =
+      d.getNumCudaMultiProcessors() * d.maxCudaThreadsPerMultiProcessor();
+
+  const int max_blocks = std::max(physical_thread_count / kThreadsPerBlock, 1);
+
+  config.virtual_thread_count = dim3(xdim, ydim, 1);
+  config.thread_per_block = dim3(block_cols, block_rows, 1);
+
+  int grid_x = std::min(DivUp(xdim, block_cols), max_blocks);
+
+  config.block_count = dim3(
+      grid_x, std::min(max_blocks / grid_x, std::max(ydim / block_rows, 1)), 1);
+  return config;
+}
+
+// Calculate the Cuda 2D and 3D launch config we should use for a kernel launch.
+// This variant takes the resource limits of func into account to maximize
+// occupancy.
+using Cuda3DLaunchConfig = Cuda2DLaunchConfig;
+
+template <typename DeviceFunc>
+inline Cuda3DLaunchConfig GetCuda3DLaunchConfig(
+    int xdim, int ydim, int zdim, const Eigen::GpuDevice& d, DeviceFunc func,
+    size_t dynamic_shared_memory_size, int block_size_limit) {
+  Cuda3DLaunchConfig config;
+
+  if (xdim <= 0 || ydim <= 0 || zdim <= 0) {
+    return config;
+  }
+
+  int dev;
+  cudaGetDevice(&dev);
+  cudaDeviceProp deviceProp;
+  cudaGetDeviceProperties(&deviceProp, dev);
+  int xthreadlimit = deviceProp.maxThreadsDim[0];
+  int ythreadlimit = deviceProp.maxThreadsDim[1];
+  int zthreadlimit = deviceProp.maxThreadsDim[2];
+  int xgridlimit = deviceProp.maxGridSize[0];
+  int ygridlimit = deviceProp.maxGridSize[1];
+  int zgridlimit = deviceProp.maxGridSize[2];
+
+  int block_count = 0;
+  int thread_per_block = 0;
+  cudaError_t err = cudaOccupancyMaxPotentialBlockSize(
+      &block_count, &thread_per_block, func, dynamic_shared_memory_size,
+      block_size_limit);
+  CHECK_EQ(err, cudaSuccess);
+
+  auto min3 = [](int a, int b, int c) { return std::min(a, std::min(b, c)); };
+
+  int threadsx = min3(xdim, thread_per_block, xthreadlimit);
+  int threadsy =
+      min3(ydim, std::max(thread_per_block / threadsx, 1), ythreadlimit);
+  int threadsz =
+      min3(zdim, std::max(thread_per_block / (threadsx * threadsy), 1),
+           zthreadlimit);
+
+  int blocksx = min3(block_count, DivUp(xdim, threadsx), xgridlimit);
+  int blocksy =
+      min3(DivUp(block_count, blocksx), DivUp(ydim, threadsy), ygridlimit);
+  int blocksz = min3(DivUp(block_count, (blocksx * blocksy)),
+                     DivUp(zdim, threadsz), zgridlimit);
+
+  config.virtual_thread_count = dim3(xdim, ydim, zdim);
+  config.thread_per_block = dim3(threadsx, threadsy, threadsz);
+  config.block_count = dim3(blocksx, blocksy, blocksz);
+  return config;
+}
+
+template <typename DeviceFunc>
+inline Cuda2DLaunchConfig GetCuda2DLaunchConfig(
+    int xdim, int ydim, const Eigen::GpuDevice& d, DeviceFunc func,
+    size_t dynamic_shared_memory_size, int block_size_limit) {
+  return GetCuda3DLaunchConfig(xdim, ydim, 1, d, func,
+                               dynamic_shared_memory_size, block_size_limit);
+}
+
+// Returns a raw reference to the current cuda stream.  Required by a
+// number of kernel calls (for which StreamInterface* does not work), i.e.
+// CUB and certain cublas primitives.
+inline const cudaStream_t& GetCudaStream(OpKernelContext* context) {
+  const cudaStream_t* ptr = CHECK_NOTNULL(
+      reinterpret_cast<const cudaStream_t*>(context->op_device_context()
+                                                ->stream()
+                                                ->implementation()
+                                                ->CudaStreamMemberHack()));
+  return *ptr;
+}
+
+}  // namespace tensorflow
+
+#endif  // GOOGLE_CUDA
+
+#endif  // TENSORFLOW_CORE_UTIL_CUDA_KERNEL_HELPER_H_
-- 
GitLab


From 5ad554ac06743410aa66f9e94183922ef63e3d29 Mon Sep 17 00:00:00 2001
From: Jan <github@geheimwerk.de>
Date: Mon, 4 Dec 2017 14:08:58 +0100
Subject: [PATCH 0571/1225] =?UTF-8?q?Fixing=20=E2=80=9Cactivate=20the=20Vi?=
 =?UTF-8?q?rtualenv=E2=80=9D.?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 tensorflow/docs_src/install/install_mac.md | 31 ++++++++++++----------
 1 file changed, 17 insertions(+), 14 deletions(-)

diff --git a/tensorflow/docs_src/install/install_mac.md b/tensorflow/docs_src/install/install_mac.md
index 79b383817b..3afd0aec0f 100644
--- a/tensorflow/docs_src/install/install_mac.md
+++ b/tensorflow/docs_src/install/install_mac.md
@@ -79,22 +79,23 @@ Take the following steps to install TensorFlow with Virtualenv:
   4. Activate the Virtualenv environment by issuing one of the
      following commands:
 
-     <pre>$ <b>source ~/tensorflow/bin/activate</b>      # If using bash, sh, ksh, or zsh
-    $ <b>source ~/tensorflow/bin/activate.csh</b>  # If using csh or tcsh </pre>
+     <pre>$ <b>cd <i>targetDirectory</i></b>
+    $ <b>source ./bin/activate</b>      # If using bash, sh, ksh, or zsh
+    $ <b>source ./bin/activate.csh</b>  # If using csh or tcsh </pre>
 
      The preceding `source` command should change your prompt to the following:
 
-     <pre> (tensorflow)$ </pre>
+     <pre> (<i>targetDirectory</i>)$ </pre>
 
   5. Ensure pip ≥8.1 is installed:
 
-     <pre> (tensorflow)$ <b>easy_install -U pip</b></pre>
+     <pre> (<i>targetDirectory</i>)$ <b>easy_install -U pip</b></pre>
 
   6. Issue one of the following commands to install TensorFlow and all the
      packages that TensorFlow requires into the active Virtualenv environment:
 
-     <pre> (tensorflow)$ <b>pip install --upgrade tensorflow</b>      # for Python 2.7
-     (tensorflow)$ <b>pip3 install --upgrade tensorflow</b>     # for Python 3.n
+     <pre> (<i>targetDirectory</i>)$ <b>pip install --upgrade tensorflow</b>      # for Python 2.7
+     (<i>targetDirectory</i>)$ <b>pip3 install --upgrade tensorflow</b>     # for Python 3.n
 
   7. Optional. If Step 6 failed (typically because you invoked a pip version
      lower than 8.1), install TensorFlow in the active
@@ -128,16 +129,18 @@ to confirm that the installation worked properly.
 
 Note that you must activate the Virtualenv environment each time you
 use TensorFlow in a new shell.  If the Virtualenv environment is not
-currently active (that is, the prompt is not `(tensorflow)`, invoke
+currently active (that is, the prompt is not `(<i>targetDirectory</i>)`, invoke
 one of the following commands:
 
-<pre>$ <b>source ~/tensorflow/bin/activate</b>      # bash, sh, ksh, or zsh
-$ <b>source ~/tensorflow/bin/activate.csh</b>  # csh or tcsh </pre>
+<pre>$ <b>cd <i>targetDirectory</i></b>
+$ <b>source ./bin/activate</b>      # If using bash, sh, ksh, or zsh
+$ <b>source ./bin/activate.csh</b>  # If using csh or tcsh </pre>
+
 
 Your prompt will transform to the following to indicate that your
 tensorflow environment is active:
 
-<pre> (tensorflow)$ </pre>
+<pre> (<i>targetDirectory</i>)$ </pre>
 
 When the Virtualenv environment is active, you may run
 TensorFlow programs from this shell.
@@ -145,7 +148,7 @@ TensorFlow programs from this shell.
 When you are done using TensorFlow, you may deactivate the
 environment by issuing the following command:
 
-<pre> (tensorflow)$ <b>deactivate</b> </pre>
+<pre> (<i>targetDirectory</i>)$ <b>deactivate</b> </pre>
 
 The prompt will revert back to your default prompt (as defined by `PS1`).
 
@@ -331,19 +334,19 @@ Take the following steps to install TensorFlow in an Anaconda environment:
   3. Activate the conda environment by issuing the following command:
 
      <pre>$ <b>source activate tensorflow</b>
-     (tensorflow)$  # Your prompt should change</pre>
+     (<i>targetDirectory</i>)$  # Your prompt should change</pre>
 
   4. Issue a command of the following format to install
      TensorFlow inside your conda environment:
 
-     <pre>(tensorflow)<b>$ pip install --ignore-installed --upgrade</b> <i>TF_PYTHON_URL</i></pre>
+     <pre>(<i>targetDirectory</i>)<b>$ pip install --ignore-installed --upgrade</b> <i>TF_PYTHON_URL</i></pre>
 
      where <i>TF_PYTHON_URL</i> is the
      [URL of the TensorFlow Python package](#the_url_of_the_tensorflow_python_package).
      For example, the following command installs the CPU-only version of
      TensorFlow for Python 2.7:
 
-     <pre> (tensorflow)$ <b>pip install --ignore-installed --upgrade \
+     <pre> (<i>targetDirectory</i>)$ <b>pip install --ignore-installed --upgrade \
      https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.4.0-py2-none-any.whl</b></pre>
 
 
-- 
GitLab


From 4bf945713a50ba64d08c4527bdfedd16658834b1 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 4 Dec 2017 06:18:17 -0800
Subject: [PATCH 0572/1225] Remove unused BUILD dependencies

PiperOrigin-RevId: 177805240
---
 tensorflow/compiler/xla/service/cpu/BUILD | 4 ----
 tensorflow/core/distributed_runtime/BUILD | 1 -
 2 files changed, 5 deletions(-)

diff --git a/tensorflow/compiler/xla/service/cpu/BUILD b/tensorflow/compiler/xla/service/cpu/BUILD
index bf41d5ce07..ade887f193 100644
--- a/tensorflow/compiler/xla/service/cpu/BUILD
+++ b/tensorflow/compiler/xla/service/cpu/BUILD
@@ -290,10 +290,7 @@ cc_library(
         "//tensorflow/compiler/xla:status_macros",
         "//tensorflow/compiler/xla:statusor",
         "//tensorflow/compiler/xla:types",
-        "//tensorflow/compiler/xla/service/llvm_ir:ir_array",
-        "//tensorflow/compiler/xla/service/llvm_ir:llvm_loop",
         "//tensorflow/compiler/xla/service/llvm_ir:llvm_util",
-        "//tensorflow/compiler/xla/service/llvm_ir:vector_support_library",
         "@llvm//:core",
     ],
 )
@@ -303,7 +300,6 @@ cc_library(
     srcs = ["parallel_loop_emitter.cc"],
     hdrs = ["parallel_loop_emitter.h"],
     deps = [
-        "//tensorflow/compiler/xla:shape_util",
         "//tensorflow/compiler/xla:xla_data_proto",
         "//tensorflow/compiler/xla/service/llvm_ir:ir_array",
         "//tensorflow/compiler/xla/service/llvm_ir:llvm_loop",
diff --git a/tensorflow/core/distributed_runtime/BUILD b/tensorflow/core/distributed_runtime/BUILD
index 19122e3b74..2db7ebd795 100644
--- a/tensorflow/core/distributed_runtime/BUILD
+++ b/tensorflow/core/distributed_runtime/BUILD
@@ -269,7 +269,6 @@ cc_library(
     hdrs = ["worker_cache_wrapper.h"],
     deps = [
         ":worker_cache",
-        ":worker_interface",
         "//tensorflow/core:lib",
         "//tensorflow/core:protos_all_cc",
     ],
-- 
GitLab


From 8fda7ae46e3a1f52668287088f01447428514677 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 4 Dec 2017 07:10:59 -0800
Subject: [PATCH 0573/1225] Updates the documentation for DirichletMultinomial
 to take floats as input.

PiperOrigin-RevId: 177809511
---
 .../distributions/dirichlet_multinomial.py    | 21 ++++++++++---------
 1 file changed, 11 insertions(+), 10 deletions(-)

diff --git a/tensorflow/python/ops/distributions/dirichlet_multinomial.py b/tensorflow/python/ops/distributions/dirichlet_multinomial.py
index d792e9fe52..aa2b511c54 100644
--- a/tensorflow/python/ops/distributions/dirichlet_multinomial.py
+++ b/tensorflow/python/ops/distributions/dirichlet_multinomial.py
@@ -122,21 +122,22 @@ class DirichletMultinomial(distribution.Distribution):
   #### Examples
 
   ```python
-  alpha = [1, 2, 3]
-  n = 2
+  alpha = [1., 2., 3.]
+  n = 2.
   dist = DirichletMultinomial(n, alpha)
   ```
 
-  Creates a 3-class distribution, with the 3rd class is most likely to be drawn.
+  Creates a 3-class distribution, with the 3rd class is most likely to be
+  drawn.
   The distribution functions can be evaluated on counts.
 
   ```python
   # counts same shape as alpha.
-  counts = [0, 0, 2]
+  counts = [0., 0., 2.]
   dist.prob(counts)  # Shape []
 
-  # alpha will be broadcast to [[1, 2, 3], [1, 2, 3]] to match counts.
-  counts = [[1, 1, 0], [1, 0, 1]]
+  # alpha will be broadcast to [[1., 2., 3.], [1., 2., 3.]] to match counts.
+  counts = [[1., 1., 0.], [1., 0., 1.]]
   dist.prob(counts)  # Shape [2]
 
   # alpha will be broadcast to shape [5, 7, 3] to match counts.
@@ -147,12 +148,12 @@ class DirichletMultinomial(distribution.Distribution):
   Creates a 2-batch of 3-class distributions.
 
   ```python
-  alpha = [[1, 2, 3], [4, 5, 6]]  # Shape [2, 3]
-  n = [3, 3]
+  alpha = [[1., 2., 3.], [4., 5., 6.]]  # Shape [2, 3]
+  n = [3., 3.]
   dist = DirichletMultinomial(n, alpha)
 
-  # counts will be broadcast to [[2, 1, 0], [2, 1, 0]] to match alpha.
-  counts = [2, 1, 0]
+  # counts will be broadcast to [[2., 1., 0.], [2., 1., 0.]] to match alpha.
+  counts = [2., 1., 0.]
   dist.prob(counts)  # Shape [2]
   ```
 
-- 
GitLab


From 798cc84b5696adffb3e3fcd41adb298034f4c746 Mon Sep 17 00:00:00 2001
From: Shanqing Cai <cais@google.com>
Date: Mon, 4 Dec 2017 11:31:39 -0500
Subject: [PATCH 0574/1225] Add no_pip tag to
 contrib/eager/python/examples/spinn:spinn_test

---
 tensorflow/contrib/eager/python/examples/spinn/BUILD | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tensorflow/contrib/eager/python/examples/spinn/BUILD b/tensorflow/contrib/eager/python/examples/spinn/BUILD
index 0263d21325..a1f8a759e2 100644
--- a/tensorflow/contrib/eager/python/examples/spinn/BUILD
+++ b/tensorflow/contrib/eager/python/examples/spinn/BUILD
@@ -38,4 +38,5 @@ cuda_py_test(
         "//tensorflow/python:client_testlib",
         "//tensorflow/python:framework_test_lib",
     ],
+    tags = ["no_pip"],  # because spinn.py is under third_party/.
 )
-- 
GitLab


From e69bb08cd9cb75015af485368ad66d1520fbfc25 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 4 Dec 2017 09:09:11 -0800
Subject: [PATCH 0575/1225] Modify custom export strategy to account for
 multidimensional sparse float splits.

PiperOrigin-RevId: 177821712
---
 .../estimator_batch/custom_export_strategy.py | 23 +++--
 .../custom_export_strategy_test.py            | 96 ++++++++++++++++---
 2 files changed, 99 insertions(+), 20 deletions(-)

diff --git a/tensorflow/contrib/boosted_trees/estimator_batch/custom_export_strategy.py b/tensorflow/contrib/boosted_trees/estimator_batch/custom_export_strategy.py
index ef8dee91b6..6ebc7d7911 100644
--- a/tensorflow/contrib/boosted_trees/estimator_batch/custom_export_strategy.py
+++ b/tensorflow/contrib/boosted_trees/estimator_batch/custom_export_strategy.py
@@ -33,6 +33,8 @@ from tensorflow.python.platform import gfile
 from tensorflow.python.saved_model import loader as saved_model_loader
 from tensorflow.python.saved_model import tag_constants
 
+_SPARSE_FLOAT_FEATURE_NAME_TEMPLATE = "%s_%d"
+
 
 def make_custom_export_strategy(name,
                                 convert_fn,
@@ -147,13 +149,12 @@ def convert_to_universal_format(dtec, sorted_feature_names,
           inequality_test.threshold.float_value = split.threshold
         elif node_type == "sparse_float_binary_split_default_left":
           split = gtflow_node.sparse_float_binary_split_default_left.split
-          node.default_direction = (
-              generic_tree_model_pb2.BinaryNode.LEFT)
-          # TODO(nponomareva): adjust this id assignement when we allow multi-
-          # column sparse tensors.
+          node.default_direction = (generic_tree_model_pb2.BinaryNode.LEFT)
           feature_id = split.feature_column + num_dense
           inequality_test = node.inequality_left_child_test
-          inequality_test.feature_id.id.value = sorted_feature_names[feature_id]
+          inequality_test.feature_id.id.value = (
+              _SPARSE_FLOAT_FEATURE_NAME_TEMPLATE %
+              (sorted_feature_names[feature_id], split.dimension_id))
           inequality_test.type = (
               generic_tree_model_pb2.InequalityTest.LESS_OR_EQUAL)
           inequality_test.threshold.float_value = split.threshold
@@ -165,7 +166,9 @@ def convert_to_universal_format(dtec, sorted_feature_names,
           # column sparse tensors.
           feature_id = split.feature_column + num_dense
           inequality_test = node.inequality_left_child_test
-          inequality_test.feature_id.id.value = sorted_feature_names[feature_id]
+          inequality_test.feature_id.id.value = (
+              _SPARSE_FLOAT_FEATURE_NAME_TEMPLATE %
+              (sorted_feature_names[feature_id], split.dimension_id))
           inequality_test.type = (
               generic_tree_model_pb2.InequalityTest.LESS_OR_EQUAL)
           inequality_test.threshold.float_value = split.threshold
@@ -201,10 +204,14 @@ def _get_feature_importances(dtec, feature_names, num_dense_floats,
         split_column = feature_names[split.feature_column]
       elif node_type == "sparse_float_binary_split_default_left":
         split = tree_node.sparse_float_binary_split_default_left.split
-        split_column = feature_names[split.feature_column + num_dense_floats]
+        split_column = _SPARSE_FLOAT_FEATURE_NAME_TEMPLATE % (
+            feature_names[split.feature_column + num_dense_floats],
+            split.dimension_id)
       elif node_type == "sparse_float_binary_split_default_right":
         split = tree_node.sparse_float_binary_split_default_right.split
-        split_column = feature_names[split.feature_column + num_dense_floats]
+        split_column = _SPARSE_FLOAT_FEATURE_NAME_TEMPLATE % (
+            feature_names[split.feature_column + num_dense_floats],
+            split.dimension_id)
       elif node_type == "categorical_id_binary_split":
         split = tree_node.categorical_id_binary_split
         split_column = feature_names[split.feature_column + num_dense_floats +
diff --git a/tensorflow/contrib/boosted_trees/estimator_batch/custom_export_strategy_test.py b/tensorflow/contrib/boosted_trees/estimator_batch/custom_export_strategy_test.py
index 4ed18b2d34..492d9ca40c 100644
--- a/tensorflow/contrib/boosted_trees/estimator_batch/custom_export_strategy_test.py
+++ b/tensorflow/contrib/boosted_trees/estimator_batch/custom_export_strategy_test.py
@@ -12,7 +12,11 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""Tests for the conversion code from GTFlow format to Chauffeur."""
+"""Tests for the conversion code and for feature importances export.
+
+Tests that cover conversion from TFBT format to a tensorflow.contrib.
+decision_tree generic_tree_model format and feature importances export.
+"""
 
 from __future__ import absolute_import
 from __future__ import division
@@ -95,10 +99,31 @@ class ConvertModelTest(test_util.TensorFlowTestCase):
           }
         }
       }
+      nodes {
+        sparse_float_binary_split_default_right {
+          split {
+            feature_column: 1
+            dimension_id:3
+            threshold: -0.4
+            left_id: 7
+            right_id: 8
+          }
+        }
+        node_metadata {
+            gain: 3600
+        }
+      }
+      nodes {
+        leaf {
+          vector {
+            value: 0.36
+          }
+        }
+      }
       nodes {
         leaf {
           vector {
-            value: 0.3
+            value: 18
           }
         }
       }
@@ -108,17 +133,25 @@ class ConvertModelTest(test_util.TensorFlowTestCase):
     """
     dtec = tree_config_pb2.DecisionTreeEnsembleConfig()
     text_format.Merge(dtec_str, dtec)
-    feature_columns = ["feature_b", "feature_a", "feature_d"]
+    feature_columns = [
+        "feature_b",
+        "feature_a",
+        "feature_a_m",
+        "feature_d",
+    ]
     return dtec, feature_columns
 
   def testConvertModel(self):
     dtec, feature_columns = self._make_trees()
+    # Assume 2 sparse float columns, one with 1 dimension, the second one with
+    # 5 dimensions.
     # The feature columns in the order they were added.
     out = custom_export_strategy.convert_to_universal_format(
-        dtec, feature_columns, 1, 1,
-        1)
+        dtec, feature_columns, 1, 2, 1)
+    # Features a and a_m are sparse float features, a_m is multidimensional.
     expected_tree = """
     features { key: "feature_a" }
+    features { key: "feature_a_m" }
     features { key: "feature_b" }
     features { key: "feature_d" }
     model {
@@ -169,7 +202,6 @@ class ConvertModelTest(test_util.TensorFlowTestCase):
                   }
                 }
               }
-
               nodes {
                 node_id {
                   value: 1
@@ -196,7 +228,7 @@ class ConvertModelTest(test_util.TensorFlowTestCase):
                   inequality_left_child_test {
                     feature_id {
                       id {
-                        value: "feature_a"
+                        value: "feature_a_0"
                       }
                     }
                     threshold {
@@ -259,14 +291,51 @@ class ConvertModelTest(test_util.TensorFlowTestCase):
                 node_id {
                   value: 6
                 }
+                binary_node {
+                  left_child_id {
+                    value: 7
+                  }
+                  right_child_id {
+                    value: 8
+                  }
+                  default_direction: RIGHT
+                  inequality_left_child_test {
+                      feature_id {
+                        id {
+                          value: "feature_a_m_3"
+                        }
+                      }
+                      threshold {
+                        float_value: -0.4
+                      }
+                  }
+                }
+              }
+              nodes {
+                node_id {
+                  value: 7
+                }
                 leaf {
                   vector {
                     value {
-                      float_value: 0.03
+                      float_value: 0.036
                     }
                   }
                 }
               }
+              nodes {
+                node_id {
+                  value: 8
+                }
+                leaf {
+                  vector {
+                    value {
+                      float_value: 1.8
+                    }
+                  }
+                }
+              }
+
             }
           }
           submodel_id {
@@ -280,12 +349,15 @@ class ConvertModelTest(test_util.TensorFlowTestCase):
   def testFeatureImportance(self):
     dtec, feature_columns = self._make_trees()
     feature_importances = custom_export_strategy._get_feature_importances(
-        dtec, feature_columns, 1, 1, 1)
-    self.assertItemsEqual(["feature_b", "feature_a", "feature_d"],
-                          feature_importances.keys())
+        dtec, feature_columns, 1, 2, 1)
+    self.assertItemsEqual(
+        ["feature_b", "feature_a_0", "feature_a_m_3", "feature_d"],
+        feature_importances.keys())
     self.assertAlmostEqual(50.0, feature_importances["feature_b"], places=4)
-    self.assertAlmostEqual(50.0, feature_importances["feature_a"], places=4)
+    self.assertAlmostEqual(50.0, feature_importances["feature_a_0"], places=4)
     self.assertAlmostEqual(50.0, feature_importances["feature_d"], places=4)
+    self.assertAlmostEqual(
+        360.0, feature_importances["feature_a_m_3"], places=4)
 
 
 if __name__ == "__main__":
-- 
GitLab


From 8cb4a0f69a631e9f405b724940bc978b3784b19a Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 4 Dec 2017 09:35:24 -0800
Subject: [PATCH 0576/1225] Better deprecation message for --input_type[s].

PiperOrigin-RevId: 177824668
---
 .../contrib/lite/toco/toco_cmdline_flags.cc   | 23 +++++++++++++++----
 1 file changed, 18 insertions(+), 5 deletions(-)

diff --git a/tensorflow/contrib/lite/toco/toco_cmdline_flags.cc b/tensorflow/contrib/lite/toco/toco_cmdline_flags.cc
index 83947d6b28..f8281f3a57 100644
--- a/tensorflow/contrib/lite/toco/toco_cmdline_flags.cc
+++ b/tensorflow/contrib/lite/toco/toco_cmdline_flags.cc
@@ -65,10 +65,12 @@ bool ParseTocoFlagsFromCommandLineFlags(
            "is used."),
       Flag("input_type", parsed_flags.input_type.bind(),
            parsed_flags.input_type.default_value(),
-           "Deprecated old name of inference_input_type."),
+           "Deprecated ambiguous flag that set both --input_data_types and "
+           "--inference_input_type."),
       Flag("input_types", parsed_flags.input_types.bind(),
            parsed_flags.input_types.default_value(),
-           "Deprecated old name of inference_input_type. Was meant to be a "
+           "Deprecated ambiguous flag that set both --input_data_types and "
+           "--inference_input_type. Was meant to be a "
            "comma-separated list, but this was deprecated before "
            "multiple-input-types was ever properly supported."),
 
@@ -140,7 +142,6 @@ void ReadTocoFlagsFromCommandLineFlags(const ParsedTocoFlags& parsed_toco_flags,
           << #name;                                                          \
     }                                                                        \
   } while (false)
-
 #define READ_TOCO_FLAG(name, requirement)                     \
   ENFORCE_FLAG_REQUIREMENT(name, requirement);                \
   do {                                                        \
@@ -174,14 +175,26 @@ void ReadTocoFlagsFromCommandLineFlags(const ParsedTocoFlags& parsed_toco_flags,
 
   // Deprecated flag handling.
   if (parsed_toco_flags.input_type.specified()) {
-    LOG(WARNING) << "--input_type is deprecated. Use --inference_input_type.";
+    LOG(WARNING)
+        << "--input_type is deprecated. It was an ambiguous flag that set both "
+           "--input_data_types and --inference_input_type. If you are trying "
+           "to complement the input file with information about the type of "
+           "input arrays, use --input_data_type. If you are trying to control "
+           "the quantization/dequantization of real-numbers input arrays in "
+           "the output file, use --inference_input_type.";
     toco::IODataType input_type;
     QCHECK(toco::IODataType_Parse(parsed_toco_flags.input_type.value(),
                                   &input_type));
     toco_flags->set_inference_input_type(input_type);
   }
   if (parsed_toco_flags.input_types.specified()) {
-    LOG(WARNING) << "--input_types is deprecated. Use --inference_input_type.";
+    LOG(WARNING)
+        << "--input_types is deprecated. It was an ambiguous flag that set "
+           "both --input_data_types and --inference_input_type. If you are "
+           "trying to complement the input file with information about the "
+           "type of input arrays, use --input_data_type. If you are trying to "
+           "control the quantization/dequantization of real-numbers input "
+           "arrays in the output file, use --inference_input_type.";
     std::vector<string> input_types =
         absl::StrSplit(parsed_toco_flags.input_types.value(), ',');
     QCHECK(!input_types.empty());
-- 
GitLab


From f1582cf82f06810900ee99870f5d5d3a7478d044 Mon Sep 17 00:00:00 2001
From: Robin Richtsfeld <robin.richtsfeld@gmail.com>
Date: Mon, 4 Dec 2017 16:13:58 +0100
Subject: [PATCH 0577/1225] [CMake] Re-Enable include

Issue #3996 (and referenced tickets) are already fixed.
---
 tensorflow/contrib/cmake/CMakeLists.txt | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/tensorflow/contrib/cmake/CMakeLists.txt b/tensorflow/contrib/cmake/CMakeLists.txt
index ba708673b0..af7df2f679 100644
--- a/tensorflow/contrib/cmake/CMakeLists.txt
+++ b/tensorflow/contrib/cmake/CMakeLists.txt
@@ -387,11 +387,7 @@ endif()
 
 # Let's get to work!
 include(tf_core_framework.cmake)
-# NOTE: Disabled until issue #3996 is fixed.
-# include(tf_stream_executor.cmake)
-if (tensorflow_ENABLE_GPU)
-    include(tf_stream_executor.cmake)
-endif()
+include(tf_stream_executor.cmake)
 
 include(tf_core_cpu.cmake)
 include(tf_core_ops.cmake)
-- 
GitLab


From 50efd9202b582900e30d9d4478b5baaf2107d84a Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 4 Dec 2017 10:46:51 -0800
Subject: [PATCH 0578/1225] Enable dependency optimizer in Grappler by default.

PiperOrigin-RevId: 177835459
---
 tensorflow/core/grappler/op_types.cc                  | 11 +++++++++++
 tensorflow/core/grappler/optimizers/meta_optimizer.cc |  4 ++--
 tensorflow/core/protobuf/rewriter_config.proto        |  2 +-
 tensorflow/python/grappler/cost_analyzer_test.py      |  5 +----
 4 files changed, 15 insertions(+), 7 deletions(-)

diff --git a/tensorflow/core/grappler/op_types.cc b/tensorflow/core/grappler/op_types.cc
index 571975aca1..631fe84201 100644
--- a/tensorflow/core/grappler/op_types.cc
+++ b/tensorflow/core/grappler/op_types.cc
@@ -15,6 +15,7 @@ limitations under the License.
 
 #include <unordered_set>
 
+#include "tensorflow/core/framework/attr_value.pb.h"
 #include "tensorflow/core/framework/op.h"
 #include "tensorflow/core/framework/types.h"
 #include "tensorflow/core/grappler/op_types.h"
@@ -171,6 +172,12 @@ bool IsVariable(const NodeDef& node) {
          op == "VarHandleOp" || op == "ReadVariableOp";
 }
 
+namespace {
+bool GetBoolAttr(const NodeDef& node, const string& name) {
+  return node.attr().count(name) > 0 && node.attr().at(name).b();
+}
+}  // namespace
+
 bool IsFreeOfSideEffect(const NodeDef& node) {
   // Placeholders must be preserved to keep the graph feedable.
   if (IsPlaceholder(node)) {
@@ -190,6 +197,10 @@ bool IsFreeOfSideEffect(const NodeDef& node) {
       return false;
     }
   }
+  // Some nodes do in-place updates on regular tensor inputs.
+  if (GetBoolAttr(node, "in_place") || GetBoolAttr(node, "inplace")) {
+    return false;
+  }
   return true;
 }
 
diff --git a/tensorflow/core/grappler/optimizers/meta_optimizer.cc b/tensorflow/core/grappler/optimizers/meta_optimizer.cc
index 49bdc44462..0d0b947c8a 100644
--- a/tensorflow/core/grappler/optimizers/meta_optimizer.cc
+++ b/tensorflow/core/grappler/optimizers/meta_optimizer.cc
@@ -76,7 +76,7 @@ Status MetaOptimizer::Optimize(Cluster* cluster, const GrapplerItem& item,
       optimizers.push_back(std::unique_ptr<GraphOptimizer>(
           new ArithmeticOptimizer(cfg_.arithmetic_optimization())));
     }
-    if (cfg_.dependency_optimization() == RewriterConfig::ON) {
+    if (cfg_.dependency_optimization() != RewriterConfig::OFF) {
       optimizers.push_back(std::unique_ptr<GraphOptimizer>(
           new DependencyOptimizer(cfg_.dependency_optimization())));
     }
@@ -191,7 +191,7 @@ bool MetaOptimizerEnabled(const RewriterConfig& cfg) {
   return !cfg.disable_model_pruning() ||
          cfg.layout_optimizer() == RewriterConfig::ON ||
          cfg.constant_folding() != RewriterConfig::OFF ||
-         cfg.dependency_optimization() == RewriterConfig::ON ||
+         cfg.dependency_optimization() != RewriterConfig::OFF ||
          cfg.arithmetic_optimization() != RewriterConfig::OFF ||
          cfg.auto_parallel().enable() || cfg.memory_optimization() > 1 ||
          !cfg.optimizers().empty();
diff --git a/tensorflow/core/protobuf/rewriter_config.proto b/tensorflow/core/protobuf/rewriter_config.proto
index 3b5d1563a2..96b55ce04b 100644
--- a/tensorflow/core/protobuf/rewriter_config.proto
+++ b/tensorflow/core/protobuf/rewriter_config.proto
@@ -35,7 +35,7 @@ message RewriterConfig {
   Toggle constant_folding = 3;
   // Arithmetic optimizations (default is ON)
   Toggle arithmetic_optimization = 7;
-  // Control dependency optimizations (default is OFF).
+  // Control dependency optimizations (default is ON).
   Toggle dependency_optimization = 8;
   // If true, don't remove unnecessary ops from the graph
   bool disable_model_pruning = 2;
diff --git a/tensorflow/python/grappler/cost_analyzer_test.py b/tensorflow/python/grappler/cost_analyzer_test.py
index f4933a4514..325ff0fb00 100644
--- a/tensorflow/python/grappler/cost_analyzer_test.py
+++ b/tensorflow/python/grappler/cost_analyzer_test.py
@@ -89,13 +89,10 @@ class CostAnalysisTest(test.TestCase):
     self.assertTrue(b"MatMul" in report)
     self.assertTrue(b"ApplyAdam" in report)
     self.assertTrue(b"Conv2D" in report)
-    self.assertTrue(b"Conv2DBackpropInput" in report)
     self.assertTrue(b"Conv2DBackpropFilter" in report)
     self.assertTrue(b"Softmax" in report)
 
-    for op_type in [
-        b"MatMul", b"Conv2D", b"Conv2DBackpropInput", b"Conv2DBackpropFilter"
-    ]:
+    for op_type in [b"MatMul", b"Conv2D", b"Conv2DBackpropFilter"]:
       matcher = re.compile(
           br"\s+" + op_type + br",\s*(\d+),\s*(\d+),\s*([\d\.eE+-]+)%,\s*" +
           br"([\d\.eE+-]+)%,\s*(-?\d+),\s*(\d+),", re.MULTILINE)
-- 
GitLab


From 8842c73dea0806c530452f68430819972d5a4c24 Mon Sep 17 00:00:00 2001
From: Igor Saprykin <isaprykin@google.com>
Date: Mon, 4 Dec 2017 11:03:20 -0800
Subject: [PATCH 0579/1225] Make `replicate_model_fn` available via
 tensorflow.contrib.estimator namespace.

I built a pip package from the latest sources and noticed that it's not available.

PiperOrigin-RevId: 177838305
---
 tensorflow/contrib/estimator/__init__.py      |  2 +
 .../python/estimator/replicate_model_fn.py    | 97 ++++++++++---------
 .../estimator/replicate_model_fn_test.py      | 31 +++---
 3 files changed, 72 insertions(+), 58 deletions(-)

diff --git a/tensorflow/contrib/estimator/__init__.py b/tensorflow/contrib/estimator/__init__.py
index 8191e06fae..28c1f8b180 100644
--- a/tensorflow/contrib/estimator/__init__.py
+++ b/tensorflow/contrib/estimator/__init__.py
@@ -26,6 +26,7 @@ from tensorflow.contrib.estimator.python.estimator.head import *
 from tensorflow.contrib.estimator.python.estimator.linear import *
 from tensorflow.contrib.estimator.python.estimator.logit_fns import *
 from tensorflow.contrib.estimator.python.estimator.multi_head import *
+from tensorflow.contrib.estimator.python.estimator.replicate_model_fn import *
 
 from tensorflow.python.util.all_util import remove_undocumented
 # pylint: enable=unused-import,line-too-long,wildcard-import
@@ -45,6 +46,7 @@ _allowed_symbols = [
     'call_logit_fn',
     'dnn_logit_fn_builder',
     'linear_logit_fn_builder',
+    'replicate_model_fn',
 ]
 
 remove_undocumented(__name__, allowed_exception_list=_allowed_symbols)
diff --git a/tensorflow/contrib/estimator/python/estimator/replicate_model_fn.py b/tensorflow/contrib/estimator/python/estimator/replicate_model_fn.py
index f5154231da..ca3a2394ee 100644
--- a/tensorflow/contrib/estimator/python/estimator/replicate_model_fn.py
+++ b/tensorflow/contrib/estimator/python/estimator/replicate_model_fn.py
@@ -46,45 +46,7 @@ from tensorflow.python.training import device_setter as device_setter_lib
 from tensorflow.python.training import training_util
 
 
-class Mode(object):
-  """Modes for variables replication used for forcing a particular mode.
-
-  Forcing a mode is meant for performance experimentation purposes rather than
-  for general use cases.
-  """
-
-  AUTO = 0
-  """Use internal heuristics for choosing the best Mode value.
-
-     This mode is supposed to be the most appropriate in most cases given what
-     is known about the system.
-  """
-  # TODO(isaprykin): Query system configuration to choose modes other than
-  # `SHARED_LOCAL_PARAMETER_SERVER`, even though it is often appropriate.
-
-  SHARED_LOCAL_PARAMETER_SERVER = 2
-  """Variables are placed on a single device and shared across all devices.
-
-  Two ways to achieve this replication over available GPUs are supported:
-    1)  If exactly 1 GPU is detected, then variables and operations are placed
-        onto GPU.
-    2)  If more than 1 GPU is detected, then variables are going to be placed on
-        the CPU.  Replicas of operations are placed on each individual GPU.
-  """
-
-  SHARED_ROUND_ROBIN = 3
-  """Variables are placed on all devices in a round-robin fashion.
-
-  Every subsequent variable is placed on the next device.  There is only one
-  copy of each variable that is shared across all devices.
-  """
-
-  # TODO(isaprykin):  Implement `REPLICATED_ALL_REDUCE`.
-  REPLICATED_ALL_REDUCE = 3
-  """Variables are mirrored on all devices."""
-
-
-def replicate_model_fn(model_fn, optimizer_fn, devices=None, mode=Mode.AUTO):
+def replicate_model_fn(model_fn, optimizer_fn, devices=None):
   """Replicate `Estimator.model_fn` over GPUs within a single host.
 
   The given `model_fn` specifies a single forward pass of a model.  To replicate
@@ -97,11 +59,14 @@ def replicate_model_fn(model_fn, optimizer_fn, devices=None, mode=Mode.AUTO):
   optimizer.
 
   If `devices` are `None`, then all available GPUs are going to be used for
-  replication: `devices=[<all available GPUs>]`.  If no GPUs are available,
-  then the model is going to be placed on the CPU: `devices=['/device:CPU:0']`.
+  replication.  If no GPUs are available, then the model is going to be
+  placed on the CPU.
 
-  Varibles are placed on to `devices` according to the given `mode`. Operations
-  are going for each tower are going to be copied on each device.
+  Two modes of local replication over available GPUs are supported:
+    1)  If exactly 1 GPU is detected, then variables and operations are placed
+        onto GPU.
+    2)  If more than 1 GPU is detected, then variables are going to be placed on
+        the CPU.  Replicas of operations are placed on each individual GPU.
 
   Here is an example of how one might use their `model_fn` to run over GPUs:
     ```python
@@ -145,7 +110,7 @@ def replicate_model_fn(model_fn, optimizer_fn, devices=None, mode=Mode.AUTO):
     - For all other fields of `EstimatorSpec` the values of the first tower
       are taken.
 
-  On replication of variables:
+  On distribution of variables:
   Variables are not duplicated between towers.  Instead, they are placed on a
   single device as defined above and shared across towers.
 
@@ -163,14 +128,52 @@ def replicate_model_fn(model_fn, optimizer_fn, devices=None, mode=Mode.AUTO):
       argument can be used to replice only on the subset of available GPUs.
       If `None`, then all available GPUs are going to be used for replication.
       If no GPUs are available, then the model is going to be placed on the CPU.
-    mode: An optional argument that specifies the replication method used for
-      distributing variables across devices.
 
   Returns:
     A replicated version of the supplied `model_fn`. Returned function that
       conforms to the requirements of `Estimator`'s `model_fn` and can be used
       instead of the supplied `model_fn`.
   """
+  return _replicate_model_fn_with_mode(
+      model_fn,
+      optimizer_fn,
+      devices,
+      # TODO(isaprykin): Query system configuration to choose modes other than
+      # `SHARED_LOCAL_PARAMETER_SERVER`, even though it is often appropriate.
+      mode=_VariableDistributionMode.SHARED_LOCAL_PARAMETER_SERVER)
+
+
+class _VariableDistributionMode(object):
+  """Modes for variable distribution used for forcing a particular one.
+
+  Forcing a mode is meant for performance experimentation purposes rather than
+  for general use cases.
+  """
+
+  SHARED_LOCAL_PARAMETER_SERVER = 1
+  """Variables are placed on a single device and shared across all devices.
+
+  Two ways to achieve this distribution over available GPUs are supported:
+    1)  If exactly 1 GPU is detected, then variables and operations are placed
+        onto GPU.
+    2)  If more than 1 GPU is detected, then variables are going to be placed on
+        the CPU.  Replicas of operations are placed on each individual GPU.
+  """
+
+  SHARED_ROUND_ROBIN = 2
+  """Variables are placed on all devices in a round-robin fashion.
+
+  Every subsequent variable is placed on the next device.  There is only one
+  copy of each variable that is shared across all devices.
+  """
+
+
+def _replicate_model_fn_with_mode(
+    model_fn,
+    optimizer_fn,
+    devices=None,
+    mode=_VariableDistributionMode.SHARED_LOCAL_PARAMETER_SERVER):
+  """A version of `replicate_model_fn` that allows to specify a `mode`."""
   if not devices:
     devices = _get_local_devices('GPU') or _get_local_devices('CPU')
 
@@ -179,7 +182,7 @@ def replicate_model_fn(model_fn, optimizer_fn, devices=None, mode=Mode.AUTO):
                                         if is_a_single_gpu_case else 'CPU')
 
   ps_devices = [consolidation_device]
-  if mode == Mode.SHARED_ROUND_ROBIN:
+  if mode == _VariableDistributionMode.SHARED_ROUND_ROBIN:
     ps_devices = devices
 
   tf_logging.info('Replicating the `model_fn` across {}.  Variables are going '
diff --git a/tensorflow/contrib/estimator/python/estimator/replicate_model_fn_test.py b/tensorflow/contrib/estimator/python/estimator/replicate_model_fn_test.py
index 91e4b9ba7d..a83a1b8407 100644
--- a/tensorflow/contrib/estimator/python/estimator/replicate_model_fn_test.py
+++ b/tensorflow/contrib/estimator/python/estimator/replicate_model_fn_test.py
@@ -53,23 +53,24 @@ from tensorflow.python.training import device_setter
 from tensorflow.python.training import gradient_descent
 
 
-# TODO(isaprykin):  Parametrize all the tests on replicate_model_fn.Mode when
-#   it's supported.
+# TODO(isaprykin):  Parametrize all the tests on
+#   replicate_model_fn._VariableDistributionMode when it's supported.
 class DNNClassifierIntegrationTest(test_util.TensorFlowTestCase):
 
   def setUp(self):
     self._model_dir = tempfile.mkdtemp()
 
-  def test_complete_flow_with_mode_auto(self):
-    return self._complete_flow_with_mode(replicate_model_fn.Mode.AUTO)
+  def test_complete_flow_with_public_version(self):
+    return self._complete_flow_with_mode(mode=None)
 
   def test_complete_flow_with_mode_local_ps_server(self):
     return self._complete_flow_with_mode(
-        replicate_model_fn.Mode.SHARED_LOCAL_PARAMETER_SERVER)
+        replicate_model_fn._VariableDistributionMode.
+        SHARED_LOCAL_PARAMETER_SERVER)
 
   def test_complete_flow_with_mode_round_robin(self):
     return self._complete_flow_with_mode(
-        replicate_model_fn.Mode.SHARED_ROUND_ROBIN)
+        replicate_model_fn._VariableDistributionMode.SHARED_ROUND_ROBIN)
 
   def _complete_flow_with_mode(self, mode):
     n_classes = 3
@@ -119,12 +120,20 @@ class DNNClassifierIntegrationTest(test_util.TensorFlowTestCase):
     def optimizer_fn():
       return optimizers.get_optimizer_instance('Adagrad', learning_rate=0.05)
 
+    if not mode:  # Use the public `replicate_model_fn`.
+      model_fn = replicate_model_fn.replicate_model_fn(
+          estimator.model_fn,
+          optimizer_fn,
+          devices=['/gpu:0', '/gpu:1', '/gpu:2'])
+    else:
+      model_fn = replicate_model_fn._replicate_model_fn_with_mode(
+          estimator.model_fn,
+          optimizer_fn,
+          devices=['/gpu:0', '/gpu:1', '/gpu:2'],
+          mode=mode)
+
     estimator = estimator_lib.Estimator(
-        model_fn=replicate_model_fn.replicate_model_fn(
-            estimator.model_fn,
-            optimizer_fn,
-            devices=['/gpu:0', '/gpu:1', '/gpu:2'],
-            mode=mode),
+        model_fn=model_fn,
         model_dir=estimator.model_dir,
         config=estimator.config,
         params=estimator.params)
-- 
GitLab


From 6eed89cb81d300fb81240eb83b6a818b93d31dd7 Mon Sep 17 00:00:00 2001
From: Russell Power <power@google.com>
Date: Mon, 4 Dec 2017 11:04:08 -0800
Subject: [PATCH 0580/1225] Treat stream removal as an UNAVAILABLE error in
 Tensorflow instead of UNKNOWN.

This allows for retry by monitored sessions.

PiperOrigin-RevId: 177838460
---
 .../core/distributed_runtime/rpc/grpc_util.h  | 20 +++++++++++++++++++
 1 file changed, 20 insertions(+)

diff --git a/tensorflow/core/distributed_runtime/rpc/grpc_util.h b/tensorflow/core/distributed_runtime/rpc/grpc_util.h
index 0ddcd89130..ac0a33a2b9 100644
--- a/tensorflow/core/distributed_runtime/rpc/grpc_util.h
+++ b/tensorflow/core/distributed_runtime/rpc/grpc_util.h
@@ -28,10 +28,30 @@ limitations under the License.
 
 namespace tensorflow {
 
+constexpr char kStreamRemovedMessage[] = "Stream removed";
+
+// Identify if the given grpc::Status corresponds to an HTTP stream removed
+// error (see chttp2_transport.cc).
+//
+// When auto-reconnecting to a remote TensorFlow worker after it restarts, gRPC
+// can return an UNKNOWN error code with a "Stream removed" error message.
+// This should not be treated as an unrecoverable error.
+//
+// N.B. This is dependent on the error message from grpc remaining consistent.
+inline bool IsStreamRemovedError(const ::grpc::Status& s) {
+  return !s.ok() && s.error_code() == ::grpc::StatusCode::UNKNOWN &&
+         s.error_message() == kStreamRemovedMessage;
+}
+
 inline Status FromGrpcStatus(const ::grpc::Status& s) {
   if (s.ok()) {
     return Status::OK();
   } else {
+    // Convert "UNKNOWN" stream removed errors into unavailable, to allow
+    // for retry upstream.
+    if (IsStreamRemovedError(s)) {
+      return Status(tensorflow::error::UNAVAILABLE, s.error_message());
+    }
     return Status(static_cast<tensorflow::error::Code>(s.error_code()),
                   s.error_message());
   }
-- 
GitLab


From 7ea70dbe6e099fc495de54b5d9883e047ff54631 Mon Sep 17 00:00:00 2001
From: Sanjoy Das <sanjoy@google.com>
Date: Mon, 4 Dec 2017 11:06:25 -0800
Subject: [PATCH 0581/1225] [XLA:CPU] Extract out a common helper to create an
 llvm::Function

This way we have on central place that knows about all the attributes
we attach to llvm::Functions.

PiperOrigin-RevId: 177838832
---
 .../xla/service/cpu/dot_op_emitter.cc         | 13 ++++++++--
 .../compiler/xla/service/cpu/ir_function.cc   | 24 ++++--------------
 .../service/llvm_ir/kernel_support_library.cc |  6 +++--
 .../service/llvm_ir/kernel_support_library.h  |  5 +++-
 .../compiler/xla/service/llvm_ir/llvm_util.cc | 25 +++++++++++++++++++
 .../compiler/xla/service/llvm_ir/llvm_util.h  |  6 +++++
 6 files changed, 55 insertions(+), 24 deletions(-)

diff --git a/tensorflow/compiler/xla/service/cpu/dot_op_emitter.cc b/tensorflow/compiler/xla/service/cpu/dot_op_emitter.cc
index 4ccff756a3..7496dd20c2 100644
--- a/tensorflow/compiler/xla/service/cpu/dot_op_emitter.cc
+++ b/tensorflow/compiler/xla/service/cpu/dot_op_emitter.cc
@@ -581,6 +581,11 @@ bool DotOpEmitter::EmitLlvmIrDotIfProfitable() {
   llvm::Value* rhs_op =
       swap_operands ? lhs_array_.GetBasePointer() : rhs_array_.GetBasePointer();
 
+  const bool enable_fast_math =
+      hlo_module_config_.debug_options().xla_enable_fast_math();
+  const bool optimize_for_size =
+      options::OptimizeForSizeRequested(hlo_module_config_);
+
   if (is_column_major_matrix_vector) {
     VLOG(2) << "Emitting column major matrix-vector multiply with m = " << m
             << " and k = " << k;
@@ -592,7 +597,9 @@ bool DotOpEmitter::EmitLlvmIrDotIfProfitable() {
         "_", tile_cols, "_", m, "_", k);
 
     KernelSupportLibrary::EmitAndCallOutlinedKernel(
-        ir_builder_, kernel_name, lhs_op, rhs_op, result_op,
+        /*enable_fast_math=*/enable_fast_math,
+        /*optimize_for_size=*/optimize_for_size, ir_builder_, kernel_name,
+        lhs_op, rhs_op, result_op,
         [this, tile_rows, tile_cols, m, k, primitive_type](
             llvm::Value* lhs_op, llvm::Value* rhs_op, llvm::Value* result_op) {
           ColumnMajorMatrixVectorProductEmitter emitter(
@@ -611,7 +618,9 @@ bool DotOpEmitter::EmitLlvmIrDotIfProfitable() {
         "_", tile_cols, "_", m, "_", k);
 
     KernelSupportLibrary::EmitAndCallOutlinedKernel(
-        ir_builder_, kernel_name, lhs_op, rhs_op, result_op,
+        /*enable_fast_math=*/enable_fast_math,
+        /*optimize_for_size=*/optimize_for_size, ir_builder_, kernel_name,
+        lhs_op, rhs_op, result_op,
         [this, tile_rows, tile_cols, m, k, primitive_type](
             llvm::Value* lhs_op, llvm::Value* rhs_op, llvm::Value* result_op) {
           RowMajorMatrixVectorProductEmitter emitter(
diff --git a/tensorflow/compiler/xla/service/cpu/ir_function.cc b/tensorflow/compiler/xla/service/cpu/ir_function.cc
index 701bce2cbf..ed257613d8 100644
--- a/tensorflow/compiler/xla/service/cpu/ir_function.cc
+++ b/tensorflow/compiler/xla/service/cpu/ir_function.cc
@@ -112,11 +112,11 @@ void IrFunction::Initialize(const string& function_name,
   // Functions with local linkage get an inlining bonus.  Because we know
   // a-priori that embedded functions (non-entry functions) will not have its
   // name resolved, give it local linkage.
-  function_ = llvm::Function::Create(/*Ty=*/function_type,
-                                     /*Linkage=*/linkage,
-                                     /*N=*/AsStringRef(function_name),
-                                     /*M=*/llvm_module_);
-  function_->setCallingConv(llvm::CallingConv::C);
+  function_ =
+      llvm_ir::CreateFunction(function_type, linkage,
+                              /*enable_fast_math=*/enable_fast_math,
+                              /*optimize_for_size=*/optimize_for_size_requested,
+                              function_name, llvm_module_);
 
   // Set meaningful names for the function's arguments: useful for debugging.
   llvm::Function::arg_iterator arg_iter = function_->arg_begin();
@@ -147,20 +147,6 @@ void IrFunction::Initialize(const string& function_name,
     function_->addAttribute(argument.getArgNo() + 1, llvm::Attribute::NoAlias);
   }
 
-  // Add the optize attribute to the function if optimizing for size. This
-  // controls internal behavior of some optimization passes (e.g. loop
-  // unrolling).
-  if (optimize_for_size_requested) {
-    function_->addFnAttr(llvm::Attribute::OptimizeForSize);
-  }
-
-  if (enable_fast_math) {
-    function_->addFnAttr("unsafe-fp-math", "true");
-    function_->addFnAttr("no-infs-fp-math", "true");
-    function_->addFnAttr("no-nans-fp-math", "true");
-    function_->addFnAttr("no-signed-zeros-fp-math", "true");
-  }
-
   ir_builder_->SetInsertPoint(llvm::BasicBlock::Create(
       /*Context=*/llvm_module_->getContext(),
       /*Name=*/"entry",
diff --git a/tensorflow/compiler/xla/service/llvm_ir/kernel_support_library.cc b/tensorflow/compiler/xla/service/llvm_ir/kernel_support_library.cc
index d951a37d5d..d68d699d7e 100644
--- a/tensorflow/compiler/xla/service/llvm_ir/kernel_support_library.cc
+++ b/tensorflow/compiler/xla/service/llvm_ir/kernel_support_library.cc
@@ -65,6 +65,7 @@ void KernelSupportLibrary::If(
 }
 
 void KernelSupportLibrary::EmitAndCallOutlinedKernel(
+    bool enable_fast_math, bool optimize_for_size,
     llvm::IRBuilder<>* ir_builder, tensorflow::StringPiece kernel_name,
     KernelSupportLibrary::ArgumentVector arguments,
     const std::function<void(KernelSupportLibrary::ArgumentVector)>&
@@ -82,9 +83,10 @@ void KernelSupportLibrary::EmitAndCallOutlinedKernel(
     auto* function_type = llvm::FunctionType::get(
         ir_builder->getVoidTy(), arg_types, /*isVarArg=*/false);
 
-    function = llvm::Function::Create(
+    function = llvm_ir::CreateFunction(
         function_type, llvm::GlobalValue::InternalLinkage,
-        llvm_ir::AsStringRef(kernel_name), module);
+        /*enable_fast_math=*/enable_fast_math,
+        /*optimize_for_size=*/optimize_for_size, kernel_name, module);
 
     llvm::IRBuilder<>::InsertPointGuard guard(*ir_builder);
 
diff --git a/tensorflow/compiler/xla/service/llvm_ir/kernel_support_library.h b/tensorflow/compiler/xla/service/llvm_ir/kernel_support_library.h
index 997b84bb27..150a464c66 100644
--- a/tensorflow/compiler/xla/service/llvm_ir/kernel_support_library.h
+++ b/tensorflow/compiler/xla/service/llvm_ir/kernel_support_library.h
@@ -134,18 +134,21 @@ class KernelSupportLibrary {
   // that function is re-used.  In that sense we're using the llvm::Module as a
   // cache of outlined kernels, keyed by function name.
   static void EmitAndCallOutlinedKernel(
+      bool enable_fast_math, bool optimize_for_size,
       llvm::IRBuilder<>* ir_builder, tensorflow::StringPiece kernel_name,
       ArgumentVector arguments,
       const std::function<void(ArgumentVector)>& kernel_body_generator);
 
   // Thin wrapper around the more general EmitAndCallOutlinedKernel above.
   static void EmitAndCallOutlinedKernel(
+      bool enable_fast_math, bool optimize_for_size,
       llvm::IRBuilder<>* ir_builder, tensorflow::StringPiece kernel_name,
       llvm::Value* arg0, llvm::Value* arg1, llvm::Value* arg2,
       const std::function<void(llvm::Value*, llvm::Value*, llvm::Value*)>&
           kernel_body_generator) {
     EmitAndCallOutlinedKernel(
-        ir_builder, kernel_name, {arg0, arg1, arg2}, [&](ArgumentVector args) {
+        enable_fast_math, optimize_for_size, ir_builder, kernel_name,
+        {arg0, arg1, arg2}, [&](ArgumentVector args) {
           kernel_body_generator(args[0], args[1], args[2]);
         });
   }
diff --git a/tensorflow/compiler/xla/service/llvm_ir/llvm_util.cc b/tensorflow/compiler/xla/service/llvm_ir/llvm_util.cc
index cd0c4a371e..b4c5d3cd90 100644
--- a/tensorflow/compiler/xla/service/llvm_ir/llvm_util.cc
+++ b/tensorflow/compiler/xla/service/llvm_ir/llvm_util.cc
@@ -676,5 +676,30 @@ Status DumpIRToDirectory(const string& directory_name,
   return f->Close();
 }
 
+llvm::Function* CreateFunction(llvm::FunctionType* function_type,
+                               llvm::GlobalValue::LinkageTypes linkage,
+                               bool enable_fast_math, bool optimize_for_size,
+                               tensorflow::StringPiece name,
+                               llvm::Module* module) {
+  llvm::Function* function =
+      llvm::Function::Create(function_type, linkage, AsStringRef(name), module);
+  function->setCallingConv(llvm::CallingConv::C);
+  if (enable_fast_math) {
+    function->addFnAttr("unsafe-fp-math", "true");
+    function->addFnAttr("no-infs-fp-math", "true");
+    function->addFnAttr("no-nans-fp-math", "true");
+    function->addFnAttr("no-signed-zeros-fp-math", "true");
+  }
+
+  // Add the optize attribute to the function if optimizing for size. This
+  // controls internal behavior of some optimization passes (e.g. loop
+  // unrolling).
+  if (optimize_for_size) {
+    function->addFnAttr(llvm::Attribute::OptimizeForSize);
+  }
+
+  return function;
+}
+
 }  // namespace llvm_ir
 }  // namespace xla
diff --git a/tensorflow/compiler/xla/service/llvm_ir/llvm_util.h b/tensorflow/compiler/xla/service/llvm_ir/llvm_util.h
index 063ead2b64..6bdc6a01a2 100644
--- a/tensorflow/compiler/xla/service/llvm_ir/llvm_util.h
+++ b/tensorflow/compiler/xla/service/llvm_ir/llvm_util.h
@@ -281,6 +281,12 @@ Status DumpIRToDirectory(const string& directory_name,
                          const string& hlo_module_name,
                          const llvm::Module& llvm_module, bool optimized);
 
+llvm::Function* CreateFunction(llvm::FunctionType* function_type,
+                               llvm::GlobalValue::LinkageTypes linkage,
+                               bool enable_fast_math, bool optimize_for_size,
+                               tensorflow::StringPiece name,
+                               llvm::Module* module);
+
 }  // namespace llvm_ir
 }  // namespace xla
 
-- 
GitLab


From 6e7be32a555a32adaec7093be23606815156f334 Mon Sep 17 00:00:00 2001
From: ManHyuk <manhyuk@kw.ac.kr>
Date: Tue, 5 Dec 2017 04:14:59 +0900
Subject: [PATCH 0582/1225] Fix typo (#15094)

* fix typo

* fix typo

* fix typo

* fix typo

* fix typo
---
 tensorflow/compiler/xla/service/cpu/disassembler.h   | 4 ++--
 tensorflow/contrib/framework/python/ops/variables.py | 2 +-
 tensorflow/core/common_runtime/pending_counts.h      | 2 +-
 tensorflow/core/common_runtime/shape_refiner.cc      | 2 +-
 4 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/tensorflow/compiler/xla/service/cpu/disassembler.h b/tensorflow/compiler/xla/service/cpu/disassembler.h
index b6feaa7e45..5e302f8899 100644
--- a/tensorflow/compiler/xla/service/cpu/disassembler.h
+++ b/tensorflow/compiler/xla/service/cpu/disassembler.h
@@ -37,7 +37,7 @@ struct DisassemblerResult {
   DisassemblerResult(const string& text, size_t code_size_bytes)
       : text(text), code_size_bytes(code_size_bytes) {}
 
-  // The dissassembled text sections of the object file.
+  // The disassembled text sections of the object file.
   string text;
   // The total number of bytes of executable code in the object file.
   uint64_t code_size_bytes;
@@ -53,7 +53,7 @@ class Disassembler {
   // Returns a DisassemblerResult for the given object file, containing the
   // disassembled code.
   //
-  // If we couldnt' retrieve a disassembler for this platform, an error status
+  // If we couldn't retrieve a disassembler for this platform, an error status
   // is returned.
   StatusOr<DisassemblerResult> DisassembleObjectFile(
       const llvm::object::ObjectFile& object_file) const;
diff --git a/tensorflow/contrib/framework/python/ops/variables.py b/tensorflow/contrib/framework/python/ops/variables.py
index 07b7857e7b..3f1ece4510 100644
--- a/tensorflow/contrib/framework/python/ops/variables.py
+++ b/tensorflow/contrib/framework/python/ops/variables.py
@@ -441,7 +441,7 @@ def get_unique_variable(var_op_name):
   """
   candidates = get_variables(scope=var_op_name)
   if not candidates:
-    raise ValueError('Couldnt find variable %s' % var_op_name)
+    raise ValueError('Couldn\'t find variable %s' % var_op_name)
 
   for candidate in candidates:
     if candidate.op.name == var_op_name:
diff --git a/tensorflow/core/common_runtime/pending_counts.h b/tensorflow/core/common_runtime/pending_counts.h
index 9e39b6b7b9..5707f52592 100644
--- a/tensorflow/core/common_runtime/pending_counts.h
+++ b/tensorflow/core/common_runtime/pending_counts.h
@@ -44,7 +44,7 @@ namespace tensorflow {
 
 //    PendingCounts counts(layout);
 //    ...
-//    counts.decrement_panding(h[id], 1);
+//    counts.decrement_pending(h[id], 1);
 class PendingCounts {
  public:
   // The state machine for a node's execution.
diff --git a/tensorflow/core/common_runtime/shape_refiner.cc b/tensorflow/core/common_runtime/shape_refiner.cc
index c82d57694a..3ae52f414f 100644
--- a/tensorflow/core/common_runtime/shape_refiner.cc
+++ b/tensorflow/core/common_runtime/shape_refiner.cc
@@ -127,7 +127,7 @@ Status InferShapesForFunctionSubNode(const Node* node, ShapeRefiner* refiner,
 //
 // NOTE: Recursive user-defined functions are not supported.
 // Maybe we won't support recursive functions at all in TF, because of
-// other maintanabilty issues.
+// other maintainability issues.
 Status ShapeRefiner::InferShapesForFunction(
     const tensorflow::FunctionDef* function_def, bool keep_nested_shapes,
     ExtendedInferenceContext* outer_context) {
-- 
GitLab


From ff71c2792746262bbce936d78c3543cdea0c4b70 Mon Sep 17 00:00:00 2001
From: Yu-Cheng Ling <ycling@google.com>
Date: Mon, 4 Dec 2017 11:12:36 -0800
Subject: [PATCH 0583/1225] Cleanup: Remove dummy_disabled_internal target.

PiperOrigin-RevId: 177839746
---
 tensorflow/BUILD                   | 8 --------
 tensorflow/contrib/lite/toco/BUILD | 9 ++-------
 2 files changed, 2 insertions(+), 15 deletions(-)

diff --git a/tensorflow/BUILD b/tensorflow/BUILD
index e6dc15a701..48f594c954 100644
--- a/tensorflow/BUILD
+++ b/tensorflow/BUILD
@@ -364,14 +364,6 @@ config_setting(
     visibility = ["//visibility:public"],
 )
 
-# Make a dummy rule that we can change "default" in select statements to.
-# to disable dependencies in copybara.
-config_setting(
-    name = "dummy_disabled_internal",
-    values = {"define": "with_dummy_disabled_internal=true"},
-    visibility = ["//visibility:public"],
-)
-
 package_group(
     name = "internal",
     packages = [
diff --git a/tensorflow/contrib/lite/toco/BUILD b/tensorflow/contrib/lite/toco/BUILD
index 0bf8d067a3..0bad7ddb6e 100644
--- a/tensorflow/contrib/lite/toco/BUILD
+++ b/tensorflow/contrib/lite/toco/BUILD
@@ -159,16 +159,11 @@ cc_library(
         "toco_types.h",
     ],
     deps = [
+        # Placeholder for internal file dependency.
         "//tensorflow/core:framework_lite",
         "//tensorflow/core:lib",
         "//tensorflow/core:lib_internal",
-    ] + select({
-        "//tensorflow:android": [],
-        "//tensorflow:darwin": [],
-        "//tensorflow:ios": [],
-        "//conditions:default": [],
-        "//tensorflow:dummy_disabled_internal": [],
-    }),
+    ],
 )
 
 cc_library(
-- 
GitLab


From a4b8615aef61cfdf1309561d92af4486340a26ef Mon Sep 17 00:00:00 2001
From: Matt Wytock <mwytock@gmail.com>
Date: Mon, 4 Dec 2017 11:26:54 -0800
Subject: [PATCH 0584/1225] fix overpadding in MixtureSameFamily (#14870)

---
 .../distributions/python/ops/mixture_same_family.py        | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/tensorflow/contrib/distributions/python/ops/mixture_same_family.py b/tensorflow/contrib/distributions/python/ops/mixture_same_family.py
index 5448918a50..0623b2c726 100644
--- a/tensorflow/contrib/distributions/python/ops/mixture_same_family.py
+++ b/tensorflow/contrib/distributions/python/ops/mixture_same_family.py
@@ -320,13 +320,14 @@ class MixtureSameFamily(distribution.Distribution):
         return array_ops.shape(d.batch_shape_tensor())[0]
       dist_batch_ndims = _get_ndims(self)
       cat_batch_ndims = _get_ndims(self.mixture_distribution)
-      bnd = distribution_util.pick_vector(
+      pad_ndims = distribution_util.pick_vector(
           self.mixture_distribution.is_scalar_batch(),
-          [dist_batch_ndims], [cat_batch_ndims])[0]
+          [dist_batch_ndims],
+          [dist_batch_ndims - cat_batch_ndims])[0]
       s = array_ops.shape(x)
       x = array_ops.reshape(x, shape=array_ops.concat([
           s[:-1],
-          array_ops.ones([bnd], dtype=dtypes.int32),
+          array_ops.ones([pad_ndims], dtype=dtypes.int32),
           s[-1:],
           array_ops.ones([self._event_ndims], dtype=dtypes.int32),
       ], axis=0))
-- 
GitLab


From 3ff72d4fd07783e28752e5ec0bc70966dffe2e3c Mon Sep 17 00:00:00 2001
From: Gunhan Gulsoy <gunan@google.com>
Date: Mon, 4 Dec 2017 11:38:59 -0800
Subject: [PATCH 0585/1225] Sanitize dtypes in filenames in normalization_test.

PiperOrigin-RevId: 177843964
---
 tensorflow/python/layers/normalization_test.py | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/tensorflow/python/layers/normalization_test.py b/tensorflow/python/layers/normalization_test.py
index 7c91c3284e..e147f348b0 100644
--- a/tensorflow/python/layers/normalization_test.py
+++ b/tensorflow/python/layers/normalization_test.py
@@ -105,9 +105,17 @@ class BNTest(test.TestCase):
                          infer_use_gpu):
     batch, height, width, input_channels = 2, 4, 5, 3
     shape = [batch, height, width, input_channels]
-    checkpoint = os.path.join(self.get_temp_dir(), 'cp_%s_%s_%s_%s' %
-                              (dtype, train1_use_gpu, train2_use_gpu,
-                               infer_use_gpu))
+
+    # Not all characters in a dtype string representation are allowed in
+    # filenames in all operating systems. This map will sanitize these.
+    dtype_to_valid_fn = {
+        dtypes.float16: 'float16',
+        dtypes.float32: 'float32',
+    }
+    checkpoint = os.path.join(
+        self.get_temp_dir(), 'cp_%s_%s_%s_%s' % (
+            dtype_to_valid_fn[dtype], train1_use_gpu, train2_use_gpu,
+            infer_use_gpu))
 
     self._train(
         checkpoint,
-- 
GitLab


From 171e4db57703ea12a314a1db520ea28aceca034c Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 4 Dec 2017 11:39:12 -0800
Subject: [PATCH 0586/1225] Changed tensorflow::StringPiece default constructor
 to set data_ to nullptr instead of "". This matches the default constructor
 of absl::string_view, and is necessary to ensure compatibility before
 tensorflow::StringPiece can be replaced with absl::string_view.

PiperOrigin-RevId: 177843991
---
 tensorflow/compiler/jit/kernels/xla_launch_op.cc | 2 --
 tensorflow/core/lib/core/stringpiece.h           | 2 +-
 2 files changed, 1 insertion(+), 3 deletions(-)

diff --git a/tensorflow/compiler/jit/kernels/xla_launch_op.cc b/tensorflow/compiler/jit/kernels/xla_launch_op.cc
index 54f60fae5e..39a770ab7b 100644
--- a/tensorflow/compiler/jit/kernels/xla_launch_op.cc
+++ b/tensorflow/compiler/jit/kernels/xla_launch_op.cc
@@ -102,7 +102,6 @@ xla::StatusOr<gpu::DeviceMemoryBase> XlaAllocator::Allocate(
   }
   void* data =
       reinterpret_cast<void*>(const_cast<char*>(t.tensor_data().data()));
-  TF_RET_CHECK(data != nullptr);
   tensors_[data] = t;
   return gpu::DeviceMemoryBase(data, size);
 }
@@ -110,7 +109,6 @@ xla::StatusOr<gpu::DeviceMemoryBase> XlaAllocator::Allocate(
 Status XlaAllocator::RegisterArgument(const Tensor* t) {
   void* data =
       reinterpret_cast<void*>(const_cast<char*>(t->tensor_data().data()));
-  TF_RET_CHECK(data != nullptr);
   tensors_[data] = *t;
   return Status::OK();
 }
diff --git a/tensorflow/core/lib/core/stringpiece.h b/tensorflow/core/lib/core/stringpiece.h
index 89a1e26b81..caa9642774 100644
--- a/tensorflow/core/lib/core/stringpiece.h
+++ b/tensorflow/core/lib/core/stringpiece.h
@@ -42,7 +42,7 @@ class StringPiece {
   typedef size_t size_type;
 
   // Create an empty slice.
-  StringPiece() : data_(""), size_(0) {}
+  StringPiece() : data_(nullptr), size_(0) {}
 
   // Create a slice that refers to d[0,n-1].
   StringPiece(const char* d, size_t n) : data_(d), size_(n) {}
-- 
GitLab


From 5ae216835bc4cbeb309fe69add57675810028b13 Mon Sep 17 00:00:00 2001
From: Sanjoy Das <sanjoy@google.com>
Date: Mon, 4 Dec 2017 11:49:22 -0800
Subject: [PATCH 0587/1225] [XLA:CPU] Disable frame pointer elimination

This helps debug and profile generated code, and does not seem to affect
performance.

PiperOrigin-RevId: 177845521
---
 tensorflow/compiler/xla/service/llvm_ir/llvm_util.cc | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tensorflow/compiler/xla/service/llvm_ir/llvm_util.cc b/tensorflow/compiler/xla/service/llvm_ir/llvm_util.cc
index b4c5d3cd90..ef5b6ad90e 100644
--- a/tensorflow/compiler/xla/service/llvm_ir/llvm_util.cc
+++ b/tensorflow/compiler/xla/service/llvm_ir/llvm_util.cc
@@ -684,6 +684,8 @@ llvm::Function* CreateFunction(llvm::FunctionType* function_type,
   llvm::Function* function =
       llvm::Function::Create(function_type, linkage, AsStringRef(name), module);
   function->setCallingConv(llvm::CallingConv::C);
+  function->addFnAttr("no-frame-pointer-elim", "false");
+
   if (enable_fast_math) {
     function->addFnAttr("unsafe-fp-math", "true");
     function->addFnAttr("no-infs-fp-math", "true");
-- 
GitLab


From 2d3d5c23a99b92968e9265cb7d872fe4dbf0e7ca Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 4 Dec 2017 12:12:29 -0800
Subject: [PATCH 0588/1225] Fixing wording in truncated_normal as per Github
 bug request #13686.

PiperOrigin-RevId: 177848994
---
 tensorflow/python/ops/random_ops.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/python/ops/random_ops.py b/tensorflow/python/ops/random_ops.py
index afaff8ca41..a59578b422 100644
--- a/tensorflow/python/ops/random_ops.py
+++ b/tensorflow/python/ops/random_ops.py
@@ -152,7 +152,7 @@ def truncated_normal(shape,
     mean: A 0-D Tensor or Python value of type `dtype`. The mean of the
       truncated normal distribution.
     stddev: A 0-D Tensor or Python value of type `dtype`. The standard deviation
-      of the truncated normal distribution.
+      of the normal distribution, before truncation.
     dtype: The type of the output.
     seed: A Python integer. Used to create a random seed for the distribution.
       See
-- 
GitLab


From 36ac0f3eb6406556dda8ff76961b04d6eebf71b7 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 4 Dec 2017 12:14:42 -0800
Subject: [PATCH 0589/1225]        [tpu:profiler] Modify the fields in
 host-independent and host-dependent job info in tf_op_stats.proto.

PiperOrigin-RevId: 177849255
---
 tensorflow/contrib/tpu/profiler/tf_op_stats.proto | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/tensorflow/contrib/tpu/profiler/tf_op_stats.proto b/tensorflow/contrib/tpu/profiler/tf_op_stats.proto
index e4a2b530c7..5440bbbfdd 100644
--- a/tensorflow/contrib/tpu/profiler/tf_op_stats.proto
+++ b/tensorflow/contrib/tpu/profiler/tf_op_stats.proto
@@ -117,9 +117,9 @@ message HloExtraInfoMapResult {
 // Result proto for host-independent job information.
 message HostIndependentJobInfoResult {
   // The change-list number of this build.
-  optional string change_list = 1;
+  optional int64 change_list = 1;
   // The time of this build.
-  optional string build_time = 2;
+  optional int64 build_time = 2;
   // The target of this build.
   optional string build_target = 3;
 }
@@ -131,7 +131,7 @@ message HostDependentJobInfoResult {
   // The command line used to run the job.
   optional string command_line = 2;
   // The start time of the job on this host.
-  optional string start_time = 3;
+  optional int64 start_time = 3;
 }
 
 // Result proto for RunEnvironment (the run environment of a profiling session).
@@ -144,11 +144,10 @@ message RunEnvironmentResult {
   optional int32 tpu_core_count = 3;
   // The per-TPU-core batch size.
   optional int32 per_core_batch_size = 4;
-  reserved 5;  // was job_info.
   // Host-independent job information.
-  optional HostIndependentJobInfoResult host_independent_job_info = 6;
+  optional HostIndependentJobInfoResult host_independent_job_info = 5;
   // Host-dependent job information.
-  repeated HostDependentJobInfoResult host_dependent_job_info = 7;
+  repeated HostDependentJobInfoResult host_dependent_job_info = 6;
 }
 
 // Result proto for TfStatsHelper.
-- 
GitLab


From a1c29139ccf441ad4de97c4e7fe2729e6130fcb8 Mon Sep 17 00:00:00 2001
From: Justin Lebar <jlebar@google.com>
Date: Mon, 4 Dec 2017 12:28:48 -0800
Subject: [PATCH 0590/1225] [XLA:GPU] Switch from specifying maxntid to
 reqntid.

maxntid specifies the max number of threads in a block, whereas reqntid
says that we will use *exactly* this many threads in a block.

This doesn't have any effect on the benchmarks I ran, but we might as
well do it in case it helps ptxas make a better decision at some point
on some GPU.  At least it will prevent the next person to come along
from doing this same investigation I just did.  :)

PiperOrigin-RevId: 177851116
---
 tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc b/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc
index abc739d181..ec7f3c75c4 100644
--- a/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc
+++ b/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc
@@ -123,10 +123,12 @@ void UpdateLaunchDimensions(const LaunchDimensions& launch_dims, Thunk* thunk,
   llvm::ConstantInt* threads_per_block_ir_value = llvm::ConstantInt::get(
       llvm::IntegerType::get(llvm_context, /*NumBits=*/32),
       launch_dims.threads_per_block());
+  // Our launch bounds are exact, so we can specify them as reqntidx rather than
+  // maxntidx.
   nvvm_annotations_node->addOperand(llvm::MDNode::get(
       llvm_context,
       {llvm::ConstantAsMetadata::get(ir_kernel),
-       llvm::MDString::get(llvm_context, "maxntidx"),
+       llvm::MDString::get(llvm_context, "reqntidx"),
        llvm::ConstantAsMetadata::get(threads_per_block_ir_value)}));
 }
 }  // namespace
-- 
GitLab


From 8f1e63d5629bda4f6c91fdec7a3b8418ed96786e Mon Sep 17 00:00:00 2001
From: Anna R <annarev@google.com>
Date: Mon, 4 Dec 2017 12:31:03 -0800
Subject: [PATCH 0591/1225] Actually use ApiDef when generating Python API.

PiperOrigin-RevId: 177851421
---
 tensorflow/contrib/cmake/tf_python.cmake      |   2 +-
 tensorflow/core/BUILD                         |   2 +-
 tensorflow/core/framework/op_gen_lib.cc       |   7 +-
 tensorflow/core/framework/op_gen_lib_test.cc  |   4 +-
 tensorflow/python/build_defs.bzl              |   4 +
 tensorflow/python/eager/gen_op.bzl            |  20 +-
 .../python/eager/python_eager_op_gen.cc       | 234 +++++++++++-------
 .../python/eager/python_eager_op_gen_main.cc  |  37 +--
 tensorflow/python/framework/python_op_gen.cc  |  60 +++--
 .../python/framework/python_op_gen_internal.h |  24 +-
 .../python/framework/python_op_gen_main.cc    |  56 +++--
 tensorflow/python/ops/array_ops.py            |   8 +-
 tensorflow/tensorflow.bzl                     |  27 +-
 13 files changed, 300 insertions(+), 185 deletions(-)

diff --git a/tensorflow/contrib/cmake/tf_python.cmake b/tensorflow/contrib/cmake/tf_python.cmake
index 819b6213ea..401662b6c5 100755
--- a/tensorflow/contrib/cmake/tf_python.cmake
+++ b/tensorflow/contrib/cmake/tf_python.cmake
@@ -739,7 +739,7 @@ function(GENERATE_PYTHON_OP_LIB tf_python_op_lib_name)
     # containing the wrappers.
     add_custom_command(
       OUTPUT ${GENERATE_PYTHON_OP_LIB_DESTINATION}
-      COMMAND ${tf_python_op_lib_name}_gen_python @${tensorflow_source_dir}/tensorflow/python/ops/hidden_ops.txt ${require_shape_fn} > ${GENERATE_PYTHON_OP_LIB_DESTINATION}
+      COMMAND ${tf_python_op_lib_name}_gen_python ${tensorflow_source_dir}/tensorflow/core/api_def/base_api,${tensorflow_source_dir}/tensorflow/core/api_def/python_api @${tensorflow_source_dir}/tensorflow/python/ops/hidden_ops.txt ${require_shape_fn} > ${GENERATE_PYTHON_OP_LIB_DESTINATION}
       DEPENDS ${tf_python_op_lib_name}_gen_python
     )
 
diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD
index d77021c3ee..390950ae98 100644
--- a/tensorflow/core/BUILD
+++ b/tensorflow/core/BUILD
@@ -3416,7 +3416,7 @@ filegroup(
 
 filegroup(
     name = "python_api_def",
-    data = glob(["api_def/python_api/*"]),
+    srcs = glob(["api_def/python_api/*"]),
 )
 
 tf_cc_test(
diff --git a/tensorflow/core/framework/op_gen_lib.cc b/tensorflow/core/framework/op_gen_lib.cc
index 95a9b763f9..acff74070d 100644
--- a/tensorflow/core/framework/op_gen_lib.cc
+++ b/tensorflow/core/framework/op_gen_lib.cc
@@ -629,14 +629,11 @@ Status ApiDefMap::LoadApiDef(const string& api_def_file_contents) {
   ApiDefs api_defs;
   protobuf::TextFormat::ParseFromString(contents, &api_defs);
   for (const auto& api_def : api_defs.op()) {
-    // Check if the op definition is already loaded.
+    // Check if the op definition is loaded. If op definition is not
+    // loaded, then we just skip this ApiDef.
     if (map_.find(api_def.graph_op_name()) != map_.end()) {
       // Overwrite current api def with data in api_def.
       TF_RETURN_IF_ERROR(MergeApiDefs(&map_[api_def.graph_op_name()], api_def));
-    } else {
-      return errors::FailedPrecondition(
-          "Unexpected ApiDef override: ", api_def.graph_op_name(),
-          " is not defined in base ApiDef.");
     }
   }
   return Status::OK();
diff --git a/tensorflow/core/framework/op_gen_lib_test.cc b/tensorflow/core/framework/op_gen_lib_test.cc
index bbe57bdd62..857b1c8dbc 100644
--- a/tensorflow/core/framework/op_gen_lib_test.cc
+++ b/tensorflow/core/framework/op_gen_lib_test.cc
@@ -410,8 +410,8 @@ op {
 
   ApiDefMap api_map(op_list);
   TF_CHECK_OK(api_map.LoadApiDef(kTestApiDef));
-  auto status = api_map.LoadApiDef(api_def1);
-  ASSERT_EQ(tensorflow::error::FAILED_PRECONDITION, status.code());
+  TF_CHECK_OK(api_map.LoadApiDef(api_def1));
+  ASSERT_EQ(nullptr, api_map.GetApiDef("different_testop"));
 }
 
 TEST(OpGenLibTest, ApiDefInvalidArgOrder) {
diff --git a/tensorflow/python/build_defs.bzl b/tensorflow/python/build_defs.bzl
index 2d8625933f..48b03fab0f 100644
--- a/tensorflow/python/build_defs.bzl
+++ b/tensorflow/python/build_defs.bzl
@@ -27,4 +27,8 @@ def tf_gen_op_wrapper_private_py(name, out=None, deps=[],
     deps=deps,
     require_shape_functions=require_shape_functions,
     generated_target_name=name,
+    api_def_srcs = [
+        "//tensorflow/core:base_api_def",
+        "//tensorflow/core:python_api_def",
+    ],
   )
diff --git a/tensorflow/python/eager/gen_op.bzl b/tensorflow/python/eager/gen_op.bzl
index 1c99d342be..8bc1d6c10a 100644
--- a/tensorflow/python/eager/gen_op.bzl
+++ b/tensorflow/python/eager/gen_op.bzl
@@ -10,7 +10,9 @@ def tfe_gen_op_wrapper_py(name,
                           out=None,
                           visibility=None,
                           deps=[],
-                          generated_target_name=None):
+                          generated_target_name=None,
+                          # ApiDefs will be loaded in the order specified in this list.
+                          api_def_srcs=[]):
   """Generate an eager-mode Python op wrapper for an op library."""
   # Construct a cc_binary containing the specified ops.
   tool_name = "gen_" + name + "_py_wrappers_cc"
@@ -30,11 +32,25 @@ def tfe_gen_op_wrapper_py(name,
   if not out:
     out = "gen_" + name + ".py"
 
+  if not api_def_srcs:
+    api_def_args_str = ","
+  else:
+    api_def_args = []
+    for api_def_src in api_def_srcs:
+      # Add directory of the first ApiDef source to args.
+      # We are assuming all ApiDefs in a single api_def_src are in the
+      # same directory.
+      api_def_args.append(
+          "$$(dirname $$(echo $(locations " + api_def_src +
+          ") | cut -d\" \" -f1))")
+    api_def_args_str = ",".join(api_def_args)
+
   native.genrule(
       name=name + "_pygenrule",
       outs=[out],
+      srcs=api_def_srcs,
       tools=[tool_name] + tf_binary_additional_srcs(),
-      cmd=("$(location " + tool_name + ")  > $@"))
+      cmd=("$(location " + tool_name + ") " + api_def_args_str + " > $@"))
 
   # Make a py_library out of the generated python file.
   if not generated_target_name:
diff --git a/tensorflow/python/eager/python_eager_op_gen.cc b/tensorflow/python/eager/python_eager_op_gen.cc
index 956fbdac50..90a8779ff8 100644
--- a/tensorflow/python/eager/python_eager_op_gen.cc
+++ b/tensorflow/python/eager/python_eager_op_gen.cc
@@ -99,6 +99,15 @@ string TensorPBString(const TensorProto& pb) {
   return strings::StrCat("\"\"\"", ProtoShortDebugString(pb), "\"\"\"");
 }
 
+const ApiDef::Arg* FindInputArg(StringPiece name, const ApiDef& api_def) {
+  for (int i = 0; i < api_def.in_arg_size(); ++i) {
+    if (api_def.in_arg(i).name() == name) {
+      return &api_def.in_arg(i);
+    }
+  }
+  return nullptr;
+}
+
 class GenEagerPythonOp : public python_op_gen_internal::GenPythonOp {
  public:
   GenEagerPythonOp(const OpDef& op_def, const ApiDef& api_def,
@@ -164,14 +173,14 @@ string GenEagerPythonOp::FlattenInputs(
       } else if (inputs_state == WAS_LIST_INPUT) {
         strings::StrAppend(&inputs, " + ");
       }
-      strings::StrAppend(&inputs, "list(", param_names_[i], ")");
+      strings::StrAppend(&inputs, "list(", param_names_[i].GetRenameTo(), ")");
       inputs_state = WAS_LIST_INPUT;
       if (output_sizes != nullptr) {
         if (!arg.number_attr().empty()) {
           output_sizes->emplace_back(AttrVarName(arg.number_attr(), nullptr));
         } else {
           output_sizes->emplace_back(
-              strings::StrCat("len(", param_names_[i], ")"));
+              strings::StrCat("len(", param_names_[i].GetRenameTo(), ")"));
         }
       }
     } else {
@@ -182,7 +191,7 @@ string GenEagerPythonOp::FlattenInputs(
       } else {
         strings::StrAppend(&inputs, "[");
       }
-      strings::StrAppend(&inputs, param_names_[i]);
+      strings::StrAppend(&inputs, param_names_[i].GetRenameTo());
       inputs_state = WAS_SOLO_INPUT;
       if (output_sizes != nullptr) output_sizes->emplace_back();
     }
@@ -195,15 +204,21 @@ string GenEagerPythonOp::FlattenInputs(
 }
 
 string GenEagerPythonOp::Code() {
+  if (api_def_.visibility() == ApiDef::SKIP) {
+    return "";
+  }
   // This has all the input args followed by those attrs that don't have
   // defaults.
-  std::vector<string> args_no_default;
+  std::vector<python_op_gen_internal::ParamNames> params_no_default;
   // The parameters with defaults (these have to be listed after those without).
   // No input args are included, just attrs.
-  std::vector<std::pair<string, string>> args_with_defaults;
-  for (int i = 0; i < op_def_.input_arg_size(); ++i) {
-    const auto& arg(op_def_.input_arg(i));
-    args_no_default.push_back(arg.name());
+  std::vector<std::pair<python_op_gen_internal::ParamNames, string>>
+      params_with_default;
+
+  for (int i = 0; i < api_def_.arg_order_size(); ++i) {
+    const auto& arg = *FindInputArg(api_def_.arg_order(i), op_def_);
+    const auto& api_def_arg = *FindInputArg(api_def_.arg_order(i), api_def_);
+    params_no_default.emplace_back(api_def_arg.name(), api_def_arg.rename_to());
     if (!arg.type_attr().empty()) {
       AddAttrForArg(arg.type_attr(), i);
     } else if (!arg.type_list_attr().empty()) {
@@ -215,31 +230,39 @@ string GenEagerPythonOp::Code() {
   }
   for (int i = 0; i < op_def_.attr_size(); ++i) {
     const auto& attr(op_def_.attr(i));
+    const auto& api_def_attr(api_def_.attr(i));
     // Do not add inferred attrs to the Python function signature.
     if (inferred_attrs_.find(attr.name()) == inferred_attrs_.end()) {
-      if (attr.has_default_value()) {
+      if (api_def_attr.has_default_value()) {
         if (attr.type() == "tensor") {
-          args_with_defaults.emplace_back(
-              attr.name(),
-              strings::StrCat("_execute.make_tensor(",
-                              TensorPBString(attr.default_value().tensor()),
-                              ", \"", attr.name(), "\")"));
+          params_with_default.emplace_back(
+              python_op_gen_internal::ParamNames(api_def_attr.name(),
+                                                 api_def_attr.rename_to()),
+              strings::StrCat(
+                  "_execute.make_tensor(",
+                  TensorPBString(api_def_attr.default_value().tensor()), ", \"",
+                  api_def_attr.rename_to(), "\")"));
         } else if (attr.type() == "list(tensor)") {
           std::vector<string> pbtxt;
-          for (const auto& pb : attr.default_value().list().tensor()) {
+          for (const auto& pb : api_def_attr.default_value().list().tensor()) {
             pbtxt.emplace_back(TensorPBString(pb));
           }
-          args_with_defaults.emplace_back(
-              attr.name(),
-              strings::StrCat("[_execute.make_tensor(_pb, \"", attr.name(),
-                              "\") for _pb in ", VectorToTuple(pbtxt), "]"));
+          params_with_default.emplace_back(
+              python_op_gen_internal::ParamNames(api_def_attr.name(),
+                                                 api_def_attr.rename_to()),
+              strings::StrCat("[_execute.make_tensor(_pb, \"",
+                              api_def_attr.rename_to(), "\") for _pb in ",
+                              VectorToTuple(pbtxt), "]"));
         } else {
-          args_with_defaults.emplace_back(
-              attr.name(), python_op_gen_internal::AttrValueToPython(
-                               attr.type(), attr.default_value(), "_dtypes."));
+          params_with_default.emplace_back(
+              python_op_gen_internal::ParamNames(api_def_attr.name(),
+                                                 api_def_attr.rename_to()),
+              python_op_gen_internal::AttrValueToPython(
+                  attr.type(), api_def_attr.default_value(), "_dtypes."));
         }
       } else {
-        args_no_default.push_back(attr.name());
+        params_no_default.emplace_back(api_def_attr.name(),
+                                       api_def_attr.rename_to());
       }
     }
   }
@@ -247,34 +270,37 @@ string GenEagerPythonOp::Code() {
   // Save the list of attr parameters (attrs that won't be inferred),
   // those with defaults go at the end.
   // Get the attrs in the order we want by taking the attrs without defaults
-  // from the end of args_no_default, and adding args_no_default.
-  attrs_.reserve(args_no_default.size() - op_def_.input_arg_size() +
-                 args_with_defaults.size());
-  attrs_.insert(attrs_.end(),
-                args_no_default.begin() + op_def_.input_arg_size(),
-                args_no_default.end());
-  for (const auto& a : args_with_defaults) {
-    attrs_.push_back(a.first);
+  // from the end of params_no_default, and adding params_no_default.
+  attrs_.reserve(params_no_default.size() - op_def_.input_arg_size() +
+                 params_with_default.size());
+  for (int i = op_def_.input_arg_size(); i < params_no_default.size(); ++i) {
+    attrs_.push_back(params_no_default[i].GetName());
+  }
+  for (const auto& p : params_with_default) {
+    attrs_.push_back(p.first.GetName());
+  }
+
+  param_names_.reserve(params_no_default.size() + params_with_default.size());
+  param_names_.insert(param_names_.begin(), params_no_default.begin(),
+                      params_no_default.end());
+  for (const auto& param_and_default : params_with_default) {
+    param_names_.push_back(param_and_default.first);
   }
 
-  param_names_.reserve(args_no_default.size() + args_with_defaults.size());
   string parameters;
-  for (const string& name : args_no_default) {
+  for (const auto& param : params_no_default) {
     if (!parameters.empty()) strings::StrAppend(&parameters, ", ");
-    const string param = python_op_gen_internal::AvoidPythonReserved(name);
-    strings::StrAppend(&parameters, param);
-    param_names_.push_back(param);
+    strings::StrAppend(&parameters, param.GetRenameTo());
   }
-  for (const auto& name_default : args_with_defaults) {
+  for (const auto& param_and_default : params_with_default) {
     if (!parameters.empty()) strings::StrAppend(&parameters, ", ");
-    const string param =
-        python_op_gen_internal::AvoidPythonReserved(name_default.first);
-    strings::StrAppend(&parameters, param, "=", name_default.second);
-    param_names_.push_back(param);
+    strings::StrAppend(&parameters, param_and_default.first.GetRenameTo(), "=",
+                       param_and_default.second);
   }
   if (!parameters.empty()) strings::StrAppend(&parameters, ", ");
   strings::StrAppend(&parameters, "name=None");
 
+  AddExport();
   AddDefLine(parameters);
   AddDocStringDescription();
   AddDocStringArgs();
@@ -297,25 +323,26 @@ string GenEagerPythonOp::Code() {
         // inputs are lists and have the same length.
         for (auto iter = arg_list->second.begin();
              iter != arg_list->second.end(); ++iter) {
-          const string& arg_name = param_names_[*iter];
-          ExpectListArg(arg_name);
+          const string& arg_api_name = param_names_[*iter].GetRenameTo();
+          ExpectListArg(arg_api_name);
           if (iter == arg_list->second.begin()) {
-            AddInferredAttr(attr.name(), strings::StrCat("len(", arg_name, ")"),
+            AddInferredAttr(attr.name(),
+                            strings::StrCat("len(", arg_api_name, ")"),
                             &result_, &attr_expressions_);
           } else {
             const auto& attr_var = attr_expressions_[attr.name()];
-            strings::StrAppend(&result_, "  if len(", arg_name,
+            strings::StrAppend(&result_, "  if len(", arg_api_name,
                                ") != ", attr_var,
                                ":\n"
                                "    raise ValueError(\n"
                                "        \"List argument '",
-                               arg_name, "' to '", op_name_,
+                               arg_api_name, "' to '", op_name_,
                                "' Op with length %d \"\n"
                                "        \"must match length %d of argument '",
                                inferred_attrs_[attr.name()],
                                "'.\" %\n"
                                "        (len(",
-                               arg_name, "), ", attr_var, "))\n");
+                               arg_api_name, "), ", attr_var, "))\n");
           }
         }
       }
@@ -325,65 +352,76 @@ string GenEagerPythonOp::Code() {
   // Values for non-inferred attrs.
   for (int i = 0; i < attrs_.size(); ++i) {
     const string& attr_name = attrs_[i];
-    const string& param = param_names_[i + op_def_.input_arg_size()];
+    const auto& param = param_names_[i + op_def_.input_arg_size()];
     const auto& attr = *FindAttr(attr_name, op_def_);
+    const string& attr_api_name = param.GetRenameTo();
     StringPiece attr_type = attr.type();
-    attr_expressions_[attr_name] = param;
-    const int default_index = i - (attrs_.size() - args_with_defaults.size());
+    attr_expressions_[attr_name] = attr_api_name;
+    const int default_index = i - (attrs_.size() - params_with_default.size());
     if (default_index >= 0) {
-      const string& default_value = args_with_defaults[default_index].second;
-      strings::StrAppend(&result_, "  if ", param, " is None:\n");
-      strings::StrAppend(&result_, "    ", param, " = ", default_value, "\n");
+      const string& default_value = params_with_default[default_index].second;
+      strings::StrAppend(&result_, "  if ", attr_api_name, " is None:\n");
+      strings::StrAppend(&result_, "    ", attr_api_name, " = ", default_value,
+                         "\n");
     }
     if (attr_type.starts_with("list(")) {
-      ExpectListArg(param);
+      ExpectListArg(attr_api_name);
     }
 
     if (attr_type == "string") {
-      strings::StrAppend(&result_, "  ", param, " = _execute.make_str(", param,
-                         ", \"", param, "\")\n");
+      strings::StrAppend(&result_, "  ", attr_api_name, " = _execute.make_str(",
+                         attr_api_name, ", \"", attr_api_name, "\")\n");
     } else if (attr_type == "list(string)") {
-      strings::StrAppend(&result_, "  ", param, " = [_execute.make_str(_s, \"",
-                         param, "\") for _s in ", param, "]\n");
+      strings::StrAppend(&result_, "  ", attr_api_name,
+                         " = [_execute.make_str(_s, \"", attr_api_name,
+                         "\") for _s in ", attr_api_name, "]\n");
     } else if (attr_type == "int") {
-      strings::StrAppend(&result_, "  ", param, " = _execute.make_int(", param,
-                         ", \"", param, "\")\n");
+      strings::StrAppend(&result_, "  ", attr_api_name, " = _execute.make_int(",
+                         attr_api_name, ", \"", attr_api_name, "\")\n");
     } else if (attr_type == "list(int)") {
-      strings::StrAppend(&result_, "  ", param, " = [_execute.make_int(_i, \"",
-                         param, "\") for _i in ", param, "]\n");
+      strings::StrAppend(&result_, "  ", attr_api_name,
+                         " = [_execute.make_int(_i, \"", attr_api_name,
+                         "\") for _i in ", attr_api_name, "]\n");
     } else if (attr_type == "float") {
-      strings::StrAppend(&result_, "  ", param, " = _execute.make_float(",
-                         param, ", \"", param, "\")\n");
+      strings::StrAppend(&result_, "  ", attr_api_name,
+                         " = _execute.make_float(", attr_api_name, ", \"",
+                         attr_api_name, "\")\n");
     } else if (attr_type == "list(float)") {
-      strings::StrAppend(&result_, "  ", param,
-                         " = [_execute.make_float(_f, \"", param,
-                         "\") for _f in ", param, "]\n");
+      strings::StrAppend(&result_, "  ", attr_api_name,
+                         " = [_execute.make_float(_f, \"", attr_api_name,
+                         "\") for _f in ", attr_api_name, "]\n");
     } else if (attr_type == "bool") {
-      strings::StrAppend(&result_, "  ", param, " = _execute.make_bool(", param,
-                         ", \"", param, "\")\n");
+      strings::StrAppend(&result_, "  ", attr_api_name,
+                         " = _execute.make_bool(", attr_api_name, ", \"",
+                         attr_api_name, "\")\n");
     } else if (attr_type == "list(bool)") {
-      strings::StrAppend(&result_, "  ", param, " = [_execute.make_bool(_b, \"",
-                         param, "\") for _b in ", param, "]\n");
+      strings::StrAppend(&result_, "  ", attr_api_name,
+                         " = [_execute.make_bool(_b, \"", attr_api_name,
+                         "\") for _b in ", attr_api_name, "]\n");
     } else if (attr_type == "type") {
-      strings::StrAppend(&result_, "  ", param, " = _execute.make_type(", param,
-                         ", \"", param, "\")\n");
+      strings::StrAppend(&result_, "  ", attr_api_name,
+                         " = _execute.make_type(", attr_api_name, ", \"",
+                         attr_api_name, "\")\n");
     } else if (attr_type == "list(type)") {
-      strings::StrAppend(&result_, "  ", param, " = [_execute.make_type(_t, \"",
-                         param, "\") for _t in ", param, "]\n");
+      strings::StrAppend(&result_, "  ", attr_api_name,
+                         " = [_execute.make_type(_t, \"", attr_api_name,
+                         "\") for _t in ", attr_api_name, "]\n");
     } else if (attr_type == "shape") {
-      strings::StrAppend(&result_, "  ", param, " = _execute.make_shape(",
-                         param, ", \"", param, "\")\n");
+      strings::StrAppend(&result_, "  ", attr_api_name,
+                         " = _execute.make_shape(", attr_api_name, ", \"",
+                         attr_api_name, "\")\n");
     } else if (attr_type == "list(shape)") {
-      strings::StrAppend(&result_, "  ", param,
-                         " = [_execute.make_shape(_s, \"", param,
-                         "\") for _s in ", param, "]\n");
+      strings::StrAppend(&result_, "  ", attr_api_name,
+                         " = [_execute.make_shape(_s, \"", attr_api_name,
+                         "\") for _s in ", attr_api_name, "]\n");
     } else if (attr_type == "tensor") {
-      strings::StrAppend(&result_, "  ", param, " = _execute.make_tensor(",
-                         param, ", \"", param, "\")\n");
+      strings::StrAppend(&result_, "  ", attr_api_name,
+                         " = _execute.make_tensor(", attr_api_name, ", \"",
+                         attr_api_name, "\")\n");
     } else if (attr_type == "list(tensor)") {
-      strings::StrAppend(&result_, "  ", param,
-                         " = [_execute.make_tensor(_t, \"", param,
-                         "\") for _t in ", param, "]\n");
+      strings::StrAppend(&result_, "  ", attr_api_name,
+                         " = [_execute.make_tensor(_t, \"", attr_api_name,
+                         "\") for _t in ", attr_api_name, "]\n");
     } else if (attr_type != "func") {
       return strings::StrCat("# No definition for ", function_name_,
                              " since we don't support attrs with type\n"
@@ -484,16 +522,20 @@ string GenEagerPythonOp::Code() {
 
   bool eager_allowed = true;
   string ref_arg;
-  for (const auto& arg : op_def_.input_arg()) {
+  for (int i = 0; i < op_def_.input_arg_size(); ++i) {
+    const auto& arg = op_def_.input_arg(i);
     if (arg.is_ref()) {
       eager_allowed = false;
-      ref_arg = arg.name();
+      DCHECK_EQ(op_def_.input_arg(i).name(), api_def_.in_arg(i).name());
+      ref_arg = api_def_.in_arg(i).rename_to();
     }
   }
-  for (const auto& arg : op_def_.output_arg()) {
+  for (int i = 0; i < op_def_.output_arg_size(); ++i) {
+    const auto& arg = op_def_.output_arg(i);
     if (arg.is_ref()) {
       eager_allowed = false;
-      ref_arg = arg.name();
+      DCHECK_EQ(op_def_.output_arg(i).name(), api_def_.out_arg(i).name());
+      ref_arg = api_def_.out_arg(i).rename_to();
     }
   }
 
@@ -553,6 +595,7 @@ void GenEagerPythonOp::AddEagerInferredAttrs() {
   // Figure out values for inferred attrs, and cast to eager tensors.
   for (int i = 0; i < op_def_.attr_size(); ++i) {
     const auto& attr(op_def_.attr(i));
+    const auto& api_def_attr(api_def_.attr(i));
     auto arg_list = attr_to_args_.find(attr.name());
     if (arg_list != attr_to_args_.end()) {
       if (attr.type() == "type") {
@@ -565,14 +608,15 @@ void GenEagerPythonOp::AddEagerInferredAttrs() {
           strings::StrAppend(
               &conversion, ", ",
               python_op_gen_internal::AttrValueToPython(
-                  attr.type(), attr.default_value(), "_dtypes."));
+                  attr.type(), api_def_attr.default_value(), "_dtypes."));
         }
         strings::StrAppend(&conversion, ")");
         const string var_name = AttrVarName(attr.name(), &attr_expressions_);
         if (output_sizes.size() == 1) {
           // Avoid creating a temporary variable in the case where
           // we can easily assign to the right value directly.
-          const string inputs_var = param_names_[arg_list->second.front()];
+          const string inputs_var =
+              param_names_[arg_list->second.front()].GetRenameTo();
           if (output_sizes.front().empty()) {
             strings::StrAppend(&result_, "    ", var_name, ", (", inputs_var,
                                ",) = ", conversion, "\n");
@@ -589,7 +633,7 @@ void GenEagerPythonOp::AddEagerInferredAttrs() {
           Unflatten("    ", output_sizes, inputs_var, &result_);
           std::vector<string> p;
           for (int j : arg_list->second) {
-            p.emplace_back(param_names_[j]);
+            p.emplace_back(param_names_[j].GetRenameTo());
           }
           strings::StrAppend(&result_, "    ", VectorToTuple(p), " = ",
                              inputs_var, "\n");
@@ -608,14 +652,14 @@ void GenEagerPythonOp::AddEagerInferredAttrs() {
           std::vector<string> lists;
           for (auto iter = arg_list->second.begin();
                iter != arg_list->second.end(); ++iter) {
-            lists.push_back(param_names_[*iter]);
+            lists.push_back(param_names_[*iter].GetRenameTo());
           }
           inputs_var = VectorToTuple(lists);
           conversion = "_execute.args_to_mixed_eager_tensors";
         } else {
           // For one list(tensor) argument, we just convert every
           // element of the list to an eager tensor.
-          inputs_var = param_names_[arg_list->second.front()];
+          inputs_var = param_names_[arg_list->second.front()].GetRenameTo();
           conversion = "_execute.convert_to_mixed_eager_tensors";
         }
         strings::StrAppend(&result_, "    ", var_name, ", ", inputs_var, " = ",
@@ -630,7 +674,7 @@ void GenEagerPythonOp::AddEagerInputCasts() {
   for (int i = 0; i < op_def_.input_arg_size(); ++i) {
     const auto& arg(op_def_.input_arg(i));
     if (!arg.type_attr().empty() || !arg.type_list_attr().empty()) continue;
-    const string& param = param_names_[i];
+    const string& param = param_names_[i].GetRenameTo();
     const string fn = arg.number_attr().empty() ? "" : "n_";
     const string dtype =
         python_op_gen_internal::DataTypeToPython(arg.type(), "_dtypes.");
diff --git a/tensorflow/python/eager/python_eager_op_gen_main.cc b/tensorflow/python/eager/python_eager_op_gen_main.cc
index cd74c438ec..05351bd8b1 100644
--- a/tensorflow/python/eager/python_eager_op_gen_main.cc
+++ b/tensorflow/python/eager/python_eager_op_gen_main.cc
@@ -21,34 +21,32 @@ limitations under the License.
 #include "tensorflow/core/framework/op.h"
 #include "tensorflow/core/framework/op_def.pb.h"
 #include "tensorflow/core/framework/op_gen_lib.h"
+#include "tensorflow/core/lib/io/path.h"
+#include "tensorflow/core/lib/strings/str_util.h"
 #include "tensorflow/core/platform/env.h"
 #include "tensorflow/core/platform/init_main.h"
 
 namespace tensorflow {
 namespace {
 
-constexpr char kBaseApiDef[] =
-    "tensorflow/core/api_def/base_api/*.pbtxt";
-constexpr char kPythonApiDef[] =
-    "tensorflow/core/api_def/python_api/*.pbtxt";
-constexpr bool kUseApiDef = false;
-
-void PrintAllPythonOps(const std::vector<string>& hidden_ops) {
+void PrintAllPythonOps(const std::vector<string>& hidden_ops,
+                       const std::vector<string>& api_def_dirs) {
   OpList ops;
   OpRegistry::Global()->Export(false, &ops);
 
   ApiDefMap api_def_map(ops);
-  if (kUseApiDef) {
+  if (!api_def_dirs.empty()) {
     Env* env = Env::Default();
 
-    std::vector<string> base_api_files;
-    std::vector<string> python_api_files;
-    TF_CHECK_OK(env->GetMatchingPaths(kBaseApiDef, &base_api_files));
-    TF_CHECK_OK(env->GetMatchingPaths(kPythonApiDef, &python_api_files));
-
-    TF_CHECK_OK(api_def_map.LoadFileList(env, base_api_files));
-    TF_CHECK_OK(api_def_map.LoadFileList(env, python_api_files));
+    for (const auto& api_def_dir : api_def_dirs) {
+      std::vector<string> api_files;
+      TF_CHECK_OK(env->GetMatchingPaths(io::JoinPath(api_def_dir, "*.pbtxt"),
+                                        &api_files));
+      TF_CHECK_OK(api_def_map.LoadFileList(env, api_files));
+    }
+    api_def_map.UpdateDocs();
   }
+
   PrintEagerPythonOps(ops, api_def_map, hidden_ops, true /* require_shapes */);
 }
 
@@ -58,8 +56,15 @@ void PrintAllPythonOps(const std::vector<string>& hidden_ops) {
 int main(int argc, char* argv[]) {
   tensorflow::port::InitMain(argv[0], &argc, &argv);
 
+  // Usage:
+  //   python_eager_op_gen_main api_def_dir1,api_def_dir2,...
   if (argc == 1) {
-    tensorflow::PrintAllPythonOps({});
+    tensorflow::PrintAllPythonOps({}, {});
+  } else if (argc == 2) {
+    const std::vector<tensorflow::string> api_def_dirs =
+        tensorflow::str_util::Split(argv[1], ",",
+                                    tensorflow::str_util::SkipEmpty());
+    tensorflow::PrintAllPythonOps({}, api_def_dirs);
   } else {
     return -1;
   }
diff --git a/tensorflow/python/framework/python_op_gen.cc b/tensorflow/python/framework/python_op_gen.cc
index c57f0a9842..72d3ea90fd 100644
--- a/tensorflow/python/framework/python_op_gen.cc
+++ b/tensorflow/python/framework/python_op_gen.cc
@@ -18,6 +18,7 @@ limitations under the License.
 #include <stdio.h>
 #include <sstream>
 #include <unordered_map>
+#include "tensorflow/core/framework/api_def.pb.h"
 #include "tensorflow/core/framework/attr_value.pb.h"
 #include "tensorflow/core/framework/op.h"
 #include "tensorflow/core/framework/op_def.pb_text.h"
@@ -480,15 +481,15 @@ string GenPythonOp::Code() {
   }
   // This has all the input args followed by those attrs that don't have
   // defaults.
-  std::vector<string> args_no_default;
+  std::vector<ParamNames> params_no_default;
   // The parameters with defaults (these have to be listed after those without).
   // No input args are included, just attrs.
-  std::vector<string> args_with_defaults;
+  std::vector<ParamNames> params_with_default;
 
   for (int i = 0; i < api_def_.arg_order_size(); ++i) {
     const auto& arg = *FindInputArg(api_def_.arg_order(i), op_def_);
     const auto& api_def_arg = *FindInputArg(api_def_.arg_order(i), api_def_);
-    args_no_default.push_back(api_def_arg.rename_to());
+    params_no_default.emplace_back(api_def_arg.name(), api_def_arg.rename_to());
     if (!arg.type_attr().empty()) {
       gtl::InsertIfNotPresent(&inferred_attrs_, arg.type_attr(), arg.name());
     } else if (!arg.type_list_attr().empty()) {
@@ -504,9 +505,9 @@ string GenPythonOp::Code() {
     // Do not add inferred attrs to the Python function signature.
     if (inferred_attrs_.find(attr.name()) == inferred_attrs_.end()) {
       if (attr.has_default_value()) {
-        args_with_defaults.push_back(attr.rename_to());
+        params_with_default.emplace_back(attr.name(), attr.rename_to());
       } else {
-        args_no_default.push_back(attr.rename_to());
+        params_no_default.emplace_back(attr.name(), attr.rename_to());
       }
     }
   }
@@ -515,27 +516,30 @@ string GenPythonOp::Code() {
   // those with defaults go at the end.
   // Get the attrs in the order we want by taking the attrs without defaults
   // from the end of args_no_default, and adding args_no_default.
-  attrs_.reserve(args_no_default.size() - op_def_.input_arg_size() +
-                 args_with_defaults.size());
-  attrs_.insert(attrs_.end(),
-                args_no_default.begin() + op_def_.input_arg_size(),
-                args_no_default.end());
-  attrs_.insert(attrs_.end(), args_with_defaults.begin(),
-                args_with_defaults.end());
-
-  param_names_.reserve(args_no_default.size() + args_with_defaults.size());
+  attrs_.reserve(params_no_default.size() - op_def_.input_arg_size() +
+                 params_with_default.size());
+  for (int i = op_def_.input_arg_size(); i < params_no_default.size(); ++i) {
+    attrs_.push_back(params_no_default[i].GetName());
+  }
+  for (int i = 0; i < params_with_default.size(); ++i) {
+    attrs_.push_back(params_with_default[i].GetName());
+  }
+
+  param_names_.reserve(params_no_default.size() + params_with_default.size());
+  param_names_.insert(param_names_.begin(), params_no_default.begin(),
+                      params_no_default.end());
+  for (const auto& param : params_with_default) {
+    param_names_.push_back(param);
+  }
+
   string parameters;
-  for (const string& name : args_no_default) {
+  for (const auto& param : params_no_default) {
     AddDelimiter(&parameters, ", ");
-    const string param = AvoidPythonReserved(name);
-    strings::StrAppend(&parameters, param);
-    param_names_.push_back(param);
+    strings::StrAppend(&parameters, param.GetRenameTo());
   }
-  for (const string& name : args_with_defaults) {
+  for (const auto& param_and_default : params_with_default) {
     AddDelimiter(&parameters, ", ");
-    const string param = AvoidPythonReserved(name);
-    strings::StrAppend(&parameters, param, "=None");
-    param_names_.push_back(param);
+    strings::StrAppend(&parameters, param_and_default.GetRenameTo(), "=None");
   }
   AddDelimiter(&parameters, ", ");
   strings::StrAppend(&parameters, "name=None");
@@ -557,10 +561,11 @@ string GenPythonOp::Code() {
 }
 
 void GenPythonOp::AddExport() {
-  if (api_def_.visibility() != api_def_.VISIBLE) {
+  if (api_def_.visibility() != ApiDef::VISIBLE) {
     return;
   }
-  strings::StrAppend(&result_, "tf_export(");
+
+  strings::StrAppend(&result_, "@tf_export(");
 
   // Add all endpoint names to tf_export.
   bool first_endpoint = true;
@@ -603,9 +608,9 @@ void GenPythonOp::AddDocStringInputs() {
     StringPiece description = api_def_arg.description();
     string desc;
     if (ConsumeEquals(&description)) {  // Skip the generated type info.
-      desc = strings::StrCat(param_names_[i], ": ");
+      desc = strings::StrCat(param_names_[i].GetRenameTo(), ": ");
     } else {
-      desc = strings::StrCat(param_names_[i], ": ",
+      desc = strings::StrCat(param_names_[i].GetRenameTo(), ": ",
                              ArgTypeName(op_def_, arg, inferred_attrs_, false));
     }
     if (!description.empty()) {
@@ -750,7 +755,8 @@ void GenPythonOp::AddBody(const string& prefix) {
 void GenPythonOp::AddBodyNoReturn(const string& apply_prefix) {
   string args = strings::StrCat("\"", op_def_.name(), "\", ");
   for (size_t i = 0; i < param_names_.size(); ++i) {
-    strings::StrAppend(&args, param_names_[i], "=", param_names_[i], ", ");
+    strings::StrAppend(&args, AvoidPythonReserved(param_names_[i].GetName()),
+                       "=", param_names_[i].GetRenameTo(), ", ");
   }
   strings::StrAppend(&args, "name=name)");
 
diff --git a/tensorflow/python/framework/python_op_gen_internal.h b/tensorflow/python/framework/python_op_gen_internal.h
index c1efbf9be2..6b53825a6d 100644
--- a/tensorflow/python/framework/python_op_gen_internal.h
+++ b/tensorflow/python/framework/python_op_gen_internal.h
@@ -41,6 +41,28 @@ void GenerateLowerCaseOpName(const string& str, string* result);
 
 string DataTypeToPython(DataType dtype, const string& dtype_module);
 
+// Names that corresponds to a single input parameter.
+class ParamNames {
+ public:
+  // Create param based on Arg.
+  ParamNames(const string& name, const string& rename_to) : name_(name) {
+    rename_to_ = AvoidPythonReserved(rename_to);
+  }
+
+  // Get original parameter name.
+  string GetName() const { return name_; }
+
+  // Get the name to rename the parameter to. Note that AvoidPythonReserved
+  // has already been applied.
+  string GetRenameTo() const { return rename_to_; }
+
+ private:
+  // Original parameter name.
+  string name_;
+  // API name for this parameter.
+  string rename_to_;
+};
+
 class GenPythonOp {
  public:
   GenPythonOp(const OpDef& op_def, const ApiDef& api_def,
@@ -84,7 +106,7 @@ class GenPythonOp {
 
   // All parameters, including inputs & non-inferred attrs, required and those
   // with defaults, except "name"
-  std::vector<string> param_names_;
+  std::vector<ParamNames> param_names_;
 };
 
 }  // namespace python_op_gen_internal
diff --git a/tensorflow/python/framework/python_op_gen_main.cc b/tensorflow/python/framework/python_op_gen_main.cc
index 61b1d02a5e..bc5ca195da 100644
--- a/tensorflow/python/framework/python_op_gen_main.cc
+++ b/tensorflow/python/framework/python_op_gen_main.cc
@@ -34,12 +34,6 @@ limitations under the License.
 namespace tensorflow {
 namespace {
 
-constexpr char kBaseApiDef[] =
-    "tensorflow/core/api_def/base_api/*.pbtxt";
-constexpr char kPythonApiDef[] =
-    "tensorflow/core/api_def/python_api/*.pbtxt";
-constexpr bool kUseApiDef = false;
-
 Status ReadOpListFromFile(const string& filename,
                           std::vector<string>* op_list) {
   std::unique_ptr<RandomAccessFile> file;
@@ -110,22 +104,23 @@ string InferSourceFileName(const char* argv_zero) {
 }
 
 void PrintAllPythonOps(const std::vector<string>& op_list,
+                       const std::vector<string>& api_def_dirs,
                        const string& source_file_name, bool require_shapes,
                        bool op_list_is_whitelist) {
   OpList ops;
   OpRegistry::Global()->Export(false, &ops);
 
   ApiDefMap api_def_map(ops);
-  if (kUseApiDef) {
+  if (!api_def_dirs.empty()) {
     Env* env = Env::Default();
 
-    std::vector<string> base_api_files;
-    std::vector<string> python_api_files;
-    TF_CHECK_OK(env->GetMatchingPaths(kBaseApiDef, &base_api_files));
-    TF_CHECK_OK(env->GetMatchingPaths(kPythonApiDef, &python_api_files));
-
-    TF_CHECK_OK(api_def_map.LoadFileList(env, base_api_files));
-    TF_CHECK_OK(api_def_map.LoadFileList(env, python_api_files));
+    for (const auto& api_def_dir : api_def_dirs) {
+      std::vector<string> api_files;
+      TF_CHECK_OK(env->GetMatchingPaths(io::JoinPath(api_def_dir, "*.pbtxt"),
+                                        &api_files));
+      TF_CHECK_OK(api_def_map.LoadFileList(env, api_files));
+    }
+    api_def_map.UpdateDocs();
   }
 
   if (op_list_is_whitelist) {
@@ -154,23 +149,30 @@ int main(int argc, char* argv[]) {
       tensorflow::InferSourceFileName(argv[0]);
 
   // Usage:
-  //   gen_main [ @FILENAME | OpName[,OpName]* ] (0 | 1) [0 | 1]
-  if (argc == 2) {
-    tensorflow::PrintAllPythonOps({}, source_file_name,
-                                  tensorflow::string(argv[1]) == "1",
-                                  false /* op_list_is_whitelist */);
-  } else if (argc == 3) {
-    std::vector<tensorflow::string> hidden_ops;
-    TF_CHECK_OK(tensorflow::ParseOpListCommandLine(argv[1], &hidden_ops));
-    tensorflow::PrintAllPythonOps(hidden_ops, source_file_name,
+  //   gen_main api_def_dir1,api_def_dir2,...
+  //       [ @FILENAME | OpName[,OpName]* ] (0 | 1) [0 | 1]
+  if (argc < 3) {
+    return -1;
+  }
+  std::vector<tensorflow::string> api_def_dirs = tensorflow::str_util::Split(
+      argv[1], ",", tensorflow::str_util::SkipEmpty());
+
+  if (argc == 3) {
+    tensorflow::PrintAllPythonOps({}, api_def_dirs, source_file_name,
                                   tensorflow::string(argv[2]) == "1",
                                   false /* op_list_is_whitelist */);
   } else if (argc == 4) {
+    std::vector<tensorflow::string> hidden_ops;
+    TF_CHECK_OK(tensorflow::ParseOpListCommandLine(argv[2], &hidden_ops));
+    tensorflow::PrintAllPythonOps(hidden_ops, api_def_dirs, source_file_name,
+                                  tensorflow::string(argv[3]) == "1",
+                                  false /* op_list_is_whitelist */);
+  } else if (argc == 5) {
     std::vector<tensorflow::string> op_list;
-    TF_CHECK_OK(tensorflow::ParseOpListCommandLine(argv[1], &op_list));
-    tensorflow::PrintAllPythonOps(op_list, source_file_name,
-                                  tensorflow::string(argv[2]) == "1",
-                                  tensorflow::string(argv[3]) == "1");
+    TF_CHECK_OK(tensorflow::ParseOpListCommandLine(argv[2], &op_list));
+    tensorflow::PrintAllPythonOps(op_list, api_def_dirs, source_file_name,
+                                  tensorflow::string(argv[3]) == "1",
+                                  tensorflow::string(argv[4]) == "1");
   } else {
     return -1;
   }
diff --git a/tensorflow/python/ops/array_ops.py b/tensorflow/python/ops/array_ops.py
index 23aa74c027..73a19e7042 100644
--- a/tensorflow/python/ops/array_ops.py
+++ b/tensorflow/python/ops/array_ops.py
@@ -1306,7 +1306,7 @@ def split(value, num_or_size_splits, axis=0, num=None, name="split"):
   size_splits = ops.convert_to_tensor(num_or_size_splits)
   if size_splits._rank() == 0 and size_splits.dtype.is_integer:
     return gen_array_ops._split(
-        split_dim=axis, num_split=num_or_size_splits, value=value, name=name)
+        axis=axis, num_split=num_or_size_splits, value=value, name=name)
 
   if num is None:
     num = size_splits._shape_tuple()[0]
@@ -1316,7 +1316,7 @@ def split(value, num_or_size_splits, axis=0, num=None, name="split"):
   return gen_array_ops._split_v(
       value=value,
       size_splits=size_splits,
-      split_dim=axis,
+      axis=axis,
       num_split=num,
       name=name)
 
@@ -2538,9 +2538,9 @@ def where(condition, x=None, y=None, name=None):
     with ops.name_scope(name, "Where", [condition]) as name:
       condition = ops.convert_to_tensor(
           condition, preferred_dtype=dtypes.bool, name="condition")
-      return gen_array_ops.where(input=condition, name=name)
+      return gen_array_ops.where(condition=condition, name=name)
   elif x is not None and y is not None:
-    return gen_math_ops._select(condition=condition, t=x, e=y, name=name)
+    return gen_math_ops._select(condition=condition, x=x, y=y, name=name)
   else:
     raise ValueError("x and y must both be non-None or both be None.")
 
diff --git a/tensorflow/tensorflow.bzl b/tensorflow/tensorflow.bzl
index d194b37700..0db915f1b9 100644
--- a/tensorflow/tensorflow.bzl
+++ b/tensorflow/tensorflow.bzl
@@ -334,6 +334,7 @@ def tf_gen_op_wrapper_cc(name,
           " $$(dirname $$(echo $(locations " + api_def_src +
           ") | cut -d\" \" -f1))")
     api_def_args_str = ",".join(api_def_args)
+
   native.genrule(
       name=name + "_genrule",
       outs=[
@@ -469,7 +470,8 @@ def tf_gen_op_wrapper_py(name,
                          hidden_file=None,
                          generated_target_name=None,
                          op_whitelist=[],
-                         cc_linkopts=[]):
+                         cc_linkopts=[],
+                         api_def_srcs=[]):
   if (hidden or hidden_file) and op_whitelist:
     fail('Cannot pass specify both hidden and op_whitelist.')
 
@@ -502,22 +504,39 @@ def tf_gen_op_wrapper_py(name,
     op_list_arg = "''"
     op_list_is_whitelist = False
 
+  # Prepare ApiDef directories to pass to the genrule.
+  if not api_def_srcs:
+    api_def_args_str = ","
+  else:
+    api_def_args = []
+    for api_def_src in api_def_srcs:
+      # Add directory of the first ApiDef source to args.
+      # We are assuming all ApiDefs in a single api_def_src are in the
+      # same directory.
+      api_def_args.append(
+          "$$(dirname $$(echo $(locations " + api_def_src +
+          ") | cut -d\" \" -f1))")
+    api_def_args_str = ",".join(api_def_args)
+
   if hidden_file:
     # `hidden_file` is file containing a list of op names to be hidden in the
     # generated module.
     native.genrule(
         name=name + "_pygenrule",
         outs=[out],
-        srcs=[hidden_file],
+        srcs=api_def_srcs + [hidden_file],
         tools=[tool_name] + tf_binary_additional_srcs(),
-        cmd=("$(location " + tool_name + ") @$(location " + hidden_file + ") " +
+        cmd=("$(location " + tool_name + ") " + api_def_args_str +
+             " @$(location " + hidden_file + ") " +
              ("1" if require_shape_functions else "0") + " > $@"))
   else:
     native.genrule(
         name=name + "_pygenrule",
         outs=[out],
+        srcs=api_def_srcs,
         tools=[tool_name] + tf_binary_additional_srcs(),
-        cmd=("$(location " + tool_name + ") " + op_list_arg + " " +
+        cmd=("$(location " + tool_name + ") " + api_def_args_str + " " +
+             op_list_arg + " " +
              ("1" if require_shape_functions else "0") + " " +
              ("1" if op_list_is_whitelist else "0") + " > $@"))
 
-- 
GitLab


From 5917d48293a5582d625f015e4862b2d370b75079 Mon Sep 17 00:00:00 2001
From: Shanqing Cai <cais@google.com>
Date: Mon, 4 Dec 2017 12:32:40 -0800
Subject: [PATCH 0592/1225] Apply oss_serial tag to tests that use portpicker
 to create local clusters

to avoid port conflicts with other tests during parallel bazel tests.

PiperOrigin-RevId: 177851615
---
 tensorflow/contrib/data/python/kernel_tests/BUILD | 5 ++++-
 tensorflow/python/profiler/BUILD                  | 5 ++++-
 2 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/tensorflow/contrib/data/python/kernel_tests/BUILD b/tensorflow/contrib/data/python/kernel_tests/BUILD
index 15cc529de4..1e8a6b26c9 100644
--- a/tensorflow/contrib/data/python/kernel_tests/BUILD
+++ b/tensorflow/contrib/data/python/kernel_tests/BUILD
@@ -211,7 +211,10 @@ py_test(
     size = "small",
     srcs = ["iterator_ops_cluster_test.py"],
     srcs_version = "PY2AND3",
-    tags = ["no_windows"],
+    tags = [
+        "no_windows",
+        "oss_serial",
+    ],
     deps = [
         "//tensorflow/contrib/data/python/ops:dataset_ops",
         "//tensorflow/core:protos_all_py",
diff --git a/tensorflow/python/profiler/BUILD b/tensorflow/python/profiler/BUILD
index 519b05975f..c815aad0a0 100644
--- a/tensorflow/python/profiler/BUILD
+++ b/tensorflow/python/profiler/BUILD
@@ -57,7 +57,10 @@ cuda_py_test(
         "//tensorflow/python:platform",
         "//tensorflow/python:variables",
     ],
-    tags = ["no_pip"],
+    tags = [
+        "no_pip",
+        "oss_serial",
+    ],
 )
 
 cuda_py_test(
-- 
GitLab


From 71cd06c608d1cb6fb23f63cf20403b1958965c43 Mon Sep 17 00:00:00 2001
From: Peter Hawkins <phawkins@google.com>
Date: Mon, 4 Dec 2017 12:33:58 -0800
Subject: [PATCH 0593/1225] [TF:XLA] Fix wrong output of FloorDiv op for
 DT_HALF values.

PiperOrigin-RevId: 177851804
---
 tensorflow/compiler/tf2xla/kernels/binary_ops.cc |  3 ++-
 tensorflow/core/framework/types.cc               | 12 ++++++++++++
 tensorflow/core/framework/types.h                |  3 +++
 3 files changed, 17 insertions(+), 1 deletion(-)

diff --git a/tensorflow/compiler/tf2xla/kernels/binary_ops.cc b/tensorflow/compiler/tf2xla/kernels/binary_ops.cc
index 1de9192432..2436a6074a 100644
--- a/tensorflow/compiler/tf2xla/kernels/binary_ops.cc
+++ b/tensorflow/compiler/tf2xla/kernels/binary_ops.cc
@@ -22,6 +22,7 @@ limitations under the License.
 #include "tensorflow/compiler/xla/client/computation_builder.h"
 #include "tensorflow/core/framework/kernel_def_builder.h"
 #include "tensorflow/core/framework/op_kernel.h"
+#include "tensorflow/core/framework/types.h"
 
 namespace tensorflow {
 namespace {
@@ -75,7 +76,7 @@ static xla::ComputationDataHandle FloorDivImpl(xla::ComputationBuilder* b,
   auto abs_y = b->Abs(y);
   auto t = b->Neg(b->Sub(b->Add(abs_x, abs_y), one));
   auto result = b->Select(different_sign, b->Div(t, abs_y), b->Div(x, y));
-  if (dtype == DT_FLOAT || dtype == DT_DOUBLE) {
+  if (DataTypeIsFloating(dtype)) {
     result = b->Floor(result);
   }
   return result;
diff --git a/tensorflow/core/framework/types.cc b/tensorflow/core/framework/types.cc
index 48849f9dda..02b2df448a 100644
--- a/tensorflow/core/framework/types.cc
+++ b/tensorflow/core/framework/types.cc
@@ -306,6 +306,18 @@ bool DataTypeCanUseMemcpy(DataType dt) {
   }
 }
 
+bool DataTypeIsFloating(DataType dt) {
+  switch (dt) {
+    case DT_HALF:
+    case DT_BFLOAT16:
+    case DT_FLOAT:
+    case DT_DOUBLE:
+      return true;
+    default:
+      return false;
+  }
+}
+
 bool DataTypeIsQuantized(DataType dt) {
   switch (dt) {
     case DT_QINT8:
diff --git a/tensorflow/core/framework/types.h b/tensorflow/core/framework/types.h
index dc53ed4178..c27a4d4605 100644
--- a/tensorflow/core/framework/types.h
+++ b/tensorflow/core/framework/types.h
@@ -222,6 +222,9 @@ static_assert(IsValidDataType<int32>::value, "Incorrect impl for int32");
 
 bool DataTypeCanUseMemcpy(DataType dt);
 
+// Returns true iff 'dt' is a real, non-quantized floating point type.
+bool DataTypeIsFloating(DataType dt);
+
 bool DataTypeIsQuantized(DataType dt);
 
 // Is the dtype nonquantized integral?
-- 
GitLab


From e96c3643adfc823cff1b59f2164d43ccbbec5edb Mon Sep 17 00:00:00 2001
From: Max Galkin <maxgalkin@google.com>
Date: Mon, 4 Dec 2017 13:06:25 -0800
Subject: [PATCH 0594/1225] Sanitize formatting in IdTableWithHashBuckets doc
 comment. Fixes list formatting and sanitizes words in angle brackets, which
 aren't rendered in the web doc:

https://www.tensorflow.org/versions/master/api_docs/python/tf/contrib/lookup/IdTableWithHashBuckets

Follows the working formatting example of TextFileInitializer.

PiperOrigin-RevId: 177856349
---
 tensorflow/python/ops/lookup_ops.py | 25 ++++++++++++++-----------
 1 file changed, 14 insertions(+), 11 deletions(-)

diff --git a/tensorflow/python/ops/lookup_ops.py b/tensorflow/python/ops/lookup_ops.py
index 8bc0bc7d06..227e1a5265 100644
--- a/tensorflow/python/ops/lookup_ops.py
+++ b/tensorflow/python/ops/lookup_ops.py
@@ -688,19 +688,22 @@ class IdTableWithHashBuckets(LookupInterface):
 
   For example, if an instance of `IdTableWithHashBuckets` is initialized with a
   string-to-id table that maps:
-  - emerson -> 0
-  - lake -> 1
-  - palmer -> 2
+
+  * `emerson -> 0`
+  * `lake -> 1`
+  * `palmer -> 2`
 
   The `IdTableWithHashBuckets` object will performs the following mapping:
-  - emerson -> 0
-  - lake -> 1
-  - palmer -> 2
-  - <other term> -> bucket id between 3 and 3 + num_oov_buckets - 1, calculated
-    by: hash(<term>) % num_oov_buckets + vocab_size
-
-  If input_tensor is ["emerson", "lake", "palmer", "king", "crimson"],
-  the lookup result is [0, 1, 2, 4, 7]
+
+  * `emerson -> 0`
+  * `lake -> 1`
+  * `palmer -> 2`
+  * `<other term> -> bucket_id`, where bucket_id will be between `3` and
+  `3 + num_oov_buckets - 1`, calculated by:
+  `hash(<term>) % num_oov_buckets + vocab_size`
+
+  If input_tensor is `["emerson", "lake", "palmer", "king", "crimson"]`,
+  the lookup result is `[0, 1, 2, 4, 7]`.
 
   If `table` is None, only out-of-vocabulary buckets are used.
 
-- 
GitLab


From 89f4dd0e6700592c8f25c12fc363d3ecc0c3148d Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 4 Dec 2017 13:16:05 -0800
Subject: [PATCH 0595/1225] Getting rid of obsolete function
 is_variable_registered from LayerCollection.  Replaced it with a simple
 function that returns a list of all the registered variables.

PiperOrigin-RevId: 177857623
---
 .../kfac/python/ops/layer_collection.py       | 20 +++++++------------
 1 file changed, 7 insertions(+), 13 deletions(-)

diff --git a/tensorflow/contrib/kfac/python/ops/layer_collection.py b/tensorflow/contrib/kfac/python/ops/layer_collection.py
index 3a005ee39d..275c88eafd 100644
--- a/tensorflow/contrib/kfac/python/ops/layer_collection.py
+++ b/tensorflow/contrib/kfac/python/ops/layer_collection.py
@@ -152,19 +152,13 @@ class LayerCollection(object):
     """LossFunctions registered with this LayerCollection."""
     return list(self._loss_dict.values())
 
-  def is_variable_registered(self, variable):
-    """Checks whether the variable has already been registered.
-
-    Args:
-      variable: A single variable or tensor.
-    Returns:
-      True if the variable has been registered either by itself or as part of a
-      tuple.
-    """
-    return any([
-        variable in key if isinstance(key, (tuple, list)) else variable == key
-        for key in self.fisher_blocks.keys()
-    ])
+  @property
+  def registered_variables(self):
+    """A tuple of all of the variables currently registered."""
+    tuple_of_tuples = (ensure_sequence(key) for key, block
+                       in six.iteritems(self.fisher_blocks))
+    flat_tuple = tuple(item for tuple_ in tuple_of_tuples for item in tuple_)
+    return flat_tuple
 
   @property
   def linked_parameters(self):
-- 
GitLab


From a2df7bd6e13c68700dea82d88b9753fce26f9a05 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 4 Dec 2017 13:20:47 -0800
Subject: [PATCH 0596/1225] Update pin for bazel-toolchains to latest version

https://github.com/bazelbuild/bazel-toolchains/releases/tag/b49ba36

PiperOrigin-RevId: 177858255
---
 tensorflow/workspace.bzl | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl
index 44c7a514d1..f4abeb014d 100644
--- a/tensorflow/workspace.bzl
+++ b/tensorflow/workspace.bzl
@@ -763,11 +763,11 @@ def tf_workspace(path_prefix="", tf_repo_name=""):
   native.http_archive(
       name = "bazel_toolchains",
       urls = [
-          "https://mirror.bazel.build/github.com/bazelbuild/bazel-toolchains/archive/af4681c3d19f063f090222ec3d04108c4e0ca255.tar.gz",
-          "https://github.com/bazelbuild/bazel-toolchains/archive/af4681c3d19f063f090222ec3d04108c4e0ca255.tar.gz",
+          "https://mirror.bazel.build/github.com/bazelbuild/bazel-toolchains/archive/b49ba3689f46ac50e9277dafd8ff32b26951f82e.tar.gz",
+          "https://github.com/bazelbuild/bazel-toolchains/archive/b49ba3689f46ac50e9277dafd8ff32b26951f82e.tar.gz",
       ],
-      sha256 = "d58bb2d6c8603f600d522b6104d6192a65339aa26cbba9f11ff5c4b36dedb928",
-      strip_prefix = "bazel-toolchains-af4681c3d19f063f090222ec3d04108c4e0ca255",
+      sha256 = "1266f1e27b4363c83222f1a776397c7a069fbfd6aacc9559afa61cdd73e1b429",
+      strip_prefix = "bazel-toolchains-b49ba3689f46ac50e9277dafd8ff32b26951f82e",
   )
 
   native.new_http_archive(
-- 
GitLab


From c14ef60950282552acd6d536f94811de42aa4ea9 Mon Sep 17 00:00:00 2001
From: Vinu Rajashekhar <vinuraja@google.com>
Date: Mon, 4 Dec 2017 13:51:58 -0800
Subject: [PATCH 0597/1225] Marks args as runtime consts in XLA
 EncapsulateSubgraphsPass.

- Using the GuaranteeConstOp.
- Runs a backwards analysis on the args to see if all the paths lead to GuaranteeConstOps/ConstOps.

PiperOrigin-RevId: 177862716
---
 .../jit/encapsulate_subgraphs_pass.cc         |  53 +++++++++
 .../jit/encapsulate_subgraphs_pass_test.cc    | 104 ++++++++++++++++++
 2 files changed, 157 insertions(+)

diff --git a/tensorflow/compiler/jit/encapsulate_subgraphs_pass.cc b/tensorflow/compiler/jit/encapsulate_subgraphs_pass.cc
index 22899ebeeb..dc06b7a402 100644
--- a/tensorflow/compiler/jit/encapsulate_subgraphs_pass.cc
+++ b/tensorflow/compiler/jit/encapsulate_subgraphs_pass.cc
@@ -32,6 +32,7 @@ limitations under the License.
 #include "tensorflow/core/graph/algorithm.h"
 #include "tensorflow/core/graph/graph.h"
 #include "tensorflow/core/graph/tensor_id.h"
+#include "tensorflow/core/lib/gtl/flatset.h"
 #include "tensorflow/core/lib/gtl/map_util.h"
 #include "tensorflow/core/lib/hash/hash.h"
 #include "tensorflow/core/lib/strings/str_util.h"
@@ -48,6 +49,52 @@ const char* const kXlaNumResourceArgsAttr = "_XlaNumResourceArgs";
 
 namespace {
 
+bool AreAllParentsConst(const Node& n,
+                        const gtl::FlatSet<const Node*>& runtime_const_nodes) {
+  if (n.type_string() == "GuaranteeConst" || n.type_string() == "Const") {
+    // If the current node is itself a cast-to-const, no need
+    // to look at the incoming edges.
+    return true;
+  }
+
+  bool all_parents_const = true;
+  bool atleast_one_non_control_edge = false;
+  for (const Edge* in : n.in_edges()) {
+    atleast_one_non_control_edge =
+        atleast_one_non_control_edge || !in->IsControlEdge();
+    if (!in->IsControlEdge() && runtime_const_nodes.count(in->src()) == 0) {
+      all_parents_const = false;
+      break;
+    }
+  }
+  return all_parents_const && atleast_one_non_control_edge;
+}
+
+void MarkGuaranteedConstants(
+    const Graph& graph,
+    const std::vector<std::pair<Node*, Node*>>& src_arg_pairs) {
+  gtl::FlatSet<const Node*> guaranteed_const_nodes;
+  std::vector<Node*> srcs;
+  srcs.reserve(src_arg_pairs.size());
+  for (const auto& src_arg : src_arg_pairs) {
+    srcs.push_back(src_arg.first);
+  }
+  ReverseDFSFrom(graph, srcs, /*enter=*/nullptr,
+                 /*leave=*/[&guaranteed_const_nodes](Node* n) {
+                   // TODO(vinuraja): Doesn't work in the presence of loops.
+                   if (AreAllParentsConst(*n, guaranteed_const_nodes)) {
+                     guaranteed_const_nodes.insert(n);
+                   }
+                 });
+
+  for (auto& src_arg : src_arg_pairs) {
+    if (guaranteed_const_nodes.count(src_arg.first) != 0) {
+      VLOG(1) << "Guaranteed const found: " << src_arg.first->DebugString();
+      src_arg.second->AddAttr("_is_guaranteed_constant", true);
+    }
+  }
+}
+
 // A node/slot pair.
 // TODO(phawkins): is there a common definition of this?
 struct NodeSlot {
@@ -175,9 +222,11 @@ Status Encapsulator::SplitIntoSubgraphs() {
   // Map from input graph nodes to subgraph nodes.
   std::unordered_map<Node*, Node*> node_images;
 
+  std::vector<std::pair<Node*, Node*>> src_arg_pairs;
   // Copy all marked nodes to a subgraph. Do nothing for unmarked nodes.
   for (Node* node : graph_in_->op_nodes()) {
     string func_id = GetFunctionNameAttr(node);
+
     if (func_id.empty()) continue;
 
     Subgraph& subgraph = subgraphs_[func_id];
@@ -276,11 +325,13 @@ Status Encapsulator::SplitIntoSubgraphs() {
                                kArgOp);
         builder.Attr("T", dtype);
         builder.Attr("index", arg_index);
+
         s = builder.Finalize(&arg_def);
         if (!s.ok()) return s;
 
         Node* arg = dst_subgraph.graph->AddNode(arg_def, &s);
         if (!s.ok()) return s;
+        src_arg_pairs.push_back({edge->src(), arg});
 
         dst_subgraph.args.push_back(arg);
       }
@@ -292,6 +343,8 @@ Status Encapsulator::SplitIntoSubgraphs() {
     }
   }
 
+  MarkGuaranteedConstants(*graph_in_, src_arg_pairs);
+
   for (auto& entry : subgraphs_) {
     FixupSourceAndSinkEdges(entry.second.graph.get());
   }
diff --git a/tensorflow/compiler/jit/encapsulate_subgraphs_pass_test.cc b/tensorflow/compiler/jit/encapsulate_subgraphs_pass_test.cc
index 4a1dbaf05d..717efb3601 100644
--- a/tensorflow/compiler/jit/encapsulate_subgraphs_pass_test.cc
+++ b/tensorflow/compiler/jit/encapsulate_subgraphs_pass_test.cc
@@ -398,5 +398,109 @@ TEST(EncapsulateSubgraphsTest, ParallelChecking) {
   EXPECT_EQ(expected_edges, GraphEdges(*graph));
 }
 
+const Node* FindNodeByName(const Graph& graph, const string& name) {
+  for (const Node* node : graph.nodes()) {
+    if (node->name() == name) return node;
+  }
+  return nullptr;
+}
+
+bool HasGuaranteeConstAttr(const Node& n) {
+  bool is_guaranteed_constant = false;
+  if (!GetNodeAttr(n.attrs(), "_is_guaranteed_constant",
+                   &is_guaranteed_constant)
+           .ok()) {
+    return false;
+  }
+  return is_guaranteed_constant;
+}
+
+TEST(EncapsulateSubgraphsWithGuaranteeConstOpTest, Simple) {
+  Scope root = Scope::NewRootScope().ExitOnError().WithDevice(
+      "/job:localhost/replica:0/task:0/cpu:0");
+  auto x1 = ops::Placeholder(root.WithOpName("x1"), DT_FLOAT);
+  auto const_x2 = ops::Const(root.WithOpName("const_x2"), 10.0f);
+  auto const_guarantee_x1 =
+      ops::GuaranteeConst(root.WithOpName("const_guarantee_x1"), x1);
+  auto add1 = ops::Add(root.WithOpName("add1"), const_guarantee_x1, const_x2);
+  add1.node()->AddAttr("_encapsulate", "encapsulate1");
+
+  Graph graph_before(OpRegistry::Global());
+  TF_ASSERT_OK(root.ToGraph(&graph_before));
+
+  std::unique_ptr<Graph> graph_after;
+  FunctionLibraryDefinition library(OpRegistry::Global(), {});
+  int guaranteed_consts = 0;
+  TF_ASSERT_OK(EncapsulateSubgraphsInFunctions(
+      "_encapsulate", graph_before,
+      /*rewrite_subgraph_fn=*/
+      [&guaranteed_consts](std::unique_ptr<Graph>* graph_ptr,
+                           std::vector<int>* input_permutation,
+                           std::vector<int>* output_permutation,
+                           NodeDef* call_def) {
+        Graph* graph = graph_ptr->get();
+        for (const Node* n : graph->nodes()) {
+          if (n->type_string() == "_Arg" &&
+              StringPiece(n->name()).starts_with("const")) {
+            ++guaranteed_consts;
+            EXPECT_TRUE(HasGuaranteeConstAttr(*n));
+          } else {
+            EXPECT_FALSE(HasGuaranteeConstAttr(*n));
+          }
+        }
+        return Status::OK();
+      },
+      /*parallel_checking=*/false,
+      /*reuse_existing_functions=*/false, &graph_after, &library));
+  EXPECT_EQ(2, guaranteed_consts);
+}
+
+TEST(EncapsulateSubgraphsWithGuaranteeConstOpTest, Add) {
+  Scope root = Scope::NewRootScope().ExitOnError().WithDevice(
+      "/job:localhost/replica:0/task:0/cpu:0");
+  auto x1 = ops::Placeholder(root.WithOpName("x1"), DT_FLOAT);
+  auto x2 = ops::Placeholder(root.WithOpName("x2"), DT_FLOAT);
+  auto const_guarantee_x1 =
+      ops::GuaranteeConst(root.WithOpName("const_guarantee_x1"), x1);
+  auto const_guarantee_x2 =
+      ops::GuaranteeConst(root.WithOpName("const_guarantee_x2"), x2);
+  auto const_guarantee_add1 = ops::Add(root.WithOpName("const_guarantee_add1"),
+                                       const_guarantee_x1, const_guarantee_x2);
+  auto add2 = ops::Add(root.WithOpName("add2"), const_guarantee_x1, x2);
+  auto mul1 = ops::Mul(root.WithOpName("mul1"), const_guarantee_add1, add2);
+  mul1.node()->AddAttr("_encapsulate", "encapsulate1");
+
+  Graph graph_before(OpRegistry::Global());
+  TF_ASSERT_OK(root.ToGraph(&graph_before));
+
+  std::unique_ptr<Graph> graph_after;
+  FunctionLibraryDefinition library(OpRegistry::Global(), {});
+  int guaranteed_consts = 0;
+  TF_ASSERT_OK(EncapsulateSubgraphsInFunctions(
+      "_encapsulate", graph_before,
+      /*rewrite_subgraph_fn=*/
+      [&guaranteed_consts](std::unique_ptr<Graph>* graph_ptr,
+                           std::vector<int>* input_permutation,
+                           std::vector<int>* output_permutation,
+                           NodeDef* call_def) {
+        Graph* graph = graph_ptr->get();
+        for (const Node* n : graph->nodes()) {
+          if (n->type_string() == "_Arg" &&
+              StringPiece(n->name()).starts_with("const")) {
+            ++guaranteed_consts;
+            EXPECT_TRUE(HasGuaranteeConstAttr(*n));
+          } else {
+            EXPECT_FALSE(HasGuaranteeConstAttr(*n));
+          }
+        }
+        return Status::OK();
+      },
+      /*parallel_checking=*/false,
+      /*reuse_existing_functions=*/false, &graph_after, &library));
+  // Only 1 runtime const, which is const_guarantee_add1. Add2 has one const
+  // and another non-const, so overall non-const.
+  EXPECT_EQ(1, guaranteed_consts);
+}
+
 }  // namespace
 }  // namespace tensorflow
-- 
GitLab


From f410adcacc0fe0021abe2fd3958ca0c8597c606e Mon Sep 17 00:00:00 2001
From: Toni Kunic <tkunic@rocketmail.com>
Date: Mon, 4 Dec 2017 17:00:43 -0500
Subject: [PATCH 0598/1225] Fix link to BUILD file in android readme.

---
 tensorflow/examples/android/README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/examples/android/README.md b/tensorflow/examples/android/README.md
index 881a975e60..0fb96d60ce 100644
--- a/tensorflow/examples/android/README.md
+++ b/tensorflow/examples/android/README.md
@@ -164,7 +164,7 @@ download-models.gradle.
 
 **Optional**: If you wish to place the models in your assets manually, remove
 all of the `model_files` entries from the `assets` list in `tensorflow_demo`
-found in the `[BUILD](BUILD)` file. Then download and extract the archives
+found in the [`BUILD`](BUILD#L92) file. Then download and extract the archives
 yourself to the `assets` directory in the source tree:
 
 ```bash
-- 
GitLab


From 1b2bd86c5c1d9889ddc8de1eb7f52c4708e91a9e Mon Sep 17 00:00:00 2001
From: Shanqing Cai <cais@google.com>
Date: Mon, 4 Dec 2017 14:08:41 -0800
Subject: [PATCH 0599/1225] Internal-only changes

PiperOrigin-RevId: 177865604
---
 third_party/repo.bzl | 102 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 102 insertions(+)
 create mode 100644 third_party/repo.bzl

diff --git a/third_party/repo.bzl b/third_party/repo.bzl
new file mode 100644
index 0000000000..eb91316f67
--- /dev/null
+++ b/third_party/repo.bzl
@@ -0,0 +1,102 @@
+# Copyright 2017 The TensorFlow Authors. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Utilities for defining TensorFlow Bazel dependencies."""
+
+_SINGLE_URL_WHITELIST = depset([
+    "arm_compiler",
+    "ortools_archive",
+])
+
+def _is_windows(ctx):
+  return ctx.os.name.lower().find("windows") != -1
+
+def _get_env_var(ctx, name):
+  if name in ctx.os.environ:
+    return ctx.os.environ[name]
+  else:
+    return None
+
+# Executes specified command with arguments and calls 'fail' if it exited with
+# non-zero code
+def _execute_and_check_ret_code(repo_ctx, cmd_and_args):
+  result = repo_ctx.execute(cmd_and_args, timeout=10)
+  if result.return_code != 0:
+    fail(("Non-zero return code({1}) when executing '{0}':\n" + "Stdout: {2}\n"
+          + "Stderr: {3}").format(" ".join(cmd_and_args), result.return_code,
+                                  result.stdout, result.stderr))
+
+def _repos_are_siblings():
+  return Label("@foo//bar").workspace_root.startswith("../")
+
+# Apply a patch_file to the repository root directory
+# Runs 'patch -p1'
+def _apply_patch(ctx, patch_file):
+  # Don't check patch on Windows, because patch is only available under bash.
+  if not _is_windows(ctx) and not ctx.which("patch"):
+    fail("patch command is not found, please install it")
+  cmd = ["patch", "-p1", "-d", ctx.path("."), "-i", ctx.path(patch_file)]
+  if _is_windows(ctx):
+    bazel_sh = _get_env_var(ctx, "BAZEL_SH")
+    if not bazel_sh:
+      fail("BAZEL_SH environment variable is not set")
+    cmd = [bazel_sh, "-c", " ".join(cmd)]
+  _execute_and_check_ret_code(ctx, cmd)
+
+def _apply_delete(ctx, paths):
+  for path in paths:
+    if path.startswith("/"):
+      fail("refusing to rm -rf path starting with '/': " + path)
+    if ".." in path:
+      fail("refusing to rm -rf path containing '..': " + path)
+  _execute_and_check_ret_code(
+      ctx, ["rm", "-rf"] + [ctx.path(path) for path in paths])
+
+def _tf_http_archive(ctx):
+  if ("mirror.bazel.build" not in ctx.attr.urls[0] or
+      (len(ctx.attr.urls) < 2 and
+       ctx.attr.name not in _SINGLE_URL_WHITELIST)):
+    fail("tf_http_archive(urls) must have redundant URLs. The Bazel Mirror " +
+         "URL must come first. Please note mirroring happens after merge")
+  ctx.download_and_extract(
+      ctx.attr.urls,
+      "",
+      ctx.attr.sha256,
+      ctx.attr.type,
+      ctx.attr.strip_prefix)
+  if ctx.attr.delete:
+    _apply_delete(ctx, ctx.attr.delete)
+  if ctx.attr.patch_file != None:
+    _apply_patch(ctx, ctx.attr.patch_file)
+  if ctx.attr.build_file != None:
+    ctx.template("BUILD", ctx.attr.build_file, {
+        "%prefix%": ".." if _repos_are_siblings() else "external",
+    }, False)
+
+tf_http_archive = repository_rule(
+    implementation=_tf_http_archive,
+    attrs={
+        "sha256": attr.string(mandatory=True),
+        "urls": attr.string_list(mandatory=True, allow_empty=False),
+        "strip_prefix": attr.string(),
+        "type": attr.string(),
+        "delete": attr.string_list(),
+        "patch_file": attr.label(),
+        "build_file": attr.label(),
+    })
+"""Downloads and creates Bazel repos for dependencies.
+This is a swappable replacement for both http_archive() and
+new_http_archive() that offers some additional features. It also helps
+ensure best practices are followed.
+"""
-- 
GitLab


From fbec2151a066cf13135f261410bd003faff47400 Mon Sep 17 00:00:00 2001
From: Sanjoy Das <sanjoy@google.com>
Date: Mon, 4 Dec 2017 14:25:09 -0800
Subject: [PATCH 0600/1225] [XLA:CPU] Use an AVX optimized reduction step for
 row-major matrix-vector dot

The optimization is to use the vhaddps instruction when possible.

PiperOrigin-RevId: 177868238
---
 .../xla/service/cpu/dot_op_emitter.cc         |  11 +-
 .../service/llvm_ir/vector_support_library.cc | 120 +++++++++++++++++-
 .../service/llvm_ir/vector_support_library.h  |  35 ++++-
 3 files changed, 161 insertions(+), 5 deletions(-)

diff --git a/tensorflow/compiler/xla/service/cpu/dot_op_emitter.cc b/tensorflow/compiler/xla/service/cpu/dot_op_emitter.cc
index 7496dd20c2..7f0bf2c8e4 100644
--- a/tensorflow/compiler/xla/service/cpu/dot_op_emitter.cc
+++ b/tensorflow/compiler/xla/service/cpu/dot_op_emitter.cc
@@ -415,10 +415,17 @@ void RowMajorMatrixVectorProductEmitter::EmitOuterLoopBody(llvm::Value* row,
   EmitInnerLoopEpilogue(/*current_tile_row=*/row, /*rows=*/row_count,
                         &scalar_accumulators);
 
+  std::vector<llvm::Value*> accumulator_values;
+  std::transform(
+      vector_accumulators.begin(), vector_accumulators.end(),
+      std::back_inserter(accumulator_values),
+      [](const VectorVariable& vector_var) { return vector_var.Get(); });
+  std::vector<llvm::Value*> horizontal_sums =
+      vsl_.ComputeHorizontalSums(std::move(accumulator_values));
+
   for (int i = 0; i < row_count; i++) {
     llvm::Value* result_value =
-        vsl_.Add(vsl_.AddReduce(vector_accumulators[i].Get()),
-                 scalar_accumulators[i].Get());
+        vsl_.Add(horizontal_sums[i], scalar_accumulators[i].Get());
     llvm::Value* offset = ir_builder_->CreateAdd(ir_builder_->getInt64(i), row);
     vsl_.StoreScalar(result_value, result_, offset);
   }
diff --git a/tensorflow/compiler/xla/service/llvm_ir/vector_support_library.cc b/tensorflow/compiler/xla/service/llvm_ir/vector_support_library.cc
index e8c6a83618..59e8296078 100644
--- a/tensorflow/compiler/xla/service/llvm_ir/vector_support_library.cc
+++ b/tensorflow/compiler/xla/service/llvm_ir/vector_support_library.cc
@@ -34,6 +34,12 @@ VectorSupportLibrary::VectorSupportLibrary(PrimitiveType primitive_type,
 }
 
 llvm::Value* VectorSupportLibrary::Mul(llvm::Value* lhs, llvm::Value* rhs) {
+  CHECK(lhs->getType() == scalar_type() || lhs->getType() == vector_type());
+  return MulInternal(lhs, rhs);
+}
+
+llvm::Value* VectorSupportLibrary::MulInternal(llvm::Value* lhs,
+                                               llvm::Value* rhs) {
   if (scalar_type_->isFloatingPointTy()) {
     return ir_builder()->CreateFMul(lhs, rhs, name());
   } else {
@@ -42,6 +48,12 @@ llvm::Value* VectorSupportLibrary::Mul(llvm::Value* lhs, llvm::Value* rhs) {
 }
 
 llvm::Value* VectorSupportLibrary::Add(llvm::Value* lhs, llvm::Value* rhs) {
+  CHECK(lhs->getType() == scalar_type() || lhs->getType() == vector_type());
+  return AddInternal(lhs, rhs);
+}
+
+llvm::Value* VectorSupportLibrary::AddInternal(llvm::Value* lhs,
+                                               llvm::Value* rhs) {
   if (scalar_type_->isFloatingPointTy()) {
     return ir_builder()->CreateFAdd(lhs, rhs, name());
   } else {
@@ -129,6 +141,110 @@ llvm::Value* VectorSupportLibrary::AddReduce(llvm::Value* vector) {
                                             name());
 }
 
+llvm::Value* VectorSupportLibrary::AvxStyleHorizontalAdd(llvm::Value* lhs,
+                                                         llvm::Value* rhs) {
+  CHECK_EQ(lhs->getType(), vector_type());
+  CHECK_EQ(rhs->getType(), vector_type());
+  CHECK_EQ(vector_size() % 2, 0);
+
+  llvm::SmallVector<llvm::Constant*, 32> mask_a, mask_b;
+
+  // Adding the values shuffled using mask_a and mask_b gives us the
+  // AVX-style horizontal add we want.  The masks work as documented
+  // in https://llvm.org/docs/LangRef.html#shufflevector-instruction
+  //
+  // Here are the masks for vector_width() == 8:
+  //
+  //    index: |0 |1 |2 | 3 |4 |5 | 6 | 7
+  //   --------+--+--+--+---+--+--+---+---
+  //   mask_a: |0 |2 |8 |10 |4 |6 |12 |14
+  //   mask_b: |1 |3 |9 |11 |5 |7 |13 |16
+  //
+  // So, as an example, the value at lane 3 of the result vector is
+  // the result of adding lane 10 and lane 11 in the combined lhs++rhs
+  // vector, which are the lanes 2 and 3 in the rhs vector.
+  for (int i = 0; i < vector_size(); i += 2) {
+    int increment = i < vector_size() / 2 ? 0 : (vector_size() / 2);
+    mask_a.push_back(ir_builder()->getInt32(increment + i));
+    mask_b.push_back(ir_builder()->getInt32(increment + i + 1));
+  }
+  for (int i = 0; i < vector_size(); i += 2) {
+    int increment = i < vector_size() / 2 ? (vector_size() / 2) : vector_size();
+    mask_a.push_back(ir_builder()->getInt32(increment + i));
+    mask_b.push_back(ir_builder()->getInt32(increment + i + 1));
+  }
+
+  llvm::Value* shuffle_0 = ir_builder()->CreateShuffleVector(
+      lhs, rhs, llvm::ConstantVector::get(mask_a));
+  llvm::Value* shuffle_1 = ir_builder()->CreateShuffleVector(
+      lhs, rhs, llvm::ConstantVector::get(mask_b));
+
+  return Add(shuffle_0, shuffle_1);
+}
+
+llvm::Value* VectorSupportLibrary::ExtractLowHalf(llvm::Value* vector) {
+  llvm::SmallVector<llvm::Constant*, 32> mask;
+  for (int i = 0; i < vector_size() / 2; i++) {
+    mask.push_back(ir_builder()->getInt32(i));
+  }
+
+  return ir_builder()->CreateShuffleVector(vector,
+                                           llvm::UndefValue::get(vector_type()),
+                                           llvm::ConstantVector::get(mask));
+}
+
+llvm::Value* VectorSupportLibrary::ExtractHighHalf(llvm::Value* vector) {
+  llvm::SmallVector<llvm::Constant*, 32> mask;
+  for (int i = 0; i < vector_size() / 2; i++) {
+    mask.push_back(ir_builder()->getInt32(i + vector_size() / 2));
+  }
+
+  return ir_builder()->CreateShuffleVector(vector,
+                                           llvm::UndefValue::get(vector_type()),
+                                           llvm::ConstantVector::get(mask));
+}
+
+std::vector<llvm::Value*> VectorSupportLibrary::ComputeHorizontalSums(
+    std::vector<llvm::Value*> vectors) {
+  // TODO(sanjoy): Move this magic constant to TargetMachineFeatures.
+  const int kAvxVectorWidth = 8;
+  if (vector_size() == kAvxVectorWidth && vectors.size() == kAvxVectorWidth) {
+    return ComputeAvxOptimizedHorizontalSums(std::move(vectors));
+  }
+
+  std::vector<llvm::Value*> result;
+  std::transform(vectors.begin(), vectors.end(), std::back_inserter(result),
+                 [this](llvm::Value* vector) { return AddReduce(vector); });
+  return result;
+}
+
+std::vector<llvm::Value*>
+VectorSupportLibrary::ComputeAvxOptimizedHorizontalSums(
+    std::vector<llvm::Value*> vectors) {
+  while (vectors.size() != 2) {
+    std::vector<llvm::Value*> new_vectors;
+    for (int i = 0; i < vectors.size(); i += 2) {
+      new_vectors.push_back(AvxStyleHorizontalAdd(vectors[i], vectors[i + 1]));
+    }
+
+    vectors = std::move(new_vectors);
+  }
+
+  llvm::Value* low =
+      AddInternal(ExtractLowHalf(vectors[0]), ExtractHighHalf(vectors[0]));
+  llvm::Value* high =
+      AddInternal(ExtractLowHalf(vectors[1]), ExtractHighHalf(vectors[1]));
+
+  std::vector<llvm::Value*> results;
+  for (int i = 0; i < 8; i++) {
+    llvm::Value* scalar_result = ir_builder()->CreateExtractElement(
+        i < 4 ? low : high, ir_builder()->getInt32(i % 4), name());
+    results.push_back(scalar_result);
+  }
+
+  return results;
+}
+
 llvm::Value* VectorSupportLibrary::GetZeroVector() {
   return llvm::Constant::getNullValue(vector_type());
 }
@@ -142,7 +258,9 @@ LlvmVariable::LlvmVariable(llvm::Type* type, llvm::IRBuilder<>* ir_builder)
   alloca_ = llvm_ir::EmitAllocaAtFunctionEntry(type, "", ir_builder_);
 }
 
-llvm::Value* LlvmVariable::Get() { return ir_builder_->CreateLoad(alloca_); }
+llvm::Value* LlvmVariable::Get() const {
+  return ir_builder_->CreateLoad(alloca_);
+}
 
 void LlvmVariable::Set(llvm::Value* new_value) {
   ir_builder_->CreateStore(new_value, alloca_);
diff --git a/tensorflow/compiler/xla/service/llvm_ir/vector_support_library.h b/tensorflow/compiler/xla/service/llvm_ir/vector_support_library.h
index 3072677ab0..f4c7a6a420 100644
--- a/tensorflow/compiler/xla/service/llvm_ir/vector_support_library.h
+++ b/tensorflow/compiler/xla/service/llvm_ir/vector_support_library.h
@@ -111,7 +111,11 @@ class VectorSupportLibrary {
     return LoadBroadcast(base_pointer, ir_builder()->getInt64(offset_elements));
   }
 
-  llvm::Value* AddReduce(llvm::Value* vector);
+  // Compute the horizontal sum of each vector in `vectors`.  The i'th element
+  // in the result vector is the (scalar) horizontal sum of the i'th vector in
+  // `vectors`.
+  std::vector<llvm::Value*> ComputeHorizontalSums(
+      std::vector<llvm::Value*> vectors);
 
   llvm::Value* GetZeroVector();
   llvm::Value* GetZeroScalar();
@@ -126,6 +130,33 @@ class VectorSupportLibrary {
   const std::string& name() const { return name_; }
 
  private:
+  llvm::Value* ExtractLowHalf(llvm::Value*);
+  llvm::Value* ExtractHighHalf(llvm::Value*);
+
+  llvm::Value* MulInternal(llvm::Value* lhs, llvm::Value* rhs);
+  llvm::Value* AddInternal(llvm::Value* lhs, llvm::Value* rhs);
+
+  llvm::Value* AddReduce(llvm::Value* vector);
+
+  // Perform an X86 AVX style horizontal add between `lhs` and `rhs`.  The
+  // resulting IR for an 8-float wide vector is expected to lower to a single
+  // vhaddps instruction on a CPU that supports vhaddps, and not be too bad in
+  // other cases.
+  //
+  // For a vector width of 8, the result vector is computed as:
+  //   Result[0] = Lhs[0] + Lhs[1]
+  //   Result[1] = Lhs[2] + Lhs[3]
+  //   Result[2] = Rhs[0] + Rhs[1]
+  //   Result[3] = Rhs[2] + Rhs[3]
+  //   Result[4] = Lhs[4] + Lhs[5]
+  //   Result[5] = Lhs[6] + Lhs[7]
+  //   Result[6] = Rhs[4] + Rhs[5]
+  //   Result[7] = Rhs[6] + Rhs[7]
+  llvm::Value* AvxStyleHorizontalAdd(llvm::Value* lhs, llvm::Value* rhs);
+
+  std::vector<llvm::Value*> ComputeAvxOptimizedHorizontalSums(
+      std::vector<llvm::Value*> vectors);
+
   int64 vector_size_;
   PrimitiveType primitive_type_;
   llvm::IRBuilder<>* ir_builder_;
@@ -142,7 +173,7 @@ class LlvmVariable {
  public:
   LlvmVariable(llvm::Type*, llvm::IRBuilder<>* ir_builder);
 
-  llvm::Value* Get();
+  llvm::Value* Get() const;
   void Set(llvm::Value* new_value);
 
  private:
-- 
GitLab


From c24b377eb8d72975d1e165a9bccd01723a2dee73 Mon Sep 17 00:00:00 2001
From: Sanjoy Das <sanjoy@google.com>
Date: Mon, 4 Dec 2017 14:26:58 -0800
Subject: [PATCH 0601/1225] [XLA:CPU] Avoid over-aligning parameter buffers

We sometimes pass scalars to non-entry computations, and since these are
pointers pointing to elements in a buffer and are not individually allocated
buffers, they don't have to follow the same alignment rules as buffers, even
though they incidentally do so today.

PiperOrigin-RevId: 177868506
---
 tensorflow/compiler/xla/service/cpu/ir_emitter.cc | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/tensorflow/compiler/xla/service/cpu/ir_emitter.cc b/tensorflow/compiler/xla/service/cpu/ir_emitter.cc
index bb75d3f49e..939dbf0e11 100644
--- a/tensorflow/compiler/xla/service/cpu/ir_emitter.cc
+++ b/tensorflow/compiler/xla/service/cpu/ir_emitter.cc
@@ -243,11 +243,12 @@ int IrEmitter::MinimumAlignmentForBufferSize(int64 buffer_size) {
 
 // Calculate the alignment of a buffer allocated for a given primitive type.
 int IrEmitter::MinimumAlignmentForPrimitiveType(PrimitiveType primitive_type) {
-  int64 buffer_size = ShapeUtil::ByteSizeOfPrimitiveType(primitive_type);
-  DCHECK_GE(buffer_size, 0);
-  DCHECK_LE(buffer_size, SIZE_MAX);
-
-  return MinimumAlignmentForBufferSize(buffer_size);
+  int64 byte_size = ShapeUtil::ByteSizeOfPrimitiveType(primitive_type);
+  DCHECK_GE(byte_size, 0);
+  // Largest scalar is a complex64 so we don't need to worry about the
+  // int64->int truncation here.
+  DCHECK_LE(byte_size, 8);
+  return byte_size;
 }
 
 int64 IrEmitter::ByteSizeOf(const Shape& shape) const {
@@ -256,6 +257,10 @@ int64 IrEmitter::ByteSizeOf(const Shape& shape) const {
 
 // Calculate the alignment of a buffer allocated for a given shape.
 int IrEmitter::MinimumAlignmentForShape(const Shape& shape) {
+  if (ShapeUtil::IsScalar(shape)) {
+    return MinimumAlignmentForPrimitiveType(shape.element_type());
+  }
+
   int64 buffer_size = ByteSizeOf(shape);
   DCHECK_GE(buffer_size, 0);
   DCHECK_LE(buffer_size, SIZE_MAX);
-- 
GitLab


From 2427923aeb15d63c0df296081b7400ddb8a308ee Mon Sep 17 00:00:00 2001
From: Anna R <annarev@google.com>
Date: Mon, 4 Dec 2017 14:33:47 -0800
Subject: [PATCH 0602/1225] Internal change.

PiperOrigin-RevId: 177869591
---
 tensorflow/tools/api/generator/BUILD | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/tensorflow/tools/api/generator/BUILD b/tensorflow/tools/api/generator/BUILD
index 064668a865..fa0f9b59aa 100644
--- a/tensorflow/tools/api/generator/BUILD
+++ b/tensorflow/tools/api/generator/BUILD
@@ -43,8 +43,13 @@ genrule(
     # api/module1/module2/__init__.py and api/module3/__init__.py.
     outs = [
         "api/__init__.py",
+        "api/bitwise/__init__.py",
         "api/contrib/__init__.py",
         "api/contrib/stat_summarizer/__init__.py",
+        "api/image/__init__.py",
+        "api/linalg/__init__.py",
+        "api/nn/__init__.py",
+        "api/spectral/__init__.py",
         "api/train/__init__.py",
     ],
     cmd = "$(location create_python_api) $(OUTS)",
-- 
GitLab


From 6f09a74e31e8953e8ebf870e53e1fdb8ce073fff Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 4 Dec 2017 14:39:45 -0800
Subject: [PATCH 0603/1225] Fix TFGAN's `clip_weights_test.py` bugs.

PiperOrigin-RevId: 177870577
---
 .../python/features/python/clip_weights_test.py   | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/tensorflow/contrib/gan/python/features/python/clip_weights_test.py b/tensorflow/contrib/gan/python/features/python/clip_weights_test.py
index 030e37ec67..2b7bb5f14e 100644
--- a/tensorflow/contrib/gan/python/features/python/clip_weights_test.py
+++ b/tensorflow/contrib/gan/python/features/python/clip_weights_test.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""Tests for tfgan.python.features.clip_weights."""
+"""Tests for features.clip_weights."""
 
 from __future__ import absolute_import
 from __future__ import division
@@ -31,17 +31,18 @@ class ClipWeightsTest(test.TestCase):
   """Tests for `discriminator_weight_clip`."""
 
   def setUp(self):
+    super(ClipWeightsTest, self).setUp()
     self.variables = [variables.Variable(2.0)]
     self.tuple = collections.namedtuple(
         'VarTuple', ['discriminator_variables'])(self.variables)
 
   def _test_weight_clipping_helper(self, use_tuple):
-    loss = self.variables[0] * 2.0
+    loss = self.variables[0]
     opt = training.GradientDescentOptimizer(1.0)
     if use_tuple:
-      opt_clip = clip_weights.weight_clip(opt, self.variables, 0.1)
+      opt_clip = clip_weights.clip_variables(opt, self.variables, 0.1)
     else:
-      opt_clip = clip_weights.discriminator_weight_clip(opt, self.tuple, 0.1)
+      opt_clip = clip_weights.clip_discriminator_weights(opt, self.tuple, 0.1)
 
     train_op1 = opt.minimize(loss, var_list=self.variables)
     train_op2 = opt_clip.minimize(loss, var_list=self.variables)
@@ -72,10 +73,14 @@ class ClipWeightsTest(test.TestCase):
         clip_weights.clip_discriminator_weights(opt, self.tuple, weight_clip=-1)
     else:
       with self.assertRaisesRegexp(ValueError, 'must be positive'):
-        clip_weights.clip_weights(opt, self.variables, weight_clip=-1)
+        clip_weights.clip_variables(opt, self.variables, weight_clip=-1)
 
   def test_incorrect_weight_clip_value_argsonly(self):
     self._test_incorrect_weight_clip_value_helper(False)
 
   def test_incorrect_weight_clip_value_tuple(self):
     self._test_incorrect_weight_clip_value_helper(True)
+
+
+if __name__ == '__main__':
+  test.main()
-- 
GitLab


From 7b532459b569cb8df6fb204b533e7d7f57a2668f Mon Sep 17 00:00:00 2001
From: Nick Desaulniers <ndesaulniers@google.com>
Date: Mon, 4 Dec 2017 14:44:50 -0800
Subject: [PATCH 0604/1225] Sort sections in operation semantics
 alphabetically.

PiperOrigin-RevId: 177871286
---
 .../performance/xla/operation_semantics.md    | 874 +++++++++---------
 1 file changed, 436 insertions(+), 438 deletions(-)

diff --git a/tensorflow/docs_src/performance/xla/operation_semantics.md b/tensorflow/docs_src/performance/xla/operation_semantics.md
index a49973d550..cd2a0cfbc6 100644
--- a/tensorflow/docs_src/performance/xla/operation_semantics.md
+++ b/tensorflow/docs_src/performance/xla/operation_semantics.md
@@ -13,6 +13,156 @@ arbitrary-dimensional array. For convenience, special cases have more specific
 and familiar names; for example a *vector* is a 1-dimensional array and a
 *matrix* is a 2-dimensional array.
 
+## BatchNormGrad
+
+See also
+[`ComputationBuilder::BatchNormGrad`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/computation_builder.h).
+
+<b> Warning: Not implemented yet. </b>
+
+Calculates gradients of batch norm.
+
+<b> `BatchNormGrad(operand, scale, mean, variance, grad_output, epsilon, feature_index)` </b>
+
+| Arguments       | Type                    | Semantics                        |
+| --------------  | ----------------------- | -------------------------------- |
+| `operand`       | `ComputationDataHandle` | n dimensional array to be        |
+:                 :                         : normalized (x)                   :
+| `scale`         | `ComputationDataHandle` | 1 dimensional array              |
+:                 :                         : (\\(\gamma\\))                   :
+| `mean`          | `ComputationDataHandle` | 1 dimensional array (\\(\mu\\))  |
+| `variance`      | `ComputationDataHandle` | 1 dimensional array              |
+:                 :                         : (\\(\sigma^2\\))                 :
+| `grad_output`   | `ComputationDataHandle` | Gradients passed to              |
+:                 :                         : `BatchNormTraining`              :
+:                 :                         : (\\( \nabla y\\))                :
+| `epsilon`       | `float`                 | Epsilon value (\\(\epsilon\\))   |
+| `feature_index` | `int64`                 | Index to feature dimension in    |
+:                 :                         : `operand`                        :
+
+For each feature in the feature dimension (`feature_index` is the index for the
+feature dimension in `operand`), the operation calculates the gradients with
+respect to `operand`, `offset` and `scale` across all the other dimensions. The
+`feature_index` must be a valid index for the feature dimension in `operand`.
+
+The three gradients are defined by the following formulas:
+
+\\( \nabla x = \nabla y * \gamma * \sqrt{\sigma^2+\epsilon} \\)
+
+\\( \nabla \gamma = sum(\nabla y * (x - \mu) * \sqrt{\sigma^2 + \epsilon}) \\)
+
+\\( \nabla \beta = sum(\nabla y) \\)
+
+The inputs `mean` and `variance` represents moments value
+across batch and spatial dimensions.
+
+The output type is a tuple of three ComputationDataHandles:
+
+|Outputs       | Type                    | Semantics                           |
+|------------- | ----------------------- | ------------------------------------|
+|`grad_operand`| `ComputationDataHandle` | gradient with respect to input      |
+:              :                         : `operand`                           :
+|`grad_offset` | `ComputationDataHandle` | gradient with respect to input      |
+:              :                         : `offset`                            :
+|`grad_scale`  | `ComputationDataHandle` | gradient with respect to input      |
+:              :                         : `scale`                             :
+
+## BatchNormInference
+
+See also
+[`ComputationBuilder::BatchNormInference`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/computation_builder.h).
+
+<b> Warning: Not implemented yet. </b>
+
+Normalizes an array across batch and spatial dimensions.
+
+<b> `BatchNormInference(operand, scale, offset, mean, variance, epsilon, feature_index)` </b>
+
+| Arguments       | Type                    | Semantics                       |
+| --------------  | ----------------------- | ------------------------------- |
+| `operand`       | `ComputationDataHandle` | n dimensional array to be       |
+:                 :                         : normalized                      :
+| `scale`         | `ComputationDataHandle` | 1 dimensional array             |
+| `offset`        | `ComputationDataHandle` | 1 dimensional array             |
+| `mean`          | `ComputationDataHandle` | 1 dimensional array             |
+| `variance`      | `ComputationDataHandle` | 1 dimensional array             |
+| `epsilon`       | `float`                 | Epsilon value                   |
+| `feature_index` | `int64`                 | Index to feature dimension in   |
+:                 :                         : `operand`                       :
+
+For each feature in the feature dimension (`feature_index` is the index for the
+feature dimension in `operand`), the operation calculates the mean and variance
+across all the other dimensions and use the mean and variance to normalize each
+element in `operand`. The `feature_index` must be a valid index for the feature
+dimension in `operand`.
+
+`BatchNormInference`  is equivalent to calling `BatchNormTraining` without
+computing `mean` and `variance` for each batch. It uses the input `mean` and
+`variance` instead as estimated values. The purpose of this op is to reduce
+latency in inference, hence the name `BatchNormInference`.
+
+The output is a n dimensional, normalized array with the same shape as input
+`operand`.
+
+## BatchNormTraining
+
+See also
+[`ComputationBuilder::BatchNormTraining`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/computation_builder.h) and
+[`the original batch normalization paper`](https://arxiv.org/abs/1502.03167)
+for a detailed description of the algorithm.
+
+<b> Warning: Not implemented on GPU backend yet. </b>
+
+Normalizes an array across batch and spatial dimensions.
+
+<b> `BatchNormTraining(operand, scale, offset, epsilon, feature_index)` </b>
+
+| Arguments       | Type                    | Semantics                        |
+| --------------- | ----------------------- | -------------------------------- |
+| `operand`       | `ComputationDataHandle` | n dimensional array to be        |
+:                 :                         : normalized                       :
+| `scale`         | `ComputationDataHandle` | 1 dimensional array              |
+:                 :                         : (\\(\gamma\\))                   :
+| `offset`        | `ComputationDataHandle` | 1 dimensional array              |
+:                 :                         : (\\(\beta\\ )                    :
+| `epsilon`       | `float`                 | Epsilon value (\\(\epsilon\\))   |
+| `feature_index` | `int64`                 | Index to feature dimension       |
+:                 :                         : in `operand`                     :
+
+
+For each feature in the feature dimension (`feature_index` is the index for the
+feature dimension in `operand`), the operation calculates the mean and variance
+across all the other dimensions and use the mean and variance to normalize each
+element in `operand`. The `feature_index` must be a valid index for the feature
+dimension in `operand`.
+
+The algorithm goes as follows for each batch in `operand` \\(x\\) that
+contains `m` elements with `w` and `h` as the size of spatial dimensions (
+assuming `operand` is an 4 dimensional array):
+
+- Calculates batch mean \\(\mu_l\\) for each feature `l` in feature dimension:
+\\(\mu_l=\frac{1}{mwh}\sum_{i=1}^m\sum_{j=1}^w\sum_{k=1}^h x_{ijkl}\\)
+
+- Calculates batch variance \\(\sigma^2_l\\):
+\\(\sigma^2_l=\frac{1}{mwh}\sum_{i=1}^m\sum_{j=1}^w\sum_{k=1}^h (x_{ijkl} - \mu_l)^2\\)
+
+- Normalizes, scales and shifts:
+\\(y_{ijkl}=\frac{\gamma_l(x_{ijkl}-\mu_l)}{\sqrt[2]{\sigma^2_l+\epsilon}}+\beta_l\\)
+
+The epsilon value, usually a small number, is added to avoid divide-by-zero errors.
+
+The output type is a tuple of three ComputationDataHandles:
+
+| Outputs      | Type                    | Semantics                            |
+| ------------ | ----------------------- | -------------------------------------|
+| `output`     | `ComputationDataHandle` | n dimensional array with the same    |
+:              :                         : shape as input `operand` (y)         :
+| `batch_mean` | `ComputationDataHandle` | 1 dimensional array (\\(\mu\\))      |
+| `batch_var`  | `ComputationDataHandle` | 1 dimensional array (\\(\sigma^2\\)) |
+
+The `batch_mean` and `batch_var` are moments calculated across the batch and
+spatial dimensions using the formulas above.
+
 ## BitcastConvertType
 
 See also
@@ -239,40 +389,6 @@ Diagram:
   <img style="width:100%" src="https://www.tensorflow.org/images/ops_concatenate.png">
 </div>
 
-## ConvertElementType
-
-See also
-[`ComputationBuilder::ConvertElementType`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/computation_builder.h).
-
-Similar to an element-wise `static_cast` in C++, performs an element-wise
-conversion operation from a data shape to a target shape. The dimensions must
-match, and the conversion is an element-wise one; e.g. `s32` elements become
-`f32` elements via an `s32`-to-`f32` conversion routine.
-
-<b> `ConvertElementType(operand, new_element_type)` </b>
-
-Arguments          | Type                    | Semantics
------------------- | ----------------------- | ---------------------------
-`operand`          | `ComputationDataHandle` | array of type T with dims D
-`new_element_type` | `PrimitiveType`         | type U
-
-The dimensions of the operand and the target shape must match. The source and
-destination element types must not be tuples.
-
-A conversion such as `T=s32` to `U=f32` will perform a normalizing int-to-float
-conversion routine such as round-to-nearest-even.
-
-> Note: The precise float-to-int and visa-versa conversions are currently
-> unspecified, but may become additional arguments to the convert operation in
-> the future.  Not all possible conversions have been implemented for all
->targets.
-
-```
-let a: s32[3] = {0, 1, 2};
-let b: f32[3] = convert(a, f32);
-then b == f32[3]{0.0, 1.0, 2.0}
-```
-
 ## Conv (convolution)
 
 See also
@@ -395,6 +511,40 @@ for (b, oz, oy, ox) {  // output coordinates
 }
 ```
 
+## ConvertElementType
+
+See also
+[`ComputationBuilder::ConvertElementType`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/computation_builder.h).
+
+Similar to an element-wise `static_cast` in C++, performs an element-wise
+conversion operation from a data shape to a target shape. The dimensions must
+match, and the conversion is an element-wise one; e.g. `s32` elements become
+`f32` elements via an `s32`-to-`f32` conversion routine.
+
+<b> `ConvertElementType(operand, new_element_type)` </b>
+
+Arguments          | Type                    | Semantics
+------------------ | ----------------------- | ---------------------------
+`operand`          | `ComputationDataHandle` | array of type T with dims D
+`new_element_type` | `PrimitiveType`         | type U
+
+The dimensions of the operand and the target shape must match. The source and
+destination element types must not be tuples.
+
+A conversion such as `T=s32` to `U=f32` will perform a normalizing int-to-float
+conversion routine such as round-to-nearest-even.
+
+> Note: The precise float-to-int and visa-versa conversions are currently
+> unspecified, but may become additional arguments to the convert operation in
+> the future.  Not all possible conversions have been implemented for all
+>targets.
+
+```
+let a: s32[3] = {0, 1, 2};
+let b: f32[3] = convert(a, f32);
+then b == f32[3]{0.0, 1.0, 2.0}
+```
+
 ## CrossReplicaSum
 
 See also
@@ -592,22 +742,148 @@ DotGeneral(lhs, rhs, dnums) -> { { {1.0, 2.0},
 | [b0, m, k] `dot` [b0, k, n]         | [b0, m, n]        |  batch matmul    |
 | [b0, b1, m, k] `dot` [b0, b1, k, n] | [b0, b1, m, n]    |  batch matmul    |
 
-## Element-wise binary arithmetic operations
+## DynamicSlice
 
 See also
-[`ComputationBuilder::Add`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/computation_builder.h).
+[`ComputationBuilder::DynamicSlice`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/computation_builder.h).
 
-A set of element-wise binary arithmetic operations is supported.
+DynamicSlice extracts a sub-array from the input array at dynamic
+`start_indices`. The size of the slice in each dimension is passed in
+`size_indices`, which specify the end point of exclusive slice intervals in each
+dimension: [start, start + size). The shape of `start_indices` must be rank ==
+1, with dimension size equal to the rank of `operand`.
+Note: handling of out-of-bounds slice indices (generated by incorrect runtime
+calculation of 'start_indices') is currently implementation-defined. Currently,
+slice indices are computed modulo input dimension sizes to prevent out-of-bound
+array accesses, but this behavior may change in future implementations.
 
-<b> `Op(lhs, rhs)` </b>
+<b> `DynamicSlice(operand, start_indices, size_indices)` </b>
 
-Where `Op` is one of `Add` (addition), `Sub` (subtraction), `Mul`
-(multiplication), `Div` (division), `Rem` (remainder), `Max` (maximum), `Min`
-(minimum), `LogicalAnd` (logical AND), or `LogicalOr` (logical OR).
+| Arguments       | Type                    | Semantics                        |
+| --------------- | ----------------------- | -------------------------------- |
+| `operand`       | `ComputationDataHandle` | N dimensional array of type T    |
+| `start_indices` | `ComputationDataHandle` | Rank 1 array of N integers       |
+:                 :                         : containing the starting indices  :
+:                 :                         : of the slice for each dimension. :
+:                 :                         : Value must be greater than or    :
+:                 :                         : equal to zero.                   :
+| `size_indices`  | `ArraySlice<int64>`     | List of N integers containing    |
+:                 :                         : the slice size for each          :
+:                 :                         : dimension. Each value must be    :
+:                 :                         : strictly greater than zero, and  :
+:                 :                         : start + size must be less than   :
+:                 :                         : or equal to the size of the      :
+:                 :                         : dimension to avoid wrapping      :
+:                 :                         : modulo dimension size.           :
 
-Arguments | Type                    | Semantics
---------- | ----------------------- | ----------------------------------------
-`lhs`     | `ComputationDataHandle` | left-hand-side operand: array of type T
+1-dimensional example:
+
+```
+let a = {0.0, 1.0, 2.0, 3.0, 4.0}
+let s = {2}
+
+DynamicSlice(a, s, {2}) produces:
+  {2.0, 3.0}
+```
+
+2-dimensional example:
+
+```
+let b =
+ { {0.0,  1.0,  2.0},
+   {3.0,  4.0,  5.0},
+   {6.0,  7.0,  8.0},
+   {9.0, 10.0, 11.0} }
+let s = {2, 1}
+
+DynamicSlice(b, s, {2, 2}) produces:
+  { { 7.0,  8.0},
+    {10.0, 11.0} }
+```
+## DynamicUpdateSlice
+
+See also
+[`ComputationBuilder::DynamicUpdateSlice`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/computation_builder.h).
+
+DynamicUpdateSlice generates a result which is the value of the input array
+`operand`, with a slice `update` overwritten at `start_indices`.
+The shape of `update` determines the shape of the sub-array of the result which
+is updated.
+The shape of `start_indices` must be rank == 1, with dimension size equal to
+the rank of `operand`.
+Note: handling of out-of-bounds slice indices (generated by incorrect runtime
+calculation of 'start_indices') is currently implementation-defined. Currently,
+slice indices are computed modulo update dimension sizes to prevent out-of-bound
+array accesses, but this behavior may change in future implementations.
+
+<b> `DynamicUpdateSlice(operand, update, start_indices)` </b>
+
+| Arguments       | Type                    | Semantics                        |
+| --------------- | ----------------------- | -------------------------------- |
+| `operand`       | `ComputationDataHandle` | N dimensional array of type T    |
+| `update`        | `ComputationDataHandle` | N dimensional array of type T    |
+:                 :                         : containing the slice update.     :
+:                 :                         : Each dimension of update shape    :
+:                 :                         : must be strictly greater than    :
+:                 :                         : zero, and start + update must be :
+:                 :                         : less than operand size for each  :
+:                 :                         : dimension to avoid generating    :
+:                 :                         : out-of-bounds update indices.    :
+| `start_indices` | `ComputationDataHandle` | Rank 1 array of N integers       |
+:                 :                         : containing the starting indices  :
+:                 :                         : of the slice for each dimension. :
+:                 :                         : Value must be greater than or    :
+:                 :                         : equal to zero.                   :
+
+1-dimensional example:
+
+```
+let a = {0.0, 1.0, 2.0, 3.0, 4.0}
+let u = {5.0, 6.0}
+let s = {2}
+
+DynamicUpdateSlice(a, u, s) produces:
+  {0.0, 1.0, 5.0, 6.0, 4.0}
+```
+
+2-dimensional example:
+
+```
+let b =
+ { {0.0,  1.0,  2.0},
+   {3.0,  4.0,  5.0},
+   {6.0,  7.0,  8.0},
+   {9.0, 10.0, 11.0} }
+let u =
+ { {12.0,  13.0},
+   {14.0,  15.0},
+   {16.0,  17.0} }
+
+let s = {1, 1}
+
+DynamicUpdateSlice(b, u, s) produces:
+ { {0.0,  1.0,  2.0},
+   {3.0, 12.0, 13.0},
+   {6.0, 14.0, 15.0},
+   {9.0, 16.0, 17.0} }
+```
+
+## Element-wise binary arithmetic operations
+
+See also
+[`ComputationBuilder::Add`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/computation_builder.h).
+
+A set of element-wise binary arithmetic operations is supported.
+
+<b> `Op(lhs, rhs)` </b>
+
+Where `Op` is one of `Add` (addition), `Sub` (subtraction), `Mul`
+(multiplication), `Div` (division), `Rem` (remainder), `Max` (maximum), `Min`
+(minimum), `LogicalAnd` (logical AND), or `LogicalOr` (logical OR).
+
+Arguments | Type                    | Semantics
+--------- | ----------------------- | ----------------------------------------
+`lhs`     | `ComputationDataHandle` | left-hand-side operand: array of type T
 `rhs`     | `ComputationDataHandle` | right-hand-side operand: array of type T
 
 The arguments' shapes have to be either similar or compatible. See the
@@ -719,157 +995,6 @@ The function is applied to each element in the `operand` array, resulting in an
 array with the same shape. It is allowed for `operand` to be a scalar (rank 0).
 
 
-## BatchNormTraining
-
-See also
-[`ComputationBuilder::BatchNormTraining`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/computation_builder.h) and
-[`the original batch normalization paper`](https://arxiv.org/abs/1502.03167)
-for a detailed description of the algorithm.
-
-<b> Warning: Not implemented on GPU backend yet. </b>
-
-Normalizes an array across batch and spatial dimensions.
-
-<b> `BatchNormTraining(operand, scale, offset, epsilon, feature_index)` </b>
-
-| Arguments       | Type                    | Semantics                        |
-| --------------- | ----------------------- | -------------------------------- |
-| `operand`       | `ComputationDataHandle` | n dimensional array to be        |
-:                 :                         : normalized                       :
-| `scale`         | `ComputationDataHandle` | 1 dimensional array              |
-:                 :                         : (\\(\gamma\\))                   :
-| `offset`        | `ComputationDataHandle` | 1 dimensional array              |
-:                 :                         : (\\(\beta\\ )                    :
-| `epsilon`       | `float`                 | Epsilon value (\\(\epsilon\\))   |
-| `feature_index` | `int64`                 | Index to feature dimension       |
-:                 :                         : in `operand`                     :
-
-
-For each feature in the feature dimension (`feature_index` is the index for the
-feature dimension in `operand`), the operation calculates the mean and variance
-across all the other dimensions and use the mean and variance to normalize each
-element in `operand`. The `feature_index` must be a valid index for the feature
-dimension in `operand`.
-
-The algorithm goes as follows for each batch in `operand` \\(x\\) that
-contains `m` elements with `w` and `h` as the size of spatial dimensions (
-assuming `operand` is an 4 dimensional array):
-
-- Calculates batch mean \\(\mu_l\\) for each feature `l` in feature dimension:
-\\(\mu_l=\frac{1}{mwh}\sum_{i=1}^m\sum_{j=1}^w\sum_{k=1}^h x_{ijkl}\\)
-
-- Calculates batch variance \\(\sigma^2_l\\):
-\\(\sigma^2_l=\frac{1}{mwh}\sum_{i=1}^m\sum_{j=1}^w\sum_{k=1}^h (x_{ijkl} - \mu_l)^2\\)
-
-- Normalizes, scales and shifts:
-\\(y_{ijkl}=\frac{\gamma_l(x_{ijkl}-\mu_l)}{\sqrt[2]{\sigma^2_l+\epsilon}}+\beta_l\\)
-
-The epsilon value, usually a small number, is added to avoid divide-by-zero errors.
-
-The output type is a tuple of three ComputationDataHandles:
-
-| Outputs      | Type                    | Semantics                            |
-| ------------ | ----------------------- | -------------------------------------|
-| `output`     | `ComputationDataHandle` | n dimensional array with the same    |
-:              :                         : shape as input `operand` (y)         :
-| `batch_mean` | `ComputationDataHandle` | 1 dimensional array (\\(\mu\\))      |
-| `batch_var`  | `ComputationDataHandle` | 1 dimensional array (\\(\sigma^2\\)) |
-
-The `batch_mean` and `batch_var` are moments calculated across the batch and
-spatial dimensions using the formulas above.
-
-## BatchNormInference
-
-See also
-[`ComputationBuilder::BatchNormInference`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/computation_builder.h).
-
-<b> Warning: Not implemented yet. </b>
-
-Normalizes an array across batch and spatial dimensions.
-
-<b> `BatchNormInference(operand, scale, offset, mean, variance, epsilon, feature_index)` </b>
-
-| Arguments       | Type                    | Semantics                       |
-| --------------  | ----------------------- | ------------------------------- |
-| `operand`       | `ComputationDataHandle` | n dimensional array to be       |
-:                 :                         : normalized                      :
-| `scale`         | `ComputationDataHandle` | 1 dimensional array             |
-| `offset`        | `ComputationDataHandle` | 1 dimensional array             |
-| `mean`          | `ComputationDataHandle` | 1 dimensional array             |
-| `variance`      | `ComputationDataHandle` | 1 dimensional array             |
-| `epsilon`       | `float`                 | Epsilon value                   |
-| `feature_index` | `int64`                 | Index to feature dimension in   |
-:                 :                         : `operand`                       :
-
-For each feature in the feature dimension (`feature_index` is the index for the
-feature dimension in `operand`), the operation calculates the mean and variance
-across all the other dimensions and use the mean and variance to normalize each
-element in `operand`. The `feature_index` must be a valid index for the feature
-dimension in `operand`.
-
-`BatchNormInference`  is equivalent to calling `BatchNormTraining` without
-computing `mean` and `variance` for each batch. It uses the input `mean` and
-`variance` instead as estimated values. The purpose of this op is to reduce
-latency in inference, hence the name `BatchNormInference`.
-
-The output is a n dimensional, normalized array with the same shape as input
-`operand`.
-
-## BatchNormGrad
-
-See also
-[`ComputationBuilder::BatchNormGrad`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/computation_builder.h).
-
-<b> Warning: Not implemented yet. </b>
-
-Calculates gradients of batch norm.
-
-<b> `BatchNormGrad(operand, scale, mean, variance, grad_output, epsilon, feature_index)` </b>
-
-| Arguments       | Type                    | Semantics                        |
-| --------------  | ----------------------- | -------------------------------- |
-| `operand`       | `ComputationDataHandle` | n dimensional array to be        |
-:                 :                         : normalized (x)                   :
-| `scale`         | `ComputationDataHandle` | 1 dimensional array              |
-:                 :                         : (\\(\gamma\\))                   :
-| `mean`          | `ComputationDataHandle` | 1 dimensional array (\\(\mu\\))  |
-| `variance`      | `ComputationDataHandle` | 1 dimensional array              |
-:                 :                         : (\\(\sigma^2\\))                 :
-| `grad_output`   | `ComputationDataHandle` | Gradients passed to              |
-:                 :                         : `BatchNormTraining`              :
-:                 :                         : (\\( \nabla y\\))                :
-| `epsilon`       | `float`                 | Epsilon value (\\(\epsilon\\))   |
-| `feature_index` | `int64`                 | Index to feature dimension in    |
-:                 :                         : `operand`                        :
-
-For each feature in the feature dimension (`feature_index` is the index for the
-feature dimension in `operand`), the operation calculates the gradients with
-respect to `operand`, `offset` and `scale` across all the other dimensions. The
-`feature_index` must be a valid index for the feature dimension in `operand`.
-
-The three gradients are defined by the following formulas:
-
-\\( \nabla x = \nabla y * \gamma * \sqrt{\sigma^2+\epsilon} \\)
-
-\\( \nabla \gamma = sum(\nabla y * (x - \mu) * \sqrt{\sigma^2 + \epsilon}) \\)
-
-\\( \nabla \beta = sum(\nabla y) \\)
-
-The inputs `mean` and `variance` represents moments value
-across batch and spatial dimensions.
-
-The output type is a tuple of three ComputationDataHandles:
-
-|Outputs       | Type                    | Semantics                           |
-|------------- | ----------------------- | ------------------------------------|
-|`grad_operand`| `ComputationDataHandle` | gradient with respect to input      |
-:              :                         : `operand`                           :
-|`grad_offset` | `ComputationDataHandle` | gradient with respect to input      |
-:              :                         : `offset`                            :
-|`grad_scale`  | `ComputationDataHandle` | gradient with respect to input      |
-:              :                         : `scale`                             :
-
-
 ## GetTupleElement
 
 See also
@@ -1036,61 +1161,6 @@ transfer. The context is a tuple of {receive buffer (shape), request identifier
 Given a context created by a `Recv` instruction, waits for the data transfer to
 complete and returns the received data.
 
-## Send
-
-See also
-[`ComputationBuilder::Send`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/computation_builder.h).
-
-<b> `Send(operand, channel_handle)` </b>
-
-| Arguments        | Type                    | Semantics                        |
-| ---------------- | ----------------------- | -------------------------------- |
-| `operand`        | `ComputationDataHandle` | data to send (array of type T)   |
-| `channel_handle` | `ChannelHandle`         | unique identifier for each send/recv pair |
-
-Sends the given operand data to a `Recv` instruction in another computation
-that shares the same channel handle. Does not return any data.
-
-Similar to the `Recv` operation, the client API of `Send` operation represents
-synchronous communication, and is internally decomposed into 2 HLO instructions
-(`Send` and `SendDone`) to enable asynchronous data transfers. See also
-[`HloInstruction::CreateSend` and `HloInstruction::CreateSendDone`](https://www.tensorflow.org/code/tensorflow/compiler/xla/service/hlo_instruction.h).
-
-<b>`Send(HloInstruction operand, int64 channel_id)`</b>
-
-Initiates an asynchronous transfer of the operand to the resources allocated by
-the `Recv` instruction with the same channel id. Returns a context, which is
-used by a following `SendDone` instruction to wait for the completion of the
-data transfer. The context is a tuple of {operand (shape), request identifier
-(U32)} and it can only be used by a `SendDone` instruction.
-
-<b> `SendDone(HloInstruction context)` </b>
-
-Given a context created by a `Send` instruction, waits for the data transfer to
-complete.  The instruction does not return any data.
-
-<b> Scheduling of channel instructions </b>
-
-The execution order of the 4 instructions for each channel (`Recv`, `RecvDone`,
-`Send`, `SendDone`) is as below.
-
-<div style="width:95%; margin:auto; margin-bottom:10px; margin-top:20px;">
-  <img style="width:70%" src="../../images/send_recv_order.png">
-</div>
-
-* `Recv` happens before `Send`
-* `Send` happens before `RecvDone`
-* `Recv` happens before `RecvDone`
-* `Send` happens before `SendDone`
-
-When the backend compilers generate a linear schedule for each computation that
-communicates via channel instructions, there must not be cycles across the
-computations. For example, below schedules lead to deadlocks.
-
-<div style="width:95%; margin:auto; margin-bottom:10px; margin-top:20px;">
-  <img style="width:100%" src="../../images/send_recv_schedule.png">
-</div>
-
 ## Reduce
 
 See also
@@ -1244,7 +1314,6 @@ must have a non-negative number of mantissa bits.  The number of exponent or
 mantissa bits may exceed the corresponding value for type `T`; the corresponding
 portion of the conversion is then simply a no-op.
 
-
 ## ReduceWindow
 
 See also
@@ -1458,34 +1527,85 @@ be scalar valued.
 
 <b>`RngNormal(mean, sigma, shape)`</b>
 
-| Arguments | Type                    | Semantics                              |
-| --------- | ----------------------- | -------------------------------------- |
-| `mu`      | `ComputationDataHandle` | Scalar of type F32 specifying mean of  |
-:           :                         : generated numbers                      :
-| `sigma`   | `ComputationDataHandle` | Scalar of type F32 specifying standard |
-:           :                         : deviation of generated numbers         :
-| `shape`   | `Shape`                 | Output shape of type F32               |
+| Arguments | Type                    | Semantics                              |
+| --------- | ----------------------- | -------------------------------------- |
+| `mu`      | `ComputationDataHandle` | Scalar of type F32 specifying mean of  |
+:           :                         : generated numbers                      :
+| `sigma`   | `ComputationDataHandle` | Scalar of type F32 specifying standard |
+:           :                         : deviation of generated numbers         :
+| `shape`   | `Shape`                 | Output shape of type F32               |
+
+## RngUniform
+
+See also
+[`ComputationBuilder::RngUniform`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/computation_builder.h).
+
+Constructs an output of a given shape with random numbers generated following
+the uniform distribution over the interval $$[a,b)$$. The parameters and output
+shape may be either F32, S32 or U32, but the types have to be consistent.
+Furthermore, the parameters need to be scalar valued. If $$b <= a$$ the result
+is implementation-defined.
+
+<b>`RngUniform(a, b, shape)`</b>
+
+| Arguments | Type                    | Semantics                         |
+| --------- | ----------------------- | --------------------------------- |
+| `a`       | `ComputationDataHandle` | Scalar of type T specifying lower |
+:           :                         : limit of interval                 :
+| `b`       | `ComputationDataHandle` | Scalar of type T specifying upper |
+:           :                         : limit of interval                 :
+| `shape`   | `Shape`                 | Output shape of type T            |
+
+## Select
+
+See also
+[`ComputationBuilder::Select`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/computation_builder.h).
+
+Constructs an output array from elements of two input arrays, based on the
+values of a predicate array.
+
+<b> `Select(pred, on_true, on_false)` </b>
+
+Arguments  | Type                    | Semantics
+---------- | ----------------------- | ------------------
+`pred`     | `ComputationDataHandle` | array of type PRED
+`on_true`  | `ComputationDataHandle` | array of type T
+`on_false` | `ComputationDataHandle` | array of type T
+
+The arrays `on_true` and `on_false` must have the same shape. This is also the
+shape of the output array. The array `pred` must have the same dimensionality as
+`on_true` and `on_false`, with the `PRED` element type.
+
+For each element `P` of `pred`, the corresponding element of the output array is
+taken from `on_true` if the value of `P` is `true`, and from `on_false` if the
+value of `P` is `false`. As a restricted form of [broadcasting]
+(broadcasting.md), `pred` can be a scalar of type `PRED`. In this case, the
+output array is taken wholly from `on_true` if `pred` is `true`, and from
+`on_false` if `pred` is `false`.
 
-## RngUniform
+Example with non-scalar `pred`:
 
-See also
-[`ComputationBuilder::RngUniform`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/computation_builder.h).
+```
+let pred: PRED[4] = {true, false, false, true};
+let v1: s32[4] = {1, 2, 3, 4};
+let v2: s32[4] = {100, 200, 300, 400};
+==>
+Select(pred, v1, v2) = s32[4]{1, 200, 300, 4};
+```
 
-Constructs an output of a given shape with random numbers generated following
-the uniform distribution over the interval $$[a,b)$$. The parameters and output
-shape may be either F32, S32 or U32, but the types have to be consistent.
-Furthermore, the parameters need to be scalar valued. If $$b <= a$$ the result
-is implementation-defined.
+Example with scalar `pred`:
 
-<b>`RngUniform(a, b, shape)`</b>
+```
+let pred: PRED = true;
+let v1: s32[4] = {1, 2, 3, 4};
+let v2: s32[4] = {100, 200, 300, 400};
+==>
+Select(pred, v1, v2) = s32[4]{1, 2, 3, 4};
+```
 
-| Arguments | Type                    | Semantics                         |
-| --------- | ----------------------- | --------------------------------- |
-| `a`       | `ComputationDataHandle` | Scalar of type T specifying lower |
-:           :                         : limit of interval                 :
-| `b`       | `ComputationDataHandle` | Scalar of type T specifying upper |
-:           :                         : limit of interval                 :
-| `shape`   | `Shape`                 | Output shape of type T            |
+Selections between tuples are supported. Tuples are considered to be scalar
+types for this purpose. If `on_true` and `on_false` are tuples (which must have
+the same shape!) then `pred` has to be a scalar of type `PRED`.
 
 ## SelectAndScatter
 
@@ -1568,56 +1688,60 @@ non-deterministic. Therefore, the `scatter` function should not be overly
 sensitive to reassociation. See the discussion about associativity in the
 context of [`Reduce`](#reduce) for more details.
 
-## Select
+## Send
 
 See also
-[`ComputationBuilder::Select`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/computation_builder.h).
+[`ComputationBuilder::Send`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/computation_builder.h).
 
-Constructs an output array from elements of two input arrays, based on the
-values of a predicate array.
+<b> `Send(operand, channel_handle)` </b>
 
-<b> `Select(pred, on_true, on_false)` </b>
+| Arguments        | Type                    | Semantics                        |
+| ---------------- | ----------------------- | -------------------------------- |
+| `operand`        | `ComputationDataHandle` | data to send (array of type T)   |
+| `channel_handle` | `ChannelHandle`         | unique identifier for each send/recv pair |
 
-Arguments  | Type                    | Semantics
----------- | ----------------------- | ------------------
-`pred`     | `ComputationDataHandle` | array of type PRED
-`on_true`  | `ComputationDataHandle` | array of type T
-`on_false` | `ComputationDataHandle` | array of type T
+Sends the given operand data to a `Recv` instruction in another computation
+that shares the same channel handle. Does not return any data.
 
-The arrays `on_true` and `on_false` must have the same shape. This is also the
-shape of the output array. The array `pred` must have the same dimensionality as
-`on_true` and `on_false`, with the `PRED` element type.
+Similar to the `Recv` operation, the client API of `Send` operation represents
+synchronous communication, and is internally decomposed into 2 HLO instructions
+(`Send` and `SendDone`) to enable asynchronous data transfers. See also
+[`HloInstruction::CreateSend` and `HloInstruction::CreateSendDone`](https://www.tensorflow.org/code/tensorflow/compiler/xla/service/hlo_instruction.h).
 
-For each element `P` of `pred`, the corresponding element of the output array is
-taken from `on_true` if the value of `P` is `true`, and from `on_false` if the
-value of `P` is `false`. As a restricted form of [broadcasting]
-(broadcasting.md), `pred` can be a scalar of type `PRED`. In this case, the
-output array is taken wholly from `on_true` if `pred` is `true`, and from
-`on_false` if `pred` is `false`.
+<b>`Send(HloInstruction operand, int64 channel_id)`</b>
 
-Example with non-scalar `pred`:
+Initiates an asynchronous transfer of the operand to the resources allocated by
+the `Recv` instruction with the same channel id. Returns a context, which is
+used by a following `SendDone` instruction to wait for the completion of the
+data transfer. The context is a tuple of {operand (shape), request identifier
+(U32)} and it can only be used by a `SendDone` instruction.
 
-```
-let pred: PRED[4] = {true, false, false, true};
-let v1: s32[4] = {1, 2, 3, 4};
-let v2: s32[4] = {100, 200, 300, 400};
-==>
-Select(pred, v1, v2) = s32[4]{1, 200, 300, 4};
-```
+<b> `SendDone(HloInstruction context)` </b>
 
-Example with scalar `pred`:
+Given a context created by a `Send` instruction, waits for the data transfer to
+complete.  The instruction does not return any data.
 
-```
-let pred: PRED = true;
-let v1: s32[4] = {1, 2, 3, 4};
-let v2: s32[4] = {100, 200, 300, 400};
-==>
-Select(pred, v1, v2) = s32[4]{1, 2, 3, 4};
-```
+<b> Scheduling of channel instructions </b>
 
-Selections between tuples are supported. Tuples are considered to be scalar
-types for this purpose. If `on_true` and `on_false` are tuples (which must have
-the same shape!) then `pred` has to be a scalar of type `PRED`.
+The execution order of the 4 instructions for each channel (`Recv`, `RecvDone`,
+`Send`, `SendDone`) is as below.
+
+<div style="width:95%; margin:auto; margin-bottom:10px; margin-top:20px;">
+  <img style="width:70%" src="../../images/send_recv_order.png">
+</div>
+
+* `Recv` happens before `Send`
+* `Send` happens before `RecvDone`
+* `Recv` happens before `RecvDone`
+* `Send` happens before `SendDone`
+
+When the backend compilers generate a linear schedule for each computation that
+communicates via channel instructions, there must not be cycles across the
+computations. For example, below schedules lead to deadlocks.
+
+<div style="width:95%; margin:auto; margin-bottom:10px; margin-top:20px;">
+  <img style="width:100%" src="../../images/send_recv_schedule.png">
+</div>
 
 ## Slice
 
@@ -1671,132 +1795,6 @@ Slice(b, {2, 1}, {4, 3}) produces:
     {10.0, 11.0} }
 ```
 
-## DynamicSlice
-
-See also
-[`ComputationBuilder::DynamicSlice`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/computation_builder.h).
-
-DynamicSlice extracts a sub-array from the input array at dynamic
-`start_indices`. The size of the slice in each dimension is passed in
-`size_indices`, which specify the end point of exclusive slice intervals in each
-dimension: [start, start + size). The shape of `start_indices` must be rank ==
-1, with dimension size equal to the rank of `operand`.
-Note: handling of out-of-bounds slice indices (generated by incorrect runtime
-calculation of 'start_indices') is currently implementation-defined. Currently,
-slice indices are computed modulo input dimension sizes to prevent out-of-bound
-array accesses, but this behavior may change in future implementations.
-
-<b> `DynamicSlice(operand, start_indices, size_indices)` </b>
-
-| Arguments       | Type                    | Semantics                        |
-| --------------- | ----------------------- | -------------------------------- |
-| `operand`       | `ComputationDataHandle` | N dimensional array of type T    |
-| `start_indices` | `ComputationDataHandle` | Rank 1 array of N integers       |
-:                 :                         : containing the starting indices  :
-:                 :                         : of the slice for each dimension. :
-:                 :                         : Value must be greater than or    :
-:                 :                         : equal to zero.                   :
-| `size_indices`  | `ArraySlice<int64>`     | List of N integers containing    |
-:                 :                         : the slice size for each          :
-:                 :                         : dimension. Each value must be    :
-:                 :                         : strictly greater than zero, and  :
-:                 :                         : start + size must be less than   :
-:                 :                         : or equal to the size of the      :
-:                 :                         : dimension to avoid wrapping      :
-:                 :                         : modulo dimension size.           :
-
-1-dimensional example:
-
-```
-let a = {0.0, 1.0, 2.0, 3.0, 4.0}
-let s = {2}
-
-DynamicSlice(a, s, {2}) produces:
-  {2.0, 3.0}
-```
-
-2-dimensional example:
-
-```
-let b =
- { {0.0,  1.0,  2.0},
-   {3.0,  4.0,  5.0},
-   {6.0,  7.0,  8.0},
-   {9.0, 10.0, 11.0} }
-let s = {2, 1}
-
-DynamicSlice(b, s, {2, 2}) produces:
-  { { 7.0,  8.0},
-    {10.0, 11.0} }
-```
-## DynamicUpdateSlice
-
-See also
-[`ComputationBuilder::DynamicUpdateSlice`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/computation_builder.h).
-
-DynamicUpdateSlice generates a result which is the value of the input array
-`operand`, with a slice `update` overwritten at `start_indices`.
-The shape of `update` determines the shape of the sub-array of the result which
-is updated.
-The shape of `start_indices` must be rank == 1, with dimension size equal to
-the rank of `operand`.
-Note: handling of out-of-bounds slice indices (generated by incorrect runtime
-calculation of 'start_indices') is currently implementation-defined. Currently,
-slice indices are computed modulo update dimension sizes to prevent out-of-bound
-array accesses, but this behavior may change in future implementations.
-
-<b> `DynamicUpdateSlice(operand, update, start_indices)` </b>
-
-| Arguments       | Type                    | Semantics                        |
-| --------------- | ----------------------- | -------------------------------- |
-| `operand`       | `ComputationDataHandle` | N dimensional array of type T    |
-| `update`        | `ComputationDataHandle` | N dimensional array of type T    |
-:                 :                         : containing the slice update.     :
-:                 :                         : Each dimension of update shape    :
-:                 :                         : must be strictly greater than    :
-:                 :                         : zero, and start + update must be :
-:                 :                         : less than operand size for each  :
-:                 :                         : dimension to avoid generating    :
-:                 :                         : out-of-bounds update indices.    :
-| `start_indices` | `ComputationDataHandle` | Rank 1 array of N integers       |
-:                 :                         : containing the starting indices  :
-:                 :                         : of the slice for each dimension. :
-:                 :                         : Value must be greater than or    :
-:                 :                         : equal to zero.                   :
-
-1-dimensional example:
-
-```
-let a = {0.0, 1.0, 2.0, 3.0, 4.0}
-let u = {5.0, 6.0}
-let s = {2}
-
-DynamicUpdateSlice(a, u, s) produces:
-  {0.0, 1.0, 5.0, 6.0, 4.0}
-```
-
-2-dimensional example:
-
-```
-let b =
- { {0.0,  1.0,  2.0},
-   {3.0,  4.0,  5.0},
-   {6.0,  7.0,  8.0},
-   {9.0, 10.0, 11.0} }
-let u =
- { {12.0,  13.0},
-   {14.0,  15.0},
-   {16.0,  17.0} }
-
-let s = {1, 1}
-
-DynamicUpdateSlice(b, u, s) produces:
- { {0.0,  1.0,  2.0},
-   {3.0, 12.0, 13.0},
-   {6.0, 14.0, 15.0},
-   {9.0, 16.0, 17.0} }
-```
-
 ## Sort
 
 See also
-- 
GitLab


From 6f7d8b5a4e0da68c03ae50e05ee6106b78a233ad Mon Sep 17 00:00:00 2001
From: Sergio Guadarrama <sguada@google.com>
Date: Mon, 4 Dec 2017 14:46:29 -0800
Subject: [PATCH 0605/1225] Allow test_util.evaluate handle nested tensors.

PiperOrigin-RevId: 177871523
---
 tensorflow/python/framework/test_util.py      | 32 +++++++++----------
 tensorflow/python/framework/test_util_test.py | 10 ++++++
 2 files changed, 25 insertions(+), 17 deletions(-)

diff --git a/tensorflow/python/framework/test_util.py b/tensorflow/python/framework/test_util.py
index 4c026590c2..ae3b6c584a 100644
--- a/tensorflow/python/framework/test_util.py
+++ b/tensorflow/python/framework/test_util.py
@@ -60,6 +60,7 @@ from tensorflow.python.platform import googletest
 from tensorflow.python.platform import tf_logging as logging
 from tensorflow.python.training import server_lib
 from tensorflow.python.util import compat
+from tensorflow.python.util import nest
 from tensorflow.python.util.protobuf import compare
 
 
@@ -715,25 +716,22 @@ class TensorFlowTestCase(googletest.TestCase):
       fail_msg += " : %r" % (msg) if msg else ""
       self.fail(fail_msg)
 
-  def _eval_helper(self, tensors):
-    if isinstance(tensors, ops.EagerTensor):
-      return tensors.numpy()
-    if isinstance(tensors, resource_variable_ops.ResourceVariable):
-      return tensors.read_value().numpy()
-
-    if isinstance(tensors, tuple):
-      return tuple([self._eval_helper(t) for t in tensors])
-    elif isinstance(tensors, list):
-      return [self._eval_helper(t) for t in tensors]
-    elif isinstance(tensors, dict):
-      assert not tensors, "Only support empty dict now."
-      return dict()
-    elif tensors is None:
+  def _eval_tensor(self, tensor):
+    if tensor is None:
       return None
-    elif callable(tensors):
-      return self._eval_helper(tensors())
+    elif isinstance(tensor, ops.EagerTensor):
+      return tensor.numpy()
+    elif isinstance(tensor, resource_variable_ops.ResourceVariable):
+      return tensor.read_value().numpy()
+    elif callable(tensor):
+      return self._eval_helper(tensor())
     else:
-      raise ValueError("Unsupported type %s." % type(tensors))
+      raise ValueError("Unsupported type %s." % type(tensor))
+
+  def _eval_helper(self, tensors):
+    if tensors is None:
+      return None
+    return nest.map_structure(self._eval_tensor, tensors)
 
   def evaluate(self, tensors):
     """Evaluates tensors and returns numpy values.
diff --git a/tensorflow/python/framework/test_util_test.py b/tensorflow/python/framework/test_util_test.py
index 9aed3457a6..90b5290626 100644
--- a/tensorflow/python/framework/test_util_test.py
+++ b/tensorflow/python/framework/test_util_test.py
@@ -339,6 +339,16 @@ class TestUtilTest(test_util.TensorFlowTestCase):
     with context.eager_mode():
       self.assertEqual(2, self.evaluate(model))
 
+  @test_util.run_in_graph_and_eager_modes()
+  def test_nested_tensors_evaluate(self):
+    expected = {"a": 1, "b": 2, "nested": {"d": 3, "e": 4}}
+    nested = {"a": constant_op.constant(1),
+              "b": constant_op.constant(2),
+              "nested": {"d": constant_op.constant(3),
+                         "e": constant_op.constant(4)}}
+
+    self.assertEqual(expected, self.evaluate(nested))
+
 
 class GarbageCollectionTest(test_util.TensorFlowTestCase):
 
-- 
GitLab


From 46af7946ea2ca3ff9e8792d344d0eb7a6a90f985 Mon Sep 17 00:00:00 2001
From: Skye Wanderman-Milne <skyewm@google.com>
Date: Mon, 4 Dec 2017 14:54:20 -0800
Subject: [PATCH 0606/1225] Fix edge case with
 ImportGraphDefOption.uniquify_names = true.

This change fixes the case where a newly-generated uniquified name
conflicts with another NodeDef being imported (the original NodeDef
names are required to be unique among each other, so this is only an
issue when we create new names).

Note that this behavior is not well defined in the Python
import_graph_def method. It will always generate unique names, but the
exact naming scheme may depend on the order the NodeDefs are
imported. I didn't write a corresponding Python unit test or try to
make this change produce the same names for this reason.

PiperOrigin-RevId: 177872720
---
 tensorflow/core/graph/graph_constructor.cc    | 58 ++++++++++++++-----
 .../core/graph/graph_constructor_test.cc      | 36 ++++++++++--
 2 files changed, 72 insertions(+), 22 deletions(-)

diff --git a/tensorflow/core/graph/graph_constructor.cc b/tensorflow/core/graph/graph_constructor.cc
index 8890a9fb0f..63e3d5ee7d 100644
--- a/tensorflow/core/graph/graph_constructor.cc
+++ b/tensorflow/core/graph/graph_constructor.cc
@@ -203,7 +203,11 @@ class GraphConstructor {
 
   // Returns true if `name` already exists in `g_` (either as a node name or
   // prefix).
-  bool NameExists(StringPiece name);
+  bool NameExistsInGraph(StringPiece name);
+
+  // Returns true if `name` already exists in the GraphDef being imported
+  // (either as a node name or prefix).
+  bool NameExistsInGraphDef(StringPiece name);
 
   // Returns a unique version of `original_name`, or `original_name` if it's
   // already unique in the graph.
@@ -243,6 +247,9 @@ class GraphConstructor {
   // alternative implementation of std::unordered_map.
   std::unordered_map<StringPiece, NodeInfo, StringPieceHasher> gdef_nodes_;
 
+  // Prefixes already used in the GraphDef being imported.
+  std::unordered_set<StringPiece, StringPieceHasher> gdef_prefixes_;
+
   // Mapping from node name to the existing node in g_.
   std::unordered_map<StringPiece, Node*, StringPieceHasher> existing_nodes_;
 
@@ -305,6 +312,16 @@ bool NodeNameInValues(const std::vector<string>& control_dependencies,
                    node_name) != control_dependencies.end();
 }
 
+// Adds any prefixes of `node_name` (not including the full name itself) to
+// `prefixes`.
+void AddPrefixes(StringPiece node_name,
+                 std::unordered_set<StringPiece, StringPieceHasher>* prefixes) {
+  size_t idx = -1;
+  while ((idx = node_name.find('/', idx + 1)) != StringPiece::npos) {
+    prefixes->insert(node_name.substr(0, idx));
+  }
+}
+
 Status GraphConstructor::EnsureNoNameCollisions() {
   existing_nodes_.reserve(g_->num_nodes());
   // Populate existing_nodes_ and existing_prefixes_.
@@ -323,17 +340,12 @@ Status GraphConstructor::EnsureNoNameCollisions() {
             n->name(), "'");
       }
     }
-    // Add all of node's prefixes to existing_prefixes_ (if it has any).
-    size_t idx = -1;
-    while ((idx = n->name().find('/', idx + 1)) != string::npos) {
-      StringPiece name(n->name());
-      existing_prefixes_.insert(name.substr(0, idx));
-    }
+    AddPrefixes(n->name(), &existing_prefixes_);
   }
   if (opts_.prefix.empty() && opts_.importing && !opts_.uniquify_names) {
     for (const NodeDef* n : node_defs_) {
       const string& name = n->name();
-      if (NameExists(name)) {
+      if (NameExistsInGraph(name)) {
         return errors::InvalidArgument("Node name '", name,
                                        "' already exists in the Graph");
       }
@@ -346,7 +358,7 @@ Status GraphConstructor::EnsureNoNameCollisions() {
                                      opts_.prefix,
                                      "' would lead to invalid node names");
     }
-    if (NameExists(prefix_no_slash)) {
+    if (NameExistsInGraph(prefix_no_slash)) {
       return errors::InvalidArgument("Import node name prefix '",
                                      prefix_no_slash,
                                      "' conflicts with "
@@ -384,7 +396,7 @@ Status GraphConstructor::ValidateInputMapAndControlDependencies() {
 }
 
 Status GraphConstructor::BuildNodeIndex() {
-  // Validate the node names and add them to gdef_nodes_.
+  // Validate the node names and add them to gdef_nodes_ and gdef_prefixes_.
   for (int n = 0; n < node_defs_.size(); ++n) {
     const NodeDef& node_def = *node_defs_[n];
     if (!IsValidNodeName(node_def.name(), opts_.allow_internal_ops)) {
@@ -419,6 +431,8 @@ Status GraphConstructor::BuildNodeIndex() {
             "': Control dependencies must come after regular dependencies");
       }
     }
+    // Update gdef_prefixes_.
+    AddPrefixes(node_def.name(), &gdef_prefixes_);
   }
   return Status::OK();
 }
@@ -750,10 +764,13 @@ void GraphConstructor::AddPrefixToNodeDef(
 
 void GraphConstructor::UniquifyNames(
     const std::vector<bool>& input_already_exists, NodeDef* node_def) {
-  if (NameExists(node_def->name())) {
+  if (NameExistsInGraph(node_def->name())) {
     string old_name = node_def->name();
     node_def->set_name(FindUniqueName(node_def->name()));
     uniquified_names_[old_name] = node_def->name();
+    // Note that we don't have to update gdef_nodes_ or gdef_prefixes_ with
+    // `name` because we guarantee the original NodeDef names are unique,
+    // meaning we won't generate this name again.
   }
   for (int i = 0; i < node_def->input_size(); ++i) {
     // Skip remapped inputs (which already exist in g_ and are not being
@@ -783,16 +800,25 @@ void GraphConstructor::UniquifyNames(
   }
 }
 
-bool GraphConstructor::NameExists(StringPiece name) {
+bool GraphConstructor::NameExistsInGraph(StringPiece name) {
   if (existing_nodes_.find(name) != existing_nodes_.end()) return true;
-  return existing_prefixes_.find(name) != existing_prefixes_.end();
+  if (existing_prefixes_.find(name) != existing_prefixes_.end()) return true;
+  return false;
+}
+
+bool GraphConstructor::NameExistsInGraphDef(StringPiece name) {
+  if (gdef_nodes_.find(name) != gdef_nodes_.end()) return true;
+  if (gdef_prefixes_.find(name) != gdef_prefixes_.end()) return true;
+  return false;
 }
 
 string GraphConstructor::FindUniqueName(StringPiece original_name) {
   string name = original_name.ToString();
-  int count = 1;
-  while (NameExists(name)) {
-    name = strings::StrCat(original_name, "_", count++);
+  int count = 0;
+  // Check that any generated names don't collide with imported NodeDefs (as
+  // well as nodes in g_).
+  while (NameExistsInGraph(name) || (count > 0 && NameExistsInGraphDef(name))) {
+    name = strings::StrCat(original_name, "_", ++count);
   }
   return name;
 }
diff --git a/tensorflow/core/graph/graph_constructor_test.cc b/tensorflow/core/graph/graph_constructor_test.cc
index 0f88c80b85..479f07f7f6 100644
--- a/tensorflow/core/graph/graph_constructor_test.cc
+++ b/tensorflow/core/graph/graph_constructor_test.cc
@@ -1822,6 +1822,30 @@ TEST_F(GraphConstructorTest, ImportGraphDef_UniquifyNames) {
   EXPECT_EQ(results.return_nodes[1]->name(), "B_1_1");
   EXPECT_EQ(results.return_nodes[1]->def().input(0), "A_1_1:0");
 
+  // Import with node names that must be de-duped from names and prefixes that
+  // exist in both the existing graph and the GraphDef being imported.
+  opts = ImportGraphDefOptions();
+  opts.uniquify_names = true;
+  opts.return_nodes.push_back("A");
+  opts.return_nodes.push_back("A_3");
+  opts.return_nodes.push_back("B");
+  opts.return_nodes.push_back("B_3/B");
+  results = ImportGraphDefResults();
+  ExpectOK(
+      "node { name: 'A' op: 'TestInput' }"
+      "node { name: 'A_3' op: 'TestInput' }"
+      "node { name: 'B' op: 'TestOneInputTwoOutputs' input: ['A'] }"
+      "node { name: 'B_3/B' op: 'TestOneInputTwoOutputs' input: ['A_3'] }",
+      opts, &refiner, &results);
+
+  ASSERT_EQ(results.return_nodes.size(), 4);
+  EXPECT_EQ(results.return_nodes[0]->name(), "A_4");
+  EXPECT_EQ(results.return_nodes[1]->name(), "A_3");
+  EXPECT_EQ(results.return_nodes[2]->name(), "B_4");
+  EXPECT_EQ(results.return_nodes[2]->def().input(0), "A_4:0");
+  EXPECT_EQ(results.return_nodes[3]->name(), "B_3/B");
+  EXPECT_EQ(results.return_nodes[3]->def().input(0), "A_3");
+
   // Create node with prefix and then import node with same name
   ExpectOK("node { name: 'foo/abc' op: 'ABC' }");
   opts = ImportGraphDefOptions();
@@ -1871,8 +1895,8 @@ TEST_F(GraphConstructorTest, ImportGraphDef_UniquifyNames) {
   ExpectOK(graph_def_str, opts, &refiner, &results);
 
   ASSERT_EQ(results.return_nodes.size(), 2);
-  EXPECT_EQ(results.return_nodes[0]->name(), "A_3");
-  EXPECT_EQ(results.return_nodes[1]->name(), "B_3");
+  EXPECT_EQ(results.return_nodes[0]->name(), "A_5");
+  EXPECT_EQ(results.return_nodes[1]->name(), "B_5");
   EXPECT_EQ(results.return_nodes[1]->def().input(0), "A:0");
 
   // Check that colocation groups are updated
@@ -1888,14 +1912,14 @@ TEST_F(GraphConstructorTest, ImportGraphDef_UniquifyNames) {
       opts, &refiner, &results);
 
   ASSERT_EQ(results.return_nodes.size(), 2);
-  EXPECT_EQ(results.return_nodes[0]->name(), "A_4");
-  EXPECT_EQ(results.return_nodes[1]->name(), "B_4");
-  EXPECT_EQ(results.return_nodes[1]->def().input(0), "A_4:0");
+  EXPECT_EQ(results.return_nodes[0]->name(), "A_6");
+  EXPECT_EQ(results.return_nodes[1]->name(), "B_6");
+  EXPECT_EQ(results.return_nodes[1]->def().input(0), "A_6:0");
   const AttrValue* class_attr =
       results.return_nodes[1]->attrs().Find(kColocationAttrName);
   ASSERT_TRUE(class_attr != nullptr);
   ASSERT_EQ(class_attr->list().s_size(), 1);
-  EXPECT_EQ(class_attr->list().s(0), "loc:@A_4");
+  EXPECT_EQ(class_attr->list().s(0), "loc:@A_6");
 }
 
 TEST_F(GraphConstructorTest, ImportGraphDef_WithCycle) {
-- 
GitLab


From 4a8e27e2ad9c3b72ac227f18012f4f68ae17c1ab Mon Sep 17 00:00:00 2001
From: Justin Lebar <jlebar@google.com>
Date: Mon, 4 Dec 2017 15:04:10 -0800
Subject: [PATCH 0607/1225] [StreamExecutor] Add UnqueryableDeviceParams for
 all nvidia GPUs.

Some properties of nvidia GPUs cannot be queried via the driver API --
these are hardcoded in the UnqueryableDeviceParams struct in
StreamExecutor.

Before this change, we only had values for sm_35.  This change adds the
values for all other nvidia GPUs, sm_20 through sm_70.

PiperOrigin-RevId: 177874401
---
 .../stream_executor/cuda/cuda_gpu_executor.cc | 131 ++++++++++++++++--
 1 file changed, 122 insertions(+), 9 deletions(-)

diff --git a/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc b/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc
index 64d14f29df..c9d094e3d0 100644
--- a/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc
+++ b/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc
@@ -925,16 +925,129 @@ struct UnqueryableDeviceParams {
   uint64 shared_memory_alloc_granularity;
 };
 
+// http://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#compute-capabilities
+// https://developer.download.nvidia.com/compute/cuda/CUDA_Occupancy_calculator.xls
 static const UnqueryableDeviceParams kAllUnqueryableDeviceParams[] = {
-  {
-    3, 5,       // compute capability (3.5)
-    16,         // blocks_per_core_limit
-    64 * 1024,  // registers_per_core_limit
-    255,        // registers_per_thread_limit
-    4,          // warp_alloc_granularity
-    256,        // register_alloc_granularity
-    256         // shared_memory_alloc_granularity
-  }
+    {
+        2, 0,       // compute capability (2.0)
+        8,          // blocks_per_core_limit
+        32 * 1024,  // registers_per_core_limit
+        63,         // registers_per_thread_limit
+        2,          // warp_alloc_granularity
+        64,         // register_alloc_granularity
+        128,        // shared_memory_alloc_granularity
+    },
+    {
+        2, 1,       // compute capability (2.1)
+        8,          // blocks_per_core_limit
+        32 * 1024,  // registers_per_core_limit
+        63,         // registers_per_thread_limit
+        2,          // warp_alloc_granularity
+        64,         // register_alloc_granularity
+        128,        // shared_memory_alloc_granularity
+    },
+    {
+        3, 0,       // compute capability (3.0)
+        16,         // blocks_per_core_limit
+        64 * 1024,  // registers_per_core_limit
+        63,         // registers_per_thread_limit
+        4,          // warp_alloc_granularity
+        256,        // register_alloc_granularity
+        256,        // shared_memory_alloc_granularity
+    },
+    {
+        3, 2,       // compute capability (3.2)
+        16,         // blocks_per_core_limit
+        64 * 1024,  // registers_per_core_limit
+        255,        // registers_per_thread_limit
+        4,          // warp_alloc_granularity
+        256,        // register_alloc_granularity
+        256,        // shared_memory_alloc_granularity
+    },
+    {
+        3, 5,       // compute capability (3.5)
+        16,         // blocks_per_core_limit
+        64 * 1024,  // registers_per_core_limit
+        255,        // registers_per_thread_limit
+        4,          // warp_alloc_granularity
+        256,        // register_alloc_granularity
+        256,        // shared_memory_alloc_granularity
+    },
+    {
+        3, 7,        // compute capability (3.7)
+        16,          // blocks_per_core_limit
+        128 * 1024,  // registers_per_core_limit
+        255,         // registers_per_thread_limit
+        4,           // warp_alloc_granularity
+        256,         // register_alloc_granularity
+        256,         // shared_memory_alloc_granularity
+    },
+    {
+        5, 0,       // compute capability (5.0)
+        32,         // blocks_per_core_limit
+        64 * 1024,  // registers_per_core_limit
+        255,        // registers_per_thread_limit
+        4,          // warp_alloc_granularity
+        256,        // register_alloc_granularity
+        256,        // shared_memory_alloc_granularity
+    },
+    {
+        5, 2,       // compute capability (5.2)
+        32,         // blocks_per_core_limit
+        64 * 1024,  // registers_per_core_limit
+        255,        // registers_per_thread_limit
+        4,          // warp_alloc_granularity
+        256,        // register_alloc_granularity
+        256,        // shared_memory_alloc_granularity
+    },
+    {
+        5, 3,       // compute capability (5.3)
+        32,         // blocks_per_core_limit
+        64 * 1024,  // registers_per_core_limit
+        255,        // registers_per_thread_limit
+        4,          // warp_alloc_granularity
+        256,        // register_alloc_granularity
+        256,        // shared_memory_alloc_granularity
+    },
+    {
+        6, 0,       // compute capability (6.0)
+        32,         // blocks_per_core_limit
+        64 * 1024,  // registers_per_core_limit
+        255,        // registers_per_thread_limit
+        2,          // warp_alloc_granularity
+        256,        // register_alloc_granularity
+        256,        // shared_memory_alloc_granularity
+    },
+    {
+        6, 1,       // compute capability (6.1)
+        32,         // blocks_per_core_limit
+        64 * 1024,  // registers_per_core_limit
+        255,        // registers_per_thread_limit
+        4,          // warp_alloc_granularity
+        256,        // register_alloc_granularity
+        256,        // shared_memory_alloc_granularity
+    },
+    {
+        6, 2,       // compute capability (6.2)
+        32,         // blocks_per_core_limit
+        64 * 1024,  // registers_per_core_limit
+        255,        // registers_per_thread_limit
+        4,          // warp_alloc_granularity
+        256,        // register_alloc_granularity
+        256,        // shared_memory_alloc_granularity
+    },
+    // TODO(jlebar): Confirm the alloc granularity values for sm_70.  These are
+    // not published in the spreadsheet linked above.  Currently we guess that
+    // they're the same as sm_60.
+    {
+        7, 0,       // compute capability (7.0)
+        32,         // blocks_per_core_limit
+        64 * 1024,  // registers_per_core_limit
+        255,        // registers_per_thread_limit
+        2,          // warp_alloc_granularity
+        256,        // register_alloc_granularity
+        256,        // shared_memory_alloc_granularity
+    },
 };
 
 DeviceDescription *CUDAExecutor::PopulateDeviceDescription() const {
-- 
GitLab


From f5f7b85b3c10da291f23263f0b4f0ac7cffa9059 Mon Sep 17 00:00:00 2001
From: Reed Wanderman-Milne <reedwm@google.com>
Date: Mon, 4 Dec 2017 15:12:08 -0800
Subject: [PATCH 0608/1225] Fix ResourceVariable's docstring example.

PiperOrigin-RevId: 177875589
---
 tensorflow/python/ops/resource_variable_ops.py | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/tensorflow/python/ops/resource_variable_ops.py b/tensorflow/python/ops/resource_variable_ops.py
index 652bfa1ebc..58ede02747 100644
--- a/tensorflow/python/ops/resource_variable_ops.py
+++ b/tensorflow/python/ops/resource_variable_ops.py
@@ -184,11 +184,12 @@ class ResourceVariable(variables.Variable):
     assign = a.assign(2.0)
     with tf.control_dependencies([assign]):
       b = a.read_value()
-
-    other_assign = a.assign(3.0)
+    with tf.control_dependencies([b]):
+      other_assign = a.assign(3.0)
     with tf.control_dependencies([other_assign]):
-      tf.Print(b, [b]).run()  # Will print 2.0 because the value was read before
-                              # other_assign ran.
+      # Will print 2.0 because the value was read before other_assign ran. If
+      # `a` was a tf.Variable instead, 2.0 or 3.0 could be printed.
+      tf.Print(b, [b]).eval()
   ```
 
   To enforce these consistency properties tf.ResourceVariable might make more
-- 
GitLab


From d87a76d7d6b053219dbd49a87b3c4b379a1c6566 Mon Sep 17 00:00:00 2001
From: Peter Hawkins <phawkins@google.com>
Date: Mon, 4 Dec 2017 15:13:45 -0800
Subject: [PATCH 0609/1225] Enable bfloat16 use from Python: * add a bfloat16
 Python type and NumPy extension. * allow the bfloat16 type in a number places
 in the Python libraries.

PiperOrigin-RevId: 177875784
---
 tensorflow/contrib/cmake/tf_python.cmake      |   2 +
 tensorflow/core/framework/numeric_types.h     |   7 +
 tensorflow/python/BUILD                       |  31 +
 tensorflow/python/framework/dtypes.py         |  24 +-
 tensorflow/python/framework/dtypes_test.py    |   5 +-
 tensorflow/python/framework/tensor_util.py    |  13 +
 tensorflow/python/layers/normalization.py     |   2 +-
 tensorflow/python/lib/core/bfloat16.cc        | 560 ++++++++++++++++++
 tensorflow/python/lib/core/bfloat16.h         |  34 ++
 tensorflow/python/lib/core/bfloat16.i         |  30 +
 tensorflow/python/lib/core/bfloat16_test.py   | 200 +++++++
 tensorflow/python/lib/core/ndarray_tensor.cc  |   5 +
 .../python/lib/core/ndarray_tensor_bridge.cc  |   3 +-
 tensorflow/python/ops/math_ops.py             |   1 +
 tensorflow/python/ops/nn_impl.py              |   2 +-
 tensorflow/python/ops/random_ops.py           |   4 +-
 tensorflow/python/tensorflow.i                |   2 +
 tensorflow/python/training/optimizer.py       |   3 +-
 18 files changed, 915 insertions(+), 13 deletions(-)
 create mode 100644 tensorflow/python/lib/core/bfloat16.cc
 create mode 100644 tensorflow/python/lib/core/bfloat16.h
 create mode 100644 tensorflow/python/lib/core/bfloat16.i
 create mode 100644 tensorflow/python/lib/core/bfloat16_test.py

diff --git a/tensorflow/contrib/cmake/tf_python.cmake b/tensorflow/contrib/cmake/tf_python.cmake
index 401662b6c5..d102b442e7 100755
--- a/tensorflow/contrib/cmake/tf_python.cmake
+++ b/tensorflow/contrib/cmake/tf_python.cmake
@@ -889,6 +889,8 @@ set (pywrap_tensorflow_internal_src
     "${tensorflow_source_dir}/tensorflow/python/framework/cpp_shape_inference.cc"
     "${tensorflow_source_dir}/tensorflow/python/framework/python_op_gen.h"
     "${tensorflow_source_dir}/tensorflow/python/framework/python_op_gen.cc"
+    "${tensorflow_source_dir}/tensorflow/python/lib/core/bfloat16.h"
+    "${tensorflow_source_dir}/tensorflow/python/lib/core/bfloat16.cc"
     "${tensorflow_source_dir}/tensorflow/python/lib/core/numpy.h"
     "${tensorflow_source_dir}/tensorflow/python/lib/core/numpy.cc"
     "${tensorflow_source_dir}/tensorflow/python/lib/core/ndarray_tensor.h"
diff --git a/tensorflow/core/framework/numeric_types.h b/tensorflow/core/framework/numeric_types.h
index bdd5af064b..569a4c3756 100644
--- a/tensorflow/core/framework/numeric_types.h
+++ b/tensorflow/core/framework/numeric_types.h
@@ -46,6 +46,10 @@ struct bfloat16 {
   EIGEN_DEVICE_FUNC bfloat16() {}
 
   EIGEN_DEVICE_FUNC explicit bfloat16(const float v) {
+    if (isnan(v)) {
+      value = NAN_VALUE;
+      return;
+    }
     const uint16_t* p = reinterpret_cast<const uint16_t*>(&v);
 #if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
     value = p[0];
@@ -132,6 +136,9 @@ struct bfloat16 {
   }
 
   uint16_t value;
+
+  // A value that represents "not a number".
+  static const uint16_t NAN_VALUE = 0x7FC0;
 };
 
 inline bfloat16 operator+(bfloat16 a, bfloat16 b) {
diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD
index 12d81c4383..9bddf7d161 100644
--- a/tensorflow/python/BUILD
+++ b/tensorflow/python/BUILD
@@ -227,11 +227,26 @@ cc_library(
     ],
 )
 
+cc_library(
+    name = "bfloat16_lib",
+    srcs = ["lib/core/bfloat16.cc"],
+    hdrs = ["lib/core/bfloat16.h"],
+    deps = [
+        ":numpy_lib",
+        ":safe_ptr",
+        "//tensorflow/core:framework",
+        "//tensorflow/core:lib",
+        "//third_party/py/numpy:headers",
+        "//util/python:python_headers",
+    ],
+)
+
 cc_library(
     name = "ndarray_tensor_bridge",
     srcs = ["lib/core/ndarray_tensor_bridge.cc"],
     hdrs = ["lib/core/ndarray_tensor_bridge.h"],
     deps = [
+        ":bfloat16_lib",
         ":numpy_lib",
         "//tensorflow/c:c_api",
         "//tensorflow/core:lib",
@@ -294,6 +309,7 @@ cc_library(
     srcs = ["lib/core/ndarray_tensor.cc"],
     hdrs = ["lib/core/ndarray_tensor.h"],
     deps = [
+        ":bfloat16_lib",
         ":ndarray_tensor_bridge",
         ":numpy_lib",
         ":safe_ptr",
@@ -599,6 +615,7 @@ py_library(
     srcs = ["framework/dtypes.py"],
     srcs_version = "PY2AND3",
     deps = [
+        ":pywrap_tensorflow",
         "//tensorflow/core:protos_all_py",
     ],
 )
@@ -3020,6 +3037,7 @@ tf_py_wrap_cc(
         "grappler/item.i",
         "grappler/model_analyzer.i",
         "grappler/tf_optimizer.i",
+        "lib/core/bfloat16.i",
         "lib/core/py_func.i",
         "lib/core/strings.i",
         "lib/io/file_io.i",
@@ -3038,6 +3056,7 @@ tf_py_wrap_cc(
         "util/util.i",
     ],
     deps = [
+        ":bfloat16_lib",
         ":cost_analyzer_lib",
         ":model_analyzer_lib",
         ":cpp_python_util",
@@ -3435,6 +3454,18 @@ py_test(
     ],
 )
 
+py_test(
+    name = "bfloat16_test",
+    size = "small",
+    srcs = ["lib/core/bfloat16_test.py"],
+    srcs_version = "PY2AND3",
+    deps = [
+        ":client_testlib",
+        ":lib",
+        ":pywrap_tensorflow",
+    ],
+)
+
 py_test(
     name = "file_io_test",
     size = "small",
diff --git a/tensorflow/python/framework/dtypes.py b/tensorflow/python/framework/dtypes.py
index db124ab12a..b0422eb6be 100644
--- a/tensorflow/python/framework/dtypes.py
+++ b/tensorflow/python/framework/dtypes.py
@@ -18,9 +18,14 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
+
 import numpy as np
 
 from tensorflow.core.framework import types_pb2
+from tensorflow.python import pywrap_tensorflow
+
+
+_np_bfloat16 = pywrap_tensorflow.TF_bfloat16_type()
 
 
 class DType(object):
@@ -146,8 +151,9 @@ class DType(object):
   @property
   def is_floating(self):
     """Returns whether this is a (non-quantized, real) floating point type."""
-    return self.is_numpy_compatible and np.issubdtype(self.as_numpy_dtype,
-                                                      np.floating)
+    return ((self.is_numpy_compatible and np.issubdtype(self.as_numpy_dtype,
+                                                        np.floating))
+            or self.base_dtype == bfloat16)
 
   @property
   def is_complex(self):
@@ -157,7 +163,7 @@ class DType(object):
   @property
   def is_quantized(self):
     """Returns whether this is a quantized data type."""
-    return self.base_dtype in [qint8, quint8, qint16, quint16, qint32, bfloat16]
+    return self.base_dtype in [qint8, quint8, qint16, quint16, qint32]
 
   @property
   def is_unsigned(self):
@@ -194,6 +200,8 @@ class DType(object):
       try:
         return np.iinfo(self.as_numpy_dtype()).min
       except:
+        if self.base_dtype == bfloat16:
+          return _np_bfloat16(float.fromhex("-0x1.FEp127"))
         raise TypeError("Cannot find minimum value of %s." % self)
 
   @property
@@ -216,6 +224,8 @@ class DType(object):
       try:
         return np.iinfo(self.as_numpy_dtype()).max
       except:
+        if self.base_dtype == bfloat16:
+          return _np_bfloat16(float.fromhex("0x1.FEp127"))
         raise TypeError("Cannot find maximum value of %s." % self)
 
   @property
@@ -486,6 +496,8 @@ _np_qint16 = np.dtype([("qint16", np.int16, 1)])
 _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
 _np_qint32 = np.dtype([("qint32", np.int32, 1)])
 
+# _np_bfloat16 is defined by a module import.
+
 # Custom struct dtype for directly-fed ResourceHandles of supported type(s).
 np_resource = np.dtype([("resource", np.ubyte, 1)])
 
@@ -511,7 +523,7 @@ _NP_TO_TF = frozenset([
     (_np_qint16, qint16),
     (_np_quint16, quint16),
     (_np_qint32, qint32),
-    # NOTE(touts): Intentionally no way to feed a DT_BFLOAT16.
+    (_np_bfloat16, bfloat16),
 ])
 _TF_TO_NP = {
     types_pb2.DT_HALF: np.float16,
@@ -536,7 +548,7 @@ _TF_TO_NP = {
     types_pb2.DT_QINT16: _np_qint16,
     types_pb2.DT_QUINT16: _np_quint16,
     types_pb2.DT_QINT32: _np_qint32,
-    types_pb2.DT_BFLOAT16: np.uint16,
+    types_pb2.DT_BFLOAT16: _np_bfloat16,
 
     # Ref types
     types_pb2.DT_HALF_REF: np.float16,
@@ -559,7 +571,7 @@ _TF_TO_NP = {
     types_pb2.DT_QINT16_REF: _np_qint16,
     types_pb2.DT_QUINT16_REF: _np_quint16,
     types_pb2.DT_QINT32_REF: _np_qint32,
-    types_pb2.DT_BFLOAT16_REF: np.uint16,
+    types_pb2.DT_BFLOAT16_REF: _np_bfloat16,
 }
 
 
diff --git a/tensorflow/python/framework/dtypes_test.py b/tensorflow/python/framework/dtypes_test.py
index 67842e14b1..e49e2fda5d 100644
--- a/tensorflow/python/framework/dtypes_test.py
+++ b/tensorflow/python/framework/dtypes_test.py
@@ -176,7 +176,7 @@ class TypesTest(test_util.TensorFlowTestCase):
     self.assertEqual(dtypes.as_dtype("float64").is_floating, True)
     self.assertEqual(dtypes.as_dtype("string").is_floating, False)
     self.assertEqual(dtypes.as_dtype("bool").is_floating, False)
-    self.assertEqual(dtypes.as_dtype("bfloat16").is_integer, False)
+    self.assertEqual(dtypes.as_dtype("bfloat16").is_floating, True)
     self.assertEqual(dtypes.as_dtype("qint8").is_floating, False)
     self.assertEqual(dtypes.as_dtype("qint16").is_floating, False)
     self.assertEqual(dtypes.as_dtype("qint32").is_floating, False)
@@ -276,6 +276,9 @@ class TypesTest(test_util.TensorFlowTestCase):
       if numpy_dtype in (np.float16, np.float32, np.float64):
         self.assertEquals(dtype.min, np.finfo(numpy_dtype).min)
         self.assertEquals(dtype.max, np.finfo(numpy_dtype).max)
+      if numpy_dtype == dtypes.bfloat16.as_numpy_dtype:
+        self.assertEquals(dtype.min, float.fromhex("-0x1.FEp127"))
+        self.assertEquals(dtype.max, float.fromhex("0x1.FEp127"))
 
   def testRepr(self):
     for enum, name in dtypes._TYPE_TO_STRING.items():
diff --git a/tensorflow/python/framework/tensor_util.py b/tensorflow/python/framework/tensor_util.py
index 9fc0e49463..1b90c7ad4d 100644
--- a/tensorflow/python/framework/tensor_util.py
+++ b/tensorflow/python/framework/tensor_util.py
@@ -49,8 +49,20 @@ def SlowAppendFloat16ArrayToTensorProto(tensor_proto, proto_values):
   tensor_proto.half_val.extend([
       ExtractBitsFromFloat16(x) for x in proto_values])
 
+
+def ExtractBitsFromBFloat16(x):
+  return np.asscalar(
+      np.asarray(x, dtype=dtypes.bfloat16.as_numpy_dtype).view(np.uint16))
+
+
+def SlowAppendBFloat16ArrayToTensorProto(tensor_proto, proto_values):
+  tensor_proto.half_val.extend([
+      ExtractBitsFromBFloat16(x) for x in proto_values])
+
+
 if _FAST_TENSOR_UTIL_AVAILABLE:
   _NP_TO_APPEND_FN = {
+      dtypes.bfloat16.as_numpy_dtype: SlowAppendBFloat16ArrayToTensorProto,
       # TODO(sesse): We should have a
       # fast_tensor_util.AppendFloat16ArrayToTensorProto,
       # but it seems np.float16_t doesn't exist?
@@ -121,6 +133,7 @@ else:
     tensor_proto.bool_val.extend([np.asscalar(x) for x in proto_values])
 
   _NP_TO_APPEND_FN = {
+      dtypes.bfloat16.as_numpy_dtype: SlowAppendBFloat16ArrayToTensorProto,
       np.float16: SlowAppendFloat16ArrayToTensorProto,
       np.float32: SlowAppendFloat32ArrayToTensorProto,
       np.float64: SlowAppendFloat64ArrayToTensorProto,
diff --git a/tensorflow/python/layers/normalization.py b/tensorflow/python/layers/normalization.py
index 83237b8733..65e67dd016 100644
--- a/tensorflow/python/layers/normalization.py
+++ b/tensorflow/python/layers/normalization.py
@@ -241,7 +241,7 @@ class BatchNormalization(base.Layer):
                          'axis == [1] or axis == [3]')
 
     # Raise parameters of fp16 batch norm to fp32
-    if self.dtype == dtypes.float16:
+    if self.dtype == dtypes.float16 or self.dtype == dtypes.bfloat16:
       param_dtype = dtypes.float32
     else:
       param_dtype = self.dtype or dtypes.float32
diff --git a/tensorflow/python/lib/core/bfloat16.cc b/tensorflow/python/lib/core/bfloat16.cc
new file mode 100644
index 0000000000..dfe9eba03d
--- /dev/null
+++ b/tensorflow/python/lib/core/bfloat16.cc
@@ -0,0 +1,560 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/python/lib/core/bfloat16.h"
+
+#include "tensorflow/core/framework/numeric_types.h"
+#include "tensorflow/core/lib/strings/strcat.h"
+#include "tensorflow/core/platform/logging.h"
+#include "tensorflow/python/lib/core/numpy.h"
+#include "tensorflow/python/lib/core/safe_ptr.h"
+
+namespace tensorflow {
+namespace {
+
+// Workarounds for Python 2 vs 3 API differences.
+#if PY_MAJOR_VERSION < 3
+
+PyObject* MakePyString(const string& s) {
+  return PyString_FromString(s.c_str());
+}
+
+typedef long HashType;  // NOLINT
+
+bool TfPyInt_Check(PyObject* object) { return PyInt_Check(object); }
+
+PyObject* TfPyInt_FromLong(long x) {  // NOLINT
+  return PyInt_FromLong(x);
+}
+
+long TfPyInt_AsLong(PyObject* x) {  // NOLINT
+  return PyInt_AsLong(x);
+}
+
+#else  // PY_MAJOR_VERSION < 3
+
+PyObject* MakePyString(const string& s) {
+  return PyUnicode_FromString(s.c_str());
+}
+
+bool TfPyInt_Check(PyObject* object) {
+  if (!PyLong_Check(object)) {
+    return 0;
+  }
+  int overflow = 0;
+  PyLong_AsLongAndOverflow(object, &overflow);
+  return (overflow == 0);
+}
+
+PyObject* TfPyInt_FromLong(long x) {  // NOLINT
+  return PyLong_FromLong(x);
+}
+
+long TfPyInt_AsLong(PyObject* x) {  // NOLINT
+  return PyLong_AsLong(x);
+}
+
+typedef Py_hash_t HashType;
+
+#endif  // PY_MAJOR_VERSION < 3
+
+// Forward declaration.
+extern PyTypeObject PyBfloat16_Type;
+
+// Representation of a Python bfloat16 object.
+struct PyBfloat16 {
+  PyObject_HEAD;  // Python object header
+  bfloat16 value;
+};
+
+// Returns true if 'object' is a PyBfloat16.
+bool PyBfloat16_Check(PyObject* object) {
+  return PyObject_IsInstance(object,
+                             reinterpret_cast<PyObject*>(&PyBfloat16_Type));
+}
+
+// Extracts the value of a PyBfloat16 object.
+bfloat16 PyBfloat16_Bfloat16(PyObject* object) {
+  return reinterpret_cast<PyBfloat16*>(object)->value;
+}
+
+// Constructs a PyBfloat16 object from a bfloat16.
+Safe_PyObjectPtr PyBfloat16_FromBfloat16(bfloat16 x) {
+  Safe_PyObjectPtr ref =
+      make_safe(PyBfloat16_Type.tp_alloc(&PyBfloat16_Type, 0));
+  PyBfloat16* p = reinterpret_cast<PyBfloat16*>(ref.get());
+  if (p) {
+    p->value = x;
+  }
+  return ref;
+}
+
+// Converts a Python object to a bfloat16 value. Returns true on success,
+// returns false and reports a Python error on failure.
+bool AsBfloat16(PyObject* arg, bfloat16* output) {
+  if (PyBfloat16_Check(arg)) {
+    *output = PyBfloat16_Bfloat16(arg);
+    return true;
+  }
+  if (PyFloat_Check(arg)) {
+    double d = PyFloat_AsDouble(arg);
+    if (PyErr_Occurred()) {
+      return false;
+    }
+    // TODO(phawkins): check for overflow
+    *output = bfloat16(d);
+    return true;
+  }
+  if (TfPyInt_Check(arg)) {
+    long l = TfPyInt_AsLong(arg);  // NOLINT
+    if (PyErr_Occurred()) {
+      return false;
+    }
+    // TODO(phawkins): check for overflow
+    *output = bfloat16(static_cast<float>(l));
+    return true;
+  }
+  if (PyArray_IsScalar(arg, Float)) {
+    float f;
+    PyArray_ScalarAsCtype(arg, &f);
+    *output = bfloat16(f);
+    return true;
+  }
+  PyErr_Format(PyExc_TypeError, "expected number, got %s",
+               arg->ob_type->tp_name);
+  return false;
+}
+
+// Converts a PyBfloat16 into a PyFloat.
+PyObject* PyBfloat16_Float(PyObject* self) {
+  bfloat16 x = PyBfloat16_Bfloat16(self);
+  return PyFloat_FromDouble(static_cast<double>(x));
+}
+
+// Converts a PyBfloat16 into a PyInt.
+PyObject* PyBfloat16_Int(PyObject* self) {
+  bfloat16 x = PyBfloat16_Bfloat16(self);
+  long y = static_cast<long>(x);  // NOLINT
+  return TfPyInt_FromLong(y);
+}
+
+// Negates a PyBfloat16.
+PyObject* PyBfloat16_Negative(PyObject* self) {
+  bfloat16 x = PyBfloat16_Bfloat16(self);
+  return PyBfloat16_FromBfloat16(-x).release();
+}
+
+// Binary arithmetic operators on PyBfloat16 values.
+#define BFLOAT16_BINOP(name, op)                                  \
+  PyObject* PyBfloat16_##name(PyObject* a, PyObject* b) {         \
+    bfloat16 x, y;                                                \
+    if (!AsBfloat16(a, &x) || !AsBfloat16(b, &y)) return nullptr; \
+    bfloat16 z = x op y;                                          \
+    return PyBfloat16_FromBfloat16(z).release();                  \
+  }
+BFLOAT16_BINOP(Add, +)
+BFLOAT16_BINOP(Subtract, -)
+BFLOAT16_BINOP(Multiply, *)
+BFLOAT16_BINOP(Divide, /)
+#undef BFLOAT16_BINOP
+
+// Python number methods for PyBfloat16 objects.
+PyNumberMethods PyBfloat16_AsNumber = {
+    PyBfloat16_Add,       // nb_add
+    PyBfloat16_Subtract,  // nb_subtract
+    PyBfloat16_Multiply,  // nb_multiply
+#if PY_MAJOR_VERSION < 3
+    PyBfloat16_Divide,  // nb_divide
+#endif
+    nullptr,              // nb_remainder
+    nullptr,              // nb_divmod
+    nullptr,              // nb_power
+    PyBfloat16_Negative,  // nb_negative
+    nullptr,              // nb_positive
+    nullptr,              // nb_absolute
+    nullptr,              // nb_nonzero
+    nullptr,              // nb_invert
+    nullptr,              // nb_lshift
+    nullptr,              // nb_rshift
+    nullptr,              // nb_and
+    nullptr,              // nb_xor
+    nullptr,              // nb_or
+#if PY_MAJOR_VERSION < 3
+    nullptr,  // nb_coerce
+#endif
+    PyBfloat16_Int,  // nb_int
+#if PY_MAJOR_VERSION < 3
+    PyBfloat16_Int,  // nb_long
+#else
+    nullptr,  // reserved
+#endif
+    PyBfloat16_Float,  // nb_float
+#if PY_MAJOR_VERSION < 3
+    nullptr,  // nb_oct
+    nullptr,  // nb_hex
+#endif
+
+    nullptr,  // nb_inplace_add
+    nullptr,  // nb_inplace_subtract
+    nullptr,  // nb_inplace_multiply
+#if PY_MAJOR_VERSION < 3
+    nullptr,  // nb_inplace_divide
+#endif
+    nullptr,  // nb_inplace_remainder
+    nullptr,  // nb_inplace_power
+    nullptr,  // nb_inplace_lshift
+    nullptr,  // nb_inplace_rshift
+    nullptr,  // nb_inplace_and
+    nullptr,  // nb_inplace_xor
+    nullptr,  // nb_inplace_or
+
+    nullptr,            // nb_floor_divide
+    PyBfloat16_Divide,  // nb_true_divide
+    nullptr,            // nb_inplace_floor_divide
+    nullptr,            // nb_inplace_true_divide
+    nullptr,            // nb_index
+};
+
+// Constructs a new PyBfloat16.
+PyObject* PyBfloat16_New(PyTypeObject* type, PyObject* args, PyObject* kwds) {
+  if (kwds && PyDict_Size(kwds)) {
+    PyErr_SetString(PyExc_TypeError, "constructor takes no keyword arguments");
+    return nullptr;
+  }
+  Py_ssize_t size = PyTuple_Size(args);
+  if (size != 1) {
+    PyErr_SetString(PyExc_TypeError,
+                    "expected number as argument to bfloat16 constructor");
+    return nullptr;
+  }
+  PyObject* arg = PyTuple_GetItem(args, 0);
+
+  if (PyBfloat16_Check(arg)) {
+    Py_INCREF(arg);
+    return arg;
+  } else {
+    bfloat16 value;
+    if (!AsBfloat16(arg, &value)) {
+      return nullptr;
+    }
+    return PyBfloat16_FromBfloat16(value).release();
+  }
+}
+
+// Comparisons on PyBfloat16s.
+PyObject* PyBfloat16_RichCompare(PyObject* a, PyObject* b, int op) {
+  bfloat16 x, y;
+  if (!AsBfloat16(a, &x) || !AsBfloat16(b, &y)) return nullptr;
+  bool result;
+  switch (op) {
+    case Py_LT:
+      result = x < y;
+      break;
+    case Py_LE:
+      result = x <= y;
+      break;
+    case Py_EQ:
+      result = x == y;
+      break;
+    case Py_NE:
+      result = x != y;
+      break;
+    case Py_GT:
+      result = x > y;
+      break;
+    case Py_GE:
+      result = x >= y;
+      break;
+    default:
+      LOG(FATAL) << "Invalid op type " << op;
+  }
+  return PyBool_FromLong(result);
+}
+
+// Implementation of repr() for PyBfloat16.
+PyObject* PyBfloat16_Repr(PyObject* self) {
+  bfloat16 x = reinterpret_cast<PyBfloat16*>(self)->value;
+  string v = strings::StrCat("bfloat16(", static_cast<float>(x), ")");
+  return MakePyString(v);
+}
+
+// Implementation of str() for PyBfloat16.
+PyObject* PyBfloat16_Str(PyObject* self) {
+  bfloat16 x = reinterpret_cast<PyBfloat16*>(self)->value;
+  string v = strings::StrCat(static_cast<float>(x));
+  return MakePyString(v);
+}
+
+// Hash function for PyBfloat16. We use the identity function, which is a weak
+// hash function.
+HashType PyBfloat16_Hash(PyObject* self) {
+  bfloat16 x = reinterpret_cast<PyBfloat16*>(self)->value;
+  return x.value;
+}
+
+// Python type for PyBfloat16 objects.
+PyTypeObject PyBfloat16_Type = {
+#if PY_MAJOR_VERSION < 3
+    PyObject_HEAD_INIT(nullptr) 0,  // ob_size
+#else
+    PyVarObject_HEAD_INIT(nullptr, 0)
+#endif
+    "bfloat16",                                // tp_name
+    sizeof(PyBfloat16),                        // tp_basicsize
+    0,                                         // tp_itemsize
+    nullptr,                                   // tp_dealloc
+    nullptr,                                   // tp_print
+    nullptr,                                   // tp_getattr
+    nullptr,                                   // tp_setattr
+    nullptr,                                   // tp_compare / tp_reserved
+    PyBfloat16_Repr,                           // tp_repr
+    &PyBfloat16_AsNumber,                      // tp_as_number
+    nullptr,                                   // tp_as_sequence
+    nullptr,                                   // tp_as_mapping
+    PyBfloat16_Hash,                           // tp_hash
+    nullptr,                                   // tp_call
+    PyBfloat16_Str,                            // tp_str
+    nullptr,                                   // tp_getattro
+    nullptr,                                   // tp_setattro
+    nullptr,                                   // tp_as_buffer
+    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE,  // tp_flags
+    "bfloat16 floating-point values",          // tp_doc
+    nullptr,                                   // tp_traverse
+    nullptr,                                   // tp_clear
+    PyBfloat16_RichCompare,                    // tp_richcompare
+    0,                                         // tp_weaklistoffset
+    nullptr,                                   // tp_iter
+    nullptr,                                   // tp_iternext
+    nullptr,                                   // tp_methods
+    nullptr,                                   // tp_members
+    nullptr,                                   // tp_getset
+    nullptr,                                   // tp_base
+    nullptr,                                   // tp_dict
+    nullptr,                                   // tp_descr_get
+    nullptr,                                   // tp_descr_set
+    0,                                         // tp_dictoffset
+    nullptr,                                   // tp_init
+    nullptr,                                   // tp_alloc
+    PyBfloat16_New,                            // tp_new
+    nullptr,                                   // tp_free
+    nullptr,                                   // tp_is_gc
+    nullptr,                                   // tp_bases
+    nullptr,                                   // tp_mro
+    nullptr,                                   // tp_cache
+    nullptr,                                   // tp_subclasses
+    nullptr,                                   // tp_weaklist
+    nullptr,                                   // tp_del
+    0,                                         // tp_version_tag
+};
+
+// Numpy support
+
+PyArray_ArrFuncs NPyBfloat16_ArrFuncs;
+
+PyArray_Descr NPyBfloat16_Descr = {
+    PyObject_HEAD_INIT(nullptr) & PyBfloat16_Type,  // typeobj
+    // We must register bfloat16 with a kind other than "f", because numpy
+    // considers two types with the same kind and size to be equal, but
+    // float16 != bfloat16.
+    'V',  // kind
+    // TODO(phawkins): there doesn't seem to be a way of guaranteeing a type
+    // character is unique.
+    'E',                                                  // type
+    '=',                                                  // byteorder
+    NPY_NEEDS_PYAPI | NPY_USE_GETITEM | NPY_USE_SETITEM,  // hasobject
+    0,                                                    // type_num
+    sizeof(bfloat16),                                     // elsize
+    alignof(bfloat16),                                    // alignment
+    nullptr,                                              // subarray
+    nullptr,                                              // fields
+    nullptr,                                              // names
+    &NPyBfloat16_ArrFuncs,                                // f
+};
+
+// Registered numpy type ID. Global variable populated by the registration code.
+int npy_bfloat16_ = -1;
+
+// Implementations of NumPy array methods.
+
+PyObject* NPyBfloat16_GetItem(void* data, void* arr) {
+  bfloat16 x;
+  memcpy(&x, data, sizeof(bfloat16));
+  return PyBfloat16_FromBfloat16(x).release();
+}
+
+int NPyBfloat16_SetItem(PyObject* item, void* data, void* arr) {
+  bfloat16 x;
+  if (!AsBfloat16(item, &x)) return -1;
+  memcpy(data, &x, sizeof(bfloat16));
+  return 0;
+}
+
+void ByteSwap16(void* value) {
+  char* p = reinterpret_cast<char*>(value);
+  std::swap(p[0], p[1]);
+}
+
+void NPyBfloat16_CopySwapN(void* dstv, npy_intp dstride, void* srcv,
+                           npy_intp sstride, npy_intp n, int swap, void* arr) {
+  char* dst = reinterpret_cast<char*>(dstv);
+  char* src = reinterpret_cast<char*>(srcv);
+  if (!src) {
+    return;
+  }
+  if (swap) {
+    for (npy_intp i = 0; i < n; i++) {
+      char* r = dst + dstride * i;
+      memcpy(r, src + sstride * i, sizeof(uint16_t));
+      ByteSwap16(r);
+    }
+  } else if (dstride == sizeof(uint16_t) && sstride == sizeof(uint16_t)) {
+    memcpy(dst, src, n * sizeof(uint16_t));
+  } else {
+    for (npy_intp i = 0; i < n; i++) {
+      memcpy(dst + dstride * i, src + sstride * i, sizeof(uint16_t));
+    }
+  }
+}
+
+void NPyBfloat16_CopySwap(void* dst, void* src, int swap, void* arr) {
+  if (!src) {
+    return;
+  }
+  memcpy(dst, src, sizeof(uint16_t));
+  if (swap) {
+    ByteSwap16(dst);
+  }
+}
+
+npy_bool NPyBfloat16_NonZero(void* data, void* arr) {
+  bfloat16 x;
+  memcpy(&x, data, sizeof(x));
+  return x != static_cast<bfloat16>(0);
+}
+
+// NumPy casts
+
+// Performs a NumPy array cast from type 'From' to 'To'.
+template <typename From, typename To>
+void NPyCast(void* from_void, void* to_void, npy_intp n, void* fromarr,
+             void* toarr) {
+  const From* from = reinterpret_cast<From*>(from_void);
+  To* to = reinterpret_cast<To*>(to_void);
+  for (npy_intp i = 0; i < n; ++i) {
+    to[i] = static_cast<To>(from[i]);
+  }
+}
+
+// Registers a cast between bfloat16 and type 'T'. 'numpy_type' is the NumPy
+// type corresponding to 'T'. If 'cast_is_safe', registers that bfloat16 can be
+// safely coerced to T.
+template <typename T>
+bool RegisterBfloat16Cast(int numpy_type, bool cast_is_safe) {
+  if (PyArray_RegisterCastFunc(PyArray_DescrFromType(numpy_type), npy_bfloat16_,
+                               NPyCast<T, bfloat16>) < 0) {
+    return false;
+  }
+  if (PyArray_RegisterCastFunc(&NPyBfloat16_Descr, numpy_type,
+                               NPyCast<bfloat16, T>) < 0) {
+    return false;
+  }
+  if (cast_is_safe && PyArray_RegisterCanCast(&NPyBfloat16_Descr, numpy_type,
+                                              NPY_NOSCALAR) < 0) {
+    return false;
+  }
+  return true;
+}
+
+// Initializes the module.
+bool Initialize() {
+  // We hit a mysterious crash if we haven't initialized numpy before this:
+  PyBfloat16_Type.tp_base = &PyGenericArrType_Type;
+
+  if (PyType_Ready(&PyBfloat16_Type) < 0) {
+    return false;
+  }
+
+  // Initializes the NumPy descriptor.
+  PyArray_InitArrFuncs(&NPyBfloat16_ArrFuncs);
+  NPyBfloat16_ArrFuncs.getitem = NPyBfloat16_GetItem;
+  NPyBfloat16_ArrFuncs.setitem = NPyBfloat16_SetItem;
+  NPyBfloat16_ArrFuncs.copyswapn = NPyBfloat16_CopySwapN;
+  NPyBfloat16_ArrFuncs.copyswap = NPyBfloat16_CopySwap;
+  NPyBfloat16_ArrFuncs.nonzero = NPyBfloat16_NonZero;
+
+  Py_TYPE(&NPyBfloat16_Descr) = &PyArrayDescr_Type;
+  npy_bfloat16_ = PyArray_RegisterDataType(&NPyBfloat16_Descr);
+  if (npy_bfloat16_ < 0) return false;
+
+  // Support dtype(bfloat16)
+  if (PyDict_SetItemString(PyBfloat16_Type.tp_dict, "dtype",
+                           reinterpret_cast<PyObject*>(&NPyBfloat16_Descr)) <
+      0) {
+    return false;
+  }
+
+  // Register casts
+
+  // We lie shamelessly and say that a cast from half to bfloat16 is safe.
+  // Numpy frequently uses the smallest legal representation type for small
+  // float constants (e.g., 1.0), which is often float16. Things break if these
+  // cannot be converted transparently to bfloat16.
+  if (!RegisterBfloat16Cast<Eigen::half>(NPY_HALF, /*cast_is_safe=*/true)) {
+    return false;
+  }
+
+  if (!RegisterBfloat16Cast<float>(NPY_FLOAT, /*cast_is_safe=*/true)) {
+    return false;
+  }
+  if (!RegisterBfloat16Cast<double>(NPY_DOUBLE, /*cast_is_safe=*/true)) {
+    return false;
+  }
+  if (!RegisterBfloat16Cast<int32>(NPY_INT32, /*cast_is_safe=*/false)) {
+    return false;
+  }
+  if (!RegisterBfloat16Cast<int64>(NPY_INT64, /*cast_is_safe=*/false)) {
+    return false;
+  }
+
+  return true;
+}
+
+}  // namespace
+
+void RegisterNumpyBfloat16() {
+  if (npy_bfloat16_ >= 0) {
+    // Already initialized.
+    return;
+  }
+  if (!Initialize()) {
+    if (!PyErr_Occurred()) {
+      PyErr_SetString(PyExc_RuntimeError, "cannot load bfloat16 module.");
+    }
+    PyErr_Print();
+  }
+}
+
+PyObject* Bfloat16PyType() {
+  CHECK(PyBfloat16_Type.tp_base != nullptr);
+  Py_INCREF(&PyBfloat16_Type);
+  return reinterpret_cast<PyObject*>(&PyBfloat16_Type);
+}
+
+int Bfloat16NumpyType() {
+  CHECK_GE(npy_bfloat16_, 0);
+  return npy_bfloat16_;
+}
+
+}  // namespace tensorflow
diff --git a/tensorflow/python/lib/core/bfloat16.h b/tensorflow/python/lib/core/bfloat16.h
new file mode 100644
index 0000000000..a609928ba9
--- /dev/null
+++ b/tensorflow/python/lib/core/bfloat16.h
@@ -0,0 +1,34 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_PYTHON_LIB_CORE_BFLOAT16_H_
+#define TENSORFLOW_PYTHON_LIB_CORE_BFLOAT16_H_
+
+#include <Python.h>
+
+namespace tensorflow {
+
+// Register the bfloat16 numpy type.
+void RegisterNumpyBfloat16();
+
+// Returns the PyObject for the bfloat16 type.
+PyObject* Bfloat16PyType();
+
+// Returns the id number of the bfloat16 numpy type.
+int Bfloat16NumpyType();
+
+}  // namespace tensorflow
+
+#endif  // TENSORFLOW_PYTHON_LIB_CORE_BFLOAT16_H_
diff --git a/tensorflow/python/lib/core/bfloat16.i b/tensorflow/python/lib/core/bfloat16.i
new file mode 100644
index 0000000000..10444b676b
--- /dev/null
+++ b/tensorflow/python/lib/core/bfloat16.i
@@ -0,0 +1,30 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+%{
+#include "tensorflow/python/lib/core/bfloat16.h"
+%}
+
+%init %{
+tensorflow::RegisterNumpyBfloat16();
+%}
+
+%{
+PyObject* TF_bfloat16_type() {
+  return tensorflow::Bfloat16PyType();
+}
+%}
+
+PyObject* TF_bfloat16_type();
diff --git a/tensorflow/python/lib/core/bfloat16_test.py b/tensorflow/python/lib/core/bfloat16_test.py
new file mode 100644
index 0000000000..02af33d98b
--- /dev/null
+++ b/tensorflow/python/lib/core/bfloat16_test.py
@@ -0,0 +1,200 @@
+# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Test cases for the bfloat16 Python type."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import math
+
+import numpy as np
+
+# pylint: disable=unused-import,g-bad-import-order
+from tensorflow.python import pywrap_tensorflow
+from tensorflow.python.platform import test
+
+
+bfloat16 = pywrap_tensorflow.TF_bfloat16_type()
+
+
+class Bfloat16Test(test.TestCase):
+
+  def float_values(self):
+    """Returns values that should round trip exactly to float and back."""
+    epsilon = float.fromhex("1.0p-7")
+    return [
+        0.0, 1.0, -1, 0.5, -0.5, epsilon, 1.0 + epsilon, 1.0 - epsilon,
+        -1.0 - epsilon, -1.0 + epsilon, 3.5, 42.0, 255.0, 256.0,
+        float("inf"), float("-inf"), float("nan")]
+
+  def _assertFloatIdentical(self, v, w):
+    if math.isnan(v):
+      self.assertTrue(math.isnan(w))
+    else:
+      self.assertEqual(v, w)
+
+  def testRoundTripToFloat(self):
+    for v in self.float_values():
+      self._assertFloatIdentical(v, float(bfloat16(v)))
+
+  def testRoundTripToInt(self):
+    for v in [-256, -255, -34, -2, -1, 0, 1, 2, 10, 47, 128, 255, 256, 512]:
+      self.assertEqual(v, int(bfloat16(v)))
+
+  def testStr(self):
+    self.assertEqual("0", str(bfloat16(0.0)))
+    self.assertEqual("1", str(bfloat16(1.0)))
+    self.assertEqual("-3.5", str(bfloat16(-3.5)))
+    self.assertEqual("0.0078125", str(bfloat16(float.fromhex("1.0p-7"))))
+    self.assertEqual("inf", str(bfloat16(float("inf"))))
+    self.assertEqual("-inf", str(bfloat16(float("-inf"))))
+    self.assertEqual("nan", str(bfloat16(float("nan"))))
+
+  def testRepr(self):
+    self.assertEqual("bfloat16(0)", repr(bfloat16(0)))
+    self.assertEqual("bfloat16(1)", repr(bfloat16(1)))
+    self.assertEqual("bfloat16(-3.5)", repr(bfloat16(-3.5)))
+    self.assertEqual("bfloat16(0.0078125)",
+                     repr(bfloat16(float.fromhex("1.0p-7"))))
+    self.assertEqual("bfloat16(inf)", repr(bfloat16(float("inf"))))
+    self.assertEqual("bfloat16(-inf)", repr(bfloat16(float("-inf"))))
+    self.assertEqual("bfloat16(nan)", repr(bfloat16(float("nan"))))
+
+  def testHash(self):
+    self.assertEqual(0, hash(bfloat16(0.0)))
+    self.assertEqual(0x3f80, hash(bfloat16(1.0)))
+    self.assertEqual(0x7fc0, hash(bfloat16(float("nan"))))
+
+  # Tests for Python operations
+  def testNegate(self):
+    for v in self.float_values():
+      self._assertFloatIdentical(-v, float(-bfloat16(v)))
+
+  def testAdd(self):
+    self._assertFloatIdentical(0, float(bfloat16(0) + bfloat16(0)))
+    self._assertFloatIdentical(1, float(bfloat16(1) + bfloat16(0)))
+    self._assertFloatIdentical(0, float(bfloat16(1) + bfloat16(-1)))
+    self._assertFloatIdentical(5.5, float(bfloat16(2) + bfloat16(3.5)))
+    self._assertFloatIdentical(1.25, float(bfloat16(3.5) + bfloat16(-2.25)))
+    self._assertFloatIdentical(float("inf"),
+                               float(bfloat16(float("inf")) + bfloat16(-2.25)))
+    self._assertFloatIdentical(float("-inf"),
+                               float(bfloat16(float("-inf")) + bfloat16(-2.25)))
+    self.assertTrue(math.isnan(float(bfloat16(3.5) + bfloat16(float("nan")))))
+
+  def testSub(self):
+    self._assertFloatIdentical(0, float(bfloat16(0) - bfloat16(0)))
+    self._assertFloatIdentical(1, float(bfloat16(1) - bfloat16(0)))
+    self._assertFloatIdentical(2, float(bfloat16(1) - bfloat16(-1)))
+    self._assertFloatIdentical(-1.5, float(bfloat16(2) - bfloat16(3.5)))
+    self._assertFloatIdentical(5.75, float(bfloat16(3.5) - bfloat16(-2.25)))
+    self._assertFloatIdentical(float("-inf"),
+                               float(bfloat16(-2.25) - bfloat16(float("inf"))))
+    self._assertFloatIdentical(float("inf"),
+                               float(bfloat16(-2.25) - bfloat16(float("-inf"))))
+    self.assertTrue(math.isnan(float(bfloat16(3.5) - bfloat16(float("nan")))))
+
+  def testMul(self):
+    self._assertFloatIdentical(0, float(bfloat16(0) * bfloat16(0)))
+    self._assertFloatIdentical(0, float(bfloat16(1) * bfloat16(0)))
+    self._assertFloatIdentical(-1, float(bfloat16(1) * bfloat16(-1)))
+    self._assertFloatIdentical(-7.875, float(bfloat16(3.5) * bfloat16(-2.25)))
+    self._assertFloatIdentical(float("-inf"),
+                               float(bfloat16(float("inf")) * bfloat16(-2.25)))
+    self._assertFloatIdentical(float("inf"),
+                               float(bfloat16(float("-inf")) * bfloat16(-2.25)))
+    self.assertTrue(math.isnan(float(bfloat16(3.5) * bfloat16(float("nan")))))
+
+  def testDiv(self):
+    self.assertTrue(math.isnan(float(bfloat16(0) / bfloat16(0))))
+    self._assertFloatIdentical(float("inf"), float(bfloat16(1) / bfloat16(0)))
+    self._assertFloatIdentical(-1, float(bfloat16(1) / bfloat16(-1)))
+    self._assertFloatIdentical(-1.75, float(bfloat16(3.5) / bfloat16(-2)))
+    self._assertFloatIdentical(float("-inf"),
+                               float(bfloat16(float("inf")) / bfloat16(-2.25)))
+    self._assertFloatIdentical(float("inf"),
+                               float(bfloat16(float("-inf")) / bfloat16(-2.25)))
+    self.assertTrue(math.isnan(float(bfloat16(3.5) / bfloat16(float("nan")))))
+
+  def testLess(self):
+    for v in self.float_values():
+      for w in self.float_values():
+        self.assertEqual(v < w, bfloat16(v) < bfloat16(w))
+
+  def testLessEqual(self):
+    for v in self.float_values():
+      for w in self.float_values():
+        self.assertEqual(v <= w, bfloat16(v) <= bfloat16(w))
+
+  def testGreater(self):
+    for v in self.float_values():
+      for w in self.float_values():
+        self.assertEqual(v > w, bfloat16(v) > bfloat16(w))
+
+  def testGreaterEqual(self):
+    for v in self.float_values():
+      for w in self.float_values():
+        self.assertEqual(v >= w, bfloat16(v) >= bfloat16(w))
+
+  def testEqual(self):
+    for v in self.float_values():
+      for w in self.float_values():
+        self.assertEqual(v == w, bfloat16(v) == bfloat16(w))
+
+  def testNotEqual(self):
+    for v in self.float_values():
+      for w in self.float_values():
+        self.assertEqual(v != w, bfloat16(v) != bfloat16(w))
+
+
+class Bfloat16NumPyTest(test.TestCase):
+
+  def testDtype(self):
+    self.assertEqual(bfloat16, np.dtype(bfloat16))
+
+  def testArray(self):
+    x = np.array([[1, 2, 3]], dtype=bfloat16)
+    self.assertEqual(bfloat16, x.dtype)
+    self.assertEqual("[[bfloat16(1) bfloat16(2) bfloat16(3)]]", str(x))
+    self.assertAllEqual(x, x)
+    self.assertAllClose(x, x)
+
+  def testCasts(self):
+    for dtype in [np.float16, np.float32, np.float64, np.int32, np.int64]:
+      x = np.array([[1, 2, 3]], dtype=dtype)
+      y = x.astype(bfloat16)
+      z = y.astype(dtype)
+      self.assertTrue(np.all(x == y))
+      self.assertEqual(bfloat16, y.dtype)
+      self.assertTrue(np.all(x == z))
+      self.assertEqual(dtype, z.dtype)
+
+  def testAdd(self):
+    x = np.array([[1, 2, 3]], dtype=bfloat16)
+    y = np.array([[4, 5, 6]], dtype=bfloat16)
+    self.assertAllClose(np.array([[5, 7, 9]]), x + y)
+
+  def testLogSumExp(self):
+    x = np.array([[1, 2, 3]], dtype=np.float32)
+    y = np.array([[4, 5, 6]], dtype=np.float32)
+    self.assertAllClose(np.logaddexp(x, y),
+                        np.logaddexp(x.astype(bfloat16), y.astype(bfloat16)),
+                        atol=2e-2)
+
+
+if __name__ == "__main__":
+  test.main()
diff --git a/tensorflow/python/lib/core/ndarray_tensor.cc b/tensorflow/python/lib/core/ndarray_tensor.cc
index cf2c2e6eb0..994af69386 100644
--- a/tensorflow/python/lib/core/ndarray_tensor.cc
+++ b/tensorflow/python/lib/core/ndarray_tensor.cc
@@ -21,6 +21,7 @@ limitations under the License.
 #include "tensorflow/core/lib/core/errors.h"
 #include "tensorflow/core/lib/gtl/inlined_vector.h"
 #include "tensorflow/core/platform/types.h"
+#include "tensorflow/python/lib/core/bfloat16.h"
 #include "tensorflow/python/lib/core/ndarray_tensor_bridge.h"
 
 namespace tensorflow {
@@ -125,6 +126,10 @@ Status PyArray_TYPE_to_TF_DataType(PyArrayObject* array,
       // custom struct type.
       return PyArrayDescr_to_TF_DataType(descr, out_tf_datatype);
     default:
+      if (pyarray_type == Bfloat16NumpyType()) {
+        *out_tf_datatype = TF_BFLOAT16;
+        break;
+      }
       // TODO(mrry): Support these.
       return errors::Internal("Unsupported feed type");
   }
diff --git a/tensorflow/python/lib/core/ndarray_tensor_bridge.cc b/tensorflow/python/lib/core/ndarray_tensor_bridge.cc
index 82c45f5a31..65e2178cda 100644
--- a/tensorflow/python/lib/core/ndarray_tensor_bridge.cc
+++ b/tensorflow/python/lib/core/ndarray_tensor_bridge.cc
@@ -21,6 +21,7 @@ limitations under the License.
 #include "tensorflow/c/c_api.h"
 #include "tensorflow/core/lib/core/errors.h"
 #include "tensorflow/core/platform/mutex.h"
+#include "tensorflow/python/lib/core/bfloat16.h"
 #include "tensorflow/python/lib/core/ndarray_tensor_bridge.h"
 
 namespace tensorflow {
@@ -175,7 +176,7 @@ Status TF_DataType_to_PyArray_TYPE(TF_DataType tf_datatype,
       *out_pyarray_type = NPY_INT32;
       break;
     case TF_BFLOAT16:
-      *out_pyarray_type = NPY_UINT16;
+      *out_pyarray_type = Bfloat16NumpyType();
       break;
     default:
       return errors::Internal("Tensorflow type ", tf_datatype,
diff --git a/tensorflow/python/ops/math_ops.py b/tensorflow/python/ops/math_ops.py
index e2e23dccef..f9538be6c9 100644
--- a/tensorflow/python/ops/math_ops.py
+++ b/tensorflow/python/ops/math_ops.py
@@ -950,6 +950,7 @@ _TRUEDIV_TABLE = {
     dtypes.int16: dtypes.float32,
     dtypes.int32: dtypes.float64,
     dtypes.int64: dtypes.float64,
+    dtypes.bfloat16: None,
     dtypes.float16: None,
     dtypes.float32: None,
     dtypes.float64: None,
diff --git a/tensorflow/python/ops/nn_impl.py b/tensorflow/python/ops/nn_impl.py
index 00e3c7dc0f..18bf5897cf 100644
--- a/tensorflow/python/ops/nn_impl.py
+++ b/tensorflow/python/ops/nn_impl.py
@@ -863,7 +863,7 @@ def fused_batch_norm(
   # currently only use the V2 version for float16 inputs, which is not supported
   # by the V1 version.
   # pylint: disable=protected-access
-  if x.dtype == dtypes.float16:
+  if x.dtype == dtypes.float16 or x.dtype == dtypes.bfloat16:
     fused_batch_norm_func = gen_nn_ops._fused_batch_norm_v2
   else:
     fused_batch_norm_func = gen_nn_ops._fused_batch_norm
diff --git a/tensorflow/python/ops/random_ops.py b/tensorflow/python/ops/random_ops.py
index a59578b422..a2264a7bdf 100644
--- a/tensorflow/python/ops/random_ops.py
+++ b/tensorflow/python/ops/random_ops.py
@@ -220,8 +220,8 @@ def random_uniform(shape,
     ValueError: If `dtype` is integral and `maxval` is not specified.
   """
   dtype = dtypes.as_dtype(dtype)
-  if dtype not in (dtypes.float16, dtypes.float32, dtypes.float64, dtypes.int32,
-                   dtypes.int64):
+  if dtype not in (dtypes.float16, dtypes.bfloat16, dtypes.float32,
+                   dtypes.float64, dtypes.int32, dtypes.int64):
     raise ValueError("Invalid dtype %r" % dtype)
   if maxval is None:
     if dtype.is_integer:
diff --git a/tensorflow/python/tensorflow.i b/tensorflow/python/tensorflow.i
index d221dd523b..344702097f 100644
--- a/tensorflow/python/tensorflow.i
+++ b/tensorflow/python/tensorflow.i
@@ -33,6 +33,8 @@ limitations under the License.
 %include "tensorflow/python/client/tf_session.i"
 %include "tensorflow/python/client/device_lib.i"
 
+%include "tensorflow/python/lib/core/bfloat16.i"
+
 %include "tensorflow/python/lib/io/file_io.i"
 %include "tensorflow/python/training/quantize_training.i"
 %include "tensorflow/python/training/server_lib.i"
diff --git a/tensorflow/python/training/optimizer.py b/tensorflow/python/training/optimizer.py
index b31d02eb8d..56cf4d42ee 100644
--- a/tensorflow/python/training/optimizer.py
+++ b/tensorflow/python/training/optimizer.py
@@ -644,7 +644,8 @@ class Optimizer(object):
     Returns:
       Valid types for loss, variables and gradients.
     """
-    return set([dtypes.float16, dtypes.float32, dtypes.float64])
+    return set(
+        [dtypes.float16, dtypes.bfloat16, dtypes.float32, dtypes.float64])
 
   def _create_slots(self, var_list):
     """Create all slots needed by the variables.
-- 
GitLab


From e12c032397c54f41731d6924d38854eb2bbf5b4d Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 4 Dec 2017 15:17:41 -0800
Subject: [PATCH 0610/1225] hsv_in_yiq gpu implementation.

PiperOrigin-RevId: 177876455
---
 tensorflow/contrib/image/BUILD                | 39 +++++---
 .../image/kernels/adjust_hsv_in_yiq_op.cc     | 88 +++++++++----------
 .../image/kernels/adjust_hsv_in_yiq_op.h      | 87 ++++++++++++++++++
 .../kernels/adjust_hsv_in_yiq_op_gpu.cu.cc    | 84 ++++++++++++++++++
 .../kernels/adjust_hsv_in_yiq_op_test.cc      | 48 ++++++++++
 .../kernel_tests/distort_image_ops_test.py    | 10 ++-
 6 files changed, 294 insertions(+), 62 deletions(-)
 create mode 100644 tensorflow/contrib/image/kernels/adjust_hsv_in_yiq_op.h
 create mode 100644 tensorflow/contrib/image/kernels/adjust_hsv_in_yiq_op_gpu.cu.cc
 create mode 100644 tensorflow/contrib/image/kernels/adjust_hsv_in_yiq_op_test.cc

diff --git a/tensorflow/contrib/image/BUILD b/tensorflow/contrib/image/BUILD
index 157e97d237..54502cfc6e 100755
--- a/tensorflow/contrib/image/BUILD
+++ b/tensorflow/contrib/image/BUILD
@@ -9,6 +9,7 @@ package(default_visibility = ["//visibility:public"])
 
 load(
     "//tensorflow:tensorflow.bzl",
+    "tf_cc_test",
     "tf_custom_op_library",
     "tf_gen_op_libs",
     "tf_gen_op_wrapper_py",
@@ -106,10 +107,33 @@ tf_custom_op_library(
     name = "python/ops/_distort_image_ops.so",
     srcs = [
         "kernels/adjust_hsv_in_yiq_op.cc",
+        "kernels/adjust_hsv_in_yiq_op.h",
         "ops/distort_image_ops.cc",
     ],
+    gpu_srcs = [
+        "kernels/adjust_hsv_in_yiq_op_gpu.cu.cc",
+        "kernels/adjust_hsv_in_yiq_op.h",
+    ],
     deps = [
-        "@protobuf_archive//:protobuf",
+        "//tensorflow/core/kernels:gpu_util_hdrs",
+    ],
+)
+
+tf_cc_test(
+    name = "adjust_hsv_in_yiq_op_test",
+    size = "small",
+    srcs = [
+        "kernels/adjust_hsv_in_yiq_op.h",
+        "kernels/adjust_hsv_in_yiq_op_test.cc",
+    ],
+    deps = [
+        "//tensorflow/core:framework",
+        "//tensorflow/core:test",
+        "//tensorflow/core:test_main",
+        "//tensorflow/core:testlib",
+        "//tensorflow/core/kernels:ops_testutil",
+        "//tensorflow/core/kernels:ops_util",
+        "//third_party/eigen3",
     ],
 )
 
@@ -122,19 +146,6 @@ tf_gen_op_wrapper_py(
     deps = [":distort_image_ops_op_lib"],
 )
 
-cc_library(
-    name = "distort_image_ops_cc",
-    srcs = [
-        "kernels/adjust_hsv_in_yiq_op.cc",
-    ],
-    deps = [
-        "//tensorflow/core:framework",
-        "//tensorflow/core:lib",
-        "//third_party/eigen3",
-    ],
-    alwayslink = 1,
-)
-
 py_library(
     name = "distort_image_py",
     srcs = [
diff --git a/tensorflow/contrib/image/kernels/adjust_hsv_in_yiq_op.cc b/tensorflow/contrib/image/kernels/adjust_hsv_in_yiq_op.cc
index f4962ed69d..478b716d88 100644
--- a/tensorflow/contrib/image/kernels/adjust_hsv_in_yiq_op.cc
+++ b/tensorflow/contrib/image/kernels/adjust_hsv_in_yiq_op.cc
@@ -12,14 +12,15 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
-#include <cmath>
+#if GOOGLE_CUDA
+#define EIGEN_USE_GPU
+#endif
+
+#include "tensorflow/contrib/image/kernels/adjust_hsv_in_yiq_op.h"
 #include <memory>
-#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
-#include "tensorflow/core/framework/op_kernel.h"
 #include "tensorflow/core/framework/register_types.h"
 #include "tensorflow/core/framework/tensor.h"
 #include "tensorflow/core/framework/tensor_shape.h"
-#include "tensorflow/core/framework/types.h"
 #include "tensorflow/core/lib/core/status.h"
 #include "tensorflow/core/platform/logging.h"
 #include "tensorflow/core/util/work_sharder.h"
@@ -36,10 +37,10 @@ class AdjustHsvInYiqOpBase : public OpKernel {
 
   struct ComputeOptions {
     const Tensor* input = nullptr;
+    Tensor* output = nullptr;
     const Tensor* delta_h = nullptr;
     const Tensor* scale_s = nullptr;
     const Tensor* scale_v = nullptr;
-    Tensor* output = nullptr;
     int64 channel_count = 0;
   };
 
@@ -65,7 +66,7 @@ class AdjustHsvInYiqOpBase : public OpKernel {
                                         scale_v.shape().DebugString()));
     auto channels = input.dim_size(input.dims() - 1);
     OP_REQUIRES(
-        context, channels == 3,
+        context, channels == kChannelSize,
         errors::InvalidArgument("input must have 3 channels but instead has ",
                                 channels, " channels."));
 
@@ -101,53 +102,21 @@ class AdjustHsvInYiqOp<CPUDevice> : public AdjustHsvInYiqOpBase {
     const Tensor* input = options.input;
     Tensor* output = options.output;
     const int64 channel_count = options.channel_count;
-    static const int kChannelSize = 3;
     auto input_data = input->shaped<float, 2>({channel_count, kChannelSize});
     const float delta_h = options.delta_h->scalar<float>()();
     const float scale_s = options.scale_s->scalar<float>()();
     const float scale_v = options.scale_v->scalar<float>()();
     auto output_data = output->shaped<float, 2>({channel_count, kChannelSize});
+    float tranformation_matrix[kChannelSize * kChannelSize] = {0};
+    internal::compute_tranformation_matrix<kChannelSize * kChannelSize>(
+        delta_h, scale_s, scale_v, tranformation_matrix);
     const int kCostPerChannel = 10;
     const DeviceBase::CpuWorkerThreads& worker_threads =
         *context->device()->tensorflow_cpu_worker_threads();
     Shard(worker_threads.num_threads, worker_threads.workers, channel_count,
           kCostPerChannel,
-          [channel_count, &input_data, &output_data, delta_h, scale_s, scale_v](
+          [channel_count, &input_data, &output_data, &tranformation_matrix](
               int64 start_channel, int64 end_channel) {
-            // Using approximate linear transfomation described in:
-            // https://beesbuzz.biz/code/hsv_color_transforms.php
-            /** Get the constants from sympy
-             from sympy import Matrix
-             from sympy.abc import u, w
-             # Projection matrix to YIQ. http://en.wikipedia.org/wiki/YIQ
-             tyiq = Matrix([[0.299, 0.587, 0.114],
-                            [0.596, -0.274, -0.322],
-                            [0.211, -0.523, 0.312]])
-             # Hue rotation matrix in YIQ space.
-             hue_proj = Matrix(3,3, [v, 0, 0, 0, vsu, -vsw, 0, vsw, vsu])
-             m = tyiq.inv() * hue_proj * tyiq
-             **/
-            // TODO(huangyp): directly compute the projection matrix from tyiq.
-            static const float t[kChannelSize][kChannelSize][kChannelSize] = {
-                {{.299, .701, .16862179492229},
-                 {.587, -.587, .329804745287403},
-                 {.114, -.114, -0.498426540209694}},
-                {{.299, -.299, -.327963394172371},
-                 {.587, .413, .0346106879248821},
-                 {.114, -.114, .293352706247489}},
-                {{.299, -.299, 1.24646136576682},
-                 {.587, -.587, -1.04322888291964},
-                 {.114, .886, -.203232482847173}}};
-            float m[kChannelSize][kChannelSize] = {{0.}};
-            float su = scale_s * std::cos(delta_h);
-            float sw = scale_s * std::sin(delta_h);
-            for (int q_index = 0; q_index < kChannelSize; q_index++) {
-              for (int p_index = 0; p_index < kChannelSize; p_index++) {
-                m[q_index][p_index] = scale_v * (t[q_index][p_index][0] +
-                                                 t[q_index][p_index][1] * su +
-                                                 t[q_index][p_index][2] * sw);
-              }
-            }
             // Applying projection matrix to input RGB vectors.
             const float* p = input_data.data() + start_channel * kChannelSize;
             float* q = output_data.data() + start_channel * kChannelSize;
@@ -155,7 +124,9 @@ class AdjustHsvInYiqOp<CPUDevice> : public AdjustHsvInYiqOpBase {
               for (int q_index = 0; q_index < kChannelSize; q_index++) {
                 q[q_index] = 0;
                 for (int p_index = 0; p_index < kChannelSize; p_index++) {
-                  q[q_index] += m[q_index][p_index] * p[p_index];
+                  q[q_index] +=
+                      p[p_index] *
+                      tranformation_matrix[q_index + kChannelSize * p_index];
                 }
               }
               p += kChannelSize;
@@ -165,8 +136,33 @@ class AdjustHsvInYiqOp<CPUDevice> : public AdjustHsvInYiqOpBase {
   }
 };
 
-REGISTER_KERNEL_BUILDER(Name("AdjustHsvInYiq").Device(DEVICE_CPU),
-                        AdjustHsvInYiqOp<CPUDevice>);
+REGISTER_KERNEL_BUILDER(
+    Name("AdjustHsvInYiq").Device(DEVICE_CPU).TypeConstraint<float>("T"),
+    AdjustHsvInYiqOp<CPUDevice>);
+
+#if GOOGLE_CUDA
+template <>
+class AdjustHsvInYiqOp<GPUDevice> : public AdjustHsvInYiqOpBase {
+ public:
+  explicit AdjustHsvInYiqOp(OpKernelConstruction* context)
+      : AdjustHsvInYiqOpBase(context) {}
+
+  void DoCompute(OpKernelContext* ctx, const ComputeOptions& options) override {
+    const int64 number_of_elements = options.input->NumElements();
+    if (number_of_elements <= 0) {
+      return;
+    }
+    const float* delta_h = options.delta_h->flat<float>().data();
+    const float* scale_s = options.scale_s->flat<float>().data();
+    const float* scale_v = options.scale_v->flat<float>().data();
+    functor::AdjustHsvInYiqGPU()(ctx, options.channel_count, options.input,
+                                 delta_h, scale_s, scale_v, options.output);
+  }
+};
+
+REGISTER_KERNEL_BUILDER(
+    Name("AdjustHsvInYiq").Device(DEVICE_GPU).TypeConstraint<float>("T"),
+    AdjustHsvInYiqOp<GPUDevice>);
+#endif
 
-// TODO(huangyp): add the GPU kernel
 }  // namespace tensorflow
diff --git a/tensorflow/contrib/image/kernels/adjust_hsv_in_yiq_op.h b/tensorflow/contrib/image/kernels/adjust_hsv_in_yiq_op.h
new file mode 100644
index 0000000000..194ae2ba47
--- /dev/null
+++ b/tensorflow/contrib/image/kernels/adjust_hsv_in_yiq_op.h
@@ -0,0 +1,87 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_IMAGE_KERNELS_ADJUST_HSV_IN_YIQ_OP_H_
+#define THIRD_PARTY_TENSORFLOW_CONTRIB_IMAGE_KERNELS_ADJUST_HSV_IN_YIQ_OP_H_
+
+#if GOOGLE_CUDA
+#define EIGEN_USE_GPU
+#endif  // GOOGLE_CUDA
+
+#include <cmath>
+#include "third_party/eigen3/Eigen/Core"
+#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
+
+#include "tensorflow/core/framework/op_kernel.h"
+#include "tensorflow/core/framework/register_types.h"
+#include "tensorflow/core/framework/types.h"
+
+namespace tensorflow {
+
+static constexpr int kChannelSize = 3;
+
+namespace internal {
+
+template <int MATRIX_SIZE>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void compute_tranformation_matrix(
+    const float delta_h, const float scale_s, const float scale_v,
+    float* matrix) {
+  static_assert(MATRIX_SIZE == kChannelSize * kChannelSize,
+                "Size of matrix should be 9.");
+  // Projection matrix from RGB to YIQ. Numbers from wikipedia
+  // https://en.wikipedia.org/wiki/YIQ
+  Eigen::Matrix3f yiq;
+  /* clang-format off */
+  yiq << 0.299, 0.587, 0.114,
+         0.596, -0.274, -0.322,
+         0.211, -0.523, 0.312;
+  Eigen::Matrix3f yiq_inverse;
+  yiq_inverse << 1, 0.95617069, 0.62143257,
+                 1, -0.2726886, -0.64681324,
+                 1, -1.103744, 1.70062309;
+  /* clang-format on */
+  // Construct hsv linear transformation matrix in YIQ space.
+  // https://beesbuzz.biz/code/hsv_color_transforms.php
+  float vsu = scale_v * scale_s * std::cos(delta_h);
+  float vsw = scale_v * scale_s * std::sin(delta_h);
+  Eigen::Matrix3f hsv_transform;
+  /* clang-format off */
+  hsv_transform << scale_v, 0, 0,
+                   0, vsu, -vsw,
+                   0, vsw, vsu;
+  /* clang-format on */
+  // Compute final transformation matrix = inverse_yiq * hsv_transform * yiq
+  Eigen::Map<Eigen::Matrix<float, 3, 3, Eigen::ColMajor>> eigen_matrix(matrix);
+  eigen_matrix = yiq_inverse * hsv_transform * yiq;
+}
+}  // namespace internal
+
+#if GOOGLE_CUDA
+typedef Eigen::GpuDevice GPUDevice;
+
+namespace functor {
+
+struct AdjustHsvInYiqGPU {
+  void operator()(OpKernelContext* ctx, int channel_count,
+                  const Tensor* const input, const float* const delta_h,
+                  const float* const scale_s, const float* const scale_v,
+                  Tensor* const output);
+};
+
+}  // namespace functor
+
+#endif  // GOOGLE_CUDA
+
+}  // namespace tensorflow
+
+#endif  // THIRD_PARTY_TENSORFLOW_CONTRIB_IMAGE_KERNELS_ADJUST_HSV_IN_YIQ_OP_H_
diff --git a/tensorflow/contrib/image/kernels/adjust_hsv_in_yiq_op_gpu.cu.cc b/tensorflow/contrib/image/kernels/adjust_hsv_in_yiq_op_gpu.cu.cc
new file mode 100644
index 0000000000..b71ff9cd50
--- /dev/null
+++ b/tensorflow/contrib/image/kernels/adjust_hsv_in_yiq_op_gpu.cu.cc
@@ -0,0 +1,84 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#if GOOGLE_CUDA
+
+#define EIGEN_USE_GPU
+
+#include "tensorflow/contrib/image/kernels/adjust_hsv_in_yiq_op.h"
+#include "tensorflow/core/kernels/gpu_utils.h"
+#include "tensorflow/core/platform/stream_executor.h"
+#include "tensorflow/core/util/cuda_kernel_helper.h"
+
+namespace tensorflow {
+
+namespace internal {
+
+__global__ void compute_tranformation_matrix_cuda(const float* const delta_h,
+                                                  const float* const scale_s,
+                                                  const float* const scale_v,
+                                                  float* const matrix,
+                                                  const int matrix_size) {
+  if (matrix_size == kChannelSize * kChannelSize) {
+    compute_tranformation_matrix<kChannelSize * kChannelSize>(
+        *delta_h, *scale_s, *scale_v, matrix);
+  }
+}
+}  // namespace internal
+
+namespace functor {
+
+void AdjustHsvInYiqGPU::operator()(OpKernelContext* ctx, int channel_count,
+                                   const Tensor* const input,
+                                   const float* const delta_h,
+                                   const float* const scale_s,
+                                   const float* const scale_v,
+                                   Tensor* const output) {
+  const uint64 m = channel_count;
+  const uint64 k = kChannelSize;
+  const uint64 n = kChannelSize;
+  auto* cu_stream = ctx->eigen_device<GPUDevice>().stream();
+  OP_REQUIRES(ctx, cu_stream, errors::Internal("No GPU stream available."));
+  Tensor tranformation_matrix;
+  OP_REQUIRES_OK(ctx, ctx->allocate_temp(
+                          DT_FLOAT, TensorShape({kChannelSize * kChannelSize}),
+                          &tranformation_matrix));
+  // TODO(huangyp): It takes about 3.5 us to comute tranformation_matrix
+  // with one thread. Improve its performance if necessary.
+  internal::compute_tranformation_matrix_cuda<<<1, 1, 0, cu_stream>>>(
+      delta_h, scale_s, scale_v, tranformation_matrix.flat<float>().data(),
+      tranformation_matrix.flat<float>().size());
+  // Call cuBlas C = A * B directly.
+  auto no_transpose = perftools::gputools::blas::Transpose::kNoTranspose;
+  auto a_ptr =
+      AsDeviceMemory(input->flat<float>().data(), input->flat<float>().size());
+  auto b_ptr = AsDeviceMemory(tranformation_matrix.flat<float>().data(),
+                              tranformation_matrix.flat<float>().size());
+  auto c_ptr = AsDeviceMemory(output->flat<float>().data(),
+                              output->flat<float>().size());
+  auto* stream = ctx->op_device_context()->stream();
+  OP_REQUIRES(ctx, stream, errors::Internal("No GPU stream available."));
+  // TODO(huangyp): share/use autotune cublas algorithms in Matmul.op.
+  bool blas_launch_status =
+      stream
+          ->ThenBlasGemm(no_transpose, no_transpose, n, m, k, 1.0f, b_ptr, n,
+                         a_ptr, k, 0.0f, &c_ptr, n)
+          .ok();
+  if (!blas_launch_status) {
+    ctx->SetStatus(errors::Internal("Blas SGEMM launch failed : m=", m,
+                                    ", n=", n, ", k=", k));
+  }
+}
+}  // namespace functor
+}  // namespace tensorflow
+#endif  // GOOGLE_CUDA
diff --git a/tensorflow/contrib/image/kernels/adjust_hsv_in_yiq_op_test.cc b/tensorflow/contrib/image/kernels/adjust_hsv_in_yiq_op_test.cc
new file mode 100644
index 0000000000..4cbbd27784
--- /dev/null
+++ b/tensorflow/contrib/image/kernels/adjust_hsv_in_yiq_op_test.cc
@@ -0,0 +1,48 @@
+/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/contrib/image/kernels/adjust_hsv_in_yiq_op.h"
+#include "tensorflow/core/framework/tensor.h"
+#include "tensorflow/core/kernels/ops_testutil.h"
+#include "tensorflow/core/kernels/ops_util.h"
+#include "tensorflow/core/platform/test.h"
+
+namespace tensorflow {
+
+class AdjustHsvInYiqOpTest : public OpsTestBase {
+ protected:
+};
+
+TEST_F(AdjustHsvInYiqOpTest, IdentiyTransformMatrix) {
+  Tensor matrix(allocator(), DT_FLOAT, TensorShape({9}));
+  internal::compute_tranformation_matrix<9>(0.0, 1.0, 1.0,
+                                            matrix.flat<float>().data());
+  Tensor expected(allocator(), DT_FLOAT, TensorShape({9}));
+  test::FillValues<float>(&expected, {1, 0, 0, 0, 1, 0, 0, 0, 1});
+  test::ExpectClose(matrix, expected);
+}
+
+TEST_F(AdjustHsvInYiqOpTest, ScaleValueTransformMatrix) {
+  float scale_v = 2.3;
+  Tensor matrix(allocator(), DT_FLOAT, TensorShape({9}));
+  internal::compute_tranformation_matrix<9>(0.0, 1.0, scale_v,
+                                            matrix.flat<float>().data());
+  Tensor expected(allocator(), DT_FLOAT, TensorShape({9}));
+  test::FillValues<float>(&expected,
+                          {scale_v, 0, 0, 0, scale_v, 0, 0, 0, scale_v});
+  test::ExpectClose(matrix, expected);
+}
+
+}  // end namespace tensorflow
diff --git a/tensorflow/contrib/image/python/kernel_tests/distort_image_ops_test.py b/tensorflow/contrib/image/python/kernel_tests/distort_image_ops_test.py
index b85f19d29b..a495b58b7f 100644
--- a/tensorflow/contrib/image/python/kernel_tests/distort_image_ops_test.py
+++ b/tensorflow/contrib/image/python/kernel_tests/distort_image_ops_test.py
@@ -172,7 +172,7 @@ class AdjustValueInYiqTest(test_util.TensorFlowTestCase):
           raise AssertionError('Invalid test style: %s' % (test_style))
         y_np = self._adjust_value_in_yiq_np(x_np, scale)
         y_tf = self._adjust_value_in_yiq_tf(x_np, scale)
-        self.assertAllClose(y_tf, y_np, rtol=2e-5, atol=1e-5)
+        self.assertAllClose(y_tf, y_np, rtol=2e-4, atol=1e-4)
 
   def test_invalid_shapes(self):
     x_np = np.random.rand(2, 3) * 255.
@@ -237,7 +237,7 @@ class AdjustSaturationInYiqTest(test_util.TensorFlowTestCase):
             raise AssertionError('Invalid test style: %s' % (test_style))
           y_baseline = self._adjust_saturation_in_yiq_np(x_np, scale)
           y_tf = self._adjust_saturation_in_yiq_tf(x_np, scale)
-          self.assertAllClose(y_tf, y_baseline, rtol=2e-5, atol=1e-5)
+          self.assertAllClose(y_tf, y_baseline, rtol=2e-4, atol=1e-4)
 
   def test_invalid_shapes(self):
     x_np = np.random.rand(2, 3) * 255.
@@ -291,6 +291,9 @@ class AdjustHueInYiqBenchmark(test.Benchmark):
   def benchmark_adjust_hue_in_yiqCpuAll(self):
     self._benchmark_adjust_hue_in_yiq('/cpu:0', None)
 
+  def benchmark_adjust_hue_in_yiq_gpu_all(self):
+    self._benchmark_adjust_hue_in_yiq(test.gpu_device_name(), None)
+
 
 class AdjustSaturationInYiqBenchmark(test.Benchmark):
 
@@ -333,6 +336,9 @@ class AdjustSaturationInYiqBenchmark(test.Benchmark):
   def benchmark_adjust_saturation_in_yiq_cpu_all(self):
     self._benchmark_adjust_saturation_in_yiq('/cpu:0', None)
 
+  def benchmark_adjust_saturation_in_yiq_gpu_all(self):
+    self._benchmark_adjust_saturation_in_yiq(test.gpu_device_name(), None)
+
 
 if __name__ == '__main__':
   googletest.main()
-- 
GitLab


From 0946c24012c9e26670d2e29679df13bf6f002fa8 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 4 Dec 2017 15:27:01 -0800
Subject: [PATCH 0611/1225] Add a single capacity prefetch to
 `tf.contrib.data.read_batch_features`.

PiperOrigin-RevId: 177877751
---
 tensorflow/contrib/data/python/ops/readers.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tensorflow/contrib/data/python/ops/readers.py b/tensorflow/contrib/data/python/ops/readers.py
index acb7a43211..347e5edc7b 100644
--- a/tensorflow/contrib/data/python/ops/readers.py
+++ b/tensorflow/contrib/data/python/ops/readers.py
@@ -179,6 +179,7 @@ def read_batch_features(file_pattern,
     dataset = dataset.shuffle(capacity)
   dataset = dataset.batch(batch_size)
   dataset = dataset.map(lambda x: parsing_ops.parse_example(x, features))
+  dataset = dataset.prefetch(1)
   iterator = dataset.make_one_shot_iterator()
   outputs = iterator.get_next()
   return outputs
-- 
GitLab


From c51221e358e1ba7fdf3f47919cdddcdd53816fe9 Mon Sep 17 00:00:00 2001
From: Peter Hawkins <phawkins@google.com>
Date: Mon, 4 Dec 2017 15:34:18 -0800
Subject: [PATCH 0612/1225] [XLA] Add a default implementation of
 Literal::ToString for rank >= 6 tensors.

PiperOrigin-RevId: 177878887
---
 tensorflow/compiler/xla/literal_util.cc | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/tensorflow/compiler/xla/literal_util.cc b/tensorflow/compiler/xla/literal_util.cc
index 250df5f4d5..42c9d21149 100644
--- a/tensorflow/compiler/xla/literal_util.cc
+++ b/tensorflow/compiler/xla/literal_util.cc
@@ -715,7 +715,13 @@ string Literal::ToString(bool print_layout) const {
     pieces.push_back("}");
   } else {
     pieces.push_back(shape_to_string(shape()));
-    pieces.push_back(" {...}");
+    pieces.push_back(" {");
+    EachCellAsString(
+        [&](tensorflow::gtl::ArraySlice<int64> indices, const string& value) {
+          pieces.push_back(" ");
+          pieces.push_back(value);
+        });
+    pieces.push_back("}");
   }
 
   return tensorflow::str_util::Join(pieces, "");
-- 
GitLab


From eaf51240fa648451766ac0fbabe16619dc48489b Mon Sep 17 00:00:00 2001
From: Justin Lebar <jlebar@google.com>
Date: Mon, 4 Dec 2017 15:40:15 -0800
Subject: [PATCH 0613/1225] [XLA:GPU] Use more threads per thread block.

Before this change, we supported two algorithms for choosing the number
of threads per block:

 * "optimize-for-latency" algorithm assumed that each thread would want
   the maximum number of registers it could have, and chose a block size
   small enough to accommodate this.
 * "optimize-for-throughput" algorithm packed as many threads into a
   block as possible.

In practice we always chose the optimize-for-latency algorithm.

This change removes the choice of algorithm and changes us to
unconditionally use a new one.  In our new algorithm, we choose the
smallest block size that still has the potential to allow the GPU to
reach maximum occupancy.

When each thread's register usage is small, we can pack many of these
blocks into one SM and hit maximum occupancy.  When the threads'
register usage is larger, we degrade gracefully (unlike with larger
block sizes, where the occupancy degredation is more quantized).

On our benchmarks, this is a moderate (0-10%) speedup on K40, and a
large (10-25%) speedup on P100.

PiperOrigin-RevId: 177879741
---
 .../xla/service/gpu/partition_assignment.cc   | 58 ++++++++++---------
 .../xla/service/gpu/partition_assignment.h    | 11 +---
 2 files changed, 31 insertions(+), 38 deletions(-)

diff --git a/tensorflow/compiler/xla/service/gpu/partition_assignment.cc b/tensorflow/compiler/xla/service/gpu/partition_assignment.cc
index d0d2deee24..6cf280df05 100644
--- a/tensorflow/compiler/xla/service/gpu/partition_assignment.cc
+++ b/tensorflow/compiler/xla/service/gpu/partition_assignment.cc
@@ -44,37 +44,41 @@ std::ostream& operator<<(std::ostream& out,
 
 // Calculates the launch dimensions used to invoke `hlo`.
 LaunchDimensions CalculateLaunchDimensions(
-    const Shape& shape, const se::DeviceDescription& device_desc,
-    PartitionStrategy partition_strategy) {
-  int64 warp_size = device_desc.threads_per_warp();
-
+    const Shape& shape, const se::DeviceDescription& device_desc) {
   int64 num_elements = ShapeUtil::ElementsIn(shape);
   if (num_elements <= 1) {
     return LaunchDimensions();
   }
 
-  // Calculate the number of threads per block.
-  // Initialize threads_per_block as the threads-per-block limit.
-  int64 threads_per_block = device_desc.threads_per_block_limit();
-  VLOG(2) << "Initial # of threads per block = " << threads_per_block;
-
-  if (partition_strategy == PartitionStrategy::kLatency) {
-    // Limit the thread count to allow maximum number of registers per thread.
-    // TODO(b/28560520): We don't have to assume the emitted kernel will use up
-    // all the registers. We could use ptxas to examine the actual number of
-    // register used, and set the thread count accordingly.
-    int64 threads_per_block_limit_due_to_registers =
-        device_desc.registers_per_core_limit() /
-        device_desc.registers_per_thread_limit();
-    CHECK_NE(0, threads_per_block_limit_due_to_registers);
-    if (threads_per_block_limit_due_to_registers < threads_per_block) {
-      threads_per_block =
-          // Make `threads_per_block` a multiple of warp size to use GPU
-          // efficiently.
-          warp_size *
-          std::max(1LL, threads_per_block_limit_due_to_registers / warp_size);
-      VLOG(2) << "Update # of threads per block due to register pressure = "
-              << threads_per_block;
+  // Since we don't do any inter-warp communication, we're free to choose any
+  // block size we want, subject to hardware constraints.  We choose the
+  // smallest block size that allows the GPU to reach full occupancy (assuming
+  // the kernel uses sufficiently few registers).  This gives us max performance
+  // when the kernel uses few registers, and lets us scale down gracefully as
+  // the kernel uses more registers.
+  //
+  // Specifically, we choose the number of threads per block such that
+  //
+  //   <num threads per block> * <max blocks per core> = <max threads per core>
+
+  auto threads_per_core = device_desc.threads_per_core_limit();
+  auto blocks_per_core = device_desc.blocks_per_core_limit();
+  int64 threads_per_block;
+  if (threads_per_core != 0 && blocks_per_core != 0) {
+    threads_per_block = device_desc.threads_per_core_limit() /
+                        device_desc.blocks_per_core_limit();
+  } else {
+    static std::atomic<int64> log_count{0};
+    if (log_count.fetch_add(1) < 8) {
+      LOG(WARNING) << "Attempting to calculate launch dimensions for GPU "
+                      "without full information about its capabilities.  "
+                      "StreamExecutor's PopulateDeviceDescription should be "
+                      "updated for this device.";
+    }
+    threads_per_block = device_desc.threads_per_warp();
+    if (threads_per_block == 0) {
+      // Fall back to *something* if we can't even get num threads per warp.
+      threads_per_block = 32;
     }
   }
 
@@ -84,8 +88,6 @@ LaunchDimensions CalculateLaunchDimensions(
             << threads_per_block << ") because the latter is smaller.";
   }
 
-  // Calculate the block count. We copy the strategy used by Eigen:
-  // eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h
   int64 block_count = CeilOfRatio(num_elements, threads_per_block);
   VLOG(2) << tensorflow::strings::Printf(
       "Initialized the block count to ceil(# of elements / threads per "
diff --git a/tensorflow/compiler/xla/service/gpu/partition_assignment.h b/tensorflow/compiler/xla/service/gpu/partition_assignment.h
index 8f7fce884a..0bf463a6ef 100644
--- a/tensorflow/compiler/xla/service/gpu/partition_assignment.h
+++ b/tensorflow/compiler/xla/service/gpu/partition_assignment.h
@@ -30,14 +30,6 @@ limitations under the License.
 namespace xla {
 namespace gpu {
 
-enum class PartitionStrategy {
-  // Optimized for latency by allowing maximum number of registers per thread.
-  kLatency,
-  // Optimized for throughput. This may limit registers per thread and cause
-  // longer latency.
-  kThroughput
-};
-
 // Encapsulates the launch dimensions of a kernel, e.g., the block count and the
 // number of threads per block.
 class LaunchDimensions {
@@ -66,8 +58,7 @@ std::ostream& operator<<(std::ostream& out,
 
 LaunchDimensions CalculateLaunchDimensions(
     const Shape& shape,
-    const perftools::gputools::DeviceDescription& device_desc,
-    PartitionStrategy partition_strategy = PartitionStrategy::kLatency);
+    const perftools::gputools::DeviceDescription& device_desc);
 
 }  // namespace gpu
 }  // namespace xla
-- 
GitLab


From aa8b75a1b06b612a3abeaa2f164bc3d5a9eaf5ec Mon Sep 17 00:00:00 2001
From: Jingyue Wu <jingyue@google.com>
Date: Mon, 4 Dec 2017 16:11:16 -0800
Subject: [PATCH 0614/1225] Fix minor typos in the doc of SpaceToDepth and
 DepthToSpace.

PiperOrigin-RevId: 177884096
---
 tensorflow/core/api_def/base_api/api_def_DepthToSpace.pbtxt | 2 +-
 tensorflow/core/api_def/base_api/api_def_SpaceToDepth.pbtxt | 2 +-
 tensorflow/core/ops/array_ops.cc                            | 4 ++--
 3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/tensorflow/core/api_def/base_api/api_def_DepthToSpace.pbtxt b/tensorflow/core/api_def/base_api/api_def_DepthToSpace.pbtxt
index e7a18cd6b4..d20b47a3ed 100644
--- a/tensorflow/core/api_def/base_api/api_def_DepthToSpace.pbtxt
+++ b/tensorflow/core/api_def/base_api/api_def_DepthToSpace.pbtxt
@@ -28,7 +28,7 @@ with the following options:
   "NHWC": `[ batch, height, width, channels ]`
   "NCHW": `[ batch, channels, height, width ]`
   "NCHW_VECT_C":
-      `qint8 [ batch, channels / 4, height, width, channels % 4 ]`
+      `qint8 [ batch, channels / 4, height, width, 4 ]`
 
 It is useful to consider the operation as transforming a 6-D Tensor.
 e.g. for data_format = NHWC,
diff --git a/tensorflow/core/api_def/base_api/api_def_SpaceToDepth.pbtxt b/tensorflow/core/api_def/base_api/api_def_SpaceToDepth.pbtxt
index 8fd3966f70..b808ff5f9c 100644
--- a/tensorflow/core/api_def/base_api/api_def_SpaceToDepth.pbtxt
+++ b/tensorflow/core/api_def/base_api/api_def_SpaceToDepth.pbtxt
@@ -25,7 +25,7 @@ with the following options:
   "NHWC": `[ batch, height, width, channels ]`
   "NCHW": `[ batch, channels, height, width ]`
   "NCHW_VECT_C":
-      `qint8 [ batch, channels / 4, height, width, channels % 4 ]`
+      `qint8 [ batch, channels / 4, height, width, 4 ]`
 
 It is useful to consider the operation as transforming a 6-D Tensor.
 e.g. for data_format = NHWC,
diff --git a/tensorflow/core/ops/array_ops.cc b/tensorflow/core/ops/array_ops.cc
index 36d27ea110..1fbd123515 100644
--- a/tensorflow/core/ops/array_ops.cc
+++ b/tensorflow/core/ops/array_ops.cc
@@ -4245,7 +4245,7 @@ with the following options:
   "NHWC": `[ batch, height, width, channels ]`
   "NCHW": `[ batch, channels, height, width ]`
   "NCHW_VECT_C":
-      `qint8 [ batch, channels / 4, height, width, channels % 4 ]`
+      `qint8 [ batch, channels / 4, height, width, 4 ]`
 
 It is useful to consider the operation as transforming a 6-D Tensor.
 e.g. for data_format = NHWC,
@@ -4389,7 +4389,7 @@ with the following options:
   "NHWC": `[ batch, height, width, channels ]`
   "NCHW": `[ batch, channels, height, width ]`
   "NCHW_VECT_C":
-      `qint8 [ batch, channels / 4, height, width, channels % 4 ]`
+      `qint8 [ batch, channels / 4, height, width, 4 ]`
 
 It is useful to consider the operation as transforming a 6-D Tensor.
 e.g. for data_format = NHWC,
-- 
GitLab


From a5b01ffa570e9c2cf4e5989b2b2a13bc406e9896 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 4 Dec 2017 16:26:35 -0800
Subject: [PATCH 0615/1225] Correct trivial spelling error in
 internal_convert_to_tensor

PiperOrigin-RevId: 177886163
---
 tensorflow/python/framework/ops.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/python/framework/ops.py b/tensorflow/python/framework/ops.py
index 52c1c7d26c..61dd435106 100644
--- a/tensorflow/python/framework/ops.py
+++ b/tensorflow/python/framework/ops.py
@@ -965,7 +965,7 @@ def internal_convert_to_tensor(value,
     # Fast path for EagerTensors that don't need any conversion.
     if isinstance(value, EagerTensor):
       # Note that we don't check that value's dtype matches the dtype
-      # argument.  We exepct that the C runtime will do that checking
+      # argument.  We expect that the C runtime will do that checking
       # when we execute the kernel.
       return value
 
-- 
GitLab


From 868e2b344b3e7b2e6b069d5c6ec21d73959352c8 Mon Sep 17 00:00:00 2001
From: Justin Lebar <jlebar@google.com>
Date: Mon, 4 Dec 2017 16:54:58 -0800
Subject: [PATCH 0616/1225] [XLA] Add --print_result flag to replay_computation
 tool.

Before, we assumed that if you passed --use_fake_data, you didn't care
about the output of the computation.  With this patch, we decouple the
decision of using fake data from the decision of whether or not to print
the results.

PiperOrigin-RevId: 177889877
---
 .../compiler/xla/tools/replay_computation.cc  | 59 ++++++++++---------
 1 file changed, 32 insertions(+), 27 deletions(-)

diff --git a/tensorflow/compiler/xla/tools/replay_computation.cc b/tensorflow/compiler/xla/tools/replay_computation.cc
index ec3f6a0471..a7dc586205 100644
--- a/tensorflow/compiler/xla/tools/replay_computation.cc
+++ b/tensorflow/compiler/xla/tools/replay_computation.cc
@@ -59,19 +59,26 @@ namespace xla {
 namespace tools {
 namespace {
 
+// Command-line opts to this tool.  See main() for descriptions of these
+// fields.
+struct Options {
+  string fake_infeed_shape;
+  bool use_fake_data = false;
+  bool print_result = true;
+  int num_runs = 1;
+};
+
 // Invokes the given computation passing arbitrary data for every (unbound)
 // parameter if use_fake_data, Otherwise use recorded data if available.
 //
 // Similarly, infeeds fake data of shape fake_infeed_shape if it is provided;
 // otherwise, no infeed is performed.
 StatusOr<std::unique_ptr<Literal>> ReplayComputation(
-    const SessionModule& module, int num_runs,
-    tensorflow::StringPiece fake_infeed_shape, bool use_fake_data,
-    Client* client) {
+    const SessionModule& module, Client* client, const Options& opts) {
   TF_ASSIGN_OR_RETURN(Computation computation, client->LoadSnapshot(module));
 
   std::vector<std::unique_ptr<GlobalData>> arguments;
-  if (use_fake_data) {
+  if (opts.use_fake_data) {
     arguments = MakeFakeArgumentsOrDie(computation, client);
   } else {  // use recorded data if available
     for (const auto& proto : module.arguments()) {
@@ -86,12 +93,12 @@ StatusOr<std::unique_ptr<Literal>> ReplayComputation(
   // concurrent infeed occur via the fake_infeed_shape.
   tensorflow::gtl::optional<tensorflow::thread::ThreadPool> pool;
 
-  if (!fake_infeed_shape.empty()) {
+  if (!opts.fake_infeed_shape.empty()) {
     pool.emplace(tensorflow::Env::Default(), "infeed",
                  /*num_threads=*/1);
-    pool->Schedule([fake_infeed_shape, client]() {
+    pool->Schedule([opts, client]() {
       StatusOr<Shape> shape_status =
-          ShapeUtil::ParseShapeString(fake_infeed_shape);
+          ShapeUtil::ParseShapeString(opts.fake_infeed_shape);
       TF_CHECK_OK(shape_status.status());
       Shape shape = std::move(shape_status).ValueOrDie();
       StatusOr<std::unique_ptr<Literal>> data_status = MakeFakeLiteral(shape);
@@ -112,19 +119,19 @@ StatusOr<std::unique_ptr<Literal>> ReplayComputation(
   // Run the computation num_runs times, and return the result from the last
   // execution.
   std::unique_ptr<Literal> result;
-  for (int i = 0; i < num_runs; ++i) {
+  for (int i = 0; i < opts.num_runs; ++i) {
     ExecutionProfile profile;
-    if (use_fake_data) {
-      // If using fake data, execute the computation but don't bother retrieving
-      // the result -- presumably it's uninteresting, since our data is fake.
+    if (opts.print_result) {
+      TF_ASSIGN_OR_RETURN(result, client->ExecuteAndTransfer(
+                                      computation, execute_arguments,
+                                      /*execution_options=*/nullptr, &profile));
+    } else {
+      // If we're not printing the result, execute the computation but don't
+      // bother retrieving the result.  This can be a significant speedup.
       TF_RETURN_IF_ERROR(client
                              ->Execute(computation, execute_arguments,
                                        /*execution_options=*/nullptr, &profile)
                              .status());
-    } else {
-      TF_ASSIGN_OR_RETURN(result, client->ExecuteAndTransfer(
-                                      computation, execute_arguments,
-                                      /*execution_options=*/nullptr, &profile));
     }
     LOG(INFO) << "Execution took "
               << static_cast<double>(profile.compute_time_ns()) / 1e9 << "s";
@@ -133,16 +140,15 @@ StatusOr<std::unique_ptr<Literal>> ReplayComputation(
   return std::move(result);
 }
 
-int RealMain(tensorflow::gtl::ArraySlice<char*> args, int num_runs,
-             tensorflow::StringPiece fake_infeed_shape, bool use_fake_data) {
+int RealMain(tensorflow::gtl::ArraySlice<char*> args, const Options& opts) {
   Client* client = ClientLibrary::LocalClientOrDie();
   tensorflow::Env* env = tensorflow::Env::Default();
   int exit_status = EXIT_SUCCESS;
   for (char* arg : args) {
     SessionModule module;
     TF_CHECK_OK(tensorflow::ReadBinaryProto(env, arg, &module));
-    StatusOr<std::unique_ptr<Literal>> result_status = ReplayComputation(
-        module, num_runs, fake_infeed_shape, use_fake_data, client);
+    StatusOr<std::unique_ptr<Literal>> result_status =
+        ReplayComputation(module, client, opts);
     if (!result_status.ok()) {
       fprintf(stderr, "%s: error: %s\n", arg,
               result_status.status().ToString().c_str());
@@ -170,16 +176,15 @@ int RealMain(tensorflow::gtl::ArraySlice<char*> args, int num_runs,
 }  // namespace xla
 
 int main(int argc, char** argv) {
-  // Flags
-  xla::string fake_infeed_shape;
-  bool use_fake_data = false;
-  int num_runs = 1;
+  xla::tools::Options opts;
   const std::vector<tensorflow::Flag> flag_list = {
-      tensorflow::Flag("use_fake_data", &use_fake_data,
+      tensorflow::Flag("use_fake_data", &opts.use_fake_data,
                        "Replay computation using fake data"),
-      tensorflow::Flag("num_runs", &num_runs,
+      tensorflow::Flag("print_result", &opts.print_result,
+                       "Print the result of the computation to stdout"),
+      tensorflow::Flag("num_runs", &opts.num_runs,
                        "Number of times to run each computation"),
-      tensorflow::Flag("fake_infeed_shape", &fake_infeed_shape,
+      tensorflow::Flag("fake_infeed_shape", &opts.fake_infeed_shape,
                        "Shape of fake data to construct for (infinite) infeed"),
   };
   xla::string usage = tensorflow::Flags::Usage(argv[0], flag_list);
@@ -191,5 +196,5 @@ int main(int argc, char** argv) {
 
   tensorflow::gtl::ArraySlice<char*> args(argv, argc);
   args.pop_front();  // Pop off the binary name, argv[0]
-  return xla::tools::RealMain(args, num_runs, fake_infeed_shape, use_fake_data);
+  return xla::tools::RealMain(args, opts);
 }
-- 
GitLab


From 129892420278367aa774400455396e4e4d0734ba Mon Sep 17 00:00:00 2001
From: Skye Wanderman-Milne <skyewm@google.com>
Date: Mon, 4 Dec 2017 16:58:49 -0800
Subject: [PATCH 0617/1225] Fix bug with uniquified colocation attrs in
 ImportGraphDef.

The colocation attrs must be updated after all NodeDefs have been
processed. The nodes are processed and uniquified in topological
order, which allows us to update the inputs simultaneously due to the
topological ordering, but this doesn't work for the colocation groups.

I also considered updating all the NodeDefs with prefixes or unique
names at the very beginning, before starting conversion. This would
make the logic simpler, but require us to potentially keep a full copy
of all the NodeDefs in memory (so we could edit them), so I decided to
edit in-place after construction. We might want to consider this
alternate in future though.

PiperOrigin-RevId: 177890362
---
 tensorflow/core/graph/graph_constructor.cc    | 38 +++++++++----
 .../core/graph/graph_constructor_test.cc      | 55 +++++++++++++++++--
 2 files changed, 77 insertions(+), 16 deletions(-)

diff --git a/tensorflow/core/graph/graph_constructor.cc b/tensorflow/core/graph/graph_constructor.cc
index 63e3d5ee7d..0fb61fd9af 100644
--- a/tensorflow/core/graph/graph_constructor.cc
+++ b/tensorflow/core/graph/graph_constructor.cc
@@ -159,6 +159,7 @@ class GraphConstructor {
     TF_RETURN_IF_ERROR(UpdateVersionDef());
     TF_RETURN_IF_ERROR(PopulateReturnTensors());
     TF_RETURN_IF_ERROR(PopulateReturnNodes());
+    UpdateUniquifiedColocationNames();
     FixupSourceAndSinkEdges(g_);
     return Status::OK();
   }
@@ -201,6 +202,11 @@ class GraphConstructor {
   void UniquifyNames(const std::vector<bool>& input_already_exists,
                      NodeDef* node_def);
 
+  // Updates any constructed nodes' colocation group names if the name has been
+  // updated by UniquifyNames. This is called after all the nodes have been
+  // constructed so all the names have been uniquified if necessary.
+  void UpdateUniquifiedColocationNames();
+
   // Returns true if `name` already exists in `g_` (either as a node name or
   // prefix).
   bool NameExistsInGraph(StringPiece name);
@@ -785,18 +791,30 @@ void GraphConstructor::UniquifyNames(
     id.first = iter->second;
     node_def->set_input(i, id.ToString());
   }
-  // Update names of colocation groups
-  if (node_def->attr().find(kColocationAttrName) != node_def->attr().end()) {
-    auto* list =
-        node_def->mutable_attr()->at(kColocationAttrName).mutable_list();
-    for (int i = 0; i < list->s_size(); ++i) {
-      StringPiece v(list->s(i));
-      if (v.Consume(kColocationGroupPrefix)) {
-        auto iter = uniquified_names_.find(v.ToString());
-        if (iter == uniquified_names_.end()) continue;
-        list->set_s(i, strings::StrCat(kColocationGroupPrefix, iter->second));
+}
+
+void GraphConstructor::UpdateUniquifiedColocationNames() {
+  for (const auto& pair : gdef_nodes_) {
+    Node* node = pair.second.node;
+    if (node == nullptr) continue;
+    std::vector<string> coloc_values;
+    Status status =
+        GetNodeAttr(node->attrs(), kColocationAttrName, &coloc_values);
+    if (!status.ok()) continue;
+    bool updated = false;
+    for (int i = 0; i < coloc_values.size(); ++i) {
+      StringPiece val(coloc_values[i]);
+      if (val.Consume(kColocationGroupPrefix)) {
+        const auto& name_pair = uniquified_names_.find(val.ToString());
+        if (name_pair == uniquified_names_.end()) continue;
+        updated = true;
+        coloc_values[i] =
+            strings::StrCat(kColocationGroupPrefix, name_pair->second);
       }
     }
+    if (updated) {
+      node->AddAttr(kColocationAttrName, coloc_values);
+    }
   }
 }
 
diff --git a/tensorflow/core/graph/graph_constructor_test.cc b/tensorflow/core/graph/graph_constructor_test.cc
index 479f07f7f6..83aba6c9be 100644
--- a/tensorflow/core/graph/graph_constructor_test.cc
+++ b/tensorflow/core/graph/graph_constructor_test.cc
@@ -1898,13 +1898,22 @@ TEST_F(GraphConstructorTest, ImportGraphDef_UniquifyNames) {
   EXPECT_EQ(results.return_nodes[0]->name(), "A_5");
   EXPECT_EQ(results.return_nodes[1]->name(), "B_5");
   EXPECT_EQ(results.return_nodes[1]->def().input(0), "A:0");
+}
+
+TEST_F(GraphConstructorTest, ImportGraphDef_UniquifyNames_ColocationGroups) {
+  ShapeRefiner refiner(TF_GRAPH_DEF_VERSION, graph_.op_registry());
+
+  // Create nodes 'A' and 'b"
+  ExpectOK(
+      "node { name: 'A' op: 'TestInput' }"
+      "node { name: 'B' op: 'TestOneInputTwoOutputs' input: ['A'] }");
 
   // Check that colocation groups are updated
-  opts = ImportGraphDefOptions();
+  ImportGraphDefOptions opts;
   opts.uniquify_names = true;
   opts.return_nodes.push_back("A");
   opts.return_nodes.push_back("B");
-  results = ImportGraphDefResults();
+  ImportGraphDefResults results;
   ExpectOK(
       "node { name: 'A' op: 'TestInput' }"
       "node { name: 'B' op: 'TestOneInputTwoOutputs' input: ['A:0'] "
@@ -1912,14 +1921,48 @@ TEST_F(GraphConstructorTest, ImportGraphDef_UniquifyNames) {
       opts, &refiner, &results);
 
   ASSERT_EQ(results.return_nodes.size(), 2);
-  EXPECT_EQ(results.return_nodes[0]->name(), "A_6");
-  EXPECT_EQ(results.return_nodes[1]->name(), "B_6");
-  EXPECT_EQ(results.return_nodes[1]->def().input(0), "A_6:0");
+  EXPECT_EQ(results.return_nodes[0]->name(), "A_1");
+  EXPECT_EQ(results.return_nodes[1]->name(), "B_1");
   const AttrValue* class_attr =
       results.return_nodes[1]->attrs().Find(kColocationAttrName);
   ASSERT_TRUE(class_attr != nullptr);
   ASSERT_EQ(class_attr->list().s_size(), 1);
-  EXPECT_EQ(class_attr->list().s(0), "loc:@A_6");
+  EXPECT_EQ(class_attr->list().s(0), "loc:@A_1");
+
+  results = ImportGraphDefResults();
+  ExpectOK(
+      "node { name: 'A' op: 'TestInput' "
+      "       attr { key: '_class' value { list { s:'loc:@B' } } } }"
+      "node { name: 'B' op: 'TestOneInputTwoOutputs' input: ['A:0'] }",
+      opts, &refiner, &results);
+
+  ASSERT_EQ(results.return_nodes.size(), 2);
+  EXPECT_EQ(results.return_nodes[0]->name(), "A_2");
+  EXPECT_EQ(results.return_nodes[1]->name(), "B_2");
+  class_attr = results.return_nodes[0]->attrs().Find(kColocationAttrName);
+  ASSERT_TRUE(class_attr != nullptr);
+  ASSERT_EQ(class_attr->list().s_size(), 1);
+  EXPECT_EQ(class_attr->list().s(0), "loc:@B_2");
+
+  results = ImportGraphDefResults();
+  ExpectOK(
+      "node { name: 'A' op: 'TestInput' "
+      "       attr { key: '_class' value { list { s:'loc:@B' } } } }"
+      "node { name: 'B' op: 'TestOneInputTwoOutputs' input: ['A:0'] "
+      "       attr { key: '_class' value { list { s:'loc:@B' } } } }",
+      opts, &refiner, &results);
+
+  ASSERT_EQ(results.return_nodes.size(), 2);
+  EXPECT_EQ(results.return_nodes[0]->name(), "A_3");
+  EXPECT_EQ(results.return_nodes[1]->name(), "B_3");
+  class_attr = results.return_nodes[0]->attrs().Find(kColocationAttrName);
+  ASSERT_TRUE(class_attr != nullptr);
+  ASSERT_EQ(class_attr->list().s_size(), 1);
+  EXPECT_EQ(class_attr->list().s(0), "loc:@B_3");
+  class_attr = results.return_nodes[1]->attrs().Find(kColocationAttrName);
+  ASSERT_TRUE(class_attr != nullptr);
+  ASSERT_EQ(class_attr->list().s_size(), 1);
+  EXPECT_EQ(class_attr->list().s(0), "loc:@B_3");
 }
 
 TEST_F(GraphConstructorTest, ImportGraphDef_WithCycle) {
-- 
GitLab


From c414e2646f2e4c1ec9d67fa5ed06b91c2585e298 Mon Sep 17 00:00:00 2001
From: Yuanzhong Xu <yuanzx@google.com>
Date: Mon, 4 Dec 2017 17:02:23 -0800
Subject: [PATCH 0618/1225] Add BF16 tests for reduce-window.

PiperOrigin-RevId: 177890892
---
 .../xla/tests/client_library_test_base.cc     |   8 +
 .../xla/tests/client_library_test_base.h      |  18 +
 .../compiler/xla/tests/reduce_window_test.cc  | 607 +++++++++---------
 3 files changed, 329 insertions(+), 304 deletions(-)

diff --git a/tensorflow/compiler/xla/tests/client_library_test_base.cc b/tensorflow/compiler/xla/tests/client_library_test_base.cc
index 15bd273e9b..bbd6a87ca3 100644
--- a/tensorflow/compiler/xla/tests/client_library_test_base.cc
+++ b/tensorflow/compiler/xla/tests/client_library_test_base.cc
@@ -512,4 +512,12 @@ ClientLibraryTestBase::CreateParameterAndTransferLiteral(
   return data;
 }
 
+ComputationDataHandle ClientLibraryTestBase::CreateConstantFromLiteral(
+    const Literal& literal, ComputationBuilder* builder) {
+  return builder->ConstantLiteral(
+      use_bfloat16_ && literal.shape().element_type() == F32
+          ? *LiteralTestUtil::ConvertF32ToBF16(literal)
+          : literal);
+}
+
 }  // namespace xla
diff --git a/tensorflow/compiler/xla/tests/client_library_test_base.h b/tensorflow/compiler/xla/tests/client_library_test_base.h
index d8fe12a72d..d5f9ec858e 100644
--- a/tensorflow/compiler/xla/tests/client_library_test_base.h
+++ b/tensorflow/compiler/xla/tests/client_library_test_base.h
@@ -253,6 +253,21 @@ class ClientLibraryTestBase : public ::testing::Test {
       int64 parameter_number, const Literal& literal, const string& name,
       ComputationBuilder* builder, ComputationDataHandle* data_handle);
 
+  // Creates a constant instruction with the given literal. When the
+  // use_bfloat16 flag is set but the literal has F32 elements, the elements
+  // will be converted to BF16s.
+  ComputationDataHandle CreateConstantFromLiteral(const Literal& literal,
+                                                  ComputationBuilder* builder);
+
+  // Creates a constant instruction with the given array. When the use_bfloat16
+  // flag is set but the array has float elements, the elements will be
+  // converted to bfloat16s.
+  template <typename NativeT>
+  ComputationDataHandle CreateConstantFromArray(const Array<NativeT>& array,
+                                                ComputationBuilder* builder) {
+    return CreateConstantFromLiteral(*Literal::CreateFromArray(array), builder);
+  }
+
   // Creates a parameter instruction that wraps a given value and then stores
   // into "data_handle" the global handle for that parameter.
   //
@@ -315,6 +330,9 @@ class ClientLibraryTestBase : public ::testing::Test {
   bool use_bfloat16() const { return use_bfloat16_; }
   void set_use_bfloat16(bool value) { use_bfloat16_ = value; }
 
+  // The float type used in this test, BF16 or F32 according to use_bfloat16.
+  PrimitiveType FloatType() const { return use_bfloat16_ ? BF16 : F32; }
+
   Client* client_;
   ExecutionOptions execution_options_;
 
diff --git a/tensorflow/compiler/xla/tests/reduce_window_test.cc b/tensorflow/compiler/xla/tests/reduce_window_test.cc
index aa035f0ba5..fd73a82093 100644
--- a/tensorflow/compiler/xla/tests/reduce_window_test.cc
+++ b/tensorflow/compiler/xla/tests/reduce_window_test.cc
@@ -41,16 +41,42 @@ limitations under the License.
 namespace xla {
 namespace {
 
-class ReduceWindowTest : public ClientLibraryTestBase {
+// We can remove the GPU define here once we have complete GPU support in place.
+#if defined(XLA_TEST_BACKEND_CPU) || defined(XLA_TEST_BACKEND_CPU_PARALLEL) || \
+    defined(XLA_TEST_BACKEND_GPU)
+// Only tests F32.
+static std::array<bool, 1> use_bfloat16_params{false};
+#else
+// Tests both F32 and BF16.
+static std::array<bool, 2> use_bfloat16_params{false, true};
+#endif
+
+class ReduceWindowTestBase : public ClientLibraryTestBase {
  public:
-  ReduceWindowTest() : builder_(client_, TestName()) {}
+  ErrorSpec DefaultErrorSpec() const {
+    if (use_bfloat16()) {
+      return ErrorSpec(1e-1, 3e-2);
+    } else {
+      return ErrorSpec(1e-3, 1e-3);
+    }
+  }
+};
+
+class ReduceWindowTest : public ::testing::WithParamInterface<bool>,
+                         public ReduceWindowTestBase {
+ public:
+  ReduceWindowTest() : builder_(client_, TestName()) {
+    set_use_bfloat16(GetParam());
+  }
 
   void ReduceWindowAdd(const ComputationDataHandle& input,
                        tensorflow::gtl::ArraySlice<int64> window_dimensions,
                        tensorflow::gtl::ArraySlice<int64> window_strides,
                        Padding padding) {
-    builder_.ReduceWindow(input, builder_.ConstantR0<float>(0.0f),
-                          CreateScalarAddComputation(F32, &builder_),
+    auto init =
+        CreateConstantFromLiteral(*Literal::CreateR0<float>(0.0f), &builder_);
+    builder_.ReduceWindow(input, init,
+                          CreateScalarAddComputation(FloatType(), &builder_),
                           window_dimensions, window_strides, padding);
   }
 
@@ -58,30 +84,32 @@ class ReduceWindowTest : public ClientLibraryTestBase {
                        tensorflow::gtl::ArraySlice<int64> window_dimensions,
                        tensorflow::gtl::ArraySlice<int64> window_strides,
                        Padding padding) {
-    builder_.ReduceWindow(
-        input, builder_.ConstantLiteral(Literal::MinValue(F32)),
-        CreateScalarMax(), window_dimensions, window_strides, padding);
+    auto init = CreateConstantFromLiteral(Literal::MinValue(F32), &builder_);
+    builder_.ReduceWindow(input, init, CreateScalarMax(), window_dimensions,
+                          window_strides, padding);
   }
 
   void ReduceWindowMin(const ComputationDataHandle& input,
                        tensorflow::gtl::ArraySlice<int64> window_dimensions,
                        tensorflow::gtl::ArraySlice<int64> window_strides,
                        Padding padding) {
-    builder_.ReduceWindow(input,
-                          builder_.ConstantLiteral(Literal::MaxValue(F32)),
-                          CreateScalarMinComputation(F32, &builder_),
+    auto init = CreateConstantFromLiteral(Literal::MaxValue(F32), &builder_);
+    builder_.ReduceWindow(input, init,
+                          CreateScalarMinComputation(FloatType(), &builder_),
                           window_dimensions, window_strides, padding);
   }
 
   ComputationBuilder builder_;
 };
 
-TEST_F(ReduceWindowTest, MismatchedRanksGivesErrorStatus) {
-  const auto input = builder_.ConstantR1<float>({1, 1, 1, 1});
-  const auto init_value = builder_.ConstantR0<float>(0);
+TEST_P(ReduceWindowTest, MismatchedRanksGivesErrorStatus) {
+  const auto input = CreateConstantFromLiteral(
+      *Literal::CreateR1<float>({1, 1, 1, 1}), &builder_);
+  const auto init_value =
+      CreateConstantFromLiteral(*Literal::CreateR0<float>(0), &builder_);
   TF_ASSERT_OK(builder_.first_error());
   builder_.ReduceWindow(input, init_value,
-                        CreateScalarAddComputation(F32, &builder_),
+                        CreateScalarAddComputation(FloatType(), &builder_),
                         /*window_dimensions=*/{1, 2},
                         /*window_strides=*/{1}, Padding::kValid);
   ASSERT_EQ(builder_.first_error().code(), tensorflow::error::INVALID_ARGUMENT)
@@ -91,88 +119,96 @@ TEST_F(ReduceWindowTest, MismatchedRanksGivesErrorStatus) {
 }
 
 // Regression test for b/68964348.
-TEST_F(ReduceWindowTest, R0ReduceWindow) {
-  auto input = builder_.ConstantR0<float>(42);
-  auto init = builder_.ConstantR0<float>(1.0);
-  builder_.ReduceWindow(input, init, CreateScalarAddComputation(F32, &builder_),
+TEST_P(ReduceWindowTest, R0ReduceWindow) {
+  const auto input =
+      CreateConstantFromLiteral(*Literal::CreateR0<float>(42.0), &builder_);
+  const auto init =
+      CreateConstantFromLiteral(*Literal::CreateR0<float>(1.0), &builder_);
+  builder_.ReduceWindow(input, init,
+                        CreateScalarAddComputation(FloatType(), &builder_),
                         /*window_dimensions=*/{},
                         /*window_strides=*/{}, Padding::kSame);
-  ComputeAndCompareR0<float>(&builder_, 43, {}, ErrorSpec(0.00001));
+  ComputeAndCompareLiteral(&builder_, *Literal::CreateR0<float>(43.0), {},
+                           ErrorSpec(0.00001));
 }
 
-TEST_F(ReduceWindowTest, Min3In5Stride2) {
-  const auto input = builder_.ConstantR1<float>({10000, 1000, 100, 10, 1});
+TEST_P(ReduceWindowTest, Min3In5Stride2) {
+  const auto input = CreateConstantFromLiteral(
+      *Literal::CreateR1<float>({10000, 1000, 100, 10, 1}), &builder_);
   ReduceWindowMin(input, {3}, {2}, Padding::kValid);
-  ComputeAndCompareR1<float>(&builder_, {100, 1}, {}, ErrorSpec(0.0001));
+  ComputeAndCompareLiteral(&builder_, *Literal::CreateR1<float>({100, 1}), {},
+                           ErrorSpec(0.00001));
 }
 
-XLA_TEST_F(ReduceWindowTest, ZeroElementSmall) {
+XLA_TEST_P(ReduceWindowTest, ZeroElementSmall) {
   Array4D<float> input_array(1, 0, 2, 1);
-
-  const auto input = builder_.ConstantR4FromArray4D<float>(input_array);
+  const auto input = CreateConstantFromArray(input_array, &builder_);
   Padding padding = Padding::kSame;
   ReduceWindowAdd(input, {1, 1, 2, 1}, {1, 1, 1, 1}, padding);
 
   auto res = ReferenceUtil::ReduceWindow4DAdd(input_array, 0.0f, {1, 1, 2, 1},
                                               {1, 1, 1, 1}, padding);
 
-  ComputeAndCompareR4<float>(&builder_, *res, {}, ErrorSpec(1e-3, 1e-3));
+  ComputeAndCompareLiteral(&builder_, *Literal::CreateFromArray(*res), {},
+                           DefaultErrorSpec());
 }
 
-TEST_F(ReduceWindowTest, NonSquareSmall) {
+TEST_P(ReduceWindowTest, NonSquareSmall) {
   Array4D<float> input_array(1, 2, 2, 1);
-  input_array.FillRandom(2.f);
+  input_array.FillRandom(2.f, 2.f);
+  const auto input = CreateConstantFromArray(input_array, &builder_);
 
-  const auto input = builder_.ConstantR4FromArray4D<float>(input_array);
   Padding padding = Padding::kSame;
   ReduceWindowAdd(input, {1, 1, 2, 1}, {1, 1, 1, 1}, padding);
 
   auto res = ReferenceUtil::ReduceWindow4DAdd(input_array, 0.0f, {1, 1, 2, 1},
                                               {1, 1, 1, 1}, padding);
 
-  ComputeAndCompareR4<float>(&builder_, *res, {}, ErrorSpec(1e-3, 1e-3));
+  ComputeAndCompareLiteral(&builder_, *Literal::CreateFromArray(*res), {},
+                           DefaultErrorSpec());
 }
 
-TEST_F(ReduceWindowTest, MiddleDimsSmall) {
+TEST_P(ReduceWindowTest, MiddleDimsSmall) {
   Array4D<float> input_array(1, 3, 3, 1);
-  input_array.FillRandom(2.f);
-
-  const auto input = builder_.ConstantR4FromArray4D<float>(input_array);
+  input_array.FillRandom(2.f, 2.f);
+  const auto input = CreateConstantFromArray(input_array, &builder_);
   Padding padding = Padding::kSame;
   ReduceWindowAdd(input, {1, 1, 1, 1}, {1, 2, 2, 1}, padding);
 
   auto res = ReferenceUtil::ReduceWindow4DAdd(input_array, 0.0f, {1, 1, 1, 1},
                                               {1, 2, 2, 1}, padding);
 
-  ComputeAndCompareR4<float>(&builder_, *res, {}, ErrorSpec(1e-3, 1e-3));
+  ComputeAndCompareLiteral(&builder_, *Literal::CreateFromArray(*res), {},
+                           DefaultErrorSpec());
 }
 
-TEST_F(ReduceWindowTest, Along2ndMinorDim) {
+TEST_P(ReduceWindowTest, Along2ndMinorDim) {
   Array4D<float> input_array(3, 6, 7, 32);
-  input_array.FillRandom(2.f);
+  input_array.FillRandom(2.f, 2.f);
+  const auto input = CreateConstantFromArray(input_array, &builder_);
 
   // The parameters of this reduction mimic feature norm (e.g. LRN).
   int lrn_diameter = 7;  // diameter = 2*radius + 1 --> must be odd
-  const auto input = builder_.ConstantR4FromArray4D<float>(input_array);
   Padding padding = Padding::kSame;
   ReduceWindowAdd(input, {1, 1, lrn_diameter, 1}, {1, 1, 1, 1}, padding);
 
   auto res = ReferenceUtil::ReduceWindow4DAdd(
       input_array, 0.0f, {1, 1, lrn_diameter, 1}, {1, 1, 1, 1}, padding);
 
-  ComputeAndCompareR4<float>(&builder_, *res, {}, ErrorSpec(1e-3, 1e-3));
+  ComputeAndCompareLiteral(&builder_, *Literal::CreateFromArray(*res), {},
+                           DefaultErrorSpec());
 }
 
-TEST_F(ReduceWindowTest, AmongMajor2Dims) {
+TEST_P(ReduceWindowTest, AmongMajor2Dims) {
   Array4D<float> input_array(4, 4, 6, 8);
   input_array.FillWithMinorDimNum();
+  const auto input_data_handle =
+      CreateConstantFromArray(input_array, &builder_);
 
   int win_len = 3;
   int win_stride = 1;
 
   Padding padding = Padding::kSame;
-  const auto input_data_handle =
-      builder_.ConstantR4FromArray4D<float>(input_array);
   // Reduce only along the x and y dimensions, according to the win_len.
   ReduceWindowAdd(input_data_handle, {win_len, win_len, 1, 1},
                   {win_stride, win_stride, 1, 1}, padding);
@@ -180,18 +216,20 @@ TEST_F(ReduceWindowTest, AmongMajor2Dims) {
   auto result = ReferenceUtil::ReduceWindow4DAdd(
       input_array, 0.0f, {win_len, win_len, 1, 1},
       {win_stride, win_stride, 1, 1}, padding);
-  ComputeAndCompareR4<float>(&builder_, *result, {}, ErrorSpec(1e-3, 1e-3));
+
+  ComputeAndCompareLiteral(&builder_, *Literal::CreateFromArray(*result), {},
+                           DefaultErrorSpec());
 }
 
-TEST_F(ReduceWindowTest, AmongMajor2DimsMediumSize) {
+TEST_P(ReduceWindowTest, AmongMajor2DimsMediumSize) {
   Array4D<float> input_array(9, 12, 4, 89);
-  input_array.FillRandom(2.0f);
+  input_array.FillRandom(2.f, 2.f);
 
   int win_len = 3;
   int win_stride = 2;
 
   const auto input_data_handle =
-      builder_.ConstantR4FromArray4D<float>(input_array);
+      CreateConstantFromArray(input_array, &builder_);
 
   Padding padding = Padding::kSame;
   // Reduce only along the x and y dimensions, according to the win_len.
@@ -202,20 +240,21 @@ TEST_F(ReduceWindowTest, AmongMajor2DimsMediumSize) {
       input_array, 0.0f, {win_len, win_len, 1, 1},
       {win_stride, win_stride, 1, 1}, padding);
 
-  ComputeAndCompareR4<float>(&builder_, *result, {}, ErrorSpec(1e-3, 1e-3));
+  ComputeAndCompareLiteral(&builder_, *Literal::CreateFromArray(*result), {},
+                           DefaultErrorSpec());
 }
 
 // TODO(b/32173947): Test support for arbitrary-sized padding.
-TEST_F(ReduceWindowTest, DISABLED_AmongMajor2DimsMediumSizeLargePadding) {
+TEST_P(ReduceWindowTest, DISABLED_AmongMajor2DimsMediumSizeLargePadding) {
   Array4D<float> input_array(9, 12, 4, 89);  // simulate Dim0IsMinor layout
-  input_array.FillRandom(2.0f);
+  input_array.FillRandom(2.f, 2.f);
 
   int64 rank = 4;
   int win_len = 3;
   int win_stride = 2;
 
   const auto input_data_handle =
-      builder_.ConstantR4FromArray4D<float>(input_array);
+      CreateConstantFromArray(input_array, &builder_);
 
   Padding padding = Padding::kSame;
   // Reduce only along the x and y dimensions, according to the win_len.
@@ -232,26 +271,28 @@ TEST_F(ReduceWindowTest, DISABLED_AmongMajor2DimsMediumSizeLargePadding) {
       input_array, 0.0f, {win_len, win_len, 1, 1},
       {win_stride, win_stride, 1, 1}, padding);
 
-  ComputeAndCompareR4<float>(&builder_, *result, {}, ErrorSpec(1e-3, 1e-3));
+  ComputeAndCompareLiteral(&builder_, *Literal::CreateFromArray(*result), {},
+                           DefaultErrorSpec());
 }
 
-XLA_TEST_F(ReduceWindowTest, Add1x1x2In2x1x2) {
+XLA_TEST_P(ReduceWindowTest, Add1x1x2In2x1x2) {
   Array3D<float> input_array(2, 1, 2);
   input_array(0, 0, 0) = 1000;
   input_array(0, 0, 1) = 100;
   input_array(1, 0, 0) = 10;
   input_array(1, 0, 1) = 1;
-  auto input = builder_.ConstantR3FromArray3D<float>(input_array);
+  const auto input = CreateConstantFromArray(input_array, &builder_);
 
   ReduceWindowAdd(input, {1, 1, 2}, {1, 1, 1}, Padding::kValid);
 
   Array3D<float> expected(2, 1, 1);
   expected(0, 0, 0) = 1100;
   expected(1, 0, 0) = 11;
-  ComputeAndCompareR3<float>(&builder_, expected, {}, ErrorSpec(0.0001));
+  ComputeAndCompareLiteral(&builder_, *Literal::CreateFromArray(expected), {},
+                           DefaultErrorSpec());
 }
 
-XLA_TEST_F(ReduceWindowTest, Add1x1x2In2x1x3Stride1x1x2) {
+XLA_TEST_P(ReduceWindowTest, Add1x1x2In2x1x3Stride1x1x2) {
   Array3D<float> input_array(2, 1, 3);
   input_array(0, 0, 0) = 100;
   input_array(0, 0, 1) = 10;
@@ -259,17 +300,18 @@ XLA_TEST_F(ReduceWindowTest, Add1x1x2In2x1x3Stride1x1x2) {
   input_array(1, 0, 0) = 500;
   input_array(1, 0, 1) = 50;
   input_array(1, 0, 2) = 5;
-  auto input = builder_.ConstantR3FromArray3D<float>(input_array);
+  const auto input = CreateConstantFromArray(input_array, &builder_);
 
   ReduceWindowAdd(input, {1, 1, 2}, {1, 1, 2}, Padding::kValid);
 
   Array3D<float> expected(2, 1, 1);
   expected(0, 0, 0) = 110;
   expected(1, 0, 0) = 550;
-  ComputeAndCompareR3<float>(&builder_, expected, {}, ErrorSpec(0.0001));
+  ComputeAndCompareLiteral(&builder_, *Literal::CreateFromArray(expected), {},
+                           DefaultErrorSpec());
 }
 
-XLA_TEST_F(ReduceWindowTest, Add1x1x2In2x1x3SamePad) {
+XLA_TEST_P(ReduceWindowTest, Add1x1x2In2x1x3SamePad) {
   Array3D<float> input_array(2, 1, 3);
   input_array(0, 0, 0) = 100;
   input_array(0, 0, 1) = 10;
@@ -277,7 +319,7 @@ XLA_TEST_F(ReduceWindowTest, Add1x1x2In2x1x3SamePad) {
   input_array(1, 0, 0) = 500;
   input_array(1, 0, 1) = 50;
   input_array(1, 0, 2) = 5;
-  auto input = builder_.ConstantR3FromArray3D<float>(input_array);
+  const auto input = CreateConstantFromArray(input_array, &builder_);
 
   ReduceWindowAdd(input, {1, 1, 2}, {1, 1, 1}, Padding::kSame);
 
@@ -288,30 +330,34 @@ XLA_TEST_F(ReduceWindowTest, Add1x1x2In2x1x3SamePad) {
   expected(1, 0, 0) = 550;
   expected(1, 0, 1) = 55;
   expected(1, 0, 2) = 5;
-  ComputeAndCompareR3<float>(&builder_, expected, {}, ErrorSpec(0.0001));
+  ComputeAndCompareLiteral(&builder_, *Literal::CreateFromArray(expected), {},
+                           DefaultErrorSpec());
 }
 
 // Tests a reduction function that is not a simple add/min/max/etc.
-XLA_TEST_F(ReduceWindowTest, NonstandardReduceFunction) {
+XLA_TEST_P(ReduceWindowTest, NonstandardReduceFunction) {
   Array4D<float> input_array(1, 2, 2, 1);
   input_array(0, 0, 0, 0) = 1;
   input_array(0, 0, 1, 0) = 2;
   input_array(0, 1, 0, 0) = 3;
   input_array(0, 1, 1, 0) = 4;
+  const auto input = CreateConstantFromArray(input_array, &builder_);
 
-  const auto input = builder_.ConstantR4FromArray4D<float>(input_array);
   Padding padding = Padding::kValid;
-
-  const Shape scalar = ShapeUtil::MakeShape(F32, {});
+  const Shape scalar = ShapeUtil::MakeShape(FloatType(), {});
   auto b = builder_.CreateSubBuilder("unusual");
   auto lhs = b->Parameter(0, scalar, "lhs");
   auto rhs = b->Parameter(1, scalar, "rhs");
-  b->Min(b->Add(lhs, rhs), b->ConstantR0<float>(8.0f));
+  b->Min(b->Add(lhs, rhs),
+         CreateConstantFromLiteral(*Literal::CreateR0<float>(8.0f), b.get()));
   Computation reduce_fn = b->BuildAndNoteError();
 
-  builder_.ReduceWindow(input, builder_.ConstantR0<float>(3.0f), reduce_fn,
-                        /*window_dimensions=*/{1, 1, 2, 1},
-                        /*window_strides=*/{1, 1, 1, 1}, padding);
+  builder_.ReduceWindow(
+      input,
+      CreateConstantFromLiteral(*Literal::CreateR0<float>(3.0f), &builder_),
+      reduce_fn,
+      /*window_dimensions=*/{1, 1, 2, 1},
+      /*window_strides=*/{1, 1, 1, 1}, padding);
 
   const auto reduce_func = [](float arg1, float arg2) {
     return std::min<float>(arg1 + arg2, 8.0f);
@@ -322,17 +368,19 @@ XLA_TEST_F(ReduceWindowTest, NonstandardReduceFunction) {
                                            /*window=*/{1, 1, 2, 1},
                                            /*stride=*/{1, 1, 1, 1}, padding);
 
-  ComputeAndCompareR4<float>(&builder_, *expected, {}, ErrorSpec(1e-3, 1e-3));
+  ComputeAndCompareLiteral(&builder_, *Literal::CreateFromArray(*expected), {},
+                           DefaultErrorSpec());
 }
 
-TEST_F(ReduceWindowTest, R4UnitWindow) {
+TEST_P(ReduceWindowTest, R4UnitWindow) {
   Array4D<float> input_array(13, 12, 8, 15);
-  input_array.Fill(1.0f);
+  input_array.FillRandom(2.f, 2.f);
   std::unique_ptr<Literal> input_literal =
       Literal::CreateR4FromArray4DWithLayout(
           input_array, LayoutUtil::MakeLayout({0, 3, 2, 1}));
-  ComputationDataHandle input =
-      builder_.Parameter(0, input_literal->shape(), "operand");
+  ComputationDataHandle input;
+  auto input_data = CreateParameterAndTransferLiteral(
+      0, *input_literal, "parameter", &builder_, &input);
 
   Padding padding = Padding::kSame;
   ReduceWindowAdd(input, {1, 1, 7, 1}, {1, 4, 1, 1}, padding);
@@ -340,15 +388,11 @@ TEST_F(ReduceWindowTest, R4UnitWindow) {
   auto res = ReferenceUtil::ReduceWindow4DAdd(input_array, 0.0f, {1, 1, 7, 1},
                                               {1, 4, 1, 1}, padding);
 
-  TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr<GlobalData> input_data,
-                          client_->TransferToServer(*input_literal));
-  ComputeAndCompareR4<float>(&builder_, *res, {input_data.get()},
-                             ErrorSpec(1e-3, 1e-3));
+  ComputeAndCompareLiteral(&builder_, *Literal::CreateFromArray(*res),
+                           {input_data.get()}, DefaultErrorSpec());
 }
 
-XLA_TEST_F(HloTestBase, R6AddMultipleStrides) {
-  auto b = HloComputation::Builder(TestName());
-
+XLA_TEST_P(ReduceWindowTest, R6AddMultipleStrides) {
   std::vector<int64> input_dims(6, 8);
   auto shape = ShapeUtil::MakeShape(F32, input_dims);
 
@@ -358,56 +402,15 @@ XLA_TEST_F(HloTestBase, R6AddMultipleStrides) {
   };
   TF_EXPECT_OK(arg_literal->Populate<float>(generator));
 
-  auto input =
-      b.AddInstruction(HloInstruction::CreateConstant(std::move(arg_literal)));
-
-  auto init_value = b.AddInstruction(
-      HloInstruction::CreateConstant(Literal::CreateR0<float>(0.f)));
-
-  HloComputation::Builder add_computation("add");
-  Shape scalar_shape = ShapeUtil::MakeShape(F32, {});
-  auto param_lhs = add_computation.AddInstruction(
-      HloInstruction::CreateParameter(0, scalar_shape, "lhs"));
-  auto param_rhs = add_computation.AddInstruction(
-      HloInstruction::CreateParameter(1, scalar_shape, "rhs"));
-  add_computation.AddInstruction(HloInstruction::CreateBinary(
-      scalar_shape, HloOpcode::kAdd, param_lhs, param_rhs));
-
-  auto module = CreateNewModule();
-  auto add_func = module->AddEmbeddedComputation(add_computation.Build());
-
-  WindowDimension trivial_dim;
-  trivial_dim.set_size(1);
-  trivial_dim.set_stride(1);
-  trivial_dim.set_padding_low(0);
-  trivial_dim.set_padding_high(0);
-  trivial_dim.set_window_dilation(1);
-  trivial_dim.set_base_dilation(1);
-
-  WindowDimension active_dim;
-  active_dim.set_size(3);
-  active_dim.set_stride(1);
-  active_dim.set_padding_low(0);
-  active_dim.set_padding_high(0);
-  active_dim.set_window_dilation(1);
-  active_dim.set_base_dilation(1);
-
-  Window window;
-  *window.add_dimensions() = active_dim;
-  *window.add_dimensions() = trivial_dim;
-  *window.add_dimensions() = active_dim;
-  *window.add_dimensions() = active_dim;
-  *window.add_dimensions() = trivial_dim;
-  *window.add_dimensions() = trivial_dim;
-
-  // Non-monotonic output layout with minor dims trivial.
+  const auto input = CreateConstantFromLiteral(*arg_literal, &builder_);
+
+  Padding padding = Padding::kValid;
+  ReduceWindowAdd(input, {3, 1, 3, 3, 1, 1}, {1, 1, 1, 1, 1, 1}, padding);
+
   std::vector<int64> output_layout = {1, 5, 3, 2, 0, 4};
   std::vector<int64> output_dims = {6, 8, 6, 6, 8, 8};
   Shape result_shape =
       ShapeUtil::MakeShapeWithLayout(F32, output_dims, output_layout);
-  b.AddInstruction(HloInstruction::CreateReduceWindow(
-      result_shape, input, init_value, window, add_func));
-
   std::unique_ptr<Literal> expected = Literal::CreateFromShape(result_shape);
   auto out_generator =
       [&](tensorflow::gtl::ArraySlice<int64> indexes) -> float {
@@ -415,82 +418,37 @@ XLA_TEST_F(HloTestBase, R6AddMultipleStrides) {
   };
   TF_EXPECT_OK(expected->Populate<float>(out_generator));
 
-  module->AddEntryComputation(b.Build());
-  auto actual = ExecuteAndTransfer(std::move(module), {});
-
-  LiteralTestUtil::ExpectNear(*actual, *expected, ErrorSpec(1e-3, 1e-3));
+  ComputeAndCompareLiteral(&builder_, *expected, {}, DefaultErrorSpec());
 }
 
-XLA_TEST_F(HloTestBase, R6Add) {
-  auto b = HloComputation::Builder(TestName());
-
+XLA_TEST_P(ReduceWindowTest, R6Add) {
   std::vector<int64> input_dims(6, 8);
+  auto shape = ShapeUtil::MakeShape(F32, input_dims);
+
   std::unique_ptr<Literal> arg_literal =
       Literal::CreateFullWithMonotonicDim0MajorLayout<float>(input_dims, 1.0f);
-  auto input =
-      b.AddInstruction(HloInstruction::CreateConstant(std::move(arg_literal)));
-
-  auto init_value = b.AddInstruction(
-      HloInstruction::CreateConstant(Literal::CreateR0<float>(0.f)));
-
-  HloComputation::Builder add_computation("add");
-  Shape scalar_shape = ShapeUtil::MakeShape(F32, {});
-  auto param_lhs = add_computation.AddInstruction(
-      HloInstruction::CreateParameter(0, scalar_shape, "lhs"));
-  auto param_rhs = add_computation.AddInstruction(
-      HloInstruction::CreateParameter(1, scalar_shape, "rhs"));
-  add_computation.AddInstruction(HloInstruction::CreateBinary(
-      scalar_shape, HloOpcode::kAdd, param_lhs, param_rhs));
-
-  auto module = CreateNewModule();
-  auto add_func = module->AddEmbeddedComputation(add_computation.Build());
-
-  WindowDimension trivial_dim;
-  trivial_dim.set_size(1);
-  trivial_dim.set_stride(1);
-  trivial_dim.set_padding_low(0);
-  trivial_dim.set_padding_high(0);
-  trivial_dim.set_window_dilation(1);
-  trivial_dim.set_base_dilation(1);
-
-  WindowDimension active_dim;
-  active_dim.set_size(3);
-  active_dim.set_stride(1);
-  active_dim.set_padding_low(0);
-  active_dim.set_padding_high(0);
-  active_dim.set_window_dilation(1);
-  active_dim.set_base_dilation(1);
-
-  Window window;
-  *window.add_dimensions() = trivial_dim;
-  *window.add_dimensions() = trivial_dim;
-  *window.add_dimensions() = active_dim;
-  *window.add_dimensions() = active_dim;
-  *window.add_dimensions() = trivial_dim;
-  *window.add_dimensions() = trivial_dim;
-
-  Shape shape = ShapeUtil::MakeShape(F32, {8, 8, 6, 6, 8, 8});
-  b.AddInstruction(HloInstruction::CreateReduceWindow(shape, input, init_value,
-                                                      window, add_func));
+
+  const auto input = CreateConstantFromLiteral(*arg_literal, &builder_);
+
+  Padding padding = Padding::kValid;
+  ReduceWindowAdd(input, {1, 1, 3, 3, 1, 1}, {1, 1, 1, 1, 1, 1}, padding);
 
   std::vector<int64> output_dims = {8, 8, 6, 6, 8, 8};
   std::unique_ptr<Literal> expected =
       Literal::CreateFullWithMonotonicDim0MajorLayout<float>(output_dims, 9.0f);
 
-  module->AddEntryComputation(b.Build());
-  auto actual = ExecuteAndTransfer(std::move(module), {});
-
-  LiteralTestUtil::ExpectNear(*actual, *expected, ErrorSpec(1e-3, 1e-3));
+  ComputeAndCompareLiteral(&builder_, *expected, {}, DefaultErrorSpec());
 }
 
-XLA_TEST_F(ReduceWindowTest, R4SecondMinorStride) {
+XLA_TEST_P(ReduceWindowTest, R4SecondMinorStride) {
   Array4D<float> input_array(2, 1, 27, 119);
   input_array.FillRandom(2.0f);
   std::unique_ptr<Literal> input_literal =
       Literal::CreateR4FromArray4DWithLayout(
           input_array, LayoutUtil::MakeLayout({3, 2, 1, 0}));
-  ComputationDataHandle input =
-      builder_.Parameter(0, input_literal->shape(), "operand");
+  ComputationDataHandle input;
+  auto input_data = CreateParameterAndTransferLiteral(
+      0, *input_literal, "parameter", &builder_, &input);
 
   int win_len = 1;
   int stride = 8;
@@ -500,20 +458,19 @@ XLA_TEST_F(ReduceWindowTest, R4SecondMinorStride) {
   auto res = ReferenceUtil::ReduceWindow4DAdd(
       input_array, 0.0f, {1, 1, win_len, 1}, {1, 1, stride, 1}, padding);
 
-  TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr<GlobalData> input_data,
-                          client_->TransferToServer(*input_literal));
-  ComputeAndCompareR4<float>(&builder_, *res, {input_data.get()},
-                             ErrorSpec(1e-3, 1e-3));
+  ComputeAndCompareLiteral(&builder_, *Literal::CreateFromArray(*res),
+                           {input_data.get()}, DefaultErrorSpec());
 }
 
-XLA_TEST_F(ReduceWindowTest, R4SecondMinorUnitStride) {
+XLA_TEST_P(ReduceWindowTest, R4SecondMinorUnitStride) {
   Array4D<float> input_array(3, 2, 4, 64);
   input_array.FillRandom(2.0f);
   std::unique_ptr<Literal> input_literal =
       Literal::CreateR4FromArray4DWithLayout(
           input_array, LayoutUtil::MakeLayout({3, 2, 1, 0}));
-  ComputationDataHandle input =
-      builder_.Parameter(0, input_literal->shape(), "operand");
+  ComputationDataHandle input;
+  auto input_data = CreateParameterAndTransferLiteral(
+      0, *input_literal, "parameter", &builder_, &input);
 
   int win_len = 3;
   int stride = 1;
@@ -523,20 +480,19 @@ XLA_TEST_F(ReduceWindowTest, R4SecondMinorUnitStride) {
   auto res = ReferenceUtil::ReduceWindow4DAdd(
       input_array, 0.0f, {1, 1, win_len, 1}, {1, 1, stride, 1}, padding);
 
-  TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr<GlobalData> input_data,
-                          client_->TransferToServer(*input_literal));
-  ComputeAndCompareR4<float>(&builder_, *res, {input_data.get()},
-                             ErrorSpec(1e-3, 1e-3));
+  ComputeAndCompareLiteral(&builder_, *Literal::CreateFromArray(*res),
+                           {input_data.get()}, DefaultErrorSpec());
 }
 
-XLA_TEST_F(ReduceWindowTest, R4SecondMinorWin) {
+XLA_TEST_P(ReduceWindowTest, R4SecondMinorWin) {
   Array4D<float> input_array(1, 3, 12, 200);
   input_array.FillRandom(2.0f);
   std::unique_ptr<Literal> input_literal =
       Literal::CreateR4FromArray4DWithLayout(
           input_array, LayoutUtil::MakeLayout({3, 2, 1, 0}));
-  ComputationDataHandle input =
-      builder_.Parameter(0, input_literal->shape(), "operand");
+  ComputationDataHandle input;
+  auto input_data = CreateParameterAndTransferLiteral(
+      0, *input_literal, "parameter", &builder_, &input);
 
   int win_len = 8;
   int stride = 5;
@@ -546,13 +502,11 @@ XLA_TEST_F(ReduceWindowTest, R4SecondMinorWin) {
   auto res = ReferenceUtil::ReduceWindow4DAdd(
       input_array, 0.0f, {1, 1, win_len, 1}, {1, 1, stride, 1}, padding);
 
-  TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr<GlobalData> input_data,
-                          client_->TransferToServer(*input_literal));
-  ComputeAndCompareR4<float>(&builder_, *res, {input_data.get()},
-                             ErrorSpec(1e-3, 1e-3));
+  ComputeAndCompareLiteral(&builder_, *Literal::CreateFromArray(*res),
+                           {input_data.get()}, DefaultErrorSpec());
 }
 
-TEST_F(ReduceWindowTest, AmongMajor2DimsMultipleMinor) {
+TEST_P(ReduceWindowTest, AmongMajor2DimsMultipleMinor) {
   Array4D<float> input_array(6, 4, 10, 130);
   input_array.FillRandom(2.0f);
 
@@ -561,7 +515,7 @@ TEST_F(ReduceWindowTest, AmongMajor2DimsMultipleMinor) {
 
   Padding padding = Padding::kSame;
   const auto input_data_handle =
-      builder_.ConstantR4FromArray4D<float>(input_array);
+      CreateConstantFromArray(input_array, &builder_);
   // Reduce only along the x and y dimensions, according to the win_len.
   ReduceWindowAdd(input_data_handle, {win_len, win_len, 1, 1},
                   {win_stride, win_stride, 1, 1}, padding);
@@ -569,36 +523,42 @@ TEST_F(ReduceWindowTest, AmongMajor2DimsMultipleMinor) {
   auto result = ReferenceUtil::ReduceWindow4DAdd(
       input_array, 0.0f, {win_len, win_len, 1, 1},
       {win_stride, win_stride, 1, 1}, padding);
-  ComputeAndCompareR4<float>(&builder_, *result, {}, ErrorSpec(1e-3, 1e-3));
+  ComputeAndCompareLiteral(&builder_, *Literal::CreateFromArray(*result), {},
+                           DefaultErrorSpec());
 }
 
-XLA_TEST_F(ReduceWindowTest, Add24In1152_NoOverlap) {
+XLA_TEST_P(ReduceWindowTest, Add24In1152_NoOverlap) {
   std::vector<float> input_vector(128 * 9, 1);
-  const auto input = builder_.ConstantR1<float>(input_vector);
+  const auto input = CreateConstantFromLiteral(
+      *Literal::CreateR1<float>(input_vector), &builder_);
   ReduceWindowAdd(input, {32}, {128}, Padding::kValid);
-  ComputeAndCompareR1<float>(&builder_, {32, 32, 32, 32, 32, 32, 32, 32, 32},
-                             {}, ErrorSpec(0.0001));
+  ComputeAndCompareLiteral(
+      &builder_,
+      *Literal::CreateR1<float>({32, 32, 32, 32, 32, 32, 32, 32, 32}), {},
+      DefaultErrorSpec());
 }
 
-XLA_TEST_F(ReduceWindowTest, Add128In128Stride128) {
-  const auto input = builder_.ConstantR1<float>(
-      {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
-       1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
-       1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
-       1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
-       1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
-       1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
-       1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
-       1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16});
+XLA_TEST_P(ReduceWindowTest, Add128In128Stride128) {
+  std::vector<float> input_vector{
+      1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
+      1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
+      1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
+      1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
+      1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
+      1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
+      1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
+      1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16};
+  const auto input = CreateConstantFromLiteral(
+      *Literal::CreateR1<float>(input_vector), &builder_);
   ReduceWindowAdd(input, {128}, {128}, Padding::kValid);
-  ComputeAndCompareR1<float>(&builder_, {1088}, {}, ErrorSpec(0.0001));
+  ComputeAndCompareLiteral(&builder_, *Literal::CreateR1<float>({1088}), {},
+                           DefaultErrorSpec());
 }
 
 // Regression test for a bug that appeared in Inception (b/34784899).
-TEST_F(ReduceWindowTest, R2ReduceWindowInceptionFromBroadcast) {
+TEST_P(ReduceWindowTest, R2ReduceWindowInceptionFromBroadcast) {
   Array2D<float> input_array(14, 14, 1.0f);
-  ComputationDataHandle input =
-      builder_.Broadcast(builder_.ConstantLiteral(Literal::One(F32)), {14, 14});
+  const auto input = CreateConstantFromArray(input_array, &builder_);
 
   int win_len = 3;
   int stride = 1;
@@ -608,13 +568,14 @@ TEST_F(ReduceWindowTest, R2ReduceWindowInceptionFromBroadcast) {
   auto res = ReferenceUtil::ReduceWindow2DAdd(
       input_array, 0.0f, {win_len, win_len}, {stride, stride}, padding);
 
-  ComputeAndCompareR2<float>(&builder_, *res, {}, ErrorSpec(1e-3, 1e-3));
+  ComputeAndCompareLiteral(&builder_, *Literal::CreateFromArray<float>(*res),
+                           {}, DefaultErrorSpec());
 }
 
-TEST_F(ReduceWindowTest, R2ReduceWindowNonOverlappingFromBroadcast) {
+TEST_P(ReduceWindowTest, R2ReduceWindowNonOverlappingFromBroadcast) {
   Array2D<float> input_array(6, 4, 1.0f);
-  ComputationDataHandle input =
-      builder_.Broadcast(builder_.ConstantLiteral(Literal::One(F32)), {6, 4});
+  ComputationDataHandle input = builder_.Broadcast(
+      CreateConstantFromLiteral(Literal::One(F32), &builder_), {6, 4});
 
   Padding padding = Padding::kSame;
   ReduceWindowAdd(input, {4, 2}, {3, 3}, padding);
@@ -622,9 +583,13 @@ TEST_F(ReduceWindowTest, R2ReduceWindowNonOverlappingFromBroadcast) {
   auto res = ReferenceUtil::ReduceWindow2DAdd(input_array, 0.0f, {4, 2}, {3, 3},
                                               padding);
 
-  ComputeAndCompareR2<float>(&builder_, *res, {}, ErrorSpec(1e-3, 1e-3));
+  ComputeAndCompareLiteral(&builder_, *Literal::CreateFromArray<float>(*res),
+                           {}, DefaultErrorSpec());
 }
 
+INSTANTIATE_TEST_CASE_P(ReduceWindowTestInstance, ReduceWindowTest,
+                        ::testing::ValuesIn(use_bfloat16_params));
+
 enum Reducer { kAdd, kMax };
 
 struct R4ReduceWindowTestData {
@@ -638,30 +603,36 @@ struct R4ReduceWindowTestData {
 };
 
 string R4ReduceWindowTestDataToString(
-    const ::testing::TestParamInfo<R4ReduceWindowTestData>& data) {
+    const ::testing::TestParamInfo<
+        ::testing::tuple<R4ReduceWindowTestData, bool>>& data) {
+  const auto& param = ::testing::get<0>(data.param);
   string str = tensorflow::strings::StrCat(
-      "base_bounds_",
-      tensorflow::str_util::Join(data.param.base_bounds, "x"),  //
+      "base_bounds_", tensorflow::str_util::Join(param.base_bounds, "x"),  //
       "__window_bounds_",
-      tensorflow::str_util::Join(data.param.window_bounds, "x"),            //
-      "__strides_", tensorflow::str_util::Join(data.param.strides, "x"),    //
-      "__pad_low_", tensorflow::str_util::Join(data.param.pad_low, "x"),    //
-      "__pad_high_", tensorflow::str_util::Join(data.param.pad_high, "x"),  //
-      (data.param.reducer == kAdd) ? "add" : "max");
-  CHECK(data.param.reducer == kAdd || data.param.reducer == kMax);
+      tensorflow::str_util::Join(param.window_bounds, "x"),            //
+      "__strides_", tensorflow::str_util::Join(param.strides, "x"),    //
+      "__pad_low_", tensorflow::str_util::Join(param.pad_low, "x"),    //
+      "__pad_high_", tensorflow::str_util::Join(param.pad_high, "x"),  //
+      (param.reducer == kAdd) ? "add" : "max");
+  CHECK(param.reducer == kAdd || param.reducer == kMax);
 
   // Test names are not allowed to contain the '-' character.
   std::replace(str.begin(), str.end(), '-', 'n');
+  if (::testing::get<1>(data.param)) {
+    str = tensorflow::strings::StrCat(str, "_bfloat16");
+  }
   return str;
 }
 
-class R4ReduceWindowTest
-    : public ClientLibraryTestBase,
-      public ::testing::WithParamInterface<R4ReduceWindowTestData> {
+class R4ReduceWindowTest : public ReduceWindowTestBase,
+                           public ::testing::WithParamInterface<
+                               ::testing::tuple<R4ReduceWindowTestData, bool>> {
  protected:
+  R4ReduceWindowTest() { set_use_bfloat16(::testing::get<1>(GetParam())); }
+
   void DoIt() {
     ComputationBuilder b(client_, TestName());
-    const auto& param = GetParam();
+    const auto& param = ::testing::get<0>(GetParam());
 
     const float kInitValue = 0.0f;
 
@@ -670,23 +641,24 @@ class R4ReduceWindowTest
     input.FillIota(1);
     std::unique_ptr<Literal> input_literal =
         Literal::CreateR4FromArray4D(input);
-    TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr<GlobalData> input_arg,
-                            client_->TransferToServer(*input_literal));
+    ComputationDataHandle parameter;
+    auto input_arg = CreateParameterAndTransferLiteral(0, *input_literal, "p0",
+                                                       &b, &parameter);
 
     std::vector<std::pair<int64, int64>> padding(4);
     for (int i = 0; i < 4; ++i) {
       padding[i] = {param.pad_low[i], param.pad_high[i]};
     }
 
-    auto parameter = b.Parameter(0, input_literal->shape(), "p0");
-    auto pad_value = b.ConstantR0<float>(kInitValue);
+    auto init_value =
+        CreateConstantFromLiteral(*Literal::CreateR0(kInitValue), &b);
     CHECK(param.reducer == kAdd || param.reducer == kMax);
     auto computation = param.reducer == kAdd
-                           ? CreateScalarAddComputation(F32, &b)
-                           : CreateScalarMaxComputation(F32, &b);
+                           ? CreateScalarAddComputation(FloatType(), &b)
+                           : CreateScalarMaxComputation(FloatType(), &b);
     b.ReduceWindowWithGeneralPadding(
         /*operand=*/parameter,
-        /*init_value=*/pad_value,
+        /*init_value=*/init_value,
         /*computation=*/computation,
         /*window_dimensions=*/param.window_bounds,
         /*window_strides=*/param.strides,
@@ -704,8 +676,8 @@ class R4ReduceWindowTest
             /*window=*/param.window_bounds,
             /*stride=*/param.strides,
             /*padding=*/padding);
-    ComputeAndCompareR4<float>(&b, *expected, {input_arg.get()},
-                               ErrorSpec(1e-3, 1e-3));
+    ComputeAndCompareLiteral(&b, *Literal::CreateFromArray(*expected),
+                             {input_arg.get()}, DefaultErrorSpec());
   }
 };
 
@@ -834,9 +806,11 @@ const R4ReduceWindowTestData kR4ReduceWindowTestValues[] = {
                            /*reducer=*/kAdd},
 };
 
-INSTANTIATE_TEST_CASE_P(R4ReduceWindowTestInstantiation, R4ReduceWindowTest,
-                        ::testing::ValuesIn(kR4ReduceWindowTestValues),
-                        R4ReduceWindowTestDataToString);
+INSTANTIATE_TEST_CASE_P(
+    R4ReduceWindowTestInstantiation, R4ReduceWindowTest,
+    ::testing::Combine(::testing::ValuesIn(kR4ReduceWindowTestValues),
+                       ::testing::ValuesIn(use_bfloat16_params)),
+    R4ReduceWindowTestDataToString);
 
 class R4ReduceWindowLargeTest : public R4ReduceWindowTest {};
 
@@ -859,10 +833,11 @@ const R4ReduceWindowTestData kR4ReduceWindowLargeTestValues[] = {
                            /*reducer=*/kAdd},
 };
 
-INSTANTIATE_TEST_CASE_P(R4ReduceWindowLargeTestInstantiation,
-                        R4ReduceWindowLargeTest,
-                        ::testing::ValuesIn(kR4ReduceWindowLargeTestValues),
-                        R4ReduceWindowTestDataToString);
+INSTANTIATE_TEST_CASE_P(
+    R4ReduceWindowLargeTestInstantiation, R4ReduceWindowLargeTest,
+    ::testing::Combine(::testing::ValuesIn(kR4ReduceWindowLargeTestValues),
+                       ::testing::ValuesIn(use_bfloat16_params)),
+    R4ReduceWindowTestDataToString);
 
 struct R2ReduceWindowTestData {
   int64 base_bounds[2];
@@ -910,26 +885,33 @@ struct R2ReduceWindowTestData {
 };
 
 string R2ReduceWindowTestDataToString(
-    const ::testing::TestParamInfo<R2ReduceWindowTestData>& data) {
+    const ::testing::TestParamInfo<
+        ::testing::tuple<R2ReduceWindowTestData, bool>>& data) {
+  const auto& param = ::testing::get<0>(data.param);
   string str = tensorflow::strings::StrCat(
-      "base_bounds_",
-      tensorflow::str_util::Join(data.param.base_bounds, "x"),  //
+      "base_bounds_", tensorflow::str_util::Join(param.base_bounds, "x"),  //
       "__window_bounds_",
-      tensorflow::str_util::Join(data.param.window_bounds, "x"),              //
-      "__strides_", tensorflow::str_util::Join(data.param.strides, "x"),      //
-      "__padding_", data.param.padding == Padding::kSame ? "same" : "valid",  //
-      "__layout_", data.param.layout[0], "_", data.param.layout[1],           //
-      "__reducer_", data.param.reducer == kAdd ? "add" : "max");
+      tensorflow::str_util::Join(param.window_bounds, "x"),              //
+      "__strides_", tensorflow::str_util::Join(param.strides, "x"),      //
+      "__padding_", param.padding == Padding::kSame ? "same" : "valid",  //
+      "__layout_", param.layout[0], "_", param.layout[1],                //
+      "__reducer_", param.reducer == kAdd ? "add" : "max");
+  if (::testing::get<1>(data.param)) {
+    str = tensorflow::strings::StrCat(str, "_bfloat16");
+  }
   return str;
 }
 
-class R2ReduceWindowTest
-    : public ClientLibraryTestBase,
-      public ::testing::WithParamInterface<R2ReduceWindowTestData> {};
+class R2ReduceWindowTest : public ReduceWindowTestBase,
+                           public ::testing::WithParamInterface<
+                               ::testing::tuple<R2ReduceWindowTestData, bool>> {
+ protected:
+  R2ReduceWindowTest() { set_use_bfloat16(::testing::get<1>(GetParam())); }
+};
 
 TEST_P(R2ReduceWindowTest, Add) {
   ComputationBuilder b(client_, TestName());
-  const auto& param = GetParam();
+  const auto& param = ::testing::get<0>(GetParam());
   CHECK(param.reducer == kAdd);
 
   const float kInitValue = 0.0f;
@@ -937,12 +919,15 @@ TEST_P(R2ReduceWindowTest, Add) {
   std::unique_ptr<Literal> input_literal =
       Literal::CreateR2FromArray2DWithLayout(
           input, LayoutUtil::MakeLayout(param.layout));
-  TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr<GlobalData> input_arg,
-                          client_->TransferToServer(*input_literal));
-  b.ReduceWindow(/*operand=*/
-                 b.Parameter(0, input_literal->shape(), "p0"),
-                 /*init_value=*/b.ConstantR0<float>(kInitValue),
-                 /*computation=*/CreateScalarAddComputation(F32, &b),
+
+  ComputationDataHandle parameter;
+  auto input_arg = CreateParameterAndTransferLiteral(0, *input_literal, "p0",
+                                                     &b, &parameter);
+  auto init_value =
+      CreateConstantFromLiteral(*Literal::CreateR0(kInitValue), &b);
+  b.ReduceWindow(/*operand=*/parameter,
+                 /*init_value=*/init_value,
+                 /*computation=*/CreateScalarAddComputation(FloatType(), &b),
                  /*window_dimensions=*/param.window_bounds,
                  /*window_strides=*/param.strides, /*padding=*/param.padding);
 
@@ -950,13 +935,15 @@ TEST_P(R2ReduceWindowTest, Add) {
       /*operand=*/input, /*init=*/kInitValue, /*window=*/param.window_bounds,
       /*stride=*/param.strides, /*padding=*/param.padding);
 
-  ComputeAndCompareR2<float>(&b, *expected, {input_arg.get()},
-                             ErrorSpec(1e-3, 1e-3));
+  ComputeAndCompareLiteral(&b, *Literal::CreateFromArray(*expected),
+                           {input_arg.get()}, DefaultErrorSpec());
 }
 
-INSTANTIATE_TEST_CASE_P(R2ReduceWindowTestInstantiation, R2ReduceWindowTest,
-                        ::testing::ValuesIn(kR2TestCases),
-                        R2ReduceWindowTestDataToString);
+INSTANTIATE_TEST_CASE_P(
+    R2ReduceWindowTestInstantiation, R2ReduceWindowTest,
+    ::testing::Combine(::testing::ValuesIn(kR2TestCases),
+                       ::testing::ValuesIn(use_bfloat16_params)),
+    R2ReduceWindowTestDataToString);
 
 struct R1ReduceWindowTestData {
   int64 base_bounds[1];
@@ -1061,25 +1048,32 @@ struct R1ReduceWindowTestData {
 };
 
 string R1ReduceWindowTestDataToString(
-    const ::testing::TestParamInfo<R1ReduceWindowTestData>& data) {
+    const ::testing::TestParamInfo<
+        ::testing::tuple<R1ReduceWindowTestData, bool>>& data) {
+  const auto& param = ::testing::get<0>(data.param);
   string str = tensorflow::strings::StrCat(
-      "base_bounds_", tensorflow::str_util::Join(data.param.base_bounds, "x"),
-      "__window_bounds_",
-      tensorflow::str_util::Join(data.param.window_bounds, "x"), "__strides_",
-      tensorflow::str_util::Join(data.param.strides, "x"), "__pad_low_",
-      tensorflow::str_util::Join(data.param.pad_low, "x"), "__pad_high_",
-      tensorflow::str_util::Join(data.param.pad_high, "x"), "__reducer_",
-      data.param.reducer == kAdd ? "add" : "max");
+      "base_bounds_", tensorflow::str_util::Join(param.base_bounds, "x"),
+      "__window_bounds_", tensorflow::str_util::Join(param.window_bounds, "x"),
+      "__strides_", tensorflow::str_util::Join(param.strides, "x"),
+      "__pad_low_", tensorflow::str_util::Join(param.pad_low, "x"),
+      "__pad_high_", tensorflow::str_util::Join(param.pad_high, "x"),
+      "__reducer_", param.reducer == kAdd ? "add" : "max");
+  if (::testing::get<1>(data.param)) {
+    str = tensorflow::strings::StrCat(str, "_bfloat16");
+  }
   return str;
 }
 
-class R1ReduceWindowTest
-    : public ClientLibraryTestBase,
-      public ::testing::WithParamInterface<R1ReduceWindowTestData> {};
+class R1ReduceWindowTest : public ReduceWindowTestBase,
+                           public ::testing::WithParamInterface<
+                               ::testing::tuple<R1ReduceWindowTestData, bool>> {
+ protected:
+  R1ReduceWindowTest() { set_use_bfloat16(::testing::get<1>(GetParam())); }
+};
 
 TEST_P(R1ReduceWindowTest, DoIt) {
   ComputationBuilder b(client_, TestName());
-  const auto& param = GetParam();
+  const auto& param = ::testing::get<0>(GetParam());
   CHECK(param.reducer == kAdd || param.reducer == kMax);
 
   const float kInitValue = 0.0f;
@@ -1087,18 +1081,21 @@ TEST_P(R1ReduceWindowTest, DoIt) {
   std::iota(std::begin(input_vector), std::end(input_vector), 0);
   std::unique_ptr<Literal> input_literal =
       Literal::CreateR1(tensorflow::gtl::ArraySlice<float>(input_vector));
-  TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr<GlobalData> input_arg,
-                          client_->TransferToServer(*input_literal));
+  ComputationDataHandle parameter;
+  auto input_arg = CreateParameterAndTransferLiteral(0, *input_literal, "p0",
+                                                     &b, &parameter);
 
   std::vector<std::pair<int64, int64>> padding(1);
   padding[0] = {param.pad_low[0], param.pad_high[0]};
 
   auto computation = param.reducer == kAdd
-                         ? CreateScalarAddComputation(F32, &b)
-                         : CreateScalarMaxComputation(F32, &b);
+                         ? CreateScalarAddComputation(FloatType(), &b)
+                         : CreateScalarMaxComputation(FloatType(), &b);
+  auto init_value =
+      CreateConstantFromLiteral(*Literal::CreateR0(kInitValue), &b);
   b.ReduceWindowWithGeneralPadding(
-      /*operand=*/b.Parameter(0, input_literal->shape(), "p0"),
-      /*init_value=*/b.ConstantR0<float>(kInitValue),
+      /*operand=*/parameter,
+      /*init_value=*/init_value,
       /*computation=*/computation,
       /*window_dimensions=*/param.window_bounds,
       /*window_strides=*/param.strides, /*padding=*/padding);
@@ -1114,12 +1111,14 @@ TEST_P(R1ReduceWindowTest, DoIt) {
       /*stride=*/param.strides,
       /*padding=*/padding);
 
-  ComputeAndCompareR1<float>(&b, tensorflow::gtl::ArraySlice<float>(*expected),
-                             {input_arg.get()}, ErrorSpec(1e-3, 1e-3));
+  ComputeAndCompareLiteral(&b, *Literal::CreateR1<float>(*expected),
+                           {input_arg.get()}, DefaultErrorSpec());
 }
 
-INSTANTIATE_TEST_CASE_P(R1ReduceWindowTestInstantiation, R1ReduceWindowTest,
-                        ::testing::ValuesIn(kR1TestCases),
-                        R1ReduceWindowTestDataToString);
+INSTANTIATE_TEST_CASE_P(
+    R1ReduceWindowTestInstantiation, R1ReduceWindowTest,
+    ::testing::Combine(::testing::ValuesIn(kR1TestCases),
+                       ::testing::ValuesIn(use_bfloat16_params)),
+    R1ReduceWindowTestDataToString);
 }  // namespace
 }  // namespace xla
-- 
GitLab


From cfc1de550abdfbc35083d0bd5f7fe84f8897282d Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 4 Dec 2017 17:04:45 -0800
Subject: [PATCH 0619/1225] Fix tf.identity(resource variable) with eager
 execution and a device copy.

PiperOrigin-RevId: 177891209
---
 tensorflow/python/eager/BUILD       | 3 +++
 tensorflow/python/eager/ops_test.py | 8 ++++++++
 tensorflow/python/ops/array_ops.py  | 7 ++-----
 3 files changed, 13 insertions(+), 5 deletions(-)

diff --git a/tensorflow/python/eager/BUILD b/tensorflow/python/eager/BUILD
index b491a637ba..f470e18120 100644
--- a/tensorflow/python/eager/BUILD
+++ b/tensorflow/python/eager/BUILD
@@ -110,6 +110,7 @@ cuda_py_test(
         "//tensorflow/python:array_ops",
         "//tensorflow/python:math_ops",
         "//tensorflow/python:nn_ops",
+        "//tensorflow/python:resource_variable_ops",
         "//tensorflow/python:random_ops",
         "//tensorflow/python:nn_grad",
         "//tensorflow/python:training",
@@ -144,6 +145,7 @@ cuda_py_test(
         ":test",
         "//tensorflow/python:clip_ops",
         "//tensorflow/python:math_ops",
+        "//tensorflow/python:resource_variable_ops",
     ],
 )
 
@@ -415,6 +417,7 @@ cuda_py_test(
         "//tensorflow/python:layers",
         "//tensorflow/python:math_ops",
         "//tensorflow/python:random_ops",
+        "//tensorflow/python:resource_variable_ops",
         "//tensorflow/python:sparse_ops",
         "//tensorflow/python:tensor_shape",
     ],
diff --git a/tensorflow/python/eager/ops_test.py b/tensorflow/python/eager/ops_test.py
index 70e23b9311..48dcb4830c 100644
--- a/tensorflow/python/eager/ops_test.py
+++ b/tensorflow/python/eager/ops_test.py
@@ -33,6 +33,7 @@ from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import control_flow_ops
 from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import random_ops
+from tensorflow.python.ops import resource_variable_ops
 from tensorflow.python.ops import sparse_ops
 
 
@@ -322,6 +323,13 @@ class OpsTest(test_util.TensorFlowTestCase):
   def testIdentity(self):
     self.assertAllEqual(2, array_ops.identity(2))
 
+  def testIdentityOnVariable(self):
+    if not context.context().num_gpus():
+      self.skipTest('No GPUs found')
+    with context.device('/gpu:0'):
+      v = resource_variable_ops.ResourceVariable(True)
+    self.assertAllEqual(True, array_ops.identity(v))
+
   def testIncompatibleSetShape(self):
     x = constant_op.constant(1)
     with self.assertRaises(ValueError):
diff --git a/tensorflow/python/ops/array_ops.py b/tensorflow/python/ops/array_ops.py
index 73a19e7042..74b405681b 100644
--- a/tensorflow/python/ops/array_ops.py
+++ b/tensorflow/python/ops/array_ops.py
@@ -126,11 +126,8 @@ def identity(input, name=None):  # pylint: disable=redefined-builtin
   if context.in_graph_mode():
     return gen_array_ops.identity(input, name=name)
   else:
-    try:
-      in_device = input.device
-    except AttributeError:
-      input = ops.convert_to_tensor(input)
-      in_device = input.device
+    input = ops.convert_to_tensor(input)
+    in_device = input.device
     # TODO(ashankar): Does 'identity' need to invoke execution callbacks?
     if context.context().device_name != in_device:
       return input._copy()  # pylint: disable=protected-access
-- 
GitLab


From 2091f50c4a6549256233157f47bfb54023476938 Mon Sep 17 00:00:00 2001
From: Alexandre Passos <apassos@google.com>
Date: Mon, 4 Dec 2017 17:15:58 -0800
Subject: [PATCH 0620/1225] Treat integer default initializers like floating
 point ones.

This fixes subtle problems with partitioned variables.

PiperOrigin-RevId: 177892499
---
 .../partitioned_variables_test.py             |  9 ++++++
 .../kernel_tests/variable_scope_test.py       | 29 -------------------
 tensorflow/python/ops/variable_scope.py       |  6 ++--
 3 files changed, 12 insertions(+), 32 deletions(-)

diff --git a/tensorflow/python/kernel_tests/partitioned_variables_test.py b/tensorflow/python/kernel_tests/partitioned_variables_test.py
index d405175100..56a07cb012 100644
--- a/tensorflow/python/kernel_tests/partitioned_variables_test.py
+++ b/tensorflow/python/kernel_tests/partitioned_variables_test.py
@@ -46,6 +46,15 @@ class PartitionerCreatorsTest(test.TestCase):
         self.assertEqual(len(v0_list), 5)
         self.assertAllEqual(v0_part, (5, 1))
 
+  def testFixedSizePartitionerInt64(self):
+    with self.test_session():
+      partitioner = partitioned_variables.fixed_size_partitioner(4, axis=0)
+      with variable_scope.variable_scope("root", partitioner=partitioner):
+        v0 = variable_scope.get_variable(
+            "v0", dtype=dtypes.int64, shape=[20])
+        v0_list = v0._get_variable_list()
+        self.assertEqual(len(v0_list), 4)
+
   def testResourceFixedSizePartitioner(self):
     with self.test_session():
       partitioner = partitioned_variables.fixed_size_partitioner(5, axis=0)
diff --git a/tensorflow/python/kernel_tests/variable_scope_test.py b/tensorflow/python/kernel_tests/variable_scope_test.py
index 70fe0a4785..8491171923 100644
--- a/tensorflow/python/kernel_tests/variable_scope_test.py
+++ b/tensorflow/python/kernel_tests/variable_scope_test.py
@@ -901,35 +901,6 @@ def axis0_into3_partitioner(shape=None, **unused_kwargs):
 
 class VariableScopeWithPartitioningTest(test.TestCase):
 
-  def testInitFromNonInitializer(self):
-    with self.test_session() as sess:
-      # Test various dtypes with zeros initializer as following:
-      types = [
-          dtypes.int8, dtypes.uint8, dtypes.int16, dtypes.uint16, dtypes.int32,
-          dtypes.int64, dtypes.bool
-      ]
-
-      # Use different variable_name to distinguish various dtypes
-      for (i, dtype) in enumerate(types):
-        x = variable_scope.get_variable(
-            name="x%d" % i,
-            shape=(3, 4),
-            dtype=dtype,
-            partitioner=axis0_into2_partitioner)
-        y = variable_scope.get_variable(
-            name="y%d" % i,
-            shape=(6, 4),
-            dtype=dtype,
-            partitioner=axis0_into2_partitioner,
-            initializer=init_ops.zeros_initializer(dtype=dtype))
-
-        variables_lib.global_variables_initializer().run()
-        # x and y would become var list after partition
-        val_x = sess.run(list(x))
-        val_y = sess.run(list(y))
-
-        self.assertAllEqual(val_x, val_y)
-
   def testResultNameMatchesRequested(self):
     with variable_scope.variable_scope(
         "scope0", partitioner=axis0_into2_partitioner):
diff --git a/tensorflow/python/ops/variable_scope.py b/tensorflow/python/ops/variable_scope.py
index 3643861a16..4a23d96721 100644
--- a/tensorflow/python/ops/variable_scope.py
+++ b/tensorflow/python/ops/variable_scope.py
@@ -843,6 +843,7 @@ class _VariableStore(object):
     Raises:
       ValueError: When giving unsupported dtype.
     """
+    del shape
     # If dtype is DT_FLOAT, provide a uniform unit scaling initializer
     if dtype.is_floating:
       initializer = init_ops.glorot_uniform_initializer()
@@ -850,9 +851,8 @@ class _VariableStore(object):
     # If dtype is DT_INT/DT_UINT, provide a default value `zero`
     # If dtype is DT_BOOL, provide a default value `FALSE`
     elif dtype.is_integer or dtype.is_unsigned or dtype.is_bool:
-      initializer = init_ops.zeros_initializer()(
-          shape=shape, dtype=dtype.base_dtype)
-      initializing_from_value = True
+      initializer = init_ops.zeros_initializer()
+      initializing_from_value = False
     # NOTES:Do we need to support for handling DT_STRING and DT_COMPLEX here?
     else:
       raise ValueError("An initializer for variable %s of %s is required"
-- 
GitLab


From 601687d9f5046f411be556f28b6c82ac035696f9 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 4 Dec 2017 17:16:44 -0800
Subject: [PATCH 0621/1225] Modifying _get_examples in graph_io.py to utilize
 tf.cond.

PiperOrigin-RevId: 177892591
---
 .../learn/python/learn/learn_io/graph_io.py   | 40 ++++++++++++++-----
 .../python/learn/learn_io/graph_io_test.py    | 36 ++++++++++++++---
 2 files changed, 61 insertions(+), 15 deletions(-)

diff --git a/tensorflow/contrib/learn/python/learn/learn_io/graph_io.py b/tensorflow/contrib/learn/python/learn/learn_io/graph_io.py
index 4b34fc6284..3a46c23968 100644
--- a/tensorflow/contrib/learn/python/learn/learn_io/graph_io.py
+++ b/tensorflow/contrib/learn/python/learn/learn_io/graph_io.py
@@ -24,6 +24,7 @@ from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import errors
 from tensorflow.python.framework import ops
 from tensorflow.python.framework import sparse_tensor
+from tensorflow.python.layers import utils
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import data_flow_ops
 from tensorflow.python.ops import io_ops
@@ -280,14 +281,33 @@ def _get_file_names(file_pattern, randomize_input):
 
 def _get_examples(file_name_queue, reader, num_threads, read_batch_size,
                   filter_fn, parse_fn):
+  """Get example filenames matching.
+
+  Args:
+    file_name_queue: A queue implementation that dequeues elements in
+      first-in first-out order.
+    reader: A function or class that returns an object with
+      `read` method, (filename tensor) -> (example tensor).
+    num_threads: The number of threads enqueuing examples.
+    read_batch_size: An int or scalar `Tensor` specifying the number of
+      records to read at once.
+    filter_fn: Filtering function, takes both keys as well as an `Example`
+      Tensors and returns a boolean mask of the same shape as the input Tensors
+      to be applied for filtering. If `None`, no filtering is done.
+    parse_fn: Parsing function, takes `Example` Tensor returns parsed
+      representation. If `None`, no parsing is done.
+
+  Returns:
+    List of example file names matching `file_name_queue`.
+  """
   with ops.name_scope('read'):
     example_list = []
     for _ in range(num_threads):
-      if read_batch_size > 1:
-        keys, examples_proto = reader().read_up_to(file_name_queue,
-                                                   read_batch_size)
-      else:
-        keys, examples_proto = reader().read(file_name_queue)
+      keys, examples_proto = utils.smart_cond(
+          read_batch_size > 1,
+          lambda: reader().read_up_to(file_name_queue, read_batch_size),
+          lambda: reader().read(file_name_queue))
+
       if filter_fn:
         mask = filter_fn(keys, examples_proto)
         keys = array_ops.boolean_mask(keys, mask)
@@ -379,14 +399,15 @@ def _read_keyed_batch_examples_helper(file_pattern,
             capacity=1, dtypes=[dtypes.string], shapes=[[]])
         enqueue_op = file_name_queue.enqueue(
             input_pipeline_ops.seek_next(
-                file_names, shuffle=randomize_input, num_epochs=num_epochs,
+                file_names,
+                shuffle=randomize_input,
+                num_epochs=num_epochs,
                 seed=seed))
         queue_runner.add_queue_runner(
             queue_runner.QueueRunner(file_name_queue, [enqueue_op]))
       else:
         file_name_queue = input_ops.string_input_producer(
-            constant_op.constant(
-                file_names, name='input'),
+            constant_op.constant(file_names, name='input'),
             shuffle=randomize_input,
             num_epochs=num_epochs,
             name=file_name_queue_scope,
@@ -496,7 +517,8 @@ def read_keyed_batch_features(file_pattern,
   """
 
   with ops.name_scope(name, 'read_batch_features', [file_pattern]) as scope:
-    if read_batch_size is None: read_batch_size = batch_size
+    if read_batch_size is None:
+      read_batch_size = batch_size
     keys, examples = read_keyed_batch_examples(
         file_pattern,
         batch_size,
diff --git a/tensorflow/contrib/learn/python/learn/learn_io/graph_io_test.py b/tensorflow/contrib/learn/python/learn/learn_io/graph_io_test.py
index 6f0fd9a297..e11e8b698a 100644
--- a/tensorflow/contrib/learn/python/learn/learn_io/graph_io_test.py
+++ b/tensorflow/contrib/learn/python/learn/learn_io/graph_io_test.py
@@ -204,8 +204,7 @@ class GraphIOTest(test.TestCase):
     shape = (0,)
     features = {
         "feature":
-            parsing_ops.FixedLenFeature(
-                shape=shape, dtype=dtypes_lib.float32)
+            parsing_ops.FixedLenFeature(shape=shape, dtype=dtypes_lib.float32)
     }
 
     with ops.Graph().as_default() as g, self.test_session(graph=g) as sess:
@@ -255,8 +254,8 @@ class GraphIOTest(test.TestCase):
       self.assertAllEqual((None,), inputs.get_shape().as_list())
       self.assertEqual("%s:1" % name, inputs.name)
       file_name_queue_name = "%s/file_name_queue" % name
-      file_name_queue_limit_name = ("%s/limit_epochs/epochs" %
-                                    file_name_queue_name)
+      file_name_queue_limit_name = (
+          "%s/limit_epochs/epochs" % file_name_queue_name)
       file_names_name = "%s/input" % file_name_queue_name
       example_queue_name = "%s/random_shuffle_queue" % name
       op_nodes = test_util.assert_ops_in_graph({
@@ -354,8 +353,8 @@ class GraphIOTest(test.TestCase):
     json_lines = [
         "".join([
             '{"features": { "feature": { "sequence": {',
-            '"bytes_list": { "value": ["', base64.b64encode(l).decode("ascii"),
-            '"]}}}}}\n'
+            '"bytes_list": { "value": ["',
+            base64.b64encode(l).decode("ascii"), '"]}}}}}\n'
         ]) for l in lines
     ]
     return self._create_temp_file("".join(json_lines))
@@ -823,6 +822,31 @@ class GraphIOTest(test.TestCase):
       coord.request_stop()
       coord.join(threads)
 
+  def test_read_keyed_batch_features_shared_queue(self):
+    batch_size = 17
+    shape = (0,)
+    fixed_feature = parsing_ops.FixedLenFeature(
+        shape=shape, dtype=dtypes_lib.float32)
+    feature = {"feature": fixed_feature}
+    reader = io_ops.TFRecordReader
+
+    _, queued_feature = graph_io.read_keyed_batch_features_shared_queue(
+        _VALID_FILE_PATTERN, batch_size, feature, reader)
+
+    with ops.Graph().as_default() as g, self.test_session(graph=g) as session:
+      features_result = graph_io.read_batch_features(
+          _VALID_FILE_PATTERN, batch_size, feature, reader)
+      session.run(variables.local_variables_initializer())
+
+    self.assertAllEqual(
+        queued_feature.get("feature").get_shape().as_list(),
+        features_result.get("feature").get_shape().as_list())
+
+  def test_get_file_names_errors(self):
+    # Raise bad file_pattern.
+    with self.assertRaises(ValueError):
+      graph_io._get_file_names([], True)
+
 
 if __name__ == "__main__":
   test.main()
-- 
GitLab


From 4ff0f280053187e6360f0812198813ed576d6b62 Mon Sep 17 00:00:00 2001
From: Igor Saprykin <isaprykin@google.com>
Date: Mon, 4 Dec 2017 17:36:48 -0800
Subject: [PATCH 0622/1225] Reproduce an issue with MonitoredSession when
 saving a variable on a GPU.

Also arrange for continuous testing with GPUs.

PiperOrigin-RevId: 177895214
---
 tensorflow/python/BUILD                            | 14 +++++++-------
 .../python/training/monitored_session_test.py      | 14 ++++++++++++++
 2 files changed, 21 insertions(+), 7 deletions(-)

diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD
index 9bddf7d161..cd11be9341 100644
--- a/tensorflow/python/BUILD
+++ b/tensorflow/python/BUILD
@@ -3773,16 +3773,11 @@ py_test(
     ],
 )
 
-py_test(
+cuda_py_test(
     name = "monitored_session_test",
     size = "medium",
     srcs = ["training/monitored_session_test.py"],
-    srcs_version = "PY2AND3",
-    tags = [
-        "no_windows",
-        "notsan",  # b/67945581
-    ],
-    deps = [
+    additional_deps = [
         ":array_ops",
         ":client_testlib",
         ":control_flow_ops",
@@ -3797,6 +3792,11 @@ py_test(
         "//tensorflow/contrib/testing:testing_py",
         "//tensorflow/core:protos_all_py",
     ],
+    tags = [
+        "multi_gpu",
+        "no_windows",
+        "notsan",  # b/67945581
+    ],
 )
 
 py_test(
diff --git a/tensorflow/python/training/monitored_session_test.py b/tensorflow/python/training/monitored_session_test.py
index 159b2d5c16..349d8537cb 100644
--- a/tensorflow/python/training/monitored_session_test.py
+++ b/tensorflow/python/training/monitored_session_test.py
@@ -36,6 +36,7 @@ from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import errors_impl
 from tensorflow.python.framework import ops
+from tensorflow.python.framework import test_util
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import control_flow_ops
 from tensorflow.python.ops import resource_variable_ops
@@ -1968,6 +1969,19 @@ class MonitoredSessionTest(test.TestCase):
           self.assertEqual(2, trace_the_exception['side_effect_counter'])
           self.assertNear(0.62, session.run(graph_state), 0.1)
 
+  def test_saver_on_a_gpu(self):
+    if not test_util.is_gpu_available():
+      return
+    with ops.Graph().as_default():
+      with self.test_session():
+        with ops.device('/gpu:0'):
+          variables.Variable(0)
+        saver_lib.Saver()
+
+        # TODO(b/36964652): Reproduces the issue that needs to be fixed.
+        with self.assertRaises(errors_impl.InvalidArgumentError):
+          monitored_session.MonitoredSession()
+
 
 class SingularMonitoredSessionTest(test.TestCase):
   """Tests SingularMonitoredSession."""
-- 
GitLab


From b7affdee5d3baa3c98084f254510d65c7f8a3860 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 4 Dec 2017 17:46:24 -0800
Subject: [PATCH 0623/1225] [TF2XLA] Change the implementation of Diag and
 MatrixDiag to use arithmetic rather than Pad.

PiperOrigin-RevId: 177896187
---
 tensorflow/compiler/tests/unary_ops_test.py   | 21 +++++
 tensorflow/compiler/tf2xla/kernels/BUILD      |  1 +
 tensorflow/compiler/tf2xla/kernels/diag_op.cc | 92 +++++++++++++++----
 3 files changed, 97 insertions(+), 17 deletions(-)

diff --git a/tensorflow/compiler/tests/unary_ops_test.py b/tensorflow/compiler/tests/unary_ops_test.py
index a9a3f4f97f..b96770bd46 100644
--- a/tensorflow/compiler/tests/unary_ops_test.py
+++ b/tensorflow/compiler/tests/unary_ops_test.py
@@ -76,6 +76,12 @@ class UnaryOpsTest(XLATestCase):
           array_ops.diag_part,
           np.arange(36).reshape([2, 3, 2, 3]).astype(dtype),
           np.array([[0, 7, 14], [21, 28, 35]], dtype=dtype))
+      self._assertOpOutputMatchesExpected(
+          array_ops.diag, np.array([[1, 2], [3, 4]], dtype=dtype),
+          np.array(
+              [[[[1, 0], [0, 0]], [[0, 2], [0, 0]]], [[[0, 0], [3, 0]],
+                                                      [[0, 0], [0, 4]]]],
+              dtype=dtype))
 
       self._assertOpOutputMatchesExpected(
           array_ops.identity,
@@ -86,6 +92,21 @@ class UnaryOpsTest(XLATestCase):
           array_ops.matrix_diag,
           np.array([[1, 2], [3, 4]], dtype=dtype),
           np.array([[[1, 0], [0, 2]], [[3, 0], [0, 4]]], dtype=dtype))
+      self._assertOpOutputMatchesExpected(
+          array_ops.matrix_diag, np.array([1, 2, 3, 4], dtype=dtype),
+          np.array(
+              [[1, 0, 0, 0], [0, 2, 0, 0], [0, 0, 3, 0], [0, 0, 0, 4]],
+              dtype=dtype))
+      self._assertOpOutputMatchesExpected(
+          array_ops.matrix_diag,
+          np.array(
+              [[[1, 2, 3], [4, 5, 6]], [[7, 8, 9], [10, 11, 12]]], dtype=dtype),
+          np.array(
+              [[[[1, 0, 0], [0, 2, 0], [0, 0, 3]],
+                [[4, 0, 0], [0, 5, 0], [0, 0, 6]]],
+               [[[7, 0, 0], [0, 8, 0], [0, 0, 9]],
+                [[10, 0, 0], [0, 11, 0], [0, 0, 12]]]],
+              dtype=dtype))
       self._assertOpOutputMatchesExpected(
           array_ops.matrix_diag_part,
           np.arange(3 * 2 * 4).reshape([3, 2, 4]).astype(dtype),
diff --git a/tensorflow/compiler/tf2xla/kernels/BUILD b/tensorflow/compiler/tf2xla/kernels/BUILD
index a1720ff919..ed1e731681 100644
--- a/tensorflow/compiler/tf2xla/kernels/BUILD
+++ b/tensorflow/compiler/tf2xla/kernels/BUILD
@@ -86,6 +86,7 @@ tf_kernel_library(
         "//tensorflow/compiler/tf2xla:xla_compiler",
         "//tensorflow/compiler/tf2xla/lib:batch_dot",
         "//tensorflow/compiler/tf2xla/lib:cholesky",
+        "//tensorflow/compiler/tf2xla/lib:util",
         "//tensorflow/compiler/tf2xla/ops:sendrecv_ops",
         "//tensorflow/compiler/xla:literal_util",
         "//tensorflow/compiler/xla:shape_util",
diff --git a/tensorflow/compiler/tf2xla/kernels/diag_op.cc b/tensorflow/compiler/tf2xla/kernels/diag_op.cc
index ec5017f6ab..765ea922a5 100644
--- a/tensorflow/compiler/tf2xla/kernels/diag_op.cc
+++ b/tensorflow/compiler/tf2xla/kernels/diag_op.cc
@@ -13,6 +13,8 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
+#include "tensorflow/compiler/tf2xla/lib/util.h"
+#include "tensorflow/compiler/tf2xla/type_util.h"
 #include "tensorflow/compiler/tf2xla/xla_helpers.h"
 #include "tensorflow/compiler/tf2xla/xla_op_kernel.h"
 #include "tensorflow/compiler/tf2xla/xla_op_registry.h"
@@ -22,6 +24,62 @@ limitations under the License.
 namespace tensorflow {
 namespace {
 
+// Create a diagonal / batch diagonal matrix with 'input' on the diagonal.
+xla::StatusOr<xla::ComputationDataHandle> CreateDiagonal(
+    const xla::ComputationDataHandle& input, int64 last_dim_size,
+    tensorflow::gtl::ArraySlice<int64> other_dims, XlaOpKernelContext* ctx,
+    xla::ComputationBuilder* builder) {
+  // Create two matrices that have the following forms, and compare them:
+  //
+  // [[0, 0, 0, 0]            [[0, 1, 2, 3]
+  //  [1, 1, 1, 1]             [0, 1, 2, 3]
+  //  [2, 2, 2, 2]             [0, 1, 2, 3]
+  //  [3, 3, 3, 3]]            [0, 1, 2, 3]]
+  //
+  // This produces a predicate matrix of the right size, with "true" on the
+  // diagonal.
+  xla::ComputationDataHandle iota;
+  TF_RETURN_IF_ERROR(
+      XlaHelpers::Iota(builder, DataType::DT_INT32, last_dim_size, &iota));
+  xla::ComputationDataHandle iota_broadcast =
+      builder->Broadcast(iota, {last_dim_size});
+  xla::ComputationDataHandle mask = builder->Eq(iota_broadcast, iota, {0});
+
+  // If this is a batched diagonal, broadcast the mask across the other
+  // dimensions.
+  if (!other_dims.empty()) {
+    mask = builder->Broadcast(mask, other_dims);
+  }
+
+  // Broadcast the input, and then use the mask computed above to select the
+  // diagonal:
+  // e.g, in 2D:
+  //         [[t, f, f]    [[1, 1, 1]    [[0, 0, 0]      [[1, 0, 0]
+  // select(  [f, t, f]  ,  [4, 4, 4]  ,  [0, 0, 0]  ) =  [0, 4, 0]
+  //          [f, f, t]]    [9, 9, 9]]    [0, 0, 0]]      [0, 0, 9]]
+  //
+  // Broadcasting the input is less-than-trivial, since we need to broadcast
+  // into a "middle" dimension. We can do this with a reshape + implicit
+  // broadcast.
+  // TODO(b/30112114): Replace with in-dim broadcast when those are supported.
+  std::vector<int64> broadcast_dims(other_dims.begin(), other_dims.end());
+  broadcast_dims.push_back(1LL);
+  broadcast_dims.push_back(last_dim_size);
+  xla::ComputationDataHandle input_broadcast =
+      builder->Reshape(input, broadcast_dims);
+
+  broadcast_dims[broadcast_dims.size() - 2] = last_dim_size;
+  xla::PrimitiveType element_type;
+  TF_RETURN_IF_ERROR(
+      DataTypeToPrimitiveType(ctx->input_type(0), &element_type));
+  auto broadcast_shape =
+      xla::ShapeUtil::MakeShape(element_type, broadcast_dims);
+  xla::ComputationDataHandle zeros = Zeros(builder, broadcast_shape);
+
+  input_broadcast = builder->Add(input_broadcast, zeros);
+  return builder->Select(mask, input_broadcast, zeros);
+}
+
 class DiagOp : public XlaOpKernel {
  public:
   explicit DiagOp(OpKernelConstruction* ctx) : XlaOpKernel(ctx) {}
@@ -29,6 +87,8 @@ class DiagOp : public XlaOpKernel {
   void Compile(XlaOpKernelContext* ctx) override {
     xla::ComputationBuilder* builder = ctx->builder();
 
+    OP_REQUIRES(ctx, ctx->num_inputs() >= 1,
+                errors::InvalidArgument("Diag op must have at an input"));
     const TensorShape input_shape = ctx->InputShape(0);
 
     auto dims = input_shape.dim_sizes();
@@ -36,7 +96,7 @@ class DiagOp : public XlaOpKernel {
                 errors::InvalidArgument("Expected 1 <= dims, got shape ",
                                         input_shape.DebugString()));
 
-    xla::ComputationDataHandle diag = ctx->Input(0);
+    xla::ComputationDataHandle input = ctx->Input(0);
 
     // Picture:
     // tf.diag([1, 2, 3, 4]) ==> [[1, 0, 0, 0]
@@ -46,13 +106,13 @@ class DiagOp : public XlaOpKernel {
 
     // Flattens the input to 1D.
     int64 size = input_shape.num_elements();
-    diag = builder->Reshape(diag, {size});
+    input = builder->Reshape(input, {size});
 
-    // Adds inter-element padding of 'size'.
-    xla::PaddingConfig config;
-    auto* dim = config.add_dimensions();
-    dim->set_interior_padding(size);
-    diag = builder->Pad(diag, XlaHelpers::Zero(builder, input_type(0)), config);
+    // Create an R2 with the R1 diagonal.
+    auto diag_or_status =
+        CreateDiagonal(input, size, /*other_dims=*/{}, ctx, builder);
+    OP_REQUIRES_OK(ctx, diag_or_status.status());
+    xla::ComputationDataHandle diag = diag_or_status.ValueOrDie();
 
     // Reshapes to the final shape.
     std::vector<int64> new_dims(dims.size() * 2);
@@ -141,6 +201,8 @@ class MatrixDiagOp : public XlaOpKernel {
   void Compile(XlaOpKernelContext* ctx) override {
     xla::ComputationBuilder* builder = ctx->builder();
 
+    OP_REQUIRES(ctx, ctx->num_inputs() >= 1,
+                errors::InvalidArgument("MatrixDiag op must have at an input"));
     const TensorShape input_shape = ctx->InputShape(0);
 
     auto dims = input_shape.dim_sizes();
@@ -152,17 +214,13 @@ class MatrixDiagOp : public XlaOpKernel {
 
     int last_dim = dims.size() - 1;
     int64 last_dim_size = input_shape.dim_size(last_dim);
+    tensorflow::gtl::ArraySlice<int64> other_dims(dims);
+    other_dims.pop_back();
 
-    // Adds inter-element padding of 'last_dim_size' to the last dimension.
-    xla::PaddingConfig config = xla::MakeNoPaddingConfig(dims.size());
-    auto* dim = config.mutable_dimensions(last_dim);
-    dim->set_interior_padding(last_dim_size);
-    diag = builder->Pad(diag, XlaHelpers::Zero(builder, input_type(0)), config);
-
-    // Reshapes to the final shape.
-    dims.push_back(last_dim_size);
-    diag = builder->Reshape(diag, dims);
-
+    auto diag_or_status =
+        CreateDiagonal(diag, last_dim_size, other_dims, ctx, builder);
+    OP_REQUIRES_OK(ctx, diag_or_status.status());
+    diag = diag_or_status.ValueOrDie();
     ctx->SetOutput(0, diag);
   }
 };
-- 
GitLab


From 201d8d839f3384cfecea78a098764afceba82c78 Mon Sep 17 00:00:00 2001
From: Mark Heffernan <meheff@google.com>
Date: Mon, 4 Dec 2017 18:28:47 -0800
Subject: [PATCH 0624/1225] Enable transferring a tuple literal to a replicated
 device. Use ShapedBuffer to allocate required memory for the shape, then
 transfer the literal to the allocated addresses on each replica. Also, add
 Allocate() method to ShapedBuffer.

PiperOrigin-RevId: 177900588
---
 tensorflow/compiler/xla/service/service.cc    | 75 +++++++++++++------
 .../compiler/xla/service/shaped_buffer.cc     | 55 ++++++++------
 .../compiler/xla/service/shaped_buffer.h      | 13 +++-
 3 files changed, 94 insertions(+), 49 deletions(-)

diff --git a/tensorflow/compiler/xla/service/service.cc b/tensorflow/compiler/xla/service/service.cc
index fa62080be4..61b3d3e0fe 100644
--- a/tensorflow/compiler/xla/service/service.cc
+++ b/tensorflow/compiler/xla/service/service.cc
@@ -1042,18 +1042,29 @@ tensorflow::Status Service::TransferToClient(const TransferToClientRequest* arg,
   return tensorflow::Status::OK();
 }
 
+namespace {
+
+// Creates a clone of the given shaped buffer with the given device ordinal. The
+// shape and DeviceMemoryBase values of the clone are identical to the original.
+std::unique_ptr<ShapedBuffer> CloneShapedBufferOnDevice(
+    const ShapedBuffer& shaped_buffer, int device_ordinal) {
+  auto clone = MakeUnique<ShapedBuffer>(
+      shaped_buffer.shape(), shaped_buffer.platform(), device_ordinal);
+  ShapeUtil::ForEachSubshape(
+      shaped_buffer.shape(), [&clone, &shaped_buffer](const Shape& /*subshape*/,
+                                                      const ShapeIndex& index) {
+        clone->AddBufferAtIndex(shaped_buffer.buffer(index), index);
+      });
+  return clone;
+}
+
+}  // namespace
+
 tensorflow::Status Service::TransferToServer(const TransferToServerRequest* arg,
                                              TransferToServerResponse* result) {
   Literal literal = Literal(arg->literal());
   const Shape& shape = literal.shape();
 
-  if (ShapeUtil::IsTuple(shape) && options_.number_of_replicas() > 1) {
-    // TODO(b/32990684): Tuple transfers to host end up allocating further
-    // buffers - implement that correctly.
-    return Unimplemented(
-        "Tuple transfers to the device not supported with replication.");
-  }
-
   std::vector<se::StreamExecutor*> replicas;
   if (arg->has_device_handle()) {
     TF_ASSIGN_OR_RETURN(replicas,
@@ -1063,24 +1074,44 @@ tensorflow::Status Service::TransferToServer(const TransferToServerRequest* arg,
         replicas, Replicas(*execute_backend_, SingleComputationDeviceHandle()));
   }
 
-  // Allocate memory on the device, using the stream executor. The size of the
-  // allocation is obtained by examining the shape of the literal passed from
-  // the client. An allocation handle is returned in the response.
-  int64 allocation_size =
-      execute_backend_->transfer_manager()->GetByteSizeRequirement(shape);
-
-  TF_ASSIGN_OR_RETURN(se::DeviceMemoryBase allocation,
-                      execute_backend_->memory_allocator()->Allocate(
-                          replicas[0]->device_ordinal(), allocation_size));
-
+  // All memory allocation is done on the first replica. The allocations in all
+  // other replicas mirror the firsts'.
+  int master_device_ordinal = replicas[0]->device_ordinal();
+  TF_ASSIGN_OR_RETURN(
+      std::unique_ptr<ShapedBuffer> shaped_buffer,
+      ShapedBuffer::Allocate(
+          shape, execute_backend_->memory_allocator(), master_device_ordinal,
+          [this](const Shape& shape) {
+            return execute_backend_->transfer_manager()->GetByteSizeRequirement(
+                shape);
+          }));
+
+  // The allocation tracker only keeps track of the top-level buffer of the
+  // shape so pass in the buffer at shape index {}.
+  // TODO(b/37515654): Allocation tracker should hold a ShapedBuffer.
   *result->mutable_data() = allocation_tracker_.Register(
-      execute_backend_.get(), replicas[0]->device_ordinal(), allocation, shape,
-      StrCat("TransferToServer literal of size ", allocation_size));
+      execute_backend_.get(), master_device_ordinal,
+      shaped_buffer->buffer(/*index=*/{}), shape,
+      StrCat("TransferToServer literal of shape ",
+             ShapeUtil::HumanString(shape)));
 
+  // Transfer the data to the replicas.
   for (se::StreamExecutor* executor : replicas) {
-    TF_RETURN_IF_ERROR(
-        execute_backend_->transfer_manager()->TransferLiteralToDevice(
-            executor, literal, &allocation));
+    if (executor->device_ordinal() == master_device_ordinal) {
+      TF_RETURN_IF_ERROR(
+          execute_backend_->transfer_manager()->TransferLiteralToDevice(
+              executor, literal, *shaped_buffer));
+    } else {
+      // The replica is not the master. Create an cloned shaped buffer with
+      // the replica's device ordinal. This is required because
+      // TransferLiteralToDevice verifies that the device ordinal of the shaped
+      // buffer matches that of the executor.
+      std::unique_ptr<ShapedBuffer> clone =
+          CloneShapedBufferOnDevice(*shaped_buffer, executor->device_ordinal());
+      TF_RETURN_IF_ERROR(
+          execute_backend_->transfer_manager()->TransferLiteralToDevice(
+              executor, literal, *clone));
+    }
   }
   return tensorflow::Status::OK();
 }
diff --git a/tensorflow/compiler/xla/service/shaped_buffer.cc b/tensorflow/compiler/xla/service/shaped_buffer.cc
index a7539a1a11..aa0a24a283 100644
--- a/tensorflow/compiler/xla/service/shaped_buffer.cc
+++ b/tensorflow/compiler/xla/service/shaped_buffer.cc
@@ -51,6 +51,34 @@ ShapedBuffer::MakeArrayShapedBuffer(const Shape& shape,
   return std::move(shaped_buffer);
 }
 
+/* static */ StatusOr<std::unique_ptr<ShapedBuffer>> ShapedBuffer::Allocate(
+    const Shape& shape, DeviceMemoryAllocator* allocator, int device_ordinal,
+    const std::function<int64(const Shape&)>& shape_size_fn) {
+  if (!LayoutUtil::HasLayout(shape)) {
+    return InvalidArgument("Shape must have a layout: %s",
+                           ShapeUtil::HumanStringWithLayout(shape).c_str());
+  }
+  TF_RETURN_IF_ERROR(ShapeUtil::ValidateShape(shape));
+  auto shaped_buffer = WrapUnique(
+      new ShapedBuffer(shape, allocator->platform(), device_ordinal));
+
+  // Allocate an appropriate sized buffer for each element in the shape
+  // including the tuple pointer arrays.
+  for (auto& pair : shaped_buffer->shape_index_to_buffer_entry_) {
+    const ShapeIndex& index = pair.first;
+    size_t& buffer_entry = pair.second;
+    TF_ASSIGN_OR_RETURN(
+        se::DeviceMemoryBase memory_base,
+        allocator->Allocate(shaped_buffer->device_ordinal(),
+                            shape_size_fn(ShapeUtil::GetSubshape(
+                                shaped_buffer->shape(), index))));
+    shaped_buffer->buffers_.push_back(memory_base);
+    buffer_entry = shaped_buffer->buffers_.size() - 1;
+  }
+
+  return std::move(shaped_buffer);
+}
+
 ShapedBuffer::ShapedBuffer(const Shape& shape, const se::Platform* platform,
                            int device_ordinal)
     : shape_(shape),
@@ -109,29 +137,10 @@ std::ostream& operator<<(std::ostream& out, const ShapedBuffer& buffer) {
 ScopedShapedBuffer::Allocate(
     const Shape& shape, DeviceMemoryAllocator* allocator, int device_ordinal,
     const std::function<int64(const Shape&)>& shape_size_fn) {
-  if (!LayoutUtil::HasLayout(shape)) {
-    return InvalidArgument("Shape must have a layout: %s",
-                           ShapeUtil::HumanStringWithLayout(shape).c_str());
-  }
-  TF_RETURN_IF_ERROR(ShapeUtil::ValidateShape(shape));
-  auto shaped_buffer =
-      WrapUnique(new ScopedShapedBuffer(shape, allocator, device_ordinal));
-
-  // Allocate an appropriate sized buffer for each element in the shape
-  // including the tuple pointer arrays.
-  for (auto& pair : shaped_buffer->shape_index_to_buffer_entry_) {
-    const ShapeIndex& index = pair.first;
-    size_t& buffer_entry = pair.second;
-    TF_ASSIGN_OR_RETURN(se::DeviceMemoryBase memory_base,
-                        shaped_buffer->allocator_->Allocate(
-                            shaped_buffer->device_ordinal(),
-                            shape_size_fn(ShapeUtil::GetSubshape(
-                                shaped_buffer->shape(), index))));
-    shaped_buffer->buffers_.push_back(memory_base);
-    buffer_entry = shaped_buffer->buffers_.size() - 1;
-  }
-
-  return std::move(shaped_buffer);
+  TF_ASSIGN_OR_RETURN(
+      std::unique_ptr<ShapedBuffer> unscoped_buffer,
+      ShapedBuffer::Allocate(shape, allocator, device_ordinal, shape_size_fn));
+  return MakeScoped(unscoped_buffer.get(), allocator);
 }
 
 /* static */
diff --git a/tensorflow/compiler/xla/service/shaped_buffer.h b/tensorflow/compiler/xla/service/shaped_buffer.h
index fa88caa13f..ca8bfff674 100644
--- a/tensorflow/compiler/xla/service/shaped_buffer.h
+++ b/tensorflow/compiler/xla/service/shaped_buffer.h
@@ -43,6 +43,14 @@ class ShapedBuffer {
       const Shape& shape, const perftools::gputools::Platform* platform,
       int device_ordinal, const perftools::gputools::DeviceMemoryBase& buffer);
 
+  // Return a newly allocated ShapedBuffer of an arbitrary shape. Array buffers
+  // (leaves in the shape) are allocated and uninitialized. Tuple buffers (if
+  // any) are allocated and initialized to the backend-specific representation
+  // of an array of pointers to the tuple elements.
+  static StatusOr<std::unique_ptr<ShapedBuffer>> Allocate(
+      const Shape& shape, DeviceMemoryAllocator* allocator, int device_ordinal,
+      const std::function<int64(const Shape&)>& shape_size_fn);
+
   ShapedBuffer(const Shape& shape,
                const perftools::gputools::Platform* platform,
                int device_ordinal);
@@ -110,10 +118,7 @@ std::ostream& operator<<(std::ostream& out, const ShapedBuffer& buffer);
 // destructed.
 class ScopedShapedBuffer : public ShapedBuffer {
  public:
-  // Return a newly allocated ScopedShapedBuffer of an arbitrary shape. Array
-  // buffers (leaves in the shape) are allocated and uninitialized. Tuple
-  // buffers (if any) are allocated and initialized to the backend-specific
-  // representation of an array of pointers to the tuple elements.
+  // Identical to ShapedBuffer::Allocate.
   static StatusOr<std::unique_ptr<ScopedShapedBuffer>> Allocate(
       const Shape& shape, DeviceMemoryAllocator* allocator, int device_ordinal,
       const std::function<int64(const Shape&)>& shape_size_fn);
-- 
GitLab


From 367f54938464bbe3ed35bb311f686c7ae3448662 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=BC=A0=E5=BF=97=E8=B1=AA?= <izhangzhihao@hotmail.com>
Date: Tue, 5 Dec 2017 11:13:29 +0800
Subject: [PATCH 0625/1225] variables.get_global_step() is deprecated,  use
 `training_util.get_global_step()` instead

error message:

```
tensorflow/contrib/timeseries/python/timeseries/head.py:63: get_global_step (from tensorflow.contrib.framework.python.ops.variables) is deprecated and will be removed in a future version.
```
---
 tensorflow/contrib/timeseries/python/timeseries/head.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tensorflow/contrib/timeseries/python/timeseries/head.py b/tensorflow/contrib/timeseries/python/timeseries/head.py
index 5896fc2a20..f0330bfbbd 100644
--- a/tensorflow/contrib/timeseries/python/timeseries/head.py
+++ b/tensorflow/contrib/timeseries/python/timeseries/head.py
@@ -19,7 +19,7 @@ from __future__ import print_function
 
 import re
 
-from tensorflow.contrib.framework.python.ops import variables
+from tensorflow.python.training import training_util
 from tensorflow.contrib.layers.python.layers import optimizers
 
 from tensorflow.contrib.timeseries.python.timeseries import feature_keys
@@ -79,7 +79,7 @@ class _TimeSeriesRegressionHead(head_lib._Head):  # pylint:disable=protected-acc
 
     train_op = optimizers.optimize_loss(
         model_outputs.loss,
-        global_step=variables.get_global_step(),
+        global_step=training_util.get_global_step(),
         optimizer=self.optimizer,
         # Learning rate is set in the Optimizer object
         learning_rate=None)
-- 
GitLab


From c9038c93426fe5090322a8be10126f27ec255d07 Mon Sep 17 00:00:00 2001
From: Jianwei Xie <xiejw@google.com>
Date: Mon, 4 Dec 2017 20:29:04 -0800
Subject: [PATCH 0626/1225] Generates a warning if the global step is not
 increased.

PiperOrigin-RevId: 177908680
---
 .../training/basic_session_run_hooks.py       | 26 ++++++++++++++++++
 .../training/basic_session_run_hooks_test.py  | 27 ++++++++++++++++---
 2 files changed, 50 insertions(+), 3 deletions(-)

diff --git a/tensorflow/python/training/basic_session_run_hooks.py b/tensorflow/python/training/basic_session_run_hooks.py
index 1fb00343ef..b499cdf7f8 100644
--- a/tensorflow/python/training/basic_session_run_hooks.py
+++ b/tensorflow/python/training/basic_session_run_hooks.py
@@ -514,6 +514,8 @@ class StepCounterHook(session_run_hook.SessionRunHook):
 
     self._summary_writer = summary_writer
     self._output_dir = output_dir
+    self._last_global_step = None
+    self._global_step_check_count = 0
 
   def begin(self):
     if self._summary_writer is None and self._output_dir:
@@ -545,6 +547,30 @@ class StepCounterHook(session_run_hook.SessionRunHook):
             self._summary_writer.add_summary(summary, global_step)
           logging.info("%s: %g", self._summary_tag, steps_per_sec)
 
+    # Check whether the global step has been increased. Here, we do not use the
+    # timer.last_triggered_step as the timer might record a different global
+    # step value such that the comparison could be unreliable. For simplicity,
+    # we just compare the stale_global_step with previously recorded version.
+    if stale_global_step == self._last_global_step:
+      # Here, we use a counter to count how many times we have observed that the
+      # global step has not been increased. For some Optimizers, the global step
+      # is not increased each time by design. For example, SyncReplicaOptimizer
+      # doesn't increase the global step in worker's main train step.
+      self._global_step_check_count += 1
+      if self._global_step_check_count % 20 == 0:
+        self._global_step_check_count = 0
+        logging.warning(
+            "It seems that global step (tf.train.get_global_step) has not "
+            "been increased. Current value (could be stable): %s vs previous "
+            "value: %s. You could increase the global step by passing "
+            "tf.train.get_global_step() to Optimizer.apply_gradients or "
+            "Optimizer.minimize.", stale_global_step, self._last_global_step)
+    else:
+      # Whenever we observe the increment, reset the counter.
+      self._global_step_check_count = 0
+
+    self._last_global_step = stale_global_step
+
 
 class NanLossDuringTrainingError(RuntimeError):
 
diff --git a/tensorflow/python/training/basic_session_run_hooks_test.py b/tensorflow/python/training/basic_session_run_hooks_test.py
index e7ff7e1221..2547661e52 100644
--- a/tensorflow/python/training/basic_session_run_hooks_test.py
+++ b/tensorflow/python/training/basic_session_run_hooks_test.py
@@ -780,9 +780,12 @@ class StepCounterHookTest(test.TestCase):
       hook.begin()
       sess.run(variables_lib.global_variables_initializer())
       mon_sess = monitored_session._HookedSession(sess, [hook])
-      for _ in range(30):
-        time.sleep(0.01)
-        mon_sess.run(train_op)
+      with test.mock.patch.object(tf_logging, 'warning') as mock_log:
+        for _ in range(30):
+          time.sleep(0.01)
+          mon_sess.run(train_op)
+        # logging.warning should not be called.
+        self.assertIsNone(mock_log.call_args)
       hook.end(sess)
       summary_writer.assert_summaries(
           test_case=self,
@@ -857,6 +860,24 @@ class StepCounterHookTest(test.TestCase):
       summary_value = summary_writer.summaries[2][0].value[0]
       self.assertEqual('bar/foo/sec', summary_value.tag)
 
+  def test_log_warning_if_global_step_not_increased(self):
+    with ops.Graph().as_default(), session_lib.Session() as sess:
+      variables.get_or_create_global_step()
+      train_op = training_util._increment_global_step(0)  # keep same.
+      sess.run(variables_lib.global_variables_initializer())
+      hook = basic_session_run_hooks.StepCounterHook(
+          every_n_steps=1, every_n_secs=None)
+      hook.begin()
+      mon_sess = monitored_session._HookedSession(sess, [hook])
+      mon_sess.run(train_op)  # Run one step to record global step.
+      with test.mock.patch.object(tf_logging, 'warning') as mock_log:
+        for _ in range(30):
+          mon_sess.run(train_op)
+        self.assertRegexpMatches(
+            str(mock_log.call_args),
+            'global step.*has not been increased')
+      hook.end(sess)
+
 
 class SummarySaverHookTest(test.TestCase):
 
-- 
GitLab


From c486a9177192f652320d37a5fdf33ab9a3a789f2 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 5 Dec 2017 00:08:02 -0800
Subject: [PATCH 0627/1225] Fix bugs in neutral element code and add more unit
 tests to cover matmul with input shape != output shape.

PiperOrigin-RevId: 177920882
---
 .../grappler/optimizers/constant_folding.cc   | 54 +++++-------
 .../optimizers/constant_folding_test.cc       | 86 +++++++++++--------
 2 files changed, 74 insertions(+), 66 deletions(-)

diff --git a/tensorflow/core/grappler/optimizers/constant_folding.cc b/tensorflow/core/grappler/optimizers/constant_folding.cc
index 84f3cc9df7..a8c3f897d6 100644
--- a/tensorflow/core/grappler/optimizers/constant_folding.cc
+++ b/tensorflow/core/grappler/optimizers/constant_folding.cc
@@ -1254,8 +1254,11 @@ void ConstantFolding::ReplaceAddOrMulWithIdentity(int input_to_forward,
 Status ConstantFolding::ReplaceAddOrMulWithConstant(
     double value, const TensorShapeProto& shape, NodeDef* node) {
   AttrValue tensor_attr;
-  TF_RETURN_IF_ERROR(CreateConstantTensorAttrValue(node->attr().at("T").type(),
-                                                   value, shape, &tensor_attr));
+  AttrValue dtype_attr = node->attr().at("T");
+  TF_RETURN_IF_ERROR(CreateConstantTensorAttrValue(dtype_attr.type(), value,
+                                                   shape, &tensor_attr));
+  node->clear_attr();
+  node->mutable_attr()->insert({"dtype", dtype_attr});
   node->mutable_attr()->insert({"value", tensor_attr});
   node->set_op("Const");
   // Convert all inputs to control dependencies.
@@ -1333,55 +1336,44 @@ Status ConstantFolding::SimplifyGraph(GraphDef* output,
           properties.GetOutputProperties(node.name())[0].shape();
       const TensorShapeProto& x_shape =
           properties.GetInputProperties(node.name())[0].shape();
-
-      // Simplify multiplication by or addition of zeros.
-      const bool x_is_zero = IsZeros(*x);
-      const bool x_matches_output_shape = ShapesEqual(output_shape, x_shape);
-      if (x_is_zero) {
-        if ((is_mul && x_matches_output_shape) || is_matmul) {
-          // 0 * y = 0
-          ReplaceAddOrMulWithIdentity(0, &node);
-        } else {
-          // 0 + y = y.
-          ReplaceAddOrMulWithIdentity(1, &node);
-        }
-        continue;
-      }
       const TensorShapeProto& y_shape =
           properties.GetInputProperties(node.name())[1].shape();
+      const bool x_is_zero = IsZeros(*x);
+      const bool x_matches_output_shape = ShapesEqual(output_shape, x_shape);
       const bool y_is_zero = IsZeros(*y);
       const bool y_matches_output_shape = ShapesEqual(output_shape, y_shape);
-      if (y_is_zero) {
-        if ((is_mul && y_matches_output_shape) || is_matmul) {
-          // x * 0 = 0
+
+      // Simplify addition of zeros.
+      if (is_add) {
+        if (x_is_zero && y_matches_output_shape) {
+          // 0 + y = y.
           ReplaceAddOrMulWithIdentity(1, &node);
-        } else {
+          continue;
+        } else if (y_is_zero && x_matches_output_shape) {
           // x + 0 = y.
           ReplaceAddOrMulWithIdentity(0, &node);
+          continue;
         }
-        continue;
       }
 
+      // Simplify element-wise multiplication by ones.
       if (is_mul) {
-        // Simplify scalar multiplication by zeros where, due to broadcasting,
-        // the output shape does not match the shape of the zero input.
-        if (x_is_zero || y_is_zero) {
-          TF_RETURN_IF_ERROR(
-              ReplaceAddOrMulWithConstant(0, output_shape, &node));
-          continue;
-        }
-
-        // Simplify multiplication by ones.
         if (IsOnes(*x) && y_matches_output_shape) {
           // 1 * y = y.
           ReplaceAddOrMulWithIdentity(1, &node);
           continue;
-        } else if (IsOnes(*y) && x_matches_output_shape) {
+        }
+        if (IsOnes(*y) && x_matches_output_shape) {
           // x * 1 = x.
           ReplaceAddOrMulWithIdentity(0, &node);
           continue;
         }
       }
+
+      // Simplify multiplication and matmul by zeros.
+      if (x_is_zero || y_is_zero) {
+        TF_RETURN_IF_ERROR(ReplaceAddOrMulWithConstant(0, output_shape, &node));
+      }
     }
   }
   return Status::OK();
diff --git a/tensorflow/core/grappler/optimizers/constant_folding_test.cc b/tensorflow/core/grappler/optimizers/constant_folding_test.cc
index a17ec733ea..4bd50e48d2 100644
--- a/tensorflow/core/grappler/optimizers/constant_folding_test.cc
+++ b/tensorflow/core/grappler/optimizers/constant_folding_test.cc
@@ -84,6 +84,10 @@ TEST_F(ConstantFoldingTest, NeutralElement) {
                                 ops::Placeholder::Shape(TensorShape({2, 2})));
     Output y = ops::Placeholder(s.WithOpName("y"), DT_FLOAT,
                                 ops::Placeholder::Shape(TensorShape({2, 2})));
+    Output a = ops::Placeholder(s.WithOpName("a"), DT_FLOAT,
+                                ops::Placeholder::Shape(TensorShape({3, 2})));
+    Output b = ops::Placeholder(s.WithOpName("b"), DT_FLOAT,
+                                ops::Placeholder::Shape(TensorShape({2, 3})));
     Output zeros = !use_const ? ops::ZerosLike(s.WithOpName("zeros"), x)
                               : ops::Const(s.WithOpName("zeros"), 0.0f, {2, 2});
     Output zeros_broadcast =
@@ -94,16 +98,20 @@ TEST_F(ConstantFoldingTest, NeutralElement) {
     Output mul2 = ops::Mul(s.WithOpName("mul2"), zeros, y);
     Output mul3 = ops::Mul(s.WithOpName("mul3"), x, ones);
     Output mul4 = ops::Mul(s.WithOpName("mul4"), ones, y);
-    Output mul5 = ops::Mul(s.WithOpName("mul1"), x, zeros_broadcast);
-    Output mul6 = ops::Mul(s.WithOpName("mul2"), zeros_broadcast, y);
+    Output mul5 = ops::Mul(s.WithOpName("mul5"), x, zeros_broadcast);
+    Output mul6 = ops::Mul(s.WithOpName("mul6"), zeros_broadcast, y);
     Output matmul1 = ops::MatMul(s.WithOpName("matmul1"), x, zeros);
     Output matmul2 = ops::MatMul(s.WithOpName("matmul2"), zeros, y);
+    Output matmul3 = ops::MatMul(s.WithOpName("matmul3"), a, zeros);
+    Output matmul4 = ops::MatMul(s.WithOpName("matmul4"), zeros, b);
     Output add1 = ops::Add(s.WithOpName("add1"), x, zeros);
     Output add2 = ops::Add(s.WithOpName("add2"), zeros, y);
-    Output addn =
-        ops::AddN(s, {mul1, mul2, mul3, mul4, matmul1, matmul2, add1, add2});
+    Output addn = ops::AddN(
+        s.WithOpName("addn"),
+        {mul1, mul2, mul3, mul4, mul5, mul6, matmul1, matmul2, add1, add2});
     GrapplerItem item;
     TF_CHECK_OK(s.ToGraphDef(&item.graph));
+    item.fetch = {"addn", "matmul3", "matmul4"};
 
     ConstantFolding optimizer(RewriterConfig::AGGRESSIVE,
                               nullptr /* cpu_device */);
@@ -111,35 +119,17 @@ TEST_F(ConstantFoldingTest, NeutralElement) {
     Status status = optimizer.Optimize(nullptr, item, &output);
     TF_EXPECT_OK(status);
 
-    EXPECT_EQ(16, output.node_size());
+    EXPECT_EQ(20, output.node_size());
     for (int i = 0; i < output.node_size(); ++i) {
       const NodeDef& node = output.node(i);
       const string& name = node.name();
       if (name == "mul1") {
-        if (use_const) {
-          EXPECT_EQ("Const", node.op());
-          EXPECT_EQ("^x", node.input(0));
-        } else {
-          EXPECT_EQ("Identity", node.op());
-          EXPECT_EQ("zeros", node.input(0));
-          EXPECT_EQ("^x", node.input(1));
-        }
+        EXPECT_EQ("Const", node.op());
+        EXPECT_EQ("^x", node.input(0));
+        EXPECT_EQ("^zeros", node.input(1));
       } else if (name == "mul2") {
-        if (use_const) {
-          EXPECT_EQ("Const", node.op());
-          EXPECT_EQ("^y", node.input(0));
-        } else {
-          EXPECT_EQ("Identity", node.op());
-          EXPECT_EQ("zeros", node.input(0));
-          EXPECT_EQ("^y", node.input(1));
-        }
-      } else if (name == "matmul1") {
-        EXPECT_EQ("Identity", node.op());
-        EXPECT_EQ("zeros", node.input(0));
-        EXPECT_EQ("^x", node.input(1));
-      } else if (name == "matmul2") {
-        EXPECT_EQ("Identity", node.op());
-        EXPECT_EQ("zeros", node.input(0));
+        EXPECT_EQ("Const", node.op());
+        EXPECT_EQ("^zeros", node.input(0));
         EXPECT_EQ("^y", node.input(1));
       } else if (name == "mul3") {
         EXPECT_EQ("Identity", node.op());
@@ -152,23 +142,39 @@ TEST_F(ConstantFoldingTest, NeutralElement) {
       } else if (name == "mul5") {
         EXPECT_EQ("Const", node.op());
         EXPECT_EQ("^x", node.input(0));
-        EXPECT_EQ("^ones", node.input(1));
+        EXPECT_EQ("^zeros_broadcast", node.input(1));
+      } else if (name == "mul6") {
+        EXPECT_EQ("Const", node.op());
+        EXPECT_EQ("^zeros_broadcast", node.input(0));
+        EXPECT_EQ("^y", node.input(1));
+      } else if (name == "matmul1") {
+        EXPECT_EQ("Const", node.op());
+        EXPECT_EQ("^x", node.input(0));
+        EXPECT_EQ("^zeros", node.input(1));
+      } else if (name == "matmul2") {
+        EXPECT_EQ("Const", node.op());
+        EXPECT_EQ("^zeros", node.input(0));
+        EXPECT_EQ("^y", node.input(1));
+      } else if (name == "matmul3") {
+        EXPECT_EQ("Const", node.op());
+        EXPECT_EQ("^a", node.input(0));
+        EXPECT_EQ("^zeros", node.input(1));
         TensorProto t = node.attr().at("value").tensor();
         EXPECT_EQ(1, t.float_val_size());
         EXPECT_EQ(0, t.float_val(0));
         EXPECT_EQ(2, t.tensor_shape().dim_size());
-        EXPECT_EQ(1, t.tensor_shape().dim(0).size());
+        EXPECT_EQ(3, t.tensor_shape().dim(0).size());
         EXPECT_EQ(2, t.tensor_shape().dim(1).size());
-      } else if (name == "mul6") {
+      } else if (name == "matmul4") {
         EXPECT_EQ("Const", node.op());
-        EXPECT_EQ("^y", node.input(0));
-        EXPECT_EQ("^ones", node.input(1));
+        EXPECT_EQ("^zeros", node.input(0));
+        EXPECT_EQ("^b", node.input(1));
         TensorProto t = node.attr().at("value").tensor();
         EXPECT_EQ(1, t.float_val_size());
         EXPECT_EQ(0, t.float_val(0));
         EXPECT_EQ(2, t.tensor_shape().dim_size());
-        EXPECT_EQ(1, t.tensor_shape().dim(0).size());
-        EXPECT_EQ(2, t.tensor_shape().dim(1).size());
+        EXPECT_EQ(2, t.tensor_shape().dim(0).size());
+        EXPECT_EQ(3, t.tensor_shape().dim(1).size());
       } else if (name == "add1") {
         EXPECT_EQ("Identity", node.op());
         EXPECT_EQ("x", node.input(0));
@@ -178,6 +184,16 @@ TEST_F(ConstantFoldingTest, NeutralElement) {
         EXPECT_EQ("y", node.input(0));
         EXPECT_EQ("^zeros", node.input(1));
       }
+      const std::set<string> square_zero_const{"mul1", "mul2",    "mul5",
+                                               "mul6", "matmul1", "matmul2"};
+      if (square_zero_const.count(name) > 0) {
+        TensorProto t = node.attr().at("value").tensor();
+        EXPECT_EQ(1, t.float_val_size());
+        EXPECT_EQ(0, t.float_val(0));
+        EXPECT_EQ(2, t.tensor_shape().dim_size());
+        EXPECT_EQ(2, t.tensor_shape().dim(0).size());
+        EXPECT_EQ(2, t.tensor_shape().dim(1).size());
+      }
     }
   }
 }
-- 
GitLab


From 523418530fdd59b355a5c7a96cdf9c27257c1f27 Mon Sep 17 00:00:00 2001
From: Blake Hechtman <blakehechtman@google.com>
Date: Tue, 5 Dec 2017 02:50:18 -0800
Subject: [PATCH 0628/1225] [XLA] Mark Rng as side-effecting and add a
 rematerialization test to ensure that rng instructions are not
 rematerialized. This also lists Rng as non-rematerializable.

PiperOrigin-RevId: 177932160
---
 .../compiler/xla/service/hlo_instruction.cc   |  1 +
 .../xla/service/hlo_instruction_test.cc       | 42 -----------
 .../xla/service/hlo_rematerialization_test.cc | 70 +++++++++++++++++++
 3 files changed, 71 insertions(+), 42 deletions(-)

diff --git a/tensorflow/compiler/xla/service/hlo_instruction.cc b/tensorflow/compiler/xla/service/hlo_instruction.cc
index 45825c7c76..b2700fdbdb 100644
--- a/tensorflow/compiler/xla/service/hlo_instruction.cc
+++ b/tensorflow/compiler/xla/service/hlo_instruction.cc
@@ -1000,6 +1000,7 @@ bool HloInstruction::HasSideEffect() const {
     case HloOpcode::kSendDone:
     case HloOpcode::kRecv:
     case HloOpcode::kRecvDone:
+    case HloOpcode::kRng:
     case HloOpcode::kInfeed:
     case HloOpcode::kOutfeed:
     case HloOpcode::kTrace:
diff --git a/tensorflow/compiler/xla/service/hlo_instruction_test.cc b/tensorflow/compiler/xla/service/hlo_instruction_test.cc
index 11420cae63..aa3fd0cf4f 100644
--- a/tensorflow/compiler/xla/service/hlo_instruction_test.cc
+++ b/tensorflow/compiler/xla/service/hlo_instruction_test.cc
@@ -1091,48 +1091,6 @@ TEST_F(HloInstructionTest, CloneOfFusionPreservesShape) {
                                root2->operand(1)->operand(0)->shape()));
 }
 
-TEST_F(HloInstructionTest, IsRandomFusable) {
-  auto shape = ShapeUtil::MakeShape(F32, {2, 2});
-  {
-    auto builder = HloComputation::Builder(TestName());
-    auto hlo_module = CreateNewModule();
-    auto const0 = builder.AddInstruction(HloInstruction::CreateConstant(
-        Literal::CreateR0<float>(0.0)));
-    auto const1 = builder.AddInstruction(HloInstruction::CreateConstant(
-        Literal::CreateR0<float>(1.0)));
-    auto rng = builder.AddInstruction(HloInstruction::CreateRng(
-        shape, RandomDistribution::RNG_NORMAL, {const0, const1}));
-
-    auto* computation = hlo_module->AddEntryComputation(builder.Build());
-    computation->CreateFusionInstruction({rng, const0, const1},
-      HloInstruction::FusionKind::kLoop);
-
-    auto* root = computation->root_instruction();
-
-    EXPECT_EQ(HloOpcode::kFusion, root->opcode());
-  }
-  {
-    auto builder = HloComputation::Builder(TestName());
-    auto hlo_module = CreateNewModule();
-    auto const0 = builder.AddInstruction(HloInstruction::CreateConstant(
-        Literal::CreateR0<float>(0.0)));
-    auto const1 = builder.AddInstruction(HloInstruction::CreateConstant(
-        Literal::CreateR0<float>(1.0)));
-    auto rng = builder.AddInstruction(HloInstruction::CreateRng(
-        shape, RandomDistribution::RNG_NORMAL, {const0, const1}));
-    builder.AddInstruction(HloInstruction::CreateUnary(
-        shape, HloOpcode::kNegate, rng));
-    auto* computation = hlo_module->AddEntryComputation(builder.Build());
-    computation->CreateFusionInstruction({rng, const0, const1},
-      HloInstruction::FusionKind::kLoop);
-
-    auto* root = computation->root_instruction();
-
-    EXPECT_EQ(HloOpcode::kFusion, root->operand(0)->opcode());
-  }
-}
-
-
 TEST_F(HloInstructionTest, CloneSuffixNames) {
   // Test that the suffix string added to cloned instructions is not
   // duplicated. Rather a numeric incrementing value should be appended. That
diff --git a/tensorflow/compiler/xla/service/hlo_rematerialization_test.cc b/tensorflow/compiler/xla/service/hlo_rematerialization_test.cc
index d88aa4bb56..c9b57166af 100644
--- a/tensorflow/compiler/xla/service/hlo_rematerialization_test.cc
+++ b/tensorflow/compiler/xla/service/hlo_rematerialization_test.cc
@@ -323,6 +323,76 @@ TEST_F(HloRematerializationTest, RematerializeNestedComputations) {
   EXPECT_EQ(inner_computation->instruction_count(), 8);
 }
 
+TEST_F(HloRematerializationTest, RngNotRematerialized) {
+  // Test that a single rng is not rematerialized:
+  //
+  // Entry computation:
+  //   F32[] %param = {...}
+  //   F32[1024] rng = rng(param)
+  //   F32[1024] tanh = tanh(rng)
+  //   F32[1024] exp = exp(rng)
+  //   F32[1024] add_0 = add(rng, tanh)              // LIVE: add_0 + rng +
+  //                                                 //       tanh + exp
+  //
+  //   F32[1024] add_1 = add(rng, add(exp, add_0))   // LIVE: add_1 + add_0 +
+  //                                                 //       rng + tanh + exp
+  //
+  //   F32[1024] add_2 = add(rng, add(tanh, add_1))  // LIVE: add_2 + add_1 +
+  //                                                 //       rng + tanh + exp
+  auto module = CreateNewModule();
+
+  auto builder = HloComputation::Builder(TestName());
+  auto param = builder.AddInstruction(
+      HloInstruction::CreateParameter(0, scalar_shape_, "param"));
+  auto rng = builder.AddInstruction(HloInstruction::CreateRng(
+      vec1024_shape_, RandomDistribution::RNG_BERNOULLI, {param}));
+  auto tanh = builder.AddInstruction(
+      HloInstruction::CreateUnary(vec1024_shape_, HloOpcode::kTanh, rng));
+  auto exp = builder.AddInstruction(
+      HloInstruction::CreateUnary(vec1024_shape_, HloOpcode::kExp, rng));
+  auto add_0 = builder.AddInstruction(
+      HloInstruction::CreateBinary(vec1024_shape_, HloOpcode::kAdd, rng, tanh));
+  auto add_1 = builder.AddInstruction(HloInstruction::CreateBinary(
+      vec1024_shape_, HloOpcode::kAdd, rng,
+      builder.AddInstruction(HloInstruction::CreateBinary(
+          vec1024_shape_, HloOpcode::kAdd, exp, add_0))));
+  builder.AddInstruction(HloInstruction::CreateBinary(
+      vec1024_shape_, HloOpcode::kAdd, rng,
+      builder.AddInstruction(HloInstruction::CreateBinary(
+          vec1024_shape_, HloOpcode::kAdd, tanh, add_1))));
+  HloComputation* entry_computation =
+      module->AddEntryComputation(builder.Build());
+
+  auto count_rngs = [](const HloComputation* computation) {
+    int64 rng_count = 0;
+    for (auto* instruction : computation->instructions()) {
+      if (instruction->opcode() == HloOpcode::kRng) {
+        ++rng_count;
+      }
+    }
+    return rng_count;
+  };
+  // Before rematerialization there should be a single broadcast rng in
+  // the graph.
+  ASSERT_EQ(count_rngs(entry_computation), 1);
+  const int64 original_instruction_count =
+      entry_computation->instruction_count();
+  SequentialHloOrdering::HloModuleSequence sequence;
+  // Pick a memory limit some where between 24KB (initial peak memory including
+  // parameter and output) and 20KB (peak memory possible with
+  // rematerialization).
+  TF_ASSERT_OK_AND_ASSIGN(
+      bool changed, HloRematerialization::RematerializeAndSchedule(
+                        ByteSizeOf,
+                        /*memory_limit_bytes=*/4 * ByteSizeOf(vec1024_shape_),
+                        module.get(), &sequence));
+  EXPECT_TRUE(changed);
+  // The rng should not have been rematerialized.
+  EXPECT_EQ(count_rngs(entry_computation), 1);
+  // There should have been rematerialization.
+  EXPECT_GT(entry_computation->instruction_count(), original_instruction_count);
+}
+
 TEST_F(HloRematerializationTest, InstructionRematerializedMultipleTimes) {
   // Test that a single instruction is rematerialized several times. Module:
   //
-- 
GitLab


From 4a408e88ed93df9cbea18f16ab3b6c7dd427ac7c Mon Sep 17 00:00:00 2001
From: peisong <bringtree@qq.com>
Date: Tue, 5 Dec 2017 19:56:04 +0800
Subject: [PATCH 0629/1225] fix typo

---
 tensorflow/python/debug/lib/stepper.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/python/debug/lib/stepper.py b/tensorflow/python/debug/lib/stepper.py
index 1fa0b3dba2..c27b3f51cd 100644
--- a/tensorflow/python/debug/lib/stepper.py
+++ b/tensorflow/python/debug/lib/stepper.py
@@ -80,7 +80,7 @@ class NodeStepper(object):
   when they are required as data dependencies.
 
   The temporary directories are automatically clean when the NodeStepper
-  instance exits as a context mananger.
+  instance exits as a context manager.
 
   Once the tracing is complete, it will issue a run() call on the
   underlying session, using the aforementioned feed_dict prepared by the input
-- 
GitLab


From 500569f7f0459566977e1738944cae32d97a58ad Mon Sep 17 00:00:00 2001
From: Shanqing Cai <cais@google.com>
Date: Tue, 5 Dec 2017 10:06:26 -0500
Subject: [PATCH 0630/1225] Revert "Speed up safe_strtod and safe_strtof
 functions by using double-conversion library (#12102)"

This reverts commit 495bb7b9f6b55b0e431fc604ad9dbf5415016d90.
---
 tensorflow/contrib/cmake/CMakeLists.txt       |   4 -
 .../cmake/external/double_conversion.cmake    |  54 ---------
 tensorflow/contrib/makefile/Makefile          |   9 +-
 .../contrib/makefile/download_dependencies.sh |   2 -
 tensorflow/core/BUILD                         |   9 +-
 tensorflow/core/lib/strings/numbers.cc        | 112 +++++++++++++-----
 tensorflow/core/lib/strings/numbers_test.cc   |  69 -----------
 tensorflow/core/lib/strings/str_util.cc       |   8 --
 tensorflow/core/lib/strings/str_util.h        |   5 -
 tensorflow/core/lib/strings/str_util_test.cc  |   8 --
 tensorflow/workspace.bzl                      |   9 --
 11 files changed, 86 insertions(+), 203 deletions(-)
 delete mode 100644 tensorflow/contrib/cmake/external/double_conversion.cmake

diff --git a/tensorflow/contrib/cmake/CMakeLists.txt b/tensorflow/contrib/cmake/CMakeLists.txt
index ba708673b0..77a3fc0c83 100644
--- a/tensorflow/contrib/cmake/CMakeLists.txt
+++ b/tensorflow/contrib/cmake/CMakeLists.txt
@@ -160,7 +160,6 @@ include(protobuf)
 include(re2)
 include(cub)
 include(sqlite)
-include(double_conversion)
 if (tensorflow_BUILD_CC_TESTS)
   include(googletest)
 endif()
@@ -179,7 +178,6 @@ set(tensorflow_EXTERNAL_LIBRARIES
     ${protobuf_STATIC_LIBRARIES}
     ${re2_STATIC_LIBRARIES}
     ${sqlite_STATIC_LIBRARIES}
-    ${double_conversion_STATIC_LIBRARIES}
 )
 set(tensorflow_EXTERNAL_DEPENDENCIES
     zlib_copy_headers_to_destination
@@ -198,7 +196,6 @@ set(tensorflow_EXTERNAL_DEPENDENCIES
     fft2d
     re2
     sqlite_copy_headers_to_destination
-    double_conversion
 )
 
 include_directories(
@@ -221,7 +218,6 @@ include_directories(
     ${PROTOBUF_INCLUDE_DIRS}
     ${re2_INCLUDE_DIR}
     ${sqlite_INCLUDE_DIR}
-    ${double_conversion_INCLUDE_DIR}
 )
 
 if(tensorflow_ENABLE_SSL_SUPPORT)
diff --git a/tensorflow/contrib/cmake/external/double_conversion.cmake b/tensorflow/contrib/cmake/external/double_conversion.cmake
deleted file mode 100644
index 527ccdc8d8..0000000000
--- a/tensorflow/contrib/cmake/external/double_conversion.cmake
+++ /dev/null
@@ -1,54 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-include (ExternalProject)
-
-set(double_conversion_INCLUDE_DIR ${CMAKE_CURRENT_BINARY_DIR}/double_conversion/src/double_conversion)
-set(double_conversion_URL https://github.com/google/double-conversion.git)
-set(double_conversion_TAG 5664746)
-set(double_conversion_BUILD ${double_conversion_INCLUDE_DIR})
-set(double_conversion_LIBRARIES ${double_conversion_BUILD}/double-conversion/libdouble-conversion.so)
-set(double_conversion_INCLUDES ${double_conversion_BUILD})
-
-if(WIN32)
-  set(double_conversion_STATIC_LIBRARIES ${double_conversion_BUILD}/double-conversion/$(Configuration)/double-conversion.lib)
-else()
-  set(double_conversion_STATIC_LIBRARIES ${double_conversion_BUILD}/double-conversion/libdouble-conversion.a)
-endif()
-
-set(double_conversion_HEADERS
-    "${double_conversion_INCLUDE_DIR}/double-conversion/bignum-dtoa.h"
-    "${double_conversion_INCLUDE_DIR}/double-conversion/cached-powers.h"
-    "${double_conversion_INCLUDE_DIR}/double-conversion/double-conversion.h"
-    "${double_conversion_INCLUDE_DIR}/double-conversion/fixed-dtoa.h"
-    "${double_conversion_INCLUDE_DIR}/double-conversion/strtod.h"
-    "${double_conversion_INCLUDE_DIR}/double-conversion/bignum.h"
-    "${double_conversion_INCLUDE_DIR}/double-conversion/diy-fp.h"
-    "${double_conversion_INCLUDE_DIR}/double-conversion/fast-dtoa.h"
-    "${double_conversion_INCLUDE_DIR}/double-conversion/ieee.h"
-    "${double_conversion_INCLUDE_DIR}/double-conversion/utils.h"
-)
-
-ExternalProject_Add(double_conversion
-    PREFIX double_conversion
-    GIT_REPOSITORY ${double_conversion_URL}
-    GIT_TAG ${double_conversion_TAG}
-    DOWNLOAD_DIR "${DOWNLOAD_LOCATION}"
-    BUILD_IN_SOURCE 1
-    INSTALL_COMMAND ""
-    CMAKE_CACHE_ARGS
-        -DCMAKE_BUILD_TYPE:STRING=Release
-        -DCMAKE_VERBOSE_MAKEFILE:BOOL=OFF
-        -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON
-)
diff --git a/tensorflow/contrib/makefile/Makefile b/tensorflow/contrib/makefile/Makefile
index 617ef25fa4..e2e6c05591 100644
--- a/tensorflow/contrib/makefile/Makefile
+++ b/tensorflow/contrib/makefile/Makefile
@@ -89,7 +89,6 @@ HOST_INCLUDES := \
 -I$(MAKEFILE_DIR)/downloads/gemmlowp \
 -I$(MAKEFILE_DIR)/downloads/nsync/public \
 -I$(MAKEFILE_DIR)/downloads/fft2d \
--I$(MAKEFILE_DIR)/downloads/double_conversion \
 -I$(HOST_GENDIR)
 ifeq ($(HAS_GEN_HOST_PROTOC),true)
 	HOST_INCLUDES += -I$(MAKEFILE_DIR)/gen/protobuf-host/include
@@ -126,9 +125,7 @@ PROTO_TEXT := $(HOST_BINDIR)proto_text
 # The list of dependencies is derived from the Bazel build file by running
 # the gen_file_lists.sh script on a system with a working Bazel setup.
 PROTO_TEXT_CC_FILES := $(shell cat $(MAKEFILE_DIR)/proto_text_cc_files.txt)
-PROTO_TEXT_PB_CC_LIST := \
-	$(shell cat $(MAKEFILE_DIR)/proto_text_pb_cc_files.txt) \
-	$(wildcard tensorflow/contrib/makefile/downloads/double_conversion/double-conversion/*.cc)
+PROTO_TEXT_PB_CC_LIST := $(shell cat $(MAKEFILE_DIR)/proto_text_pb_cc_files.txt)
 PROTO_TEXT_PB_H_LIST := $(shell cat $(MAKEFILE_DIR)/proto_text_pb_h_files.txt)
 
 # Locations of the intermediate files proto_text generates.
@@ -174,7 +171,6 @@ INCLUDES := \
 -I$(MAKEFILE_DIR)/downloads/gemmlowp \
 -I$(MAKEFILE_DIR)/downloads/nsync/public \
 -I$(MAKEFILE_DIR)/downloads/fft2d \
--I$(MAKEFILE_DIR)/downloads/double_conversion \
 -I$(PROTOGENDIR) \
 -I$(PBTGENDIR)
 ifeq ($(HAS_GEN_HOST_PROTOC),true)
@@ -330,8 +326,6 @@ $(MARCH_OPTION) \
 -I$(MAKEFILE_DIR)/downloads/gemmlowp \
 -I$(MAKEFILE_DIR)/downloads/nsync/public \
 -I$(MAKEFILE_DIR)/downloads/fft2d \
--I$(MAKEFILE_DIR)/downloads/double_conversion \
--I$(MAKEFILE_DIR)/gen/protobuf/include \
 -I$(MAKEFILE_DIR)/gen/protobuf_android/$(ANDROID_ARCH)/include \
 -I$(PROTOGENDIR) \
 -I$(PBTGENDIR)
@@ -549,7 +543,6 @@ $(wildcard tensorflow/core/platform/*/*.cc) \
 $(wildcard tensorflow/core/platform/*/*/*.cc) \
 $(wildcard tensorflow/core/util/*.cc) \
 $(wildcard tensorflow/core/util/*/*.cc) \
-$(wildcard tensorflow/contrib/makefile/downloads/double_conversion/double-conversion/*.cc) \
 tensorflow/core/util/version_info.cc
 # Remove duplicates (for version_info.cc)
 CORE_CC_ALL_SRCS := $(sort $(CORE_CC_ALL_SRCS))
diff --git a/tensorflow/contrib/makefile/download_dependencies.sh b/tensorflow/contrib/makefile/download_dependencies.sh
index 675ab24289..b610441308 100755
--- a/tensorflow/contrib/makefile/download_dependencies.sh
+++ b/tensorflow/contrib/makefile/download_dependencies.sh
@@ -33,7 +33,6 @@ NSYNC_URL="$(grep -o 'https://mirror.bazel.build/github.com/google/nsync/.*tar\.
 PROTOBUF_URL="$(grep -o 'https://mirror.bazel.build/github.com/google/protobuf/.*tar\.gz' "${BZL_FILE_PATH}" | head -n1)"
 RE2_URL="$(grep -o 'https://mirror.bazel.build/github.com/google/re2/.*tar\.gz' "${BZL_FILE_PATH}" | head -n1)"
 FFT2D_URL="$(grep -o 'http.*fft\.tgz' "${BZL_FILE_PATH}" | grep -v mirror.bazel | head -n1)"
-DOUBLE_CONVERSION_URL="$(grep -o "https.*google/double-conversion.*\.tar.gz" "${BZL_FILE_PATH}" | grep -v mirror.bazel | head -n1)"
 ABSL_URL="$(grep -o 'https://github.com/abseil/abseil-cpp/.*tar.gz' "${BZL_FILE_PATH}" | head -n1)"
 
 # TODO(petewarden): Some new code in Eigen triggers a clang bug with iOS arm64,
@@ -82,7 +81,6 @@ download_and_extract "${NSYNC_URL}" "${DOWNLOADS_DIR}/nsync"
 download_and_extract "${PROTOBUF_URL}" "${DOWNLOADS_DIR}/protobuf"
 download_and_extract "${RE2_URL}" "${DOWNLOADS_DIR}/re2"
 download_and_extract "${FFT2D_URL}" "${DOWNLOADS_DIR}/fft2d"
-download_and_extract "${DOUBLE_CONVERSION_URL}" "${DOWNLOADS_DIR}/double_conversion"
 download_and_extract "${ABSL_URL}" "${DOWNLOADS_DIR}/absl"
 
 replace_by_sed 's#static uint32x4_t p4ui_CONJ_XOR = vld1q_u32( conj_XOR_DATA );#static uint32x4_t p4ui_CONJ_XOR; // = vld1q_u32( conj_XOR_DATA ); - Removed by script#' \
diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD
index 39972f3e02..d77021c3ee 100644
--- a/tensorflow/core/BUILD
+++ b/tensorflow/core/BUILD
@@ -276,9 +276,7 @@ cc_library(
         "platform/types.h",
     ] + glob(tf_additional_proto_hdrs()) + glob(tf_env_time_hdrs()),
     copts = tf_copts(),
-    deps = tf_lib_proto_parsing_deps() + [
-        "@double_conversion//:double-conversion",
-    ],
+    deps = tf_lib_proto_parsing_deps(),
 )
 
 # This build rule (along with :lib_internal, :framework, and
@@ -1026,7 +1024,6 @@ cc_library(
     deps = [
         ":protos_all_cc_impl",
         "//third_party/eigen3",
-        "@double_conversion//:double-conversion",
         "@nsync//:nsync_cpp",
         "@protobuf_archive//:protobuf",
     ],
@@ -1051,7 +1048,6 @@ cc_library(
         ":protos_all_cc_impl",
         "//third_party/eigen3",
         "//third_party/fft2d:fft2d_headers",
-        "@double_conversion//:double-conversion",
         "@fft2d//:fft2d",
         "@gemmlowp//:gemmlowp",
         "@nsync//:nsync_cpp",
@@ -1117,7 +1113,6 @@ cc_library(
     deps = [
         ":protos_all_cc_impl",
         "//third_party/eigen3",
-        "@double_conversion//:double-conversion",
         "@protobuf_archive//:protobuf",
     ],
     alwayslink = 1,
@@ -1139,7 +1134,6 @@ cc_library(
     deps = [
         ":protos_all_cc_impl",
         "//third_party/eigen3",
-        "@double_conversion//:double-conversion",
         "@nsync//:nsync_cpp",
         "@protobuf_archive//:protobuf",
     ],
@@ -1501,7 +1495,6 @@ cc_library(
         "//tensorflow/core/platform/default/build_config:platformlib",
         "@snappy",
         "@zlib_archive//:zlib",
-        "@double_conversion//:double-conversion",
         "@protobuf_archive//:protobuf",
     ] + tf_protos_all_impl(),
 )
diff --git a/tensorflow/core/lib/strings/numbers.cc b/tensorflow/core/lib/strings/numbers.cc
index b3cca504e1..302a6967e3 100644
--- a/tensorflow/core/lib/strings/numbers.cc
+++ b/tensorflow/core/lib/strings/numbers.cc
@@ -23,9 +23,6 @@ limitations under the License.
 #include <locale>
 #include <unordered_map>
 
-#include "double-conversion/double-conversion.h"
-
-#include "tensorflow/core/lib/strings/str_util.h"
 #include "tensorflow/core/lib/strings/stringprintf.h"
 #include "tensorflow/core/platform/logging.h"
 #include "tensorflow/core/platform/macros.h"
@@ -35,15 +32,72 @@ namespace tensorflow {
 
 namespace {
 
-static inline const double_conversion::StringToDoubleConverter& StringToFloatConverter() {
-    const static double_conversion::StringToDoubleConverter converter(
-        double_conversion::StringToDoubleConverter::ALLOW_LEADING_SPACES
-        | double_conversion::StringToDoubleConverter::ALLOW_HEX
-        | double_conversion::StringToDoubleConverter::ALLOW_TRAILING_SPACES
-        | double_conversion::StringToDoubleConverter::ALLOW_CASE_INSENSIBILITY,
-        0., 0., "inf", "nan"
-    );
-    return converter;
+template <typename T>
+T locale_independent_strtonum(const char* str, const char** endptr) {
+  static const std::unordered_map<string, T> special_nums = {
+      {"inf", std::numeric_limits<T>::infinity()},
+      {"+inf", std::numeric_limits<T>::infinity()},
+      {"-inf", -std::numeric_limits<T>::infinity()},
+      {"infinity", std::numeric_limits<T>::infinity()},
+      {"+infinity", std::numeric_limits<T>::infinity()},
+      {"-infinity", -std::numeric_limits<T>::infinity()},
+      {"nan", std::numeric_limits<T>::quiet_NaN()},
+      {"+nan", std::numeric_limits<T>::quiet_NaN()},
+      {"-nan", -std::numeric_limits<T>::quiet_NaN()},
+  };
+  std::stringstream s(str);
+
+  // Check if str is one of the special numbers.
+  string special_num_str;
+  s >> special_num_str;
+
+  for (int i = 0; i < special_num_str.length(); ++i) {
+    special_num_str[i] =
+        std::tolower(special_num_str[i], std::locale::classic());
+  }
+
+  auto entry = special_nums.find(special_num_str);
+  if (entry != special_nums.end()) {
+    *endptr = str + (s.eof() ? static_cast<std::iostream::pos_type>(strlen(str))
+                             : s.tellg());
+    return entry->second;
+  } else {
+    // Perhaps it's a hex number
+    if (special_num_str.compare(0, 2, "0x") == 0 ||
+        special_num_str.compare(0, 3, "-0x") == 0) {
+      return strtol(str, const_cast<char**>(endptr), 16);
+    }
+  }
+  // Reset the stream
+  s.str(str);
+  s.clear();
+  // Use the "C" locale
+  s.imbue(std::locale::classic());
+
+  T result;
+  s >> result;
+
+  // Set to result to what strto{f,d} functions would have returned. If the
+  // number was outside the range, the stringstream sets the fail flag, but
+  // returns the +/-max() value, whereas strto{f,d} functions return +/-INF.
+  if (s.fail()) {
+    if (result == std::numeric_limits<T>::max()) {
+      result = std::numeric_limits<T>::infinity();
+      s.clear(s.rdstate() & ~std::ios::failbit);
+    } else if (result == -std::numeric_limits<T>::max()) {
+      result = -std::numeric_limits<T>::infinity();
+      s.clear(s.rdstate() & ~std::ios::failbit);
+    }
+  }
+
+  if (endptr) {
+    *endptr =
+        str +
+        (s.fail() ? static_cast<std::iostream::pos_type>(0)
+                  : (s.eof() ? static_cast<std::iostream::pos_type>(strlen(str))
+                             : s.tellg()));
+  }
+  return result;
 }
 
 }  // namespace
@@ -111,8 +165,8 @@ char* DoubleToBuffer(double value, char* buffer) {
     // larger than the precision we asked for.
     DCHECK(snprintf_result > 0 && snprintf_result < kFastToBufferSize);
 
-    auto parsed_value = double{};
-    full_precision_needed = !safe_strtod(buffer, &parsed_value) || parsed_value != value;
+    full_precision_needed =
+        locale_independent_strtonum<double>(buffer, nullptr) != value;
   }
 
   if (full_precision_needed) {
@@ -248,23 +302,25 @@ bool safe_strtou32(StringPiece str, uint32* value) {
 }
 
 bool safe_strtof(const char* str, float* value) {
-  int processed_characters_count = -1;
-  auto len = str_util::Strnlen(str, kFastToBufferSize);
-  *value = StringToFloatConverter().StringToFloat(
-      str,
-      len,
-      &processed_characters_count);
-  return processed_characters_count > 0;
+  const char* endptr;
+  *value = locale_independent_strtonum<float>(str, &endptr);
+  while (isspace(*endptr)) ++endptr;
+  // Ignore range errors from strtod/strtof.
+  // The values it returns on underflow and
+  // overflow are the right fallback in a
+  // robust setting.
+  return *str != '\0' && *endptr == '\0';
 }
 
 bool safe_strtod(const char* str, double* value) {
-  int processed_characters_count = -1;
-  auto len = str_util::Strnlen(str, kFastToBufferSize);
-  *value = StringToFloatConverter().StringToDouble(
-      str,
-      len,
-      &processed_characters_count);
-  return processed_characters_count > 0;
+  const char* endptr;
+  *value = locale_independent_strtonum<double>(str, &endptr);
+  while (isspace(*endptr)) ++endptr;
+  // Ignore range errors from strtod/strtof.
+  // The values it returns on underflow and
+  // overflow are the right fallback in a
+  // robust setting.
+  return *str != '\0' && *endptr == '\0';
 }
 
 char* FloatToBuffer(float value, char* buffer) {
diff --git a/tensorflow/core/lib/strings/numbers_test.cc b/tensorflow/core/lib/strings/numbers_test.cc
index df395c301e..e15161de66 100644
--- a/tensorflow/core/lib/strings/numbers_test.cc
+++ b/tensorflow/core/lib/strings/numbers_test.cc
@@ -16,7 +16,6 @@ limitations under the License.
 #include "tensorflow/core/lib/strings/numbers.h"
 
 #include <string>
-#include <cmath>
 #include "tensorflow/core/platform/test.h"
 
 namespace tensorflow {
@@ -278,40 +277,7 @@ TEST(safe_strtof, Float) {
   EXPECT_TRUE(safe_strtof("-0x2A", &result));
   EXPECT_EQ(-42.0f, result);
 
-  EXPECT_TRUE(safe_strtof(" -0x2", &result));
-  EXPECT_EQ(-2.0f, result);
-
-  EXPECT_TRUE(safe_strtof("8 \t", &result));
-  EXPECT_EQ(8.0f, result);
-
-  EXPECT_TRUE(safe_strtof("\t20.0\t ", &result));
-  EXPECT_EQ(20.0f, result);
-
   EXPECT_FALSE(safe_strtof("-infinity is awesome", &result));
-
-  EXPECT_TRUE(safe_strtof("-inf", &result));
-  EXPECT_EQ(-std::numeric_limits<float>::infinity(), result);
-
-  EXPECT_TRUE(safe_strtof("+inf", &result));
-  EXPECT_EQ(std::numeric_limits<float>::infinity(), result);
-
-  EXPECT_TRUE(safe_strtof("InF", &result));
-  EXPECT_EQ(std::numeric_limits<float>::infinity(), result);
-
-  EXPECT_TRUE(safe_strtof("-INF", &result));
-  EXPECT_EQ(-std::numeric_limits<float>::infinity(), result);
-
-  EXPECT_TRUE(safe_strtof("nan", &result));
-  EXPECT_TRUE(std::isnan(result));
-
-  EXPECT_TRUE(safe_strtof("-nan", &result));
-  EXPECT_TRUE(std::isnan(result));
-
-  EXPECT_TRUE(safe_strtof("-NaN", &result));
-  EXPECT_TRUE(std::isnan(result));
-
-  EXPECT_TRUE(safe_strtof("+NAN", &result));
-  EXPECT_TRUE(std::isnan(result));
 }
 
 TEST(safe_strtod, Double) {
@@ -330,41 +296,6 @@ TEST(safe_strtod, Double) {
 
   EXPECT_TRUE(safe_strtod("1e-325", &result));
   EXPECT_EQ(0, result);
-
-  EXPECT_TRUE(safe_strtod(" -0x1c", &result));
-  EXPECT_EQ(-28.0, result);
-
-  EXPECT_TRUE(safe_strtod("50 \t", &result));
-  EXPECT_EQ(50.0, result);
-
-  EXPECT_TRUE(safe_strtod("\t82.0\t ", &result));
-  EXPECT_EQ(82.0, result);
-
-  EXPECT_FALSE(safe_strtod("infinity", &result));
-
-  EXPECT_TRUE(safe_strtod("-inf", &result));
-  EXPECT_EQ(-std::numeric_limits<double>::infinity(), result);
-
-  EXPECT_TRUE(safe_strtod("+inf", &result));
-  EXPECT_EQ(std::numeric_limits<double>::infinity(), result);
-
-  EXPECT_TRUE(safe_strtod("InF", &result));
-  EXPECT_EQ(std::numeric_limits<double>::infinity(), result);
-
-  EXPECT_TRUE(safe_strtod("-INF", &result));
-  EXPECT_EQ(-std::numeric_limits<double>::infinity(), result);
-
-  EXPECT_TRUE(safe_strtod("nan", &result));
-  EXPECT_TRUE(std::isnan(result));
-
-  EXPECT_TRUE(safe_strtod("-nan", &result));
-  EXPECT_TRUE(std::isnan(result));
-
-  EXPECT_TRUE(safe_strtod("-NaN", &result));
-  EXPECT_TRUE(std::isnan(result));
-
-  EXPECT_TRUE(safe_strtod("+NAN", &result));
-  EXPECT_TRUE(std::isnan(result));
 }
 
 }  // namespace strings
diff --git a/tensorflow/core/lib/strings/str_util.cc b/tensorflow/core/lib/strings/str_util.cc
index 0ae6c66080..d28857803d 100644
--- a/tensorflow/core/lib/strings/str_util.cc
+++ b/tensorflow/core/lib/strings/str_util.cc
@@ -452,13 +452,5 @@ bool SplitAndParseAsFloats(StringPiece text, char delim,
                                     result);
 }
 
-size_t Strnlen(const char* str, const size_t string_max_len) {
-  size_t len = 0;
-  while (len < string_max_len && str[len] != '\0') {
-    ++len;
-  }
-  return len;
-}
-
 }  // namespace str_util
 }  // namespace tensorflow
diff --git a/tensorflow/core/lib/strings/str_util.h b/tensorflow/core/lib/strings/str_util.h
index b0d774a05c..8cea0f0718 100644
--- a/tensorflow/core/lib/strings/str_util.h
+++ b/tensorflow/core/lib/strings/str_util.h
@@ -209,11 +209,6 @@ std::vector<string> Split(StringPiece text, char delims, Predicate p) {
   return Split(text, StringPiece(&delims, 1), p);
 }
 
-// Returns the length of the given null-terminated byte string 'str'.
-// Returns 'string_max_len' if the null character was not found in the first
-// 'string_max_len' bytes of 'str'.
-size_t Strnlen(const char* str, const size_t string_max_len);
-
 }  // namespace str_util
 }  // namespace tensorflow
 
diff --git a/tensorflow/core/lib/strings/str_util_test.cc b/tensorflow/core/lib/strings/str_util_test.cc
index 3a8de7c96b..d5909d17aa 100644
--- a/tensorflow/core/lib/strings/str_util_test.cc
+++ b/tensorflow/core/lib/strings/str_util_test.cc
@@ -430,12 +430,4 @@ TEST(StringReplace, EmptyStringReplaceAll) {
   EXPECT_EQ("", str_util::StringReplace("", "a", "X", /*replace_all=*/true));
 }
 
-TEST(Strnlen, Basic) {
-  EXPECT_EQ(0, str_util::Strnlen("ab", 0));
-  EXPECT_EQ(1, str_util::Strnlen("a", 1));
-  EXPECT_EQ(2, str_util::Strnlen("abcd", 2));
-  EXPECT_EQ(3, str_util::Strnlen("abc", 10));
-  EXPECT_EQ(4, str_util::Strnlen("a \t\n", 10));
-}
-
 }  // namespace tensorflow
diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl
index 34184208e3..de85cc1af0 100644
--- a/tensorflow/workspace.bzl
+++ b/tensorflow/workspace.bzl
@@ -651,15 +651,6 @@ def tf_workspace(path_prefix="", tf_repo_name=""):
       build_file = str(Label("//third_party/flatbuffers:flatbuffers.BUILD")),
   )
 
-  tf_http_archive(
-      name = "double_conversion",
-      urls = [
-          "https://mirror.bazel.build/github.com/google/double-conversion/archive/5664746c5e64dc265e7fbc1a890a6698e6ad0ebb.tar.gz",
-          "https://github.com/google/double-conversion/archive/5664746c5e64dc265e7fbc1a890a6698e6ad0ebb.tar.gz",
-      ],
-      sha256 = "ce651ba63faa55f86333f50bdd58a574327ca1565a65b875b11f5132c7c72bb6",
-      strip_prefix = "double-conversion-5664746c5e64dc265e7fbc1a890a6698e6ad0ebb",
-  )
 
   tf_http_archive(
       name = "tflite_mobilenet",
-- 
GitLab


From 009f4f36b74f7bb2492f197f79bf084acbcb1dc6 Mon Sep 17 00:00:00 2001
From: Peter Hawkins <phawkins@google.com>
Date: Tue, 5 Dec 2017 07:36:35 -0800
Subject: [PATCH 0631/1225] [TF:XLA] Add support for NCHW format to
 SpaceToDepth and DepthToSpace.

PiperOrigin-RevId: 177953076
---
 tensorflow/compiler/tests/unary_ops_test.py   | 126 ++++++++++++------
 .../tf2xla/kernels/depthtospace_op.cc         |  95 +++++++++++--
 .../tf2xla/kernels/spacetodepth_op.cc         | 112 +++++++++++++---
 3 files changed, 262 insertions(+), 71 deletions(-)

diff --git a/tensorflow/compiler/tests/unary_ops_test.py b/tensorflow/compiler/tests/unary_ops_test.py
index b96770bd46..0da7442a24 100644
--- a/tensorflow/compiler/tests/unary_ops_test.py
+++ b/tensorflow/compiler/tests/unary_ops_test.py
@@ -33,6 +33,17 @@ from tensorflow.python.ops import nn_ops
 from tensorflow.python.platform import googletest
 
 
+def nhwc_to_format(x, data_format):
+  """Converts a numpy array from NHWC format to `data_format`."""
+  rank = len(x.shape)
+  if data_format == "NCHW":
+    return np.transpose(x, [0, rank - 1] + list(range(1, rank - 1)))
+  elif data_format == "NHWC":
+    return x
+  else:
+    raise ValueError("Unknown format {}".format(data_format))
+
+
 class UnaryOpsTest(XLATestCase):
   """Test cases for unary operators."""
 
@@ -662,55 +673,88 @@ class UnaryOpsTest(XLATestCase):
         equality_test=self.ListsAreClose)
 
   def testDepthToSpace(self):
+    def make_op(data_format):
+      def op(x):
+        return array_ops.depth_to_space(x, block_size=2,
+                                        data_format=data_format)
+      return op
+
     for dtype in self.numeric_types:
-      self._assertOpOutputMatchesExpected(
-          lambda x: array_ops.depth_to_space(x, block_size=2),
-          np.array([[[[1, 2, 3, 4]]]], dtype=dtype),
-          expected=np.array([[[[1], [2]],
-                              [[3], [4]]]], dtype=dtype))
+      for data_format in ["NCHW", "NHWC"]:
+        self._assertOpOutputMatchesExpected(
+            make_op(data_format),
+            nhwc_to_format(np.array([[[[1, 2, 3, 4]]]], dtype=dtype),
+                           data_format),
+            expected=nhwc_to_format(np.array([[[[1], [2]],
+                                               [[3], [4]]]], dtype=dtype),
+                                    data_format))
 
-      self._assertOpOutputMatchesExpected(
-          lambda x: array_ops.depth_to_space(x, block_size=2),
-          np.array([[[[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]]]], dtype=dtype),
-          expected=np.array([[[[1, 2, 3], [4, 5, 6]],
-                              [[7, 8, 9], [10, 11, 12]]]], dtype=dtype))
+        self._assertOpOutputMatchesExpected(
+            make_op(data_format),
+            nhwc_to_format(
+                np.array([[[[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]]]],
+                         dtype=dtype),
+                data_format),
+            expected=nhwc_to_format(
+                np.array([[[[1, 2, 3], [4, 5, 6]], [[7, 8, 9], [10, 11, 12]]]],
+                         dtype=dtype),
+                data_format))
 
-      self._assertOpOutputMatchesExpected(
-          lambda x: array_ops.depth_to_space(x, block_size=2),
-          np.array([[[[1, 2, 3, 4],
-                      [5, 6, 7, 8]],
-                     [[9, 10, 11, 12],
-                      [13, 14, 15, 16]]]], dtype=dtype),
-          expected=np.array([[[[1], [2], [5], [6]],
-                              [[3], [4], [7], [8]],
-                              [[9], [10], [13], [14]],
-                              [[11], [12], [15], [16]]]], dtype=dtype))
+        self._assertOpOutputMatchesExpected(
+            make_op(data_format),
+            nhwc_to_format(
+                np.array([[[[1, 2, 3, 4],
+                            [5, 6, 7, 8]],
+                           [[9, 10, 11, 12],
+                            [13, 14, 15, 16]]]], dtype=dtype),
+                data_format),
+            expected=nhwc_to_format(
+                np.array([[[[1], [2], [5], [6]],
+                           [[3], [4], [7], [8]],
+                           [[9], [10], [13], [14]],
+                           [[11], [12], [15], [16]]]], dtype=dtype),
+                data_format))
 
   def testSpaceToDepth(self):
+    def make_op(data_format):
+      def op(x):
+        return array_ops.space_to_depth(x, block_size=2,
+                                        data_format=data_format)
+      return op
+
     for dtype in self.numeric_types:
-      self._assertOpOutputMatchesExpected(
-          lambda x: array_ops.space_to_depth(x, block_size=2),
-          np.array([[[[1], [2]],
-                     [[3], [4]]]], dtype=dtype),
-          expected=np.array([[[[1, 2, 3, 4]]]], dtype=dtype))
+      for data_format in ["NCHW", "NHWC"]:
+        self._assertOpOutputMatchesExpected(
+            make_op(data_format),
+            nhwc_to_format(np.array([[[[1], [2]],
+                                      [[3], [4]]]], dtype=dtype),
+                           data_format),
+            expected=nhwc_to_format(np.array([[[[1, 2, 3, 4]]]], dtype=dtype),
+                                    data_format))
 
-      self._assertOpOutputMatchesExpected(
-          lambda x: array_ops.space_to_depth(x, block_size=2),
-          np.array([[[[1, 2, 3], [4, 5, 6]],
-                     [[7, 8, 9], [10, 11, 12]]]], dtype=dtype),
-          expected=np.array([[[[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]]]],
-                            dtype=dtype))
+        self._assertOpOutputMatchesExpected(
+            make_op(data_format),
+            nhwc_to_format(np.array([[[[1, 2, 3], [4, 5, 6]],
+                                      [[7, 8, 9], [10, 11, 12]]]], dtype=dtype),
+                           data_format),
+            expected=nhwc_to_format(
+                np.array([[[[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]]]],
+                         dtype=dtype),
+                data_format))
 
-      self._assertOpOutputMatchesExpected(
-          lambda x: array_ops.space_to_depth(x, block_size=2),
-          np.array([[[[1], [2], [5], [6]],
-                     [[3], [4], [7], [8]],
-                     [[9], [10], [13], [14]],
-                     [[11], [12], [15], [16]]]], dtype=dtype),
-          expected=np.array([[[[1, 2, 3, 4],
-                               [5, 6, 7, 8]],
-                              [[9, 10, 11, 12],
-                               [13, 14, 15, 16]]]], dtype=dtype))
+        self._assertOpOutputMatchesExpected(
+            make_op(data_format),
+            nhwc_to_format(np.array([[[[1], [2], [5], [6]],
+                                      [[3], [4], [7], [8]],
+                                      [[9], [10], [13], [14]],
+                                      [[11], [12], [15], [16]]]], dtype=dtype),
+                           data_format),
+            expected=nhwc_to_format(
+                np.array([[[[1, 2, 3, 4],
+                            [5, 6, 7, 8]],
+                           [[9, 10, 11, 12],
+                            [13, 14, 15, 16]]]], dtype=dtype),
+                data_format))
 
   def _assertSoftplusMatchesExpected(self, features, dtype):
     features = np.array(features, dtype=dtype)
diff --git a/tensorflow/compiler/tf2xla/kernels/depthtospace_op.cc b/tensorflow/compiler/tf2xla/kernels/depthtospace_op.cc
index a4ea65ea89..96d7809f79 100644
--- a/tensorflow/compiler/tf2xla/kernels/depthtospace_op.cc
+++ b/tensorflow/compiler/tf2xla/kernels/depthtospace_op.cc
@@ -16,6 +16,7 @@ limitations under the License.
 #include "tensorflow/compiler/tf2xla/xla_helpers.h"
 #include "tensorflow/compiler/tf2xla/xla_op_kernel.h"
 #include "tensorflow/compiler/tf2xla/xla_op_registry.h"
+#include "tensorflow/core/util/tensor_format.h"
 
 namespace tensorflow {
 namespace {
@@ -23,6 +24,16 @@ namespace {
 class DepthToSpaceOp : public XlaOpKernel {
  public:
   explicit DepthToSpaceOp(OpKernelConstruction* ctx) : XlaOpKernel(ctx) {
+    string data_format_str;
+    OP_REQUIRES_OK(ctx, ctx->GetAttr("data_format", &data_format_str));
+    OP_REQUIRES(ctx, FormatFromString(data_format_str, &data_format_),
+                errors::InvalidArgument("Invalid data format"));
+
+    OP_REQUIRES(ctx, data_format_ == FORMAT_NCHW || data_format_ == FORMAT_NHWC,
+                errors::InvalidArgument("Unsupported data format ",
+                                        ToString(data_format_),
+                                        "; expected formats NHWC or NCHW"));
+
     OP_REQUIRES_OK(ctx, ctx->GetAttr("block_size", &block_size_));
     OP_REQUIRES(
         ctx, block_size_ > 1,
@@ -31,18 +42,79 @@ class DepthToSpaceOp : public XlaOpKernel {
 
   void Compile(XlaOpKernelContext* ctx) override {
     const TensorShape input_tensor_shape = ctx->InputShape(0);
-    // The input is presumed to be [batch, height, width, depth]
     int input_rank = input_tensor_shape.dims();
     static const int kRequiredDims = 4;
     OP_REQUIRES(ctx, kRequiredDims == input_rank,
-                errors::InvalidArgument("Input rank should be: ", kRequiredDims,
-                                        " instead of: ", input_rank));
+                errors::InvalidArgument("Input rank should be ", kRequiredDims,
+                                        "; got: ", input_rank));
     const gtl::InlinedVector<int64, 4> input_shape =
         input_tensor_shape.dim_sizes();
 
     xla::ComputationBuilder* b = ctx->builder();
     xla::ComputationDataHandle input = ctx->Input(0);
 
+    int feature_dim = GetTensorFeatureDimIndex(input_rank, data_format_);
+    int num_spatial_dims = GetTensorSpatialDims(input_rank, data_format_);
+
+    std::vector<int64> reshaped_shape;
+    std::vector<int64> transpose_order;
+    std::vector<int64> output_shape;
+    reshaped_shape.reserve(input_rank);
+    transpose_order.reserve(input_rank);
+    output_shape.reserve(input_rank);
+    if (data_format_ == FORMAT_NHWC) {
+      reshaped_shape.push_back(input_shape[0]);
+      for (int i = 0; i < num_spatial_dims; ++i) {
+        reshaped_shape.push_back(input_shape[1 + i]);
+      }
+      int64 block_elems = 1;
+      for (int i = 0; i < num_spatial_dims; ++i) {
+        reshaped_shape.push_back(block_size_);
+        block_elems *= block_size_;
+      }
+      reshaped_shape.push_back(input_shape[feature_dim] / block_elems);
+
+      transpose_order.push_back(0);
+      for (int i = 0; i < num_spatial_dims; ++i) {
+        transpose_order.push_back(i + 1);
+        transpose_order.push_back(i + 1 + num_spatial_dims);
+      }
+      transpose_order.push_back(feature_dim + num_spatial_dims);
+
+      output_shape.push_back(input_shape[0]);
+      for (int i = 0; i < num_spatial_dims; ++i) {
+        output_shape.push_back(input_shape[1 + i] * block_size_);
+      }
+      output_shape.push_back(input_shape[feature_dim] / block_elems);
+    } else {
+      // NCHW format.
+      reshaped_shape.push_back(input_shape[0]);
+      int64 block_elems = 1;
+      for (int i = 0; i < num_spatial_dims; ++i) {
+        reshaped_shape.push_back(block_size_);
+        block_elems *= block_size_;
+      }
+      reshaped_shape.push_back(input_shape[feature_dim] / block_elems);
+      for (int i = 0; i < num_spatial_dims; ++i) {
+        reshaped_shape.push_back(input_shape[2 + i]);
+      }
+
+      transpose_order.push_back(0);
+      transpose_order.push_back(1 + num_spatial_dims);
+      for (int i = 0; i < num_spatial_dims; ++i) {
+        transpose_order.push_back(2 + num_spatial_dims + i);
+        transpose_order.push_back(1 + i);
+      }
+
+      output_shape.push_back(input_shape[0]);
+      output_shape.push_back(input_shape[feature_dim] / block_elems);
+      for (int i = 0; i < num_spatial_dims; ++i) {
+        output_shape.push_back(input_shape[2 + i] * block_size_);
+      }
+    }
+
+    // Note: comments are given in NHWC format; NCHW is similar with a different
+    // dimension order.
     // 1. Reshape `input` to `reshaped` of shape:
     //
     //      [batch,
@@ -51,14 +123,14 @@ class DepthToSpaceOp : public XlaOpKernel {
     //       block_size_,
     //       block_size_,
     //       depth / (block_size_ * block_size_)]
-    OP_REQUIRES(ctx, input_shape[3] % (block_size_ * block_size_) == 0,
+    OP_REQUIRES(ctx,
+                input_shape[feature_dim] % (block_size_ * block_size_) == 0,
                 errors::InvalidArgument(
                     "Input depth dimension (", input_shape[3],
                     ") is not divisible by square of the block size (",
                     block_size_, ")"));
-    xla::ComputationDataHandle reshaped = b->Reshape(
-        input, {input_shape[0], input_shape[1], input_shape[2], block_size_,
-                block_size_, input_shape[3] / (block_size_ * block_size_)});
+
+    xla::ComputationDataHandle reshaped = b->Reshape(input, reshaped_shape);
 
     // 2. Permute dimensions of `reshaped` to produce
     //    `permuted_reshaped` of shape:
@@ -70,7 +142,7 @@ class DepthToSpaceOp : public XlaOpKernel {
     //       block_size_,
     //       depth / (block_size_ * block_size_)]
     xla::ComputationDataHandle permuted_reshaped =
-        b->Transpose(reshaped, {0, 1, 3, 2, 4, 5});
+        b->Transpose(reshaped, transpose_order);
 
     // 3. Reshape `permuted_reshaped` to flatten `block_shape` into the
     //    batch dimension, producing an output tensor of shape:
@@ -80,15 +152,14 @@ class DepthToSpaceOp : public XlaOpKernel {
     //       input_shape[2] * block_size_,
     //       depth / (block_size_ * block_size_)]
     //
-    xla::ComputationDataHandle output = b->Reshape(
-        permuted_reshaped, {input_shape[0], input_shape[1] * block_size_,
-                            input_shape[2] * block_size_,
-                            input_shape[3] / (block_size_ * block_size_)});
+    xla::ComputationDataHandle output =
+        b->Reshape(permuted_reshaped, output_shape);
 
     ctx->SetOutput(0, output);
   }
 
  private:
+  TensorFormat data_format_;
   int block_size_;
 };
 REGISTER_XLA_OP(Name("DepthToSpace"), DepthToSpaceOp);
diff --git a/tensorflow/compiler/tf2xla/kernels/spacetodepth_op.cc b/tensorflow/compiler/tf2xla/kernels/spacetodepth_op.cc
index 89befda346..806fda632c 100644
--- a/tensorflow/compiler/tf2xla/kernels/spacetodepth_op.cc
+++ b/tensorflow/compiler/tf2xla/kernels/spacetodepth_op.cc
@@ -16,6 +16,7 @@ limitations under the License.
 #include "tensorflow/compiler/tf2xla/xla_helpers.h"
 #include "tensorflow/compiler/tf2xla/xla_op_kernel.h"
 #include "tensorflow/compiler/tf2xla/xla_op_registry.h"
+#include "tensorflow/core/util/tensor_format.h"
 
 namespace tensorflow {
 namespace {
@@ -23,6 +24,16 @@ namespace {
 class SpaceToDepthOp : public XlaOpKernel {
  public:
   explicit SpaceToDepthOp(OpKernelConstruction* ctx) : XlaOpKernel(ctx) {
+    string data_format_str;
+    OP_REQUIRES_OK(ctx, ctx->GetAttr("data_format", &data_format_str));
+    OP_REQUIRES(ctx, FormatFromString(data_format_str, &data_format_),
+                errors::InvalidArgument("Invalid data format"));
+
+    OP_REQUIRES(ctx, data_format_ == FORMAT_NCHW || data_format_ == FORMAT_NHWC,
+                errors::InvalidArgument("Unsupported data format ",
+                                        ToString(data_format_),
+                                        "; expected formats NHWC or NCHW"));
+
     OP_REQUIRES_OK(ctx, ctx->GetAttr("block_size", &block_size_));
     OP_REQUIRES(
         ctx, block_size_ > 1,
@@ -31,34 +42,100 @@ class SpaceToDepthOp : public XlaOpKernel {
 
   void Compile(XlaOpKernelContext* ctx) override {
     const TensorShape input_tensor_shape = ctx->InputShape(0);
-    // The input is presumed to be [batch, height, width, depth]
     int input_rank = input_tensor_shape.dims();
     static const int kRequiredDims = 4;
     OP_REQUIRES(ctx, kRequiredDims == input_rank,
-                errors::InvalidArgument("Input rank should be: ", kRequiredDims,
-                                        " instead of: ", input_rank));
+                errors::InvalidArgument("Input rank should be ", kRequiredDims,
+                                        "; got ", input_rank));
     const gtl::InlinedVector<int64, 4> input_shape =
         input_tensor_shape.dim_sizes();
 
     xla::ComputationBuilder* b = ctx->builder();
     xla::ComputationDataHandle input = ctx->Input(0);
 
+    int feature_dim = GetTensorFeatureDimIndex(input_rank, data_format_);
+    int num_spatial_dims = GetTensorSpatialDims(input_rank, data_format_);
+
+    std::vector<int64> reshaped_shape;
+    std::vector<int64> transpose_order;
+    std::vector<int64> output_shape;
+    reshaped_shape.reserve(input_rank);
+    transpose_order.reserve(input_rank);
+    output_shape.reserve(input_rank);
+    if (data_format_ == FORMAT_NHWC) {
+      int64 block_elems = 1;
+      for (int i = 0; i < num_spatial_dims; ++i) {
+        OP_REQUIRES(ctx, input_shape[1 + i] % block_size_ == 0,
+                    errors::InvalidArgument(
+                        "input shape[", 1 + i, "]=", input_shape[1 + i],
+                        " is not divisible by block_size=", block_size_));
+        block_elems *= block_size_;
+      }
+
+      reshaped_shape.push_back(input_shape[0]);
+      for (int i = 0; i < num_spatial_dims; ++i) {
+        reshaped_shape.push_back(input_shape[1 + i] / block_size_);
+        reshaped_shape.push_back(block_size_);
+      }
+      reshaped_shape.push_back(input_shape[feature_dim]);
+
+      transpose_order.push_back(0);
+      for (int i = 0; i < num_spatial_dims; ++i) {
+        transpose_order.push_back(i * 2 + 1);
+      }
+      for (int i = 0; i < num_spatial_dims; ++i) {
+        transpose_order.push_back(i * 2 + 2);
+      }
+      transpose_order.push_back(feature_dim + num_spatial_dims);
+
+      output_shape.push_back(input_shape[0]);
+      for (int i = 0; i < num_spatial_dims; ++i) {
+        output_shape.push_back(input_shape[1 + i] / block_size_);
+      }
+      output_shape.push_back(input_shape[feature_dim] * block_elems);
+    } else {
+      // FORMAT_NCHW
+      int64 block_elems = 1;
+      for (int i = 0; i < num_spatial_dims; ++i) {
+        OP_REQUIRES(ctx, input_shape[2 + i] % block_size_ == 0,
+                    errors::InvalidArgument(
+                        "input shape[", 2 + i, "]=", input_shape[2 + i],
+                        " is not divisible by block_size=", block_size_));
+        block_elems *= block_size_;
+      }
+
+      reshaped_shape.push_back(input_shape[0]);
+      reshaped_shape.push_back(input_shape[feature_dim]);
+      for (int i = 0; i < num_spatial_dims; ++i) {
+        reshaped_shape.push_back(input_shape[2 + i] / block_size_);
+        reshaped_shape.push_back(block_size_);
+      }
+
+      transpose_order.push_back(0);
+      for (int i = 0; i < num_spatial_dims; ++i) {
+        transpose_order.push_back(i * 2 + 3);
+      }
+      transpose_order.push_back(feature_dim);
+      for (int i = 0; i < num_spatial_dims; ++i) {
+        transpose_order.push_back(i * 2 + 2);
+      }
+
+      output_shape.push_back(input_shape[0]);
+      output_shape.push_back(input_shape[feature_dim] * block_elems);
+      for (int i = 0; i < num_spatial_dims; ++i) {
+        output_shape.push_back(input_shape[2 + i] / block_size_);
+      }
+    }
+
+    // Note: comments are given in NHWC format; NCHW is similar with a different
+    // dimension order.
     // 1. Reshape `input` to `reshaped` of shape:
     //
     //      [batch,
     //       input_shape[1] / block_size_, block_size_,
     //       input_shape[2] / block_size_, block_size_,
     //       depth]
-    const int block_rank = 2;
-    for (int i = 0; i < block_rank; ++i) {
-      OP_REQUIRES(ctx, input_shape[1 + i] % block_size_ == 0,
-                  errors::InvalidArgument(
-                      "input shape[", 1 + i, "]=", input_shape[1 + i],
-                      " is not divisible by block_size=", block_size_));
-    }
-    xla::ComputationDataHandle reshaped = b->Reshape(
-        input, {input_shape[0], input_shape[1] / block_size_, block_size_,
-                input_shape[2] / block_size_, block_size_, input_shape[3]});
+    xla::ComputationDataHandle reshaped = b->Reshape(input, reshaped_shape);
 
     // 2. Permute dimensions of `reshaped` to produce
     //    `permuted_reshaped` of shape:
@@ -69,7 +146,7 @@ class SpaceToDepthOp : public XlaOpKernel {
     //       block_size_, block_size_,
     //       depth]
     xla::ComputationDataHandle permuted_reshaped =
-        b->Transpose(reshaped, {0, 1, 3, 2, 4, 5});
+        b->Transpose(reshaped, transpose_order);
 
     // 3. Reshape `permuted_reshaped` to flatten `block_shape` into the
     //    batch dimension, producing an output tensor of shape:
@@ -79,15 +156,14 @@ class SpaceToDepthOp : public XlaOpKernel {
     //       input_shape[2] / block_size_,
     //       block_size_ * block_size_ * depth]
     //
-    xla::ComputationDataHandle output = b->Reshape(
-        permuted_reshaped, {input_shape[0], input_shape[1] / block_size_,
-                            input_shape[2] / block_size_,
-                            block_size_ * block_size_ * input_shape[3]});
+    xla::ComputationDataHandle output =
+        b->Reshape(permuted_reshaped, output_shape);
 
     ctx->SetOutput(0, output);
   }
 
  private:
+  TensorFormat data_format_;
   int block_size_;
 };
 REGISTER_XLA_OP(Name("SpaceToDepth"), SpaceToDepthOp);
-- 
GitLab


From 5ee30dfdbdd9bf37b63ef1096c1d51931f114f74 Mon Sep 17 00:00:00 2001
From: Max Galkin <maxgalkin@google.com>
Date: Tue, 5 Dec 2017 08:09:26 -0800
Subject: [PATCH 0632/1225] Estimate Placeholder as a no-op.

PiperOrigin-RevId: 177956552
---
 tensorflow/core/grappler/costs/op_level_cost_estimator.cc | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/tensorflow/core/grappler/costs/op_level_cost_estimator.cc b/tensorflow/core/grappler/costs/op_level_cost_estimator.cc
index 1c278a1030..6bc136a3f8 100644
--- a/tensorflow/core/grappler/costs/op_level_cost_estimator.cc
+++ b/tensorflow/core/grappler/costs/op_level_cost_estimator.cc
@@ -31,6 +31,7 @@ constexpr char kConv2dBackpropFilter[] = "Conv2DBackpropFilter";
 constexpr char kConv2dBackpropInput[] = "Conv2DBackpropInput";
 constexpr char kMatMul[] = "MatMul";
 constexpr char kSparseMatMul[] = "SparseMatMul";
+constexpr char kPlaceholder[] = "Placeholder";
 constexpr char kIdentity[] = "Identity";
 constexpr char kRefIdentity[] = "RefIdentity";
 constexpr char kNoOp[] = "NoOp";
@@ -160,6 +161,9 @@ OpLevelCostEstimator::OpLevelCostEstimator() {
        wrap(&OpLevelCostEstimator::PredictConv2DBackpropInput)},
       {kMatMul, wrap(&OpLevelCostEstimator::PredictMatMul)},
       {kSparseMatMul, wrap(&OpLevelCostEstimator::PredictMatMul)},
+      {kBatchMatMul, wrap(&OpLevelCostEstimator::PredictBatchMatMul)},
+
+      {kPlaceholder, wrap(&OpLevelCostEstimator::PredictNoOp)},
       {kIdentity, wrap(&OpLevelCostEstimator::PredictNoOp)},
       {kRefIdentity, wrap(&OpLevelCostEstimator::PredictNoOp)},
       {kStopGradient, wrap(&OpLevelCostEstimator::PredictNoOp)},
@@ -171,7 +175,7 @@ OpLevelCostEstimator::OpLevelCostEstimator() {
       {kConst, wrap(&OpLevelCostEstimator::PredictNoOp)},
       {kVariable, wrap(&OpLevelCostEstimator::PredictNoOp)},
       {kVariableV2, wrap(&OpLevelCostEstimator::PredictNoOp)},
-      {kBatchMatMul, wrap(&OpLevelCostEstimator::PredictBatchMatMul)},
+
       {kRank, wrap(&OpLevelCostEstimator::PredictMetadata)},
       {kShape, wrap(&OpLevelCostEstimator::PredictMetadata)},
       {kSize, wrap(&OpLevelCostEstimator::PredictMetadata)}};
-- 
GitLab


From 2b2500a3cf3d9bf1bd38f50f10b9cfbd2653d463 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 5 Dec 2017 08:09:38 -0800
Subject: [PATCH 0633/1225] Add a helper to HloSharding to easily create
 trivial flat tuples without requiring a ShapeTree.

PiperOrigin-RevId: 177956572
---
 tensorflow/compiler/xla/service/hlo_sharding.h | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/tensorflow/compiler/xla/service/hlo_sharding.h b/tensorflow/compiler/xla/service/hlo_sharding.h
index 1a6988a2dc..7263198385 100644
--- a/tensorflow/compiler/xla/service/hlo_sharding.h
+++ b/tensorflow/compiler/xla/service/hlo_sharding.h
@@ -80,6 +80,17 @@ class HloSharding {
     return HloSharding(flattened_list);
   }
 
+  // Creates a new sharding for a tuple type. The requested tuple shape must not
+  // be nested. For nested tuples, use the ShapeTree overload.
+  static HloSharding Tuple(const Shape& tuple_shape,
+                           tensorflow::gtl::ArraySlice<HloSharding> shardings) {
+    CHECK(ShapeUtil::IsTuple(tuple_shape));
+    CHECK(!ShapeUtil::IsNestedTuple(tuple_shape));
+    std::vector<HloSharding> flattened_list(shardings.begin(), shardings.end());
+    CHECK_EQ(flattened_list.size(), ShapeUtil::TupleElementCount(tuple_shape));
+    return HloSharding(flattened_list);
+  }
+
   // Create a new sharding from a protobuf OpSharding.
   static StatusOr<HloSharding> FromProto(const OpSharding& proto);
 
-- 
GitLab


From b62a356f9e79202dd25222d85c048a5e08c4238a Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 5 Dec 2017 09:24:35 -0800
Subject: [PATCH 0634/1225] Make RevBlock a subclass of Layer

PiperOrigin-RevId: 177964932
---
 .../layers/python/layers/rev_block_lib.py     | 105 +++++++++++++-----
 .../python/layers/rev_block_lib_test.py       |  37 +++++-
 2 files changed, 110 insertions(+), 32 deletions(-)

diff --git a/tensorflow/contrib/layers/python/layers/rev_block_lib.py b/tensorflow/contrib/layers/python/layers/rev_block_lib.py
index 31a1b38bd4..123275e1fd 100644
--- a/tensorflow/contrib/layers/python/layers/rev_block_lib.py
+++ b/tensorflow/contrib/layers/python/layers/rev_block_lib.py
@@ -34,12 +34,13 @@ from six.moves import xrange  # pylint: disable=redefined-builtin
 from tensorflow.contrib.framework.python import ops as contrib_framework_ops
 from tensorflow.python.framework import function
 from tensorflow.python.framework import ops as framework_ops
+from tensorflow.python.layers import base
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import control_flow_ops
 from tensorflow.python.ops import gradients_impl
 from tensorflow.python.ops import math_ops
-from tensorflow.python.ops import template
 from tensorflow.python.ops import variable_scope
+from tensorflow.python.platform import tf_logging as logging
 from tensorflow.python.util import nest
 
 __all__ = ["rev_block", "RevBlock", "recompute_grad"]
@@ -137,7 +138,17 @@ def _rev_block_forward(x1,
   return y1, y2
 
 
-class RevBlock(object):
+def _scope_wrap(fn, scope):
+
+  @functools.wraps(fn)
+  def wrap(*args, **kwargs):
+    with variable_scope.variable_scope(scope):
+      return fn(*args, **kwargs)
+
+  return wrap
+
+
+class RevBlock(base.Layer):
   """Block of reversible layers. See rev_block."""
 
   def __init__(self,
@@ -146,7 +157,10 @@ class RevBlock(object):
                num_layers=1,
                f_side_input=None,
                g_side_input=None,
-               use_efficient_backprop=True):
+               use_efficient_backprop=True,
+               name="revblock",
+               **kwargs):
+    super(RevBlock, self).__init__(name=name, **kwargs)
 
     if isinstance(f, list):
       assert len(f) == num_layers
@@ -158,18 +172,8 @@ class RevBlock(object):
     else:
       g = [g] * num_layers
 
-    scope_prefix = "revblock/revlayer_%d/"
-    f_scope = scope_prefix + "f"
-    g_scope = scope_prefix + "g"
-
-    f = [
-        template.make_template(f_scope % i, fn, create_scope_now_=True)
-        for i, fn in enumerate(f)
-    ]
-    g = [
-        template.make_template(g_scope % i, fn, create_scope_now_=True)
-        for i, fn in enumerate(g)
-    ]
+    f = [_scope_wrap(fn, "revlayer_%d/f" % i) for i, fn in enumerate(f)]
+    g = [_scope_wrap(fn, "revlayer_%d/g" % i) for i, fn in enumerate(g)]
 
     self.f = f
     self.g = g
@@ -180,6 +184,39 @@ class RevBlock(object):
 
     self._use_efficient_backprop = use_efficient_backprop
 
+  def call(self, inputs, forward=True):
+    vs = variable_scope.get_variable_scope()
+    vars_before = vs.global_variables()
+
+    if forward:
+      x1, x2 = inputs
+      out = self._forward(x1, x2)
+    else:
+      y1, y2 = inputs
+      out = self._backward(y1, y2)
+
+    # Add any created variables to the Layer's variable stores
+    new_vars = vs.global_variables()[len(vars_before):]
+    train_vars = vs.trainable_variables()
+    for new_var in new_vars:
+      if new_var in train_vars:
+        self._trainable_weights.append(new_var)
+      else:
+        self._non_trainable_weights.append(new_var)
+
+    return out
+
+  def forward(self, x1, x2):
+    return self.apply([x1, x2])
+
+  def backward(self, y1, y2):
+    return self.apply([y1, y2], forward=False)
+
+  def build(self, _):
+    logging.warn("RevBlock constructs its variables on first call, not on "
+                 "build.")
+    self.built = True
+
   def _efficient_grad_fn(self, inputs, variables, ys, grad_ys):
     """Custom gradient fn for a block of reversible residual layers."""
     side_inputs = inputs[2:]
@@ -228,17 +265,18 @@ class RevBlock(object):
     f.reverse()
     g.reverse()
 
-    for i in xrange(self.num_layers):
-      ys, grad_ys, f_ret, g_ret = _rev_layer_backward(
-          ys, grad_ys, f[i], g[i], f_vars[i], self.f_side_input, g_vars[i],
-          self.g_side_input)
+    with variable_scope.variable_scope(self.scope_name, reuse=True):
+      for i in xrange(self.num_layers):
+        ys, grad_ys, f_ret, g_ret = _rev_layer_backward(
+            ys, grad_ys, f[i], g[i], f_vars[i], self.f_side_input, g_vars[i],
+            self.g_side_input)
 
-      grad_f_vars, grad_f_side = f_ret
-      grad_g_vars, grad_g_side = g_ret
-      f_var_grads.append(grad_f_vars)
-      g_var_grads.append(grad_g_vars)
-      f_side_grads.append(grad_f_side)
-      g_side_grads.append(grad_g_side)
+        grad_f_vars, grad_f_side = f_ret
+        grad_g_vars, grad_g_side = g_ret
+        f_var_grads.append(grad_f_vars)
+        g_var_grads.append(grad_g_vars)
+        f_side_grads.append(grad_f_side)
+        g_side_grads.append(grad_g_side)
 
     # Accumulate layer gradients for f_side_input and g_side_input
     acc_f_side_grads = _acc_grads(*f_side_grads)
@@ -265,7 +303,7 @@ class RevBlock(object):
     grad_x1, grad_x2 = grad_ys
     return [grad_x1, grad_x2] + side_input_grads, variable_grads
 
-  def forward(self, x1, x2):
+  def _forward(self, x1, x2):
     """Run forward through the reversible layers."""
 
     side_inputs = [self.f_side_input, self.g_side_input]
@@ -275,7 +313,7 @@ class RevBlock(object):
         self._efficient_grad_fn if self._use_efficient_backprop else None)
 
     @_fn_with_custom_grad(custom_grad_fn)
-    def _forward(x1_, x2_, *flat_side_inputs):
+    def _forward_wrap(x1_, x2_, *flat_side_inputs):
       f_side, g_side = nest.pack_sequence_as(side_inputs, flat_side_inputs)
       return _rev_block_forward(
           x1_,
@@ -287,9 +325,9 @@ class RevBlock(object):
           g_side_input=g_side,
           gate_outputs=self._use_efficient_backprop)
 
-    return _forward(x1, x2, *flat_side_inputs)
+    return _forward_wrap(x1, x2, *flat_side_inputs)
 
-  def backward(self, y1, y2):
+  def _backward(self, y1, y2):
     """Run backward through the reversible layers."""
 
     f = list(self.f)
@@ -356,7 +394,14 @@ def rev_block(x1,
   Returns:
     y1, y2: tuple of float Tensors.
   """
-  block = RevBlock(f, g, num_layers, f_side_input, g_side_input, is_training)
+  block = RevBlock(
+      f=f,
+      g=g,
+      num_layers=num_layers,
+      f_side_input=f_side_input,
+      g_side_input=g_side_input,
+      use_efficient_backprop=is_training,
+      _reuse=variable_scope.get_variable_scope().reuse)
   return block.forward(x1, x2)
 
 
diff --git a/tensorflow/contrib/layers/python/layers/rev_block_lib_test.py b/tensorflow/contrib/layers/python/layers/rev_block_lib_test.py
index a420753fd5..cbcbcd7511 100644
--- a/tensorflow/contrib/layers/python/layers/rev_block_lib_test.py
+++ b/tensorflow/contrib/layers/python/layers/rev_block_lib_test.py
@@ -188,13 +188,46 @@ class RevBlockTest(test.TestCase):
 
     def f(x):
       x = convolutional.conv1d(x, self.CHANNELS // 2, 3, padding="same")
-      x = core_layers.batch_normalization(x, training=True)
+      x = layers.batch_norm(x, is_training=True)
       x = convolutional.conv1d(x, self.CHANNELS // 2, 3, padding="same")
-      x = core_layers.batch_normalization(x, training=True)
+      x = layers.batch_norm(x, is_training=True)
       return x
 
     self._testRevBlock(x=x, f=f)
 
+  def testReuse(self):
+
+    def f(x):
+      return core_layers.dense(x, self.CHANNELS // 2)
+
+    def g(x):
+      return core_layers.dense(x, self.CHANNELS // 2)
+
+    x = random_ops.random_uniform(
+        [self.BATCH_SIZE, self.CHANNELS], dtype=dtypes.float32)
+    x1, x2 = array_ops.split(x, 2, axis=-1)
+
+    with variable_scope.variable_scope("test"):
+      y1, y2 = rev_block_lib.rev_block(x1, x2, f, g, num_layers=self.NUM_LAYERS)
+
+    num_vars_before = len(variables.global_variables())
+
+    with variable_scope.variable_scope("test", reuse=True):
+      y1, y2 = rev_block_lib.rev_block(x1, x2, f, g, num_layers=self.NUM_LAYERS)
+
+    num_vars_after = len(variables.global_variables())
+    self.assertEqual(num_vars_before, num_vars_after)
+
+    loss = math_ops.reduce_mean(y1 + y2)
+    _ = gradients_impl.gradients(loss,
+                                 [x] + variables.trainable_variables())
+
+    with variable_scope.variable_scope("test", reuse=True):
+      y1, y2 = rev_block_lib.rev_block(x1, x2, f, g, num_layers=self.NUM_LAYERS)
+
+    num_vars_after = len(variables.global_variables())
+    self.assertEqual(num_vars_before, num_vars_after)
+
 
 class RecomputeTest(test.TestCase):
 
-- 
GitLab


From 09e9ca10205a25b5fe4271c2055fe1f552ca83ca Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 5 Dec 2017 09:34:46 -0800
Subject: [PATCH 0635/1225] Improve handling of operations that are known to
 TOCO but not to TF Lite.

PiperOrigin-RevId: 177966156
---
 tensorflow/contrib/lite/toco/tflite/BUILD     |  1 +
 tensorflow/contrib/lite/toco/tflite/export.cc | 31 ++++++----
 .../contrib/lite/toco/tflite/export_test.cc   | 56 ++++++++++++++++---
 3 files changed, 70 insertions(+), 18 deletions(-)

diff --git a/tensorflow/contrib/lite/toco/tflite/BUILD b/tensorflow/contrib/lite/toco/tflite/BUILD
index e910e3957f..793eb366a4 100644
--- a/tensorflow/contrib/lite/toco/tflite/BUILD
+++ b/tensorflow/contrib/lite/toco/tflite/BUILD
@@ -93,6 +93,7 @@ tf_cc_test(
     ],
     deps = [
         ":export",
+        "//tensorflow/contrib/lite/schema:schema_fbs",
         "@com_google_googletest//:gtest_main",
     ],
 )
diff --git a/tensorflow/contrib/lite/toco/tflite/export.cc b/tensorflow/contrib/lite/toco/tflite/export.cc
index beda710614..bec694a233 100644
--- a/tensorflow/contrib/lite/toco/tflite/export.cc
+++ b/tensorflow/contrib/lite/toco/tflite/export.cc
@@ -188,19 +188,26 @@ Offset<Vector<Offset<OperatorCode>>> ExportOperatorCodes(
     const details::OperatorKey operator_key = GetOperatorKey(*op);
     int op_index = operators_map.at(operator_key);
 
-    if (ops_by_type.count(op->type) == 0) {
-      LOG(FATAL) << "Unsupported operator: " << HelpfulOperatorTypeName(*op);
+    string name = HelpfulOperatorTypeName(*op);
+    bool is_builtin = false;
+    if (ops_by_type.count(op->type) != 0) {
+      name = ops_by_type.at(op->type)->name();
+      is_builtin = (builtin_ops.count(name) > 0);
     }
 
-    string name = ops_by_type.at(op->type)->name();
-    if (builtin_ops.count(name) > 0) {
+    if (is_builtin) {
       ordered_opcodes[op_index] =
           CreateOperatorCode(*builder, builtin_ops[name], 0);
     } else {
-      // If use the custom operation code if it's available in the OperatorKey.
+      // This could be a kTensorFlowUnsupported, in which case we should be
+      // able to retrieve the original Tensorflow name from the OperatorKey, or
+      // this could be a proper TOCO operator that is completely unknown to TF
+      // Lite.
       if (!operator_key.custom_code.empty()) {
         name = operator_key.custom_code;
       }
+      // Either way, this is an operator that is not supported by TF Lite,
+      // so we output it as a custom op and add it to the error summary.
       if (error_summary) {
         error_summary->insert(name);
       }
@@ -226,11 +233,6 @@ Offset<Vector<Offset<Operator>>> ExportOperators(
   // The operators are in execution order, so we just follow tf.mini order.
   std::vector<Offset<Operator>> op_vector;
   for (const auto& op : model.operators) {
-    if (ops_by_type.count(op->type) == 0) {
-      LOG(FATAL) << "Op type '" << OperatorTypeName(op->type)
-                 << "' not supported";
-    }
-
     std::vector<int32_t> inputs;
     for (const string& input : op->inputs) {
       inputs.push_back(tensors_map.at(input));
@@ -241,8 +243,15 @@ Offset<Vector<Offset<Operator>>> ExportOperators(
       outputs.push_back(tensors_map.at(output));
     }
 
-    auto options = ops_by_type.at(op->type)->Serialize(*op, builder);
     int op_index = operators_map.at(GetOperatorKey(*op));
+
+    // This is a custom op unless we can find it in ops_by_type, and even then
+    // it could be a custom op (such as kTensorFlowUnsupported).
+
+    auto options = Options::Custom(0);
+    if (ops_by_type.count(op->type) != 0) {
+      options = ops_by_type.at(op->type)->Serialize(*op, builder);
+    }
     // The only supported CustomOptionFormat is FLEXBUFFERS now.
     op_vector.push_back(CreateOperator(
         *builder, op_index, builder->CreateVector(inputs),
diff --git a/tensorflow/contrib/lite/toco/tflite/export_test.cc b/tensorflow/contrib/lite/toco/tflite/export_test.cc
index e395645383..d4c4612d62 100644
--- a/tensorflow/contrib/lite/toco/tflite/export_test.cc
+++ b/tensorflow/contrib/lite/toco/tflite/export_test.cc
@@ -16,12 +16,14 @@ limitations under the License.
 
 #include <gmock/gmock.h>
 #include <gtest/gtest.h>
+#include "tensorflow/contrib/lite/schema/schema_generated.h"
 
 namespace toco {
-
 namespace tflite {
 namespace {
 
+using ::testing::ElementsAre;
+
 class ExportTest : public ::testing::Test {
  protected:
   // This is a very simplistic model. We are not interested in testing all the
@@ -31,11 +33,20 @@ class ExportTest : public ::testing::Test {
   void BuildTestModel() {
     input_model_.GetOrCreateArray("tensor_one");
     input_model_.GetOrCreateArray("tensor_two");
-    input_model_.operators.emplace_back(new ConvOperator);
+    {
+      auto* op = new ConvOperator;
+      op->padding.type = PaddingType::kSame;
+      input_model_.operators.emplace_back(op);
+    }
     input_model_.operators.emplace_back(new AddOperator);
-    auto unsupported_operator = new TensorFlowUnsupportedOperator;
-    unsupported_operator->tensorflow_op = "MyCrazyOp";
-    input_model_.operators.emplace_back(unsupported_operator);
+    {
+      auto* op = new TensorFlowUnsupportedOperator;
+      op->tensorflow_op = "MyCrazyOp";
+      input_model_.operators.emplace_back(op);
+    }
+    // Note that Sub is not know to TF Lite, so it gets exported as a custom
+    // op (and no options).
+    input_model_.operators.emplace_back(new SubOperator);
   }
 
   Model input_model_;
@@ -57,13 +68,44 @@ TEST_F(ExportTest, LoadOperatorsMap) {
   details::LoadOperatorsMap(input_model_, &operators);
   EXPECT_EQ(0, operators[details::OperatorKey(OperatorType::kAdd, "")]);
   EXPECT_EQ(1, operators[details::OperatorKey(OperatorType::kConv, "")]);
-  EXPECT_EQ(2, operators[details::OperatorKey(
+  EXPECT_EQ(2, operators[details::OperatorKey(OperatorType::kSub, "")]);
+  EXPECT_EQ(3, operators[details::OperatorKey(
                    OperatorType::kTensorFlowUnsupported, "MyCrazyOp")]);
 }
 
+TEST_F(ExportTest, Export) {
+  BuildTestModel();
+
+  string result;
+  Export(input_model_, true, &result);
+
+  auto* model = ::tflite::GetModel(result.data());
+
+  std::vector<string> names;
+  for (const ::tflite::OperatorCode* opcode : *model->operator_codes()) {
+    if (opcode->builtin_code() != ::tflite::BuiltinOperator_CUSTOM) {
+      names.push_back(string("builtin:") + ::tflite::EnumNameBuiltinOperator(
+                                               opcode->builtin_code()));
+    } else {
+      names.push_back(string("custom:") + opcode->custom_code()->c_str());
+    }
+  }
+
+  EXPECT_THAT(names, ElementsAre("builtin:ADD", "builtin:CONV_2D", "custom:Sub",
+                                 "custom:MyCrazyOp"));
+
+  std::vector<uint32_t> indices;
+  auto operators = (*model->subgraphs())[0]->operators();
+  EXPECT_EQ(operators->Length(), 4);
+  for (const auto* op : *operators) {
+    indices.push_back(op->opcode_index());
+  }
+
+  EXPECT_THAT(indices, ElementsAre(1, 0, 3, 2));
+}
+
 // TODO(ahentz): tests for tensors, inputs, outpus, opcodes and operators.
 
 }  // namespace
 }  // namespace tflite
-
 }  // namespace toco
-- 
GitLab


From 77b60c1ac63d0f188c4108ecb64bbe40004b2b8f Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 5 Dec 2017 10:03:45 -0800
Subject: [PATCH 0636/1225] Simplify code in dependency optimizer. Change
 dependency optimizer to remove isolated NoOps when it is safe. Fix bug in
 arithmetic optimizer: Only remove deduped nodes if we know the fetches.

PiperOrigin-RevId: 177970063
---
 .../optimizers/arithmetic_optimizer.cc        |   2 +-
 .../optimizers/arithmetic_optimizer_test.cc   |   3 +
 .../optimizers/dependency_optimizer.cc        | 120 +++++++++---------
 .../optimizers/dependency_optimizer.h         |  26 ++--
 4 files changed, 78 insertions(+), 73 deletions(-)

diff --git a/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc b/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc
index efe8ac05a3..8fece69739 100644
--- a/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc
+++ b/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc
@@ -518,7 +518,7 @@ void ArithmeticOptimizer::DedupComputations() {
   } while (!stop);
 
   // Delete duplicates
-  if (!duplicates.empty()) {
+  if (fetch_nodes_known_ && !duplicates.empty()) {
     int last = optimized_graph_->node_size() - 1;
     for (auto it = duplicates.rbegin(); it != duplicates.rend(); ++it) {
       int index = *it;
diff --git a/tensorflow/core/grappler/optimizers/arithmetic_optimizer_test.cc b/tensorflow/core/grappler/optimizers/arithmetic_optimizer_test.cc
index 80f42694d9..eccf90f3b1 100644
--- a/tensorflow/core/grappler/optimizers/arithmetic_optimizer_test.cc
+++ b/tensorflow/core/grappler/optimizers/arithmetic_optimizer_test.cc
@@ -69,6 +69,7 @@ TEST_F(ArithmeticOptimizerTest, OpDedupping) {
   Output div = ops::Div(s.WithOpName("div"), c1, c2);
   GrapplerItem item;
   TF_CHECK_OK(s.ToGraphDef(&item.graph));
+  item.fetch = {"div"};
 
   ArithmeticOptimizer optimizer;
   GraphDef output;
@@ -102,6 +103,7 @@ TEST_F(ArithmeticOptimizerTest, OpDeduppingAssertAndCheckNumerics) {
                         check1, check2);
   GrapplerItem item;
   TF_CHECK_OK(s.ToGraphDef(&item.graph));
+  item.fetch = {"div"};
 
   ArithmeticOptimizer optimizer;
   GraphDef output;
@@ -130,6 +132,7 @@ TEST_F(ArithmeticOptimizerTest, OpDedupCommutative) {
   Output div1 = ops::Div(s.WithOpName("div1"), mul1, mul2);
   GrapplerItem item;
   TF_CHECK_OK(s.ToGraphDef(&item.graph));
+  item.fetch = {"div"};
 
   ArithmeticOptimizer optimizer;
   GraphDef output;
diff --git a/tensorflow/core/grappler/optimizers/dependency_optimizer.cc b/tensorflow/core/grappler/optimizers/dependency_optimizer.cc
index bd8a58d814..a86420f693 100644
--- a/tensorflow/core/grappler/optimizers/dependency_optimizer.cc
+++ b/tensorflow/core/grappler/optimizers/dependency_optimizer.cc
@@ -106,8 +106,10 @@ bool DependencyOptimizer::SafeToConvertToNoOp(const NodeDef& node) {
   return do_not_rewrite_ops.find(node.op()) == do_not_rewrite_ops.end();
 }
 
-string DependencyOptimizer::TryOptimizeDependencies(
-    NodeDef* node, SetVector<NodeDef*>* nodes_to_simplify) {
+void DependencyOptimizer::OptimizeNode(int node_idx,
+                                       SetVector<int>* nodes_to_simplify,
+                                       std::set<int>* nodes_to_delete) {
+  NodeDef* node = optimized_graph_->mutable_node(node_idx);
   // Change ops that only have control dependencies as outputs to NoOps.
   if (node->op() != "NoOp" && SafeToConvertToNoOp(*node)) {
     VLOG(1) << "***** Replacing  " << node->name() << " (" << node->op()
@@ -133,15 +135,15 @@ string DependencyOptimizer::TryOptimizeDependencies(
       if (ctrl_inputs.insert(ctrl_input).second) {
         node->set_input(pos, ctrl_input);
         node_map_->UpdateInput(node->name(), old_input, ctrl_input);
-        auto old_input_node = node_map_->GetNode(old_input);
-        nodes_to_simplify->PushBack(old_input_node);
+        const NodeDef* old_input_node = node_map_->GetNode(old_input);
+        nodes_to_simplify->PushBack(node_to_idx_[old_input_node]);
       }
       ++pos;
     }
     node->set_op("NoOp");
     node->clear_attr();
-    nodes_to_simplify->PushBack(node);
-    return "";
+    nodes_to_simplify->PushBack(node_idx);
+    return;
   }
 
   // Remove NoOp nodes if their fan-in or fan-out is less than 2.
@@ -158,13 +160,12 @@ string DependencyOptimizer::TryOptimizeDependencies(
   //    x --^> | NoOp | --^> b  ==>    | x | --^> b
   //           |      | ...            |   | ...
   //           +------+ --^> c         +---+ --^> c
-  if (node->op() == "NoOp" &&
-      nodes_to_preserve_.find(node->name()) == nodes_to_preserve_.end()) {
+  if (node->op() == "NoOp") {
     const auto output_nodes = node_map_->GetOutputs(node->name());
     const int num_outputs = output_nodes.size();
     const int num_inputs = node->input_size();
     if (num_inputs > 1 && num_outputs > 1) {
-      return "";
+      return;
     }
     VLOG(1) << "***** Rerouting input around  " << node->name();
     std::vector<NodeDef*> input_nodes;
@@ -186,7 +187,7 @@ string DependencyOptimizer::TryOptimizeDependencies(
           consumer->add_input(input);
           updated_consumer = true;
           node_map_->AddOutput(NodeName(input), consumer->name());
-          nodes_to_simplify->PushBack(input_nodes[i]);
+          nodes_to_simplify->PushBack(node_to_idx_[input_nodes[i]]);
         }
       }
       // Remove dependency on node from consumer.
@@ -195,82 +196,81 @@ string DependencyOptimizer::TryOptimizeDependencies(
       if (updated_consumer) {
         VLOG(1) << "***** Updated consumer  " << consumer->name() << " ("
                 << consumer->op() << ")";
-        nodes_to_simplify->PushBack(consumer);
+        nodes_to_simplify->PushBack(node_to_idx_[consumer]);
       }
     }
 
-    // Clear all (control) inputs to this NoOp node.
-    if (fetch_nodes_known_) {
-      node_map_->RemoveInputs(node->name());
-      node->clear_input();
+    if (nodes_to_preserve_.find(node->name()) == nodes_to_preserve_.end()) {
+      // Mark the node for deletion.
+      nodes_to_delete->insert(node_idx);
     }
   }
+}
 
-  return "";
+void DependencyOptimizer::CleanControlInputs() {
+  for (int i = 0; i < optimized_graph_->node_size(); ++i) {
+    PruneControlInputs(optimized_graph_->mutable_node(i));
+  }
+}
+
+void DependencyOptimizer::DeleteNodes(const std::set<int>& nodes_to_delete) {
+  int last = optimized_graph_->node_size() - 1;
+  for (auto it = nodes_to_delete.rbegin(); it != nodes_to_delete.rend(); ++it) {
+    const int index = *it;
+    optimized_graph_->mutable_node()->SwapElements(index, last);
+    last--;
+  }
+  optimized_graph_->mutable_node()->DeleteSubrange(last + 1,
+                                                   nodes_to_delete.size());
+  // Rebuild the NodeMap which was invalidated by the node swapping above.
+  node_map_.reset(new NodeMap(optimized_graph_));
+  BuildNodeToIdx();
 }
 
 Status DependencyOptimizer::OptimizeDependencies() {
-  // TODO(rmlarsen,bsteiner): The following code is similar to the control loop
-  // in the ArithmeticOptimizer. Dedup this.
-  SetVector<NodeDef*> nodes_to_simplify;
+  SetVector<int> nodes_to_simplify;
+  std::set<int> nodes_to_delete;
   for (int i = 0; i < optimized_graph_->node_size(); ++i) {
-    NodeDef* node = optimized_graph_->mutable_node(i);
-    if (node->op() == "NoOp" || SafeToConvertToNoOp(*node)) {
-      PruneControlInputs(node);
-      nodes_to_simplify.PushBack(node);
+    const NodeDef& node = optimized_graph_->node(i);
+    if (node.op() == "NoOp" || SafeToConvertToNoOp(node)) {
+      nodes_to_simplify.PushBack(i);
     }
   }
   while (!nodes_to_simplify.Empty()) {
-    NodeDef* node = nodes_to_simplify.PopBack();
-    const string simplified_tensor =
-        TryOptimizeDependencies(node, &nodes_to_simplify);
-    if (!simplified_tensor.empty() &&
-        NodeName(simplified_tensor) != node->name()) {
-      // Always consider simplified_tensor for further optimizations.
-      NodeDef* simplified_node = node_map_->GetNode(simplified_tensor);
-      if (simplified_node != nullptr) {
-        nodes_to_simplify.PushBack(simplified_node);
-      }
-      // When `node` is simplifed to another node rather than in-place, the
-      // consumers of `node` are already redirected to `simplified_tensor`.
-      // Re-push the consumers into `nodes_to_simplify` for further
-      // optimizations.
-      std::set<NodeDef*> consumers = node_map_->GetOutputs(node->name());
-      for (NodeDef* consumer : consumers) {
-        // Update `consumer`'s use of `node` to `input`'s operand.
-        for (int i = 0; i < consumer->input_size(); ++i) {
-          int operand_pos;
-          string operand_node_name =
-              ParseNodeName(consumer->input(i), &operand_pos);
-          if (operand_node_name == node->name()) {
-            *consumer->mutable_input(i) =
-                (operand_pos < 0
-                     ? AsControlDependency(NodeName(simplified_tensor))
-                     : simplified_tensor);
-          }
-        }
-        node_map_->UpdateInput(consumer->name(), node->name(),
-                               simplified_tensor);
-        nodes_to_simplify.PushBack(consumer);
-      }
-    }
+    OptimizeNode(nodes_to_simplify.PopBack(), &nodes_to_simplify,
+                 &nodes_to_delete);
   }
-  for (int i = 0; i < optimized_graph_->node_size(); ++i) {
-    NodeDef* node = optimized_graph_->mutable_node(i);
-    PruneControlInputs(node);
+
+  if (fetch_nodes_known_) {
+    VLOG(1) << "Deleted " << nodes_to_delete.size() << " out of "
+            << optimized_graph_->node_size() << " nodes.";
+    DeleteNodes(nodes_to_delete);
   }
   return Status::OK();
 }
 
+void DependencyOptimizer::BuildNodeToIdx() {
+  // Set up &node -> index map.
+  node_to_idx_.clear();
+  for (int i = 0; i < optimized_graph_->node_size(); ++i) {
+    const NodeDef& node = optimized_graph_->node(i);
+    node_to_idx_[&node] = i;
+  }
+}
+
 Status DependencyOptimizer::Optimize(Cluster* cluster, const GrapplerItem& item,
                                      GraphDef* optimized_graph) {
   optimized_graph_ = optimized_graph;
   *optimized_graph_ = item.graph;
   nodes_to_preserve_ = item.NodesToPreserve();
-  node_map_.reset(new NodeMap(optimized_graph));
   fetch_nodes_known_ = !item.fetch.empty();
+  node_map_.reset(new NodeMap(optimized_graph_));
+  BuildNodeToIdx();
+
   VLOG(1) << "Graph before optimization:\n" << optimized_graph_->DebugString();
   TF_RETURN_IF_ERROR(OptimizeDependencies());
+
+  CleanControlInputs();
   VLOG(1) << "Graph after optimization:\n" << optimized_graph_->DebugString();
 
   return Status::OK();
diff --git a/tensorflow/core/grappler/optimizers/dependency_optimizer.h b/tensorflow/core/grappler/optimizers/dependency_optimizer.h
index a9d3322744..f9d4d0b6c2 100644
--- a/tensorflow/core/grappler/optimizers/dependency_optimizer.h
+++ b/tensorflow/core/grappler/optimizers/dependency_optimizer.h
@@ -43,25 +43,27 @@ class DependencyOptimizer : public GraphOptimizer {
                 const GraphDef& optimized_graph, double result) override;
 
  private:
-  Status OptimizeDependencies();
-
   // Returns true if it is safe to convert node to NoOp.
   bool SafeToConvertToNoOp(const NodeDef& node);
-
-  // Tries to simplify the expression that roots at `node` and replaces the uses
-  // of `node` to the simplified expression. Returns the name of the simplified
-  // tensor (e.g. "split:1") or an empty string if no simplification is
-  // performed.
-  string TryOptimizeDependencies(NodeDef* node,
-                                 SetVector<NodeDef*>* nodes_to_simplify);
-
-  bool HasOnlyControlOutputs(const NodeDef* node);
+  // Removes all duplicate control dependencies.
+  void CleanControlInputs();
+  // Builds a map from the &optimized_graph_->node(i) to i.
+  void BuildNodeToIdx();
+  // Removes the given set of nodes from the graph.
+  void DeleteNodes(const std::set<int>& nodes_to_delete);
+  // Tries to optimize the node with the given index, possibly additional
+  // optimizations by inserting nodes in nodes_to_simplify, and pruning nodes by
+  // inserting them in nodes_to_delete.
+  void OptimizeNode(int node_idx, SetVector<int>* nodes_to_simplify,
+                    std::set<int>* nodes_to_delete);
+  // Main driver of dependency optimizations.
+  Status OptimizeDependencies();
 
   RewriterConfig::Toggle opt_level_;
-
   bool fetch_nodes_known_;
   std::unordered_set<string> nodes_to_preserve_;
   std::unique_ptr<NodeMap> node_map_;
+  std::unordered_map<const NodeDef*, int> node_to_idx_;
   GraphDef* optimized_graph_;  // Not owned.
 };
 
-- 
GitLab


From f88cd9195589e41f011c68995d61d760dc2e1a83 Mon Sep 17 00:00:00 2001
From: Jiri Simsa <jsimsa@google.com>
Date: Tue, 5 Dec 2017 10:13:41 -0800
Subject: [PATCH 0637/1225] Adding variant-based serialization and
 deserialization for sparse tensors.

PiperOrigin-RevId: 177971801
---
 .../api_def_SerializeManySparse.pbtxt         |   9 +-
 .../base_api/api_def_SerializeSparse.pbtxt    |   9 +-
 tensorflow/core/kernels/concat_lib_cpu.cc     |   8 +-
 tensorflow/core/kernels/pack_op.cc            |   1 +
 .../core/kernels/serialize_sparse_op.cc       | 296 ++++++++++++------
 tensorflow/core/ops/sparse_ops.cc             |  21 +-
 .../sparse_serialization_ops_test.py          | 251 +++++++++++----
 tensorflow/python/ops/hidden_ops.txt          |   2 +-
 tensorflow/python/ops/sparse_ops.py           |  32 +-
 tensorflow/tools/api/golden/tensorflow.pbtxt  |   4 +-
 10 files changed, 444 insertions(+), 189 deletions(-)

diff --git a/tensorflow/core/api_def/base_api/api_def_SerializeManySparse.pbtxt b/tensorflow/core/api_def/base_api/api_def_SerializeManySparse.pbtxt
index 0010bca0b0..d46b4b20ee 100644
--- a/tensorflow/core/api_def/base_api/api_def_SerializeManySparse.pbtxt
+++ b/tensorflow/core/api_def/base_api/api_def_SerializeManySparse.pbtxt
@@ -18,7 +18,14 @@ END
 1-D.  The `shape` of the minibatch `SparseTensor`.
 END
   }
-  summary: "Serialize an `N`-minibatch `SparseTensor` into an `[N, 3]` string `Tensor`."
+  attr {
+    name: "out_type"
+    description: <<END
+The `dtype` to use for serialization; the supported types are `string`
+(default) and `variant`.
+END
+  }
+  summary: "Serialize an `N`-minibatch `SparseTensor` into an `[N, 3]` `Tensor` object."
   description: <<END
 The `SparseTensor` must have rank `R` greater than 1, and the first dimension
 is treated as the minibatch dimension.  Elements of the `SparseTensor`
diff --git a/tensorflow/core/api_def/base_api/api_def_SerializeSparse.pbtxt b/tensorflow/core/api_def/base_api/api_def_SerializeSparse.pbtxt
index bb4a352d48..491f69fda0 100644
--- a/tensorflow/core/api_def/base_api/api_def_SerializeSparse.pbtxt
+++ b/tensorflow/core/api_def/base_api/api_def_SerializeSparse.pbtxt
@@ -18,5 +18,12 @@ END
 1-D.  The `shape` of the `SparseTensor`.
 END
   }
-  summary: "Serialize a `SparseTensor` into a string 3-vector (1-D `Tensor`) object."
+  attr {
+    name: "out_type"
+    description: <<END
+The `dtype` to use for serialization; the supported types are `string`
+(default) and `variant`.
+END
+  }
+  summary: "Serialize a `SparseTensor` into a `[3]` `Tensor` object."
 }
diff --git a/tensorflow/core/kernels/concat_lib_cpu.cc b/tensorflow/core/kernels/concat_lib_cpu.cc
index b0bec0c5dc..743e3acfd5 100644
--- a/tensorflow/core/kernels/concat_lib_cpu.cc
+++ b/tensorflow/core/kernels/concat_lib_cpu.cc
@@ -73,12 +73,14 @@ REGISTER(quint16)
 REGISTER(qint16)
 REGISTER(qint32)
 REGISTER(bfloat16)
+TF_CALL_variant(REGISTER)
 
 #if defined(IS_MOBILE_PLATFORM) && !defined(SUPPORT_SELECTIVE_REGISTRATION) && \
     !defined(__ANDROID_TYPES_FULL__)
-// Primarily used for SavedModel support on mobile. Registering it here only if
-// __ANDROID_TYPES_FULL__ is not defined, as that already register strings
-REGISTER(string);
+    // Primarily used for SavedModel support on mobile. Registering it here only
+    // if __ANDROID_TYPES_FULL__ is not defined (which already registers string)
+    // to avoid duplicate registration.
+    REGISTER(string);
 #endif  // defined(IS_MOBILE_PLATFORM) &&
         // !defined(SUPPORT_SELECTIVE_REGISTRATION) &&
         // !defined(__ANDROID_TYPES_FULL__)
diff --git a/tensorflow/core/kernels/pack_op.cc b/tensorflow/core/kernels/pack_op.cc
index 814128d99a..6167593013 100644
--- a/tensorflow/core/kernels/pack_op.cc
+++ b/tensorflow/core/kernels/pack_op.cc
@@ -140,6 +140,7 @@ class PackOp : public OpKernel {
 TF_CALL_ALL_TYPES(REGISTER_PACK);
 TF_CALL_QUANTIZED_TYPES(REGISTER_PACK);
 TF_CALL_bfloat16(REGISTER_PACK);
+TF_CALL_variant(REGISTER_PACK);
 
 #if defined(IS_MOBILE_PLATFORM) && !defined(SUPPORT_SELECTIVE_REGISTRATION)
 // Primarily used for SavedModel support on mobile.
diff --git a/tensorflow/core/kernels/serialize_sparse_op.cc b/tensorflow/core/kernels/serialize_sparse_op.cc
index f4159da229..a3b573b5d9 100644
--- a/tensorflow/core/kernels/serialize_sparse_op.cc
+++ b/tensorflow/core/kernels/serialize_sparse_op.cc
@@ -27,6 +27,8 @@ limitations under the License.
 #include "tensorflow/core/framework/tensor.pb.h"
 #include "tensorflow/core/framework/tensor_util.h"
 #include "tensorflow/core/framework/types.h"
+#include "tensorflow/core/framework/variant.h"
+#include "tensorflow/core/framework/variant_encode_decode.h"
 #include "tensorflow/core/kernels/reshape_util.h"
 #include "tensorflow/core/lib/gtl/inlined_vector.h"
 #include "tensorflow/core/util/sparse/sparse_tensor.h"
@@ -35,15 +37,20 @@ namespace tensorflow {
 
 using sparse::SparseTensor;
 
+template <typename T>
 class SerializeSparseOp : public OpKernel {
  public:
   explicit SerializeSparseOp(OpKernelConstruction* context)
       : OpKernel(context) {}
 
+  Status Initialize(Tensor* result);
+  Status Serialize(const Tensor& input, T* result);
+
   void Compute(OpKernelContext* context) override {
     const Tensor* input_indices;
     const Tensor* input_values;
     const Tensor* input_shape;
+
     OP_REQUIRES_OK(context, context->input("sparse_indices", &input_indices));
     OP_REQUIRES_OK(context, context->input("sparse_values", &input_values));
     OP_REQUIRES_OK(context, context->input("sparse_shape", &input_shape));
@@ -62,34 +69,75 @@ class SerializeSparseOp : public OpKernel {
                     "Input shape should be a vector but received shape ",
                     input_shape->shape().DebugString()));
 
-    TensorProto proto_indices;
-    TensorProto proto_values;
-    TensorProto proto_shape;
-
-    input_indices->AsProtoTensorContent(&proto_indices);
-    input_values->AsProtoTensorContent(&proto_values);
-    input_shape->AsProtoTensorContent(&proto_shape);
+    Tensor serialized_sparse;
+    OP_REQUIRES_OK(context, Initialize(&serialized_sparse));
 
-    Tensor serialized_sparse(DT_STRING, TensorShape({3}));
-    auto serialized_sparse_t = serialized_sparse.vec<string>();
-
-    serialized_sparse_t(0) = proto_indices.SerializeAsString();
-    serialized_sparse_t(1) = proto_values.SerializeAsString();
-    serialized_sparse_t(2) = proto_shape.SerializeAsString();
+    auto serialized_sparse_t = serialized_sparse.vec<T>();
+    OP_REQUIRES_OK(context, Serialize(*input_indices, &serialized_sparse_t(0)));
+    OP_REQUIRES_OK(context, Serialize(*input_values, &serialized_sparse_t(1)));
+    OP_REQUIRES_OK(context, Serialize(*input_shape, &serialized_sparse_t(2)));
 
     context->set_output(0, serialized_sparse);
   }
 };
 
-REGISTER_KERNEL_BUILDER(Name("SerializeSparse").Device(DEVICE_CPU),
-                        SerializeSparseOp);
+template <>
+Status SerializeSparseOp<string>::Initialize(Tensor* result) {
+  *result = Tensor(DT_STRING, TensorShape({3}));
+  return Status::OK();
+}
+
+template <>
+Status SerializeSparseOp<string>::Serialize(const Tensor& input,
+                                            string* result) {
+  TensorProto proto;
+  input.AsProtoTensorContent(&proto);
+  *result = proto.SerializeAsString();
+  return Status::OK();
+}
+
+REGISTER_KERNEL_BUILDER(Name("SerializeSparse")
+                            .Device(DEVICE_CPU)
+                            .TypeConstraint<string>("out_type"),
+                        SerializeSparseOp<string>);
+
+template <>
+Status SerializeSparseOp<Variant>::Initialize(Tensor* result) {
+  *result = Tensor(DT_VARIANT, TensorShape({3}));
+  return Status::OK();
+}
+
+template <>
+Status SerializeSparseOp<Variant>::Serialize(const Tensor& input,
+                                             Variant* result) {
+  *result = input;
+  return Status::OK();
+}
+
+REGISTER_KERNEL_BUILDER(Name("SerializeSparse")
+                            .Device(DEVICE_CPU)
+                            .TypeConstraint<Variant>("out_type"),
+                        SerializeSparseOp<Variant>);
 
 template <typename T>
-class SerializeManySparseOp : public OpKernel {
+class SerializeManySparseOpBase : public OpKernel {
  public:
-  explicit SerializeManySparseOp(OpKernelConstruction* context)
+  explicit SerializeManySparseOpBase(OpKernelConstruction* context)
       : OpKernel(context) {}
 
+  void Compute(OpKernelContext* context) override {}
+
+ protected:
+  Status Initialize(const int64 n, Tensor* result);
+  Status Serialize(const Tensor& input, T* result);
+};
+
+template <typename T, typename U>
+class SerializeManySparseOp : public SerializeManySparseOpBase<U> {
+ public:
+  explicit SerializeManySparseOp(OpKernelConstruction* context)
+      : SerializeManySparseOpBase<U>(context) {}
+
   void Compute(OpKernelContext* context) override {
     const Tensor* input_indices;
     const Tensor* input_values;
@@ -127,37 +175,31 @@ class SerializeManySparseOp : public OpKernel {
 
     auto input_shape_t = input_shape->vec<int64>();
     const int64 N = input_shape_t(0);
-
-    Tensor serialized_sparse(DT_STRING, TensorShape({N, 3}));
-    auto serialized_sparse_t = serialized_sparse.matrix<string>();
+    Tensor serialized_sparse;
+    OP_REQUIRES_OK(context, this->Initialize(N, &serialized_sparse));
+    auto serialized_sparse_t = serialized_sparse.matrix<U>();
 
     OP_REQUIRES_OK(context, input_st.IndicesValid());
 
-    // We can generate the output shape proto string now, for all
-    // minibatch entries.
-    Tensor output_shape(DT_INT64, {rank - 1});
-    auto output_shape_t = output_shape.vec<int64>();
-    for (int d = 1; d < rank; d++) output_shape_t(d - 1) = input_shape_t(d);
-    TensorProto proto_shape;
-    output_shape.AsProtoTensorContent(&proto_shape);
-    const string proto_shape_string = proto_shape.SerializeAsString();
-
+    // Initialize output with empty values and the proper shapes.
     Tensor output_blank_indices(DT_INT64, {0, rank - 1});
-    Tensor output_blank_values(DataTypeToEnum<T>::value, {0});
-    TensorProto proto_blank_indices;
-    TensorProto proto_blank_values;
-    output_blank_indices.AsProtoTensorContent(&proto_blank_indices);
-    output_blank_values.AsProtoTensorContent(&proto_blank_values);
+    U serialized_indices;
+    OP_REQUIRES_OK(context,
+                   this->Serialize(output_blank_indices, &serialized_indices));
+    serialized_sparse_t.template chip<1>(0).setConstant(serialized_indices);
 
-    const string proto_blank_indices_string =
-        proto_blank_indices.SerializeAsString();
-    const string proto_blank_values_string =
-        proto_blank_values.SerializeAsString();
+    Tensor output_blank_values(DataTypeToEnum<T>::value, {0});
+    U serialized_values;
+    OP_REQUIRES_OK(context,
+                   this->Serialize(output_blank_values, &serialized_values));
+    serialized_sparse_t.template chip<1>(1).setConstant(serialized_values);
 
-    // Initialize output with empty values and the proper shapes.
-    serialized_sparse_t.chip<1>(0).setConstant(proto_blank_indices_string);
-    serialized_sparse_t.chip<1>(1).setConstant(proto_blank_values_string);
-    serialized_sparse_t.chip<1>(2).setConstant(proto_shape_string);
+    Tensor output_shape(DT_INT64, {rank - 1});
+    auto output_shape_t = output_shape.vec<int64>();
+    for (int d = 1; d < rank; d++) output_shape_t(d - 1) = input_shape_t(d);
+    U serialized_shape;
+    OP_REQUIRES_OK(context, this->Serialize(output_shape, &serialized_shape));
+    serialized_sparse_t.template chip<1>(2).setConstant(serialized_shape);
 
     // Get groups by minibatch dimension
     sparse::GroupIterable minibatch = input_st.group({0});
@@ -186,34 +228,84 @@ class SerializeManySparseOp : public OpKernel {
         output_values_t(i) = values(i);
       }
 
-      TensorProto proto_indices;
-      TensorProto proto_values;
-      output_indices.AsProtoTensorContent(&proto_indices);
-      output_values.AsProtoTensorContent(&proto_values);
-
-      serialized_sparse_t(b, 0) = proto_indices.SerializeAsString();
-      serialized_sparse_t(b, 1) = proto_values.SerializeAsString();
+      OP_REQUIRES_OK(
+          context, this->Serialize(output_indices, &serialized_sparse_t(b, 0)));
+      OP_REQUIRES_OK(
+          context, this->Serialize(output_values, &serialized_sparse_t(b, 1)));
     }
 
     context->set_output(0, serialized_sparse);
   }
 };
 
-#define REGISTER_KERNELS(type)                            \
-  REGISTER_KERNEL_BUILDER(Name("SerializeManySparse")     \
-                              .Device(DEVICE_CPU)         \
-                              .TypeConstraint<type>("T"), \
-                          SerializeManySparseOp<type>)
+template <>
+Status SerializeManySparseOpBase<string>::Initialize(const int64 n,
+                                                     Tensor* result) {
+  *result = Tensor(DT_STRING, TensorShape({n, 3}));
+  return Status::OK();
+}
+
+template <>
+Status SerializeManySparseOpBase<string>::Serialize(const Tensor& input,
+                                                    string* result) {
+  TensorProto proto;
+  input.AsProtoTensorContent(&proto);
+  *result = proto.SerializeAsString();
+  return Status::OK();
+}
+
+#define REGISTER_KERNELS(type)                                     \
+  REGISTER_KERNEL_BUILDER(Name("SerializeManySparse")              \
+                              .Device(DEVICE_CPU)                  \
+                              .TypeConstraint<type>("T")           \
+                              .TypeConstraint<string>("out_type"), \
+                          SerializeManySparseOp<type, string>)
+
+TF_CALL_ALL_TYPES(REGISTER_KERNELS);
+#undef REGISTER_KERNELS
+
+template <>
+Status SerializeManySparseOpBase<Variant>::Initialize(const int64 n,
+                                                      Tensor* result) {
+  *result = Tensor(DT_VARIANT, TensorShape({n, 3}));
+  return Status::OK();
+}
+
+template <>
+Status SerializeManySparseOpBase<Variant>::Serialize(const Tensor& input,
+                                                     Variant* result) {
+  *result = input;
+  return Status::OK();
+}
+
+#define REGISTER_KERNELS(type)                                      \
+  REGISTER_KERNEL_BUILDER(Name("SerializeManySparse")               \
+                              .Device(DEVICE_CPU)                   \
+                              .TypeConstraint<type>("T")            \
+                              .TypeConstraint<Variant>("out_type"), \
+                          SerializeManySparseOp<type, Variant>)
 
 TF_CALL_ALL_TYPES(REGISTER_KERNELS);
 #undef REGISTER_KERNELS
 
 template <typename T>
-class DeserializeSparseOp : public OpKernel {
+class DeserializeSparseOpBase : public OpKernel {
  public:
-  explicit DeserializeSparseOp(OpKernelConstruction* context)
+  explicit DeserializeSparseOpBase(OpKernelConstruction* context)
       : OpKernel(context) {}
 
+  void Compute(OpKernelContext* context) override {}
+
+ protected:
+  Status Deserialize(const T& serialized, Tensor* result);
+};
+
+template <typename T, typename U>
+class DeserializeSparseOp : public DeserializeSparseOpBase<U> {
+ public:
+  explicit DeserializeSparseOp(OpKernelConstruction* context)
+      : DeserializeSparseOpBase<U>(context) {}
+
   void Compute(OpKernelContext* context) override {
     const Tensor& serialized_sparse = context->input(0);
     const int ndims = serialized_sparse.shape().dims();
@@ -246,53 +338,30 @@ class DeserializeSparseOp : public OpKernel {
     indices.reserve(num_sparse_tensors);
     values.reserve(num_sparse_tensors);
 
-    const auto& serialized_sparse_t =
-        serialized_sparse.flat_inner_dims<string, 2>();
+    const auto& serialized_sparse_t = serialized_sparse.flat_inner_dims<U, 2>();
 
     for (int i = 0; i < num_sparse_tensors; ++i) {
-      Tensor output_indices(DT_INT64);
-      Tensor output_values(DataTypeToEnum<T>::value);
-      Tensor output_shape(DT_INT64);
-      TensorProto proto_indices;
-      TensorProto proto_values;
-      TensorProto proto_shape;
-
-      OP_REQUIRES(
-          context,
-          ParseProtoUnlimited(&proto_indices, serialized_sparse_t(i, 0)),
-          errors::InvalidArgument("Could not parse serialized_sparse[", i,
-                                  ", 0]"));
-      OP_REQUIRES(context,
-                  ParseProtoUnlimited(&proto_values, serialized_sparse_t(i, 1)),
-                  errors::InvalidArgument("Could not parse serialized_sparse[",
-                                          i, ", 1]"));
-      OP_REQUIRES(context,
-                  ParseProtoUnlimited(&proto_shape, serialized_sparse_t(i, 2)),
-                  errors::InvalidArgument("Could not parse serialized_sparse[",
-                                          i, ", 2]"));
-
-      OP_REQUIRES(context, output_indices.FromProto(proto_indices),
-                  errors::InvalidArgument(
-                      "Could not construct Tensor serialized_sparse[", i,
-                      ", 0] (indices)"));
+      Tensor output_indices;
+      OP_REQUIRES_OK(context, this->Deserialize(serialized_sparse_t(i, 0),
+                                                &output_indices));
       OP_REQUIRES(context, TensorShapeUtils::IsMatrix(output_indices.shape()),
                   errors::InvalidArgument(
                       "Expected serialized_sparse[", i,
                       ", 0] to represent an index matrix but received shape ",
                       output_indices.shape().DebugString()));
-      OP_REQUIRES(context, output_values.FromProto(proto_values),
-                  errors::InvalidArgument(
-                      "Could not construct Tensor serialized_sparse[", i,
-                      ", 1] (values)"));
+
+      Tensor output_values;
+      OP_REQUIRES_OK(context, this->Deserialize(serialized_sparse_t(i, 1),
+                                                &output_values));
       OP_REQUIRES(context, TensorShapeUtils::IsVector(output_values.shape()),
                   errors::InvalidArgument(
                       "Expected serialized_sparse[", i,
                       ", 1] to represent a values vector but received shape ",
                       output_values.shape().DebugString()));
-      OP_REQUIRES(context, output_shape.FromProto(proto_shape),
-                  errors::InvalidArgument(
-                      "Could not construct Tensor serialized_sparse[", i,
-                      ", 2] (shape)"));
+
+      Tensor output_shape;
+      OP_REQUIRES_OK(
+          context, this->Deserialize(serialized_sparse_t(i, 2), &output_shape));
       OP_REQUIRES(
           context, TensorShapeUtils::IsVector(output_shape.shape()),
           errors::InvalidArgument("Expected serialized_sparse[", i,
@@ -400,11 +469,27 @@ class DeserializeSparseOp : public OpKernel {
   }
 };
 
-#define REGISTER_KERNELS(type)                                \
-  REGISTER_KERNEL_BUILDER(Name("DeserializeSparse")           \
-                              .Device(DEVICE_CPU)             \
-                              .TypeConstraint<type>("dtype"), \
-                          DeserializeSparseOp<type>)
+template <>
+Status DeserializeSparseOpBase<string>::Deserialize(const string& serialized,
+                                                    Tensor* result) {
+  TensorProto proto;
+  if (!ParseProtoUnlimited(&proto, serialized)) {
+    return errors::InvalidArgument("Could not parse serialized proto");
+  }
+  Tensor tensor;
+  if (!tensor.FromProto(proto)) {
+    return errors::InvalidArgument("Could not construct tensor from proto");
+  }
+  *result = tensor;
+  return Status::OK();
+}
+
+#define REGISTER_KERNELS(type)                                        \
+  REGISTER_KERNEL_BUILDER(Name("DeserializeSparse")                   \
+                              .Device(DEVICE_CPU)                     \
+                              .TypeConstraint<type>("dtype")          \
+                              .TypeConstraint<string>("Tserialized"), \
+                          DeserializeSparseOp<type, string>)
 
 TF_CALL_ALL_TYPES(REGISTER_KERNELS);
 #undef REGISTER_KERNELS
@@ -413,7 +498,24 @@ TF_CALL_ALL_TYPES(REGISTER_KERNELS);
   REGISTER_KERNEL_BUILDER(Name("DeserializeManySparse")       \
                               .Device(DEVICE_CPU)             \
                               .TypeConstraint<type>("dtype"), \
-                          DeserializeSparseOp<type>)
+                          DeserializeSparseOp<type, string>)
+
+TF_CALL_ALL_TYPES(REGISTER_KERNELS);
+#undef REGISTER_KERNELS
+
+template <>
+Status DeserializeSparseOpBase<Variant>::Deserialize(const Variant& serialized,
+                                                     Tensor* result) {
+  *result = *serialized.get<Tensor>();
+  return Status::OK();
+}
+
+#define REGISTER_KERNELS(type)                                         \
+  REGISTER_KERNEL_BUILDER(Name("DeserializeSparse")                    \
+                              .Device(DEVICE_CPU)                      \
+                              .TypeConstraint<type>("dtype")           \
+                              .TypeConstraint<Variant>("Tserialized"), \
+                          DeserializeSparseOp<type, Variant>)
 
 TF_CALL_ALL_TYPES(REGISTER_KERNELS);
 #undef REGISTER_KERNELS
diff --git a/tensorflow/core/ops/sparse_ops.cc b/tensorflow/core/ops/sparse_ops.cc
index 772e2531dc..99f61a3054 100644
--- a/tensorflow/core/ops/sparse_ops.cc
+++ b/tensorflow/core/ops/sparse_ops.cc
@@ -190,7 +190,8 @@ REGISTER_OP("SerializeSparse")
     .Input("sparse_values: T")
     .Input("sparse_shape: int64")
     .Attr("T: type")
-    .Output("serialized_sparse: string")
+    .Output("serialized_sparse: out_type")
+    .Attr("out_type: {string, variant} = DT_STRING")
     .SetShapeFn([](InferenceContext* c) {
       ShapeHandle unused;
       TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 2, &unused));
@@ -200,11 +201,13 @@ REGISTER_OP("SerializeSparse")
       return Status::OK();
     })
     .Doc(R"doc(
-Serialize a `SparseTensor` into a string 3-vector (1-D `Tensor`) object.
+Serialize a `SparseTensor` into a `[3]` `Tensor` object.
 
 sparse_indices: 2-D.  The `indices` of the `SparseTensor`.
 sparse_values: 1-D.  The `values` of the `SparseTensor`.
 sparse_shape: 1-D.  The `shape` of the `SparseTensor`.
+out_type: The `dtype` to use for serialization; the supported types are `string`
+  (default) and `variant`.
 )doc");
 
 REGISTER_OP("SerializeManySparse")
@@ -212,7 +215,8 @@ REGISTER_OP("SerializeManySparse")
     .Input("sparse_values: T")
     .Input("sparse_shape: int64")
     .Attr("T: type")
-    .Output("serialized_sparse: string")
+    .Output("serialized_sparse: out_type")
+    .Attr("out_type: {string, variant} = DT_STRING")
     .SetShapeFn([](InferenceContext* c) {
       ShapeHandle unused;
       TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 2, &unused));
@@ -222,7 +226,7 @@ REGISTER_OP("SerializeManySparse")
       return Status::OK();
     })
     .Doc(R"doc(
-Serialize an `N`-minibatch `SparseTensor` into an `[N, 3]` string `Tensor`.
+Serialize an `N`-minibatch `SparseTensor` into an `[N, 3]` `Tensor` object.
 
 The `SparseTensor` must have rank `R` greater than 1, and the first dimension
 is treated as the minibatch dimension.  Elements of the `SparseTensor`
@@ -235,14 +239,17 @@ The minibatch size `N` is extracted from `sparse_shape[0]`.
 sparse_indices: 2-D.  The `indices` of the minibatch `SparseTensor`.
 sparse_values: 1-D.  The `values` of the minibatch `SparseTensor`.
 sparse_shape: 1-D.  The `shape` of the minibatch `SparseTensor`.
+out_type: The `dtype` to use for serialization; the supported types are `string`
+  (default) and `variant`.
 )doc");
 
 REGISTER_OP("DeserializeSparse")
-    .Input("serialized_sparse: string")
-    .Attr("dtype: type")
+    .Input("serialized_sparse: Tserialized")
     .Output("sparse_indices: int64")
     .Output("sparse_values: dtype")
     .Output("sparse_shape: int64")
+    .Attr("dtype: type")
+    .Attr("Tserialized: {string, variant} = DT_STRING")
     .SetShapeFn([](InferenceContext* c) {
       // serialized sparse is [?, ..., ?, 3] vector.
       DimensionHandle unused;
@@ -305,10 +312,10 @@ dtype: The `dtype` of the serialized `SparseTensor` objects.
 
 REGISTER_OP("DeserializeManySparse")
     .Input("serialized_sparse: string")
-    .Attr("dtype: type")
     .Output("sparse_indices: int64")
     .Output("sparse_values: dtype")
     .Output("sparse_shape: int64")
+    .Attr("dtype: type")
     .SetShapeFn([](InferenceContext* c) {
       // serialized sparse is [?,3] matrix.
       ShapeHandle serialized_sparse;
diff --git a/tensorflow/python/kernel_tests/sparse_serialization_ops_test.py b/tensorflow/python/kernel_tests/sparse_serialization_ops_test.py
index d1a90952c7..27b39a626f 100644
--- a/tensorflow/python/kernel_tests/sparse_serialization_ops_test.py
+++ b/tensorflow/python/kernel_tests/sparse_serialization_ops_test.py
@@ -64,12 +64,14 @@ class SerializeSparseTest(test.TestCase):
     shape = np.array([3, 4, 5]).astype(np.int64)
     return sparse_tensor_lib.SparseTensorValue(ind, val, shape)
 
-  def testSerializeDeserialize(self):
+  def _testSerializeDeserializeHelper(self,
+                                      serialize_fn,
+                                      deserialize_fn,
+                                      out_type=dtypes.string):
     with self.test_session(use_gpu=False) as sess:
       sp_input = self._SparseTensorValue_5x6(np.arange(6))
-      serialized = sparse_ops.serialize_sparse(sp_input)
-      sp_deserialized = sparse_ops.deserialize_sparse(
-          serialized, dtype=dtypes.int32)
+      serialized = serialize_fn(sp_input, out_type=out_type)
+      sp_deserialized = deserialize_fn(serialized, dtype=dtypes.int32)
 
       indices, values, shape = sess.run(sp_deserialized)
 
@@ -77,14 +79,25 @@ class SerializeSparseTest(test.TestCase):
       self.assertAllEqual(values, sp_input[1])
       self.assertAllEqual(shape, sp_input[2])
 
-  def testSerializeDeserializeBatch(self):
+  def testSerializeDeserialize(self):
+    self._testSerializeDeserializeHelper(sparse_ops.serialize_sparse,
+                                         sparse_ops.deserialize_sparse)
+
+  def testVariantSerializeDeserialize(self):
+    self._testSerializeDeserializeHelper(sparse_ops.serialize_sparse,
+                                         sparse_ops.deserialize_sparse,
+                                         dtypes.variant)
+
+  def _testSerializeDeserializeBatchHelper(self,
+                                           serialize_fn,
+                                           deserialize_fn,
+                                           out_type=dtypes.string):
     with self.test_session(use_gpu=False) as sess:
       sp_input = self._SparseTensorValue_5x6(np.arange(6))
-      serialized = sparse_ops.serialize_sparse(sp_input)
+      serialized = serialize_fn(sp_input, out_type=out_type)
       serialized = array_ops.stack([serialized, serialized])
 
-      sp_deserialized = sparse_ops.deserialize_sparse(
-          serialized, dtype=dtypes.int32)
+      sp_deserialized = deserialize_fn(serialized, dtype=dtypes.int32)
 
       combined_indices, combined_values, combined_shape = sess.run(
           sp_deserialized)
@@ -97,16 +110,29 @@ class SerializeSparseTest(test.TestCase):
       self.assertAllEqual(combined_values[6:], sp_input[1])
       self.assertAllEqual(combined_shape, [2, 5, 6])
 
-  def testSerializeDeserializeBatchInconsistentShape(self):
+  def testSerializeDeserializeBatch(self):
+    self._testSerializeDeserializeBatchHelper(sparse_ops.serialize_sparse,
+                                              sparse_ops.deserialize_sparse)
+
+  def testSerializeDeserializeManyBatch(self):
+    self._testSerializeDeserializeBatchHelper(
+        sparse_ops.serialize_sparse, sparse_ops.deserialize_many_sparse)
+
+  def testVariantSerializeDeserializeBatch(self):
+    self._testSerializeDeserializeBatchHelper(sparse_ops.serialize_sparse,
+                                              sparse_ops.deserialize_sparse,
+                                              dtypes.variant)
+
+  def _testSerializeDeserializeBatchInconsistentShapeHelper(
+      self, serialize_fn, deserialize_fn, out_type=dtypes.string):
     with self.test_session(use_gpu=False) as sess:
       sp_input0 = self._SparseTensorValue_5x6(np.arange(6))
       sp_input1 = self._SparseTensorValue_3x4(np.arange(6))
-      serialized0 = sparse_ops.serialize_sparse(sp_input0)
-      serialized1 = sparse_ops.serialize_sparse(sp_input1)
+      serialized0 = serialize_fn(sp_input0, out_type=out_type)
+      serialized1 = serialize_fn(sp_input1, out_type=out_type)
       serialized = array_ops.stack([serialized0, serialized1])
 
-      sp_deserialized = sparse_ops.deserialize_sparse(
-          serialized, dtype=dtypes.int32)
+      sp_deserialized = deserialize_fn(serialized, dtype=dtypes.int32)
 
       combined_indices, combined_values, combined_shape = sess.run(
           sp_deserialized)
@@ -119,15 +145,26 @@ class SerializeSparseTest(test.TestCase):
       self.assertAllEqual(combined_values[6:], sp_input1[1])
       self.assertAllEqual(combined_shape, [2, 5, 6])
 
-  def testSerializeDeserializeNestedBatch(self):
+  def testSerializeDeserializeBatchInconsistentShape(self):
+    self._testSerializeDeserializeBatchInconsistentShapeHelper(
+        sparse_ops.serialize_sparse, sparse_ops.deserialize_sparse)
+
+  def testVariantSerializeDeserializeBatchInconsistentShape(self):
+    self._testSerializeDeserializeBatchInconsistentShapeHelper(
+        sparse_ops.serialize_sparse, sparse_ops.deserialize_sparse,
+        dtypes.variant)
+
+  def _testSerializeDeserializeNestedBatchHelper(self,
+                                                 serialize_fn,
+                                                 deserialize_fn,
+                                                 out_type=dtypes.string):
     with self.test_session(use_gpu=False) as sess:
       sp_input = self._SparseTensorValue_5x6(np.arange(6))
-      serialized = sparse_ops.serialize_sparse(sp_input)
+      serialized = serialize_fn(sp_input, out_type=out_type)
       serialized = array_ops.stack([serialized, serialized])
       serialized = array_ops.stack([serialized, serialized])
 
-      sp_deserialized = sparse_ops.deserialize_sparse(
-          serialized, dtype=dtypes.int32)
+      sp_deserialized = deserialize_fn(serialized, dtype=dtypes.int32)
 
       combined_indices, combined_values, combined_shape = sess.run(
           sp_deserialized)
@@ -151,40 +188,29 @@ class SerializeSparseTest(test.TestCase):
 
       self.assertAllEqual(combined_shape, [2, 2, 5, 6])
 
-  def testSerializeDeserializeMany(self):
-    with self.test_session(use_gpu=False) as sess:
-      sp_input0 = self._SparseTensorValue_5x6(np.arange(6))
-      sp_input1 = self._SparseTensorValue_3x4(np.arange(6))
-      serialized0 = sparse_ops.serialize_sparse(sp_input0)
-      serialized1 = sparse_ops.serialize_sparse(sp_input1)
-      serialized_concat = array_ops.stack([serialized0, serialized1])
-
-      sp_deserialized = sparse_ops.deserialize_many_sparse(
-          serialized_concat, dtype=dtypes.int32)
-
-      combined_indices, combined_values, combined_shape = sess.run(
-          sp_deserialized)
-
-      self.assertAllEqual(combined_indices[:6, 0], [0] * 6)  # minibatch 0
-      self.assertAllEqual(combined_indices[:6, 1:], sp_input0[0])
-      self.assertAllEqual(combined_indices[6:, 0], [1] * 6)  # minibatch 1
-      self.assertAllEqual(combined_indices[6:, 1:], sp_input1[0])
-      self.assertAllEqual(combined_values[:6], sp_input0[1])
-      self.assertAllEqual(combined_values[6:], sp_input1[1])
-      self.assertAllEqual(combined_shape, [2, 5, 6])
-
-  def testFeedSerializeDeserializeMany(self):
+  def testSerializeDeserializeNestedBatch(self):
+    self._testSerializeDeserializeNestedBatchHelper(
+        sparse_ops.serialize_sparse, sparse_ops.deserialize_sparse)
+
+  def testVariantSerializeDeserializeNestedBatch(self):
+    self._testSerializeDeserializeNestedBatchHelper(
+        sparse_ops.serialize_sparse, sparse_ops.deserialize_sparse,
+        dtypes.variant)
+
+  def _testFeedSerializeDeserializeBatchHelper(self,
+                                               serialize_fn,
+                                               deserialize_fn,
+                                               out_type=dtypes.string):
     with self.test_session(use_gpu=False) as sess:
       sp_input0 = self._SparseTensorPlaceholder()
       sp_input1 = self._SparseTensorPlaceholder()
       input0_val = self._SparseTensorValue_5x6(np.arange(6))
       input1_val = self._SparseTensorValue_3x4(np.arange(6))
-      serialized0 = sparse_ops.serialize_sparse(sp_input0)
-      serialized1 = sparse_ops.serialize_sparse(sp_input1)
+      serialized0 = serialize_fn(sp_input0, out_type=out_type)
+      serialized1 = serialize_fn(sp_input1, out_type=out_type)
       serialized_concat = array_ops.stack([serialized0, serialized1])
 
-      sp_deserialized = sparse_ops.deserialize_many_sparse(
-          serialized_concat, dtype=dtypes.int32)
+      sp_deserialized = deserialize_fn(serialized_concat, dtype=dtypes.int32)
 
       combined_indices, combined_values, combined_shape = sess.run(
           sp_deserialized, {sp_input0: input0_val,
@@ -198,40 +224,96 @@ class SerializeSparseTest(test.TestCase):
       self.assertAllEqual(combined_values[6:], input1_val[1])
       self.assertAllEqual(combined_shape, [2, 5, 6])
 
-  def testSerializeManyDeserializeManyRoundTrip(self):
+  def testFeedSerializeDeserializeBatch(self):
+    self._testFeedSerializeDeserializeBatchHelper(sparse_ops.serialize_sparse,
+                                                  sparse_ops.deserialize_sparse)
+
+  def testFeedSerializeDeserializeManyBatch(self):
+    self._testFeedSerializeDeserializeBatchHelper(
+        sparse_ops.serialize_sparse, sparse_ops.deserialize_many_sparse)
+
+  def testFeedVariantSerializeDeserializeBatch(self):
+    self._testFeedSerializeDeserializeBatchHelper(sparse_ops.serialize_sparse,
+                                                  sparse_ops.deserialize_sparse,
+                                                  dtypes.variant)
+
+  def _testSerializeManyShapeHelper(self,
+                                    serialize_many_fn,
+                                    out_type=dtypes.string):
     with self.test_session(use_gpu=False) as sess:
       # N == 4 because shape_value == [4, 5]
       indices_value = np.array([[0, 0], [0, 1], [2, 0]], dtype=np.int64)
       values_value = np.array([b"a", b"b", b"c"])
       shape_value = np.array([4, 5], dtype=np.int64)
       sparse_tensor = self._SparseTensorPlaceholder(dtype=dtypes.string)
-      serialized = sparse_ops.serialize_many_sparse(sparse_tensor)
-      deserialized = sparse_ops.deserialize_many_sparse(
-          serialized, dtype=dtypes.string)
-      serialized_value, deserialized_value = sess.run(
-          [serialized, deserialized],
+      serialized = serialize_many_fn(sparse_tensor, out_type=out_type)
+      serialized_value = sess.run(
+          serialized,
           feed_dict={
               sparse_tensor.indices: indices_value,
               sparse_tensor.values: values_value,
               sparse_tensor.dense_shape: shape_value
           })
       self.assertEqual(serialized_value.shape, (4, 3))
+
+  def testSerializeManyShape(self):
+    self._testSerializeManyShapeHelper(sparse_ops.serialize_many_sparse)
+
+  def testVariantSerializeManyShape(self):
+    # NOTE: The following test is a no-op as it is currently not possible to
+    # convert the serialized variant value to a numpy value.
+    pass
+
+  def _testSerializeManyDeserializeBatchHelper(self,
+                                               serialize_many_fn,
+                                               deserialize_fn,
+                                               out_type=dtypes.string):
+    with self.test_session(use_gpu=False) as sess:
+      # N == 4 because shape_value == [4, 5]
+      indices_value = np.array([[0, 0], [0, 1], [2, 0]], dtype=np.int64)
+      values_value = np.array([b"a", b"b", b"c"])
+      shape_value = np.array([4, 5], dtype=np.int64)
+      sparse_tensor = self._SparseTensorPlaceholder(dtype=dtypes.string)
+      serialized = serialize_many_fn(sparse_tensor, out_type=out_type)
+      deserialized = deserialize_fn(serialized, dtype=dtypes.string)
+      deserialized_value = sess.run(
+          deserialized,
+          feed_dict={
+              sparse_tensor.indices: indices_value,
+              sparse_tensor.values: values_value,
+              sparse_tensor.dense_shape: shape_value
+          })
       self.assertAllEqual(deserialized_value.indices, indices_value)
       self.assertAllEqual(deserialized_value.values, values_value)
       self.assertAllEqual(deserialized_value.dense_shape, shape_value)
 
-  def testDeserializeFailsWrongType(self):
+  def testSerializeManyDeserializeBatch(self):
+    self._testSerializeManyDeserializeBatchHelper(
+        sparse_ops.serialize_many_sparse, sparse_ops.deserialize_sparse)
+
+  def testSerializeManyDeserializeManyBatch(self):
+    self._testSerializeManyDeserializeBatchHelper(
+        sparse_ops.serialize_many_sparse, sparse_ops.deserialize_many_sparse)
+
+  def testVariantSerializeManyDeserializeBatch(self):
+    self._testSerializeManyDeserializeBatchHelper(
+        sparse_ops.serialize_many_sparse, sparse_ops.deserialize_sparse,
+        dtypes.variant)
+
+  def _testDeserializeFailsWrongTypeHelper(self,
+                                           serialize_fn,
+                                           deserialize_fn,
+                                           out_type=dtypes.string):
     with self.test_session(use_gpu=False) as sess:
       sp_input0 = self._SparseTensorPlaceholder()
       sp_input1 = self._SparseTensorPlaceholder()
       input0_val = self._SparseTensorValue_5x6(np.arange(6))
       input1_val = self._SparseTensorValue_3x4(np.arange(6))
-      serialized0 = sparse_ops.serialize_sparse(sp_input0)
-      serialized1 = sparse_ops.serialize_sparse(sp_input1)
+      serialized0 = serialize_fn(sp_input0, out_type=out_type)
+      serialized1 = serialize_fn(sp_input1, out_type=out_type)
       serialized_concat = array_ops.stack([serialized0, serialized1])
 
-      sp_deserialized = sparse_ops.deserialize_many_sparse(
-          serialized_concat, dtype=dtypes.int64)
+      sp_deserialized = deserialize_fn(serialized_concat, dtype=dtypes.int64)
 
       with self.assertRaisesOpError(
           r"Requested SparseTensor of type int64 but "
@@ -240,18 +322,33 @@ class SerializeSparseTest(test.TestCase):
                  {sp_input0: input0_val,
                   sp_input1: input1_val})
 
-  def testDeserializeFailsInconsistentRank(self):
+  def testDeserializeFailsWrongType(self):
+    self._testDeserializeFailsWrongTypeHelper(sparse_ops.serialize_sparse,
+                                              sparse_ops.deserialize_sparse)
+
+  def testDeserializeManyFailsWrongType(self):
+    self._testDeserializeFailsWrongTypeHelper(
+        sparse_ops.serialize_sparse, sparse_ops.deserialize_many_sparse)
+
+  def testVariantDeserializeFailsWrongType(self):
+    self._testDeserializeFailsWrongTypeHelper(sparse_ops.serialize_sparse,
+                                              sparse_ops.deserialize_sparse,
+                                              dtypes.variant)
+
+  def _testDeserializeFailsInconsistentRankHelper(self,
+                                                  serialize_fn,
+                                                  deserialize_fn,
+                                                  out_type=dtypes.string):
     with self.test_session(use_gpu=False) as sess:
       sp_input0 = self._SparseTensorPlaceholder()
       sp_input1 = self._SparseTensorPlaceholder()
       input0_val = self._SparseTensorValue_5x6(np.arange(6))
       input1_val = self._SparseTensorValue_1x1x1()
-      serialized0 = sparse_ops.serialize_sparse(sp_input0)
-      serialized1 = sparse_ops.serialize_sparse(sp_input1)
+      serialized0 = serialize_fn(sp_input0, out_type=out_type)
+      serialized1 = serialize_fn(sp_input1, out_type=out_type)
       serialized_concat = array_ops.stack([serialized0, serialized1])
 
-      sp_deserialized = sparse_ops.deserialize_many_sparse(
-          serialized_concat, dtype=dtypes.int32)
+      sp_deserialized = deserialize_fn(serialized_concat, dtype=dtypes.int32)
 
       with self.assertRaisesOpError(
           r"Inconsistent shape across SparseTensors: rank prior to "
@@ -260,21 +357,43 @@ class SerializeSparseTest(test.TestCase):
                  {sp_input0: input0_val,
                   sp_input1: input1_val})
 
-  def testDeserializeFailsInvalidProto(self):
+  def testDeserializeFailsInconsistentRank(self):
+    self._testDeserializeFailsInconsistentRankHelper(
+        sparse_ops.serialize_sparse, sparse_ops.deserialize_sparse)
+
+  def testDeserializeManyFailsInconsistentRank(self):
+    self._testDeserializeFailsInconsistentRankHelper(
+        sparse_ops.serialize_sparse, sparse_ops.deserialize_many_sparse)
+
+  def testVariantDeserializeFailsInconsistentRank(self):
+    self._testDeserializeFailsInconsistentRankHelper(
+        sparse_ops.serialize_sparse, sparse_ops.deserialize_sparse,
+        dtypes.variant)
+
+  def _testDeserializeFailsInvalidProtoHelper(self,
+                                              serialize_fn,
+                                              deserialize_fn,
+                                              out_type=dtypes.string):
     with self.test_session(use_gpu=False) as sess:
       sp_input0 = self._SparseTensorPlaceholder()
       input0_val = self._SparseTensorValue_5x6(np.arange(6))
-      serialized0 = sparse_ops.serialize_sparse(sp_input0)
+      serialized0 = serialize_fn(sp_input0, out_type=out_type)
       serialized1 = ["a", "b", "c"]
       serialized_concat = array_ops.stack([serialized0, serialized1])
 
-      sp_deserialized = sparse_ops.deserialize_many_sparse(
-          serialized_concat, dtype=dtypes.int32)
+      sp_deserialized = deserialize_fn(serialized_concat, dtype=dtypes.int32)
 
-      with self.assertRaisesOpError(
-          r"Could not parse serialized_sparse\[1, 0\]"):
+      with self.assertRaisesOpError(r"Could not parse serialized proto"):
         sess.run(sp_deserialized, {sp_input0: input0_val})
 
+  def testDeserializeFailsInvalidProto(self):
+    self._testDeserializeFailsInvalidProtoHelper(sparse_ops.serialize_sparse,
+                                                 sparse_ops.deserialize_sparse)
+
+  def testDeserializeManyFailsInvalidProto(self):
+    self._testDeserializeFailsInvalidProtoHelper(
+        sparse_ops.serialize_sparse, sparse_ops.deserialize_many_sparse)
+
 
 if __name__ == "__main__":
   test.main()
diff --git a/tensorflow/python/ops/hidden_ops.txt b/tensorflow/python/ops/hidden_ops.txt
index f834d9002c..af014a7e39 100644
--- a/tensorflow/python/ops/hidden_ops.txt
+++ b/tensorflow/python/ops/hidden_ops.txt
@@ -354,8 +354,8 @@ DestroyTemporaryVariable
 AddSparseToTensorsMap
 AddManySparseToTensorsMap
 TakeManySparseFromTensorsMap
-DeserializeSparse
 DeserializeManySparse
+DeserializeSparse
 SerializeManySparse
 SerializeSparse
 SparseAdd
diff --git a/tensorflow/python/ops/sparse_ops.py b/tensorflow/python/ops/sparse_ops.py
index 9bdc124c83..62f20e8c9d 100644
--- a/tensorflow/python/ops/sparse_ops.py
+++ b/tensorflow/python/ops/sparse_ops.py
@@ -1385,16 +1385,17 @@ def sparse_fill_empty_rows(sp_input, default_value, name=None):
             empty_row_indicator)
 
 
-def serialize_sparse(sp_input, name=None):
-  """Serialize a `SparseTensor` into a string 3-vector (1-D `Tensor`) object.
+def serialize_sparse(sp_input, name=None, out_type=dtypes.string):
+  """Serialize a `SparseTensor` into a 3-vector (1-D `Tensor`) object.
 
   Args:
     sp_input: The input `SparseTensor`.
     name: A name prefix for the returned tensors (optional).
+    out_type: The `dtype` to use for serialization.
 
   Returns:
-    A string 3-vector (1D `Tensor`), with each column representing the
-    serialized `SparseTensor`'s indices, values, and shape (respectively).
+    A 3-vector (1-D `Tensor`), with each column representing the serialized
+    `SparseTensor`'s indices, values, and shape (respectively).
 
   Raises:
     TypeError: If `sp_input` is not a `SparseTensor`.
@@ -1402,11 +1403,15 @@ def serialize_sparse(sp_input, name=None):
   sp_input = _convert_to_sparse_tensor(sp_input)
 
   return gen_sparse_ops._serialize_sparse(
-      sp_input.indices, sp_input.values, sp_input.dense_shape, name=name)
+      sp_input.indices,
+      sp_input.values,
+      sp_input.dense_shape,
+      name=name,
+      out_type=out_type)
 
 
-def serialize_many_sparse(sp_input, name=None):
-  """Serialize an `N`-minibatch `SparseTensor` into an `[N, 3]` string `Tensor`.
+def serialize_many_sparse(sp_input, name=None, out_type=dtypes.string):
+  """Serialize `N`-minibatch `SparseTensor` into an `[N, 3]` `Tensor`.
 
   The `SparseTensor` must have rank `R` greater than 1, and the first dimension
   is treated as the minibatch dimension.  Elements of the `SparseTensor`
@@ -1419,11 +1424,12 @@ def serialize_many_sparse(sp_input, name=None):
   Args:
     sp_input: The input rank `R` `SparseTensor`.
     name: A name prefix for the returned tensors (optional).
+    out_type: The `dtype` to use for serialization.
 
   Returns:
-    A string matrix (2-D `Tensor`) with `N` rows and `3` columns.
-    Each column represents serialized `SparseTensor`'s indices, values, and
-    shape (respectively).
+    A matrix (2-D `Tensor`) with `N` rows and `3` columns. Each column
+    represents serialized `SparseTensor`'s indices, values, and shape
+    (respectively).
 
   Raises:
     TypeError: If `sp_input` is not a `SparseTensor`.
@@ -1431,7 +1437,11 @@ def serialize_many_sparse(sp_input, name=None):
   sp_input = _convert_to_sparse_tensor(sp_input)
 
   return gen_sparse_ops._serialize_many_sparse(
-      sp_input.indices, sp_input.values, sp_input.dense_shape, name=name)
+      sp_input.indices,
+      sp_input.values,
+      sp_input.dense_shape,
+      name=name,
+      out_type=out_type)
 
 
 def deserialize_sparse(serialized_sparse, dtype, rank=None, name=None):
diff --git a/tensorflow/tools/api/golden/tensorflow.pbtxt b/tensorflow/tools/api/golden/tensorflow.pbtxt
index e79f2a56f5..b12cf5a864 100644
--- a/tensorflow/tools/api/golden/tensorflow.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.pbtxt
@@ -1710,11 +1710,11 @@ tf_module {
   }
   member_method {
     name: "serialize_many_sparse"
-    argspec: "args=[\'sp_input\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+    argspec: "args=[\'sp_input\', \'name\', \'out_type\'], varargs=None, keywords=None, defaults=[\'None\', \"<dtype: \'string\'>\"], "
   }
   member_method {
     name: "serialize_sparse"
-    argspec: "args=[\'sp_input\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+    argspec: "args=[\'sp_input\', \'name\', \'out_type\'], varargs=None, keywords=None, defaults=[\'None\', \"<dtype: \'string\'>\"], "
   }
   member_method {
     name: "serialize_tensor"
-- 
GitLab


From c72bb9754152577de6393879ba38dcfdf583477d Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 5 Dec 2017 10:17:45 -0800
Subject: [PATCH 0638/1225] nn_impl.py cleanup: used keepdims instead of
 deprecated keep_dims.

PiperOrigin-RevId: 177972555
---
 tensorflow/python/ops/nn_impl.py | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/tensorflow/python/ops/nn_impl.py b/tensorflow/python/ops/nn_impl.py
index 18bf5897cf..19a86df6a9 100644
--- a/tensorflow/python/ops/nn_impl.py
+++ b/tensorflow/python/ops/nn_impl.py
@@ -341,7 +341,7 @@ def l2_normalize(x, axis=None, epsilon=1e-12, name=None, dim=None):
   with ops.name_scope(name, "l2_normalize", [x]) as name:
     axis = deprecated_argument_lookup("axis", axis, "dim", dim)
     x = ops.convert_to_tensor(x, name="x")
-    square_sum = math_ops.reduce_sum(math_ops.square(x), axis, keep_dims=True)
+    square_sum = math_ops.reduce_sum(math_ops.square(x), axis, keepdims=True)
     x_inv_norm = math_ops.rsqrt(math_ops.maximum(square_sum, epsilon))
     return math_ops.multiply(x, x_inv_norm, name=name)
 
@@ -593,8 +593,8 @@ def sufficient_statistics(x, axes, shift=None, keep_dims=False, name=None):
     else:  # no shift.
       m_ss = x
       v_ss = math_ops.square(x)
-    m_ss = math_ops.reduce_sum(m_ss, axes, keep_dims=keep_dims, name="mean_ss")
-    v_ss = math_ops.reduce_sum(v_ss, axes, keep_dims=keep_dims, name="var_ss")
+    m_ss = math_ops.reduce_sum(m_ss, axes, keepdims=keep_dims, name="mean_ss")
+    v_ss = math_ops.reduce_sum(v_ss, axes, keepdims=keep_dims, name="var_ss")
   return counts, m_ss, v_ss, shift
 
 
@@ -664,12 +664,12 @@ def moments(x, axes,
     # on 32-bit floats before converting the mean and variance back to fp16
     y = math_ops.cast(x, dtypes.float32) if x.dtype == dtypes.float16 else x
     # Compute true mean while keeping the dims for proper broadcasting.
-    mean = math_ops.reduce_mean(y, axes, keep_dims=True, name="mean")
+    mean = math_ops.reduce_mean(y, axes, keepdims=True, name="mean")
     # sample variance, not unbiased variance
     variance = math_ops.reduce_mean(
         math_ops.squared_difference(y, array_ops.stop_gradient(mean)),
         axes,
-        keep_dims=True,
+        keepdims=True,
         name="variance")
     if not keep_dims:
       mean = array_ops.squeeze(mean, axes)
@@ -714,7 +714,7 @@ def weighted_moments(x, axes, frequency_weights, name=None, keep_dims=False):
     # Note that we use keep_dims=True for our reductions regardless of the arg;
     # this is so that the results remain broadcast-compatible with the inputs.
     weighted_input_sum = math_ops.reduce_sum(
-        frequency_weights * x, axes, name="weighted_input_sum", keep_dims=True)
+        frequency_weights * x, axes, name="weighted_input_sum", keepdims=True)
 
     # The shape of the weights isn't necessarily the same as x's
     # shape, just broadcast-compatible with it -- so this expression
@@ -725,7 +725,7 @@ def weighted_moments(x, axes, frequency_weights, name=None, keep_dims=False):
     broadcasted_weights = frequency_weights + array_ops.zeros_like(x)
 
     sum_of_weights = math_ops.reduce_sum(
-        broadcasted_weights, axes, name="sum_of_weights", keep_dims=True)
+        broadcasted_weights, axes, name="sum_of_weights", keepdims=True)
 
     divisor = math_ops.reciprocal(sum_of_weights, name="inv_weight_sum")
 
@@ -736,7 +736,7 @@ def weighted_moments(x, axes, frequency_weights, name=None, keep_dims=False):
         frequency_weights * math_ops.squared_difference(x, weighted_mean),
         axes,
         name="weighted_distsq",
-        keep_dims=True)
+        keepdims=True)
 
     weighted_variance = math_ops.multiply(weighted_distsq, divisor)
 
-- 
GitLab


From e72ecbdb7a84c5cc0801e85a9c38f6fd181ceef6 Mon Sep 17 00:00:00 2001
From: Skye Wanderman-Milne <skyewm@google.com>
Date: Tue, 5 Dec 2017 11:38:19 -0800
Subject: [PATCH 0639/1225] Add ImportGraphDefOptions::uniquify_prefix.

This option is necessary to mimic the Python import_graph_def method's
behavior.

PiperOrigin-RevId: 177986165
---
 tensorflow/core/graph/graph_constructor.cc    | 40 +++++++++++--------
 tensorflow/core/graph/graph_constructor.h     | 10 ++++-
 .../core/graph/graph_constructor_test.cc      | 39 ++++++++++++------
 3 files changed, 60 insertions(+), 29 deletions(-)

diff --git a/tensorflow/core/graph/graph_constructor.cc b/tensorflow/core/graph/graph_constructor.cc
index 0fb61fd9af..6e72d73918 100644
--- a/tensorflow/core/graph/graph_constructor.cc
+++ b/tensorflow/core/graph/graph_constructor.cc
@@ -77,6 +77,7 @@ class GraphConstructor {
                      ? in.prefix
                      : in.prefix + "/"),
           uniquify_names(in.uniquify_names),
+          uniquify_prefix(in.uniquify_prefix),
           input_map(in.input_map),
           skip_mapped_nodes(in.skip_mapped_nodes),
           control_dependencies(in.control_dependencies),
@@ -90,6 +91,7 @@ class GraphConstructor {
 
     string prefix;
     bool uniquify_names;
+    bool uniquify_prefix;
     std::map<TensorId, TensorId> input_map;
     bool skip_mapped_nodes;
     std::vector<string> control_dependencies;
@@ -144,6 +146,7 @@ class GraphConstructor {
         library_(library),
         g_(g),
         original_versions_(g->versions()),
+        prefix_(opts.prefix),
         refiner_(refiner),
         return_tensors_(return_tensors),
         return_nodes_(return_nodes),
@@ -227,6 +230,9 @@ class GraphConstructor {
   Graph* g_;
   const VersionDef original_versions_;
 
+  // A copy of opts_.prefix, possibly uniquified.
+  string prefix_;
+
   ShapeRefiner* refiner_;
 
   // May be null. Not owned.
@@ -348,7 +354,7 @@ Status GraphConstructor::EnsureNoNameCollisions() {
     }
     AddPrefixes(n->name(), &existing_prefixes_);
   }
-  if (opts_.prefix.empty() && opts_.importing && !opts_.uniquify_names) {
+  if (prefix_.empty() && opts_.importing && !opts_.uniquify_names) {
     for (const NodeDef* n : node_defs_) {
       const string& name = n->name();
       if (NameExistsInGraph(name)) {
@@ -356,19 +362,22 @@ Status GraphConstructor::EnsureNoNameCollisions() {
                                        "' already exists in the Graph");
       }
     }
-  } else if (!opts_.prefix.empty()) {
-    StringPiece prefix_no_slash(opts_.prefix);
+  } else if (!prefix_.empty()) {
+    StringPiece prefix_no_slash(prefix_);
     prefix_no_slash.remove_suffix(1);
     if (!IsValidNodeName(prefix_no_slash, false)) {
-      return errors::InvalidArgument("Imported node name prefix '",
-                                     opts_.prefix,
+      return errors::InvalidArgument("Imported node name prefix '", prefix_,
                                      "' would lead to invalid node names");
     }
     if (NameExistsInGraph(prefix_no_slash)) {
-      return errors::InvalidArgument("Import node name prefix '",
-                                     prefix_no_slash,
-                                     "' conflicts with "
-                                     "name already used in the graph");
+      if (opts_.uniquify_prefix) {
+        prefix_ = strings::StrCat(FindUniqueName(prefix_no_slash), "/");
+      } else {
+        return errors::InvalidArgument("Import node name prefix '",
+                                       prefix_no_slash,
+                                       "' conflicts with "
+                                       "name already used in the graph");
+      }
     }
   }
   return Status::OK();
@@ -740,8 +749,8 @@ void GraphConstructor::AddControlDependencies(
 
 void GraphConstructor::AddPrefixToNodeDef(
     const std::vector<bool>& input_already_exists, NodeDef* node_def) {
-  if (opts_.prefix.empty()) return;
-  node_def->set_name(strings::StrCat(opts_.prefix, node_def->name()));
+  if (prefix_.empty()) return;
+  node_def->set_name(strings::StrCat(prefix_, node_def->name()));
   // Update names of input nodes
   for (int i = 0; i < node_def->input_size(); ++i) {
     StringPiece input(node_def->input(i));
@@ -749,9 +758,9 @@ void GraphConstructor::AddPrefixToNodeDef(
     // imported).
     if (input_already_exists[i]) continue;
     if (input.Consume("^")) {
-      node_def->set_input(i, strings::StrCat("^", opts_.prefix, input));
+      node_def->set_input(i, strings::StrCat("^", prefix_, input));
     } else {
-      node_def->set_input(i, strings::StrCat(opts_.prefix, input));
+      node_def->set_input(i, strings::StrCat(prefix_, input));
     }
   }
   // Update names of colocation groups
@@ -761,8 +770,7 @@ void GraphConstructor::AddPrefixToNodeDef(
     for (int i = 0; i < list->s_size(); ++i) {
       StringPiece v(list->s(i));
       if (v.Consume(kColocationGroupPrefix)) {
-        list->set_s(i,
-                    strings::StrCat(kColocationGroupPrefix, opts_.prefix, v));
+        list->set_s(i, strings::StrCat(kColocationGroupPrefix, prefix_, v));
       }
     }
   }
@@ -975,7 +983,7 @@ Status GraphConstructor::Convert() {
 
     Node* node;
     if (opts_.importing) {
-      if (!opts_.prefix.empty()) {
+      if (!prefix_.empty()) {
         AddPrefixToNodeDef(input_already_exists, &imported_node_def);
       } else if (opts_.uniquify_names) {
         UniquifyNames(input_already_exists, &imported_node_def);
diff --git a/tensorflow/core/graph/graph_constructor.h b/tensorflow/core/graph/graph_constructor.h
index 4b418b8622..b4dd2ba51a 100644
--- a/tensorflow/core/graph/graph_constructor.h
+++ b/tensorflow/core/graph/graph_constructor.h
@@ -54,7 +54,10 @@ extern Status ConvertNodeDefsToGraph(const GraphConstructorOptions& opts,
 
 // Options for calling ImportGraphDef().
 struct ImportGraphDefOptions {
-  ImportGraphDefOptions() : uniquify_names(false), skip_mapped_nodes(false) {}
+  ImportGraphDefOptions()
+      : uniquify_names(false),
+        uniquify_prefix(false),
+        skip_mapped_nodes(false) {}
 
   // Name prefix to use for nodes imported from the GraphDef.  For example, if
   // prefix="animals" and GraphDef contains a node "bunny" then the node will be
@@ -68,6 +71,11 @@ struct ImportGraphDefOptions {
   // will guarantee all node names are unique.
   bool uniquify_names;
 
+  // If true, `prefix` will be modified if it already exists as a node name or
+  // prefix in the graph. If false, a conflicting prefix will be treated as an
+  // error. This option has no effect if `prefix` isn't specified.
+  bool uniquify_prefix;
+
   // Maps tensors in `gdef` to existing tensors in `g`. Inputs in `gdef`
   // corresponding to `input_map` keys will be remapped to the nodes in `g`
   // corresponding to the values.
diff --git a/tensorflow/core/graph/graph_constructor_test.cc b/tensorflow/core/graph/graph_constructor_test.cc
index 83aba6c9be..9be3de2388 100644
--- a/tensorflow/core/graph/graph_constructor_test.cc
+++ b/tensorflow/core/graph/graph_constructor_test.cc
@@ -1806,6 +1806,21 @@ TEST_F(GraphConstructorTest, ImportGraphDef_UniquifyNames) {
   EXPECT_EQ(results.return_nodes[1]->name(), "B_2");
   EXPECT_EQ(results.return_nodes[1]->def().input(0), "A_2:0");
 
+  // Import with an already-used prefix
+  opts.prefix = "A";
+  opts.uniquify_prefix = true;
+  results = ImportGraphDefResults();
+  ExpectOK(graph_def_str, opts, &refiner, &results);
+
+  ASSERT_EQ(results.return_nodes.size(), 2);
+  EXPECT_EQ(results.return_nodes[0]->name(), "A_3/A");
+  EXPECT_EQ(results.return_nodes[1]->name(), "A_3/B");
+  EXPECT_EQ(results.return_nodes[1]->def().input(0), "A_3/A");
+
+  // Create B_3 node to keep the A/B numbering in sync
+  opts = ImportGraphDefOptions();
+  ExpectOK("node { name: 'B_3' op: 'TestInput' }");
+
   // Import with existing de-duped node names
   opts = ImportGraphDefOptions();
   opts.uniquify_names = true;
@@ -1827,24 +1842,24 @@ TEST_F(GraphConstructorTest, ImportGraphDef_UniquifyNames) {
   opts = ImportGraphDefOptions();
   opts.uniquify_names = true;
   opts.return_nodes.push_back("A");
-  opts.return_nodes.push_back("A_3");
+  opts.return_nodes.push_back("A_4");
   opts.return_nodes.push_back("B");
-  opts.return_nodes.push_back("B_3/B");
+  opts.return_nodes.push_back("B_4/B");
   results = ImportGraphDefResults();
   ExpectOK(
       "node { name: 'A' op: 'TestInput' }"
-      "node { name: 'A_3' op: 'TestInput' }"
+      "node { name: 'A_4' op: 'TestInput' }"
       "node { name: 'B' op: 'TestOneInputTwoOutputs' input: ['A'] }"
-      "node { name: 'B_3/B' op: 'TestOneInputTwoOutputs' input: ['A_3'] }",
+      "node { name: 'B_4/B' op: 'TestOneInputTwoOutputs' input: ['A_4'] }",
       opts, &refiner, &results);
 
   ASSERT_EQ(results.return_nodes.size(), 4);
-  EXPECT_EQ(results.return_nodes[0]->name(), "A_4");
-  EXPECT_EQ(results.return_nodes[1]->name(), "A_3");
-  EXPECT_EQ(results.return_nodes[2]->name(), "B_4");
-  EXPECT_EQ(results.return_nodes[2]->def().input(0), "A_4:0");
-  EXPECT_EQ(results.return_nodes[3]->name(), "B_3/B");
-  EXPECT_EQ(results.return_nodes[3]->def().input(0), "A_3");
+  EXPECT_EQ(results.return_nodes[0]->name(), "A_5");
+  EXPECT_EQ(results.return_nodes[1]->name(), "A_4");
+  EXPECT_EQ(results.return_nodes[2]->name(), "B_5");
+  EXPECT_EQ(results.return_nodes[2]->def().input(0), "A_5:0");
+  EXPECT_EQ(results.return_nodes[3]->name(), "B_4/B");
+  EXPECT_EQ(results.return_nodes[3]->def().input(0), "A_4");
 
   // Create node with prefix and then import node with same name
   ExpectOK("node { name: 'foo/abc' op: 'ABC' }");
@@ -1895,8 +1910,8 @@ TEST_F(GraphConstructorTest, ImportGraphDef_UniquifyNames) {
   ExpectOK(graph_def_str, opts, &refiner, &results);
 
   ASSERT_EQ(results.return_nodes.size(), 2);
-  EXPECT_EQ(results.return_nodes[0]->name(), "A_5");
-  EXPECT_EQ(results.return_nodes[1]->name(), "B_5");
+  EXPECT_EQ(results.return_nodes[0]->name(), "A_6");
+  EXPECT_EQ(results.return_nodes[1]->name(), "B_6");
   EXPECT_EQ(results.return_nodes[1]->def().input(0), "A:0");
 }
 
-- 
GitLab


From 4b0a23684852fe68ac2248fe2e04e118a6173848 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 5 Dec 2017 11:41:24 -0800
Subject: [PATCH 0640/1225] Add the tf2xla_supported_ops tool, which dumps ops
 supported by tf2xla.

Also fix a TODO in XlaOpRegistry to filter by the types allowed by the OpDef.

Also see #14798

PiperOrigin-RevId: 177986664
---
 tensorflow/compiler/tf2xla/BUILD              |  26 +-
 .../tf2xla/g3doc/cpu_supported_ops.md         | 242 ++++++++++++++++++
 .../tf2xla/g3doc/gpu_supported_ops.md         | 238 +++++++++++++++++
 .../compiler/tf2xla/tf2xla_supported_ops.cc   |  97 +++++++
 .../compiler/tf2xla/tf2xla_supported_ops.h    |  33 +++
 .../tf2xla/tf2xla_supported_ops_main.cc       |  22 ++
 tensorflow/compiler/tf2xla/xla_op_registry.cc |  50 +++-
 tensorflow/compiler/tf2xla/xla_op_registry.h  |  10 +-
 8 files changed, 707 insertions(+), 11 deletions(-)
 create mode 100644 tensorflow/compiler/tf2xla/g3doc/cpu_supported_ops.md
 create mode 100644 tensorflow/compiler/tf2xla/g3doc/gpu_supported_ops.md
 create mode 100644 tensorflow/compiler/tf2xla/tf2xla_supported_ops.cc
 create mode 100644 tensorflow/compiler/tf2xla/tf2xla_supported_ops.h
 create mode 100644 tensorflow/compiler/tf2xla/tf2xla_supported_ops_main.cc

diff --git a/tensorflow/compiler/tf2xla/BUILD b/tensorflow/compiler/tf2xla/BUILD
index dc6d826a3a..5d1cb6d735 100644
--- a/tensorflow/compiler/tf2xla/BUILD
+++ b/tensorflow/compiler/tf2xla/BUILD
@@ -1,6 +1,6 @@
 licenses(["notice"])  # Apache 2.0
 
-load("//tensorflow:tensorflow.bzl", "tf_cc_test")
+load("//tensorflow:tensorflow.bzl", "tf_cc_binary", "tf_cc_test")
 
 package_group(
     name = "internal",
@@ -25,6 +25,30 @@ package(
 load("@local_config_cuda//cuda:build_defs.bzl", "if_cuda_is_configured")
 load("//tensorflow/compiler/xla:xla.bzl", "xla_proto_library")
 
+cc_library(
+    name = "tf2xla_supported_ops_lib",
+    srcs = ["tf2xla_supported_ops.cc"],
+    hdrs = ["tf2xla_supported_ops.h"],
+    visibility = ["//visibility:public"],
+    deps = [
+        ":xla_compiler",
+        "//tensorflow/compiler/tf2xla/kernels:xla_cpu_only_ops",
+        "//tensorflow/compiler/tf2xla/kernels:xla_ops",
+        "//tensorflow/core:framework",
+        "//tensorflow/core:framework_internal",
+        "//tensorflow/core:lib",
+        "//tensorflow/core:ops",
+        "//tensorflow/core:protos_all_cc",
+    ],
+)
+
+tf_cc_binary(
+    name = "tf2xla_supported_ops",
+    srcs = ["tf2xla_supported_ops_main.cc"],
+    visibility = ["//visibility:public"],
+    deps = [":tf2xla_supported_ops_lib"],
+)
+
 xla_proto_library(
     name = "tf2xla_proto",
     srcs = ["tf2xla.proto"],
diff --git a/tensorflow/compiler/tf2xla/g3doc/cpu_supported_ops.md b/tensorflow/compiler/tf2xla/g3doc/cpu_supported_ops.md
new file mode 100644
index 0000000000..82b3b46a2f
--- /dev/null
+++ b/tensorflow/compiler/tf2xla/g3doc/cpu_supported_ops.md
@@ -0,0 +1,242 @@
+**Supported operators for device: XLA_CPU_JIT**
+
+Operator                              | Type Constraint
+------------------------------------- | ---------------
+`Abs`                                 | `T={double,float,int32,int64}`
+`Acosh`                               | `T={complex64,double,float}`
+`Add`                                 | `T={complex64,double,float,int32,int64}`
+`AddN`                                | `T={complex64,double,float,int32,int64,uint32,uint64}`
+`All`                                 | `Tidx={int32,int64}`
+`Angle`                               | `Tout={double,float}`<br>`T={complex64}`
+`Any`                                 | `Tidx={int32,int64}`
+`ApproximateEqual`                    | `T={complex64,double,float,int32,int64,uint32,uint64}`
+`ArgMax`                              | `Tidx={int32,int64}`<br>`output_type={int32,int64}`<br>`T={float}`
+`ArgMin`                              | `Tidx={int32,int64}`<br>`output_type={int32,int64}`<br>`T={complex64,double,float,int32,int64,uint32,uint64}`
+`Asinh`                               | `T={complex64,double,float}`
+`AssignAddVariableOp`                 | `dtype={complex64,double,float,int32,int64,uint32,uint64}`
+`AssignSubVariableOp`                 | `dtype={complex64,double,float,int32,int64,uint32,uint64}`
+`AssignVariableOp`                    | `dtype={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`Atan2`                               | `T={double,float}`
+`Atanh`                               | `T={complex64,double,float}`
+`AvgPool`                             | `T={double,float}`
+`AvgPool3D`                           | `T={double,float}`
+`AvgPool3DGrad`                       | `T={double,float}`
+`AvgPoolGrad`                         | `T={double,float}`
+`BatchMatMul`                         | `T={complex64,double,float,int32}`
+`BatchToSpace`                        | `Tidx={int32,int64}`<br>`T={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`BatchToSpaceND`                      | `Tcrops={int32,int64}`<br>`Tblock_shape={int32,int64}`<br>`T={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`BiasAdd`                             | `T={complex64,double,float,int32,int64,uint32,uint64}`
+`BiasAddGrad`                         | `T={complex64,double,float,int32,int64,uint32,uint64}`
+`BiasAddV1`                           | `T={complex64,double,float,int32,int64,uint32,uint64}`
+`BitwiseAnd`                          | `T={int32,int64,uint32,uint64}`
+`BitwiseOr`                           | `T={int32,int64,uint32,uint64}`
+`BroadcastArgs`                       | `T={int32,int64}`
+`BroadcastGradientArgs`               | `T={int32,int64}`
+`Cast`                                | `DstT={bool,complex64,double,float,int32,int64,uint32,uint64}`<br>`SrcT={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`Ceil`                                | `T={double,float}`
+`Cholesky`                            | `T={complex64,double,float}`
+`Complex`                             | `Tout={complex64}`<br>`T={double,float}`
+`ComplexAbs`                          | `Tout={double,float}`<br>`T={complex64}`
+`Concat`                              | `T={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`ConcatOffset`                        |
+`ConcatV2`                            | `Tidx={int32}`<br>`T={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`Conj`                                | `T={complex64}`
+`Const`                               | `dtype={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`ControlTrigger`                      |
+`Conv2D`                              | `T={float}`
+`Conv2DBackpropFilter`                | `T={float}`
+`Conv2DBackpropInput`                 | `T={float}`
+`Conv3D`                              | `T={double,float}`
+`Conv3DBackpropFilterV2`              | `T={double,float}`
+`Conv3DBackpropInputV2`               | `T={double,float}`
+`Cos`                                 | `T={complex64,double,float}`
+`Cosh`                                | `T={complex64,double,float}`
+`Cross`                               | `T={double,float,int32,int64,uint32,uint64}`
+`Cumprod`                             | `Tidx={int32,int64}`<br>`T={float}`
+`Cumsum`                              | `Tidx={int32,int64}`<br>`T={float}`
+`DepthToSpace`                        | `T={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`DepthwiseConv2dNative`               | `T={double,float}`
+`DepthwiseConv2dNativeBackpropFilter` | `T={double,float}`
+`DepthwiseConv2dNativeBackpropInput`  | `T={double,float}`
+`Diag`                                | `T={complex64,double,float,int32,int64}`
+`DiagPart`                            | `T={complex64,double,float,int32,int64}`
+`Div`                                 | `T={complex64,double,float,int32,int64}`
+`DynamicStitch`                       | `T={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`Elu`                                 | `T={double,float}`
+`EluGrad`                             | `T={double,float}`
+`Equal`                               | `T={bool,complex64,double,float,int32,int64}`
+`Exp`                                 | `T={complex64,double,float}`
+`ExpandDims`                          | `Tdim={int32,int64}`<br>`T={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`Expm1`                               | `T={complex64,double,float}`
+`Fill`                                | `T={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`Floor`                               | `T={double,float}`
+`FloorDiv`                            | `T={complex64,double,float,int32,int64}`
+`FloorMod`                            | `T={double,float,int32,int64}`
+`FusedBatchNorm`                      | `T={float}`
+`FusedBatchNormGrad`                  | `T={float}`
+`FusedBatchNormGradV2`                | `U={float}`<br>`T={float}`
+`FusedBatchNormV2`                    | `U={float}`<br>`T={float}`
+`Gather`                              | `Tindices={int32,int64}`<br>`Tparams={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`GatherV2`                            | `Taxis={int32,int64}`<br>`Tindices={int32,int64}`<br>`Tparams={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`Greater`                             | `T={double,float,int32,int64,uint32,uint64}`
+`GreaterEqual`                        | `T={double,float,int32,int64,uint32,uint64}`
+`Identity`                            | `T={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`IdentityN`                           | `T={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`Imag`                                | `Tout={double,float}`<br>`T={complex64}`
+`Inv`                                 | `T={complex64,double,float,int32,int64}`
+`Invert`                              | `T={int32,int64,uint32,uint64}`
+`InvertPermutation`                   | `T={int32}`
+`IsFinite`                            | `T={double,float}`
+`IsInf`                               | `T={double,float}`
+`IsNan`                               | `T={double,float}`
+`L2Loss`                              | `T={double,float}`
+`LRN`                                 | `T={float}`
+`LRNGrad`                             | `T={float}`
+`LeftShift`                           | `T={int32,int64,uint32,uint64}`
+`Less`                                | `T={double,float,int32,int64,uint32,uint64}`
+`LessEqual`                           | `T={double,float,int32,int64,uint32,uint64}`
+`LinSpace`                            | `Tidx={int32,int64}`<br>`T={double,float}`
+`Log`                                 | `T={complex64,double,float}`
+`Log1p`                               | `T={complex64,double,float}`
+`LogSoftmax`                          | `T={double,float}`
+`LogicalAnd`                          |
+`LogicalNot`                          |
+`LogicalOr`                           |
+`MatMul`                              | `T={complex64,double,float}`
+`MatrixDiag`                          | `T={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`MatrixDiagPart`                      | `T={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`Max`                                 | `Tidx={int32,int64}`<br>`T={complex64,double,float,int32,int64,uint32,uint64}`
+`MaxPool`                             | `T={double,float,int32,int64}`
+`MaxPool3D`                           | `T={float}`
+`MaxPool3DGrad`                       | `TInput={float}`<br>`T={float}`
+`MaxPoolGrad`                         | `T={double,float,int32,int64,uint32,uint64}`
+`Maximum`                             | `T={double,float,int32,int64}`
+`Mean`                                | `Tidx={int32,int64}`<br>`T={complex64,double,float,int32,int64,uint32,uint64}`
+`Min`                                 | `Tidx={int32,int64}`<br>`T={complex64,double,float,int32,int64,uint32,uint64}`
+`Minimum`                             | `T={double,float,int32,int64}`
+`MirrorPad`                           | `Tpaddings={int32,int64}`<br>`T={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`Mod`                                 | `T={double,float,int32,int64}`
+`Mul`                                 | `T={complex64,double,float,int32,int64}`
+`Multinomial`                         | `output_dtype={int32,int64}`<br>`T={double,float,int32,int64,uint32,uint64}`
+`Neg`                                 | `T={complex64,double,float,int32,int64}`
+`NoOp`                                |
+`NotEqual`                            | `T={bool,complex64,double,float,int32,int64}`
+`OneHot`                              | `TI={int32,int64}`<br>`T={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`OnesLike`                            | `T={bool,complex64,double,float,int32,int64}`
+`Pack`                                | `T={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`Pad`                                 | `Tpaddings={int32,int64}`<br>`T={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`PadV2`                               | `Tpaddings={int32,int64}`<br>`T={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`ParallelDynamicStitch`               | `T={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`Pow`                                 | `T={complex64,double,float,int32,int64}`
+`PreventGradient`                     | `T={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`Prod`                                | `Tidx={int32,int64}`<br>`T={complex64,double,float,int32,int64,uint32,uint64}`
+`QuantizeAndDequantizeV2`             | `T={double,float}`
+`RandomStandardNormal`                | `dtype={float}`
+`RandomUniform`                       | `T={int32,int64}`<br>`dtype={double,float}`
+`RandomUniformInt`                    | `T={int32,int64}`<br>`Tout={int32,int64}`
+`Range`                               | `Tidx={double,float,int32,int64}`
+`Rank`                                | `T={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`ReadVariableOp`                      | `dtype={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`Real`                                | `Tout={double,float}`<br>`T={complex64}`
+`RealDiv`                             | `T={complex64,double,float,int32,int64}`
+`Reciprocal`                          | `T={complex64,double,float,int32,int64}`
+`ReciprocalGrad`                      | `T={complex64,double,float}`
+`Relu`                                | `T={double,float,int32,int64,uint32,uint64}`
+`Relu6`                               | `T={double,float,int32,int64,uint32,uint64}`
+`Relu6Grad`                           | `T={double,float,int32,int64,uint32,uint64}`
+`ReluGrad`                            | `T={double,float,int32,int64,uint32,uint64}`
+`Reshape`                             | `Tshape={int32,int64}`<br>`T={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`ResourceApplyAdagrad`                | `T={double,float}`
+`ResourceApplyAdam`                   | `T={double,float}`
+`ResourceApplyFtrl`                   | `T={double,float}`
+`ResourceApplyFtrlV2`                 | `T={double,float}`
+`ResourceApplyGradientDescent`        | `T={double,float}`
+`ResourceApplyMomentum`               | `T={double,float}`
+`ResourceApplyRMSProp`                | `T={double,float}`
+`ResourceGather`                      | `Tindices={int32,int64}`<br>`dtype={complex64,double,float,int32,int64,uint32,uint64}`
+`ResourceStridedSliceAssign`          | `Index={int32,int64}`<br>`T={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`Reverse`                             | `T={bool,complex64,double,float,int32,int64}`
+`ReverseV2`                           | `T={bool,complex64,double,float,int32,int64}`<br>`Tidx={int32,int64}`
+`RightShift`                          | `T={int32,int64,uint32,uint64}`
+`Rint`                                | `T={double,float}`
+`Round`                               | `T={complex64,double,float,int32,int64}`
+`Rsqrt`                               | `T={complex64,double,float}`
+`RsqrtGrad`                           | `T={complex64,double,float}`
+`Select`                              | `T={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`Selu`                                | `T={double,float}`
+`SeluGrad`                            | `T={double,float}`
+`Shape`                               | `out_type={int32,int64}`<br>`T={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`ShapeN`                              | `out_type={int32,int64}`<br>`T={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`Sigmoid`                             | `T={complex64,double,float}`
+`SigmoidGrad`                         | `T={complex64,double,float}`
+`Sign`                                | `T={complex64,double,float,int32,int64}`
+`Sin`                                 | `T={complex64,double,float}`
+`Sinh`                                | `T={complex64,double,float}`
+`Size`                                | `out_type={int32,int64}`<br>`T={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`Slice`                               | `Index={int32,int64}`<br>`T={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`Softmax`                             | `T={double,float}`
+`SoftmaxCrossEntropyWithLogits`       | `T={double,float}`
+`Softplus`                            | `T={double,float,int32,int64,uint32,uint64}`
+`SoftplusGrad`                        | `T={double,float,int32,int64,uint32,uint64}`
+`Softsign`                            | `T={double,float,int32,int64,uint32,uint64}`
+`SoftsignGrad`                        | `T={double,float,int32,int64,uint32,uint64}`
+`SpaceToBatch`                        | `Tpaddings={int32,int64}`<br>`T={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`SpaceToBatchND`                      | `Tblock_shape={int32,int64}`<br>`Tpaddings={int32,int64}`<br>`T={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`SpaceToDepth`                        | `T={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`SparseMatMul`                        | `Tb={float}`<br>`Ta={float}`
+`SparseSoftmaxCrossEntropyWithLogits` | `Tlabels={int32,int64}`<br>`T={double,float}`
+`Split`                               | `T={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`SplitV`                              | `Tlen={int32,int64}`<br>`T={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`Sqrt`                                | `T={complex64,double,float}`
+`SqrtGrad`                            | `T={complex64,double,float}`
+`Square`                              | `T={complex64,double,float,int32,int64}`
+`SquaredDifference`                   | `T={complex64,double,float,int32,int64}`
+`Squeeze`                             | `T={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`StackCloseV2`                        |
+`StackPopV2`                          | `elem_type={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`StackPushV2`                         | `T={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`StackV2`                             | `elem_type={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`StatelessRandomNormal`               | `Tseed={int32}`<br>`T={int32,int64}`<br>`dtype={float}`
+`StatelessRandomUniform`              | `Tseed={int32}`<br>`T={int32,int64}`<br>`dtype={float}`
+`StopGradient`                        | `T={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`StridedSlice`                        | `Index={int32,int64}`<br>`T={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`StridedSliceGrad`                    | `Index={int32,int64}`<br>`T={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`Sub`                                 | `T={complex64,double,float,int32,int64}`
+`Sum`                                 | `Tidx={int32,int64}`<br>`T={complex64,double,float,int32,int64,uint32,uint64}`
+`SymbolicGradient`                    | `Tout={bool,complex64,double,float,int32,int64,uint32,uint64}`<br>`Tin={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`Tan`                                 | `T={complex64,double,float,int32,int64}`
+`Tanh`                                | `T={complex64,double,float}`
+`TanhGrad`                            | `T={complex64,double,float}`
+`TensorArrayCloseV3`                  |
+`TensorArrayConcatV3`                 | `dtype={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`TensorArrayGatherV3`                 | `dtype={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`TensorArrayGradV3`                   |
+`TensorArrayReadV3`                   | `dtype={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`TensorArrayScatterV3`                | `T={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`TensorArraySizeV3`                   |
+`TensorArraySplitV3`                  | `T={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`TensorArrayV3`                       | `dtype={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`TensorArrayWriteV3`                  | `T={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`Tile`                                | `Tmultiples={int32,int64}`<br>`T={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`Transpose`                           | `Tperm={int32,int64}`<br>`T={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`TruncateDiv`                         | `T={complex64,double,float,int32,int64}`
+`TruncateMod`                         | `T={double,float,int32,int64}`
+`TruncatedNormal`                     | `T={int32,int64}`<br>`dtype={double,float}`
+`Unpack`                              | `T={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`UnsortedSegmentSum`                  | `Tnumsegments={int32,int64}`<br>`Tindices={int32,int64}`<br>`T={complex64,double,float,int32,int64,uint32,uint64}`
+`VarIsInitializedOp`                  |
+`VariableShape`                       | `out_type={int32,int64}`
+`XlaWhile`                            | `T={bool,complex64,double,float,int32,int64,resource,uint32,uint64}`
+`ZerosLike`                           | `T={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`_Arg`                                | `T={bool,complex64,double,float,int32,int64,resource,uint32,uint64}`
+`_ArrayToList`                        | `out_types={bool,complex64,double,float,int32,int64,uint32,uint64}`<br>`T={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`_ListToArray`                        | `T={bool,complex64,double,float,int32,int64,uint32,uint64}`<br>`Tin={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`_Retval`                             | `T={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`_XLARecv`                            | `T={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`_XLASend`                            | `T={bool,complex64,double,float,int32,int64,uint32,uint64}`
+
+To regenerate this table, run:
+
+```shell
+bazel run -c opt -- tensorflow/compiler/tf2xla:tf2xla_supported_ops --device=XLA_CPU_JIT
+```
diff --git a/tensorflow/compiler/tf2xla/g3doc/gpu_supported_ops.md b/tensorflow/compiler/tf2xla/g3doc/gpu_supported_ops.md
new file mode 100644
index 0000000000..d4b7621ad2
--- /dev/null
+++ b/tensorflow/compiler/tf2xla/g3doc/gpu_supported_ops.md
@@ -0,0 +1,238 @@
+**Supported operators for device: XLA_GPU_JIT**
+
+Operator                              | Type Constraint
+------------------------------------- | ---------------
+`Abs`                                 | `T={double,float,int32,int64}`
+`Acosh`                               | `T={complex64,double,float}`
+`Add`                                 | `T={complex64,double,float,int32,int64}`
+`AddN`                                | `T={complex64,double,float,int32,int64,uint32,uint64}`
+`All`                                 | `Tidx={int32,int64}`
+`Angle`                               | `Tout={double,float}`<br>`T={complex64}`
+`Any`                                 | `Tidx={int32,int64}`
+`ApproximateEqual`                    | `T={complex64,double,float,int32,int64,uint32,uint64}`
+`ArgMax`                              | `Tidx={int32,int64}`<br>`output_type={int32,int64}`<br>`T={complex64,double,float,int32,int64,uint32,uint64}`
+`ArgMin`                              | `Tidx={int32,int64}`<br>`output_type={int32,int64}`<br>`T={complex64,double,float,int32,int64,uint32,uint64}`
+`Asinh`                               | `T={complex64,double,float}`
+`AssignAddVariableOp`                 | `dtype={complex64,double,float,int32,int64,uint32,uint64}`
+`AssignSubVariableOp`                 | `dtype={complex64,double,float,int32,int64,uint32,uint64}`
+`AssignVariableOp`                    | `dtype={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`Atan2`                               | `T={double,float}`
+`Atanh`                               | `T={complex64,double,float}`
+`AvgPool`                             | `T={double,float}`
+`AvgPool3D`                           | `T={double,float}`
+`AvgPool3DGrad`                       | `T={double,float}`
+`AvgPoolGrad`                         | `T={double,float}`
+`BatchMatMul`                         | `T={complex64,double,float,int32}`
+`BatchToSpace`                        | `Tidx={int32,int64}`<br>`T={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`BatchToSpaceND`                      | `Tcrops={int32,int64}`<br>`Tblock_shape={int32,int64}`<br>`T={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`BiasAdd`                             | `T={complex64,double,float,int32,int64,uint32,uint64}`
+`BiasAddGrad`                         | `T={complex64,double,float,int32,int64,uint32,uint64}`
+`BiasAddV1`                           | `T={complex64,double,float,int32,int64,uint32,uint64}`
+`BitwiseAnd`                          | `T={int32,int64,uint32,uint64}`
+`BitwiseOr`                           | `T={int32,int64,uint32,uint64}`
+`BroadcastArgs`                       | `T={int32,int64}`
+`BroadcastGradientArgs`               | `T={int32,int64}`
+`Cast`                                | `DstT={bool,complex64,double,float,int32,int64,uint32,uint64}`<br>`SrcT={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`Ceil`                                | `T={double,float}`
+`Cholesky`                            | `T={complex64,double,float}`
+`Complex`                             | `Tout={complex64}`<br>`T={double,float}`
+`ComplexAbs`                          | `Tout={double,float}`<br>`T={complex64}`
+`Concat`                              | `T={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`ConcatOffset`                        |
+`ConcatV2`                            | `Tidx={int32}`<br>`T={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`Conj`                                | `T={complex64}`
+`Const`                               | `dtype={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`ControlTrigger`                      |
+`Conv2D`                              | `T={float}`
+`Conv2DBackpropFilter`                | `T={float}`
+`Conv2DBackpropInput`                 | `T={float}`
+`Conv3D`                              | `T={double,float}`
+`Conv3DBackpropFilterV2`              | `T={double,float}`
+`Conv3DBackpropInputV2`               | `T={double,float}`
+`Cos`                                 | `T={complex64,double,float}`
+`Cosh`                                | `T={complex64,double,float}`
+`Cross`                               | `T={double,float,int32,int64,uint32,uint64}`
+`Cumprod`                             | `Tidx={int32,int64}`<br>`T={float}`
+`Cumsum`                              | `Tidx={int32,int64}`<br>`T={float}`
+`DepthToSpace`                        | `T={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`DepthwiseConv2dNative`               | `T={double,float}`
+`DepthwiseConv2dNativeBackpropFilter` | `T={double,float}`
+`DepthwiseConv2dNativeBackpropInput`  | `T={double,float}`
+`Diag`                                | `T={complex64,double,float,int32,int64}`
+`DiagPart`                            | `T={complex64,double,float,int32,int64}`
+`Div`                                 | `T={complex64,double,float,int32,int64}`
+`DynamicStitch`                       | `T={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`Elu`                                 | `T={double,float}`
+`EluGrad`                             | `T={double,float}`
+`Equal`                               | `T={bool,complex64,double,float,int32,int64}`
+`Exp`                                 | `T={complex64,double,float}`
+`ExpandDims`                          | `Tdim={int32,int64}`<br>`T={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`Expm1`                               | `T={complex64,double,float}`
+`Fill`                                | `T={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`Floor`                               | `T={double,float}`
+`FloorDiv`                            | `T={complex64,double,float,int32,int64}`
+`FloorMod`                            | `T={double,float,int32,int64}`
+`FusedBatchNorm`                      | `T={float}`
+`FusedBatchNormGrad`                  | `T={float}`
+`FusedBatchNormGradV2`                | `U={float}`<br>`T={float}`
+`FusedBatchNormV2`                    | `U={float}`<br>`T={float}`
+`Gather`                              | `Tindices={int32,int64}`<br>`Tparams={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`GatherV2`                            | `Taxis={int32,int64}`<br>`Tindices={int32,int64}`<br>`Tparams={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`Greater`                             | `T={double,float,int32,int64,uint32,uint64}`
+`GreaterEqual`                        | `T={double,float,int32,int64,uint32,uint64}`
+`Identity`                            | `T={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`IdentityN`                           | `T={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`Imag`                                | `Tout={double,float}`<br>`T={complex64}`
+`Inv`                                 | `T={complex64,double,float,int32,int64}`
+`Invert`                              | `T={int32,int64,uint32,uint64}`
+`InvertPermutation`                   | `T={int32}`
+`IsFinite`                            | `T={double,float}`
+`IsInf`                               | `T={double,float}`
+`IsNan`                               | `T={double,float}`
+`L2Loss`                              | `T={double,float}`
+`LRN`                                 | `T={float}`
+`LRNGrad`                             | `T={float}`
+`LeftShift`                           | `T={int32,int64,uint32,uint64}`
+`Less`                                | `T={double,float,int32,int64,uint32,uint64}`
+`LessEqual`                           | `T={double,float,int32,int64,uint32,uint64}`
+`LinSpace`                            | `Tidx={int32,int64}`<br>`T={double,float}`
+`Log`                                 | `T={complex64,double,float}`
+`Log1p`                               | `T={complex64,double,float}`
+`LogSoftmax`                          | `T={double,float}`
+`LogicalAnd`                          |
+`LogicalNot`                          |
+`LogicalOr`                           |
+`MatMul`                              | `T={complex64,double,float}`
+`MatrixDiag`                          | `T={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`MatrixDiagPart`                      | `T={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`Max`                                 | `Tidx={int32,int64}`<br>`T={complex64,double,float,int32,int64,uint32,uint64}`
+`MaxPool`                             | `T={double,float,int32,int64}`
+`MaxPool3D`                           | `T={float}`
+`MaxPool3DGrad`                       | `TInput={float}`<br>`T={float}`
+`MaxPoolGrad`                         | `T={double,float,int32,int64,uint32,uint64}`
+`Maximum`                             | `T={double,float,int32,int64}`
+`Mean`                                | `Tidx={int32,int64}`<br>`T={complex64,double,float,int32,int64,uint32,uint64}`
+`Min`                                 | `Tidx={int32,int64}`<br>`T={complex64,double,float,int32,int64,uint32,uint64}`
+`Minimum`                             | `T={double,float,int32,int64}`
+`MirrorPad`                           | `Tpaddings={int32,int64}`<br>`T={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`Mod`                                 | `T={double,float,int32,int64}`
+`Mul`                                 | `T={complex64,double,float,int32,int64}`
+`Multinomial`                         | `output_dtype={int32,int64}`<br>`T={double,float,int32,int64,uint32,uint64}`
+`Neg`                                 | `T={complex64,double,float,int32,int64}`
+`NoOp`                                |
+`NotEqual`                            | `T={bool,complex64,double,float,int32,int64}`
+`OneHot`                              | `TI={int32,int64}`<br>`T={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`OnesLike`                            | `T={bool,complex64,double,float,int32,int64}`
+`Pack`                                | `T={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`Pad`                                 | `Tpaddings={int32,int64}`<br>`T={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`PadV2`                               | `Tpaddings={int32,int64}`<br>`T={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`ParallelDynamicStitch`               | `T={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`Pow`                                 | `T={complex64,double,float,int32,int64}`
+`PreventGradient`                     | `T={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`Prod`                                | `Tidx={int32,int64}`<br>`T={complex64,double,float,int32,int64,uint32,uint64}`
+`QuantizeAndDequantizeV2`             | `T={double,float}`
+`Range`                               | `Tidx={double,float,int32,int64}`
+`Rank`                                | `T={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`ReadVariableOp`                      | `dtype={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`Real`                                | `Tout={double,float}`<br>`T={complex64}`
+`RealDiv`                             | `T={complex64,double,float,int32,int64}`
+`Reciprocal`                          | `T={complex64,double,float,int32,int64}`
+`ReciprocalGrad`                      | `T={complex64,double,float}`
+`Relu`                                | `T={double,float,int32,int64,uint32,uint64}`
+`Relu6`                               | `T={double,float,int32,int64,uint32,uint64}`
+`Relu6Grad`                           | `T={double,float,int32,int64,uint32,uint64}`
+`ReluGrad`                            | `T={double,float,int32,int64,uint32,uint64}`
+`Reshape`                             | `Tshape={int32,int64}`<br>`T={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`ResourceApplyAdagrad`                | `T={double,float}`
+`ResourceApplyAdam`                   | `T={double,float}`
+`ResourceApplyFtrl`                   | `T={double,float}`
+`ResourceApplyFtrlV2`                 | `T={double,float}`
+`ResourceApplyGradientDescent`        | `T={double,float}`
+`ResourceApplyMomentum`               | `T={double,float}`
+`ResourceApplyRMSProp`                | `T={double,float}`
+`ResourceGather`                      | `Tindices={int32,int64}`<br>`dtype={complex64,double,float,int32,int64,uint32,uint64}`
+`ResourceStridedSliceAssign`          | `Index={int32,int64}`<br>`T={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`Reverse`                             | `T={bool,complex64,double,float,int32,int64}`
+`ReverseV2`                           | `T={bool,complex64,double,float,int32,int64}`<br>`Tidx={int32,int64}`
+`RightShift`                          | `T={int32,int64,uint32,uint64}`
+`Rint`                                | `T={double,float}`
+`Round`                               | `T={complex64,double,float,int32,int64}`
+`Rsqrt`                               | `T={complex64,double,float}`
+`RsqrtGrad`                           | `T={complex64,double,float}`
+`Select`                              | `T={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`Selu`                                | `T={double,float}`
+`SeluGrad`                            | `T={double,float}`
+`Shape`                               | `out_type={int32,int64}`<br>`T={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`ShapeN`                              | `out_type={int32,int64}`<br>`T={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`Sigmoid`                             | `T={complex64,double,float}`
+`SigmoidGrad`                         | `T={complex64,double,float}`
+`Sign`                                | `T={complex64,double,float,int32,int64}`
+`Sin`                                 | `T={complex64,double,float}`
+`Sinh`                                | `T={complex64,double,float}`
+`Size`                                | `out_type={int32,int64}`<br>`T={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`Slice`                               | `Index={int32,int64}`<br>`T={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`Softmax`                             | `T={double,float}`
+`SoftmaxCrossEntropyWithLogits`       | `T={double,float}`
+`Softplus`                            | `T={double,float,int32,int64,uint32,uint64}`
+`SoftplusGrad`                        | `T={double,float,int32,int64,uint32,uint64}`
+`Softsign`                            | `T={double,float,int32,int64,uint32,uint64}`
+`SoftsignGrad`                        | `T={double,float,int32,int64,uint32,uint64}`
+`SpaceToBatch`                        | `Tpaddings={int32,int64}`<br>`T={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`SpaceToBatchND`                      | `Tblock_shape={int32,int64}`<br>`Tpaddings={int32,int64}`<br>`T={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`SpaceToDepth`                        | `T={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`SparseMatMul`                        | `Tb={float}`<br>`Ta={float}`
+`SparseSoftmaxCrossEntropyWithLogits` | `Tlabels={int32,int64}`<br>`T={double,float}`
+`Split`                               | `T={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`SplitV`                              | `Tlen={int32,int64}`<br>`T={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`Sqrt`                                | `T={complex64,double,float}`
+`SqrtGrad`                            | `T={complex64,double,float}`
+`Square`                              | `T={complex64,double,float,int32,int64}`
+`SquaredDifference`                   | `T={complex64,double,float,int32,int64}`
+`Squeeze`                             | `T={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`StackCloseV2`                        |
+`StackPopV2`                          | `elem_type={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`StackPushV2`                         | `T={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`StackV2`                             | `elem_type={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`StatelessRandomNormal`               | `Tseed={int32}`<br>`T={int32,int64}`<br>`dtype={float}`
+`StatelessRandomUniform`              | `Tseed={int32}`<br>`T={int32,int64}`<br>`dtype={float}`
+`StopGradient`                        | `T={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`StridedSlice`                        | `Index={int32,int64}`<br>`T={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`StridedSliceGrad`                    | `Index={int32,int64}`<br>`T={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`Sub`                                 | `T={complex64,double,float,int32,int64}`
+`Sum`                                 | `Tidx={int32,int64}`<br>`T={complex64,double,float,int32,int64,uint32,uint64}`
+`SymbolicGradient`                    | `Tout={bool,complex64,double,float,int32,int64,uint32,uint64}`<br>`Tin={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`Tan`                                 | `T={complex64,double,float,int32,int64}`
+`Tanh`                                | `T={complex64,double,float}`
+`TanhGrad`                            | `T={complex64,double,float}`
+`TensorArrayCloseV3`                  |
+`TensorArrayConcatV3`                 | `dtype={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`TensorArrayGatherV3`                 | `dtype={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`TensorArrayGradV3`                   |
+`TensorArrayReadV3`                   | `dtype={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`TensorArrayScatterV3`                | `T={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`TensorArraySizeV3`                   |
+`TensorArraySplitV3`                  | `T={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`TensorArrayV3`                       | `dtype={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`TensorArrayWriteV3`                  | `T={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`Tile`                                | `Tmultiples={int32,int64}`<br>`T={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`Transpose`                           | `Tperm={int32,int64}`<br>`T={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`TruncateDiv`                         | `T={complex64,double,float,int32,int64}`
+`TruncateMod`                         | `T={double,float,int32,int64}`
+`Unpack`                              | `T={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`UnsortedSegmentSum`                  | `Tnumsegments={int32,int64}`<br>`Tindices={int32,int64}`<br>`T={complex64,double,float,int32,int64,uint32,uint64}`
+`VarIsInitializedOp`                  |
+`VariableShape`                       | `out_type={int32,int64}`
+`XlaWhile`                            | `T={bool,complex64,double,float,int32,int64,resource,uint32,uint64}`
+`ZerosLike`                           | `T={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`_Arg`                                | `T={bool,complex64,double,float,int32,int64,resource,uint32,uint64}`
+`_ArrayToList`                        | `out_types={bool,complex64,double,float,int32,int64,uint32,uint64}`<br>`T={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`_ListToArray`                        | `T={bool,complex64,double,float,int32,int64,uint32,uint64}`<br>`Tin={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`_Retval`                             | `T={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`_XLARecv`                            | `T={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`_XLASend`                            | `T={bool,complex64,double,float,int32,int64,uint32,uint64}`
+
+To regenerate this table, run:
+
+```shell
+bazel run -c opt -- tensorflow/compiler/tf2xla:tf2xla_supported_ops --device=XLA_GPU_JIT
+```
diff --git a/tensorflow/compiler/tf2xla/tf2xla_supported_ops.cc b/tensorflow/compiler/tf2xla/tf2xla_supported_ops.cc
new file mode 100644
index 0000000000..7aca889a26
--- /dev/null
+++ b/tensorflow/compiler/tf2xla/tf2xla_supported_ops.cc
@@ -0,0 +1,97 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/compiler/tf2xla/tf2xla_supported_ops.h"
+
+#include <algorithm>
+#include <iostream>
+#include <string>
+#include <vector>
+
+#include "tensorflow/compiler/tf2xla/xla_op_registry.h"
+#include "tensorflow/core/framework/kernel_def.pb.h"
+#include "tensorflow/core/framework/types.h"
+#include "tensorflow/core/framework/types.pb.h"
+#include "tensorflow/core/lib/strings/str_util.h"
+#include "tensorflow/core/platform/init_main.h"
+#include "tensorflow/core/util/command_line_flags.h"
+
+namespace tensorflow {
+namespace tf2xla {
+namespace {
+
+void PrintSupportedOps(const string& device, const string& regen_run) {
+  XlaOpRegistry::RegisterCompilationKernels();
+
+  std::vector<const KernelDef*> kdefs =
+      XlaOpRegistry::DeviceKernels(device,
+                                   /*include_compilation_only_kernels=*/true);
+  std::sort(
+      kdefs.begin(), kdefs.end(),
+      [](const KernelDef* a, const KernelDef* b) { return a->op() < b->op(); });
+
+  std::cout << "**Supported operators for device: " << device << "**\n\n"
+            << "Operator | Type Constraint\n"
+            << "-------- | ---------------" << std::endl;
+  for (const KernelDef* kdef : kdefs) {
+    std::vector<string> constraints;
+    for (const KernelDef::AttrConstraint& constraint : kdef->constraint()) {
+      std::vector<string> types;
+      for (int type : constraint.allowed_values().list().type()) {
+        types.push_back(DataTypeString(static_cast<DataType>(type)));
+      }
+      std::sort(types.begin(), types.end());
+      constraints.push_back("`" + constraint.name() + "={" +
+                            str_util::Join(types, ",") + "}`");
+    }
+    std::cout << "`" << kdef->op() << "` | "
+              << str_util::Join(constraints, "<br>") << std::endl;
+  }
+
+  std::cout << "\nTo regenerate this table, run:\n\n```shell\n"
+            << regen_run << " --device=" << device << "\n```" << std::endl;
+}
+
+}  // namespace
+
+void SupportedOpsMain(int argc, char** argv, const char* regen_run) {
+  std::vector<string> device_names = XlaOpRegistry::BackendNames();
+  std::sort(device_names.begin(), device_names.end());
+
+  // Set up and parse flags.
+  string device;
+  std::vector<Flag> flag_list = {
+      {"device", &device,
+       "Name of the compilation device for which to print supported ops, "
+       "one of: " +
+           str_util::Join(device_names, ",")},
+  };
+  string usage = Flags::Usage(argv[0], flag_list);
+  bool parsed_flags_ok = Flags::Parse(&argc, argv, flag_list);
+  QCHECK(parsed_flags_ok) << "\n" << usage;
+  QCHECK(XlaOpRegistry::IsBackendRegistered(device))
+      << "\nUnknown device: " << device << "\n"
+      << usage;
+
+  // Run the program.
+  port::InitMain(usage.c_str(), &argc, &argv);
+  QCHECK(argc == 1) << "\nERROR: This command does not take any arguments "
+                       "other than flags\n\n"
+                    << usage;
+  PrintSupportedOps(device, regen_run);
+}
+
+}  // namespace tf2xla
+}  // namespace tensorflow
diff --git a/tensorflow/compiler/tf2xla/tf2xla_supported_ops.h b/tensorflow/compiler/tf2xla/tf2xla_supported_ops.h
new file mode 100644
index 0000000000..1b45fb4cdd
--- /dev/null
+++ b/tensorflow/compiler/tf2xla/tf2xla_supported_ops.h
@@ -0,0 +1,33 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_COMPILER_TF2XLA_TF2XLA_SUPPORTED_OPS_H_
+#define TENSORFLOW_COMPILER_TF2XLA_TF2XLA_SUPPORTED_OPS_H_
+
+namespace tensorflow {
+namespace tf2xla {
+
+// The implementation of a main function for a binary that prints a table of
+// supported tf2xla operators for a given device, along with their type
+// constraints, to stdout.
+//
+// Pass the argc and argv from main, unmodified.  Use regen_run to specify the
+// command used to regenerate the table.
+void SupportedOpsMain(int argc, char** argv, const char* regen_run);
+
+}  // namespace tf2xla
+}  // namespace tensorflow
+
+#endif  // TENSORFLOW_COMPILER_TF2XLA_TF2XLA_SUPPORTED_OPS_H_
diff --git a/tensorflow/compiler/tf2xla/tf2xla_supported_ops_main.cc b/tensorflow/compiler/tf2xla/tf2xla_supported_ops_main.cc
new file mode 100644
index 0000000000..690666c240
--- /dev/null
+++ b/tensorflow/compiler/tf2xla/tf2xla_supported_ops_main.cc
@@ -0,0 +1,22 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/compiler/tf2xla/tf2xla_supported_ops.h"
+
+int main(int argc, char** argv) {
+  const char* regen_run =
+      "bazel run -c opt -- tensorflow/compiler/tf2xla:tf2xla_supported_ops";
+  tensorflow::tf2xla::SupportedOpsMain(argc, argv, regen_run);
+}
diff --git a/tensorflow/compiler/tf2xla/xla_op_registry.cc b/tensorflow/compiler/tf2xla/xla_op_registry.cc
index 02318cf7fa..faf47434b5 100644
--- a/tensorflow/compiler/tf2xla/xla_op_registry.cc
+++ b/tensorflow/compiler/tf2xla/xla_op_registry.cc
@@ -26,6 +26,7 @@ limitations under the License.
 #include "tensorflow/core/framework/device_base.h"
 #include "tensorflow/core/framework/kernel_def.pb.h"
 #include "tensorflow/core/framework/node_def.pb.h"
+#include "tensorflow/core/framework/op_def_util.h"
 #include "tensorflow/core/platform/mem.h"
 #include "tensorflow/core/platform/stream_executor_no_cuda.h"
 
@@ -187,22 +188,39 @@ void XlaOpRegistry::RegisterCompilationKernels() {
 
       // Constrain each type attribute to the intersection of:
       // a) the types supported by the backend, and
-      // b) the attribute's type constraints.
-      // TODO(phawkins): it may be necessary to also take the intersection with
-      // the set of types supported by the OpDef.
+      // b) the types allowed by the OpDef, and
+      // c) the type constraints.
       for (const string& type_attr : type_attrs) {
         KernelDef::AttrConstraint* attr_constraint = kdef->add_constraint();
         attr_constraint->set_name(type_attr);
         auto* allowed_values =
             attr_constraint->mutable_allowed_values()->mutable_list();
 
-        auto it = op_registration->type_constraints.find(type_attr);
+        const OpDef::AttrDef& op_def_attr = *FindAttr(type_attr, *op_def);
+        const auto* op_def_allowed_types =
+            op_def_attr.has_allowed_values()
+                ? &op_def_attr.allowed_values().list().type()
+                : nullptr;
+        auto constraint_it = op_registration->type_constraints.find(type_attr);
+        const std::set<DataType>* type_constraints =
+            constraint_it != op_registration->type_constraints.end()
+                ? &constraint_it->second
+                : nullptr;
         for (DataType dtype : backend.second.supported_types) {
-          if (it == op_registration->type_constraints.end() ||
-              (it != op_registration->type_constraints.end() &&
-               it->second.find(dtype) != it->second.end())) {
-            allowed_values->add_type(dtype);
+          // Filter out types that aren't allowed by the OpDef.
+          if (op_def_allowed_types != nullptr &&
+              std::find(op_def_allowed_types->begin(),
+                        op_def_allowed_types->end(),
+                        dtype) == op_def_allowed_types->end()) {
+            continue;
           }
+          // Filter out types based on the type constraints.
+          if (type_constraints != nullptr &&
+              type_constraints->find(dtype) == type_constraints->end()) {
+            continue;
+          }
+          // Passed all the filters, this type is allowed.
+          allowed_values->add_type(dtype);
         }
         if (op_registration->allow_resource_types) {
           allowed_values->add_type(DT_RESOURCE);
@@ -245,6 +263,22 @@ std::vector<const KernelDef*> XlaOpRegistry::DeviceKernels(
   return kernels;
 }
 
+std::vector<string> XlaOpRegistry::BackendNames() {
+  std::vector<string> names;
+  XlaOpRegistry& registry = Instance();
+  mutex_lock lock(registry.mutex_);
+  for (const auto& backend_pair : registry.backends_) {
+    names.push_back(backend_pair.first);
+  }
+  return names;
+}
+
+bool XlaOpRegistry::IsBackendRegistered(const string& name) {
+  XlaOpRegistry& registry = Instance();
+  mutex_lock lock(registry.mutex_);
+  return registry.backends_.find(name) != registry.backends_.end();
+}
+
 XlaOpRegistry& XlaOpRegistry::Instance() {
   static XlaOpRegistry* r = new XlaOpRegistry;
   return *r;
diff --git a/tensorflow/compiler/tf2xla/xla_op_registry.h b/tensorflow/compiler/tf2xla/xla_op_registry.h
index 6aee8c91cc..2959d2ab69 100644
--- a/tensorflow/compiler/tf2xla/xla_op_registry.h
+++ b/tensorflow/compiler/tf2xla/xla_op_registry.h
@@ -97,6 +97,12 @@ class XlaOpRegistry {
                               gtl::ArraySlice<DataType> supported_types,
                               BackendOpFilter op_filter);
 
+  // Returns the names of the registered backends.
+  static std::vector<string> BackendNames();
+
+  // Returns true iff a backend with the given name is registered.
+  static bool IsBackendRegistered(const string& name);
+
   // Registers `device_name` for XLA compilation, using information from
   // `registration`.
   static void RegisterCompilationDevice(const string& device_name,
@@ -116,8 +122,8 @@ class XlaOpRegistry {
   static void RegisterCompilationKernels();
 
   // Returns KernelDefs for compilation ops registered on
-  // 'compilation_device_name'.
-  // Does not include kernels registered as CompilationOnly.
+  // 'compilation_device_name'.  Does not include kernels registered as
+  // CompilationOnly, iff include_compilation_only_kernels=false.
   static std::vector<const KernelDef*> DeviceKernels(
       const string& compilation_device_name,
       bool include_compilation_only_kernels);
-- 
GitLab


From f38f92eb369b9cbb12b2c8bd0006d7fa1c64c5c0 Mon Sep 17 00:00:00 2001
From: Yilei Yang <yileiyang@google.com>
Date: Tue, 5 Dec 2017 11:42:14 -0800
Subject: [PATCH 0641/1225] Only parse known flags in tf.app.run().

This requires absl-py 0.1.6.

Also remove the manual tag on //tensorflow/python:app_test.

PiperOrigin-RevId: 177986813
---
 tensorflow/python/BUILD               | 5 +----
 tensorflow/python/platform/app.py     | 9 ++-------
 tensorflow/tools/pip_package/setup.py | 2 +-
 tensorflow/workspace.bzl              | 8 ++++----
 4 files changed, 8 insertions(+), 16 deletions(-)

diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD
index cd11be9341..ed44e9e332 100644
--- a/tensorflow/python/BUILD
+++ b/tensorflow/python/BUILD
@@ -179,10 +179,7 @@ tf_py_test(
     size = "small",
     srcs = ["platform/app_test.py"],
     additional_deps = [":platform"],
-    tags = [
-        "manual",
-        "notap",
-    ],
+    tags = ["notap"],
 )
 
 cc_library(
diff --git a/tensorflow/python/platform/app.py b/tensorflow/python/platform/app.py
index 1d8acf3f00..9b92d9a180 100644
--- a/tensorflow/python/platform/app.py
+++ b/tensorflow/python/platform/app.py
@@ -114,13 +114,8 @@ def run(main=None, argv=None):
   # Define help flags.
   _define_help_flags()
 
-  # Parse flags.
-  try:
-    argv = flags.FLAGS(_sys.argv if argv is None else argv)
-  except flags.Error as error:
-    _sys.stderr.write('FATAL Flags parsing error: %s\n' % error)
-    _sys.stderr.write('Pass --helpshort or --helpfull to see help on flags.\n')
-    _sys.exit(1)
+  # Parse known flags.
+  argv = flags.FLAGS(_sys.argv if argv is None else argv, known_only=True)
 
   main = main or _sys.modules['__main__'].main
 
diff --git a/tensorflow/tools/pip_package/setup.py b/tensorflow/tools/pip_package/setup.py
index 3852b251d9..dc19e1bc94 100644
--- a/tensorflow/tools/pip_package/setup.py
+++ b/tensorflow/tools/pip_package/setup.py
@@ -32,7 +32,7 @@ from setuptools.dist import Distribution
 _VERSION = '1.4.0'
 
 REQUIRED_PACKAGES = [
-    'absl-py',
+    'absl-py >= 0.1.6',
     # weakref.finalize introduced in Python 3.4
     'backports.weakref >= 1.0rc1; python_version < "3.4"',
     # enum module introduced in Python 3.4
diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl
index f4abeb014d..5753b0c897 100644
--- a/tensorflow/workspace.bzl
+++ b/tensorflow/workspace.bzl
@@ -321,11 +321,11 @@ def tf_workspace(path_prefix="", tf_repo_name=""):
   native.http_archive(
       name = "absl_py",
       urls = [
-          "https://mirror.bazel.build/github.com/abseil/abseil-py/archive/231e3870b976c1dc61dce1749138661d21556028.tar.gz",
-          "https://github.com/abseil/abseil-py/archive/231e3870b976c1dc61dce1749138661d21556028.tar.gz",
+          "https://mirror.bazel.build/github.com/abseil/abseil-py/archive/acec853355ef987eae48a8d87a79351c15dff593.tar.gz",
+          "https://github.com/abseil/abseil-py/archive/acec853355ef987eae48a8d87a79351c15dff593.tar.gz",
       ],
-      sha256 = "8ea2b23bfdb9ae7622f3e5d95236bc600c8d8509a2f38c84732b3145585d4f73",
-      strip_prefix = "abseil-py-231e3870b976c1dc61dce1749138661d21556028",
+      sha256 = "29e4584e778bee13aa4093824133d131d927cc160561892880118d9ff7b95a6a",
+      strip_prefix = "abseil-py-acec853355ef987eae48a8d87a79351c15dff593",
   )
 
   native.new_http_archive(
-- 
GitLab


From 33e3da538aef02b4f1293969d9601cc98f3ad30c Mon Sep 17 00:00:00 2001
From: Peter Hawkins <phawkins@google.com>
Date: Tue, 5 Dec 2017 11:47:41 -0800
Subject: [PATCH 0642/1225] [TF:XLA] Add support for FusedBatchNormGrad where
 is_training=False. Also add support for rank != 4 tensors to the TF/XLA fused
 batchnorm operators, although the TF core ops don't actually support other
 ranks yet so this is not tested.

PiperOrigin-RevId: 177987592
---
 .../compiler/tests/fused_batchnorm_test.py    |  51 +++++-
 .../compiler/tf2xla/kernels/batch_norm_op.cc  | 150 +++++++++++-------
 2 files changed, 145 insertions(+), 56 deletions(-)

diff --git a/tensorflow/compiler/tests/fused_batchnorm_test.py b/tensorflow/compiler/tests/fused_batchnorm_test.py
index 00a9c9a65b..a80d69fa5f 100644
--- a/tensorflow/compiler/tests/fused_batchnorm_test.py
+++ b/tensorflow/compiler/tests/fused_batchnorm_test.py
@@ -155,7 +155,7 @@ class FusedBatchNormTest(XLATestCase):
   def testLearningWithGradientChecker(self):
     self._testLearning(True)
 
-  def testGradient(self):
+  def testGradientTraining(self):
     # TODO(b/64270657): Use gradient_checker here in addition to comparing with
     # this reference implementation.
     channel = 3
@@ -175,7 +175,7 @@ class FusedBatchNormTest(XLATestCase):
       var = array_ops.placeholder(np.float32, shape=scale_shape, name="var")
       scale = array_ops.placeholder(np.float32, shape=scale_shape, name="scale")
       grad_x, grad_scale, grad_offset, _, _ = gen_nn_ops.fused_batch_norm_grad(
-          grad, x, scale, mean, var, data_format="NHWC")
+          grad, x, scale, mean, var, data_format="NHWC", is_training=True)
 
       grad_x_val, grad_scale_val, grad_offset_val = sess.run(
           [grad_x, grad_scale, grad_offset], {
@@ -193,6 +193,53 @@ class FusedBatchNormTest(XLATestCase):
       self.assertAllClose(grad_scale_val, grad_scale_ref, atol=1e-2)
       self.assertAllClose(grad_offset_val, grad_offset_ref, atol=1e-3)
 
+  def testGradientInference(self):
+    # TODO(b/64270657): Use gradient_checker here in addition to comparing with
+    # this reference implementation.
+    channel = 3
+    x_shape = [2, 2, 6, channel]
+    scale_shape = [channel]
+    grad_val = np.random.random_sample(x_shape).astype(np.float32)
+    x_val = np.random.random_sample(x_shape).astype(np.float32)
+    scale_val = np.random.random_sample(scale_shape).astype(np.float32)
+    mean_val = np.random.random_sample(scale_shape).astype(np.float32)
+    var_val = np.random.random_sample(scale_shape).astype(np.float32)
+
+    with self.test_session() as sess, self.test_scope():
+      grad = array_ops.placeholder(np.float32, shape=x_shape, name="grad")
+      x = array_ops.placeholder(np.float32, shape=x_shape, name="x")
+      mean = array_ops.placeholder(np.float32, shape=scale_shape, name="mean")
+      var = array_ops.placeholder(np.float32, shape=scale_shape, name="var")
+      scale = array_ops.placeholder(np.float32, shape=scale_shape, name="scale")
+      with self.test_scope():
+        out = gen_nn_ops.fused_batch_norm_grad(
+            grad, x, scale, mean, var, data_format="NHWC", is_training=False)
+        grad_x, grad_scale, grad_offset, _, _ = out
+
+      ref_x, ref_scale, ref_offset, _, _ = gen_nn_ops.fused_batch_norm_grad(
+          grad, x, scale, mean, var, data_format="NHWC", is_training=False)
+
+      grad_x_val, grad_scale_val, grad_offset_val, = sess.run(
+          [grad_x, grad_scale, grad_offset], {
+              grad: grad_val,
+              x: x_val,
+              mean: mean_val,
+              var: var_val,
+              scale: scale_val
+          })
+      grad_x_ref, grad_scale_ref, grad_offset_ref, = sess.run(
+          [ref_x, ref_scale, ref_offset], {
+              grad: grad_val,
+              x: x_val,
+              mean: mean_val,
+              var: var_val,
+              scale: scale_val
+          })
+
+      self.assertAllClose(grad_x_val, grad_x_ref, atol=1e-2)
+      self.assertAllClose(grad_scale_val, grad_scale_ref, atol=1e-2)
+      self.assertAllClose(grad_offset_val, grad_offset_ref, atol=1e-3)
+
 
 if __name__ == "__main__":
   test.main()
diff --git a/tensorflow/compiler/tf2xla/kernels/batch_norm_op.cc b/tensorflow/compiler/tf2xla/kernels/batch_norm_op.cc
index 468af34aab..a249b1869f 100644
--- a/tensorflow/compiler/tf2xla/kernels/batch_norm_op.cc
+++ b/tensorflow/compiler/tf2xla/kernels/batch_norm_op.cc
@@ -26,41 +26,44 @@ namespace {
 class FusedBatchNormOp : public XlaOpKernel {
  public:
   explicit FusedBatchNormOp(OpKernelConstruction* ctx) : XlaOpKernel(ctx) {
-    string data_format;
     OP_REQUIRES_OK(ctx, ctx->GetAttr("epsilon", &epsilon_));
     OP_REQUIRES_OK(ctx, ctx->GetAttr("is_training", &is_training_));
-    OP_REQUIRES_OK(ctx, ctx->GetAttr("data_format", &data_format));
-    TensorFormat tensor_format;
-    if (ctx->GetAttr("data_format", &data_format).ok()) {
-      OP_REQUIRES(ctx, FormatFromString(data_format, &tensor_format),
-                  errors::InvalidArgument("Invalid data format"));
-      OP_REQUIRES(
-          ctx, (tensor_format == FORMAT_NHWC || tensor_format == FORMAT_NCHW),
-          errors::InvalidArgument("Not supported format"));
-      feature_index_ = GetTensorFeatureDimIndex(/*num_dims=*/4, tensor_format);
-    }
+    string data_format_str;
+    OP_REQUIRES_OK(ctx, ctx->GetAttr("data_format", &data_format_str));
+    OP_REQUIRES(
+        ctx, FormatFromString(data_format_str, &data_format_),
+        errors::InvalidArgument("Invalid data format: ", data_format_str));
+    OP_REQUIRES(ctx,
+                (data_format_ == FORMAT_NHWC || data_format_ == FORMAT_NCHW),
+                errors::InvalidArgument(
+                    "Unsupported data format ", ToString(data_format_),
+                    "; supported formats are NHWC and NCHW"));
   }
 
   void Compile(XlaOpKernelContext* ctx) override {
     xla::PrimitiveType input_type;
     OP_REQUIRES_OK(ctx,
                    DataTypeToPrimitiveType(ctx->input_type(0), &input_type));
-    xla::PrimitiveType stats_type;
+    xla::PrimitiveType scale_type;
     OP_REQUIRES_OK(ctx,
-                   DataTypeToPrimitiveType(ctx->input_type(1), &stats_type));
+                   DataTypeToPrimitiveType(ctx->input_type(1), &scale_type));
 
     xla::ComputationBuilder* builder = ctx->builder();
 
     xla::ComputationDataHandle input = ctx->Input(0);
+    TensorShape input_shape = ctx->InputShape(0);
+
+    int feature_index =
+        GetTensorFeatureDimIndex(input_shape.dims(), data_format_);
 
     // TODO(b/69928690): support mixed precision in the XLA batch normalization
     // operators. As a workaround, cast everything to the statistics type (which
     // may be more precise than the input type).
-    input = builder->ConvertElementType(input, stats_type);
+    input = builder->ConvertElementType(input, scale_type);
 
     if (is_training_) {
       xla::ComputationDataHandle output = builder->BatchNormTraining(
-          input, ctx->Input(1), ctx->Input(2), epsilon_, feature_index_);
+          input, ctx->Input(1), ctx->Input(2), epsilon_, feature_index);
 
       // In training mode, outputs the normalized value as well as the
       // calculated mean and variance.
@@ -78,7 +81,7 @@ class FusedBatchNormOp : public XlaOpKernel {
     } else {
       xla::ComputationDataHandle output = builder->BatchNormInference(
           input, ctx->Input(1), ctx->Input(2), ctx->Input(3), ctx->Input(4),
-          epsilon_, feature_index_);
+          epsilon_, feature_index);
       ctx->SetOutput(0, builder->ConvertElementType(output, input_type));
       // Directly send input to output as mean and variance in inference mode.
       ctx->SetOutput(1, ctx->Input(3));
@@ -90,7 +93,7 @@ class FusedBatchNormOp : public XlaOpKernel {
 
  private:
   float epsilon_;
-  int64 feature_index_;
+  TensorFormat data_format_;
   bool is_training_;
 };
 
@@ -100,60 +103,99 @@ REGISTER_XLA_OP(Name("FusedBatchNormV2"), FusedBatchNormOp);
 class FusedBatchNormGradOp : public XlaOpKernel {
  public:
   explicit FusedBatchNormGradOp(OpKernelConstruction* ctx) : XlaOpKernel(ctx) {
-    string data_format;
     OP_REQUIRES_OK(ctx, ctx->GetAttr("epsilon", &epsilon_));
-    OP_REQUIRES_OK(ctx, ctx->GetAttr("data_format", &data_format));
-    bool is_training;
-    OP_REQUIRES_OK(ctx, ctx->GetAttr("is_training", &is_training));
-    CHECK(is_training) << "FusedBatchNormGradOp with is_training=False cannot "
-                          "be used with XLA for now!";
-    TensorFormat tensor_format;
-    if (ctx->GetAttr("data_format", &data_format).ok()) {
-      OP_REQUIRES(ctx, FormatFromString(data_format, &tensor_format),
-                  errors::InvalidArgument("Invalid data format"));
-      OP_REQUIRES(
-          ctx, (tensor_format == FORMAT_NHWC || tensor_format == FORMAT_NCHW),
-          errors::InvalidArgument("Not supported format"));
-      feature_index_ = GetTensorFeatureDimIndex(4, tensor_format);
-    }
+    OP_REQUIRES_OK(ctx, ctx->GetAttr("is_training", &is_training_));
+    string data_format_str;
+    OP_REQUIRES_OK(ctx, ctx->GetAttr("data_format", &data_format_str));
+    OP_REQUIRES(
+        ctx, FormatFromString(data_format_str, &data_format_),
+        errors::InvalidArgument("Invalid data format: ", data_format_str));
+    OP_REQUIRES(ctx,
+                (data_format_ == FORMAT_NHWC || data_format_ == FORMAT_NCHW),
+                errors::InvalidArgument(
+                    "Unsupported data format ", ToString(data_format_),
+                    "; supported formats are NHWC and NCHW"));
   }
 
   void Compile(XlaOpKernelContext* ctx) override {
-    xla::ComputationBuilder* builder = ctx->builder();
+    xla::ComputationBuilder* b = ctx->builder();
 
-    auto grad_output = ctx->Input(0);
-    auto activation = ctx->Input(1);
+    auto grad_backprop = ctx->Input(0);
+    auto activations = ctx->Input(1);
     auto scale = ctx->Input(2);
     auto mean = ctx->Input(3);
     auto var = ctx->Input(4);
 
+    TensorShape input_shape = ctx->InputShape(0);
+    int feature_index =
+        GetTensorFeatureDimIndex(input_shape.dims(), data_format_);
+
+    DataType input_dtype = ctx->input_type(0);
+    DataType scale_dtype = ctx->input_type(2);
     xla::PrimitiveType input_type;
-    OP_REQUIRES_OK(ctx,
-                   DataTypeToPrimitiveType(ctx->input_type(0), &input_type));
-    xla::PrimitiveType stats_type;
-    OP_REQUIRES_OK(ctx,
-                   DataTypeToPrimitiveType(ctx->input_type(3), &stats_type));
+    OP_REQUIRES_OK(ctx, DataTypeToPrimitiveType(input_dtype, &input_type));
+    xla::PrimitiveType scale_type;
+    OP_REQUIRES_OK(ctx, DataTypeToPrimitiveType(scale_dtype, &scale_type));
 
     // TODO(b/69928690): support mixed precision in the XLA batch normalization
-    // operators. As a workaround, cast everything to the statistics type (which
+    // operators. For now, cast everything to the statistics type (which
     // may be more precise than the input type).
-    grad_output = builder->ConvertElementType(grad_output, stats_type);
-    activation = builder->ConvertElementType(activation, stats_type);
-
-    xla::ComputationDataHandle output = builder->BatchNormGrad(
-        activation, scale, mean, var, grad_output, epsilon_, feature_index_);
-
-    ctx->SetOutput(0, builder->ConvertElementType(
-                          builder->GetTupleElement(output, 0), input_type));
-    ctx->SetOutput(1, builder->GetTupleElement(output, 1));
-    ctx->SetOutput(2, builder->GetTupleElement(output, 2));
-    ctx->SetOutput(3, builder->GetTupleElement(output, 1));
-    ctx->SetOutput(4, builder->GetTupleElement(output, 2));
+    grad_backprop = b->ConvertElementType(grad_backprop, scale_type);
+    activations = b->ConvertElementType(activations, scale_type);
+
+    xla::ComputationDataHandle x_backprop;
+    xla::ComputationDataHandle scale_backprop;
+    xla::ComputationDataHandle offset_backprop;
+    if (is_training_) {
+      xla::ComputationDataHandle output =
+          b->BatchNormGrad(activations, scale, mean, var, grad_backprop,
+                           epsilon_, feature_index);
+
+      x_backprop = b->GetTupleElement(output, 0);
+      scale_backprop = b->GetTupleElement(output, 1);
+      offset_backprop = b->GetTupleElement(output, 2);
+    } else {
+      // Reduce over all dimensions except the feature dim.
+      std::vector<int64> reduction_dims(input_shape.dims() - 1);
+      std::iota(reduction_dims.begin(), reduction_dims.begin() + feature_index,
+                0);
+      std::iota(reduction_dims.begin() + feature_index, reduction_dims.end(),
+                feature_index + 1);
+      // offset_backprop  = sum(y_backprop)
+      // scale_backprop = y_backprop * ((x - pop_mean) * rsqrt(pop_var +
+      // epsilon))
+      // x_backprop = y_backprop * (scale * rsqrt(pop_var + epsilon))
+      offset_backprop =
+          b->Reduce(grad_backprop, XlaHelpers::Zero(b, scale_dtype),
+                    *ctx->GetOrCreateAdd(scale_dtype), reduction_dims);
+
+      // scratch1 = rsqrt(pop_var + epsilon)
+      auto neg_half = XlaHelpers::FloatLiteral(b, scale_dtype, -0.5);
+      auto scratch1 =
+          b->Pow(b->Add(var, b->ConstantR0<float>(epsilon_)), neg_half);
+
+      // scratch2 = sum(y_backprop * (x - mean))
+      auto scratch2 = b->Reduce(
+          b->Mul(grad_backprop, b->Sub(activations, mean, {feature_index})),
+          XlaHelpers::Zero(b, scale_dtype), *ctx->GetOrCreateAdd(scale_dtype),
+          reduction_dims);
+
+      x_backprop =
+          b->Mul(grad_backprop, b->Mul(scratch1, scale), {feature_index});
+      scale_backprop = b->Mul(scratch1, scratch2);
+    }
+
+    ctx->SetOutput(0, b->ConvertElementType(x_backprop, input_type));
+    ctx->SetOutput(1, scale_backprop);
+    ctx->SetOutput(2, offset_backprop);
+    ctx->SetConstantOutput(3, Tensor(scale_dtype, {}));
+    ctx->SetConstantOutput(4, Tensor(scale_dtype, {}));
   }
 
  private:
+  TensorFormat data_format_;
   float epsilon_;
-  int64 feature_index_;
+  bool is_training_;
 };
 
 REGISTER_XLA_OP(Name("FusedBatchNormGrad"), FusedBatchNormGradOp);
-- 
GitLab


From 21e831dc4a3fe67331dc186d2493678f24838250 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 5 Dec 2017 11:57:53 -0800
Subject: [PATCH 0643/1225] Automated g4 rollback of changelist 177799252

PiperOrigin-RevId: 177989542
---
 .../kernels/reduce_slice_ops_gpu.cu.cc        |  11 +-
 tensorflow/core/BUILD                         |   7 -
 tensorflow/core/kernels/bias_op_gpu.cu.cc     |  18 +-
 .../core/kernels/depthwise_conv_op_gpu.cu.cc  |  11 +-
 .../core/kernels/scatter_nd_op_gpu.cu.cc      |  21 -
 tensorflow/core/kernels/svd_op_gpu.cu.cc      |   4 +-
 tensorflow/core/util/cuda_device_functions.h  | 418 ---------
 tensorflow/core/util/cuda_kernel_helper.h     | 837 ++++++++++++++++--
 .../core/util/cuda_kernel_helper_test.cu.cc   |  12 +-
 tensorflow/core/util/cuda_launch_config.h     | 284 ------
 10 files changed, 772 insertions(+), 851 deletions(-)
 delete mode 100644 tensorflow/core/util/cuda_device_functions.h
 delete mode 100644 tensorflow/core/util/cuda_launch_config.h

diff --git a/tensorflow/contrib/reduce_slice_ops/kernels/reduce_slice_ops_gpu.cu.cc b/tensorflow/contrib/reduce_slice_ops/kernels/reduce_slice_ops_gpu.cu.cc
index 501cddb8c8..8e6870fadd 100644
--- a/tensorflow/contrib/reduce_slice_ops/kernels/reduce_slice_ops_gpu.cu.cc
+++ b/tensorflow/contrib/reduce_slice_ops/kernels/reduce_slice_ops_gpu.cu.cc
@@ -34,9 +34,9 @@ namespace functor {
   __global__ void ReduceSliceDeviceKernel##reduceop(                           \
       Cuda3DLaunchConfig config, Index indices_width, Index bound,             \
       const T begin, const Index *indices, const T *input, T *out) {           \
-    CUDA_AXIS_KERNEL_LOOP(x, config.virtual_thread_count.x, X) {               \
-      CUDA_AXIS_KERNEL_LOOP(y, config.virtual_thread_count.y, Y) {             \
-        CUDA_AXIS_KERNEL_LOOP(z, config.virtual_thread_count.z, Z) {           \
+    CUDA_AXIS_KERNEL_LOOP(x, config.virtual_thread_count, x) {                 \
+      CUDA_AXIS_KERNEL_LOOP(y, config.virtual_thread_count, y) {               \
+        CUDA_AXIS_KERNEL_LOOP(z, config.virtual_thread_count, z) {             \
           Index outidx = x * config.virtual_thread_count.y *                   \
                              config.virtual_thread_count.z +                   \
                          y * config.virtual_thread_count.z + z;                \
@@ -68,9 +68,8 @@ namespace functor {
       if (sizex * sizey * sizez == 0) {                                        \
         return;                                                                \
       }                                                                        \
-      Cuda3DLaunchConfig config = GetCuda3DLaunchConfig(                       \
-          sizex, sizey, sizez, d, ReduceSliceDeviceKernel##reduceop<T, Index>, \
-          0, 0);                                                               \
+      Cuda3DLaunchConfig config = GetCuda3DLaunchConfig(sizex, sizey, sizez, d,\
+          ReduceSliceDeviceKernel##reduceop<T, Index>, 0, 0);                  \
                                                                                \
       ReduceSliceDeviceKernel##reduceop<T, Index>                              \
           <<<config.block_count, config.thread_per_block, 0, d.stream()>>>(    \
diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD
index 390950ae98..6365791512 100644
--- a/tensorflow/core/BUILD
+++ b/tensorflow/core/BUILD
@@ -1847,13 +1847,6 @@ cc_library(
     ],
 )
 
-tf_cuda_library(
-    name = "cuda_device_functions",
-    hdrs = ["util/cuda_device_functions.h"],
-    visibility = ["//visibility:public"],
-    deps = [":framework_lite"],
-)
-
 # TODO(josh11b): Is this needed, or can we just use ":protos_all_cc"?
 cc_library(
     name = "protos_cc",
diff --git a/tensorflow/core/kernels/bias_op_gpu.cu.cc b/tensorflow/core/kernels/bias_op_gpu.cu.cc
index f9a207208a..42f3db1d79 100644
--- a/tensorflow/core/kernels/bias_op_gpu.cu.cc
+++ b/tensorflow/core/kernels/bias_op_gpu.cu.cc
@@ -173,13 +173,19 @@ __global__ void BiasGradNCHW_SharedAtomics(const T* output_backprop,
   // Accumulate the results in the shared memory into the first element.
   // No syncthreads is needed since this is only in the same warp.
   int32 thread_index = threadIdx.x;
-  if (thread_index < 32) {
-    AccT data = s_data[thread_index];
-    for (int32 offset = warpSize / 2; offset > 0; offset /= 2) {
-      data += CudaShuffleDownSync(kCudaWarpAll, data, offset);
-    }
+  if (thread_index < 16) {
+    s_data[thread_index] += s_data[thread_index + 16];
+    __syncwarp(0xFFFF);
+    if (thread_index < 8) s_data[thread_index] += s_data[thread_index + 8];
+    __syncwarp(0xFF);
+    if (thread_index < 4) s_data[thread_index] += s_data[thread_index + 4];
+    __syncwarp(0xF);
+    if (thread_index < 2) s_data[thread_index] += s_data[thread_index + 2];
+    __syncwarp(0x3);
     if (thread_index == 0) {
-      CudaAtomicAdd(bias_backprop + bias_index, T(data));
+      T val = T(s_data[0] + s_data[1]);
+      // The first thread writes out the accumulated result to global location.
+      CudaAtomicAdd(bias_backprop + bias_index, val);
     }
   }
 }
diff --git a/tensorflow/core/kernels/depthwise_conv_op_gpu.cu.cc b/tensorflow/core/kernels/depthwise_conv_op_gpu.cu.cc
index de0bf84c8b..903aac5d68 100644
--- a/tensorflow/core/kernels/depthwise_conv_op_gpu.cu.cc
+++ b/tensorflow/core/kernels/depthwise_conv_op_gpu.cu.cc
@@ -34,7 +34,6 @@ limitations under the License.
 
 namespace tensorflow {
 
-typedef Eigen::GpuDevice GPUDevice;
 using Eigen::GpuDevice;
 
 // Returns whether depthwise convolution forward or backward input pass can be
@@ -1029,7 +1028,7 @@ __device__ __forceinline__ T WarpSumReduce(T val) {
   int zeros = sub_warp * kWidth;
   unsigned mask = ((1UL << kWidth) - 1) << zeros;
   for (int delta = kWidth / 2; delta > 0; delta /= 2) {
-    val += CudaShuffleXorSync(mask, val, delta);
+    val += CudaShuffleXor(mask, val, delta);
   }
   return val;
 }
@@ -1146,7 +1145,7 @@ __launch_bounds__(1024, 2) void DepthwiseConv2dBackpropFilterGPUKernelNHWCSmall(
 
     // Note: the condition to reach this is uniform across the entire block.
     __syncthreads();
-    unsigned active_threads = CudaBallotSync(kCudaWarpAll, depth_in_range);
+    unsigned active_threads = CudaBallot(CUDA_WARP_ALL, depth_in_range);
 
     if (depth_in_range) {
       const T* const out_ptr = inout_offset + output;
@@ -1160,7 +1159,7 @@ __launch_bounds__(1024, 2) void DepthwiseConv2dBackpropFilterGPUKernelNHWCSmall(
           T val = out1 * tile_ptr[0] + out2 * tile_ptr[tile_offset];
           // Warp-accumulate pixels of the same depth and write to accumulator.
           for (int delta = 16; delta >= kBlockSlices; delta /= 2) {
-            val += CudaShuffleDownSync(active_threads, val, delta);
+            val += CudaShuffleDown(active_threads, val, delta);
           }
           if (!(thread_idx & 32 - kBlockSlices) /* lane_idx < kBlockSlices */) {
             *accum_ptr = val;
@@ -1400,7 +1399,7 @@ __launch_bounds__(1024, 2) void DepthwiseConv2dBackpropFilterGPUKernelNCHWSmall(
 
     // Note: the condition to reach this is uniform across the entire block.
     __syncthreads();
-    unsigned active_threads = CudaBallotSync(kCudaWarpAll, slice_in_range);
+    unsigned active_threads = CudaBallot(CUDA_WARP_ALL, slice_in_range);
 
     if (slice_in_range) {
       const T* const out_ptr = inout_offset + output;
@@ -1414,7 +1413,7 @@ __launch_bounds__(1024, 2) void DepthwiseConv2dBackpropFilterGPUKernelNCHWSmall(
           T val = out1 * tile_ptr[0] + out2 * tile_ptr[tile_offset];
           // Warp-accumulate pixels of the same depth and write to accumulator.
           for (int delta = 16 / kBlockSlices; delta > 0; delta /= 2) {
-            val += CudaShuffleDownSync(active_threads, val, delta);
+            val += CudaShuffleDown(active_threads, val, delta);
           }
           if (!(thread_idx & 32 / kBlockSlices - 1)) {
             *accum_ptr = val;
diff --git a/tensorflow/core/kernels/scatter_nd_op_gpu.cu.cc b/tensorflow/core/kernels/scatter_nd_op_gpu.cu.cc
index a3c21edc15..31f74671ca 100644
--- a/tensorflow/core/kernels/scatter_nd_op_gpu.cu.cc
+++ b/tensorflow/core/kernels/scatter_nd_op_gpu.cu.cc
@@ -55,27 +55,6 @@ struct LeftUpdate<T, scatter_nd_op::UpdateOp::SUB> {
   }
 };
 
-// Specializations for std::complex, updating real and imaginary part
-// individually. Even though this is not an atomic op anymore, it is safe
-// because there is only one type of op per kernel.
-template <typename T>
-struct LeftUpdate<std::complex<T>, scatter_nd_op::UpdateOp::ADD> {
-  EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC void operator()(
-      std::complex<T>* out, const std::complex<T>& val) {
-    T* ptr = reinterpret_cast<T*>(out);
-    CudaAtomicAdd(ptr, val.real());
-    CudaAtomicAdd(ptr, val.imag());
-  }
-};
-
-template <typename T>
-struct LeftUpdate<std::complex<T>, scatter_nd_op::UpdateOp::SUB> {
-  EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC void operator()(
-      std::complex<T>* out, const std::complex<T>& val) {
-    LeftUpdate<std::complex<T>, scatter_nd_op::UpdateOp::ADD>()(out, -val);
-  }
-};
-
 }  // namespace
 
 template <typename T, typename Index, scatter_nd_op::UpdateOp op, int IXDIM>
diff --git a/tensorflow/core/kernels/svd_op_gpu.cu.cc b/tensorflow/core/kernels/svd_op_gpu.cu.cc
index 8c3a58b108..dedc2da60b 100644
--- a/tensorflow/core/kernels/svd_op_gpu.cu.cc
+++ b/tensorflow/core/kernels/svd_op_gpu.cu.cc
@@ -63,8 +63,8 @@ __global__ void ComputeValueOfVKernel(Cuda2DLaunchConfig config, int64 m,
                                       int64 ldu, const Scalar* M,
                                       const Scalar* U, const Scalar* S,
                                       Scalar* V) {
-  CUDA_AXIS_KERNEL_LOOP(batch, config.virtual_thread_count.x, X) {
-    CUDA_AXIS_KERNEL_LOOP(i, config.virtual_thread_count.y, Y) {
+  CUDA_AXIS_KERNEL_LOOP(batch, config.virtual_thread_count, x) {
+    CUDA_AXIS_KERNEL_LOOP(i, config.virtual_thread_count, y) {
       Scalar v = M[i + m * batch] * U[ldu * (i + m * batch)] * S[batch];
       CudaAtomicAdd(V + batch, v);
     }
diff --git a/tensorflow/core/util/cuda_device_functions.h b/tensorflow/core/util/cuda_device_functions.h
deleted file mode 100644
index 973a43d78f..0000000000
--- a/tensorflow/core/util/cuda_device_functions.h
+++ /dev/null
@@ -1,418 +0,0 @@
-/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-#ifndef TENSORFLOW_CORE_UTIL_CUDA_DEVICE_FUNCTIONS_H_
-#define TENSORFLOW_CORE_UTIL_CUDA_DEVICE_FUNCTIONS_H_
-
-/**
- * Wrappers and helpers for CUDA device code.
- *
- * Wraps the warp-cooperative intrinsics introduced in CUDA 9 to provide
- * backwards compatibility, see go/volta-porting for details.
- * Provides atomic operations on types that aren't natively supported.
- */
-
-#if GOOGLE_CUDA
-
-#include <algorithm>
-#include <complex>
-#include "cuda/include/cuda.h"
-#include "cuda/include/device_functions.h"
-#include "tensorflow/core/platform/types.h"
-
-#if __CUDACC_VER_MAJOR__ >= 9
-#include "cuda/include/cuda_fp16.h"
-#elif __CUDACC_VER__ >= 7050
-#include "cuda/include/cuda_fp16.h"
-#else
-#endif
-
-namespace tensorflow {
-
-namespace detail {
-
-// Helper for range-based for loop using 'delta' increments.
-// Usage: see CudaGridRange?() functions below.
-template <typename T>
-class CudaGridRange {
-  struct Iterator {
-    __device__ Iterator(T index, T delta) : index_(index), delta_(delta) {}
-    __device__ T operator*() const { return index_; }
-    __device__ Iterator& operator++() {
-      index_ += delta_;
-      return *this;
-    }
-    __device__ bool operator!=(const Iterator& other) const {
-      bool greater = index_ > other.index_;
-      bool less = index_ < other.index_;
-      // Anything past an end iterator (delta_ == 0) is equal.
-      // In range-based for loops, this optimizes to 'return less'.
-      if (!other.delta_) {
-        return less;
-      }
-      if (!delta_) {
-        return greater;
-      }
-      return less || greater;
-    }
-
-   private:
-    T index_;
-    const T delta_;
-  };
-
- public:
-  __device__ CudaGridRange(T begin, T delta, T end)
-      : begin_(begin), delta_(delta), end_(end) {}
-
-  __device__ Iterator begin() const { return Iterator{begin_, delta_}; }
-  __device__ Iterator end() const { return Iterator{end_, 0}; }
-
- private:
-  T begin_;
-  T delta_;
-  T end_;
-};
-
-}  // namespace detail
-
-// Helper to visit indices in the range 0 <= i < count, using the x-coordinate
-// of the global thread index. That is, each index i is visited by all threads
-// with the same x-coordinate.
-// Usage: for(int i : CudaGridRangeX(count)) { visit(i); }
-template <typename T>
-__device__ detail::CudaGridRange<T> CudaGridRangeX(T count) {
-  return detail::CudaGridRange<T>(blockIdx.x * blockDim.x + threadIdx.x,
-                                  gridDim.x * blockDim.x, count);
-}
-
-// Helper to visit indices in the range 0 <= i < count using the y-coordinate.
-// Usage: for(int i : CudaGridRangeY(count)) { visit(i); }
-template <typename T>
-__device__ detail::CudaGridRange<T> CudaGridRangeY(T count) {
-  return detail::CudaGridRange<T>(blockIdx.y * blockDim.y + threadIdx.y,
-                                  gridDim.y * blockDim.y, count);
-}
-
-// Helper to visit indices in the range 0 <= i < count using the z-coordinate.
-// Usage: for(int i : CudaGridRangeZ(count)) { visit(i); }
-template <typename T>
-__device__ detail::CudaGridRange<T> CudaGridRangeZ(T count) {
-  return detail::CudaGridRange<T>(blockIdx.z * blockDim.z + threadIdx.z,
-                                  gridDim.z * blockDim.z, count);
-}
-
-// Mask for all 32 threads in a warp.
-const unsigned kCudaWarpAll = 0xffffffff;
-
-// On sm_6x and earlier, verifies that all bits in mask corresponding to active
-// threads of the warp are set. It does not verify the converse (bits of
-// inactive threads are not set), because all syncs are unblocked when a thread
-// exits the kernel, but the ballot of inactive (including exited) threads
-// returns 0.
-__device__ inline void CudaVerifySyncMask(unsigned mask) {
-#if __CUDA_ARCH__ < 700
-  assert(0 == (__ballot(1) & ~mask));  // Active threads must have mask bit set.
-#endif
-}
-
-// For all *_sync wrappers below, it is illegal to synchronize threads from
-// different program locations, because that is not supported before sm_70.
-// Code that requires sm_70 (and CUDA 9) may use the intrinsic directly.
-
-// Wrapper for __syncwarp.
-__device__ inline void CudaSyncWarp(unsigned mask = kCudaWarpAll) {
-  CudaVerifySyncMask(mask);
-#if CUDA_VERSION >= 9000
-  __syncwarp(mask);
-#endif
-}
-
-// Wrapper for __ballot_sync.
-__device__ inline unsigned CudaBallotSync(unsigned mask, int pred) {
-  CudaVerifySyncMask(mask);
-#if CUDA_VERSION >= 9000
-  return __ballot_sync(mask, pred);
-#else
-  return __ballot(pred);
-#endif
-}
-
-// Wrapper for __any_sync.
-__device__ inline int CudaAnySync(unsigned mask, int pred) {
-  CudaVerifySyncMask(mask);
-#if CUDA_VERSION >= 9000
-  return __any_sync(mask, pred);
-#else
-  return __any(pred);
-#endif
-}
-
-// Wrapper for __all_sync.
-__device__ inline int CudaAllSync(unsigned mask, int pred) {
-  CudaVerifySyncMask(mask);
-#if CUDA_VERSION >= 9000
-  return __all_sync(mask, pred);
-#else
-  return __all(pred);
-#endif
-}
-
-// Wrapper for __shfl_sync.
-template <typename T>
-__device__ T CudaShuffleSync(unsigned mask, T value, int src_lane,
-                             int width = warpSize) {
-  CudaVerifySyncMask(mask);
-#if CUDA_VERSION >= 9000
-  return __shfl_sync(mask, value, src_lane, width);
-#else
-  return __shfl(value, src_lane, width);
-#endif
-}
-
-// Variant of the (undocumented) version from the CUDA SDK, but using unsigned
-// instead of float for lo and hi (which is incorrect with ftz, for example).
-// See b/69446944.
-__device__ inline double CudaShuffleSync(unsigned mask, double value,
-                                         int src_lane, int width = warpSize) {
-  unsigned lo, hi;
-  asm volatile("mov.b64 {%0,%1}, %2;" : "=r"(lo), "=r"(hi) : "d"(value));
-  hi = CudaShuffleSync(mask, hi, src_lane, width);
-  lo = CudaShuffleSync(mask, lo, src_lane, width);
-  asm volatile("mov.b64 %0, {%1,%2};" : "=d"(value) : "r"(lo), "r"(hi));
-  return value;
-}
-
-// Wrapper for __shfl_up_sync.
-template <typename T>
-__device__ inline T CudaShuffleUpSync(unsigned mask, T value, int delta,
-                                      int width = warpSize) {
-  CudaVerifySyncMask(mask);
-#if CUDA_VERSION >= 9000
-  return __shfl_up_sync(mask, value, delta, width);
-#else
-  return __shfl_up(value, delta, width);
-#endif
-}
-
-// Variant of the (undocumented) version from the CUDA SDK, but using unsigned
-// instead of float for lo and hi (which is incorrect with ftz, for example).
-// See b/69446944.
-__device__ inline double CudaShuffleUpSync(unsigned mask, double value,
-                                           int delta, int width = warpSize) {
-  unsigned lo, hi;
-  asm volatile("mov.b64 {%0,%1}, %2;" : "=r"(lo), "=r"(hi) : "d"(value));
-  hi = CudaShuffleUpSync(mask, hi, delta, width);
-  lo = CudaShuffleUpSync(mask, lo, delta, width);
-  asm volatile("mov.b64 %0, {%1,%2};" : "=d"(value) : "r"(lo), "r"(hi));
-  return value;
-}
-
-// Wrapper for __shfl_down_sync.
-template <typename T>
-__device__ inline T CudaShuffleDownSync(unsigned mask, T value, int delta,
-                                        int width = warpSize) {
-  CudaVerifySyncMask(mask);
-#if CUDA_VERSION >= 9000
-  return __shfl_down_sync(mask, value, delta, width);
-#else
-  return __shfl_down(value, delta, width);
-#endif
-}
-
-// Variant of the (undocumented) version from the CUDA SDK, but using unsigned
-// instead of float for lo and hi (which is incorrect with ftz, for example).
-// See b/69446944.
-__device__ inline double CudaShuffleDownSync(unsigned mask, double value,
-                                             int delta, int width = warpSize) {
-  unsigned lo, hi;
-  asm volatile("mov.b64 {%0,%1}, %2;" : "=r"(lo), "=r"(hi) : "d"(value));
-  hi = CudaShuffleDownSync(mask, hi, delta, width);
-  lo = CudaShuffleDownSync(mask, lo, delta, width);
-  asm volatile("mov.b64 %0, {%1,%2};" : "=d"(value) : "r"(lo), "r"(hi));
-  return value;
-}
-
-// Wrapper for __shfl_xor_sync.
-template <typename T>
-__device__ T CudaShuffleXorSync(unsigned mask, T value, int lane_mask,
-                                int width = warpSize) {
-  CudaVerifySyncMask(mask);
-#if CUDA_VERSION >= 9000
-  return __shfl_xor_sync(mask, value, lane_mask, width);
-#else
-  return __shfl_xor(value, lane_mask, width);
-#endif
-}
-
-// Variant of the (undocumented) version from the CUDA SDK, but using unsigned
-// instead of float for lo and hi (which is incorrect with ftz, for example).
-// See b/69446944.
-__device__ inline double CudaShuffleXorSync(unsigned mask, double value,
-                                            int lane_mask,
-                                            int width = warpSize) {
-  unsigned lo, hi;
-  asm volatile("mov.b64 {%0,%1}, %2;" : "=r"(lo), "=r"(hi) : "d"(value));
-  hi = CudaShuffleXorSync(mask, hi, lane_mask, width);
-  lo = CudaShuffleXorSync(mask, lo, lane_mask, width);
-  asm volatile("mov.b64 %0, {%1,%2};" : "=d"(value) : "r"(lo), "r"(hi));
-  return value;
-}
-
-// Wrapper for __ldg.
-template <typename T>
-__host__ __device__ T CudaLdg(const T* address) {
-#if __CUDA_ARCH__ >= 350
-  return __ldg(address);
-#else
-  return *address;
-#endif
-}
-
-__host__ __device__ inline bool CudaLdg(const bool* address) {
-  return CudaLdg(reinterpret_cast<const char*>(address)) != 0;
-}
-
-__host__ __device__ inline std::complex<float> CudaLdg(
-    const std::complex<float>* address) {
-#if __CUDA_ARCH__ >= 350
-  float2 mem = __ldg(reinterpret_cast<const float2*>(address));
-  return std::complex<float>(mem.x, mem.y);
-#else
-  return *address;
-#endif
-}
-
-__host__ __device__ inline std::complex<double> CudaLdg(
-    const std::complex<double>* address) {
-#if __CUDA_ARCH__ >= 350
-  double2 mem = __ldg(reinterpret_cast<const double2*>(address));
-  return std::complex<double>(mem.x, mem.y);
-#else
-  return *address;
-#endif
-}
-
-// Zeroes count elements starting at ptr using all threads of a 1-D grid.
-// Note: this function does not synchronize, and therefore the memory range is
-// not guaranteed to be zero until the next kernel launch.
-template <typename T>
-__global__ void SetZero(const int count, T* ptr) {
-  // Check that the grid is one dimensional and index doesn't overflow.
-  assert(blockDim.y == 1 && blockDim.z == 1);
-  assert(blockDim.x * gridDim.x / blockDim.x == gridDim.x);
-  for (int i : CudaGridRangeX(count)) {
-    ptr[i] = T(0);
-  }
-}
-
-namespace detail {
-// Helper function for atomic accumulation implemented as CAS.
-template <typename T, typename F>
-__device__ T CudaAtomicCasHelper(T* ptr, F accumulate) {
-  T old = *ptr;
-  T assumed;
-  do {
-    assumed = old;
-    old = atomicCAS(ptr, assumed, accumulate(assumed));
-  } while (assumed != old);
-  return old;
-}
-
-// Overload for floating point (using integer comparison to handle NaN
-// correctly).
-template <typename F>
-__device__ float CudaAtomicCasHelper(float* ptr, F accumulate) {
-  return __float_as_int(
-      CudaAtomicCasHelper(reinterpret_cast<int32*>(ptr), [accumulate](int32 a) {
-        return __float_as_int(accumulate(__int_as_float(a)));
-      }));
-}
-template <typename F>
-__device__ double CudaAtomicCasHelper(double* ptr, F accumulate) {
-  return __longlong_as_double(CudaAtomicCasHelper(
-      reinterpret_cast<tensorflow::uint64*>(ptr),
-      [accumulate](tensorflow::uint64 a) {
-        return __double_as_longlong(accumulate(__longlong_as_double(a)));
-      }));
-}
-}  // namespace detail
-
-// CUDA provides atomic ops, but not for all types.  We provide wrappers
-// for some ops and provide implementation for all reasonable types.
-
-template <typename T>
-__device__ T CudaAtomicAdd(T* ptr, T value) {
-  return atomicAdd(ptr, value);
-}
-#if __CUDA_ARCH__ < 600
-__device__ inline double CudaAtomicAdd(double* ptr, double value) {
-  return detail::CudaAtomicCasHelper(ptr,
-                                     [value](double a) { return a + value; });
-}
-#elif __clang__
-// Clang cannot compile __nvvm_atom_add_gen_d builtin yet, use inline PTX.
-// see https://reviews.llvm.org/D39638
-__device__ inline double CudaAtomicAdd(double* ptr, double value) {
-  double result;
-  asm volatile("atom.add.f64 %0, [%1], %2;"
-               : "=d"(result)
-               : "l"(ptr), "d"(value)
-               : "memory");
-  return result;
-}
-#endif
-
-template <typename T>
-__device__ T CudaAtomicSub(T* ptr, T value) {
-  return atomicSub(ptr, value);
-}
-// Specializations of substraction which add the negative value.
-__device__ inline float CudaAtomicSub(float* ptr, float value) {
-  return CudaAtomicAdd(ptr, -value);
-}
-__device__ inline double CudaAtomicSub(double* ptr, double value) {
-  return CudaAtomicAdd(ptr, -value);
-}
-__device__ inline tensorflow::uint64 CudaAtomicSub(tensorflow::uint64* ptr,
-                                                   tensorflow::uint64 value) {
-  return CudaAtomicAdd(ptr, -value);
-}
-
-template <typename T>
-__device__ T CudaAtomicMax(T* ptr, T value) {
-  return atomicMax(ptr, value);
-}
-#if __CUDA_ARCH__ < 320
-__device__ inline tensorflow::uint64 CudaAtomicMax(tensorflow::uint64* ptr,
-                                                   tensorflow::uint64 value) {
-  return detail::CudaAtomicCasHelper(
-      ptr, [value](tensorflow::uint64 a) { return max(a, value); });
-}
-#endif
-
-template <typename T>
-__device__ inline T CudaAtomicMul(T* ptr, T value) {
-  return detail::CudaAtomicCasHelper(ptr, [value](T a) { return a * value; });
-}
-template <typename T>
-__device__ inline T CudaAtomicDiv(T* ptr, T value) {
-  return detail::CudaAtomicCasHelper(ptr, [value](T a) { return a / value; });
-}
-
-}  // namespace tensorflow
-
-#endif  // GOOGLE_CUDA
-#endif  // TENSORFLOW_CORE_UTIL_CUDA_KERNEL_HELPER_H_
diff --git a/tensorflow/core/util/cuda_kernel_helper.h b/tensorflow/core/util/cuda_kernel_helper.h
index b71218d73c..cf11f419a4 100644
--- a/tensorflow/core/util/cuda_kernel_helper.h
+++ b/tensorflow/core/util/cuda_kernel_helper.h
@@ -18,125 +18,299 @@ limitations under the License.
 
 #if GOOGLE_CUDA
 
-#include "tensorflow/core/util/cuda_device_functions.h"
-#include "tensorflow/core/util/cuda_launch_config.h"
+#include <algorithm>
 
-// Deprecated, use 'for(int i : CudaGridRangeX(n))' instead.
-#define CUDA_1D_KERNEL_LOOP(i, n) \
-  for (int i : ::tensorflow::CudaGridRangeX<int>(n))
-// Deprecated, use 'for(int i : CudaGridRange?(n))' instead.
-#define CUDA_AXIS_KERNEL_LOOP(i, n, axis) \
-  for (int i : ::tensorflow::CudaGridRange##axis<int>(n))
+#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
+#include "cuda/include/cuda.h"
+#include "tensorflow/core/framework/op_kernel.h"
+#include "tensorflow/core/platform/logging.h"
+#include "tensorflow/core/platform/stream_executor.h"
+#include "tensorflow/core/platform/types.h"
 
-namespace tensorflow {
-template <typename T>
-__host__ __device__ inline T ldg(const T* ptr) {
-  return CudaLdg(ptr);
-}
+// Mask for all 32 threads in a warp.
+#define CUDA_WARP_ALL 0xFFFFFFFF
 
-template <typename T>
-__host__ __device__ inline const T& tf_min(const T& x, const T& y) {
-  return x < y ? x : y;
-}
+#if defined(CUDA_VERSION) && CUDA_VERSION < 9000
+// CUDA 9.0 introduces a new, light-weight barrier synchronization primitive
+// that operates at the warp-scope. This is required to ensure visibility of
+// reads/writes among threads that can make indepenent progress on Volta.
+// For previous CUDA versions these synchronizations not necessary, and we
+// define an empty function as a convenience for backward compatibility.
+__device__ inline void __syncwarp(unsigned mask = CUDA_WARP_ALL) {}
 
-template <typename T>
-__host__ __device__ inline const T& tf_max(const T& x, const T& y) {
-  return x < y ? y : x;
-}
+// CUDA 9.0 deprecates the warp-intrinsic functions (shfl, ballot, etc.) in
+// favor of synchronizing versions. These ensure that all warp lanes specified
+// in mask execute the intrinsic in convergence. Here we provide legacy mappings
+// to the less-verbose routines provided in previous versions of CUDA.
+#define __ballot_sync(mask, predicate) __ballot(predicate)
+#define __shfl_sync(mask, val, srcLane, width) __shfl(val, srcLane, width)
+#define __shfl_down_sync(mask, val, delta, width) __shfl_down(val, delta, width)
+#define __shfl_up_sync(mask, val, delta, width) __shfl_up(val, delta, width)
+#define __shfl_xor_sync(mask, val, laneMask, width) \
+  __shfl_xor(val, laneMask, width)
+#endif
 
-// Overloads of the above functions for float and double.
-__host__ __device__ inline float tf_min(float x, float y) {
-  return fminf(x, y);
-}
-__host__ __device__ inline double tf_min(double x, double y) {
-  return fmin(x, y);
+// Usage of GetCudaLaunchConfig, GetCuda2DLaunchConfig, and
+// GetCuda3DLaunchConfig:
+//
+// There are two versions of GetCudaLaunchConfig and GetCuda2DLaunchConfig, one
+// version uses heuristics without any knowledge of the device kernel, the other
+// version uses cudaOccupancyMaxPotentialBlockSize to determine the theoretical
+// launch parameters that maximize occupancy. Currently, only the maximum
+// occupancy version of GetCuda3DLaunchConfig is available.
+//
+// For large number of work elements, the convention is that each kernel would
+// iterate through its assigned range. The return value of GetCudaLaunchConfig
+// is struct CudaLaunchConfig, which contains all the information needed for the
+// kernel launch, including: virtual number of threads, the number of threads
+// per block and number of threads per block used inside <<< >>> of a kernel
+// launch. GetCuda2DLaunchConfig and GetCuda3DLaunchConfig does the same thing
+// as CudaLaunchConfig. The only difference is the dimension. The macros
+// CUDA_1D_KERNEL_LOOP and CUDA_AXIS_KERNEL_LOOP might be used to do inner loop.
+//
+/* Sample code:
+
+__global__ void MyKernel1D(CudaLaunchConfig config, other_args...) {
+  CUDA_1D_KERNEL_LOOP(x, config.virtual_thread_count) {
+    do_your_job_here;
+  }
 }
-__host__ __device__ inline float tf_max(float x, float y) {
-  return fmaxf(x, y);
+
+__global__ void MyKernel2D(Cuda2DLaunchConfig config, other_args...) {
+  CUDA_AXIS_KERNEL_LOOP(x, config.virtual_thread_count, x) {
+    CUDA_AXIS_KERNEL_LOOP(y, config.virtual_thread_count, y) {
+      do_your_job_here;
+    }
+  }
 }
-__host__ __device__ inline double tf_max(double x, double y) {
-  return fmax(x, y);
+
+__global__ void MyKernel3D(Cuda3DLaunchConfig config, other_args...) {
+  CUDA_AXIS_KERNEL_LOOP(x, config.virtual_thread_count, x) {
+    CUDA_AXIS_KERNEL_LOOP(y, config.virtual_thread_count, y) {
+      CUDA_AXIS_KERNEL_LOOP(z, config.virtual_thread_count, z) {
+        do_your_job_here;
+      }
+    }
+  }
 }
 
-__device__ inline Eigen::half CudaShuffleSync(unsigned mask, Eigen::half value,
-                                              int src_lane,
-                                              int width = warpSize) {
-  return Eigen::half(
-      CudaShuffleSync(mask, static_cast<uint16>(value), src_lane, width));
+void MyDriverFunc(const GPUDevice &d) {
+  // use heuristics
+  CudaLaunchConfig cfg1 = GetCudaLaunchConfig(10240, d);
+  MyKernel1D <<<config.block_count,
+                config.thread_per_block, 0, d.stream()>>> (cfg1, other_args...);
+  Cuda2DLaunchConfig cfg2 = GetCuda2DLaunchConfig(10240, 10240, d);
+  MyKernel2D <<<config.block_count,
+                config.thread_per_block, 0, d.stream()>>> (cfg2, other_args...);
+  Cuda3DLaunchConfig cfg3 = GetCuda3DLaunchConfig(4096, 4096, 100, d);
+  MyKernel3D <<<config.block_count,
+                config.thread_per_block, 0, d.stream()>>> (cfg3, other_args...);
+
+  // maximize occupancy
+  CudaLaunchConfig cfg4 = GetCudaLaunchConfig(10240, d, MyKernel1D, 0, 0 );
+  MyKernel1D <<<config.block_count,
+                config.thread_per_block, 0, d.stream()>>> (cfg4, other_args...);
+  Cuda2DLaunchConfig cfg5 = GetCuda2DLaunchConfig(10240, 10240, d,
+                                                  MyKernel1D, 0, 0);
+  MyKernel2D <<<config.block_count,
+                config.thread_per_block, 0, d.stream()>>> (cfg5, other_args...);
+  Cuda3DLaunchConfig cfg6 = GetCuda3DLaunchConfig(4096, 4096, 100, d,
+                                                  MyKernel1D, 0, 0);
+  MyKernel3D <<<config.block_count,
+                config.thread_per_block, 0, d.stream()>>> (cfg6, other_args...);
 }
 
-__device__ EIGEN_ALWAYS_INLINE Eigen::half CudaShuffleUpSync(
-    unsigned mask, Eigen::half value, int delta, int width = warpSize) {
-  return Eigen::half(
-      CudaShuffleUpSync(mask, static_cast<uint16>(value), delta, width));
+// See the test for this for more example:
+//
+https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/util/cuda_kernel_helper_test.cu.cc
+
+*/
+
+#define CUDA_1D_KERNEL_LOOP(i, n)                            \
+  for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < n; \
+       i += blockDim.x * gridDim.x)
+
+#define CUDA_AXIS_KERNEL_LOOP(i, n, axis)                                  \
+  for (int i = blockIdx.axis * blockDim.axis + threadIdx.axis; i < n.axis; \
+       i += blockDim.axis * gridDim.axis)
+
+#define DIV_UP(a, b) (((a) + (b)-1) / (b))
+
+namespace tensorflow {
+
+typedef Eigen::GpuDevice GPUDevice;
+
+struct CudaLaunchConfig {
+  // Logical number of thread that works on the elements. If each logical
+  // thread works on exactly a single element, this is the same as the working
+  // element count.
+  int virtual_thread_count = -1;
+  // Number of threads per block.
+  int thread_per_block = -1;
+  // Number of blocks for Cuda kernel launch.
+  int block_count = -1;
+};
+
+// Calculate the Cuda launch config we should use for a kernel launch.
+// This is assuming the kernel is quite simple and will largely be
+// memory-limited.
+// REQUIRES: work_element_count > 0.
+inline CudaLaunchConfig GetCudaLaunchConfig(int work_element_count,
+                                            const GPUDevice& d) {
+  CHECK_GT(work_element_count, 0);
+  CudaLaunchConfig config;
+  const int virtual_thread_count = work_element_count;
+  const int physical_thread_count = std::min(
+      d.getNumCudaMultiProcessors() * d.maxCudaThreadsPerMultiProcessor(),
+      virtual_thread_count);
+  const int thread_per_block = std::min(1024, d.maxCudaThreadsPerBlock());
+  const int block_count =
+      std::min(DIV_UP(physical_thread_count, thread_per_block),
+               d.getNumCudaMultiProcessors());
+
+  config.virtual_thread_count = virtual_thread_count;
+  config.thread_per_block = thread_per_block;
+  config.block_count = block_count;
+  return config;
 }
 
-__device__ EIGEN_ALWAYS_INLINE Eigen::half CudaShuffleDownSync(
-    unsigned mask, Eigen::half value, int delta, int width = warpSize) {
-  return Eigen::half(
-      CudaShuffleDownSync(mask, static_cast<uint16>(value), delta, width));
+// Calculate the Cuda launch config we should use for a kernel launch. This
+// variant takes the resource limits of func into account to maximize occupancy.
+// REQUIRES: work_element_count > 0.
+template <typename DeviceFunc>
+inline CudaLaunchConfig GetCudaLaunchConfig(int work_element_count,
+                                            const GPUDevice& d, DeviceFunc func,
+                                            size_t dynamic_shared_memory_size,
+                                            int block_size_limit) {
+  CHECK_GT(work_element_count, 0);
+  CudaLaunchConfig config;
+  int block_count = 0;
+  int thread_per_block = 0;
+
+  cudaError_t err = cudaOccupancyMaxPotentialBlockSize(
+      &block_count, &thread_per_block, func, dynamic_shared_memory_size,
+      block_size_limit);
+  CHECK_EQ(err, cudaSuccess);
+
+  block_count =
+      std::min(block_count, DIV_UP(work_element_count, thread_per_block));
+
+  config.virtual_thread_count = work_element_count;
+  config.thread_per_block = thread_per_block;
+  config.block_count = block_count;
+  return config;
 }
 
-__device__ EIGEN_ALWAYS_INLINE Eigen::half CudaShuffleXorSync(
-    unsigned mask, Eigen::half value, int lane_mask, int width = warpSize) {
-  return Eigen::half(
-      CudaShuffleXorSync(mask, static_cast<uint16>(value), lane_mask, width));
+struct Cuda2DLaunchConfig {
+  dim3 virtual_thread_count = dim3(0, 0, 0);
+  dim3 thread_per_block = dim3(0, 0, 0);
+  dim3 block_count = dim3(0, 0, 0);
+};
+
+inline Cuda2DLaunchConfig GetCuda2DLaunchConfig(int xdim, int ydim,
+                                                const GPUDevice& d) {
+  Cuda2DLaunchConfig config;
+
+  if (xdim <= 0 || ydim <= 0) {
+    return config;
+  }
+
+  const int kThreadsPerBlock = 256;
+  int block_cols = std::min(xdim, kThreadsPerBlock);
+  // ok to round down here and just do more loops in the kernel
+  int block_rows = std::max(kThreadsPerBlock / block_cols, 1);
+
+  const int physical_thread_count =
+      d.getNumCudaMultiProcessors() * d.maxCudaThreadsPerMultiProcessor();
+
+  const int max_blocks = std::max(physical_thread_count / kThreadsPerBlock, 1);
+
+  config.virtual_thread_count = dim3(xdim, ydim, 1);
+  config.thread_per_block = dim3(block_cols, block_rows, 1);
+
+  int grid_x = std::min(DIV_UP(xdim, block_cols), max_blocks);
+
+  config.block_count = dim3(
+      grid_x, std::min(max_blocks / grid_x, std::max(ydim / block_rows, 1)), 1);
+  return config;
 }
 
-namespace detail {
-// Overload of above function for half. Note that we don't have
-// atomicCAS() for anything less than 32 bits, so we need to include the
-// other 16 bits in the operation.
-//
-// This version is going to be very slow
-// under high concurrency, since most threads will be spinning on failing
-// their compare-and-swap tests. (The fact that we get false sharing on the
-// neighboring fp16 makes this even worse.) If you are doing a large reduction,
-// you are much better off with doing the intermediate steps in fp32 and then
-// switching to fp16 as late as you can in the calculations.
-//
-// Note: Assumes little endian.
-template <typename F>
-__device__ Eigen::half CudaAtomicCasHelper(Eigen::half* ptr, F accumulate) {
-  namespace half_impl = Eigen::half_impl;
-  intptr_t intptr = reinterpret_cast<intptr_t>(ptr);
-  if (intptr & 0x3) {
-    assert(!(intptr & 0x1));
-    // The half is in the second part of the uint32 (upper 16 bits).
-    uint32* address = reinterpret_cast<uint32*>(intptr - 2);
-    uint32 result = CudaAtomicCasHelper(address, [accumulate](uint32 a) {
-      Eigen::half acc = accumulate(
-          half_impl::__half_raw{static_cast<unsigned short>(a >> 16)});
-      uint32_t upper = static_cast<half_impl::__half_raw>(acc).x;
-      return (upper << 16) | (a & 0xffff);
-    });
-    return half_impl::__half_raw{static_cast<uint16>(result >> 16)};
-  } else {
-    // The half is in the first part of the uint32 (lower 16 bits).
-    uint32* address = reinterpret_cast<uint32*>(intptr);
-    uint32 result = CudaAtomicCasHelper(address, [accumulate](uint32 a) {
-      Eigen::half acc = accumulate(
-          half_impl::__half_raw{static_cast<unsigned short>(a & 0xffff)});
-      uint32_t lower = static_cast<half_impl::__half_raw>(acc).x;
-      return (a & 0xffff0000) | lower;
-    });
-    return half_impl::__half_raw{static_cast<uint16>(result & 0xffff)};
+// Calculate the Cuda 2D and 3D launch config we should use for a kernel launch.
+// This variant takes the resource limits of func into account to maximize
+// occupancy.
+using Cuda3DLaunchConfig = Cuda2DLaunchConfig;
+
+template <typename DeviceFunc>
+inline Cuda3DLaunchConfig GetCuda3DLaunchConfig(
+    int xdim, int ydim, int zdim, const GPUDevice& d, DeviceFunc func,
+    size_t dynamic_shared_memory_size, int block_size_limit) {
+  Cuda3DLaunchConfig config;
+
+  if (xdim <= 0 || ydim <= 0 || zdim <= 0) {
+    return config;
   }
+
+  int dev;
+  cudaGetDevice(&dev);
+  cudaDeviceProp deviceProp;
+  cudaGetDeviceProperties(&deviceProp, dev);
+  int xthreadlimit = deviceProp.maxThreadsDim[0];
+  int ythreadlimit = deviceProp.maxThreadsDim[1];
+  int zthreadlimit = deviceProp.maxThreadsDim[2];
+  int xgridlimit = deviceProp.maxGridSize[0];
+  int ygridlimit = deviceProp.maxGridSize[1];
+  int zgridlimit = deviceProp.maxGridSize[2];
+
+  int block_count = 0;
+  int thread_per_block = 0;
+  cudaError_t err = cudaOccupancyMaxPotentialBlockSize(
+      &block_count, &thread_per_block, func, dynamic_shared_memory_size,
+      block_size_limit);
+  CHECK_EQ(err, cudaSuccess);
+
+#define MIN3(a, b, c) std::min((a), std::min((b), (c)))
+  int threadsx = MIN3(xdim, thread_per_block, xthreadlimit);
+  int threadsy =
+      MIN3(ydim, std::max(thread_per_block / threadsx, 1), ythreadlimit);
+  int threadsz =
+      MIN3(zdim, std::max(thread_per_block / (threadsx * threadsy), 1),
+           zthreadlimit);
+
+  int blocksx = MIN3(block_count, DIV_UP(xdim, threadsx), xgridlimit);
+  int blocksy =
+      MIN3(DIV_UP(block_count, blocksx), DIV_UP(ydim, threadsy), ygridlimit);
+  int blocksz = MIN3(DIV_UP(block_count, (blocksx * blocksy)),
+                     DIV_UP(zdim, threadsz), zgridlimit);
+#undef MIN3
+
+  config.virtual_thread_count = dim3(xdim, ydim, zdim);
+  config.thread_per_block = dim3(threadsx, threadsy, threadsz);
+  config.block_count = dim3(blocksx, blocksy, blocksz);
+  return config;
 }
-}  // namespace detail
 
-__device__ inline Eigen::half CudaAtomicAdd(Eigen::half* ptr,
-                                            Eigen::half value) {
-  return detail::CudaAtomicCasHelper(
-      ptr, [value](Eigen::half a) { return a + value; });
+template <typename DeviceFunc>
+inline Cuda2DLaunchConfig GetCuda2DLaunchConfig(
+    int xdim, int ydim, const GPUDevice& d, DeviceFunc func,
+    size_t dynamic_shared_memory_size, int block_size_limit) {
+  return GetCuda3DLaunchConfig(xdim, ydim, 1, d, func,
+                               dynamic_shared_memory_size, block_size_limit);
 }
-__device__ inline Eigen::half CudaAtomicSub(Eigen::half* ptr,
-                                            Eigen::half value) {
-  return detail::CudaAtomicCasHelper(
-      ptr, [value](Eigen::half a) { return a - value; });
+
+// Returns a raw reference to the current cuda stream.  Required by a
+// number of kernel calls (for which StreamInterface* does not work), i.e.
+// CUB and certain cublas primitives.
+inline const cudaStream_t& GetCudaStream(OpKernelContext* context) {
+  const cudaStream_t* ptr = CHECK_NOTNULL(
+      reinterpret_cast<const cudaStream_t*>(context->op_device_context()
+                                                ->stream()
+                                                ->implementation()
+                                                ->CudaStreamMemberHack()));
+  return *ptr;
 }
 
 namespace cuda_helper {
+
 template <typename IntType>
 __device__ IntType upper_bound(IntType* first, IntType count, IntType val) {
   IntType* orig = first;
@@ -156,8 +330,481 @@ __device__ IntType upper_bound(IntType* first, IntType count, IntType val) {
 
   return first - orig;
 }
+
 }  // namespace cuda_helper
+
+template <typename T>
+__device__ __host__ inline T ldg(const T* address) {
+#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 350
+  return __ldg(address);
+#else
+  return *address;
+#endif
+}
+
+template <>
+__device__ __host__ inline std::complex<float> ldg(
+    const std::complex<float>* address) {
+#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 350
+  float2 mem = __ldg(reinterpret_cast<const float2*>(address));
+  return std::complex<float>(mem.x, mem.y);
+#else
+  return *address;
+#endif
+}
+
+template <>
+__device__ __host__ inline std::complex<double> ldg(
+    const std::complex<double>* address) {
+#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 350
+  double2 mem = __ldg(reinterpret_cast<const double2*>(address));
+  return std::complex<double>(mem.x, mem.y);
+#else
+  return *address;
+#endif
+}
+
+template <>
+__device__ __host__ inline Eigen::half ldg(const Eigen::half* address) {
+#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 350
+  return Eigen::half_impl::raw_uint16_to_half(
+      __ldg(reinterpret_cast<const uint16_t*>(address)));
+#else
+  return *address;
+#endif
+}
+
+template <>
+__device__ __host__ inline bool ldg(const bool* address) {
+#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 350
+  return *reinterpret_cast<const bool*>(
+      __ldg(reinterpret_cast<const char*>(address)));
+#else
+  return *address;
+#endif
+}
+
+// CUDA provides atomic ops, but not for all types.  We provide wrappers
+// for some ops and provide implementation for all reasonable types.
+#define CUDA_ATOMIC_WRAPPER(op, T) \
+  __device__ __forceinline__ T CudaAtomic##op(T* address, T val)
+
+#define USE_CUDA_ATOMIC(op, T) \
+  CUDA_ATOMIC_WRAPPER(op, T) { return atomic##op(address, val); }
+
+// For atomicAdd.
+USE_CUDA_ATOMIC(Add, int32);
+USE_CUDA_ATOMIC(Add, uint32);
+USE_CUDA_ATOMIC(Add, uint64);
+USE_CUDA_ATOMIC(Add, float);
+
+// For atomicMax.
+USE_CUDA_ATOMIC(Max, int32);
+USE_CUDA_ATOMIC(Max, uint32);
+#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 350
+USE_CUDA_ATOMIC(Max, uint64);
+#else
+// The uint64 overload of atomicMax() is only available for __CUDA_ARCH__ >=
+// 350.  If not satisfied, we provide a custom implementation using atomicCAS().
+CUDA_ATOMIC_WRAPPER(Max, uint64) {
+  uint64* address_as_ull = reinterpret_cast<uint64*>(address);
+  uint64 old = *address_as_ull, assumed;
+
+  do {
+    assumed = old;
+    old = atomicCAS(address_as_ull, assumed, max(val, assumed));
+  } while (assumed != old);
+
+  return old;
+}
+#endif
+
+// Custom implementation of atomicAdd for double.
+// This implementation is copied from CUDA manual.
+CUDA_ATOMIC_WRAPPER(Add, double) {
+  uint64* address_as_ull = reinterpret_cast<uint64*>(address);
+  uint64 old = *address_as_ull, assumed;
+
+  do {
+    assumed = old;
+    old = atomicCAS(address_as_ull, assumed,
+                    __double_as_longlong(val + __longlong_as_double(assumed)));
+
+    // Note: uses integer comparison to avoid hang in case of NaN
+  } while (assumed != old);
+
+  return __longlong_as_double(old);
+}
+
+// Custom implementation of atomicAdd for std::complex<float>.
+// This implementation performs to atomic additions on the components.
+CUDA_ATOMIC_WRAPPER(Add, std::complex<float>) {
+#if defined(__CUDA_ARCH__)
+#if __CUDA_ARCH__ >= 350
+  float2* addr_as_float2 = reinterpret_cast<float2*>(address);
+  float2* val_as_float2 = reinterpret_cast<float2*>(&val);
+  CudaAtomicAdd(&(addr_as_float2->x), val_as_float2->x);
+  CudaAtomicAdd(&(addr_as_float2->y), val_as_float2->y);
+#else
+  static_assert(sizeof(std::complex<float>) == 2 * sizeof(float),
+                "Unable to compile CudaAtomicAdd for complex64 because "
+                "sizeof(complex64) != 2*sizeof(float32)");
+  float* addr_as_float = reinterpret_cast<float*>(address);
+  float* val_as_float = reinterpret_cast<float*>(&val);
+  CudaAtomicAdd(addr_as_float, *val_as_float);
+  CudaAtomicAdd(addr_as_float + 1, *(val_as_float + 1));
+#endif
+#endif
+  return *address;
+}
+
+// Custom implementation of atomicAdd for std::complex<double>.
+// This implementation performs to atomic additions on the components
+// using the double atomic wrapper above.
+CUDA_ATOMIC_WRAPPER(Add, complex128) {
+#if defined(__CUDA_ARCH__)
+#if __CUDA_ARCH__ >= 350
+  double2* addr_as_double2 = reinterpret_cast<double2*>(address);
+  double2* val_as_double2 = reinterpret_cast<double2*>(&val);
+  CudaAtomicAdd(&(addr_as_double2->x), val_as_double2->x);
+  CudaAtomicAdd(&(addr_as_double2->y), val_as_double2->y);
+#else
+  static_assert(sizeof(std::complex<double>) == 2 * sizeof(double),
+                "Unable to compile CudaAtomicAdd for complex128 because "
+                "sizeof(complex128) != 2*sizeof(float64)");
+  double* addr_as_double = reinterpret_cast<double*>(address);
+  double* val_as_double = reinterpret_cast<double*>(&val);
+  CudaAtomicAdd(addr_as_double, *val_as_double);
+  CudaAtomicAdd(addr_as_double + 1, *(val_as_double + 1));
+#endif
+#endif
+  return *address;
+}
+
+// Helper functions for CudaAtomicAdd(half*, half), below.
+//
+// Note that if __CUDA_ARCH__ >= 530, we could probably use __hadd2()
+// for a more efficient implementation, assuming that adding -0.0
+// will never harm the neighboring value. In this version, we take special
+// care to guarantee the bits of the untouched value are unchanged.
+inline __device__ uint32 add_to_low_half(uint32 val, float x) {
+  Eigen::half low_half;
+  low_half.x = static_cast<uint16>(val & 0xffffu);
+  low_half = static_cast<Eigen::half>(static_cast<float>(low_half) + x);
+  return (val & 0xffff0000u) | low_half.x;
+}
+
+inline __device__ uint32 add_to_high_half(uint32 val, float x) {
+  Eigen::half high_half;
+  high_half.x = static_cast<uint16>(val >> 16);
+  high_half = static_cast<Eigen::half>(static_cast<float>(high_half) + x);
+  return (val & 0xffffu) | (high_half.x << 16);
+}
+
+// Custom implementation of atomicAdd for half. Note that we don't have
+// atomicCAS() for anything less than 32 bits, so we need to include the
+// other 16 bits in the operation.
+//
+// Unlike the other atomic adds, this version is going to be very slow
+// under high concurrency, since most threads will be spinning on failing
+// their compare-and-swap tests. (The fact that we get false sharing on the
+// neighboring fp16 makes this even worse.) If you are doing a large reduction,
+// you are much better off with doing the intermediate steps in fp32 and then
+// switching to fp16 as late as you can in the calculations.
+//
+// Note: Assumes little endian.
+CUDA_ATOMIC_WRAPPER(Add, Eigen::half) {
+  float val_as_float(val);
+  intptr_t address_int = reinterpret_cast<intptr_t>(address);
+  if ((address_int & 0x2) == 0) {
+    // The half is in the first part of the uint32 (lower 16 bits).
+    uint32* address_as_uint32 = reinterpret_cast<uint32*>(address);
+    assert(((intptr_t)address_as_uint32 & 0x3) == 0);
+    uint32 old = *address_as_uint32, assumed;
+
+    do {
+      assumed = old;
+      old = atomicCAS(address_as_uint32, assumed,
+                      add_to_low_half(assumed, val_as_float));
+
+      // Note: uses integer comparison to avoid hang in case of NaN
+    } while (assumed != old);
+
+    Eigen::half ret;
+    ret.x = old & 0xffffu;
+    return ret;
+  } else {
+    // The half is in the second part of the uint32 (upper 16 bits).
+    uint32* address_as_uint32 = reinterpret_cast<uint32*>(address_int - 2);
+    assert(((intptr_t)address_as_uint32 & 0x3) == 0);
+    uint32 old = *address_as_uint32, assumed;
+
+    do {
+      assumed = old;
+      old = atomicCAS(address_as_uint32, assumed,
+                      add_to_high_half(assumed, val_as_float));
+
+      // Note: uses integer comparison to avoid hang in case of NaN
+    } while (assumed != old);
+
+    Eigen::half ret;
+    ret.x = old >> 16;
+    return ret;
+  }
+}
+
+template <typename T>
+__global__ void SetZero(const int nthreads, T* bottom_diff) {
+  CUDA_1D_KERNEL_LOOP(index, nthreads) { *(bottom_diff + index) = T(0); }
+}
+
+// For atomicSub.
+
+// Custom implementation for sub by just negating the value.
+#define WRAPPED_ATOMIC_SUB(T) \
+  CUDA_ATOMIC_WRAPPER(Sub, T) { return CudaAtomicAdd(address, -val); }
+
+WRAPPED_ATOMIC_SUB(uint64);
+WRAPPED_ATOMIC_SUB(int32);
+WRAPPED_ATOMIC_SUB(uint32);
+WRAPPED_ATOMIC_SUB(Eigen::half);
+WRAPPED_ATOMIC_SUB(float);
+WRAPPED_ATOMIC_SUB(double);
+
+CUDA_ATOMIC_WRAPPER(Sub, complex64) {
+  const std::complex<float> Tneg(-val.real(), -val.imag());
+  return CudaAtomicAdd(address, Tneg);
+}
+
+CUDA_ATOMIC_WRAPPER(Sub, complex128) {
+  const std::complex<double> Tneg(-val.real(), -val.imag());
+  return CudaAtomicAdd(address, Tneg);
+}
+
+#undef WRAPPED_ATOMIC_SUB
+
+// For atomicMul.
+CUDA_ATOMIC_WRAPPER(Mul, int32) {
+  int32 old = *address, assumed;
+  do {
+    assumed = old;
+    old = atomicCAS(address, assumed, val * assumed);
+  } while (assumed != old);
+  return old;
+}
+
+CUDA_ATOMIC_WRAPPER(Mul, uint32) {
+  uint32 old = *address, assumed;
+  do {
+    assumed = old;
+    old = atomicCAS(address, assumed, val * assumed);
+  } while (assumed != old);
+  return old;
+}
+
+CUDA_ATOMIC_WRAPPER(Mul, uint64) {
+  uint64 old = *address, assumed;
+  do {
+    assumed = old;
+    old = atomicCAS(address, assumed, val * assumed);
+  } while (assumed != old);
+  return old;
+}
+
+CUDA_ATOMIC_WRAPPER(Mul, float) {
+  int32* address_as_int = reinterpret_cast<int32*>(address);
+  int32 old = *address_as_int, assumed;
+  do {
+    assumed = old;
+    old = atomicCAS(address_as_int, assumed,
+                    __float_as_int(val * __int_as_float(assumed)));
+  } while (assumed != old);
+  return __int_as_float(old);
+}
+
+CUDA_ATOMIC_WRAPPER(Mul, double) {
+  uint64* address_as_ull = reinterpret_cast<uint64*>(address);
+  uint64 old = *address_as_ull, assumed;
+  do {
+    assumed = old;
+    old = atomicCAS(address_as_ull, assumed,
+                    __double_as_longlong(val * __longlong_as_double(assumed)));
+  } while (assumed != old);
+  return __longlong_as_double(old);
+}
+
+// For atomicDiv.
+CUDA_ATOMIC_WRAPPER(Div, int32) {
+  int32 old = *address, assumed;
+  do {
+    assumed = old;
+    old = atomicCAS(address, assumed, assumed / val);
+  } while (assumed != old);
+  return old;
+}
+
+CUDA_ATOMIC_WRAPPER(Div, uint32) {
+  uint32 old = *address, assumed;
+  do {
+    assumed = old;
+    old = atomicCAS(address, assumed, assumed / val);
+  } while (assumed != old);
+  return old;
+}
+
+CUDA_ATOMIC_WRAPPER(Div, uint64) {
+  uint64 old = *address, assumed;
+  do {
+    assumed = old;
+    old = atomicCAS(address, assumed, assumed / val);
+  } while (assumed != old);
+  return old;
+}
+
+CUDA_ATOMIC_WRAPPER(Div, float) {
+  int32* address_as_int = reinterpret_cast<int32*>(address);
+  int32 old = *address_as_int, assumed;
+  do {
+    assumed = old;
+    old = atomicCAS(address_as_int, assumed,
+                    __float_as_int(__int_as_float(assumed) / val));
+  } while (assumed != old);
+  return __int_as_float(old);
+}
+
+CUDA_ATOMIC_WRAPPER(Div, double) {
+  uint64* address_as_ull = reinterpret_cast<uint64*>(address);
+  uint64 old = *address_as_ull, assumed;
+  do {
+    assumed = old;
+    old = atomicCAS(address_as_ull, assumed,
+                    __double_as_longlong(__longlong_as_double(assumed) / val));
+  } while (assumed != old);
+  return __longlong_as_double(old);
+}
+
+#undef USE_CUDA_ATOMIC
+#undef CUDA_ATOMIC_WRAPPER
+
+template <typename T>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE T tf_min(const T& x, const T& y) {
+  return x > y ? y : x;
+}
+
+template <typename T>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE T tf_max(const T& x, const T& y) {
+  return x < y ? y : x;
+}
+
+__device__ EIGEN_ALWAYS_INLINE unsigned CudaBallot(unsigned mask,
+                                                   int predicate) {
+  return __ballot_sync(mask, predicate);
+}
+
+template <typename T>
+__device__ EIGEN_ALWAYS_INLINE T CudaShuffle(unsigned mask, T value,
+                                             int srcLane,
+                                             int width = warpSize) {
+  return __shfl_sync(mask, value, srcLane, width);
+}
+
+// Variant of the (undocumented) version from the CUDA SDK, but using unsigned
+// instead of float for lo and hi (which is incorrect with ftz, for example).
+// A bug has been filed with NVIDIA and will be fixed in the next CUDA release.
+// TODO(csigg): remove when the bug is fixed in the next CUDA release.
+__device__ EIGEN_ALWAYS_INLINE double CudaShuffle(unsigned mask, double value,
+                                                  int srcLane,
+                                                  int width = warpSize) {
+  unsigned lo, hi;
+  asm volatile("mov.b64 {%0,%1}, %2;" : "=r"(lo), "=r"(hi) : "d"(value));
+  hi = __shfl_sync(mask, hi, srcLane, width);
+  lo = __shfl_sync(mask, lo, srcLane, width);
+  asm volatile("mov.b64 %0, {%1,%2};" : "=d"(value) : "r"(lo), "r"(hi));
+  return value;
+}
+
+template <typename T>
+__device__ EIGEN_ALWAYS_INLINE T CudaShuffleUp(unsigned mask, T value,
+                                               int delta,
+                                               int width = warpSize) {
+  return __shfl_up_sync(mask, value, delta, width);
+}
+
+// Variant of the (undocumented) version from the CUDA SDK, but using unsigned
+// instead of float for lo and hi (which is incorrect with ftz, for example).
+// A bug has been filed with NVIDIA and will be fixed in the next CUDA release.
+// TODO(csigg): remove when the bug is fixed in the next CUDA release.
+__device__ EIGEN_ALWAYS_INLINE double CudaShuffleUp(unsigned mask, double value,
+                                                    int delta,
+                                                    int width = warpSize) {
+  unsigned lo, hi;
+  asm volatile("mov.b64 {%0,%1}, %2;" : "=r"(lo), "=r"(hi) : "d"(value));
+  hi = __shfl_up_sync(mask, hi, delta, width);
+  lo = __shfl_up_sync(mask, lo, delta, width);
+  asm volatile("mov.b64 %0, {%1,%2};" : "=d"(value) : "r"(lo), "r"(hi));
+  return value;
+}
+
+template <typename T>
+__device__ EIGEN_ALWAYS_INLINE T CudaShuffleDown(unsigned mask, T value,
+                                                 int delta,
+                                                 int width = warpSize) {
+  return __shfl_down_sync(mask, value, delta, width);
+}
+
+__device__ EIGEN_ALWAYS_INLINE Eigen::half CudaShuffleDown(
+    unsigned mask, Eigen::half value, int delta, int width = warpSize) {
+  return Eigen::half(
+      __shfl_down_sync(mask, static_cast<uint16>(value), delta, width));
+}
+
+// Variant of the (undocumented) version from the CUDA SDK, but using unsigned
+// instead of float for lo and hi (which is incorrect with ftz, for example).
+// A bug has been filed with NVIDIA and will be fixed in the next CUDA release.
+// TODO(csigg): remove when the bug is fixed in the next CUDA release.
+__device__ EIGEN_ALWAYS_INLINE double CudaShuffleDown(unsigned mask,
+                                                      double value, int delta,
+                                                      int width = warpSize) {
+  unsigned lo, hi;
+  asm volatile("mov.b64 {%0,%1}, %2;" : "=r"(lo), "=r"(hi) : "d"(value));
+  hi = __shfl_down_sync(mask, hi, delta, width);
+  lo = __shfl_down_sync(mask, lo, delta, width);
+  asm volatile("mov.b64 %0, {%1,%2};" : "=d"(value) : "r"(lo), "r"(hi));
+  return value;
+}
+
+template <typename T>
+__device__ EIGEN_ALWAYS_INLINE T CudaShuffleXor(unsigned mask, T value,
+                                                int laneMask,
+                                                int width = warpSize) {
+  return __shfl_xor_sync(mask, value, laneMask, width);
+}
+
+__device__ EIGEN_ALWAYS_INLINE Eigen::half CudaShuffleXor(
+    unsigned mask, Eigen::half value, int laneMask, int width = warpSize) {
+  return Eigen::half(
+      __shfl_xor_sync(mask, static_cast<uint16>(value), laneMask, width));
+}
+
+// Variant of the (undocumented) version from the CUDA SDK, but using unsigned
+// instead of float for lo and hi (which is incorrect with ftz, for example).
+// A bug has been filed with NVIDIA and will be fixed in the next CUDA release.
+// TODO(csigg): remove when the bug is fixed in the next CUDA release.
+__device__ EIGEN_ALWAYS_INLINE double CudaShuffleXor(unsigned mask,
+                                                     double value, int laneMask,
+                                                     int width = warpSize) {
+  unsigned lo, hi;
+  asm volatile("mov.b64 {%0,%1}, %2;" : "=r"(lo), "=r"(hi) : "d"(value));
+  hi = __shfl_xor_sync(mask, hi, laneMask, width);
+  lo = __shfl_xor_sync(mask, lo, laneMask, width);
+  asm volatile("mov.b64 %0, {%1,%2};" : "=d"(value) : "r"(lo), "r"(hi));
+  return value;
+}
+
 }  // namespace tensorflow
 
+#undef DIV_UP
+
 #endif  // GOOGLE_CUDA
+
 #endif  // TENSORFLOW_CORE_UTIL_CUDA_KERNEL_HELPER_H_
diff --git a/tensorflow/core/util/cuda_kernel_helper_test.cu.cc b/tensorflow/core/util/cuda_kernel_helper_test.cu.cc
index 4eb1558e58..6991554eff 100644
--- a/tensorflow/core/util/cuda_kernel_helper_test.cu.cc
+++ b/tensorflow/core/util/cuda_kernel_helper_test.cu.cc
@@ -52,11 +52,11 @@ __global__ void Count1D(CudaLaunchConfig config, int bufsize, int* outbuf) {
   }
 }
 __global__ void Count2D(Cuda2DLaunchConfig config, int bufsize, int* outbuf) {
-  CUDA_AXIS_KERNEL_LOOP(x, config.virtual_thread_count.x, X) {
+  CUDA_AXIS_KERNEL_LOOP(x, config.virtual_thread_count, x) {
     if (x < 0) {  // x might overflow when testing extreme case
       break;
     }
-    CUDA_AXIS_KERNEL_LOOP(y, config.virtual_thread_count.y, Y) {
+    CUDA_AXIS_KERNEL_LOOP(y, config.virtual_thread_count, y) {
       if (y < 0) {  // y might overflow when testing extreme case
         break;
       }
@@ -66,15 +66,15 @@ __global__ void Count2D(Cuda2DLaunchConfig config, int bufsize, int* outbuf) {
   }
 }
 __global__ void Count3D(Cuda3DLaunchConfig config, int bufsize, int* outbuf) {
-  CUDA_AXIS_KERNEL_LOOP(x, config.virtual_thread_count.x, X) {
+  CUDA_AXIS_KERNEL_LOOP(x, config.virtual_thread_count, x) {
     if (x < 0) {  // x might overflow when testing extreme case
       break;
     }
-    CUDA_AXIS_KERNEL_LOOP(y, config.virtual_thread_count.y, Y) {
+    CUDA_AXIS_KERNEL_LOOP(y, config.virtual_thread_count, y) {
       if (y < 0) {  // y might overflow when testing extreme case
         break;
       }
-      CUDA_AXIS_KERNEL_LOOP(z, config.virtual_thread_count.z, Z) {
+      CUDA_AXIS_KERNEL_LOOP(z, config.virtual_thread_count, z) {
         if (z < 0) {  // z might overflow when testing extreme case
           break;
         }
@@ -94,7 +94,7 @@ class CudaLaunchConfigTest : public ::testing::Test {
   const int bufsize = 1024;
   int* outbuf = nullptr;
   Eigen::CudaStreamDevice stream;
-  Eigen::GpuDevice d = Eigen::GpuDevice(&stream);
+  GPUDevice d = GPUDevice(&stream);
 
   virtual void SetUp() {
     cudaError_t err = cudaMallocManaged(&outbuf, sizeof(int) * bufsize);
diff --git a/tensorflow/core/util/cuda_launch_config.h b/tensorflow/core/util/cuda_launch_config.h
deleted file mode 100644
index 3ea33ee6cf..0000000000
--- a/tensorflow/core/util/cuda_launch_config.h
+++ /dev/null
@@ -1,284 +0,0 @@
-/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-#ifndef TENSORFLOW_CORE_UTIL_CUDA_LAUNCH_CONFIG_H_
-#define TENSORFLOW_CORE_UTIL_CUDA_LAUNCH_CONFIG_H_
-
-#if GOOGLE_CUDA
-
-#include <algorithm>
-
-#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
-#include "cuda/include/cuda.h"
-#include "tensorflow/core/framework/op_kernel.h"
-#include "tensorflow/core/platform/logging.h"
-#include "tensorflow/core/platform/stream_executor.h"
-#include "tensorflow/core/platform/types.h"
-
-// Usage of GetCudaLaunchConfig, GetCuda2DLaunchConfig, and
-// GetCuda3DLaunchConfig:
-//
-// There are two versions of GetCudaLaunchConfig and GetCuda2DLaunchConfig, one
-// version uses heuristics without any knowledge of the device kernel, the other
-// version uses cudaOccupancyMaxPotentialBlockSize to determine the theoretical
-// launch parameters that maximize occupancy. Currently, only the maximum
-// occupancy version of GetCuda3DLaunchConfig is available.
-//
-// For large number of work elements, the convention is that each kernel would
-// iterate through its assigned range. The return value of GetCudaLaunchConfig
-// is struct CudaLaunchConfig, which contains all the information needed for the
-// kernel launch, including: virtual number of threads, the number of threads
-// per block and number of threads per block used inside <<< >>> of a kernel
-// launch. GetCuda2DLaunchConfig and GetCuda3DLaunchConfig does the same thing
-// as CudaLaunchConfig. The only difference is the dimension. The macros
-// CUDA_1D_KERNEL_LOOP and CUDA_AXIS_KERNEL_LOOP might be used to do inner loop.
-//
-/* Sample code:
-
-__global__ void MyKernel1D(CudaLaunchConfig config, other_args...) {
-  CUDA_1D_KERNEL_LOOP(x, config.virtual_thread_count) {
-    do_your_job_here;
-  }
-}
-
-__global__ void MyKernel2D(Cuda2DLaunchConfig config, other_args...) {
-  CUDA_AXIS_KERNEL_LOOP(x, config.virtual_thread_count, x) {
-    CUDA_AXIS_KERNEL_LOOP(y, config.virtual_thread_count, y) {
-      do_your_job_here;
-    }
-  }
-}
-
-__global__ void MyKernel3D(Cuda3DLaunchConfig config, other_args...) {
-  CUDA_AXIS_KERNEL_LOOP(x, config.virtual_thread_count, x) {
-    CUDA_AXIS_KERNEL_LOOP(y, config.virtual_thread_count, y) {
-      CUDA_AXIS_KERNEL_LOOP(z, config.virtual_thread_count, z) {
-        do_your_job_here;
-      }
-    }
-  }
-}
-
-void MyDriverFunc(const Eigen::GpuDevice &d) {
-  // use heuristics
-  CudaLaunchConfig cfg1 = GetCudaLaunchConfig(10240, d);
-  MyKernel1D <<<config.block_count,
-                config.thread_per_block, 0, d.stream()>>> (cfg1, other_args...);
-  Cuda2DLaunchConfig cfg2 = GetCuda2DLaunchConfig(10240, 10240, d);
-  MyKernel2D <<<config.block_count,
-                config.thread_per_block, 0, d.stream()>>> (cfg2, other_args...);
-  Cuda3DLaunchConfig cfg3 = GetCuda3DLaunchConfig(4096, 4096, 100, d);
-  MyKernel3D <<<config.block_count,
-                config.thread_per_block, 0, d.stream()>>> (cfg3, other_args...);
-
-  // maximize occupancy
-  CudaLaunchConfig cfg4 = GetCudaLaunchConfig(10240, d, MyKernel1D, 0, 0 );
-  MyKernel1D <<<config.block_count,
-                config.thread_per_block, 0, d.stream()>>> (cfg4, other_args...);
-  Cuda2DLaunchConfig cfg5 = GetCuda2DLaunchConfig(10240, 10240, d,
-                                                  MyKernel1D, 0, 0);
-  MyKernel2D <<<config.block_count,
-                config.thread_per_block, 0, d.stream()>>> (cfg5, other_args...);
-  Cuda3DLaunchConfig cfg6 = GetCuda3DLaunchConfig(4096, 4096, 100, d,
-                                                  MyKernel1D, 0, 0);
-  MyKernel3D <<<config.block_count,
-                config.thread_per_block, 0, d.stream()>>> (cfg6, other_args...);
-}
-
-// See the test for this for more example:
-//
-https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/util/cuda_kernel_helper_test.cu.cc
-
-*/
-
-namespace tensorflow {
-
-inline int DivUp(int a, int b) { return (a + b - 1) / b; }
-
-struct CudaLaunchConfig {
-  // Logical number of thread that works on the elements. If each logical
-  // thread works on exactly a single element, this is the same as the working
-  // element count.
-  int virtual_thread_count = -1;
-  // Number of threads per block.
-  int thread_per_block = -1;
-  // Number of blocks for Cuda kernel launch.
-  int block_count = -1;
-};
-
-// Calculate the Cuda launch config we should use for a kernel launch.
-// This is assuming the kernel is quite simple and will largely be
-// memory-limited.
-// REQUIRES: work_element_count > 0.
-inline CudaLaunchConfig GetCudaLaunchConfig(int work_element_count,
-                                            const Eigen::GpuDevice& d) {
-  CHECK_GT(work_element_count, 0);
-  CudaLaunchConfig config;
-  const int virtual_thread_count = work_element_count;
-  const int physical_thread_count = std::min(
-      d.getNumCudaMultiProcessors() * d.maxCudaThreadsPerMultiProcessor(),
-      virtual_thread_count);
-  const int thread_per_block = std::min(1024, d.maxCudaThreadsPerBlock());
-  const int block_count =
-      std::min(DivUp(physical_thread_count, thread_per_block),
-               d.getNumCudaMultiProcessors());
-
-  config.virtual_thread_count = virtual_thread_count;
-  config.thread_per_block = thread_per_block;
-  config.block_count = block_count;
-  return config;
-}
-
-// Calculate the Cuda launch config we should use for a kernel launch. This
-// variant takes the resource limits of func into account to maximize occupancy.
-// REQUIRES: work_element_count > 0.
-template <typename DeviceFunc>
-inline CudaLaunchConfig GetCudaLaunchConfig(int work_element_count,
-                                            const Eigen::GpuDevice& d,
-                                            DeviceFunc func,
-                                            size_t dynamic_shared_memory_size,
-                                            int block_size_limit) {
-  CHECK_GT(work_element_count, 0);
-  CudaLaunchConfig config;
-  int block_count = 0;
-  int thread_per_block = 0;
-
-  cudaError_t err = cudaOccupancyMaxPotentialBlockSize(
-      &block_count, &thread_per_block, func, dynamic_shared_memory_size,
-      block_size_limit);
-  CHECK_EQ(err, cudaSuccess);
-
-  block_count =
-      std::min(block_count, DivUp(work_element_count, thread_per_block));
-
-  config.virtual_thread_count = work_element_count;
-  config.thread_per_block = thread_per_block;
-  config.block_count = block_count;
-  return config;
-}
-
-struct Cuda2DLaunchConfig {
-  dim3 virtual_thread_count = dim3(0, 0, 0);
-  dim3 thread_per_block = dim3(0, 0, 0);
-  dim3 block_count = dim3(0, 0, 0);
-};
-
-inline Cuda2DLaunchConfig GetCuda2DLaunchConfig(int xdim, int ydim,
-                                                const Eigen::GpuDevice& d) {
-  Cuda2DLaunchConfig config;
-
-  if (xdim <= 0 || ydim <= 0) {
-    return config;
-  }
-
-  const int kThreadsPerBlock = 256;
-  int block_cols = std::min(xdim, kThreadsPerBlock);
-  // ok to round down here and just do more loops in the kernel
-  int block_rows = std::max(kThreadsPerBlock / block_cols, 1);
-
-  const int physical_thread_count =
-      d.getNumCudaMultiProcessors() * d.maxCudaThreadsPerMultiProcessor();
-
-  const int max_blocks = std::max(physical_thread_count / kThreadsPerBlock, 1);
-
-  config.virtual_thread_count = dim3(xdim, ydim, 1);
-  config.thread_per_block = dim3(block_cols, block_rows, 1);
-
-  int grid_x = std::min(DivUp(xdim, block_cols), max_blocks);
-
-  config.block_count = dim3(
-      grid_x, std::min(max_blocks / grid_x, std::max(ydim / block_rows, 1)), 1);
-  return config;
-}
-
-// Calculate the Cuda 2D and 3D launch config we should use for a kernel launch.
-// This variant takes the resource limits of func into account to maximize
-// occupancy.
-using Cuda3DLaunchConfig = Cuda2DLaunchConfig;
-
-template <typename DeviceFunc>
-inline Cuda3DLaunchConfig GetCuda3DLaunchConfig(
-    int xdim, int ydim, int zdim, const Eigen::GpuDevice& d, DeviceFunc func,
-    size_t dynamic_shared_memory_size, int block_size_limit) {
-  Cuda3DLaunchConfig config;
-
-  if (xdim <= 0 || ydim <= 0 || zdim <= 0) {
-    return config;
-  }
-
-  int dev;
-  cudaGetDevice(&dev);
-  cudaDeviceProp deviceProp;
-  cudaGetDeviceProperties(&deviceProp, dev);
-  int xthreadlimit = deviceProp.maxThreadsDim[0];
-  int ythreadlimit = deviceProp.maxThreadsDim[1];
-  int zthreadlimit = deviceProp.maxThreadsDim[2];
-  int xgridlimit = deviceProp.maxGridSize[0];
-  int ygridlimit = deviceProp.maxGridSize[1];
-  int zgridlimit = deviceProp.maxGridSize[2];
-
-  int block_count = 0;
-  int thread_per_block = 0;
-  cudaError_t err = cudaOccupancyMaxPotentialBlockSize(
-      &block_count, &thread_per_block, func, dynamic_shared_memory_size,
-      block_size_limit);
-  CHECK_EQ(err, cudaSuccess);
-
-  auto min3 = [](int a, int b, int c) { return std::min(a, std::min(b, c)); };
-
-  int threadsx = min3(xdim, thread_per_block, xthreadlimit);
-  int threadsy =
-      min3(ydim, std::max(thread_per_block / threadsx, 1), ythreadlimit);
-  int threadsz =
-      min3(zdim, std::max(thread_per_block / (threadsx * threadsy), 1),
-           zthreadlimit);
-
-  int blocksx = min3(block_count, DivUp(xdim, threadsx), xgridlimit);
-  int blocksy =
-      min3(DivUp(block_count, blocksx), DivUp(ydim, threadsy), ygridlimit);
-  int blocksz = min3(DivUp(block_count, (blocksx * blocksy)),
-                     DivUp(zdim, threadsz), zgridlimit);
-
-  config.virtual_thread_count = dim3(xdim, ydim, zdim);
-  config.thread_per_block = dim3(threadsx, threadsy, threadsz);
-  config.block_count = dim3(blocksx, blocksy, blocksz);
-  return config;
-}
-
-template <typename DeviceFunc>
-inline Cuda2DLaunchConfig GetCuda2DLaunchConfig(
-    int xdim, int ydim, const Eigen::GpuDevice& d, DeviceFunc func,
-    size_t dynamic_shared_memory_size, int block_size_limit) {
-  return GetCuda3DLaunchConfig(xdim, ydim, 1, d, func,
-                               dynamic_shared_memory_size, block_size_limit);
-}
-
-// Returns a raw reference to the current cuda stream.  Required by a
-// number of kernel calls (for which StreamInterface* does not work), i.e.
-// CUB and certain cublas primitives.
-inline const cudaStream_t& GetCudaStream(OpKernelContext* context) {
-  const cudaStream_t* ptr = CHECK_NOTNULL(
-      reinterpret_cast<const cudaStream_t*>(context->op_device_context()
-                                                ->stream()
-                                                ->implementation()
-                                                ->CudaStreamMemberHack()));
-  return *ptr;
-}
-
-}  // namespace tensorflow
-
-#endif  // GOOGLE_CUDA
-
-#endif  // TENSORFLOW_CORE_UTIL_CUDA_KERNEL_HELPER_H_
-- 
GitLab


From 6affacedbbea1a55cf4b7e33f881c6cd8c3c2493 Mon Sep 17 00:00:00 2001
From: Austin Anderson <angerson@google.com>
Date: Tue, 5 Dec 2017 11:59:17 -0800
Subject: [PATCH 0644/1225] Add android rule helpers and cleanup input loops

This change teaches the configure script how to search for Android NDK
and SDK installations and create new WORKSPACE rules pointing to them.
It also refactors many similar loop-over-user-input functions into using
a reusable method (not the more complex ones).

Specifying an SDK directory will further query for the available SDK API
levels and build tools versions, but it won't perform any compatibility
checks.

Like other settings, every android-related setting can be set beforehand
via an env param. The script will not ask for any Android settings if
there are already any android repository rules in the WORKSPACE.

The script will emit a warning if using an NDK version newer than 14 due
to https://github.com/bazelbuild/bazel/issues/4068.

PiperOrigin-RevId: 177989785
---
 configure.py | 407 +++++++++++++++++++++++++++++++++++++++------------
 1 file changed, 312 insertions(+), 95 deletions(-)

diff --git a/configure.py b/configure.py
index 1f205861f1..99c0a8d321 100644
--- a/configure.py
+++ b/configure.py
@@ -34,6 +34,8 @@ except ImportError:
 
 _TF_BAZELRC = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                            '.tf_configure.bazelrc')
+_TF_WORKSPACE = os.path.join(os.path.dirname(os.path.abspath(__file__)),
+                             'WORKSPACE')
 _DEFAULT_CUDA_VERSION = '8.0'
 _DEFAULT_CUDNN_VERSION = '6'
 _DEFAULT_CUDA_COMPUTE_CAPABILITIES = '3.5,5.2'
@@ -44,6 +46,13 @@ _DEFAULT_CUDA_PATH_WIN = ('C:/Program Files/NVIDIA GPU Computing '
 _TF_OPENCL_VERSION = '1.2'
 _DEFAULT_COMPUTECPP_TOOLKIT_PATH = '/usr/local/computecpp'
 _DEFAULT_TRISYCL_INCLUDE_DIR = '/usr/local/triSYCL/include'
+_SUPPORTED_ANDROID_NDK_VERSIONS = [10, 11, 12, 13, 14, 15]
+
+_DEFAULT_PROMPT_ASK_ATTEMPTS = 10
+
+
+class UserInputError(Exception):
+  pass
 
 
 def is_windows():
@@ -158,7 +167,7 @@ def get_python_path(environ_cp, python_bin_path):
   try:
     library_paths = run_shell(
         [python_bin_path, '-c',
-         'import site; print("\\n".join(site.getsitepackages()))']).split("\n")
+         'import site; print("\\n".join(site.getsitepackages()))']).split('\n')
   except subprocess.CalledProcessError:
     library_paths = [run_shell(
         [python_bin_path, '-c',
@@ -557,6 +566,218 @@ def set_clang_cuda_compiler_path(environ_cp):
                               clang_cuda_compiler_path)
 
 
+def prompt_loop_or_load_from_env(
+    environ_cp,
+    var_name,
+    var_default,
+    ask_for_var,
+    check_success,
+    error_msg,
+    suppress_default_error=False,
+    n_ask_attempts=_DEFAULT_PROMPT_ASK_ATTEMPTS
+):
+  """Loop over user prompts for an ENV param until receiving a valid response.
+
+  For the env param var_name, read from the environment or verify user input
+  until receiving valid input. When done, set var_name in the environ_cp to its
+  new value.
+
+  Args:
+    environ_cp: (Dict) copy of the os.environ.
+    var_name: (String) string for name of environment variable, e.g. "TF_MYVAR".
+    var_default: (String) default value string.
+    ask_for_var: (String) string for how to ask for user input.
+    check_success: (Function) function that takes one argument and returns a
+      boolean. Should return True if the value provided is considered valid. May
+      contain a complex error message if error_msg does not provide enough
+      information. In that case, set suppress_default_error to True.
+    error_msg: (String) String with one and only one '%s'. Formatted with each
+      invalid response upon check_success(input) failure.
+    suppress_default_error: (Bool) Suppress the above error message in favor of
+      one from the check_success function.
+    n_ask_attempts: (Integer) Number of times to query for valid input before
+      raising an error and quitting.
+
+  Returns:
+    [String] The value of var_name after querying for input.
+
+  Raises:
+    UserInputError: if a query has been attempted n_ask_attempts times without
+    success, assume that the user has made a scripting error, and will continue
+    to provide invalid input. Raise the error to avoid infinitely looping.
+  """
+  default = environ_cp.get(var_name) or var_default
+  full_query = '%s [Default is %s]: ' % (
+      ask_for_var,
+      default,
+  )
+
+  for _ in range(n_ask_attempts):
+    val = get_from_env_or_user_or_default(environ_cp,
+                                          var_name,
+                                          full_query,
+                                          default)
+    if check_success(val):
+      break
+    if not suppress_default_error:
+      print(error_msg % val)
+    environ_cp[var_name] = ''
+  else:
+    raise UserInputError('Invalid %s setting was provided %d times in a row. '
+                         'Assuming to be a scripting mistake.' %
+                         (var_name, n_ask_attempts))
+
+  environ_cp[var_name] = val
+  return val
+
+
+def create_android_ndk_rule(environ_cp):
+  """Set ANDROID_NDK_HOME and write Android NDK WORKSPACE rule."""
+  if is_windows() or is_cygwin():
+    default_ndk_path = cygpath('%s/Android/Sdk/ndk-bundle' %
+                               environ_cp['APPDATA'])
+  elif is_macos():
+    default_ndk_path = '%s/library/Android/Sdk/ndk-bundle' % environ_cp['HOME']
+  else:
+    default_ndk_path = '%s/Android/Sdk/ndk-bundle' % environ_cp['HOME']
+
+  def valid_ndk_path(path):
+    return (os.path.exists(path) and
+            os.path.exists(os.path.join(path, 'source.properties')))
+
+  android_ndk_home_path = prompt_loop_or_load_from_env(
+      environ_cp,
+      var_name='ANDROID_NDK_HOME',
+      var_default=default_ndk_path,
+      ask_for_var='Please specify the home path of the Android NDK to use.',
+      check_success=valid_ndk_path,
+      error_msg=('The path %s or its child file "source.properties" '
+                 'does not exist.')
+  )
+
+  write_android_ndk_workspace_rule(android_ndk_home_path)
+
+
+def create_android_sdk_rule(environ_cp):
+  """Set Android variables and write Android SDK WORKSPACE rule."""
+  if is_windows() or is_cygwin():
+    default_sdk_path = cygpath('%s/Android/Sdk' % environ_cp['APPDATA'])
+  elif is_macos():
+    default_sdk_path = '%s/library/Android/Sdk/ndk-bundle' % environ_cp['HOME']
+  else:
+    default_sdk_path = '%s/Android/Sdk' % environ_cp['HOME']
+
+  def valid_sdk_path(path):
+    return (os.path.exists(path) and
+            os.path.exists(os.path.join(path, 'platforms')) and
+            os.path.exists(os.path.join(path, 'build-tools')))
+
+  android_sdk_home_path = prompt_loop_or_load_from_env(
+      environ_cp,
+      var_name='ANDROID_SDK_HOME',
+      var_default=default_sdk_path,
+      ask_for_var='Please specify the home path of the Android SDK to use.',
+      check_success=valid_sdk_path,
+      error_msg=('Either %s does not exist, or it does not contain the '
+                 'subdirectories "platforms" and "build-tools".'))
+
+  platforms = os.path.join(android_sdk_home_path, 'platforms')
+  api_levels = sorted(os.listdir(platforms))
+  api_levels = [x.replace('android-', '') for x in api_levels]
+
+  def valid_api_level(api_level):
+    return os.path.exists(os.path.join(android_sdk_home_path,
+                                       'platforms',
+                                       'android-' + api_level))
+
+  android_api_level = prompt_loop_or_load_from_env(
+      environ_cp,
+      var_name='ANDROID_API_LEVEL',
+      var_default=api_levels[-1],
+      ask_for_var=('Please specify the Android SDK API level to use. '
+                   '[Available levels: %s]') % api_levels,
+      check_success=valid_api_level,
+      error_msg='Android-%s is not present in the SDK path.')
+
+  build_tools = os.path.join(android_sdk_home_path, 'build-tools')
+  versions = sorted(os.listdir(build_tools))
+
+  def valid_build_tools(version):
+    return os.path.exists(os.path.join(android_sdk_home_path,
+                                       'build-tools',
+                                       version))
+
+  android_build_tools_version = prompt_loop_or_load_from_env(
+      environ_cp,
+      var_name='ANDROID_BUILD_TOOLS_VERSION',
+      var_default=versions[-1],
+      ask_for_var=('Please specify an Android build tools version to use. '
+                   '[Available versions: %s]') % versions,
+      check_success=valid_build_tools,
+      error_msg=('The selected SDK does not have build-tools version %s '
+                 'available.'))
+
+  write_android_sdk_workspace_rule(android_sdk_home_path,
+                                   android_build_tools_version,
+                                   android_api_level)
+
+
+def write_android_sdk_workspace_rule(android_sdk_home_path,
+                                     android_build_tools_version,
+                                     android_api_level):
+  print('Writing android_sdk_workspace rule.\n')
+  with open(_TF_WORKSPACE, 'a') as f:
+    f.write("""
+android_sdk_repository(
+  name="androidsdk",
+  api_level=%s,
+  path="%s",
+  build_tools_version="%s")\n
+""" % (android_api_level, android_sdk_home_path, android_build_tools_version))
+
+
+def write_android_ndk_workspace_rule(android_ndk_home_path):
+  print('Writing android_ndk_workspace rule.')
+  ndk_api_level = check_ndk_level(android_ndk_home_path)
+  if int(ndk_api_level) not in _SUPPORTED_ANDROID_NDK_VERSIONS:
+    print('WARNING: The API level of the NDK in %s is %s, which is not '
+          'supported by Bazel (officially supported versions: %s). Please use '
+          'another version. Compiling Android targets may result in confusing '
+          'errors.\n' % (android_ndk_home_path, ndk_api_level,
+                         _SUPPORTED_ANDROID_NDK_VERSIONS))
+  with open(_TF_WORKSPACE, 'a') as f:
+    f.write("""
+android_ndk_repository(
+  name="androidndk",
+  path="%s",
+  api_level=%s)\n
+""" % (android_ndk_home_path, ndk_api_level))
+
+
+def check_ndk_level(android_ndk_home_path):
+  """Check the revision number of an Android NDK path."""
+  properties_path = '%s/source.properties' % android_ndk_home_path
+  if is_windows() or is_cygwin():
+    properties_path = cygpath(properties_path)
+  with open(properties_path, 'r') as f:
+    filedata = f.read()
+
+  revision = re.search(r'Pkg.Revision = (\d+)', filedata)
+  if revision:
+    return revision.group(1)
+  return None
+
+
+def workspace_has_any_android_rule():
+  """Check the WORKSPACE for existing android_*_repository rules."""
+  with open(_TF_WORKSPACE, 'r') as f:
+    workspace = f.read()
+  has_any_rule = re.search(r'^android_[ns]dk_repository',
+                           workspace,
+                           re.MULTILINE)
+  return has_any_rule
+
+
 def set_gcc_host_compiler_path(environ_cp):
   """Set GCC_HOST_COMPILER_PATH."""
   default_gcc_host_compiler_path = which('gcc') or ''
@@ -566,23 +787,16 @@ def set_gcc_host_compiler_path(environ_cp):
     # os.readlink is only available in linux
     default_gcc_host_compiler_path = os.path.realpath(cuda_bin_symlink)
 
-  ask_gcc_path = (
-      'Please specify which gcc should be used by nvcc as the '
-      'host compiler. [Default is %s]: ') % default_gcc_host_compiler_path
-  while True:
-    gcc_host_compiler_path = get_from_env_or_user_or_default(
-        environ_cp, 'GCC_HOST_COMPILER_PATH', ask_gcc_path,
-        default_gcc_host_compiler_path)
-
-    if os.path.exists(gcc_host_compiler_path):
-      break
-
-    # Reset and retry
-    print('Invalid gcc path. %s cannot be found' % gcc_host_compiler_path)
-    environ_cp['GCC_HOST_COMPILER_PATH'] = ''
+  gcc_host_compiler_path = prompt_loop_or_load_from_env(
+      environ_cp,
+      var_name='GCC_HOST_COMPILER_PATH',
+      var_default=default_gcc_host_compiler_path,
+      ask_for_var=
+      'Please specify which gcc should be used by nvcc as the host compiler.',
+      check_success=os.path.exists,
+      error_msg='Invalid gcc path. %s cannot be found.',
+  )
 
-  # Set GCC_HOST_COMPILER_PATH
-  environ_cp['GCC_HOST_COMPILER_PATH'] = gcc_host_compiler_path
   write_action_env_to_bazelrc('GCC_HOST_COMPILER_PATH', gcc_host_compiler_path)
 
 
@@ -810,124 +1024,110 @@ def set_other_cuda_vars(environ_cp):
 def set_host_cxx_compiler(environ_cp):
   """Set HOST_CXX_COMPILER."""
   default_cxx_host_compiler = which('g++') or ''
-  ask_cxx_host_compiler = (
-      'Please specify which C++ compiler should be used as'
-      ' the host C++ compiler. [Default is %s]: ') % default_cxx_host_compiler
 
-  while True:
-    host_cxx_compiler = get_from_env_or_user_or_default(
-        environ_cp, 'HOST_CXX_COMPILER', ask_cxx_host_compiler,
-        default_cxx_host_compiler)
-    if os.path.exists(host_cxx_compiler):
-      break
-
-    # Reset and retry
-    print('Invalid C++ compiler path. %s cannot be found' % host_cxx_compiler)
-    environ_cp['HOST_CXX_COMPILER'] = ''
+  host_cxx_compiler = prompt_loop_or_load_from_env(
+      environ_cp,
+      var_name='HOST_CXX_COMPILER',
+      var_default=default_cxx_host_compiler,
+      ask_for_var=('Please specify which C++ compiler should be used as the '
+                   'host C++ compiler.'),
+      check_success=os.path.exists,
+      error_msg='Invalid C++ compiler path. %s cannot be found.',
+  )
 
-  # Set HOST_CXX_COMPILER
-  environ_cp['HOST_CXX_COMPILER'] = host_cxx_compiler
   write_action_env_to_bazelrc('HOST_CXX_COMPILER', host_cxx_compiler)
 
 
 def set_host_c_compiler(environ_cp):
   """Set HOST_C_COMPILER."""
   default_c_host_compiler = which('gcc') or ''
-  ask_c_host_compiler = (
-      'Please specify which C compiler should be used as the'
-      ' host C compiler. [Default is %s]: ') % default_c_host_compiler
-
-  while True:
-    host_c_compiler = get_from_env_or_user_or_default(
-        environ_cp, 'HOST_C_COMPILER', ask_c_host_compiler,
-        default_c_host_compiler)
-    if os.path.exists(host_c_compiler):
-      break
 
-    # Reset and retry
-    print('Invalid C compiler path. %s cannot be found' % host_c_compiler)
-    environ_cp['HOST_C_COMPILER'] = ''
+  host_c_compiler = prompt_loop_or_load_from_env(
+      environ_cp,
+      var_name='HOST_C_COMPILER',
+      var_default=default_c_host_compiler,
+      ask_for_var=('Please specify which C compiler should be used as the host'
+                   'C compiler.'),
+      check_success=os.path.exists,
+      error_msg='Invalid C compiler path. %s cannot be found.',
+  )
 
-  # Set HOST_C_COMPILER
-  environ_cp['HOST_C_COMPILER'] = host_c_compiler
   write_action_env_to_bazelrc('HOST_C_COMPILER', host_c_compiler)
 
 
 def set_computecpp_toolkit_path(environ_cp):
   """Set COMPUTECPP_TOOLKIT_PATH."""
-  ask_computecpp_toolkit_path = ('Please specify the location where ComputeCpp '
-                                 'for SYCL %s is installed. [Default is %s]: '
-                                ) % (_TF_OPENCL_VERSION,
-                                     _DEFAULT_COMPUTECPP_TOOLKIT_PATH)
 
-  while True:
-    computecpp_toolkit_path = get_from_env_or_user_or_default(
-        environ_cp, 'COMPUTECPP_TOOLKIT_PATH', ask_computecpp_toolkit_path,
-        _DEFAULT_COMPUTECPP_TOOLKIT_PATH)
+  def toolkit_exists(toolkit_path):
+    """Check if a computecpp toolkit path is valid."""
     if is_linux():
       sycl_rt_lib_path = 'lib/libComputeCpp.so'
     else:
       sycl_rt_lib_path = ''
 
-    sycl_rt_lib_path_full = os.path.join(computecpp_toolkit_path,
+    sycl_rt_lib_path_full = os.path.join(toolkit_path,
                                          sycl_rt_lib_path)
-    if os.path.exists(sycl_rt_lib_path_full):
-      break
+    exists = os.path.exists(sycl_rt_lib_path_full)
+    if not exists:
+      print('Invalid SYCL %s library path. %s cannot be found' %
+            (_TF_OPENCL_VERSION, sycl_rt_lib_path_full))
+    return exists
 
-    print('Invalid SYCL %s library path. %s cannot be found' %
-          (_TF_OPENCL_VERSION, sycl_rt_lib_path_full))
-    environ_cp['COMPUTECPP_TOOLKIT_PATH'] = ''
+  computecpp_toolkit_path = prompt_loop_or_load_from_env(
+      environ_cp,
+      var_name='COMPUTECPP_TOOLKIT_PATH',
+      var_default=_DEFAULT_COMPUTECPP_TOOLKIT_PATH,
+      ask_for_var=(
+          'Please specify the location where ComputeCpp for SYCL %s is '
+          'installed.' % _TF_OPENCL_VERSION),
+      check_success=toolkit_exists,
+      error_msg='Invalid SYCL compiler path. %s cannot be found.',
+      suppress_default_error=True)
 
-  # Set COMPUTECPP_TOOLKIT_PATH
-  environ_cp['COMPUTECPP_TOOLKIT_PATH'] = computecpp_toolkit_path
   write_action_env_to_bazelrc('COMPUTECPP_TOOLKIT_PATH',
                               computecpp_toolkit_path)
 
 
 def set_trisycl_include_dir(environ_cp):
   """Set TRISYCL_INCLUDE_DIR."""
-  ask_trisycl_include_dir = ('Please specify the location of the triSYCL '
-                             'include directory. (Use --config=sycl_trisycl '
-                             'when building with Bazel) '
-                             '[Default is %s]: ') % (
-                                 _DEFAULT_TRISYCL_INCLUDE_DIR)
-  while True:
-    trisycl_include_dir = get_from_env_or_user_or_default(
-        environ_cp, 'TRISYCL_INCLUDE_DIR', ask_trisycl_include_dir,
-        _DEFAULT_TRISYCL_INCLUDE_DIR)
-    if os.path.exists(trisycl_include_dir):
-      break
 
-    print('Invalid triSYCL include directory, %s cannot be found' %
-          (trisycl_include_dir))
+  trisycl_include_dir = prompt_loop_or_load_from_env(
+      environ_cp,
+      var_name='TRISYCL_INCLUDE_DIR',
+      var_default=_DEFAULT_TRISYCL_INCLUDE_DIR,
+      ask_for_var=('Please specify the location of the triSYCL include '
+                   'directory. (Use --config=sycl_trisycl when building with '
+                   'Bazel)'),
+      check_success=os.path.exists,
+      error_msg='Invalid trySYCL include directory. %s cannot be found.',
+      suppress_default_error=True)
 
-  # Set TRISYCL_INCLUDE_DIR
-  environ_cp['TRISYCL_INCLUDE_DIR'] = trisycl_include_dir
   write_action_env_to_bazelrc('TRISYCL_INCLUDE_DIR', trisycl_include_dir)
 
 
 def set_mpi_home(environ_cp):
   """Set MPI_HOME."""
+
   default_mpi_home = which('mpirun') or which('mpiexec') or ''
   default_mpi_home = os.path.dirname(os.path.dirname(default_mpi_home))
 
-  ask_mpi_home = ('Please specify the MPI toolkit folder. [Default is %s]: '
-                 ) % default_mpi_home
-  while True:
-    mpi_home = get_from_env_or_user_or_default(environ_cp, 'MPI_HOME',
-                                               ask_mpi_home, default_mpi_home)
-
-    if os.path.exists(os.path.join(mpi_home, 'include')) and os.path.exists(
-        os.path.join(mpi_home, 'lib')):
-      break
-
-    print('Invalid path to the MPI Toolkit. %s or %s cannot be found' %
-          (os.path.join(mpi_home, 'include'),
-           os.path.exists(os.path.join(mpi_home, 'lib'))))
-    environ_cp['MPI_HOME'] = ''
+  def valid_mpi_path(mpi_home):
+    exists = (os.path.exists(os.path.join(mpi_home, 'include')) and
+              os.path.exists(os.path.join(mpi_home, 'lib')))
+    if not exists:
+      print('Invalid path to the MPI Toolkit. %s or %s cannot be found' %
+            (os.path.join(mpi_home, 'include'),
+             os.path.exists(os.path.join(mpi_home, 'lib'))))
+    return exists
 
-  # Set MPI_HOME
-  environ_cp['MPI_HOME'] = str(mpi_home)
+  _ = prompt_loop_or_load_from_env(
+      environ_cp,
+      var_name='MPI_HOME',
+      var_default=default_mpi_home,
+      ask_for_var='Please specify the MPI toolkit folder.',
+      check_success=valid_mpi_path,
+      error_msg='',
+      suppress_default_error=True)
 
 
 def set_other_mpi_vars(environ_cp):
@@ -970,7 +1170,7 @@ def set_mkl():
       'support.\nPlease note that MKL on MacOS or windows is still not '
       'supported.\nIf you would like to use a local MKL instead of '
       'downloading, please set the environment variable \"TF_MKL_ROOT\" every '
-      'time before build.')
+      'time before build.\n')
 
 
 def set_monolithic():
@@ -1082,5 +1282,22 @@ def main():
   set_monolithic()
   create_android_bazelrc_configs()
 
+  if workspace_has_any_android_rule():
+    print('The WORKSPACE file has at least one of ["android_sdk_repository", '
+          '"android_ndk_repository"] already set. Will not ask to help '
+          'configure the WORKSPACE. Please delete the existing rules to '
+          'activate the helper.\n')
+  else:
+    if get_var(
+        environ_cp, 'TF_SET_ANDROID_WORKSPACE', 'android workspace',
+        False,
+        ('Would you like to interactively configure ./WORKSPACE for '
+         'Android builds?'),
+        'Searching for NDK and SDK installations.',
+        'Not configuring the WORKSPACE for Android builds.'):
+      create_android_ndk_rule(environ_cp)
+      create_android_sdk_rule(environ_cp)
+
+
 if __name__ == '__main__':
   main()
-- 
GitLab


From 6e0c6adfe94335e4915e4f9d6c3031e5a07180b1 Mon Sep 17 00:00:00 2001
From: Yu-Cheng Ling <ycling@google.com>
Date: Tue, 5 Dec 2017 12:14:16 -0800
Subject: [PATCH 0645/1225] Document tf-coreml converter in lite/README.md

---
 tensorflow/contrib/lite/README.md | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/tensorflow/contrib/lite/README.md b/tensorflow/contrib/lite/README.md
index fc9144d5fc..b9828f7d31 100644
--- a/tensorflow/contrib/lite/README.md
+++ b/tensorflow/contrib/lite/README.md
@@ -215,3 +215,7 @@ Note that you'd need to follow instructions for installing TensorFlow on Android
 
 ### For iOS
 Follow the documentation [here](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/lite/g3doc/ios.md) to get integrate a TFLite model into your app.
+
+## Core ML support
+
+Core ML is a machine learning framework used across Apple products. In addition to using Tensorflow Lite models directly in their applications, developers have the option to convert their trained Tensorflow models to the [CoreML](https://developer.apple.com/machine-learning/) format for use on Apple devices. For information on how to use the converter please refer to the [Tensorflow-CoreML converter documentation](https://github.com/tf-coreml/tf-coreml).
-- 
GitLab


From ad30cd2eb0360739bd89b5fa31a9033780901b8b Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 5 Dec 2017 12:28:19 -0800
Subject: [PATCH 0646/1225] Internal change.

PiperOrigin-RevId: 177994155
---
 tensorflow/compiler/xla/tests/BUILD | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tensorflow/compiler/xla/tests/BUILD b/tensorflow/compiler/xla/tests/BUILD
index addce9019b..74652a47fb 100644
--- a/tensorflow/compiler/xla/tests/BUILD
+++ b/tensorflow/compiler/xla/tests/BUILD
@@ -382,6 +382,7 @@ xla_test(
     name = "params_test",
     srcs = ["params_test.cc"],
     shard_count = 30,
+    tags = ["optonly"],
     deps = [
         "//tensorflow/compiler/xla:array2d",
         "//tensorflow/compiler/xla:literal_util",
-- 
GitLab


From 248176bbc74127e26a15b7b5c63c3f9c114123ba Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 5 Dec 2017 13:05:28 -0800
Subject: [PATCH 0647/1225] New document for Getting Started section about
 saving models.

PiperOrigin-RevId: 177999275
---
 .../docs_src/get_started/saving_models.md     | 237 ++++++++++++++++++
 1 file changed, 237 insertions(+)
 create mode 100644 tensorflow/docs_src/get_started/saving_models.md

diff --git a/tensorflow/docs_src/get_started/saving_models.md b/tensorflow/docs_src/get_started/saving_models.md
new file mode 100644
index 0000000000..056263c157
--- /dev/null
+++ b/tensorflow/docs_src/get_started/saving_models.md
@@ -0,0 +1,237 @@
+# Checkpoints
+
+This document examines how to save and restore TensorFlow models built with
+Estimators. TensorFlow provides two model formats:
+
+*   checkpoints, which is a format dependent on the code that created
+    the model.
+*   SavedModel, which is a format independent of the code that created
+    the model.
+
+This document focuses on checkpoints. For details on SavedModel, see the
+@{$saved_model$Saving and Restoring} chapter of the
+*TensorFlow Programmer's Guide*.
+
+
+## Sample code
+
+This document relies on the same Iris classification example detailed in
+<!-- TODO (barryr): fill in link when module settles down. --> 
+@{$premade_estimators$Getting Started with TensorFlow}.
+To download and access the example, invoke the following two commands:
+
+```shell
+git clone https://github.com/tensorflow/models/
+cd models/samples/core/get_started
+```
+
+Most of the code snippets in this document are minor variations
+on `premade_estimator.py`.
+
+
+## Saving partially-trained models
+
+Estimators automatically write the following to disk:
+
+*   **checkpoints**, which are versions of the model created during training.
+*   **event files**, which contain information that
+    [TensorBoard](https://developers.google.com/machine-learning/glossary/#TensorBoard)
+    uses to create visualizations.
+
+To specify the top-level directory in which the Estimator stores its
+information, assign a value to the optional `model_dir` argument of any
+Estimator's constructor.  For example, the following code sets the `model_dir`
+argument to the `models/iris` directory:
+
+```python
+classifier = tf.estimator.DNNClassifier(
+    feature_columns=my_feature_columns,
+    hidden_units=[10, 10],
+    n_classes=3,
+    model_dir='models/iris')
+```
+
+Suppose you call the Estimator's `train` method. For example:
+
+
+```python
+classifier.train(
+        input_fn=lambda:train_input_fn(train_x, train_y, batch_size=100),
+                steps=200)
+```
+
+As suggested by the following diagrams, the first call to `train`
+adds checkpoints and other files to the `model_dir` directory:
+
+<div style="width:80%; margin:auto; margin-bottom:10px; margin-top:20px;">
+<img style="width:100%" src="../images/first_train_calls.png">
+</div>
+<div style="text-align: center">
+The first call to train().
+</div>
+
+
+To see the objects in the created `model_dir` directory on a
+UNIX-based system, just call `ls` as follows:
+
+```none
+$ ls -1 models/iris
+checkpoint
+events.out.tfevents.timestamp.hostname
+graph.pbtxt
+model.ckpt-1.data-00000-of-00001
+model.ckpt-1.index
+model.ckpt-1.meta
+model.ckpt-200.data-00000-of-00001
+model.ckpt-200.index
+model.ckpt-200.meta
+```
+
+The preceding `ls` command shows that the Estimator created checkpoints
+at steps 1 (the start of training) and 200 (the end of training).
+
+
+### Default checkpoint directory
+
+If you don't specify `model_dir` in an Estimator's constructor, the Estimator
+writes checkpoint files to a temporary directory chosen by Python's
+[tempfile.mkdtemp](https://docs.python.org/3/library/tempfile.html#tempfile.mkdtemp)
+function. For example, the following Estimator constructor does *not* specify
+the `model_dir` argument:
+
+```python
+classifier = tf.estimator.DNNClassifier(
+    feature_columns=my_feature_columns,
+    hidden_units=[10, 10],
+    n_classes=3)
+
+print(classifier.model_dir)
+```
+
+The `tempfile.mkdtemp` function picks a secure, temporary directory
+appropriate for your operating system. For example, a typical temporary
+directory on macOS might be something like the following:
+
+```None
+/var/folders/0s/5q9kfzfj3gx2knj0vj8p68yc00dhcr/T/tmpYm1Rwa
+```
+
+### Checkpointing Frequency
+
+By default, the Estimator saves
+[checkpoints](https://developers.google.com/machine-learning/glossary/#checkpoint)
+in the `model_dir` according to the following schedule:
+
+*   Writes a checkpoint every 10 minutes (600 seconds).
+*   Writes a checkpoint when the `train` method starts (first iteration)
+    and completes (final iteration).
+*   Retains only the 5 most recent checkpoints in the directory.
+
+You may alter the default schedule by taking the following steps:
+
+1.  Create a @{tf.estimator.RunConfig$`RunConfig`} object that defines the
+    desired schedule.
+2.  When instantiating the Estimator, pass that `RunConfig` object to the
+    Estimator's `config` argument.
+
+For example, the following code changes the checkpointing schedule to every
+20 minutes and retains the 10 most recent checkpoints:
+
+```python
+my_checkpointing_config = tf.estimator.RunConfig(
+    save_checkpoints_secs = 20*60,  # Save checkpoints every 20 minutes.
+    keep_checkpoint_max = 10,       # Retain the 10 most recent checkpoints.
+)
+
+classifier = tf.estimator.DNNClassifier(
+    feature_columns=my_feature_columns,
+    hidden_units=[10, 10],
+    n_classes=3,
+    model_dir='models/iris',
+    config=my_checkpointing_config)
+```
+
+## Restoring your model
+
+The first time you call an Estimator's `train` method, TensorFlow saves a
+checkpoint to the `model_dir`. Each subsequent call to the Estimator's
+`train`, `eval`, or `predict` method causes the following:
+
+1.  The Estimator builds the model's
+    [graph](https://developers.google.com/machine-learning/glossary/#graph)
+    by running the `model_fn()`.  (For details on the `model_fn()`, see
+    @{$custom_estimators$Creating Custom Estimators.})
+2.  The Estimator initializes the weights of the new model from the data
+    stored in the most recent checkpoint.
+
+In other words, as the following illustration suggests, once checkpoints
+exist, TensorFlow rebuilds the model each time you call `train()`,
+`evaluate()`, or `predict()`.
+
+<div style="width:80%; margin:auto; margin-bottom:10px; margin-top:20px;">
+<img style="width:100%" src="../images/subsequent_calls.png">
+</div>
+<div style="text-align: center">
+Subsequent calls to train(), evaluate(), or predict()
+</div>
+
+
+### Avoiding a bad restoration
+
+Restoring a model's state from a checkpoint only works if the model
+and checkpoint are compatible.  For example, suppose you trained a
+`DNNClassifier` Estimator containing two hidden layers,
+each having 10 nodes:
+
+```python
+classifier = tf.estimator.DNNClassifier(
+    feature_columns=feature_columns,
+    hidden_units=[10, 10],
+    n_classes=3,
+    model_dir='models/iris')
+
+classifier.train(
+    input_fn=lambda:train_input_fn(train_x, train_y, batch_size=100),
+        steps=200)
+```
+
+After training (and, therefore, after creating checkpoints in `models/iris`),
+imagine that you changed the number of neurons in each hidden layer from 10 to
+20 and then attempted to retrain the model:
+
+``` python
+classifier2 = tf.estimator.DNNClassifier(
+    feature_columns=my_feature_columns,
+    hidden_units=[20, 20],  # Change the number of neurons in the model.
+    n_classes=3,
+    model_dir='models/iris')
+
+classifier.train(
+    input_fn=lambda:train_input_fn(train_x, train_y, batch_size=100),
+        steps=200)
+```
+
+Since the state in the checkpoint is incompatible with the model described
+in `classifier2`, retraining fails with the following error:
+
+```None
+...
+InvalidArgumentError (see above for traceback): tensor_name =
+dnn/hiddenlayer_1/bias/t_0/Adagrad; shape in shape_and_slice spec [10]
+does not match the shape stored in checkpoint: [20]
+```
+
+To run experiments in which you train and compare slightly different
+versions of a model, save a copy of the code that created each
+`model-dir`, possibly by creating a separate git branch for each version.
+This separation will keep your checkpoints recoverable.
+
+## Summary
+
+Checkpoints provide an easy automatic mechanism for storing and restoring
+models created by Estimators.  See the @{$saved_model$Saving and Restoring}
+chapter of the *TensorFlow Programmer's Guide* for details on:
+
+*   Saving and restoring models created by low-level TensorFlow APIs.
+*   Saving and restoring models in the SavedModel format, which is a
+    language-neutral, recoverable, serialization format.
-- 
GitLab


From b352b38aabd33404e7ae987778caa6e4b44d86d1 Mon Sep 17 00:00:00 2001
From: Nick Desaulniers <ndesaulniers@google.com>
Date: Tue, 5 Dec 2017 14:00:19 -0800
Subject: [PATCH 0648/1225] Rather than make potentially complex modifications
 to the Hlo graph, simply generate input data that is constrained for certain
 entry computation parameters.

Generate fake literals that are within bounds for DynamicSlice and other
operations that accept dynamically computed indices.

PiperOrigin-RevId: 178006866
---
 tensorflow/compiler/xla/tests/BUILD         |   1 +
 tensorflow/compiler/xla/tests/test_utils.cc | 162 +++++++++++++-------
 tensorflow/compiler/xla/tests/test_utils.h  |  10 +-
 3 files changed, 114 insertions(+), 59 deletions(-)

diff --git a/tensorflow/compiler/xla/tests/BUILD b/tensorflow/compiler/xla/tests/BUILD
index 74652a47fb..b99e046b9b 100644
--- a/tensorflow/compiler/xla/tests/BUILD
+++ b/tensorflow/compiler/xla/tests/BUILD
@@ -69,6 +69,7 @@ cc_library(
         "//tensorflow/compiler/xla:util",
         "//tensorflow/compiler/xla:xla_data_proto",
         "//tensorflow/compiler/xla/service:hlo",
+        "//tensorflow/compiler/xla/service:hlo_dataflow_analysis",
         "//tensorflow/compiler/xla/service:hlo_verifier",
         "//tensorflow/compiler/xla/service:transfer_manager",
         "//tensorflow/core:lib",
diff --git a/tensorflow/compiler/xla/tests/test_utils.cc b/tensorflow/compiler/xla/tests/test_utils.cc
index 0d56c9f483..93bce97a3e 100644
--- a/tensorflow/compiler/xla/tests/test_utils.cc
+++ b/tensorflow/compiler/xla/tests/test_utils.cc
@@ -15,6 +15,7 @@ limitations under the License.
 
 #include "tensorflow/compiler/xla/tests/test_utils.h"
 #include "tensorflow/compiler/xla/primitive_util.h"
+#include "tensorflow/compiler/xla/service/hlo_dataflow_analysis.h"
 #include "tensorflow/compiler/xla/service/hlo_verifier.h"
 #include "tensorflow/compiler/xla/service/transfer_manager.h"
 
@@ -47,42 +48,113 @@ void PopulateWithRandomIntegralData(Literal* literal) {
       }));
 }
 
-bool LooksLikeSum(const HloInstruction& instruction) {
-  return instruction.opcode() == HloOpcode::kAdd &&
-         instruction.operand(0)->opcode() == HloOpcode::kParameter &&
-         instruction.operand(1)->opcode() == HloOpcode::kParameter &&
-         instruction.operand(0) != instruction.operand(1);
+// Matches binary addition computations.
+bool LooksLikeSum(const HloComputation& computation) {
+  const HloInstruction* const root = computation.root_instruction();
+  return root->opcode() == HloOpcode::kAdd &&
+         computation.num_parameters() == 2 &&
+         root->operand(0)->opcode() == HloOpcode::kParameter &&
+         root->operand(1)->opcode() == HloOpcode::kParameter &&
+         root->operand(0) != root->operand(1);
 }
 
-// Given an instruction and operand number, replace the given operand with
-// a Literal Constant Zero. Handle the case of a fusion instruction by
-// replacing the fusion's parent's parameter with a Literal Constant Zero,
-// unless the fusion's parent is itself a fusion.
-Status MaybeReplaceParameterInputWithZero(HloInstruction* const instruction,
-                                          const int64 operand_number) {
-  CHECK_LT(operand_number, instruction->operand_count());
-  if (instruction->operand(operand_number)->opcode() != HloOpcode::kParameter) {
-    return Status::OK();
-  }
+// Reduce, ReduceWindow, and SelectAndScatter ops may use binary addition,
+// which requires an init_value of 0 rather than a random value.
+bool NeedsZeroInitValue(const HloUse& use) {
+  const HloInstruction* const instruction = use.instruction;
+  const HloOpcode opcode = instruction->opcode();
+  const int64 op_num = use.operand_number;
+  return (
+      ((opcode == HloOpcode::kReduce || opcode == HloOpcode::kReduceWindow) &&
+       op_num == 1 && LooksLikeSum(*instruction->to_apply())) ||
+      (opcode == HloOpcode::kSelectAndScatter && op_num == 2 &&
+       LooksLikeSum(*instruction->scatter())));
+}
 
-  HloComputation* const computation = instruction->parent();
-  std::unique_ptr<HloInstruction> zero = HloInstruction::CreateConstant(
-      MakeUnique<Literal>(Literal::Zero(instruction->shape().element_type())));
+// Generate random values that are constrained to the input_shape minus the
+// output_shape so as not to produce wrapping slices, for instance.
+std::unique_ptr<Literal> MakeRandomNonwrappingSliceIndex(
+    const Shape& input_shape, const Shape& slice_shape) {
+  const int64 rank = ShapeUtil::Rank(input_shape);
+  std::vector<int32> start_indices(rank);
+  std::minstd_rand0 engine;
+  for (int i = 0; i < rank; ++i) {
+    const int32 upper_bound = ShapeUtil::GetDimension(input_shape, i) -
+                              ShapeUtil::GetDimension(slice_shape, i);
+    std::uniform_int_distribution<int32> generator(0, upper_bound);
+    start_indices[i] = generator(engine);
+  }
+  return Literal::CreateR1<int32>(start_indices);
+}
 
-  if (computation->IsFusionComputation()) {
-    HloInstruction* const fusion_instruction = computation->FusionInstruction();
-    if (fusion_instruction->IsFused()) {
-      return Unimplemented(
-          "Unable to replace fused parameter of fusion instruction");
+// Use dataflow analysis on each parameter to see if there are uses that would
+// be problematic when generating input data.  Returns the list of instructions
+// that correspond to their uses.
+//
+// Should be paired with the CreateLiteralForConstrainedUses() function below.
+std::vector<HloInstruction*> FindConstrainedUses(
+    const HloDataflowAnalysis& dataflow, const HloInstruction& param) {
+  std::vector<HloInstruction*> constrained_uses;
+  for (const auto& pair : dataflow.GetInstructionValueSet(&param)) {
+    const HloValue& value = dataflow.GetUniqueValueAt(&param, pair.first);
+    for (const HloUse& use : value.uses()) {
+      HloInstruction* instruction = use.instruction;
+      const HloOpcode opcode = instruction->opcode();
+      const int64 op_num = use.operand_number;
+      if ((opcode == HloOpcode::kDynamicSlice && op_num == 1) ||
+          (opcode == HloOpcode::kDynamicUpdateSlice && op_num == 2)) {
+        constrained_uses.push_back(instruction);
+      } else if (opcode == HloOpcode::kFusion) {
+        const HloInstruction* const to_analyze =
+            instruction->fused_parameter(op_num);
+        auto fused_uses = FindConstrainedUses(dataflow, *to_analyze);
+        constrained_uses.insert(constrained_uses.end(), fused_uses.begin(),
+                                fused_uses.end());
+      } else if (NeedsZeroInitValue(use)) {
+        constrained_uses.push_back(instruction);
+      }
     }
-    TF_RETURN_IF_ERROR(fusion_instruction->ReplaceOperandWith(
-        instruction->operand(operand_number)->parameter_number(),
-        fusion_instruction->parent()->AddInstruction(std::move(zero))));
-  } else {
-    TF_RETURN_IF_ERROR(instruction->ReplaceOperandWith(
-        operand_number, computation->AddInstruction(std::move(zero))));
   }
-  return Status::OK();
+  return constrained_uses;
+}
+
+// Given a parameter, generate a random Literal to use as input if there exist
+// no constrained uses in the dataflow graph.  If such constraints exist,
+// generate a constrained literal (either bounded in the case of indices, or
+// zero in the case of init_values for reductions).
+StatusOr<std::unique_ptr<Literal>> CreateLiteralForConstrainedUses(
+    const tensorflow::gtl::ArraySlice<HloInstruction*> constrained_uses,
+    const HloInstruction& param) {
+  const auto count = constrained_uses.size();
+  if (count > 1) {
+    return Unimplemented("multiple constrained uses not yet supported");
+  }
+
+  if (count == 0) {
+    return MakeFakeLiteral(param.shape());
+  }
+
+  const HloInstruction* const use = constrained_uses[0];
+  switch (use->opcode()) {
+    case HloOpcode::kDynamicSlice:
+    case HloOpcode::kDynamicUpdateSlice:
+      return MakeRandomNonwrappingSliceIndex(use->operand(0)->shape(),
+                                             use->shape());
+    case HloOpcode::kReduce:
+    case HloOpcode::kReduceWindow:
+    case HloOpcode::kSelectAndScatter:
+      return Literal::CreateFromShape(param.shape());
+    default:
+      return Unimplemented("constrained use given; no equivalent literal");
+  }
+}
+
+// Given a module entry parameter, use the dataflow analysis to see if a
+// special case literal must be created, or if we can generate fake data.
+StatusOr<std::unique_ptr<Literal>> MakeConstrainedArgument(
+    const HloDataflowAnalysis& dataflow, const HloInstruction& param) {
+  const auto constrained_uses = FindConstrainedUses(dataflow, param);
+  return CreateLiteralForConstrainedUses(constrained_uses, param);
 }
 
 }  // namespace
@@ -146,33 +218,17 @@ StatusOr<std::unique_ptr<Literal>> MakeFakeLiteral(const Shape& shape) {
 }
 
 StatusOr<std::vector<std::unique_ptr<Literal>>> MakeFakeArguments(
-    const HloModule& module) {
-  std::vector<std::unique_ptr<Literal>> arguments;
-  for (const ShapeLayout& shape_layout :
-       module.config().entry_computation_layout().parameter_layouts()) {
-    TF_ASSIGN_OR_RETURN(auto literal, MakeFakeLiteral(shape_layout.shape()));
-    arguments.push_back(std::move(literal));
+    HloModule* const module) {
+  TF_ASSIGN_OR_RETURN(auto dataflow, HloDataflowAnalysis::Run(module));
+  const auto params = module->entry_computation()->parameter_instructions();
+  std::vector<std::unique_ptr<Literal>> arguments(params.size());
+  for (int i = 0; i < params.size(); ++i) {
+    TF_ASSIGN_OR_RETURN(arguments[i],
+                        MakeConstrainedArgument(*dataflow, *params[i]));
   }
   return std::move(arguments);
 }
 
-Status ReplaceInitsWithConstants(HloModule* const module) {
-  for (HloComputation* const computation : module->computations()) {
-    for (HloInstruction* const instruction : computation->instructions()) {
-      const HloOpcode opcode = instruction->opcode();
-      if ((opcode == HloOpcode::kReduce ||
-           opcode == HloOpcode::kReduceWindow) &&
-          LooksLikeSum(*instruction->to_apply()->root_instruction())) {
-        TF_RETURN_IF_ERROR(MaybeReplaceParameterInputWithZero(instruction, 1));
-      } else if (opcode == HloOpcode::kSelectAndScatter &&
-                 LooksLikeSum(*instruction->scatter()->root_instruction())) {
-        TF_RETURN_IF_ERROR(MaybeReplaceParameterInputWithZero(instruction, 2));
-      }
-    }
-  }
-  return Status::OK();
-}
-
 Status VerifyHloModule(const perftools::gputools::Platform& platform,
                        HloModule* const module) {
   return HloVerifier(
diff --git a/tensorflow/compiler/xla/tests/test_utils.h b/tensorflow/compiler/xla/tests/test_utils.h
index 9aca162a18..0fb024ffb0 100644
--- a/tensorflow/compiler/xla/tests/test_utils.h
+++ b/tensorflow/compiler/xla/tests/test_utils.h
@@ -60,13 +60,11 @@ StatusOr<std::unique_ptr<Literal>> MakeFakeLiteral(const Shape& shape);
 
 // Generates a vector of arguments containing fake data. The number, shape and
 // layout of the arguments is appropriate for given HLO module.
+//
+// Will handle special cases such as making sure that indices used for dynamic
+// slices are bounded, reduces that call adds use 0 as an init value, etc.
 StatusOr<std::vector<std::unique_ptr<Literal>>> MakeFakeArguments(
-    const HloModule& module);
-
-// Reductions using Adds, ReduceWindow, and SelectAndScatter, require their
-// init_value to be replaced with the constant 0.0f when testing, otherwise we
-// may generate a bad init_value when looking at the op in isolation.
-Status ReplaceInitsWithConstants(HloModule* const module);
+    HloModule* const module);
 
 // Check that a given module satisfies various constraints before trying to
 // execute it.
-- 
GitLab


From 3b930e311da5ce7c851f209be38fbe9b03f8fb85 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 5 Dec 2017 14:17:56 -0800
Subject: [PATCH 0649/1225] [TF:XLA] Add test with while loop and many
 parameters.

PiperOrigin-RevId: 178009859
---
 tensorflow/compiler/xla/tests/params_test.cc | 100 +++++++++++++++++++
 1 file changed, 100 insertions(+)

diff --git a/tensorflow/compiler/xla/tests/params_test.cc b/tensorflow/compiler/xla/tests/params_test.cc
index b7f62b8aa1..24c5daed3d 100644
--- a/tensorflow/compiler/xla/tests/params_test.cc
+++ b/tensorflow/compiler/xla/tests/params_test.cc
@@ -334,6 +334,106 @@ XLA_TEST_F(ParamsTest, DISABLED_ON_CPU(DISABLED_ON_GPU(
   ComputeAndCompareTuple(&builder, *Literal::MakeTuple(ptrs), param_data);
 }
 
+// Test large number of parameters flowing into a while-loop.
+// Construct conceptually the following HLO graph:
+//
+// p0 = parameter(0)
+// p1 = parameter(1)
+// ...
+// pN = parameter(N)
+// result = while (false) {
+//   p0 += (1, 1);
+//   p1 += (1, 1);
+//   ...
+//   pN += (1, 1)
+// }
+// result = {p0, p1, ..., pN}
+//
+// TODO(b/70173746): Times out during compilation on GPU and CPU-parallel
+// backend as of 2017-12-03.
+XLA_TEST_F(ParamsTest, DISABLED_ON_CPU_PARALLEL(
+                           DISABLED_ON_GPU(ManyParametersIntoWhileLoop))) {
+  ComputationBuilder builder(client_, TestName());
+
+  std::vector<std::unique_ptr<GlobalData>> param_data_owner;
+  constexpr int kParamCount = 1900;
+  std::vector<ComputationDataHandle> params;
+  std::vector<Shape> parameter_shapes;
+  for (int i = 0; i < kParamCount; ++i) {
+    std::unique_ptr<Literal> literal = Literal::CreateR1<int32>({i, i});
+    param_data_owner.push_back(
+        std::move(client_->TransferToServer(*literal)).ValueOrDie());
+    ComputationDataHandle param =
+        builder.Parameter(i, literal->shape(), "param");
+    params.push_back(param);
+    parameter_shapes.push_back(literal->shape());
+  }
+
+  // Add bool parameter for the loop condition. Use a parameter HLO instead of a
+  // constant because DCE may eliminate the while-body otherwise.
+  std::unique_ptr<Literal> bool_literal = Literal::CreateR0<bool>(false);
+  param_data_owner.push_back(
+      std::move(client_->TransferToServer(*bool_literal)).ValueOrDie());
+  ComputationDataHandle bool_param =
+      builder.Parameter(kParamCount, bool_literal->shape(), "bool_param");
+  params.push_back(bool_param);
+  parameter_shapes.push_back(bool_literal->shape());
+
+  auto init = builder.Tuple(params);
+
+  // Create a computation for the condition: while(bool_param).
+  Shape while_shape = ShapeUtil::MakeTupleShape(parameter_shapes);
+  Computation condition;
+  {
+    ComputationBuilder builder(client_, "condition");
+    auto condition_parameter =
+        builder.Parameter(0, while_shape, "condition_parameter");
+    builder.GetTupleElement(condition_parameter, kParamCount);
+    condition = builder.Build().ConsumeValueOrDie();
+  }
+
+  // Create a computation for the body.
+  // Add {1, 1} to the each tuple element.
+  Computation body;
+  {
+    ComputationBuilder builder(client_, "body");
+    auto body_parameter = builder.Parameter(0, while_shape, "body_parameter");
+    std::vector<ComputationDataHandle> updates;
+    for (int i = 0; i < kParamCount; ++i) {
+      auto add = builder.Add(builder.GetTupleElement(body_parameter, i),
+                             builder.ConstantR1<int32>({1, 1}));
+      updates.push_back(add);
+    }
+    // Add bool parameter.
+    updates.push_back(builder.GetTupleElement(body_parameter, kParamCount));
+
+    builder.Tuple(updates);
+    body = builder.Build().ConsumeValueOrDie();
+  }
+
+  auto loop = builder.While(condition, body, init);
+
+  std::vector<ComputationDataHandle> outputs;
+  for (int i = 0; i < kParamCount; ++i) {
+    outputs.push_back(builder.GetTupleElement(loop, i));
+  }
+  builder.Tuple(outputs);
+
+  std::vector<GlobalData*> param_data;
+  param_data.reserve(param_data_owner.size());
+  for (const std::unique_ptr<GlobalData>& data : param_data_owner) {
+    param_data.push_back(data.get());
+  }
+
+  std::vector<std::unique_ptr<Literal>> elements;
+  std::vector<const Literal*> ptrs;
+  for (int i = 0; i < kParamCount; ++i) {
+    elements.push_back(Literal::CreateR1<int32>({i, i}));
+    ptrs.push_back(elements.back().get());
+  }
+  ComputeAndCompareTuple(&builder, *Literal::MakeTuple(ptrs), param_data);
+}
+
 #endif
 
 XLA_TEST_F(ParamsTest,
-- 
GitLab


From 5f75189ad83c648044180d79050cb26b3fb2e17d Mon Sep 17 00:00:00 2001
From: Clayne Robison <clayne.b.robison@intel.com>
Date: Tue, 5 Dec 2017 14:22:57 -0800
Subject: [PATCH 0650/1225] Revving mkl-dnn to include all changes before
 2017-11-20.

---
 tensorflow/workspace.bzl | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl
index 34184208e3..387e4e0f4e 100644
--- a/tensorflow/workspace.bzl
+++ b/tensorflow/workspace.bzl
@@ -74,11 +74,11 @@ def tf_workspace(path_prefix="", tf_repo_name=""):
   tf_http_archive(
       name = "mkl_dnn",
       urls = [
-          "https://mirror.bazel.build/github.com/01org/mkl-dnn/archive/b01e3a55a07be62172e713bcd2644c5176360212.tar.gz",
-          "https://github.com/01org/mkl-dnn/archive/b01e3a55a07be62172e713bcd2644c5176360212.tar.gz",
+          "https://mirror.bazel.build/github.com/01org/mkl-dnn/archive/aab753280e83137ba955f8f19d72cb6aaba545ef.tar.gz",
+          "https://github.com/01org/mkl-dnn/archive/aab753280e83137ba955f8f19d72cb6aaba545ef.tar.gz",
       ],
-      sha256 = "0d529ad4c49dc799e6df07c2b88b115d0668735da15fb3b3862d28d33fa68165",
-      strip_prefix = "mkl-dnn-b01e3a55a07be62172e713bcd2644c5176360212",
+      sha256 = "fb67f255a96bd4ad39b8dd104eca5aa92200c95c1ed36e59641e6c0478eefd11",
+      strip_prefix = "mkl-dnn-aab753280e83137ba955f8f19d72cb6aaba545ef",
       build_file = str(Label("//third_party/mkl_dnn:mkldnn.BUILD")),
   )
 
-- 
GitLab


From 557e0ce1ca949abb0ffdcccc6ab0480e65ea9fe1 Mon Sep 17 00:00:00 2001
From: Yuanzhong Xu <yuanzx@google.com>
Date: Tue, 5 Dec 2017 14:21:19 -0800
Subject: [PATCH 0651/1225] Use a macro to determine whether BF16 is supported.

PiperOrigin-RevId: 178010405
---
 tensorflow/compiler/xla/tests/reduce_window_test.cc | 10 ++++------
 tensorflow/compiler/xla/tests/reshape_test.cc       |  7 +++----
 2 files changed, 7 insertions(+), 10 deletions(-)

diff --git a/tensorflow/compiler/xla/tests/reduce_window_test.cc b/tensorflow/compiler/xla/tests/reduce_window_test.cc
index fd73a82093..330575a02e 100644
--- a/tensorflow/compiler/xla/tests/reduce_window_test.cc
+++ b/tensorflow/compiler/xla/tests/reduce_window_test.cc
@@ -41,14 +41,12 @@ limitations under the License.
 namespace xla {
 namespace {
 
-// We can remove the GPU define here once we have complete GPU support in place.
-#if defined(XLA_TEST_BACKEND_CPU) || defined(XLA_TEST_BACKEND_CPU_PARALLEL) || \
-    defined(XLA_TEST_BACKEND_GPU)
-// Only tests F32.
-static std::array<bool, 1> use_bfloat16_params{false};
-#else
+#ifdef XLA_BACKEND_SUPPORTS_BFLOAT16
 // Tests both F32 and BF16.
 static std::array<bool, 2> use_bfloat16_params{false, true};
+#else
+// Only tests F32.
+static std::array<bool, 1> use_bfloat16_params{false};
 #endif
 
 class ReduceWindowTestBase : public ClientLibraryTestBase {
diff --git a/tensorflow/compiler/xla/tests/reshape_test.cc b/tensorflow/compiler/xla/tests/reshape_test.cc
index 6286a89748..ddd50d7a58 100644
--- a/tensorflow/compiler/xla/tests/reshape_test.cc
+++ b/tensorflow/compiler/xla/tests/reshape_test.cc
@@ -1014,12 +1014,11 @@ XLA_TEST_P(ReshapeTest, R4TwoMinorTransposeTrivialR2) {
                            zero_error_spec_, &expected->shape());
 }
 
-#if defined(XLA_TEST_BACKEND_CPU) || defined(XLA_TEST_BACKEND_CPU_PARALLEL) || \
-    defined(XLA_TEST_BACKEND_GPU)
+#ifdef XLA_BACKEND_SUPPORTS_BFLOAT16
+INSTANTIATE_TEST_CASE_P(ReshapeTestInstance, ReshapeTest, ::testing::Bool());
+#else
 INSTANTIATE_TEST_CASE_P(ReshapeTestInstance, ReshapeTest,
                         ::testing::ValuesIn(std::vector<bool>{false}));
-#else
-INSTANTIATE_TEST_CASE_P(ReshapeTestInstance, ReshapeTest, ::testing::Bool());
 #endif
 
 }  // namespace
-- 
GitLab


From aa4f491ea93520179ef48d586177f05c7937a274 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 5 Dec 2017 14:39:35 -0800
Subject: [PATCH 0652/1225] Adds shards and increases size to
 dnn_linear_combined_test to prevent timeouts.

PiperOrigin-RevId: 178013302
---
 tensorflow/contrib/estimator/BUILD | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tensorflow/contrib/estimator/BUILD b/tensorflow/contrib/estimator/BUILD
index 706a174efb..ba272d7e88 100644
--- a/tensorflow/contrib/estimator/BUILD
+++ b/tensorflow/contrib/estimator/BUILD
@@ -88,12 +88,12 @@ py_library(
 
 py_test(
     name = "dnn_linear_combined_test",
-    size = "small",
+    size = "medium",
     srcs = ["python/estimator/dnn_linear_combined_test.py"],
+    shard_count = 3,
     srcs_version = "PY2AND3",
     tags = [
         "no_pip",
-        "notap",  # b/62204861
         "notsan",
     ],
     deps = [
-- 
GitLab


From 7f756875f14afbc40ee00e2c93e326493fe7b4e5 Mon Sep 17 00:00:00 2001
From: Derek Murray <mrry@google.com>
Date: Tue, 5 Dec 2017 14:56:24 -0800
Subject: [PATCH 0653/1225] Replace `FunctionCallFrame` with a pure-virtual
 `CallFrameInterface`.

Current users are unaffected. Running
`//tensorflow/core/common_runtime_direct_session_test
--benchmarks=all`, which stresses the Arg and Retval ops, reveals no
performance change.
PiperOrigin-RevId: 178015803
---
 tensorflow/compiler/tf2xla/xla_op_kernel.h |  2 +-
 tensorflow/core/framework/function.h       | 14 +++++++++++---
 tensorflow/core/framework/op_kernel.h      |  6 +++---
 3 files changed, 15 insertions(+), 7 deletions(-)

diff --git a/tensorflow/compiler/tf2xla/xla_op_kernel.h b/tensorflow/compiler/tf2xla/xla_op_kernel.h
index 06845a674e..f1ae81a5aa 100644
--- a/tensorflow/compiler/tf2xla/xla_op_kernel.h
+++ b/tensorflow/compiler/tf2xla/xla_op_kernel.h
@@ -178,7 +178,7 @@ class XlaOpKernelContext {
 
   // If this kernel invocation is within a function execution,
   // call_frame() returns the call frame for the function call.
-  FunctionCallFrame* call_frame() const { return context_->call_frame(); }
+  CallFrameInterface* call_frame() const { return context_->call_frame(); }
 
   FunctionLibraryRuntime* function_library() const {
     return context_->function_library();
diff --git a/tensorflow/core/framework/function.h b/tensorflow/core/framework/function.h
index d3d6358362..6c5cc1da98 100644
--- a/tensorflow/core/framework/function.h
+++ b/tensorflow/core/framework/function.h
@@ -243,13 +243,21 @@ uint64 FunctionDefHash(const FunctionDef& fdef);
 // address spaces.
 string Canonicalize(const string& funcname, AttrSlice attrs);
 
+class CallFrameInterface {
+ public:
+  virtual ~CallFrameInterface() {}
+
+  virtual Status GetArg(int index, Tensor* val) const = 0;
+  virtual Status SetRetval(int index, const Tensor& val) = 0;
+};
+
 // Represents a function call frame. I.e., the data structure used to
 // pass arguments to a function and retrieve its results.
 //
 // Runtime must arrange accesses to one FunctionCallFrame s.t.
 //   1. SetArgs() happens before any GetArg();
 //   2. GetRetvals happens after all SetRetval();
-class FunctionCallFrame {
+class FunctionCallFrame : public CallFrameInterface {
  public:
   FunctionCallFrame(DataTypeSlice arg_types, DataTypeSlice ret_types);
   ~FunctionCallFrame();
@@ -260,8 +268,8 @@ class FunctionCallFrame {
   Status ConsumeRetvals(std::vector<Tensor>* rets);
 
   // Callee methods.
-  Status GetArg(int index, Tensor* val) const;
-  Status SetRetval(int index, const Tensor& val);
+  Status GetArg(int index, Tensor* val) const override;
+  Status SetRetval(int index, const Tensor& val) override;
 
  private:
   DataTypeVector arg_types_;
diff --git a/tensorflow/core/framework/op_kernel.h b/tensorflow/core/framework/op_kernel.h
index da0dc54943..a7b9bb393d 100644
--- a/tensorflow/core/framework/op_kernel.h
+++ b/tensorflow/core/framework/op_kernel.h
@@ -61,7 +61,7 @@ class TensorSliceReaderCacheWrapper;
 }  // namespace checkpoint
 
 class AsyncOpKernel;
-class FunctionCallFrame;
+class CallFrameInterface;
 class FunctionLibraryRuntime;
 class OpKernelConstruction;  // declared below
 class OpKernelContext;       // declared below
@@ -548,7 +548,7 @@ class OpKernelContext {
     FrameAndIter frame_iter;
 
     // Function call supports.
-    FunctionCallFrame* call_frame = nullptr;
+    CallFrameInterface* call_frame = nullptr;
     FunctionLibraryRuntime* function_library = nullptr;
     std::function<void(std::function<void()>)>* runner = nullptr;
     StepStatsCollector* stats_collector = nullptr;
@@ -930,7 +930,7 @@ class OpKernelContext {
   //
   // If this kernel invocation is within a function execution,
   // call_frame() returns the call frame for the function call.
-  FunctionCallFrame* call_frame() const { return params_->call_frame; }
+  CallFrameInterface* call_frame() const { return params_->call_frame; }
 
   // If not nullptr, the kernel invoke functions defined in the
   // library. E.g., CHECK_NOTNULL(function_library())->Run("Foo", ...).
-- 
GitLab


From b6ed812dbc87833a2f3076184cfe7d6fdbdba2fe Mon Sep 17 00:00:00 2001
From: Jianwei Xie <xiejw@google.com>
Date: Tue, 5 Dec 2017 15:33:42 -0800
Subject: [PATCH 0654/1225] Sets the master to '' for single node cluster.

PiperOrigin-RevId: 178021454
---
 tensorflow/python/estimator/run_config.py     |  7 ++
 .../python/estimator/run_config_test.py       |  4 +-
 tensorflow/python/estimator/training.py       | 23 +++++-
 tensorflow/python/estimator/training_test.py  | 80 +++++++++++++++----
 4 files changed, 93 insertions(+), 21 deletions(-)

diff --git a/tensorflow/python/estimator/run_config.py b/tensorflow/python/estimator/run_config.py
index d71964d2ec..3893f48cae 100644
--- a/tensorflow/python/estimator/run_config.py
+++ b/tensorflow/python/estimator/run_config.py
@@ -80,6 +80,13 @@ def _get_master(cluster_spec, task_type, task_id):
         '%s\n\n'
         'Note that these values may be coming from the TF_CONFIG environment '
         'variable.' % (task_id, task_type, cluster_spec))
+
+  # If there is only one node in the cluster, do things locally by setting
+  # master to ''.  If a service or user sets TF_CONFIG with a single node, it's
+  # more performant to use a direct master rather than an RPC service.
+  if len(jobs) == 1 and len(cluster_spec.job_tasks(jobs[0])) == 1:
+    return _LOCAL_MASTER
+
   return _GRPC_SCHEME + addresses[task_id]
 
 
diff --git a/tensorflow/python/estimator/run_config_test.py b/tensorflow/python/estimator/run_config_test.py
index ecc850d540..6a62c061ff 100644
--- a/tensorflow/python/estimator/run_config_test.py
+++ b/tensorflow/python/estimator/run_config_test.py
@@ -344,7 +344,7 @@ class RunConfigDistributedSettingTest(test.TestCase):
         expected_cluster_spec=tf_config['cluster'],
         expected_task_type=run_config_lib.TaskType.CHIEF,
         expected_task_id=0,
-        expected_master='grpc://host0:0',
+        expected_master='',
         expected_evaluation_master='',
         expected_is_chief=True,
         expected_num_worker_replicas=1,
@@ -572,7 +572,7 @@ class RunConfigDistributedSettingWithMasterTest(test.TestCase):
         expected_cluster_spec=tf_config['cluster'],
         expected_task_type=run_config_lib.TaskType.MASTER,
         expected_task_id=0,
-        expected_master='grpc://host0:0',
+        expected_master='',
         expected_evaluation_master='',
         expected_is_chief=True,
         expected_num_worker_replicas=1,
diff --git a/tensorflow/python/estimator/training.py b/tensorflow/python/estimator/training.py
index 1131995b3e..58fccc3a29 100644
--- a/tensorflow/python/estimator/training.py
+++ b/tensorflow/python/estimator/training.py
@@ -43,6 +43,8 @@ _DELAY_SECS_PER_WORKER = 5
 _TF_CONFIG_ENV = 'TF_CONFIG'
 _ENVIRONMENT_KEY = 'environment'
 _ENVIRONMENT_GOOGLE_VALUE = 'google'
+_TRAINER_JOBS = (run_config_lib.TaskType.CHIEF, run_config_lib.TaskType.MASTER,
+                 run_config_lib.TaskType.WORKER)
 
 
 def _validate_input_fn(input_fn):
@@ -624,11 +626,28 @@ class _TrainingExecutor(object):
 
   def _start_std_server(self, config):
     """Creates, starts, and returns a server_lib.Server."""
-    if (not config.cluster_spec or not config.task_type or not config.master or
+    if (not config.cluster_spec or not config.task_type or
         config.task_id is None):
       raise RuntimeError('Could not start server; be sure to specify '
-                         'cluster_spec, task_type, master, and task in '
+                         'cluster_spec, task_type, and task in '
                          'RunConfig or set the TF_CONFIG environment variable.')
+
+    if not config.master:
+      jobs = config.cluster_spec.jobs
+      if (len(jobs) == 1 and len(config.cluster_spec.job_tasks(jobs[0])) == 1
+          and config.task_type in _TRAINER_JOBS):
+        # For distributed training, config.master is empty if and only if it has
+        # a single node in the cluster spec. In this case, we should not start
+        # the server.
+        logging.info('Skip starting Tensorflow server as there is only one '
+                     'node in the cluster.')
+        return
+      else:
+        raise RuntimeError(
+            'Could not start server; be sure to specify master in '
+            'RunConfig or set the TF_CONFIG environment variable.')
+
+    logging.info('Start Tensorflow server.')
     server = server_lib.Server(
         config.cluster_spec,
         job_name=config.task_type,
diff --git a/tensorflow/python/estimator/training_test.py b/tensorflow/python/estimator/training_test.py
index 17d018aa88..d72b95dbdd 100644
--- a/tensorflow/python/estimator/training_test.py
+++ b/tensorflow/python/estimator/training_test.py
@@ -480,7 +480,7 @@ class TrainAndEvaluteTest(test.TestCase):
     mock_eval_spec = test.mock.Mock(spec=training.EvalSpec)
 
     mock_est.config = test.mock.Mock()
-    mock_est.config.cluster_spec = {'1': 'dummy'}
+    mock_est.config.cluster_spec = server_lib.ClusterSpec({'1': ['dummy']})
     mock_est.config.task_type = ''
 
     with self.assertRaisesRegexp(ValueError, _INVALID_TASK_TYPE):
@@ -598,7 +598,8 @@ class _TrainingExecutorTrainingTest(object):
     mock_eval_spec = test.mock.Mock(spec=training.EvalSpec)
 
     mock_est.config = test.mock.PropertyMock(spec=run_config_lib.RunConfig)
-    mock_est.config.cluster_spec = {'worker': 'dummy'}
+    mock_est.config.cluster_spec = server_lib.ClusterSpec(
+        {'worker': ['dummy', 'dummy1']})
     mock_est.config.master = ''
     mock_est.config.task_type = 'worker'
     mock_est.config.task_id = 2
@@ -608,13 +609,33 @@ class _TrainingExecutorTrainingTest(object):
       self._run_task(training._TrainingExecutor(mock_est, mock_train_spec,
                                                 mock_eval_spec))
 
+  @test.mock.patch.object(time, 'sleep')
+  @test.mock.patch.object(server_lib, 'Server')
+  def test_single_worker_node_with_empty_tf_master(
+      self, mock_server, unused_mock_sleep):
+    mock_est = test.mock.Mock(spec=estimator_lib.Estimator)
+    mock_train_spec = test.mock.Mock(spec=training.TrainSpec)
+    mock_eval_spec = test.mock.Mock(spec=training.EvalSpec)
+
+    mock_est.config = test.mock.PropertyMock(spec=run_config_lib.RunConfig)
+    # Single node cluster.
+    mock_est.config.cluster_spec = server_lib.ClusterSpec({'worker': ['dummy']})
+    mock_est.config.master = ''
+    mock_est.config.task_type = 'worker'
+    mock_est.config.task_id = 2
+
+    self._run_task(training._TrainingExecutor(mock_est, mock_train_spec,
+                                              mock_eval_spec))
+    mock_est.train.assert_called()
+    mock_server.assert_not_called()
+
   def test_fail_with_empty_task_type(self):
     mock_est = test.mock.Mock(spec=estimator_lib.Estimator)
     mock_train_spec = test.mock.Mock(spec=training.TrainSpec)
     mock_eval_spec = test.mock.Mock(spec=training.EvalSpec)
 
     mock_est.config = test.mock.PropertyMock(spec=run_config_lib.RunConfig)
-    mock_est.config.cluster_spec = {'worker': 'dummy'}
+    mock_est.config.cluster_spec = server_lib.ClusterSpec({'worker': ['dummy']})
     mock_est.config.master = 'grpc://...'
     mock_est.config.task_type = ''
     mock_est.config.task_id = 2
@@ -630,7 +651,7 @@ class _TrainingExecutorTrainingTest(object):
     mock_eval_spec = test.mock.Mock(spec=training.EvalSpec)
 
     mock_est.config = test.mock.PropertyMock(spec=run_config_lib.RunConfig)
-    mock_est.config.cluster_spec = {'worker': 'dummy'}
+    mock_est.config.cluster_spec = server_lib.ClusterSpec({'worker': ['dummy']})
     mock_est.config.master = 'grpc://...'
     mock_est.config.task_type = 'worker'
     mock_est.config.task_id = None
@@ -768,7 +789,7 @@ class TrainingExecutorRunMasterTest(test.TestCase):
     mock_est.config = test.mock.PropertyMock(spec=run_config_lib.RunConfig)
     mock_est.config.cluster_spec = None
     mock_est.config.master = 'grpc://...'
-    mock_est.config.task_type = 'worker'
+    mock_est.config.task_type = 'master'
     mock_est.config.task_id = 2
 
     with self.assertRaisesRegexp(RuntimeError,
@@ -782,23 +803,48 @@ class TrainingExecutorRunMasterTest(test.TestCase):
     mock_eval_spec = test.mock.Mock(spec=training.EvalSpec)
 
     mock_est.config = test.mock.PropertyMock(spec=run_config_lib.RunConfig)
-    mock_est.config.cluster_spec = {'worker': 'dummy'}
+    mock_est.config.cluster_spec = server_lib.ClusterSpec(
+        {'master': ['dummy'], 'worker': ['dummy1']})
     mock_est.config.master = ''
-    mock_est.config.task_type = 'worker'
-    mock_est.config.task_id = 2
+    mock_est.config.task_type = 'master'
+    mock_est.config.task_id = 0
 
     with self.assertRaisesRegexp(RuntimeError,
                                  _INVALID_CONFIG_FOR_STD_SERVER_MSG):
       training._TrainingExecutor(
           mock_est, mock_train_spec, mock_eval_spec).run_master()
 
+  @test.mock.patch.object(time, 'sleep')
+  @test.mock.patch.object(server_lib, 'Server')
+  def test_single_master_node_with_empty_tf_master(
+      self, mock_server, unused_mock_sleep):
+    mock_est = test.mock.Mock(spec=estimator_lib.Estimator)
+    mock_est.evaluate = lambda *args, **kw: {ops.GraphKeys.GLOBAL_STEP: 123}
+
+    mock_train_spec = test.mock.Mock(spec=training.TrainSpec, max_steps=123)
+    mock_eval_spec = test.mock.Mock(spec=training.EvalSpec, exporters=[])
+
+    mock_est.config = test.mock.PropertyMock(spec=run_config_lib.RunConfig)
+    mock_est.config.cluster_spec = server_lib.ClusterSpec(
+        {'master': ['dummy']})
+    mock_est.config.master = ''
+    mock_est.config.task_type = 'master'
+    mock_est.config.task_id = 0
+
+    executor = training._TrainingExecutor(
+        mock_est, mock_train_spec, mock_eval_spec)
+    executor.run_master()
+
+    mock_server.assert_not_called()
+    mock_est.train.assert_called()
+
   def test_fail_with_empty_task_type(self):
     mock_est = test.mock.Mock(spec=estimator_lib.Estimator)
     mock_train_spec = test.mock.Mock(spec=training.TrainSpec)
     mock_eval_spec = test.mock.Mock(spec=training.EvalSpec)
 
     mock_est.config = test.mock.PropertyMock(spec=run_config_lib.RunConfig)
-    mock_est.config.cluster_spec = {'worker': 'dummy'}
+    mock_est.config.cluster_spec = server_lib.ClusterSpec({'master': ['dummy']})
     mock_est.config.master = 'grpc://...'
     mock_est.config.task_type = ''
     mock_est.config.task_id = 2
@@ -814,9 +860,9 @@ class TrainingExecutorRunMasterTest(test.TestCase):
     mock_eval_spec = test.mock.Mock(spec=training.EvalSpec)
 
     mock_est.config = test.mock.PropertyMock(spec=run_config_lib.RunConfig)
-    mock_est.config.cluster_spec = {'worker': 'dummy'}
+    mock_est.config.cluster_spec = server_lib.ClusterSpec({'master': ['dummy']})
     mock_est.config.master = 'grpc://...'
-    mock_est.config.task_type = 'worker'
+    mock_est.config.task_type = 'master'
     mock_est.config.task_id = None
 
     with self.assertRaisesRegexp(RuntimeError,
@@ -1246,7 +1292,7 @@ class TrainingExecutorRunPsTest(test.TestCase):
     mock_est.config = test.mock.PropertyMock(spec=run_config_lib.RunConfig)
     mock_est.config.cluster_spec = None
     mock_est.config.master = 'grpc://...'
-    mock_est.config.task_type = 'gs'
+    mock_est.config.task_type = 'ps'
     mock_est.config.task_id = 2
 
     with self.assertRaisesRegexp(RuntimeError,
@@ -1260,9 +1306,9 @@ class TrainingExecutorRunPsTest(test.TestCase):
     mock_eval_spec = test.mock.Mock(spec=training.EvalSpec)
 
     mock_est.config = test.mock.PropertyMock(spec=run_config_lib.RunConfig)
-    mock_est.config.cluster_spec = {'gs': 'dummy'}
+    mock_est.config.cluster_spec = server_lib.ClusterSpec({'ps': ['dummy']})
     mock_est.config.master = ''
-    mock_est.config.task_type = 'gs'
+    mock_est.config.task_type = 'ps'
     mock_est.config.task_id = 2
 
     with self.assertRaisesRegexp(RuntimeError,
@@ -1276,7 +1322,7 @@ class TrainingExecutorRunPsTest(test.TestCase):
     mock_eval_spec = test.mock.Mock(spec=training.EvalSpec)
 
     mock_est.config = test.mock.PropertyMock(spec=run_config_lib.RunConfig)
-    mock_est.config.cluster_spec = {'gs': 'dummy'}
+    mock_est.config.cluster_spec = server_lib.ClusterSpec({'ps': ['dummy']})
     mock_est.config.master = 'grpc://...'
     mock_est.config.task_type = ''
     mock_est.config.task_id = 2
@@ -1292,9 +1338,9 @@ class TrainingExecutorRunPsTest(test.TestCase):
     mock_eval_spec = test.mock.Mock(spec=training.EvalSpec)
 
     mock_est.config = test.mock.PropertyMock(spec=run_config_lib.RunConfig)
-    mock_est.config.cluster_spec = {'gs': 'dummy'}
+    mock_est.config.cluster_spec = server_lib.ClusterSpec({'ps': ['dummy']})
     mock_est.config.master = 'grpc://...'
-    mock_est.config.task_type = 'gs'
+    mock_est.config.task_type = 'ps'
     mock_est.config.task_id = None
 
     with self.assertRaisesRegexp(RuntimeError,
-- 
GitLab


From d1906697d04092fedea4489f661380db52c37bab Mon Sep 17 00:00:00 2001
From: Yuefeng Zhou <yuefengz@google.com>
Date: Tue, 5 Dec 2017 16:03:49 -0800
Subject: [PATCH 0655/1225] Add SaveRestoreMeasuringCostEstimator to measure
 the memory and runtime cost of a grappler item's save/restore subgraph.

PiperOrigin-RevId: 178025696
---
 tensorflow/core/grappler/clusters/cluster.cc | 4 ++++
 tensorflow/core/grappler/clusters/cluster.h  | 3 +++
 2 files changed, 7 insertions(+)

diff --git a/tensorflow/core/grappler/clusters/cluster.cc b/tensorflow/core/grappler/clusters/cluster.cc
index e2db47b758..01a618ed77 100644
--- a/tensorflow/core/grappler/clusters/cluster.cc
+++ b/tensorflow/core/grappler/clusters/cluster.cc
@@ -35,6 +35,10 @@ void Cluster::SetNumWarmupSteps(int num_steps) {
       num_steps);
 }
 
+int Cluster::NumWarmupSteps() const {
+  return options_.config.graph_options().build_cost_model_after();
+}
+
 void Cluster::DisableDetailedStats(bool disable) {
   if (disable) {
     options_.config.mutable_graph_options()->set_build_cost_model(0);
diff --git a/tensorflow/core/grappler/clusters/cluster.h b/tensorflow/core/grappler/clusters/cluster.h
index 616ab6ffdc..d7af50f7dc 100644
--- a/tensorflow/core/grappler/clusters/cluster.h
+++ b/tensorflow/core/grappler/clusters/cluster.h
@@ -64,6 +64,9 @@ class Cluster {
   // before Provision().
   void SetNumWarmupSteps(int num_steps);
 
+  // Returns the number of warmup steps.
+  int NumWarmupSteps() const;
+
   // Disable the collection of detailed statistics. Must be called
   // before Provision().
   void DisableDetailedStats(bool disable);
-- 
GitLab


From 41c19afe0ce1ba17685a0ebdf2bb20eded4924a6 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 5 Dec 2017 16:07:17 -0800
Subject: [PATCH 0656/1225] Implement faster and less memory hungry version of
 topological sort that is idempotent.

PiperOrigin-RevId: 178026253
---
 tensorflow/core/grappler/utils.cc             | 98 +++++++++++++++++++
 tensorflow/core/grappler/utils.h              | 38 +++++++
 .../core/grappler/utils/topological_sort.cc   | 52 +++++-----
 .../grappler/utils/topological_sort_test.cc   | 21 ++++
 4 files changed, 179 insertions(+), 30 deletions(-)

diff --git a/tensorflow/core/grappler/utils.cc b/tensorflow/core/grappler/utils.cc
index 07cf2cfc05..afcb465d27 100644
--- a/tensorflow/core/grappler/utils.cc
+++ b/tensorflow/core/grappler/utils.cc
@@ -14,6 +14,7 @@ limitations under the License.
 ==============================================================================*/
 
 #include <memory>
+#include <vector>
 
 #include "tensorflow/core/framework/attr_value.pb.h"
 #include "tensorflow/core/framework/op.h"
@@ -317,5 +318,102 @@ NodeDef* GetTailOfChain(const NodeDef& source, const NodeMap& node_map,
   return const_cast<NodeDef*>(current);
 }
 
+// Every permutation is a product of one or more cycles. Iterate over the cycles
+// in the permutation, and convert each of those into a product of
+// transpositions (swaps): https://en.wikipedia.org/wiki/Cyclic_permutation
+void PermuteNodesInPlace(GraphDef* graph, std::vector<int>* permutation,
+                         bool invert_permutation) {
+  CHECK_EQ(graph->node_size(), permutation->size());
+  std::vector<int> inv_perm(permutation->size(), 0);
+  if (invert_permutation) {
+    for (size_t n = 0; n < permutation->size(); ++n) {
+      inv_perm[(*permutation)[n]] = n;
+    }
+    permutation->swap(inv_perm);
+  }
+  for (std::size_t n = 0; n + 1 < permutation->size(); ++n) {
+    while (n != (*permutation)[n]) {
+      std::size_t r = (*permutation)[n];
+      graph->mutable_node()->SwapElements(n, r);
+      std::swap((*permutation)[n], (*permutation)[r]);
+    }
+  }
+}
+
+namespace {
+template <typename T>
+inline void STLSortAndRemoveDuplicates(T* v) {
+  std::sort(v->begin(), v->end());
+  v->erase(std::unique(v->begin(), v->end()), v->end());
+}
+}  // namespace
+
+Status SimpleGraphView::Initialize(const GraphDef& graph, bool dedup_inputs,
+                                   bool dedup_outputs) {
+  const int num_nodes = graph.node_size();
+  inputs_.clear();
+  inputs_.resize(num_nodes);
+  outputs_.clear();
+  outputs_.resize(num_nodes);
+  name_to_index_.clear();
+  name_to_index_.reserve(num_nodes);
+  index_to_name_.clear();
+  index_to_name_.reserve(num_nodes);
+
+  // Build map from name to index and vice versa.
+  for (int node_idx = 0; node_idx < num_nodes; ++node_idx) {
+    const NodeDef& node = graph.node(node_idx);
+    name_to_index_.emplace(node.name(), node_idx);
+    index_to_name_.push_back(node.name());
+  }
+
+  // Build forward and reverse adjacency lists.
+  for (int node_idx = 0; node_idx < num_nodes; ++node_idx) {
+    const NodeDef& node = graph.node(node_idx);
+    inputs_[node_idx].reserve(node.input_size());
+    for (const string& input : node.input()) {
+      auto it = name_to_index_.find(NodeName(input));
+      if (it == name_to_index_.end()) {
+        return errors::InvalidArgument("Invalid input name: ", input);
+      }
+      const int input_idx = it->second;
+      inputs_[node_idx].push_back(input_idx);
+      outputs_[input_idx].push_back(node_idx);
+    }
+    if (dedup_inputs) {
+      // Dedup the input list while it's still hot in cache.
+      STLSortAndRemoveDuplicates(&inputs_[node_idx]);
+    }
+  }
+
+  // Dedup outputs.
+  if (dedup_outputs) {
+    for (int node_idx = 0; node_idx < num_nodes; ++node_idx) {
+      STLSortAndRemoveDuplicates(&outputs_[node_idx]);
+    }
+  }
+  return Status::OK();
+}
+
+string SimpleGraphView::PrintToString() const {
+  string str;
+  for (int i = 0; i < num_nodes(); ++i) {
+    strings::StrAppend(&str, "Node ", i, "'", node_name(i), "'\n", "Inputs: [");
+    for (int input : inputs(i)) {
+      strings::StrAppend(&str, input, " '", node_name(input), "', ");
+    }
+    strings::StrAppend(&str, "]\n", "Outputs: [");
+    for (int j = 0; j < outputs(i).size(); ++j) {
+      const int output = outputs(i)[j];
+      if (j > 0) {
+        strings::StrAppend(&str, ", ");
+      }
+      strings::StrAppend(&str, output, " '", node_name(output), "'");
+    }
+    strings::StrAppend(&str, "]\n");
+  }
+  return str;
+}
+
 }  // end namespace grappler
 }  // end namespace tensorflow
diff --git a/tensorflow/core/grappler/utils.h b/tensorflow/core/grappler/utils.h
index 411e44d487..476ab8b51a 100644
--- a/tensorflow/core/grappler/utils.h
+++ b/tensorflow/core/grappler/utils.h
@@ -26,6 +26,7 @@ limitations under the License.
 #include "tensorflow/core/framework/types.h"
 #include "tensorflow/core/lib/core/status.h"
 #include "tensorflow/core/lib/core/threadpool.h"
+#include "tensorflow/core/lib/gtl/inlined_vector.h"
 
 namespace tensorflow {
 namespace grappler {
@@ -175,6 +176,43 @@ NodeDef* GetTailOfChain(const NodeDef& source, const NodeMap& node_map,
                         bool follow_control_input,
                         const std::function<bool(const NodeDef&)>& pred_fn);
 
+// Permute the nodes of graph in place according to the permutation.
+void PermuteNodesInPlace(GraphDef* graph, std::vector<int>* permutation,
+                         bool invert_permutation);
+
+class SimpleGraphView {
+ public:
+  Status Initialize(const GraphDef& graph) {
+    return Initialize(graph, true, true);
+  }
+  Status Initialize(const GraphDef& graph, bool dedup_inputs,
+                    bool dedup_outputs);
+
+  inline int num_nodes() const { return index_to_name_.size(); }
+  inline const int index(const string& node_name) const {
+    const auto& it = name_to_index_.find(node_name);
+    DCHECK(it != name_to_index_.end());
+    return it == name_to_index_.end() ? -1 : it->second;
+  }
+  inline const string& node_name(int node_idx) const {
+    return index_to_name_[node_idx];
+  }
+  inline const gtl::InlinedVector<int, 4>& inputs(int node_idx) const {
+    return inputs_[node_idx];
+  }
+  inline const gtl::InlinedVector<int, 2>& outputs(int node_idx) const {
+    return outputs_[node_idx];
+  }
+
+  string PrintToString() const;
+
+ private:
+  std::vector<string> index_to_name_;
+  std::unordered_map<string, int> name_to_index_;
+  std::vector<gtl::InlinedVector<int, 4>> inputs_;
+  std::vector<gtl::InlinedVector<int, 2>> outputs_;
+};
+
 }  // end namespace grappler
 }  // end namespace tensorflow
 
diff --git a/tensorflow/core/grappler/utils/topological_sort.cc b/tensorflow/core/grappler/utils/topological_sort.cc
index d87f43a498..8d8ff4da3a 100644
--- a/tensorflow/core/grappler/utils/topological_sort.cc
+++ b/tensorflow/core/grappler/utils/topological_sort.cc
@@ -27,55 +27,47 @@ namespace grappler {
 // Kahn's algorithm is implemented.
 // For details, see https://en.wikipedia.org/wiki/Topological_sorting
 Status TopologicalSort(GraphDef* graph) {
-  OutputMap output_map(graph);
-  std::vector<NodeDef*> ready_nodes;
-  ready_nodes.reserve(graph->node_size());
+  SimpleGraphView graph_view;
+  TF_RETURN_IF_ERROR(graph_view.Initialize(*graph));
+
+  std::vector<int> ready_nodes;
+  ready_nodes.reserve(graph_view.num_nodes());
+
   int front = 0;
   int back = 0;
-  std::unordered_map<const NodeDef*, int> ready_inputs;
-  for (int i = 0; i < graph->node_size(); i++) {
-    auto node = graph->mutable_node(i);
-    if (node->input_size() == 0) {
-      ready_nodes.push_back(node);
+  std::vector<int> num_ready_inputs(graph_view.num_nodes(), 0);
+  for (int i = 0; i < graph_view.num_nodes(); i++) {
+    if (graph_view.inputs(i).empty()) {
+      ready_nodes.push_back(i);
       back++;
     }
-    if (IsMerge(*node)) {
-      ready_inputs[node] = 0;
-      for (const auto& input : node->input()) {
-        if (IsNextIteration(*output_map.GetNode(input))) {
-          ready_inputs[node]++;
+    if (IsMerge(graph->node(i))) {
+      for (int input : graph_view.inputs(i)) {
+        if (IsNextIteration(graph->node(input))) {
+          num_ready_inputs[i]++;
         }
       }
-    } else {
-      ready_inputs[node] = 0;
     }
   }
 
   while (front != back) {
-    auto ready_node = ready_nodes[front];
-    for (const auto& fanout_pair : output_map.GetOutputs(ready_node->name())) {
-      auto fanout = fanout_pair.first;
-      ready_inputs[fanout] += fanout_pair.second;
-      if (ready_inputs[fanout] == fanout->input_size()) {
+    int ready_node = ready_nodes[front];
+    for (int fanout : graph_view.outputs(ready_node)) {
+      ++num_ready_inputs[fanout];
+      if (num_ready_inputs[fanout] == graph_view.inputs(fanout).size()) {
         ready_nodes.push_back(fanout);
-        back++;
+        ++back;
       }
     }
-    front++;
+    ++front;
   }
 
-  if (back != graph->node_size()) {
+  if (back != graph_view.num_nodes()) {
     return errors::InvalidArgument(
         "The graph couldn't be sorted in topological order.");
   }
 
-  GraphDef new_graph;
-  new_graph.mutable_node()->Reserve(graph->node_size());
-  for (int i = 0; i < graph->node_size(); i++) {
-    auto new_node = new_graph.add_node();
-    new_node->Swap(ready_nodes[i]);
-  }
-  graph->mutable_node()->Swap(new_graph.mutable_node());
+  PermuteNodesInPlace(graph, &ready_nodes, /*invert_permutation=*/true);
   return Status::OK();
 }
 
diff --git a/tensorflow/core/grappler/utils/topological_sort_test.cc b/tensorflow/core/grappler/utils/topological_sort_test.cc
index ba0fe0155a..c96f15b0e8 100644
--- a/tensorflow/core/grappler/utils/topological_sort_test.cc
+++ b/tensorflow/core/grappler/utils/topological_sort_test.cc
@@ -102,6 +102,27 @@ TEST_F(TopologicalSortTest, DuplicatedInputs) {
   }
 }
 
+TEST_F(TopologicalSortTest, Idempotent) {
+  GraphDef graph;
+  *graph.add_node() = CreateNode("1", {});
+  *graph.add_node() = CreateNode("2", {});
+  *graph.add_node() = CreateNode("3", {"1", "2"});
+  *graph.add_node() = CreateNode("4", {"1", "3"});
+  *graph.add_node() = CreateNode("5", {"2", "3"});
+
+  TF_EXPECT_OK(TopologicalSort(&graph));
+  std::vector<string> order = {"1", "2", "3", "4", "5"};
+  for (int i = 0; i < order.size(); i++) {
+    EXPECT_EQ(graph.node(i).name(), order[i]);
+  }
+
+  // Run topo sort again to verify that it is idenpotent.
+  TF_EXPECT_OK(TopologicalSort(&graph));
+  for (int i = 0; i < order.size(); i++) {
+    EXPECT_EQ(graph.node(i).name(), order[i]);
+  }
+}
+
 }  // namespace
 }  // namespace grappler
 }  // namespace tensorflow
-- 
GitLab


From cc7482fd0a0f3a370880a0108f2c980bb808b277 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 5 Dec 2017 16:13:30 -0800
Subject: [PATCH 0657/1225] Change InputArray.shape from being a repeated int
 field to being an optional embedded message itself containing a repeated int
 field (now called 'dims'). This matches existing shape structurs (both in
 Toco internally, and in TensorFlow) and is necessary in order to disambiguate
 between a 0-dimensional shape and an undefined/unknown shape. This is a
 necessary prerequisite, in particular, for allowing toco to operate without
 given fixed input shapes, as so far these were impossible to disambiguate
 from fixed 0-dimensional shapes.

PiperOrigin-RevId: 178027064
---
 tensorflow/contrib/lite/python/lite.py        |  2 +-
 .../lite/toco/allocate_transient_arrays.cc    |  3 +-
 .../contrib/lite/toco/model_cmdline_flags.cc  | 29 +++++----------
 .../contrib/lite/toco/model_flags.proto       |  8 +++--
 .../lite/toco/python/toco_from_protos_test.py |  2 +-
 tensorflow/contrib/lite/toco/tooling_util.cc  | 36 +++++++++----------
 6 files changed, 36 insertions(+), 44 deletions(-)

diff --git a/tensorflow/contrib/lite/python/lite.py b/tensorflow/contrib/lite/python/lite.py
index 982ea90f2b..95309478a6 100644
--- a/tensorflow/contrib/lite/python/lite.py
+++ b/tensorflow/contrib/lite/python/lite.py
@@ -187,7 +187,7 @@ def toco_convert(input_data,
       input_array.mean, input_array.std = quantized_input_stats[idx]
 
     input_array.name = _tensor_name(input_tensor)
-    input_array.shape.extend(map(int, input_tensor.get_shape()))
+    input_array.shape.dims.extend(map(int, input_tensor.get_shape()))
     toco.inference_input_type = tflite_input_type
 
   for output_tensor in output_tensors:
diff --git a/tensorflow/contrib/lite/toco/allocate_transient_arrays.cc b/tensorflow/contrib/lite/toco/allocate_transient_arrays.cc
index 2f4454d7c8..62e7282d16 100644
--- a/tensorflow/contrib/lite/toco/allocate_transient_arrays.cc
+++ b/tensorflow/contrib/lite/toco/allocate_transient_arrays.cc
@@ -218,7 +218,8 @@ void AllocateTransientArrays(Model* model,
   // just guard this assumption with a CHECK:
   bool batchless_input_shapes = true;
   for (const auto& input_array : model->flags.input_arrays()) {
-    if (input_array.shape().empty() || input_array.shape(0) != 1) {
+    if (!input_array.has_shape() || input_array.shape().dims().empty() ||
+        input_array.shape().dims(0) != 1) {
       batchless_input_shapes = false;
       break;
     }
diff --git a/tensorflow/contrib/lite/toco/model_cmdline_flags.cc b/tensorflow/contrib/lite/toco/model_cmdline_flags.cc
index dde602e186..54ed95650e 100644
--- a/tensorflow/contrib/lite/toco/model_cmdline_flags.cc
+++ b/tensorflow/contrib/lite/toco/model_cmdline_flags.cc
@@ -265,10 +265,10 @@ void ReadModelFlagsFromCommandLineFlags(
       model_flags->add_input_arrays();
     }
     auto* shape = model_flags->mutable_input_arrays(0)->mutable_shape();
-    shape->Clear();
+    shape->clear_dims();
     const IntList& list = parsed_model_flags.input_shape.value();
     for (auto& dim : list.elements) {
-      shape->Add(dim);
+      shape->add_dims(dim);
     }
   }
   if (parsed_model_flags.input_shapes.specified()) {
@@ -278,25 +278,12 @@ void ReadModelFlagsFromCommandLineFlags(
     QCHECK(input_shapes.size() == model_flags->input_arrays_size());
     for (int i = 0; i < input_shapes.size(); ++i) {
       auto* shape = model_flags->mutable_input_arrays(i)->mutable_shape();
-      shape->Clear();
-      if (input_shapes[i].empty()) {
-        // empty i.e. 0-dimensional input shape.
-        // Unfortunately, the current toco::InputArray
-        // proto does not allow to distinguish between a known 0-D shape,
-        // and an unknown shape. Indeed, shape is currently a plain array,
-        // and it being empty means unknown shape. So here, we import a
-        // 0-D shape as a 1-D shape of size.
-        // TODO(benoitjacob): fix toco::InputArray to allow 0-D shape,
-        // probably by making shape an optional message,
-        // encapsulating the array.
-        shape->Add(1);
-      } else {
-        for (const auto& dim_str : absl::StrSplit(input_shapes[i], ',')) {
-          int size;
-          CHECK(absl::SimpleAtoi(dim_str, &size))
-              << "Failed to parse input_shape: " << input_shapes[i];
-          shape->Add(size);
-        }
+      shape->clear_dims();
+      for (const auto& dim_str : absl::StrSplit(input_shapes[i], ',')) {
+        int size;
+        CHECK(absl::SimpleAtoi(dim_str, &size))
+            << "Failed to parse input_shape: " << input_shapes[i];
+        shape->add_dims(size);
       }
     }
   }
diff --git a/tensorflow/contrib/lite/toco/model_flags.proto b/tensorflow/contrib/lite/toco/model_flags.proto
index 5b30904696..d818a3632d 100644
--- a/tensorflow/contrib/lite/toco/model_flags.proto
+++ b/tensorflow/contrib/lite/toco/model_flags.proto
@@ -16,7 +16,11 @@ import "tensorflow/contrib/lite/toco/types.proto";
 
 package toco;
 
-// Next ID to USE: 6.
+message InputArrayShape {
+  repeated int32 dims = 2;
+}
+
+// Next ID to USE: 7.
 message InputArray {
   // Name of the input arrays, i.e. the arrays from which input activations
   // will be read.
@@ -28,7 +32,7 @@ message InputArray {
   //
   // The last dimension is typically called 'depth' or 'channels'. For example,
   // for an image model taking RGB images as input, this would have the value 3.
-  repeated int32 shape = 2;
+  optional InputArrayShape shape = 6;
 
   // mean_value and std_value parameters control the interpretation of raw input
   // activation values (elements of the input array) as real numbers. The
diff --git a/tensorflow/contrib/lite/toco/python/toco_from_protos_test.py b/tensorflow/contrib/lite/toco/python/toco_from_protos_test.py
index 28d52067a9..c35b6f9925 100644
--- a/tensorflow/contrib/lite/toco/python/toco_from_protos_test.py
+++ b/tensorflow/contrib/lite/toco/python/toco_from_protos_test.py
@@ -53,7 +53,7 @@ class TocoFromProtosTest(googletest.TestCase):
     model_flags = model_flags_pb2.ModelFlags()
     input_array = model_flags.input_arrays.add()
     input_array.name = TensorName(in_tensor)
-    input_array.shape.extend(map(int, in_tensor.get_shape()))
+    input_array.shape.dims.extend(map(int, in_tensor.get_shape()))
     model_flags.output_arrays.append(TensorName(out_tensor))
     # Shell out to run toco (in case it crashes)
     with tempfile.NamedTemporaryFile() as fp_toco, \
diff --git a/tensorflow/contrib/lite/toco/tooling_util.cc b/tensorflow/contrib/lite/toco/tooling_util.cc
index 3f289817e0..637287a947 100644
--- a/tensorflow/contrib/lite/toco/tooling_util.cc
+++ b/tensorflow/contrib/lite/toco/tooling_util.cc
@@ -914,9 +914,9 @@ void CreateOrCheckRnnStateArray(const string& name, int size, Model* model) {
     // Pick 'num_dims' and 'batch' from the first input_arrays, unless we find
     // a better match by name.
     if (input_array.name() == name || num_dims == -1) {
-      num_dims = input_array.shape_size();
-      if (num_dims != 0) {
-        batch = input_array.shape(0);
+      num_dims = input_array.shape().dims_size();
+      if (num_dims > 0) {
+        batch = input_array.shape().dims(0);
       }
     }
   }
@@ -985,29 +985,29 @@ void ResolveModelFlags(const ModelFlags& model_flags, Model* model) {
     RESOLVE_MODEL_FLAG(mean_value);
 #undef RESOLVE_MODEL_FLAG
 
-    if (!specified_input_array.shape().empty()) {
-      if (!dst_input_array->shape().empty()) {
-        QCHECK_EQ(specified_input_array.shape().size(),
-                  dst_input_array->shape().size())
+    if (specified_input_array.has_shape()) {
+      if (dst_input_array->has_shape()) {
+        QCHECK_EQ(specified_input_array.shape().dims_size(),
+                  dst_input_array->shape().dims_size())
             << "For input array '" << specified_input_array.name() << "', "
             << "size of specified input shape flag with size: "
-            << specified_input_array.shape().size()
+            << specified_input_array.shape().dims_size()
             << " does not agree with already defined input shape"
                " of this model, with size: "
-            << dst_input_array->shape().size();
+            << dst_input_array->shape().dims_size();
         // We treat the first dimension as a special case, since it is often
         // a batch size and the input_shape flag is effectively overriding
         // the model.
-        for (int i = 1; i < specified_input_array.shape().size(); i++) {
-          QCHECK_EQ(specified_input_array.shape().Get(i),
-                    dst_input_array->shape().Get(i))
+        for (int i = 1; i < specified_input_array.shape().dims_size(); i++) {
+          QCHECK_EQ(specified_input_array.shape().dims(i),
+                    dst_input_array->shape().dims(i))
               << "At dimension number " << i << " of input array "
               << specified_input_array.name() << ", the specified shape's "
               << "dimension flag with dimension: "
-              << specified_input_array.shape().Get(i)
+              << specified_input_array.shape().dims(i)
               << " does not agree with already defined shape"
               << " of this model, with dimension: "
-              << dst_input_array->shape().Get(i);
+              << dst_input_array->shape().dims(i);
         }
       } else {
         dst_input_array->mutable_shape()->CopyFrom(
@@ -1089,7 +1089,7 @@ void ResolveModelFlags(const ModelFlags& model_flags, Model* model) {
     }
 
     if (!input_array.has_shape()) {
-      QCHECK(!input_array_proto.shape().empty())
+      QCHECK(!input_array_proto.shape().dims().empty())
           << "This model does not have shape defined for input array "
           << input_array_proto.name();
     }
@@ -1098,14 +1098,14 @@ void ResolveModelFlags(const ModelFlags& model_flags, Model* model) {
     // the actual input array's shape.
     auto& input_array_dims = *input_array.mutable_shape()->mutable_dims();
     if (input_array_dims.empty()) {
-      for (auto dim : input_array_proto.shape()) {
+      for (auto dim : input_array_proto.shape().dims()) {
         CHECK_GE(dim, 1);
         input_array_dims.push_back(dim);
       }
     } else {
-      CHECK_EQ(input_array_dims.size(), input_array_proto.shape_size());
+      CHECK_EQ(input_array_dims.size(), input_array_proto.shape().dims_size());
       for (int i = 0; i < input_array_dims.size(); i++) {
-        CHECK_EQ(input_array_dims[i], input_array_proto.shape(i));
+        CHECK_EQ(input_array_dims[i], input_array_proto.shape().dims(i));
       }
     }
 
-- 
GitLab


From 1e782498561e176749f4f43975b2236ff1505a11 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 5 Dec 2017 16:56:17 -0800
Subject: [PATCH 0658/1225] Improve module docstrings, which show up in Google
 search.

PiperOrigin-RevId: 178032838
---
 tensorflow/contrib/gan/__init__.py                  | 8 +++++++-
 tensorflow/contrib/gan/python/estimator/__init__.py | 6 +++++-
 tensorflow/contrib/gan/python/eval/__init__.py      | 6 +++++-
 tensorflow/contrib/gan/python/features/__init__.py  | 6 +++++-
 tensorflow/contrib/gan/python/losses/__init__.py    | 5 ++++-
 5 files changed, 26 insertions(+), 5 deletions(-)

diff --git a/tensorflow/contrib/gan/__init__.py b/tensorflow/contrib/gan/__init__.py
index dff361fdc4..f1946c7f92 100644
--- a/tensorflow/contrib/gan/__init__.py
+++ b/tensorflow/contrib/gan/__init__.py
@@ -12,7 +12,13 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""TFGAN grouped API. Please see README.md for details and usage."""
+"""TFGAN is a lightweight library for training and evaluating GANs.
+
+In addition to providing the infrastructure for easily training and evaluating
+GANS, this library contains modules for a TFGAN-backed Estimator,
+evaluation metrics, features (such as virtual batch normalization), and losses.
+Please see README.md for details and usage.
+"""
 
 from __future__ import absolute_import
 from __future__ import division
diff --git a/tensorflow/contrib/gan/python/estimator/__init__.py b/tensorflow/contrib/gan/python/estimator/__init__.py
index 8c4a182280..c9f7bc61b2 100644
--- a/tensorflow/contrib/gan/python/estimator/__init__.py
+++ b/tensorflow/contrib/gan/python/estimator/__init__.py
@@ -12,7 +12,11 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""TFGAN grouped API. Please see README.md for details and usage."""
+"""TFGAN estimator module.
+
+GANEstimator provides all the infrastructure support of a TensorFlow Estimator
+with the feature support of TFGAN.
+"""
 
 from __future__ import absolute_import
 from __future__ import division
diff --git a/tensorflow/contrib/gan/python/eval/__init__.py b/tensorflow/contrib/gan/python/eval/__init__.py
index bb80461878..7daf78bc5d 100644
--- a/tensorflow/contrib/gan/python/eval/__init__.py
+++ b/tensorflow/contrib/gan/python/eval/__init__.py
@@ -12,7 +12,11 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""TFGAN grouped API. Please see README.md for details and usage."""
+"""TFGAN evaluation module.
+
+This module supports techniques such as Inception Score, Frechet Inception
+distance, and Sliced Wasserstein distance.
+"""
 # pylint: disable=,wildcard-import,unused-import
 
 from __future__ import absolute_import
diff --git a/tensorflow/contrib/gan/python/features/__init__.py b/tensorflow/contrib/gan/python/features/__init__.py
index 50d0bfa17c..4816daf760 100644
--- a/tensorflow/contrib/gan/python/features/__init__.py
+++ b/tensorflow/contrib/gan/python/features/__init__.py
@@ -12,7 +12,11 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""TFGAN grouped API. Please see README.md for details and usage."""
+"""TFGAN features module.
+
+This module includes support for virtual batch normalization, buffer replay,
+conditioning, etc.
+"""
 
 from __future__ import absolute_import
 from __future__ import division
diff --git a/tensorflow/contrib/gan/python/losses/__init__.py b/tensorflow/contrib/gan/python/losses/__init__.py
index 290ff867a1..d9bf8ebfdf 100644
--- a/tensorflow/contrib/gan/python/losses/__init__.py
+++ b/tensorflow/contrib/gan/python/losses/__init__.py
@@ -12,7 +12,10 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""TFGAN grouped API. Please see README.md for details and usage."""
+"""TFGAN losses and penalties.
+
+Losses can be used with individual arguments or with GANModel tuples.
+"""
 
 from __future__ import absolute_import
 from __future__ import division
-- 
GitLab


From 44cb3887b39d2c5edbf5f5a638cef08be8bc6aeb Mon Sep 17 00:00:00 2001
From: Yao Zhang <yaozhang@google.com>
Date: Tue, 5 Dec 2017 16:57:48 -0800
Subject: [PATCH 0659/1225] Support a vector and a 4D tensor as inputs to a
 binary op.

PiperOrigin-RevId: 178033021
---
 .../grappler/optimizers/layout_optimizer.cc   | 34 +++++++++----------
 .../optimizers/layout_optimizer_test.cc       | 14 +++++---
 2 files changed, 26 insertions(+), 22 deletions(-)

diff --git a/tensorflow/core/grappler/optimizers/layout_optimizer.cc b/tensorflow/core/grappler/optimizers/layout_optimizer.cc
index 96144abbe8..eac3b6b0f1 100644
--- a/tensorflow/core/grappler/optimizers/layout_optimizer.cc
+++ b/tensorflow/core/grappler/optimizers/layout_optimizer.cc
@@ -828,19 +828,15 @@ class AddNProcessor : public AgnosticNodeProcessor {
 class BinaryOpProcessor : public AgnosticNodeProcessor {
  public:
   explicit BinaryOpProcessor(const OptimizeContext& opt_cxt)
-      : AgnosticNodeProcessor(opt_cxt) {
-    is_4d_with_vector_ = IsNDOperateWithMD(4, 1);
-  }
+      : AgnosticNodeProcessor(opt_cxt) {}
 
  protected:
   bool ShouldProcess() const override {
-    // TODO(yaozhang): Support IsNDOperateWithMD(1, 4): first input is a vector
-    // and the second input is a 4D tensor; and update CustomizedProcessing()
-    // accordingly.
     return !MustPreserve() && IsDimsFour(*node_) && HasOutputs() &&
            IsNodeAfterNCHWToNHWC() &&
            (IsNDOperateWithMD(4, 0) || IsNDOperateWithMD(4, 1) ||
-            IsNDOperateWithMD(4, 4) || IsNDOperateWithMD(0, 4)) &&
+            IsNDOperateWithMD(4, 4) || IsNDOperateWithMD(0, 4) ||
+            IsNDOperateWithMD(1, 4)) &&
            IsOnGPU();
   }
 
@@ -915,31 +911,35 @@ class BinaryOpProcessor : public AgnosticNodeProcessor {
   }
 
   Status CustomizedProcessing() override {
-    if (is_4d_with_vector_) {
-      string base_name = strings::StrCat(node_->name(), "-", node_->input(1));
+    int vector_index = -1;
+    if (IsNDOperateWithMD(4, 1)) {
+      vector_index = 1;
+    } else if (IsNDOperateWithMD(1, 4)) {
+      vector_index = 0;
+    }
+    if (vector_index != -1) {
+      string base_name =
+          strings::StrCat(node_->name(), "-", node_->input(vector_index));
       string reshape_node_name =
           AddPrefixToNodeName(base_name, kReshapeNHWCToNCHW, "-");
       string shape_const_node_name =
           AddPrefixToNodeName(base_name, kReshapeConst, "-");
-      auto input_node = node_map_->GetNode(node_->input(1));
+      auto input_node = node_map_->GetNode(node_->input(vector_index));
       TF_RETURN_IF_ERROR(HasAttribute(*input_node, "_output_shapes"));
       int vector_size =
           input_node->attr().at("_output_shapes").list().shape(0).dim(0).size();
       AddNodeShapeConst(shape_const_node_name, vector_size);
       TF_RETURN_IF_ERROR(HasAttribute(*node_, "T"));
-      AddNodeReshape(reshape_node_name, node_->input(1), shape_const_node_name,
-                     node_->attr().at("T").type());
+      AddNodeReshape(reshape_node_name, node_->input(vector_index),
+                     shape_const_node_name, node_->attr().at("T").type());
       node_map_->AddOutput(shape_const_node_name, reshape_node_name);
-      node_map_->UpdateOutput(node_->input(1), node_->name(),
+      node_map_->UpdateOutput(node_->input(vector_index), node_->name(),
                               reshape_node_name);
       node_map_->AddOutput(reshape_node_name, node_->name());
-      *node_->mutable_input(1) = reshape_node_name;
+      *node_->mutable_input(vector_index) = reshape_node_name;
     }
     return Status::OK();
   }
-
- private:
-  bool is_4d_with_vector_;
 };
 
 class ConcatProcessor : public AgnosticNodeProcessor {
diff --git a/tensorflow/core/grappler/optimizers/layout_optimizer_test.cc b/tensorflow/core/grappler/optimizers/layout_optimizer_test.cc
index 5ad2e25392..4d932a0932 100644
--- a/tensorflow/core/grappler/optimizers/layout_optimizer_test.cc
+++ b/tensorflow/core/grappler/optimizers/layout_optimizer_test.cc
@@ -755,11 +755,15 @@ TEST_F(LayoutOptimizerTest, MulVectorAnd4D) {
   Status status = optimizer.Optimize(virtual_cluster_.get(), item, &output);
   NodeMap node_map(&output);
   auto mul_node = node_map.GetNode("mul");
-  // TODO(yaozhang): Support vector as the first input and 4d tensor as the
-  // second input for BinaryOpProcessor.
-  EXPECT_EQ(mul_node->input(0), "vector");
-  EXPECT_EQ(mul_node->input(1),
-            "LayoutOptimizerTransposeNCHWToNHWC-Conv2D-mul-1");
+  EXPECT_EQ(mul_node->input(0), "LayoutOptimizerReshapeNHWCToNCHW-mul-vector");
+  EXPECT_EQ(mul_node->input(1), "Conv2D");
+  auto mul_const = node_map.GetNode("LayoutOptimizerReshapeConst-mul-vector");
+  Tensor tensor;
+  EXPECT_TRUE(
+      tensor.FromProto(mul_const->mutable_attr()->at({"value"}).tensor()));
+  Tensor tensor_expected(DT_INT32, {4});
+  test::FillValues<int>(&tensor_expected, {1, 2, 1, 1});
+  test::ExpectTensorEqual<int>(tensor_expected, tensor);
 }
 
 TEST_F(LayoutOptimizerTest, SliceConst) {
-- 
GitLab


From af8a5507937108a41781ba117fa16edd3b1091b5 Mon Sep 17 00:00:00 2001
From: Derek Murray <mrry@google.com>
Date: Tue, 5 Dec 2017 16:58:24 -0800
Subject: [PATCH 0660/1225] Always include the function library when exporting
 a MetaGraphDef.

Previously, some code paths through `tf.train.export_meta_graph()` did
not ensure that the function library was persisted in the resulting
`MetaGraphDef`. This would break serialization for meta-graphs that
included `tf.data` pipelines that used functions. This fix ensures
that the library is copied to all such meta-graphs.

Fixes #15019. Fixes #14143.

PiperOrigin-RevId: 178033103
---
 tensorflow/python/BUILD                   |  1 +
 tensorflow/python/framework/meta_graph.py |  4 ++++
 tensorflow/python/framework/ops.py        | 26 +++++++++++++----------
 tensorflow/python/training/saver_test.py  | 26 +++++++++++++++++++++++
 4 files changed, 46 insertions(+), 11 deletions(-)

diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD
index ed44e9e332..05fd81c8d3 100644
--- a/tensorflow/python/BUILD
+++ b/tensorflow/python/BUILD
@@ -3616,6 +3616,7 @@ cuda_py_test(
         "//third_party/py/numpy",
         "@six_archive//:six",
         "//tensorflow/core:protos_all_py",
+        "//tensorflow/python/data/ops:dataset_ops",
     ],
 )
 
diff --git a/tensorflow/python/framework/meta_graph.py b/tensorflow/python/framework/meta_graph.py
index 44ddc013b2..c839d7a9a6 100644
--- a/tensorflow/python/framework/meta_graph.py
+++ b/tensorflow/python/framework/meta_graph.py
@@ -773,6 +773,7 @@ def export_scoped_meta_graph(filename=None,
     if graph_def:
       new_graph_def = graph_pb2.GraphDef()
       new_graph_def.versions.CopyFrom(graph_def.versions)
+      new_graph_def.library.CopyFrom(graph_def.library)
 
       if clear_extraneous_savers:
         exclude_nodes = _find_extraneous_saver_nodes(graph_def, saver_def)
@@ -810,6 +811,9 @@ def export_scoped_meta_graph(filename=None,
           bytesize += value.node_def.ByteSize()
           if bytesize >= (1 << 31) or bytesize < 0:
             raise ValueError("GraphDef cannot be larger than 2GB.")
+
+      graph._copy_functions_to_graph_def(graph_def, bytesize)  # pylint: disable=protected-access
+
     # It's possible that not all the inputs are in the export_scope.
     # If we would like such information included in the exported meta_graph,
     # add them to a special unbound_inputs collection.
diff --git a/tensorflow/python/framework/ops.py b/tensorflow/python/framework/ops.py
index 61dd435106..65f7e97ab1 100644
--- a/tensorflow/python/framework/ops.py
+++ b/tensorflow/python/framework/ops.py
@@ -2888,6 +2888,20 @@ class Graph(object):
     """
     self._control_flow_context = ctx
 
+  def _copy_functions_to_graph_def(self, graph_def, starting_bytesize):
+    """If this graph contains functions, copy them to `graph_def`."""
+    bytesize = starting_bytesize
+    for f in self._functions.values():
+      bytesize += f.definition.ByteSize()
+      if bytesize >= (1 << 31) or bytesize < 0:
+        raise ValueError("GraphDef cannot be larger than 2GB.")
+      graph_def.library.function.extend([f.definition])
+      if f.grad_func_name:
+        grad_def = function_pb2.GradientDef()
+        grad_def.function_name = f.name
+        grad_def.gradient_func = f.grad_func_name
+        graph_def.library.gradient.extend([grad_def])
+
   def _as_graph_def(self, from_version=None, add_shapes=False):
     # pylint: disable=line-too-long
     """Returns a serialized `GraphDef` representation of this graph.
@@ -2931,17 +2945,7 @@ class Graph(object):
           bytesize += op.node_def.ByteSize()
           if bytesize >= (1 << 31) or bytesize < 0:
             raise ValueError("GraphDef cannot be larger than 2GB.")
-      if self._functions:
-        for f in self._functions.values():
-          bytesize += f.definition.ByteSize()
-          if bytesize >= (1 << 31) or bytesize < 0:
-            raise ValueError("GraphDef cannot be larger than 2GB.")
-          graph.library.function.extend([f.definition])
-          if f.grad_func_name:
-            grad_def = function_pb2.GradientDef()
-            grad_def.function_name = f.name
-            grad_def.gradient_func = f.grad_func_name
-            graph.library.gradient.extend([grad_def])
+      self._copy_functions_to_graph_def(graph, bytesize)
       return graph, self._version
 
   def as_graph_def(self, from_version=None, add_shapes=False):
diff --git a/tensorflow/python/training/saver_test.py b/tensorflow/python/training/saver_test.py
index 74ee1e5fa8..ffe933bb0f 100644
--- a/tensorflow/python/training/saver_test.py
+++ b/tensorflow/python/training/saver_test.py
@@ -38,6 +38,7 @@ from tensorflow.core.protobuf import queue_runner_pb2
 from tensorflow.core.protobuf import saver_pb2
 from tensorflow.python import pywrap_tensorflow
 from tensorflow.python.client import session
+from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.eager import context
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
@@ -2137,6 +2138,31 @@ class MetaGraphTest(test.TestCase):
               10, size=[1, 10])
       })
 
+  def testPreserveDatasetAndFunctions(self):
+    with ops_lib.Graph().as_default() as g:
+      dataset = dataset_ops.Dataset.range(10).map(lambda x: x * x)
+      iterator = dataset.make_one_shot_iterator()
+      next_element = iterator.get_next()
+      _ = array_ops.identity(next_element, name="output")
+
+      # Generate three MetaGraphDef protos using different code paths.
+      meta_graph_def_simple = saver_module.export_meta_graph()
+      meta_graph_def_devices_cleared = saver_module.export_meta_graph(
+          clear_devices=True)
+      meta_graph_def_from_graph_def = saver_module.export_meta_graph(
+          clear_devices=True, graph_def=g.as_graph_def())
+
+    for meta_graph_def in [meta_graph_def_simple,
+                           meta_graph_def_devices_cleared,
+                           meta_graph_def_from_graph_def]:
+      with session.Session(graph=ops_lib.Graph()) as sess:
+        saver_module.import_meta_graph(meta_graph_def, import_scope="new_model")
+        sess.run(variables.global_variables_initializer())
+        for i in range(10):
+          self.assertEqual(i * i, sess.run("new_model/output:0"))
+        with self.assertRaises(errors.OutOfRangeError):
+          sess.run("new_model/output:0")
+
 
 class CheckpointReaderTest(test.TestCase):
 
-- 
GitLab


From fb857dcef928700b9d75c80b533299957a1581fb Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 5 Dec 2017 17:01:07 -0800
Subject: [PATCH 0661/1225] Adds an optional dict to hold tensors that are
 concatenated into the returned tensor

Notes: for learning tasks built on sparse signals, most of the tensors that go
into the returned tensors are embedding, which are potentially useful for
applications that consume embeddings from other models. This makes it easy for
the caller to retrieve these tensors and make their customized signatures.
PiperOrigin-RevId: 178033410
---
 .../python/layers/feature_column_ops.py       | 15 ++++++++---
 .../python/layers/feature_column_ops_test.py  | 25 +++++++++++++++++++
 2 files changed, 37 insertions(+), 3 deletions(-)

diff --git a/tensorflow/contrib/layers/python/layers/feature_column_ops.py b/tensorflow/contrib/layers/python/layers/feature_column_ops.py
index fa0047f05d..78affea44c 100644
--- a/tensorflow/contrib/layers/python/layers/feature_column_ops.py
+++ b/tensorflow/contrib/layers/python/layers/feature_column_ops.py
@@ -97,10 +97,13 @@ def _input_from_feature_columns(columns_to_tensors,
                                 trainable,
                                 scope,
                                 output_rank,
-                                default_name):
+                                default_name,
+                                cols_to_outs=None):
   """Implementation of `input_from(_sequence)_feature_columns`."""
   columns_to_tensors = columns_to_tensors.copy()
   check_feature_columns(feature_columns)
+  if cols_to_outs is not None and not isinstance(cols_to_outs, dict):
+    raise ValueError('cols_to_outs must be a dict unless None')
   with variable_scope.variable_scope(scope,
                                      default_name=default_name,
                                      values=columns_to_tensors.values()):
@@ -144,6 +147,8 @@ def _input_from_feature_columns(columns_to_tensors,
           except ValueError as e:
             raise ValueError('Error creating input layer for column: {}.\n'
                              '{}, {}'.format(column.name, e, ee))
+        if cols_to_outs is not None:
+          cols_to_outs[column] = output_tensors[-1]
     return array_ops.concat(output_tensors, output_rank - 1)
 
 
@@ -151,7 +156,8 @@ def input_from_feature_columns(columns_to_tensors,
                                feature_columns,
                                weight_collections=None,
                                trainable=True,
-                               scope=None):
+                               scope=None,
+                               cols_to_outs=None):
   """A tf.contrib.layers style input layer builder based on FeatureColumns.
 
   Generally a single example in training data is described with feature columns.
@@ -196,6 +202,8 @@ def input_from_feature_columns(columns_to_tensors,
     trainable: If `True` also add variables to the graph collection
       `GraphKeys.TRAINABLE_VARIABLES` (see tf.Variable).
     scope: Optional scope for variable_scope.
+    cols_to_outs: Optional dict from feature column to output tensor,
+      which is concatenated into the returned tensor.
 
   Returns:
     A Tensor which can be consumed by hidden layers in the neural network.
@@ -209,7 +217,8 @@ def input_from_feature_columns(columns_to_tensors,
                                      trainable,
                                      scope,
                                      output_rank=2,
-                                     default_name='input_from_feature_columns')
+                                     default_name='input_from_feature_columns',
+                                     cols_to_outs=cols_to_outs)
 
 
 @experimental
diff --git a/tensorflow/contrib/layers/python/layers/feature_column_ops_test.py b/tensorflow/contrib/layers/python/layers/feature_column_ops_test.py
index fbfa0e32de..e6bbd86ab7 100644
--- a/tensorflow/contrib/layers/python/layers/feature_column_ops_test.py
+++ b/tensorflow/contrib/layers/python/layers/feature_column_ops_test.py
@@ -607,6 +607,31 @@ class CreateInputLayersForDNNsTest(test.TestCase):
       # Verify cross compatibility: Core builder output should equal to contrib.
       self.assertAllEqual(output.eval().shape, output_core.eval().shape)
 
+  def testAllDNNColumnsWithColumnwiseOutputs(self):
+    sparse_column = feature_column.sparse_column_with_keys(
+        "ids", ["a", "b", "c", "unseen"])
+    real_valued_column = feature_column.real_valued_column("income", 2)
+    one_hot_column = feature_column.one_hot_column(sparse_column)
+    embedding_column = feature_column.embedding_column(sparse_column, 10)
+    features = {
+        "ids":
+            sparse_tensor.SparseTensor(
+                values=["c", "b", "a"],
+                indices=[[0, 0], [1, 0], [2, 0]],
+                dense_shape=[3, 1]),
+        "income":
+            constant_op.constant([[20.3, 10], [110.3, 0.4], [-3.0, 30.4]]),
+    }
+    columns = [one_hot_column, embedding_column, real_valued_column]
+    cols_to_outs = {}
+    feature_column_ops.input_from_feature_columns(
+        features, columns, cols_to_outs=cols_to_outs)
+    with self.test_session():
+      variables_lib.global_variables_initializer().run()
+      lookup_ops.tables_initializer().run()
+      for column in columns:
+        self.assertTrue(column in cols_to_outs)
+
   def testRealValuedColumn(self):
     real_valued = feature_column.real_valued_column("price")
     features = {"price": constant_op.constant([[20.], [110], [-3]])}
-- 
GitLab


From d9a71ad0e5723e6e97741d60be106a6658cde4e4 Mon Sep 17 00:00:00 2001
From: Austin Anderson <angerson@google.com>
Date: Tue, 5 Dec 2017 17:36:42 -0800
Subject: [PATCH 0662/1225] Fix some build incompatibilities with new versions
 of Bazel

See #15137.

PiperOrigin-RevId: 178037461
---
 tensorflow/tools/test/performance.bzl | 5 +++--
 third_party/jpeg/jpeg.BUILD           | 8 ++++++--
 third_party/sycl/sycl/BUILD.tpl       | 4 ++--
 3 files changed, 11 insertions(+), 6 deletions(-)

diff --git a/tensorflow/tools/test/performance.bzl b/tensorflow/tools/test/performance.bzl
index b5c4bbf5a7..cee53dd5b6 100644
--- a/tensorflow/tools/test/performance.bzl
+++ b/tensorflow/tools/test/performance.bzl
@@ -21,8 +21,9 @@ def tf_cc_logged_benchmark(
     fail(" ".join(("Target must be a single well-defined test, e.g.,",
                    "//path/to:test. Received: %s" % target)))
 
-  all_tags = list(depset(tags) + \
-                  depset(["benchmark-test", "local", "manual", "regression-test"]))
+  all_tags = (
+    depset(tags) + depset(
+      ["benchmark-test", "local", "manual", "regression-test"])).to_list()
 
   tf_py_test(
       name = name,
diff --git a/third_party/jpeg/jpeg.BUILD b/third_party/jpeg/jpeg.BUILD
index e431f19382..527a08c4b3 100644
--- a/third_party/jpeg/jpeg.BUILD
+++ b/third_party/jpeg/jpeg.BUILD
@@ -323,14 +323,18 @@ JCONFIG_NOWIN_COMMON_SUBSTITUTIONS = {
     "#undef RIGHT_SHIFT_IS_UNSIGNED": "",
 }
 
-JCONFIG_NOWIN_SIMD_SUBSTITUTIONS = JCONFIG_NOWIN_COMMON_SUBSTITUTIONS + {
+JCONFIG_NOWIN_SIMD_SUBSTITUTIONS = {
     "#undef WITH_SIMD": "#define WITH_SIMD 1",
 }
 
-JCONFIG_NOWIN_NOSIMD_SUBSTITUTIONS = JCONFIG_NOWIN_COMMON_SUBSTITUTIONS + {
+JCONFIG_NOWIN_NOSIMD_SUBSTITUTIONS = {
     "#undef WITH_SIMD": "",
 }
 
+JCONFIG_NOWIN_SIMD_SUBSTITUTIONS.update(JCONFIG_NOWIN_COMMON_SUBSTITUTIONS)
+
+JCONFIG_NOWIN_NOSIMD_SUBSTITUTIONS.update(JCONFIG_NOWIN_COMMON_SUBSTITUTIONS)
+
 template_rule(
     name = "jconfig_nowin_nosimd",
     src = "jconfig.h.in",
diff --git a/third_party/sycl/sycl/BUILD.tpl b/third_party/sycl/sycl/BUILD.tpl
index b6ceaadda7..21b1a2bbf7 100755
--- a/third_party/sycl/sycl/BUILD.tpl
+++ b/third_party/sycl/sycl/BUILD.tpl
@@ -1,9 +1,9 @@
 licenses(["notice"])  # Apache 2.0
 
 load("@local_config_sycl//sycl:build_defs.bzl", "if_sycl")
-load("platform", "sycl_library_path")
+load(":platform.bzl", "sycl_library_path")
 
-load("platform", "readlink_command")
+load(":platform.bzl", "readlink_command")
 
 package(default_visibility = ["//visibility:public"])
 
-- 
GitLab


From 3d895eb7f2ee9ea031bf72dc52393344478952d8 Mon Sep 17 00:00:00 2001
From: Justin Lebar <jlebar@google.com>
Date: Tue, 5 Dec 2017 17:36:46 -0800
Subject: [PATCH 0663/1225] [StreamExecutor] When a kernel launch fails, print
 the kernel's name.

Previously, we printed everything about the kernel *except* its name.
:)

PiperOrigin-RevId: 178037469
---
 tensorflow/stream_executor/cuda/cuda_gpu_executor.cc | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc b/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc
index c9d094e3d0..60eaaba21c 100644
--- a/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc
+++ b/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc
@@ -380,9 +380,9 @@ bool CUDAExecutor::Launch(Stream *stream, const ThreadDim &thread_dims,
                                 thread_dims.z, args.number_of_shared_bytes(),
                                 custream, kernel_params,
                                 nullptr /* = extra */)) {
-    LOG(ERROR) << "failed to launch CUDA kernel with args: "
+    LOG(ERROR) << "failed to launch CUDA kernel " << kernel.name() << " with "
                << args.number_of_arguments()
-               << "; thread dim: " << thread_dims.ToString()
+               << " args; thread dim: " << thread_dims.ToString()
                << "; block dim: " << block_dims.ToString();
     return false;
   }
-- 
GitLab


From cefd2c73cd785c201e9c0cb9890b2bff9310021c Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 5 Dec 2017 18:02:29 -0800
Subject: [PATCH 0664/1225] [XLA] Humanize some print-outs.

PiperOrigin-RevId: 178040190
---
 .../compiler/xla/service/hlo_rematerialization.cc   |  8 +++++---
 tensorflow/compiler/xla/service/hlo_scheduling.cc   | 13 +++++++++----
 2 files changed, 14 insertions(+), 7 deletions(-)

diff --git a/tensorflow/compiler/xla/service/hlo_rematerialization.cc b/tensorflow/compiler/xla/service/hlo_rematerialization.cc
index d09de7b528..1747790e63 100644
--- a/tensorflow/compiler/xla/service/hlo_rematerialization.cc
+++ b/tensorflow/compiler/xla/service/hlo_rematerialization.cc
@@ -1024,7 +1024,9 @@ StatusOr<bool> HloRematerialization::RematerializeComputation(
 
       HloInstruction* best = best_item->instruction;
       VLOG(1) << "Rematerializing instruction " << best->name() << " (saving "
-              << memory_tracker.MemoryReducedIfRematerialized(best_item) << ")";
+              << HumanReadableNumBytes(
+                     memory_tracker.MemoryReducedIfRematerialized(best_item))
+              << ")";
       changed = true;
       remat_count++;
 
@@ -1104,8 +1106,8 @@ StatusOr<bool> HloRematerialization::RematerializeComputation(
         net_instructions_added++;
       }
 
-      VLOG(3) << "memory_usage after rematerialization = "
-              << memory_tracker.memory_usage();
+      VLOG(1) << "memory_usage after rematerialization = "
+              << HumanReadableNumBytes(memory_tracker.memory_usage());
     }
 
     const CallSite* callsite = call_graph_node.GetCallSite(instruction);
diff --git a/tensorflow/compiler/xla/service/hlo_scheduling.cc b/tensorflow/compiler/xla/service/hlo_scheduling.cc
index 8ccbcaeee4..0dc17392f1 100644
--- a/tensorflow/compiler/xla/service/hlo_scheduling.cc
+++ b/tensorflow/compiler/xla/service/hlo_scheduling.cc
@@ -31,6 +31,8 @@ limitations under the License.
 #include "tensorflow/core/lib/strings/stringprintf.h"
 #include "tensorflow/core/platform/logging.h"
 
+using ::tensorflow::strings::HumanReadableNumBytes;
+
 namespace xla {
 
 StatusOr<int64> MinimumMemoryForSequence(
@@ -375,6 +377,7 @@ StatusOr<std::vector<const HloInstruction*>> CreateMemoryMinimizingSequence(
   // Note that this is just a heuristic. One obvious inaccuracy is that the
   // memory required for sub-computations might be different when considered
   // within the caller's context. But it's good enough for now.
+  VLOG(2) << "Computation: " << computation.name();
   TF_ASSIGN_OR_RETURN(
       std::vector<const HloInstruction*> list_sequence,
       ListScheduler::Run(computation, points_to_analysis, size_function));
@@ -382,7 +385,7 @@ StatusOr<std::vector<const HloInstruction*>> CreateMemoryMinimizingSequence(
       const int64 list_memory,
       MinimumMemoryForComputation(computation, list_sequence,
                                   points_to_analysis, size_function));
-  VLOG(2) << "Min-memory list sequence: " << list_memory << " bytes";
+  VLOG(2) << "Min-memory list sequence: " << HumanReadableNumBytes(list_memory);
 
   TF_ASSIGN_OR_RETURN(
       std::vector<const HloInstruction*> dfs_sequence,
@@ -391,13 +394,15 @@ StatusOr<std::vector<const HloInstruction*>> CreateMemoryMinimizingSequence(
       const int64 dfs_memory,
       MinimumMemoryForComputation(computation, dfs_sequence, points_to_analysis,
                                   size_function));
-  VLOG(2) << "Min-memory dfs sequence: " << dfs_memory << " bytes";
+  VLOG(2) << "Min-memory dfs sequence: " << HumanReadableNumBytes(dfs_memory);
 
   if (list_memory <= dfs_memory) {
-    VLOG(2) << "Chose min-memory list sequence: " << list_memory << " bytes";
+    VLOG(2) << "Chose min-memory list sequence: "
+            << HumanReadableNumBytes(list_memory);
     return list_sequence;
   } else {
-    VLOG(2) << "Chose min-memory dfs sequence: " << dfs_memory << " bytes";
+    VLOG(2) << "Chose min-memory dfs sequence: "
+            << HumanReadableNumBytes(dfs_memory);
     return dfs_sequence;
   }
 }
-- 
GitLab


From 1a786ab335aabe9020cff4f0ab69a5844de70fbc Mon Sep 17 00:00:00 2001
From: Justin Lebar <jlebar@google.com>
Date: Tue, 5 Dec 2017 18:13:48 -0800
Subject: [PATCH 0665/1225] [XLA:GPU] Don't autotune while other kernels are
 running.

XLA:GPU autotunes gemm and conv thunks, trying multiple algorithms in
sequence and picking the fastest one.

If other work is running concurrently with our autotuning, this can mess
up the results.  In particular, even if the GPU is totally
deterministic, the concurrent work may finish before we finish
autotuning, giving an unfair advantage to the later algorithms.

To address this, we modify GpuExecutable to wait until the GPU is
quiescent before executing a thunk which performs autotuning.  We then
cross our fingers and hope that whatever is fastest while the GPU is
quiescent will also be fastest in the "real world", with (potentially)
concurrent work going on.

PiperOrigin-RevId: 178041481
---
 .../xla/service/gpu/convolution_thunk.cc       | 18 ++++++++++--------
 .../xla/service/gpu/convolution_thunk.h        | 16 +++++++++++++---
 .../compiler/xla/service/gpu/gemm_thunk.h      |  9 +++++++++
 .../compiler/xla/service/gpu/gpu_executable.cc |  9 ++++++++-
 tensorflow/compiler/xla/service/gpu/thunk.h    | 13 +++++++++++++
 5 files changed, 53 insertions(+), 12 deletions(-)

diff --git a/tensorflow/compiler/xla/service/gpu/convolution_thunk.cc b/tensorflow/compiler/xla/service/gpu/convolution_thunk.cc
index 037eec8ef5..899cc5c83b 100644
--- a/tensorflow/compiler/xla/service/gpu/convolution_thunk.cc
+++ b/tensorflow/compiler/xla/service/gpu/convolution_thunk.cc
@@ -314,7 +314,9 @@ tensorflow::Status ConvolutionThunk::ConvolveWithTune(
     const ConvolutionDescriptor& convolution_descriptor,
     const BufferAllocations& buffer_allocations, se::Stream* stream) {
   // TODO(b/29126320): Try cudnn v5's new auto-tuner when it's rolled out.
-  if (best_algorithm_.algorithm().is_default()) {
+  if (!best_algorithm_.has_value()) {
+    best_algorithm_.emplace();
+
     // Auto-tuning either is disabled or only happens in the first run of this
     // function.
     VLOG(2) << "Profiling for best convolution algorithm used for "
@@ -363,35 +365,35 @@ tensorflow::Status ConvolutionThunk::ConvolveWithTune(
     }
 
     if (best_result.is_valid()) {
-      best_algorithm_.set_algorithm(best_result.algorithm());
+      best_algorithm_->set_algorithm(best_result.algorithm());
     } else {
       LOG(ERROR) << "No convolution algorithm works with profiling. Fall back "
                     "to the default algorithm.";
-      best_algorithm_.set_algorithm(AlgorithmDesc());
+      best_algorithm_->set_algorithm(AlgorithmDesc());
     }
 
     if (best_result_without_scratch.is_valid()) {
-      best_algorithm_.set_algorithm_no_scratch(
+      best_algorithm_->set_algorithm_no_scratch(
           best_result_without_scratch.algorithm());
     } else {
       LOG(ERROR) << "No convolution algorithm without scratch works with "
                     "profiling. Fall back "
                     "to the default algorithm.";
-      best_algorithm_.set_algorithm_no_scratch(AlgorithmDesc());
+      best_algorithm_->set_algorithm_no_scratch(AlgorithmDesc());
     }
   }
 
   {
     VLOG(2) << "Using convolution algorithm ("
-            << AlgorithmToString(best_algorithm_.algorithm()) << ", "
-            << AlgorithmToString(best_algorithm_.algorithm_no_scratch())
+            << AlgorithmToString(best_algorithm_->algorithm()) << ", "
+            << AlgorithmToString(best_algorithm_->algorithm_no_scratch())
             << ") for ConvolutionThunk: " << this;
     ConvolveScratchAllocator scratch_allocator(
         buffer_allocations.device_ordinal(),
         buffer_allocations.memory_allocator());
     return Convolve(input_descriptor, input_data, filter_descriptor,
                     filter_data, output_descriptor, output_data,
-                    convolution_descriptor, best_algorithm_, stream,
+                    convolution_descriptor, *best_algorithm_, stream,
                     &scratch_allocator, nullptr);
   }
 }
diff --git a/tensorflow/compiler/xla/service/gpu/convolution_thunk.h b/tensorflow/compiler/xla/service/gpu/convolution_thunk.h
index 5ac5db2f04..7c25a2e645 100644
--- a/tensorflow/compiler/xla/service/gpu/convolution_thunk.h
+++ b/tensorflow/compiler/xla/service/gpu/convolution_thunk.h
@@ -24,6 +24,7 @@ limitations under the License.
 #include "tensorflow/compiler/xla/types.h"
 #include "tensorflow/compiler/xla/xla_data.pb.h"
 #include "tensorflow/core/lib/core/status.h"
+#include "tensorflow/core/lib/gtl/optional.h"
 #include "tensorflow/core/platform/stream_executor_no_cuda.h"
 
 namespace xla {
@@ -87,6 +88,14 @@ class ConvolutionThunk : public Thunk {
       const BufferAllocations& buffer_allocations,
       perftools::gputools::Stream* stream) override;
 
+  // Returns true if the next run of ExecuteOnStream will do autotuning.  If so,
+  // we want the GPU to be quiescent during autotuning, so as not to introduce
+  // noise in our results.
+  bool ShouldHaltAllActivityBeforeRunning(
+      perftools::gputools::Stream*) override {
+    return !best_algorithm_.has_value();
+  }
+
  private:
   tensorflow::Status ConvolveWithTune(
       const perftools::gputools::dnn::BatchDescriptor& input_descriptor,
@@ -121,9 +130,10 @@ class ConvolutionThunk : public Thunk {
 
   // Fastest cuDNN convolution algorithm for this thunk learned from
   // auto-tuning. If auto-tuning is disabled or failed, best_algorithm_ is set
-  // to the default value indicating cuDNN's convolution will choose
-  // the best algorithm from some heuristics based on its parameters.
-  perftools::gputools::dnn::AlgorithmConfig best_algorithm_;
+  // to the default value, indicating cuDNN's convolution will choose the best
+  // algorithm from some heuristics based on its parameters.
+  tensorflow::gtl::optional<perftools::gputools::dnn::AlgorithmConfig>
+      best_algorithm_;
 
   const ConvolutionKind convolution_kind_;
 
diff --git a/tensorflow/compiler/xla/service/gpu/gemm_thunk.h b/tensorflow/compiler/xla/service/gpu/gemm_thunk.h
index 983cb87292..8c6a1f51a8 100644
--- a/tensorflow/compiler/xla/service/gpu/gemm_thunk.h
+++ b/tensorflow/compiler/xla/service/gpu/gemm_thunk.h
@@ -52,6 +52,15 @@ class GemmThunk : public Thunk {
       const BufferAllocations& buffer_allocations,
       perftools::gputools::Stream* stream) override;
 
+  // Returns true if we'll perform autotuning if run on the given stream.  If
+  // so, we want the GPU to be quiescent during autotuning, so as not to
+  // introduce noise in our results.
+  bool ShouldHaltAllActivityBeforeRunning(
+      perftools::gputools::Stream* stream) override {
+    return autotune_results_.count(
+               stream->parent()->GetDeviceDescription().name()) != 0;
+  }
+
  private:
   const BufferAllocation::Slice lhs_buffer_;
   const BufferAllocation::Slice rhs_buffer_;
diff --git a/tensorflow/compiler/xla/service/gpu/gpu_executable.cc b/tensorflow/compiler/xla/service/gpu/gpu_executable.cc
index 0fd85e4fb0..21e9fc96f6 100644
--- a/tensorflow/compiler/xla/service/gpu/gpu_executable.cc
+++ b/tensorflow/compiler/xla/service/gpu/gpu_executable.cc
@@ -167,9 +167,16 @@ Status GpuExecutable::ExecuteThunks(
       stream->ThenWaitFor(FindOrDie(thunk_to_finish_event, dependency).get());
     }
 
+    // If this thunk requests it, wait for all currently-executing thunks to
+    // finish.  This is useful e.g. if the thunk is about to perform autotuning.
+    if (thunk->ShouldHaltAllActivityBeforeRunning(stream)) {
+      main_stream->BlockHostUntilDone();
+    }
+
     profiler.StartOperation();
     VLOG(2) << "Executing the thunk for "
-            << thunk->hlo_instruction()->ToString();
+            << thunk->hlo_instruction()->ToString() << " on stream "
+            << stream_no;
     TF_RETURN_IF_ERROR(thunk->ExecuteOnStream(buffer_allocations, stream));
     if (thunk_schedule_->Depended(thunk)) {
       auto finish_event = MakeUnique<se::Event>(main_stream->parent());
diff --git a/tensorflow/compiler/xla/service/gpu/thunk.h b/tensorflow/compiler/xla/service/gpu/thunk.h
index 0ff27888ad..486ea7d7e1 100644
--- a/tensorflow/compiler/xla/service/gpu/thunk.h
+++ b/tensorflow/compiler/xla/service/gpu/thunk.h
@@ -70,6 +70,19 @@ class Thunk {
     return tensorflow::Status::OK();
   }
 
+  // Users of Thunk should call ShouldHaltAllActivityBeforeRunning(stream)
+  // before calling ExecuteOnStream(stream).  If it returns true, it's the
+  // user's responsibility to wait for all activity on the GPU to finish before
+  // calling ExecuteOnStream.
+  //
+  // This value is not required to be constant for a given Thunk.  For example,
+  // a Thunk that performs autotuning may return true for its first run and
+  // false thereafter.
+  virtual bool ShouldHaltAllActivityBeforeRunning(
+      perftools::gputools::Stream* /*stream*/) {
+    return false;
+  }
+
   // Execute the kernel for the thunk on the given stream. This method must be
   // called after Initialize and can be called multiple times over Thunk's
   // lifetime. Stream argument must be non-null.
-- 
GitLab


From 22767d59b3c6958ed690814ff77e29ee1d458b18 Mon Sep 17 00:00:00 2001
From: Bjarke Hammersholt Roune <broune@google.com>
Date: Tue, 5 Dec 2017 18:33:11 -0800
Subject: [PATCH 0666/1225] Allow CrossReplicaSum to take multiple operands
 internally.

PiperOrigin-RevId: 178043362
---
 .../compiler/xla/service/hlo_cost_analysis.cc |  9 +++++-
 .../compiler/xla/service/hlo_instruction.cc   | 12 +++-----
 .../compiler/xla/service/hlo_instruction.h    |  3 +-
 .../compiler/xla/service/hlo_reachability.h   | 28 +++++++++++++++++--
 .../compiler/xla/service/hlo_verifier.cc      |  8 ++++--
 tensorflow/compiler/xla/service/service.cc    |  7 ++++-
 .../compiler/xla/service/shape_inference.cc   | 17 ++++++++---
 .../compiler/xla/service/shape_inference.h    |  6 ++--
 .../compiler/xla/service/user_computation.cc  |  4 +--
 .../compiler/xla/tools/parser/hlo_parser.cc   |  5 ++--
 10 files changed, 72 insertions(+), 27 deletions(-)

diff --git a/tensorflow/compiler/xla/service/hlo_cost_analysis.cc b/tensorflow/compiler/xla/service/hlo_cost_analysis.cc
index 0ed64e6779..b933695b82 100644
--- a/tensorflow/compiler/xla/service/hlo_cost_analysis.cc
+++ b/tensorflow/compiler/xla/service/hlo_cost_analysis.cc
@@ -398,7 +398,14 @@ Status HloCostAnalysis::HandleCrossReplicaSum(const HloInstruction* crs) {
   //
   // TODO(b/33004697): Compute correct cost here, taking the actual number of
   // replicas into account.
-  current_properties_[kFlopsKey] = ShapeUtil::ElementsIn(crs->shape());
+  double flops = 0.0;
+  ShapeUtil::ForEachSubshape(
+      crs->shape(), [&, this](const Shape& subshape, const ShapeIndex&) {
+        if (ShapeUtil::IsArray(subshape)) {
+          flops += ShapeUtil::ElementsIn(subshape);
+        }
+      });
+  current_properties_[kFlopsKey] = flops;
   return Status::OK();
 }
 
diff --git a/tensorflow/compiler/xla/service/hlo_instruction.cc b/tensorflow/compiler/xla/service/hlo_instruction.cc
index b2700fdbdb..7849301957 100644
--- a/tensorflow/compiler/xla/service/hlo_instruction.cc
+++ b/tensorflow/compiler/xla/service/hlo_instruction.cc
@@ -361,12 +361,9 @@ HloInstruction::CreateReducePrecision(const Shape& shape,
 }
 
 /* static */ std::unique_ptr<HloInstruction>
-HloInstruction::CreateCrossReplicaSum(const Shape& shape,
-                                      HloInstruction* operand) {
-  auto instruction =
-      WrapUnique(new HloInstruction(HloOpcode::kCrossReplicaSum, shape));
-  instruction->AppendOperand(operand);
-  return instruction;
+HloInstruction::CreateCrossReplicaSum(
+    const Shape& shape, tensorflow::gtl::ArraySlice<HloInstruction*> operands) {
+  return CreateNary(shape, HloOpcode::kCrossReplicaSum, operands);
 }
 
 /* static */ std::unique_ptr<HloInstruction> HloInstruction::CreateInfeed(
@@ -1159,8 +1156,7 @@ std::unique_ptr<HloInstruction> HloInstruction::CloneWithNewOperands(
                         *dot_dimension_numbers_);
       break;
     case HloOpcode::kCrossReplicaSum:
-      CHECK_EQ(new_operands.size(), 1);
-      clone = CreateCrossReplicaSum(shape, new_operands[0]);
+      clone = CreateCrossReplicaSum(shape, new_operands);
       break;
     case HloOpcode::kGetTupleElement:
       CHECK_EQ(new_operands.size(), 1);
diff --git a/tensorflow/compiler/xla/service/hlo_instruction.h b/tensorflow/compiler/xla/service/hlo_instruction.h
index 088902e2a7..5e798c2045 100644
--- a/tensorflow/compiler/xla/service/hlo_instruction.h
+++ b/tensorflow/compiler/xla/service/hlo_instruction.h
@@ -175,7 +175,8 @@ class HloInstruction {
 
   // Creates a cross replica sum op.
   static std::unique_ptr<HloInstruction> CreateCrossReplicaSum(
-      const Shape& shape, HloInstruction* operand);
+      const Shape& shape,
+      tensorflow::gtl::ArraySlice<HloInstruction*> operands);
 
   // Creates a conversion instruction, where operand is the data to convert and
   // shape is the target shape for the conversion.
diff --git a/tensorflow/compiler/xla/service/hlo_reachability.h b/tensorflow/compiler/xla/service/hlo_reachability.h
index d7bdac9c86..553ec11f6f 100644
--- a/tensorflow/compiler/xla/service/hlo_reachability.h
+++ b/tensorflow/compiler/xla/service/hlo_reachability.h
@@ -30,11 +30,17 @@ namespace xla {
 
 class HloInstruction;
 
-// A class for computing and representing reachability between HloInstructions.
+// A class for representing reachability between HloInstructions.
+//
+// !!! THIS CLASS DOES NOT COMPUTE REACHABILITY !!! It has an adjacency matrix
+// and it is up to the user of the class to set the adjacency matrix such that
+// it represents reachability, i.e. such that it is transitive. That the graph
+// be transitive is thus not an invariant of this class, but it is required for
+// the name of the class and its methods to make sense.
 class HloReachabilityMap {
  public:
-  // Sets up an empty reachable matrix for the full set of instructions
-  // specified in 'instructions'.
+  // Sets up a graph with no edges and where the nodes correspond to the given
+  // instructions.
   explicit HloReachabilityMap(const std::list<HloInstruction*>& instructions);
 
   // Set the reachability set of 'instruction' to the union of the reachability
@@ -42,17 +48,33 @@ class HloReachabilityMap {
   // 'x' is not 'instruction' will return true iff IsReachable(x, input) is true
   // for some 'input' in 'inputs'. Also sets 'instruction' to be reachable from
   // itself. Returns whether the reachability set of 'instruction' changed.
+  //
+  // !!! THIS FUNCTION DOES NOT COMPUTE REACHABILITY !!! It sets the adjacency
+  // vector in the internal graph of this HloReachabilityMap for the given
+  // instruction and does not transitively update any other part of the
+  // adjacency matrix.
   bool SetReachabilityToUnion(
       tensorflow::gtl::ArraySlice<const HloInstruction*> inputs,
       const HloInstruction* instruction);
 
   // Sets entry so that IsReachable(a, b) will return true
+  //
+  // !!! THIS FUNCTION DOES NOT COMPUTE REACHABILITY !!! It sets the adjacency
+  // matrix in the internal graph of this HloReachabilityMap to have an edge
+  // from a to b and does not transitively update any other part of the
+  // adjacency matrix.
   void SetReachable(const HloInstruction* a, const HloInstruction* b);
 
   // Returns true if "b" is reachable from "a"
+  //
+  // Note that this function only correctly answers queries about reachability
+  // if the set of edges that have been provided to this class are transitive.
   bool IsReachable(const HloInstruction* a, const HloInstruction* b) const;
 
   // Returns true if "b" is reachable from "a" or "a" is reachable from "b"
+  //
+  // Note that this function only correctly answers queries about reachability
+  // if the set of edges that have been provided to this class are transitive.
   bool IsConnected(const HloInstruction* a, const HloInstruction* b) const;
 
  private:
diff --git a/tensorflow/compiler/xla/service/hlo_verifier.cc b/tensorflow/compiler/xla/service/hlo_verifier.cc
index 4d02846cf7..515edd48b4 100644
--- a/tensorflow/compiler/xla/service/hlo_verifier.cc
+++ b/tensorflow/compiler/xla/service/hlo_verifier.cc
@@ -93,8 +93,12 @@ class ShapeVerifier : public DfsHloVisitor {
   }
 
   Status HandleCrossReplicaSum(HloInstruction* crs) override {
-    return CheckShape(crs, ShapeInference::InferCrossReplicaSumShape(
-                               crs->operand(0)->shape()));
+    std::vector<const Shape*> operand_shapes;
+    for (const HloInstruction* operand : crs->operands()) {
+      operand_shapes.push_back(&operand->shape());
+    }
+    return CheckShape(
+        crs, ShapeInference::InferCrossReplicaSumShape(operand_shapes));
   }
 
   Status HandleReducePrecision(HloInstruction* reduce_precision) override {
diff --git a/tensorflow/compiler/xla/service/service.cc b/tensorflow/compiler/xla/service/service.cc
index 61b3d3e0fe..8c287a6ab0 100644
--- a/tensorflow/compiler/xla/service/service.cc
+++ b/tensorflow/compiler/xla/service/service.cc
@@ -666,6 +666,7 @@ StatusOr<GlobalDataHandle> Service::ExecuteAndRegisterResult(
         result, executable->ExecuteOnStreamWrapper<se::DeviceMemoryBase>(
                     &run_options[0], profile, arguments));
   } else {
+    // TODO(b/69985541): Support profiling also on this path.
     std::vector<
         tensorflow::gtl::ArraySlice<perftools::gputools::DeviceMemoryBase>>
         repeated_arguments(options_.number_of_replicas(), arguments);
@@ -1535,8 +1536,12 @@ tensorflow::Status Service::Op(const OpRequest* arg, OpResponse* result) {
       handle_status = computation->AddRecvInstruction(arg->recv_request());
       break;
     }
+    case OpRequest::kFftRequest:
+      return Unimplemented("FftRequest not implemented in XLA service.");
+    case OpRequest::OP_NOT_SET:
+      return InvalidArgument("XLA service received OpRequest with OP_NOT_SET");
     default:
-      return InvalidArgument("Unsupported operation");
+      return InvalidArgument("Unsupported operation in XLA service");
   }
   TF_ASSIGN_OR_RETURN(*result->mutable_output(), handle_status);
 
diff --git a/tensorflow/compiler/xla/service/shape_inference.cc b/tensorflow/compiler/xla/service/shape_inference.cc
index 7178eb40dd..1c0578ecc8 100644
--- a/tensorflow/compiler/xla/service/shape_inference.cc
+++ b/tensorflow/compiler/xla/service/shape_inference.cc
@@ -1701,10 +1701,19 @@ ShapeInference::InferDegenerateDimensionBroadcastShape(
 }
 
 /* static */ StatusOr<Shape> ShapeInference::InferCrossReplicaSumShape(
-    const Shape& operand) {
-  TF_RETURN_IF_ERROR(
-      ExpectNotTupleOrOpaque(operand, "operand of cross replica sum"));
-  return operand;
+    tensorflow::gtl::ArraySlice<const Shape*> operand_shapes) {
+  for (const Shape* operand_shape : operand_shapes) {
+    TF_RETURN_IF_ERROR(
+        ExpectNotTupleOrOpaque(*operand_shape, "operand of cross replica sum"));
+  }
+  if (operand_shapes.size() == 1) {
+    return *operand_shapes[0];
+  }
+  std::vector<Shape> operand_shape_values;
+  for (const Shape* operand_shape : operand_shapes) {
+    operand_shape_values.push_back(*operand_shape);
+  }
+  return ShapeUtil::MakeTupleShape(operand_shape_values);
 }
 
 /* static */ StatusOr<Shape> ShapeInference::InferReduceShape(
diff --git a/tensorflow/compiler/xla/service/shape_inference.h b/tensorflow/compiler/xla/service/shape_inference.h
index 382c4f8abc..8c5ac20244 100644
--- a/tensorflow/compiler/xla/service/shape_inference.h
+++ b/tensorflow/compiler/xla/service/shape_inference.h
@@ -109,8 +109,10 @@ class ShapeInference {
       const Shape& lhs, const Shape& rhs, const Window& window,
       const ConvolutionDimensionNumbers& dimension_numbers);
 
-  // Infers the shape produced a cross replica sum with the given operand shape.
-  static StatusOr<Shape> InferCrossReplicaSumShape(const Shape& operand);
+  // Infers the shape produced a cross replica sum with the given operand
+  // shapes.
+  static StatusOr<Shape> InferCrossReplicaSumShape(
+      tensorflow::gtl::ArraySlice<const Shape*> operand_shapes);
 
   // Infers the shape produced by applying the given reduction computation
   // shape to the given input operand shape.
diff --git a/tensorflow/compiler/xla/service/user_computation.cc b/tensorflow/compiler/xla/service/user_computation.cc
index 6d0d367981..1ec21a0b51 100644
--- a/tensorflow/compiler/xla/service/user_computation.cc
+++ b/tensorflow/compiler/xla/service/user_computation.cc
@@ -1080,7 +1080,7 @@ StatusOr<ComputationDataHandle> UserComputation::AddCrossReplicaSumInstruction(
   TF_ASSIGN_OR_RETURN(const OperationRequest* operand,
                       LookUpRequest(cross_replica_sum_request.operand()));
   TF_ASSIGN_OR_RETURN(Shape shape, ShapeInference::InferCrossReplicaSumShape(
-                                       operand->output_shape()));
+                                       {&operand->output_shape()}));
 
   ComputationDataHandle handle = CreateComputationDataHandle();
 
@@ -2788,7 +2788,7 @@ void ComputationLowerer::Visit(
       HloInstruction* operand =
           lookup_instruction(cross_replica_sum_request.operand());
       hlo_instruction = add_instruction(HloInstruction::CreateCrossReplicaSum(
-          request.output_shape(), operand));
+          request.output_shape(), {operand}));
       break;
     }
 
diff --git a/tensorflow/compiler/xla/tools/parser/hlo_parser.cc b/tensorflow/compiler/xla/tools/parser/hlo_parser.cc
index ddc1e69951..cc0461fc5b 100644
--- a/tensorflow/compiler/xla/tools/parser/hlo_parser.cc
+++ b/tensorflow/compiler/xla/tools/parser/hlo_parser.cc
@@ -447,12 +447,11 @@ bool HloParser::ParseInstruction(HloComputation::Builder* builder,
       break;
     }
     case HloOpcode::kCrossReplicaSum: {
-      if (!ParseOperands(&operands, /*expected_size=*/1) ||
-          !ParseAttributes(attrs)) {
+      if (!ParseOperands(&operands) || !ParseAttributes(attrs)) {
         return false;
       }
       instruction = builder->AddInstruction(
-          HloInstruction::CreateCrossReplicaSum(shape, operands[0]));
+          HloInstruction::CreateCrossReplicaSum(shape, operands));
       break;
     }
     case HloOpcode::kReshape: {
-- 
GitLab


From c231d2115d5536873fa1f3d93ecaf3b1701c158f Mon Sep 17 00:00:00 2001
From: Eugene Brevdo <ebrevdo@google.com>
Date: Tue, 5 Dec 2017 19:03:50 -0800
Subject: [PATCH 0667/1225] Fix broken usage of mutexes in training ops like
 AdaDelta.

Currently, if the op is run and returns an error, it doesn't clean up a mutex
lock at exit; any future access to the underlying parameters will cause
a deadlock.

PiperOrigin-RevId: 178045860
---
 tensorflow/core/kernels/training_ops.cc | 22 +++++++++++++---------
 1 file changed, 13 insertions(+), 9 deletions(-)

diff --git a/tensorflow/core/kernels/training_ops.cc b/tensorflow/core/kernels/training_ops.cc
index b8d601389b..38e77ab60f 100644
--- a/tensorflow/core/kernels/training_ops.cc
+++ b/tensorflow/core/kernels/training_ops.cc
@@ -536,8 +536,9 @@ class ApplyAdadeltaOp : public OpKernel {
   }
 
   void Compute(OpKernelContext* ctx) override {
-    if (use_exclusive_lock_) {
-      mutex_lock l1(*GetTrainingVariableMutex(ctx, 0));
+    mutex* mu = GetTrainingVariableMutex(ctx, 0);
+    if (use_exclusive_lock_ && mu != nullptr) {
+      mutex_lock l1(*mu);
       // Don't try to acquire a lock on the second ref as they share the same
       // mutex.
       //
@@ -682,15 +683,21 @@ class SparseApplyAdadeltaOp : public OpKernel {
     OP_REQUIRES_OK(ctx, ctx->GetAttr("use_locking", &use_exclusive_lock_));
   }
 
-  void Compute(OpKernelContext* ctx) override NO_THREAD_SAFETY_ANALYSIS {
-    mutex* mu_var = GetTrainingVariableMutex(ctx, 0);
+  void Compute(OpKernelContext* ctx) override {
+    mutex* mu = GetTrainingVariableMutex(ctx, 0);
     // mu_accum is actually the same mutex as mu_var since currently we use a
     // global mutex.
     //
     // mutex* mu_accum = ctx->input_ref_mutex(1);
-    if (use_exclusive_lock_) {
-      mu_var->lock();
+    if (use_exclusive_lock_ && mu != nullptr) {
+      mutex_lock ml(*mu);
+      DoCompute(ctx);
+    } else {
+      DoCompute(ctx);
     }
+  }
+
+  void DoCompute(OpKernelContext* ctx) {
     Tensor var;
     OP_REQUIRES_OK(ctx, GetInputTensorFromVariable<CPUDevice, T>(
                             ctx, 0, use_exclusive_lock_, true, &var));
@@ -791,9 +798,6 @@ class SparseApplyAdadeltaOp : public OpKernel {
             update.square() * update.constant(static_cast<T>(1) - rho_scalar);
       }
     }
-    if (use_exclusive_lock_) {
-      mu_var->unlock();
-    }
 
     MaybeForwardRefInputToRefOutput(ctx, 0, 0);
   }
-- 
GitLab


From 4985cf21cbb2b456281969ff50a405a62c3c0804 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 5 Dec 2017 19:19:04 -0800
Subject: [PATCH 0668/1225] Add tutorial, model code and dataset conversion
 tool for the Quick, Draw! dataset.

PiperOrigin-RevId: 178046836
---
 tensorflow/docs_src/tutorials/index.md        |   4 +
 tensorflow/docs_src/tutorials/leftnav_files   |   1 +
 .../docs_src/tutorials/recurrent_quickdraw.md | 410 ++++++++++++++++++
 3 files changed, 415 insertions(+)
 create mode 100644 tensorflow/docs_src/tutorials/recurrent_quickdraw.md

diff --git a/tensorflow/docs_src/tutorials/index.md b/tensorflow/docs_src/tutorials/index.md
index a34dbd6956..6e24f47882 100644
--- a/tensorflow/docs_src/tutorials/index.md
+++ b/tensorflow/docs_src/tutorials/index.md
@@ -46,6 +46,10 @@ The following tutorials focus on linear models:
   * @{$audio_recognition$Simple Audio Recognition}, which shows how to
     build a basic speech recognition network.
 
+The following tutorial covers building a classification model for sequences:
+
+  * ${$recurrent_quickdraw$Classifying Drawings using Recurrent Neural Networks}
+
 Although TensorFlow specializes in machine learning, you may also use
 TensorFlow to solve other kinds of math problems.  For example:
 
diff --git a/tensorflow/docs_src/tutorials/leftnav_files b/tensorflow/docs_src/tutorials/leftnav_files
index 5a5d6ca558..e612961ae0 100644
--- a/tensorflow/docs_src/tutorials/leftnav_files
+++ b/tensorflow/docs_src/tutorials/leftnav_files
@@ -6,6 +6,7 @@ layers.md
 deep_cnn.md
 word2vec.md
 recurrent.md
+recurrent_quickdraw.md
 seq2seq.md
 linear.md
 wide.md
diff --git a/tensorflow/docs_src/tutorials/recurrent_quickdraw.md b/tensorflow/docs_src/tutorials/recurrent_quickdraw.md
new file mode 100644
index 0000000000..8ed8e16c22
--- /dev/null
+++ b/tensorflow/docs_src/tutorials/recurrent_quickdraw.md
@@ -0,0 +1,410 @@
+# Recurrent Neural Networks for Drawing Classification
+
+[Quick, Draw!]: http://quickdraw.withgoogle.com
+
+[Quick, Draw!] is a game where a player is challenged to draw a number of
+objects and see if a computer can recognize the drawing.
+
+The recognition in [Quick, Draw!] is performed by a classifier that takes the
+user input, given as a sequence of strokes of points in x and y, and recognizes
+the object category that the user tried to draw.
+
+In this tutorial we'll show how to build an RNN-based recognizer for this
+problem. The model will use a combination of convolutional layers, LSTM layers,
+and a softmax output layer to classify the drawings:
+
+<center> ![RNN model structure](../images/quickdraw_model.png) </center>
+
+The figure above shows the structure of the model that we will build in this
+tutorial. The input is a drawing that is encoded as a sequence of strokes of
+points in x, y, and n, where n indicates whether a the point is the first point
+in a new stroke.
+
+Then, a series of 1-dimensional convolutions is applied. Then LSTM layers are
+applied and the sum of the outputs of all LSTM steps is fed into a softmax layer
+to make a classification decision among the classes of drawings that we know.
+
+This tutorial uses the data from actual [Quick, Draw!] games [that is publicly
+available](https://quickdraw.withgoogle.com/data). This dataset contains of 50M
+drawings in 345 categories.
+
+## Run the tutorial code
+
+To try the code for this tutorial:
+
+1.  @{$install$Install TensorFlow} if you haven't already.
+1.  Download the [tutorial code]
+(https://github.com/tensorflow/models/tree/master/tutorials/rnn/quickdraw/train_model.py).
+1.  [Download the data](#download-the-data) in `TFRecord` format from
+    [here](http://todo.url/deselaers) and unzip it. More details about [how to
+    obtain the original Quick, Draw!
+    data](#optional-download-the-full-quick-draw-data) and [how to convert that
+    to `TFRecord` files](#optional-converting-the-data) is available below.
+
+1.  Execute the tutorial code with the following command to train the RNN-based
+    model described in this tutorial. Make sure to adjust the paths to point to
+    the unzipped data from the download in step 3.
+
+```shell
+  python train_model.py \
+    --training_data=rnn_tutorial_data/training.tfrecord-?????-of-????? \
+    --eval_data=rnn_tutorial_data/eval.tfrecord-?????-of-????? \
+    --classes_file=rnn_tutorial_data/training.tfrecord.classes
+```
+
+## Tutorial details
+
+### Download the data
+
+We make the data that we use in this tutorial available as `TFRecord` files
+containing `TFExamples`. You can download the data from here:
+
+http://download.tensorflow.org/data/quickdraw_tutorial_dataset_v1.tar.gz
+
+Alternatively you can download the original data in `ndjson` format from the
+Google cloud and convert it to the `TFRecord` files containing `TFExamples`
+yourself as described in the next section.
+
+### Optional: Download the full Quick Draw Data
+
+The full [Quick, Draw!](https://quickdraw.withgoogle.com)
+[dataset](https://quickdraw.withgoogle.com/data) is available on Google Cloud
+Storage as [ndjson](http://ndjson.org/) files separated by category. You can
+[browse the list of files in Cloud
+Console](https://console.cloud.google.com/storage/quickdraw_dataset).
+
+To download the data we recommend using
+[gsutil](https://cloud.google.com/storage/docs/gsutil_install#install) to
+download the entire dataset. Note that the original .ndjson files require
+downloading ~22GB.
+
+Then use the following command to check that your gsutil installation works and
+that you can access the data bucket:
+
+```shell
+gsutil ls -r "gs://quickdraw_dataset/full/simplified/*"
+```
+
+which will output a long list of files like the following:
+
+```shell
+gs://quickdraw_dataset/full/simplified/The Eiffel Tower.ndjson
+gs://quickdraw_dataset/full/simplified/The Great Wall of China.ndjson
+gs://quickdraw_dataset/full/simplified/The Mona Lisa.ndjson
+gs://quickdraw_dataset/full/simplified/aircraft carrier.ndjson
+...
+```
+
+Then create a folder and download the dataset there.
+
+```shell
+mkdir rnn_tutorial_data
+cd rnn_tutorial_data
+gsutil -m cp "gs://quickdraw_dataset/full/simplified/*" .
+```
+
+This download will take a while and download a bit more than 23GB of data.
+
+### Optional: Converting the data
+
+To convert the `ndjson` files to
+@{$python/python_io#tfrecords_format_details$TFRecord} files containing
+${tf.train.Example} protos run the following command.
+
+```shell
+   python create_dataset.py --ndjson_path rnn_tutorial_data \
+      --output_path rnn_tutorial_data
+```
+
+This will store the data in 10 shards of
+@{$python/python_io#tfrecords_format_details$TFRecord} files with 10000 items
+per class for the training data and 1000 items per class as eval data.
+
+This conversion process is described in more detail in the following.
+
+The original QuickDraw data is formatted as `ndjson` files where each line
+contains a JSON object like the following:
+
+```json
+{"word":"cat",
+ "countrycode":"VE",
+ "timestamp":"2017-03-02 23:25:10.07453 UTC",
+ "recognized":true,
+ "key_id":"5201136883597312",
+ "drawing":[
+   [
+     [130,113,99,109,76,64,55,48,48,51,59,86,133,154,170,203,214,217,215,208,186,176,162,157,132],
+     [72,40,27,79,82,88,100,120,134,152,165,184,189,186,179,152,131,114,100,89,76,0,31,65,70]
+   ],[
+     [76,28,7],
+     [136,128,128]
+   ],[
+     [76,23,0],
+     [160,164,175]
+   ],[
+     [87,52,37],
+     [175,191,204]
+   ],[
+     [174,220,246,251],
+     [134,132,136,139]
+   ],[
+     [175,255],
+     [147,168]
+   ],[
+     [171,208,215],
+     [164,198,210]
+   ],[
+     [130,110,108,111,130,139,139,119],
+     [129,134,137,144,148,144,136,130]
+   ],[
+     [107,106],
+     [96,113]
+   ]
+ ]
+}
+```
+
+For our purpose of building a classifier we only care about the fields "`word`"
+and "`drawing`". While parsing the ndjson files, we process them line by line
+using a function that converts the strokes from the `drawing` field into a
+tensor of size `[number of points, 3]` containing the differences of consecutive
+points. This function also returns the class name as a string.
+
+```python
+def parse_line(ndjson_line):
+  """Parse an ndjson line and return ink (as np array) and classname."""
+  sample = json.loads(ndjson_line)
+  class_name = sample["word"]
+  inkarray = sample["drawing"]
+  stroke_lengths = [len(stroke[0]) for stroke in inkarray]
+  total_points = sum(stroke_lengths)
+  np_ink = np.zeros((total_points, 3), dtype=np.float32)
+  current_t = 0
+  for stroke in inkarray:
+    for i in [0, 1]:
+      np_ink[current_t:(current_t + len(stroke[0])), i] = stroke[i]
+    current_t += len(stroke[0])
+    np_ink[current_t - 1, 2] = 1  # stroke_end
+  # Preprocessing.
+  # 1. Size normalization.
+  lower = np.min(np_ink[:, 0:2], axis=0)
+  upper = np.max(np_ink[:, 0:2], axis=0)
+  scale = upper - lower
+  scale[scale == 0] = 1
+  np_ink[:, 0:2] = (np_ink[:, 0:2] - lower) / scale
+  # 2. Compute deltas.
+  np_ink = np_ink[1:, 0:2] - np_ink[0:-1, 0:2]
+  return np_ink, class_name
+```
+
+Since we want the data to be shuffled for writing we read from each of the
+category files in random order and write to a random shard.
+
+For the training data we read the first 10000 items for each class and for the
+eval data we read the next 1000 items for each class.
+
+This data is then reformatted into a tensor of shape `[num_training_samples,
+max_length, 3]`. Then we determine the bounding box of the original drawing in
+screen coordinates and normalize the size such that the drawing has unit height.
+
+<center> ![Size normalization](../images/quickdraw_sizenormalization.png) </center>
+
+Finally, we compute the differences between consecutive points and store these
+as a `VarLenFeature` in a
+[tensorflow.Example](https://www.tensorflow.org/code/tensorflow/core/example/example.proto)
+under the key `ink`. In addition we store the `class_index` as a single entry
+`FixedLengthFeature` and the `shape` of the `ink` as a `FixedLengthFeature` of
+length 2.
+
+### Defining the model
+
+To define the model we create a new `Estimator`. If you want to read more about
+estimators, we recommend @{$extend/estimators$this tutorial}.
+
+To build the model, we:
+
+1.  reshape the input back into the original shape - where the mini batch is
+    padded to the maximal length of its contents. In addition to the ink data we
+    also have the lengths for each example and the target class. This happens in
+    the function [`_get_input_tensors`](#-get-input-tensors).
+
+1.  pass the input through to a series of convolution layers in
+    [`_add_conv_layers`](#-add-conv-layers).
+
+1.  pass the output of the convolutions into a series of bidirectional LSTM
+    layers in [`_add_rnn_layers`](#-add-rnn-layers). At the end of that, the
+    outputs for each time step are summed up to have a compact, fixed length
+    embedding of the input.
+
+1.  classify this embedding using a softmax layer in
+    [`_add_fc_layers`](#-add-fc-layers).
+
+In code this looks like:
+
+```python
+inks, lengths, targets = _get_input_tensors(features, targets)
+convolved = _add_conv_layers(inks)
+final_state = _add_rnn_layers(convolved, lengths)
+logits =_add_fc_layers(final_state)
+```
+
+### _get_input_tensors
+
+To obtain the input features we first obtain the shape from the features dict
+and then create a 1D tensor of size `[batch_size]` containing the lengths of the
+input sequences. The ink is stored as a SparseTensor in the features dict which
+we convert into a dense tensor and then reshape to be `[batch_size, ?, 3]`. And
+finally, if targets were passed in we make sure they are stored as a 1D tensor
+of size `[batch_size]`
+
+In code this looks like this:
+
+```python
+shapes = features["shape"]
+lengths = tf.squeeze(
+    tf.slice(shapes, begin=[0, 0], size=[params["batch_size"], 1]))
+inks = tf.reshape(
+    tf.sparse_tensor_to_dense(features["ink"]),
+    [params["batch_size"], -1, 3])
+if targets is not None:
+  targets = tf.squeeze(targets)
+```
+
+### _add_conv_layers
+
+The desired number of convolution layers and the lengths of the filters is
+configured through the parameters `num_conv` and `conv_len` in the `params`
+dict.
+
+The input is a sequence where each point has dimensionality 3. We are going to
+use 1D convolutions where we treat the 3 input features as channels. That means
+that the input is a `[batch_size, length, 3]` tensor and the output will be a
+`[batch_size, length, number_of_filters]` tensor.
+
+```python
+convolved = inks
+for i in range(len(params.num_conv)):
+  convolved_input = convolved
+  if params.batch_norm:
+    convolved_input = tf.layers.batch_normalization(
+        convolved_input,
+        training=(mode == tf.estimator.ModeKeys.TRAIN))
+  # Add dropout layer if enabled and not first convolution layer.
+  if i > 0 and params.dropout:
+    convolved_input = tf.layers.dropout(
+        convolved_input,
+        rate=params.dropout,
+        training=(mode == tf.estimator.ModeKeys.TRAIN))
+  convolved = tf.layers.conv1d(
+      convolved_input,
+      filters=params.num_conv[i],
+      kernel_size=params.conv_len[i],
+      activation=None,
+      strides=1,
+      padding="same",
+      name="conv1d_%d" % i)
+return convolved, lengths
+```
+
+### _add_rnn_layers
+
+We pass the output from the convolutions into bidirectional LSTM layers for
+which we use a helper function from contrib.
+
+```python
+outputs, _, _ = contrib_rnn.stack_bidirectional_dynamic_rnn(
+    cells_fw=[cell(params.num_nodes) for _ in range(params.num_layers)],
+    cells_bw=[cell(params.num_nodes) for _ in range(params.num_layers)],
+    inputs=convolved,
+    sequence_length=lengths,
+    dtype=tf.float32,
+    scope="rnn_classification")
+```
+
+see the code for more details and how to use `CUDA` accelerated implementations.
+
+To create a compact, fixed-length embedding, we sum up the output of the LSTMs.
+We first zero out the regions of the batch where the sequences have no data.
+
+```python
+mask = tf.tile(
+    tf.expand_dims(tf.sequence_mask(lengths, tf.shape(outputs)[1]), 2),
+    [1, 1, tf.shape(outputs)[2]])
+zero_outside = tf.where(mask, outputs, tf.zeros_like(outputs))
+outputs = tf.reduce_sum(zero_outside, axis=1)
+```
+
+### _add_fc_layers
+
+The embedding of the input is passed into a fully connected layer which we then
+use as a softmax layer.
+
+```python
+tf.layers.dense(final_state, params.num_classes)
+```
+
+### Loss, predictions, and optimizer
+
+Finally, we need to add a loss, a training op, and predictions to create the
+`ModelFn`:
+
+```python
+cross_entropy = tf.reduce_mean(
+    tf.nn.sparse_softmax_cross_entropy_with_logits(
+        labels=targets, logits=logits))
+# Add the optimizer.
+train_op = tf.contrib.layers.optimize_loss(
+    loss=cross_entropy,
+    global_step=tf.train.get_global_step(),
+    learning_rate=params.learning_rate,
+    optimizer="Adam",
+    # some gradient clipping stabilizes training in the beginning.
+    clip_gradients=params.gradient_clipping_norm,
+    summaries=["learning_rate", "loss", "gradients", "gradient_norm"])
+predictions = tf.argmax(logits, axis=1)
+return model_fn_lib.ModelFnOps(
+    mode=mode,
+    predictions={"logits": logits,
+                 "predictions": predictions},
+    loss=cross_entropy,
+    train_op=train_op,
+    eval_metric_ops={"accuracy": tf.metrics.accuracy(targets, predictions)})
+```
+
+### Training and evaluating the model
+
+To train and evaluate the model we can rely on the functionalities of the
+`Estimator` APIs and easily run training and evaluation with the `Experiment`
+APIs:
+
+```python
+  estimator = tf.estimator.Estimator(
+      model_fn=model_fn,
+      model_dir=output_dir,
+      config=config,
+      params=model_params)
+  # Train the model.
+  tf.contrib.learn.Experiment(
+      estimator=estimator,
+      train_input_fn=get_input_fn(
+          mode=tf.contrib.learn.ModeKeys.TRAIN,
+          tfrecord_pattern=FLAGS.training_data,
+          batch_size=FLAGS.batch_size),
+      train_steps=FLAGS.steps,
+      eval_input_fn=get_input_fn(
+          mode=tf.contrib.learn.ModeKeys.EVAL,
+          tfrecord_pattern=FLAGS.eval_data,
+          batch_size=FLAGS.batch_size),
+      min_eval_frequency=1000)
+```
+
+Note that this tutorial is just a quick example on a relatively small dataset to
+get you familiar with the APIs of recurrent neural networks and estimators. Such
+models can be even more powerful if you try them on a large dataset.
+
+When training the model for 1M steps you can expect to get an accuracy of
+approximately of approximately 70% on the top-1 candidate. Note that this
+accuracy is sufficient to build the quickdraw game because of the game dynamics
+the user will be able to adjust their drawing until it is ready. Also, the game
+does not use the top-1 candidate only but accepts a drawing as correct if the
+target category shows up with a score better than a fixed threshold.
-- 
GitLab


From fca13de37711720fa7e5fe181f23b1625e0c9d60 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 5 Dec 2017 19:21:12 -0800
Subject: [PATCH 0669/1225] [XLA] Change default argument to explicit 0 for
 default address space. NFC

There is a FIXME in llvm::DataLayout::getPointerSize .

PiperOrigin-RevId: 178046949
---
 tensorflow/compiler/xla/service/gpu/gpu_compiler.cc         | 3 ++-
 tensorflow/compiler/xla/service/gpu/gpu_transfer_manager.cc | 2 +-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc b/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc
index fcd73fd37a..aa5f38ff58 100644
--- a/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc
+++ b/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc
@@ -294,7 +294,8 @@ StatusOr<std::vector<uint8>> CompilePtx(const string& ptx, int cc_major,
 }  // namespace
 
 GpuCompiler::GpuCompiler()
-    : pointer_size_(llvm::DataLayout(kDataLayout).getPointerSize()) {}
+    : pointer_size_(llvm::DataLayout(kDataLayout)
+                        .getPointerSize(0 /* default address space */)) {}
 
 StatusOr<std::unique_ptr<HloModule>> GpuCompiler::RunHloPasses(
     std::unique_ptr<HloModule> module, se::StreamExecutor* /*stream_exec*/) {
diff --git a/tensorflow/compiler/xla/service/gpu/gpu_transfer_manager.cc b/tensorflow/compiler/xla/service/gpu/gpu_transfer_manager.cc
index f0f036f7f3..4cf49d4a72 100644
--- a/tensorflow/compiler/xla/service/gpu/gpu_transfer_manager.cc
+++ b/tensorflow/compiler/xla/service/gpu/gpu_transfer_manager.cc
@@ -44,7 +44,7 @@ GpuTransferManager::GpuTransferManager()
     : GenericTransferManager(
           se::cuda::kCudaPlatformId,
           /*pointer_size=*/llvm::DataLayout(gpu::GpuCompiler::kDataLayout)
-              .getPointerSize()) {}
+              .getPointerSize(0 /* default address space */)) {}
 
 Status GpuTransferManager::TransferLiteralToInfeed(se::StreamExecutor* executor,
                                                    const Literal& literal) {
-- 
GitLab


From 9c79b0fc8b4112f72fba0fc11142806e41e465d2 Mon Sep 17 00:00:00 2001
From: Benoit Steiner <bsteiner@google.com>
Date: Tue, 5 Dec 2017 19:23:50 -0800
Subject: [PATCH 0670/1225] Optimize away NoOp in the common case where
 num_inputs=num_ouputs=2

PiperOrigin-RevId: 178047106
---
 tensorflow/core/grappler/optimizers/dependency_optimizer.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/core/grappler/optimizers/dependency_optimizer.cc b/tensorflow/core/grappler/optimizers/dependency_optimizer.cc
index a86420f693..541b479797 100644
--- a/tensorflow/core/grappler/optimizers/dependency_optimizer.cc
+++ b/tensorflow/core/grappler/optimizers/dependency_optimizer.cc
@@ -164,7 +164,7 @@ void DependencyOptimizer::OptimizeNode(int node_idx,
     const auto output_nodes = node_map_->GetOutputs(node->name());
     const int num_outputs = output_nodes.size();
     const int num_inputs = node->input_size();
-    if (num_inputs > 1 && num_outputs > 1) {
+    if (num_inputs * num_outputs > num_inputs + num_outputs) {
       return;
     }
     VLOG(1) << "***** Rerouting input around  " << node->name();
-- 
GitLab


From 041dc3349d100dc8a26a337d6656efe1543a6b9d Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 5 Dec 2017 20:43:42 -0800
Subject: [PATCH 0671/1225] Disable arithmetic optimizations that require shape
 inference, since it is too slow.

PiperOrigin-RevId: 178051602
---
 .../core/grappler/optimizers/arithmetic_optimizer.cc | 12 ++++++++----
 .../grappler/optimizers/arithmetic_optimizer_test.cc |  4 ++--
 2 files changed, 10 insertions(+), 6 deletions(-)

diff --git a/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc b/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc
index 8fece69739..9629e074ee 100644
--- a/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc
+++ b/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc
@@ -896,7 +896,8 @@ string ArithmeticOptimizer::TrySimplifyAndReplaceUses(
   //   AddN(Mul(x, y1), Mul(y2, x), Mul(x, y3), ... Mul(x, yn))
   // to the following:
   //   Mul(x, AddN(y1, y2, y3, ... yn))
-  if (IsAggregate(*node) && NumNonControlInputs(*node) > 1 &&
+  if (opt_level_ == RewriterConfig::AGGRESSIVE && IsAggregate(*node) &&
+      NumNonControlInputs(*node) > 1 &&
       !OptimizedNodeExists(StrCat(node->name(), "_hoist_add"))) {
     // Determine the set of common factors if the input nodes are all Mul nodes.
     std::set<string> common_factors;
@@ -1108,10 +1109,13 @@ Status ArithmeticOptimizer::Optimize(Cluster* /*cluster*/,
   int num_frames;
   TF_RETURN_IF_ERROR(IdentifyFramesWithNodeMap(*optimized_graph_, *node_map_,
                                                &frame_map_, &num_frames));
-  graph_properties_.reset(new GraphProperties(item));
   // Shapes are only needed in aggressive mode.
-  TF_RETURN_IF_ERROR(graph_properties_->InferStatically(false));
-  TF_RETURN_IF_ERROR(graph_properties_->AnnotateOutputShapes(optimized_graph_));
+  if (opt_level_ == RewriterConfig::AGGRESSIVE) {
+    graph_properties_.reset(new GraphProperties(item));
+    TF_RETURN_IF_ERROR(graph_properties_->InferStatically(false));
+    TF_RETURN_IF_ERROR(
+        graph_properties_->AnnotateOutputShapes(optimized_graph_));
+  }
 
   // Perform the optimizations.
   DedupComputations();
diff --git a/tensorflow/core/grappler/optimizers/arithmetic_optimizer_test.cc b/tensorflow/core/grappler/optimizers/arithmetic_optimizer_test.cc
index eccf90f3b1..da4263ff42 100644
--- a/tensorflow/core/grappler/optimizers/arithmetic_optimizer_test.cc
+++ b/tensorflow/core/grappler/optimizers/arithmetic_optimizer_test.cc
@@ -350,7 +350,7 @@ TEST_F(ArithmeticOptimizerTest, TrivialSumsRepeatedAdd) {
   for (int i = 0; i < item.graph.node_size(); ++i) {
     item.graph.mutable_node(i)->set_device(devices[i]);
   }
-  ArithmeticOptimizer optimizer;
+  ArithmeticOptimizer optimizer(RewriterConfig::AGGRESSIVE);
   GraphDef output;
   Status status = optimizer.Optimize(nullptr, item, &output);
   TF_EXPECT_OK(status);
@@ -423,7 +423,7 @@ TEST_F(ArithmeticOptimizerTest, HoistFactor) {
 
       GrapplerItem item;
       TF_CHECK_OK(s.ToGraphDef(&item.graph));
-      ArithmeticOptimizer optimizer;
+      ArithmeticOptimizer optimizer(RewriterConfig::AGGRESSIVE);
       GraphDef output;
       Status status = optimizer.Optimize(nullptr, item, &output);
       TF_EXPECT_OK(status);
-- 
GitLab


From 63dd8ffea012bf7c743d6848faf5f406ede94c05 Mon Sep 17 00:00:00 2001
From: Yao Zhang <yaozhang@google.com>
Date: Tue, 5 Dec 2017 20:43:47 -0800
Subject: [PATCH 0672/1225] Add DataFormatDimMap op.

PiperOrigin-RevId: 178051608
---
 tensorflow/contrib/makefile/tf_op_files.txt   |   1 +
 .../base_api/api_def_DataFormatDimMap.pbtxt   |  31 ++++++
 tensorflow/core/kernels/BUILD                 |   9 ++
 tensorflow/core/kernels/data_format_ops.cc    | 103 ++++++++++++++++++
 tensorflow/core/kernels/data_format_ops.h     |  45 ++++++++
 .../core/kernels/data_format_ops_gpu.cu.cc    |  31 ++++++
 tensorflow/core/ops/nn_ops.cc                 |  17 +++
 tensorflow/python/ops/nn_test.py              |  20 ++++
 8 files changed, 257 insertions(+)
 create mode 100644 tensorflow/core/api_def/base_api/api_def_DataFormatDimMap.pbtxt
 create mode 100644 tensorflow/core/kernels/data_format_ops.cc
 create mode 100644 tensorflow/core/kernels/data_format_ops.h
 create mode 100644 tensorflow/core/kernels/data_format_ops_gpu.cu.cc

diff --git a/tensorflow/contrib/makefile/tf_op_files.txt b/tensorflow/contrib/makefile/tf_op_files.txt
index 9fc9aeb785..5f27566398 100644
--- a/tensorflow/contrib/makefile/tf_op_files.txt
+++ b/tensorflow/contrib/makefile/tf_op_files.txt
@@ -148,6 +148,7 @@ tensorflow/core/kernels/dynamic_stitch_op.cc
 tensorflow/core/kernels/dynamic_partition_op.cc
 tensorflow/core/kernels/decode_bmp_op.cc
 tensorflow/core/kernels/depthtospace_op.cc
+tensorflow/core/kernels/data_format_ops.cc
 tensorflow/core/kernels/spacetodepth_op.cc
 tensorflow/core/kernels/dense_update_ops.cc
 tensorflow/core/kernels/deep_conv2d.cc
diff --git a/tensorflow/core/api_def/base_api/api_def_DataFormatDimMap.pbtxt b/tensorflow/core/api_def/base_api/api_def_DataFormatDimMap.pbtxt
new file mode 100644
index 0000000000..62098acd38
--- /dev/null
+++ b/tensorflow/core/api_def/base_api/api_def_DataFormatDimMap.pbtxt
@@ -0,0 +1,31 @@
+op {
+  graph_op_name: "DataFormatDimMap"
+  in_arg {
+    name: "x"
+    description: <<END
+Scalar. Dimension index in source data format. Must be in the range [-4, 4).
+END
+  }
+  out_arg {
+    name: "y"
+    description: <<END
+Scalar. Dimension index in destination data format.
+END
+  }
+  attr {
+    name: "src_format"
+    description: <<END
+source data format.
+END
+  }
+  attr {
+    name: "dst_format"
+    description: <<END
+destination data format.
+END
+  }
+  summary: "Returns the dimension index in the destination data format given the one in"
+  description: <<END
+the source data format.
+END
+}
diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD
index c8359b4480..77ca8f5fcb 100644
--- a/tensorflow/core/kernels/BUILD
+++ b/tensorflow/core/kernels/BUILD
@@ -3115,6 +3115,7 @@ cc_library(
         ":batch_norm_op",
         ":bias_op",
         ":conv_ops",
+        ":data_format_ops",
         ":depthwise_conv_grad_op",
         ":depthwise_conv_op",
         ":dilation_ops",
@@ -3152,6 +3153,12 @@ tf_kernel_library(
     deps = NN_DEPS,
 )
 
+tf_kernel_library(
+    name = "data_format_ops",
+    prefix = "data_format_ops",
+    deps = NN_DEPS,
+)
+
 tf_kernel_library(
     name = "bias_op",
     prefix = "bias_op",
@@ -4603,6 +4610,7 @@ filegroup(
         "control_flow_ops.h",
         "conv_2d.h",
         "conv_ops.h",
+        "data_format_ops.h",
         "depthtospace_op.h",
         "depthwise_conv_op.h",
         "fake_quant_ops_functor.h",
@@ -4716,6 +4724,7 @@ filegroup(
         "cwise_op_squared_difference.cc",
         "cwise_op_sub.cc",
         "cwise_op_tanh.cc",
+        "data_format_ops.cc",
         "decode_wav_op.cc",
         "deep_conv2d.cc",
         "deep_conv2d.h",
diff --git a/tensorflow/core/kernels/data_format_ops.cc b/tensorflow/core/kernels/data_format_ops.cc
new file mode 100644
index 0000000000..047188f754
--- /dev/null
+++ b/tensorflow/core/kernels/data_format_ops.cc
@@ -0,0 +1,103 @@
+/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+// See docs in ../ops/nn_ops.cc.
+
+#define EIGEN_USE_THREADS
+
+#include "tensorflow/core/kernels/data_format_ops.h"
+#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
+#include "tensorflow/core/framework/op_kernel.h"
+#include "tensorflow/core/framework/register_types.h"
+#include "tensorflow/core/framework/tensor.h"
+
+namespace tensorflow {
+
+typedef Eigen::ThreadPoolDevice CPUDevice;
+typedef Eigen::GpuDevice GPUDevice;
+
+template <typename Device, typename T>
+class DataFormatDimMapOp : public OpKernel {
+ public:
+  explicit DataFormatDimMapOp(OpKernelConstruction* context)
+      : OpKernel(context) {
+    string src_format;
+    OP_REQUIRES_OK(context, context->GetAttr("src_format", &src_format));
+    string dst_format;
+    OP_REQUIRES_OK(context, context->GetAttr("dst_format", &dst_format));
+    OP_REQUIRES(
+        context, src_format == "NHWC",
+        errors::InvalidArgument(strings::StrCat(
+            "Current implementation doesn't support source data format ",
+            src_format)));
+    OP_REQUIRES(context, dst_format == "NCHW",
+                errors::InvalidArgument(strings::StrCat(
+                    "Current implementation doesn't support dst data format ",
+                    dst_format)));
+  }
+
+  void Compute(OpKernelContext* context) override {
+    const Tensor& input = context->input(0);
+    OP_REQUIRES(context, input.dims() == 0,
+                errors::InvalidArgument("input must be a scalar",
+                                        input.shape().DebugString()));
+    Tensor* output = nullptr;
+    OP_REQUIRES_OK(context,
+                   context->allocate_output(0, input.shape(), &output));
+    functor::DataFormatDimMap<Device, T>()(context->eigen_device<Device>(),
+                                           input.scalar<T>(),
+                                           output->scalar<T>());
+  }
+};
+
+#define REGISTER_KERNEL(T)                                                \
+  REGISTER_KERNEL_BUILDER(                                                \
+      Name("DataFormatDimMap").Device(DEVICE_CPU).TypeConstraint<T>("T"), \
+      DataFormatDimMapOp<CPUDevice, T>);
+
+TF_CALL_int32(REGISTER_KERNEL);
+TF_CALL_int64(REGISTER_KERNEL);
+#undef REGISTER_KERNEL
+
+#if GOOGLE_CUDA
+// Forward declarations of the functor specializations for GPU.
+namespace functor {
+#define DECLARE_GPU_SPEC(T)                                  \
+  template <>                                                \
+  void DataFormatDimMap<GPUDevice, T>::operator()(           \
+      const GPUDevice& d, typename TTypes<T>::ConstScalar x, \
+      typename TTypes<T>::Scalar y);                         \
+  extern template struct DataFormatDimMap<GPUDevice, T>;
+
+#define DECLARE_GPU_SPECS(T) DECLARE_GPU_SPEC(T);
+
+TF_CALL_int32(DECLARE_GPU_SPECS);
+TF_CALL_int64(DECLARE_GPU_SPECS);
+#undef DECLARE_GPU_SPEC
+}  // namespace functor
+
+// Registration of the GPU implementations.
+#define REGISTER_GPU_KERNEL(T)                                            \
+  REGISTER_KERNEL_BUILDER(                                                \
+      Name("DataFormatDimMap").Device(DEVICE_GPU).TypeConstraint<T>("T"), \
+      DataFormatDimMapOp<GPUDevice, T>);
+
+TF_CALL_int32(REGISTER_GPU_KERNEL);
+TF_CALL_int64(REGISTER_GPU_KERNEL);
+#undef REGISTER_GPU_KERNEL
+
+#endif  // GOOGLE_CUDA
+
+}  // namespace tensorflow
diff --git a/tensorflow/core/kernels/data_format_ops.h b/tensorflow/core/kernels/data_format_ops.h
new file mode 100644
index 0000000000..079e76c0d9
--- /dev/null
+++ b/tensorflow/core/kernels/data_format_ops.h
@@ -0,0 +1,45 @@
+/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_KERNELS_DATA_FORMAT_OPS_H_
+#define TENSORFLOW_KERNELS_DATA_FORMAT_OPS_H_
+// Functor definition for data format dim mapping ops, must be compilable
+// by nvcc.
+#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
+#include "tensorflow/core/framework/tensor_types.h"
+
+namespace tensorflow {
+namespace functor {
+
+// Functor used by DataFormatDimMapOP to do the computations.
+template <typename Device, typename T>
+struct DataFormatDimMap {
+  void operator()(const Device& d, typename TTypes<T>::ConstScalar x,
+                  typename TTypes<T>::Scalar y) {
+    auto zero = x.constant(0);
+    auto one = x.constant(1);
+    auto three = x.constant(3);
+    auto four = x.constant(4);
+    auto x_mod = (x + four) % 4;
+    auto is_zero = (x_mod == zero);
+    auto is_three = (x_mod == three);
+    y.device(d) = is_zero.select(zero, is_three.select(one, x_mod + one));
+  }
+};
+
+}  // namespace functor
+}  // namespace tensorflow
+
+#endif  // TENSORFLOW_KERNELS_DATA_FORMAT_OPS_H_
diff --git a/tensorflow/core/kernels/data_format_ops_gpu.cu.cc b/tensorflow/core/kernels/data_format_ops_gpu.cu.cc
new file mode 100644
index 0000000000..09340a7d87
--- /dev/null
+++ b/tensorflow/core/kernels/data_format_ops_gpu.cu.cc
@@ -0,0 +1,31 @@
+/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#if GOOGLE_CUDA
+
+#define EIGEN_USE_GPU
+
+#include "tensorflow/core/framework/register_types.h"
+#include "tensorflow/core/kernels/data_format_ops.h"
+
+namespace tensorflow {
+
+typedef Eigen::GpuDevice GPUDevice;
+template struct functor::DataFormatDimMap<GPUDevice, int32>;
+template struct functor::DataFormatDimMap<GPUDevice, int64>;
+
+}  // namespace tensorflow
+
+#endif  // GOOGLE_CUDA
diff --git a/tensorflow/core/ops/nn_ops.cc b/tensorflow/core/ops/nn_ops.cc
index 102de94787..f58425db0a 100644
--- a/tensorflow/core/ops/nn_ops.cc
+++ b/tensorflow/core/ops/nn_ops.cc
@@ -751,6 +751,23 @@ Status CommonFusedConvCalculations(InferenceContext* c, bool has_resize) {
 
 }  // namespace
 
+REGISTER_OP("DataFormatDimMap")
+    .Input("x: T")
+    .Output("y: T")
+    .Attr("T: {int32, int64} = DT_INT32")
+    .Attr("src_format: string = 'NHWC'")
+    .Attr("dst_format: string = 'NCHW'")
+    .SetShapeFn(shape_inference::UnchangedShape)
+    .Doc(R"doc(
+Returns the dimension index in the destination data format given the one in
+the source data format.
+
+x: Scalar. Dimension index in source data format. Must be in the range [-4, 4).
+y: Scalar. Dimension index in destination data format.
+src_format: source data format.
+dst_format: destination data format.
+)doc");
+
 REGISTER_OP("FusedResizeAndPadConv2D")
     .Input("input: T")
     .Input("size: int32")
diff --git a/tensorflow/python/ops/nn_test.py b/tensorflow/python/ops/nn_test.py
index 3b918e4f74..ac79354fb7 100644
--- a/tensorflow/python/ops/nn_test.py
+++ b/tensorflow/python/ops/nn_test.py
@@ -953,5 +953,25 @@ class MomentsTest(test_lib.TestCase):
     self.doOutputTest((10, 10, 10, 30), (1, 2, 3))
 
 
+class DataFormatDimMapTest(test_lib.TestCase):
+
+  def _test(self, x_val, y_val_expected):
+    x = constant_op.constant(x_val)
+    y = nn_ops.data_format_dim_map(x)
+    with self.test_session(use_gpu=test_lib.is_gpu_available()) as sess:
+      y_val = sess.run(y)
+      self.assertEqual(y_val, y_val_expected)
+
+  def test(self):
+    self._test(0, 0)
+    self._test(1, 2)
+    self._test(2, 3)
+    self._test(3, 1)
+    self._test(-1, 1)
+    self._test(-2, 3)
+    self._test(-3, 2)
+    self._test(-4, 0)
+
+
 if __name__ == "__main__":
   test_lib.main()
-- 
GitLab


From 5694fb96ef42a5d6ab2777cff41045d127ebc9f8 Mon Sep 17 00:00:00 2001
From: Eugene Brevdo <ebrevdo@google.com>
Date: Tue, 5 Dec 2017 21:18:12 -0800
Subject: [PATCH 0673/1225] Create a new Var-like object, LegacyVar, which
 allows access to its mutex.

Future changes will change how locking happens on the resource-specific Var
object.

Also hide any access to LegacyVar in the implementation file; and move other
ops into the .cc file where they belong.

PiperOrigin-RevId: 178054272
---
 tensorflow/core/kernels/variable_ops.cc | 211 +++++++++++++++++++++---
 tensorflow/core/kernels/variable_ops.h  | 158 +-----------------
 2 files changed, 200 insertions(+), 169 deletions(-)

diff --git a/tensorflow/core/kernels/variable_ops.cc b/tensorflow/core/kernels/variable_ops.cc
index 36b8ff09d7..ddcfb14273 100644
--- a/tensorflow/core/kernels/variable_ops.cc
+++ b/tensorflow/core/kernels/variable_ops.cc
@@ -23,6 +23,177 @@ limitations under the License.
 
 namespace tensorflow {
 
+// Resource stored by variables in the resource manager
+// (legacy, ref-style version).
+class LegacyVar : public ResourceBase {
+ public:
+  explicit LegacyVar(DataType dtype) : tensor_(dtype) {}
+  // Not copyable or movable.
+  LegacyVar(const LegacyVar&) = delete;
+  LegacyVar& operator=(const LegacyVar&) = delete;
+
+  mutex* mu() { return &mu_; }
+  Tensor* tensor() { return &tensor_; }
+
+  string DebugString() override {
+    return strings::StrCat(DataTypeString(tensor_.dtype()), "/",
+                           tensor_.shape().DebugString());
+  }
+
+ private:
+  mutex mu_;
+  Tensor tensor_;
+
+  ~LegacyVar() override {}
+};
+
+VariableOp::VariableOp(OpKernelConstruction* context) : OpKernel(context) {
+  OP_REQUIRES_OK(context, context->GetAttr("shape", &shape_));
+  dtype_ = RemoveRefType(context->output_type(0));
+}
+
+void VariableOp::Compute(OpKernelContext* ctx) {
+  mutex_lock l(init_mu_);
+  if (!initialized_) {
+    OP_REQUIRES_OK(ctx, cinfo_.Init(ctx->resource_manager(), def(),
+                                    true /* use name() */));
+    initialized_ = true;
+  }
+  auto creator = [this](LegacyVar** var) {
+    *var = new LegacyVar(dtype_);
+    (*var)->tensor()->set_shape(shape_);
+    return Status::OK();
+  };
+  LegacyVar* var;
+  OP_REQUIRES_OK(ctx, cinfo_.resource_manager()->LookupOrCreate<LegacyVar>(
+                          cinfo_.container(), cinfo_.name(), &var, creator));
+  // Output a reference to our tensor, so it may be updated.
+  //
+  // As long as the resource manager hasn't been cleared the ref we return
+  // here is valid because it owns a ref on var.
+  ctx->set_output_ref(0, var->mu(), var->tensor());
+  if (ctx->track_allocations() && var->tensor()->IsInitialized()) {
+    AllocatorAttributes attr;
+    attr.set_gpu_compatible(true);
+    attr.set_nic_compatible(true);
+    if (ctx->allocate_on_host(attr)) {
+      ctx->record_host_persistent_memory_allocation(
+          var->tensor()->AllocatedBytes());
+    } else {
+      ctx->record_device_persistent_memory_allocation(
+          var->tensor()->AllocatedBytes());
+    }
+  }
+  var->Unref();
+}
+
+class TemporaryVariableOp : public OpKernel {
+ public:
+  explicit TemporaryVariableOp(OpKernelConstruction* context)
+      : OpKernel(context) {
+    OP_REQUIRES_OK(context, context->GetAttr("shape", &shape_));
+    OP_REQUIRES_OK(context, context->GetAttr("dtype", &dtype_));
+    OP_REQUIRES_OK(context, context->GetAttr("var_name", &var_name_));
+    // Variable name defaults to op name if not specified explicitly.
+    if (var_name_.empty()) var_name_ = name();
+  }
+
+  void Compute(OpKernelContext* context) override {
+    Status s;
+    ResourceMgr* rm = context->resource_manager();
+    OP_REQUIRES(context, rm, errors::Internal("No per-step resource manager."));
+    auto* tmp_var = new TmpVar;
+    OP_REQUIRES(context, tmp_var,
+                errors::ResourceExhausted("Could not allocate TmpVar."));
+    tmp_var->name = var_name_;
+    s = context->allocate_temp(dtype_, shape_, &tmp_var->val);
+    if (!s.ok()) tmp_var->Unref();
+    OP_REQUIRES_OK(context, s);
+    OP_REQUIRES_OK(context, rm->Create(context->step_container()->name(),
+                                       var_name_, tmp_var));
+    context->set_output_ref(0, &tmp_var->mu, &tmp_var->val);
+    if (context->track_allocations()) {
+      AllocatorAttributes attr;
+      if (context->allocate_on_host(attr)) {
+        context->record_host_persistent_memory_allocation(
+            tmp_var->val.AllocatedBytes());
+      } else {
+        context->record_device_persistent_memory_allocation(
+            tmp_var->val.AllocatedBytes());
+      }
+    }
+  }
+
+ private:
+  // Refcounted temporary variable resource.
+  friend class DestroyTemporaryVariableOp;
+  struct TmpVar : public ResourceBase {
+    mutex mu;
+    Tensor val;
+    string name;
+    string DebugString() override { return name; }
+    ~TmpVar() override { VLOG(3) << "TmpVar " << name << " deleted"; }
+  };
+
+  TensorShape shape_;
+  DataType dtype_;
+  string var_name_;
+};
+
+class DestroyTemporaryVariableOp : public OpKernel {
+ public:
+  explicit DestroyTemporaryVariableOp(OpKernelConstruction* context)
+      : OpKernel(context) {
+    OP_REQUIRES(context, IsRefType(context->input_type(0)),
+                errors::InvalidArgument("lhs input needs to be a ref type"))
+    OP_REQUIRES_OK(context, context->GetAttr("var_name", &var_name_));
+    OP_REQUIRES(context, !var_name_.empty(),
+                errors::InvalidArgument("Missing var_name attribute"));
+  }
+
+  void Compute(OpKernelContext* context) override {
+    // NOTE(pbar): All other mutators of the Tensor Ref *must* have completed
+    // their execution before this DestroyTemporaryVariable op executes.
+    // This is typically achieved using control dependencies.
+    CHECK(IsRefType(context->input_dtype(0)));
+    Tensor tmpvar = context->mutable_input(0, false);
+    context->set_output(0, tmpvar);
+    ResourceMgr* rm = context->resource_manager();
+    OP_REQUIRES(context, rm, errors::Internal("No per-step resource manager."));
+    OP_REQUIRES_OK(context, rm->Delete<TemporaryVariableOp::TmpVar>(
+                                context->step_container()->name(), var_name_));
+    if (context->track_allocations()) {
+      if (context->allocate_on_host(AllocatorAttributes())) {
+        context->record_host_persistent_memory_allocation(
+            -static_cast<int64>(tmpvar.AllocatedBytes()));
+      } else {
+        context->record_device_persistent_memory_allocation(
+            -static_cast<int64>(tmpvar.AllocatedBytes()));
+      }
+    }
+  }
+
+ private:
+  string var_name_;
+};
+
+class IsVariableInitializedOp : public OpKernel {
+ public:
+  explicit IsVariableInitializedOp(OpKernelConstruction* context)
+      : OpKernel(context) {}
+
+  void Compute(OpKernelContext* context) override {
+    // Get a mutable input tensor of the Ref input.
+    const Tensor& input_tensor = context->mutable_input(0, false);
+    Tensor* output = nullptr;
+    OP_REQUIRES_OK(context,
+                   context->allocate_output(0, TensorShape({}), &output));
+    auto output_tensor = output->tensor<bool, 0>();
+    bool result = input_tensor.IsInitialized();
+    output_tensor() = result;
+  }
+};
+
 REGISTER_KERNEL_BUILDER(Name("Variable").Device(DEVICE_CPU), VariableOp);
 REGISTER_KERNEL_BUILDER(Name("VariableV2").Device(DEVICE_CPU), VariableOp);
 REGISTER_KERNEL_BUILDER(Name("TemporaryVariable").Device(DEVICE_CPU),
@@ -33,30 +204,30 @@ REGISTER_KERNEL_BUILDER(Name("IsVariableInitialized").Device(DEVICE_CPU),
                         IsVariableInitializedOp);
 
 #ifdef TENSORFLOW_USE_SYCL
-#define REGISTER_SYCL_KERNEL(type)                                         \
-  REGISTER_KERNEL_BUILDER(                                                 \
-      Name("Variable").Device(DEVICE_SYCL).TypeConstraint<type>("dtype"),  \
-      VariableOp);                                                         \
-  REGISTER_KERNEL_BUILDER(                                                 \
-      Name("VariableV2").Device(DEVICE_SYCL).TypeConstraint<type>("dtype"),\
-      VariableOp);                                                         \
-  REGISTER_KERNEL_BUILDER(Name("TemporaryVariable")                        \
-                              .Device(DEVICE_SYCL)                         \
-                              .TypeConstraint<type>("dtype"),              \
-                          TemporaryVariableOp);                            \
-  REGISTER_KERNEL_BUILDER(Name("DestroyTemporaryVariable")                 \
-                              .Device(DEVICE_SYCL)                         \
-                              .TypeConstraint<type>("T"),                  \
-                          DestroyTemporaryVariableOp);                     \
-  REGISTER_KERNEL_BUILDER(Name("IsVariableInitialized")                    \
-                              .Device(DEVICE_SYCL)                         \
-                              .TypeConstraint<type>("dtype")               \
-                              .HostMemory("is_initialized"),               \
+#define REGISTER_SYCL_KERNEL(type)                                          \
+  REGISTER_KERNEL_BUILDER(                                                  \
+      Name("Variable").Device(DEVICE_SYCL).TypeConstraint<type>("dtype"),   \
+      VariableOp);                                                          \
+  REGISTER_KERNEL_BUILDER(                                                  \
+      Name("VariableV2").Device(DEVICE_SYCL).TypeConstraint<type>("dtype"), \
+      VariableOp);                                                          \
+  REGISTER_KERNEL_BUILDER(Name("TemporaryVariable")                         \
+                              .Device(DEVICE_SYCL)                          \
+                              .TypeConstraint<type>("dtype"),               \
+                          TemporaryVariableOp);                             \
+  REGISTER_KERNEL_BUILDER(Name("DestroyTemporaryVariable")                  \
+                              .Device(DEVICE_SYCL)                          \
+                              .TypeConstraint<type>("T"),                   \
+                          DestroyTemporaryVariableOp);                      \
+  REGISTER_KERNEL_BUILDER(Name("IsVariableInitialized")                     \
+                              .Device(DEVICE_SYCL)                          \
+                              .TypeConstraint<type>("dtype")                \
+                              .HostMemory("is_initialized"),                \
                           IsVariableInitializedOp);
 
 TF_CALL_GPU_NUMBER_TYPES_NO_HALF(REGISTER_SYCL_KERNEL);
 #undef REGISTER_SYCL_KERNEL
-#endif // TENSORFLOW_USE_SYCL
+#endif  // TENSORFLOW_USE_SYCL
 
 #if GOOGLE_CUDA
 // Only register 'Variable' on GPU for the subset of types also supported by
diff --git a/tensorflow/core/kernels/variable_ops.h b/tensorflow/core/kernels/variable_ops.h
index 355140d44c..83134bad37 100644
--- a/tensorflow/core/kernels/variable_ops.h
+++ b/tensorflow/core/kernels/variable_ops.h
@@ -27,10 +27,16 @@ limitations under the License.
 
 namespace tensorflow {
 
-// Resource stored by variables in the resource manager.
+// Resource stored by variables in the resource manager
+// (new, resource-style version).
 class Var : public ResourceBase {
  public:
   explicit Var(DataType dtype) : tensor_(dtype) {}
+  // Not copyable or movable.
+  Var(const Var&) = delete;
+  Var& operator=(const Var&) = delete;
+
+  // TODO(ebrevdo): Use LockSet instead of exposing mu.
   mutex* mu() { return &mu_; }
   Tensor* tensor() { return &tensor_; }
 
@@ -44,52 +50,12 @@ class Var : public ResourceBase {
   Tensor tensor_;
 
   ~Var() override {}
-  TF_DISALLOW_COPY_AND_ASSIGN(Var);
 };
 
 class VariableOp : public OpKernel {
  public:
-  explicit VariableOp(OpKernelConstruction* context) : OpKernel(context) {
-    OP_REQUIRES_OK(context, context->GetAttr("shape", &shape_));
-    dtype_ = RemoveRefType(context->output_type(0));
-  }
-
-  void Compute(OpKernelContext* ctx) override {
-    mutex_lock l(init_mu_);
-    if (!initialized_) {
-      OP_REQUIRES_OK(
-          ctx,
-          cinfo_.Init(ctx->resource_manager(), def(), true /* use name() */));
-      initialized_ = true;
-    }
-    auto creator = [this](Var** var) {
-      *var = new Var(dtype_);
-      (*var)->tensor()->set_shape(shape_);
-      return Status::OK();
-    };
-    Var* var;
-    OP_REQUIRES_OK(ctx,
-                   cinfo_.resource_manager()->LookupOrCreate<Var>(
-                       cinfo_.container(), cinfo_.name(), &var, creator));
-    // Output a reference to our tensor, so it may be updated.
-    //
-    // As long as the resource manager hasn't been cleared the ref we return
-    // here is valid because it owns a ref on var.
-    ctx->set_output_ref(0, var->mu(), var->tensor());
-    if (ctx->track_allocations() && var->tensor()->IsInitialized()) {
-      AllocatorAttributes attr;
-      attr.set_gpu_compatible(true);
-      attr.set_nic_compatible(true);
-      if (ctx->allocate_on_host(attr)) {
-        ctx->record_host_persistent_memory_allocation(
-            var->tensor()->AllocatedBytes());
-      } else {
-        ctx->record_device_persistent_memory_allocation(
-            var->tensor()->AllocatedBytes());
-      }
-    }
-    var->Unref();
-  }
+  explicit VariableOp(OpKernelConstruction* context);
+  void Compute(OpKernelContext* ctx) override;
 
  private:
   DataType dtype_;
@@ -102,112 +68,6 @@ class VariableOp : public OpKernel {
   TF_DISALLOW_COPY_AND_ASSIGN(VariableOp);
 };
 
-class TemporaryVariableOp : public OpKernel {
- public:
-  explicit TemporaryVariableOp(OpKernelConstruction* context)
-      : OpKernel(context) {
-    OP_REQUIRES_OK(context, context->GetAttr("shape", &shape_));
-    OP_REQUIRES_OK(context, context->GetAttr("dtype", &dtype_));
-    OP_REQUIRES_OK(context, context->GetAttr("var_name", &var_name_));
-    // Variable name defaults to op name if not specified explicitly.
-    if (var_name_ == "") var_name_ = name();
-  }
-
-  void Compute(OpKernelContext* context) override {
-    Status s;
-    ResourceMgr* rm = context->resource_manager();
-    OP_REQUIRES(context, rm, errors::Internal("No per-step resource manager."));
-    auto* tmp_var = new TmpVar;
-    OP_REQUIRES(context, tmp_var,
-                errors::ResourceExhausted("Could not allocate TmpVar."));
-    tmp_var->name = var_name_;
-    s = context->allocate_temp(dtype_, shape_, &tmp_var->val);
-    if (!s.ok()) tmp_var->Unref();
-    OP_REQUIRES_OK(context, s);
-    OP_REQUIRES_OK(context, rm->Create(context->step_container()->name(),
-                                       var_name_, tmp_var));
-    context->set_output_ref(0, &tmp_var->mu, &tmp_var->val);
-    if (context->track_allocations()) {
-      AllocatorAttributes attr;
-      if (context->allocate_on_host(attr)) {
-        context->record_host_persistent_memory_allocation(
-            tmp_var->val.AllocatedBytes());
-      } else {
-        context->record_device_persistent_memory_allocation(
-            tmp_var->val.AllocatedBytes());
-      }
-    }
-  }
-
- private:
-  // Refcounted temporary variable resource.
-  friend class DestroyTemporaryVariableOp;
-  struct TmpVar : public ResourceBase {
-    mutex mu;
-    Tensor val;
-    string name;
-    string DebugString() override { return name; }
-    ~TmpVar() override { VLOG(3) << "TmpVar " << name << " deleted"; }
-  };
-
-  TensorShape shape_;
-  DataType dtype_;
-  string var_name_;
-};
-
-class DestroyTemporaryVariableOp : public OpKernel {
- public:
-  explicit DestroyTemporaryVariableOp(OpKernelConstruction* context)
-      : OpKernel(context) {
-    OP_REQUIRES(context, IsRefType(context->input_type(0)),
-                errors::InvalidArgument("lhs input needs to be a ref type"))
-    OP_REQUIRES_OK(context, context->GetAttr("var_name", &var_name_));
-    OP_REQUIRES(context, var_name_ != "",
-                errors::InvalidArgument("Missing var_name attribute"));
-  }
-
-  void Compute(OpKernelContext* context) override {
-    // NOTE(pbar): All other mutators of the Tensor Ref *must* have completed
-    // their execution before this DestroyTemporaryVariable op executes.
-    // This is typically achieved using control dependencies.
-    CHECK(IsRefType(context->input_dtype(0)));
-    Tensor tmpvar = context->mutable_input(0, false);
-    context->set_output(0, tmpvar);
-    ResourceMgr* rm = context->resource_manager();
-    OP_REQUIRES(context, rm, errors::Internal("No per-step resource manager."));
-    OP_REQUIRES_OK(context, rm->Delete<TemporaryVariableOp::TmpVar>(
-                                context->step_container()->name(), var_name_));
-    if (context->track_allocations()) {
-      if (context->allocate_on_host(AllocatorAttributes())) {
-        context->record_host_persistent_memory_allocation(
-            -static_cast<int64>(tmpvar.AllocatedBytes()));
-      } else {
-        context->record_device_persistent_memory_allocation(
-            -static_cast<int64>(tmpvar.AllocatedBytes()));
-      }
-    }
-  }
-
- private:
-  string var_name_;
-};
-
-class IsVariableInitializedOp : public OpKernel {
- public:
-  IsVariableInitializedOp(OpKernelConstruction* context) : OpKernel(context) {}
-
-  void Compute(OpKernelContext* context) override {
-    // Get a mutable input tensor of the Ref input.
-    const Tensor& input_tensor = context->mutable_input(0, false);
-    Tensor* output = nullptr;
-    OP_REQUIRES_OK(context,
-                   context->allocate_output(0, TensorShape({}), &output));
-    auto output_tensor = output->tensor<bool, 0>();
-    bool result = input_tensor.IsInitialized();
-    output_tensor() = result;
-  }
-};
-
 }  // namespace tensorflow
 
 #endif  // TENSORFLOW_KERNELS_VARIABLE_OPS_H_
-- 
GitLab


From 707d586dd78213a2413ac0e733b2e4049f980ed0 Mon Sep 17 00:00:00 2001
From: Yao Zhang <yaozhang@google.com>
Date: Tue, 5 Dec 2017 21:44:21 -0800
Subject: [PATCH 0674/1225] Support non-const axis for split and concat.

PiperOrigin-RevId: 178055647
---
 .../grappler/optimizers/layout_optimizer.cc   | 62 +++++++++--------
 .../optimizers/layout_optimizer_test.cc       | 67 +++++++++++++------
 .../python/grappler/layout_optimizer_test.py  | 33 +++++++++
 3 files changed, 113 insertions(+), 49 deletions(-)

diff --git a/tensorflow/core/grappler/optimizers/layout_optimizer.cc b/tensorflow/core/grappler/optimizers/layout_optimizer.cc
index eac3b6b0f1..c7461b93fe 100644
--- a/tensorflow/core/grappler/optimizers/layout_optimizer.cc
+++ b/tensorflow/core/grappler/optimizers/layout_optimizer.cc
@@ -36,7 +36,7 @@ namespace tensorflow {
 namespace grappler {
 namespace {
 
-const char kDimConst[] = "LayoutOptimizerDimConst";
+const char kDim[] = "LayoutOptimizerDim";
 const char kPermNHWCToNCHW[] = "LayoutOptimizerPermConstNHWCToNCHW";
 const char kPermNCHWToNHWC[] = "LayoutOptimizerPermConstNCHWToNHWC";
 const char kGatherAxisConst[] = "LayoutOptimizerGatherAxisConst";
@@ -952,10 +952,6 @@ class ConcatProcessor : public AgnosticNodeProcessor {
   }
 
  protected:
-  bool ShouldProcess() const override {
-    return AgnosticNodeProcessor::ShouldProcess() && DimSupported();
-  }
-
   std::vector<int> GetInputPos() const override {
     std::vector<int> input_pos;
     int start = (IsConcatV1(*node_)) ? 1 : 0;
@@ -968,33 +964,19 @@ class ConcatProcessor : public AgnosticNodeProcessor {
   }
 
   Status CustomizedProcessing() override {
-    string dim_const_name = AddNodeDimConst()->name();
-    node_map_->AddOutput(dim_const_name, node_->name());
-    *node_->mutable_input(axis_node_pos_) = dim_const_name;
+    auto dim_node = node_map_->GetNode(node_->input(axis_node_pos_));
+    if (IsConstant(*dim_node)) {
+      AddNodeDimConst();
+    } else {
+      AddNodeDataFormatDimMap();
+    }
     return Status::OK();
   }
 
   int axis_node_pos_;
 
  private:
-  bool DimSupported() const {
-    auto dim_node = node_map_->GetNode(node_->input(axis_node_pos_));
-    // TODO(yaozhang): Support non-constant axis node.
-    if (!IsConstant(*dim_node)) {
-      return false;
-    }
-    if (HasAttribute(*dim_node, "value").ok()) {
-      auto tensor = dim_node->attr().at({"value"}).tensor();
-      if (tensor.tensor_shape().dim_size() == 0 && tensor.int_val_size() == 1) {
-        if (tensor.int_val(0) < 4 && tensor.int_val(0) >= -4) {
-          return true;
-        }
-      }
-    }
-    return false;
-  }
-
-  NodeDef* AddNodeDimConst() {
+  void AddNodeDimConst() {
     auto dim_node = node_map_->GetNode(node_->input(axis_node_pos_));
     auto tensor = dim_node->attr().at({"value"}).tensor();
     int value = tensor.int_val(0);
@@ -1010,10 +992,34 @@ class ConcatProcessor : public AgnosticNodeProcessor {
     // to ensure added_node is in the same frame with node_.
     NodeDef* added_node = graph_->add_node();
     *added_node = *dim_node;
-    added_node->set_name(strings::StrCat(kDimConst, "-", node_->name()));
+    added_node->set_name(strings::StrCat(kDim, "-", node_->name()));
+    node_map_->AddNode(added_node->name(), added_node);
     added_node->mutable_attr()->at({"value"}).mutable_tensor()->set_int_val(
         0, value);
-    return added_node;
+    node_map_->RemoveOutput(node_->input(axis_node_pos_), node_->name());
+    *node_->mutable_input(axis_node_pos_) = added_node->name();
+    node_map_->AddOutput(added_node->name(), node_->name());
+  }
+
+  void AddNodeDataFormatDimMap() {
+    NodeDef* added_node = graph_->add_node();
+    added_node->set_name(strings::StrCat(kDim, "-", node_->name()));
+    added_node->set_op("DataFormatDimMap");
+    node_map_->AddNode(added_node->name(), added_node);
+    added_node->set_device(node_->device());
+    AttrValue attr_data_type;
+    attr_data_type.set_type(DT_INT32);
+    added_node->mutable_attr()->insert({"T", attr_data_type});
+    AttrValue attr_format;
+    attr_format.set_s("NHWC");
+    added_node->mutable_attr()->insert({"src_format", attr_format});
+    attr_format.set_s("NCHW");
+    added_node->mutable_attr()->insert({"dst_format", attr_format});
+    *added_node->add_input() = node_->input(axis_node_pos_);
+    *node_->mutable_input(axis_node_pos_) = added_node->name();
+    node_map_->UpdateOutput(added_node->input(0), node_->name(),
+                            added_node->name());
+    node_map_->AddOutput(added_node->name(), node_->name());
   }
 };
 
diff --git a/tensorflow/core/grappler/optimizers/layout_optimizer_test.cc b/tensorflow/core/grappler/optimizers/layout_optimizer_test.cc
index 4d932a0932..af07eaf2d5 100644
--- a/tensorflow/core/grappler/optimizers/layout_optimizer_test.cc
+++ b/tensorflow/core/grappler/optimizers/layout_optimizer_test.cc
@@ -445,9 +445,9 @@ TEST_F(LayoutOptimizerTest, SplitDimC) {
   Status status = optimizer.Optimize(virtual_cluster_.get(), item, &output);
   NodeMap node_map(&output);
   auto split_node = node_map.GetNode("split");
-  EXPECT_EQ(split_node->input(0), "LayoutOptimizerDimConst-split");
+  EXPECT_EQ(split_node->input(0), "LayoutOptimizerDim-split");
   EXPECT_EQ(split_node->input(1), "Conv2D");
-  auto split_const = node_map.GetNode("LayoutOptimizerDimConst-split");
+  auto split_const = node_map.GetNode("LayoutOptimizerDim-split");
   EXPECT_EQ(split_const->op(), "Const");
   EXPECT_EQ(split_const->attr().at({"value"}).tensor().int_val(0), 1);
 }
@@ -465,9 +465,9 @@ TEST_F(LayoutOptimizerTest, SplitDimH) {
   Status status = optimizer.Optimize(virtual_cluster_.get(), item, &output);
   NodeMap node_map(&output);
   auto split_node = node_map.GetNode("split");
-  EXPECT_EQ(split_node->input(0), "LayoutOptimizerDimConst-split");
+  EXPECT_EQ(split_node->input(0), "LayoutOptimizerDim-split");
   EXPECT_EQ(split_node->input(1), "Conv2D");
-  auto split_const = node_map.GetNode("LayoutOptimizerDimConst-split");
+  auto split_const = node_map.GetNode("LayoutOptimizerDim-split");
   EXPECT_EQ(split_const->op(), "Const");
   EXPECT_EQ(split_const->attr().at({"value"}).tensor().int_val(0), 2);
 }
@@ -485,9 +485,9 @@ TEST_F(LayoutOptimizerTest, SplitDimW) {
   Status status = optimizer.Optimize(virtual_cluster_.get(), item, &output);
   NodeMap node_map(&output);
   auto split_node = node_map.GetNode("split");
-  EXPECT_EQ(split_node->input(0), "LayoutOptimizerDimConst-split");
+  EXPECT_EQ(split_node->input(0), "LayoutOptimizerDim-split");
   EXPECT_EQ(split_node->input(1), "Conv2D");
-  auto split_const = node_map.GetNode("LayoutOptimizerDimConst-split");
+  auto split_const = node_map.GetNode("LayoutOptimizerDim-split");
   EXPECT_EQ(split_const->op(), "Const");
   EXPECT_EQ(split_const->attr().at({"value"}).tensor().int_val(0), 3);
 }
@@ -505,9 +505,9 @@ TEST_F(LayoutOptimizerTest, SplitDimN) {
   Status status = optimizer.Optimize(virtual_cluster_.get(), item, &output);
   NodeMap node_map(&output);
   auto split_node = node_map.GetNode("split");
-  EXPECT_EQ(split_node->input(0), "LayoutOptimizerDimConst-split");
+  EXPECT_EQ(split_node->input(0), "LayoutOptimizerDim-split");
   EXPECT_EQ(split_node->input(1), "Conv2D");
-  auto split_const = node_map.GetNode("LayoutOptimizerDimConst-split");
+  auto split_const = node_map.GetNode("LayoutOptimizerDim-split");
   EXPECT_EQ(split_const->op(), "Const");
   EXPECT_EQ(split_const->attr().at({"value"}).tensor().int_val(0), 0);
 }
@@ -526,9 +526,11 @@ TEST_F(LayoutOptimizerTest, SplitNonConstDim) {
   Status status = optimizer.Optimize(virtual_cluster_.get(), item, &output);
   NodeMap node_map(&output);
   auto split_node = node_map.GetNode("split");
-  EXPECT_EQ(split_node->input(0), "i1");
-  EXPECT_EQ(split_node->input(1),
-            "LayoutOptimizerTransposeNCHWToNHWC-Conv2D-split-1");
+  EXPECT_EQ(split_node->input(0), "LayoutOptimizerDim-split");
+  EXPECT_EQ(split_node->input(1), "Conv2D");
+  auto map_node = node_map.GetNode("LayoutOptimizerDim-split");
+  EXPECT_EQ(map_node->op(), "DataFormatDimMap");
+  EXPECT_EQ(map_node->input(0), "i1");
 }
 
 TEST_F(LayoutOptimizerTest, SplitSamePortToMultipleInputsOfSameNode) {
@@ -549,8 +551,8 @@ TEST_F(LayoutOptimizerTest, SplitSamePortToMultipleInputsOfSameNode) {
   EXPECT_EQ(concat_node->input(0), "split:1");
   EXPECT_EQ(concat_node->input(1), "split:1");
   EXPECT_EQ(concat_node->input(2), "split:1");
-  EXPECT_EQ(concat_node->input(3), "LayoutOptimizerDimConst-concat");
-  auto concat_dim = node_map.GetNode("LayoutOptimizerDimConst-concat");
+  EXPECT_EQ(concat_node->input(3), "LayoutOptimizerDim-concat");
+  auto concat_dim = node_map.GetNode("LayoutOptimizerDim-concat");
   EXPECT_EQ(concat_dim->attr().at({"value"}).tensor().int_val(0), 1);
 }
 
@@ -570,11 +572,34 @@ TEST_F(LayoutOptimizerTest, ConcatDimH) {
   auto concat_node = node_map.GetNode("concat");
   EXPECT_EQ(concat_node->input(0), "split");
   EXPECT_EQ(concat_node->input(1), "split:1");
-  EXPECT_EQ(concat_node->input(2), "LayoutOptimizerDimConst-concat");
-  auto concat_dim = node_map.GetNode("LayoutOptimizerDimConst-concat");
+  EXPECT_EQ(concat_node->input(2), "LayoutOptimizerDim-concat");
+  auto concat_dim = node_map.GetNode("LayoutOptimizerDim-concat");
   EXPECT_EQ(concat_dim->attr().at({"value"}).tensor().int_val(0), 2);
 }
 
+TEST_F(LayoutOptimizerTest, ConcatNonConst) {
+  tensorflow::Scope s = tensorflow::Scope::NewRootScope();
+  auto conv = SimpleConv2D(&s, 3, 2, "VALID");
+  auto axis = ops::Const(s.WithOpName("axis"), 1);
+  auto i = ops::Identity(s.WithOpName("i"), axis);
+  auto split = ops::Split(s.WithOpName("split"), axis, conv, 2);
+  auto concat = ops::Concat(s.WithOpName("concat"), {split[0], split[1]}, i);
+  auto o = ops::Identity(s.WithOpName("o"), concat);
+  GrapplerItem item;
+  TF_CHECK_OK(s.ToGraphDef(&item.graph));
+  LayoutOptimizer optimizer;
+  GraphDef output;
+  Status status = optimizer.Optimize(virtual_cluster_.get(), item, &output);
+  NodeMap node_map(&output);
+  auto concat_node = node_map.GetNode("concat");
+  EXPECT_EQ(concat_node->input(0), "split");
+  EXPECT_EQ(concat_node->input(1), "split:1");
+  EXPECT_EQ(concat_node->input(2), "LayoutOptimizerDim-concat");
+  auto concat_dim = node_map.GetNode("LayoutOptimizerDim-concat");
+  EXPECT_EQ(concat_dim->op(), "DataFormatDimMap");
+  EXPECT_EQ(concat_dim->input(0), "i");
+}
+
 TEST_F(LayoutOptimizerTest, ConcatDimW) {
   tensorflow::Scope s = tensorflow::Scope::NewRootScope();
   auto conv = SimpleConv2D(&s, 3, 2, "VALID");
@@ -591,8 +616,8 @@ TEST_F(LayoutOptimizerTest, ConcatDimW) {
   auto concat_node = node_map.GetNode("concat");
   EXPECT_EQ(concat_node->input(0), "split");
   EXPECT_EQ(concat_node->input(1), "split:1");
-  EXPECT_EQ(concat_node->input(2), "LayoutOptimizerDimConst-concat");
-  auto concat_dim = node_map.GetNode("LayoutOptimizerDimConst-concat");
+  EXPECT_EQ(concat_node->input(2), "LayoutOptimizerDim-concat");
+  auto concat_dim = node_map.GetNode("LayoutOptimizerDim-concat");
   EXPECT_EQ(concat_dim->attr().at({"value"}).tensor().int_val(0), 3);
 }
 
@@ -612,8 +637,8 @@ TEST_F(LayoutOptimizerTest, ConcatDimN) {
   auto concat_node = node_map.GetNode("concat");
   EXPECT_EQ(concat_node->input(0), "split");
   EXPECT_EQ(concat_node->input(1), "split:1");
-  EXPECT_EQ(concat_node->input(2), "LayoutOptimizerDimConst-concat");
-  auto concat_dim = node_map.GetNode("LayoutOptimizerDimConst-concat");
+  EXPECT_EQ(concat_node->input(2), "LayoutOptimizerDim-concat");
+  auto concat_dim = node_map.GetNode("LayoutOptimizerDim-concat");
   EXPECT_EQ(concat_dim->attr().at({"value"}).tensor().int_val(0), 0);
 }
 
@@ -633,8 +658,8 @@ TEST_F(LayoutOptimizerTest, ConcatDimC) {
   auto concat_node = node_map.GetNode("concat");
   EXPECT_EQ(concat_node->input(0), "split");
   EXPECT_EQ(concat_node->input(1), "split:1");
-  EXPECT_EQ(concat_node->input(2), "LayoutOptimizerDimConst-concat");
-  auto concat_dim = node_map.GetNode("LayoutOptimizerDimConst-concat");
+  EXPECT_EQ(concat_node->input(2), "LayoutOptimizerDim-concat");
+  auto concat_dim = node_map.GetNode("LayoutOptimizerDim-concat");
   EXPECT_EQ(concat_dim->attr().at({"value"}).tensor().int_val(0), 1);
 }
 
diff --git a/tensorflow/python/grappler/layout_optimizer_test.py b/tensorflow/python/grappler/layout_optimizer_test.py
index 50735fb567..8cad8a514f 100644
--- a/tensorflow/python/grappler/layout_optimizer_test.py
+++ b/tensorflow/python/grappler/layout_optimizer_test.py
@@ -195,6 +195,39 @@ class LayoutOptimizerTest(test.TestCase):
 
       self.assertAllClose(output_val_ref, output_val, atol=1e-3)
 
+  def testSplitWithNonConstAxis(self):
+    if test.is_gpu_available(cuda_only=True):
+      random_seed.set_random_seed(0)
+      x = random_ops.truncated_normal([1, 784], seed=0)
+      conv = _two_layer_model(x)
+      dim = array_ops.placeholder(dtype='int32')
+      split = array_ops.split(conv, 2, axis=dim)
+      output = math_ops.reduce_sum(split[0])
+
+      with session.Session() as sess:
+        output_val_ref = sess.run(output, feed_dict={dim: 3})
+
+      with session.Session(config=_get_config()) as sess:
+        metadata = config_pb2.RunMetadata()
+        output_val = sess.run(output, run_metadata=metadata, feed_dict={dim: 3})
+
+      nodes = []
+      num_transposes = 0
+      for node in metadata.cost_graph.node:
+        if node.name.startswith('LayoutOptimizerTranspose'):
+          num_transposes += 1
+        nodes.append(node.name)
+
+      # Four transposes were initially added in the Expand phase of
+      # LayoutOptimizer; two of them are cancelled out in the Collapse phase.
+      expected_num_transposes = 2
+      self.assertEqual(expected_num_transposes, num_transposes)
+      self.assertIn('LayoutOptimizerTransposeNHWCToNCHW-Conv2D-Reshape-0',
+                    nodes)
+      self.assertIn('LayoutOptimizerTransposeNCHWToNHWC-split-Sum-0', nodes)
+      self.assertIn('LayoutOptimizerDim-split', nodes)
+      self.assertAllClose(output_val_ref, output_val, atol=1e-3)
+
   def testLoop(self):
     if test.is_gpu_available(cuda_only=True):
       output = _loop()
-- 
GitLab


From 5b3e3c94eb72c0d7d5c3372321d89b0f48c987d0 Mon Sep 17 00:00:00 2001
From: Derek Murray <mrry@google.com>
Date: Tue, 5 Dec 2017 21:58:04 -0800
Subject: [PATCH 0675/1225] Use `CallFrameInterface` instead of
 `FunctionCallFrame` in the executor.

PiperOrigin-RevId: 178056389
---
 tensorflow/core/common_runtime/executor.cc |  2 +-
 tensorflow/core/common_runtime/executor.h  |  2 +-
 tensorflow/core/common_runtime/function.cc | 23 +++++++++++++++-------
 3 files changed, 18 insertions(+), 9 deletions(-)

diff --git a/tensorflow/core/common_runtime/executor.cc b/tensorflow/core/common_runtime/executor.cc
index 1896baaf66..fe1cf1b12e 100644
--- a/tensorflow/core/common_runtime/executor.cc
+++ b/tensorflow/core/common_runtime/executor.cc
@@ -1188,7 +1188,7 @@ class ExecutorState {
   // QUESTION: Make it a checkpoint::TensorSliceReaderCacheWrapper
   // instead of a pointer?  (avoids having to delete).
   checkpoint::TensorSliceReaderCacheWrapper* slice_reader_cache_;
-  FunctionCallFrame* call_frame_;
+  CallFrameInterface* call_frame_;
   const ExecutorImpl* impl_;
   CancellationManager* cancellation_manager_;
   Executor::Args::Runner runner_;
diff --git a/tensorflow/core/common_runtime/executor.h b/tensorflow/core/common_runtime/executor.h
index e09dc4e346..b5f4ebb005 100644
--- a/tensorflow/core/common_runtime/executor.h
+++ b/tensorflow/core/common_runtime/executor.h
@@ -84,7 +84,7 @@ class Executor {
     int64 step_id = 0;
     Rendezvous* rendezvous = nullptr;
     StepStatsCollector* stats_collector = nullptr;
-    FunctionCallFrame* call_frame = nullptr;
+    CallFrameInterface* call_frame = nullptr;
     CancellationManager* cancellation_manager = nullptr;
     SessionState* session_state = nullptr;
     TensorStore* tensor_store = nullptr;
diff --git a/tensorflow/core/common_runtime/function.cc b/tensorflow/core/common_runtime/function.cc
index 3328125dc9..b152529711 100644
--- a/tensorflow/core/common_runtime/function.cc
+++ b/tensorflow/core/common_runtime/function.cc
@@ -588,14 +588,13 @@ void FunctionLibraryRuntimeImpl::RunRemote(const Options& opts, Handle handle,
                                            std::vector<Tensor>* rets,
                                            Executor::Args* exec_args,
                                            Item* item, DoneCallback done) {
-  FunctionCallFrame* frame = exec_args->call_frame;
+  DCHECK(exec_args->call_frame == nullptr);
   string target_device = parent_->GetDeviceName(handle);
   string source_device = opts.source_device;
   Rendezvous* rendezvous = opts.rendezvous;
   DeviceContext* device_context;
   Status s = parent_->GetDeviceContext(target_device, &device_context);
   if (!s.ok()) {
-    delete frame;
     delete exec_args;
     done(s);
     return;
@@ -603,6 +602,16 @@ void FunctionLibraryRuntimeImpl::RunRemote(const Options& opts, Handle handle,
   int64 src_incarnation, target_incarnation;
   s = parent_->GetDeviceIncarnation(source_device, &src_incarnation);
   s.Update(parent_->GetDeviceIncarnation(target_device, &target_incarnation));
+  if (!s.ok()) {
+    delete exec_args;
+    done(s);
+    return;
+  }
+
+  const FunctionBody* fbody = GetFunctionBody(handle);
+  FunctionCallFrame* frame =
+      new FunctionCallFrame(fbody->arg_types, fbody->ret_types);
+  exec_args->call_frame = frame;
   if (!s.ok()) {
     delete frame;
     delete exec_args;
@@ -679,14 +688,10 @@ void FunctionLibraryRuntimeImpl::Run(const Options& opts, Handle handle,
     parent_->Run(run_opts, handle, args, rets, done);
     return;
   }
-  const FunctionBody* fbody = GetFunctionBody(handle);
-  FunctionCallFrame* frame =
-      new FunctionCallFrame(fbody->arg_types, fbody->ret_types);
 
   Item* item = nullptr;
   Status s = GetOrCreateItem(handle, &item);
   if (!s.ok()) {
-    delete frame;
     done(s);
     return;
   }
@@ -697,7 +702,6 @@ void FunctionLibraryRuntimeImpl::Run(const Options& opts, Handle handle,
   exec_args->step_id = run_opts.step_id;
   exec_args->rendezvous = run_opts.rendezvous;
   exec_args->stats_collector = run_opts.stats_collector;
-  exec_args->call_frame = frame;
   exec_args->cancellation_manager = run_opts.cancellation_manager;
   exec_args->step_container = run_opts.step_container;
   exec_args->runner = *run_opts.runner;
@@ -707,6 +711,10 @@ void FunctionLibraryRuntimeImpl::Run(const Options& opts, Handle handle,
     return;
   }
 
+  const FunctionBody* fbody = GetFunctionBody(handle);
+  FunctionCallFrame* frame =
+      new FunctionCallFrame(fbody->arg_types, fbody->ret_types);
+  exec_args->call_frame = frame;
   s = frame->SetArgs(args);
   if (!s.ok()) {
     delete frame;
@@ -714,6 +722,7 @@ void FunctionLibraryRuntimeImpl::Run(const Options& opts, Handle handle,
     done(s);
     return;
   }
+
   item->exec->RunAsync(
       // Executor args
       *exec_args,
-- 
GitLab


From 45fef9265f737521ad2c08bfec0e97cf9fa28783 Mon Sep 17 00:00:00 2001
From: Jianwei Xie <xiejw@google.com>
Date: Tue, 5 Dec 2017 22:00:31 -0800
Subject: [PATCH 0676/1225] Fix typo: SyncReplicaOptimizer ->
 SyncReplicasOptimizer

PiperOrigin-RevId: 178056514
---
 tensorflow/python/training/sync_replicas_optimizer.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/python/training/sync_replicas_optimizer.py b/tensorflow/python/training/sync_replicas_optimizer.py
index b52d101a21..47702fdad0 100644
--- a/tensorflow/python/training/sync_replicas_optimizer.py
+++ b/tensorflow/python/training/sync_replicas_optimizer.py
@@ -449,7 +449,7 @@ class _SyncReplicasOptimizerHook(session_run_hook.SessionRunHook):
   """A SessionRunHook handles ops related to SyncReplicasOptimizer."""
 
   def __init__(self, sync_optimizer, is_chief, num_tokens):
-    """Creates hook to handle SyncReplicaOptimizer initialization ops.
+    """Creates hook to handle SyncReplicasOptimizer initialization ops.
 
     Args:
       sync_optimizer: `SyncReplicasOptimizer` which this hook will initialize.
-- 
GitLab


From feb859d919f451540e93ecbc90791103282fd5b2 Mon Sep 17 00:00:00 2001
From: Dan Becker <dansbecker@gmail.com>
Date: Wed, 6 Dec 2017 00:27:16 -0700
Subject: [PATCH 0677/1225] Allow keras applications to load weights from
 arbitrary path

---
 .../keras/applications/inception_resnet_v2.py   | 15 ++++++++++-----
 .../_impl/keras/applications/inception_v3.py    | 17 ++++++++++++-----
 .../keras/_impl/keras/applications/mobilenet.py | 16 +++++++++++-----
 .../keras/_impl/keras/applications/resnet50.py  | 17 ++++++++++++-----
 .../keras/_impl/keras/applications/vgg16.py     | 17 ++++++++++++-----
 .../keras/_impl/keras/applications/vgg19.py     | 17 ++++++++++++-----
 .../keras/_impl/keras/applications/xception.py  | 16 +++++++++++-----
 7 files changed, 80 insertions(+), 35 deletions(-)

diff --git a/tensorflow/python/keras/_impl/keras/applications/inception_resnet_v2.py b/tensorflow/python/keras/_impl/keras/applications/inception_resnet_v2.py
index de29b92575..1bd118a419 100644
--- a/tensorflow/python/keras/_impl/keras/applications/inception_resnet_v2.py
+++ b/tensorflow/python/keras/_impl/keras/applications/inception_resnet_v2.py
@@ -23,6 +23,8 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
+import os
+
 from tensorflow.python.keras._impl.keras import backend as K
 from tensorflow.python.keras._impl.keras.applications import imagenet_utils
 from tensorflow.python.keras._impl.keras.applications.imagenet_utils import _obtain_input_shape
@@ -208,8 +210,9 @@ def InceptionResNetV2(include_top=True,  # pylint: disable=invalid-name
   Arguments:
       include_top: whether to include the fully-connected
           layer at the top of the network.
-      weights: one of `None` (random initialization)
-          or `'imagenet'` (pre-training on ImageNet).
+      weights: one of `None` (random initialization),
+          'imagenet' (pre-training on ImageNet),
+          or the path to the weights file to be loaded.
       input_tensor: optional Keras tensor (i.e. output of `layers.Input()`)
           to use as image input for the model.
       input_shape: optional shape tuple, only to be specified
@@ -239,10 +242,12 @@ def InceptionResNetV2(include_top=True,  # pylint: disable=invalid-name
       ValueError: in case of invalid argument for `weights`,
           or invalid input shape.
   """
-  if weights not in {'imagenet', None}:
+  if not (weights in {'imagenet', None} or os.path.exists(weights)):
     raise ValueError('The `weights` argument should be either '
-                     '`None` (random initialization) or `imagenet` '
-                     '(pre-training on ImageNet).')
+                     '`None` (random initialization), `imagenet` '
+                     '(pre-training on ImageNet), '
+                     'or the path to the weights file to be loaded.')
+
 
   if weights == 'imagenet' and include_top and classes != 1000:
     raise ValueError('If using `weights` as imagenet with `include_top`'
diff --git a/tensorflow/python/keras/_impl/keras/applications/inception_v3.py b/tensorflow/python/keras/_impl/keras/applications/inception_v3.py
index d4fea4fbb0..3a17c647dd 100644
--- a/tensorflow/python/keras/_impl/keras/applications/inception_v3.py
+++ b/tensorflow/python/keras/_impl/keras/applications/inception_v3.py
@@ -29,6 +29,8 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
+import os
+
 from tensorflow.python.keras._impl.keras import backend as K
 from tensorflow.python.keras._impl.keras import layers
 from tensorflow.python.keras._impl.keras.applications import imagenet_utils
@@ -118,8 +120,9 @@ def InceptionV3(include_top=True,
   Arguments:
       include_top: whether to include the fully-connected
           layer at the top of the network.
-      weights: one of `None` (random initialization)
-          or "imagenet" (pre-training on ImageNet).
+      weights: one of `None` (random initialization),
+          'imagenet' (pre-training on ImageNet),
+          or the path to the weights file to be loaded.
       input_tensor: optional Keras tensor (i.e. output of `layers.Input()`)
           to use as image input for the model.
       input_shape: optional shape tuple, only to be specified
@@ -151,10 +154,12 @@ def InceptionV3(include_top=True,
       ValueError: in case of invalid argument for `weights`,
           or invalid input shape.
   """
-  if weights not in {'imagenet', None}:
+  if not (weights in {'imagenet', None} or os.path.exists(weights)):
     raise ValueError('The `weights` argument should be either '
-                     '`None` (random initialization) or `imagenet` '
-                     '(pre-training on ImageNet).')
+                     '`None` (random initialization), `imagenet` '
+                     '(pre-training on ImageNet), '
+                     'or the path to the weights file to be loaded.')
+
 
   if weights == 'imagenet' and include_top and classes != 1000:
     raise ValueError('If using `weights` as imagenet with `include_top`'
@@ -383,6 +388,8 @@ def InceptionV3(include_top=True,
           cache_subdir='models',
           file_hash='bcbd6486424b2319ff4ef7d526e38f63')
     model.load_weights(weights_path)
+  elif weights is not None:
+    model.load_weights(weights)
   return model
 
 
diff --git a/tensorflow/python/keras/_impl/keras/applications/mobilenet.py b/tensorflow/python/keras/_impl/keras/applications/mobilenet.py
index 653bd8c09f..9179422d0e 100644
--- a/tensorflow/python/keras/_impl/keras/applications/mobilenet.py
+++ b/tensorflow/python/keras/_impl/keras/applications/mobilenet.py
@@ -67,6 +67,7 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
+import os
 import warnings
 
 from tensorflow.python.keras._impl.keras import backend as K
@@ -348,8 +349,9 @@ def MobileNet(input_shape=None,  # pylint: disable=invalid-name
       dropout: dropout rate
       include_top: whether to include the fully-connected
           layer at the top of the network.
-      weights: `None` (random initialization) or
-          `imagenet` (ImageNet weights)
+      weights: one of `None` (random initialization),
+          'imagenet' (pre-training on ImageNet),
+          or the path to the weights file to be loaded.
       input_tensor: optional Keras tensor (i.e. output of
           `layers.Input()`)
           to use as image input for the model.
@@ -384,10 +386,12 @@ def MobileNet(input_shape=None,  # pylint: disable=invalid-name
                        'as other backends do not support '
                        'depthwise convolution.')
 
-  if weights not in {'imagenet', None}:
+  if not (weights in {'imagenet', None} or os.path.exists(weights)):
     raise ValueError('The `weights` argument should be either '
-                     '`None` (random initialization) or `imagenet` '
-                     '(pre-training on ImageNet).')
+                     '`None` (random initialization), `imagenet` '
+                     '(pre-training on ImageNet), '
+                     'or the path to the weights file to be loaded.')
+
 
   if weights == 'imagenet' and include_top and classes != 1000:
     raise ValueError('If using `weights` as ImageNet with `include_top` '
@@ -537,6 +541,8 @@ def MobileNet(input_shape=None,  # pylint: disable=invalid-name
 
   if old_data_format:
     K.set_image_data_format(old_data_format)
+  elif weights is not None:
+    model.load_weights(weights)
   return model
 
 
diff --git a/tensorflow/python/keras/_impl/keras/applications/resnet50.py b/tensorflow/python/keras/_impl/keras/applications/resnet50.py
index 717b626fdc..5238ba70c1 100644
--- a/tensorflow/python/keras/_impl/keras/applications/resnet50.py
+++ b/tensorflow/python/keras/_impl/keras/applications/resnet50.py
@@ -26,6 +26,8 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
+import os
+
 from tensorflow.python.keras._impl.keras import backend as K
 from tensorflow.python.keras._impl.keras import layers
 from tensorflow.python.keras._impl.keras.applications.imagenet_utils import _obtain_input_shape
@@ -161,8 +163,9 @@ def ResNet50(include_top=True,
   Arguments:
       include_top: whether to include the fully-connected
           layer at the top of the network.
-      weights: one of `None` (random initialization)
-          or "imagenet" (pre-training on ImageNet).
+      weights: one of `None` (random initialization),
+          'imagenet' (pre-training on ImageNet),
+          or the path to the weights file to be loaded.
       input_tensor: optional Keras tensor (i.e. output of `layers.Input()`)
           to use as image input for the model.
       input_shape: optional shape tuple, only to be specified
@@ -194,10 +197,12 @@ def ResNet50(include_top=True,
       ValueError: in case of invalid argument for `weights`,
           or invalid input shape.
   """
-  if weights not in {'imagenet', None}:
+  if not (weights in {'imagenet', None} or os.path.exists(weights)):
     raise ValueError('The `weights` argument should be either '
-                     '`None` (random initialization) or `imagenet` '
-                     '(pre-training on ImageNet).')
+                     '`None` (random initialization), `imagenet` '
+                     '(pre-training on ImageNet), '
+                     'or the path to the weights file to be loaded.')
+
 
   if weights == 'imagenet' and include_top and classes != 1000:
     raise ValueError('If using `weights` as imagenet with `include_top`'
@@ -283,4 +288,6 @@ def ResNet50(include_top=True,
           cache_subdir='models',
           md5_hash='a268eb855778b3df3c7506639542a6af')
     model.load_weights(weights_path)
+  elif weights is not None:
+    model.load_weights(weights)
   return model
diff --git a/tensorflow/python/keras/_impl/keras/applications/vgg16.py b/tensorflow/python/keras/_impl/keras/applications/vgg16.py
index a0862e6407..aa26160709 100644
--- a/tensorflow/python/keras/_impl/keras/applications/vgg16.py
+++ b/tensorflow/python/keras/_impl/keras/applications/vgg16.py
@@ -25,6 +25,8 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
+import os
+
 from tensorflow.python.keras._impl.keras import backend as K
 from tensorflow.python.keras._impl.keras.applications.imagenet_utils import _obtain_input_shape
 from tensorflow.python.keras._impl.keras.applications.imagenet_utils import decode_predictions  # pylint: disable=unused-import
@@ -68,8 +70,9 @@ def VGG16(include_top=True,
   Arguments:
       include_top: whether to include the 3 fully-connected
           layers at the top of the network.
-      weights: one of `None` (random initialization)
-          or "imagenet" (pre-training on ImageNet).
+      weights: one of `None` (random initialization),
+          'imagenet' (pre-training on ImageNet),
+          or the path to the weights file to be loaded.
       input_tensor: optional Keras tensor (i.e. output of `layers.Input()`)
           to use as image input for the model.
       input_shape: optional shape tuple, only to be specified
@@ -101,10 +104,12 @@ def VGG16(include_top=True,
       ValueError: in case of invalid argument for `weights`,
           or invalid input shape.
   """
-  if weights not in {'imagenet', None}:
+  if not (weights in {'imagenet', None} or os.path.exists(weights)):
     raise ValueError('The `weights` argument should be either '
-                     '`None` (random initialization) or `imagenet` '
-                     '(pre-training on ImageNet).')
+                     '`None` (random initialization), `imagenet` '
+                     '(pre-training on ImageNet), '
+                     'or the path to the weights file to be loaded.')
+
 
   if weights == 'imagenet' and include_top and classes != 1000:
     raise ValueError('If using `weights` as imagenet with `include_top`'
@@ -211,4 +216,6 @@ def VGG16(include_top=True,
         dense = model.get_layer(name='fc1')
         layer_utils.convert_dense_weights_data_format(dense, shape,
                                                       'channels_first')
+  elif weights is not None:
+    model.load_weights(weights)
   return model
diff --git a/tensorflow/python/keras/_impl/keras/applications/vgg19.py b/tensorflow/python/keras/_impl/keras/applications/vgg19.py
index cfa1c95336..d842d0db6a 100644
--- a/tensorflow/python/keras/_impl/keras/applications/vgg19.py
+++ b/tensorflow/python/keras/_impl/keras/applications/vgg19.py
@@ -25,6 +25,8 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
+import os
+
 from tensorflow.python.keras._impl.keras import backend as K
 from tensorflow.python.keras._impl.keras.applications.imagenet_utils import _obtain_input_shape
 from tensorflow.python.keras._impl.keras.applications.imagenet_utils import decode_predictions  # pylint: disable=unused-import
@@ -68,8 +70,9 @@ def VGG19(include_top=True,
   Arguments:
       include_top: whether to include the 3 fully-connected
           layers at the top of the network.
-      weights: one of `None` (random initialization)
-          or "imagenet" (pre-training on ImageNet).
+      weights: one of `None` (random initialization),
+         'imagenet' (pre-training on ImageNet),
+         or the path to the weights file to be loaded.
       input_tensor: optional Keras tensor (i.e. output of `layers.Input()`)
           to use as image input for the model.
       input_shape: optional shape tuple, only to be specified
@@ -101,10 +104,12 @@ def VGG19(include_top=True,
       ValueError: in case of invalid argument for `weights`,
           or invalid input shape.
   """
-  if weights not in {'imagenet', None}:
+  if not (weights in {'imagenet', None} or os.path.exists(weights)):
     raise ValueError('The `weights` argument should be either '
-                     '`None` (random initialization) or `imagenet` '
-                     '(pre-training on ImageNet).')
+                     '`None` (random initialization), `imagenet` '
+                     '(pre-training on ImageNet), '
+                     'or the path to the weights file to be loaded.')
+
 
   if weights == 'imagenet' and include_top and classes != 1000:
     raise ValueError('If using `weights` as imagenet with `include_top`'
@@ -217,4 +222,6 @@ def VGG19(include_top=True,
         dense = model.get_layer(name='fc1')
         layer_utils.convert_dense_weights_data_format(dense, shape,
                                                       'channels_first')
+  elif weights is not None:
+    model.load_weights(weights)
   return model
diff --git a/tensorflow/python/keras/_impl/keras/applications/xception.py b/tensorflow/python/keras/_impl/keras/applications/xception.py
index 14f6ad8090..25bc6288b2 100644
--- a/tensorflow/python/keras/_impl/keras/applications/xception.py
+++ b/tensorflow/python/keras/_impl/keras/applications/xception.py
@@ -36,6 +36,8 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
+import os
+
 from tensorflow.python.keras._impl.keras import backend as K
 from tensorflow.python.keras._impl.keras import layers
 from tensorflow.python.keras._impl.keras.applications import imagenet_utils
@@ -80,8 +82,9 @@ def Xception(include_top=True,
   Arguments:
       include_top: whether to include the fully-connected
           layer at the top of the network.
-      weights: one of `None` (random initialization)
-          or "imagenet" (pre-training on ImageNet).
+      weights: one of `None` (random initialization),
+          'imagenet' (pre-training on ImageNet),
+          or the path to the weights file to be loaded.
       input_tensor: optional Keras tensor (i.e. output of `layers.Input()`)
           to use as image input for the model.
       input_shape: optional shape tuple, only to be specified
@@ -114,10 +117,11 @@ def Xception(include_top=True,
       RuntimeError: If attempting to run this model with a
           backend that does not support separable convolutions.
   """
-  if weights not in {'imagenet', None}:
+  if not (weights in {'imagenet', None} or os.path.exists(weights)):
     raise ValueError('The `weights` argument should be either '
-                     '`None` (random initialization) or `imagenet` '
-                     '(pre-training on ImageNet).')
+                     '`None` (random initialization), `imagenet` '
+                     '(pre-training on ImageNet), '
+                     'or the path to the weights file to be loaded.')
 
   if weights == 'imagenet' and include_top and classes != 1000:
     raise ValueError('If using `weights` as imagenet with `include_top`'
@@ -300,6 +304,8 @@ def Xception(include_top=True,
 
   if old_data_format:
     K.set_image_data_format(old_data_format)
+  elif weights is not None:
+    model.load_weights(weights)
   return model
 
 
-- 
GitLab


From e2a2747dc85154516b08ee49885cfaf00ad2c3c5 Mon Sep 17 00:00:00 2001
From: Brennan Saeta <saeta@google.com>
Date: Tue, 5 Dec 2017 23:28:03 -0800
Subject: [PATCH 0678/1225] Fix session_list_devices_test

Fixes #14711

PiperOrigin-RevId: 178061423
---
 tensorflow/python/client/session_list_devices_test.py | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/tensorflow/python/client/session_list_devices_test.py b/tensorflow/python/client/session_list_devices_test.py
index 584b1abe55..5a7413c12e 100644
--- a/tensorflow/python/client/session_list_devices_test.py
+++ b/tensorflow/python/client/session_list_devices_test.py
@@ -39,7 +39,6 @@ class SessionListDevicesTestMethods(object):
       devices = sess.list_devices()
       self.assertTrue('/job:localhost/replica:0/task:0/device:CPU:0' in set(
           [d.name for d in devices]), devices)
-      self.assertGreaterEqual(1, len(devices), devices)
 
   def testInvalidDeviceNumber(self):
     opts = tf_session.TF_NewSessionOptions()
@@ -65,7 +64,6 @@ class SessionListDevicesTestMethods(object):
       devices = sess.list_devices()
       self.assertTrue('/job:local/replica:0/task:0/device:CPU:0' in set(
           [d.name for d in devices]), devices)
-      self.assertGreaterEqual(1, len(devices), devices)
 
   def testListDevicesClusterSpecPropagation(self):
     server1 = server_lib.Server.create_local_server()
@@ -84,7 +82,6 @@ class SessionListDevicesTestMethods(object):
           '/job:worker/replica:0/task:0/device:CPU:0' in device_names)
       self.assertTrue(
           '/job:worker/replica:0/task:1/device:CPU:0' in device_names)
-      self.assertGreaterEqual(2, len(devices), devices)
 
 
 class SessionListDevicesTest(SessionListDevicesTestMethods,
-- 
GitLab


From c60e32e0ca452aec465a33529a0ea22ef88b443f Mon Sep 17 00:00:00 2001
From: Asim Shankar <ashankar@google.com>
Date: Wed, 6 Dec 2017 05:13:16 -0800
Subject: [PATCH 0679/1225] [TF:XLA] Support for DT_INT64 in the VariableShape
 operation.

PiperOrigin-RevId: 178084701
---
 .../compiler/tests/variable_ops_test.py       | 17 ++++++-
 tensorflow/compiler/tf2xla/kernels/BUILD      |  2 +
 .../compiler/tf2xla/kernels/shape_op.cc       | 29 +----------
 .../compiler/tf2xla/kernels/shape_util.cc     | 48 +++++++++++++++++++
 .../compiler/tf2xla/kernels/shape_util.h      | 34 +++++++++++++
 .../compiler/tf2xla/kernels/variable_ops.cc   | 28 +++++------
 6 files changed, 113 insertions(+), 45 deletions(-)
 create mode 100644 tensorflow/compiler/tf2xla/kernels/shape_util.cc
 create mode 100644 tensorflow/compiler/tf2xla/kernels/shape_util.h

diff --git a/tensorflow/compiler/tests/variable_ops_test.py b/tensorflow/compiler/tests/variable_ops_test.py
index c50342dee4..b08d6ab21e 100644
--- a/tensorflow/compiler/tests/variable_ops_test.py
+++ b/tensorflow/compiler/tests/variable_ops_test.py
@@ -107,11 +107,26 @@ class VariableOpsTest(XLATestCase):
                  [[[30, 31, 32], [33, 34, 35]], [[0, 1, 2], [3, 4, 5]]]],
             ).astype(dtype), sess.run(x))
 
+  def testShape(self):
+    for dtype in self.numeric_types:
+      init = np.ones([2, 3]).astype(dtype)
+      with self.test_session() as session, self.test_scope():
+        v = resource_variable_ops.ResourceVariable(init)
+        session.run(variables.variables_initializer([v]))
+        h = v.handle
+        s32, s64 = session.run([
+            resource_variable_ops.variable_shape(h),
+            resource_variable_ops.variable_shape(h, out_type=dtypes.int64)
+        ])
+        self.assertEqual(s32.dtype, np.int32)
+        self.assertEqual(s64.dtype, np.int64)
+        self.assertAllEqual(s32, [2, 3])
+        self.assertAllEqual(s64, [2, 3])
+
   def testReadWrite(self):
     """Tests initialization, reading, and writing a resource variable."""
     for dtype in self.numeric_types:
       with self.test_session() as session:
-        print(ops.get_default_graph())
         with self.test_scope():
           with variable_scope.variable_scope("ascope", use_resource=True):
             x = variable_scope.get_variable(
diff --git a/tensorflow/compiler/tf2xla/kernels/BUILD b/tensorflow/compiler/tf2xla/kernels/BUILD
index ed1e731681..27ed684b00 100644
--- a/tensorflow/compiler/tf2xla/kernels/BUILD
+++ b/tensorflow/compiler/tf2xla/kernels/BUILD
@@ -60,6 +60,7 @@ tf_kernel_library(
         "sendrecv_ops.cc",
         "sequence_ops.cc",
         "shape_op.cc",
+        "shape_util.cc",
         "slice_op.cc",
         "softmax_op.cc",
         "spacetobatch_op.cc",
@@ -79,6 +80,7 @@ tf_kernel_library(
     hdrs = [
         "gather_op.h",
         "index_ops.h",
+        "shape_util.h",
     ],
     deps = [
         ":while_op",
diff --git a/tensorflow/compiler/tf2xla/kernels/shape_op.cc b/tensorflow/compiler/tf2xla/kernels/shape_op.cc
index 06838d1625..e205fadd2b 100644
--- a/tensorflow/compiler/tf2xla/kernels/shape_op.cc
+++ b/tensorflow/compiler/tf2xla/kernels/shape_op.cc
@@ -15,6 +15,7 @@ limitations under the License.
 
 // XLA-specific Shape Ops.
 
+#include "tensorflow/compiler/tf2xla/kernels/shape_util.h"
 #include "tensorflow/compiler/tf2xla/type_util.h"
 #include "tensorflow/compiler/tf2xla/xla_helpers.h"
 #include "tensorflow/compiler/tf2xla/xla_op_kernel.h"
@@ -25,34 +26,6 @@ limitations under the License.
 namespace tensorflow {
 namespace {
 
-// Converts a TensorShape to a constant Tensor.
-//
-// The input TensorShape input_shape is used to populate the elements of
-// shape_constant, which is modified in place.
-Status TensorShapeToConstant(const TensorShape& input_shape,
-                             Tensor* shape_constant) {
-  const int dims = input_shape.dims();
-  if (shape_constant->dtype() == DT_INT32) {
-    auto vec = shape_constant->vec<int32>();
-    for (int i = 0; i < dims; ++i) {
-      int64 dim_size = input_shape.dim_size(i);
-      if (!FastBoundsCheck(dim_size, std::numeric_limits<int32>::max())) {
-        return errors::InvalidArgument(
-            "Shape with out_type=int32 does not support tensors > int32max",
-            " but dim ", i, " is ", dim_size);
-      }
-      vec(i) = static_cast<int32>(dim_size);
-    }
-  } else {
-    auto vec = shape_constant->vec<int64>();
-    for (int i = 0; i < dims; ++i) {
-      int64 dim_size = input_shape.dim_size(i);
-      vec(i) = dim_size;
-    }
-  }
-  return Status::OK();
-}
-
 class ShapeOp : public XlaOpKernel {
  public:
   explicit ShapeOp(OpKernelConstruction* ctx) : XlaOpKernel(ctx) {
diff --git a/tensorflow/compiler/tf2xla/kernels/shape_util.cc b/tensorflow/compiler/tf2xla/kernels/shape_util.cc
new file mode 100644
index 0000000000..76ea5f5255
--- /dev/null
+++ b/tensorflow/compiler/tf2xla/kernels/shape_util.cc
@@ -0,0 +1,48 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/compiler/tf2xla/kernels/shape_util.h"
+
+#include <limits>
+
+#include "tensorflow/core/kernels/bounds_check.h"
+
+namespace tensorflow {
+
+Status TensorShapeToConstant(const TensorShape& input_shape,
+                             Tensor* shape_constant) {
+  const int dims = input_shape.dims();
+  if (shape_constant->dtype() == DT_INT32) {
+    auto vec = shape_constant->vec<int32>();
+    for (int i = 0; i < dims; ++i) {
+      int64 dim_size = input_shape.dim_size(i);
+      if (!FastBoundsCheck(dim_size, std::numeric_limits<int32>::max())) {
+        return errors::InvalidArgument(
+            "Shape with out_type=int32 does not support tensors > int32max",
+            " but dim ", i, " is ", dim_size);
+      }
+      vec(i) = static_cast<int32>(dim_size);
+    }
+  } else {
+    auto vec = shape_constant->vec<int64>();
+    for (int i = 0; i < dims; ++i) {
+      int64 dim_size = input_shape.dim_size(i);
+      vec(i) = dim_size;
+    }
+  }
+  return Status::OK();
+}
+
+}  // namespace tensorflow
diff --git a/tensorflow/compiler/tf2xla/kernels/shape_util.h b/tensorflow/compiler/tf2xla/kernels/shape_util.h
new file mode 100644
index 0000000000..575086e118
--- /dev/null
+++ b/tensorflow/compiler/tf2xla/kernels/shape_util.h
@@ -0,0 +1,34 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_COMPILER_TF2XLA_KERNELS_SHAPE_UTIL_H_
+#define TENSORFLOW_COMPILER_TF2XLA_KERNELS_SHAPE_UTIL_H_
+
+#include <limits>
+
+#include "tensorflow/core/framework/tensor.h"
+
+namespace tensorflow {
+
+// Converts a TensorShape to a constant Tensor.
+//
+// The input TensorShape input_shape is used to populate the elements of
+// shape_constant, which is modified in place.
+Status TensorShapeToConstant(const TensorShape& input_shape,
+                             Tensor* shape_constant);
+
+}  // namespace tensorflow
+
+#endif  // THIRD_PARTY_TENSORFLOW_COMPILER_TF2XLA_KERNELS_SHAPE_UTIL_H_
diff --git a/tensorflow/compiler/tf2xla/kernels/variable_ops.cc b/tensorflow/compiler/tf2xla/kernels/variable_ops.cc
index 2346c62ad1..68847ae7a2 100644
--- a/tensorflow/compiler/tf2xla/kernels/variable_ops.cc
+++ b/tensorflow/compiler/tf2xla/kernels/variable_ops.cc
@@ -15,6 +15,7 @@ limitations under the License.
 
 #include "tensorflow/compiler/tf2xla/kernels/cwise_ops.h"
 #include "tensorflow/compiler/tf2xla/kernels/gather_op_helpers.h"
+#include "tensorflow/compiler/tf2xla/kernels/shape_util.h"
 #include "tensorflow/compiler/tf2xla/shape_util.h"
 #include "tensorflow/compiler/tf2xla/xla_helpers.h"
 #include "tensorflow/compiler/tf2xla/xla_op_registry.h"
@@ -124,27 +125,22 @@ REGISTER_XLA_OP(Name("ResourceGather").TypeConstraint("dtype", kNumericTypes),
 
 class VariableShapeOp : public XlaOpKernel {
  public:
-  explicit VariableShapeOp(OpKernelConstruction* ctx) : XlaOpKernel(ctx) {}
+  explicit VariableShapeOp(OpKernelConstruction* ctx) : XlaOpKernel(ctx) {
+    OP_REQUIRES_OK(ctx, ctx->GetAttr("out_type", &out_dtype_));
+  }
 
   void Compile(XlaOpKernelContext* ctx) override {
-    DataType dtype;
+    DataType variable_dtype;
     TensorShape shape;
-    OP_REQUIRES_OK(ctx, ctx->GetVariableTypeAndShape(0, &dtype, &shape));
-    const int rank = shape.dims();
-    Tensor shape_constant(DT_INT32, TensorShape({rank}));
-    auto vec = shape_constant.vec<int32>();
-    // TODO(dga): support int64.  b/28119922.
-    for (int i = 0; i < rank; ++i) {
-      int64 dim_size = shape.dim_size(i);
-      OP_REQUIRES(
-          ctx, FastBoundsCheck(dim_size, std::numeric_limits<int32>::max()),
-          errors::InvalidArgument("Shape does not support tensors > int32max",
-                                  " but dim ", i, " is ", dim_size));
-      vec(i) = static_cast<int32>(dim_size);
-    }
-
+    OP_REQUIRES_OK(ctx,
+                   ctx->GetVariableTypeAndShape(0, &variable_dtype, &shape));
+    Tensor shape_constant(out_dtype_, TensorShape({shape.dims()}));
+    OP_REQUIRES_OK(ctx, TensorShapeToConstant(shape, &shape_constant));
     ctx->SetConstantOutput(0, shape_constant);
   }
+
+ private:
+  DataType out_dtype_;
 };
 
 REGISTER_XLA_OP(Name("VariableShape"), VariableShapeOp);
-- 
GitLab


From 2ad791c2ed84980d24d334f6b1ecd3c6e4320601 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 6 Dec 2017 08:12:17 -0800
Subject: [PATCH 0680/1225] Automated g4 rollback of changelist 178054272

PiperOrigin-RevId: 178099261
---
 tensorflow/core/kernels/variable_ops.cc | 211 +++---------------------
 tensorflow/core/kernels/variable_ops.h  | 158 +++++++++++++++++-
 2 files changed, 169 insertions(+), 200 deletions(-)

diff --git a/tensorflow/core/kernels/variable_ops.cc b/tensorflow/core/kernels/variable_ops.cc
index ddcfb14273..36b8ff09d7 100644
--- a/tensorflow/core/kernels/variable_ops.cc
+++ b/tensorflow/core/kernels/variable_ops.cc
@@ -23,177 +23,6 @@ limitations under the License.
 
 namespace tensorflow {
 
-// Resource stored by variables in the resource manager
-// (legacy, ref-style version).
-class LegacyVar : public ResourceBase {
- public:
-  explicit LegacyVar(DataType dtype) : tensor_(dtype) {}
-  // Not copyable or movable.
-  LegacyVar(const LegacyVar&) = delete;
-  LegacyVar& operator=(const LegacyVar&) = delete;
-
-  mutex* mu() { return &mu_; }
-  Tensor* tensor() { return &tensor_; }
-
-  string DebugString() override {
-    return strings::StrCat(DataTypeString(tensor_.dtype()), "/",
-                           tensor_.shape().DebugString());
-  }
-
- private:
-  mutex mu_;
-  Tensor tensor_;
-
-  ~LegacyVar() override {}
-};
-
-VariableOp::VariableOp(OpKernelConstruction* context) : OpKernel(context) {
-  OP_REQUIRES_OK(context, context->GetAttr("shape", &shape_));
-  dtype_ = RemoveRefType(context->output_type(0));
-}
-
-void VariableOp::Compute(OpKernelContext* ctx) {
-  mutex_lock l(init_mu_);
-  if (!initialized_) {
-    OP_REQUIRES_OK(ctx, cinfo_.Init(ctx->resource_manager(), def(),
-                                    true /* use name() */));
-    initialized_ = true;
-  }
-  auto creator = [this](LegacyVar** var) {
-    *var = new LegacyVar(dtype_);
-    (*var)->tensor()->set_shape(shape_);
-    return Status::OK();
-  };
-  LegacyVar* var;
-  OP_REQUIRES_OK(ctx, cinfo_.resource_manager()->LookupOrCreate<LegacyVar>(
-                          cinfo_.container(), cinfo_.name(), &var, creator));
-  // Output a reference to our tensor, so it may be updated.
-  //
-  // As long as the resource manager hasn't been cleared the ref we return
-  // here is valid because it owns a ref on var.
-  ctx->set_output_ref(0, var->mu(), var->tensor());
-  if (ctx->track_allocations() && var->tensor()->IsInitialized()) {
-    AllocatorAttributes attr;
-    attr.set_gpu_compatible(true);
-    attr.set_nic_compatible(true);
-    if (ctx->allocate_on_host(attr)) {
-      ctx->record_host_persistent_memory_allocation(
-          var->tensor()->AllocatedBytes());
-    } else {
-      ctx->record_device_persistent_memory_allocation(
-          var->tensor()->AllocatedBytes());
-    }
-  }
-  var->Unref();
-}
-
-class TemporaryVariableOp : public OpKernel {
- public:
-  explicit TemporaryVariableOp(OpKernelConstruction* context)
-      : OpKernel(context) {
-    OP_REQUIRES_OK(context, context->GetAttr("shape", &shape_));
-    OP_REQUIRES_OK(context, context->GetAttr("dtype", &dtype_));
-    OP_REQUIRES_OK(context, context->GetAttr("var_name", &var_name_));
-    // Variable name defaults to op name if not specified explicitly.
-    if (var_name_.empty()) var_name_ = name();
-  }
-
-  void Compute(OpKernelContext* context) override {
-    Status s;
-    ResourceMgr* rm = context->resource_manager();
-    OP_REQUIRES(context, rm, errors::Internal("No per-step resource manager."));
-    auto* tmp_var = new TmpVar;
-    OP_REQUIRES(context, tmp_var,
-                errors::ResourceExhausted("Could not allocate TmpVar."));
-    tmp_var->name = var_name_;
-    s = context->allocate_temp(dtype_, shape_, &tmp_var->val);
-    if (!s.ok()) tmp_var->Unref();
-    OP_REQUIRES_OK(context, s);
-    OP_REQUIRES_OK(context, rm->Create(context->step_container()->name(),
-                                       var_name_, tmp_var));
-    context->set_output_ref(0, &tmp_var->mu, &tmp_var->val);
-    if (context->track_allocations()) {
-      AllocatorAttributes attr;
-      if (context->allocate_on_host(attr)) {
-        context->record_host_persistent_memory_allocation(
-            tmp_var->val.AllocatedBytes());
-      } else {
-        context->record_device_persistent_memory_allocation(
-            tmp_var->val.AllocatedBytes());
-      }
-    }
-  }
-
- private:
-  // Refcounted temporary variable resource.
-  friend class DestroyTemporaryVariableOp;
-  struct TmpVar : public ResourceBase {
-    mutex mu;
-    Tensor val;
-    string name;
-    string DebugString() override { return name; }
-    ~TmpVar() override { VLOG(3) << "TmpVar " << name << " deleted"; }
-  };
-
-  TensorShape shape_;
-  DataType dtype_;
-  string var_name_;
-};
-
-class DestroyTemporaryVariableOp : public OpKernel {
- public:
-  explicit DestroyTemporaryVariableOp(OpKernelConstruction* context)
-      : OpKernel(context) {
-    OP_REQUIRES(context, IsRefType(context->input_type(0)),
-                errors::InvalidArgument("lhs input needs to be a ref type"))
-    OP_REQUIRES_OK(context, context->GetAttr("var_name", &var_name_));
-    OP_REQUIRES(context, !var_name_.empty(),
-                errors::InvalidArgument("Missing var_name attribute"));
-  }
-
-  void Compute(OpKernelContext* context) override {
-    // NOTE(pbar): All other mutators of the Tensor Ref *must* have completed
-    // their execution before this DestroyTemporaryVariable op executes.
-    // This is typically achieved using control dependencies.
-    CHECK(IsRefType(context->input_dtype(0)));
-    Tensor tmpvar = context->mutable_input(0, false);
-    context->set_output(0, tmpvar);
-    ResourceMgr* rm = context->resource_manager();
-    OP_REQUIRES(context, rm, errors::Internal("No per-step resource manager."));
-    OP_REQUIRES_OK(context, rm->Delete<TemporaryVariableOp::TmpVar>(
-                                context->step_container()->name(), var_name_));
-    if (context->track_allocations()) {
-      if (context->allocate_on_host(AllocatorAttributes())) {
-        context->record_host_persistent_memory_allocation(
-            -static_cast<int64>(tmpvar.AllocatedBytes()));
-      } else {
-        context->record_device_persistent_memory_allocation(
-            -static_cast<int64>(tmpvar.AllocatedBytes()));
-      }
-    }
-  }
-
- private:
-  string var_name_;
-};
-
-class IsVariableInitializedOp : public OpKernel {
- public:
-  explicit IsVariableInitializedOp(OpKernelConstruction* context)
-      : OpKernel(context) {}
-
-  void Compute(OpKernelContext* context) override {
-    // Get a mutable input tensor of the Ref input.
-    const Tensor& input_tensor = context->mutable_input(0, false);
-    Tensor* output = nullptr;
-    OP_REQUIRES_OK(context,
-                   context->allocate_output(0, TensorShape({}), &output));
-    auto output_tensor = output->tensor<bool, 0>();
-    bool result = input_tensor.IsInitialized();
-    output_tensor() = result;
-  }
-};
-
 REGISTER_KERNEL_BUILDER(Name("Variable").Device(DEVICE_CPU), VariableOp);
 REGISTER_KERNEL_BUILDER(Name("VariableV2").Device(DEVICE_CPU), VariableOp);
 REGISTER_KERNEL_BUILDER(Name("TemporaryVariable").Device(DEVICE_CPU),
@@ -204,30 +33,30 @@ REGISTER_KERNEL_BUILDER(Name("IsVariableInitialized").Device(DEVICE_CPU),
                         IsVariableInitializedOp);
 
 #ifdef TENSORFLOW_USE_SYCL
-#define REGISTER_SYCL_KERNEL(type)                                          \
-  REGISTER_KERNEL_BUILDER(                                                  \
-      Name("Variable").Device(DEVICE_SYCL).TypeConstraint<type>("dtype"),   \
-      VariableOp);                                                          \
-  REGISTER_KERNEL_BUILDER(                                                  \
-      Name("VariableV2").Device(DEVICE_SYCL).TypeConstraint<type>("dtype"), \
-      VariableOp);                                                          \
-  REGISTER_KERNEL_BUILDER(Name("TemporaryVariable")                         \
-                              .Device(DEVICE_SYCL)                          \
-                              .TypeConstraint<type>("dtype"),               \
-                          TemporaryVariableOp);                             \
-  REGISTER_KERNEL_BUILDER(Name("DestroyTemporaryVariable")                  \
-                              .Device(DEVICE_SYCL)                          \
-                              .TypeConstraint<type>("T"),                   \
-                          DestroyTemporaryVariableOp);                      \
-  REGISTER_KERNEL_BUILDER(Name("IsVariableInitialized")                     \
-                              .Device(DEVICE_SYCL)                          \
-                              .TypeConstraint<type>("dtype")                \
-                              .HostMemory("is_initialized"),                \
+#define REGISTER_SYCL_KERNEL(type)                                         \
+  REGISTER_KERNEL_BUILDER(                                                 \
+      Name("Variable").Device(DEVICE_SYCL).TypeConstraint<type>("dtype"),  \
+      VariableOp);                                                         \
+  REGISTER_KERNEL_BUILDER(                                                 \
+      Name("VariableV2").Device(DEVICE_SYCL).TypeConstraint<type>("dtype"),\
+      VariableOp);                                                         \
+  REGISTER_KERNEL_BUILDER(Name("TemporaryVariable")                        \
+                              .Device(DEVICE_SYCL)                         \
+                              .TypeConstraint<type>("dtype"),              \
+                          TemporaryVariableOp);                            \
+  REGISTER_KERNEL_BUILDER(Name("DestroyTemporaryVariable")                 \
+                              .Device(DEVICE_SYCL)                         \
+                              .TypeConstraint<type>("T"),                  \
+                          DestroyTemporaryVariableOp);                     \
+  REGISTER_KERNEL_BUILDER(Name("IsVariableInitialized")                    \
+                              .Device(DEVICE_SYCL)                         \
+                              .TypeConstraint<type>("dtype")               \
+                              .HostMemory("is_initialized"),               \
                           IsVariableInitializedOp);
 
 TF_CALL_GPU_NUMBER_TYPES_NO_HALF(REGISTER_SYCL_KERNEL);
 #undef REGISTER_SYCL_KERNEL
-#endif  // TENSORFLOW_USE_SYCL
+#endif // TENSORFLOW_USE_SYCL
 
 #if GOOGLE_CUDA
 // Only register 'Variable' on GPU for the subset of types also supported by
diff --git a/tensorflow/core/kernels/variable_ops.h b/tensorflow/core/kernels/variable_ops.h
index 83134bad37..355140d44c 100644
--- a/tensorflow/core/kernels/variable_ops.h
+++ b/tensorflow/core/kernels/variable_ops.h
@@ -27,16 +27,10 @@ limitations under the License.
 
 namespace tensorflow {
 
-// Resource stored by variables in the resource manager
-// (new, resource-style version).
+// Resource stored by variables in the resource manager.
 class Var : public ResourceBase {
  public:
   explicit Var(DataType dtype) : tensor_(dtype) {}
-  // Not copyable or movable.
-  Var(const Var&) = delete;
-  Var& operator=(const Var&) = delete;
-
-  // TODO(ebrevdo): Use LockSet instead of exposing mu.
   mutex* mu() { return &mu_; }
   Tensor* tensor() { return &tensor_; }
 
@@ -50,12 +44,52 @@ class Var : public ResourceBase {
   Tensor tensor_;
 
   ~Var() override {}
+  TF_DISALLOW_COPY_AND_ASSIGN(Var);
 };
 
 class VariableOp : public OpKernel {
  public:
-  explicit VariableOp(OpKernelConstruction* context);
-  void Compute(OpKernelContext* ctx) override;
+  explicit VariableOp(OpKernelConstruction* context) : OpKernel(context) {
+    OP_REQUIRES_OK(context, context->GetAttr("shape", &shape_));
+    dtype_ = RemoveRefType(context->output_type(0));
+  }
+
+  void Compute(OpKernelContext* ctx) override {
+    mutex_lock l(init_mu_);
+    if (!initialized_) {
+      OP_REQUIRES_OK(
+          ctx,
+          cinfo_.Init(ctx->resource_manager(), def(), true /* use name() */));
+      initialized_ = true;
+    }
+    auto creator = [this](Var** var) {
+      *var = new Var(dtype_);
+      (*var)->tensor()->set_shape(shape_);
+      return Status::OK();
+    };
+    Var* var;
+    OP_REQUIRES_OK(ctx,
+                   cinfo_.resource_manager()->LookupOrCreate<Var>(
+                       cinfo_.container(), cinfo_.name(), &var, creator));
+    // Output a reference to our tensor, so it may be updated.
+    //
+    // As long as the resource manager hasn't been cleared the ref we return
+    // here is valid because it owns a ref on var.
+    ctx->set_output_ref(0, var->mu(), var->tensor());
+    if (ctx->track_allocations() && var->tensor()->IsInitialized()) {
+      AllocatorAttributes attr;
+      attr.set_gpu_compatible(true);
+      attr.set_nic_compatible(true);
+      if (ctx->allocate_on_host(attr)) {
+        ctx->record_host_persistent_memory_allocation(
+            var->tensor()->AllocatedBytes());
+      } else {
+        ctx->record_device_persistent_memory_allocation(
+            var->tensor()->AllocatedBytes());
+      }
+    }
+    var->Unref();
+  }
 
  private:
   DataType dtype_;
@@ -68,6 +102,112 @@ class VariableOp : public OpKernel {
   TF_DISALLOW_COPY_AND_ASSIGN(VariableOp);
 };
 
+class TemporaryVariableOp : public OpKernel {
+ public:
+  explicit TemporaryVariableOp(OpKernelConstruction* context)
+      : OpKernel(context) {
+    OP_REQUIRES_OK(context, context->GetAttr("shape", &shape_));
+    OP_REQUIRES_OK(context, context->GetAttr("dtype", &dtype_));
+    OP_REQUIRES_OK(context, context->GetAttr("var_name", &var_name_));
+    // Variable name defaults to op name if not specified explicitly.
+    if (var_name_ == "") var_name_ = name();
+  }
+
+  void Compute(OpKernelContext* context) override {
+    Status s;
+    ResourceMgr* rm = context->resource_manager();
+    OP_REQUIRES(context, rm, errors::Internal("No per-step resource manager."));
+    auto* tmp_var = new TmpVar;
+    OP_REQUIRES(context, tmp_var,
+                errors::ResourceExhausted("Could not allocate TmpVar."));
+    tmp_var->name = var_name_;
+    s = context->allocate_temp(dtype_, shape_, &tmp_var->val);
+    if (!s.ok()) tmp_var->Unref();
+    OP_REQUIRES_OK(context, s);
+    OP_REQUIRES_OK(context, rm->Create(context->step_container()->name(),
+                                       var_name_, tmp_var));
+    context->set_output_ref(0, &tmp_var->mu, &tmp_var->val);
+    if (context->track_allocations()) {
+      AllocatorAttributes attr;
+      if (context->allocate_on_host(attr)) {
+        context->record_host_persistent_memory_allocation(
+            tmp_var->val.AllocatedBytes());
+      } else {
+        context->record_device_persistent_memory_allocation(
+            tmp_var->val.AllocatedBytes());
+      }
+    }
+  }
+
+ private:
+  // Refcounted temporary variable resource.
+  friend class DestroyTemporaryVariableOp;
+  struct TmpVar : public ResourceBase {
+    mutex mu;
+    Tensor val;
+    string name;
+    string DebugString() override { return name; }
+    ~TmpVar() override { VLOG(3) << "TmpVar " << name << " deleted"; }
+  };
+
+  TensorShape shape_;
+  DataType dtype_;
+  string var_name_;
+};
+
+class DestroyTemporaryVariableOp : public OpKernel {
+ public:
+  explicit DestroyTemporaryVariableOp(OpKernelConstruction* context)
+      : OpKernel(context) {
+    OP_REQUIRES(context, IsRefType(context->input_type(0)),
+                errors::InvalidArgument("lhs input needs to be a ref type"))
+    OP_REQUIRES_OK(context, context->GetAttr("var_name", &var_name_));
+    OP_REQUIRES(context, var_name_ != "",
+                errors::InvalidArgument("Missing var_name attribute"));
+  }
+
+  void Compute(OpKernelContext* context) override {
+    // NOTE(pbar): All other mutators of the Tensor Ref *must* have completed
+    // their execution before this DestroyTemporaryVariable op executes.
+    // This is typically achieved using control dependencies.
+    CHECK(IsRefType(context->input_dtype(0)));
+    Tensor tmpvar = context->mutable_input(0, false);
+    context->set_output(0, tmpvar);
+    ResourceMgr* rm = context->resource_manager();
+    OP_REQUIRES(context, rm, errors::Internal("No per-step resource manager."));
+    OP_REQUIRES_OK(context, rm->Delete<TemporaryVariableOp::TmpVar>(
+                                context->step_container()->name(), var_name_));
+    if (context->track_allocations()) {
+      if (context->allocate_on_host(AllocatorAttributes())) {
+        context->record_host_persistent_memory_allocation(
+            -static_cast<int64>(tmpvar.AllocatedBytes()));
+      } else {
+        context->record_device_persistent_memory_allocation(
+            -static_cast<int64>(tmpvar.AllocatedBytes()));
+      }
+    }
+  }
+
+ private:
+  string var_name_;
+};
+
+class IsVariableInitializedOp : public OpKernel {
+ public:
+  IsVariableInitializedOp(OpKernelConstruction* context) : OpKernel(context) {}
+
+  void Compute(OpKernelContext* context) override {
+    // Get a mutable input tensor of the Ref input.
+    const Tensor& input_tensor = context->mutable_input(0, false);
+    Tensor* output = nullptr;
+    OP_REQUIRES_OK(context,
+                   context->allocate_output(0, TensorShape({}), &output));
+    auto output_tensor = output->tensor<bool, 0>();
+    bool result = input_tensor.IsInitialized();
+    output_tensor() = result;
+  }
+};
+
 }  // namespace tensorflow
 
 #endif  // TENSORFLOW_KERNELS_VARIABLE_OPS_H_
-- 
GitLab


From 7ac7aa868406a5d9b03e4101509ac80e011b91c7 Mon Sep 17 00:00:00 2001
From: Mark Daoust <markdaoust@google.com>
Date: Wed, 6 Dec 2017 09:03:26 -0800
Subject: [PATCH 0681/1225] Add version to image retraining setup instructions.

fixes: #12736
PiperOrigin-RevId: 178105093
---
 tensorflow/docs_src/tutorials/image_retraining.md | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/tensorflow/docs_src/tutorials/image_retraining.md b/tensorflow/docs_src/tutorials/image_retraining.md
index ad565e6d8b..52e6980e00 100644
--- a/tensorflow/docs_src/tutorials/image_retraining.md
+++ b/tensorflow/docs_src/tutorials/image_retraining.md
@@ -44,8 +44,14 @@ following command (these examples are not included in the installation):
 
 ```sh
 git clone https://github.com/tensorflow/tensorflow
+```
+
+Then checkout the version of the tensorflow repository matching your
+installation and this tutorial as follows:
 
+``` sh
 cd tensorflow
+git checkout {version}
 ```
 
 In the simplest cases the retrainer can then be run like this:
-- 
GitLab


From f79c39e9c8291787718015318b396bd11ff7ae71 Mon Sep 17 00:00:00 2001
From: Mark Daoust <markdaoust@google.com>
Date: Wed, 6 Dec 2017 09:08:09 -0800
Subject: [PATCH 0682/1225] Use sparse xent to avoid softmax_v2 warning in
 examples/learn

`tf.nn.softmax_cross_entropy_with_logits` and `tf.losses.softmax_cross_entropy` both throw the warning.

Almost everywhere it's used can simply be replaced by `tf.losses.sparse_softmax_cross_entropy`

PiperOrigin-RevId: 178105702
---
 tensorflow/examples/learn/iris_custom_decay_dnn.py          | 6 +-----
 tensorflow/examples/learn/iris_custom_model.py              | 6 +-----
 tensorflow/examples/learn/mnist.py                          | 4 +---
 tensorflow/examples/learn/multiple_gpu.py                   | 6 +-----
 tensorflow/examples/learn/resnet.py                         | 4 +---
 tensorflow/examples/learn/text_classification.py            | 4 +---
 .../examples/learn/text_classification_character_cnn.py     | 4 +---
 .../examples/learn/text_classification_character_rnn.py     | 4 +---
 tensorflow/examples/learn/text_classification_cnn.py        | 4 +---
 9 files changed, 9 insertions(+), 33 deletions(-)

diff --git a/tensorflow/examples/learn/iris_custom_decay_dnn.py b/tensorflow/examples/learn/iris_custom_decay_dnn.py
index 072357e51c..4a219694d1 100644
--- a/tensorflow/examples/learn/iris_custom_decay_dnn.py
+++ b/tensorflow/examples/learn/iris_custom_decay_dnn.py
@@ -46,12 +46,8 @@ def my_model(features, labels, mode):
     }
     return tf.estimator.EstimatorSpec(mode, predictions=predictions)
 
-  # Convert the labels to a one-hot tensor of shape (length of features, 3) and
-  # with a on-value of 1 for each one-hot vector of length 3.
-  onehot_labels = tf.one_hot(labels, 3, 1, 0)
   # Compute loss.
-  loss = tf.losses.softmax_cross_entropy(
-      onehot_labels=onehot_labels, logits=logits)
+  loss = tf.losses.sparse_softmax_cross_entropy(labels=labels, logits=logits)
 
   # Create training op with exponentially decaying learning rate.
   if mode == tf.estimator.ModeKeys.TRAIN:
diff --git a/tensorflow/examples/learn/iris_custom_model.py b/tensorflow/examples/learn/iris_custom_model.py
index 471a99ba76..c6bdb86ba5 100644
--- a/tensorflow/examples/learn/iris_custom_model.py
+++ b/tensorflow/examples/learn/iris_custom_model.py
@@ -47,12 +47,8 @@ def my_model(features, labels, mode):
     }
     return tf.estimator.EstimatorSpec(mode, predictions=predictions)
 
-  # Convert the labels to a one-hot tensor of shape (length of features, 3) and
-  # with a on-value of 1 for each one-hot vector of length 3.
-  onehot_labels = tf.one_hot(labels, 3, 1, 0)
   # Compute loss.
-  loss = tf.losses.softmax_cross_entropy(
-      onehot_labels=onehot_labels, logits=logits)
+  loss = tf.losses.sparse_softmax_cross_entropy(labels=labels, logits=logits)
 
   # Create training op.
   if mode == tf.estimator.ModeKeys.TRAIN:
diff --git a/tensorflow/examples/learn/mnist.py b/tensorflow/examples/learn/mnist.py
index 88425ea0d0..98819b20bf 100644
--- a/tensorflow/examples/learn/mnist.py
+++ b/tensorflow/examples/learn/mnist.py
@@ -77,9 +77,7 @@ def conv_model(features, labels, mode):
     return tf.estimator.EstimatorSpec(mode, predictions=predictions)
 
   # Compute loss.
-  onehot_labels = tf.one_hot(tf.cast(labels, tf.int32), N_DIGITS, 1, 0)
-  loss = tf.losses.softmax_cross_entropy(
-      onehot_labels=onehot_labels, logits=logits)
+  loss = tf.losses.sparse_softmax_cross_entropy(labels=labels, logits=logits)
 
   # Create training op.
   if mode == tf.estimator.ModeKeys.TRAIN:
diff --git a/tensorflow/examples/learn/multiple_gpu.py b/tensorflow/examples/learn/multiple_gpu.py
index a294950a38..3bad22ddf6 100644
--- a/tensorflow/examples/learn/multiple_gpu.py
+++ b/tensorflow/examples/learn/multiple_gpu.py
@@ -65,12 +65,8 @@ def my_model(features, labels, mode):
       }
       return tf.estimator.EstimatorSpec(mode, predictions=predictions)
 
-    # Convert the labels to a one-hot tensor of shape (length of features, 3)
-    # and with a on-value of 1 for each one-hot vector of length 3.
-    onehot_labels = tf.one_hot(labels, 3, 1, 0)
     # Compute loss.
-    loss = tf.losses.softmax_cross_entropy(
-        onehot_labels=onehot_labels, logits=logits)
+    loss = tf.losses.sparse_softmax_cross_entropy(labels=labels, logits=logits)
 
     # Create training op.
     if mode == tf.estimator.ModeKeys.TRAIN:
diff --git a/tensorflow/examples/learn/resnet.py b/tensorflow/examples/learn/resnet.py
index 1e0966475b..9542e55250 100755
--- a/tensorflow/examples/learn/resnet.py
+++ b/tensorflow/examples/learn/resnet.py
@@ -151,9 +151,7 @@ def res_net_model(features, labels, mode):
     return tf.estimator.EstimatorSpec(mode, predictions=predictions)
 
   # Compute loss.
-  onehot_labels = tf.one_hot(tf.cast(labels, tf.int32), N_DIGITS, 1, 0)
-  loss = tf.losses.softmax_cross_entropy(
-      onehot_labels=onehot_labels, logits=logits)
+  loss = tf.losses.sparse_softmax_cross_entropy(labels=labels, logits=logits)
 
   # Create training op.
   if mode == tf.estimator.ModeKeys.TRAIN:
diff --git a/tensorflow/examples/learn/text_classification.py b/tensorflow/examples/learn/text_classification.py
index ba89c532be..eb117c39a1 100644
--- a/tensorflow/examples/learn/text_classification.py
+++ b/tensorflow/examples/learn/text_classification.py
@@ -46,9 +46,7 @@ def estimator_spec_for_softmax_classification(
             'prob': tf.nn.softmax(logits)
         })
 
-  onehot_labels = tf.one_hot(labels, MAX_LABEL, 1, 0)
-  loss = tf.losses.softmax_cross_entropy(
-      onehot_labels=onehot_labels, logits=logits)
+  loss = tf.losses.sparse_softmax_cross_entropy(labels=labels, logits=logits)
   if mode == tf.estimator.ModeKeys.TRAIN:
     optimizer = tf.train.AdamOptimizer(learning_rate=0.01)
     train_op = optimizer.minimize(loss, global_step=tf.train.get_global_step())
diff --git a/tensorflow/examples/learn/text_classification_character_cnn.py b/tensorflow/examples/learn/text_classification_character_cnn.py
index 363ff00362..afda170e2a 100644
--- a/tensorflow/examples/learn/text_classification_character_cnn.py
+++ b/tensorflow/examples/learn/text_classification_character_cnn.py
@@ -88,9 +88,7 @@ def char_cnn_model(features, labels, mode):
             'prob': tf.nn.softmax(logits)
         })
 
-  onehot_labels = tf.one_hot(labels, MAX_LABEL, 1, 0)
-  loss = tf.losses.softmax_cross_entropy(
-      onehot_labels=onehot_labels, logits=logits)
+  loss = tf.losses.sparse_softmax_cross_entropy(labels=labels, logits=logits)
   if mode == tf.estimator.ModeKeys.TRAIN:
     optimizer = tf.train.AdamOptimizer(learning_rate=0.01)
     train_op = optimizer.minimize(loss, global_step=tf.train.get_global_step())
diff --git a/tensorflow/examples/learn/text_classification_character_rnn.py b/tensorflow/examples/learn/text_classification_character_rnn.py
index 86adc056ad..15733821fb 100644
--- a/tensorflow/examples/learn/text_classification_character_rnn.py
+++ b/tensorflow/examples/learn/text_classification_character_rnn.py
@@ -59,9 +59,7 @@ def char_rnn_model(features, labels, mode):
             'prob': tf.nn.softmax(logits)
         })
 
-  onehot_labels = tf.one_hot(labels, MAX_LABEL, 1, 0)
-  loss = tf.losses.softmax_cross_entropy(
-      onehot_labels=onehot_labels, logits=logits)
+  loss = tf.losses.sparse_softmax_cross_entropy(labels=labels, logits=logits)
   if mode == tf.estimator.ModeKeys.TRAIN:
     optimizer = tf.train.AdamOptimizer(learning_rate=0.01)
     train_op = optimizer.minimize(loss, global_step=tf.train.get_global_step())
diff --git a/tensorflow/examples/learn/text_classification_cnn.py b/tensorflow/examples/learn/text_classification_cnn.py
index be262285a3..9e21aee87f 100644
--- a/tensorflow/examples/learn/text_classification_cnn.py
+++ b/tensorflow/examples/learn/text_classification_cnn.py
@@ -87,9 +87,7 @@ def cnn_model(features, labels, mode):
             'prob': tf.nn.softmax(logits)
         })
 
-  onehot_labels = tf.one_hot(labels, MAX_LABEL, 1, 0)
-  loss = tf.losses.softmax_cross_entropy(
-      onehot_labels=onehot_labels, logits=logits)
+  loss = tf.losses.sparse_softmax_cross_entropy(labels=labels, logits=logits)
   if mode == tf.estimator.ModeKeys.TRAIN:
     optimizer = tf.train.AdamOptimizer(learning_rate=0.01)
     train_op = optimizer.minimize(loss, global_step=tf.train.get_global_step())
-- 
GitLab


From a16b13761823675ad1e27450a924f040936839c4 Mon Sep 17 00:00:00 2001
From: Benoit Steiner <bsteiner@google.com>
Date: Wed, 6 Dec 2017 09:30:19 -0800
Subject: [PATCH 0683/1225] Simplified parts of the constant folding code and
 made it more robust to unexpected inputs. Also materialize more shapes by
 default.

PiperOrigin-RevId: 178108110
---
 .../grappler/optimizers/constant_folding.cc   | 60 +++++++++----------
 .../optimizers/constant_folding_test.cc       | 10 ++--
 .../lib/debug_graph_reconstruction_test.py    |  2 +-
 3 files changed, 33 insertions(+), 39 deletions(-)

diff --git a/tensorflow/core/grappler/optimizers/constant_folding.cc b/tensorflow/core/grappler/optimizers/constant_folding.cc
index a8c3f897d6..8426aa1c3f 100644
--- a/tensorflow/core/grappler/optimizers/constant_folding.cc
+++ b/tensorflow/core/grappler/optimizers/constant_folding.cc
@@ -244,10 +244,13 @@ Status ConstantFolding::MaterializeShapes(const GraphProperties& properties) {
       continue;
     }
 
-    std::vector<OpInfo::TensorProperties> output =
+    const std::vector<OpInfo::TensorProperties>& output =
         properties.GetOutputProperties(node.name());
-    std::vector<OpInfo::TensorProperties> input =
+    const std::vector<OpInfo::TensorProperties>& input =
         properties.GetInputProperties(node.name());
+    if (input.empty() || output.empty()) {
+      continue;
+    }
     if (op == "Shape" || op == "Size" || op == "Rank") {
       CHECK_EQ(1, output.size());
       CHECK_EQ(1, input.size());
@@ -475,8 +478,12 @@ Status ConstantFolding::MaterializeReductionIndices(
     return Status::OK();
   }
 
-  const OpInfo::TensorProperties& input_prop =
-      properties.GetInputProperties(node->name())[0];
+  const std::vector<OpInfo::TensorProperties>& input_props =
+      properties.GetInputProperties(node->name());
+  if (input_props.size() != 2) {
+    return Status::OK();
+  }
+  const OpInfo::TensorProperties& input_prop = input_props[0];
   if (input_prop.shape().unknown_rank()) {
     // We can't do anything if we don't know the rank of the input.
     return Status::OK();
@@ -486,8 +493,12 @@ Status ConstantFolding::MaterializeReductionIndices(
     // Unexpected graph, don't try to change it.
     return Status::OK();
   }
-  const OpInfo::TensorProperties& output_prop =
-      properties.GetOutputProperties(node->name())[0];
+  const std::vector<OpInfo::TensorProperties>& output_props =
+      properties.GetOutputProperties(node->name());
+  if (output_props.size() != 1) {
+    return Status::OK();
+  }
+  const OpInfo::TensorProperties& output_prop = output_props[0];
   PartialTensorShape output_shape(output_prop.shape());
   if (output_shape.num_elements() != 1) {
     bool full_reduction = false;
@@ -495,8 +506,12 @@ Status ConstantFolding::MaterializeReductionIndices(
       if (!IsReshape(*fanout)) {
         continue;
       }
-      const OpInfo::TensorProperties& reshape_prop =
-          properties.GetOutputProperties(fanout->name())[0];
+      const std::vector<OpInfo::TensorProperties>& reshape_props =
+          properties.GetOutputProperties(fanout->name());
+      if (reshape_props.size() != 1) {
+        return Status::OK();
+      }
+      const OpInfo::TensorProperties& reshape_prop = reshape_props[0];
       PartialTensorShape shape(reshape_prop.shape());
       if (shape.num_elements() != 1) {
         return Status::OK();
@@ -509,8 +524,7 @@ Status ConstantFolding::MaterializeReductionIndices(
     }
   }
 
-  const OpInfo::TensorProperties& reduction_prop =
-      properties.GetInputProperties(node->name())[1];
+  const OpInfo::TensorProperties& reduction_prop = input_props[1];
   DataType dtype = reduction_prop.dtype();
   if (dtype != DT_INT32 && dtype != DT_INT64) {
     return Status::OK();
@@ -1279,7 +1293,6 @@ Status ConstantFolding::SimplifyGraph(GraphDef* output,
     if (IsSimplifiableReduction(node)) {
       // Replace the reduction node with an identity node, that can be further
       // optimized by the model pruner.
-      const NodeDef* reductions_indices = node_map_->GetNode(node.input(1));
       DataType output_type;
       if (node.attr().count("T") > 0) {
         output_type = node.attr().at("T").type();
@@ -1290,32 +1303,17 @@ Status ConstantFolding::SimplifyGraph(GraphDef* output,
       node.set_op("Identity");
       node.clear_attr();
       (*node.mutable_attr())["T"].set_type(output_type);
-      if (node.input_size() > 2) {
-        node.mutable_input()->SwapElements(1, node.input_size() - 1);
-      }
-      node.mutable_input()->RemoveLast();
-      for (const auto& input : reductions_indices->input()) {
-        DCHECK(IsControlInput(input));
-        *node.add_input() = input;
-      }
+      *node.mutable_input(1) = AsControlDependency(node.input(1));
     }
     const bool safe_to_use_shapes =
         use_shape_info &&
         (feed_nodes_.empty() || opt_level_ == RewriterConfig::AGGRESSIVE);
     if (safe_to_use_shapes && IsSimplifiableReshape(node, properties)) {
-      const NodeDef* new_shape = node_map_->GetNode(node.input(1));
       DataType output_type = node.attr().at("T").type();
       node.set_op("Identity");
       node.clear_attr();
       (*node.mutable_attr())["T"].set_type(output_type);
-      if (node.input_size() > 2) {
-        node.mutable_input()->SwapElements(1, node.input_size() - 1);
-      }
-      node.mutable_input()->RemoveLast();
-      for (const auto& input : new_shape->input()) {
-        DCHECK(IsControlInput(input));
-        *node.add_input() = input;
-      }
+      *node.mutable_input(1) = AsControlDependency(node.input(1));
     }
 
     // Simplify multiplication by ones or zeros, and addition of zeros.
@@ -1408,14 +1406,10 @@ Status ConstantFolding::RunOptimizationPass(Cluster* cluster,
 
   if (can_use_shape_info) {
     TF_RETURN_IF_ERROR(MaterializeShapes(properties));
-
-    if (opt_level_ == RewriterConfig::AGGRESSIVE) {
-      TF_RETURN_IF_ERROR(MaterializeConstants(properties));
-    }
+    TF_RETURN_IF_ERROR(MaterializeConstants(properties));
   }
 
   TF_RETURN_IF_ERROR(FoldGraph(output));
-
   TF_RETURN_IF_ERROR(SimplifyGraph(output, properties, can_use_shape_info));
 
   return Status::OK();
diff --git a/tensorflow/core/grappler/optimizers/constant_folding_test.cc b/tensorflow/core/grappler/optimizers/constant_folding_test.cc
index 4bd50e48d2..ffa09b8e29 100644
--- a/tensorflow/core/grappler/optimizers/constant_folding_test.cc
+++ b/tensorflow/core/grappler/optimizers/constant_folding_test.cc
@@ -915,7 +915,7 @@ TEST_F(ConstantFoldingTest, NoOpReduction) {
       EXPECT_EQ("Identity", node.op());
       EXPECT_EQ(2, node.input_size());
       EXPECT_EQ("v", node.input(0));
-      EXPECT_EQ("^v", node.input(1));
+      EXPECT_EQ("^i", node.input(1));
     }
   }
   EXPECT_TRUE(found);
@@ -974,20 +974,20 @@ TEST_F(ConstantFoldingTest, NoOpReshape) {
       EXPECT_EQ("Identity", node.op());
       ASSERT_EQ(3, node.input_size());
       EXPECT_EQ("v1", node.input(0));
-      EXPECT_EQ("^d1", node.input(1));
-      EXPECT_EQ("^v1", node.input(2));
+      EXPECT_EQ("^i1", node.input(1));
+      EXPECT_EQ("^d1", node.input(2));
     } else if (node.name() == "r3") {
       ++found;
       EXPECT_EQ("Identity", node.op());
       ASSERT_EQ(2, node.input_size());
       EXPECT_EQ("v3", node.input(0));
-      EXPECT_EQ("^v3", node.input(1));
+      EXPECT_EQ("^i3", node.input(1));
     } else if (node.name() == "r4") {
       ++found;
       EXPECT_EQ("Identity", node.op());
       ASSERT_EQ(2, node.input_size());
       EXPECT_EQ("v4", node.input(0));
-      EXPECT_EQ("^v4", node.input(1));
+      EXPECT_EQ("^i4", node.input(1));
     } else if (node.name() == "r2") {
       ++found;
       EXPECT_EQ("Reshape", node.op());
diff --git a/tensorflow/python/debug/lib/debug_graph_reconstruction_test.py b/tensorflow/python/debug/lib/debug_graph_reconstruction_test.py
index cc1a380538..bd00f73861 100644
--- a/tensorflow/python/debug/lib/debug_graph_reconstruction_test.py
+++ b/tensorflow/python/debug/lib/debug_graph_reconstruction_test.py
@@ -164,7 +164,7 @@ class ReconstructNonDebugGraphTest(test_util.TensorFlowTestCase):
       self._compareOriginalAndReconstructedGraphDefs(sess, loop)
 
   def testReconstructGraphWithGradients(self):
-    with session.Session() as sess:
+    with session.Session(config=self._no_rewrite_session_config()) as sess:
       u = variables.Variable(12.0, name="u")
       v = variables.Variable(30.0, name="v")
       x = constant_op.constant(1.1, name="x")
-- 
GitLab


From d69b292a753de409fb46916e80f87bd999144456 Mon Sep 17 00:00:00 2001
From: Hanchen Li <hanchenl@nvidia.com>
Date: Wed, 6 Dec 2017 10:35:43 -0800
Subject: [PATCH 0684/1225] Fix problem with camera on Android TV

---
 .../android/src/org/tensorflow/demo/CameraActivity.java        | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tensorflow/examples/android/src/org/tensorflow/demo/CameraActivity.java b/tensorflow/examples/android/src/org/tensorflow/demo/CameraActivity.java
index 4e45f42d0c..bd45e44537 100644
--- a/tensorflow/examples/android/src/org/tensorflow/demo/CameraActivity.java
+++ b/tensorflow/examples/android/src/org/tensorflow/demo/CameraActivity.java
@@ -333,7 +333,8 @@ public abstract class CameraActivity extends Activity
           continue;
         }
 
-        useCamera2API = isHardwareLevelSupported(characteristics,
+        useCamera2API = facing == CameraCharacteristics.LENS_FACING_EXTERNAL || 
+            isHardwareLevelSupported(characteristics,
             CameraCharacteristics.INFO_SUPPORTED_HARDWARE_LEVEL_FULL);
         LOGGER.i("Camera API lv2?: %s", useCamera2API);
         return cameraId;
-- 
GitLab


From ce1eacfc117e9868620f466919996ec755321daa Mon Sep 17 00:00:00 2001
From: sandipmgiri <sgiri@us.ibm.com>
Date: Thu, 7 Dec 2017 00:15:48 +0530
Subject: [PATCH 0685/1225] summary_image_op_test fixed on ppc64le (#15147)

---
 tensorflow/python/kernel_tests/summary_image_op_test.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/tensorflow/python/kernel_tests/summary_image_op_test.py b/tensorflow/python/kernel_tests/summary_image_op_test.py
index d2152ab560..4718827e88 100644
--- a/tensorflow/python/kernel_tests/summary_image_op_test.py
+++ b/tensorflow/python/kernel_tests/summary_image_op_test.py
@@ -50,7 +50,6 @@ class SummaryImageOpTest(test.TestCase):
     self.assertProtoEquals(expected, image_summ)
 
   def testImageSummary(self):
-    np.random.seed(7)
     for depth in (1, 3, 4):
       for positive in False, True:
         with self.test_session(graph=ops.Graph()) as sess:
-- 
GitLab


From dfc3d9489777d9e7cd5063ccc8cfb4514e7775d1 Mon Sep 17 00:00:00 2001
From: Jiri Simsa <jsimsa@google.com>
Date: Wed, 6 Dec 2017 10:43:53 -0800
Subject: [PATCH 0686/1225] Switch tf.data support for sparse tensors from
 string-based serialization to variant-based serialization.

PiperOrigin-RevId: 178118839
---
 tensorflow/core/kernels/BUILD                 |   1 +
 tensorflow/core/kernels/batch_util.cc         |  59 +++++++--
 tensorflow/core/kernels/batch_util.h          |   3 +
 tensorflow/core/kernels/fifo_queue.cc         |   2 +-
 tensorflow/core/kernels/inplace_ops.cc        |   1 +
 tensorflow/core/kernels/priority_queue.cc     |   2 +-
 tensorflow/core/kernels/queue_base.cc         |  27 +---
 .../core/kernels/random_shuffle_queue_op.cc   |   2 +-
 .../core/kernels/tensor_slice_dataset_op.cc   |  38 +-----
 tensorflow/python/data/ops/dataset_ops.py     |   7 +-
 tensorflow/python/data/util/sparse.py         |  26 +++-
 tensorflow/python/data/util/sparse_test.py    | 120 +++++++++++-------
 .../dataset_constructor_op_test.py            |  10 +-
 13 files changed, 163 insertions(+), 135 deletions(-)

diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD
index 77ca8f5fcb..9a150fef7c 100644
--- a/tensorflow/core/kernels/BUILD
+++ b/tensorflow/core/kernels/BUILD
@@ -6231,6 +6231,7 @@ tf_kernel_library(
     name = "tensor_slice_dataset_op",
     srcs = ["tensor_slice_dataset_op.cc"],
     deps = [
+        ":batch_util",
         ":dataset",
         "//tensorflow/core:dataset_ops_op_lib",
         "//tensorflow/core:framework",
diff --git a/tensorflow/core/kernels/batch_util.cc b/tensorflow/core/kernels/batch_util.cc
index 298e156579..7f2df95e2d 100644
--- a/tensorflow/core/kernels/batch_util.cc
+++ b/tensorflow/core/kernels/batch_util.cc
@@ -24,7 +24,21 @@ namespace batch_util {
 
 namespace {
 
-// Copies element into the index^th slice of parent (in the 0th dimension).
+Status ValidateInput(const Tensor& parent, const Tensor& element, int64 index) {
+  DCHECK_NE(parent.dim_size(0), 0);
+  DCHECK_GE(index, 0);
+  if (element.NumElements() != (parent.NumElements() / parent.dim_size(0))) {
+    TensorShape chip_shape = parent.shape();
+    chip_shape.RemoveDim(0);
+    return errors::Internal(
+        "ValidateInput Cannot perform copy: number of elements does not match. "
+        " Shapes are: [element]: ",
+        element.shape().DebugString(),
+        ", [parent slice]: ", chip_shape.DebugString());
+  }
+  return Status::OK();
+}
+
 template <typename T>
 Status HandleElementToSlice(Tensor element, Tensor* parent, int64 index,
                             bool /* can_move */) {
@@ -47,18 +61,22 @@ Status HandleElementToSlice<string>(Tensor element, Tensor* parent, int64 index,
   return Status::OK();
 }
 
+// TODO(jsimsa): Add HandleElementToSlice<variant> specialization that moves
+// the data when possible.
+
+template <typename T>
+static Status HandleSliceToElement(const Tensor& parent, Tensor* element,
+                                   int64 index) {
+  element->flat<T>() = parent.flat_outer_dims<T>().chip(index, 0);
+  return Status::OK();
+}
+
 }  // namespace
 
+// Copies element into the index^th slice of parent (in the 0th dimension).
 Status CopyElementToSlice(Tensor element, Tensor* parent, int64 index) {
-  if (element.NumElements() != (parent->NumElements() / parent->dim_size(0))) {
-    TensorShape chip_shape = parent->shape();
-    chip_shape.RemoveDim(0);
-    return errors::InvalidArgument(
-        "HandleElementToSlice Cannot copy slice: number of elements does "
-        "not match. Shapes are: [element]: ",
-        element.shape().DebugString(),
-        ", [parent slice]: ", chip_shape.DebugString());
-  }
+  TF_RETURN_IF_ERROR(ValidateInput(*parent, element, index));
+
   bool can_move = element.RefCountIsOne();
 #define HANDLE_TYPE(T)                                                \
   case DataTypeToEnum<T>::value: {                                    \
@@ -69,6 +87,7 @@ Status CopyElementToSlice(Tensor element, Tensor* parent, int64 index) {
   switch (element.dtype()) {
     TF_CALL_ALL_TYPES(HANDLE_TYPE);
     TF_CALL_QUANTIZED_TYPES(HANDLE_TYPE);
+    TF_CALL_variant(HANDLE_TYPE);
 #undef HANDLE_TYPE
     default:
       return errors::Unimplemented("CopyElementToSlice Unhandled data type: ",
@@ -76,5 +95,25 @@ Status CopyElementToSlice(Tensor element, Tensor* parent, int64 index) {
   }
 }
 
+// Copies the index^th slice of parent (in the 0th dimension) into element.
+Status CopySliceToElement(const Tensor& parent, Tensor* element, int64 index) {
+  TF_RETURN_IF_ERROR(ValidateInput(parent, *element, index));
+
+#define HANDLE_TYPE(T)                                      \
+  case DataTypeToEnum<T>::value: {                          \
+    return HandleSliceToElement<T>(parent, element, index); \
+  }
+
+  switch (parent.dtype()) {
+    TF_CALL_ALL_TYPES(HANDLE_TYPE);
+    TF_CALL_QUANTIZED_TYPES(HANDLE_TYPE);
+    TF_CALL_variant(HANDLE_TYPE);
+#undef HANDLE_TYPE
+    default:
+      return errors::Unimplemented("CopySliceToElement Unhandled data type: ",
+                                   element->dtype());
+  }
+}
+
 }  // namespace batch_util
 }  // namespace tensorflow
diff --git a/tensorflow/core/kernels/batch_util.h b/tensorflow/core/kernels/batch_util.h
index 065011a699..b066e2a574 100644
--- a/tensorflow/core/kernels/batch_util.h
+++ b/tensorflow/core/kernels/batch_util.h
@@ -29,6 +29,9 @@ namespace batch_util {
 // for DT_STRING tensors.
 Status CopyElementToSlice(Tensor element, Tensor* parent, int64 index);
 
+// Copies the index^th slice of parent (in the 0th dimension) into element.
+Status CopySliceToElement(const Tensor& parent, Tensor* element, int64 index);
+
 }  // namespace batch_util
 }  // namespace tensorflow
 
diff --git a/tensorflow/core/kernels/fifo_queue.cc b/tensorflow/core/kernels/fifo_queue.cc
index 9fd82e2168..82ec879119 100644
--- a/tensorflow/core/kernels/fifo_queue.cc
+++ b/tensorflow/core/kernels/fifo_queue.cc
@@ -95,7 +95,7 @@ Status FIFOQueue::GetElementComponentFromBatch(const FIFOQueue::Tuple& tuple,
   TF_RETURN_IF_ERROR(ctx->allocate_persistent(
       tuple[component].dtype(), element_shape, out_tensor, &element_access));
   TF_RETURN_IF_ERROR(
-      CopySliceToElement(tuple[component], element_access, index));
+      batch_util::CopySliceToElement(tuple[component], element_access, index));
   return Status::OK();
 }
 
diff --git a/tensorflow/core/kernels/inplace_ops.cc b/tensorflow/core/kernels/inplace_ops.cc
index 01ae5a83c1..7728ba850c 100644
--- a/tensorflow/core/kernels/inplace_ops.cc
+++ b/tensorflow/core/kernels/inplace_ops.cc
@@ -52,6 +52,7 @@ Status DoParallelConcat(const CPUDevice& d, const Tensor& value, int32 loc,
     return DoParallelConcatUpdate<CPUDevice, type>(d, value, loc, output);
     TF_CALL_NUMBER_TYPES(CASE);
     TF_CALL_string(CASE);
+    TF_CALL_variant(CASE);
 #undef CASE
     default:
       return errors::InvalidArgument("Unsupported data type: ", value.dtype());
diff --git a/tensorflow/core/kernels/priority_queue.cc b/tensorflow/core/kernels/priority_queue.cc
index 5c487edbe3..bab94f7f0a 100644
--- a/tensorflow/core/kernels/priority_queue.cc
+++ b/tensorflow/core/kernels/priority_queue.cc
@@ -123,7 +123,7 @@ Status PriorityQueue::GetElementComponentFromBatch(
   TF_RETURN_IF_ERROR(ctx->allocate_persistent(
       tuple[component].dtype(), element_shape, out_tensor, &element_access));
   TF_RETURN_IF_ERROR(
-      CopySliceToElement(tuple[component], element_access, index));
+      batch_util::CopySliceToElement(tuple[component], element_access, index));
   return Status::OK();
 }
 
diff --git a/tensorflow/core/kernels/queue_base.cc b/tensorflow/core/kernels/queue_base.cc
index 6c91d0cd94..330d161c32 100644
--- a/tensorflow/core/kernels/queue_base.cc
+++ b/tensorflow/core/kernels/queue_base.cc
@@ -336,32 +336,7 @@ void QueueBase::FlushUnlocked() {
 
 Status QueueBase::CopySliceToElement(const Tensor& parent, Tensor* element,
                                      int64 index) {
-#define HANDLE_TYPE(DT)                                                   \
-  if (parent.dtype() == DT) {                                             \
-    TF_RETURN_IF_ERROR(HandleSliceToElement<DT>(parent, element, index)); \
-    return Status::OK();                                                  \
-  }
-  HANDLE_TYPE(DT_FLOAT);
-  HANDLE_TYPE(DT_HALF);
-  HANDLE_TYPE(DT_DOUBLE);
-  HANDLE_TYPE(DT_INT32);
-  HANDLE_TYPE(DT_UINT8);
-  HANDLE_TYPE(DT_INT16);
-  HANDLE_TYPE(DT_INT8);
-  HANDLE_TYPE(DT_STRING);
-  HANDLE_TYPE(DT_COMPLEX64);
-  HANDLE_TYPE(DT_COMPLEX128);
-  HANDLE_TYPE(DT_INT64);
-  HANDLE_TYPE(DT_BOOL);
-  HANDLE_TYPE(DT_QINT8);
-  HANDLE_TYPE(DT_QUINT8);
-  HANDLE_TYPE(DT_QINT32);
-  HANDLE_TYPE(DT_QINT16);
-  HANDLE_TYPE(DT_QUINT16);
-  HANDLE_TYPE(DT_UINT16);
-#undef HANDLE_TYPE
-  return errors::Unimplemented("CopySliceToElement Unhandled data type: ",
-                               parent.dtype());
+  return batch_util::CopySliceToElement(parent, element, index);
 }
 
 /* static */
diff --git a/tensorflow/core/kernels/random_shuffle_queue_op.cc b/tensorflow/core/kernels/random_shuffle_queue_op.cc
index 7a40e9ddf2..e9695cfde3 100644
--- a/tensorflow/core/kernels/random_shuffle_queue_op.cc
+++ b/tensorflow/core/kernels/random_shuffle_queue_op.cc
@@ -171,7 +171,7 @@ Status RandomShuffleQueue::GetElementComponentFromBatch(
   TF_RETURN_IF_ERROR(ctx->allocate_persistent(
       tuple[component].dtype(), element_shape, out_tensor, &element_access));
   TF_RETURN_IF_ERROR(
-      CopySliceToElement(tuple[component], element_access, index));
+      batch_util::CopySliceToElement(tuple[component], element_access, index));
   return Status::OK();
 }
 
diff --git a/tensorflow/core/kernels/tensor_slice_dataset_op.cc b/tensorflow/core/kernels/tensor_slice_dataset_op.cc
index 19d4816ff3..86f8f436d4 100644
--- a/tensorflow/core/kernels/tensor_slice_dataset_op.cc
+++ b/tensorflow/core/kernels/tensor_slice_dataset_op.cc
@@ -16,6 +16,7 @@ limitations under the License.
 
 #include "tensorflow/core/framework/partial_tensor_shape.h"
 #include "tensorflow/core/framework/tensor.h"
+#include "tensorflow/core/kernels/batch_util.h"
 
 namespace tensorflow {
 
@@ -101,41 +102,6 @@ class TensorSliceDatasetOp : public DatasetOpKernel {
     }
 
    private:
-    template <typename T>
-    static Status HandleSliceToElement(const Tensor& parent, Tensor* element,
-                                       int64 index) {
-      DCHECK_NE(parent.dim_size(0), 0);
-      DCHECK_GE(index, 0);
-      if (element->NumElements() !=
-          (parent.NumElements() / parent.dim_size(0))) {
-        TensorShape chip_shape = parent.shape();
-        chip_shape.RemoveDim(0);
-        return errors::Internal(
-            "HandleSliceToElement Cannot copy slice: number of elements does "
-            "not match.  Shapes are: [element]: ",
-            element->shape().DebugString(), ", [parent slice]: ",
-            chip_shape.DebugString());
-      }
-      auto parent_as_matrix = parent.flat_outer_dims<T>();
-      element->flat<T>() = parent_as_matrix.chip(index, 0);
-      return Status::OK();
-    }
-
-    static Status CopySliceToElement(const Tensor& parent, Tensor* element,
-                                     int64 index) {
-#define HANDLE_TYPE(T)                                      \
-  case DataTypeToEnum<T>::value: {                          \
-    return HandleSliceToElement<T>(parent, element, index); \
-  }
-
-      switch (parent.dtype()) {
-        TF_CALL_DATASET_TYPES(HANDLE_TYPE);
-        default:
-          return errors::Unimplemented(
-              "CopySliceToElement Unhandled data type: ", element->dtype());
-      }
-    }
-
     class Iterator : public DatasetIterator<Dataset> {
      public:
       explicit Iterator(const Params& params)
@@ -154,7 +120,7 @@ class TensorSliceDatasetOp : public DatasetOpKernel {
             const Tensor& t = dataset()->tensors_[i];
             Tensor t_slice(cpu_allocator(), t.dtype(),
                            TensorShape(dataset()->shapes_[i].dim_sizes()));
-            TF_RETURN_IF_ERROR(CopySliceToElement(t, &t_slice, i_));
+            TF_RETURN_IF_ERROR(batch_util::CopySliceToElement(t, &t_slice, i_));
             out_tensors->emplace_back(std::move(t_slice));
           }
           ++i_;
diff --git a/tensorflow/python/data/ops/dataset_ops.py b/tensorflow/python/data/ops/dataset_ops.py
index 927c6d5c02..17d9510cc3 100644
--- a/tensorflow/python/data/ops/dataset_ops.py
+++ b/tensorflow/python/data/ops/dataset_ops.py
@@ -40,7 +40,6 @@ from tensorflow.python.ops import gen_dataset_ops
 from tensorflow.python.ops import gen_io_ops
 from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import script_ops
-from tensorflow.python.ops import sparse_ops
 from tensorflow.python.util import deprecation
 
 
@@ -946,11 +945,7 @@ class TensorSliceDataset(Dataset):
     batch_dim = flat_tensors[0].get_shape()[0]
     for t in flat_tensors[1:]:
       batch_dim.assert_is_compatible_with(t.get_shape()[0])
-    self._tensors = nest.pack_sequence_as(tensors, [
-        sparse_ops.serialize_many_sparse(tensor)
-        if sparse_tensor_lib.is_sparse(tensor) else tensor
-        for tensor in nest.flatten(tensors)
-    ])
+    self._tensors = sparse.serialize_many_sparse_tensors(tensors)
     self._output_classes = sparse.get_classes(tensors)
     self._output_shapes = nest.pack_sequence_as(
         tensors, [t.get_shape()[1:] for t in nest.flatten(tensors)])
diff --git a/tensorflow/python/data/util/sparse.py b/tensorflow/python/data/util/sparse.py
index b4219198d3..5ebcb4ea81 100644
--- a/tensorflow/python/data/util/sparse.py
+++ b/tensorflow/python/data/util/sparse.py
@@ -57,7 +57,7 @@ def as_dense_shapes(shapes, classes):
 
 
 def as_dense_types(types, classes):
-  """Converts sparse tensor types to `dtypes.string`.
+  """Converts sparse tensor types to `dtypes.variant`.
 
   Args:
     types: a structure of types to convert.
@@ -65,11 +65,11 @@ def as_dense_types(types, classes):
 
   Returns:
     a structure matching the nested structure of `types`, containing
-    `dtypes.string` at positions where `classes` contains `tf.SparseTensor` and
+    `dtypes.variant` at positions where `classes` contains `tf.SparseTensor` and
     matching contents of `types` otherwise
   """
   ret = nest.pack_sequence_as(types, [
-      dtypes.string if c is sparse_tensor.SparseTensor else ty
+      dtypes.variant if c is sparse_tensor.SparseTensor else ty
       for ty, c in zip(nest.flatten(types), nest.flatten(classes))
   ])
   return ret
@@ -116,6 +116,24 @@ def get_classes(tensors):
   ])
 
 
+def serialize_many_sparse_tensors(tensors):
+  """Serializes many sparse tensors into a batch.
+
+  Args:
+    tensors: a tensor structure to serialize.
+
+  Returns:
+    `tensors` with any sparse tensors replaced by the serialized batch.
+  """
+
+  ret = nest.pack_sequence_as(tensors, [
+      sparse_ops.serialize_many_sparse(tensor, out_type=dtypes.variant)
+      if sparse_tensor.is_sparse(tensor) else tensor
+      for tensor in nest.flatten(tensors)
+  ])
+  return ret
+
+
 def serialize_sparse_tensors(tensors):
   """Serializes sparse tensors.
 
@@ -127,7 +145,7 @@ def serialize_sparse_tensors(tensors):
   """
 
   ret = nest.pack_sequence_as(tensors, [
-      sparse_ops.serialize_sparse(tensor)
+      sparse_ops.serialize_sparse(tensor, out_type=dtypes.variant)
       if isinstance(tensor, sparse_tensor.SparseTensor) else tensor
       for tensor in nest.flatten(tensors)
   ])
diff --git a/tensorflow/python/data/util/sparse_test.py b/tensorflow/python/data/util/sparse_test.py
index a707570bab..d49b3ff34b 100644
--- a/tensorflow/python/data/util/sparse_test.py
+++ b/tensorflow/python/data/util/sparse_test.py
@@ -168,7 +168,7 @@ class SparseTest(test.TestCase):
         {
             "types": dtypes.int32,
             "classes": sparse_tensor.SparseTensor,
-            "expected": dtypes.string
+            "expected": dtypes.variant
         },
         {
             "types": (dtypes.int32),
@@ -178,7 +178,7 @@ class SparseTest(test.TestCase):
         {
             "types": (dtypes.int32),
             "classes": (sparse_tensor.SparseTensor),
-            "expected": (dtypes.string)
+            "expected": (dtypes.variant)
         },
         {
             "types": (dtypes.int32, ()),
@@ -193,12 +193,12 @@ class SparseTest(test.TestCase):
         {
             "types": (dtypes.int32, ()),
             "classes": (sparse_tensor.SparseTensor, ()),
-            "expected": (dtypes.string, ())
+            "expected": (dtypes.variant, ())
         },
         {
             "types": ((), dtypes.int32),
             "classes": ((), sparse_tensor.SparseTensor),
-            "expected": ((), dtypes.string)
+            "expected": ((), dtypes.variant)
         },
         {
             "types": (dtypes.int32, (), dtypes.int32),
@@ -209,7 +209,7 @@ class SparseTest(test.TestCase):
             "types": (dtypes.int32, (), dtypes.int32),
             "classes": (sparse_tensor.SparseTensor, (),
                         sparse_tensor.SparseTensor),
-            "expected": (dtypes.string, (), dtypes.string)
+            "expected": (dtypes.variant, (), dtypes.variant)
         },
         {
             "types": ((), dtypes.int32, ()),
@@ -219,7 +219,7 @@ class SparseTest(test.TestCase):
         {
             "types": ((), dtypes.int32, ()),
             "classes": ((), sparse_tensor.SparseTensor, ()),
-            "expected": ((), dtypes.string, ())
+            "expected": ((), dtypes.variant, ())
         },
     )
     for test_case in test_cases:
@@ -227,45 +227,6 @@ class SparseTest(test.TestCase):
           sparse.as_dense_types(test_case["types"], test_case["classes"]),
           test_case["expected"])
 
-  def assertSparseValuesEqual(self, a, b):
-    if not isinstance(a, sparse_tensor.SparseTensor):
-      self.assertFalse(isinstance(b, sparse_tensor.SparseTensor))
-      self.assertEqual(a, b)
-      return
-    self.assertTrue(isinstance(b, sparse_tensor.SparseTensor))
-    with self.test_session():
-      self.assertAllEqual(a.eval().indices, b.eval().indices)
-      self.assertAllEqual(a.eval().values, b.eval().values)
-      self.assertAllEqual(a.eval().dense_shape, b.eval().dense_shape)
-
-  def testSerializeDeserialize(self):
-    test_cases = (
-        (),
-        sparse_tensor.SparseTensor(
-            indices=[[0, 0]], values=[1], dense_shape=[1, 1]),
-        sparse_tensor.SparseTensor(
-            indices=[[3, 4]], values=[-1], dense_shape=[4, 5]),
-        sparse_tensor.SparseTensor(
-            indices=[[0, 0], [3, 4]], values=[1, -1], dense_shape=[4, 5]),
-        (sparse_tensor.SparseTensor(
-            indices=[[0, 0]], values=[1], dense_shape=[1, 1])),
-        (sparse_tensor.SparseTensor(
-            indices=[[0, 0]], values=[1], dense_shape=[1, 1]), ()),
-        ((), sparse_tensor.SparseTensor(
-            indices=[[0, 0]], values=[1], dense_shape=[1, 1])),
-    )
-    for expected in test_cases:
-      classes = sparse.get_classes(expected)
-      shapes = nest.map_structure(lambda _: tensor_shape.TensorShape(None),
-                                  classes)
-      types = nest.map_structure(lambda _: dtypes.int32, classes)
-      actual = sparse.deserialize_sparse_tensors(
-          sparse.serialize_sparse_tensors(expected), types, shapes,
-          sparse.get_classes(expected))
-      nest.assert_same_structure(expected, actual)
-      for a, e in zip(nest.flatten(actual), nest.flatten(expected)):
-        self.assertSparseValuesEqual(a, e)
-
   def testGetClasses(self):
     s = sparse_tensor.SparseTensor(indices=[[0]], values=[1], dense_shape=[1])
     d = ops.Tensor
@@ -324,6 +285,75 @@ class SparseTest(test.TestCase):
       self.assertEqual(
           sparse.get_classes(test_case["classes"]), test_case["expected"])
 
+  def assertSparseValuesEqual(self, a, b):
+    if not isinstance(a, sparse_tensor.SparseTensor):
+      self.assertFalse(isinstance(b, sparse_tensor.SparseTensor))
+      self.assertEqual(a, b)
+      return
+    self.assertTrue(isinstance(b, sparse_tensor.SparseTensor))
+    with self.test_session():
+      self.assertAllEqual(a.eval().indices, b.eval().indices)
+      self.assertAllEqual(a.eval().values, b.eval().values)
+      self.assertAllEqual(a.eval().dense_shape, b.eval().dense_shape)
+
+  def testSerializeDeserialize(self):
+    test_cases = (
+        (),
+        sparse_tensor.SparseTensor(
+            indices=[[0, 0]], values=[1], dense_shape=[1, 1]),
+        sparse_tensor.SparseTensor(
+            indices=[[3, 4]], values=[-1], dense_shape=[4, 5]),
+        sparse_tensor.SparseTensor(
+            indices=[[0, 0], [3, 4]], values=[1, -1], dense_shape=[4, 5]),
+        (sparse_tensor.SparseTensor(
+            indices=[[0, 0]], values=[1], dense_shape=[1, 1])),
+        (sparse_tensor.SparseTensor(
+            indices=[[0, 0]], values=[1], dense_shape=[1, 1]), ()),
+        ((),
+         sparse_tensor.SparseTensor(
+             indices=[[0, 0]], values=[1], dense_shape=[1, 1])),
+    )
+    for expected in test_cases:
+      classes = sparse.get_classes(expected)
+      shapes = nest.map_structure(lambda _: tensor_shape.TensorShape(None),
+                                  classes)
+      types = nest.map_structure(lambda _: dtypes.int32, classes)
+      actual = sparse.deserialize_sparse_tensors(
+          sparse.serialize_sparse_tensors(expected), types, shapes,
+          sparse.get_classes(expected))
+      nest.assert_same_structure(expected, actual)
+      for a, e in zip(nest.flatten(actual), nest.flatten(expected)):
+        self.assertSparseValuesEqual(a, e)
+
+  def testSerializeManyDeserialize(self):
+    test_cases = (
+        (),
+        sparse_tensor.SparseTensor(
+            indices=[[0, 0]], values=[1], dense_shape=[1, 1]),
+        sparse_tensor.SparseTensor(
+            indices=[[3, 4]], values=[-1], dense_shape=[4, 5]),
+        sparse_tensor.SparseTensor(
+            indices=[[0, 0], [3, 4]], values=[1, -1], dense_shape=[4, 5]),
+        (sparse_tensor.SparseTensor(
+            indices=[[0, 0]], values=[1], dense_shape=[1, 1])),
+        (sparse_tensor.SparseTensor(
+            indices=[[0, 0]], values=[1], dense_shape=[1, 1]), ()),
+        ((),
+         sparse_tensor.SparseTensor(
+             indices=[[0, 0]], values=[1], dense_shape=[1, 1])),
+    )
+    for expected in test_cases:
+      classes = sparse.get_classes(expected)
+      shapes = nest.map_structure(lambda _: tensor_shape.TensorShape(None),
+                                  classes)
+      types = nest.map_structure(lambda _: dtypes.int32, classes)
+      actual = sparse.deserialize_sparse_tensors(
+          sparse.serialize_many_sparse_tensors(expected), types, shapes,
+          sparse.get_classes(expected))
+      nest.assert_same_structure(expected, actual)
+      for a, e in zip(nest.flatten(actual), nest.flatten(expected)):
+        self.assertSparseValuesEqual(a, e)
+
 
 if __name__ == "__main__":
   test.main()
diff --git a/tensorflow/python/kernel_tests/dataset_constructor_op_test.py b/tensorflow/python/kernel_tests/dataset_constructor_op_test.py
index 9e2a620550..85ff228eb2 100644
--- a/tensorflow/python/kernel_tests/dataset_constructor_op_test.py
+++ b/tensorflow/python/kernel_tests/dataset_constructor_op_test.py
@@ -37,7 +37,7 @@ from tensorflow.python.platform import test
 class DatasetConstructorTest(test.TestCase):
 
   def testFromTensors(self):
-    """Test an dataset that represents a single tuple of tensors."""
+    """Test a dataset that represents a single tuple of tensors."""
     components = (np.array(1), np.array([1, 2, 3]), np.array(37.0))
 
     iterator = (dataset_ops.Dataset.from_tensors(components)
@@ -62,7 +62,7 @@ class DatasetConstructorTest(test.TestCase):
     self.assertAllEqual(a.dense_shape, b.dense_shape)
 
   def testFromTensorsSparse(self):
-    """Test an dataset that represents a single tuple of tensors."""
+    """Test a dataset that represents a single tuple of tensors."""
     components = (sparse_tensor.SparseTensorValue(
         indices=np.array([[0]]),
         values=np.array([0]),
@@ -125,7 +125,7 @@ class DatasetConstructorTest(test.TestCase):
         sess.run(get_next)
 
   def testFromTensorSlices(self):
-    """Test an dataset that represents the slices from a tuple of tensors."""
+    """Test a dataset that represents the slices from a tuple of tensors."""
     components = (
         np.tile(np.array([[1], [2], [3], [4]]), 20), np.tile(
             np.array([[12], [13], [14], [15]]), 22),
@@ -150,7 +150,7 @@ class DatasetConstructorTest(test.TestCase):
         sess.run(get_next)
 
   def testFromTensorSlicesSparse(self):
-    """Test an dataset that represents the slices from a tuple of tensors."""
+    """Test a dataset that represents the slices from a tuple of tensors."""
     components = (sparse_tensor.SparseTensorValue(
         indices=np.array([[0, 0], [1, 0], [2, 0]]),
         values=np.array([0, 0, 0]),
@@ -206,7 +206,7 @@ class DatasetConstructorTest(test.TestCase):
         sess.run(get_next)
 
   def testFromTensorSlicesMixed(self):
-    """Test an dataset that represents the slices from a tuple of tensors."""
+    """Test a dataset that represents the slices from a tuple of tensors."""
     components = (np.tile(np.array([[1], [2], [3]]), 20),
                   np.tile(np.array([[12], [13], [14]]), 22),
                   np.array([37.0, 38.0, 39.0]),
-- 
GitLab


From 3571514ec20d3da04b82abd239a4e5817dbfd5d8 Mon Sep 17 00:00:00 2001
From: Yun Peng <pcloudy@google.com>
Date: Wed, 6 Dec 2017 19:51:55 +0100
Subject: [PATCH 0687/1225] Improve the Windows Bazel build (#15125)

* Windows build refactor

* Some fixes

* Improve py_test macro

* Remove comments

* Fix marker file genrule

* Add md5 check

* Pass santiy check

* Still need BAZEL_SH

* fix pip_smoke_test.py

* Add comments about pip marker file

* Fix pip_smoke_test.py again
---
 configure.py                                  | 10 +++++
 tensorflow/tensorflow.bzl                     | 43 +++++++++++++++----
 .../ci_build/windows/bazel/bazel_test_lib.sh  |  4 --
 .../ci_build/windows/bazel/common_env.sh      | 16 -------
 .../windows/cpu/bazel/run_cc_test_windows.sh  |  6 +--
 .../windows/cpu/pip/build_tf_windows.sh       |  9 +---
 .../windows/gpu/bazel/run_cc_test_windows.sh  |  4 +-
 .../windows/gpu/pip/build_tf_windows.sh       |  9 +---
 .../ci_build/windows/libtensorflow_cpu.sh     |  3 +-
 tensorflow/tools/pip_package/BUILD            | 21 +++++++++
 .../tools/pip_package/build_pip_package.sh    |  4 +-
 .../tools/pip_package/pip_smoke_test.py       |  1 +
 12 files changed, 76 insertions(+), 54 deletions(-)

diff --git a/configure.py b/configure.py
index cf562bdee8..e52ff7a0ac 100644
--- a/configure.py
+++ b/configure.py
@@ -1023,6 +1023,15 @@ def create_android_bazelrc_configs():
 def set_grpc_build_flags():
   write_to_bazelrc('build --define grpc_no_ares=true')
 
+def set_windows_build_flags():
+  if is_windows():
+    # The non-monolithic build is not supported yet
+    write_to_bazelrc('build --config monolithic')
+    # Suppress warning messages
+    write_to_bazelrc('build --copt=-w --host_copt=-w')
+    # Output more verbose information when something goes wrong
+    write_to_bazelrc('build --verbose_failures')
+
 
 def main():
   # Make a copy of os.environ to be clear when functions and getting and setting
@@ -1101,6 +1110,7 @@ def main():
   set_cc_opt_flags(environ_cp)
   set_mkl()
   set_monolithic()
+  set_windows_build_flags()
   create_android_bazelrc_configs()
 
 if __name__ == '__main__':
diff --git a/tensorflow/tensorflow.bzl b/tensorflow/tensorflow.bzl
index d194b37700..84fab24031 100644
--- a/tensorflow/tensorflow.bzl
+++ b/tensorflow/tensorflow.bzl
@@ -123,6 +123,13 @@ def if_not_windows(a):
       "//conditions:default": a,
   })
 
+def if_windows(a):
+  return select({
+      clean_dep("//tensorflow:windows"): a,
+      clean_dep("//tensorflow:windows_msvc"): a,
+      "//conditions:default": [],
+  })
+
 def if_linux_x86_64(a):
   return select({
       clean_dep("//tensorflow:linux_x86_64"): a,
@@ -1298,11 +1305,32 @@ def tf_py_wrap_cc(name,
           "//conditions:default": [":" + cc_library_name],
       }))
 
-def py_test(deps=[], **kwargs):
+# This macro is for running python tests against system installed pip package
+# on Windows.
+#
+# py_test is built as an exectuable python zip file on Windows, which contains all
+# dependencies of the target. Because of the C++ extensions, it would be very
+# inefficient if the py_test zips all runfiles, plus we don't need them when running
+# tests against system installed pip package. So we'd like to get rid of the deps
+# of py_test in this case.
+#
+# In order to trigger the tests without bazel clean after getting rid of deps,
+# we introduce the following :
+# 1. When --define=no_tensorflow_py_deps=true, the py_test depends on a marker
+#    file of the pip package, the test gets to rerun when the pip package change.
+#    Note that this only works on Windows. See the definition of
+#    //tensorflow/tools/pip_package:win_pip_package_marker for specific reasons.
+# 2. When --define=no_tensorflow_py_deps=false (by default), it's a normal py_test.
+def py_test(deps=[], data=[], **kwargs):
   native.py_test(
       deps=select({
           "//conditions:default": deps,
-          clean_dep("//tensorflow:no_tensorflow_py_deps"): []
+          clean_dep("//tensorflow:no_tensorflow_py_deps"): [],
+      }),
+      data = data + select({
+          "//conditions:default": [],
+          clean_dep("//tensorflow:no_tensorflow_py_deps"):
+          ["//tensorflow/tools/pip_package:win_pip_package_marker"],
       }),
       **kwargs)
 
@@ -1324,7 +1352,7 @@ def tf_py_test(name,
                xla_enabled=False):
   if xla_enabled:
     additional_deps = additional_deps + tf_additional_xla_deps_py()
-  native.py_test(
+  py_test(
       name=name,
       size=size,
       srcs=srcs,
@@ -1334,13 +1362,10 @@ def tf_py_test(name,
       visibility=[clean_dep("//tensorflow:internal")],
       shard_count=shard_count,
       data=data,
-      deps=select({
-          "//conditions:default": [
-              clean_dep("//tensorflow/python:extra_py_tests_deps"),
-              clean_dep("//tensorflow/python:gradient_checker"),
+      deps=[
+            clean_dep("//tensorflow/python:extra_py_tests_deps"),
+            clean_dep("//tensorflow/python:gradient_checker"),
           ] + additional_deps,
-          clean_dep("//tensorflow:no_tensorflow_py_deps"): []
-      }),
       flaky=flaky,
       srcs_version="PY2AND3")
 
diff --git a/tensorflow/tools/ci_build/windows/bazel/bazel_test_lib.sh b/tensorflow/tools/ci_build/windows/bazel/bazel_test_lib.sh
index 44b6d52952..1e455ddc99 100644
--- a/tensorflow/tools/ci_build/windows/bazel/bazel_test_lib.sh
+++ b/tensorflow/tools/ci_build/windows/bazel/bazel_test_lib.sh
@@ -96,10 +96,6 @@ exclude_cpu_cc_tests="${failing_cpu_cc_tests} + ${broken_cpu_cc_tests}"
 
 exclude_gpu_cc_tests="${extra_failing_gpu_cc_tests} + ${exclude_cpu_cc_tests}"
 
-function clean_output_base() {
-  bazel clean --expunge
-}
-
 function run_configure_for_cpu_build {
   # Due to a bug in Bazel: https://github.com/bazelbuild/bazel/issues/2182
   # yes "" | ./configure doesn't work on Windows, so we set all the
diff --git a/tensorflow/tools/ci_build/windows/bazel/common_env.sh b/tensorflow/tools/ci_build/windows/bazel/common_env.sh
index 4a653698a2..f88e7176f0 100644
--- a/tensorflow/tools/ci_build/windows/bazel/common_env.sh
+++ b/tensorflow/tools/ci_build/windows/bazel/common_env.sh
@@ -36,12 +36,6 @@ export BAZEL_SH=${BAZEL_SH:-"C:/tools/msys64/usr/bin/bash"}
 export PYTHON_BIN_PATH="C:/Program Files/Anaconda3/python.exe"
 export PYTHON_LIB_PATH="C:/Program Files/Anaconda3/lib/site-packages"
 
-# Set Python path for cc_configure.bzl
-export BAZEL_PYTHON="C:/Program Files/Anaconda3/python.exe"
-
-# Set Visual Studio path
-export BAZEL_VS="C:/Program Files (x86)/Microsoft Visual Studio 14.0"
-
 # Add python into PATH, it's needed because gen_git_source.py uses
 # '/usr/bin/env python' as a shebang
 export PATH="/c/Program Files/Anaconda3:$PATH"
@@ -53,13 +47,3 @@ export PATH="/c/Program Files/Anaconda3/Scripts:$PATH"
 export PATH="/c/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v8.0/bin:$PATH"
 export PATH="/c/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v8.0/extras/CUPTI/libx64:$PATH"
 export PATH="/c/tools/cuda/bin:$PATH"
-
-# Set the common build options on Windows
-export BUILD_OPTS='--config=monolithic --copt=-w --host_copt=-w --verbose_failures --experimental_ui'
-
-# Build TF with wrapper-less CROSSTOOL
-# TODO(pcloudy): Remove this after wrapper-less CROSSTOOL becomes default
-export NO_MSVC_WRAPPER=1
-
-export USE_DYNAMIC_CRT=1
-
diff --git a/tensorflow/tools/ci_build/windows/cpu/bazel/run_cc_test_windows.sh b/tensorflow/tools/ci_build/windows/cpu/bazel/run_cc_test_windows.sh
index 8c419347d6..748a961e44 100644
--- a/tensorflow/tools/ci_build/windows/cpu/bazel/run_cc_test_windows.sh
+++ b/tensorflow/tools/ci_build/windows/cpu/bazel/run_cc_test_windows.sh
@@ -42,8 +42,6 @@ source "tensorflow/tools/ci_build/windows/bazel/common_env.sh" \
 source "tensorflow/tools/ci_build/windows/bazel/bazel_test_lib.sh" \
   || { echo "Failed to source bazel_test_lib.sh" >&2; exit 1; }
 
-clean_output_base
-
 run_configure_for_cpu_build
 
 # Compliling the following test is extremely slow with -c opt
@@ -54,5 +52,5 @@ passing_tests=$(bazel query "kind(cc_test, //tensorflow/cc/... + //tensorflow/co
   # We need to strip \r so that the result could be store into a variable under MSYS
   tr '\r' ' ')
 
-bazel test $BUILD_OPTS -k $slow_compiling_test --test_output=errors
-bazel test -c opt $BUILD_OPTS -k $passing_tests --test_output=errors
+bazel test -k $slow_compiling_test --test_output=errors
+bazel test -c opt -k $passing_tests --test_output=errors
diff --git a/tensorflow/tools/ci_build/windows/cpu/pip/build_tf_windows.sh b/tensorflow/tools/ci_build/windows/cpu/pip/build_tf_windows.sh
index 8520ca898f..31b4226a30 100644
--- a/tensorflow/tools/ci_build/windows/cpu/pip/build_tf_windows.sh
+++ b/tensorflow/tools/ci_build/windows/cpu/pip/build_tf_windows.sh
@@ -44,9 +44,7 @@ source "tensorflow/tools/ci_build/windows/bazel/bazel_test_lib.sh" \
 
 run_configure_for_cpu_build
 
-clean_output_base
-
-bazel build -c opt $BUILD_OPTS tensorflow/tools/pip_package:build_pip_package || exit $?
+bazel build -c opt tensorflow/tools/pip_package:build_pip_package || exit $?
 
 # Create a python test directory to avoid package name conflict
 PY_TEST_DIR="py_test_dir"
@@ -60,11 +58,8 @@ reinstall_tensorflow_pip ${PIP_NAME}
 
 # Define no_tensorflow_py_deps=true so that every py_test has no deps anymore,
 # which will result testing system installed tensorflow
-# TODO(pcloudy): Remove TF_SAVER_LENIENT_NAMES after
-# https://github.com/tensorflow/tensorflow/issues/12844 is fixed.
-bazel test -c opt $BUILD_OPTS -k --test_output=errors \
+bazel test -c opt -k --test_output=errors \
   --define=no_tensorflow_py_deps=true --test_lang_filters=py \
   --test_tag_filters=-no_pip,-no_windows,-no_oss \
   --build_tag_filters=-no_pip,-no_windows,-no_oss --build_tests_only \
-  --test_env=TF_SAVER_LENIENT_NAMES=True \
   //${PY_TEST_DIR}/tensorflow/python/...
diff --git a/tensorflow/tools/ci_build/windows/gpu/bazel/run_cc_test_windows.sh b/tensorflow/tools/ci_build/windows/gpu/bazel/run_cc_test_windows.sh
index 3fd960deab..f26f8727e5 100644
--- a/tensorflow/tools/ci_build/windows/gpu/bazel/run_cc_test_windows.sh
+++ b/tensorflow/tools/ci_build/windows/gpu/bazel/run_cc_test_windows.sh
@@ -56,5 +56,5 @@ passing_tests=$(bazel query "kind(cc_test, //tensorflow/cc/... + //tensorflow/co
 
 # TODO(pcloudy): There is a bug in Bazel preventing build with GPU support without -c opt
 # Re-enable this test after it is fixed.
-# bazel test --config=win-cuda $BUILD_OPTS -k $slow_compiling_test --test_output=errors
-bazel test -c opt --config=win-cuda $BUILD_OPTS -k $passing_tests --test_output=errors
+# bazel test --config=win-cuda -k $slow_compiling_test --test_output=errors
+bazel test -c opt --config=win-cuda -k $passing_tests --test_output=errors
diff --git a/tensorflow/tools/ci_build/windows/gpu/pip/build_tf_windows.sh b/tensorflow/tools/ci_build/windows/gpu/pip/build_tf_windows.sh
index 47ca42d642..922bb67bbf 100644
--- a/tensorflow/tools/ci_build/windows/gpu/pip/build_tf_windows.sh
+++ b/tensorflow/tools/ci_build/windows/gpu/pip/build_tf_windows.sh
@@ -44,9 +44,7 @@ source "tensorflow/tools/ci_build/windows/bazel/bazel_test_lib.sh" \
 
 run_configure_for_gpu_build
 
-clean_output_base
-
-bazel build -c opt $BUILD_OPTS tensorflow/tools/pip_package:build_pip_package || exit $?
+bazel build -c opt tensorflow/tools/pip_package:build_pip_package || exit $?
 
 # Create a python test directory to avoid package name conflict
 PY_TEST_DIR="py_test_dir"
@@ -61,11 +59,8 @@ reinstall_tensorflow_pip ${PIP_NAME}
 # Define no_tensorflow_py_deps=true so that every py_test has no deps anymore,
 # which will result testing system installed tensorflow
 # GPU tests are very flaky when running concurrently, so set local_test_jobs=1
-# TODO(pcloudy): Remove TF_SAVER_LENIENT_NAMES after
-# https://github.com/tensorflow/tensorflow/issues/12844 is fixed.
-bazel test -c opt $BUILD_OPTS -k --test_output=errors \
+bazel test -c opt -k --test_output=errors \
   --define=no_tensorflow_py_deps=true --test_lang_filters=py \
   --test_tag_filters=-no_pip,-no_windows,-no_windows_gpu,-no_gpu,-no_pip_gpu,no_oss \
   --build_tag_filters=-no_pip,-no_windows,-no_windows_gpu,-no_gpu,-no_pip_gpu,no_oss \
-  --test_env=TF_SAVER_LENIENT_NAMES=True \
   --local_test_jobs=1 --build_tests_only //${PY_TEST_DIR}/tensorflow/python/...
diff --git a/tensorflow/tools/ci_build/windows/libtensorflow_cpu.sh b/tensorflow/tools/ci_build/windows/libtensorflow_cpu.sh
index 9ac3613f27..80f2b590c9 100755
--- a/tensorflow/tools/ci_build/windows/libtensorflow_cpu.sh
+++ b/tensorflow/tools/ci_build/windows/libtensorflow_cpu.sh
@@ -44,13 +44,12 @@ export TF_BAZEL_TARGETS="${TF_BAZEL_TARGETS} //tensorflow/tools/lib_package:clic
 export TF_BAZEL_TARGETS="${TF_BAZEL_TARGETS} //tensorflow/java:libtensorflow_jni.so"
 export TF_BAZEL_TARGETS="${TF_BAZEL_TARGETS} //tensorflow/tools/lib_package:jnilicenses_generate"
 
-clean_output_base
 run_configure_for_cpu_build
 
 # build_libtensorflow_tarball in ../builds/libtensorflow.sh
 # cannot be used on Windows since it relies on pkg_tar rules.
 # So we do something special here
-bazel build -c opt ${BUILD_OPTS} \
+bazel build -c opt \
   tensorflow:libtensorflow.so \
   tensorflow/tools/lib_package:clicenses_generate \
   tensorflow/java:libtensorflow_jni.so \
diff --git a/tensorflow/tools/pip_package/BUILD b/tensorflow/tools/pip_package/BUILD
index 48fc4c91be..321f514f6d 100644
--- a/tensorflow/tools/pip_package/BUILD
+++ b/tensorflow/tools/pip_package/BUILD
@@ -6,6 +6,7 @@ package(default_visibility = ["//visibility:private"])
 load(
     "//tensorflow:tensorflow.bzl",
     "if_not_windows",
+    "if_windows",
     "transitive_hdrs",
 )
 load("//third_party/mkl:build_defs.bzl", "if_mkl")
@@ -193,3 +194,23 @@ sh_binary(
         ],
     }) + if_mkl(["//third_party/mkl:intel_binary_blob"]),
 )
+
+# A genrule for generating a marker file for the pip package on Windows
+#
+# This only works on Windows, because :simple_console_for_windows is a
+# python zip file containing everything we need for building the pip package.
+# However, on other platforms, due to https://github.com/bazelbuild/bazel/issues/4223,
+# when C++ extensions change, this generule doesn't rebuild.
+genrule(
+    name = "win_pip_package_marker",
+    srcs = if_windows([
+        ":build_pip_package",
+        ":simple_console_for_windows",
+    ]),
+    outs = ["win_pip_package_marker_file"],
+    cmd = select({
+        "//conditions:default": "touch $@",
+        "//tensorflow:windows": "md5sum $(locations :build_pip_package) $(locations :simple_console_for_windows) > $@",
+    }),
+    visibility = ["//visibility:public"],
+)
diff --git a/tensorflow/tools/pip_package/build_pip_package.sh b/tensorflow/tools/pip_package/build_pip_package.sh
index 8249703ba7..f5203bc544 100755
--- a/tensorflow/tools/pip_package/build_pip_package.sh
+++ b/tensorflow/tools/pip_package/build_pip_package.sh
@@ -24,7 +24,7 @@ function real_path() {
 function cp_external() {
   local src_dir=$1
   local dest_dir=$2
-  for f in `find "$src_dir" -maxdepth 1 -mindepth 1 ! -name '*local_config_cuda*'`; do
+  for f in `find "$src_dir" -maxdepth 1 -mindepth 1 ! -name '*local_config_cuda*' ! -name '*org_tensorflow*'`; do
     cp -R "$f" "$dest_dir"
   done
 }
@@ -92,7 +92,6 @@ function main() {
       bazel-bin/tensorflow/tools/pip_package/simple_console_for_window_unzip/runfiles/org_tensorflow/tensorflow \
       "${TMPDIR}"
     mkdir "${TMPDIR}/external"
-    # Note: this makes an extra copy of org_tensorflow.
     cp_external \
       bazel-bin/tensorflow/tools/pip_package/simple_console_for_window_unzip/runfiles \
       "${TMPDIR}/external"
@@ -123,7 +122,6 @@ function main() {
         bazel-bin/tensorflow/tools/pip_package/build_pip_package.runfiles/org_tensorflow/tensorflow \
         "${TMPDIR}"
       mkdir "${TMPDIR}/external"
-      # Note: this makes an extra copy of org_tensorflow.
       cp_external \
         bazel-bin/tensorflow/tools/pip_package/build_pip_package.runfiles \
         "${TMPDIR}/external"
diff --git a/tensorflow/tools/pip_package/pip_smoke_test.py b/tensorflow/tools/pip_package/pip_smoke_test.py
index cc46dd5162..22e1584b78 100644
--- a/tensorflow/tools/pip_package/pip_smoke_test.py
+++ b/tensorflow/tools/pip_package/pip_smoke_test.py
@@ -42,6 +42,7 @@ BLACKLIST = [
     "//tensorflow/python:extra_py_tests_deps",
     "//tensorflow/cc/saved_model:saved_model_half_plus_two",
     "//tensorflow:no_tensorflow_py_deps",
+    "//tensorflow/tools/pip_package:win_pip_package_marker",
     "//tensorflow/python:test_ops_2",
     "//tensorflow/python:tf_optimizer",
     "//tensorflow/python:compare_test_proto_py",
-- 
GitLab


From 78f199db1c6670b0de8b0a70a42e08ee57eff6d3 Mon Sep 17 00:00:00 2001
From: Mark Daoust <markdaoust@google.com>
Date: Wed, 6 Dec 2017 11:23:00 -0800
Subject: [PATCH 0688/1225] fix link to quickdraw data

PiperOrigin-RevId: 178125503
---
 tensorflow/docs_src/tutorials/recurrent_quickdraw.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/docs_src/tutorials/recurrent_quickdraw.md b/tensorflow/docs_src/tutorials/recurrent_quickdraw.md
index 8ed8e16c22..7306b4bf56 100644
--- a/tensorflow/docs_src/tutorials/recurrent_quickdraw.md
+++ b/tensorflow/docs_src/tutorials/recurrent_quickdraw.md
@@ -36,7 +36,7 @@ To try the code for this tutorial:
 1.  Download the [tutorial code]
 (https://github.com/tensorflow/models/tree/master/tutorials/rnn/quickdraw/train_model.py).
 1.  [Download the data](#download-the-data) in `TFRecord` format from
-    [here](http://todo.url/deselaers) and unzip it. More details about [how to
+    [here](http://download.tensorflow.org/data/quickdraw_tutorial_dataset_v1.tar.gz) and unzip it. More details about [how to
     obtain the original Quick, Draw!
     data](#optional-download-the-full-quick-draw-data) and [how to convert that
     to `TFRecord` files](#optional-converting-the-data) is available below.
-- 
GitLab


From c2a6168b061ceaa7f8e9deeca556bd87d9094823 Mon Sep 17 00:00:00 2001
From: Mahmoud Abuzaina <mahmoud.abuzaina@intel.com>
Date: Wed, 6 Dec 2017 11:27:54 -0800
Subject: [PATCH 0689/1225] Adding MKL-DNN Reshape op (#14682)

---
 tensorflow/core/kernels/mkl_reshape_op.cc | 182 ++++++++++++++++++++++
 1 file changed, 182 insertions(+)

diff --git a/tensorflow/core/kernels/mkl_reshape_op.cc b/tensorflow/core/kernels/mkl_reshape_op.cc
index 5e98582475..11c92ebdb4 100644
--- a/tensorflow/core/kernels/mkl_reshape_op.cc
+++ b/tensorflow/core/kernels/mkl_reshape_op.cc
@@ -28,6 +28,11 @@ limitations under the License.
 #include "mkl_dnn_types.h"
 #include "tensorflow/core/util/mkl_util.h"
 
+#ifdef INTEL_MKL_DNN
+#include "mkldnn.hpp"
+using mkldnn::stream;
+#endif
+
 namespace tensorflow {
 using CPUDevice = Eigen::ThreadPoolDevice;
 template <typename Device, typename T>
@@ -35,6 +40,7 @@ class MklReshapeOp : public OpKernel {
  public:
   explicit MklReshapeOp(OpKernelConstruction* context) : OpKernel(context) {}
 
+#ifndef INTEL_MKL_DNN
   void Compute(OpKernelContext* context) override {
     const Tensor& input = MklGetInput(context, 0);
     const Tensor& sizes = MklGetInput(context, 1);
@@ -129,7 +135,183 @@ class MklReshapeOp : public OpKernel {
     }
   }
 
+#else
+
  private:
+  // When the input tensor is in MKL layout and we are reshaping the tensor to a
+  // different shape than its actual shape, then we use MKLDNN reorder primitive
+  // to put tensor back in Tensorflow layout. But we can skip this reordering
+  // some times. This function checks for all such cases.
+  bool SkipReorder(const MklDnnShape& mkl_shape_input,
+                   const TensorShape& reshape_to) {
+    CHECK_EQ(mkl_shape_input.IsMklTensor(), true);
+    bool ret = false;
+
+    // If Tensorflow's data format and the underlying format maintained by
+    // MKLDNN are equivalent (both are NHWC or both are NCHW), then we can
+    // safely return true.
+    auto input_mkl_md = mkl_shape_input.GetMklLayout();
+    if (mkl_shape_input.GetTfDataFormat() == input_mkl_md.data.format) {
+      ret = true;
+    }
+
+    return ret;
+  }
+
+ public:
+  void Compute(OpKernelContext* context) override {
+    const Tensor& input_tensor = MklGetInput(context, 0);
+    const Tensor& sizes = MklGetInput(context, 1);
+
+    MklDnnShape mkl_shape_input;
+    GetMklShape(context, kInputSlotIdx, &mkl_shape_input);
+    bool input_in_mkl_format = mkl_shape_input.IsMklTensor();
+    const int64 nelems = input_in_mkl_format ?
+                         mkl_shape_input.GetTfShape().num_elements()
+                         : input_tensor.NumElements();
+
+    // Preliminary validation of sizes.
+    OP_REQUIRES(context, IsLegacyVector(sizes.shape()),
+                errors::InvalidArgument("sizes input must be 1-D, not shape ",
+                                        sizes.shape().DebugString()));
+
+    // Compute the output shape.  Determine product of specified
+    // dimensions, and find the index of the unspecified one.
+    TensorShape shape;
+    int64 product = 1;
+    int unknown_index = -1;
+    switch (sizes.dtype()) {
+      case DT_INT32:
+        OP_REQUIRES_OK(context, ValidateSizes<int32>(sizes, &product,
+                                                     &unknown_index, &shape));
+        break;
+      case DT_INT64:
+        OP_REQUIRES_OK(context, ValidateSizes<int64>(sizes, &product,
+                                                     &unknown_index, &shape));
+        break;
+      default:
+        context->CtxFailure(errors::InvalidArgument(
+            "desired shape must be a DT_INT32 or DT_INT64 vector, not a ",
+            DataTypeString(sizes.dtype())));
+        return;
+    }
+    if (unknown_index != -1) {
+      OP_REQUIRES(
+          context, product > 0,
+          errors::InvalidArgument("Reshape cannot infer the missing input size "
+                                  "for an empty tensor unless all specified "
+                                  "input sizes are non-zero"));
+      const int64 missing = nelems / product;
+      OP_REQUIRES(
+          context, product * missing == nelems,
+          errors::InvalidArgument(
+              "Input to reshape is a tensor with ", nelems,
+              " values, but the requested shape requires a multiple of ",
+              product));
+      shape.set_dim(unknown_index, missing);
+    }
+    OP_REQUIRES(context, shape.num_elements() == nelems,
+                errors::InvalidArgument("Input to reshape is a tensor with ",
+                                        nelems,
+                                        " values, but the requested shape has ",
+                                        shape.num_elements()));
+
+    if (input_in_mkl_format) {
+      TensorShape& shape_to = shape;
+      TensorShape shape_from = mkl_shape_input.GetTfShape();
+      if (shape_from == shape_to) {
+        CopyMklTensorInToOut(context, kInputSlotIdx, kOutputSlotIdx);
+        return;
+      } else {
+        try {
+          auto cpu_engine = engine(engine::cpu, 0);
+          MklDnnData<T> dnn_data_input(&cpu_engine);
+          // Reshape is just a logical view change operation for a tensor.
+          // It does not change underlying layout. But MKLDNN may maintain
+          // tensor data in different layout than that specified by Tensorflow.
+          // If MKLDNN maintains input tensor in different layout than that
+          // specified by Tensorflow, we will need to reorder tensor and then
+          // put it in the shape expected by Tensorflow. But if MKLDNN has
+          // maintained input tensor in the same layout as it is expected by
+          // Tensorflow, we don't need to reorder tensor contents, we just
+          // need to update MklDnnShape object associated with the input
+          // tensor to reflect the shape change expected by reshape.
+          if (!SkipReorder(mkl_shape_input, shape_to)) {
+              // If dimensions that are being expanded or collapsed are not
+              // maintained contiguously by MKLDNN, then we use reorder.
+
+              // Get Mkl layout of input tensor.
+              auto input_mkl_md = mkl_shape_input.GetMklLayout();
+              // Set input Mkl layout as the user layout.
+              dnn_data_input.SetUsrMem(input_mkl_md, &input_tensor);
+              // Get expected Tensorflow layout of input tensor.
+              auto output_tf_md = mkl_shape_input.GetTfLayout();
+              auto output_tf_pd = memory::primitive_desc(output_tf_md,
+                                                         cpu_engine);
+
+              Tensor* output_tensor = nullptr;
+              MklShape mkl_shape_output;
+              mkl_shape_output.SetMklTensor(false);
+              // We allocate output tensor in the shape expected by Reshape.
+              AllocateOutputSetMklShape(context, kOutputSlotIdx, &output_tensor,
+                                        shape_to, mkl_shape_output);
+
+              // Insert reorder between Mkl layout and TensorFlow layout.
+              std::vector<primitive> net;
+              CHECK_EQ(dnn_data_input.CheckReorderToOpMem(output_tf_pd,
+                       output_tensor, &net), true);
+              stream(stream::kind::eager).submit(net).wait();
+              return;
+          } else {
+            // If dimensions that are being expanded or collapsed are
+            // maintained contiguously by MKLDNN, then we skip reorder, just
+            // update MklDnnShape object for the tensorflow tensor, and forward
+            // Tensorflow tensor as it is to the output.
+            auto output_dims = TFShapeToMklDnnDims(shape_to);
+            auto output_strides = CalculateTFStrides(output_dims);
+            auto output_tf_md = MklDnnData<T>::CreateBlockedMemDesc(output_dims,
+                                                               output_strides);
+            auto output_tf_pd = memory::primitive_desc(output_tf_md,
+                                                       cpu_engine);
+
+            // Set MklDnnShape
+            MklDnnShape mkl_shape_output;
+            mkl_shape_output.SetMklTensor(true);
+            mkl_shape_output.SetMklLayout(&output_tf_pd);
+            mkl_shape_output.SetElemType(MklDnnType<T>());
+            mkl_shape_output.SetTfLayout(output_dims.size(), output_dims,
+                                         memory::format::blocked);
+
+            // We now simply forward input Mkl tensor to output and change its
+            // output MklDnnShape object.
+            ForwardMklTensorInToOutWithMklShape(context, kInputSlotIdx,
+                                              kOutputSlotIdx, mkl_shape_output);
+            return;
+          }
+        } catch (mkldnn::error &e) {
+          string error_msg = "Status: " + std::to_string(e.status) +
+                       ", message: " + string(e.message) +
+                       ", in file " + string(__FILE__) + ":" +
+                       std::to_string(__LINE__);
+          OP_REQUIRES_OK(context,
+                   errors::Aborted("Operation received an exception:",
+                      error_msg));
+        }
+      }
+    } else {
+      // If input tensor is not in Mkl format, then just copy Tensorflow tensor
+      // to output with specified shape.
+      CopyTfTensorInToOutWithShape(context, kInputSlotIdx, kOutputSlotIdx,
+                                   shape);
+    }
+  }
+
+#endif  // INTEL_MKL_DNN
+
+ private:
+  const int kInputSlotIdx = 0;
+  const int kOutputSlotIdx = 0;
+
   template <typename Tshape>
   Status ValidateSizes(const Tensor& sizes, int64* product, int* unknown_index,
                        TensorShape* shape) {
-- 
GitLab


From 60af864e46c207d136dd241c0705210ca5a4dbc7 Mon Sep 17 00:00:00 2001
From: Derek Murray <mrry@google.com>
Date: Wed, 6 Dec 2017 11:34:32 -0800
Subject: [PATCH 0690/1225] [tf.data] Validate that all elements of a batch
 have the same shape.

Fixes #15152. Previously, it would be possible for elements with
different shapes but the same overall number of elements to be batched
together, which would give unexpected behavior.

PiperOrigin-RevId: 178127613
---
 tensorflow/core/kernels/batch_dataset_op.cc   | 10 ++++++++++
 .../kernel_tests/batch_dataset_op_test.py     | 20 +++++++++++++++++++
 2 files changed, 30 insertions(+)

diff --git a/tensorflow/core/kernels/batch_dataset_op.cc b/tensorflow/core/kernels/batch_dataset_op.cc
index 3dec4f71d8..d5f2fd4c19 100644
--- a/tensorflow/core/kernels/batch_dataset_op.cc
+++ b/tensorflow/core/kernels/batch_dataset_op.cc
@@ -151,6 +151,16 @@ class BatchDatasetOp : public UnaryDatasetOpKernel {
           // Build the output tuple component by copying one slice
           // from each input element in the batch.
           for (size_t i = 0; i < num_batch_elements; ++i) {
+            if (batch_elements[i][component_index].shape() !=
+                first_element.shape()) {
+              return errors::InvalidArgument(
+                  "Cannot batch tensors with different shapes in component ",
+                  component_index, ". First element had shape ",
+                  first_element.shape().DebugString(), " and element ", i,
+                  " had shape ",
+                  batch_elements[i][component_index].shape().DebugString(),
+                  ".");
+            }
             TF_RETURN_IF_ERROR(batch_util::CopyElementToSlice(
                 std::move(batch_elements[i][component_index]), &batch_component,
                 i));
diff --git a/tensorflow/python/kernel_tests/batch_dataset_op_test.py b/tensorflow/python/kernel_tests/batch_dataset_op_test.py
index 0546218601..53c8be1d1d 100644
--- a/tensorflow/python/kernel_tests/batch_dataset_op_test.py
+++ b/tensorflow/python/kernel_tests/batch_dataset_op_test.py
@@ -187,6 +187,26 @@ class BatchDatasetTest(test.TestCase):
       with self.assertRaises(errors.OutOfRangeError):
         sess.run(get_next)
 
+  def testBatchShapeError(self):
+    def generator():
+      yield [1.0, 2.0, 3.0]
+      yield [4.0, 5.0, 6.0]
+      yield [7.0, 8.0, 9.0, 10.0]
+
+    iterator = (dataset_ops.Dataset.from_generator(generator, dtypes.float32,
+                                                   output_shapes=[None])
+                .batch(3)
+                .make_initializable_iterator())
+    next_element = iterator.get_next()
+
+    with self.test_session() as sess:
+      sess.run(iterator.initializer)
+      with self.assertRaisesRegexp(
+          errors.InvalidArgumentError,
+          r"Cannot batch tensors with different shapes in component 0. "
+          r"First element had shape \[3\] and element 2 had shape \[4\]."):
+        sess.run(next_element)
+
   def testPaddedBatchDataset(self):
     seq_lens = array_ops.placeholder(dtypes.int32, shape=[None])
     padded_shape = array_ops.placeholder(dtypes.int64, shape=[1])
-- 
GitLab


From 93aeebad51f29e6d90d091be6e28986079805d3a Mon Sep 17 00:00:00 2001
From: Eugene Brevdo <ebrevdo@google.com>
Date: Wed, 6 Dec 2017 11:37:17 -0800
Subject: [PATCH 0691/1225] [tf] Change TensorArray "enable_identical_shapes"
 to infer_shape by default.

PiperOrigin-RevId: 178128060
---
 tensorflow/python/ops/tensor_array_ops.py | 10 ++--------
 1 file changed, 2 insertions(+), 8 deletions(-)

diff --git a/tensorflow/python/ops/tensor_array_ops.py b/tensorflow/python/ops/tensor_array_ops.py
index 605654d9be..398521c9b5 100644
--- a/tensorflow/python/ops/tensor_array_ops.py
+++ b/tensorflow/python/ops/tensor_array_ops.py
@@ -36,9 +36,6 @@ from tensorflow.python.ops import gen_data_flow_ops
 from tensorflow.python.ops import math_ops
 from tensorflow.python.util import tf_should_use
 
-# TODO(ebrevdo): Set to True in Dec. 4, 2017.
-_ENABLE_IDENTICAL_ELEMENT_SHAPES = False
-
 
 # _GraphTensorArray accesses many of the hidden generated ops, but is in
 # fact built to wrap these methods.
@@ -150,18 +147,15 @@ class _GraphTensorArray(object):
         # will retroactively set the device value of this op.
         def create():
           """Create the TensorArray op."""
-          ta_kwargs = {}
-          if _ENABLE_IDENTICAL_ELEMENT_SHAPES:
-            ta_kwargs["identical_element_shapes"] = infer_shape
           return gen_data_flow_ops._tensor_array_v3(
               dtype=dtype,
               size=size,
               element_shape=element_shape,
+              identical_element_shapes=infer_shape,
               dynamic_size=dynamic_size,
               clear_after_read=clear_after_read,
               tensor_array_name=tensor_array_name,
-              name=scope,
-              **ta_kwargs)
+              name=scope)
         if colocate_with_first_write_call:
           with ops.device(None), ops.colocate_with(None, ignore_existing=True):
             self._handle, self._flow = create()
-- 
GitLab


From 91c75ecc66c630f541a2215844b2012b9f5e6df6 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 6 Dec 2017 12:02:02 -0800
Subject: [PATCH 0692/1225] Allow SparseSegmentReduction ops to have missing
 segment IDs.

PiperOrigin-RevId: 178131721
---
 ...def_SparseSegmentMeanWithNumSegments.pbtxt |  36 +++++
 ...ef_SparseSegmentSqrtNWithNumSegments.pbtxt |  38 +++++
 ..._def_SparseSegmentSumWithNumSegments.pbtxt |  57 +++++++
 .../core/kernels/segment_reduction_ops.cc     | 146 +++++++++++++----
 tensorflow/core/ops/math_ops.cc               | 151 +++++++++++++++++
 .../segment_reduction_ops_test.py             | 135 ++++++++++++++--
 tensorflow/python/ops/math_grad.py            |  25 +++
 tensorflow/python/ops/math_ops.py             | 153 ++++++++++++++++++
 tensorflow/tools/api/golden/tensorflow.pbtxt  |   6 +-
 9 files changed, 702 insertions(+), 45 deletions(-)
 create mode 100644 tensorflow/core/api_def/base_api/api_def_SparseSegmentMeanWithNumSegments.pbtxt
 create mode 100644 tensorflow/core/api_def/base_api/api_def_SparseSegmentSqrtNWithNumSegments.pbtxt
 create mode 100644 tensorflow/core/api_def/base_api/api_def_SparseSegmentSumWithNumSegments.pbtxt

diff --git a/tensorflow/core/api_def/base_api/api_def_SparseSegmentMeanWithNumSegments.pbtxt b/tensorflow/core/api_def/base_api/api_def_SparseSegmentMeanWithNumSegments.pbtxt
new file mode 100644
index 0000000000..d6e1054003
--- /dev/null
+++ b/tensorflow/core/api_def/base_api/api_def_SparseSegmentMeanWithNumSegments.pbtxt
@@ -0,0 +1,36 @@
+op {
+  graph_op_name: "SparseSegmentMeanWithNumSegments"
+  in_arg {
+    name: "indices"
+    description: <<END
+A 1-D tensor. Has same rank as `segment_ids`.
+END
+  }
+  in_arg {
+    name: "segment_ids"
+    description: <<END
+A 1-D tensor. Values should be sorted and can be repeated.
+END
+  }
+  in_arg {
+    name: "num_segments"
+    description: <<END
+Should equal the number of distinct segment IDs.
+END
+  }
+  out_arg {
+    name: "output"
+    description: <<END
+Has same shape as data, except for dimension 0 which has size
+`num_segments`.
+END
+  }
+  summary: "Computes the mean along sparse segments of a tensor."
+  description: <<END
+Like `SparseSegmentMean`, but allows missing ids in `segment_ids`. If an id is
+misisng, the `output` tensor at that position will be zeroed.
+
+Read @{$math_ops#segmentation$the section on segmentation} for an explanation of
+segments.
+END
+}
diff --git a/tensorflow/core/api_def/base_api/api_def_SparseSegmentSqrtNWithNumSegments.pbtxt b/tensorflow/core/api_def/base_api/api_def_SparseSegmentSqrtNWithNumSegments.pbtxt
new file mode 100644
index 0000000000..9ba98b8191
--- /dev/null
+++ b/tensorflow/core/api_def/base_api/api_def_SparseSegmentSqrtNWithNumSegments.pbtxt
@@ -0,0 +1,38 @@
+op {
+  graph_op_name: "SparseSegmentSqrtNWithNumSegments"
+  in_arg {
+    name: "indices"
+    description: <<END
+A 1-D tensor. Has same rank as `segment_ids`.
+END
+  }
+  in_arg {
+    name: "segment_ids"
+    description: <<END
+A 1-D tensor. Values should be sorted and can be repeated.
+END
+  }
+  in_arg {
+    name: "num_segments"
+    description: <<END
+Should equal the number of distinct segment IDs.
+END
+  }
+  out_arg {
+    name: "output"
+    description: <<END
+Has same shape as data, except for dimension 0 which
+has size `k`, the number of segments.
+END
+  }
+  summary: "Computes the sum along sparse segments of a tensor divided by the sqrt of N."
+  description: <<END
+N is the size of the segment being reduced.
+
+Like `SparseSegmentSqrtN`, but allows missing ids in `segment_ids`. If an id is
+misisng, the `output` tensor at that position will be zeroed.
+
+Read @{$math_ops#segmentation$the section on segmentation} for an explanation of
+segments.
+END
+}
diff --git a/tensorflow/core/api_def/base_api/api_def_SparseSegmentSumWithNumSegments.pbtxt b/tensorflow/core/api_def/base_api/api_def_SparseSegmentSumWithNumSegments.pbtxt
new file mode 100644
index 0000000000..3aeaba38e9
--- /dev/null
+++ b/tensorflow/core/api_def/base_api/api_def_SparseSegmentSumWithNumSegments.pbtxt
@@ -0,0 +1,57 @@
+op {
+  graph_op_name: "SparseSegmentSumWithNumSegments"
+  in_arg {
+    name: "indices"
+    description: <<END
+A 1-D tensor. Has same rank as `segment_ids`.
+END
+  }
+  in_arg {
+    name: "segment_ids"
+    description: <<END
+A 1-D tensor. Values should be sorted and can be repeated.
+END
+  }
+  in_arg {
+    name: "num_segments"
+    description: <<END
+Should equal the number of distinct segment IDs.
+END
+  }
+  out_arg {
+    name: "output"
+    description: <<END
+Has same shape as data, except for dimension 0 which
+has size `num_segments`.
+END
+  }
+  summary: "Computes the sum along sparse segments of a tensor."
+  description: <<END
+Like `SparseSegmentSum`, but allows missing ids in `segment_ids`. If an id is
+misisng, the `output` tensor at that position will be zeroed.
+
+Read @{$math_ops#segmentation$the section on segmentation} for an explanation of
+segments.
+
+For example:
+
+```python
+c = tf.constant([[1,2,3,4], [-1,-2,-3,-4], [5,6,7,8]])
+
+tf.sparse_segment_sum_with_num_segments(
+    c, tf.constant([0, 1]), tf.constant([0, 0]), num_segments=3)
+# => [[0 0 0 0]
+#     [0 0 0 0]
+#     [0 0 0 0]]
+
+tf.sparse_segment_sum_with_num_segments(c,
+                                        tf.constant([0, 1]),
+                                        tf.constant([0, 2],
+                                        num_segments=4))
+# => [[ 1  2  3  4]
+#     [ 0  0  0  0]
+#     [-1 -2 -3 -4]
+#     [ 0  0  0  0]]
+```
+END
+}
diff --git a/tensorflow/core/kernels/segment_reduction_ops.cc b/tensorflow/core/kernels/segment_reduction_ops.cc
index 2334e50f1d..3ef1cd1e06 100644
--- a/tensorflow/core/kernels/segment_reduction_ops.cc
+++ b/tensorflow/core/kernels/segment_reduction_ops.cc
@@ -553,10 +553,11 @@ class SparseSegmentReductionOpBase : public OpKernel {
  public:
   explicit SparseSegmentReductionOpBase(OpKernelConstruction* context,
                                         bool is_mean, bool is_sqrtn,
-                                        T default_value)
+                                        bool has_num_segments, T default_value)
       : OpKernel(context),
         is_mean_(is_mean),
         is_sqrtn_(is_sqrtn),
+        has_num_segments_(has_num_segments),
         default_value_(default_value) {}
 
   void Compute(OpKernelContext* context) override {
@@ -564,6 +565,19 @@ class SparseSegmentReductionOpBase : public OpKernel {
     const Tensor& indices = context->input(1);
     const Tensor& segment_ids = context->input(2);
 
+    Index output_rows = -1;
+    if (has_num_segments_) {
+      const Tensor& num_segments = context->input(3);
+
+      OP_REQUIRES(
+          context, num_segments.shape().dims() == 0,
+          errors::InvalidArgument("num_segments should be a scalar, not shape ",
+                                  num_segments.shape().DebugString()));
+      output_rows = internal::SubtleMustCopy(num_segments.scalar<int32>()());
+      OP_REQUIRES(context, output_rows >= 0,
+                  errors::InvalidArgument("segment ids must be >= 0"));
+    }
+
     OP_REQUIRES(context, TensorShapeUtils::IsVector(indices.shape()),
                 errors::InvalidArgument("indices should be a vector."));
     OP_REQUIRES(context, TensorShapeUtils::IsVector(segment_ids.shape()),
@@ -581,10 +595,17 @@ class SparseSegmentReductionOpBase : public OpKernel {
     const auto segment_vec = segment_ids.vec<OutputRow>();
     // Note that the current implementation assumes that segment_vec values are
     // sorted.
-    const OutputRow output_rows =
+    const OutputRow last_segment_id_plus_one =
         num_indices > 0
             ? internal::SubtleMustCopy(segment_vec(num_indices - 1)) + 1
             : 0;
+    if (has_num_segments_) {
+      OP_REQUIRES(
+          context, output_rows >= last_segment_id_plus_one,
+          errors::InvalidArgument("segment ids must be < num_segments"));
+    } else {
+      output_rows = last_segment_id_plus_one;
+    }
     OP_REQUIRES(context, output_rows >= 0,
                 errors::InvalidArgument("segment ids must be >= 0"));
 
@@ -646,11 +667,20 @@ class SparseSegmentReductionOpBase : public OpKernel {
                       indices_vec(start + bad_offset), " out of range [0, ",
                       input_flat.dimension(0), ")"));
 
-      if (end >= num_indices) break;
       start = end;
       ++end;
       uninitialized_index = out_index + 1;
       out_index = next_index;
+      if (end > num_indices) break;
+    }
+
+    // Fill the gap at the end with the default value.
+    if (uninitialized_index < output_rows) {
+      Eigen::DSizes<Eigen::DenseIndex, 2> gap_slice_shape(
+          output_rows - uninitialized_index, num_col);
+      Eigen::TensorMap<Eigen::Tensor<T, 2, Eigen::RowMajor>, Eigen::Unaligned>
+          gap_slice(&output_flat(uninitialized_index, 0), gap_slice_shape);
+      gap_slice.setConstant(default_value_);
     }
   }
 
@@ -786,6 +816,7 @@ class SparseSegmentReductionOpBase : public OpKernel {
 
   const bool is_mean_;
   const bool is_sqrtn_;
+  const bool has_num_segments_;
   const T default_value_;
 };
 
@@ -794,9 +825,20 @@ class SparseSegmentReductionMeanOp
     : public SparseSegmentReductionOpBase<Device, T> {
  public:
   explicit SparseSegmentReductionMeanOp(OpKernelConstruction* context)
-      : SparseSegmentReductionOpBase<Device, T>(context, true /*is_mean*/,
-                                                false /*is_sqrtn*/,
-                                                T(0) /* default_value */) {}
+      : SparseSegmentReductionOpBase<Device, T>(
+            context, true /*is_mean*/, false /*is_sqrtn*/,
+            false /* has_num_segments */, T(0) /* default_value */) {}
+};
+
+template <typename Device, class T>
+class SparseSegmentReductionMeanWithNumSegmentsOp
+    : public SparseSegmentReductionOpBase<Device, T> {
+ public:
+  explicit SparseSegmentReductionMeanWithNumSegmentsOp(
+      OpKernelConstruction* context)
+      : SparseSegmentReductionOpBase<Device, T>(
+            context, true /*is_mean*/, false /*is_sqrtn*/,
+            true /* has_num_segments */, T(0) /* default_value */) {}
 };
 
 template <typename Device, class T>
@@ -804,9 +846,20 @@ class SparseSegmentReductionSqrtNOp
     : public SparseSegmentReductionOpBase<Device, T> {
  public:
   explicit SparseSegmentReductionSqrtNOp(OpKernelConstruction* context)
-      : SparseSegmentReductionOpBase<Device, T>(context, false /*is_mean*/,
-                                                true /*is_sqrtn*/,
-                                                T(0) /* default_value */) {}
+      : SparseSegmentReductionOpBase<Device, T>(
+            context, false /*is_mean*/, true /*is_sqrtn*/,
+            false /* has_num_segments */, T(0) /* default_value */) {}
+};
+
+template <typename Device, class T>
+class SparseSegmentReductionSqrtNWithNumSegmentsOp
+    : public SparseSegmentReductionOpBase<Device, T> {
+ public:
+  explicit SparseSegmentReductionSqrtNWithNumSegmentsOp(
+      OpKernelConstruction* context)
+      : SparseSegmentReductionOpBase<Device, T>(
+            context, false /*is_mean*/, true /*is_sqrtn*/,
+            true /* has_num_segments */, T(0) /* default_value */) {}
 };
 
 template <typename Device, class T>
@@ -814,37 +867,65 @@ class SparseSegmentReductionSumOp
     : public SparseSegmentReductionOpBase<Device, T> {
  public:
   explicit SparseSegmentReductionSumOp(OpKernelConstruction* context)
-      : SparseSegmentReductionOpBase<Device, T>(context, false /*is_mean*/,
-                                                false /*is_sqrtn*/,
-                                                T(0) /* default_value */) {}
+      : SparseSegmentReductionOpBase<Device, T>(
+            context, false /*is_mean*/, false /*is_sqrtn*/,
+            false /* has_num_segments */, T(0) /* default_value */) {}
 };
 
-#define REGISTER_CPU_SPARSE_KERNELS(type)                     \
-  REGISTER_KERNEL_BUILDER(Name("SparseSegmentSum")            \
-                              .Device(DEVICE_CPU)             \
-                              .TypeConstraint<type>("T")      \
-                              .TypeConstraint<int32>("Tidx"), \
-                          SparseSegmentReductionSumOp<CPUDevice, type>);
+template <typename Device, class T>
+class SparseSegmentReductionSumWithNumSegmentsOp
+    : public SparseSegmentReductionOpBase<Device, T> {
+ public:
+  explicit SparseSegmentReductionSumWithNumSegmentsOp(
+      OpKernelConstruction* context)
+      : SparseSegmentReductionOpBase<Device, T>(
+            context, false /*is_mean*/, false /*is_sqrtn*/,
+            true /* has_num_segments */, T(0) /* default_value */) {}
+};
 
+#define REGISTER_CPU_SPARSE_KERNELS(type)                                \
+  REGISTER_KERNEL_BUILDER(Name("SparseSegmentSum")                       \
+                              .Device(DEVICE_CPU)                        \
+                              .TypeConstraint<type>("T")                 \
+                              .TypeConstraint<int32>("Tidx"),            \
+                          SparseSegmentReductionSumOp<CPUDevice, type>); \
+  REGISTER_KERNEL_BUILDER(                                               \
+      Name("SparseSegmentSumWithNumSegments")                            \
+          .Device(DEVICE_CPU)                                            \
+          .TypeConstraint<type>("T")                                     \
+          .TypeConstraint<int32>("Tidx"),                                \
+      SparseSegmentReductionSumWithNumSegmentsOp<CPUDevice, type>);
 TF_CALL_REAL_NUMBER_TYPES(REGISTER_CPU_SPARSE_KERNELS);
 #undef REGISTER_CPU_SPARSE_KERNELS
 
-#define REGISTER_CPU_SPARSE_KERNELS(type)                     \
-  REGISTER_KERNEL_BUILDER(Name("SparseSegmentMean")           \
-                              .Device(DEVICE_CPU)             \
-                              .TypeConstraint<type>("T")      \
-                              .TypeConstraint<int32>("Tidx"), \
-                          SparseSegmentReductionMeanOp<CPUDevice, type>);
+#define REGISTER_CPU_SPARSE_KERNELS(type)                                 \
+  REGISTER_KERNEL_BUILDER(Name("SparseSegmentMean")                       \
+                              .Device(DEVICE_CPU)                         \
+                              .TypeConstraint<type>("T")                  \
+                              .TypeConstraint<int32>("Tidx"),             \
+                          SparseSegmentReductionMeanOp<CPUDevice, type>); \
+  REGISTER_KERNEL_BUILDER(                                                \
+      Name("SparseSegmentMeanWithNumSegments")                            \
+          .Device(DEVICE_CPU)                                             \
+          .TypeConstraint<type>("T")                                      \
+          .TypeConstraint<int32>("Tidx"),                                 \
+      SparseSegmentReductionMeanWithNumSegmentsOp<CPUDevice, type>);
 REGISTER_CPU_SPARSE_KERNELS(float);
 REGISTER_CPU_SPARSE_KERNELS(double);
 #undef REGISTER_CPU_SPARSE_KERNELS
 
-#define REGISTER_CPU_SPARSE_KERNELS(type)                     \
-  REGISTER_KERNEL_BUILDER(Name("SparseSegmentSqrtN")          \
-                              .Device(DEVICE_CPU)             \
-                              .TypeConstraint<type>("T")      \
-                              .TypeConstraint<int32>("Tidx"), \
-                          SparseSegmentReductionSqrtNOp<CPUDevice, type>);
+#define REGISTER_CPU_SPARSE_KERNELS(type)                                  \
+  REGISTER_KERNEL_BUILDER(Name("SparseSegmentSqrtN")                       \
+                              .Device(DEVICE_CPU)                          \
+                              .TypeConstraint<type>("T")                   \
+                              .TypeConstraint<int32>("Tidx"),              \
+                          SparseSegmentReductionSqrtNOp<CPUDevice, type>); \
+  REGISTER_KERNEL_BUILDER(                                                 \
+      Name("SparseSegmentSqrtNWithNumSegments")                            \
+          .Device(DEVICE_CPU)                                              \
+          .TypeConstraint<type>("T")                                       \
+          .TypeConstraint<int32>("Tidx"),                                  \
+      SparseSegmentReductionSqrtNWithNumSegmentsOp<CPUDevice, type>);
 REGISTER_CPU_SPARSE_KERNELS(float);
 REGISTER_CPU_SPARSE_KERNELS(double);
 #undef REGISTER_CPU_SPARSE_KERNELS
@@ -889,9 +970,10 @@ class SparseSegmentGradOpBase : public OpKernel {
 
     // Note that similar to SparseSegmentMean, we assume that segment_vec is
     // already sorted and has non-negative values.
-    const SegmentId num_segments =
+    const SegmentId num_segments = input.dim_size(0);
+    const SegmentId last_segment_id_plus_one =
         internal::SubtleMustCopy(segment_vec(N - 1)) + 1;
-    OP_REQUIRES(context, input.dim_size(0) == num_segments,
+    OP_REQUIRES(context, last_segment_id_plus_one <= num_segments,
                 errors::InvalidArgument("Invalid number of segments"));
 
     // Compute scaling factors for input.
diff --git a/tensorflow/core/ops/math_ops.cc b/tensorflow/core/ops/math_ops.cc
index 45ebfa203b..8ea170ba14 100644
--- a/tensorflow/core/ops/math_ops.cc
+++ b/tensorflow/core/ops/math_ops.cc
@@ -1632,6 +1632,45 @@ Status SparseSegmentReductionGradShapeFn(InferenceContext* c) {
   return Status::OK();
 }
 
+Status SparseSegmentReductionWithNumSegmentsShapeFn(InferenceContext* c) {
+  ShapeHandle data_shape;
+  TF_RETURN_IF_ERROR(c->WithRankAtLeast(c->input(0), 1, &data_shape));
+
+  ShapeHandle indices_shape;
+  TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 1, &indices_shape));
+
+  ShapeHandle segment_ids_shape;
+  TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 1, &segment_ids_shape));
+
+  ShapeHandle num_segments_shape;
+  TF_RETURN_IF_ERROR(c->WithRank(c->input(3), 0, &num_segments_shape));
+
+  // indices and segment_ids should merge cleanly.
+  ShapeHandle unused;
+  TF_RETURN_IF_ERROR(c->Merge(indices_shape, segment_ids_shape, &unused));
+
+  ShapeHandle subshape;
+  TF_RETURN_IF_ERROR(c->Subshape(data_shape, 1, &subshape));
+
+  ShapeHandle out;
+  const Tensor* dim0 = c->input_tensor(3);
+  if (dim0 == nullptr) {
+    // We don't have the value at inference time, so the output
+    // shape is unknown.
+    TF_RETURN_IF_ERROR(c->Concatenate(c->Vector(InferenceContext::kUnknownDim),
+                                      subshape, &out));
+  } else {
+    auto dim0_value = dim0->scalar<int32>()();
+    if (dim0_value < 0) {
+      return errors::InvalidArgument(
+          "Cannot specify a negative value for num_segments");
+    }
+    TF_RETURN_IF_ERROR(c->Concatenate(c->Vector(dim0_value), subshape, &out));
+  }
+  c->set_output(0, out);
+  return Status::OK();
+}
+
 Status UnsortedSegmentReductionShapeFn(InferenceContext* c) {
   ShapeHandle s_data = c->input(0);
   ShapeHandle s_segment_ids = c->input(1);
@@ -1890,6 +1929,7 @@ output: Has same shape as data, except for dimension 0 which
 has size `num_segments`.
 
 )doc");
+
 REGISTER_OP("SparseSegmentSum")
     .Input("data: T")
     .Input("indices: Tidx")
@@ -1938,6 +1978,56 @@ output: Has same shape as data, except for dimension 0 which
   has size `k`, the number of segments.
 )doc");
 
+REGISTER_OP("SparseSegmentSumWithNumSegments")
+    .Input("data: T")
+    .Input("indices: Tidx")
+    .Input("segment_ids: int32")
+    .Input("num_segments: Tnumsegments")
+    .Output("output: T")
+    .Attr("T: realnumbertype")
+    .Attr("Tidx: {int32, int64} = DT_INT32")
+    .Attr("Tnumsegments: {int32,int64} = DT_INT32")
+    .SetShapeFn(SparseSegmentReductionWithNumSegmentsShapeFn)
+    .Doc(R"doc(
+Computes the sum along sparse segments of a tensor.
+
+Like `SparseSegmentSum`, but allows missing ids in `segment_ids`. If an id is
+misisng, the `output` tensor at that position will be zeroed.
+
+Read @{$math_ops#segmentation$the section on segmentation} for an explanation of
+segments.
+
+For example:
+
+```python
+c = tf.constant([[1,2,3,4], [-1,-2,-3,-4], [5,6,7,8]])
+
+tf.sparse_segment_sum_with_num_segments(
+    c, tf.constant([0, 1]), tf.constant([0, 0]), num_segments=3)
+# => [[0 0 0 0]
+#     [0 0 0 0]
+#     [0 0 0 0]]
+
+tf.sparse_segment_sum_with_num_segments(c,
+                                        tf.constant([0, 1]),
+                                        tf.constant([0, 2],
+                                        num_segments=4))
+# => [[ 1  2  3  4]
+#     [ 0  0  0  0]
+#     [-1 -2 -3 -4]
+#     [ 0  0  0  0]]
+```
+
+indices: A 1-D tensor. Has same rank as `segment_ids`.
+
+segment_ids: A 1-D tensor. Values should be sorted and can be repeated.
+
+num_segments: Should equal the number of distinct segment IDs.
+
+output: Has same shape as data, except for dimension 0 which
+  has size `num_segments`.
+)doc");
+
 REGISTER_OP("SparseSegmentMean")
     .Input("data: T")
     .Input("indices: Tidx")
@@ -1964,6 +2054,35 @@ output: Has same shape as data, except for dimension 0 which
 
 )doc");
 
+REGISTER_OP("SparseSegmentMeanWithNumSegments")
+    .Input("data: T")
+    .Input("indices: Tidx")
+    .Input("segment_ids: int32")
+    .Input("num_segments: Tnumsegments")
+    .Output("output: T")
+    .Attr("T: {float, double}")
+    .Attr("Tidx: {int32, int64} = DT_INT32")
+    .Attr("Tnumsegments: {int32,int64} = DT_INT32")
+    .SetShapeFn(SparseSegmentReductionWithNumSegmentsShapeFn)
+    .Doc(R"doc(
+Computes the mean along sparse segments of a tensor.
+
+Like `SparseSegmentMean`, but allows missing ids in `segment_ids`. If an id is
+misisng, the `output` tensor at that position will be zeroed.
+
+Read @{$math_ops#segmentation$the section on segmentation} for an explanation of
+segments.
+
+indices: A 1-D tensor. Has same rank as `segment_ids`.
+
+segment_ids: A 1-D tensor. Values should be sorted and can be repeated.
+
+num_segments: Should equal the number of distinct segment IDs.
+
+output: Has same shape as data, except for dimension 0 which has size
+    `num_segments`.
+)doc");
+
 REGISTER_OP("SparseSegmentMeanGrad")
     .Input("grad: T")
     .Input("indices: Tidx")
@@ -2010,6 +2129,38 @@ output: Has same shape as data, except for dimension 0 which
 
 )doc");
 
+REGISTER_OP("SparseSegmentSqrtNWithNumSegments")
+    .Input("data: T")
+    .Input("indices: Tidx")
+    .Input("segment_ids: int32")
+    .Input("num_segments: Tnumsegments")
+    .Output("output: T")
+    .Attr("T: {float, double}")
+    .Attr("Tidx: {int32, int64} = DT_INT32")
+    .Attr("Tnumsegments: {int32,int64} = DT_INT32")
+    .SetShapeFn(SparseSegmentReductionWithNumSegmentsShapeFn)
+    .Doc(R"doc(
+Computes the sum along sparse segments of a tensor divided by the sqrt of N.
+
+N is the size of the segment being reduced.
+
+Like `SparseSegmentSqrtN`, but allows missing ids in `segment_ids`. If an id is
+misisng, the `output` tensor at that position will be zeroed.
+
+Read @{$math_ops#segmentation$the section on segmentation} for an explanation of
+segments.
+
+indices: A 1-D tensor. Has same rank as `segment_ids`.
+
+segment_ids: A 1-D tensor. Values should be sorted and can be repeated.
+
+num_segments: Should equal the number of distinct segment IDs.
+
+output: Has same shape as data, except for dimension 0 which
+  has size `k`, the number of segments.
+
+)doc");
+
 REGISTER_OP("SparseSegmentSqrtNGrad")
     .Input("grad: T")
     .Input("indices: Tidx")
diff --git a/tensorflow/python/kernel_tests/segment_reduction_ops_test.py b/tensorflow/python/kernel_tests/segment_reduction_ops_test.py
index fd58cdb170..5a54f448d0 100644
--- a/tensorflow/python/kernel_tests/segment_reduction_ops_test.py
+++ b/tensorflow/python/kernel_tests/segment_reduction_ops_test.py
@@ -46,13 +46,13 @@ class SegmentReductionHelper(test.TestCase):
     return constant_op.constant(
         np_values, shape=input_shape, dtype=dtype), np_values
 
-  def _segmentReduce(self, indices, x, op1, op2=None, num_out_rows=None):
+  def _segmentReduce(self, indices, x, op1, op2=None, num_segments=None):
     if not x.size:
       return np.array([])
     indices = np.asarray(indices)
-    if num_out_rows is None:
-      num_out_rows = indices[-1] + 1
-    output = [None] * num_out_rows
+    if num_segments is None:
+      num_segments = indices[-1] + 1
+    output = [None] * num_segments
     slice_shape = x.shape[indices.ndim:]
     x_flat = x.reshape((indices.size,) + slice_shape)
     for i, index in enumerate(indices.ravel()):
@@ -259,7 +259,7 @@ class UnsortedSegmentSumTest(SegmentReductionHelper):
         with self.test_session(use_gpu=True):
           tf_x, np_x = self._input(shape, dtype=dtype)
           np_ans = self._segmentReduce(
-              indices, np_x, np.add, op2=None, num_out_rows=num_segments)
+              indices, np_x, np.add, op2=None, num_segments=num_segments)
           s = math_ops.unsorted_segment_sum(
               data=tf_x, segment_ids=indices, num_segments=num_segments)
           tf_ans = s.eval()
@@ -278,7 +278,7 @@ class UnsortedSegmentSumTest(SegmentReductionHelper):
           num_segments_constant = constant_op.constant(
               num_segments, dtype=dtype)
           np_ans = self._segmentReduce(
-              indices, np_x, np.add, op2=None, num_out_rows=num_segments)
+              indices, np_x, np.add, op2=None, num_segments=num_segments)
           s = math_ops.unsorted_segment_sum(
               data=tf_x,
               segment_ids=indices,
@@ -397,7 +397,7 @@ class UnsortedSegmentSumTest(SegmentReductionHelper):
         with self.test_session(use_gpu=True):
           tf_x, np_x = self._input(shape, dtype=dtype)
           np_ans = self._segmentReduce(
-              indices, np_x, np.add, op2=None, num_out_rows=num_segments)
+              indices, np_x, np.add, op2=None, num_segments=num_segments)
           # Replace np_ans[8] with 0 for the value
           np_ans[8:] = 0
           # Replace 8 with -1 in indices
@@ -417,8 +417,15 @@ class SparseSegmentReductionHelper(SegmentReductionHelper):
     return (constant_op.constant(
         indices, dtype=dtypes_lib.int32), indices, a, b)
 
-  def _sparseSegmentReduce(self, x, indices, segment_indices, op1, op2=None):
-    return self._segmentReduce(segment_indices, x[indices], op1, op2)
+  def _sparseSegmentReduce(self,
+                           x,
+                           indices,
+                           segment_indices,
+                           op1,
+                           op2=None,
+                           num_segments=None):
+    return self._segmentReduce(
+        segment_indices, x[indices], op1, op2, num_segments=num_segments)
 
 
 class SparseSegmentReductionOpTest(SparseSegmentReductionHelper):
@@ -475,6 +482,31 @@ class SparseSegmentReductionOpTest(SparseSegmentReductionHelper):
         tf_ans = s.eval()
         self.assertAllClose(np_ans, tf_ans)
 
+  def testWithNumSegments(self):
+    tf_x, np_x = self._input([10, 4], dtype=dtypes_lib.float32)
+    ops_list = [(np.add, None, math_ops.sparse_segment_sum_with_num_segments),
+                (self._mean_cum_op, self._mean_reduce_op,
+                 math_ops.sparse_segment_mean_with_num_segments)]
+    segment_indices = [0, 2, 2, 2]
+    tf_indices = [8, 3, 0, 9]
+    num_segments = 5
+    with self.test_session(use_gpu=False):
+      for np_op1, np_op2, tf_op in ops_list:
+        np_ans = self._sparseSegmentReduce(
+            np_x,
+            tf_indices,
+            segment_indices,
+            np_op1,
+            np_op2,
+            num_segments=num_segments)
+        s = tf_op(
+            data=tf_x,
+            indices=tf_indices,
+            segment_ids=segment_indices,
+            num_segments=num_segments)
+        tf_ans = s.eval()
+        self.assertAllClose(np_ans, tf_ans)
+
   def testSegmentIdsGreaterThanZero(self):
     tf_x, np_x = self._input([10, 4], dtype=dtypes_lib.float32)
     ops_list = [(np.add, None, math_ops.sparse_segment_sum), (
@@ -583,6 +615,63 @@ class SparseSegmentReductionOpTest(SparseSegmentReductionHelper):
         with self.assertRaisesOpError("segment ids must be >= 0"):
           s.eval()
 
+  def testSegmentWithNumSegmentsValid(self):
+    # Baseline for the test*WithNumSegmentsInvalid* methods below.
+    tf_x, _ = self._input([10, 4], dtype=dtypes_lib.float32)
+    ops_list = [
+        math_ops.sparse_segment_sum_with_num_segments,
+        math_ops.sparse_segment_mean_with_num_segments,
+    ]
+    num_segments = 5
+    segment_indices = [0, 1, 3, 3]
+    tf_indices = [8, 3, 0, 9]
+    with self.test_session(use_gpu=False):
+      for tf_op in ops_list:
+        s = tf_op(
+            data=tf_x,
+            indices=tf_indices,
+            segment_ids=segment_indices,
+            num_segments=num_segments)
+        s.eval()
+
+  def testSegmentWithNumSegmentsInvalid1(self):
+    tf_x, _ = self._input([10, 4], dtype=dtypes_lib.float32)
+    ops_list = [
+        math_ops.sparse_segment_sum_with_num_segments,
+        math_ops.sparse_segment_mean_with_num_segments,
+    ]
+    num_segments = 5
+    segment_indices = [0, 1, 3, 5]
+    tf_indices = [8, 3, 0, 9]
+    with self.test_session(use_gpu=False):
+      for tf_op in ops_list:
+        s = tf_op(
+            data=tf_x,
+            indices=tf_indices,
+            segment_ids=segment_indices,
+            num_segments=num_segments)
+        with self.assertRaisesOpError("segment ids must be < num_segments"):
+          s.eval()
+
+  def testSegmentWithNumSegmentsInvalid2(self):
+    tf_x, _ = self._input([10, 4], dtype=dtypes_lib.float32)
+    ops_list = [
+        math_ops.sparse_segment_sum_with_num_segments,
+        math_ops.sparse_segment_mean_with_num_segments,
+    ]
+    num_segments = -2
+    segment_indices = [0, 1, 3, 3]
+    tf_indices = [8, 3, 0, 9]
+    with self.test_session(use_gpu=False):
+      for tf_op in ops_list:
+        with self.assertRaisesRegexp(
+            ValueError, "Cannot specify a negative value for num_segments"):
+          tf_op(
+              data=tf_x,
+              indices=tf_indices,
+              segment_ids=segment_indices,
+              num_segments=num_segments)
+
   def testGradient(self):
     shape = [10, 4]
 
@@ -601,6 +690,32 @@ class SparseSegmentReductionOpTest(SparseSegmentReductionHelper):
             delta=1)
       self.assertAllClose(jacob_t, jacob_n)
 
+  def testGradientWithEmptySegmentsAtEnd(self):
+    shape = [10, 4]
+
+    num_segments = 5
+    segment_indices = [0, 1, 2, 2]
+    num_indices = len(segment_indices)
+    for tf_op in [
+        math_ops.sparse_segment_sum_with_num_segments,
+        math_ops.sparse_segment_mean_with_num_segments,
+    ]:
+      with self.test_session():
+        tf_indices, _, tf_x, np_x = self._sparse_input(
+            shape, num_indices, dtype=dtypes_lib.float64)
+        s = tf_op(
+            data=tf_x,
+            indices=tf_indices,
+            segment_ids=segment_indices,
+            num_segments=num_segments)
+        jacob_t, jacob_n = gradient_checker.compute_gradient(
+            tf_x,
+            shape,
+            s, [5, 4],
+            x_init_value=np_x.astype(np.double),
+            delta=1)
+      self.assertAllClose(jacob_t, jacob_n)
+
   def testGradientValid(self):
     # Baseline for the testGradient*Invalid* methods below.
     tf_x, _ = self._input([3, 4], dtype=dtypes_lib.float32)
@@ -646,7 +761,7 @@ class SparseSegmentReductionOpTest(SparseSegmentReductionHelper):
     ops_list = [
         math_ops.sparse_segment_mean_grad, math_ops.sparse_segment_sqrt_n_grad
     ]
-    segment_indices = [0, 1, 1, 1]  # 2 segments
+    segment_indices = [0, 1, 1, 4]  # 5 segments
     tf_indices = [8, 3, 0, 9]
     with self.test_session(use_gpu=False):
       for tf_op in ops_list:
diff --git a/tensorflow/python/ops/math_grad.py b/tensorflow/python/ops/math_grad.py
index 38fe093ba7..0239396ae3 100644
--- a/tensorflow/python/ops/math_grad.py
+++ b/tensorflow/python/ops/math_grad.py
@@ -184,6 +184,15 @@ def _SparseSegmentSumGrad(op, grad):
           None)
 
 
+@ops.RegisterGradient("SparseSegmentSumWithNumSegments")
+def _SparseSegmentSumWithNumSegmentsGrad(op, grad):
+  """Gradient for SparseSegmentSumWithNumSegments."""
+  input_rows = array_ops.shape(op.inputs[0])[0]
+  return (math_ops.unsorted_segment_sum(
+      array_ops.gather(grad, op.inputs[2]), op.inputs[1], input_rows), None,
+          None, None)
+
+
 @ops.RegisterGradient("SparseSegmentMean")
 def _SparseSegmentMeanGrad(op, grad):
   """Gradient for SparseSegmentMean."""
@@ -192,6 +201,14 @@ def _SparseSegmentMeanGrad(op, grad):
                                             dim0), None, None)
 
 
+@ops.RegisterGradient("SparseSegmentMeanWithNumSegments")
+def _SparseSegmentMeanWithNumSegmentsGrad(op, grad):
+  """Gradient for SparseSegmentMeanWithNumSegments."""
+  dim0 = array_ops.shape(op.inputs[0])[0]
+  return (math_ops.sparse_segment_mean_grad(grad, op.inputs[1], op.inputs[2],
+                                            dim0), None, None, None)
+
+
 @ops.RegisterGradient("SparseSegmentSqrtN")
 def _SparseSegmentSqrtNGrad(op, grad):
   """Gradient for SparseSegmentSqrtN."""
@@ -200,6 +217,14 @@ def _SparseSegmentSqrtNGrad(op, grad):
                                               dim0), None, None)
 
 
+@ops.RegisterGradient("SparseSegmentSqrtNWithNumSegments")
+def _SparseSegmentSqrtNWithNumSegmentsGrad(op, grad):
+  """Gradient for SparseSegmentSqrtNWithNumSegmnets."""
+  dim0 = array_ops.shape(op.inputs[0])[0]
+  return (math_ops.sparse_segment_sqrt_n_grad(grad, op.inputs[1], op.inputs[2],
+                                              dim0), None, None, None)
+
+
 def _SegmentMinOrMaxGrad(op, grad, is_sorted):
   """Gradient for SegmentMin and (unsorted) SegmentMax. They share similar code."""
   zeros = array_ops.zeros(array_ops.shape(op.inputs[0]),
diff --git a/tensorflow/python/ops/math_ops.py b/tensorflow/python/ops/math_ops.py
index f9538be6c9..6af36343d5 100644
--- a/tensorflow/python/ops/math_ops.py
+++ b/tensorflow/python/ops/math_ops.py
@@ -2495,6 +2495,159 @@ def reduced_shape(input_shape, axes):
       ])  # [1, 1]
 
 
+def sparse_segment_sum(data, indices, segment_ids, name=None,
+                       num_segments=None):
+  r"""Computes the sum along sparse segments of a tensor.
+
+  Read @{$math_ops#segmentation$the section on segmentation} for an explanation
+  of segments.
+
+  Like `SegmentSum`, but `segment_ids` can have rank less than `data`'s first
+  dimension, selecting a subset of dimension 0, specified by `indices`.
+  `segment_ids` is allowed to have missing ids, in which case the output will
+  be zeros at those indices. In those cases `num_segments` is used to determine
+  the size of the output.
+
+  For example:
+
+  ```python
+  c = tf.constant([[1,2,3,4], [-1,-2,-3,-4], [5,6,7,8]])
+
+  # Select two rows, one segment.
+  tf.sparse_segment_sum(c, tf.constant([0, 1]), tf.constant([0, 0]))
+  # => [[0 0 0 0]]
+
+  # Select two rows, two segment.
+  tf.sparse_segment_sum(c, tf.constant([0, 1]), tf.constant([0, 1]))
+  # => [[ 1  2  3  4]
+  #     [-1 -2 -3 -4]]
+
+  # With missing segment ids.
+  tf.sparse_segment_sum(c, tf.constant([0, 1]), tf.constant([0, 2]),
+                        num_segments=4)
+  # => [[ 1  2  3  4]
+  #     [ 0  0  0  0]
+  #     [-1 -2 -3 -4]
+  #     [ 0  0  0  0]]
+
+  # Select all rows, two segments.
+  tf.sparse_segment_sum(c, tf.constant([0, 1, 2]), tf.constant([0, 0, 1]))
+  # => [[0 0 0 0]
+  #     [5 6 7 8]]
+
+  # Which is equivalent to:
+  tf.segment_sum(c, tf.constant([0, 0, 1]))
+  ```
+
+  Args:
+    data: A `Tensor` with data that will be assembled in the output.
+    indices: A 1-D `Tensor` with indices into `data`. Has same rank as
+      `segment_ids`.
+    segment_ids: A 1-D `Tensor` with indices into the output `Tensor`.
+      Values should be sorted and can be repeated.
+    name: A name for the operation (optional).
+    num_segments: An optional int32 scalar. Indicates the size of the output
+      `Tensor`.
+
+  Returns:
+    A `tensor` of the shape as data, except for dimension 0 which
+    has size `k`, the number of segments specified via `num_segments` or
+    inferred for the last element in `segments_ids`.
+  """
+  if num_segments is not None:
+    return gen_math_ops.sparse_segment_sum_with_num_segments(
+        data=data,
+        indices=indices,
+        segment_ids=segment_ids,
+        num_segments=num_segments,
+        name=name)
+  else:
+    return gen_math_ops.sparse_segment_sum(
+        data=data,
+        indices=indices,
+        segment_ids=segment_ids,
+        name=name)
+
+
+def sparse_segment_mean(data, indices, segment_ids, name=None,
+                        num_segments=None):
+  r"""Computes the mean along sparse segments of a tensor.
+
+  Read @{$math_ops#segmentation$the section on segmentation} for an explanation
+  of segments.
+
+  Like `SegmentMean`, but `segment_ids` can have rank less than `data`'s first
+  dimension, selecting a subset of dimension 0, specified by `indices`.
+  `segment_ids` is allowed to have missing ids, in which case the output will
+  be zeros at those indices. In those cases `num_segments` is used to determine
+  the size of the output.
+
+  Args:
+    data: A `Tensor` with data that will be assembled in the output.
+    indices: A 1-D `Tensor` with indices into `data`. Has same rank as
+      `segment_ids`.
+    segment_ids: A 1-D `Tensor` with indices into the output `Tensor`.
+      Values should be sorted and can be repeated.
+    name: A name for the operation (optional).
+    num_segments: An optional int32 scalar. Indicates the size of the output
+      `Tensor`.
+
+  Returns:
+    A `tensor` of the shape as data, except for dimension 0 which
+    has size `k`, the number of segments specified via `num_segments` or
+    inferred for the last element in `segments_ids`.
+  """
+  if num_segments is not None:
+    return gen_math_ops.sparse_segment_mean_with_num_segments(
+        data=data,
+        indices=indices,
+        segment_ids=segment_ids,
+        num_segments=num_segments,
+        name=name)
+  else:
+    return gen_math_ops.sparse_segment_mean(
+        data=data,
+        indices=indices,
+        segment_ids=segment_ids,
+        name=name)
+
+
+def sparse_segment_sqrt_n(data, indices, segment_ids, name=None,
+                          num_segments=None):
+  r"""Computes the sum along sparse segments of a tensor divided by the sqrt(N).
+
+  `N` is the size of the segment being reduced.
+
+  Args:
+    data: A `Tensor` with data that will be assembled in the output.
+    indices: A 1-D `Tensor` with indices into `data`. Has same rank as
+      `segment_ids`.
+    segment_ids: A 1-D `Tensor` with indices into the output `Tensor`.
+      Values should be sorted and can be repeated.
+    name: A name for the operation (optional).
+    num_segments: An optional int32 scalar. Indicates the size of the output
+      `Tensor`.
+
+  Returns:
+    A `tensor` of the shape as data, except for dimension 0 which
+    has size `k`, the number of segments specified via `num_segments` or
+    inferred for the last element in `segments_ids`.
+  """
+  if num_segments is not None:
+    return gen_math_ops.sparse_segment_sqrt_n_with_num_segments(
+        data=data,
+        indices=indices,
+        segment_ids=segment_ids,
+        num_segments=num_segments,
+        name=name)
+  else:
+    return gen_math_ops.sparse_segment_sqrt_n(
+        data=data,
+        indices=indices,
+        segment_ids=segment_ids,
+        name=name)
+
+
 def tensordot(a, b, axes, name=None):
   r"""Tensor contraction of a and b along specified axes.
 
diff --git a/tensorflow/tools/api/golden/tensorflow.pbtxt b/tensorflow/tools/api/golden/tensorflow.pbtxt
index b12cf5a864..4b33aa218c 100644
--- a/tensorflow/tools/api/golden/tensorflow.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.pbtxt
@@ -1842,15 +1842,15 @@ tf_module {
   }
   member_method {
     name: "sparse_segment_mean"
-    argspec: "args=[\'data\', \'indices\', \'segment_ids\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+    argspec: "args=[\'data\', \'indices\', \'segment_ids\', \'name\', \'num_segments\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
   }
   member_method {
     name: "sparse_segment_sqrt_n"
-    argspec: "args=[\'data\', \'indices\', \'segment_ids\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+    argspec: "args=[\'data\', \'indices\', \'segment_ids\', \'name\', \'num_segments\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
   }
   member_method {
     name: "sparse_segment_sum"
-    argspec: "args=[\'data\', \'indices\', \'segment_ids\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+    argspec: "args=[\'data\', \'indices\', \'segment_ids\', \'name\', \'num_segments\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
   }
   member_method {
     name: "sparse_slice"
-- 
GitLab


From a5ac67990437168ce034bdd177b854269803844f Mon Sep 17 00:00:00 2001
From: Mahmoud Abuzaina <mahmoud.abuzaina@intel.com>
Date: Wed, 6 Dec 2017 12:10:05 -0800
Subject: [PATCH 0693/1225] MKL: Adding Relu implementation using the open
 source MKL-DNN (#14457)

* Adding mkl-dnn relu code

* Fixes per PR review
---
 tensorflow/core/kernels/mkl_relu_op.cc | 505 ++++++++++++++++++++++++-
 1 file changed, 484 insertions(+), 21 deletions(-)

diff --git a/tensorflow/core/kernels/mkl_relu_op.cc b/tensorflow/core/kernels/mkl_relu_op.cc
index 86a77d769a..45bdd0ad5c 100644
--- a/tensorflow/core/kernels/mkl_relu_op.cc
+++ b/tensorflow/core/kernels/mkl_relu_op.cc
@@ -28,6 +28,19 @@ limitations under the License.
 #include "mkl_dnn.h"
 #include "mkl_dnn_types.h"
 
+#ifdef INTEL_MKL_DNN
+#include "mkldnn.hpp"
+
+using mkldnn::stream;
+using mkldnn::prop_kind;
+using mkldnn::algorithm;
+using mkldnn::relu_forward;
+using mkldnn::relu_backward;
+using mkldnn::eltwise_relu;
+using mkldnn::eltwise_elu;
+using mkldnn::eltwise_tanh;
+#endif
+
 namespace tensorflow {
 
 typedef Eigen::ThreadPoolDevice CPUDevice;
@@ -45,6 +58,8 @@ struct MklReluHelpers {
   }
 };
 
+#ifndef INTEL_MKL_DNN
+
 template <typename Device, typename T>
 class MklReluOp : public OpKernel {
  public:
@@ -59,6 +74,7 @@ class MklReluOp : public OpKernel {
     GetMklShape(context, 0, &mkl_context.input_shape);
     void* user_i = static_cast<void*>(const_cast<T*>(input.flat<T>().data()));
     bool input_in_mkl_format = mkl_context.input_shape.IsMklTensor();
+
     if (!input_in_mkl_format && !input.dims()) {  // handle the case of a scalar
       const TensorShape& o_shape = input.shape();
       Tensor* out_tensor = nullptr;
@@ -164,6 +180,7 @@ class MklReluOp : public OpKernel {
   } MklReluOpContext;
 };
 
+
 template <typename Device, typename T>
 class MklReluGradOp : public OpKernel {
  public:
@@ -189,18 +206,18 @@ class MklReluGradOp : public OpKernel {
       const Tensor& a = MklGetInput(context, 1);
       void* buf_input = static_cast<void*>(const_cast<T*>(a.flat<T>().data()));
       void* mkl_buffer_convert = nullptr;
+
       dnnPrimitive_t cv_input_to_grad = nullptr;
 
-      // if input and grad are not in the same layout, do a conversion between
-      // them.
+      // if input and grad are not in the same layout,
+      // do a conversion between them.
       if (!dnnLayoutCompare_F32(lt_input, lt_grad)) {
         AllocTmpBuffer(context, mkl_tmp_input_buf_tensor, lt_grad,
                        &mkl_buffer_convert);
         CHECK_EQ(dnnConversionCreate_F32(&cv_input_to_grad, lt_input,
                    lt_grad), E_SUCCESS);
         CHECK_EQ(dnnConversionExecute_F32(cv_input_to_grad, buf_input,
-                                          mkl_buffer_convert),
-                 E_SUCCESS);
+                                          mkl_buffer_convert), E_SUCCESS);
         relu_res[dnnResourceSrc] = mkl_buffer_convert;
         dnnDelete_F32(cv_input_to_grad);
       } else {
@@ -246,7 +263,6 @@ class MklReluGradOp : public OpKernel {
 };
 
 template <typename Device, typename T>
-
 void MklReluGradOp<Device, T>::Compute(OpKernelContext* context) {
   MklReluGradOpContext mkl_context;
   const Tensor& g = MklGetInput(context, 0);
@@ -264,20 +280,21 @@ void MklReluGradOp<Device, T>::Compute(OpKernelContext* context) {
       !MklReluHelpers::ValidateSameSize(context, g, a))
     return;
   Tensor* output = nullptr;
-  if (!input_is_mkl && !grad_is_mkl &&
-      !a.dims()) {  // handle the case of a scalar
-    // Allocate space for g and
+
+  if (!input_is_mkl && !grad_is_mkl && !a.dims()) {
+    // handle the scalar case
     const TensorShape& g_shape = g.shape();
     mkl_context.output_shape.SetMklTensor(false);
     AllocateOutputSetMklShape(context, 0, &output, g_shape,
                               mkl_context.output_shape);
+
     void* out_o = static_cast<void*>(output->flat<T>().data());
     (static_cast<T*>(out_o))[0] =
         (static_cast<T*>(user_g))[0] * ((static_cast<T*>(user_i))[0] > 0);
     return;
   }
 
-  // Generate size, stride for input if input/grad is in MKL format.
+  // generate size, stride for input if input/grad is in mkl format.
   if (grad_is_mkl || input_is_mkl) {
     const MklShape* tmp_mkl_shape =
         (grad_is_mkl) ? &mkl_context.grad_shape : &mkl_context.input_shape;
@@ -308,21 +325,20 @@ void MklReluGradOp<Device, T>::Compute(OpKernelContext* context) {
   float negative_slope = 0.0;
   CHECK_EQ(dnnReLUCreateBackward_F32(&mkl_context.prim_relu_bwd, NULL,
                                      mkl_context.lt_grad, mkl_context.lt_grad,
-                                     negative_slope),
-           E_SUCCESS);
+                                     negative_slope), E_SUCCESS);
   Tensor mkl_tmp_input_buf_tensor;
   mkl_context.MklPrepareReluGradInputs(context, &mkl_tmp_input_buf_tensor);
 
   if (input_is_mkl ||
-      grad_is_mkl) { /*if  grad or input are MKL leave it in MKL*/
+      grad_is_mkl) { /*if  grad or input are mkl leave it in mkl*/
     TensorShape tf_shape;
     mkl_context.output_shape.SetMklTensor(true);
     mkl_context.output_shape.SetMklLayout(mkl_context.prim_relu_bwd,
                                           dnnResourceDiffSrc);
     mkl_context.output_shape.SetTfLayout(
         mkl_context.in_dims, mkl_context.in_sizes, mkl_context.in_strides);
-    // If input_is_mkl or grad_is_mkl, then we copy strides and sizes from Mkl
-    // shape of one that is in MKL layout.
+    // if input_is_mkl or grad_is_mkl, then we copy strides and sizes from mkl
+    // shape of one that is in mkl layout.
     if (grad_is_mkl == true) {
       mkl_context.output_shape.SetTfDimOrder(
           mkl_context.in_dims, mkl_context.grad_shape.GetTfToMklDimMap());
@@ -332,11 +348,9 @@ void MklReluGradOp<Device, T>::Compute(OpKernelContext* context) {
     }
 
     tf_shape.AddDim(dnnLayoutGetMemorySize_F32(static_cast<dnnLayout_t>(
-                        mkl_context.output_shape.GetMklLayout())) /
-                    sizeof(T));
+                    mkl_context.output_shape.GetMklLayout())) / sizeof(T));
     AllocateOutputSetMklShape(context, 0, &output, tf_shape,
                               mkl_context.output_shape);
-
   } else {
     const TensorShape& o_shape = g.shape();
     mkl_context.output_shape.SetMklTensor(false);
@@ -347,13 +361,430 @@ void MklReluGradOp<Device, T>::Compute(OpKernelContext* context) {
   mkl_context.relu_res[dnnResourceDiffSrc] =
       static_cast<void*>(output->flat<T>().data());
 
-  CHECK_EQ(dnnExecute_F32(mkl_context.prim_relu_bwd, mkl_context.relu_res),
-           E_SUCCESS);
+  CHECK_EQ(dnnExecute_F32(mkl_context.prim_relu_bwd,
+                          mkl_context.relu_res),
+                          E_SUCCESS);
   mkl_context.MklCleanup();
 }
 
-/* Register DNN kernels for supported operations and supported types - right now
- * it is only Relu and f32*/
+
+#else  // INTEL_MKL_DNN
+
+template <typename Device, typename T, algorithm alg_kind>
+class MklReluOpBase : public OpKernel {
+ public:
+  ~MklReluOpBase() {}
+
+  explicit MklReluOpBase(OpKernelConstruction* context) : OpKernel(context) {
+  }
+
+  virtual void Compute_Scalar(OpKernelContext* context) = 0;
+
+  void Compute(OpKernelContext* context) override {
+    try {
+      auto cpu_engine = engine(engine::cpu, 0);
+      const size_t src_index = 0;  // index of src input tensor
+      const size_t dst_index = 0;  // index of dst output tensor
+      const Tensor& src_tensor = MklGetInput(context, src_index);
+      MklDnnShape dnn_shape_src;
+      GetMklShape(context, src_index, &dnn_shape_src);
+
+      Tensor* dst_tensor = nullptr;
+      if (src_tensor.dims() == 0) {
+        Compute_Scalar(context);
+        return;
+      }
+
+      // Create relu primitive.
+      MklDnnData<T> src(&cpu_engine);
+      MklDnnData<T> dst(&cpu_engine);
+
+      // Set DNN primitive - src
+      memory::desc src_md({}, memory::data_undef, memory::format_undef);
+      if (dnn_shape_src.IsMklTensor()) {
+        src_md = dnn_shape_src.GetMklLayout();
+      } else {
+        auto src_dims = TFShapeToMklDnnDims(src_tensor.shape());
+        auto src_strides = CalculateTFStrides(src_dims);
+        // Create blocked memory descriptor
+        src_md = MklDnnData<T>::CreateBlockedMemDesc(src_dims, src_strides);
+      }
+      src.SetUsrMem(src_md, &src_tensor);
+
+      T alpha = 0, beta = 0;
+      std::shared_ptr<relu_forward::primitive_desc> relu_fwd_pd;
+      auto relu_fwd_desc = relu_forward::desc(prop_kind::forward_training,
+          // Operator memory descriptor is same as user memory descriptor.
+                                              alg_kind, src.GetUsrMemDesc(),
+                                              alpha, beta);
+      relu_fwd_pd.reset(new relu_forward::primitive_desc(relu_fwd_desc,
+                                                         cpu_engine));
+
+      // allocate dst tensor
+      MklDnnShape dnn_shape_dst;
+      TensorShape tf_shape_dst;
+      if (dnn_shape_src.IsMklTensor()) {
+        dnn_shape_dst.SetMklTensor(true);
+        auto dst_pd = relu_fwd_pd->dst_primitive_desc();
+        dnn_shape_dst.SetMklLayout(&dst_pd);
+        dnn_shape_dst.SetElemType(MklDnnType<T>());
+        dnn_shape_dst.SetTfLayout(dnn_shape_src.GetDimension(),
+                                  dnn_shape_src.GetSizesAsMklDnnDims(),
+                                  dnn_shape_src.GetTfDataFormat());
+        tf_shape_dst.AddDim(dst_pd.get_size()/sizeof(T));
+      } else {
+        dnn_shape_dst.SetMklTensor(false);
+        tf_shape_dst = src_tensor.shape();
+      }
+      AllocateOutputSetMklShape(context, dst_index, &dst_tensor, tf_shape_dst,
+                                dnn_shape_dst);
+
+      // Destination memory descriptor is same as source memory descriptor.
+      auto dst_md = src_md;
+      dst.SetUsrMem(dst_md, dst_tensor);
+
+      // execute net
+      std::vector<primitive> net;
+      auto relu_fwd = relu_forward(*relu_fwd_pd, src.GetOpMem(),
+                                   dst.GetOpMem());
+      net.push_back(relu_fwd);
+      stream(stream::kind::eager).submit(net).wait();
+    } catch (mkldnn::error &e) {
+      string error_msg = "Status: " + std::to_string(e.status) +
+                         ", message: " + string(e.message) +
+                         ", in file " + string(__FILE__) + ":" +
+                         std::to_string(__LINE__);
+      OP_REQUIRES_OK(context,
+                     errors::Aborted("Operation received an exception:",
+                        error_msg));
+    }
+  }
+};
+
+
+template <typename Device, typename T, algorithm alg_kind>
+class MklReluGradOpBase : public OpKernel {
+ public:
+  ~MklReluGradOpBase() {}
+
+  explicit MklReluGradOpBase(OpKernelConstruction* context) :
+    OpKernel(context) {}
+
+  virtual void Compute_Scalar(OpKernelContext* context) = 0;
+
+  void Compute(OpKernelContext* context)  {
+    try {
+      auto cpu_engine = engine(engine::cpu, 0);
+      MklDnnData<T> src(&cpu_engine);
+      MklDnnData<T> diff_dst(&cpu_engine);
+      MklDnnData<T> diff_src(&cpu_engine);
+
+      const size_t diff_dst_index = 0;  // index of diff_dst input tensor
+      const size_t src_index = 1;       // index of src input tensor
+      const size_t diff_src_index = 0;  // index of diff_src output tensor
+
+      const Tensor& src_tensor      = MklGetInput(context, src_index);
+      const Tensor& diff_dst_tensor = MklGetInput(context, diff_dst_index);
+      Tensor* diff_src_tensor       = nullptr;
+
+      MklDnnShape dnn_shape_src, dnn_shape_diff_dst;
+      GetMklShape(context, src_index, &dnn_shape_src);
+      GetMklShape(context, diff_dst_index, &dnn_shape_diff_dst);
+
+      int src_dims_size = src_tensor.dims();
+      if (src_dims_size == 0) {
+        Compute_Scalar(context);
+        return;
+      }
+
+      // Set DNN primitives for src & diff_dst
+      memory::desc src_md({}, memory::data_undef, memory::format_undef);
+      memory::desc diff_dst_md({}, memory::data_undef, memory::format_undef);
+      if (dnn_shape_src.IsMklTensor() || dnn_shape_diff_dst.IsMklTensor()) {
+        if (dnn_shape_diff_dst.IsMklTensor()) {
+          diff_dst_md = dnn_shape_diff_dst.GetMklLayout();
+          src_md = diff_dst_md;
+        } else {
+          src_md = dnn_shape_src.GetMklLayout();
+          diff_dst_md = src_md;
+        }
+      } else {
+        auto src_dims = TFShapeToMklDnnDims(src_tensor.shape());
+        auto src_strides = CalculateTFStrides(src_dims);
+        src_md = MklDnnData<T>::CreateBlockedMemDesc(src_dims, src_strides);
+        diff_dst_md = src_md;
+      }
+      src.SetUsrMem(src_md, &src_tensor);
+      diff_dst.SetUsrMem(diff_dst_md, &diff_dst_tensor);
+
+      T alpha = 0, beta = 0;
+      std::shared_ptr<relu_forward::primitive_desc> relu_fwd_pd;
+      auto relu_fwd_desc = relu_forward::desc(prop_kind::forward_training,
+                                              alg_kind, src_md, alpha, beta);
+      relu_fwd_pd.reset(new relu_forward::primitive_desc(relu_fwd_desc,
+                                                         cpu_engine));
+      auto relu_bwd_desc = relu_backward::desc(alg_kind, diff_dst_md, src_md,
+                                                alpha, beta);
+      auto relu_bwd_pd  = relu_backward::primitive_desc(relu_bwd_desc,
+                                                cpu_engine, *relu_fwd_pd);
+
+      // allocate diff_src tensor
+      MklDnnShape dnn_shape_diff_src;
+      TensorShape tf_shape_diff_src;
+      if (dnn_shape_src.IsMklTensor()) {
+        dnn_shape_diff_src.SetMklTensor(true);
+        auto diff_src_pd = relu_bwd_pd.diff_src_primitive_desc();
+        dnn_shape_diff_src.SetMklLayout(&diff_src_pd);
+        dnn_shape_diff_src.SetElemType(MklDnnType<T>());
+        dnn_shape_diff_src.SetTfLayout(dnn_shape_src.GetDimension(),
+                                       dnn_shape_src.GetSizesAsMklDnnDims(),
+                                       dnn_shape_src.GetTfDataFormat());
+        tf_shape_diff_src.AddDim(diff_src_pd.get_size()/sizeof(T));
+      } else {
+        dnn_shape_diff_src.SetMklTensor(false);
+        tf_shape_diff_src = src_tensor.shape();
+      }
+      AllocateOutputSetMklShape(context, diff_src_index, &diff_src_tensor,
+                                 tf_shape_diff_src, dnn_shape_diff_src);
+
+      // diff_src memory descriptor is same as diff_dst memory descriptor.
+      auto diff_src_md = diff_dst_md;
+      diff_src.SetUsrMem(diff_src_md, diff_src_tensor);
+
+      PrepareAndExecuteNet(relu_bwd_pd, &src, &diff_src, &diff_dst);
+     } catch (mkldnn::error &e) {
+       string error_msg = "Status: " + std::to_string(e.status) +
+                          ", message: " + string(e.message) +
+                          ", in file " + string(__FILE__) + ":" +
+                          std::to_string(__LINE__);
+       OP_REQUIRES_OK(context,
+                      errors::Aborted("Operation received an exception:",
+                                      error_msg));
+    }
+  }
+
+  void PrepareAndExecuteNet(const relu_backward::primitive_desc& relu_prim_desc,
+                  MklDnnData<T>* src, MklDnnData<T>* diff_src, MklDnnData<T>*
+                  diff_dst) {
+    std::vector<primitive> net;
+    net.push_back(relu_backward(relu_prim_desc, src->GetOpMem(),
+                                diff_dst->GetOpMem(), diff_src->GetOpMem()));
+    stream(stream::kind::eager).submit(net).wait();
+  }
+};
+
+
+template <typename Device, typename T>
+class MklReluOp : public MklReluOpBase<Device, T, eltwise_relu> {
+ public:
+  ~MklReluOp() {}
+
+  explicit MklReluOp(OpKernelConstruction* context) :
+  MklReluOpBase<Device, T, eltwise_relu>(context) {}
+
+  virtual void Compute_Scalar(OpKernelContext* context) {
+    const size_t src_index = 0;  // index of src input tensor
+    const size_t dst_index = 0;  // index of dst output tensor
+    const Tensor& src_tensor = MklGetInput(context, src_index);
+    MklDnnShape dnn_shape_src;
+    GetMklShape(context, src_index, &dnn_shape_src);
+
+    Tensor* dst_tensor = nullptr;
+    void* user_i = static_cast<void*>(const_cast<T*>(
+                         src_tensor.flat<T>().data()));
+    MklDnnShape dnn_shape_dst;
+    dnn_shape_dst.SetMklTensor(false);
+    AllocateOutputSetMklShape(context, dst_index, &dst_tensor,
+                              src_tensor.shape(), dnn_shape_dst);
+    void* out_o = static_cast<void*>(dst_tensor->flat<T>().data());
+    (static_cast<T*>(out_o))[0] =
+              std::max((static_cast<T*>(user_i))[0], static_cast<T>(0));
+    return;
+  }
+};
+
+template <typename Device, typename T>
+class MklReluGradOp : public MklReluGradOpBase<Device, T, eltwise_relu> {
+ public:
+  ~MklReluGradOp() {}
+
+  explicit MklReluGradOp(OpKernelConstruction* context) :
+  MklReluGradOpBase<Device, T, eltwise_relu>(context) {}
+
+  virtual void Compute_Scalar(OpKernelContext* context) {
+    const size_t diff_dst_index = 0;  // index of diff_dst input tensor
+    const size_t src_index = 1;       // index of src input tensor
+    const size_t diff_src_index = 0;  // index of diff_src output tensor
+    const Tensor& src_tensor    = MklGetInput(context, src_index);
+    const Tensor& diff_dst_tensor = MklGetInput(context, diff_dst_index);
+    Tensor* diff_src_tensor = nullptr;
+
+    MklDnnShape dnn_shape_diff_dst;
+    GetMklShape(context, diff_dst_index, &dnn_shape_diff_dst);
+
+    int src_dims_size = src_tensor.dims();
+    MklDnnShape dnn_shape_diff_src;
+    dnn_shape_diff_src.SetMklTensor(false);
+    AllocateOutputSetMklShape(context, diff_src_index, &diff_src_tensor,
+                              diff_dst_tensor.shape(), dnn_shape_diff_src);
+    void* out_o = static_cast<void*>(diff_src_tensor->flat<T>().data());
+    void* user_i =
+          static_cast<void*>(const_cast<T*>(src_tensor.flat<T>().data()));
+    void* user_g =
+          static_cast<void*>(const_cast<T*>(diff_dst_tensor.flat<T>().data()));
+    (static_cast<T*>(out_o))[0] = (static_cast<T*>(user_g))[0] *
+                                  ((static_cast<T*>(user_i))[0] > 0);
+    return;
+  }
+};
+
+template <typename Device, typename T>
+class MklEluOp : public MklReluOpBase<Device, T, eltwise_elu> {
+ public:
+  ~MklEluOp() {}
+
+  explicit MklEluOp(OpKernelConstruction* context) :
+  MklReluOpBase<Device, T, eltwise_elu>(context) {}
+
+  virtual void Compute_Scalar(OpKernelContext* context) {
+    const size_t src_index = 0;  // index of src input tensor
+    const size_t dst_index = 0;  // index of dst output tensor
+    const Tensor& src_tensor = MklGetInput(context, src_index);
+    MklDnnShape dnn_shape_src;
+    GetMklShape(context, src_index, &dnn_shape_src);
+
+    Tensor* dst_tensor = nullptr;
+    void* user_i = static_cast<void*>(const_cast<T*>(
+                         src_tensor.flat<T>().data()));
+    MklDnnShape dnn_shape_dst;
+    dnn_shape_dst.SetMklTensor(false);
+    AllocateOutputSetMklShape(context, dst_index, &dst_tensor,
+                              src_tensor.shape(), dnn_shape_dst);
+    void* out_o = static_cast<void*>(dst_tensor->flat<T>().data());
+    // return exp(feature) - 1 if feature > 0; feature otherwise
+    T feature = (static_cast<T*>(user_i))[0];
+    if (feature < 0)
+      (static_cast<T*>(out_o))[0] = std::exp(feature);
+    else
+      (static_cast<T*>(out_o))[0] = feature;
+    return;
+  }
+};
+
+template <typename Device, typename T>
+class MklEluGradOp : public MklReluGradOpBase<Device, T, eltwise_elu> {
+ public:
+  ~MklEluGradOp() {}
+
+  explicit MklEluGradOp(OpKernelConstruction* context) :
+  MklReluGradOpBase<Device, T, eltwise_elu>(context) {}
+
+  virtual void Compute_Scalar(OpKernelContext* context) {
+    const size_t diff_dst_index = 0;  // index of diff_dst input tensor
+    const size_t src_index = 1;       // index of src input tensor
+    const size_t diff_src_index = 0;  // index of diff_src output tensor
+    const Tensor& src_tensor    = MklGetInput(context, src_index);
+    const Tensor& diff_dst_tensor = MklGetInput(context, diff_dst_index);
+    Tensor* diff_src_tensor = nullptr;
+
+    MklDnnShape dnn_shape_diff_dst;
+    GetMklShape(context, diff_dst_index, &dnn_shape_diff_dst);
+
+    int src_dims_size = src_tensor.dims();
+    MklDnnShape dnn_shape_diff_src;
+    dnn_shape_diff_src.SetMklTensor(false);
+    AllocateOutputSetMklShape(context, diff_src_index, &diff_src_tensor,
+                              diff_dst_tensor.shape(), dnn_shape_diff_src);
+    void* out_o = static_cast<void*>(diff_src_tensor->flat<T>().data());
+    void* user_i =
+          static_cast<void*>(const_cast<T*>(src_tensor.flat<T>().data()));
+    void* user_g =
+          static_cast<void*>(const_cast<T*>(diff_dst_tensor.flat<T>().data()));
+    // gradient of elu(x) = 1 if x > 0; elu(x) + 1 otherwise
+    T feature = (static_cast<T*>(user_i))[0];
+    if (feature > 0) {
+      (static_cast<T*>(out_o))[0] = (static_cast<T*>(user_g))[0];
+    } else {
+      T elu = std::exp(feature) - 1;
+      (static_cast<T*>(out_o))[0] = (static_cast<T*>(user_g))[0] * (elu + 1);
+    }
+  }
+};
+
+template <typename Device, typename T>
+class MklTanhOp : public MklReluOpBase<Device, T, eltwise_tanh> {
+ public:
+  ~MklTanhOp() {}
+
+  explicit MklTanhOp(OpKernelConstruction* context) :
+  MklReluOpBase<Device, T, eltwise_tanh>(context) {}
+
+  virtual void Compute_Scalar(OpKernelContext* context) {
+    const size_t src_index = 0;  // index of src input tensor
+    const size_t dst_index = 0;  // index of dst output tensor
+    const Tensor& src_tensor = MklGetInput(context, src_index);
+    MklDnnShape dnn_shape_src;
+    GetMklShape(context, src_index, &dnn_shape_src);
+
+    Tensor* dst_tensor = nullptr;
+    void* user_i = static_cast<void*>(const_cast<T*>(
+                         src_tensor.flat<T>().data()));
+    MklDnnShape dnn_shape_dst;
+    dnn_shape_dst.SetMklTensor(false);
+    AllocateOutputSetMklShape(context, dst_index, &dst_tensor,
+                              src_tensor.shape(), dnn_shape_dst);
+    void* out_o = static_cast<void*>(dst_tensor->flat<T>().data());
+    // tanh(x) = (e^x - e^(-x))/ (e^x + e^(-x))
+    T feature = (static_cast<T*>(user_i))[0];
+    T e1 = std::exp(feature);
+    T e2 = std::exp(-feature);
+    (static_cast<T*>(out_o))[0] = (e1 - e2)/(e1 + e2);
+    return;
+  }
+};
+
+template <typename Device, typename T>
+class MklTanhGradOp : public MklReluGradOpBase<Device, T, eltwise_tanh> {
+ public:
+  ~MklTanhGradOp() {}
+
+  explicit MklTanhGradOp(OpKernelConstruction* context) :
+  MklReluGradOpBase<Device, T, eltwise_tanh>(context) {}
+
+  virtual void Compute_Scalar(OpKernelContext* context) {
+    const size_t diff_dst_index = 0;  // index of diff_dst input tensor
+    const size_t src_index = 1;       // index of src input tensor
+    const size_t diff_src_index = 0;  // index of diff_src output tensor
+    const Tensor& src_tensor    = MklGetInput(context, src_index);
+    const Tensor& diff_dst_tensor = MklGetInput(context, diff_dst_index);
+    Tensor* diff_src_tensor = nullptr;
+
+    MklDnnShape dnn_shape_diff_dst;
+    GetMklShape(context, diff_dst_index, &dnn_shape_diff_dst);
+
+    int src_dims_size = src_tensor.dims();
+    MklDnnShape dnn_shape_diff_src;
+    dnn_shape_diff_src.SetMklTensor(false);
+    AllocateOutputSetMklShape(context, diff_src_index, &diff_src_tensor,
+                              diff_dst_tensor.shape(), dnn_shape_diff_src);
+    void* out_o = static_cast<void*>(diff_src_tensor->flat<T>().data());
+    void* user_i =
+          static_cast<void*>(const_cast<T*>(src_tensor.flat<T>().data()));
+    // gradient of tanh(x) = 1 - tanh(x)^2
+    T feature = (static_cast<T*>(user_i))[0];
+    T e1 = std::exp(feature);
+    T e2 = std::exp(-feature);
+    T tanh = (e1 - e2)/(e1 + e2);
+    void* user_g =
+          static_cast<void*>(const_cast<T*>(diff_dst_tensor.flat<T>().data()));
+    (static_cast<T*>(out_o))[0] = (static_cast<T*>(user_g))[0] *
+                                  (1 - tanh * tanh);
+  }
+};
+
+#endif
+
+// register dnn kernels for supported operations and supported types
 #define REGISTER_RELU_MKL_SUPPORTED_KERNELS_TYPES(type)             \
   REGISTER_KERNEL_BUILDER(Name("_MklRelu")                          \
                               .Device(DEVICE_CPU)                   \
@@ -367,6 +798,38 @@ void MklReluGradOp<Device, T>::Compute(OpKernelContext* context) {
                           MklReluGradOp<CPUDevice, type>);
 TF_CALL_float(REGISTER_RELU_MKL_SUPPORTED_KERNELS_TYPES);
 
+#ifdef INTEL_MKL_DNN
+
+// register dnn kernels for supported operations and supported types
+#define REGISTER_ELU_MKL_SUPPORTED_KERNELS_TYPES(type)             \
+  REGISTER_KERNEL_BUILDER(Name("_MklElu")                          \
+                              .Device(DEVICE_CPU)                   \
+                              .TypeConstraint<type>("T")            \
+                              .Label(mkl_op_registry::kMklOpLabel), \
+                          MklEluOp<CPUDevice, type>);              \
+  REGISTER_KERNEL_BUILDER(Name("_MklEluGrad")                      \
+                              .Device(DEVICE_CPU)                   \
+                              .TypeConstraint<type>("T")            \
+                              .Label(mkl_op_registry::kMklOpLabel), \
+                          MklEluGradOp<CPUDevice, type>);
+TF_CALL_float(REGISTER_ELU_MKL_SUPPORTED_KERNELS_TYPES);
+
+#define REGISTER_TANH_MKL_SUPPORTED_KERNELS_TYPES(type)             \
+  REGISTER_KERNEL_BUILDER(Name("_MklTanh")                          \
+                              .Device(DEVICE_CPU)                   \
+                              .TypeConstraint<type>("T")            \
+                              .Label(mkl_op_registry::kMklOpLabel), \
+                          MklTanhOp<CPUDevice, type>);              \
+  REGISTER_KERNEL_BUILDER(Name("_MklTanhGrad")                      \
+                              .Device(DEVICE_CPU)                   \
+                              .TypeConstraint<type>("T")            \
+                              .Label(mkl_op_registry::kMklOpLabel), \
+                          MklTanhGradOp<CPUDevice, type>);
+TF_CALL_float(REGISTER_TANH_MKL_SUPPORTED_KERNELS_TYPES);
+
+#endif
+
 }  // namespace tensorflow
 
 #endif  // INTEL_MKL
+
-- 
GitLab


From d231eb95999426bebae03b1c1452ae14793ccf88 Mon Sep 17 00:00:00 2001
From: Mahmoud Abuzaina <mahmoud.abuzaina@intel.com>
Date: Wed, 6 Dec 2017 12:11:06 -0800
Subject: [PATCH 0694/1225] Adding MKL-DNN graph pass implementation (#14763)

---
 tensorflow/core/graph/mkl_graph_util.h        |    9 +-
 tensorflow/core/graph/mkl_layout_pass.cc      | 2083 +++++++++++++++++
 tensorflow/core/graph/mkl_layout_pass_test.cc | 1624 +++++++++++++
 .../core/kernels/mkl_input_conversion_op.cc   |  217 +-
 tensorflow/core/ops/nn_ops.cc                 |  173 ++
 tensorflow/core/util/mkl_util.h               |  313 ++-
 6 files changed, 4348 insertions(+), 71 deletions(-)

diff --git a/tensorflow/core/graph/mkl_graph_util.h b/tensorflow/core/graph/mkl_graph_util.h
index 880e4e712e..9f505e6bee 100644
--- a/tensorflow/core/graph/mkl_graph_util.h
+++ b/tensorflow/core/graph/mkl_graph_util.h
@@ -76,12 +76,12 @@ namespace tensorflow {
 namespace mkl_op_registry {
   static const char* kMklOpLabel = "MklOp";
   static const char* kMklOpLabelPattern = "label='MklOp'";
+  // Prefix that we add to Tensorflow op name to construct Mkl op name.
+  static const char* const kMklOpPrefix = "_Mkl";
 
   // Get the name of Mkl op from original TensorFlow op
   // We prefix 'Mkl' to the original op to get Mkl op.
   inline string GetMklOpName(const string& name) {
-    // Prefix that we add to Tensorflow op name to construct Mkl op name.
-    const char* const kMklOpPrefix = "_Mkl";
     return string(kMklOpPrefix) + name;
   }
 
@@ -94,9 +94,6 @@ namespace mkl_op_registry {
     string kernel = KernelsRegisteredForOp(op_name);
     bool result =
         kernel.find(kMklOpLabelPattern) != string::npos && (T == DT_FLOAT);
-    if (result) {
-      VLOG(1) << "mkl_op_registry::" << op_name << " is " << kMklOpLabel;
-    }
     return result;
   }
 
@@ -119,8 +116,6 @@ namespace mkl_op_registry {
                     0 == op_name.compare(GetMklOpName("Maximum")) ||
                     0 == op_name.compare(GetMklOpName("SquaredDifference")));
 
-    VLOG(1) << "mkl_op_registry::" << op_name
-            << " is elementwise MKL op: " << result;
     return result;
   }
 }  // namespace mkl_op_registry
diff --git a/tensorflow/core/graph/mkl_layout_pass.cc b/tensorflow/core/graph/mkl_layout_pass.cc
index 912075aa28..3beca1e5d2 100644
--- a/tensorflow/core/graph/mkl_layout_pass.cc
+++ b/tensorflow/core/graph/mkl_layout_pass.cc
@@ -42,6 +42,8 @@ limitations under the License.
 
 namespace tensorflow {
 
+#ifndef INTEL_MKL_DNN
+
 // This pass implements rewriting of graph to support following scenarios:
 // (A) Merging nodes in the graph
 // (B) Rewriting a node in the graph to a new node
@@ -2213,6 +2215,2087 @@ Status MklLayoutRewritePass::Run(
   return Status::OK();
 }
 
+#else  // INTEL_MKL_DNN
+
+// This pass implements rewriting of graph to support following scenarios:
+// (A) Merging nodes in the graph
+// (B) Rewriting a node in the graph to a new node
+//     Rewrite happens under following scenario:
+//     - Propagating Mkl layout as an additional output tensor
+//        (we will loosely call a tensor that carries Mkl layout as Mkl tensor
+//         henceforth.) from every Mkl supported NN layer.
+//
+// Example of A : Merging nodes in the graph
+// -----------------------------------------
+// Currently, we merge Conv2D+AddBias together. Consider Conv2D and BiasAdd as:
+//
+//           O = Conv2D(A, B)
+//           P = BiasAdd(O, C)
+//
+// We merge them into Conv2DWithBias as:
+//           P = _MklConv2DWithBias(A, A_m, B, B_m, C, C_m)
+//
+// The meaning of A_m, B_m and C_m is explained in B.1.
+//
+// Merge rules:
+//  - The merge for Conv2D and BiasAdd happens when the output of Conv2D _only_
+//    goes to BiasAdd.
+//  - Also, the intersection of attributes of both the nodes must have same
+//    values.
+//  - Both the nodes must have been assigned to same device (if any).
+//
+// Example of B.1 : Rewriting nodes to Mkl nodes
+// ---------------------------------------------
+// Consider a Relu node. Current definition of Relu node looks like:
+//
+//           O = Relu(A)
+//
+// Relu has 1 input (A), and 1 output (O).
+//
+// This rewrite pass will generate a new graph node for Relu (new node is
+// called MklRelu) as:
+//
+//          O, O_m = MklRelu(A, A_m)
+//
+// MklRelu has 2 inputs (A and A_m) and 2 outputs (O and O_m). Here input A is
+// same as input A of Relu; output O is same as output O of Relu. O_m is the
+// additional output tensor that will be set by MklRelu, and it represents
+// Mkl tensor corresponding to O -- in other words, O_m is some kind of
+// metadata for O. A_m is additional input of Relu, and it represents metadata
+// for A - as O_m is metadata for O, A_m is metadata for A. MklRelu receives
+// this metadata from previous node in the graph.
+//
+// When a previous node in the graph is an Mkl node, A_m will represent a valid
+// Mkl tensor. But when a previous node is not an Mkl node, A_m will represent
+// a dummy Mkl tensor.
+//
+// Rewriting rules:
+//  - Selection of a node for rewriting happens by registering the op type of
+//    the node with the rewriting pass. If the op type is not registered, then
+//    all nodes of this op type will not be rewritten.
+//  - Number of inputs after rewriting:
+//      Since for every input Tensorflow tensor, the rewritten node gets Mkl
+//      tensor(s), rewritten node gets 2*N inputs, where N is the number of
+//      inputs for the original node.
+//  - Number of outputs after rewriting:
+//      Since for every output Tensorflow tensor, the rewritten node generates
+//      Mkl tensor(s), the rewritten node generates 2*N outputs, where N is the
+//      number of outputs of the original node.
+//  - Ordering of Tensorflow tensors and Mkl tensors:
+//      Since every rewritten node generates twice the number of inputs and
+//      outputs, one could imagine various orderings among Tensorflow tensors
+//      and Mkl tensors. E.g., assume an op 'Conv2D' that takes (A, B) as
+//      inputs, then the new op '_MklConv2D' can take inputs A, B, A_m and B_m
+//      in A, A_m, B, B_m order or it can also take them in A, B, A_m, B_m
+//      order. Among N inputs one can get N! permutations.
+//
+//      So the question is: which order do we follow? We support 2 types of
+//      orderings: (1) interleaved, and (2) contiguous. Interleaved ordering
+//      follows an intuitive order where an Mkl tensor follows the
+//      corresponding Tensorflow tensor immediately. In the context of the
+//      above example, it will be: A, A_m, B, B_m. Note that the ordering rule
+//      applies to both the inputs and outputs. Contiguous ordering means
+//      all the Tensorflow tensors are contiguous followed by all the Mkl
+//      tensors. We use contiguous ordering as default.
+//
+// Graph rewrite algorithm:
+//      Algorithm: Graph Rewrite
+//      Input: Graph G, Names of the nodes to rewrite and their new names
+//      Output: Modified Graph G' if the nodes are modified, G otherwise.
+//      Start:
+//        N = Topological_Sort(G) // N is a set of nodes in toposort order.
+//        foreach node n in N
+//        do
+//          if (Is_MKL_Op(n))  // Can this node accept an Mkl layout as input.
+//          then
+//            E = set of <incoming edge and its src_output slot> of n
+//            E' = {}   // a new set of edges for rewritten node
+//            foreach <e,s> in E
+//            do
+//              E' U {<e,s>}  // First copy edge which generates Tensorflow
+//                            // tensor as it is
+//              m = Source node of edge e
+//              if Is_Rewritten(m)  // Did we rewrite this node in this pass?
+//              then
+//                E' U {<m,s+1>}    // If yes, then m will generate an Mkl
+//                                  // tensor as an additional output.
+//              else
+//                d = Generate_Dummy_Mkl_Tensor()  // If not, generate a dummy
+//                                                 // Mkl tensor.
+//                E' U {<d,0>}  // The dummy Mkl tensor has only 1 output slot.
+//              fi
+//            done
+//            n' = Build_New_Node(G,new_name,E')
+//            Mark_Rewritten(n')  // Mark the new node as being rewritten.
+//          fi
+//        done
+//
+//      Explanation:
+//        For graph rewrite, we visit nodes of the input graph in the
+//        topological sort order. With this ordering, we visit nodes in the
+//        top-to-bottom fashion. We need this order because while visiting a
+//        node we want that all of its input nodes are visited and rewritten if
+//        applicable. This is because if we need to rewrite a given node
+//        then all of its input nodes need to be fixed (in other words they
+//        cannot be deleted later.)
+//
+//        While visiting a node, we first check if the op type of the node is
+//        an Mkl op. If it is, then we rewrite that node after constructing
+//        new inputs to the node. If the op type of the node is not Mkl op,
+//        then we do not rewrite that node.
+//
+// Handling workspace propagation for certain ops:
+//
+//        Certain backward ops in MKL (MaxPool, LRN and BatchNorm) require
+//        passing of a workspace from their respective forward ops. Workspace
+//        tensors provide memory for storing results of intermediate operations
+//        which are helpful in backward propagation. TensorFlow does not have
+//        a notion of a workspace and as a result does not allow producing
+//        additional outputs from these forward ops. For these ops, we need
+//        to add 2 extra edges between forward ops and their corresponding
+//        backward ops - the first extra edge carries a workspace tensor and
+//        the second one carries an Mkl tensor for the workspace tensor.
+//
+//        Example:
+//
+//        Typical graph for MaxPool and its gradient looks like:
+//
+//        A = MaxPool(T)
+//        B = MaxPoolGrad(X, A, Y)
+//
+//        We will transform this graph to propagate the workspace as:
+//        (with the contiguous ordering)
+//
+//        A, W, A_m, W_m = MklMaxPool(T, T_m)
+//        B, B_m = MklMaxPoolGrad(X, A, Y, W, X_m, A_m, Y_m, W_m)
+//
+//        Here W is the workspace tensor. Transformed tensor names with the
+//        suffix _m are Mkl tensors, and this transformation has been done
+//        using the algorithm discussed earlier. The transformation for
+//        workspace propagation only adds extra outputs (W, W_m) for a forward
+//        op and connects them to the corresponding backward ops.
+//
+//        Terms:
+//
+//        Forward op name = name of the op in the forward pass
+//          where a workspace tensor originates (MaxPool in this example)
+//        Backward op name = name of the op in the backward pass that receives
+//          a workspace tensor from the forward op (MaxPoolGrad in the example)
+//        Slot = Position of the output or input slot that will be
+//               used by the workspace tensor (1 for MklMaxPool as W is the 2nd
+//               output of MaxPool (0 is 1st); 3 for MklMaxPoolGrad)
+//
+//        Question:
+//
+//        How do we associate a backward op to a forward op? There can be more
+//        than one op with the exact same name.
+//
+//        In this example, we associate MaxPoolGrad with MaxPool. But there
+//        could be more than one MaxPool ops. To solve this problem, we look
+//        for _direct_ edge between a forward op and a backward op (tensor A is
+//        flowing along this edge in the example).
+//
+//        How do we transform forward and backward ops when there is no direct
+//        edge between them? In such a case, we generate dummy tensors for
+//        workspace tensors. For the example, transformation of MaxPool will
+//        be exactly same as it would be when there is a direct edge between
+//        the forward and the backward op --- it is just that MaxPool won't
+//        generate any workspace tensor. For MaxPoolGrad, the transformation
+//        will also be same, but instead of connecting W and W_m with the
+//        outputs of MaxPool, we will produce dummy tensors for them, and we
+//        will set workspace_enabled attribute to false.
+//
+class MklLayoutRewritePass : public GraphOptimizationPass {
+ public:
+  MklLayoutRewritePass() {
+    // NOTE: names are alphabetically sorted.
+    csinfo_.addn = "AddN";
+    csinfo_.avg_pool = "AvgPool";
+    csinfo_.avg_pool_grad = "AvgPoolGrad";
+    csinfo_.bias_add = "BiasAdd";
+    csinfo_.bias_add_grad = "BiasAddGrad";
+    csinfo_.concat = "Concat";
+    csinfo_.concatv2 = "ConcatV2";
+    csinfo_.conv2d = "Conv2D";
+    csinfo_.conv2d_with_bias = "__MklDummyConv2DWithBias";
+    csinfo_.conv2d_grad_input = "Conv2DBackpropInput";
+    csinfo_.conv2d_grad_filter = "Conv2DBackpropFilter";
+    csinfo_.conv2d_grad_filter_with_bias =
+                              "__MklDummyConv2DBackpropFilterWithBias";
+    csinfo_.fused_batch_norm = "FusedBatchNorm";
+    csinfo_.fused_batch_norm_grad = "FusedBatchNormGrad";
+    csinfo_.identity = "Identity";
+    csinfo_.lrn = "LRN";
+    csinfo_.lrn_grad = "LRNGrad";
+    csinfo_.matmul = "MatMul";
+    csinfo_.max_pool = "MaxPool";
+    csinfo_.max_pool_grad = "MaxPoolGrad";
+    csinfo_.mkl_conv2d = "_MklConv2D";
+    csinfo_.mkl_conv2d_grad_input = "_MklConv2DBackpropInput";
+    csinfo_.mkl_conv2d_grad_filter = "_MklConv2DBackpropFilter";
+    csinfo_.mkl_conv2d_with_bias = "_MklConv2DWithBias";
+    csinfo_.mkl_conv2d_grad_filter_with_bias =
+                                   "_MklConv2DBackpropFilterWithBias";
+    csinfo_.relu = "Relu";
+    csinfo_.relu_grad = "ReluGrad";
+    csinfo_.tanh       = "Tanh";
+    csinfo_.tanh_grad  = "TanhGrad";
+    csinfo_.reshape = "Reshape";
+    csinfo_.softmax = "Softmax";
+    csinfo_.split = "Split";
+    // Element-wise ops. Ensure you also add any new ops to IsOpElementWise
+    // in the MklUtil.h (IsMklElementWiseOp method) to ensure that the
+    // MklInputConversion op is added before it.
+    csinfo_.add = "Add";
+    csinfo_.maximum = "Maximum";
+    csinfo_.mul = "Mul";
+    csinfo_.squared_difference = "SquaredDifference";
+    csinfo_.sub = "Sub";
+    // End - element-wise ops. See note above.
+
+    // NOTE: names are alphabetically sorted.
+    rinfo_.push_back({csinfo_.addn, mkl_op_registry::GetMklOpName(csinfo_.addn),
+                      CopyAttrsAddN, AddNRewrite});
+    rinfo_.push_back({csinfo_.add,
+                      mkl_op_registry::GetMklOpName(csinfo_.add),
+                      CopyAttrsDataType, AlwaysRewrite});
+    rinfo_.push_back({csinfo_.avg_pool,
+                      mkl_op_registry::GetMklOpName(csinfo_.avg_pool),
+                      CopyAttrsPooling, AlwaysRewrite});
+    rinfo_.push_back({csinfo_.avg_pool_grad,
+                      mkl_op_registry::GetMklOpName(csinfo_.avg_pool_grad),
+                      CopyAttrsPooling, AlwaysRewrite});
+    rinfo_.push_back({csinfo_.concat,
+                      mkl_op_registry::GetMklOpName(csinfo_.concat),
+                      CopyAttrsConcat, AlwaysRewrite});
+    rinfo_.push_back({csinfo_.concatv2,
+                      mkl_op_registry::GetMklOpName(csinfo_.concatv2),
+                      CopyAttrsConcatV2, AlwaysRewrite});
+    rinfo_.push_back({csinfo_.conv2d,
+                      mkl_op_registry::GetMklOpName(csinfo_.conv2d),
+                      CopyAttrsConv2D, AlwaysRewrite});
+    rinfo_.push_back({csinfo_.conv2d_with_bias,
+                      csinfo_.mkl_conv2d_with_bias,
+                      CopyAttrsConv2D, AlwaysRewrite});
+    rinfo_.push_back({csinfo_.conv2d_grad_filter,
+                      mkl_op_registry::GetMklOpName(csinfo_.conv2d_grad_filter),
+                      CopyAttrsConv2D, AlwaysRewrite});
+    rinfo_.push_back({csinfo_.conv2d_grad_filter_with_bias,
+                      csinfo_.mkl_conv2d_grad_filter_with_bias,
+                      CopyAttrsConv2D, AlwaysRewrite});
+    rinfo_.push_back({csinfo_.conv2d_grad_input,
+                      mkl_op_registry::GetMklOpName(csinfo_.conv2d_grad_input),
+                      CopyAttrsConv2D, AlwaysRewrite});
+    rinfo_.push_back({csinfo_.fused_batch_norm,
+                      mkl_op_registry::GetMklOpName(csinfo_.fused_batch_norm),
+                      CopyAttrsFusedBatchNorm, AlwaysRewrite});
+    rinfo_.push_back({csinfo_.fused_batch_norm_grad,
+                      mkl_op_registry::GetMklOpName(csinfo_.fused_batch_norm_grad),
+                      CopyAttrsFusedBatchNorm, AlwaysRewrite});
+    rinfo_.push_back({csinfo_.identity,
+                      mkl_op_registry::GetMklOpName(csinfo_.identity),
+                      CopyAttrsDataType, AlwaysRewrite});
+    /*
+    rinfo_.push_back({csinfo_.lrn,
+                      mkl_op_registry::GetMklOpName(csinfo_.lrn),
+                      CopyAttrsLRN, AlwaysRewrite});
+    rinfo_.push_back({csinfo_.lrn_grad,
+                      mkl_op_registry::GetMklOpName(csinfo_.lrn_grad),
+                      CopyAttrsLRN, AlwaysRewrite});
+    */
+    rinfo_.push_back({csinfo_.max_pool,
+                      mkl_op_registry::GetMklOpName(csinfo_.max_pool),
+                      CopyAttrsPooling, NonDepthBatchWisePoolRewrite});
+    rinfo_.push_back({csinfo_.max_pool_grad,
+                      mkl_op_registry::GetMklOpName(csinfo_.max_pool_grad),
+                      CopyAttrsPooling, AlwaysRewrite});
+    rinfo_.push_back({csinfo_.maximum,
+                      mkl_op_registry::GetMklOpName(csinfo_.maximum),
+                      CopyAttrsDataType, AlwaysRewrite});
+    rinfo_.push_back({csinfo_.mul,
+                      mkl_op_registry::GetMklOpName(csinfo_.mul),
+                      CopyAttrsDataType, AlwaysRewrite});
+    rinfo_.push_back({csinfo_.relu,
+                      mkl_op_registry::GetMklOpName(csinfo_.relu),
+                      CopyAttrsDataType, AlwaysRewrite});
+    rinfo_.push_back({csinfo_.relu_grad,
+                      mkl_op_registry::GetMklOpName(csinfo_.relu_grad),
+                      CopyAttrsDataType, AlwaysRewrite});
+    rinfo_.push_back({csinfo_.tanh,
+                      mkl_op_registry::GetMklOpName(csinfo_.tanh),
+                      CopyAttrsDataType, AlwaysRewrite});
+    rinfo_.push_back({csinfo_.tanh_grad,
+                      mkl_op_registry::GetMklOpName(csinfo_.tanh_grad),
+                      CopyAttrsDataType, AlwaysRewrite});
+    rinfo_.push_back({csinfo_.reshape,
+                      mkl_op_registry::GetMklOpName(csinfo_.reshape),
+                      CopyAttrsReshape, AlwaysRewrite});
+    rinfo_.push_back({csinfo_.softmax,
+                      mkl_op_registry::GetMklOpName(csinfo_.softmax),
+                      CopyAttrsDataType, AlwaysRewrite});
+    rinfo_.push_back({csinfo_.squared_difference,
+                      mkl_op_registry::GetMklOpName(csinfo_.squared_difference),
+                      CopyAttrsDataType, AlwaysRewrite});
+    rinfo_.push_back({csinfo_.sub,
+                      mkl_op_registry::GetMklOpName(csinfo_.sub),
+                      CopyAttrsDataType, AlwaysRewrite});
+
+    // Add info about which ops to add workspace edge to and the slots.
+    wsinfo_.push_back({csinfo_.lrn, csinfo_.lrn_grad, 0, 2, 1, 3});
+    wsinfo_.push_back({csinfo_.max_pool, csinfo_.max_pool_grad, 0, 1, 1, 3});
+
+    // Add a rule for merging nodes
+    minfo_.push_back({csinfo_.conv2d, csinfo_.bias_add,
+                      csinfo_.conv2d_with_bias,
+                      GetConv2DOrBiasAdd});
+
+    minfo_.push_back({csinfo_.conv2d_grad_filter, csinfo_.bias_add_grad,
+                      csinfo_.conv2d_grad_filter_with_bias,
+                      GetConv2DBackpropFilterOrBiasAddGrad});
+  }
+
+  // Standard interface to run pass
+  Status Run(const GraphOptimizationPassOptions& options);
+
+  // Helper function which does most of heavy lifting for rewriting
+  // Mkl nodes to propagate Mkl tensor as additional output
+  //
+  // Extracts common functionality between Run public interface and
+  // test interface.
+  //
+  // @return true, if and only if graph is mutated; false otherwise.
+  bool RunPass(std::unique_ptr<Graph>* g);
+
+  /// Structure to specify the name of an original node, its new name after
+  /// rewrite, the number of inputs to the original node, the function to
+  /// be used to copy attributes for the op, and the rule (if any) which
+  /// must hold for rewriting the node
+  typedef struct {
+    string name;      // Original name of op of the node in the graph
+    string new_name;  // New name of the op of the node in the graph
+    // A function handler to copy attributes from an old node to a new node.
+    std::function<void(const Node*, NodeBuilder*)> copy_attrs;
+    // A rule under which to rewrite this node
+    std::function<bool(const Node*)> rewrite_rule;
+  } RewriteInfo;
+
+  /// Structure to specify a forward op, a backward op, and the slot numbers
+  /// in the forward and backward ops where we will add a workspace edge.
+  typedef struct {
+    string fwd_op;    // Name of a forward op in the graph
+    string bwd_op;    // Name of a backward op in the graph
+    int fwd_slot;     // Output slot in the forward op node where actual
+                      // output tensor resides
+    int bwd_slot;     // Input slot in the backward op node where actual
+                      // input tensor resides
+    int ws_fwd_slot;  // Output slot in the forward op node where workspace
+                      // edge is added
+    int ws_bwd_slot;  // Input slot in the backward op node where workspace
+                      // edge is added
+  } WorkSpaceInfo;
+
+  /// Structure to specify information used in node merge of 2 operators
+  typedef struct {
+    string op1;       // Node string for one operator.
+    string op2;       // Node string for second operator.
+    string new_node;  // Name of the node after merge
+    // Function that enables user of the node merger to specify how to find
+    // second operator given the first operator.
+    std::function<Node*(const Node*)> get_node_to_be_merged;
+  } MergeInfo;
+
+  /// Structure to store all constant strings
+  /// NOTE: names are alphabetically sorted.
+  typedef struct {
+    string addn;
+    string add;
+    string avg_pool;
+    string avg_pool_grad;
+    string bias_add;
+    string bias_add_grad;
+    string concat;
+    string concatv2;
+    string conv2d;
+    string conv2d_with_bias;
+    string conv2d_grad_input;
+    string conv2d_grad_filter;
+    string conv2d_grad_filter_with_bias;
+    string fused_batch_norm;
+    string fused_batch_norm_grad;
+    string identity;
+    string lrn;
+    string lrn_grad;
+    string matmul;
+    string max_pool;
+    string max_pool_grad;
+    string maximum;
+    string mkl_conv2d;
+    string mkl_conv2d_grad_input;
+    string mkl_conv2d_grad_filter;
+    string mkl_conv2d_grad_filter_with_bias;
+    string mkl_conv2d_with_bias;
+    string mul;
+    string relu;
+    string relu_grad;
+    string tanh;
+    string tanh_grad;
+    string reshape;
+    string softmax;
+    string split;
+    string squared_difference;
+    string sub;
+  } ConstStringsInfo;
+
+ private:
+  /// Maintain info about nodes to rewrite
+  std::vector<RewriteInfo> rinfo_;
+
+  /// Maintain info about nodes to add workspace edge
+  std::vector<WorkSpaceInfo> wsinfo_;
+
+  /// Maintain info about nodes to be merged
+  std::vector<MergeInfo> minfo_;
+
+  /// Maintain structure of constant strings
+  static ConstStringsInfo csinfo_;
+
+ private:
+  // Is OpDef::ArgDef a list type? It could be N * T or list(type).
+  // Refer to opdef.proto for details of list type.
+  inline bool ArgIsList(const OpDef::ArgDef& arg) const {
+    return !arg.type_list_attr().empty() || !arg.number_attr().empty();
+  }
+
+  // Get length of a list in 'n' if 'arg' is of list type. Refer to
+  // description of ArgIsList for definition of list type.
+  inline int GetTensorListLength(const OpDef::ArgDef& arg, Node* n) {
+    CHECK_EQ(ArgIsList(arg), true);
+    int N = 0;
+    const string attr_name = !arg.type_list_attr().empty()
+                                 ? arg.type_list_attr()
+                                 : arg.number_attr();
+    if (!arg.type_list_attr().empty()) {
+      std::vector<DataType> value;
+      TF_CHECK_OK(GetNodeAttr(n->def(), attr_name, &value));
+      N = value.size();
+    } else {
+      TF_CHECK_OK(GetNodeAttr(n->def(), attr_name, &N));
+    }
+    return N;
+  }
+
+  // Can op represented by node 'n' run on DEVICE_CPU?
+  // Op can run on CPU with MKL if the runtime assigned device or the
+  // user requested device contains device CPU, or both are empty.
+  bool CanOpRunOnCPUDevice(const Node* n) {
+    bool result = true;
+    string reason;
+
+    // Substring that should be checked for in device name for CPU device.
+    const char* const kCPUDeviceSubStr = "CPU";
+
+    // If Op has been specifically assigned to a non-CPU device, then No.
+    if (!n->assigned_device_name().empty() &&
+        !StringPiece(n->assigned_device_name()).contains(kCPUDeviceSubStr)) {
+      result = false;
+      reason = "Op has been assigned a runtime device that is not CPU.";
+    }
+
+    // If user has specifically assigned this op to a non-CPU device, then No.
+    if (!n->def().device().empty() &&
+        !StringPiece(n->def().device()).contains(kCPUDeviceSubStr)) {
+      result = false;
+      reason = "User has assigned a device that is not CPU.";
+    }
+
+    if (result == false) {
+      VLOG(1) << "MklLayoutRewritePass: Skipping rewriting of the node "
+              << n->type_string() << ", reason: " << reason;
+    }
+
+    // Otherwise Yes.
+    return result;
+  }
+
+  // Return a node that can be merged with input node 'n'
+  //
+  // @return pointer to the node if we can find such a
+  // node. Otherwise, it returns nullptr.
+  Node* CheckForNodeMerge(const Node* n) const;
+
+  // Merge node 'm' with node 'n'.
+  // Currently, we merge (1) Conv2D with BiasAdd, and (2) BiasAddGrad with
+  // Conv2DBackpropFilter.
+  //
+  // Input nodes m and n may be deleted if the call to
+  // this function is successful. Attempt to use the pointers
+  // after the call to function may result in undefined behaviors.
+  //
+  // @input g - input graph, m - graph node, n - graph node to be merged with m
+  // @return Status::OK(), if merging is successful and supported.
+  //         Returns appropriate Status error code otherwise.
+  //         Graph is updated in case nodes are merged. Otherwise, it is
+  //         not updated.
+  Status MergeNode(std::unique_ptr<Graph>* g, Node* m, Node* n);
+
+  // Helper function to merge different nodes
+  Status MergeConv2DWithBiasAdd(std::unique_ptr<Graph>* g, Node* m, Node* n);
+  Status MergeConv2DBackpropFilterWithBiasAddGrad(std::unique_ptr<Graph>* g,
+                                                  Node* m, Node* n);
+
+  // Find BiasAdd or Conv2D node that can be merged with input node 'm'.
+  // If input 'm' is BiasAdd, then check if there exists Conv2D node that can be
+  // merged with 'm'. If input 'm' is Conv2D, then check if there exists BiasAdd
+  // node that can be merged with 'm'.
+  static Node* GetConv2DOrBiasAdd(const Node* m) {
+    CHECK_NOTNULL(m);
+    Node* n = nullptr;
+
+    if (m->type_string() == csinfo_.bias_add) {
+      // If a is BiasAdd, then Conv2D is 0th input of BiasAdd.
+      TF_CHECK_OK(m->input_node(0, &n));
+    } else {
+      CHECK_EQ(m->type_string(), csinfo_.conv2d);
+      // Go over all output edges and search for BiasAdd Node.
+      // 0th input of BiasAdd is Conv2D.
+      for (const Edge* e : m->out_edges()) {
+        if (!e->IsControlEdge() &&
+            e->dst()->type_string() == csinfo_.bias_add &&
+            e->dst_input() == 0) {
+          n = e->dst();
+          break;
+        }
+      }
+    }
+
+    if (n == nullptr) {
+      VLOG(1) << "MklLayoutRewritePass: Could not find matching "
+              << "Conv2D and BiasAdd node for merging. Input node: "
+              << m->DebugString();
+    }
+
+    return n;
+  }
+
+  // Find Conv2DBackpropFilter or BiasAddGrad node that can be merged with input
+  // node 'm'. If input 'm' is Conv2DBackpropFilter, then check if there exists
+  // BiasAddGrad node that can be merged with 'm'. If input 'm' is BiasAddGrad,
+  // then check if there exists Conv2DBackpropFilter node that can be merged
+  // with 'm'.
+  //
+  // Graph that will allow us to connect Conv2DBackpropFilter with BiasAddGrad
+  // would look like:
+  //
+  // _ = Conv2DBackpropFilter(F, _, G)
+  // _ = BiasAddGrad(G)
+  //
+  // So 1st input of BiasAddGrad connects with 3rd input of
+  // Conv2DBackpropFilter and vice versa.
+  static Node* GetConv2DBackpropFilterOrBiasAddGrad(const Node* m) {
+    CHECK_NOTNULL(m);
+    Node* n = nullptr;
+
+    if (m->type_string() == csinfo_.bias_add_grad) {
+      // Get 1st input 'g' of BiasAddGrad.
+      Node* g = nullptr;
+      TF_CHECK_OK(m->input_node(0, &g));
+      // Now traverse all outgoing edges from g that have destination node as
+      // Conv2DBackpropFilter.
+      for (const Edge* e : g->out_edges()) {
+        if (!e->IsControlEdge() &&
+            e->dst()->type_string() == csinfo_.conv2d_grad_filter &&
+            e->dst_input() == 2 /* 3rd input of BackpropFilter */) {
+          n = e->dst();
+          break;
+        }
+      }
+    } else {
+      CHECK_EQ(m->type_string(), csinfo_.conv2d_grad_filter);
+      // Get 3rd input 'g' of Conv2DBackpropFilter.
+      Node* g = nullptr;
+      TF_CHECK_OK(m->input_node(2, &g));
+      // Now traverse all outgoing edges from g that have destination node as
+      // BiasAddGrad.
+      for (const Edge* e : g->out_edges()) {
+        if (!e->IsControlEdge() &&
+            e->dst()->type_string() == csinfo_.bias_add_grad &&
+            e->dst_input() == 0 /* 1st input of BiasAddGrad */) {
+          n = e->dst();
+          break;
+        }
+      }
+    }
+
+    if (n == nullptr) {
+      VLOG(1) << "MklLayoutRewritePass: Could not find matching "
+              << "Conv2DBackpropFilter and BiasAddGrad node for merging. "
+              << "Input node: " << m->DebugString();
+    }
+    return n;
+  }
+
+  // Check if the node 'n' has any applicable rewrite rule
+  // We check for 2 scenarios for rewrite.
+  //
+  // @return RewriteInfo* for the applicable rewrite rule
+  const RewriteInfo* CheckForNodeRewrite(const Node* n) const;
+
+  // Default rewrite rule to be used in scenario 1 for rewrite.
+  // @return - true (since we want to always rewrite)
+  static bool AlwaysRewrite(const Node* n) {
+    return true;
+  }
+
+  // Check if we are performing pooling on depth or batch. If it is, then we
+  // do not rewrite MaxPool node to Mkl version.
+  // @return - true (if it is not a depth/batch wise pooling case);
+  //           false otherwise.
+  static bool NonDepthBatchWisePoolRewrite(const Node* n) {
+    CHECK_NOTNULL(n);
+
+    string data_format_str;
+    TensorFormat data_format;
+    std::vector<int32> ksize, strides;
+    CHECK_EQ(GetNodeAttr(n->def(), "ksize", &ksize).ok(), true);
+    CHECK_EQ(GetNodeAttr(n->def(), "strides", &strides).ok(), true);
+    CHECK_EQ(GetNodeAttr(n->def(), "data_format", &data_format_str).ok(),
+             true);
+    CHECK_EQ(FormatFromString(data_format_str, &data_format), true);
+
+    // Condition that specifies non-batch-wise and non-depth-wise pooling.
+    if (GetTensorDim(ksize,   data_format, 'N') == 1 &&
+        GetTensorDim(strides, data_format, 'N') == 1 &&
+        GetTensorDim(ksize,   data_format, 'C') == 1 &&
+        GetTensorDim(strides, data_format, 'C') == 1) {
+      return true;
+    }
+
+    return false;
+  }
+
+  static bool AddNRewrite(const Node* n) {
+    CHECK_NOTNULL(n);
+
+    int num;
+    CHECK_EQ(GetNodeAttr(n->def(), "N", &num).ok(), true);
+
+    // Condition that specifies non-batch-wise and non-depth-wise pooling.
+    if (num == 2) {
+      return true;
+    }
+
+    return false;
+  }
+
+  // Rewrites input node to a new node specified by its matching rewrite info.
+  //
+  // Method first searches matching rewrite info for input node and then
+  // uses that info to rewrite.
+  //
+  // Input node may be deleted in case of rewrite. Attempt to use the node
+  // after the call can result in undefined behaviors.
+  //
+  // @input  g - input graph, n - Node to be rewritten,
+  //         ri - matching rewriteinfo
+  // @return Status::OK(), if the input node is rewritten;
+  //         Returns appropriate Status error code otherwise.
+  //         Graph is updated in case the input node is rewritten.
+  //         Otherwise, it is not updated.
+  Status RewriteNode(std::unique_ptr<Graph>* g, Node* n, const RewriteInfo* ri);
+
+  // Get nodes that will feed a list of TF tensors to the new
+  // node that we are constructing.
+  //
+  // @input g - input graph,
+  // @input inputs - inputs to old node that we are using for constructing
+  //                 new inputs,
+  // @input input_idx - the index in the 'inputs' vector pointing to the
+  //                    current input that we have processed so far
+  // @output input_idx - index will be incremented by the number of nodes
+  //                     from 'inputs' that are processed
+  // @input list_length - The expected length of list of TF tensors
+  // @output output_nodes - the list of new nodes creating TF tensors
+  //
+  // @return None
+  void GetNodesProducingTFTensorList(
+      const gtl::InlinedVector<std::pair<Node*, int>, 4>& inputs,
+      int* input_idx, int list_length,
+      std::vector<NodeBuilder::NodeOut>* output_nodes);
+
+  // Get nodes that will feed a list of Mkl tensors to the new
+  // node that we are constructing.
+  //
+  // @input g - input graph,
+  // @input orig_node - Original node that we are rewriting
+  // @input inputs - inputs to old node that we are using for constructing
+  //                 new inputs,
+  // @input input_idx - the index in the 'inputs' vector pointing to the
+  //                    current input that we have processed so far
+  // @output input_idx - index will be incremented by the number of nodes
+  //                     from 'inputs' that are processed
+  // @input list_length - The expected length of list of Mkl tensors
+  // @output output_nodes - the list of new nodes creating Mkl tensors
+  //
+  // @return None
+  void GetNodesProducingMklTensorList(std::unique_ptr<Graph>* g,
+    Node* orig_node, const gtl::InlinedVector<std::pair<Node*, int>, 4>& inputs,
+    int* input_idx, int list_length,
+    std::vector<NodeBuilder::NodeOut>* output_nodes);
+
+  // Get a node that will feed an Mkl tensor to the new
+  // node that we are constructing. The output node could be (1) 'n'
+  // if it is Mkl layer, or (2) a dummy node producing dummy Mkl tensor
+  // if 'n' is not an Mkl layer.
+  //
+  // @input g - input graph,
+  // @input orig_node - Original node that we are rewriting,
+  // @input n - Node based on which we are creating Mkl node,
+  // @input n_output_slot - the output slot of node 'n'
+  //            which is feeding to the node that we are constructing
+  // @output mkl_node - the new node that will feed Mkl tensor
+  // @output mkl_node_output_slot - the slot number of mkl_node that
+  //                                will feed the tensor
+  // @return None
+  void GetNodeProducingMklTensor(std::unique_ptr<Graph>* g, Node* orig_node,
+    Node* n, int n_output_slot, Node** mkl_node, int* mkl_node_output_slot);
+
+  // Setup new inputs using old inputs 'inputs' for the rewritten node in 'nb'
+  // in graph 'g'. Original node is input in 'old_node'. Inputs to 'nb' are
+  // set up in contiguous fashion. 'workspace_tensors' carry graph nodes
+  // producing workspace edges if 'are_workspace_tensors_available' is true.
+  // Otherwise, 'workspace_tensors' is empty vector.
+  //
+  // For details, refer to 'Ordering of inputs after rewriting' section in the
+  // documentation above.
+  //
+  // Returns Status::OK() if setting up inputs is successful, otherwise
+  // returns appropriate status code.
+  int SetUpContiguousInputs(
+      std::unique_ptr<Graph>* g,
+      const gtl::InlinedVector<std::pair<Node*, int>, 4>& old_node_inputs,
+      NodeBuilder* nb, Node* old_node,
+      std::vector<NodeBuilder::NodeOut>* workspace_tensors,
+      bool are_workspace_tensors_available);
+
+  // Setup new inputs using old inputs 'inputs' for the rewritten node in 'nb'
+  // in graph 'g'. Original node is input in 'orig_node'.
+  //
+  // For details, refer to 'Ordering of Tensorflow tensors and Mkl tensors'
+  // section in the documentation above.
+  //
+  // Returns Status::OK() if setting up inputs is successful, otherwise
+  // returns appropriate status code.
+  Status SetUpInputs(std::unique_ptr<Graph>* g,
+                     const gtl::InlinedVector<std::pair<Node*, int>, 4>& inputs,
+                     NodeBuilder* nb, Node* orig_node);
+
+  // Add workspace edge on the input or output side of Node 'orig_node' by using
+  // NodeBuilder 'nb' for the new node provided. If 'orig_node' does not dictate
+  // adding workspace edge then do not add it. Workspace Tensorflow and Mkl
+  // tensors, if they need to be added, will be set into these tensors.
+  // If we set workspace tensors, then are_ws_tensors_added should be true.
+  void AddWorkSpaceEdgeIfNeeded(std::unique_ptr<Graph>* g, Node* orig_node,
+                                NodeBuilder* nb,
+                                std::vector<NodeBuilder::NodeOut>* ws_tensors,
+                                bool* are_ws_tensors_added);
+
+  // Functions specific to operators to copy attributes
+  // We need operator-specific function to copy attributes because the framework
+  // does not provide any generic function for it.
+  // NOTE: names are alphabetically sorted.
+  static void CopyAttrsAddN(const Node* orig_node, NodeBuilder* nb);
+  static void CopyAttrsBiasAddGrad(const Node* orig_node, NodeBuilder* nb);
+  static void CopyAttrsConcat(const Node* orig_node, NodeBuilder* nb);
+  static void CopyAttrsConcatV2(const Node* orig_node, NodeBuilder* nb);
+  static void CopyAttrsConv2D(const Node* orig_node, NodeBuilder* nb);
+  static void CopyAttrsDataType(const Node* orig_node, NodeBuilder* nb);
+  static void CopyAttrsFusedBatchNorm(const Node* orig_node, NodeBuilder* nb);
+  static void CopyAttrsLRN(const Node* orig_node, NodeBuilder* nb);
+  static void CopyAttrsPooling(const Node* orig_node, NodeBuilder* nb);
+  static void CopyAttrsReshape(const Node* orig_node, NodeBuilder* nb);
+  static void CopyAttrsSplit(const Node* orig_node, NodeBuilder* nb);
+
+  // Generate a graph node in graph 'g' representing a dummy Mkl tensor node,
+  // using node for original node 'orig_node' and return it in '*out'.
+  // TODO(nhasabni) We should move this to mkl_util.h
+  void GetDummyMklTensorNode(std::unique_ptr<Graph>* g, Node** out,
+                             Node* orig_node);
+  void GetDummyWorkspaceTensorNode(std::unique_ptr<Graph>* g, Node** out,
+                                   Node* orig_node);
+};
+
+MklLayoutRewritePass::ConstStringsInfo MklLayoutRewritePass::csinfo_;
+
+// We register Mkl rewrite pass for phase 1 in post partitioning group.
+// We register it here so that we get a complete picture of all users of Mkl
+// nodes. Do not change the ordering of the Mkl passes.
+const OptimizationPassRegistry::Grouping kMklLayoutRewritePassGroup =
+    OptimizationPassRegistry::POST_PARTITIONING;
+REGISTER_OPTIMIZATION(kMklLayoutRewritePassGroup, 1, MklLayoutRewritePass);
+
+//////////////////////////////////////////////////////////////////////////
+//           Helper functions for creating new node
+//////////////////////////////////////////////////////////////////////////
+
+static void FillInputs(const Node* n,
+                       gtl::InlinedVector<Node*, 4>* control_edges,
+                       gtl::InlinedVector<std::pair<Node*, int>, 4>* in) {
+  control_edges->clear();
+  for (const Edge* e : n->in_edges()) {
+    if (e->IsControlEdge()) {
+      control_edges->push_back(e->src());
+    } else {
+      (*in)[e->dst_input()] = std::make_pair(e->src(), e->src_output());
+    }
+  }
+  std::sort(control_edges->begin(), control_edges->end());
+  if (n->op_def().is_commutative()) {
+    // For commutative inputs, we sort the input by the input Node*
+    // to get a canonical ordering (so that add(a,b) and add(b, a) will
+    // hash to the same value if is_commutative is true for 'add').
+    std::sort(in->begin(), in->end());
+  }
+}
+
+void MklLayoutRewritePass::GetNodesProducingTFTensorList(
+    const gtl::InlinedVector<std::pair<Node*, int>, 4>& inputs, int* input_idx,
+    int list_length, std::vector<NodeBuilder::NodeOut>* output_nodes) {
+  CHECK_LT(*input_idx, inputs.size());
+  CHECK_GT(list_length, 0);
+  CHECK_NOTNULL(output_nodes);
+  output_nodes->reserve(list_length);
+
+  while (list_length != 0) {
+    CHECK_GT(list_length, 0);
+    CHECK_LT(*input_idx, inputs.size());
+    Node* n = inputs[*input_idx].first;
+    int slot = inputs[*input_idx].second;
+    // If input node 'n' is just producing a single tensor at
+    // output slot 'slot' then we just add that single node.
+    output_nodes->push_back(NodeBuilder::NodeOut(n, slot));
+    (*input_idx)++;
+    list_length--;
+  }
+}
+
+// TODO(nhasabni) We should move this to mkl_util.h.
+void MklLayoutRewritePass::GetDummyMklTensorNode(std::unique_ptr<Graph>* g,
+                                                 Node** out, Node* orig_node) {
+  // We use a tensor of shape {8} and value 0,0,0,0,0,0,0,0 to represent
+  // dummy Mkl tensor. 8 = 2*size_t.
+  const DataType dt = DataTypeToEnum<uint8>::v();
+  TensorProto proto;
+  proto.set_dtype(dt);
+  uint8 zero[8] = {0, 0, 0, 0, 0, 0, 0, 0};
+  proto.set_tensor_content(const_cast<const void*>(static_cast<void*>(&zero)),
+                           8);
+  TensorShape dummy_shape({8});
+  dummy_shape.AsProto(proto.mutable_tensor_shape());
+  TF_CHECK_OK(NodeBuilder((*g)->NewName("DMT"), "Const")
+               .Attr("value", proto)
+               .Attr("dtype", dt)
+               .Device(orig_node->def().device())  // We place this node on
+                                                   // the same device as the
+                                                   // device of the original
+                                                   // node.
+               .Finalize(&**g, out));
+
+  // If number of inputs to the original node is > 0, then we add
+  // control dependency between 1st input (index 0) of the original node and
+  // the dummy Mkl node. This is needed because control-flow ops such as Enter,
+  // Merge, etc, require frame_name of the dummy Mkl node to be same as the
+  // rewritten node. Adding control edge between 1st input of the original node
+  // and the dummy Mkl node ensures that the dummy node is in the same frame
+  // as the original node. Choosing 1st input is not necessary - any input of
+  // the original node is fine because all the inputs of a node are always in
+  // the same frame.
+  if (orig_node->num_inputs() > 0) {
+    Node* orig_input0 = nullptr;
+    TF_CHECK_OK(orig_node->input_node(0,
+                                      const_cast<const Node**>(&orig_input0)));
+    CHECK_NOTNULL((*g)->AddControlEdge(orig_input0, *out));
+  }
+
+  (*out)->set_assigned_device_name(orig_node->assigned_device_name());
+}
+
+void MklLayoutRewritePass::GetNodesProducingMklTensorList(
+    std::unique_ptr<Graph>* g,
+    Node* orig_node,
+    const gtl::InlinedVector<std::pair<Node*, int>, 4>& inputs,
+    int* input_idx, int list_length,
+    std::vector<NodeBuilder::NodeOut>* output_nodes) {
+  CHECK_LT(*input_idx, inputs.size());
+  CHECK_GT(list_length, 0);
+  CHECK_NOTNULL(output_nodes);
+  output_nodes->reserve(list_length);
+
+  while (list_length != 0) {
+    CHECK_GT(list_length, 0);
+    CHECK_LT(*input_idx, inputs.size());
+    Node* n = inputs[*input_idx].first;
+    int slot = inputs[*input_idx].second;
+    // If 'n' is producing a single tensor, then create a single Mkl tensor
+    // node.
+    Node* mkl_node = nullptr;
+    int mkl_node_output_slot = 0;
+    GetNodeProducingMklTensor(g, orig_node, n, slot, &mkl_node,
+                              &mkl_node_output_slot);
+    output_nodes->push_back(NodeBuilder::NodeOut(mkl_node,
+                                                mkl_node_output_slot));
+    (*input_idx)++;
+    list_length--;
+  }
+}
+
+// Get an input node that will feed Mkl tensor to the new
+// node that we are constructing. An input node could be (1) 'n'
+// if it is Mkl layer, or (2) a dummy node producing dummy Mkl tensor
+// if 'n' is not an Mkl layer.
+void MklLayoutRewritePass::GetNodeProducingMklTensor(std::unique_ptr<Graph>* g,
+    Node* orig_node, Node* n,
+    int n_output_slot, Node** mkl_node, int* mkl_node_output_slot) {
+  CHECK_NOTNULL(n);
+  CHECK_NOTNULL(mkl_node);
+  CHECK_NOTNULL(mkl_node_output_slot);
+
+  // If this is an MKL op, then it will create extra output for MKL layout.
+  DataType T;
+  if (GetNodeAttr(n->def(), "T", &T).ok() &&
+      mkl_op_registry::IsMklOp(n->type_string(), T)) {
+    // If this is an MKL op, then it will generate an edge that will receive
+    // Mkl tensor from a node.
+    // output slot number for Mkl tensor would be N+slot number of TensorFlow
+    // tensor, where N is total number of TensorFlow tensors.
+    *mkl_node = n;
+    *mkl_node_output_slot =
+        GetTensorMetaDataIndex(n_output_slot, n->num_outputs());
+  } else {
+    // If we have not visited the node and rewritten it, then we need
+    // to create a dummy node that will feed a dummy Mkl tensor to this node.
+    // DummyMklTensor node has no input and generates only 1 output
+    // (dummy Mkl tensor) as output slot number 0.
+    GetDummyMklTensorNode(g, mkl_node, orig_node);
+    CHECK_NOTNULL(*mkl_node);
+    *mkl_node_output_slot = 0;
+  }
+}
+
+int MklLayoutRewritePass::SetUpContiguousInputs(
+    std::unique_ptr<Graph>* g,
+    const gtl::InlinedVector<std::pair<Node*, int>, 4>& old_node_inputs,
+    NodeBuilder* nb, Node* old_node,
+    std::vector<NodeBuilder::NodeOut>* workspace_tensors,
+    bool are_workspace_tensors_available) {
+  CHECK_NOTNULL(workspace_tensors);
+  CHECK_EQ(kTensorOrdering, MklTfTensorOrdering::TENSORS_CONTIGUOUS);
+
+  // TODO(nhasabni): Temporary solution to connect filter input of
+  // BackpropInput with the converted filter from Conv2D.
+  bool do_connect_conv2d_backprop_input_filter = false;
+  Node* conv2d_node = nullptr;
+  // Filter node is 2nd input (slot index 1) of Conv2D.
+  int kConv2DFilterInputSlotIdx = 1;
+  int kConv2DBackpropInputFilterInputSlotIdx = 1;
+  int kConv2DFilterOutputSlotIdx = 1;
+  if (old_node->type_string() == csinfo_.conv2d_grad_input) {
+    // We need to find Conv2D node from Conv2DBackpropInput.
+    // For that let's first find filter node that is 2nd input (slot 1)
+    // of BackpropInput.
+    Node* filter_node = nullptr;
+    old_node->input_node(kConv2DBackpropInputFilterInputSlotIdx, &filter_node);
+    CHECK_NOTNULL(filter_node);
+
+    // Now check which nodes receive from filter_node. Filter feeds as
+    // 2nd input (slot 1) of _MklConv2D and _MklConv2DWithBias.
+    for (const Edge* e : filter_node->out_edges()) {
+      if ((e->dst()->type_string() == csinfo_.mkl_conv2d ||
+           e->dst()->type_string() == csinfo_.mkl_conv2d_with_bias) &&
+          e->dst_input() == kConv2DFilterInputSlotIdx
+          /* filter is 2nd input of Conv2D and _MklConv2D. */) {
+        if (conv2d_node != nullptr) {
+          VLOG(1) << "MklLayoutRewritePass: unusual case of same filter"
+                  << " feeding multiple Conv2D nodes: "
+                  << filter_node->DebugString();
+          // We will not connect filter input of Conv2DBackpropInput
+          // to be safe here.
+          do_connect_conv2d_backprop_input_filter = false;
+          break;
+        } else {
+          conv2d_node = e->dst();
+          do_connect_conv2d_backprop_input_filter = true;
+        }
+      }
+    }
+  }
+
+  // Number of input slots to original op
+  // Input slots are represented by .Input() calls in REGISTER_OP.
+  int old_node_input_slots = old_node->op_def().input_arg_size();
+  // Actual number of inputs can be greater than or equal to number
+  // of Input slots because inputs of type list could be unfolded.
+  CHECK_GE(old_node_inputs.size(), old_node_input_slots);
+  int nn_slot_idx = 0;  // slot index for inputs of new node
+
+  // Let's copy all inputs (TF tensors) of original node to new node.
+  int iidx = 0;
+  for (int on_slot_idx = 0; on_slot_idx < old_node_input_slots; on_slot_idx++) {
+    // An input slot could be a single tensor or a list. We need
+    // to handle this case accordingly.
+    CHECK_LT(iidx, old_node_inputs.size());
+    const OpDef::ArgDef& arg = old_node->op_def().input_arg(on_slot_idx);
+    if (ArgIsList(arg)) {
+      std::vector<NodeBuilder::NodeOut> new_node_inputs;
+      int N = GetTensorListLength(arg, old_node);
+      GetNodesProducingTFTensorList(old_node_inputs, &iidx, N,
+                                    &new_node_inputs);
+      nb->Input(new_node_inputs);
+      nn_slot_idx++;
+    } else {
+      // Special case for connecting filter input of Conv2DBackpropInput
+      if (do_connect_conv2d_backprop_input_filter &&
+          iidx == kConv2DBackpropInputFilterInputSlotIdx) {
+        nb->Input(conv2d_node, kConv2DFilterOutputSlotIdx);
+      } else {
+        nb->Input(old_node_inputs[iidx].first, old_node_inputs[iidx].second);
+      }
+      iidx++;
+      nn_slot_idx++;
+    }
+  }
+
+  // If workspace tensors are available for this op and we are using
+  // contiguous ordering then we need to add Tensorflow tensor for
+  // workspace here because Tensorflow tensor for workspace is the
+  // last tensor in the list of Tensorflow tensors.
+  if (are_workspace_tensors_available) {
+    CHECK_EQ(workspace_tensors->size(), 2);
+    // Tensorflow tensor
+    nb->Input((*workspace_tensors)[0].node, (*workspace_tensors)[0].index);
+    nn_slot_idx++;
+  }
+
+  // Let's now setup all Mkl inputs to a new node.
+  // Number of Mkl inputs must be same as number of TF inputs.
+  iidx = 0;
+  for (int on_slot_idx = 0; on_slot_idx < old_node_input_slots; on_slot_idx++) {
+    // An input slot could be a single tensor or a list. We need
+    // to handle this case accordingly.
+    CHECK_LT(iidx, old_node_inputs.size());
+    const OpDef::ArgDef& arg = old_node->op_def().input_arg(on_slot_idx);
+    if (ArgIsList(arg)) {
+      std::vector<NodeBuilder::NodeOut> new_node_inputs;
+      int N = GetTensorListLength(arg, old_node);
+      GetNodesProducingMklTensorList(g, old_node, old_node_inputs, &iidx,
+                                     N, &new_node_inputs);
+      nb->Input(new_node_inputs);
+      nn_slot_idx++;
+    } else {
+      Node* mkl_node = nullptr;
+      int mkl_node_output_slot = 0;
+      // Special case for connecting filter input of Conv2DBackpropInput
+      if (do_connect_conv2d_backprop_input_filter &&
+          iidx == kConv2DBackpropInputFilterInputSlotIdx) {
+        GetNodeProducingMklTensor(g, old_node, conv2d_node,
+                                  kConv2DFilterOutputSlotIdx, &mkl_node,
+                                  &mkl_node_output_slot);
+      } else {
+        GetNodeProducingMklTensor(g, old_node, old_node_inputs[iidx].first,
+                                  old_node_inputs[iidx].second, &mkl_node,
+                                  &mkl_node_output_slot);
+      }
+      nb->Input(mkl_node, mkl_node_output_slot);
+      iidx++;
+      nn_slot_idx++;
+    }
+  }
+
+  // If workspace tensors are available for this op and we are using
+  // contiguous ordering then we need to add Mkl tensor for
+  // workspace here because Mkl tensor for workspace is the
+  // last tensor in the list of Mkl tensors.
+  if (are_workspace_tensors_available) {
+    CHECK_EQ(workspace_tensors->size(), 2);
+    // Mkl tensor
+    nb->Input((*workspace_tensors)[1].node, (*workspace_tensors)[1].index);
+    nn_slot_idx++;
+  }
+
+  return nn_slot_idx;
+}
+
+Status MklLayoutRewritePass::SetUpInputs(
+    std::unique_ptr<Graph>* g,
+    const gtl::InlinedVector<std::pair<Node*, int>, 4>& old_node_inputs,
+    NodeBuilder* nb, Node* old_node) {
+  // Let's check if we need to add workspace tensors for this node.
+  // We add workspace edge only for MaxPool, LRN and BatchNorm.
+  std::vector<NodeBuilder::NodeOut> workspace_tensors;
+  bool are_workspace_tensors_available = false;
+  AddWorkSpaceEdgeIfNeeded(g, old_node, nb, &workspace_tensors,
+                           &are_workspace_tensors_available);
+
+  int new_node_input_slots = 0;
+  if (kTensorOrdering == MklTfTensorOrdering::TENSORS_INTERLEAVED) {
+    // TODO(nhasabni): implement this function just for same of completion.
+    // We do not use interleaved ordering right now.
+    return Status(
+        error::Code::UNIMPLEMENTED,
+        "Interleaved ordering of tensors is currently not supported.");
+  } else {
+    CHECK_EQ(kTensorOrdering, MklTfTensorOrdering::TENSORS_CONTIGUOUS);
+    new_node_input_slots = SetUpContiguousInputs(
+        g, old_node_inputs, nb, old_node, &workspace_tensors,
+        are_workspace_tensors_available);
+  }
+
+  // Sanity check
+  int old_node_input_slots = old_node->op_def().input_arg_size();
+  if (!are_workspace_tensors_available) {
+    // If we are not adding workspace tensors for this op, then the total
+    // number of input slots to the new node _must_ be 2 times the number
+    // of input slots to the original node: N original Tensorflow tensors and
+    // N for Mkl tensors corresponding to each Tensorflow tensors.
+    CHECK_EQ(new_node_input_slots, old_node_input_slots * 2);
+  } else {
+    // If we are adding workspace tensors for this op, then the total
+    // The total number of input slots to new node _must_ be 2 times the number
+    // of input slots to the original node: N original Tensorflow tensors and
+    // N for Mkl tensors corresponding to each Tensorflow tensors plus 2
+    // (for workspace Tensorflow tensor and workspace Mkl tensor).
+    CHECK_EQ(new_node_input_slots, old_node_input_slots * 2 + 2);
+  }
+
+  return Status::OK();
+}
+
+//////////////////////////////////////////////////////////////////////////
+//           Helper functions related to workspace pass
+//////////////////////////////////////////////////////////////////////////
+
+// TODO(nhasabni) We should move this to mkl_util.h.
+void MklLayoutRewritePass::GetDummyWorkspaceTensorNode(
+    std::unique_ptr<Graph>* g, Node** out, Node* orig_node) {
+  // We use a tensor of shape {1} and value 0 to represent
+  // dummy float tensor. We need this as a dummy workspace tensor.
+  // Workspace tensor has type float.
+  const DataType dt = DataTypeToEnum<float>::v();
+  TensorProto proto;
+  proto.set_dtype(dt);
+  float zero[1] = {0};
+  proto.set_tensor_content(const_cast<const void*>(static_cast<void*>(&zero)),
+                           4);
+  TensorShape dummy_shape({1});
+  dummy_shape.AsProto(proto.mutable_tensor_shape());
+  TF_CHECK_OK(NodeBuilder((*g)->NewName("DMT"), "Const")
+                .Attr("value", proto)
+                .Attr("dtype", dt)
+                .Device(orig_node->def().device())  // We place this node on
+                                                    // same the device as the
+                                                    // device of the original
+                                                    // node.
+                .Finalize(&**g, out));
+
+  // If number of inputs to the original node is > 0, then we add
+  // control dependency between 1st input (index 0) of the original node and
+  // the dummy Mkl node. This is needed because control-flow ops such as Enter,
+  // Merge, etc, require frame_name of the dummy Mkl node to be same as the
+  // rewritten node. Adding control edge between 1st input of the original node
+  // and the dummy Mkl node ensures that the dummy node is in the same frame
+  // as the original node. Choosing 1st input is not necessary - any input of
+  // the original node is fine because all the inputs of a node are always in
+  // the same frame.
+  if (orig_node->num_inputs() > 0) {
+    Node* orig_input0 = nullptr;
+    TF_CHECK_OK(orig_node->input_node(0,
+                                      const_cast<const Node**>(&orig_input0)));
+    CHECK_NOTNULL((*g)->AddControlEdge(orig_input0, *out));
+  }
+
+  (*out)->set_assigned_device_name(orig_node->assigned_device_name());
+}
+
+void MklLayoutRewritePass::AddWorkSpaceEdgeIfNeeded(
+    std::unique_ptr<Graph>* g, Node* orig_node, NodeBuilder* nb,
+    std::vector<NodeBuilder::NodeOut>* ws_tensors, bool* are_ws_tensors_added) {
+  bool workspace_edge_added = false;  // Default initializer
+  CHECK_NOTNULL(are_ws_tensors_added);
+  *are_ws_tensors_added = false;  // Default initializer
+
+  DataType T;
+  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "T", &T));
+  for (auto ws : wsinfo_) {
+    if (orig_node->type_string() == ws.fwd_op &&
+        mkl_op_registry::IsMklOp(mkl_op_registry::GetMklOpName(
+          orig_node->type_string()), T)) {
+      // If this op is a fwd op, then we need to check if there is an
+      // edge from this node's fwd_slot to bwdop's bwd_slot. If there is
+      // an edge, then we just add an attribute on this node for setting
+      // workspace_passed to true. We don't add actual workspace edge
+      // in this node. Actual workspace edge gets added in the backward
+      // op for this node.
+      for (const Edge* e : orig_node->out_edges()) {
+        if (e->src_output() == ws.fwd_slot &&
+            e->dst()->type_string() == ws.bwd_op &&
+            e->dst_input() == ws.bwd_slot) {
+          nb->Attr("workspace_enabled", true);
+          VLOG(1) << "MklLayoutRewritePass: workspace_enabled for "
+                  << orig_node->type_string();
+          workspace_edge_added = true;
+          // We found the edge that we were looking for, so break.
+          break;
+        }
+      }
+
+      if (!workspace_edge_added) {
+        // If we are here, then we did not find backward operator for this
+        // node.
+        nb->Attr("workspace_enabled", false);
+      }
+    } else if (orig_node->type_string() == ws.bwd_op &&
+               mkl_op_registry::IsMklOp(mkl_op_registry::GetMklOpName(
+                                          orig_node->type_string()), T)) {
+      // If this op is a bwd op, then we need to add workspace edge and
+      // it's Mkl tensor edge between its corresponding fwd op and this
+      // op. Corresponding fwd op is specified in 'fwd_op' field of
+      // workspace info. fwd_slot and bwd_slot in workspace info specify
+      // an edge between which slots connect forward and backward op.
+      // Once all these criteria match, we add a workspace edge between
+      // ws_fwd_slot and ws_bwd_slot. Its corresponding Mkl tensor is
+      // determined by interleaved/contiguous ordering. Function
+      // DataIndexToMetaDataIndex tells us the location of Mkl tensor
+      // from the location of the Tensorflow tensor.
+      for (const Edge* e : orig_node->in_edges()) {
+        if (e->src_output() == ws.fwd_slot &&
+            // We would have rewritten the forward op, so we need to use
+            // GetMklOpName call to get its Mkl name.
+            e->src()->type_string() == mkl_op_registry::GetMklOpName(
+                                                          ws.fwd_op) &&
+            e->dst_input() == ws.bwd_slot) {
+          nb->Attr("workspace_enabled", true);
+          CHECK_NOTNULL(ws_tensors);
+          // Add workspace edge between fwd op and bwd op.
+          ws_tensors->push_back(NodeBuilder::NodeOut(e->src(), ws.ws_fwd_slot));
+          // Add Mkl tensor edge for workspace edge between fwd op and bwd op.
+          ws_tensors->push_back(NodeBuilder::NodeOut(
+              e->src(), DataIndexToMetaDataIndex(ws.ws_fwd_slot,
+                                                 e->src()->num_outputs())));
+          *are_ws_tensors_added = true;
+          // In terms of input ordering, we add these calls to add Input
+          // here because workspace edge (and its Mkl tensor) is the last
+          // edge in the fwdop and bwdop. So all inputs before workspace
+          // tensor have been added by SetUpInputs function.
+          VLOG(1) << "MklLayoutRewritePass: workspace_enabled for "
+                  << orig_node->type_string();
+          workspace_edge_added = true;
+          // We found the edge that we were looking for, so break.
+          break;
+        }
+      }
+
+      // If we are here means we did not find fwd op that feeds to this
+      // bwd op. So in this case, we need to generate dummy tensors for
+      // workspace input and Mkl tensor for workspace, and set
+      // workspace_enabled to false.
+      if (!workspace_edge_added) {
+        nb->Attr("workspace_enabled", false);
+        Node* dmt_ws = nullptr;      // Dummy tensor for workspace
+        Node* dmt_mkl_ws = nullptr;  // Dummy Mkl tensor for workspace
+        GetDummyWorkspaceTensorNode(g, &dmt_ws, orig_node);
+        GetDummyMklTensorNode(g, &dmt_mkl_ws, orig_node);
+        CHECK_NOTNULL(dmt_ws);
+        CHECK_NOTNULL(dmt_mkl_ws);
+        CHECK_NOTNULL(ws_tensors);
+        // We add dummy tensor as workspace tensor.
+        ws_tensors->push_back(NodeBuilder::NodeOut(dmt_ws, 0));
+        // We add dummy tensor as Mkl tensor for workspace tensor.
+        ws_tensors->push_back(NodeBuilder::NodeOut(dmt_mkl_ws, 0));
+        *are_ws_tensors_added = true;
+        VLOG(1) << "MklLayoutRewritePass: dummy workspace_enabled for "
+                << orig_node->type_string();
+      }
+    } else {
+      // If this node does not match any workspace info, then we do not
+      // do anything special for workspace propagation for it.
+    }
+  }
+}
+
+//////////////////////////////////////////////////////////////////////////
+// Op-specific functions to copy attributes from old node to new node
+//////////////////////////////////////////////////////////////////////////
+
+void MklLayoutRewritePass::CopyAttrsConv2D(const Node* orig_node,
+                                           NodeBuilder* nb) {
+  DataType T;
+  string data_format;
+  string padding;
+  std::vector<int32> strides;
+  bool use_cudnn_on_gpu;
+
+  // Get all attributes from old node.
+  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "T", &T));
+  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "strides", &strides));
+  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "padding", &padding));
+  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "data_format", &data_format));
+  TF_CHECK_OK(
+      GetNodeAttr(orig_node->def(), "use_cudnn_on_gpu", &use_cudnn_on_gpu));
+
+  // Add attributes to new node.
+  nb->Attr("T", T);
+  nb->Attr("strides", strides);
+  nb->Attr("padding", padding);
+  nb->Attr("data_format", data_format);
+  nb->Attr("use_cudnn_on_gpu", use_cudnn_on_gpu);
+}
+
+void MklLayoutRewritePass::CopyAttrsAddN(const Node* orig_node,
+                                         NodeBuilder* nb) {
+  DataType T;
+  int N;
+
+  // Get all attributes from old node.
+  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "T", &T));
+  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "N", &N));
+
+  // Add attributes to new node.
+  nb->Attr("T", T);
+  nb->Attr("N", N);
+}
+
+void MklLayoutRewritePass::CopyAttrsBiasAddGrad(const Node* orig_node,
+                                                NodeBuilder* nb) {
+  DataType T;
+  string data_format;
+  std::vector<int32> strides;
+
+  // Get all attributes from old node.
+  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "T", &T));
+  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "strides", &strides));
+  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "data_format", &data_format));
+
+  // Add attributes to new node.
+  nb->Attr("T", T);
+  nb->Attr("strides", strides);
+  nb->Attr("data_format", data_format);
+}
+
+void MklLayoutRewritePass::CopyAttrsLRN(const Node* orig_node,
+                                        NodeBuilder* nb) {
+  DataType T;
+  int depth_radius;
+  float bias;
+  float alpha;
+  float beta;
+
+  // Get all attributes from old node.
+  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "T", &T));
+  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "depth_radius", &depth_radius));
+  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "bias", &bias));
+  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "alpha", &alpha));
+  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "beta", &beta));
+
+  // Add attributes to new node.
+  nb->Attr("T", T);
+  nb->Attr("depth_radius", depth_radius);
+  nb->Attr("bias", bias);
+  nb->Attr("alpha", alpha);
+  nb->Attr("beta", beta);
+}
+
+void MklLayoutRewritePass::CopyAttrsPooling(const Node* orig_node,
+                                            NodeBuilder* nb) {
+  DataType T;
+  string data_format;
+  string padding;
+  std::vector<int32> ksize, strides;
+
+  // Get all attributes from old node.
+  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "T", &T));
+  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "ksize", &ksize));
+  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "strides", &strides));
+  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "padding", &padding));
+  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "data_format", &data_format));
+
+  // Add attributes to new node.
+  nb->Attr("T", T);
+  nb->Attr("ksize", ksize);
+  nb->Attr("strides", strides);
+  nb->Attr("padding", padding);
+  nb->Attr("data_format", data_format);
+}
+
+void MklLayoutRewritePass::CopyAttrsDataType(const Node* orig_node,
+                                             NodeBuilder* nb) {
+  DataType T;
+
+  // Get all attributes from old node.
+  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "T", &T));
+
+  // Add attributes to new node.
+  nb->Attr("T", T);
+}
+
+void MklLayoutRewritePass::CopyAttrsReshape(const Node* orig_node,
+                                           NodeBuilder* nb) {
+  DataType T;
+  DataType Tshape;
+
+  // Get all attributes from old node.
+  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "T", &T));
+  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "Tshape", &Tshape));
+  // Add attributes to new node.
+  nb->Attr("T", T);
+  nb->Attr("Tshape", Tshape);
+}
+
+void MklLayoutRewritePass::CopyAttrsSplit(const Node* orig_node,
+                                          NodeBuilder* nb) {
+  DataType T;
+  string data_format;
+  int num_split;
+
+  // Get all attributes from old node.
+  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "T", &T));
+  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "num_split", &num_split));
+  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "data_format", &data_format));
+
+  // Add attributes to new node.
+  nb->Attr("T", T);
+  nb->Attr("num_split", num_split);
+  nb->Attr("data_format", data_format);
+}
+
+void MklLayoutRewritePass::CopyAttrsConcat(const Node* orig_node,
+                                           NodeBuilder* nb) {
+  DataType T;
+  int N;
+
+  // Get all attributes from old node.
+  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "T", &T));
+  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "N", &N));
+
+  // Add attributes to new node.
+  nb->Attr("T", T);
+  nb->Attr("N", N);
+}
+
+void MklLayoutRewritePass::CopyAttrsConcatV2(const Node* orig_node,
+                                             NodeBuilder* nb) {
+  DataType T;
+  int N;
+  DataType tidx;
+
+  // Get all attributes from old node.
+  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "T", &T));
+  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "N", &N));
+  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "Tidx", &tidx));
+
+  // Add attributes to new node.
+  nb->Attr("T", T);
+  nb->Attr("N", N);
+  nb->Attr("Tidx", tidx);
+}
+
+void MklLayoutRewritePass::CopyAttrsFusedBatchNorm(const Node* orig_node,
+                                                   NodeBuilder* nb) {
+  DataType T;
+  float epsilon;
+  string data_format;
+  bool is_training;
+
+  // Get all attributes from old node.
+  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "T", &T));
+  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "epsilon", &epsilon));
+  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "data_format", &data_format));
+  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "is_training", &is_training));
+
+  // Add attributes to new node.
+  nb->Attr("T", T);
+  nb->Attr("epsilon", epsilon);
+  nb->Attr("data_format", data_format);
+  nb->Attr("is_training", is_training);
+}
+
+//////////////////////////////////////////////////////////////////////////
+//           Helper functions related to node merge pass
+//////////////////////////////////////////////////////////////////////////
+
+Node* MklLayoutRewritePass::CheckForNodeMerge(const Node* a) const {
+  // TODO(nhasabni) Add check for type of node similar to CheckForNodeRewrite
+  // once we support BiasAddGrad as Mkl layer.
+
+  // Search for all matching mergeinfo.
+  // We allow more than one match for extensibility.
+  std::vector<const MergeInfo*> matching_mi;
+  for (auto mi = minfo_.cbegin(); mi != minfo_.cend(); ++mi) {
+    if (a->type_string() == mi->op1 || a->type_string() == mi->op2) {
+      matching_mi.push_back(&*mi);
+    }
+  }
+
+  for (const MergeInfo* mi : matching_mi) {
+    // Get the operand with which 'a' can be merged.
+    Node* b = nullptr;
+    if ((b = mi->get_node_to_be_merged(a)) == nullptr) {
+      continue;
+    }
+
+    // Get the control edges and input of node
+    const int N_in = a->num_inputs();
+    gtl::InlinedVector<Node*, 4> a_control_edges;
+    gtl::InlinedVector<std::pair<Node*, int>, 4> a_in(N_in);
+    FillInputs(a, &a_control_edges, &a_in);
+
+    const int B_in = b->num_inputs();
+    gtl::InlinedVector<Node*, 4> b_control_edges;
+    gtl::InlinedVector<std::pair<Node*, int>, 4> b_in(B_in);
+    FillInputs(b, &b_control_edges, &b_in);
+
+    // Shouldn't merge if a and b have different control edges.
+    if (a_control_edges != b_control_edges) {
+      continue;
+    } else {
+      // We found a match.
+      return b;
+    }
+  }
+
+  return nullptr;
+}
+
+Status MklLayoutRewritePass::MergeConv2DWithBiasAdd(std::unique_ptr<Graph>* g,
+                                                    Node* m, Node* n) {
+  CHECK_EQ(((m->type_string() == csinfo_.bias_add &&
+             n->type_string() == csinfo_.conv2d)) ||
+           ((n->type_string() == csinfo_.bias_add &&
+             m->type_string() == csinfo_.conv2d)), true);
+
+  // If 'm' is BiasAdd, then 'n' is Conv2D. Since Conv2D feeds BiasAdd,
+  // BiasAdd is successor node, and Conv2D predecessor node.
+  Node* pred = m->type_string() == csinfo_.bias_add ? n : m;
+  Node* succ = m->type_string() == csinfo_.bias_add ? m : n;
+
+  // 1. Get all attributes from input nodes.
+  DataType T_pred, T_succ;
+  string padding;
+  std::vector<int32> strides;
+  string data_format_pred, data_format_succ;
+  bool use_cudnn_on_gnu;
+  TF_CHECK_OK(GetNodeAttr(pred->def(), "T", &T_pred));
+  TF_CHECK_OK(GetNodeAttr(succ->def(), "T", &T_succ));
+  TF_CHECK_OK(GetNodeAttr(pred->def(), "padding", &padding));
+  TF_CHECK_OK(GetNodeAttr(pred->def(), "strides", &strides));
+  TF_CHECK_OK(GetNodeAttr(pred->def(), "data_format", &data_format_pred));
+  TF_CHECK_OK(GetNodeAttr(succ->def(), "data_format", &data_format_succ));
+  TF_CHECK_OK(
+      GetNodeAttr(pred->def(), "use_cudnn_on_gpu", &use_cudnn_on_gnu));
+  // We check to ensure that data formats of both succ and pred are same.
+  // We expect them to be same, so we can enforce this as assert.
+  // But assert can be too strict, so we enforce this as a check.
+  // If the check fails, then we do not merge two nodes.
+  // We also do same check for devices.
+  if (data_format_pred != data_format_succ || T_pred != T_succ ||
+      pred->assigned_device_name() != succ->assigned_device_name() ||
+      pred->def().device() != succ->def().device()) {
+    return Status(error::Code::INVALID_ARGUMENT,
+                  "data_format or T attribute or devices of Conv2D and "
+                  "BiasAdd do not match. Will skip node merge optimization");
+  }
+
+  const int succ_num = succ->num_inputs();
+  gtl::InlinedVector<Node*, 4> succ_control_edges;
+  gtl::InlinedVector<std::pair<Node*, int>, 4> succ_in(succ_num);
+  FillInputs(succ, &succ_control_edges, &succ_in);
+
+  const int pred_num = pred->num_inputs();
+  gtl::InlinedVector<Node*, 4> pred_control_edges;
+  gtl::InlinedVector<std::pair<Node*, int>, 4> pred_in(pred_num);
+  FillInputs(pred, &pred_control_edges, &pred_in);
+
+  // We need to ensure that Conv2D only feeds to BiasAdd (some other operator is
+  // not expecting output of Conv2D). If this is not the case, then we cannot
+  // merge Conv2D with BiasAdd.
+  const int kFirstOutputSlot = 0;
+  for (const Edge* e : pred->out_edges()) {
+    if (e->src_output() == kFirstOutputSlot && e->dst() != succ) {
+      return Status(error::Code::INVALID_ARGUMENT,
+                    "Conv2D does not feed to BiasAdd, or "
+                    "it feeds BiasAdd but has multiple outputs. "
+                    "Will skip node merge optimization");
+    }
+  }
+
+  // 2. Get inputs from both the nodes.
+  // Find the 2 inputs from the conv and the bias from the add Bias.
+  // Get operand 0, 1 of conv2D.
+  CHECK_EQ(pred->in_edges().size(), 2);  // Conv2D must have 2 inputs.
+  // Get operand 1 of add_bias
+  // BiasAdd must have 2 inputs: Conv, bias
+  CHECK_EQ(succ->in_edges().size(), 2);
+
+  // We will use the node name of BiasAdd as the name of new node
+  // Build new node. We use same name as original node, but change the op
+  // name.
+  NodeBuilder nb(succ->name(), csinfo_.conv2d_with_bias);
+  nb.Input(pred_in[0].first, pred_in[0].second);  // In1 of Conv2D
+  // pred_in[1] will be 2nd Tensorflow tensor for Conv2D.
+  nb.Input(pred_in[1].first, pred_in[1].second);  // In2 of Conv2D
+  // In1 of BiasAdd is same as output of Conv2D.
+  nb.Input(succ_in[1].first, succ_in[1].second);  // In2 of BiasAdd
+
+  // Copy attributes from Conv2D to Conv2DWithBias.
+  CopyAttrsConv2D(const_cast<const Node*>(pred), &nb);
+
+  // Copy the device assigned to old node to new node.
+  nb.Device(succ->def().device());
+
+  // Create node.
+  Node* new_node;
+  nb.Finalize(&**g, &new_node);
+  CHECK_NOTNULL(new_node);
+
+  // Incoming data edges from 'pred' node and 'succ' node to new 'new_node'
+  // node are already copied in BuildNode. We handle control edges now.
+  for (const Edge* e : pred->in_edges()) {
+    if (e->IsControlEdge()) {
+      CHECK_NOTNULL((*g)->AddControlEdge(e->src(), new_node));
+    }
+  }
+  for (const Edge* e : succ->in_edges()) {
+    if (e->IsControlEdge()) {
+      CHECK_NOTNULL((*g)->AddControlEdge(e->src(), new_node));
+    }
+  }
+
+  // Incoming edges are fixed, we will fix the outgoing edges now.
+  // First, we will fix outgoing control edges from 'pred' node.
+  for (const Edge* e : pred->out_edges()) {
+    if (e->IsControlEdge()) {
+      CHECK_NOTNULL((*g)->AddControlEdge(new_node, e->dst()));
+    }
+  }
+
+  // Second, we will fix outgoing control and data edges from 'succ' node.
+  for (const Edge* e : succ->out_edges()) {
+    if (e->IsControlEdge()) {
+      CHECK_NOTNULL((*g)->AddControlEdge(new_node, e->dst()));
+    } else {
+      // BiasAdd has only 1 output (at slot 0) and merged node also has only 1
+      // output (at slot 0).
+      const int kConv2DWithBiasOutputSlot = 0;
+      CHECK_NOTNULL((*g)->AddEdge(new_node, kConv2DWithBiasOutputSlot,
+                                    e->dst(), e->dst_input()));
+    }
+  }
+
+  // Copy device assigned to old node to new node.
+  // It's ok to use pred or succ as we have enforced a check that
+  // both have same device assigned.
+  new_node->set_assigned_device_name(pred->assigned_device_name());
+
+  VLOG(1) << "MklLayoutRewritePass: Merged old node:" << pred->DebugString()
+          << ", and node: " << succ->DebugString()
+          << ", into node:" << new_node->DebugString();
+
+  (*g)->RemoveNode(succ);
+  (*g)->RemoveNode(pred);
+
+  return Status::OK();
+}
+
+Status MklLayoutRewritePass::MergeConv2DBackpropFilterWithBiasAddGrad(
+    std::unique_ptr<Graph>* g, Node* m, Node* n) {
+  CHECK_EQ(((m->type_string() == csinfo_.bias_add_grad &&
+             n->type_string() == csinfo_.conv2d_grad_filter)) ||
+           ((n->type_string() == csinfo_.bias_add_grad &&
+             m->type_string() == csinfo_.conv2d_grad_filter)), true);
+
+  // If 'm' is BiasAddGrad, then 'n' is BackpropFilter.
+  Node* badd = m->type_string() == csinfo_.bias_add_grad ? m : n;
+  Node* fltr = m->type_string() == csinfo_.bias_add_grad ? n : m;
+
+  // Sanity check for attributes from input nodes.
+  DataType T_b, T_f;
+  string data_format_b, data_format_f;
+  TF_CHECK_OK(GetNodeAttr(badd->def(), "T", &T_b));
+  TF_CHECK_OK(GetNodeAttr(fltr->def(), "T", &T_f));
+  TF_CHECK_OK(GetNodeAttr(badd->def(), "data_format", &data_format_b));
+  TF_CHECK_OK(GetNodeAttr(fltr->def(), "data_format", &data_format_f));
+  if (data_format_b != data_format_f || T_b != T_f ||
+      badd->assigned_device_name() != fltr->assigned_device_name() ||
+      badd->def().device() != fltr->def().device()) {
+    return Status(error::Code::INVALID_ARGUMENT,
+                  "data_format or T attribute or devices of "
+                  "Conv2DBackpropFilter and BiasAddGrad do not match. "
+                  "Will skip node merge optimization");
+  }
+
+  // We will use the node name of Conv2DBackpropFilter as the name of new node.
+  // This is because BackpropFilterWithBias is going to emit bias output also.
+  NodeBuilder nb(fltr->name(), csinfo_.conv2d_grad_filter_with_bias);
+  // Since Conv2DBackpropFilterWithBias has same number of inputs as
+  // Conv2DBackpropFilter, we can just copy input edges directly. We dont need
+  // to copy any data input of BiasAddGrad because that input also goes to
+  // Conv2DBackpropFilter.
+  const int fltr_ins = fltr->num_inputs();
+  gtl::InlinedVector<Node*, 4> fltr_control_edges;
+  gtl::InlinedVector<std::pair<Node*, int>, 4> fltr_in_edges(fltr_ins);
+  FillInputs(fltr, &fltr_control_edges, &fltr_in_edges);
+  for (int idx = 0; idx < fltr_ins; idx++) {
+    nb.Input(fltr_in_edges[idx].first, fltr_in_edges[idx].second);
+  }
+
+  // Copy attributes from Conv2DBackpropFilter.
+  CopyAttrsConv2D(const_cast<const Node*>(fltr), &nb);
+
+  // Copy the device assigned to old node to new node.
+  nb.Device(fltr->def().device());
+
+  // Create node.
+  Node* new_node;
+  nb.Finalize(&**g, &new_node);
+  CHECK_NOTNULL(new_node);
+
+  // Incoming data edges from BiasAddGrad node and Conv2DBackpropFilter node to
+  // new 'new_node' node are already copied in BuildNode. We handle control
+  // edges now.
+  for (const Edge* e : badd->in_edges()) {
+    if (e->IsControlEdge()) {
+      CHECK_NOTNULL((*g)->AddControlEdge(e->src(), new_node));
+    }
+  }
+  for (const Edge* e : fltr->in_edges()) {
+    if (e->IsControlEdge()) {
+      CHECK_NOTNULL((*g)->AddControlEdge(e->src(), new_node));
+    }
+  }
+
+  // Incoming edges are fixed, we will fix the outgoing edges now.
+  // First, we will fix outgoing control edges from 'badd' node.
+  // Conv2DBackpropFilter has 1 output -- filter_grad.
+  // Conv2DBackpropFilterWithBias has 2 outputs -- filter_grad and
+  // bias_grad. But filter_grad is at same slot number (0) in both the
+  // nodes. bias_grad is at slot number 1 in Conv2DBackpropFilterWithBias, while
+  // it is at slot number 0 in BiasAddGrad.
+  const int kMergedNodeFilterGradOutputIdx = 0;
+  const int kMergedNodeBiasGradOutputIdx = 1;
+
+  for (const Edge* e : badd->out_edges()) {
+    if (e->IsControlEdge()) {
+      CHECK_NOTNULL((*g)->AddControlEdge(new_node, e->dst()));
+    } else {
+      CHECK_NOTNULL((*g)->AddEdge(new_node, kMergedNodeBiasGradOutputIdx,
+                                  e->dst(), e->dst_input()));
+    }
+  }
+
+  // Second, we will fix outgoing control and data edges from 'fltr' node.
+  for (const Edge* e : fltr->out_edges()) {
+    if (e->IsControlEdge()) {
+      CHECK_NOTNULL((*g)->AddControlEdge(new_node, e->dst()));
+    } else {
+      CHECK_NOTNULL((*g)->AddEdge(new_node, kMergedNodeFilterGradOutputIdx,
+                                  e->dst(), e->dst_input()));
+    }
+  }
+
+  // Copy device assigned to old node to new node.
+  // It's ok to use badd or fltr as we have enforced a check that
+  // both have same device assigned.
+  new_node->set_assigned_device_name(badd->assigned_device_name());
+
+  VLOG(1) << "MklLayoutRewritePass: Merged old node:" << badd->DebugString()
+          << ", and node: " << fltr->DebugString()
+          << ", into node:" << new_node->DebugString();
+
+  (*g)->RemoveNode(badd);
+  (*g)->RemoveNode(fltr);
+
+  return Status::OK();
+}
+
+Status MklLayoutRewritePass::MergeNode(std::unique_ptr<Graph>* g, Node* m,
+                                       Node* n) {
+  CHECK_NOTNULL(m);
+  CHECK_NOTNULL(n);
+
+  if (((m->type_string() == csinfo_.bias_add &&
+        n->type_string() == csinfo_.conv2d)) ||
+      ((n->type_string() == csinfo_.bias_add &&
+        m->type_string() == csinfo_.conv2d))) {
+    return this->MergeConv2DWithBiasAdd(g, m, n);
+  }
+
+  if (((m->type_string() == csinfo_.bias_add_grad &&
+        n->type_string() == csinfo_.conv2d_grad_filter)) ||
+      ((n->type_string() == csinfo_.bias_add_grad &&
+        m->type_string() == csinfo_.conv2d_grad_filter))) {
+    return this->MergeConv2DBackpropFilterWithBiasAddGrad(g, m, n);
+  }
+
+  return Status(error::Code::UNIMPLEMENTED,
+                "Unimplemented case for node merge optimization.");
+}
+
+//////////////////////////////////////////////////////////////////////////
+//           Helper functions for node rewrite
+//////////////////////////////////////////////////////////////////////////
+
+Status MklLayoutRewritePass::RewriteNode(std::unique_ptr<Graph>* g,
+                                         Node* orig_node,
+                                         const RewriteInfo* ri) {
+  CHECK_NOTNULL(ri);
+  CHECK_NOTNULL(orig_node);
+
+  VLOG(1) << "MklLayoutRewritePass: Original node:" << orig_node->DebugString();
+
+  // Get all inputs.
+  int num_inputs = orig_node->in_edges().size();
+
+  // Drop count for control edges from inputs
+  for (const Edge* e : orig_node->in_edges()) {
+    if (e->IsControlEdge()) {
+      num_inputs--;
+    }
+  }
+
+  gtl::InlinedVector<Node*, 4> control_edges;
+  gtl::InlinedVector<std::pair<Node*, int>, 4> inputs(num_inputs);
+  FillInputs(orig_node, &control_edges, &inputs);
+
+  // Build new node. We use same name as original node, but change the op name.
+  NodeBuilder nb(orig_node->name().c_str(), ri->new_name.c_str());
+  // Copy user-specified device assigned to original node to new node.
+  nb.Device(orig_node->def().device());
+  // Set up new inputs to the rewritten node.
+  Status s = SetUpInputs(g, inputs, &nb, orig_node);
+  if (s != Status::OK()) {
+    return s;
+  }
+
+  ri->copy_attrs(const_cast<const Node*>(orig_node), &nb);
+  // Set the Mkl layer label for this op.
+  nb.Attr("_kernel", mkl_op_registry::kMklOpLabel);
+
+  // Finalize graph and get new node.
+  Node* new_node = nullptr;
+  TF_CHECK_OK(nb.Finalize(&**g, &new_node));
+  CHECK_NOTNULL(new_node);
+
+  // Incoming data edges from 'orig_node' node to new 'new_node' node are
+  // already copied in BuildNode. We need to handle control edges now.
+  for (const Edge* e : orig_node->in_edges()) {
+    if (e->IsControlEdge()) {
+      CHECK_NOTNULL((*g)->AddControlEdge(e->src(), new_node));
+    }
+  }
+
+  // Copy outgoing edges from 'orig_node' node to new
+  // 'new_node' node, since the output also follows same ordering among
+  // Tensorflow tensors and Mkl tensors. We need to connect Tensorflow
+  // tensors appropriately. Specifically, nth output of the original node
+  // will become 2*nth output of the Mkl node for the interleaved ordering
+  // of the tensors. For the contiguous ordering of the tensors, it will be n.
+  // GetTensorDataIndex provides this mapping function.
+  for (const Edge* e : orig_node->out_edges()) {
+    if (e->IsControlEdge()) {
+      CHECK_NOTNULL((*g)->AddControlEdge(new_node, e->dst()));
+    } else {
+      CHECK_NOTNULL((*g)->AddEdge(new_node, GetTensorDataIndex(e->src_output(),
+                            e->src()->num_outputs()),
+                    e->dst(), e->dst_input()));
+    }
+  }
+
+  // Copy the runtime device assigned from original code to new node.
+  new_node->set_assigned_device_name(orig_node->assigned_device_name());
+
+  // Delete original node and mark new node as rewritten.
+  (*g)->RemoveNode(orig_node);
+
+  VLOG(1) << "MklLayoutRewritePass: New node:" << new_node->DebugString();
+  return Status::OK();
+}
+
+const MklLayoutRewritePass::RewriteInfo*
+MklLayoutRewritePass::CheckForNodeRewrite(const Node* n) const {
+  CHECK_NOTNULL(n);
+
+  // First check if node along with its type is supported by MKL layer.
+  // We do not want to rewrite an op into Mkl op if types are not supported.
+  // E.g., MklRelu does not support INT32. So we cannot rewrite Relu to
+  // MklRelu if type is INT32.
+  DataType T;
+  if (!GetNodeAttr(n->def(), "T", &T).ok()) {
+    return nullptr;
+  }
+
+  // We make an exception for __MklDummyConv2DWithBias and
+  // __MklConv2DBackpropFilterWithBias since their names do not match Mkl node
+  // names.
+  if (n->type_string() != csinfo_.conv2d_with_bias &&
+      n->type_string() != csinfo_.conv2d_grad_filter_with_bias &&
+      !mkl_op_registry::IsMklOp(mkl_op_registry::GetMklOpName(
+                                        n->type_string()), T)) {
+      return nullptr;
+  }
+
+  // For elementwise node, we reuse the Eigen implementation and pass the MKL
+  // metadata tensor through so we can avoid conversions. However, if all
+  // incoming edges are in TF format, we don't need all this overhead, so
+  // replace the elementwise node only if at least one of its parents is a MKL
+  // node.
+  //
+  // Identity nodes can also skip replacement if they are not being served by
+  // any MKL nodes.
+  //
+  // TODO(vrane): Add implementation for element-wise ops that doesn't reuse
+  // eigen code to reduce cross-library dependency.
+  VLOG(1) << "ELEMENTWISE: checking op: " << n->type_string();
+  if (mkl_op_registry::IsMklElementWiseOp(
+        mkl_op_registry::GetMklOpName(n->type_string()), T) ||
+      n->type_string().find("Identity") != string::npos) {
+    VLOG(1) << "ELEMENTWISE: op is elementwise: " << n->type_string();
+    bool incoming_mkl_edge = false;
+    int num_parent = 0;
+    for (auto parent : n->in_edges()) {
+      if (mkl_op_registry::IsMklOp(parent->src()->type_string(), T)) {
+        VLOG(1) << "ELEMENTWISE: parent " << num_parent++ << " is MKL op: "
+                << parent->src()->type_string();
+        incoming_mkl_edge = true;
+        break;
+      } else {
+        VLOG(1) << "ELEMENTWISE: parent " << num_parent++ << " is NON-MKL op: "
+                << parent->src()->type_string();
+      }
+    }
+    if (incoming_mkl_edge == false) {
+      VLOG(1) << "ELEMENTWISE: Skipping replacement of elementwise node which has no MKL "
+                 "parents.";
+      return nullptr;
+    } else {
+      VLOG(1) << "ELEMENTWISE: Replacing elementwise node " << n->type_string() <<
+        " which has MKL parents";
+    }
+  }
+
+  // We now check if rewrite rule applies for this op. If rewrite rule passes
+  // for this op, then we rewrite it to Mkl op.
+  // Find matching RewriteInfo and then check that rewrite rule applies.
+  for (auto ri = rinfo_.cbegin(); ri != rinfo_.cend(); ++ri) {
+    if (n->type_string().compare(ri->name) == 0 &&
+        ri->rewrite_rule(n)) {
+      return &*ri;
+    }
+  }
+
+  // Else return not found.
+  return nullptr;
+}
+
+///////////////////////////////////////////////////////////////////////////////
+//              Run function for the pass
+///////////////////////////////////////////////////////////////////////////////
+
+bool MklLayoutRewritePass::RunPass(std::unique_ptr<Graph>* g) {
+  bool result = false;
+  CHECK_NOTNULL(g);
+
+  DumpGraph("Before running MklLayoutRewritePass", &**g);
+
+  std::vector<Node*> order;
+  GetReversePostOrder(**g, &order);  // This will give us topological sort.
+  for (Node* n : order) {
+    // If node is not an op or it cannot run on CPU device, then skip.
+    if (!n->IsOp() || !CanOpRunOnCPUDevice(n)) {
+      continue;
+    }
+
+    Node* m = nullptr;
+    if ((m = CheckForNodeMerge(n)) != nullptr && CanOpRunOnCPUDevice(m)) {
+      // Check if the node 'n' can be merged with any other node. If it can
+      // be 'm' contains the node with which it can be merged.
+      string n1_name = n->name();
+      string n2_name = m->name();
+
+      VLOG(1) << "MklLayoutRewritePass: Scheduled nodes " << n1_name << " and "
+              << n2_name << " for merging";
+
+      if (MergeNode(g, n, m) == Status::OK()) {
+        VLOG(1) << "MklLayoutRewritePass: Merged nodes " << n1_name << " and "
+                << n2_name;
+        result = true;
+      }
+    }
+  }
+
+  DumpGraph("After running MklLayoutRewritePass(NodeMerge)", &**g);
+
+  order.clear();
+  GetReversePostOrder(**g, &order);  // This will give us topological sort.
+  for (Node* n : order) {
+    // If node is not an op or it cannot run on CPU device, then skip.
+    if (!n->IsOp() || !CanOpRunOnCPUDevice(n)) {
+      continue;
+    }
+
+    const RewriteInfo* ri = nullptr;
+    // We will first search if node is to be rewritten.
+    if ((ri = CheckForNodeRewrite(n)) != nullptr) {
+      string node_name = n->name();
+      string op_name = n->type_string();
+
+      VLOG(1) << "MklLayoutRewritePass: Scheduled node " << node_name
+              << " with op " << op_name << " for rewrite using"
+              << " layout optimization.";
+
+      if (RewriteNode(g, n, ri) == Status::OK()) {
+        VLOG(1) << "MklLayoutRewritePass: rewrote node " << node_name
+                << " with op " << op_name << " for Mkl layout optimization.";
+        result = true;
+      }
+    }
+  }
+
+  DumpGraph("After running MklLayoutRewritePass(NodeMerge+Rewrite)", &**g);
+
+  return result;
+}
+
+bool RunMklLayoutRewritePass(std::unique_ptr<Graph>* g) {
+  return MklLayoutRewritePass().RunPass(g);
+}
+
+Status MklLayoutRewritePass::Run(
+  const GraphOptimizationPassOptions& options) {
+  if (options.graph == nullptr && options.partition_graphs == nullptr) {
+    return Status::OK();
+  }
+
+  auto process_graph = [&](std::unique_ptr<Graph>* g) {
+    // Get the ownership of a graph
+    std::unique_ptr<Graph>* ng = std::move(g);
+    RunPass(ng);
+    // Return the ownership of a graph back
+    g->reset(ng->release());
+  };
+
+  if (kMklLayoutRewritePassGroup !=
+      OptimizationPassRegistry::POST_PARTITIONING) {
+    // For any pre-partitioning phase, a graph is stored in options.graph.
+    process_graph(options.graph);
+  } else {
+    // For post partitioning phase, graphs are stored in
+    // options.partition_graphs.
+    for (auto& pg : *options.partition_graphs) {
+      process_graph(&pg.second);
+    }
+  }
+
+  return Status::OK();
+}
+#endif  // INTEL_MKL_DNN
 }  // namespace tensorflow
 
 #endif
diff --git a/tensorflow/core/graph/mkl_layout_pass_test.cc b/tensorflow/core/graph/mkl_layout_pass_test.cc
index abc63e4f35..75f7ca2d4d 100644
--- a/tensorflow/core/graph/mkl_layout_pass_test.cc
+++ b/tensorflow/core/graph/mkl_layout_pass_test.cc
@@ -37,6 +37,9 @@ limitations under the License.
 #include "tensorflow/core/platform/test_benchmark.h"
 
 namespace tensorflow {
+
+#ifndef INTEL_MKL_DNN
+
 namespace {
 
 const char kCPUDevice[] = "/job:a/replica:0/task:0/device:CPU:0";
@@ -1881,6 +1884,1627 @@ static void BM_MklLayoutRewritePass(int iters, int op_nodes) {
 BENCHMARK(BM_MklLayoutRewritePass)->Arg(1000)->Arg(10000);
 
 }  // namespace
+
+#else  // INTEL_MKL_DNN
+
+namespace {
+
+const char kCPUDevice[] = "/job:a/replica:0/task:0/device:CPU:0";
+const char kGPUDevice[] = "/job:a/replica:0/task:0/device:GPU:0";
+
+static void InitGraph(const string& s, Graph* graph,
+                      const string& device = kCPUDevice) {
+  GraphDef graph_def;
+
+  auto parser = protobuf::TextFormat::Parser();
+  //  parser.AllowRelaxedWhitespace(true);
+  CHECK(parser.MergeFromString(s, &graph_def)) << s;
+  GraphConstructorOptions opts;
+  TF_CHECK_OK(ConvertGraphDefToGraph(opts, graph_def, graph));
+
+  for (Node* node : graph->nodes()) {
+    node->set_assigned_device_name(device);
+  }
+}
+
+class MklLayoutPassTest : public ::testing::Test {
+ public:
+  MklLayoutPassTest() : graph_(OpRegistry::Global()) {}
+
+  void InitGraph(const string& s, const string& device = kCPUDevice) {
+    ::tensorflow::InitGraph(s, &graph_, device);
+    original_ = CanonicalGraphString(&graph_);
+  }
+
+  static bool IncludeNode(const Node* n) { return n->IsOp(); }
+
+  static string EdgeId(const Node* n, int index) {
+    if (index == 0) {
+      return n->name();
+    } else if (index == Graph::kControlSlot) {
+      return strings::StrCat(n->name(), ":control");
+    } else {
+      return strings::StrCat(n->name(), ":", index);
+    }
+  }
+
+  string CanonicalGraphString(Graph* g) {
+    std::vector<string> nodes;
+    std::vector<string> edges;
+    for (const Node* n : g->nodes()) {
+      if (IncludeNode(n)) {
+        nodes.push_back(strings::StrCat(n->name(), "(", n->type_string(), ")"));
+      }
+    }
+    for (const Edge* e : g->edges()) {
+      if (IncludeNode(e->src()) && IncludeNode(e->dst())) {
+        edges.push_back(strings::StrCat(EdgeId(e->src(), e->src_output()), "->",
+                                        EdgeId(e->dst(), e->dst_input())));
+      }
+    }
+    // Canonicalize
+    std::sort(nodes.begin(), nodes.end());
+    std::sort(edges.begin(), edges.end());
+    return strings::StrCat(str_util::Join(nodes, ";"), "|",
+                           str_util::Join(edges, ";"));
+  }
+
+  string DoMklLayoutOptimizationPass() {
+    string before = CanonicalGraphString(&graph_);
+    LOG(ERROR) << "Before MKL layout rewrite pass: " << before;
+
+    std::unique_ptr<Graph>* ug = new std::unique_ptr<Graph>(&graph_);
+    RunMklLayoutRewritePass(ug);
+
+    string result = CanonicalGraphString(&graph_);
+    LOG(ERROR) << "After MKL layout rewrite pass:  " << result;
+    return result;
+  }
+
+  const string& OriginalGraph() const { return original_; }
+
+  Graph graph_;
+  string original_;
+};
+
+REGISTER_OP("Input").Output("o: float").SetIsStateful();
+REGISTER_OP("InputList").Output("o: N * float").Attr("N: int").SetIsStateful();
+REGISTER_OP("HalfInput").Output("o: half").SetIsStateful();
+REGISTER_OP("Int32Input").Output("o: int32").SetIsStateful();
+REGISTER_OP("_MklInput").Output("o: uint8").SetIsStateful();
+REGISTER_OP("_MklInput2").Output("o: uint8")
+                        .Output("o1: uint8").SetIsStateful();
+
+/////////////////////////////////////////////////////////////////////
+//  Unit tests related to node merge optiimization
+/////////////////////////////////////////////////////////////////////
+
+TEST_F(MklLayoutPassTest, Basic) {
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'B' op: 'Input'}"
+      "node { name: 'C' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
+      " input: ['A', 'B'] }"
+      "node { name: 'D' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
+      " input: ['A', 'B'] }");
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Input);B(Input);C(Zeta);D(Zeta)|"
+            "A->C;A->D;B->C:1;B->D:1");
+}
+
+// Test set 1: Conv2D + AddBias
+
+// C=Conv2D(A,B); E=BiasAdd(C,D); Z=Zeta(E,Y)
+TEST_F(MklLayoutPassTest, NodeMerge_Conv2DWithBias_Positive) {
+  CHECK_EQ(kTensorOrdering, MklTfTensorOrdering::TENSORS_CONTIGUOUS);
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'B' op: 'Input'}"
+      "node { name: 'C' op: 'Conv2D'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'      value { s: 'NCHW' } }"
+      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
+      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
+      " attr { key: 'padding'          value { s: 'SAME' } }"
+      " input: ['A', 'B']}"
+      "node { name: 'D' op: 'Input'}"
+      "node { name: 'E' op: 'BiasAdd'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'      value { s: 'NCHW' } }"
+      " input: ['C', 'D'] }"
+      "node { name: 'Y' op: 'Input'}"
+      "node { name: 'Z' op: 'Zeta'"
+      " attr {key: 'T'                 value { type: DT_FLOAT } }"
+      " input: ['E', 'Y']}");
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Input);B(Input);D(Input);DMT/_0(Const);DMT/_1(Const);"
+            "DMT/_2(Const);E(_MklConv2DWithBias);Y(Input);Z(Zeta)|A->E;"
+            "A:control->DMT/_0:control;A:control->DMT/_1:control;"
+            "A:control->DMT/_2:control;B->E:1;D->E:2;DMT/_0->E:3;DMT/_1->E:4;"
+            "DMT/_2->E:5;E->Z;Y->Z:1");
+}
+
+// Graph contains only Conv2D, no AddBias.
+TEST_F(MklLayoutPassTest, NodeMerge_Conv2DWithBias_Negative_NoAddBias) {
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'B' op: 'Input'}"
+      "node { name: 'C' op: 'Conv2D'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'      value { s: 'NCHW' } }"
+      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
+      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
+      " attr { key: 'padding'          value { s: 'SAME' } }"
+      " input: ['A', 'B']}");
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Input);B(Input);C(_MklConv2D);DMT/_0(Const);DMT/_1(Const)|"
+            "A->C;A:control->DMT/_0:control;A:control->DMT/_1:control;B->C:1;"
+            "DMT/_0->C:2;DMT/_1->C:3");
+}
+
+// Conv2D output does not go to BiasAdd.
+TEST_F(MklLayoutPassTest, NodeMerge_Conv2DWithBias_Negative_Dataflow1) {
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'B' op: 'Input'}"
+      "node { name: 'C' op: 'Conv2D'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'      value { s: 'NCHW' } }"
+      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
+      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
+      " attr { key: 'padding'          value { s: 'SAME' } }"
+      " input: ['A', 'B']}"
+      "node { name: 'D' op: 'Input'}"
+      "node { name: 'E' op: 'Input'}"
+      "node { name: 'F' op: 'BiasAdd'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'      value { s: 'NCHW' } }"
+      " input: ['D', 'E'] }");  // Output of _MklConv2D does not go to BiasAdd.
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Input);B(Input);C(_MklConv2D);D(Input);DMT/_0(Const);"
+            "DMT/_1(Const);E(Input);F(BiasAdd)|A->C;A:control->DMT/_0:control;"
+            "A:control->DMT/_1:control;B->C:1;D->F;DMT/_0->C:2;DMT/_1->C:3;"
+            "E->F:1");
+}
+
+// Conv2D has two outgoing edges: BiasAdd and some other dummy node (Zeta).
+// Merge should not be done in such case.
+TEST_F(MklLayoutPassTest, NodeMerge_Conv2DWithBias_Negative_Dataflow2) {
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'B' op: 'Input'}"
+      "node { name: 'C' op: 'Conv2D'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'      value { s: 'NCHW' } }"
+      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
+      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
+      " attr { key: 'padding'          value { s: 'SAME' } }"
+      " input: ['A', 'B']}"
+      "node { name: 'D' op: 'Input'}"
+      "node { name: 'E' op: 'Input'}"
+      "node { name: 'F' op: 'BiasAdd'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'      value { s: 'NCHW' } }"
+      " input: ['D', 'E'] }"  // Conv2D has two outputs.
+                              // No merge should happen.
+      "node { name: 'G' op: 'Zeta'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " input: ['C', 'E'] }");
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Input);B(Input);C(_MklConv2D);D(Input);DMT/_0(Const);"
+            "DMT/_1(Const);E(Input);F(BiasAdd);G(Zeta)|A->C;"
+            "A:control->DMT/_0:control;A:control->DMT/_1:control;B->C:1;C->G;"
+            "D->F;DMT/_0->C:2;DMT/_1->C:3;E->F:1;E->G:1");
+}
+
+// data_format attribute value mismatch. Merge should not be done
+// in such case.
+TEST_F(MklLayoutPassTest, NodeMerge_Conv2DWithBias_Negative_AttrMismatch) {
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'B' op: 'Input'}"
+      "node { name: 'C' op: 'Conv2D'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'      value { s: 'NCHW' } }"
+      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
+      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
+      " attr { key: 'padding'          value { s: 'SAME' } }"
+      " input: ['A', 'B']}"
+      "node { name: 'D' op: 'Input'}"
+      "node { name: 'E' op: 'BiasAdd'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'      value { s: 'NHCW' } }"
+      " input: ['C', 'D'] }");
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Input);B(Input);C(_MklConv2D);D(Input);DMT/_0(Const);"
+            "DMT/_1(Const);E(BiasAdd)|A->C;A:control->DMT/_0:control;"
+            "A:control->DMT/_1:control;B->C:1;C->E;D->E:1;DMT/_0->C:2;"
+            "DMT/_1->C:3");
+}
+
+// Test set 2: BiasAddGrad + Conv2DBackpropFilter fusion tests
+
+TEST_F(MklLayoutPassTest, NodeMerge_Conv2DBackpropFilterFusion_Positive) {
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'B' op: 'Int32Input'}"
+      "node { name: 'C' op: 'Input'}"
+      "node { name: 'D' op: 'Conv2DBackpropFilter'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'      value { s: 'NCHW' } }"
+      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
+      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
+      " attr { key: 'padding'          value { s: 'SAME' } }"
+      " input: ['A', 'B', 'C'] }"
+      "node { name: 'E' op: 'BiasAddGrad'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'      value { s: 'NCHW' } }"
+      " input: ['C'] }");
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Input);B(Int32Input);C(Input);"
+            "D(_MklConv2DBackpropFilterWithBias);DMT/_0(Const);DMT/_1(Const);"
+            "DMT/_2(Const)|A->D;A:control->DMT/_0:control;"
+            "A:control->DMT/_1:control;A:control->DMT/_2:control;B->D:1;C->D:2;"
+            "DMT/_0->D:3;DMT/_1->D:4;DMT/_2->D:5");
+}
+
+// BiasAddGrad fusion in the presence of BackpropFilter. But nodes do not match
+// criteria for rewrite. So rewrite should not happen. 3rd input of
+// Conv2DBackpropFilter is different than input to BiasAddGrad.
+TEST_F(MklLayoutPassTest, NodeMerge_Conv2DBackpropFilterFusion_Negative1) {
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'B' op: 'Int32Input'}"
+      "node { name: 'C' op: 'Input'}"
+      "node { name: 'D' op: 'Conv2DBackpropFilter'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'      value { s: 'NCHW' } }"
+      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
+      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
+      " attr { key: 'padding'          value { s: 'SAME' } }"
+      " input: ['A', 'B', 'C'] }"
+      "node { name: 'E' op: 'BiasAddGrad'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'      value { s: 'NCHW' } }"
+      " input: ['A'] }");
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Input);B(Int32Input);C(Input);"
+            "D(_MklConv2DBackpropFilter);DMT/_0(Const);DMT/_1(Const);"
+            "DMT/_2(Const);E(BiasAddGrad)|A->D;A->E;A:control->DMT/_0:control;"
+            "A:control->DMT/_1:control;A:control->DMT/_2:control;B->D:1;C->D:2;"
+            "DMT/_0->D:3;DMT/_1->D:4;DMT/_2->D:5");
+}
+
+// BiasAddGrad fusion, but nodes do not match criteria for fusion.
+// Different input formats.
+TEST_F(MklLayoutPassTest, NodeMerge_Conv2DBackpropFilterFusion_Negative2) {
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'B' op: 'Int32Input'}"
+      "node { name: 'C' op: 'Input'}"
+      "node { name: 'D' op: 'Conv2DBackpropFilter'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'      value { s: 'NCHW' } }"
+      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
+      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
+      " attr { key: 'padding'          value { s: 'SAME' } }"
+      " input: ['A', 'B', 'C'] }"
+      "node { name: 'E' op: 'BiasAddGrad'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'      value { s: 'NHWC' } }"
+      " input: ['A'] }");
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Input);B(Int32Input);C(Input);"
+            "D(_MklConv2DBackpropFilter);DMT/_0(Const);DMT/_1(Const);"
+            "DMT/_2(Const);E(BiasAddGrad)|A->D;A->E;A:control->DMT/_0:control;"
+            "A:control->DMT/_1:control;A:control->DMT/_2:control;B->D:1;C->D:2;"
+            "DMT/_0->D:3;DMT/_1->D:4;DMT/_2->D:5");
+}
+
+// BiasAddGrad fusion in the presence of BackpropFilter only. Fusion is done
+// before node rewrite. Check this ordering.
+TEST_F(MklLayoutPassTest, NodeMerge_Conv2DBackpropFilterFusion_Negative3) {
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'B' op: 'Input'}"
+      "node { name: 'C' op: 'Input'}"
+      "node { name: 'M' op: '_MklInput'}"
+      "node { name: 'N' op: '_MklInput'}"
+      "node { name: 'O' op: '_MklInput'}"
+      "node { name: 'D' op: '_MklConv2DWithBias'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'      value { s: 'NCHW' } }"
+      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
+      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
+      " attr { key: 'padding'          value { s: 'SAME' } }"
+      " input: ['A', 'B', 'C', 'M', 'N', 'O']}"
+      "node { name: 'E' op: 'Zeta'"
+      " attr {key: 'T'                 value { type: DT_FLOAT } }"
+      " input: ['D', 'A']}"
+      "node { name: 'F' op: 'Int32Input'}"
+      "node { name: 'G' op: '_MklConv2DBackpropFilter'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'      value { s: 'NCHW' } }"
+      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
+      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
+      " attr { key: 'padding'          value { s: 'SAME' } }"
+      " input: ['E', 'F', 'A', 'M', 'N', 'O'] }"
+      "node { name: 'H' op: 'BiasAddGrad'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'      value { s: 'NCHW' } }"
+      " input: ['E'] }");
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Input);B(Input);C(Input);D(_MklConv2DWithBias);"
+            "E(Zeta);F(Int32Input);G(_MklConv2DBackpropFilter);H(BiasAddGrad);"
+            "M(_MklInput);N(_MklInput);O(_MklInput)|A->D;A->E:1;A->G:2;B->D:1;"
+            "C->D:2;D->E;E->G;E->H;F->G:1;M->D:3;M->G:3;N->D:4;N->G:4;O->D:5;"
+            "O->G:5");
+}
+
+// C=Conv2D(A,B); E=BiasAdd(C,D); Y=Zeta(E,X);
+// G=Conv2DBackpropInput(F,B,E)
+// This is a case of node rewrite followed by node merge followed by connecting
+// filter output of Conv2DWithBias to filter input of Conv2DBackpropInput.
+TEST_F(MklLayoutPassTest, NodeMerge_Conv2DWithBias_ConvBpropInput_FilterFwd) {
+  CHECK_EQ(kTensorOrdering, MklTfTensorOrdering::TENSORS_CONTIGUOUS);
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'B' op: 'Input'}"
+      "node { name: 'C' op: 'Conv2D'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'      value { s: 'NCHW' } }"
+      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
+      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
+      " attr { key: 'padding'          value { s: 'SAME' } }"
+      " input: ['A', 'B']}"
+      "node { name: 'D' op: 'Input'}"
+      "node { name: 'E' op: 'BiasAdd'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'      value { s: 'NCHW' } }"
+      " input: ['C', 'D'] }"
+      "node { name: 'X' op: 'Input'}"
+      "node { name: 'Y' op: 'Zeta'"
+      " attr {key: 'T'                 value { type: DT_FLOAT } }"
+      " input: ['E', 'X']}"
+      "node { name: 'F' op: 'Int32Input'}"
+      "node { name: 'G' op: 'Conv2DBackpropInput'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'      value { s: 'NCHW' } }"
+      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
+      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
+      " attr { key: 'padding'          value { s: 'SAME' } }"
+      " input: ['F', 'B', 'E']}"
+      "node { name: 'Z' op: 'Zeta'"
+      " attr {key: 'T'                 value { type: DT_FLOAT } }"
+      " input: ['G', 'X']}");
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Input);B(Input);D(Input);DMT/_0(Const);DMT/_1(Const);"
+            "DMT/_2(Const);DMT/_3(Const);E(_MklConv2DWithBias);F(Int32Input);"
+            "G(_MklConv2DBackpropInput);X(Input);Y(Zeta);Z(Zeta)|"
+            "A->E;A:control->DMT/_0:control;A:control->DMT/_1:control;"
+            "A:control->DMT/_2:control;B->E:1;D->E:2;DMT/_0->E:3;"
+            "DMT/_1->E:4;DMT/_2->E:5;DMT/_3->G:3;E->G:2;E->Y;E:1->G:1;E:2->G:5;"
+            "E:3->G:4;F->G;F:control->DMT/_3:control;G->Z;X->Y:1;X->Z:1");
+}
+
+/////////////////////////////////////////////////////////////////////
+//  Unit tests related to rewriting node to Mkl node
+/////////////////////////////////////////////////////////////////////
+
+// Single Conv2D Op; No Mkl layer on the input and on the output.
+// We will generate dummy Mkl tensor as 2nd input of Conv2D.
+TEST_F(MklLayoutPassTest, NodeRewrite_Conv2D_Basic) {
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'B' op: 'Input'}"
+      "node { name: 'C' op: 'Conv2D'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'      value { s: 'NCHW' } }"
+      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
+      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
+      " attr { key: 'padding'          value { s: 'SAME' } }"
+      " input: ['A', 'B']}"
+      "node { name: 'D' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
+      " input: ['B', 'C'] }");
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Input);B(Input);C(_MklConv2D);D(Zeta);DMT/_0(Const);"
+            "DMT/_1(Const)|A->C;A:control->DMT/_0:control;"
+            "A:control->DMT/_1:control;B->C:1;B->D;C->D:1;DMT/_0->C:2;"
+            "DMT/_1->C:3");
+}
+
+// 2 Conv2D Ops in sequence. Both should get transformed and 1st Conv2D will
+// have 2 outputs, both of which will be inputs to next Conv2D.
+TEST_F(MklLayoutPassTest, NodeRewrite_Conv2D_Positive1) {
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'B' op: 'Input'}"
+      "node { name: 'C' op: 'Conv2D'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'      value { s: 'NCHW' } }"
+      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
+      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
+      " attr { key: 'padding'          value { s: 'SAME' } }"
+      " input: ['A', 'B']}"
+      "node { name: 'D' op: 'Conv2D'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'      value { s: 'NCHW' } }"
+      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
+      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
+      " attr { key: 'padding'          value { s: 'SAME' } }"
+      " input: ['A', 'C']}"
+      "node { name: 'E' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
+      " input: ['C', 'D'] }");
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Input);B(Input);C(_MklConv2D);D(_MklConv2D);DMT/_0(Const);"
+            "DMT/_1(Const);DMT/_2(Const);E(Zeta)|A->C;A->D;"
+            "A:control->DMT/_0:control;A:control->DMT/_1:control;"
+            "A:control->DMT/_2:control;B->C:1;C->D:1;C->E;"
+            "C:2->D:3;D->E:1;DMT/_0->C:2;DMT/_1->C:3;DMT/_2->D:2");
+}
+
+// Conv2D with INT32 which is not supported by Mkl
+TEST_F(MklLayoutPassTest, NodeRewrite_Conv2D_Negative_UnsupportedType) {
+  InitGraph(
+      "node { name: 'A' op: 'HalfInput'}"
+      "node { name: 'B' op: 'HalfInput'}"
+      "node { name: 'C' op: 'Conv2D'"
+      " attr { key: 'T'                value { type: DT_HALF } }"
+      " attr { key: 'data_format'      value { s: 'NCHW' } }"
+      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
+      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
+      " attr { key: 'padding'          value { s: 'SAME' } }"
+      " input: ['A', 'B']}"
+      "node { name: 'D' op: 'Zeta' attr { key: 'T' value { type: DT_HALF } }"
+      " input: ['B', 'C'] }");
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(HalfInput);B(HalfInput);C(Conv2D);D(Zeta)|"
+            "A->C;B->C:1;B->D;C->D:1");
+}
+
+TEST_F(MklLayoutPassTest, NodeRewrite_Conv2DGradFilter_Positive) {
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'B' op: 'Int32Input'}"
+      "node { name: 'C' op: 'Input'}"
+      "node { name: 'D' op: 'Conv2DBackpropFilter'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'      value { s: 'NCHW' } }"
+      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
+      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
+      " attr { key: 'padding'          value { s: 'SAME' } }"
+      " input: ['A', 'B', 'C']}"
+      "node { name: 'E' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
+      " input: ['A', 'D'] }");
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Input);B(Int32Input);C(Input);D(_MklConv2DBackpropFilter);"
+            "DMT/_0(Const);DMT/_1(Const);DMT/_2(Const);E(Zeta)|"
+            "A->D;A->E;A:control->DMT/_0:control;A:control->DMT/_1:control;"
+            "A:control->DMT/_2:control;B->D:1;C->D:2;D->E:1;DMT/_0->D:3;"
+            "DMT/_1->D:4;DMT/_2->D:5");
+}
+
+TEST_F(MklLayoutPassTest, NodeRewrite_Conv2DGradInput_Positive) {
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'B' op: 'Int32Input'}"
+      "node { name: 'C' op: 'Input'}"
+      "node { name: 'D' op: 'Conv2DBackpropInput'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'      value { s: 'NCHW' } }"
+      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
+      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
+      " attr { key: 'padding'          value { s: 'SAME' } }"
+      " input: ['B', 'A', 'C']}"
+      "node { name: 'E' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
+      " input: ['A', 'D'] }");
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Input);B(Int32Input);C(Input);D(_MklConv2DBackpropInput);"
+            "DMT/_0(Const);DMT/_1(Const);DMT/_2(Const);E(Zeta)|"
+            "A->D:1;A->E;B->D;B:control->DMT/_0:control;"
+            "B:control->DMT/_1:control;B:control->DMT/_2:control;C->D:2;"
+            "D->E:1;DMT/_0->D:3;DMT/_1->D:4;DMT/_2->D:5");
+}
+
+// Check that we never rewrite BiasAddGrad.
+TEST_F(MklLayoutPassTest, NodeRewrite_BiasAddGrad_Positive) {
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'B' op: 'Input'}"
+      "node { name: 'C' op: 'Polygamma'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " input: ['A', 'B']}"
+      "node { name: 'D' op: 'Zeta'"
+      " attr {key: 'T'                 value { type: DT_FLOAT } }"
+      " input: ['C', 'A']}"
+      "node { name: 'E' op: 'BiasAddGrad'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'      value { s: 'NCHW' } }"
+      " input: ['D'] }");
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Input);B(Input);C(Polygamma);D(Zeta);E(BiasAddGrad)|"
+            "A->C;A->D:1;B->C:1;C->D;D->E");
+}
+
+// Check that we never rewrite BiasAddGrad.
+TEST_F(MklLayoutPassTest, NodeRewrite_BiasAddGrad_Positive1) {
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'B' op: 'Input'}"
+      "node { name: 'C' op: 'MatMul'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " attr { key: 'transpose_a'      value { b: false } }"
+      " attr { key: 'transpose_b'      value { b: false } }"
+      " input: ['A', 'B']}"
+      "node { name: 'D' op: 'Zeta'"
+      " attr {key: 'T'                 value { type: DT_FLOAT } }"
+      " input: ['C', 'A']}"
+      "node { name: 'E' op: 'BiasAddGrad'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'      value { s: 'NCHW' } }"
+      " input: ['D'] }");
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Input);B(Input);C(MatMul);D(Zeta);E(BiasAddGrad)|"
+            "A->C;A->D:1;B->C:1;C->D;D->E");
+}
+
+// Check that we never rewrite BiasAddGrad.
+TEST_F(MklLayoutPassTest, NodeRewrite_BiasAddGrad_Positive2) {
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'B' op: 'Input'}"
+      "node { name: 'M' op: '_MklInput'}"
+      "node { name: 'N' op: '_MklInput'}"
+      "node { name: 'C' op: '_MklConv2D'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'      value { s: 'NCHW' } }"
+      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
+      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
+      " attr { key: 'padding'          value { s: 'SAME' } }"
+      " input: ['A', 'B', 'M', 'N']}"
+      "node { name: 'D' op: 'Zeta'"
+      " attr {key: 'T'                 value { type: DT_FLOAT } }"
+      " input: ['C', 'A']}"
+      "node { name: 'E' op: 'BiasAddGrad'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'      value { s: 'NCHW' } }"
+      " input: ['D'] }");
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Input);B(Input);C(_MklConv2D);D(Zeta);E(BiasAddGrad);"
+            "M(_MklInput);N(_MklInput)|A->C;A->D:1;B->C:1;C->D;D->E;"
+            "M->C:2;N->C:3");
+}
+
+// Concat Op test: Concat with no Mkl layer feeding it
+TEST_F(MklLayoutPassTest, NodeRewrite_Concat_Basic) {
+  InitGraph(
+      "node { name: 'A' op: 'Const' "
+      " attr { key: 'dtype' value { type: DT_INT32 } }"
+      " attr { key: 'value' value { "
+      "    tensor { dtype: DT_INT32 tensor_shape { dim { size: 1 } } "
+      "    int_val: 0 } } } }"
+      "node { name: 'B' op: 'InputList'"
+      " attr { key: 'N'                value { i: 2 } }}"
+      "node { name: 'C' op: 'Input'}"
+      "node { name: 'D' op: 'Concat'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " attr { key: 'N'                value { i: 2 } }"
+      " input: ['A', 'B:0', 'B:1']}"
+      "node { name: 'E' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
+      " input: ['C', 'D'] }");
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Const);B(InputList);C(Input);D(_MklConcat);DMT/_0(Const);"
+            "DMT/_1(Const);DMT/_2(Const);E(Zeta)|A->D;A:control->DMT/_0:control;"
+            "A:control->DMT/_1:control;A:control->DMT/_2:control;B->D:1;"
+            "B:1->D:2;C->E;D->E:1;DMT/_0->D:3;DMT/_1->D:4;DMT/_2->D:5");
+}
+
+// Concat with 2 Mkl layers feeding it
+TEST_F(MklLayoutPassTest, NodeRewrite_Concat_Input_Mkl) {
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'B' op: 'Input'}"
+      "node { name: 'C' op: 'Input'}"
+      "node { name: 'D' op: 'Input'}"
+      "node { name: 'E' op: 'Conv2D'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'      value { s: 'NCHW' } }"
+      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
+      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
+      " attr { key: 'padding'          value { s: 'SAME' } }"
+      " input: ['A', 'B']}"
+      "node { name: 'F' op: 'Conv2D'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'      value { s: 'NCHW' } }"
+      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
+      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
+      " attr { key: 'padding'          value { s: 'SAME' } }"
+      " input: ['C', 'D']}"
+      "node { name: 'G' op: 'Const' "
+      " attr { key: 'dtype' value { type: DT_INT32 } }"
+      " attr { key: 'value' value { "
+      "    tensor { dtype: DT_INT32 tensor_shape { dim { size: 1 } } "
+      "    int_val: 0 } } } }"
+      "node { name: 'H' op: 'Concat'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " attr { key: 'N'                value { i: 2 } }"
+      " input: ['G', 'E', 'F']}"
+      "node { name: 'I' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
+      " input: ['A', 'H'] }");
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Input);B(Input);C(Input);D(Input);DMT/_0(Const);DMT/_1(Const);"
+            "DMT/_2(Const);DMT/_3(Const);DMT/_4(Const);E(_MklConv2D);"
+            "F(_MklConv2D);G(Const);H(_MklConcat);I(Zeta)|A->E;A->I;"
+            "A:control->DMT/_2:control;A:control->DMT/_3:control;"
+            "B->E:1;C->F;C:control->DMT/_0:control;C:control->DMT/_1:control;"
+            "D->F:1;DMT/_0->F:2;DMT/_1->F:3;DMT/_2->E:2;DMT/_3->E:3;"
+            "DMT/_4->H:3;E->H:1;E:2->H:4;F->H:2;F:2->H:5;G->H;"
+            "G:control->DMT/_4:control;H->I:1");
+}
+
+// Concat with 1 Mkl and 1 non-Mkl layer feeding it
+TEST_F(MklLayoutPassTest, NodeRewrite_Concat_Input_MixedMkl) {
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'B' op: 'Input'}"
+      "node { name: 'C' op: 'Input'}"
+      "node { name: 'D' op: 'Input'}"
+      "node { name: 'E' op: 'Conv2D'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'      value { s: 'NCHW' } }"
+      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
+      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
+      " attr { key: 'padding'          value { s: 'SAME' } }"
+      " input: ['A', 'B']}"
+      "node { name: 'F' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
+      " input: ['C', 'D']}"
+      "node { name: 'G' op: 'Const' "
+      " attr { key: 'dtype' value { type: DT_INT32 } }"
+      " attr { key: 'value' value { "
+      "    tensor { dtype: DT_INT32 tensor_shape { dim { size: 1 } } "
+      "    int_val: 0 } } } }"
+      "node { name: 'H' op: 'Concat'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " attr { key: 'N'                value { i: 2 } }"
+      " input: ['G', 'E', 'F']}"
+      "node { name: 'I' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
+      " input: ['A', 'H'] }");
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Input);B(Input);C(Input);D(Input);DMT/_0(Const);DMT/_1(Const);"
+            "DMT/_2(Const);DMT/_3(Const);E(_MklConv2D);F(Zeta);G(Const);"
+            "H(_MklConcat);I(Zeta)|A->E;A->I;A:control->DMT/_0:control;"
+            "A:control->DMT/_1:control;B->E:1;C->F;D->F:1;DMT/_0->E:2;"
+            "DMT/_1->E:3;DMT/_2->H:3;DMT/_3->H:5;E->H:1;E:2->H:4;F->H:2;"
+            "G->H;G:control->DMT/_2:control;G:control->DMT/_3:control;H->I:1");
+}
+
+// ConcatV2 Op test: ConcatV2 with no Mkl layer feeding it
+TEST_F(MklLayoutPassTest, NodeRewrite_ConcatV2_Basic) {
+  InitGraph(
+      "node { name: 'A' op: 'Const' "
+      " attr { key: 'dtype' value { type: DT_INT32 } }"
+      " attr { key: 'value' value { "
+      "    tensor { dtype: DT_INT32 tensor_shape { dim { size: 1 } } "
+      "    int_val: 0 } } } }"
+      "node { name: 'B' op: 'InputList'"
+      " attr { key: 'N'                value { i: 2 } }}"
+      "node { name: 'C' op: 'Input'}"
+      "node { name: 'D' op: 'ConcatV2'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " attr { key: 'Tidx'             value { type: DT_INT32 } }"
+      " attr { key: 'N'                value { i: 2 } }"
+      " input: ['B:0', 'B:1', 'A']}"
+      "node { name: 'E' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
+      " input: ['C', 'D'] }");
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Const);B(InputList);C(Input);D(_MklConcatV2);DMT/_0(Const);"
+            "DMT/_1(Const);DMT/_2(Const);E(Zeta)|A->D:2;B->D;B:1->D:1;"
+            "B:control->DMT/_0:control;B:control->DMT/_1:control;"
+            "B:control->DMT/_2:control;C->E;D->E:1;DMT/_0->D:3;"
+            "DMT/_1->D:4;DMT/_2->D:5");
+}
+
+// ConcatV2 with 2 Mkl layers feeding it
+TEST_F(MklLayoutPassTest, NodeRewrite_ConcatV2_Input_Mkl) {
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'B' op: 'Input'}"
+      "node { name: 'C' op: 'Input'}"
+      "node { name: 'D' op: 'Input'}"
+      "node { name: 'E' op: 'Conv2D'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'      value { s: 'NCHW' } }"
+      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
+      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
+      " attr { key: 'padding'          value { s: 'SAME' } }"
+      " input: ['A', 'B']}"
+      "node { name: 'F' op: 'Conv2D'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'      value { s: 'NCHW' } }"
+      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
+      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
+      " attr { key: 'padding'          value { s: 'SAME' } }"
+      " input: ['C', 'D']}"
+      "node { name: 'G' op: 'Const' "
+      " attr { key: 'dtype' value { type: DT_INT32 } }"
+      " attr { key: 'value' value { "
+      "    tensor { dtype: DT_INT32 tensor_shape { dim { size: 1 } } "
+      "    int_val: 0 } } } }"
+      "node { name: 'H' op: 'ConcatV2'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " attr { key: 'Tidx'             value { type: DT_INT32 } }"
+      " attr { key: 'N'                value { i: 2 } }"
+      " input: ['E', 'F', 'G']}"
+      "node { name: 'I' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
+      " input: ['A', 'H'] }");
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Input);B(Input);C(Input);D(Input);DMT/_0(Const);DMT/_1(Const);"
+            "DMT/_2(Const);DMT/_3(Const);DMT/_4(Const);E(_MklConv2D);"
+            "F(_MklConv2D);G(Const);H(_MklConcatV2);I(Zeta)|A->E;A->I;"
+            "A:control->DMT/_2:control;A:control->DMT/_3:control;B->E:1;C->F;"
+            "C:control->DMT/_0:control;C:control->DMT/_1:control;"
+            "D->F:1;DMT/_0->F:2;DMT/_1->F:3;DMT/_2->E:2;DMT/_3->E:3;"
+            "DMT/_4->H:5;E->H;E:2->H:3;E:control->DMT/_4:control;F->H:1;"
+            "F:2->H:4;G->H:2;H->I:1");
+}
+
+// ConcatV2 with 1 Mkl and 1 non-Mkl layer feeding it
+TEST_F(MklLayoutPassTest, NodeRewrite_ConcatV2_Input_MixedMkl) {
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'B' op: 'Input'}"
+      "node { name: 'C' op: 'Input'}"
+      "node { name: 'D' op: 'Input'}"
+      "node { name: 'E' op: 'Conv2D'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'      value { s: 'NCHW' } }"
+      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
+      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
+      " attr { key: 'padding'          value { s: 'SAME' } }"
+      " input: ['A', 'B']}"
+      "node { name: 'F' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
+      " input: ['C', 'D']}"
+      "node { name: 'G' op: 'Const' "
+      " attr { key: 'dtype' value { type: DT_INT32 } }"
+      " attr { key: 'value' value { "
+      "    tensor { dtype: DT_INT32 tensor_shape { dim { size: 1 } } "
+      "    int_val: 0 } } } }"
+      "node { name: 'H' op: 'ConcatV2'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " attr { key: 'Tidx'             value { type: DT_INT32 } }"
+      " attr { key: 'N'                value { i: 2 } }"
+      " input: ['E', 'F', 'G']}"
+      "node { name: 'I' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
+      " input: ['A', 'H'] }");
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Input);B(Input);C(Input);D(Input);DMT/_0(Const);DMT/_1(Const);"
+            "DMT/_2(Const);DMT/_3(Const);E(_MklConv2D);F(Zeta);G(Const);"
+            "H(_MklConcatV2);I(Zeta)|A->E;A->I;A:control->DMT/_0:control;"
+            "A:control->DMT/_1:control;B->E:1;C->F;D->F:1;DMT/_0->E:2;"
+            "DMT/_1->E:3;DMT/_2->H:4;DMT/_3->H:5;E->H;E:2->H:3;"
+            "E:control->DMT/_2:control;E:control->DMT/_3:control;F->H:1;"
+            "G->H:2;H->I:1");
+}
+
+TEST_F(MklLayoutPassTest, NodeRewrite_Relu_Positive) {
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'B' op: 'Relu'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " input: ['A'] }"
+      "node { name: 'C' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
+      " input: ['A', 'B'] }");
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Input);B(_MklRelu);C(Zeta);DMT/_0(Const)|A->B;A->C;"
+            "A:control->DMT/_0:control;B->C:1;DMT/_0->B:1");
+}
+
+TEST_F(MklLayoutPassTest, NodeRewrite_ReluGrad_Positive) {
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'B' op: 'Input'}"
+      "node { name: 'C' op: 'ReluGrad'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " input: ['A', 'B'] }"
+      "node { name: 'D' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
+      " input: ['A', 'C'] }");
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Input);B(Input);C(_MklReluGrad);D(Zeta);DMT/_0(Const);"
+            "DMT/_1(Const)|A->C;A->D;A:control->DMT/_0:control;"
+            "A:control->DMT/_1:control;B->C:1;C->D:1;DMT/_0->C:2;DMT/_1->C:3");
+}
+
+TEST_F(MklLayoutPassTest, NodeRewrite_ReluReluGrad_Positive) {
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'B' op: 'Relu'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " input: ['A'] }"
+      "node { name: 'C' op: 'ReluGrad'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " input: ['A', 'B'] }"
+      "node { name: 'D' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
+      " input: ['A', 'C'] }");
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Input);B(_MklRelu);C(_MklReluGrad);D(Zeta);DMT/_0(Const);"
+            "DMT/_1(Const)|A->B;A->C;A->D;A:control->DMT/_0:control;"
+            "A:control->DMT/_1:control;B->C:1;B:1->C:3;C->D:1;DMT/_0->B:1;"
+            "DMT/_1->C:2");
+}
+
+TEST_F(MklLayoutPassTest, NodeRewrite_AvgPool_Positive) {
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'B' op: 'AvgPool'"
+      " attr { key: 'T'            value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'  value { s: 'NCHW' } }"
+      " attr { key: 'ksize'        value { list: {i: 1, i:1, i:3, i:3} } }"
+      " attr { key: 'padding'      value { s: 'VALID' } }"
+      " attr { key: 'strides'      value { list: {i: 1, i:1, i:2, i:2} } }"
+      " input: ['A'] }"
+      "node { name: 'C' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
+      " input: ['A', 'B'] }");
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Input);B(_MklAvgPool);C(Zeta);DMT/_0(Const)|A->B;A->C;"
+            "A:control->DMT/_0:control;B->C:1;DMT/_0->B:1");
+}
+
+TEST_F(MklLayoutPassTest, NodeRewrite_AvgPoolGrad_Positive) {
+  InitGraph(
+      "node { name: 'A' op: 'Int32Input'}"
+      "node { name: 'B' op: 'Input'}"
+      "node { name: 'C' op: 'AvgPoolGrad' "
+      " attr { key: 'T'            value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'  value { s: 'NCHW' } }"
+      " attr { key: 'ksize'        value { list: {i: 1, i:1, i:3, i:3} } }"
+      " attr { key: 'padding'      value { s: 'VALID' } }"
+      " attr { key: 'strides'      value { list: {i: 1, i:1, i:2, i:2} } }"
+      " input: ['A', 'B'] }"
+      "node { name: 'D' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
+      " input: ['B', 'C'] }");
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Int32Input);B(Input);C(_MklAvgPoolGrad);D(Zeta);DMT/_0(Const);"
+            "DMT/_1(Const)|A->C;A:control->DMT/_0:control;"
+            "A:control->DMT/_1:control;B->C:1;B->D;C->D:1;DMT/_0->C:2;"
+            "DMT/_1->C:3");
+}
+
+TEST_F(MklLayoutPassTest, NodeRewrite_AvgPoolAvgPoolGrad_Positive) {
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'I' op: 'Int32Input'}"
+      "node { name: 'B' op: 'AvgPool'"
+      " attr { key: 'T'            value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'  value { s: 'NCHW' } }"
+      " attr { key: 'ksize'        value { list: {i: 1, i:1, i:3, i:3} } }"
+      " attr { key: 'padding'      value { s: 'VALID' } }"
+      " attr { key: 'strides'      value { list: {i: 1, i:1, i:2, i:2} } }"
+      " input: ['A'] }"
+      "node { name: 'C' op: 'AvgPoolGrad' "
+      " attr { key: 'T'            value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'  value { s: 'NCHW' } }"
+      " attr { key: 'ksize'        value { list: {i: 1, i:1, i:3, i:3} } }"
+      " attr { key: 'padding'      value { s: 'VALID' } }"
+      " attr { key: 'strides'      value { list: {i: 1, i:1, i:2, i:2} } }"
+      " input: ['I', 'B'] }"
+      "node { name: 'D' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
+      " input: ['A', 'C'] }");
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Input);B(_MklAvgPool);C(_MklAvgPoolGrad);D(Zeta);DMT/_0(Const);"
+            "DMT/_1(Const);I(Int32Input)|A->B;A->D;A:control->DMT/_0:control;"
+            "B->C:1;B:1->C:3;C->D:1;DMT/_0->B:1;DMT/_1->C:2;I->C;"
+            "I:control->DMT/_1:control");
+}
+
+TEST_F(MklLayoutPassTest, NodeRewrite_FusedBatchNormGrad_Positive) {
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'B' op: 'Input'}"
+      "node { name: 'C' op: 'Input'}"
+      "node { name: 'D' op: 'Input'}"
+      "node { name: 'E' op: 'Input'}"
+      "node { name: 'F' op: 'FusedBatchNormGrad'"
+      " attr { key: 'T'            value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'  value { s: 'NCHW' } }"
+      " attr { key: 'epsilon'      value { f: 0.0001 } }"
+      " attr { key: 'is_training'  value { b: true } }"
+      " input: ['A', 'B', 'C', 'D', 'E'] }"
+      "node { name: 'G' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
+      " input: ['A', 'F'] }");
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Input);B(Input);C(Input);D(Input);DMT/_0(Const);DMT/_1(Const);"
+            "DMT/_2(Const);DMT/_3(Const);DMT/_4(Const);E(Input);"
+            "F(_MklFusedBatchNormGrad);G(Zeta)|A->F;A->G;"
+            "A:control->DMT/_0:control;A:control->DMT/_1:control;"
+            "A:control->DMT/_2:control;A:control->DMT/_3:control;"
+            "A:control->DMT/_4:control;B->F:1;C->F:2;D->F:3;"
+            "DMT/_0->F:5;DMT/_1->F:6;DMT/_2->F:7;DMT/_3->F:8;DMT/_4->F:9;"
+            "E->F:4;F->G:1");
+}
+
+TEST_F(MklLayoutPassTest, NodeRewrite_FusedBatchNorm_Positive) {
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'B' op: 'Input'}"
+      "node { name: 'C' op: 'Input'}"
+      "node { name: 'D' op: 'Input'}"
+      "node { name: 'E' op: 'Input'}"
+      "node { name: 'F' op: 'FusedBatchNorm'"
+      " attr { key: 'T'            value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'  value { s: 'NCHW' } }"
+      " attr { key: 'epsilon'      value { f: 0.0001 } }"
+      " attr { key: 'is_training'  value { b: true } }"
+      " input: ['A', 'B', 'C', 'D', 'E'] }"
+      "node { name: 'G' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
+      " input: ['A', 'F'] }");
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Input);B(Input);C(Input);D(Input);DMT/_0(Const);DMT/_1(Const);"
+            "DMT/_2(Const);DMT/_3(Const);DMT/_4(Const);E(Input);"
+            "F(_MklFusedBatchNorm);G(Zeta)|A->F;A->G;"
+            "A:control->DMT/_0:control;A:control->DMT/_1:control;"
+            "A:control->DMT/_2:control;A:control->DMT/_3:control;"
+            "A:control->DMT/_4:control;B->F:1;C->F:2;D->F:3;"
+            "DMT/_0->F:5;DMT/_1->F:6;DMT/_2->F:7;DMT/_3->F:8;DMT/_4->F:9;"
+            "E->F:4;F->G:1");
+}
+
+/////////////////////////////////////////////////////////////////////
+//  Unit tests related to rewriting node for workspace edges
+/////////////////////////////////////////////////////////////////////
+
+/* Test LRN->MaxPool->MaxPoolGrad->LRNGrad replacement by workspace nodes. */
+TEST_F(MklLayoutPassTest, MaxPoolLRN_Positive) {
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'B' op: 'LRN'"
+      " attr { key: 'T'            value { type: DT_FLOAT } }"
+      " attr { key: 'alpha'        value { f: 0.001 } }"
+      " attr { key: 'beta'         value { f: 0.75 } }"
+      " attr { key: 'bias'         value { f: 1.0 } }"
+      " attr { key: 'data_format'  value { s: 'NCHW' } }"
+      " attr { key: 'depth_radius' value { i: 2 } }"
+      " input: ['A'] }"
+      "node { name: 'C' op: 'MaxPool'"
+      " attr { key: 'T'            value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'  value { s: 'NCHW' } }"
+      " attr { key: 'ksize'        value { list: {i: 1, i:1, i:3, i:3} } }"
+      " attr { key: 'padding'      value { s: 'VALID' } }"
+      " attr { key: 'strides'      value { list: {i: 1, i:1, i:2, i:2} } }"
+      " input: ['B'] }"
+      "node { name: 'D' op: 'Input'}"
+      "node { name: 'E' op: 'MaxPoolGrad'"
+      " attr { key: 'T'            value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'  value { s: 'NCHW' } }"
+      " attr { key: 'ksize'        value { list: {i: 1, i:1, i:3, i:3} } }"
+      " attr { key: 'padding'      value { s: 'VALID' } }"
+      " attr { key: 'strides'      value { list: {i: 1, i:1, i:2, i:2} } }"
+      " input: ['B', 'C', 'D'] }"
+      "node { name: 'F' op: 'Input'}"
+      "node { name: 'G' op: 'LRNGrad'"
+      " attr { key: 'T'            value { type: DT_FLOAT } }"
+      " attr { key: 'alpha'        value { f: 0.001 } }"
+      " attr { key: 'beta'         value { f: 0.75 } }"
+      " attr { key: 'bias'         value { f: 1.0 } }"
+      " attr { key: 'data_format'  value { s: 'NCHW' } }"
+      " attr { key: 'depth_radius' value { i: 2 } }"
+      " input: ['E', 'F', 'B'] }"
+      "node { name: 'H' op: 'Input'}"
+      "node { name: 'I' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
+      " input: ['H', 'G'] }");
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+      "A(Input);B(_MklLRN);C(_MklMaxPool);D(Input);DMT/_0(Const);DMT/_1(Const);"
+      "DMT/_2(Const);E(_MklMaxPoolGrad);F(Input);G(_MklLRNGrad);H(Input);"
+      "I(Zeta)|A->B;A:control->DMT/_0:control;B->C;B->E;B->G:2;B:1->G:3;"
+      "B:2->C:1;B:2->E:4;B:2->G:6;B:3->G:7;B:control->DMT/_1:control;C->E:1;"
+      "C:1->E:3;C:2->E:5;C:3->E:7;D->E:2;DMT/_0->B:1;DMT/_1->E:6;DMT/_2->G:5;"
+      "E->G;E:1->G:4;E:control->DMT/_2:control;F->G:1;G->I:1;H->I");
+}
+
+/* Test LRN->LRNGrad replacement by workspace nodes. */
+TEST_F(MklLayoutPassTest, LRN_Positive) {
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'B' op: 'LRN'"
+      " attr { key: 'T'            value { type: DT_FLOAT } }"
+      " attr { key: 'alpha'        value { f: 0.001 } }"
+      " attr { key: 'beta'         value { f: 0.75 } }"
+      " attr { key: 'bias'         value { f: 1.0 } }"
+      " attr { key: 'data_format'  value { s: 'NCHW' } }"
+      " attr { key: 'depth_radius' value { i: 2 } }"
+      " input: ['A'] }"
+      "node { name: 'C' op: 'Input'}"
+      "node { name: 'D' op: 'Input'}"
+      "node { name: 'E' op: 'LRNGrad'"
+      " attr { key: 'T'            value { type: DT_FLOAT } }"
+      " attr { key: 'alpha'        value { f: 0.001 } }"
+      " attr { key: 'beta'         value { f: 0.75 } }"
+      " attr { key: 'bias'         value { f: 1.0 } }"
+      " attr { key: 'data_format'  value { s: 'NCHW' } }"
+      " attr { key: 'depth_radius' value { i: 2 } }"
+      " input: ['C', 'D', 'B'] }"
+      "node { name: 'F' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
+      " input: ['C', 'E'] }");
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Input);B(_MklLRN);C(Input);D(Input);DMT/_0(Const);DMT/_1(Const);"
+            "DMT/_2(Const);E(_MklLRNGrad);F(Zeta)|"
+            "A->B;A:control->DMT/_0:control;B->E:2;B:1->E:3;B:2->E:6;B:3->E:7;"
+            "C->E;C->F;C:control->DMT/_1:control;C:control->DMT/_2:control;"
+            "D->E:1;DMT/_0->B:1;DMT/_1->E:4;DMT/_2->E:5;E->F:1");
+}
+
+/* Test LRN->LRNGrad replacement when only one of them is present. */
+TEST_F(MklLayoutPassTest, LRN_Negative1) {
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'B' op: 'LRN'"
+      " attr { key: 'T'            value { type: DT_FLOAT } }"
+      " attr { key: 'alpha'        value { f: 0.001 } }"
+      " attr { key: 'beta'         value { f: 0.75 } }"
+      " attr { key: 'bias'         value { f: 1.0 } }"
+      " attr { key: 'data_format'  value { s: 'NCHW' } }"
+      " attr { key: 'depth_radius' value { i: 2 } }"
+      " input: ['A'] }"
+      "node { name: 'C' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
+      " input: ['A', 'B'] }");
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Input);B(_MklLRN);C(Zeta);DMT/_0(Const)|"
+            "A->B;A->C;A:control->DMT/_0:control;B->C:1;DMT/_0->B:1");
+}
+
+/* Test LRN->LRNGrad replacement when only one of them is present. */
+TEST_F(MklLayoutPassTest, LRN_Negative2) {
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'B' op: 'Input'}"
+      "node { name: 'C' op: 'Input'}"
+      "node { name: 'D' op: 'LRNGrad'"
+      " attr { key: 'T'            value { type: DT_FLOAT } }"
+      " attr { key: 'alpha'        value { f: 0.001 } }"
+      " attr { key: 'beta'         value { f: 0.75 } }"
+      " attr { key: 'bias'         value { f: 1.0 } }"
+      " attr { key: 'data_format'  value { s: 'NCHW' } }"
+      " attr { key: 'depth_radius' value { i: 2 } }"
+      " input: ['A', 'B', 'C'] }"
+      "node { name: 'E' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
+      " input: ['A', 'D'] }");
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Input);B(Input);C(Input);D(_MklLRNGrad);DMT/_0(Const);"
+            "DMT/_1(Const);DMT/_2(Const);DMT/_3(Const);DMT/_4(Const);E(Zeta)|"
+            "A->D;A->E;A:control->DMT/_0:control;A:control->DMT/_1:control;"
+            "A:control->DMT/_2:control;A:control->DMT/_3:control;"
+            "A:control->DMT/_4:control;B->D:1;C->D:2;D->E:1;DMT/_0->D:3;"
+            "DMT/_1->D:7;DMT/_2->D:4;DMT/_3->D:5;DMT/_4->D:6");
+}
+
+/* Test LRN->LRNGrad negative case, where single LRN feeds
+   2 LRNGrad nodes at different slots. */
+TEST_F(MklLayoutPassTest, LRN_Negative3) {
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'B' op: 'LRN'"
+      " attr { key: 'T'            value { type: DT_FLOAT } }"
+      " attr { key: 'alpha'        value { f: 0.001 } }"
+      " attr { key: 'beta'         value { f: 0.75 } }"
+      " attr { key: 'bias'         value { f: 1.0 } }"
+      " attr { key: 'data_format'  value { s: 'NCHW' } }"
+      " attr { key: 'depth_radius' value { i: 2 } }"
+      " input: ['A'] }"
+      "node { name: 'C' op: 'Input'}"
+      "node { name: 'D' op: 'Input'}"
+      "node { name: 'E' op: 'LRNGrad'"
+      " attr { key: 'T'            value { type: DT_FLOAT } }"
+      " attr { key: 'alpha'        value { f: 0.001 } }"
+      " attr { key: 'beta'         value { f: 0.75 } }"
+      " attr { key: 'bias'         value { f: 1.0 } }"
+      " attr { key: 'data_format'  value { s: 'NCHW' } }"
+      " attr { key: 'depth_radius' value { i: 2 } }"
+      " input: ['C', 'D', 'B'] }"
+      "node { name: 'F' op: 'LRNGrad'"
+      " attr { key: 'T'            value { type: DT_FLOAT } }"
+      " attr { key: 'alpha'        value { f: 0.001 } }"
+      " attr { key: 'beta'         value { f: 0.75 } }"
+      " attr { key: 'bias'         value { f: 1.0 } }"
+      " attr { key: 'data_format'  value { s: 'NCHW' } }"
+      " attr { key: 'depth_radius' value { i: 2 } }"
+      " input: ['C', 'B', 'D'] }"
+      "node { name: 'G' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
+      " input: ['E', 'F'] }");
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Input);B(_MklLRN);C(Input);D(Input);DMT/_0(Const);DMT/_1(Const);"
+            "DMT/_2(Const);DMT/_3(Const);DMT/_4(Const);DMT/_5(Const);"
+            "DMT/_6(Const);E(_MklLRNGrad);F(_MklLRNGrad);G(Zeta)|A->B;"
+            "A:control->DMT/_0:control;B->E:2;"
+            "B->F:1;B:1->E:3;B:2->E:6;B:2->F:5;B:3->E:7;C->E;C->F;"
+            "C:control->DMT/_1:control;C:control->DMT/_2:control;"
+            "C:control->DMT/_3:control;C:control->DMT/_4:control;"
+            "C:control->DMT/_5:control;C:control->DMT/_6:control;"
+            "D->E:1;D->F:2;DMT/_0->B:1;DMT/_1->F:3;DMT/_2->F:7;DMT/_3->F:4;"
+            "DMT/_4->F:6;DMT/_5->E:4;DMT/_6->E:5;E->G;F->G:1");
+}
+
+/* Test MaxPool->MaxPoolGrad replacement by workspace+rewrite nodes. */
+TEST_F(MklLayoutPassTest, NodeWorkspace_MaxPool_Positive) {
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'B' op: 'MaxPool'"
+      " attr { key: 'T'            value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'  value { s: 'NCHW' } }"
+      " attr { key: 'ksize'        value { list: {i: 1, i:1, i:3, i:3} } }"
+      " attr { key: 'padding'      value { s: 'VALID' } }"
+      " attr { key: 'strides'      value { list: {i: 1, i:1, i:2, i:2} } }"
+      " input: ['A'] }"
+      "node { name: 'C' op: 'Input'}"
+      "node { name: 'D' op: 'Input'}"
+      "node { name: 'E' op: 'MaxPoolGrad'"
+      " attr { key: 'T'            value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'  value { s: 'NCHW' } }"
+      " attr { key: 'ksize'        value { list: {i: 1, i:1, i:3, i:3} } }"
+      " attr { key: 'padding'      value { s: 'VALID' } }"
+      " attr { key: 'strides'      value { list: {i: 1, i:1, i:2, i:2} } }"
+      " input: ['C', 'B', 'D'] }"
+      "node { name: 'F' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
+      " input: ['C', 'E'] }");
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Input);B(_MklMaxPool);C(Input);D(Input);DMT/_0(Const);"
+            "DMT/_1(Const);DMT/_2(Const);E(_MklMaxPoolGrad);F(Zeta)|"
+            "A->B;A:control->DMT/_0:control;B->E:1;B:1->E:3;B:2->E:5;B:3->E:7;"
+            "C->E;C->F;C:control->DMT/_1:control;C:control->DMT/_2:control;"
+            "D->E:2;DMT/_0->B:1;DMT/_1->E:4;DMT/_2->E:6;E->F:1");
+}
+
+// Test MaxPool>MaxPoolGrad replacement when only one of them is present.
+// In this case, we will rewrite MaxPool node but workspace edges will not
+// be present.
+TEST_F(MklLayoutPassTest, NodeWorkspace_MaxPool_Negative1) {
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'B' op: 'MaxPool'"
+      " attr { key: 'T'            value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'  value { s: 'NCHW' } }"
+      " attr { key: 'ksize'        value { list: {i: 1, i:1, i:3, i:3} } }"
+      " attr { key: 'padding'      value { s: 'VALID' } }"
+      " attr { key: 'strides'      value { list: {i: 1, i:1, i:2, i:2} } }"
+      " input: ['A'] }"
+      "node { name: 'C' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
+      " input: ['A', 'B'] }");
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Input);B(_MklMaxPool);C(Zeta);DMT/_0(Const)|"
+            "A->B;A->C;A:control->DMT/_0:control;B->C:1;DMT/_0->B:1");
+}
+
+// Test MaxPoolGrad replacement when only one of them is present.
+// In this case, we will rewrite MaxPoolGrad and for workspace tensor and
+// its Mkl part, we will generate dummy tensor.
+TEST_F(MklLayoutPassTest, NodeWorkspace_MaxPool_Negative2) {
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'B' op: 'Input'}"
+      "node { name: 'C' op: 'Input'}"
+      "node { name: 'D' op: 'MaxPoolGrad'"
+      " attr { key: 'T'            value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'  value { s: 'NCHW' } }"
+      " attr { key: 'ksize'        value { list: {i: 1, i:1, i:3, i:3} } }"
+      " attr { key: 'padding'      value { s: 'VALID' } }"
+      " attr { key: 'strides'      value { list: {i: 1, i:1, i:2, i:2} } }"
+      " input: ['A', 'B', 'C'] }"
+      "node { name: 'E' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
+      " input: ['A', 'D'] }");
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Input);B(Input);C(Input);D(_MklMaxPoolGrad);DMT/_0(Const);"
+            "DMT/_1(Const);DMT/_2(Const);DMT/_3(Const);DMT/_4(Const);E(Zeta)|"
+            "A->D;A->E;A:control->DMT/_0:control;A:control->DMT/_1:control;"
+            "A:control->DMT/_2:control;A:control->DMT/_3:control;"
+            "A:control->DMT/_4:control;B->D:1;C->D:2;D->E:1;DMT/_0->D:3;"
+            "DMT/_1->D:7;DMT/_2->D:4;DMT/_3->D:5;DMT/_4->D:6");
+}
+
+// Test MaxPool handling for batch-wise pooling (NCHW)
+// No rewrite should take place in such case
+TEST_F(MklLayoutPassTest, NodeWorkspace_MaxPool_Negative3) {
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'B' op: 'MaxPool'"
+      " attr { key: 'T'            value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'  value { s: 'NCHW' } }"
+      " attr { key: 'ksize'        value { list: {i: 2, i:1, i:1, i:1} } }"
+      " attr { key: 'padding'      value { s: 'VALID' } }"
+      " attr { key: 'strides'      value { list: {i: 1, i:1, i:1, i:1} } }"
+      " input: ['A'] }"
+      "node { name: 'C' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
+      " input: ['A', 'B'] }");
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Input);B(MaxPool);C(Zeta)|A->B;A->C;B->C:1");
+}
+
+// Test MaxPool handling for batch-wise pooling (NCHW)
+// No rewrite should take place in such case
+TEST_F(MklLayoutPassTest, NodeWorkspace_MaxPool_Negative4) {
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'B' op: 'MaxPool'"
+      " attr { key: 'T'            value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'  value { s: 'NCHW' } }"
+      " attr { key: 'ksize'        value { list: {i: 1, i:1, i:1, i:1} } }"
+      " attr { key: 'padding'      value { s: 'VALID' } }"
+      " attr { key: 'strides'      value { list: {i: 2, i:1, i:1, i:1} } }"
+      " input: ['A'] }"
+      "node { name: 'C' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
+      " input: ['A', 'B'] }");
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Input);B(MaxPool);C(Zeta)|A->B;A->C;B->C:1");
+}
+
+// Test MaxPool handling for depth-wise pooling (NHWC)
+// No rewrite should take place in such case
+TEST_F(MklLayoutPassTest, NodeWorkspace_MaxPool_Negative5) {
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'B' op: 'MaxPool'"
+      " attr { key: 'T'            value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'  value { s: 'NCHW' } }"
+      " attr { key: 'ksize'        value { list: {i: 1, i:2, i:1, i:1} } }"
+      " attr { key: 'padding'      value { s: 'VALID' } }"
+      " attr { key: 'strides'      value { list: {i: 1, i:1, i:1, i:1} } }"
+      " input: ['A'] }"
+      "node { name: 'C' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
+      " input: ['A', 'B'] }");
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Input);B(MaxPool);C(Zeta)|A->B;A->C;B->C:1");
+}
+
+// Test MaxPool handling for depth-wise pooling (NCHW)
+// No rewrite should take place in such case
+TEST_F(MklLayoutPassTest, NodeWorkspace_MaxPool_Negative6) {
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'B' op: 'MaxPool'"
+      " attr { key: 'T'            value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'  value { s: 'NCHW' } }"
+      " attr { key: 'ksize'        value { list: {i: 1, i:1, i:1, i:1} } }"
+      " attr { key: 'padding'      value { s: 'VALID' } }"
+      " attr { key: 'strides'      value { list: {i: 1, i:2, i:1, i:1} } }"
+      " input: ['A'] }"
+      "node { name: 'C' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
+      " input: ['A', 'B'] }");
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Input);B(MaxPool);C(Zeta)|A->B;A->C;B->C:1");
+}
+
+// Test MaxPool handling for batch-wise pooling (NHWC)
+// No rewrite should take place in such case
+TEST_F(MklLayoutPassTest, NodeWorkspace_MaxPool_Negative7) {
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'B' op: 'MaxPool'"
+      " attr { key: 'T'            value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'  value { s: 'NHWC' } }"
+      " attr { key: 'ksize'        value { list: {i: 2, i:1, i:1, i:1} } }"
+      " attr { key: 'padding'      value { s: 'VALID' } }"
+      " attr { key: 'strides'      value { list: {i: 1, i:1, i:1, i:1} } }"
+      " input: ['A'] }"
+      "node { name: 'C' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
+      " input: ['A', 'B'] }");
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Input);B(MaxPool);C(Zeta)|A->B;A->C;B->C:1");
+}
+
+// Test MaxPool handling for batch-wise pooling (NHWC)
+// No rewrite should take place in such case
+TEST_F(MklLayoutPassTest, NodeWorkspace_MaxPool_Negative8) {
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'B' op: 'MaxPool'"
+      " attr { key: 'T'            value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'  value { s: 'NHWC' } }"
+      " attr { key: 'ksize'        value { list: {i: 1, i:1, i:1, i:1} } }"
+      " attr { key: 'padding'      value { s: 'VALID' } }"
+      " attr { key: 'strides'      value { list: {i: 2, i:1, i:1, i:1} } }"
+      " input: ['A'] }"
+      "node { name: 'C' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
+      " input: ['A', 'B'] }");
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Input);B(MaxPool);C(Zeta)|A->B;A->C;B->C:1");
+}
+
+// Test MaxPool handling for depth-wise pooling (NHWC)
+// No rewrite should take place in such case
+TEST_F(MklLayoutPassTest, NodeWorkspace_MaxPool_Negative9) {
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'B' op: 'MaxPool'"
+      " attr { key: 'T'            value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'  value { s: 'NHWC' } }"
+      " attr { key: 'ksize'        value { list: {i: 1, i:1, i:1, i:2} } }"
+      " attr { key: 'padding'      value { s: 'VALID' } }"
+      " attr { key: 'strides'      value { list: {i: 1, i:1, i:1, i:1} } }"
+      " input: ['A'] }"
+      "node { name: 'C' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
+      " input: ['A', 'B'] }");
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Input);B(MaxPool);C(Zeta)|A->B;A->C;B->C:1");
+}
+
+// Test MaxPool handling for depth-wise pooling (NHWC)
+// No rewrite should take place in such case
+TEST_F(MklLayoutPassTest, NodeWorkspace_MaxPool_Negative10) {
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'B' op: 'MaxPool'"
+      " attr { key: 'T'            value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'  value { s: 'NHWC' } }"
+      " attr { key: 'ksize'        value { list: {i: 1, i:1, i:1, i:1} } }"
+      " attr { key: 'padding'      value { s: 'VALID' } }"
+      " attr { key: 'strides'      value { list: {i: 1, i:1, i:1, i:2} } }"
+      " input: ['A'] }"
+      "node { name: 'C' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
+      " input: ['A', 'B'] }");
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Input);B(MaxPool);C(Zeta)|A->B;A->C;B->C:1");
+}
+
+/////////////////////////////////////////////////////////////////////
+
+// Single Conv2D Op on GPU device
+// No rewrite should happen
+TEST_F(MklLayoutPassTest, NodeRewrite_Conv2D_DeviceTest) {
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'B' op: 'Input'}"
+      "node { name: 'C' op: 'Conv2D'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'      value { s: 'NCHW' } }"
+      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
+      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
+      " attr { key: 'padding'          value { s: 'SAME' } }"
+      " input: ['A', 'B']}"
+      "node { name: 'D' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
+      " input: ['B', 'C'] }", kGPUDevice);
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Input);B(Input);C(Conv2D);D(Zeta)|A->C;B->C:1;B->D;C->D:1");
+}
+
+TEST_F(MklLayoutPassTest, NodeMerge_Conv2DBackprop_DeviceTest) {
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'B' op: 'Input'}"
+      "node { name: 'C' op: 'Input'}"
+      "node { name: 'M' op: '_MklInput'}"
+      "node { name: 'N' op: '_MklInput'}"
+      "node { name: 'O' op: '_MklInput'}"
+      "node { name: 'D' op: '_MklConv2DWithBias'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'      value { s: 'NCHW' } }"
+      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
+      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
+      " attr { key: 'padding'          value { s: 'SAME' } }"
+      " input: ['A', 'B', 'C', 'M', 'N', 'O']}"
+      "node { name: 'E' op: 'Zeta'"
+      " attr {key: 'T'                 value { type: DT_FLOAT } }"
+      " input: ['D', 'A']}"
+      "node { name: 'F' op: 'BiasAddGrad'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'      value { s: 'NCHW' } }"
+      " input: ['E'] }", kGPUDevice);
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Input);B(Input);C(Input);D(_MklConv2DWithBias);"
+            "E(Zeta);F(BiasAddGrad);M(_MklInput);N(_MklInput);"
+            "O(_MklInput)|A->D;A->E:1;B->D:1;C->D:2;D->E;E->F;"
+            "M->D:3;N->D:4;O->D:5");
+}
+
+TEST_F(MklLayoutPassTest, NodeRewrite_Conv2DGradFilter_DeviceTest) {
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'B' op: 'Int32Input'}"
+      "node { name: 'C' op: 'Input'}"
+      "node { name: 'D' op: 'Conv2DBackpropFilter'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'      value { s: 'NCHW' } }"
+      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
+      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
+      " attr { key: 'padding'          value { s: 'SAME' } }"
+      " input: ['A', 'B', 'C']}"
+      "node { name: 'E' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
+      " input: ['A', 'D'] }", kGPUDevice);
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Input);B(Int32Input);C(Input);D(Conv2DBackpropFilter);E(Zeta)|"
+            "A->D;A->E;B->D:1;C->D:2;D->E:1");
+}
+
+TEST_F(MklLayoutPassTest, NodeRewrite_Relu_DeviceTest) {
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'B' op: 'Relu'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " input: ['A'] }"
+      "node { name: 'C' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
+      " input: ['A', 'B'] }", kGPUDevice);
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Input);B(Relu);C(Zeta)|A->B;A->C;B->C:1");
+}
+
+TEST_F(MklLayoutPassTest, NodeRewrite_ReluGrad_DeviceTest) {
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'B' op: 'Input'}"
+      "node { name: 'C' op: 'ReluGrad'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " input: ['A', 'B'] }"
+      "node { name: 'D' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
+      " input: ['A', 'C'] }", kGPUDevice);
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Input);B(Input);C(ReluGrad);D(Zeta)|A->C;A->D;B->C:1;C->D:1");
+}
+
+TEST_F(MklLayoutPassTest, NodeRewrite_MaxPool_DeviceTest) {
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'B' op: 'MaxPool'"
+      " attr { key: 'T'            value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'  value { s: 'NHWC' } }"
+      " attr { key: 'ksize'        value { list: {i: 1, i:1, i:1, i:1} } }"
+      " attr { key: 'padding'      value { s: 'VALID' } }"
+      " attr { key: 'strides'      value { list: {i: 1, i:1, i:1, i:1} } }"
+      " input: ['A'] }"
+      "node { name: 'C' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
+      " input: ['A', 'B'] }", kGPUDevice);
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Input);B(MaxPool);C(Zeta)|A->B;A->C;B->C:1");
+}
+
+TEST_F(MklLayoutPassTest, NodeRewrite_AvgPool_DeviceTest) {
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'B' op: 'AvgPool'"
+      " attr { key: 'T'            value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'  value { s: 'NHWC' } }"
+      " attr { key: 'ksize'        value { list: {i: 1, i:1, i:1, i:1} } }"
+      " attr { key: 'padding'      value { s: 'VALID' } }"
+      " attr { key: 'strides'      value { list: {i: 1, i:1, i:1, i:1} } }"
+      " input: ['A'] }"
+      "node { name: 'C' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
+      " input: ['A', 'B'] }", kGPUDevice);
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Input);B(AvgPool);C(Zeta)|A->B;A->C;B->C:1");
+}
+
+// Concat Op test: Concat with no Mkl layer feeding it
+TEST_F(MklLayoutPassTest, NodeRewrite_Concat_DeviceTest) {
+  InitGraph(
+      "node { name: 'A' op: 'Const' "
+      " attr { key: 'dtype' value { type: DT_INT32 } }"
+      " attr { key: 'value' value { "
+      "    tensor { dtype: DT_INT32 tensor_shape { dim { size: 1 } } "
+      "    int_val: 0 } } } }"
+      "node { name: 'B' op: 'InputList'"
+      " attr { key: 'N'                value { i: 2 } }}"
+      "node { name: 'C' op: 'Input'}"
+      "node { name: 'D' op: 'Concat'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " attr { key: 'N'                value { i: 2 } }"
+      " input: ['A', 'B:0', 'B:1']}"
+      "node { name: 'E' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
+      " input: ['C', 'D'] }", kGPUDevice);
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Const);B(InputList);C(Input);D(Concat);E(Zeta)|A->D;"
+            "B->D:1;B:1->D:2;C->E;D->E:1");
+}
+
+TEST_F(MklLayoutPassTest, NodeRewrite_ConcatV2_DeviceTest) {
+  InitGraph(
+      "node { name: 'A' op: 'Const' "
+      " attr { key: 'dtype' value { type: DT_INT32 } }"
+      " attr { key: 'value' value { "
+      "    tensor { dtype: DT_INT32 tensor_shape { dim { size: 1 } } "
+      "    int_val: 0 } } } }"
+      "node { name: 'B' op: 'InputList'"
+      " attr { key: 'N'                value { i: 2 } }}"
+      "node { name: 'C' op: 'Input'}"
+      "node { name: 'D' op: 'ConcatV2'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " attr { key: 'Tidx'             value { type: DT_INT32 } }"
+      " attr { key: 'N'                value { i: 2 } }"
+      " input: ['B:0', 'B:1', 'A']}"
+      "node { name: 'E' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
+      " input: ['C', 'D'] }", kGPUDevice);
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Const);B(InputList);C(Input);D(ConcatV2);E(Zeta)|"
+            "A->D:2;B->D;B:1->D:1;C->E;D->E:1");
+}
+
+TEST_F(MklLayoutPassTest, NodeRewrite_FusedBatchNorm_DeviceTest) {
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'B' op: 'Input'}"
+      "node { name: 'C' op: 'Input'}"
+      "node { name: 'D' op: 'Input'}"
+      "node { name: 'E' op: 'Input'}"
+      "node { name: 'F' op: 'FusedBatchNorm'"
+      " attr { key: 'T'            value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'  value { s: 'NCHW' } }"
+      " attr { key: 'epsilon'      value { f: 0.0001 } }"
+      " attr { key: 'is_training'  value { b: true } }"
+      " input: ['A', 'B', 'C', 'D', 'E'] }"
+      "node { name: 'G' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
+      " input: ['A', 'F'] }", kGPUDevice);
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Input);B(Input);C(Input);D(Input);E(Input);"
+            "F(FusedBatchNorm);G(Zeta)|A->F;A->G;B->F:1;C->F:2;D->F:3;"
+            "E->F:4;F->G:1");
+}
+
+TEST_F(MklLayoutPassTest, NodeMerge_Conv2DWithBias_DeviceTest) {
+  CHECK_EQ(kTensorOrdering, MklTfTensorOrdering::TENSORS_CONTIGUOUS);
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'B' op: 'Input'}"
+      "node { name: 'M' op: '_MklInput'}"
+      "node { name: 'N' op: '_MklInput'}"
+      "node { name: 'C' op: '_MklConv2D'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'      value { s: 'NCHW' } }"
+      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
+      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
+      " attr { key: 'padding'          value { s: 'SAME' } }"
+      " input: ['A', 'B', 'M', 'N']}"
+      "node { name: 'D' op: 'Input'}"
+      "node { name: 'E' op: 'BiasAdd'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'      value { s: 'NCHW' } }"
+      " input: ['C', 'D'] }"
+      "node { name: 'Y' op: 'Input'}"
+      "node { name: 'Z' op: 'Zeta'"
+      " attr {key: 'T'                 value { type: DT_FLOAT } }"
+      " input: ['E', 'Y']}", kGPUDevice);
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Input);B(Input);C(_MklConv2D);D(Input);E(BiasAdd);"
+            "M(_MklInput);N(_MklInput);Y(Input);Z(Zeta)|A->C;"
+            "B->C:1;C->E;D->E:1;E->Z;M->C:2;N->C:3;Y->Z:1");
+}
+
+/////////////////////////////////////////////////////////////////////
+
+static void BM_MklLayoutRewritePass(int iters, int op_nodes) {
+  testing::StopTiming();
+  string s;
+  for (int in = 0; in < 10; in++) {
+    s += strings::Printf("node { name: 'in%04d' op: 'Input'}", in);
+  }
+  random::PhiloxRandom philox(301, 17);
+  random::SimplePhilox rnd(&philox);
+  for (int op = 0; op < op_nodes; op++) {
+    s += strings::Printf(
+        "node { name: 'op%04d' op: 'Zeta' attr { key: 'T' value { "
+        "type: DT_FLOAT } } input: ['in%04d', 'in%04d' ] }",
+        op, rnd.Uniform(10), rnd.Uniform(10));
+  }
+
+  bool first = true;
+  while (iters > 0) {
+    Graph* graph = new Graph(OpRegistry::Global());
+    InitGraph(s, graph);
+    int N = graph->num_node_ids();
+    if (first) {
+      testing::SetLabel(strings::StrCat("Per graph node.  Nodes: ", N));
+      first = false;
+    }
+    {
+      testing::StartTiming();
+      std::unique_ptr<Graph> ug(graph);
+      RunMklLayoutRewritePass(&ug);
+      testing::StopTiming();
+    }
+    iters -= N;  // Our benchmark units are individual graph nodes,
+                 // not whole graphs
+    // delete graph;
+  }
+}
+BENCHMARK(BM_MklLayoutRewritePass)->Arg(1000)->Arg(10000);
+
+}  // namespace
+
+#endif  // INTEL_MKL_DNN
+
 }  // namespace tensorflow
 
 #endif /* INTEL_MKL */
diff --git a/tensorflow/core/kernels/mkl_input_conversion_op.cc b/tensorflow/core/kernels/mkl_input_conversion_op.cc
index b58e44e398..001834b13b 100644
--- a/tensorflow/core/kernels/mkl_input_conversion_op.cc
+++ b/tensorflow/core/kernels/mkl_input_conversion_op.cc
@@ -31,6 +31,12 @@ limitations under the License.
 #include "tensorflow/core/kernels/mkl_tfconv_op.h"
 #include "tensorflow/core/util/mkl_util.h"
 
+#ifdef INTEL_MKL_DNN
+#include "mkldnn.hpp"
+
+using mkldnn::stream;
+#endif
+
 namespace tensorflow {
 typedef Eigen::ThreadPoolDevice CPUDevice;
 
@@ -44,15 +50,16 @@ typedef Eigen::ThreadPoolDevice CPUDevice;
 // else if both inputs are in mkl format:
 //   if both have the same shape:
 //     pass the inputs through to the output
-// 	else:
-// 		convert both to TF
+//   else:
+//     convert both to TF
 // else if one is TF and one is MKL:
-// 	if broadcast is needed:
-// 		convert the MKL format input to TF format
-// 	else:
-// 		convert the TF format input to MKL format
+//   if broadcast is needed:
+//     convert the MKL format input to TF format
+//   else:
+//     convert the TF format input to MKL format
 ///////////////////////////////////////////////////////////
 
+#ifndef INTEL_MKL_DNN
 template <typename Device, typename T>
 class MklInputConversionOp : public OpKernel {
  public:
@@ -242,6 +249,199 @@ class MklInputConversionOp : public OpKernel {
   bool has_avx512f_ = false;
 };
 
+#else
+
+template <typename Device, typename T>
+class MklInputConversionOp : public OpKernel {
+ public:
+  explicit MklInputConversionOp(OpKernelConstruction* context)
+      : OpKernel(context) {
+    OP_REQUIRES_OK(context, context->GetAttr("data_format", &data_format_str));
+    OP_REQUIRES_OK(context, context->GetAttr("T", &op_data_type));
+    has_avx512f_ = port::TestCPUFeature(port::CPUFeature::AVX512F);
+  }
+
+ private:
+  void Compute(OpKernelContext* context) override {
+    const Tensor& input_tensor_0 = MklGetInput(context, 0);
+    MklDnnShape input_shape_0;
+    GetMklShape(context, 0, &input_shape_0);
+
+    const Tensor& input_tensor_1 = MklGetInput(context, 1);
+    MklDnnShape input_shape_1;
+    GetMklShape(context, 1, &input_shape_1);
+
+    bool tf_shapes_are_same = context->input(0).shape() ==
+                              context->input(1).shape();
+
+    VLOG(1) << "MklInputConversionOp: Input shapes are "
+            << (tf_shapes_are_same ? "*same*" : "*different*") << ": "
+            << context->input(0).shape().DebugString() << " and "
+            << context->input(1).shape().DebugString();
+
+    // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+    // if both inputs are in TF format, just copy input tensors to output.
+    if (!input_shape_0.IsMklTensor() && !input_shape_1.IsMklTensor()) {
+      VLOG(1) << "MklInputConversionOp: No conversion needed, "
+              << "copying TF inputs to output";
+
+      ForwardTfTensorInToOut(context, 0, 0);
+      ForwardTfTensorInToOut(context, 1, 1);
+      return;
+    }
+
+    // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+    // If both inputs are in MKL format
+    if (input_shape_0.IsMklTensor() && input_shape_1.IsMklTensor()) {
+      // If both have the same shape, pass them through
+      if (tf_shapes_are_same) {
+        VLOG(1) << "MklInputConversionOp: No conversion needed, "
+                << "copying MKL inputs with identical shapes to output";
+
+        ForwardMklTensorInToOut(context, 0, 0);
+        ForwardMklTensorInToOut(context, 1, 1);
+        return;
+      }
+
+      // Sanity check
+      bool mkl_shapes_are_same = input_shape_0 == input_shape_1;
+      if (mkl_shapes_are_same) {
+        CHECK(false) << "MklInputConversionOp: Unexpected: TF shapes are "
+                        "different but MKL shapes are same";
+      }
+
+      // Both have different shapes, so broadcast will be necessary.
+      // Convert to TF and pass both tensors through (we can't do broadcast
+      // with MKL tensors)
+      VLOG(1) << "MklInputConversionOp: Broadcast needed, "
+              << "converted MKL inputs to TF format";
+
+      MklToTfOp<Device, T>::ConvertMklToTf(this, context, data_format_str,
+                                           op_data_type, has_avx512f_, 0);
+      MklToTfOp<Device, T>::ConvertMklToTf(this, context, data_format_str,
+                                           op_data_type, has_avx512f_, 1);
+      SetDummyMklShapeOutput(context, 0);
+      SetDummyMklShapeOutput(context, 1);
+      return;
+    }
+
+    // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+    // One input is MKL and one is TF. If no broadcast is needed, convert
+    // the TF tensor to MKL, otherwise convert the MKL tensor to TF format
+    VLOG(1) << "MklInputConversionOp: Inputs in different formats (MKL/TF)";
+
+    const Tensor* mkl_tensor;
+    const MklDnnShape* mkl_shape;
+    const Tensor* tf_tensor;
+    MklDnnShape* tf_mkl_shape;
+    uint mkl_tensor_index;
+    uint tf_tensor_index;
+    if (input_shape_0.IsMklTensor() && !input_shape_1.IsMklTensor()) {
+      mkl_tensor = &input_tensor_0;
+      mkl_shape = &input_shape_0;
+      mkl_tensor_index = 0;
+      tf_tensor = &input_tensor_1;
+      tf_mkl_shape = &input_shape_1;
+      tf_tensor_index = 1;
+    } else if (!input_shape_0.IsMklTensor() && input_shape_1.IsMklTensor()) {
+      mkl_tensor = &input_tensor_1;
+      mkl_shape = &input_shape_1;
+      mkl_tensor_index = 1;
+      tf_tensor = &input_tensor_0;
+      tf_mkl_shape = &input_shape_0;
+      tf_tensor_index = 0;
+    } else {
+      CHECK(false) << "MklInputConversionOp: Unexpected combination of input "
+                      "shapes for MKL "
+                   << "element-wise op";
+    }
+
+    // Broadcast is needed if the shapes are not the same
+    bool broadcast_needed;
+
+    size_t in0_size = 1;
+    for (size_t i = 0; i < mkl_shape->GetDimension(); ++i)
+      in0_size *= mkl_shape->TfDimSize(i);
+
+    size_t in1_size = 1;
+    for (size_t i = 0; i < tf_tensor->shape().dims(); ++i)
+      in1_size *= tf_tensor->shape().dim_size(i);
+
+    broadcast_needed = (in0_size != in1_size);
+
+    if (!broadcast_needed) {
+      // Both shapes are same, convert the TF input to MKL
+      VLOG(1) << "MklInputConversionOp: No broadcast needed.";
+      VLOG(1) << "MklInputConversionOp: Converting input " << tf_tensor_index
+              << " to MKL format";
+
+      // Create MklDnnShape for output Mkl tensor.
+      Tensor* tensor_out;
+      MklDnnShape mkl_output_mkl_shape;
+      mkl_output_mkl_shape.SetMklTensor(true);
+      mkl_output_mkl_shape.SetElemType(MklDnnType<T>());
+      mkl_output_mkl_shape.SetTfLayout(mkl_shape->GetDimension(),
+                                       mkl_shape->GetSizesAsMklDnnDims(),
+                                       mkl_shape->GetTfDataFormat());
+      // ** Temporarily borrow the layout from the MKL input **
+      auto output_mkl_md = mkl_shape->GetMklLayout();
+      mkl_output_mkl_shape.SetMklLayout(&output_mkl_md);
+
+      // Create output Mkl tensor
+      AllocateOutputSetMklShape(context, tf_tensor_index, &tensor_out,
+                                mkl_tensor->shape(), mkl_output_mkl_shape);
+
+      // Create MklDnnData object for input tensor. Input tensor is in
+      // Tensorflow layout.
+      auto cpu_engine = engine(engine::cpu, 0);
+      MklDnnData<T> tf_input(&cpu_engine);
+      auto input_tf_md = mkl_output_mkl_shape.GetTfLayout();
+      tf_input.SetUsrMem(input_tf_md, &tf_tensor);
+
+      // Create reorder between tensorflow layout and Mkl layout.
+      std::vector<primitive> net;
+      CHECK_EQ(tf_input.CheckReorderToOpMem(memory::primitive_desc(
+                                            output_mkl_md, cpu_engine),
+                                            tensor_out, &net),
+               true);
+      stream(stream::kind::eager).submit(net).wait();
+
+      // -- The tensor in MKL format passes through --
+      ForwardMklTensorInToOut(context, mkl_tensor_index, mkl_tensor_index);
+    } else {
+      // Broadcast is needed, so convert the MKL input to TF
+      VLOG(1) << "MklInputConversionOp: Broadcast needed.";
+      VLOG(1) << "MklInputConversionOp: Converting input " << mkl_tensor_index
+              << " to TF format";
+      MklToTfOp<Device, T>::ConvertMklToTf(this, context, data_format_str,
+                                           op_data_type, has_avx512f_,
+                                           mkl_tensor_index);
+      SetDummyMklShapeOutput(context, mkl_tensor_index);
+
+      // The tensor in TF format passes through
+      ForwardTfTensorInToOut(context, tf_tensor_index, tf_tensor_index);
+    }
+
+    VLOG(1) << "MklInputConversionOp: Shapes (output): "
+            << context->mutable_output(0)->shape().DebugString() << " and "
+            << context->mutable_output(1)->shape().DebugString();
+
+    VLOG(1) << "MklInputConversion completed successfully.";
+  }
+
+ private:
+  /// Data format of the operation
+  string data_format_str;
+
+  /// Data type of the operation
+  DataType op_data_type;
+
+  /// CPUIDInfo
+  bool has_avx512f_ = false;
+};
+
+#endif
+
 ///////////////////////////////////////////////////////////
 //               Register kernel
 ///////////////////////////////////////////////////////////
@@ -253,7 +453,10 @@ class MklInputConversionOp : public OpKernel {
                               .Label(mkl_op_registry::kMklOpLabel), \
                           MklInputConversionOp<CPUDevice, T>);
 
-TF_CALL_NUMBER_TYPES(REGISTER_CPU);
+// TODO(nhasabni): We cannot support all number types since MklDnn does
+// not support types.
+// TF_CALL_NUMBER_TYPES(REGISTER_CPU);
+TF_CALL_float(REGISTER_CPU);
 #undef REGISTER_CPU
 }  // namespace tensorflow
 #endif  // INTEL_MKL
diff --git a/tensorflow/core/ops/nn_ops.cc b/tensorflow/core/ops/nn_ops.cc
index 102de94787..c4ceb0289e 100644
--- a/tensorflow/core/ops/nn_ops.cc
+++ b/tensorflow/core/ops/nn_ops.cc
@@ -2924,6 +2924,25 @@ NOTE Do not invoke this operator directly in Python. Graph rewrite pass is
 expected to invoke these operators.
 )doc");
 
+REGISTER_OP("__MklDummyConv2DWithBias")
+    .Input("input: T")
+    .Input("filter: T")
+    .Input("bias: T")
+    .Output("output: T")
+    .Attr("T: {half, float, double}")
+    .Attr("strides: list(int)")
+    .Attr("use_cudnn_on_gpu: bool = true")
+    .Attr(GetPaddingAttrString())
+    .Attr(GetConvnetDataFormatAttrString())
+    .Doc(R"doc(
+Dummy node that enables fusing Conv2D and BiasAdd operator for MKL. This node
+does not perform anything. It is just created as an intermediate output of
+merging Conv2D and BiasAdd.
+
+NOTE Do not invoke this operator directly in Python. Graph rewrite pass is
+expected to invoke these operators.
+)doc");
+
 REGISTER_OP("_MklConv2DWithBias")
     .Input("input: T")
     .Input("filter: T")
@@ -2977,6 +2996,88 @@ NOTE Do not invoke this operator directly in Python. Graph rewrite pass is
 expected to invoke these operators.
 )doc");
 
+REGISTER_OP("__MklDummyConv2DBackpropFilterWithBias")
+    .Input("input: T")
+    .Input("filter_sizes: int32")
+    .Input("out_backprop: T")
+    .Output("output: T")
+    .Output("bias_grad: T")
+    .Attr("T: {half, float, double}")
+    .Attr("strides: list(int)")
+    .Attr("use_cudnn_on_gpu: bool = true")
+    .Attr(GetPaddingAttrString())
+    .Attr(GetConvnetDataFormatAttrString())
+    .SetShapeFn([](InferenceContext* c) {
+      ShapeHandle input_shape;
+      // Fetch the data_format attribute, which may not exist.
+      string data_format;
+      Status s = c->GetAttr("data_format", &data_format);
+
+      if (s.ok() && data_format == "NCHW") {
+        TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 4, &input_shape));
+        c->set_output(1, c->Vector(c->Dim(input_shape, -3)));
+      } else {
+        TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 4, &input_shape));
+        c->set_output(1, c->Vector(c->Dim(input_shape, -1)));
+      }
+      ShapeHandle sh;
+      TF_RETURN_IF_ERROR(c->MakeShapeFromShapeTensor(1, &sh));
+      TF_RETURN_IF_ERROR(c->WithRank(sh, 4, &sh));
+      c->set_output(0, sh);
+      return Status::OK();
+    })
+    .Doc(R"doc(
+Dummy node that enables fusing Conv2DBackpropFilter and BiasAddGrad operator
+for MKL. This node does not perform anything. It is just created as an
+intermediate output of merging Conv2DBackpropFilter and BiasAddGrad.
+
+NOTE Do not invoke this operator directly in Python. Graph rewrite pass is
+expected to invoke these operators.
+)doc");
+
+REGISTER_OP("_MklConv2DBackpropFilterWithBias")
+    .Input("input: T")
+    .Input("filter_sizes: int32")
+    .Input("out_backprop: T")
+    .Input("mkl_input: uint8")
+    .Input("mkl_filter_size: uint8")
+    .Input("mkl_out_backprop: uint8")
+    .Output("output: T")
+    .Output("bias_grad: T")
+    .Output("mkl_output: uint8")
+    .Output("mkl_bias_grad: uint8")
+    .Attr("T: {half, float, double}")
+    .Attr("strides: list(int)")
+    .Attr("use_cudnn_on_gpu: bool = true")
+    .Attr(GetPaddingAttrString())
+    .Attr(GetConvnetDataFormatAttrString())
+    .SetShapeFn([](InferenceContext* c) {
+      ShapeHandle input_shape;
+      // Fetch the data_format attribute, which may not exist.
+      string data_format;
+      Status s = c->GetAttr("data_format", &data_format);
+
+      if (s.ok() && data_format == "NCHW") {
+        TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 4, &input_shape));
+        c->set_output(1, c->Vector(c->Dim(input_shape, -3)));
+      } else {
+        TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 4, &input_shape));
+        c->set_output(1, c->Vector(c->Dim(input_shape, -1)));
+      }
+      ShapeHandle sh;
+      TF_RETURN_IF_ERROR(c->MakeShapeFromShapeTensor(1, &sh));
+      TF_RETURN_IF_ERROR(c->WithRank(sh, 4, &sh));
+      c->set_output(0, sh);
+      return Status::OK();
+    })
+    .Doc(R"doc(
+MKL version of Conv2DBackpropFilterWithBias. Uses MKL DNN APIs to compute the
+gradients of convolution with respect to the filter.
+
+NOTE Do not invoke this operator directly in Python. Graph rewrite pass is
+expected to invoke these operators.
+)doc");
+
 REGISTER_OP("_MklConv2DWithBiasBackpropBias")
     .Input("out_backprop: T")
     .Input("mkl_out_backprop: uint8")
@@ -3053,6 +3154,78 @@ NOTE Do not invoke this operator directly in Python. Graph rewrite pass is
 expected to invoke these operators.
 )doc");
 
+REGISTER_OP("_MklElu")
+    .Input("features: T")
+    .Input("mkl_features: uint8")
+    .Output("activations: T")
+    .Output("mkl_activations: uint8")
+    .Attr("T: realnumbertype")
+    .SetShapeFn(shape_inference::UnchangedShape)
+    .Doc(R"doc(
+MKL version of Elu operator. Uses MKL DNN APIs to implement Elu operator.
+NOTE Do not invoke this operator directly in Python. Graph rewrite pass is
+expected to invoke these operators.
+)doc");
+
+REGISTER_OP("_MklEluGrad")
+    .Input("gradients: T")
+    .Input("features: T")
+    .Input("mkl_gradients: uint8")
+    .Input("mkl_features: uint8")
+    .Output("backprops: T")
+    .Output("mkl_backprops: uint8")
+    .Attr("T: realnumbertype")
+    .SetShapeFn(shape_inference::MergeBothInputsShapeFn)
+    .Doc(R"doc(
+MKL version of EluGrad operator. Uses MKL DNN APIs to compute Elu
+gradients for Elu operation.
+NOTE Do not invoke this operator directly in Python. Graph rewrite pass is
+expected to invoke these operators.
+)doc");
+
+REGISTER_OP("_MklSoftmax")
+    .Input("logits: T")
+    .Input("mkl_logits: uint8")
+    .Output("softmax: T")
+    .Output("mkl_softmax: uint8")
+    .Attr("T: {half, float, double}")
+    .SetShapeFn([](InferenceContext* c) {
+      return shape_inference::UnchangedShapeWithRankAtLeast(c, 1);
+    })
+    .Doc(R"doc(
+MKL version of ReluGrad operator. Uses MKL DNN APIs to compute rectified
+linear gradients for Relu operation.
+)doc");
+
+REGISTER_OP("_MklTanh")
+    .Input("features: T")
+    .Input("mkl_features: uint8")
+    .Output("activations: T")
+    .Output("mkl_activations: uint8")
+    .Attr("T: realnumbertype")
+    .SetShapeFn(shape_inference::UnchangedShape)
+    .Doc(R"doc(
+MKL version of Tanh operator. Uses MKL DNN APIs to implement Tanh operator.
+NOTE Do not invoke this operator directly in Python. Graph rewrite pass is
+expected to invoke these operators.
+)doc");
+
+REGISTER_OP("_MklTanhGrad")
+    .Input("gradients: T")
+    .Input("features: T")
+    .Input("mkl_gradients: uint8")
+    .Input("mkl_features: uint8")
+    .Output("backprops: T")
+    .Output("mkl_backprops: uint8")
+    .Attr("T: realnumbertype")
+    .SetShapeFn(shape_inference::MergeBothInputsShapeFn)
+    .Doc(R"doc(
+MKL version of TanhGrad operator. Uses MKL DNN APIs to compute tanh
+gradients for Tanh operation.
+NOTE Do not invoke this operator directly in Python. Graph rewrite pass is
+expected to invoke these operators.
+)doc");
+
 REGISTER_OP("_MklMaxPool")
     .Attr("T: {float, half} = DT_FLOAT")
     .Attr("ksize: list(int) >= 4")
diff --git a/tensorflow/core/util/mkl_util.h b/tensorflow/core/util/mkl_util.h
index 148c7851bd..2caf5fc56d 100644
--- a/tensorflow/core/util/mkl_util.h
+++ b/tensorflow/core/util/mkl_util.h
@@ -328,6 +328,10 @@ class MklShape {
 
 // Forward decl
 TensorFormat MklDnnDataFormatToTFDataFormat(memory::format format);
+memory::dims CalculateTFStrides(const memory::dims& dims_tf_order);
+memory::desc CreateBlockedMemDescHelper(const memory::dims& dim,
+                                        const memory::dims& strides,
+                                        memory::data_type dtype);
 
 class MklDnnShape {
  private:
@@ -364,6 +368,52 @@ class MklDnnShape {
   ~MklDnnShape() {}
   TF_DISALLOW_COPY_AND_ASSIGN(MklDnnShape);  // Cannot copy
 
+  /// Helper function to compare memory::desc objects for MklDnn.
+  /// May be this should go into MklDnn directly.
+  inline bool CompareMklDnnLayouts(const memory::desc& md1,
+                                   const memory::desc& md2) const {
+    mkldnn_memory_desc_t mdd1 = md1.data;
+    mkldnn_memory_desc_t mdd2 = md2.data;
+    const char* d1 = reinterpret_cast<const char*>(&mdd1);
+    const char* d2 = reinterpret_cast<const char*>(&mdd2);
+
+    size_t md_size = sizeof(mdd1);
+    for (size_t i = 0; i < md_size; i++) {
+      if (*d1++ != *d2++) {
+        return false;
+      }
+    }
+    return true;
+  }
+
+  /// Equality function for MklDnnShape objects
+  /// @return true if both are equal; false otherwise.
+  inline bool operator == (const MklDnnShape& input_shape) const {
+    if (this->IsMklTensor() != input_shape.IsMklTensor()) {
+      return false;
+    }
+
+    // If input tensors are in Mkl layout, then we check for dimensions and
+    // sizes.
+    if (this->IsMklTensor()) {
+      return this->GetTfShape() == input_shape.GetTfShape() &&
+             CompareMklDnnLayouts(this->GetMklLayout(),
+                                  input_shape.GetMklLayout());
+    }
+
+    return true;
+  }
+
+  /// Equality operator for MklDnnShape and TFShape.
+  /// Returns: true if TF shapes for both are the same, false otherwise
+  inline bool operator == (const TensorShape& input_shape) const {
+    if (!this->IsMklTensor()) {
+      return false;
+    }
+
+    return this->GetTfShape() == input_shape;
+  }
+
   inline const bool IsMklTensor() const { return data_.is_mkl_tensor_; }
   inline void SetMklTensor(bool is_mkl_tensor) {
     data_.is_mkl_tensor_ = is_mkl_tensor;
@@ -375,7 +425,7 @@ class MklDnnShape {
   inline size_t GetDimension(char dimension) const {
     int index = GetMklDnnTensorDimIndex(dimension);
     CHECK(index >= 0 && index < this->GetDimension())
-        << "Invalid index from the dimension: " << index << ", " << dimension;
+      << "Invalid index from the dimension: " << index << ", " << dimension;
     return this->DimSize(index);
   }
 
@@ -405,7 +455,7 @@ class MklDnnShape {
   inline memory::dims GetSizesAsMklDnnDims() const {
     memory::dims retVal;
     if (data_.is_mkl_tensor_) {
-      int dimensions = sizeof(data_.sizes_) / sizeof(data_.sizes_[0]);
+      size_t dimensions = sizeof(data_.sizes_) / sizeof(data_.sizes_[0]);
       for (size_t i = 0; i < dimensions; i++) {
         if (data_.sizes_[i] != INVALID_DIM_SIZE)
           retVal.push_back(data_.sizes_[i]);
@@ -423,12 +473,21 @@ class MklDnnShape {
 
   /// Return TensorShape that describes the Tensorflow shape of the tensor
   /// represented by this MklShape.
-  inline TensorShape GetTfShape() {
+  inline TensorShape GetTfShape() const {
     CHECK_EQ(data_.is_mkl_tensor_, true);
 
     std::vector<int32> shape(data_.dimension_, -1);
-    for (size_t idx = 0; idx < data_.dimension_; ++idx) {
-      shape[idx] = data_.sizes_[TfDimIdx(idx)];
+    if (data_.tf_data_format_ != memory::format::blocked) {
+      for (size_t idx = 0; idx < data_.dimension_; ++idx) {
+        shape[idx] = data_.sizes_[TfDimIdx(idx)];
+      }
+    } else {
+      // If Tensorflow shape is in Blocked format, then we don't have dimension
+      // map for it. So we just create Tensorflow shape from sizes in the
+      // specified order.
+      for (size_t idx = 0; idx < data_.dimension_; ++idx) {
+        shape[idx] = data_.sizes_[idx];
+      }
     }
 
     TensorShape ts;
@@ -444,6 +503,12 @@ class MklDnnShape {
     CHECK_NOTNULL(pd);
     data_.mkl_md_ = pd->desc().data;
   }
+
+  inline void SetMklLayout(memory::desc* md) {
+    CHECK_NOTNULL(md);
+    data_.mkl_md_ = md->data;
+  }
+
   inline const memory::desc GetMklLayout() const {
     return memory::desc(data_.mkl_md_);
   }
@@ -452,7 +517,8 @@ class MklDnnShape {
     return data_.tf_data_format_;
   }
   /// We don't create primitive_descriptor for TensorFlow layout now.
-  /// We use lazy evaluation and create it only when needed.
+  /// We use lazy evaluation and create it only when needed. Input format can
+  /// also be Blocked format.
   inline void SetTfLayout(size_t dims, const memory::dims& sizes,
                           memory::format format) {
     CHECK_EQ(dims, sizes.size());
@@ -461,15 +527,26 @@ class MklDnnShape {
       data_.sizes_[ii] = sizes[ii];
     }
     data_.tf_data_format_ = format;
-    SetTfDimOrder(dims, format);
+    if (format != memory::format::blocked) {
+      SetTfDimOrder(dims, format);
+    }
   }
+
   inline const memory::desc GetTfLayout() const {
     memory::dims dims;
     for (size_t ii = 0; ii < data_.dimension_; ii++) {
       dims.push_back(data_.sizes_[ii]);
     }
-    return memory::desc(dims, data_.T_, data_.tf_data_format_);
+
+    // Create Blocked memory desc if input TF format was set like that.
+    if (data_.tf_data_format_ == memory::format::blocked) {
+      auto strides = CalculateTFStrides(dims);
+      return CreateBlockedMemDescHelper(dims, strides, data_.T_);
+    } else {
+      return memory::desc(dims, data_.T_, data_.tf_data_format_);
+    }
   }
+
   inline const memory::desc GetCurLayout() const {
     return IsMklTensor() ? GetMklLayout() : GetTfLayout();
   }
@@ -579,8 +656,13 @@ class MklDnnShape {
 #endif
 
 // List of MklShape objects. Used in Concat/Split layers.
+
 typedef std::vector<MklShape> MklShapeList;
 
+#ifdef INTEL_MKL_DNN
+typedef std::vector<MklDnnShape> MklDnnShapeList;
+#endif
+
 // Check if all tensors specified by MklShapes are MKL tensors.
 inline bool AreAllMklTensors(const MklShapeList& shapes) {
   for (auto& s : shapes) {
@@ -591,6 +673,7 @@ inline bool AreAllMklTensors(const MklShapeList& shapes) {
   return true;
 }
 
+#ifndef INTEL_MKL_DNN
 template <typename T>
 inline Tensor ConvertMklToTF(OpKernelContext* context, const Tensor& mkl_tensor,
                              const MklShape& mkl_shape) {
@@ -615,32 +698,15 @@ inline Tensor ConvertMklToTF(OpKernelContext* context, const Tensor& mkl_tensor,
 
   return output_tensor;
 }
-
-#ifdef INTEL_MKL_DNN
+#else
 template <typename T>
 inline Tensor ConvertMklToTF(OpKernelContext* context, const Tensor& mkl_tensor,
                              const MklDnnShape& mkl_shape) {
   Tensor output_tensor;
   TensorShape output_shape;
 
-#if 0
-  // TODO(nhasabni): need to implement
-  for (size_t j = 0; j < mkl_shape.GetDimension(); j++) {
-    // Outermost to innermost dimension
-    output_shape.AddDim(mkl_shape.GetSizes()[mkl_shape.tf_dim_idx(j)]);
-  }
-
-  // Allocate output tensor.
-  context->allocate_temp(DataTypeToEnum<T>::v(), output_shape, &output_tensor);
-
-  dnnLayout_t output_layout = static_cast<dnnLayout_t>(mkl_shape.GetTfLayout());
-  void* input_buffer = const_cast<T*>(mkl_tensor.flat<T>().data());
-  void* output_buffer = const_cast<T*>(output_tensor.flat<T>().data());
-
-  if (mkl_tensor.NumElements() != 0) {
-    mkl_shape.GetConvertedFlatData(output_layout, input_buffer, output_buffer);
-  }
-#endif
+  TF_CHECK_OK(Status(error::Code::UNIMPLEMENTED,
+                     "Unimplemented conversion function"));
 
   return output_tensor;
 }
@@ -682,6 +748,9 @@ inline void GetMklInputList(OpKernelContext* ctext, StringPiece name,
   ctext->input_list(name, input_tensors);
 }
 
+
+#ifndef INTEL_MKL_DNN
+
 inline void GetMklShapeList(OpKernelContext* ctext, StringPiece name,
                             MklShapeList* mkl_shapes) {
   OpInputList input_mkl_tensors;
@@ -694,6 +763,22 @@ inline void GetMklShapeList(OpKernelContext* ctext, StringPiece name,
   }
 }
 
+#else
+
+inline void GetMklShapeList(OpKernelContext* ctext, StringPiece name,
+                            MklDnnShapeList* mkl_shapes) {
+  OpInputList input_mkl_tensors;
+  GetMklInputList(ctext, strings::StrCat("mkl_", name), &input_mkl_tensors);
+
+  for (int i = 0; i < input_mkl_tensors.size(); i++) {
+    (*mkl_shapes)[i].DeSerializeMklDnnShape(
+        input_mkl_tensors[i].flat<uint8>().data(),
+        input_mkl_tensors[i].flat<uint8>().size() * sizeof(uint8));
+  }
+}
+
+#endif
+
 #ifdef INTEL_MKL_DNN
 /// Get shape of input tensor pointed by 'input_idx' in TensorShape format.
 /// If the input tensor is in MKL layout, then obtains TensorShape from
@@ -909,6 +994,7 @@ inline void CopyMklTensorInToOut(OpKernelContext* context,
   context->set_output(idx_meta_out, meta_output);
 }
 
+#ifndef INTEL_MKL_DNN
 inline void CopyTfTensorInToOutWithShape(OpKernelContext* context,
                                          int idx_in, int idx_out,
                                          const TensorShape& shape) {
@@ -926,6 +1012,27 @@ inline void CopyTfTensorInToOutWithShape(OpKernelContext* context,
   CHECK(output.CopyFrom(data, shape));
   context->set_output(idx_data_out, output);
 }
+#else
+inline void CopyTfTensorInToOutWithShape(OpKernelContext* context,
+                                         int idx_in, int idx_out,
+                                         const TensorShape& shape) {
+  int num_inputs = context->num_inputs();
+  int num_outputs = context->num_outputs();
+  int idx_data_in = GetTensorDataIndex(idx_in, num_inputs);
+  int idx_data_out = GetTensorDataIndex(idx_out, num_outputs);
+
+  const Tensor& data = context->input(idx_data_in);
+  MklDnnShape mkl_shape_output;
+  mkl_shape_output.SetMklTensor(false);
+  AllocateOutputSetMklShape(context, idx_out, mkl_shape_output);
+  Tensor output(data.dtype());
+  // TODO(intel_tf): alternatively, call forward_input_to_output_with_shape(...)
+  CHECK(output.CopyFrom(data, shape));
+  context->set_output(idx_data_out, output);
+}
+#endif
+
+#ifndef INTEL_MKL_DNN
 
 inline void ForwardTfTensorInToOut(OpKernelContext* context,
                                   int idx_in, int idx_out) {
@@ -944,6 +1051,27 @@ inline void ForwardTfTensorInToOut(OpKernelContext* context,
   }
 }
 
+#else
+
+inline void ForwardTfTensorInToOut(OpKernelContext* context,
+                                  int idx_in, int idx_out) {
+  int num_inputs = context->num_inputs();
+  int num_outputs = context->num_outputs();
+  int idx_data_in = GetTensorDataIndex(idx_in, num_inputs);
+  int idx_data_out = GetTensorDataIndex(idx_out, num_outputs);
+
+  MklDnnShape dnn_shape_output;
+  dnn_shape_output.SetMklTensor(false);
+  AllocateOutputSetMklShape(context, idx_out, dnn_shape_output);
+  if (IsRefType(context->input_dtype(idx_data_in))) {
+    context->forward_ref_input_to_ref_output(idx_data_in, idx_data_out);
+  } else {
+    context->set_output(idx_data_out, context->input(idx_data_in));
+  }
+}
+
+#endif
+
 inline void ForwardMklTensorInToOut(OpKernelContext* context,
                                    int idx_in, int idx_out) {
   int num_inputs = context->num_inputs();
@@ -962,6 +1090,25 @@ inline void ForwardMklTensorInToOut(OpKernelContext* context,
   }
 }
 
+#ifdef INTEL_MKL_DNN
+inline void ForwardMklTensorInToOutWithMklShape(OpKernelContext* context,
+                                             int idx_in, int idx_out,
+                                             const MklDnnShape& mkl_shape) {
+  int num_inputs = context->num_inputs();
+  int num_outputs = context->num_outputs();
+  int idx_data_in = GetTensorDataIndex(idx_in, num_inputs);
+  int idx_data_out = GetTensorDataIndex(idx_out, num_outputs);
+
+  AllocateOutputSetMklShape(context, idx_out, mkl_shape);
+
+  if (IsRefType(context->input_dtype(idx_data_in))) {
+    context->forward_ref_input_to_ref_output(idx_data_in, idx_data_out);
+  } else {
+    context->set_output(idx_data_out, context->input(idx_data_in));
+  }
+}
+#endif
+
 // Forward the MKL shape ONLY (used in elementwise and other ops where
 // we call the eigen implementation and MKL shape is not used)
 inline void ForwardMklMetaDataInToOut(OpKernelContext* context,
@@ -985,6 +1132,10 @@ inline void SetDummyMklShapeOutput(OpKernelContext* context,
   AllocateOutputSetMklShape(context, idx_data_out, mkl_shape_output);
 }
 
+#ifndef INTEL_MKL_DNN
+// We don't need these functions in MKLDNN. We have defined equality operator
+// on MklDnnShape class directly.
+
 // Checks if the TF shape for both MKL tensors is the same or not
 // Returns: true if both TF shapes are the same, false otherwise
 inline bool MklCompareShapes(const MklShape* input_shape_0,
@@ -1051,6 +1202,7 @@ inline bool MklCompareShapes(const TensorShape* input_shape_0,
 
   return true;
 }
+#endif
 
 // These functions do not compile with MKL-DNN since mkl.h is missing.
 // We may need to remove them later.
@@ -1127,11 +1279,14 @@ inline memory::format TFDataFormatToMklDnnDataFormat(TensorFormat format) {
 /// @return: Tensorflow data format corresponding to memory::format
 ///          Fails with an error if invalid data format.
 inline TensorFormat MklDnnDataFormatToTFDataFormat(memory::format format) {
-  if (format == memory::format::nhwc)
-    return FORMAT_NHWC;
-  else if (format == memory::format::nchw)
-    return FORMAT_NCHW;
-  TF_CHECK_OK(Status(error::Code::INVALID_ARGUMENT, "Unsupported data format"));
+  if (format == memory::format::nhwc) return FORMAT_NHWC;
+  else if (format == memory::format::nchw) return FORMAT_NCHW;
+  TF_CHECK_OK(Status(error::Code::INVALID_ARGUMENT,
+                     "Unsupported data format"));
+
+  // Return to prevent compiler warnings, otherwise TF_CHECK_OK will ensure
+  // that we don't come here.
+  return FORMAT_NHWC;
 }
 
 /// Map TensorShape object into memory::dims required by MKL-DNN
@@ -1175,6 +1330,23 @@ inline memory::dims TFShapeToMklDnnDimsInNCHW(const TensorShape& shape,
   return memory::dims({n, c, h, w});
 }
 
+/// Overloaded version of function above. Input parameters are
+/// self-explanatory.
+inline memory::dims MklDnnDimsInNCHW(const memory::dims& in_dims,
+                                     TensorFormat format) {
+  // Check validity of format.
+  CHECK_NE(TFDataFormatToMklDnnDataFormat(format),
+           memory::format::format_undef);
+
+  int n = in_dims[GetTensorDimIndex(format, 'N')];
+  int c = in_dims[GetTensorDimIndex(format, 'C')];
+  int h = in_dims[GetTensorDimIndex(format, 'H')];
+  int w = in_dims[GetTensorDimIndex(format, 'W')];
+
+  // MKL-DNN requires dimensions in NCHW format.
+  return memory::dims({n, c, h, w});
+}
+
 /// Map MklDnn memory::dims object into TensorShape object.
 ///
 /// This function will simply map input shape in MKL-DNN memory::dims format
@@ -1217,6 +1389,43 @@ inline padding_kind TFPaddingToMklDnnPadding(Padding pad) {
   return padding_kind::zero;
 }
 
+/// Helper function to create memory descriptor in Blocked format
+///
+/// @input: Tensor dimensions
+/// @input: strides corresponding to dimensions. One can use utility
+///         function such as CalculateTFStrides to compute strides
+///         for given dimensions.
+/// @return: memory::desc object corresponding to blocked memory format
+///          for given dimensions and strides.
+inline memory::desc CreateBlockedMemDescHelper(const memory::dims& dim,
+                                               const memory::dims& strides,
+                                               memory::data_type dtype) {
+  CHECK_EQ(dim.size(), strides.size());
+
+  // We have to construct memory descriptor in a C style. This is not at all
+  // ideal but MKLDNN does not offer any API to construct descriptor in
+  // blocked format except a copy constructor that accepts
+  // mkldnn_memory_desc_t.
+  mkldnn_memory_desc_t md;
+  md.primitive_kind = mkldnn_memory;
+  md.ndims = dim.size();
+  md.format = mkldnn_blocked;
+  md.data_type = memory::convert_to_c(dtype);
+
+  for (size_t i = 0; i < dim.size(); i++) {
+    md.layout_desc.blocking.block_dims[i] = 1;
+    md.layout_desc.blocking.strides[1][i] = 1;
+    md.layout_desc.blocking.strides[0][i] = strides[i];
+    md.layout_desc.blocking.padding_dims[i] = dim[i];
+    md.layout_desc.blocking.offset_padding_to_data[i] = 0;
+    md.dims[i] = dim[i];
+  }
+  md.layout_desc.blocking.offset_padding = 0;
+
+  return memory::desc(md);
+}
+
+
 /*
  * Class to represent all the resources corresponding to a tensor in TensorFlow
  * that are required to execute an operation (such as Convolution).
@@ -1285,30 +1494,8 @@ class MklDnnData {
   /// @return: memory::desc object corresponding to blocked memory format
   ///          for given dimensions and strides.
   static inline memory::desc CreateBlockedMemDesc(const memory::dims& dim,
-                                                  const memory::dims& strides) {
-    CHECK_EQ(dim.size(), strides.size());
-
-    // We have to construct memory descriptor in a C style. This is not at all
-    // ideal but MKLDNN does not offer any API to construct descriptor in
-    // blocked format except a copy constructor that accepts
-    // mkldnn_memory_desc_t.
-    mkldnn_memory_desc_t md;
-    md.primitive_kind = mkldnn_memory;
-    md.ndims = dim.size();
-    md.format = mkldnn_blocked;
-    md.data_type = memory::convert_to_c(MklDnnType<T>());
-
-    for (size_t i = 0; i < dim.size(); i++) {
-      md.layout_desc.blocking.block_dims[i] = 1;
-      md.layout_desc.blocking.strides[1][i] = 1;
-      md.layout_desc.blocking.strides[0][i] = strides[i];
-      md.layout_desc.blocking.padding_dims[i] = dim[i];
-      md.layout_desc.blocking.offset_padding_to_data[i] = 0;
-      md.dims[i] = dim[i];
-    }
-    md.layout_desc.blocking.offset_padding = 0;
-
-    return memory::desc(md);
+                                                 const memory::dims& strides) {
+    return CreateBlockedMemDescHelper(dim, strides, MklDnnType<T>());
   }
 
   /// A version of SetUsrMem call that allows user to create memory in blocked
@@ -1376,6 +1563,7 @@ class MklDnnData {
     return user_memory_->get_primitive_desc();
   }
 
+
   /// Get function for descriptor of user memory.
   inline memory::desc GetUsrMemDesc() {
     // This is ugly. Why MKL-DNN does not provide desc() method of const type??
@@ -1438,6 +1626,17 @@ class MklDnnData {
     return op_pd != user_memory_->get_primitive_desc();
   }
 
+  /// Predicate that checks if we need to reorder user's memory into memory
+  /// based on the provided format.
+  ///
+  /// @input: target_format - memory format of the given input of an
+  ///               operation
+  /// @return: true in case reorder of input is needed; false, otherwise.
+  inline bool IsReorderNeeded(const memory::format& target_format) const {
+    CHECK_NOTNULL(user_memory_);
+    return target_format != user_memory_->get_primitive_desc().desc().data.format;
+  }
+
   /// Function to create a reorder from memory pointed by from to memory pointed
   /// by to. Returns created primitive.
   inline primitive CreateReorder(const memory* from, const memory* to) const {
-- 
GitLab


From 62ed39337eeb7d11a503454840d83af191f9b0ad Mon Sep 17 00:00:00 2001
From: "Joshua V. Dillon" <jvdillon@google.com>
Date: Wed, 6 Dec 2017 12:12:34 -0800
Subject: [PATCH 0695/1225] Style cleanups in
 `tf.contrib.distributions.bijectors.SoftmaxCentered`.

PiperOrigin-RevId: 178133366
---
 .../ops/bijectors/softmax_centered_impl.py    | 38 ++++++++++---------
 1 file changed, 21 insertions(+), 17 deletions(-)

diff --git a/tensorflow/contrib/distributions/python/ops/bijectors/softmax_centered_impl.py b/tensorflow/contrib/distributions/python/ops/bijectors/softmax_centered_impl.py
index 8645cc1b6b..e4a1d3dde2 100644
--- a/tensorflow/contrib/distributions/python/ops/bijectors/softmax_centered_impl.py
+++ b/tensorflow/contrib/distributions/python/ops/bijectors/softmax_centered_impl.py
@@ -134,23 +134,20 @@ class SoftmaxCentered(bijector.Bijector):
     # Pad the last dim with a zeros vector. We need this because it lets us
     # infer the scale in the inverse function.
     y = array_ops.expand_dims(x, dim=-1) if self._static_event_ndims == 0 else x
-    ndims = (y.get_shape().ndims if y.get_shape().ndims is not None
-             else array_ops.rank(y))
-    y = array_ops.pad(y,
-                      paddings=array_ops.concat(
-                          (array_ops.zeros(
-                              (ndims - 1, 2), dtype=dtypes.int32), [[0, 1]]),
-                          0))
-
+    ndims = _get_ndims(y)
+    y = array_ops.pad(y, paddings=array_ops.one_hot(indices=[-1, ndims - 1],
+                                                    depth=ndims,
+                                                    axis=0,
+                                                    dtype=dtypes.int32))
     # Set shape hints.
-    if x.get_shape().ndims is not None:
-      shape = x.get_shape().as_list()
+    if x.shape.ndims is not None:
+      shape = x.shape.as_list()
       if self._static_event_ndims == 0:
         shape += [2]
       elif shape[-1] is not None:
         shape[-1] += 1
       shape = tensor_shape.TensorShape(shape)
-      y.get_shape().assert_is_compatible_with(shape)
+      y.shape.assert_is_compatible_with(shape)
       y.set_shape(shape)
 
     # Since we only support event_ndims in [0, 1] and we do padding, we always
@@ -166,10 +163,10 @@ class SoftmaxCentered(bijector.Bijector):
     # x[i] = log(exp(x[i])) - log(y[end]) - log(normalization)
     #      = log(exp(x[i])/normalization) - log(y[end])
     #      = log(y[i]) - log(y[end])
-    shape = (np.asarray(y.get_shape().as_list(), dtype=np.int32)
-             if y.get_shape().is_fully_defined()
+    shape = (np.asarray(y.shape.as_list(), dtype=np.int32)
+             if y.shape.is_fully_defined()
              else array_ops.shape(y, name="shape"))
-    ndims = y.get_shape().ndims or math_ops.rank(y, name="ndims")
+    ndims = _get_ndims(y)
 
     # Do this first to make sure CSE catches that it'll happen again in
     # _inverse_log_det_jacobian.
@@ -195,14 +192,14 @@ class SoftmaxCentered(bijector.Bijector):
       x = array_ops.squeeze(x, squeeze_dims=[ndims-1])
 
     # Set shape hints.
-    if y.get_shape().ndims is not None:
-      shape = y.get_shape().as_list()
+    if y.shape.ndims is not None:
+      shape = y.shape.as_list()
       if self._static_event_ndims == 0:
         shape = shape[:-1]
       elif shape[-1] is not None:
         shape[-1] -= 1
       shape = tensor_shape.TensorShape(shape)
-      x.get_shape().assert_is_compatible_with(shape)
+      x.shape.assert_is_compatible_with(shape)
       x.set_shape(shape)
 
     return x
@@ -243,3 +240,10 @@ class SoftmaxCentered(bijector.Bijector):
                                   axis=-1,
                                   keep_dims=True))
       return array_ops.squeeze(fldj, squeeze_dims=-1)
+
+
+def _get_ndims(x):
+  """Returns `ndims`, statically if possible."""
+  if x.shape.ndims is not None:
+    return x.shape.ndims
+  return array_ops.rank(x, name="ndims")
-- 
GitLab


From c7778898eaf001c82744a8f4c71eb9a880a158f0 Mon Sep 17 00:00:00 2001
From: Skye Wanderman-Milne <skyewm@google.com>
Date: Wed, 6 Dec 2017 12:12:43 -0800
Subject: [PATCH 0696/1225] Uniquify names and prefixes in import_graph_def
 with C API enabled.

This makes the C API-enabled behavior the same as the current behavior.

PiperOrigin-RevId: 178133381
---
 tensorflow/c/c_api.cc                        | 10 +++++
 tensorflow/c/c_api.h                         | 14 +++++++
 tensorflow/python/framework/importer.py      |  2 +
 tensorflow/python/framework/importer_test.py | 39 ++++++++------------
 4 files changed, 42 insertions(+), 23 deletions(-)

diff --git a/tensorflow/c/c_api.cc b/tensorflow/c/c_api.cc
index c8b4bfffd4..13253ced49 100644
--- a/tensorflow/c/c_api.cc
+++ b/tensorflow/c/c_api.cc
@@ -1850,6 +1850,16 @@ void TF_ImportGraphDefOptionsSetPrefix(TF_ImportGraphDefOptions* opts,
   opts->opts.prefix = prefix;
 }
 
+void TF_ImportGraphDefOptionsSetUniquifyNames(TF_ImportGraphDefOptions* opts,
+                                              unsigned char uniquify_names) {
+  opts->opts.uniquify_names = uniquify_names;
+}
+
+void TF_ImportGraphDefOptionsSetUniquifyPrefix(TF_ImportGraphDefOptions* opts,
+                                               unsigned char uniquify_prefix) {
+  opts->opts.uniquify_prefix = uniquify_prefix;
+}
+
 void TF_ImportGraphDefOptionsAddInputMapping(TF_ImportGraphDefOptions* opts,
                                              const char* src_name,
                                              int src_index, TF_Output dst) {
diff --git a/tensorflow/c/c_api.h b/tensorflow/c/c_api.h
index bb569d67fc..df7fe222b1 100644
--- a/tensorflow/c/c_api.h
+++ b/tensorflow/c/c_api.h
@@ -889,6 +889,20 @@ TF_CAPI_EXPORT extern void TF_DeleteImportGraphDefOptions(
 TF_CAPI_EXPORT extern void TF_ImportGraphDefOptionsSetPrefix(
     TF_ImportGraphDefOptions* opts, const char* prefix);
 
+// Set whether to uniquify imported operation names. If true, imported operation
+// names will be modified if their name already exists in the graph. If false,
+// conflicting names will be treated as an error. Note that this option has no
+// effect if a prefix is set, since the prefix will guarantee all names are
+// unique. Defaults to false.
+TF_CAPI_EXPORT extern void TF_ImportGraphDefOptionsSetUniquifyNames(
+    TF_ImportGraphDefOptions* opts, unsigned char uniquify_names);
+
+// If true, the specified prefix will be modified if it already exists as an
+// operation name or prefix in the graph. If false, a conflicting prefix will be
+// treated as an error. This option has no effect if no prefix is specified.
+TF_CAPI_EXPORT extern void TF_ImportGraphDefOptionsSetUniquifyPrefix(
+    TF_ImportGraphDefOptions* opts, unsigned char uniquify_prefix);
+
 // Set any imported nodes with input `src_name:src_index` to have that input
 // replaced with `dst`. `src_name` refers to a node in the graph to be imported,
 // `dst` references a node already existing in the graph being imported into.
diff --git a/tensorflow/python/framework/importer.py b/tensorflow/python/framework/importer.py
index 7fd7991523..860e3fe715 100644
--- a/tensorflow/python/framework/importer.py
+++ b/tensorflow/python/framework/importer.py
@@ -253,6 +253,8 @@ def _PopulateTFImportGraphDefOptions(options, prefix, input_map,
                                      return_elements):
   """Populates the TF_ImportGraphDefOptions `options`."""
   c_api.TF_ImportGraphDefOptionsSetPrefix(options, prefix)
+  c_api.TF_ImportGraphDefOptionsSetUniquifyNames(options, True)
+  c_api.TF_ImportGraphDefOptionsSetUniquifyPrefix(options, True)
 
   for input_src, input_dst in input_map.items():
     input_src = compat.as_str(input_src)
diff --git a/tensorflow/python/framework/importer_test.py b/tensorflow/python/framework/importer_test.py
index 5d7d3fe5e3..b5cc24ff33 100644
--- a/tensorflow/python/framework/importer_test.py
+++ b/tensorflow/python/framework/importer_test.py
@@ -111,8 +111,6 @@ class ImportGraphDefTest(test.TestCase):
       self.assertNotEqual(None, a.op_def)
 
   def testMultipleImport(self):
-    if ops._USE_C_API: return  # TODO(skyewm): set uniquify_names
-
     graph_def = self._MakeGraphDef("""
     node { name: 'A' op: 'IntOutput' }
     node { name: 'B' op: 'IntInput' input: 'A:0' }
@@ -156,16 +154,16 @@ class ImportGraphDefTest(test.TestCase):
       self.assertEqual(list(b3.inputs), [a3.outputs[0]])
 
       # Import with existing de-duped node names
-      a4, b4 = importer.import_graph_def(
+      a1_1, b1_1 = importer.import_graph_def(
           self._MakeGraphDef("""
           node { name: 'A_1' op: 'IntOutput' }
           node { name: 'B_1' op: 'IntInput' input: 'A_1:0' }
           """),
           return_elements=["A_1", "B_1"],
           name="")
-      self.assertEqual(a4.name, "A_1_1")
-      self.assertEqual(b4.name, "B_1_1")
-      self.assertEqual(list(b4.inputs), [a4.outputs[0]])
+      self.assertEqual(a1_1.name, "A_1_1")
+      self.assertEqual(b1_1.name, "B_1_1")
+      self.assertEqual(list(b1_1.inputs), [a1_1.outputs[0]])
 
       # Create a name scope and then import node with same name
       with ops.name_scope("foo"):
@@ -738,8 +736,6 @@ class ImportGraphDefTest(test.TestCase):
                        [b"loc:@imported_graph/A", b"loc:@imported_graph/B"])
 
   def testNamePrefixColocationAttrsMultipleImport(self):
-    if ops._USE_C_API: return  # TODO(skyewm): set uniquify_names
-
     original_graph_def = self._MakeGraphDef("""
           node { name: 'A' op: 'None' }
           node { name: 'B' op: 'None'  attr {
@@ -748,21 +744,18 @@ class ImportGraphDefTest(test.TestCase):
           } }""")
 
     with ops.Graph().as_default():
-      b, = importer.import_graph_def(
-          original_graph_def, return_elements=["B"], name="")
-      _, = importer.import_graph_def(
-          original_graph_def, return_elements=["B"], name="")
-      self.assertProtoEqualsVersion("""
-          node { name: 'A' op: 'None' }
-          node { name: 'B' op: 'None'  attr {
-            key: '_class'
-            value { list { s: 'loc:@A' } }
-          } }
-          node { name: 'A_1' op: 'None' }
-          node { name: 'B_1' op: 'None'  attr {
-            key: '_class'
-            value { list { s: 'loc:@A_1' } }
-          } }""", b.graph.as_graph_def())
+      a, b = importer.import_graph_def(
+          original_graph_def, return_elements=["A", "B"], name="")
+      a_1, b_1 = importer.import_graph_def(
+          original_graph_def, return_elements=["A", "B"], name="")
+
+      self.assertEqual(a.name, "A")
+      self.assertEqual(b.name, "B")
+      self.assertEqual(b.colocation_groups(), [b"loc:@A"])
+
+      self.assertEqual(a_1.name, "A_1")
+      self.assertEqual(b_1.name, "B_1")
+      self.assertEqual(b_1.colocation_groups(), [b"loc:@A_1"])
 
   def testNamePrefixColocationAttrsNotFound(self):
     original_graph_def = self._MakeGraphDef("""
-- 
GitLab


From 4c3069a68238766c8e4333b573de0433e6e976d0 Mon Sep 17 00:00:00 2001
From: Mahmoud Abuzaina <mahmoud.abuzaina@intel.com>
Date: Wed, 6 Dec 2017 12:30:45 -0800
Subject: [PATCH 0697/1225] Adding MKL-DNN AddN op (#14681)

---
 tensorflow/core/kernels/mkl_aggregate_ops.cc | 204 ++++++++++++++++---
 1 file changed, 178 insertions(+), 26 deletions(-)

diff --git a/tensorflow/core/kernels/mkl_aggregate_ops.cc b/tensorflow/core/kernels/mkl_aggregate_ops.cc
index 935eb81dd0..9aabbbdb6b 100644
--- a/tensorflow/core/kernels/mkl_aggregate_ops.cc
+++ b/tensorflow/core/kernels/mkl_aggregate_ops.cc
@@ -19,7 +19,6 @@ limitations under the License.
 #define EIGEN_USE_THREADS
 
 #include <numeric>
-
 #include "tensorflow/core/framework/numeric_op.h"
 #include "tensorflow/core/framework/register_types.h"
 #include "tensorflow/core/lib/gtl/inlined_vector.h"
@@ -29,10 +28,17 @@ limitations under the License.
 #include "mkl_dnn_types.h"
 #include "tensorflow/core/util/mkl_util.h"
 
-namespace tensorflow {
+#ifdef INTEL_MKL_DNN
+#include "mkldnn.hpp"
+using mkldnn::stream;
+using mkldnn::sum;
+#endif
 
+namespace tensorflow {
 typedef Eigen::ThreadPoolDevice CPUDevice;
 
+#ifndef INTEL_MKL_DNN
+
 template <typename Device, typename T>
 class MklAddNOp : public OpKernel {
  public:
@@ -41,17 +47,18 @@ class MklAddNOp : public OpKernel {
   void Compute(OpKernelContext* ctx) override {
     const int num = ctx->num_inputs();
     OP_REQUIRES(ctx, num / 2 == 2,
-                errors::InvalidArgument("Only additions of two arguments "
+                errors::InvalidArgument("Only additions of two tensors "
                                         "supported by MKL. Num inputs: ",
                                         num));
 
     MklAddNOpContext mkl_context;
-    const Tensor& input0 = MklGetInput(ctx, 0);
-    GetMklShape(ctx, 0, &(mkl_context.input1_shape));
+    size_t src1_idx = 0, src2_idx = 1;
+    const Tensor& input0 = MklGetInput(ctx, src1_idx);
+    GetMklShape(ctx, src1_idx, &(mkl_context.input1_shape));
     bool input1_in_mkl_format = mkl_context.input1_shape.IsMklTensor();
 
-    const Tensor& input1 = MklGetInput(ctx, 1);
-    GetMklShape(ctx, 1, &(mkl_context.input2_shape));
+    const Tensor& input1 = MklGetInput(ctx, src2_idx);
+    GetMklShape(ctx, src2_idx, &(mkl_context.input2_shape));
     bool input2_in_mkl_format = mkl_context.input2_shape.IsMklTensor();
 
     // handle the case of a scalar
@@ -59,13 +66,12 @@ class MklAddNOp : public OpKernel {
       const TensorShape& o_shape = input0.shape();
       Tensor* out_tensor = nullptr;
       mkl_context.output_shape.SetMklTensor(false);
-      AllocateOutputSetMklShape(ctx, 0, &out_tensor, o_shape,
+      AllocateOutputSetMklShape(ctx, src1_idx, &out_tensor, o_shape,
                                 mkl_context.output_shape);
       float user_i1 = (input0.scalar<T>()());
-      ;
       float user_i2 = (input1.scalar<T>()());
-      ;
-      out_tensor->scalar<T>()() = std::plus<float>{}(user_i1, user_i2);
+      out_tensor->scalar<T>()() =
+          std::plus<float>{}(user_i1, user_i2);
       return;
     }
 
@@ -82,8 +88,8 @@ class MklAddNOp : public OpKernel {
       if (o_shape.num_elements() == 0) {
         Tensor* out_tensor = nullptr;
         mkl_context.output_shape.SetMklTensor(false);
-        AllocateOutputSetMklShape(ctx, 0, &out_tensor, o_shape,
-                                  mkl_context.output_shape);
+        AllocateOutputSetMklShape(ctx, src1_idx, &out_tensor, o_shape,
+                                 mkl_context.output_shape);
         return;
       }
     }
@@ -92,9 +98,9 @@ class MklAddNOp : public OpKernel {
     mkl_context.in_strides = new size_t[mkl_context.in_dims];
     // Generate size, stride for input if input is in MKL format.
     if (input1_in_mkl_format || input2_in_mkl_format) {
-      const MklShape* tmp_mkl_shape = (input1_in_mkl_format)
-                                          ? &mkl_context.input1_shape
-                                          : &mkl_context.input2_shape;
+      const MklShape* tmp_mkl_shape =
+        (input1_in_mkl_format) ? &mkl_context.input1_shape :
+        &mkl_context.input2_shape;
       for (int i = 0; i < mkl_context.in_dims; i++) {
         mkl_context.in_sizes[i] = tmp_mkl_shape->GetSizes()[i];
         mkl_context.in_strides[i] = tmp_mkl_shape->GetStrides()[i];
@@ -110,7 +116,6 @@ class MklAddNOp : public OpKernel {
             mkl_context.in_strides[i - 1] * mkl_context.in_sizes[i - 1];
       }
     }
-
     std::vector<float> coeff(2, 1.0);
     mkl_context.MklCreateInputLayouts(ctx);
     CHECK_EQ(dnnSumCreate_F32(&mkl_context.Eltwise, mkl_context.attributes, 2,
@@ -127,7 +132,7 @@ class MklAddNOp : public OpKernel {
      mkl_context.output_shape.SetMklLayout(mkl_context.Eltwise, dnnResourceDst);
 
      mkl_context.output_shape.SetTfLayout(
-         mkl_context.in_dims, mkl_context.in_sizes, mkl_context.in_strides);
+        mkl_context.in_dims, mkl_context.in_sizes, mkl_context.in_strides);
      if (input1_in_mkl_format == true) {
       mkl_context.output_shape.SetTfDimOrder(mkl_context.in_dims,
       mkl_context.input1_shape.GetTfToMklDimMap());
@@ -139,12 +144,12 @@ class MklAddNOp : public OpKernel {
                         mkl_context.output_shape.GetMklLayout())) /
                     sizeof(T));
 
-     AllocateOutputSetMklShape(ctx, 0, &output, tf_shape,
+     AllocateOutputSetMklShape(ctx, src1_idx, &output, tf_shape,
                               mkl_context.output_shape);
     } else {
      const TensorShape& o_shape = input1.shape();
      mkl_context.output_shape.SetMklTensor(false);
-     AllocateOutputSetMklShape(ctx, 0, &output, o_shape,
+     AllocateOutputSetMklShape(ctx, src1_idx, &output, o_shape,
                                 mkl_context.output_shape);
     }
 
@@ -172,16 +177,18 @@ class MklAddNOp : public OpKernel {
     void MklCreateInputLayouts(OpKernelContext* context) {
       bool input1_in_mkl_format = input1_shape.IsMklTensor();
       if (!input1_in_mkl_format) {
-        CHECK_EQ(dnnLayoutCreate_F32(&lt_input1, in_dims, in_sizes, in_strides),
-                 E_SUCCESS);
+        CHECK_EQ(
+            dnnLayoutCreate_F32(&lt_input1, in_dims, in_sizes, in_strides),
+            E_SUCCESS);
       } else {
         lt_input1 = static_cast<dnnLayout_t>(input1_shape.GetCurLayout());
       }
 
       bool input2_in_mkl_format = input2_shape.IsMklTensor();
       if (!input2_in_mkl_format) {
-        CHECK_EQ(dnnLayoutCreate_F32(&lt_input2, in_dims, in_sizes, in_strides),
-                 E_SUCCESS);
+        CHECK_EQ(
+            dnnLayoutCreate_F32(&lt_input2, in_dims, in_sizes, in_strides),
+            E_SUCCESS);
       } else {
         lt_input2 = static_cast<dnnLayout_t>(input2_shape.GetCurLayout());
       }
@@ -257,8 +264,8 @@ class MklAddNOp : public OpKernel {
       bool input2_in_mkl_format = input2_shape.IsMklTensor();
       dnnDelete_F32(Eltwise);
       if (!input1_in_mkl_format || !input2_in_mkl_format) {
-        delete[] in_sizes;
-        delete[] in_strides;
+         delete [] in_sizes;
+         delete [] in_strides;
       }
       if (!input1_in_mkl_format) {
          dnnLayoutDelete_F32(lt_input1);
@@ -270,6 +277,151 @@ class MklAddNOp : public OpKernel {
   } MklAddNOpContext;
 };
 
+#else  // INTEL_MKL_DNN
+template <typename Device, typename T>
+class MklAddNOp : public OpKernel {
+ public:
+  ~MklAddNOp() {}
+  explicit MklAddNOp(OpKernelConstruction* context) : OpKernel(context) {}
+
+  void Compute(OpKernelContext* ctx) override {
+    const int num = ctx->num_inputs();
+    // Only additions of 2 input tensors is supported now
+    OP_REQUIRES(ctx, num / 2 == 2,
+                errors::InvalidArgument("Only additions of two tensors "
+                                        "supported by MKL. Num inputs: ",
+                                        num));
+
+    try {
+      auto cpu_engine = engine(engine::cpu, 0);
+      size_t src1_idx = 0, src2_idx = 1;
+      const Tensor& src1_tensor = MklGetInput(ctx, src1_idx);
+      const Tensor& src2_tensor = MklGetInput(ctx, src2_idx);
+
+      MklDnnShape src1_mkl_shape, src2_mkl_shape;
+      GetMklShape(ctx, src1_idx, &src1_mkl_shape);
+      GetMklShape(ctx, src2_idx, &src2_mkl_shape);
+      bool input1_in_mkl_format = src1_mkl_shape.IsMklTensor();
+      bool input2_in_mkl_format = src2_mkl_shape.IsMklTensor();
+      int src1_dims_size = input1_in_mkl_format?
+       src1_mkl_shape.GetDimension(): src1_tensor.dims();
+      int src2_dims_size = input2_in_mkl_format?
+       src2_mkl_shape.GetDimension(): src2_tensor.dims();
+
+      if (!input1_in_mkl_format && src1_dims_size == 0) {
+         Tensor* dst_tensor = nullptr;
+         MklShape mkl_shape_dst;
+         mkl_shape_dst.SetMklTensor(false);
+         AllocateOutputSetMklShape(ctx, src1_idx, &dst_tensor,
+         src1_tensor.shape(), mkl_shape_dst);
+         float user_i1 = (src1_tensor.scalar<T>()());
+         float user_i2 = (src2_tensor.scalar<T>()());
+         dst_tensor->scalar<T>()() =
+           std::plus<float>{}(user_i1, user_i2);
+         return;
+       }
+
+      // If there is nothing to compute, return.
+      if (!input1_in_mkl_format && !input2_in_mkl_format) {
+        if (src1_tensor.shape().num_elements() == 0) {
+           Tensor* dst_tensor = nullptr;
+           MklShape mkl_shape_dst;
+           mkl_shape_dst.SetMklTensor(false);
+           AllocateOutputSetMklShape(ctx, src1_idx, &dst_tensor,
+           src1_tensor.shape(), mkl_shape_dst);
+           return;
+        }
+      }
+
+      // element-wise add operator for tensor input1 and tensor input2
+      std::vector<double> coeff(2, 1.0);
+      MklDnnData<T> src1(&cpu_engine);
+      MklDnnData<T> src2(&cpu_engine);
+      MklDnnData<T> dst(&cpu_engine);
+
+      int tmp_size = input1_in_mkl_format ? src2_dims_size: src1_dims_size;
+      memory::dims dims(tmp_size);
+      memory::dims strides(tmp_size);
+      memory::desc md1({}, memory::data_undef, memory::format_undef);
+      memory::desc md2({}, memory::data_undef, memory::format_undef);
+
+      if ( input1_in_mkl_format || input2_in_mkl_format ) {
+        if ( input1_in_mkl_format ) {
+          md1 = src1_mkl_shape.GetMklLayout();
+          md2 = md1;
+          dst.SetUsrMem(md1);
+        } else {
+          md2 = src2_mkl_shape.GetMklLayout();
+          md1 = md2;
+          dst.SetUsrMem(md2);
+        }
+      } else {
+         dims = TFShapeToMklDnnDims(src1_tensor.shape());
+         strides = CalculateTFStrides(dims);
+         md1 = MklDnnData<T>::CreateBlockedMemDesc(dims, strides);
+         md2 = md1;
+         dst.SetUsrMem(dims, strides);
+      }
+
+      std::vector<memory::primitive_desc> srcs_pd;
+
+      src1.SetUsrMem(md1, &src1_tensor);
+      auto mpd1 = src1.GetUsrMemPrimDesc();
+      srcs_pd.push_back(mpd1);
+
+      src2.SetUsrMem(md2, &src2_tensor);
+      auto mpd2 = src2.GetUsrMemPrimDesc();
+      srcs_pd.push_back(mpd2);
+
+      std::vector<primitive::at> inputs;
+      inputs.push_back(src1.GetOpMem());
+      inputs.push_back(src2.GetOpMem());
+      auto output_pd = dst.GetUsrMemPrimDesc();
+      Tensor* dst_tensor = nullptr;
+      auto sum_pd = sum::primitive_desc(dst.GetUsrMemDesc(), coeff, srcs_pd);
+      auto sum_op = sum(sum_pd, inputs, dst.GetOpMem());
+      if ( input2_in_mkl_format || input1_in_mkl_format ) {
+         MklDnnShape output_mkl_shape;
+         output_mkl_shape.SetMklTensor(true);
+         output_mkl_shape.SetMklLayout(&output_pd);
+         output_mkl_shape.SetElemType(MklDnnType<T>());
+         if ( input1_in_mkl_format ) {
+          output_mkl_shape.SetTfLayout(src1_dims_size,
+          src1_mkl_shape.GetSizesAsMklDnnDims(),
+          src1_mkl_shape.GetTfDataFormat());
+         } else {
+          output_mkl_shape.SetTfLayout(src2_dims_size,
+          src2_mkl_shape.GetSizesAsMklDnnDims(),
+          src2_mkl_shape.GetTfDataFormat());
+         }
+         TensorShape output_tf_shape;
+         output_tf_shape.AddDim((output_pd.get_size() / sizeof(T))
+         + (output_pd.get_size()%sizeof(T) == 0 ? 0 : 1));
+         AllocateOutputSetMklShape(ctx, src1_idx, &dst_tensor, output_tf_shape,
+                                output_mkl_shape);
+      } else {
+         MklShape mkl_shape_dst;
+         mkl_shape_dst.SetMklTensor(false);
+         AllocateOutputSetMklShape(ctx, src1_idx,
+         &dst_tensor, src1_tensor.shape(), mkl_shape_dst);
+      }
+
+      dst.SetUsrMemDataHandle(dst_tensor);
+      std::vector<primitive> net;
+      net.push_back(sum_op);
+      stream(stream::kind::eager).submit(net).wait();
+    } catch (mkldnn::error &e) {
+      string error_msg = "Status: " + std::to_string(e.status) +
+                       ", message: " + string(e.message) +
+                       ", in file " + string(__FILE__) + ":" +
+                       std::to_string(__LINE__);
+      OP_REQUIRES_OK(ctx, errors::Aborted("Operation received an exception:",
+                                            error_msg));
+    }
+  }
+};
+
+#endif
 #define REGISTER_MKL_CPU(T)                                         \
   REGISTER_KERNEL_BUILDER(Name("_MklAddN")                          \
                               .Device(DEVICE_CPU)                   \
-- 
GitLab


From f6e0e3e89447980a7fe4e27e0207360a8476e7a4 Mon Sep 17 00:00:00 2001
From: Mahmoud Abuzaina <mahmoud.abuzaina@intel.com>
Date: Wed, 6 Dec 2017 12:31:13 -0800
Subject: [PATCH 0698/1225] Adding MKL-DNN Identity op (#14680)

---
 tensorflow/core/kernels/mkl_identity_op.cc | 33 ++++++++++++++++++++++
 1 file changed, 33 insertions(+)

diff --git a/tensorflow/core/kernels/mkl_identity_op.cc b/tensorflow/core/kernels/mkl_identity_op.cc
index f31e7afd46..9ee27ee21c 100644
--- a/tensorflow/core/kernels/mkl_identity_op.cc
+++ b/tensorflow/core/kernels/mkl_identity_op.cc
@@ -28,8 +28,15 @@ limitations under the License.
 #include "mkl_dnn_types.h"
 #include "tensorflow/core/util/mkl_util.h"
 
+#ifdef INTEL_MKL_DNN
+#include "mkldnn.hpp"
+#endif
+
 namespace tensorflow {
 typedef Eigen::ThreadPoolDevice CPUDevice;
+
+#ifndef INTEL_MKL_DNN
+
 template <typename Device, typename T>
 class MklIdentityOp : public OpKernel {
  public:
@@ -50,6 +57,32 @@ class MklIdentityOp : public OpKernel {
   bool IsExpensive() override { return false; }
 };
 
+#else
+
+template <typename Device, typename T>
+class MklIdentityOp : public OpKernel {
+ public:
+  explicit MklIdentityOp(OpKernelConstruction* context) : OpKernel(context) {}
+
+  void Compute(OpKernelContext* context) override {
+    MklDnnShape dnn_shape_input;
+    const int kInputIdx = 0, kOutputIdx = 0;
+    GetMklShape(context, kInputIdx, &dnn_shape_input);
+
+    if (dnn_shape_input.IsMklTensor()) {
+      ForwardMklTensorInToOut(context, kInputIdx, kOutputIdx);
+    } else {
+      ForwardTfTensorInToOut(context, kInputIdx, kOutputIdx);
+    }
+  }
+
+  // TensorFlow's IdentityOp has the following member function, so kept it
+  // as it is.
+  bool IsExpensive() override { return false; }
+};
+
+#endif
+
 #define REGISTER_MKL_CPU(T)                                         \
   REGISTER_KERNEL_BUILDER(Name("_MklIdentity")                      \
                               .Device(DEVICE_CPU)                   \
-- 
GitLab


From db87cffc17d8abb323729b80e94c93f667777b21 Mon Sep 17 00:00:00 2001
From: Robin Richtsfeld <robin.richtsfeld@gmail.com>
Date: Wed, 6 Dec 2017 21:33:21 +0100
Subject: [PATCH 0699/1225] Fix some Bash issues

---
 tensorflow/tools/ci_build/builds/print_build_info.sh        | 2 +-
 tensorflow/tools/ci_build/builds/test_user_ops.sh           | 6 +++---
 tensorflow/tools/ci_build/gpu_build/parallel_gpu_execute.sh | 2 +-
 tensorflow/tools/ci_build/remote/remote_docker_build.sh     | 2 +-
 4 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/tensorflow/tools/ci_build/builds/print_build_info.sh b/tensorflow/tools/ci_build/builds/print_build_info.sh
index 7c43419a76..e366abf8bb 100755
--- a/tensorflow/tools/ci_build/builds/print_build_info.sh
+++ b/tensorflow/tools/ci_build/builds/print_build_info.sh
@@ -88,7 +88,7 @@ fi
 # Print info
 echo "TF_BUILD_INFO = {"\
 "container_type: \"${CONTAINER_TYPE}\", "\
-"command: \"${COMMAND[@]}\", "\
+"command: \"${COMMAND[*]}\", "\
 "source_HEAD: \"${TF_HEAD}\", "\
 "source_remote_origin: \"${TF_FETCH_URL}\", "\
 "OS: \"${OS}\", "\
diff --git a/tensorflow/tools/ci_build/builds/test_user_ops.sh b/tensorflow/tools/ci_build/builds/test_user_ops.sh
index 358f82ac5d..caa3a40817 100755
--- a/tensorflow/tools/ci_build/builds/test_user_ops.sh
+++ b/tensorflow/tools/ci_build/builds/test_user_ops.sh
@@ -82,11 +82,11 @@ TF_CFLAGS=( $("${PYTHON_BIN_PATH}" \
 TF_LFLAGS=( $("${PYTHON_BIN_PATH}" \
 	      -c 'import tensorflow as tf; print(" ".join(tf.sysconfig.get_link_flags()))') )
 
-if [[ -z "${TF_CFLAGS}" || -z "${TF_LFLAGS}" ]]; then
+if [[ -z "${TF_CFLAGS[*]}" || -z "${TF_LFLAGS[*]}" ]]; then
   die "FAILED to determine TensorFlow compilation or linking flags"
 else
-  echo "TensorFlow compile flags: ${TF_CFLAGS[@]}"
-  echo "TensorFlow link flags: ${TF_LFLAGS[@]}"
+  echo "TensorFlow compile flags: ${TF_CFLAGS[*]}"
+  echo "TensorFlow link flags: ${TF_LFLAGS[*]}"
 fi
 
 # Check g++ availability
diff --git a/tensorflow/tools/ci_build/gpu_build/parallel_gpu_execute.sh b/tensorflow/tools/ci_build/gpu_build/parallel_gpu_execute.sh
index 6e7b752c06..cfeaebdbf5 100755
--- a/tensorflow/tools/ci_build/gpu_build/parallel_gpu_execute.sh
+++ b/tensorflow/tools/ci_build/gpu_build/parallel_gpu_execute.sh
@@ -45,7 +45,7 @@ for i in `seq 0 $((TF_GPU_COUNT-1))`; do
       # This export only works within the brackets, so it is isolated to one
       # single command.
       export CUDA_VISIBLE_DEVICES=$i
-      echo "Running test $@ on GPU $CUDA_VISIBLE_DEVICES"
+      echo "Running test $* on GPU $CUDA_VISIBLE_DEVICES"
       $@
     )
     return_code=$?
diff --git a/tensorflow/tools/ci_build/remote/remote_docker_build.sh b/tensorflow/tools/ci_build/remote/remote_docker_build.sh
index 3ac6840f4e..e00a66aaba 100755
--- a/tensorflow/tools/ci_build/remote/remote_docker_build.sh
+++ b/tensorflow/tools/ci_build/remote/remote_docker_build.sh
@@ -124,7 +124,7 @@ function build_tf_image {
 
 
 function publish_tf_image {
-  $gcr_tf_image="gcr.io/tensorflow/${tf_image}"
+  gcr_tf_image="gcr.io/tensorflow/${tf_image}"
   docker tag $tf_image $gcr_tf_image
   gcloud docker -- push $gcr_tf_image
 }
-- 
GitLab


From 88a32faf72b648f3f6f84a649882cc1336567782 Mon Sep 17 00:00:00 2001
From: Mahmoud Abuzaina <mahmoud.abuzaina@intel.com>
Date: Wed, 6 Dec 2017 12:37:04 -0800
Subject: [PATCH 0700/1225] MKL: Adding Batch Normalization implementation
 using the open source MKL-DNN Lib (#14459)

* adding mkl-dnn batchnorm

* Fixes per PR review

* fix per code review suggestions
---
 .../core/kernels/mkl_fused_batch_norm_op.cc   | 652 +++++++++++++++++-
 1 file changed, 640 insertions(+), 12 deletions(-)

diff --git a/tensorflow/core/kernels/mkl_fused_batch_norm_op.cc b/tensorflow/core/kernels/mkl_fused_batch_norm_op.cc
index bc9e906c39..a761562a4b 100644
--- a/tensorflow/core/kernels/mkl_fused_batch_norm_op.cc
+++ b/tensorflow/core/kernels/mkl_fused_batch_norm_op.cc
@@ -25,10 +25,24 @@ limitations under the License.
 #include "mkl_dnn_types.h"
 #include "tensorflow/core/util/mkl_util.h"
 
+#ifdef INTEL_MKL_DNN
+#include "mkldnn.hpp"
+
+using mkldnn::stream;
+using mkldnn::prop_kind;
+using mkldnn::use_scale_shift;
+using mkldnn::use_global_stats;
+using mkldnn::batch_normalization_forward;
+using mkldnn::batch_normalization_backward;
+#endif
+
 // TODO(inteltf) Address comments from PR 8968.
 
 namespace tensorflow {
 using CPUDevice = Eigen::ThreadPoolDevice;
+
+#ifndef INTEL_MKL_DNN
+
 template <typename Device, typename T>
 class MklFusedBatchNormOp : public OpKernel {
  public:
@@ -46,7 +60,6 @@ class MklFusedBatchNormOp : public OpKernel {
 
   void Compute(OpKernelContext* context) override {
     MklFusedBatchNormOpContext mkl_context;
-
     const Tensor& input = MklGetInput(context, 0);
     const Tensor& scale = MklGetInput(context, 1);
     const Tensor& shift = MklGetInput(context, 2);
@@ -55,6 +68,7 @@ class MklFusedBatchNormOp : public OpKernel {
 
     GetMklShape(context, 0, &(mkl_context.mkl_shape_input_shape));
     bool input_in_mkl_format = mkl_context.mkl_shape_input_shape.IsMklTensor();
+
     if (!input_in_mkl_format) {
       OP_REQUIRES(context, input.dims() == 4,
                   errors::InvalidArgument("input must be 4-dimensional",
@@ -69,10 +83,12 @@ class MklFusedBatchNormOp : public OpKernel {
     OP_REQUIRES(context, est_mean.dims() == 1,
                 errors::InvalidArgument("estimated_mean must be 1-dimensional",
                                         est_mean.shape().DebugString()));
+
     OP_REQUIRES(
         context, est_variance.dims() == 1,
         errors::InvalidArgument("estimated_variance must be 1-dimensional",
                                 est_variance.shape().DebugString()));
+
     if (is_training_) {
       OP_REQUIRES(context, est_mean.dim_size(0) == 0,
                   errors::InvalidArgument("estimated_mean empty for training",
@@ -258,7 +274,6 @@ class MklFusedBatchNormOp : public OpKernel {
             E_SUCCESS);
       }
     }
-
     void MklPrepareContextInputs(OpKernelContext* context,
                                  Tensor* mkl_tmp_input_buf_tensor,
                                  Tensor* mkl_tmp_scale_shift_buf_tensor) {
@@ -325,15 +340,6 @@ class MklFusedBatchNormOp : public OpKernel {
   } MklFusedBatchNormOpContext;
 };
 
-#define REGISTER_MKL_CPU(T)                                         \
-  REGISTER_KERNEL_BUILDER(Name("_MklFusedBatchNorm")                \
-                              .Device(DEVICE_CPU)                   \
-                              .TypeConstraint<T>("T")               \
-                              .Label(mkl_op_registry::kMklOpLabel), \
-                          MklFusedBatchNormOp<CPUDevice, T>);
-TF_CALL_float(REGISTER_MKL_CPU);
-#undef REGISTER_MKL_CPU
-
 template <typename Device, typename T>
 class MklFusedBatchNormGradOp : public OpKernel {
  public:
@@ -595,7 +601,7 @@ class MklFusedBatchNormGradOp : public OpKernel {
       mkl_res_batchnorm_bwd[dnnResourceSrc] =
           (mkl_convert_input) ? mkl_buf_converted_input : mkl_buf_input;
 
-      bool mkl_convert_out_backprop;
+     bool mkl_convert_out_backprop;
       dnnPrimitive_t mkl_prim_convert_out_backprop = nullptr;
       dnnLayout_t mkl_lt_internal_out_backprop = nullptr;
       void* mkl_buf_converted_out_backprop = nullptr;
@@ -675,6 +681,628 @@ class MklFusedBatchNormGradOp : public OpKernel {
     }
   } MklFusedBatchNormGradOpContext;
 };
+#endif
+
+#ifdef INTEL_MKL_DNN
+
+template <typename Device, typename T>
+class MklFusedBatchNormOp : public OpKernel {
+ public:
+  explicit MklFusedBatchNormOp(OpKernelConstruction* context)
+      : OpKernel(context) {
+    float epsilon;
+    OP_REQUIRES_OK(context, context->GetAttr("epsilon", &epsilon));
+    epsilon_ = T(epsilon);
+    string tensor_format;
+    OP_REQUIRES_OK(context, context->GetAttr("data_format", &tensor_format));
+    OP_REQUIRES(context, FormatFromString(tensor_format, &tensor_format_),
+                errors::InvalidArgument("Invalid data format"));
+    OP_REQUIRES_OK(context, context->GetAttr("is_training", &is_training_));
+  }
+
+  void Compute(OpKernelContext* context) override {
+    try {
+      auto cpu_engine = engine(engine::cpu, 0);
+      const size_t src_index = 0;    // index of src input tensor
+      const size_t scale_index = 1;  // index of scale tensor
+      const size_t shift_index = 2;  // index of shift tensor
+      const size_t mean_index = 3;   // index of est_mean tensor
+      const size_t var_index = 4;    // index of est_variance tensor
+
+      const Tensor& src_tensor          = MklGetInput(context, src_index);
+      const Tensor& scale_tensor        = MklGetInput(context, scale_index);
+      const Tensor& shift_tensor        = MklGetInput(context, shift_index);
+      const Tensor& est_mean_tensor     = MklGetInput(context, mean_index);
+      const Tensor& est_variance_tensor = MklGetInput(context, var_index);
+
+      MklDnnShape dnn_shape_src;
+      GetMklShape(context, src_index, &dnn_shape_src);
+
+      if (dnn_shape_src.IsMklTensor()) {
+        OP_REQUIRES(context, dnn_shape_src.GetDimension() == 4,
+                    errors::InvalidArgument(
+                        "input must be 4-dimensional",
+                        src_tensor.shape().DebugString()));
+      } else {
+        OP_REQUIRES(context, src_tensor.dims() == 4,
+                    errors::InvalidArgument(
+                        "input must be 4-dimensional",
+                        src_tensor.shape().DebugString()));
+      }
+      OP_REQUIRES(context, scale_tensor.dims() == 1,
+                  errors::InvalidArgument(
+                      "scale must be 1-dimensional",
+                      scale_tensor.shape().DebugString()));
+      OP_REQUIRES(context, shift_tensor.dims() == 1,
+                  errors::InvalidArgument("offset must be 1-dimensional",
+                                        shift_tensor.shape().DebugString()));
+      OP_REQUIRES(context, est_mean_tensor.dims() == 1,
+                  errors::InvalidArgument(
+                      "estimated_mean must be 1-dimensional",
+                      est_mean_tensor.shape().DebugString()));
+      OP_REQUIRES(context, est_variance_tensor.dims() == 1,
+                  errors::InvalidArgument(
+                      "estimated_variance must be 1-dimensional",
+                      est_variance_tensor.shape().DebugString()));
+
+      if (is_training_) {
+        OP_REQUIRES(context, est_mean_tensor.dim_size(0) == 0,
+                    errors::InvalidArgument(
+                        "estimated_mean must be empty for training",
+                        est_mean_tensor.shape().DebugString()));
+        OP_REQUIRES(context, est_variance_tensor.dim_size(0) == 0,
+                    errors::InvalidArgument(
+                        "estimated_variance must be empty for training",
+                        est_variance_tensor.shape().DebugString()));
+      }
+
+      if (dnn_shape_src.IsMklTensor())
+        depth_ = dnn_shape_src.DimSize(MklDnnDims::Dim_C);
+      else
+        ExtractParams(context);
+
+      // Indices of output tensors
+      const size_t dst_index = 0;
+      const size_t batch_mean_index = 1;
+      const size_t batch_variance_index = 2;
+      const size_t saved_mean_index = 3;
+      const size_t saved_variance_index = 4;
+
+      // allocate batch mean output tensor
+      Tensor* batch_mean_tensor = nullptr;
+      MklDnnShape mkl_shape_batch_mean;
+      mkl_shape_batch_mean.SetMklTensor(false);
+      AllocateOutputSetMklShape(context,
+                                batch_mean_index,
+                                &batch_mean_tensor,
+                                scale_tensor.shape(),
+                                mkl_shape_batch_mean);
+      CHECK_NOTNULL(batch_mean_tensor);
+
+      // Batch variance
+      Tensor* batch_variance_tensor = nullptr;
+      MklDnnShape mkl_shape_batch_variance;
+      mkl_shape_batch_variance.SetMklTensor(false);
+      AllocateOutputSetMklShape(context,
+                                batch_variance_index,
+                                &batch_variance_tensor,
+                                scale_tensor.shape(),
+                                mkl_shape_batch_variance);
+      CHECK_NOTNULL(batch_variance_tensor);
+
+      if (is_training_)
+        SetMeanVariance(*batch_mean_tensor, *batch_variance_tensor);
+      else
+        SetMeanVariance(est_mean_tensor, est_variance_tensor);
+
+      MklDnnData<T> src(&cpu_engine);
+      MklDnnData<T> dst(&cpu_engine);
+
+      memory::format format_m;
+      if (dnn_shape_src.IsMklTensor()) {
+        if (dnn_shape_src.IsTensorInNCHWFormat()) {
+          format_m = memory::format::nchw;
+        } else {
+          format_m = memory::format::nhwc;
+        }
+      } else {
+        format_m = TFDataFormatToMklDnnDataFormat(tensor_format_);
+      }
+
+      // set src primitive
+      memory::dims src_dims;
+      if (dnn_shape_src.IsMklTensor()) {
+        src_dims = TFShapeToMklDnnDimsInNCHW(dnn_shape_src.GetTfShape(),
+                                             tensor_format_);
+      } else {
+        src_dims = TFShapeToMklDnnDimsInNCHW(src_tensor.shape(),
+                                             tensor_format_);
+      }
+
+      auto src_md = dnn_shape_src.IsMklTensor()
+                    ? dnn_shape_src.GetMklLayout()
+                    : memory::desc(src_dims, MklDnnType<T>(), format_m);
+      src.SetUsrMem(src_md, &src_tensor);
+
+      // set weights primitive
+      // MKL-DNN packs scale & shift as "weights":
+      // <scale>...<scale><shift>...<shift>
+      auto weights_desc = memory::desc({2, depth_},
+                                       MklDnnType<T>(),
+                                       memory::format::nc);
+      auto weights_pd = memory::primitive_desc(weights_desc, cpu_engine);
+      auto weights_m = memory(weights_pd);
+      T* weights_data = reinterpret_cast<T*>(
+                        weights_m.get_data_handle());
+      T* scale_tf = reinterpret_cast<T*>(
+                    const_cast<T*>(scale_tensor.flat<T>().data()));
+      T* shift_tf = reinterpret_cast<T*>(
+                    const_cast<T*>(shift_tensor.flat<T>().data()));
+
+      for (int k=0; k < depth_; k++) {
+        weights_data[k] = scale_tf[k];
+        weights_data[k + depth_] = shift_tf[k];
+      }
+
+      // Mean and variance (without Bessel's correction) saved for backward
+      // computation to serve as pre-computed mean and variance.
+      Tensor* saved_mean_tensor = nullptr;
+      MklDnnShape mkl_shape_saved_mean;
+      mkl_shape_saved_mean.SetMklTensor(false);
+      AllocateOutputSetMklShape(context, saved_mean_index,
+                                &saved_mean_tensor,
+                                scale_tensor.shape(),
+                                mkl_shape_saved_mean);
+      CHECK_NOTNULL(saved_mean_tensor);
+
+      Tensor* saved_variance_tensor = nullptr;
+      MklDnnShape mkl_shape_saved_variance;
+      mkl_shape_saved_variance.SetMklTensor(false);
+      AllocateOutputSetMklShape(context, saved_variance_index,
+                                &saved_variance_tensor,
+                                scale_tensor.shape(),
+                                mkl_shape_saved_variance);
+      CHECK_NOTNULL(saved_variance_tensor);
+
+      // set mean primitive
+      auto mean_desc = memory::desc({1, depth_},
+                                    MklDnnType<T>(),
+                                    memory::format::nc);
+      auto mean_pd = memory::primitive_desc(mean_desc, cpu_engine);
+      char* saved_mean_data_tf = reinterpret_cast<char*>
+                                 (saved_mean_tensor->flat<T>().data());
+      std::memcpy(saved_mean_data_tf,
+                  reinterpret_cast<char*>(mean_values_),
+                  depth_*sizeof(T));
+      auto mean_m = memory(mean_pd,
+                           reinterpret_cast<void*>(saved_mean_data_tf));
+
+      // set variance primitive
+      auto variance_desc = memory::desc({1, depth_},
+                                    MklDnnType<T>(),
+                                    memory::format::nc);
+      auto variance_pd = memory::primitive_desc(variance_desc, cpu_engine);
+      char* saved_variance_data_tf = reinterpret_cast<char*>
+                  (saved_variance_tensor->flat<T>().data());
+      std::memcpy(saved_variance_data_tf,
+                  reinterpret_cast<char*>(variance_values_),
+                  depth_*sizeof(T));
+      auto variance_m = memory(variance_pd, saved_variance_data_tf);
+
+      prop_kind pk = (is_training_) ?
+                     prop_kind::forward_training :
+                     prop_kind::forward_scoring;
+      auto bnrm_fwd_desc = batch_normalization_forward::desc(
+                               pk, src.GetUsrMemDesc(), epsilon_,
+                               is_training_ ? use_scale_shift :
+                               (use_scale_shift | use_global_stats));
+      auto bnrm_fwd_pd = batch_normalization_forward::primitive_desc(
+                             bnrm_fwd_desc, cpu_engine);
+
+      // allocate dst tensor
+      MklDnnShape dnn_shape_dst;
+      TensorShape tf_shape_dst;
+      Tensor* dst_tensor = nullptr;
+      if (dnn_shape_src.IsMklTensor()) {
+        dnn_shape_dst.SetMklTensor(true);
+        auto dst_pd = bnrm_fwd_pd.dst_primitive_desc();
+        dnn_shape_dst.SetMklLayout(&dst_pd);
+        dnn_shape_dst.SetElemType(MklDnnType<T>());
+        dnn_shape_dst.SetTfLayout(dnn_shape_src.GetDimension(),
+                                  src_dims, format_m);
+        tf_shape_dst.AddDim(dst_pd.get_size()/sizeof(T));
+      } else {
+        dnn_shape_dst.SetMklTensor(false);
+        tf_shape_dst = src_tensor.shape();
+      }
+      AllocateOutputSetMklShape(context, dst_index, &dst_tensor,
+                                tf_shape_dst, dnn_shape_dst);
+
+      // Output of batchnorm has same shape as input.
+      dst.SetUsrMem(src_md, dst_tensor);
+
+      primitive bnrm_fwd_op;
+      if (is_training_) {
+        bnrm_fwd_op = batch_normalization_forward(
+                          bnrm_fwd_pd,
+                          src.GetOpMem(),
+                          weights_m,
+                          dst.GetOpMem(),
+                          mean_m,
+                          variance_m);
+      } else {
+        bnrm_fwd_op = batch_normalization_forward(
+                          bnrm_fwd_pd,
+                          src.GetOpMem(),
+                          mean_m,
+                          variance_m,
+                          (const primitive::at) weights_m,
+                          dst.GetOpMem());
+      }
+      std::vector<primitive> net;
+      net.push_back(bnrm_fwd_op);
+      stream(stream::kind::eager).submit(net).wait();
+
+      // copy batch_mean data
+      T* batch_mean_data_tf = reinterpret_cast<T*>(
+                                batch_mean_tensor->flat<T>().data());
+      std::memcpy(reinterpret_cast<char*>(batch_mean_data_tf),
+                  reinterpret_cast<char*>(mean_m.get_data_handle()),
+                  depth_*sizeof(T));
+
+      // copy batch_variance data with Bessel's correction
+      // if training mode is on
+      float adjust_factor = 1.0;
+      if (is_training_) {
+        size_t orig_size = src_dims[0] * src_dims[2] * src_dims[3];
+        size_t adjust_size = orig_size - 1;
+        adjust_factor = (static_cast<float>(orig_size)) / adjust_size;
+      }
+      T* batch_variance_data_tf = reinterpret_cast<T*>(
+                                  batch_variance_tensor->flat<T>().data());
+      for (int k=0; k < depth_; k++)
+        batch_variance_data_tf[k] =
+            (reinterpret_cast<T*>(variance_m.get_data_handle()))[k]
+            * adjust_factor;
+    } catch (mkldnn::error &e) {
+      string error_msg = "Status: " + std::to_string(e.status) +
+                         ", message: " + string(e.message) +
+                         ", in file " + string(__FILE__) + ":" +
+                         std::to_string(__LINE__);
+      OP_REQUIRES_OK(context,
+                     errors::Aborted("Operation received an exception:",
+                     error_msg));
+    }
+  }
+
+ private:
+  T epsilon_;
+  TensorFormat tensor_format_;
+  bool is_training_;
+  T* mean_values_;
+  T* variance_values_;
+  size_t depth_;          // batch normalization is done for per channel.
+
+  void ExtractParams(OpKernelContext* context) {
+    const Tensor& input = MklGetInput(context, 0);
+    depth_ = static_cast<int>(GetTensorDim(input, tensor_format_, 'C'));
+  }
+
+  void SetMeanVariance(const Tensor& mean, const Tensor& variance) {
+    mean_values_ = reinterpret_cast<T*>(
+                       const_cast<T*>(mean.flat<T>().data()));
+    variance_values_ = reinterpret_cast<T*>(
+                       const_cast<T*>(variance.flat<T>().data()));
+  }
+};
+
+
+template <typename Device, typename T>
+class MklFusedBatchNormGradOp : public OpKernel {
+ public:
+  explicit MklFusedBatchNormGradOp(OpKernelConstruction* context)
+      : OpKernel(context) {
+    float epsilon;
+    OP_REQUIRES_OK(context, context->GetAttr("epsilon", &epsilon));
+    epsilon_ = T(epsilon);
+    string tensor_format;
+    OP_REQUIRES_OK(context, context->GetAttr("data_format", &tensor_format));
+    OP_REQUIRES(context, FormatFromString(tensor_format, &tensor_format_),
+                errors::InvalidArgument("Invalid data format"));
+  }
+
+  void Compute(OpKernelContext* context) override {
+    try {
+      auto cpu_engine = engine(engine::cpu, 0);
+
+      const size_t diff_dst_index = 0;  // index of diff_dst tensor
+      const size_t src_index = 1;       // index of src input tensor
+      const size_t scale_index = 2;     // index of scale tensor
+      const size_t mean_index = 3;      // index of saved_mean tensor
+      const size_t variance_index = 4;  // index of saved_variance tensor
+      const Tensor& diff_dst_tensor = MklGetInput(context, diff_dst_index);
+      const Tensor& src_tensor = MklGetInput(context, src_index);
+      const Tensor& scale_tensor = MklGetInput(context, scale_index);
+      const Tensor& saved_mean_tensor = MklGetInput(context, mean_index);
+      const Tensor& saved_variance_tensor = MklGetInput(context,
+                                            variance_index);
+
+      MklDnnShape dnn_shape_src, dnn_shape_diff_dst;
+      GetMklShape(context, src_index, &dnn_shape_src);
+      GetMklShape(context, diff_dst_index, &dnn_shape_diff_dst);
+
+      if (dnn_shape_diff_dst.IsMklTensor()) {
+        OP_REQUIRES(context, dnn_shape_diff_dst.GetDimension() == 4,
+                    errors::InvalidArgument(
+                        "input must be 4-dimensional",
+                        diff_dst_tensor.shape().DebugString()));
+      } else {
+        OP_REQUIRES(context, diff_dst_tensor.dims() == 4,
+                    errors::InvalidArgument(
+                        "input must be 4-dimensional",
+                        diff_dst_tensor.shape().DebugString()));
+      }
+
+      if (dnn_shape_src.IsMklTensor()) {
+        OP_REQUIRES(context, dnn_shape_src.GetDimension() == 4,
+                    errors::InvalidArgument(
+                        "input must be 4-dimensional",
+                         src_tensor.shape().DebugString()));
+      } else {
+        OP_REQUIRES(context, src_tensor.dims() == 4,
+                    errors::InvalidArgument(
+                        "input must be 4-dimensional",
+                        src_tensor.shape().DebugString()));
+      }
+
+      OP_REQUIRES(context, scale_tensor.dims() == 1,
+                  errors::InvalidArgument(
+                      "scale must be 1-dimensional",
+                      scale_tensor.shape().DebugString()));
+      OP_REQUIRES(context, saved_mean_tensor.dims() == 1,
+                  errors::InvalidArgument(
+                      "saved mean must be 1-dimensional",
+                       saved_mean_tensor.shape().DebugString()));
+
+      OP_REQUIRES(context, saved_variance_tensor.dims() == 1,
+                  errors::InvalidArgument(
+                      "saved variance must be 1-dimensional",
+                      saved_variance_tensor.shape().DebugString()));
+
+      if (dnn_shape_src.IsMklTensor())
+        depth_ = dnn_shape_src.DimSize(MklDnnDims::Dim_C);
+      else
+        ExtractParams(context);
+
+      memory::format format_m;
+      if (dnn_shape_src.IsMklTensor()) {
+        if (dnn_shape_src.IsTensorInNCHWFormat())
+          format_m = memory::format::nchw;
+        else
+          format_m = memory::format::nhwc;
+      } else {
+        format_m = TFDataFormatToMklDnnDataFormat(tensor_format_);
+      }
+
+      MklDnnData<T> src(&cpu_engine);
+      MklDnnData<T> mean(&cpu_engine);
+      MklDnnData<T> variance(&cpu_engine);
+      MklDnnData<T> diff_dst(&cpu_engine);
+      MklDnnData<T> diff_src(&cpu_engine);
+
+      memory::dims src_dims, diff_dst_dims;
+      if (dnn_shape_src.IsMklTensor())
+        src_dims = TFShapeToMklDnnDimsInNCHW(
+                       dnn_shape_src.GetTfShape(), tensor_format_);
+      else
+        src_dims = TFShapeToMklDnnDimsInNCHW(
+                       src_tensor.shape(), tensor_format_);
+
+      if (dnn_shape_diff_dst.IsMklTensor())
+        diff_dst_dims = TFShapeToMklDnnDimsInNCHW(
+                            dnn_shape_diff_dst.GetTfShape(),
+                            tensor_format_);
+      else
+        diff_dst_dims = TFShapeToMklDnnDimsInNCHW(
+                            diff_dst_tensor.shape(),
+                            tensor_format_);
+
+      // set src and diff_dst primitives
+      memory::desc src_md({}, memory::data_undef, memory::format_undef);
+      memory::desc diff_dst_md({}, memory::data_undef, memory::format_undef);
+      if (dnn_shape_src.IsMklTensor() || dnn_shape_diff_dst.IsMklTensor()) {
+        if (dnn_shape_src.IsMklTensor()) {
+          src_md = dnn_shape_src.GetMklLayout();
+          diff_dst_md = src_md;
+        } else {
+          diff_dst_md = dnn_shape_diff_dst.GetMklLayout();
+          src_md = diff_dst_md;
+        }
+      } else {
+        src_md =  memory::desc(src_dims, MklDnnType<T>(), format_m);
+        diff_dst_md = src_md;
+      }
+      src.SetUsrMem(src_md, &src_tensor);
+      diff_dst.SetUsrMem(diff_dst_md, &diff_dst_tensor);
+
+      // weights -- DNN packs scales/shifts as weights in order of
+      // scale, ..., scale, shift, ..., shift
+      auto weights_desc = memory::desc({2, depth_},
+                                       MklDnnType<T>(),
+                                       memory::format::nc);
+      auto weights_pd = memory::primitive_desc(weights_desc, cpu_engine);
+      auto weights_m = memory(weights_pd);
+      T* weights_data = reinterpret_cast<T*>(weights_m.get_data_handle());
+      T* scale_tf = reinterpret_cast<T*>(const_cast<T*>
+                                        (scale_tensor.flat<T>().data()));
+      for (int k=0; k < depth_; k++) {
+        weights_data[k] = scale_tf[k];
+        weights_data[k + depth_] = 0;
+      }
+
+      // set mean primitive
+      memory::dims mv_dims = GetMeanVarianceDims();
+      mean.SetUsrMem(mv_dims,
+                     memory::format::nc,
+                     const_cast<void*>(static_cast<const void*>
+                     (saved_mean_tensor.flat<T>().data())));
+      mean.SetOpMemDesc(mv_dims, memory::format::nc);
+
+      // set variance primitive
+      variance.SetUsrMem(mv_dims,  memory::format::nc,
+                         const_cast<void*>(static_cast<const void*>
+                         (saved_variance_tensor.flat<T>().data())));
+      variance.SetOpMemDesc(mv_dims, memory::format::nc);
+
+      // set diff_weight primitive
+      auto diff_weights_desc = memory::desc(
+                                 {2, depth_},
+                                 MklDnnType<T>(),
+                                 memory::format::nc);
+      auto diff_weights_pd = memory::primitive_desc(
+                                diff_weights_desc,
+                                cpu_engine);
+      auto diff_weights_m = memory(diff_weights_pd);
+
+      auto bnrm_fwd_desc = batch_normalization_forward::desc(
+                                prop_kind::forward_training,
+                                src.GetUsrMemDesc(),
+                                epsilon_,
+                                use_scale_shift);
+      auto bnrm_fwd_pd = batch_normalization_forward::primitive_desc(
+                                bnrm_fwd_desc,
+                                cpu_engine);
+
+      // Indices of output tensors
+      const size_t diff_src_index = 0;    // index of diff_src tensor
+      const size_t diff_scale_index = 1;  // index of diff_scale tensor
+      const size_t diff_shift_index = 2;  // index of diff_shift tensor
+      const size_t p1_index = 3;  // index of 1st placeholder tensor
+      const size_t p2_index = 4;  // index of 2nd placeholder tensor
+
+      // allocate diff_src tensor
+      MklDnnShape dnn_shape_diff_src;
+      TensorShape tf_shape_diff_src;
+      Tensor* diff_src_tensor = nullptr;
+      if (dnn_shape_src.IsMklTensor()) {
+        dnn_shape_diff_src.SetMklTensor(true);
+        auto diff_src_pd = bnrm_fwd_pd.dst_primitive_desc();
+        dnn_shape_diff_src.SetMklLayout(&diff_src_pd);
+        dnn_shape_diff_src.SetElemType(MklDnnType<T>());
+        dnn_shape_diff_src.SetTfLayout(
+                              dnn_shape_src.GetDimension(),
+                              src_dims,
+                              format_m);
+        dnn_shape_diff_src.SetTfDimOrder(
+                              dnn_shape_src.GetDimension(),
+                              tensor_format_);
+        tf_shape_diff_src.AddDim(diff_src_pd.get_size()/sizeof(T));
+      } else {
+        dnn_shape_diff_src.SetMklTensor(false);
+        tf_shape_diff_src = src_tensor.shape();
+      }
+      AllocateOutputSetMklShape(context, diff_src_index, &diff_src_tensor,
+                                tf_shape_diff_src, dnn_shape_diff_src);
+
+      diff_src.SetUsrMem(src_md, diff_src_tensor);
+
+      prop_kind pk = prop_kind::backward;
+      auto bnrm_bwd_desc = batch_normalization_backward::desc(
+                               pk,
+                               diff_src.GetUsrMemDesc(),
+                               src.GetUsrMemDesc(),
+                               epsilon_,
+                               use_scale_shift);
+      auto bnrm_bwd_pd = batch_normalization_backward::primitive_desc(
+                               bnrm_bwd_desc,
+                               cpu_engine,
+                               bnrm_fwd_pd);
+
+      auto bnrm_bwd_op = batch_normalization_backward(
+                               bnrm_bwd_pd,
+                               src.GetOpMem(),
+                               mean.GetOpMem(),
+                               variance.GetOpMem(),
+                               diff_dst.GetOpMem(),
+                               weights_m,
+                               diff_src.GetOpMem(),
+                               diff_weights_m);
+
+      std::vector<primitive> net;
+      net.push_back(bnrm_bwd_op);
+      stream(stream::kind::eager).submit(net).wait();
+
+      // separate out scale and shift grad and copy to individual tensors
+      const TensorShape& tf_shape_scale_shift = scale_tensor.shape();
+      Tensor* diff_scale_tensor = nullptr;
+      MklDnnShape mkl_shape_diff_scale;
+      mkl_shape_diff_scale.SetMklTensor(false);
+      AllocateOutputSetMklShape(context, diff_scale_index, &diff_scale_tensor,
+                                tf_shape_scale_shift, mkl_shape_diff_scale);
+
+      Tensor* diff_shift_tensor = nullptr;
+      MklDnnShape mkl_shape_diff_shift;
+      mkl_shape_diff_shift.SetMklTensor(false);
+      AllocateOutputSetMklShape(context, diff_shift_index, &diff_shift_tensor,
+                                tf_shape_scale_shift, mkl_shape_diff_shift);
+
+      // copy data: diff_scale and diff_shift
+      T* diff_weights_data_dnn = reinterpret_cast<T*>
+                                 (diff_weights_m.get_data_handle());
+      float* diff_scale_data_tf = const_cast<float*>(
+             static_cast<const float*>(diff_scale_tensor->flat<T>().data()));
+      float* diff_shift_data_tf = const_cast<float*>(
+             static_cast<const float*>(diff_shift_tensor->flat<T>().data()));
+      for (int i = 0; i < depth_; i++) {
+        diff_scale_data_tf[i] = diff_weights_data_dnn[i];
+        diff_shift_data_tf[i] = diff_weights_data_dnn[i + depth_];
+      }
+
+      // Placeholders for estimated_mean and estimated_variance, which are
+      // used for inference and thus not needed here for gradient computation.
+      Tensor* p1_tensor = nullptr, *p2_tensor = nullptr;
+      MklDnnShape mkl_shape_p;
+      mkl_shape_p.SetMklTensor(false);
+      AllocateOutputSetMklShape(context, p1_index, &p1_tensor,
+                                TensorShape({}), mkl_shape_p);
+      AllocateOutputSetMklShape(context, p2_index, &p2_tensor,
+                                TensorShape({}), mkl_shape_p);
+    } catch (mkldnn::error &e) {
+      string error_msg = "Status: " + std::to_string(e.status) +
+                          ", message: " + string(e.message) +
+                          ", in file " + string(__FILE__) + ":" +
+                          std::to_string(__LINE__);
+      OP_REQUIRES_OK(context,
+                     errors::Aborted("Operation received an exception:",
+                     error_msg));
+    }
+  }
+
+ private:
+  T epsilon_;
+  TensorFormat tensor_format_;
+  int depth_;             // batch normalization is done for per channel.
+
+  void ExtractParams(OpKernelContext* context) {
+      const Tensor& input = MklGetInput(context, 0);
+      depth_ = static_cast<int>(GetTensorDim(input, tensor_format_, 'C'));
+  }
+
+  memory::dims GetMeanVarianceDims() {
+    return memory::dims({1, depth_});
+  }
+};
+
+#endif
+
+#define REGISTER_MKL_CPU(T)                                         \
+  REGISTER_KERNEL_BUILDER(Name("_MklFusedBatchNorm")                \
+                              .Device(DEVICE_CPU)                   \
+                              .TypeConstraint<T>("T")               \
+                              .Label(mkl_op_registry::kMklOpLabel), \
+                          MklFusedBatchNormOp<CPUDevice, T>);
+TF_CALL_float(REGISTER_MKL_CPU);
+#undef REGISTER_MKL_CPU
 
 #define REGISTER_MKL_CPU(T)                                         \
   REGISTER_KERNEL_BUILDER(Name("_MklFusedBatchNormGrad")            \
-- 
GitLab


From 16fe0561d95043c92669a30a62285f2d122db455 Mon Sep 17 00:00:00 2001
From: Mahmoud Abuzaina <mahmoud.abuzaina@intel.com>
Date: Wed, 6 Dec 2017 13:07:01 -0800
Subject: [PATCH 0701/1225] MKL: Adding Convolution implementation using the
 open source MKL-DNN Lib (#14470)

* Adding mkl-dnn convolution

* Fixed style issue

* removing file

* Addressing review comments in Convolution code
---
 .../core/kernels/mkl_conv_grad_filter_ops.cc  | 317 ++++++++++--------
 .../core/kernels/mkl_conv_grad_input_ops.cc   | 244 ++++++--------
 tensorflow/core/kernels/mkl_conv_ops.cc       | 149 ++++----
 tensorflow/core/kernels/mkl_conv_ops.h        | 269 ++++++++++++++-
 4 files changed, 623 insertions(+), 356 deletions(-)

diff --git a/tensorflow/core/kernels/mkl_conv_grad_filter_ops.cc b/tensorflow/core/kernels/mkl_conv_grad_filter_ops.cc
index f291281108..793fa24d99 100644
--- a/tensorflow/core/kernels/mkl_conv_grad_filter_ops.cc
+++ b/tensorflow/core/kernels/mkl_conv_grad_filter_ops.cc
@@ -47,11 +47,8 @@ limitations under the License.
 
 using mkldnn::stream;
 using mkldnn::prop_kind;
-
-using mkldnn::convolution_forward;
 using mkldnn::convolution_backward_weights;
-using mkldnn::convolution_direct;
-
+using mkldnn::memory;
 #endif
 
 namespace tensorflow {
@@ -426,183 +423,229 @@ class MklConv2DCustomBackpropFilterOp : public OpKernel {
   TensorFormat data_format_;
 };
 
+#define REGISTER_MKL_FILTER_KERNELS(T)                              \
+  REGISTER_KERNEL_BUILDER(Name("_MklConv2DBackpropFilter")          \
+                              .Device(DEVICE_CPU)                   \
+                              .TypeConstraint<T>("T")               \
+                              .Label(mkl_op_registry::kMklOpLabel), \
+              MklConv2DCustomBackpropFilterOp<CPUDevice, T>);
+TF_CALL_float(REGISTER_MKL_FILTER_KERNELS);
+#undef REGISTER_MKL_FILTER_KERNELS
+
 #else
 
-template <typename Device, class T>
-class MklConv2DCustomBackpropFilterOp : public OpKernel {
+template <typename Device, class T, bool biasEnabled>
+class MklConv2DCustomBackpropFilterOp :
+  public MklConv2DBackpropCommonOp<Device, T> {
  public:
   explicit MklConv2DCustomBackpropFilterOp(OpKernelConstruction* context)
-      : OpKernel(context) {
-    string data_format;
-    OP_REQUIRES_OK(context, context->GetAttr("data_format", &data_format));
-    OP_REQUIRES(context, FormatFromString(data_format, &data_format_),
-                errors::InvalidArgument("Invalid data format"));
+      : MklConv2DBackpropCommonOp<Device, T>(context) { }
+  ~MklConv2DCustomBackpropFilterOp() {}
 
-    OP_REQUIRES_OK(context, context->GetAttr("strides", &strides_));
-    int stride_n = GetTensorDim(strides_, data_format_, 'N');
-    int stride_c = GetTensorDim(strides_, data_format_, 'C');
-    OP_REQUIRES(
-        context, (stride_n == 1 && stride_c == 1),
-        errors::InvalidArgument("Current implementation does not yet support "
-                                "strides in the batch and depth dimensions."));
-    OP_REQUIRES_OK(context, context->GetAttr("padding", &padding_));
+ private:
+  void ValidateMklShapes(const MklDnnShape& input_mkl_shape,
+                         const MklDnnShape& filter_mkl_shape,
+                         const MklDnnShape& obp_mkl_shape) {
+    CHECK(!filter_mkl_shape.IsMklTensor())
+      << "Conv2DBackpropFilter: filter should not be in MKL Layout";
   }
 
-  void Compute(OpKernelContext* context) override {
-    try {
-      auto cpu_engine = engine(engine::cpu, 0);
+  size_t GetInputTensorIndexWithSizes() { return 1; /* filter index */ }
 
-      MklDnnData<T> input(&cpu_engine);
-      MklDnnData<T> outbackprop(&cpu_engine);
-      MklDnnData<T> output(&cpu_engine);
+  TensorShape MakeInputTfShape(OpKernelContext* context,
+                               const Tensor& input_tensor) {
+    size_t input_idx = 0;
+    return GetTfShape(context, input_idx);
+  }
 
-      // Input tensors
-      const Tensor& input_tensor = MklGetInput(context, 0);
-      const Tensor& filter_tensor = MklGetInput(context, 1);
-      const Tensor& obp_tensor = MklGetInput(context, 2);  // Outbackprop
+  TensorShape MakeFilterTfShape(OpKernelContext* context,
+                                const Tensor& filter_tensor) {
+    TensorShape filter_tf_shape;
+    CHECK_EQ(TensorShapeUtils::IsVector(filter_tensor.shape()), true);
+    CHECK_EQ(TensorShapeUtils::MakeShape(
+             filter_tensor.vec<int32>(), &filter_tf_shape).ok(), true);
+    return filter_tf_shape;
+  }
 
-      // Generate input shapes.
-      TensorShape filter_shape;
-      OP_REQUIRES(context, TensorShapeUtils::IsVector(filter_tensor.shape()),
-        errors::InvalidArgument(
-              "Conv2DBackpropFilter: filter_sizes input must be 1-dim, not ",
-              filter_tensor.dims()));
-      OP_REQUIRES_OK(context, TensorShapeUtils::MakeShape(
-                        filter_tensor.vec<int32>(), &filter_shape));
-      TensorShape input_shape = input_tensor.shape();
-      TensorShape obp_shape = obp_tensor.shape();
-
-      // By default, all dims are in MKL order. Only dims in TF order
-      // are those with prefix tf_order.
-      memory::dims obp_dims, fwd_input_dims, fwd_filter_dims;
-      memory::dims padding_l, padding_r, strides, fwd_output_dims;
-      memory::dims fwd_output_dims_tf_order;
-
-      // Get forward convolution parameters.
-      MklDnnConvUtil conv_utl(context, strides_, padding_, data_format_);
-      conv_utl.GetConvFwdSizesInMklOrder(input_shape, filter_shape,
-                                         &fwd_input_dims, &fwd_filter_dims,
-                                         &strides,
-                                         &fwd_output_dims_tf_order,
-                                         &fwd_output_dims,
-                                         &padding_l, &padding_r);
-      if (!context->status().ok()) return;
-
-      // Create Convolution forward descriptor since Convolution backward
-      // API needs it. For that, we first need to create input, filter
-      // and output memory descriptors.
-      auto mkl_data_format = TFDataFormatToMklDnnDataFormat(data_format_);
-      auto fwd_src_md = memory::desc(fwd_input_dims, MklDnnType<T>(),
-                                     mkl_data_format);
-      auto fwd_filter_md = memory::desc(fwd_filter_dims, MklDnnType<T>(),
-                                        memory::format::hwio);
-      auto fwd_out_md = memory::desc(fwd_output_dims, MklDnnType<T>(),
-                                     mkl_data_format);
-      auto fwd_desc = convolution_forward::desc(prop_kind::forward,
-            convolution_direct, fwd_src_md, fwd_filter_md, fwd_out_md,
-            strides, padding_l, padding_r, TFPaddingToMklDnnPadding(padding_));
-      auto fwd_pd = convolution_forward::primitive_desc(fwd_desc, cpu_engine);
-
-      // Allocate output tensor and shape
-      // TODO(nhasabni): Update this when support for MKL layout is added.
-      // Shape of output of Conv2DBackpropInput is same as 'input' of Conv2D.
-      TensorShape tf_output_shape(filter_shape);
-      MklShape mkl_output_mkl_shape;
-      mkl_output_mkl_shape.SetMklTensor(false);
-      Tensor* output_tensor = nullptr;
-      AllocateOutputSetMklShape(context, 0, &output_tensor, tf_output_shape,
-                                mkl_output_mkl_shape);
-
-      // Create memory for user data.
-      // Describe how the inputs and outputs of Convolution look like. Also
-      // specify buffers containing actual input and output data.
-      // Although input shape required is in MKL-DNN order, the layout is
-      // Tensorflow's layout (NHWC or NCHW depending on data format).
-      input.SetUsrMem(fwd_input_dims, mkl_data_format, &input_tensor);
-      // Outbackprop shape is NHWC or NCHW depending on data format. Since
-      // GetInputSizeInMklOrder function returns size in that order we just use
-      // use that function directly.
-      conv_utl.GetInputSizeInMklOrder(obp_shape, &obp_dims);
-      if (!context->status().ok()) return;
-      outbackprop.SetUsrMem(obp_dims, mkl_data_format, &obp_tensor);
-      // Although output shape required is in MKL-DNN order,
-      // layout is Tensorflow's filter layout (HWIO)
-      // Shape of output of Conv2DBackpropInput is same as shape of filter.
-      memory::dims bwd_output_dims = fwd_filter_dims;
-      output.SetUsrMem(bwd_output_dims, memory::format::hwio, output_tensor);
-
-      // Create memory descriptors for convolution data w/ no specified format.
-      input.SetOpMemDesc(fwd_input_dims, memory::format::any);
-      outbackprop.SetOpMemDesc(obp_dims, memory::format::any);
-      output.SetOpMemDesc(bwd_output_dims, memory::format::any);
-
-      // Create convolution backward weights primitive.
-      auto bwd_desc = convolution_backward_weights::desc(convolution_direct,
-                          input.GetOpMemDesc(), output.GetOpMemDesc(),
-                          outbackprop.GetOpMemDesc(), strides, padding_l,
-                          padding_r, TFPaddingToMklDnnPadding(padding_));
-
-      auto bwd_pd = convolution_backward_weights::primitive_desc(bwd_desc,
-                                                              cpu_engine,
-                                                              fwd_pd);
-
-      PrepareAndExecutePrimitive(bwd_pd, &input, &outbackprop, &output);
-    } catch (mkldnn::error &e) {
-     string error_msg = "Status: " + std::to_string(e.status) +
-                       ", message: " + string(e.message) +
-                       ", in file " + string(__FILE__) + ":" +
-                       std::to_string(__LINE__);
-     OP_REQUIRES_OK(context, errors::Aborted("Operation received an exception:",
-                                            error_msg));
+  const memory::dims& GetOutputDims(const memory::dims& fwd_input_dims,
+                                    const memory::dims& fwd_filter_dims) {
+    // Shape of output of Conv2DBackpropFilter is same as shape of filter.
+    return fwd_filter_dims;
+  }
+
+  memory::format GetOutputFormat(const memory::format data_format) {
+    // Output layout is Tensorflow's filter layout (HWIO).
+    return memory::format::hwio;
+  }
+
+  void CreatePrimitive(OpKernelContext* context,
+                       const engine& cpu_engine,
+                       const convolution_forward::primitive_desc& conv_fwd_pd,
+                       MklDnnData<T>* input, MklDnnData<T>* filter,
+                       MklDnnData<T>* outbackprop, MklDnnData<T>* output,
+                       Tensor** output_tensor,
+                       const memory::dims& strides,
+                       const memory::dims& padding_l,
+                       const memory::dims& padding_r,
+                       padding_kind padding,
+                       const memory::dims& bwd_output_dims,
+                       memory::format bwd_output_format) {
+    CHECK_NOTNULL(context);
+    CHECK_NOTNULL(input);
+    CHECK_NOTNULL(filter);
+    CHECK_NOTNULL(outbackprop);
+    CHECK_NOTNULL(output);
+    CHECK_NOTNULL(output_tensor);
+
+    MklDnnData<T>* bias_grad = nullptr;
+    int depth = 0;
+    if (biasEnabled) {
+      // Data structure for bias_grad
+      bias_grad = new MklDnnData<T> (&cpu_engine);
+      TensorShape obp_tf_shape = GetTfShape(context, 2);
+      depth = (MklConv2DBackpropCommonOp<Device, T>::GetTFDataFormat()
+                == FORMAT_NCHW) ?
+          obp_tf_shape.dim_size(1) : obp_tf_shape.dim_size(3);
+      memory::dims bias_grad_dims = {depth};
+      bias_grad->SetOpMemDesc(bias_grad_dims, memory::format::x);
+    }
+
+    // Create convolution backward weights primitive.
+    auto bwd_desc = (biasEnabled && (bias_grad != nullptr))?
+        convolution_backward_weights::desc(convolution_direct,
+                                input->GetOpMemDesc(), output->GetOpMemDesc(),
+                                bias_grad->GetOpMemDesc(),
+                                outbackprop->GetOpMemDesc(), strides, padding_l,
+                                padding_r, padding) :
+        convolution_backward_weights::desc(convolution_direct,
+                          input->GetOpMemDesc(), output->GetOpMemDesc(),
+                          outbackprop->GetOpMemDesc(), strides, padding_l,
+                          padding_r, padding);
+
+    auto bwd_pd = convolution_backward_weights::primitive_desc(bwd_desc,
+                                                            cpu_engine,
+                                                            conv_fwd_pd);
+
+    // Allocate output tensor.
+    AllocateOutputTensor(context, bwd_pd, bwd_output_dims,
+                         bwd_output_format, output_tensor);
+
+    CHECK_NOTNULL(*output_tensor);
+    // Set buffer handle using allocated output tensor.
+    output->SetUsrMemDataHandle(*output_tensor);
+
+    if (biasEnabled && (bias_grad != nullptr)) {
+      // Allocate bias_grad tensor
+      TensorShape bias_grad_shape({depth});
+      Tensor* bias_grad_tensor = nullptr;
+      AllocateBiasGradTensor(context, bias_grad_shape, &bias_grad_tensor);
+      memory::dims bias_grad_dims = {depth};
+      // Since Bias is 1D, we use format::x from MKLDNN to represent it.
+      auto bias_grad_md = memory::desc({bias_grad_dims}, MklDnnType<T>(),
+                                       memory::format::x);
+      bias_grad->SetUsrMem(bias_grad_md, bias_grad_tensor);
+      bias_grad->SetUsrMemDataHandle(bias_grad_tensor);
+    }
+
+    if (biasEnabled && (bias_grad != nullptr)) {
+      PrepareAndExecutePrimitive(bwd_pd, input, outbackprop, output, bias_grad);
+    } else {
+      PrepareAndExecutePrimitive(bwd_pd, input, outbackprop, output);
     }
   }
 
- private:
-  std::vector<int32> strides_;
-  Padding padding_;
-  TensorFormat data_format_;
+  // Allocate output tensor.
+  void AllocateOutputTensor(OpKernelContext* context,
+                  const convolution_backward_weights::primitive_desc& conv_pd,
+                  const memory::dims& output_dims_mkl_order,
+                  memory::format output_tf_format, Tensor** output_tensor) {
+      CHECK_NOTNULL(output_tensor);
+
+      // For BackpropFilter, we convert the output tensor back in Tensorflow
+      // layout. Because typically, BackpropFilter is the last operator in the
+      // graph that emit filter gradient that is provided to ApplyGradient
+      // method to update the filter. But it may be possible to eliminate this
+      // by forwarding filter in MKL layout if we support ApplyGradient method
+      // for MKL layout propagation.
+      MklDnnShape output_mkl_shape;
+      output_mkl_shape.SetMklTensor(false);
+      // output_dims_mkl_order is in OIHW format.
+      // Allocate shape of TF tensor in HWIO format.
+      TensorShape output_tf_shape({output_dims_mkl_order[MklDnnDims::Dim_H],
+                                   output_dims_mkl_order[MklDnnDims::Dim_W],
+                                   output_dims_mkl_order[MklDnnDims::Dim_I],
+                                   output_dims_mkl_order[MklDnnDims::Dim_O]});
+      AllocateOutputSetMklShape(context, 0, output_tensor, output_tf_shape,
+                                output_mkl_shape);
+  }
+
+  // Allocate tensor for bias grad
+  void AllocateBiasGradTensor(OpKernelContext* context,
+                              const TensorShape& bias_grad_shape,
+                              Tensor** bias_grad_tensor) {
+    CHECK_NOTNULL(bias_grad_tensor);
+
+    MklDnnShape bias_grad_mkl_shape;
+    bias_grad_mkl_shape.SetMklTensor(false);
+    AllocateOutputSetMklShape(context, 1, bias_grad_tensor, bias_grad_shape,
+                              bias_grad_mkl_shape);
+  }
 
   // Prepare and execute net - checks for input and output reorders.
   void PrepareAndExecutePrimitive(
                   const convolution_backward_weights::primitive_desc& conv_pd,
                   MklDnnData<T>* input, MklDnnData<T>* obp,
-                  MklDnnData<T>* output) {
+                  MklDnnData<T>* output, MklDnnData<T>* bias_grad = nullptr) {
     // Create reorders between user layout and MKL layout if it is needed and
     // add it to the net before convolution.
     std::vector<primitive> net;
     input->CheckReorderToOpMem(conv_pd.src_primitive_desc(), &net);
     obp->CheckReorderToOpMem(conv_pd.diff_dst_primitive_desc(), &net);
 
-    // Memory for output of convolution. Since we may need reorder on the
-    // output side, we will prepare reorder primitive in case output
-    // reorder to user memory is required.
+    // For BackpropFilter, we convert the output tensor back in Tensorflow
+    // layout.
     bool output_reorder_required = output->PrepareReorderToUserMemIfReq(
                                       conv_pd.diff_weights_primitive_desc());
 
-    net.push_back(convolution_backward_weights(conv_pd, input->GetOpMem(),
-                                    obp->GetOpMem(), output->GetOpMem()));
+    if (biasEnabled && (bias_grad != nullptr)) {
+      net.push_back(convolution_backward_weights(conv_pd, input->GetOpMem(),
+                                      obp->GetOpMem(), output->GetOpMem(),
+                                      bias_grad->GetOpMem()));
+    } else {
+      net.push_back(convolution_backward_weights(conv_pd, input->GetOpMem(),
+                                      obp->GetOpMem(), output->GetOpMem()));
+    }
 
-    // Insert reorder primitive in the net for output reorder if reorder is
-    // required.
     if (output_reorder_required) {
       output->InsertReorderToUserMem(&net);
     }
 
-    // Handle output reorder
     stream(stream::kind::eager).submit(net).wait();
   }
 };
-#endif
 
 #define REGISTER_MKL_FILTER_KERNELS(T)                              \
   REGISTER_KERNEL_BUILDER(Name("_MklConv2DBackpropFilter")          \
                               .Device(DEVICE_CPU)                   \
                               .TypeConstraint<T>("T")               \
                               .Label(mkl_op_registry::kMklOpLabel), \
-                          MklConv2DCustomBackpropFilterOp<CPUDevice, T>);
+              MklConv2DCustomBackpropFilterOp<CPUDevice, T, false>);\
+  REGISTER_KERNEL_BUILDER(Name("_MklConv2DBackpropFilterWithBias")  \
+                              .Device(DEVICE_CPU)                   \
+                              .TypeConstraint<T>("T")               \
+                              .Label(mkl_op_registry::kMklOpLabel), \
+              MklConv2DCustomBackpropFilterOp<CPUDevice, T, true>); \
+  REGISTER_KERNEL_BUILDER(Name("__MklDummyConv2DBackpropFilterWithBias")  \
+                              .Device(DEVICE_CPU)                   \
+                              .TypeConstraint<T>("T")               \
+                              .Label(mkl_op_registry::kMklOpLabel), \
+              MklDummyOp<CPUDevice, T>);
 
 TF_CALL_float(REGISTER_MKL_FILTER_KERNELS);
 #undef REGISTER_MKL_FILTER_KERNELS
+
+#endif  // INTEL_MKL_DNN
+
 }  // namespace tensorflow
 
 #endif  // INTEL_MKL
diff --git a/tensorflow/core/kernels/mkl_conv_grad_input_ops.cc b/tensorflow/core/kernels/mkl_conv_grad_input_ops.cc
index 4a47d0463e..df51df9638 100644
--- a/tensorflow/core/kernels/mkl_conv_grad_input_ops.cc
+++ b/tensorflow/core/kernels/mkl_conv_grad_input_ops.cc
@@ -49,9 +49,6 @@ limitations under the License.
 
 using mkldnn::stream;
 using mkldnn::prop_kind;
-
-using mkldnn::convolution_forward;
-using mkldnn::convolution_direct;
 using mkldnn::convolution_backward_data;
 #endif
 
@@ -362,143 +359,117 @@ class MklConv2DCustomBackpropInputOp : public OpKernel {
 #else
 
 template <typename Device, class T>
-class MklConv2DCustomBackpropInputOp : public OpKernel {
+class MklConv2DCustomBackpropInputOp :
+  public MklConv2DBackpropCommonOp<Device, T> {
  public:
-  ~MklConv2DCustomBackpropInputOp() {}
   explicit MklConv2DCustomBackpropInputOp(OpKernelConstruction* context)
-      : OpKernel(context) {
-    string data_format_str;
-    OP_REQUIRES_OK(context, context->GetAttr("data_format", &data_format_str));
-    OP_REQUIRES(context, FormatFromString(data_format_str, &data_format_),
-                errors::InvalidArgument("Invalid data format"));
-    OP_REQUIRES_OK(context, context->GetAttr("strides", &strides_));
-    int stride_n = GetTensorDim(strides_, data_format_, 'N');
-    int stride_c = GetTensorDim(strides_, data_format_, 'C');
-    OP_REQUIRES(
-        context, (stride_n == 1 && stride_c == 1),
-        errors::InvalidArgument("Current implementation does not yet support "
-                                "strides in the batch and depth dimensions."));
+      : MklConv2DBackpropCommonOp<Device, T>(context) { }
+  ~MklConv2DCustomBackpropInputOp() {}
 
-    OP_REQUIRES_OK(context, context->GetAttr("padding", &padding_));
+ private:
+  void ValidateMklShapes(const MklDnnShape& input_mkl_shape,
+                         const MklDnnShape& filter_mkl_shape,
+                         const MklDnnShape& obp_mkl_shape) {
+    // Tensor that feeds to 'Input' slot of BackpropInput is always just a shape
+    // of the Tensor and never an actual tensor. So it will never be in MKL
+    // layout.
+    CHECK(!input_mkl_shape.IsMklTensor())
+      << "Conv2DBackpropInput: input should not be in MKL Layout";
   }
 
-  void Compute(OpKernelContext* context) override {
-    try {
-      auto cpu_engine = engine(engine::cpu, 0);
+  size_t GetInputTensorIndexWithSizes() { return 0; /* input index */ }
 
-      MklDnnData<T> filter(&cpu_engine);
-      MklDnnData<T> outbackprop(&cpu_engine);
-      MklDnnData<T> output(&cpu_engine);
+  TensorShape MakeInputTfShape(OpKernelContext* context,
+                               const Tensor& input_tensor) {
+    TensorShape input_tf_shape;
+    CHECK_EQ(TensorShapeUtils::IsVector(input_tensor.shape()), true);
+    CHECK_EQ(TensorShapeUtils::MakeShape(input_tensor.vec<int32>(),
+                                         &input_tf_shape).ok(), true);
+    return input_tf_shape;
+  }
 
-      // Input tensors
-      const Tensor& input_tensor = MklGetInput(context, 0);
-      const Tensor& filter_tensor = MklGetInput(context, 1);
-      const Tensor& obp_tensor = MklGetInput(context, 2);  // Outbackprop
+  TensorShape MakeFilterTfShape(OpKernelContext* context,
+                                const Tensor& filter_tensor) {
+    size_t filter_idx = 1;
+    return GetTfShape(context, filter_idx);
+  }
 
-      // Generate input shape.
-      TensorShape input_shape;
-      OP_REQUIRES(context, TensorShapeUtils::IsVector(input_tensor.shape()),
-        errors::InvalidArgument(
-              "Conv2DBackpropInput: input_sizes input must be 1-dim, not ",
-              input_tensor.dims()));
-      OP_REQUIRES_OK(context, TensorShapeUtils::MakeShape(
-                        input_tensor.vec<int32>(), &input_shape));
-      TensorShape filter_shape = filter_tensor.shape();
-      TensorShape obp_shape = obp_tensor.shape();
-
-      // By default, all dims are in MKL order. Only dims in TF order
-      // are those with prefix tf_order.
-      memory::dims obp_dims, fwd_input_dims, fwd_filter_dims;
-      memory::dims padding_l, padding_r, strides, fwd_output_dims;
-      memory::dims fwd_output_dims_tf_order;
-
-      // Get forward convolution parameters.
-      MklDnnConvUtil conv_utl(context, strides_, padding_, data_format_);
-      conv_utl.GetConvFwdSizesInMklOrder(input_shape, filter_shape,
-                                         &fwd_input_dims, &fwd_filter_dims,
-                                         &strides,
-                                         &fwd_output_dims_tf_order,
-                                         &fwd_output_dims,
-                                         &padding_l, &padding_r);
-      if (!context->status().ok()) return;
-
-      // Create Convolution forward descriptor since Convolution backward
-      // API needs it. For that, we first need to create input, filter
-      // and output memory descriptors.
-      auto mkl_data_format = TFDataFormatToMklDnnDataFormat(data_format_);
-      auto fwd_src_md = memory::desc(fwd_input_dims, MklDnnType<T>(),
-                                     mkl_data_format);
-      auto fwd_filter_md = memory::desc(fwd_filter_dims, MklDnnType<T>(),
-                                        memory::format::hwio);
-      auto fwd_out_md = memory::desc(fwd_output_dims, MklDnnType<T>(),
-                                     mkl_data_format);
-      auto fwd_desc = convolution_forward::desc(prop_kind::forward,
-            convolution_direct, fwd_src_md, fwd_filter_md, fwd_out_md,
-            strides, padding_l, padding_r, TFPaddingToMklDnnPadding(padding_));
-      auto fwd_pd = convolution_forward::primitive_desc(fwd_desc, cpu_engine);
-
-      // Allocate output tensor and shape
-      // TODO(nhasabni): Update this when support for MKL layout is added.
-      // Shape of output of Conv2DBackpropInput is same as 'input' of Conv2D.
-      TensorShape tf_output_shape(input_shape);
-      MklShape mkl_output_mkl_shape;
-      mkl_output_mkl_shape.SetMklTensor(false);
-      Tensor* output_tensor = nullptr;
-      AllocateOutputSetMklShape(context, 0, &output_tensor, tf_output_shape,
-                                mkl_output_mkl_shape);
-
-      // Create memory for user data.
-      // Describe how the inputs and outputs of Convolution look like. Also
-      // specify buffers containing actual input and output data.
-      // Although input shape required is in MKL-DNN order, the layout is
-      // Tensorflow's layout (NHWC or NCHW depending on data format).
-      // Although filter shape (filter_dims) required is in MKL-DNN order,
-      // the layout is Tensorflow's layout (HWIO).
-      // Shape of Conv2DBackpropInput's filter is same as that of Conv2D filter.
-      filter.SetUsrMem(fwd_filter_dims, memory::format::hwio, &filter_tensor);
-      // Outbackprop shape is NHWC or NCHW depending on data format. Since
-      // GetInputSizeInMklOrder function returns size in that order we just use
-      // use that function directly.
-      conv_utl.GetInputSizeInMklOrder(obp_shape, &obp_dims);
-      if (!context->status().ok()) return;
-      outbackprop.SetUsrMem(obp_dims, mkl_data_format, &obp_tensor);
-      // Although output shape required is in MKL-DNN order,
-      // layout is Tensorflow's layout (NHWC or NCHW depending on data format).
-      // Shape of output of Conv2DBackpropInput is same as shape of 'input'
-      // of Conv2D.
-      memory::dims bwd_output_dims = fwd_input_dims;
-      output.SetUsrMem(bwd_output_dims, mkl_data_format, output_tensor);
-
-      // Create memory descriptors for convolution data w/ no specified format.
-      filter.SetOpMemDesc(fwd_filter_dims, memory::format::any);
-      outbackprop.SetOpMemDesc(obp_dims, memory::format::any);
-      output.SetOpMemDesc(bwd_output_dims, memory::format::any);
-
-      // Create convolution backward data primitive.
-      auto bwd_desc = convolution_backward_data::desc(convolution_direct,
-                          output.GetOpMemDesc(), filter.GetOpMemDesc(),
-                          outbackprop.GetOpMemDesc(), strides, padding_l,
-                          padding_r, TFPaddingToMklDnnPadding(padding_));
-
-      auto bwd_pd = convolution_backward_data::primitive_desc(bwd_desc,
-                                                              cpu_engine,
-                                                              fwd_pd);
-
-      PrepareAndExecutePrimitive(bwd_pd, &filter, &outbackprop, &output);
-    } catch (mkldnn::error &e) {
-     string error_msg = "Status: " + std::to_string(e.status) +
-                       ", message: " + string(e.message) +
-                       ", in file " + string(__FILE__) + ":" +
-                       std::to_string(__LINE__);
-     OP_REQUIRES_OK(context, errors::Aborted("Operation received an exception:",
-                                            error_msg));
-    }
+  const memory::dims& GetOutputDims(const memory::dims& fwd_input_dims,
+                                    const memory::dims& fwd_filter_dims) {
+    // Output Shape of Conv2DBackpropInput is same as shape of Conv2D 'input'.
+    return fwd_input_dims;
   }
 
- private:
-  std::vector<int32> strides_;
-  Padding padding_;
-  TensorFormat data_format_;
+  memory::format GetOutputFormat(const memory::format data_format) {
+    // Output layout is Tensorflow's layout in data format order.
+    return data_format;
+  }
+
+  void CreatePrimitive(OpKernelContext* context,
+                       const engine& cpu_engine,
+                       const convolution_forward::primitive_desc& conv_fwd_pd,
+                       MklDnnData<T>* input, MklDnnData<T>* filter,
+                       MklDnnData<T>* outbackprop, MklDnnData<T>* output,
+                       Tensor** output_tensor,
+                       const memory::dims& strides,
+                       const memory::dims& padding_l,
+                       const memory::dims& padding_r,
+                       padding_kind padding,
+                       const memory::dims& bwd_output_dims,
+                       memory::format bwd_output_format) {
+    CHECK_NOTNULL(context);
+    CHECK_NOTNULL(input);
+    CHECK_NOTNULL(filter);
+    CHECK_NOTNULL(outbackprop);
+    CHECK_NOTNULL(output);
+    CHECK_NOTNULL(output_tensor);
+
+    // Create convolution backward data primitive.
+    auto bwd_desc = convolution_backward_data::desc(convolution_direct,
+                      output->GetOpMemDesc(), filter->GetOpMemDesc(),
+                      outbackprop->GetOpMemDesc(), strides, padding_l,
+                      padding_r, padding);
+
+    auto bwd_pd = convolution_backward_data::primitive_desc(bwd_desc,
+                                                          cpu_engine,
+                                                          conv_fwd_pd);
+
+
+    // Allocate output tensor in TensorFlow and MKL layout.
+    AllocateOutputTensor(context, bwd_pd, bwd_output_dims,
+                         bwd_output_format, output_tensor);
+    CHECK_NOTNULL(*output_tensor);
+    // Set buffer handle using allocated output tensor.
+    output->SetUsrMemDataHandle(*output_tensor);
+
+    PrepareAndExecutePrimitive(bwd_pd, filter, outbackprop, output);
+  }
+
+  // Allocate output tensor.
+  void AllocateOutputTensor(OpKernelContext* context,
+                  const convolution_backward_data::primitive_desc& conv_pd,
+                  const memory::dims& output_dims_mkl_order,
+                  memory::format output_tf_format, Tensor** output_tensor) {
+      CHECK_NOTNULL(output_tensor);
+
+      // Output primitive descriptor for backward data is diff_src.
+      auto dst_pd = conv_pd.diff_src_primitive_desc();
+
+      // Allocate shape of Mkl tensor.
+      MklDnnShape output_mkl_shape;
+      output_mkl_shape.SetMklTensor(true);
+      output_mkl_shape.SetMklLayout(&dst_pd);
+      output_mkl_shape.SetElemType(MklDnnType<T>());
+      output_mkl_shape.SetTfLayout(output_dims_mkl_order.size(),
+                                   output_dims_mkl_order, output_tf_format);
+
+      // Allocate shape of TF tensor.
+      TensorShape output_tf_shape;
+      output_tf_shape.AddDim(dst_pd.get_size() / sizeof(T));
+
+      AllocateOutputSetMklShape(context, 0, output_tensor, output_tf_shape,
+                                output_mkl_shape);
+  }
 
   // Prepare and execute net - checks for input and output reorders.
   void PrepareAndExecutePrimitive(
@@ -511,22 +482,9 @@ class MklConv2DCustomBackpropInputOp : public OpKernel {
     filter->CheckReorderToOpMem(conv_pd.weights_primitive_desc(), &net);
     obp->CheckReorderToOpMem(conv_pd.diff_dst_primitive_desc(), &net);
 
-    // Memory for output of convolution. Since we may need reorder on the
-    // output side, we will prepare reorder primitive in case output
-    // reorder to user memory is required.
-    bool output_reorder_required = output->PrepareReorderToUserMemIfReq(
-                                      conv_pd.diff_src_primitive_desc());
-
     net.push_back(convolution_backward_data(conv_pd, obp->GetOpMem(),
                                     filter->GetOpMem(), output->GetOpMem()));
 
-    // Insert reorder primitive in the net for output reorder if reorder is
-    // required.
-    if (output_reorder_required) {
-      output->InsertReorderToUserMem(&net);
-    }
-
-    // Handle output reorder
     stream(stream::kind::eager).submit(net).wait();
   }
 };
diff --git a/tensorflow/core/kernels/mkl_conv_ops.cc b/tensorflow/core/kernels/mkl_conv_ops.cc
index a9872b8d6d..04268f23bb 100644
--- a/tensorflow/core/kernels/mkl_conv_ops.cc
+++ b/tensorflow/core/kernels/mkl_conv_ops.cc
@@ -40,8 +40,7 @@ limitations under the License.
 #include "tensorflow/core/util/tensor_format.h"
 
 #include "tensorflow/core/util/mkl_util.h"
-#include "mkl_dnn.h"
-#include "mkl_dnn_types.h"
+
 
 #ifdef INTEL_MKL_DNN
 #include "mkldnn.hpp"
@@ -51,6 +50,9 @@ using mkldnn::prop_kind;
 
 using mkldnn::convolution_forward;
 using mkldnn::convolution_direct;
+#else
+#include "mkl_dnn.h"
+#include "mkl_dnn_types.h"
 #endif
 
 namespace tensorflow {
@@ -288,10 +290,8 @@ class MklConv2DOp : public OpKernel {
     mkl_filter_output_mkl_shape.SetMklLayout(mkl_context.prim_fwd,
                                              dnnResourceFilter);
 
-    size_t filter_sizes[4] = {static_cast<size_t>(filter.dim_size(0)),
-                              static_cast<size_t>(filter.dim_size(1)),
-                              static_cast<size_t>(filter.dim_size(2)),
-                              static_cast<size_t>(filter.dim_size(3))};
+    size_t filter_sizes[4] = {filter.dim_size(0), filter.dim_size(1),
+                              filter.dim_size(2), filter.dim_size(3)};
     mkl_filter_output_mkl_shape.SetTfLayout(filter.dims(), filter_sizes,
                                             mkl_context.filter_strides);
 
@@ -514,6 +514,12 @@ class MklConv2DOp : public OpKernel {
       const Tensor& src_tensor = MklGetInput(context, src_idx);
       const Tensor& filter_tensor = MklGetInput(context, filter_idx);
 
+      MklDnnShape src_mkl_shape, filter_mkl_shape;
+      GetMklShape(context, src_idx, &src_mkl_shape);
+      GetMklShape(context, filter_idx, &filter_mkl_shape);
+      CHECK(!filter_mkl_shape.IsMklTensor())
+        << "Conv2D filter should not be in MKL Layout";
+
       MklDnnData<T> src(&cpu_engine);
       MklDnnData<T> filter(&cpu_engine);
       MklDnnData<T> output(&cpu_engine);
@@ -523,8 +529,9 @@ class MklConv2DOp : public OpKernel {
 
       // Get shapes of input tensors in MKL-DNN order
       MklDnnConvUtil conv_utl(context, strides_, padding_, data_format_);
-      conv_utl.GetConvFwdSizesInMklOrder(src_tensor.shape(),
-                                         filter_tensor.shape(),
+      auto src_tf_shape = GetTfShape(context, src_idx);
+      auto filter_tf_shape = GetTfShape(context, filter_idx);
+      conv_utl.GetConvFwdSizesInMklOrder(src_tf_shape, filter_tf_shape,
                                          &src_dims, &filter_dims, &strides,
                                          &output_dims_tf_order,
                                          &output_dims_mkl_order, &padding_l,
@@ -532,58 +539,47 @@ class MklConv2DOp : public OpKernel {
       if (!context->status().ok()) return;
 
       // Check for corner case - if there is nothing to compute, return.
-      TensorShape tf_output_shape({output_dims_tf_order[0],
-                                output_dims_tf_order[1],
-                                output_dims_tf_order[2],
-                                output_dims_tf_order[3]});
-      Tensor* output_tensor = nullptr;
-      MklShape mkl_output_mkl_shape;
-      mkl_output_mkl_shape.SetMklTensor(false);
-      AllocateOutputSetMklShape(context, 0, &output_tensor, tf_output_shape,
-                                mkl_output_mkl_shape);
+      TensorShape output_tf_shape = MklDnnDimsToTFShape(output_dims_tf_order);
 
       // Forward filter in TF format from input at index 1 to output at index 1.
       ForwardTfTensorInToOut(context, 1, 1);
 
-      if (tf_output_shape.num_elements() == 0) {
+      // Corner cases: output with 0 elements and 0 batch size.
+      Tensor* output_tensor = nullptr;
+      if (output_tf_shape.num_elements() == 0 ||
+          output_dims_tf_order[0] == 0) {
         // TODO(jbobba): Verify correctness here
         //               Need semantics for Null MKL tensor
+        MklDnnShape output_mkl_shape;
+        output_mkl_shape.SetMklTensor(false);
+        AllocateOutputSetMklShape(context, 0, &output_tensor, src_tf_shape,
+                                output_mkl_shape);
         return;
       }
 
-      // Corner case to handle 0 batch size.
-      if (output_dims_tf_order[0] == 0) {
-        // Nothing to do, allocate output tensor and return
-        // TODO(nhasabni): remove this code later once serialization
-        // in MKL-DNN is supported.
-        AllocateOutputSetMklShape(context, 0, &output_tensor,
-                                  src_tensor.shape(), mkl_output_mkl_shape);
-        return;
-      } else {
-        // Otherwise regular output tensor allocation
-        // Allocate output tensor.
-      }
-      CHECK_NOTNULL(output_tensor);
-
       // Create memory for user data.
       // Describe how the inputs and outputs of Convolution look like. Also
       // specify buffers containing actual input and output data.
-      // Although input shape (src_dims) required is in MKL-DNN order,
-      // the layout is Tensorflow's layout (NHWC or NCHW depending on data
-      // format).
-      src.SetUsrMem(src_dims, TFDataFormatToMklDnnDataFormat(data_format_),
-                    const_cast<void*>(static_cast<const void*>(
-                    src_tensor.flat<T>().data())));
+      auto tf_fmt = TFDataFormatToMklDnnDataFormat(data_format_);
+      // If input is in MKL layout, then simply grab input layout; otherwise,
+      // construct input Tf layout. For TF layout, although input shape
+      // (src_dims) required is in MKL-DNN order, the layout is Tensorflow's
+      // layout (NHWC or NCHW depending on data format).
+      auto src_md = src_mkl_shape.IsMklTensor()
+                    ? src_mkl_shape.GetMklLayout()
+                    : memory::desc(src_dims, MklDnnType<T>(), tf_fmt);
+      src.SetUsrMem(src_md, &src_tensor);
       // Although filter shape (filter_dims) required is in MKL-DNN order,
       // the layout is Tensorflow's layout (HWIO).
-      filter.SetUsrMem(filter_dims, memory::format::hwio,
-                       const_cast<void*>(static_cast<const void*>(
-                       filter_tensor.flat<T>().data())));
-      // Although output shape (output_dims) required is in MKL-DNN order,
-      // layout is Tensorflow's layout (NHWC or NCHW depending on data format).
-      output.SetUsrMem(output_dims_mkl_order,
-                       TFDataFormatToMklDnnDataFormat(data_format_),
-                       output_tensor->flat<T>().data());
+      auto filter_md = filter_mkl_shape.IsMklTensor()
+                    ? filter_mkl_shape.GetMklLayout()
+          : memory::desc(filter_dims, MklDnnType<T>(), memory::format::hwio);
+      filter.SetUsrMem(filter_md, &filter_tensor);
+      // Set output shape (output_dims) required in MKL-DNN order.
+      // Currently, we set output layout as Tensorflow's layout (NHWC or NCHW
+      // depending on data format). But later we propagate Mkl layout of the
+      // output to the next op directly.
+      output.SetUsrMem(output_dims_mkl_order, tf_fmt);
 
       // Create memory descriptors for convolution data w/ no specified format.
       src.SetOpMemDesc(src_dims, memory::format::any);
@@ -596,9 +592,7 @@ class MklConv2DOp : public OpKernel {
         memory::dims bias_size;
         conv_utl.GetBiasSizeInMklOrder(2 /* bias idx */, &bias_size);
         const Tensor& bias_tensor = MklGetInput(context, 2);
-        bias.SetUsrMem(bias_size, memory::format::x,
-                       const_cast<void*>(static_cast<const void*>(
-                       bias_tensor.flat<T>().data())));
+        bias.SetUsrMem(bias_size, memory::format::x, &bias_tensor);
         bias.SetOpMemDesc(bias_size, memory::format::any);
 
         // Create convolution primitive with Bias.
@@ -609,6 +603,10 @@ class MklConv2DOp : public OpKernel {
 
         auto conv_prim_desc = convolution_forward::primitive_desc(conv_desc,
                                                                 cpu_engine);
+        AllocateOutputTensor(context, conv_prim_desc,
+                             output_dims_mkl_order, tf_fmt, &output_tensor);
+        // Set data handle for output.
+        output.SetUsrMemDataHandle(output_tensor);
         PrepareAndExecuteNet(conv_prim_desc, &src, &filter, &bias, &output);
       } else {
         // Create convolution primitive without Bias.
@@ -619,6 +617,10 @@ class MklConv2DOp : public OpKernel {
 
         auto conv_prim_desc = convolution_forward::primitive_desc(conv_desc,
                                                                 cpu_engine);
+        AllocateOutputTensor(context, conv_prim_desc, output_dims_mkl_order,
+                             tf_fmt, &output_tensor);
+        // Set data handle for output.
+        output.SetUsrMemDataHandle(output_tensor);
         PrepareAndExecuteNet(conv_prim_desc, &src, &filter, nullptr, &output);
       }
     } catch (mkldnn::error &e) {
@@ -636,23 +638,44 @@ class MklConv2DOp : public OpKernel {
   Padding padding_;
   TensorFormat data_format_;
 
+  // Allocate output tensor.
+  void AllocateOutputTensor(
+                  OpKernelContext* context,
+                  const convolution_forward::primitive_desc& conv_prim_desc,
+                  const memory::dims& output_dims_mkl_order,
+                  memory::format output_tf_format, Tensor** output_tensor) {
+      CHECK_NOTNULL(output_tensor);
+      auto dst_pd = conv_prim_desc.dst_primitive_desc();
+
+      // Allocate shape of Mkl tensor.
+      MklDnnShape output_mkl_shape;
+      output_mkl_shape.SetMklTensor(true);
+      output_mkl_shape.SetMklLayout(&dst_pd);
+      output_mkl_shape.SetElemType(MklDnnType<T>());
+      output_mkl_shape.SetTfLayout(output_dims_mkl_order.size(),
+                                   output_dims_mkl_order, output_tf_format);
+
+      // Allocate shape of TF tensor.
+      TensorShape output_tf_shape;
+      output_tf_shape.AddDim((dst_pd.get_size() / sizeof(T)));
+
+      const int kOutputSlotIdx = 0;
+      AllocateOutputSetMklShape(context, kOutputSlotIdx, output_tensor,
+                                output_tf_shape, output_mkl_shape);
+  }
+
   // Prepare and execute net - checks for input and output reorders.
   void PrepareAndExecuteNet(
                   const convolution_forward::primitive_desc& conv_prim_desc,
                   MklDnnData<T>* src, MklDnnData<T>* filter,
                   MklDnnData<T>* bias, MklDnnData<T>* output) {
     // Create reorders between user layout and MKL layout if it is needed and
-    // add it to the net before convolution.
+    // add it to the net before convolution. No need to check for output
+    // reorder as we propagate output layout to the next layer.
     std::vector<primitive> net;
     src->CheckReorderToOpMem(conv_prim_desc.src_primitive_desc(), &net);
     filter->CheckReorderToOpMem(conv_prim_desc.weights_primitive_desc(), &net);
 
-    // Memory for output of convolution. Since we may need reorder on the
-    // output side, we will prepare reorder primitive in case output
-    // reorder to user memory is required.
-    bool output_reorder_required = output->PrepareReorderToUserMemIfReq(
-                                      conv_prim_desc.dst_primitive_desc());
-
     // Create convolution primitive and add it to net.
     if (bias) {
       CHECK_EQ(biasEnabled, true);
@@ -665,13 +688,6 @@ class MklConv2DOp : public OpKernel {
                                     filter->GetOpMem(), output->GetOpMem()));
     }
 
-    // Insert reorder primitive in the net for output reorder if reorder is
-    // required.
-    if (output_reorder_required) {
-      output->InsertReorderToUserMem(&net);
-    }
-
-    // Handle output reorder
     stream(stream::kind::eager).submit(net).wait();
   }
 };
@@ -688,7 +704,12 @@ class MklConv2DOp : public OpKernel {
                               .Device(DEVICE_CPU)                   \
                               .TypeConstraint<T>("T")               \
                               .Label(mkl_op_registry::kMklOpLabel), \
-                          MklConv2DOp<CPUDevice, T, true>);
+                          MklConv2DOp<CPUDevice, T, true>);         \
+  REGISTER_KERNEL_BUILDER(Name("__MklDummyConv2DWithBias")          \
+                              .Device(DEVICE_CPU)                   \
+                              .TypeConstraint<T>("T")               \
+                              .Label(mkl_op_registry::kMklOpLabel), \
+                          MklDummyOp<CPUDevice, T>);
 
 TF_CALL_float(REGISTER_MKL_CPU);
 
diff --git a/tensorflow/core/kernels/mkl_conv_ops.h b/tensorflow/core/kernels/mkl_conv_ops.h
index f0cb37f8a4..b4b76d2186 100644
--- a/tensorflow/core/kernels/mkl_conv_ops.h
+++ b/tensorflow/core/kernels/mkl_conv_ops.h
@@ -41,6 +41,12 @@ limitations under the License.
 
 #ifdef INTEL_MKL_DNN
 #include "mkldnn.hpp"
+
+using mkldnn::stream;
+using mkldnn::prop_kind;
+
+using mkldnn::convolution_forward;
+using mkldnn::convolution_direct;
 #endif
 
 namespace tensorflow {
@@ -108,7 +114,13 @@ class MklDnnConvUtil {
   #undef CHECK_BOUNDS
 
     // MKL-DNN always requires input in NCHW format.
-    *input_dims = {input_batch, input_depth, input_rows, input_cols};
+    std::vector<int> mkldnn_sizes(4, -1);
+    mkldnn_sizes[MklDnnDims::Dim_N] = input_batch;
+    mkldnn_sizes[MklDnnDims::Dim_C] = input_depth;
+    mkldnn_sizes[MklDnnDims::Dim_H] = input_rows;
+    mkldnn_sizes[MklDnnDims::Dim_W] = input_cols;
+
+    *input_dims = mkldnn_sizes;
   }
 
   // Calculate Convolution filter size in MKL-DNN order. MKL-DNN
@@ -156,7 +168,13 @@ class MklDnnConvUtil {
 
     // MKL-DNN always needs filter in OIHW format.
     // OIHW = (out_depth, in_depth, rows, cols)
-    *filter_dims = {out_depth, in_depth, filter_rows, filter_cols};
+    std::vector<int> mkldnn_sizes(4, -1);
+    mkldnn_sizes[MklDnnDims::Dim_O] = out_depth;
+    mkldnn_sizes[MklDnnDims::Dim_I] = in_depth;
+    mkldnn_sizes[MklDnnDims::Dim_H] = filter_rows;
+    mkldnn_sizes[MklDnnDims::Dim_W] = filter_cols;
+
+    *filter_dims = mkldnn_sizes;
   }
 
   // Calculate Convolution filter size in MKL-DNN order. MKL-DNN
@@ -167,9 +185,9 @@ class MklDnnConvUtil {
   GetFilterSizeInMklOrder(size_t src_index, size_t filter_index,
                           memory::dims *filter_dims) {
     CHECK_NOTNULL(filter_dims);
-    const Tensor& input = MklGetInput(context_, src_index);
-    const Tensor& filter = MklGetInput(context_, filter_index);
-    GetFilterSizeInMklOrder(input.shape(), filter.shape(), filter_dims);
+    GetFilterSizeInMklOrder(GetTfShape(context_, src_index),
+                            GetTfShape(context_, filter_index),
+                            filter_dims);
   }
 
   // Calculate Bias size for 2D Convolution. Function does not return
@@ -238,8 +256,12 @@ class MklDnnConvUtil {
     *output_dims_tf_order = TFShapeToMklDnnDims(out_shape);
 
     // MKL-DNN always needs output in NCHW format.
-    *output_dims_mkl_order = {out_batch, out_depth, static_cast<int>(out_rows),
-                   static_cast<int>(out_cols)};
+    std::vector<int> mkldnn_sizes(4, -1);
+    mkldnn_sizes[MklDnnDims::Dim_N] = out_batch;
+    mkldnn_sizes[MklDnnDims::Dim_C] = out_depth;
+    mkldnn_sizes[MklDnnDims::Dim_H] = static_cast<int>(out_rows);
+    mkldnn_sizes[MklDnnDims::Dim_W] = static_cast<int>(out_cols);
+    *output_dims_mkl_order = mkldnn_sizes;
 
     // Now handle padding. MKL-DNN uses asymetric padding.
     *pad_l = {static_cast<int>(pad_top), static_cast<int>(pad_left)};
@@ -261,14 +283,14 @@ class MklDnnConvUtil {
     CHECK_NOTNULL(pad_l);
     CHECK_NOTNULL(pad_r);
 
-    const Tensor& input = MklGetInput(context_, src_index);
-    const Tensor& filter = MklGetInput(context_, filter_index);
+    auto input_tf_shape = GetTfShape(context_, src_index);
+    auto filter_tf_shape = GetTfShape(context_, filter_index);
 
-    OP_REQUIRES(context_, input.dims() == 4,
+    OP_REQUIRES(context_, input_tf_shape.dims() == 4,
                 errors::InvalidArgument("input must be 4-dimensional",
-                                          input.shape().DebugString()));
+                                          input_tf_shape.DebugString()));
 
-    GetOutputAndPadSizeInMklOrder(input.shape(), filter.shape(),
+    GetOutputAndPadSizeInMklOrder(input_tf_shape, filter_tf_shape,
                                   strides, output_dims_tf_order,
                                   output_dims_mkl_order, pad_l, pad_r);
   }
@@ -309,8 +331,231 @@ class MklDnnConvUtil {
   }
 };
 
+/////////////////////////////////////////////////////////////////////
+///  Common class that implements Conv2DBackpropFilter and Input
+/////////////////////////////////////////////////////////////////////
+
+template <typename Device, class T>
+class MklConv2DBackpropCommonOp :  public OpKernel {
+ public:
+  ~MklConv2DBackpropCommonOp() {}
+  explicit MklConv2DBackpropCommonOp(OpKernelConstruction* context)
+      : OpKernel(context) {
+    string data_format_str;
+    OP_REQUIRES_OK(context, context->GetAttr("data_format", &data_format_str));
+    OP_REQUIRES(context, FormatFromString(data_format_str, &data_format_),
+                errors::InvalidArgument("Invalid data format"));
+    OP_REQUIRES_OK(context, context->GetAttr("strides", &strides_));
+    int stride_n = GetTensorDim(strides_, data_format_, 'N');
+    int stride_c = GetTensorDim(strides_, data_format_, 'C');
+    OP_REQUIRES(
+        context, (stride_n == 1 && stride_c == 1),
+        errors::InvalidArgument("Current implementation does not yet support "
+                                "strides in the batch and depth dimensions."));
+
+    OP_REQUIRES_OK(context, context->GetAttr("padding", &padding_));
+  }
+
+  void Compute(OpKernelContext* context) override {
+    try {
+      auto cpu_engine = engine(engine::cpu, 0);
+
+      // Prepare common tensors for Conv2DBackpropInput and
+      // Conv2DBackpropFilter.
+      MklDnnData<T> input(&cpu_engine);
+      MklDnnData<T> filter(&cpu_engine);
+      MklDnnData<T> outbackprop(&cpu_engine);
+      MklDnnData<T> output(&cpu_engine);
+
+      // Input tensors
+      const int kInputIdx = 0, kFilterIdx = 1, kOutbpropIdx = 2;
+      const Tensor& input_tensor = MklGetInput(context, kInputIdx);
+      const Tensor& filter_tensor = MklGetInput(context, kFilterIdx);
+      const Tensor& outbprop_tensor = MklGetInput(context, kOutbpropIdx);
+
+      MklDnnShape input_mkl_shape, filter_mkl_shape, outbprop_mkl_shape;
+      GetMklShape(context, kInputIdx, &input_mkl_shape);
+      GetMklShape(context, kFilterIdx, &filter_mkl_shape);
+      GetMklShape(context, kOutbpropIdx, &outbprop_mkl_shape);
+      // Allow operator-specific sanity checking of shapes.
+      ValidateMklShapes(input_mkl_shape, filter_mkl_shape, outbprop_mkl_shape);
+
+      // Allow operator-specific generation of shapes.
+      // E.g., Conv2DBackpropFilter gets filter as filter_sizes. It is a
+      // tensor containing shape of filter. So filter.shape() is not
+      // a correct way to get filter shape. These operator-specific calls
+      // allow this class to handle this case.
+      TensorShape input_tf_shape = MakeInputTfShape(context, input_tensor);
+      TensorShape filter_tf_shape = MakeFilterTfShape(context, filter_tensor);
+      TensorShape outbprop_tf_shape = GetTfShape(context, kOutbpropIdx);
+
+      // By default, all dims are in MKL order. Only dims in TF order
+      // are those with prefix tf_order.
+      memory::dims outbprop_dims, fwd_input_dims, fwd_filter_dims;
+      memory::dims padding_l, padding_r, strides, fwd_output_dims;
+      memory::dims fwd_output_dims_tf_order;
+
+      // Get forward convolution parameters.
+      MklDnnConvUtil conv_utl(context, strides_, padding_, data_format_);
+      conv_utl.GetConvFwdSizesInMklOrder(input_tf_shape, filter_tf_shape,
+                                         &fwd_input_dims, &fwd_filter_dims,
+                                         &strides,
+                                         &fwd_output_dims_tf_order,
+                                         &fwd_output_dims,
+                                         &padding_l, &padding_r);
+      if (!context->status().ok()) return;
+
+      // Create Convolution forward descriptor since Convolution backward
+      // API needs it. For that, we first need to create input, filter
+      // and output memory descriptors.
+      auto tf_fmt = TFDataFormatToMklDnnDataFormat(data_format_);
+      // If input is in MKL layout, then simply grab input layout; otherwise,
+      // construct input TF layout. For TF layout, although input shape
+      // required is in MKL-DNN order, the layout is Tensorflow's layout
+      // (NHWC or NCHW depending on data format).
+      auto fwd_input_md = input_mkl_shape.IsMklTensor() ?
+                          input_mkl_shape.GetMklLayout() :
+                       memory::desc(fwd_input_dims, MklDnnType<T>(), tf_fmt);
+      // If filter is in MKL layout, then simply grab filter layout; otherwise
+      // construct filter in TF layout. For TF layout, filter is in HWIO format.
+      auto fwd_filter_md = filter_mkl_shape.IsMklTensor() ?
+                          filter_mkl_shape.GetMklLayout() :
+                          memory::desc(fwd_filter_dims, MklDnnType<T>(),
+                                       memory::format::hwio);
+      // Tensorflow Output of Conv2D is in data_format order.
+      auto fwd_out_md = memory::desc(fwd_output_dims, MklDnnType<T>(), tf_fmt);
+      auto fwd_desc = convolution_forward::desc(prop_kind::forward,
+            convolution_direct, fwd_input_md, fwd_filter_md, fwd_out_md,
+            strides, padding_l, padding_r, TFPaddingToMklDnnPadding(padding_));
+      auto fwd_pd = convolution_forward::primitive_desc(fwd_desc, cpu_engine);
+
+      // Create memory for user data. Describe how the inputs and outputs of
+      // Convolution look like. Also specify buffers containing actual input
+      // and output data.
+
+      // Since this is a common class for both Conv2DBackpropFilter and
+      // Conv2DBackpropInput, we skip SetUsrMem call for input tensor (for
+      // Conv2DBackpropInput) and for filter tensor (for
+      // conv2DBackpropFilter) depending on which tensor is int32 type.
+      size_t input_with_sizes = GetInputTensorIndexWithSizes();
+      if (input_with_sizes != kInputIdx) {
+        // Shape of Conv2DBackpropFilter's input is same as Conv2D input.
+        input.SetUsrMem(fwd_input_md, &input_tensor);
+      } else if (input_with_sizes != kFilterIdx) {
+        // Shape of Conv2DBackpropInput's filter is same as Conv2D filter.
+        filter.SetUsrMem(fwd_filter_md, &filter_tensor);
+      }
+
+      conv_utl.GetInputSizeInMklOrder(outbprop_tf_shape, &outbprop_dims);
+      if (!context->status().ok()) return;
+      if (outbprop_mkl_shape.IsMklTensor()) {
+        // If outbackprop is in Mkl layout, then simply grab it.
+        auto outbprop_md = outbprop_mkl_shape.GetMklLayout();
+        outbackprop.SetUsrMem(outbprop_md, &outbprop_tensor);
+      } else {
+        // If outbackprop is in TensorFlow layout, then we need to create memory
+        // descriptor for it. Outbackprop shape is data format order.
+        outbackprop.SetUsrMem(outbprop_dims, tf_fmt, &outbprop_tensor);
+      }
+
+      // Operator specific call to get output shape and data_format.
+      auto bwd_output_dims = GetOutputDims(fwd_input_dims, fwd_filter_dims);
+      auto bwd_output_format = GetOutputFormat(tf_fmt);
+      output.SetUsrMem(bwd_output_dims, bwd_output_format);
+
+      // Create memory descriptors for convolution data w/ no specified format.
+      input.SetOpMemDesc(fwd_input_dims, memory::format::any);
+      filter.SetOpMemDesc(fwd_filter_dims, memory::format::any);
+      outbackprop.SetOpMemDesc(outbprop_dims, memory::format::any);
+      output.SetOpMemDesc(bwd_output_dims, memory::format::any);
+
+      // Operator-specific call to create and execute primitive.
+      Tensor* output_tensor = nullptr;
+      CreatePrimitive(context, cpu_engine, fwd_pd, &input, &filter,
+                      &outbackprop, &output, &output_tensor,
+                      strides, padding_l, padding_r,
+                      TFPaddingToMklDnnPadding(padding_),
+                      bwd_output_dims, bwd_output_format);
+    } catch (mkldnn::error &e) {
+     string error_msg = "Status: " + std::to_string(e.status) +
+                       ", message: " + string(e.message) +
+                       ", in file " + string(__FILE__) + ":" +
+                       std::to_string(__LINE__);
+     OP_REQUIRES_OK(context, errors::Aborted("Operation received an exception:",
+                                            error_msg));
+    }
+  }
+
+  /// Pure virtual function to allow operator to check for validity of input
+  /// shapes. Function asserts that input shapes are valid.
+  virtual void ValidateMklShapes(const MklDnnShape& input_mkl_shape,
+                                 const MklDnnShape& filter_mkl_shape,
+                                 const MklDnnShape& outbprop_mkl_shape) = 0;
+
+  /// Operator-specific function that returns index of input that is
+  /// representing input sizes. For Conv2DBackpropFilter it returns 1 since
+  /// filter for this operator is filter shape. For Conv2DBackpropInput it
+  /// returns 0 (for input).
+  virtual size_t GetInputTensorIndexWithSizes() = 0;
+
+  /// Get TensorFlow shape of input tensor.
+  virtual TensorShape MakeInputTfShape(OpKernelContext* context,
+                                      const Tensor& input_tensor) = 0;
+
+  /// Get TensorFlow shape of filter tensor.
+  virtual TensorShape MakeFilterTfShape(OpKernelContext* context,
+                                       const Tensor& filter_tensor) = 0;
+
+  /// Get shape of output in MKL-DNN order. Computes shape of output from
+  /// input shape (fwd_input_dims) and filter shape (fwd_filter_dims).
+  virtual
+  const memory::dims& GetOutputDims(const memory::dims& fwd_input_dims,
+                                    const memory::dims& fwd_filter_dims) = 0;
+
+  /// Get data_format of output in MKL-DNN order. If output data format is
+  /// same as input data format, then it simply returns value of data_format
+  /// parameter as it is.
+  virtual memory::format GetOutputFormat(const memory::format data_format) = 0;
+
+  /// Create and execute the primitive storing output in the output_tensor.
+  virtual void CreatePrimitive(OpKernelContext* context,
+    const engine& cpu_engine,
+    const convolution_forward::primitive_desc& conv_fwd_pd,
+    MklDnnData<T>* input, MklDnnData<T>* filter, MklDnnData<T>* outbackprop,
+    MklDnnData<T>* output, Tensor** output_tensor, const memory::dims& strides,
+    const memory::dims& padding_l, const memory::dims& padding_r,
+    padding_kind padding, const memory::dims& bwd_output_dims,
+    memory::format bwd_output_format) = 0;
+
+  // Get the data_format {NCHW, NHWC}
+  TensorFormat GetTFDataFormat () { return data_format_; }
+
+ private:
+  std::vector<int32> strides_;
+  Padding padding_;
+  TensorFormat data_format_;
+};
 #endif  // INTEL_MKL_DNN
 
+/////////////////////////////////////////////////////////////////////
+///  Dummy Mkl op that is just used for operators that are intermediate
+///  output of node fusion in the graph
+/////////////////////////////////////////////////////////////////////
+
+template <typename Device, typename T>
+class MklDummyOp : public OpKernel {
+ public:
+  ~MklDummyOp() {}
+
+  explicit MklDummyOp(OpKernelConstruction* context) :
+    OpKernel(context) {}
+
+  void Compute(OpKernelContext* context) override {
+    TF_CHECK_OK(errors::Unimplemented("This is a dummy op."
+                                      "It should not have been invoked."));
+  }
+};
+
 }  // namespace tensorflow
 
 #endif  // TENSORFLOW_CORE_KERNELS_MKL_CONV_OPS_H_
-- 
GitLab


From 932cb708272957b88b8bd61a54647581394724ff Mon Sep 17 00:00:00 2001
From: Mahmoud Abuzaina <mahmoud.abuzaina@intel.com>
Date: Wed, 6 Dec 2017 13:07:49 -0800
Subject: [PATCH 0702/1225] MKL: Adding Concat op implementation using the open
 source MKL_DNN (#14458)

* Adding mkl-dnn concat

* Fixes per PR review
---
 tensorflow/core/kernels/mkl_concat_op.cc | 374 ++++++++++++++++++++++-
 1 file changed, 371 insertions(+), 3 deletions(-)

diff --git a/tensorflow/core/kernels/mkl_concat_op.cc b/tensorflow/core/kernels/mkl_concat_op.cc
index e6673b2ffb..d0175dfd71 100644
--- a/tensorflow/core/kernels/mkl_concat_op.cc
+++ b/tensorflow/core/kernels/mkl_concat_op.cc
@@ -1,11 +1,8 @@
 /* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
-
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
 You may obtain a copy of the License at
-
     http://www.apache.org/licenses/LICENSE-2.0
-
 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@@ -33,11 +30,22 @@ limitations under the License.
 #include "mkl_dnn_types.h"
 #include "tensorflow/core/util/mkl_util.h"
 
+#ifdef INTEL_MKL_DNN
+#include "mkldnn.hpp"
+
+using mkldnn::stream;
+using mkldnn::concat;
+#endif
+
 namespace tensorflow {
 typedef Eigen::ThreadPoolDevice CPUDevice;
 
+// List of TensorShape objects. Used in Concat/Split layers.
+typedef std::vector<TensorShape> TensorShapeList;
+
 enum AxisArgumentName { NAME_IS_AXIS, NAME_IS_CONCAT_DIM };
 
+
 // TODO(intelft) Check if we can reuse existing EigenConcatOp using Mutable
 // reference inputs.
 // --------------------------------------------------------------------------
@@ -55,6 +63,8 @@ class EigenConcatBaseOp : public OpKernel {
   // we need to have empty Compute because Compute is pure virtual function.
   void Compute(OpKernelContext* c) {}
 
+#ifndef INTEL_MKL_DNN
+
   void Compute(OpKernelContext* c, const std::vector<Tensor>& values) {
     const Tensor* concat_dim_tensor;
     const char* axis_attribute_name =
@@ -139,8 +149,89 @@ class EigenConcatBaseOp : public OpKernel {
       ConcatCPU<T>(c->device(), inputs_flat, &output_flat);
     }
   }
+
+#else  // MKL_DNN
+
+void Compute(OpKernelContext* c, const std::vector<Tensor>& values,
+                        const TensorShapeList& input_shapes) {
+    const Tensor* concat_dim_tensor;
+    const char* axis_attribute_name =
+        AxisArgName == NAME_IS_AXIS
+            ? "axis"
+            : AxisArgName == NAME_IS_CONCAT_DIM ? "concat_dim" : "<invalid>";
+    OP_REQUIRES_OK(c, c->input(axis_attribute_name, &concat_dim_tensor));
+    OP_REQUIRES(c, IsLegacyScalar(concat_dim_tensor->shape()),
+                errors::InvalidArgument(
+                    axis_attribute_name,
+                    " tensor should be a scalar integer, but got shape ",
+                    concat_dim_tensor->shape().DebugString()));
+    const int32 concat_dim =
+        internal::SubtleMustCopy(concat_dim_tensor->scalar<int32>()());
+    // Instead of accessing values from context, we use input to Compute.
+    const int N = values.size();
+    const int input_dims = input_shapes[0].dims();
+    const TensorShape& input_shape = input_shapes[0];
+
+    int32 axis = concat_dim < 0 ? concat_dim + input_dims : concat_dim;
+    OP_REQUIRES(c,
+                (0 <= axis && axis < input_dims) ||
+                    (allow_legacy_scalars() && concat_dim == 0),
+                errors::InvalidArgument(
+                    "ConcatOp : Expected concatenating dimensions in the range "
+                    "[",
+                    -input_dims, ", ", input_dims, "), but got ", concat_dim));
+    // Note that we reduce the concat of n-dimensional tensors into a two
+    // dimensional concat. Assuming the dimensions of any input/output
+    // tensor are {x0, x1,...,xn-1, y0, y1,...,ym-1}, where the concat is along
+    // the dimension indicated with size y0, we flatten it to {x, y}, where y =
+    // Prod_i(yi) and x = ((n > 0) ? Prod_i(xi) : 1).
+    ConstMatrixVector inputs_flat;
+    inputs_flat.reserve(N);
+    int64 inputs_flat_dim0 = 1;
+    for (int d = 0; d < axis; ++d) {
+      inputs_flat_dim0 *= input_shape.dim_size(d);
+    }
+    int64 output_concat_dim = 0;
+    const bool input_is_scalar = IsLegacyScalar(input_shape);
+    for (int i = 0; i < N; ++i) {
+      const auto in = values[i];
+      const bool in_is_scalar = IsLegacyScalar(input_shapes[i]);
+      OP_REQUIRES(
+          c, (input_shapes[i].dims() == input_dims) ||
+              (input_is_scalar && in_is_scalar),
+          errors::InvalidArgument(
+              "ConcatOp : Ranks of all input tensors should match: shape[0] = ",
+              input_shape.DebugString(), " vs. shape[", i,
+              "] = ", input_shapes[i].DebugString()));
+      if (in.NumElements() > 0) {
+        int64 inputs_flat_dim1 = in.NumElements() / inputs_flat_dim0;
+        inputs_flat.emplace_back(new typename TTypes<T, 2>::ConstMatrix(
+            in.shaped<T, 2>({inputs_flat_dim0, inputs_flat_dim1})));
+      }
+      output_concat_dim += input_shapes[i].dims() > 0 ?
+                           input_shapes[i].dim_size(axis) : 1;
+    }
+
+    TensorShape output_shape(input_shape);
+    if (output_shape.dims() == 0) {
+      output_shape.AddDim(output_concat_dim);
+    } else {
+      output_shape.set_dim(axis, output_concat_dim);
+    }
+    Tensor* output = nullptr;
+    OP_REQUIRES_OK(c, c->allocate_output(0, output_shape, &output));
+    if (output->NumElements() > 0) {
+      int64 output_dim1 = output->NumElements() / inputs_flat_dim0;
+      auto output_flat = output->shaped<T, 2>({inputs_flat_dim0, output_dim1});
+      ConcatCPU<T>(c->device(), inputs_flat, &output_flat);
+    }
+  }
+
+#endif
 };
 
+#ifndef INTEL_MKL_DNN
+
 // --------------------------------------------------------------------------
 //                      Mkl Concat Op
 // --------------------------------------------------------------------------
@@ -327,6 +418,7 @@ class MklConcatOp : public OpKernel {
     OP_REQUIRES_OK(context, context->status());
   }
 
+
  private:
   typedef struct {
     TensorFormat data_format;
@@ -435,8 +527,284 @@ class MklConcatOp : public OpKernel {
         mkl_tensor->flat<uint8>().data(),
         mkl_tensor->flat<uint8>().size() * sizeof(uint8));
   }
+
+  // overloading methods with input shapes as a list of TensorShape's
+  void CallEigenVersion(OpKernelContext* context, const OpInputList& values,
+                        const TensorShapeList& input_shapes) {
+    CHECK_EQ(values.size(), input_shapes.size());
+
+    std::vector<Tensor> converted_values;
+    for (int i = 0; i < input_shapes.size(); i++) {
+      converted_values.push_back(values[i]);
+    }
+
+    // Call Eigen concat.
+    eigen_concat_op_.Compute(context, converted_values);
+
+    // Set dummy Mkl tensor as output Mkl tensor for this op.
+    MklShape mkl_tensor_mkl_shape;
+    mkl_tensor_mkl_shape.SetMklTensor(false);
+    mkl_tensor_mkl_shape.SetDimensions(4);
+    Tensor* mkl_tensor = nullptr;
+    TensorShape mkl_tensor_tf_shape;
+    mkl_tensor_tf_shape.AddDim(
+        SIZE_OF_MKL_SERIAL_DATA(mkl_tensor_mkl_shape.GetDimension()));
+    int tf_output_index = 0;
+    context->allocate_output(
+        GetTensorMetaDataIndex(tf_output_index, context->num_outputs()),
+        mkl_tensor_tf_shape, &mkl_tensor);
+    mkl_tensor_mkl_shape.SerializeMklShape(
+        mkl_tensor->flat<uint8>().data(),
+        mkl_tensor->flat<uint8>().size() * sizeof(uint8));
+  }
 };
 
+#else
+
+// --------------------------------------------------------------------------
+//                      Mkl Concat Op
+// --------------------------------------------------------------------------
+
+template <typename Device, typename T, AxisArgumentName AxisArgName>
+class MklConcatOp : public OpKernel {
+ private:
+  TensorFormat data_format_;
+  EigenConcatBaseOp<Device, T, AxisArgName> eigen_concat_op_;
+
+ public:
+  typedef std::vector<std::unique_ptr<typename TTypes<T, 2>::ConstMatrix>>
+      ConstMatrixVector;
+
+  explicit MklConcatOp(OpKernelConstruction* c)
+      : OpKernel(c), eigen_concat_op_(c) {}
+
+  void Compute(OpKernelContext* context) override {
+    try {
+      auto cpu_engine = engine(engine::cpu, 0);
+      OpInputList input_tensors;
+      GetMklInputList(context, "values", &input_tensors);
+      const int N = input_tensors.size();
+
+      // Get Tensor shapes.
+      std::vector<MklDnnShape> input_shapes(N);
+      GetMklShapeList(context, "values", &input_shapes);
+
+      const Tensor& concat_dim_tensor = (AxisArgName == NAME_IS_CONCAT_DIM)
+                    ? MklGetInput(context, 0) : MklGetInput(context, N);
+      // Sanity checks
+      OP_REQUIRES(context, IsLegacyScalar(concat_dim_tensor.shape()),
+        errors::InvalidArgument(
+            "Concat dim tensor should be a scalar integer, but got shape ",
+            concat_dim_tensor.shape().DebugString()));
+      int32 concat_dim = internal::SubtleMustCopy(
+                           concat_dim_tensor.scalar<int32>()());
+      if (concat_dim < 0) concat_dim = N + concat_dim;
+
+      // check that ranks of all tensors match
+      // and that their shapes match except for concat_dim.
+      int i = 0;
+      bool invoke_eigen = false;
+      bool are_all_mkl_inputs = true, are_all_tf_inputs = true;
+      const TensorShape expected_shape = input_shapes[0].IsMklTensor() ?
+                                         input_shapes[0].GetTfShape() :
+                                         input_tensors[0].shape();
+      size_t expected_dims = expected_shape.dims();
+      for (auto& s : input_shapes) {
+        if (s == expected_shape) {++i; continue;}
+
+        TensorShape s_shape = s.IsMklTensor() ? s.GetTfShape() :
+                      input_tensors[i].shape();
+        size_t s_dims = s_shape.dims();
+
+        OP_REQUIRES(context, s_dims == expected_dims,
+                  errors::InvalidArgument(
+                      "_MklConcatOp : Ranks of all input tensors should match:"
+                      " input dimensions = ",
+                      s_dims, " vs. expected rank = ", expected_dims));
+
+        for (int d = 0; d < expected_dims; ++d) {
+          if (d == concat_dim) continue;
+
+          size_t expected_size = expected_shape.dim_size(d);
+          size_t s_size = s_shape.dim_size(d);
+          OP_REQUIRES(
+            context, expected_size == s_size,
+            errors::InvalidArgument("_MklConcatOp : Dimensions of inputs "
+                    "should match: shape[0][", d, "]= ", expected_size,
+                    " vs. shape[", i, "][", d, "] = ", s_size));
+        }
+
+        if (s.IsMklTensor())
+          are_all_tf_inputs = false;
+        else
+          are_all_mkl_inputs = false;
+
+        if (s_dims != 4) invoke_eigen = true;
+        ++i;
+      }
+
+      // All inputs are not in one format (TF or MKL). This is mixed input case.
+      // We can potentially optimize this case by converting all TF inputs
+      // to Mkl format. But currently, we fall to Eigen for this case.
+      // It may be possible to convert inputs that in TF format to Mkl
+      // format and avoid calling eigen version.
+      if (!are_all_tf_inputs && !are_all_mkl_inputs) invoke_eigen = true;
+
+      // Temporary fallback to Eigen until MKLDNN Concat performance
+      // is improved. To be removed.
+      invoke_eigen = true;
+
+      // Call Eigen library
+      if (invoke_eigen) {
+        TensorShapeList tf_input_shapes;
+        i = 0;
+        for (auto& s : input_shapes) {
+          TensorShape s_shape = s.IsMklTensor() ? s.GetTfShape() :
+                                input_tensors[i].shape();
+          tf_input_shapes.push_back(s_shape);
+          ++i;
+        }
+        CallEigenVersion(context, input_tensors, tf_input_shapes);
+        return;
+      }
+
+      memory::dims dst_dims;
+      if (are_all_mkl_inputs)
+        dst_dims = TFShapeToMklDnnDims(input_shapes[0].GetTfShape());
+      else
+        // When all the inputs are in Tensorflow format, we don't know
+        // what is the input data format. In that case, we just use
+        // output format that is same as input formats.
+        dst_dims = TFShapeToMklDnnDims(input_tensors[0].shape());
+
+      std::vector<memory::primitive_desc> srcs_pd;
+      std::vector<MklDnnData<T>> srcs(N, MklDnnData<T>(&cpu_engine));
+      int64 dst_concat_dim_size = 0;
+      for (int k =0; k < N; k++) {
+        bool is_mkl_tensor = input_shapes[k].IsMklTensor();
+        memory::dims src_dims;
+
+        // Same comment as dst_dims for src_dims.
+        src_dims = (is_mkl_tensor) ?
+                   TFShapeToMklDnnDims(input_shapes[k].GetTfShape()) :
+                   TFShapeToMklDnnDims(input_tensors[k].shape());
+
+        dst_concat_dim_size += src_dims[concat_dim];
+        auto src_md = is_mkl_tensor ? input_shapes[k].GetMklLayout() :
+          // It does not matter what data format we use here (NHWC or NCHW).
+          // We just need to ensure that output of Concat uses same data format
+          // as input.
+                  memory::desc(src_dims, MklDnnType<T>(), memory::format::nhwc);
+
+        srcs[k].SetUsrMem(src_md, &input_tensors[k]);
+        auto src_mpd = srcs[k].GetUsrMemPrimDesc();
+        srcs_pd.push_back(src_mpd);
+      }
+      dst_dims[concat_dim] = dst_concat_dim_size;
+
+      MklDnnData<T> dst(&cpu_engine);
+      memory::desc dst_md({}, memory::data_undef, memory::format_undef);
+      memory::dims dst_dims_in_nchw;
+      if (are_all_mkl_inputs) {
+        // Since we are passing a specific format for destination,
+        // we need to have dst_dims in MklDnn order (NCHW).
+        auto orig_tf_format = input_shapes[0].GetTfDataFormat();
+        dst_dims_in_nchw = MklDnnDimsInNCHW(dst_dims,
+                               MklDnnDataFormatToTFDataFormat(orig_tf_format));
+        // We will set the output in the same format as input to avoid layout
+        // conversions.
+        // Currently we are setting dst format same as input format.
+        // See if we can make this choice in a better way.
+        dst_md = memory::desc(dst_dims_in_nchw, MklDnnType<T>(),
+                 (memory::format) input_shapes[0].GetMklLayout().data.format);
+      } else {
+        // Again, format does not matter here. We just need to make it same as
+        // input format.
+        dst_md = memory::desc(dst_dims, MklDnnType<T>(), memory::format::nhwc);
+      }
+
+      std::vector<primitive::at> inputs;
+      for (int k=0; k < input_tensors.size(); k++)
+        inputs.push_back(srcs[k].GetOpMem());
+
+      // If all inputs are in MKL format, then meaning of concat_dim needs to
+      // change. Value of concat_dim is tied to input Tensorflow data format
+      // (NHWC or NCHW). MklDnn dimensions are in NCHW order. So if Tensorflow
+      // tensors are in NCHW order, then concat_dim semantics is preserved.
+      // But ifinput tensors are in NHWC order, then semantics need to change.
+      // E.g., if we are concatinating over Channel (dimension 3 for NHWC),
+      // then since MklDnn order is NCHW, concat_dim needs to be 1.
+      if (are_all_mkl_inputs)
+        concat_dim = input_shapes[0].TfDimIdx(concat_dim);
+
+      auto concat_pd = concat::primitive_desc(dst_md, concat_dim, srcs_pd);
+
+      MklDnnShape dnn_shape_dst;
+      TensorShape tf_shape_dst;
+      Tensor* dst_tensor = nullptr;
+      if (are_all_mkl_inputs) {
+        dnn_shape_dst.SetMklTensor(true);
+        auto dst_pd = concat_pd.dst_primitive_desc();
+        dnn_shape_dst.SetMklLayout(&dst_pd);
+        dnn_shape_dst.SetElemType(MklDnnType<T>());
+        dnn_shape_dst.SetTfLayout(dst_dims.size(), dst_dims_in_nchw,
+                                  input_shapes[0].GetTfDataFormat());
+        tf_shape_dst.AddDim((dst_pd.get_size() / sizeof(T)));
+      } else {
+        dnn_shape_dst.SetMklTensor(false);
+        tf_shape_dst = MklDnnDimsToTFShape(dst_dims);
+      }
+      AllocateOutputSetMklShape(context, 0, &dst_tensor,
+                                tf_shape_dst, dnn_shape_dst);
+      CHECK_NOTNULL(dst_tensor);
+
+      dst_md = dnn_shape_dst.IsMklTensor() ?
+               dnn_shape_dst.GetMklLayout() : dst_md;
+      dst.SetUsrMem(dst_md, dst_tensor);
+
+      auto concat_op = concat(concat_pd, inputs, dst.GetOpMem());
+      std::vector<primitive> net;
+      net.push_back(concat_op);
+      stream(stream::kind::eager).submit(net).wait();
+    } catch (mkldnn::error &e) {
+        string error_msg = "Status: " + std::to_string(e.status) +
+               ", message: " + string(e.message) + ", in file " +
+               string(__FILE__) + ":" + std::to_string(__LINE__);
+        OP_REQUIRES_OK(context, errors::Aborted(
+                "Operation received an exception:", error_msg));
+    }
+  }
+
+  void CallEigenVersion(OpKernelContext* context, const OpInputList& values,
+                        const TensorShapeList& input_shapes) {
+    CHECK_EQ(values.size(), input_shapes.size());
+
+    std::vector<Tensor> converted_values;
+    for (int i = 0; i < input_shapes.size(); i++)
+      converted_values.push_back(values[i]);
+
+    // Call Eigen concat.
+    eigen_concat_op_.Compute(context, converted_values, input_shapes);
+
+    // Set output Mkl tensor for this op.
+    MklDnnShape dnn_shape_output;
+    dnn_shape_output.SetMklTensor(false);
+    dnn_shape_output.SetDimensions(4);
+    Tensor* output_tensor = nullptr;
+    TensorShape tf_shape_output;
+    tf_shape_output.AddDim(
+        dnn_shape_output.GetSerializeBufferSize());
+    context->allocate_output(
+        GetTensorMetaDataIndex(0, context->num_outputs()),
+        tf_shape_output, &output_tensor);
+    dnn_shape_output.SerializeMklDnnShape(
+        output_tensor->flat<uint8>().data(),
+        output_tensor->flat<uint8>().size() * sizeof(uint8));
+  }
+};
+
+#endif
+
 /* Use optimized concat for float type only */
 #define REGISTER_MKL_CPU(type)                                              \
   REGISTER_KERNEL_BUILDER(Name("_MklConcat")                                \
-- 
GitLab


From 4c5564a3018f5351b2c158d1391199ff11aaebd2 Mon Sep 17 00:00:00 2001
From: Eugene Brevdo <ebrevdo@google.com>
Date: Wed, 6 Dec 2017 13:27:54 -0800
Subject: [PATCH 0703/1225] Remove vestigial test modification for TensorArray.

PiperOrigin-RevId: 178143663
---
 tensorflow/python/kernel_tests/tensor_array_ops_test.py | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/tensorflow/python/kernel_tests/tensor_array_ops_test.py b/tensorflow/python/kernel_tests/tensor_array_ops_test.py
index 835fdbe2aa..aad2443eea 100644
--- a/tensorflow/python/kernel_tests/tensor_array_ops_test.py
+++ b/tensorflow/python/kernel_tests/tensor_array_ops_test.py
@@ -43,10 +43,6 @@ import tensorflow.python.ops.nn_grad  # pylint: disable=unused-import
 from tensorflow.python.platform import test
 
 
-# TODO(ebrevdo): Delete this line after Dec. 4, 2017.
-tensor_array_ops._ENABLE_IDENTICAL_ELEMENT_SHAPES = True
-
-
 def _make_converter(tf_dtype):
   def _converter(x):
     if tf_dtype == dtypes.string:
-- 
GitLab


From 4567c7736cb0d0557ab6ce172dc7c4c140a0614c Mon Sep 17 00:00:00 2001
From: Benoit Steiner <bsteiner@google.com>
Date: Wed, 6 Dec 2017 14:15:52 -0800
Subject: [PATCH 0704/1225] Fixed a few bugs in the dependency optimizer.

PiperOrigin-RevId: 178151202
---
 .../optimizers/dependency_optimizer.cc        | 31 +++++++++++--------
 1 file changed, 18 insertions(+), 13 deletions(-)

diff --git a/tensorflow/core/grappler/optimizers/dependency_optimizer.cc b/tensorflow/core/grappler/optimizers/dependency_optimizer.cc
index 541b479797..950c738dc2 100644
--- a/tensorflow/core/grappler/optimizers/dependency_optimizer.cc
+++ b/tensorflow/core/grappler/optimizers/dependency_optimizer.cc
@@ -40,7 +40,7 @@ int RemoveInput(NodeDef* node, const string& input, NodeMap* node_map) {
     if (node->input(pos) == input) {
       node->mutable_input()->SwapElements(pos, node->input_size() - 1);
       node->mutable_input()->RemoveLast();
-      node_map->RemoveOutput(node->name(), NodeName(input));
+      node_map->RemoveOutput(NodeName(input), node->name());
     } else {
       ++pos;
     }
@@ -119,7 +119,7 @@ void DependencyOptimizer::OptimizeNode(int node_idx,
     std::unordered_set<string> ctrl_inputs;
     int pos = 0;
     while (pos < node->input_size()) {
-      const string& old_input = node->input(pos);
+      const string old_input = node->input(pos);
       if (IsControlInput(old_input)) {
         if (!ctrl_inputs.insert(old_input).second) {
           // We found a duplicate control input. Remove it.
@@ -142,8 +142,6 @@ void DependencyOptimizer::OptimizeNode(int node_idx,
     }
     node->set_op("NoOp");
     node->clear_attr();
-    nodes_to_simplify->PushBack(node_idx);
-    return;
   }
 
   // Remove NoOp nodes if their fan-in or fan-out is less than 2.
@@ -164,6 +162,7 @@ void DependencyOptimizer::OptimizeNode(int node_idx,
     const auto output_nodes = node_map_->GetOutputs(node->name());
     const int num_outputs = output_nodes.size();
     const int num_inputs = node->input_size();
+
     if (num_inputs * num_outputs > num_inputs + num_outputs) {
       return;
     }
@@ -171,23 +170,23 @@ void DependencyOptimizer::OptimizeNode(int node_idx,
     std::vector<NodeDef*> input_nodes;
     for (int i = 0; i < num_inputs; ++i) {
       NodeDef* tmp = node_map_->GetNode(node->input(i));
-      if (tmp != nullptr) {
-        input_nodes.push_back(tmp);
-      }
+      CHECK_NE(tmp, nullptr);
+      input_nodes.push_back(tmp);
     }
+
     for (auto consumer : output_nodes) {
       bool updated_consumer = false;
       VLOG(1) << "***** Considering consumer  " << consumer->name() << "\n"
               << consumer->DebugString();
       for (int i = 0; i < num_inputs; ++i) {
-        const string& input = node->input(i);
+        const NodeDef* input = input_nodes[i];
         // Forward dependency from input to consumer if it doesn't already
         // depend on it.
-        if (node_map_->GetOutputs(NodeName(input)).count(consumer) == 0) {
-          consumer->add_input(input);
+        if (node_map_->GetOutputs(input->name()).count(consumer) == 0) {
+          consumer->add_input(AsControlDependency(input->name()));
           updated_consumer = true;
-          node_map_->AddOutput(NodeName(input), consumer->name());
-          nodes_to_simplify->PushBack(node_to_idx_[input_nodes[i]]);
+          node_map_->AddOutput(input->name(), consumer->name());
+          nodes_to_simplify->PushBack(node_to_idx_[input]);
         }
       }
       // Remove dependency on node from consumer.
@@ -200,9 +199,15 @@ void DependencyOptimizer::OptimizeNode(int node_idx,
       }
     }
 
-    if (nodes_to_preserve_.find(node->name()) == nodes_to_preserve_.end()) {
+    node_map_->RemoveOutputs(node->name());
+    if (fetch_nodes_known_ &&
+        nodes_to_preserve_.find(node->name()) == nodes_to_preserve_.end()) {
       // Mark the node for deletion.
       nodes_to_delete->insert(node_idx);
+
+      // Unconnect the node from its inputs to enable further optimizations.
+      node_map_->RemoveInputs(node->name());
+      node->clear_input();
     }
   }
 }
-- 
GitLab


From 278d35832d2175d51b49d83bb40b105acaa930ac Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 6 Dec 2017 14:16:41 -0800
Subject: [PATCH 0705/1225] Bugfix: Cast dtype of log_det_jacobian to match
 log_prob in TransformedDistribution.

PiperOrigin-RevId: 178151318
---
 .../transformed_distribution_test.py          | 21 +++++++++++++++++++
 .../conditional_transformed_distribution.py   |  4 ++--
 .../distributions/transformed_distribution.py |  8 ++++---
 3 files changed, 28 insertions(+), 5 deletions(-)

diff --git a/tensorflow/contrib/distributions/python/kernel_tests/transformed_distribution_test.py b/tensorflow/contrib/distributions/python/kernel_tests/transformed_distribution_test.py
index 103d8e1862..cbaf74d3f6 100644
--- a/tensorflow/contrib/distributions/python/kernel_tests/transformed_distribution_test.py
+++ b/tensorflow/contrib/distributions/python/kernel_tests/transformed_distribution_test.py
@@ -200,6 +200,27 @@ class TransformedDistributionTest(test.TestCase):
       self.assertAllEqual([2], multi_logit_normal.event_shape)
       self.assertAllEqual([2], multi_logit_normal.event_shape_tensor().eval())
 
+  def testCastLogDetJacobian(self):
+    """Test log_prob when Jacobian and log_prob dtypes do not match."""
+
+    with self.test_session():
+      # Create an identity bijector whose jacobians have dtype int32
+      int_identity = bs.Inline(
+          forward_fn=array_ops.identity,
+          inverse_fn=array_ops.identity,
+          inverse_log_det_jacobian_fn=lambda x: math_ops.cast(0, dtypes.int32),
+          forward_log_det_jacobian_fn=lambda x: math_ops.cast(0, dtypes.int32),
+          is_constant_jacobian=True)
+      normal = self._cls()(
+          distribution=ds.Normal(loc=0., scale=1.),
+          bijector=int_identity,
+          validate_args=True)
+
+      y = normal.sample()
+      normal.log_prob(y).eval()
+      normal.prob(y).eval()
+      normal.entropy().eval()
+
   def testEntropy(self):
     with self.test_session():
       shift = np.array([[-1, 0, 1], [-1, -2, -3]], dtype=np.float32)
diff --git a/tensorflow/contrib/distributions/python/ops/conditional_transformed_distribution.py b/tensorflow/contrib/distributions/python/ops/conditional_transformed_distribution.py
index 599c855cda..1d4c5660d8 100644
--- a/tensorflow/contrib/distributions/python/ops/conditional_transformed_distribution.py
+++ b/tensorflow/contrib/distributions/python/ops/conditional_transformed_distribution.py
@@ -121,7 +121,7 @@ class ConditionalTransformedDistribution(
     log_prob = self.distribution.log_prob(x, **distribution_kwargs)
     if self._is_maybe_event_override:
       log_prob = math_ops.reduce_sum(log_prob, self._reduce_event_indices)
-    return ildj + log_prob
+    return math_ops.cast(ildj, log_prob.dtype) + log_prob
 
   @distribution_util.AppendDocstring(kwargs_dict=_condition_kwargs_dict)
   def _prob(self, y, bijector_kwargs=None, distribution_kwargs=None):
@@ -143,7 +143,7 @@ class ConditionalTransformedDistribution(
     prob = self.distribution.prob(x, **distribution_kwargs)
     if self._is_maybe_event_override:
       prob = math_ops.reduce_prod(prob, self._reduce_event_indices)
-    return math_ops.exp(ildj) * prob
+    return math_ops.exp(math_ops.cast(ildj, prob.dtype)) * prob
 
   @distribution_util.AppendDocstring(kwargs_dict=_condition_kwargs_dict)
   def _log_cdf(self, y, bijector_kwargs=None, distribution_kwargs=None):
diff --git a/tensorflow/python/ops/distributions/transformed_distribution.py b/tensorflow/python/ops/distributions/transformed_distribution.py
index ba25b2c348..1efcf9d32e 100644
--- a/tensorflow/python/ops/distributions/transformed_distribution.py
+++ b/tensorflow/python/ops/distributions/transformed_distribution.py
@@ -434,7 +434,7 @@ class TransformedDistribution(distribution_lib.Distribution):
     log_prob = self.distribution.log_prob(x)
     if self._is_maybe_event_override:
       log_prob = math_ops.reduce_sum(log_prob, self._reduce_event_indices)
-    log_prob = ildj + log_prob
+    log_prob += math_ops.cast(ildj, log_prob.dtype)
     if self._is_maybe_event_override:
       log_prob.set_shape(array_ops.broadcast_static_shape(
           y.get_shape().with_rank_at_least(1)[:-1], self.batch_shape))
@@ -457,7 +457,7 @@ class TransformedDistribution(distribution_lib.Distribution):
     prob = self.distribution.prob(x)
     if self._is_maybe_event_override:
       prob = math_ops.reduce_prod(prob, self._reduce_event_indices)
-    prob *= math_ops.exp(ildj)
+    prob *= math_ops.exp(math_ops.cast(ildj, prob.dtype))
     if self._is_maybe_event_override:
       prob.set_shape(array_ops.broadcast_static_shape(
           y.get_shape().with_rank_at_least(1)[:-1], self.batch_shape))
@@ -546,7 +546,9 @@ class TransformedDistribution(distribution_lib.Distribution):
       ], 0)
       entropy = array_ops.tile(entropy, multiples)
     dummy = array_ops.zeros([], self.dtype)
-    entropy -= self.bijector.inverse_log_det_jacobian(dummy)
+    entropy -= math_ops.cast(
+        self.bijector.inverse_log_det_jacobian(dummy),
+        entropy.dtype)
     entropy.set_shape(self.batch_shape)
     return entropy
 
-- 
GitLab


From 04807b625b0260c4daff98a618f3742c5fe1a782 Mon Sep 17 00:00:00 2001
From: Mahmoud Abuzaina <mahmoud.abuzaina@intel.com>
Date: Wed, 6 Dec 2017 14:28:04 -0800
Subject: [PATCH 0706/1225] MKL: Adding MKL-DNN pooling ops (#14679)

* Adding MKL-DNN pooling ops

* Disabling LRN MKL path; forcing to Eigen

* Using the real undef preprocessor command.
---
 tensorflow/core/kernels/mkl_avgpooling_op.cc  | 306 ++++++++++++++-
 tensorflow/core/kernels/mkl_lrn_op.cc         |   2 +-
 tensorflow/core/kernels/mkl_maxpooling_op.cc  | 357 +++++++++++++++++-
 .../core/kernels/mkl_pooling_ops_common.cc    |  38 +-
 .../core/kernels/mkl_pooling_ops_common.h     | 342 +++++++++++++++++
 5 files changed, 1038 insertions(+), 7 deletions(-)

diff --git a/tensorflow/core/kernels/mkl_avgpooling_op.cc b/tensorflow/core/kernels/mkl_avgpooling_op.cc
index d90baee069..d751a70fc8 100644
--- a/tensorflow/core/kernels/mkl_avgpooling_op.cc
+++ b/tensorflow/core/kernels/mkl_avgpooling_op.cc
@@ -24,10 +24,25 @@
 
 #include "tensorflow/core/kernels/mkl_pooling_ops_common.h"
 
+#ifdef INTEL_MKL_DNN
+#include "mkldnn.hpp"
+using mkldnn::memory;
+using mkldnn::error;
+using mkldnn::pooling_forward;
+using mkldnn::pooling_backward;
+using mkldnn::padding_kind;
+using mkldnn::engine;
+using mkldnn::prop_kind;
+using mkldnn::algorithm;
+#endif
+
 namespace tensorflow {
 
 typedef Eigen::ThreadPoolDevice CPUDevice;
 
+// For now, MKL-ML is default. So making MKL-DNN not a default choice.
+#ifndef INTEL_MKL_DNN
+
 template <typename Device, typename T>
 class MklAvgPoolingOp : public OpKernel {
  public:
@@ -132,7 +147,7 @@ class MklAvgPoolingOp : public OpKernel {
         E_SUCCESS);
 
     mkl_context.MklCleanup();
-  }
+  }  // Compute
 
  private:
   typedef struct {
@@ -411,7 +426,293 @@ class MklAvgPoolingGradOp : public OpKernel {
   std::vector<int32> stride_;
   Padding padding_;
   TensorFormat data_format_;
-};
+};  // MklAvgPoolingGradOp
+
+
+#else  // INTEL_MKL_DNN is defined
+
+template <typename Device, typename T>
+class MklAvgPoolingOp : public MklPoolingForwardOpBase<T> {
+ public:
+  explicit MklAvgPoolingOp(OpKernelConstruction* context)
+  : MklPoolingForwardOpBase<T>(context) {
+    // Workspace is an MKLDNN construct that is only used in Max Pooling.
+    // So set workspace_enabled_ to false.
+    this->workspace_enabled_ = false;
+  }
+
+  void Compute(OpKernelContext* context) override {
+    try {
+      auto cpu_engine = engine(engine::cpu, 0);
+      const Tensor& input_tensor = MklGetInput(context,
+              this->kInputTensorIndexInput);
+      MklDnnShape dnn_shape_input;
+      GetMklShape(context, this->kInputTensorIndexInput, &dnn_shape_input);
+      this->SanityCheckInput(context, input_tensor, dnn_shape_input);
+      if (!context->status().ok()) return;
+
+      MklDnnData<T> dnn_data_input(&cpu_engine);
+      MklDnnData<T> dnn_data_output(&cpu_engine);
+
+      // initialize variables for the pooling op
+      MklPoolParameters pool_params;
+      // Get the input tensor and initialize the pooling parameters
+      this->ConfigureInput(context, dnn_shape_input,
+                          input_tensor, &pool_params,
+                          &dnn_data_input);
+      OP_REQUIRES_OK(context, context->status());
+
+      // Declare output tensor
+      Tensor* output_tensor = nullptr;
+      memory::dims output_dims_mkl_order;
+      this->GetOutputDims(pool_params, &output_dims_mkl_order);
+
+      // If input is in Mkl layout, then just get the memory format from it
+      // directly, instead of using input data_format to AvgPool.
+      if (dnn_shape_input.IsMklTensor()) {
+        dnn_data_output.SetUsrMem(output_dims_mkl_order,
+                static_cast<memory::format>(dnn_data_input.GetUsrMemDesc()
+                    .data.format));
+
+      } else {
+          dnn_data_output.SetUsrMem(output_dims_mkl_order,
+              this->data_format_mkldnn_);
+      }
+
+        // describe the memory layout
+      dnn_data_output.SetOpMemDesc(output_dims_mkl_order, memory::format::any);
+
+      // 3. create a pooling primitive descriptor
+      auto pool_desc = pooling_forward::desc(prop_kind::forward,
+              algorithm::pooling_avg_exclude_padding,
+              dnn_data_input.GetUsrMemDesc(),
+              dnn_data_output.GetUsrMemDesc(),
+              memory::dims({  pool_params.row_stride,
+                              pool_params.col_stride}),
+              memory::dims({  pool_params.window_rows,
+                              pool_params.window_cols}),
+              memory::dims({  static_cast<int>(pool_params.pad_top),
+                              static_cast<int>(pool_params.pad_left)}),
+              memory::dims({  static_cast<int>(pool_params.pad_bottom),
+                              static_cast<int>(pool_params.pad_right)}),
+              TFPaddingToMklDnnPadding(this->padding_));
+      auto pool_prim_desc = pooling_forward::primitive_desc(pool_desc,
+                                                 cpu_engine);
+
+      this->AllocateOutputTensor(context, pool_prim_desc, output_dims_mkl_order,
+                            this->data_format_mkldnn_, &output_tensor);
+      CHECK_NOTNULL(output_tensor);
+
+      OP_REQUIRES_OK(context, context->status());
+      dnn_data_output.SetUsrMemDataHandle(output_tensor);
+
+      this->PrepareAndExecuteNet(pool_prim_desc,
+                                &dnn_data_input,
+                                &dnn_data_output);
+    } catch (mkldnn::error &e) {
+        string error_msg = "Status: " + std::to_string(e.status) +
+                        ", message: " + string(e.message) +
+                        ", in file " + string(__FILE__) + ":" +
+                        std::to_string(__LINE__);
+        OP_REQUIRES_OK(context,
+                        errors::Aborted("Operation received an exception:",
+                                         error_msg));
+    }
+  }  // Compute
+};  // MklAvgPoolingOp
+
+//-----------------------------------------------------------------------------
+
+template <class Device, class T>
+class MklAvgPoolingGradOp : public MklPoolingBackwardOpBase<T> {
+ public:
+  explicit MklAvgPoolingGradOp(OpKernelConstruction* context)
+      : MklPoolingBackwardOpBase<T>(context) {
+  }
+
+  void Compute(OpKernelContext* context) override {
+    try {
+      auto cpu_engine = engine(engine::cpu, 0);
+      MklDnnShape original_input_mkl_shape, input_gradient_mkl_shape;
+      const Tensor& tensor_in_shape = MklGetInput(context,
+          kInputTensorIndexInputShape);
+      const Tensor& input_gradient_tensor = MklGetInput(context,
+          kInputTensorIndexInputGradient);
+      GetMklShape(context, kInputTensorIndexInputShape,
+            &original_input_mkl_shape);
+      GetMklShape(context, kInputTensorIndexInputGradient,
+            &input_gradient_mkl_shape);
+
+
+      SanityCheckInputs(context, tensor_in_shape,
+                        input_gradient_tensor,
+                        original_input_mkl_shape,
+                        input_gradient_mkl_shape);
+      if (!context->status().ok()) return;
+
+      // Used to allocate output_diff_src/diff_src
+      // and create pool_fwd mdm desc
+      // 0. Input("orig_input_shape: int32") //NOT a T Tensor!
+      // 1. Input("grad: T")
+
+      MklDnnData<T> input_gradient_diff_dst(&cpu_engine);
+      MklDnnData<T> output_diff_src(&cpu_engine);
+      Tensor* output_tensor_diff_src = nullptr;
+      TensorShape original_input_shape;
+      MklPoolParameters pool_params;
+      memory::dims output_dims_mkl_order, original_input_dims_nchw;
+      // Configure the original input memory descriptor
+      memory::desc original_input_md = ConfigureOriginalInput(context,
+                                      tensor_in_shape,
+                                      original_input_mkl_shape,
+                                      &original_input_dims_nchw,
+                                      &pool_params,
+                                      &original_input_shape);
+
+      // configure the original output memory descriptor
+      // by definition, the shape of the original output is the same
+      // as the shape of the gradient diff_dst
+      memory::desc original_output_md = this->ConfigureOriginalOutput(
+                pool_params, input_gradient_mkl_shape, output_dims_mkl_order);
+
+      memory::desc target_diff_dst_md = this->ConfigureInputGradient(
+                                    input_gradient_mkl_shape,
+                                    input_gradient_tensor,
+                                    &input_gradient_diff_dst,
+                                    original_output_md);
+      // The shape of the output diff src needs to be the same shape as the
+      // original input. But we will set its format to be same as the format of
+      // input gradient. We won't use format of original input since it will
+      // always be in Tensorflow layout (given that AvgPoolGrad gets shape of
+      // the input rather than actual input).
+      output_diff_src.SetUsrMem(original_input_dims_nchw,
+                                static_cast<memory::format>(
+                                  target_diff_dst_md.data.format));
+
+      // Create the forward pooling primitive descriptor so we can reference it
+      // in the backward pooling primitive descriptor
+      auto pool_fwd_desc = pooling_forward::desc(prop_kind::forward,
+              algorithm::pooling_avg_exclude_padding,
+              original_input_md,
+              original_output_md,
+              memory::dims({  pool_params.row_stride,
+                              pool_params.col_stride}),
+              memory::dims({  pool_params.window_rows,
+                              pool_params.window_cols}),
+              memory::dims({  static_cast<int>(pool_params.pad_top),
+                              static_cast<int>(pool_params.pad_left)}),
+              memory::dims({  static_cast<int>(pool_params.pad_bottom),
+                              static_cast<int>(pool_params.pad_right)}),
+              TFPaddingToMklDnnPadding(this->padding_));
+      auto pool_fwd_prim_desc
+              = pooling_forward::primitive_desc(pool_fwd_desc,
+                                                  cpu_engine);
+
+      auto pool_bkwd_desc = pooling_backward::desc(
+              algorithm::pooling_avg_exclude_padding,
+              output_diff_src.GetUsrMemDesc(),
+              target_diff_dst_md,
+              memory::dims({  pool_params.row_stride,
+                              pool_params.col_stride}),
+              memory::dims({  pool_params.window_rows,
+                              pool_params.window_cols}),
+              memory::dims({  static_cast<int>(pool_params.pad_top),
+                              static_cast<int>(pool_params.pad_left)}),
+              memory::dims({  static_cast<int>(pool_params.pad_bottom),
+                              static_cast<int>(pool_params.pad_right)}),
+              TFPaddingToMklDnnPadding(this->padding_));
+      auto pool_bkwd_prim_desc
+                = pooling_backward::primitive_desc(pool_bkwd_desc,
+                                              cpu_engine,
+                                              pool_fwd_prim_desc);
+      this->AllocateOutputTensor(context, pool_bkwd_prim_desc,
+                      original_input_dims_nchw,
+                      this->data_format_mkldnn_,
+                      &output_tensor_diff_src);
+
+      output_diff_src.SetUsrMemDataHandle(output_tensor_diff_src);
+
+      this->PrepareAndExecuteNet(pool_bkwd_prim_desc,
+                          &input_gradient_diff_dst,
+                          &output_diff_src,
+                          memory::primitive_desc(
+                              target_diff_dst_md,
+                              cpu_engine));
+    } catch (mkldnn::error &e) {
+      string error_msg = "Status: " + std::to_string(e.status) +
+                      ", message: " + string(e.message) +
+                      ", in file " + string(__FILE__) + ":" +
+                      std::to_string(__LINE__);
+      OP_REQUIRES_OK(context,
+                      errors::Aborted("Compute received an exception:",
+                                      error_msg));
+    }
+  }  // Compute
+
+ private:
+  // 0. Input("orig_input_shape: int32")
+  // 1. Input("grad: T")
+  const int kInputTensorIndexInputShape = 0;
+  const int kInputTensorIndexInputGradient = 1;
+
+  memory::desc ConfigureOriginalInput(OpKernelContext* context,
+        const Tensor& tensor_original_input_shape,
+        const MklDnnShape& original_input_mkl_shape,
+        memory::dims* original_input_dims_mkl_order,
+        MklPoolParameters* pool_params,
+        TensorShape* input_tensor_shape) {
+    CHECK_NOTNULL(original_input_dims_mkl_order);
+    CHECK_NOTNULL(pool_params);
+    CHECK_NOTNULL(input_tensor_shape);
+    // For AvgPoolGrad, we only get the size of the original input because
+    // The original data is irrelvant.
+    auto shape_vec = tensor_original_input_shape.vec<int32>();
+    for (int64 i = 0; i < tensor_original_input_shape.NumElements(); ++i) {
+      input_tensor_shape->AddDim(shape_vec(i));
+    }
+
+    return MklPoolingBackwardOpBase<T>::ConfigureOriginalInput(
+                                              context,
+                                              tensor_original_input_shape,
+                                              original_input_mkl_shape,
+                                              original_input_dims_mkl_order,
+                                              pool_params,
+                                              *input_tensor_shape);
+}
+
+  void SanityCheckInputs(OpKernelContext* context,
+                        const Tensor& tensor_in_shape,
+                        const Tensor& input_gradient_tensor,
+                        const MklDnnShape& original_input_mkl_shape,
+                        const MklDnnShape& input_gradient_mkl_shape) {
+    if (!original_input_mkl_shape.IsMklTensor()) {
+      OP_REQUIRES(context, tensor_in_shape.dims() == 1 &&
+          tensor_in_shape.NumElements() == 4,
+          errors::InvalidArgument("original input shape must be "
+                "1-dimensional and 4 elements"));
+    } else {
+      OP_REQUIRES(context, original_input_mkl_shape.GetDimension() == 1 &&
+          original_input_mkl_shape.DimSize(0) == 4,
+          errors::InvalidArgument("original input shape must be "
+                "1-dimensional and 4 elements"));
+    }
+
+    if (!input_gradient_mkl_shape.IsMklTensor()) {
+      // For avgpooling, input_gradient_diff_dst should have 4 dimensions.
+      OP_REQUIRES(context, input_gradient_tensor.dims() == 4,
+          errors::InvalidArgument("Gradient shape must be "
+                              "4-dimensional"));
+    } else {
+      OP_REQUIRES(context, input_gradient_mkl_shape.GetDimension() == 4,
+          errors::InvalidArgument("Gradient shape must be "
+                              "4-dimensional"));
+    }
+  }
+};  // MklAvgPoolingGradOp
+
+
+
+#endif  // INTEL_MKL_DNN
 
 REGISTER_KERNEL_BUILDER(Name("_MklAvgPool")
                             .Device(DEVICE_CPU)
@@ -427,3 +728,4 @@ REGISTER_KERNEL_BUILDER(Name("_MklAvgPoolGrad")
 
 }  // namespace tensorflow
 #endif  // INTEL_MKL
+
diff --git a/tensorflow/core/kernels/mkl_lrn_op.cc b/tensorflow/core/kernels/mkl_lrn_op.cc
index aa08e93924..227765e46d 100644
--- a/tensorflow/core/kernels/mkl_lrn_op.cc
+++ b/tensorflow/core/kernels/mkl_lrn_op.cc
@@ -17,7 +17,7 @@ limitations under the License.
 // See docs in ../ops/nn_ops.cc. This opkernel uses MKL library, create MKL
 // layout and primitives, use MKL dnn primitives to compute local
 // response normalization
-
+#undef INTEL_MKL
 #ifdef INTEL_MKL
 
 #define EIGEN_USE_THREADS
diff --git a/tensorflow/core/kernels/mkl_maxpooling_op.cc b/tensorflow/core/kernels/mkl_maxpooling_op.cc
index 846bb5710d..de4d7d2e72 100644
--- a/tensorflow/core/kernels/mkl_maxpooling_op.cc
+++ b/tensorflow/core/kernels/mkl_maxpooling_op.cc
@@ -16,17 +16,32 @@ limitations under the License.
 // See docs in ../ops/nn_ops.cc.
 #ifdef INTEL_MKL
 #define EIGEN_USE_THREADS
-
 #include "tensorflow/core/framework/op_kernel.h"
 #include "tensorflow/core/kernels/mkl_pooling_ops_common.h"
 #include "tensorflow/core/lib/core/errors.h"
 #include "tensorflow/core/util/mkl_util.h"
 #include "tensorflow/core/util/padding.h"
 
+#ifdef INTEL_MKL_DNN
+#include <algorithm>
+#include "mkldnn.hpp"
+using mkldnn::memory;
+using mkldnn::error;
+using mkldnn::pooling_forward;
+using mkldnn::pooling_backward;
+using mkldnn::padding_kind;
+using mkldnn::engine;
+using mkldnn::prop_kind;
+using mkldnn::algorithm;
+#endif
+
 namespace tensorflow {
 
 typedef Eigen::ThreadPoolDevice CPUDevice;
 
+// For now, MKL-ML is default. So making MKL-DNN not a default choice.
+#ifndef INTEL_MKL_DNN
+
 // An implementation of MaxPooling (forward).
 template <typename Device, typename T>
 class MklMaxPoolingOp : public OpKernel {
@@ -475,8 +490,348 @@ class MklMaxPoolingGradOp : public OpKernel {
   TensorFormat data_format_;
 
   bool workspace_enabled_;
+};  // MklMaxPoolingGradOp
+
+#else  // INTEL_MKL_DNN is defined
+
+// An implementation of MaxPooling (forward).
+template <typename Device, typename T>
+class MklMaxPoolingOp : public MklPoolingForwardOpBase<T> {
+ public:
+  explicit MklMaxPoolingOp(OpKernelConstruction* context)
+            : MklPoolingForwardOpBase<T>(context) {
+    // In Max Pooling, MKLDNN does not allow passing workspace as NULL.
+    // So we set workspace_enabled_ to true.
+    this->workspace_enabled_ = true;
+  }
+
+  void Compute(OpKernelContext* context) override {
+    try {
+      auto cpu_engine = engine(engine::cpu, 0);
+      const Tensor& input_tensor = MklGetInput(context,
+                this->kInputTensorIndexInput);
+      MklDnnShape dnn_shape_input;
+      GetMklShape(context, this->kInputTensorIndexInput, &dnn_shape_input);
+      this->SanityCheckInput(context, input_tensor, dnn_shape_input);
+      if (!context->status().ok()) return;
+
+      MklDnnData<T> dnn_data_input(&cpu_engine);
+      MklDnnData<T> dnn_data_output(&cpu_engine);
+      MklDnnData<T> dnn_data_wksp(&cpu_engine);
+
+      // initialize variables for the pooling op
+      MklPoolParameters pool_params;
+      // Get the input tensor and initialize the pooling parameters
+      this->ConfigureInput(context, dnn_shape_input,
+                        input_tensor, &pool_params,
+                        &dnn_data_input);
+      OP_REQUIRES_OK(context, context->status());
+
+      // Declare output tensor
+      Tensor* output_tensor = nullptr;
+      memory::dims output_dims_mkl_order;
+      this->GetOutputDims(pool_params, &output_dims_mkl_order);
+
+      // If input is in Mkl layout, then just get the memory format from it
+      // directly, instead of using input data_format to MaxPool.
+      if (dnn_shape_input.IsMklTensor()) {
+        dnn_data_output.SetUsrMem(output_dims_mkl_order,
+                                  static_cast<memory::format>(
+              dnn_data_input.GetUsrMemDesc().data.format));
+      } else {
+        dnn_data_output.SetUsrMem(output_dims_mkl_order,
+                                  this->data_format_mkldnn_);
+      }
+
+      // describe the memory layout; let mkl-dnn choose the best for the op
+      dnn_data_output.SetOpMemDesc(output_dims_mkl_order, memory::format::any);
+
+      auto pool_desc = pooling_forward::desc(prop_kind::forward,
+            algorithm::pooling_max,
+            dnn_data_input.GetUsrMemDesc(),
+            dnn_data_output.GetUsrMemDesc(),
+            memory::dims({  pool_params.row_stride,
+                            pool_params.col_stride}),
+            memory::dims({  pool_params.window_rows,
+                            pool_params.window_cols}),
+            memory::dims({  static_cast<int>(pool_params.pad_top),
+                            static_cast<int>(pool_params.pad_left)}),
+            memory::dims({  static_cast<int>(pool_params.pad_bottom),
+                            static_cast<int>(pool_params.pad_right)}),
+            TFPaddingToMklDnnPadding(this->padding_));
+        auto pool_fwd_desc = pooling_forward::primitive_desc(pool_desc,
+            cpu_engine);
+
+      this->AllocateOutputTensor(context, pool_fwd_desc, output_dims_mkl_order,
+                            this->data_format_mkldnn_, &output_tensor);
+      OP_REQUIRES_OK(context, context->status());
+      dnn_data_output.SetUsrMemDataHandle(output_tensor);
+
+      AllocateWorkspaceTensor(context, pool_fwd_desc, &dnn_data_wksp);
+      OP_REQUIRES_OK(context, context->status());
+
+      this->PrepareAndExecuteNet(pool_fwd_desc, &dnn_data_input,
+                        &dnn_data_output, &dnn_data_wksp);
+    } catch (mkldnn::error &e) {
+        string error_msg = "Status: " + std::to_string(e.status) +
+                        ", message: " + string(e.message) +
+                        ", in file " + string(__FILE__) + ":" +
+                        std::to_string(__LINE__);
+        OP_REQUIRES_OK(context,
+                        errors::Aborted("Compute received an exception:",
+                                         error_msg));
+    }
+  }  // Compute
+
+ private:
+    const int kOutputTensorIndexWorkspace = 1;
+
+    void AllocateWorkspaceTensor(OpKernelContext* context,
+                const pooling_forward::primitive_desc& pool_fwd_prim_desc,
+                MklDnnData<T>* dnn_data_wksp) {
+        CHECK_NOTNULL(dnn_data_wksp);
+        Tensor* workspace_tensor = nullptr;
+        memory::primitive_desc workspace_pd
+                    = pool_fwd_prim_desc.workspace_primitive_desc();
+        size_t workspace_t_elems = this->GetNumTElements(workspace_pd);
+        MklDnnShape workspace_mkl_shape;
+        workspace_mkl_shape.SetMklTensor(false);
+        TensorShape workspace_tf_shape;
+        workspace_tf_shape.AddDim(workspace_t_elems);
+        AllocateOutputSetMklShape(context, kOutputTensorIndexWorkspace,
+                                &workspace_tensor,
+                                workspace_tf_shape, workspace_mkl_shape);
+        CHECK_NOTNULL(workspace_tensor);
+        dnn_data_wksp->SetUsrMem(workspace_pd, workspace_tensor);
+    }
 };
 
+// The operation to compute MaxPool gradients.
+// It takes three inputs:
+//   - The original input tensor
+//   - The original output tensor
+//   - Backprop tensor for output
+// It produces one output: backprop tensor for input.
+template <class Device, class T>
+class MklMaxPoolingGradOp : public MklPoolingBackwardOpBase<T> {
+ public:
+  explicit MklMaxPoolingGradOp(OpKernelConstruction* context)
+      : MklPoolingBackwardOpBase<T>(context) {
+  }
+
+  void Compute(OpKernelContext* context) override {
+    try {
+        auto cpu_engine = engine(engine::cpu, 0);
+        const Tensor& orig_input_tensor = MklGetInput(context,
+            kInputTensorIndexOrigInput);
+        const Tensor& orig_output_tensor = MklGetInput(context,
+            kInputTensorIndexOrigOutput);
+        const Tensor& grad_tensor = MklGetInput(context,
+            kInputTensorIndexGradient);
+        const Tensor& workspace_tensor = MklGetInput(context,
+            kInputTensorIndexWorkspace);
+        MklDnnShape orig_input_mkl_shape,
+                    orig_output_mkl_shape,
+                    grad_mkl_shape,
+                    workspace_mkl_shape;
+        GetMklShape(context, kInputTensorIndexOrigInput,
+            &orig_input_mkl_shape);
+        GetMklShape(context, kInputTensorIndexOrigOutput,
+            &orig_output_mkl_shape);
+        GetMklShape(context, kInputTensorIndexGradient,
+            &grad_mkl_shape);
+        GetMklShape(context, kInputTensorIndexWorkspace,
+            &workspace_mkl_shape);
+
+        SanityCheckInputs(context,
+                            orig_input_tensor, orig_output_tensor,
+                            grad_tensor, workspace_tensor,
+                            orig_input_mkl_shape, orig_output_mkl_shape,
+                            grad_mkl_shape, workspace_mkl_shape);
+        if (!context->status().ok()) return;
+
+        MklDnnData<T> grad_dnn_data(&cpu_engine);
+        MklDnnData<T> workspace_dnn_data(&cpu_engine);
+        MklDnnData<T> output_dnn_data(&cpu_engine);
+        Tensor* output_tensor = nullptr;
+        MklPoolParameters pool_params;
+        TensorShape orig_input_shape;
+        memory::dims output_dims_mkl_order, orig_input_dims_mkl_order;
+        memory::desc original_input_md = ConfigureOriginalInput(context,
+                                orig_input_tensor,
+                                orig_input_mkl_shape,
+                                &orig_input_dims_mkl_order,
+                                &pool_params,
+                                &orig_input_shape);
+
+        memory::desc original_output_md = this->ConfigureOriginalOutput(
+                                pool_params,
+                                orig_output_mkl_shape,
+                                output_dims_mkl_order);
+
+        memory::desc target_diff_dst_md =  this->ConfigureInputGradient(
+                                        grad_mkl_shape,
+                                        grad_tensor,
+                                        &grad_dnn_data,
+                                        original_output_md);
+
+        output_dnn_data.SetUsrMem(original_input_md);
+
+        // Create the forward pooling primitive descriptor so we can
+        // pass it as a hint to the backward pooling primitive descriptor
+        auto pool_fwd_desc = pooling_forward::desc(prop_kind::forward,
+                algorithm::pooling_max,
+                original_input_md,
+                original_output_md,
+                memory::dims({  pool_params.row_stride,
+                                pool_params.col_stride}),
+                memory::dims({  pool_params.window_rows,
+                                pool_params.window_cols}),
+                memory::dims({  static_cast<int>(pool_params.pad_top),
+                                static_cast<int>(pool_params.pad_left)}),
+                memory::dims({  static_cast<int>(pool_params.pad_bottom),
+                                static_cast<int>(pool_params.pad_right)}),
+                TFPaddingToMklDnnPadding(this->padding_));
+        auto pool_fwd_prim_desc
+                = pooling_forward::primitive_desc(pool_fwd_desc,
+                                                    cpu_engine);
+
+        auto pool_bkwd_desc = pooling_backward::desc(
+                algorithm::pooling_max,
+                output_dnn_data.GetUsrMemDesc(),
+                target_diff_dst_md,
+                memory::dims({  pool_params.row_stride,
+                                pool_params.col_stride}),
+                memory::dims({  pool_params.window_rows,
+                                pool_params.window_cols}),
+                memory::dims({  static_cast<int>(pool_params.pad_top),
+                                static_cast<int>(pool_params.pad_left)}),
+                memory::dims({  static_cast<int>(pool_params.pad_bottom),
+                                static_cast<int>(pool_params.pad_right)}),
+                TFPaddingToMklDnnPadding(this->padding_));
+        auto pool_bkwd_prim_desc
+            = pooling_backward::primitive_desc(pool_bkwd_desc,
+                                                cpu_engine,
+                                                pool_fwd_prim_desc);
+
+        this->AllocateOutputTensor(context, pool_bkwd_prim_desc,
+            orig_input_dims_mkl_order,
+            this->data_format_mkldnn_,
+            &output_tensor);
+        output_dnn_data.SetUsrMemDataHandle(output_tensor);
+
+        ConfigureWorkspace(workspace_tensor,
+                pool_fwd_prim_desc.workspace_primitive_desc(),
+                &workspace_dnn_data);
+        this->PrepareAndExecuteNet(pool_bkwd_prim_desc,
+                            &grad_dnn_data,
+                            &output_dnn_data,
+                            memory::primitive_desc(
+                                target_diff_dst_md,
+                                cpu_engine),
+                            &workspace_dnn_data);
+    } catch (mkldnn::error &e) {
+        string error_msg = "Status: " + std::to_string(e.status) +
+                        ", message: " + string(e.message) +
+                        ", in file " + string(__FILE__) + ":" +
+                        std::to_string(__LINE__);
+        OP_REQUIRES_OK(context,
+                        errors::Aborted("Compute received an exception:",
+                                         error_msg));
+    }
+  }  // Compute
+
+ private:
+    // .Input("orig_input: T")
+    // .Input("orig_output: T")
+    // .Input("grad: T")
+    // .Input("workspace: T")
+    const int kInputTensorIndexOrigInput = 0;
+    const int kInputTensorIndexOrigOutput = 1;
+    const int kInputTensorIndexGradient = 2;
+    const int kInputTensorIndexWorkspace = 3;
+    //  Output("output: T") in Base Class
+
+    memory::desc ConfigureOriginalInput(OpKernelContext* context,
+                                const Tensor& tensor_original_input,
+                                const MklDnnShape& original_input_mkl_shape,
+                                memory::dims* original_input_dims_mkl_order,
+                                MklPoolParameters* pool_params,
+                                TensorShape* input_tensor_shape) {
+        *input_tensor_shape = tensor_original_input.shape();
+        return MklPoolingBackwardOpBase<T>::ConfigureOriginalInput(
+                                        context,
+                                        tensor_original_input,
+                                        original_input_mkl_shape,
+                                        original_input_dims_mkl_order,
+                                        pool_params,
+                                        *input_tensor_shape);
+    }
+
+    void ConfigureWorkspace(const Tensor& workspace_tensor,
+                        memory::primitive_desc workspace_pd,
+                        MklDnnData<T> *workspace_dnn_data) {
+        CHECK_NOTNULL(workspace_dnn_data);
+
+        workspace_dnn_data->SetUsrMem(workspace_pd, &workspace_tensor);
+    }
+
+    void SanityCheckInputs(OpKernelContext* context,
+                            const Tensor& orig_input_tensor,
+                            const Tensor& orig_output_tensor,
+                            const Tensor& grad_tensor,
+                            const Tensor& workspace_tensor,
+                            const MklDnnShape& orig_input_mkl_shape,
+                            const MklDnnShape& orig_output_mkl_shape,
+                            const MklDnnShape& grad_mkl_shape,
+                            const MklDnnShape& workspace_mkl_shape) {
+        if (!orig_input_mkl_shape.IsMklTensor()) {
+            OP_REQUIRES(context, orig_input_tensor.dims() == 4,
+                errors::InvalidArgument("Original input shape must be "
+                "4-dimensional"));
+        } else {
+            OP_REQUIRES(context, orig_input_mkl_shape.GetDimension() == 4,
+                    errors::InvalidArgument("Original input shape must be "
+                    "4-dimensional"));
+        }
+        if (!orig_output_mkl_shape.IsMklTensor()) {
+            OP_REQUIRES(context, orig_output_tensor.dims() == 4,
+                errors::InvalidArgument("Original output must be "
+                        "4-dimensional"));
+        } else {
+            OP_REQUIRES(context, orig_output_mkl_shape.GetDimension() == 4,
+                    errors::InvalidArgument("Original output must be "
+                    "4-dimensional"));
+        }
+        if (!grad_mkl_shape.IsMklTensor()) {
+            OP_REQUIRES(context, grad_tensor.dims() == 4,
+                errors::InvalidArgument("Gradient must be 4-dimensional"));
+        } else {
+            OP_REQUIRES(context, grad_mkl_shape.GetDimension() == 4,
+                    errors::InvalidArgument("Gradient must be "
+                    "4-dimensional"));
+        }
+        if (this->workspace_enabled_){
+            // The workspace should not be an MKL tensor
+            OP_REQUIRES(context, workspace_mkl_shape.IsMklTensor() == false,
+                    errors::InvalidArgument("Workspace tensor should not"
+                                            " be an MKL Tensor."));
+            // It should only have one dimension
+            OP_REQUIRES(context, workspace_tensor.dims() == 1,
+                    errors::InvalidArgument("Workspace tensor must be "
+                                "1-dimensional"));
+        } else {
+            OP_REQUIRES(context, this->workspace_enabled_,
+                    errors::Unimplemented("MKL-DNN Max Pooling does not "
+                                "yet support the use case "
+                                "where MaxPoolGrad is called without first"
+                                " calling MaxPool."));
+        }
+    }
+};  // MklMaxPoolingGradOp
+
+#endif  // INTEL_MKL_DNN
+
 REGISTER_KERNEL_BUILDER(Name("_MklMaxPool")
                             .Device(DEVICE_CPU)
                             .TypeConstraint<float>("T")
diff --git a/tensorflow/core/kernels/mkl_pooling_ops_common.cc b/tensorflow/core/kernels/mkl_pooling_ops_common.cc
index 65e8852cfb..f7cadffd39 100644
--- a/tensorflow/core/kernels/mkl_pooling_ops_common.cc
+++ b/tensorflow/core/kernels/mkl_pooling_ops_common.cc
@@ -14,10 +14,13 @@ limitations under the License.
 ==============================================================================*/
 
 #ifdef INTEL_MKL
+
 #include <vector>
+#include <limits>
 #include "tensorflow/core/kernels/mkl_pooling_ops_common.h"
 #include "tensorflow/core/common_runtime/device.h"
 #include "tensorflow/core/framework/common_shape_fns.h"
+#include "tensorflow/core/kernels/bounds_check.h"
 
 namespace tensorflow {
 
@@ -39,6 +42,7 @@ void MklPoolParameters::Init(OpKernelContext* context,
   Init(context, ksize, stride, padding, data_format);
 }
 
+#ifndef INTEL_MKL_DNN
 // Initialization for MKL format
 void MklPoolParameters::Init(OpKernelContext* context,
                              const std::vector<int32>& ksize,
@@ -53,7 +57,22 @@ void MklPoolParameters::Init(OpKernelContext* context,
 
   Init(context, ksize, stride, padding, data_format);
 }
+#else
+// Initialization for MKL format
+void MklPoolParameters::Init(OpKernelContext* context,
+                             const std::vector<int32>& ksize,
+                             const std::vector<int32>& stride, Padding padding,
+                             TensorFormat data_format,
+                             const MklDnnShape* mklInputShape) {
+  // Get the input sizes
+  depth = mklInputShape->GetDimension('C');
+  tensor_in_cols = mklInputShape->GetDimension('W');
+  tensor_in_rows = mklInputShape->GetDimension('H');
+  tensor_in_batch = mklInputShape->GetDimension('N');
 
+  Init(context, ksize, stride, padding, data_format);
+}
+#endif  // INTEL_MKL_DNN
 // Common Initialization for TensorFlow and MKL formats
 void MklPoolParameters::Init(OpKernelContext* context,
                              const std::vector<int32>& ksize,
@@ -80,7 +99,7 @@ void MklPoolParameters::Init(OpKernelContext* context,
                   "MaxPooling supports exactly one of pooling across depth "
                   "or pooling across width/height."));
 
-  if (depth_window == 1) {
+  if (depth_window == 1) {  // we are pooling in the H and W
     OP_REQUIRES_OK(context, GetWindowedOutputSizeVerbose(
                                 tensor_in_rows, window_rows, row_stride,
                                 padding, &out_height, &pad_top, &pad_bottom));
@@ -88,7 +107,21 @@ void MklPoolParameters::Init(OpKernelContext* context,
     OP_REQUIRES_OK(context, GetWindowedOutputSizeVerbose(
                                 tensor_in_cols, window_cols, col_stride,
                                 padding, &out_width, &pad_left, &pad_right));
-  } else {
+#ifdef INTEL_MKL_DNN
+    // TF can work with int64, but mkldnn only supports int32
+    // Fail if the height or width are greater than MAX_INT
+
+    OP_REQUIRES(context, FastBoundsCheck(out_height,
+                                         std::numeric_limits<int>::max()),
+                errors::InvalidArgument("output height is too large"));
+
+    OP_REQUIRES(context, FastBoundsCheck(out_width,
+                                         std::numeric_limits<int>::max()),
+                errors::InvalidArgument("output width is too large"));
+
+#endif
+    out_depth = depth;  // output will have the same depth as the input
+  } else {  // we are pooling in the depth dimension
     // Our current version of depthwise max pooling does not support
     // any padding, and expects the depth_window to equal the depth
     // stride (no overlapping).
@@ -109,7 +142,6 @@ void MklPoolParameters::Init(OpKernelContext* context,
                 errors::Unimplemented("Depthwise max pooling is currently "
                                       "only implemented for CPU devices."));
 
-    pad_depth = 0;
     out_depth = depth / depth_window;
   }
 }
diff --git a/tensorflow/core/kernels/mkl_pooling_ops_common.h b/tensorflow/core/kernels/mkl_pooling_ops_common.h
index 92ea2beb25..d33e91a15d 100644
--- a/tensorflow/core/kernels/mkl_pooling_ops_common.h
+++ b/tensorflow/core/kernels/mkl_pooling_ops_common.h
@@ -18,9 +18,18 @@ limitations under the License.
 
 #ifdef INTEL_MKL
 #include <vector>
+#include <string>
 #include "tensorflow/core/util/mkl_util.h"
 #include "tensorflow/core/util/padding.h"
 
+#ifdef INTEL_MKL_DNN
+#include "mkldnn.hpp"
+using mkldnn::memory;
+using mkldnn::pooling_forward;
+using mkldnn::pooling_backward;
+using mkldnn::stream;
+#endif
+
 namespace tensorflow {
 
 typedef Eigen::ThreadPoolDevice CPUDevice;
@@ -51,14 +60,28 @@ struct MklPoolParameters {
   int pad_depth;
 
   TensorFormat data_format;
+  MklPoolParameters()
+    : depth(0)
+    , tensor_in_cols(0), tensor_in_rows(0), tensor_in_batch(0)
+    , window_rows(0), window_cols(0), depth_window(0)
+    , row_stride(0), col_stride(0), depth_stride(0)
+    , out_height(0), out_width(0), out_depth(0)
+    , pad_left(0), pad_right(0), pad_top(0), pad_bottom(0), pad_depth(0)
+    , data_format(TensorFormat::FORMAT_NCHW) {}
 
   // Updates context->status if there is an invalid input.
   void Init(OpKernelContext* context, const std::vector<int32>& ksize,
             const std::vector<int32>& stride, Padding padding,
             TensorFormat data_format, const TensorShape& tensor_in_shape);
+#ifndef INTEL_MKL_DNN
   void Init(OpKernelContext* context, const std::vector<int32>& ksize,
             const std::vector<int32>& stride, Padding padding,
             TensorFormat data_format, const MklShape* mkl_in_shape);
+#else
+  void Init(OpKernelContext* context, const std::vector<int32>& ksize,
+            const std::vector<int32>& stride, Padding padding,
+            TensorFormat data_format, const MklDnnShape* mkl_in_shape);
+#endif
 
  private:
   // Common initialization for TensorFlow and MKL formats
@@ -67,6 +90,325 @@ struct MklPoolParameters {
             TensorFormat data_format);
 };
 
+#ifdef INTEL_MKL_DNN
+
+template <class T>
+class MklPoolingOpBase : public OpKernel {
+ public:
+  explicit MklPoolingOpBase(OpKernelConstruction* context)
+            : OpKernel(context)
+            , workspace_enabled_(false) {
+      string data_format;
+      OP_REQUIRES_OK(context, context->GetAttr("data_format", &data_format));
+      OP_REQUIRES(context,
+            FormatFromString(data_format, &this->data_format_tf_),
+            errors::InvalidArgument("Invalid data format"));
+      this->data_format_mkldnn_
+                = TFDataFormatToMklDnnDataFormat(this->data_format_tf_);
+      OP_REQUIRES_OK(context, context->GetAttr("ksize", &this->ksize_));
+      OP_REQUIRES(context, this->ksize_.size() == 4,
+                  errors::InvalidArgument("Sliding window ksize field must "
+                                          "specify 4 dimensions"));
+      OP_REQUIRES_OK(context, context->GetAttr("strides", &this->stride_));
+      OP_REQUIRES(context, this->stride_.size() == 4,
+                  errors::InvalidArgument("Sliding window strides field must "
+                                          "specify 4 dimensions"));
+      OP_REQUIRES_OK(context, context->GetAttr("padding", &this->padding_));
+      OP_REQUIRES(context, this->ksize_[0] == 1 && this->stride_[0] == 1,
+                  errors::Unimplemented("Pooling is not yet supported on the "
+                                        "batch dimension."));
+
+      // We may not get this attribute for this node if it does not go through
+      // graph rewrite pass. So we do not check for error while retrieving this
+      // attribute value.
+      context->GetAttr("workspace_enabled", &this->workspace_enabled_);
+    }
+  void Compute(OpKernelContext* context) override = 0;
+
+ protected:
+  // Calculate output shape of pooling op in MKL-DNN and TensorFlow order.
+  // MKL-DNN uses NCHW for output order. But TensorFlow output will be in
+  // NHWC or NCHW format depending on data format. Function expects
+  // output height and output width to have already been int32
+  // bounds-checked
+  void GetOutputDims(const MklPoolParameters& mkl_pool_params,
+                    memory::dims* output_dims_mkl_order) {
+    // MKL-DNN always needs output in NCHW format.
+    *output_dims_mkl_order = { mkl_pool_params.tensor_in_batch,
+                              mkl_pool_params.out_depth,
+                              static_cast<int>(mkl_pool_params.out_height),
+                              static_cast<int>(mkl_pool_params.out_width)};
+  }
+
+  void InitMklPoolParameters(OpKernelContext* context,
+                      MklPoolParameters* pool_params,
+                      const MklDnnShape& original_input_mkl_shape,
+                      const TensorShape& input_tensor_shape) {
+    if (!original_input_mkl_shape.IsMklTensor()) {
+      pool_params->Init(context, this->ksize_, this->stride_, this->padding_,
+          this->data_format_tf_, input_tensor_shape);
+    } else {
+      pool_params->Init(context, this->ksize_, this->stride_, this->padding_,
+          this->data_format_tf_, &original_input_mkl_shape);
+    }
+  }
+
+  // Checks to make sure that the memory we need to allocate
+  // is a multiple of sizeof(T)
+  // returns the number of elements
+  size_t GetNumTElements(const memory::primitive_desc& pd) {
+    size_t num_bytes = pd.get_size();
+    size_t ret_val = num_bytes / sizeof(T);
+    if ( num_bytes % sizeof(T) != 0 ) {
+        ret_val++;
+    }
+    return ret_val;
+  }
+
+
+  std::vector<int32> ksize_;
+  std::vector<int32> stride_;
+  Padding padding_;
+  TensorFormat data_format_tf_;
+  memory::format data_format_mkldnn_;
+  bool workspace_enabled_;
+};
+
+template <class T>
+class MklPoolingForwardOpBase : public MklPoolingOpBase<T> {
+ public:
+  explicit MklPoolingForwardOpBase<T>(OpKernelConstruction* context)
+      : MklPoolingOpBase<T>(context) {}
+  void Compute(OpKernelContext* context) override = 0;
+
+ protected:
+  void ConfigureInput(OpKernelContext* context,
+                    const MklDnnShape& input_mkl_shape,
+                    const Tensor& input_tensor,
+                    MklPoolParameters* pool_params,
+                    MklDnnData<T>* dnn_data_input) {
+    CHECK_NOTNULL(pool_params);
+    CHECK_NOTNULL(dnn_data_input);
+    TensorShape input_tensor_shape = input_tensor.shape();
+    memory::desc input_md = input_mkl_shape.IsMklTensor()
+                        ? input_mkl_shape.GetMklLayout()
+                        : memory::desc(
+                              TFShapeToMklDnnDimsInNCHW(
+                                  input_tensor_shape, this->data_format_tf_),
+                              MklDnnType<T>(),
+                              this->data_format_mkldnn_);
+    dnn_data_input->SetUsrMem(input_md, &input_tensor);
+    this->InitMklPoolParameters(context, pool_params,
+                      input_mkl_shape, input_tensor_shape);
+  }
+
+  void AllocateOutputTensor(OpKernelContext* context,
+            const pooling_forward::primitive_desc& pool_fwd_prim_desc,
+            const memory::dims output_dims_mkl_order,
+            const memory::format& output_tf_format,
+            Tensor** output_tensor) {
+    CHECK_NOTNULL(output_tensor);
+    memory::primitive_desc dst_pd = pool_fwd_prim_desc.dst_primitive_desc();
+
+    MklDnnShape output_mkl_shape;
+    output_mkl_shape.SetMklTensor(true);
+    output_mkl_shape.SetMklLayout(&dst_pd);
+    output_mkl_shape.SetElemType(MklDnnType<T>());
+    output_mkl_shape.SetTfLayout(output_dims_mkl_order.size(),
+                              output_dims_mkl_order,
+                              output_tf_format);
+    TensorShape output_tf_shape;
+
+    // only allocate enough space for the elements we need.
+    output_tf_shape.AddDim(this->GetNumTElements(dst_pd));
+    AllocateOutputSetMklShape(context, kOutputTensorIndexOutput,
+                            output_tensor,
+                            output_tf_shape, output_mkl_shape);
+    CHECK_NOTNULL(*output_tensor);
+  }
+
+  void PrepareAndExecuteNet(
+                  const pooling_forward::primitive_desc& pool_fwd_desc,
+                  const MklDnnData<T>* src,
+                  MklDnnData<T>* dst,
+                  MklDnnData<T>* wksp = nullptr) {
+    std::vector<primitive> net;
+
+    // Create pooling primitive and add it to net
+    if (wksp != nullptr) {
+        net.push_back(pooling_forward(pool_fwd_desc,
+                        src->GetOpMem(),
+                        dst->GetOpMem(),
+                        wksp->GetOpMem()));
+    } else {
+        net.push_back(pooling_forward(pool_fwd_desc,
+            src->GetOpMem(),
+            dst->GetOpMem()));
+    }
+    stream(stream::kind::eager).submit(net).wait();
+  }
+
+
+  void SanityCheckInput(OpKernelContext* context,
+                  const Tensor& input_tensor,
+                  const MklDnnShape& input_mkl_shape) {
+    if (!input_mkl_shape.IsMklTensor()) {
+      OP_REQUIRES(context, input_tensor.dims() == 4,
+          errors::InvalidArgument("Input must be 4-dimensional"));
+    } else {
+        OP_REQUIRES(context, input_mkl_shape.GetDimension() == 4,
+                errors::InvalidArgument("Input shape must be "
+                "4-dimensional"));
+    }
+  }
+  // .Input("value: T")
+  // .Output("output: T")
+  const int kInputTensorIndexInput = 0;
+  const int kOutputTensorIndexOutput = 0;
+};  // MklPoolingForwardBaseOp
+
+
+template <class T>
+class MklPoolingBackwardOpBase : public MklPoolingOpBase<T> {
+ public:
+  explicit MklPoolingBackwardOpBase<T>(OpKernelConstruction* context)
+          : MklPoolingOpBase<T>(context) { }
+  void Compute(OpKernelContext* context) override = 0;
+
+ protected:
+  const int kOutputTensorIndexOutput = 0;
+
+  void AllocateOutputTensor(OpKernelContext* context,
+            const pooling_backward::primitive_desc& pool_bkwd_prim_desc,
+            const memory::dims output_dims_mkl_order,
+            const memory::format& output_tf_format,
+            Tensor** output_tensor) {
+    CHECK_NOTNULL(output_tensor);
+    memory::primitive_desc dst_pd
+                = pool_bkwd_prim_desc.diff_src_primitive_desc();
+    MklDnnShape output_mkl_shape;
+    output_mkl_shape.SetMklTensor(true);
+    output_mkl_shape.SetMklLayout(&dst_pd);
+    output_mkl_shape.SetElemType(MklDnnType<T>());
+    output_mkl_shape.SetTfLayout(output_dims_mkl_order.size(),
+                              output_dims_mkl_order,
+                              output_tf_format);
+
+    TensorShape output_tf_shape;
+    output_tf_shape.AddDim(this->GetNumTElements(dst_pd));
+    AllocateOutputSetMklShape(context, kOutputTensorIndexOutput,
+                            output_tensor,
+                            output_tf_shape, output_mkl_shape);
+    CHECK_NOTNULL(*output_tensor);
+  }
+
+  void PrepareAndExecuteNet(
+    const pooling_backward::primitive_desc& pool_bkwd_desc,
+    MklDnnData<T>* input_gradient_diff_dst,
+    MklDnnData<T>* output_diff_src,
+    const memory::primitive_desc& target_diff_dst_pd,
+    const MklDnnData<T>* workspace = nullptr) {
+
+    std::vector<primitive> net;
+
+    // If the input gradient isn't in the same format as the output
+    // reorder it to the same format as the output
+    input_gradient_diff_dst->CheckReorderToOpMem(
+            target_diff_dst_pd,
+            &net);
+
+    // Create pooling primitive and add it to net
+    if (nullptr == workspace) {
+      net.push_back(pooling_backward(pool_bkwd_desc,
+                              input_gradient_diff_dst->GetOpMem(),
+                              output_diff_src->GetOpMem()));
+    } else {
+      net.push_back(pooling_backward(pool_bkwd_desc,
+                                  input_gradient_diff_dst->GetOpMem(),
+                                  workspace->GetOpMem(),
+                                  output_diff_src->GetOpMem()));
+    }
+    stream(stream::kind::eager).submit(net).wait();
+  }
+
+  // Max Pooling and Avg Pooling have slightly different implementations
+  // Takes the Tensor containing original input data and the original
+  // mkl Dnn Shape and populates other data
+  memory::desc ConfigureOriginalInput(OpKernelContext* context,
+                              const Tensor& tensor_original_input_shape,
+                              const MklDnnShape& original_input_mkl_shape,
+                              memory::dims* original_input_dims_nchw,
+                              MklPoolParameters* pool_params,
+                              const TensorShape& input_tensor_shape) {
+    CHECK_NOTNULL(original_input_dims_nchw);
+    CHECK_NOTNULL(pool_params);
+    this->InitMklPoolParameters(context, pool_params,
+                          original_input_mkl_shape,
+                          input_tensor_shape);
+
+    *original_input_dims_nchw
+          = original_input_mkl_shape.IsMklTensor()
+          ? original_input_mkl_shape.GetSizesAsMklDnnDims()
+          : TFShapeToMklDnnDimsInNCHW(input_tensor_shape,
+        this->data_format_tf_);
+
+    return  original_input_mkl_shape.IsMklTensor()
+      ? original_input_mkl_shape.GetMklLayout()
+      : memory::desc(*original_input_dims_nchw,
+                      MklDnnType<T>(),
+                      this->data_format_mkldnn_);
+  }
+
+  memory::desc ConfigureOriginalOutput(const MklPoolParameters& pool_params,
+                                const MklDnnShape& original_output_mkl_shape,
+                                      memory::dims output_dims_mkl_order) {
+    this->GetOutputDims(pool_params, &output_dims_mkl_order);
+
+    return original_output_mkl_shape.IsMklTensor()
+            ? original_output_mkl_shape.GetMklLayout()
+            : memory::desc(output_dims_mkl_order,
+                         MklDnnType<T>(),
+                         this->data_format_mkldnn_);
+  }
+
+  memory::desc ConfigureInputGradient(
+        const MklDnnShape& input_gradient_mkl_shape,
+        const Tensor& input_gradient_tensor,
+        MklDnnData<T>* input_gradient_dnn_data,
+        const memory::desc& original_output_md) {
+    // Configure the gradient as is
+    memory::desc original_input_grad_md
+          = input_gradient_mkl_shape.IsMklTensor()
+          ? input_gradient_mkl_shape.GetMklLayout()
+          : memory::desc(TFShapeToMklDnnDimsInNCHW(
+                    input_gradient_tensor.shape(),
+                    this->data_format_tf_),
+                    MklDnnType<T>(), this->data_format_mkldnn_);
+
+    input_gradient_dnn_data->SetUsrMem(original_input_grad_md,
+                &input_gradient_tensor);
+
+    // Check to see if input grad diff dst is in the right format
+    // Create a new memory descriptor with the same shape as the
+    // original, but the format of the other tensors.
+    memory::format original_output_format =
+            static_cast<memory::format>(original_output_md.data.format);
+    bool grad_reorder_needed = input_gradient_dnn_data->IsReorderNeeded(
+                                    original_output_format);
+    memory::dims diff_dst_dims = input_gradient_mkl_shape.IsMklTensor()
+        ? input_gradient_mkl_shape.GetSizesAsMklDnnDims()
+        : TFShapeToMklDnnDimsInNCHW(input_gradient_tensor.shape(),
+                    this->data_format_tf_);
+    memory::desc target_diff_dst_md = memory::desc(diff_dst_dims,
+        MklDnnType<T>(), original_output_format);
+
+    return grad_reorder_needed
+            ? target_diff_dst_md
+            : original_input_grad_md;
+  }
+};
+#endif  // INTEL_MKL_DNN
+
 //-------------------------------------------------------------------
 // Utility functions
 
-- 
GitLab


From 8d3a25abe96849ca78d40510cd107f8f3bbbc371 Mon Sep 17 00:00:00 2001
From: Nathan Luehr <nluehr@nvidia.com>
Date: Wed, 6 Dec 2017 14:28:41 -0800
Subject: [PATCH 0707/1225] Update Eigen hash for fix of predux bug (#14770)

For Maxwell and earlier GPUs, Eigen was incorectly casting fp16 values to
unsigned int during some reductions. This results in incorrect results in
Tensorflow's xent and sparse_xent ops when applied to fp16 data.
---
 tensorflow/workspace.bzl | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl
index de85cc1af0..72f52ae1e9 100644
--- a/tensorflow/workspace.bzl
+++ b/tensorflow/workspace.bzl
@@ -95,11 +95,11 @@ def tf_workspace(path_prefix="", tf_repo_name=""):
   tf_http_archive(
       name = "eigen_archive",
       urls = [
-          "https://mirror.bazel.build/bitbucket.org/eigen/eigen/get/429aa5254200.tar.gz",
-          "https://bitbucket.org/eigen/eigen/get/429aa5254200.tar.gz",
+          "https://mirror.bazel.build/bitbucket.org/eigen/eigen/get/b6e6d0cf6a77.tar.gz",
+          "https://bitbucket.org/eigen/eigen/get/b6e6d0cf6a77.tar.gz",
       ],
-      sha256 = "61d8b6fc4279dd1dda986fb1677d15e3d641c07a3ea5abe255790b1f0c0c14e9",
-      strip_prefix = "eigen-eigen-429aa5254200",
+      sha256 = "0840c497f2749b5e90bda666aab96be6da90dc75b4e21ca9843cae69b7fed52a",
+      strip_prefix = "eigen-eigen-b6e6d0cf6a77",
       build_file = str(Label("//third_party:eigen.BUILD")),
   )
 
-- 
GitLab


From aacf735be6c16bc8831868bd30506baf1668bac9 Mon Sep 17 00:00:00 2001
From: Asim Shankar <ashankar@google.com>
Date: Wed, 6 Dec 2017 14:28:46 -0800
Subject: [PATCH 0708/1225] Go: Don't require -std=c99 for the cgo code.

This should fix the error:
github.com/tensorflow/tensorflow/tensorflow/go/graph.go:31:3: error:
'for' loop initial declarations are only allowed in C99 mode
 //  for (int i = 0; i < num_shapes; i++) {
    ^

in some continuous builds like:
https://ci.tensorflow.org/job/tensorflow-master-cpu/3297/consoleFull
---
 tensorflow/go/graph.go | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tensorflow/go/graph.go b/tensorflow/go/graph.go
index f200a8e00a..fb0af87acc 100644
--- a/tensorflow/go/graph.go
+++ b/tensorflow/go/graph.go
@@ -28,7 +28,8 @@ package tensorflow
 //                                 int num_shapes) {
 //  const int64_t** dims =
 //    (const int64_t**)malloc(sizeof(const int64_t*) * num_shapes);
-//  for (int i = 0; i < num_shapes; i++) {
+//  int i = 0;
+//  for (i = 0; i < num_shapes; i++) {
 //    dims[i] = flat_dims;
 //    if (num_dims[i] > 0) {
 //      // flat_dims will be NULL iff num_shapes is 0 or all elements in num_dims are <= 0.
-- 
GitLab


From 4972095bb594c238de90688f1209ae8aeb7a1312 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 6 Dec 2017 14:25:26 -0800
Subject: [PATCH 0709/1225] Clean up bijectors by removing _impl files.

PiperOrigin-RevId: 178152737
---
 .../bijectors/masked_autoregressive_test.py   |   2 +-
 .../python/ops/bijectors/absolute_value.py    | 119 ++++-
 .../ops/bijectors/absolute_value_impl.py      | 134 -----
 .../python/ops/bijectors/affine.py            | 388 +++++++++++++-
 .../python/ops/bijectors/affine_impl.py       | 403 ---------------
 .../ops/bijectors/affine_linear_operator.py   | 216 +++++++-
 .../bijectors/affine_linear_operator_impl.py  | 231 ---------
 .../python/ops/bijectors/chain.py             | 153 +++++-
 .../python/ops/bijectors/chain_impl.py        | 168 -------
 .../ops/bijectors/cholesky_outer_product.py   | 221 +++++++-
 .../bijectors/cholesky_outer_product_impl.py  | 236 ---------
 .../ops/bijectors/conditional_bijector.py     |  40 +-
 .../bijectors/conditional_bijector_impl.py    |  55 --
 .../distributions/python/ops/bijectors/exp.py |  51 +-
 .../python/ops/bijectors/exp_impl.py          |  66 ---
 .../python/ops/bijectors/gumbel.py            | 109 +++-
 .../python/ops/bijectors/gumbel_impl.py       | 124 -----
 .../python/ops/bijectors/inline.py            | 126 ++++-
 .../python/ops/bijectors/inline_impl.py       | 141 ------
 .../python/ops/bijectors/invert.py            |  87 +++-
 .../python/ops/bijectors/invert_impl.py       | 102 ----
 .../ops/bijectors/masked_autoregressive.py    | 459 ++++++++++++++++-
 .../bijectors/masked_autoregressive_impl.py   | 476 ------------------
 .../python/ops/bijectors/permute.py           | 125 ++++-
 .../python/ops/bijectors/permute_impl.py      | 138 -----
 .../python/ops/bijectors/power_transform.py   | 112 ++++-
 .../ops/bijectors/power_transform_impl.py     | 127 -----
 .../python/ops/bijectors/reshape.py           | 301 ++++++++++-
 .../python/ops/bijectors/reshape_impl.py      | 314 ------------
 .../python/ops/bijectors/sigmoid.py           |  33 +-
 .../python/ops/bijectors/sigmoid_centered.py  |  24 +-
 .../ops/bijectors/sigmoid_centered_impl.py    |  39 --
 .../python/ops/bijectors/sigmoid_impl.py      |  48 --
 .../python/ops/bijectors/sinh_arcsinh.py      | 164 +++++-
 .../python/ops/bijectors/sinh_arcsinh_impl.py | 179 -------
 .../python/ops/bijectors/softmax_centered.py  | 234 ++++++++-
 .../ops/bijectors/softmax_centered_impl.py    | 249 ---------
 .../python/ops/bijectors/softplus.py          | 129 ++++-
 .../python/ops/bijectors/softplus_impl.py     | 144 ------
 .../python/ops/bijectors/weibull.py           | 134 ++++-
 .../python/ops/bijectors/weibull_impl.py      | 149 ------
 41 files changed, 3083 insertions(+), 3667 deletions(-)
 delete mode 100644 tensorflow/contrib/distributions/python/ops/bijectors/absolute_value_impl.py
 delete mode 100644 tensorflow/contrib/distributions/python/ops/bijectors/affine_impl.py
 delete mode 100644 tensorflow/contrib/distributions/python/ops/bijectors/affine_linear_operator_impl.py
 delete mode 100644 tensorflow/contrib/distributions/python/ops/bijectors/chain_impl.py
 delete mode 100644 tensorflow/contrib/distributions/python/ops/bijectors/cholesky_outer_product_impl.py
 delete mode 100644 tensorflow/contrib/distributions/python/ops/bijectors/conditional_bijector_impl.py
 delete mode 100644 tensorflow/contrib/distributions/python/ops/bijectors/exp_impl.py
 delete mode 100644 tensorflow/contrib/distributions/python/ops/bijectors/gumbel_impl.py
 delete mode 100644 tensorflow/contrib/distributions/python/ops/bijectors/inline_impl.py
 delete mode 100644 tensorflow/contrib/distributions/python/ops/bijectors/invert_impl.py
 delete mode 100644 tensorflow/contrib/distributions/python/ops/bijectors/masked_autoregressive_impl.py
 delete mode 100644 tensorflow/contrib/distributions/python/ops/bijectors/permute_impl.py
 delete mode 100644 tensorflow/contrib/distributions/python/ops/bijectors/power_transform_impl.py
 delete mode 100644 tensorflow/contrib/distributions/python/ops/bijectors/reshape_impl.py
 delete mode 100644 tensorflow/contrib/distributions/python/ops/bijectors/sigmoid_centered_impl.py
 delete mode 100644 tensorflow/contrib/distributions/python/ops/bijectors/sigmoid_impl.py
 delete mode 100644 tensorflow/contrib/distributions/python/ops/bijectors/sinh_arcsinh_impl.py
 delete mode 100644 tensorflow/contrib/distributions/python/ops/bijectors/softmax_centered_impl.py
 delete mode 100644 tensorflow/contrib/distributions/python/ops/bijectors/softplus_impl.py
 delete mode 100644 tensorflow/contrib/distributions/python/ops/bijectors/weibull_impl.py

diff --git a/tensorflow/contrib/distributions/python/kernel_tests/bijectors/masked_autoregressive_test.py b/tensorflow/contrib/distributions/python/kernel_tests/bijectors/masked_autoregressive_test.py
index 25a9b6f5fe..288d9d8dd6 100644
--- a/tensorflow/contrib/distributions/python/kernel_tests/bijectors/masked_autoregressive_test.py
+++ b/tensorflow/contrib/distributions/python/kernel_tests/bijectors/masked_autoregressive_test.py
@@ -22,9 +22,9 @@ import numpy as np
 
 from tensorflow.contrib.distributions.python.ops import test_util
 from tensorflow.contrib.distributions.python.ops.bijectors.invert import Invert
+from tensorflow.contrib.distributions.python.ops.bijectors.masked_autoregressive import _gen_mask
 from tensorflow.contrib.distributions.python.ops.bijectors.masked_autoregressive import masked_autoregressive_default_template
 from tensorflow.contrib.distributions.python.ops.bijectors.masked_autoregressive import MaskedAutoregressiveFlow
-from tensorflow.contrib.distributions.python.ops.bijectors.masked_autoregressive_impl import _gen_mask
 from tensorflow.python.framework import constant_op
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import variables
diff --git a/tensorflow/contrib/distributions/python/ops/bijectors/absolute_value.py b/tensorflow/contrib/distributions/python/ops/bijectors/absolute_value.py
index 6049419818..0fe9f6aa78 100644
--- a/tensorflow/contrib/distributions/python/ops/bijectors/absolute_value.py
+++ b/tensorflow/contrib/distributions/python/ops/bijectors/absolute_value.py
@@ -18,12 +18,117 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-# go/tf-wildcard-import
-# pylint: disable=wildcard-import
-from tensorflow.contrib.distributions.python.ops.bijectors.absolute_value_impl import *
-# pylint: enable=wildcard-import
-from tensorflow.python.util.all_util import remove_undocumented
+from tensorflow.python.framework import ops
+from tensorflow.python.framework import tensor_util
+from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import check_ops
+from tensorflow.python.ops import control_flow_ops
+from tensorflow.python.ops import math_ops
+from tensorflow.python.ops.distributions import bijector
 
-_allowed_symbols = ["AbsoluteValue"]
+__all__ = [
+    "AbsoluteValue",
+]
 
-remove_undocumented(__name__, _allowed_symbols)
+
+class AbsoluteValue(bijector.Bijector):
+  """Computes `Y = g(X) = Abs(X)`, element-wise.
+
+  This non-injective bijector allows for transformations of scalar distributions
+  with the absolute value function, which maps `(-inf, inf)` to `[0, inf)`.
+
+  * For `y in (0, inf)`, `AbsoluteValue.inverse(y)` returns the set inverse
+    `{x in (-inf, inf) : |x| = y}` as a tuple, `-y, y`.
+  * `AbsoluteValue.inverse(0)` returns `0, 0`, which is not the set inverse
+    (the set inverse is the singleton `{0}`), but "works" in conjunction with
+    `TransformedDistribution` to produce a left semi-continuous pdf.
+  * For `y < 0`, `AbsoluteValue.inverse(y)` happily returns the
+    wrong thing, `-y, y`.  This is done for efficiency.  If
+    `validate_args == True`, `y < 0` will raise an exception.
+
+
+  ```python
+  tfd = tf.contrib.distributions
+
+  abs = tfd.bijectors.AbsoluteValue()
+
+  abs.forward([-1., 0., 1.])
+  ==> [1., 0.,  1.]
+
+  abs.inverse(1.)
+  ==> [-1., 1.]
+
+  # The |dX/dY| is constant, == 1.  So Log|dX/dY| == 0.
+  abs.inverse_log_det_jacobian(1.)
+  ==> [0., 0.]
+
+  # Special case handling of 0.
+  abs.inverse(0.)
+  ==> [0., 0.]
+
+  abs.inverse_log_det_jacobian(0.)
+  ==> [0., 0.]
+  ```
+
+  """
+
+  def __init__(self, event_ndims=0, validate_args=False, name="absolute_value"):
+    """Instantiates the `AbsoluteValue` bijector.
+
+    Args:
+      event_ndims: Python scalar indicating the number of dimensions associated
+        with a particular draw from the distribution.  Currently only zero is
+        supported.
+      validate_args: Python `bool` indicating whether arguments should be
+        checked for correctness, in particular whether inputs to `inverse` and
+        `inverse_log_det_jacobian` are non-negative.
+      name: Python `str` name given to ops managed by this object.
+
+    Raises:
+      ValueError:  If `event_ndims` is not zero.
+    """
+    self._graph_parents = []
+    self._name = name
+
+    event_ndims = ops.convert_to_tensor(event_ndims, name="event_ndims")
+    event_ndims_const = tensor_util.constant_value(event_ndims)
+    if event_ndims_const is not None and event_ndims_const not in (0,):
+      raise ValueError("event_ndims(%s) was not 0" % event_ndims_const)
+    else:
+      if validate_args:
+        event_ndims = control_flow_ops.with_dependencies(
+            [check_ops.assert_equal(
+                event_ndims, 0, message="event_ndims was not 0")],
+            event_ndims)
+
+    with self._name_scope("init"):
+      super(AbsoluteValue, self).__init__(
+          event_ndims=event_ndims,
+          validate_args=validate_args,
+          name=name)
+
+  def _forward(self, x):
+    return math_ops.abs(x)
+
+  def _inverse(self, y):
+    if self.validate_args:
+      y = control_flow_ops.with_dependencies(
+          [check_ops.assert_non_negative(y, message="Argument y was negative")],
+          y)
+    return -y, y
+
+  def _inverse_log_det_jacobian(self, y):
+    # If event_ndims = 2,
+    # F^{-1}(y) = (-y, y), so DF^{-1}(y) = (-1, 1),
+    # so Log|DF^{-1}(y)| = Log[1, 1] = [0, 0].
+    batch_shape = array_ops.shape(y)[:array_ops.rank(y) - self.event_ndims]
+    zeros = array_ops.zeros(batch_shape, dtype=y.dtype)
+    if self.validate_args:
+      zeros = control_flow_ops.with_dependencies(
+          [check_ops.assert_non_negative(y, message="Argument y was negative")],
+          zeros)
+    return zeros, zeros
+
+  @property
+  def _is_injective(self):
+    return False
diff --git a/tensorflow/contrib/distributions/python/ops/bijectors/absolute_value_impl.py b/tensorflow/contrib/distributions/python/ops/bijectors/absolute_value_impl.py
deleted file mode 100644
index 0fe9f6aa78..0000000000
--- a/tensorflow/contrib/distributions/python/ops/bijectors/absolute_value_impl.py
+++ /dev/null
@@ -1,134 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""AbsoluteValue bijector."""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-from tensorflow.python.framework import ops
-from tensorflow.python.framework import tensor_util
-from tensorflow.python.ops import array_ops
-from tensorflow.python.ops import check_ops
-from tensorflow.python.ops import control_flow_ops
-from tensorflow.python.ops import math_ops
-from tensorflow.python.ops.distributions import bijector
-
-__all__ = [
-    "AbsoluteValue",
-]
-
-
-class AbsoluteValue(bijector.Bijector):
-  """Computes `Y = g(X) = Abs(X)`, element-wise.
-
-  This non-injective bijector allows for transformations of scalar distributions
-  with the absolute value function, which maps `(-inf, inf)` to `[0, inf)`.
-
-  * For `y in (0, inf)`, `AbsoluteValue.inverse(y)` returns the set inverse
-    `{x in (-inf, inf) : |x| = y}` as a tuple, `-y, y`.
-  * `AbsoluteValue.inverse(0)` returns `0, 0`, which is not the set inverse
-    (the set inverse is the singleton `{0}`), but "works" in conjunction with
-    `TransformedDistribution` to produce a left semi-continuous pdf.
-  * For `y < 0`, `AbsoluteValue.inverse(y)` happily returns the
-    wrong thing, `-y, y`.  This is done for efficiency.  If
-    `validate_args == True`, `y < 0` will raise an exception.
-
-
-  ```python
-  tfd = tf.contrib.distributions
-
-  abs = tfd.bijectors.AbsoluteValue()
-
-  abs.forward([-1., 0., 1.])
-  ==> [1., 0.,  1.]
-
-  abs.inverse(1.)
-  ==> [-1., 1.]
-
-  # The |dX/dY| is constant, == 1.  So Log|dX/dY| == 0.
-  abs.inverse_log_det_jacobian(1.)
-  ==> [0., 0.]
-
-  # Special case handling of 0.
-  abs.inverse(0.)
-  ==> [0., 0.]
-
-  abs.inverse_log_det_jacobian(0.)
-  ==> [0., 0.]
-  ```
-
-  """
-
-  def __init__(self, event_ndims=0, validate_args=False, name="absolute_value"):
-    """Instantiates the `AbsoluteValue` bijector.
-
-    Args:
-      event_ndims: Python scalar indicating the number of dimensions associated
-        with a particular draw from the distribution.  Currently only zero is
-        supported.
-      validate_args: Python `bool` indicating whether arguments should be
-        checked for correctness, in particular whether inputs to `inverse` and
-        `inverse_log_det_jacobian` are non-negative.
-      name: Python `str` name given to ops managed by this object.
-
-    Raises:
-      ValueError:  If `event_ndims` is not zero.
-    """
-    self._graph_parents = []
-    self._name = name
-
-    event_ndims = ops.convert_to_tensor(event_ndims, name="event_ndims")
-    event_ndims_const = tensor_util.constant_value(event_ndims)
-    if event_ndims_const is not None and event_ndims_const not in (0,):
-      raise ValueError("event_ndims(%s) was not 0" % event_ndims_const)
-    else:
-      if validate_args:
-        event_ndims = control_flow_ops.with_dependencies(
-            [check_ops.assert_equal(
-                event_ndims, 0, message="event_ndims was not 0")],
-            event_ndims)
-
-    with self._name_scope("init"):
-      super(AbsoluteValue, self).__init__(
-          event_ndims=event_ndims,
-          validate_args=validate_args,
-          name=name)
-
-  def _forward(self, x):
-    return math_ops.abs(x)
-
-  def _inverse(self, y):
-    if self.validate_args:
-      y = control_flow_ops.with_dependencies(
-          [check_ops.assert_non_negative(y, message="Argument y was negative")],
-          y)
-    return -y, y
-
-  def _inverse_log_det_jacobian(self, y):
-    # If event_ndims = 2,
-    # F^{-1}(y) = (-y, y), so DF^{-1}(y) = (-1, 1),
-    # so Log|DF^{-1}(y)| = Log[1, 1] = [0, 0].
-    batch_shape = array_ops.shape(y)[:array_ops.rank(y) - self.event_ndims]
-    zeros = array_ops.zeros(batch_shape, dtype=y.dtype)
-    if self.validate_args:
-      zeros = control_flow_ops.with_dependencies(
-          [check_ops.assert_non_negative(y, message="Argument y was negative")],
-          zeros)
-    return zeros, zeros
-
-  @property
-  def _is_injective(self):
-    return False
diff --git a/tensorflow/contrib/distributions/python/ops/bijectors/affine.py b/tensorflow/contrib/distributions/python/ops/bijectors/affine.py
index 940cceff04..05bb9c2f9b 100644
--- a/tensorflow/contrib/distributions/python/ops/bijectors/affine.py
+++ b/tensorflow/contrib/distributions/python/ops/bijectors/affine.py
@@ -18,12 +18,386 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-# go/tf-wildcard-import
-# pylint: disable=wildcard-import
-from tensorflow.contrib.distributions.python.ops.bijectors.affine_impl import *
-# pylint: enable=wildcard-import
-from tensorflow.python.util.all_util import remove_undocumented
+from tensorflow.contrib import linalg
+from tensorflow.contrib.distributions.python.ops import distribution_util
+from tensorflow.contrib.distributions.python.ops.shape import _DistributionShape
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import ops
+from tensorflow.python.framework import tensor_util
+from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import check_ops
+from tensorflow.python.ops import control_flow_ops
+from tensorflow.python.ops import math_ops
+from tensorflow.python.ops.distributions import bijector
 
-_allowed_symbols = ["Affine"]
 
-remove_undocumented(__name__, _allowed_symbols)
+__all__ = [
+    "Affine",
+]
+
+
+def _as_tensor(x, name):
+  """Convenience to convert to `Tensor` or leave as `None`."""
+  return None if x is None else ops.convert_to_tensor(x, name=name)
+
+
+class Affine(bijector.Bijector):
+  """Compute `Y = g(X; shift, scale) = scale @ X + shift`.
+
+  Here `scale = c * I + diag(D1) + tril(L) + V @ diag(D2) @ V.T`.
+
+  In TF parlance, the `scale` term is logically equivalent to:
+
+  ```python
+  scale = (
+    scale_identity_multiplier * tf.diag(tf.ones(d)) +
+    tf.diag(scale_diag) +
+    scale_tril +
+    scale_perturb_factor @ diag(scale_perturb_diag) @
+      tf.transpose([scale_perturb_factor])
+  )
+  ```
+
+  The `scale` term is applied without necessarily materializing constituent
+  matrices, i.e., the matmul is [matrix-free](
+  https://en.wikipedia.org/wiki/Matrix-free_methods) when possible.
+
+  Examples:
+
+  ```python
+  # Y = X
+  b = Affine()
+
+  # Y = X + shift
+  b = Affine(shift=[1., 2, 3])
+
+  # Y = 2 * I @ X.T + shift
+  b = Affine(shift=[1., 2, 3],
+             scale_identity_multiplier=2.)
+
+  # Y = tf.diag(d1) @ X.T + shift
+  b = Affine(shift=[1., 2, 3],
+             scale_diag=[-1., 2, 1])         # Implicitly 3x3.
+
+  # Y = (I + v * v.T) @ X.T + shift
+  b = Affine(shift=[1., 2, 3],
+             scale_perturb_factor=[[1., 0],
+                                   [0, 1],
+                                   [1, 1]])
+
+  # Y = (diag(d1) + v * diag(d2) * v.T) @ X.T + shift
+  b = Affine(shift=[1., 2, 3],
+             scale_diag=[1., 3, 3],          # Implicitly 3x3.
+             scale_perturb_diag=[2., 1],     # Implicitly 2x2.
+             scale_perturb_factor=[[1., 0],
+                                   [0, 1],
+                                   [1, 1]])
+
+  ```
+
+  """
+
+  def __init__(self,
+               shift=None,
+               scale_identity_multiplier=None,
+               scale_diag=None,
+               scale_tril=None,
+               scale_perturb_factor=None,
+               scale_perturb_diag=None,
+               event_ndims=1,
+               validate_args=False,
+               name="affine"):
+    """Instantiates the `Affine` bijector.
+
+    This `Bijector` is initialized with `shift` `Tensor` and `scale` arguments,
+    giving the forward operation:
+
+    ```none
+    Y = g(X) = scale @ X + shift
+    ```
+
+    where the `scale` term is logically equivalent to:
+
+    ```python
+    scale = (
+      scale_identity_multiplier * tf.diag(tf.ones(d)) +
+      tf.diag(scale_diag) +
+      scale_tril +
+      scale_perturb_factor @ diag(scale_perturb_diag) @
+        tf.transpose([scale_perturb_factor])
+    )
+    ```
+
+    If none of `scale_identity_multiplier`, `scale_diag`, or `scale_tril` are
+    specified then `scale += IdentityMatrix`. Otherwise specifying a
+    `scale` argument has the semantics of `scale += Expand(arg)`, i.e.,
+    `scale_diag != None` means `scale += tf.diag(scale_diag)`.
+
+    Args:
+      shift: Floating-point `Tensor`. If this is set to `None`, no shift is
+        applied.
+      scale_identity_multiplier: floating point rank 0 `Tensor` representing a
+        scaling done to the identity matrix.
+        When `scale_identity_multiplier = scale_diag = scale_tril = None` then
+        `scale += IdentityMatrix`. Otherwise no scaled-identity-matrix is added
+        to `scale`.
+      scale_diag: Floating-point `Tensor` representing the diagonal matrix.
+        `scale_diag` has shape [N1, N2, ...  k], which represents a k x k
+        diagonal matrix.
+        When `None` no diagonal term is added to `scale`.
+      scale_tril: Floating-point `Tensor` representing the diagonal matrix.
+        `scale_diag` has shape [N1, N2, ...  k, k], which represents a k x k
+        lower triangular matrix.
+        When `None` no `scale_tril` term is added to `scale`.
+        The upper triangular elements above the diagonal are ignored.
+      scale_perturb_factor: Floating-point `Tensor` representing factor matrix
+        with last two dimensions of shape `(k, r)`. When `None`, no rank-r
+        update is added to `scale`.
+      scale_perturb_diag: Floating-point `Tensor` representing the diagonal
+        matrix. `scale_perturb_diag` has shape [N1, N2, ...  r], which
+        represents an `r x r` diagonal matrix. When `None` low rank updates will
+        take the form `scale_perturb_factor * scale_perturb_factor.T`.
+      event_ndims: Scalar `int` `Tensor` indicating the number of dimensions
+        associated with a particular draw from the distribution. Must be 0 or 1.
+      validate_args: Python `bool` indicating whether arguments should be
+        checked for correctness.
+      name: Python `str` name given to ops managed by this object.
+
+    Raises:
+      ValueError: if `perturb_diag` is specified but not `perturb_factor`.
+      TypeError: if `shift` has different `dtype` from `scale` arguments.
+    """
+    self._graph_parents = []
+    self._name = name
+    self._validate_args = validate_args
+
+    # Ambiguous definition of low rank update.
+    if scale_perturb_diag is not None and scale_perturb_factor is None:
+      raise ValueError("When scale_perturb_diag is specified, "
+                       "scale_perturb_factor must be specified.")
+
+    # Special case, only handling a scaled identity matrix. We don't know its
+    # dimensions, so this is special cased.
+    # We don't check identity_multiplier, since below we set it to 1. if all
+    # other scale args are None.
+    self._is_only_identity_multiplier = (scale_tril is None and
+                                         scale_diag is None and
+                                         scale_perturb_factor is None)
+
+    with self._name_scope("init", values=[
+        shift, scale_identity_multiplier, scale_diag, scale_tril,
+        scale_perturb_diag, scale_perturb_factor]):
+      event_ndims = ops.convert_to_tensor(event_ndims, name="event_ndims")
+      event_ndims_const = tensor_util.constant_value(event_ndims)
+      if event_ndims_const is not None and event_ndims_const not in (0, 1):
+        raise ValueError("event_ndims(%s) was not 0 or 1" % event_ndims_const)
+      else:
+        if validate_args:
+          # Shape tool will catch if event_ndims is negative.
+          event_ndims = control_flow_ops.with_dependencies(
+              [check_ops.assert_less(
+                  event_ndims, 2, message="event_ndims must be 0 or 1")],
+              event_ndims)
+
+      if event_ndims_const == 0 and not self._is_only_identity_multiplier:
+        raise ValueError(
+            "If event_ndims == 0, the only scale argument you can pass is "
+            "scale_identity_multiplier.  All others operate on vectors.")
+
+      # In the absence of `loc` and `scale`, we'll assume `dtype` is `float32`.
+      dtype = dtypes.float32
+
+      if shift is not None:
+        shift = ops.convert_to_tensor(shift, name="shift")
+        dtype = shift.dtype.base_dtype
+      self._shift = shift
+
+      # When no args are specified, pretend the scale matrix is the identity
+      # matrix.
+      if (self._is_only_identity_multiplier and
+          scale_identity_multiplier is None):
+        scale_identity_multiplier = ops.convert_to_tensor(1., dtype=dtype)
+
+      # self._create_scale_operator returns a LinearOperator in all cases
+      # except if self._is_only_identity_multiplier; in which case it
+      # returns a scalar Tensor.
+      scale = self._create_scale_operator(
+          identity_multiplier=scale_identity_multiplier,
+          diag=scale_diag,
+          tril=scale_tril,
+          perturb_diag=scale_perturb_diag,
+          perturb_factor=scale_perturb_factor,
+          shift=shift,
+          validate_args=validate_args)
+
+      if scale.dtype is not None:
+        dtype = scale.dtype.base_dtype
+
+      if scale is not None and not self._is_only_identity_multiplier:
+        if (shift is not None and
+            shift.dtype.base_dtype != scale.dtype.base_dtype):
+          raise TypeError(
+              "shift.dtype({}) is incompatible with scale.dtype({}).".format(
+                  shift.dtype, scale.dtype))
+
+        if scale.tensor_rank is not None:
+          batch_ndims = scale.tensor_rank - 2
+        else:
+          batch_ndims = scale.tensor_rank_tensor() - 2
+      else:
+        # We won't need shape inference when scale is None or when scale is a
+        # scalar.
+        batch_ndims = 0
+      self._scale = scale
+      self._shaper = _DistributionShape(
+          batch_ndims=batch_ndims,
+          event_ndims=event_ndims,
+          validate_args=validate_args)
+      super(Affine, self).__init__(
+          event_ndims=event_ndims,
+          graph_parents=(
+              [event_ndims] +
+              [self._scale] if tensor_util.is_tensor(self._scale)
+              else self._scale.graph_parents +
+              [self._shift] if self._shift is not None else []),
+          is_constant_jacobian=True,
+          dtype=dtype,
+          validate_args=validate_args,
+          name=name)
+
+  def _create_scale_operator(self, identity_multiplier, diag, tril,
+                             perturb_diag, perturb_factor, shift,
+                             validate_args):
+    """Construct `scale` from various components.
+
+    Args:
+      identity_multiplier: floating point rank 0 `Tensor` representing a scaling
+        done to the identity matrix.
+      diag: Floating-point `Tensor` representing the diagonal matrix.
+        `scale_diag` has shape [N1, N2, ...  k], which represents a k x k
+        diagonal matrix.
+      tril: Floating-point `Tensor` representing the diagonal matrix.
+        `scale_tril` has shape [N1, N2, ...  k], which represents a k x k lower
+        triangular matrix.
+      perturb_diag: Floating-point `Tensor` representing the diagonal matrix of
+        the low rank update.
+      perturb_factor: Floating-point `Tensor` representing factor matrix.
+      shift: Floating-point `Tensor` representing `shift in `scale @ X + shift`.
+      validate_args: Python `bool` indicating whether arguments should be
+        checked for correctness.
+
+    Returns:
+      scale. In the case of scaling by a constant, scale is a
+      floating point `Tensor`. Otherwise, scale is a `LinearOperator`.
+
+    Raises:
+      ValueError: if all of `tril`, `diag` and `identity_multiplier` are `None`.
+    """
+    identity_multiplier = _as_tensor(identity_multiplier, "identity_multiplier")
+    diag = _as_tensor(diag, "diag")
+    tril = _as_tensor(tril, "tril")
+    perturb_diag = _as_tensor(perturb_diag, "perturb_diag")
+    perturb_factor = _as_tensor(perturb_factor, "perturb_factor")
+
+    # If possible, use the low rank update to infer the shape of
+    # the identity matrix, when scale represents a scaled identity matrix
+    # with a low rank update.
+    shape_hint = None
+    if perturb_factor is not None:
+      shape_hint = distribution_util.dimension_size(perturb_factor, axis=-2)
+
+    if self._is_only_identity_multiplier:
+      if validate_args:
+        return control_flow_ops.with_dependencies(
+            [check_ops.assert_none_equal(
+                identity_multiplier,
+                array_ops.zeros([], identity_multiplier.dtype),
+                ["identity_multiplier should be non-zero."])],
+            identity_multiplier)
+      return identity_multiplier
+
+    scale = distribution_util.make_tril_scale(
+        loc=shift,
+        scale_tril=tril,
+        scale_diag=diag,
+        scale_identity_multiplier=identity_multiplier,
+        validate_args=validate_args,
+        assert_positive=False,
+        shape_hint=shape_hint)
+
+    if perturb_factor is not None:
+      return linalg.LinearOperatorLowRankUpdate(
+          scale,
+          u=perturb_factor,
+          diag_update=perturb_diag,
+          is_diag_update_positive=perturb_diag is None,
+          is_non_singular=True,  # Implied by is_positive_definite=True.
+          is_self_adjoint=True,
+          is_positive_definite=True,
+          is_square=True)
+
+    return scale
+
+  @property
+  def shift(self):
+    """The `shift` `Tensor` in `Y = scale @ X + shift`."""
+    return self._shift
+
+  @property
+  def scale(self):
+    """The `scale` `LinearOperator` in `Y = scale @ X + shift`."""
+    return self._scale
+
+  def _forward(self, x):
+    y = x
+    if self._is_only_identity_multiplier:
+      y *= self._scale
+      if self.shift is not None:
+        return y + self.shift
+      return y
+    y, sample_shape = self._shaper.make_batch_of_event_sample_matrices(
+        y, expand_batch_dim=False)
+    with ops.control_dependencies(self._maybe_check_scale() if
+                                  self.validate_args else []):
+      y = self.scale.matmul(y)
+    y = self._shaper.undo_make_batch_of_event_sample_matrices(
+        y, sample_shape, expand_batch_dim=False)
+    if self.shift is not None:
+      y += self.shift
+    return y
+
+  def _inverse(self, y):
+    x = y
+    if self.shift is not None:
+      x -= self.shift
+    if self._is_only_identity_multiplier:
+      return x / self._scale
+
+    x, sample_shape = self._shaper.make_batch_of_event_sample_matrices(
+        x, expand_batch_dim=False)
+    # Solve fails if the op is singular so we may safely skip this assertion.
+    x = self.scale.solve(x)
+    x = self._shaper.undo_make_batch_of_event_sample_matrices(
+        x, sample_shape, expand_batch_dim=False)
+    return x
+
+  def _inverse_log_det_jacobian(self, y):
+    return -self._forward_log_det_jacobian(y)
+
+  def _forward_log_det_jacobian(self, x):
+    if self._is_only_identity_multiplier:
+      # We don't pad in this case and instead let the fldj be applied
+      # via broadcast.
+      event_size = distribution_util.pick_vector(
+          math_ops.equal(self._shaper.event_ndims, 0),
+          [1], array_ops.shape(x))[-1]
+      event_size = math_ops.cast(event_size, dtype=self._scale.dtype)
+      return math_ops.log(math_ops.abs(self._scale)) * event_size
+    return self.scale.log_abs_determinant()
+
+  def _maybe_check_scale(self):
+    try:
+      return [self.scale.assert_non_singular()]
+    except NotImplementedError:
+      pass
+    return []
diff --git a/tensorflow/contrib/distributions/python/ops/bijectors/affine_impl.py b/tensorflow/contrib/distributions/python/ops/bijectors/affine_impl.py
deleted file mode 100644
index 05bb9c2f9b..0000000000
--- a/tensorflow/contrib/distributions/python/ops/bijectors/affine_impl.py
+++ /dev/null
@@ -1,403 +0,0 @@
-# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Affine bijector."""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-from tensorflow.contrib import linalg
-from tensorflow.contrib.distributions.python.ops import distribution_util
-from tensorflow.contrib.distributions.python.ops.shape import _DistributionShape
-from tensorflow.python.framework import dtypes
-from tensorflow.python.framework import ops
-from tensorflow.python.framework import tensor_util
-from tensorflow.python.ops import array_ops
-from tensorflow.python.ops import check_ops
-from tensorflow.python.ops import control_flow_ops
-from tensorflow.python.ops import math_ops
-from tensorflow.python.ops.distributions import bijector
-
-
-__all__ = [
-    "Affine",
-]
-
-
-def _as_tensor(x, name):
-  """Convenience to convert to `Tensor` or leave as `None`."""
-  return None if x is None else ops.convert_to_tensor(x, name=name)
-
-
-class Affine(bijector.Bijector):
-  """Compute `Y = g(X; shift, scale) = scale @ X + shift`.
-
-  Here `scale = c * I + diag(D1) + tril(L) + V @ diag(D2) @ V.T`.
-
-  In TF parlance, the `scale` term is logically equivalent to:
-
-  ```python
-  scale = (
-    scale_identity_multiplier * tf.diag(tf.ones(d)) +
-    tf.diag(scale_diag) +
-    scale_tril +
-    scale_perturb_factor @ diag(scale_perturb_diag) @
-      tf.transpose([scale_perturb_factor])
-  )
-  ```
-
-  The `scale` term is applied without necessarily materializing constituent
-  matrices, i.e., the matmul is [matrix-free](
-  https://en.wikipedia.org/wiki/Matrix-free_methods) when possible.
-
-  Examples:
-
-  ```python
-  # Y = X
-  b = Affine()
-
-  # Y = X + shift
-  b = Affine(shift=[1., 2, 3])
-
-  # Y = 2 * I @ X.T + shift
-  b = Affine(shift=[1., 2, 3],
-             scale_identity_multiplier=2.)
-
-  # Y = tf.diag(d1) @ X.T + shift
-  b = Affine(shift=[1., 2, 3],
-             scale_diag=[-1., 2, 1])         # Implicitly 3x3.
-
-  # Y = (I + v * v.T) @ X.T + shift
-  b = Affine(shift=[1., 2, 3],
-             scale_perturb_factor=[[1., 0],
-                                   [0, 1],
-                                   [1, 1]])
-
-  # Y = (diag(d1) + v * diag(d2) * v.T) @ X.T + shift
-  b = Affine(shift=[1., 2, 3],
-             scale_diag=[1., 3, 3],          # Implicitly 3x3.
-             scale_perturb_diag=[2., 1],     # Implicitly 2x2.
-             scale_perturb_factor=[[1., 0],
-                                   [0, 1],
-                                   [1, 1]])
-
-  ```
-
-  """
-
-  def __init__(self,
-               shift=None,
-               scale_identity_multiplier=None,
-               scale_diag=None,
-               scale_tril=None,
-               scale_perturb_factor=None,
-               scale_perturb_diag=None,
-               event_ndims=1,
-               validate_args=False,
-               name="affine"):
-    """Instantiates the `Affine` bijector.
-
-    This `Bijector` is initialized with `shift` `Tensor` and `scale` arguments,
-    giving the forward operation:
-
-    ```none
-    Y = g(X) = scale @ X + shift
-    ```
-
-    where the `scale` term is logically equivalent to:
-
-    ```python
-    scale = (
-      scale_identity_multiplier * tf.diag(tf.ones(d)) +
-      tf.diag(scale_diag) +
-      scale_tril +
-      scale_perturb_factor @ diag(scale_perturb_diag) @
-        tf.transpose([scale_perturb_factor])
-    )
-    ```
-
-    If none of `scale_identity_multiplier`, `scale_diag`, or `scale_tril` are
-    specified then `scale += IdentityMatrix`. Otherwise specifying a
-    `scale` argument has the semantics of `scale += Expand(arg)`, i.e.,
-    `scale_diag != None` means `scale += tf.diag(scale_diag)`.
-
-    Args:
-      shift: Floating-point `Tensor`. If this is set to `None`, no shift is
-        applied.
-      scale_identity_multiplier: floating point rank 0 `Tensor` representing a
-        scaling done to the identity matrix.
-        When `scale_identity_multiplier = scale_diag = scale_tril = None` then
-        `scale += IdentityMatrix`. Otherwise no scaled-identity-matrix is added
-        to `scale`.
-      scale_diag: Floating-point `Tensor` representing the diagonal matrix.
-        `scale_diag` has shape [N1, N2, ...  k], which represents a k x k
-        diagonal matrix.
-        When `None` no diagonal term is added to `scale`.
-      scale_tril: Floating-point `Tensor` representing the diagonal matrix.
-        `scale_diag` has shape [N1, N2, ...  k, k], which represents a k x k
-        lower triangular matrix.
-        When `None` no `scale_tril` term is added to `scale`.
-        The upper triangular elements above the diagonal are ignored.
-      scale_perturb_factor: Floating-point `Tensor` representing factor matrix
-        with last two dimensions of shape `(k, r)`. When `None`, no rank-r
-        update is added to `scale`.
-      scale_perturb_diag: Floating-point `Tensor` representing the diagonal
-        matrix. `scale_perturb_diag` has shape [N1, N2, ...  r], which
-        represents an `r x r` diagonal matrix. When `None` low rank updates will
-        take the form `scale_perturb_factor * scale_perturb_factor.T`.
-      event_ndims: Scalar `int` `Tensor` indicating the number of dimensions
-        associated with a particular draw from the distribution. Must be 0 or 1.
-      validate_args: Python `bool` indicating whether arguments should be
-        checked for correctness.
-      name: Python `str` name given to ops managed by this object.
-
-    Raises:
-      ValueError: if `perturb_diag` is specified but not `perturb_factor`.
-      TypeError: if `shift` has different `dtype` from `scale` arguments.
-    """
-    self._graph_parents = []
-    self._name = name
-    self._validate_args = validate_args
-
-    # Ambiguous definition of low rank update.
-    if scale_perturb_diag is not None and scale_perturb_factor is None:
-      raise ValueError("When scale_perturb_diag is specified, "
-                       "scale_perturb_factor must be specified.")
-
-    # Special case, only handling a scaled identity matrix. We don't know its
-    # dimensions, so this is special cased.
-    # We don't check identity_multiplier, since below we set it to 1. if all
-    # other scale args are None.
-    self._is_only_identity_multiplier = (scale_tril is None and
-                                         scale_diag is None and
-                                         scale_perturb_factor is None)
-
-    with self._name_scope("init", values=[
-        shift, scale_identity_multiplier, scale_diag, scale_tril,
-        scale_perturb_diag, scale_perturb_factor]):
-      event_ndims = ops.convert_to_tensor(event_ndims, name="event_ndims")
-      event_ndims_const = tensor_util.constant_value(event_ndims)
-      if event_ndims_const is not None and event_ndims_const not in (0, 1):
-        raise ValueError("event_ndims(%s) was not 0 or 1" % event_ndims_const)
-      else:
-        if validate_args:
-          # Shape tool will catch if event_ndims is negative.
-          event_ndims = control_flow_ops.with_dependencies(
-              [check_ops.assert_less(
-                  event_ndims, 2, message="event_ndims must be 0 or 1")],
-              event_ndims)
-
-      if event_ndims_const == 0 and not self._is_only_identity_multiplier:
-        raise ValueError(
-            "If event_ndims == 0, the only scale argument you can pass is "
-            "scale_identity_multiplier.  All others operate on vectors.")
-
-      # In the absence of `loc` and `scale`, we'll assume `dtype` is `float32`.
-      dtype = dtypes.float32
-
-      if shift is not None:
-        shift = ops.convert_to_tensor(shift, name="shift")
-        dtype = shift.dtype.base_dtype
-      self._shift = shift
-
-      # When no args are specified, pretend the scale matrix is the identity
-      # matrix.
-      if (self._is_only_identity_multiplier and
-          scale_identity_multiplier is None):
-        scale_identity_multiplier = ops.convert_to_tensor(1., dtype=dtype)
-
-      # self._create_scale_operator returns a LinearOperator in all cases
-      # except if self._is_only_identity_multiplier; in which case it
-      # returns a scalar Tensor.
-      scale = self._create_scale_operator(
-          identity_multiplier=scale_identity_multiplier,
-          diag=scale_diag,
-          tril=scale_tril,
-          perturb_diag=scale_perturb_diag,
-          perturb_factor=scale_perturb_factor,
-          shift=shift,
-          validate_args=validate_args)
-
-      if scale.dtype is not None:
-        dtype = scale.dtype.base_dtype
-
-      if scale is not None and not self._is_only_identity_multiplier:
-        if (shift is not None and
-            shift.dtype.base_dtype != scale.dtype.base_dtype):
-          raise TypeError(
-              "shift.dtype({}) is incompatible with scale.dtype({}).".format(
-                  shift.dtype, scale.dtype))
-
-        if scale.tensor_rank is not None:
-          batch_ndims = scale.tensor_rank - 2
-        else:
-          batch_ndims = scale.tensor_rank_tensor() - 2
-      else:
-        # We won't need shape inference when scale is None or when scale is a
-        # scalar.
-        batch_ndims = 0
-      self._scale = scale
-      self._shaper = _DistributionShape(
-          batch_ndims=batch_ndims,
-          event_ndims=event_ndims,
-          validate_args=validate_args)
-      super(Affine, self).__init__(
-          event_ndims=event_ndims,
-          graph_parents=(
-              [event_ndims] +
-              [self._scale] if tensor_util.is_tensor(self._scale)
-              else self._scale.graph_parents +
-              [self._shift] if self._shift is not None else []),
-          is_constant_jacobian=True,
-          dtype=dtype,
-          validate_args=validate_args,
-          name=name)
-
-  def _create_scale_operator(self, identity_multiplier, diag, tril,
-                             perturb_diag, perturb_factor, shift,
-                             validate_args):
-    """Construct `scale` from various components.
-
-    Args:
-      identity_multiplier: floating point rank 0 `Tensor` representing a scaling
-        done to the identity matrix.
-      diag: Floating-point `Tensor` representing the diagonal matrix.
-        `scale_diag` has shape [N1, N2, ...  k], which represents a k x k
-        diagonal matrix.
-      tril: Floating-point `Tensor` representing the diagonal matrix.
-        `scale_tril` has shape [N1, N2, ...  k], which represents a k x k lower
-        triangular matrix.
-      perturb_diag: Floating-point `Tensor` representing the diagonal matrix of
-        the low rank update.
-      perturb_factor: Floating-point `Tensor` representing factor matrix.
-      shift: Floating-point `Tensor` representing `shift in `scale @ X + shift`.
-      validate_args: Python `bool` indicating whether arguments should be
-        checked for correctness.
-
-    Returns:
-      scale. In the case of scaling by a constant, scale is a
-      floating point `Tensor`. Otherwise, scale is a `LinearOperator`.
-
-    Raises:
-      ValueError: if all of `tril`, `diag` and `identity_multiplier` are `None`.
-    """
-    identity_multiplier = _as_tensor(identity_multiplier, "identity_multiplier")
-    diag = _as_tensor(diag, "diag")
-    tril = _as_tensor(tril, "tril")
-    perturb_diag = _as_tensor(perturb_diag, "perturb_diag")
-    perturb_factor = _as_tensor(perturb_factor, "perturb_factor")
-
-    # If possible, use the low rank update to infer the shape of
-    # the identity matrix, when scale represents a scaled identity matrix
-    # with a low rank update.
-    shape_hint = None
-    if perturb_factor is not None:
-      shape_hint = distribution_util.dimension_size(perturb_factor, axis=-2)
-
-    if self._is_only_identity_multiplier:
-      if validate_args:
-        return control_flow_ops.with_dependencies(
-            [check_ops.assert_none_equal(
-                identity_multiplier,
-                array_ops.zeros([], identity_multiplier.dtype),
-                ["identity_multiplier should be non-zero."])],
-            identity_multiplier)
-      return identity_multiplier
-
-    scale = distribution_util.make_tril_scale(
-        loc=shift,
-        scale_tril=tril,
-        scale_diag=diag,
-        scale_identity_multiplier=identity_multiplier,
-        validate_args=validate_args,
-        assert_positive=False,
-        shape_hint=shape_hint)
-
-    if perturb_factor is not None:
-      return linalg.LinearOperatorLowRankUpdate(
-          scale,
-          u=perturb_factor,
-          diag_update=perturb_diag,
-          is_diag_update_positive=perturb_diag is None,
-          is_non_singular=True,  # Implied by is_positive_definite=True.
-          is_self_adjoint=True,
-          is_positive_definite=True,
-          is_square=True)
-
-    return scale
-
-  @property
-  def shift(self):
-    """The `shift` `Tensor` in `Y = scale @ X + shift`."""
-    return self._shift
-
-  @property
-  def scale(self):
-    """The `scale` `LinearOperator` in `Y = scale @ X + shift`."""
-    return self._scale
-
-  def _forward(self, x):
-    y = x
-    if self._is_only_identity_multiplier:
-      y *= self._scale
-      if self.shift is not None:
-        return y + self.shift
-      return y
-    y, sample_shape = self._shaper.make_batch_of_event_sample_matrices(
-        y, expand_batch_dim=False)
-    with ops.control_dependencies(self._maybe_check_scale() if
-                                  self.validate_args else []):
-      y = self.scale.matmul(y)
-    y = self._shaper.undo_make_batch_of_event_sample_matrices(
-        y, sample_shape, expand_batch_dim=False)
-    if self.shift is not None:
-      y += self.shift
-    return y
-
-  def _inverse(self, y):
-    x = y
-    if self.shift is not None:
-      x -= self.shift
-    if self._is_only_identity_multiplier:
-      return x / self._scale
-
-    x, sample_shape = self._shaper.make_batch_of_event_sample_matrices(
-        x, expand_batch_dim=False)
-    # Solve fails if the op is singular so we may safely skip this assertion.
-    x = self.scale.solve(x)
-    x = self._shaper.undo_make_batch_of_event_sample_matrices(
-        x, sample_shape, expand_batch_dim=False)
-    return x
-
-  def _inverse_log_det_jacobian(self, y):
-    return -self._forward_log_det_jacobian(y)
-
-  def _forward_log_det_jacobian(self, x):
-    if self._is_only_identity_multiplier:
-      # We don't pad in this case and instead let the fldj be applied
-      # via broadcast.
-      event_size = distribution_util.pick_vector(
-          math_ops.equal(self._shaper.event_ndims, 0),
-          [1], array_ops.shape(x))[-1]
-      event_size = math_ops.cast(event_size, dtype=self._scale.dtype)
-      return math_ops.log(math_ops.abs(self._scale)) * event_size
-    return self.scale.log_abs_determinant()
-
-  def _maybe_check_scale(self):
-    try:
-      return [self.scale.assert_non_singular()]
-    except NotImplementedError:
-      pass
-    return []
diff --git a/tensorflow/contrib/distributions/python/ops/bijectors/affine_linear_operator.py b/tensorflow/contrib/distributions/python/ops/bijectors/affine_linear_operator.py
index aca04a89df..89043b1410 100644
--- a/tensorflow/contrib/distributions/python/ops/bijectors/affine_linear_operator.py
+++ b/tensorflow/contrib/distributions/python/ops/bijectors/affine_linear_operator.py
@@ -18,12 +18,214 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-# go/tf-wildcard-import
-# pylint: disable=wildcard-import
-from tensorflow.contrib.distributions.python.ops.bijectors.affine_linear_operator_impl import *
-# pylint: enable=wildcard-import
-from tensorflow.python.util.all_util import remove_undocumented
+from tensorflow.contrib.distributions.python.ops.shape import _DistributionShape
+from tensorflow.python.framework import constant_op
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import ops
+from tensorflow.python.framework import tensor_util
+from tensorflow.python.ops import check_ops
+from tensorflow.python.ops import control_flow_ops
+from tensorflow.python.ops.distributions import bijector
+from tensorflow.python.ops.linalg import linear_operator
 
-_allowed_symbols = ["AffineLinearOperator"]
 
-remove_undocumented(__name__, _allowed_symbols)
+__all__ = [
+    "AffineLinearOperator",
+]
+
+
+class AffineLinearOperator(bijector.Bijector):
+  """Compute `Y = g(X; shift, scale) = scale @ X + shift`.
+
+  `shift` is a numeric `Tensor` and `scale` is a `LinearOperator`.
+
+  If `X` is a scalar then the forward transformation is: `scale * X + shift`
+  where `*` denotes the scalar product.
+
+  Note: we don't always simply transpose `X` (but write it this way for
+  brevity). Actually the input `X` undergoes the following transformation
+  before being premultiplied by `scale`:
+
+  1. If there are no sample dims, we call `X = tf.expand_dims(X, 0)`, i.e.,
+     `new_sample_shape = [1]`. Otherwise do nothing.
+  2. The sample shape is flattened to have one dimension, i.e.,
+     `new_sample_shape = [n]` where `n = tf.reduce_prod(old_sample_shape)`.
+  3. The sample dim is cyclically rotated left by 1, i.e.,
+     `new_shape = [B1,...,Bb, k, n]` where `n` is as above, `k` is the
+     event_shape, and `B1,...,Bb` are the batch shapes for each of `b` batch
+     dimensions.
+
+  (For more details see `shape.make_batch_of_event_sample_matrices`.)
+
+  The result of the above transformation is that `X` can be regarded as a batch
+  of matrices where each column is a draw from the distribution. After
+  premultiplying by `scale`, we take the inverse of this procedure. The input
+  `Y` also undergoes the same transformation before/after premultiplying by
+  `inv(scale)`.
+
+  Example Use:
+
+  ```python
+  linalg = tf.linalg
+
+  x = [1., 2, 3]
+
+  shift = [-1., 0., 1]
+  diag = [1., 2, 3]
+  scale = linalg.LinearOperatorDiag(diag)
+  affine = AffineLinearOperator(shift, scale)
+  # In this case, `forward` is equivalent to:
+  # y = scale @ x + shift
+  y = affine.forward(x)  # [0., 4, 10]
+
+  shift = [2., 3, 1]
+  tril = [[1., 0, 0],
+          [2, 1, 0],
+          [3, 2, 1]]
+  scale = linalg.LinearOperatorLowerTriangular(tril)
+  affine = AffineLinearOperator(shift, scale)
+  # In this case, `forward` is equivalent to:
+  # np.squeeze(np.matmul(tril, np.expand_dims(x, -1)), -1) + shift
+  y = affine.forward(x)  # [3., 7, 11]
+  ```
+
+  """
+
+  def __init__(self,
+               shift=None,
+               scale=None,
+               event_ndims=1,
+               validate_args=False,
+               name="affine_linear_operator"):
+    """Instantiates the `AffineLinearOperator` bijector.
+
+    Args:
+      shift: Floating-point `Tensor`.
+      scale:  Subclass of `LinearOperator`. Represents the (batch) positive
+        definite matrix `M` in `R^{k x k}`.
+      event_ndims: Scalar `integer` `Tensor` indicating the number of dimensions
+        associated with a particular draw from the distribution. Must be 0 or 1.
+      validate_args: Python `bool` indicating whether arguments should be
+        checked for correctness.
+      name: Python `str` name given to ops managed by this object.
+
+    Raises:
+      ValueError: if `event_ndims` is not 0 or 1.
+      TypeError: if `scale` is not a `LinearOperator`.
+      TypeError: if `shift.dtype` does not match `scale.dtype`.
+      ValueError: if not `scale.is_non_singular`.
+    """
+    self._graph_parents = []
+    self._name = name
+    self._validate_args = validate_args
+    graph_parents = []
+    with self._name_scope("init", values=[shift]):
+      event_ndims = ops.convert_to_tensor(event_ndims, name="event_ndims")
+      if tensor_util.constant_value(event_ndims) is not None:
+        event_ndims = tensor_util.constant_value(event_ndims)
+        if event_ndims not in (0, 1):
+          raise ValueError("event_ndims({}) was not 0 or 1".format(event_ndims))
+      else:
+        if validate_args:
+          # Shape tool will catch if event_ndims is negative.
+          event_ndims = control_flow_ops.with_dependencies(
+              [check_ops.assert_less(
+                  event_ndims, 2, message="event_ndims must be 0 or 1")],
+              event_ndims)
+        graph_parents += [event_ndims]
+
+      # In the absence of `loc` and `scale`, we'll assume `dtype` is `float32`.
+      dtype = dtypes.float32
+
+      if shift is not None:
+        shift = ops.convert_to_tensor(shift, name="shift")
+        graph_parents += [shift]
+        dtype = shift.dtype.base_dtype
+      self._shift = shift
+
+      if scale is not None:
+        if (shift is not None and
+            shift.dtype.base_dtype != scale.dtype.base_dtype):
+          raise TypeError(
+              "shift.dtype({}) is incompatible with scale.dtype({}).".format(
+                  shift.dtype, scale.dtype))
+        if not isinstance(scale, linear_operator.LinearOperator):
+          raise TypeError("scale is not an instance of tf.LinearOperator")
+        if validate_args and not scale.is_non_singular:
+          raise ValueError("Scale matrix must be non-singular.")
+        graph_parents += scale.graph_parents
+        if scale.tensor_rank is not None:
+          batch_ndims = scale.tensor_rank - 2
+        else:
+          batch_ndims = scale.tensor_rank_tensor() - 2
+          graph_parents += [batch_ndims]
+        if scale.dtype is not None:
+          dtype = scale.dtype.base_dtype
+      else:
+        batch_ndims = 0  # We won't need shape inference when scale is None.
+      self._scale = scale
+      self._shaper = _DistributionShape(
+          batch_ndims=batch_ndims,
+          event_ndims=event_ndims,
+          validate_args=validate_args)
+      super(AffineLinearOperator, self).__init__(
+          event_ndims=event_ndims,
+          graph_parents=graph_parents,
+          is_constant_jacobian=True,
+          dtype=dtype,
+          validate_args=validate_args,
+          name=name)
+
+  @property
+  def shift(self):
+    """The `shift` `Tensor` in `Y = scale @ X + shift`."""
+    return self._shift
+
+  @property
+  def scale(self):
+    """The `scale` `LinearOperator` in `Y = scale @ X + shift`."""
+    return self._scale
+
+  def _forward(self, x):
+    y = x
+    if self.scale is not None:
+      y, sample_shape = self._shaper.make_batch_of_event_sample_matrices(
+          y, expand_batch_dim=False)
+      with ops.control_dependencies(self._maybe_collect_assertions() if
+                                    self.validate_args else []):
+        y = self.scale.matmul(y)
+      y = self._shaper.undo_make_batch_of_event_sample_matrices(
+          y, sample_shape, expand_batch_dim=False)
+    if self.shift is not None:
+      y += self.shift
+    return y
+
+  def _inverse(self, y):
+    x = y
+    if self.shift is not None:
+      x -= self.shift
+    if self.scale is not None:
+      x, sample_shape = self._shaper.make_batch_of_event_sample_matrices(
+          x, expand_batch_dim=False)
+      # Solve fails if the op is singular so we may safely skip this assertion.
+      x = self.scale.solve(x)
+      x = self._shaper.undo_make_batch_of_event_sample_matrices(
+          x, sample_shape, expand_batch_dim=False)
+    return x
+
+  def _inverse_log_det_jacobian(self, y):
+    return -self._forward_log_det_jacobian(y)
+
+  def _forward_log_det_jacobian(self, x):  # pylint: disable=unused-argument
+    if self.scale is None:
+      return constant_op.constant(0, dtype=x.dtype.base_dtype)
+    with ops.control_dependencies(self._maybe_collect_assertions() if
+                                  self.validate_args else []):
+      return self.scale.log_abs_determinant()
+
+  def _maybe_collect_assertions(self):
+    try:
+      return [self.scale.assert_non_singular()]
+    except NotImplementedError:
+      pass
+    return []
diff --git a/tensorflow/contrib/distributions/python/ops/bijectors/affine_linear_operator_impl.py b/tensorflow/contrib/distributions/python/ops/bijectors/affine_linear_operator_impl.py
deleted file mode 100644
index 89043b1410..0000000000
--- a/tensorflow/contrib/distributions/python/ops/bijectors/affine_linear_operator_impl.py
+++ /dev/null
@@ -1,231 +0,0 @@
-# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""AffineLinearOperator bijector."""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-from tensorflow.contrib.distributions.python.ops.shape import _DistributionShape
-from tensorflow.python.framework import constant_op
-from tensorflow.python.framework import dtypes
-from tensorflow.python.framework import ops
-from tensorflow.python.framework import tensor_util
-from tensorflow.python.ops import check_ops
-from tensorflow.python.ops import control_flow_ops
-from tensorflow.python.ops.distributions import bijector
-from tensorflow.python.ops.linalg import linear_operator
-
-
-__all__ = [
-    "AffineLinearOperator",
-]
-
-
-class AffineLinearOperator(bijector.Bijector):
-  """Compute `Y = g(X; shift, scale) = scale @ X + shift`.
-
-  `shift` is a numeric `Tensor` and `scale` is a `LinearOperator`.
-
-  If `X` is a scalar then the forward transformation is: `scale * X + shift`
-  where `*` denotes the scalar product.
-
-  Note: we don't always simply transpose `X` (but write it this way for
-  brevity). Actually the input `X` undergoes the following transformation
-  before being premultiplied by `scale`:
-
-  1. If there are no sample dims, we call `X = tf.expand_dims(X, 0)`, i.e.,
-     `new_sample_shape = [1]`. Otherwise do nothing.
-  2. The sample shape is flattened to have one dimension, i.e.,
-     `new_sample_shape = [n]` where `n = tf.reduce_prod(old_sample_shape)`.
-  3. The sample dim is cyclically rotated left by 1, i.e.,
-     `new_shape = [B1,...,Bb, k, n]` where `n` is as above, `k` is the
-     event_shape, and `B1,...,Bb` are the batch shapes for each of `b` batch
-     dimensions.
-
-  (For more details see `shape.make_batch_of_event_sample_matrices`.)
-
-  The result of the above transformation is that `X` can be regarded as a batch
-  of matrices where each column is a draw from the distribution. After
-  premultiplying by `scale`, we take the inverse of this procedure. The input
-  `Y` also undergoes the same transformation before/after premultiplying by
-  `inv(scale)`.
-
-  Example Use:
-
-  ```python
-  linalg = tf.linalg
-
-  x = [1., 2, 3]
-
-  shift = [-1., 0., 1]
-  diag = [1., 2, 3]
-  scale = linalg.LinearOperatorDiag(diag)
-  affine = AffineLinearOperator(shift, scale)
-  # In this case, `forward` is equivalent to:
-  # y = scale @ x + shift
-  y = affine.forward(x)  # [0., 4, 10]
-
-  shift = [2., 3, 1]
-  tril = [[1., 0, 0],
-          [2, 1, 0],
-          [3, 2, 1]]
-  scale = linalg.LinearOperatorLowerTriangular(tril)
-  affine = AffineLinearOperator(shift, scale)
-  # In this case, `forward` is equivalent to:
-  # np.squeeze(np.matmul(tril, np.expand_dims(x, -1)), -1) + shift
-  y = affine.forward(x)  # [3., 7, 11]
-  ```
-
-  """
-
-  def __init__(self,
-               shift=None,
-               scale=None,
-               event_ndims=1,
-               validate_args=False,
-               name="affine_linear_operator"):
-    """Instantiates the `AffineLinearOperator` bijector.
-
-    Args:
-      shift: Floating-point `Tensor`.
-      scale:  Subclass of `LinearOperator`. Represents the (batch) positive
-        definite matrix `M` in `R^{k x k}`.
-      event_ndims: Scalar `integer` `Tensor` indicating the number of dimensions
-        associated with a particular draw from the distribution. Must be 0 or 1.
-      validate_args: Python `bool` indicating whether arguments should be
-        checked for correctness.
-      name: Python `str` name given to ops managed by this object.
-
-    Raises:
-      ValueError: if `event_ndims` is not 0 or 1.
-      TypeError: if `scale` is not a `LinearOperator`.
-      TypeError: if `shift.dtype` does not match `scale.dtype`.
-      ValueError: if not `scale.is_non_singular`.
-    """
-    self._graph_parents = []
-    self._name = name
-    self._validate_args = validate_args
-    graph_parents = []
-    with self._name_scope("init", values=[shift]):
-      event_ndims = ops.convert_to_tensor(event_ndims, name="event_ndims")
-      if tensor_util.constant_value(event_ndims) is not None:
-        event_ndims = tensor_util.constant_value(event_ndims)
-        if event_ndims not in (0, 1):
-          raise ValueError("event_ndims({}) was not 0 or 1".format(event_ndims))
-      else:
-        if validate_args:
-          # Shape tool will catch if event_ndims is negative.
-          event_ndims = control_flow_ops.with_dependencies(
-              [check_ops.assert_less(
-                  event_ndims, 2, message="event_ndims must be 0 or 1")],
-              event_ndims)
-        graph_parents += [event_ndims]
-
-      # In the absence of `loc` and `scale`, we'll assume `dtype` is `float32`.
-      dtype = dtypes.float32
-
-      if shift is not None:
-        shift = ops.convert_to_tensor(shift, name="shift")
-        graph_parents += [shift]
-        dtype = shift.dtype.base_dtype
-      self._shift = shift
-
-      if scale is not None:
-        if (shift is not None and
-            shift.dtype.base_dtype != scale.dtype.base_dtype):
-          raise TypeError(
-              "shift.dtype({}) is incompatible with scale.dtype({}).".format(
-                  shift.dtype, scale.dtype))
-        if not isinstance(scale, linear_operator.LinearOperator):
-          raise TypeError("scale is not an instance of tf.LinearOperator")
-        if validate_args and not scale.is_non_singular:
-          raise ValueError("Scale matrix must be non-singular.")
-        graph_parents += scale.graph_parents
-        if scale.tensor_rank is not None:
-          batch_ndims = scale.tensor_rank - 2
-        else:
-          batch_ndims = scale.tensor_rank_tensor() - 2
-          graph_parents += [batch_ndims]
-        if scale.dtype is not None:
-          dtype = scale.dtype.base_dtype
-      else:
-        batch_ndims = 0  # We won't need shape inference when scale is None.
-      self._scale = scale
-      self._shaper = _DistributionShape(
-          batch_ndims=batch_ndims,
-          event_ndims=event_ndims,
-          validate_args=validate_args)
-      super(AffineLinearOperator, self).__init__(
-          event_ndims=event_ndims,
-          graph_parents=graph_parents,
-          is_constant_jacobian=True,
-          dtype=dtype,
-          validate_args=validate_args,
-          name=name)
-
-  @property
-  def shift(self):
-    """The `shift` `Tensor` in `Y = scale @ X + shift`."""
-    return self._shift
-
-  @property
-  def scale(self):
-    """The `scale` `LinearOperator` in `Y = scale @ X + shift`."""
-    return self._scale
-
-  def _forward(self, x):
-    y = x
-    if self.scale is not None:
-      y, sample_shape = self._shaper.make_batch_of_event_sample_matrices(
-          y, expand_batch_dim=False)
-      with ops.control_dependencies(self._maybe_collect_assertions() if
-                                    self.validate_args else []):
-        y = self.scale.matmul(y)
-      y = self._shaper.undo_make_batch_of_event_sample_matrices(
-          y, sample_shape, expand_batch_dim=False)
-    if self.shift is not None:
-      y += self.shift
-    return y
-
-  def _inverse(self, y):
-    x = y
-    if self.shift is not None:
-      x -= self.shift
-    if self.scale is not None:
-      x, sample_shape = self._shaper.make_batch_of_event_sample_matrices(
-          x, expand_batch_dim=False)
-      # Solve fails if the op is singular so we may safely skip this assertion.
-      x = self.scale.solve(x)
-      x = self._shaper.undo_make_batch_of_event_sample_matrices(
-          x, sample_shape, expand_batch_dim=False)
-    return x
-
-  def _inverse_log_det_jacobian(self, y):
-    return -self._forward_log_det_jacobian(y)
-
-  def _forward_log_det_jacobian(self, x):  # pylint: disable=unused-argument
-    if self.scale is None:
-      return constant_op.constant(0, dtype=x.dtype.base_dtype)
-    with ops.control_dependencies(self._maybe_collect_assertions() if
-                                  self.validate_args else []):
-      return self.scale.log_abs_determinant()
-
-  def _maybe_collect_assertions(self):
-    try:
-      return [self.scale.assert_non_singular()]
-    except NotImplementedError:
-      pass
-    return []
diff --git a/tensorflow/contrib/distributions/python/ops/bijectors/chain.py b/tensorflow/contrib/distributions/python/ops/bijectors/chain.py
index 0db10fb75c..3ce7c26213 100644
--- a/tensorflow/contrib/distributions/python/ops/bijectors/chain.py
+++ b/tensorflow/contrib/distributions/python/ops/bijectors/chain.py
@@ -18,12 +18,151 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-# go/tf-wildcard-import
-# pylint: disable=wildcard-import
-from tensorflow.contrib.distributions.python.ops.bijectors.chain_impl import *
-# pylint: enable=wildcard-import
-from tensorflow.python.util.all_util import remove_undocumented
+import itertools
 
-_allowed_symbols = ["Chain"]
+from tensorflow.python.framework import constant_op
+from tensorflow.python.ops.distributions import bijector
 
-remove_undocumented(__name__, _allowed_symbols)
+
+__all__ = [
+    "Chain",
+]
+
+
+class Chain(bijector.Bijector):
+  """Bijector which applies a sequence of bijectors.
+
+  Example Use:
+
+  ```python
+  chain = Chain([Exp(), Softplus()], name="one_plus_exp")
+  ```
+
+  Results in:
+
+  * Forward:
+
+   ```python
+   exp = Exp()
+   softplus = Softplus()
+   Chain([exp, softplus]).forward(x)
+   = exp.forward(softplus.forward(x))
+   = tf.exp(tf.log(1. + tf.exp(x)))
+   = 1. + tf.exp(x)
+   ```
+
+  * Inverse:
+
+   ```python
+   exp = Exp()
+   softplus = Softplus()
+   Chain([exp, softplus]).inverse(y)
+   = softplus.inverse(exp.inverse(y))
+   = tf.log(tf.exp(tf.log(y)) - 1.)
+   = tf.log(y - 1.)
+   ```
+
+  """
+
+  def __init__(self, bijectors=None, validate_args=False, name=None):
+    """Instantiates `Chain` bijector.
+
+    Args:
+      bijectors: Python `list` of bijector instances. An empty list makes this
+        bijector equivalent to the `Identity` bijector.
+      validate_args: Python `bool` indicating whether arguments should be
+        checked for correctness.
+      name: Python `str`, name given to ops managed by this object. Default:
+        E.g., `Chain([Exp(), Softplus()]).name == "chain_of_exp_of_softplus"`.
+
+    Raises:
+      ValueError: if bijectors have different dtypes.
+    """
+    if bijectors is None:
+      bijectors = ()
+    self._bijectors = bijectors
+
+    for a_bijector in bijectors:
+      if not a_bijector._is_injective:  # pylint: disable=protected-access
+        raise NotImplementedError(
+            "Invert is not implemented for non-injective bijector ({})".format(
+                a_bijector.name))
+
+    dtype = list(set([b.dtype for b in bijectors]))
+    if len(dtype) > 2:
+      raise ValueError("incompatible dtypes: %s" % dtype)
+    elif len(dtype) == 2:
+      dtype = dtype[1] if dtype[0] is None else dtype[0]
+      event_ndims = bijectors[0].event_ndims
+    elif len(dtype) == 1:
+      dtype = dtype[0]
+      event_ndims = bijectors[0].event_ndims
+    else:
+      dtype = None
+      event_ndims = None
+
+    super(Chain, self).__init__(
+        graph_parents=list(itertools.chain.from_iterable(
+            b.graph_parents for b in bijectors)),
+        is_constant_jacobian=all(b.is_constant_jacobian for b in bijectors),
+        validate_args=validate_args,
+        dtype=dtype,
+        event_ndims=event_ndims,
+        name=name or ("identity" if not bijectors else
+                      "_of_".join(["chain"] + [b.name for b in bijectors])))
+
+  @property
+  def bijectors(self):
+    return self._bijectors
+
+  def _shape_helper(self, func_name, input_shape, reverse):
+    new_shape = input_shape
+    for b in reversed(self.bijectors) if reverse else self.bijectors:
+      func = getattr(b, func_name, None)
+      if func is None:
+        raise ValueError("unable to call %s on bijector %s (%s)" %
+                         (func_name, b.name, func))
+      new_shape = func(new_shape)
+    return new_shape
+
+  def _forward_event_shape(self, input_shape):
+    return self._shape_helper("forward_event_shape", input_shape,
+                              reverse=True)
+
+  def _forward_event_shape_tensor(self, input_shape):
+    return self._shape_helper(
+        "forward_event_shape_tensor", input_shape, reverse=True)
+
+  def _inverse_event_shape(self, output_shape):
+    return self._shape_helper("inverse_event_shape", output_shape,
+                              reverse=False)
+
+  def _inverse_event_shape_tensor(self, output_shape):
+    return self._shape_helper("inverse_event_shape_tensor", output_shape,
+                              reverse=False)
+
+  def _inverse(self, y, **kwargs):
+    for b in self.bijectors:
+      y = b.inverse(y, **kwargs.get(b.name, {}))
+    return y
+
+  def _inverse_log_det_jacobian(self, y, **kwargs):
+    ildj = constant_op.constant(0., dtype=y.dtype,
+                                name="inverse_log_det_jacobian")
+    for b in self.bijectors:
+      ildj += b.inverse_log_det_jacobian(y, **kwargs.get(b.name, {}))
+      y = b.inverse(y, **kwargs.get(b.name, {}))
+    return ildj
+
+  def _forward(self, x, **kwargs):
+    for b in reversed(self.bijectors):
+      x = b.forward(x, **kwargs.get(b.name, {}))
+    return x
+
+  def _forward_log_det_jacobian(self, x, **kwargs):
+    fldj = constant_op.constant(0., dtype=x.dtype,
+                                name="forward_log_det_jacobian")
+    for b in reversed(self.bijectors):
+      fldj += b.forward_log_det_jacobian(x, **kwargs.get(b.name, {}))
+      x = b.forward(x, **kwargs.get(b.name, {}))
+    return fldj
diff --git a/tensorflow/contrib/distributions/python/ops/bijectors/chain_impl.py b/tensorflow/contrib/distributions/python/ops/bijectors/chain_impl.py
deleted file mode 100644
index 3ce7c26213..0000000000
--- a/tensorflow/contrib/distributions/python/ops/bijectors/chain_impl.py
+++ /dev/null
@@ -1,168 +0,0 @@
-# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Chain bijector."""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import itertools
-
-from tensorflow.python.framework import constant_op
-from tensorflow.python.ops.distributions import bijector
-
-
-__all__ = [
-    "Chain",
-]
-
-
-class Chain(bijector.Bijector):
-  """Bijector which applies a sequence of bijectors.
-
-  Example Use:
-
-  ```python
-  chain = Chain([Exp(), Softplus()], name="one_plus_exp")
-  ```
-
-  Results in:
-
-  * Forward:
-
-   ```python
-   exp = Exp()
-   softplus = Softplus()
-   Chain([exp, softplus]).forward(x)
-   = exp.forward(softplus.forward(x))
-   = tf.exp(tf.log(1. + tf.exp(x)))
-   = 1. + tf.exp(x)
-   ```
-
-  * Inverse:
-
-   ```python
-   exp = Exp()
-   softplus = Softplus()
-   Chain([exp, softplus]).inverse(y)
-   = softplus.inverse(exp.inverse(y))
-   = tf.log(tf.exp(tf.log(y)) - 1.)
-   = tf.log(y - 1.)
-   ```
-
-  """
-
-  def __init__(self, bijectors=None, validate_args=False, name=None):
-    """Instantiates `Chain` bijector.
-
-    Args:
-      bijectors: Python `list` of bijector instances. An empty list makes this
-        bijector equivalent to the `Identity` bijector.
-      validate_args: Python `bool` indicating whether arguments should be
-        checked for correctness.
-      name: Python `str`, name given to ops managed by this object. Default:
-        E.g., `Chain([Exp(), Softplus()]).name == "chain_of_exp_of_softplus"`.
-
-    Raises:
-      ValueError: if bijectors have different dtypes.
-    """
-    if bijectors is None:
-      bijectors = ()
-    self._bijectors = bijectors
-
-    for a_bijector in bijectors:
-      if not a_bijector._is_injective:  # pylint: disable=protected-access
-        raise NotImplementedError(
-            "Invert is not implemented for non-injective bijector ({})".format(
-                a_bijector.name))
-
-    dtype = list(set([b.dtype for b in bijectors]))
-    if len(dtype) > 2:
-      raise ValueError("incompatible dtypes: %s" % dtype)
-    elif len(dtype) == 2:
-      dtype = dtype[1] if dtype[0] is None else dtype[0]
-      event_ndims = bijectors[0].event_ndims
-    elif len(dtype) == 1:
-      dtype = dtype[0]
-      event_ndims = bijectors[0].event_ndims
-    else:
-      dtype = None
-      event_ndims = None
-
-    super(Chain, self).__init__(
-        graph_parents=list(itertools.chain.from_iterable(
-            b.graph_parents for b in bijectors)),
-        is_constant_jacobian=all(b.is_constant_jacobian for b in bijectors),
-        validate_args=validate_args,
-        dtype=dtype,
-        event_ndims=event_ndims,
-        name=name or ("identity" if not bijectors else
-                      "_of_".join(["chain"] + [b.name for b in bijectors])))
-
-  @property
-  def bijectors(self):
-    return self._bijectors
-
-  def _shape_helper(self, func_name, input_shape, reverse):
-    new_shape = input_shape
-    for b in reversed(self.bijectors) if reverse else self.bijectors:
-      func = getattr(b, func_name, None)
-      if func is None:
-        raise ValueError("unable to call %s on bijector %s (%s)" %
-                         (func_name, b.name, func))
-      new_shape = func(new_shape)
-    return new_shape
-
-  def _forward_event_shape(self, input_shape):
-    return self._shape_helper("forward_event_shape", input_shape,
-                              reverse=True)
-
-  def _forward_event_shape_tensor(self, input_shape):
-    return self._shape_helper(
-        "forward_event_shape_tensor", input_shape, reverse=True)
-
-  def _inverse_event_shape(self, output_shape):
-    return self._shape_helper("inverse_event_shape", output_shape,
-                              reverse=False)
-
-  def _inverse_event_shape_tensor(self, output_shape):
-    return self._shape_helper("inverse_event_shape_tensor", output_shape,
-                              reverse=False)
-
-  def _inverse(self, y, **kwargs):
-    for b in self.bijectors:
-      y = b.inverse(y, **kwargs.get(b.name, {}))
-    return y
-
-  def _inverse_log_det_jacobian(self, y, **kwargs):
-    ildj = constant_op.constant(0., dtype=y.dtype,
-                                name="inverse_log_det_jacobian")
-    for b in self.bijectors:
-      ildj += b.inverse_log_det_jacobian(y, **kwargs.get(b.name, {}))
-      y = b.inverse(y, **kwargs.get(b.name, {}))
-    return ildj
-
-  def _forward(self, x, **kwargs):
-    for b in reversed(self.bijectors):
-      x = b.forward(x, **kwargs.get(b.name, {}))
-    return x
-
-  def _forward_log_det_jacobian(self, x, **kwargs):
-    fldj = constant_op.constant(0., dtype=x.dtype,
-                                name="forward_log_det_jacobian")
-    for b in reversed(self.bijectors):
-      fldj += b.forward_log_det_jacobian(x, **kwargs.get(b.name, {}))
-      x = b.forward(x, **kwargs.get(b.name, {}))
-    return fldj
diff --git a/tensorflow/contrib/distributions/python/ops/bijectors/cholesky_outer_product.py b/tensorflow/contrib/distributions/python/ops/bijectors/cholesky_outer_product.py
index 4686af8bc4..cbd60f92a6 100644
--- a/tensorflow/contrib/distributions/python/ops/bijectors/cholesky_outer_product.py
+++ b/tensorflow/contrib/distributions/python/ops/bijectors/cholesky_outer_product.py
@@ -18,12 +18,219 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-# go/tf-wildcard-import
-# pylint: disable=wildcard-import
-from tensorflow.contrib.distributions.python.ops.bijectors.cholesky_outer_product_impl import *
-# pylint: enable=wildcard-import
-from tensorflow.python.util.all_util import remove_undocumented
+import numpy as np
 
-_allowed_symbols = ["CholeskyOuterProduct"]
+from tensorflow.python.framework import ops
+from tensorflow.python.framework import tensor_util
+from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import check_ops
+from tensorflow.python.ops import control_flow_ops
+from tensorflow.python.ops import linalg_ops
+from tensorflow.python.ops import math_ops
+from tensorflow.python.ops.distributions import bijector
+from tensorflow.python.ops.distributions import util as distribution_util
 
-remove_undocumented(__name__, _allowed_symbols)
+
+__all__ = [
+    "CholeskyOuterProduct",
+]
+
+
+class CholeskyOuterProduct(bijector.Bijector):
+  """Compute `g(X) = X @ X.T`; X is lower-triangular, positive-diagonal matrix.
+
+  `event_ndims` must be 0 or 2, i.e., scalar or matrix.
+
+  Note: the upper-triangular part of X is ignored (whether or not its zero).
+
+  The surjectivity of g as a map from  the set of n x n positive-diagonal
+  lower-triangular matrices to the set of SPD matrices follows immediately from
+  executing the Cholesky factorization algorithm on an SPD matrix A to produce a
+  positive-diagonal lower-triangular matrix L such that `A = L @ L.T`.
+
+  To prove the injectivity of g, suppose that L_1 and L_2 are lower-triangular
+  with positive diagonals and satisfy `A = L_1 @ L_1.T = L_2 @ L_2.T`. Then
+    `inv(L_1) @ A @ inv(L_1).T = [inv(L_1) @ L_2] @ [inv(L_1) @ L_2].T = I`.
+  Setting `L_3 := inv(L_1) @ L_2`, that L_3 is a positive-diagonal
+  lower-triangular matrix follows from `inv(L_1)` being positive-diagonal
+  lower-triangular (which follows from the diagonal of a triangular matrix being
+  its spectrum), and that the product of two positive-diagonal lower-triangular
+  matrices is another positive-diagonal lower-triangular matrix.
+
+  A simple inductive argument (proceding one column of L_3 at a time) shows
+  that, if `I = L_3 @ L_3.T`, with L_3 being lower-triangular with positive-
+  diagonal, then `L_3 = I`. Thus, `L_1 = L_2`, proving injectivity of g.
+
+  Examples:
+
+  ```python
+  bijector.CholeskyOuterProduct(event_ndims=2).forward(x=[[1., 0], [2, 1]])
+  # Result: [[1., 2], [2, 5]], i.e., x @ x.T
+
+  bijector.CholeskyOuterProduct(event_ndims=2).inverse(y=[[1., 2], [2, 5]])
+  # Result: [[1., 0], [2, 1]], i.e., cholesky(y).
+  ```
+
+  """
+
+  def __init__(self, event_ndims=2, validate_args=False,
+               name="cholesky_outer_product"):
+    """Instantiates the `CholeskyOuterProduct` bijector.
+
+    Args:
+      event_ndims: `constant` `int32` scalar `Tensor` indicating the number of
+        dimensions associated with a particular draw from the distribution. Must
+        be 0 or 2.
+      validate_args: Python `bool` indicating whether arguments should be
+        checked for correctness.
+      name: Python `str` name given to ops managed by this object.
+
+    Raises:
+      ValueError: if event_ndims is neither 0 or 2.
+    """
+    self._graph_parents = []
+    self._name = name
+    with self._name_scope("init", values=[event_ndims]):
+      event_ndims = ops.convert_to_tensor(event_ndims, name="event_ndims")
+      event_ndims = tensor_util.constant_value(event_ndims)
+    if event_ndims is None or event_ndims not in [0, 2]:
+      raise ValueError("`event_ndims` must be a TF constant which is 0 or 2")
+    self._static_event_ndims = event_ndims
+    super(CholeskyOuterProduct, self).__init__(
+        event_ndims=event_ndims,
+        validate_args=validate_args,
+        name=name)
+
+  def _forward(self, x):
+    if self._static_event_ndims == 0:
+      return math_ops.square(x)
+    if self.validate_args:
+      is_matrix = check_ops.assert_rank_at_least(x, 2)
+      shape = array_ops.shape(x)
+      is_square = check_ops.assert_equal(shape[-2], shape[-1])
+      x = control_flow_ops.with_dependencies([is_matrix, is_square], x)
+    # For safety, explicitly zero-out the upper triangular part.
+    x = array_ops.matrix_band_part(x, -1, 0)
+    return math_ops.matmul(x, x, adjoint_b=True)
+
+  def _inverse(self, y):
+    return (math_ops.sqrt(y) if self._static_event_ndims == 0
+            else linalg_ops.cholesky(y))
+
+  def _inverse_log_det_jacobian(self, y):
+    return -self._forward_log_det_jacobian(x=self._inverse(y))
+
+  def _forward_log_det_jacobian(self, x):
+    # Let Y be a symmetric, positive definite matrix and write:
+    #   Y = X X.T
+    # where X is lower-triangular.
+    #
+    # Observe that,
+    #   dY[i,j]/dX[a,b]
+    #   = d/dX[a,b] { X[i,:] X[j,:] }
+    #   = sum_{d=1}^p { I[i=a] I[d=b] X[j,d] + I[j=a] I[d=b] X[i,d] }
+    #
+    # To compute the Jacobian dX/dY we must represent X,Y as vectors. Since Y is
+    # symmetric and X is lower-triangular, we need vectors of dimension:
+    #   d = p (p + 1) / 2
+    # where X, Y are p x p matrices, p > 0. We use a row-major mapping, i.e.,
+    #   k = { i (i + 1) / 2 + j   i>=j
+    #       { undef               i<j
+    # and assume zero-based indexes. When k is undef, the element is dropped.
+    # Example:
+    #           j      k
+    #        0 1 2 3  /
+    #    0 [ 0 . . . ]
+    # i  1 [ 1 2 . . ]
+    #    2 [ 3 4 5 . ]
+    #    3 [ 6 7 8 9 ]
+    # Write vec[.] to indicate transforming a matrix to vector via k(i,j). (With
+    # slight abuse: k(i,j)=undef means the element is dropped.)
+    #
+    # We now show d vec[Y] / d vec[X] is lower triangular. Assuming both are
+    # defined, observe that k(i,j) < k(a,b) iff (1) i<a or (2) i=a and j<b.
+    # In both cases dvec[Y]/dvec[X]@[k(i,j),k(a,b)] = 0 since:
+    # (1) j<=i<a thus i,j!=a.
+    # (2) i=a>j  thus i,j!=a.
+    #
+    # Since the Jacobian is lower-triangular, we need only compute the product
+    # of diagonal elements:
+    #   d vec[Y] / d vec[X] @[k(i,j), k(i,j)]
+    #   = X[j,j] + I[i=j] X[i,j]
+    #   = 2 X[j,j].
+    # Since there is a 2 X[j,j] term for every lower-triangular element of X we
+    # conclude:
+    #   |Jac(d vec[Y]/d vec[X])| = 2^p prod_{j=0}^{p-1} X[j,j]^{p-j}.
+    if self._static_event_ndims == 0:
+      if self.validate_args:
+        is_positive = check_ops.assert_positive(
+            x, message="All elements must be positive.")
+        x = control_flow_ops.with_dependencies([is_positive], x)
+      return np.log(2.) + math_ops.log(x)
+
+    diag = array_ops.matrix_diag_part(x)
+
+    # We now ensure diag is columnar. Eg, if `diag = [1, 2, 3]` then the output
+    # is `[[1], [2], [3]]` and if `diag = [[1, 2, 3], [4, 5, 6]]` then the
+    # output is unchanged.
+    diag = self._make_columnar(diag)
+
+    if self.validate_args:
+      is_matrix = check_ops.assert_rank_at_least(
+          x, 2, message="Input must be a (batch of) matrix.")
+      shape = array_ops.shape(x)
+      is_square = check_ops.assert_equal(
+          shape[-2], shape[-1],
+          message="Input must be a (batch of) square matrix.")
+      # Assuming lower-triangular means we only need check diag>0.
+      is_positive_definite = check_ops.assert_positive(
+          diag, message="Input must be positive definite.")
+      x = control_flow_ops.with_dependencies(
+          [is_matrix, is_square, is_positive_definite], x)
+
+    # Create a vector equal to: [p, p-1, ..., 2, 1].
+    if x.get_shape().ndims is None or x.get_shape()[-1].value is None:
+      p_int = array_ops.shape(x)[-1]
+      p_float = math_ops.cast(p_int, dtype=x.dtype)
+    else:
+      p_int = x.get_shape()[-1].value
+      p_float = np.array(p_int, dtype=x.dtype.as_numpy_dtype)
+    exponents = math_ops.linspace(p_float, 1., p_int)
+
+    sum_weighted_log_diag = array_ops.squeeze(
+        math_ops.matmul(math_ops.log(diag),
+                        exponents[..., array_ops.newaxis]),
+        squeeze_dims=-1)
+    fldj = p_float * np.log(2.) + sum_weighted_log_diag
+
+    return fldj
+
+  def _make_columnar(self, x):
+    """Ensures non-scalar input has at least one column.
+
+    Example:
+      If `x = [1, 2, 3]` then the output is `[[1], [2], [3]]`.
+
+      If `x = [[1, 2, 3], [4, 5, 6]]` then the output is unchanged.
+
+      If `x = 1` then the output is unchanged.
+
+    Args:
+      x: `Tensor`.
+
+    Returns:
+      columnar_x: `Tensor` with at least two dimensions.
+    """
+    if x.get_shape().ndims is not None:
+      if x.get_shape().ndims == 1:
+        x = x[array_ops.newaxis, :]
+      return x
+    shape = array_ops.shape(x)
+    maybe_expanded_shape = array_ops.concat([
+        shape[:-1],
+        distribution_util.pick_vector(
+            math_ops.equal(array_ops.rank(x), 1),
+            [1], np.array([], dtype=np.int32)),
+        shape[-1:],
+    ], 0)
+    return array_ops.reshape(x, maybe_expanded_shape)
diff --git a/tensorflow/contrib/distributions/python/ops/bijectors/cholesky_outer_product_impl.py b/tensorflow/contrib/distributions/python/ops/bijectors/cholesky_outer_product_impl.py
deleted file mode 100644
index cbd60f92a6..0000000000
--- a/tensorflow/contrib/distributions/python/ops/bijectors/cholesky_outer_product_impl.py
+++ /dev/null
@@ -1,236 +0,0 @@
-# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""CholeskyOuterProduct bijector."""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import numpy as np
-
-from tensorflow.python.framework import ops
-from tensorflow.python.framework import tensor_util
-from tensorflow.python.ops import array_ops
-from tensorflow.python.ops import check_ops
-from tensorflow.python.ops import control_flow_ops
-from tensorflow.python.ops import linalg_ops
-from tensorflow.python.ops import math_ops
-from tensorflow.python.ops.distributions import bijector
-from tensorflow.python.ops.distributions import util as distribution_util
-
-
-__all__ = [
-    "CholeskyOuterProduct",
-]
-
-
-class CholeskyOuterProduct(bijector.Bijector):
-  """Compute `g(X) = X @ X.T`; X is lower-triangular, positive-diagonal matrix.
-
-  `event_ndims` must be 0 or 2, i.e., scalar or matrix.
-
-  Note: the upper-triangular part of X is ignored (whether or not its zero).
-
-  The surjectivity of g as a map from  the set of n x n positive-diagonal
-  lower-triangular matrices to the set of SPD matrices follows immediately from
-  executing the Cholesky factorization algorithm on an SPD matrix A to produce a
-  positive-diagonal lower-triangular matrix L such that `A = L @ L.T`.
-
-  To prove the injectivity of g, suppose that L_1 and L_2 are lower-triangular
-  with positive diagonals and satisfy `A = L_1 @ L_1.T = L_2 @ L_2.T`. Then
-    `inv(L_1) @ A @ inv(L_1).T = [inv(L_1) @ L_2] @ [inv(L_1) @ L_2].T = I`.
-  Setting `L_3 := inv(L_1) @ L_2`, that L_3 is a positive-diagonal
-  lower-triangular matrix follows from `inv(L_1)` being positive-diagonal
-  lower-triangular (which follows from the diagonal of a triangular matrix being
-  its spectrum), and that the product of two positive-diagonal lower-triangular
-  matrices is another positive-diagonal lower-triangular matrix.
-
-  A simple inductive argument (proceding one column of L_3 at a time) shows
-  that, if `I = L_3 @ L_3.T`, with L_3 being lower-triangular with positive-
-  diagonal, then `L_3 = I`. Thus, `L_1 = L_2`, proving injectivity of g.
-
-  Examples:
-
-  ```python
-  bijector.CholeskyOuterProduct(event_ndims=2).forward(x=[[1., 0], [2, 1]])
-  # Result: [[1., 2], [2, 5]], i.e., x @ x.T
-
-  bijector.CholeskyOuterProduct(event_ndims=2).inverse(y=[[1., 2], [2, 5]])
-  # Result: [[1., 0], [2, 1]], i.e., cholesky(y).
-  ```
-
-  """
-
-  def __init__(self, event_ndims=2, validate_args=False,
-               name="cholesky_outer_product"):
-    """Instantiates the `CholeskyOuterProduct` bijector.
-
-    Args:
-      event_ndims: `constant` `int32` scalar `Tensor` indicating the number of
-        dimensions associated with a particular draw from the distribution. Must
-        be 0 or 2.
-      validate_args: Python `bool` indicating whether arguments should be
-        checked for correctness.
-      name: Python `str` name given to ops managed by this object.
-
-    Raises:
-      ValueError: if event_ndims is neither 0 or 2.
-    """
-    self._graph_parents = []
-    self._name = name
-    with self._name_scope("init", values=[event_ndims]):
-      event_ndims = ops.convert_to_tensor(event_ndims, name="event_ndims")
-      event_ndims = tensor_util.constant_value(event_ndims)
-    if event_ndims is None or event_ndims not in [0, 2]:
-      raise ValueError("`event_ndims` must be a TF constant which is 0 or 2")
-    self._static_event_ndims = event_ndims
-    super(CholeskyOuterProduct, self).__init__(
-        event_ndims=event_ndims,
-        validate_args=validate_args,
-        name=name)
-
-  def _forward(self, x):
-    if self._static_event_ndims == 0:
-      return math_ops.square(x)
-    if self.validate_args:
-      is_matrix = check_ops.assert_rank_at_least(x, 2)
-      shape = array_ops.shape(x)
-      is_square = check_ops.assert_equal(shape[-2], shape[-1])
-      x = control_flow_ops.with_dependencies([is_matrix, is_square], x)
-    # For safety, explicitly zero-out the upper triangular part.
-    x = array_ops.matrix_band_part(x, -1, 0)
-    return math_ops.matmul(x, x, adjoint_b=True)
-
-  def _inverse(self, y):
-    return (math_ops.sqrt(y) if self._static_event_ndims == 0
-            else linalg_ops.cholesky(y))
-
-  def _inverse_log_det_jacobian(self, y):
-    return -self._forward_log_det_jacobian(x=self._inverse(y))
-
-  def _forward_log_det_jacobian(self, x):
-    # Let Y be a symmetric, positive definite matrix and write:
-    #   Y = X X.T
-    # where X is lower-triangular.
-    #
-    # Observe that,
-    #   dY[i,j]/dX[a,b]
-    #   = d/dX[a,b] { X[i,:] X[j,:] }
-    #   = sum_{d=1}^p { I[i=a] I[d=b] X[j,d] + I[j=a] I[d=b] X[i,d] }
-    #
-    # To compute the Jacobian dX/dY we must represent X,Y as vectors. Since Y is
-    # symmetric and X is lower-triangular, we need vectors of dimension:
-    #   d = p (p + 1) / 2
-    # where X, Y are p x p matrices, p > 0. We use a row-major mapping, i.e.,
-    #   k = { i (i + 1) / 2 + j   i>=j
-    #       { undef               i<j
-    # and assume zero-based indexes. When k is undef, the element is dropped.
-    # Example:
-    #           j      k
-    #        0 1 2 3  /
-    #    0 [ 0 . . . ]
-    # i  1 [ 1 2 . . ]
-    #    2 [ 3 4 5 . ]
-    #    3 [ 6 7 8 9 ]
-    # Write vec[.] to indicate transforming a matrix to vector via k(i,j). (With
-    # slight abuse: k(i,j)=undef means the element is dropped.)
-    #
-    # We now show d vec[Y] / d vec[X] is lower triangular. Assuming both are
-    # defined, observe that k(i,j) < k(a,b) iff (1) i<a or (2) i=a and j<b.
-    # In both cases dvec[Y]/dvec[X]@[k(i,j),k(a,b)] = 0 since:
-    # (1) j<=i<a thus i,j!=a.
-    # (2) i=a>j  thus i,j!=a.
-    #
-    # Since the Jacobian is lower-triangular, we need only compute the product
-    # of diagonal elements:
-    #   d vec[Y] / d vec[X] @[k(i,j), k(i,j)]
-    #   = X[j,j] + I[i=j] X[i,j]
-    #   = 2 X[j,j].
-    # Since there is a 2 X[j,j] term for every lower-triangular element of X we
-    # conclude:
-    #   |Jac(d vec[Y]/d vec[X])| = 2^p prod_{j=0}^{p-1} X[j,j]^{p-j}.
-    if self._static_event_ndims == 0:
-      if self.validate_args:
-        is_positive = check_ops.assert_positive(
-            x, message="All elements must be positive.")
-        x = control_flow_ops.with_dependencies([is_positive], x)
-      return np.log(2.) + math_ops.log(x)
-
-    diag = array_ops.matrix_diag_part(x)
-
-    # We now ensure diag is columnar. Eg, if `diag = [1, 2, 3]` then the output
-    # is `[[1], [2], [3]]` and if `diag = [[1, 2, 3], [4, 5, 6]]` then the
-    # output is unchanged.
-    diag = self._make_columnar(diag)
-
-    if self.validate_args:
-      is_matrix = check_ops.assert_rank_at_least(
-          x, 2, message="Input must be a (batch of) matrix.")
-      shape = array_ops.shape(x)
-      is_square = check_ops.assert_equal(
-          shape[-2], shape[-1],
-          message="Input must be a (batch of) square matrix.")
-      # Assuming lower-triangular means we only need check diag>0.
-      is_positive_definite = check_ops.assert_positive(
-          diag, message="Input must be positive definite.")
-      x = control_flow_ops.with_dependencies(
-          [is_matrix, is_square, is_positive_definite], x)
-
-    # Create a vector equal to: [p, p-1, ..., 2, 1].
-    if x.get_shape().ndims is None or x.get_shape()[-1].value is None:
-      p_int = array_ops.shape(x)[-1]
-      p_float = math_ops.cast(p_int, dtype=x.dtype)
-    else:
-      p_int = x.get_shape()[-1].value
-      p_float = np.array(p_int, dtype=x.dtype.as_numpy_dtype)
-    exponents = math_ops.linspace(p_float, 1., p_int)
-
-    sum_weighted_log_diag = array_ops.squeeze(
-        math_ops.matmul(math_ops.log(diag),
-                        exponents[..., array_ops.newaxis]),
-        squeeze_dims=-1)
-    fldj = p_float * np.log(2.) + sum_weighted_log_diag
-
-    return fldj
-
-  def _make_columnar(self, x):
-    """Ensures non-scalar input has at least one column.
-
-    Example:
-      If `x = [1, 2, 3]` then the output is `[[1], [2], [3]]`.
-
-      If `x = [[1, 2, 3], [4, 5, 6]]` then the output is unchanged.
-
-      If `x = 1` then the output is unchanged.
-
-    Args:
-      x: `Tensor`.
-
-    Returns:
-      columnar_x: `Tensor` with at least two dimensions.
-    """
-    if x.get_shape().ndims is not None:
-      if x.get_shape().ndims == 1:
-        x = x[array_ops.newaxis, :]
-      return x
-    shape = array_ops.shape(x)
-    maybe_expanded_shape = array_ops.concat([
-        shape[:-1],
-        distribution_util.pick_vector(
-            math_ops.equal(array_ops.rank(x), 1),
-            [1], np.array([], dtype=np.int32)),
-        shape[-1:],
-    ], 0)
-    return array_ops.reshape(x, maybe_expanded_shape)
diff --git a/tensorflow/contrib/distributions/python/ops/bijectors/conditional_bijector.py b/tensorflow/contrib/distributions/python/ops/bijectors/conditional_bijector.py
index d254b635d2..ccb1f02927 100644
--- a/tensorflow/contrib/distributions/python/ops/bijectors/conditional_bijector.py
+++ b/tensorflow/contrib/distributions/python/ops/bijectors/conditional_bijector.py
@@ -18,12 +18,38 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-# go/tf-wildcard-import
-# pylint: disable=wildcard-import
-from tensorflow.contrib.distributions.python.ops.bijectors.conditional_bijector_impl import *
-# pylint: enable=wildcard-import
-from tensorflow.python.util.all_util import remove_undocumented
+from tensorflow.python.ops.distributions import bijector
+from tensorflow.python.ops.distributions import util as distribution_util
 
-_allowed_symbols = ["ConditionalBijector"]
 
-remove_undocumented(__name__, _allowed_symbols)
+__all__ = ["ConditionalBijector"]
+
+
+class ConditionalBijector(bijector.Bijector):
+  """Conditional Bijector is a Bijector that allows intrinsic conditioning."""
+
+  @distribution_util.AppendDocstring(kwargs_dict={
+      "**condition_kwargs":
+      "Named arguments forwarded to subclass implementation."})
+  def forward(self, x, name="forward", **condition_kwargs):
+    return self._call_forward(x, name, **condition_kwargs)
+
+  @distribution_util.AppendDocstring(kwargs_dict={
+      "**condition_kwargs":
+      "Named arguments forwarded to subclass implementation."})
+  def inverse(self, y, name="inverse", **condition_kwargs):
+    return self._call_inverse(y, name, **condition_kwargs)
+
+  @distribution_util.AppendDocstring(kwargs_dict={
+      "**condition_kwargs":
+      "Named arguments forwarded to subclass implementation."})
+  def inverse_log_det_jacobian(
+      self, y, name="inverse_log_det_jacobian", **condition_kwargs):
+    return self._call_inverse_log_det_jacobian(y, name, **condition_kwargs)
+
+  @distribution_util.AppendDocstring(kwargs_dict={
+      "**condition_kwargs":
+      "Named arguments forwarded to subclass implementation."})
+  def forward_log_det_jacobian(
+      self, x, name="forward_log_det_jacobian", **condition_kwargs):
+    return self._call_forward_log_det_jacobian(x, name, **condition_kwargs)
diff --git a/tensorflow/contrib/distributions/python/ops/bijectors/conditional_bijector_impl.py b/tensorflow/contrib/distributions/python/ops/bijectors/conditional_bijector_impl.py
deleted file mode 100644
index ccb1f02927..0000000000
--- a/tensorflow/contrib/distributions/python/ops/bijectors/conditional_bijector_impl.py
+++ /dev/null
@@ -1,55 +0,0 @@
-# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""ConditionalBijector base."""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-from tensorflow.python.ops.distributions import bijector
-from tensorflow.python.ops.distributions import util as distribution_util
-
-
-__all__ = ["ConditionalBijector"]
-
-
-class ConditionalBijector(bijector.Bijector):
-  """Conditional Bijector is a Bijector that allows intrinsic conditioning."""
-
-  @distribution_util.AppendDocstring(kwargs_dict={
-      "**condition_kwargs":
-      "Named arguments forwarded to subclass implementation."})
-  def forward(self, x, name="forward", **condition_kwargs):
-    return self._call_forward(x, name, **condition_kwargs)
-
-  @distribution_util.AppendDocstring(kwargs_dict={
-      "**condition_kwargs":
-      "Named arguments forwarded to subclass implementation."})
-  def inverse(self, y, name="inverse", **condition_kwargs):
-    return self._call_inverse(y, name, **condition_kwargs)
-
-  @distribution_util.AppendDocstring(kwargs_dict={
-      "**condition_kwargs":
-      "Named arguments forwarded to subclass implementation."})
-  def inverse_log_det_jacobian(
-      self, y, name="inverse_log_det_jacobian", **condition_kwargs):
-    return self._call_inverse_log_det_jacobian(y, name, **condition_kwargs)
-
-  @distribution_util.AppendDocstring(kwargs_dict={
-      "**condition_kwargs":
-      "Named arguments forwarded to subclass implementation."})
-  def forward_log_det_jacobian(
-      self, x, name="forward_log_det_jacobian", **condition_kwargs):
-    return self._call_forward_log_det_jacobian(x, name, **condition_kwargs)
diff --git a/tensorflow/contrib/distributions/python/ops/bijectors/exp.py b/tensorflow/contrib/distributions/python/ops/bijectors/exp.py
index 399d713098..b1ff840d62 100644
--- a/tensorflow/contrib/distributions/python/ops/bijectors/exp.py
+++ b/tensorflow/contrib/distributions/python/ops/bijectors/exp.py
@@ -18,12 +18,49 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-# go/tf-wildcard-import
-# pylint: disable=wildcard-import
-from tensorflow.contrib.distributions.python.ops.bijectors.exp_impl import *
-# pylint: enable=wildcard-import
-from tensorflow.python.util.all_util import remove_undocumented
+from tensorflow.contrib.distributions.python.ops.bijectors import power_transform
 
-_allowed_symbols = ["Exp"]
 
-remove_undocumented(__name__, _allowed_symbols)
+__all__ = [
+    "Exp",
+]
+
+
+class Exp(power_transform.PowerTransform):
+  """Compute `Y = g(X) = exp(X)`.
+
+    Example Use:
+
+    ```python
+    # Create the Y=g(X)=exp(X) transform which works only on Tensors with 1
+    # batch ndim and 2 event ndims (i.e., vector of matrices).
+    exp = Exp(event_ndims=2)
+    x = [[[1., 2],
+           [3, 4]],
+          [[5, 6],
+           [7, 8]]]
+    exp(x) == exp.forward(x)
+    log(x) == exp.inverse(x)
+    ```
+
+    Note: the exp(.) is applied element-wise but the Jacobian is a reduction
+    over the event space.
+  """
+
+  def __init__(self,
+               event_ndims=0,
+               validate_args=False,
+               name="exp"):
+    """Instantiates the `Exp` bijector.
+
+    Args:
+      event_ndims: Scalar `int32` `Tensor` indicating the number of dimensions
+        associated with a particular draw from the distribution.
+      validate_args: Python `bool` indicating whether arguments should be
+        checked for correctness.
+      name: Python `str` name given to ops managed by this object.
+    """
+    super(Exp, self).__init__(
+        event_ndims=event_ndims,
+        validate_args=validate_args,
+        name=name)
diff --git a/tensorflow/contrib/distributions/python/ops/bijectors/exp_impl.py b/tensorflow/contrib/distributions/python/ops/bijectors/exp_impl.py
deleted file mode 100644
index b1ff840d62..0000000000
--- a/tensorflow/contrib/distributions/python/ops/bijectors/exp_impl.py
+++ /dev/null
@@ -1,66 +0,0 @@
-# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Exp bijector."""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-from tensorflow.contrib.distributions.python.ops.bijectors import power_transform
-
-
-__all__ = [
-    "Exp",
-]
-
-
-class Exp(power_transform.PowerTransform):
-  """Compute `Y = g(X) = exp(X)`.
-
-    Example Use:
-
-    ```python
-    # Create the Y=g(X)=exp(X) transform which works only on Tensors with 1
-    # batch ndim and 2 event ndims (i.e., vector of matrices).
-    exp = Exp(event_ndims=2)
-    x = [[[1., 2],
-           [3, 4]],
-          [[5, 6],
-           [7, 8]]]
-    exp(x) == exp.forward(x)
-    log(x) == exp.inverse(x)
-    ```
-
-    Note: the exp(.) is applied element-wise but the Jacobian is a reduction
-    over the event space.
-  """
-
-  def __init__(self,
-               event_ndims=0,
-               validate_args=False,
-               name="exp"):
-    """Instantiates the `Exp` bijector.
-
-    Args:
-      event_ndims: Scalar `int32` `Tensor` indicating the number of dimensions
-        associated with a particular draw from the distribution.
-      validate_args: Python `bool` indicating whether arguments should be
-        checked for correctness.
-      name: Python `str` name given to ops managed by this object.
-    """
-    super(Exp, self).__init__(
-        event_ndims=event_ndims,
-        validate_args=validate_args,
-        name=name)
diff --git a/tensorflow/contrib/distributions/python/ops/bijectors/gumbel.py b/tensorflow/contrib/distributions/python/ops/bijectors/gumbel.py
index cf37aa5111..67f3978556 100644
--- a/tensorflow/contrib/distributions/python/ops/bijectors/gumbel.py
+++ b/tensorflow/contrib/distributions/python/ops/bijectors/gumbel.py
@@ -18,12 +18,107 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-# go/tf-wildcard-import
-# pylint: disable=wildcard-import
-from tensorflow.contrib.distributions.python.ops.bijectors.gumbel_impl import *
-# pylint: enable=wildcard-import
-from tensorflow.python.util.all_util import remove_undocumented
+from tensorflow.python.framework import constant_op
+from tensorflow.python.framework import ops
+from tensorflow.python.ops import check_ops
+from tensorflow.python.ops import control_flow_ops
+from tensorflow.python.ops import math_ops
+from tensorflow.python.ops.distributions import bijector
 
-_allowed_symbols = ["Gumbel"]
+__all__ = [
+    "Gumbel",
+]
 
-remove_undocumented(__name__, _allowed_symbols)
+
+class Gumbel(bijector.Bijector):
+  """Compute `Y = g(X) = exp(-exp(-(X - loc) / scale))`.
+
+  This bijector maps inputs from `[-inf, inf]` to [0, 1]`. The inverse of the
+  bijector applied to a uniform random variable `X ~ U(0, 1) gives back a
+  random variable with the
+  [Gumbel distribution](https://en.wikipedia.org/wiki/Gumbel_distribution):
+
+  ```none
+  Y ~ Gumbel(loc, scale)
+  pdf(y; loc, scale) = exp(
+    -( (y - loc) / scale + exp(- (y - loc) / scale) ) ) / scale
+  ```
+  """
+
+  def __init__(self,
+               loc=0.,
+               scale=1.,
+               event_ndims=0,
+               validate_args=False,
+               name="gumbel"):
+    """Instantiates the `Gumbel` bijector.
+
+    Args:
+      loc: Float-like `Tensor` that is the same dtype and is
+        broadcastable with `scale`.
+        This is `loc` in `Y = g(X) = exp(-exp(-(X - loc) / scale))`.
+      scale: Positive Float-like `Tensor` that is the same dtype and is
+        broadcastable with `loc`.
+        This is `scale` in `Y = g(X) = exp(-exp(-(X - loc) / scale))`.
+      event_ndims: Python scalar indicating the number of dimensions associated
+        with a particular draw from the distribution.
+      validate_args: Python `bool` indicating whether arguments should be
+        checked for correctness.
+      name: Python `str` name given to ops managed by this object.
+    """
+    self._graph_parents = []
+    self._name = name
+    self._validate_args = validate_args
+    with self._name_scope("init", values=[loc, scale]):
+      self._loc = ops.convert_to_tensor(loc, name="loc")
+      self._scale = ops.convert_to_tensor(scale, name="scale")
+      check_ops.assert_same_float_dtype([self._loc, self._scale])
+      if validate_args:
+        self._scale = control_flow_ops.with_dependencies([
+            check_ops.assert_positive(
+                self._scale, message="Argument scale was not positive")
+        ], self._scale)
+
+    super(Gumbel, self).__init__(
+        event_ndims=event_ndims, validate_args=validate_args, name=name)
+
+  @property
+  def loc(self):
+    """The `loc` in `Y = g(X) = exp(-exp(-(X - loc) / scale))`."""
+    return self._loc
+
+  @property
+  def scale(self):
+    """This is `scale` in `Y = g(X) = exp(-exp(-(X - loc) / scale))`."""
+    return self._scale
+
+  def _forward(self, x):
+    z = (x - self.loc) / self.scale
+    return math_ops.exp(-math_ops.exp(-z))
+
+  def _inverse(self, y):
+    y = self._maybe_assert_valid_y(y)
+    return self.loc - self.scale * math_ops.log(-math_ops.log(y))
+
+  def _inverse_log_det_jacobian(self, y):
+    y = self._maybe_assert_valid_y(y)
+    event_dims = self._event_dims_tensor(y)
+    return math_ops.reduce_sum(
+        math_ops.log(self.scale / (-math_ops.log(y) * y)), axis=event_dims)
+
+  def _forward_log_det_jacobian(self, x):
+    event_dims = self._event_dims_tensor(x)
+    z = (x - self.loc) / self.scale
+    return math_ops.reduce_sum(
+        -z - math_ops.exp(-z) - math_ops.log(self.scale), axis=event_dims)
+
+  def _maybe_assert_valid_y(self, y):
+    if not self.validate_args:
+      return y
+    is_positive = check_ops.assert_non_negative(
+        y, message="Inverse transformation input must be greater than 0.")
+    less_than_one = check_ops.assert_less_equal(
+        y,
+        constant_op.constant(1., y.dtype),
+        message="Inverse transformation input must be less than or equal to 1.")
+    return control_flow_ops.with_dependencies([is_positive, less_than_one], y)
diff --git a/tensorflow/contrib/distributions/python/ops/bijectors/gumbel_impl.py b/tensorflow/contrib/distributions/python/ops/bijectors/gumbel_impl.py
deleted file mode 100644
index 67f3978556..0000000000
--- a/tensorflow/contrib/distributions/python/ops/bijectors/gumbel_impl.py
+++ /dev/null
@@ -1,124 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Gumbel bijector."""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-from tensorflow.python.framework import constant_op
-from tensorflow.python.framework import ops
-from tensorflow.python.ops import check_ops
-from tensorflow.python.ops import control_flow_ops
-from tensorflow.python.ops import math_ops
-from tensorflow.python.ops.distributions import bijector
-
-__all__ = [
-    "Gumbel",
-]
-
-
-class Gumbel(bijector.Bijector):
-  """Compute `Y = g(X) = exp(-exp(-(X - loc) / scale))`.
-
-  This bijector maps inputs from `[-inf, inf]` to [0, 1]`. The inverse of the
-  bijector applied to a uniform random variable `X ~ U(0, 1) gives back a
-  random variable with the
-  [Gumbel distribution](https://en.wikipedia.org/wiki/Gumbel_distribution):
-
-  ```none
-  Y ~ Gumbel(loc, scale)
-  pdf(y; loc, scale) = exp(
-    -( (y - loc) / scale + exp(- (y - loc) / scale) ) ) / scale
-  ```
-  """
-
-  def __init__(self,
-               loc=0.,
-               scale=1.,
-               event_ndims=0,
-               validate_args=False,
-               name="gumbel"):
-    """Instantiates the `Gumbel` bijector.
-
-    Args:
-      loc: Float-like `Tensor` that is the same dtype and is
-        broadcastable with `scale`.
-        This is `loc` in `Y = g(X) = exp(-exp(-(X - loc) / scale))`.
-      scale: Positive Float-like `Tensor` that is the same dtype and is
-        broadcastable with `loc`.
-        This is `scale` in `Y = g(X) = exp(-exp(-(X - loc) / scale))`.
-      event_ndims: Python scalar indicating the number of dimensions associated
-        with a particular draw from the distribution.
-      validate_args: Python `bool` indicating whether arguments should be
-        checked for correctness.
-      name: Python `str` name given to ops managed by this object.
-    """
-    self._graph_parents = []
-    self._name = name
-    self._validate_args = validate_args
-    with self._name_scope("init", values=[loc, scale]):
-      self._loc = ops.convert_to_tensor(loc, name="loc")
-      self._scale = ops.convert_to_tensor(scale, name="scale")
-      check_ops.assert_same_float_dtype([self._loc, self._scale])
-      if validate_args:
-        self._scale = control_flow_ops.with_dependencies([
-            check_ops.assert_positive(
-                self._scale, message="Argument scale was not positive")
-        ], self._scale)
-
-    super(Gumbel, self).__init__(
-        event_ndims=event_ndims, validate_args=validate_args, name=name)
-
-  @property
-  def loc(self):
-    """The `loc` in `Y = g(X) = exp(-exp(-(X - loc) / scale))`."""
-    return self._loc
-
-  @property
-  def scale(self):
-    """This is `scale` in `Y = g(X) = exp(-exp(-(X - loc) / scale))`."""
-    return self._scale
-
-  def _forward(self, x):
-    z = (x - self.loc) / self.scale
-    return math_ops.exp(-math_ops.exp(-z))
-
-  def _inverse(self, y):
-    y = self._maybe_assert_valid_y(y)
-    return self.loc - self.scale * math_ops.log(-math_ops.log(y))
-
-  def _inverse_log_det_jacobian(self, y):
-    y = self._maybe_assert_valid_y(y)
-    event_dims = self._event_dims_tensor(y)
-    return math_ops.reduce_sum(
-        math_ops.log(self.scale / (-math_ops.log(y) * y)), axis=event_dims)
-
-  def _forward_log_det_jacobian(self, x):
-    event_dims = self._event_dims_tensor(x)
-    z = (x - self.loc) / self.scale
-    return math_ops.reduce_sum(
-        -z - math_ops.exp(-z) - math_ops.log(self.scale), axis=event_dims)
-
-  def _maybe_assert_valid_y(self, y):
-    if not self.validate_args:
-      return y
-    is_positive = check_ops.assert_non_negative(
-        y, message="Inverse transformation input must be greater than 0.")
-    less_than_one = check_ops.assert_less_equal(
-        y,
-        constant_op.constant(1., y.dtype),
-        message="Inverse transformation input must be less than or equal to 1.")
-    return control_flow_ops.with_dependencies([is_positive, less_than_one], y)
diff --git a/tensorflow/contrib/distributions/python/ops/bijectors/inline.py b/tensorflow/contrib/distributions/python/ops/bijectors/inline.py
index db10c3fc3a..fab1b22fbf 100644
--- a/tensorflow/contrib/distributions/python/ops/bijectors/inline.py
+++ b/tensorflow/contrib/distributions/python/ops/bijectors/inline.py
@@ -18,12 +18,124 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-# go/tf-wildcard-import
-# pylint: disable=wildcard-import
-from tensorflow.contrib.distributions.python.ops.bijectors.inline_impl import *
-# pylint: enable=wildcard-import
-from tensorflow.python.util.all_util import remove_undocumented
+from tensorflow.python.ops.distributions import bijector
 
-_allowed_symbols = ["Inline"]
 
-remove_undocumented(__name__, _allowed_symbols)
+__all__ = [
+    "Inline",
+]
+
+
+class Inline(bijector.Bijector):
+  """Bijector constructed from custom callables.
+
+  Example Use:
+
+  ```python
+  exp = Inline(
+    forward_fn=tf.exp,
+    inverse_fn=tf.log,
+    inverse_log_det_jacobian_fn=(
+      lambda y: -tf.reduce_sum(tf.log(y), axis=-1)),
+    name="exp")
+  ```
+
+  The above example is equivalent to the `Bijector` `Exp(event_ndims=1)`.
+  """
+
+  def __init__(self,
+               forward_fn=None,
+               inverse_fn=None,
+               inverse_log_det_jacobian_fn=None,
+               forward_log_det_jacobian_fn=None,
+               forward_event_shape_fn=None,
+               forward_event_shape_tensor_fn=None,
+               inverse_event_shape_fn=None,
+               inverse_event_shape_tensor_fn=None,
+               is_constant_jacobian=False,
+               validate_args=False,
+               name="inline"):
+    """Creates a `Bijector` from callables.
+
+    Args:
+      forward_fn: Python callable implementing the forward transformation.
+      inverse_fn: Python callable implementing the inverse transformation.
+      inverse_log_det_jacobian_fn: Python callable implementing the
+        log o det o jacobian of the inverse transformation.
+      forward_log_det_jacobian_fn: Python callable implementing the
+        log o det o jacobian of the forward transformation.
+      forward_event_shape_fn: Python callable implementing non-identical
+        static event shape changes. Default: shape is assumed unchanged.
+      forward_event_shape_tensor_fn: Python callable implementing non-identical
+        event shape changes. Default: shape is assumed unchanged.
+      inverse_event_shape_fn: Python callable implementing non-identical
+        static event shape changes. Default: shape is assumed unchanged.
+      inverse_event_shape_tensor_fn: Python callable implementing non-identical
+        event shape changes. Default: shape is assumed unchanged.
+      is_constant_jacobian: Python `bool` indicating that the Jacobian is
+        constant for all input arguments.
+      validate_args: Python `bool` indicating whether arguments should be
+        checked for correctness.
+      name: Python `str`, name given to ops managed by this object.
+    """
+    super(Inline, self).__init__(
+        event_ndims=0,
+        is_constant_jacobian=is_constant_jacobian,
+        validate_args=validate_args,
+        name=name)
+    self._forward_fn = forward_fn
+    self._inverse_fn = inverse_fn
+    self._inverse_log_det_jacobian_fn = inverse_log_det_jacobian_fn
+    self._forward_log_det_jacobian_fn = forward_log_det_jacobian_fn
+    self._forward_event_shape_fn = forward_event_shape_fn
+    self._forward_event_shape_tensor_fn = forward_event_shape_tensor_fn
+    self._inverse_event_shape_fn = inverse_event_shape_fn
+    self._inverse_event_shape_tensor_fn = inverse_event_shape_tensor_fn
+
+  def _forward_event_shape(self, input_shape):
+    if self._forward_event_shape_fn is None:
+      # By default assume shape doesn't change.
+      return input_shape
+    return self._forward_event_shape_fn(input_shape)
+
+  def _forward_event_shape_tensor(self, input_shape):
+    if self._forward_event_shape_tensor_fn is None:
+      # By default assume shape doesn't change.
+      return input_shape
+    return self._forward_event_shape_tensor_fn(input_shape)
+
+  def _inverse_event_shape(self, output_shape):
+    if self._inverse_event_shape_fn is None:
+      # By default assume shape doesn't change.
+      return output_shape
+    return self._inverse_event_shape_fn(output_shape)
+
+  def _inverse_event_shape_tensor(self, output_shape):
+    if self._inverse_event_shape_tensor_fn is None:
+      # By default assume shape doesn't change.
+      return output_shape
+    return self._inverse_event_shape_tensor_fn(output_shape)
+
+  def _forward(self, x, **kwargs):
+    if not callable(self._forward_fn):
+      raise NotImplementedError(
+          "forward_fn is not a callable function.")
+    return self._forward_fn(x, **kwargs)
+
+  def _inverse(self, y, **kwargs):
+    if not callable(self._inverse_fn):
+      raise NotImplementedError(
+          "inverse_fn is not a callable function.")
+    return self._inverse_fn(y, **kwargs)
+
+  def _inverse_log_det_jacobian(self, y, **kwargs):
+    if not callable(self._inverse_log_det_jacobian_fn):
+      raise NotImplementedError(
+          "inverse_log_det_jacobian_fn is not a callable function.")
+    return self._inverse_log_det_jacobian_fn(y, **kwargs)
+
+  def _forward_log_det_jacobian(self, y, **kwargs):
+    if not callable(self._forward_log_det_jacobian_fn):
+      raise NotImplementedError(
+          "forward_log_det_jacobian_fn is not a callable function.")
+    return self._forward_log_det_jacobian_fn(y, **kwargs)
diff --git a/tensorflow/contrib/distributions/python/ops/bijectors/inline_impl.py b/tensorflow/contrib/distributions/python/ops/bijectors/inline_impl.py
deleted file mode 100644
index fab1b22fbf..0000000000
--- a/tensorflow/contrib/distributions/python/ops/bijectors/inline_impl.py
+++ /dev/null
@@ -1,141 +0,0 @@
-# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Inline bijector."""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-from tensorflow.python.ops.distributions import bijector
-
-
-__all__ = [
-    "Inline",
-]
-
-
-class Inline(bijector.Bijector):
-  """Bijector constructed from custom callables.
-
-  Example Use:
-
-  ```python
-  exp = Inline(
-    forward_fn=tf.exp,
-    inverse_fn=tf.log,
-    inverse_log_det_jacobian_fn=(
-      lambda y: -tf.reduce_sum(tf.log(y), axis=-1)),
-    name="exp")
-  ```
-
-  The above example is equivalent to the `Bijector` `Exp(event_ndims=1)`.
-  """
-
-  def __init__(self,
-               forward_fn=None,
-               inverse_fn=None,
-               inverse_log_det_jacobian_fn=None,
-               forward_log_det_jacobian_fn=None,
-               forward_event_shape_fn=None,
-               forward_event_shape_tensor_fn=None,
-               inverse_event_shape_fn=None,
-               inverse_event_shape_tensor_fn=None,
-               is_constant_jacobian=False,
-               validate_args=False,
-               name="inline"):
-    """Creates a `Bijector` from callables.
-
-    Args:
-      forward_fn: Python callable implementing the forward transformation.
-      inverse_fn: Python callable implementing the inverse transformation.
-      inverse_log_det_jacobian_fn: Python callable implementing the
-        log o det o jacobian of the inverse transformation.
-      forward_log_det_jacobian_fn: Python callable implementing the
-        log o det o jacobian of the forward transformation.
-      forward_event_shape_fn: Python callable implementing non-identical
-        static event shape changes. Default: shape is assumed unchanged.
-      forward_event_shape_tensor_fn: Python callable implementing non-identical
-        event shape changes. Default: shape is assumed unchanged.
-      inverse_event_shape_fn: Python callable implementing non-identical
-        static event shape changes. Default: shape is assumed unchanged.
-      inverse_event_shape_tensor_fn: Python callable implementing non-identical
-        event shape changes. Default: shape is assumed unchanged.
-      is_constant_jacobian: Python `bool` indicating that the Jacobian is
-        constant for all input arguments.
-      validate_args: Python `bool` indicating whether arguments should be
-        checked for correctness.
-      name: Python `str`, name given to ops managed by this object.
-    """
-    super(Inline, self).__init__(
-        event_ndims=0,
-        is_constant_jacobian=is_constant_jacobian,
-        validate_args=validate_args,
-        name=name)
-    self._forward_fn = forward_fn
-    self._inverse_fn = inverse_fn
-    self._inverse_log_det_jacobian_fn = inverse_log_det_jacobian_fn
-    self._forward_log_det_jacobian_fn = forward_log_det_jacobian_fn
-    self._forward_event_shape_fn = forward_event_shape_fn
-    self._forward_event_shape_tensor_fn = forward_event_shape_tensor_fn
-    self._inverse_event_shape_fn = inverse_event_shape_fn
-    self._inverse_event_shape_tensor_fn = inverse_event_shape_tensor_fn
-
-  def _forward_event_shape(self, input_shape):
-    if self._forward_event_shape_fn is None:
-      # By default assume shape doesn't change.
-      return input_shape
-    return self._forward_event_shape_fn(input_shape)
-
-  def _forward_event_shape_tensor(self, input_shape):
-    if self._forward_event_shape_tensor_fn is None:
-      # By default assume shape doesn't change.
-      return input_shape
-    return self._forward_event_shape_tensor_fn(input_shape)
-
-  def _inverse_event_shape(self, output_shape):
-    if self._inverse_event_shape_fn is None:
-      # By default assume shape doesn't change.
-      return output_shape
-    return self._inverse_event_shape_fn(output_shape)
-
-  def _inverse_event_shape_tensor(self, output_shape):
-    if self._inverse_event_shape_tensor_fn is None:
-      # By default assume shape doesn't change.
-      return output_shape
-    return self._inverse_event_shape_tensor_fn(output_shape)
-
-  def _forward(self, x, **kwargs):
-    if not callable(self._forward_fn):
-      raise NotImplementedError(
-          "forward_fn is not a callable function.")
-    return self._forward_fn(x, **kwargs)
-
-  def _inverse(self, y, **kwargs):
-    if not callable(self._inverse_fn):
-      raise NotImplementedError(
-          "inverse_fn is not a callable function.")
-    return self._inverse_fn(y, **kwargs)
-
-  def _inverse_log_det_jacobian(self, y, **kwargs):
-    if not callable(self._inverse_log_det_jacobian_fn):
-      raise NotImplementedError(
-          "inverse_log_det_jacobian_fn is not a callable function.")
-    return self._inverse_log_det_jacobian_fn(y, **kwargs)
-
-  def _forward_log_det_jacobian(self, y, **kwargs):
-    if not callable(self._forward_log_det_jacobian_fn):
-      raise NotImplementedError(
-          "forward_log_det_jacobian_fn is not a callable function.")
-    return self._forward_log_det_jacobian_fn(y, **kwargs)
diff --git a/tensorflow/contrib/distributions/python/ops/bijectors/invert.py b/tensorflow/contrib/distributions/python/ops/bijectors/invert.py
index c134e10109..2c603fe61f 100644
--- a/tensorflow/contrib/distributions/python/ops/bijectors/invert.py
+++ b/tensorflow/contrib/distributions/python/ops/bijectors/invert.py
@@ -18,12 +18,85 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-# go/tf-wildcard-import
-# pylint: disable=wildcard-import
-from tensorflow.contrib.distributions.python.ops.bijectors.invert_impl import *
-# pylint: enable=wildcard-import
-from tensorflow.python.util.all_util import remove_undocumented
+from tensorflow.python.ops.distributions import bijector as bijector_lib
 
-_allowed_symbols = ["Invert"]
+__all__ = [
+    "Invert",
+]
 
-remove_undocumented(__name__, _allowed_symbols)
+
+class Invert(bijector_lib.Bijector):
+  """Bijector which inverts another Bijector.
+
+  Example Use: [ExpGammaDistribution (see Background & Context)](
+  https://reference.wolfram.com/language/ref/ExpGammaDistribution.html)
+  models `Y=log(X)` where `X ~ Gamma`.
+
+  ```python
+  exp_gamma_distribution = TransformedDistribution(
+    distribution=Gamma(concentration=1., rate=2.),
+    bijector=bijector.Invert(bijector.Exp())
+  ```
+
+  """
+
+  def __init__(self, bijector, validate_args=False, name=None):
+    """Creates a `Bijector` which swaps the meaning of `inverse` and `forward`.
+
+    Note: An inverted bijector's `inverse_log_det_jacobian` is often more
+    efficient if the base bijector implements `_forward_log_det_jacobian`. If
+    `_forward_log_det_jacobian` is not implemented then the following code is
+    used:
+
+    ```python
+    y = self.inverse(x, **kwargs)
+    return -self.inverse_log_det_jacobian(y, **kwargs)
+    ```
+
+    Args:
+      bijector: Bijector instance.
+      validate_args: Python `bool` indicating whether arguments should be
+        checked for correctness.
+      name: Python `str`, name given to ops managed by this object.
+    """
+
+    if not bijector._is_injective:  # pylint: disable=protected-access
+      raise NotImplementedError(
+          "Invert is not implemented for non-injective bijectors.")
+
+    self._bijector = bijector
+    super(Invert, self).__init__(
+        event_ndims=bijector.event_ndims,
+        graph_parents=bijector.graph_parents,
+        is_constant_jacobian=bijector.is_constant_jacobian,
+        validate_args=validate_args,
+        dtype=bijector.dtype,
+        name=name or "_".join(["invert", bijector.name]))
+
+  def _forward_event_shape(self, input_shape):
+    return self.bijector._inverse_event_shape(input_shape)  # pylint: disable=protected-access
+
+  def _forward_event_shape_tensor(self, input_shape):
+    return self.bijector._inverse_event_shape_tensor(input_shape)  # pylint: disable=protected-access
+
+  def _inverse_event_shape(self, output_shape):
+    return self.bijector._forward_event_shape(output_shape)  # pylint: disable=protected-access
+
+  def _inverse_event_shape_tensor(self, output_shape):
+    return self.bijector._forward_event_shape_tensor(output_shape)  # pylint: disable=protected-access
+
+  @property
+  def bijector(self):
+    return self._bijector
+
+  def _forward(self, x, **kwargs):
+    return self.bijector._inverse(x, **kwargs)  # pylint: disable=protected-access
+
+  def _inverse(self, y, **kwargs):
+    return self.bijector._forward(y, **kwargs)  # pylint: disable=protected-access
+
+  def _inverse_log_det_jacobian(self, y, **kwargs):
+    return self.bijector._forward_log_det_jacobian(y, **kwargs)  # pylint: disable=protected-access
+
+  def _forward_log_det_jacobian(self, x, **kwargs):
+    return self.bijector._inverse_log_det_jacobian(x, **kwargs)  # pylint: disable=protected-access
diff --git a/tensorflow/contrib/distributions/python/ops/bijectors/invert_impl.py b/tensorflow/contrib/distributions/python/ops/bijectors/invert_impl.py
deleted file mode 100644
index 2c603fe61f..0000000000
--- a/tensorflow/contrib/distributions/python/ops/bijectors/invert_impl.py
+++ /dev/null
@@ -1,102 +0,0 @@
-# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Invert bijector."""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-from tensorflow.python.ops.distributions import bijector as bijector_lib
-
-__all__ = [
-    "Invert",
-]
-
-
-class Invert(bijector_lib.Bijector):
-  """Bijector which inverts another Bijector.
-
-  Example Use: [ExpGammaDistribution (see Background & Context)](
-  https://reference.wolfram.com/language/ref/ExpGammaDistribution.html)
-  models `Y=log(X)` where `X ~ Gamma`.
-
-  ```python
-  exp_gamma_distribution = TransformedDistribution(
-    distribution=Gamma(concentration=1., rate=2.),
-    bijector=bijector.Invert(bijector.Exp())
-  ```
-
-  """
-
-  def __init__(self, bijector, validate_args=False, name=None):
-    """Creates a `Bijector` which swaps the meaning of `inverse` and `forward`.
-
-    Note: An inverted bijector's `inverse_log_det_jacobian` is often more
-    efficient if the base bijector implements `_forward_log_det_jacobian`. If
-    `_forward_log_det_jacobian` is not implemented then the following code is
-    used:
-
-    ```python
-    y = self.inverse(x, **kwargs)
-    return -self.inverse_log_det_jacobian(y, **kwargs)
-    ```
-
-    Args:
-      bijector: Bijector instance.
-      validate_args: Python `bool` indicating whether arguments should be
-        checked for correctness.
-      name: Python `str`, name given to ops managed by this object.
-    """
-
-    if not bijector._is_injective:  # pylint: disable=protected-access
-      raise NotImplementedError(
-          "Invert is not implemented for non-injective bijectors.")
-
-    self._bijector = bijector
-    super(Invert, self).__init__(
-        event_ndims=bijector.event_ndims,
-        graph_parents=bijector.graph_parents,
-        is_constant_jacobian=bijector.is_constant_jacobian,
-        validate_args=validate_args,
-        dtype=bijector.dtype,
-        name=name or "_".join(["invert", bijector.name]))
-
-  def _forward_event_shape(self, input_shape):
-    return self.bijector._inverse_event_shape(input_shape)  # pylint: disable=protected-access
-
-  def _forward_event_shape_tensor(self, input_shape):
-    return self.bijector._inverse_event_shape_tensor(input_shape)  # pylint: disable=protected-access
-
-  def _inverse_event_shape(self, output_shape):
-    return self.bijector._forward_event_shape(output_shape)  # pylint: disable=protected-access
-
-  def _inverse_event_shape_tensor(self, output_shape):
-    return self.bijector._forward_event_shape_tensor(output_shape)  # pylint: disable=protected-access
-
-  @property
-  def bijector(self):
-    return self._bijector
-
-  def _forward(self, x, **kwargs):
-    return self.bijector._inverse(x, **kwargs)  # pylint: disable=protected-access
-
-  def _inverse(self, y, **kwargs):
-    return self.bijector._forward(y, **kwargs)  # pylint: disable=protected-access
-
-  def _inverse_log_det_jacobian(self, y, **kwargs):
-    return self.bijector._forward_log_det_jacobian(y, **kwargs)  # pylint: disable=protected-access
-
-  def _forward_log_det_jacobian(self, x, **kwargs):
-    return self.bijector._inverse_log_det_jacobian(x, **kwargs)  # pylint: disable=protected-access
diff --git a/tensorflow/contrib/distributions/python/ops/bijectors/masked_autoregressive.py b/tensorflow/contrib/distributions/python/ops/bijectors/masked_autoregressive.py
index 132dc570f9..06c7c61ec3 100644
--- a/tensorflow/contrib/distributions/python/ops/bijectors/masked_autoregressive.py
+++ b/tensorflow/contrib/distributions/python/ops/bijectors/masked_autoregressive.py
@@ -18,16 +18,459 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-# go/tf-wildcard-import
-# pylint: disable=wildcard-import
-from tensorflow.contrib.distributions.python.ops.bijectors.masked_autoregressive_impl import *
-# pylint: enable=wildcard-import
-from tensorflow.python.util.all_util import remove_undocumented
+import numpy as np
 
-_allowed_symbols = [
+from tensorflow.python.framework import constant_op
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import ops
+from tensorflow.python.layers import core as layers
+from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import clip_ops
+from tensorflow.python.ops import control_flow_ops
+from tensorflow.python.ops import init_ops
+from tensorflow.python.ops import math_ops
+from tensorflow.python.ops import nn_ops
+from tensorflow.python.ops import template as template_ops
+from tensorflow.python.ops import variable_scope as variable_scope_lib
+from tensorflow.python.ops.distributions import bijector as bijector_lib
+
+
+__all__ = [
     "MaskedAutoregressiveFlow",
-    "masked_dense",
     "masked_autoregressive_default_template",
+    "masked_dense",
 ]
 
-remove_undocumented(__name__, _allowed_symbols)
+
+class MaskedAutoregressiveFlow(bijector_lib.Bijector):
+  """Affine MaskedAutoregressiveFlow bijector for vector-valued events.
+
+  The affine autoregressive flow [1] provides a relatively simple framework for
+  user-specified (deep) architectures to learn a distribution over vector-valued
+  events. Regarding terminology,
+
+    "Autoregressive models decompose the joint density as a product of
+    conditionals, and model each conditional in turn. Normalizing flows
+    transform a base density (e.g. a standard Gaussian) into the target density
+    by an invertible transformation with tractable Jacobian." [1]
+
+  In other words, the "autoregressive property" is equivalent to the
+  decomposition, `p(x) = prod{ p(x[i] | x[0:i]) : i=0, ..., d }`. The provided
+  `shift_and_log_scale_fn`, `masked_autoregressive_default_template`, achieves
+  this property by zeroing out weights in its `masked_dense` layers.
+
+  In the `tf.distributions` framework, a "normalizing flow" is implemented as a
+  `tf.distributions.bijectors.Bijector`. The `forward` "autoregression"
+  is implemented using a `tf.while_loop` and a deep neural network (DNN) with
+  masked weights such that the autoregressive property is automatically met in
+  the `inverse`.
+
+  A `TransformedDistribution` using `MaskedAutoregressiveFlow(...)` uses the
+  (expensive) forward-mode calculation to draw samples and the (cheap)
+  reverse-mode calculation to compute log-probabilities. Conversely, a
+  `TransformedDistribution` using `Invert(MaskedAutoregressiveFlow(...))` uses
+  the (expensive) forward-mode calculation to compute log-probabilities and the
+  (cheap) reverse-mode calculation to compute samples.  See "Example Use"
+  [below] for more details.
+
+  Given a `shift_and_log_scale_fn`, the forward and inverse transformations are
+  (a sequence of) affine transformations. A "valid" `shift_and_log_scale_fn`
+  must compute each `shift` (aka `loc` or "mu" [2]) and `log(scale)` (aka
+  "alpha" [2]) such that each are broadcastable with the arguments to `forward`
+  and `inverse`, i.e., such that the calculations in `forward`, `inverse`
+  [below] are possible.
+
+  For convenience, `masked_autoregressive_default_template` is offered as a
+  possible `shift_and_log_scale_fn` function. It implements the MADE
+  architecture [2]. MADE is a feed-forward network that computes a `shift` and
+  `log(scale)` using `masked_dense` layers in a deep neural network. Weights are
+  masked to ensure the autoregressive property. It is possible that this
+  architecture is suboptimal for your task. To build alternative networks,
+  either change the arguments to `masked_autoregressive_default_template`, use
+  the `masked_dense` function to roll-out your own, or use some other
+  architecture, e.g., using `tf.layers`.
+
+  Warning: no attempt is made to validate that the `shift_and_log_scale_fn`
+  enforces the "autoregressive property".
+
+  Assuming `shift_and_log_scale_fn` has valid shape and autoregressive
+  semantics, the forward transformation is,
+
+  ```python
+  def forward(x):
+    y = zeros_like(x)
+    event_size = x.shape[-1]
+    for _ in range(event_size):
+      shift, log_scale = shift_and_log_scale_fn(y)
+      y = x * math_ops.exp(log_scale) + shift
+    return y
+  ```
+
+  and the inverse transformation is,
+
+  ```python
+  def inverse(y):
+    shift, log_scale = shift_and_log_scale_fn(y)
+    return (y - shift) / math_ops.exp(log_scale)
+  ```
+
+  Notice that the `inverse` does not need a for-loop. This is because in the
+  forward pass each calculation of `shift` and `log_scale` is based on the `y`
+  calculated so far (not `x`). In the `inverse`, the `y` is fully known, thus is
+  equivalent to the scaling used in `forward` after `event_size` passes, i.e.,
+  the "last" `y` used to compute `shift`, `log_scale`. (Roughly speaking, this
+  also proves the transform is bijective.)
+
+  #### Example Use
+
+  ```python
+  tfd = tf.contrib.distributions
+  tfb = tfd.bijectors
+
+  dims = 5
+
+  # A common choice for a normalizing flow is to use a Gaussian for the base
+  # distribution. (However, any continuous distribution would work.) E.g.,
+  maf = tfd.TransformedDistribution(
+      distribution=tfd.Normal(loc=0., scale=1.),
+      bijector=tfb.MaskedAutoregressiveFlow(
+          shift_and_log_scale_fn=tfb.masked_autoregressive_default_template(
+              hidden_layers=[512, 512])),
+      event_shape=[dims])
+
+  x = maf.sample()  # Expensive; uses `tf.while_loop`, no Bijector caching.
+  maf.log_prob(x)   # Almost free; uses Bijector caching.
+  maf.log_prob(0.)  # Cheap; no `tf.while_loop` despite no Bijector caching.
+
+  # [1] also describes an "Inverse Autoregressive Flow", e.g.,
+  iaf = tfd.TransformedDistribution(
+      distribution=tfd.Normal(loc=0., scale=1.),
+      bijector=tfb.Invert(tfb.MaskedAutoregressiveFlow(
+          shift_and_log_scale_fn=tfb.masked_autoregressive_default_template(
+              hidden_layers=[512, 512]))),
+      event_shape=[dims])
+
+  x = iaf.sample()  # Cheap; no `tf.while_loop` despite no Bijector caching.
+  iaf.log_prob(x)   # Almost free; uses Bijector caching.
+  iaf.log_prob(0.)  # Expensive; uses `tf.while_loop`, no Bijector caching.
+
+  # In many (if not most) cases the default `shift_and_log_scale_fn` will be a
+  # poor choice. Here's an example of using a "shift only" version and with a
+  # different number/depth of hidden layers.
+  shift_only = True
+  maf_no_scale_hidden2 = tfd.TransformedDistribution(
+      distribution=tfd.Normal(loc=0., scale=1.),
+      bijector=tfb.MaskedAutoregressiveFlow(
+          tfb.masked_autoregressive_default_template(
+              hidden_layers=[32],
+              shift_only=shift_only),
+          is_constant_jacobian=shift_only),
+      event_shape=[dims])
+  ```
+
+  [1]: "Masked Autoregressive Flow for Density Estimation."
+       George Papamakarios, Theo Pavlakou, Iain Murray. Arxiv. 2017.
+       https://arxiv.org/abs/1705.07057
+
+  [2]: "MADE: Masked Autoencoder for Distribution Estimation."
+       Mathieu Germain, Karol Gregor, Iain Murray, Hugo Larochelle. ICML. 2015.
+       https://arxiv.org/abs/1502.03509
+
+  """
+
+  def __init__(self,
+               shift_and_log_scale_fn,
+               is_constant_jacobian=False,
+               validate_args=False,
+               name=None):
+    """Creates the MaskedAutoregressiveFlow bijector.
+
+    Args:
+      shift_and_log_scale_fn: Python `callable` which computes `shift` and
+        `log_scale` from both the forward domain (`x`) and the inverse domain
+        (`y`). Calculation must respect the "autoregressive property" (see class
+        docstring). Suggested default
+        `masked_autoregressive_default_template(hidden_layers=...)`.
+        Typically the function contains `tf.Variables` and is wrapped using
+        `tf.make_template`. Returning `None` for either (both) `shift`,
+        `log_scale` is equivalent to (but more efficient than) returning zero.
+      is_constant_jacobian: Python `bool`. Default: `False`. When `True` the
+        implementation assumes `log_scale` does not depend on the forward domain
+        (`x`) or inverse domain (`y`) values. (No validation is made;
+        `is_constant_jacobian=False` is always safe but possibly computationally
+        inefficient.)
+      validate_args: Python `bool` indicating whether arguments should be
+        checked for correctness.
+      name: Python `str`, name given to ops managed by this object.
+    """
+    name = name or "masked_autoregressive_flow"
+    self._shift_and_log_scale_fn = shift_and_log_scale_fn
+    super(MaskedAutoregressiveFlow, self).__init__(
+        is_constant_jacobian=is_constant_jacobian,
+        validate_args=validate_args,
+        name=name)
+
+  def _forward(self, x):
+    event_size = array_ops.shape(x)[-1]
+    y0 = array_ops.zeros_like(x, name="y0")
+    # call the template once to ensure creation
+    _ = self._shift_and_log_scale_fn(y0)
+    def _loop_body(index, y0):
+      """While-loop body for autoregression calculation."""
+      # Set caching device to avoid re-getting the tf.Variable for every while
+      # loop iteration.
+      with variable_scope_lib.variable_scope(
+          variable_scope_lib.get_variable_scope()) as vs:
+        if vs.caching_device is None:
+          vs.set_caching_device(lambda op: op.device)
+        shift, log_scale = self._shift_and_log_scale_fn(y0)
+      y = x
+      if log_scale is not None:
+        y *= math_ops.exp(log_scale)
+      if shift is not None:
+        y += shift
+      return index + 1, y
+    _, y = control_flow_ops.while_loop(
+        cond=lambda index, _: index < event_size,
+        body=_loop_body,
+        loop_vars=[0, y0])
+    return y
+
+  def _inverse(self, y):
+    shift, log_scale = self._shift_and_log_scale_fn(y)
+    x = y
+    if shift is not None:
+      x -= shift
+    if log_scale is not None:
+      x *= math_ops.exp(-log_scale)
+    return x
+
+  def _inverse_log_det_jacobian(self, y):
+    _, log_scale = self._shift_and_log_scale_fn(y)
+    if log_scale is None:
+      return constant_op.constant(0., dtype=y.dtype, name="ildj")
+    return -math_ops.reduce_sum(log_scale, axis=-1)
+
+
+MASK_INCLUSIVE = "inclusive"
+MASK_EXCLUSIVE = "exclusive"
+
+
+def _gen_slices(num_blocks, n_in, n_out, mask_type=MASK_EXCLUSIVE):
+  """Generate the slices for building an autoregressive mask."""
+  # TODO(b/67594795): Better support of dynamic shape.
+  slices = []
+  col = 0
+  d_in = n_in // num_blocks
+  d_out = n_out // num_blocks
+  row = d_out if mask_type == MASK_EXCLUSIVE else 0
+  for _ in range(num_blocks):
+    row_slice = slice(row, None)
+    col_slice = slice(col, col + d_in)
+    slices.append([row_slice, col_slice])
+    col += d_in
+    row += d_out
+  return slices
+
+
+def _gen_mask(num_blocks,
+              n_in,
+              n_out,
+              mask_type=MASK_EXCLUSIVE,
+              dtype=dtypes.float32):
+  """Generate the mask for building an autoregressive dense layer."""
+  # TODO(b/67594795): Better support of dynamic shape.
+  mask = np.zeros([n_out, n_in], dtype=dtype.as_numpy_dtype())
+  slices = _gen_slices(num_blocks, n_in, n_out, mask_type=mask_type)
+  for [row_slice, col_slice] in slices:
+    mask[row_slice, col_slice] = 1
+  return mask
+
+
+def masked_dense(inputs,
+                 units,
+                 num_blocks=None,
+                 exclusive=False,
+                 kernel_initializer=None,
+                 reuse=None,
+                 name=None,
+                 *args,
+                 **kwargs):
+  """A autoregressively masked dense layer. Analogous to `tf.layers.dense`.
+
+  See [1] for detailed explanation.
+
+  [1]: "MADE: Masked Autoencoder for Distribution Estimation."
+       Mathieu Germain, Karol Gregor, Iain Murray, Hugo Larochelle. ICML. 2015.
+       https://arxiv.org/abs/1502.03509
+
+  Arguments:
+    inputs: Tensor input.
+    units: Python `int` scalar representing the dimensionality of the output
+      space.
+    num_blocks: Python `int` scalar representing the number of blocks for the
+      MADE masks.
+    exclusive: Python `bool` scalar representing whether to zero the diagonal of
+      the mask, used for the first layer of a MADE.
+    kernel_initializer: Initializer function for the weight matrix.
+      If `None` (default), weights are initialized using the
+      `tf.glorot_random_initializer`.
+    reuse: Python `bool` scalar representing whether to reuse the weights of a
+      previous layer by the same name.
+    name: Python `str` used to describe ops managed by this function.
+    *args: `tf.layers.dense` arguments.
+    **kwargs: `tf.layers.dense` keyword arguments.
+
+  Returns:
+    Output tensor.
+
+  Raises:
+    NotImplementedError: if rightmost dimension of `inputs` is unknown prior to
+      graph execution.
+  """
+  # TODO(b/67594795): Better support of dynamic shape.
+  input_depth = inputs.shape.with_rank_at_least(1)[-1].value
+  if input_depth is None:
+    raise NotImplementedError(
+        "Rightmost dimension must be known prior to graph execution.")
+
+  mask = _gen_mask(num_blocks, input_depth, units,
+                   MASK_EXCLUSIVE if exclusive else MASK_INCLUSIVE).T
+
+  if kernel_initializer is None:
+    kernel_initializer = init_ops.glorot_normal_initializer()
+
+  def masked_initializer(shape, dtype=None, partition_info=None):
+    return mask * kernel_initializer(shape, dtype, partition_info)
+
+  with ops.name_scope(name, "masked_dense", [inputs, units, num_blocks]):
+    layer = layers.Dense(
+        units,
+        kernel_initializer=masked_initializer,
+        kernel_constraint=lambda x: mask * x,
+        name=name,
+        dtype=inputs.dtype.base_dtype,
+        _scope=name,
+        _reuse=reuse,
+        *args,
+        **kwargs)
+    return layer.apply(inputs)
+
+
+def masked_autoregressive_default_template(
+    hidden_layers,
+    shift_only=False,
+    activation=nn_ops.relu,
+    log_scale_min_clip=-5.,
+    log_scale_max_clip=3.,
+    log_scale_clip_gradient=False,
+    name=None,
+    *args,
+    **kwargs):
+  """Build the MADE Model [1].
+
+  This will be wrapped in a make_template to ensure the variables are only
+  created once. It takes the input and returns the `loc` ("mu" [1]) and
+  `log_scale` ("alpha" [1]) from the MADE network.
+
+  Warning: This function uses `masked_dense` to create randomly initialized
+  `tf.Variables`. It is presumed that these will be fit, just as you would any
+  other neural architecture which uses `tf.layers.dense`.
+
+  #### About Hidden Layers:
+
+  Each element of `hidden_layers` should be greater than the `input_depth`
+  (i.e., `input_depth = tf.shape(input)[-1]` where `input` is the input to the
+  neural network). This is necessary to ensure the autoregressivity property.
+
+  #### About Clipping:
+
+  This function also optionally clips the `log_scale` (but possibly not its
+  gradient). This is useful because if `log_scale` is too small/large it might
+  underflow/overflow making it impossible for the `MaskedAutoregressiveFlow`
+  bijector to implement a bijection. Additionally, the `log_scale_clip_gradient`
+  `bool` indicates whether the gradient should also be clipped. The default does
+  not clip the gradient; this is useful because it still provides gradient
+  information (for fitting) yet solves the numerical stability problem. I.e.,
+  `log_scale_clip_gradient = False` means
+  `grad[exp(clip(x))] = grad[x] exp(clip(x))` rather than the usual
+  `grad[clip(x)] exp(clip(x))`.
+
+  [1]: "MADE: Masked Autoencoder for Distribution Estimation."
+       Mathieu Germain, Karol Gregor, Iain Murray, Hugo Larochelle. ICML. 2015.
+       https://arxiv.org/abs/1502.03509
+
+  Arguments:
+    hidden_layers: Python `list`-like of non-negative integer, scalars
+      indicating the number of units in each hidden layer. Default: `[512, 512].
+    shift_only: Python `bool` indicating if only the `shift` term shall be
+      computed. Default: `False`.
+    activation: Activation function (callable). Explicitly setting to `None`
+      implies a linear activation.
+    log_scale_min_clip: `float`-like scalar `Tensor`, or a `Tensor` with the
+      same shape as `log_scale`. The minimum value to clip by. Default: -5.
+    log_scale_max_clip: `float`-like scalar `Tensor`, or a `Tensor` with the
+      same shape as `log_scale`. The maximum value to clip by. Default: 3.
+    log_scale_clip_gradient: Python `bool` indicating that the gradient of
+      `tf.clip_by_value` should be preserved. Default: `False`.
+    name: A name for ops managed by this function. Default:
+      "masked_autoregressive_default_template".
+    *args: `tf.layers.dense` arguments.
+    **kwargs: `tf.layers.dense` keyword arguments.
+
+  Returns:
+    shift: `Float`-like `Tensor` of shift terms (the "mu" in [2]).
+    log_scale: `Float`-like `Tensor` of log(scale) terms (the "alpha" in [2]).
+
+  Raises:
+    NotImplementedError: if rightmost dimension of `inputs` is unknown prior to
+      graph execution.
+  """
+
+  with ops.name_scope(name, "masked_autoregressive_default_template",
+                      values=[log_scale_min_clip, log_scale_max_clip]):
+    def _fn(x):
+      """MADE parameterized via `masked_autoregressive_default_template`."""
+      # TODO(b/67594795): Better support of dynamic shape.
+      input_depth = x.shape.with_rank_at_least(1)[-1].value
+      if input_depth is None:
+        raise NotImplementedError(
+            "Rightmost dimension must be known prior to graph execution.")
+      input_shape = (np.int32(x.shape.as_list()) if x.shape.is_fully_defined()
+                     else array_ops.shape(x))
+      for i, units in enumerate(hidden_layers):
+        x = masked_dense(
+            inputs=x,
+            units=units,
+            num_blocks=input_depth,
+            exclusive=True if i == 0 else False,
+            activation=activation,
+            *args,
+            **kwargs)
+      x = masked_dense(
+          inputs=x,
+          units=(1 if shift_only else 2) * input_depth,
+          num_blocks=input_depth,
+          activation=None,
+          *args,
+          **kwargs)
+      if shift_only:
+        x = array_ops.reshape(x, shape=input_shape)
+        return x, None
+      x = array_ops.reshape(
+          x, shape=array_ops.concat([input_shape, [2]], axis=0))
+      shift, log_scale = array_ops.unstack(x, num=2, axis=-1)
+      which_clip = (math_ops.clip_by_value if log_scale_clip_gradient
+                    else _clip_by_value_preserve_grad)
+      log_scale = which_clip(log_scale, log_scale_min_clip, log_scale_max_clip)
+      return shift, log_scale
+    return template_ops.make_template(
+        "masked_autoregressive_default_template", _fn)
+
+
+def _clip_by_value_preserve_grad(x, clip_value_min, clip_value_max, name=None):
+  """Clips input while leaving gradient unaltered."""
+  with ops.name_scope(name, "clip_by_value_preserve_grad",
+                      [x, clip_value_min, clip_value_max]):
+    clip_x = clip_ops.clip_by_value(x, clip_value_min, clip_value_max)
+    return x + array_ops.stop_gradient(clip_x - x)
diff --git a/tensorflow/contrib/distributions/python/ops/bijectors/masked_autoregressive_impl.py b/tensorflow/contrib/distributions/python/ops/bijectors/masked_autoregressive_impl.py
deleted file mode 100644
index 06c7c61ec3..0000000000
--- a/tensorflow/contrib/distributions/python/ops/bijectors/masked_autoregressive_impl.py
+++ /dev/null
@@ -1,476 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""MaskedAutoregressiveFlow bijector."""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import numpy as np
-
-from tensorflow.python.framework import constant_op
-from tensorflow.python.framework import dtypes
-from tensorflow.python.framework import ops
-from tensorflow.python.layers import core as layers
-from tensorflow.python.ops import array_ops
-from tensorflow.python.ops import clip_ops
-from tensorflow.python.ops import control_flow_ops
-from tensorflow.python.ops import init_ops
-from tensorflow.python.ops import math_ops
-from tensorflow.python.ops import nn_ops
-from tensorflow.python.ops import template as template_ops
-from tensorflow.python.ops import variable_scope as variable_scope_lib
-from tensorflow.python.ops.distributions import bijector as bijector_lib
-
-
-__all__ = [
-    "MaskedAutoregressiveFlow",
-    "masked_autoregressive_default_template",
-    "masked_dense",
-]
-
-
-class MaskedAutoregressiveFlow(bijector_lib.Bijector):
-  """Affine MaskedAutoregressiveFlow bijector for vector-valued events.
-
-  The affine autoregressive flow [1] provides a relatively simple framework for
-  user-specified (deep) architectures to learn a distribution over vector-valued
-  events. Regarding terminology,
-
-    "Autoregressive models decompose the joint density as a product of
-    conditionals, and model each conditional in turn. Normalizing flows
-    transform a base density (e.g. a standard Gaussian) into the target density
-    by an invertible transformation with tractable Jacobian." [1]
-
-  In other words, the "autoregressive property" is equivalent to the
-  decomposition, `p(x) = prod{ p(x[i] | x[0:i]) : i=0, ..., d }`. The provided
-  `shift_and_log_scale_fn`, `masked_autoregressive_default_template`, achieves
-  this property by zeroing out weights in its `masked_dense` layers.
-
-  In the `tf.distributions` framework, a "normalizing flow" is implemented as a
-  `tf.distributions.bijectors.Bijector`. The `forward` "autoregression"
-  is implemented using a `tf.while_loop` and a deep neural network (DNN) with
-  masked weights such that the autoregressive property is automatically met in
-  the `inverse`.
-
-  A `TransformedDistribution` using `MaskedAutoregressiveFlow(...)` uses the
-  (expensive) forward-mode calculation to draw samples and the (cheap)
-  reverse-mode calculation to compute log-probabilities. Conversely, a
-  `TransformedDistribution` using `Invert(MaskedAutoregressiveFlow(...))` uses
-  the (expensive) forward-mode calculation to compute log-probabilities and the
-  (cheap) reverse-mode calculation to compute samples.  See "Example Use"
-  [below] for more details.
-
-  Given a `shift_and_log_scale_fn`, the forward and inverse transformations are
-  (a sequence of) affine transformations. A "valid" `shift_and_log_scale_fn`
-  must compute each `shift` (aka `loc` or "mu" [2]) and `log(scale)` (aka
-  "alpha" [2]) such that each are broadcastable with the arguments to `forward`
-  and `inverse`, i.e., such that the calculations in `forward`, `inverse`
-  [below] are possible.
-
-  For convenience, `masked_autoregressive_default_template` is offered as a
-  possible `shift_and_log_scale_fn` function. It implements the MADE
-  architecture [2]. MADE is a feed-forward network that computes a `shift` and
-  `log(scale)` using `masked_dense` layers in a deep neural network. Weights are
-  masked to ensure the autoregressive property. It is possible that this
-  architecture is suboptimal for your task. To build alternative networks,
-  either change the arguments to `masked_autoregressive_default_template`, use
-  the `masked_dense` function to roll-out your own, or use some other
-  architecture, e.g., using `tf.layers`.
-
-  Warning: no attempt is made to validate that the `shift_and_log_scale_fn`
-  enforces the "autoregressive property".
-
-  Assuming `shift_and_log_scale_fn` has valid shape and autoregressive
-  semantics, the forward transformation is,
-
-  ```python
-  def forward(x):
-    y = zeros_like(x)
-    event_size = x.shape[-1]
-    for _ in range(event_size):
-      shift, log_scale = shift_and_log_scale_fn(y)
-      y = x * math_ops.exp(log_scale) + shift
-    return y
-  ```
-
-  and the inverse transformation is,
-
-  ```python
-  def inverse(y):
-    shift, log_scale = shift_and_log_scale_fn(y)
-    return (y - shift) / math_ops.exp(log_scale)
-  ```
-
-  Notice that the `inverse` does not need a for-loop. This is because in the
-  forward pass each calculation of `shift` and `log_scale` is based on the `y`
-  calculated so far (not `x`). In the `inverse`, the `y` is fully known, thus is
-  equivalent to the scaling used in `forward` after `event_size` passes, i.e.,
-  the "last" `y` used to compute `shift`, `log_scale`. (Roughly speaking, this
-  also proves the transform is bijective.)
-
-  #### Example Use
-
-  ```python
-  tfd = tf.contrib.distributions
-  tfb = tfd.bijectors
-
-  dims = 5
-
-  # A common choice for a normalizing flow is to use a Gaussian for the base
-  # distribution. (However, any continuous distribution would work.) E.g.,
-  maf = tfd.TransformedDistribution(
-      distribution=tfd.Normal(loc=0., scale=1.),
-      bijector=tfb.MaskedAutoregressiveFlow(
-          shift_and_log_scale_fn=tfb.masked_autoregressive_default_template(
-              hidden_layers=[512, 512])),
-      event_shape=[dims])
-
-  x = maf.sample()  # Expensive; uses `tf.while_loop`, no Bijector caching.
-  maf.log_prob(x)   # Almost free; uses Bijector caching.
-  maf.log_prob(0.)  # Cheap; no `tf.while_loop` despite no Bijector caching.
-
-  # [1] also describes an "Inverse Autoregressive Flow", e.g.,
-  iaf = tfd.TransformedDistribution(
-      distribution=tfd.Normal(loc=0., scale=1.),
-      bijector=tfb.Invert(tfb.MaskedAutoregressiveFlow(
-          shift_and_log_scale_fn=tfb.masked_autoregressive_default_template(
-              hidden_layers=[512, 512]))),
-      event_shape=[dims])
-
-  x = iaf.sample()  # Cheap; no `tf.while_loop` despite no Bijector caching.
-  iaf.log_prob(x)   # Almost free; uses Bijector caching.
-  iaf.log_prob(0.)  # Expensive; uses `tf.while_loop`, no Bijector caching.
-
-  # In many (if not most) cases the default `shift_and_log_scale_fn` will be a
-  # poor choice. Here's an example of using a "shift only" version and with a
-  # different number/depth of hidden layers.
-  shift_only = True
-  maf_no_scale_hidden2 = tfd.TransformedDistribution(
-      distribution=tfd.Normal(loc=0., scale=1.),
-      bijector=tfb.MaskedAutoregressiveFlow(
-          tfb.masked_autoregressive_default_template(
-              hidden_layers=[32],
-              shift_only=shift_only),
-          is_constant_jacobian=shift_only),
-      event_shape=[dims])
-  ```
-
-  [1]: "Masked Autoregressive Flow for Density Estimation."
-       George Papamakarios, Theo Pavlakou, Iain Murray. Arxiv. 2017.
-       https://arxiv.org/abs/1705.07057
-
-  [2]: "MADE: Masked Autoencoder for Distribution Estimation."
-       Mathieu Germain, Karol Gregor, Iain Murray, Hugo Larochelle. ICML. 2015.
-       https://arxiv.org/abs/1502.03509
-
-  """
-
-  def __init__(self,
-               shift_and_log_scale_fn,
-               is_constant_jacobian=False,
-               validate_args=False,
-               name=None):
-    """Creates the MaskedAutoregressiveFlow bijector.
-
-    Args:
-      shift_and_log_scale_fn: Python `callable` which computes `shift` and
-        `log_scale` from both the forward domain (`x`) and the inverse domain
-        (`y`). Calculation must respect the "autoregressive property" (see class
-        docstring). Suggested default
-        `masked_autoregressive_default_template(hidden_layers=...)`.
-        Typically the function contains `tf.Variables` and is wrapped using
-        `tf.make_template`. Returning `None` for either (both) `shift`,
-        `log_scale` is equivalent to (but more efficient than) returning zero.
-      is_constant_jacobian: Python `bool`. Default: `False`. When `True` the
-        implementation assumes `log_scale` does not depend on the forward domain
-        (`x`) or inverse domain (`y`) values. (No validation is made;
-        `is_constant_jacobian=False` is always safe but possibly computationally
-        inefficient.)
-      validate_args: Python `bool` indicating whether arguments should be
-        checked for correctness.
-      name: Python `str`, name given to ops managed by this object.
-    """
-    name = name or "masked_autoregressive_flow"
-    self._shift_and_log_scale_fn = shift_and_log_scale_fn
-    super(MaskedAutoregressiveFlow, self).__init__(
-        is_constant_jacobian=is_constant_jacobian,
-        validate_args=validate_args,
-        name=name)
-
-  def _forward(self, x):
-    event_size = array_ops.shape(x)[-1]
-    y0 = array_ops.zeros_like(x, name="y0")
-    # call the template once to ensure creation
-    _ = self._shift_and_log_scale_fn(y0)
-    def _loop_body(index, y0):
-      """While-loop body for autoregression calculation."""
-      # Set caching device to avoid re-getting the tf.Variable for every while
-      # loop iteration.
-      with variable_scope_lib.variable_scope(
-          variable_scope_lib.get_variable_scope()) as vs:
-        if vs.caching_device is None:
-          vs.set_caching_device(lambda op: op.device)
-        shift, log_scale = self._shift_and_log_scale_fn(y0)
-      y = x
-      if log_scale is not None:
-        y *= math_ops.exp(log_scale)
-      if shift is not None:
-        y += shift
-      return index + 1, y
-    _, y = control_flow_ops.while_loop(
-        cond=lambda index, _: index < event_size,
-        body=_loop_body,
-        loop_vars=[0, y0])
-    return y
-
-  def _inverse(self, y):
-    shift, log_scale = self._shift_and_log_scale_fn(y)
-    x = y
-    if shift is not None:
-      x -= shift
-    if log_scale is not None:
-      x *= math_ops.exp(-log_scale)
-    return x
-
-  def _inverse_log_det_jacobian(self, y):
-    _, log_scale = self._shift_and_log_scale_fn(y)
-    if log_scale is None:
-      return constant_op.constant(0., dtype=y.dtype, name="ildj")
-    return -math_ops.reduce_sum(log_scale, axis=-1)
-
-
-MASK_INCLUSIVE = "inclusive"
-MASK_EXCLUSIVE = "exclusive"
-
-
-def _gen_slices(num_blocks, n_in, n_out, mask_type=MASK_EXCLUSIVE):
-  """Generate the slices for building an autoregressive mask."""
-  # TODO(b/67594795): Better support of dynamic shape.
-  slices = []
-  col = 0
-  d_in = n_in // num_blocks
-  d_out = n_out // num_blocks
-  row = d_out if mask_type == MASK_EXCLUSIVE else 0
-  for _ in range(num_blocks):
-    row_slice = slice(row, None)
-    col_slice = slice(col, col + d_in)
-    slices.append([row_slice, col_slice])
-    col += d_in
-    row += d_out
-  return slices
-
-
-def _gen_mask(num_blocks,
-              n_in,
-              n_out,
-              mask_type=MASK_EXCLUSIVE,
-              dtype=dtypes.float32):
-  """Generate the mask for building an autoregressive dense layer."""
-  # TODO(b/67594795): Better support of dynamic shape.
-  mask = np.zeros([n_out, n_in], dtype=dtype.as_numpy_dtype())
-  slices = _gen_slices(num_blocks, n_in, n_out, mask_type=mask_type)
-  for [row_slice, col_slice] in slices:
-    mask[row_slice, col_slice] = 1
-  return mask
-
-
-def masked_dense(inputs,
-                 units,
-                 num_blocks=None,
-                 exclusive=False,
-                 kernel_initializer=None,
-                 reuse=None,
-                 name=None,
-                 *args,
-                 **kwargs):
-  """A autoregressively masked dense layer. Analogous to `tf.layers.dense`.
-
-  See [1] for detailed explanation.
-
-  [1]: "MADE: Masked Autoencoder for Distribution Estimation."
-       Mathieu Germain, Karol Gregor, Iain Murray, Hugo Larochelle. ICML. 2015.
-       https://arxiv.org/abs/1502.03509
-
-  Arguments:
-    inputs: Tensor input.
-    units: Python `int` scalar representing the dimensionality of the output
-      space.
-    num_blocks: Python `int` scalar representing the number of blocks for the
-      MADE masks.
-    exclusive: Python `bool` scalar representing whether to zero the diagonal of
-      the mask, used for the first layer of a MADE.
-    kernel_initializer: Initializer function for the weight matrix.
-      If `None` (default), weights are initialized using the
-      `tf.glorot_random_initializer`.
-    reuse: Python `bool` scalar representing whether to reuse the weights of a
-      previous layer by the same name.
-    name: Python `str` used to describe ops managed by this function.
-    *args: `tf.layers.dense` arguments.
-    **kwargs: `tf.layers.dense` keyword arguments.
-
-  Returns:
-    Output tensor.
-
-  Raises:
-    NotImplementedError: if rightmost dimension of `inputs` is unknown prior to
-      graph execution.
-  """
-  # TODO(b/67594795): Better support of dynamic shape.
-  input_depth = inputs.shape.with_rank_at_least(1)[-1].value
-  if input_depth is None:
-    raise NotImplementedError(
-        "Rightmost dimension must be known prior to graph execution.")
-
-  mask = _gen_mask(num_blocks, input_depth, units,
-                   MASK_EXCLUSIVE if exclusive else MASK_INCLUSIVE).T
-
-  if kernel_initializer is None:
-    kernel_initializer = init_ops.glorot_normal_initializer()
-
-  def masked_initializer(shape, dtype=None, partition_info=None):
-    return mask * kernel_initializer(shape, dtype, partition_info)
-
-  with ops.name_scope(name, "masked_dense", [inputs, units, num_blocks]):
-    layer = layers.Dense(
-        units,
-        kernel_initializer=masked_initializer,
-        kernel_constraint=lambda x: mask * x,
-        name=name,
-        dtype=inputs.dtype.base_dtype,
-        _scope=name,
-        _reuse=reuse,
-        *args,
-        **kwargs)
-    return layer.apply(inputs)
-
-
-def masked_autoregressive_default_template(
-    hidden_layers,
-    shift_only=False,
-    activation=nn_ops.relu,
-    log_scale_min_clip=-5.,
-    log_scale_max_clip=3.,
-    log_scale_clip_gradient=False,
-    name=None,
-    *args,
-    **kwargs):
-  """Build the MADE Model [1].
-
-  This will be wrapped in a make_template to ensure the variables are only
-  created once. It takes the input and returns the `loc` ("mu" [1]) and
-  `log_scale` ("alpha" [1]) from the MADE network.
-
-  Warning: This function uses `masked_dense` to create randomly initialized
-  `tf.Variables`. It is presumed that these will be fit, just as you would any
-  other neural architecture which uses `tf.layers.dense`.
-
-  #### About Hidden Layers:
-
-  Each element of `hidden_layers` should be greater than the `input_depth`
-  (i.e., `input_depth = tf.shape(input)[-1]` where `input` is the input to the
-  neural network). This is necessary to ensure the autoregressivity property.
-
-  #### About Clipping:
-
-  This function also optionally clips the `log_scale` (but possibly not its
-  gradient). This is useful because if `log_scale` is too small/large it might
-  underflow/overflow making it impossible for the `MaskedAutoregressiveFlow`
-  bijector to implement a bijection. Additionally, the `log_scale_clip_gradient`
-  `bool` indicates whether the gradient should also be clipped. The default does
-  not clip the gradient; this is useful because it still provides gradient
-  information (for fitting) yet solves the numerical stability problem. I.e.,
-  `log_scale_clip_gradient = False` means
-  `grad[exp(clip(x))] = grad[x] exp(clip(x))` rather than the usual
-  `grad[clip(x)] exp(clip(x))`.
-
-  [1]: "MADE: Masked Autoencoder for Distribution Estimation."
-       Mathieu Germain, Karol Gregor, Iain Murray, Hugo Larochelle. ICML. 2015.
-       https://arxiv.org/abs/1502.03509
-
-  Arguments:
-    hidden_layers: Python `list`-like of non-negative integer, scalars
-      indicating the number of units in each hidden layer. Default: `[512, 512].
-    shift_only: Python `bool` indicating if only the `shift` term shall be
-      computed. Default: `False`.
-    activation: Activation function (callable). Explicitly setting to `None`
-      implies a linear activation.
-    log_scale_min_clip: `float`-like scalar `Tensor`, or a `Tensor` with the
-      same shape as `log_scale`. The minimum value to clip by. Default: -5.
-    log_scale_max_clip: `float`-like scalar `Tensor`, or a `Tensor` with the
-      same shape as `log_scale`. The maximum value to clip by. Default: 3.
-    log_scale_clip_gradient: Python `bool` indicating that the gradient of
-      `tf.clip_by_value` should be preserved. Default: `False`.
-    name: A name for ops managed by this function. Default:
-      "masked_autoregressive_default_template".
-    *args: `tf.layers.dense` arguments.
-    **kwargs: `tf.layers.dense` keyword arguments.
-
-  Returns:
-    shift: `Float`-like `Tensor` of shift terms (the "mu" in [2]).
-    log_scale: `Float`-like `Tensor` of log(scale) terms (the "alpha" in [2]).
-
-  Raises:
-    NotImplementedError: if rightmost dimension of `inputs` is unknown prior to
-      graph execution.
-  """
-
-  with ops.name_scope(name, "masked_autoregressive_default_template",
-                      values=[log_scale_min_clip, log_scale_max_clip]):
-    def _fn(x):
-      """MADE parameterized via `masked_autoregressive_default_template`."""
-      # TODO(b/67594795): Better support of dynamic shape.
-      input_depth = x.shape.with_rank_at_least(1)[-1].value
-      if input_depth is None:
-        raise NotImplementedError(
-            "Rightmost dimension must be known prior to graph execution.")
-      input_shape = (np.int32(x.shape.as_list()) if x.shape.is_fully_defined()
-                     else array_ops.shape(x))
-      for i, units in enumerate(hidden_layers):
-        x = masked_dense(
-            inputs=x,
-            units=units,
-            num_blocks=input_depth,
-            exclusive=True if i == 0 else False,
-            activation=activation,
-            *args,
-            **kwargs)
-      x = masked_dense(
-          inputs=x,
-          units=(1 if shift_only else 2) * input_depth,
-          num_blocks=input_depth,
-          activation=None,
-          *args,
-          **kwargs)
-      if shift_only:
-        x = array_ops.reshape(x, shape=input_shape)
-        return x, None
-      x = array_ops.reshape(
-          x, shape=array_ops.concat([input_shape, [2]], axis=0))
-      shift, log_scale = array_ops.unstack(x, num=2, axis=-1)
-      which_clip = (math_ops.clip_by_value if log_scale_clip_gradient
-                    else _clip_by_value_preserve_grad)
-      log_scale = which_clip(log_scale, log_scale_min_clip, log_scale_max_clip)
-      return shift, log_scale
-    return template_ops.make_template(
-        "masked_autoregressive_default_template", _fn)
-
-
-def _clip_by_value_preserve_grad(x, clip_value_min, clip_value_max, name=None):
-  """Clips input while leaving gradient unaltered."""
-  with ops.name_scope(name, "clip_by_value_preserve_grad",
-                      [x, clip_value_min, clip_value_max]):
-    clip_x = clip_ops.clip_by_value(x, clip_value_min, clip_value_max)
-    return x + array_ops.stop_gradient(clip_x - x)
diff --git a/tensorflow/contrib/distributions/python/ops/bijectors/permute.py b/tensorflow/contrib/distributions/python/ops/bijectors/permute.py
index a187ce22d6..8654cc39d0 100644
--- a/tensorflow/contrib/distributions/python/ops/bijectors/permute.py
+++ b/tensorflow/contrib/distributions/python/ops/bijectors/permute.py
@@ -12,18 +12,127 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""Permute bijector."""
+"""Permutation bijectors."""
 
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-# go/tf-wildcard-import
-# pylint: disable=wildcard-import
-from tensorflow.contrib.distributions.python.ops.bijectors.permute_impl import *
-# pylint: enable=wildcard-import
-from tensorflow.python.util.all_util import remove_undocumented
+import numpy as np
 
-_allowed_symbols = ["Permute"]
+from tensorflow.python.framework import constant_op
+from tensorflow.python.framework import ops
+from tensorflow.python.framework import tensor_util
+from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import check_ops
+from tensorflow.python.ops import control_flow_ops
+from tensorflow.python.ops import math_ops
+from tensorflow.python.ops import nn_ops
+from tensorflow.python.ops.distributions import bijector as bijector_lib
 
-remove_undocumented(__name__, _allowed_symbols)
+
+__all__ = [
+    "Permute",
+]
+
+
+class Permute(bijector_lib.Bijector):
+  """Permutes the rightmost dimension of a `Tensor`.
+
+  ```python
+  tfd = tf.contrib.distributions
+
+  reverse = tfd.bijectors.Permute(permutation=[2, 1, 0])
+
+  reverse.forward([-1., 0., 1.])
+  # ==> [1., 0., -1]
+
+  reverse.inverse([1., 0., -1])
+  # ==> [-1., 0., 1.]
+
+  reverse.forward_log_det_jacobian(any_value)
+  # ==> 0.
+
+  reverse.inverse_log_det_jacobian(any_value)
+  # ==> 0.
+  ```
+
+  Warning: `tf.estimator` may repeatedly build the graph thus
+  `Permute(np.random.permutation(event_size)).astype("int32"))` is not a
+  reliable parameterization (nor would it be even if using `tf.constant`). A
+  safe alternative is to use `tf.get_variable` to achieve "init once" behavior,
+  i.e.,
+
+  ```python
+  def init_once(x, name):
+    return tf.get_variable(name, initializer=x, trainable=False)
+
+  Permute(permutation=init_once(
+      np.random.permutation(event_size).astype("int32"),
+      name="permutation"))
+  ```
+
+  """
+
+  def __init__(self, permutation, validate_args=False, name=None):
+    """Creates the `Permute` bijector.
+
+    Args:
+      permutation: An `int`-like vector-shaped `Tensor` representing the
+        permutation to apply to the rightmost dimension of the transformed
+        `Tensor`.
+      validate_args: Python `bool` indicating whether arguments should be
+        checked for correctness.
+      name: Python `str`, name given to ops managed by this object.
+
+    Raises:
+      TypeError: if `not permutation.dtype.is_integer`.
+      ValueError: if `permutation` does not contain exactly one of each of
+        `{0, 1, ..., d}`.
+    """
+    with ops.name_scope(name, "permute", values=[permutation]):
+      permutation = ops.convert_to_tensor(
+          permutation,
+          name="permutation")
+      if not permutation.dtype.is_integer:
+        raise TypeError("permutation.dtype ({}) should be `int`-like.".format(
+            permutation.dtype.name))
+      p = tensor_util.constant_value(permutation)
+      if p is not None:
+        if set(p) != set(np.arange(p.size)):
+          raise ValueError("Permutation over `d` must contain exactly one of "
+                           "each of `{0, 1, ..., d}`.")
+      elif validate_args:
+        p, _ = nn_ops.top_k(-permutation,
+                            k=array_ops.shape(permutation)[-1],
+                            sorted=True)
+        permutation = control_flow_ops.with_dependencies([
+            check_ops.assert_equal(
+                -p, math_ops.range(array_ops.size(p)),
+                message=("Permutation over `d` must contain exactly one of "
+                         "each of `{0, 1, ..., d}`.")),
+        ], permutation)
+      self._permutation = permutation
+      super(Permute, self).__init__(
+          is_constant_jacobian=True,
+          validate_args=validate_args,
+          name=name or "permute")
+
+  @property
+  def permutation(self):
+    return self._permutation
+
+  def _forward(self, x):
+    return array_ops.gather(x, self.permutation, axis=-1)
+
+  def _inverse(self, y):
+    return array_ops.gather(
+        y,
+        array_ops.invert_permutation(self.permutation),
+        axis=-1)
+
+  def _inverse_log_det_jacobian(self, y):
+    return constant_op.constant(0., dtype=y.dtype)
+
+  def _forward_log_det_jacobian(self, x):
+    return constant_op.constant(0., dtype=x.dtype)
diff --git a/tensorflow/contrib/distributions/python/ops/bijectors/permute_impl.py b/tensorflow/contrib/distributions/python/ops/bijectors/permute_impl.py
deleted file mode 100644
index 8654cc39d0..0000000000
--- a/tensorflow/contrib/distributions/python/ops/bijectors/permute_impl.py
+++ /dev/null
@@ -1,138 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Permutation bijectors."""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import numpy as np
-
-from tensorflow.python.framework import constant_op
-from tensorflow.python.framework import ops
-from tensorflow.python.framework import tensor_util
-from tensorflow.python.ops import array_ops
-from tensorflow.python.ops import check_ops
-from tensorflow.python.ops import control_flow_ops
-from tensorflow.python.ops import math_ops
-from tensorflow.python.ops import nn_ops
-from tensorflow.python.ops.distributions import bijector as bijector_lib
-
-
-__all__ = [
-    "Permute",
-]
-
-
-class Permute(bijector_lib.Bijector):
-  """Permutes the rightmost dimension of a `Tensor`.
-
-  ```python
-  tfd = tf.contrib.distributions
-
-  reverse = tfd.bijectors.Permute(permutation=[2, 1, 0])
-
-  reverse.forward([-1., 0., 1.])
-  # ==> [1., 0., -1]
-
-  reverse.inverse([1., 0., -1])
-  # ==> [-1., 0., 1.]
-
-  reverse.forward_log_det_jacobian(any_value)
-  # ==> 0.
-
-  reverse.inverse_log_det_jacobian(any_value)
-  # ==> 0.
-  ```
-
-  Warning: `tf.estimator` may repeatedly build the graph thus
-  `Permute(np.random.permutation(event_size)).astype("int32"))` is not a
-  reliable parameterization (nor would it be even if using `tf.constant`). A
-  safe alternative is to use `tf.get_variable` to achieve "init once" behavior,
-  i.e.,
-
-  ```python
-  def init_once(x, name):
-    return tf.get_variable(name, initializer=x, trainable=False)
-
-  Permute(permutation=init_once(
-      np.random.permutation(event_size).astype("int32"),
-      name="permutation"))
-  ```
-
-  """
-
-  def __init__(self, permutation, validate_args=False, name=None):
-    """Creates the `Permute` bijector.
-
-    Args:
-      permutation: An `int`-like vector-shaped `Tensor` representing the
-        permutation to apply to the rightmost dimension of the transformed
-        `Tensor`.
-      validate_args: Python `bool` indicating whether arguments should be
-        checked for correctness.
-      name: Python `str`, name given to ops managed by this object.
-
-    Raises:
-      TypeError: if `not permutation.dtype.is_integer`.
-      ValueError: if `permutation` does not contain exactly one of each of
-        `{0, 1, ..., d}`.
-    """
-    with ops.name_scope(name, "permute", values=[permutation]):
-      permutation = ops.convert_to_tensor(
-          permutation,
-          name="permutation")
-      if not permutation.dtype.is_integer:
-        raise TypeError("permutation.dtype ({}) should be `int`-like.".format(
-            permutation.dtype.name))
-      p = tensor_util.constant_value(permutation)
-      if p is not None:
-        if set(p) != set(np.arange(p.size)):
-          raise ValueError("Permutation over `d` must contain exactly one of "
-                           "each of `{0, 1, ..., d}`.")
-      elif validate_args:
-        p, _ = nn_ops.top_k(-permutation,
-                            k=array_ops.shape(permutation)[-1],
-                            sorted=True)
-        permutation = control_flow_ops.with_dependencies([
-            check_ops.assert_equal(
-                -p, math_ops.range(array_ops.size(p)),
-                message=("Permutation over `d` must contain exactly one of "
-                         "each of `{0, 1, ..., d}`.")),
-        ], permutation)
-      self._permutation = permutation
-      super(Permute, self).__init__(
-          is_constant_jacobian=True,
-          validate_args=validate_args,
-          name=name or "permute")
-
-  @property
-  def permutation(self):
-    return self._permutation
-
-  def _forward(self, x):
-    return array_ops.gather(x, self.permutation, axis=-1)
-
-  def _inverse(self, y):
-    return array_ops.gather(
-        y,
-        array_ops.invert_permutation(self.permutation),
-        axis=-1)
-
-  def _inverse_log_det_jacobian(self, y):
-    return constant_op.constant(0., dtype=y.dtype)
-
-  def _forward_log_det_jacobian(self, x):
-    return constant_op.constant(0., dtype=x.dtype)
diff --git a/tensorflow/contrib/distributions/python/ops/bijectors/power_transform.py b/tensorflow/contrib/distributions/python/ops/bijectors/power_transform.py
index a83199549c..c37db61720 100644
--- a/tensorflow/contrib/distributions/python/ops/bijectors/power_transform.py
+++ b/tensorflow/contrib/distributions/python/ops/bijectors/power_transform.py
@@ -18,12 +18,110 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-# go/tf-wildcard-import
-# pylint: disable=wildcard-import
-from tensorflow.contrib.distributions.python.ops.bijectors.power_transform_impl import *
-# pylint: enable=wildcard-import
-from tensorflow.python.util.all_util import remove_undocumented
+from tensorflow.python.framework import ops
+from tensorflow.python.framework import tensor_util
+from tensorflow.python.ops import check_ops
+from tensorflow.python.ops import control_flow_ops
+from tensorflow.python.ops import math_ops
+from tensorflow.python.ops.distributions import bijector
 
-_allowed_symbols = ["PowerTransform"]
 
-remove_undocumented(__name__, _allowed_symbols)
+__all__ = [
+    "PowerTransform",
+]
+
+
+class PowerTransform(bijector.Bijector):
+  """Compute `Y = g(X) = (1 + X * c)**(1 / c), X >= -1 / c`.
+
+  The [power transform](https://en.wikipedia.org/wiki/Power_transform) maps
+  inputs from `[0, inf]` to `[-1/c, inf]`; this is equivalent to the `inverse`
+  of this bijector.
+
+  This bijector is equivalent to the `Exp` bijector when `c=0`.
+  """
+
+  def __init__(self,
+               power=0.,
+               event_ndims=0,
+               validate_args=False,
+               name="power_transform"):
+    """Instantiates the `PowerTransform` bijector.
+
+    Args:
+      power: Python `float` scalar indicating the transform power, i.e.,
+        `Y = g(X) = (1 + X * c)**(1 / c)` where `c` is the `power`.
+      event_ndims: Python scalar indicating the number of dimensions associated
+        with a particular draw from the distribution.
+      validate_args: Python `bool` indicating whether arguments should be
+        checked for correctness.
+      name: Python `str` name given to ops managed by this object.
+
+    Raises:
+      ValueError: if `power < 0` or is not known statically.
+    """
+    self._graph_parents = []
+    self._name = name
+    self._validate_args = validate_args
+    with self._name_scope("init", values=[power]):
+      power = tensor_util.constant_value(
+          ops.convert_to_tensor(power, name="power"))
+    if power is None or power < 0:
+      raise ValueError("`power` must be a non-negative TF constant.")
+    self._power = power
+    super(PowerTransform, self).__init__(
+        event_ndims=event_ndims,
+        validate_args=validate_args,
+        name=name)
+
+  @property
+  def power(self):
+    """The `c` in: `Y = g(X) = (1 + X * c)**(1 / c)`."""
+    return self._power
+
+  def _forward(self, x):
+    x = self._maybe_assert_valid_x(x)
+    if self.power == 0.:
+      return math_ops.exp(x)
+    # If large x accuracy is an issue, consider using:
+    # (1. + x * self.power)**(1. / self.power) when x >> 1.
+    return math_ops.exp(math_ops.log1p(x * self.power) / self.power)
+
+  def _inverse(self, y):
+    y = self._maybe_assert_valid_y(y)
+    if self.power == 0.:
+      return math_ops.log(y)
+    # If large y accuracy is an issue, consider using:
+    # (y**self.power - 1.) / self.power when y >> 1.
+    return math_ops.expm1(math_ops.log(y) * self.power) / self.power
+
+  def _inverse_log_det_jacobian(self, y):
+    y = self._maybe_assert_valid_y(y)
+    event_dims = self._event_dims_tensor(y)
+    return (self.power - 1.) * math_ops.reduce_sum(
+        math_ops.log(y), axis=event_dims)
+
+  def _forward_log_det_jacobian(self, x):
+    x = self._maybe_assert_valid_x(x)
+    event_dims = self._event_dims_tensor(x)
+    if self.power == 0.:
+      return math_ops.reduce_sum(x, axis=event_dims)
+    return (1. / self.power - 1.) * math_ops.reduce_sum(
+        math_ops.log1p(x * self.power),
+        axis=event_dims)
+
+  def _maybe_assert_valid_x(self, x):
+    if not self.validate_args or self.power == 0.:
+      return x
+    is_valid = check_ops.assert_non_negative(
+        1. + self.power * x,
+        message="Forward transformation input must be at least {}.".format(
+            -1. / self.power))
+    return control_flow_ops.with_dependencies([is_valid], x)
+
+  def _maybe_assert_valid_y(self, y):
+    if not self.validate_args:
+      return y
+    is_valid = check_ops.assert_positive(
+        y, message="Inverse transformation input must be greater than 0.")
+    return control_flow_ops.with_dependencies([is_valid], y)
diff --git a/tensorflow/contrib/distributions/python/ops/bijectors/power_transform_impl.py b/tensorflow/contrib/distributions/python/ops/bijectors/power_transform_impl.py
deleted file mode 100644
index c37db61720..0000000000
--- a/tensorflow/contrib/distributions/python/ops/bijectors/power_transform_impl.py
+++ /dev/null
@@ -1,127 +0,0 @@
-# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""PowerTransform bijector."""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-from tensorflow.python.framework import ops
-from tensorflow.python.framework import tensor_util
-from tensorflow.python.ops import check_ops
-from tensorflow.python.ops import control_flow_ops
-from tensorflow.python.ops import math_ops
-from tensorflow.python.ops.distributions import bijector
-
-
-__all__ = [
-    "PowerTransform",
-]
-
-
-class PowerTransform(bijector.Bijector):
-  """Compute `Y = g(X) = (1 + X * c)**(1 / c), X >= -1 / c`.
-
-  The [power transform](https://en.wikipedia.org/wiki/Power_transform) maps
-  inputs from `[0, inf]` to `[-1/c, inf]`; this is equivalent to the `inverse`
-  of this bijector.
-
-  This bijector is equivalent to the `Exp` bijector when `c=0`.
-  """
-
-  def __init__(self,
-               power=0.,
-               event_ndims=0,
-               validate_args=False,
-               name="power_transform"):
-    """Instantiates the `PowerTransform` bijector.
-
-    Args:
-      power: Python `float` scalar indicating the transform power, i.e.,
-        `Y = g(X) = (1 + X * c)**(1 / c)` where `c` is the `power`.
-      event_ndims: Python scalar indicating the number of dimensions associated
-        with a particular draw from the distribution.
-      validate_args: Python `bool` indicating whether arguments should be
-        checked for correctness.
-      name: Python `str` name given to ops managed by this object.
-
-    Raises:
-      ValueError: if `power < 0` or is not known statically.
-    """
-    self._graph_parents = []
-    self._name = name
-    self._validate_args = validate_args
-    with self._name_scope("init", values=[power]):
-      power = tensor_util.constant_value(
-          ops.convert_to_tensor(power, name="power"))
-    if power is None or power < 0:
-      raise ValueError("`power` must be a non-negative TF constant.")
-    self._power = power
-    super(PowerTransform, self).__init__(
-        event_ndims=event_ndims,
-        validate_args=validate_args,
-        name=name)
-
-  @property
-  def power(self):
-    """The `c` in: `Y = g(X) = (1 + X * c)**(1 / c)`."""
-    return self._power
-
-  def _forward(self, x):
-    x = self._maybe_assert_valid_x(x)
-    if self.power == 0.:
-      return math_ops.exp(x)
-    # If large x accuracy is an issue, consider using:
-    # (1. + x * self.power)**(1. / self.power) when x >> 1.
-    return math_ops.exp(math_ops.log1p(x * self.power) / self.power)
-
-  def _inverse(self, y):
-    y = self._maybe_assert_valid_y(y)
-    if self.power == 0.:
-      return math_ops.log(y)
-    # If large y accuracy is an issue, consider using:
-    # (y**self.power - 1.) / self.power when y >> 1.
-    return math_ops.expm1(math_ops.log(y) * self.power) / self.power
-
-  def _inverse_log_det_jacobian(self, y):
-    y = self._maybe_assert_valid_y(y)
-    event_dims = self._event_dims_tensor(y)
-    return (self.power - 1.) * math_ops.reduce_sum(
-        math_ops.log(y), axis=event_dims)
-
-  def _forward_log_det_jacobian(self, x):
-    x = self._maybe_assert_valid_x(x)
-    event_dims = self._event_dims_tensor(x)
-    if self.power == 0.:
-      return math_ops.reduce_sum(x, axis=event_dims)
-    return (1. / self.power - 1.) * math_ops.reduce_sum(
-        math_ops.log1p(x * self.power),
-        axis=event_dims)
-
-  def _maybe_assert_valid_x(self, x):
-    if not self.validate_args or self.power == 0.:
-      return x
-    is_valid = check_ops.assert_non_negative(
-        1. + self.power * x,
-        message="Forward transformation input must be at least {}.".format(
-            -1. / self.power))
-    return control_flow_ops.with_dependencies([is_valid], x)
-
-  def _maybe_assert_valid_y(self, y):
-    if not self.validate_args:
-      return y
-    is_valid = check_ops.assert_positive(
-        y, message="Inverse transformation input must be greater than 0.")
-    return control_flow_ops.with_dependencies([is_valid], y)
diff --git a/tensorflow/contrib/distributions/python/ops/bijectors/reshape.py b/tensorflow/contrib/distributions/python/ops/bijectors/reshape.py
index 8997f7ab69..55eca06312 100644
--- a/tensorflow/contrib/distributions/python/ops/bijectors/reshape.py
+++ b/tensorflow/contrib/distributions/python/ops/bijectors/reshape.py
@@ -12,18 +12,303 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""Reshape bijector."""
+"""Reshape bijectors."""
 
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-# go/tf-wildcard-import
-# pylint: disable=wildcard-import
-from tensorflow.contrib.distributions.python.ops.bijectors.reshape_impl import *
-# pylint: enable=wildcard-import
-from tensorflow.python.util.all_util import remove_undocumented
+import numpy as np
 
-_allowed_symbols = ["Reshape"]
+from tensorflow.python.framework import constant_op
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import ops
+from tensorflow.python.framework import tensor_util
+from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import check_ops
+from tensorflow.python.ops import control_flow_ops
+from tensorflow.python.ops import math_ops
+from tensorflow.python.ops.distributions import bijector as bijector_lib
 
-remove_undocumented(__name__, _allowed_symbols)
+
+__all__ = [
+    "Reshape",
+]
+
+
+def _static_ndims_from_shape(shape):
+  return shape.shape.with_rank_at_least(1)[0].value
+
+
+def _ndims_from_shape(shape):
+  return array_ops.shape(shape)[0]
+
+
+class Reshape(bijector_lib.Bijector):
+  """Reshapes the `event_shape` of a `Tensor`.
+
+  The semantics generally follow that of `tf.reshape()`, with
+  a few differences:
+
+  * The user must provide both the input and output shape, so that
+    the transformation can be inverted. If an input shape is not
+    specified, the default assumes a vector-shaped input, i.e.,
+    event_shape_in = (-1,).
+  * The `Reshape` bijector automatically broadcasts over the leftmost
+    dimensions of its input (`sample_shape` and `batch_shape`); only
+    the rightmost `event_ndims_in` dimensions are reshaped. The
+    number of dimensions to reshape is inferred from the provided
+    `event_shape_in` (`event_ndims_in = len(event_shape_in)`).
+
+  Example usage:
+  ```python
+
+  tfd = tf.contrib.distributions
+
+  r = tfd.bijectors.Reshape(event_shape_out=[1, -1])
+
+  r.forward([3., 4.])    # shape [2]
+  # ==> [[3., 4.]]       # shape [1, 2]
+
+  r.forward([[1., 2.], [3., 4.]])  # shape [2, 2]
+  # ==> [[[1., 2.]],
+  #      [[3., 4.]]]   # shape [2, 1, 2]
+
+  r.inverse([[3., 4.]])  # shape [1,2]
+  # ==> [3., 4.]         # shape [2]
+
+  r.forward_log_det_jacobian(any_value)
+  # ==> 0.
+
+  r.inverse_log_det_jacobian(any_value)
+  # ==> 0.
+  ```
+
+  """
+
+  def __init__(self, event_shape_out, event_shape_in=(-1,),
+               validate_args=False, name=None):
+    """Creates a `Reshape` bijector.
+
+    Args:
+      event_shape_out: An `int`-like vector-shaped `Tensor`
+        representing the event shape of the transformed output.
+      event_shape_in: An optional `int`-like vector-shape `Tensor`
+        representing the event shape of the input. This is required in
+        order to define inverse operations; the default of (-1,)
+        assumes a vector-shaped input.
+      validate_args: Python `bool` indicating whether arguments should
+        be checked for correctness.
+      name: Python `str`, name given to ops managed by this object.
+
+    Raises:
+      TypeError: if either `event_shape_in` or `event_shape_out` has
+        non-integer `dtype`.
+      ValueError: if either of `event_shape_in` or `event_shape_out`
+       has non-vector shape (`rank > 1`), or if their sizes do not
+       match.
+    """
+    with ops.name_scope(name, "reshape",
+                        values=[event_shape_out, event_shape_in]):
+
+      event_shape_out = ops.convert_to_tensor(event_shape_out,
+                                              name="event_shape_out",
+                                              preferred_dtype=dtypes.int32)
+      event_shape_in = ops.convert_to_tensor(event_shape_in,
+                                             name="event_shape_in",
+                                             preferred_dtype=dtypes.int32)
+
+      assertions = []
+      assertions.extend(self._maybe_check_valid_shape(
+          event_shape_out, validate_args))
+      assertions.extend(self._maybe_check_valid_shape(
+          event_shape_in, validate_args))
+
+      self._assertions = assertions
+      self._event_shape_in = event_shape_in
+      self._event_shape_out = event_shape_out
+
+      super(Reshape, self).__init__(is_constant_jacobian=True,
+                                    validate_args=validate_args,
+                                    name=name or "reshape")
+
+  def _maybe_check_valid_shape(self, shape, validate_args):
+    """Check that a shape Tensor is int-type and otherwise sane."""
+    if not shape.dtype.is_integer:
+      raise TypeError("{} dtype ({}) should be `int`-like.".format(
+          shape.op.name, shape.dtype.name))
+
+    assertions = []
+
+    ndims = array_ops.rank(shape)
+    ndims_ = tensor_util.constant_value(ndims)
+    if ndims_ is not None and ndims_ > 1:
+      raise ValueError("`{}` rank ({}) should be <= 1.".format(
+          shape.op.name, ndims_))
+    elif validate_args:
+      assertions.append(check_ops.assert_less_equal(
+          ndims, 1, message="`{}` rank should be <= 1.".format(shape.op.name)))
+
+    shape_ = tensor_util.constant_value_as_shape(shape)
+    if shape_.is_fully_defined():
+      es = np.int32(shape_.as_list())
+      if sum(es == -1) > 1:
+        raise ValueError(
+            "`{}` must have at most one `-1` (given {})"
+            .format(shape.op.name, es))
+      if np.any(es < -1):
+        raise ValueError(
+            "`{}` elements must be either positive integers or `-1`"
+            "(given {})."
+            .format(shape.op.name, es))
+    elif validate_args:
+      assertions.extend([
+          check_ops.assert_less_equal(
+              math_ops.reduce_sum(
+                  math_ops.cast(math_ops.equal(shape, -1), dtypes.int32)),
+              1,
+              message="`{}` elements must have at most one `-1`."
+              .format(shape.op.name)),
+          check_ops.assert_greater_equal(
+              shape, -1,
+              message="`{}` elements must be either positive integers or `-1`."
+              .format(shape.op.name)),
+      ])
+    return assertions
+
+  def _reshape_helper(self, x, event_shape_in, event_shape_out):
+    """Reshape only the event_shape of an input `Tensor`."""
+
+    event_ndims_in_ = _static_ndims_from_shape(event_shape_in)
+    event_ndims_in = _ndims_from_shape(event_shape_in)
+    x_ndims_, x_ndims = x.shape.ndims, array_ops.rank(x)
+
+    assertions = []
+
+    # Ensure x.event_shape is compatible with event_shape_in.
+    if (event_ndims_in_ is not None
+        and x_ndims_ is not None
+        and x.shape.with_rank_at_least(event_ndims_in_)[
+            x_ndims_-event_ndims_in_:].is_fully_defined()):
+      x_event_shape_, x_event_shape = [  # pylint: disable=unbalanced-tuple-unpacking
+          np.int32(x.shape[x_ndims_-event_ndims_in_:])]*2
+    else:
+      x_event_shape_, x_event_shape = (
+          None, array_ops.shape(x)[x_ndims-event_ndims_in:])
+
+    event_shape_in_ = tensor_util.constant_value(event_shape_in)
+
+    if x_event_shape_ is not None and event_shape_in_ is not None:
+      # Compare the shape dimensions that are fully specified in the
+      # input (i.e., for which event_shape_in is not -1). If x_event_shape
+      # matches along all of these dimensions, it is compatible with
+      # the desired input shape and any further mismatches (i.e.,
+      # imcompatibility with the desired *output* shape) will be
+      # caught inside of array_ops.reshape() below.
+      x_event_shape_specified_ = x_event_shape_[event_shape_in_ >= 0]
+      event_shape_in_specified_ = event_shape_in_[event_shape_in_ >= 0]
+      if not np.equal(x_event_shape_specified_,
+                      event_shape_in_specified_).all():
+        raise ValueError(
+            "Input `event_shape` does not match `event_shape_in` ({} vs {}).".
+            format(x_event_shape_, event_shape_in_))
+    elif self.validate_args:
+      # Similarly to the static case, we compare the shape dimensions
+      # that are fully specified in the input. We extract these
+      # dimensions using boolean_mask(), which requires that the mask
+      # have known ndims. We can assume that shape Tensors always have
+      # ndims==1 (this assumption is verified inside of
+      # _maybe_check_valid_shape), so the reshape operation is just a
+      # no-op that formally encodes this fact to make boolean_mask()
+      # happy.
+      event_shape_mask = array_ops.reshape(event_shape_in >= 0, [-1])
+      x_event_shape_specified = array_ops.boolean_mask(x_event_shape,
+                                                       event_shape_mask)
+      event_shape_in_specified = array_ops.boolean_mask(event_shape_in,
+                                                        event_shape_mask)
+      assertions.append(check_ops.assert_equal(
+          x_event_shape_specified, event_shape_in_specified,
+          message="Input `event_shape` does not match `event_shape_in`."))
+
+    if assertions:
+      x = control_flow_ops.with_dependencies(assertions, x)
+
+    # get the parts of shape(x) that will not change
+    sample_and_batch_shape = array_ops.shape(x)
+
+    ndims = (x.shape.ndims if x.shape.ndims is not None
+             else array_ops.rank(x))
+    sample_and_batch_shape = sample_and_batch_shape[
+        :(ndims - math_ops.abs(event_ndims_in))]
+
+    if (event_ndims_in_ is not None
+        and x_ndims_ is not None
+        and event_ndims_in_ == x_ndims_):
+      # Hack to allow forward/inverse_event_shape to do shape
+      # inference by calling this helper method with a dummy Tensor of
+      # shape event_shape_in. In this special case,
+      # sample_and_batch_shape will be empty so we can preserve static
+      # shape information by avoiding the concat operation below
+      # (which would be a no-op).
+      new_shape = event_shape_out
+    else:
+      new_shape = array_ops.concat(
+          [sample_and_batch_shape, event_shape_out], axis=0)
+
+    return array_ops.reshape(x, new_shape)
+
+  def _forward(self, x):
+    with ops.control_dependencies(self._assertions):
+      return self._reshape_helper(x,
+                                  self._event_shape_in,
+                                  self._event_shape_out)
+
+  def _inverse(self, y):
+    with ops.control_dependencies(self._assertions):
+      return self._reshape_helper(y,
+                                  self._event_shape_out,
+                                  self._event_shape_in)
+
+  def _inverse_log_det_jacobian(self, y):
+    with ops.control_dependencies(self._assertions):
+      return constant_op.constant(0., dtype=y.dtype)
+
+  def _forward_log_det_jacobian(self, x):
+    with ops.control_dependencies(self._assertions):
+      return constant_op.constant(0., dtype=x.dtype)
+
+  def _forward_event_shape(self, input_shape):
+    # NOTE: this method and the other *_event_shape* methods
+    # compute shape by explicit transformation of a dummy
+    # variable. This approach is not generally recommended because it
+    # bloats the graph and could in general trigger side effects.
+    #
+    # In this particular case of the Reshape bijector, the
+    # forward and inverse transforms have no side effects, and we
+    # believe the reduction in code complexity from delegating the
+    # heavy lifting to tf.reshape() is worth the added graph ops.
+    # However, you should think hard before implementing this approach
+    # in other Bijectors; it is strongly preferred to compute
+    # shapes explicitly whenever it's feasible to do so.
+    with ops.control_dependencies(self._assertions):
+      dummy = array_ops.zeros(dtype=dtypes.float32, shape=input_shape)
+      dummy_reshaped = self.forward(dummy)
+      return dummy_reshaped.shape
+
+  def _inverse_event_shape(self, output_shape):
+    with ops.control_dependencies(self._assertions):
+      dummy = array_ops.zeros(dtype=dtypes.float32, shape=output_shape)
+      dummy_reshaped = self.inverse(dummy)
+      return dummy_reshaped.shape
+
+  def _forward_event_shape_tensor(self, input_shape):
+    with ops.control_dependencies(self._assertions):
+      dummy = array_ops.zeros(dtype=dtypes.float32, shape=input_shape)
+      dummy_reshaped = self.forward(dummy)
+      return array_ops.shape(dummy_reshaped)
+
+  def _inverse_event_shape_tensor(self, output_shape):
+    with ops.control_dependencies(self._assertions):
+      dummy = array_ops.zeros(dtype=dtypes.float32, shape=output_shape)
+      dummy_reshaped = self.inverse(dummy)
+      return array_ops.shape(dummy_reshaped)
diff --git a/tensorflow/contrib/distributions/python/ops/bijectors/reshape_impl.py b/tensorflow/contrib/distributions/python/ops/bijectors/reshape_impl.py
deleted file mode 100644
index 55eca06312..0000000000
--- a/tensorflow/contrib/distributions/python/ops/bijectors/reshape_impl.py
+++ /dev/null
@@ -1,314 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Reshape bijectors."""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import numpy as np
-
-from tensorflow.python.framework import constant_op
-from tensorflow.python.framework import dtypes
-from tensorflow.python.framework import ops
-from tensorflow.python.framework import tensor_util
-from tensorflow.python.ops import array_ops
-from tensorflow.python.ops import check_ops
-from tensorflow.python.ops import control_flow_ops
-from tensorflow.python.ops import math_ops
-from tensorflow.python.ops.distributions import bijector as bijector_lib
-
-
-__all__ = [
-    "Reshape",
-]
-
-
-def _static_ndims_from_shape(shape):
-  return shape.shape.with_rank_at_least(1)[0].value
-
-
-def _ndims_from_shape(shape):
-  return array_ops.shape(shape)[0]
-
-
-class Reshape(bijector_lib.Bijector):
-  """Reshapes the `event_shape` of a `Tensor`.
-
-  The semantics generally follow that of `tf.reshape()`, with
-  a few differences:
-
-  * The user must provide both the input and output shape, so that
-    the transformation can be inverted. If an input shape is not
-    specified, the default assumes a vector-shaped input, i.e.,
-    event_shape_in = (-1,).
-  * The `Reshape` bijector automatically broadcasts over the leftmost
-    dimensions of its input (`sample_shape` and `batch_shape`); only
-    the rightmost `event_ndims_in` dimensions are reshaped. The
-    number of dimensions to reshape is inferred from the provided
-    `event_shape_in` (`event_ndims_in = len(event_shape_in)`).
-
-  Example usage:
-  ```python
-
-  tfd = tf.contrib.distributions
-
-  r = tfd.bijectors.Reshape(event_shape_out=[1, -1])
-
-  r.forward([3., 4.])    # shape [2]
-  # ==> [[3., 4.]]       # shape [1, 2]
-
-  r.forward([[1., 2.], [3., 4.]])  # shape [2, 2]
-  # ==> [[[1., 2.]],
-  #      [[3., 4.]]]   # shape [2, 1, 2]
-
-  r.inverse([[3., 4.]])  # shape [1,2]
-  # ==> [3., 4.]         # shape [2]
-
-  r.forward_log_det_jacobian(any_value)
-  # ==> 0.
-
-  r.inverse_log_det_jacobian(any_value)
-  # ==> 0.
-  ```
-
-  """
-
-  def __init__(self, event_shape_out, event_shape_in=(-1,),
-               validate_args=False, name=None):
-    """Creates a `Reshape` bijector.
-
-    Args:
-      event_shape_out: An `int`-like vector-shaped `Tensor`
-        representing the event shape of the transformed output.
-      event_shape_in: An optional `int`-like vector-shape `Tensor`
-        representing the event shape of the input. This is required in
-        order to define inverse operations; the default of (-1,)
-        assumes a vector-shaped input.
-      validate_args: Python `bool` indicating whether arguments should
-        be checked for correctness.
-      name: Python `str`, name given to ops managed by this object.
-
-    Raises:
-      TypeError: if either `event_shape_in` or `event_shape_out` has
-        non-integer `dtype`.
-      ValueError: if either of `event_shape_in` or `event_shape_out`
-       has non-vector shape (`rank > 1`), or if their sizes do not
-       match.
-    """
-    with ops.name_scope(name, "reshape",
-                        values=[event_shape_out, event_shape_in]):
-
-      event_shape_out = ops.convert_to_tensor(event_shape_out,
-                                              name="event_shape_out",
-                                              preferred_dtype=dtypes.int32)
-      event_shape_in = ops.convert_to_tensor(event_shape_in,
-                                             name="event_shape_in",
-                                             preferred_dtype=dtypes.int32)
-
-      assertions = []
-      assertions.extend(self._maybe_check_valid_shape(
-          event_shape_out, validate_args))
-      assertions.extend(self._maybe_check_valid_shape(
-          event_shape_in, validate_args))
-
-      self._assertions = assertions
-      self._event_shape_in = event_shape_in
-      self._event_shape_out = event_shape_out
-
-      super(Reshape, self).__init__(is_constant_jacobian=True,
-                                    validate_args=validate_args,
-                                    name=name or "reshape")
-
-  def _maybe_check_valid_shape(self, shape, validate_args):
-    """Check that a shape Tensor is int-type and otherwise sane."""
-    if not shape.dtype.is_integer:
-      raise TypeError("{} dtype ({}) should be `int`-like.".format(
-          shape.op.name, shape.dtype.name))
-
-    assertions = []
-
-    ndims = array_ops.rank(shape)
-    ndims_ = tensor_util.constant_value(ndims)
-    if ndims_ is not None and ndims_ > 1:
-      raise ValueError("`{}` rank ({}) should be <= 1.".format(
-          shape.op.name, ndims_))
-    elif validate_args:
-      assertions.append(check_ops.assert_less_equal(
-          ndims, 1, message="`{}` rank should be <= 1.".format(shape.op.name)))
-
-    shape_ = tensor_util.constant_value_as_shape(shape)
-    if shape_.is_fully_defined():
-      es = np.int32(shape_.as_list())
-      if sum(es == -1) > 1:
-        raise ValueError(
-            "`{}` must have at most one `-1` (given {})"
-            .format(shape.op.name, es))
-      if np.any(es < -1):
-        raise ValueError(
-            "`{}` elements must be either positive integers or `-1`"
-            "(given {})."
-            .format(shape.op.name, es))
-    elif validate_args:
-      assertions.extend([
-          check_ops.assert_less_equal(
-              math_ops.reduce_sum(
-                  math_ops.cast(math_ops.equal(shape, -1), dtypes.int32)),
-              1,
-              message="`{}` elements must have at most one `-1`."
-              .format(shape.op.name)),
-          check_ops.assert_greater_equal(
-              shape, -1,
-              message="`{}` elements must be either positive integers or `-1`."
-              .format(shape.op.name)),
-      ])
-    return assertions
-
-  def _reshape_helper(self, x, event_shape_in, event_shape_out):
-    """Reshape only the event_shape of an input `Tensor`."""
-
-    event_ndims_in_ = _static_ndims_from_shape(event_shape_in)
-    event_ndims_in = _ndims_from_shape(event_shape_in)
-    x_ndims_, x_ndims = x.shape.ndims, array_ops.rank(x)
-
-    assertions = []
-
-    # Ensure x.event_shape is compatible with event_shape_in.
-    if (event_ndims_in_ is not None
-        and x_ndims_ is not None
-        and x.shape.with_rank_at_least(event_ndims_in_)[
-            x_ndims_-event_ndims_in_:].is_fully_defined()):
-      x_event_shape_, x_event_shape = [  # pylint: disable=unbalanced-tuple-unpacking
-          np.int32(x.shape[x_ndims_-event_ndims_in_:])]*2
-    else:
-      x_event_shape_, x_event_shape = (
-          None, array_ops.shape(x)[x_ndims-event_ndims_in:])
-
-    event_shape_in_ = tensor_util.constant_value(event_shape_in)
-
-    if x_event_shape_ is not None and event_shape_in_ is not None:
-      # Compare the shape dimensions that are fully specified in the
-      # input (i.e., for which event_shape_in is not -1). If x_event_shape
-      # matches along all of these dimensions, it is compatible with
-      # the desired input shape and any further mismatches (i.e.,
-      # imcompatibility with the desired *output* shape) will be
-      # caught inside of array_ops.reshape() below.
-      x_event_shape_specified_ = x_event_shape_[event_shape_in_ >= 0]
-      event_shape_in_specified_ = event_shape_in_[event_shape_in_ >= 0]
-      if not np.equal(x_event_shape_specified_,
-                      event_shape_in_specified_).all():
-        raise ValueError(
-            "Input `event_shape` does not match `event_shape_in` ({} vs {}).".
-            format(x_event_shape_, event_shape_in_))
-    elif self.validate_args:
-      # Similarly to the static case, we compare the shape dimensions
-      # that are fully specified in the input. We extract these
-      # dimensions using boolean_mask(), which requires that the mask
-      # have known ndims. We can assume that shape Tensors always have
-      # ndims==1 (this assumption is verified inside of
-      # _maybe_check_valid_shape), so the reshape operation is just a
-      # no-op that formally encodes this fact to make boolean_mask()
-      # happy.
-      event_shape_mask = array_ops.reshape(event_shape_in >= 0, [-1])
-      x_event_shape_specified = array_ops.boolean_mask(x_event_shape,
-                                                       event_shape_mask)
-      event_shape_in_specified = array_ops.boolean_mask(event_shape_in,
-                                                        event_shape_mask)
-      assertions.append(check_ops.assert_equal(
-          x_event_shape_specified, event_shape_in_specified,
-          message="Input `event_shape` does not match `event_shape_in`."))
-
-    if assertions:
-      x = control_flow_ops.with_dependencies(assertions, x)
-
-    # get the parts of shape(x) that will not change
-    sample_and_batch_shape = array_ops.shape(x)
-
-    ndims = (x.shape.ndims if x.shape.ndims is not None
-             else array_ops.rank(x))
-    sample_and_batch_shape = sample_and_batch_shape[
-        :(ndims - math_ops.abs(event_ndims_in))]
-
-    if (event_ndims_in_ is not None
-        and x_ndims_ is not None
-        and event_ndims_in_ == x_ndims_):
-      # Hack to allow forward/inverse_event_shape to do shape
-      # inference by calling this helper method with a dummy Tensor of
-      # shape event_shape_in. In this special case,
-      # sample_and_batch_shape will be empty so we can preserve static
-      # shape information by avoiding the concat operation below
-      # (which would be a no-op).
-      new_shape = event_shape_out
-    else:
-      new_shape = array_ops.concat(
-          [sample_and_batch_shape, event_shape_out], axis=0)
-
-    return array_ops.reshape(x, new_shape)
-
-  def _forward(self, x):
-    with ops.control_dependencies(self._assertions):
-      return self._reshape_helper(x,
-                                  self._event_shape_in,
-                                  self._event_shape_out)
-
-  def _inverse(self, y):
-    with ops.control_dependencies(self._assertions):
-      return self._reshape_helper(y,
-                                  self._event_shape_out,
-                                  self._event_shape_in)
-
-  def _inverse_log_det_jacobian(self, y):
-    with ops.control_dependencies(self._assertions):
-      return constant_op.constant(0., dtype=y.dtype)
-
-  def _forward_log_det_jacobian(self, x):
-    with ops.control_dependencies(self._assertions):
-      return constant_op.constant(0., dtype=x.dtype)
-
-  def _forward_event_shape(self, input_shape):
-    # NOTE: this method and the other *_event_shape* methods
-    # compute shape by explicit transformation of a dummy
-    # variable. This approach is not generally recommended because it
-    # bloats the graph and could in general trigger side effects.
-    #
-    # In this particular case of the Reshape bijector, the
-    # forward and inverse transforms have no side effects, and we
-    # believe the reduction in code complexity from delegating the
-    # heavy lifting to tf.reshape() is worth the added graph ops.
-    # However, you should think hard before implementing this approach
-    # in other Bijectors; it is strongly preferred to compute
-    # shapes explicitly whenever it's feasible to do so.
-    with ops.control_dependencies(self._assertions):
-      dummy = array_ops.zeros(dtype=dtypes.float32, shape=input_shape)
-      dummy_reshaped = self.forward(dummy)
-      return dummy_reshaped.shape
-
-  def _inverse_event_shape(self, output_shape):
-    with ops.control_dependencies(self._assertions):
-      dummy = array_ops.zeros(dtype=dtypes.float32, shape=output_shape)
-      dummy_reshaped = self.inverse(dummy)
-      return dummy_reshaped.shape
-
-  def _forward_event_shape_tensor(self, input_shape):
-    with ops.control_dependencies(self._assertions):
-      dummy = array_ops.zeros(dtype=dtypes.float32, shape=input_shape)
-      dummy_reshaped = self.forward(dummy)
-      return array_ops.shape(dummy_reshaped)
-
-  def _inverse_event_shape_tensor(self, output_shape):
-    with ops.control_dependencies(self._assertions):
-      dummy = array_ops.zeros(dtype=dtypes.float32, shape=output_shape)
-      dummy_reshaped = self.inverse(dummy)
-      return array_ops.shape(dummy_reshaped)
diff --git a/tensorflow/contrib/distributions/python/ops/bijectors/sigmoid.py b/tensorflow/contrib/distributions/python/ops/bijectors/sigmoid.py
index c20e76c0b7..a640dfe7df 100644
--- a/tensorflow/contrib/distributions/python/ops/bijectors/sigmoid.py
+++ b/tensorflow/contrib/distributions/python/ops/bijectors/sigmoid.py
@@ -18,12 +18,31 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-# go/tf-wildcard-import
-# pylint: disable=wildcard-import
-from tensorflow.contrib.distributions.python.ops.bijectors.sigmoid_impl import *
-# pylint: enable=wildcard-import
-from tensorflow.python.util.all_util import remove_undocumented
+from tensorflow.python.ops import math_ops
+from tensorflow.python.ops import nn_ops
+from tensorflow.python.ops.distributions import bijector
 
-_allowed_symbols = ["Sigmoid"]
 
-remove_undocumented(__name__, _allowed_symbols)
+__all__ = [
+    "Sigmoid",
+]
+
+
+class Sigmoid(bijector.Bijector):
+  """Bijector which computes `Y = g(X) = 1 / (1 + exp(-X))`."""
+
+  def __init__(self, validate_args=False, name="sigmoid"):
+    super(Sigmoid, self).__init__(
+        event_ndims=0, validate_args=validate_args, name=name)
+
+  def _forward(self, x):
+    return math_ops.sigmoid(x)
+
+  def _inverse(self, y):
+    return math_ops.log(y) - math_ops.log1p(-y)
+
+  def _inverse_log_det_jacobian(self, y):
+    return -math_ops.log(y) - math_ops.log1p(-y)
+
+  def _forward_log_det_jacobian(self, x):
+    return -nn_ops.softplus(-x) - nn_ops.softplus(x)
diff --git a/tensorflow/contrib/distributions/python/ops/bijectors/sigmoid_centered.py b/tensorflow/contrib/distributions/python/ops/bijectors/sigmoid_centered.py
index 448125230d..223bc9d042 100644
--- a/tensorflow/contrib/distributions/python/ops/bijectors/sigmoid_centered.py
+++ b/tensorflow/contrib/distributions/python/ops/bijectors/sigmoid_centered.py
@@ -18,12 +18,22 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-# go/tf-wildcard-import
-# pylint: disable=wildcard-import
-from tensorflow.contrib.distributions.python.ops.bijectors.sigmoid_centered_impl import *
-# pylint: enable=wildcard-import
-from tensorflow.python.util.all_util import remove_undocumented
+from tensorflow.contrib.distributions.python.ops.bijectors import softmax_centered
 
-_allowed_symbols = ["SigmoidCentered"]
 
-remove_undocumented(__name__, _allowed_symbols)
+__all__ = [
+    "SigmoidCentered",
+]
+
+
+class SigmoidCentered(softmax_centered.SoftmaxCentered):
+  """Bijector which computes Y = g(X) = exp([X 0]) / (1 + exp(-X)).
+
+  Equivalent to: `bijector.SoftmaxCentered(event_ndims=0)`.
+
+  See `bijector.SoftmaxCentered` for more details.
+  """
+
+  def __init__(self, validate_args=False, name="sigmoid_centered"):
+    super(SigmoidCentered, self).__init__(
+        event_ndims=0, validate_args=validate_args, name=name)
diff --git a/tensorflow/contrib/distributions/python/ops/bijectors/sigmoid_centered_impl.py b/tensorflow/contrib/distributions/python/ops/bijectors/sigmoid_centered_impl.py
deleted file mode 100644
index 223bc9d042..0000000000
--- a/tensorflow/contrib/distributions/python/ops/bijectors/sigmoid_centered_impl.py
+++ /dev/null
@@ -1,39 +0,0 @@
-# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""SigmoidCentered bijector."""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-from tensorflow.contrib.distributions.python.ops.bijectors import softmax_centered
-
-
-__all__ = [
-    "SigmoidCentered",
-]
-
-
-class SigmoidCentered(softmax_centered.SoftmaxCentered):
-  """Bijector which computes Y = g(X) = exp([X 0]) / (1 + exp(-X)).
-
-  Equivalent to: `bijector.SoftmaxCentered(event_ndims=0)`.
-
-  See `bijector.SoftmaxCentered` for more details.
-  """
-
-  def __init__(self, validate_args=False, name="sigmoid_centered"):
-    super(SigmoidCentered, self).__init__(
-        event_ndims=0, validate_args=validate_args, name=name)
diff --git a/tensorflow/contrib/distributions/python/ops/bijectors/sigmoid_impl.py b/tensorflow/contrib/distributions/python/ops/bijectors/sigmoid_impl.py
deleted file mode 100644
index a640dfe7df..0000000000
--- a/tensorflow/contrib/distributions/python/ops/bijectors/sigmoid_impl.py
+++ /dev/null
@@ -1,48 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Sigmoid bijector."""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-from tensorflow.python.ops import math_ops
-from tensorflow.python.ops import nn_ops
-from tensorflow.python.ops.distributions import bijector
-
-
-__all__ = [
-    "Sigmoid",
-]
-
-
-class Sigmoid(bijector.Bijector):
-  """Bijector which computes `Y = g(X) = 1 / (1 + exp(-X))`."""
-
-  def __init__(self, validate_args=False, name="sigmoid"):
-    super(Sigmoid, self).__init__(
-        event_ndims=0, validate_args=validate_args, name=name)
-
-  def _forward(self, x):
-    return math_ops.sigmoid(x)
-
-  def _inverse(self, y):
-    return math_ops.log(y) - math_ops.log1p(-y)
-
-  def _inverse_log_det_jacobian(self, y):
-    return -math_ops.log(y) - math_ops.log1p(-y)
-
-  def _forward_log_det_jacobian(self, x):
-    return -nn_ops.softplus(-x) - nn_ops.softplus(x)
diff --git a/tensorflow/contrib/distributions/python/ops/bijectors/sinh_arcsinh.py b/tensorflow/contrib/distributions/python/ops/bijectors/sinh_arcsinh.py
index b3cf03c246..3a75e4ae94 100644
--- a/tensorflow/contrib/distributions/python/ops/bijectors/sinh_arcsinh.py
+++ b/tensorflow/contrib/distributions/python/ops/bijectors/sinh_arcsinh.py
@@ -18,12 +18,162 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-# go/tf-wildcard-import
-# pylint: disable=wildcard-import
-from tensorflow.contrib.distributions.python.ops.bijectors.sinh_arcsinh_impl import *
-# pylint: enable=wildcard-import
-from tensorflow.python.util.all_util import remove_undocumented
+import numpy as np
 
-_allowed_symbols = ["SinhArcsinh"]
+from tensorflow.python.framework import ops
+from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import check_ops
+from tensorflow.python.ops import control_flow_ops
+from tensorflow.python.ops import math_ops
+from tensorflow.python.ops.distributions import bijector
 
-remove_undocumented(__name__, _allowed_symbols)
+__all__ = [
+    "SinhArcsinh",
+]
+
+
+def _sqrtx2p1(x):
+  """Implementation of `sqrt(1 + x**2)` which is stable despite large `x`."""
+  return array_ops.where(
+      math_ops.abs(x) * np.sqrt(np.finfo(x.dtype.as_numpy_dtype).eps) <= 1.,
+      math_ops.sqrt(x**2. + 1.),
+      # For large x, calculating x**2 can overflow. This can be alleviated by
+      # considering:
+      # sqrt(1 + x**2)
+      # = exp(0.5 log(1 + x**2))
+      # = exp(0.5 log(x**2 * (1 + x**-2)))
+      # = exp(log(x) + 0.5 * log(1 + x**-2))
+      # = |x| * exp(0.5 log(1 + x**-2))
+      # = |x| * sqrt(1 + x**-2)
+      # We omit the last term in this approximation.
+      # When |x| > 1 / sqrt(machineepsilon), the second term will be 1,
+      # due to sqrt(1 + x**-2) = 1. This is also true with the gradient term,
+      # and higher order gradients, since the first order derivative of
+      # sqrt(1 + x**-2) is -2 * x**-3 / (1 + x**-2) = -2 / (x**3 + x),
+      # and all nth-order derivatives will be O(x**-(n + 2)). This makes any
+      # gradient terms that contain any derivatives of sqrt(1 + x**-2) vanish.
+      math_ops.abs(x))
+
+
+class SinhArcsinh(bijector.Bijector):
+  """Compute `Y = g(X) = Sinh( (Arcsinh(X) + skewness) * tailweight )`.
+
+  For `skewness in (-inf, inf)` and `tailweight in (0, inf)`, this
+  transformation is a
+  diffeomorphism of the real line `(-inf, inf)`.  The inverse transform is
+  `X = g^{-1}(Y) = Sinh( ArcSinh(Y) / tailweight - skewness )`.
+
+  The `SinhArcsinh` transformation of the Normal is described in
+  [Sinh-arcsinh distributions](https://www.jstor.org/stable/27798865)
+  This Bijector allows a similar transformation of any distribution supported on
+  `(-inf, inf)`.
+
+  #### Meaning of the parameters
+
+  * If `skewness = 0` and `tailweight = 1`, this transform is the identity.
+  * Positive (negative) `skewness` leads to positive (negative) skew.
+    * positive skew means, for unimodal `X` centered at zero, the mode of `Y` is
+      "tilted" to the right.
+    * positive skew means positive values of `Y` become more likely, and
+      negative values become less likely.
+  * Larger (smaller) `tailweight` leads to fatter (thinner) tails.
+    * Fatter tails mean larger values of `|Y|` become more likely.
+    * If `X` is a unit Normal, `tailweight < 1` leads to a distribution that is
+      "flat" around `Y = 0`, and a very steep drop-off in the tails.
+    * If `X` is a unit Normal, `tailweight > 1` leads to a distribution more
+      peaked at the mode with heavier tails.
+
+  To see the argument about the tails, note that for `|X| >> 1` and
+  `|X| >> (|skewness| * tailweight)**tailweight`, we have
+  `Y approx 0.5 X**tailweight e**(sign(X) skewness * tailweight)`.
+  """
+
+  def __init__(self,
+               skewness=None,
+               tailweight=None,
+               event_ndims=0,
+               validate_args=False,
+               name="SinhArcsinh"):
+    """Instantiates the `SinhArcsinh` bijector.
+
+    Args:
+      skewness:  Skewness parameter.  Float-type `Tensor`.  Default is `0`
+        of type `float32`.
+      tailweight:  Tailweight parameter.  Positive `Tensor` of same `dtype` as
+        `skewness` and broadcastable `shape`.  Default is `1` of type `float32`.
+      event_ndims: Python scalar indicating the number of dimensions associated
+        with a particular draw from the distribution.
+      validate_args: Python `bool` indicating whether arguments should be
+        checked for correctness.
+      name: Python `str` name given to ops managed by this object.
+    """
+    self._graph_parents = []
+    self._name = name
+    self._validate_args = validate_args
+    with self._name_scope("init", values=[skewness, tailweight]):
+      tailweight = 1. if tailweight is None else tailweight
+      skewness = 0. if skewness is None else skewness
+      self._skewness = ops.convert_to_tensor(
+          skewness, name="skewness")
+      self._tailweight = ops.convert_to_tensor(
+          tailweight, name="tailweight", dtype=self._skewness.dtype)
+      check_ops.assert_same_float_dtype([self._skewness, self._tailweight])
+      if validate_args:
+        self._tailweight = control_flow_ops.with_dependencies([
+            check_ops.assert_positive(
+                self._tailweight,
+                message="Argument tailweight was not positive")
+        ], self._tailweight)
+    super(SinhArcsinh, self).__init__(
+        event_ndims=event_ndims, validate_args=validate_args, name=name)
+
+  @property
+  def skewness(self):
+    """The `skewness` in: `Y  = Sinh((Arcsinh(X) + skewness) * tailweight)`."""
+    return self._skewness
+
+  @property
+  def tailweight(self):
+    """The `tailweight` in: `Y = Sinh((Arcsinh(X) + skewness) * tailweight)`."""
+    return self._tailweight
+
+  def _forward(self, x):
+    return math_ops.sinh((math_ops.asinh(x) + self.skewness) * self.tailweight)
+
+  def _inverse(self, y):
+    return math_ops.sinh(math_ops.asinh(y) / self.tailweight - self.skewness)
+
+  def _inverse_log_det_jacobian(self, y):
+    # x = sinh(arcsinh(y) / tailweight - skewness)
+    # Using sinh' = cosh, arcsinh'(y) = 1 / sqrt(y**2 + 1),
+    # dx/dy
+    # = cosh(arcsinh(y) / tailweight - skewness)
+    #     / (tailweight * sqrt(y**2 + 1))
+    event_dims = self._event_dims_tensor(y)
+    return math_ops.reduce_sum(
+        # This is computed inside the log to avoid catastrophic cancellations
+        # from cosh((arcsinh(y) / tailweight) - skewness) and sqrt(x**2 + 1).
+        math_ops.log(math_ops.cosh(
+            math_ops.asinh(y) / self.tailweight - self.skewness)
+                     # TODO(srvasude): Consider using cosh(arcsinh(x)) in cases
+                     # where (arcsinh(x) / tailweight) - skewness ~= arcsinh(x).
+                     / _sqrtx2p1(y))
+        - math_ops.log(self.tailweight),
+        axis=event_dims)
+
+  def _forward_log_det_jacobian(self, x):
+    # y = sinh((arcsinh(x) + skewness) * tailweight)
+    # Using sinh' = cosh, arcsinh'(x) = 1 / sqrt(x**2 + 1),
+    # dy/dx
+    # = cosh((arcsinh(x) + skewness) * tailweight) * tailweight / sqrt(x**2 + 1)
+    event_dims = self._event_dims_tensor(x)
+    return math_ops.reduce_sum(
+        # This is computed inside the log to avoid catastrophic cancellations
+        # from cosh((arcsinh(x) + skewness) * tailweight) and sqrt(x**2 + 1).
+        math_ops.log(math_ops.cosh(
+            (math_ops.asinh(x) + self.skewness) * self.tailweight)
+                     # TODO(srvasude): Consider using cosh(arcsinh(x)) in cases
+                     # where (arcsinh(x) + skewness) * tailweight ~= arcsinh(x).
+                     / _sqrtx2p1(x))
+        + math_ops.log(self.tailweight),
+        axis=event_dims)
diff --git a/tensorflow/contrib/distributions/python/ops/bijectors/sinh_arcsinh_impl.py b/tensorflow/contrib/distributions/python/ops/bijectors/sinh_arcsinh_impl.py
deleted file mode 100644
index 3a75e4ae94..0000000000
--- a/tensorflow/contrib/distributions/python/ops/bijectors/sinh_arcsinh_impl.py
+++ /dev/null
@@ -1,179 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""SinhArcsinh bijector."""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import numpy as np
-
-from tensorflow.python.framework import ops
-from tensorflow.python.ops import array_ops
-from tensorflow.python.ops import check_ops
-from tensorflow.python.ops import control_flow_ops
-from tensorflow.python.ops import math_ops
-from tensorflow.python.ops.distributions import bijector
-
-__all__ = [
-    "SinhArcsinh",
-]
-
-
-def _sqrtx2p1(x):
-  """Implementation of `sqrt(1 + x**2)` which is stable despite large `x`."""
-  return array_ops.where(
-      math_ops.abs(x) * np.sqrt(np.finfo(x.dtype.as_numpy_dtype).eps) <= 1.,
-      math_ops.sqrt(x**2. + 1.),
-      # For large x, calculating x**2 can overflow. This can be alleviated by
-      # considering:
-      # sqrt(1 + x**2)
-      # = exp(0.5 log(1 + x**2))
-      # = exp(0.5 log(x**2 * (1 + x**-2)))
-      # = exp(log(x) + 0.5 * log(1 + x**-2))
-      # = |x| * exp(0.5 log(1 + x**-2))
-      # = |x| * sqrt(1 + x**-2)
-      # We omit the last term in this approximation.
-      # When |x| > 1 / sqrt(machineepsilon), the second term will be 1,
-      # due to sqrt(1 + x**-2) = 1. This is also true with the gradient term,
-      # and higher order gradients, since the first order derivative of
-      # sqrt(1 + x**-2) is -2 * x**-3 / (1 + x**-2) = -2 / (x**3 + x),
-      # and all nth-order derivatives will be O(x**-(n + 2)). This makes any
-      # gradient terms that contain any derivatives of sqrt(1 + x**-2) vanish.
-      math_ops.abs(x))
-
-
-class SinhArcsinh(bijector.Bijector):
-  """Compute `Y = g(X) = Sinh( (Arcsinh(X) + skewness) * tailweight )`.
-
-  For `skewness in (-inf, inf)` and `tailweight in (0, inf)`, this
-  transformation is a
-  diffeomorphism of the real line `(-inf, inf)`.  The inverse transform is
-  `X = g^{-1}(Y) = Sinh( ArcSinh(Y) / tailweight - skewness )`.
-
-  The `SinhArcsinh` transformation of the Normal is described in
-  [Sinh-arcsinh distributions](https://www.jstor.org/stable/27798865)
-  This Bijector allows a similar transformation of any distribution supported on
-  `(-inf, inf)`.
-
-  #### Meaning of the parameters
-
-  * If `skewness = 0` and `tailweight = 1`, this transform is the identity.
-  * Positive (negative) `skewness` leads to positive (negative) skew.
-    * positive skew means, for unimodal `X` centered at zero, the mode of `Y` is
-      "tilted" to the right.
-    * positive skew means positive values of `Y` become more likely, and
-      negative values become less likely.
-  * Larger (smaller) `tailweight` leads to fatter (thinner) tails.
-    * Fatter tails mean larger values of `|Y|` become more likely.
-    * If `X` is a unit Normal, `tailweight < 1` leads to a distribution that is
-      "flat" around `Y = 0`, and a very steep drop-off in the tails.
-    * If `X` is a unit Normal, `tailweight > 1` leads to a distribution more
-      peaked at the mode with heavier tails.
-
-  To see the argument about the tails, note that for `|X| >> 1` and
-  `|X| >> (|skewness| * tailweight)**tailweight`, we have
-  `Y approx 0.5 X**tailweight e**(sign(X) skewness * tailweight)`.
-  """
-
-  def __init__(self,
-               skewness=None,
-               tailweight=None,
-               event_ndims=0,
-               validate_args=False,
-               name="SinhArcsinh"):
-    """Instantiates the `SinhArcsinh` bijector.
-
-    Args:
-      skewness:  Skewness parameter.  Float-type `Tensor`.  Default is `0`
-        of type `float32`.
-      tailweight:  Tailweight parameter.  Positive `Tensor` of same `dtype` as
-        `skewness` and broadcastable `shape`.  Default is `1` of type `float32`.
-      event_ndims: Python scalar indicating the number of dimensions associated
-        with a particular draw from the distribution.
-      validate_args: Python `bool` indicating whether arguments should be
-        checked for correctness.
-      name: Python `str` name given to ops managed by this object.
-    """
-    self._graph_parents = []
-    self._name = name
-    self._validate_args = validate_args
-    with self._name_scope("init", values=[skewness, tailweight]):
-      tailweight = 1. if tailweight is None else tailweight
-      skewness = 0. if skewness is None else skewness
-      self._skewness = ops.convert_to_tensor(
-          skewness, name="skewness")
-      self._tailweight = ops.convert_to_tensor(
-          tailweight, name="tailweight", dtype=self._skewness.dtype)
-      check_ops.assert_same_float_dtype([self._skewness, self._tailweight])
-      if validate_args:
-        self._tailweight = control_flow_ops.with_dependencies([
-            check_ops.assert_positive(
-                self._tailweight,
-                message="Argument tailweight was not positive")
-        ], self._tailweight)
-    super(SinhArcsinh, self).__init__(
-        event_ndims=event_ndims, validate_args=validate_args, name=name)
-
-  @property
-  def skewness(self):
-    """The `skewness` in: `Y  = Sinh((Arcsinh(X) + skewness) * tailweight)`."""
-    return self._skewness
-
-  @property
-  def tailweight(self):
-    """The `tailweight` in: `Y = Sinh((Arcsinh(X) + skewness) * tailweight)`."""
-    return self._tailweight
-
-  def _forward(self, x):
-    return math_ops.sinh((math_ops.asinh(x) + self.skewness) * self.tailweight)
-
-  def _inverse(self, y):
-    return math_ops.sinh(math_ops.asinh(y) / self.tailweight - self.skewness)
-
-  def _inverse_log_det_jacobian(self, y):
-    # x = sinh(arcsinh(y) / tailweight - skewness)
-    # Using sinh' = cosh, arcsinh'(y) = 1 / sqrt(y**2 + 1),
-    # dx/dy
-    # = cosh(arcsinh(y) / tailweight - skewness)
-    #     / (tailweight * sqrt(y**2 + 1))
-    event_dims = self._event_dims_tensor(y)
-    return math_ops.reduce_sum(
-        # This is computed inside the log to avoid catastrophic cancellations
-        # from cosh((arcsinh(y) / tailweight) - skewness) and sqrt(x**2 + 1).
-        math_ops.log(math_ops.cosh(
-            math_ops.asinh(y) / self.tailweight - self.skewness)
-                     # TODO(srvasude): Consider using cosh(arcsinh(x)) in cases
-                     # where (arcsinh(x) / tailweight) - skewness ~= arcsinh(x).
-                     / _sqrtx2p1(y))
-        - math_ops.log(self.tailweight),
-        axis=event_dims)
-
-  def _forward_log_det_jacobian(self, x):
-    # y = sinh((arcsinh(x) + skewness) * tailweight)
-    # Using sinh' = cosh, arcsinh'(x) = 1 / sqrt(x**2 + 1),
-    # dy/dx
-    # = cosh((arcsinh(x) + skewness) * tailweight) * tailweight / sqrt(x**2 + 1)
-    event_dims = self._event_dims_tensor(x)
-    return math_ops.reduce_sum(
-        # This is computed inside the log to avoid catastrophic cancellations
-        # from cosh((arcsinh(x) + skewness) * tailweight) and sqrt(x**2 + 1).
-        math_ops.log(math_ops.cosh(
-            (math_ops.asinh(x) + self.skewness) * self.tailweight)
-                     # TODO(srvasude): Consider using cosh(arcsinh(x)) in cases
-                     # where (arcsinh(x) + skewness) * tailweight ~= arcsinh(x).
-                     / _sqrtx2p1(x))
-        + math_ops.log(self.tailweight),
-        axis=event_dims)
diff --git a/tensorflow/contrib/distributions/python/ops/bijectors/softmax_centered.py b/tensorflow/contrib/distributions/python/ops/bijectors/softmax_centered.py
index be6608f978..e4a1d3dde2 100644
--- a/tensorflow/contrib/distributions/python/ops/bijectors/softmax_centered.py
+++ b/tensorflow/contrib/distributions/python/ops/bijectors/softmax_centered.py
@@ -18,12 +18,232 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-# go/tf-wildcard-import
-# pylint: disable=wildcard-import
-from tensorflow.contrib.distributions.python.ops.bijectors.softmax_centered_impl import *
-# pylint: enable=wildcard-import
-from tensorflow.python.util.all_util import remove_undocumented
+import numpy as np
 
-_allowed_symbols = ["SoftmaxCentered"]
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import ops
+from tensorflow.python.framework import tensor_shape
+from tensorflow.python.framework import tensor_util
+from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import check_ops
+from tensorflow.python.ops import control_flow_ops
+from tensorflow.python.ops import math_ops
+from tensorflow.python.ops import nn_ops
+from tensorflow.python.ops.distributions import bijector
 
-remove_undocumented(__name__, _allowed_symbols)
+
+__all__ = [
+    "SoftmaxCentered",
+]
+
+
+class SoftmaxCentered(bijector.Bijector):
+  """Bijector which computes `Y = g(X) = exp([X 0]) / sum(exp([X 0]))`.
+
+  To implement [softmax](https://en.wikipedia.org/wiki/Softmax_function) as a
+  bijection, the forward transformation appends a value to the input and the
+  inverse removes this coordinate. The appended coordinate represents a pivot,
+  e.g., `softmax(x) = exp(x-c) / sum(exp(x-c))` where `c` is the implicit last
+  coordinate.
+
+  Because we append a coordinate, this bijector only supports `event_ndim in [0,
+  1]`, i.e., scalars and vectors.
+
+  Example Use:
+
+  ```python
+  bijector.SoftmaxCentered(event_ndims=1).forward(tf.log([2, 3, 4]))
+  # Result: [0.2, 0.3, 0.4, 0.1]
+  # Extra result: 0.1
+
+  bijector.SoftmaxCentered(event_ndims=1).inverse([0.2, 0.3, 0.4, 0.1])
+  # Result: tf.log([2, 3, 4])
+  # Extra coordinate removed.
+  ```
+
+  At first blush it may seem like the [Invariance of domain](
+  https://en.wikipedia.org/wiki/Invariance_of_domain) theorem implies this
+  implementation is not a bijection. However, the appended dimension
+  makes the (forward) image non-open and the theorem does not directly apply.
+  """
+
+  def __init__(self,
+               event_ndims=0,
+               validate_args=False,
+               name="softmax_centered"):
+    self._graph_parents = []
+    self._name = name
+    with self._name_scope("init", values=[event_ndims]):
+      event_ndims = ops.convert_to_tensor(event_ndims, name="event_ndims")
+      event_ndims = tensor_util.constant_value(event_ndims)
+      if event_ndims is None or event_ndims not in [0, 1]:
+        raise ValueError("`event_ndims` must be a TF constant which is 0 or 1")
+    self._static_event_ndims = event_ndims
+    super(SoftmaxCentered, self).__init__(
+        event_ndims=event_ndims,
+        validate_args=validate_args,
+        name=name)
+
+  def _forward_event_shape(self, input_shape):
+    if input_shape.ndims is None:
+      return input_shape
+    if input_shape.ndims != self._static_event_ndims:
+      raise ValueError("input_shape.dims = %d != %d" %
+                       (input_shape.ndims, self._static_event_ndims))
+    if input_shape.ndims == 0:
+      return tensor_shape.TensorShape([2])
+    if input_shape.ndims == 1:
+      return tensor_shape.TensorShape(input_shape[0] + 1)
+    # Unreachable code:
+    raise ValueError("event_ndims = %d must be 0 or 1" % input_shape.ndims)
+
+  def _forward_event_shape_tensor(self, input_shape):
+    ndims = array_ops.shape(input_shape)
+    if self.validate_args:
+      # It is not possible for a negative shape so we need only check <= 1.
+      is_zero_or_one = check_ops.assert_equal(
+          ndims, 0 if self._static_event_ndims == 0 else 1,
+          message="event_ndims must be 0 or 1")
+      ndims = control_flow_ops.with_dependencies([is_zero_or_one], ndims)
+    if self._static_event_ndims == 0:
+      return ops.convert_to_tensor(
+          [2], dtype=dtypes.int32, name="output_shape")
+    return input_shape + 1
+
+  def _inverse_event_shape(self, output_shape):
+    if output_shape.ndims is None:
+      return output_shape
+    if output_shape.ndims != 1:
+      raise ValueError("output_shape.ndims = %d != 1" % output_shape.ndims)
+    if self._static_event_ndims == 0:
+      return tensor_shape.TensorShape([])
+    return tensor_shape.TensorShape(output_shape[0] - 1)
+
+  def _inverse_event_shape_tensor(self, output_shape):
+    ndims = array_ops.shape(output_shape)[0]
+    if self.validate_args:
+      # It is not possible for a negative shape so we need only check <= 1.
+      is_one = check_ops.assert_equal(
+          ndims, 1, message="event_ndims must be 1")
+      ndims = control_flow_ops.with_dependencies([is_one], ndims)
+    if self._static_event_ndims == 0:
+      return ops.convert_to_tensor([], dtype=dtypes.int32, name="output_shape")
+    return array_ops.expand_dims(output_shape[0] - 1, dim=0)
+
+  def _forward(self, x):
+    # Pad the last dim with a zeros vector. We need this because it lets us
+    # infer the scale in the inverse function.
+    y = array_ops.expand_dims(x, dim=-1) if self._static_event_ndims == 0 else x
+    ndims = _get_ndims(y)
+    y = array_ops.pad(y, paddings=array_ops.one_hot(indices=[-1, ndims - 1],
+                                                    depth=ndims,
+                                                    axis=0,
+                                                    dtype=dtypes.int32))
+    # Set shape hints.
+    if x.shape.ndims is not None:
+      shape = x.shape.as_list()
+      if self._static_event_ndims == 0:
+        shape += [2]
+      elif shape[-1] is not None:
+        shape[-1] += 1
+      shape = tensor_shape.TensorShape(shape)
+      y.shape.assert_is_compatible_with(shape)
+      y.set_shape(shape)
+
+    # Since we only support event_ndims in [0, 1] and we do padding, we always
+    # reduce over the last dimension, i.e., dim=-1 (which is the default).
+    return nn_ops.softmax(y)
+
+  def _inverse(self, y):
+    # To derive the inverse mapping note that:
+    #   y[i] = exp(x[i]) / normalization
+    # and
+    #   y[end] = 1 / normalization.
+    # Thus:
+    # x[i] = log(exp(x[i])) - log(y[end]) - log(normalization)
+    #      = log(exp(x[i])/normalization) - log(y[end])
+    #      = log(y[i]) - log(y[end])
+    shape = (np.asarray(y.shape.as_list(), dtype=np.int32)
+             if y.shape.is_fully_defined()
+             else array_ops.shape(y, name="shape"))
+    ndims = _get_ndims(y)
+
+    # Do this first to make sure CSE catches that it'll happen again in
+    # _inverse_log_det_jacobian.
+    x = math_ops.log(y)
+
+    # We now extract the last coordinate of the rightmost dimension.
+    # Our trick is to slice from [0,0,...,shape[-1]-1] to shape[:-1]+[1].
+    begin = array_ops.one_hot(indices=ndims-1,
+                              depth=ndims,
+                              on_value=shape[-1]-np.array(1, dtype=shape.dtype),
+                              dtype=shape.dtype)
+    size = array_ops.concat([shape[:-1], np.asarray([1], dtype=shape.dtype)], 0)
+    log_normalization = -array_ops.strided_slice(x, begin, begin + size)
+
+    # Here we slice out all but the last coordinate; see above for idea.
+    begin = array_ops.zeros_like(shape)
+    size = array_ops.concat([shape[:-1], [shape[-1] - 1]], 0)
+    x = array_ops.strided_slice(x, begin, begin + size)
+
+    x += log_normalization
+
+    if self._static_event_ndims == 0:
+      x = array_ops.squeeze(x, squeeze_dims=[ndims-1])
+
+    # Set shape hints.
+    if y.shape.ndims is not None:
+      shape = y.shape.as_list()
+      if self._static_event_ndims == 0:
+        shape = shape[:-1]
+      elif shape[-1] is not None:
+        shape[-1] -= 1
+      shape = tensor_shape.TensorShape(shape)
+      x.shape.assert_is_compatible_with(shape)
+      x.set_shape(shape)
+
+    return x
+
+  def _inverse_log_det_jacobian(self, y):
+    # WLOG, consider the vector case:
+    #   x = log(y[:-1]) - log(y[-1])
+    # where,
+    #   y[-1] = 1 - sum(y[:-1]).
+    # We have:
+    #   det{ dX/dY } = det{ diag(1 ./ y[:-1]) + 1 / y[-1] }
+    #                = det{ inv{ diag(y[:-1]) - y[:-1]' y[:-1] } }   (1)
+    #                = 1 / det{ diag(y[:-1]) - y[:-1]' y[:-1] }
+    #                = 1 / { (1 + y[:-1]' inv(diag(y[:-1])) y[:-1]) *
+    #                        det(diag(y[:-1])) }                     (2)
+    #                = 1 / { y[-1] prod(y[:-1]) }
+    #                = 1 / prod(y)
+    # (1) - https://en.wikipedia.org/wiki/Sherman%E2%80%93Morrison_formula
+    #       or by noting that det{ dX/dY } = 1 / det{ dY/dX } from Bijector
+    #       docstring "Tip".
+    # (2) - https://en.wikipedia.org/wiki/Matrix_determinant_lemma
+    return -math_ops.reduce_sum(math_ops.log(y), axis=-1)
+
+  def _forward_log_det_jacobian(self, x):
+    if self._static_event_ndims == 0:
+      return x - 2. * nn_ops.softplus(x)
+    else:
+      # This code is similar to nn_ops.log_softmax but different because we have
+      # an implicit zero column to handle. I.e., instead of:
+      #   reduce_sum(logits - reduce_sum(exp(logits), dim))
+      # we must do:
+      #   log_normalization = 1 + reduce_sum(exp(logits))
+      #   -log_normalization + reduce_sum(logits - log_normalization)
+      log_normalization = nn_ops.softplus(
+          math_ops.reduce_logsumexp(x, axis=-1, keep_dims=True))
+      fldj = (-log_normalization +
+              math_ops.reduce_sum(x - log_normalization,
+                                  axis=-1,
+                                  keep_dims=True))
+      return array_ops.squeeze(fldj, squeeze_dims=-1)
+
+
+def _get_ndims(x):
+  """Returns `ndims`, statically if possible."""
+  if x.shape.ndims is not None:
+    return x.shape.ndims
+  return array_ops.rank(x, name="ndims")
diff --git a/tensorflow/contrib/distributions/python/ops/bijectors/softmax_centered_impl.py b/tensorflow/contrib/distributions/python/ops/bijectors/softmax_centered_impl.py
deleted file mode 100644
index e4a1d3dde2..0000000000
--- a/tensorflow/contrib/distributions/python/ops/bijectors/softmax_centered_impl.py
+++ /dev/null
@@ -1,249 +0,0 @@
-# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""SoftmaxCentered bijector."""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import numpy as np
-
-from tensorflow.python.framework import dtypes
-from tensorflow.python.framework import ops
-from tensorflow.python.framework import tensor_shape
-from tensorflow.python.framework import tensor_util
-from tensorflow.python.ops import array_ops
-from tensorflow.python.ops import check_ops
-from tensorflow.python.ops import control_flow_ops
-from tensorflow.python.ops import math_ops
-from tensorflow.python.ops import nn_ops
-from tensorflow.python.ops.distributions import bijector
-
-
-__all__ = [
-    "SoftmaxCentered",
-]
-
-
-class SoftmaxCentered(bijector.Bijector):
-  """Bijector which computes `Y = g(X) = exp([X 0]) / sum(exp([X 0]))`.
-
-  To implement [softmax](https://en.wikipedia.org/wiki/Softmax_function) as a
-  bijection, the forward transformation appends a value to the input and the
-  inverse removes this coordinate. The appended coordinate represents a pivot,
-  e.g., `softmax(x) = exp(x-c) / sum(exp(x-c))` where `c` is the implicit last
-  coordinate.
-
-  Because we append a coordinate, this bijector only supports `event_ndim in [0,
-  1]`, i.e., scalars and vectors.
-
-  Example Use:
-
-  ```python
-  bijector.SoftmaxCentered(event_ndims=1).forward(tf.log([2, 3, 4]))
-  # Result: [0.2, 0.3, 0.4, 0.1]
-  # Extra result: 0.1
-
-  bijector.SoftmaxCentered(event_ndims=1).inverse([0.2, 0.3, 0.4, 0.1])
-  # Result: tf.log([2, 3, 4])
-  # Extra coordinate removed.
-  ```
-
-  At first blush it may seem like the [Invariance of domain](
-  https://en.wikipedia.org/wiki/Invariance_of_domain) theorem implies this
-  implementation is not a bijection. However, the appended dimension
-  makes the (forward) image non-open and the theorem does not directly apply.
-  """
-
-  def __init__(self,
-               event_ndims=0,
-               validate_args=False,
-               name="softmax_centered"):
-    self._graph_parents = []
-    self._name = name
-    with self._name_scope("init", values=[event_ndims]):
-      event_ndims = ops.convert_to_tensor(event_ndims, name="event_ndims")
-      event_ndims = tensor_util.constant_value(event_ndims)
-      if event_ndims is None or event_ndims not in [0, 1]:
-        raise ValueError("`event_ndims` must be a TF constant which is 0 or 1")
-    self._static_event_ndims = event_ndims
-    super(SoftmaxCentered, self).__init__(
-        event_ndims=event_ndims,
-        validate_args=validate_args,
-        name=name)
-
-  def _forward_event_shape(self, input_shape):
-    if input_shape.ndims is None:
-      return input_shape
-    if input_shape.ndims != self._static_event_ndims:
-      raise ValueError("input_shape.dims = %d != %d" %
-                       (input_shape.ndims, self._static_event_ndims))
-    if input_shape.ndims == 0:
-      return tensor_shape.TensorShape([2])
-    if input_shape.ndims == 1:
-      return tensor_shape.TensorShape(input_shape[0] + 1)
-    # Unreachable code:
-    raise ValueError("event_ndims = %d must be 0 or 1" % input_shape.ndims)
-
-  def _forward_event_shape_tensor(self, input_shape):
-    ndims = array_ops.shape(input_shape)
-    if self.validate_args:
-      # It is not possible for a negative shape so we need only check <= 1.
-      is_zero_or_one = check_ops.assert_equal(
-          ndims, 0 if self._static_event_ndims == 0 else 1,
-          message="event_ndims must be 0 or 1")
-      ndims = control_flow_ops.with_dependencies([is_zero_or_one], ndims)
-    if self._static_event_ndims == 0:
-      return ops.convert_to_tensor(
-          [2], dtype=dtypes.int32, name="output_shape")
-    return input_shape + 1
-
-  def _inverse_event_shape(self, output_shape):
-    if output_shape.ndims is None:
-      return output_shape
-    if output_shape.ndims != 1:
-      raise ValueError("output_shape.ndims = %d != 1" % output_shape.ndims)
-    if self._static_event_ndims == 0:
-      return tensor_shape.TensorShape([])
-    return tensor_shape.TensorShape(output_shape[0] - 1)
-
-  def _inverse_event_shape_tensor(self, output_shape):
-    ndims = array_ops.shape(output_shape)[0]
-    if self.validate_args:
-      # It is not possible for a negative shape so we need only check <= 1.
-      is_one = check_ops.assert_equal(
-          ndims, 1, message="event_ndims must be 1")
-      ndims = control_flow_ops.with_dependencies([is_one], ndims)
-    if self._static_event_ndims == 0:
-      return ops.convert_to_tensor([], dtype=dtypes.int32, name="output_shape")
-    return array_ops.expand_dims(output_shape[0] - 1, dim=0)
-
-  def _forward(self, x):
-    # Pad the last dim with a zeros vector. We need this because it lets us
-    # infer the scale in the inverse function.
-    y = array_ops.expand_dims(x, dim=-1) if self._static_event_ndims == 0 else x
-    ndims = _get_ndims(y)
-    y = array_ops.pad(y, paddings=array_ops.one_hot(indices=[-1, ndims - 1],
-                                                    depth=ndims,
-                                                    axis=0,
-                                                    dtype=dtypes.int32))
-    # Set shape hints.
-    if x.shape.ndims is not None:
-      shape = x.shape.as_list()
-      if self._static_event_ndims == 0:
-        shape += [2]
-      elif shape[-1] is not None:
-        shape[-1] += 1
-      shape = tensor_shape.TensorShape(shape)
-      y.shape.assert_is_compatible_with(shape)
-      y.set_shape(shape)
-
-    # Since we only support event_ndims in [0, 1] and we do padding, we always
-    # reduce over the last dimension, i.e., dim=-1 (which is the default).
-    return nn_ops.softmax(y)
-
-  def _inverse(self, y):
-    # To derive the inverse mapping note that:
-    #   y[i] = exp(x[i]) / normalization
-    # and
-    #   y[end] = 1 / normalization.
-    # Thus:
-    # x[i] = log(exp(x[i])) - log(y[end]) - log(normalization)
-    #      = log(exp(x[i])/normalization) - log(y[end])
-    #      = log(y[i]) - log(y[end])
-    shape = (np.asarray(y.shape.as_list(), dtype=np.int32)
-             if y.shape.is_fully_defined()
-             else array_ops.shape(y, name="shape"))
-    ndims = _get_ndims(y)
-
-    # Do this first to make sure CSE catches that it'll happen again in
-    # _inverse_log_det_jacobian.
-    x = math_ops.log(y)
-
-    # We now extract the last coordinate of the rightmost dimension.
-    # Our trick is to slice from [0,0,...,shape[-1]-1] to shape[:-1]+[1].
-    begin = array_ops.one_hot(indices=ndims-1,
-                              depth=ndims,
-                              on_value=shape[-1]-np.array(1, dtype=shape.dtype),
-                              dtype=shape.dtype)
-    size = array_ops.concat([shape[:-1], np.asarray([1], dtype=shape.dtype)], 0)
-    log_normalization = -array_ops.strided_slice(x, begin, begin + size)
-
-    # Here we slice out all but the last coordinate; see above for idea.
-    begin = array_ops.zeros_like(shape)
-    size = array_ops.concat([shape[:-1], [shape[-1] - 1]], 0)
-    x = array_ops.strided_slice(x, begin, begin + size)
-
-    x += log_normalization
-
-    if self._static_event_ndims == 0:
-      x = array_ops.squeeze(x, squeeze_dims=[ndims-1])
-
-    # Set shape hints.
-    if y.shape.ndims is not None:
-      shape = y.shape.as_list()
-      if self._static_event_ndims == 0:
-        shape = shape[:-1]
-      elif shape[-1] is not None:
-        shape[-1] -= 1
-      shape = tensor_shape.TensorShape(shape)
-      x.shape.assert_is_compatible_with(shape)
-      x.set_shape(shape)
-
-    return x
-
-  def _inverse_log_det_jacobian(self, y):
-    # WLOG, consider the vector case:
-    #   x = log(y[:-1]) - log(y[-1])
-    # where,
-    #   y[-1] = 1 - sum(y[:-1]).
-    # We have:
-    #   det{ dX/dY } = det{ diag(1 ./ y[:-1]) + 1 / y[-1] }
-    #                = det{ inv{ diag(y[:-1]) - y[:-1]' y[:-1] } }   (1)
-    #                = 1 / det{ diag(y[:-1]) - y[:-1]' y[:-1] }
-    #                = 1 / { (1 + y[:-1]' inv(diag(y[:-1])) y[:-1]) *
-    #                        det(diag(y[:-1])) }                     (2)
-    #                = 1 / { y[-1] prod(y[:-1]) }
-    #                = 1 / prod(y)
-    # (1) - https://en.wikipedia.org/wiki/Sherman%E2%80%93Morrison_formula
-    #       or by noting that det{ dX/dY } = 1 / det{ dY/dX } from Bijector
-    #       docstring "Tip".
-    # (2) - https://en.wikipedia.org/wiki/Matrix_determinant_lemma
-    return -math_ops.reduce_sum(math_ops.log(y), axis=-1)
-
-  def _forward_log_det_jacobian(self, x):
-    if self._static_event_ndims == 0:
-      return x - 2. * nn_ops.softplus(x)
-    else:
-      # This code is similar to nn_ops.log_softmax but different because we have
-      # an implicit zero column to handle. I.e., instead of:
-      #   reduce_sum(logits - reduce_sum(exp(logits), dim))
-      # we must do:
-      #   log_normalization = 1 + reduce_sum(exp(logits))
-      #   -log_normalization + reduce_sum(logits - log_normalization)
-      log_normalization = nn_ops.softplus(
-          math_ops.reduce_logsumexp(x, axis=-1, keep_dims=True))
-      fldj = (-log_normalization +
-              math_ops.reduce_sum(x - log_normalization,
-                                  axis=-1,
-                                  keep_dims=True))
-      return array_ops.squeeze(fldj, squeeze_dims=-1)
-
-
-def _get_ndims(x):
-  """Returns `ndims`, statically if possible."""
-  if x.shape.ndims is not None:
-    return x.shape.ndims
-  return array_ops.rank(x, name="ndims")
diff --git a/tensorflow/contrib/distributions/python/ops/bijectors/softplus.py b/tensorflow/contrib/distributions/python/ops/bijectors/softplus.py
index 250a1144b5..81957fcf78 100644
--- a/tensorflow/contrib/distributions/python/ops/bijectors/softplus.py
+++ b/tensorflow/contrib/distributions/python/ops/bijectors/softplus.py
@@ -18,12 +18,127 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-# go/tf-wildcard-import
-# pylint: disable=wildcard-import
-from tensorflow.contrib.distributions.python.ops.bijectors.softplus_impl import *
-# pylint: enable=wildcard-import
-from tensorflow.python.util.all_util import remove_undocumented
+from tensorflow.python.framework import ops
+from tensorflow.python.ops import check_ops
+from tensorflow.python.ops import control_flow_ops
+from tensorflow.python.ops import math_ops
+from tensorflow.python.ops import nn_ops
+from tensorflow.python.ops.distributions import bijector
+from tensorflow.python.ops.distributions import util as distribution_util
 
-_allowed_symbols = ["Softplus"]
 
-remove_undocumented(__name__, _allowed_symbols)
+__all__ = [
+    "Softplus",
+]
+
+
+class Softplus(bijector.Bijector):
+  """Bijector which computes `Y = g(X) = Log[1 + exp(X)]`.
+
+  The softplus `Bijector` has the following two useful properties:
+
+  * The domain is the positive real numbers
+  * `softplus(x) approx x`, for large `x`, so it does not overflow as easily as
+    the `Exp` `Bijector`.
+
+  The optional nonzero `hinge_softness` parameter changes the transition at
+  zero.  With `hinge_softness = c`, the bijector is:
+
+    ```f_c(x) := c * g(x / c) = c * Log[1 + exp(x / c)].```
+
+  For large `x >> 1`, `c * Log[1 + exp(x / c)] approx c * Log[exp(x / c)] = x`,
+  so the behavior for large `x` is the same as the standard softplus.
+
+  As `c > 0` approaches 0 from the right, `f_c(x)` becomes less and less soft,
+  approaching `max(0, x)`.
+
+  * `c = 1` is the default.
+  * `c > 0` but small means `f(x) approx ReLu(x) = max(0, x)`.
+  * `c < 0` flips sign and reflects around the `y-axis`: `f_{-c}(x) = -f_c(-x)`.
+  * `c = 0` results in a non-bijective transformation and triggers an exception.
+
+    Example Use:
+
+    ```python
+    # Create the Y=g(X)=softplus(X) transform which works only on Tensors with 1
+    # batch ndim and 2 event ndims (i.e., vector of matrices).
+    softplus = Softplus(event_ndims=2)
+    x = [[[1., 2],
+          [3, 4]],
+         [[5, 6],
+          [7, 8]]]
+    log(1 + exp(x)) == softplus.forward(x)
+    log(exp(x) - 1) == softplus.inverse(x)
+    ```
+
+    Note: log(.) and exp(.) are applied element-wise but the Jacobian is a
+    reduction over the event space.
+  """
+
+  @distribution_util.AppendDocstring(
+      kwargs_dict={
+          "hinge_softness": (
+              "Nonzero floating point `Tensor`.  Controls the softness of what "
+              "would otherwise be a kink at the origin.  Default is 1.0")})
+  def __init__(self,
+               event_ndims=0,
+               hinge_softness=None,
+               validate_args=False,
+               name="softplus"):
+    with ops.name_scope(name, values=[hinge_softness]):
+      if hinge_softness is not None:
+        self._hinge_softness = ops.convert_to_tensor(
+            hinge_softness, name="hinge_softness")
+      else:
+        self._hinge_softness = None
+      if validate_args:
+        nonzero_check = check_ops.assert_none_equal(
+            ops.convert_to_tensor(
+                0, dtype=self.hinge_softness.dtype),
+            self.hinge_softness,
+            message="hinge_softness must be non-zero")
+        self._hinge_softness = control_flow_ops.with_dependencies(
+            [nonzero_check], self.hinge_softness)
+
+    super(Softplus, self).__init__(
+        event_ndims=event_ndims,
+        validate_args=validate_args,
+        name=name)
+
+  def _forward(self, x):
+    if self.hinge_softness is None:
+      return nn_ops.softplus(x)
+    hinge_softness = math_ops.cast(self.hinge_softness, x.dtype)
+    return hinge_softness * nn_ops.softplus(x / hinge_softness)
+
+  def _inverse(self, y):
+    if self.hinge_softness is None:
+      return distribution_util.softplus_inverse(y)
+    hinge_softness = math_ops.cast(self.hinge_softness, y.dtype)
+    return hinge_softness * distribution_util.softplus_inverse(
+        y / hinge_softness)
+
+  def _inverse_log_det_jacobian(self, y):
+    # Could also do:
+    #   ildj = math_ops.reduce_sum(y - distribution_util.softplus_inverse(y),
+    #                              axis=event_dims)
+    # but the following is more numerically stable. Ie,
+    # Y = Log[1 + exp{X}] ==> X = Log[exp{Y} - 1]
+    # ==> dX/dY = exp{Y} / (exp{Y} - 1)
+    #           = 1 / (1 - exp{-Y}),
+    # which is the most stable for large Y > 0. For small Y, we use
+    # 1 - exp{-Y} approx Y.
+    if self.hinge_softness is not None:
+      y /= math_ops.cast(self.hinge_softness, y.dtype)
+    return -math_ops.reduce_sum(math_ops.log(-math_ops.expm1(-y)),
+                                axis=self._event_dims_tensor(y))
+
+  def _forward_log_det_jacobian(self, x):
+    if self.hinge_softness is not None:
+      x /= math_ops.cast(self.hinge_softness, x.dtype)
+    return -math_ops.reduce_sum(nn_ops.softplus(-x),
+                                axis=self._event_dims_tensor(x))
+
+  @property
+  def hinge_softness(self):
+    return self._hinge_softness
diff --git a/tensorflow/contrib/distributions/python/ops/bijectors/softplus_impl.py b/tensorflow/contrib/distributions/python/ops/bijectors/softplus_impl.py
deleted file mode 100644
index 81957fcf78..0000000000
--- a/tensorflow/contrib/distributions/python/ops/bijectors/softplus_impl.py
+++ /dev/null
@@ -1,144 +0,0 @@
-# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Softplus bijector."""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-from tensorflow.python.framework import ops
-from tensorflow.python.ops import check_ops
-from tensorflow.python.ops import control_flow_ops
-from tensorflow.python.ops import math_ops
-from tensorflow.python.ops import nn_ops
-from tensorflow.python.ops.distributions import bijector
-from tensorflow.python.ops.distributions import util as distribution_util
-
-
-__all__ = [
-    "Softplus",
-]
-
-
-class Softplus(bijector.Bijector):
-  """Bijector which computes `Y = g(X) = Log[1 + exp(X)]`.
-
-  The softplus `Bijector` has the following two useful properties:
-
-  * The domain is the positive real numbers
-  * `softplus(x) approx x`, for large `x`, so it does not overflow as easily as
-    the `Exp` `Bijector`.
-
-  The optional nonzero `hinge_softness` parameter changes the transition at
-  zero.  With `hinge_softness = c`, the bijector is:
-
-    ```f_c(x) := c * g(x / c) = c * Log[1 + exp(x / c)].```
-
-  For large `x >> 1`, `c * Log[1 + exp(x / c)] approx c * Log[exp(x / c)] = x`,
-  so the behavior for large `x` is the same as the standard softplus.
-
-  As `c > 0` approaches 0 from the right, `f_c(x)` becomes less and less soft,
-  approaching `max(0, x)`.
-
-  * `c = 1` is the default.
-  * `c > 0` but small means `f(x) approx ReLu(x) = max(0, x)`.
-  * `c < 0` flips sign and reflects around the `y-axis`: `f_{-c}(x) = -f_c(-x)`.
-  * `c = 0` results in a non-bijective transformation and triggers an exception.
-
-    Example Use:
-
-    ```python
-    # Create the Y=g(X)=softplus(X) transform which works only on Tensors with 1
-    # batch ndim and 2 event ndims (i.e., vector of matrices).
-    softplus = Softplus(event_ndims=2)
-    x = [[[1., 2],
-          [3, 4]],
-         [[5, 6],
-          [7, 8]]]
-    log(1 + exp(x)) == softplus.forward(x)
-    log(exp(x) - 1) == softplus.inverse(x)
-    ```
-
-    Note: log(.) and exp(.) are applied element-wise but the Jacobian is a
-    reduction over the event space.
-  """
-
-  @distribution_util.AppendDocstring(
-      kwargs_dict={
-          "hinge_softness": (
-              "Nonzero floating point `Tensor`.  Controls the softness of what "
-              "would otherwise be a kink at the origin.  Default is 1.0")})
-  def __init__(self,
-               event_ndims=0,
-               hinge_softness=None,
-               validate_args=False,
-               name="softplus"):
-    with ops.name_scope(name, values=[hinge_softness]):
-      if hinge_softness is not None:
-        self._hinge_softness = ops.convert_to_tensor(
-            hinge_softness, name="hinge_softness")
-      else:
-        self._hinge_softness = None
-      if validate_args:
-        nonzero_check = check_ops.assert_none_equal(
-            ops.convert_to_tensor(
-                0, dtype=self.hinge_softness.dtype),
-            self.hinge_softness,
-            message="hinge_softness must be non-zero")
-        self._hinge_softness = control_flow_ops.with_dependencies(
-            [nonzero_check], self.hinge_softness)
-
-    super(Softplus, self).__init__(
-        event_ndims=event_ndims,
-        validate_args=validate_args,
-        name=name)
-
-  def _forward(self, x):
-    if self.hinge_softness is None:
-      return nn_ops.softplus(x)
-    hinge_softness = math_ops.cast(self.hinge_softness, x.dtype)
-    return hinge_softness * nn_ops.softplus(x / hinge_softness)
-
-  def _inverse(self, y):
-    if self.hinge_softness is None:
-      return distribution_util.softplus_inverse(y)
-    hinge_softness = math_ops.cast(self.hinge_softness, y.dtype)
-    return hinge_softness * distribution_util.softplus_inverse(
-        y / hinge_softness)
-
-  def _inverse_log_det_jacobian(self, y):
-    # Could also do:
-    #   ildj = math_ops.reduce_sum(y - distribution_util.softplus_inverse(y),
-    #                              axis=event_dims)
-    # but the following is more numerically stable. Ie,
-    # Y = Log[1 + exp{X}] ==> X = Log[exp{Y} - 1]
-    # ==> dX/dY = exp{Y} / (exp{Y} - 1)
-    #           = 1 / (1 - exp{-Y}),
-    # which is the most stable for large Y > 0. For small Y, we use
-    # 1 - exp{-Y} approx Y.
-    if self.hinge_softness is not None:
-      y /= math_ops.cast(self.hinge_softness, y.dtype)
-    return -math_ops.reduce_sum(math_ops.log(-math_ops.expm1(-y)),
-                                axis=self._event_dims_tensor(y))
-
-  def _forward_log_det_jacobian(self, x):
-    if self.hinge_softness is not None:
-      x /= math_ops.cast(self.hinge_softness, x.dtype)
-    return -math_ops.reduce_sum(nn_ops.softplus(-x),
-                                axis=self._event_dims_tensor(x))
-
-  @property
-  def hinge_softness(self):
-    return self._hinge_softness
diff --git a/tensorflow/contrib/distributions/python/ops/bijectors/weibull.py b/tensorflow/contrib/distributions/python/ops/bijectors/weibull.py
index d439f28884..00520bcda8 100644
--- a/tensorflow/contrib/distributions/python/ops/bijectors/weibull.py
+++ b/tensorflow/contrib/distributions/python/ops/bijectors/weibull.py
@@ -18,12 +18,132 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-# go/tf-wildcard-import
-# pylint: disable=wildcard-import
-from tensorflow.contrib.distributions.python.ops.bijectors.weibull_impl import *
-# pylint: enable=wildcard-import
-from tensorflow.python.util.all_util import remove_undocumented
+from tensorflow.python.framework import constant_op
+from tensorflow.python.framework import ops
+from tensorflow.python.ops import check_ops
+from tensorflow.python.ops import control_flow_ops
+from tensorflow.python.ops import math_ops
+from tensorflow.python.ops.distributions import bijector
 
-_allowed_symbols = ["Weibull"]
 
-remove_undocumented(__name__, _allowed_symbols)
+__all__ = [
+    "Weibull",
+]
+
+
+class Weibull(bijector.Bijector):
+  """Compute `Y = g(X) = 1 - exp((-X / scale) ** concentration), X >= 0`.
+
+  This bijector maps inputs from `[0, inf]` to [0, 1]`. The inverse of the
+  bijector applied to a uniform random variable `X ~ U(0, 1) gives back a
+  random variable with the
+  [Weibull distribution](https://en.wikipedia.org/wiki/Weibull_distribution):
+
+  ```none
+  Y ~ Weibull(scale, concentration)
+  pdf(y; scale, concentration, y >= 0) = (scale / concentration) * (
+    scale / concentration) ** (concentration - 1) * exp(
+      -(y / scale) ** concentration)
+  ```
+  """
+
+  def __init__(self,
+               scale=1.,
+               concentration=1.,
+               event_ndims=0,
+               validate_args=False,
+               name="weibull"):
+    """Instantiates the `Weibull` bijector.
+
+    Args:
+      scale: Positive Float-type `Tensor` that is the same dtype and is
+        broadcastable with `concentration`.
+        This is `l` in `Y = g(X) = 1 - exp((-x / l) ** k)`.
+      concentration: Positive Float-type `Tensor` that is the same dtype and is
+        broadcastable with `scale`.
+        This is `k` in `Y = g(X) = 1 - exp((-x / l) ** k)`.
+      event_ndims: Python scalar indicating the number of dimensions associated
+        with a particular draw from the distribution.
+      validate_args: Python `bool` indicating whether arguments should be
+        checked for correctness.
+      name: Python `str` name given to ops managed by this object.
+    """
+    self._graph_parents = []
+    self._name = name
+    self._validate_args = validate_args
+    with self._name_scope("init", values=[scale, concentration]):
+      self._scale = ops.convert_to_tensor(scale, name="scale")
+      self._concentration = ops.convert_to_tensor(
+          concentration, name="concentration")
+      check_ops.assert_same_float_dtype([self._scale, self._concentration])
+      if validate_args:
+        self._scale = control_flow_ops.with_dependencies([
+            check_ops.assert_positive(
+                self._scale,
+                message="Argument scale was not positive")
+        ], self._scale)
+        self._concentration = control_flow_ops.with_dependencies([
+            check_ops.assert_positive(
+                self._concentration,
+                message="Argument concentration was not positive")
+        ], self._concentration)
+
+    super(Weibull, self).__init__(
+        event_ndims=event_ndims,
+        validate_args=validate_args,
+        name=name)
+
+  @property
+  def scale(self):
+    """The `l` in `Y = g(X) = 1 - exp((-x / l) ** k)`."""
+    return self._scale
+
+  @property
+  def concentration(self):
+    """The `k` in `Y = g(X) = 1 - exp((-x / l) ** k)`."""
+    return self._concentration
+
+  def _forward(self, x):
+    x = self._maybe_assert_valid_x(x)
+    return -math_ops.expm1(-((x / self.scale) ** self.concentration))
+
+  def _inverse(self, y):
+    y = self._maybe_assert_valid_y(y)
+    return self.scale * (-math_ops.log1p(-y)) ** (1 / self.concentration)
+
+  def _inverse_log_det_jacobian(self, y):
+    y = self._maybe_assert_valid_y(y)
+    event_dims = self._event_dims_tensor(y)
+    return math_ops.reduce_sum(
+        -math_ops.log1p(-y) +
+        (1 / self.concentration - 1) * math_ops.log(-math_ops.log1p(-y)) +
+        math_ops.log(self.scale / self.concentration),
+        axis=event_dims)
+
+  def _forward_log_det_jacobian(self, x):
+    x = self._maybe_assert_valid_x(x)
+    event_dims = self._event_dims_tensor(x)
+    return math_ops.reduce_sum(
+        -(x / self.scale) ** self.concentration +
+        (self.concentration - 1) * math_ops.log(x) +
+        math_ops.log(self.concentration) +
+        -self.concentration * math_ops.log(self.scale),
+        axis=event_dims)
+
+  def _maybe_assert_valid_x(self, x):
+    if not self.validate_args:
+      return x
+    is_valid = check_ops.assert_non_negative(
+        x,
+        message="Forward transformation input must be at least {}.".format(0))
+    return control_flow_ops.with_dependencies([is_valid], x)
+
+  def _maybe_assert_valid_y(self, y):
+    if not self.validate_args:
+      return y
+    is_positive = check_ops.assert_non_negative(
+        y, message="Inverse transformation input must be greater than 0.")
+    less_than_one = check_ops.assert_less_equal(
+        y, constant_op.constant(1., y.dtype),
+        message="Inverse transformation input must be less than or equal to 1.")
+    return control_flow_ops.with_dependencies([is_positive, less_than_one], y)
diff --git a/tensorflow/contrib/distributions/python/ops/bijectors/weibull_impl.py b/tensorflow/contrib/distributions/python/ops/bijectors/weibull_impl.py
deleted file mode 100644
index 00520bcda8..0000000000
--- a/tensorflow/contrib/distributions/python/ops/bijectors/weibull_impl.py
+++ /dev/null
@@ -1,149 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Weibull bijector."""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-from tensorflow.python.framework import constant_op
-from tensorflow.python.framework import ops
-from tensorflow.python.ops import check_ops
-from tensorflow.python.ops import control_flow_ops
-from tensorflow.python.ops import math_ops
-from tensorflow.python.ops.distributions import bijector
-
-
-__all__ = [
-    "Weibull",
-]
-
-
-class Weibull(bijector.Bijector):
-  """Compute `Y = g(X) = 1 - exp((-X / scale) ** concentration), X >= 0`.
-
-  This bijector maps inputs from `[0, inf]` to [0, 1]`. The inverse of the
-  bijector applied to a uniform random variable `X ~ U(0, 1) gives back a
-  random variable with the
-  [Weibull distribution](https://en.wikipedia.org/wiki/Weibull_distribution):
-
-  ```none
-  Y ~ Weibull(scale, concentration)
-  pdf(y; scale, concentration, y >= 0) = (scale / concentration) * (
-    scale / concentration) ** (concentration - 1) * exp(
-      -(y / scale) ** concentration)
-  ```
-  """
-
-  def __init__(self,
-               scale=1.,
-               concentration=1.,
-               event_ndims=0,
-               validate_args=False,
-               name="weibull"):
-    """Instantiates the `Weibull` bijector.
-
-    Args:
-      scale: Positive Float-type `Tensor` that is the same dtype and is
-        broadcastable with `concentration`.
-        This is `l` in `Y = g(X) = 1 - exp((-x / l) ** k)`.
-      concentration: Positive Float-type `Tensor` that is the same dtype and is
-        broadcastable with `scale`.
-        This is `k` in `Y = g(X) = 1 - exp((-x / l) ** k)`.
-      event_ndims: Python scalar indicating the number of dimensions associated
-        with a particular draw from the distribution.
-      validate_args: Python `bool` indicating whether arguments should be
-        checked for correctness.
-      name: Python `str` name given to ops managed by this object.
-    """
-    self._graph_parents = []
-    self._name = name
-    self._validate_args = validate_args
-    with self._name_scope("init", values=[scale, concentration]):
-      self._scale = ops.convert_to_tensor(scale, name="scale")
-      self._concentration = ops.convert_to_tensor(
-          concentration, name="concentration")
-      check_ops.assert_same_float_dtype([self._scale, self._concentration])
-      if validate_args:
-        self._scale = control_flow_ops.with_dependencies([
-            check_ops.assert_positive(
-                self._scale,
-                message="Argument scale was not positive")
-        ], self._scale)
-        self._concentration = control_flow_ops.with_dependencies([
-            check_ops.assert_positive(
-                self._concentration,
-                message="Argument concentration was not positive")
-        ], self._concentration)
-
-    super(Weibull, self).__init__(
-        event_ndims=event_ndims,
-        validate_args=validate_args,
-        name=name)
-
-  @property
-  def scale(self):
-    """The `l` in `Y = g(X) = 1 - exp((-x / l) ** k)`."""
-    return self._scale
-
-  @property
-  def concentration(self):
-    """The `k` in `Y = g(X) = 1 - exp((-x / l) ** k)`."""
-    return self._concentration
-
-  def _forward(self, x):
-    x = self._maybe_assert_valid_x(x)
-    return -math_ops.expm1(-((x / self.scale) ** self.concentration))
-
-  def _inverse(self, y):
-    y = self._maybe_assert_valid_y(y)
-    return self.scale * (-math_ops.log1p(-y)) ** (1 / self.concentration)
-
-  def _inverse_log_det_jacobian(self, y):
-    y = self._maybe_assert_valid_y(y)
-    event_dims = self._event_dims_tensor(y)
-    return math_ops.reduce_sum(
-        -math_ops.log1p(-y) +
-        (1 / self.concentration - 1) * math_ops.log(-math_ops.log1p(-y)) +
-        math_ops.log(self.scale / self.concentration),
-        axis=event_dims)
-
-  def _forward_log_det_jacobian(self, x):
-    x = self._maybe_assert_valid_x(x)
-    event_dims = self._event_dims_tensor(x)
-    return math_ops.reduce_sum(
-        -(x / self.scale) ** self.concentration +
-        (self.concentration - 1) * math_ops.log(x) +
-        math_ops.log(self.concentration) +
-        -self.concentration * math_ops.log(self.scale),
-        axis=event_dims)
-
-  def _maybe_assert_valid_x(self, x):
-    if not self.validate_args:
-      return x
-    is_valid = check_ops.assert_non_negative(
-        x,
-        message="Forward transformation input must be at least {}.".format(0))
-    return control_flow_ops.with_dependencies([is_valid], x)
-
-  def _maybe_assert_valid_y(self, y):
-    if not self.validate_args:
-      return y
-    is_positive = check_ops.assert_non_negative(
-        y, message="Inverse transformation input must be greater than 0.")
-    less_than_one = check_ops.assert_less_equal(
-        y, constant_op.constant(1., y.dtype),
-        message="Inverse transformation input must be less than or equal to 1.")
-    return control_flow_ops.with_dependencies([is_positive, less_than_one], y)
-- 
GitLab


From e7fc1d52a1e638051e4a424035d2c922713d5b6b Mon Sep 17 00:00:00 2001
From: Yunxing Dai <yunxing@google.com>
Date: Wed, 6 Dec 2017 14:26:59 -0800
Subject: [PATCH 0710/1225] Add a convenient function to
 client_library_test_base.

PiperOrigin-RevId: 178152972
---
 tensorflow/compiler/xla/tests/client_library_test_base.h | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/tensorflow/compiler/xla/tests/client_library_test_base.h b/tensorflow/compiler/xla/tests/client_library_test_base.h
index d5f9ec858e..4d0cf8bf71 100644
--- a/tensorflow/compiler/xla/tests/client_library_test_base.h
+++ b/tensorflow/compiler/xla/tests/client_library_test_base.h
@@ -268,6 +268,14 @@ class ClientLibraryTestBase : public ::testing::Test {
     return CreateConstantFromLiteral(*Literal::CreateFromArray(array), builder);
   }
 
+  // Same as CreateConstantFromArray, but for scalars.
+  template <typename NativeT>
+  ComputationDataHandle CreateConstantFromScalar(NativeT value,
+                                                 ComputationBuilder* builder) {
+    return CreateConstantFromLiteral(*Literal::CreateR0<NativeT>(value),
+                                     builder);
+  }
+
   // Creates a parameter instruction that wraps a given value and then stores
   // into "data_handle" the global handle for that parameter.
   //
-- 
GitLab


From 2463815ad59b8dc52840f143d95be39581ac5896 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 6 Dec 2017 14:46:03 -0800
Subject: [PATCH 0711/1225] Check against passing the same array in
 --input_arrays and --output_arrays.

PiperOrigin-RevId: 178156041
---
 tensorflow/contrib/lite/toco/model_cmdline_flags.cc | 12 ++++++++++++
 tensorflow/contrib/lite/toco/model_cmdline_flags.h  |  2 ++
 2 files changed, 14 insertions(+)

diff --git a/tensorflow/contrib/lite/toco/model_cmdline_flags.cc b/tensorflow/contrib/lite/toco/model_cmdline_flags.cc
index 54ed95650e..29802da9fe 100644
--- a/tensorflow/contrib/lite/toco/model_cmdline_flags.cc
+++ b/tensorflow/contrib/lite/toco/model_cmdline_flags.cc
@@ -349,6 +349,8 @@ void ReadModelFlagsFromCommandLineFlags(
       }
     }
   }
+
+  CheckInputArraysAreNotOutputArrays(*model_flags);
 }
 
 ParsedModelFlags* UncheckedGlobalParsedModelFlags(bool must_already_exist) {
@@ -384,4 +386,14 @@ void ParseModelFlagsOrDie(int* argc, char* argv[]) {
   }
 }
 
+void CheckInputArraysAreNotOutputArrays(const ModelFlags& model_flags) {
+  for (const auto& input_array : model_flags.input_arrays()) {
+    for (const string& output_array : model_flags.output_arrays()) {
+      QCHECK_NE(input_array.name(), output_array)
+          << "The array " << output_array
+          << " is listed in both --input_arrays and --output_arrays.";
+    }
+  }
+}
+
 }  // namespace toco
diff --git a/tensorflow/contrib/lite/toco/model_cmdline_flags.h b/tensorflow/contrib/lite/toco/model_cmdline_flags.h
index 027d7ae1aa..61bcde234e 100644
--- a/tensorflow/contrib/lite/toco/model_cmdline_flags.h
+++ b/tensorflow/contrib/lite/toco/model_cmdline_flags.h
@@ -38,6 +38,8 @@ void ParseModelFlagsOrDie(int* argc, char* argv[]);
 // Get the global parsed model flags
 ParsedModelFlags* GlobalParsedModelFlags();
 
+void CheckInputArraysAreNotOutputArrays(const ModelFlags& model_flags);
+
 }  // namespace toco
 
 
-- 
GitLab


From e70d5621fa3d75868a328440d37db2af47a19cd8 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 6 Dec 2017 14:47:56 -0800
Subject: [PATCH 0712/1225] graphviz improvements:  - strip 'TensorFlow'
 prefixes to operator names  - don't generate workspace allocations in
 --output_format=GRAPHVIZ_DOT case    (can still see them with
 --dump_graphviz).

PiperOrigin-RevId: 178156346
---
 tensorflow/contrib/lite/toco/dump_graphviz.cc | 17 ++++++++++++++++-
 tensorflow/contrib/lite/toco/toco_tooling.cc  |  2 +-
 2 files changed, 17 insertions(+), 2 deletions(-)

diff --git a/tensorflow/contrib/lite/toco/dump_graphviz.cc b/tensorflow/contrib/lite/toco/dump_graphviz.cc
index f5e2868dc0..d1a7b26d91 100644
--- a/tensorflow/contrib/lite/toco/dump_graphviz.cc
+++ b/tensorflow/contrib/lite/toco/dump_graphviz.cc
@@ -20,6 +20,7 @@ limitations under the License.
 #include <vector>
 
 #include "absl/strings/str_replace.h"
+#include "absl/strings/strip.h"
 #include "tensorflow/contrib/lite/toco/model_flags.pb.h"
 #include "tensorflow/contrib/lite/toco/toco_graphviz_dump_options.h"
 #include "tensorflow/contrib/lite/toco/toco_port.h"
@@ -160,7 +161,21 @@ NodeProperties GetPropertiesForOperator(const Operator& op) {
     node_properties.label =
         static_cast<const TensorFlowUnsupportedOperator&>(op).tensorflow_op;
   } else {
-    node_properties.label = OperatorTypeName(op.type);
+    node_properties.label =
+        string(absl::StripPrefix(OperatorTypeName(op.type), "TensorFlow"));
+  }
+  switch (op.fused_activation_function) {
+    case FusedActivationFunctionType::kRelu:
+      AppendF(&node_properties.label, "\\nReLU");
+      break;
+    case FusedActivationFunctionType::kRelu6:
+      AppendF(&node_properties.label, "\\nReLU6");
+      break;
+    case FusedActivationFunctionType::kRelu1:
+      AppendF(&node_properties.label, "\\nReLU1");
+      break;
+    default:
+      break;
   }
   // Additional information for some of the operators.
   switch (op.type) {
diff --git a/tensorflow/contrib/lite/toco/toco_tooling.cc b/tensorflow/contrib/lite/toco/toco_tooling.cc
index 1d1d767518..161b94f1d6 100644
--- a/tensorflow/contrib/lite/toco/toco_tooling.cc
+++ b/tensorflow/contrib/lite/toco/toco_tooling.cc
@@ -99,7 +99,7 @@ bool SupportsLstmCell(FileFormat format) {
 }
 
 bool SupportsPreallocatedWorkspace(FileFormat format) {
-  return (format == GRAPHVIZ_DOT || format == TFLITE);
+  return (format == TFLITE);
 }
 
 bool IsRealValued(toco::ArrayDataType type) {
-- 
GitLab


From 697fa59286888d75f704629f4181dcca1bf6786a Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 6 Dec 2017 14:49:40 -0800
Subject: [PATCH 0713/1225] Simplification of propagate_array_data_types: No
 need to distinguish between ops for which we propagate the type of the first
 input, and ops for which we require all inputs to have the same type then
 propagate that type. Doing only the former is a mild relaxation of what we've
 been doing, allows to drop much code, and makes this code far less frequently
 needing to be updated again in the future as we drop a long non-canonical
 list of operator types being special-cased.

PiperOrigin-RevId: 178156653
---
 .../propagate_array_data_types.cc             | 47 ++-----------------
 1 file changed, 3 insertions(+), 44 deletions(-)

diff --git a/tensorflow/contrib/lite/toco/graph_transformations/propagate_array_data_types.cc b/tensorflow/contrib/lite/toco/graph_transformations/propagate_array_data_types.cc
index 1ff4e827aa..550e0408aa 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/propagate_array_data_types.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/propagate_array_data_types.cc
@@ -24,19 +24,6 @@ limitations under the License.
 namespace toco {
 
 namespace {
-
-ArrayDataType CommonDataTypeOfAllInputs(const Model& model,
-                                        const Operator& op) {
-  CHECK_GT(op.inputs.size(), 0);
-  const ArrayDataType data_type = model.GetArray(op.inputs[0]).data_type;
-  for (const auto& input : op.inputs) {
-    const auto& array = model.GetArray(input);
-    CHECK(array.data_type == data_type)
-        << " Unexpected: this operator has inputs with different data types.";
-  }
-  return data_type;
-}
-
 void SetDataTypeForAllOutputs(Model* model, Operator* op,
                               ArrayDataType data_type) {
   for (const auto& output : op->outputs) {
@@ -75,34 +62,6 @@ bool PropagateArrayDataTypes::Run(Model* model, std::size_t op_index) {
   } else if (op->type == OperatorType::kTensorFlowShape) {
     // These operators are assumed to produce int32 outputs.
     SetDataTypeForAllOutputs(model, op, ArrayDataType::kInt32);
-  } else if (op->type == OperatorType::kAveragePool ||
-             op->type == OperatorType::kMaxPool ||
-             op->type == OperatorType::kL2Pool ||
-             op->type == OperatorType::kConv ||
-             op->type == OperatorType::kDepthwiseConv ||
-             op->type == OperatorType::kFullyConnected ||
-             op->type == OperatorType::kTensorFlowMax ||
-             op->type == OperatorType::kTensorFlowMin ||
-             op->type == OperatorType::kPad ||
-             op->type == OperatorType::kStridedSlice ||
-             op->type == OperatorType::kTensorFlowReshape ||
-             op->type == OperatorType::kSlice ||
-             op->type == OperatorType::kSqueeze ||
-             op->type == OperatorType::kTensorFlowSum ||
-             op->type == OperatorType::kTensorFlowSwitch ||
-             op->type == OperatorType::kTensorFlowTile ||
-             op->type == OperatorType::kTensorFlowAll ||
-             op->type == OperatorType::kReorderAxes ||
-             op->type == OperatorType::kTensorFlowConcatV2 ||
-             op->type == OperatorType::kFloor ||
-             op->type == OperatorType::kGather ||
-             op->type == OperatorType::kSpaceToBatchND ||
-             op->type == OperatorType::kBatchToSpaceND ||
-             op->type == OperatorType::kMean) {
-    // These operators produce outputs with the same type as their 1st input
-    CHECK_GT(op->inputs.size(), 0);
-    const ArrayDataType data_type = model->arrays[op->inputs[0]]->data_type;
-    SetDataTypeForAllOutputs(model, op, data_type);
   } else if (op->type == OperatorType::kTensorFlowSplit ||
              op->type == OperatorType::kTensorFlowConcat) {
     // These operators produce an output with the same type as their 2nd input
@@ -125,9 +84,9 @@ bool PropagateArrayDataTypes::Run(Model* model, std::size_t op_index) {
       model->arrays[output]->data_type = data_type;
     }
   } else {
-    // These operators produce an output with the same type as any of their
-    // inputs, which must always have the same type.
-    const ArrayDataType data_type = CommonDataTypeOfAllInputs(*model, *op);
+    // These operators produce outputs with the same type as their 1st input
+    CHECK_GT(op->inputs.size(), 0);
+    const ArrayDataType data_type = model->arrays[op->inputs[0]]->data_type;
     SetDataTypeForAllOutputs(model, op, data_type);
   }
   // Return true if any output data type changed, false if none changed.
-- 
GitLab


From c2e6d554cfd7a19fa46c03e2e4eae264580b3692 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 6 Dec 2017 15:03:15 -0800
Subject: [PATCH 0714/1225] [XLA:CPU] Factor out parallel function call logic
 into IrFunction (so it can be called from other emitters). Just code movement
 (no functional change).

PiperOrigin-RevId: 178158853
---
 tensorflow/compiler/xla/service/cpu/BUILD     |   7 +
 .../xla/service/cpu/ir_emission_utils.h       |  14 ++
 .../compiler/xla/service/cpu/ir_emitter.cc    | 177 ++---------------
 .../compiler/xla/service/cpu/ir_emitter.h     |  18 +-
 .../compiler/xla/service/cpu/ir_function.cc   | 184 ++++++++++++++++--
 .../compiler/xla/service/cpu/ir_function.h    |  49 +++--
 .../xla/service/cpu/parallel_loop_emitter.cc  |   4 +-
 .../xla/service/cpu/parallel_loop_emitter.h   |  12 +-
 8 files changed, 257 insertions(+), 208 deletions(-)

diff --git a/tensorflow/compiler/xla/service/cpu/BUILD b/tensorflow/compiler/xla/service/cpu/BUILD
index ade887f193..6e7b062280 100644
--- a/tensorflow/compiler/xla/service/cpu/BUILD
+++ b/tensorflow/compiler/xla/service/cpu/BUILD
@@ -287,10 +287,15 @@ cc_library(
     srcs = ["ir_function.cc"],
     hdrs = ["ir_function.h"],
     deps = [
+        ":ir_emission_utils",
+        ":shape_partition",
+        "//tensorflow/compiler/xla:shape_util",
         "//tensorflow/compiler/xla:status_macros",
         "//tensorflow/compiler/xla:statusor",
         "//tensorflow/compiler/xla:types",
+        "//tensorflow/compiler/xla/service/cpu:cpu_runtime",
         "//tensorflow/compiler/xla/service/llvm_ir:llvm_util",
+        "//tensorflow/core:lib",
         "@llvm//:core",
     ],
 )
@@ -300,6 +305,7 @@ cc_library(
     srcs = ["parallel_loop_emitter.cc"],
     hdrs = ["parallel_loop_emitter.h"],
     deps = [
+        ":ir_emission_utils",
         "//tensorflow/compiler/xla:xla_data_proto",
         "//tensorflow/compiler/xla/service/llvm_ir:ir_array",
         "//tensorflow/compiler/xla/service/llvm_ir:llvm_loop",
@@ -645,6 +651,7 @@ cc_library(
         "//tensorflow/compiler/xla:shape_util",
         "//tensorflow/compiler/xla:window_util",
         "//tensorflow/compiler/xla/service:hlo",
+        "@llvm//:core",
     ],
 )
 
diff --git a/tensorflow/compiler/xla/service/cpu/ir_emission_utils.h b/tensorflow/compiler/xla/service/cpu/ir_emission_utils.h
index ac361ddfb4..34b2003916 100644
--- a/tensorflow/compiler/xla/service/cpu/ir_emission_utils.h
+++ b/tensorflow/compiler/xla/service/cpu/ir_emission_utils.h
@@ -16,6 +16,7 @@ limitations under the License.
 #ifndef TENSORFLOW_COMPILER_XLA_SERVICE_CPU_IR_EMISSION_UTILS_H_
 #define TENSORFLOW_COMPILER_XLA_SERVICE_CPU_IR_EMISSION_UTILS_H_
 
+#include "llvm/IR/Value.h"
 #include "tensorflow/compiler/xla/service/hlo_instruction.h"
 
 namespace xla {
@@ -23,6 +24,19 @@ namespace cpu {
 
 bool PotentiallyImplementedAsEigenConvolution(
     const HloInstruction& convolution);
+
+// Dynamic loop bounds are specified as an array of dimension index
+// [start, limit) pairs of ir values (one for each partitioned outer dimension).
+//
+// EX: Let 'shape' = [8, 16, 32], with the loop bounds of the two-most major
+//     dimensions dynamic. Then 'dynamic_loop_bounds' will contain the
+//     following ir values for the two most-major dimensions:
+//       [dim0_index_start_ir_value, dim0_index_limit_ir_value]
+//       [dim1_index_start_ir_value, dim1_index_limit_ir_value]
+//
+// See IrFunction and ParallelLoopEmitter for details.
+using DynamicLoopBounds = std::vector<std::pair<llvm::Value*, llvm::Value*>>;
+
 }  // namespace cpu
 }  // namespace xla
 
diff --git a/tensorflow/compiler/xla/service/cpu/ir_emitter.cc b/tensorflow/compiler/xla/service/cpu/ir_emitter.cc
index 939dbf0e11..70e7aec5c5 100644
--- a/tensorflow/compiler/xla/service/cpu/ir_emitter.cc
+++ b/tensorflow/compiler/xla/service/cpu/ir_emitter.cc
@@ -1361,7 +1361,7 @@ Status IrEmitter::HandleParameter(HloInstruction* parameter) {
   //
   // Where Param is the actual element type of the underlying buffer (for
   // example, float for an XLA F32 element type).
-  llvm::Argument* params = compute_function_->parameters_arg();
+  llvm::Value* params = compute_function_->parameters_arg();
   llvm::Value* param_address_offset =
       llvm_ir::EmitBufferIndexingGEP(params, param_number, &ir_builder_);
   llvm::LoadInst* param_address_untyped =
@@ -2201,9 +2201,17 @@ Status IrEmitter::HandleCall(HloInstruction* call) {
       !parallel_cpu_backend_) {
     // ParallelTaskAssignment assigned partitions, emit call to
     // ParallelForkJoin.
-    TF_RETURN_IF_ERROR(EmitParallelForkJoin(parameter_addresses,
-                                            emitted_value_[call], computation,
-                                            call_ir_function));
+    std::vector<llvm::Value*> call_args = GetArrayFunctionCallArguments(
+        parameter_addresses, &ir_builder_, computation->name(),
+        /*return_value_buffer=*/emitted_value_[call],
+        /*exec_run_options_arg=*/GetExecutableRunOptionsArgument(),
+        /*temp_buffers_arg=*/GetTempBuffersArgument(),
+        /*profile_counters_arg=*/GetProfileCountersArgument());
+
+    HloInstruction* root = computation->root_instruction();
+    TF_RETURN_IF_ERROR(EmitCallToParallelForkJoin(
+        call_args, root->shape(), root->outer_dimension_partitions(),
+        &ir_builder_, call_ir_function, computation->name()));
   } else {
     EmitArrayFunctionCallInto(call_ir_function, parameter_addresses,
                               emitted_value_[call], computation->name());
@@ -2678,7 +2686,7 @@ llvm::Type* IrEmitter::IrShapeType(const Shape& shape) {
   return llvm_ir::ShapeToIrType(shape, module_);
 }
 
-llvm::Argument* IrEmitter::GetProfileCountersArgument() {
+llvm::Value* IrEmitter::GetProfileCountersArgument() {
   return compute_function_->profile_counters_arg();
 }
 
@@ -2752,42 +2760,6 @@ llvm::Value* IrEmitter::EmitElementFunctionCall(
       AsStringRef(tensorflow::strings::StrCat(name, "_return_value")));
 }
 
-// Emits code to allocate an array of parameter address pointers, and store
-// each address from 'parameter_addresses'.
-// Returns an array of compute function call arguments (including parameter
-// address buffer).
-std::vector<llvm::Value*> IrEmitter::GetArrayFunctionCallArguments(
-    tensorflow::gtl::ArraySlice<llvm::Value*> parameter_addresses,
-    llvm::Value* return_value_buffer, tensorflow::StringPiece name) {
-  llvm::Value* parameter_addresses_buffer =
-      llvm_ir::EmitAllocaAtFunctionEntryWithCount(
-          ir_builder_.getInt8PtrTy(),
-          ir_builder_.getInt32(parameter_addresses.size()),
-          tensorflow::strings::StrCat(name, "_parameter_addresses"),
-          &ir_builder_);
-  for (size_t i = 0; i < parameter_addresses.size(); ++i) {
-    llvm::Value* parameter_as_i8ptr = ir_builder_.CreateBitCast(
-        parameter_addresses[i], ir_builder_.getInt8PtrTy(),
-        AsStringRef(tensorflow::strings::StrCat(name, "_parameter_", i,
-                                                "_address_as_i8ptr")));
-    llvm::Value* slot_in_param_adresses = ir_builder_.CreateInBoundsGEP(
-        parameter_addresses_buffer, {ir_builder_.getInt64(i)});
-    ir_builder_.CreateStore(parameter_as_i8ptr, slot_in_param_adresses);
-  }
-
-  const auto to_int8_ptr = [this](llvm::Value* ptr) {
-    return ir_builder_.CreatePointerCast(ptr, ir_builder_.getInt8PtrTy());
-  };
-  std::vector<llvm::Value*> arguments{
-      to_int8_ptr(return_value_buffer),
-      to_int8_ptr(GetExecutableRunOptionsArgument()),
-      parameter_addresses_buffer, GetTempBuffersArgument()};
-  if (auto* profile_counters = GetProfileCountersArgument()) {
-    arguments.push_back(profile_counters);
-  }
-  return arguments;
-}
-
 // Emits a core function call based on the following pseudo-code.
 //
 //   char** parameter_addresses_buffer =
@@ -2803,8 +2775,12 @@ void IrEmitter::EmitArrayFunctionCallInto(
     tensorflow::gtl::ArraySlice<llvm::Value*> parameter_addresses,
     llvm::Value* return_value_buffer, tensorflow::StringPiece name) {
   ir_builder_.CreateCall(
-      function, GetArrayFunctionCallArguments(parameter_addresses,
-                                              return_value_buffer, name));
+      function, GetArrayFunctionCallArguments(
+                    parameter_addresses, &ir_builder_, name,
+                    /*return_value_buffer=*/return_value_buffer,
+                    /*exec_run_options_arg=*/GetExecutableRunOptionsArgument(),
+                    /*temp_buffers_arg=*/GetTempBuffersArgument(),
+                    /*profile_counters_arg=*/GetProfileCountersArgument()));
 }
 
 llvm::Value* IrEmitter::EmitArrayFunctionCall(
@@ -2824,111 +2800,6 @@ llvm::Value* IrEmitter::EmitArrayFunctionCall(
   return return_value_buffer;
 }
 
-// Emits a call to a runtime fork/join function which dispatches parallel
-// calls to 'parallel_function' (and joins threads before returning).
-Status IrEmitter::EmitParallelForkJoin(
-    tensorflow::gtl::ArraySlice<llvm::Value*> parameter_addresses,
-    llvm::Value* output_address, HloComputation* computation,
-    llvm::Function* parallel_function) {
-  HloInstruction* root = computation->root_instruction();
-
-  // Build ParallelForkJoin function type.
-  std::vector<llvm::Type*> compute_function_params =
-      compute_function_->GetComputeFunctionParams();
-  // Number of parallel compute functions.
-  compute_function_params.push_back(ir_builder_.getInt32Ty());
-  // Array of partitions. There is an array element for each
-  // partition x partition_dim x 2 (for dimension start and limit).
-  compute_function_params.push_back(
-      llvm::Type::getInt64PtrTy(module_->getContext()));
-  // Number of partitioned most-major dimensions in 'root.shape'.
-  compute_function_params.push_back(ir_builder_.getInt32Ty());
-  // Function pointer for compute function to be dispatched in parallel.
-  compute_function_params.push_back(
-      llvm::Type::getInt8PtrTy(module_->getContext()));
-
-  llvm::FunctionType* fork_join_type = llvm::FunctionType::get(
-      /*Result=*/llvm::Type::getVoidTy(module_->getContext()),
-      /*Params=*/compute_function_params,
-      /*isVarArg=*/false);
-
-  llvm::Function* fork_join_func =
-      llvm::cast<llvm::Function>(module_->getOrInsertFunction(
-          runtime::kParallelForkJoinSymbolName, fork_join_type));
-  fork_join_func->setCallingConv(llvm::CallingConv::C);
-  fork_join_func->setDoesNotThrow();
-
-  // Add common compute function arguments.
-  const string name = computation->name();
-  std::vector<llvm::Value*> arguments =
-      GetArrayFunctionCallArguments(parameter_addresses, output_address, name);
-
-  // Create ShapePartitionIterator to generate all partitions of 'root.shape'.
-  ShapePartitionIterator partition_iterator(root->shape(),
-                                            root->outer_dimension_partitions());
-  const int64 num_partitions = partition_iterator.GetTotalPartitionCount();
-  // Add argument specifying the number of parallel partitions.
-  arguments.push_back(ir_builder_.getInt32(num_partitions));
-
-  // The number of partitioned most-major dimensions in 'root.shape'.
-  const int32 num_partitioned_dims = root->outer_dimension_partitions().size();
-  // A dimension partition consists of two elements: [start_index, limit_index).
-  const int32 dim_partition_size = 2;
-  // Calculate array partition stride.
-  const int32 array_partition_stride =
-      num_partitioned_dims * dim_partition_size;
-  // Calculate the total number of elements in the partition array.
-  const int32 partition_array_size =
-      dim_partition_size * num_partitioned_dims * num_partitions;
-
-  // Store dimension partition values as llvm constants in 'partitions'.
-  // See comments in runtime_fork_join.cc for array layout description.
-  std::vector<llvm::Constant*> partitions(partition_array_size);
-  for (int32 i = 0; i < num_partitions; ++i) {
-    std::vector<std::pair<int64, int64>> dim_partitions =
-        partition_iterator.GetPartition(i);
-    CHECK_EQ(num_partitioned_dims, dim_partitions.size());
-    const int32 partition_index = i * array_partition_stride;
-    for (int32 j = 0; j < num_partitioned_dims; ++j) {
-      const std::pair<int64, int64>& dim_partition = dim_partitions[j];
-      const int32 index = partition_index + j * dim_partition_size;
-      // Store partition [dim_start, dim_limit) intervals for each dimension.
-      partitions[index] = ir_builder_.getInt64(dim_partition.first);
-      partitions[index + 1] =
-          ir_builder_.getInt64(dim_partition.first + dim_partition.second);
-    }
-  }
-
-  // Create global variable out of dimension partitions in 'partitions'.
-  llvm::ArrayType* partitions_array_type =
-      llvm::ArrayType::get(ir_builder_.getInt64Ty(), partition_array_size);
-  llvm::Constant* partitions_array =
-      llvm::ConstantArray::get(partitions_array_type, partitions);
-  llvm::GlobalVariable* global_partitions_array = new llvm::GlobalVariable(
-      /*Module=*/*module_,
-      /*Type=*/partitions_array_type,
-      /*isConstant=*/true,
-      /*Linkage=*/llvm::GlobalValue::PrivateLinkage,
-      /*Initializer=*/partitions_array,
-      /*Name=*/
-      AsStringRef(
-          tensorflow::strings::StrCat(name, "_parallel_dimension_partitions")));
-
-  // Add argument specifying parallel dimension partitions.
-  arguments.push_back(ir_builder_.CreateBitCast(
-      global_partitions_array,
-      llvm::Type::getInt64PtrTy(module_->getContext())));
-  // Add argument specifying the number of partitioned most-major dimensions.
-  arguments.push_back(ir_builder_.getInt32(num_partitioned_dims));
-  // Add argument for parallel compute function pointer.
-  arguments.push_back(
-      ir_builder_.CreateBitCast(parallel_function, ir_builder_.getInt8PtrTy()));
-  // Emit call to parallel fork/join.
-  ir_builder_.CreateCall(fork_join_func, arguments);
-
-  return Status::OK();
-}
-
 Status IrEmitter::EmitTargetAddressForOp(const HloInstruction* op) {
   llvm::Value* addr;
   const Shape& target_shape = op->shape();
@@ -2997,14 +2868,8 @@ Status IrEmitter::EmitTargetElementLoop(
   } else {
     if (ShouldEmitParallelLoopFor(*target_op)) {
       // Emit code to read dynamic loop bounds from compute function argument.
-      ParallelLoopEmitter::LoopBounds dynamic_loop_bounds(
-          num_dynamic_loop_bounds_);
-      for (int i = 0; i < num_dynamic_loop_bounds_; ++i) {
-        dynamic_loop_bounds[i].first =
-            compute_function_->GetDynamicLoopBound(i * 2 + 0);
-        dynamic_loop_bounds[i].second =
-            compute_function_->GetDynamicLoopBound(i * 2 + 1);
-      }
+      std::vector<std::pair<llvm::Value*, llvm::Value*>> dynamic_loop_bounds =
+          compute_function_->GetDynamicLoopBounds();
       // Emit parallel loop with dynamic loop bounds for most-major dimensions.
       TF_RETURN_IF_ERROR(ParallelLoopEmitter(element_generator, target_array,
                                              &dynamic_loop_bounds, &ir_builder_)
diff --git a/tensorflow/compiler/xla/service/cpu/ir_emitter.h b/tensorflow/compiler/xla/service/cpu/ir_emitter.h
index 6b576d16bb..692e2b3877 100644
--- a/tensorflow/compiler/xla/service/cpu/ir_emitter.h
+++ b/tensorflow/compiler/xla/service/cpu/ir_emitter.h
@@ -237,7 +237,7 @@ class IrEmitter : public DfsHloVisitorWithDefault {
 
   // Get the llvm::Value* that represents the "prof_counters" argument of the
   // computation function being emitted by this emitter.
-  llvm::Argument* GetProfileCountersArgument();
+  llvm::Value* GetProfileCountersArgument();
 
   // Get the xla::ExecutableRunOptions that represents the "run_options"
   // argument of the computation function being emitted by this emitter.
@@ -300,18 +300,6 @@ class IrEmitter : public DfsHloVisitorWithDefault {
       tensorflow::gtl::ArraySlice<llvm::Value*> parameter_addresses,
       tensorflow::StringPiece name);
 
-  // Returns an array of compute function call arguments.
-  std::vector<llvm::Value*> GetArrayFunctionCallArguments(
-      tensorflow::gtl::ArraySlice<llvm::Value*> parameter_addresses,
-      llvm::Value* return_value_buffer, tensorflow::StringPiece name);
-
-  // Emits a call to a runtime fork/join function which dispatches parallel
-  // calls to 'parallel_function' (and joins threads before returning).
-  Status EmitParallelForkJoin(
-      tensorflow::gtl::ArraySlice<llvm::Value*> parameter_addresses,
-      llvm::Value* output_address, HloComputation* computation,
-      llvm::Function* parallel_function);
-
   // Verifies that the element types of all of the given operand instructions
   // match and are of one of the given supported types.
   Status ElementTypesSameAndSupported(
@@ -493,7 +481,7 @@ class IrEmitter : public DfsHloVisitorWithDefault {
           use_rdtscp_(false),
           prof_counters_(nullptr) {}
     ProfilingState(bool is_top_level_computation, bool use_rdtscp,
-                   llvm::Argument* prof_counters)
+                   llvm::Value* prof_counters)
         : is_top_level_computation_(is_top_level_computation),
           use_rdtscp_(use_rdtscp),
           prof_counters_(prof_counters) {}
@@ -526,7 +514,7 @@ class IrEmitter : public DfsHloVisitorWithDefault {
     bool use_rdtscp_;
 
     // The argument which corresponds to the profile counter buffer.
-    llvm::Argument* prof_counters_;
+    llvm::Value* prof_counters_;
 
     // The first read cycle counter in the program.
     llvm::Value* first_read_cycle_start_ = nullptr;
diff --git a/tensorflow/compiler/xla/service/cpu/ir_function.cc b/tensorflow/compiler/xla/service/cpu/ir_function.cc
index ed257613d8..ca8c290dd1 100644
--- a/tensorflow/compiler/xla/service/cpu/ir_function.cc
+++ b/tensorflow/compiler/xla/service/cpu/ir_function.cc
@@ -17,6 +17,8 @@ limitations under the License.
 
 #include "tensorflow/compiler/xla/service/cpu/ir_function.h"
 
+#include "tensorflow/compiler/xla/service/cpu/cpu_runtime.h"
+#include "tensorflow/compiler/xla/service/cpu/shape_partition.h"
 #include "tensorflow/compiler/xla/service/llvm_ir/llvm_util.h"
 #include "tensorflow/compiler/xla/status_macros.h"
 
@@ -28,6 +30,21 @@ using llvm_ir::AsStringRef;
 
 namespace cpu {
 
+static std::vector<llvm::Type*> GetComputeFunctionParams(
+    llvm::Module* llvm_module, const int64 num_dynamic_loop_bounds) {
+  llvm::Type* i8_ptr_type = llvm::Type::getInt8PtrTy(llvm_module->getContext());
+  llvm::Type* i8_ptr_ptr_type = i8_ptr_type->getPointerTo();
+  llvm::Type* i64_ptr_type =
+      llvm::Type::getInt64PtrTy(llvm_module->getContext());
+  std::vector<llvm::Type*> compute_function_params(
+      {i8_ptr_type, i8_ptr_type, i8_ptr_ptr_type, i8_ptr_ptr_type});
+  if (num_dynamic_loop_bounds > 0) {
+    compute_function_params.push_back(i64_ptr_type);
+  }
+  compute_function_params.push_back(i64_ptr_type);
+  return compute_function_params;
+}
+
 IrFunction::IrFunction(const string& function_name,
                        llvm::Function::LinkageTypes linkage,
                        const bool optimize_for_size_requested,
@@ -47,6 +64,15 @@ IrFunction::~IrFunction() {
   ir_builder_->CreateRetVoid();
 }
 
+DynamicLoopBounds IrFunction::GetDynamicLoopBounds() {
+  DynamicLoopBounds dynamic_loop_bounds(num_dynamic_loop_bounds_);
+  for (int i = 0; i < num_dynamic_loop_bounds_; ++i) {
+    dynamic_loop_bounds[i].first = GetDynamicLoopBound(i * 2 + 0);
+    dynamic_loop_bounds[i].second = GetDynamicLoopBound(i * 2 + 1);
+  }
+  return dynamic_loop_bounds;
+}
+
 void IrFunction::Initialize(const string& function_name,
                             llvm::Function::LinkageTypes linkage,
                             const bool optimize_for_size_requested,
@@ -106,7 +132,8 @@ void IrFunction::Initialize(const string& function_name,
   // to use GEPs to unravel the indirection layers.
   llvm::FunctionType* function_type = llvm::FunctionType::get(
       /*Result=*/llvm::Type::getVoidTy(llvm_module_->getContext()),
-      /*Params=*/GetComputeFunctionParams(),
+      /*Params=*/
+      GetComputeFunctionParams(llvm_module_, num_dynamic_loop_bounds_),
       /*isVarArg=*/false);
 
   // Functions with local linkage get an inlining bonus.  Because we know
@@ -153,21 +180,6 @@ void IrFunction::Initialize(const string& function_name,
       /*Parent=*/function_));
 }
 
-std::vector<llvm::Type*> IrFunction::GetComputeFunctionParams() {
-  llvm::Type* i8_ptr_type =
-      llvm::Type::getInt8PtrTy(llvm_module_->getContext());
-  llvm::Type* i8_ptr_ptr_type = i8_ptr_type->getPointerTo();
-  llvm::Type* i64_ptr_type =
-      llvm::Type::getInt64PtrTy(llvm_module_->getContext());
-  std::vector<llvm::Type*> compute_function_params(
-      {i8_ptr_type, i8_ptr_type, i8_ptr_ptr_type, i8_ptr_ptr_type});
-  if (num_dynamic_loop_bounds_ > 0) {
-    compute_function_params.push_back(i64_ptr_type);
-  }
-  compute_function_params.push_back(i64_ptr_type);
-  return compute_function_params;
-}
-
 llvm::Value* IrFunction::GetDynamicLoopBound(const int64 offset) {
   CHECK_GT(num_dynamic_loop_bounds_, 0);
   CHECK_LT(offset, num_dynamic_loop_bounds_ * 2);
@@ -177,5 +189,145 @@ llvm::Value* IrFunction::GetDynamicLoopBound(const int64 offset) {
                              ir_builder_->getInt64(offset), AsStringRef(name)));
 }
 
+// Emits code to allocate an array of parameter address pointers, and store
+// each address from 'parameter_addresses'.
+// Returns an array of compute function call arguments (including parameter
+// address buffer).
+std::vector<llvm::Value*> GetArrayFunctionCallArguments(
+    tensorflow::gtl::ArraySlice<llvm::Value*> parameter_addresses,
+    llvm::IRBuilder<>* ir_builder, tensorflow::StringPiece name,
+    llvm::Value* return_value_buffer, llvm::Value* exec_run_options_arg,
+    llvm::Value* temp_buffers_arg, llvm::Value* profile_counters_arg) {
+  llvm::Value* parameter_addresses_buffer =
+      llvm_ir::EmitAllocaAtFunctionEntryWithCount(
+          ir_builder->getInt8PtrTy(),
+          ir_builder->getInt32(parameter_addresses.size()),
+          tensorflow::strings::StrCat(name, "_parameter_addresses"),
+          ir_builder);
+  for (size_t i = 0; i < parameter_addresses.size(); ++i) {
+    llvm::Value* parameter_as_i8ptr = ir_builder->CreateBitCast(
+        parameter_addresses[i], ir_builder->getInt8PtrTy(),
+        AsStringRef(tensorflow::strings::StrCat(name, "_parameter_", i,
+                                                "_address_as_i8ptr")));
+    llvm::Value* slot_in_param_adresses = ir_builder->CreateInBoundsGEP(
+        parameter_addresses_buffer, {ir_builder->getInt64(i)});
+    ir_builder->CreateStore(parameter_as_i8ptr, slot_in_param_adresses);
+  }
+
+  const auto to_int8_ptr = [=](llvm::Value* ptr) {
+    return ir_builder->CreatePointerCast(ptr, ir_builder->getInt8PtrTy());
+  };
+  std::vector<llvm::Value*> arguments{
+      to_int8_ptr(return_value_buffer), to_int8_ptr(exec_run_options_arg),
+      parameter_addresses_buffer, temp_buffers_arg};
+  if (profile_counters_arg != nullptr) {
+    arguments.push_back(profile_counters_arg);
+  }
+  return arguments;
+}
+
+// Emits a call to a runtime fork/join function which dispatches parallel
+// calls to 'parallel_function' (and joins threads before returning).
+Status EmitCallToParallelForkJoin(
+    const std::vector<llvm::Value*>& arguments, const Shape& shape,
+    const std::vector<int64>& dimension_partition_counts,
+    llvm::IRBuilder<>* ir_builder, llvm::Function* parallel_function,
+    const string& name) {
+  llvm::Module* module = ir_builder->GetInsertBlock()->getModule();
+
+  // Build ParallelForkJoin function type.
+  std::vector<llvm::Type*> compute_function_params =
+      GetComputeFunctionParams(module, /*num_dynamic_loop_bounds=*/0);
+  // Number of parallel compute functions.
+  compute_function_params.push_back(ir_builder->getInt32Ty());
+  // Array of partitions. There is an array element for each
+  // partition x partition_dim x 2 (for dimension start and limit).
+  compute_function_params.push_back(
+      llvm::Type::getInt64PtrTy(module->getContext()));
+  // Number of partitioned most-major dimensions in 'shape'.
+  compute_function_params.push_back(ir_builder->getInt32Ty());
+  // Function pointer for compute function to be dispatched in parallel.
+  compute_function_params.push_back(
+      llvm::Type::getInt8PtrTy(module->getContext()));
+
+  llvm::FunctionType* fork_join_type = llvm::FunctionType::get(
+      /*Result=*/llvm::Type::getVoidTy(module->getContext()),
+      /*Params=*/compute_function_params,
+      /*isVarArg=*/false);
+
+  llvm::Function* fork_join_func =
+      llvm::cast<llvm::Function>(module->getOrInsertFunction(
+          runtime::kParallelForkJoinSymbolName, fork_join_type));
+  fork_join_func->setCallingConv(llvm::CallingConv::C);
+  fork_join_func->setDoesNotThrow();
+
+  // Add common compute function arguments.
+  std::vector<llvm::Value*> fork_join_arguments(arguments);
+
+  // Create ShapePartitionIterator to generate all partitions of 'shape'.
+  ShapePartitionIterator partition_iterator(shape, dimension_partition_counts);
+  const int64 num_partitions = partition_iterator.GetTotalPartitionCount();
+  // Add argument specifying the number of parallel partitions.
+  fork_join_arguments.push_back(ir_builder->getInt32(num_partitions));
+
+  // The number of partitioned most-major dimensions in 'shape'.
+  const int32 num_partitioned_dims = dimension_partition_counts.size();
+  // A dimension partition consists of two elements: [start_index, limit_index).
+  const int32 dim_partition_size = 2;
+  // Calculate array partition stride.
+  const int32 array_partition_stride =
+      num_partitioned_dims * dim_partition_size;
+  // Calculate the total number of elements in the partition array.
+  const int32 partition_array_size =
+      dim_partition_size * num_partitioned_dims * num_partitions;
+
+  // Store dimension partition values as llvm constants in 'partitions'.
+  // See comments in runtime_fork_join.cc for array layout description.
+  std::vector<llvm::Constant*> partitions(partition_array_size);
+  for (int32 i = 0; i < num_partitions; ++i) {
+    std::vector<std::pair<int64, int64>> dim_partitions =
+        partition_iterator.GetPartition(i);
+    CHECK_EQ(num_partitioned_dims, dim_partitions.size());
+    const int32 partition_index = i * array_partition_stride;
+    for (int32 j = 0; j < num_partitioned_dims; ++j) {
+      const std::pair<int64, int64>& dim_partition = dim_partitions[j];
+      const int32 index = partition_index + j * dim_partition_size;
+      // Store partition [dim_start, dim_limit) intervals for each dimension.
+      partitions[index] = ir_builder->getInt64(dim_partition.first);
+      partitions[index + 1] =
+          ir_builder->getInt64(dim_partition.first + dim_partition.second);
+    }
+  }
+
+  // Create global variable out of dimension partitions in 'partitions'.
+  llvm::ArrayType* partitions_array_type =
+      llvm::ArrayType::get(ir_builder->getInt64Ty(), partition_array_size);
+  llvm::Constant* partitions_array =
+      llvm::ConstantArray::get(partitions_array_type, partitions);
+  llvm::GlobalVariable* global_partitions_array = new llvm::GlobalVariable(
+      /*M=*/*module,
+      /*Ty=*/partitions_array_type,
+      /*isConstant=*/true,
+      /*Linkage=*/llvm::GlobalValue::PrivateLinkage,
+      /*Initializer=*/partitions_array,
+      /*Name=*/
+      AsStringRef(
+          tensorflow::strings::StrCat(name, "_parallel_dimension_partitions")));
+
+  // Add argument specifying parallel dimension partitions.
+  fork_join_arguments.push_back(ir_builder->CreateBitCast(
+      global_partitions_array,
+      llvm::Type::getInt64PtrTy(module->getContext())));
+  // Add argument specifying the number of partitioned most-major dimensions.
+  fork_join_arguments.push_back(ir_builder->getInt32(num_partitioned_dims));
+  // Add argument for parallel compute function pointer.
+  fork_join_arguments.push_back(
+      ir_builder->CreateBitCast(parallel_function, ir_builder->getInt8PtrTy()));
+  // Emit call to parallel fork/join.
+  ir_builder->CreateCall(fork_join_func, fork_join_arguments);
+
+  return Status::OK();
+}
+
 }  // namespace cpu
 }  // namespace xla
diff --git a/tensorflow/compiler/xla/service/cpu/ir_function.h b/tensorflow/compiler/xla/service/cpu/ir_function.h
index b7516b403e..1fd2da4dce 100644
--- a/tensorflow/compiler/xla/service/cpu/ir_function.h
+++ b/tensorflow/compiler/xla/service/cpu/ir_function.h
@@ -20,8 +20,11 @@ limitations under the License.
 #include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/Module.h"
 #include "llvm/IR/Value.h"
+#include "tensorflow/compiler/xla/service/cpu/ir_emission_utils.h"
+#include "tensorflow/compiler/xla/shape_util.h"
 #include "tensorflow/compiler/xla/statusor.h"
 #include "tensorflow/compiler/xla/types.h"
+#include "tensorflow/core/lib/gtl/array_slice.h"
 
 namespace xla {
 namespace cpu {
@@ -54,12 +57,15 @@ class IrFunction {
              llvm::IRBuilder<>* ir_builder, int64 num_dynamic_loop_bounds);
   ~IrFunction();
 
-  // Returns an array of compute function parameter types.
-  std::vector<llvm::Type*> GetComputeFunctionParams();
-
-  // Emit ir to read and return the ir value for the dynamic loop bound at
-  // 'offset' from the "dynamic_loop_bounds" argument of this function.
-  llvm::Value* GetDynamicLoopBound(int64 offset);
+  // Emit ir to read and return the set of ir values representing the dynamic
+  // loop bounds argument of this function.
+  // Each element in returned vector is a pair of ir values representing
+  // the loop bounds for a specific dimension, where the first element of the
+  // pair is the dimension start index, and the second element of the pair
+  // is the dimension limit.
+  // EX: [dimension_i_index_start_ir_value, dimension_i_index_limit_ir_value]
+  //
+  DynamicLoopBounds GetDynamicLoopBounds();
 
   // Returns the encapculated llvm::Function.
   llvm::Function* function() { return function_; }
@@ -71,15 +77,15 @@ class IrFunction {
   // "run_options" argument.
   llvm::Value* exec_run_options_arg() { return exec_run_options_arg_; }
 
-  // Get the llvm::Argument that represents this functions parameters argument.
-  llvm::Argument* parameters_arg() { return parameters_arg_; }
+  // Get the llvm::Value* that represents this functions parameters argument.
+  llvm::Value* parameters_arg() { return parameters_arg_; }
 
   // Get the llvm::Value* that represents this functions "temps" argument.
   llvm::Value* temp_buffers_arg() { return temp_buffers_arg_; }
 
   // Get the llvm::Value* that represents this functions "prof_counters"
   // argument.
-  llvm::Argument* profile_counters_arg() { return profile_counters_arg_; }
+  llvm::Value* profile_counters_arg() { return profile_counters_arg_; }
 
  private:
   // Initialize an llvm::Function with standard signature based on arguments.
@@ -87,6 +93,10 @@ class IrFunction {
                   llvm::Function::LinkageTypes linkage,
                   bool optimize_for_size_requested, bool enable_fast_math);
 
+  // Emit ir to read and return the ir value for the dynamic loop bound at
+  // 'offset' from the "dynamic_loop_bounds" argument of this function.
+  llvm::Value* GetDynamicLoopBound(int64 offset);
+
   llvm::IRBuilder<>* ir_builder_;
   llvm::Module* llvm_module_;
   llvm::IRBuilder<>::InsertPointGuard caller_insert_point_guard_;
@@ -97,12 +107,27 @@ class IrFunction {
   // Function argument IR values.
   llvm::Argument* result_arg_;
   llvm::Value* exec_run_options_arg_;
-  llvm::Argument* parameters_arg_;
+  llvm::Value* parameters_arg_;
   llvm::Value* temp_buffers_arg_;
-  llvm::Argument* dynamic_loop_bounds_arg_ = nullptr;
-  llvm::Argument* profile_counters_arg_;
+  llvm::Value* dynamic_loop_bounds_arg_ = nullptr;
+  llvm::Value* profile_counters_arg_;
 };
 
+// Returns an array of compute function call argument ir values.
+std::vector<llvm::Value*> GetArrayFunctionCallArguments(
+    tensorflow::gtl::ArraySlice<llvm::Value*> parameter_addresses,
+    llvm::IRBuilder<>* ir_builder, tensorflow::StringPiece name,
+    llvm::Value* return_value_buffer, llvm::Value* exec_run_options_arg,
+    llvm::Value* temp_buffers_arg, llvm::Value* profile_counters_arg);
+
+// Emits a call to a runtime fork/join function which dispatches parallel
+// calls to 'parallel_function' (and joins threads before returning).
+Status EmitCallToParallelForkJoin(
+    const std::vector<llvm::Value*>& arguments, const Shape& shape,
+    const std::vector<int64>& dimension_partition_counts,
+    llvm::IRBuilder<>* ir_builder, llvm::Function* parallel_function,
+    const string& name);
+
 }  // namespace cpu
 }  // namespace xla
 
diff --git a/tensorflow/compiler/xla/service/cpu/parallel_loop_emitter.cc b/tensorflow/compiler/xla/service/cpu/parallel_loop_emitter.cc
index 91e704e3d0..a3c3c1e5ef 100644
--- a/tensorflow/compiler/xla/service/cpu/parallel_loop_emitter.cc
+++ b/tensorflow/compiler/xla/service/cpu/parallel_loop_emitter.cc
@@ -24,8 +24,8 @@ namespace cpu {
 
 ParallelLoopEmitter::ParallelLoopEmitter(
     const llvm_ir::ElementGenerator& target_element_generator,
-    const llvm_ir::IrArray& target_array, const LoopBounds* dynamic_loop_bounds,
-    llvm::IRBuilder<>* ir_builder)
+    const llvm_ir::IrArray& target_array,
+    const DynamicLoopBounds* dynamic_loop_bounds, llvm::IRBuilder<>* ir_builder)
     : LoopEmitter(target_element_generator, target_array, ir_builder),
       dynamic_loop_bounds_(dynamic_loop_bounds) {}
 
diff --git a/tensorflow/compiler/xla/service/cpu/parallel_loop_emitter.h b/tensorflow/compiler/xla/service/cpu/parallel_loop_emitter.h
index 492d5953c4..9335d2818e 100644
--- a/tensorflow/compiler/xla/service/cpu/parallel_loop_emitter.h
+++ b/tensorflow/compiler/xla/service/cpu/parallel_loop_emitter.h
@@ -18,6 +18,7 @@ limitations under the License.
 
 #include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/Value.h"
+#include "tensorflow/compiler/xla/service/cpu/ir_emission_utils.h"
 #include "tensorflow/compiler/xla/service/llvm_ir/ir_array.h"
 #include "tensorflow/compiler/xla/service/llvm_ir/loop_emitter.h"
 
@@ -31,9 +32,8 @@ namespace cpu {
 // [start, limit) pairs of ir values (one for each partitioned outer dimension).
 //
 // EX: Let 'shape' = [8, 16, 32], with the loop bounds of the two-most major
-//     dimensions dynamic.
-//     Then 'dynamic_loop_bounds' will contain the following ir values for
-//     the two most-major dimenions:
+//     dimensions dynamic. Then 'dynamic_loop_bounds' will contain the
+//     following ir values for the two most-major dimensions:
 //       [dim0_index_start_ir_value, dim0_index_limit_ir_value]
 //       [dim1_index_start_ir_value, dim1_index_limit_ir_value]
 //
@@ -47,15 +47,13 @@ namespace cpu {
 //
 class ParallelLoopEmitter : public llvm_ir::LoopEmitter {
  public:
-  using LoopBounds = std::vector<std::pair<llvm::Value*, llvm::Value*>>;
-
   // Constructs a ParallelLoopEmitter which uses 'target_element_generator' to
   // generate elements, 'dynamic_loop_bounds' to set the loop bounds of the
   // most-major dimensions, and 'target_array.' shape to set the static loop
   // bounds for the most-minor dimensions.
   ParallelLoopEmitter(const llvm_ir::ElementGenerator& target_element_generator,
                       const llvm_ir::IrArray& target_array,
-                      const LoopBounds* dynamic_loop_bounds,
+                      const DynamicLoopBounds* dynamic_loop_bounds,
                       llvm::IRBuilder<>* ir_builder);
 
   ParallelLoopEmitter(const ParallelLoopEmitter&) = delete;
@@ -66,7 +64,7 @@ class ParallelLoopEmitter : public llvm_ir::LoopEmitter {
       tensorflow::StringPiece loop_name) override;
 
  private:
-  const LoopBounds* dynamic_loop_bounds_;
+  const DynamicLoopBounds* dynamic_loop_bounds_;
 };
 
 }  // namespace cpu
-- 
GitLab


From 2d8206b6b5daf8f5bedd94f32c61eb2c00fd7c25 Mon Sep 17 00:00:00 2001
From: Skye Wanderman-Milne <skyewm@google.com>
Date: Wed, 6 Dec 2017 15:21:25 -0800
Subject: [PATCH 0715/1225] Add Python checks to prevent mixing ops from
 different while loops.

The executor can currently catch some errors like this by trying to
reconstruct the while loop contexts by tracing the graph from enter
nodes, but this doesn't catch everything and can cause hangs or other
undesirable behavior. This change puts the check in Python and also
provides better debugging information.

In addition, this change refactors some logic from control_flow_ops.py
to a new file, control_flow_util.py. This is so we can call
CheckInputFromValidContext from ops.py without creating circular imports
between ops.py and control_flow_ops.py.

PiperOrigin-RevId: 178161679
---
 tensorflow/contrib/cmake/tf_tests.cmake       |   3 +
 tensorflow/python/BUILD                       |  18 +-
 tensorflow/python/framework/ops.py            |   3 +
 tensorflow/python/kernel_tests/BUILD          |  13 ++
 .../kernel_tests/control_flow_ops_py_test.py  | 116 ++++++++++
 .../kernel_tests/control_flow_util_test.py    |  71 +++++++
 tensorflow/python/ops/control_flow_grad.py    |   3 +-
 tensorflow/python/ops/control_flow_ops.py     |  79 +++----
 tensorflow/python/ops/control_flow_util.py    | 200 ++++++++++++++++++
 tensorflow/python/ops/gradients_impl.py       |   9 +-
 10 files changed, 454 insertions(+), 61 deletions(-)
 create mode 100644 tensorflow/python/kernel_tests/control_flow_util_test.py
 create mode 100644 tensorflow/python/ops/control_flow_util.py

diff --git a/tensorflow/contrib/cmake/tf_tests.cmake b/tensorflow/contrib/cmake/tf_tests.cmake
index 2e3ee2c96b..09e22285e1 100644
--- a/tensorflow/contrib/cmake/tf_tests.cmake
+++ b/tensorflow/contrib/cmake/tf_tests.cmake
@@ -296,6 +296,9 @@ if (tensorflow_BUILD_PYTHON_TESTS)
       # Test should only be run manually
       "${tensorflow_source_dir}/tensorflow/python/kernel_tests/reduction_ops_test_big.py"
       "${tensorflow_source_dir}/tensorflow/python/kernel_tests/svd_op_test.py"
+      # Depends on python/framework/test_ops
+      "${tensorflow_source_dir}/tensorflow/python/kernel_tests/array_ops_test.py"
+      "${tensorflow_source_dir}/tensorflow/python/kernel_tests/control_flow_util_test.py"
   )
   endif()
   list(REMOVE_ITEM tf_test_src_py ${tf_test_src_py_exclude})
diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD
index 05fd81c8d3..8471d5924f 100644
--- a/tensorflow/python/BUILD
+++ b/tensorflow/python/BUILD
@@ -703,6 +703,7 @@ py_library(
     srcs_version = "PY2AND3",
     deps = [
         ":c_api_util",
+        ":control_flow_util",
         ":device",
         ":dtypes",
         ":op_def_registry",
@@ -1285,7 +1286,10 @@ tf_gen_op_wrapper_private_py(
 
 tf_gen_op_wrapper_private_py(
     name = "control_flow_ops_gen",
-    visibility = ["//learning/brain/python/ops:__pkg__"],
+    visibility = [
+        "//learning/brain/python/ops:__pkg__",
+        "//tensorflow/python/kernel_tests:__pkg__",
+    ],
     deps = [
         "//tensorflow/core:control_flow_ops_op_lib",
         "//tensorflow/core:no_op_op_lib",
@@ -1553,6 +1557,7 @@ py_library(
     deps = [
         ":control_flow_ops",
         ":control_flow_ops_gen",
+        ":control_flow_util",
         ":framework",
         ":framework_for_generated_wrappers",
         ":math_ops",
@@ -1569,6 +1574,7 @@ py_library(
         ":array_ops_gen",
         ":constant_op",
         ":control_flow_ops_gen",
+        ":control_flow_util",
         ":data_flow_ops_gen",
         ":dtypes",
         ":framework_ops",
@@ -1584,6 +1590,15 @@ py_library(
     ],
 )
 
+py_library(
+    name = "control_flow_util",
+    srcs = ["ops/control_flow_util.py"],
+    srcs_version = "PY2AND3",
+    deps = [
+        ":platform",
+    ],
+)
+
 py_library(
     name = "ctc_ops",
     srcs = ["ops/ctc_ops.py"],
@@ -1657,6 +1672,7 @@ py_library(
         ":bitwise_ops",
         ":control_flow_grad",
         ":control_flow_ops",
+        ":control_flow_util",
         ":framework",
         ":framework_for_generated_wrappers",
         ":functional_ops",
diff --git a/tensorflow/python/framework/ops.py b/tensorflow/python/framework/ops.py
index 65f7e97ab1..79cc793f93 100644
--- a/tensorflow/python/framework/ops.py
+++ b/tensorflow/python/framework/ops.py
@@ -48,6 +48,7 @@ from tensorflow.python.framework import op_def_registry
 from tensorflow.python.framework import registry
 from tensorflow.python.framework import tensor_shape
 from tensorflow.python.framework import versions
+from tensorflow.python.ops import control_flow_util
 from tensorflow.python.platform import app
 from tensorflow.python.platform import tf_logging as logging
 from tensorflow.python.util import compat
@@ -1651,6 +1652,8 @@ class Operation(object):
 
     # Add this op to the current control flow context.
     self._control_flow_context = g._get_control_flow_context()  # pylint: disable=protected-access
+    for input_tensor in self.inputs:
+      control_flow_util.CheckInputFromValidContext(self, input_tensor.op)
     if self._control_flow_context is not None:
       self._control_flow_context.AddOp(self)
     # NOTE(keveman): Control flow context's AddOp could be creating new ops and
diff --git a/tensorflow/python/kernel_tests/BUILD b/tensorflow/python/kernel_tests/BUILD
index f6721de32a..b4c202ea39 100644
--- a/tensorflow/python/kernel_tests/BUILD
+++ b/tensorflow/python/kernel_tests/BUILD
@@ -1265,6 +1265,19 @@ cuda_py_test(
     ],
 )
 
+tf_py_test(
+    name = "control_flow_util_test",
+    size = "small",
+    srcs = ["control_flow_util_test.py"],
+    additional_deps = [
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:control_flow_ops",
+        "//tensorflow/python:control_flow_ops_gen",
+        "//tensorflow/python:control_flow_util",
+        "//tensorflow/python:test_ops",
+    ],
+)
+
 cuda_py_test(
     name = "conv1d_test",
     size = "small",
diff --git a/tensorflow/python/kernel_tests/control_flow_ops_py_test.py b/tensorflow/python/kernel_tests/control_flow_ops_py_test.py
index 1b7f9b110c..ad02a9e58c 100644
--- a/tensorflow/python/kernel_tests/control_flow_ops_py_test.py
+++ b/tensorflow/python/kernel_tests/control_flow_ops_py_test.py
@@ -2622,6 +2622,122 @@ class ControlFlowTest(test.TestCase):
           1)
 
 
+class ControlFlowContextCheckTest(test.TestCase):
+
+  def _getWhileTensor(self):
+    """Creates and returns a tensor from a while context."""
+    tensor = []
+
+    def body(i):
+      if not tensor:
+        tensor.append(constant_op.constant(1))
+      return i + tensor[0]
+
+    control_flow_ops.while_loop(lambda i: i < 10, body, [0])
+    return tensor[0]
+
+  def _getCondTensor(self):
+    cond_tensor = []
+    def true_fn():
+      if not cond_tensor:
+        cond_tensor.append(constant_op.constant(1))
+      return cond_tensor[0]
+    control_flow_ops.cond(math_ops.less(1, 2), true_fn,
+                          lambda: constant_op.constant(0))
+    return cond_tensor[0]
+
+  def testInvalidContext(self):
+    # Accessing a while loop tensor outside of control flow is illegal.
+    while_tensor = self._getWhileTensor()
+    with self.assertRaisesRegexp(
+        ValueError,
+        "Cannot use 'while/Const_1' as input to 'Add' because 'while/Const_1' "
+        "is in a while loop. See info log for more details."):
+      math_ops.add(1, while_tensor)
+
+  def testInvalidContextInCond(self):
+    # Accessing a while loop tensor in cond is illegal.
+    while_tensor = self._getWhileTensor()
+    with self.assertRaisesRegexp(
+        ValueError,
+        "Cannot use 'while/Const_1' as input to 'cond/Add' because "
+        "'while/Const_1' is in a while loop. See info log for more details."):
+      # TODO(skyewm): this passes if we return while_tensor directly instead
+      # of using it as input to another op.
+      control_flow_ops.cond(math_ops.less(1, 2),
+                            lambda: math_ops.add(1, while_tensor),
+                            lambda: constant_op.constant(0))
+
+  def testInvalidContextInWhile(self):
+    # Accessing a while loop tensor in a different while loop is illegal.
+    while_tensor = self._getWhileTensor()
+    with self.assertRaisesRegexp(
+        ValueError,
+        "Cannot use 'while_1/Add' as input to 'while/Const_1' because they are "
+        "in different while loops. See info log for more details."):
+      control_flow_ops.while_loop(lambda i: i < 10,
+                                  lambda x: math_ops.add(1, while_tensor), [0])
+
+    with self.assertRaisesRegexp(
+        ValueError,
+        "Cannot use 'while_2/NextIteration' as input to 'while/Const_1' "
+        "because they are in different while loops. See info log for more "
+        "details."):
+      control_flow_ops.while_loop(lambda i: i < 10, lambda i: while_tensor, [0])
+
+  def testValidCondContext(self):
+    # Accessing a tensor from a cond context is OK (although dangerous).
+    cond_tensor = self._getCondTensor()
+    math_ops.add(1, cond_tensor)
+
+  def testValidCondContextBranches(self):
+    # Accessing a tensor from a cond context from the other branch's cond
+    # context is OK (although dangerous).
+    cond_tensor = []
+    def branch_fn():
+      if not cond_tensor:
+        cond_tensor.append(constant_op.constant(1))
+      return cond_tensor[0]
+
+    control_flow_ops.cond(math_ops.less(1, 2), branch_fn, branch_fn)
+
+  def testValidWhileContext(self):
+    # Accessing a tensor in a nested while is OK.
+    def body(_):
+      c = constant_op.constant(1)
+      return control_flow_ops.while_loop(lambda i: i < 3, lambda i: i + c, [0])
+
+    control_flow_ops.while_loop(lambda i: i < 5, body, [0])
+
+  def testValidNestedContexts(self):
+    # Accessing a tensor from a cond context in a while context, all inside an
+    # outer while context, is OK.
+    def body(_):
+      cond_tensor = self._getCondTensor()
+      # Create another cond containing the while loop for good measure
+      return control_flow_ops.cond(
+          math_ops.less(1, 2),
+          lambda: control_flow_ops.while_loop(lambda i: i < 3,
+                                              lambda i: i + cond_tensor, [0]),
+          lambda: constant_op.constant(0))
+
+    control_flow_ops.while_loop(lambda i: i < 5, body, [0])
+
+  def testInvalidNestedContexts(self):
+    # Accessing a tensor from a while context in a different while context, all
+    # inside a cond context, is illegal.
+    def true_fn():
+      while_tensor = self._getWhileTensor()
+      return control_flow_ops.while_loop(lambda i: i < 3,
+                                         lambda i: i + while_tensor, [0])
+    with self.assertRaisesRegexp(
+        ValueError,
+        "Cannot use 'cond/while_1/add' as input to 'cond/while/Const_1' because"
+        " they are in different while loops. See info log for more details."):
+      control_flow_ops.cond(math_ops.less(1, 2), true_fn,
+                            lambda: constant_op.constant(0))
+
+
 class TupleTest(test.TestCase):
 
   def testTensors(self):
diff --git a/tensorflow/python/kernel_tests/control_flow_util_test.py b/tensorflow/python/kernel_tests/control_flow_util_test.py
new file mode 100644
index 0000000000..39e96f74b0
--- /dev/null
+++ b/tensorflow/python/kernel_tests/control_flow_util_test.py
@@ -0,0 +1,71 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Tests for tensorflow.python.ops.control_flow_util."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from tensorflow.python.framework import test_ops
+from tensorflow.python.ops import control_flow_ops
+from tensorflow.python.ops import control_flow_util
+from tensorflow.python.ops import gen_control_flow_ops
+from tensorflow.python.platform import test
+
+
+class ControlFlowUtilTest(test.TestCase):
+
+  def testIsSwitch(self):
+    switch_false, _ = control_flow_ops.switch(1, True)
+    switch = switch_false.op
+    self.assertTrue(control_flow_util.IsSwitch(switch))
+
+    ref_switch_false, _ = control_flow_ops.ref_switch(test_ops.ref_output(),
+                                                      True)
+    ref_switch = ref_switch_false.op
+    self.assertTrue(control_flow_util.IsSwitch(ref_switch))
+
+    self.assertFalse(control_flow_util.IsSwitch(test_ops.int_output().op))
+
+  def testIsLoopEnter(self):
+    enter = gen_control_flow_ops.enter(1, frame_name="name").op
+    self.assertTrue(control_flow_util.IsLoopEnter(enter))
+    self.assertFalse(control_flow_util.IsLoopConstantEnter(enter))
+
+    ref_enter = gen_control_flow_ops.ref_enter(test_ops.ref_output(),
+                                               frame_name="name").op
+    self.assertTrue(control_flow_util.IsLoopEnter(ref_enter))
+    self.assertFalse(control_flow_util.IsLoopConstantEnter(ref_enter))
+
+    const_enter = gen_control_flow_ops.enter(1, frame_name="name",
+                                             is_constant=True).op
+    self.assertTrue(control_flow_util.IsLoopEnter(const_enter))
+    self.assertTrue(control_flow_util.IsLoopConstantEnter(const_enter))
+
+    self.assertFalse(control_flow_util.IsLoopEnter(test_ops.int_output().op))
+
+  def testIsLoopExit(self):
+    exit_op = control_flow_ops.exit(1).op
+    self.assertTrue(control_flow_util.IsLoopExit(exit_op))
+
+    ref_exit = control_flow_ops.exit(test_ops.ref_output()).op
+    self.assertTrue(control_flow_util.IsLoopExit(ref_exit))
+
+    self.assertFalse(control_flow_util.IsLoopExit(test_ops.int_output().op))
+
+
+if __name__ == "__main__":
+  test.main()
diff --git a/tensorflow/python/ops/control_flow_grad.py b/tensorflow/python/ops/control_flow_grad.py
index 22dc6771ec..c3dd54a1ff 100644
--- a/tensorflow/python/ops/control_flow_grad.py
+++ b/tensorflow/python/ops/control_flow_grad.py
@@ -23,6 +23,7 @@ from six.moves import xrange  # pylint: disable=redefined-builtin
 from tensorflow.python.framework import ops
 from tensorflow.python.framework import sparse_tensor
 from tensorflow.python.ops import control_flow_ops
+from tensorflow.python.ops import control_flow_util
 from tensorflow.python.ops import math_ops
 # go/tf-wildcard-import
 # pylint: disable=wildcard-import,undefined-variable
@@ -91,7 +92,7 @@ def _MergeGrad(op, grad, _):
   input_op = op.inputs[0].op
   graph = ops.get_default_graph()
   # pylint: disable=protected-access
-  op_ctxt = control_flow_ops._GetOutputContext(input_op)
+  op_ctxt = control_flow_util.GetOutputContext(input_op)
   grad_ctxt = graph._get_control_flow_context()
   # pylint: enable=protected-access
   if isinstance(op_ctxt, WhileContext):
diff --git a/tensorflow/python/ops/control_flow_ops.py b/tensorflow/python/ops/control_flow_ops.py
index 38c959df8d..3accfc835b 100644
--- a/tensorflow/python/ops/control_flow_ops.py
+++ b/tensorflow/python/ops/control_flow_ops.py
@@ -66,6 +66,7 @@ from tensorflow.python.framework import sparse_tensor
 from tensorflow.python.framework import tensor_shape
 from tensorflow.python.framework import tensor_util
 from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import control_flow_util as util
 from tensorflow.python.ops import gen_array_ops
 from tensorflow.python.ops import gen_control_flow_ops
 from tensorflow.python.ops import gen_data_flow_ops
@@ -505,29 +506,6 @@ def _convert_flows_to_tensorarrays(tensors_or_tensorarrays, tensors_or_flows):
       for (ta, t_or_flow) in zip(tensors_or_tensorarrays, tensors_or_flows)]
 
 
-def _IsLoopConstantEnter(op):
-  """Return true iff op is a loop invariant."""
-  is_enter = (op.type == "Enter" or op.type == "RefEnter")
-  return is_enter and op.get_attr("is_constant")
-
-
-def _GetLoopConstantEnter(value):
-  """Return the enter op if we can infer `value` to be a loop invariant."""
-  id_ops = {"Switch", "RefSwitch", "Identity", "RefIdentity"}
-  op = value.op
-  while op.type in id_ops:
-    op = op.inputs[0].op
-  return op if _IsLoopConstantEnter(op) else None
-
-
-def _GetOutputContext(op):
-  """Return the control flow context for the output of an op."""
-  ctxt = op._get_control_flow_context()
-  if IsLoopExit(op):
-    ctxt = ctxt.outer_context
-  return ctxt
-
-
 def _ShapeLessThanOrEqual(shape1, shape2):
   if shape2.dims is None:
     return True
@@ -918,7 +896,7 @@ class GradLoopState(object):
 
       # Add the stack_push op in the context of value.op.
       swap_enabled = self.forward_context.swap_memory
-      value_ctxt = _GetOutputContext(value.op)
+      value_ctxt = util.GetOutputContext(value.op)
       if value_ctxt == self.forward_context:
         # value is not nested in the forward context.
         self.forward_context.Enter()
@@ -1028,7 +1006,7 @@ class GradLoopState(object):
       cur_value = value
       cur_grad_state = self
       while True:
-        enter_op = _GetLoopConstantEnter(cur_value)
+        enter_op = util.GetLoopConstantEnter(cur_value)
         if enter_op:
           # Special case: cur_value comes from a constant Enter node.
           cur_value = enter_op.inputs[0]
@@ -1081,7 +1059,7 @@ class ControlFlowState(object):
 
   def GetGradState(self, op, before):
     """Return the grad state for this op if it's in a forward loop context."""
-    if before and IsLoopExit(op):
+    if before and util.IsLoopExit(op):
       forward_ctxt = op._get_control_flow_context()
       forward_ctxt = forward_ctxt.outer_context
       if forward_ctxt:
@@ -1241,8 +1219,8 @@ class ControlFlowState(object):
     Returns:
       A zero tensor of the same shape of op.outputs[index].
     """
-    if IsLoopSwitch(op): return None
-    dead_branch = IsSwitch(op)
+    if util.IsLoopSwitch(op): return None
+    dead_branch = util.IsSwitch(op)
     forward_ctxt = _GetWhileContext(op)
     grad_state = self._map.get(forward_ctxt)
     if grad_state is None:
@@ -1342,7 +1320,7 @@ def MaybeCreateControlFlowState(between_op_list, between_ops,
   """
   loop_state = None
   for op in between_op_list:
-    if IsLoopExit(op):
+    if util.IsLoopExit(op):
       if loop_state is None:
         loop_state = ControlFlowState()
       if colocate_gradients_with_ops:
@@ -1353,28 +1331,10 @@ def MaybeCreateControlFlowState(between_op_list, between_ops,
   return loop_state
 
 
-def IsSwitch(op):
-  """Return true if `op` is a Switch."""
-  return op.type == "Switch" or op.type == "RefSwitch"
-
-
-def IsLoopExit(op):
-  """Return true if `op` is an Exit."""
-  return op.type == "Exit" or op.type == "RefExit"
-
-
-def IsLoopSwitch(op):
-  """Return true if `op` is the Switch for a while loop."""
-  if IsSwitch(op):
-    ctxt = op._get_control_flow_context()
-    return ctxt and isinstance(ctxt, WhileContext)
-  return False
-
-
 def ZerosLikeOutsideLoop(op, index):
   """Create zeros_like for the specified output of an op."""
   val = op.outputs[index]
-  if not IsSwitch(op):
+  if not util.IsSwitch(op):
     return array_ops.zeros_like(val, optimize=False)
   else:
     op_ctxt = op._get_control_flow_context()
@@ -1511,7 +1471,7 @@ class ControlFlowContext(object):
     return None
 
   def _IsInOuterContext(self, op):
-    op_ctxt = _GetOutputContext(op)
+    op_ctxt = util.GetOutputContext(op)
     outer_ctxt = self.outer_context
     while outer_ctxt != op_ctxt:
       if outer_ctxt is None:
@@ -1529,7 +1489,7 @@ class ControlFlowContext(object):
     else:
       internal_control_inputs = []
       for x in op.control_inputs:
-        ctxt = _GetOutputContext(x)
+        ctxt = util.GetOutputContext(x)
         if ctxt is not None and ctxt.GetWhileContext() == while_ctxt:
           internal_control_inputs.append(x)
     if len(internal_control_inputs) != len(op.control_inputs):
@@ -1547,6 +1507,12 @@ class ControlFlowContext(object):
     """Returns the pivot node for this context, or None."""
     return None
 
+  def IsWhileContext(self):
+    return False
+
+  def __str__(self):
+    return self.name
+
 
 class CondContext(ControlFlowContext):
   """The context for the conditional construct."""
@@ -1720,7 +1686,7 @@ class CondContext(ControlFlowContext):
         op._add_control_input(self._pivot.op)
       # pylint: enable=protected-access
 
-    if self._outer_context or not IsLoopExit(op):
+    if self._outer_context or not util.IsLoopExit(op):
       op.graph.prevent_fetching(op)
 
     if self._outer_context:
@@ -2190,7 +2156,7 @@ class WhileContext(ControlFlowContext):
         grad_ctxt = grad_ctxt.GetWhileContext()
         if grad_ctxt.grad_state:
           forward_ctxt = _GetWhileContext(val.op)
-          if IsLoopExit(val.op):
+          if util.IsLoopExit(val.op):
             forward_ctxt = forward_ctxt.outer_context
             if forward_ctxt:
               forward_ctxt = forward_ctxt.GetWhileContext()
@@ -2272,7 +2238,7 @@ class WhileContext(ControlFlowContext):
       self._MaybeAddControlDependency(op)
       for x in op.outputs:
         self._values.add(x.name)
-    if self._outer_context or not IsLoopExit(op):
+    if self._outer_context or not util.IsLoopExit(op):
       op.graph.prevent_fetching(op)
       for x in op.outputs:
         op.graph.prevent_feeding(x)
@@ -2291,7 +2257,7 @@ class WhileContext(ControlFlowContext):
         return True
       # pylint: enable=protected-access
       for x in op.inputs:
-        if not _IsLoopConstantEnter(x.op):
+        if not util.IsLoopConstantEnter(x.op):
           return False
       return True
     if _IsOpFree(op):
@@ -2607,7 +2573,7 @@ class WhileContext(ControlFlowContext):
 
     if control_pivot is not None:
       for var in enter_vars:
-        if _IsLoopConstantEnter(var.op.inputs[0].op):
+        if util.IsLoopConstantEnter(var.op.inputs[0].op):
           # pylint: disable=protected-access
           var.op._add_control_input(control_pivot.op)
           # pylint: enable=protected-access
@@ -2743,6 +2709,9 @@ class WhileContext(ControlFlowContext):
         graph._record_op_seen_by_control_dependencies(x.op)
     # pylint: enable=protected-access
 
+  def IsWhileContext(self):
+    return True
+
 
 def while_loop(cond, body, loop_vars, shape_invariants=None,
                parallel_iterations=10, back_prop=True, swap_memory=False,
diff --git a/tensorflow/python/ops/control_flow_util.py b/tensorflow/python/ops/control_flow_util.py
new file mode 100644
index 0000000000..941a1a743e
--- /dev/null
+++ b/tensorflow/python/ops/control_flow_util.py
@@ -0,0 +1,200 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Utilty functions for control flow.
+
+This file is necessary to avoid cyclic dependencies between ops.py and
+control_flow_ops.py.
+"""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import traceback
+
+from tensorflow.python.platform import tf_logging as logging
+
+
+def IsSwitch(op):
+  """Return true if `op` is a Switch."""
+  return op.type == "Switch" or op.type == "RefSwitch"
+
+
+def IsLoopEnter(op):
+  """Returns true if `op` is an Enter."""
+  return op.type == "Enter" or op.type == "RefEnter"
+
+
+def IsLoopExit(op):
+  """Return true if `op` is an Exit."""
+  return op.type == "Exit" or op.type == "RefExit"
+
+
+def IsLoopSwitch(op):
+  """Return true if `op` is the Switch for a while loop."""
+  if IsSwitch(op):
+    ctxt = op._get_control_flow_context()  # pylint: disable=protected-access
+    return ctxt and ctxt.IsWhileContext()
+  return False
+
+
+def IsLoopConstantEnter(op):
+  """Return true iff op is a loop invariant."""
+  return IsLoopEnter(op) and op.get_attr("is_constant")
+
+
+def GetLoopConstantEnter(value):
+  """Return the enter op if we can infer `value` to be a loop invariant."""
+  id_ops = {"Switch", "RefSwitch", "Identity", "RefIdentity"}
+  op = value.op
+  while op.type in id_ops:
+    op = op.inputs[0].op
+  return op if IsLoopConstantEnter(op) else None
+
+
+def GetOutputContext(op):
+  """Return the control flow context for the output of an op."""
+  ctxt = op._get_control_flow_context()  # pylint: disable=protected-access
+  if IsLoopExit(op):
+    ctxt = ctxt.outer_context
+  return ctxt
+
+
+def GetContainingWhileContext(ctxt):
+  """Returns the first ancestor WhileContext of `ctxt`.
+
+  Returns `ctxt` if `ctxt` is a WhileContext, or None if `ctxt` is not in a
+  while loop.
+
+  Args:
+    ctxt: ControlFlowContext
+
+  Returns:
+    `ctxt` if `ctxt` is a WhileContext, the most nested WhileContext containing
+    `ctxt`, or None if `ctxt` is not in a while loop.
+  """
+  while ctxt:
+    if ctxt.IsWhileContext(): return ctxt
+    ctxt = ctxt.outer_context
+  return None
+
+
+def IsContainingContext(ctxt, maybe_containing_ctxt):
+  """Returns true if `maybe_containing_ctxt` is or contains `ctxt`."""
+  while ctxt is not maybe_containing_ctxt:
+    if ctxt is None: return False
+    ctxt = ctxt.outer_context
+  return True
+
+
+def CheckInputFromValidContext(op, input_op):
+  """Returns whether `input_op` can be used from `op`s context.
+
+  Conceptually, only inputs from op's while context or any ancestor while
+  context (including outside of any context) are valid. In practice, there are
+  many other edge cases as well.
+
+  Args:
+    op: Operation
+    input_op: Operation
+
+  Raises:
+    ValueError: if input_op is from an invalid context.
+  """
+  op_ctxt = op._get_control_flow_context()  # pylint: disable=protected-access
+  input_ctxt = GetOutputContext(input_op)
+  valid = False
+
+  if not input_ctxt:
+    # input_op isn't in a control flow context.
+    valid = True
+  elif op_ctxt is input_ctxt:
+    # input_op is in the same context as op.
+    valid = True
+  else:
+    while_ctxt = GetContainingWhileContext(op_ctxt)
+    input_while_ctxt = GetContainingWhileContext(input_ctxt)
+
+    if while_ctxt is None:
+      if input_while_ctxt is None:
+        # Neither op nor input_op is in a while loop, but one or both are in
+        # conds. We allow this, although execution will fail if the branch
+        # corresponding to input_op's cond context isn't taken.
+        valid = True
+      # Invalid if op isn't in a while loop and input_op is. Unless...
+      if IsLoopEnter(op):
+        # WhileContext._BuildLoop clears context for Enter nodes.
+        valid = True
+      if IsSwitch(op):
+        # CondContext.AddValue clears context for Switch nodes.
+        valid = True
+    elif IsContainingContext(while_ctxt, input_while_ctxt):
+      # input_op is in a while loop which contains op's while loop (or not in a
+      # while loop at all).
+      valid = True
+    elif (while_ctxt.grad_state and
+          IsContainingContext(while_ctxt.grad_state.forward_context,
+                              input_while_ctxt)):
+      # op is in a gradient context and input_op is in the associated forward
+      # pass context or an ancestor thereof. This case is need to build while
+      # loop gradients.
+      # NOTE(skyewm): we theoretically also need this case for custom gradient
+      # functions that close over tensors from ancestor contexts, but I haven't
+      # verified this.
+      valid = True
+    elif (while_ctxt.grad_state and
+          while_ctxt.grad_state.forward_context is
+          input_while_ctxt._outer_context):  # pylint: disable=protected-access
+      # op is in a gradient context and input_op is in a child of the associated
+      # forward pass context. This case is needed for the gradients of while
+      # loops with conds.
+      valid = True
+    elif (input_while_ctxt.grad_state and
+          input_while_ctxt.grad_state.forward_context is while_ctxt):
+      # input_op is in the gradient context of op's context. This case is needed
+      # when the gradient of a while loop gradient is requested (this will
+      # eventually fail unless there is a stop_gradient() or similar).
+      valid = True
+    elif (input_while_ctxt.grad_state and
+          input_ctxt.grad_state.forward_context.grad_state and
+          input_ctxt.grad_state.forward_context.grad_state.forward_context is
+          while_ctxt):
+      # input_op is in the grad grad context of op's context. This case is
+      # needed when the gradient of a while loop gradient is requested (this
+      # will eventually fail unless there is a stop_gradient() or similar).
+      valid = True
+
+  if not valid:
+    if while_ctxt:
+      error_msg = (
+          "Cannot use '%s' as input to '%s' because they are in different while"
+          " loops." % (op.name, input_op.name))
+    else:
+      error_msg = (
+          "Cannot use '%s' as input to '%s' because '%s' is in a while loop."
+          % (input_op.name, op.name, input_op.name))
+
+    # Log the error message plus the relevant stack traces. The stacks may be
+    # useful for debugging this error, but we don't want to raise an
+    # unreadable exception.
+    log_msg = error_msg
+    log_msg += "\n\n%s while context: %s" % (op.name, while_ctxt)
+    log_msg += "\n%s while context: %s" % (input_op.name, input_while_ctxt)
+    log_msg += "\n\nTraceback for %s:\n%s\nTraceback for %s:\n%s\n" % (
+        op.name, "".join(traceback.format_list(op.traceback)),
+        input_op.name, "".join(traceback.format_list(input_op.traceback)))
+    logging.info(log_msg)
+    raise ValueError(error_msg + " See info log for more details.")
diff --git a/tensorflow/python/ops/gradients_impl.py b/tensorflow/python/ops/gradients_impl.py
index 8d00a3c6ab..f5fdb12b2c 100644
--- a/tensorflow/python/ops/gradients_impl.py
+++ b/tensorflow/python/ops/gradients_impl.py
@@ -38,6 +38,7 @@ from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import check_ops
 from tensorflow.python.ops import control_flow_grad  # pylint: disable=unused-import
 from tensorflow.python.ops import control_flow_ops
+from tensorflow.python.ops import control_flow_util
 from tensorflow.python.ops import functional_ops
 from tensorflow.python.ops import image_grad  # pylint: disable=unused-import
 from tensorflow.python.ops import linalg_grad  # pylint: disable=unused-import
@@ -668,10 +669,10 @@ def _UpdatePendingAndEnqueueReady(grads, op, queue, pending_count, loop_state):
     ready = (pending_count[x.op._id] == 0)
     if loop_state and not ready:
       ready = (pending_count[x.op._id] > 0 and
-               control_flow_ops.IsLoopSwitch(x.op))
+               control_flow_util.IsLoopSwitch(x.op))
     # pylint: enable=protected-access
     if ready:
-      if control_flow_ops.IsLoopExit(x.op):
+      if control_flow_util.IsLoopExit(x.op):
         # if x is an exit without real gradient, defer processing them.
         grad_state = loop_state.GetGradState(x.op, before=False)
         grad_state.deferred_exits.append(x)
@@ -711,7 +712,7 @@ def _SetGrad(grads, t, grad):
   if isinstance(t_grads, list):
     t_grads.append(grad)
   else:
-    assert control_flow_ops.IsLoopSwitch(op)
+    assert control_flow_util.IsLoopSwitch(op)
     op_grads[t.value_index] = grad
 
 
@@ -851,7 +852,7 @@ def _AggregatedGrads(grads, op, loop_state, aggregation_method=None):
   for i, out_grad in enumerate(out_grads):
     if loop_state:
       if isinstance(out_grad, (ops.Tensor, ops.IndexedSlices)):
-        assert control_flow_ops.IsLoopSwitch(op)
+        assert control_flow_util.IsLoopSwitch(op)
         continue
     # Grads have to be Tensors or IndexedSlices
     if (isinstance(out_grad, collections.Sequence) and not all([
-- 
GitLab


From bcdcb78854e8dfa1b2eda813b9e2910df522abb4 Mon Sep 17 00:00:00 2001
From: Skye Wanderman-Milne <skyewm@google.com>
Date: Wed, 6 Dec 2017 15:29:56 -0800
Subject: [PATCH 0716/1225] Fix tests in control_flow_ops_test.py to not access
 Tensor._shape

Accessing _shape directly doesn't work with the C API enabled. Rewrite
these tests to use public APIs instead.

PiperOrigin-RevId: 178162875
---
 .../python/ops/control_flow_ops_test.py       | 53 +++++++++++--------
 1 file changed, 31 insertions(+), 22 deletions(-)

diff --git a/tensorflow/python/ops/control_flow_ops_test.py b/tensorflow/python/ops/control_flow_ops_test.py
index 3e8f39dd24..923b26f958 100644
--- a/tensorflow/python/ops/control_flow_ops_test.py
+++ b/tensorflow/python/ops/control_flow_ops_test.py
@@ -540,7 +540,9 @@ class DataTypesTest(test_util.TensorFlowTestCase):
 
   def _testReturnValues(self, fn_true, fn_false, expected_value_true,
                         expected_value_false, strict=False,
-                        check_cond=True):
+                        check_cond=True, feed_dict=None):
+    if feed_dict is None: feed_dict = {}
+
     condition = array_ops.placeholder(dtypes.bool)
     output_cond = control_flow_ops.cond(condition, fn_true, fn_false,
                                         strict=strict)
@@ -549,13 +551,17 @@ class DataTypesTest(test_util.TensorFlowTestCase):
 
     with self.test_session() as sess:
       variables.global_variables_initializer().run()
+      true_feed_dict = {condition: True}
+      true_feed_dict.update(feed_dict)
       result_cond, result_case = sess.run([output_cond, output_case],
-                                          feed_dict={condition: True})
+                                          feed_dict=true_feed_dict)
       self.assertAllEqualNested(result_cond, expected_value_true)
       if check_cond:
         self.assertAllEqualNested(result_case, expected_value_true)
+      false_feed_dict = {condition: False}
+      false_feed_dict.update(feed_dict)
       result_cond, result_case = sess.run([output_cond, output_case],
-                                          feed_dict={condition: False})
+                                          feed_dict=false_feed_dict)
       self.assertAllEqualNested(result_cond, expected_value_false)
       if check_cond:
         self.assertAllEqualNested(result_case, expected_value_false)
@@ -631,26 +637,26 @@ class DataTypesTest(test_util.TensorFlowTestCase):
 
   def test_tensors_unknown_shape(self):
     def _BuildTrueBranch(dtype):
+      tensor = array_ops.placeholder(dtype=dtype, shape=None)
       def _Build():
-        tensor = array_ops.zeros([2, 2], dtype=dtype)
-        tensor._shape = tensor_shape.TensorShape(None)
         return tensor
-      return _Build
+      return _Build, tensor
 
     def _BuildFalseBranch(dtype):
+      tensor = array_ops.placeholder(dtype=dtype, shape=None)
       def _Build():
-        tensor = array_ops.ones([2, 2], dtype=dtype)
-        tensor._shape = tensor_shape.TensorShape(None)
         return tensor
-      return _Build
+      return _Build, tensor
 
     for dtype in (dtypes.float16, dtypes.int8, dtypes.int32, dtypes.uint8):
       shape = tensor_shape.TensorShape(None)
-      fn_true = _BuildTrueBranch(dtype)
-      fn_false = _BuildFalseBranch(dtype)
+      fn_true, true_tensor = _BuildTrueBranch(dtype)
+      fn_false, false_tensor = _BuildFalseBranch(dtype)
       self._testShape(fn_true, fn_false, shape)
       self._testReturnValues(fn_true, fn_false,
-                             np.zeros([2, 2]), np.ones([2, 2]))
+                             np.zeros([2, 2]), np.ones([2, 2]),
+                             feed_dict={true_tensor: np.zeros([2, 2]),
+                                        false_tensor: np.ones([2, 2])})
 
   def test_sparse_tensors(self):
     shape = tensor_shape.TensorShape([None, None])
@@ -674,26 +680,29 @@ class DataTypesTest(test_util.TensorFlowTestCase):
 
   def test_tensors_with_partially_specified_shapes(self):
     def _BuildBranch(dtype, shape):
+      a = array_ops.placeholder(dtype=dtype, shape=shape[0])
+      b = array_ops.placeholder(dtype=dtype, shape=shape[1])
+      c = array_ops.placeholder(dtype=dtype, shape=shape[2])
       def _Build():
-        a = array_ops.zeros([2, 2], dtype=dtype)
-        b = array_ops.zeros([5], dtype=dtype)
-        c = array_ops.ones([3, 3], dtype=dtype)
-        a._shape = tensor_shape.TensorShape(shape[0])
-        b._shape = tensor_shape.TensorShape(shape[1])
-        c._shape = tensor_shape.TensorShape(shape[2])
         return a, b, c
-      return _Build
+      return _Build, (a, b, c)
 
     for dtype in (dtypes.float16, dtypes.int8, dtypes.int32, dtypes.uint8):
       shape = (tensor_shape.TensorShape([None, 2]),
                tensor_shape.TensorShape([None]),
                tensor_shape.TensorShape([3, None]))
-      fn_true = _BuildBranch(dtype, shape)
-      fn_false = _BuildBranch(dtype, shape)
+      fn_true, true_tensors = _BuildBranch(dtype, shape)
+      fn_false, false_tensors = _BuildBranch(dtype, shape)
       self._testShape(fn_true, fn_false, shape)
       self._testReturnValues(fn_true, fn_false,
                              (np.zeros([2, 2]), np.zeros(5), np.ones([3, 3])),
-                             (np.zeros([2, 2]), np.zeros(5), np.ones([3, 3])))
+                             (np.zeros([2, 2]), np.zeros(5), np.ones([3, 3])),
+                             feed_dict={true_tensors[0]: np.zeros([2, 2]),
+                                        false_tensors[0]: np.zeros([2, 2]),
+                                        true_tensors[1]: np.zeros([5]),
+                                        false_tensors[1]: np.zeros([5]),
+                                        true_tensors[2]: np.ones([3, 3]),
+                                        false_tensors[2]: np.ones([3, 3])})
 
   def test_tensor_arrays(self):
     element_shape = tensor_shape.TensorShape([2])
-- 
GitLab


From 20aa9e0a9f129ed929cea1fb45ec12b7be3ac68e Mon Sep 17 00:00:00 2001
From: Josh Varty <JoshVarty@users.noreply.github.com>
Date: Wed, 6 Dec 2017 19:22:11 -0500
Subject: [PATCH 0717/1225] Add batch support for various image_ops (#14854)

* Change fix_image_flip_shape to create shape based on rank

* Refactor duplicate code to _EnsureTensorIs4D

* Convert flip_up_down

* Temporarily comment out ValueError Check

* Add batch support for flip_left_right

* Add batch support for random_flip_left_right

* Add batch support for random_flip_up_down

* Add batch support for transpose_image

* Add batch support for rot90

* Correct comments

* Refactor so as not to introduce new method

* Add tests for batch inputs

* Fix test to expect 3 or 4 dims

* Fix misc Pylint issues in image_ops_impl.py

* Fix misc Pyline issues in image_ops_test.py

* Refactor into _flip_image

* Correct Idempotent to Involution

* Check if >20 images were flipped

* Reverse condition in rot90

* Remove duplicate comment

* Address feedback

* Punctuation
---
 tensorflow/python/ops/image_ops_impl.py | 244 ++++++++++++++----------
 tensorflow/python/ops/image_ops_test.py | 194 +++++++++++++++++--
 2 files changed, 322 insertions(+), 116 deletions(-)

diff --git a/tensorflow/python/ops/image_ops_impl.py b/tensorflow/python/ops/image_ops_impl.py
index b9c89d62d5..4e77ef8fcf 100644
--- a/tensorflow/python/ops/image_ops_impl.py
+++ b/tensorflow/python/ops/image_ops_impl.py
@@ -182,8 +182,81 @@ def _CheckAtLeast3DImage(image, require_static=True):
     return []
 
 
-def fix_image_flip_shape(image, result):
-  """Set the shape to 3 dimensional if we don't know anything else.
+def _EnsureTensorIs4D(image):
+  """Converts `image` to a 4-D Tensor if it is not already one.
+
+  Args:
+    image: 4-D Tensor of shape `[batch, height, width, channels]` or
+           3-D Tensor of shape `[height, width, channels]`.
+  Raises:
+    ValueError: if image is not a 3-D or 4-D Tensor.
+
+  Returns:
+    If `image` was 4-D, a 4-D float Tensor of shape
+    `[batch, width, height, channels]`
+    If `image` was 3-D, a 4-D float Tensor of shape
+    `[1, width, height, channels]`
+  """
+  original_shape = image.get_shape()
+  is_batch = True
+  if original_shape.ndims == 3:
+    is_batch = False
+    image = array_ops.expand_dims(image, 0)
+  elif original_shape.ndims is None:
+    is_batch = False
+    image = array_ops.expand_dims(image, 0)
+    image.set_shape([None] * 4)
+  elif original_shape.ndims != 4:
+    raise ValueError('\'image\' must have either 3 or 4 dimensions.')
+
+  return (image, is_batch)
+
+def _flip_image(image, axis, random=False, seed=None):
+  """
+  Flips image(s) around a given axis.
+
+  Args:
+    image:  4-D Tensor of shape `[batch, height, width, channels]` or
+            3-D Tensor of shape `[height, width, channels]`.
+    axis:   A Python integer representing the axis on which the image(s)
+            will be flipped. Note: The provided axis must be specified relative
+            to the shape `[batch, height, width, channels]` as 3-D images will
+            be expanded to fit this shape before being flipped.
+    random: A boolean representing whether or not we should flip the
+            image(s) at random.
+    seed:   Python integer. Used to create a random seed. See
+            tf.set_random_seed for behavior.
+
+  Raises:
+    ValueError: if image is not a 3-D or 4-D Tensor.
+
+  Returns:
+    A tensor of the same type and shape as `image`
+  """
+  image = ops.convert_to_tensor(image, name='image')
+  original_image = image
+  image, is_batch = _EnsureTensorIs4D(image)
+
+  image = control_flow_ops.with_dependencies(
+    _CheckAtLeast3DImage(image, require_static=False), image)
+
+  batch, _, _, _ = _ImageDimensions(image, rank=4)
+  flipped = array_ops.reverse(image, [axis])
+
+  if random == True:
+    uniform_random = random_ops.random_uniform([batch], 0, 1.0, seed=seed)
+    mirror_cond = math_ops.less(uniform_random, 0.5)
+    flipped = array_ops.where(mirror_cond, x=image, y=flipped)
+
+  if is_batch:
+    return fix_image_flip_shape(original_image, flipped, rank=4)
+
+  flipped = array_ops.squeeze(flipped, squeeze_dims=[0])
+  return fix_image_flip_shape(original_image, flipped, rank=3)
+
+
+def fix_image_flip_shape(image, result, rank=3):
+  """Set the shape to original dimensional if we don't know anything else.
 
   Args:
     image: original image size
@@ -195,171 +268,174 @@ def fix_image_flip_shape(image, result):
 
   image_shape = image.get_shape()
   if image_shape == tensor_shape.unknown_shape():
-    result.set_shape([None, None, None])
+    result.set_shape([None] * rank)
   else:
     result.set_shape(image_shape)
   return result
 
 
 def random_flip_up_down(image, seed=None):
-  """Randomly flips an image vertically (upside down).
+  """Randomly flips image(s) vertically (upside down).
 
-  With a 1 in 2 chance, outputs the contents of `image` flipped along the first
-  dimension, which is `height`.  Otherwise output the image as-is.
+  With a 1 in 2 chance, outputs the contents of `image` flipped along the height
+  dimension. Otherwise output the image as-is.
 
   Args:
-    image: A 3-D tensor of shape `[height, width, channels].`
+    image: 4-D Tensor of shape `[batch, height, width, channels]` or
+           3-D Tensor of shape `[height, width, channels]`.
     seed: A Python integer. Used to create a random seed. See
       @{tf.set_random_seed}
       for behavior.
 
   Returns:
-    A 3-D tensor of the same type and shape as `image`.
+    A tensor of the same type and shape as `image`.
 
   Raises:
     ValueError: if the shape of `image` not supported.
   """
-  image = ops.convert_to_tensor(image, name='image')
-  image = control_flow_ops.with_dependencies(
-      _Check3DImage(image, require_static=False), image)
-  uniform_random = random_ops.random_uniform([], 0, 1.0, seed=seed)
-  mirror_cond = math_ops.less(uniform_random, .5)
-  result = control_flow_ops.cond(mirror_cond,
-                                 lambda: array_ops.reverse(image, [0]),
-                                 lambda: image)
-  return fix_image_flip_shape(image, result)
+  return _flip_image(image, axis=1, random=True, seed=seed)
 
 
 def random_flip_left_right(image, seed=None):
-  """Randomly flip an image horizontally (left to right).
+  """Randomly flip image(s) horizontally (left to right).
 
   With a 1 in 2 chance, outputs the contents of `image` flipped along the
-  second dimension, which is `width`.  Otherwise output the image as-is.
+  width dimension. Otherwise output the image as-is.
 
   Args:
-    image: A 3-D tensor of shape `[height, width, channels].`
+    image: 4-D Tensor of shape `[batch, height, width, channels]` or
+           3-D Tensor of shape `[height, width, channels]`.
     seed: A Python integer. Used to create a random seed. See
       @{tf.set_random_seed}
       for behavior.
 
   Returns:
-    A 3-D tensor of the same type and shape as `image`.
+    A tensor of the same type and shape as `image`.
 
   Raises:
     ValueError: if the shape of `image` not supported.
   """
-  image = ops.convert_to_tensor(image, name='image')
-  image = control_flow_ops.with_dependencies(
-      _Check3DImage(image, require_static=False), image)
-  uniform_random = random_ops.random_uniform([], 0, 1.0, seed=seed)
-  mirror_cond = math_ops.less(uniform_random, .5)
-  result = control_flow_ops.cond(mirror_cond,
-                                 lambda: array_ops.reverse(image, [1]),
-                                 lambda: image)
-  return fix_image_flip_shape(image, result)
+  return _flip_image(image, axis=2, random=True, seed=seed)
 
 
 def flip_left_right(image):
   """Flip an image horizontally (left to right).
 
-  Outputs the contents of `image` flipped along the second dimension, which is
-  `width`.
+  Outputs the contents of `image` flipped along the width dimension.
 
   See also `reverse()`.
 
   Args:
-    image: A 3-D tensor of shape `[height, width, channels].`
+    image: 4-D Tensor of shape `[batch, height, width, channels]` or
+           3-D Tensor of shape `[height, width, channels]`.
 
   Returns:
-    A 3-D tensor of the same type and shape as `image`.
+    A tensor of the same type and shape as `image`.
 
   Raises:
     ValueError: if the shape of `image` not supported.
   """
-  image = ops.convert_to_tensor(image, name='image')
-  image = control_flow_ops.with_dependencies(
-      _Check3DImage(image, require_static=False), image)
-  return fix_image_flip_shape(image, array_ops.reverse(image, [1]))
-
+  return _flip_image(image, axis=2, random=False)
 
 def flip_up_down(image):
   """Flip an image vertically (upside down).
 
-  Outputs the contents of `image` flipped along the first dimension, which is
-  `height`.
+  Outputs the contents of `image` flipped along the height dimension.
 
   See also `reverse()`.
 
   Args:
-    image: A 3-D tensor of shape `[height, width, channels].`
+    image: 4-D Tensor of shape `[batch, height, width, channels]` or
+           3-D Tensor of shape `[height, width, channels]`.
 
   Returns:
-    A 3-D tensor of the same type and shape as `image`.
+    A tensor of the same type and shape as `image`.
 
   Raises:
     ValueError: if the shape of `image` not supported.
   """
-  image = ops.convert_to_tensor(image, name='image')
-  image = control_flow_ops.with_dependencies(
-      _Check3DImage(image, require_static=False), image)
-  return fix_image_flip_shape(image, array_ops.reverse(image, [0]))
+  return _flip_image(image, axis=1, random=False)
 
 
 def rot90(image, k=1, name=None):
-  """Rotate an image counter-clockwise by 90 degrees.
+  """Rotate image(s) counter-clockwise by 90 degrees.
 
   Args:
-    image: A 3-D tensor of shape `[height, width, channels]`.
+    image: 4-D Tensor of shape `[batch, height, width, channels]` or
+           3-D Tensor of shape `[height, width, channels]`.
     k: A scalar integer. The number of times the image is rotated by 90 degrees.
     name: A name for this operation (optional).
 
   Returns:
-    A rotated 3-D tensor of the same type and shape as `image`.
+    A rotated of the same type and shape as `image`.
+
+  Raises:
+    ValueError: if the shape of `image` not supported.
   """
   with ops.name_scope(name, 'rot90', [image, k]) as scope:
     image = ops.convert_to_tensor(image, name='image')
+    image, is_batch = _EnsureTensorIs4D(image)
     image = control_flow_ops.with_dependencies(
-        _Check3DImage(image, require_static=False), image)
+        _CheckAtLeast3DImage(image, require_static=False), image)
     k = ops.convert_to_tensor(k, dtype=dtypes.int32, name='k')
     k.get_shape().assert_has_rank(0)
     k = math_ops.mod(k, 4)
 
     def _rot90():
-      return array_ops.transpose(array_ops.reverse_v2(image, [1]),
-                                 [1, 0, 2])
+      return array_ops.transpose(array_ops.reverse_v2(image, [2]),
+                                 [0, 2, 1, 3])
     def _rot180():
-      return array_ops.reverse_v2(image, [0, 1])
+      return array_ops.reverse_v2(image, [1, 2])
     def _rot270():
-      return array_ops.reverse_v2(array_ops.transpose(image, [1, 0, 2]),
-                                  [1])
+      return array_ops.reverse_v2(array_ops.transpose(image, [0, 2, 1, 3]),
+                                  [2])
     cases = [(math_ops.equal(k, 1), _rot90),
              (math_ops.equal(k, 2), _rot180),
              (math_ops.equal(k, 3), _rot270)]
 
-    ret = control_flow_ops.case(cases, default=lambda: image, exclusive=True,
+    result = control_flow_ops.case(cases, default=lambda: image, exclusive=True,
                                 name=scope)
-    ret.set_shape([None, None, image.get_shape()[2]])
-    return ret
+
+    shape = image.get_shape()
+    result.set_shape([shape[0], None, None, shape[3]])
+
+    if is_batch == True:
+      return result
+
+    result = array_ops.squeeze(result, squeeze_dims=[0])
+    return result
 
 
 def transpose_image(image):
-  """Transpose an image by swapping the first and second dimension.
+  """Transpose an image by swapping the height and width dimension.
 
   See also `transpose()`.
 
   Args:
-    image: 3-D tensor of shape `[height, width, channels]`
+    image: 4-D Tensor of shape `[batch, height, width, channels]` or
+           3-D Tensor of shape `[height, width, channels]`.
 
   Returns:
-    A 3-D tensor of shape `[width, height, channels]`
+    If `image` was 4-D, a 4-D float Tensor of shape
+    `[batch, width, height, channels]`
+    If `image` was 3-D, a 3-D float Tensor of shape
+    `[width, height, channels]`
 
   Raises:
     ValueError: if the shape of `image` not supported.
   """
   image = ops.convert_to_tensor(image, name='image')
+  image, is_batch = _EnsureTensorIs4D(image)
   image = control_flow_ops.with_dependencies(
-      _Check3DImage(image, require_static=False), image)
-  return array_ops.transpose(image, [1, 0, 2], name='transpose_image')
+      _CheckAtLeast3DImage(image, require_static=False), image)
+
+  result = array_ops.transpose(image, [0, 2, 1, 3], name='transpose_image')
+
+  if is_batch:
+    return result
+
+  result = array_ops.squeeze(result, squeeze_dims=[0])
+  return result
 
 
 def central_crop(image, central_fraction):
@@ -445,21 +521,9 @@ def pad_to_bounding_box(image, offset_height, offset_width, target_height,
       negative.
   """
   image = ops.convert_to_tensor(image, name='image')
-
-  is_batch = True
-  image_shape = image.get_shape()
-  if image_shape.ndims == 3:
-    is_batch = False
-    image = array_ops.expand_dims(image, 0)
-  elif image_shape.ndims is None:
-    is_batch = False
-    image = array_ops.expand_dims(image, 0)
-    image.set_shape([None] * 4)
-  elif image_shape.ndims != 4:
-    raise ValueError('\'image\' must have either 3 or 4 dimensions.')
+  image, is_batch = _EnsureTensorIs4D(image)
 
   assert_ops = _CheckAtLeast3DImage(image, require_static=False)
-
   batch, height, width, depth = _ImageDimensions(image, rank=4)
 
   after_padding_width = target_width - offset_width - width
@@ -524,21 +588,9 @@ def crop_to_bounding_box(image, offset_height, offset_width, target_height,
       negative, or either `target_height` or `target_width` is not positive.
   """
   image = ops.convert_to_tensor(image, name='image')
-
-  is_batch = True
-  image_shape = image.get_shape()
-  if image_shape.ndims == 3:
-    is_batch = False
-    image = array_ops.expand_dims(image, 0)
-  elif image_shape.ndims is None:
-    is_batch = False
-    image = array_ops.expand_dims(image, 0)
-    image.set_shape([None] * 4)
-  elif image_shape.ndims != 4:
-    raise ValueError('\'image\' must have either 3 or 4 dimensions.')
+  image, is_batch = _EnsureTensorIs4D(image)
 
   assert_ops = _CheckAtLeast3DImage(image, require_static=False)
-
   batch, height, width, depth = _ImageDimensions(image, rank=4)
 
   assert_ops += _assert(offset_width >= 0, ValueError,
@@ -599,17 +651,7 @@ def resize_image_with_crop_or_pad(image, target_height, target_width):
     `[new_height, new_width, channels]`.
   """
   image = ops.convert_to_tensor(image, name='image')
-  image_shape = image.get_shape()
-  is_batch = True
-  if image_shape.ndims == 3:
-    is_batch = False
-    image = array_ops.expand_dims(image, 0)
-  elif image_shape.ndims is None:
-    is_batch = False
-    image = array_ops.expand_dims(image, 0)
-    image.set_shape([None] * 4)
-  elif image_shape.ndims != 4:
-    raise ValueError('\'image\' must have either 3 or 4 dimensions.')
+  image, is_batch = _EnsureTensorIs4D(image)
 
   assert_ops = _CheckAtLeast3DImage(image, require_static=False)
   assert_ops += _assert(target_width > 0, ValueError,
diff --git a/tensorflow/python/ops/image_ops_test.py b/tensorflow/python/ops/image_ops_test.py
index d1554b399f..e18a47ff8a 100644
--- a/tensorflow/python/ops/image_ops_test.py
+++ b/tensorflow/python/ops/image_ops_test.py
@@ -729,7 +729,7 @@ class AdjustSaturationTest(test_util.TensorFlowTestCase):
 
 class FlipTransposeRotateTest(test_util.TensorFlowTestCase):
 
-  def testIdempotentLeftRight(self):
+  def testInvolutionLeftRight(self):
     x_np = np.array([[1, 2, 3], [1, 2, 3]], dtype=np.uint8).reshape([2, 3, 1])
     with self.test_session(use_gpu=True):
       x_tf = constant_op.constant(x_np, shape=x_np.shape)
@@ -737,6 +737,15 @@ class FlipTransposeRotateTest(test_util.TensorFlowTestCase):
       y_tf = y.eval()
       self.assertAllEqual(y_tf, x_np)
 
+  def testInvolutionLeftRightWithBatch(self):
+    x_np = np.array([[[1, 2, 3], [1, 2, 3]], [[1, 2, 3], [1, 2, 3]]],
+                    dtype=np.uint8).reshape([2, 2, 3, 1])
+    with self.test_session(use_gpu=True):
+      x_tf = constant_op.constant(x_np, shape=x_np.shape)
+      y = image_ops.flip_left_right(image_ops.flip_left_right(x_tf))
+      y_tf = y.eval()
+      self.assertAllEqual(y_tf, x_np)
+
   def testLeftRight(self):
     x_np = np.array([[1, 2, 3], [1, 2, 3]], dtype=np.uint8).reshape([2, 3, 1])
     y_np = np.array([[3, 2, 1], [3, 2, 1]], dtype=np.uint8).reshape([2, 3, 1])
@@ -747,17 +756,30 @@ class FlipTransposeRotateTest(test_util.TensorFlowTestCase):
       y_tf = y.eval()
       self.assertAllEqual(y_tf, y_np)
 
+  def testLeftRightWithBatch(self):
+    x_np = np.array([[[1, 2, 3], [1, 2, 3]], [[1, 2, 3], [1, 2, 3]]],
+                    dtype=np.uint8).reshape([2, 2, 3, 1])
+    y_np = np.array([[[3, 2, 1], [3, 2, 1]], [[3, 2, 1], [3, 2, 1]]],
+                    dtype=np.uint8).reshape([2, 2, 3, 1])
+
+    with self.test_session(use_gpu=True):
+      x_tf = constant_op.constant(x_np, shape=x_np.shape)
+      y = image_ops.flip_left_right(x_tf)
+      y_tf = y.eval()
+      self.assertAllEqual(y_tf, y_np)
+
   def testRandomFlipLeftRight(self):
     x_np = np.array([[1, 2, 3], [1, 2, 3]], dtype=np.uint8).reshape([2, 3, 1])
     y_np = np.array([[3, 2, 1], [3, 2, 1]], dtype=np.uint8).reshape([2, 3, 1])
+    seed = 42
 
     with self.test_session(use_gpu=True):
       x_tf = constant_op.constant(x_np, shape=x_np.shape)
-      y = image_ops.random_flip_left_right(x_tf)
+      y = image_ops.random_flip_left_right(x_tf, seed=seed)
 
       count_flipped = 0
       count_unflipped = 0
-      for _ in range(50):
+      for _ in range(100):
         y_tf = y.eval()
         if y_tf[0][0] == 1:
           self.assertAllEqual(y_tf, x_np)
@@ -765,10 +787,46 @@ class FlipTransposeRotateTest(test_util.TensorFlowTestCase):
         else:
           self.assertAllEqual(y_tf, y_np)
           count_flipped += 1
-      self.assertGreaterEqual(count_flipped, 1)
-      self.assertGreaterEqual(count_unflipped, 1)
+      # 100 trials
+      # Mean: 50
+      # Std Dev: ~5
+      # Six Sigma: 50 - (5 * 6) = 20
+      self.assertGreaterEqual(count_flipped, 20)
+      self.assertGreaterEqual(count_unflipped, 20)
+
+  def testRandomFlipLeftRightWithBatch(self):
+    x_np = np.array([[[1, 2, 3], [1, 2, 3]], [[1, 2, 3], [1, 2, 3]]],
+                    dtype=np.uint8).reshape([2, 2, 3, 1])
+    y_np = np.array([[[3, 2, 1], [3, 2, 1]], [[3, 2, 1], [3, 2, 1]]],
+                    dtype=np.uint8).reshape([2, 2, 3, 1])
+    seed = 42
 
-  def testIdempotentUpDown(self):
+    with self.test_session(use_gpu=True):
+      x_tf = constant_op.constant(x_np, shape=x_np.shape).eval()
+      y = image_ops.random_flip_left_right(x_tf, seed=seed)
+      count_flipped = 0
+      count_unflipped = 0
+      for _ in range(50):
+        y_tf = y.eval()
+        for index in range(0, x_tf.shape[0]):
+          current_x_tf = x_tf[index]
+          current_y_tf = y_tf[index]
+          current_y_np = y_np[index]
+
+          if current_y_tf[0][0] == 1:
+            self.assertAllEqual(current_y_tf, current_x_tf)
+            count_unflipped += 1
+          else:
+            self.assertAllEqual(current_y_tf, current_y_np)
+            count_flipped += 1
+      # Batch size 2 * 50 trials = 100
+      # Mean: 50
+      # Std Dev: ~5
+      # Six Sigma: 50 - (5 * 6) = 20
+      self.assertGreaterEqual(count_flipped, 20)
+      self.assertGreaterEqual(count_unflipped, 20)
+
+  def testInvolutionUpDown(self):
     x_np = np.array([[1, 2, 3], [4, 5, 6]], dtype=np.uint8).reshape([2, 3, 1])
 
     with self.test_session(use_gpu=True):
@@ -777,6 +835,16 @@ class FlipTransposeRotateTest(test_util.TensorFlowTestCase):
       y_tf = y.eval()
       self.assertAllEqual(y_tf, x_np)
 
+  def testInvolutionUpDownWithBatch(self):
+    x_np = np.array([[[1, 2, 3], [4, 5, 6]], [[7, 8, 9], [10, 11, 12]]],
+                    dtype=np.uint8).reshape([2, 2, 3, 1])
+
+    with self.test_session(use_gpu=True):
+      x_tf = constant_op.constant(x_np, shape=x_np.shape)
+      y = image_ops.flip_up_down(image_ops.flip_up_down(x_tf))
+      y_tf = y.eval()
+      self.assertAllEqual(y_tf, x_np)
+
   def testUpDown(self):
     x_np = np.array([[1, 2, 3], [4, 5, 6]], dtype=np.uint8).reshape([2, 3, 1])
     y_np = np.array([[4, 5, 6], [1, 2, 3]], dtype=np.uint8).reshape([2, 3, 1])
@@ -787,16 +855,29 @@ class FlipTransposeRotateTest(test_util.TensorFlowTestCase):
       y_tf = y.eval()
       self.assertAllEqual(y_tf, y_np)
 
+  def testUpDownWithBatch(self):
+    x_np = np.array([[[1, 2, 3], [4, 5, 6]], [[7, 8, 9], [10, 11, 12]]],
+                    dtype=np.uint8).reshape([2, 2, 3, 1])
+    y_np = np.array([[[4, 5, 6], [1, 2, 3]], [[10, 11, 12], [7, 8, 9]]],
+                    dtype=np.uint8).reshape([2, 2, 3, 1])
+
+    with self.test_session(use_gpu=True):
+      x_tf = constant_op.constant(x_np, shape=x_np.shape)
+      y = image_ops.flip_up_down(x_tf)
+      y_tf = y.eval()
+      self.assertAllEqual(y_tf, y_np)
+
   def testRandomFlipUpDown(self):
     x_np = np.array([[1, 2, 3], [4, 5, 6]], dtype=np.uint8).reshape([2, 3, 1])
     y_np = np.array([[4, 5, 6], [1, 2, 3]], dtype=np.uint8).reshape([2, 3, 1])
+    seed = 42
 
     with self.test_session(use_gpu=True):
       x_tf = constant_op.constant(x_np, shape=x_np.shape)
-      y = image_ops.random_flip_up_down(x_tf)
+      y = image_ops.random_flip_up_down(x_tf, seed=42)
       count_flipped = 0
       count_unflipped = 0
-      for _ in range(50):
+      for _ in range(100):
         y_tf = y.eval()
         if y_tf[0][0] == 1:
           self.assertAllEqual(y_tf, x_np)
@@ -804,10 +885,45 @@ class FlipTransposeRotateTest(test_util.TensorFlowTestCase):
         else:
           self.assertAllEqual(y_tf, y_np)
           count_flipped += 1
-      self.assertGreaterEqual(count_flipped, 1)
-      self.assertGreaterEqual(count_unflipped, 1)
+      # 100 trials
+      # Mean: 50
+      # Std Dev: ~5
+      # Six Sigma: 50 - (5 * 6) = 20
+      self.assertGreaterEqual(count_flipped, 20)
+      self.assertGreaterEqual(count_unflipped, 20)
+
+  def testRandomFlipUpDownWithBatch(self):
+    x_np = np.array([[[1, 2, 3], [4, 5, 6]], [[1, 2, 3], [4, 5, 6]]],
+                    dtype=np.uint8).reshape([2, 2, 3, 1])
+    y_np = np.array([[[4, 5, 6], [1, 2, 3]], [[4, 5, 6], [1, 2, 3]]],
+                    dtype=np.uint8).reshape([2, 2, 3, 1])
+    seed = 42
 
-  def testIdempotentTranspose(self):
+    with self.test_session(use_gpu=True):
+      x_tf = constant_op.constant(x_np, shape=x_np.shape).eval()
+      y = image_ops.random_flip_up_down(x_tf, seed=42)
+      count_flipped = 0
+      count_unflipped = 0
+      for _ in range(50):
+        y_tf = y.eval()
+        for index in range(0, x_tf.shape[0]):
+          current_x_tf = x_tf[index]
+          current_y_tf = y_tf[index]
+          current_y_np = y_np[index]
+        if current_y_tf[0][0] == 1:
+          self.assertAllEqual(current_y_tf, current_x_tf)
+          count_unflipped += 1
+        else:
+          self.assertAllEqual(current_y_tf, current_y_np)
+          count_flipped += 1
+      # Batch size 2 * 50 trials = 100
+      # Mean: 50
+      # Std Dev: ~5
+      # Six Sigma: 50 - (5 * 6) = 20
+      self.assertGreaterEqual(count_flipped, 20)
+      self.assertGreaterEqual(count_unflipped, 20)
+
+  def testInvolutionTranspose(self):
     x_np = np.array([[1, 2, 3], [4, 5, 6]], dtype=np.uint8).reshape([2, 3, 1])
 
     with self.test_session(use_gpu=True):
@@ -816,6 +932,16 @@ class FlipTransposeRotateTest(test_util.TensorFlowTestCase):
       y_tf = y.eval()
       self.assertAllEqual(y_tf, x_np)
 
+  def testInvolutionTransposeWithBatch(self):
+    x_np = np.array([[[1, 2, 3], [4, 5, 6]], [[7, 8, 9], [10, 11, 12]]],
+                    dtype=np.uint8).reshape([2, 2, 3, 1])
+
+    with self.test_session(use_gpu=True):
+      x_tf = constant_op.constant(x_np, shape=x_np.shape)
+      y = image_ops.transpose_image(image_ops.transpose_image(x_tf))
+      y_tf = y.eval()
+      self.assertAllEqual(y_tf, x_np)
+
   def testTranspose(self):
     x_np = np.array([[1, 2, 3], [4, 5, 6]], dtype=np.uint8).reshape([2, 3, 1])
     y_np = np.array([[1, 4], [2, 5], [3, 6]], dtype=np.uint8).reshape([3, 2, 1])
@@ -826,11 +952,28 @@ class FlipTransposeRotateTest(test_util.TensorFlowTestCase):
       y_tf = y.eval()
       self.assertAllEqual(y_tf, y_np)
 
+  def testTransposeWithBatch(self):
+    x_np = np.array([[[1, 2, 3], [4, 5, 6]], [[7, 8, 9], [10, 11, 12]]],
+                    dtype=np.uint8).reshape([2, 2, 3, 1])
+
+    y_np = np.array([[[1, 4], [2, 5], [3, 6]], [[7, 10], [8, 11], [9, 12]]],
+                    dtype=np.uint8).reshape([2, 3, 2, 1])
+
+    with self.test_session(use_gpu=True):
+      x_tf = constant_op.constant(x_np, shape=x_np.shape)
+      y = image_ops.transpose_image(x_tf)
+      y_tf = y.eval()
+      self.assertAllEqual(y_tf, y_np)
+
   def testPartialShapes(self):
     p_unknown_rank = array_ops.placeholder(dtypes.uint8)
-    p_unknown_dims = array_ops.placeholder(
+    p_unknown_dims_3 = array_ops.placeholder(
         dtypes.uint8, shape=[None, None, None])
+    p_unknown_dims_4 = array_ops.placeholder(
+        dtypes.uint8, shape=[None, None, None, None])
     p_unknown_width = array_ops.placeholder(dtypes.uint8, shape=[64, None, 3])
+    p_unknown_batch = array_ops.placeholder(dtypes.uint8,
+                                            shape=[None, 64, 64, 3])
 
     p_wrong_rank = array_ops.placeholder(dtypes.uint8, shape=[None, None])
     p_zero_dim = array_ops.placeholder(dtypes.uint8, shape=[64, 0, 3])
@@ -842,12 +985,17 @@ class FlipTransposeRotateTest(test_util.TensorFlowTestCase):
     ]:
       transformed_unknown_rank = op(p_unknown_rank)
       self.assertEqual(3, transformed_unknown_rank.get_shape().ndims)
-      transformed_unknown_dims = op(p_unknown_dims)
-      self.assertEqual(3, transformed_unknown_dims.get_shape().ndims)
+      transformed_unknown_dims_3 = op(p_unknown_dims_3)
+      self.assertEqual(3, transformed_unknown_dims_3.get_shape().ndims)
+      transformed_unknown_dims_4 = op(p_unknown_dims_4)
+      self.assertEqual(4, transformed_unknown_dims_4.get_shape().ndims)
       transformed_unknown_width = op(p_unknown_width)
       self.assertEqual(3, transformed_unknown_width.get_shape().ndims)
+      transformed_unknown_batch = op(p_unknown_batch)
+      self.assertEqual(4, transformed_unknown_batch.get_shape().ndims)
 
-      with self.assertRaisesRegexp(ValueError, "must be three-dimensional"):
+      with self.assertRaisesRegexp(ValueError,
+                                   "must have either 3 or 4 dimensions."):
         op(p_wrong_rank)
       with self.assertRaisesRegexp(ValueError, "must be > 0"):
         op(p_zero_dim)
@@ -860,6 +1008,14 @@ class FlipTransposeRotateTest(test_util.TensorFlowTestCase):
         rotated = image_ops.rot90(rotated)
       self.assertAllEqual(image, rotated.eval())
 
+  def testRot90GroupOrderWithBatch(self):
+    image = np.arange(48, dtype=np.uint8).reshape([2, 2, 4, 3])
+    with self.test_session(use_gpu=True):
+      rotated = image
+      for _ in xrange(4):
+        rotated = image_ops.rot90(rotated)
+      self.assertAllEqual(image, rotated.eval())
+
   def testRot90NumpyEquivalence(self):
     image = np.arange(24, dtype=np.uint8).reshape([2, 4, 3])
     with self.test_session(use_gpu=True):
@@ -869,6 +1025,14 @@ class FlipTransposeRotateTest(test_util.TensorFlowTestCase):
         y_np = np.rot90(image, k=k)
         self.assertAllEqual(y_np, y_tf.eval({k_placeholder: k}))
 
+  def testRot90NumpyEquivalenceWithBatch(self):
+    image = np.arange(48, dtype=np.uint8).reshape([2, 2, 4, 3])
+    with self.test_session(use_gpu=True):
+      k_placeholder = array_ops.placeholder(dtypes.int32, shape=[])
+      y_tf = image_ops.rot90(image, k_placeholder)
+      for k in xrange(4):
+        y_np = np.rot90(image, k=k, axes=(1, 2))
+        self.assertAllEqual(y_np, y_tf.eval({k_placeholder: k}))
 
 class RandomFlipTest(test_util.TensorFlowTestCase):
 
-- 
GitLab


From 9244afcbfe54bc621eb99035dc0d8528a2bf731d Mon Sep 17 00:00:00 2001
From: Benoit Steiner <bsteiner@google.com>
Date: Wed, 6 Dec 2017 16:45:51 -0800
Subject: [PATCH 0718/1225] Minor changes to help debugging graph corruptions

PiperOrigin-RevId: 178173273
---
 tensorflow/core/grappler/optimizers/constant_folding.cc | 4 ++--
 tensorflow/core/grappler/utils.cc                       | 3 ++-
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/tensorflow/core/grappler/optimizers/constant_folding.cc b/tensorflow/core/grappler/optimizers/constant_folding.cc
index 8426aa1c3f..718aa69ebf 100644
--- a/tensorflow/core/grappler/optimizers/constant_folding.cc
+++ b/tensorflow/core/grappler/optimizers/constant_folding.cc
@@ -452,8 +452,8 @@ Status ConstantFolding::MaterializeBroadcastGradientArgs(
     }
   }
 
-  auto outputs = node_map_->GetOutputs(node.name());
-  for (const auto& output : outputs) {
+  const std::set<NodeDef*> outputs = node_map_->GetOutputs(node.name());
+  for (NodeDef* output : outputs) {
     for (int k = 0; k < output->input_size(); ++k) {
       int port;
       string node_name = ParseNodeName(output->input(k), &port);
diff --git a/tensorflow/core/grappler/utils.cc b/tensorflow/core/grappler/utils.cc
index afcb465d27..fc80772360 100644
--- a/tensorflow/core/grappler/utils.cc
+++ b/tensorflow/core/grappler/utils.cc
@@ -374,7 +374,8 @@ Status SimpleGraphView::Initialize(const GraphDef& graph, bool dedup_inputs,
     for (const string& input : node.input()) {
       auto it = name_to_index_.find(NodeName(input));
       if (it == name_to_index_.end()) {
-        return errors::InvalidArgument("Invalid input name: ", input);
+        return errors::InvalidArgument("Non-existent input ", input,
+                                       " for node ", node.name());
       }
       const int input_idx = it->second;
       inputs_[node_idx].push_back(input_idx);
-- 
GitLab


From dcd7431b6d0f4681941860b6b20c3edfd618035b Mon Sep 17 00:00:00 2001
From: Anush Elangovan <anush@nod-labs.com>
Date: Fri, 24 Nov 2017 14:48:36 -0600
Subject: [PATCH 0719/1225] [XLA] Guard AVX, SSE and NEON instructions

On OSX you currently run into linker errors because unsupported
instructions are registered.

clang on OSX doesn't respect __attribute((WEAK)) (see PR#14893).
So we add ifdefs to register only the supported instructions.

Also register __sincos on __APPLE__ platforms.

TEST=Build tensorflow/compiler/aot/tests:tfcompile builds
successfully on OSX (10.13.2)
---
 .../xla/service/cpu/cpu_runtime_avx.h         |  6 ++--
 .../xla/service/cpu/cpu_runtime_neon.h        |  6 ++--
 .../xla/service/cpu/cpu_runtime_sse4_1.h      |  7 ++--
 .../xla/service/cpu/simple_orc_jit.cc         | 32 ++++++++++++++++---
 4 files changed, 40 insertions(+), 11 deletions(-)

diff --git a/tensorflow/compiler/xla/service/cpu/cpu_runtime_avx.h b/tensorflow/compiler/xla/service/cpu/cpu_runtime_avx.h
index acfada8540..74ae6d00c9 100644
--- a/tensorflow/compiler/xla/service/cpu/cpu_runtime_avx.h
+++ b/tensorflow/compiler/xla/service/cpu/cpu_runtime_avx.h
@@ -38,14 +38,16 @@ typedef float V8F32AVX __attribute__((__vector_size__(32)));
 
 extern "C" {
 
+#ifdef __AVX__
 // The following functions are vectorized versions of a selection of libm
 // library functions.
 // References to these functions are created by the LLVM vectorizer.
 xla::cpu::runtime::V8F32AVX __xla_cpu_runtime_ExpV8F32AVX(
-    xla::cpu::runtime::V8F32AVX x) TF_ATTRIBUTE_WEAK;
+    xla::cpu::runtime::V8F32AVX x);
 
 xla::cpu::runtime::V8F32AVX __xla_cpu_runtime_LogV8F32AVX(
-    xla::cpu::runtime::V8F32AVX x) TF_ATTRIBUTE_WEAK;
+    xla::cpu::runtime::V8F32AVX x);
+#endif
 }
 
 #endif  // TENSORFLOW_COMPILER_XLA_SERVICE_CPU_CPU_RUNTIME_AVX_H_
diff --git a/tensorflow/compiler/xla/service/cpu/cpu_runtime_neon.h b/tensorflow/compiler/xla/service/cpu/cpu_runtime_neon.h
index 75cb16b273..645a43858f 100644
--- a/tensorflow/compiler/xla/service/cpu/cpu_runtime_neon.h
+++ b/tensorflow/compiler/xla/service/cpu/cpu_runtime_neon.h
@@ -49,14 +49,16 @@ struct V4F32NEON;
 
 extern "C" {
 
+#ifdef __ARM_NEON__
 // The following functions are vectorized versions of a selection of libm
 // library functions.
 // References to these functions are created by the LLVM vectorizer.
 xla::cpu::runtime::V4F32NEON __xla_cpu_runtime_ExpV4F32NEON(
-    xla::cpu::runtime::V4F32NEON x) TF_ATTRIBUTE_WEAK;
+    xla::cpu::runtime::V4F32NEON x);
 
 xla::cpu::runtime::V4F32NEON __xla_cpu_runtime_LogV4F32NEON(
-    xla::cpu::runtime::V4F32NEON x) TF_ATTRIBUTE_WEAK;
+    xla::cpu::runtime::V4F32NEON x);
+#endif  // __ARM_NEON__
 }
 
 #endif  // TENSORFLOW_COMPILER_XLA_SERVICE_CPU_CPU_RUNTIME_NEON_H_
diff --git a/tensorflow/compiler/xla/service/cpu/cpu_runtime_sse4_1.h b/tensorflow/compiler/xla/service/cpu/cpu_runtime_sse4_1.h
index 96587d10d2..1bd8494bf8 100644
--- a/tensorflow/compiler/xla/service/cpu/cpu_runtime_sse4_1.h
+++ b/tensorflow/compiler/xla/service/cpu/cpu_runtime_sse4_1.h
@@ -39,14 +39,17 @@ typedef float V4F32SSE __attribute__((__vector_size__(16)));
 
 extern "C" {
 
+#ifdef __SSE4_1__
 // The following functions are vectorized versions of a selection of libm
 // library functions.
 // References to these functions are created by the LLVM vectorizer.
 xla::cpu::runtime::V4F32SSE __xla_cpu_runtime_ExpV4F32SSE(
-    xla::cpu::runtime::V4F32SSE x) TF_ATTRIBUTE_WEAK;
+    xla::cpu::runtime::V4F32SSE x);
 
 xla::cpu::runtime::V4F32SSE __xla_cpu_runtime_LogV4F32SSE(
-    xla::cpu::runtime::V4F32SSE x) TF_ATTRIBUTE_WEAK;
+    xla::cpu::runtime::V4F32SSE x);
+#endif
+
 }
 
 #endif  // TENSORFLOW_COMPILER_XLA_SERVICE_CPU_CPU_RUNTIME_SSE4_1_H_
diff --git a/tensorflow/compiler/xla/service/cpu/simple_orc_jit.cc b/tensorflow/compiler/xla/service/cpu/simple_orc_jit.cc
index cda2783307..c942cd6bf1 100644
--- a/tensorflow/compiler/xla/service/cpu/simple_orc_jit.cc
+++ b/tensorflow/compiler/xla/service/cpu/simple_orc_jit.cc
@@ -102,9 +102,21 @@ llvm::StringRef GetHostCpuName() {
 
 CompilerFunctor::VectorIntrinsics GetAvailableIntrinsics() {
   CompilerFunctor::VectorIntrinsics intrinsics;
-  intrinsics.sse_intrinsics = (&__xla_cpu_runtime_ExpV4F32SSE != nullptr);
-  intrinsics.avx_intrinsics = (&__xla_cpu_runtime_ExpV8F32AVX != nullptr);
-  intrinsics.neon_intrinsics = (&__xla_cpu_runtime_ExpV4F32NEON != nullptr);
+#ifdef __SSE4_1__
+  intrinsics.sse_intrinsics = true;
+#else
+  intrinsics.sse_intrinsics = false;
+#endif
+#ifdef __AVX__
+  intrinsics.avx_intrinsics = true;
+#else
+  intrinsics.avx_intrinsics = false;
+#endif
+#ifdef __ARM_NEON__
+  intrinsics.neon_intrinsics = true;
+#else
+  intrinsics.neon_intrinsics = false;
+#endif
   return intrinsics;
 }
 
@@ -201,12 +213,18 @@ bool RegisterKnownJITSymbols() {
   REGISTER_CPU_RUNTIME_SYMBOL(EigenSingleThreadedConvF32);
   REGISTER_CPU_RUNTIME_SYMBOL(EigenSingleThreadedMatMulF32);
   REGISTER_CPU_RUNTIME_SYMBOL(EigenSingleThreadedMatMulF64);
+#ifdef __ARM_NEON__
   REGISTER_CPU_RUNTIME_SYMBOL(ExpV4F32NEON);
-  REGISTER_CPU_RUNTIME_SYMBOL(ExpV4F32SSE);
-  REGISTER_CPU_RUNTIME_SYMBOL(ExpV8F32AVX);
   REGISTER_CPU_RUNTIME_SYMBOL(LogV4F32NEON);
+#endif
+#ifdef __SSE4_1__
+  REGISTER_CPU_RUNTIME_SYMBOL(ExpV4F32SSE);
   REGISTER_CPU_RUNTIME_SYMBOL(LogV4F32SSE);
+#endif
+#ifdef __AVX__
+  REGISTER_CPU_RUNTIME_SYMBOL(ExpV8F32AVX);
   REGISTER_CPU_RUNTIME_SYMBOL(LogV8F32AVX);
+#endif
   REGISTER_CPU_RUNTIME_SYMBOL(ParallelForkJoin);
   REGISTER_CPU_RUNTIME_SYMBOL(ReleaseInfeedBufferAfterDequeue);
   REGISTER_CPU_RUNTIME_SYMBOL(ReleaseOutfeedBufferAfterPopulation);
@@ -275,7 +293,11 @@ bool RegisterKnownJITSymbols() {
   REGISTER_LIBM_SYMBOL(scalbln, double (*)(double, long));
   REGISTER_LIBM_SYMBOL(scalbn, double (*)(double, int));
   REGISTER_LIBM_SYMBOL(sin, double (*)(double));
+#ifdef __APPLE__
+  REGISTER_LIBM_SYMBOL(__sincos, void (*)(double, double*, double*));
+#else
   REGISTER_LIBM_SYMBOL(sincos, void (*)(double, double*, double*));
+#endif
   REGISTER_LIBM_SYMBOL(sinh, double (*)(double));
   REGISTER_LIBM_SYMBOL(sqrt, double (*)(double));
   REGISTER_LIBM_SYMBOL(tan, double (*)(double));
-- 
GitLab


From 80375ffd9a564fb8b6afd8c6860fe9148b8f6704 Mon Sep 17 00:00:00 2001
From: Austin Anderson <angerson@google.com>
Date: Wed, 6 Dec 2017 17:35:59 -0800
Subject: [PATCH 0720/1225] Make note of configure script's WORKSPACE-updating
 ability

A recent change taught the configure script how to create WORKSPACE rules for the Android SDK and NDK. This CL adds a note about this functionality to the build instructions for the TF & TF Lite Android Demo apps.

PiperOrigin-RevId: 178178825
---
 tensorflow/contrib/lite/java/demo/README.md | 4 ++++
 tensorflow/examples/android/README.md       | 4 ++++
 2 files changed, 8 insertions(+)

diff --git a/tensorflow/contrib/lite/java/demo/README.md b/tensorflow/contrib/lite/java/demo/README.md
index 5d13a798e2..2e818f728e 100644
--- a/tensorflow/contrib/lite/java/demo/README.md
+++ b/tensorflow/contrib/lite/java/demo/README.md
@@ -21,6 +21,10 @@
   2. [Edit your `WORKSPACE`](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/examples/android#edit-workspace)
      to add SDK and NDK targets.
 
+     NOTE: As long as you have the SDK and NDK installed, the `./configure`
+     script will create these rules for you. Answer "Yes" when the script asks
+     to automatically configure the `./WORKSPACE`.
+
       - Make sure the `api_level` in `WORKSPACE` is set to an SDK version that
         you have installed.
       - By default, Android Studio will install the SDK to `~/Android/Sdk` and
diff --git a/tensorflow/examples/android/README.md b/tensorflow/examples/android/README.md
index 881a975e60..51621d51ef 100644
--- a/tensorflow/examples/android/README.md
+++ b/tensorflow/examples/android/README.md
@@ -143,6 +143,10 @@ the Android NDK and SDK must be installed on your system.
 
 ##### Edit WORKSPACE
 
+NOTE: As long as you have the SDK and NDK installed, the `./configure` script
+will create these rules for you. Answer "Yes" when the script asks to
+automatically configure the `./WORKSPACE`.
+
 The Android entries in
 [`<workspace_root>/WORKSPACE`](../../../WORKSPACE#L19-L36) must be uncommented
 with the paths filled in appropriately depending on where you installed the NDK
-- 
GitLab


From afdb6c83ee51277c2673367668cec200ca2afd5d Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 6 Dec 2017 17:41:28 -0800
Subject: [PATCH 0721/1225] Add missing gradient registration for
 ConjugateTranspose.

PiperOrigin-RevId: 178179371
---
 tensorflow/core/ops/array_grad.cc             | 19 +++++++++++++++++++
 .../python/kernel_tests/transpose_op_test.py  |  4 ++--
 tensorflow/python/ops/array_grad.py           | 10 ++++++++++
 3 files changed, 31 insertions(+), 2 deletions(-)

diff --git a/tensorflow/core/ops/array_grad.cc b/tensorflow/core/ops/array_grad.cc
index 325dbc4883..38bd851da8 100644
--- a/tensorflow/core/ops/array_grad.cc
+++ b/tensorflow/core/ops/array_grad.cc
@@ -333,6 +333,25 @@ Status TransposeGrad(const AttrSlice& attrs, FunctionDef* g) {
 }
 REGISTER_OP_GRADIENT("Transpose", TransposeGrad);
 
+Status ConjugateTransposeGrad(const AttrSlice& attrs, FunctionDef* g) {
+  *g = FDH::Define(
+      // Arg defs
+      {"x: T", "p: int32", "dy: T"},
+      // Ret val defs
+      {"dx: T", "dp: int32"},
+      // Attr defs
+      {"T: type"},
+      // Nodes
+      {
+          {{"q"}, "InvertPermutation", {"p"}, {}},
+          {{"dx"}, "ConjugateTranspose", {"dy", "q"}, {{"T", "$T"}}},
+          {{"dp"}, "ZerosLike", {"p"}, {{"T", DT_INT32}}},
+      });
+  VLOG(1) << "ConjugateTransposeGrad " << DebugString(*g);
+  return Status::OK();
+}
+REGISTER_OP_GRADIENT("ConjugateTranspose", ConjugateTransposeGrad);
+
 Status ReverseGrad(const AttrSlice& attrs, FunctionDef* g) {
   *g = FDH::Define(
       // Arg defs
diff --git a/tensorflow/python/kernel_tests/transpose_op_test.py b/tensorflow/python/kernel_tests/transpose_op_test.py
index c551d9c3d0..290200ce45 100644
--- a/tensorflow/python/kernel_tests/transpose_op_test.py
+++ b/tensorflow/python/kernel_tests/transpose_op_test.py
@@ -53,11 +53,11 @@ class TransposeTest(test.TestCase):
       # Gradient check on CPU.
       xs = list(np.shape(x))
       ys = list(np.shape(tf_ans))
-      if x.dtype == np.float32:
+      if x.dtype in [np.float32, np.complex64]:
         jacob_t, jacob_n = gradient_checker.compute_gradient(inx, xs, y, ys, x,
                                                              1e-2)
         self.assertAllClose(jacob_t, jacob_n, 1e-3, 1e-3)
-      elif x.dtype == np.float64:
+      elif x.dtype in [np.float64, np.complex128]:
         jacob_t, jacob_n = gradient_checker.compute_gradient(inx, xs, y, ys, x,
                                                              1e-2)
         self.assertAllClose(jacob_t, jacob_n, 1e-6, 1e-6)
diff --git a/tensorflow/python/ops/array_grad.py b/tensorflow/python/ops/array_grad.py
index 87f8d14860..55cae0bcbf 100644
--- a/tensorflow/python/ops/array_grad.py
+++ b/tensorflow/python/ops/array_grad.py
@@ -524,6 +524,16 @@ def _TransposeGrad(op, grad):
   return [array_ops.transpose(grad, array_ops.invert_permutation(p)), None]
 
 
+@ops.RegisterGradient("ConjugateTranspose")
+def _ConjugateTransposeGrad(op, grad):
+  """Returns conj(unshuffle(grad))."""
+  p = op.inputs[1]
+  return [
+      array_ops.transpose(
+          grad, array_ops.invert_permutation(p), conjugate=True), None
+  ]
+
+
 ops.NotDifferentiable("Shape")
 
 
-- 
GitLab


From b10855a148a14b3972dd7b3261856ff33ddc0504 Mon Sep 17 00:00:00 2001
From: Akshay Agrawal <akshayka@google.com>
Date: Wed, 6 Dec 2017 17:44:51 -0800
Subject: [PATCH 0722/1225] Add feature_column.InputLayer, an object-oriented
 version of input_layer that is compatible with eager execution.

feature_column.InputLayer ensures that variables are reused by wrapping
input_layer in a Template.

PiperOrigin-RevId: 178179765
---
 tensorflow/python/feature_column/BUILD        |   1 +
 .../python/feature_column/feature_column.py   | 176 ++++++++++++------
 .../feature_column/feature_column_test.py     | 103 ++++++++++
 3 files changed, 226 insertions(+), 54 deletions(-)

diff --git a/tensorflow/python/feature_column/BUILD b/tensorflow/python/feature_column/BUILD
index b1c81dd58c..76d44fc474 100644
--- a/tensorflow/python/feature_column/BUILD
+++ b/tensorflow/python/feature_column/BUILD
@@ -48,6 +48,7 @@ py_library(
         "//tensorflow/python:sparse_ops",
         "//tensorflow/python:sparse_tensor",
         "//tensorflow/python:string_ops",
+        "//tensorflow/python:template",
         "//tensorflow/python:tensor_shape",
         "//tensorflow/python:training",
         "//tensorflow/python:util",
diff --git a/tensorflow/python/feature_column/feature_column.py b/tensorflow/python/feature_column/feature_column.py
index 0686480ca4..80c36dc4aa 100644
--- a/tensorflow/python/feature_column/feature_column.py
+++ b/tensorflow/python/feature_column/feature_column.py
@@ -134,7 +134,7 @@ import math
 import numpy as np
 import six
 
-from tensorflow.python.eager import context
+
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
 from tensorflow.python.framework import sparse_tensor as sparse_tensor_lib
@@ -150,6 +150,7 @@ from tensorflow.python.ops import nn_ops
 from tensorflow.python.ops import parsing_ops
 from tensorflow.python.ops import sparse_ops
 from tensorflow.python.ops import string_ops
+from tensorflow.python.ops import template
 from tensorflow.python.ops import variable_scope
 from tensorflow.python.ops import variables
 from tensorflow.python.platform import gfile
@@ -158,6 +159,65 @@ from tensorflow.python.training import checkpoint_utils
 from tensorflow.python.util import nest
 
 
+def _internal_input_layer(features,
+                          feature_columns,
+                          weight_collections=None,
+                          trainable=True,
+                          cols_to_vars=None,
+                          scope=None):
+  """See input_layer. `scope` is a name or variable scope to use."""
+
+  feature_columns = _clean_feature_columns(feature_columns)
+  for column in feature_columns:
+    if not isinstance(column, _DenseColumn):
+      raise ValueError(
+          'Items of feature_columns must be a _DenseColumn. '
+          'You can wrap a categorical column with an '
+          'embedding_column or indicator_column. Given: {}'.format(column))
+  weight_collections = list(weight_collections or [])
+  if ops.GraphKeys.GLOBAL_VARIABLES not in weight_collections:
+    weight_collections.append(ops.GraphKeys.GLOBAL_VARIABLES)
+  if ops.GraphKeys.MODEL_VARIABLES not in weight_collections:
+    weight_collections.append(ops.GraphKeys.MODEL_VARIABLES)
+
+  # a non-None `scope` can allow for variable reuse, when, e.g., this function
+  # is wrapped by a `make_template`.
+  with variable_scope.variable_scope(
+      scope, default_name='input_layer', values=features.values()):
+    builder = _LazyBuilder(features)
+    output_tensors = []
+    ordered_columns = []
+    for column in sorted(feature_columns, key=lambda x: x.name):
+      ordered_columns.append(column)
+      with variable_scope.variable_scope(
+          None, default_name=column._var_scope_name):  # pylint: disable=protected-access
+        if column._var_scope_name == column.name:  # pylint: disable=protected-access
+          tensor = _get_dense_tensor(
+              column=column,
+              builder=builder,
+              weight_collections=weight_collections,
+              trainable=trainable)
+        else:
+          # This is typically the case for shared_embedding_columns. The
+          # embedding weights variable will be under the common variable_scope,
+          # but the ops for each column will be under a separate name_scope.
+          with ops.name_scope(column.name):
+            tensor = _get_dense_tensor(
+                column=column,
+                builder=builder,
+                weight_collections=weight_collections,
+                trainable=trainable)
+        output_tensors.append(tensor)
+        if cols_to_vars is not None:
+          # Retrieve any variables created (some _DenseColumn's don't create
+          # variables, in which case an empty list is returned).
+          cols_to_vars[column] = ops.get_collection(
+              ops.GraphKeys.GLOBAL_VARIABLES,
+              scope=variable_scope.get_variable_scope().name)
+    _verify_static_batch_size_equality(output_tensors, ordered_columns)
+    return array_ops.concat(output_tensors, 1)
+
+
 def input_layer(features,
                 feature_columns,
                 weight_collections=None,
@@ -194,7 +254,7 @@ def input_layer(features,
       `bucketized_column`, `indicator_column`. If you have categorical features,
       you can wrap them with an `embedding_column` or `indicator_column`.
     weight_collections: A list of collection names to which the Variable will be
-      added. Note that, variables will also be added to collections
+      added. Note that variables will also be added to collections
       `tf.GraphKeys.GLOBAL_VARIABLES` and `ops.GraphKeys.MODEL_VARIABLES`.
     trainable: If `True` also add the variable to the graph collection
       `GraphKeys.TRAINABLE_VARIABLES` (see `tf.Variable`).
@@ -216,52 +276,66 @@ def input_layer(features,
   Raises:
     ValueError: if an item in `feature_columns` is not a `_DenseColumn`.
   """
-  feature_columns = _clean_feature_columns(feature_columns)
-  for column in feature_columns:
-    if not isinstance(column, _DenseColumn):
-      raise ValueError(
-          'Items of feature_columns must be a _DenseColumn. '
-          'You can wrap a categorical column with an '
-          'embedding_column or indicator_column. Given: {}'.format(column))
-  weight_collections = list(weight_collections or [])
-  if ops.GraphKeys.GLOBAL_VARIABLES not in weight_collections:
-    weight_collections.append(ops.GraphKeys.GLOBAL_VARIABLES)
-  if ops.GraphKeys.MODEL_VARIABLES not in weight_collections:
-    weight_collections.append(ops.GraphKeys.MODEL_VARIABLES)
-  with variable_scope.variable_scope(
-      None, default_name='input_layer', values=features.values()):
-    builder = _LazyBuilder(features)
-    output_tensors = []
-    ordered_columns = []
-    for column in sorted(feature_columns, key=lambda x: x.name):
-      ordered_columns.append(column)
-      with variable_scope.variable_scope(
-          None, default_name=column._var_scope_name):  # pylint: disable=protected-access
-        if column._var_scope_name == column.name:  # pylint: disable=protected-access
-          tensor = _get_dense_tensor(
-              column=column,
-              builder=builder,
-              weight_collections=weight_collections,
-              trainable=trainable)
-        else:
-          # This is typically the case for shared_embedding_columns. The
-          # embedding weights variable will be under the common variable_scope,
-          # but the ops for each column will be under a separate name_scope.
-          with ops.name_scope(column.name):
-            tensor = _get_dense_tensor(
-                column=column,
-                builder=builder,
-                weight_collections=weight_collections,
-                trainable=trainable)
-        output_tensors.append(tensor)
-        if cols_to_vars is not None:
-          # Retrieve any variables created (some _DenseColumn's don't create
-          # variables, in which case an empty list is returned).
-          cols_to_vars[column] = ops.get_collection(
-              ops.GraphKeys.GLOBAL_VARIABLES,
-              scope=variable_scope.get_variable_scope().name)
-    _verify_static_batch_size_equality(output_tensors, ordered_columns)
-    return array_ops.concat(output_tensors, 1)
+  return _internal_input_layer(features, feature_columns, weight_collections,
+                               trainable, cols_to_vars)
+
+
+# TODO(akshayka): InputLayer should be a subclass of Layer, and it
+# should implement the logic in input_layer using Layer's build-and-call
+# paradigm; input_layer should create an instance of InputLayer and
+# return the result of inovking its apply method, just as functional layers do.
+class InputLayer(object):
+  """An object-oriented version of `input_layer` that reuses variables."""
+
+  def __init__(self,
+               feature_columns,
+               weight_collections=None,
+               trainable=True,
+               cols_to_vars=None):
+    """See `input_layer`."""
+
+    self._feature_columns = feature_columns
+    self._weight_collections = weight_collections
+    self._trainable = trainable
+    self._cols_to_vars = cols_to_vars
+    self._input_layer_template = template.make_template(
+        'feature_column_input_layer',
+        _internal_input_layer,
+        create_scope_now_=True)
+    self._scope = self._input_layer_template.variable_scope
+
+  def __call__(self, features):
+    return self._input_layer_template(
+        features=features,
+        feature_columns=self._feature_columns,
+        weight_collections=self._weight_collections,
+        trainable=self._trainable,
+        cols_to_vars=None,
+        scope=self._scope)
+
+  @property
+  def non_trainable_variables(self):
+    return self._input_layer_template.non_trainable_variables
+
+  @property
+  def non_trainable_weights(self):
+    return self._input_layer_template.non_trainable_weights
+
+  @property
+  def trainable_variables(self):
+    return self._input_layer_template.trainable_variables
+
+  @property
+  def trainable_weights(self):
+    return self._input_layer_template.trainable_weights
+
+  @property
+  def variables(self):
+    return self._input_layer_template.variables
+
+  @property
+  def weights(self):
+    return self._input_layer_template.weights
 
 
 def linear_model(features,
@@ -579,10 +653,6 @@ def embedding_column(
       is specified.
     ValueError: if `initializer` is specified and is not callable.
     RuntimeError: If eager execution is enabled.
-
-  @compatibility(eager)
-  Not compatible with eager execution.
-  @end_compatibility
   """
   if (dimension is None) or (dimension < 1):
     raise ValueError('Invalid dimension {}.'.format(dimension))
@@ -594,8 +664,6 @@ def embedding_column(
     raise ValueError('initializer must be callable if specified. '
                      'Embedding of column_name: {}'.format(
                          categorical_column.name))
-  if not context.in_graph_mode():
-    raise RuntimeError('Embedding_column not supported in eager mode.')
   if initializer is None:
     initializer = init_ops.truncated_normal_initializer(
         mean=0.0, stddev=1 / math.sqrt(dimension))
diff --git a/tensorflow/python/feature_column/feature_column_test.py b/tensorflow/python/feature_column/feature_column_test.py
index d974f14b8a..5d9849951b 100644
--- a/tensorflow/python/feature_column/feature_column_test.py
+++ b/tensorflow/python/feature_column/feature_column_test.py
@@ -26,6 +26,8 @@ import numpy as np
 from tensorflow.core.example import example_pb2
 from tensorflow.core.example import feature_pb2
 from tensorflow.python.client import session
+from tensorflow.python.eager import backprop
+from tensorflow.python.eager import context
 from tensorflow.python.estimator.inputs import numpy_io
 from tensorflow.python.feature_column import feature_column as fc_lib
 from tensorflow.python.feature_column import feature_column_lib as fc
@@ -34,11 +36,13 @@ from tensorflow.python.feature_column.feature_column import _DenseColumn
 from tensorflow.python.feature_column.feature_column import _FeatureColumn
 from tensorflow.python.feature_column.feature_column import _LazyBuilder
 from tensorflow.python.feature_column.feature_column import _transform_features
+from tensorflow.python.feature_column.feature_column import InputLayer
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import errors
 from tensorflow.python.framework import ops
 from tensorflow.python.framework import sparse_tensor
+from tensorflow.python.framework import test_util
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import lookup_ops
 from tensorflow.python.ops import parsing_ops
@@ -1690,6 +1694,105 @@ class LinearModelTest(test.TestCase):
 
 class InputLayerTest(test.TestCase):
 
+  @test_util.run_in_graph_and_eager_modes()
+  def test_retrieving_input(self):
+    features = {'a': [0.]}
+    input_layer = InputLayer(fc.numeric_column('a'))
+    inputs = self.evaluate(input_layer(features))
+    self.assertAllClose([[0.]], inputs)
+
+  def test_reuses_variables(self):
+    with context.eager_mode():
+      sparse_input = sparse_tensor.SparseTensor(
+          indices=((0, 0), (1, 0), (2, 0)),
+          values=(0, 1, 2),
+          dense_shape=(3, 3))
+
+      # Create feature columns (categorical and embedding).
+      categorical_column = fc.categorical_column_with_identity(key='a',
+                                                               num_buckets=3)
+      embedding_dimension = 2
+      def _embedding_column_initializer(shape, dtype, partition_info):
+        del shape  # unused
+        del dtype  # unused
+        del partition_info  # unused
+        embedding_values = (
+            (1, 0),  # id 0
+            (0, 1),  # id 1
+            (1, 1))  # id 2
+        return embedding_values
+      embedding_column = fc.embedding_column(
+          categorical_column,
+          dimension=embedding_dimension,
+          initializer=_embedding_column_initializer)
+
+      input_layer = InputLayer([embedding_column])
+      features = {'a': sparse_input}
+
+      inputs = input_layer(features)
+      variables = input_layer.variables
+
+      # Sanity check: test that the inputs are correct.
+      self.assertAllEqual([[1, 0], [0, 1], [1, 1]], inputs)
+
+      # Check that only one variable was created.
+      self.assertEqual(1, len(variables))
+
+      # Check that invoking input_layer on the same features does not create
+      # additional variables
+      _ = input_layer(features)
+      self.assertEqual(1, len(variables))
+      self.assertEqual(variables[0], input_layer.variables[0])
+
+  def test_feature_column_input_layer_gradient(self):
+    with context.eager_mode():
+      sparse_input = sparse_tensor.SparseTensor(
+          indices=((0, 0), (1, 0), (2, 0)),
+          values=(0, 1, 2),
+          dense_shape=(3, 3))
+
+      # Create feature columns (categorical and embedding).
+      categorical_column = fc.categorical_column_with_identity(key='a',
+                                                               num_buckets=3)
+      embedding_dimension = 2
+
+      def _embedding_column_initializer(shape, dtype, partition_info):
+        del shape  # unused
+        del dtype  # unused
+        del partition_info  # unused
+        embedding_values = (
+            (1, 0),  # id 0
+            (0, 1),  # id 1
+            (1, 1))  # id 2
+        return embedding_values
+
+      embedding_column = fc.embedding_column(
+          categorical_column,
+          dimension=embedding_dimension,
+          initializer=_embedding_column_initializer)
+
+      input_layer = InputLayer([embedding_column])
+      features = {'a': sparse_input}
+
+      def scale_matrix():
+        matrix = input_layer(features)
+        return 2 * matrix
+
+      # Sanity check: Verify that scale_matrix returns the correct output.
+      self.assertAllEqual([[2, 0], [0, 2], [2, 2]], scale_matrix())
+
+      # Check that the returned gradient is correct.
+      grad_function = backprop.implicit_grad(scale_matrix)
+      grads_and_vars = grad_function()
+      indexed_slice = grads_and_vars[0][0]
+      gradient = grads_and_vars[0][0].values
+
+      self.assertAllEqual([0, 1, 2], indexed_slice.indices)
+      self.assertAllEqual([[2, 2], [2, 2], [2, 2]], gradient)
+
+
+class FunctionalInputLayerTest(test.TestCase):
+
   def test_raises_if_empty_feature_columns(self):
     with self.assertRaisesRegexp(ValueError,
                                  'feature_columns must not be empty'):
-- 
GitLab


From cddf9415564b16c2bc234df68d3eb44fc8689dae Mon Sep 17 00:00:00 2001
From: Peter Hawkins <phawkins@google.com>
Date: Wed, 6 Dec 2017 18:00:15 -0800
Subject: [PATCH 0723/1225] [TF:XLA] Add XLA lowering of ResizeBilinear and
 ResizeBilinearGrad for the case align_corners=True, using a convolution with
 LHS dilation.

PiperOrigin-RevId: 178181497
---
 tensorflow/compiler/tests/BUILD               |  13 +
 tensorflow/compiler/tests/image_ops_test.py   | 142 +++++++
 tensorflow/compiler/tests/randomized_tests.cc |  30 ++
 tensorflow/compiler/tf2xla/const_analysis.cc  |   1 +
 tensorflow/compiler/tf2xla/kernels/BUILD      |   3 +
 .../tf2xla/kernels/image_resize_ops.cc        | 367 ++++++++++++++++++
 6 files changed, 556 insertions(+)
 create mode 100644 tensorflow/compiler/tests/image_ops_test.py
 create mode 100644 tensorflow/compiler/tf2xla/kernels/image_resize_ops.cc

diff --git a/tensorflow/compiler/tests/BUILD b/tensorflow/compiler/tests/BUILD
index fff1a7f57b..8ace678daa 100644
--- a/tensorflow/compiler/tests/BUILD
+++ b/tensorflow/compiler/tests/BUILD
@@ -279,6 +279,19 @@ tf_xla_py_test(
     ],
 )
 
+tf_xla_py_test(
+    name = "image_ops_test",
+    size = "small",
+    srcs = ["image_ops_test.py"],
+    deps = [
+        ":xla_test",
+        "//tensorflow/python:array_ops",
+        "//tensorflow/python:framework_for_generated_wrappers",
+        "//tensorflow/python:image_ops",
+        "//tensorflow/python:platform_test",
+    ],
+)
+
 tf_xla_py_test(
     name = "lrn_ops_test",
     size = "medium",
diff --git a/tensorflow/compiler/tests/image_ops_test.py b/tensorflow/compiler/tests/image_ops_test.py
new file mode 100644
index 0000000000..a04f376ebf
--- /dev/null
+++ b/tensorflow/compiler/tests/image_ops_test.py
@@ -0,0 +1,142 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for image ops."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import numpy as np
+
+from tensorflow.compiler.tests.xla_test import XLATestCase
+from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import gen_image_ops
+from tensorflow.python.platform import test
+
+
+class ResizeBilinearTest(XLATestCase):
+
+  def _assertForwardOpMatchesExpected(self,
+                                      image_np,
+                                      target_shape,
+                                      expected=None):
+    if expected is None:
+      self.fail("expected must be specified")
+    with self.test_session() as sess, self.test_scope():
+      image = array_ops.placeholder(image_np.dtype)
+      resized = gen_image_ops.resize_bilinear(
+          image, target_shape, align_corners=True)
+      out = sess.run(resized, {image: image_np[np.newaxis, :, :, np.newaxis]})
+      self.assertAllClose(expected[np.newaxis, :, :, np.newaxis], out)
+
+  def _assertBackwardOpMatchesExpected(self,
+                                       grads_np,
+                                       input_shape=None,
+                                       dtype=None,
+                                       expected=None):
+    if input_shape is None:
+      self.fail("input_shape must be specified")
+    if expected is None:
+      self.fail("expected must be specified")
+    with self.test_session() as sess, self.test_scope():
+      dtype = dtype or np.float32
+      grads = array_ops.placeholder(np.float32)
+      resized = gen_image_ops._resize_bilinear_grad(
+          grads,
+          np.zeros([1, input_shape[0], input_shape[1], 1], dtype=dtype),
+          align_corners=True)
+      out = sess.run(resized, {grads: grads_np[np.newaxis, :, :, np.newaxis]})
+      self.assertAllClose(expected[np.newaxis, :, :, np.newaxis], out)
+
+  def testAlignCorners1x2To3x2(self):
+    for dtype in self.float_types:
+      self._assertForwardOpMatchesExpected(
+          np.array([[1, 2]], dtype=dtype), [3, 3],
+          expected=np.array(
+              [[1, 1.5, 2], [1, 1.5, 2], [1, 1.5, 2]], dtype=np.float32))
+
+  def testAlignCorners1x2To3x2Grad(self):
+    for dtype in self.float_types:
+      self._assertBackwardOpMatchesExpected(
+          np.array([[1, 2], [3, 4], [5, 6]], dtype=np.float32),
+          input_shape=[1, 2],
+          dtype=dtype,
+          expected=np.array([[9, 12]], dtype=np.float32))
+
+  def testAlignCorners2x2To1x1(self):
+    for dtype in self.float_types:
+      self._assertForwardOpMatchesExpected(
+          np.array([[1, 2], [3, 4]], dtype=dtype), [1, 1],
+          expected=np.array([[1]], dtype=np.float32))
+
+  def testAlignCorners2x2To1x1Grad(self):
+    for dtype in self.float_types:
+      self._assertBackwardOpMatchesExpected(
+          np.array([[7]], dtype=np.float32),
+          input_shape=[2, 2],
+          dtype=dtype,
+          expected=np.array([[7, 0], [0, 0]], dtype=np.float32))
+
+  def testAlignCorners2x2To3x3(self):
+    for dtype in self.float_types:
+      self._assertForwardOpMatchesExpected(
+          np.array([[1, 2], [3, 4]], dtype=dtype), [3, 3],
+          expected=np.array(
+              [[1, 1.5, 2], [2, 2.5, 3], [3, 3.5, 4]], dtype=np.float32))
+
+  def testAlignCorners2x2To3x3Grad(self):
+    self._assertBackwardOpMatchesExpected(
+        np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]], dtype=np.float32),
+        input_shape=[2, 2],
+        expected=np.array([[5.25, 8.25], [14.25, 17.25]], dtype=np.float32))
+
+  def testAlignCorners3x3To2x2(self):
+    for dtype in self.float_types:
+      self._assertForwardOpMatchesExpected(
+          np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]], dtype=dtype), [2, 2],
+          expected=np.array([[1, 3], [7, 9]], dtype=np.float32))
+
+  def testAlignCorners3x3To2x2Grad(self):
+    for dtype in self.float_types:
+      self._assertBackwardOpMatchesExpected(
+          np.array([[7, 13], [22, 4]], dtype=np.float32),
+          input_shape=[3, 3],
+          dtype=dtype,
+          expected=np.array(
+              [[7, 0, 13], [0, 0, 0], [22, 0, 4]], dtype=np.float32))
+
+  def testAlignCorners4x4To3x3(self):
+    for dtype in self.float_types:
+      self._assertForwardOpMatchesExpected(
+          np.array(
+              [[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12], [13, 14, 15, 16]],
+              dtype=dtype), [3, 3],
+          expected=np.array(
+              [[1, 2.5, 4], [7, 8.5, 10], [13, 14.5, 16]], dtype=np.float32))
+
+  def testAlignCorners4x4To3x3Grad(self):
+    for dtype in self.float_types:
+      self._assertBackwardOpMatchesExpected(
+          np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]], dtype=np.float32),
+          input_shape=[4, 4],
+          dtype=dtype,
+          expected=np.array(
+              [[1, 1, 1, 3], [2, 1.25, 1.25, 3], [2, 1.25, 1.25, 3],
+               [7, 4, 4, 9]],
+              dtype=np.float32))
+
+
+if __name__ == "__main__":
+  test.main()
diff --git a/tensorflow/compiler/tests/randomized_tests.cc b/tensorflow/compiler/tests/randomized_tests.cc
index 6a8c3bcd55..798daaadbc 100644
--- a/tensorflow/compiler/tests/randomized_tests.cc
+++ b/tensorflow/compiler/tests/randomized_tests.cc
@@ -2460,6 +2460,36 @@ TEST_F(OpTest, Reshape) {
   });
 }
 
+TEST_F(OpTest, ResizeBilinear) {
+  Repeatedly([this]() {
+    std::vector<int64> in_dims = RandomDims(4, 4);
+    std::vector<int64> out_dims = RandomDims(2, 2);
+
+    return ExpectTfAndXlaOutputsAreClose(
+        OpTestBuilder("ResizeBilinear")
+            .RandomInput(DT_FLOAT, in_dims)
+            .Input(test::AsTensor<int32>(
+                std::vector<int32>(out_dims.begin(), out_dims.end())))
+            .Attr("T", DT_FLOAT)
+            .Attr("align_corners", true));
+  });
+}
+
+TEST_F(OpTest, ResizeBilinearGrad) {
+  Repeatedly([this]() {
+    std::vector<int64> in_dims = RandomDims(4, 4);
+    std::vector<int64> out_dims = RandomDims(2, 2);
+
+    return ExpectTfAndXlaOutputsAreClose(
+        OpTestBuilder("ResizeBilinearGrad")
+            .RandomInput(DT_FLOAT, in_dims)
+            .RandomInput(DT_FLOAT,
+                         {in_dims[0], out_dims[0], out_dims[1], in_dims[3]})
+            .Attr("T", DT_FLOAT)
+            .Attr("align_corners", true));
+  });
+}
+
 TEST_F(OpTest, Reverse) {
   Repeatedly([this]() {
     std::vector<int64> dims = RandomDims(1);
diff --git a/tensorflow/compiler/tf2xla/const_analysis.cc b/tensorflow/compiler/tf2xla/const_analysis.cc
index 6a1a5467e0..ab2f1e9a7a 100644
--- a/tensorflow/compiler/tf2xla/const_analysis.cc
+++ b/tensorflow/compiler/tf2xla/const_analysis.cc
@@ -80,6 +80,7 @@ Status BackwardsConstAnalysis(const Graph& g,
       {"Range", "limit"},
       {"Range", "delta"},
       {"Reshape", "shape"},
+      {"ResizeBilinear", "size"},
       {"ResourceStridedSliceAssign", "begin"},
       {"ResourceStridedSliceAssign", "end"},
       {"ResourceStridedSliceAssign", "strides"},
diff --git a/tensorflow/compiler/tf2xla/kernels/BUILD b/tensorflow/compiler/tf2xla/kernels/BUILD
index 27ed684b00..3e24cf042e 100644
--- a/tensorflow/compiler/tf2xla/kernels/BUILD
+++ b/tensorflow/compiler/tf2xla/kernels/BUILD
@@ -35,6 +35,7 @@ tf_kernel_library(
         "gather_op.cc",
         "gather_op_helpers.h",
         "identity_op.cc",
+        "image_resize_ops.cc",
         "index_ops.cc",
         "l2loss_op.cc",
         "lrn_ops.cc",
@@ -90,6 +91,7 @@ tf_kernel_library(
         "//tensorflow/compiler/tf2xla/lib:cholesky",
         "//tensorflow/compiler/tf2xla/lib:util",
         "//tensorflow/compiler/tf2xla/ops:sendrecv_ops",
+        "//tensorflow/compiler/xla:array4d",
         "//tensorflow/compiler/xla:literal_util",
         "//tensorflow/compiler/xla:shape_util",
         "//tensorflow/compiler/xla:util",
@@ -98,6 +100,7 @@ tf_kernel_library(
         "//tensorflow/compiler/xla/client:computation_builder",
         "//tensorflow/compiler/xla/client/lib:arithmetic",
         "//tensorflow/core:framework",
+        "//tensorflow/core:image_ops_op_lib",
         "//tensorflow/core:lib",
         "//tensorflow/core:linalg_ops_op_lib",
         "//tensorflow/core:protos_all_cc",
diff --git a/tensorflow/compiler/tf2xla/kernels/image_resize_ops.cc b/tensorflow/compiler/tf2xla/kernels/image_resize_ops.cc
new file mode 100644
index 0000000000..d91ebb500b
--- /dev/null
+++ b/tensorflow/compiler/tf2xla/kernels/image_resize_ops.cc
@@ -0,0 +1,367 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/compiler/tf2xla/type_util.h"
+#include "tensorflow/compiler/tf2xla/xla_helpers.h"
+#include "tensorflow/compiler/tf2xla/xla_op_kernel.h"
+#include "tensorflow/compiler/tf2xla/xla_op_registry.h"
+#include "tensorflow/compiler/xla/array4d.h"
+#include "tensorflow/core/framework/kernel_def_builder.h"
+#include "tensorflow/core/framework/register_types.h"
+#include "tensorflow/core/lib/math/math_util.h"
+
+namespace tensorflow {
+namespace {
+
+// We implement bilinear interpolation by upsampling followed by convolution.
+// The basic idea is as follows. To scale from NxN to RxR:
+//
+//    1. S := (N - 1) /  gcd(N-1, R-1)
+//    2. k := (R - 1) /  gcd(N-1, R-1)
+//    3. Convolution(kxk, stride=S, lhs_dilation=k, padding=k-1)
+//
+// For example, to Scale from 7x7 -> 15x15:
+//
+//    1. S := (7-1) / gcd(7-1, 15-1) = 6 / gcd(6, 14) = 6 / 2 = 3
+//    2. k := (15 - 1) / gcd(7-1, 15-1) = 14 / gcd(6, 14) = 14 / 2 = 7
+//    3. Convolution(7x7, stride=3, lhs_dilation=3, padding=2)
+//
+//
+// The 7x7 -> 15x15 case is much too large to write out in full as an
+// example. The smallest interesting example is 3x3 -> 4x4.
+//
+// S := 2
+// k := 3
+//
+// 00 03 06    00 00 00 00 00 00 00 00 00 00 00      00 02 04 06
+// 09 12 15 -> 00 00 00 00 00 00 00 00 00 00 00   -> 06 08 10 12
+// 18 21 24    00 00 00 00 00 03 00 00 06 00 00      12 14 16 18
+//             00 00 00 00 00 00 00 00 00 00 00      18 20 22 24
+//             00 00 00 00 00 00 00 00 00 00 00
+//             00 00 09 00 00 12 00 00 15 00 00
+//             00 00 00 00 00 00 00 00 00 00 00
+//             00 00 00 00 00 00 00 00 00 00 00
+//             00 00 18 00 00 21 00 00 24 00 00
+//             00 00 00 00 00 00 00 00 00 00 00
+//             00 00 00 00 00 00 00 00 00 00 00
+//
+// with the following convolutional kernel, with stride [2, 2]:
+//       1 2 3 2 1
+//       2 4 6 4 2
+// 1/9 * 3 6 9 6 3
+//       2 4 6 4 2
+//       1 2 3 2 1
+
+// Computes the size of the convolutional kernel and stride to use when resizing
+// from in_size to out_size.
+struct ResizeConvolutionDims {
+  // Size of the kernel to use.
+  std::vector<int64> kernel_size;
+
+  // Stride of the convolution to use.
+  std::vector<int64> stride;
+};
+ResizeConvolutionDims ComputeResizeConvolutionParameters(
+    gtl::ArraySlice<int64> in_size, gtl::ArraySlice<int64> out_size) {
+  CHECK_EQ(in_size.size(), out_size.size());
+  int num_spatial_dims = in_size.size();
+  ResizeConvolutionDims dims;
+  dims.kernel_size.resize(num_spatial_dims);
+  dims.stride.resize(num_spatial_dims);
+  for (int i = 0; i < num_spatial_dims; ++i) {
+    if (in_size[i] == 1) {
+      // We must handle input size 1 specially because XLA convolution does
+      // not allow stride 0.
+      dims.stride[i] = dims.kernel_size[i] = 1;
+    } else if (out_size[i] == 1) {
+      // If in_size[i] > 1 but out_size[i] == 1, then we slice out the first
+      // entry before resizing.
+      dims.stride[i] = dims.kernel_size[i] = 1;
+    } else {
+      int64 gcd = MathUtil::GCD(static_cast<uint64>(in_size[i] - 1),
+                                static_cast<uint64>(out_size[i] - 1));
+      dims.stride[i] = (in_size[i] - 1) / gcd;
+      dims.kernel_size[i] = (out_size[i] - 1) / gcd;
+    }
+  }
+  return dims;
+}
+
+xla::ComputationDataHandle MakeBilinearResizeKernel(
+    xla::ComputationBuilder* builder, gtl::ArraySlice<int64> kernel_size,
+    int64 channels) {
+  // Form a 2D convolution kernel like:
+  //       1 2 3 2 1
+  //       2 4 6 4 2
+  // 1/9 * 3 6 9 6 3
+  //       2 4 6 4 2
+  //       1 2 3 2 1
+  // by multiplying two 1D kernels of the form:
+  // 1/3 * [1 2 3 2 1]
+  auto make_1d_kernel = [](int64 n) {
+    std::vector<float> kernel(n * 2 - 1);
+    for (int64 i = 0; i < n; ++i) {
+      float v = i + 1;
+      kernel[i] = v;
+      kernel[n * 2 - 2 - i] = v;
+    }
+    return kernel;
+  };
+
+  // Form a block diagonal kernel where each channel interacts only with itself.
+  xla::Array4D<float> diag(1, 1, channels, channels, 0.0f);
+  for (int i = 0; i < channels; ++i) {
+    diag(0, 0, i, i) = 1.0f / (kernel_size[0] * kernel_size[1]);
+  }
+  return builder->Mul(
+      builder->ConstantR1<float>(make_1d_kernel(kernel_size[0])),
+      builder->Mul(builder->ConstantR1<float>(make_1d_kernel(kernel_size[1])),
+                   builder->ConstantR4FromArray4D(diag),
+                   /*broadcast_dimensions=*/{1}),
+      /*broadcast_dimensions=*/{0});
+}
+
+class ResizeBilinearOp : public XlaOpKernel {
+ public:
+  explicit ResizeBilinearOp(OpKernelConstruction* ctx) : XlaOpKernel(ctx) {
+    OP_REQUIRES_OK(ctx, ctx->GetAttr("align_corners", &align_corners_));
+    OP_REQUIRES(
+        ctx, align_corners_ == true,
+        errors::Unimplemented(
+            "ResizeBilinear with align_corners=False is not yet implemented"));
+  }
+
+  void Compile(XlaOpKernelContext* ctx) override {
+    xla::ComputationBuilder* b = ctx->builder();
+
+    TensorShape input_shape = ctx->InputShape(0);
+    OP_REQUIRES(ctx, input_shape.dims() == 4,
+                errors::InvalidArgument("input must be 4-dimensional",
+                                        input_shape.DebugString()));
+    const int64 batch = input_shape.dim_size(0);
+    const std::vector<int64> in_size = {input_shape.dim_size(1),
+                                        input_shape.dim_size(2)};
+    const int64 channels = input_shape.dim_size(3);
+    OP_REQUIRES(ctx, in_size[0] > 0 && in_size[1] > 0,
+                errors::InvalidArgument("input size must be positive, got [",
+                                        in_size[0], ",", in_size[1], "]"));
+
+    std::vector<int64> out_size;
+    OP_REQUIRES_OK(ctx, ctx->ConstantInputAsIntVector(1, &out_size));
+    OP_REQUIRES(ctx, out_size.size() == 2,
+                errors::InvalidArgument("output size must be length 2, got ",
+                                        out_size.size()));
+    OP_REQUIRES(ctx, out_size[0] > 0 && out_size[1] > 0,
+                errors::InvalidArgument("output size must be positive, got [",
+                                        out_size[0], ",", out_size[1], "]"));
+
+    const int num_spatial_dims = 2;
+
+    xla::ComputationDataHandle input = ctx->Input(0);
+
+    // If in_size[i] > 1 and out_size[i] == 1, slice out the first input in
+    // dimension i.
+    std::vector<int64> slice_size = in_size;
+    bool slice_input = false;
+    for (int i = 0; i < num_spatial_dims; ++i) {
+      if (in_size[i] > 1 && out_size[i] == 1) {
+        // If in_size[i] > 1 but out_size[i] == 1, then we slice out the first
+        // entry before resizing.
+        slice_input = true;
+        slice_size[i] = 1;
+      }
+    }
+    if (slice_input) {
+      input = b->Slice(input, {0, 0, 0, 0},
+                       {batch, slice_size[0], slice_size[1], channels},
+                       {1, 1, 1, 1});
+    }
+
+    // Output is always type float.
+    input = b->ConvertElementType(input, xla::F32);
+
+    // Picture for a 1x3 to 1x4 resize:
+    // stride = 2, kernel size = 3
+    // Input:
+    // 3 6 9
+    // Input with dilation and padding:
+    // 0 0 3 0 0 6 0 0 9 0 0
+    // Convolution kernel:
+    // 1/3 * [1 2 3 2 1]
+    // Output:
+    // 3 5 7 9
+    xla::ConvolutionDimensionNumbers dnums;
+    dnums.set_input_batch_dimension(0);
+    dnums.set_output_batch_dimension(0);
+    dnums.set_input_feature_dimension(3);
+    dnums.set_output_feature_dimension(3);
+    for (int i = 0; i < num_spatial_dims; ++i) {
+      dnums.add_input_spatial_dimensions(1 + i);
+      dnums.add_output_spatial_dimensions(1 + i);
+      dnums.add_kernel_spatial_dimensions(i);
+    }
+    dnums.set_kernel_input_feature_dimension(num_spatial_dims);
+    dnums.set_kernel_output_feature_dimension(num_spatial_dims + 1);
+
+    ResizeConvolutionDims dims =
+        ComputeResizeConvolutionParameters(in_size, out_size);
+    xla::ComputationDataHandle kernel =
+        MakeBilinearResizeKernel(b, dims.kernel_size, channels);
+    xla::ComputationDataHandle output = b->ConvGeneralDilated(
+        input, kernel, dims.stride,
+        /*padding=*/
+        {{dims.kernel_size[0] - 1, dims.kernel_size[0] - 1},
+         {dims.kernel_size[1] - 1, dims.kernel_size[1] - 1}},
+        /*lhs_dilation=*/dims.kernel_size,
+        /*rhs_dilation=*/{1, 1}, dnums);
+
+    // Add broadcasts to handle expanding from a size == 1 dimension to a
+    // size > 1 dimension.
+    for (int i = 0; i < num_spatial_dims; ++i) {
+      if (in_size[i] == 1 && out_size[i] > 1) {
+        output = b->Add(output, b->ConstantR1<float>(out_size[i], 0),
+                        /*broadcast_dimensions=*/{1 + i});
+      }
+    }
+
+    ctx->SetOutput(0, output);
+  }
+
+ private:
+  bool align_corners_;
+};
+
+REGISTER_XLA_OP(Name("ResizeBilinear"), ResizeBilinearOp);
+
+class ResizeBilinearGradOp : public XlaOpKernel {
+ public:
+  explicit ResizeBilinearGradOp(OpKernelConstruction* ctx) : XlaOpKernel(ctx) {
+    OP_REQUIRES_OK(ctx, ctx->GetAttr("align_corners", &align_corners_));
+    OP_REQUIRES(
+        ctx, align_corners_ == true,
+        errors::Unimplemented("ResizeBilinearGrad with align_corners=False is "
+                              "not yet implemented"));
+
+    DataType output_dtype;
+    OP_REQUIRES_OK(ctx, ctx->GetAttr("T", &output_dtype));
+    OP_REQUIRES_OK(ctx, DataTypeToPrimitiveType(output_dtype, &output_type_));
+  }
+
+  void Compile(XlaOpKernelContext* ctx) override {
+    xla::ComputationBuilder* b = ctx->builder();
+
+    TensorShape input_shape = ctx->InputShape(1);
+    OP_REQUIRES(ctx, input_shape.dims() == 4,
+                errors::InvalidArgument("input must be 4-dimensional",
+                                        input_shape.DebugString()));
+    const int64 batch = input_shape.dim_size(0);
+    const std::vector<int64> in_size = {input_shape.dim_size(1),
+                                        input_shape.dim_size(2)};
+    const int64 channels = input_shape.dim_size(3);
+    OP_REQUIRES(ctx, in_size[0] > 0 && in_size[1] > 0,
+                errors::InvalidArgument("input size must be positive, got [",
+                                        in_size[0], ",", in_size[1], "]"));
+
+    TensorShape grad_shape = ctx->InputShape(0);
+    OP_REQUIRES(ctx, grad_shape.dims() == 4,
+                errors::InvalidArgument("gradient must be 4-dimensional",
+                                        grad_shape.DebugString()));
+    const int64 grad_batch = grad_shape.dim_size(0);
+    const std::vector<int64> grad_size = {grad_shape.dim_size(1),
+                                          grad_shape.dim_size(2)};
+    const int64 grad_channels = grad_shape.dim_size(3);
+    OP_REQUIRES(ctx, batch == grad_batch,
+                errors::InvalidArgument(
+                    "activations and gradients must have the same batch size (",
+                    batch, " vs. ", grad_batch, ")"));
+    OP_REQUIRES(ctx, grad_size[0] > 0 && grad_size[1] > 0,
+                errors::InvalidArgument("gradient size must be positive, got [",
+                                        grad_size[0], ",", grad_size[1], "]"));
+    OP_REQUIRES(
+        ctx, channels == grad_channels,
+        errors::InvalidArgument(
+            "activations and gradients must have the same number of channels (",
+            channels, " vs. ", grad_channels, ")"));
+
+    const int num_spatial_dims = 2;
+
+    xla::ComputationDataHandle grad = ctx->Input(0);
+
+    ResizeConvolutionDims dims =
+        ComputeResizeConvolutionParameters(in_size, grad_size);
+
+    // To form the backward convolution, we keep the kernel unchanged (it is
+    // already symmetric) and swap the roles of strides and LHS dilation.
+    xla::ConvolutionDimensionNumbers dnums;
+    dnums.set_input_batch_dimension(0);
+    dnums.set_output_batch_dimension(0);
+    dnums.set_input_feature_dimension(3);
+    dnums.set_output_feature_dimension(3);
+    for (int i = 0; i < num_spatial_dims; ++i) {
+      dnums.add_input_spatial_dimensions(1 + i);
+      dnums.add_output_spatial_dimensions(1 + i);
+      dnums.add_kernel_spatial_dimensions(i);
+    }
+    dnums.set_kernel_input_feature_dimension(num_spatial_dims);
+    dnums.set_kernel_output_feature_dimension(num_spatial_dims + 1);
+    xla::ComputationDataHandle kernel =
+        MakeBilinearResizeKernel(b, dims.kernel_size, channels);
+
+    // Broadcast the input kernel where the forward op expanded from a size == 1
+    // dimension to a size > 1 dimension. This has the effect of summing the
+    // gradient contributions in that dimension.
+    for (int i = 0; i < num_spatial_dims; ++i) {
+      if (in_size[i] == 1 && grad_size[i] > 1) {
+        kernel = b->Add(kernel, b->ConstantR1<float>(grad_size[i], 0),
+                        /*broadcast_dimensions=*/{i});
+      }
+    }
+
+    xla::ComputationDataHandle output = b->ConvGeneralDilated(
+        grad, kernel, /*window_strides=*/dims.kernel_size,
+        /*padding=*/
+        {{dims.kernel_size[0] - 1, dims.kernel_size[0] - 1},
+         {dims.kernel_size[1] - 1, dims.kernel_size[1] - 1}},
+        /*lhs_dilation=*/dims.stride,
+        /*rhs_dilation=*/{1, 1}, dnums);
+
+    // If in_size[i] > 1 and grad_size[i] == 1, pad the output in dimension i.
+    // Opposite of the slice performed by the forward op.
+    xla::PaddingConfig padding = xla::MakeNoPaddingConfig(4);
+    bool pad_output = false;
+    for (int i = 0; i < num_spatial_dims; ++i) {
+      if (in_size[i] > 1 && grad_size[i] == 1) {
+        pad_output = true;
+        padding.mutable_dimensions(1 + i)->set_edge_padding_high(in_size[i] -
+                                                                 1);
+      }
+    }
+    if (pad_output) {
+      output = b->Pad(output, b->ConstantR0<float>(0.0f), padding);
+    }
+
+    output = b->ConvertElementType(output, output_type_);
+    ctx->SetOutput(0, output);
+  }
+
+ private:
+  bool align_corners_;
+  xla::PrimitiveType output_type_;
+};
+
+REGISTER_XLA_OP(Name("ResizeBilinearGrad"), ResizeBilinearGradOp);
+
+}  // namespace
+}  // namespace tensorflow
-- 
GitLab


From a51cc5801f60ff95a54d289886cb62a978522aa2 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 6 Dec 2017 18:21:01 -0800
Subject: [PATCH 0724/1225] Handle repeated applications of functools.partial
 correctly.

PiperOrigin-RevId: 178183885
---
 tensorflow/contrib/learn/python/learn/metric_spec.py | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/tensorflow/contrib/learn/python/learn/metric_spec.py b/tensorflow/contrib/learn/python/learn/metric_spec.py
index ed6683abed..6440bc204b 100644
--- a/tensorflow/contrib/learn/python/learn/metric_spec.py
+++ b/tensorflow/contrib/learn/python/learn/metric_spec.py
@@ -42,10 +42,8 @@ def _args(fn):
   """
   if hasattr(fn, 'func') and hasattr(fn, 'keywords'):
     # Handle functools.partial and similar objects.
-    return tuple([
-        arg for arg in tf_inspect.getargspec(fn.func).args
-        if arg not in set(fn.keywords.keys())
-    ])
+    return tuple(
+        [arg for arg in _args(fn.func) if arg not in set(fn.keywords.keys())])
   # Handle function.
   return tuple(tf_inspect.getargspec(fn).args)
 
-- 
GitLab


From 8ad62af489df718992561710123bc8c037e7d17b Mon Sep 17 00:00:00 2001
From: Yao Zhang <yaozhang@google.com>
Date: Wed, 6 Dec 2017 18:40:17 -0800
Subject: [PATCH 0725/1225] Split the tests so that it doesn't time out.

PiperOrigin-RevId: 178185460
---
 tensorflow/python/BUILD                       |  2 +-
 .../python/ops/nn_fused_batchnorm_test.py     | 45 ++++++++++++++-----
 2 files changed, 35 insertions(+), 12 deletions(-)

diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD
index 8471d5924f..bd8ef6944c 100644
--- a/tensorflow/python/BUILD
+++ b/tensorflow/python/BUILD
@@ -2624,7 +2624,7 @@ cuda_py_test(
         ":nn_grad",
         "//third_party/py/numpy",
     ],
-    shard_count = 4,
+    shard_count = 16,
 )
 
 cuda_py_test(
diff --git a/tensorflow/python/ops/nn_fused_batchnorm_test.py b/tensorflow/python/ops/nn_fused_batchnorm_test.py
index e72d34d1f7..ff7137d492 100644
--- a/tensorflow/python/ops/nn_fused_batchnorm_test.py
+++ b/tensorflow/python/ops/nn_fused_batchnorm_test.py
@@ -333,7 +333,7 @@ class BatchNormalizationTest(test.TestCase):
     self.assertLess(err_grad_x_2, err_tolerance)
     self.assertLess(err_grad_scale, err_tolerance)
 
-  def testInference(self):
+  def testInferenceShape1(self):
     x_shape = [1, 1, 6, 1]
     for dtype in [np.float16, np.float32]:
       if test.is_gpu_available(cuda_only=True):
@@ -344,6 +344,7 @@ class BatchNormalizationTest(test.TestCase):
       self._test_inference(
           x_shape, dtype, [1], np.float32, use_gpu=False, data_format='NHWC')
 
+  def testInferenceShape2(self):
     x_shape = [1, 1, 6, 2]
     if test.is_gpu_available(cuda_only=True):
       for dtype in [np.float16, np.float32]:
@@ -352,12 +353,14 @@ class BatchNormalizationTest(test.TestCase):
         self._test_inference(
             x_shape, dtype, [2], np.float32, use_gpu=False, data_format='NHWC')
 
+  def testInferenceShape3(self):
     x_shape = [1, 2, 1, 6]
     if test.is_gpu_available(cuda_only=True):
       for dtype in [np.float16, np.float32]:
         self._test_inference(
             x_shape, dtype, [2], np.float32, use_gpu=True, data_format='NCHW')
 
+  def testInferenceShape4(self):
     x_shape = [27, 131, 127, 6]
     for dtype in [np.float16, np.float32]:
       if test.is_gpu_available(cuda_only=True):
@@ -368,7 +371,7 @@ class BatchNormalizationTest(test.TestCase):
       self._test_inference(
           x_shape, dtype, [6], np.float32, use_gpu=False, data_format='NHWC')
 
-  def testTraining(self):
+  def testTrainingShape1(self):
     x_shape = [1, 1, 6, 1]
     for dtype in [np.float16, np.float32]:
       if test.is_gpu_available(cuda_only=True):
@@ -379,6 +382,7 @@ class BatchNormalizationTest(test.TestCase):
       self._test_training(
           x_shape, dtype, [1], np.float32, use_gpu=False, data_format='NHWC')
 
+  def testTrainingShape2(self):
     x_shape = [1, 1, 6, 2]
     for dtype in [np.float16, np.float32]:
       if test.is_gpu_available(cuda_only=True):
@@ -387,12 +391,14 @@ class BatchNormalizationTest(test.TestCase):
       self._test_training(
           x_shape, dtype, [2], np.float32, use_gpu=False, data_format='NHWC')
 
+  def testTrainingShape3(self):
     x_shape = [1, 2, 1, 6]
     if test.is_gpu_available(cuda_only=True):
       for dtype in [np.float16, np.float32]:
         self._test_training(
             x_shape, dtype, [2], np.float32, use_gpu=True, data_format='NCHW')
 
+  def testTrainingShape4(self):
     x_shape = [27, 131, 127, 6]
     for dtype in [np.float16, np.float32]:
       if test.is_gpu_available(cuda_only=True):
@@ -403,7 +409,7 @@ class BatchNormalizationTest(test.TestCase):
       self._test_training(
           x_shape, dtype, [6], np.float32, use_gpu=False, data_format='NHWC')
 
-  def testBatchNormGrad(self):
+  def testBatchNormGradShape1(self):
     for is_training in [True, False]:
       x_shape = [1, 1, 6, 1]
       for dtype in [np.float16, np.float32]:
@@ -430,6 +436,8 @@ class BatchNormalizationTest(test.TestCase):
             data_format='NHWC',
             is_training=is_training)
 
+  def testBatchNormGradShape2(self):
+    for is_training in [True, False]:
       x_shape = [1, 1, 6, 2]
       for dtype in [np.float16, np.float32]:
         if test.is_gpu_available(cuda_only=True):
@@ -448,6 +456,8 @@ class BatchNormalizationTest(test.TestCase):
             data_format='NHWC',
             is_training=is_training)
 
+  def testBatchNormGradShape3(self):
+    for is_training in [True, False]:
       x_shape = [1, 2, 1, 6]
       if test.is_gpu_available(cuda_only=True):
         for dtype in [np.float16, np.float32]:
@@ -459,6 +469,8 @@ class BatchNormalizationTest(test.TestCase):
               data_format='NCHW',
               is_training=is_training)
 
+  def testBatchNormGradShape4(self):
+    for is_training in [True, False]:
       x_shape = [5, 7, 11, 4]
       for dtype in [np.float16, np.float32]:
         if test.is_gpu_available(cuda_only=True):
@@ -515,26 +527,37 @@ class BatchNormalizationTest(test.TestCase):
           is_training=is_training,
           err_tolerance=err_tolerance)
 
-  def testBatchNormGradGrad(self):
-    configs = [{
+  def testBatchNormGradGradConfig1(self):
+    config = {
         'shape': [2, 3, 4, 5],
         'err_tolerance': 1e-2,
         'dtype': np.float32,
-    }, {
+    }
+    self._testBatchNormGradGrad(config)
+
+  def testBatchNormGradGradConfig2(self):
+    config = {
         'shape': [2, 3, 2, 2],
         'err_tolerance': 1e-3,
         'dtype': np.float32,
-    }, {
+    }
+    self._testBatchNormGradGrad(config)
+
+  def testBatchNormGradGradConfig3(self):
+    config = {
         'shape': [2, 3, 4, 5],
         'err_tolerance': 1e-2,
         'dtype': np.float16,
-    }, {
+    }
+    self._testBatchNormGradGrad(config)
+
+  def testBatchNormGradGradConfig4(self):
+    config = {
         'shape': [2, 3, 2, 2],
         'err_tolerance': 2e-3,
         'dtype': np.float16,
-    }]
-    for config in configs:
-      self._testBatchNormGradGrad(config)
+    }
+    self._testBatchNormGradGrad(config)
 
 
 if __name__ == '__main__':
-- 
GitLab


From fe8406149feec453250905965a14285465cd2063 Mon Sep 17 00:00:00 2001
From: Shanqing Cai <cais@google.com>
Date: Wed, 6 Dec 2017 18:43:24 -0800
Subject: [PATCH 0726/1225] Merge changes from github.

PiperOrigin-RevId: 178185697
---
 AUTHORS                                       |    2 +-
 CONTRIBUTING.md                               |    1 +
 configure.py                                  |   22 +
 tensorflow/cc/gradients/nn_grad.cc            |   12 +
 tensorflow/cc/gradients/nn_grad_test.cc       |    7 +
 .../compiler/xla/service/cpu/disassembler.h   |    4 +-
 .../compiler/xla/service/hlo_instruction.h    |    2 +-
 tensorflow/contrib/BUILD                      |    1 +
 tensorflow/contrib/__init__.py                |    1 +
 tensorflow/contrib/cloud/BUILD                |    2 -
 tensorflow/contrib/cmake/python_modules.txt   |  449 ++
 tensorflow/contrib/cmake/python_protos.txt    |   19 +
 tensorflow/contrib/cmake/python_protos_cc.txt |    5 +
 tensorflow/contrib/cmake/tf_core_ops.cmake    |    1 +
 tensorflow/contrib/cmake/tf_python.cmake      |  535 +-
 tensorflow/contrib/cmake/tf_tests.cmake       |    2 +
 .../contrib/data/python/kernel_tests/BUILD    |    3 +
 tensorflow/contrib/distributions/BUILD        |   18 +
 tensorflow/contrib/distributions/__init__.py  |    2 +
 .../python/kernel_tests/half_normal_test.py   |  320 +
 .../distributions/python/ops/half_normal.py   |  171 +
 .../python/ops/mixture_same_family.py         |    7 +-
 .../contrib/eager/python/examples/spinn/BUILD |    1 +
 .../contrib/framework/python/ops/variables.py |    2 +-
 .../contrib/layers/python/layers/layers.py    |   89 +-
 .../python/learn/estimators/estimator.py      |    4 +-
 tensorflow/contrib/lite/README.md             |   17 +-
 .../contrib/lite/download_dependencies.sh     |    7 +
 .../ios/camera/CameraExampleViewController.mm |    6 +-
 .../lite/examples/ios/simple/AppDelegate.mm   |    1 +
 .../examples/ios/simple/ios_image_load.mm     |    2 +
 tensorflow/contrib/lite/python/BUILD          |    1 +
 .../contrib/lite/schema/schema_generated.h    | 5417 +++++++++++++++++
 tensorflow/contrib/lite/toco/tflite/BUILD     |    5 +
 .../contrib/lite/tools/benchmark_model.cc     |    1 +
 .../contrib/makefile/download_dependencies.sh |   11 +-
 .../python/layers/core_layers.py              |    8 +-
 tensorflow/contrib/periodic_resample/BUILD    |  113 +
 .../contrib/periodic_resample/__init__.py     |   27 +
 .../kernels/periodic_resample_op.cc           |   26 +
 .../kernels/periodic_resample_op.h            |  230 +
 .../periodic_resample/ops/array_ops.cc        |   88 +
 .../periodic_resample/python/__init__.py      |   20 +
 .../kernel_tests/periodic_resample_op_test.py |  101 +
 .../python/ops/periodic_resample_op.py        |   30 +
 .../python/kernel_tests/core_rnn_cell_test.py |    1 -
 .../pip_package/cloud_tpu_profiler/main.py    |   12 +-
 .../contrib/tpu/profiler/pip_package/setup.py |    2 +-
 tensorflow/contrib/verbs/BUILD                |    6 +-
 tensorflow/contrib/verbs/rdma.cc              |   98 +-
 tensorflow/contrib/verbs/rdma.h               |   29 +-
 tensorflow/contrib/verbs/rdma_mgr.cc          |   51 +
 tensorflow/contrib/verbs/rdma_mgr.h           |    5 +-
 .../contrib/verbs/rdma_rendezvous_mgr.cc      |   46 +-
 tensorflow/contrib/verbs/verbs_server_lib.cc  |    5 +-
 .../core/common_runtime/pending_counts.h      |    2 +-
 .../core/common_runtime/shape_refiner.cc      |    2 +-
 tensorflow/core/graph/graph_partition.cc      |    2 +-
 tensorflow/core/graph/mkl_graph_util.h        |  179 +-
 tensorflow/core/graph/mkl_layout_pass.cc      |    2 +-
 .../core/graph/mkl_tfconversion_pass.cc       |    2 +-
 tensorflow/core/kernels/cwise_op_asinh.cc     |    2 +-
 .../core/kernels/cwise_op_bitwise_and.cc      |   10 +-
 .../core/kernels/cwise_op_bitwise_or.cc       |   10 +-
 .../core/kernels/cwise_op_bitwise_xor.cc      |   10 +-
 .../kernels/cwise_op_gpu_bitwise_and.cu.cc    |    3 +-
 .../kernels/cwise_op_gpu_bitwise_or.cu.cc     |    3 +-
 .../kernels/cwise_op_gpu_bitwise_xor.cu.cc    |    3 +-
 tensorflow/core/kernels/decode_bmp_op.cc      |   42 +-
 tensorflow/core/kernels/depthwise_conv_op.cc  |    7 +-
 .../kernels/dynamic_partition_op_gpu.cu.cc    |  465 ++
 tensorflow/core/kernels/maxpooling_op.cc      |    2 +-
 .../core/kernels/mkl_batch_matmul_op.cc       |    1 +
 .../core/kernels/mkl_conv_grad_filter_ops.cc  |   78 +-
 .../core/kernels/mkl_conv_grad_input_ops.cc   |   86 +-
 tensorflow/core/kernels/mkl_conv_ops.cc       |   82 +-
 tensorflow/core/kernels/mkl_conv_ops.h        |  140 +-
 tensorflow/core/kernels/shape_ops.h           |    8 +-
 tensorflow/core/kernels/slice_op.h            |    1 +
 .../core/util/transform_output_iterator.h     |    2 +-
 tensorflow/docs_src/extend/add_filesys.md     |    1 +
 tensorflow/docs_src/extend/estimators.md      |    4 +-
 tensorflow/docs_src/get_started/input_fn.md   |    2 +-
 tensorflow/docs_src/install/install_mac.md    |   31 +-
 .../docs_src/programmers_guide/datasets.md    |    6 +-
 .../docs_src/programmers_guide/saved_model.md |   18 +-
 tensorflow/examples/android/README.md         |    2 +-
 tensorflow/examples/speech_commands/train.py  |    2 +-
 tensorflow/go/tensor.go                       |   17 +
 tensorflow/go/tensor_test.go                  |   20 +
 tensorflow/python/debug/lib/stepper.py        |    2 +-
 tensorflow/python/estimator/export/export.py  |    3 +-
 .../python/estimator/export/export_test.py    |    3 +-
 tensorflow/python/keras/BUILD                 |    3 +-
 .../python/keras/_impl/keras/estimator.py     |   13 +-
 .../python/kernel_tests/decode_bmp_op_test.py |    1 +
 .../distributions/special_math_test.py        |   26 +
 .../kernel_tests/dynamic_partition_op_test.py |  197 +-
 tensorflow/python/ops/bitwise_ops_test.py     |    2 +-
 .../python/ops/distributions/special_math.py  |   24 +
 tensorflow/python/ops/losses/losses_impl.py   |    2 +-
 tensorflow/python/platform/tf_logging.py      |   90 +-
 tensorflow/tools/pip_package/setup.py         |    7 +
 tensorflow/workspace.bzl                      |  348 +-
 third_party/repo.bzl                          |    1 +
 105 files changed, 8798 insertions(+), 1219 deletions(-)
 create mode 100644 tensorflow/contrib/cmake/python_modules.txt
 create mode 100644 tensorflow/contrib/cmake/python_protos.txt
 create mode 100644 tensorflow/contrib/cmake/python_protos_cc.txt
 create mode 100644 tensorflow/contrib/distributions/python/kernel_tests/half_normal_test.py
 create mode 100644 tensorflow/contrib/distributions/python/ops/half_normal.py
 create mode 100755 tensorflow/contrib/lite/schema/schema_generated.h
 create mode 100644 tensorflow/contrib/periodic_resample/BUILD
 create mode 100644 tensorflow/contrib/periodic_resample/__init__.py
 create mode 100644 tensorflow/contrib/periodic_resample/kernels/periodic_resample_op.cc
 create mode 100644 tensorflow/contrib/periodic_resample/kernels/periodic_resample_op.h
 create mode 100644 tensorflow/contrib/periodic_resample/ops/array_ops.cc
 create mode 100644 tensorflow/contrib/periodic_resample/python/__init__.py
 create mode 100644 tensorflow/contrib/periodic_resample/python/kernel_tests/periodic_resample_op_test.py
 create mode 100644 tensorflow/contrib/periodic_resample/python/ops/periodic_resample_op.py
 create mode 100644 tensorflow/core/kernels/dynamic_partition_op_gpu.cu.cc
 mode change 100644 => 100755 tensorflow/python/keras/BUILD

diff --git a/AUTHORS b/AUTHORS
index a46ae7e616..aa4be5169d 100644
--- a/AUTHORS
+++ b/AUTHORS
@@ -7,4 +7,4 @@
 # The email address is not required for organizations.
 
 Google Inc.
-Yuan Tang terrytangyuan@gmail.com
+Yuan Tang <terrytangyuan@gmail.com>
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 43abdaafbf..1b537ca73c 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -114,6 +114,7 @@ pylint --rcfile=/tmp/pylintrc myfile.py
 * [Google Java Style Guide](https://google.github.io/styleguide/javaguide.html)
 * [Google JavaScript Style Guide](https://google.github.io/styleguide/jsguide.html)
 * [Google Shell Style Guide](https://google.github.io/styleguide/shell.xml)
+* [Google Objective-C Style Guide](http://google.github.io/styleguide/objcguide.html)
 
 #### Running sanity check
 
diff --git a/configure.py b/configure.py
index 99c0a8d321..680448d7b6 100644
--- a/configure.py
+++ b/configure.py
@@ -1088,6 +1088,28 @@ def set_computecpp_toolkit_path(environ_cp):
                               computecpp_toolkit_path)
 
 
+def set_trisycl_include_dir(environ_cp):
+  """Set TRISYCL_INCLUDE_DIR."""
+  ask_trisycl_include_dir = ('Please specify the location of the triSYCL '
+                             'include directory. (Use --config=sycl_trisycl '
+                             'when building with Bazel) '
+                             '[Default is %s]: ') % _DEFAULT_TRISYCL_INCLUDE_DIR
+  while True:
+    trisycl_include_dir = get_from_env_or_user_or_default(
+        environ_cp, 'TRISYCL_INCLUDE_DIR', ask_trisycl_include_dir,
+        _DEFAULT_TRISYCL_INCLUDE_DIR)
+    if os.path.exists(trisycl_include_dir):
+      break
+
+    print('Invalid triSYCL include directory, %s cannot be found'
+          % (trisycl_include_dir))
+
+  # Set TRISYCL_INCLUDE_DIR
+  environ_cp['TRISYCL_INCLUDE_DIR'] = trisycl_include_dir
+  write_action_env_to_bazelrc('TRISYCL_INCLUDE_DIR',
+                              trisycl_include_dir)
+
+
 def set_trisycl_include_dir(environ_cp):
   """Set TRISYCL_INCLUDE_DIR."""
 
diff --git a/tensorflow/cc/gradients/nn_grad.cc b/tensorflow/cc/gradients/nn_grad.cc
index 09fadfcab5..13a3bba5e6 100644
--- a/tensorflow/cc/gradients/nn_grad.cc
+++ b/tensorflow/cc/gradients/nn_grad.cc
@@ -196,6 +196,18 @@ Status MaxPoolGradV2Helper(const Scope& scope, const Operation& op,
 }
 REGISTER_GRADIENT_OP("MaxPoolV2", MaxPoolGradV2Helper);
 
+Status LRNGradHelper(const Scope& scope, const Operation& op,
+                     const std::vector<Output>& grad_inputs,
+                     std::vector<Output>* grad_outputs){
+  internal::LRNGrad::Attrs grad_attrs;
+
+  auto dx = internal::LRNGrad(scope, grad_inputs[0], op.input(0), op.output(0),
+                              grad_attrs);
+  grad_outputs->push_back(dx);
+  return scope.status();
+}
+REGISTER_GRADIENT_OP("LRN", LRNGradHelper);
+
 }  // anonymous namespace
 }  // namespace ops
 }  // namespace tensorflow
diff --git a/tensorflow/cc/gradients/nn_grad_test.cc b/tensorflow/cc/gradients/nn_grad_test.cc
index ac66f51cf0..f9063e8365 100644
--- a/tensorflow/cc/gradients/nn_grad_test.cc
+++ b/tensorflow/cc/gradients/nn_grad_test.cc
@@ -191,5 +191,12 @@ TEST_F(NNGradTest, MaxPoolGradV2Helper) {
   RunTest(x, x_init_value, y, y_shape);
 }
 
+TEST_F(NNGradTest, LRN){
+  TensorShape x_shape({1, 1, 2, 1});
+  auto x = Placeholder(scope_, DT_FLOAT, Placeholder::Shape(x_shape));
+  auto y = LRN(scope_, x);
+  RunTest(x, x_shape, y, x_shape);
+}
+
 }  // namespace
 }  // namespace tensorflow
diff --git a/tensorflow/compiler/xla/service/cpu/disassembler.h b/tensorflow/compiler/xla/service/cpu/disassembler.h
index b6feaa7e45..5e302f8899 100644
--- a/tensorflow/compiler/xla/service/cpu/disassembler.h
+++ b/tensorflow/compiler/xla/service/cpu/disassembler.h
@@ -37,7 +37,7 @@ struct DisassemblerResult {
   DisassemblerResult(const string& text, size_t code_size_bytes)
       : text(text), code_size_bytes(code_size_bytes) {}
 
-  // The dissassembled text sections of the object file.
+  // The disassembled text sections of the object file.
   string text;
   // The total number of bytes of executable code in the object file.
   uint64_t code_size_bytes;
@@ -53,7 +53,7 @@ class Disassembler {
   // Returns a DisassemblerResult for the given object file, containing the
   // disassembled code.
   //
-  // If we couldnt' retrieve a disassembler for this platform, an error status
+  // If we couldn't retrieve a disassembler for this platform, an error status
   // is returned.
   StatusOr<DisassemblerResult> DisassembleObjectFile(
       const llvm::object::ObjectFile& object_file) const;
diff --git a/tensorflow/compiler/xla/service/hlo_instruction.h b/tensorflow/compiler/xla/service/hlo_instruction.h
index 5e798c2045..03cf9aaf90 100644
--- a/tensorflow/compiler/xla/service/hlo_instruction.h
+++ b/tensorflow/compiler/xla/service/hlo_instruction.h
@@ -428,7 +428,7 @@ class HloInstruction {
   Status RemoveControlDependencyTo(HloInstruction* instruction);
 
   // Returns the set of control predecessors (successors) of this
-  // instruction. Control predecessors (sucessors) must execute before (after)
+  // instruction. Control predecessors (successors) must execute before (after)
   // the current instruction.
   const std::vector<HloInstruction*>& control_predecessors() const {
     return control_predecessors_;
diff --git a/tensorflow/contrib/BUILD b/tensorflow/contrib/BUILD
index b7ade95115..61f7821519 100644
--- a/tensorflow/contrib/BUILD
+++ b/tensorflow/contrib/BUILD
@@ -64,6 +64,7 @@ py_library(
         "//tensorflow/contrib/nearest_neighbor:nearest_neighbor_py",
         "//tensorflow/contrib/nn:nn_py",
         "//tensorflow/contrib/opt:opt_py",
+        "//tensorflow/contrib/periodic_resample:init_py",
         "//tensorflow/contrib/predictor",
         "//tensorflow/contrib/quantization:quantization_py",
         "//tensorflow/contrib/quantize:quantize_graph",
diff --git a/tensorflow/contrib/__init__.py b/tensorflow/contrib/__init__.py
index 1eda1abfcf..08247c6b38 100644
--- a/tensorflow/contrib/__init__.py
+++ b/tensorflow/contrib/__init__.py
@@ -55,6 +55,7 @@ from tensorflow.contrib import model_pruning
 from tensorflow.contrib import nccl
 from tensorflow.contrib import nn
 from tensorflow.contrib import opt
+from tensorflow.contrib import periodic_resample
 from tensorflow.contrib import predictor
 from tensorflow.contrib import quantization
 from tensorflow.contrib import quantize
diff --git a/tensorflow/contrib/cloud/BUILD b/tensorflow/contrib/cloud/BUILD
index aa8f5ed12b..fe8bd072af 100644
--- a/tensorflow/contrib/cloud/BUILD
+++ b/tensorflow/contrib/cloud/BUILD
@@ -60,9 +60,7 @@ tf_py_test(
     size = "small",
     srcs = ["python/ops/bigquery_reader_ops_test.py"],
     additional_deps = [
-        ":bigquery_reader_ops_op_lib",
         ":cloud_py",
-        "//tensorflow/contrib/cloud/kernels:bigquery_reader_ops",
         "//tensorflow/core:protos_all_py",
         "//tensorflow/python:array_ops",
         "//tensorflow/python:client_testlib",
diff --git a/tensorflow/contrib/cmake/python_modules.txt b/tensorflow/contrib/cmake/python_modules.txt
new file mode 100644
index 0000000000..a0fca690ef
--- /dev/null
+++ b/tensorflow/contrib/cmake/python_modules.txt
@@ -0,0 +1,449 @@
+tensorflow
+tensorflow/core
+tensorflow/core/example
+tensorflow/core/framework
+tensorflow/core/lib
+tensorflow/core/lib/core
+tensorflow/core/protobuf
+tensorflow/core/util
+tensorflow/examples
+tensorflow/examples/tutorials
+tensorflow/examples/tutorials/mnist
+tensorflow/python
+tensorflow/python/client
+tensorflow/python/data
+tensorflow/python/data/ops
+tensorflow/python/data/util
+tensorflow/python/debug
+tensorflow/python/debug/cli
+tensorflow/python/debug/examples
+tensorflow/python/debug/lib
+tensorflow/python/debug/wrappers
+tensorflow/python/eager
+tensorflow/python/estimator
+tensorflow/python/estimator/canned
+tensorflow/python/estimator/export
+tensorflow/python/estimator/inputs
+tensorflow/python/estimator/inputs/queues
+tensorflow/python/feature_column
+tensorflow/python/framework
+tensorflow/python/grappler
+tensorflow/python/keras
+tensorflow/python/keras/activations
+tensorflow/python/keras/applications
+tensorflow/python/keras/applications/inception_resnet_v2
+tensorflow/python/keras/applications/inception_v3
+tensorflow/python/keras/applications/mobilenet
+tensorflow/python/keras/applications/resnet50
+tensorflow/python/keras/applications/vgg16
+tensorflow/python/keras/applications/vgg19
+tensorflow/python/keras/applications/xception
+tensorflow/python/keras/backend
+tensorflow/python/keras/callbacks
+tensorflow/python/keras/constraints
+tensorflow/python/keras/datasets
+tensorflow/python/keras/datasets/boston_housing
+tensorflow/python/keras/datasets/cifar10
+tensorflow/python/keras/datasets/cifar100
+tensorflow/python/keras/datasets/fashion_mnist
+tensorflow/python/keras/datasets/imdb
+tensorflow/python/keras/datasets/mnist
+tensorflow/python/keras/datasets/reuters
+tensorflow/python/keras/estimator
+tensorflow/python/keras/initializers
+tensorflow/python/keras/layers
+tensorflow/python/keras/losses
+tensorflow/python/keras/metrics
+tensorflow/python/keras/models
+tensorflow/python/keras/optimizers
+tensorflow/python/keras/preprocessing
+tensorflow/python/keras/preprocessing/image
+tensorflow/python/keras/preprocessing/sequence
+tensorflow/python/keras/preprocessing/text
+tensorflow/python/keras/regularizers
+tensorflow/python/keras/utils
+tensorflow/python/keras/wrappers
+tensorflow/python/keras/wrappers/scikit_learn
+tensorflow/python/keras/_impl
+tensorflow/python/keras/_impl/keras
+tensorflow/python/keras/_impl/keras/applications
+tensorflow/python/keras/_impl/keras/datasets
+tensorflow/python/keras/_impl/keras/engine
+tensorflow/python/keras/_impl/keras/layers
+tensorflow/python/keras/_impl/keras/preprocessing
+tensorflow/python/keras/_impl/keras/utils
+tensorflow/python/keras/_impl/keras/wrappers
+tensorflow/python/kernel_tests
+tensorflow/python/kernel_tests/distributions
+tensorflow/python/kernel_tests/linalg
+tensorflow/python/kernel_tests/random
+tensorflow/python/layers
+tensorflow/python/lib
+tensorflow/python/lib/core
+tensorflow/python/lib/io
+tensorflow/python/ops
+tensorflow/python/ops/distributions
+tensorflow/python/ops/linalg
+tensorflow/python/ops/losses
+tensorflow/python/platform
+tensorflow/python/platform/default
+tensorflow/python/platform/summary
+tensorflow/python/profiler/
+tensorflow/python/profiler/internal
+tensorflow/python/saved_model
+tensorflow/python/summary
+tensorflow/python/summary/writer
+tensorflow/python/tools
+tensorflow/python/training
+tensorflow/python/user_ops
+tensorflow/python/util
+tensorflow/python/util/protobuf
+tensorflow/tools
+tensorflow/tools/graph_transforms
+tensorflow/contrib
+tensorflow/contrib/all_reduce
+tensorflow/contrib/all_reduce/python
+tensorflow/contrib/android
+tensorflow/contrib/android/java
+tensorflow/contrib/android/java/org
+tensorflow/contrib/android/java/org/tensorflow
+tensorflow/contrib/android/java/org/tensorflow/contrib
+tensorflow/contrib/android/java/org/tensorflow/contrib/android
+tensorflow/contrib/android/jni
+tensorflow/contrib/batching
+tensorflow/contrib/batching/kernels
+tensorflow/contrib/batching/python
+tensorflow/contrib/batching/python/ops
+tensorflow/contrib/bayesflow
+tensorflow/contrib/bayesflow/examples
+tensorflow/contrib/bayesflow/examples/reinforce_simple
+tensorflow/contrib/bayesflow/python
+tensorflow/contrib/bayesflow/python/ops
+tensorflow/contrib/boosted_trees
+tensorflow/contrib/boosted_trees/estimator_batch
+tensorflow/contrib/boosted_trees/kernels
+tensorflow/contrib/boosted_trees/ops
+tensorflow/contrib/boosted_trees/proto
+tensorflow/contrib/boosted_trees/python
+tensorflow/contrib/boosted_trees/python/ops
+tensorflow/contrib/cloud
+tensorflow/contrib/cloud/kernels
+tensorflow/contrib/cloud/ops
+tensorflow/contrib/cloud/python
+tensorflow/contrib/cloud/python/ops
+tensorflow/contrib/cluster_resolver
+tensorflow/contrib/cluster_resolver/python
+tensorflow/contrib/cluster_resolver/python/training
+tensorflow/contrib/compiler
+tensorflow/contrib/copy_graph
+tensorflow/contrib/copy_graph/python
+tensorflow/contrib/copy_graph/python/util
+tensorflow/contrib/crf
+tensorflow/contrib/crf/python
+tensorflow/contrib/crf/python/ops
+tensorflow/contrib/cudnn_rnn
+tensorflow/contrib/cudnn_rnn/kernels
+tensorflow/contrib/cudnn_rnn/ops
+tensorflow/contrib/cudnn_rnn/python
+tensorflow/contrib/cudnn_rnn/python/layers
+tensorflow/contrib/cudnn_rnn/python/ops
+tensorflow/contrib/data
+tensorflow/contrib/data/kernels
+tensorflow/contrib/data/python
+tensorflow/contrib/data/python/kernel_tests
+tensorflow/contrib/data/python/ops
+tensorflow/contrib/decision_trees
+tensorflow/contrib/decision_trees/proto
+tensorflow/contrib/deprecated
+tensorflow/contrib/distributions
+tensorflow/contrib/distributions/python
+tensorflow/contrib/distributions/python/ops
+tensorflow/contrib/distributions/python/ops/bijectors
+tensorflow/contrib/eager
+tensorflow/contrib/eager/python
+tensorflow/contrib/estimator
+tensorflow/contrib/estimator/python
+tensorflow/contrib/estimator/python/estimator
+tensorflow/contrib/factorization
+tensorflow/contrib/factorization/examples
+tensorflow/contrib/factorization/kernels
+tensorflow/contrib/factorization/ops
+tensorflow/contrib/factorization/python
+tensorflow/contrib/factorization/python/ops
+tensorflow/contrib/ffmpeg
+tensorflow/contrib/ffmpeg/default
+tensorflow/contrib/framework
+tensorflow/contrib/framework/kernels
+tensorflow/contrib/framework/ops
+tensorflow/contrib/framework/python
+tensorflow/contrib/framework/python/framework
+tensorflow/contrib/framework/python/ops
+tensorflow/contrib/fused_conv
+tensorflow/contrib/fused_conv/kernels
+tensorflow/contrib/fused_conv/python
+tensorflow/contrib/fused_conv/python/ops
+tensorflow/contrib/gan
+tensorflow/contrib/gan/python
+tensorflow/contrib/gan/python/estimator
+tensorflow/contrib/gan/python/estimator/python
+tensorflow/contrib/gan/python/eval
+tensorflow/contrib/gan/python/eval/python
+tensorflow/contrib/gan/python/features
+tensorflow/contrib/gan/python/features/python
+tensorflow/contrib/gan/python/losses
+tensorflow/contrib/gan/python/losses/python
+tensorflow/contrib/graph_editor
+tensorflow/contrib/graph_editor/examples
+tensorflow/contrib/grid_rnn
+tensorflow/contrib/grid_rnn/python
+tensorflow/contrib/grid_rnn/python/ops
+tensorflow/contrib/hooks
+tensorflow/contrib/hooks/python
+tensorflow/contrib/image
+tensorflow/contrib/image/kernels
+tensorflow/contrib/image/ops
+tensorflow/contrib/image/python
+tensorflow/contrib/image/python/ops
+tensorflow/contrib/input_pipeline
+tensorflow/contrib/input_pipeline/kernels
+tensorflow/contrib/input_pipeline/ops
+tensorflow/contrib/input_pipeline/python
+tensorflow/contrib/input_pipeline/python/ops
+tensorflow/contrib/integrate
+tensorflow/contrib/integrate/python
+tensorflow/contrib/integrate/python/ops
+tensorflow/contrib/ios_examples
+tensorflow/contrib/ios_examples/benchmark
+tensorflow/contrib/ios_examples/benchmark/benchmark.xcodeproj
+tensorflow/contrib/ios_examples/benchmark/data
+tensorflow/contrib/ios_examples/camera
+tensorflow/contrib/ios_examples/camera/camera_example.xcodeproj
+tensorflow/contrib/ios_examples/camera/en.lproj
+tensorflow/contrib/ios_examples/simple
+tensorflow/contrib/ios_examples/simple/data
+tensorflow/contrib/ios_examples/simple/tf_ios_makefile_example.xcodeproj
+tensorflow/contrib/keras
+tensorflow/contrib/keras/api
+tensorflow/contrib/keras/api/keras
+tensorflow/contrib/keras/api/keras/activations
+tensorflow/contrib/keras/api/keras/applications
+tensorflow/contrib/keras/api/keras/applications/inception_v3
+tensorflow/contrib/keras/api/keras/applications/mobilenet
+tensorflow/contrib/keras/api/keras/applications/resnet50
+tensorflow/contrib/keras/api/keras/applications/vgg16
+tensorflow/contrib/keras/api/keras/applications/vgg19
+tensorflow/contrib/keras/api/keras/applications/xception
+tensorflow/contrib/keras/api/keras/backend
+tensorflow/contrib/keras/api/keras/callbacks
+tensorflow/contrib/keras/api/keras/constraints
+tensorflow/contrib/keras/api/keras/datasets
+tensorflow/contrib/keras/api/keras/datasets/boston_housing
+tensorflow/contrib/keras/api/keras/datasets/cifar10
+tensorflow/contrib/keras/api/keras/datasets/cifar100
+tensorflow/contrib/keras/api/keras/datasets/imdb
+tensorflow/contrib/keras/api/keras/datasets/mnist
+tensorflow/contrib/keras/api/keras/datasets/reuters
+tensorflow/contrib/keras/api/keras/initializers
+tensorflow/contrib/keras/api/keras/layers
+tensorflow/contrib/keras/api/keras/losses
+tensorflow/contrib/keras/api/keras/metrics
+tensorflow/contrib/keras/api/keras/models
+tensorflow/contrib/keras/api/keras/optimizers
+tensorflow/contrib/keras/api/keras/preprocessing
+tensorflow/contrib/keras/api/keras/preprocessing/image
+tensorflow/contrib/keras/api/keras/preprocessing/sequence
+tensorflow/contrib/keras/api/keras/preprocessing/text
+tensorflow/contrib/keras/api/keras/regularizers
+tensorflow/contrib/keras/api/keras/utils
+tensorflow/contrib/keras/api/keras/wrappers
+tensorflow/contrib/keras/api/keras/wrappers/scikit_learn
+tensorflow/contrib/kernel_methods
+tensorflow/contrib/kernel_methods/python
+tensorflow/contrib/kernel_methods/python/mappers
+tensorflow/contrib/kfac
+tensorflow/contrib/kfac/examples
+tensorflow/contrib/kfac/python
+tensorflow/contrib/kfac/python/ops
+tensorflow/contrib/labeled_tensor
+tensorflow/contrib/labeled_tensor/python
+tensorflow/contrib/labeled_tensor/python/ops
+tensorflow/contrib/layers
+tensorflow/contrib/layers/kernels
+tensorflow/contrib/layers/ops
+tensorflow/contrib/layers/python
+tensorflow/contrib/layers/python/layers
+tensorflow/contrib/layers/python/ops
+tensorflow/contrib/learn
+tensorflow/contrib/learn/python
+tensorflow/contrib/learn/python/learn
+tensorflow/contrib/learn/python/learn/dataframe
+tensorflow/contrib/learn/python/learn/dataframe/queues
+tensorflow/contrib/learn/python/learn/dataframe/transforms
+tensorflow/contrib/learn/python/learn/datasets
+tensorflow/contrib/learn/python/learn/datasets/data
+tensorflow/contrib/learn/python/learn/estimators
+tensorflow/contrib/learn/python/learn/learn_io
+tensorflow/contrib/learn/python/learn/ops
+tensorflow/contrib/learn/python/learn/preprocessing
+tensorflow/contrib/learn/python/learn/utils
+tensorflow/contrib/legacy_seq2seq
+tensorflow/contrib/legacy_seq2seq/python
+tensorflow/contrib/legacy_seq2seq/python/ops
+tensorflow/contrib/linalg
+tensorflow/contrib/linalg/python
+tensorflow/contrib/linalg/python/ops
+tensorflow/contrib/linear_optimizer
+tensorflow/contrib/linear_optimizer/kernels
+tensorflow/contrib/linear_optimizer/kernels/g3doc
+tensorflow/contrib/linear_optimizer/python
+tensorflow/contrib/linear_optimizer/python/ops
+tensorflow/contrib/lookup
+tensorflow/contrib/losses
+tensorflow/contrib/losses/python
+tensorflow/contrib/losses/python/losses
+tensorflow/contrib/losses/python/metric_learning
+tensorflow/contrib/makefile
+tensorflow/contrib/memory_stats
+tensorflow/contrib/memory_stats/kernels
+tensorflow/contrib/memory_stats/ops
+tensorflow/contrib/memory_stats/python
+tensorflow/contrib/memory_stats/python/ops
+tensorflow/contrib/meta_graph_transform
+tensorflow/contrib/metrics
+tensorflow/contrib/metrics/ops
+tensorflow/contrib/metrics/python
+tensorflow/contrib/metrics/python/metrics
+tensorflow/contrib/metrics/python/ops
+tensorflow/contrib/model_pruning
+tensorflow/contrib/model_pruning/examples
+tensorflow/contrib/model_pruning/examples/cifar10
+tensorflow/contrib/model_pruning/python
+tensorflow/contrib/model_pruning/python/layers
+tensorflow/contrib/nccl
+tensorflow/contrib/nccl/kernels
+tensorflow/contrib/nccl/ops
+tensorflow/contrib/nccl/python
+tensorflow/contrib/nccl/python/ops
+tensorflow/contrib/ndlstm
+tensorflow/contrib/ndlstm/python
+tensorflow/contrib/nearest_neighbor/kernels
+tensorflow/contrib/nearest_neighbor/ops
+tensorflow/contrib/nearest_neighbor/python
+tensorflow/contrib/nearest_neighbor/python/ops
+tensorflow/contrib/nn
+tensorflow/contrib/nn/python
+tensorflow/contrib/nn/python/ops
+tensorflow/contrib/opt
+tensorflow/contrib/opt/python
+tensorflow/contrib/opt/python/training
+tensorflow/contrib/pi_examples
+tensorflow/contrib/pi_examples/camera
+tensorflow/contrib/pi_examples/label_image
+tensorflow/contrib/pi_examples/label_image/data
+tensorflow/contrib/periodic_resample
+tensorflow/contrib/periodic_resample/python
+tensorflow/contrib/periodic_resample/python/kernels
+tensorflow/contrib/periodic_resample/python/ops
+tensorflow/contrib/predictor
+tensorflow/contrib/quantization
+tensorflow/contrib/quantization/python
+tensorflow/contrib/quantize
+tensorflow/contrib/quantize/python
+tensorflow/contrib/receptive_field
+tensorflow/contrib/receptive_field/python
+tensorflow/contrib/reduce_slice_ops
+tensorflow/contrib/reduce_slice_ops/kernels
+tensorflow/contrib/reduce_slice_ops/ops
+tensorflow/contrib/reduce_slice_ops/python
+tensorflow/contrib/reduce_slice_ops/python/ops
+tensorflow/contrib/remote_fused_graph/pylib
+tensorflow/contrib/remote_fused_graph/pylib/python
+tensorflow/contrib/remote_fused_graph/pylib/python/ops
+tensorflow/contrib/resampler
+tensorflow/contrib/resampler/kernels
+tensorflow/contrib/resampler/ops
+tensorflow/contrib/resampler/python
+tensorflow/contrib/resampler/python/ops
+tensorflow/contrib/rnn
+tensorflow/contrib/rnn/kernels
+tensorflow/contrib/rnn/ops
+tensorflow/contrib/rnn/python
+tensorflow/contrib/rnn/python/kernel_tests
+tensorflow/contrib/rnn/python/ops
+tensorflow/contrib/saved_model
+tensorflow/contrib/saved_model/python
+tensorflow/contrib/saved_model/python/saved_model
+tensorflow/contrib/seq2seq
+tensorflow/contrib/seq2seq/kernels
+tensorflow/contrib/seq2seq/ops
+tensorflow/contrib/seq2seq/python
+tensorflow/contrib/seq2seq/python/ops
+tensorflow/contrib/session_bundle
+tensorflow/contrib/session_bundle/example
+tensorflow/contrib/signal
+tensorflow/contrib/signal/python
+tensorflow/contrib/signal/python/ops
+tensorflow/contrib/slim
+tensorflow/contrib/slim/python
+tensorflow/contrib/slim/python/slim
+tensorflow/contrib/slim/python/slim/data
+tensorflow/contrib/slim/python/slim/nets
+tensorflow/contrib/solvers
+tensorflow/contrib/solvers/python
+tensorflow/contrib/solvers/python/ops
+tensorflow/contrib/sparsemax
+tensorflow/contrib/sparsemax/python
+tensorflow/contrib/sparsemax/python/ops
+tensorflow/contrib/specs
+tensorflow/contrib/specs/python
+tensorflow/contrib/staging
+tensorflow/contrib/stat_summarizer
+tensorflow/contrib/stat_summarizer/python
+tensorflow/contrib/stateless
+tensorflow/contrib/stateless/python
+tensorflow/contrib/summary
+tensorflow/contrib/tensorboard
+tensorflow/contrib/tensorboard/plugins
+tensorflow/contrib/tensorboard/plugins/projector
+tensorflow/contrib/tensor_forest
+tensorflow/contrib/tensor_forest/client
+tensorflow/contrib/tensor_forest/core
+tensorflow/contrib/tensor_forest/core/ops
+tensorflow/contrib/tensor_forest/data
+tensorflow/contrib/tensor_forest/hybrid
+tensorflow/contrib/tensor_forest/hybrid/core
+tensorflow/contrib/tensor_forest/hybrid/core/ops
+tensorflow/contrib/tensor_forest/hybrid/ops
+tensorflow/contrib/tensor_forest/hybrid/python
+tensorflow/contrib/tensor_forest/hybrid/python/layers
+tensorflow/contrib/tensor_forest/hybrid/python/models
+tensorflow/contrib/tensor_forest/hybrid/python/ops
+tensorflow/contrib/tensor_forest/kernels
+tensorflow/contrib/tensor_forest/python
+tensorflow/contrib/tensor_forest/python/ops
+tensorflow/contrib/testing
+tensorflow/contrib/testing/python
+tensorflow/contrib/testing/python/framework
+tensorflow/contrib/text
+tensorflow/contrib/text/kernels
+tensorflow/contrib/text/ops
+tensorflow/contrib/text/python
+tensorflow/contrib/text/python/ops
+tensorflow/contrib/tfprof
+tensorflow/contrib/timeseries
+tensorflow/contrib/timeseries/examples
+tensorflow/contrib/timeseries/examples/data
+tensorflow/contrib/timeseries/python
+tensorflow/contrib/timeseries/python/timeseries
+tensorflow/contrib/timeseries/python/timeseries/state_space_models
+tensorflow/contrib/tpu
+tensorflow/contrib/tpu/ops
+tensorflow/contrib/tpu/profiler
+tensorflow/contrib/tpu/python
+tensorflow/contrib/tpu/python/ops
+tensorflow/contrib/tpu/python/profiler
+tensorflow/contrib/tpu/python/tpu
+tensorflow/contrib/training
+tensorflow/contrib/training/python
+tensorflow/contrib/training/python/training
+tensorflow/contrib/util
diff --git a/tensorflow/contrib/cmake/python_protos.txt b/tensorflow/contrib/cmake/python_protos.txt
new file mode 100644
index 0000000000..8a9c406d8b
--- /dev/null
+++ b/tensorflow/contrib/cmake/python_protos.txt
@@ -0,0 +1,19 @@
+tensorflow/core
+tensorflow/core/profiler
+tensorflow/python
+tensorflow/contrib/boosted_trees/proto
+tensorflow/contrib/cloud/kernels
+tensorflow/contrib/decision_trees/proto
+tensorflow/contrib/gdr
+tensorflow/contrib/lite/toco
+tensorflow/contrib/mpi
+tensorflow/contrib/mpi_collectives
+tensorflow/contrib/session_bundle
+tensorflow/contrib/tensor_forest/proto
+tensorflow/contrib/tensorboard/graph_explorer/proto
+tensorflow/contrib/tensorboard/plugins/projector
+tensorflow/contrib/tensorboard/plugins/trace
+tensorflow/contrib/tpu/proto
+tensorflow/contrib/tpu/profiler
+tensorflow/contrib/training/python/training
+tensorflow/contrib/verbs
diff --git a/tensorflow/contrib/cmake/python_protos_cc.txt b/tensorflow/contrib/cmake/python_protos_cc.txt
new file mode 100644
index 0000000000..d4a257b25c
--- /dev/null
+++ b/tensorflow/contrib/cmake/python_protos_cc.txt
@@ -0,0 +1,5 @@
+tensorflow/core/profiler
+tensorflow/python
+tensorflow/contrib/session_bundle
+tensorflow/contrib/tensorboard
+tensorflow/contrib/training
diff --git a/tensorflow/contrib/cmake/tf_core_ops.cmake b/tensorflow/contrib/cmake/tf_core_ops.cmake
index 4a61ed7a35..e8c2cd3473 100644
--- a/tensorflow/contrib/cmake/tf_core_ops.cmake
+++ b/tensorflow/contrib/cmake/tf_core_ops.cmake
@@ -92,6 +92,7 @@ GENERATE_CONTRIB_OP_LIBRARY(image_sirds "${tensorflow_source_dir}/tensorflow/con
 GENERATE_CONTRIB_OP_LIBRARY(layers_sparse_feature_cross "${tensorflow_source_dir}/tensorflow/contrib/layers/ops/sparse_feature_cross_op.cc")
 GENERATE_CONTRIB_OP_LIBRARY(memory_stats "${tensorflow_source_dir}/tensorflow/contrib/memory_stats/ops/memory_stats_ops.cc")
 GENERATE_CONTRIB_OP_LIBRARY(nccl "${tensorflow_source_dir}/tensorflow/contrib/nccl/ops/nccl_ops.cc")
+GENERATE_CONTRIB_OP_LIBRARY(periodic_resample "${tensorflow_source_dir}/tensorflow/contrib/periodic_resample/ops/array_ops.cc")
 GENERATE_CONTRIB_OP_LIBRARY(nearest_neighbor "${tensorflow_source_dir}/tensorflow/contrib/nearest_neighbor/ops/nearest_neighbor_ops.cc")
 GENERATE_CONTRIB_OP_LIBRARY(resampler "${tensorflow_source_dir}/tensorflow/contrib/resampler/ops/resampler_ops.cc")
 GENERATE_CONTRIB_OP_LIBRARY(rnn_gru "${tensorflow_source_dir}/tensorflow/contrib/rnn/ops/gru_ops.cc")
diff --git a/tensorflow/contrib/cmake/tf_python.cmake b/tensorflow/contrib/cmake/tf_python.cmake
index d102b442e7..8db6929e31 100755
--- a/tensorflow/contrib/cmake/tf_python.cmake
+++ b/tensorflow/contrib/cmake/tf_python.cmake
@@ -120,33 +120,34 @@ function(RELATIVE_PROTOBUF_GENERATE_CPP SRCS HDRS ROOT_DIR)
   set(${HDRS} ${${HDRS}} PARENT_SCOPE)
 endfunction()
 
-file(GLOB_RECURSE tf_protos_python_srcs RELATIVE ${tensorflow_source_dir}
-    "${tensorflow_source_dir}/tensorflow/core/*.proto"
-    "${tensorflow_source_dir}/tensorflow/core/profiler/*.proto"
-    "${tensorflow_source_dir}/tensorflow/python/*.proto"
-    "${tensorflow_source_dir}/tensorflow/contrib/boosted_trees/proto/*.proto"
-    "${tensorflow_source_dir}/tensorflow/contrib/decision_trees/proto/*.proto"
-    "${tensorflow_source_dir}/tensorflow/contrib/session_bundle/*.proto"
-    "${tensorflow_source_dir}/tensorflow/contrib/tensor_forest/proto/*.proto"
-    "${tensorflow_source_dir}/tensorflow/contrib/tensorboard/*.proto"
-    "${tensorflow_source_dir}/tensorflow/contrib/tpu/proto/*.proto"
-    "${tensorflow_source_dir}/tensorflow/contrib/tpu/profiler/*.proto"
-    "${tensorflow_source_dir}/tensorflow/contrib/training/*.proto"
-)
+FILE(READ python_protos.txt python_protos)
+# Convert file contents into a CMake list (where each element in the list is one line of the file)
+STRING(REGEX REPLACE ";" "\\\\;" python_protos "${python_protos}")
+STRING(REGEX REPLACE "\n" ";" python_protos "${python_protos}")
+
+foreach(python_proto ${python_protos})
+  file(GLOB_RECURSE tf_python_protos_src RELATIVE ${tensorflow_source_dir}
+      "${tensorflow_source_dir}/${python_proto}/*.proto"
+  )
+  list(APPEND tf_python_protos_srcs ${tf_python_protos_src})
+endforeach(python_proto)
+
 RELATIVE_PROTOBUF_GENERATE_PYTHON(
-    ${tensorflow_source_dir} PYTHON_PROTO_GENFILES ${tf_protos_python_srcs}
+    ${tensorflow_source_dir} PYTHON_PROTO_GENFILES ${tf_python_protos_srcs}
 )
 
-# NOTE(mrry): Avoid regenerating the tensorflow/core protos because this
-# can cause benign-but-failing-on-Windows-due-to-file-locking conflicts
-# when two rules attempt to generate the same file.
-file(GLOB_RECURSE tf_python_protos_cc_srcs RELATIVE ${tensorflow_source_dir}
-    "${tensorflow_source_dir}/tensorflow/core/profiler/*.proto"
-    "${tensorflow_source_dir}/tensorflow/python/*.proto"
-    "${tensorflow_source_dir}/tensorflow/contrib/session_bundle/*.proto"
-    "${tensorflow_source_dir}/tensorflow/contrib/tensorboard/*.proto"
-    "${tensorflow_source_dir}/tensorflow/contrib/training/*.proto"
-)
+FILE(READ python_protos_cc.txt python_protos_cc)
+# Convert file contents into a CMake list (where each element in the list is one line of the file)
+STRING(REGEX REPLACE ";" "\\\\;" python_protos_cc "${python_protos_cc}")
+STRING(REGEX REPLACE "\n" ";" python_protos_cc "${python_protos_cc}")
+
+foreach(python_proto_cc ${python_protos_cc})
+  file(GLOB_RECURSE tf_python_protos_cc_src RELATIVE ${tensorflow_source_dir}
+      "${tensorflow_source_dir}/${python_proto_cc}/*.proto"
+  )
+  list(APPEND tf_python_protos_cc_srcs ${tf_python_protos_cc_src})
+endforeach(python_proto_cc)
+
 RELATIVE_PROTOBUF_GENERATE_CPP(PROTO_SRCS PROTO_HDRS
     ${tensorflow_source_dir} ${tf_python_protos_cc_srcs}
 )
@@ -192,315 +193,15 @@ function(add_python_module MODULE_NAME)
     endif()
 endfunction()
 
-add_python_module("tensorflow")
-add_python_module("tensorflow/core")
-add_python_module("tensorflow/core/example")
-add_python_module("tensorflow/core/framework")
-add_python_module("tensorflow/core/lib")
-add_python_module("tensorflow/core/lib/core")
-add_python_module("tensorflow/core/protobuf")
-add_python_module("tensorflow/core/util")
-add_python_module("tensorflow/examples")
-add_python_module("tensorflow/examples/tutorials")
-add_python_module("tensorflow/examples/tutorials/mnist")
-add_python_module("tensorflow/python")
-add_python_module("tensorflow/python/client")
-add_python_module("tensorflow/python/data")
-add_python_module("tensorflow/python/data/ops")
-add_python_module("tensorflow/python/data/util")
-add_python_module("tensorflow/python/debug")
-add_python_module("tensorflow/python/debug/cli")
-add_python_module("tensorflow/python/debug/examples")
-add_python_module("tensorflow/python/debug/lib")
-add_python_module("tensorflow/python/debug/wrappers")
-add_python_module("tensorflow/python/eager")
-add_python_module("tensorflow/python/estimator")
-add_python_module("tensorflow/python/estimator/canned")
-add_python_module("tensorflow/python/estimator/export")
-add_python_module("tensorflow/python/estimator/inputs")
-add_python_module("tensorflow/python/estimator/inputs/queues")
-add_python_module("tensorflow/python/feature_column")
-add_python_module("tensorflow/python/framework")
-add_python_module("tensorflow/python/grappler")
-add_python_module("tensorflow/python/keras")
-add_python_module("tensorflow/python/keras/activations")
-add_python_module("tensorflow/python/keras/applications")
-add_python_module("tensorflow/python/keras/applications/inception_resnet_v2")
-add_python_module("tensorflow/python/keras/applications/inception_v3")
-add_python_module("tensorflow/python/keras/applications/mobilenet")
-add_python_module("tensorflow/python/keras/applications/resnet50")
-add_python_module("tensorflow/python/keras/applications/vgg16")
-add_python_module("tensorflow/python/keras/applications/vgg19")
-add_python_module("tensorflow/python/keras/applications/xception")
-add_python_module("tensorflow/python/keras/backend")
-add_python_module("tensorflow/python/keras/callbacks")
-add_python_module("tensorflow/python/keras/constraints")
-add_python_module("tensorflow/python/keras/datasets")
-add_python_module("tensorflow/python/keras/datasets/boston_housing")
-add_python_module("tensorflow/python/keras/datasets/cifar10")
-add_python_module("tensorflow/python/keras/datasets/cifar100")
-add_python_module("tensorflow/python/keras/datasets/fashion_mnist")
-add_python_module("tensorflow/python/keras/datasets/imdb")
-add_python_module("tensorflow/python/keras/datasets/mnist")
-add_python_module("tensorflow/python/keras/datasets/reuters")
-add_python_module("tensorflow/python/keras/estimator")
-add_python_module("tensorflow/python/keras/initializers")
-add_python_module("tensorflow/python/keras/layers")
-add_python_module("tensorflow/python/keras/losses")
-add_python_module("tensorflow/python/keras/metrics")
-add_python_module("tensorflow/python/keras/models")
-add_python_module("tensorflow/python/keras/optimizers")
-add_python_module("tensorflow/python/keras/preprocessing")
-add_python_module("tensorflow/python/keras/preprocessing/image")
-add_python_module("tensorflow/python/keras/preprocessing/sequence")
-add_python_module("tensorflow/python/keras/preprocessing/text")
-add_python_module("tensorflow/python/keras/regularizers")
-add_python_module("tensorflow/python/keras/utils")
-add_python_module("tensorflow/python/keras/wrappers")
-add_python_module("tensorflow/python/keras/wrappers/scikit_learn")
-add_python_module("tensorflow/python/keras/_impl")
-add_python_module("tensorflow/python/keras/_impl/keras")
-add_python_module("tensorflow/python/keras/_impl/keras/applications")
-add_python_module("tensorflow/python/keras/_impl/keras/datasets")
-add_python_module("tensorflow/python/keras/_impl/keras/engine")
-add_python_module("tensorflow/python/keras/_impl/keras/layers")
-add_python_module("tensorflow/python/keras/_impl/keras/preprocessing")
-add_python_module("tensorflow/python/keras/_impl/keras/utils")
-add_python_module("tensorflow/python/keras/_impl/keras/wrappers")
-add_python_module("tensorflow/python/kernel_tests")
-add_python_module("tensorflow/python/kernel_tests/distributions")
-add_python_module("tensorflow/python/kernel_tests/linalg")
-add_python_module("tensorflow/python/layers")
-add_python_module("tensorflow/python/lib")
-add_python_module("tensorflow/python/lib/core")
-add_python_module("tensorflow/python/lib/io")
-add_python_module("tensorflow/python/ops")
-add_python_module("tensorflow/python/ops/distributions")
-add_python_module("tensorflow/python/ops/linalg")
-add_python_module("tensorflow/python/ops/losses")
-add_python_module("tensorflow/python/platform")
-add_python_module("tensorflow/python/platform/default")
-add_python_module("tensorflow/python/platform/summary")
-add_python_module("tensorflow/python/profiler/")
-add_python_module("tensorflow/python/profiler/internal")
-add_python_module("tensorflow/python/saved_model")
-add_python_module("tensorflow/python/summary")
-add_python_module("tensorflow/python/summary/writer")
-add_python_module("tensorflow/python/tools")
-add_python_module("tensorflow/python/training")
-add_python_module("tensorflow/python/user_ops")
-add_python_module("tensorflow/python/util")
-add_python_module("tensorflow/python/util/protobuf")
-add_python_module("tensorflow/tools")
-add_python_module("tensorflow/tools/graph_transforms")
-add_python_module("tensorflow/contrib")
-add_python_module("tensorflow/contrib/all_reduce")
-add_python_module("tensorflow/contrib/all_reduce/python")
-add_python_module("tensorflow/contrib/android")
-add_python_module("tensorflow/contrib/android/java")
-add_python_module("tensorflow/contrib/android/java/org")
-add_python_module("tensorflow/contrib/android/java/org/tensorflow")
-add_python_module("tensorflow/contrib/android/java/org/tensorflow/contrib")
-add_python_module("tensorflow/contrib/android/java/org/tensorflow/contrib/android")
-add_python_module("tensorflow/contrib/android/jni")
-add_python_module("tensorflow/contrib/bayesflow")
-add_python_module("tensorflow/contrib/bayesflow/examples")
-add_python_module("tensorflow/contrib/bayesflow/examples/reinforce_simple")
-add_python_module("tensorflow/contrib/bayesflow/python")
-add_python_module("tensorflow/contrib/bayesflow/python/kernel_tests")
-add_python_module("tensorflow/contrib/bayesflow/python/ops")
-add_python_module("tensorflow/contrib/boosted_trees")
-add_python_module("tensorflow/contrib/boosted_trees/estimator_batch")
-add_python_module("tensorflow/contrib/boosted_trees/ops")
-add_python_module("tensorflow/contrib/boosted_trees/proto")
-add_python_module("tensorflow/contrib/boosted_trees/python")
-add_python_module("tensorflow/contrib/boosted_trees/python/kernel_tests")
-add_python_module("tensorflow/contrib/boosted_trees/python/ops")
-add_python_module("tensorflow/contrib/cloud")
-add_python_module("tensorflow/contrib/cloud/kernels")
-add_python_module("tensorflow/contrib/cloud/ops")
-add_python_module("tensorflow/contrib/cloud/python")
-add_python_module("tensorflow/contrib/cloud/python/ops")
-add_python_module("tensorflow/contrib/cluster_resolver")
-add_python_module("tensorflow/contrib/cluster_resolver/python")
-add_python_module("tensorflow/contrib/cluster_resolver/python/training")
-add_python_module("tensorflow/contrib/compiler")
-add_python_module("tensorflow/contrib/copy_graph")
-add_python_module("tensorflow/contrib/copy_graph/python")
-add_python_module("tensorflow/contrib/copy_graph/python/util")
-add_python_module("tensorflow/contrib/crf")
-add_python_module("tensorflow/contrib/crf/python")
-add_python_module("tensorflow/contrib/crf/python/kernel_tests")
-add_python_module("tensorflow/contrib/crf/python/ops")
-add_python_module("tensorflow/contrib/cudnn_rnn")
-add_python_module("tensorflow/contrib/cudnn_rnn/kernels")
-add_python_module("tensorflow/contrib/cudnn_rnn/ops")
-add_python_module("tensorflow/contrib/cudnn_rnn/python")
-add_python_module("tensorflow/contrib/cudnn_rnn/python/kernel_tests")
-add_python_module("tensorflow/contrib/cudnn_rnn/python/layers")
-add_python_module("tensorflow/contrib/cudnn_rnn/python/ops")
-add_python_module("tensorflow/contrib/data")
-add_python_module("tensorflow/contrib/data/python")
-add_python_module("tensorflow/contrib/data/python/kernel_tests")
-add_python_module("tensorflow/contrib/data/python/ops")
-add_python_module("tensorflow/contrib/decision_trees")
-add_python_module("tensorflow/contrib/decision_trees/proto")
-add_python_module("tensorflow/contrib/deprecated")
-add_python_module("tensorflow/contrib/distributions")
-add_python_module("tensorflow/contrib/distributions/python")
-add_python_module("tensorflow/contrib/distributions/python/kernel_tests")
-add_python_module("tensorflow/contrib/distributions/python/ops")
-add_python_module("tensorflow/contrib/distributions/python/ops/bijectors")
-add_python_module("tensorflow/contrib/eager")
-add_python_module("tensorflow/contrib/eager/python")
-add_python_module("tensorflow/contrib/estimator")
-add_python_module("tensorflow/contrib/estimator/python")
-add_python_module("tensorflow/contrib/estimator/python/estimator")
-add_python_module("tensorflow/contrib/factorization")
-add_python_module("tensorflow/contrib/factorization/examples")
-add_python_module("tensorflow/contrib/factorization/kernels")
-add_python_module("tensorflow/contrib/factorization/ops")
-add_python_module("tensorflow/contrib/factorization/python")
-add_python_module("tensorflow/contrib/factorization/python/kernel_tests")
-add_python_module("tensorflow/contrib/factorization/python/ops")
-add_python_module("tensorflow/contrib/ffmpeg")
-add_python_module("tensorflow/contrib/ffmpeg/default")
-add_python_module("tensorflow/contrib/ffmpeg/testdata")
-add_python_module("tensorflow/contrib/framework")
-add_python_module("tensorflow/contrib/framework/kernels")
-add_python_module("tensorflow/contrib/framework/ops")
-add_python_module("tensorflow/contrib/framework/python")
-add_python_module("tensorflow/contrib/framework/python/framework")
-add_python_module("tensorflow/contrib/framework/python/ops")
-add_python_module("tensorflow/contrib/gan")
-add_python_module("tensorflow/contrib/gan/python")
-add_python_module("tensorflow/contrib/gan/python/eval")
-add_python_module("tensorflow/contrib/gan/python/eval/python")
-add_python_module("tensorflow/contrib/gan/python/features")
-add_python_module("tensorflow/contrib/gan/python/features/python")
-add_python_module("tensorflow/contrib/gan/python/estimator")
-add_python_module("tensorflow/contrib/gan/python/estimator/python")
-add_python_module("tensorflow/contrib/gan/python/losses")
-add_python_module("tensorflow/contrib/gan/python/losses/python")
-add_python_module("tensorflow/contrib/graph_editor")
-add_python_module("tensorflow/contrib/graph_editor/examples")
-add_python_module("tensorflow/contrib/graph_editor/tests")
-add_python_module("tensorflow/contrib/grid_rnn")
-add_python_module("tensorflow/contrib/grid_rnn/python")
-add_python_module("tensorflow/contrib/grid_rnn/python/kernel_tests")
-add_python_module("tensorflow/contrib/grid_rnn/python/ops")
-add_python_module("tensorflow/contrib/hooks")
-add_python_module("tensorflow/contrib/image")
-add_python_module("tensorflow/contrib/image/ops")
-add_python_module("tensorflow/contrib/image/python")
-add_python_module("tensorflow/contrib/image/python/ops")
-add_python_module("tensorflow/contrib/input_pipeline")
-add_python_module("tensorflow/contrib/input_pipeline/ops")
-add_python_module("tensorflow/contrib/input_pipeline/python")
-add_python_module("tensorflow/contrib/input_pipeline/python/ops")
-add_python_module("tensorflow/contrib/integrate")
-add_python_module("tensorflow/contrib/integrate/python")
-add_python_module("tensorflow/contrib/integrate/python/ops")
-add_python_module("tensorflow/contrib/ios_examples")
-add_python_module("tensorflow/contrib/ios_examples/benchmark")
-add_python_module("tensorflow/contrib/ios_examples/benchmark/benchmark.xcodeproj")
-add_python_module("tensorflow/contrib/ios_examples/benchmark/data")
-add_python_module("tensorflow/contrib/ios_examples/camera")
-add_python_module("tensorflow/contrib/ios_examples/camera/camera_example.xcodeproj")
-add_python_module("tensorflow/contrib/ios_examples/camera/en.lproj")
-add_python_module("tensorflow/contrib/ios_examples/simple")
-add_python_module("tensorflow/contrib/ios_examples/simple/data")
-add_python_module("tensorflow/contrib/ios_examples/simple/tf_ios_makefile_example.xcodeproj")
-add_python_module("tensorflow/contrib/keras")
-add_python_module("tensorflow/contrib/keras/api")
-add_python_module("tensorflow/contrib/keras/api/keras")
-add_python_module("tensorflow/contrib/keras/api/keras/activations")
-add_python_module("tensorflow/contrib/keras/api/keras/applications")
-add_python_module("tensorflow/contrib/keras/api/keras/applications/inception_v3")
-add_python_module("tensorflow/contrib/keras/api/keras/applications/mobilenet")
-add_python_module("tensorflow/contrib/keras/api/keras/applications/resnet50")
-add_python_module("tensorflow/contrib/keras/api/keras/applications/vgg16")
-add_python_module("tensorflow/contrib/keras/api/keras/applications/vgg19")
-add_python_module("tensorflow/contrib/keras/api/keras/applications/xception")
-add_python_module("tensorflow/contrib/keras/api/keras/backend")
-add_python_module("tensorflow/contrib/keras/api/keras/callbacks")
-add_python_module("tensorflow/contrib/keras/api/keras/constraints")
-add_python_module("tensorflow/contrib/keras/api/keras/datasets")
-add_python_module("tensorflow/contrib/keras/api/keras/datasets/boston_housing")
-add_python_module("tensorflow/contrib/keras/api/keras/datasets/cifar10")
-add_python_module("tensorflow/contrib/keras/api/keras/datasets/cifar100")
-add_python_module("tensorflow/contrib/keras/api/keras/datasets/imdb")
-add_python_module("tensorflow/contrib/keras/api/keras/datasets/mnist")
-add_python_module("tensorflow/contrib/keras/api/keras/datasets/reuters")
-add_python_module("tensorflow/contrib/keras/api/keras/initializers")
-add_python_module("tensorflow/contrib/keras/api/keras/layers")
-add_python_module("tensorflow/contrib/keras/api/keras/losses")
-add_python_module("tensorflow/contrib/keras/api/keras/metrics")
-add_python_module("tensorflow/contrib/keras/api/keras/models")
-add_python_module("tensorflow/contrib/keras/api/keras/optimizers")
-add_python_module("tensorflow/contrib/keras/api/keras/preprocessing")
-add_python_module("tensorflow/contrib/keras/api/keras/preprocessing/image")
-add_python_module("tensorflow/contrib/keras/api/keras/preprocessing/sequence")
-add_python_module("tensorflow/contrib/keras/api/keras/preprocessing/text")
-add_python_module("tensorflow/contrib/keras/api/keras/regularizers")
-add_python_module("tensorflow/contrib/keras/api/keras/utils")
-add_python_module("tensorflow/contrib/keras/api/keras/wrappers")
-add_python_module("tensorflow/contrib/keras/api/keras/wrappers/scikit_learn")
-add_python_module("tensorflow/contrib/keras/python")
-add_python_module("tensorflow/contrib/keras/python/keras")
-add_python_module("tensorflow/contrib/keras/python/keras/applications")
-add_python_module("tensorflow/contrib/keras/python/keras/datasets")
-add_python_module("tensorflow/contrib/keras/python/keras/engine")
-add_python_module("tensorflow/contrib/keras/python/keras/layers")
-add_python_module("tensorflow/contrib/keras/python/keras/preprocessing")
-add_python_module("tensorflow/contrib/keras/python/keras/utils")
-add_python_module("tensorflow/contrib/keras/python/keras/wrappers")
-add_python_module("tensorflow/contrib/kernel_methods")
-add_python_module("tensorflow/contrib/kernel_methods/python")
-add_python_module("tensorflow/contrib/kernel_methods/python/mappers")
-add_python_module("tensorflow/contrib/kfac")
-add_python_module("tensorflow/contrib/kfac/examples")
-add_python_module("tensorflow/contrib/kfac/python")
-add_python_module("tensorflow/contrib/kfac/python/ops")
-add_python_module("tensorflow/contrib/labeled_tensor")
-add_python_module("tensorflow/contrib/labeled_tensor/python")
-add_python_module("tensorflow/contrib/labeled_tensor/python/ops")
-add_python_module("tensorflow/contrib/layers")
-add_python_module("tensorflow/contrib/layers/kernels")
-add_python_module("tensorflow/contrib/layers/ops")
-add_python_module("tensorflow/contrib/layers/python")
-add_python_module("tensorflow/contrib/layers/python/kernel_tests")
-add_python_module("tensorflow/contrib/layers/python/layers")
-add_python_module("tensorflow/contrib/layers/python/ops")
-add_python_module("tensorflow/contrib/learn")
-add_python_module("tensorflow/contrib/learn/python")
-add_python_module("tensorflow/contrib/learn/python/learn")
-add_python_module("tensorflow/contrib/learn/python/learn/dataframe")
-add_python_module("tensorflow/contrib/learn/python/learn/dataframe/queues")
-add_python_module("tensorflow/contrib/learn/python/learn/dataframe/transforms")
-add_python_module("tensorflow/contrib/learn/python/learn/datasets")
-add_python_module("tensorflow/contrib/learn/python/learn/datasets/data")
-add_python_module("tensorflow/contrib/learn/python/learn/estimators")
-add_python_module("tensorflow/contrib/learn/python/learn/learn_io")
-add_python_module("tensorflow/contrib/learn/python/learn/ops")
-add_python_module("tensorflow/contrib/learn/python/learn/preprocessing")
-add_python_module("tensorflow/contrib/learn/python/learn/preprocessing/tests")
-add_python_module("tensorflow/contrib/learn/python/learn/tests")
-add_python_module("tensorflow/contrib/learn/python/learn/tests/dataframe")
-add_python_module("tensorflow/contrib/learn/python/learn/utils")
-add_python_module("tensorflow/contrib/legacy_seq2seq")
-add_python_module("tensorflow/contrib/legacy_seq2seq/python")
-add_python_module("tensorflow/contrib/legacy_seq2seq/python/ops")
-add_python_module("tensorflow/contrib/linalg")
-add_python_module("tensorflow/contrib/linalg/python")
-add_python_module("tensorflow/contrib/linalg/python/ops")
-add_python_module("tensorflow/contrib/linalg/python/kernel_tests")
-add_python_module("tensorflow/contrib/linear_optimizer")
-add_python_module("tensorflow/contrib/linear_optimizer/kernels")
-add_python_module("tensorflow/contrib/linear_optimizer/kernels/g3doc")
-add_python_module("tensorflow/contrib/linear_optimizer/python")
-add_python_module("tensorflow/contrib/linear_optimizer/python/kernel_tests")
-add_python_module("tensorflow/contrib/linear_optimizer/python/ops")
+FILE(READ python_modules.txt python_modules)
+# Convert file contents into a CMake list (where each element in the list is one line of the file)
+STRING(REGEX REPLACE ";" "\\\\;" python_modules "${python_modules}")
+STRING(REGEX REPLACE "\n" ";" python_modules "${python_modules}")
+
+foreach(python_module ${python_modules})
+  add_python_module(${python_module})
+endforeach(python_module)
+
 add_custom_command(TARGET tf_python_touchup_modules PRE_BUILD
     COMMAND ${CMAKE_COMMAND} -E make_directory
     "${CMAKE_CURRENT_BINARY_DIR}/tf_python/tensorflow/contrib/lite")
@@ -514,157 +215,6 @@ add_custom_command(
     TARGET tf_python_copy_scripts_to_destination PRE_BUILD
     COMMAND ${CMAKE_COMMAND} -E touch
     ${CMAKE_CURRENT_BINARY_DIR}/tf_python/tensorflow/contrib/lite/python/lite.py)
-add_python_module("tensorflow/contrib/lookup")
-add_python_module("tensorflow/contrib/losses")
-add_python_module("tensorflow/contrib/losses/python")
-add_python_module("tensorflow/contrib/losses/python/losses")
-add_python_module("tensorflow/contrib/losses/python/metric_learning")
-add_python_module("tensorflow/contrib/makefile")
-add_python_module("tensorflow/contrib/makefile/test")
-add_python_module("tensorflow/contrib/memory_stats")
-add_python_module("tensorflow/contrib/memory_stats/kernels")
-add_python_module("tensorflow/contrib/memory_stats/ops")
-add_python_module("tensorflow/contrib/memory_stats/python")
-add_python_module("tensorflow/contrib/memory_stats/python/kernel_tests")
-add_python_module("tensorflow/contrib/memory_stats/python/ops")
-add_python_module("tensorflow/contrib/meta_graph_transform")
-add_python_module("tensorflow/contrib/metrics")
-add_python_module("tensorflow/contrib/metrics/kernels")
-add_python_module("tensorflow/contrib/metrics/ops")
-add_python_module("tensorflow/contrib/metrics/python")
-add_python_module("tensorflow/contrib/metrics/python/kernel_tests")
-add_python_module("tensorflow/contrib/metrics/python/metrics")
-add_python_module("tensorflow/contrib/metrics/python/ops")
-add_python_module("tensorflow/contrib/model_pruning")
-add_python_module("tensorflow/contrib/model_pruning/examples")
-add_python_module("tensorflow/contrib/model_pruning/examples/cifar10")
-add_python_module("tensorflow/contrib/model_pruning/python")
-add_python_module("tensorflow/contrib/model_pruning/python/layers")
-add_python_module("tensorflow/contrib/ndlstm")
-add_python_module("tensorflow/contrib/ndlstm/python")
-add_python_module("tensorflow/contrib/nn")
-add_python_module("tensorflow/contrib/nn/python")
-add_python_module("tensorflow/contrib/nn/python/ops")
-add_python_module("tensorflow/contrib/nccl")
-add_python_module("tensorflow/contrib/nccl/kernels")
-add_python_module("tensorflow/contrib/nccl/ops")
-add_python_module("tensorflow/contrib/nccl/python")
-add_python_module("tensorflow/contrib/nccl/python/ops")
-add_python_module("tensorflow/contrib/nearest_neighbor/kernels")
-add_python_module("tensorflow/contrib/nearest_neighbor/ops")
-add_python_module("tensorflow/contrib/nearest_neighbor/python")
-add_python_module("tensorflow/contrib/nearest_neighbor/python/kernel_tests")
-add_python_module("tensorflow/contrib/nearest_neighbor/python/ops")
-add_python_module("tensorflow/contrib/opt")
-add_python_module("tensorflow/contrib/opt/python")
-add_python_module("tensorflow/contrib/opt/python/training")
-add_python_module("tensorflow/contrib/pi_examples")
-add_python_module("tensorflow/contrib/pi_examples/camera")
-add_python_module("tensorflow/contrib/pi_examples/label_image")
-add_python_module("tensorflow/contrib/pi_examples/label_image/data")
-add_python_module("tensorflow/contrib/predictor")
-add_python_module("tensorflow/contrib/quantization")
-add_python_module("tensorflow/contrib/quantization/python")
-add_python_module("tensorflow/contrib/quantize")
-add_python_module("tensorflow/contrib/quantize/python")
-add_python_module("tensorflow/contrib/remote_fused_graph/pylib")
-add_python_module("tensorflow/contrib/remote_fused_graph/pylib/python")
-add_python_module("tensorflow/contrib/remote_fused_graph/pylib/python/ops")
-add_python_module("tensorflow/contrib/resampler")
-add_python_module("tensorflow/contrib/resampler/kernels")
-add_python_module("tensorflow/contrib/resampler/ops")
-add_python_module("tensorflow/contrib/resampler/python")
-add_python_module("tensorflow/contrib/resampler/python/ops")
-add_python_module("tensorflow/contrib/rnn")
-add_python_module("tensorflow/contrib/rnn/kernels")
-add_python_module("tensorflow/contrib/rnn/ops")
-add_python_module("tensorflow/contrib/rnn/python")
-add_python_module("tensorflow/contrib/rnn/python/kernel_tests")
-add_python_module("tensorflow/contrib/rnn/python/ops")
-add_python_module("tensorflow/contrib/saved_model")
-add_python_module("tensorflow/contrib/saved_model/python")
-add_python_module("tensorflow/contrib/saved_model/python/saved_model")
-add_python_module("tensorflow/contrib/seq2seq")
-add_python_module("tensorflow/contrib/seq2seq/kernels")
-add_python_module("tensorflow/contrib/seq2seq/ops")
-add_python_module("tensorflow/contrib/seq2seq/python")
-add_python_module("tensorflow/contrib/seq2seq/python/kernel_tests")
-add_python_module("tensorflow/contrib/seq2seq/python/ops")
-add_python_module("tensorflow/contrib/session_bundle")
-add_python_module("tensorflow/contrib/session_bundle/example")
-add_python_module("tensorflow/contrib/session_bundle/testdata")
-add_python_module("tensorflow/contrib/signal")
-add_python_module("tensorflow/contrib/signal/python")
-add_python_module("tensorflow/contrib/signal/python/ops")
-add_python_module("tensorflow/contrib/slim")
-add_python_module("tensorflow/contrib/slim/python")
-add_python_module("tensorflow/contrib/slim/python/slim")
-add_python_module("tensorflow/contrib/slim/python/slim/data")
-add_python_module("tensorflow/contrib/slim/python/slim/nets")
-add_python_module("tensorflow/contrib/solvers")
-add_python_module("tensorflow/contrib/solvers/python")
-add_python_module("tensorflow/contrib/solvers/python/ops")
-add_python_module("tensorflow/contrib/sparsemax")
-add_python_module("tensorflow/contrib/sparsemax/python")
-add_python_module("tensorflow/contrib/sparsemax/python/ops")
-add_python_module("tensorflow/contrib/specs")
-add_python_module("tensorflow/contrib/specs/python")
-add_python_module("tensorflow/contrib/staging")
-add_python_module("tensorflow/contrib/stat_summarizer")
-add_python_module("tensorflow/contrib/stateless")
-add_python_module("tensorflow/contrib/tensorboard")
-add_python_module("tensorflow/contrib/tensorboard/plugins")
-add_python_module("tensorflow/contrib/tensorboard/plugins/projector")
-add_python_module("tensorflow/contrib/tensor_forest")
-add_python_module("tensorflow/contrib/tensor_forest/client")
-add_python_module("tensorflow/contrib/tensor_forest/core")
-add_python_module("tensorflow/contrib/tensor_forest/core/ops")
-add_python_module("tensorflow/contrib/tensor_forest/data")
-add_python_module("tensorflow/contrib/tensor_forest/hybrid")
-add_python_module("tensorflow/contrib/tensor_forest/hybrid/core")
-add_python_module("tensorflow/contrib/tensor_forest/hybrid/core/ops")
-add_python_module("tensorflow/contrib/tensor_forest/hybrid/ops")
-add_python_module("tensorflow/contrib/tensor_forest/hybrid/python")
-add_python_module("tensorflow/contrib/tensor_forest/hybrid/python/kernel_tests")
-add_python_module("tensorflow/contrib/tensor_forest/hybrid/python/layers")
-add_python_module("tensorflow/contrib/tensor_forest/hybrid/python/models")
-add_python_module("tensorflow/contrib/tensor_forest/hybrid/python/ops")
-add_python_module("tensorflow/contrib/tensor_forest/python")
-add_python_module("tensorflow/contrib/tensor_forest/python/kernel_tests")
-add_python_module("tensorflow/contrib/tensor_forest/python/ops")
-add_python_module("tensorflow/contrib/testing")
-add_python_module("tensorflow/contrib/testing/python")
-add_python_module("tensorflow/contrib/testing/python/framework")
-add_python_module("tensorflow/contrib/text")
-add_python_module("tensorflow/contrib/text/kernels")
-add_python_module("tensorflow/contrib/text/ops")
-add_python_module("tensorflow/contrib/text/python")
-add_python_module("tensorflow/contrib/text/python/ops")
-add_python_module("tensorflow/contrib/tfprof")
-add_python_module("tensorflow/contrib/timeseries")
-add_python_module("tensorflow/contrib/timeseries/examples")
-add_python_module("tensorflow/contrib/timeseries/examples/data")
-add_python_module("tensorflow/contrib/timeseries/python")
-add_python_module("tensorflow/contrib/timeseries/python/timeseries")
-add_python_module("tensorflow/contrib/timeseries/python/timeseries/state_space_models")
-add_python_module("tensorflow/contrib/tpu")
-add_python_module("tensorflow/contrib/tpu/ops")
-add_python_module("tensorflow/contrib/tpu/profiler")
-add_python_module("tensorflow/contrib/tpu/python")
-add_python_module("tensorflow/contrib/tpu/python/ops")
-add_python_module("tensorflow/contrib/tpu/python/profiler")
-add_python_module("tensorflow/contrib/tpu/python/tpu")
-add_python_module("tensorflow/contrib/training")
-add_python_module("tensorflow/contrib/training/python")
-add_python_module("tensorflow/contrib/training/python/training")
-add_python_module("tensorflow/contrib/util")
-add_python_module("tensorflow/contrib/reduce_slice_ops")
-add_python_module("tensorflow/contrib/reduce_slice_ops/kernels")
-add_python_module("tensorflow/contrib/reduce_slice_ops/ops")
-add_python_module("tensorflow/contrib/reduce_slice_ops/python")
-add_python_module("tensorflow/contrib/reduce_slice_ops/python/kernel_tests")
-add_python_module("tensorflow/contrib/reduce_slice_ops/python/ops")
-add_python_module("tensorflow/contrib/summary")
 
 # Generate the tensorflow.python.platform.build_info module.
 set(BUILD_INFO_PY "${CMAKE_CURRENT_BINARY_DIR}/tf_python/tensorflow/python/platform/build_info.py")
@@ -817,6 +367,9 @@ GENERATE_PYTHON_OP_LIB("contrib_memory_stats_ops"
   DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/tf_python/tensorflow/contrib/memory_stats/ops/gen_memory_stats_ops.py)
 GENERATE_PYTHON_OP_LIB("contrib_nccl_ops"
   DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/tf_python/tensorflow/contrib/nccl/ops/gen_nccl_ops.py)
+GENERATE_PYTHON_OP_LIB("contrib_periodic_resample_ops"
+  DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/tf_python/tensorflow/contrib/periodic_resample/python/ops/gen_periodic_resample_op.py)
+
 GENERATE_PYTHON_OP_LIB("contrib_nearest_neighbor_ops"
   DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/tf_python/tensorflow/contrib/nearest_neighbor/ops/gen_nearest_neighbor_ops.py)
 GENERATE_PYTHON_OP_LIB("contrib_resampler_ops"
@@ -1019,6 +572,20 @@ target_link_libraries(pywrap_tensorflow_internal PRIVATE
 )
 
 if(WIN32)
+
+    # include contrib/periodic_resample as .so
+    #
+    set(tf_periodic_resample_srcs
+       "${tensorflow_source_dir}/tensorflow/contrib/periodic_resample/kernels/periodic_resample_op.cc"
+       "${tensorflow_source_dir}/tensorflow/contrib/periodic_resample/kernels/periodic_resample_op.h"
+       "${tensorflow_source_dir}/tensorflow/contrib/periodic_resample/ops/array_ops.cc"
+    )
+
+    AddUserOps(TARGET _periodic_resample_op
+        SOURCES "${tf_periodic_resample_srcs}"
+        DEPENDS pywrap_tensorflow_internal tf_python_ops
+        DISTCOPY ${CMAKE_CURRENT_BINARY_DIR}/tf_python/tensorflow/contrib/periodic_resample/python/ops/)
+
     # include contrib/nearest_neighbor as .so
     #
     set(tf_nearest_neighbor_srcs
diff --git a/tensorflow/contrib/cmake/tf_tests.cmake b/tensorflow/contrib/cmake/tf_tests.cmake
index 09e22285e1..2d58a48a49 100644
--- a/tensorflow/contrib/cmake/tf_tests.cmake
+++ b/tensorflow/contrib/cmake/tf_tests.cmake
@@ -154,6 +154,7 @@ if (tensorflow_BUILD_PYTHON_TESTS)
     "${tensorflow_source_dir}/tensorflow/contrib/factorization/*_test.py"
     "${tensorflow_source_dir}/tensorflow/contrib/image/*_test.py"
     "${tensorflow_source_dir}/tensorflow/python/keras/_impl/keras/*_test.py"
+    "${tensorflow_source_dir}/tensorflow/contrib/periodic_resample/python/kernel_tests/*_test.py"
     "${tensorflow_source_dir}/tensorflow/contrib/nearest_neighbor/python/kernel_tests/*_test.py"
     "${tensorflow_source_dir}/tensorflow/contrib/seq2seq/python/kernel_tests/*_test.py"
     "${tensorflow_source_dir}/tensorflow/contrib/stateless/python/kernel_tests/*_test.py"
@@ -224,6 +225,7 @@ if (tensorflow_BUILD_PYTHON_TESTS)
       # Numerical issues, calculations off.
       "${tensorflow_source_dir}/tensorflow/python/kernel_tests/concat_op_test.py"
       "${tensorflow_source_dir}/tensorflow/contrib/factorization/python/ops/wals_test.py"
+      "${tensorflow_source_dir}/tensorflow/contrib/periodic_resample/python/kernel_tests/periodic_resample_op_test.py"
       "${tensorflow_source_dir}/tensorflow/python/keras/_impl/keras/utils/data_utils_test.py"
       "${tensorflow_source_dir}/tensorflow/python/keras/_impl/keras/backend_test.py"
       "${tensorflow_source_dir}/tensorflow/python/keras/_impl/keras/preprocessing/image_test.py"
diff --git a/tensorflow/contrib/data/python/kernel_tests/BUILD b/tensorflow/contrib/data/python/kernel_tests/BUILD
index 1e8a6b26c9..2cb6b7e76c 100644
--- a/tensorflow/contrib/data/python/kernel_tests/BUILD
+++ b/tensorflow/contrib/data/python/kernel_tests/BUILD
@@ -143,6 +143,7 @@ py_test(
     size = "small",
     srcs = ["filter_dataset_op_test.py"],
     srcs_version = "PY2AND3",
+    tags = ["no_pip"],
     deps = [
         ":dataset_serialization_test",
         "//tensorflow/contrib/data/python/ops:dataset_ops",
@@ -315,6 +316,7 @@ py_test(
     size = "small",
     srcs = ["prefetch_dataset_op_test.py"],
     srcs_version = "PY2AND3",
+    tags = ["no_pip"],
     deps = [
         ":dataset_serialization_test",
         "//tensorflow/python:platform",
@@ -423,6 +425,7 @@ py_test(
     size = "medium",
     srcs = ["shuffle_dataset_op_test.py"],
     srcs_version = "PY2AND3",
+    tags = ["no_pip"],
     deps = [
         ":dataset_serialization_test",
         "//tensorflow/contrib/data/python/ops:dataset_ops",
diff --git a/tensorflow/contrib/distributions/BUILD b/tensorflow/contrib/distributions/BUILD
index 145b9495ff..b2c641f8ab 100644
--- a/tensorflow/contrib/distributions/BUILD
+++ b/tensorflow/contrib/distributions/BUILD
@@ -204,6 +204,24 @@ cuda_py_test(
     ],
 )
 
+cuda_py_test(
+    name = "half_normal_test",
+    size = "medium",
+    srcs = ["python/kernel_tests/half_normal_test.py"],
+    additional_deps = [
+        ":distributions_py",
+        "//third_party/py/numpy",
+        "//tensorflow/python:client",
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:framework_for_generated_wrappers",
+        "//tensorflow/python:framework_test_lib",
+        "//tensorflow/python:gradients",
+        "//tensorflow/python:nn_ops",
+        "//tensorflow/python:platform_test",
+        "//tensorflow/python:variables",
+    ],
+)
+
 cuda_py_test(
     name = "inverse_gamma_test",
     srcs = ["python/kernel_tests/inverse_gamma_test.py"],
diff --git a/tensorflow/contrib/distributions/__init__.py b/tensorflow/contrib/distributions/__init__.py
index 0d12d83893..66827179e9 100644
--- a/tensorflow/contrib/distributions/__init__.py
+++ b/tensorflow/contrib/distributions/__init__.py
@@ -36,6 +36,7 @@ from tensorflow.contrib.distributions.python.ops.distribution_util import softpl
 from tensorflow.contrib.distributions.python.ops.distribution_util import tridiag
 from tensorflow.contrib.distributions.python.ops.estimator import *
 from tensorflow.contrib.distributions.python.ops.geometric import *
+from tensorflow.contrib.distributions.python.ops.half_normal import *
 from tensorflow.contrib.distributions.python.ops.independent import *
 from tensorflow.contrib.distributions.python.ops.inverse_gamma import *
 from tensorflow.contrib.distributions.python.ops.logistic import *
@@ -107,6 +108,7 @@ _allowed_symbols = [
     'Gamma',
     'GammaWithSoftplusConcentrationRate',
     'Geometric',
+    'HalfNormal',
     'Independent',
     'InverseGamma',
     'InverseGammaWithSoftplusConcentrationRate',
diff --git a/tensorflow/contrib/distributions/python/kernel_tests/half_normal_test.py b/tensorflow/contrib/distributions/python/kernel_tests/half_normal_test.py
new file mode 100644
index 0000000000..a4e7566008
--- /dev/null
+++ b/tensorflow/contrib/distributions/python/kernel_tests/half_normal_test.py
@@ -0,0 +1,320 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for initializers."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import importlib
+import numpy as np
+
+from tensorflow.contrib.distributions.python.ops import half_normal as hn_lib
+from tensorflow.python.framework import constant_op
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import ops
+from tensorflow.python.framework import tensor_shape
+from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import gradients_impl
+from tensorflow.python.ops import variables
+from tensorflow.python.platform import test
+from tensorflow.python.platform import tf_logging
+
+
+def try_import(name):  # pylint: disable=invalid-name
+  module = None
+  try:
+    module = importlib.import_module(name)
+  except ImportError as e:
+    tf_logging.warning("Could not import %s: %s" % (name, str(e)))
+  return module
+
+stats = try_import("scipy.stats")
+
+
+class HalfNormalTest(test.TestCase):
+
+  def setUp(self):
+    self._rng = np.random.RandomState(123)
+
+  def assertAllFinite(self, tensor):
+    is_finite = np.isfinite(tensor.eval())
+    all_true = np.ones_like(is_finite, dtype=np.bool)
+    self.assertAllEqual(all_true, is_finite)
+
+  def _testParamShapes(self, sample_shape, expected):
+    with self.test_session():
+      param_shapes = hn_lib.HalfNormal.param_shapes(sample_shape)
+      scale_shape = param_shapes["scale"]
+      self.assertAllEqual(expected, scale_shape.eval())
+      scale = array_ops.ones(scale_shape)
+      self.assertAllEqual(
+          expected,
+          array_ops.shape(hn_lib.HalfNormal(scale).sample()).eval())
+
+  def _testParamStaticShapes(self, sample_shape, expected):
+    param_shapes = hn_lib.HalfNormal.param_static_shapes(sample_shape)
+    scale_shape = param_shapes["scale"]
+    self.assertEqual(expected, scale_shape)
+
+  def _testBatchShapes(self, dist, tensor):
+    self.assertAllEqual(dist.batch_shape_tensor().eval(), tensor.shape)
+    self.assertAllEqual(dist.batch_shape_tensor().eval(), tensor.eval().shape)
+    self.assertAllEqual(dist.batch_shape, tensor.shape)
+    self.assertAllEqual(dist.batch_shape, tensor.eval().shape)
+
+  def testParamShapes(self):
+    sample_shape = [10, 3, 4]
+    self._testParamShapes(sample_shape, sample_shape)
+    self._testParamShapes(constant_op.constant(sample_shape), sample_shape)
+
+  def testParamStaticShapes(self):
+    sample_shape = [10, 3, 4]
+    self._testParamStaticShapes(sample_shape, sample_shape)
+    self._testParamStaticShapes(
+        tensor_shape.TensorShape(sample_shape), sample_shape)
+
+  def testHalfNormalLogPDF(self):
+    with self.test_session():
+      batch_size = 6
+      scale = constant_op.constant([3.0] * batch_size)
+      x = np.array([-2.5, 2.5, 4.0, 0.0, -1.0, 2.0], dtype=np.float32)
+      halfnorm = hn_lib.HalfNormal(scale=scale)
+
+      log_pdf = halfnorm.log_prob(x)
+      self._testBatchShapes(halfnorm, log_pdf)
+
+      pdf = halfnorm.prob(x)
+      self._testBatchShapes(halfnorm, pdf)
+
+      if not stats:
+        return
+      expected_log_pdf = stats.halfnorm(scale=scale.eval()).logpdf(x)
+      self.assertAllClose(expected_log_pdf, log_pdf.eval())
+      self.assertAllClose(np.exp(expected_log_pdf), pdf.eval())
+
+  def testHalfNormalLogPDFMultidimensional(self):
+    with self.test_session():
+      batch_size = 6
+      scale = constant_op.constant([[3.0, 1.0]] * batch_size)
+      x = np.array([[-2.5, 2.5, 4.0, 0.0, -1.0, 2.0]], dtype=np.float32).T
+      halfnorm = hn_lib.HalfNormal(scale=scale)
+
+      log_pdf = halfnorm.log_prob(x)
+      self._testBatchShapes(halfnorm, log_pdf)
+
+      pdf = halfnorm.prob(x)
+      self._testBatchShapes(halfnorm, pdf)
+
+      if not stats:
+        return
+      expected_log_pdf = stats.halfnorm(scale=scale.eval()).logpdf(x)
+      self.assertAllClose(expected_log_pdf, log_pdf.eval())
+      self.assertAllClose(np.exp(expected_log_pdf), pdf.eval())
+
+  def testHalfNormalCDF(self):
+    with self.test_session():
+      batch_size = 50
+      scale = self._rng.rand(batch_size) + 1.0
+      x = np.linspace(-8.0, 8.0, batch_size).astype(np.float64)
+      halfnorm = hn_lib.HalfNormal(scale=scale)
+
+      cdf = halfnorm.cdf(x)
+      self._testBatchShapes(halfnorm, cdf)
+
+      log_cdf = halfnorm.log_cdf(x)
+      self._testBatchShapes(halfnorm, log_cdf)
+
+      if not stats:
+        return
+      expected_logcdf = stats.halfnorm(scale=scale).logcdf(x)
+      self.assertAllClose(expected_logcdf, log_cdf.eval(), atol=0)
+      self.assertAllClose(np.exp(expected_logcdf), cdf.eval(), atol=0)
+
+  def testHalfNormalSurvivalFunction(self):
+    with self.test_session():
+      batch_size = 50
+      scale = self._rng.rand(batch_size) + 1.0
+      x = np.linspace(-8.0, 8.0, batch_size).astype(np.float64)
+      halfnorm = hn_lib.HalfNormal(scale=scale)
+
+      sf = halfnorm.survival_function(x)
+      self._testBatchShapes(halfnorm, sf)
+
+      log_sf = halfnorm.log_survival_function(x)
+      self._testBatchShapes(halfnorm, log_sf)
+
+      if not stats:
+        return
+      expected_logsf = stats.halfnorm(scale=scale).logsf(x)
+      self.assertAllClose(expected_logsf, log_sf.eval(), atol=0)
+      self.assertAllClose(np.exp(expected_logsf), sf.eval(), atol=0)
+
+  def testHalfNormalQuantile(self):
+    with self.test_session():
+      batch_size = 50
+      scale = self._rng.rand(batch_size) + 1.0
+      p = np.linspace(0., 1.0, batch_size).astype(np.float64)
+
+      halfnorm = hn_lib.HalfNormal(scale=scale)
+      x = halfnorm.quantile(p)
+      self._testBatchShapes(halfnorm, x)
+
+      if not stats:
+        return
+      expected_x = stats.halfnorm(scale=scale).ppf(p)
+      self.assertAllClose(expected_x, x.eval(), atol=0)
+
+  def testFiniteGradients(self):
+    for dtype in [np.float32, np.float64]:
+      g = ops.Graph()
+      with g.as_default():
+        scale = variables.Variable(dtype(3.0))
+        dist = hn_lib.HalfNormal(scale=scale)
+        x = np.array([0.01, 0.1, 1., 5., 10.]).astype(dtype)
+        for func in [
+            dist.cdf, dist.log_cdf, dist.survival_function,
+            dist.log_prob, dist.prob, dist.log_survival_function,
+        ]:
+          print(func.__name__)
+          value = func(x)
+          grads = gradients_impl.gradients(value, [scale])
+          with self.test_session(graph=g):
+            variables.global_variables_initializer().run()
+            self.assertAllFinite(value)
+            self.assertAllFinite(grads[0])
+
+  def testHalfNormalEntropy(self):
+    with self.test_session():
+      scale = np.array([[1.0, 2.0, 3.0]])
+      halfnorm = hn_lib.HalfNormal(scale=scale)
+
+      # See https://en.wikipedia.org/wiki/Half-normal_distribution for the
+      # entropy formula used here.
+      expected_entropy = 0.5 * np.log(np.pi * scale ** 2.0 / 2.0) + 0.5
+
+      entropy = halfnorm.entropy()
+      self._testBatchShapes(halfnorm, entropy)
+      self.assertAllClose(expected_entropy, entropy.eval())
+
+  def testHalfNormalMeanAndMode(self):
+    with self.test_session():
+      scale = np.array([11., 12., 13.])
+
+      halfnorm = hn_lib.HalfNormal(scale=scale)
+      expected_mean = scale * np.sqrt(2.0) / np.sqrt(np.pi)
+
+      self.assertAllEqual((3,), halfnorm.mean().eval().shape)
+      self.assertAllEqual(expected_mean, halfnorm.mean().eval())
+
+      self.assertAllEqual((3,), halfnorm.mode().eval().shape)
+      self.assertAllEqual([0., 0., 0.], halfnorm.mode().eval())
+
+  def testHalfNormalVariance(self):
+    with self.test_session():
+      scale = np.array([7., 7., 7.])
+      halfnorm = hn_lib.HalfNormal(scale=scale)
+      expected_variance = scale ** 2.0 * (1.0 - 2.0 / np.pi)
+
+      self.assertAllEqual((3,), halfnorm.variance().eval().shape)
+      self.assertAllEqual(expected_variance, halfnorm.variance().eval())
+
+  def testHalfNormalStandardDeviation(self):
+    with self.test_session():
+      scale = np.array([7., 7., 7.])
+      halfnorm = hn_lib.HalfNormal(scale=scale)
+      expected_variance = scale ** 2.0 * (1.0 - 2.0 / np.pi)
+
+      self.assertAllEqual((3,), halfnorm.stddev().shape)
+      self.assertAllEqual(np.sqrt(expected_variance), halfnorm.stddev().eval())
+
+  def testHalfNormalSample(self):
+    with self.test_session():
+      scale = constant_op.constant(3.0)
+      n = constant_op.constant(100000)
+      halfnorm = hn_lib.HalfNormal(scale=scale)
+
+      sample = halfnorm.sample(n)
+
+      self.assertEqual(sample.eval().shape, (100000,))
+      self.assertAllClose(sample.eval().mean(),
+                          3.0 * np.sqrt(2.0) / np.sqrt(np.pi), atol=1e-1)
+
+      expected_shape = tensor_shape.TensorShape([n.eval()]).concatenate(
+          tensor_shape.TensorShape(halfnorm.batch_shape_tensor().eval()))
+      self.assertAllEqual(expected_shape, sample.shape)
+      self.assertAllEqual(expected_shape, sample.eval().shape)
+
+      expected_shape_static = (tensor_shape.TensorShape(
+          [n.eval()]).concatenate(halfnorm.batch_shape))
+      self.assertAllEqual(expected_shape_static, sample.shape)
+      self.assertAllEqual(expected_shape_static, sample.eval().shape)
+
+  def testHalfNormalSampleMultiDimensional(self):
+    with self.test_session():
+      batch_size = 2
+      scale = constant_op.constant([[2.0, 3.0]] * batch_size)
+      n = constant_op.constant(100000)
+      halfnorm = hn_lib.HalfNormal(scale=scale)
+
+      sample = halfnorm.sample(n)
+      self.assertEqual(sample.shape, (100000, batch_size, 2))
+      self.assertAllClose(sample.eval()[:, 0, 0].mean(),
+                          2.0 * np.sqrt(2.0) / np.sqrt(np.pi), atol=1e-1)
+      self.assertAllClose(sample.eval()[:, 0, 1].mean(),
+                          3.0 * np.sqrt(2.0) / np.sqrt(np.pi), atol=1e-1)
+
+      expected_shape = tensor_shape.TensorShape([n.eval()]).concatenate(
+          tensor_shape.TensorShape(halfnorm.batch_shape_tensor().eval()))
+      self.assertAllEqual(expected_shape, sample.shape)
+      self.assertAllEqual(expected_shape, sample.eval().shape)
+
+      expected_shape_static = (tensor_shape.TensorShape(
+          [n.eval()]).concatenate(halfnorm.batch_shape))
+      self.assertAllEqual(expected_shape_static, sample.shape)
+      self.assertAllEqual(expected_shape_static, sample.eval().shape)
+
+  def testNegativeSigmaFails(self):
+    with self.test_session():
+      halfnorm = hn_lib.HalfNormal(scale=[-5.], validate_args=True, name="G")
+      with self.assertRaisesOpError("Condition x > 0 did not hold"):
+        halfnorm.mean().eval()
+
+  def testHalfNormalShape(self):
+    with self.test_session():
+      scale = constant_op.constant([6.0] * 5)
+      halfnorm = hn_lib.HalfNormal(scale=scale)
+
+      self.assertEqual(halfnorm.batch_shape_tensor().eval(), [5])
+      self.assertEqual(halfnorm.batch_shape, tensor_shape.TensorShape([5]))
+      self.assertAllEqual(halfnorm.event_shape_tensor().eval(), [])
+      self.assertEqual(halfnorm.event_shape, tensor_shape.TensorShape([]))
+
+  def testHalfNormalShapeWithPlaceholders(self):
+    scale = array_ops.placeholder(dtype=dtypes.float32)
+    halfnorm = hn_lib.HalfNormal(scale=scale)
+
+    with self.test_session() as sess:
+      # get_batch_shape should return an "<unknown>" tensor.
+      self.assertEqual(halfnorm.batch_shape, tensor_shape.TensorShape(None))
+      self.assertEqual(halfnorm.event_shape, ())
+      self.assertAllEqual(halfnorm.event_shape_tensor().eval(), [])
+      self.assertAllEqual(
+          sess.run(halfnorm.batch_shape_tensor(),
+                   feed_dict={scale: [1.0, 2.0]}), [2])
+
+
+if __name__ == "__main__":
+  test.main()
diff --git a/tensorflow/contrib/distributions/python/ops/half_normal.py b/tensorflow/contrib/distributions/python/ops/half_normal.py
new file mode 100644
index 0000000000..fc0751a6e0
--- /dev/null
+++ b/tensorflow/contrib/distributions/python/ops/half_normal.py
@@ -0,0 +1,171 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""The Half Normal distribution class."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import numpy as np
+
+from tensorflow.python.framework import constant_op
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import ops
+from tensorflow.python.framework import tensor_shape
+from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import check_ops
+from tensorflow.python.ops import math_ops
+from tensorflow.python.ops import nn
+from tensorflow.python.ops import random_ops
+from tensorflow.python.ops.distributions import distribution
+from tensorflow.python.ops.distributions import special_math
+
+
+__all__ = [
+    "HalfNormal",
+]
+
+
+class HalfNormal(distribution.Distribution):
+  """The Half Normal distribution with scale `scale`.
+
+  #### Mathematical details
+
+  The half normal is a transformation of a centered normal distribution.
+  If some random variable `X` has normal distribution,
+  ```none
+  X ~ Normal(0.0, scale)
+  Y = |X|
+  ```
+  Then `Y` will have half normal distribution. The probability density
+  function (pdf) is:
+
+  ```none
+  pdf(x; scale, x > 0) = sqrt(2) / (scale * sqrt(pi)) *
+    exp(- 1/2 * (x / scale) ** 2)
+  )
+  ```
+  Where `scale = sigma` is the standard deviation of the underlying normal
+  distribution.
+
+  #### Examples
+
+  Examples of initialization of one or a batch of distributions.
+
+  ```python
+  # Define a single scalar HalfNormal distribution.
+  dist = tf.contrib.distributions.HalfNormal(scale=3.0)
+
+  # Evaluate the cdf at 1, returning a scalar.
+  dist.cdf(1.)
+
+  # Define a batch of two scalar valued HalfNormals.
+  # The first has scale 11.0, the second 22.0
+  dist = tf.contrib.distributions.HalfNormal(scale=[11.0, 22.0])
+
+  # Evaluate the pdf of the first distribution on 1.0, and the second on 1.5,
+  # returning a length two tensor.
+  dist.prob([1.0, 1.5])
+
+  # Get 3 samples, returning a 3 x 2 tensor.
+  dist.sample([3])
+  ```
+
+  """
+
+  def __init__(self,
+               scale,
+               validate_args=False,
+               allow_nan_stats=True,
+               name="HalfNormal"):
+    """Construct HalfNormals with scale `scale`.
+
+    Args:
+      scale: Floating point tensor; the scales of the distribution(s).
+        Must contain only positive values.
+      validate_args: Python `bool`, default `False`. When `True` distribution
+        parameters are checked for validity despite possibly degrading runtime
+        performance. When `False` invalid inputs may silently render incorrect
+        outputs.
+      allow_nan_stats: Python `bool`, default `True`. When `True`,
+        statistics (e.g., mean, mode, variance) use the value "`NaN`" to
+        indicate the result is undefined. When `False`, an exception is raised
+        if one or more of the statistic's batch members are undefined.
+      name: Python `str` name prefixed to Ops created by this class.
+    """
+    parameters = locals()
+    with ops.name_scope(name, values=[scale]):
+      with ops.control_dependencies([check_ops.assert_positive(scale)] if
+                                    validate_args else []):
+        self._scale = array_ops.identity(scale, name="scale")
+    super(HalfNormal, self).__init__(
+        dtype=self._scale.dtype,
+        reparameterization_type=distribution.FULLY_REPARAMETERIZED,
+        validate_args=validate_args,
+        allow_nan_stats=allow_nan_stats,
+        parameters=parameters,
+        graph_parents=[self._scale],
+        name=name)
+
+  @staticmethod
+  def _param_shapes(sample_shape):
+    return {"scale": ops.convert_to_tensor(sample_shape, dtype=dtypes.int32)}
+
+  @property
+  def scale(self):
+    """Distribution parameter for the scale."""
+    return self._scale
+
+  def _batch_shape_tensor(self):
+    return array_ops.shape(self.scale)
+
+  def _batch_shape(self):
+    return self.scale.shape
+
+  def _event_shape_tensor(self):
+    return constant_op.constant([], dtype=dtypes.int32)
+
+  def _event_shape(self):
+    return tensor_shape.scalar()
+
+  def _sample_n(self, n, seed=None):
+    shape = array_ops.concat([[n], self.batch_shape_tensor()], 0)
+    sampled = random_ops.random_normal(
+        shape=shape, mean=0., stddev=1., dtype=self.dtype, seed=seed)
+    return math_ops.abs(sampled * self.scale)
+
+  def _prob(self, x):
+    coeff = np.sqrt(2) / self.scale / np.sqrt(np.pi)
+    pdf = coeff * math_ops.exp(- 0.5 * (x / self.scale) ** 2)
+    return pdf * math_ops.cast(x >= 0, self.dtype)
+
+  def _cdf(self, x):
+    truncated_x = nn.relu(x)
+    return math_ops.erf(truncated_x / self.scale / np.sqrt(2.0))
+
+  def _entropy(self):
+    return 0.5 * math_ops.log(np.pi * self.scale ** 2.0 / 2.0) + 0.5
+
+  def _mean(self):
+    return self.scale * np.sqrt(2.0) / np.sqrt(np.pi)
+
+  def _quantile(self, p):
+    return np.sqrt(2.0) * self.scale * special_math.erfinv(p)
+
+  def _mode(self):
+    return array_ops.zeros(self.batch_shape_tensor())
+
+  def _variance(self):
+    return self.scale ** 2.0 * (1.0 - 2.0 / np.pi)
diff --git a/tensorflow/contrib/distributions/python/ops/mixture_same_family.py b/tensorflow/contrib/distributions/python/ops/mixture_same_family.py
index 5448918a50..0623b2c726 100644
--- a/tensorflow/contrib/distributions/python/ops/mixture_same_family.py
+++ b/tensorflow/contrib/distributions/python/ops/mixture_same_family.py
@@ -320,13 +320,14 @@ class MixtureSameFamily(distribution.Distribution):
         return array_ops.shape(d.batch_shape_tensor())[0]
       dist_batch_ndims = _get_ndims(self)
       cat_batch_ndims = _get_ndims(self.mixture_distribution)
-      bnd = distribution_util.pick_vector(
+      pad_ndims = distribution_util.pick_vector(
           self.mixture_distribution.is_scalar_batch(),
-          [dist_batch_ndims], [cat_batch_ndims])[0]
+          [dist_batch_ndims],
+          [dist_batch_ndims - cat_batch_ndims])[0]
       s = array_ops.shape(x)
       x = array_ops.reshape(x, shape=array_ops.concat([
           s[:-1],
-          array_ops.ones([bnd], dtype=dtypes.int32),
+          array_ops.ones([pad_ndims], dtype=dtypes.int32),
           s[-1:],
           array_ops.ones([self._event_ndims], dtype=dtypes.int32),
       ], axis=0))
diff --git a/tensorflow/contrib/eager/python/examples/spinn/BUILD b/tensorflow/contrib/eager/python/examples/spinn/BUILD
index 0263d21325..a1f8a759e2 100644
--- a/tensorflow/contrib/eager/python/examples/spinn/BUILD
+++ b/tensorflow/contrib/eager/python/examples/spinn/BUILD
@@ -38,4 +38,5 @@ cuda_py_test(
         "//tensorflow/python:client_testlib",
         "//tensorflow/python:framework_test_lib",
     ],
+    tags = ["no_pip"],  # because spinn.py is under third_party/.
 )
diff --git a/tensorflow/contrib/framework/python/ops/variables.py b/tensorflow/contrib/framework/python/ops/variables.py
index 07b7857e7b..3f1ece4510 100644
--- a/tensorflow/contrib/framework/python/ops/variables.py
+++ b/tensorflow/contrib/framework/python/ops/variables.py
@@ -441,7 +441,7 @@ def get_unique_variable(var_op_name):
   """
   candidates = get_variables(scope=var_op_name)
   if not candidates:
-    raise ValueError('Couldnt find variable %s' % var_op_name)
+    raise ValueError('Couldn\'t find variable %s' % var_op_name)
 
   for candidate in candidates:
     if candidate.op.name == var_op_name:
diff --git a/tensorflow/contrib/layers/python/layers/layers.py b/tensorflow/contrib/layers/python/layers/layers.py
index 2d42875b46..0d25a09852 100644
--- a/tensorflow/contrib/layers/python/layers/layers.py
+++ b/tensorflow/contrib/layers/python/layers/layers.py
@@ -2654,51 +2654,52 @@ def spatial_softmax(features,
     ValueError: If unexpected data_format specified.
     ValueError: If num_channels dimension is unspecified.
   """
-  shape = array_ops.shape(features)
-  static_shape = features.shape
-  if data_format == DATA_FORMAT_NHWC:
-    height, width, num_channels = shape[1], shape[2], static_shape[3]
-  elif data_format == DATA_FORMAT_NCHW:
-    num_channels, height, width = static_shape[1], shape[2], shape[3]
-  else:
-    raise ValueError('data_format has to be either NCHW or NHWC.')
-  if num_channels.value is None:
-    raise ValueError('The num_channels dimension of the inputs to '
-                     '`spatial_softmax` should be defined. Found `None`.')
-
-  with ops.name_scope(name, 'spatial_softmax', [features]) as name:
-    # Create tensors for x and y coordinate values, scaled to range [-1, 1].
-    pos_x, pos_y = array_ops.meshgrid(math_ops.lin_space(-1., 1., num=height),
-                                      math_ops.lin_space(-1., 1., num=width),
-                                      indexing='ij')
-    pos_x = array_ops.reshape(pos_x, [height * width])
-    pos_y = array_ops.reshape(pos_y, [height * width])
-    if temperature is None:
-      temperature_collections = utils.get_variable_collections(
-          variables_collections, 'temperature')
-      temperature = variables.model_variable(
-          'temperature',
-          shape=(),
-          dtype=dtypes.float32,
-          initializer=init_ops.ones_initializer(),
-          collections=temperature_collections,
-          trainable=trainable)
-    if data_format == 'NCHW':
-      features = array_ops.reshape(features, [-1, height * width])
+  with variable_scope.variable_scope(name, 'spatial_softmax'):
+    shape = array_ops.shape(features)
+    static_shape = features.shape
+    if data_format == DATA_FORMAT_NHWC:
+      height, width, num_channels = shape[1], shape[2], static_shape[3]
+    elif data_format == DATA_FORMAT_NCHW:
+      num_channels, height, width = static_shape[1], shape[2], shape[3]
     else:
-      features = array_ops.reshape(
-          array_ops.transpose(features, [0, 3, 1, 2]), [-1, height * width])
-
-    softmax_attention = nn.softmax(features/temperature)
-    expected_x = math_ops.reduce_sum(
-        pos_x * softmax_attention, [1], keep_dims=True)
-    expected_y = math_ops.reduce_sum(
-        pos_y * softmax_attention, [1], keep_dims=True)
-    expected_xy = array_ops.concat([expected_x, expected_y], 1)
-    feature_keypoints = array_ops.reshape(
-        expected_xy, [-1, num_channels.value * 2])
-    feature_keypoints.set_shape([None, num_channels.value * 2])
-    return feature_keypoints
+      raise ValueError('data_format has to be either NCHW or NHWC.')
+    if num_channels.value is None:
+      raise ValueError('The num_channels dimension of the inputs to '
+                       '`spatial_softmax` should be defined. Found `None`.')
+
+    with ops.name_scope('spatial_softmax_op', 'spatial_softmax_op', [features]):
+      # Create tensors for x and y coordinate values, scaled to range [-1, 1].
+      pos_x, pos_y = array_ops.meshgrid(math_ops.lin_space(-1., 1., num=height),
+                                        math_ops.lin_space(-1., 1., num=width),
+                                        indexing='ij')
+      pos_x = array_ops.reshape(pos_x, [height * width])
+      pos_y = array_ops.reshape(pos_y, [height * width])
+      if temperature is None:
+        temperature_collections = utils.get_variable_collections(
+            variables_collections, 'temperature')
+        temperature = variables.model_variable(
+            'temperature',
+            shape=(),
+            dtype=dtypes.float32,
+            initializer=init_ops.ones_initializer(),
+            collections=temperature_collections,
+            trainable=trainable)
+      if data_format == 'NCHW':
+        features = array_ops.reshape(features, [-1, height * width])
+      else:
+        features = array_ops.reshape(
+            array_ops.transpose(features, [0, 3, 1, 2]), [-1, height * width])
+
+      softmax_attention = nn.softmax(features/temperature)
+      expected_x = math_ops.reduce_sum(
+          pos_x * softmax_attention, [1], keep_dims=True)
+      expected_y = math_ops.reduce_sum(
+          pos_y * softmax_attention, [1], keep_dims=True)
+      expected_xy = array_ops.concat([expected_x, expected_y], 1)
+      feature_keypoints = array_ops.reshape(
+          expected_xy, [-1, num_channels.value * 2])
+      feature_keypoints.set_shape([None, num_channels.value * 2])
+  return feature_keypoints
 
 
 def stack(inputs, layer, stack_args, **kwargs):
diff --git a/tensorflow/contrib/learn/python/learn/estimators/estimator.py b/tensorflow/contrib/learn/python/learn/estimators/estimator.py
index 788d2d0b1a..05ed8b3409 100644
--- a/tensorflow/contrib/learn/python/learn/estimators/estimator.py
+++ b/tensorflow/contrib/learn/python/learn/estimators/estimator.py
@@ -30,7 +30,6 @@ import six
 
 from google.protobuf import message
 from tensorflow.contrib import layers
-from tensorflow.contrib import metrics as metrics_lib
 from tensorflow.contrib.framework import deprecated
 from tensorflow.contrib.framework import deprecated_args
 from tensorflow.contrib.framework import list_variables
@@ -60,6 +59,7 @@ from tensorflow.python.framework import sparse_tensor
 from tensorflow.python.framework import tensor_util
 from tensorflow.python.ops import control_flow_ops
 from tensorflow.python.ops import lookup_ops
+from tensorflow.python.ops import metrics as metrics_lib
 from tensorflow.python.ops import resources
 from tensorflow.python.ops import variables
 from tensorflow.python.platform import gfile
@@ -1230,7 +1230,7 @@ class Estimator(BaseEstimator):
 
     if metric_key.MetricKey.LOSS not in model_fn_ops.eval_metric_ops:
       model_fn_ops.eval_metric_ops[metric_key.MetricKey.LOSS] = (
-          metrics_lib.streaming_mean(model_fn_ops.loss))
+          metrics_lib.mean(model_fn_ops.loss))
     return model_fn_ops
 
   def _get_predict_ops(self, features):
diff --git a/tensorflow/contrib/lite/README.md b/tensorflow/contrib/lite/README.md
index c7464bcc9d..fc9144d5fc 100644
--- a/tensorflow/contrib/lite/README.md
+++ b/tensorflow/contrib/lite/README.md
@@ -4,7 +4,7 @@ TensorFlow Lite is TensorFlow's lightweight solution for mobile and embedded dev
 TensorFlow Lite uses many techniques for achieving low latency like optimizing the kernels for specific mobile apps, pre-fused activations, quantized kernels that allow smaller and faster (fixed-point math) models, and in the future, leverage specialized machine learning hardware to get the best possible performance for a particular model on a particular device.
 
 ![image](g3doc/TFLite-Architecture.jpg)
-# Getting Started with a Demo App
+# Getting Started with an Android Demo App
 
 This section contains an example application using TensorFlow Lite for Android devices. The demo is a sample camera app that classifies images continuously using a quantized Mobilenet model. A device running Android 5.0 ( API 21) or higher is required to run the demo.
 
@@ -17,7 +17,7 @@ There are 3 ways to get the demo app to your device
 In the demo app, inference is done using the TensorFlow Lite Java API. The demo app classifies frames in real-time, displaying the top most probable classifications. It also displays the time taken to detect the object.
 
 ## Downloading the pre-built binary
-The  fastest path to trying the demo, is to download the pre-built binary
+The fastest path to trying the demo, is to download the pre-built binary
 [TfLiteCameraDemo.apk](https://storage.googleapis.com/download.tensorflow.org/deps/tflite/TfLiteCameraDemo.apk)
 
 Once the apk is installed, click the app icon to start the app. The first-time the app is opened, the app asks for runtime permissions to access the device camera. The demo app opens the back-camera of the device and recognizes the objects in the camera's field of view. At the bottom of the image (or at the left of the image if the device is in landscape mode), it shows the latency of classification and the top three objects classified.
@@ -69,7 +69,7 @@ android_ndk_repository(
 
 Additional details on building with Android can be found [here](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/lite/java/demo/README.md).
 
-### Build the  source code
+### Build the source code
 Run bazel with the following command to build the demo.
 
 Build the demo app:
@@ -86,6 +86,17 @@ environment (due to a Bazel bug).
 ### More about the demo
 The demo is resizing each camera image frame to (224 width * 224 height) to match the  quantized Mobilenet model being used. The resized image is converted into a ByteBuffer row by row of size 1 * 224 * 224 * 3 bytes, where 1 is the number of images in a batch 224 * 224 is the width and height of the image 3 bytes represents three colors of a pixel. This demo uses the TensorFlow Lite Java inference API for models which take a single input and provide a single output. This outputs a two-dimensional array, with the first dimension being the category index and the second dimension being the confidence of classification. The Mobilenet model has 1001 unique categories and the app sorts the probabilities of all the categories and displays the top three. The Mobilenet quantized model is bundled within the assets directory of the app.
 
+# iOS Demo App
+
+Similar to the Android demo app, there's an iOS camera app that uses exactly the same model (224 * 224 quantized Mobilenet).
+
+This demo app requires a camera so it doesn't work with simulators. It need to be executed on a real iOS device. Follow the instructions to build and run the demo app:
+
+1.   Follow the Building section [here](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/contrib/lite/g3doc/ios.md#building) to build the universal iOS library for TensorFlow Lite.
+1.   Install [CocoaPods](https://cocoapods.org/) if it wasn't installed yet: `sudo gem install cocoapods`.
+1.   Run `pod install` in `tensorflow/contrib/lite/examples/ios/camera` to generate the workspace file.
+1.   Open the project by running `open tflite_camera_example.xcworkspace`, and build the app in XCode.
+
 # TensorFlow Lite Quick Start
 
 ## Step 1. Decide which GraphDef to use
diff --git a/tensorflow/contrib/lite/download_dependencies.sh b/tensorflow/contrib/lite/download_dependencies.sh
index 778d618361..7fce1ba346 100755
--- a/tensorflow/contrib/lite/download_dependencies.sh
+++ b/tensorflow/contrib/lite/download_dependencies.sh
@@ -19,6 +19,13 @@ set -e
 DOWNLOADS_DIR=tensorflow/contrib/lite/downloads
 BZL_FILE_PATH=tensorflow/workspace.bzl
 
+# Ensure it is being run from repo root
+if [ ! -f $BZL_FILE_PATH ]; then
+  echo "Could not find ${BZL_FILE_PATH}":
+  echo "Likely you are not running this from the root directory of the repository.";
+  exit 1;
+fi
+
 EIGEN_URL="$(grep -o 'http.*bitbucket.org/eigen/eigen/get/.*tar\.gz' "${BZL_FILE_PATH}" | grep -v bazel-mirror | head -n1)"
 GEMMLOWP_URL="$(grep -o 'https://mirror.bazel.build/github.com/google/gemmlowp/.*zip' "${BZL_FILE_PATH}" | head -n1)"
 GOOGLETEST_URL="https://github.com/google/googletest/archive/release-1.8.0.tar.gz"
diff --git a/tensorflow/contrib/lite/examples/ios/camera/CameraExampleViewController.mm b/tensorflow/contrib/lite/examples/ios/camera/CameraExampleViewController.mm
index ea398ad14e..10f31bb6f1 100644
--- a/tensorflow/contrib/lite/examples/ios/camera/CameraExampleViewController.mm
+++ b/tensorflow/contrib/lite/examples/ios/camera/CameraExampleViewController.mm
@@ -123,7 +123,11 @@ static void GetTopN(const uint8_t* prediction, const int prediction_size, const
   AVCaptureDevice* device = [AVCaptureDevice defaultDeviceWithMediaType:AVMediaTypeVideo];
   AVCaptureDeviceInput* deviceInput =
       [AVCaptureDeviceInput deviceInputWithDevice:device error:&error];
-  assert(error == nil);
+
+  if (error != nil) {
+    NSLog(@"Failed to initialize AVCaptureDeviceInput. Note: This app doesn't work with simulator");
+    assert(NO);
+  }
 
   if ([session canAddInput:deviceInput]) [session addInput:deviceInput];
 
diff --git a/tensorflow/contrib/lite/examples/ios/simple/AppDelegate.mm b/tensorflow/contrib/lite/examples/ios/simple/AppDelegate.mm
index fe26ceec42..d1215fa0bf 100644
--- a/tensorflow/contrib/lite/examples/ios/simple/AppDelegate.mm
+++ b/tensorflow/contrib/lite/examples/ios/simple/AppDelegate.mm
@@ -20,6 +20,7 @@
 
 - (BOOL)application:(UIApplication *)application
     didFinishLaunchingWithOptions:(NSDictionary *)launchOptions {
+
   UITabBarController *bar = [[UITabBarController alloc] init];
   [bar setViewControllers:@[ [[RunModelViewController alloc] init] ]];
   bar.selectedIndex = 0;
diff --git a/tensorflow/contrib/lite/examples/ios/simple/ios_image_load.mm b/tensorflow/contrib/lite/examples/ios/simple/ios_image_load.mm
index cb19377d7e..cb0fe1a765 100644
--- a/tensorflow/contrib/lite/examples/ios/simple/ios_image_load.mm
+++ b/tensorflow/contrib/lite/examples/ios/simple/ios_image_load.mm
@@ -31,6 +31,7 @@ std::vector<uint8_t> LoadImageFromFile(const char* file_name, int* out_width, in
   std::vector<uint8_t> file_data(bytes_in_file);
   fread(file_data.data(), 1, bytes_in_file, file_handle);
   fclose(file_handle);
+
   CFDataRef file_data_ref =
       CFDataCreateWithBytesNoCopy(NULL, file_data.data(), bytes_in_file, kCFAllocatorNull);
   CGDataProviderRef image_provider = CGDataProviderCreateWithCFData(file_data_ref);
@@ -63,6 +64,7 @@ std::vector<uint8_t> LoadImageFromFile(const char* file_name, int* out_width, in
   const int bytes_in_image = (bytes_per_row * height);
   std::vector<uint8_t> result(bytes_in_image);
   const int bits_per_component = 8;
+
   CGContextRef context =
       CGBitmapContextCreate(result.data(), width, height, bits_per_component, bytes_per_row,
                             color_space, kCGImageAlphaPremultipliedLast | kCGBitmapByteOrder32Big);
diff --git a/tensorflow/contrib/lite/python/BUILD b/tensorflow/contrib/lite/python/BUILD
index 89e8693490..3d6a3ec0fd 100644
--- a/tensorflow/contrib/lite/python/BUILD
+++ b/tensorflow/contrib/lite/python/BUILD
@@ -24,6 +24,7 @@ py_test(
     name = "lite_test",
     srcs = ["lite_test.py"],
     srcs_version = "PY2AND3",
+    tags = ["no_oss"],
     deps = [
         ":lite",
         "//tensorflow/python:array_ops",
diff --git a/tensorflow/contrib/lite/schema/schema_generated.h b/tensorflow/contrib/lite/schema/schema_generated.h
new file mode 100755
index 0000000000..cbf10275f3
--- /dev/null
+++ b/tensorflow/contrib/lite/schema/schema_generated.h
@@ -0,0 +1,5417 @@
+/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+// automatically generated by the FlatBuffers compiler, do not modify
+
+#ifndef FLATBUFFERS_GENERATED_SCHEMA_TFLITE_H_
+#define FLATBUFFERS_GENERATED_SCHEMA_TFLITE_H_
+
+#include "flatbuffers/flatbuffers.h"
+
+namespace tflite {
+
+struct QuantizationParameters;
+struct QuantizationParametersT;
+
+struct Tensor;
+struct TensorT;
+
+struct Conv2DOptions;
+struct Conv2DOptionsT;
+
+struct Pool2DOptions;
+struct Pool2DOptionsT;
+
+struct DepthwiseConv2DOptions;
+struct DepthwiseConv2DOptionsT;
+
+struct ConcatEmbeddingsOptions;
+struct ConcatEmbeddingsOptionsT;
+
+struct LSHProjectionOptions;
+struct LSHProjectionOptionsT;
+
+struct SVDFOptions;
+struct SVDFOptionsT;
+
+struct RNNOptions;
+struct RNNOptionsT;
+
+struct FullyConnectedOptions;
+struct FullyConnectedOptionsT;
+
+struct SoftmaxOptions;
+struct SoftmaxOptionsT;
+
+struct ConcatenationOptions;
+struct ConcatenationOptionsT;
+
+struct AddOptions;
+struct AddOptionsT;
+
+struct MulOptions;
+struct MulOptionsT;
+
+struct L2NormOptions;
+struct L2NormOptionsT;
+
+struct LocalResponseNormalizationOptions;
+struct LocalResponseNormalizationOptionsT;
+
+struct LSTMOptions;
+struct LSTMOptionsT;
+
+struct ResizeBilinearOptions;
+struct ResizeBilinearOptionsT;
+
+struct CallOptions;
+struct CallOptionsT;
+
+struct ReshapeOptions;
+struct ReshapeOptionsT;
+
+struct SkipGramOptions;
+struct SkipGramOptionsT;
+
+struct SpaceToDepthOptions;
+struct SpaceToDepthOptionsT;
+
+struct EmbeddingLookupSparseOptions;
+struct EmbeddingLookupSparseOptionsT;
+
+struct OperatorCode;
+struct OperatorCodeT;
+
+struct Operator;
+struct OperatorT;
+
+struct SubGraph;
+struct SubGraphT;
+
+struct Buffer;
+struct BufferT;
+
+struct Model;
+struct ModelT;
+
+enum TensorType {
+  TensorType_FLOAT32 = 0,
+  TensorType_FLOAT16 = 1,
+  TensorType_INT32 = 2,
+  TensorType_UINT8 = 3,
+  TensorType_INT64 = 4,
+  TensorType_STRING = 5,
+  TensorType_MIN = TensorType_FLOAT32,
+  TensorType_MAX = TensorType_STRING
+};
+
+inline TensorType (&EnumValuesTensorType())[6] {
+  static TensorType values[] = {TensorType_FLOAT32, TensorType_FLOAT16,
+                                TensorType_INT32,   TensorType_UINT8,
+                                TensorType_INT64,   TensorType_STRING};
+  return values;
+}
+
+inline const char **EnumNamesTensorType() {
+  static const char *names[] = {"FLOAT32", "FLOAT16", "INT32", "UINT8",
+                                "INT64",   "STRING",  nullptr};
+  return names;
+}
+
+inline const char *EnumNameTensorType(TensorType e) {
+  const size_t index = static_cast<int>(e);
+  return EnumNamesTensorType()[index];
+}
+
+enum BuiltinOperator {
+  BuiltinOperator_ADD = 0,
+  BuiltinOperator_AVERAGE_POOL_2D = 1,
+  BuiltinOperator_CONCATENATION = 2,
+  BuiltinOperator_CONV_2D = 3,
+  BuiltinOperator_DEPTHWISE_CONV_2D = 4,
+  BuiltinOperator_EMBEDDING_LOOKUP = 7,
+  BuiltinOperator_FULLY_CONNECTED = 9,
+  BuiltinOperator_HASHTABLE_LOOKUP = 10,
+  BuiltinOperator_L2_NORMALIZATION = 11,
+  BuiltinOperator_L2_POOL_2D = 12,
+  BuiltinOperator_LOCAL_RESPONSE_NORMALIZATION = 13,
+  BuiltinOperator_LOGISTIC = 14,
+  BuiltinOperator_LSH_PROJECTION = 15,
+  BuiltinOperator_LSTM = 16,
+  BuiltinOperator_MAX_POOL_2D = 17,
+  BuiltinOperator_MUL = 18,
+  BuiltinOperator_RELU = 19,
+  BuiltinOperator_RELU1 = 20,
+  BuiltinOperator_RELU6 = 21,
+  BuiltinOperator_RESHAPE = 22,
+  BuiltinOperator_RESIZE_BILINEAR = 23,
+  BuiltinOperator_RNN = 24,
+  BuiltinOperator_SOFTMAX = 25,
+  BuiltinOperator_SPACE_TO_DEPTH = 26,
+  BuiltinOperator_SVDF = 27,
+  BuiltinOperator_TANH = 28,
+  BuiltinOperator_CONCAT_EMBEDDINGS = 29,
+  BuiltinOperator_SKIP_GRAM = 30,
+  BuiltinOperator_CALL = 31,
+  BuiltinOperator_CUSTOM = 32,
+  BuiltinOperator_EMBEDDING_LOOKUP_SPARSE = 33,
+  BuiltinOperator_MIN = BuiltinOperator_ADD,
+  BuiltinOperator_MAX = BuiltinOperator_EMBEDDING_LOOKUP_SPARSE
+};
+
+inline BuiltinOperator (&EnumValuesBuiltinOperator())[31] {
+  static BuiltinOperator values[] = {
+      BuiltinOperator_ADD,
+      BuiltinOperator_AVERAGE_POOL_2D,
+      BuiltinOperator_CONCATENATION,
+      BuiltinOperator_CONV_2D,
+      BuiltinOperator_DEPTHWISE_CONV_2D,
+      BuiltinOperator_EMBEDDING_LOOKUP,
+      BuiltinOperator_FULLY_CONNECTED,
+      BuiltinOperator_HASHTABLE_LOOKUP,
+      BuiltinOperator_L2_NORMALIZATION,
+      BuiltinOperator_L2_POOL_2D,
+      BuiltinOperator_LOCAL_RESPONSE_NORMALIZATION,
+      BuiltinOperator_LOGISTIC,
+      BuiltinOperator_LSH_PROJECTION,
+      BuiltinOperator_LSTM,
+      BuiltinOperator_MAX_POOL_2D,
+      BuiltinOperator_MUL,
+      BuiltinOperator_RELU,
+      BuiltinOperator_RELU1,
+      BuiltinOperator_RELU6,
+      BuiltinOperator_RESHAPE,
+      BuiltinOperator_RESIZE_BILINEAR,
+      BuiltinOperator_RNN,
+      BuiltinOperator_SOFTMAX,
+      BuiltinOperator_SPACE_TO_DEPTH,
+      BuiltinOperator_SVDF,
+      BuiltinOperator_TANH,
+      BuiltinOperator_CONCAT_EMBEDDINGS,
+      BuiltinOperator_SKIP_GRAM,
+      BuiltinOperator_CALL,
+      BuiltinOperator_CUSTOM,
+      BuiltinOperator_EMBEDDING_LOOKUP_SPARSE};
+  return values;
+}
+
+inline const char **EnumNamesBuiltinOperator() {
+  static const char *names[] = {"ADD",
+                                "AVERAGE_POOL_2D",
+                                "CONCATENATION",
+                                "CONV_2D",
+                                "DEPTHWISE_CONV_2D",
+                                "",
+                                "",
+                                "EMBEDDING_LOOKUP",
+                                "",
+                                "FULLY_CONNECTED",
+                                "HASHTABLE_LOOKUP",
+                                "L2_NORMALIZATION",
+                                "L2_POOL_2D",
+                                "LOCAL_RESPONSE_NORMALIZATION",
+                                "LOGISTIC",
+                                "LSH_PROJECTION",
+                                "LSTM",
+                                "MAX_POOL_2D",
+                                "MUL",
+                                "RELU",
+                                "RELU1",
+                                "RELU6",
+                                "RESHAPE",
+                                "RESIZE_BILINEAR",
+                                "RNN",
+                                "SOFTMAX",
+                                "SPACE_TO_DEPTH",
+                                "SVDF",
+                                "TANH",
+                                "CONCAT_EMBEDDINGS",
+                                "SKIP_GRAM",
+                                "CALL",
+                                "CUSTOM",
+                                "EMBEDDING_LOOKUP_SPARSE",
+                                nullptr};
+  return names;
+}
+
+inline const char *EnumNameBuiltinOperator(BuiltinOperator e) {
+  const size_t index = static_cast<int>(e);
+  return EnumNamesBuiltinOperator()[index];
+}
+
+enum BuiltinOptions {
+  BuiltinOptions_NONE = 0,
+  BuiltinOptions_Conv2DOptions = 1,
+  BuiltinOptions_DepthwiseConv2DOptions = 2,
+  BuiltinOptions_ConcatEmbeddingsOptions = 3,
+  BuiltinOptions_LSHProjectionOptions = 4,
+  BuiltinOptions_Pool2DOptions = 5,
+  BuiltinOptions_SVDFOptions = 6,
+  BuiltinOptions_RNNOptions = 7,
+  BuiltinOptions_FullyConnectedOptions = 8,
+  BuiltinOptions_SoftmaxOptions = 9,
+  BuiltinOptions_ConcatenationOptions = 10,
+  BuiltinOptions_AddOptions = 11,
+  BuiltinOptions_L2NormOptions = 12,
+  BuiltinOptions_LocalResponseNormalizationOptions = 13,
+  BuiltinOptions_LSTMOptions = 14,
+  BuiltinOptions_ResizeBilinearOptions = 15,
+  BuiltinOptions_CallOptions = 16,
+  BuiltinOptions_ReshapeOptions = 17,
+  BuiltinOptions_SkipGramOptions = 18,
+  BuiltinOptions_SpaceToDepthOptions = 19,
+  BuiltinOptions_EmbeddingLookupSparseOptions = 20,
+  BuiltinOptions_MulOptions = 21,
+  BuiltinOptions_MIN = BuiltinOptions_NONE,
+  BuiltinOptions_MAX = BuiltinOptions_MulOptions
+};
+
+inline BuiltinOptions (&EnumValuesBuiltinOptions())[22] {
+  static BuiltinOptions values[] = {
+      BuiltinOptions_NONE,
+      BuiltinOptions_Conv2DOptions,
+      BuiltinOptions_DepthwiseConv2DOptions,
+      BuiltinOptions_ConcatEmbeddingsOptions,
+      BuiltinOptions_LSHProjectionOptions,
+      BuiltinOptions_Pool2DOptions,
+      BuiltinOptions_SVDFOptions,
+      BuiltinOptions_RNNOptions,
+      BuiltinOptions_FullyConnectedOptions,
+      BuiltinOptions_SoftmaxOptions,
+      BuiltinOptions_ConcatenationOptions,
+      BuiltinOptions_AddOptions,
+      BuiltinOptions_L2NormOptions,
+      BuiltinOptions_LocalResponseNormalizationOptions,
+      BuiltinOptions_LSTMOptions,
+      BuiltinOptions_ResizeBilinearOptions,
+      BuiltinOptions_CallOptions,
+      BuiltinOptions_ReshapeOptions,
+      BuiltinOptions_SkipGramOptions,
+      BuiltinOptions_SpaceToDepthOptions,
+      BuiltinOptions_EmbeddingLookupSparseOptions,
+      BuiltinOptions_MulOptions};
+  return values;
+}
+
+inline const char **EnumNamesBuiltinOptions() {
+  static const char *names[] = {"NONE",
+                                "Conv2DOptions",
+                                "DepthwiseConv2DOptions",
+                                "ConcatEmbeddingsOptions",
+                                "LSHProjectionOptions",
+                                "Pool2DOptions",
+                                "SVDFOptions",
+                                "RNNOptions",
+                                "FullyConnectedOptions",
+                                "SoftmaxOptions",
+                                "ConcatenationOptions",
+                                "AddOptions",
+                                "L2NormOptions",
+                                "LocalResponseNormalizationOptions",
+                                "LSTMOptions",
+                                "ResizeBilinearOptions",
+                                "CallOptions",
+                                "ReshapeOptions",
+                                "SkipGramOptions",
+                                "SpaceToDepthOptions",
+                                "EmbeddingLookupSparseOptions",
+                                "MulOptions",
+                                nullptr};
+  return names;
+}
+
+inline const char *EnumNameBuiltinOptions(BuiltinOptions e) {
+  const size_t index = static_cast<int>(e);
+  return EnumNamesBuiltinOptions()[index];
+}
+
+template <typename T>
+struct BuiltinOptionsTraits {
+  static const BuiltinOptions enum_value = BuiltinOptions_NONE;
+};
+
+template <>
+struct BuiltinOptionsTraits<Conv2DOptions> {
+  static const BuiltinOptions enum_value = BuiltinOptions_Conv2DOptions;
+};
+
+template <>
+struct BuiltinOptionsTraits<DepthwiseConv2DOptions> {
+  static const BuiltinOptions enum_value =
+      BuiltinOptions_DepthwiseConv2DOptions;
+};
+
+template <>
+struct BuiltinOptionsTraits<ConcatEmbeddingsOptions> {
+  static const BuiltinOptions enum_value =
+      BuiltinOptions_ConcatEmbeddingsOptions;
+};
+
+template <>
+struct BuiltinOptionsTraits<LSHProjectionOptions> {
+  static const BuiltinOptions enum_value = BuiltinOptions_LSHProjectionOptions;
+};
+
+template <>
+struct BuiltinOptionsTraits<Pool2DOptions> {
+  static const BuiltinOptions enum_value = BuiltinOptions_Pool2DOptions;
+};
+
+template <>
+struct BuiltinOptionsTraits<SVDFOptions> {
+  static const BuiltinOptions enum_value = BuiltinOptions_SVDFOptions;
+};
+
+template <>
+struct BuiltinOptionsTraits<RNNOptions> {
+  static const BuiltinOptions enum_value = BuiltinOptions_RNNOptions;
+};
+
+template <>
+struct BuiltinOptionsTraits<FullyConnectedOptions> {
+  static const BuiltinOptions enum_value = BuiltinOptions_FullyConnectedOptions;
+};
+
+template <>
+struct BuiltinOptionsTraits<SoftmaxOptions> {
+  static const BuiltinOptions enum_value = BuiltinOptions_SoftmaxOptions;
+};
+
+template <>
+struct BuiltinOptionsTraits<ConcatenationOptions> {
+  static const BuiltinOptions enum_value = BuiltinOptions_ConcatenationOptions;
+};
+
+template <>
+struct BuiltinOptionsTraits<AddOptions> {
+  static const BuiltinOptions enum_value = BuiltinOptions_AddOptions;
+};
+
+template <>
+struct BuiltinOptionsTraits<L2NormOptions> {
+  static const BuiltinOptions enum_value = BuiltinOptions_L2NormOptions;
+};
+
+template <>
+struct BuiltinOptionsTraits<LocalResponseNormalizationOptions> {
+  static const BuiltinOptions enum_value =
+      BuiltinOptions_LocalResponseNormalizationOptions;
+};
+
+template <>
+struct BuiltinOptionsTraits<LSTMOptions> {
+  static const BuiltinOptions enum_value = BuiltinOptions_LSTMOptions;
+};
+
+template <>
+struct BuiltinOptionsTraits<ResizeBilinearOptions> {
+  static const BuiltinOptions enum_value = BuiltinOptions_ResizeBilinearOptions;
+};
+
+template <>
+struct BuiltinOptionsTraits<CallOptions> {
+  static const BuiltinOptions enum_value = BuiltinOptions_CallOptions;
+};
+
+template <>
+struct BuiltinOptionsTraits<ReshapeOptions> {
+  static const BuiltinOptions enum_value = BuiltinOptions_ReshapeOptions;
+};
+
+template <>
+struct BuiltinOptionsTraits<SkipGramOptions> {
+  static const BuiltinOptions enum_value = BuiltinOptions_SkipGramOptions;
+};
+
+template <>
+struct BuiltinOptionsTraits<SpaceToDepthOptions> {
+  static const BuiltinOptions enum_value = BuiltinOptions_SpaceToDepthOptions;
+};
+
+template <>
+struct BuiltinOptionsTraits<EmbeddingLookupSparseOptions> {
+  static const BuiltinOptions enum_value =
+      BuiltinOptions_EmbeddingLookupSparseOptions;
+};
+
+template <>
+struct BuiltinOptionsTraits<MulOptions> {
+  static const BuiltinOptions enum_value = BuiltinOptions_MulOptions;
+};
+
+struct BuiltinOptionsUnion {
+  BuiltinOptions type;
+  void *value;
+
+  BuiltinOptionsUnion() : type(BuiltinOptions_NONE), value(nullptr) {}
+  BuiltinOptionsUnion(BuiltinOptionsUnion &&u) FLATBUFFERS_NOEXCEPT
+      : type(BuiltinOptions_NONE),
+        value(nullptr) {
+    std::swap(type, u.type);
+    std::swap(value, u.value);
+  }
+  BuiltinOptionsUnion(const BuiltinOptionsUnion &) FLATBUFFERS_NOEXCEPT;
+  BuiltinOptionsUnion &operator=(const BuiltinOptionsUnion &u)
+      FLATBUFFERS_NOEXCEPT {
+    BuiltinOptionsUnion t(u);
+    std::swap(type, t.type);
+    std::swap(value, t.value);
+    return *this;
+  }
+  BuiltinOptionsUnion &operator=(BuiltinOptionsUnion &&u) FLATBUFFERS_NOEXCEPT {
+    std::swap(type, u.type);
+    std::swap(value, u.value);
+    return *this;
+  }
+  ~BuiltinOptionsUnion() { Reset(); }
+
+  void Reset();
+
+#ifndef FLATBUFFERS_CPP98_STL
+  template <typename T>
+  void Set(T &&val) {
+    Reset();
+    type = BuiltinOptionsTraits<typename T::TableType>::enum_value;
+    if (type != BuiltinOptions_NONE) {
+      value = new T(std::forward<T>(val));
+    }
+  }
+#endif  // FLATBUFFERS_CPP98_STL
+
+  static void *UnPack(const void *obj, BuiltinOptions type,
+                      const flatbuffers::resolver_function_t *resolver);
+  flatbuffers::Offset<void> Pack(
+      flatbuffers::FlatBufferBuilder &_fbb,
+      const flatbuffers::rehasher_function_t *_rehasher = nullptr) const;
+
+  Conv2DOptionsT *AsConv2DOptions() {
+    return type == BuiltinOptions_Conv2DOptions
+               ? reinterpret_cast<Conv2DOptionsT *>(value)
+               : nullptr;
+  }
+  const Conv2DOptionsT *AsConv2DOptions() const {
+    return type == BuiltinOptions_Conv2DOptions
+               ? reinterpret_cast<const Conv2DOptionsT *>(value)
+               : nullptr;
+  }
+  DepthwiseConv2DOptionsT *AsDepthwiseConv2DOptions() {
+    return type == BuiltinOptions_DepthwiseConv2DOptions
+               ? reinterpret_cast<DepthwiseConv2DOptionsT *>(value)
+               : nullptr;
+  }
+  const DepthwiseConv2DOptionsT *AsDepthwiseConv2DOptions() const {
+    return type == BuiltinOptions_DepthwiseConv2DOptions
+               ? reinterpret_cast<const DepthwiseConv2DOptionsT *>(value)
+               : nullptr;
+  }
+  ConcatEmbeddingsOptionsT *AsConcatEmbeddingsOptions() {
+    return type == BuiltinOptions_ConcatEmbeddingsOptions
+               ? reinterpret_cast<ConcatEmbeddingsOptionsT *>(value)
+               : nullptr;
+  }
+  const ConcatEmbeddingsOptionsT *AsConcatEmbeddingsOptions() const {
+    return type == BuiltinOptions_ConcatEmbeddingsOptions
+               ? reinterpret_cast<const ConcatEmbeddingsOptionsT *>(value)
+               : nullptr;
+  }
+  LSHProjectionOptionsT *AsLSHProjectionOptions() {
+    return type == BuiltinOptions_LSHProjectionOptions
+               ? reinterpret_cast<LSHProjectionOptionsT *>(value)
+               : nullptr;
+  }
+  const LSHProjectionOptionsT *AsLSHProjectionOptions() const {
+    return type == BuiltinOptions_LSHProjectionOptions
+               ? reinterpret_cast<const LSHProjectionOptionsT *>(value)
+               : nullptr;
+  }
+  Pool2DOptionsT *AsPool2DOptions() {
+    return type == BuiltinOptions_Pool2DOptions
+               ? reinterpret_cast<Pool2DOptionsT *>(value)
+               : nullptr;
+  }
+  const Pool2DOptionsT *AsPool2DOptions() const {
+    return type == BuiltinOptions_Pool2DOptions
+               ? reinterpret_cast<const Pool2DOptionsT *>(value)
+               : nullptr;
+  }
+  SVDFOptionsT *AsSVDFOptions() {
+    return type == BuiltinOptions_SVDFOptions
+               ? reinterpret_cast<SVDFOptionsT *>(value)
+               : nullptr;
+  }
+  const SVDFOptionsT *AsSVDFOptions() const {
+    return type == BuiltinOptions_SVDFOptions
+               ? reinterpret_cast<const SVDFOptionsT *>(value)
+               : nullptr;
+  }
+  RNNOptionsT *AsRNNOptions() {
+    return type == BuiltinOptions_RNNOptions
+               ? reinterpret_cast<RNNOptionsT *>(value)
+               : nullptr;
+  }
+  const RNNOptionsT *AsRNNOptions() const {
+    return type == BuiltinOptions_RNNOptions
+               ? reinterpret_cast<const RNNOptionsT *>(value)
+               : nullptr;
+  }
+  FullyConnectedOptionsT *AsFullyConnectedOptions() {
+    return type == BuiltinOptions_FullyConnectedOptions
+               ? reinterpret_cast<FullyConnectedOptionsT *>(value)
+               : nullptr;
+  }
+  const FullyConnectedOptionsT *AsFullyConnectedOptions() const {
+    return type == BuiltinOptions_FullyConnectedOptions
+               ? reinterpret_cast<const FullyConnectedOptionsT *>(value)
+               : nullptr;
+  }
+  SoftmaxOptionsT *AsSoftmaxOptions() {
+    return type == BuiltinOptions_SoftmaxOptions
+               ? reinterpret_cast<SoftmaxOptionsT *>(value)
+               : nullptr;
+  }
+  const SoftmaxOptionsT *AsSoftmaxOptions() const {
+    return type == BuiltinOptions_SoftmaxOptions
+               ? reinterpret_cast<const SoftmaxOptionsT *>(value)
+               : nullptr;
+  }
+  ConcatenationOptionsT *AsConcatenationOptions() {
+    return type == BuiltinOptions_ConcatenationOptions
+               ? reinterpret_cast<ConcatenationOptionsT *>(value)
+               : nullptr;
+  }
+  const ConcatenationOptionsT *AsConcatenationOptions() const {
+    return type == BuiltinOptions_ConcatenationOptions
+               ? reinterpret_cast<const ConcatenationOptionsT *>(value)
+               : nullptr;
+  }
+  AddOptionsT *AsAddOptions() {
+    return type == BuiltinOptions_AddOptions
+               ? reinterpret_cast<AddOptionsT *>(value)
+               : nullptr;
+  }
+  const AddOptionsT *AsAddOptions() const {
+    return type == BuiltinOptions_AddOptions
+               ? reinterpret_cast<const AddOptionsT *>(value)
+               : nullptr;
+  }
+  L2NormOptionsT *AsL2NormOptions() {
+    return type == BuiltinOptions_L2NormOptions
+               ? reinterpret_cast<L2NormOptionsT *>(value)
+               : nullptr;
+  }
+  const L2NormOptionsT *AsL2NormOptions() const {
+    return type == BuiltinOptions_L2NormOptions
+               ? reinterpret_cast<const L2NormOptionsT *>(value)
+               : nullptr;
+  }
+  LocalResponseNormalizationOptionsT *AsLocalResponseNormalizationOptions() {
+    return type == BuiltinOptions_LocalResponseNormalizationOptions
+               ? reinterpret_cast<LocalResponseNormalizationOptionsT *>(value)
+               : nullptr;
+  }
+  const LocalResponseNormalizationOptionsT *
+  AsLocalResponseNormalizationOptions() const {
+    return type == BuiltinOptions_LocalResponseNormalizationOptions
+               ? reinterpret_cast<const LocalResponseNormalizationOptionsT *>(
+                     value)
+               : nullptr;
+  }
+  LSTMOptionsT *AsLSTMOptions() {
+    return type == BuiltinOptions_LSTMOptions
+               ? reinterpret_cast<LSTMOptionsT *>(value)
+               : nullptr;
+  }
+  const LSTMOptionsT *AsLSTMOptions() const {
+    return type == BuiltinOptions_LSTMOptions
+               ? reinterpret_cast<const LSTMOptionsT *>(value)
+               : nullptr;
+  }
+  ResizeBilinearOptionsT *AsResizeBilinearOptions() {
+    return type == BuiltinOptions_ResizeBilinearOptions
+               ? reinterpret_cast<ResizeBilinearOptionsT *>(value)
+               : nullptr;
+  }
+  const ResizeBilinearOptionsT *AsResizeBilinearOptions() const {
+    return type == BuiltinOptions_ResizeBilinearOptions
+               ? reinterpret_cast<const ResizeBilinearOptionsT *>(value)
+               : nullptr;
+  }
+  CallOptionsT *AsCallOptions() {
+    return type == BuiltinOptions_CallOptions
+               ? reinterpret_cast<CallOptionsT *>(value)
+               : nullptr;
+  }
+  const CallOptionsT *AsCallOptions() const {
+    return type == BuiltinOptions_CallOptions
+               ? reinterpret_cast<const CallOptionsT *>(value)
+               : nullptr;
+  }
+  ReshapeOptionsT *AsReshapeOptions() {
+    return type == BuiltinOptions_ReshapeOptions
+               ? reinterpret_cast<ReshapeOptionsT *>(value)
+               : nullptr;
+  }
+  const ReshapeOptionsT *AsReshapeOptions() const {
+    return type == BuiltinOptions_ReshapeOptions
+               ? reinterpret_cast<const ReshapeOptionsT *>(value)
+               : nullptr;
+  }
+  SkipGramOptionsT *AsSkipGramOptions() {
+    return type == BuiltinOptions_SkipGramOptions
+               ? reinterpret_cast<SkipGramOptionsT *>(value)
+               : nullptr;
+  }
+  const SkipGramOptionsT *AsSkipGramOptions() const {
+    return type == BuiltinOptions_SkipGramOptions
+               ? reinterpret_cast<const SkipGramOptionsT *>(value)
+               : nullptr;
+  }
+  SpaceToDepthOptionsT *AsSpaceToDepthOptions() {
+    return type == BuiltinOptions_SpaceToDepthOptions
+               ? reinterpret_cast<SpaceToDepthOptionsT *>(value)
+               : nullptr;
+  }
+  const SpaceToDepthOptionsT *AsSpaceToDepthOptions() const {
+    return type == BuiltinOptions_SpaceToDepthOptions
+               ? reinterpret_cast<const SpaceToDepthOptionsT *>(value)
+               : nullptr;
+  }
+  EmbeddingLookupSparseOptionsT *AsEmbeddingLookupSparseOptions() {
+    return type == BuiltinOptions_EmbeddingLookupSparseOptions
+               ? reinterpret_cast<EmbeddingLookupSparseOptionsT *>(value)
+               : nullptr;
+  }
+  const EmbeddingLookupSparseOptionsT *AsEmbeddingLookupSparseOptions() const {
+    return type == BuiltinOptions_EmbeddingLookupSparseOptions
+               ? reinterpret_cast<const EmbeddingLookupSparseOptionsT *>(value)
+               : nullptr;
+  }
+  MulOptionsT *AsMulOptions() {
+    return type == BuiltinOptions_MulOptions
+               ? reinterpret_cast<MulOptionsT *>(value)
+               : nullptr;
+  }
+  const MulOptionsT *AsMulOptions() const {
+    return type == BuiltinOptions_MulOptions
+               ? reinterpret_cast<const MulOptionsT *>(value)
+               : nullptr;
+  }
+};
+
+bool VerifyBuiltinOptions(flatbuffers::Verifier &verifier, const void *obj,
+                          BuiltinOptions type);
+bool VerifyBuiltinOptionsVector(
+    flatbuffers::Verifier &verifier,
+    const flatbuffers::Vector<flatbuffers::Offset<void>> *values,
+    const flatbuffers::Vector<uint8_t> *types);
+
+enum Padding {
+  Padding_SAME = 0,
+  Padding_VALID = 1,
+  Padding_MIN = Padding_SAME,
+  Padding_MAX = Padding_VALID
+};
+
+inline Padding (&EnumValuesPadding())[2] {
+  static Padding values[] = {Padding_SAME, Padding_VALID};
+  return values;
+}
+
+inline const char **EnumNamesPadding() {
+  static const char *names[] = {"SAME", "VALID", nullptr};
+  return names;
+}
+
+inline const char *EnumNamePadding(Padding e) {
+  const size_t index = static_cast<int>(e);
+  return EnumNamesPadding()[index];
+}
+
+enum ActivationFunctionType {
+  ActivationFunctionType_NONE = 0,
+  ActivationFunctionType_RELU = 1,
+  ActivationFunctionType_RELU1 = 2,
+  ActivationFunctionType_RELU6 = 3,
+  ActivationFunctionType_TANH = 4,
+  ActivationFunctionType_SIGN_BIT = 5,
+  ActivationFunctionType_MIN = ActivationFunctionType_NONE,
+  ActivationFunctionType_MAX = ActivationFunctionType_SIGN_BIT
+};
+
+inline ActivationFunctionType (&EnumValuesActivationFunctionType())[6] {
+  static ActivationFunctionType values[] = {
+      ActivationFunctionType_NONE,  ActivationFunctionType_RELU,
+      ActivationFunctionType_RELU1, ActivationFunctionType_RELU6,
+      ActivationFunctionType_TANH,  ActivationFunctionType_SIGN_BIT};
+  return values;
+}
+
+inline const char **EnumNamesActivationFunctionType() {
+  static const char *names[] = {"NONE", "RELU",     "RELU1", "RELU6",
+                                "TANH", "SIGN_BIT", nullptr};
+  return names;
+}
+
+inline const char *EnumNameActivationFunctionType(ActivationFunctionType e) {
+  const size_t index = static_cast<int>(e);
+  return EnumNamesActivationFunctionType()[index];
+}
+
+enum LSHProjectionType {
+  LSHProjectionType_UNKNOWN = 0,
+  LSHProjectionType_SPARSE = 1,
+  LSHProjectionType_DENSE = 2,
+  LSHProjectionType_MIN = LSHProjectionType_UNKNOWN,
+  LSHProjectionType_MAX = LSHProjectionType_DENSE
+};
+
+inline LSHProjectionType (&EnumValuesLSHProjectionType())[3] {
+  static LSHProjectionType values[] = {LSHProjectionType_UNKNOWN,
+                                       LSHProjectionType_SPARSE,
+                                       LSHProjectionType_DENSE};
+  return values;
+}
+
+inline const char **EnumNamesLSHProjectionType() {
+  static const char *names[] = {"UNKNOWN", "SPARSE", "DENSE", nullptr};
+  return names;
+}
+
+inline const char *EnumNameLSHProjectionType(LSHProjectionType e) {
+  const size_t index = static_cast<int>(e);
+  return EnumNamesLSHProjectionType()[index];
+}
+
+enum CombinerType {
+  CombinerType_SUM = 0,
+  CombinerType_MEAN = 1,
+  CombinerType_SQRTN = 2,
+  CombinerType_MIN = CombinerType_SUM,
+  CombinerType_MAX = CombinerType_SQRTN
+};
+
+inline CombinerType (&EnumValuesCombinerType())[3] {
+  static CombinerType values[] = {CombinerType_SUM, CombinerType_MEAN,
+                                  CombinerType_SQRTN};
+  return values;
+}
+
+inline const char **EnumNamesCombinerType() {
+  static const char *names[] = {"SUM", "MEAN", "SQRTN", nullptr};
+  return names;
+}
+
+inline const char *EnumNameCombinerType(CombinerType e) {
+  const size_t index = static_cast<int>(e);
+  return EnumNamesCombinerType()[index];
+}
+
+enum CustomOptionsFormat {
+  CustomOptionsFormat_FLEXBUFFERS = 0,
+  CustomOptionsFormat_MIN = CustomOptionsFormat_FLEXBUFFERS,
+  CustomOptionsFormat_MAX = CustomOptionsFormat_FLEXBUFFERS
+};
+
+inline CustomOptionsFormat (&EnumValuesCustomOptionsFormat())[1] {
+  static CustomOptionsFormat values[] = {CustomOptionsFormat_FLEXBUFFERS};
+  return values;
+}
+
+inline const char **EnumNamesCustomOptionsFormat() {
+  static const char *names[] = {"FLEXBUFFERS", nullptr};
+  return names;
+}
+
+inline const char *EnumNameCustomOptionsFormat(CustomOptionsFormat e) {
+  const size_t index = static_cast<int>(e);
+  return EnumNamesCustomOptionsFormat()[index];
+}
+
+struct QuantizationParametersT : public flatbuffers::NativeTable {
+  typedef QuantizationParameters TableType;
+  std::vector<float> min;
+  std::vector<float> max;
+  std::vector<float> scale;
+  std::vector<int64_t> zero_point;
+  QuantizationParametersT() {}
+};
+
+struct QuantizationParameters FLATBUFFERS_FINAL_CLASS
+    : private flatbuffers::Table {
+  typedef QuantizationParametersT NativeTableType;
+  enum { VT_MIN = 4, VT_MAX = 6, VT_SCALE = 8, VT_ZERO_POINT = 10 };
+  const flatbuffers::Vector<float> *min() const {
+    return GetPointer<const flatbuffers::Vector<float> *>(VT_MIN);
+  }
+  const flatbuffers::Vector<float> *max() const {
+    return GetPointer<const flatbuffers::Vector<float> *>(VT_MAX);
+  }
+  const flatbuffers::Vector<float> *scale() const {
+    return GetPointer<const flatbuffers::Vector<float> *>(VT_SCALE);
+  }
+  const flatbuffers::Vector<int64_t> *zero_point() const {
+    return GetPointer<const flatbuffers::Vector<int64_t> *>(VT_ZERO_POINT);
+  }
+  bool Verify(flatbuffers::Verifier &verifier) const {
+    return VerifyTableStart(verifier) && VerifyOffset(verifier, VT_MIN) &&
+           verifier.Verify(min()) && VerifyOffset(verifier, VT_MAX) &&
+           verifier.Verify(max()) && VerifyOffset(verifier, VT_SCALE) &&
+           verifier.Verify(scale()) && VerifyOffset(verifier, VT_ZERO_POINT) &&
+           verifier.Verify(zero_point()) && verifier.EndTable();
+  }
+  QuantizationParametersT *UnPack(
+      const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  void UnPackTo(
+      QuantizationParametersT *_o,
+      const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  static flatbuffers::Offset<QuantizationParameters> Pack(
+      flatbuffers::FlatBufferBuilder &_fbb, const QuantizationParametersT *_o,
+      const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct QuantizationParametersBuilder {
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  void add_min(flatbuffers::Offset<flatbuffers::Vector<float>> min) {
+    fbb_.AddOffset(QuantizationParameters::VT_MIN, min);
+  }
+  void add_max(flatbuffers::Offset<flatbuffers::Vector<float>> max) {
+    fbb_.AddOffset(QuantizationParameters::VT_MAX, max);
+  }
+  void add_scale(flatbuffers::Offset<flatbuffers::Vector<float>> scale) {
+    fbb_.AddOffset(QuantizationParameters::VT_SCALE, scale);
+  }
+  void add_zero_point(
+      flatbuffers::Offset<flatbuffers::Vector<int64_t>> zero_point) {
+    fbb_.AddOffset(QuantizationParameters::VT_ZERO_POINT, zero_point);
+  }
+  explicit QuantizationParametersBuilder(flatbuffers::FlatBufferBuilder &_fbb)
+      : fbb_(_fbb) {
+    start_ = fbb_.StartTable();
+  }
+  QuantizationParametersBuilder &operator=(
+      const QuantizationParametersBuilder &);
+  flatbuffers::Offset<QuantizationParameters> Finish() {
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<QuantizationParameters>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<QuantizationParameters> CreateQuantizationParameters(
+    flatbuffers::FlatBufferBuilder &_fbb,
+    flatbuffers::Offset<flatbuffers::Vector<float>> min = 0,
+    flatbuffers::Offset<flatbuffers::Vector<float>> max = 0,
+    flatbuffers::Offset<flatbuffers::Vector<float>> scale = 0,
+    flatbuffers::Offset<flatbuffers::Vector<int64_t>> zero_point = 0) {
+  QuantizationParametersBuilder builder_(_fbb);
+  builder_.add_zero_point(zero_point);
+  builder_.add_scale(scale);
+  builder_.add_max(max);
+  builder_.add_min(min);
+  return builder_.Finish();
+}
+
+inline flatbuffers::Offset<QuantizationParameters>
+CreateQuantizationParametersDirect(
+    flatbuffers::FlatBufferBuilder &_fbb,
+    const std::vector<float> *min = nullptr,
+    const std::vector<float> *max = nullptr,
+    const std::vector<float> *scale = nullptr,
+    const std::vector<int64_t> *zero_point = nullptr) {
+  return tflite::CreateQuantizationParameters(
+      _fbb, min ? _fbb.CreateVector<float>(*min) : 0,
+      max ? _fbb.CreateVector<float>(*max) : 0,
+      scale ? _fbb.CreateVector<float>(*scale) : 0,
+      zero_point ? _fbb.CreateVector<int64_t>(*zero_point) : 0);
+}
+
+flatbuffers::Offset<QuantizationParameters> CreateQuantizationParameters(
+    flatbuffers::FlatBufferBuilder &_fbb, const QuantizationParametersT *_o,
+    const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
+struct TensorT : public flatbuffers::NativeTable {
+  typedef Tensor TableType;
+  std::vector<int32_t> shape;
+  TensorType type;
+  uint32_t buffer;
+  std::string name;
+  std::unique_ptr<QuantizationParametersT> quantization;
+  TensorT() : type(TensorType_FLOAT32), buffer(0) {}
+};
+
+struct Tensor FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
+  typedef TensorT NativeTableType;
+  enum {
+    VT_SHAPE = 4,
+    VT_TYPE = 6,
+    VT_BUFFER = 8,
+    VT_NAME = 10,
+    VT_QUANTIZATION = 12
+  };
+  const flatbuffers::Vector<int32_t> *shape() const {
+    return GetPointer<const flatbuffers::Vector<int32_t> *>(VT_SHAPE);
+  }
+  TensorType type() const {
+    return static_cast<TensorType>(GetField<int8_t>(VT_TYPE, 0));
+  }
+  uint32_t buffer() const { return GetField<uint32_t>(VT_BUFFER, 0); }
+  const flatbuffers::String *name() const {
+    return GetPointer<const flatbuffers::String *>(VT_NAME);
+  }
+  const QuantizationParameters *quantization() const {
+    return GetPointer<const QuantizationParameters *>(VT_QUANTIZATION);
+  }
+  bool Verify(flatbuffers::Verifier &verifier) const {
+    return VerifyTableStart(verifier) && VerifyOffset(verifier, VT_SHAPE) &&
+           verifier.Verify(shape()) && VerifyField<int8_t>(verifier, VT_TYPE) &&
+           VerifyField<uint32_t>(verifier, VT_BUFFER) &&
+           VerifyOffset(verifier, VT_NAME) && verifier.Verify(name()) &&
+           VerifyOffset(verifier, VT_QUANTIZATION) &&
+           verifier.VerifyTable(quantization()) && verifier.EndTable();
+  }
+  TensorT *UnPack(
+      const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  void UnPackTo(TensorT *_o, const flatbuffers::resolver_function_t *_resolver =
+                                 nullptr) const;
+  static flatbuffers::Offset<Tensor> Pack(
+      flatbuffers::FlatBufferBuilder &_fbb, const TensorT *_o,
+      const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct TensorBuilder {
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  void add_shape(flatbuffers::Offset<flatbuffers::Vector<int32_t>> shape) {
+    fbb_.AddOffset(Tensor::VT_SHAPE, shape);
+  }
+  void add_type(TensorType type) {
+    fbb_.AddElement<int8_t>(Tensor::VT_TYPE, static_cast<int8_t>(type), 0);
+  }
+  void add_buffer(uint32_t buffer) {
+    fbb_.AddElement<uint32_t>(Tensor::VT_BUFFER, buffer, 0);
+  }
+  void add_name(flatbuffers::Offset<flatbuffers::String> name) {
+    fbb_.AddOffset(Tensor::VT_NAME, name);
+  }
+  void add_quantization(
+      flatbuffers::Offset<QuantizationParameters> quantization) {
+    fbb_.AddOffset(Tensor::VT_QUANTIZATION, quantization);
+  }
+  explicit TensorBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) {
+    start_ = fbb_.StartTable();
+  }
+  TensorBuilder &operator=(const TensorBuilder &);
+  flatbuffers::Offset<Tensor> Finish() {
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<Tensor>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<Tensor> CreateTensor(
+    flatbuffers::FlatBufferBuilder &_fbb,
+    flatbuffers::Offset<flatbuffers::Vector<int32_t>> shape = 0,
+    TensorType type = TensorType_FLOAT32, uint32_t buffer = 0,
+    flatbuffers::Offset<flatbuffers::String> name = 0,
+    flatbuffers::Offset<QuantizationParameters> quantization = 0) {
+  TensorBuilder builder_(_fbb);
+  builder_.add_quantization(quantization);
+  builder_.add_name(name);
+  builder_.add_buffer(buffer);
+  builder_.add_shape(shape);
+  builder_.add_type(type);
+  return builder_.Finish();
+}
+
+inline flatbuffers::Offset<Tensor> CreateTensorDirect(
+    flatbuffers::FlatBufferBuilder &_fbb,
+    const std::vector<int32_t> *shape = nullptr,
+    TensorType type = TensorType_FLOAT32, uint32_t buffer = 0,
+    const char *name = nullptr,
+    flatbuffers::Offset<QuantizationParameters> quantization = 0) {
+  return tflite::CreateTensor(
+      _fbb, shape ? _fbb.CreateVector<int32_t>(*shape) : 0, type, buffer,
+      name ? _fbb.CreateString(name) : 0, quantization);
+}
+
+flatbuffers::Offset<Tensor> CreateTensor(
+    flatbuffers::FlatBufferBuilder &_fbb, const TensorT *_o,
+    const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
+struct Conv2DOptionsT : public flatbuffers::NativeTable {
+  typedef Conv2DOptions TableType;
+  Padding padding;
+  int32_t stride_w;
+  int32_t stride_h;
+  ActivationFunctionType fused_activation_function;
+  Conv2DOptionsT()
+      : padding(Padding_SAME),
+        stride_w(0),
+        stride_h(0),
+        fused_activation_function(ActivationFunctionType_NONE) {}
+};
+
+struct Conv2DOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
+  typedef Conv2DOptionsT NativeTableType;
+  enum {
+    VT_PADDING = 4,
+    VT_STRIDE_W = 6,
+    VT_STRIDE_H = 8,
+    VT_FUSED_ACTIVATION_FUNCTION = 10
+  };
+  Padding padding() const {
+    return static_cast<Padding>(GetField<int8_t>(VT_PADDING, 0));
+  }
+  int32_t stride_w() const { return GetField<int32_t>(VT_STRIDE_W, 0); }
+  int32_t stride_h() const { return GetField<int32_t>(VT_STRIDE_H, 0); }
+  ActivationFunctionType fused_activation_function() const {
+    return static_cast<ActivationFunctionType>(
+        GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
+  }
+  bool Verify(flatbuffers::Verifier &verifier) const {
+    return VerifyTableStart(verifier) &&
+           VerifyField<int8_t>(verifier, VT_PADDING) &&
+           VerifyField<int32_t>(verifier, VT_STRIDE_W) &&
+           VerifyField<int32_t>(verifier, VT_STRIDE_H) &&
+           VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) &&
+           verifier.EndTable();
+  }
+  Conv2DOptionsT *UnPack(
+      const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  void UnPackTo(
+      Conv2DOptionsT *_o,
+      const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  static flatbuffers::Offset<Conv2DOptions> Pack(
+      flatbuffers::FlatBufferBuilder &_fbb, const Conv2DOptionsT *_o,
+      const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct Conv2DOptionsBuilder {
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  void add_padding(Padding padding) {
+    fbb_.AddElement<int8_t>(Conv2DOptions::VT_PADDING,
+                            static_cast<int8_t>(padding), 0);
+  }
+  void add_stride_w(int32_t stride_w) {
+    fbb_.AddElement<int32_t>(Conv2DOptions::VT_STRIDE_W, stride_w, 0);
+  }
+  void add_stride_h(int32_t stride_h) {
+    fbb_.AddElement<int32_t>(Conv2DOptions::VT_STRIDE_H, stride_h, 0);
+  }
+  void add_fused_activation_function(
+      ActivationFunctionType fused_activation_function) {
+    fbb_.AddElement<int8_t>(Conv2DOptions::VT_FUSED_ACTIVATION_FUNCTION,
+                            static_cast<int8_t>(fused_activation_function), 0);
+  }
+  explicit Conv2DOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
+      : fbb_(_fbb) {
+    start_ = fbb_.StartTable();
+  }
+  Conv2DOptionsBuilder &operator=(const Conv2DOptionsBuilder &);
+  flatbuffers::Offset<Conv2DOptions> Finish() {
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<Conv2DOptions>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<Conv2DOptions> CreateConv2DOptions(
+    flatbuffers::FlatBufferBuilder &_fbb, Padding padding = Padding_SAME,
+    int32_t stride_w = 0, int32_t stride_h = 0,
+    ActivationFunctionType fused_activation_function =
+        ActivationFunctionType_NONE) {
+  Conv2DOptionsBuilder builder_(_fbb);
+  builder_.add_stride_h(stride_h);
+  builder_.add_stride_w(stride_w);
+  builder_.add_fused_activation_function(fused_activation_function);
+  builder_.add_padding(padding);
+  return builder_.Finish();
+}
+
+flatbuffers::Offset<Conv2DOptions> CreateConv2DOptions(
+    flatbuffers::FlatBufferBuilder &_fbb, const Conv2DOptionsT *_o,
+    const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
+struct Pool2DOptionsT : public flatbuffers::NativeTable {
+  typedef Pool2DOptions TableType;
+  Padding padding;
+  int32_t stride_w;
+  int32_t stride_h;
+  int32_t filter_width;
+  int32_t filter_height;
+  ActivationFunctionType fused_activation_function;
+  Pool2DOptionsT()
+      : padding(Padding_SAME),
+        stride_w(0),
+        stride_h(0),
+        filter_width(0),
+        filter_height(0),
+        fused_activation_function(ActivationFunctionType_NONE) {}
+};
+
+struct Pool2DOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
+  typedef Pool2DOptionsT NativeTableType;
+  enum {
+    VT_PADDING = 4,
+    VT_STRIDE_W = 6,
+    VT_STRIDE_H = 8,
+    VT_FILTER_WIDTH = 10,
+    VT_FILTER_HEIGHT = 12,
+    VT_FUSED_ACTIVATION_FUNCTION = 14
+  };
+  Padding padding() const {
+    return static_cast<Padding>(GetField<int8_t>(VT_PADDING, 0));
+  }
+  int32_t stride_w() const { return GetField<int32_t>(VT_STRIDE_W, 0); }
+  int32_t stride_h() const { return GetField<int32_t>(VT_STRIDE_H, 0); }
+  int32_t filter_width() const { return GetField<int32_t>(VT_FILTER_WIDTH, 0); }
+  int32_t filter_height() const {
+    return GetField<int32_t>(VT_FILTER_HEIGHT, 0);
+  }
+  ActivationFunctionType fused_activation_function() const {
+    return static_cast<ActivationFunctionType>(
+        GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
+  }
+  bool Verify(flatbuffers::Verifier &verifier) const {
+    return VerifyTableStart(verifier) &&
+           VerifyField<int8_t>(verifier, VT_PADDING) &&
+           VerifyField<int32_t>(verifier, VT_STRIDE_W) &&
+           VerifyField<int32_t>(verifier, VT_STRIDE_H) &&
+           VerifyField<int32_t>(verifier, VT_FILTER_WIDTH) &&
+           VerifyField<int32_t>(verifier, VT_FILTER_HEIGHT) &&
+           VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) &&
+           verifier.EndTable();
+  }
+  Pool2DOptionsT *UnPack(
+      const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  void UnPackTo(
+      Pool2DOptionsT *_o,
+      const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  static flatbuffers::Offset<Pool2DOptions> Pack(
+      flatbuffers::FlatBufferBuilder &_fbb, const Pool2DOptionsT *_o,
+      const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct Pool2DOptionsBuilder {
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  void add_padding(Padding padding) {
+    fbb_.AddElement<int8_t>(Pool2DOptions::VT_PADDING,
+                            static_cast<int8_t>(padding), 0);
+  }
+  void add_stride_w(int32_t stride_w) {
+    fbb_.AddElement<int32_t>(Pool2DOptions::VT_STRIDE_W, stride_w, 0);
+  }
+  void add_stride_h(int32_t stride_h) {
+    fbb_.AddElement<int32_t>(Pool2DOptions::VT_STRIDE_H, stride_h, 0);
+  }
+  void add_filter_width(int32_t filter_width) {
+    fbb_.AddElement<int32_t>(Pool2DOptions::VT_FILTER_WIDTH, filter_width, 0);
+  }
+  void add_filter_height(int32_t filter_height) {
+    fbb_.AddElement<int32_t>(Pool2DOptions::VT_FILTER_HEIGHT, filter_height, 0);
+  }
+  void add_fused_activation_function(
+      ActivationFunctionType fused_activation_function) {
+    fbb_.AddElement<int8_t>(Pool2DOptions::VT_FUSED_ACTIVATION_FUNCTION,
+                            static_cast<int8_t>(fused_activation_function), 0);
+  }
+  explicit Pool2DOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
+      : fbb_(_fbb) {
+    start_ = fbb_.StartTable();
+  }
+  Pool2DOptionsBuilder &operator=(const Pool2DOptionsBuilder &);
+  flatbuffers::Offset<Pool2DOptions> Finish() {
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<Pool2DOptions>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<Pool2DOptions> CreatePool2DOptions(
+    flatbuffers::FlatBufferBuilder &_fbb, Padding padding = Padding_SAME,
+    int32_t stride_w = 0, int32_t stride_h = 0, int32_t filter_width = 0,
+    int32_t filter_height = 0,
+    ActivationFunctionType fused_activation_function =
+        ActivationFunctionType_NONE) {
+  Pool2DOptionsBuilder builder_(_fbb);
+  builder_.add_filter_height(filter_height);
+  builder_.add_filter_width(filter_width);
+  builder_.add_stride_h(stride_h);
+  builder_.add_stride_w(stride_w);
+  builder_.add_fused_activation_function(fused_activation_function);
+  builder_.add_padding(padding);
+  return builder_.Finish();
+}
+
+flatbuffers::Offset<Pool2DOptions> CreatePool2DOptions(
+    flatbuffers::FlatBufferBuilder &_fbb, const Pool2DOptionsT *_o,
+    const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
+struct DepthwiseConv2DOptionsT : public flatbuffers::NativeTable {
+  typedef DepthwiseConv2DOptions TableType;
+  Padding padding;
+  int32_t stride_w;
+  int32_t stride_h;
+  int32_t depth_multiplier;
+  ActivationFunctionType fused_activation_function;
+  DepthwiseConv2DOptionsT()
+      : padding(Padding_SAME),
+        stride_w(0),
+        stride_h(0),
+        depth_multiplier(0),
+        fused_activation_function(ActivationFunctionType_NONE) {}
+};
+
+struct DepthwiseConv2DOptions FLATBUFFERS_FINAL_CLASS
+    : private flatbuffers::Table {
+  typedef DepthwiseConv2DOptionsT NativeTableType;
+  enum {
+    VT_PADDING = 4,
+    VT_STRIDE_W = 6,
+    VT_STRIDE_H = 8,
+    VT_DEPTH_MULTIPLIER = 10,
+    VT_FUSED_ACTIVATION_FUNCTION = 12
+  };
+  Padding padding() const {
+    return static_cast<Padding>(GetField<int8_t>(VT_PADDING, 0));
+  }
+  int32_t stride_w() const { return GetField<int32_t>(VT_STRIDE_W, 0); }
+  int32_t stride_h() const { return GetField<int32_t>(VT_STRIDE_H, 0); }
+  int32_t depth_multiplier() const {
+    return GetField<int32_t>(VT_DEPTH_MULTIPLIER, 0);
+  }
+  ActivationFunctionType fused_activation_function() const {
+    return static_cast<ActivationFunctionType>(
+        GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
+  }
+  bool Verify(flatbuffers::Verifier &verifier) const {
+    return VerifyTableStart(verifier) &&
+           VerifyField<int8_t>(verifier, VT_PADDING) &&
+           VerifyField<int32_t>(verifier, VT_STRIDE_W) &&
+           VerifyField<int32_t>(verifier, VT_STRIDE_H) &&
+           VerifyField<int32_t>(verifier, VT_DEPTH_MULTIPLIER) &&
+           VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) &&
+           verifier.EndTable();
+  }
+  DepthwiseConv2DOptionsT *UnPack(
+      const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  void UnPackTo(
+      DepthwiseConv2DOptionsT *_o,
+      const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  static flatbuffers::Offset<DepthwiseConv2DOptions> Pack(
+      flatbuffers::FlatBufferBuilder &_fbb, const DepthwiseConv2DOptionsT *_o,
+      const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct DepthwiseConv2DOptionsBuilder {
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  void add_padding(Padding padding) {
+    fbb_.AddElement<int8_t>(DepthwiseConv2DOptions::VT_PADDING,
+                            static_cast<int8_t>(padding), 0);
+  }
+  void add_stride_w(int32_t stride_w) {
+    fbb_.AddElement<int32_t>(DepthwiseConv2DOptions::VT_STRIDE_W, stride_w, 0);
+  }
+  void add_stride_h(int32_t stride_h) {
+    fbb_.AddElement<int32_t>(DepthwiseConv2DOptions::VT_STRIDE_H, stride_h, 0);
+  }
+  void add_depth_multiplier(int32_t depth_multiplier) {
+    fbb_.AddElement<int32_t>(DepthwiseConv2DOptions::VT_DEPTH_MULTIPLIER,
+                             depth_multiplier, 0);
+  }
+  void add_fused_activation_function(
+      ActivationFunctionType fused_activation_function) {
+    fbb_.AddElement<int8_t>(
+        DepthwiseConv2DOptions::VT_FUSED_ACTIVATION_FUNCTION,
+        static_cast<int8_t>(fused_activation_function), 0);
+  }
+  explicit DepthwiseConv2DOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
+      : fbb_(_fbb) {
+    start_ = fbb_.StartTable();
+  }
+  DepthwiseConv2DOptionsBuilder &operator=(
+      const DepthwiseConv2DOptionsBuilder &);
+  flatbuffers::Offset<DepthwiseConv2DOptions> Finish() {
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<DepthwiseConv2DOptions>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<DepthwiseConv2DOptions> CreateDepthwiseConv2DOptions(
+    flatbuffers::FlatBufferBuilder &_fbb, Padding padding = Padding_SAME,
+    int32_t stride_w = 0, int32_t stride_h = 0, int32_t depth_multiplier = 0,
+    ActivationFunctionType fused_activation_function =
+        ActivationFunctionType_NONE) {
+  DepthwiseConv2DOptionsBuilder builder_(_fbb);
+  builder_.add_depth_multiplier(depth_multiplier);
+  builder_.add_stride_h(stride_h);
+  builder_.add_stride_w(stride_w);
+  builder_.add_fused_activation_function(fused_activation_function);
+  builder_.add_padding(padding);
+  return builder_.Finish();
+}
+
+flatbuffers::Offset<DepthwiseConv2DOptions> CreateDepthwiseConv2DOptions(
+    flatbuffers::FlatBufferBuilder &_fbb, const DepthwiseConv2DOptionsT *_o,
+    const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
+struct ConcatEmbeddingsOptionsT : public flatbuffers::NativeTable {
+  typedef ConcatEmbeddingsOptions TableType;
+  int32_t num_channels;
+  std::vector<int32_t> num_columns_per_channel;
+  std::vector<int32_t> embedding_dim_per_channel;
+  ConcatEmbeddingsOptionsT() : num_channels(0) {}
+};
+
+struct ConcatEmbeddingsOptions FLATBUFFERS_FINAL_CLASS
+    : private flatbuffers::Table {
+  typedef ConcatEmbeddingsOptionsT NativeTableType;
+  enum {
+    VT_NUM_CHANNELS = 4,
+    VT_NUM_COLUMNS_PER_CHANNEL = 6,
+    VT_EMBEDDING_DIM_PER_CHANNEL = 8
+  };
+  int32_t num_channels() const { return GetField<int32_t>(VT_NUM_CHANNELS, 0); }
+  const flatbuffers::Vector<int32_t> *num_columns_per_channel() const {
+    return GetPointer<const flatbuffers::Vector<int32_t> *>(
+        VT_NUM_COLUMNS_PER_CHANNEL);
+  }
+  const flatbuffers::Vector<int32_t> *embedding_dim_per_channel() const {
+    return GetPointer<const flatbuffers::Vector<int32_t> *>(
+        VT_EMBEDDING_DIM_PER_CHANNEL);
+  }
+  bool Verify(flatbuffers::Verifier &verifier) const {
+    return VerifyTableStart(verifier) &&
+           VerifyField<int32_t>(verifier, VT_NUM_CHANNELS) &&
+           VerifyOffset(verifier, VT_NUM_COLUMNS_PER_CHANNEL) &&
+           verifier.Verify(num_columns_per_channel()) &&
+           VerifyOffset(verifier, VT_EMBEDDING_DIM_PER_CHANNEL) &&
+           verifier.Verify(embedding_dim_per_channel()) && verifier.EndTable();
+  }
+  ConcatEmbeddingsOptionsT *UnPack(
+      const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  void UnPackTo(
+      ConcatEmbeddingsOptionsT *_o,
+      const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  static flatbuffers::Offset<ConcatEmbeddingsOptions> Pack(
+      flatbuffers::FlatBufferBuilder &_fbb, const ConcatEmbeddingsOptionsT *_o,
+      const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct ConcatEmbeddingsOptionsBuilder {
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  void add_num_channels(int32_t num_channels) {
+    fbb_.AddElement<int32_t>(ConcatEmbeddingsOptions::VT_NUM_CHANNELS,
+                             num_channels, 0);
+  }
+  void add_num_columns_per_channel(
+      flatbuffers::Offset<flatbuffers::Vector<int32_t>>
+          num_columns_per_channel) {
+    fbb_.AddOffset(ConcatEmbeddingsOptions::VT_NUM_COLUMNS_PER_CHANNEL,
+                   num_columns_per_channel);
+  }
+  void add_embedding_dim_per_channel(
+      flatbuffers::Offset<flatbuffers::Vector<int32_t>>
+          embedding_dim_per_channel) {
+    fbb_.AddOffset(ConcatEmbeddingsOptions::VT_EMBEDDING_DIM_PER_CHANNEL,
+                   embedding_dim_per_channel);
+  }
+  explicit ConcatEmbeddingsOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
+      : fbb_(_fbb) {
+    start_ = fbb_.StartTable();
+  }
+  ConcatEmbeddingsOptionsBuilder &operator=(
+      const ConcatEmbeddingsOptionsBuilder &);
+  flatbuffers::Offset<ConcatEmbeddingsOptions> Finish() {
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<ConcatEmbeddingsOptions>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<ConcatEmbeddingsOptions>
+CreateConcatEmbeddingsOptions(flatbuffers::FlatBufferBuilder &_fbb,
+                              int32_t num_channels = 0,
+                              flatbuffers::Offset<flatbuffers::Vector<int32_t>>
+                                  num_columns_per_channel = 0,
+                              flatbuffers::Offset<flatbuffers::Vector<int32_t>>
+                                  embedding_dim_per_channel = 0) {
+  ConcatEmbeddingsOptionsBuilder builder_(_fbb);
+  builder_.add_embedding_dim_per_channel(embedding_dim_per_channel);
+  builder_.add_num_columns_per_channel(num_columns_per_channel);
+  builder_.add_num_channels(num_channels);
+  return builder_.Finish();
+}
+
+inline flatbuffers::Offset<ConcatEmbeddingsOptions>
+CreateConcatEmbeddingsOptionsDirect(
+    flatbuffers::FlatBufferBuilder &_fbb, int32_t num_channels = 0,
+    const std::vector<int32_t> *num_columns_per_channel = nullptr,
+    const std::vector<int32_t> *embedding_dim_per_channel = nullptr) {
+  return tflite::CreateConcatEmbeddingsOptions(
+      _fbb, num_channels,
+      num_columns_per_channel
+          ? _fbb.CreateVector<int32_t>(*num_columns_per_channel)
+          : 0,
+      embedding_dim_per_channel
+          ? _fbb.CreateVector<int32_t>(*embedding_dim_per_channel)
+          : 0);
+}
+
+flatbuffers::Offset<ConcatEmbeddingsOptions> CreateConcatEmbeddingsOptions(
+    flatbuffers::FlatBufferBuilder &_fbb, const ConcatEmbeddingsOptionsT *_o,
+    const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
+struct LSHProjectionOptionsT : public flatbuffers::NativeTable {
+  typedef LSHProjectionOptions TableType;
+  LSHProjectionType type;
+  LSHProjectionOptionsT() : type(LSHProjectionType_UNKNOWN) {}
+};
+
+struct LSHProjectionOptions FLATBUFFERS_FINAL_CLASS
+    : private flatbuffers::Table {
+  typedef LSHProjectionOptionsT NativeTableType;
+  enum { VT_TYPE = 4 };
+  LSHProjectionType type() const {
+    return static_cast<LSHProjectionType>(GetField<int8_t>(VT_TYPE, 0));
+  }
+  bool Verify(flatbuffers::Verifier &verifier) const {
+    return VerifyTableStart(verifier) &&
+           VerifyField<int8_t>(verifier, VT_TYPE) && verifier.EndTable();
+  }
+  LSHProjectionOptionsT *UnPack(
+      const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  void UnPackTo(
+      LSHProjectionOptionsT *_o,
+      const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  static flatbuffers::Offset<LSHProjectionOptions> Pack(
+      flatbuffers::FlatBufferBuilder &_fbb, const LSHProjectionOptionsT *_o,
+      const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct LSHProjectionOptionsBuilder {
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  void add_type(LSHProjectionType type) {
+    fbb_.AddElement<int8_t>(LSHProjectionOptions::VT_TYPE,
+                            static_cast<int8_t>(type), 0);
+  }
+  explicit LSHProjectionOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
+      : fbb_(_fbb) {
+    start_ = fbb_.StartTable();
+  }
+  LSHProjectionOptionsBuilder &operator=(const LSHProjectionOptionsBuilder &);
+  flatbuffers::Offset<LSHProjectionOptions> Finish() {
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<LSHProjectionOptions>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<LSHProjectionOptions> CreateLSHProjectionOptions(
+    flatbuffers::FlatBufferBuilder &_fbb,
+    LSHProjectionType type = LSHProjectionType_UNKNOWN) {
+  LSHProjectionOptionsBuilder builder_(_fbb);
+  builder_.add_type(type);
+  return builder_.Finish();
+}
+
+flatbuffers::Offset<LSHProjectionOptions> CreateLSHProjectionOptions(
+    flatbuffers::FlatBufferBuilder &_fbb, const LSHProjectionOptionsT *_o,
+    const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
+struct SVDFOptionsT : public flatbuffers::NativeTable {
+  typedef SVDFOptions TableType;
+  int32_t rank;
+  ActivationFunctionType fused_activation_function;
+  SVDFOptionsT()
+      : rank(0), fused_activation_function(ActivationFunctionType_NONE) {}
+};
+
+struct SVDFOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
+  typedef SVDFOptionsT NativeTableType;
+  enum { VT_RANK = 4, VT_FUSED_ACTIVATION_FUNCTION = 6 };
+  int32_t rank() const { return GetField<int32_t>(VT_RANK, 0); }
+  ActivationFunctionType fused_activation_function() const {
+    return static_cast<ActivationFunctionType>(
+        GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
+  }
+  bool Verify(flatbuffers::Verifier &verifier) const {
+    return VerifyTableStart(verifier) &&
+           VerifyField<int32_t>(verifier, VT_RANK) &&
+           VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) &&
+           verifier.EndTable();
+  }
+  SVDFOptionsT *UnPack(
+      const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  void UnPackTo(
+      SVDFOptionsT *_o,
+      const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  static flatbuffers::Offset<SVDFOptions> Pack(
+      flatbuffers::FlatBufferBuilder &_fbb, const SVDFOptionsT *_o,
+      const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct SVDFOptionsBuilder {
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  void add_rank(int32_t rank) {
+    fbb_.AddElement<int32_t>(SVDFOptions::VT_RANK, rank, 0);
+  }
+  void add_fused_activation_function(
+      ActivationFunctionType fused_activation_function) {
+    fbb_.AddElement<int8_t>(SVDFOptions::VT_FUSED_ACTIVATION_FUNCTION,
+                            static_cast<int8_t>(fused_activation_function), 0);
+  }
+  explicit SVDFOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
+      : fbb_(_fbb) {
+    start_ = fbb_.StartTable();
+  }
+  SVDFOptionsBuilder &operator=(const SVDFOptionsBuilder &);
+  flatbuffers::Offset<SVDFOptions> Finish() {
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<SVDFOptions>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<SVDFOptions> CreateSVDFOptions(
+    flatbuffers::FlatBufferBuilder &_fbb, int32_t rank = 0,
+    ActivationFunctionType fused_activation_function =
+        ActivationFunctionType_NONE) {
+  SVDFOptionsBuilder builder_(_fbb);
+  builder_.add_rank(rank);
+  builder_.add_fused_activation_function(fused_activation_function);
+  return builder_.Finish();
+}
+
+flatbuffers::Offset<SVDFOptions> CreateSVDFOptions(
+    flatbuffers::FlatBufferBuilder &_fbb, const SVDFOptionsT *_o,
+    const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
+struct RNNOptionsT : public flatbuffers::NativeTable {
+  typedef RNNOptions TableType;
+  ActivationFunctionType fused_activation_function;
+  RNNOptionsT() : fused_activation_function(ActivationFunctionType_NONE) {}
+};
+
+struct RNNOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
+  typedef RNNOptionsT NativeTableType;
+  enum { VT_FUSED_ACTIVATION_FUNCTION = 4 };
+  ActivationFunctionType fused_activation_function() const {
+    return static_cast<ActivationFunctionType>(
+        GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
+  }
+  bool Verify(flatbuffers::Verifier &verifier) const {
+    return VerifyTableStart(verifier) &&
+           VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) &&
+           verifier.EndTable();
+  }
+  RNNOptionsT *UnPack(
+      const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  void UnPackTo(
+      RNNOptionsT *_o,
+      const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  static flatbuffers::Offset<RNNOptions> Pack(
+      flatbuffers::FlatBufferBuilder &_fbb, const RNNOptionsT *_o,
+      const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct RNNOptionsBuilder {
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  void add_fused_activation_function(
+      ActivationFunctionType fused_activation_function) {
+    fbb_.AddElement<int8_t>(RNNOptions::VT_FUSED_ACTIVATION_FUNCTION,
+                            static_cast<int8_t>(fused_activation_function), 0);
+  }
+  explicit RNNOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
+      : fbb_(_fbb) {
+    start_ = fbb_.StartTable();
+  }
+  RNNOptionsBuilder &operator=(const RNNOptionsBuilder &);
+  flatbuffers::Offset<RNNOptions> Finish() {
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<RNNOptions>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<RNNOptions> CreateRNNOptions(
+    flatbuffers::FlatBufferBuilder &_fbb,
+    ActivationFunctionType fused_activation_function =
+        ActivationFunctionType_NONE) {
+  RNNOptionsBuilder builder_(_fbb);
+  builder_.add_fused_activation_function(fused_activation_function);
+  return builder_.Finish();
+}
+
+flatbuffers::Offset<RNNOptions> CreateRNNOptions(
+    flatbuffers::FlatBufferBuilder &_fbb, const RNNOptionsT *_o,
+    const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
+struct FullyConnectedOptionsT : public flatbuffers::NativeTable {
+  typedef FullyConnectedOptions TableType;
+  ActivationFunctionType fused_activation_function;
+  FullyConnectedOptionsT()
+      : fused_activation_function(ActivationFunctionType_NONE) {}
+};
+
+struct FullyConnectedOptions FLATBUFFERS_FINAL_CLASS
+    : private flatbuffers::Table {
+  typedef FullyConnectedOptionsT NativeTableType;
+  enum { VT_FUSED_ACTIVATION_FUNCTION = 4 };
+  ActivationFunctionType fused_activation_function() const {
+    return static_cast<ActivationFunctionType>(
+        GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
+  }
+  bool Verify(flatbuffers::Verifier &verifier) const {
+    return VerifyTableStart(verifier) &&
+           VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) &&
+           verifier.EndTable();
+  }
+  FullyConnectedOptionsT *UnPack(
+      const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  void UnPackTo(
+      FullyConnectedOptionsT *_o,
+      const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  static flatbuffers::Offset<FullyConnectedOptions> Pack(
+      flatbuffers::FlatBufferBuilder &_fbb, const FullyConnectedOptionsT *_o,
+      const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct FullyConnectedOptionsBuilder {
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  void add_fused_activation_function(
+      ActivationFunctionType fused_activation_function) {
+    fbb_.AddElement<int8_t>(FullyConnectedOptions::VT_FUSED_ACTIVATION_FUNCTION,
+                            static_cast<int8_t>(fused_activation_function), 0);
+  }
+  explicit FullyConnectedOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
+      : fbb_(_fbb) {
+    start_ = fbb_.StartTable();
+  }
+  FullyConnectedOptionsBuilder &operator=(const FullyConnectedOptionsBuilder &);
+  flatbuffers::Offset<FullyConnectedOptions> Finish() {
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<FullyConnectedOptions>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<FullyConnectedOptions> CreateFullyConnectedOptions(
+    flatbuffers::FlatBufferBuilder &_fbb,
+    ActivationFunctionType fused_activation_function =
+        ActivationFunctionType_NONE) {
+  FullyConnectedOptionsBuilder builder_(_fbb);
+  builder_.add_fused_activation_function(fused_activation_function);
+  return builder_.Finish();
+}
+
+flatbuffers::Offset<FullyConnectedOptions> CreateFullyConnectedOptions(
+    flatbuffers::FlatBufferBuilder &_fbb, const FullyConnectedOptionsT *_o,
+    const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
+struct SoftmaxOptionsT : public flatbuffers::NativeTable {
+  typedef SoftmaxOptions TableType;
+  float beta;
+  SoftmaxOptionsT() : beta(0.0f) {}
+};
+
+struct SoftmaxOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
+  typedef SoftmaxOptionsT NativeTableType;
+  enum { VT_BETA = 4 };
+  float beta() const { return GetField<float>(VT_BETA, 0.0f); }
+  bool Verify(flatbuffers::Verifier &verifier) const {
+    return VerifyTableStart(verifier) &&
+           VerifyField<float>(verifier, VT_BETA) && verifier.EndTable();
+  }
+  SoftmaxOptionsT *UnPack(
+      const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  void UnPackTo(
+      SoftmaxOptionsT *_o,
+      const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  static flatbuffers::Offset<SoftmaxOptions> Pack(
+      flatbuffers::FlatBufferBuilder &_fbb, const SoftmaxOptionsT *_o,
+      const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct SoftmaxOptionsBuilder {
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  void add_beta(float beta) {
+    fbb_.AddElement<float>(SoftmaxOptions::VT_BETA, beta, 0.0f);
+  }
+  explicit SoftmaxOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
+      : fbb_(_fbb) {
+    start_ = fbb_.StartTable();
+  }
+  SoftmaxOptionsBuilder &operator=(const SoftmaxOptionsBuilder &);
+  flatbuffers::Offset<SoftmaxOptions> Finish() {
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<SoftmaxOptions>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<SoftmaxOptions> CreateSoftmaxOptions(
+    flatbuffers::FlatBufferBuilder &_fbb, float beta = 0.0f) {
+  SoftmaxOptionsBuilder builder_(_fbb);
+  builder_.add_beta(beta);
+  return builder_.Finish();
+}
+
+flatbuffers::Offset<SoftmaxOptions> CreateSoftmaxOptions(
+    flatbuffers::FlatBufferBuilder &_fbb, const SoftmaxOptionsT *_o,
+    const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
+struct ConcatenationOptionsT : public flatbuffers::NativeTable {
+  typedef ConcatenationOptions TableType;
+  int32_t axis;
+  ActivationFunctionType fused_activation_function;
+  ConcatenationOptionsT()
+      : axis(0), fused_activation_function(ActivationFunctionType_NONE) {}
+};
+
+struct ConcatenationOptions FLATBUFFERS_FINAL_CLASS
+    : private flatbuffers::Table {
+  typedef ConcatenationOptionsT NativeTableType;
+  enum { VT_AXIS = 4, VT_FUSED_ACTIVATION_FUNCTION = 6 };
+  int32_t axis() const { return GetField<int32_t>(VT_AXIS, 0); }
+  ActivationFunctionType fused_activation_function() const {
+    return static_cast<ActivationFunctionType>(
+        GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
+  }
+  bool Verify(flatbuffers::Verifier &verifier) const {
+    return VerifyTableStart(verifier) &&
+           VerifyField<int32_t>(verifier, VT_AXIS) &&
+           VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) &&
+           verifier.EndTable();
+  }
+  ConcatenationOptionsT *UnPack(
+      const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  void UnPackTo(
+      ConcatenationOptionsT *_o,
+      const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  static flatbuffers::Offset<ConcatenationOptions> Pack(
+      flatbuffers::FlatBufferBuilder &_fbb, const ConcatenationOptionsT *_o,
+      const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct ConcatenationOptionsBuilder {
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  void add_axis(int32_t axis) {
+    fbb_.AddElement<int32_t>(ConcatenationOptions::VT_AXIS, axis, 0);
+  }
+  void add_fused_activation_function(
+      ActivationFunctionType fused_activation_function) {
+    fbb_.AddElement<int8_t>(ConcatenationOptions::VT_FUSED_ACTIVATION_FUNCTION,
+                            static_cast<int8_t>(fused_activation_function), 0);
+  }
+  explicit ConcatenationOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
+      : fbb_(_fbb) {
+    start_ = fbb_.StartTable();
+  }
+  ConcatenationOptionsBuilder &operator=(const ConcatenationOptionsBuilder &);
+  flatbuffers::Offset<ConcatenationOptions> Finish() {
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<ConcatenationOptions>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<ConcatenationOptions> CreateConcatenationOptions(
+    flatbuffers::FlatBufferBuilder &_fbb, int32_t axis = 0,
+    ActivationFunctionType fused_activation_function =
+        ActivationFunctionType_NONE) {
+  ConcatenationOptionsBuilder builder_(_fbb);
+  builder_.add_axis(axis);
+  builder_.add_fused_activation_function(fused_activation_function);
+  return builder_.Finish();
+}
+
+flatbuffers::Offset<ConcatenationOptions> CreateConcatenationOptions(
+    flatbuffers::FlatBufferBuilder &_fbb, const ConcatenationOptionsT *_o,
+    const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
+struct AddOptionsT : public flatbuffers::NativeTable {
+  typedef AddOptions TableType;
+  ActivationFunctionType fused_activation_function;
+  AddOptionsT() : fused_activation_function(ActivationFunctionType_NONE) {}
+};
+
+struct AddOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
+  typedef AddOptionsT NativeTableType;
+  enum { VT_FUSED_ACTIVATION_FUNCTION = 4 };
+  ActivationFunctionType fused_activation_function() const {
+    return static_cast<ActivationFunctionType>(
+        GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
+  }
+  bool Verify(flatbuffers::Verifier &verifier) const {
+    return VerifyTableStart(verifier) &&
+           VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) &&
+           verifier.EndTable();
+  }
+  AddOptionsT *UnPack(
+      const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  void UnPackTo(
+      AddOptionsT *_o,
+      const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  static flatbuffers::Offset<AddOptions> Pack(
+      flatbuffers::FlatBufferBuilder &_fbb, const AddOptionsT *_o,
+      const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct AddOptionsBuilder {
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  void add_fused_activation_function(
+      ActivationFunctionType fused_activation_function) {
+    fbb_.AddElement<int8_t>(AddOptions::VT_FUSED_ACTIVATION_FUNCTION,
+                            static_cast<int8_t>(fused_activation_function), 0);
+  }
+  explicit AddOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
+      : fbb_(_fbb) {
+    start_ = fbb_.StartTable();
+  }
+  AddOptionsBuilder &operator=(const AddOptionsBuilder &);
+  flatbuffers::Offset<AddOptions> Finish() {
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<AddOptions>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<AddOptions> CreateAddOptions(
+    flatbuffers::FlatBufferBuilder &_fbb,
+    ActivationFunctionType fused_activation_function =
+        ActivationFunctionType_NONE) {
+  AddOptionsBuilder builder_(_fbb);
+  builder_.add_fused_activation_function(fused_activation_function);
+  return builder_.Finish();
+}
+
+flatbuffers::Offset<AddOptions> CreateAddOptions(
+    flatbuffers::FlatBufferBuilder &_fbb, const AddOptionsT *_o,
+    const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
+struct MulOptionsT : public flatbuffers::NativeTable {
+  typedef MulOptions TableType;
+  ActivationFunctionType fused_activation_function;
+  MulOptionsT() : fused_activation_function(ActivationFunctionType_NONE) {}
+};
+
+struct MulOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
+  typedef MulOptionsT NativeTableType;
+  enum { VT_FUSED_ACTIVATION_FUNCTION = 4 };
+  ActivationFunctionType fused_activation_function() const {
+    return static_cast<ActivationFunctionType>(
+        GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
+  }
+  bool Verify(flatbuffers::Verifier &verifier) const {
+    return VerifyTableStart(verifier) &&
+           VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) &&
+           verifier.EndTable();
+  }
+  MulOptionsT *UnPack(
+      const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  void UnPackTo(
+      MulOptionsT *_o,
+      const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  static flatbuffers::Offset<MulOptions> Pack(
+      flatbuffers::FlatBufferBuilder &_fbb, const MulOptionsT *_o,
+      const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct MulOptionsBuilder {
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  void add_fused_activation_function(
+      ActivationFunctionType fused_activation_function) {
+    fbb_.AddElement<int8_t>(MulOptions::VT_FUSED_ACTIVATION_FUNCTION,
+                            static_cast<int8_t>(fused_activation_function), 0);
+  }
+  explicit MulOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
+      : fbb_(_fbb) {
+    start_ = fbb_.StartTable();
+  }
+  MulOptionsBuilder &operator=(const MulOptionsBuilder &);
+  flatbuffers::Offset<MulOptions> Finish() {
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<MulOptions>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<MulOptions> CreateMulOptions(
+    flatbuffers::FlatBufferBuilder &_fbb,
+    ActivationFunctionType fused_activation_function =
+        ActivationFunctionType_NONE) {
+  MulOptionsBuilder builder_(_fbb);
+  builder_.add_fused_activation_function(fused_activation_function);
+  return builder_.Finish();
+}
+
+flatbuffers::Offset<MulOptions> CreateMulOptions(
+    flatbuffers::FlatBufferBuilder &_fbb, const MulOptionsT *_o,
+    const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
+struct L2NormOptionsT : public flatbuffers::NativeTable {
+  typedef L2NormOptions TableType;
+  ActivationFunctionType fused_activation_function;
+  L2NormOptionsT() : fused_activation_function(ActivationFunctionType_NONE) {}
+};
+
+struct L2NormOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
+  typedef L2NormOptionsT NativeTableType;
+  enum { VT_FUSED_ACTIVATION_FUNCTION = 4 };
+  ActivationFunctionType fused_activation_function() const {
+    return static_cast<ActivationFunctionType>(
+        GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
+  }
+  bool Verify(flatbuffers::Verifier &verifier) const {
+    return VerifyTableStart(verifier) &&
+           VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) &&
+           verifier.EndTable();
+  }
+  L2NormOptionsT *UnPack(
+      const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  void UnPackTo(
+      L2NormOptionsT *_o,
+      const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  static flatbuffers::Offset<L2NormOptions> Pack(
+      flatbuffers::FlatBufferBuilder &_fbb, const L2NormOptionsT *_o,
+      const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct L2NormOptionsBuilder {
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  void add_fused_activation_function(
+      ActivationFunctionType fused_activation_function) {
+    fbb_.AddElement<int8_t>(L2NormOptions::VT_FUSED_ACTIVATION_FUNCTION,
+                            static_cast<int8_t>(fused_activation_function), 0);
+  }
+  explicit L2NormOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
+      : fbb_(_fbb) {
+    start_ = fbb_.StartTable();
+  }
+  L2NormOptionsBuilder &operator=(const L2NormOptionsBuilder &);
+  flatbuffers::Offset<L2NormOptions> Finish() {
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<L2NormOptions>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<L2NormOptions> CreateL2NormOptions(
+    flatbuffers::FlatBufferBuilder &_fbb,
+    ActivationFunctionType fused_activation_function =
+        ActivationFunctionType_NONE) {
+  L2NormOptionsBuilder builder_(_fbb);
+  builder_.add_fused_activation_function(fused_activation_function);
+  return builder_.Finish();
+}
+
+flatbuffers::Offset<L2NormOptions> CreateL2NormOptions(
+    flatbuffers::FlatBufferBuilder &_fbb, const L2NormOptionsT *_o,
+    const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
+struct LocalResponseNormalizationOptionsT : public flatbuffers::NativeTable {
+  typedef LocalResponseNormalizationOptions TableType;
+  int32_t radius;
+  float bias;
+  float alpha;
+  float beta;
+  LocalResponseNormalizationOptionsT()
+      : radius(0), bias(0.0f), alpha(0.0f), beta(0.0f) {}
+};
+
+struct LocalResponseNormalizationOptions FLATBUFFERS_FINAL_CLASS
+    : private flatbuffers::Table {
+  typedef LocalResponseNormalizationOptionsT NativeTableType;
+  enum { VT_RADIUS = 4, VT_BIAS = 6, VT_ALPHA = 8, VT_BETA = 10 };
+  int32_t radius() const { return GetField<int32_t>(VT_RADIUS, 0); }
+  float bias() const { return GetField<float>(VT_BIAS, 0.0f); }
+  float alpha() const { return GetField<float>(VT_ALPHA, 0.0f); }
+  float beta() const { return GetField<float>(VT_BETA, 0.0f); }
+  bool Verify(flatbuffers::Verifier &verifier) const {
+    return VerifyTableStart(verifier) &&
+           VerifyField<int32_t>(verifier, VT_RADIUS) &&
+           VerifyField<float>(verifier, VT_BIAS) &&
+           VerifyField<float>(verifier, VT_ALPHA) &&
+           VerifyField<float>(verifier, VT_BETA) && verifier.EndTable();
+  }
+  LocalResponseNormalizationOptionsT *UnPack(
+      const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  void UnPackTo(
+      LocalResponseNormalizationOptionsT *_o,
+      const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  static flatbuffers::Offset<LocalResponseNormalizationOptions> Pack(
+      flatbuffers::FlatBufferBuilder &_fbb,
+      const LocalResponseNormalizationOptionsT *_o,
+      const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct LocalResponseNormalizationOptionsBuilder {
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  void add_radius(int32_t radius) {
+    fbb_.AddElement<int32_t>(LocalResponseNormalizationOptions::VT_RADIUS,
+                             radius, 0);
+  }
+  void add_bias(float bias) {
+    fbb_.AddElement<float>(LocalResponseNormalizationOptions::VT_BIAS, bias,
+                           0.0f);
+  }
+  void add_alpha(float alpha) {
+    fbb_.AddElement<float>(LocalResponseNormalizationOptions::VT_ALPHA, alpha,
+                           0.0f);
+  }
+  void add_beta(float beta) {
+    fbb_.AddElement<float>(LocalResponseNormalizationOptions::VT_BETA, beta,
+                           0.0f);
+  }
+  explicit LocalResponseNormalizationOptionsBuilder(
+      flatbuffers::FlatBufferBuilder &_fbb)
+      : fbb_(_fbb) {
+    start_ = fbb_.StartTable();
+  }
+  LocalResponseNormalizationOptionsBuilder &operator=(
+      const LocalResponseNormalizationOptionsBuilder &);
+  flatbuffers::Offset<LocalResponseNormalizationOptions> Finish() {
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<LocalResponseNormalizationOptions>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<LocalResponseNormalizationOptions>
+CreateLocalResponseNormalizationOptions(flatbuffers::FlatBufferBuilder &_fbb,
+                                        int32_t radius = 0, float bias = 0.0f,
+                                        float alpha = 0.0f, float beta = 0.0f) {
+  LocalResponseNormalizationOptionsBuilder builder_(_fbb);
+  builder_.add_beta(beta);
+  builder_.add_alpha(alpha);
+  builder_.add_bias(bias);
+  builder_.add_radius(radius);
+  return builder_.Finish();
+}
+
+flatbuffers::Offset<LocalResponseNormalizationOptions>
+CreateLocalResponseNormalizationOptions(
+    flatbuffers::FlatBufferBuilder &_fbb,
+    const LocalResponseNormalizationOptionsT *_o,
+    const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
+struct LSTMOptionsT : public flatbuffers::NativeTable {
+  typedef LSTMOptions TableType;
+  ActivationFunctionType fused_activation_function;
+  float cell_clip;
+  float proj_clip;
+  LSTMOptionsT()
+      : fused_activation_function(ActivationFunctionType_NONE),
+        cell_clip(0.0f),
+        proj_clip(0.0f) {}
+};
+
+struct LSTMOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
+  typedef LSTMOptionsT NativeTableType;
+  enum { VT_FUSED_ACTIVATION_FUNCTION = 4, VT_CELL_CLIP = 6, VT_PROJ_CLIP = 8 };
+  ActivationFunctionType fused_activation_function() const {
+    return static_cast<ActivationFunctionType>(
+        GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
+  }
+  float cell_clip() const { return GetField<float>(VT_CELL_CLIP, 0.0f); }
+  float proj_clip() const { return GetField<float>(VT_PROJ_CLIP, 0.0f); }
+  bool Verify(flatbuffers::Verifier &verifier) const {
+    return VerifyTableStart(verifier) &&
+           VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) &&
+           VerifyField<float>(verifier, VT_CELL_CLIP) &&
+           VerifyField<float>(verifier, VT_PROJ_CLIP) && verifier.EndTable();
+  }
+  LSTMOptionsT *UnPack(
+      const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  void UnPackTo(
+      LSTMOptionsT *_o,
+      const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  static flatbuffers::Offset<LSTMOptions> Pack(
+      flatbuffers::FlatBufferBuilder &_fbb, const LSTMOptionsT *_o,
+      const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct LSTMOptionsBuilder {
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  void add_fused_activation_function(
+      ActivationFunctionType fused_activation_function) {
+    fbb_.AddElement<int8_t>(LSTMOptions::VT_FUSED_ACTIVATION_FUNCTION,
+                            static_cast<int8_t>(fused_activation_function), 0);
+  }
+  void add_cell_clip(float cell_clip) {
+    fbb_.AddElement<float>(LSTMOptions::VT_CELL_CLIP, cell_clip, 0.0f);
+  }
+  void add_proj_clip(float proj_clip) {
+    fbb_.AddElement<float>(LSTMOptions::VT_PROJ_CLIP, proj_clip, 0.0f);
+  }
+  explicit LSTMOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
+      : fbb_(_fbb) {
+    start_ = fbb_.StartTable();
+  }
+  LSTMOptionsBuilder &operator=(const LSTMOptionsBuilder &);
+  flatbuffers::Offset<LSTMOptions> Finish() {
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<LSTMOptions>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<LSTMOptions> CreateLSTMOptions(
+    flatbuffers::FlatBufferBuilder &_fbb,
+    ActivationFunctionType fused_activation_function =
+        ActivationFunctionType_NONE,
+    float cell_clip = 0.0f, float proj_clip = 0.0f) {
+  LSTMOptionsBuilder builder_(_fbb);
+  builder_.add_proj_clip(proj_clip);
+  builder_.add_cell_clip(cell_clip);
+  builder_.add_fused_activation_function(fused_activation_function);
+  return builder_.Finish();
+}
+
+flatbuffers::Offset<LSTMOptions> CreateLSTMOptions(
+    flatbuffers::FlatBufferBuilder &_fbb, const LSTMOptionsT *_o,
+    const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
+struct ResizeBilinearOptionsT : public flatbuffers::NativeTable {
+  typedef ResizeBilinearOptions TableType;
+  int32_t new_height;
+  int32_t new_width;
+  ResizeBilinearOptionsT() : new_height(0), new_width(0) {}
+};
+
+struct ResizeBilinearOptions FLATBUFFERS_FINAL_CLASS
+    : private flatbuffers::Table {
+  typedef ResizeBilinearOptionsT NativeTableType;
+  enum { VT_NEW_HEIGHT = 4, VT_NEW_WIDTH = 6 };
+  int32_t new_height() const { return GetField<int32_t>(VT_NEW_HEIGHT, 0); }
+  int32_t new_width() const { return GetField<int32_t>(VT_NEW_WIDTH, 0); }
+  bool Verify(flatbuffers::Verifier &verifier) const {
+    return VerifyTableStart(verifier) &&
+           VerifyField<int32_t>(verifier, VT_NEW_HEIGHT) &&
+           VerifyField<int32_t>(verifier, VT_NEW_WIDTH) && verifier.EndTable();
+  }
+  ResizeBilinearOptionsT *UnPack(
+      const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  void UnPackTo(
+      ResizeBilinearOptionsT *_o,
+      const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  static flatbuffers::Offset<ResizeBilinearOptions> Pack(
+      flatbuffers::FlatBufferBuilder &_fbb, const ResizeBilinearOptionsT *_o,
+      const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct ResizeBilinearOptionsBuilder {
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  void add_new_height(int32_t new_height) {
+    fbb_.AddElement<int32_t>(ResizeBilinearOptions::VT_NEW_HEIGHT, new_height,
+                             0);
+  }
+  void add_new_width(int32_t new_width) {
+    fbb_.AddElement<int32_t>(ResizeBilinearOptions::VT_NEW_WIDTH, new_width, 0);
+  }
+  explicit ResizeBilinearOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
+      : fbb_(_fbb) {
+    start_ = fbb_.StartTable();
+  }
+  ResizeBilinearOptionsBuilder &operator=(const ResizeBilinearOptionsBuilder &);
+  flatbuffers::Offset<ResizeBilinearOptions> Finish() {
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<ResizeBilinearOptions>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<ResizeBilinearOptions> CreateResizeBilinearOptions(
+    flatbuffers::FlatBufferBuilder &_fbb, int32_t new_height = 0,
+    int32_t new_width = 0) {
+  ResizeBilinearOptionsBuilder builder_(_fbb);
+  builder_.add_new_width(new_width);
+  builder_.add_new_height(new_height);
+  return builder_.Finish();
+}
+
+flatbuffers::Offset<ResizeBilinearOptions> CreateResizeBilinearOptions(
+    flatbuffers::FlatBufferBuilder &_fbb, const ResizeBilinearOptionsT *_o,
+    const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
+struct CallOptionsT : public flatbuffers::NativeTable {
+  typedef CallOptions TableType;
+  uint32_t subgraph;
+  CallOptionsT() : subgraph(0) {}
+};
+
+struct CallOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
+  typedef CallOptionsT NativeTableType;
+  enum { VT_SUBGRAPH = 4 };
+  uint32_t subgraph() const { return GetField<uint32_t>(VT_SUBGRAPH, 0); }
+  bool Verify(flatbuffers::Verifier &verifier) const {
+    return VerifyTableStart(verifier) &&
+           VerifyField<uint32_t>(verifier, VT_SUBGRAPH) && verifier.EndTable();
+  }
+  CallOptionsT *UnPack(
+      const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  void UnPackTo(
+      CallOptionsT *_o,
+      const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  static flatbuffers::Offset<CallOptions> Pack(
+      flatbuffers::FlatBufferBuilder &_fbb, const CallOptionsT *_o,
+      const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct CallOptionsBuilder {
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  void add_subgraph(uint32_t subgraph) {
+    fbb_.AddElement<uint32_t>(CallOptions::VT_SUBGRAPH, subgraph, 0);
+  }
+  explicit CallOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
+      : fbb_(_fbb) {
+    start_ = fbb_.StartTable();
+  }
+  CallOptionsBuilder &operator=(const CallOptionsBuilder &);
+  flatbuffers::Offset<CallOptions> Finish() {
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<CallOptions>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<CallOptions> CreateCallOptions(
+    flatbuffers::FlatBufferBuilder &_fbb, uint32_t subgraph = 0) {
+  CallOptionsBuilder builder_(_fbb);
+  builder_.add_subgraph(subgraph);
+  return builder_.Finish();
+}
+
+flatbuffers::Offset<CallOptions> CreateCallOptions(
+    flatbuffers::FlatBufferBuilder &_fbb, const CallOptionsT *_o,
+    const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
+struct ReshapeOptionsT : public flatbuffers::NativeTable {
+  typedef ReshapeOptions TableType;
+  std::vector<int32_t> new_shape;
+  ReshapeOptionsT() {}
+};
+
+struct ReshapeOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
+  typedef ReshapeOptionsT NativeTableType;
+  enum { VT_NEW_SHAPE = 4 };
+  const flatbuffers::Vector<int32_t> *new_shape() const {
+    return GetPointer<const flatbuffers::Vector<int32_t> *>(VT_NEW_SHAPE);
+  }
+  bool Verify(flatbuffers::Verifier &verifier) const {
+    return VerifyTableStart(verifier) && VerifyOffset(verifier, VT_NEW_SHAPE) &&
+           verifier.Verify(new_shape()) && verifier.EndTable();
+  }
+  ReshapeOptionsT *UnPack(
+      const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  void UnPackTo(
+      ReshapeOptionsT *_o,
+      const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  static flatbuffers::Offset<ReshapeOptions> Pack(
+      flatbuffers::FlatBufferBuilder &_fbb, const ReshapeOptionsT *_o,
+      const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct ReshapeOptionsBuilder {
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  void add_new_shape(
+      flatbuffers::Offset<flatbuffers::Vector<int32_t>> new_shape) {
+    fbb_.AddOffset(ReshapeOptions::VT_NEW_SHAPE, new_shape);
+  }
+  explicit ReshapeOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
+      : fbb_(_fbb) {
+    start_ = fbb_.StartTable();
+  }
+  ReshapeOptionsBuilder &operator=(const ReshapeOptionsBuilder &);
+  flatbuffers::Offset<ReshapeOptions> Finish() {
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<ReshapeOptions>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<ReshapeOptions> CreateReshapeOptions(
+    flatbuffers::FlatBufferBuilder &_fbb,
+    flatbuffers::Offset<flatbuffers::Vector<int32_t>> new_shape = 0) {
+  ReshapeOptionsBuilder builder_(_fbb);
+  builder_.add_new_shape(new_shape);
+  return builder_.Finish();
+}
+
+inline flatbuffers::Offset<ReshapeOptions> CreateReshapeOptionsDirect(
+    flatbuffers::FlatBufferBuilder &_fbb,
+    const std::vector<int32_t> *new_shape = nullptr) {
+  return tflite::CreateReshapeOptions(
+      _fbb, new_shape ? _fbb.CreateVector<int32_t>(*new_shape) : 0);
+}
+
+flatbuffers::Offset<ReshapeOptions> CreateReshapeOptions(
+    flatbuffers::FlatBufferBuilder &_fbb, const ReshapeOptionsT *_o,
+    const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
+struct SkipGramOptionsT : public flatbuffers::NativeTable {
+  typedef SkipGramOptions TableType;
+  int32_t ngram_size;
+  int32_t max_skip_size;
+  bool include_all_ngrams;
+  SkipGramOptionsT()
+      : ngram_size(0), max_skip_size(0), include_all_ngrams(false) {}
+};
+
+struct SkipGramOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
+  typedef SkipGramOptionsT NativeTableType;
+  enum { VT_NGRAM_SIZE = 4, VT_MAX_SKIP_SIZE = 6, VT_INCLUDE_ALL_NGRAMS = 8 };
+  int32_t ngram_size() const { return GetField<int32_t>(VT_NGRAM_SIZE, 0); }
+  int32_t max_skip_size() const {
+    return GetField<int32_t>(VT_MAX_SKIP_SIZE, 0);
+  }
+  bool include_all_ngrams() const {
+    return GetField<uint8_t>(VT_INCLUDE_ALL_NGRAMS, 0) != 0;
+  }
+  bool Verify(flatbuffers::Verifier &verifier) const {
+    return VerifyTableStart(verifier) &&
+           VerifyField<int32_t>(verifier, VT_NGRAM_SIZE) &&
+           VerifyField<int32_t>(verifier, VT_MAX_SKIP_SIZE) &&
+           VerifyField<uint8_t>(verifier, VT_INCLUDE_ALL_NGRAMS) &&
+           verifier.EndTable();
+  }
+  SkipGramOptionsT *UnPack(
+      const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  void UnPackTo(
+      SkipGramOptionsT *_o,
+      const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  static flatbuffers::Offset<SkipGramOptions> Pack(
+      flatbuffers::FlatBufferBuilder &_fbb, const SkipGramOptionsT *_o,
+      const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct SkipGramOptionsBuilder {
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  void add_ngram_size(int32_t ngram_size) {
+    fbb_.AddElement<int32_t>(SkipGramOptions::VT_NGRAM_SIZE, ngram_size, 0);
+  }
+  void add_max_skip_size(int32_t max_skip_size) {
+    fbb_.AddElement<int32_t>(SkipGramOptions::VT_MAX_SKIP_SIZE, max_skip_size,
+                             0);
+  }
+  void add_include_all_ngrams(bool include_all_ngrams) {
+    fbb_.AddElement<uint8_t>(SkipGramOptions::VT_INCLUDE_ALL_NGRAMS,
+                             static_cast<uint8_t>(include_all_ngrams), 0);
+  }
+  explicit SkipGramOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
+      : fbb_(_fbb) {
+    start_ = fbb_.StartTable();
+  }
+  SkipGramOptionsBuilder &operator=(const SkipGramOptionsBuilder &);
+  flatbuffers::Offset<SkipGramOptions> Finish() {
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<SkipGramOptions>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<SkipGramOptions> CreateSkipGramOptions(
+    flatbuffers::FlatBufferBuilder &_fbb, int32_t ngram_size = 0,
+    int32_t max_skip_size = 0, bool include_all_ngrams = false) {
+  SkipGramOptionsBuilder builder_(_fbb);
+  builder_.add_max_skip_size(max_skip_size);
+  builder_.add_ngram_size(ngram_size);
+  builder_.add_include_all_ngrams(include_all_ngrams);
+  return builder_.Finish();
+}
+
+flatbuffers::Offset<SkipGramOptions> CreateSkipGramOptions(
+    flatbuffers::FlatBufferBuilder &_fbb, const SkipGramOptionsT *_o,
+    const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
+struct SpaceToDepthOptionsT : public flatbuffers::NativeTable {
+  typedef SpaceToDepthOptions TableType;
+  int32_t block_size;
+  SpaceToDepthOptionsT() : block_size(0) {}
+};
+
+struct SpaceToDepthOptions FLATBUFFERS_FINAL_CLASS
+    : private flatbuffers::Table {
+  typedef SpaceToDepthOptionsT NativeTableType;
+  enum { VT_BLOCK_SIZE = 4 };
+  int32_t block_size() const { return GetField<int32_t>(VT_BLOCK_SIZE, 0); }
+  bool Verify(flatbuffers::Verifier &verifier) const {
+    return VerifyTableStart(verifier) &&
+           VerifyField<int32_t>(verifier, VT_BLOCK_SIZE) && verifier.EndTable();
+  }
+  SpaceToDepthOptionsT *UnPack(
+      const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  void UnPackTo(
+      SpaceToDepthOptionsT *_o,
+      const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  static flatbuffers::Offset<SpaceToDepthOptions> Pack(
+      flatbuffers::FlatBufferBuilder &_fbb, const SpaceToDepthOptionsT *_o,
+      const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct SpaceToDepthOptionsBuilder {
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  void add_block_size(int32_t block_size) {
+    fbb_.AddElement<int32_t>(SpaceToDepthOptions::VT_BLOCK_SIZE, block_size, 0);
+  }
+  explicit SpaceToDepthOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
+      : fbb_(_fbb) {
+    start_ = fbb_.StartTable();
+  }
+  SpaceToDepthOptionsBuilder &operator=(const SpaceToDepthOptionsBuilder &);
+  flatbuffers::Offset<SpaceToDepthOptions> Finish() {
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<SpaceToDepthOptions>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<SpaceToDepthOptions> CreateSpaceToDepthOptions(
+    flatbuffers::FlatBufferBuilder &_fbb, int32_t block_size = 0) {
+  SpaceToDepthOptionsBuilder builder_(_fbb);
+  builder_.add_block_size(block_size);
+  return builder_.Finish();
+}
+
+flatbuffers::Offset<SpaceToDepthOptions> CreateSpaceToDepthOptions(
+    flatbuffers::FlatBufferBuilder &_fbb, const SpaceToDepthOptionsT *_o,
+    const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
+struct EmbeddingLookupSparseOptionsT : public flatbuffers::NativeTable {
+  typedef EmbeddingLookupSparseOptions TableType;
+  CombinerType combiner;
+  EmbeddingLookupSparseOptionsT() : combiner(CombinerType_SUM) {}
+};
+
+struct EmbeddingLookupSparseOptions FLATBUFFERS_FINAL_CLASS
+    : private flatbuffers::Table {
+  typedef EmbeddingLookupSparseOptionsT NativeTableType;
+  enum { VT_COMBINER = 4 };
+  CombinerType combiner() const {
+    return static_cast<CombinerType>(GetField<int8_t>(VT_COMBINER, 0));
+  }
+  bool Verify(flatbuffers::Verifier &verifier) const {
+    return VerifyTableStart(verifier) &&
+           VerifyField<int8_t>(verifier, VT_COMBINER) && verifier.EndTable();
+  }
+  EmbeddingLookupSparseOptionsT *UnPack(
+      const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  void UnPackTo(
+      EmbeddingLookupSparseOptionsT *_o,
+      const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  static flatbuffers::Offset<EmbeddingLookupSparseOptions> Pack(
+      flatbuffers::FlatBufferBuilder &_fbb,
+      const EmbeddingLookupSparseOptionsT *_o,
+      const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct EmbeddingLookupSparseOptionsBuilder {
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  void add_combiner(CombinerType combiner) {
+    fbb_.AddElement<int8_t>(EmbeddingLookupSparseOptions::VT_COMBINER,
+                            static_cast<int8_t>(combiner), 0);
+  }
+  explicit EmbeddingLookupSparseOptionsBuilder(
+      flatbuffers::FlatBufferBuilder &_fbb)
+      : fbb_(_fbb) {
+    start_ = fbb_.StartTable();
+  }
+  EmbeddingLookupSparseOptionsBuilder &operator=(
+      const EmbeddingLookupSparseOptionsBuilder &);
+  flatbuffers::Offset<EmbeddingLookupSparseOptions> Finish() {
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<EmbeddingLookupSparseOptions>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<EmbeddingLookupSparseOptions>
+CreateEmbeddingLookupSparseOptions(flatbuffers::FlatBufferBuilder &_fbb,
+                                   CombinerType combiner = CombinerType_SUM) {
+  EmbeddingLookupSparseOptionsBuilder builder_(_fbb);
+  builder_.add_combiner(combiner);
+  return builder_.Finish();
+}
+
+flatbuffers::Offset<EmbeddingLookupSparseOptions>
+CreateEmbeddingLookupSparseOptions(
+    flatbuffers::FlatBufferBuilder &_fbb,
+    const EmbeddingLookupSparseOptionsT *_o,
+    const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
+struct OperatorCodeT : public flatbuffers::NativeTable {
+  typedef OperatorCode TableType;
+  BuiltinOperator builtin_code;
+  std::string custom_code;
+  OperatorCodeT() : builtin_code(BuiltinOperator_ADD) {}
+};
+
+struct OperatorCode FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
+  typedef OperatorCodeT NativeTableType;
+  enum { VT_BUILTIN_CODE = 4, VT_CUSTOM_CODE = 6 };
+  BuiltinOperator builtin_code() const {
+    return static_cast<BuiltinOperator>(GetField<int8_t>(VT_BUILTIN_CODE, 0));
+  }
+  const flatbuffers::String *custom_code() const {
+    return GetPointer<const flatbuffers::String *>(VT_CUSTOM_CODE);
+  }
+  bool Verify(flatbuffers::Verifier &verifier) const {
+    return VerifyTableStart(verifier) &&
+           VerifyField<int8_t>(verifier, VT_BUILTIN_CODE) &&
+           VerifyOffset(verifier, VT_CUSTOM_CODE) &&
+           verifier.Verify(custom_code()) && verifier.EndTable();
+  }
+  OperatorCodeT *UnPack(
+      const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  void UnPackTo(
+      OperatorCodeT *_o,
+      const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  static flatbuffers::Offset<OperatorCode> Pack(
+      flatbuffers::FlatBufferBuilder &_fbb, const OperatorCodeT *_o,
+      const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct OperatorCodeBuilder {
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  void add_builtin_code(BuiltinOperator builtin_code) {
+    fbb_.AddElement<int8_t>(OperatorCode::VT_BUILTIN_CODE,
+                            static_cast<int8_t>(builtin_code), 0);
+  }
+  void add_custom_code(flatbuffers::Offset<flatbuffers::String> custom_code) {
+    fbb_.AddOffset(OperatorCode::VT_CUSTOM_CODE, custom_code);
+  }
+  explicit OperatorCodeBuilder(flatbuffers::FlatBufferBuilder &_fbb)
+      : fbb_(_fbb) {
+    start_ = fbb_.StartTable();
+  }
+  OperatorCodeBuilder &operator=(const OperatorCodeBuilder &);
+  flatbuffers::Offset<OperatorCode> Finish() {
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<OperatorCode>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<OperatorCode> CreateOperatorCode(
+    flatbuffers::FlatBufferBuilder &_fbb,
+    BuiltinOperator builtin_code = BuiltinOperator_ADD,
+    flatbuffers::Offset<flatbuffers::String> custom_code = 0) {
+  OperatorCodeBuilder builder_(_fbb);
+  builder_.add_custom_code(custom_code);
+  builder_.add_builtin_code(builtin_code);
+  return builder_.Finish();
+}
+
+inline flatbuffers::Offset<OperatorCode> CreateOperatorCodeDirect(
+    flatbuffers::FlatBufferBuilder &_fbb,
+    BuiltinOperator builtin_code = BuiltinOperator_ADD,
+    const char *custom_code = nullptr) {
+  return tflite::CreateOperatorCode(
+      _fbb, builtin_code, custom_code ? _fbb.CreateString(custom_code) : 0);
+}
+
+flatbuffers::Offset<OperatorCode> CreateOperatorCode(
+    flatbuffers::FlatBufferBuilder &_fbb, const OperatorCodeT *_o,
+    const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
+struct OperatorT : public flatbuffers::NativeTable {
+  typedef Operator TableType;
+  uint32_t opcode_index;
+  std::vector<int32_t> inputs;
+  std::vector<int32_t> outputs;
+  BuiltinOptionsUnion builtin_options;
+  std::vector<uint8_t> custom_options;
+  CustomOptionsFormat custom_options_format;
+  OperatorT()
+      : opcode_index(0),
+        custom_options_format(CustomOptionsFormat_FLEXBUFFERS) {}
+};
+
+struct Operator FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
+  typedef OperatorT NativeTableType;
+  enum {
+    VT_OPCODE_INDEX = 4,
+    VT_INPUTS = 6,
+    VT_OUTPUTS = 8,
+    VT_BUILTIN_OPTIONS_TYPE = 10,
+    VT_BUILTIN_OPTIONS = 12,
+    VT_CUSTOM_OPTIONS = 14,
+    VT_CUSTOM_OPTIONS_FORMAT = 16
+  };
+  uint32_t opcode_index() const {
+    return GetField<uint32_t>(VT_OPCODE_INDEX, 0);
+  }
+  const flatbuffers::Vector<int32_t> *inputs() const {
+    return GetPointer<const flatbuffers::Vector<int32_t> *>(VT_INPUTS);
+  }
+  const flatbuffers::Vector<int32_t> *outputs() const {
+    return GetPointer<const flatbuffers::Vector<int32_t> *>(VT_OUTPUTS);
+  }
+  BuiltinOptions builtin_options_type() const {
+    return static_cast<BuiltinOptions>(
+        GetField<uint8_t>(VT_BUILTIN_OPTIONS_TYPE, 0));
+  }
+  const void *builtin_options() const {
+    return GetPointer<const void *>(VT_BUILTIN_OPTIONS);
+  }
+  template <typename T>
+  const T *builtin_options_as() const;
+  const Conv2DOptions *builtin_options_as_Conv2DOptions() const {
+    return builtin_options_type() == BuiltinOptions_Conv2DOptions
+               ? static_cast<const Conv2DOptions *>(builtin_options())
+               : nullptr;
+  }
+  const DepthwiseConv2DOptions *builtin_options_as_DepthwiseConv2DOptions()
+      const {
+    return builtin_options_type() == BuiltinOptions_DepthwiseConv2DOptions
+               ? static_cast<const DepthwiseConv2DOptions *>(builtin_options())
+               : nullptr;
+  }
+  const ConcatEmbeddingsOptions *builtin_options_as_ConcatEmbeddingsOptions()
+      const {
+    return builtin_options_type() == BuiltinOptions_ConcatEmbeddingsOptions
+               ? static_cast<const ConcatEmbeddingsOptions *>(builtin_options())
+               : nullptr;
+  }
+  const LSHProjectionOptions *builtin_options_as_LSHProjectionOptions() const {
+    return builtin_options_type() == BuiltinOptions_LSHProjectionOptions
+               ? static_cast<const LSHProjectionOptions *>(builtin_options())
+               : nullptr;
+  }
+  const Pool2DOptions *builtin_options_as_Pool2DOptions() const {
+    return builtin_options_type() == BuiltinOptions_Pool2DOptions
+               ? static_cast<const Pool2DOptions *>(builtin_options())
+               : nullptr;
+  }
+  const SVDFOptions *builtin_options_as_SVDFOptions() const {
+    return builtin_options_type() == BuiltinOptions_SVDFOptions
+               ? static_cast<const SVDFOptions *>(builtin_options())
+               : nullptr;
+  }
+  const RNNOptions *builtin_options_as_RNNOptions() const {
+    return builtin_options_type() == BuiltinOptions_RNNOptions
+               ? static_cast<const RNNOptions *>(builtin_options())
+               : nullptr;
+  }
+  const FullyConnectedOptions *builtin_options_as_FullyConnectedOptions()
+      const {
+    return builtin_options_type() == BuiltinOptions_FullyConnectedOptions
+               ? static_cast<const FullyConnectedOptions *>(builtin_options())
+               : nullptr;
+  }
+  const SoftmaxOptions *builtin_options_as_SoftmaxOptions() const {
+    return builtin_options_type() == BuiltinOptions_SoftmaxOptions
+               ? static_cast<const SoftmaxOptions *>(builtin_options())
+               : nullptr;
+  }
+  const ConcatenationOptions *builtin_options_as_ConcatenationOptions() const {
+    return builtin_options_type() == BuiltinOptions_ConcatenationOptions
+               ? static_cast<const ConcatenationOptions *>(builtin_options())
+               : nullptr;
+  }
+  const AddOptions *builtin_options_as_AddOptions() const {
+    return builtin_options_type() == BuiltinOptions_AddOptions
+               ? static_cast<const AddOptions *>(builtin_options())
+               : nullptr;
+  }
+  const L2NormOptions *builtin_options_as_L2NormOptions() const {
+    return builtin_options_type() == BuiltinOptions_L2NormOptions
+               ? static_cast<const L2NormOptions *>(builtin_options())
+               : nullptr;
+  }
+  const LocalResponseNormalizationOptions *
+  builtin_options_as_LocalResponseNormalizationOptions() const {
+    return builtin_options_type() ==
+                   BuiltinOptions_LocalResponseNormalizationOptions
+               ? static_cast<const LocalResponseNormalizationOptions *>(
+                     builtin_options())
+               : nullptr;
+  }
+  const LSTMOptions *builtin_options_as_LSTMOptions() const {
+    return builtin_options_type() == BuiltinOptions_LSTMOptions
+               ? static_cast<const LSTMOptions *>(builtin_options())
+               : nullptr;
+  }
+  const ResizeBilinearOptions *builtin_options_as_ResizeBilinearOptions()
+      const {
+    return builtin_options_type() == BuiltinOptions_ResizeBilinearOptions
+               ? static_cast<const ResizeBilinearOptions *>(builtin_options())
+               : nullptr;
+  }
+  const CallOptions *builtin_options_as_CallOptions() const {
+    return builtin_options_type() == BuiltinOptions_CallOptions
+               ? static_cast<const CallOptions *>(builtin_options())
+               : nullptr;
+  }
+  const ReshapeOptions *builtin_options_as_ReshapeOptions() const {
+    return builtin_options_type() == BuiltinOptions_ReshapeOptions
+               ? static_cast<const ReshapeOptions *>(builtin_options())
+               : nullptr;
+  }
+  const SkipGramOptions *builtin_options_as_SkipGramOptions() const {
+    return builtin_options_type() == BuiltinOptions_SkipGramOptions
+               ? static_cast<const SkipGramOptions *>(builtin_options())
+               : nullptr;
+  }
+  const SpaceToDepthOptions *builtin_options_as_SpaceToDepthOptions() const {
+    return builtin_options_type() == BuiltinOptions_SpaceToDepthOptions
+               ? static_cast<const SpaceToDepthOptions *>(builtin_options())
+               : nullptr;
+  }
+  const EmbeddingLookupSparseOptions *
+  builtin_options_as_EmbeddingLookupSparseOptions() const {
+    return builtin_options_type() == BuiltinOptions_EmbeddingLookupSparseOptions
+               ? static_cast<const EmbeddingLookupSparseOptions *>(
+                     builtin_options())
+               : nullptr;
+  }
+  const MulOptions *builtin_options_as_MulOptions() const {
+    return builtin_options_type() == BuiltinOptions_MulOptions
+               ? static_cast<const MulOptions *>(builtin_options())
+               : nullptr;
+  }
+  const flatbuffers::Vector<uint8_t> *custom_options() const {
+    return GetPointer<const flatbuffers::Vector<uint8_t> *>(VT_CUSTOM_OPTIONS);
+  }
+  CustomOptionsFormat custom_options_format() const {
+    return static_cast<CustomOptionsFormat>(
+        GetField<int8_t>(VT_CUSTOM_OPTIONS_FORMAT, 0));
+  }
+  bool Verify(flatbuffers::Verifier &verifier) const {
+    return VerifyTableStart(verifier) &&
+           VerifyField<uint32_t>(verifier, VT_OPCODE_INDEX) &&
+           VerifyOffset(verifier, VT_INPUTS) && verifier.Verify(inputs()) &&
+           VerifyOffset(verifier, VT_OUTPUTS) && verifier.Verify(outputs()) &&
+           VerifyField<uint8_t>(verifier, VT_BUILTIN_OPTIONS_TYPE) &&
+           VerifyOffset(verifier, VT_BUILTIN_OPTIONS) &&
+           VerifyBuiltinOptions(verifier, builtin_options(),
+                                builtin_options_type()) &&
+           VerifyOffset(verifier, VT_CUSTOM_OPTIONS) &&
+           verifier.Verify(custom_options()) &&
+           VerifyField<int8_t>(verifier, VT_CUSTOM_OPTIONS_FORMAT) &&
+           verifier.EndTable();
+  }
+  OperatorT *UnPack(
+      const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  void UnPackTo(
+      OperatorT *_o,
+      const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  static flatbuffers::Offset<Operator> Pack(
+      flatbuffers::FlatBufferBuilder &_fbb, const OperatorT *_o,
+      const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+template <>
+inline const Conv2DOptions *Operator::builtin_options_as<Conv2DOptions>()
+    const {
+  return builtin_options_as_Conv2DOptions();
+}
+
+template <>
+inline const DepthwiseConv2DOptions *
+Operator::builtin_options_as<DepthwiseConv2DOptions>() const {
+  return builtin_options_as_DepthwiseConv2DOptions();
+}
+
+template <>
+inline const ConcatEmbeddingsOptions *
+Operator::builtin_options_as<ConcatEmbeddingsOptions>() const {
+  return builtin_options_as_ConcatEmbeddingsOptions();
+}
+
+template <>
+inline const LSHProjectionOptions *
+Operator::builtin_options_as<LSHProjectionOptions>() const {
+  return builtin_options_as_LSHProjectionOptions();
+}
+
+template <>
+inline const Pool2DOptions *Operator::builtin_options_as<Pool2DOptions>()
+    const {
+  return builtin_options_as_Pool2DOptions();
+}
+
+template <>
+inline const SVDFOptions *Operator::builtin_options_as<SVDFOptions>() const {
+  return builtin_options_as_SVDFOptions();
+}
+
+template <>
+inline const RNNOptions *Operator::builtin_options_as<RNNOptions>() const {
+  return builtin_options_as_RNNOptions();
+}
+
+template <>
+inline const FullyConnectedOptions *
+Operator::builtin_options_as<FullyConnectedOptions>() const {
+  return builtin_options_as_FullyConnectedOptions();
+}
+
+template <>
+inline const SoftmaxOptions *Operator::builtin_options_as<SoftmaxOptions>()
+    const {
+  return builtin_options_as_SoftmaxOptions();
+}
+
+template <>
+inline const ConcatenationOptions *
+Operator::builtin_options_as<ConcatenationOptions>() const {
+  return builtin_options_as_ConcatenationOptions();
+}
+
+template <>
+inline const AddOptions *Operator::builtin_options_as<AddOptions>() const {
+  return builtin_options_as_AddOptions();
+}
+
+template <>
+inline const L2NormOptions *Operator::builtin_options_as<L2NormOptions>()
+    const {
+  return builtin_options_as_L2NormOptions();
+}
+
+template <>
+inline const LocalResponseNormalizationOptions *
+Operator::builtin_options_as<LocalResponseNormalizationOptions>() const {
+  return builtin_options_as_LocalResponseNormalizationOptions();
+}
+
+template <>
+inline const LSTMOptions *Operator::builtin_options_as<LSTMOptions>() const {
+  return builtin_options_as_LSTMOptions();
+}
+
+template <>
+inline const ResizeBilinearOptions *
+Operator::builtin_options_as<ResizeBilinearOptions>() const {
+  return builtin_options_as_ResizeBilinearOptions();
+}
+
+template <>
+inline const CallOptions *Operator::builtin_options_as<CallOptions>() const {
+  return builtin_options_as_CallOptions();
+}
+
+template <>
+inline const ReshapeOptions *Operator::builtin_options_as<ReshapeOptions>()
+    const {
+  return builtin_options_as_ReshapeOptions();
+}
+
+template <>
+inline const SkipGramOptions *Operator::builtin_options_as<SkipGramOptions>()
+    const {
+  return builtin_options_as_SkipGramOptions();
+}
+
+template <>
+inline const SpaceToDepthOptions *
+Operator::builtin_options_as<SpaceToDepthOptions>() const {
+  return builtin_options_as_SpaceToDepthOptions();
+}
+
+template <>
+inline const EmbeddingLookupSparseOptions *
+Operator::builtin_options_as<EmbeddingLookupSparseOptions>() const {
+  return builtin_options_as_EmbeddingLookupSparseOptions();
+}
+
+template <>
+inline const MulOptions *Operator::builtin_options_as<MulOptions>() const {
+  return builtin_options_as_MulOptions();
+}
+
+struct OperatorBuilder {
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  void add_opcode_index(uint32_t opcode_index) {
+    fbb_.AddElement<uint32_t>(Operator::VT_OPCODE_INDEX, opcode_index, 0);
+  }
+  void add_inputs(flatbuffers::Offset<flatbuffers::Vector<int32_t>> inputs) {
+    fbb_.AddOffset(Operator::VT_INPUTS, inputs);
+  }
+  void add_outputs(flatbuffers::Offset<flatbuffers::Vector<int32_t>> outputs) {
+    fbb_.AddOffset(Operator::VT_OUTPUTS, outputs);
+  }
+  void add_builtin_options_type(BuiltinOptions builtin_options_type) {
+    fbb_.AddElement<uint8_t>(Operator::VT_BUILTIN_OPTIONS_TYPE,
+                             static_cast<uint8_t>(builtin_options_type), 0);
+  }
+  void add_builtin_options(flatbuffers::Offset<void> builtin_options) {
+    fbb_.AddOffset(Operator::VT_BUILTIN_OPTIONS, builtin_options);
+  }
+  void add_custom_options(
+      flatbuffers::Offset<flatbuffers::Vector<uint8_t>> custom_options) {
+    fbb_.AddOffset(Operator::VT_CUSTOM_OPTIONS, custom_options);
+  }
+  void add_custom_options_format(CustomOptionsFormat custom_options_format) {
+    fbb_.AddElement<int8_t>(Operator::VT_CUSTOM_OPTIONS_FORMAT,
+                            static_cast<int8_t>(custom_options_format), 0);
+  }
+  explicit OperatorBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) {
+    start_ = fbb_.StartTable();
+  }
+  OperatorBuilder &operator=(const OperatorBuilder &);
+  flatbuffers::Offset<Operator> Finish() {
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<Operator>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<Operator> CreateOperator(
+    flatbuffers::FlatBufferBuilder &_fbb, uint32_t opcode_index = 0,
+    flatbuffers::Offset<flatbuffers::Vector<int32_t>> inputs = 0,
+    flatbuffers::Offset<flatbuffers::Vector<int32_t>> outputs = 0,
+    BuiltinOptions builtin_options_type = BuiltinOptions_NONE,
+    flatbuffers::Offset<void> builtin_options = 0,
+    flatbuffers::Offset<flatbuffers::Vector<uint8_t>> custom_options = 0,
+    CustomOptionsFormat custom_options_format =
+        CustomOptionsFormat_FLEXBUFFERS) {
+  OperatorBuilder builder_(_fbb);
+  builder_.add_custom_options(custom_options);
+  builder_.add_builtin_options(builtin_options);
+  builder_.add_outputs(outputs);
+  builder_.add_inputs(inputs);
+  builder_.add_opcode_index(opcode_index);
+  builder_.add_custom_options_format(custom_options_format);
+  builder_.add_builtin_options_type(builtin_options_type);
+  return builder_.Finish();
+}
+
+inline flatbuffers::Offset<Operator> CreateOperatorDirect(
+    flatbuffers::FlatBufferBuilder &_fbb, uint32_t opcode_index = 0,
+    const std::vector<int32_t> *inputs = nullptr,
+    const std::vector<int32_t> *outputs = nullptr,
+    BuiltinOptions builtin_options_type = BuiltinOptions_NONE,
+    flatbuffers::Offset<void> builtin_options = 0,
+    const std::vector<uint8_t> *custom_options = nullptr,
+    CustomOptionsFormat custom_options_format =
+        CustomOptionsFormat_FLEXBUFFERS) {
+  return tflite::CreateOperator(
+      _fbb, opcode_index, inputs ? _fbb.CreateVector<int32_t>(*inputs) : 0,
+      outputs ? _fbb.CreateVector<int32_t>(*outputs) : 0, builtin_options_type,
+      builtin_options,
+      custom_options ? _fbb.CreateVector<uint8_t>(*custom_options) : 0,
+      custom_options_format);
+}
+
+flatbuffers::Offset<Operator> CreateOperator(
+    flatbuffers::FlatBufferBuilder &_fbb, const OperatorT *_o,
+    const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
+struct SubGraphT : public flatbuffers::NativeTable {
+  typedef SubGraph TableType;
+  std::vector<std::unique_ptr<TensorT>> tensors;
+  std::vector<int32_t> inputs;
+  std::vector<int32_t> outputs;
+  std::vector<std::unique_ptr<OperatorT>> operators;
+  std::string name;
+  SubGraphT() {}
+};
+
+struct SubGraph FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
+  typedef SubGraphT NativeTableType;
+  enum {
+    VT_TENSORS = 4,
+    VT_INPUTS = 6,
+    VT_OUTPUTS = 8,
+    VT_OPERATORS = 10,
+    VT_NAME = 12
+  };
+  const flatbuffers::Vector<flatbuffers::Offset<Tensor>> *tensors() const {
+    return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<Tensor>> *>(
+        VT_TENSORS);
+  }
+  const flatbuffers::Vector<int32_t> *inputs() const {
+    return GetPointer<const flatbuffers::Vector<int32_t> *>(VT_INPUTS);
+  }
+  const flatbuffers::Vector<int32_t> *outputs() const {
+    return GetPointer<const flatbuffers::Vector<int32_t> *>(VT_OUTPUTS);
+  }
+  const flatbuffers::Vector<flatbuffers::Offset<Operator>> *operators() const {
+    return GetPointer<
+        const flatbuffers::Vector<flatbuffers::Offset<Operator>> *>(
+        VT_OPERATORS);
+  }
+  const flatbuffers::String *name() const {
+    return GetPointer<const flatbuffers::String *>(VT_NAME);
+  }
+  bool Verify(flatbuffers::Verifier &verifier) const {
+    return VerifyTableStart(verifier) && VerifyOffset(verifier, VT_TENSORS) &&
+           verifier.Verify(tensors()) &&
+           verifier.VerifyVectorOfTables(tensors()) &&
+           VerifyOffset(verifier, VT_INPUTS) && verifier.Verify(inputs()) &&
+           VerifyOffset(verifier, VT_OUTPUTS) && verifier.Verify(outputs()) &&
+           VerifyOffset(verifier, VT_OPERATORS) &&
+           verifier.Verify(operators()) &&
+           verifier.VerifyVectorOfTables(operators()) &&
+           VerifyOffset(verifier, VT_NAME) && verifier.Verify(name()) &&
+           verifier.EndTable();
+  }
+  SubGraphT *UnPack(
+      const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  void UnPackTo(
+      SubGraphT *_o,
+      const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  static flatbuffers::Offset<SubGraph> Pack(
+      flatbuffers::FlatBufferBuilder &_fbb, const SubGraphT *_o,
+      const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct SubGraphBuilder {
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  void add_tensors(
+      flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<Tensor>>>
+          tensors) {
+    fbb_.AddOffset(SubGraph::VT_TENSORS, tensors);
+  }
+  void add_inputs(flatbuffers::Offset<flatbuffers::Vector<int32_t>> inputs) {
+    fbb_.AddOffset(SubGraph::VT_INPUTS, inputs);
+  }
+  void add_outputs(flatbuffers::Offset<flatbuffers::Vector<int32_t>> outputs) {
+    fbb_.AddOffset(SubGraph::VT_OUTPUTS, outputs);
+  }
+  void add_operators(
+      flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<Operator>>>
+          operators) {
+    fbb_.AddOffset(SubGraph::VT_OPERATORS, operators);
+  }
+  void add_name(flatbuffers::Offset<flatbuffers::String> name) {
+    fbb_.AddOffset(SubGraph::VT_NAME, name);
+  }
+  explicit SubGraphBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) {
+    start_ = fbb_.StartTable();
+  }
+  SubGraphBuilder &operator=(const SubGraphBuilder &);
+  flatbuffers::Offset<SubGraph> Finish() {
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<SubGraph>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<SubGraph> CreateSubGraph(
+    flatbuffers::FlatBufferBuilder &_fbb,
+    flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<Tensor>>>
+        tensors = 0,
+    flatbuffers::Offset<flatbuffers::Vector<int32_t>> inputs = 0,
+    flatbuffers::Offset<flatbuffers::Vector<int32_t>> outputs = 0,
+    flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<Operator>>>
+        operators = 0,
+    flatbuffers::Offset<flatbuffers::String> name = 0) {
+  SubGraphBuilder builder_(_fbb);
+  builder_.add_name(name);
+  builder_.add_operators(operators);
+  builder_.add_outputs(outputs);
+  builder_.add_inputs(inputs);
+  builder_.add_tensors(tensors);
+  return builder_.Finish();
+}
+
+inline flatbuffers::Offset<SubGraph> CreateSubGraphDirect(
+    flatbuffers::FlatBufferBuilder &_fbb,
+    const std::vector<flatbuffers::Offset<Tensor>> *tensors = nullptr,
+    const std::vector<int32_t> *inputs = nullptr,
+    const std::vector<int32_t> *outputs = nullptr,
+    const std::vector<flatbuffers::Offset<Operator>> *operators = nullptr,
+    const char *name = nullptr) {
+  return tflite::CreateSubGraph(
+      _fbb,
+      tensors ? _fbb.CreateVector<flatbuffers::Offset<Tensor>>(*tensors) : 0,
+      inputs ? _fbb.CreateVector<int32_t>(*inputs) : 0,
+      outputs ? _fbb.CreateVector<int32_t>(*outputs) : 0,
+      operators ? _fbb.CreateVector<flatbuffers::Offset<Operator>>(*operators)
+                : 0,
+      name ? _fbb.CreateString(name) : 0);
+}
+
+flatbuffers::Offset<SubGraph> CreateSubGraph(
+    flatbuffers::FlatBufferBuilder &_fbb, const SubGraphT *_o,
+    const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
+struct BufferT : public flatbuffers::NativeTable {
+  typedef Buffer TableType;
+  std::vector<uint8_t> data;
+  BufferT() {}
+};
+
+struct Buffer FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
+  typedef BufferT NativeTableType;
+  enum { VT_DATA = 4 };
+  const flatbuffers::Vector<uint8_t> *data() const {
+    return GetPointer<const flatbuffers::Vector<uint8_t> *>(VT_DATA);
+  }
+  bool Verify(flatbuffers::Verifier &verifier) const {
+    return VerifyTableStart(verifier) && VerifyOffset(verifier, VT_DATA) &&
+           verifier.Verify(data()) && verifier.EndTable();
+  }
+  BufferT *UnPack(
+      const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  void UnPackTo(BufferT *_o, const flatbuffers::resolver_function_t *_resolver =
+                                 nullptr) const;
+  static flatbuffers::Offset<Buffer> Pack(
+      flatbuffers::FlatBufferBuilder &_fbb, const BufferT *_o,
+      const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct BufferBuilder {
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  void add_data(flatbuffers::Offset<flatbuffers::Vector<uint8_t>> data) {
+    fbb_.AddOffset(Buffer::VT_DATA, data);
+  }
+  explicit BufferBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) {
+    start_ = fbb_.StartTable();
+  }
+  BufferBuilder &operator=(const BufferBuilder &);
+  flatbuffers::Offset<Buffer> Finish() {
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<Buffer>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<Buffer> CreateBuffer(
+    flatbuffers::FlatBufferBuilder &_fbb,
+    flatbuffers::Offset<flatbuffers::Vector<uint8_t>> data = 0) {
+  BufferBuilder builder_(_fbb);
+  builder_.add_data(data);
+  return builder_.Finish();
+}
+
+inline flatbuffers::Offset<Buffer> CreateBufferDirect(
+    flatbuffers::FlatBufferBuilder &_fbb,
+    const std::vector<uint8_t> *data = nullptr) {
+  return tflite::CreateBuffer(_fbb,
+                              data ? _fbb.CreateVector<uint8_t>(*data) : 0);
+}
+
+flatbuffers::Offset<Buffer> CreateBuffer(
+    flatbuffers::FlatBufferBuilder &_fbb, const BufferT *_o,
+    const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
+struct ModelT : public flatbuffers::NativeTable {
+  typedef Model TableType;
+  uint32_t version;
+  std::vector<std::unique_ptr<OperatorCodeT>> operator_codes;
+  std::vector<std::unique_ptr<SubGraphT>> subgraphs;
+  std::string description;
+  std::vector<std::unique_ptr<BufferT>> buffers;
+  ModelT() : version(0) {}
+};
+
+struct Model FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
+  typedef ModelT NativeTableType;
+  enum {
+    VT_VERSION = 4,
+    VT_OPERATOR_CODES = 6,
+    VT_SUBGRAPHS = 8,
+    VT_DESCRIPTION = 10,
+    VT_BUFFERS = 12
+  };
+  uint32_t version() const { return GetField<uint32_t>(VT_VERSION, 0); }
+  const flatbuffers::Vector<flatbuffers::Offset<OperatorCode>> *operator_codes()
+      const {
+    return GetPointer<
+        const flatbuffers::Vector<flatbuffers::Offset<OperatorCode>> *>(
+        VT_OPERATOR_CODES);
+  }
+  const flatbuffers::Vector<flatbuffers::Offset<SubGraph>> *subgraphs() const {
+    return GetPointer<
+        const flatbuffers::Vector<flatbuffers::Offset<SubGraph>> *>(
+        VT_SUBGRAPHS);
+  }
+  const flatbuffers::String *description() const {
+    return GetPointer<const flatbuffers::String *>(VT_DESCRIPTION);
+  }
+  const flatbuffers::Vector<flatbuffers::Offset<Buffer>> *buffers() const {
+    return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<Buffer>> *>(
+        VT_BUFFERS);
+  }
+  bool Verify(flatbuffers::Verifier &verifier) const {
+    return VerifyTableStart(verifier) &&
+           VerifyField<uint32_t>(verifier, VT_VERSION) &&
+           VerifyOffset(verifier, VT_OPERATOR_CODES) &&
+           verifier.Verify(operator_codes()) &&
+           verifier.VerifyVectorOfTables(operator_codes()) &&
+           VerifyOffset(verifier, VT_SUBGRAPHS) &&
+           verifier.Verify(subgraphs()) &&
+           verifier.VerifyVectorOfTables(subgraphs()) &&
+           VerifyOffset(verifier, VT_DESCRIPTION) &&
+           verifier.Verify(description()) &&
+           VerifyOffset(verifier, VT_BUFFERS) && verifier.Verify(buffers()) &&
+           verifier.VerifyVectorOfTables(buffers()) && verifier.EndTable();
+  }
+  ModelT *UnPack(
+      const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  void UnPackTo(ModelT *_o, const flatbuffers::resolver_function_t *_resolver =
+                                nullptr) const;
+  static flatbuffers::Offset<Model> Pack(
+      flatbuffers::FlatBufferBuilder &_fbb, const ModelT *_o,
+      const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct ModelBuilder {
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  void add_version(uint32_t version) {
+    fbb_.AddElement<uint32_t>(Model::VT_VERSION, version, 0);
+  }
+  void add_operator_codes(
+      flatbuffers::Offset<
+          flatbuffers::Vector<flatbuffers::Offset<OperatorCode>>>
+          operator_codes) {
+    fbb_.AddOffset(Model::VT_OPERATOR_CODES, operator_codes);
+  }
+  void add_subgraphs(
+      flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<SubGraph>>>
+          subgraphs) {
+    fbb_.AddOffset(Model::VT_SUBGRAPHS, subgraphs);
+  }
+  void add_description(flatbuffers::Offset<flatbuffers::String> description) {
+    fbb_.AddOffset(Model::VT_DESCRIPTION, description);
+  }
+  void add_buffers(
+      flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<Buffer>>>
+          buffers) {
+    fbb_.AddOffset(Model::VT_BUFFERS, buffers);
+  }
+  explicit ModelBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) {
+    start_ = fbb_.StartTable();
+  }
+  ModelBuilder &operator=(const ModelBuilder &);
+  flatbuffers::Offset<Model> Finish() {
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<Model>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<Model> CreateModel(
+    flatbuffers::FlatBufferBuilder &_fbb, uint32_t version = 0,
+    flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<OperatorCode>>>
+        operator_codes = 0,
+    flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<SubGraph>>>
+        subgraphs = 0,
+    flatbuffers::Offset<flatbuffers::String> description = 0,
+    flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<Buffer>>>
+        buffers = 0) {
+  ModelBuilder builder_(_fbb);
+  builder_.add_buffers(buffers);
+  builder_.add_description(description);
+  builder_.add_subgraphs(subgraphs);
+  builder_.add_operator_codes(operator_codes);
+  builder_.add_version(version);
+  return builder_.Finish();
+}
+
+inline flatbuffers::Offset<Model> CreateModelDirect(
+    flatbuffers::FlatBufferBuilder &_fbb, uint32_t version = 0,
+    const std::vector<flatbuffers::Offset<OperatorCode>> *operator_codes =
+        nullptr,
+    const std::vector<flatbuffers::Offset<SubGraph>> *subgraphs = nullptr,
+    const char *description = nullptr,
+    const std::vector<flatbuffers::Offset<Buffer>> *buffers = nullptr) {
+  return tflite::CreateModel(
+      _fbb, version,
+      operator_codes ? _fbb.CreateVector<flatbuffers::Offset<OperatorCode>>(
+                           *operator_codes)
+                     : 0,
+      subgraphs ? _fbb.CreateVector<flatbuffers::Offset<SubGraph>>(*subgraphs)
+                : 0,
+      description ? _fbb.CreateString(description) : 0,
+      buffers ? _fbb.CreateVector<flatbuffers::Offset<Buffer>>(*buffers) : 0);
+}
+
+flatbuffers::Offset<Model> CreateModel(
+    flatbuffers::FlatBufferBuilder &_fbb, const ModelT *_o,
+    const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
+inline QuantizationParametersT *QuantizationParameters::UnPack(
+    const flatbuffers::resolver_function_t *_resolver) const {
+  auto _o = new QuantizationParametersT();
+  UnPackTo(_o, _resolver);
+  return _o;
+}
+
+inline void QuantizationParameters::UnPackTo(
+    QuantizationParametersT *_o,
+    const flatbuffers::resolver_function_t *_resolver) const {
+  (void)_o;
+  (void)_resolver;
+  {
+    auto _e = min();
+    if (_e) {
+      _o->min.resize(_e->size());
+      for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) {
+        _o->min[_i] = _e->Get(_i);
+      }
+    }
+  };
+  {
+    auto _e = max();
+    if (_e) {
+      _o->max.resize(_e->size());
+      for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) {
+        _o->max[_i] = _e->Get(_i);
+      }
+    }
+  };
+  {
+    auto _e = scale();
+    if (_e) {
+      _o->scale.resize(_e->size());
+      for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) {
+        _o->scale[_i] = _e->Get(_i);
+      }
+    }
+  };
+  {
+    auto _e = zero_point();
+    if (_e) {
+      _o->zero_point.resize(_e->size());
+      for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) {
+        _o->zero_point[_i] = _e->Get(_i);
+      }
+    }
+  };
+}
+
+inline flatbuffers::Offset<QuantizationParameters> QuantizationParameters::Pack(
+    flatbuffers::FlatBufferBuilder &_fbb, const QuantizationParametersT *_o,
+    const flatbuffers::rehasher_function_t *_rehasher) {
+  return CreateQuantizationParameters(_fbb, _o, _rehasher);
+}
+
+inline flatbuffers::Offset<QuantizationParameters> CreateQuantizationParameters(
+    flatbuffers::FlatBufferBuilder &_fbb, const QuantizationParametersT *_o,
+    const flatbuffers::rehasher_function_t *_rehasher) {
+  (void)_rehasher;
+  (void)_o;
+  struct _VectorArgs {
+    flatbuffers::FlatBufferBuilder *__fbb;
+    const QuantizationParametersT *__o;
+    const flatbuffers::rehasher_function_t *__rehasher;
+  } _va = {&_fbb, _o, _rehasher};
+  (void)_va;
+  auto _min = _o->min.size() ? _fbb.CreateVector(_o->min) : 0;
+  auto _max = _o->max.size() ? _fbb.CreateVector(_o->max) : 0;
+  auto _scale = _o->scale.size() ? _fbb.CreateVector(_o->scale) : 0;
+  auto _zero_point =
+      _o->zero_point.size() ? _fbb.CreateVector(_o->zero_point) : 0;
+  return tflite::CreateQuantizationParameters(_fbb, _min, _max, _scale,
+                                              _zero_point);
+}
+
+inline TensorT *Tensor::UnPack(
+    const flatbuffers::resolver_function_t *_resolver) const {
+  auto _o = new TensorT();
+  UnPackTo(_o, _resolver);
+  return _o;
+}
+
+inline void Tensor::UnPackTo(
+    TensorT *_o, const flatbuffers::resolver_function_t *_resolver) const {
+  (void)_o;
+  (void)_resolver;
+  {
+    auto _e = shape();
+    if (_e) {
+      _o->shape.resize(_e->size());
+      for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) {
+        _o->shape[_i] = _e->Get(_i);
+      }
+    }
+  };
+  {
+    auto _e = type();
+    _o->type = _e;
+  };
+  {
+    auto _e = buffer();
+    _o->buffer = _e;
+  };
+  {
+    auto _e = name();
+    if (_e) _o->name = _e->str();
+  };
+  {
+    auto _e = quantization();
+    if (_e)
+      _o->quantization =
+          std::unique_ptr<QuantizationParametersT>(_e->UnPack(_resolver));
+  };
+}
+
+inline flatbuffers::Offset<Tensor> Tensor::Pack(
+    flatbuffers::FlatBufferBuilder &_fbb, const TensorT *_o,
+    const flatbuffers::rehasher_function_t *_rehasher) {
+  return CreateTensor(_fbb, _o, _rehasher);
+}
+
+inline flatbuffers::Offset<Tensor> CreateTensor(
+    flatbuffers::FlatBufferBuilder &_fbb, const TensorT *_o,
+    const flatbuffers::rehasher_function_t *_rehasher) {
+  (void)_rehasher;
+  (void)_o;
+  struct _VectorArgs {
+    flatbuffers::FlatBufferBuilder *__fbb;
+    const TensorT *__o;
+    const flatbuffers::rehasher_function_t *__rehasher;
+  } _va = {&_fbb, _o, _rehasher};
+  (void)_va;
+  auto _shape = _o->shape.size() ? _fbb.CreateVector(_o->shape) : 0;
+  auto _type = _o->type;
+  auto _buffer = _o->buffer;
+  auto _name = _o->name.empty() ? 0 : _fbb.CreateString(_o->name);
+  auto _quantization = _o->quantization
+                           ? CreateQuantizationParameters(
+                                 _fbb, _o->quantization.get(), _rehasher)
+                           : 0;
+  return tflite::CreateTensor(_fbb, _shape, _type, _buffer, _name,
+                              _quantization);
+}
+
+inline Conv2DOptionsT *Conv2DOptions::UnPack(
+    const flatbuffers::resolver_function_t *_resolver) const {
+  auto _o = new Conv2DOptionsT();
+  UnPackTo(_o, _resolver);
+  return _o;
+}
+
+inline void Conv2DOptions::UnPackTo(
+    Conv2DOptionsT *_o,
+    const flatbuffers::resolver_function_t *_resolver) const {
+  (void)_o;
+  (void)_resolver;
+  {
+    auto _e = padding();
+    _o->padding = _e;
+  };
+  {
+    auto _e = stride_w();
+    _o->stride_w = _e;
+  };
+  {
+    auto _e = stride_h();
+    _o->stride_h = _e;
+  };
+  {
+    auto _e = fused_activation_function();
+    _o->fused_activation_function = _e;
+  };
+}
+
+inline flatbuffers::Offset<Conv2DOptions> Conv2DOptions::Pack(
+    flatbuffers::FlatBufferBuilder &_fbb, const Conv2DOptionsT *_o,
+    const flatbuffers::rehasher_function_t *_rehasher) {
+  return CreateConv2DOptions(_fbb, _o, _rehasher);
+}
+
+inline flatbuffers::Offset<Conv2DOptions> CreateConv2DOptions(
+    flatbuffers::FlatBufferBuilder &_fbb, const Conv2DOptionsT *_o,
+    const flatbuffers::rehasher_function_t *_rehasher) {
+  (void)_rehasher;
+  (void)_o;
+  struct _VectorArgs {
+    flatbuffers::FlatBufferBuilder *__fbb;
+    const Conv2DOptionsT *__o;
+    const flatbuffers::rehasher_function_t *__rehasher;
+  } _va = {&_fbb, _o, _rehasher};
+  (void)_va;
+  auto _padding = _o->padding;
+  auto _stride_w = _o->stride_w;
+  auto _stride_h = _o->stride_h;
+  auto _fused_activation_function = _o->fused_activation_function;
+  return tflite::CreateConv2DOptions(_fbb, _padding, _stride_w, _stride_h,
+                                     _fused_activation_function);
+}
+
+inline Pool2DOptionsT *Pool2DOptions::UnPack(
+    const flatbuffers::resolver_function_t *_resolver) const {
+  auto _o = new Pool2DOptionsT();
+  UnPackTo(_o, _resolver);
+  return _o;
+}
+
+inline void Pool2DOptions::UnPackTo(
+    Pool2DOptionsT *_o,
+    const flatbuffers::resolver_function_t *_resolver) const {
+  (void)_o;
+  (void)_resolver;
+  {
+    auto _e = padding();
+    _o->padding = _e;
+  };
+  {
+    auto _e = stride_w();
+    _o->stride_w = _e;
+  };
+  {
+    auto _e = stride_h();
+    _o->stride_h = _e;
+  };
+  {
+    auto _e = filter_width();
+    _o->filter_width = _e;
+  };
+  {
+    auto _e = filter_height();
+    _o->filter_height = _e;
+  };
+  {
+    auto _e = fused_activation_function();
+    _o->fused_activation_function = _e;
+  };
+}
+
+inline flatbuffers::Offset<Pool2DOptions> Pool2DOptions::Pack(
+    flatbuffers::FlatBufferBuilder &_fbb, const Pool2DOptionsT *_o,
+    const flatbuffers::rehasher_function_t *_rehasher) {
+  return CreatePool2DOptions(_fbb, _o, _rehasher);
+}
+
+inline flatbuffers::Offset<Pool2DOptions> CreatePool2DOptions(
+    flatbuffers::FlatBufferBuilder &_fbb, const Pool2DOptionsT *_o,
+    const flatbuffers::rehasher_function_t *_rehasher) {
+  (void)_rehasher;
+  (void)_o;
+  struct _VectorArgs {
+    flatbuffers::FlatBufferBuilder *__fbb;
+    const Pool2DOptionsT *__o;
+    const flatbuffers::rehasher_function_t *__rehasher;
+  } _va = {&_fbb, _o, _rehasher};
+  (void)_va;
+  auto _padding = _o->padding;
+  auto _stride_w = _o->stride_w;
+  auto _stride_h = _o->stride_h;
+  auto _filter_width = _o->filter_width;
+  auto _filter_height = _o->filter_height;
+  auto _fused_activation_function = _o->fused_activation_function;
+  return tflite::CreatePool2DOptions(_fbb, _padding, _stride_w, _stride_h,
+                                     _filter_width, _filter_height,
+                                     _fused_activation_function);
+}
+
+inline DepthwiseConv2DOptionsT *DepthwiseConv2DOptions::UnPack(
+    const flatbuffers::resolver_function_t *_resolver) const {
+  auto _o = new DepthwiseConv2DOptionsT();
+  UnPackTo(_o, _resolver);
+  return _o;
+}
+
+inline void DepthwiseConv2DOptions::UnPackTo(
+    DepthwiseConv2DOptionsT *_o,
+    const flatbuffers::resolver_function_t *_resolver) const {
+  (void)_o;
+  (void)_resolver;
+  {
+    auto _e = padding();
+    _o->padding = _e;
+  };
+  {
+    auto _e = stride_w();
+    _o->stride_w = _e;
+  };
+  {
+    auto _e = stride_h();
+    _o->stride_h = _e;
+  };
+  {
+    auto _e = depth_multiplier();
+    _o->depth_multiplier = _e;
+  };
+  {
+    auto _e = fused_activation_function();
+    _o->fused_activation_function = _e;
+  };
+}
+
+inline flatbuffers::Offset<DepthwiseConv2DOptions> DepthwiseConv2DOptions::Pack(
+    flatbuffers::FlatBufferBuilder &_fbb, const DepthwiseConv2DOptionsT *_o,
+    const flatbuffers::rehasher_function_t *_rehasher) {
+  return CreateDepthwiseConv2DOptions(_fbb, _o, _rehasher);
+}
+
+inline flatbuffers::Offset<DepthwiseConv2DOptions> CreateDepthwiseConv2DOptions(
+    flatbuffers::FlatBufferBuilder &_fbb, const DepthwiseConv2DOptionsT *_o,
+    const flatbuffers::rehasher_function_t *_rehasher) {
+  (void)_rehasher;
+  (void)_o;
+  struct _VectorArgs {
+    flatbuffers::FlatBufferBuilder *__fbb;
+    const DepthwiseConv2DOptionsT *__o;
+    const flatbuffers::rehasher_function_t *__rehasher;
+  } _va = {&_fbb, _o, _rehasher};
+  (void)_va;
+  auto _padding = _o->padding;
+  auto _stride_w = _o->stride_w;
+  auto _stride_h = _o->stride_h;
+  auto _depth_multiplier = _o->depth_multiplier;
+  auto _fused_activation_function = _o->fused_activation_function;
+  return tflite::CreateDepthwiseConv2DOptions(_fbb, _padding, _stride_w,
+                                              _stride_h, _depth_multiplier,
+                                              _fused_activation_function);
+}
+
+inline ConcatEmbeddingsOptionsT *ConcatEmbeddingsOptions::UnPack(
+    const flatbuffers::resolver_function_t *_resolver) const {
+  auto _o = new ConcatEmbeddingsOptionsT();
+  UnPackTo(_o, _resolver);
+  return _o;
+}
+
+inline void ConcatEmbeddingsOptions::UnPackTo(
+    ConcatEmbeddingsOptionsT *_o,
+    const flatbuffers::resolver_function_t *_resolver) const {
+  (void)_o;
+  (void)_resolver;
+  {
+    auto _e = num_channels();
+    _o->num_channels = _e;
+  };
+  {
+    auto _e = num_columns_per_channel();
+    if (_e) {
+      _o->num_columns_per_channel.resize(_e->size());
+      for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) {
+        _o->num_columns_per_channel[_i] = _e->Get(_i);
+      }
+    }
+  };
+  {
+    auto _e = embedding_dim_per_channel();
+    if (_e) {
+      _o->embedding_dim_per_channel.resize(_e->size());
+      for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) {
+        _o->embedding_dim_per_channel[_i] = _e->Get(_i);
+      }
+    }
+  };
+}
+
+inline flatbuffers::Offset<ConcatEmbeddingsOptions>
+ConcatEmbeddingsOptions::Pack(
+    flatbuffers::FlatBufferBuilder &_fbb, const ConcatEmbeddingsOptionsT *_o,
+    const flatbuffers::rehasher_function_t *_rehasher) {
+  return CreateConcatEmbeddingsOptions(_fbb, _o, _rehasher);
+}
+
+inline flatbuffers::Offset<ConcatEmbeddingsOptions>
+CreateConcatEmbeddingsOptions(
+    flatbuffers::FlatBufferBuilder &_fbb, const ConcatEmbeddingsOptionsT *_o,
+    const flatbuffers::rehasher_function_t *_rehasher) {
+  (void)_rehasher;
+  (void)_o;
+  struct _VectorArgs {
+    flatbuffers::FlatBufferBuilder *__fbb;
+    const ConcatEmbeddingsOptionsT *__o;
+    const flatbuffers::rehasher_function_t *__rehasher;
+  } _va = {&_fbb, _o, _rehasher};
+  (void)_va;
+  auto _num_channels = _o->num_channels;
+  auto _num_columns_per_channel =
+      _o->num_columns_per_channel.size()
+          ? _fbb.CreateVector(_o->num_columns_per_channel)
+          : 0;
+  auto _embedding_dim_per_channel =
+      _o->embedding_dim_per_channel.size()
+          ? _fbb.CreateVector(_o->embedding_dim_per_channel)
+          : 0;
+  return tflite::CreateConcatEmbeddingsOptions(_fbb, _num_channels,
+                                               _num_columns_per_channel,
+                                               _embedding_dim_per_channel);
+}
+
+inline LSHProjectionOptionsT *LSHProjectionOptions::UnPack(
+    const flatbuffers::resolver_function_t *_resolver) const {
+  auto _o = new LSHProjectionOptionsT();
+  UnPackTo(_o, _resolver);
+  return _o;
+}
+
+inline void LSHProjectionOptions::UnPackTo(
+    LSHProjectionOptionsT *_o,
+    const flatbuffers::resolver_function_t *_resolver) const {
+  (void)_o;
+  (void)_resolver;
+  {
+    auto _e = type();
+    _o->type = _e;
+  };
+}
+
+inline flatbuffers::Offset<LSHProjectionOptions> LSHProjectionOptions::Pack(
+    flatbuffers::FlatBufferBuilder &_fbb, const LSHProjectionOptionsT *_o,
+    const flatbuffers::rehasher_function_t *_rehasher) {
+  return CreateLSHProjectionOptions(_fbb, _o, _rehasher);
+}
+
+inline flatbuffers::Offset<LSHProjectionOptions> CreateLSHProjectionOptions(
+    flatbuffers::FlatBufferBuilder &_fbb, const LSHProjectionOptionsT *_o,
+    const flatbuffers::rehasher_function_t *_rehasher) {
+  (void)_rehasher;
+  (void)_o;
+  struct _VectorArgs {
+    flatbuffers::FlatBufferBuilder *__fbb;
+    const LSHProjectionOptionsT *__o;
+    const flatbuffers::rehasher_function_t *__rehasher;
+  } _va = {&_fbb, _o, _rehasher};
+  (void)_va;
+  auto _type = _o->type;
+  return tflite::CreateLSHProjectionOptions(_fbb, _type);
+}
+
+inline SVDFOptionsT *SVDFOptions::UnPack(
+    const flatbuffers::resolver_function_t *_resolver) const {
+  auto _o = new SVDFOptionsT();
+  UnPackTo(_o, _resolver);
+  return _o;
+}
+
+inline void SVDFOptions::UnPackTo(
+    SVDFOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
+  (void)_o;
+  (void)_resolver;
+  {
+    auto _e = rank();
+    _o->rank = _e;
+  };
+  {
+    auto _e = fused_activation_function();
+    _o->fused_activation_function = _e;
+  };
+}
+
+inline flatbuffers::Offset<SVDFOptions> SVDFOptions::Pack(
+    flatbuffers::FlatBufferBuilder &_fbb, const SVDFOptionsT *_o,
+    const flatbuffers::rehasher_function_t *_rehasher) {
+  return CreateSVDFOptions(_fbb, _o, _rehasher);
+}
+
+inline flatbuffers::Offset<SVDFOptions> CreateSVDFOptions(
+    flatbuffers::FlatBufferBuilder &_fbb, const SVDFOptionsT *_o,
+    const flatbuffers::rehasher_function_t *_rehasher) {
+  (void)_rehasher;
+  (void)_o;
+  struct _VectorArgs {
+    flatbuffers::FlatBufferBuilder *__fbb;
+    const SVDFOptionsT *__o;
+    const flatbuffers::rehasher_function_t *__rehasher;
+  } _va = {&_fbb, _o, _rehasher};
+  (void)_va;
+  auto _rank = _o->rank;
+  auto _fused_activation_function = _o->fused_activation_function;
+  return tflite::CreateSVDFOptions(_fbb, _rank, _fused_activation_function);
+}
+
+inline RNNOptionsT *RNNOptions::UnPack(
+    const flatbuffers::resolver_function_t *_resolver) const {
+  auto _o = new RNNOptionsT();
+  UnPackTo(_o, _resolver);
+  return _o;
+}
+
+inline void RNNOptions::UnPackTo(
+    RNNOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
+  (void)_o;
+  (void)_resolver;
+  {
+    auto _e = fused_activation_function();
+    _o->fused_activation_function = _e;
+  };
+}
+
+inline flatbuffers::Offset<RNNOptions> RNNOptions::Pack(
+    flatbuffers::FlatBufferBuilder &_fbb, const RNNOptionsT *_o,
+    const flatbuffers::rehasher_function_t *_rehasher) {
+  return CreateRNNOptions(_fbb, _o, _rehasher);
+}
+
+inline flatbuffers::Offset<RNNOptions> CreateRNNOptions(
+    flatbuffers::FlatBufferBuilder &_fbb, const RNNOptionsT *_o,
+    const flatbuffers::rehasher_function_t *_rehasher) {
+  (void)_rehasher;
+  (void)_o;
+  struct _VectorArgs {
+    flatbuffers::FlatBufferBuilder *__fbb;
+    const RNNOptionsT *__o;
+    const flatbuffers::rehasher_function_t *__rehasher;
+  } _va = {&_fbb, _o, _rehasher};
+  (void)_va;
+  auto _fused_activation_function = _o->fused_activation_function;
+  return tflite::CreateRNNOptions(_fbb, _fused_activation_function);
+}
+
+inline FullyConnectedOptionsT *FullyConnectedOptions::UnPack(
+    const flatbuffers::resolver_function_t *_resolver) const {
+  auto _o = new FullyConnectedOptionsT();
+  UnPackTo(_o, _resolver);
+  return _o;
+}
+
+inline void FullyConnectedOptions::UnPackTo(
+    FullyConnectedOptionsT *_o,
+    const flatbuffers::resolver_function_t *_resolver) const {
+  (void)_o;
+  (void)_resolver;
+  {
+    auto _e = fused_activation_function();
+    _o->fused_activation_function = _e;
+  };
+}
+
+inline flatbuffers::Offset<FullyConnectedOptions> FullyConnectedOptions::Pack(
+    flatbuffers::FlatBufferBuilder &_fbb, const FullyConnectedOptionsT *_o,
+    const flatbuffers::rehasher_function_t *_rehasher) {
+  return CreateFullyConnectedOptions(_fbb, _o, _rehasher);
+}
+
+inline flatbuffers::Offset<FullyConnectedOptions> CreateFullyConnectedOptions(
+    flatbuffers::FlatBufferBuilder &_fbb, const FullyConnectedOptionsT *_o,
+    const flatbuffers::rehasher_function_t *_rehasher) {
+  (void)_rehasher;
+  (void)_o;
+  struct _VectorArgs {
+    flatbuffers::FlatBufferBuilder *__fbb;
+    const FullyConnectedOptionsT *__o;
+    const flatbuffers::rehasher_function_t *__rehasher;
+  } _va = {&_fbb, _o, _rehasher};
+  (void)_va;
+  auto _fused_activation_function = _o->fused_activation_function;
+  return tflite::CreateFullyConnectedOptions(_fbb, _fused_activation_function);
+}
+
+inline SoftmaxOptionsT *SoftmaxOptions::UnPack(
+    const flatbuffers::resolver_function_t *_resolver) const {
+  auto _o = new SoftmaxOptionsT();
+  UnPackTo(_o, _resolver);
+  return _o;
+}
+
+inline void SoftmaxOptions::UnPackTo(
+    SoftmaxOptionsT *_o,
+    const flatbuffers::resolver_function_t *_resolver) const {
+  (void)_o;
+  (void)_resolver;
+  {
+    auto _e = beta();
+    _o->beta = _e;
+  };
+}
+
+inline flatbuffers::Offset<SoftmaxOptions> SoftmaxOptions::Pack(
+    flatbuffers::FlatBufferBuilder &_fbb, const SoftmaxOptionsT *_o,
+    const flatbuffers::rehasher_function_t *_rehasher) {
+  return CreateSoftmaxOptions(_fbb, _o, _rehasher);
+}
+
+inline flatbuffers::Offset<SoftmaxOptions> CreateSoftmaxOptions(
+    flatbuffers::FlatBufferBuilder &_fbb, const SoftmaxOptionsT *_o,
+    const flatbuffers::rehasher_function_t *_rehasher) {
+  (void)_rehasher;
+  (void)_o;
+  struct _VectorArgs {
+    flatbuffers::FlatBufferBuilder *__fbb;
+    const SoftmaxOptionsT *__o;
+    const flatbuffers::rehasher_function_t *__rehasher;
+  } _va = {&_fbb, _o, _rehasher};
+  (void)_va;
+  auto _beta = _o->beta;
+  return tflite::CreateSoftmaxOptions(_fbb, _beta);
+}
+
+inline ConcatenationOptionsT *ConcatenationOptions::UnPack(
+    const flatbuffers::resolver_function_t *_resolver) const {
+  auto _o = new ConcatenationOptionsT();
+  UnPackTo(_o, _resolver);
+  return _o;
+}
+
+inline void ConcatenationOptions::UnPackTo(
+    ConcatenationOptionsT *_o,
+    const flatbuffers::resolver_function_t *_resolver) const {
+  (void)_o;
+  (void)_resolver;
+  {
+    auto _e = axis();
+    _o->axis = _e;
+  };
+  {
+    auto _e = fused_activation_function();
+    _o->fused_activation_function = _e;
+  };
+}
+
+inline flatbuffers::Offset<ConcatenationOptions> ConcatenationOptions::Pack(
+    flatbuffers::FlatBufferBuilder &_fbb, const ConcatenationOptionsT *_o,
+    const flatbuffers::rehasher_function_t *_rehasher) {
+  return CreateConcatenationOptions(_fbb, _o, _rehasher);
+}
+
+inline flatbuffers::Offset<ConcatenationOptions> CreateConcatenationOptions(
+    flatbuffers::FlatBufferBuilder &_fbb, const ConcatenationOptionsT *_o,
+    const flatbuffers::rehasher_function_t *_rehasher) {
+  (void)_rehasher;
+  (void)_o;
+  struct _VectorArgs {
+    flatbuffers::FlatBufferBuilder *__fbb;
+    const ConcatenationOptionsT *__o;
+    const flatbuffers::rehasher_function_t *__rehasher;
+  } _va = {&_fbb, _o, _rehasher};
+  (void)_va;
+  auto _axis = _o->axis;
+  auto _fused_activation_function = _o->fused_activation_function;
+  return tflite::CreateConcatenationOptions(_fbb, _axis,
+                                            _fused_activation_function);
+}
+
+inline AddOptionsT *AddOptions::UnPack(
+    const flatbuffers::resolver_function_t *_resolver) const {
+  auto _o = new AddOptionsT();
+  UnPackTo(_o, _resolver);
+  return _o;
+}
+
+inline void AddOptions::UnPackTo(
+    AddOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
+  (void)_o;
+  (void)_resolver;
+  {
+    auto _e = fused_activation_function();
+    _o->fused_activation_function = _e;
+  };
+}
+
+inline flatbuffers::Offset<AddOptions> AddOptions::Pack(
+    flatbuffers::FlatBufferBuilder &_fbb, const AddOptionsT *_o,
+    const flatbuffers::rehasher_function_t *_rehasher) {
+  return CreateAddOptions(_fbb, _o, _rehasher);
+}
+
+inline flatbuffers::Offset<AddOptions> CreateAddOptions(
+    flatbuffers::FlatBufferBuilder &_fbb, const AddOptionsT *_o,
+    const flatbuffers::rehasher_function_t *_rehasher) {
+  (void)_rehasher;
+  (void)_o;
+  struct _VectorArgs {
+    flatbuffers::FlatBufferBuilder *__fbb;
+    const AddOptionsT *__o;
+    const flatbuffers::rehasher_function_t *__rehasher;
+  } _va = {&_fbb, _o, _rehasher};
+  (void)_va;
+  auto _fused_activation_function = _o->fused_activation_function;
+  return tflite::CreateAddOptions(_fbb, _fused_activation_function);
+}
+
+inline MulOptionsT *MulOptions::UnPack(
+    const flatbuffers::resolver_function_t *_resolver) const {
+  auto _o = new MulOptionsT();
+  UnPackTo(_o, _resolver);
+  return _o;
+}
+
+inline void MulOptions::UnPackTo(
+    MulOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
+  (void)_o;
+  (void)_resolver;
+  {
+    auto _e = fused_activation_function();
+    _o->fused_activation_function = _e;
+  };
+}
+
+inline flatbuffers::Offset<MulOptions> MulOptions::Pack(
+    flatbuffers::FlatBufferBuilder &_fbb, const MulOptionsT *_o,
+    const flatbuffers::rehasher_function_t *_rehasher) {
+  return CreateMulOptions(_fbb, _o, _rehasher);
+}
+
+inline flatbuffers::Offset<MulOptions> CreateMulOptions(
+    flatbuffers::FlatBufferBuilder &_fbb, const MulOptionsT *_o,
+    const flatbuffers::rehasher_function_t *_rehasher) {
+  (void)_rehasher;
+  (void)_o;
+  struct _VectorArgs {
+    flatbuffers::FlatBufferBuilder *__fbb;
+    const MulOptionsT *__o;
+    const flatbuffers::rehasher_function_t *__rehasher;
+  } _va = {&_fbb, _o, _rehasher};
+  (void)_va;
+  auto _fused_activation_function = _o->fused_activation_function;
+  return tflite::CreateMulOptions(_fbb, _fused_activation_function);
+}
+
+inline L2NormOptionsT *L2NormOptions::UnPack(
+    const flatbuffers::resolver_function_t *_resolver) const {
+  auto _o = new L2NormOptionsT();
+  UnPackTo(_o, _resolver);
+  return _o;
+}
+
+inline void L2NormOptions::UnPackTo(
+    L2NormOptionsT *_o,
+    const flatbuffers::resolver_function_t *_resolver) const {
+  (void)_o;
+  (void)_resolver;
+  {
+    auto _e = fused_activation_function();
+    _o->fused_activation_function = _e;
+  };
+}
+
+inline flatbuffers::Offset<L2NormOptions> L2NormOptions::Pack(
+    flatbuffers::FlatBufferBuilder &_fbb, const L2NormOptionsT *_o,
+    const flatbuffers::rehasher_function_t *_rehasher) {
+  return CreateL2NormOptions(_fbb, _o, _rehasher);
+}
+
+inline flatbuffers::Offset<L2NormOptions> CreateL2NormOptions(
+    flatbuffers::FlatBufferBuilder &_fbb, const L2NormOptionsT *_o,
+    const flatbuffers::rehasher_function_t *_rehasher) {
+  (void)_rehasher;
+  (void)_o;
+  struct _VectorArgs {
+    flatbuffers::FlatBufferBuilder *__fbb;
+    const L2NormOptionsT *__o;
+    const flatbuffers::rehasher_function_t *__rehasher;
+  } _va = {&_fbb, _o, _rehasher};
+  (void)_va;
+  auto _fused_activation_function = _o->fused_activation_function;
+  return tflite::CreateL2NormOptions(_fbb, _fused_activation_function);
+}
+
+inline LocalResponseNormalizationOptionsT *
+LocalResponseNormalizationOptions::UnPack(
+    const flatbuffers::resolver_function_t *_resolver) const {
+  auto _o = new LocalResponseNormalizationOptionsT();
+  UnPackTo(_o, _resolver);
+  return _o;
+}
+
+inline void LocalResponseNormalizationOptions::UnPackTo(
+    LocalResponseNormalizationOptionsT *_o,
+    const flatbuffers::resolver_function_t *_resolver) const {
+  (void)_o;
+  (void)_resolver;
+  {
+    auto _e = radius();
+    _o->radius = _e;
+  };
+  {
+    auto _e = bias();
+    _o->bias = _e;
+  };
+  {
+    auto _e = alpha();
+    _o->alpha = _e;
+  };
+  {
+    auto _e = beta();
+    _o->beta = _e;
+  };
+}
+
+inline flatbuffers::Offset<LocalResponseNormalizationOptions>
+LocalResponseNormalizationOptions::Pack(
+    flatbuffers::FlatBufferBuilder &_fbb,
+    const LocalResponseNormalizationOptionsT *_o,
+    const flatbuffers::rehasher_function_t *_rehasher) {
+  return CreateLocalResponseNormalizationOptions(_fbb, _o, _rehasher);
+}
+
+inline flatbuffers::Offset<LocalResponseNormalizationOptions>
+CreateLocalResponseNormalizationOptions(
+    flatbuffers::FlatBufferBuilder &_fbb,
+    const LocalResponseNormalizationOptionsT *_o,
+    const flatbuffers::rehasher_function_t *_rehasher) {
+  (void)_rehasher;
+  (void)_o;
+  struct _VectorArgs {
+    flatbuffers::FlatBufferBuilder *__fbb;
+    const LocalResponseNormalizationOptionsT *__o;
+    const flatbuffers::rehasher_function_t *__rehasher;
+  } _va = {&_fbb, _o, _rehasher};
+  (void)_va;
+  auto _radius = _o->radius;
+  auto _bias = _o->bias;
+  auto _alpha = _o->alpha;
+  auto _beta = _o->beta;
+  return tflite::CreateLocalResponseNormalizationOptions(_fbb, _radius, _bias,
+                                                         _alpha, _beta);
+}
+
+inline LSTMOptionsT *LSTMOptions::UnPack(
+    const flatbuffers::resolver_function_t *_resolver) const {
+  auto _o = new LSTMOptionsT();
+  UnPackTo(_o, _resolver);
+  return _o;
+}
+
+inline void LSTMOptions::UnPackTo(
+    LSTMOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
+  (void)_o;
+  (void)_resolver;
+  {
+    auto _e = fused_activation_function();
+    _o->fused_activation_function = _e;
+  };
+  {
+    auto _e = cell_clip();
+    _o->cell_clip = _e;
+  };
+  {
+    auto _e = proj_clip();
+    _o->proj_clip = _e;
+  };
+}
+
+inline flatbuffers::Offset<LSTMOptions> LSTMOptions::Pack(
+    flatbuffers::FlatBufferBuilder &_fbb, const LSTMOptionsT *_o,
+    const flatbuffers::rehasher_function_t *_rehasher) {
+  return CreateLSTMOptions(_fbb, _o, _rehasher);
+}
+
+inline flatbuffers::Offset<LSTMOptions> CreateLSTMOptions(
+    flatbuffers::FlatBufferBuilder &_fbb, const LSTMOptionsT *_o,
+    const flatbuffers::rehasher_function_t *_rehasher) {
+  (void)_rehasher;
+  (void)_o;
+  struct _VectorArgs {
+    flatbuffers::FlatBufferBuilder *__fbb;
+    const LSTMOptionsT *__o;
+    const flatbuffers::rehasher_function_t *__rehasher;
+  } _va = {&_fbb, _o, _rehasher};
+  (void)_va;
+  auto _fused_activation_function = _o->fused_activation_function;
+  auto _cell_clip = _o->cell_clip;
+  auto _proj_clip = _o->proj_clip;
+  return tflite::CreateLSTMOptions(_fbb, _fused_activation_function, _cell_clip,
+                                   _proj_clip);
+}
+
+inline ResizeBilinearOptionsT *ResizeBilinearOptions::UnPack(
+    const flatbuffers::resolver_function_t *_resolver) const {
+  auto _o = new ResizeBilinearOptionsT();
+  UnPackTo(_o, _resolver);
+  return _o;
+}
+
+inline void ResizeBilinearOptions::UnPackTo(
+    ResizeBilinearOptionsT *_o,
+    const flatbuffers::resolver_function_t *_resolver) const {
+  (void)_o;
+  (void)_resolver;
+  {
+    auto _e = new_height();
+    _o->new_height = _e;
+  };
+  {
+    auto _e = new_width();
+    _o->new_width = _e;
+  };
+}
+
+inline flatbuffers::Offset<ResizeBilinearOptions> ResizeBilinearOptions::Pack(
+    flatbuffers::FlatBufferBuilder &_fbb, const ResizeBilinearOptionsT *_o,
+    const flatbuffers::rehasher_function_t *_rehasher) {
+  return CreateResizeBilinearOptions(_fbb, _o, _rehasher);
+}
+
+inline flatbuffers::Offset<ResizeBilinearOptions> CreateResizeBilinearOptions(
+    flatbuffers::FlatBufferBuilder &_fbb, const ResizeBilinearOptionsT *_o,
+    const flatbuffers::rehasher_function_t *_rehasher) {
+  (void)_rehasher;
+  (void)_o;
+  struct _VectorArgs {
+    flatbuffers::FlatBufferBuilder *__fbb;
+    const ResizeBilinearOptionsT *__o;
+    const flatbuffers::rehasher_function_t *__rehasher;
+  } _va = {&_fbb, _o, _rehasher};
+  (void)_va;
+  auto _new_height = _o->new_height;
+  auto _new_width = _o->new_width;
+  return tflite::CreateResizeBilinearOptions(_fbb, _new_height, _new_width);
+}
+
+inline CallOptionsT *CallOptions::UnPack(
+    const flatbuffers::resolver_function_t *_resolver) const {
+  auto _o = new CallOptionsT();
+  UnPackTo(_o, _resolver);
+  return _o;
+}
+
+inline void CallOptions::UnPackTo(
+    CallOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
+  (void)_o;
+  (void)_resolver;
+  {
+    auto _e = subgraph();
+    _o->subgraph = _e;
+  };
+}
+
+inline flatbuffers::Offset<CallOptions> CallOptions::Pack(
+    flatbuffers::FlatBufferBuilder &_fbb, const CallOptionsT *_o,
+    const flatbuffers::rehasher_function_t *_rehasher) {
+  return CreateCallOptions(_fbb, _o, _rehasher);
+}
+
+inline flatbuffers::Offset<CallOptions> CreateCallOptions(
+    flatbuffers::FlatBufferBuilder &_fbb, const CallOptionsT *_o,
+    const flatbuffers::rehasher_function_t *_rehasher) {
+  (void)_rehasher;
+  (void)_o;
+  struct _VectorArgs {
+    flatbuffers::FlatBufferBuilder *__fbb;
+    const CallOptionsT *__o;
+    const flatbuffers::rehasher_function_t *__rehasher;
+  } _va = {&_fbb, _o, _rehasher};
+  (void)_va;
+  auto _subgraph = _o->subgraph;
+  return tflite::CreateCallOptions(_fbb, _subgraph);
+}
+
+inline ReshapeOptionsT *ReshapeOptions::UnPack(
+    const flatbuffers::resolver_function_t *_resolver) const {
+  auto _o = new ReshapeOptionsT();
+  UnPackTo(_o, _resolver);
+  return _o;
+}
+
+inline void ReshapeOptions::UnPackTo(
+    ReshapeOptionsT *_o,
+    const flatbuffers::resolver_function_t *_resolver) const {
+  (void)_o;
+  (void)_resolver;
+  {
+    auto _e = new_shape();
+    if (_e) {
+      _o->new_shape.resize(_e->size());
+      for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) {
+        _o->new_shape[_i] = _e->Get(_i);
+      }
+    }
+  };
+}
+
+inline flatbuffers::Offset<ReshapeOptions> ReshapeOptions::Pack(
+    flatbuffers::FlatBufferBuilder &_fbb, const ReshapeOptionsT *_o,
+    const flatbuffers::rehasher_function_t *_rehasher) {
+  return CreateReshapeOptions(_fbb, _o, _rehasher);
+}
+
+inline flatbuffers::Offset<ReshapeOptions> CreateReshapeOptions(
+    flatbuffers::FlatBufferBuilder &_fbb, const ReshapeOptionsT *_o,
+    const flatbuffers::rehasher_function_t *_rehasher) {
+  (void)_rehasher;
+  (void)_o;
+  struct _VectorArgs {
+    flatbuffers::FlatBufferBuilder *__fbb;
+    const ReshapeOptionsT *__o;
+    const flatbuffers::rehasher_function_t *__rehasher;
+  } _va = {&_fbb, _o, _rehasher};
+  (void)_va;
+  auto _new_shape = _o->new_shape.size() ? _fbb.CreateVector(_o->new_shape) : 0;
+  return tflite::CreateReshapeOptions(_fbb, _new_shape);
+}
+
+inline SkipGramOptionsT *SkipGramOptions::UnPack(
+    const flatbuffers::resolver_function_t *_resolver) const {
+  auto _o = new SkipGramOptionsT();
+  UnPackTo(_o, _resolver);
+  return _o;
+}
+
+inline void SkipGramOptions::UnPackTo(
+    SkipGramOptionsT *_o,
+    const flatbuffers::resolver_function_t *_resolver) const {
+  (void)_o;
+  (void)_resolver;
+  {
+    auto _e = ngram_size();
+    _o->ngram_size = _e;
+  };
+  {
+    auto _e = max_skip_size();
+    _o->max_skip_size = _e;
+  };
+  {
+    auto _e = include_all_ngrams();
+    _o->include_all_ngrams = _e;
+  };
+}
+
+inline flatbuffers::Offset<SkipGramOptions> SkipGramOptions::Pack(
+    flatbuffers::FlatBufferBuilder &_fbb, const SkipGramOptionsT *_o,
+    const flatbuffers::rehasher_function_t *_rehasher) {
+  return CreateSkipGramOptions(_fbb, _o, _rehasher);
+}
+
+inline flatbuffers::Offset<SkipGramOptions> CreateSkipGramOptions(
+    flatbuffers::FlatBufferBuilder &_fbb, const SkipGramOptionsT *_o,
+    const flatbuffers::rehasher_function_t *_rehasher) {
+  (void)_rehasher;
+  (void)_o;
+  struct _VectorArgs {
+    flatbuffers::FlatBufferBuilder *__fbb;
+    const SkipGramOptionsT *__o;
+    const flatbuffers::rehasher_function_t *__rehasher;
+  } _va = {&_fbb, _o, _rehasher};
+  (void)_va;
+  auto _ngram_size = _o->ngram_size;
+  auto _max_skip_size = _o->max_skip_size;
+  auto _include_all_ngrams = _o->include_all_ngrams;
+  return tflite::CreateSkipGramOptions(_fbb, _ngram_size, _max_skip_size,
+                                       _include_all_ngrams);
+}
+
+inline SpaceToDepthOptionsT *SpaceToDepthOptions::UnPack(
+    const flatbuffers::resolver_function_t *_resolver) const {
+  auto _o = new SpaceToDepthOptionsT();
+  UnPackTo(_o, _resolver);
+  return _o;
+}
+
+inline void SpaceToDepthOptions::UnPackTo(
+    SpaceToDepthOptionsT *_o,
+    const flatbuffers::resolver_function_t *_resolver) const {
+  (void)_o;
+  (void)_resolver;
+  {
+    auto _e = block_size();
+    _o->block_size = _e;
+  };
+}
+
+inline flatbuffers::Offset<SpaceToDepthOptions> SpaceToDepthOptions::Pack(
+    flatbuffers::FlatBufferBuilder &_fbb, const SpaceToDepthOptionsT *_o,
+    const flatbuffers::rehasher_function_t *_rehasher) {
+  return CreateSpaceToDepthOptions(_fbb, _o, _rehasher);
+}
+
+inline flatbuffers::Offset<SpaceToDepthOptions> CreateSpaceToDepthOptions(
+    flatbuffers::FlatBufferBuilder &_fbb, const SpaceToDepthOptionsT *_o,
+    const flatbuffers::rehasher_function_t *_rehasher) {
+  (void)_rehasher;
+  (void)_o;
+  struct _VectorArgs {
+    flatbuffers::FlatBufferBuilder *__fbb;
+    const SpaceToDepthOptionsT *__o;
+    const flatbuffers::rehasher_function_t *__rehasher;
+  } _va = {&_fbb, _o, _rehasher};
+  (void)_va;
+  auto _block_size = _o->block_size;
+  return tflite::CreateSpaceToDepthOptions(_fbb, _block_size);
+}
+
+inline EmbeddingLookupSparseOptionsT *EmbeddingLookupSparseOptions::UnPack(
+    const flatbuffers::resolver_function_t *_resolver) const {
+  auto _o = new EmbeddingLookupSparseOptionsT();
+  UnPackTo(_o, _resolver);
+  return _o;
+}
+
+inline void EmbeddingLookupSparseOptions::UnPackTo(
+    EmbeddingLookupSparseOptionsT *_o,
+    const flatbuffers::resolver_function_t *_resolver) const {
+  (void)_o;
+  (void)_resolver;
+  {
+    auto _e = combiner();
+    _o->combiner = _e;
+  };
+}
+
+inline flatbuffers::Offset<EmbeddingLookupSparseOptions>
+EmbeddingLookupSparseOptions::Pack(
+    flatbuffers::FlatBufferBuilder &_fbb,
+    const EmbeddingLookupSparseOptionsT *_o,
+    const flatbuffers::rehasher_function_t *_rehasher) {
+  return CreateEmbeddingLookupSparseOptions(_fbb, _o, _rehasher);
+}
+
+inline flatbuffers::Offset<EmbeddingLookupSparseOptions>
+CreateEmbeddingLookupSparseOptions(
+    flatbuffers::FlatBufferBuilder &_fbb,
+    const EmbeddingLookupSparseOptionsT *_o,
+    const flatbuffers::rehasher_function_t *_rehasher) {
+  (void)_rehasher;
+  (void)_o;
+  struct _VectorArgs {
+    flatbuffers::FlatBufferBuilder *__fbb;
+    const EmbeddingLookupSparseOptionsT *__o;
+    const flatbuffers::rehasher_function_t *__rehasher;
+  } _va = {&_fbb, _o, _rehasher};
+  (void)_va;
+  auto _combiner = _o->combiner;
+  return tflite::CreateEmbeddingLookupSparseOptions(_fbb, _combiner);
+}
+
+inline OperatorCodeT *OperatorCode::UnPack(
+    const flatbuffers::resolver_function_t *_resolver) const {
+  auto _o = new OperatorCodeT();
+  UnPackTo(_o, _resolver);
+  return _o;
+}
+
+inline void OperatorCode::UnPackTo(
+    OperatorCodeT *_o,
+    const flatbuffers::resolver_function_t *_resolver) const {
+  (void)_o;
+  (void)_resolver;
+  {
+    auto _e = builtin_code();
+    _o->builtin_code = _e;
+  };
+  {
+    auto _e = custom_code();
+    if (_e) _o->custom_code = _e->str();
+  };
+}
+
+inline flatbuffers::Offset<OperatorCode> OperatorCode::Pack(
+    flatbuffers::FlatBufferBuilder &_fbb, const OperatorCodeT *_o,
+    const flatbuffers::rehasher_function_t *_rehasher) {
+  return CreateOperatorCode(_fbb, _o, _rehasher);
+}
+
+inline flatbuffers::Offset<OperatorCode> CreateOperatorCode(
+    flatbuffers::FlatBufferBuilder &_fbb, const OperatorCodeT *_o,
+    const flatbuffers::rehasher_function_t *_rehasher) {
+  (void)_rehasher;
+  (void)_o;
+  struct _VectorArgs {
+    flatbuffers::FlatBufferBuilder *__fbb;
+    const OperatorCodeT *__o;
+    const flatbuffers::rehasher_function_t *__rehasher;
+  } _va = {&_fbb, _o, _rehasher};
+  (void)_va;
+  auto _builtin_code = _o->builtin_code;
+  auto _custom_code =
+      _o->custom_code.empty() ? 0 : _fbb.CreateString(_o->custom_code);
+  return tflite::CreateOperatorCode(_fbb, _builtin_code, _custom_code);
+}
+
+inline OperatorT *Operator::UnPack(
+    const flatbuffers::resolver_function_t *_resolver) const {
+  auto _o = new OperatorT();
+  UnPackTo(_o, _resolver);
+  return _o;
+}
+
+inline void Operator::UnPackTo(
+    OperatorT *_o, const flatbuffers::resolver_function_t *_resolver) const {
+  (void)_o;
+  (void)_resolver;
+  {
+    auto _e = opcode_index();
+    _o->opcode_index = _e;
+  };
+  {
+    auto _e = inputs();
+    if (_e) {
+      _o->inputs.resize(_e->size());
+      for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) {
+        _o->inputs[_i] = _e->Get(_i);
+      }
+    }
+  };
+  {
+    auto _e = outputs();
+    if (_e) {
+      _o->outputs.resize(_e->size());
+      for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) {
+        _o->outputs[_i] = _e->Get(_i);
+      }
+    }
+  };
+  {
+    auto _e = builtin_options_type();
+    _o->builtin_options.type = _e;
+  };
+  {
+    auto _e = builtin_options();
+    if (_e)
+      _o->builtin_options.value =
+          BuiltinOptionsUnion::UnPack(_e, builtin_options_type(), _resolver);
+  };
+  {
+    auto _e = custom_options();
+    if (_e) {
+      _o->custom_options.resize(_e->size());
+      for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) {
+        _o->custom_options[_i] = _e->Get(_i);
+      }
+    }
+  };
+  {
+    auto _e = custom_options_format();
+    _o->custom_options_format = _e;
+  };
+}
+
+inline flatbuffers::Offset<Operator> Operator::Pack(
+    flatbuffers::FlatBufferBuilder &_fbb, const OperatorT *_o,
+    const flatbuffers::rehasher_function_t *_rehasher) {
+  return CreateOperator(_fbb, _o, _rehasher);
+}
+
+inline flatbuffers::Offset<Operator> CreateOperator(
+    flatbuffers::FlatBufferBuilder &_fbb, const OperatorT *_o,
+    const flatbuffers::rehasher_function_t *_rehasher) {
+  (void)_rehasher;
+  (void)_o;
+  struct _VectorArgs {
+    flatbuffers::FlatBufferBuilder *__fbb;
+    const OperatorT *__o;
+    const flatbuffers::rehasher_function_t *__rehasher;
+  } _va = {&_fbb, _o, _rehasher};
+  (void)_va;
+  auto _opcode_index = _o->opcode_index;
+  auto _inputs = _o->inputs.size() ? _fbb.CreateVector(_o->inputs) : 0;
+  auto _outputs = _o->outputs.size() ? _fbb.CreateVector(_o->outputs) : 0;
+  auto _builtin_options_type = _o->builtin_options.type;
+  auto _builtin_options = _o->builtin_options.Pack(_fbb);
+  auto _custom_options =
+      _o->custom_options.size() ? _fbb.CreateVector(_o->custom_options) : 0;
+  auto _custom_options_format = _o->custom_options_format;
+  return tflite::CreateOperator(_fbb, _opcode_index, _inputs, _outputs,
+                                _builtin_options_type, _builtin_options,
+                                _custom_options, _custom_options_format);
+}
+
+inline SubGraphT *SubGraph::UnPack(
+    const flatbuffers::resolver_function_t *_resolver) const {
+  auto _o = new SubGraphT();
+  UnPackTo(_o, _resolver);
+  return _o;
+}
+
+inline void SubGraph::UnPackTo(
+    SubGraphT *_o, const flatbuffers::resolver_function_t *_resolver) const {
+  (void)_o;
+  (void)_resolver;
+  {
+    auto _e = tensors();
+    if (_e) {
+      _o->tensors.resize(_e->size());
+      for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) {
+        _o->tensors[_i] =
+            std::unique_ptr<TensorT>(_e->Get(_i)->UnPack(_resolver));
+      }
+    }
+  };
+  {
+    auto _e = inputs();
+    if (_e) {
+      _o->inputs.resize(_e->size());
+      for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) {
+        _o->inputs[_i] = _e->Get(_i);
+      }
+    }
+  };
+  {
+    auto _e = outputs();
+    if (_e) {
+      _o->outputs.resize(_e->size());
+      for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) {
+        _o->outputs[_i] = _e->Get(_i);
+      }
+    }
+  };
+  {
+    auto _e = operators();
+    if (_e) {
+      _o->operators.resize(_e->size());
+      for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) {
+        _o->operators[_i] =
+            std::unique_ptr<OperatorT>(_e->Get(_i)->UnPack(_resolver));
+      }
+    }
+  };
+  {
+    auto _e = name();
+    if (_e) _o->name = _e->str();
+  };
+}
+
+inline flatbuffers::Offset<SubGraph> SubGraph::Pack(
+    flatbuffers::FlatBufferBuilder &_fbb, const SubGraphT *_o,
+    const flatbuffers::rehasher_function_t *_rehasher) {
+  return CreateSubGraph(_fbb, _o, _rehasher);
+}
+
+inline flatbuffers::Offset<SubGraph> CreateSubGraph(
+    flatbuffers::FlatBufferBuilder &_fbb, const SubGraphT *_o,
+    const flatbuffers::rehasher_function_t *_rehasher) {
+  (void)_rehasher;
+  (void)_o;
+  struct _VectorArgs {
+    flatbuffers::FlatBufferBuilder *__fbb;
+    const SubGraphT *__o;
+    const flatbuffers::rehasher_function_t *__rehasher;
+  } _va = {&_fbb, _o, _rehasher};
+  (void)_va;
+  auto _tensors =
+      _o->tensors.size()
+          ? _fbb.CreateVector<flatbuffers::Offset<Tensor>>(
+                _o->tensors.size(),
+                [](size_t i, _VectorArgs *__va) {
+                  return CreateTensor(*__va->__fbb, __va->__o->tensors[i].get(),
+                                      __va->__rehasher);
+                },
+                &_va)
+          : 0;
+  auto _inputs = _o->inputs.size() ? _fbb.CreateVector(_o->inputs) : 0;
+  auto _outputs = _o->outputs.size() ? _fbb.CreateVector(_o->outputs) : 0;
+  auto _operators = _o->operators.size()
+                        ? _fbb.CreateVector<flatbuffers::Offset<Operator>>(
+                              _o->operators.size(),
+                              [](size_t i, _VectorArgs *__va) {
+                                return CreateOperator(
+                                    *__va->__fbb, __va->__o->operators[i].get(),
+                                    __va->__rehasher);
+                              },
+                              &_va)
+                        : 0;
+  auto _name = _o->name.empty() ? 0 : _fbb.CreateString(_o->name);
+  return tflite::CreateSubGraph(_fbb, _tensors, _inputs, _outputs, _operators,
+                                _name);
+}
+
+inline BufferT *Buffer::UnPack(
+    const flatbuffers::resolver_function_t *_resolver) const {
+  auto _o = new BufferT();
+  UnPackTo(_o, _resolver);
+  return _o;
+}
+
+inline void Buffer::UnPackTo(
+    BufferT *_o, const flatbuffers::resolver_function_t *_resolver) const {
+  (void)_o;
+  (void)_resolver;
+  {
+    auto _e = data();
+    if (_e) {
+      _o->data.resize(_e->size());
+      for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) {
+        _o->data[_i] = _e->Get(_i);
+      }
+    }
+  };
+}
+
+inline flatbuffers::Offset<Buffer> Buffer::Pack(
+    flatbuffers::FlatBufferBuilder &_fbb, const BufferT *_o,
+    const flatbuffers::rehasher_function_t *_rehasher) {
+  return CreateBuffer(_fbb, _o, _rehasher);
+}
+
+inline flatbuffers::Offset<Buffer> CreateBuffer(
+    flatbuffers::FlatBufferBuilder &_fbb, const BufferT *_o,
+    const flatbuffers::rehasher_function_t *_rehasher) {
+  (void)_rehasher;
+  (void)_o;
+  struct _VectorArgs {
+    flatbuffers::FlatBufferBuilder *__fbb;
+    const BufferT *__o;
+    const flatbuffers::rehasher_function_t *__rehasher;
+  } _va = {&_fbb, _o, _rehasher};
+  (void)_va;
+  auto _data = _o->data.size() ? _fbb.CreateVector(_o->data) : 0;
+  return tflite::CreateBuffer(_fbb, _data);
+}
+
+inline ModelT *Model::UnPack(
+    const flatbuffers::resolver_function_t *_resolver) const {
+  auto _o = new ModelT();
+  UnPackTo(_o, _resolver);
+  return _o;
+}
+
+inline void Model::UnPackTo(
+    ModelT *_o, const flatbuffers::resolver_function_t *_resolver) const {
+  (void)_o;
+  (void)_resolver;
+  {
+    auto _e = version();
+    _o->version = _e;
+  };
+  {
+    auto _e = operator_codes();
+    if (_e) {
+      _o->operator_codes.resize(_e->size());
+      for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) {
+        _o->operator_codes[_i] =
+            std::unique_ptr<OperatorCodeT>(_e->Get(_i)->UnPack(_resolver));
+      }
+    }
+  };
+  {
+    auto _e = subgraphs();
+    if (_e) {
+      _o->subgraphs.resize(_e->size());
+      for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) {
+        _o->subgraphs[_i] =
+            std::unique_ptr<SubGraphT>(_e->Get(_i)->UnPack(_resolver));
+      }
+    }
+  };
+  {
+    auto _e = description();
+    if (_e) _o->description = _e->str();
+  };
+  {
+    auto _e = buffers();
+    if (_e) {
+      _o->buffers.resize(_e->size());
+      for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) {
+        _o->buffers[_i] =
+            std::unique_ptr<BufferT>(_e->Get(_i)->UnPack(_resolver));
+      }
+    }
+  };
+}
+
+inline flatbuffers::Offset<Model> Model::Pack(
+    flatbuffers::FlatBufferBuilder &_fbb, const ModelT *_o,
+    const flatbuffers::rehasher_function_t *_rehasher) {
+  return CreateModel(_fbb, _o, _rehasher);
+}
+
+inline flatbuffers::Offset<Model> CreateModel(
+    flatbuffers::FlatBufferBuilder &_fbb, const ModelT *_o,
+    const flatbuffers::rehasher_function_t *_rehasher) {
+  (void)_rehasher;
+  (void)_o;
+  struct _VectorArgs {
+    flatbuffers::FlatBufferBuilder *__fbb;
+    const ModelT *__o;
+    const flatbuffers::rehasher_function_t *__rehasher;
+  } _va = {&_fbb, _o, _rehasher};
+  (void)_va;
+  auto _version = _o->version;
+  auto _operator_codes =
+      _o->operator_codes.size()
+          ? _fbb.CreateVector<flatbuffers::Offset<OperatorCode>>(
+                _o->operator_codes.size(),
+                [](size_t i, _VectorArgs *__va) {
+                  return CreateOperatorCode(*__va->__fbb,
+                                            __va->__o->operator_codes[i].get(),
+                                            __va->__rehasher);
+                },
+                &_va)
+          : 0;
+  auto _subgraphs = _o->subgraphs.size()
+                        ? _fbb.CreateVector<flatbuffers::Offset<SubGraph>>(
+                              _o->subgraphs.size(),
+                              [](size_t i, _VectorArgs *__va) {
+                                return CreateSubGraph(
+                                    *__va->__fbb, __va->__o->subgraphs[i].get(),
+                                    __va->__rehasher);
+                              },
+                              &_va)
+                        : 0;
+  auto _description =
+      _o->description.empty() ? 0 : _fbb.CreateString(_o->description);
+  auto _buffers =
+      _o->buffers.size()
+          ? _fbb.CreateVector<flatbuffers::Offset<Buffer>>(
+                _o->buffers.size(),
+                [](size_t i, _VectorArgs *__va) {
+                  return CreateBuffer(*__va->__fbb, __va->__o->buffers[i].get(),
+                                      __va->__rehasher);
+                },
+                &_va)
+          : 0;
+  return tflite::CreateModel(_fbb, _version, _operator_codes, _subgraphs,
+                             _description, _buffers);
+}
+
+inline bool VerifyBuiltinOptions(flatbuffers::Verifier &verifier,
+                                 const void *obj, BuiltinOptions type) {
+  switch (type) {
+    case BuiltinOptions_NONE: {
+      return true;
+    }
+    case BuiltinOptions_Conv2DOptions: {
+      auto ptr = reinterpret_cast<const Conv2DOptions *>(obj);
+      return verifier.VerifyTable(ptr);
+    }
+    case BuiltinOptions_DepthwiseConv2DOptions: {
+      auto ptr = reinterpret_cast<const DepthwiseConv2DOptions *>(obj);
+      return verifier.VerifyTable(ptr);
+    }
+    case BuiltinOptions_ConcatEmbeddingsOptions: {
+      auto ptr = reinterpret_cast<const ConcatEmbeddingsOptions *>(obj);
+      return verifier.VerifyTable(ptr);
+    }
+    case BuiltinOptions_LSHProjectionOptions: {
+      auto ptr = reinterpret_cast<const LSHProjectionOptions *>(obj);
+      return verifier.VerifyTable(ptr);
+    }
+    case BuiltinOptions_Pool2DOptions: {
+      auto ptr = reinterpret_cast<const Pool2DOptions *>(obj);
+      return verifier.VerifyTable(ptr);
+    }
+    case BuiltinOptions_SVDFOptions: {
+      auto ptr = reinterpret_cast<const SVDFOptions *>(obj);
+      return verifier.VerifyTable(ptr);
+    }
+    case BuiltinOptions_RNNOptions: {
+      auto ptr = reinterpret_cast<const RNNOptions *>(obj);
+      return verifier.VerifyTable(ptr);
+    }
+    case BuiltinOptions_FullyConnectedOptions: {
+      auto ptr = reinterpret_cast<const FullyConnectedOptions *>(obj);
+      return verifier.VerifyTable(ptr);
+    }
+    case BuiltinOptions_SoftmaxOptions: {
+      auto ptr = reinterpret_cast<const SoftmaxOptions *>(obj);
+      return verifier.VerifyTable(ptr);
+    }
+    case BuiltinOptions_ConcatenationOptions: {
+      auto ptr = reinterpret_cast<const ConcatenationOptions *>(obj);
+      return verifier.VerifyTable(ptr);
+    }
+    case BuiltinOptions_AddOptions: {
+      auto ptr = reinterpret_cast<const AddOptions *>(obj);
+      return verifier.VerifyTable(ptr);
+    }
+    case BuiltinOptions_L2NormOptions: {
+      auto ptr = reinterpret_cast<const L2NormOptions *>(obj);
+      return verifier.VerifyTable(ptr);
+    }
+    case BuiltinOptions_LocalResponseNormalizationOptions: {
+      auto ptr =
+          reinterpret_cast<const LocalResponseNormalizationOptions *>(obj);
+      return verifier.VerifyTable(ptr);
+    }
+    case BuiltinOptions_LSTMOptions: {
+      auto ptr = reinterpret_cast<const LSTMOptions *>(obj);
+      return verifier.VerifyTable(ptr);
+    }
+    case BuiltinOptions_ResizeBilinearOptions: {
+      auto ptr = reinterpret_cast<const ResizeBilinearOptions *>(obj);
+      return verifier.VerifyTable(ptr);
+    }
+    case BuiltinOptions_CallOptions: {
+      auto ptr = reinterpret_cast<const CallOptions *>(obj);
+      return verifier.VerifyTable(ptr);
+    }
+    case BuiltinOptions_ReshapeOptions: {
+      auto ptr = reinterpret_cast<const ReshapeOptions *>(obj);
+      return verifier.VerifyTable(ptr);
+    }
+    case BuiltinOptions_SkipGramOptions: {
+      auto ptr = reinterpret_cast<const SkipGramOptions *>(obj);
+      return verifier.VerifyTable(ptr);
+    }
+    case BuiltinOptions_SpaceToDepthOptions: {
+      auto ptr = reinterpret_cast<const SpaceToDepthOptions *>(obj);
+      return verifier.VerifyTable(ptr);
+    }
+    case BuiltinOptions_EmbeddingLookupSparseOptions: {
+      auto ptr = reinterpret_cast<const EmbeddingLookupSparseOptions *>(obj);
+      return verifier.VerifyTable(ptr);
+    }
+    case BuiltinOptions_MulOptions: {
+      auto ptr = reinterpret_cast<const MulOptions *>(obj);
+      return verifier.VerifyTable(ptr);
+    }
+    default:
+      return false;
+  }
+}
+
+inline bool VerifyBuiltinOptionsVector(
+    flatbuffers::Verifier &verifier,
+    const flatbuffers::Vector<flatbuffers::Offset<void>> *values,
+    const flatbuffers::Vector<uint8_t> *types) {
+  if (values->size() != types->size()) return false;
+  for (flatbuffers::uoffset_t i = 0; i < values->size(); ++i) {
+    if (!VerifyBuiltinOptions(verifier, values->Get(i),
+                              types->GetEnum<BuiltinOptions>(i))) {
+      return false;
+    }
+  }
+  return true;
+}
+
+inline void *BuiltinOptionsUnion::UnPack(
+    const void *obj, BuiltinOptions type,
+    const flatbuffers::resolver_function_t *resolver) {
+  switch (type) {
+    case BuiltinOptions_Conv2DOptions: {
+      auto ptr = reinterpret_cast<const Conv2DOptions *>(obj);
+      return ptr->UnPack(resolver);
+    }
+    case BuiltinOptions_DepthwiseConv2DOptions: {
+      auto ptr = reinterpret_cast<const DepthwiseConv2DOptions *>(obj);
+      return ptr->UnPack(resolver);
+    }
+    case BuiltinOptions_ConcatEmbeddingsOptions: {
+      auto ptr = reinterpret_cast<const ConcatEmbeddingsOptions *>(obj);
+      return ptr->UnPack(resolver);
+    }
+    case BuiltinOptions_LSHProjectionOptions: {
+      auto ptr = reinterpret_cast<const LSHProjectionOptions *>(obj);
+      return ptr->UnPack(resolver);
+    }
+    case BuiltinOptions_Pool2DOptions: {
+      auto ptr = reinterpret_cast<const Pool2DOptions *>(obj);
+      return ptr->UnPack(resolver);
+    }
+    case BuiltinOptions_SVDFOptions: {
+      auto ptr = reinterpret_cast<const SVDFOptions *>(obj);
+      return ptr->UnPack(resolver);
+    }
+    case BuiltinOptions_RNNOptions: {
+      auto ptr = reinterpret_cast<const RNNOptions *>(obj);
+      return ptr->UnPack(resolver);
+    }
+    case BuiltinOptions_FullyConnectedOptions: {
+      auto ptr = reinterpret_cast<const FullyConnectedOptions *>(obj);
+      return ptr->UnPack(resolver);
+    }
+    case BuiltinOptions_SoftmaxOptions: {
+      auto ptr = reinterpret_cast<const SoftmaxOptions *>(obj);
+      return ptr->UnPack(resolver);
+    }
+    case BuiltinOptions_ConcatenationOptions: {
+      auto ptr = reinterpret_cast<const ConcatenationOptions *>(obj);
+      return ptr->UnPack(resolver);
+    }
+    case BuiltinOptions_AddOptions: {
+      auto ptr = reinterpret_cast<const AddOptions *>(obj);
+      return ptr->UnPack(resolver);
+    }
+    case BuiltinOptions_L2NormOptions: {
+      auto ptr = reinterpret_cast<const L2NormOptions *>(obj);
+      return ptr->UnPack(resolver);
+    }
+    case BuiltinOptions_LocalResponseNormalizationOptions: {
+      auto ptr =
+          reinterpret_cast<const LocalResponseNormalizationOptions *>(obj);
+      return ptr->UnPack(resolver);
+    }
+    case BuiltinOptions_LSTMOptions: {
+      auto ptr = reinterpret_cast<const LSTMOptions *>(obj);
+      return ptr->UnPack(resolver);
+    }
+    case BuiltinOptions_ResizeBilinearOptions: {
+      auto ptr = reinterpret_cast<const ResizeBilinearOptions *>(obj);
+      return ptr->UnPack(resolver);
+    }
+    case BuiltinOptions_CallOptions: {
+      auto ptr = reinterpret_cast<const CallOptions *>(obj);
+      return ptr->UnPack(resolver);
+    }
+    case BuiltinOptions_ReshapeOptions: {
+      auto ptr = reinterpret_cast<const ReshapeOptions *>(obj);
+      return ptr->UnPack(resolver);
+    }
+    case BuiltinOptions_SkipGramOptions: {
+      auto ptr = reinterpret_cast<const SkipGramOptions *>(obj);
+      return ptr->UnPack(resolver);
+    }
+    case BuiltinOptions_SpaceToDepthOptions: {
+      auto ptr = reinterpret_cast<const SpaceToDepthOptions *>(obj);
+      return ptr->UnPack(resolver);
+    }
+    case BuiltinOptions_EmbeddingLookupSparseOptions: {
+      auto ptr = reinterpret_cast<const EmbeddingLookupSparseOptions *>(obj);
+      return ptr->UnPack(resolver);
+    }
+    case BuiltinOptions_MulOptions: {
+      auto ptr = reinterpret_cast<const MulOptions *>(obj);
+      return ptr->UnPack(resolver);
+    }
+    default:
+      return nullptr;
+  }
+}
+
+inline flatbuffers::Offset<void> BuiltinOptionsUnion::Pack(
+    flatbuffers::FlatBufferBuilder &_fbb,
+    const flatbuffers::rehasher_function_t *_rehasher) const {
+  switch (type) {
+    case BuiltinOptions_Conv2DOptions: {
+      auto ptr = reinterpret_cast<const Conv2DOptionsT *>(value);
+      return CreateConv2DOptions(_fbb, ptr, _rehasher).Union();
+    }
+    case BuiltinOptions_DepthwiseConv2DOptions: {
+      auto ptr = reinterpret_cast<const DepthwiseConv2DOptionsT *>(value);
+      return CreateDepthwiseConv2DOptions(_fbb, ptr, _rehasher).Union();
+    }
+    case BuiltinOptions_ConcatEmbeddingsOptions: {
+      auto ptr = reinterpret_cast<const ConcatEmbeddingsOptionsT *>(value);
+      return CreateConcatEmbeddingsOptions(_fbb, ptr, _rehasher).Union();
+    }
+    case BuiltinOptions_LSHProjectionOptions: {
+      auto ptr = reinterpret_cast<const LSHProjectionOptionsT *>(value);
+      return CreateLSHProjectionOptions(_fbb, ptr, _rehasher).Union();
+    }
+    case BuiltinOptions_Pool2DOptions: {
+      auto ptr = reinterpret_cast<const Pool2DOptionsT *>(value);
+      return CreatePool2DOptions(_fbb, ptr, _rehasher).Union();
+    }
+    case BuiltinOptions_SVDFOptions: {
+      auto ptr = reinterpret_cast<const SVDFOptionsT *>(value);
+      return CreateSVDFOptions(_fbb, ptr, _rehasher).Union();
+    }
+    case BuiltinOptions_RNNOptions: {
+      auto ptr = reinterpret_cast<const RNNOptionsT *>(value);
+      return CreateRNNOptions(_fbb, ptr, _rehasher).Union();
+    }
+    case BuiltinOptions_FullyConnectedOptions: {
+      auto ptr = reinterpret_cast<const FullyConnectedOptionsT *>(value);
+      return CreateFullyConnectedOptions(_fbb, ptr, _rehasher).Union();
+    }
+    case BuiltinOptions_SoftmaxOptions: {
+      auto ptr = reinterpret_cast<const SoftmaxOptionsT *>(value);
+      return CreateSoftmaxOptions(_fbb, ptr, _rehasher).Union();
+    }
+    case BuiltinOptions_ConcatenationOptions: {
+      auto ptr = reinterpret_cast<const ConcatenationOptionsT *>(value);
+      return CreateConcatenationOptions(_fbb, ptr, _rehasher).Union();
+    }
+    case BuiltinOptions_AddOptions: {
+      auto ptr = reinterpret_cast<const AddOptionsT *>(value);
+      return CreateAddOptions(_fbb, ptr, _rehasher).Union();
+    }
+    case BuiltinOptions_L2NormOptions: {
+      auto ptr = reinterpret_cast<const L2NormOptionsT *>(value);
+      return CreateL2NormOptions(_fbb, ptr, _rehasher).Union();
+    }
+    case BuiltinOptions_LocalResponseNormalizationOptions: {
+      auto ptr =
+          reinterpret_cast<const LocalResponseNormalizationOptionsT *>(value);
+      return CreateLocalResponseNormalizationOptions(_fbb, ptr, _rehasher)
+          .Union();
+    }
+    case BuiltinOptions_LSTMOptions: {
+      auto ptr = reinterpret_cast<const LSTMOptionsT *>(value);
+      return CreateLSTMOptions(_fbb, ptr, _rehasher).Union();
+    }
+    case BuiltinOptions_ResizeBilinearOptions: {
+      auto ptr = reinterpret_cast<const ResizeBilinearOptionsT *>(value);
+      return CreateResizeBilinearOptions(_fbb, ptr, _rehasher).Union();
+    }
+    case BuiltinOptions_CallOptions: {
+      auto ptr = reinterpret_cast<const CallOptionsT *>(value);
+      return CreateCallOptions(_fbb, ptr, _rehasher).Union();
+    }
+    case BuiltinOptions_ReshapeOptions: {
+      auto ptr = reinterpret_cast<const ReshapeOptionsT *>(value);
+      return CreateReshapeOptions(_fbb, ptr, _rehasher).Union();
+    }
+    case BuiltinOptions_SkipGramOptions: {
+      auto ptr = reinterpret_cast<const SkipGramOptionsT *>(value);
+      return CreateSkipGramOptions(_fbb, ptr, _rehasher).Union();
+    }
+    case BuiltinOptions_SpaceToDepthOptions: {
+      auto ptr = reinterpret_cast<const SpaceToDepthOptionsT *>(value);
+      return CreateSpaceToDepthOptions(_fbb, ptr, _rehasher).Union();
+    }
+    case BuiltinOptions_EmbeddingLookupSparseOptions: {
+      auto ptr = reinterpret_cast<const EmbeddingLookupSparseOptionsT *>(value);
+      return CreateEmbeddingLookupSparseOptions(_fbb, ptr, _rehasher).Union();
+    }
+    case BuiltinOptions_MulOptions: {
+      auto ptr = reinterpret_cast<const MulOptionsT *>(value);
+      return CreateMulOptions(_fbb, ptr, _rehasher).Union();
+    }
+    default:
+      return 0;
+  }
+}
+
+inline BuiltinOptionsUnion::BuiltinOptionsUnion(const BuiltinOptionsUnion &u)
+    FLATBUFFERS_NOEXCEPT : type(u.type),
+                           value(nullptr) {
+  switch (type) {
+    case BuiltinOptions_Conv2DOptions: {
+      value = new Conv2DOptionsT(*reinterpret_cast<Conv2DOptionsT *>(u.value));
+      break;
+    }
+    case BuiltinOptions_DepthwiseConv2DOptions: {
+      value = new DepthwiseConv2DOptionsT(
+          *reinterpret_cast<DepthwiseConv2DOptionsT *>(u.value));
+      break;
+    }
+    case BuiltinOptions_ConcatEmbeddingsOptions: {
+      value = new ConcatEmbeddingsOptionsT(
+          *reinterpret_cast<ConcatEmbeddingsOptionsT *>(u.value));
+      break;
+    }
+    case BuiltinOptions_LSHProjectionOptions: {
+      value = new LSHProjectionOptionsT(
+          *reinterpret_cast<LSHProjectionOptionsT *>(u.value));
+      break;
+    }
+    case BuiltinOptions_Pool2DOptions: {
+      value = new Pool2DOptionsT(*reinterpret_cast<Pool2DOptionsT *>(u.value));
+      break;
+    }
+    case BuiltinOptions_SVDFOptions: {
+      value = new SVDFOptionsT(*reinterpret_cast<SVDFOptionsT *>(u.value));
+      break;
+    }
+    case BuiltinOptions_RNNOptions: {
+      value = new RNNOptionsT(*reinterpret_cast<RNNOptionsT *>(u.value));
+      break;
+    }
+    case BuiltinOptions_FullyConnectedOptions: {
+      value = new FullyConnectedOptionsT(
+          *reinterpret_cast<FullyConnectedOptionsT *>(u.value));
+      break;
+    }
+    case BuiltinOptions_SoftmaxOptions: {
+      value =
+          new SoftmaxOptionsT(*reinterpret_cast<SoftmaxOptionsT *>(u.value));
+      break;
+    }
+    case BuiltinOptions_ConcatenationOptions: {
+      value = new ConcatenationOptionsT(
+          *reinterpret_cast<ConcatenationOptionsT *>(u.value));
+      break;
+    }
+    case BuiltinOptions_AddOptions: {
+      value = new AddOptionsT(*reinterpret_cast<AddOptionsT *>(u.value));
+      break;
+    }
+    case BuiltinOptions_L2NormOptions: {
+      value = new L2NormOptionsT(*reinterpret_cast<L2NormOptionsT *>(u.value));
+      break;
+    }
+    case BuiltinOptions_LocalResponseNormalizationOptions: {
+      value = new LocalResponseNormalizationOptionsT(
+          *reinterpret_cast<LocalResponseNormalizationOptionsT *>(u.value));
+      break;
+    }
+    case BuiltinOptions_LSTMOptions: {
+      value = new LSTMOptionsT(*reinterpret_cast<LSTMOptionsT *>(u.value));
+      break;
+    }
+    case BuiltinOptions_ResizeBilinearOptions: {
+      value = new ResizeBilinearOptionsT(
+          *reinterpret_cast<ResizeBilinearOptionsT *>(u.value));
+      break;
+    }
+    case BuiltinOptions_CallOptions: {
+      value = new CallOptionsT(*reinterpret_cast<CallOptionsT *>(u.value));
+      break;
+    }
+    case BuiltinOptions_ReshapeOptions: {
+      value =
+          new ReshapeOptionsT(*reinterpret_cast<ReshapeOptionsT *>(u.value));
+      break;
+    }
+    case BuiltinOptions_SkipGramOptions: {
+      value =
+          new SkipGramOptionsT(*reinterpret_cast<SkipGramOptionsT *>(u.value));
+      break;
+    }
+    case BuiltinOptions_SpaceToDepthOptions: {
+      value = new SpaceToDepthOptionsT(
+          *reinterpret_cast<SpaceToDepthOptionsT *>(u.value));
+      break;
+    }
+    case BuiltinOptions_EmbeddingLookupSparseOptions: {
+      value = new EmbeddingLookupSparseOptionsT(
+          *reinterpret_cast<EmbeddingLookupSparseOptionsT *>(u.value));
+      break;
+    }
+    case BuiltinOptions_MulOptions: {
+      value = new MulOptionsT(*reinterpret_cast<MulOptionsT *>(u.value));
+      break;
+    }
+    default:
+      break;
+  }
+}
+
+inline void BuiltinOptionsUnion::Reset() {
+  switch (type) {
+    case BuiltinOptions_Conv2DOptions: {
+      auto ptr = reinterpret_cast<Conv2DOptionsT *>(value);
+      delete ptr;
+      break;
+    }
+    case BuiltinOptions_DepthwiseConv2DOptions: {
+      auto ptr = reinterpret_cast<DepthwiseConv2DOptionsT *>(value);
+      delete ptr;
+      break;
+    }
+    case BuiltinOptions_ConcatEmbeddingsOptions: {
+      auto ptr = reinterpret_cast<ConcatEmbeddingsOptionsT *>(value);
+      delete ptr;
+      break;
+    }
+    case BuiltinOptions_LSHProjectionOptions: {
+      auto ptr = reinterpret_cast<LSHProjectionOptionsT *>(value);
+      delete ptr;
+      break;
+    }
+    case BuiltinOptions_Pool2DOptions: {
+      auto ptr = reinterpret_cast<Pool2DOptionsT *>(value);
+      delete ptr;
+      break;
+    }
+    case BuiltinOptions_SVDFOptions: {
+      auto ptr = reinterpret_cast<SVDFOptionsT *>(value);
+      delete ptr;
+      break;
+    }
+    case BuiltinOptions_RNNOptions: {
+      auto ptr = reinterpret_cast<RNNOptionsT *>(value);
+      delete ptr;
+      break;
+    }
+    case BuiltinOptions_FullyConnectedOptions: {
+      auto ptr = reinterpret_cast<FullyConnectedOptionsT *>(value);
+      delete ptr;
+      break;
+    }
+    case BuiltinOptions_SoftmaxOptions: {
+      auto ptr = reinterpret_cast<SoftmaxOptionsT *>(value);
+      delete ptr;
+      break;
+    }
+    case BuiltinOptions_ConcatenationOptions: {
+      auto ptr = reinterpret_cast<ConcatenationOptionsT *>(value);
+      delete ptr;
+      break;
+    }
+    case BuiltinOptions_AddOptions: {
+      auto ptr = reinterpret_cast<AddOptionsT *>(value);
+      delete ptr;
+      break;
+    }
+    case BuiltinOptions_L2NormOptions: {
+      auto ptr = reinterpret_cast<L2NormOptionsT *>(value);
+      delete ptr;
+      break;
+    }
+    case BuiltinOptions_LocalResponseNormalizationOptions: {
+      auto ptr = reinterpret_cast<LocalResponseNormalizationOptionsT *>(value);
+      delete ptr;
+      break;
+    }
+    case BuiltinOptions_LSTMOptions: {
+      auto ptr = reinterpret_cast<LSTMOptionsT *>(value);
+      delete ptr;
+      break;
+    }
+    case BuiltinOptions_ResizeBilinearOptions: {
+      auto ptr = reinterpret_cast<ResizeBilinearOptionsT *>(value);
+      delete ptr;
+      break;
+    }
+    case BuiltinOptions_CallOptions: {
+      auto ptr = reinterpret_cast<CallOptionsT *>(value);
+      delete ptr;
+      break;
+    }
+    case BuiltinOptions_ReshapeOptions: {
+      auto ptr = reinterpret_cast<ReshapeOptionsT *>(value);
+      delete ptr;
+      break;
+    }
+    case BuiltinOptions_SkipGramOptions: {
+      auto ptr = reinterpret_cast<SkipGramOptionsT *>(value);
+      delete ptr;
+      break;
+    }
+    case BuiltinOptions_SpaceToDepthOptions: {
+      auto ptr = reinterpret_cast<SpaceToDepthOptionsT *>(value);
+      delete ptr;
+      break;
+    }
+    case BuiltinOptions_EmbeddingLookupSparseOptions: {
+      auto ptr = reinterpret_cast<EmbeddingLookupSparseOptionsT *>(value);
+      delete ptr;
+      break;
+    }
+    case BuiltinOptions_MulOptions: {
+      auto ptr = reinterpret_cast<MulOptionsT *>(value);
+      delete ptr;
+      break;
+    }
+    default:
+      break;
+  }
+  value = nullptr;
+  type = BuiltinOptions_NONE;
+}
+
+inline const tflite::Model *GetModel(const void *buf) {
+  return flatbuffers::GetRoot<tflite::Model>(buf);
+}
+
+inline const char *ModelIdentifier() { return "TFL3"; }
+
+inline bool ModelBufferHasIdentifier(const void *buf) {
+  return flatbuffers::BufferHasIdentifier(buf, ModelIdentifier());
+}
+
+inline bool VerifyModelBuffer(flatbuffers::Verifier &verifier) {
+  return verifier.VerifyBuffer<tflite::Model>(ModelIdentifier());
+}
+
+inline const char *ModelExtension() { return "tflite"; }
+
+inline void FinishModelBuffer(flatbuffers::FlatBufferBuilder &fbb,
+                              flatbuffers::Offset<tflite::Model> root) {
+  fbb.Finish(root, ModelIdentifier());
+}
+
+inline std::unique_ptr<ModelT> UnPackModel(
+    const void *buf, const flatbuffers::resolver_function_t *res = nullptr) {
+  return std::unique_ptr<ModelT>(GetModel(buf)->UnPack(res));
+}
+
+}  // namespace tflite
+
+#endif  // FLATBUFFERS_GENERATED_SCHEMA_TFLITE_H_
diff --git a/tensorflow/contrib/lite/toco/tflite/BUILD b/tensorflow/contrib/lite/toco/tflite/BUILD
index 793eb366a4..332253a092 100644
--- a/tensorflow/contrib/lite/toco/tflite/BUILD
+++ b/tensorflow/contrib/lite/toco/tflite/BUILD
@@ -1,3 +1,8 @@
+package(
+    # To suppress build cleaner error about inclusion of schema_generate.h.
+    features = ["-layering_check"],
+)
+
 licenses(["notice"])  # Apache 2.0
 
 load(
diff --git a/tensorflow/contrib/lite/tools/benchmark_model.cc b/tensorflow/contrib/lite/tools/benchmark_model.cc
index ef43f64131..6ae3ab5729 100644
--- a/tensorflow/contrib/lite/tools/benchmark_model.cc
+++ b/tensorflow/contrib/lite/tools/benchmark_model.cc
@@ -31,6 +31,7 @@ void RegisterSelectedOps(::tflite::MutableOpResolver* resolver);
 #endif
 
 #define LOG(x) std::cerr
+
 #define CHECK(x)                  \
   if (!(x)) {                     \
     LOG(ERROR) << #x << "failed"; \
diff --git a/tensorflow/contrib/makefile/download_dependencies.sh b/tensorflow/contrib/makefile/download_dependencies.sh
index a2b444d53a..b610441308 100755
--- a/tensorflow/contrib/makefile/download_dependencies.sh
+++ b/tensorflow/contrib/makefile/download_dependencies.sh
@@ -19,13 +19,20 @@ set -e
 DOWNLOADS_DIR=tensorflow/contrib/makefile/downloads
 BZL_FILE_PATH=tensorflow/workspace.bzl
 
-EIGEN_URL="$(grep -o 'http.*bitbucket.org/eigen/eigen/get/.*tar\.gz' "${BZL_FILE_PATH}" | grep -v bazel-mirror | head -n1)"
+# Ensure it is being run from repo root
+if [ ! -f $BZL_FILE_PATH ]; then
+  echo "Could not find ${BZL_FILE_PATH}":
+  echo "Likely you are not running this from the root directory of the repository.";
+  exit 1;
+fi
+
+EIGEN_URL="$(grep -o 'http.*bitbucket.org/eigen/eigen/get/.*tar\.gz' "${BZL_FILE_PATH}" | grep -v mirror.bazel | head -n1)"
 GEMMLOWP_URL="$(grep -o 'https://mirror.bazel.build/github.com/google/gemmlowp/.*zip' "${BZL_FILE_PATH}" | head -n1)"
 GOOGLETEST_URL="https://github.com/google/googletest/archive/release-1.8.0.tar.gz"
 NSYNC_URL="$(grep -o 'https://mirror.bazel.build/github.com/google/nsync/.*tar\.gz' "${BZL_FILE_PATH}" | head -n1)"
 PROTOBUF_URL="$(grep -o 'https://mirror.bazel.build/github.com/google/protobuf/.*tar\.gz' "${BZL_FILE_PATH}" | head -n1)"
 RE2_URL="$(grep -o 'https://mirror.bazel.build/github.com/google/re2/.*tar\.gz' "${BZL_FILE_PATH}" | head -n1)"
-FFT2D_URL="$(grep -o 'http.*fft\.tgz' "${BZL_FILE_PATH}" | grep -v bazel-mirror | head -n1)"
+FFT2D_URL="$(grep -o 'http.*fft\.tgz' "${BZL_FILE_PATH}" | grep -v mirror.bazel | head -n1)"
 ABSL_URL="$(grep -o 'https://github.com/abseil/abseil-cpp/.*tar.gz' "${BZL_FILE_PATH}" | head -n1)"
 
 # TODO(petewarden): Some new code in Eigen triggers a clang bug with iOS arm64,
diff --git a/tensorflow/contrib/model_pruning/python/layers/core_layers.py b/tensorflow/contrib/model_pruning/python/layers/core_layers.py
index ae60d8b1e1..95dfd8f421 100644
--- a/tensorflow/contrib/model_pruning/python/layers/core_layers.py
+++ b/tensorflow/contrib/model_pruning/python/layers/core_layers.py
@@ -72,8 +72,8 @@ class _MaskedConv(base.Layer):
       linear activation.
     use_bias: Boolean, whether the layer uses a bias.
     kernel_initializer: An initializer for the convolution kernel.
-    bias_initializer: An initializer for the bias vector. If None, no bias will
-      be applied.
+    bias_initializer: An initializer for the bias vector. If None, the default
+      initializer will be used.
     kernel_regularizer: Optional regularizer for the convolution kernel.
     bias_regularizer: Optional regularizer for the bias vector.
     activity_regularizer: Regularizer function for the output.
@@ -279,8 +279,8 @@ class MaskedConv2D(_MaskedConv):
       linear activation.
     use_bias: Boolean, whether the layer uses a bias.
     kernel_initializer: An initializer for the convolution kernel.
-    bias_initializer: An initializer for the bias vector. If None, no bias will
-      be applied.
+    bias_initializer: An initializer for the bias vector. If None, the default
+      initializer will be used.
     kernel_regularizer: Optional regularizer for the convolution kernel.
     bias_regularizer: Optional regularizer for the bias vector.
     activity_regularizer: Regularizer function for the output.
diff --git a/tensorflow/contrib/periodic_resample/BUILD b/tensorflow/contrib/periodic_resample/BUILD
new file mode 100644
index 0000000000..71582f9c9a
--- /dev/null
+++ b/tensorflow/contrib/periodic_resample/BUILD
@@ -0,0 +1,113 @@
+package(default_visibility = ["//visibility:public"])
+
+licenses(["notice"])  # Apache 2.0
+
+exports_files(["LICENSE"])
+
+load(
+    "//tensorflow:tensorflow.bzl",
+    "tf_gen_op_libs",
+    "tf_custom_op_library",
+    "tf_custom_op_py_library",
+    "tf_gen_op_wrapper_py",
+)
+
+cc_library(
+    name = "all_ops",
+    srcs = [":custom_op_sources"],
+    hdrs = [":custom_op_headers"],
+    deps = [
+        "//tensorflow/core:framework_headers_lib",
+        "//third_party/eigen3",
+        "@protobuf_archive//:protobuf_headers",
+    ],
+    alwayslink = 1,
+)
+
+tf_custom_op_library(
+    name = "python/ops/_periodic_resample_op.so",
+    srcs = [
+        ":custom_op_headers",
+        ":custom_op_sources",
+    ],
+)
+
+tf_gen_op_libs(
+    op_lib_names = ["array_ops"],
+)
+
+tf_gen_op_wrapper_py(
+    name = "gen_periodic_resample_op_py",
+    out = "python/ops/gen_periodic_resample_op.py",
+    deps = [":array_ops_op_lib"],
+)
+
+tf_custom_op_py_library(
+    name = "periodic_resample_op_py",
+    srcs = ["python/ops/periodic_resample_op.py"],
+    dso = ["python/ops/_periodic_resample_op.so"],
+    kernels = [
+        ":array_ops_op_lib",
+    ],
+    srcs_version = "PY2AND3",
+    deps = [
+        ":gen_periodic_resample_op_py",
+        "//tensorflow/core:protos_all_py",
+        "//tensorflow/python:framework_for_generated_wrappers",
+    ],
+)
+
+py_library(
+    name = "init_py",
+    srcs = [
+        "__init__.py",
+        "python/__init__.py",
+    ],
+    srcs_version = "PY2AND3",
+    deps = [
+        ":periodic_resample_op_py",
+    ],
+)
+
+# py_library(
+#     name = "periodic_resample_op_py",
+#     srcs = ["python/ops/periodic_resample_op.py"],
+#     data = ["python/ops/_periodic_resample_op.so"],
+#     srcs_version = "PY2AND3",
+# )
+
+filegroup(
+    name = "all_files",
+    srcs = glob(
+        ["**/*"],
+        exclude = [
+            "**/METADATA",
+            "**/OWNERS",
+        ],
+    ),
+    visibility = ["//tensorflow:__subpackages__"],
+)
+
+filegroup(
+    name = "custom_op_sources",
+    srcs = glob(
+        [
+            "ops/*.cc",
+            "kernels/*.cc",
+        ],
+        exclude = [
+            "ops/*_test.cc",
+            "kernels/*_test.cc",
+        ],
+    ),
+)
+
+filegroup(
+    name = "custom_op_headers",
+    srcs = glob(
+        [
+            "kernels/*.h",
+            "ops/*.h",
+        ],
+    ),
+)
diff --git a/tensorflow/contrib/periodic_resample/__init__.py b/tensorflow/contrib/periodic_resample/__init__.py
new file mode 100644
index 0000000000..fde9091b88
--- /dev/null
+++ b/tensorflow/contrib/periodic_resample/__init__.py
@@ -0,0 +1,27 @@
+# =============================================================================
+# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# =============================================================================
+
+"""Custom op used by periodic_resample."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from tensorflow.contrib.periodic_resample.python.ops.periodic_resample_op import periodic_resample
+from tensorflow.python.util.all_util import remove_undocumented
+
+_allowed_symbols = ["periodic_resample"]
+
+remove_undocumented(__name__, _allowed_symbols)
diff --git a/tensorflow/contrib/periodic_resample/kernels/periodic_resample_op.cc b/tensorflow/contrib/periodic_resample/kernels/periodic_resample_op.cc
new file mode 100644
index 0000000000..9cee405cef
--- /dev/null
+++ b/tensorflow/contrib/periodic_resample/kernels/periodic_resample_op.cc
@@ -0,0 +1,26 @@
+// =============================================================================
+// Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+// =============================================================================
+
+#include "tensorflow/core/framework/register_types.h"
+#include "tensorflow/contrib/periodic_resample/kernels/periodic_resample_op.h"
+
+namespace tensorflow {
+
+REGISTER_KERNEL_BUILDER(Name("PeriodicResample")
+                            .Device(DEVICE_CPU),
+                        PeriodicResampleOp);
+
+}  // namespace tensorflow
diff --git a/tensorflow/contrib/periodic_resample/kernels/periodic_resample_op.h b/tensorflow/contrib/periodic_resample/kernels/periodic_resample_op.h
new file mode 100644
index 0000000000..bef21f7a5c
--- /dev/null
+++ b/tensorflow/contrib/periodic_resample/kernels/periodic_resample_op.h
@@ -0,0 +1,230 @@
+// =============================================================================
+// Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+// =============================================================================
+
+#ifndef TENSORFLOW_KERNELS_PERIODICRESAMPLE_OP_H_
+#define TENSORFLOW_KERNELS_PERIODICRESAMPLE_OP_H_
+
+#include <cmath>
+#include <type_traits>
+#include <vector>
+#include "tensorflow/core/framework/op.h"
+#include "tensorflow/core/framework/op_kernel.h"
+#include "tensorflow/core/framework/shape_inference.h"
+#include "tensorflow/core/framework/tensor_shape.h"
+#include "tensorflow/core/lib/core/status.h"
+
+namespace {
+
+template <class IndexVecT, class IndexT>
+IndexT compute_input_index(
+    IndexVecT* target_dimensions, const IndexT& output_index,
+    const IndexVecT& original_dimensions, const int& adjustable_dimension,
+    const std::vector<tensorflow::int64>& dimension_ceiling,
+    const std::vector<tensorflow::int64>& cumulative_dimensions, IndexT* result,
+    std::vector<IndexT>* output_indices, const int& rank) {
+  *result = 0;
+  output_indices->clear();
+
+  // un-rasterize the output index
+  auto last_reduced_i = output_index;
+  for (auto r = rank - 1; r >= 0; --r) {
+    (*output_indices)[r] = last_reduced_i % (*target_dimensions)[r];
+    last_reduced_i =
+        (last_reduced_i - (*output_indices)[r]) / (*target_dimensions)[r];
+  }
+
+  // rasterize the input index
+  IndexT last_index_factor = 1;
+  for (auto r = rank - 1; r >= 0; --r) {
+    IndexT index = 0;
+    if (r != adjustable_dimension)
+      index = (*output_indices)[r] / dimension_ceiling[r];
+    else {
+      for (int qi = 0; qi < rank; ++qi) {
+        if (qi == adjustable_dimension) continue;
+        index += cumulative_dimensions[qi] *
+                 ((*output_indices)[qi] % dimension_ceiling[qi]);
+      }
+      index *= (*target_dimensions)[adjustable_dimension];
+      index += (*output_indices)[r];
+    }
+    *result += last_index_factor * index;
+    last_index_factor *= original_dimensions[r];
+  }
+
+  return *result;
+}
+
+template <class InputDataT,
+          class IndexVecT>  // both types are needed here b/c IndexVecT and
+                            // InputDataT are not related
+                            void
+                            fill_periodic_tensor(
+                                tensorflow::OpKernelContext* context,
+                                const IndexVecT& desired_shape,
+                                const tensorflow::Tensor& input_tensor) {
+  // input is a strided array (last index is fastest, C-ordered)
+  auto input = input_tensor.flat<InputDataT>();
+  const int rank = input_tensor.dims();
+  // original and target dimensions
+  std::vector<tensorflow::int64> original_dimensions(rank),
+      target_dimensions(rank);
+  tensorflow::int64 total_size(input_tensor.NumElements()), new_sliced_size(1);
+  // factors by which original_dimensions increases/decreases w.r.t.
+  // target_dimensions
+  std::vector<tensorflow::int64> dimension_ceiling(rank),
+      cumulative_dimensions(rank);
+  // index of adjustable dimension
+  int adjustable_dimension;
+  tensorflow::TensorShape output_shape;
+
+  // requires that the rank of the input tensor and length of the desired shape
+  // are equal
+  OP_REQUIRES(context, rank == desired_shape.size(),
+              tensorflow::errors::InvalidArgument(
+                  "periodic_resample expects the rank of the input tensor, ",
+                  rank, ", to be the same as the length of the desired shape, ",
+                  desired_shape.size(), "."));
+
+  bool found = false;
+  for (int i = 0; i < rank; ++i) {
+    // if (desired_shape(i) < 1) {
+    if (desired_shape[i] < 1) {
+      // only one index can be adjustable
+      OP_REQUIRES(context, !found,
+                  tensorflow::errors::InvalidArgument(
+                      "periodic_resample expects only "
+                      "one index to be marked as adjustable."));
+      adjustable_dimension = i;
+      found = true;
+    } else {
+      // target_dimensions[i] = desired_shape(i);
+      target_dimensions[i] = desired_shape[i];
+      new_sliced_size *= target_dimensions[i];
+    }
+  }
+  // at least one index needs to be adjustable
+  OP_REQUIRES(context, found,
+              tensorflow::errors::InvalidArgument(
+                  "periodic_resample expects at least "
+                  "one index to be marked as adjustable."));
+
+  int count = 0;
+  for (const auto dim_info : input_tensor.shape()) {
+    original_dimensions[count] = dim_info.size;
+    ++count;
+  }
+
+  target_dimensions[adjustable_dimension] = total_size / new_sliced_size;
+
+  count = 0;
+  for (int i = 0; i < input_tensor.shape().dims(); ++i) {
+    dimension_ceiling[count] = tensorflow::int64(std::ceil(
+        float(target_dimensions[count]) / float(original_dimensions[count])));
+    if (count == 0)
+      cumulative_dimensions[count] = 1;
+    else
+      cumulative_dimensions[count] =
+          cumulative_dimensions[count - 1] * dimension_ceiling[count - 1];
+    ++count;
+  }
+
+  // ensure that the new dimension is greater than zero
+  OP_REQUIRES(context, target_dimensions[adjustable_dimension] > 0,
+              tensorflow::errors::InvalidArgument(
+                  "periodic_resample found that the "
+                  "adjustable dimension, ",
+                  adjustable_dimension, ", isn't greater than zero, ",
+                  target_dimensions[adjustable_dimension], "."));
+  for (int i = 0; i < rank; ++i) {
+    output_shape.AddDim(target_dimensions[i]);
+  }
+  const auto new_size =
+      new_sliced_size * target_dimensions[adjustable_dimension];
+
+  // Create an output tensor and attach it to the current context
+  tensorflow::Tensor* output_tensor = nullptr;
+  OP_REQUIRES_OK(context,
+                 context->allocate_output(0, output_shape, &output_tensor));
+  auto output = output_tensor->flat<InputDataT>();
+
+  // memory is allocated for these variables outside the inner loop for
+  // efficiency (although, I could create a separate class scope for
+  // this purpose instead)
+  tensorflow::int64 result = 0;
+  std::vector<tensorflow::int64> output_indices(target_dimensions.size());
+
+  // Fill output tensor with periodically resampled input tensor values
+  for (tensorflow::int64 output_index = 0; output_index < new_size;
+       ++output_index) {
+    output(output_index) = input(compute_input_index(
+        &target_dimensions, output_index, original_dimensions,
+        adjustable_dimension, dimension_ceiling, cumulative_dimensions, &result,
+        &output_indices, rank));
+  }
+}
+
+void create_output_tensor(
+    tensorflow::OpKernelContext* context,
+    const tensorflow::Tensor& input_tensor,
+    const tensorflow::DataType& input_tensor_type,
+    const tensorflow::PartialTensorShape& desired_shape_tensor) {
+  auto desired_shape = desired_shape_tensor.dim_sizes();
+
+  // obligatory type switch
+  switch (input_tensor_type) {
+    case tensorflow::DataTypeToEnum<float>::value:
+      fill_periodic_tensor<float>(context, desired_shape, input_tensor);
+      break;
+    case tensorflow::DataTypeToEnum<double>::value:
+      fill_periodic_tensor<double>(context, desired_shape, input_tensor);
+      break;
+    case tensorflow::DataTypeToEnum<tensorflow::int32>::value:
+      fill_periodic_tensor<tensorflow::int32>(context, desired_shape,
+                                              input_tensor);
+      break;
+    case tensorflow::DataTypeToEnum<tensorflow::int64>::value:
+      fill_periodic_tensor<tensorflow::int64>(context, desired_shape,
+                                              input_tensor);
+      break;
+    default:;
+  }
+}
+
+}  // namespace
+
+class PeriodicResampleOp : public tensorflow::OpKernel {
+ public:
+  explicit PeriodicResampleOp(tensorflow::OpKernelConstruction* context)
+      : tensorflow::OpKernel(context) {
+    // Get the desired shape
+    OP_REQUIRES_OK(context, context->GetAttr("shape", &desired_shape));
+  }
+
+  void Compute(tensorflow::OpKernelContext* context) override {
+    // Grab the input tensor
+    const tensorflow::Tensor& input_tensor = context->input(0);
+    const tensorflow::DataType input_tensor_type = context->input_dtype(0);
+
+    create_output_tensor(context, input_tensor, input_tensor_type,
+                         desired_shape);
+  }
+
+ private:
+  tensorflow::PartialTensorShape desired_shape;
+};
+
+#endif  // TENSORFLOW_KERNELS_PERIODICRESAMPLE_OP_H_
diff --git a/tensorflow/contrib/periodic_resample/ops/array_ops.cc b/tensorflow/contrib/periodic_resample/ops/array_ops.cc
new file mode 100644
index 0000000000..6029ad6a0d
--- /dev/null
+++ b/tensorflow/contrib/periodic_resample/ops/array_ops.cc
@@ -0,0 +1,88 @@
+// =============================================================================
+// Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+// =============================================================================
+
+#include "tensorflow/core/framework/common_shape_fns.h"
+#include "tensorflow/core/framework/op.h"
+#include "tensorflow/core/framework/op_kernel.h"
+#include "tensorflow/core/framework/shape_inference.h"
+
+using namespace tensorflow;
+
+REGISTER_OP("PeriodicResample")
+    .Attr("T: numbertype")
+    .Input("values: T")
+    .Attr("shape: shape")
+    .Output("output: T")
+    .SetShapeFn(shape_inference::ExplicitShape)
+    .Doc(R"doc(
+Periodically resample elements of a tensor to conform to `shape`.
+
+This function implements a slightly more generic version of the subpixel
+convolutions found in this [paper](https://arxiv.org/abs/1609.05158).
+
+The formula for computing the elements in the `output` tensor is as follows:
+  `T` = `values` tensor of rank `R`
+  `S` = desired `shape` of output tensor (vector of length `R`)
+  `P` = `output` tensor of rank `R`
+  \((T_1,\ldots,T_R)\) = shape(`T`)
+  \([S_1,\ldots,S_q,\ldots,S_R]\) = elements of vector `S`
+
+  A single element in `S` is left unspecified (denoted \(S_q=-1\)).
+  Let \(f_i\) denote the (possibly non-integer) factor that relates the original
+  dimension to the desired dimensions, \(S_i=f_i T_i\), for \(i\neq q\) where
+  \(f_i>0\).
+  Define the following:
+    \(g_i=\lceil f_i\rceil\)
+    \(t=\prod_i T_i\)
+    \(s=\prod_{i\neq q} S_i\)
+  \(S_q\) can then be defined as by \(S_q=\lfloor t/s\rfloor\).
+  The elements of the resulting tensor are defined as
+  \(P_{s_1,\ldots,s_R}=T_{h_1,\ldots,h_q,\ldots,h_R}\).
+  The \(h_i\) (\(i\neq q\)) are defined by \(h_i=\lfloor s_i/g_i\rfloor\).
+  \(h_q=S_q\sum_{j\neq q}^{q-1}G_j \mathrm{mod}(s_j,g_j) + s_q\), where
+  \(G_j=\prod_{i}^{j-1}g_i\) (\(G_0=1\)).
+
+One drawback of this method is that whenever the output dimensions are slightly
+less than integer multiples of the input dimensions, many of the tensor elements
+are repeated in an inefficient way. This is resolved by specifying that all
+desired dimensions are integer multiples of the input tensor.
+
+For example:
+
+```prettyprint
+`input` is [[ 0  1  2  3]
+            [ 4  5  6  7]
+            [ 8  9 10 11]]
+
+tf.periodic_resample(input, [6, None]) ==> [[ 0  1]
+                                            [ 2  3]
+                                            [ 4  5]
+                                            [ 6  7]
+                                            [ 8  9]
+                                            [10 11]]
+```
+
+values: The tensor of rank `R` to periodic_resample
+shape: A 1-D tensor representing the desired shape of the output tensor.
+  Exactly one element of this tensor must have the value `None` which represents
+  that this dimension of `values` can be adjusted downward in order to
+  accommodate increases in other dimensions. The specified sizes of the
+  non-adjustable dimensions must by at least as large as in the `values` tensor.
+output: Periodically resampled tensor that has dimensions specified as in
+  `shape` except that the dimension specified as `None` will be minimally
+  decreased as necessary.
+
+)doc");
diff --git a/tensorflow/contrib/periodic_resample/python/__init__.py b/tensorflow/contrib/periodic_resample/python/__init__.py
new file mode 100644
index 0000000000..a8b6ead0f5
--- /dev/null
+++ b/tensorflow/contrib/periodic_resample/python/__init__.py
@@ -0,0 +1,20 @@
+# =============================================================================
+# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# =============================================================================
+"""Public API of periodic_resample."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
diff --git a/tensorflow/contrib/periodic_resample/python/kernel_tests/periodic_resample_op_test.py b/tensorflow/contrib/periodic_resample/python/kernel_tests/periodic_resample_op_test.py
new file mode 100644
index 0000000000..1d727870f6
--- /dev/null
+++ b/tensorflow/contrib/periodic_resample/python/kernel_tests/periodic_resample_op_test.py
@@ -0,0 +1,101 @@
+# =============================================================================
+# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# =============================================================================
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import numpy
+import tensorflow
+from tensorflow.contrib.periodic_resample import periodic_resample
+from tensorflow.python.framework import test_util
+from tensorflow.python.ops import variables
+from tensorflow.python.platform import googletest
+
+
+class PeriodicResampleTest(test_util.TensorFlowTestCase):
+
+  def testPeriodicResampleBasic2D(self):
+
+    input_tensor = numpy.arange(12).reshape((3, 4))
+    desired_shape = numpy.array([6, None])
+    output_tensor = input_tensor.reshape((6, 2))
+
+    with self.test_session():
+      variables.global_variables_initializer().run()
+      result = periodic_resample(input_tensor, desired_shape).eval()
+      self.assertAllEqual(result, output_tensor)
+
+  def testPeriodicResampleTruncatedBasic2D(self):
+
+    input_tensor = numpy.arange(12).reshape((3, 4))
+    desired_shape = numpy.array([5, None])
+    output_tensor = input_tensor.reshape((6, 2))[:-1]
+
+    with self.test_session():
+      variables.global_variables_initializer().run()
+      result = periodic_resample(input_tensor, desired_shape).eval()
+      self.assertAllEqual(result, output_tensor)
+
+  def testPeriodicResampleBasic3D(self):
+
+    input_tensor = numpy.arange(2*2*4).reshape((2, 2, 4))
+    desired_shape = numpy.array([4, 4, None])
+    output_tensor = numpy.array([[[0], [2], [4], [6]],
+                                 [[1], [3], [5], [7]],
+                                 [[8], [10], [12], [14]],
+                                 [[9], [11], [13], [15]]])
+
+    # NOTE: output_tensor != input_tensor.reshape((4, 4, -1))
+    with self.test_session():
+      variables.global_variables_initializer().run()
+      result = periodic_resample(input_tensor, desired_shape).eval()
+      # input_tensor[0, 0, 0] == result[0, 0, 0]
+      # input_tensor[0, 0, 1] == result[1, 0, 0]
+      # input_tensor[0, 0, 2] == result[0, 1, 0]
+      # input_tensor[0, 0, 3] == result[1, 1, 0]
+      self.assertAllEqual(result, output_tensor)
+
+  def testPeriodicResampleBasic4D(self):
+
+    input_tensor = numpy.arange(2*2*2*8).reshape((2, 2, 2, 8))
+    desired_shape = numpy.array([4, 4, 4, None])
+    output_tensor = numpy.array([[[[0], [4], [8], [12]],
+                                  [[2], [6], [10], [14]],
+                                  [[16], [20], [24], [28]],
+                                  [[18], [22], [26], [30]]],
+                                 [[[1], [5], [9], [13]],
+                                  [[3], [7], [11], [15]],
+                                  [[17], [21], [25], [29]],
+                                  [[19], [23], [27], [31]]],
+                                 [[[32], [36], [40], [44]],
+                                  [[34], [38], [42], [46]],
+                                  [[48], [52], [56], [60]],
+                                  [[50], [54], [58], [62]]],
+                                 [[[33], [37], [41], [45]],
+                                  [[35], [39], [43], [47]],
+                                  [[49], [53], [57], [61]],
+                                  [[51], [55], [59], [63]]]])
+
+    # NOTE: output_tensor != input_tensor.reshape((4, 4, 4, -1))
+    with self.test_session():
+      variables.global_variables_initializer().run()
+      result = periodic_resample(input_tensor, desired_shape).eval()
+      self.assertAllEqual(result, output_tensor)
+
+
+if __name__ == "__main__":
+  googletest.main()
diff --git a/tensorflow/contrib/periodic_resample/python/ops/periodic_resample_op.py b/tensorflow/contrib/periodic_resample/python/ops/periodic_resample_op.py
new file mode 100644
index 0000000000..6a09f70f44
--- /dev/null
+++ b/tensorflow/contrib/periodic_resample/python/ops/periodic_resample_op.py
@@ -0,0 +1,30 @@
+# =============================================================================
+# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# =============================================================================
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+
+from tensorflow.contrib.periodic_resample.python.ops import gen_periodic_resample_op
+
+from tensorflow.contrib.periodic_resample.python.ops.gen_periodic_resample_op import periodic_resample
+
+from tensorflow.contrib.util import loader
+from tensorflow.python.platform import resource_loader
+
+_periodic_resample_op = loader.load_op_library(
+    resource_loader.get_path_to_datafile('_periodic_resample_op.so'))
diff --git a/tensorflow/contrib/rnn/python/kernel_tests/core_rnn_cell_test.py b/tensorflow/contrib/rnn/python/kernel_tests/core_rnn_cell_test.py
index f130a2187c..84fcf733c1 100644
--- a/tensorflow/contrib/rnn/python/kernel_tests/core_rnn_cell_test.py
+++ b/tensorflow/contrib/rnn/python/kernel_tests/core_rnn_cell_test.py
@@ -40,7 +40,6 @@ from tensorflow.python.ops import variable_scope
 from tensorflow.python.ops import variables as variables_lib
 from tensorflow.python.platform import test
 
-
 # pylint: enable=protected-access
 Linear = core_rnn_cell._Linear  # pylint: disable=invalid-name
 
diff --git a/tensorflow/contrib/tpu/profiler/pip_package/cloud_tpu_profiler/main.py b/tensorflow/contrib/tpu/profiler/pip_package/cloud_tpu_profiler/main.py
index 3bdd475fad..7970c20a26 100644
--- a/tensorflow/contrib/tpu/profiler/pip_package/cloud_tpu_profiler/main.py
+++ b/tensorflow/contrib/tpu/profiler/pip_package/cloud_tpu_profiler/main.py
@@ -24,22 +24,18 @@ import sys
 
 import tensorflow as tf
 
-
 tf.flags.DEFINE_string('service_addr', '',
                        'Address of TPU profiler service e.g. localhost:8466')
-
-
 tf.flags.DEFINE_string('logdir', '',
                        'Path of TensorBoard log directory e.g. /tmp/tb_log')
-
-
 tf.flags.DEFINE_integer('duration_ms', 2000, 'Duration of tracing in ms.')
 
-
 FLAGS = tf.flags.FLAGS
+EXECUTABLE = 'data/capture_tpu_profile'
 
 
-EXECUTABLE = 'data/capture_tpu_profile'
+def run_main():
+  tf.app.run(main)
 
 
 def main(unused_argv=None):
@@ -54,4 +50,4 @@ def main(unused_argv=None):
 
 
 if __name__ == '__main__':
-  tf.app.run(main)
+  run_main()
diff --git a/tensorflow/contrib/tpu/profiler/pip_package/setup.py b/tensorflow/contrib/tpu/profiler/pip_package/setup.py
index e77cae4695..ee6950699e 100644
--- a/tensorflow/contrib/tpu/profiler/pip_package/setup.py
+++ b/tensorflow/contrib/tpu/profiler/pip_package/setup.py
@@ -23,7 +23,7 @@ from setuptools import setup
 _VERSION = '1.3.0-a1'
 
 CONSOLE_SCRIPTS = [
-    'capture_tpu_profile=cloud_tpu_profiler.main:main',
+    'capture_tpu_profile=cloud_tpu_profiler.main:run_main',
 ]
 
 REQUIRED_PACKAGES = [
diff --git a/tensorflow/contrib/verbs/BUILD b/tensorflow/contrib/verbs/BUILD
index 746ff38b37..38a84ffb10 100644
--- a/tensorflow/contrib/verbs/BUILD
+++ b/tensorflow/contrib/verbs/BUILD
@@ -7,6 +7,8 @@ package(default_visibility = [
 
 licenses(["notice"])  # Apache 2.0
 
+load("//tensorflow:tensorflow.bzl", "tf_cuda_library")
+
 exports_files(["LICENSE"])
 
 filegroup(
@@ -97,7 +99,7 @@ cc_library(
     alwayslink = 1,
 )
 
-cc_library(
+tf_cuda_library(
     name = "rdma_rendezvous_mgr",
     srcs = ["rdma_rendezvous_mgr.cc"],
     hdrs = ["rdma_rendezvous_mgr.h"],
@@ -130,7 +132,7 @@ cc_library(
     ],
 )
 
-cc_library(
+tf_cuda_library(
     name = "rdma",
     srcs = ["rdma.cc"],
     hdrs = ["rdma.h"],
diff --git a/tensorflow/contrib/verbs/rdma.cc b/tensorflow/contrib/verbs/rdma.cc
index ac8d994502..ae9a384565 100644
--- a/tensorflow/contrib/verbs/rdma.cc
+++ b/tensorflow/contrib/verbs/rdma.cc
@@ -18,11 +18,14 @@ limitations under the License.
 #include "tensorflow/contrib/verbs/rdma.h"
 #include <fcntl.h>
 #include <cstdlib>
+#include <fcntl.h>
 #include "tensorflow/contrib/verbs/verbs_util.h"
 #include "tensorflow/core/common_runtime/device_mgr.h"
 #include "tensorflow/core/common_runtime/dma_helper.h"
+#if GOOGLE_CUDA
 #include "tensorflow/core/common_runtime/gpu/gpu_util.h"
 #include "tensorflow/core/common_runtime/gpu/process_state.h"
+#endif
 #include "tensorflow/core/distributed_runtime/rendezvous_mgr_interface.h"
 #include "tensorflow/core/distributed_runtime/session_mgr.h"
 #include "tensorflow/core/framework/rendezvous.h"
@@ -31,6 +34,7 @@ limitations under the License.
 #include "tensorflow/core/lib/core/stringpiece.h"
 #include "tensorflow/core/lib/hash/hash.h"
 #include "tensorflow/core/lib/random/random.h"
+#include "tensorflow/core/lib/core/threadpool.h"
 
 namespace tensorflow {
 
@@ -418,9 +422,6 @@ RdmaAdapter::RdmaAdapter(const WorkerEnv* worker_env)
                       0);
   CHECK(cq_) << "Failed to create completion queue";
   CHECK(!ibv_req_notify_cq(cq_, 0)) << "Failed to request CQ notification";
-  polling_thread_.reset(Env::Default()->StartThread(
-      ThreadOptions(), "RdmaAdapterCQThread", [this] { Process_CQ(); }));
-  VLOG(2) << "Start RdmaAdapter: " << name();
 }
 
 RdmaAdapter::~RdmaAdapter() {
@@ -432,6 +433,12 @@ RdmaAdapter::~RdmaAdapter() {
   CHECK(!ibv_close_device(context_)) << "Failed to release context";
 }
 
+void RdmaAdapter::StartPolling() {
+  polling_thread_.reset(Env::Default()->StartThread(
+      ThreadOptions(), "RdmaAdapterCQThread", [this] { Process_CQ(); }));
+  VLOG(2) << "Start RdmaAdapter: " << name();
+}
+
 string RdmaAdapter::name() const { return string(context_->device->name); }
 
 // Function to process incoming messages
@@ -452,9 +459,9 @@ void RdmaAdapter::Process_CQ() {
     CHECK_GE(ne, 0);
     for (int i = 0; i < ne; ++i) {
       CHECK(wc_[i].status == IBV_WC_SUCCESS)
-          << "Failed status \n"
-          << ibv_wc_status_str(wc_[i].status) << " " << wc_[i].status << " "
-          << static_cast<int>(wc_[i].wr_id) << " " << wc_[i].vendor_err;
+          << "Failed status \n" << ibv_wc_status_str(wc_[i].status) << " "
+          << wc_[i].status << " " << static_cast<int>(wc_[i].wr_id) << " "
+          << wc_[i].vendor_err;
       if (wc_[i].opcode == IBV_WC_RECV_RDMA_WITH_IMM) {
         RdmaChannel* rc = reinterpret_cast<RdmaChannel*>(wc_[i].wr_id);
         // put back a recv wr.
@@ -557,9 +564,44 @@ void RdmaAdapter::Process_CQ() {
   }
 }
 
+int RdmaChannel::PingPostRecv() {
+  struct ibv_recv_wr wr, *bad_wr;
+  memset(&wr, 0, sizeof(wr));
+  wr.sg_list = &ping_sge_list_;
+  wr.num_sge = 1;
+  wr.wr_id = kPingRecvWrid;
+
+  return ibv_post_recv(qp_, &wr, &bad_wr);
+}
+
+int RdmaChannel::PingPostSend() {
+  struct ibv_send_wr wr, *bad_wr;
+  memset(&wr, 0, sizeof(wr));
+  wr.wr_id = (uint64_t) this;
+  wr.sg_list = &ping_sge_list_;
+  wr.num_sge = 1;
+  wr.opcode = IBV_WR_SEND;
+  wr.send_flags = IBV_SEND_SIGNALED;
+
+  return ibv_post_send(qp_, &wr, &bad_wr);
+}
+
 RdmaChannel::RdmaChannel(const RdmaAdapter* adapter, const string local_name,
                          const string remote_name)
     : adapter_(adapter), local_name_(local_name), remote_name_(remote_name) {
+
+  struct ibv_sge list;
+
+  mr_ = ibv_reg_mr(adapter_->pd_, ping_buff_, kPingBuffSize,
+                   IBV_ACCESS_LOCAL_WRITE);
+  CHECK(mr_) << "Failed to register memory region";
+
+  memset(&list, 0, sizeof(list));
+  list.addr = (uintptr_t)ping_buff_;
+  list.length = kPingBuffSize;
+  list.lkey = mr_->lkey;
+
+  ping_sge_list_ = list;
   // Create queue pair
   {
     struct ibv_qp_init_attr attr;
@@ -610,7 +652,7 @@ RdmaChannel::RdmaChannel(const RdmaAdapter* adapter, const string local_name,
   // create message and ack buffers, then initialize the tables.
   {
     const string buffer_names[] = {"tx_message_buffer", "rx_message_buffer",
-                                   "tx_ack_buffer", "rx_ack_buffer"};
+                                   "tx_ack_buffer",     "rx_ack_buffer"};
     tx_message_buffer_ = new RdmaMessageBuffer(this, buffer_names[0]);
     rx_message_buffer_ = new RdmaMessageBuffer(this, buffer_names[1]);
     tx_ack_buffer_ = new RdmaAckBuffer(this, buffer_names[2]);
@@ -632,15 +674,13 @@ RdmaChannel::RdmaChannel(const RdmaAdapter* adapter, const string local_name,
       buffer_index_name_table_.insert({index, buffer_names[i]});
       buffer_name_index_table_.insert({buffer_names[i], index});
     }
-
-    // Initiate recv
-    for (int i = 0; i < 100; i++) {
-      Recv();
-    }
   }
+  CHECK(PingPostRecv() == 0) << "Couldn't post receive from " << remote_name_
+                             << " with error " << std::strerror(errno);
 }
 
 RdmaChannel::~RdmaChannel() {
+  ibv_dereg_mr(mr_);
   CHECK(!ibv_destroy_qp(qp_)) << "Failed to destroy QP";
   delete tx_message_buffer_;
   delete rx_message_buffer_;
@@ -671,7 +711,7 @@ void RdmaChannel::SetRemoteAddress(const RdmaAddress& ra, bool override) {
 void RdmaChannel::Recv() {
   struct ibv_recv_wr wr;
   memset(&wr, 0, sizeof(wr));
-  wr.wr_id = (uint64_t)this;
+  wr.wr_id = (uint64_t) this;
   struct ibv_recv_wr* bad_wr;
   CHECK(!ibv_post_recv(qp_, &wr, &bad_wr)) << "Failed to post recv";
 }
@@ -825,11 +865,11 @@ void RdmaChannel::Connect(const RdmaAddress& remoteAddr) {
     attr.ah_attr.grh.traffic_class = adapter_->params_.traffic_class;
 
     int r;
-    CHECK(!(r = ibv_modify_qp(qp_, &attr,
-                              IBV_QP_STATE | IBV_QP_AV | IBV_QP_PATH_MTU |
-                                  IBV_QP_DEST_QPN | IBV_QP_RQ_PSN |
-                                  IBV_QP_MAX_DEST_RD_ATOMIC |
-                                  IBV_QP_MIN_RNR_TIMER)))
+    CHECK(!(r = ibv_modify_qp(qp_, &attr, IBV_QP_STATE | IBV_QP_AV |
+                                              IBV_QP_PATH_MTU |
+                                              IBV_QP_DEST_QPN | IBV_QP_RQ_PSN |
+                                              IBV_QP_MAX_DEST_RD_ATOMIC |
+                                              IBV_QP_MIN_RNR_TIMER)))
         << "QP to Ready to Receive " << r;
 
     memset(&attr, 0, sizeof(ibv_qp_attr));
@@ -840,10 +880,10 @@ void RdmaChannel::Connect(const RdmaAddress& remoteAddr) {
     attr.rnr_retry = 7; /* infinite */
     attr.max_rd_atomic = 1;
 
-    CHECK(!(r = ibv_modify_qp(qp_, &attr,
-                              IBV_QP_STATE | IBV_QP_TIMEOUT | IBV_QP_RETRY_CNT |
-                                  IBV_QP_RNR_RETRY | IBV_QP_SQ_PSN |
-                                  IBV_QP_MAX_QP_RD_ATOMIC)))
+    CHECK(!(r = ibv_modify_qp(qp_, &attr, IBV_QP_STATE | IBV_QP_TIMEOUT |
+                                              IBV_QP_RETRY_CNT |
+                                              IBV_QP_RNR_RETRY | IBV_QP_SQ_PSN |
+                                              IBV_QP_MAX_QP_RD_ATOMIC)))
         << "QP to Ready to Send " << r;
 
     connected_ = true;
@@ -930,7 +970,7 @@ void RdmaBuffer::Write(uint32_t imm_data, size_t buffer_size) {
 
   struct ibv_send_wr wr;
   memset(&wr, 0, sizeof(wr));
-  wr.wr_id = (uint64_t)this;
+  wr.wr_id = (uint64_t) this;
   wr.sg_list = &list;
   wr.num_sge = 1;
   wr.opcode = IBV_WR_RDMA_WRITE_WITH_IMM;
@@ -1025,9 +1065,10 @@ Rendezvous::DoneCallback RdmaTensorBuffer::getRecvTensorCallback(
     TensorProto proto;
     if (src_dev->tensorflow_gpu_device_info() &&
         (!send_args.alloc_attrs.on_host())) {
-      CHECK(send_args.device_context)
-          << "send dev name: " << src_dev->name()
-          << " gpu_info: " << src_dev->tensorflow_gpu_device_info();
+#if GOOGLE_CUDA
+      CHECK(send_args.device_context) << "send dev name: " << src_dev->name()
+                                      << " gpu_info: "
+                                      << src_dev->tensorflow_gpu_device_info();
 
       if (can_memcpy) {
         AllocatorAttributes host_alloc_attrs;
@@ -1053,8 +1094,8 @@ Rendezvous::DoneCallback RdmaTensorBuffer::getRecvTensorCallback(
         // aync instead
         GPUUtil::SetProtoFromGPU(
             in, src_dev, send_args.device_context, &proto, is_dead,
-            [this, proto, buffer_size, key, in, step_id, key_with_step_id,
-             is_dead, send_args, recv_args](const Status& s) mutable {
+	    [this, proto, buffer_size, key, in, step_id, key_with_step_id,
+            is_dead, send_args, recv_args](const Status& s) mutable {
               CHECK(s.ok()) << "copy proto from gpu sync";
               auto tensor_bytes = proto.ByteSize();
               buffer_size += tensor_bytes;
@@ -1063,6 +1104,7 @@ Rendezvous::DoneCallback RdmaTensorBuffer::getRecvTensorCallback(
                                  &proto, NULL, send_args, recv_args);
             });
       }
+#endif  // GOOGLE_CUDA
     } else {
       // tensor is in CPU memory.
       StringPiece copy_buf;
diff --git a/tensorflow/contrib/verbs/rdma.h b/tensorflow/contrib/verbs/rdma.h
index 00217c81d4..fea2327d77 100644
--- a/tensorflow/contrib/verbs/rdma.h
+++ b/tensorflow/contrib/verbs/rdma.h
@@ -67,9 +67,20 @@ struct RemoteMR {
   uint64_t remote_addr;
   uint32_t rkey;
 };
-enum BufferStatus { none, idle, busy };
-enum Location { local, remote };
-enum BufferType { ACK, MESSAGE, TENSOR };
+enum BufferStatus {
+  none,
+  idle,
+  busy
+};
+enum Location {
+  local,
+  remote
+};
+enum BufferType {
+  ACK,
+  MESSAGE,
+  TENSOR
+};
 enum RdmaMessageType {
   RDMA_MESSAGE_ACK,
   RDMA_MESSAGE_BUFFER_IDLE,
@@ -96,6 +107,7 @@ class RdmaAdapter {
   ~RdmaAdapter();
   // Adapter name, e.g. mlx5_0.
   string name() const;
+  void StartPolling();
   void Process_CQ();
 
  protected:
@@ -150,6 +162,15 @@ class RdmaChannel {
   void RemoveRecvCallback(const string& key);
   void RunRecvCallback(const string& key);
   static const int kNumMessageBuffers = 4;
+  static const int kPingRecvWrid = 0;
+
+ private:
+  static const int kPingBuffSize = 1024;
+  char ping_buff_[kPingBuffSize];
+  struct ibv_mr* mr_;
+  struct ibv_sge ping_sge_list_;
+  int PingPostRecv();
+  int PingPostSend();
 
  protected:
   const RdmaAdapter* adapter_;
@@ -202,7 +223,7 @@ class RdmaBuffer {
   }
   void FreeBuffer();
   void EnqueueItem(string Item);
-  virtual void SendNextItem(){};
+  virtual void SendNextItem() {};
   void CreateCPUBuffer(size_t size, bool lock = true);
   void SetRemoteMR(RemoteMR rmi, bool override);
   uint32_t LookupBufferIndex(const string& buffer_name) {
diff --git a/tensorflow/contrib/verbs/rdma_mgr.cc b/tensorflow/contrib/verbs/rdma_mgr.cc
index 09b878843f..9cb307bcfa 100644
--- a/tensorflow/contrib/verbs/rdma_mgr.cc
+++ b/tensorflow/contrib/verbs/rdma_mgr.cc
@@ -115,6 +115,57 @@ void RdmaMgr::SetupChannels() {
   }
 }
 
+// Check connectivity by pinging every channel
+bool RdmaMgr::ConnectivityCheck() {
+  int i, rcnt = 0, scnt = 0;
+
+  for (const auto& p : channel_table_) {
+    string worker_name = p.first;
+    RdmaChannel* rc = p.second;
+
+    VLOG(2) << "Ping to " << worker_name;
+    CHECK(rc->PingPostSend() == 0) << "Couldn't post send  to " << worker_name
+                                   << " with error: " << std::strerror(errno);
+    for (i = 0; i < rc->adapter_->params_.queue_depth - 1; i++) {
+      rc->Recv();
+    }
+  }
+
+  while (rcnt < num_remote_workers_ || scnt < num_remote_workers_) {
+    int ne;
+    do {
+      ne = ibv_poll_cq(rdma_adapter_->cq_, 2 * num_remote_workers_,
+                       rdma_adapter_->wc_);
+      CHECK(ne >= 0) << "poll CQ failed " << ne << "with error"
+                     << std::strerror(errno);
+    } while (ne < 1);
+
+    for (i = 0; i < ne; ++i) {
+      ibv_wc_status s = rdma_adapter_->wc_[i].status;
+      // recv complete
+      if ((int)rdma_adapter_->wc_[i].wr_id == RdmaChannel::kPingRecvWrid) {
+        CHECK(s == IBV_WC_SUCCESS) << ": " << ibv_wc_status_str(
+                                                  rdma_adapter_->wc_[i].status)
+                                   << "(" << rdma_adapter_->wc_[i].status
+                                   << ") for PING_RECV_WRID";
+        ++rcnt;
+        // send complete
+      } else {
+        RdmaChannel* rc =
+            reinterpret_cast<RdmaChannel*>(rdma_adapter_->wc_[i].wr_id);
+        CHECK(s == IBV_WC_SUCCESS) << ": " << ibv_wc_status_str(
+                                                  rdma_adapter_->wc_[i].status)
+                                   << "(" << rdma_adapter_->wc_[i].status
+                                   << ") to " << rc->remote_name_;
+        ++scnt;
+      }
+    }  // for
+  }    // while
+  CHECK(rcnt == scnt) << "Connectivity check failed!";
+  rdma_adapter_->StartPolling();
+  return (num_remote_workers_ == rcnt) && (num_remote_workers_ == scnt);
+}
+
 RdmaMgr::~RdmaMgr() {
   for (const auto& p : channel_table_) delete p.second;
   channel_table_.clear();
diff --git a/tensorflow/contrib/verbs/rdma_mgr.h b/tensorflow/contrib/verbs/rdma_mgr.h
index b156f64096..e711e60478 100644
--- a/tensorflow/contrib/verbs/rdma_mgr.h
+++ b/tensorflow/contrib/verbs/rdma_mgr.h
@@ -28,12 +28,16 @@ limitations under the License.
 namespace tensorflow {
 
 class RdmaMgr {
+  friend class RdmaChannel;
+  friend class RdmaAdapter;
+
  public:
   explicit RdmaMgr(const WorkerEnv* const worker_env,
                    GrpcChannelCache* const channel_cache);
   ~RdmaMgr();
   RdmaChannel* FindChannel(const string& key);
   void SetupChannels();
+  bool ConnectivityCheck();
   const string& local_worker() { return local_worker_; }
 
  private:
@@ -44,7 +48,6 @@ class RdmaMgr {
   RdmaAdapter* rdma_adapter_;
   typedef std::unordered_map<string, RdmaChannel*> ChannelTable;
   ChannelTable channel_table_;
-
   TF_DISALLOW_COPY_AND_ASSIGN(RdmaMgr);
 };
 
diff --git a/tensorflow/contrib/verbs/rdma_rendezvous_mgr.cc b/tensorflow/contrib/verbs/rdma_rendezvous_mgr.cc
index ce82ca2883..74f6681af3 100644
--- a/tensorflow/contrib/verbs/rdma_rendezvous_mgr.cc
+++ b/tensorflow/contrib/verbs/rdma_rendezvous_mgr.cc
@@ -21,8 +21,10 @@ limitations under the License.
 #include "tensorflow/core/common_runtime/device.h"
 #include "tensorflow/core/common_runtime/device_mgr.h"
 #include "tensorflow/core/common_runtime/dma_helper.h"
+#if GOOGLE_CUDA
 #include "tensorflow/core/common_runtime/gpu/gpu_util.h"
 #include "tensorflow/core/common_runtime/gpu/process_state.h"
+#endif  // GOOGLE_CUDA
 #include "tensorflow/core/lib/core/errors.h"
 #include "tensorflow/core/lib/strings/numbers.h"
 #include "tensorflow/core/lib/strings/str_util.h"
@@ -58,20 +60,13 @@ void RdmaRemoteRendezvous::RecvFromRemoteAsync(
   // parse src_name and dst_name
   string src_name, dst_name, unused;
   if (!DeviceNameUtils::SplitDeviceName(parsed.src_device, &src_name,
+                                        &unused) ||
+      !DeviceNameUtils::SplitDeviceName(parsed.dst_device, &dst_name,
                                         &unused)) {
-    s = errors::Internal("Could not parse src name.");
+    s = errors::Internal("Could not parse src or dst name.");
   }
-  CHECK(s.ok()) << "s is not ok, error code " << s.error_message();
-  if (!s.ok()) {
-    done(s, Args(), recv_args, Tensor{}, false);
-    return;
-  }
-  if (!DeviceNameUtils::SplitDeviceName(parsed.dst_device, &dst_name,
-                                        &unused)) {
-    s = errors::Internal("Could not parse dst name.");
-  }
-  CHECK(s.ok()) << "s is not ok, error code " << s.error_message();
   if (!s.ok()) {
+    LOG(ERROR) << "s is not ok, error code " << s.error_message();
     done(s, Args(), recv_args, Tensor{}, false);
     return;
   }
@@ -82,18 +77,13 @@ void RdmaRemoteRendezvous::RecvFromRemoteAsync(
   // insert callback
   rc->InsertRecvCallback(key_with_step_id, [this, key, key_with_step_id, rc,
                                             recv_args, parsed, done]() {
-    Status s;
-    Device* src_dev;
-    s = env_->device_mgr->LookupDevice("CPU:0", &src_dev);
-    CHECK(s.ok()) << "s is not ok, error code " << s.error_message();
-    if (!s.ok()) {
-      done(s, Args(), recv_args, Tensor(), true);
-      return;
-    }
-    Device* dst_dev;
-    s = env_->device_mgr->LookupDevice(parsed.dst_device, &dst_dev);
-    CHECK(s.ok()) << "s is not ok, error code " << s.error_message();
-    if (!s.ok()) {
+    Status src_s, dst_s, s;
+    Device* src_dev, *dst_dev;
+    src_s = env_->device_mgr->LookupDevice("CPU:0", &src_dev);
+    dst_s = env_->device_mgr->LookupDevice(parsed.dst_device, &dst_dev);
+    if (!src_s.ok() || !dst_s.ok()) {
+      s = src_s.ok() ? dst_s : src_s;
+      LOG(ERROR) << "s is not ok, error code " << s.error_message();
       done(s, Args(), recv_args, Tensor(), true);
       return;
     }
@@ -110,9 +100,10 @@ void RdmaRemoteRendezvous::RecvFromRemoteAsync(
       if (can_memcpy) {
         if (dst_dev->tensorflow_gpu_device_info() &&
             (!recv_args.alloc_attrs.on_host())) {
+#if GOOGLE_CUDA
           CHECK(recv_args.device_context)
-            << "send dev name: " << src_dev->name()
-            << " gpu_info: " << src_dev->tensorflow_gpu_device_info();
+              << "send dev name: " << src_dev->name()
+              << " gpu_info: " << src_dev->tensorflow_gpu_device_info();
           Allocator* alloc = ProcessState::singleton()->GetCUDAHostAllocator(0);
           Tensor copy(alloc, rm.data_type_, rm.tensor_shape_);
           memcpy(DMAHelper::base(&copy), input, rm.tensor_bytes_);
@@ -122,14 +113,15 @@ void RdmaRemoteRendezvous::RecvFromRemoteAsync(
 
           GPUUtil::CopyCPUTensorToGPU(
               &copy, recv_args.device_context, dst_dev, &gpu_copy,
-              [this, gpu_copy, key, key_with_step_id, recv_args, done, rm,
-               rc](const Status& s) {
+              [this, gpu_copy, key, key_with_step_id, recv_args, done, rm, rc](
+                  const Status& s) {
                 CHECK(s.ok()) << "copy tensor to gpu sync";
                 Tensor val;
                 val = std::move(gpu_copy);
                 RecvPostCopyOps(key, key_with_step_id, recv_args, done, rm, rc,
                                 val, s);
               });
+#endif  // GOOGLE_CUDA
           return;
         } else {
           AllocatorAttributes host_alloc_attrs;
diff --git a/tensorflow/contrib/verbs/verbs_server_lib.cc b/tensorflow/contrib/verbs/verbs_server_lib.cc
index 6d1c79c0fb..a606ef75a4 100644
--- a/tensorflow/contrib/verbs/verbs_server_lib.cc
+++ b/tensorflow/contrib/verbs/verbs_server_lib.cc
@@ -49,8 +49,8 @@ VerbsServer::~VerbsServer() {
 Status VerbsServer::ChannelCacheFactory(const ServerDef& server_def,
                                         GrpcChannelCache** channel_cache) {
   string name_prefix =
-      strings::StrCat("/job:", server_def.job_name(), "/replica:0",
-                      "/task:", server_def.task_index());
+      strings::StrCat("/job:", server_def.job_name(), "/replica:0", "/task:",
+                      server_def.task_index());
 
   GrpcChannelSpec channel_spec;
   TF_RETURN_IF_ERROR(ParseChannelSpec(server_def, &channel_spec));
@@ -103,6 +103,7 @@ Status VerbsServer::Start() {
           ThreadOptions(), "TF_verbs_service",
           [this] { verbs_service_->HandleRPCsLoop(); }));
       rdma_mgr_->SetupChannels();
+      CHECK(rdma_mgr_->ConnectivityCheck()) << "Connectivity check failed!";
       verbs_state_ = CONNECTED;
     }
   }
diff --git a/tensorflow/core/common_runtime/pending_counts.h b/tensorflow/core/common_runtime/pending_counts.h
index 9e39b6b7b9..5707f52592 100644
--- a/tensorflow/core/common_runtime/pending_counts.h
+++ b/tensorflow/core/common_runtime/pending_counts.h
@@ -44,7 +44,7 @@ namespace tensorflow {
 
 //    PendingCounts counts(layout);
 //    ...
-//    counts.decrement_panding(h[id], 1);
+//    counts.decrement_pending(h[id], 1);
 class PendingCounts {
  public:
   // The state machine for a node's execution.
diff --git a/tensorflow/core/common_runtime/shape_refiner.cc b/tensorflow/core/common_runtime/shape_refiner.cc
index c82d57694a..3ae52f414f 100644
--- a/tensorflow/core/common_runtime/shape_refiner.cc
+++ b/tensorflow/core/common_runtime/shape_refiner.cc
@@ -127,7 +127,7 @@ Status InferShapesForFunctionSubNode(const Node* node, ShapeRefiner* refiner,
 //
 // NOTE: Recursive user-defined functions are not supported.
 // Maybe we won't support recursive functions at all in TF, because of
-// other maintanabilty issues.
+// other maintainability issues.
 Status ShapeRefiner::InferShapesForFunction(
     const tensorflow::FunctionDef* function_def, bool keep_nested_shapes,
     ExtendedInferenceContext* outer_context) {
diff --git a/tensorflow/core/graph/graph_partition.cc b/tensorflow/core/graph/graph_partition.cc
index 1924c05d3d..add80eda23 100644
--- a/tensorflow/core/graph/graph_partition.cc
+++ b/tensorflow/core/graph/graph_partition.cc
@@ -1152,7 +1152,7 @@ Status Partition(const PartitionOptions& opts, Graph* g,
     // Add control edges from 'ref_control_inputs' to 'ref_recvs'.
     // NOTE(yuanbyu): Adding these control edges should not introduce
     // deadlocks. 'dst' has implicit "read" nodes that, when we split
-    // across devices, are made explicit; Retargettig the dependencies
+    // across devices, are made explicit; Retargeting the dependencies
     // to 'dst' to those nodes would not introduce cycles if there isn't
     // one before the transformation.
     // NOTE(yuanbyu): This may impact performance because it defers the
diff --git a/tensorflow/core/graph/mkl_graph_util.h b/tensorflow/core/graph/mkl_graph_util.h
index cb32d64334..880e4e712e 100644
--- a/tensorflow/core/graph/mkl_graph_util.h
+++ b/tensorflow/core/graph/mkl_graph_util.h
@@ -21,107 +21,108 @@ limitations under the License.
 #include "tensorflow/core/framework/op_kernel.h"
 
 namespace tensorflow {
-// Since our ops are going to produce and also consume N addition tensors
-// (Mkl) for N Tensorflow tensors, we can have following different
-// orderings among these 2N tensors.
-//
-// E.g., for Tensorflow tensors A, B, and C, our ops will produce and
-// consume A_m, B_m, and C_m additionally.
-//
-// INTERLEAVED: in this case 2N tensors are interleaved. So for above
-//              example, the ordering looks like: A, A_m, B, B_m, C, C_m.
-//
-// CONTIGUOUS: in thi case N Tensorflow tensors are contiguous followed
-//             by N Mkl tensors. So for above example, the ordering looks
-//             like: A, B, C, A_m, B_m, C_m
-//
-// Following APIs map index of original Tensorflow tensors to their
-// appropriate position based on selected ordering. For contiguous ordering,
-// we need to know the total number of tensors (parameter total).
-//
-typedef enum { TENSORS_INTERLEAVED, TENSORS_CONTIGUOUS } MklTfTensorOrdering;
-// NOTE: Currently, we use contiguous ordering. If you change this, then you
-// would need to change Mkl op definitions in nn_ops.cc.
-static MklTfTensorOrdering kTensorOrdering = TENSORS_CONTIGUOUS;
+  // Since our ops are going to produce and also consume N addition tensors
+  // (Mkl) for N Tensorflow tensors, we can have following different
+  // orderings among these 2N tensors.
+  //
+  // E.g., for Tensorflow tensors A, B, and C, our ops will produce and
+  // consume A_m, B_m, and C_m additionally.
+  //
+  // INTERLEAVED: in this case 2N tensors are interleaved. So for above
+  //              example, the ordering looks like: A, A_m, B, B_m, C, C_m.
+  //
+  // CONTIGUOUS: in thi case N Tensorflow tensors are contiguous followed
+  //             by N Mkl tensors. So for above example, the ordering looks
+  //             like: A, B, C, A_m, B_m, C_m
+  //
+  // Following APIs map index of original Tensorflow tensors to their
+  // appropriate position based on selected ordering. For contiguous ordering,
+  // we need to know the total number of tensors (parameter total).
+  //
+  typedef enum { TENSORS_INTERLEAVED, TENSORS_CONTIGUOUS } MklTfTensorOrdering;
+  // NOTE: Currently, we use contiguous ordering. If you change this, then you
+  // would need to change Mkl op definitions in nn_ops.cc.
+  static MklTfTensorOrdering kTensorOrdering = TENSORS_CONTIGUOUS;
 
-// Get index of MetaData tensor from index 'n' of Data tensor.
-inline int DataIndexToMetaDataIndex(int n, int total_tensors) {
-  if (kTensorOrdering == MklTfTensorOrdering::TENSORS_INTERLEAVED) {
-    // For interleaved ordering, Mkl tensor follows immediately after
-    // Tensorflow tensor.
-    return n + 1;
-  } else {
-    CHECK_EQ(kTensorOrdering, MklTfTensorOrdering::TENSORS_CONTIGUOUS);
-    // For contiguous ordering, Mkl tensor is n+total_tensors / 2 away.
-    return n + total_tensors / 2;
+  // Get index of MetaData tensor from index 'n' of Data tensor.
+  inline int DataIndexToMetaDataIndex(int n, int total_tensors) {
+    if (kTensorOrdering == MklTfTensorOrdering::TENSORS_INTERLEAVED) {
+      // For interleaved ordering, Mkl tensor follows immediately after
+      // Tensorflow tensor.
+      return n + 1;
+    } else {
+      CHECK_EQ(kTensorOrdering, MklTfTensorOrdering::TENSORS_CONTIGUOUS);
+      // For contiguous ordering, Mkl tensor is n+total_tensors / 2 away.
+      return n + total_tensors / 2;
+    }
   }
-}
 
-int inline GetTensorDataIndex(int n, int total_tensors) {
-  if (kTensorOrdering == MklTfTensorOrdering::TENSORS_INTERLEAVED) {
-    return 2 * n;  // index corresponding to nth input/output tensor
-  } else {
-    CHECK_EQ(kTensorOrdering, MklTfTensorOrdering::TENSORS_CONTIGUOUS);
-    return n;
-  }
-}
+  int inline GetTensorDataIndex(int n, int total_tensors) {
+      if (kTensorOrdering == MklTfTensorOrdering::TENSORS_INTERLEAVED) {
+        return 2 * n;  // index corresponding to nth input/output tensor
+      } else {
+        CHECK_EQ(kTensorOrdering, MklTfTensorOrdering::TENSORS_CONTIGUOUS);
+        return n;
+      }
+    }
 
-int inline GetTensorMetaDataIndex(int n, int total_tensors) {
-  // Get index for TensorData first and then use mapping function
-  // to get TensorMetaData index from TensorData index.
-  int tidx = GetTensorDataIndex(n, total_tensors);
-  return DataIndexToMetaDataIndex(tidx, total_tensors);
-}
+  int inline GetTensorMetaDataIndex(int n, int total_tensors) {
+      // Get index for TensorData first and then use mapping function
+      // to get TensorMetaData index from TensorData index.
+      int tidx = GetTensorDataIndex(n, total_tensors);
+      return DataIndexToMetaDataIndex(tidx, total_tensors);
+    }
 
 namespace mkl_op_registry {
-static const char* kMklOpLabel = "MklOp";
-static const char* kMklOpLabelPattern = "label='MklOp'";
-
-// Get the name of Mkl op from original TensorFlow op
-// We prefix 'Mkl' to the original op to get Mkl op.
-inline string GetMklOpName(const string& name) {
-  // Prefix that we add to Tensorflow op name to construct Mkl op name.
-  const char* const kMklOpPrefix = "_Mkl";
-  return string(kMklOpPrefix) + name;
-}
+  static const char* kMklOpLabel = "MklOp";
+  static const char* kMklOpLabelPattern = "label='MklOp'";
 
-// Check whether opname with type T is registered as MKL-compliant.
-//
-// @input: name of the op
-// @input: T datatype to be used for checking op
-// @return: true if opname is registered as Mkl op; false otherwise
-static inline bool IsMklOp(const std::string& op_name, DataType T) {
-  string kernel = KernelsRegisteredForOp(op_name);
-  bool result =
-      kernel.find(kMklOpLabelPattern) != string::npos && (T == DT_FLOAT);
-  if (result) {
-    VLOG(1) << "mkl_op_registry::" << op_name << " is " << kMklOpLabel;
+  // Get the name of Mkl op from original TensorFlow op
+  // We prefix 'Mkl' to the original op to get Mkl op.
+  inline string GetMklOpName(const string& name) {
+    // Prefix that we add to Tensorflow op name to construct Mkl op name.
+    const char* const kMklOpPrefix = "_Mkl";
+    return string(kMklOpPrefix) + name;
   }
-  return result;
-}
 
-// Check whether opname with type T is registered as MKL-compliant and
-// is element-wise.
-//
-// @input: name of the op
-// @input: T datatype to be used for checking op
-// @return: true if opname is registered as element-wise Mkl op;
-// false otherwise
-static inline bool IsMklElementWiseOp(const std::string& op_name, DataType T) {
-  if (!IsMklOp(op_name, T)) {
-    return false;
+  // Check whether opname with type T is registered as MKL-compliant.
+  //
+  // @input: name of the op
+  // @input: T datatype to be used for checking op
+  // @return: true if opname is registered as Mkl op; false otherwise
+  static inline bool IsMklOp(const std::string& op_name, DataType T) {
+    string kernel = KernelsRegisteredForOp(op_name);
+    bool result =
+        kernel.find(kMklOpLabelPattern) != string::npos && (T == DT_FLOAT);
+    if (result) {
+      VLOG(1) << "mkl_op_registry::" << op_name << " is " << kMklOpLabel;
+    }
+    return result;
   }
 
-  bool result = (0 == op_name.compare(GetMklOpName("Add")) ||
-                 0 == op_name.compare(GetMklOpName("Sub")) ||
-                 0 == op_name.compare(GetMklOpName("Mul")) ||
-                 0 == op_name.compare(GetMklOpName("Maximum")) ||
-                 0 == op_name.compare(GetMklOpName("SquaredDifference")));
+  // Check whether opname with type T is registered as MKL-compliant and
+  // is element-wise.
+  //
+  // @input: name of the op
+  // @input: T datatype to be used for checking op
+  // @return: true if opname is registered as element-wise Mkl op;
+  // false otherwise
+  static inline bool IsMklElementWiseOp(const std::string& op_name,
+    DataType T) {
+    if (!IsMklOp(op_name, T)) {
+      return false;
+    }
 
-  VLOG(1) << "mkl_op_registry::" << op_name
-          << " is elementwise MKL op: " << result;
-  return result;
-}
+    bool result = (0 == op_name.compare(GetMklOpName("Add")) ||
+                    0 == op_name.compare(GetMklOpName("Sub")) ||
+                    0 == op_name.compare(GetMklOpName("Mul")) ||
+                    0 == op_name.compare(GetMklOpName("Maximum")) ||
+                    0 == op_name.compare(GetMklOpName("SquaredDifference")));
+
+    VLOG(1) << "mkl_op_registry::" << op_name
+            << " is elementwise MKL op: " << result;
+    return result;
+  }
 }  // namespace mkl_op_registry
 }  // namespace tensorflow
 #endif  // INTEL_MKL
diff --git a/tensorflow/core/graph/mkl_layout_pass.cc b/tensorflow/core/graph/mkl_layout_pass.cc
index f4c9073dee..912075aa28 100644
--- a/tensorflow/core/graph/mkl_layout_pass.cc
+++ b/tensorflow/core/graph/mkl_layout_pass.cc
@@ -37,8 +37,8 @@ limitations under the License.
 #include "tensorflow/core/platform/logging.h"
 #include "tensorflow/core/util/tensor_format.h"
 
-#include "tensorflow/core/graph/mkl_graph_util.h"
 #include "tensorflow/core/graph/mkl_layout_pass.h"
+#include "tensorflow/core/graph/mkl_graph_util.h"
 
 namespace tensorflow {
 
diff --git a/tensorflow/core/graph/mkl_tfconversion_pass.cc b/tensorflow/core/graph/mkl_tfconversion_pass.cc
index 3fd89e2b66..599bb88f01 100644
--- a/tensorflow/core/graph/mkl_tfconversion_pass.cc
+++ b/tensorflow/core/graph/mkl_tfconversion_pass.cc
@@ -33,8 +33,8 @@ limitations under the License.
 #include "tensorflow/core/lib/hash/hash.h"
 #include "tensorflow/core/platform/logging.h"
 
-#include "tensorflow/core/graph/mkl_graph_util.h"
 #include "tensorflow/core/graph/mkl_tfconversion_pass.h"
+#include "tensorflow/core/graph/mkl_graph_util.h"
 
 namespace tensorflow {
 
diff --git a/tensorflow/core/kernels/cwise_op_asinh.cc b/tensorflow/core/kernels/cwise_op_asinh.cc
index 822d72e068..0aec6aac34 100644
--- a/tensorflow/core/kernels/cwise_op_asinh.cc
+++ b/tensorflow/core/kernels/cwise_op_asinh.cc
@@ -1,4 +1,4 @@
-/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
+  /* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
diff --git a/tensorflow/core/kernels/cwise_op_bitwise_and.cc b/tensorflow/core/kernels/cwise_op_bitwise_and.cc
index 017a2182dc..5a6cf4bad1 100644
--- a/tensorflow/core/kernels/cwise_op_bitwise_and.cc
+++ b/tensorflow/core/kernels/cwise_op_bitwise_and.cc
@@ -16,8 +16,8 @@ limitations under the License.
 #include "tensorflow/core/kernels/cwise_ops_common.h"
 
 namespace tensorflow {
-REGISTER6(BinaryOp, CPU, "BitwiseAnd", functor::bitwise_and, int8, int16, int32,
-          int64, uint8, uint16);
+REGISTER8(BinaryOp, CPU, "BitwiseAnd", functor::bitwise_and, int8, int16, int32,
+          int64, uint8, uint16, uint32, uint64);
 
 #if TENSORFLOW_USE_SYCL
 #define REGISTER_SYCL_KERNEL(TYPE)                                      \
@@ -30,13 +30,15 @@ REGISTER_SYCL_KERNEL(int32);
 REGISTER_SYCL_KERNEL(int64);
 REGISTER_SYCL_KERNEL(uint8);
 REGISTER_SYCL_KERNEL(uint16);
+REGISTER_SYCL_KERNEL(uint32);
+REGISTER_SYCL_KERNEL(uint64);
 #undef REGISTER_SYCL_KERNEL
 
 #endif  // TENSORFLOW_USE_SYCL
 
 #if GOOGLE_CUDA
-REGISTER6(BinaryOp, GPU, "BitwiseAnd", functor::bitwise_and, int8, int16, int32,
-          int64, uint8, uint16);
+REGISTER8(BinaryOp, GPU, "BitwiseAnd", functor::bitwise_and, int8, int16, int32,
+          int64, uint8, uint16, uint32, uint64);
 #endif  // GOOGLE_CUDA
 
 }  // namespace tensorflow
diff --git a/tensorflow/core/kernels/cwise_op_bitwise_or.cc b/tensorflow/core/kernels/cwise_op_bitwise_or.cc
index 36f45fe92d..201a10198a 100644
--- a/tensorflow/core/kernels/cwise_op_bitwise_or.cc
+++ b/tensorflow/core/kernels/cwise_op_bitwise_or.cc
@@ -16,8 +16,8 @@ limitations under the License.
 #include "tensorflow/core/kernels/cwise_ops_common.h"
 
 namespace tensorflow {
-REGISTER6(BinaryOp, CPU, "BitwiseOr", functor::bitwise_or, int8, int16, int32,
-          int64, uint8, uint16);
+REGISTER8(BinaryOp, CPU, "BitwiseOr", functor::bitwise_or, int8, int16, int32,
+          int64, uint8, uint16, uint32, uint64);
 
 #if TENSORFLOW_USE_SYCL
 #define REGISTER_SYCL_KERNEL(TYPE)                                     \
@@ -30,13 +30,15 @@ REGISTER_SYCL_KERNEL(int32);
 REGISTER_SYCL_KERNEL(int64);
 REGISTER_SYCL_KERNEL(uint8);
 REGISTER_SYCL_KERNEL(uint16);
+REGISTER_SYCL_KERNEL(uint32);
+REGISTER_SYCL_KERNEL(uint64);
 #undef REGISTER_SYCL_KERNEL
 
 #endif  // TENSORFLOW_USE_SYCL
 
 #if GOOGLE_CUDA
-REGISTER6(BinaryOp, GPU, "BitwiseOr", functor::bitwise_or, int8, int16, int32,
-          int64, uint8, uint16);
+REGISTER8(BinaryOp, GPU, "BitwiseOr", functor::bitwise_or, int8, int16, int32,
+          int64, uint8, uint16, uint32, uint64);
 #endif  // GOOGLE_CUDA
 
 }  // namespace tensorflow
diff --git a/tensorflow/core/kernels/cwise_op_bitwise_xor.cc b/tensorflow/core/kernels/cwise_op_bitwise_xor.cc
index 36432d851d..2a7cd26995 100644
--- a/tensorflow/core/kernels/cwise_op_bitwise_xor.cc
+++ b/tensorflow/core/kernels/cwise_op_bitwise_xor.cc
@@ -16,8 +16,8 @@ limitations under the License.
 #include "tensorflow/core/kernels/cwise_ops_common.h"
 
 namespace tensorflow {
-REGISTER6(BinaryOp, CPU, "BitwiseXor", functor::bitwise_xor, int8, int16, int32,
-          int64, uint8, uint16);
+REGISTER8(BinaryOp, CPU, "BitwiseXor", functor::bitwise_xor, int8, int16, int32,
+          int64, uint8, uint16, uint32, uint64);
 
 #if TENSORFLOW_USE_SYCL
 #define REGISTER_SYCL_KERNEL(TYPE)                                      \
@@ -30,13 +30,15 @@ REGISTER_SYCL_KERNEL(int32);
 REGISTER_SYCL_KERNEL(int64);
 REGISTER_SYCL_KERNEL(uint8);
 REGISTER_SYCL_KERNEL(uint16);
+REGISTER_SYCL_KERNEL(uint32);
+REGISTER_SYCL_KERNEL(uint64);
 #undef REGISTER_SYCL_KERNEL
 
 #endif  // TENSORFLOW_USE_SYCL
 
 #if GOOGLE_CUDA
-REGISTER6(BinaryOp, GPU, "BitwiseXor", functor::bitwise_xor, int8, int16, int32,
-          int64, uint8, uint16);
+REGISTER8(BinaryOp, GPU, "BitwiseXor", functor::bitwise_xor, int8, int16, int32,
+          int64, uint8, uint16, uint32, uint64);
 #endif  // GOOGLE_CUDA
 
 }  // namespace tensorflow
diff --git a/tensorflow/core/kernels/cwise_op_gpu_bitwise_and.cu.cc b/tensorflow/core/kernels/cwise_op_gpu_bitwise_and.cu.cc
index 27f973c90d..3fbf69c114 100644
--- a/tensorflow/core/kernels/cwise_op_gpu_bitwise_and.cu.cc
+++ b/tensorflow/core/kernels/cwise_op_gpu_bitwise_and.cu.cc
@@ -19,7 +19,8 @@ limitations under the License.
 
 namespace tensorflow {
 namespace functor {
-DEFINE_BINARY6(bitwise_and, int8, int16, int32, int64, uint8, uint16);
+DEFINE_BINARY8(bitwise_and, int8, int16, int32, int64, uint8, uint16, uint32,
+               uint64);
 }  // namespace functor
 }  // namespace tensorflow
 
diff --git a/tensorflow/core/kernels/cwise_op_gpu_bitwise_or.cu.cc b/tensorflow/core/kernels/cwise_op_gpu_bitwise_or.cu.cc
index a34c3a52cd..8bcb82266a 100644
--- a/tensorflow/core/kernels/cwise_op_gpu_bitwise_or.cu.cc
+++ b/tensorflow/core/kernels/cwise_op_gpu_bitwise_or.cu.cc
@@ -19,7 +19,8 @@ limitations under the License.
 
 namespace tensorflow {
 namespace functor {
-DEFINE_BINARY6(bitwise_or, int8, int16, int32, int64, uint8, uint16);
+DEFINE_BINARY8(bitwise_or, int8, int16, int32, int64, uint8, uint16, uint32,
+               uint64);
 }  // namespace functor
 }  // namespace tensorflow
 
diff --git a/tensorflow/core/kernels/cwise_op_gpu_bitwise_xor.cu.cc b/tensorflow/core/kernels/cwise_op_gpu_bitwise_xor.cu.cc
index a4531ab7c6..e62a87aba4 100644
--- a/tensorflow/core/kernels/cwise_op_gpu_bitwise_xor.cu.cc
+++ b/tensorflow/core/kernels/cwise_op_gpu_bitwise_xor.cu.cc
@@ -19,7 +19,8 @@ limitations under the License.
 
 namespace tensorflow {
 namespace functor {
-DEFINE_BINARY6(bitwise_xor, int8, int16, int32, int64, uint8, uint16);
+DEFINE_BINARY8(bitwise_xor, int8, int16, int32, int64, uint8, uint16, uint32,
+               uint64);
 }  // namespace functor
 }  // namespace tensorflow
 
diff --git a/tensorflow/core/kernels/decode_bmp_op.cc b/tensorflow/core/kernels/decode_bmp_op.cc
index 6d9fdfcf33..c778278e8f 100644
--- a/tensorflow/core/kernels/decode_bmp_op.cc
+++ b/tensorflow/core/kernels/decode_bmp_op.cc
@@ -49,6 +49,12 @@ class DecodeBmpOp : public OpKernel {
     // Start decoding image to get shape details
     const StringPiece input = contents.scalar<string>()();
 
+    OP_REQUIRES(context, (32 <= input.size()),
+                errors::InvalidArgument("Incomplete bmp content, requires at "
+                                        "least 32 bytes to find the header "
+                                        "size, width, height, and bpp, got ",
+                                        input.size(), " bytes"));
+
     const uint8* img_bytes = reinterpret_cast<const uint8*>(input.data());
     const int32 header_size = internal::SubtleMustCopy(
         *(reinterpret_cast<const int32*>(img_bytes + 10)));
@@ -74,6 +80,22 @@ class DecodeBmpOp : public OpKernel {
                 errors::InvalidArgument(
                     "Number of channels must be 1, 3 or 4, was ", channels_));
 
+    // there may be padding bytes when the width is not a multiple of 4 bytes
+    // 8 * channels == bits per pixel
+    const int row_size = (8 * channels_ * width + 31) / 32 * 4;
+
+    const int last_pixel_offset =
+        header_size + (abs(height) - 1) * row_size + (width - 1) * channels_;
+
+    // [expected file size] = [last pixel offset] + [last pixel size=channels]
+    const int expected_file_size = last_pixel_offset + channels_;
+
+    OP_REQUIRES(
+        context, (expected_file_size <= input.size()),
+        errors::InvalidArgument("Incomplete bmp content, requires at least ",
+                                expected_file_size, " bytes, got ",
+                                input.size(), " bytes"));
+
     // if height is negative, data layout is top down
     // otherwise, it's bottom up
     bool top_down = (height < 0);
@@ -86,25 +108,23 @@ class DecodeBmpOp : public OpKernel {
 
     const uint8* bmp_pixels = &img_bytes[header_size];
 
-    Decode(bmp_pixels, output->flat<uint8>().data(), width, abs(height),
-           channels_, top_down);
+    Decode(bmp_pixels, row_size, output->flat<uint8>().data(), width,
+           abs(height), channels_, top_down);
   }
 
-  uint8* Decode(const uint8* input, uint8* const output, const int width,
-                const int height, const int channles, bool top_down);
+  uint8* Decode(const uint8* input, const int row_size, uint8* const output,
+                const int width, const int height, const int channles,
+                bool top_down);
 
  private:
   int channels_;
 };
 REGISTER_KERNEL_BUILDER(Name("DecodeBmp").Device(DEVICE_CPU), DecodeBmpOp);
 
-uint8* DecodeBmpOp::Decode(const uint8* input, uint8* const output,
-                           const int width, const int height,
-                           const int channels, bool top_down) {
-  // there may be padding bytes when the width is not a multiple of 4 bytes
-  // 8 * channels == bits per pixel
-  int row_size = (8 * channels * width + 31) / 32 * 4;
-
+uint8* DecodeBmpOp::Decode(const uint8* input, const int row_size,
+                           uint8* const output, const int width,
+                           const int height, const int channels,
+                           bool top_down) {
   for (int i = 0; i < height; i++) {
     int src_pos;
     int dst_pos;
diff --git a/tensorflow/core/kernels/depthwise_conv_op.cc b/tensorflow/core/kernels/depthwise_conv_op.cc
index 02da64ce98..a5fd07fbe1 100644
--- a/tensorflow/core/kernels/depthwise_conv_op.cc
+++ b/tensorflow/core/kernels/depthwise_conv_op.cc
@@ -430,10 +430,9 @@ TF_CALL_double(REGISTER_CPU_KERNEL);
 #endif
 
 #if GOOGLE_CUDA
-REGISTER_KERNEL_BUILDER(Name("DepthwiseConv2dNative")
-                            .Device(DEVICE_GPU)
-                            .TypeConstraint<Eigen::half>("T"),
-                        DepthwiseConv2dNativeOp<GPUDevice, Eigen::half>);
+REGISTER_KERNEL_BUILDER(
+    Name("DepthwiseConv2dNative").Device(DEVICE_GPU).TypeConstraint<Eigen::half>("T"),
+    DepthwiseConv2dNativeOp<GPUDevice, Eigen::half>);
 
 REGISTER_KERNEL_BUILDER(
     Name("DepthwiseConv2dNative").Device(DEVICE_GPU).TypeConstraint<float>("T"),
diff --git a/tensorflow/core/kernels/dynamic_partition_op_gpu.cu.cc b/tensorflow/core/kernels/dynamic_partition_op_gpu.cu.cc
new file mode 100644
index 0000000000..9bb58b13f3
--- /dev/null
+++ b/tensorflow/core/kernels/dynamic_partition_op_gpu.cu.cc
@@ -0,0 +1,465 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+// The algorithm for dynamic partition has the following steps:
+// 1. Let N be the size of partitions. We initialize a new vector indices_in
+//    with the values 0, 1, 2, ..., N-1.
+// 2. We apply cub::DeviceRadixSort::SortPairs to the key - value pairs given
+//    by partitions and indices_in. This will result in two new vectors
+//    partitions_out and indices_out, with partitions_out sorted.
+// 3. The first dimension of outputs[i] is equal to the number of i-values in
+//    partitions_out. We determine it in two steps:
+//    - apply cub::DeviceReduce::ReduceByKey to count how many times each value
+//      appears in partitions_out,
+//    - move the results to partition_count. This handles missing values
+//      (corresponding to empty parts).
+// 4. Because partition_count is on the GPU, we bring it asynchronously to
+//    the CPU. Then we can allocate the output tensors.
+// 5. Finally, we use indices_out and the gather functor to collect the output.
+//    This works, because for each interval of i-values, indices_out points
+//    to the slices which should form output[i].
+
+#if GOOGLE_CUDA
+
+#define EIGEN_USE_GPU
+
+#include "external/cub_archive/cub/device/device_radix_sort.cuh"
+#include "external/cub_archive/cub/device/device_reduce.cuh"
+#include "external/cub_archive/cub/iterator/constant_input_iterator.cuh"
+#include "external/cub_archive/cub/thread/thread_operators.cuh"
+#include "tensorflow/core/common_runtime/gpu/gpu_event_mgr.h"
+#include "tensorflow/core/framework/op_kernel.h"
+#include "tensorflow/core/framework/register_types.h"
+#include "tensorflow/core/framework/tensor.h"
+#include "tensorflow/core/framework/types.h"
+#include "tensorflow/core/kernels/bounds_check.h"
+#include "tensorflow/core/kernels/fill_functor.h"
+#include "tensorflow/core/kernels/gather_functor_gpu.cu.h"
+#include "tensorflow/core/util/cuda_kernel_helper.h"
+#include "tensorflow/core/util/transform_output_iterator.h"
+
+namespace tensorflow {
+
+typedef Eigen::GpuDevice GPUDevice;
+
+namespace {
+
+template <typename T>
+__global__ void RangeInitKernel(const T start, const T delta, const int32 size,
+                                T* out) {
+  CUDA_1D_KERNEL_LOOP(i, size) { out[i] = start + i * delta; }
+}
+
+__global__ void MoveValuesKernel(const int32* keys, const int32* values,
+                                 const int32* size, int32 out_size,
+                                 int32* out) {
+  int32 N = min(ldg(size), out_size);
+  CUDA_1D_KERNEL_LOOP(i, N) {
+    int32 key = ldg(keys + i);
+    int32 value = ldg(values + i);
+    if (FastBoundsCheck(key, out_size)) out[key] = value;
+  }
+}
+
+// Initialize out with range start, start + delta, start + 2 * delta, ...
+// This is needed because tf.range has no GPU implementation.
+template <typename T>
+void RangeInit(const GPUDevice& d, const T start, const T delta,
+               const int32 size, typename TTypes<T>::Flat out) {
+  CudaLaunchConfig config = GetCudaLaunchConfig(size, d);
+  RangeInitKernel<
+      T><<<config.block_count, config.thread_per_block, 0, d.stream()>>>(
+      start, delta, size, out.data());
+}
+
+// Given *num_runs pairs (key, value), this function moves the value
+// corresponding to key i at position i in the array out.
+void MoveValues(const GPUDevice& d, int32* keys, int32* values, int32* num_runs,
+                int32 out_size, int32* out) {
+  // Because num_runs is located on the GPU, we can not access it directly.
+  // So we launch the kernel with size = out_size.
+  // This is valid for correct inputs, because then out_size >= *num_runs.
+  // For wrong inputs, we may have out_size < *num_runs. In this case we will
+  // only handle the first out_size values.
+  CudaLaunchConfig config = GetCudaLaunchConfig(out_size, d);
+  MoveValuesKernel<<<config.block_count, config.thread_per_block, 0,
+                     d.stream()>>>(keys, values, num_runs, out_size, out);
+}
+
+template <typename T>
+void CallGatherKernel(const GPUDevice& d, const T* params, const int32* indices,
+                      T* out, int64 gather_dim_size, int64 indices_size,
+                      int64 slice_size, int64 out_size) {
+  CudaLaunchConfig config = GetCudaLaunchConfig(out_size, d);
+  GatherOpKernel<
+      T, int32,
+      true><<<config.block_count, config.thread_per_block, 0, d.stream()>>>(
+      params, indices, out, gather_dim_size, indices_size, slice_size,
+      out_size);
+}
+
+struct IdentityOp {
+  __device__ int32 __forceinline__ operator()(const int32& a) const {
+    return a;
+  }
+};
+
+// Define an output iterator that only allows assignment to
+// positions between [base, base + limit).
+class BoundedOutputIterator
+    : public TransformOutputIterator<int32, int32, IdentityOp> {
+ private:
+  int32 limit;
+  int32* base;
+
+  struct BoundedReference : Reference {
+    int32 limit;
+    int32* base;
+    // Constructor
+    __host__ __device__ __forceinline__
+    BoundedReference(int32* ptr, int32* base, IdentityOp op, int32 limit)
+        : Reference(ptr, op), limit(limit), base(base) {}
+
+    // Assignment
+    __host__ __device__ __forceinline__ int32 operator=(int32 val) {
+      if (ptr - base < limit && ptr - base >= 0) *ptr = val;
+      return val;
+    }
+  };
+
+ public:
+  typedef BoundedOutputIterator self_type;
+  typedef BoundedReference reference;
+
+  __host__ __device__ __forceinline__ BoundedOutputIterator(int32* ptr,
+                                                            IdentityOp op,
+                                                            int32 size)
+      : TransformOutputIterator(ptr, op), limit(size), base(ptr) {}
+
+  __host__ __device__ __forceinline__
+  BoundedOutputIterator(int32* ptr, int32* base, IdentityOp op, int32 size)
+      : TransformOutputIterator(ptr, op), limit(size), base(base) {}
+
+  // Indirection
+  __host__ __device__ __forceinline__ reference operator*() const {
+    return BoundedReference(ptr, base, conversion_op, limit);
+  }
+
+  // Array subscript
+  __host__ __device__ __forceinline__ reference operator[](int32 n) const {
+    return BoundedReference(ptr + n, base, conversion_op, limit);
+  }
+
+  // Addition
+  __host__ __device__ __forceinline__ self_type operator+(int32 n) const {
+    self_type retval(ptr + n, base, conversion_op, limit);
+    return retval;
+  }
+
+  // Subtraction
+  __host__ __device__ __forceinline__ self_type operator-(int32 n) const {
+    self_type retval(ptr - n, base, conversion_op, limit);
+    return retval;
+  }
+};
+
+}  // namespace
+
+// The current implementation has memory cost on GPU
+// I + P + max(3N + R + P, O + N), where:
+// I - the size of the input
+// N - the size of the partitions tensor
+// R - the temporary storage used by cub::RadixSort, about 2N
+// P - the number of partitions
+// O - the size of the output
+// So roughly the cost is I + P + max(5N, O + N).
+template <typename T>
+class DynamicPartitionOpGPU : public AsyncOpKernel {
+ public:
+  explicit DynamicPartitionOpGPU(OpKernelConstruction* c) : AsyncOpKernel(c) {
+    OP_REQUIRES_OK(c, c->GetAttr("num_partitions", &num_partitions_));
+    OP_REQUIRES(c, num_partitions_ >= 1,
+                errors::InvalidArgument("num_partitions must be at least 1"));
+  }
+
+  void AllocateTempSpace(OpKernelContext* c, int32 N, Tensor* indices_in,
+                         Tensor* partitions_out, Tensor* indices_out,
+                         DoneCallback done) {
+    int32 M = std::max(N, num_partitions_);
+    // indices_in will be made slightly larger to accommodate
+    // later computations.
+    OP_REQUIRES_OK_ASYNC(
+        c, c->allocate_temp(DT_INT32, TensorShape({M}), indices_in), done);
+    OP_REQUIRES_OK_ASYNC(
+        c, c->allocate_temp(DT_INT32, TensorShape({N}), partitions_out), done);
+    OP_REQUIRES_OK_ASYNC(
+        c, c->allocate_temp(DT_INT32, TensorShape({N}), indices_out), done);
+  }
+
+  void AllocateOutputs(OpKernelContext* c, const Tensor* data,
+                       const Tensor* partitions, const Tensor* partition_count,
+                       OpOutputList* Tout, DoneCallback done) {
+    auto e_part_count = partition_count->flat<int32>();
+    // Allocate output tensors of the right size
+    OP_REQUIRES_OK_ASYNC(c, c->output_list("outputs", Tout), done);
+    for (int p = 0; p < num_partitions_; p++) {
+      TensorShape shape;
+      shape.AddDim(e_part_count(p));
+      for (int i = partitions->dims(); i < data->dims(); i++) {
+        shape.AddDim(data->dim_size(i));
+      }
+      Tensor* out;
+      OP_REQUIRES_OK_ASYNC(c, Tout->allocate(p, shape, &out), done);
+    }
+  }
+
+  void ComputeAsync(OpKernelContext* c, DoneCallback done) {
+    const Tensor& data = c->input(0);
+    const Tensor& partitions = c->input(1);
+
+    OP_REQUIRES_ASYNC(
+        c, TensorShapeUtils::StartsWith(data.shape(), partitions.shape()),
+        errors::InvalidArgument("data.shape must start with partitions.shape, ",
+                                "got data.shape = ", data.shape().DebugString(),
+                                ", partitions.shape = ",
+                                partitions.shape().DebugString()),
+        done);
+
+    Tensor partition_count;
+
+    // We must handle the case of empty partitions separately,
+    // because kernels don't work with 0-sized tensors.
+    if (partitions.NumElements() == 0) {
+      AllocatorAttributes alloc_attr;
+      alloc_attr.set_on_host(true);
+      OP_REQUIRES_OK_ASYNC(
+          c, c->allocate_temp(DT_INT32, TensorShape({num_partitions_}),
+                              &partition_count, alloc_attr),
+          done);
+      auto e_part_count = partition_count.flat<int32>();
+      for (int i = 0; i < num_partitions_; i++) e_part_count(i) = 0;
+      OpOutputList outputs;
+      this->AllocateOutputs(c, &data, &partitions, &partition_count, &outputs,
+                            done);
+      if (c->status().ok()) done();
+      return;
+    }
+
+    // Prepare for counting.
+    OP_REQUIRES_OK_ASYNC(
+        c, c->allocate_temp(DT_INT32, TensorShape({num_partitions_}),
+                            &partition_count),
+        done);
+    Tensor indices_out;
+    // Count how many times each partition index occurs.
+    // Also sort the info in partitions and output it in indices_out,
+    // in preparation for the next step.
+    this->CountAndSortParts(c, &partitions, &partition_count, &indices_out,
+                            done);
+    if (!c->status().ok()) return;
+
+    // In order to allocate the output tensor we have to move partition_count
+    // to CPU.
+    auto* stream = c->op_device_context()->stream();
+    OP_REQUIRES_ASYNC(c, stream, errors::Internal("No GPU stream available."),
+                      done);
+    Tensor cpu_tensor;
+    AllocatorAttributes alloc_attr;
+    alloc_attr.set_on_host(true);
+    alloc_attr.set_gpu_compatible(true);
+    OP_REQUIRES_OK_ASYNC(
+        c, c->allocate_temp(partition_count.dtype(), partition_count.shape(),
+                            &cpu_tensor, alloc_attr),
+        done);
+    perftools::gputools::DeviceMemoryBase wrapped(
+        partition_count.flat<int32>().data(), num_partitions_ * sizeof(int32));
+    const bool status =
+        stream
+            ->ThenMemcpy(cpu_tensor.flat<int32>().data(), wrapped,
+                         num_partitions_ * sizeof(int32))
+            .ok();
+    OP_REQUIRES_ASYNC(
+        c, status,
+        errors::Internal("Failed to launch copy from device to host."), done);
+
+    // Keep a reference to partition_count so that the buffer
+    // is not deallocated at the end of the function, before
+    // memcpy is completed.
+    TensorReference partition_ref(partition_count);
+    auto wrapped_callback = [this, c, &data, &partitions, indices_out,
+                             partition_ref, cpu_tensor, done]() {
+      OpOutputList outputs;
+      this->AllocateOutputs(c, &data, &partitions, &cpu_tensor, &outputs, done);
+      if (!c->status().ok()) {
+        partition_ref.Unref();
+        return;
+      }
+      int32 N = partitions.NumElements();
+      int64 slice_size = data.NumElements() / N;
+      this->GatherSlices(c, &data, &indices_out, N, slice_size, outputs);
+      partition_ref.Unref();
+      done();
+    };
+
+    c->device()->tensorflow_gpu_device_info()->event_mgr->ThenExecute(
+        stream, wrapped_callback);
+  }
+
+ protected:
+  void RadixSort(OpKernelContext* c, const Tensor* partitions,
+                 Tensor* indices_in, Tensor* partitions_out,
+                 Tensor* indices_out, DoneCallback done) {
+    int32 N = partitions->NumElements();
+    const GPUDevice& device = c->eigen_device<GPUDevice>();
+    const cudaStream_t& cu_stream = GetCudaStream(c);
+
+    // Initialize the indices_in tensor using the Range GPU kernel.
+    RangeInit(device, 0, 1, N, indices_in->flat<int32>());
+    // Obtain the pointers to inner buffers.
+    const int32* partitions_ptr = partitions->flat<int32>().data();
+    int32* partitions_out_ptr = partitions_out->flat<int32>().data();
+    int32* indices_in_ptr = indices_in->flat<int32>().data();
+    int32* indices_out_ptr = indices_out->flat<int32>().data();
+    // Determine temporary device storage requirements.
+    Tensor cub_temp_storage;
+    size_t temp_storage_bytes = 0;
+    cub::DeviceRadixSort::SortPairs(
+        NULL, temp_storage_bytes, partitions_ptr, partitions_out_ptr,
+        indices_in_ptr, indices_out_ptr, N, 0, sizeof(int32) * 8, cu_stream);
+    // Allocate temporary storage.
+    OP_REQUIRES_OK_ASYNC(
+        c, c->allocate_temp(
+               DT_INT8, TensorShape({static_cast<int64>(temp_storage_bytes)}),
+               &cub_temp_storage),
+        done);
+    // Radix-sort the partition information.
+    cub::DeviceRadixSort::SortPairs(
+        cub_temp_storage.flat<int8>().data(), temp_storage_bytes,
+        partitions_ptr, partitions_out_ptr, indices_in_ptr, indices_out_ptr, N,
+        0, sizeof(int32) * 8, cu_stream);
+  }  // At this point cub_temp_storage will be marked for deallocation.
+
+  void CountAndSortParts(OpKernelContext* c, const Tensor* partitions,
+                         Tensor* partition_count, Tensor* indices_out,
+                         DoneCallback done) {
+    const GPUDevice& device = c->eigen_device<GPUDevice>();
+    const cudaStream_t& cu_stream = GetCudaStream(c);
+    int32 N = partitions->NumElements();
+    Tensor indices_in;
+    Tensor partitions_out;
+    Tensor aggregates_out;
+
+    // Allocate memory for Radix-Sort.
+    this->AllocateTempSpace(c, N, &indices_in, &partitions_out, indices_out,
+                            done);
+    if (!c->status().ok()) return;
+    this->RadixSort(c, partitions, &indices_in, &partitions_out, indices_out,
+                    done);
+    if (!c->status().ok()) return;
+    // We will now apply a reduce operation to count how many times
+    // each index appears in partitions.
+
+    // Zero-out the partition_count tensor.
+    functor::SetZeroFunctor<GPUDevice, int32> zero_functor;
+    zero_functor(device, partition_count->flat<int32>());
+    // Allocate memory for aggregates_out.
+    OP_REQUIRES_OK_ASYNC(
+        c, c->allocate_temp(DT_INT32, TensorShape({num_partitions_}),
+                            &aggregates_out),
+        done);
+    // Obtain the pointers to inner buffers.
+    int32* keys_in_ptr = partitions_out.flat<int32>().data();
+    // Here we reuse the indices_in tensor for the unique keys output.
+    int32* unique_out_ptr = indices_in.flat<int32>().data();
+    int32* aggregates_out_ptr = aggregates_out.flat<int32>().data();
+    // We wrap the pointers in bounded output iterators to guard against
+    // wrong inputs (more than num_partitions distinct indices).
+    IdentityOp id_op;
+    BoundedOutputIterator unique_out_it(unique_out_ptr, id_op, num_partitions_);
+    BoundedOutputIterator aggregates_out_it(aggregates_out_ptr, id_op,
+                                            num_partitions_);
+
+    cub::ConstantInputIterator<int32> values_in(1);
+    cub::Sum reduction_op;
+
+    // Allocate space on GPU for the number of runs. This is required by CUB.
+    Tensor num_runs;
+    OP_REQUIRES_OK_ASYNC(
+        c, c->allocate_temp(DT_INT32, TensorShape({1}), &num_runs), done);
+    int32* num_runs_ptr = num_runs.flat<int32>().data();
+
+    // Determine temporary device storage requirements
+    Tensor cub_temp_storage;
+    size_t temp_storage_bytes = 0;
+    cub::DeviceReduce::ReduceByKey(NULL, temp_storage_bytes, keys_in_ptr,
+                                   unique_out_it, values_in, aggregates_out_it,
+                                   num_runs_ptr, reduction_op, N, cu_stream);
+    // Allocate temporary storage.
+    OP_REQUIRES_OK_ASYNC(
+        c, c->allocate_temp(
+               DT_INT8, TensorShape({static_cast<int64>(temp_storage_bytes)}),
+               &cub_temp_storage),
+        done);
+    // Run reduce-by-key. The effect is that we count how many times
+    // each index appears in partitions. The distinct indices are stored
+    // in unique_out, while the count is stored in aggregates_out.
+    // The total number of distinct indices is stored in num_runs.
+    cub::DeviceReduce::ReduceByKey(cub_temp_storage.flat<int8>().data(),
+                                   temp_storage_bytes, keys_in_ptr,
+                                   unique_out_it, values_in, aggregates_out_it,
+                                   num_runs_ptr, reduction_op, N, cu_stream);
+    // We are not done yet. unique_out only contains the indices that appeared
+    // at least once in partitions. We move each value from aggregates_out
+    // to the corresponding position in partition_count. This will handle
+    // possibly empty parts.
+    MoveValues(device, unique_out_ptr, aggregates_out_ptr, num_runs_ptr,
+               num_partitions_, partition_count->flat<int32>().data());
+  }  // At this point indices_in, partitions_out, aggregates_out
+     // and cub_temp_storage will be marked for deallocation.
+
+  void GatherSlices(OpKernelContext* c, const Tensor* data,
+                    const Tensor* indices, int32 N, int64 slice_size,
+                    OpOutputList& outs) {
+    const GPUDevice& device = c->eigen_device<GPUDevice>();
+    const int32* ind_base = indices->flat<int32>().data();
+    const T* data_base = data->flat<T>().data();
+
+    for (int p = 0; p < num_partitions_; p++) {
+      int32 indices_size = outs[p]->dim_size(0);
+      int64 out_size = outs[p]->NumElements();
+      T* out_base = outs[p]->flat<T>().data();
+      if (out_size > 0)
+        CallGatherKernel<T>(device, data_base, ind_base, out_base, N,
+                            indices_size, slice_size, out_size);
+      ind_base += indices_size;
+    }
+  }
+
+  int32 num_partitions_;
+};
+
+#define REGISTER_DYNAMIC_PARTITION_GPU(T)                                 \
+  REGISTER_KERNEL_BUILDER(                                                \
+      Name("DynamicPartition").Device(DEVICE_GPU).TypeConstraint<T>("T"), \
+      DynamicPartitionOpGPU<T>)
+
+TF_CALL_GPU_NUMBER_TYPES(REGISTER_DYNAMIC_PARTITION_GPU);
+TF_CALL_complex64(REGISTER_DYNAMIC_PARTITION_GPU);
+TF_CALL_complex128(REGISTER_DYNAMIC_PARTITION_GPU);
+#undef REGISTER_DYNAMIC_PARTITION_GPU
+
+}  // namespace tensorflow
+
+#endif  // GOOGLE_CUDA
diff --git a/tensorflow/core/kernels/maxpooling_op.cc b/tensorflow/core/kernels/maxpooling_op.cc
index d8bdb700e6..2eefadad49 100644
--- a/tensorflow/core/kernels/maxpooling_op.cc
+++ b/tensorflow/core/kernels/maxpooling_op.cc
@@ -20,7 +20,6 @@ limitations under the License.
 #include "tensorflow/core/kernels/maxpooling_op.h"
 
 #include <vector>
-#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
 #include "tensorflow/core/common_runtime/device.h"
 #include "tensorflow/core/framework/numeric_op.h"
 #include "tensorflow/core/framework/op_kernel.h"
@@ -38,6 +37,7 @@ limitations under the License.
 #include "tensorflow/core/util/padding.h"
 #include "tensorflow/core/util/tensor_format.h"
 #include "tensorflow/core/util/use_cudnn.h"
+#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
 
 #if GOOGLE_CUDA
 #include "tensorflow/core/kernels/maxpooling_op_gpu.h"
diff --git a/tensorflow/core/kernels/mkl_batch_matmul_op.cc b/tensorflow/core/kernels/mkl_batch_matmul_op.cc
index d9713075be..9fee94f946 100644
--- a/tensorflow/core/kernels/mkl_batch_matmul_op.cc
+++ b/tensorflow/core/kernels/mkl_batch_matmul_op.cc
@@ -40,6 +40,7 @@ limitations under the License.
 #include "tensorflow/core/kernels/fill_functor.h"
 #include "tensorflow/core/platform/logging.h"
 #include "tensorflow/core/platform/types.h"
+#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
 
 #define MKL_Complex8 tensorflow::complex64
 #define MKL_Complex16 tensorflow::complex128
diff --git a/tensorflow/core/kernels/mkl_conv_grad_filter_ops.cc b/tensorflow/core/kernels/mkl_conv_grad_filter_ops.cc
index 9080bf7be8..f291281108 100644
--- a/tensorflow/core/kernels/mkl_conv_grad_filter_ops.cc
+++ b/tensorflow/core/kernels/mkl_conv_grad_filter_ops.cc
@@ -45,12 +45,12 @@ limitations under the License.
 #ifdef INTEL_MKL_DNN
 #include "mkldnn.hpp"
 
-using mkldnn::prop_kind;
 using mkldnn::stream;
+using mkldnn::prop_kind;
 
+using mkldnn::convolution_forward;
 using mkldnn::convolution_backward_weights;
 using mkldnn::convolution_direct;
-using mkldnn::convolution_forward;
 
 #endif
 
@@ -463,13 +463,12 @@ class MklConv2DCustomBackpropFilterOp : public OpKernel {
 
       // Generate input shapes.
       TensorShape filter_shape;
-      OP_REQUIRES(
-          context, TensorShapeUtils::IsVector(filter_tensor.shape()),
-          errors::InvalidArgument(
+      OP_REQUIRES(context, TensorShapeUtils::IsVector(filter_tensor.shape()),
+        errors::InvalidArgument(
               "Conv2DBackpropFilter: filter_sizes input must be 1-dim, not ",
               filter_tensor.dims()));
       OP_REQUIRES_OK(context, TensorShapeUtils::MakeShape(
-                                  filter_tensor.vec<int32>(), &filter_shape));
+                        filter_tensor.vec<int32>(), &filter_shape));
       TensorShape input_shape = input_tensor.shape();
       TensorShape obp_shape = obp_tensor.shape();
 
@@ -481,26 +480,27 @@ class MklConv2DCustomBackpropFilterOp : public OpKernel {
 
       // Get forward convolution parameters.
       MklDnnConvUtil conv_utl(context, strides_, padding_, data_format_);
-      conv_utl.GetConvFwdSizesInMklOrder(
-          input_shape, filter_shape, &fwd_input_dims, &fwd_filter_dims,
-          &strides, &fwd_output_dims_tf_order, &fwd_output_dims, &padding_l,
-          &padding_r);
+      conv_utl.GetConvFwdSizesInMklOrder(input_shape, filter_shape,
+                                         &fwd_input_dims, &fwd_filter_dims,
+                                         &strides,
+                                         &fwd_output_dims_tf_order,
+                                         &fwd_output_dims,
+                                         &padding_l, &padding_r);
       if (!context->status().ok()) return;
 
       // Create Convolution forward descriptor since Convolution backward
       // API needs it. For that, we first need to create input, filter
       // and output memory descriptors.
       auto mkl_data_format = TFDataFormatToMklDnnDataFormat(data_format_);
-      auto fwd_src_md =
-          memory::desc(fwd_input_dims, MklDnnType<T>(), mkl_data_format);
-      auto fwd_filter_md =
-          memory::desc(fwd_filter_dims, MklDnnType<T>(), memory::format::hwio);
-      auto fwd_out_md =
-          memory::desc(fwd_output_dims, MklDnnType<T>(), mkl_data_format);
-      auto fwd_desc = convolution_forward::desc(
-          prop_kind::forward, convolution_direct, fwd_src_md, fwd_filter_md,
-          fwd_out_md, strides, padding_l, padding_r,
-          TFPaddingToMklDnnPadding(padding_));
+      auto fwd_src_md = memory::desc(fwd_input_dims, MklDnnType<T>(),
+                                     mkl_data_format);
+      auto fwd_filter_md = memory::desc(fwd_filter_dims, MklDnnType<T>(),
+                                        memory::format::hwio);
+      auto fwd_out_md = memory::desc(fwd_output_dims, MklDnnType<T>(),
+                                     mkl_data_format);
+      auto fwd_desc = convolution_forward::desc(prop_kind::forward,
+            convolution_direct, fwd_src_md, fwd_filter_md, fwd_out_md,
+            strides, padding_l, padding_r, TFPaddingToMklDnnPadding(padding_));
       auto fwd_pd = convolution_forward::primitive_desc(fwd_desc, cpu_engine);
 
       // Allocate output tensor and shape
@@ -537,22 +537,23 @@ class MklConv2DCustomBackpropFilterOp : public OpKernel {
       output.SetOpMemDesc(bwd_output_dims, memory::format::any);
 
       // Create convolution backward weights primitive.
-      auto bwd_desc = convolution_backward_weights::desc(
-          convolution_direct, input.GetOpMemDesc(), output.GetOpMemDesc(),
-          outbackprop.GetOpMemDesc(), strides, padding_l, padding_r,
-          TFPaddingToMklDnnPadding(padding_));
+      auto bwd_desc = convolution_backward_weights::desc(convolution_direct,
+                          input.GetOpMemDesc(), output.GetOpMemDesc(),
+                          outbackprop.GetOpMemDesc(), strides, padding_l,
+                          padding_r, TFPaddingToMklDnnPadding(padding_));
 
-      auto bwd_pd = convolution_backward_weights::primitive_desc(
-          bwd_desc, cpu_engine, fwd_pd);
+      auto bwd_pd = convolution_backward_weights::primitive_desc(bwd_desc,
+                                                              cpu_engine,
+                                                              fwd_pd);
 
       PrepareAndExecutePrimitive(bwd_pd, &input, &outbackprop, &output);
-    } catch (mkldnn::error& e) {
-      string error_msg = "Status: " + std::to_string(e.status) +
-                         ", message: " + string(e.message) + ", in file " +
-                         string(__FILE__) + ":" + std::to_string(__LINE__);
-      OP_REQUIRES_OK(
-          context,
-          errors::Aborted("Operation received an exception:", error_msg));
+    } catch (mkldnn::error &e) {
+     string error_msg = "Status: " + std::to_string(e.status) +
+                       ", message: " + string(e.message) +
+                       ", in file " + string(__FILE__) + ":" +
+                       std::to_string(__LINE__);
+     OP_REQUIRES_OK(context, errors::Aborted("Operation received an exception:",
+                                            error_msg));
     }
   }
 
@@ -563,8 +564,9 @@ class MklConv2DCustomBackpropFilterOp : public OpKernel {
 
   // Prepare and execute net - checks for input and output reorders.
   void PrepareAndExecutePrimitive(
-      const convolution_backward_weights::primitive_desc& conv_pd,
-      MklDnnData<T>* input, MklDnnData<T>* obp, MklDnnData<T>* output) {
+                  const convolution_backward_weights::primitive_desc& conv_pd,
+                  MklDnnData<T>* input, MklDnnData<T>* obp,
+                  MklDnnData<T>* output) {
     // Create reorders between user layout and MKL layout if it is needed and
     // add it to the net before convolution.
     std::vector<primitive> net;
@@ -575,10 +577,10 @@ class MklConv2DCustomBackpropFilterOp : public OpKernel {
     // output side, we will prepare reorder primitive in case output
     // reorder to user memory is required.
     bool output_reorder_required = output->PrepareReorderToUserMemIfReq(
-        conv_pd.diff_weights_primitive_desc());
+                                      conv_pd.diff_weights_primitive_desc());
 
-    net.push_back(convolution_backward_weights(
-        conv_pd, input->GetOpMem(), obp->GetOpMem(), output->GetOpMem()));
+    net.push_back(convolution_backward_weights(conv_pd, input->GetOpMem(),
+                                    obp->GetOpMem(), output->GetOpMem()));
 
     // Insert reorder primitive in the net for output reorder if reorder is
     // required.
diff --git a/tensorflow/core/kernels/mkl_conv_grad_input_ops.cc b/tensorflow/core/kernels/mkl_conv_grad_input_ops.cc
index 4b6bf92e42..4a47d0463e 100644
--- a/tensorflow/core/kernels/mkl_conv_grad_input_ops.cc
+++ b/tensorflow/core/kernels/mkl_conv_grad_input_ops.cc
@@ -23,8 +23,6 @@ limitations under the License.
 #define EIGEN_USE_THREADS
 #include <algorithm>
 #include <vector>
-#include "mkl_dnn.h"
-#include "mkl_dnn_types.h"
 #include "tensorflow/core/framework/numeric_op.h"
 #include "tensorflow/core/framework/op_kernel.h"
 #include "tensorflow/core/framework/register_types.h"
@@ -43,16 +41,18 @@ limitations under the License.
 #include "tensorflow/core/util/tensor_format.h"
 #include "tensorflow/core/util/use_cudnn.h"
 #include "tensorflow/core/util/work_sharder.h"
+#include "mkl_dnn.h"
+#include "mkl_dnn_types.h"
 
 #ifdef INTEL_MKL_DNN
 #include "mkldnn.hpp"
 
-using mkldnn::prop_kind;
 using mkldnn::stream;
+using mkldnn::prop_kind;
 
-using mkldnn::convolution_backward_data;
-using mkldnn::convolution_direct;
 using mkldnn::convolution_forward;
+using mkldnn::convolution_direct;
+using mkldnn::convolution_backward_data;
 #endif
 
 namespace tensorflow {
@@ -397,13 +397,12 @@ class MklConv2DCustomBackpropInputOp : public OpKernel {
 
       // Generate input shape.
       TensorShape input_shape;
-      OP_REQUIRES(
-          context, TensorShapeUtils::IsVector(input_tensor.shape()),
-          errors::InvalidArgument(
+      OP_REQUIRES(context, TensorShapeUtils::IsVector(input_tensor.shape()),
+        errors::InvalidArgument(
               "Conv2DBackpropInput: input_sizes input must be 1-dim, not ",
               input_tensor.dims()));
       OP_REQUIRES_OK(context, TensorShapeUtils::MakeShape(
-                                  input_tensor.vec<int32>(), &input_shape));
+                        input_tensor.vec<int32>(), &input_shape));
       TensorShape filter_shape = filter_tensor.shape();
       TensorShape obp_shape = obp_tensor.shape();
 
@@ -415,26 +414,27 @@ class MklConv2DCustomBackpropInputOp : public OpKernel {
 
       // Get forward convolution parameters.
       MklDnnConvUtil conv_utl(context, strides_, padding_, data_format_);
-      conv_utl.GetConvFwdSizesInMklOrder(
-          input_shape, filter_shape, &fwd_input_dims, &fwd_filter_dims,
-          &strides, &fwd_output_dims_tf_order, &fwd_output_dims, &padding_l,
-          &padding_r);
+      conv_utl.GetConvFwdSizesInMklOrder(input_shape, filter_shape,
+                                         &fwd_input_dims, &fwd_filter_dims,
+                                         &strides,
+                                         &fwd_output_dims_tf_order,
+                                         &fwd_output_dims,
+                                         &padding_l, &padding_r);
       if (!context->status().ok()) return;
 
       // Create Convolution forward descriptor since Convolution backward
       // API needs it. For that, we first need to create input, filter
       // and output memory descriptors.
       auto mkl_data_format = TFDataFormatToMklDnnDataFormat(data_format_);
-      auto fwd_src_md =
-          memory::desc(fwd_input_dims, MklDnnType<T>(), mkl_data_format);
-      auto fwd_filter_md =
-          memory::desc(fwd_filter_dims, MklDnnType<T>(), memory::format::hwio);
-      auto fwd_out_md =
-          memory::desc(fwd_output_dims, MklDnnType<T>(), mkl_data_format);
-      auto fwd_desc = convolution_forward::desc(
-          prop_kind::forward, convolution_direct, fwd_src_md, fwd_filter_md,
-          fwd_out_md, strides, padding_l, padding_r,
-          TFPaddingToMklDnnPadding(padding_));
+      auto fwd_src_md = memory::desc(fwd_input_dims, MklDnnType<T>(),
+                                     mkl_data_format);
+      auto fwd_filter_md = memory::desc(fwd_filter_dims, MklDnnType<T>(),
+                                        memory::format::hwio);
+      auto fwd_out_md = memory::desc(fwd_output_dims, MklDnnType<T>(),
+                                     mkl_data_format);
+      auto fwd_desc = convolution_forward::desc(prop_kind::forward,
+            convolution_direct, fwd_src_md, fwd_filter_md, fwd_out_md,
+            strides, padding_l, padding_r, TFPaddingToMklDnnPadding(padding_));
       auto fwd_pd = convolution_forward::primitive_desc(fwd_desc, cpu_engine);
 
       // Allocate output tensor and shape
@@ -475,22 +475,23 @@ class MklConv2DCustomBackpropInputOp : public OpKernel {
       output.SetOpMemDesc(bwd_output_dims, memory::format::any);
 
       // Create convolution backward data primitive.
-      auto bwd_desc = convolution_backward_data::desc(
-          convolution_direct, output.GetOpMemDesc(), filter.GetOpMemDesc(),
-          outbackprop.GetOpMemDesc(), strides, padding_l, padding_r,
-          TFPaddingToMklDnnPadding(padding_));
+      auto bwd_desc = convolution_backward_data::desc(convolution_direct,
+                          output.GetOpMemDesc(), filter.GetOpMemDesc(),
+                          outbackprop.GetOpMemDesc(), strides, padding_l,
+                          padding_r, TFPaddingToMklDnnPadding(padding_));
 
-      auto bwd_pd = convolution_backward_data::primitive_desc(
-          bwd_desc, cpu_engine, fwd_pd);
+      auto bwd_pd = convolution_backward_data::primitive_desc(bwd_desc,
+                                                              cpu_engine,
+                                                              fwd_pd);
 
       PrepareAndExecutePrimitive(bwd_pd, &filter, &outbackprop, &output);
-    } catch (mkldnn::error& e) {
-      string error_msg = "Status: " + std::to_string(e.status) +
-                         ", message: " + string(e.message) + ", in file " +
-                         string(__FILE__) + ":" + std::to_string(__LINE__);
-      OP_REQUIRES_OK(
-          context,
-          errors::Aborted("Operation received an exception:", error_msg));
+    } catch (mkldnn::error &e) {
+     string error_msg = "Status: " + std::to_string(e.status) +
+                       ", message: " + string(e.message) +
+                       ", in file " + string(__FILE__) + ":" +
+                       std::to_string(__LINE__);
+     OP_REQUIRES_OK(context, errors::Aborted("Operation received an exception:",
+                                            error_msg));
     }
   }
 
@@ -501,8 +502,9 @@ class MklConv2DCustomBackpropInputOp : public OpKernel {
 
   // Prepare and execute net - checks for input and output reorders.
   void PrepareAndExecutePrimitive(
-      const convolution_backward_data::primitive_desc& conv_pd,
-      MklDnnData<T>* filter, MklDnnData<T>* obp, MklDnnData<T>* output) {
+                  const convolution_backward_data::primitive_desc& conv_pd,
+                  MklDnnData<T>* filter, MklDnnData<T>* obp,
+                  MklDnnData<T>* output) {
     // Create reorders between user layout and MKL layout if it is needed and
     // add it to the net before convolution.
     std::vector<primitive> net;
@@ -512,11 +514,11 @@ class MklConv2DCustomBackpropInputOp : public OpKernel {
     // Memory for output of convolution. Since we may need reorder on the
     // output side, we will prepare reorder primitive in case output
     // reorder to user memory is required.
-    bool output_reorder_required =
-        output->PrepareReorderToUserMemIfReq(conv_pd.diff_src_primitive_desc());
+    bool output_reorder_required = output->PrepareReorderToUserMemIfReq(
+                                      conv_pd.diff_src_primitive_desc());
 
-    net.push_back(convolution_backward_data(
-        conv_pd, obp->GetOpMem(), filter->GetOpMem(), output->GetOpMem()));
+    net.push_back(convolution_backward_data(conv_pd, obp->GetOpMem(),
+                                    filter->GetOpMem(), output->GetOpMem()));
 
     // Insert reorder primitive in the net for output reorder if reorder is
     // required.
diff --git a/tensorflow/core/kernels/mkl_conv_ops.cc b/tensorflow/core/kernels/mkl_conv_ops.cc
index 369f632fb4..a9872b8d6d 100644
--- a/tensorflow/core/kernels/mkl_conv_ops.cc
+++ b/tensorflow/core/kernels/mkl_conv_ops.cc
@@ -18,8 +18,8 @@ limitations under the License.
 
 #include <string.h>
 #include <map>
-#include <string>
 #include <vector>
+#include <string>
 
 #include "tensorflow/core/framework/numeric_op.h"
 #include "tensorflow/core/framework/op_kernel.h"
@@ -46,11 +46,11 @@ limitations under the License.
 #ifdef INTEL_MKL_DNN
 #include "mkldnn.hpp"
 
-using mkldnn::prop_kind;
 using mkldnn::stream;
+using mkldnn::prop_kind;
 
-using mkldnn::convolution_direct;
 using mkldnn::convolution_forward;
+using mkldnn::convolution_direct;
 #endif
 
 namespace tensorflow {
@@ -523,16 +523,19 @@ class MklConv2DOp : public OpKernel {
 
       // Get shapes of input tensors in MKL-DNN order
       MklDnnConvUtil conv_utl(context, strides_, padding_, data_format_);
-      conv_utl.GetConvFwdSizesInMklOrder(
-          src_tensor.shape(), filter_tensor.shape(), &src_dims, &filter_dims,
-          &strides, &output_dims_tf_order, &output_dims_mkl_order, &padding_l,
-          &padding_r);
+      conv_utl.GetConvFwdSizesInMklOrder(src_tensor.shape(),
+                                         filter_tensor.shape(),
+                                         &src_dims, &filter_dims, &strides,
+                                         &output_dims_tf_order,
+                                         &output_dims_mkl_order, &padding_l,
+                                         &padding_r);
       if (!context->status().ok()) return;
 
       // Check for corner case - if there is nothing to compute, return.
-      TensorShape tf_output_shape(
-          {output_dims_tf_order[0], output_dims_tf_order[1],
-           output_dims_tf_order[2], output_dims_tf_order[3]});
+      TensorShape tf_output_shape({output_dims_tf_order[0],
+                                output_dims_tf_order[1],
+                                output_dims_tf_order[2],
+                                output_dims_tf_order[3]});
       Tensor* output_tensor = nullptr;
       MklShape mkl_output_mkl_shape;
       mkl_output_mkl_shape.SetMklTensor(false);
@@ -569,13 +572,13 @@ class MklConv2DOp : public OpKernel {
       // the layout is Tensorflow's layout (NHWC or NCHW depending on data
       // format).
       src.SetUsrMem(src_dims, TFDataFormatToMklDnnDataFormat(data_format_),
-                    const_cast<void*>(
-                        static_cast<const void*>(src_tensor.flat<T>().data())));
+                    const_cast<void*>(static_cast<const void*>(
+                    src_tensor.flat<T>().data())));
       // Although filter shape (filter_dims) required is in MKL-DNN order,
       // the layout is Tensorflow's layout (HWIO).
       filter.SetUsrMem(filter_dims, memory::format::hwio,
                        const_cast<void*>(static_cast<const void*>(
-                           filter_tensor.flat<T>().data())));
+                       filter_tensor.flat<T>().data())));
       // Although output shape (output_dims) required is in MKL-DNN order,
       // layout is Tensorflow's layout (NHWC or NCHW depending on data format).
       output.SetUsrMem(output_dims_mkl_order,
@@ -595,36 +598,36 @@ class MklConv2DOp : public OpKernel {
         const Tensor& bias_tensor = MklGetInput(context, 2);
         bias.SetUsrMem(bias_size, memory::format::x,
                        const_cast<void*>(static_cast<const void*>(
-                           bias_tensor.flat<T>().data())));
+                       bias_tensor.flat<T>().data())));
         bias.SetOpMemDesc(bias_size, memory::format::any);
 
         // Create convolution primitive with Bias.
-        auto conv_desc = convolution_forward::desc(
-            prop_kind::forward, convolution_direct, src.GetOpMemDesc(),
-            filter.GetOpMemDesc(), bias.GetOpMemDesc(), output.GetOpMemDesc(),
-            strides, padding_l, padding_r, TFPaddingToMklDnnPadding(padding_));
+        auto conv_desc = convolution_forward::desc(prop_kind::forward,
+            convolution_direct, src.GetOpMemDesc(), filter.GetOpMemDesc(),
+            bias.GetOpMemDesc(), output.GetOpMemDesc(), strides,
+            padding_l, padding_r, TFPaddingToMklDnnPadding(padding_));
 
-        auto conv_prim_desc =
-            convolution_forward::primitive_desc(conv_desc, cpu_engine);
+        auto conv_prim_desc = convolution_forward::primitive_desc(conv_desc,
+                                                                cpu_engine);
         PrepareAndExecuteNet(conv_prim_desc, &src, &filter, &bias, &output);
       } else {
         // Create convolution primitive without Bias.
-        auto conv_desc = convolution_forward::desc(
-            prop_kind::forward, convolution_direct, src.GetOpMemDesc(),
-            filter.GetOpMemDesc(), output.GetOpMemDesc(), strides, padding_l,
-            padding_r, TFPaddingToMklDnnPadding(padding_));
+        auto conv_desc = convolution_forward::desc(prop_kind::forward,
+            convolution_direct, src.GetOpMemDesc(), filter.GetOpMemDesc(),
+            output.GetOpMemDesc(), strides, padding_l, padding_r,
+            TFPaddingToMklDnnPadding(padding_));
 
-        auto conv_prim_desc =
-            convolution_forward::primitive_desc(conv_desc, cpu_engine);
+        auto conv_prim_desc = convolution_forward::primitive_desc(conv_desc,
+                                                                cpu_engine);
         PrepareAndExecuteNet(conv_prim_desc, &src, &filter, nullptr, &output);
       }
-    } catch (mkldnn::error& e) {
+    } catch (mkldnn::error &e) {
       string error_msg = "Status: " + std::to_string(e.status) +
-                         ", message: " + std::string(e.message) + ", in file " +
-                         std::string(__FILE__) + ":" + std::to_string(__LINE__);
-      OP_REQUIRES_OK(
-          context,
-          errors::Aborted("Operation received an exception:", error_msg));
+                       ", message: " + std::string(e.message) +
+                       ", in file " + std::string(__FILE__) + ":" +
+                       std::to_string(__LINE__);
+      OP_REQUIRES_OK(context,
+        errors::Aborted("Operation received an exception:", error_msg));
     }
   }
 
@@ -635,9 +638,9 @@ class MklConv2DOp : public OpKernel {
 
   // Prepare and execute net - checks for input and output reorders.
   void PrepareAndExecuteNet(
-      const convolution_forward::primitive_desc& conv_prim_desc,
-      MklDnnData<T>* src, MklDnnData<T>* filter, MklDnnData<T>* bias,
-      MklDnnData<T>* output) {
+                  const convolution_forward::primitive_desc& conv_prim_desc,
+                  MklDnnData<T>* src, MklDnnData<T>* filter,
+                  MklDnnData<T>* bias, MklDnnData<T>* output) {
     // Create reorders between user layout and MKL layout if it is needed and
     // add it to the net before convolution.
     std::vector<primitive> net;
@@ -648,19 +651,18 @@ class MklConv2DOp : public OpKernel {
     // output side, we will prepare reorder primitive in case output
     // reorder to user memory is required.
     bool output_reorder_required = output->PrepareReorderToUserMemIfReq(
-        conv_prim_desc.dst_primitive_desc());
+                                      conv_prim_desc.dst_primitive_desc());
 
     // Create convolution primitive and add it to net.
     if (bias) {
       CHECK_EQ(biasEnabled, true);
       net.push_back(convolution_forward(conv_prim_desc, src->GetOpMem(),
-                                        filter->GetOpMem(), bias->GetOpMem(),
-                                        output->GetOpMem()));
+                                    filter->GetOpMem(), bias->GetOpMem(),
+                                    output->GetOpMem()));
     } else {
       CHECK_EQ(biasEnabled, false);
       net.push_back(convolution_forward(conv_prim_desc, src->GetOpMem(),
-                                        filter->GetOpMem(),
-                                        output->GetOpMem()));
+                                    filter->GetOpMem(), output->GetOpMem()));
     }
 
     // Insert reorder primitive in the net for output reorder if reorder is
diff --git a/tensorflow/core/kernels/mkl_conv_ops.h b/tensorflow/core/kernels/mkl_conv_ops.h
index e29af19ca9..f0cb37f8a4 100644
--- a/tensorflow/core/kernels/mkl_conv_ops.h
+++ b/tensorflow/core/kernels/mkl_conv_ops.h
@@ -16,8 +16,8 @@ limitations under the License.
 #ifndef TENSORFLOW_CORE_KERNELS_MKL_CONV_OPS_H_
 #define TENSORFLOW_CORE_KERNELS_MKL_CONV_OPS_H_
 
-#include <limits>
 #include <vector>
+#include <limits>
 
 #include "tensorflow/core/framework/numeric_op.h"
 #include "tensorflow/core/framework/op_kernel.h"
@@ -26,8 +26,8 @@ limitations under the License.
 #include "tensorflow/core/framework/tensor_shape.h"
 #include "tensorflow/core/framework/tensor_slice.h"
 #include "tensorflow/core/kernels/bounds_check.h"
-#include "tensorflow/core/kernels/conv_grad_ops.h"
 #include "tensorflow/core/kernels/ops_util.h"
+#include "tensorflow/core/kernels/conv_grad_ops.h"
 #include "tensorflow/core/lib/core/errors.h"
 #include "tensorflow/core/lib/gtl/array_slice.h"
 #include "tensorflow/core/lib/strings/numbers.h"
@@ -49,15 +49,15 @@ namespace tensorflow {
 
 class MklDnnConvUtil {
  protected:
-  OpKernelContext *context_;  // We don't own this.
+  OpKernelContext* context_;  // We don't own this.
   std::vector<int32> strides_;
   Padding padding_;
   TensorFormat data_format_;
 
  public:
-  MklDnnConvUtil(OpKernelContext *context, const std::vector<int32> &strides,
-                 Padding pad, TensorFormat fm)
-      : context_(context), strides_(strides), padding_(pad), data_format_(fm) {}
+  MklDnnConvUtil(OpKernelContext* context, const std::vector<int32>& strides,
+                 Padding pad, TensorFormat fm) : context_(context),
+    strides_(strides), padding_(pad), data_format_(fm) {}
 
   virtual ~MklDnnConvUtil() { context_ = nullptr; }
 
@@ -75,14 +75,14 @@ class MklDnnConvUtil {
   // requires input in NCHW format. Function does not return anything.
   // But errors arising from sanity checks are returned in context's
   // status.
-  virtual inline void GetInputSizeInMklOrder(const TensorShape &input_shape,
-                                             memory::dims *input_dims) {
-#define CHECK_BOUNDS(val, err_msg)                                     \
-  do {                                                                 \
-    OP_REQUIRES(context_,                                              \
-                FastBoundsCheck(val, std::numeric_limits<int>::max()), \
-                errors::InvalidArgument(err_msg));                     \
-  } while (0)
+  virtual inline void
+  GetInputSizeInMklOrder(const TensorShape& input_shape,
+                         memory::dims *input_dims) {
+  #define CHECK_BOUNDS(val, err_msg) do {                     \
+    OP_REQUIRES(context_, FastBoundsCheck(val,                \
+                            std::numeric_limits<int>::max()), \
+                errors::InvalidArgument(err_msg));            \
+  }while(0)
 
     CHECK_NOTNULL(input_dims);
 
@@ -105,7 +105,7 @@ class MklDnnConvUtil {
     CHECK_BOUNDS(input_batch_raw, "Input batch too large");
     int input_batch = static_cast<int>(input_batch_raw);
 
-#undef CHECK_BOUNDS
+  #undef CHECK_BOUNDS
 
     // MKL-DNN always requires input in NCHW format.
     *input_dims = {input_batch, input_depth, input_rows, input_cols};
@@ -125,9 +125,10 @@ class MklDnnConvUtil {
   // forward gets actual tensor as input).
   //
   // TODO(nhasabni): Add similar function for input and filter in MklShape.
-  virtual inline void GetFilterSizeInMklOrder(const TensorShape &input_shape,
-                                              const TensorShape &filter_shape,
-                                              memory::dims *filter_dims) {
+  virtual inline void
+  GetFilterSizeInMklOrder(const TensorShape& input_shape,
+                          const TensorShape& filter_shape,
+                          memory::dims *filter_dims) {
     CHECK_NOTNULL(filter_dims);
 
     OP_REQUIRES(context_, filter_shape.dims() == 4,
@@ -135,18 +136,17 @@ class MklDnnConvUtil {
                                         filter_shape.DebugString()));
 
     for (int i = 0; i < 3; i++) {
-      OP_REQUIRES(context_,
-                  FastBoundsCheck(filter_shape.dim_size(i),
-                                  std::numeric_limits<int>::max()),
-                  errors::InvalidArgument("filter too large"));
+      OP_REQUIRES(context_, FastBoundsCheck(filter_shape.dim_size(i),
+                                           std::numeric_limits<int>::max()),
+                errors::InvalidArgument("filter too large"));
     }
 
     int input_depth = GetTensorDim(input_shape, data_format_, 'C');
 
-    OP_REQUIRES(context_, input_depth == filter_shape.dim_size(2),
-                errors::InvalidArgument(
-                    "input and filter must have the same depth: ", input_depth,
-                    " vs ", filter_shape.dim_size(2)));
+    OP_REQUIRES(
+        context_, input_depth == filter_shape.dim_size(2),
+        errors::InvalidArgument("input and filter must have the same depth: ",
+                                input_depth, " vs ", filter_shape.dim_size(2)));
 
     // TF filter is always in (rows, cols, in_depth, out_depth) order.
     int filter_rows = static_cast<int>(filter_shape.dim_size(0));
@@ -163,25 +163,25 @@ class MklDnnConvUtil {
   // requires filter in OIHW format. Function does not return anything.
   // But errors arising from sanity checks are returned in context's
   // status.
-  virtual inline void GetFilterSizeInMklOrder(size_t src_index,
-                                              size_t filter_index,
-                                              memory::dims *filter_dims) {
+  virtual inline void
+  GetFilterSizeInMklOrder(size_t src_index, size_t filter_index,
+                          memory::dims *filter_dims) {
     CHECK_NOTNULL(filter_dims);
-    const Tensor &input = MklGetInput(context_, src_index);
-    const Tensor &filter = MklGetInput(context_, filter_index);
+    const Tensor& input = MklGetInput(context_, src_index);
+    const Tensor& filter = MklGetInput(context_, filter_index);
     GetFilterSizeInMklOrder(input.shape(), filter.shape(), filter_dims);
   }
 
   // Calculate Bias size for 2D Convolution. Function does not return
   // anything, but sets error in context status.
-  virtual inline void GetBiasSizeInMklOrder(size_t bias_index,
-                                            memory::dims *bias_dims) {
-    const Tensor &bias = MklGetInput(context_, bias_index);
+  virtual inline void
+  GetBiasSizeInMklOrder(size_t bias_index, memory::dims *bias_dims) {
+    const Tensor& bias = MklGetInput(context_, bias_index);
     OP_REQUIRES(context_, bias.dims() == 1,
                 errors::InvalidArgument("bias must be 1-dimensional: ",
                                         bias.shape().DebugString()));
 
-    *bias_dims = {static_cast<int>(bias.dim_size(0))};
+    *bias_dims = { static_cast<int>(bias.dim_size(0)) };
   }
 
   // Function to calculate output and padding size for 2D convolution.
@@ -193,11 +193,13 @@ class MklDnnConvUtil {
   // status is returned via context status.
   //
   // TODO(nhasabni): Add similar function for input and filter in MklShape.
-  virtual inline void GetOutputAndPadSizeInMklOrder(
-      const TensorShape &input_shape, const TensorShape &filter_shape,
-      const memory::dims &strides, memory::dims *output_dims_tf_order,
-      memory::dims *output_dims_mkl_order, memory::dims *pad_l,
-      memory::dims *pad_r) {
+  virtual inline void
+  GetOutputAndPadSizeInMklOrder(const TensorShape& input_shape,
+                                const TensorShape& filter_shape,
+                                const memory::dims& strides,
+                                memory::dims *output_dims_tf_order,
+                                memory::dims *output_dims_mkl_order,
+                                memory::dims *pad_l, memory::dims *pad_r) {
     CHECK_NOTNULL(output_dims_tf_order);
     CHECK_NOTNULL(output_dims_mkl_order);
     CHECK_NOTNULL(pad_l);
@@ -223,21 +225,21 @@ class MklDnnConvUtil {
     int64 out_rows = 0, out_cols = 0;
     int64 pad_top = 0, pad_bottom = 0, pad_left, pad_right;
 
-    OP_REQUIRES_OK(context_, GetWindowedOutputSizeVerbose(
-                                 input_rows, filter_rows, stride_rows, padding_,
-                                 &out_rows, &pad_top, &pad_bottom));
-    OP_REQUIRES_OK(context_, GetWindowedOutputSizeVerbose(
-                                 input_cols, filter_cols, stride_cols, padding_,
-                                 &out_cols, &pad_left, &pad_right));
+    OP_REQUIRES_OK(context_,
+            GetWindowedOutputSizeVerbose(input_rows, filter_rows, stride_rows,
+                                 padding_, &out_rows, &pad_top, &pad_bottom));
+    OP_REQUIRES_OK(context_,
+            GetWindowedOutputSizeVerbose(input_cols, filter_cols, stride_cols,
+                                 padding_, &out_cols, &pad_left, &pad_right));
 
     // Tensorflow output is in data_format order. (NHWC or NCHW)
-    TensorShape out_shape =
-        ShapeFromFormat(data_format_, out_batch, out_rows, out_cols, out_depth);
+    TensorShape out_shape = ShapeFromFormat(data_format_, out_batch,
+                                            out_rows, out_cols, out_depth);
     *output_dims_tf_order = TFShapeToMklDnnDims(out_shape);
 
     // MKL-DNN always needs output in NCHW format.
     *output_dims_mkl_order = {out_batch, out_depth, static_cast<int>(out_rows),
-                              static_cast<int>(out_cols)};
+                   static_cast<int>(out_cols)};
 
     // Now handle padding. MKL-DNN uses asymetric padding.
     *pad_l = {static_cast<int>(pad_top), static_cast<int>(pad_left)};
@@ -248,25 +250,27 @@ class MklDnnConvUtil {
   // See comment on GetConvOutputAndPadSizeInMklOrder for parameters.
   //
   // Function does not return anything, but sets error in context status.
-  inline void GetOutputAndPadSizeInMklOrder(
-      size_t src_index, size_t filter_index, const memory::dims &strides,
-      memory::dims *output_dims_tf_order, memory::dims *output_dims_mkl_order,
-      memory::dims *pad_l, memory::dims *pad_r) {
+  inline void
+  GetOutputAndPadSizeInMklOrder(size_t src_index, size_t filter_index,
+                                const memory::dims& strides,
+                                memory::dims *output_dims_tf_order,
+                                memory::dims *output_dims_mkl_order,
+                                memory::dims *pad_l, memory::dims *pad_r) {
     CHECK_NOTNULL(output_dims_tf_order);
     CHECK_NOTNULL(output_dims_mkl_order);
     CHECK_NOTNULL(pad_l);
     CHECK_NOTNULL(pad_r);
 
-    const Tensor &input = MklGetInput(context_, src_index);
-    const Tensor &filter = MklGetInput(context_, filter_index);
+    const Tensor& input = MklGetInput(context_, src_index);
+    const Tensor& filter = MklGetInput(context_, filter_index);
 
     OP_REQUIRES(context_, input.dims() == 4,
                 errors::InvalidArgument("input must be 4-dimensional",
-                                        input.shape().DebugString()));
+                                          input.shape().DebugString()));
 
-    GetOutputAndPadSizeInMklOrder(input.shape(), filter.shape(), strides,
-                                  output_dims_tf_order, output_dims_mkl_order,
-                                  pad_l, pad_r);
+    GetOutputAndPadSizeInMklOrder(input.shape(), filter.shape(),
+                                  strides, output_dims_tf_order,
+                                  output_dims_mkl_order, pad_l, pad_r);
   }
 
   // Wrapper function to calculate input, filter, and output sizes of
@@ -275,12 +279,15 @@ class MklDnnConvUtil {
   // also calculates strides and paddings for 2D Convolution.
   //
   // Function does not return anything, but sets error in context status.
-  inline void GetConvFwdSizesInMklOrder(
-      const TensorShape &input_shape, const TensorShape &filter_shape,
-      memory::dims *input_dims, memory::dims *filter_dims,
-      memory::dims *strides, memory::dims *output_dims_tf_order,
-      memory::dims *output_dims_mkl_order, memory::dims *pad_l,
-      memory::dims *pad_r) {
+  inline void GetConvFwdSizesInMklOrder(const TensorShape& input_shape,
+                                        const TensorShape& filter_shape,
+                                        memory::dims *input_dims,
+                                        memory::dims *filter_dims,
+                                        memory::dims *strides,
+                                        memory::dims *output_dims_tf_order,
+                                        memory::dims *output_dims_mkl_order,
+                                        memory::dims *pad_l,
+                                        memory::dims *pad_r) {
     CHECK_NOTNULL(input_dims);
     CHECK_NOTNULL(filter_dims);
     CHECK_NOTNULL(strides);
@@ -295,7 +302,8 @@ class MklDnnConvUtil {
     if (!context_->status().ok()) return;
     GetStridesInMklOrder(strides);
     GetOutputAndPadSizeInMklOrder(input_shape, filter_shape, *strides,
-                                  output_dims_tf_order, output_dims_mkl_order,
+                                  output_dims_tf_order,
+                                  output_dims_mkl_order,
                                   pad_l, pad_r);
     if (!context_->status().ok()) return;
   }
diff --git a/tensorflow/core/kernels/shape_ops.h b/tensorflow/core/kernels/shape_ops.h
index 55be308901..8d9d0ea846 100644
--- a/tensorflow/core/kernels/shape_ops.h
+++ b/tensorflow/core/kernels/shape_ops.h
@@ -235,10 +235,10 @@ class SqueezeOp : public OpKernel {
       if (!wrapped_squeeze_dims.empty()) {
         if (wrapped_squeeze_dims.count(i) > 0) {
           OP_REQUIRES(ctx, existing_dim == 1,
-                      errors::InvalidArgument(
-                          "Tried to explicitly squeeze "
-                          "dimension ",
-                          i, " but dimension was not 1: ", existing_dim));
+                      errors::InvalidArgument("Tried to explicitly squeeze "
+                                              "dimension ",
+                                              i, " but dimension was not 1: ",
+                                              existing_dim));
         } else {
           // This dimension is not being squeezed.
           new_shape.push_back(existing_dim);
diff --git a/tensorflow/core/kernels/slice_op.h b/tensorflow/core/kernels/slice_op.h
index db7eded745..0362a02133 100644
--- a/tensorflow/core/kernels/slice_op.h
+++ b/tensorflow/core/kernels/slice_op.h
@@ -24,6 +24,7 @@ limitations under the License.
 namespace tensorflow {
 namespace functor {
 
+
 template <typename Device, typename T, int NDIMS>
 struct Slice {
   void operator()(const Device& d, typename TTypes<T, NDIMS>::Tensor output,
diff --git a/tensorflow/core/util/transform_output_iterator.h b/tensorflow/core/util/transform_output_iterator.h
index 1640791ad1..059206c75b 100644
--- a/tensorflow/core/util/transform_output_iterator.h
+++ b/tensorflow/core/util/transform_output_iterator.h
@@ -24,7 +24,7 @@ namespace tensorflow {
 template <typename StoreType, typename InputType, typename ConversionOp,
           typename OffsetT = ptrdiff_t>
 class TransformOutputIterator {
- private:
+ protected:
   // Proxy object
   struct Reference {
     StoreType* ptr;
diff --git a/tensorflow/docs_src/extend/add_filesys.md b/tensorflow/docs_src/extend/add_filesys.md
index 44ba198998..f0591b7b7d 100644
--- a/tensorflow/docs_src/extend/add_filesys.md
+++ b/tensorflow/docs_src/extend/add_filesys.md
@@ -35,6 +35,7 @@ Note that TensorFlow already includes many filesystem implementations, such as:
 
 *   HDFS - the Hadoop File System
 *   GCS - Google Cloud Storage filesystem
+*   S3 - Amazon Simple Storage Service filesystem
 *   A "memory-mapped-file" filesystem
 
 The rest of this guide describes how to implement a custom filesystem.
diff --git a/tensorflow/docs_src/extend/estimators.md b/tensorflow/docs_src/extend/estimators.md
index 7e6507c584..96fc9fae47 100644
--- a/tensorflow/docs_src/extend/estimators.md
+++ b/tensorflow/docs_src/extend/estimators.md
@@ -515,7 +515,7 @@ using `mean_squared_error()` (in bold):
   loss = tf.losses.mean_squared_error(labels, predictions)</strong>
   ...</code></pre>
 
-See the @{$python/contrib.losses$API guide} for a
+See the @{tf.losses$API guide} for a
 full list of loss functions and more details on supported arguments and usage.
 
 Supplementary metrics for evaluation can be added to an `eval_metric_ops` dict.
@@ -694,5 +694,5 @@ For additional reference materials on building `Estimator`s, see the following
 sections of the API guides:
 
 *   @{$python/contrib.layers$Layers}
-*   @{$python/contrib.losses$Losses}
+*   @{tf.losses$Losses}
 *   @{$python/contrib.layers#optimization$Optimization}
diff --git a/tensorflow/docs_src/get_started/input_fn.md b/tensorflow/docs_src/get_started/input_fn.md
index f0dcdc47ff..24bfdbdd2e 100644
--- a/tensorflow/docs_src/get_started/input_fn.md
+++ b/tensorflow/docs_src/get_started/input_fn.md
@@ -292,7 +292,7 @@ prediction_set = pd.read_csv("boston_predict.csv", skipinitialspace=True,
 Next, create a list of `FeatureColumn`s for the input data, which formally
 specify the set of features to use for training. Because all features in the
 housing data set contain continuous values, you can create their
-`FeatureColumn`s using the `tf.contrib.layers.real_valued_column()` function:
+`FeatureColumn`s using the `tf.feature_column.numeric_column()` function:
 
 ```python
 feature_cols = [tf.feature_column.numeric_column(k) for k in FEATURES]
diff --git a/tensorflow/docs_src/install/install_mac.md b/tensorflow/docs_src/install/install_mac.md
index 79b383817b..3afd0aec0f 100644
--- a/tensorflow/docs_src/install/install_mac.md
+++ b/tensorflow/docs_src/install/install_mac.md
@@ -79,22 +79,23 @@ Take the following steps to install TensorFlow with Virtualenv:
   4. Activate the Virtualenv environment by issuing one of the
      following commands:
 
-     <pre>$ <b>source ~/tensorflow/bin/activate</b>      # If using bash, sh, ksh, or zsh
-    $ <b>source ~/tensorflow/bin/activate.csh</b>  # If using csh or tcsh </pre>
+     <pre>$ <b>cd <i>targetDirectory</i></b>
+    $ <b>source ./bin/activate</b>      # If using bash, sh, ksh, or zsh
+    $ <b>source ./bin/activate.csh</b>  # If using csh or tcsh </pre>
 
      The preceding `source` command should change your prompt to the following:
 
-     <pre> (tensorflow)$ </pre>
+     <pre> (<i>targetDirectory</i>)$ </pre>
 
   5. Ensure pip ≥8.1 is installed:
 
-     <pre> (tensorflow)$ <b>easy_install -U pip</b></pre>
+     <pre> (<i>targetDirectory</i>)$ <b>easy_install -U pip</b></pre>
 
   6. Issue one of the following commands to install TensorFlow and all the
      packages that TensorFlow requires into the active Virtualenv environment:
 
-     <pre> (tensorflow)$ <b>pip install --upgrade tensorflow</b>      # for Python 2.7
-     (tensorflow)$ <b>pip3 install --upgrade tensorflow</b>     # for Python 3.n
+     <pre> (<i>targetDirectory</i>)$ <b>pip install --upgrade tensorflow</b>      # for Python 2.7
+     (<i>targetDirectory</i>)$ <b>pip3 install --upgrade tensorflow</b>     # for Python 3.n
 
   7. Optional. If Step 6 failed (typically because you invoked a pip version
      lower than 8.1), install TensorFlow in the active
@@ -128,16 +129,18 @@ to confirm that the installation worked properly.
 
 Note that you must activate the Virtualenv environment each time you
 use TensorFlow in a new shell.  If the Virtualenv environment is not
-currently active (that is, the prompt is not `(tensorflow)`, invoke
+currently active (that is, the prompt is not `(<i>targetDirectory</i>)`, invoke
 one of the following commands:
 
-<pre>$ <b>source ~/tensorflow/bin/activate</b>      # bash, sh, ksh, or zsh
-$ <b>source ~/tensorflow/bin/activate.csh</b>  # csh or tcsh </pre>
+<pre>$ <b>cd <i>targetDirectory</i></b>
+$ <b>source ./bin/activate</b>      # If using bash, sh, ksh, or zsh
+$ <b>source ./bin/activate.csh</b>  # If using csh or tcsh </pre>
+
 
 Your prompt will transform to the following to indicate that your
 tensorflow environment is active:
 
-<pre> (tensorflow)$ </pre>
+<pre> (<i>targetDirectory</i>)$ </pre>
 
 When the Virtualenv environment is active, you may run
 TensorFlow programs from this shell.
@@ -145,7 +148,7 @@ TensorFlow programs from this shell.
 When you are done using TensorFlow, you may deactivate the
 environment by issuing the following command:
 
-<pre> (tensorflow)$ <b>deactivate</b> </pre>
+<pre> (<i>targetDirectory</i>)$ <b>deactivate</b> </pre>
 
 The prompt will revert back to your default prompt (as defined by `PS1`).
 
@@ -331,19 +334,19 @@ Take the following steps to install TensorFlow in an Anaconda environment:
   3. Activate the conda environment by issuing the following command:
 
      <pre>$ <b>source activate tensorflow</b>
-     (tensorflow)$  # Your prompt should change</pre>
+     (<i>targetDirectory</i>)$  # Your prompt should change</pre>
 
   4. Issue a command of the following format to install
      TensorFlow inside your conda environment:
 
-     <pre>(tensorflow)<b>$ pip install --ignore-installed --upgrade</b> <i>TF_PYTHON_URL</i></pre>
+     <pre>(<i>targetDirectory</i>)<b>$ pip install --ignore-installed --upgrade</b> <i>TF_PYTHON_URL</i></pre>
 
      where <i>TF_PYTHON_URL</i> is the
      [URL of the TensorFlow Python package](#the_url_of_the_tensorflow_python_package).
      For example, the following command installs the CPU-only version of
      TensorFlow for Python 2.7:
 
-     <pre> (tensorflow)$ <b>pip install --ignore-installed --upgrade \
+     <pre> (<i>targetDirectory</i>)$ <b>pip install --ignore-installed --upgrade \
      https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.4.0-py2-none-any.whl</b></pre>
 
 
diff --git a/tensorflow/docs_src/programmers_guide/datasets.md b/tensorflow/docs_src/programmers_guide/datasets.md
index 073bdb7baa..308cbad376 100644
--- a/tensorflow/docs_src/programmers_guide/datasets.md
+++ b/tensorflow/docs_src/programmers_guide/datasets.md
@@ -190,8 +190,8 @@ validation_dataset = tf.data.Dataset.range(50)
 # A reinitializable iterator is defined by its structure. We could use the
 # `output_types` and `output_shapes` properties of either `training_dataset`
 # or `validation_dataset` here, because they are compatible.
-iterator = Iterator.from_structure(training_dataset.output_types,
-                                   training_dataset.output_shapes)
+iterator = tf.data.Iterator.from_structure(training_dataset.output_types,
+                                           training_dataset.output_shapes)
 next_element = iterator.get_next()
 
 training_init_op = iterator.make_initializer(training_dataset)
@@ -735,7 +735,7 @@ def dataset_input_fn():
     parsed = tf.parse_single_example(record, keys_to_features)
 
     # Perform additional preprocessing on the parsed data.
-    image = tf.decode_jpeg(parsed["image_data"])
+    image = tf.image.decode_jpeg(parsed["image_data"])
     image = tf.reshape(image, [299, 299, 1])
     label = tf.cast(parsed["label"], tf.int32)
 
diff --git a/tensorflow/docs_src/programmers_guide/saved_model.md b/tensorflow/docs_src/programmers_guide/saved_model.md
index 34e8e5faf5..54693f3d4d 100644
--- a/tensorflow/docs_src/programmers_guide/saved_model.md
+++ b/tensorflow/docs_src/programmers_guide/saved_model.md
@@ -33,7 +33,7 @@ roughly speaking, map variable names to tensor values.
 
 Create a `Saver` with `tf.train.Saver()` to manage all variables in the
 model. For example, the following snippet demonstrates how to call the
-`tf.train.Saver.save` method to save variables to a checkpoint file:
+`tf.train.Saver.save` method to save variables to checkpoint files:
 
 ```python
 # Create some variables.
@@ -58,7 +58,7 @@ with tf.Session() as sess:
   dec_v2.op.run()
   # Save the variables to disk.
   save_path = saver.save(sess, "/tmp/model.ckpt")
-  print("Model saved in file: %s" % save_path)
+  print("Model saved in path: %s" % save_path)
 ```
 
 
@@ -66,10 +66,10 @@ with tf.Session() as sess:
 ### Restoring variables
 
 The `tf.train.Saver` object not only saves variables to checkpoint files, it
-also restores variables.  Note that when you restore variables from a file you
-do not have to initialize them beforehand. For example, the following snippet
-demonstrates how to call the `tf.train.Saver.restore` method to restore
-variables from a checkpoint file:
+also restores variables. Note that when you restore variables you do not have
+to initialize them beforehand. For example, the following snippet demonstrates
+how to call the `tf.train.Saver.restore` method to restore variables from the
+checkpoint files:
 
 ```python
 tf.reset_default_graph()
@@ -92,6 +92,12 @@ with tf.Session() as sess:
   print("v2 : %s" % v2.eval())
 ```
 
+Notes:
+
+*  There is not a physical file called "/tmp/model.ckpt". It is the **prefix**
+   of filenames created for the checkpoint. Users only interact with the
+   prefix instead of physical checkpoint files.
+
 
 ### Choosing which variables to save and restore
 
diff --git a/tensorflow/examples/android/README.md b/tensorflow/examples/android/README.md
index 51621d51ef..30a26d13c5 100644
--- a/tensorflow/examples/android/README.md
+++ b/tensorflow/examples/android/README.md
@@ -168,7 +168,7 @@ download-models.gradle.
 
 **Optional**: If you wish to place the models in your assets manually, remove
 all of the `model_files` entries from the `assets` list in `tensorflow_demo`
-found in the `[BUILD](BUILD)` file. Then download and extract the archives
+found in the [`BUILD`](BUILD#L92) file. Then download and extract the archives
 yourself to the `assets` directory in the source tree:
 
 ```bash
diff --git a/tensorflow/examples/speech_commands/train.py b/tensorflow/examples/speech_commands/train.py
index f5bf04305a..bec7dacd21 100644
--- a/tensorflow/examples/speech_commands/train.py
+++ b/tensorflow/examples/speech_commands/train.py
@@ -161,7 +161,7 @@ def main(_):
   evaluation_step = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
   tf.summary.scalar('accuracy', evaluation_step)
 
-  global_step = tf.contrib.framework.get_or_create_global_step()
+  global_step = tf.train.get_or_create_global_step()
   increment_global_step = tf.assign(global_step, global_step + 1)
 
   saver = tf.train.Saver(tf.global_variables())
diff --git a/tensorflow/go/tensor.go b/tensorflow/go/tensor.go
index cd05e2aa0a..2d25c04dc9 100644
--- a/tensorflow/go/tensor.go
+++ b/tensorflow/go/tensor.go
@@ -328,6 +328,14 @@ func encodeTensor(w *bytes.Buffer, v reflect.Value, shape []int64) error {
 			}
 		}
 
+		// Optimisation: if only one dimension is left we can use binary.Write() directly for this slice
+		if len(shape) == 1 && v.Len() > 0 {
+			switch v.Index(0).Kind() {
+			case reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Float32, reflect.Float64, reflect.Complex64, reflect.Complex128:
+				return binary.Write(w, nativeEndian, v.Interface())
+			}
+		}
+
 		subShape := shape[1:]
 		for i := 0; i < v.Len(); i++ {
 			err := encodeTensor(w, v.Index(i), subShape)
@@ -360,6 +368,15 @@ func decodeTensor(r *bytes.Reader, shape []int64, typ reflect.Type, ptr reflect.
 	case reflect.Slice:
 		val := reflect.Indirect(ptr)
 		val.Set(reflect.MakeSlice(typ, int(shape[0]), int(shape[0])))
+
+		// Optimization: if only one dimension is left we can use binary.Read() directly for this slice
+		if len(shape) == 1 && val.Len() > 0 {
+			switch val.Index(0).Kind() {
+			case reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Float32, reflect.Float64, reflect.Complex64, reflect.Complex128:
+				return binary.Read(r, nativeEndian, val.Interface())
+			}
+		}
+
 		for i := 0; i < val.Len(); i++ {
 			if err := decodeTensor(r, shape[1:], typ.Elem(), val.Index(i).Addr()); err != nil {
 				return err
diff --git a/tensorflow/go/tensor_test.go b/tensorflow/go/tensor_test.go
index 674a8ce86f..793c36dd4d 100644
--- a/tensorflow/go/tensor_test.go
+++ b/tensorflow/go/tensor_test.go
@@ -243,3 +243,23 @@ func BenchmarkNewTensor(b *testing.B) {
 	)
 	b.Run("[150528]", func(b *testing.B) { benchmarkNewTensor(b, vector) })
 }
+
+func benchmarkDecodeTensor(b *testing.B, t *Tensor) {
+	for i := 0; i < b.N; i++ {
+		_ = t.Value()
+	}
+}
+
+func BenchmarkDecodeTensor(b *testing.B) {
+	var (
+		// Some sample sizes from the Inception image labeling model.
+		// Where input tensors correspond to a 224x224 RGB image
+		// flattened into a vector.
+		vector [224 * 224 * 3]int32
+	)
+	t, err := NewTensor(vector)
+	if err != nil {
+		b.Fatalf("(%v, %v)", t, err)
+	}
+	b.Run("[150528]", func(b *testing.B) { benchmarkDecodeTensor(b, t) })
+}
diff --git a/tensorflow/python/debug/lib/stepper.py b/tensorflow/python/debug/lib/stepper.py
index 1fa0b3dba2..c27b3f51cd 100644
--- a/tensorflow/python/debug/lib/stepper.py
+++ b/tensorflow/python/debug/lib/stepper.py
@@ -80,7 +80,7 @@ class NodeStepper(object):
   when they are required as data dependencies.
 
   The temporary directories are automatically clean when the NodeStepper
-  instance exits as a context mananger.
+  instance exits as a context manager.
 
   Once the tracing is complete, it will issue a run() call on the
   underlying session, using the aforementioned feed_dict prepared by the input
diff --git a/tensorflow/python/estimator/export/export.py b/tensorflow/python/estimator/export/export.py
index 3b295a7e35..51075731dd 100644
--- a/tensorflow/python/estimator/export/export.py
+++ b/tensorflow/python/estimator/export/export.py
@@ -191,7 +191,8 @@ def build_all_signature_defs(receiver_tensors,
   if not isinstance(receiver_tensors, dict):
     receiver_tensors = {_SINGLE_RECEIVER_DEFAULT_NAME: receiver_tensors}
   if export_outputs is None or not isinstance(export_outputs, dict):
-    raise ValueError('export_outputs must be a dict.')
+    raise ValueError('export_outputs must be a dict and not'
+                     '{}'.format(type(export_outputs)))
 
   signature_def_map = {}
   excluded_signatures = {}
diff --git a/tensorflow/python/estimator/export/export_test.py b/tensorflow/python/estimator/export/export_test.py
index 3cbef4707a..8442bf04ac 100644
--- a/tensorflow/python/estimator/export/export_test.py
+++ b/tensorflow/python/estimator/export/export_test.py
@@ -358,7 +358,8 @@ class ExportTest(test_util.TensorFlowTestCase):
     with self.assertRaises(ValueError) as e:
       export.build_all_signature_defs(receiver_tensor, None)
 
-    self.assertEqual("export_outputs must be a dict.", str(e.exception))
+    self.assertTrue(str(e.exception).startswith(
+        "export_outputs must be a dict"))
 
   def test_get_timestamped_export_dir(self):
     export_dir_base = tempfile.mkdtemp() + "export/"
diff --git a/tensorflow/python/keras/BUILD b/tensorflow/python/keras/BUILD
old mode 100644
new mode 100755
index d9391dd6c5..4a60b7835e
--- a/tensorflow/python/keras/BUILD
+++ b/tensorflow/python/keras/BUILD
@@ -150,6 +150,7 @@ py_library(
         "//tensorflow/python:variables",
         "//tensorflow/python/estimator",
         "//tensorflow/python/estimator:model_fn",
+        "//tensorflow/python/saved_model",
         "@six_archive//:six",
     ],
 )
@@ -552,7 +553,7 @@ py_test(
 
 py_test(
     name = "data_utils_test",
-    size = "small",
+    size = "medium",
     srcs = ["_impl/keras/utils/data_utils_test.py"],
     srcs_version = "PY2AND3",
     tags = [
diff --git a/tensorflow/python/keras/_impl/keras/estimator.py b/tensorflow/python/keras/_impl/keras/estimator.py
index 2e931769c7..4370341ad1 100644
--- a/tensorflow/python/keras/_impl/keras/estimator.py
+++ b/tensorflow/python/keras/_impl/keras/estimator.py
@@ -19,9 +19,11 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
+import os
 
 from tensorflow.python.client import session
 from tensorflow.python.estimator import estimator as estimator_lib
+from tensorflow.python.estimator import export as export_lib
 from tensorflow.python.estimator import model_fn as model_fn_lib
 from tensorflow.python.framework import ops
 from tensorflow.python.framework import random_seed
@@ -31,9 +33,12 @@ from tensorflow.python.keras._impl.keras import models
 from tensorflow.python.keras._impl.keras.utils.generic_utils import CustomObjectScope
 from tensorflow.python.ops import metrics as metrics_module
 from tensorflow.python.platform import tf_logging as logging
+from tensorflow.python.saved_model import signature_constants
 from tensorflow.python.training import saver as saver_lib
 from tensorflow.python.training import training_util
 
+_DEFAULT_SERVING_KEY = signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY
+
 
 def _create_ordered_io(keras_model, estimator_io_dict, is_input=True):
   """Create a list of tensors from IO dictionary based on Keras IO order.
@@ -184,7 +189,11 @@ def _create_keras_model_fn(keras_model, custom_objects=None):
         predictions=predictions,
         loss=loss,
         train_op=train_op,
-        eval_metric_ops=eval_metric_ops)
+        eval_metric_ops=eval_metric_ops,
+        export_outputs={
+            _DEFAULT_SERVING_KEY:
+            export_lib.export_output.PredictOutput(predictions)
+        })
 
   return model_fn
 
@@ -222,7 +231,7 @@ def _save_first_checkpoint(keras_model, estimator, custom_objects,
           K._initialize_variables(sess)
           # pylint: enable=protected-access
         saver = saver_lib.Saver()
-        saver.save(sess, estimator.model_dir + '/')
+        saver.save(sess, os.path.join(estimator.model_dir, 'keras_model.ckpt'))
 
 
 def model_to_estimator(keras_model=None,
diff --git a/tensorflow/python/kernel_tests/decode_bmp_op_test.py b/tensorflow/python/kernel_tests/decode_bmp_op_test.py
index 35f8f76991..c67c26b7be 100644
--- a/tensorflow/python/kernel_tests/decode_bmp_op_test.py
+++ b/tensorflow/python/kernel_tests/decode_bmp_op_test.py
@@ -20,6 +20,7 @@ from __future__ import print_function
 
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import errors_impl
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import image_ops
 from tensorflow.python.platform import test
diff --git a/tensorflow/python/kernel_tests/distributions/special_math_test.py b/tensorflow/python/kernel_tests/distributions/special_math_test.py
index 9441cdbe39..2d434a39c2 100644
--- a/tensorflow/python/kernel_tests/distributions/special_math_test.py
+++ b/tensorflow/python/kernel_tests/distributions/special_math_test.py
@@ -332,6 +332,32 @@ class LogNdtrGradientTest(NdtrGradientTest):
   _use_log = True
 
 
+class ErfInvTest(test.TestCase):
+
+  def testErfInvValues(self):
+    with self.test_session():
+      if not special:
+        return
+
+      x = np.linspace(0., 1.0, 50).astype(np.float64)
+
+      expected_x = special.erfinv(x)
+      x = special_math.erfinv(x)
+      self.assertAllClose(expected_x, x.eval(), atol=0.)
+
+  def testErfInvIntegerInput(self):
+    with self.test_session():
+
+      with self.assertRaises(TypeError):
+        x = np.array([1, 2, 3]).astype(np.int32)
+        special_math.erfinv(x)
+
+      with self.assertRaises(TypeError):
+        x = np.array([1, 2, 3]).astype(np.int64)
+        special_math.erfinv(x)
+
+
+
 class LogCDFLaplaceTest(test.TestCase):
   # Note that scipy.stats.laplace does not have a stable Log CDF, so we cannot
   # rely on scipy to cross check the extreme values.
diff --git a/tensorflow/python/kernel_tests/dynamic_partition_op_test.py b/tensorflow/python/kernel_tests/dynamic_partition_op_test.py
index 4883095707..b4fb5aa411 100644
--- a/tensorflow/python/kernel_tests/dynamic_partition_op_test.py
+++ b/tensorflow/python/kernel_tests/dynamic_partition_op_test.py
@@ -33,13 +33,14 @@ from tensorflow.python.platform import test
 class DynamicPartitionTest(test.TestCase):
 
   def testSimpleOneDimensional(self):
-    with self.test_session() as sess:
-      data = constant_op.constant([0, 13, 2, 39, 4, 17])
+    with self.test_session(use_gpu=True) as sess:
+      data = constant_op.constant([0, 13, 2, 39, 4, 17], dtype=dtypes.float32)
       indices = constant_op.constant([0, 0, 2, 3, 2, 1])
       partitions = data_flow_ops.dynamic_partition(
           data, indices, num_partitions=4)
       partition_vals = sess.run(partitions)
 
+    self.assertEqual(4, len(partition_vals))
     self.assertAllEqual([0, 13], partition_vals[0])
     self.assertAllEqual([17], partition_vals[1])
     self.assertAllEqual([2, 4], partition_vals[2])
@@ -52,14 +53,16 @@ class DynamicPartitionTest(test.TestCase):
     self.assertEqual([None], partitions[3].get_shape().as_list())
 
   def testSimpleTwoDimensional(self):
-    with self.test_session() as sess:
+    with self.test_session(use_gpu=True) as sess:
       data = constant_op.constant([[0, 1, 2], [3, 4, 5], [6, 7, 8], [9, 10, 11],
-                                   [12, 13, 14], [15, 16, 17]])
+                                   [12, 13, 14], [15, 16, 17]],
+                                  dtype=dtypes.float32)
       indices = constant_op.constant([0, 0, 2, 3, 2, 1])
       partitions = data_flow_ops.dynamic_partition(
           data, indices, num_partitions=4)
       partition_vals = sess.run(partitions)
 
+    self.assertEqual(4, len(partition_vals))
     self.assertAllEqual([[0, 1, 2], [3, 4, 5]], partition_vals[0])
     self.assertAllEqual([[15, 16, 17]], partition_vals[1])
     self.assertAllEqual([[6, 7, 8], [12, 13, 14]], partition_vals[2])
@@ -71,9 +74,84 @@ class DynamicPartitionTest(test.TestCase):
     self.assertEqual([None, 3], partitions[2].get_shape().as_list())
     self.assertEqual([None, 3], partitions[3].get_shape().as_list())
 
+  def testLargeOneDimensional(self):
+    num = 100000
+    data_list = [x for x in range(num)]
+    indices_list = [x % 2 for x in range(num)]
+    part1 = [x for x in range(num) if x % 2 == 0]
+    part2 = [x for x in range(num) if x % 2 == 1]
+    with self.test_session(use_gpu=True) as sess:
+      data = constant_op.constant(data_list, dtype=dtypes.float32)
+      indices = constant_op.constant(indices_list, dtype=dtypes.int32)
+      partitions = data_flow_ops.dynamic_partition(
+          data, indices, num_partitions=2)
+      partition_vals = sess.run(partitions)
+
+    self.assertEqual(2, len(partition_vals))
+    self.assertAllEqual(part1, partition_vals[0])
+    self.assertAllEqual(part2, partition_vals[1])
+
+  def testLargeTwoDimensional(self):
+    rows = 100000
+    cols = 100
+    data_list = [None] * rows
+    for i in range(rows):
+      data_list[i] = [i for _ in range(cols)]
+    num_partitions = 97
+    indices_list = [(i ** 2) % num_partitions for i in range(rows)]
+    parts = [[] for _ in range(num_partitions)]
+    for i in range(rows):
+      parts[(i ** 2) % num_partitions].append(data_list[i])
+    with self.test_session(use_gpu=True) as sess:
+      data = constant_op.constant(data_list, dtype=dtypes.float32)
+      indices = constant_op.constant(indices_list, dtype=dtypes.int32)
+      partitions = data_flow_ops.dynamic_partition(
+          data, indices, num_partitions=num_partitions)
+      partition_vals = sess.run(partitions)
+
+    self.assertEqual(num_partitions, len(partition_vals))
+    for i in range(num_partitions):
+      # reshape because of empty parts
+      parts_np = np.array(parts[i], dtype=np.float).reshape(-1, cols)
+      self.assertAllEqual(parts_np, partition_vals[i])
+
+  def testSimpleComplex(self):
+    data_list = [1 + 2j, 3 + 4j, 5 + 6j, 7 + 8j]
+    indices_list = [1, 0, 1, 0]
+    with self.test_session(use_gpu=True) as sess:
+      data = constant_op.constant(data_list, dtype=dtypes.complex64)
+      indices = constant_op.constant(indices_list, dtype=dtypes.int32)
+      partitions = data_flow_ops.dynamic_partition(
+          data, indices, num_partitions=2)
+      partition_vals = sess.run(partitions)
+
+    self.assertEqual(2, len(partition_vals))
+    self.assertAllEqual([3 + 4j, 7 + 8j], partition_vals[0])
+    self.assertAllEqual([1 + 2j, 5 + 6j], partition_vals[1])
+
+  def testScalarPartitions(self):
+    data_list = [10, 13, 12, 11]
+    with self.test_session(use_gpu=True) as sess:
+      data = constant_op.constant(data_list, dtype=dtypes.float64)
+      indices = 3
+      partitions = data_flow_ops.dynamic_partition(
+          data, indices, num_partitions=4)
+      partition_vals = sess.run(partitions)
+
+    self.assertEqual(4, len(partition_vals))
+    self.assertAllEqual(np.array([], dtype=np.float64).reshape(-1, 4),
+                        partition_vals[0])
+    self.assertAllEqual(np.array([], dtype=np.float64).reshape(-1, 4),
+                        partition_vals[1])
+    self.assertAllEqual(np.array([], dtype=np.float64).reshape(-1, 4),
+                        partition_vals[2])
+    self.assertAllEqual(np.array([10, 13, 12, 11],
+                                 dtype=np.float64).reshape(-1, 4),
+                        partition_vals[3])
+
   def testHigherRank(self):
     np.random.seed(7)
-    with self.test_session() as sess:
+    with self.test_session(use_gpu=True) as sess:
       for n in 2, 3:
         for shape in (4,), (4, 5), (4, 5, 2):
           partitions = np.random.randint(n, size=np.prod(shape)).reshape(shape)
@@ -95,6 +173,115 @@ class DynamicPartitionTest(test.TestCase):
             self.assertEqual(grads[1], None)  # Partitions has no gradients
             self.assertAllEqual(7 * data, sess.run(grads[0]))
 
+  def testEmptyParts(self):
+    data_list = [1, 2, 3, 4]
+    indices_list = [1, 3, 1, 3]
+    with self.test_session(use_gpu=True) as sess:
+      data = constant_op.constant(data_list, dtype=dtypes.float32)
+      indices = constant_op.constant(indices_list, dtype=dtypes.int32)
+      partitions = data_flow_ops.dynamic_partition(
+          data, indices, num_partitions=4)
+      partition_vals = sess.run(partitions)
+
+    self.assertEqual(4, len(partition_vals))
+    self.assertAllEqual([], partition_vals[0])
+    self.assertAllEqual([1, 3], partition_vals[1])
+    self.assertAllEqual([], partition_vals[2])
+    self.assertAllEqual([2, 4], partition_vals[3])
+
+  def testEmptyDataTwoDimensional(self):
+    data_list = [[], []]
+    indices_list = [0, 1]
+    with self.test_session(use_gpu=True) as sess:
+      data = constant_op.constant(data_list, dtype=dtypes.float32)
+      indices = constant_op.constant(indices_list, dtype=dtypes.int32)
+      partitions = data_flow_ops.dynamic_partition(
+          data, indices, num_partitions=3)
+      partition_vals = sess.run(partitions)
+
+    self.assertEqual(3, len(partition_vals))
+    self.assertAllEqual([[]], partition_vals[0])
+    self.assertAllEqual([[]], partition_vals[1])
+    self.assertAllEqual(np.array([], dtype=np.float).reshape(0, 0),
+                        partition_vals[2])
+
+  def testEmptyPartitions(self):
+    data_list = []
+    indices_list = []
+    with self.test_session(use_gpu=True) as sess:
+      data = constant_op.constant(data_list, dtype=dtypes.float32)
+      indices = constant_op.constant(indices_list, dtype=dtypes.int32)
+      partitions = data_flow_ops.dynamic_partition(
+          data, indices, num_partitions=2)
+      partition_vals = sess.run(partitions)
+
+    self.assertEqual(2, len(partition_vals))
+    self.assertAllEqual([], partition_vals[0])
+    self.assertAllEqual([], partition_vals[1])
+
+  def testGPUTooManyParts(self):
+    # This test only makes sense on the GPU. There we do not check
+    # for errors. In this case, we should discard all but the first
+    # num_partitions indices.
+    if not test.is_gpu_available():
+      return
+
+    data_list = [1, 2, 3, 4, 5, 6]
+    indices_list = [6, 5, 4, 3, 1, 0]
+    with self.test_session(use_gpu=True) as sess:
+      data = constant_op.constant(data_list, dtype=dtypes.float32)
+      indices = constant_op.constant(indices_list, dtype=dtypes.int32)
+      partitions = data_flow_ops.dynamic_partition(
+          data, indices, num_partitions=2)
+      partition_vals = sess.run(partitions)
+
+    self.assertEqual(2, len(partition_vals))
+    self.assertAllEqual([6], partition_vals[0])
+    self.assertAllEqual([5], partition_vals[1])
+
+  def testGPUPartsTooLarge(self):
+    # This test only makes sense on the GPU. There we do not check
+    # for errors. In this case, we should discard all the values
+    # larger than num_partitions.
+    if not test.is_gpu_available():
+      return
+
+    data_list = [1, 2, 3, 4, 5, 6]
+    indices_list = [10, 11, 2, 12, 0, 1000]
+    with self.test_session(use_gpu=True) as sess:
+      data = constant_op.constant(data_list, dtype=dtypes.float32)
+      indices = constant_op.constant(indices_list, dtype=dtypes.int32)
+      partitions = data_flow_ops.dynamic_partition(
+          data, indices, num_partitions=5)
+      partition_vals = sess.run(partitions)
+
+    self.assertEqual(5, len(partition_vals))
+    self.assertAllEqual([5], partition_vals[0])
+    self.assertAllEqual([], partition_vals[1])
+    self.assertAllEqual([3], partition_vals[2])
+    self.assertAllEqual([], partition_vals[3])
+    self.assertAllEqual([], partition_vals[4])
+
+  def testGPUAllIndicesBig(self):
+    # This test only makes sense on the GPU. There we do not check
+    # for errors. In this case, we should discard all the values
+    # and have an empty output.
+    if not test.is_gpu_available():
+      return
+
+    data_list = [1.1, 2.1, 3.1, 4.1, 5.1, 6.1]
+    indices_list = [90, 70, 60, 100, 110, 40]
+    with self.test_session(use_gpu=True) as sess:
+      data = constant_op.constant(data_list, dtype=dtypes.float32)
+      indices = constant_op.constant(indices_list, dtype=dtypes.int32)
+      partitions = data_flow_ops.dynamic_partition(
+          data, indices, num_partitions=40)
+      partition_vals = sess.run(partitions)
+
+    self.assertEqual(40, len(partition_vals))
+    for i in range(40):
+      self.assertAllEqual([], partition_vals[i])
+
   def testErrorIndexOutOfRange(self):
     with self.test_session() as sess:
       data = constant_op.constant([[0, 1, 2], [3, 4, 5], [6, 7, 8], [9, 10, 11],
diff --git a/tensorflow/python/ops/bitwise_ops_test.py b/tensorflow/python/ops/bitwise_ops_test.py
index fa1b219b17..75eb100a90 100644
--- a/tensorflow/python/ops/bitwise_ops_test.py
+++ b/tensorflow/python/ops/bitwise_ops_test.py
@@ -36,7 +36,7 @@ class BitwiseOpTest(test_util.TensorFlowTestCase):
 
   def testBinaryOps(self):
     dtype_list = [dtypes.int8, dtypes.int16, dtypes.int32, dtypes.int64,
-                  dtypes.uint8, dtypes.uint16]
+                  dtypes.uint8, dtypes.uint16, dtypes.uint32, dtypes.uint64]
 
     with self.test_session(use_gpu=True) as sess:
       for dtype in dtype_list:
diff --git a/tensorflow/python/ops/distributions/special_math.py b/tensorflow/python/ops/distributions/special_math.py
index 222a39ad82..bed4cbb2c1 100644
--- a/tensorflow/python/ops/distributions/special_math.py
+++ b/tensorflow/python/ops/distributions/special_math.py
@@ -27,6 +27,7 @@ from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import math_ops
 
 __all__ = [
+    "erfinv",
     "ndtr",
     "ndtri",
     "log_ndtr",
@@ -350,6 +351,29 @@ def _log_ndtr_asymptotic_series(x, series_order):
   return 1. + even_sum - odd_sum
 
 
+def erfinv(x, name="erfinv"):
+  """The inverse function for erf, the error function.
+
+  Args:
+    x: `Tensor` of type `float32`, `float64`.
+    name: Python string. A name for the operation (default="erfinv").
+
+  Returns:
+    x: `Tensor` with `dtype=x.dtype`.
+
+  Raises:
+    TypeError: if `x` is not floating-type.
+  """
+
+  with ops.name_scope(name, values=[x]):
+    x = ops.convert_to_tensor(x, name="x")
+    if x.dtype.as_numpy_dtype not in [np.float32, np.float64]:
+      raise TypeError(
+          "x.dtype=%s is not handled, see docstring for supported types."
+          % x.dtype)
+    return ndtri((x + 1.0) / 2.0) / np.sqrt(2)
+
+
 def _double_factorial(n):
   """The double factorial function for small Python integer `n`."""
   return np.prod(np.arange(n, 1, -2))
diff --git a/tensorflow/python/ops/losses/losses_impl.py b/tensorflow/python/ops/losses/losses_impl.py
index 55a18d28ca..b74971f654 100644
--- a/tensorflow/python/ops/losses/losses_impl.py
+++ b/tensorflow/python/ops/losses/losses_impl.py
@@ -652,7 +652,7 @@ def softmax_cross_entropy(
 
   Args:
     onehot_labels: `[batch_size, num_classes]` target one-hot-encoded labels.
-    logits: [batch_size, num_classes] logits outputs of the network .
+    logits: `[batch_size, num_classes]` logits outputs of the network .
     weights: Optional `Tensor` whose rank is either 0, or rank 1 and is
       broadcastable to the loss which is a `Tensor` of shape `[batch_size]`.
     label_smoothing: If greater than 0 then smooth the labels.
diff --git a/tensorflow/python/platform/tf_logging.py b/tensorflow/python/platform/tf_logging.py
index 71ee5e365f..85ed4f071c 100644
--- a/tensorflow/python/platform/tf_logging.py
+++ b/tensorflow/python/platform/tf_logging.py
@@ -30,64 +30,92 @@ from logging import ERROR
 from logging import FATAL
 from logging import INFO
 from logging import WARN
+import threading
 
 import six
 
 from tensorflow.python.util.all_util import remove_undocumented
 
 
-# Determine whether we are in an interactive environment
-_interactive = False
-try:
-  # This is only defined in interactive shells
-  if _sys.ps1: _interactive = True
-except AttributeError:
-  # Even now, we may be in an interactive shell with `python -i`.
-  _interactive = _sys.flags.interactive
+# Don't use this directly. Use _get_logger() instead.
+_logger = None
+_logger_lock = threading.Lock()
 
-# Scope the tensorflow logger to not conflict with users' loggers
-_logger = _logging.getLogger('tensorflow')
 
-# If we are in an interactive environment (like jupyter), set loglevel to info
-# and pipe the output to stdout
-if _interactive:
-  _logger.setLevel(INFO)
-  _logging_target = _sys.stdout
-else:
-  _logging_target = _sys.stderr
+def _get_logger():
+  global _logger
 
-# Add the output handler
-_handler = _logging.StreamHandler(_logging_target)
-_handler.setFormatter(_logging.Formatter(_logging.BASIC_FORMAT, None))
-_logger.addHandler(_handler)
+  # Use double-checked locking to avoid taking lock unnecessarily.
+  if _logger:
+    return _logger
+
+  _logger_lock.acquire()
+
+  try:
+    if _logger:
+      return _logger
+
+    # Scope the TensorFlow logger to not conflict with users' loggers.
+    logger = _logging.getLogger('tensorflow')
+
+    # Don't further configure the TensorFlow logger if the root logger is
+    # already configured. This prevents double logging in those cases.
+    if not _logging.getLogger().handlers:
+      # Determine whether we are in an interactive environment
+      _interactive = False
+      try:
+        # This is only defined in interactive shells.
+        if _sys.ps1: _interactive = True
+      except AttributeError:
+        # Even now, we may be in an interactive shell with `python -i`.
+        _interactive = _sys.flags.interactive
+
+      # If we are in an interactive environment (like Jupyter), set loglevel
+      # to INFO and pipe the output to stdout.
+      if _interactive:
+        logger.setLevel(INFO)
+        _logging_target = _sys.stdout
+      else:
+        _logging_target = _sys.stderr
+
+      # Add the output handler.
+      _handler = _logging.StreamHandler(_logging_target)
+      _handler.setFormatter(_logging.Formatter(_logging.BASIC_FORMAT, None))
+      logger.addHandler(_handler)
+
+    _logger = logger
+    return _logger
+
+  finally:
+    _logger_lock.release()
 
 
 def log(level, msg, *args, **kwargs):
-  _logger.log(level, msg, *args, **kwargs)
+  _get_logger().log(level, msg, *args, **kwargs)
 
 
 def debug(msg, *args, **kwargs):
-  _logger.debug(msg, *args, **kwargs)
+  _get_logger().debug(msg, *args, **kwargs)
 
 
 def error(msg, *args, **kwargs):
-  _logger.error(msg, *args, **kwargs)
+  _get_logger().error(msg, *args, **kwargs)
 
 
 def fatal(msg, *args, **kwargs):
-  _logger.fatal(msg, *args, **kwargs)
+  _get_logger().fatal(msg, *args, **kwargs)
 
 
 def info(msg, *args, **kwargs):
-  _logger.info(msg, *args, **kwargs)
+  _get_logger().info(msg, *args, **kwargs)
 
 
 def warn(msg, *args, **kwargs):
-  _logger.warn(msg, *args, **kwargs)
+  _get_logger().warn(msg, *args, **kwargs)
 
 
 def warning(msg, *args, **kwargs):
-  _logger.warning(msg, *args, **kwargs)
+  _get_logger().warning(msg, *args, **kwargs)
 
 
 _level_names = {
@@ -118,7 +146,7 @@ def flush():
 
 # Code below is taken from pyglib/logging
 def vlog(level, msg, *args, **kwargs):
-  _logger.log(level, msg, *args, **kwargs)
+  _get_logger().log(level, msg, *args, **kwargs)
 
 
 def _GetNextLogCountPerToken(token):
@@ -225,12 +253,12 @@ def google2_log_prefix(level, timestamp=None, file_and_line=None):
 
 def get_verbosity():
   """Return how much logging output will be produced."""
-  return _logger.getEffectiveLevel()
+  return _get_logger().getEffectiveLevel()
 
 
 def set_verbosity(v):
   """Sets the threshold for what messages will be logged."""
-  _logger.setLevel(v)
+  _get_logger().setLevel(v)
 
 
 def _get_thread_id():
diff --git a/tensorflow/tools/pip_package/setup.py b/tensorflow/tools/pip_package/setup.py
index dc19e1bc94..5ddc688a4c 100644
--- a/tensorflow/tools/pip_package/setup.py
+++ b/tensorflow/tools/pip_package/setup.py
@@ -57,6 +57,8 @@ if sys.version_info.major == 3:
   REQUIRED_PACKAGES.append('wheel >= 0.26')
 else:
   REQUIRED_PACKAGES.append('wheel')
+  # mock comes with unittest.mock for python3, need to install for python2
+  REQUIRED_PACKAGES.append('mock >= 2.0.0')
 
 # tf-nightly should depend on tb-nightly
 if 'tf_nightly' in project_name:
@@ -65,6 +67,11 @@ if 'tf_nightly' in project_name:
       REQUIRED_PACKAGES[i] = 'tb-nightly >= 1.5.0a0, < 1.6.0a0'
       break
 
+# weakref.finalize and enum were introduced in Python 3.4
+if sys.version_info < (3, 4):
+  REQUIRED_PACKAGES.append('backports.weakref >= 1.0rc1')
+  REQUIRED_PACKAGES.append('enum34 >= 1.1.6')
+
 # pylint: disable=line-too-long
 CONSOLE_SCRIPTS = [
     'freeze_graph = tensorflow.python.tools.freeze_graph:main',
diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl
index 5753b0c897..20e1aaaf6e 100644
--- a/tensorflow/workspace.bzl
+++ b/tensorflow/workspace.bzl
@@ -1,40 +1,21 @@
 # TensorFlow external dependencies that can be loaded in WORKSPACE files.
 
 load("//third_party/gpus:cuda_configure.bzl", "cuda_configure")
-
-load("//third_party/sycl:sycl_configure.bzl", "sycl_configure")
 load("//third_party/mkl:build_defs.bzl", "mkl_repository")
-load(
-    "@io_bazel_rules_closure//closure/private:java_import_external.bzl",
-    "java_import_external",
-)
-load("@io_bazel_rules_closure//closure:defs.bzl", "filegroup_external")
 load("//third_party/py:python_configure.bzl", "python_configure")
-load(
-    "//third_party/toolchains/cpus/arm:arm_compiler_configure.bzl",
-    "arm_compiler_configure",
-)
-
-def _is_windows(repository_ctx):
-  """Returns true if the host operating system is windows."""
-  return repository_ctx.os.name.lower().find("windows") != -1
-
-def _get_env_var(repository_ctx, name):
-  """Find an environment variable."""
-  if name in repository_ctx.os.environ:
-    return repository_ctx.os.environ[name]
-  else:
-    return None
+load("//third_party/sycl:sycl_configure.bzl", "sycl_configure")
+load("//third_party/toolchains/cpus/arm:arm_compiler_configure.bzl", "arm_compiler_configure")
+load("//third_party:repo.bzl", "tf_http_archive")
+load("@io_bazel_rules_closure//closure/private:java_import_external.bzl", "java_import_external")
+load("@io_bazel_rules_closure//closure:defs.bzl", "filegroup_external")
 
 # Parse the bazel version string from `native.bazel_version`.
 def _parse_bazel_version(bazel_version):
   # Remove commit from version.
   version = bazel_version.split(" ", 1)[0]
-
   # Split into (release, date) parts and only return the release
   # as a tuple of integers.
   parts = version.split("-", 1)
-
   # Turn "release" into a tuple of strings
   version_tuple = ()
   for number in parts[0].split("."):
@@ -57,50 +38,6 @@ def check_version(bazel_version):
       fail("\nCurrent Bazel version is {}, expected at least {}\n".format(
           native.bazel_version, bazel_version))
 
-# Executes specified command with arguments and calls 'fail' if it exited with
-# non-zero code
-def _execute_and_check_ret_code(repo_ctx, cmd_and_args):
-  result = repo_ctx.execute(cmd_and_args, timeout=10)
-  if result.return_code != 0:
-    fail(("Non-zero return code({1}) when executing '{0}':\n" + "Stdout: {2}\n"
-          + "Stderr: {3}").format(" ".join(cmd_and_args), result.return_code,
-                                  result.stdout, result.stderr))
-
-# Apply a patch_file to the repository root directory
-# Runs 'patch -p1'
-def _apply_patch(repo_ctx, patch_file):
-  # Don't check patch on Windows, because patch is only available under bash.
-  if not _is_windows(repo_ctx) and not repo_ctx.which("patch"):
-    fail("patch command is not found, please install it")
-
-  cmd = [
-      "patch", "-p1", "-d", repo_ctx.path("."), "-i", repo_ctx.path(patch_file)
-  ]
-  if _is_windows(repo_ctx):
-    bazel_sh = _get_env_var(repo_ctx, "BAZEL_SH")
-    if not bazel_sh:
-      fail("BAZEL_SH environment variable is not set")
-    cmd = [bazel_sh, "-l", "-c", " ".join(cmd)]
-  _execute_and_check_ret_code(repo_ctx, cmd)
-
-# Download the repository and apply a patch to its root
-def _patched_http_archive_impl(repo_ctx):
-  repo_ctx.download_and_extract(
-      repo_ctx.attr.urls,
-      sha256=repo_ctx.attr.sha256,
-      stripPrefix=repo_ctx.attr.strip_prefix)
-  _apply_patch(repo_ctx, repo_ctx.attr.patch_file)
-
-patched_http_archive = repository_rule(
-    attrs = {
-        "patch_file": attr.label(),
-        "urls": attr.string_list(default = []),
-        "sha256": attr.string(default = ""),
-        "strip_prefix": attr.string(default = ""),
-    },
-    implementation = _patched_http_archive_impl,
-)
-
 # If TensorFlow is linked as a submodule.
 # path_prefix is no longer used.
 # tf_repo_name is thought to be under consideration.
@@ -134,7 +71,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""):
     print("path_prefix was specified to tf_workspace but is no longer used " +
           "and will be removed in the future.")
 
-  native.new_http_archive(
+  tf_http_archive(
       name = "mkl_dnn",
       urls = [
           "https://mirror.bazel.build/github.com/01org/mkl-dnn/archive/b01e3a55a07be62172e713bcd2644c5176360212.tar.gz",
@@ -145,7 +82,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""):
       build_file = str(Label("//third_party/mkl_dnn:mkldnn.BUILD")),
   )
 
-  native.http_archive(
+  tf_http_archive(
       name = "com_google_absl",
       urls = [
           "https://mirror.bazel.build/github.com/abseil/abseil-cpp/archive/cc4bed2d74f7c8717e31f9579214ab52a9c9c610.tar.gz",
@@ -155,7 +92,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""):
      strip_prefix = "abseil-cpp-cc4bed2d74f7c8717e31f9579214ab52a9c9c610",
   )
 
-  native.new_http_archive(
+  tf_http_archive(
       name = "eigen_archive",
       urls = [
           "https://mirror.bazel.build/bitbucket.org/eigen/eigen/get/429aa5254200.tar.gz",
@@ -166,18 +103,20 @@ def tf_workspace(path_prefix="", tf_repo_name=""):
       build_file = str(Label("//third_party:eigen.BUILD")),
   )
 
-  native.new_http_archive(
+  tf_http_archive(
       name = "arm_compiler",
-      build_file = str(Label("//:arm_compiler.BUILD")),
       sha256 = "970285762565c7890c6c087d262b0a18286e7d0384f13a37786d8521773bc969",
       strip_prefix = "tools-0e906ebc527eab1cdbf7adabff5b474da9562e9f/arm-bcm2708/arm-rpi-4.9.3-linux-gnueabihf",
       urls = [
           "https://mirror.bazel.build/github.com/raspberrypi/tools/archive/0e906ebc527eab1cdbf7adabff5b474da9562e9f.tar.gz",
+          # Please uncomment me, when the next upgrade happens. Then
+          # remove the whitelist entry in third_party/repo.bzl.
           # "https://github.com/raspberrypi/tools/archive/0e906ebc527eab1cdbf7adabff5b474da9562e9f.tar.gz",
       ],
+      build_file = str(Label("//:arm_compiler.BUILD")),
   )
 
-  native.new_http_archive(
+  tf_http_archive(
       name = "libxsmm_archive",
       urls = [
           "https://mirror.bazel.build/github.com/hfp/libxsmm/archive/1.8.1.tar.gz",
@@ -188,15 +127,12 @@ def tf_workspace(path_prefix="", tf_repo_name=""):
       build_file = str(Label("//third_party:libxsmm.BUILD")),
   )
 
-  native.bind(
-      name = "xsmm_avx",
-      actual = "@libxsmm_archive//third_party:xsmm_avx",
-  )
-
-  native.new_http_archive(
+  tf_http_archive(
       name = "ortools_archive",
       urls = [
           "https://mirror.bazel.build/github.com/google/or-tools/archive/253f7955c6a1fd805408fba2e42ac6d45b312d15.tar.gz",
+          # Please uncomment me, when the next upgrade happens. Then
+          # remove the whitelist entry in third_party/repo.bzl.
           # "https://github.com/google/or-tools/archive/253f7955c6a1fd805408fba2e42ac6d45b312d15.tar.gz",
       ],
       sha256 = "932075525642b04ac6f1b50589f1df5cd72ec2f448b721fd32234cf183f0e755",
@@ -204,7 +140,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""):
       build_file = str(Label("//third_party:ortools.BUILD")),
   )
 
-  native.http_archive(
+  tf_http_archive(
       name = "com_googlesource_code_re2",
       urls = [
           "https://mirror.bazel.build/github.com/google/re2/archive/26cd968b735e227361c9703683266f01e5df7857.tar.gz",
@@ -215,7 +151,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""):
       strip_prefix = "re2-26cd968b735e227361c9703683266f01e5df7857",
   )
 
-  native.http_archive(
+  tf_http_archive(
       name = "gemmlowp",
       urls = [
           "https://mirror.bazel.build/github.com/google/gemmlowp/archive/010bb3e71a26ca1d0884a167081d092b43563996.zip",
@@ -225,7 +161,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""):
       strip_prefix = "gemmlowp-010bb3e71a26ca1d0884a167081d092b43563996",
   )
 
-  native.new_http_archive(
+  tf_http_archive(
       name = "farmhash_archive",
       urls = [
           "https://mirror.bazel.build/github.com/google/farmhash/archive/816a4ae622e964763ca0862d9dbd19324a1eaf45.tar.gz",
@@ -236,12 +172,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""):
       build_file = str(Label("//third_party:farmhash.BUILD")),
   )
 
-  native.bind(
-      name = "farmhash",
-      actual = "@farmhash//:farmhash",
-  )
-
-  native.new_http_archive(
+  tf_http_archive(
       name = "highwayhash",
       urls = [
           "https://mirror.bazel.build/github.com/google/highwayhash/archive/dfcb97ca4fe9277bf9dc1802dd979b071896453b.tar.gz",
@@ -252,7 +183,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""):
       build_file = str(Label("//third_party:highwayhash.BUILD")),
   )
 
-  native.new_http_archive(
+  tf_http_archive(
       name = "nasm",
       urls = [
           "https://mirror.bazel.build/www.nasm.us/pub/nasm/releasebuilds/2.12.02/nasm-2.12.02.tar.bz2",
@@ -263,7 +194,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""):
       build_file = str(Label("//third_party:nasm.BUILD")),
   )
 
-  native.new_http_archive(
+  tf_http_archive(
       name = "jpeg",
       urls = [
           "https://mirror.bazel.build/github.com/libjpeg-turbo/libjpeg-turbo/archive/1.5.1.tar.gz",
@@ -274,7 +205,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""):
       build_file = str(Label("//third_party/jpeg:jpeg.BUILD")),
   )
 
-  native.new_http_archive(
+  tf_http_archive(
       name = "png_archive",
       urls = [
           "https://mirror.bazel.build/github.com/glennrp/libpng/archive/v1.2.53.tar.gz",
@@ -285,7 +216,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""):
       build_file = str(Label("//third_party:png.BUILD")),
   )
 
-  native.new_http_archive(
+  tf_http_archive(
       name = "sqlite_archive",
       urls = [
           "https://mirror.bazel.build/www.sqlite.org/2017/sqlite-amalgamation-3200000.zip",
@@ -293,10 +224,10 @@ def tf_workspace(path_prefix="", tf_repo_name=""):
       ],
       sha256 = "208780b3616f9de0aeb50822b7a8f5482f6515193859e91ed61637be6ad74fd4",
       strip_prefix = "sqlite-amalgamation-3200000",
-      build_file = str(Label("//third_party:sqlite.BUILD"))
+      build_file = str(Label("//third_party:sqlite.BUILD")),
   )
 
-  native.new_http_archive(
+  tf_http_archive(
       name = "gif_archive",
       urls = [
           "https://mirror.bazel.build/ufpr.dl.sourceforge.net/project/giflib/giflib-5.1.4.tar.gz",
@@ -307,7 +238,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""):
       build_file = str(Label("//third_party:gif.BUILD")),
   )
 
-  native.new_http_archive(
+  tf_http_archive(
       name = "six_archive",
       urls = [
           "https://mirror.bazel.build/pypi.python.org/packages/source/s/six/six-1.10.0.tar.gz",
@@ -318,7 +249,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""):
       build_file = str(Label("//third_party:six.BUILD")),
   )
 
-  native.http_archive(
+  tf_http_archive(
       name = "absl_py",
       urls = [
           "https://mirror.bazel.build/github.com/abseil/abseil-py/archive/acec853355ef987eae48a8d87a79351c15dff593.tar.gz",
@@ -328,7 +259,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""):
       strip_prefix = "abseil-py-acec853355ef987eae48a8d87a79351c15dff593",
   )
 
-  native.new_http_archive(
+  tf_http_archive(
       name = "org_python_pypi_backports_weakref",
       urls = [
           "https://mirror.bazel.build/pypi.python.org/packages/bc/cc/3cdb0a02e7e96f6c70bd971bc8a90b8463fda83e264fa9c5c1c98ceabd81/backports.weakref-1.0rc1.tar.gz",
@@ -339,7 +270,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""):
       build_file = str(Label("//third_party:backports_weakref.BUILD")),
   )
 
-  native.new_http_archive(
+  tf_http_archive(
       name = "com_github_andreif_codegen",
       urls = [
           "https://mirror.bazel.build/github.com/andreif/codegen/archive/1.0.tar.gz",
@@ -361,12 +292,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""):
       },
   )
 
-  native.bind(
-      name = "six",
-      actual = "@six_archive//:six",
-  )
-
-  patched_http_archive(
+  tf_http_archive(
       name = "protobuf_archive",
       urls = [
           "https://mirror.bazel.build/github.com/google/protobuf/archive/b04e5cba356212e4e8c66c61bbe0c3a20537c5b9.tar.gz",
@@ -381,20 +307,10 @@ def tf_workspace(path_prefix="", tf_repo_name=""):
       patch_file = str(Label("//third_party/protobuf:add_noinlines.patch")),
   )
 
-  native.bind(
-      name = "protobuf",
-      actual = "@protobuf_archive//:protobuf",
-  )
-
-  native.bind(
-      name = "protobuf_headers",
-      actual = "@protobuf_archive//:protobuf_headers",
-  )
-
   # We need to import the protobuf library under the names com_google_protobuf
   # and com_google_protobuf_cc to enable proto_library support in bazel.
   # Unfortunately there is no way to alias http_archives at the moment.
-  native.http_archive(
+  tf_http_archive(
       name = "com_google_protobuf",
       urls = [
           "https://mirror.bazel.build/github.com/google/protobuf/archive/b04e5cba356212e4e8c66c61bbe0c3a20537c5b9.tar.gz",
@@ -404,7 +320,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""):
       strip_prefix = "protobuf-b04e5cba356212e4e8c66c61bbe0c3a20537c5b9",
   )
 
-  native.http_archive(
+  tf_http_archive(
       name = "com_google_protobuf_cc",
       urls = [
           "https://mirror.bazel.build/github.com/google/protobuf/archive/b04e5cba356212e4e8c66c61bbe0c3a20537c5b9.tar.gz",
@@ -414,7 +330,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""):
       strip_prefix = "protobuf-b04e5cba356212e4e8c66c61bbe0c3a20537c5b9",
   )
 
-  native.http_archive(
+  tf_http_archive(
       name = "nsync",
       urls = [
           "https://mirror.bazel.build/github.com/google/nsync/archive/8502189abfa44c249c01c2cad64e6ed660a9a668.tar.gz",
@@ -424,7 +340,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""):
       strip_prefix = "nsync-8502189abfa44c249c01c2cad64e6ed660a9a668",
   )
 
-  native.http_archive(
+  tf_http_archive(
       name = "com_google_googletest",
       urls = [
           "https://mirror.bazel.build/github.com/google/googletest/archive/9816b96a6ddc0430671693df90192bbee57108b6.zip",
@@ -434,7 +350,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""):
       strip_prefix = "googletest-9816b96a6ddc0430671693df90192bbee57108b6",
   )
 
-  native.http_archive(
+  tf_http_archive(
       name = "com_github_gflags_gflags",
       urls = [
           "https://mirror.bazel.build/github.com/gflags/gflags/archive/f8a0efe03aa69b3336d8e228b37d4ccb17324b88.tar.gz",
@@ -444,12 +360,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""):
       strip_prefix = "gflags-f8a0efe03aa69b3336d8e228b37d4ccb17324b88",
   )
 
-  native.bind(
-      name = "python_headers",
-      actual = str(Label("//util/python:python_headers")),
-  )
-
-  native.new_http_archive(
+  tf_http_archive(
       name = "pcre",
       sha256 = "ccdf7e788769838f8285b3ee672ed573358202305ee361cfec7a4a4fb005bbc7",
       urls = [
@@ -460,7 +371,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""):
       build_file = str(Label("//third_party:pcre.BUILD")),
   )
 
-  native.new_http_archive(
+  tf_http_archive(
       name = "swig",
       sha256 = "58a475dbbd4a4d7075e5fe86d4e54c9edde39847cdb96a3053d87cb64a23a453",
       urls = [
@@ -472,7 +383,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""):
       build_file = str(Label("//third_party:swig.BUILD")),
   )
 
-  native.new_http_archive(
+  tf_http_archive(
       name = "curl",
       sha256 = "ff3e80c1ca6a068428726cd7dd19037a47cc538ce58ef61c59587191039b2ca6",
       urls = [
@@ -483,26 +394,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""):
       build_file = str(Label("//third_party:curl.BUILD")),
   )
 
-  # grpc expects //external:protobuf_clib and //external:protobuf_compiler
-  # to point to the protobuf's compiler library.
-  native.bind(
-      name = "protobuf_clib",
-      actual = "@protobuf_archive//:protoc_lib",
-  )
-
-  native.bind(
-      name = "libssl",
-      actual = "@boringssl//:ssl",
-  )
-
-  # gRPC has includes directly from their third_party path for nanopb, so we
-  # must depend on their version of it.
-  native.bind(
-      name = "nanopb",
-      actual = "@grpc//third_party/nanopb:nanopb",
-  )
-
-  native.http_archive(
+  tf_http_archive(
       name = "grpc",
       urls = [
           "https://mirror.bazel.build/github.com/grpc/grpc/archive/f836c7e941beb003289dc6e9a58a6e47f5caa5f0.tar.gz",
@@ -512,26 +404,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""):
       strip_prefix = "grpc-f836c7e941beb003289dc6e9a58a6e47f5caa5f0",
   )
 
-  # gRPC wants the existence of a cares dependence but its contents are not
-  # actually important since we have set GRPC_ARES=0 in tools/bazel.rc
-  native.bind(
-      name = "cares",
-      actual = "@grpc//third_party/nanopb:nanopb",
-  )
-
-  # protobuf expects //external:grpc_cpp_plugin to point to grpc's
-  # C++ plugin code generator.
-  native.bind(
-      name = "grpc_cpp_plugin",
-      actual = "@grpc//:grpc_cpp_plugin",
-  )
-
-  native.bind(
-      name = "grpc_lib",
-      actual = "@grpc//:grpc++_unsecure",
-  )
-
-  native.new_http_archive(
+  tf_http_archive(
       name = "linenoise",
       sha256 = "7f51f45887a3d31b4ce4fa5965210a5e64637ceac12720cfce7954d6a2e812f7",
       urls = [
@@ -544,7 +417,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""):
 
   # TODO(phawkins): currently, this rule uses an unofficial LLVM mirror.
   # Switch to an official source of snapshots if/when possible.
-  native.new_http_archive(
+  tf_http_archive(
       name = "llvm",
       urls = [
           "https://mirror.bazel.build/github.com/llvm-mirror/llvm/archive/9ab4c272cb604a7f947865428c4ef2169fee2100.tar.gz",
@@ -555,7 +428,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""):
       build_file = str(Label("//third_party/llvm:llvm.BUILD")),
   )
 
-  native.new_http_archive(
+  tf_http_archive(
       name = "lmdb",
       urls = [
           "https://mirror.bazel.build/github.com/LMDB/lmdb/archive/LMDB_0.9.19.tar.gz",
@@ -566,7 +439,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""):
       build_file = str(Label("//third_party:lmdb.BUILD")),
   )
 
-  native.new_http_archive(
+  tf_http_archive(
       name = "jsoncpp_git",
       urls = [
           "https://mirror.bazel.build/github.com/open-source-parsers/jsoncpp/archive/11086dd6a7eba04289944367ca82cea71299ed70.tar.gz",
@@ -577,12 +450,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""):
       build_file = str(Label("//third_party:jsoncpp.BUILD")),
   )
 
-  native.bind(
-      name = "jsoncpp",
-      actual = "@jsoncpp_git//:jsoncpp",
-  )
-
-  native.http_archive(
+  tf_http_archive(
       name = "boringssl",
       urls = [
           "https://mirror.bazel.build/github.com/google/boringssl/archive/a0fb951d2a26a8ee746b52f3ba81ab011a0af778.tar.gz",
@@ -592,7 +460,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""):
       strip_prefix = "boringssl-a0fb951d2a26a8ee746b52f3ba81ab011a0af778",
   )
 
-  native.new_http_archive(
+  tf_http_archive(
       name = "zlib_archive",
       urls = [
           "https://mirror.bazel.build/zlib.net/zlib-1.2.8.tar.gz",
@@ -603,12 +471,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""):
       build_file = str(Label("//third_party:zlib.BUILD")),
   )
 
-  native.bind(
-      name = "zlib",
-      actual = "@zlib_archive//:zlib",
-  )
-
-  native.new_http_archive(
+  tf_http_archive(
       name = "fft2d",
       urls = [
           "https://mirror.bazel.build/www.kurims.kyoto-u.ac.jp/~ooura/fft.tgz",
@@ -618,7 +481,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""):
       build_file = str(Label("//third_party/fft2d:fft2d.BUILD")),
   )
 
-  native.new_http_archive(
+  tf_http_archive(
       name = "snappy",
       urls = [
           "https://mirror.bazel.build/github.com/google/snappy/archive/1.1.4.tar.gz",
@@ -629,7 +492,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""):
       build_file = str(Label("//third_party:snappy.BUILD")),
   )
 
-  native.new_http_archive(
+  tf_http_archive(
       name = "nccl_archive",
       urls = [
           "https://mirror.bazel.build/github.com/nvidia/nccl/archive/03d856977ecbaac87e598c0c4bafca96761b9ac7.tar.gz",
@@ -640,14 +503,14 @@ def tf_workspace(path_prefix="", tf_repo_name=""):
       build_file = str(Label("//third_party:nccl.BUILD")),
   )
 
-  native.new_http_archive(
+  tf_http_archive(
       name = "aws",
       urls = [
-          "https://mirror.bazel.build/github.com/aws/aws-sdk-cpp/archive/1.0.90.tar.gz",
-          "https://github.com/aws/aws-sdk-cpp/archive/1.0.90.tar.gz",
+          "https://mirror.bazel.build/github.com/aws/aws-sdk-cpp/archive/1.3.15.tar.gz",
+          "https://github.com/aws/aws-sdk-cpp/archive/1.3.15.tar.gz",
       ],
-      sha256 = "f599b57aec4f03ad696044dd430b2d201864113937353adc346f53ad47991319",
-      strip_prefix = "aws-sdk-cpp-1.0.90",
+      sha256 = "b888d8ce5fc10254c3dd6c9020c7764dd53cf39cf011249d0b4deda895de1b7c",
+      strip_prefix = "aws-sdk-cpp-1.3.15",
       build_file = str(Label("//third_party:aws.BUILD")),
   )
 
@@ -676,7 +539,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""):
       testonly_ = True,
   )
 
-  native.new_http_archive(
+  tf_http_archive(
       name = "jemalloc",
       urls = [
           "https://mirror.bazel.build/github.com/jemalloc/jemalloc/archive/4.4.0.tar.gz",
@@ -722,7 +585,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""):
       licenses = ["notice"],  # Apache 2.0
   )
 
-  native.new_http_archive(
+  tf_http_archive(
       name = "com_google_pprof",
       urls = [
           "https://mirror.bazel.build/github.com/google/pprof/archive/c0fb62ec88c411cc91194465e54db2632845b650.tar.gz",
@@ -733,7 +596,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""):
       build_file = str(Label("//third_party:pprof.BUILD")),
   )
 
-  native.new_http_archive(
+  tf_http_archive(
       name = "cub_archive",
       urls = [
           "https://mirror.bazel.build/github.com/NVlabs/cub/archive/1.7.4.zip",
@@ -744,12 +607,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""):
       build_file = str(Label("//third_party:cub.BUILD")),
   )
 
-  native.bind(
-      name = "cub",
-      actual = "@cub_archive//:cub",
-  )
-
-  native.new_http_archive(
+  tf_http_archive(
       name = "cython",
       sha256 = "6dcd30b5ceb887b2b965ee7ceb82ea3acb5f0642fe2206c7636b45acea4798e5",
       urls = [
@@ -758,9 +616,10 @@ def tf_workspace(path_prefix="", tf_repo_name=""):
       ],
       strip_prefix = "cython-3732784c45cfb040a5b0936951d196f83a12ea17",
       build_file = str(Label("//third_party:cython.BUILD")),
+      delete = ["BUILD.bazel"],
   )
 
-  native.http_archive(
+  tf_http_archive(
       name = "bazel_toolchains",
       urls = [
           "https://mirror.bazel.build/github.com/bazelbuild/bazel-toolchains/archive/b49ba3689f46ac50e9277dafd8ff32b26951f82e.tar.gz",
@@ -770,7 +629,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""):
       strip_prefix = "bazel-toolchains-b49ba3689f46ac50e9277dafd8ff32b26951f82e",
   )
 
-  native.new_http_archive(
+  tf_http_archive(
       name = "arm_neon_2_x86_sse",
       sha256 = "c8d90aa4357f8079d427e87a6f4c493da1fa4140aee926c05902d7ec1533d9a5",
       strip_prefix = "ARM_NEON_2_x86_SSE-0f77d9d182265259b135dad949230ecbf1a2633d",
@@ -781,25 +640,102 @@ def tf_workspace(path_prefix="", tf_repo_name=""):
       build_file = str(Label("//third_party:arm_neon_2_x86_sse.BUILD")),
   )
 
-  native.new_http_archive(
+  tf_http_archive(
       name = "flatbuffers",
-      build_file = str(Label("//third_party/flatbuffers:flatbuffers.BUILD")),
       strip_prefix = "flatbuffers-971a68110e4fc1bace10fcb6deeb189e7e1a34ce",
       sha256 = "874088d2ee0d9f8524191f77209556415f03dd44e156276edf19e5b90ceb5f55",
       urls = [
           "https://mirror.bazel.build/github.com/google/flatbuffers/archive/971a68110e4fc1bace10fcb6deeb189e7e1a34ce.tar.gz",
           "https://github.com/google/flatbuffers/archive/971a68110e4fc1bace10fcb6deeb189e7e1a34ce.tar.gz",
       ],
+      build_file = str(Label("//third_party/flatbuffers:flatbuffers.BUILD")),
   )
 
-  native.new_http_archive(
+
+  tf_http_archive(
       name = "tflite_mobilenet",
-      build_file = str(Label("//third_party:tflite_mobilenet.BUILD")),
       sha256 = "23f814d1c076bdf03715dfb6cab3713aa4fbdf040fd5448c43196bd2e97a4c1b",
       urls = [
           "https://mirror.bazel.build/storage.googleapis.com/download.tensorflow.org/models/tflite/mobilenet_v1_224_android_quant_2017_11_08.zip",
           "https://storage.googleapis.com/download.tensorflow.org/models/tflite/mobilenet_v1_224_android_quant_2017_11_08.zip",
       ],
+      build_file = str(Label("//third_party:tflite_mobilenet.BUILD")),
+  )
+
+  ##############################################################################
+  # BIND DEFINITIONS
+  #
+  # Please do not add bind() definitions unless we have no other choice.
+  # If that ends up being the case, please leave a comment explaining
+  # why we can't depend on the canonical build target.
+
+  # gRPC wants a cares dependency but its contents is not actually
+  # important since we have set GRPC_ARES=0 in tools/bazel.rc
+  native.bind(
+      name = "cares",
+      actual = "@grpc//third_party/nanopb:nanopb",
+  )
+
+  # Needed by Protobuf
+  native.bind(
+      name = "grpc_cpp_plugin",
+      actual = "@grpc//:grpc_cpp_plugin",
+  )
+
+  # gRPC has three empty C++ functions which it wants the user to define
+  # at build time. https://github.com/grpc/grpc/issues/13590
+  native.bind(
+      name = "grpc_lib",
+      actual = "@grpc//:grpc++_unsecure",
+  )
+
+  # Needed by gRPC
+  native.bind(
+      name = "libssl",
+      actual = "@boringssl//:ssl",
+  )
+
+  # Needed by gRPC
+  native.bind(
+      name = "nanopb",
+      actual = "@grpc//third_party/nanopb:nanopb",
+  )
+
+  # Needed by gRPC
+  native.bind(
+      name = "protobuf",
+      actual = "@protobuf_archive//:protobuf",
+  )
+
+  # gRPC expects //external:protobuf_clib and //external:protobuf_compiler
+  # to point to Protobuf's compiler library.
+  native.bind(
+      name = "protobuf_clib",
+      actual = "@protobuf_archive//:protoc_lib",
+  )
+
+  # Needed by gRPC
+  native.bind(
+      name = "protobuf_headers",
+      actual = "@protobuf_archive//:protobuf_headers",
+  )
+
+  # Needed by Protobuf
+  native.bind(
+      name = "python_headers",
+      actual = str(Label("//util/python:python_headers")),
+  )
+
+  # Needed by Protobuf
+  native.bind(
+      name = "six",
+      actual = "@six_archive//:six",
+  )
+
+  # Needed by gRPC
+  native.bind(
+      name = "zlib",
+      actual = "@zlib_archive//:zlib",
   )
 
   native.new_http_archive(
diff --git a/third_party/repo.bzl b/third_party/repo.bzl
index eb91316f67..d6e5dfced0 100644
--- a/third_party/repo.bzl
+++ b/third_party/repo.bzl
@@ -96,6 +96,7 @@ tf_http_archive = repository_rule(
         "build_file": attr.label(),
     })
 """Downloads and creates Bazel repos for dependencies.
+
 This is a swappable replacement for both http_archive() and
 new_http_archive() that offers some additional features. It also helps
 ensure best practices are followed.
-- 
GitLab


From 04fa0f73c9e48439121f010e69cfa71de030c57d Mon Sep 17 00:00:00 2001
From: Yao Zhang <yaozhang@google.com>
Date: Wed, 6 Dec 2017 19:28:36 -0800
Subject: [PATCH 0727/1225] If a graph is already optimized by layout
 optimizer, no need to and do not optimize the second time.

PiperOrigin-RevId: 178188719
---
 .../core/grappler/optimizers/layout_optimizer.cc   | 14 ++++++++++++++
 .../grappler/optimizers/layout_optimizer_test.cc   | 14 ++++++++++++++
 2 files changed, 28 insertions(+)

diff --git a/tensorflow/core/grappler/optimizers/layout_optimizer.cc b/tensorflow/core/grappler/optimizers/layout_optimizer.cc
index c7461b93fe..86e2610c00 100644
--- a/tensorflow/core/grappler/optimizers/layout_optimizer.cc
+++ b/tensorflow/core/grappler/optimizers/layout_optimizer.cc
@@ -36,6 +36,7 @@ namespace tensorflow {
 namespace grappler {
 namespace {
 
+const char kPrefix[] = "LayoutOptimizer";
 const char kDim[] = "LayoutOptimizerDim";
 const char kPermNHWCToNCHW[] = "LayoutOptimizerPermConstNHWCToNCHW";
 const char kPermNCHWToNHWC[] = "LayoutOptimizerPermConstNCHWToNHWC";
@@ -94,6 +95,15 @@ std::set<string> GetOpsFormatAgnostic() {
   return ops_format_agnostic;
 }
 
+bool IsNodeByLayoutOptimizer(const string& node_name) {
+  const string prefix_pattern = kPrefix;
+  string prefix = node_name.substr(0, prefix_pattern.length());
+  if (prefix.compare(prefix_pattern) == 0) {
+    return true;
+  }
+  return false;
+}
+
 bool IsNodeNHWCToNCHW(const string& node_name) {
   const string transpose_node_prefix = kTransposeNHWCToNCHW;
   string prefix = node_name.substr(0, transpose_node_prefix.length());
@@ -1342,6 +1352,10 @@ class DataLayoutOptimizer : GraphProcessor {
     // This is the first pass where we expand the nodes which support NCHW.
     std::set<string> ops_format_supported = GetOpsFormatSupported();
     for (int i = 0; i < node_size_original; i++) {
+      if (IsNodeByLayoutOptimizer(graph_->node(i).name())) {
+        return Status(error::INVALID_ARGUMENT,
+                      "The graph is already optimized by layout optimizer.");
+      }
       if (ops_format_supported.find(graph_->node(i).op()) !=
           ops_format_supported.end()) {
         auto node = graph_->mutable_node(i);
diff --git a/tensorflow/core/grappler/optimizers/layout_optimizer_test.cc b/tensorflow/core/grappler/optimizers/layout_optimizer_test.cc
index af07eaf2d5..0ac177d9f7 100644
--- a/tensorflow/core/grappler/optimizers/layout_optimizer_test.cc
+++ b/tensorflow/core/grappler/optimizers/layout_optimizer_test.cc
@@ -861,6 +861,20 @@ TEST_F(LayoutOptimizerTest, SliceNonConst) {
   EXPECT_EQ(perm2->input(2), "LayoutOptimizerGatherAxisConst");
 }
 
+TEST_F(LayoutOptimizerTest, DoNotApplyOptimizerTwice) {
+  tensorflow::Scope s = tensorflow::Scope::NewRootScope();
+  auto scalar =
+      ops::Const(s.WithOpName("LayoutOptimizerAlreadyApplied"), 3.0f, {});
+  auto mul = ops::Mul(s.WithOpName("mul"), scalar, scalar);
+  auto o = ops::Identity(s.WithOpName("o"), mul);
+  GrapplerItem item;
+  TF_CHECK_OK(s.ToGraphDef(&item.graph));
+  LayoutOptimizer optimizer;
+  GraphDef output;
+  Status status = optimizer.Optimize(virtual_cluster_.get(), item, &output);
+  EXPECT_TRUE(errors::IsInvalidArgument(status));
+}
+
 }  // namespace
 }  // namespace grappler
 }  // namespace tensorflow
-- 
GitLab


From 85818528f3caadb9f2a8e5b350cfe210d6661a7d Mon Sep 17 00:00:00 2001
From: Skye Wanderman-Milne <skyewm@google.com>
Date: Wed, 6 Dec 2017 19:33:50 -0800
Subject: [PATCH 0728/1225] Enable using C API with Tensor.consumers

PiperOrigin-RevId: 178189037
---
 tensorflow/python/client/tf_session.i         | 27 ++++++++++++++++---
 tensorflow/python/client/tf_session_helper.cc | 13 +++++++++
 tensorflow/python/client/tf_session_helper.h  |  5 ++++
 tensorflow/python/framework/ops.py            | 23 +++++++++++++---
 tensorflow/python/framework/ops_test.py       | 26 +++++++++++-------
 5 files changed, 76 insertions(+), 18 deletions(-)

diff --git a/tensorflow/python/client/tf_session.i b/tensorflow/python/client/tf_session.i
index d471a39b69..f57c5d73bc 100644
--- a/tensorflow/python/client/tf_session.i
+++ b/tensorflow/python/client/tf_session.i
@@ -145,6 +145,25 @@ tensorflow::ImportNumpy();
   }
 }
 
+%ignore TF_OperationOutputConsumers;
+%unignore TF_OperationOutputConsumers_wrapper;
+// See comment for "%noexception TF_SessionRun_wrapper;"
+%noexception TF_OperationGetOutputConsumers_wrapper;
+
+// Build a Python list of unicode strings and return it. (Operation names are
+// always represented as unicode.)
+%typemap(out) std::vector<const char*>
+tensorflow::TF_OperationOutputConsumers_wrapper {
+  $result = PyList_New($1.size());
+  if (!$result) {
+    SWIG_exception_fail(SWIG_MemoryError, "$symname: couldn't create list");
+  }
+
+  for (size_t i = 0; i < $1.size(); ++i) {
+    PyList_SET_ITEM($result, i, PyUnicode_FromString($1[i]));
+  }
+}
+
 %unignore GetOperationInputs;
 // See comment for "%noexception TF_SessionRun_wrapper;"
 %noexception GetOperationInputs;
@@ -157,10 +176,10 @@ tensorflow::ImportNumpy();
     SWIG_exception_fail(SWIG_MemoryError, "$symname: couldn't create list");
   }
 
-  // Unwrap the generated SwigValueWrapper<std::vector<TF_Output>> via &
-  std::vector<TF_Output>* tf_outputs = &$1;
-  for (size_t i = 0; i < $1.size(); ++i) {
-    PyList_SET_ITEM($result, i, CreateWrappedTFOutput((*tf_outputs)[i]));
+  // Unwrap the generated SwigValueWrapper<std::vector<TF_Output>>
+  const std::vector<TF_Output>& tf_outputs = $1;
+  for (size_t i = 0; i < tf_outputs.size(); ++i) {
+    PyList_SET_ITEM($result, i, CreateWrappedTFOutput(tf_outputs[i]));
   }
 }
 
diff --git a/tensorflow/python/client/tf_session_helper.cc b/tensorflow/python/client/tf_session_helper.cc
index e4bf09a0ca..a00fade7ac 100644
--- a/tensorflow/python/client/tf_session_helper.cc
+++ b/tensorflow/python/client/tf_session_helper.cc
@@ -374,6 +374,19 @@ std::vector<TF_Operation*> TF_OperationGetControlInputs_wrapper(
   return control_inputs;
 }
 
+std::vector<const char*> TF_OperationOutputConsumers_wrapper(
+    TF_Output oper_out) {
+  int num_consumers = TF_OperationOutputNumConsumers(oper_out);
+  std::vector<TF_Input> consumers(num_consumers);
+  TF_OperationOutputConsumers(oper_out, consumers.data(), num_consumers);
+
+  std::vector<const char*> consumer_names(num_consumers);
+  for (int i = 0; i < num_consumers; ++i) {
+    consumer_names[i] = TF_OperationName(consumers[i].oper);
+  }
+  return consumer_names;
+}
+
 TF_Function* TF_GraphToFunction_wrapper(
     const TF_Graph* fn_body, const char* fn_name, bool append_hash_to_fn_name,
     const std::vector<TF_Operation*>* opers,
diff --git a/tensorflow/python/client/tf_session_helper.h b/tensorflow/python/client/tf_session_helper.h
index bb7171db31..3a8506de4d 100644
--- a/tensorflow/python/client/tf_session_helper.h
+++ b/tensorflow/python/client/tf_session_helper.h
@@ -160,6 +160,11 @@ std::vector<TF_Output> GetOperationInputs(TF_Operation* oper);
 std::vector<TF_Operation*> TF_OperationGetControlInputs_wrapper(
     TF_Operation* oper);
 
+// Retrieves the op names of the consumers of `oper_out`. The returned strings
+// have the lifetime of the underlying TF_Graph.
+std::vector<const char*> TF_OperationOutputConsumers_wrapper(
+    TF_Output oper_out);
+
 // `opers` equaling NULL are converted to `nopers = -1`.
 // `output_names` must be empty or have the same length as `outputs`.
 TF_Function* TF_GraphToFunction_wrapper(
diff --git a/tensorflow/python/framework/ops.py b/tensorflow/python/framework/ops.py
index 79cc793f93..551aff9fa9 100644
--- a/tensorflow/python/framework/ops.py
+++ b/tensorflow/python/framework/ops.py
@@ -496,7 +496,17 @@ class Tensor(_TensorLike):
     Returns:
       A list of `Operation`s.
     """
-    return self._consumers
+    if self._op._c_op:  # pylint: disable=protected-access
+      consumer_names = c_api.TF_OperationOutputConsumers_wrapper(
+          self._as_tf_output())
+      # pylint: disable=protected-access
+      return [
+          self.graph._get_operation_by_name_unsafe(name)
+          for name in consumer_names
+      ]
+      # pylint: enable=protected-access
+    else:
+      return self._consumers
 
   def _add_consumer(self, consumer):
     """Add a consumer to this tensor.
@@ -507,6 +517,9 @@ class Tensor(_TensorLike):
     Raises:
       TypeError: if the consumer is not an Operation.
     """
+    # pylint: disable=protected-access
+    assert not self._op._c_op, "Tensor._add_consumer doesn't work with C API"
+    # pylint: enable=protected-access
     if not isinstance(consumer, Operation):
       raise TypeError("Consumer must be an Operation: %s" % consumer)
     self._consumers.append(consumer)
@@ -1631,9 +1644,11 @@ class Operation(object):
     else:
       self._c_op = None
 
-    # Mark that we consume the inputs.
-    for input_tensor in self.inputs:
-      input_tensor._add_consumer(self)  # pylint: disable=protected-access
+    # Mark that we consume the inputs. This is unnecessary and unsupported with
+    # the C API enabled, since the C API tracks the tensor consumers instead.
+    if not self._c_op:
+      for input_tensor in self.inputs:
+        input_tensor._add_consumer(self)  # pylint: disable=protected-access
 
     # Initialize self._outputs.
     if self._c_op:
diff --git a/tensorflow/python/framework/ops_test.py b/tensorflow/python/framework/ops_test.py
index b1ad6ad744..3ac9b10593 100644
--- a/tensorflow/python/framework/ops_test.py
+++ b/tensorflow/python/framework/ops_test.py
@@ -203,13 +203,13 @@ class OperationTest(test_util.TensorFlowTestCase):
     self.assertEqual(dtypes.float32, float_t.dtype)
     self.assertEqual(op, float_t.op)
     self.assertEqual(0, float_t._value_index)
-    self.assertEqual(0, len(float_t._consumers))
+    self.assertEqual(0, len(float_t.consumers()))
     self.assertEqual("myop", float_t._as_node_def_input())
 
     self.assertEqual(dtypes.string, label_str_t.dtype)
     self.assertEqual(op, label_str_t.op)
     self.assertEqual(1, label_str_t._value_index)
-    self.assertEqual(0, len(label_str_t._consumers))
+    self.assertEqual(0, len(label_str_t.consumers()))
     self.assertEqual("myop:1", label_str_t._as_node_def_input())
 
     self.assertProtoEquals("op:'FloatOutputStringOutput' name:'myop'",
@@ -223,8 +223,8 @@ class OperationTest(test_util.TensorFlowTestCase):
     self.assertEqual(1, len(op2.inputs))
     self.assertIs(float_t, op2.inputs[0])
 
-    self.assertEqual(1, len(float_t._consumers))
-    self.assertEqual(op2, float_t._consumers[0])
+    self.assertEqual(1, len(float_t.consumers()))
+    self.assertEqual(op2, float_t.consumers()[0])
 
     self.assertProtoEquals("op:'FloatOutput' name:'myop1'", op1.node_def)
     self.assertProtoEquals("op:'FloatInput' name:'myop2' input:'myop1'",
@@ -243,14 +243,14 @@ class OperationTest(test_util.TensorFlowTestCase):
     op3 = test_ops.foo2(float1_t, label2_str_t, label2_str_t, name="myop3").d.op
     self.assertEqual(2, len(op3.values()))
 
-    self.assertEqual(1, len(float1_t._consumers))
-    self.assertEqual(op3, float1_t._consumers[0])
+    self.assertEqual(1, len(float1_t.consumers()))
+    self.assertEqual(op3, float1_t.consumers()[0])
 
-    self.assertEqual(0, len(float2_t._consumers))
+    self.assertEqual(0, len(float2_t.consumers()))
 
-    self.assertEqual(2, len(label2_str_t._consumers))
-    self.assertEqual(op3, label2_str_t._consumers[0])
-    self.assertEqual(op3, label2_str_t._consumers[1])
+    self.assertEqual(2, len(label2_str_t.consumers()))
+    self.assertEqual(op3, label2_str_t.consumers()[0])
+    self.assertEqual(op3, label2_str_t.consumers()[1])
 
     self.assertProtoEquals("""
     op:'Foo2' name:'myop3'
@@ -511,16 +511,22 @@ class OperationTest(test_util.TensorFlowTestCase):
 
     z.op._update_input(0, y)  # pylint: disable=protected-access
     self.assertEquals(list(z.op.inputs), [y, y])
+    self.assertEquals(x.consumers(), [])
+    self.assertEquals(y.consumers(), [z.op, z.op])
     with session.Session(graph=g) as sess:
       self.assertEquals(sess.run(z), 4)
 
     z.op._update_input(0, x)  # pylint: disable=protected-access
     self.assertEquals(list(z.op.inputs), [x, y])
+    self.assertEquals(x.consumers(), [z.op])
+    self.assertEquals(y.consumers(), [z.op])
     with session.Session(graph=g) as sess:
       self.assertEquals(sess.run(z), 3)
 
     z.op._update_input(1, y)  # pylint: disable=protected-access
     self.assertEquals(list(z.op.inputs), [x, y])
+    self.assertEquals(x.consumers(), [z.op])
+    self.assertEquals(y.consumers(), [z.op])
     with session.Session(graph=g) as sess:
       self.assertEquals(sess.run(z), 3)
 
-- 
GitLab


From 53f86b3cca9b8a515a3b45627587db2df475f05e Mon Sep 17 00:00:00 2001
From: Derek Murray <mrry@google.com>
Date: Wed, 6 Dec 2017 19:49:22 -0800
Subject: [PATCH 0729/1225] [tf.data] Generate a warning when
 `Iterator.get_next()` is called repeatedly.

This is an antipattern that adds nodes to the graph, causes poor performance,
and can cause resource exhaustion. The warning suggests an alternative course
of action. It has cropped up in several places (#15162 being the most recent
issue), and this warning should help users help themselves.

PiperOrigin-RevId: 178190011
---
 tensorflow/python/data/ops/iterator_ops.py    | 27 +++++++++++++++++++
 .../python/kernel_tests/iterator_ops_test.py  | 14 ++++++++++
 2 files changed, 41 insertions(+)

diff --git a/tensorflow/python/data/ops/iterator_ops.py b/tensorflow/python/data/ops/iterator_ops.py
index 663bed07b2..0cbdb3ab19 100644
--- a/tensorflow/python/data/ops/iterator_ops.py
+++ b/tensorflow/python/data/ops/iterator_ops.py
@@ -17,6 +17,8 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
+import warnings
+
 from tensorflow.python.data.util import nest
 from tensorflow.python.data.util import sparse
 from tensorflow.python.framework import dtypes
@@ -25,6 +27,26 @@ from tensorflow.python.framework import tensor_shape
 from tensorflow.python.ops import gen_dataset_ops
 
 
+# NOTE(mrry): It is legitimate to call `Iterator.get_next()` multiple
+# times, e.g. when you are distributing different elements to multiple
+# devices in a single step. However, a common pitfall arises when
+# users call `Iterator.get_next()` in each iteration of their training
+# loop. `Iterator.get_next()` adds ops to the graph, and executing
+# each op allocates resources (including threads); as a consequence,
+# invoking it in every iteration of a training loop causes slowdown
+# and eventual resource exhaustion. To guard against this outcome, we
+# log a warning when the number of uses crosses a threshold of suspicion.
+GET_NEXT_CALL_WARNING_THRESHOLD = 32
+
+GET_NEXT_CALL_WARNING_MESSAGE = (
+    "An unusually high number of `Iterator.get_next()` calls was detected. "
+    "This often indicates that `Iterator.get_next()` is being called inside "
+    "a training loop, which will cause gradual slowdown and eventual resource "
+    "exhaustion. If this is the case, restructure your code to call "
+    "`next_element = iterator.get_next() once outside the loop, and use "
+    "`next_element` inside the loop.")
+
+
 class Iterator(object):
   """Represents the state of iterating through a `Dataset`."""
 
@@ -56,6 +78,7 @@ class Iterator(object):
     self._output_shapes = output_shapes
     self._string_handle = gen_dataset_ops.iterator_to_string_handle(
         self._iterator_resource)
+    self._get_next_call_count = 0
 
   @staticmethod
   def from_structure(output_types,
@@ -282,6 +305,10 @@ class Iterator(object):
     Returns:
       A nested structure of `tf.Tensor` objects.
     """
+    self._get_next_call_count += 1
+    if self._get_next_call_count > GET_NEXT_CALL_WARNING_THRESHOLD:
+      warnings.warn(GET_NEXT_CALL_WARNING_MESSAGE)
+
     return sparse.deserialize_sparse_tensors(
         nest.pack_sequence_as(self._output_types,
                               gen_dataset_ops.iterator_get_next(
diff --git a/tensorflow/python/kernel_tests/iterator_ops_test.py b/tensorflow/python/kernel_tests/iterator_ops_test.py
index 513c36d64f..23c6d7385f 100644
--- a/tensorflow/python/kernel_tests/iterator_ops_test.py
+++ b/tensorflow/python/kernel_tests/iterator_ops_test.py
@@ -18,6 +18,8 @@ from __future__ import division
 from __future__ import print_function
 
 import os
+import warnings
+
 import numpy as np
 
 from tensorflow.core.protobuf import config_pb2
@@ -633,6 +635,18 @@ class IteratorTest(test.TestCase):
         with self.assertRaises(errors.InvalidArgumentError):
           sess.run(restore_op)
 
+  def testRepeatedGetNextWarning(self):
+    iterator = dataset_ops.Dataset.range(10).make_one_shot_iterator()
+    warnings.simplefilter("always")
+    with warnings.catch_warnings(record=True) as w:
+      for _ in range(100):
+        iterator.get_next()
+    self.assertEqual(100 - iterator_ops.GET_NEXT_CALL_WARNING_THRESHOLD,
+                     len(w))
+    for warning in w:
+      self.assertTrue(
+          iterator_ops.GET_NEXT_CALL_WARNING_MESSAGE in str(warning.message))
+
 
 if __name__ == "__main__":
   test.main()
-- 
GitLab


From 846a73f9f336e54a02c12388ac76a0aa8700543a Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 6 Dec 2017 19:51:49 -0800
Subject: [PATCH 0730/1225] Adds a int32 to int32 HashTable mapping.

PiperOrigin-RevId: 178190131
---
 tensorflow/core/kernels/lookup_table_op.cc | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tensorflow/core/kernels/lookup_table_op.cc b/tensorflow/core/kernels/lookup_table_op.cc
index e774c771b8..418d9dcc61 100644
--- a/tensorflow/core/kernels/lookup_table_op.cc
+++ b/tensorflow/core/kernels/lookup_table_op.cc
@@ -823,6 +823,7 @@ REGISTER_KERNEL(int64, int64);
 REGISTER_KERNEL(int64, float);
 REGISTER_KERNEL(string, string);
 REGISTER_KERNEL(string, bool);
+REGISTER_KERNEL(int32, int32);
 
 #undef REGISTER_KERNEL
 
-- 
GitLab


From f75481874fb7314c907b1770ea04c851b9ec07d4 Mon Sep 17 00:00:00 2001
From: Yuanzhong Xu <yuanzx@google.com>
Date: Wed, 6 Dec 2017 20:08:30 -0800
Subject: [PATCH 0731/1225] Tuple literal conversions for BF16 and F32

PiperOrigin-RevId: 178191335
---
 .../xla/tests/client_library_test_base.cc     | 25 +++++++-----
 .../compiler/xla/tests/literal_test_util.cc   | 38 ++++++++++++++-----
 .../compiler/xla/tests/literal_test_util.h    |  8 +++-
 3 files changed, 51 insertions(+), 20 deletions(-)

diff --git a/tensorflow/compiler/xla/tests/client_library_test_base.cc b/tensorflow/compiler/xla/tests/client_library_test_base.cc
index bbd6a87ca3..50bf185936 100644
--- a/tensorflow/compiler/xla/tests/client_library_test_base.cc
+++ b/tensorflow/compiler/xla/tests/client_library_test_base.cc
@@ -267,12 +267,17 @@ tensorflow::Status ClientLibraryTestBase::ComputeAndCompareLiteralWithStatus(
   const Literal* expected_ptr = &expected;
   std::unique_ptr<Literal> converted_expected;
   Shape layout_shape;
-  if (expected.shape().element_type() == F32 && use_bfloat16_) {
+  if (use_bfloat16_) {
     converted_expected = LiteralTestUtil::ConvertF32ToBF16(expected);
     expected_ptr = converted_expected.get();
     if (shape_with_layout != nullptr) {
       layout_shape = *shape_with_layout;
-      layout_shape.set_element_type(BF16);
+      ShapeUtil::ForEachMutableSubshape(
+          &layout_shape, [&](Shape* subshape, const ShapeIndex& /*index*/) {
+            if (subshape->element_type() == F32) {
+              subshape->set_element_type(BF16);
+            }
+          });
       shape_with_layout = &layout_shape;
     }
   }
@@ -305,13 +310,17 @@ tensorflow::Status ClientLibraryTestBase::ComputeAndCompareLiteralWithStatus(
   const Literal* expected_ptr = &expected;
   std::unique_ptr<Literal> converted_expected;
   Shape layout_shape;
-  if (expected.shape().element_type() == F32 && use_bfloat16_) {
+  if (use_bfloat16_) {
     converted_expected = LiteralTestUtil::ConvertF32ToBF16(expected);
     expected_ptr = converted_expected.get();
-    layout_shape.set_element_type(BF16);
     if (shape_with_layout != nullptr) {
       layout_shape = *shape_with_layout;
-      layout_shape.set_element_type(BF16);
+      ShapeUtil::ForEachMutableSubshape(
+          &layout_shape, [&](Shape* subshape, const ShapeIndex& /*index*/) {
+            if (subshape->element_type() == F32) {
+              subshape->set_element_type(BF16);
+            }
+          });
       shape_with_layout = &layout_shape;
     }
   }
@@ -501,7 +510,7 @@ ClientLibraryTestBase::CreateParameterAndTransferLiteral(
     ComputationBuilder* builder, ComputationDataHandle* data_handle) {
   const Literal* param_literal = &literal;
   std::unique_ptr<Literal> converted_literal;
-  if (use_bfloat16_ && literal.shape().element_type() == F32) {
+  if (use_bfloat16_) {
     converted_literal = LiteralTestUtil::ConvertF32ToBF16(literal);
     param_literal = converted_literal.get();
   }
@@ -515,9 +524,7 @@ ClientLibraryTestBase::CreateParameterAndTransferLiteral(
 ComputationDataHandle ClientLibraryTestBase::CreateConstantFromLiteral(
     const Literal& literal, ComputationBuilder* builder) {
   return builder->ConstantLiteral(
-      use_bfloat16_ && literal.shape().element_type() == F32
-          ? *LiteralTestUtil::ConvertF32ToBF16(literal)
-          : literal);
+      use_bfloat16_ ? *LiteralTestUtil::ConvertF32ToBF16(literal) : literal);
 }
 
 }  // namespace xla
diff --git a/tensorflow/compiler/xla/tests/literal_test_util.cc b/tensorflow/compiler/xla/tests/literal_test_util.cc
index 6aa27e5470..e1a948c096 100644
--- a/tensorflow/compiler/xla/tests/literal_test_util.cc
+++ b/tensorflow/compiler/xla/tests/literal_test_util.cc
@@ -101,32 +101,52 @@ namespace xla {
 }
 
 /* static */ std::unique_ptr<Literal> LiteralTestUtil::ConvertBF16ToF32(
-    const Literal& bf16_literal) {
-  CHECK_EQ(bf16_literal.shape().element_type(), BF16);
-  Shape converted_shape = bf16_literal.shape();
+    const Literal& literal) {
+  if (ShapeUtil::IsTuple(literal.shape())) {
+    std::vector<std::unique_ptr<Literal>> converted_elements;
+    for (const auto& element : literal.tuple_literals()) {
+      converted_elements.push_back(ConvertBF16ToF32(element));
+    }
+    return Literal::MakeTupleOwned(std::move(converted_elements));
+  }
+
+  if (literal.shape().element_type() != BF16) {
+    return MakeUnique<Literal>(literal);
+  }
+  Shape converted_shape = literal.shape();
   converted_shape.set_element_type(F32);
   auto converted = Literal::CreateFromShape(converted_shape);
   if (!ShapeUtil::HasZeroElements(converted_shape)) {
     std::vector<int64> index(converted_shape.dimensions_size(), 0);
     do {
-      converted->Set<float>(
-          index, static_cast<float>(bf16_literal.Get<bfloat16>(index)));
+      converted->Set<float>(index,
+                            static_cast<float>(literal.Get<bfloat16>(index)));
     } while (IndexUtil::BumpIndices(converted_shape, &index));
   }
   return converted;
 }
 
 /* static */ std::unique_ptr<Literal> LiteralTestUtil::ConvertF32ToBF16(
-    const Literal& f32_literal) {
-  CHECK_EQ(f32_literal.shape().element_type(), F32);
-  Shape converted_shape = f32_literal.shape();
+    const Literal& literal) {
+  if (ShapeUtil::IsTuple(literal.shape())) {
+    std::vector<std::unique_ptr<Literal>> converted_elements;
+    for (const auto& element : literal.tuple_literals()) {
+      converted_elements.push_back(ConvertF32ToBF16(element));
+    }
+    return Literal::MakeTupleOwned(std::move(converted_elements));
+  }
+
+  if (literal.shape().element_type() != F32) {
+    return MakeUnique<Literal>(literal);
+  }
+  Shape converted_shape = literal.shape();
   converted_shape.set_element_type(BF16);
   auto converted = Literal::CreateFromShape(converted_shape);
   if (!ShapeUtil::HasZeroElements(converted_shape)) {
     std::vector<int64> index(converted_shape.dimensions_size(), 0);
     do {
       converted->Set<bfloat16>(
-          index, static_cast<bfloat16>(f32_literal.Get<float>(index)));
+          index, static_cast<bfloat16>(literal.Get<float>(index)));
     } while (IndexUtil::BumpIndices(converted_shape, &index));
   }
   return converted;
diff --git a/tensorflow/compiler/xla/tests/literal_test_util.h b/tensorflow/compiler/xla/tests/literal_test_util.h
index 6e4add2690..bf8c92f16d 100644
--- a/tensorflow/compiler/xla/tests/literal_test_util.h
+++ b/tensorflow/compiler/xla/tests/literal_test_util.h
@@ -59,10 +59,14 @@ class LiteralTestUtil {
   static void AssertEqualShapesAndLayouts(const Shape& expected,
                                           const Shape& actual);
 
-  // Converts a bfloat16 literal to a float literal.
+  // If the given literal's data type is bfloat16, converts it to a float
+  // literal; otherwise, returns a copy of it. If the literal is a tuple,
+  // recursively converts its elements.
   static std::unique_ptr<Literal> ConvertBF16ToF32(const Literal& bf16_literal);
 
-  // Converts a float literal to a bfloat16 literal.
+  // If the given literal's data type is float, converts it to a bfloat16
+  // literal; otherwise, returns a copy of it. If the literal is a tuple,
+  // recursively converts its elements.
   static std::unique_ptr<Literal> ConvertF32ToBF16(const Literal& f32_literal);
 
   // Asserts that the expected and actual literals are (bitwise) equal for all
-- 
GitLab


From 64c4e8f6c1f2676fbd79b9a88a634424176d7101 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 6 Dec 2017 20:13:31 -0800
Subject: [PATCH 0732/1225] Add BlockHostUntilDoneWithStatus, which returns
 Status rather than bool.

Also fixed a deadlock in Stream::BlockHostUntilDone.  The problem with the
original code was that it grabbed mu_ before looping over substreams, and would
call CheckError with mu_ still held.  But CheckError will attempt to lock mu_ in
the failure case, which would deadlock.

PiperOrigin-RevId: 178191634
---
 tensorflow/stream_executor/stream.cc          | 23 ++++++++++++-------
 tensorflow/stream_executor/stream.h           | 23 +++++++++++++------
 .../stream_executor_internal.cc               | 15 ++++++++++++
 .../stream_executor_internal.h                |  1 +
 .../stream_executor/stream_executor_pimpl.cc  | 15 +++++++-----
 .../stream_executor/stream_executor_pimpl.h   |  2 +-
 6 files changed, 57 insertions(+), 22 deletions(-)

diff --git a/tensorflow/stream_executor/stream.cc b/tensorflow/stream_executor/stream.cc
index 22fd6bce78..de65038d17 100644
--- a/tensorflow/stream_executor/stream.cc
+++ b/tensorflow/stream_executor/stream.cc
@@ -5055,22 +5055,24 @@ Stream &Stream::ThenEnqueueOnBackgroundThread(
   });
 }
 
-bool Stream::BlockHostUntilDone() {
+port::Status Stream::BlockHostUntilDoneWithStatus() {
   VLOG_CALL();
 
   if (!ok()) {
-    LOG(INFO)
-        << "stream " << this
-        << " did not block host until done; was already in an error state";
-    return false;
+    port::Status status = port::Status(
+        port::error::INTERNAL,
+        "stream did not block host until done; was already in an error state");
+    LOG(INFO) << status << " " << this;
+    return status;
   }
 
+  port::Status first_error;
   {
     // Wait until all active sub-streams have done their tasks.
     mutex_lock lock{mu_};
     for (auto &stream : sub_streams_) {
       if (!stream.second) {
-        CheckError(stream.first->BlockHostUntilDone());
+        first_error.Update(stream.first->BlockHostUntilDoneWithStatus());
         // Set this sub-stream as available.
         stream.second = true;
       }
@@ -5079,8 +5081,13 @@ bool Stream::BlockHostUntilDone() {
 
   temporary_memory_manager_.DeallocateFinalizedTemporaries();
 
-  CheckError(parent_->BlockHostUntilDone(this));
-  return ok();
+  first_error.Update(parent_->BlockHostUntilDoneWithStatus(this));
+  CheckError(first_error.ok());
+  return first_error;
+}
+
+bool Stream::BlockHostUntilDone() {
+  return BlockHostUntilDoneWithStatus().ok();
 }
 
 }  // namespace gputools
diff --git a/tensorflow/stream_executor/stream.h b/tensorflow/stream_executor/stream.h
index 023cffb965..15a5a2b6cb 100644
--- a/tensorflow/stream_executor/stream.h
+++ b/tensorflow/stream_executor/stream.h
@@ -113,7 +113,7 @@ class Stream {
 
   // Initialize the stream. This must be performed before entraining any other
   // operations.
-  Stream &Init();
+  Stream &Init() LOCKS_EXCLUDED(mu_);
 
   // Initializes timer t via the StreamExecutor.
   Stream &InitTimer(Timer *t);
@@ -124,11 +124,11 @@ class Stream {
   // Get or create a sub-stream from this stream. If there is any sub-stream in
   // the pool that can be reused then just return this sub-stream.  Otherwise
   // create a new sub-stream.
-  Stream *GetOrCreateSubStream();
+  Stream *GetOrCreateSubStream() LOCKS_EXCLUDED(mu_);
 
   // Return the sub-stream back to the host stream so that it can be reused
   // later.
-  void ReturnSubStream(Stream *sub_stream);
+  void ReturnSubStream(Stream *sub_stream) LOCKS_EXCLUDED(mu_);
 
   // Allocate temporary memories. The stream will deallocate them when blocked
   // or destroyed.
@@ -1903,8 +1903,17 @@ class Stream {
   // entrained on the stream (enqueued to this point in program
   // execution) to complete.
   //
-  // Returns true if the stream is ok().
-  bool BlockHostUntilDone();
+  // Returns an OK status if the blocking was successful and the stream is ok().
+  // Otherwise returns an error describing why the blocking failed.
+  //
+  // TODO(b/70298427): Rename to BlockHostUntilDone, once all callers have been
+  // converted from the bool form.
+  port::Status BlockHostUntilDoneWithStatus() LOCKS_EXCLUDED(mu_);
+
+  // DEPRECATED(b/70298427) - new code should use BlockHostUntilDoneWithStatus()
+  //
+  // Equivalent to BlockHostUntilDoneWithStatus().ok().
+  bool BlockHostUntilDone() LOCKS_EXCLUDED(mu_);
 
   // Warning! This method interacts with internal threads in
   // sometimes-unpredictable ways and is intended for GPU-Executor-internal
@@ -1960,14 +1969,14 @@ class Stream {
   friend struct ThenBlasImpl;  // for implementing ThenBlasXXX.
   friend class ocl::CLBlas;    // for parent_.
 
-  bool InErrorState() const {
+  bool InErrorState() const LOCKS_EXCLUDED(mu_) {
     tf_shared_lock lock{mu_};
     return !ok_;
   }
 
   // Sets the error state if operation_retcode is false.
   // This is a useful shorthand for many stream routines.
-  void CheckError(bool operation_retcode) {
+  void CheckError(bool operation_retcode) LOCKS_EXCLUDED(mu_) {
     if (operation_retcode) {
       return;
     }
diff --git a/tensorflow/stream_executor/stream_executor_internal.cc b/tensorflow/stream_executor/stream_executor_internal.cc
index 95b285b992..25b579fc16 100644
--- a/tensorflow/stream_executor/stream_executor_internal.cc
+++ b/tensorflow/stream_executor/stream_executor_internal.cc
@@ -15,6 +15,7 @@ limitations under the License.
 
 #include "tensorflow/stream_executor/stream_executor_internal.h"
 
+#include "tensorflow/stream_executor/lib/error.h"
 #include "tensorflow/stream_executor/lib/statusor.h"
 #include "tensorflow/stream_executor/lib/stringprintf.h"
 
@@ -40,6 +41,20 @@ StreamExecutorFactory* MakeOpenCLExecutorImplementation() {
 
 StreamExecutorFactory MakeHostExecutorImplementation;
 
+// TODO(b/70298427) There are two similar methods:
+//   bool BlockHostUntilDone(Stream*);
+//   Status BlockHostUntilDoneWithStatus(Stream*);
+//
+// The intention is to replace all implementations of the bool version with the
+// Status version.  In the meantime, just implement one in terms of the other.
+port::Status StreamExecutorInterface::BlockHostUntilDoneWithStatus(
+    Stream* stream) {
+  if (!BlockHostUntilDone(stream)) {
+    return port::Status(port::error::INTERNAL,
+                        "Failed to block host until done.");
+  }
+  return port::Status::OK();
+}
 
 }  // namespace internal
 }  // namespace gputools
diff --git a/tensorflow/stream_executor/stream_executor_internal.h b/tensorflow/stream_executor/stream_executor_internal.h
index 14445a7657..d2426f46e2 100644
--- a/tensorflow/stream_executor/stream_executor_internal.h
+++ b/tensorflow/stream_executor/stream_executor_internal.h
@@ -220,6 +220,7 @@ class StreamExecutorInterface {
   virtual bool StartTimer(Stream *stream, Timer *timer) = 0;
   virtual bool StopTimer(Stream *stream, Timer *timer) = 0;
   virtual bool BlockHostUntilDone(Stream *stream) = 0;
+  virtual port::Status BlockHostUntilDoneWithStatus(Stream *stream);
   virtual int PlatformDeviceCount() = 0;
   virtual port::Status EnablePeerAccessTo(StreamExecutorInterface *other) = 0;
   virtual bool CanEnablePeerAccessTo(StreamExecutorInterface *other) = 0;
diff --git a/tensorflow/stream_executor/stream_executor_pimpl.cc b/tensorflow/stream_executor/stream_executor_pimpl.cc
index 76afb85068..5630255b5d 100644
--- a/tensorflow/stream_executor/stream_executor_pimpl.cc
+++ b/tensorflow/stream_executor/stream_executor_pimpl.cc
@@ -432,12 +432,15 @@ bool StreamExecutor::Launch(Stream *stream, const ThreadDim &thread_dims,
   return implementation_->Launch(stream, thread_dims, block_dims, kernel, args);
 }
 
-bool StreamExecutor::BlockHostUntilDone(Stream *stream) {
-  bool result;
-  SCOPED_TRACE(TraceListener::BlockHostUntilDone, &result, stream);
-
-  result = implementation_->BlockHostUntilDone(stream);
-  return result;
+port::Status StreamExecutor::BlockHostUntilDoneWithStatus(Stream *stream) {
+  // TODO(toddw): Change TraceListener::BlockHostUntilDone to record Status
+  // rather than bool.
+  bool trace_result;
+  SCOPED_TRACE(TraceListener::BlockHostUntilDone, &trace_result, stream);
+
+  port::Status status = implementation_->BlockHostUntilDoneWithStatus(stream);
+  trace_result = status.ok();
+  return status;
 }
 
 void *StreamExecutor::Allocate(uint64 size) {
diff --git a/tensorflow/stream_executor/stream_executor_pimpl.h b/tensorflow/stream_executor/stream_executor_pimpl.h
index 66c50d47e9..d2965dbfd7 100644
--- a/tensorflow/stream_executor/stream_executor_pimpl.h
+++ b/tensorflow/stream_executor/stream_executor_pimpl.h
@@ -481,7 +481,7 @@ class StreamExecutor {
   // Causes the host code to synchronously wait for operations entrained onto
   // stream to complete. Effectively a join on the asynchronous device
   // operations enqueued on the stream before this program point.
-  bool BlockHostUntilDone(Stream *stream);
+  port::Status BlockHostUntilDoneWithStatus(Stream *stream);
 
   // Synchronously allocates size bytes on the underlying platform and returns
   // an opaque void* representing that allocation. In the case of failure,
-- 
GitLab


From fbcbabfdfd852ce6ff296cd18d1be89cfd153823 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 6 Dec 2017 20:24:43 -0800
Subject: [PATCH 0733/1225] Fixes a bug where states with NaN log-densities
 would always be accepted. Now they're always rejected, which should almost
 always be the desired behavior.

PiperOrigin-RevId: 178192214
---
 .../bayesflow/python/kernel_tests/hmc_test.py | 30 +++++++++++++++++++
 .../contrib/bayesflow/python/ops/hmc_impl.py  | 20 ++++++++-----
 2 files changed, 43 insertions(+), 7 deletions(-)

diff --git a/tensorflow/contrib/bayesflow/python/kernel_tests/hmc_test.py b/tensorflow/contrib/bayesflow/python/kernel_tests/hmc_test.py
index b1f108e5f0..1ab819d797 100644
--- a/tensorflow/contrib/bayesflow/python/kernel_tests/hmc_test.py
+++ b/tensorflow/contrib/bayesflow/python/kernel_tests/hmc_test.py
@@ -345,5 +345,35 @@ class HMCTest(test.TestCase):
   def testAIS12(self):
     self._ais_gets_correct_log_normalizer_wrapper([1, 2])
 
+  def testNanRejection(self):
+    """Tests that an update that yields NaN potentials gets rejected.
+
+    We run HMC with a target distribution that returns NaN
+    log-likelihoods if any element of x < 0, and unit-scale
+    exponential log-likelihoods otherwise. The exponential potential
+    pushes x towards 0, ensuring that any reasonably large update will
+    push us over the edge into NaN territory.
+    """
+    def _unbounded_exponential_log_prob(x):
+      """An exponential distribution with log-likelihood NaN for x < 0."""
+      per_element_potentials = array_ops.where(x < 0,
+                                               np.nan * array_ops.ones_like(x),
+                                               -x)
+      return math_ops.reduce_sum(per_element_potentials)
+
+    with self.test_session() as sess:
+      initial_x = math_ops.linspace(0.01, 5, 10)
+      updated_x, acceptance_probs, _, _ = hmc.kernel(
+          2., 5, initial_x, _unbounded_exponential_log_prob, [0])
+      initial_x_val, updated_x_val, acceptance_probs_val = sess.run(
+          [initial_x, updated_x, acceptance_probs])
+
+      logging.vlog(1, 'initial_x = {}'.format(initial_x_val))
+      logging.vlog(1, 'updated_x = {}'.format(updated_x_val))
+      logging.vlog(1, 'acceptance_probs = {}'.format(acceptance_probs_val))
+
+      self.assertAllEqual(initial_x_val, updated_x_val)
+      self.assertEqual(acceptance_probs_val, 0.)
+
 if __name__ == '__main__':
   test.main()
diff --git a/tensorflow/contrib/bayesflow/python/ops/hmc_impl.py b/tensorflow/contrib/bayesflow/python/ops/hmc_impl.py
index 333dce9295..da788be3db 100644
--- a/tensorflow/contrib/bayesflow/python/ops/hmc_impl.py
+++ b/tensorflow/contrib/bayesflow/python/ops/hmc_impl.py
@@ -468,15 +468,17 @@ def kernel(step_size, n_leapfrog_steps, x, target_log_prob_fn, event_dims=(),
 
     kinetic_1 = 0.5 * math_ops.reduce_sum(math_ops.square(new_m), event_dims)
 
-    # TODO(mhoffman): It seems like there may be an opportunity for nans here.
-    # I'm delaying addressing this because we're going to refactor this part
-    # to use the more general Metropolis abstraction anyway.
-    acceptance_probs = math_ops.exp(math_ops.minimum(0., log_potential_0 -
-                                                     log_potential_1 +
-                                                     kinetic_0 - kinetic_1))
+    energy_change = log_potential_1 - log_potential_0 + kinetic_1 - kinetic_0
+    # Treat NaN as infinite energy (and therefore guaranteed rejection).
+    energy_change = array_ops.where(
+        math_ops.is_nan(energy_change),
+        array_ops.fill(array_ops.shape(energy_change),
+                       energy_change.dtype.as_numpy_dtype(np.inf)),
+        energy_change)
+    acceptance_probs = math_ops.exp(math_ops.minimum(0., -energy_change))
     accepted = math_ops.cast(
         random_ops.random_uniform(array_ops.shape(acceptance_probs)) <
-        acceptance_probs, np.float32)
+        acceptance_probs, log_potential_0.dtype)
     new_log_prob = (-log_potential_0 * (1. - accepted) -
                     log_potential_1 * accepted)
 
@@ -485,7 +487,9 @@ def kernel(step_size, n_leapfrog_steps, x, target_log_prob_fn, event_dims=(),
     reduced_shape = array_ops.shape(math_ops.reduce_sum(x, event_dims,
                                                         keep_dims=True))
     accepted = array_ops.reshape(accepted, reduced_shape)
+    accepted = math_ops.cast(accepted, x.dtype)
     new_x = x * (1. - accepted) + new_x * accepted
+    accepted = math_ops.cast(accepted, accepted.dtype)
     new_grad = -grad_0 * (1. - accepted) - grad_1 * accepted
 
   return new_x, acceptance_probs, new_log_prob, new_grad
@@ -525,6 +529,7 @@ def leapfrog_integrator(step_size, n_steps, initial_position, initial_momentum,
       Has shape matching `initial_position`.
 
   Example: Simple quadratic potential.
+
   ```python
   def potential_and_grad(position):
     return tf.reduce_sum(0.5 * tf.square(position)), position
@@ -600,6 +605,7 @@ def leapfrog_step(step_size, position, momentum, potential_and_grad, grad,
       Has shape matching `position`.
 
   Example: Simple quadratic potential.
+
   ```python
   def potential_and_grad(position):
     # Simple quadratic potential
-- 
GitLab


From 072bc6a28a5f21c46c80c145d2bae3da475b32e7 Mon Sep 17 00:00:00 2001
From: Yuefeng Zhou <yuefengz@google.com>
Date: Wed, 6 Dec 2017 20:31:49 -0800
Subject: [PATCH 0734/1225] Reset the session before save op is run.

PiperOrigin-RevId: 178192653
---
 tensorflow/core/grappler/clusters/single_machine.cc | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/tensorflow/core/grappler/clusters/single_machine.cc b/tensorflow/core/grappler/clusters/single_machine.cc
index 1a6fad4182..ba27458104 100644
--- a/tensorflow/core/grappler/clusters/single_machine.cc
+++ b/tensorflow/core/grappler/clusters/single_machine.cc
@@ -92,6 +92,8 @@ Status SingleMachine::Provision() {
     VLOG(1) << "Adding GPU device " << device_name;
     devices_[device_name] = GetLocalGPUInfo(i);
   }
+  mutex_lock l(this->last_graph_mu_);
+  last_graph_ = nullptr;
   return Status::OK();
 }
 
@@ -230,7 +232,7 @@ Status SingleMachine::RunWithTimeout(
 }
 
 Status SingleMachine::CloseSession(bool use_timeout) {
-  if (!session_) {
+  if (!session_ || !thread_pool_) {
     return Status::OK();
   }
 
-- 
GitLab


From 93dc52c707032ee4de6885edba064ee4e7308d40 Mon Sep 17 00:00:00 2001
From: Shanqing Cai <cais@google.com>
Date: Wed, 6 Dec 2017 20:32:37 -0800
Subject: [PATCH 0735/1225] tfdbg: Add protocol for sending tracebacks and
 source code to debug server

PiperOrigin-RevId: 178192708
---
 tensorflow/contrib/cmake/tf_tests.cmake       |   1 +
 tensorflow/core/debug/BUILD                   |   1 +
 tensorflow/core/debug/debug_service.proto     |  40 ++++
 tensorflow/core/protobuf/debug.proto          |  22 ++
 tensorflow/python/debug/BUILD                 |  37 ++++
 .../debug/lib/debug_service_pb2_grpc.py       |  35 +++
 .../python/debug/lib/grpc_debug_server.py     |  33 +++
 .../debug/lib/grpc_debug_test_server.py       | 100 +++++++++
 tensorflow/python/debug/lib/source_remote.py  | 205 ++++++++++++++++++
 .../python/debug/lib/source_remote_test.py    | 171 +++++++++++++++
 tensorflow/python/profiler/model_analyzer.py  |   8 +-
 tensorflow/python/profiler/tfprof_logger.py   |   6 +-
 12 files changed, 652 insertions(+), 7 deletions(-)
 create mode 100644 tensorflow/python/debug/lib/source_remote.py
 create mode 100644 tensorflow/python/debug/lib/source_remote_test.py

diff --git a/tensorflow/contrib/cmake/tf_tests.cmake b/tensorflow/contrib/cmake/tf_tests.cmake
index 2d58a48a49..1f4703023a 100644
--- a/tensorflow/contrib/cmake/tf_tests.cmake
+++ b/tensorflow/contrib/cmake/tf_tests.cmake
@@ -217,6 +217,7 @@ if (tensorflow_BUILD_PYTHON_TESTS)
       # TFDBG grpc:// mode is not yet available on Windows.
       "${tensorflow_source_dir}/tensorflow/python/debug/lib/dist_session_debug_grpc_test.py"
       "${tensorflow_source_dir}/tensorflow/python/debug/lib/session_debug_grpc_test.py"
+      "${tensorflow_source_dir}/tensorflow/python/debug/lib/source_remote_test.py"
       # stl on windows handles overflows different
       "${tensorflow_source_dir}/tensorflow/python/kernel_tests/as_string_op_test.py"
       "${tensorflow_source_dir}/tensorflow/python/kernel_tests/string_to_number_op_test.py"
diff --git a/tensorflow/core/debug/BUILD b/tensorflow/core/debug/BUILD
index 108dc59919..a32badef6d 100644
--- a/tensorflow/core/debug/BUILD
+++ b/tensorflow/core/debug/BUILD
@@ -56,6 +56,7 @@ tf_proto_library(
     cc_grpc_version = 1,
     protodeps = [
         ":debugger_event_metadata_proto",
+        "//tensorflow/core/profiler:protos_all",
     ] + tf_additional_all_protos(),
     visibility = ["//tensorflow:__subpackages__"],
 )
diff --git a/tensorflow/core/debug/debug_service.proto b/tensorflow/core/debug/debug_service.proto
index 547c0576f0..4bef74dfc5 100644
--- a/tensorflow/core/debug/debug_service.proto
+++ b/tensorflow/core/debug/debug_service.proto
@@ -18,6 +18,8 @@ syntax = "proto3";
 package tensorflow;
 
 import "tensorflow/core/framework/tensor.proto";
+import "tensorflow/core/profiler/tfprof_log.proto";
+import "tensorflow/core/protobuf/debug.proto";
 import "tensorflow/core/util/event.proto";
 
 // Reply message from EventListener to the client, i.e., to the source of the
@@ -46,6 +48,38 @@ message EventReply {
   // during debugging.
 }
 
+// Data on the traceback of a debugged call, e.g., a Session.run() call, or the
+// execution of an eager operation.
+message CallTraceback {
+  enum CallType {
+    UNSPECIFIED = 0;
+    GRAPH_EXECUTION = 1;
+    EAGER_EXECUTION = 2;
+  }
+
+  CallType call_type = 1;
+
+  // A key for the call. For example, for graph execution, this is a key
+  // consisting of the names of the fed and fetched tensors.
+  string call_key = 2;
+
+  // Traceback stack for the origin of the call event.
+  // For graph execution, this is the stack of the Session.run() call.
+  // For eager execution, this is the stack of the Python line that invokes
+  // the execution of the eager op.
+  tfprof.CodeDef origin_stack = 3;
+
+  // Keeps track of the mapping from integer IDs in `origin_stack` to actual
+  // string values (e.g., file paths, function names).
+  map<int64, string> origin_id_to_string = 4;
+
+  // Traceback for the graph (if any) involved in the call.
+  tfprof.OpLogProto graph_traceback = 5;
+
+  // Version of the graph in `graph_traceback` (if any).
+  int64 graph_version = 6;
+}
+
 // EventListener: Receives Event protos, e.g., from debugged TensorFlow
 // runtime(s).
 service EventListener {
@@ -57,4 +91,10 @@ service EventListener {
   //      ops that get executed immediately after the beginning of the graph
   //      execution.
   rpc SendEvents(stream Event) returns (stream EventReply);
+
+  // Send the tracebacks of a TensorFlow execution call.
+  rpc SendTracebacks(CallTraceback) returns (EventReply);
+
+  // Send a collection of source code files being debugged.
+  rpc SendSourceFiles(DebuggedSourceFiles) returns (EventReply);
 }
diff --git a/tensorflow/core/protobuf/debug.proto b/tensorflow/core/protobuf/debug.proto
index 136c627e25..56983f3b7d 100644
--- a/tensorflow/core/protobuf/debug.proto
+++ b/tensorflow/core/protobuf/debug.proto
@@ -60,3 +60,25 @@ message DebugOptions {
   // step count.
   int64 global_step = 10;
 }
+
+message DebuggedSourceFile {
+  // The host name on which a source code file is located.
+  string host = 1;
+
+  // Path to the source code file.
+  string file_path = 2;
+
+  // The timestamp at which the source code file is last modified.
+  int64 last_modified = 3;
+
+  // Byte size of the file.
+  int64 bytes = 4;
+
+  // Line-by-line content of the source code file.
+  repeated string lines = 5;
+}
+
+message DebuggedSourceFiles {
+  // A collection of source code files.
+  repeated DebuggedSourceFile source_files = 1;
+}
diff --git a/tensorflow/python/debug/BUILD b/tensorflow/python/debug/BUILD
index 68b97ddbe3..2fda463a77 100644
--- a/tensorflow/python/debug/BUILD
+++ b/tensorflow/python/debug/BUILD
@@ -45,6 +45,7 @@ py_library(
         ":grpc_debug_test_server",
         ":offline_analyzer",
         ":session_debug_testlib",
+        ":source_remote",
     ] + if_not_windows([
         ":debug_examples",
     ]),
@@ -110,6 +111,17 @@ py_library(
     ],
 )
 
+py_library(
+    name = "source_remote",
+    srcs = ["lib/source_remote.py"],
+    srcs_version = "PY2AND3",
+    deps = [
+        ":debug_service_pb2_grpc",
+        "//tensorflow/core/debug:debug_service_proto_py",
+        "//tensorflow/python/profiler:tfprof_logger",
+    ],
+)
+
 py_library(
     name = "stepper",
     srcs = ["lib/stepper.py"],
@@ -515,6 +527,31 @@ py_test(
     ],
 )
 
+py_test(
+    name = "source_remote_test",
+    size = "small",
+    srcs = ["lib/source_remote_test.py"],
+    srcs_version = "PY2AND3",
+    tags = [
+        "no_windows",
+        "oss_serial",
+    ],
+    deps = [
+        ":grpc_debug_test_server",
+        ":source_remote",
+        ":source_utils",
+        "//tensorflow/core:protos_all_py",
+        "//tensorflow/python:client",
+        "//tensorflow/python:framework_ops",
+        "//tensorflow/python:framework_test_lib",
+        "//tensorflow/python:math_ops",
+        "//tensorflow/python:platform_test",
+        "//tensorflow/python:resource_variable_ops",
+        "//tensorflow/python:util",
+        "//tensorflow/python:variables",
+    ],
+)
+
 cuda_py_test(
     name = "stepper_test",
     size = "small",
diff --git a/tensorflow/python/debug/lib/debug_service_pb2_grpc.py b/tensorflow/python/debug/lib/debug_service_pb2_grpc.py
index 98adc3284b..16573eab6f 100755
--- a/tensorflow/python/debug/lib/debug_service_pb2_grpc.py
+++ b/tensorflow/python/debug/lib/debug_service_pb2_grpc.py
@@ -23,6 +23,7 @@ from __future__ import print_function
 import grpc
 
 from tensorflow.core.debug import debug_service_pb2 as tensorflow_dot_core_dot_debug_dot_debug__service__pb2
+from tensorflow.core.protobuf import debug_pb2 as tensorflow_dot_core_dot_protobuf_dot_debug__pb2
 from tensorflow.core.util import event_pb2 as tensorflow_dot_core_dot_util_dot_event__pb2
 
 
@@ -42,6 +43,16 @@ class EventListenerStub(object):
         request_serializer=tensorflow_dot_core_dot_util_dot_event__pb2.Event.SerializeToString,
         response_deserializer=tensorflow_dot_core_dot_debug_dot_debug__service__pb2.EventReply.FromString,
         )
+    self.SendTracebacks = channel.unary_unary(
+        '/tensorflow.EventListener/SendTracebacks',
+        request_serializer=tensorflow_dot_core_dot_debug_dot_debug__service__pb2.CallTraceback.SerializeToString,
+        response_deserializer=tensorflow_dot_core_dot_debug_dot_debug__service__pb2.EventReply.FromString,
+        )
+    self.SendSourceFiles = channel.unary_unary(
+        '/tensorflow.EventListener/SendSourceFiles',
+        request_serializer=tensorflow_dot_core_dot_protobuf_dot_debug__pb2.DebuggedSourceFiles.SerializeToString,
+        response_deserializer=tensorflow_dot_core_dot_debug_dot_debug__service__pb2.EventReply.FromString,
+        )
 
 
 class EventListenerServicer(object):
@@ -62,6 +73,20 @@ class EventListenerServicer(object):
     context.set_details('Method not implemented!')
     raise NotImplementedError('Method not implemented!')
 
+  def SendTracebacks(self, request, context):
+    """Send the tracebacks of ops in a Python graph definition.
+    """
+    context.set_code(grpc.StatusCode.UNIMPLEMENTED)
+    context.set_details('Method not implemented!')
+    raise NotImplementedError('Method not implemented!')
+
+  def SendSourceFiles(self, request, context):
+    """Send a collection of source code files being debugged.
+    """
+    context.set_code(grpc.StatusCode.UNIMPLEMENTED)
+    context.set_details('Method not implemented!')
+    raise NotImplementedError('Method not implemented!')
+
 
 def add_EventListenerServicer_to_server(servicer, server):
   rpc_method_handlers = {
@@ -70,6 +95,16 @@ def add_EventListenerServicer_to_server(servicer, server):
           request_deserializer=tensorflow_dot_core_dot_util_dot_event__pb2.Event.FromString,
           response_serializer=tensorflow_dot_core_dot_debug_dot_debug__service__pb2.EventReply.SerializeToString,
       ),
+      'SendTracebacks': grpc.unary_unary_rpc_method_handler(
+          servicer.SendTracebacks,
+          request_deserializer=tensorflow_dot_core_dot_debug_dot_debug__service__pb2.CallTraceback.FromString,
+          response_serializer=tensorflow_dot_core_dot_debug_dot_debug__service__pb2.EventReply.SerializeToString,
+      ),
+      'SendSourceFiles': grpc.unary_unary_rpc_method_handler(
+          servicer.SendSourceFiles,
+          request_deserializer=tensorflow_dot_core_dot_protobuf_dot_debug__pb2.DebuggedSourceFiles.FromString,
+          response_serializer=tensorflow_dot_core_dot_debug_dot_debug__service__pb2.EventReply.SerializeToString,
+      ),
   }
   generic_handler = grpc.method_handlers_generic_handler(
       'tensorflow.EventListener', rpc_method_handlers)
diff --git a/tensorflow/python/debug/lib/grpc_debug_server.py b/tensorflow/python/debug/lib/grpc_debug_server.py
index 5ab910fb0c..1b559f1f27 100644
--- a/tensorflow/python/debug/lib/grpc_debug_server.py
+++ b/tensorflow/python/debug/lib/grpc_debug_server.py
@@ -458,3 +458,36 @@ class EventListenerBaseServicer(debug_service_pb2_grpc.EventListenerServicer):
         `debug_op` as a `str`.
     """
     return list(self._gated_grpc_debug_watches)
+
+  def SendTracebacks(self, request, context):
+    """Base implementation of the handling of SendTracebacks calls.
+
+    The base implementation does nothing with the incoming request.
+    Override in an implementation of the server if necessary.
+
+    Args:
+      request: A `CallTraceback` proto, containing information about the
+        type (e.g., graph vs. eager execution) and source-code traceback of the
+        call and (any) associated `tf.Graph`s.
+      context: Server context.
+
+    Returns:
+      A `EventReply` proto.
+    """
+    return debug_service_pb2.EventReply()
+
+  def SendSourceFiles(self, request, context):
+    """Base implementation of the handling of SendSourceFiles calls.
+
+    The base implementation does nothing with the incoming request.
+    Override in an implementation of the server if necessary.
+
+    Args:
+      request: A `DebuggedSourceFiles` proto, containing the path, content, size
+        and last-modified timestamp of source files.
+      context: Server context.
+
+    Returns:
+      A `EventReply` proto.
+    """
+    return debug_service_pb2.EventReply()
diff --git a/tensorflow/python/debug/lib/grpc_debug_test_server.py b/tensorflow/python/debug/lib/grpc_debug_test_server.py
index 76e45c0bed..a637677d7d 100644
--- a/tensorflow/python/debug/lib/grpc_debug_test_server.py
+++ b/tensorflow/python/debug/lib/grpc_debug_test_server.py
@@ -238,6 +238,15 @@ class EventListenerTestServicer(grpc_debug_server.EventListenerBaseServicer):
         self, server_port,
         functools.partial(EventListenerTestStreamHandler, dump_dir, self))
 
+    # Members for storing the graph ops traceback and source files.
+    self._call_types = []
+    self._call_keys = []
+    self._origin_stacks = []
+    self._origin_id_to_strings = []
+    self._graph_tracebacks = []
+    self._graph_versions = []
+    self._source_files = None
+
   def _initialize_toggle_watch_state(self, toggle_watches):
     self._toggle_watches = toggle_watches
     self._toggle_watch_state = dict()
@@ -259,6 +268,97 @@ class EventListenerTestServicer(grpc_debug_server.EventListenerBaseServicer):
     self.core_metadata_json_strings = []
     self.partition_graph_defs = []
     self.debug_tensor_values = collections.defaultdict(list)
+    self._call_types = []
+    self._call_keys = []
+    self._origin_stacks = []
+    self._origin_id_to_strings = []
+    self._graph_tracebacks = []
+    self._graph_versions = []
+    self._source_files = None
+
+  def SendTracebacks(self, request, context):
+    self._call_types.append(request.call_type)
+    self._call_keys.append(request.call_key)
+    self._origin_stacks.append(request.origin_stack)
+    self._origin_id_to_strings.append(request.origin_id_to_string)
+    self._graph_tracebacks.append(request.graph_traceback)
+    self._graph_versions.append(request.graph_version)
+    return debug_service_pb2.EventReply()
+
+  def SendSourceFiles(self, request, context):
+    self._source_files = request
+    return debug_service_pb2.EventReply()
+
+  def query_op_traceback(self, op_name):
+    """Query the traceback of an op.
+
+    Args:
+      op_name: Name of the op to query.
+
+    Returns:
+      The traceback of the op, as a list of 3-tuples:
+        (filename, lineno, function_name)
+
+    Raises:
+      ValueError: If the op cannot be found in the tracebacks received by the
+        server so far.
+    """
+    for op_log_proto in self._graph_tracebacks:
+      for log_entry in op_log_proto.log_entries:
+        if log_entry.name == op_name:
+          return self._code_def_to_traceback(log_entry.code_def,
+                                             op_log_proto.id_to_string)
+    raise ValueError(
+        "Op '%s' does not exist in the tracebacks received by the debug "
+        "server.")
+
+  def query_origin_stack(self):
+    """Query the stack of the origin of the execution call.
+
+    Returns:
+      A `list` of all tracebacks. Each item corresponds to an execution call,
+        i.e., a `SendTracebacks` request. Each item is a `list` of 3-tuples:
+        (filename, lineno, function_name).
+    """
+    ret = []
+    for stack, id_to_string in zip(
+        self._origin_stacks, self._origin_id_to_strings):
+      ret.append(self._code_def_to_traceback(stack, id_to_string))
+    return ret
+
+  def query_call_types(self):
+    return self._call_types
+
+  def query_call_keys(self):
+    return self._call_keys
+
+  def query_graph_versions(self):
+    return self._graph_versions
+
+  def query_source_file_line(self, file_path, lineno):
+    """Query the content of a given line in a source file.
+
+    Args:
+      file_path: Path to the source file.
+      lineno: Line number as an `int`.
+
+    Returns:
+      Content of the line as a string.
+
+    Raises:
+      ValueError: If no source file is found at the given file_path.
+    """
+    for source_file_proto in self._source_files.source_files:
+      if source_file_proto.file_path == file_path:
+        return source_file_proto.lines[lineno - 1]
+    raise ValueError(
+        "Source file at path %s has not been received by the debug server",
+        file_path)
+
+  def _code_def_to_traceback(self, code_def, id_to_string):
+    return [(id_to_string[trace.file_id],
+             trace.lineno,
+             id_to_string[trace.function_id]) for trace in code_def.traces]
 
 
 def start_server_on_separate_thread(dump_to_filesystem=True,
diff --git a/tensorflow/python/debug/lib/source_remote.py b/tensorflow/python/debug/lib/source_remote.py
new file mode 100644
index 0000000000..9d10d5a8d1
--- /dev/null
+++ b/tensorflow/python/debug/lib/source_remote.py
@@ -0,0 +1,205 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Communicating tracebacks and source code with debug server."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import socket
+
+import grpc
+
+from tensorflow.core.debug import debug_service_pb2
+from tensorflow.core.protobuf import debug_pb2
+from tensorflow.python.debug.lib import debug_service_pb2_grpc
+from tensorflow.python.debug.lib import source_utils
+from tensorflow.python.platform import gfile
+from tensorflow.python.profiler import tfprof_logger
+
+
+def _load_debugged_source_file(file_path, source_file_proto):
+  file_stat = gfile.Stat(file_path)
+  source_file_proto.host = socket.gethostname()
+  source_file_proto.file_path = file_path
+  source_file_proto.last_modified = file_stat.mtime_nsec
+  source_file_proto.bytes = file_stat.length
+  try:
+    with gfile.Open(file_path, "r") as f:
+      source_lines = f.readlines()
+      for line in source_lines:
+        source_file_proto.lines.append(line.strip())
+  except IOError:
+    pass
+
+
+def _string_to_id(string, string_to_id):
+  if string not in string_to_id:
+    string_to_id[string] = len(string_to_id)
+  return string_to_id[string]
+
+
+def _format_origin_stack(origin_stack, call_traceback_proto):
+  """Format a traceback stack for a `CallTraceback` proto.
+
+  Args:
+    origin_stack: The stack list as returned by `traceback.extract_stack()`.
+    call_traceback_proto: A `CallTraceback` proto whose fields are to be
+      populated.
+  """
+  string_to_id = dict()
+  string_to_id[None] = 0
+  for frame in origin_stack:
+    file_path, lineno, func_name, line_text = frame
+    call_traceback_proto.origin_stack.traces.add(
+        file_id=_string_to_id(file_path, string_to_id),
+        lineno=lineno,
+        function_id=_string_to_id(func_name, string_to_id),
+        line_id=_string_to_id(line_text, string_to_id))
+
+  id_to_string = call_traceback_proto.origin_id_to_string
+  for key, value in string_to_id.items():
+    id_to_string[value] = key if key is not None else ""
+
+
+def _source_file_paths_outside_tensorflow_py_library(code_defs, id_to_string):
+  """Extract source file paths outside TensorFlow Python library.
+
+  Args:
+    code_defs: An iterable of `CodeDef` protos, i.e., an iterable of stack
+      traces.
+    id_to_string: A proto map from integer ids to strings.
+
+  Returns:
+    An iterable of source file paths outside the TensorFlow Python library.
+  """
+  file_ids = set()
+  for code_def in code_defs:
+    for trace in code_def.traces:
+      file_ids.add(trace.file_id)
+  non_tf_files = (id_to_string[file_id] for file_id in file_ids)
+  non_tf_files = (
+      f for f in non_tf_files
+      if not source_utils.guess_is_tensorflow_py_library(f) and gfile.Exists(f))
+  return non_tf_files
+
+
+def _send_call_tracebacks(destinations,
+                          origin_stack,
+                          is_eager_execution=False,
+                          call_key=None,
+                          graph=None,
+                          send_source=True):
+  """Send the tracebacks of a TensorFlow execution call.
+
+  To gRPC debug server(s). This applies to graph execution (`tf.Session.run()`)
+  calls and eager execution calls.
+
+  If `send_source`, also sends the underlying source files outside the
+  TensorFlow library.
+
+  Args:
+    destinations: gRPC destination addresses, a `str` or a `list` of `str`s,
+      e.g., "localhost:4242". If a `list`, gRPC requests containing the same
+      `CallTraceback` proto payload will be sent to all the destinations.
+    origin_stack: The traceback stack for the origin of the execution call. For
+      graph execution, this is the traceback of the `tf.Session.run()`
+      invocation. For eager execution, this is the traceback of the Python
+      line that executes the eager opertion.
+    is_eager_execution: (`bool`) whether an eager execution call (i.e., not a
+      `tf.Session.run` or derived methods) is being sent.
+    call_key: The key of the execution call, as a string. For graph execution,
+      this is a string describing the feeds, fetches (and targets) names of the
+      `tf.Session.run` call. For eager execution, this is ignored.
+    graph: A Python `tf.Graph` object (i.e., *not* a `tf.GraphDef`), which
+      contains op tracebacks, if applicable.
+    send_source: Whether the source files involved in the op tracebacks but
+      outside the TensorFlow library are to be sent.
+  """
+  if not isinstance(destinations, list):
+    destinations = [destinations]
+
+  call_type = (debug_service_pb2.CallTraceback.EAGER_EXECUTION
+               if is_eager_execution
+               else debug_service_pb2.CallTraceback.GRAPH_EXECUTION)
+  graph_traceback = tfprof_logger.merge_default_with_oplog(
+      graph, add_trainable_var=False) if graph else None
+  call_traceback = debug_service_pb2.CallTraceback(
+      call_type=call_type, call_key=call_key, graph_traceback=graph_traceback,
+      graph_version=graph.version if graph else None)
+
+  _format_origin_stack(origin_stack, call_traceback)
+
+  if send_source:
+    source_file_paths = set()
+    source_file_paths.update(_source_file_paths_outside_tensorflow_py_library(
+        (log_entry.code_def for log_entry
+         in call_traceback.graph_traceback.log_entries),
+        call_traceback.graph_traceback.id_to_string))
+    source_file_paths.update(_source_file_paths_outside_tensorflow_py_library(
+        [call_traceback.origin_stack], call_traceback.origin_id_to_string))
+
+    debugged_source_files = debug_pb2.DebuggedSourceFiles()
+    for file_path in source_file_paths:
+      _load_debugged_source_file(
+          file_path, debugged_source_files.source_files.add())
+
+  for destination in destinations:
+    channel = grpc.insecure_channel(destination)
+    stub = debug_service_pb2_grpc.EventListenerStub(channel)
+    stub.SendTracebacks(call_traceback)
+    if send_source:
+      stub.SendSourceFiles(debugged_source_files)
+
+
+def send_graph_tracebacks(destinations,
+                          run_key,
+                          origin_stack,
+                          graph,
+                          send_source=True):
+  """Send the tracebacks of a graph execution call to debug server(s).
+
+  Args:
+    destinations: gRPC destination addresses, a `str` or a `list` of `str`s,
+      e.g., "localhost:4242". If a `list`, gRPC requests containing the same
+      `CallTraceback` proto payload will be sent to all the destinations.
+    run_key: A string describing the feeds, fetches (and targets) names of the
+      `tf.Session.run` call.
+    origin_stack: The traceback of the `tf.Session.run()` invocation.
+    graph: A Python `tf.Graph` object (i.e., *not* a `tf.GraphDef`), which
+      contains op tracebacks.
+    send_source: Whether the source files involved in the op tracebacks but
+      outside the TensorFlow library are to be sent.
+  """
+  _send_call_tracebacks(
+      destinations, origin_stack, is_eager_execution=False, call_key=run_key,
+      graph=graph, send_source=send_source)
+
+
+def send_eager_tracebacks(destinations,
+                          origin_stack,
+                          send_source=True):
+  """Send the tracebacks of an eager execution call to debug server(s).
+
+  Args:
+    destinations: gRPC destination addresses, a `str` or a `list` of `str`s,
+      e.g., "localhost:4242". If a `list`, gRPC requests containing the same
+    origin_stack: The traceback of the eager operation invocation.
+    send_source: Whether the source files involved in the op tracebacks but
+      outside the TensorFlow library are to be sent.
+  """
+  _send_call_tracebacks(
+      destinations, origin_stack, is_eager_execution=True,
+      send_source=send_source)
diff --git a/tensorflow/python/debug/lib/source_remote_test.py b/tensorflow/python/debug/lib/source_remote_test.py
new file mode 100644
index 0000000000..1c4517f681
--- /dev/null
+++ b/tensorflow/python/debug/lib/source_remote_test.py
@@ -0,0 +1,171 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Unit tests for source_remote."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import os
+import traceback
+
+from tensorflow.core.debug import debug_service_pb2
+from tensorflow.python.client import session
+from tensorflow.python.debug.lib import grpc_debug_test_server
+from tensorflow.python.debug.lib import source_remote
+from tensorflow.python.debug.lib import source_utils
+from tensorflow.python.framework import ops
+from tensorflow.python.framework import test_util
+from tensorflow.python.ops import math_ops
+# Import resource_variable_ops for the variables-to-tensor implicit conversion.
+from tensorflow.python.ops import resource_variable_ops  # pylint: disable=unused-import
+from tensorflow.python.ops import variables
+from tensorflow.python.platform import googletest
+from tensorflow.python.util import tf_inspect
+
+
+def line_number_above():
+  return tf_inspect.stack()[1][2] - 1
+
+
+class SendTracebacksTest(test_util.TensorFlowTestCase):
+
+  @classmethod
+  def setUpClass(cls):
+    test_util.TensorFlowTestCase.setUpClass()
+    (cls._server_port, cls._debug_server_url, cls._server_dump_dir,
+     cls._server_thread,
+     cls._server) = grpc_debug_test_server.start_server_on_separate_thread()
+    cls._server_address = "localhost:%d" % cls._server_port
+    (cls._server_port_2, cls._debug_server_url_2, cls._server_dump_dir_2,
+     cls._server_thread_2,
+     cls._server_2) = grpc_debug_test_server.start_server_on_separate_thread()
+    cls._server_address_2 = "localhost:%d" % cls._server_port_2
+    cls._curr_file_path = os.path.normpath(os.path.abspath(__file__))
+
+  @classmethod
+  def tearDownClass(cls):
+    # Stop the test server and join the thread.
+    cls._server.stop_server().wait()
+    cls._server_thread.join()
+    cls._server_2.stop_server().wait()
+    cls._server_thread_2.join()
+    test_util.TensorFlowTestCase.tearDownClass()
+
+  def tearDown(self):
+    ops.reset_default_graph()
+    self._server.clear_data()
+    self._server_2.clear_data()
+    super(SendTracebacksTest, self).tearDown()
+
+  def _findFirstTraceInsideTensorFlowPyLibrary(self, op):
+    """Find the first trace of an op that belongs to the TF Python library."""
+    for trace in op.traceback:
+      if source_utils.guess_is_tensorflow_py_library(trace[0]):
+        return trace
+
+  def testSendGraphTracebacksToSingleDebugServer(self):
+    this_func_name = "testSendGraphTracebacksToSingleDebugServer"
+    with session.Session() as sess:
+      a = variables.Variable(21.0, name="a")
+      a_lineno = line_number_above()
+      b = variables.Variable(2.0, name="b")
+      b_lineno = line_number_above()
+      math_ops.add(a, b, name="x")
+      x_lineno = line_number_above()
+
+      send_stack = traceback.extract_stack()
+      send_lineno = line_number_above()
+      source_remote.send_graph_tracebacks(
+          self._server_address, "dummy_run_key", send_stack, sess.graph)
+
+      tb = self._server.query_op_traceback("a")
+      self.assertIn((self._curr_file_path, a_lineno, this_func_name), tb)
+      tb = self._server.query_op_traceback("b")
+      self.assertIn((self._curr_file_path, b_lineno, this_func_name), tb)
+      tb = self._server.query_op_traceback("x")
+      self.assertIn((self._curr_file_path, x_lineno, this_func_name), tb)
+
+      self.assertIn(
+          (self._curr_file_path, send_lineno, this_func_name),
+          self._server.query_origin_stack()[-1])
+
+      self.assertEqual(
+          "a = variables.Variable(21.0, name=\"a\")",
+          self._server.query_source_file_line(__file__, a_lineno))
+      # Files in the TensorFlow code base shouldn not have been sent.
+      tf_trace_file_path = self._findFirstTraceInsideTensorFlowPyLibrary(a.op)
+      with self.assertRaises(ValueError):
+        self._server.query_source_file_line(tf_trace_file_path, 0)
+      self.assertEqual([debug_service_pb2.CallTraceback.GRAPH_EXECUTION],
+                       self._server.query_call_types())
+      self.assertEqual(["dummy_run_key"], self._server.query_call_keys())
+      self.assertEqual(
+          [sess.graph.version], self._server.query_graph_versions())
+
+  def testSendGraphTracebacksToTwoDebugServers(self):
+    this_func_name = "testSendGraphTracebacksToTwoDebugServers"
+    with session.Session() as sess:
+      a = variables.Variable(21.0, name="two/a")
+      a_lineno = line_number_above()
+      b = variables.Variable(2.0, name="two/b")
+      b_lineno = line_number_above()
+      x = math_ops.add(a, b, name="two/x")
+      x_lineno = line_number_above()
+
+      send_traceback = traceback.extract_stack()
+      send_lineno = line_number_above()
+      source_remote.send_graph_tracebacks(
+          [self._server_address, self._server_address_2],
+          "dummy_run_key", send_traceback, sess.graph)
+
+      servers = [self._server, self._server_2]
+      for server in servers:
+        tb = server.query_op_traceback("two/a")
+        self.assertIn((self._curr_file_path, a_lineno, this_func_name), tb)
+        tb = server.query_op_traceback("two/b")
+        self.assertIn((self._curr_file_path, b_lineno, this_func_name), tb)
+        tb = server.query_op_traceback("two/x")
+        self.assertIn((self._curr_file_path, x_lineno, this_func_name), tb)
+
+        self.assertIn(
+            (self._curr_file_path, send_lineno, this_func_name),
+            server.query_origin_stack()[-1])
+
+        self.assertEqual(
+            "x = math_ops.add(a, b, name=\"two/x\")",
+            server.query_source_file_line(__file__, x_lineno))
+        tf_trace_file_path = self._findFirstTraceInsideTensorFlowPyLibrary(x.op)
+        with self.assertRaises(ValueError):
+          server.query_source_file_line(tf_trace_file_path, 0)
+        self.assertEqual([debug_service_pb2.CallTraceback.GRAPH_EXECUTION],
+                         server.query_call_types())
+        self.assertEqual(["dummy_run_key"], server.query_call_keys())
+        self.assertEqual([sess.graph.version], server.query_graph_versions())
+
+  def testSendEagerTracebacksToSingleDebugServer(self):
+    this_func_name = "testSendEagerTracebacksToSingleDebugServer"
+    send_traceback = traceback.extract_stack()
+    send_lineno = line_number_above()
+    source_remote.send_eager_tracebacks(self._server_address, send_traceback)
+
+    self.assertEqual([debug_service_pb2.CallTraceback.EAGER_EXECUTION],
+                     self._server.query_call_types())
+    self.assertIn((self._curr_file_path, send_lineno, this_func_name),
+                  self._server.query_origin_stack()[-1])
+
+
+if __name__ == "__main__":
+  googletest.main()
diff --git a/tensorflow/python/profiler/model_analyzer.py b/tensorflow/python/profiler/model_analyzer.py
index 46a921c0a1..72422f11e9 100644
--- a/tensorflow/python/profiler/model_analyzer.py
+++ b/tensorflow/python/profiler/model_analyzer.py
@@ -162,7 +162,7 @@ class Profiler(object):
     self._coverage = 0.0
     self._graph = graph
     # pylint: disable=protected-access
-    op_log = tfprof_logger._merge_default_with_oplog(
+    op_log = tfprof_logger.merge_default_with_oplog(
         self._graph, op_log=op_log)
     # pylint: enable=protected-access
 
@@ -182,7 +182,7 @@ class Profiler(object):
       run_meta: RunMetadata proto that contains statistics of a session run.
     """
     # pylint: disable=protected-access
-    op_log = tfprof_logger._merge_default_with_oplog(
+    op_log = tfprof_logger.merge_default_with_oplog(
         self._graph, run_meta=run_meta)
     # pylint: enable=protected-access
     # TODO(xpan): P1: Better to find the current graph.
@@ -315,7 +315,7 @@ def profile(graph,
                .trainable_variables_parameter())
 
   # pylint: disable=protected-access
-  op_log = tfprof_logger._merge_default_with_oplog(
+  op_log = tfprof_logger.merge_default_with_oplog(
       graph, op_log, run_meta, add_trace=cmd == 'code')
   # pylint: enable=protected-access
 
@@ -374,7 +374,7 @@ def advise(graph, run_meta=None, options=_DEFAULT_ADVISE_OPTIONS):
     options = ALL_ADVICE.copy()
 
   # pylint: disable=protected-access
-  op_log = tfprof_logger._merge_default_with_oplog(
+  op_log = tfprof_logger.merge_default_with_oplog(
       graph, None, run_meta, add_trace=True)
   # pylint: enable=protected-access
 
diff --git a/tensorflow/python/profiler/tfprof_logger.py b/tensorflow/python/profiler/tfprof_logger.py
index 838064a1f0..15c273794d 100644
--- a/tensorflow/python/profiler/tfprof_logger.py
+++ b/tensorflow/python/profiler/tfprof_logger.py
@@ -139,8 +139,8 @@ def _get_logged_ops(graph, run_meta=None, add_trace=True,
   return logged_ops, string_to_id
 
 
-def _merge_default_with_oplog(graph, op_log=None, run_meta=None,
-                              add_trace=True, add_trainable_var=True):
+def merge_default_with_oplog(graph, op_log=None, run_meta=None,
+                             add_trace=True, add_trainable_var=True):
   """Merge the tfprof default extra info with caller's op_log.
 
   Args:
@@ -199,7 +199,7 @@ def write_op_log(graph, log_dir, op_log=None, run_meta=None, add_trace=True):
     add_trace: Whether to add python code trace information.
         Used to support "code" view.
   """
-  op_log = _merge_default_with_oplog(graph, op_log, run_meta, add_trace)
+  op_log = merge_default_with_oplog(graph, op_log, run_meta, add_trace)
 
   with gfile.Open(os.path.join(log_dir, 'tfprof_log'), 'w') as log:
     log.write(op_log.SerializeToString())
-- 
GitLab


From 10197197fd43af6027c62e57bd3be375075e90e3 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 6 Dec 2017 21:07:27 -0800
Subject: [PATCH 0736/1225] Make sure only test names are matched by regular
 expressions, not the whole path which is arbitrary.

PiperOrigin-RevId: 178195188
---
 .../testing/generated_examples_zip_test.cc    | 44 +++++++++----------
 1 file changed, 22 insertions(+), 22 deletions(-)

diff --git a/tensorflow/contrib/lite/testing/generated_examples_zip_test.cc b/tensorflow/contrib/lite/testing/generated_examples_zip_test.cc
index e7df97ee54..f8f6044b62 100644
--- a/tensorflow/contrib/lite/testing/generated_examples_zip_test.cc
+++ b/tensorflow/contrib/lite/testing/generated_examples_zip_test.cc
@@ -96,11 +96,11 @@ class ZipEnvironment : public ::testing::Environment {
   }
 
   // Unzip `zip` file into a new temporary directory  `out_dir`.
-  tensorflow::Status UnZip(const std::string& zip, std::string* out_dir) {
+  tensorflow::Status UnZip(const string& zip, string* out_dir) {
     string dir;
     TF_CHECK_OK(MakeTemporaryDirectory(&dir));
     tensorflow::SubProcess proc;
-    std::string unzip_binary =
+    string unzip_binary =
         "/usr/bin/unzip";
     proc.SetProgram(unzip_binary, {"unzip", "-d", dir, zip.c_str()});
     proc.SetChannelAction(tensorflow::CHAN_STDOUT, tensorflow::ACTION_PIPE);
@@ -144,48 +144,48 @@ ZipEnvironment* zip_environment() {
 // the temporary directory where the zip file has been unarchived and
 // `test_paths` is the list of test prefixes that were in the manifest.
 // Note, it is an error for a manifest to contain no tests.
-tensorflow::Status ReadManifest(const std::string& original_file,
-                                const std::string& dir,
-                                std::vector<std::string>* test_paths) {
+tensorflow::Status ReadManifest(const string& original_file, const string& dir,
+                                std::vector<string>* test_paths) {
   // Read the newline delimited list of entries in the manifest.
   std::ifstream manifest_fp(dir + "/manifest.txt");
-  std::string manifest((std::istreambuf_iterator<char>(manifest_fp)),
-                       std::istreambuf_iterator<char>());
+  string manifest((std::istreambuf_iterator<char>(manifest_fp)),
+                  std::istreambuf_iterator<char>());
   size_t pos = 0;
   int added = 0;
   while (true) {
     size_t end_pos = manifest.find("\n", pos);
-    if (end_pos == std::string::npos) break;
-    std::string filename = manifest.substr(pos, end_pos - pos);
+    if (end_pos == string::npos) break;
+    string filename = manifest.substr(pos, end_pos - pos);
     test_paths->push_back(dir + "/" + filename);
     pos = end_pos + 1;
     added += 1;
   }
   if (!added) {
-    std::string message = "Test had no examples: " + original_file;
+    string message = "Test had no examples: " + original_file;
     return tensorflow::Status(tensorflow::error::UNKNOWN, message.c_str());
   }
   return tensorflow::Status::OK();
 }
 
 // Get a list of tests from a zip file `zip_file_name`.
-std::vector<std::string> UnarchiveZipAndFindTestNames(
-    const std::string& zip_file_name) {
-  std::string zip_file = ::tensorflow::testing::TensorFlowSrcRoot() +
-                         "/contrib/lite/testing/optest/" + zip_file_name;
-  std::string decompress_tmp_dir;
+std::vector<string> UnarchiveZipAndFindTestNames(const string& zip_file_name) {
+  string zip_file = ::tensorflow::testing::TensorFlowSrcRoot() +
+                    "/contrib/lite/testing/optest/" + zip_file_name;
+  string decompress_tmp_dir;
   TF_CHECK_OK(zip_environment()->UnZip(zip_file, &decompress_tmp_dir));
-  std::vector<std::string> stuff;
+  std::vector<string> stuff;
   TF_CHECK_OK(ReadManifest(zip_file, decompress_tmp_dir, &stuff));
   return stuff;
 }
 
-class OpsTest : public ::testing::TestWithParam<std::string> {};
+class OpsTest : public ::testing::TestWithParam<string> {};
 
 TEST_P(OpsTest, RunStuff) {
-  std::string test_path = GetParam();
-  std::string tflite_file = test_path + ".bin";
-  std::string tflite_examples = test_path + ".inputs";
+  string test_path = GetParam();
+  string tflite_file = test_path + ".bin";
+  string tflite_examples = test_path + ".inputs";
+  string test_name = test_path.substr(test_path.find_last_of('/'));
+
   auto model = tflite::FlatBufferModel::BuildFromFile(tflite_file.c_str());
   std::unique_ptr<tflite::Interpreter> interpreter;
 
@@ -199,7 +199,7 @@ TEST_P(OpsTest, RunStuff) {
 
   string bug_number;
   for (const auto& p : kBrokenTests) {
-    if (RE2::PartialMatch(test_path, p.first)) {
+    if (RE2::PartialMatch(test_name, p.first)) {
       bug_number = p.second;
     }
   }
@@ -218,7 +218,7 @@ TEST_P(OpsTest, RunStuff) {
     } else {
       if (FLAGS_ignore_known_bugs) {
         ASSERT_EQ(result, kTfLiteError)
-            << "Not failing as expected dut to http://b/" << bug_number;
+            << "Not failing as expected due to http://b/" << bug_number;
       } else {
         ASSERT_EQ(result, kTfLiteOk)
             << "Possibly due to http://b/" << bug_number;
-- 
GitLab


From b9df87ffc4a53566320e2baf17f0daeca25fdde9 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 6 Dec 2017 23:14:18 -0800
Subject: [PATCH 0737/1225] Wrap macro bodies in do{}while(0) to prevent
 capture of else-clauses.

PiperOrigin-RevId: 178202725
---
 .../kernels/hyperplane_lsh_probes.cc          | 15 +++++++------
 .../resampler/kernels/resampler_ops.cc        |  8 +++----
 tensorflow/core/framework/op_kernel.h         | 22 +++++++++++--------
 .../kernels/conditional_accumulator_base.h    | 10 +++++----
 .../core/kernels/quantized_concat_op.cc       |  4 ++--
 tensorflow/core/kernels/variable_ops.h        |  2 +-
 6 files changed, 34 insertions(+), 27 deletions(-)

diff --git a/tensorflow/contrib/nearest_neighbor/kernels/hyperplane_lsh_probes.cc b/tensorflow/contrib/nearest_neighbor/kernels/hyperplane_lsh_probes.cc
index 62ee6630ac..2b412fac9a 100644
--- a/tensorflow/contrib/nearest_neighbor/kernels/hyperplane_lsh_probes.cc
+++ b/tensorflow/contrib/nearest_neighbor/kernels/hyperplane_lsh_probes.cc
@@ -45,16 +45,16 @@ class HyperplaneLSHProbesOp : public OpKernel {
     const Tensor& products_tensor = context->input(0);
     OP_REQUIRES(context, products_tensor.dims() == 2,
                 InvalidArgument("Need a two-dimensional products tensor, got ",
-                                products_tensor.dims(), " dimensions."))
+                                products_tensor.dims(), " dimensions."));
 
     const Tensor& num_tables_tensor = context->input(1);
     OP_REQUIRES(context, num_tables_tensor.dims() == 0,
                 InvalidArgument("Need a scalar num_tables tensor, got ",
-                                num_tables_tensor.dims(), " dimensions."))
+                                num_tables_tensor.dims(), " dimensions."));
     int num_tables = num_tables_tensor.scalar<int32>()();
     OP_REQUIRES(context, num_tables >= 1,
                 InvalidArgument("num_tables must be at least 1 but got ",
-                                num_tables, "."))
+                                num_tables, "."));
     OP_REQUIRES(context, num_tables <= 1000,
                 InvalidArgument("Need num_tables <= 1000, got ", num_tables,
                                 ". This is mostly to protect against incorrect "
@@ -66,12 +66,13 @@ class HyperplaneLSHProbesOp : public OpKernel {
                 InvalidArgument("Need a scalar num_hyperplanes_per_table "
                                 "tensor, got ",
                                 num_hyperplanes_per_table_tensor.dims(),
-                                " dimensions."))
+                                " dimensions."));
     int num_hyperplanes_per_table =
         num_hyperplanes_per_table_tensor.scalar<int32>()();
     OP_REQUIRES(context, num_hyperplanes_per_table >= 1,
                 InvalidArgument("num_hyperplanes_per_table must be at least 1 "
-                                "but got ", num_hyperplanes_per_table, "."))
+                                "but got ",
+                                num_hyperplanes_per_table, "."));
     OP_REQUIRES(context, num_hyperplanes_per_table <= 30,
                 InvalidArgument("Need num_hyperplanes_per_table <= 30, got ",
                                 num_hyperplanes_per_table, ". "
@@ -81,10 +82,10 @@ class HyperplaneLSHProbesOp : public OpKernel {
     const Tensor& num_probes_tensor = context->input(3);
     OP_REQUIRES(context, num_probes_tensor.dims() == 0,
                 InvalidArgument("Need a scalar num_probes tensor, got ",
-                                num_probes_tensor.dims(), " dimensions."))
+                                num_probes_tensor.dims(), " dimensions."));
     int num_probes = num_probes_tensor.scalar<int32>()();
     OP_REQUIRES(context, num_probes >= 1,
-                InvalidArgument("num_probes must be at least 1."))
+                InvalidArgument("num_probes must be at least 1."));
 
     int expected_num_hyperplanes = num_tables * num_hyperplanes_per_table;
     OP_REQUIRES(
diff --git a/tensorflow/contrib/resampler/kernels/resampler_ops.cc b/tensorflow/contrib/resampler/kernels/resampler_ops.cc
index 7d9ef14cef..e02c1b6a2b 100644
--- a/tensorflow/contrib/resampler/kernels/resampler_ops.cc
+++ b/tensorflow/contrib/resampler/kernels/resampler_ops.cc
@@ -406,10 +406,10 @@ class ResamplerGradOp : public ::tensorflow::OpKernel {
                                    data_channels);
     OP_REQUIRES(ctx, grad_output_shape == resampler_output_shape,
                 ::tensorflow::errors::InvalidArgument(
-                   "grad_output shape is not consistent with data and warp "
-                   "shapes; it should be ",
-                   resampler_output_shape.DebugString(), " but is ",
-                   grad_output_shape.DebugString()))
+                    "grad_output shape is not consistent with data and warp "
+                    "shapes; it should be ",
+                    resampler_output_shape.DebugString(), " but is ",
+                    grad_output_shape.DebugString()));
     const int num_sampling_points = warp.NumElements() / batch_size / 2;
     ::tensorflow::Tensor* grad_data = nullptr;
     ::tensorflow::Tensor* grad_warp = nullptr;
diff --git a/tensorflow/core/framework/op_kernel.h b/tensorflow/core/framework/op_kernel.h
index a7b9bb393d..3a9a6121c0 100644
--- a/tensorflow/core/framework/op_kernel.h
+++ b/tensorflow/core/framework/op_kernel.h
@@ -1492,10 +1492,12 @@ inline void OpOutputList::set_ref(int i, mutex* mu, Tensor* tensor_for_ref) {
 // }
 
 #define OP_REQUIRES(CTX, EXP, STATUS) \
-  if (!TF_PREDICT_TRUE(EXP)) {        \
-    (CTX)->CtxFailure((STATUS));      \
-    return;                           \
-  }
+  do {                                \
+    if (!TF_PREDICT_TRUE(EXP)) {      \
+      (CTX)->CtxFailure((STATUS));    \
+      return;                         \
+    }                                 \
+  } while (0)
 
 #define OP_REQUIRES_OK(CTX, ...)          \
   do {                                    \
@@ -1507,11 +1509,13 @@ inline void OpOutputList::set_ref(int i, mutex* mu, Tensor* tensor_for_ref) {
   } while (0)
 
 #define OP_REQUIRES_ASYNC(CTX, EXP, STATUS, CALLBACK) \
-  if (!TF_PREDICT_TRUE(EXP)) {                        \
-    (CTX)->CtxFailure((STATUS));                      \
-    (CALLBACK)();                                     \
-    return;                                           \
-  }
+  do {                                                \
+    if (!TF_PREDICT_TRUE(EXP)) {                      \
+      (CTX)->CtxFailure((STATUS));                    \
+      (CALLBACK)();                                   \
+      return;                                         \
+    }                                                 \
+  } while (0)
 
 #define OP_REQUIRES_OK_ASYNC(CTX, STATUS, CALLBACK) \
   do {                                              \
diff --git a/tensorflow/core/kernels/conditional_accumulator_base.h b/tensorflow/core/kernels/conditional_accumulator_base.h
index 05ee855dae..27db6ee785 100644
--- a/tensorflow/core/kernels/conditional_accumulator_base.h
+++ b/tensorflow/core/kernels/conditional_accumulator_base.h
@@ -162,10 +162,12 @@ class ConditionalAccumulatorBase : public ResourceBase {
  * function can get an indication that a failure has occurred.
 */
 #define OP_REQUIRES_BOOLEAN(CTX, EXP, STATUS) \
-  if (!TF_PREDICT_TRUE(EXP)) {                \
-    (CTX)->CtxFailure((STATUS));              \
-    return false;                             \
-  }
+  do {                                        \
+    if (!TF_PREDICT_TRUE(EXP)) {              \
+      (CTX)->CtxFailure((STATUS));            \
+      return false;                           \
+    }                                         \
+  } while (0)
 
 #define OP_REQUIRES_OK_BOOLEAN(CTX, STATUS) \
   do {                                      \
diff --git a/tensorflow/core/kernels/quantized_concat_op.cc b/tensorflow/core/kernels/quantized_concat_op.cc
index ee573f1bb8..d67f1ab3ec 100644
--- a/tensorflow/core/kernels/quantized_concat_op.cc
+++ b/tensorflow/core/kernels/quantized_concat_op.cc
@@ -174,13 +174,13 @@ class QuantizedConcatOp : public OpKernel {
     OP_REQUIRES(context, (input_mins.size() == N),
                 errors::InvalidArgument(
                     "QuantizedConcatOp : Expected mins input list length ",
-                    input_mins.size(), " to equal values length ", N))
+                    input_mins.size(), " to equal values length ", N));
     OpInputList input_maxes;
     OP_REQUIRES_OK(context, context->input_list("input_maxes", &input_maxes));
     OP_REQUIRES(context, (input_maxes.size() == N),
                 errors::InvalidArgument(
                     "QuantizedConcatOp : Expected maxes input list length ",
-                    input_maxes.size(), " to equal values length ", N))
+                    input_maxes.size(), " to equal values length ", N));
     const int input_dims = values[0].dims();
     const TensorShape& input_shape = values[0].shape();
     OP_REQUIRES(
diff --git a/tensorflow/core/kernels/variable_ops.h b/tensorflow/core/kernels/variable_ops.h
index 355140d44c..820b90d041 100644
--- a/tensorflow/core/kernels/variable_ops.h
+++ b/tensorflow/core/kernels/variable_ops.h
@@ -160,7 +160,7 @@ class DestroyTemporaryVariableOp : public OpKernel {
   explicit DestroyTemporaryVariableOp(OpKernelConstruction* context)
       : OpKernel(context) {
     OP_REQUIRES(context, IsRefType(context->input_type(0)),
-                errors::InvalidArgument("lhs input needs to be a ref type"))
+                errors::InvalidArgument("lhs input needs to be a ref type"));
     OP_REQUIRES_OK(context, context->GetAttr("var_name", &var_name_));
     OP_REQUIRES(context, var_name_ != "",
                 errors::InvalidArgument("Missing var_name attribute"));
-- 
GitLab


From 69c324591ba4dfeafb403ee59de56ffe063c1e94 Mon Sep 17 00:00:00 2001
From: resec <resec0109@gmail.com>
Date: Thu, 7 Dec 2017 15:35:24 +0800
Subject: [PATCH 0738/1225] Use argmax output_type argument instead of cast op

---
 tensorflow/contrib/seq2seq/python/ops/helper.py | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/tensorflow/contrib/seq2seq/python/ops/helper.py b/tensorflow/contrib/seq2seq/python/ops/helper.py
index b55d90cbab..dec03ce43f 100644
--- a/tensorflow/contrib/seq2seq/python/ops/helper.py
+++ b/tensorflow/contrib/seq2seq/python/ops/helper.py
@@ -223,8 +223,7 @@ class TrainingHelper(Helper):
 
   def sample(self, time, outputs, name=None, **unused_kwargs):
     with ops.name_scope(name, "TrainingHelperSample", [time, outputs]):
-      sample_ids = math_ops.cast(
-          math_ops.argmax(outputs, axis=-1), dtypes.int32)
+      sample_ids = math_ops.argmax(outputs, axis=-1, output_type=dtypes.int32)
       return sample_ids
 
   def next_inputs(self, time, outputs, state, name=None, **unused_kwargs):
@@ -540,8 +539,7 @@ class GreedyEmbeddingHelper(Helper):
     if not isinstance(outputs, ops.Tensor):
       raise TypeError("Expected outputs to be a single Tensor, got: %s" %
                       type(outputs))
-    sample_ids = math_ops.cast(
-        math_ops.argmax(outputs, axis=-1), dtypes.int32)
+    sample_ids = math_ops.argmax(outputs, axis=-1, output_type=dtypes.int32)
     return sample_ids
 
   def next_inputs(self, time, outputs, state, sample_ids, name=None):
-- 
GitLab


From 418079c7fea20bfb8403a34f6742231fddd3dfde Mon Sep 17 00:00:00 2001
From: dongpilYu <yoodongphil@naver.com>
Date: Thu, 7 Dec 2017 16:57:16 +0900
Subject: [PATCH 0739/1225] typo fixed (#14431)

* did't -> didn't

* typo fixed : thats -> that's

* typo fixed

* typo fixed
-- 
GitLab


From 5723904ef8402093001a6b90fcf6675258be096b Mon Sep 17 00:00:00 2001
From: dongpilYu <yoodongphil@naver.com>
Date: Thu, 7 Dec 2017 17:03:23 +0900
Subject: [PATCH 0740/1225] Change path (#14840)

* did't -> didn't

* typo fixed : thats -> that's

* typo fixed

* typo fixed

* typo fixed

* typo fixed

* change path

* Address comment
---
 tensorflow/c/c_api_function.cc                | 4 ++--
 tensorflow/tools/benchmark/benchmark_model.cc | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/tensorflow/c/c_api_function.cc b/tensorflow/c/c_api_function.cc
index dcb818b88b..8b50b6b339 100644
--- a/tensorflow/c/c_api_function.cc
+++ b/tensorflow/c/c_api_function.cc
@@ -68,7 +68,7 @@ class NodeNameMapping {
   // This is a superset of values in name_mapping_.
   std::unordered_set<string> used_names_;
   // Mapping from original node name from the graph to the normalized
-  // and uniqified version of it.
+  // and uniquified version of it.
   std::unordered_map<string, string> name_mapping_;
 };
 
@@ -231,7 +231,7 @@ Status FillFunctionBody(
 }
 
 // Graph to FunctionDef conversion. This code is closely modeled on the Python
-// code in third_party/tensorflow/python/framework/function.py.
+// code in tensorflow/python/framework/function.py.
 Status GraphToFunctionDef(const Graph& fn_body, const string& fn_name,
                           bool append_hash_to_fn_name,
                           const std::vector<const Node*>& body_nodes,
diff --git a/tensorflow/tools/benchmark/benchmark_model.cc b/tensorflow/tools/benchmark/benchmark_model.cc
index 9809ad52de..ecab6f8769 100644
--- a/tensorflow/tools/benchmark/benchmark_model.cc
+++ b/tensorflow/tools/benchmark/benchmark_model.cc
@@ -530,7 +530,7 @@ int Main(int argc, char** argv) {
   }
 
   // Capture overall inference time without stat logging overhead. This is the
-  // timing data that can be compared to other libaries.
+  // timing data that can be compared to other libraries.
   SleepSeconds(inter_benchmark_sleep_seconds);
   int64 no_stat_time_us = 0;
   int64 no_stat_num_runs = 0;
-- 
GitLab


From 1730f9743c6a57beee8158bc35c689d24c8df729 Mon Sep 17 00:00:00 2001
From: Justine Tunney <jart@google.com>
Date: Thu, 7 Dec 2017 02:32:29 -0800
Subject: [PATCH 0741/1225] Add Snappy support to SQLite

SWIG and CLIF are way too hard. This is more idiomatic. Native
extensions that add functions for protobufs will follow.

PiperOrigin-RevId: 178215852
---
 .../contrib/cmake/external/sqlite.cmake       |   1 +
 tensorflow/contrib/tensorboard/db/BUILD       |  19 ++
 tensorflow/contrib/tensorboard/db/tbsnap.cc   | 214 ++++++++++++++++++
 .../contrib/tensorboard/db/tbsnap_test.py     |  70 ++++++
 third_party/sqlite.BUILD                      |   5 +-
 5 files changed, 308 insertions(+), 1 deletion(-)
 create mode 100644 tensorflow/contrib/tensorboard/db/tbsnap.cc
 create mode 100644 tensorflow/contrib/tensorboard/db/tbsnap_test.py

diff --git a/tensorflow/contrib/cmake/external/sqlite.cmake b/tensorflow/contrib/cmake/external/sqlite.cmake
index 785039a469..14d8148e6e 100644
--- a/tensorflow/contrib/cmake/external/sqlite.cmake
+++ b/tensorflow/contrib/cmake/external/sqlite.cmake
@@ -28,6 +28,7 @@ endif()
 
 set(sqlite_HEADERS
     "${sqlite_BUILD}/sqlite3.h"
+    "${sqlite_BUILD}/sqlite3ext.h"
 )
 
 if (WIN32)
diff --git a/tensorflow/contrib/tensorboard/db/BUILD b/tensorflow/contrib/tensorboard/db/BUILD
index 9d3d60c24d..28b5eba2a4 100644
--- a/tensorflow/contrib/tensorboard/db/BUILD
+++ b/tensorflow/contrib/tensorboard/db/BUILD
@@ -55,6 +55,25 @@ tf_cc_test(
     ],
 )
 
+cc_library(
+    name = "tbsnap",
+    srcs = ["tbsnap.cc"],
+    deps = [
+        "@snappy",
+        "@sqlite_archive//:sqlite",
+    ],
+)
+
+py_test(
+    name = "tbsnap_test",
+    size = "small",
+    srcs = ["tbsnap_test.py"],
+    data = ["libtbsnap.so"],
+    srcs_version = "PY2AND3",
+    tags = ["nomac"],
+    deps = ["//tensorflow/python:client_testlib"],
+)
+
 filegroup(
     name = "all_files",
     srcs = glob(["*"]),
diff --git a/tensorflow/contrib/tensorboard/db/tbsnap.cc b/tensorflow/contrib/tensorboard/db/tbsnap.cc
new file mode 100644
index 0000000000..588ea9af89
--- /dev/null
+++ b/tensorflow/contrib/tensorboard/db/tbsnap.cc
@@ -0,0 +1,214 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+/// \brief SQLite extension for Snappy compression
+///
+/// Snappy a compression library that trades ratio for speed, almost going a
+/// tenth as fast as memcpy().
+///
+/// This extension adds the following native functions:
+///
+/// - snap(value: NULL|BLOB|TEXT) -> NULL|BLOB
+///
+///   Applies Snappy compression. If value is NULL, then NULL is returned. If
+///   value is TEXT and BLOB, then it is compressed and the result is a BLOB.
+///   An uncompressed byte is prepended to indicate the original type.
+///
+/// - unsnap(value: NULL|BLOB) -> NULL|TEXT|BLOB
+///
+///   Decompresses value created by snap(). If value is NULL, then NULL is
+///   returned. If value is empty, then an empty blob is returned. Otherwise
+///   the original type is restored from the first byte and the remaining ones
+///   are decompressed.
+///
+/// These functions are deterministic so they can be used for all purposes,
+/// including INDEX. Please note that SQLite currently does not currently
+/// perform common sub-expression optimization for pure functions when
+/// compiling queries.
+///
+/// If your SQLite environment isn't universally UTF8, please file an issue
+/// with the TensorBoard team letting us know. While this implementation should
+/// work, its performance could be improved to avoid superfluous TEXT coding.
+
+#include "sqlite3ext.h"
+#include "snappy.h"
+
+namespace {
+SQLITE_EXTENSION_INIT1
+
+void snap(sqlite3_context* ctx, int argc, sqlite3_value** argv) {
+  const char* data;
+  int type = sqlite3_value_type(argv[0]);
+  switch (type) {
+    case SQLITE_NULL:
+      return;
+    case SQLITE_BLOB:
+      data = reinterpret_cast<const char*>(sqlite3_value_blob(argv[0]));
+      break;
+    case SQLITE_TEXT:
+      data = reinterpret_cast<const char*>(sqlite3_value_text(argv[0]));
+      break;
+    default:
+      sqlite3_result_error(ctx, "snap() takes NULL|BLOB|TEXT", -1);
+      sqlite3_result_error_code(ctx, SQLITE_MISMATCH);
+      return;
+  }
+  int size = sqlite3_value_bytes(argv[0]);
+  if (size <= 0) {
+    char result[] = {static_cast<char>(type)};
+    sqlite3_result_blob(ctx, result, sizeof(result), SQLITE_TRANSIENT);
+    return;
+  }
+  size_t output_size =
+      snappy::MaxCompressedLength(static_cast<size_t>(size)) + 1;
+  if (output_size >
+      sqlite3_limit(sqlite3_context_db_handle(ctx), SQLITE_LIMIT_LENGTH, -1)) {
+    sqlite3_result_error_toobig(ctx);
+    return;
+  }
+  char* output =
+      static_cast<char*>(sqlite3_malloc(static_cast<int>(output_size)));
+  if (output == nullptr) {
+    sqlite3_result_error_nomem(ctx);
+    return;
+  }
+  *output++ = static_cast<char>(type), --output_size;
+  snappy::RawCompress(data, static_cast<size_t>(size), output, &output_size);
+  sqlite3_result_blob(ctx, output - 1, static_cast<int>(output_size + 1),
+                      sqlite3_free);
+}
+
+void unsnap(sqlite3_context* ctx, int argc, sqlite3_value** argv) {
+  int type = sqlite3_value_type(argv[0]);
+  if (type == SQLITE_NULL) return;
+  if (type != SQLITE_BLOB) {
+    sqlite3_result_error(ctx, "unsnap() takes NULL|BLOB", -1);
+    sqlite3_result_error_code(ctx, SQLITE_MISMATCH);
+    return;
+  }
+  int size = sqlite3_value_bytes(argv[0]);
+  const char* blob = reinterpret_cast<const char*>(sqlite3_value_blob(argv[0]));
+  if (size <= 0) {
+    sqlite3_result_zeroblob(ctx, 0);
+    return;
+  }
+  type = static_cast<int>(*blob++), --size;
+  if (type != SQLITE_BLOB && type != SQLITE_TEXT) {
+    sqlite3_result_error(ctx, "unsnap() first byte is invalid type", -1);
+    sqlite3_result_error_code(ctx, SQLITE_CORRUPT);
+    return;
+  }
+  if (size == 0) {
+    if (type == SQLITE_TEXT) {
+      sqlite3_result_text(ctx, "", 0, SQLITE_STATIC);
+    } else {
+      sqlite3_result_zeroblob(ctx, 0);
+    }
+    return;
+  }
+  size_t output_size;
+  if (!snappy::GetUncompressedLength(blob, static_cast<size_t>(size),
+                                     &output_size)) {
+    sqlite3_result_error(ctx, "snappy parse error", -1);
+    sqlite3_result_error_code(ctx, SQLITE_CORRUPT);
+    return;
+  }
+  if (output_size >
+      sqlite3_limit(sqlite3_context_db_handle(ctx), SQLITE_LIMIT_LENGTH, -1)) {
+    sqlite3_result_error_toobig(ctx);
+    return;
+  }
+  char* output =
+      static_cast<char*>(sqlite3_malloc(static_cast<int>(output_size)));
+  if (output == nullptr) {
+    sqlite3_result_error_nomem(ctx);
+    return;
+  }
+  if (!snappy::RawUncompress(blob, static_cast<size_t>(size), output)) {
+    sqlite3_result_error(ctx, "snappy message corruption", -1);
+    sqlite3_result_error_code(ctx, SQLITE_CORRUPT);
+    return;
+  }
+  if (type == SQLITE_TEXT) {
+    sqlite3_result_text(ctx, output, static_cast<int>(output_size),
+                        sqlite3_free);
+  } else {
+    sqlite3_result_blob(ctx, output, static_cast<int>(output_size),
+                        sqlite3_free);
+  }
+}
+
+int init(sqlite3* db, const char** pzErrMsg, const sqlite3_api_routines* pApi) {
+  SQLITE_EXTENSION_INIT2(pApi);
+  int rc;
+
+  rc = sqlite3_create_function_v2(
+      db,
+      "snap",                              // zFunctionName
+      1,                                   // nArg
+      SQLITE_UTF8 | SQLITE_DETERMINISTIC,  // eTextRep
+      nullptr,                             // pApp
+      snap,                                // xFunc
+      nullptr,                             // xStep
+      nullptr,                             // xFinal
+      nullptr                              // xDestroy
+  );
+  if (rc != SQLITE_OK) {
+    *pzErrMsg = "oh snap()";
+    return rc;
+  }
+
+  rc = sqlite3_create_function_v2(
+      db,
+      "unsnap",                            // zFunctionName
+      1,                                   // nArg
+      SQLITE_UTF8 | SQLITE_DETERMINISTIC,  // eTextRep
+      nullptr,                             // pApp
+      unsnap,                              // xFunc
+      nullptr,                             // xStep
+      nullptr,                             // xFinal
+      nullptr                              // xDestroy
+  );
+  if (rc != SQLITE_OK) {
+    *pzErrMsg = "oh unsnap()";
+    return rc;
+  }
+
+  return SQLITE_OK;
+}
+
+}  // namespace
+
+extern "C" {
+
+#if defined(TF_SQLITE3_AUTO_EXTENSION)
+extern int sqlite3_tbsnap_status = sqlite3_auto_extension(init);
+#else
+
+#if defined(_MSC_VER) || defined(__MINGW32__)
+#define EXPORT __declspec(dllexport)
+#else
+#define EXPORT __attribute__((visibility("default")))
+#endif
+
+// SQLite deduces this function name from "libtbsnap.so".
+EXPORT extern int sqlite3_tbsnap_init(sqlite3* db, const char** pzErrMsg,
+                                      const sqlite3_api_routines* pApi) {
+  return init(db, pzErrMsg, pApi);
+}
+
+#endif
+
+}  // extern "C"
diff --git a/tensorflow/contrib/tensorboard/db/tbsnap_test.py b/tensorflow/contrib/tensorboard/db/tbsnap_test.py
new file mode 100644
index 0000000000..2d011959e0
--- /dev/null
+++ b/tensorflow/contrib/tensorboard/db/tbsnap_test.py
@@ -0,0 +1,70 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import sqlite3
+
+from tensorflow.python.framework import test_util
+from tensorflow.python.platform import resource_loader
+from tensorflow.python.platform import test
+
+
+class SqliteSnappyTest(test_util.TensorFlowTestCase):
+
+  def setUp(self):
+    super(SqliteSnappyTest, self).setUp()
+    self.db = sqlite3.connect(':memory:')
+    self.db.enable_load_extension(True)
+    self.db.execute("select load_extension('%s')" %
+                    resource_loader.get_path_to_datafile('libtbsnap.so'))
+    self.db.enable_load_extension(False)
+
+  def testRoundTrip(self):
+    self.assertEqual('hello',
+                     self.db.execute('SELECT unsnap(snap(\'hello\'))')
+                     .fetchone()[0])
+    self.assertEqual(
+        'hello',
+        self.db.execute(
+            'SELECT CAST(unsnap(snap(CAST(\'hello\' AS BLOB))) AS TEXT)')
+        .fetchone()[0])
+    self.assertEqual(
+        'text',
+        self.db.execute('SELECT typeof(unsnap(snap(\'h\')))').fetchone()[0])
+    self.assertEqual(
+        'blob',
+        self.db.execute(
+            'SELECT typeof(unsnap(snap(CAST(\'h\' AS BLOB))))').fetchone()[0])
+
+  def testNull_passesThrough(self):
+    self.assertIsNone(
+        self.db.execute('SELECT unsnap(snap(NULL))').fetchone()[0])
+
+  def testEmpty_passesThrough(self):
+    self.assertEqual('',
+                     self.db.execute('SELECT unsnap(snap(\'\'))').fetchone()[0])
+    self.assertEqual(
+        'text',
+        self.db.execute('SELECT typeof(unsnap(snap(\'\')))').fetchone()[0])
+    self.assertEqual(
+        'blob',
+        self.db.execute(
+            'SELECT typeof(unsnap(snap(CAST(\'\' AS BLOB))))').fetchone()[0])
+
+
+if __name__ == '__main__':
+  test.main()
diff --git a/third_party/sqlite.BUILD b/third_party/sqlite.BUILD
index 9840d7b151..03a6a7a5da 100644
--- a/third_party/sqlite.BUILD
+++ b/third_party/sqlite.BUILD
@@ -9,7 +9,10 @@ licenses(["unencumbered"])  # Public Domain
 cc_library(
     name = "sqlite",
     srcs = ["sqlite3.c"],
-    hdrs = ["sqlite3.h"],
+    hdrs = [
+        "sqlite3.h",
+        "sqlite3ext.h",
+    ],
     includes = ["."],
     linkopts = ["-lm"],
     visibility = ["//visibility:public"],
-- 
GitLab


From c72fac87de3c3156ca7a3ba7ddb889d9515097b4 Mon Sep 17 00:00:00 2001
From: MathSquared <MathSquared@users.noreply.github.com>
Date: Thu, 7 Dec 2017 05:26:15 -0600
Subject: [PATCH 0742/1225] Add un-init placeholder warning to
 negative-dimension error

---
 tensorflow/core/framework/tensor_shape_test.cc | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tensorflow/core/framework/tensor_shape_test.cc b/tensorflow/core/framework/tensor_shape_test.cc
index 06c576c7d4..b40a30cb9b 100644
--- a/tensorflow/core/framework/tensor_shape_test.cc
+++ b/tensorflow/core/framework/tensor_shape_test.cc
@@ -359,7 +359,8 @@ Status TensorShapeOld::IsValidShape(const TensorShapeProto& proto) {
   for (const auto& d : proto.dim()) {
     if (d.size() < 0) {
       return errors::InvalidArgument("Shape ", DebugString(proto),
-                                     " has negative dimensions");
+                                     " has negative dimensions; ",
+                                     "perhaps an uninitialized placeholder?");
     }
     num_elements *= d.size();
     if (num_elements > kMaxElements) {
-- 
GitLab


From c390ae2623e314970ac0f8c3bf8eb4d5d3f30096 Mon Sep 17 00:00:00 2001
From: MathSquared <MathSquared@users.noreply.github.com>
Date: Thu, 7 Dec 2017 05:28:19 -0600
Subject: [PATCH 0743/1225] Change message a bit

---
 tensorflow/core/framework/tensor_shape_test.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/core/framework/tensor_shape_test.cc b/tensorflow/core/framework/tensor_shape_test.cc
index b40a30cb9b..d8a9c0bac5 100644
--- a/tensorflow/core/framework/tensor_shape_test.cc
+++ b/tensorflow/core/framework/tensor_shape_test.cc
@@ -360,7 +360,7 @@ Status TensorShapeOld::IsValidShape(const TensorShapeProto& proto) {
     if (d.size() < 0) {
       return errors::InvalidArgument("Shape ", DebugString(proto),
                                      " has negative dimensions; ",
-                                     "perhaps an uninitialized placeholder?");
+                                     "perhaps an un-fed placeholder?");
     }
     num_elements *= d.size();
     if (num_elements > kMaxElements) {
-- 
GitLab


From 1be692cf1161539fdfa77257cd969a549da8cc97 Mon Sep 17 00:00:00 2001
From: Shanqing Cai <cais@google.com>
Date: Thu, 7 Dec 2017 10:11:45 -0500
Subject: [PATCH 0744/1225] Fix assert_called error on Python3

by replacing it with assertTrue(....called)
---
 tensorflow/python/estimator/training_test.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tensorflow/python/estimator/training_test.py b/tensorflow/python/estimator/training_test.py
index d72b95dbdd..285671f99f 100644
--- a/tensorflow/python/estimator/training_test.py
+++ b/tensorflow/python/estimator/training_test.py
@@ -626,7 +626,7 @@ class _TrainingExecutorTrainingTest(object):
 
     self._run_task(training._TrainingExecutor(mock_est, mock_train_spec,
                                               mock_eval_spec))
-    mock_est.train.assert_called()
+    self.assertTrue(mock_est.train.called)
     mock_server.assert_not_called()
 
   def test_fail_with_empty_task_type(self):
@@ -836,7 +836,7 @@ class TrainingExecutorRunMasterTest(test.TestCase):
     executor.run_master()
 
     mock_server.assert_not_called()
-    mock_est.train.assert_called()
+    self.assertTrue(mock_est.train.called)
 
   def test_fail_with_empty_task_type(self):
     mock_est = test.mock.Mock(spec=estimator_lib.Estimator)
-- 
GitLab


From dac4cbd4602b742532102f8953641ff301363fbe Mon Sep 17 00:00:00 2001
From: Andrew Harp <andrewharp@users.noreply.github.com>
Date: Thu, 7 Dec 2017 12:31:45 -0500
Subject: [PATCH 0745/1225] Update CameraActivity.java

reformat and add comment
---
 .../android/src/org/tensorflow/demo/CameraActivity.java  | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/tensorflow/examples/android/src/org/tensorflow/demo/CameraActivity.java b/tensorflow/examples/android/src/org/tensorflow/demo/CameraActivity.java
index bd45e44537..b4de279c00 100644
--- a/tensorflow/examples/android/src/org/tensorflow/demo/CameraActivity.java
+++ b/tensorflow/examples/android/src/org/tensorflow/demo/CameraActivity.java
@@ -333,9 +333,12 @@ public abstract class CameraActivity extends Activity
           continue;
         }
 
-        useCamera2API = facing == CameraCharacteristics.LENS_FACING_EXTERNAL || 
-            isHardwareLevelSupported(characteristics,
-            CameraCharacteristics.INFO_SUPPORTED_HARDWARE_LEVEL_FULL);
+        // Fallback to camera1 API for internal cameras that don't have full support.
+        // This should help with legacy situations where using the camera2 API causes
+        // distorted or otherwise broken previews.
+        useCamera2API = facing == CameraCharacteristics.LENS_FACING_EXTERNAL
+            || isHardwareLevelSupported(characteristics, 
+                                        CameraCharacteristics.INFO_SUPPORTED_HARDWARE_LEVEL_FULL);
         LOGGER.i("Camera API lv2?: %s", useCamera2API);
         return cameraId;
       }
-- 
GitLab


From 4bc510c17b23af1adac83b148c0c7dfd978a20be Mon Sep 17 00:00:00 2001
From: Andrew Harp <andrewharp@users.noreply.github.com>
Date: Thu, 7 Dec 2017 12:32:23 -0500
Subject: [PATCH 0746/1225] add paren for clarity

---
 .../android/src/org/tensorflow/demo/CameraActivity.java         | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/examples/android/src/org/tensorflow/demo/CameraActivity.java b/tensorflow/examples/android/src/org/tensorflow/demo/CameraActivity.java
index b4de279c00..8bd4abb154 100644
--- a/tensorflow/examples/android/src/org/tensorflow/demo/CameraActivity.java
+++ b/tensorflow/examples/android/src/org/tensorflow/demo/CameraActivity.java
@@ -336,7 +336,7 @@ public abstract class CameraActivity extends Activity
         // Fallback to camera1 API for internal cameras that don't have full support.
         // This should help with legacy situations where using the camera2 API causes
         // distorted or otherwise broken previews.
-        useCamera2API = facing == CameraCharacteristics.LENS_FACING_EXTERNAL
+        useCamera2API = (facing == CameraCharacteristics.LENS_FACING_EXTERNAL)
             || isHardwareLevelSupported(characteristics, 
                                         CameraCharacteristics.INFO_SUPPORTED_HARDWARE_LEVEL_FULL);
         LOGGER.i("Camera API lv2?: %s", useCamera2API);
-- 
GitLab


From 0d160ab43fcaa8357ce9eff6795dc30a41100175 Mon Sep 17 00:00:00 2001
From: Mark Daoust <markdaoust@google.com>
Date: Thu, 7 Dec 2017 09:49:28 -0800
Subject: [PATCH 0747/1225] Clear softmax_v2 warning for image_retraining and
 speech_commands tutorials.

`tf.nn.softmax_cross_entropy_with_logits` and `tf.losses.softmax_cross_entropy` both throw the warning.

Almost everywhere it's used can simply be replaced by `tf.losses.sparse_softmax_cross_entropy`

PiperOrigin-RevId: 178253804
---
 .../examples/image_retraining/retrain.py      | 26 ++++++-------------
 .../examples/image_retraining/retrain_test.py |  2 +-
 .../examples/speech_commands/input_data.py    |  7 +++--
 tensorflow/examples/speech_commands/train.py  | 12 ++++-----
 4 files changed, 17 insertions(+), 30 deletions(-)

diff --git a/tensorflow/examples/image_retraining/retrain.py b/tensorflow/examples/image_retraining/retrain.py
index ebddfb20f4..ec22684eaf 100644
--- a/tensorflow/examples/image_retraining/retrain.py
+++ b/tensorflow/examples/image_retraining/retrain.py
@@ -539,10 +539,8 @@ def get_random_cached_bottlenecks(sess, image_lists, how_many, category,
           sess, image_lists, label_name, image_index, image_dir, category,
           bottleneck_dir, jpeg_data_tensor, decoded_image_tensor,
           resized_input_tensor, bottleneck_tensor, architecture)
-      ground_truth = np.zeros(class_count, dtype=np.float32)
-      ground_truth[label_index] = 1.0
       bottlenecks.append(bottleneck)
-      ground_truths.append(ground_truth)
+      ground_truths.append(label_index)
       filenames.append(image_name)
   else:
     # Retrieve all bottlenecks.
@@ -555,10 +553,8 @@ def get_random_cached_bottlenecks(sess, image_lists, how_many, category,
             sess, image_lists, label_name, image_index, image_dir, category,
             bottleneck_dir, jpeg_data_tensor, decoded_image_tensor,
             resized_input_tensor, bottleneck_tensor, architecture)
-        ground_truth = np.zeros(class_count, dtype=np.float32)
-        ground_truth[label_index] = 1.0
         bottlenecks.append(bottleneck)
-        ground_truths.append(ground_truth)
+        ground_truths.append(label_index)
         filenames.append(image_name)
   return bottlenecks, ground_truths, filenames
 
@@ -610,10 +606,8 @@ def get_random_distorted_bottlenecks(
     bottleneck_values = sess.run(bottleneck_tensor,
                                  {resized_input_tensor: distorted_image_data})
     bottleneck_values = np.squeeze(bottleneck_values)
-    ground_truth = np.zeros(class_count, dtype=np.float32)
-    ground_truth[label_index] = 1.0
     bottlenecks.append(bottleneck_values)
-    ground_truths.append(ground_truth)
+    ground_truths.append(label_index)
   return bottlenecks, ground_truths
 
 
@@ -774,9 +768,8 @@ def add_final_training_ops(class_count, final_tensor_name, bottleneck_tensor,
         shape=[None, bottleneck_tensor_size],
         name='BottleneckInputPlaceholder')
 
-    ground_truth_input = tf.placeholder(tf.float32,
-                                        [None, class_count],
-                                        name='GroundTruthInput')
+    ground_truth_input = tf.placeholder(
+        tf.int64, [None], name='GroundTruthInput')
 
   # Organizing the following ops as `final_training_ops` so they're easier
   # to see in TensorBoard
@@ -823,10 +816,8 @@ def add_final_training_ops(class_count, final_tensor_name, bottleneck_tensor,
   tf.summary.histogram('activations', final_tensor)
 
   with tf.name_scope('cross_entropy'):
-    cross_entropy = tf.nn.softmax_cross_entropy_with_logits(
+    cross_entropy_mean = tf.losses.sparse_softmax_cross_entropy(
         labels=ground_truth_input, logits=logits)
-    with tf.name_scope('total'):
-      cross_entropy_mean = tf.reduce_mean(cross_entropy)
 
   tf.summary.scalar('cross_entropy', cross_entropy_mean)
 
@@ -852,8 +843,7 @@ def add_evaluation_step(result_tensor, ground_truth_tensor):
   with tf.name_scope('accuracy'):
     with tf.name_scope('correct_prediction'):
       prediction = tf.argmax(result_tensor, 1)
-      correct_prediction = tf.equal(
-          prediction, tf.argmax(ground_truth_tensor, 1))
+      correct_prediction = tf.equal(prediction, ground_truth_tensor)
     with tf.name_scope('accuracy'):
       evaluation_step = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
   tf.summary.scalar('accuracy', evaluation_step)
@@ -1178,7 +1168,7 @@ def main(_):
     if FLAGS.print_misclassified_test_images:
       tf.logging.info('=== MISCLASSIFIED TEST IMAGES ===')
       for i, test_filename in enumerate(test_filenames):
-        if predictions[i] != test_ground_truth[i].argmax():
+        if predictions[i] != test_ground_truth[i]:
           tf.logging.info('%70s  %s' %
                           (test_filename,
                            list(image_lists.keys())[predictions[i]]))
diff --git a/tensorflow/examples/image_retraining/retrain_test.py b/tensorflow/examples/image_retraining/retrain_test.py
index 2de4c4ec99..8b8dd45fd7 100644
--- a/tensorflow/examples/image_retraining/retrain_test.py
+++ b/tensorflow/examples/image_retraining/retrain_test.py
@@ -87,7 +87,7 @@ class ImageRetrainingTest(test_util.TensorFlowTestCase):
   def testAddEvaluationStep(self):
     with tf.Graph().as_default():
       final = tf.placeholder(tf.float32, [1], name='final')
-      gt = tf.placeholder(tf.float32, [1], name='gt')
+      gt = tf.placeholder(tf.int64, [1], name='gt')
       self.assertIsNotNone(retrain.add_evaluation_step(final, gt))
 
   def testAddJpegDecoding(self):
diff --git a/tensorflow/examples/speech_commands/input_data.py b/tensorflow/examples/speech_commands/input_data.py
index 751652b330..e7db9cddf0 100644
--- a/tensorflow/examples/speech_commands/input_data.py
+++ b/tensorflow/examples/speech_commands/input_data.py
@@ -417,8 +417,7 @@ class AudioProcessor(object):
       sess: TensorFlow session that was active when processor was created.
 
     Returns:
-      List of sample data for the transformed samples, and list of labels in
-      one-hot form.
+      List of sample data for the transformed samples, and list of label indexes
     """
     # Pick one of the partitions to choose samples from.
     candidates = self.data_index[mode]
@@ -428,7 +427,7 @@ class AudioProcessor(object):
       sample_count = max(0, min(how_many, len(candidates) - offset))
     # Data and labels will be populated and returned.
     data = np.zeros((sample_count, model_settings['fingerprint_size']))
-    labels = np.zeros((sample_count, model_settings['label_count']))
+    labels = np.zeros(sample_count)
     desired_samples = model_settings['desired_samples']
     use_background = self.background_data and (mode == 'training')
     pick_deterministically = (mode != 'training')
@@ -483,7 +482,7 @@ class AudioProcessor(object):
       # Run the graph to produce the output audio.
       data[i - offset, :] = sess.run(self.mfcc_, feed_dict=input_dict).flatten()
       label_index = self.word_to_index[sample['label']]
-      labels[i - offset, label_index] = 1
+      labels[i - offset] = label_index
     return data, labels
 
   def get_unprocessed_data(self, how_many, model_settings, mode):
diff --git a/tensorflow/examples/speech_commands/train.py b/tensorflow/examples/speech_commands/train.py
index bec7dacd21..a4e80041f8 100644
--- a/tensorflow/examples/speech_commands/train.py
+++ b/tensorflow/examples/speech_commands/train.py
@@ -133,7 +133,7 @@ def main(_):
 
   # Define loss and optimizer
   ground_truth_input = tf.placeholder(
-      tf.float32, [None, label_count], name='groundtruth_input')
+      tf.int64, [None], name='groundtruth_input')
 
   # Optionally we can add runtime checks to spot when NaNs or other symptoms of
   # numerical errors start occurring during training.
@@ -144,9 +144,8 @@ def main(_):
 
   # Create the back propagation and training evaluation machinery in the graph.
   with tf.name_scope('cross_entropy'):
-    cross_entropy_mean = tf.reduce_mean(
-        tf.nn.softmax_cross_entropy_with_logits(
-            labels=ground_truth_input, logits=logits))
+    cross_entropy_mean = tf.losses.sparse_softmax_cross_entropy(
+        labels=ground_truth_input, logits=logits)
   tf.summary.scalar('cross_entropy', cross_entropy_mean)
   with tf.name_scope('train'), tf.control_dependencies(control_dependencies):
     learning_rate_input = tf.placeholder(
@@ -154,10 +153,9 @@ def main(_):
     train_step = tf.train.GradientDescentOptimizer(
         learning_rate_input).minimize(cross_entropy_mean)
   predicted_indices = tf.argmax(logits, 1)
-  expected_indices = tf.argmax(ground_truth_input, 1)
-  correct_prediction = tf.equal(predicted_indices, expected_indices)
+  correct_prediction = tf.equal(predicted_indices, ground_truth_input)
   confusion_matrix = tf.confusion_matrix(
-      expected_indices, predicted_indices, num_classes=label_count)
+      ground_truth_input, predicted_indices, num_classes=label_count)
   evaluation_step = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
   tf.summary.scalar('accuracy', evaluation_step)
 
-- 
GitLab


From c3e667573fcf97a8eff7b2ed118b0d903508a4ac Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 7 Dec 2017 10:26:56 -0800
Subject: [PATCH 0748/1225] Automated g4 rollback of changelist 178215852

PiperOrigin-RevId: 178260470
---
 .../contrib/cmake/external/sqlite.cmake       |   1 -
 tensorflow/contrib/tensorboard/db/BUILD       |  19 --
 tensorflow/contrib/tensorboard/db/tbsnap.cc   | 214 ------------------
 .../contrib/tensorboard/db/tbsnap_test.py     |  70 ------
 third_party/sqlite.BUILD                      |   5 +-
 5 files changed, 1 insertion(+), 308 deletions(-)
 delete mode 100644 tensorflow/contrib/tensorboard/db/tbsnap.cc
 delete mode 100644 tensorflow/contrib/tensorboard/db/tbsnap_test.py

diff --git a/tensorflow/contrib/cmake/external/sqlite.cmake b/tensorflow/contrib/cmake/external/sqlite.cmake
index 14d8148e6e..785039a469 100644
--- a/tensorflow/contrib/cmake/external/sqlite.cmake
+++ b/tensorflow/contrib/cmake/external/sqlite.cmake
@@ -28,7 +28,6 @@ endif()
 
 set(sqlite_HEADERS
     "${sqlite_BUILD}/sqlite3.h"
-    "${sqlite_BUILD}/sqlite3ext.h"
 )
 
 if (WIN32)
diff --git a/tensorflow/contrib/tensorboard/db/BUILD b/tensorflow/contrib/tensorboard/db/BUILD
index 28b5eba2a4..9d3d60c24d 100644
--- a/tensorflow/contrib/tensorboard/db/BUILD
+++ b/tensorflow/contrib/tensorboard/db/BUILD
@@ -55,25 +55,6 @@ tf_cc_test(
     ],
 )
 
-cc_library(
-    name = "tbsnap",
-    srcs = ["tbsnap.cc"],
-    deps = [
-        "@snappy",
-        "@sqlite_archive//:sqlite",
-    ],
-)
-
-py_test(
-    name = "tbsnap_test",
-    size = "small",
-    srcs = ["tbsnap_test.py"],
-    data = ["libtbsnap.so"],
-    srcs_version = "PY2AND3",
-    tags = ["nomac"],
-    deps = ["//tensorflow/python:client_testlib"],
-)
-
 filegroup(
     name = "all_files",
     srcs = glob(["*"]),
diff --git a/tensorflow/contrib/tensorboard/db/tbsnap.cc b/tensorflow/contrib/tensorboard/db/tbsnap.cc
deleted file mode 100644
index 588ea9af89..0000000000
--- a/tensorflow/contrib/tensorboard/db/tbsnap.cc
+++ /dev/null
@@ -1,214 +0,0 @@
-/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-/// \brief SQLite extension for Snappy compression
-///
-/// Snappy a compression library that trades ratio for speed, almost going a
-/// tenth as fast as memcpy().
-///
-/// This extension adds the following native functions:
-///
-/// - snap(value: NULL|BLOB|TEXT) -> NULL|BLOB
-///
-///   Applies Snappy compression. If value is NULL, then NULL is returned. If
-///   value is TEXT and BLOB, then it is compressed and the result is a BLOB.
-///   An uncompressed byte is prepended to indicate the original type.
-///
-/// - unsnap(value: NULL|BLOB) -> NULL|TEXT|BLOB
-///
-///   Decompresses value created by snap(). If value is NULL, then NULL is
-///   returned. If value is empty, then an empty blob is returned. Otherwise
-///   the original type is restored from the first byte and the remaining ones
-///   are decompressed.
-///
-/// These functions are deterministic so they can be used for all purposes,
-/// including INDEX. Please note that SQLite currently does not currently
-/// perform common sub-expression optimization for pure functions when
-/// compiling queries.
-///
-/// If your SQLite environment isn't universally UTF8, please file an issue
-/// with the TensorBoard team letting us know. While this implementation should
-/// work, its performance could be improved to avoid superfluous TEXT coding.
-
-#include "sqlite3ext.h"
-#include "snappy.h"
-
-namespace {
-SQLITE_EXTENSION_INIT1
-
-void snap(sqlite3_context* ctx, int argc, sqlite3_value** argv) {
-  const char* data;
-  int type = sqlite3_value_type(argv[0]);
-  switch (type) {
-    case SQLITE_NULL:
-      return;
-    case SQLITE_BLOB:
-      data = reinterpret_cast<const char*>(sqlite3_value_blob(argv[0]));
-      break;
-    case SQLITE_TEXT:
-      data = reinterpret_cast<const char*>(sqlite3_value_text(argv[0]));
-      break;
-    default:
-      sqlite3_result_error(ctx, "snap() takes NULL|BLOB|TEXT", -1);
-      sqlite3_result_error_code(ctx, SQLITE_MISMATCH);
-      return;
-  }
-  int size = sqlite3_value_bytes(argv[0]);
-  if (size <= 0) {
-    char result[] = {static_cast<char>(type)};
-    sqlite3_result_blob(ctx, result, sizeof(result), SQLITE_TRANSIENT);
-    return;
-  }
-  size_t output_size =
-      snappy::MaxCompressedLength(static_cast<size_t>(size)) + 1;
-  if (output_size >
-      sqlite3_limit(sqlite3_context_db_handle(ctx), SQLITE_LIMIT_LENGTH, -1)) {
-    sqlite3_result_error_toobig(ctx);
-    return;
-  }
-  char* output =
-      static_cast<char*>(sqlite3_malloc(static_cast<int>(output_size)));
-  if (output == nullptr) {
-    sqlite3_result_error_nomem(ctx);
-    return;
-  }
-  *output++ = static_cast<char>(type), --output_size;
-  snappy::RawCompress(data, static_cast<size_t>(size), output, &output_size);
-  sqlite3_result_blob(ctx, output - 1, static_cast<int>(output_size + 1),
-                      sqlite3_free);
-}
-
-void unsnap(sqlite3_context* ctx, int argc, sqlite3_value** argv) {
-  int type = sqlite3_value_type(argv[0]);
-  if (type == SQLITE_NULL) return;
-  if (type != SQLITE_BLOB) {
-    sqlite3_result_error(ctx, "unsnap() takes NULL|BLOB", -1);
-    sqlite3_result_error_code(ctx, SQLITE_MISMATCH);
-    return;
-  }
-  int size = sqlite3_value_bytes(argv[0]);
-  const char* blob = reinterpret_cast<const char*>(sqlite3_value_blob(argv[0]));
-  if (size <= 0) {
-    sqlite3_result_zeroblob(ctx, 0);
-    return;
-  }
-  type = static_cast<int>(*blob++), --size;
-  if (type != SQLITE_BLOB && type != SQLITE_TEXT) {
-    sqlite3_result_error(ctx, "unsnap() first byte is invalid type", -1);
-    sqlite3_result_error_code(ctx, SQLITE_CORRUPT);
-    return;
-  }
-  if (size == 0) {
-    if (type == SQLITE_TEXT) {
-      sqlite3_result_text(ctx, "", 0, SQLITE_STATIC);
-    } else {
-      sqlite3_result_zeroblob(ctx, 0);
-    }
-    return;
-  }
-  size_t output_size;
-  if (!snappy::GetUncompressedLength(blob, static_cast<size_t>(size),
-                                     &output_size)) {
-    sqlite3_result_error(ctx, "snappy parse error", -1);
-    sqlite3_result_error_code(ctx, SQLITE_CORRUPT);
-    return;
-  }
-  if (output_size >
-      sqlite3_limit(sqlite3_context_db_handle(ctx), SQLITE_LIMIT_LENGTH, -1)) {
-    sqlite3_result_error_toobig(ctx);
-    return;
-  }
-  char* output =
-      static_cast<char*>(sqlite3_malloc(static_cast<int>(output_size)));
-  if (output == nullptr) {
-    sqlite3_result_error_nomem(ctx);
-    return;
-  }
-  if (!snappy::RawUncompress(blob, static_cast<size_t>(size), output)) {
-    sqlite3_result_error(ctx, "snappy message corruption", -1);
-    sqlite3_result_error_code(ctx, SQLITE_CORRUPT);
-    return;
-  }
-  if (type == SQLITE_TEXT) {
-    sqlite3_result_text(ctx, output, static_cast<int>(output_size),
-                        sqlite3_free);
-  } else {
-    sqlite3_result_blob(ctx, output, static_cast<int>(output_size),
-                        sqlite3_free);
-  }
-}
-
-int init(sqlite3* db, const char** pzErrMsg, const sqlite3_api_routines* pApi) {
-  SQLITE_EXTENSION_INIT2(pApi);
-  int rc;
-
-  rc = sqlite3_create_function_v2(
-      db,
-      "snap",                              // zFunctionName
-      1,                                   // nArg
-      SQLITE_UTF8 | SQLITE_DETERMINISTIC,  // eTextRep
-      nullptr,                             // pApp
-      snap,                                // xFunc
-      nullptr,                             // xStep
-      nullptr,                             // xFinal
-      nullptr                              // xDestroy
-  );
-  if (rc != SQLITE_OK) {
-    *pzErrMsg = "oh snap()";
-    return rc;
-  }
-
-  rc = sqlite3_create_function_v2(
-      db,
-      "unsnap",                            // zFunctionName
-      1,                                   // nArg
-      SQLITE_UTF8 | SQLITE_DETERMINISTIC,  // eTextRep
-      nullptr,                             // pApp
-      unsnap,                              // xFunc
-      nullptr,                             // xStep
-      nullptr,                             // xFinal
-      nullptr                              // xDestroy
-  );
-  if (rc != SQLITE_OK) {
-    *pzErrMsg = "oh unsnap()";
-    return rc;
-  }
-
-  return SQLITE_OK;
-}
-
-}  // namespace
-
-extern "C" {
-
-#if defined(TF_SQLITE3_AUTO_EXTENSION)
-extern int sqlite3_tbsnap_status = sqlite3_auto_extension(init);
-#else
-
-#if defined(_MSC_VER) || defined(__MINGW32__)
-#define EXPORT __declspec(dllexport)
-#else
-#define EXPORT __attribute__((visibility("default")))
-#endif
-
-// SQLite deduces this function name from "libtbsnap.so".
-EXPORT extern int sqlite3_tbsnap_init(sqlite3* db, const char** pzErrMsg,
-                                      const sqlite3_api_routines* pApi) {
-  return init(db, pzErrMsg, pApi);
-}
-
-#endif
-
-}  // extern "C"
diff --git a/tensorflow/contrib/tensorboard/db/tbsnap_test.py b/tensorflow/contrib/tensorboard/db/tbsnap_test.py
deleted file mode 100644
index 2d011959e0..0000000000
--- a/tensorflow/contrib/tensorboard/db/tbsnap_test.py
+++ /dev/null
@@ -1,70 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import sqlite3
-
-from tensorflow.python.framework import test_util
-from tensorflow.python.platform import resource_loader
-from tensorflow.python.platform import test
-
-
-class SqliteSnappyTest(test_util.TensorFlowTestCase):
-
-  def setUp(self):
-    super(SqliteSnappyTest, self).setUp()
-    self.db = sqlite3.connect(':memory:')
-    self.db.enable_load_extension(True)
-    self.db.execute("select load_extension('%s')" %
-                    resource_loader.get_path_to_datafile('libtbsnap.so'))
-    self.db.enable_load_extension(False)
-
-  def testRoundTrip(self):
-    self.assertEqual('hello',
-                     self.db.execute('SELECT unsnap(snap(\'hello\'))')
-                     .fetchone()[0])
-    self.assertEqual(
-        'hello',
-        self.db.execute(
-            'SELECT CAST(unsnap(snap(CAST(\'hello\' AS BLOB))) AS TEXT)')
-        .fetchone()[0])
-    self.assertEqual(
-        'text',
-        self.db.execute('SELECT typeof(unsnap(snap(\'h\')))').fetchone()[0])
-    self.assertEqual(
-        'blob',
-        self.db.execute(
-            'SELECT typeof(unsnap(snap(CAST(\'h\' AS BLOB))))').fetchone()[0])
-
-  def testNull_passesThrough(self):
-    self.assertIsNone(
-        self.db.execute('SELECT unsnap(snap(NULL))').fetchone()[0])
-
-  def testEmpty_passesThrough(self):
-    self.assertEqual('',
-                     self.db.execute('SELECT unsnap(snap(\'\'))').fetchone()[0])
-    self.assertEqual(
-        'text',
-        self.db.execute('SELECT typeof(unsnap(snap(\'\')))').fetchone()[0])
-    self.assertEqual(
-        'blob',
-        self.db.execute(
-            'SELECT typeof(unsnap(snap(CAST(\'\' AS BLOB))))').fetchone()[0])
-
-
-if __name__ == '__main__':
-  test.main()
diff --git a/third_party/sqlite.BUILD b/third_party/sqlite.BUILD
index 03a6a7a5da..9840d7b151 100644
--- a/third_party/sqlite.BUILD
+++ b/third_party/sqlite.BUILD
@@ -9,10 +9,7 @@ licenses(["unencumbered"])  # Public Domain
 cc_library(
     name = "sqlite",
     srcs = ["sqlite3.c"],
-    hdrs = [
-        "sqlite3.h",
-        "sqlite3ext.h",
-    ],
+    hdrs = ["sqlite3.h"],
     includes = ["."],
     linkopts = ["-lm"],
     visibility = ["//visibility:public"],
-- 
GitLab


From e2cd7050ceef06d975f82288ace59fd1f6e65106 Mon Sep 17 00:00:00 2001
From: Vinu Rajashekhar <vinuraja@google.com>
Date: Thu, 7 Dec 2017 10:29:25 -0800
Subject: [PATCH 0749/1225] Adds a 'guaranteed_constants' list of tensors to
 TPUReplicate Op.

PiperOrigin-RevId: 178260923
---
 tensorflow/contrib/tpu/ops/replication_ops.cc | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/tensorflow/contrib/tpu/ops/replication_ops.cc b/tensorflow/contrib/tpu/ops/replication_ops.cc
index 36e865bf3c..cba71c6b98 100644
--- a/tensorflow/contrib/tpu/ops/replication_ops.cc
+++ b/tensorflow/contrib/tpu/ops/replication_ops.cc
@@ -72,10 +72,12 @@ REGISTER_OP("TPUReplicate")
     .Attr("Tinputs: list(type) >= 0")
     .Attr("Tbroadcast_inputs: list(type) >= 0")
     .Attr("NumVariables: int >= 0")
+    .Attr("Tguaranteed_constants: list(type) >= 0")
     .Attr("output_types: list(type) >= 0")
     .Input("inputs: Tinputs")
     .Input("broadcast_inputs: Tbroadcast_inputs")
     .Input("variables: NumVariables * resource")
+    .Input("guaranteed_constants: Tguaranteed_constants")
     .Output("outputs: output_types")
     .SetShapeFn(shape_inference::UnknownShape)
     .Doc(R"doc(
@@ -95,9 +97,13 @@ Tinputs: the types of the arguments to 'computation'.
 inputs: the inputs to 'computation', flattened, in replica-major order.
 Tbroadcast_inputs: the types of the additional arguments to broadcast to all
   replicas.
+Tguaranteed_constants: the types of the arguments to 'guaranteed_constants'.
 broadcast_inputs: additional arguments to broadcast to all replicas. The
   broadcast inputs are appended to the per-replica inputs when calling
   computation.
+guaranteed_constants: arguments which have been guaranteed to not
+change their values during the session lifetime. These contain tensors marked as
+constant using the GuaranteeConstOp.
 output_types: the types of the outputs of 'computation'.
 outputs: the outputs of 'computation'.
 )doc");
-- 
GitLab


From 2cfb088cf72b52c74a742d780cc5c4f93a74640e Mon Sep 17 00:00:00 2001
From: Yu-Cheng Ling <ycling@google.com>
Date: Thu, 7 Dec 2017 11:03:53 -0800
Subject: [PATCH 0750/1225] Fix ios_makefile.inc.

It seems broken when merging change in
b2db981a6731e978453862a73dab892bc674db68.
---
 tensorflow/contrib/lite/ios_makefile.inc | 78 ++++++++++++++----------
 1 file changed, 47 insertions(+), 31 deletions(-)

diff --git a/tensorflow/contrib/lite/ios_makefile.inc b/tensorflow/contrib/lite/ios_makefile.inc
index 345ed26212..bcff7ed988 100644
--- a/tensorflow/contrib/lite/ios_makefile.inc
+++ b/tensorflow/contrib/lite/ios_makefile.inc
@@ -1,31 +1,47 @@
-#Settings for iOS.
-ifeq($(TARGET), IOS) BUILD_FOR_IOS_SIMULATOR
-    : = false ifeq($(IOS_ARCH), x86_64) BUILD_FOR_IOS_SIMULATOR
-    : = true endif ifeq($(IOS_ARCH), i386) BUILD_FOR_IOS_SIMULATOR
-    : = true endif ifeq($(BUILD_FOR_IOS_SIMULATOR), true) IPHONEOS_PLATFORM
-    : = $(shell xcrun-- sdk iphonesimulator-- show - sdk - platform -
-          path) IPHONEOS_SYSROOT
-    : = $(shell xcrun-- sdk iphonesimulator-- show - sdk -
-          path) else IPHONEOS_PLATFORM
-    : = $(shell xcrun-- sdk iphoneos-- show - sdk - platform -
-          path) IPHONEOS_SYSROOT
-    : = $(shell xcrun-- sdk iphoneos-- show - sdk - path) endif IOS_SDK_VERSION
-    : = $(shell xcrun-- sdk iphoneos-- show - sdk - version) MIN_SDK_VERSION
-    : = 9.0
-#Override IOS_ARCH with armv7, armv7s, arm64, i386, or x86_64.
-      IOS_ARCH
-    : = x86_64 CXXFLAGS
-      += -miphoneos - version
-         - min = $(MIN_SDK_VERSION) - DGEMMLOWP_ALLOW_SLOW_SCALAR_FALLBACK
-                 - fembed - bitcode - Wno - c++ 11 - narrowing - mno - thumb
-                 - fno - exceptions
-                 - isysroot ${IPHONEOS_SYSROOT} - arch $(IOS_ARCH) - O3 CCFLAGS
-      += -miphoneos - version
-         - min = $(MIN_SDK_VERSION) - fembed - bitcode - mno - thumb
-                 - isysroot ${IPHONEOS_SYSROOT} - arch $(IOS_ARCH) -
-                 O3 LDFLAGS
-    : = -fembed - bitcode - miphoneos - version
-        - min = ${MIN_SDK_VERSION} - arch $(IOS_ARCH) OBJDIR
-    : = $(OBJDIR) ios_$(IOS_ARCH) / LIBDIR
-    : = $(LIBDIR) ios_$(IOS_ARCH) / BINDIR
-    : = $(BINDIR) ios_$(IOS_ARCH) / DEPDIR : = $(DEPDIR) ios_$(IOS_ARCH) / endif
+# Settings for iOS.
+ifeq ($(TARGET), IOS)
+        BUILD_FOR_IOS_SIMULATOR := false
+	ifeq ($(IOS_ARCH), x86_64)
+	     	BUILD_FOR_IOS_SIMULATOR := true
+	endif
+	ifeq ($(IOS_ARCH), i386)
+	     	BUILD_FOR_IOS_SIMULATOR := true
+	endif
+	ifeq ($(BUILD_FOR_IOS_SIMULATOR), true)
+		IPHONEOS_PLATFORM := $(shell xcrun --sdk iphonesimulator \
+			--show-sdk-platform-path)
+		IPHONEOS_SYSROOT := $(shell xcrun --sdk iphonesimulator \
+			--show-sdk-path)
+	else
+		IPHONEOS_PLATFORM := $(shell xcrun --sdk iphoneos --show-sdk-platform-path)
+		IPHONEOS_SYSROOT := $(shell xcrun --sdk iphoneos --show-sdk-path)
+	endif
+	IOS_SDK_VERSION := $(shell xcrun --sdk iphoneos --show-sdk-version)
+	MIN_SDK_VERSION := 9.0
+	# Override IOS_ARCH with armv7, armv7s, arm64, i386, or x86_64.
+	IOS_ARCH := x86_64
+	CXXFLAGS += -miphoneos-version-min=$(MIN_SDK_VERSION) \
+		-DGEMMLOWP_ALLOW_SLOW_SCALAR_FALLBACK \
+		-fembed-bitcode \
+		-Wno-c++11-narrowing \
+		-mno-thumb \
+		-fno-exceptions \
+		-isysroot \
+		${IPHONEOS_SYSROOT} \
+		-arch $(IOS_ARCH) \
+		-O3
+	CCFLAGS += -miphoneos-version-min=$(MIN_SDK_VERSION) \
+		-fembed-bitcode \
+		-mno-thumb \
+		-isysroot \
+		${IPHONEOS_SYSROOT} \
+		-arch $(IOS_ARCH) \
+		-O3
+	LDFLAGS := -fembed-bitcode \
+		-miphoneos-version-min=${MIN_SDK_VERSION} \
+		-arch $(IOS_ARCH)
+	OBJDIR := $(OBJDIR)ios_$(IOS_ARCH)/
+	LIBDIR := $(LIBDIR)ios_$(IOS_ARCH)/
+	BINDIR := $(BINDIR)ios_$(IOS_ARCH)/
+	DEPDIR := $(DEPDIR)ios_$(IOS_ARCH)/
+endif
-- 
GitLab


From e81b739873577dcb828dcb79cc1708eb4b8ae91c Mon Sep 17 00:00:00 2001
From: Skye Wanderman-Milne <skyewm@google.com>
Date: Thu, 7 Dec 2017 11:15:32 -0800
Subject: [PATCH 0751/1225] Enable more import_graph_def tests with C API.

These tests don't require any functional changes to run (i.e. they run as-is
or produce diferent error messages than the Python code).

PiperOrigin-RevId: 178268455
---
 tensorflow/python/framework/importer_test.py | 165 +++++++++++--------
 1 file changed, 94 insertions(+), 71 deletions(-)

diff --git a/tensorflow/python/framework/importer_test.py b/tensorflow/python/framework/importer_test.py
index b5cc24ff33..ee3cfbbd05 100644
--- a/tensorflow/python/framework/importer_test.py
+++ b/tensorflow/python/framework/importer_test.py
@@ -356,22 +356,23 @@ class ImportGraphDefTest(test.TestCase):
       self.assertEqual(b.inputs[0], a.outputs[0])
 
   def testTypeMismatchInGraphDef(self):
-    if ops._USE_C_API: return  # TODO(skyewm): make this work with C API
+    if ops._USE_C_API:
+      # TODO(skyewm): improve error message
+      error_msg = ("Input 0 of node import/B was passed int32 from import/A:0 "
+                   "incompatible with expected float.")
+    else:
+      error_msg = ("Cannot convert a tensor of type int32 to an input of type "
+                   "float")
 
     with ops.Graph().as_default():
-      with self.assertRaises(ValueError) as e:
+      with self.assertRaisesRegexp(ValueError, error_msg):
         importer.import_graph_def(
             self._MakeGraphDef("""
             node { name: 'A' op: 'IntOutput' }
             node { name: 'B' op: 'FloatInput' input: 'A:0' }
             """))
-      self.assertTrue(
-          "Cannot convert a tensor of type int32 to an input of type float" in
-          str(e.exception))
 
   def testShapeWhitelist(self):
-    if ops._USE_C_API: return  # TODO(skyewm): make this work with C API
-
     # Barrier's shape is an output vector of 2, but the
     # graph says it's a scalar.  This is currently whitelisted.
     with ops.Graph().as_default():
@@ -379,14 +380,14 @@ class ImportGraphDefTest(test.TestCase):
           self._MakeGraphDef("""
           node { name: 'A' op: 'Barrier'
                  attr { key: '_output_shapes'
-                        value { list { shape { } } } } }
+                        value { list { shape { } } } }
+                 attr { key: 'component_types'
+                        value { list { type: DT_FLOAT } } } }
           """),
           return_elements=["A"],
           name="import")
 
   def testShapeWhitelistViolation(self):
-    if ops._USE_C_API: return  # TODO(skyewm): make this work with C API
-
     # L2 loss produces a scalar shape, but the graph
     # has the wrong shape, so raise an error.
     with ops.Graph().as_default():
@@ -406,41 +407,49 @@ class ImportGraphDefTest(test.TestCase):
             "Shapes () and (43,) are not compatible" in str(e.exception))
 
   def testInvalidSignatureTooManyInputsInGraphDef(self):
-    if ops._USE_C_API: return  # TODO(skyewm): make this work with C API
+    if ops._USE_C_API:
+      # TODO(skyewm): improve error message
+      error_msg = "NodeDef expected inputs '' do not match 1 inputs specified"
+    else:
+      error_msg = r"More inputs specified \('A:0'\) than the op expects"
 
     with ops.Graph().as_default():
-      with self.assertRaises(ValueError) as e:
+      with self.assertRaisesRegexp(ValueError, error_msg):
         importer.import_graph_def(
             self._MakeGraphDef("""
             node { name: 'A' op: 'IntOutput' }
             node { name: 'B' op: 'None' input: 'A:0' }
             """))
-      self.assertTrue("More inputs specified ('A:0') than the op expects" in
-                      str(e.exception))
 
   def testInvalidSignatureNotEnoughInputsInGraphDef(self):
-    if ops._USE_C_API: return  # TODO(skyewm): make this work with C API
+    if ops._USE_C_API:
+      # TODO(skyewm): improve error message
+      error_msg = ("NodeDef expected inputs 'int32, float' do not match 1 "
+                   "inputs specified")
+    else:
+      error_msg = (r"Input types mismatch \(expected 'int32, float32' but "
+                   r"got 'int32'\)")
 
     with ops.Graph().as_default():
-      with self.assertRaises(ValueError) as e:
+      with self.assertRaisesRegexp(ValueError, error_msg):
         importer.import_graph_def(
             self._MakeGraphDef("""
             node { name: 'A' op: 'IntOutput' }
             node { name: 'B' op: 'IntInputFloatInput' input: 'A:0' }
             """))
-      self.assertTrue("Input types mismatch (expected 'int32, float32' but "
-                      "got 'int32')" in str(e.exception))
 
   def testMissingInputOpInGraphDef(self):
-    if ops._USE_C_API: return  # TODO(skyewm): make this work with C API
+    if ops._USE_C_API:
+      error_msg = "Node 'B': Unknown input node 'A:0'"
+    else:
+      error_msg = "Input tensor 'A:0' not found"
 
     with ops.Graph().as_default():
-      with self.assertRaises(ValueError) as e:
+      with self.assertRaisesRegexp(ValueError, error_msg):
         importer.import_graph_def(
             self._MakeGraphDef("""
             node { name: 'B' op: 'FloatInput' input: 'A:0' }
             """))
-      self.assertTrue("Input tensor 'A:0' not found" in str(e.exception))
 
   def testMissingInputOpInGraphDefButAppearsInInputMap(self):
     with ops.Graph().as_default():
@@ -454,95 +463,111 @@ class ImportGraphDefTest(test.TestCase):
       self.assertEqual(b.inputs[0], feed_a_0)
 
   def testMissingInputTensorInGraphDef(self):
-    if ops._USE_C_API: return  # TODO(skyewm): make this work with C API
+    if ops._USE_C_API:
+      error_msg = ("Node 'B': Connecting to invalid output 1 of source node A "
+                   "which has 1 outputs")
+    else:
+      error_msg = "Input tensor 'A:1' not found"
 
     with ops.Graph().as_default():
-      with self.assertRaises(ValueError) as e:
+      with self.assertRaisesRegexp(ValueError, error_msg):
         importer.import_graph_def(
             self._MakeGraphDef("""
             node { name: 'A' op: 'FloatOutput' }
             node { name: 'B' op: 'FloatInput' input: 'A:1' }
             """))
-      self.assertTrue("Input tensor 'A:1' not found" in str(e.exception))
 
   def testMissingControlInputInGraphDef(self):
-    if ops._USE_C_API: return  # TODO(skyewm): make this work with C API
+    if ops._USE_C_API:
+      error_msg = r"Node 'B': Unknown input node '\^A'"
+    else:
+      error_msg = r"Control input '\^A' not found"
 
     with ops.Graph().as_default():
-      with self.assertRaises(ValueError) as e:
+      with self.assertRaisesRegexp(ValueError, error_msg):
         importer.import_graph_def(
             self._MakeGraphDef("""
             node { name: 'B' op: 'None' input: '^A' }
             """))
-      self.assertTrue("Control input '^A' not found" in str(e.exception))
 
   def testInvalidTensorNameOutputIndexInGraphDef(self):
-    if ops._USE_C_API: return  # TODO(skyewm): make this work with C API
+    if ops._USE_C_API:
+      error_msg = "Node 'B': Unknown input node 'A:B'"
+    else:
+      error_msg = "Cannot convert 'A:B' to a tensor name."
 
     with ops.Graph().as_default():
-      with self.assertRaises(ValueError) as e:
+      with self.assertRaisesRegexp(ValueError, error_msg):
         importer.import_graph_def(
             self._MakeGraphDef("""
             node { name: 'B' op: 'None' input: 'A:B' }
             """))
-      self.assertEqual("Cannot convert 'A:B' to a tensor name.",
-                       str(e.exception))
 
   def testInvalidTensorNameInGraphDef(self):
-    if ops._USE_C_API: return  # TODO(skyewm): make this work with C API
+    if ops._USE_C_API:
+      error_msg = "Node 'B': Unknown input node 'A:B:0'"
+    else:
+      error_msg = "Cannot convert 'A:B:0' to a tensor name."
 
     with ops.Graph().as_default():
-      with self.assertRaises(ValueError) as e:
+      with self.assertRaisesRegexp(ValueError, error_msg):
         importer.import_graph_def(
             self._MakeGraphDef("""
             node { name: 'B' op: 'None' input: 'A:B:0' }
             """))
-      self.assertEqual("Cannot convert 'A:B:0' to a tensor name.",
-                       str(e.exception))
 
   def testMissingReturnOperation(self):
-    if ops._USE_C_API: return  # TODO(skyewm): make this work with C API
+    if ops._USE_C_API:
+      error_msg = "Requested return node 'B' not found in graph def"
+    else:
+      error_msg = "return_element 'B' not found in graph_def."
 
     with ops.Graph().as_default():
-      with self.assertRaises(ValueError) as e:
+      with self.assertRaisesRegexp(ValueError, error_msg):
         importer.import_graph_def(
             self._MakeGraphDef("""
             node { name: 'A' op: 'None' }
             """),
             return_elements=["B"])
-      self.assertTrue(
-          "return_element 'B' not found in graph_def." in str(e.exception))
 
   def testMissingReturnTensor(self):
-    if ops._USE_C_API: return  # TODO(skyewm): make this work with C API
+    if ops._USE_C_API:
+      error_msg = (r"Invalid return output 1 of node 'A', which has 1 "
+                   r"output\(s\)")
+    else:
+      error_msg = "return_element 'A:1' not found in graph_def."
 
     with ops.Graph().as_default():
-      with self.assertRaises(ValueError) as e:
+      with self.assertRaisesRegexp(ValueError, error_msg):
         importer.import_graph_def(
             self._MakeGraphDef("""
             node { name: 'A' op: 'IntOutput' }
             """),
             return_elements=["A:1"])
-      self.assertTrue(
-          "return_element 'A:1' not found in graph_def." in str(e.exception))
 
-      with self.assertRaises(ValueError) as e:
+      if ops._USE_C_API:
+        error_msg = "Requested return tensor 'B:0' not found in graph def"
+      else:
+        error_msg = "return_element 'B:0' not found in graph_def."
+
+      with self.assertRaisesRegexp(ValueError, error_msg):
         importer.import_graph_def(
             self._MakeGraphDef("""
             node { name: 'A' op: 'IntOutput' }
             """),
             return_elements=["B:0"])
-      self.assertTrue(
-          "return_element 'B:0' not found in graph_def." in str(e.exception))
 
-      with self.assertRaises(ValueError) as e:
+      if ops._USE_C_API:
+        error_msg = "Cannot convert 'A:B:0' to a tensor name."
+      else:
+        error_msg = "return_element 'A:B:0' not found in graph_def."
+
+      with self.assertRaisesRegexp(ValueError, error_msg):
         importer.import_graph_def(
             self._MakeGraphDef("""
             node { name: 'A' op: 'IntOutput' }
             """),
             return_elements=["A:B:0"])
-      self.assertTrue(
-          "return_element 'A:B:0' not found in graph_def." in str(e.exception))
 
   def testMissingInputMap(self):
     if ops._USE_C_API: return  # TODO(skyewm): make this work with C API
@@ -818,10 +843,9 @@ class ImportGraphDefTest(test.TestCase):
 
   def testInvalidInputForReturnOperations(self):
     with ops.Graph().as_default():
-      with self.assertRaises(TypeError) as e:
+      with self.assertRaisesRegexp(
+          TypeError, "return_elements must be a list of strings."):
         importer.import_graph_def(self._MakeGraphDef(""), return_elements=[7])
-      self.assertEqual("return_elements must be a list of strings.",
-                       str(e.exception))
 
       if ops._USE_C_API:
         error_msg = "Cannot convert 'a:b:c' to a tensor name."
@@ -832,17 +856,19 @@ class ImportGraphDefTest(test.TestCase):
                                   return_elements=["a:b:c"])
 
   def testDuplicateOperationNames(self):
-    if ops._USE_C_API: return  # TODO(skyewm): make this work with C API
+    if ops._USE_C_API:
+      error_msg = "Node 'A' is not unique"
+    else:
+      error_msg = "Duplicate name 'A' in GraphDef."
 
     with ops.Graph().as_default():
-      with self.assertRaises(ValueError) as e:
+      with self.assertRaisesRegexp(ValueError, error_msg):
         importer.import_graph_def(
             self._MakeGraphDef("""
             node { name: 'A' op: 'IntOutput' }
             node { name: 'B' op: 'IntOutput' }
             node { name: 'A' op: 'IntOutput' }
             """))
-      self.assertEqual("Duplicate name 'A' in GraphDef.", str(e.exception))
 
   def testWithExtensionAndAttr(self):
     with ops.Graph().as_default() as g:
@@ -855,8 +881,6 @@ class ImportGraphDefTest(test.TestCase):
       self.assertAllEqual(pack.outputs[0].eval(), [5.0, 5.0])
 
   def testWithDevice(self):
-    if ops._USE_C_API: return  # TODO(skyewm): make this work with C API
-
     with ops.Graph().as_default() as g:
       # No device.
       a = constant_op.constant(3.0, name="a")
@@ -900,8 +924,6 @@ class ImportGraphDefTest(test.TestCase):
         self.assertEqual(c.device + "/device:GPU:0", c5.device)
 
   def testWithDeviceFunctionDependingOnInputs(self):
-    if ops._USE_C_API: return  # TODO(skyewm): make this work with C API
-
     with ops.Graph().as_default() as g:
       with ops.device("/job:ps"):
         v1 = constant_op.constant(1.0)
@@ -927,8 +949,6 @@ class ImportGraphDefTest(test.TestCase):
     self.assertEqual(2, len(ops_with_two_inputs))
 
   def testGradient(self):
-    if ops._USE_C_API: return  # TODO(skyewm): get_shape() doesn't work
-
     with ops.Graph().as_default() as g:
       inputs = array_ops.placeholder(
           dtypes.float32, shape=[None, 100], name="input")
@@ -1006,23 +1026,26 @@ class ImportGraphDefTest(test.TestCase):
             sess.run(x)
 
   def testVersionHigh(self):
-    if ops._USE_C_API: return  # TODO(skyewm): make this work with C API
-
     with ops.Graph().as_default() as g:
       pat = (r"GraphDef min consumer version %d above current version %d "
              r"for TensorFlow \S+\.  Please upgrade TensorFlow\.$" %
              (1 << 30, versions.GRAPH_DEF_VERSION))
-      importer.import_graph_def(self._MakeGraphDef("", min_consumer=1 << 30))
-      x = constant_op.constant(
-          7)  # Need at least one op to get a C++ graph generated
-      with self.test_session(graph=g) as sess:
-        with self.assertRaisesRegexp(Exception, pat):
-          sess.run(x)
+
+      if ops._USE_C_API:
+        with self.assertRaisesRegexp(ValueError, pat):
+          importer.import_graph_def(self._MakeGraphDef("",
+                                                       min_consumer=1 << 30))
+      else:
+        # Python API only throws when graph is run
+        importer.import_graph_def(self._MakeGraphDef("", min_consumer=1 << 30))
+        x = constant_op.constant(
+            7)  # Need at least one op to get a C++ graph generated
+        with self.test_session(graph=g) as sess:
+          with self.assertRaisesRegexp(Exception, pat):
+            sess.run(x)
 
   def testVersionAppliesToOpConstruction(self):
     """These tests rely on shape fns in test_ops.cc."""
-    if ops._USE_C_API: return  # TODO(skyewm): make this work with C API
-
     with ops.Graph().as_default():
       importer.import_graph_def(
           self._MakeGraphDef(
-- 
GitLab


From b5c8cd65feb2614e739a83136e3d333b51a6c2f8 Mon Sep 17 00:00:00 2001
From: Skye Wanderman-Milne <skyewm@google.com>
Date: Thu, 7 Dec 2017 11:18:07 -0800
Subject: [PATCH 0752/1225] Fix control flow test to not use session after it's
 gone out of scope.

This somehow works currently, but breaks with the C API enabled.

PiperOrigin-RevId: 178268847
---
 .../python/kernel_tests/control_flow_ops_py_test.py    | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/tensorflow/python/kernel_tests/control_flow_ops_py_test.py b/tensorflow/python/kernel_tests/control_flow_ops_py_test.py
index ad02a9e58c..20eb923e72 100644
--- a/tensorflow/python/kernel_tests/control_flow_ops_py_test.py
+++ b/tensorflow/python/kernel_tests/control_flow_ops_py_test.py
@@ -1450,7 +1450,8 @@ class ControlFlowTest(test.TestCase):
     gpu_dev_name = test.gpu_device_name() if test.is_gpu_available(
     ) else "/device:GPU:0"
 
-    with self.test_session(graph=ops.Graph()) as sess:
+    graph = ops.Graph()
+    with graph.as_default():
       v = constant_op.constant(2.0, name="v")
       c = lambda v: math_ops.less(v, 100.0)
 
@@ -1461,7 +1462,8 @@ class ControlFlowTest(test.TestCase):
       loop = control_flow_ops.while_loop(c, b, [v], parallel_iterations=1)
       r = gradients_impl.gradients(
           loop, v, colocate_gradients_with_ops=colocate)[0]
-    r_ops = r.graph.get_operations()
+
+    r_ops = graph.get_operations()
     r_devices = [(op.name, op.device) for op in r_ops]
 
     self.assertTrue(any("Square" in op.name for op in r_ops))
@@ -1475,7 +1477,9 @@ class ControlFlowTest(test.TestCase):
         self.assertTrue(gpu_dev_name in dev)
       else:
         self.assertFalse(gpu_dev_name in dev)
-    self.assertAllClose(1024.0, sess.run(r))
+
+    with self.test_session(graph=graph) as sess:
+      self.assertAllClose(1024.0, sess.run(r))
 
   def testWhileGrad_ColocateGradients(self):
     self._testWhileGrad_ColocateGradients(colocate=False)
-- 
GitLab


From 1e54177c916d97c34faa1a349b9898186f8b6325 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 7 Dec 2017 11:28:44 -0800
Subject: [PATCH 0753/1225] Increase test size of
 tensorflow/python/kernel_tests:dynamic_partition_op_test

PiperOrigin-RevId: 178270549
---
 tensorflow/python/kernel_tests/BUILD | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/python/kernel_tests/BUILD b/tensorflow/python/kernel_tests/BUILD
index b4c202ea39..a1fd062540 100644
--- a/tensorflow/python/kernel_tests/BUILD
+++ b/tensorflow/python/kernel_tests/BUILD
@@ -1384,7 +1384,7 @@ cuda_py_test(
 
 cuda_py_test(
     name = "dynamic_partition_op_test",
-    size = "small",
+    size = "medium",
     srcs = ["dynamic_partition_op_test.py"],
     additional_deps = [
         "//third_party/py/numpy",
-- 
GitLab


From 1fe793d36a2907ab063bc508fab264cf9e2c46db Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 7 Dec 2017 11:29:51 -0800
Subject: [PATCH 0754/1225] Adds support for loading model directly from a
 Flatbuffer object.

PiperOrigin-RevId: 178270704
---
 tensorflow/contrib/lite/model.cc      | 15 ++++++++++++++
 tensorflow/contrib/lite/model.h       | 29 ++++++++++++++++++---------
 tensorflow/contrib/lite/model_test.cc | 22 ++++++++++++++++++++
 3 files changed, 57 insertions(+), 9 deletions(-)

diff --git a/tensorflow/contrib/lite/model.cc b/tensorflow/contrib/lite/model.cc
index e2f3560e61..54efad94af 100644
--- a/tensorflow/contrib/lite/model.cc
+++ b/tensorflow/contrib/lite/model.cc
@@ -60,6 +60,14 @@ std::unique_ptr<FlatBufferModel> FlatBufferModel::BuildFromBuffer(
   return model;
 }
 
+std::unique_ptr<FlatBufferModel> FlatBufferModel::BuildFromModel(
+    const tflite::Model* model_spec, ErrorReporter* error_reporter) {
+  std::unique_ptr<FlatBufferModel> model;
+  model.reset(new FlatBufferModel(model_spec, error_reporter));
+  if (!model->initialized()) model.reset();
+  return model;
+}
+
 FlatBufferModel::FlatBufferModel(const char* filename, bool mmap_file,
                                  ErrorReporter* error_reporter, bool use_nnapi)
     : error_reporter_(error_reporter ? error_reporter
@@ -99,6 +107,13 @@ FlatBufferModel::FlatBufferModel(const char* ptr, size_t num_bytes,
   model_ = VerifyAndGetModel(allocation_->base(), allocation_->bytes());
 }
 
+FlatBufferModel::FlatBufferModel(const Model* model,
+                                 ErrorReporter* error_reporter)
+    : error_reporter_(error_reporter ? error_reporter
+                                     : DefaultErrorReporter()) {
+  model_ = model;
+}
+
 FlatBufferModel::~FlatBufferModel() { delete allocation_; }
 
 InterpreterBuilder::InterpreterBuilder(const FlatBufferModel& model,
diff --git a/tensorflow/contrib/lite/model.h b/tensorflow/contrib/lite/model.h
index 15659d33f3..e0c96f7f04 100644
--- a/tensorflow/contrib/lite/model.h
+++ b/tensorflow/contrib/lite/model.h
@@ -45,18 +45,25 @@ namespace tflite {
 // or mmapped. This uses flatbuffers as the serialization format.
 class FlatBufferModel {
  public:
-  // Build a model based on a file. Return a nullptr in case of failure.
+  // Builds a model based on a file. Returns a nullptr in case of failure.
   static std::unique_ptr<FlatBufferModel> BuildFromFile(
       const char* filename,
       ErrorReporter* error_reporter = DefaultErrorReporter());
 
-  // Build a model based on a pre-loaded flatbuffer. The caller retains
+  // Builds a model based on a pre-loaded flatbuffer. The caller retains
   // ownership of the buffer and should keep it alive until the returned object
-  // is destroyed. Return a nullptr in case of failure.
+  // is destroyed. Returns a nullptr in case of failure.
   static std::unique_ptr<FlatBufferModel> BuildFromBuffer(
       const char* buffer, size_t buffer_size,
       ErrorReporter* error_reporter = DefaultErrorReporter());
 
+  // Builds a model directly from a flatbuffer pointer. The caller retains
+  // ownership of the buffer and should keep it alive until the returned object
+  // is destroyed. Returns a nullptr in case of failure.
+  static std::unique_ptr<FlatBufferModel> BuildFromModel(
+      const tflite::Model* model_spec,
+      ErrorReporter* error_reporter = DefaultErrorReporter());
+
   // Releases memory or unmaps mmaped meory.
   ~FlatBufferModel();
 
@@ -75,7 +82,7 @@ class FlatBufferModel {
   bool CheckModelIdentifier() const;
 
  private:
-  // Load a model from `filename`. If `mmap_file` is true then use mmap,
+  // Loads a model from `filename`. If `mmap_file` is true then use mmap,
   // otherwise make a copy of the model in a buffer.
   //
   // Note, if `error_reporter` is null, then a DefaultErrorReporter() will be
@@ -85,8 +92,8 @@ class FlatBufferModel {
       ErrorReporter* error_reporter = DefaultErrorReporter(),
       bool use_nnapi = false);
 
-  // Load a model from `ptr` and `num_bytes` of the model file. The `ptr` has to
-  // remain alive and unchanged until the end of this flatbuffermodel's
+  // Loads a model from `ptr` and `num_bytes` of the model file. The `ptr` has
+  // to remain alive and unchanged until the end of this flatbuffermodel's
   // lifetime.
   //
   // Note, if `error_reporter` is null, then a DefaultErrorReporter() will be
@@ -94,6 +101,10 @@ class FlatBufferModel {
   FlatBufferModel(const char* ptr, size_t num_bytes,
                   ErrorReporter* error_reporter = DefaultErrorReporter());
 
+  // Loads a model from Model flatbuffer. The `model` has to remain alive and
+  // unchanged until the end of this flatbuffermodel's lifetime.
+  FlatBufferModel(const Model* model, ErrorReporter* error_reporter);
+
   // Flatbuffer traverser pointer. (Model* is a pointer that is within the
   // allocated memory of the data allocated by allocation's internals.
   const tflite::Model* model_ = nullptr;
@@ -106,9 +117,9 @@ class FlatBufferModel {
 // model are mapped to executable function pointers (TfLiteRegistrations).
 class OpResolver {
  public:
-  // Find the op registration for a builtin operator by enum code.
+  // Finds the op registration for a builtin operator by enum code.
   virtual TfLiteRegistration* FindOp(tflite::BuiltinOperator op) const = 0;
-  // Find the op registration of a custom operator by op name.
+  // Finds the op registration of a custom operator by op name.
   virtual TfLiteRegistration* FindOp(const char* op) const = 0;
   virtual ~OpResolver() {}
 };
@@ -131,7 +142,7 @@ class InterpreterBuilder {
  public:
   InterpreterBuilder(const FlatBufferModel& model,
                      const OpResolver& op_resolver);
-  // Build an interpreter given only the raw flatbuffer Model object (instead
+  // Builds an interpreter given only the raw flatbuffer Model object (instead
   // of a FlatBufferModel). Mostly used for testing.
   // If `error_reporter` is null, then DefaultErrorReporter() is used.
   InterpreterBuilder(const ::tflite::Model* model,
diff --git a/tensorflow/contrib/lite/model_test.cc b/tensorflow/contrib/lite/model_test.cc
index 83a5150a46..5330c8f594 100644
--- a/tensorflow/contrib/lite/model_test.cc
+++ b/tensorflow/contrib/lite/model_test.cc
@@ -255,6 +255,28 @@ TEST(BasicFlatBufferModel, TestBuildModelFromCorruptedData) {
   ASSERT_FALSE(model);
 }
 
+// Test that loading model directly from a Model flatbuffer works.
+TEST(BasicFlatBufferModel, TestBuildFromModel) {
+  TestErrorReporter reporter;
+  FileCopyAllocation model_allocation(
+      "tensorflow/contrib/lite/testdata/test_model.bin", &reporter);
+  ASSERT_TRUE(model_allocation.valid());
+  ::flatbuffers::Verifier verifier(
+      reinterpret_cast<const uint8_t*>(model_allocation.base()),
+      model_allocation.bytes());
+  ASSERT_TRUE(VerifyModelBuffer(verifier));
+  const Model* model_fb = ::tflite::GetModel(model_allocation.base());
+
+  auto model = FlatBufferModel::BuildFromModel(model_fb);
+  ASSERT_TRUE(model);
+
+  std::unique_ptr<Interpreter> interpreter;
+  ASSERT_EQ(
+      InterpreterBuilder(*model, TrivialResolver(&dummy_reg))(&interpreter),
+      kTfLiteOk);
+  ASSERT_NE(interpreter, nullptr);
+}
+
 // TODO(aselle): Add tests for serialization of builtin op data types.
 // These tests will occur with the evaluation tests of individual operators,
 // not here.
-- 
GitLab


From b1c32226cc0cd44da9985a60e6a0ac6dec120fa5 Mon Sep 17 00:00:00 2001
From: Mark Daoust <markdaoust@google.com>
Date: Thu, 7 Dec 2017 11:37:27 -0800
Subject: [PATCH 0755/1225] Use sparse loss to avoid the warning being thrown
 by tf.nn.softmax_cross_entopy.

Part 1 - Mnist

`tf.nn.softmax_cross_entropy_with_logits` and `tf.losses.softmax_cross_entropy` both throw the warning.

Almost everywhere it's used can simply be replaced by `tf.losses.sparse_softmax_cross_entropy`

PiperOrigin-RevId: 178271941
---
 .../docs_src/api_guides/python/meta_graph.md   | 10 ++--------
 tensorflow/docs_src/api_guides/python/nn.md    |  2 ++
 .../docs_src/get_started/custom_estimators.md  | 18 +++++++-----------
 .../docs_src/get_started/mnist/beginners.md    |  9 ++++-----
 .../docs_src/get_started/mnist/mechanics.md    | 17 ++++++-----------
 tensorflow/docs_src/get_started/mnist/pros.md  |  5 ++---
 .../get_started/summaries_and_tensorboard.md   |  8 +++-----
 .../docs_src/programmers_guide/debugger.md     |  2 +-
 tensorflow/docs_src/tutorials/deep_cnn.md      |  5 ++---
 tensorflow/docs_src/tutorials/layers.md        |  4 +---
 .../examples/tutorials/layers/cnn_mnist.py     |  4 +---
 tensorflow/examples/tutorials/mnist/mnist.py   |  4 +---
 .../examples/tutorials/mnist/mnist_deep.py     | 10 +++++-----
 .../examples/tutorials/mnist/mnist_softmax.py  | 11 +++++------
 .../tutorials/mnist/mnist_softmax_xla.py       | 13 ++++++-------
 .../tutorials/mnist/mnist_with_summaries.py    | 13 ++++++-------
 16 files changed, 54 insertions(+), 81 deletions(-)

diff --git a/tensorflow/docs_src/api_guides/python/meta_graph.md b/tensorflow/docs_src/api_guides/python/meta_graph.md
index fa4cee8700..0eff900093 100644
--- a/tensorflow/docs_src/api_guides/python/meta_graph.md
+++ b/tensorflow/docs_src/api_guides/python/meta_graph.md
@@ -221,15 +221,9 @@ Here are some of the typical usage models:
     # Addes loss and train.
     labels = tf.constant(0, tf.int32, shape=[100], name="labels")
     batch_size = tf.size(labels)
-    labels = tf.expand_dims(labels, 1)
-    indices = tf.expand_dims(tf.range(0, batch_size), 1)
-    concated = tf.concat([indices, labels], 1)
-    onehot_labels = tf.sparse_to_dense(
-        concated, tf.stack([batch_size, 10]), 1.0, 0.0)
     logits = tf.get_collection("logits")[0]
-    cross_entropy = tf.nn.softmax_cross_entropy_with_logits(
-        labels=onehot_labels, logits=logits, name="xentropy")
-    loss = tf.reduce_mean(cross_entropy, name="xentropy_mean")
+    loss = tf.losses.sparse_softmax_cross_entropy(labels=labels,
+                                                  logits=logits)
 
     tf.summary.scalar('loss', loss)
     # Creates the gradient descent optimizer with the given learning rate.
diff --git a/tensorflow/docs_src/api_guides/python/nn.md b/tensorflow/docs_src/api_guides/python/nn.md
index eb3b251099..8e6fd1cff9 100644
--- a/tensorflow/docs_src/api_guides/python/nn.md
+++ b/tensorflow/docs_src/api_guides/python/nn.md
@@ -226,6 +226,8 @@ TensorFlow provides several operations that help you perform classification.
 *   @{tf.nn.softmax}
 *   @{tf.nn.log_softmax}
 *   @{tf.nn.softmax_cross_entropy_with_logits}
+*   @{tf.nn.softmax_cross_entropy_with_logits_v2} - identical to the base
+    version, except it allows gradient propagation into the labels.
 *   @{tf.nn.sparse_softmax_cross_entropy_with_logits}
 *   @{tf.nn.weighted_cross_entropy_with_logits}
 
diff --git a/tensorflow/docs_src/get_started/custom_estimators.md b/tensorflow/docs_src/get_started/custom_estimators.md
index e347aa6bd0..ae9e107e56 100644
--- a/tensorflow/docs_src/get_started/custom_estimators.md
+++ b/tensorflow/docs_src/get_started/custom_estimators.md
@@ -335,21 +335,17 @@ model's loss. This is the
 [objective](https://developers.google.com/machine-learning/glossary/#objective)
 that will be optimized.
 
-Before we calculate loss, we we must first convert the labels from a list of
-indexes `(0, 1, 2)` to a
-[one-hot representation](https://developers.google.com/machine-learning/glossary/#one-hot_encoding)
-by calling @{tf.one_hot}. Then, we can calculate the loss by calling
-@{tf.losses.softmax_cross_entropy}. Here's the complete code:
+We can calculate the loss by calling @{tf.losses.sparse_softmax_cross_entropy}.
+The value returned by this function will be lowest, approximately 0,
+probability of the correct class (at index `label`) is near 1.0. The loss value
+returned is progressively larger as the probability of the correct class
+decreases.
 
+This function returns the average over the whole batch.
 
 ```python
-    # Convert the labels to a one-hot tensor of shape (length of features, 3)
-    # and with a on-value of 1 for each one-hot vector of length 3.
-    onehot_labels = tf.one_hot(labels, 3, 1, 0)
-
     # Compute loss.
-    loss = tf.losses.softmax_cross_entropy(
-        onehot_labels=onehot_labels, logits=logits)
+    loss = tf.losses.sparse_softmax_cross_entropy(labels=labels, logits=logits)
 ```
 
 ### Evaluate
diff --git a/tensorflow/docs_src/get_started/mnist/beginners.md b/tensorflow/docs_src/get_started/mnist/beginners.md
index 38c467ddc3..c419ca87c3 100644
--- a/tensorflow/docs_src/get_started/mnist/beginners.md
+++ b/tensorflow/docs_src/get_started/mnist/beginners.md
@@ -347,11 +347,10 @@ over all the examples in the batch.
 
 Note that in the source code, we don't use this formulation, because it is
 numerically unstable.  Instead, we apply
-`tf.nn.softmax_cross_entropy_with_logits` on the unnormalized logits (e.g., we
-call `softmax_cross_entropy_with_logits` on `tf.matmul(x, W) + b`), because this
-more numerically stable function internally computes the softmax activation.  In
-your code, consider using `tf.nn.softmax_cross_entropy_with_logits`
-instead.
+`tf.losses.sparse_softmax_cross_entropy` on the unnormalized logits (e.g., we
+call `sparse_softmax_cross_entropy` on the output of `tf.matmul(x, W) + b`),
+because this more numerically stable function internally computes the softmax
+activation.
 
 Now that we know what we want our model to do, it's very easy to have TensorFlow
 train it to do so.  Because TensorFlow knows the entire graph of your
diff --git a/tensorflow/docs_src/get_started/mnist/mechanics.md b/tensorflow/docs_src/get_started/mnist/mechanics.md
index 27fae45b5b..71eee4291e 100644
--- a/tensorflow/docs_src/get_started/mnist/mechanics.md
+++ b/tensorflow/docs_src/get_started/mnist/mechanics.md
@@ -167,20 +167,15 @@ Finally, the `logits` tensor that will contain the output is returned.
 The `loss()` function further builds the graph by adding the required loss
 ops.
 
-First, the values from the `labels_placeholder` are converted to 64-bit integers. Then, a @{tf.nn.sparse_softmax_cross_entropy_with_logits} op is added to automatically produce 1-hot labels from the `labels_placeholder` and compare the output logits from the `inference()` function with those 1-hot labels.
+First, the values from the `labels_placeholder` are converted to 64-bit
+integers. Then, a @{tf.losses.sparse_softmax_cross_entropy} op is used to
+calculate the batch's average cross entropy, of the `inference()` result,
+compared to the labels.
 
 ```python
 labels = tf.to_int64(labels)
-cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
-    labels=labels, logits=logits, name='xentropy')
-```
-
-It then uses @{tf.reduce_mean}
-to average the cross entropy values across the batch dimension (the first
-dimension) as the total loss.
-
-```python
-loss = tf.reduce_mean(cross_entropy, name='xentropy_mean')
+cross_entropy = tf.losses.sparse_softmax_cross_entropy(
+    labels=labels, logits=logits)
 ```
 
 And the tensor that will then contain the loss value is returned.
diff --git a/tensorflow/docs_src/get_started/mnist/pros.md b/tensorflow/docs_src/get_started/mnist/pros.md
index 4933dd28cd..c52e960bb3 100644
--- a/tensorflow/docs_src/get_started/mnist/pros.md
+++ b/tensorflow/docs_src/get_started/mnist/pros.md
@@ -49,7 +49,7 @@ these two lines of code which will download and read in the data automatically:
 
 ```python
 from tensorflow.examples.tutorials.mnist import input_data
-mnist = input_data.read_data_sets('MNIST_data', one_hot=True)
+mnist = input_data.read_data_sets('MNIST_data')
 ```
 
 Here `mnist` is a lightweight class which stores the training, validation, and
@@ -172,8 +172,7 @@ between the target and the softmax activation function applied to the model's
 prediction.  As in the beginners tutorial, we use the stable formulation:
 
 ```python
-cross_entropy = tf.reduce_mean(
-    tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=y))
+cross_entropy = tf.losses.sparse_softmax_cross_entropy(labels=y_, logits=y))
 ```
 
 Note that `tf.nn.softmax_cross_entropy_with_logits` internally applies the
diff --git a/tensorflow/docs_src/get_started/summaries_and_tensorboard.md b/tensorflow/docs_src/get_started/summaries_and_tensorboard.md
index ce5db079ba..32f387ae8e 100644
--- a/tensorflow/docs_src/get_started/summaries_and_tensorboard.md
+++ b/tensorflow/docs_src/get_started/summaries_and_tensorboard.md
@@ -137,12 +137,10 @@ with tf.name_scope('cross_entropy'):
   #
   # can be numerically unstable.
   #
-  # So here we use tf.nn.softmax_cross_entropy_with_logits on the
-  # raw outputs of the nn_layer above, and then average across
-  # the batch.
-  diff = tf.nn.softmax_cross_entropy_with_logits(targets=y_, logits=y)
+  # So here we use tf.losses.sparse_softmax_cross_entropy on the
+  # raw logit outputs of the nn_layer above.
   with tf.name_scope('total'):
-    cross_entropy = tf.reduce_mean(diff)
+    cross_entropy = tf.losses.sparse_softmax_cross_entropy(labels=y_, logits=y)
 tf.summary.scalar('cross_entropy', cross_entropy)
 
 with tf.name_scope('train'):
diff --git a/tensorflow/docs_src/programmers_guide/debugger.md b/tensorflow/docs_src/programmers_guide/debugger.md
index 25cb72008d..1a32882121 100644
--- a/tensorflow/docs_src/programmers_guide/debugger.md
+++ b/tensorflow/docs_src/programmers_guide/debugger.md
@@ -392,7 +392,7 @@ diff = -(y_ * tf.log(y))
 to the built-in, numerically-stable implementation of softmax cross-entropy:
 
 ```python
-diff = tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=logits)
+diff = tf.losses.sparse_softmax_cross_entropy(labels=y_, logits=logits)
 ```
 
 Rerun with the `--debug` flag as follows:
diff --git a/tensorflow/docs_src/tutorials/deep_cnn.md b/tensorflow/docs_src/tutorials/deep_cnn.md
index b57ef24f58..3692a02f2e 100644
--- a/tensorflow/docs_src/tutorials/deep_cnn.md
+++ b/tensorflow/docs_src/tutorials/deep_cnn.md
@@ -195,9 +195,8 @@ The usual method for training a network to perform N-way classification is
 aka. *softmax regression*. Softmax regression applies a
 @{tf.nn.softmax$softmax} nonlinearity to the
 output of the network and calculates the
-@{tf.nn.softmax_cross_entropy_with_logits$cross-entropy}
-between the normalized predictions and a
-@{tf.sparse_to_dense$1-hot encoding} of the label.
+@{tf.nn.sparse_softmax_cross_entropy_with_logits$cross-entropy}
+between the normalized predictions and the label index.
 For regularization, we also apply the usual
 @{tf.nn.l2_loss$weight decay} losses to all learned
 variables.  The objective function for the model is the sum of the cross entropy
diff --git a/tensorflow/docs_src/tutorials/layers.md b/tensorflow/docs_src/tutorials/layers.md
index e808a3677f..7c2029c442 100644
--- a/tensorflow/docs_src/tutorials/layers.md
+++ b/tensorflow/docs_src/tutorials/layers.md
@@ -169,9 +169,7 @@ def cnn_model_fn(features, labels, mode):
     return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)
 
   # Calculate Loss (for both TRAIN and EVAL modes)
-  onehot_labels = tf.one_hot(indices=tf.cast(labels, tf.int32), depth=10)
-  loss = tf.losses.softmax_cross_entropy(
-      onehot_labels=onehot_labels, logits=logits)
+  loss = tf.losses.sparse_softmax_cross_entropy(labels=labels, logits=logits)
 
   # Configure the Training Op (for TRAIN mode)
   if mode == tf.estimator.ModeKeys.TRAIN:
diff --git a/tensorflow/examples/tutorials/layers/cnn_mnist.py b/tensorflow/examples/tutorials/layers/cnn_mnist.py
index 2124843fcb..1e8d7d05e1 100644
--- a/tensorflow/examples/tutorials/layers/cnn_mnist.py
+++ b/tensorflow/examples/tutorials/layers/cnn_mnist.py
@@ -97,9 +97,7 @@ def cnn_model_fn(features, labels, mode):
     return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)
 
   # Calculate Loss (for both TRAIN and EVAL modes)
-  onehot_labels = tf.one_hot(indices=tf.cast(labels, tf.int32), depth=10)
-  loss = tf.losses.softmax_cross_entropy(
-      onehot_labels=onehot_labels, logits=logits)
+  loss = tf.losses.sparse_softmax_cross_entropy(labels=labels, logits=logits)
 
   # Configure the Training Op (for TRAIN mode)
   if mode == tf.estimator.ModeKeys.TRAIN:
diff --git a/tensorflow/examples/tutorials/mnist/mnist.py b/tensorflow/examples/tutorials/mnist/mnist.py
index 3585043a2a..7cedd0e264 100644
--- a/tensorflow/examples/tutorials/mnist/mnist.py
+++ b/tensorflow/examples/tutorials/mnist/mnist.py
@@ -94,9 +94,7 @@ def loss(logits, labels):
     loss: Loss tensor of type float.
   """
   labels = tf.to_int64(labels)
-  cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
-      labels=labels, logits=logits, name='xentropy')
-  return tf.reduce_mean(cross_entropy, name='xentropy_mean')
+  return tf.losses.sparse_softmax_cross_entropy(labels=labels, logits=logits)
 
 
 def training(loss, learning_rate):
diff --git a/tensorflow/examples/tutorials/mnist/mnist_deep.py b/tensorflow/examples/tutorials/mnist/mnist_deep.py
index a4dbab5123..1e0294db27 100644
--- a/tensorflow/examples/tutorials/mnist/mnist_deep.py
+++ b/tensorflow/examples/tutorials/mnist/mnist_deep.py
@@ -125,27 +125,27 @@ def bias_variable(shape):
 
 def main(_):
   # Import data
-  mnist = input_data.read_data_sets(FLAGS.data_dir, one_hot=True)
+  mnist = input_data.read_data_sets(FLAGS.data_dir)
 
   # Create the model
   x = tf.placeholder(tf.float32, [None, 784])
 
   # Define loss and optimizer
-  y_ = tf.placeholder(tf.float32, [None, 10])
+  y_ = tf.placeholder(tf.int64, [None])
 
   # Build the graph for the deep net
   y_conv, keep_prob = deepnn(x)
 
   with tf.name_scope('loss'):
-    cross_entropy = tf.nn.softmax_cross_entropy_with_logits(labels=y_,
-                                                            logits=y_conv)
+    cross_entropy = tf.losses.sparse_softmax_cross_entropy(
+        labels=y_, logits=y_conv)
   cross_entropy = tf.reduce_mean(cross_entropy)
 
   with tf.name_scope('adam_optimizer'):
     train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)
 
   with tf.name_scope('accuracy'):
-    correct_prediction = tf.equal(tf.argmax(y_conv, 1), tf.argmax(y_, 1))
+    correct_prediction = tf.equal(tf.argmax(y_conv, 1), y_)
     correct_prediction = tf.cast(correct_prediction, tf.float32)
   accuracy = tf.reduce_mean(correct_prediction)
 
diff --git a/tensorflow/examples/tutorials/mnist/mnist_softmax.py b/tensorflow/examples/tutorials/mnist/mnist_softmax.py
index addd2d3810..fb3ac94203 100644
--- a/tensorflow/examples/tutorials/mnist/mnist_softmax.py
+++ b/tensorflow/examples/tutorials/mnist/mnist_softmax.py
@@ -34,7 +34,7 @@ FLAGS = None
 
 def main(_):
   # Import data
-  mnist = input_data.read_data_sets(FLAGS.data_dir, one_hot=True)
+  mnist = input_data.read_data_sets(FLAGS.data_dir)
 
   # Create the model
   x = tf.placeholder(tf.float32, [None, 784])
@@ -43,7 +43,7 @@ def main(_):
   y = tf.matmul(x, W) + b
 
   # Define loss and optimizer
-  y_ = tf.placeholder(tf.float32, [None, 10])
+  y_ = tf.placeholder(tf.int64, [None])
 
   # The raw formulation of cross-entropy,
   #
@@ -52,10 +52,9 @@ def main(_):
   #
   # can be numerically unstable.
   #
-  # So here we use tf.nn.softmax_cross_entropy_with_logits on the raw
+  # So here we use tf.losses.sparse_softmax_cross_entropy on the raw
   # outputs of 'y', and then average across the batch.
-  cross_entropy = tf.reduce_mean(
-      tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=y))
+  cross_entropy = tf.losses.sparse_softmax_cross_entropy(labels=y_, logits=y)
   train_step = tf.train.GradientDescentOptimizer(0.5).minimize(cross_entropy)
 
   sess = tf.InteractiveSession()
@@ -66,7 +65,7 @@ def main(_):
     sess.run(train_step, feed_dict={x: batch_xs, y_: batch_ys})
 
   # Test trained model
-  correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
+  correct_prediction = tf.equal(tf.argmax(y, 1), y_)
   accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
   print(sess.run(accuracy, feed_dict={x: mnist.test.images,
                                       y_: mnist.test.labels}))
diff --git a/tensorflow/examples/tutorials/mnist/mnist_softmax_xla.py b/tensorflow/examples/tutorials/mnist/mnist_softmax_xla.py
index eaff05913a..e89317494f 100644
--- a/tensorflow/examples/tutorials/mnist/mnist_softmax_xla.py
+++ b/tensorflow/examples/tutorials/mnist/mnist_softmax_xla.py
@@ -32,7 +32,7 @@ FLAGS = None
 
 def main(_):
   # Import data
-  mnist = input_data.read_data_sets(FLAGS.data_dir, one_hot=True)
+  mnist = input_data.read_data_sets(FLAGS.data_dir)
 
   # Create the model
   x = tf.placeholder(tf.float32, [None, 784])
@@ -41,7 +41,7 @@ def main(_):
   y = tf.matmul(x, w) + b
 
   # Define loss and optimizer
-  y_ = tf.placeholder(tf.float32, [None, 10])
+  y_ = tf.placeholder(tf.int64, [None])
 
   # The raw formulation of cross-entropy,
   #
@@ -50,10 +50,9 @@ def main(_):
   #
   # can be numerically unstable.
   #
-  # So here we use tf.nn.softmax_cross_entropy_with_logits on the raw
-  # outputs of 'y', and then average across the batch.
-  cross_entropy = tf.reduce_mean(
-      tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=y))
+  # So here we use tf.losses.sparse_softmax_cross_entropy on the raw
+  # logit outputs of 'y', and then average across the batch.
+  cross_entropy = tf.losses.sparse_softmax_cross_entropy(labels=y_, logits=y)
   train_step = tf.train.GradientDescentOptimizer(0.5).minimize(cross_entropy)
 
   config = tf.ConfigProto()
@@ -86,7 +85,7 @@ def main(_):
       sess.run(train_step, feed_dict={x: batch_xs, y_: batch_ys})
 
   # Test trained model
-  correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
+  correct_prediction = tf.equal(tf.argmax(y, 1), y_)
   accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
   print(sess.run(accuracy,
                  feed_dict={x: mnist.test.images,
diff --git a/tensorflow/examples/tutorials/mnist/mnist_with_summaries.py b/tensorflow/examples/tutorials/mnist/mnist_with_summaries.py
index c401d09df8..7967e22d6a 100644
--- a/tensorflow/examples/tutorials/mnist/mnist_with_summaries.py
+++ b/tensorflow/examples/tutorials/mnist/mnist_with_summaries.py
@@ -38,7 +38,6 @@ FLAGS = None
 def train():
   # Import data
   mnist = input_data.read_data_sets(FLAGS.data_dir,
-                                    one_hot=True,
                                     fake_data=FLAGS.fake_data)
 
   sess = tf.InteractiveSession()
@@ -47,7 +46,7 @@ def train():
   # Input placeholders
   with tf.name_scope('input'):
     x = tf.placeholder(tf.float32, [None, 784], name='x-input')
-    y_ = tf.placeholder(tf.float32, [None, 10], name='y-input')
+    y_ = tf.placeholder(tf.int64, [None], name='y-input')
 
   with tf.name_scope('input_reshape'):
     image_shaped_input = tf.reshape(x, [-1, 28, 28, 1])
@@ -117,12 +116,12 @@ def train():
     #
     # can be numerically unstable.
     #
-    # So here we use tf.nn.softmax_cross_entropy_with_logits on the
-    # raw outputs of the nn_layer above, and then average across
+    # So here we use tf.losses.sparse_softmax_cross_entropy on the
+    # raw logit outputs of the nn_layer above, and then average across
     # the batch.
-    diff = tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=y)
     with tf.name_scope('total'):
-      cross_entropy = tf.reduce_mean(diff)
+      cross_entropy = tf.losses.sparse_softmax_cross_entropy(
+          labels=y_, logits=y)
   tf.summary.scalar('cross_entropy', cross_entropy)
 
   with tf.name_scope('train'):
@@ -131,7 +130,7 @@ def train():
 
   with tf.name_scope('accuracy'):
     with tf.name_scope('correct_prediction'):
-      correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
+      correct_prediction = tf.equal(tf.argmax(y, 1), y_)
     with tf.name_scope('accuracy'):
       accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
   tf.summary.scalar('accuracy', accuracy)
-- 
GitLab


From cdc36a3b1f7d227984bb5e415b555ed334737f82 Mon Sep 17 00:00:00 2001
From: Shanqing Cai <cais@google.com>
Date: Thu, 7 Dec 2017 11:42:26 -0800
Subject: [PATCH 0756/1225] tfdbg: cosmetic fix to MonitoredSession.__del__
 AttributeError

Prevoiusly, calling "del" on a tf_debug-wrapped MonitoredSession causes a warning message like the following:
Exception AttributeError: "'MonitoredSession' object has no attribute '__del__'" in <bound method LocalCLIDebuggerWrapperSessionForTest.__del__ of <__main__.LocalCLIDebuggerWrapperSessionForTest object at 0x558c74f642d0>> ignored

As the message states, the AttributeError is ignored and doesn't cause failures.

This CL prevents this message by checking that the underlying _sess object has the __del__ method defined before calling it.

Fixes: #15105
PiperOrigin-RevId: 178272619
---
 tensorflow/python/debug/wrappers/framework.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tensorflow/python/debug/wrappers/framework.py b/tensorflow/python/debug/wrappers/framework.py
index 4e243cb6c9..909150eb6a 100644
--- a/tensorflow/python/debug/wrappers/framework.py
+++ b/tensorflow/python/debug/wrappers/framework.py
@@ -706,7 +706,8 @@ class BaseDebugWrapperSession(session.SessionInterface):
         exec_type, exec_value, exec_tb)
 
   def __del__(self):
-    self._sess.__del__()
+    if hasattr(self._sess, "__del__"):
+      self._sess.__del__()
 
   def close(self):
     self._sess.close()
-- 
GitLab


From 2a41c99d75f3e086f6236bdc1bfae132b804a845 Mon Sep 17 00:00:00 2001
From: Benoit Steiner <bsteiner@google.com>
Date: Thu, 7 Dec 2017 11:43:31 -0800
Subject: [PATCH 0757/1225] Optimize control dependencies driven by constants

PiperOrigin-RevId: 178272773
---
 .../optimizers/dependency_optimizer.cc        | 39 +++++++++++++-
 .../optimizers/dependency_optimizer_test.cc   | 51 ++++++++++++++++---
 2 files changed, 82 insertions(+), 8 deletions(-)

diff --git a/tensorflow/core/grappler/optimizers/dependency_optimizer.cc b/tensorflow/core/grappler/optimizers/dependency_optimizer.cc
index 950c738dc2..1e97d2d8d2 100644
--- a/tensorflow/core/grappler/optimizers/dependency_optimizer.cc
+++ b/tensorflow/core/grappler/optimizers/dependency_optimizer.cc
@@ -110,6 +110,43 @@ void DependencyOptimizer::OptimizeNode(int node_idx,
                                        SetVector<int>* nodes_to_simplify,
                                        std::set<int>* nodes_to_delete) {
   NodeDef* node = optimized_graph_->mutable_node(node_idx);
+
+  // Constant nodes with no input control dependency are always executed early,
+  // so we can prune all their output control dependencies.
+  if (IsConstant(*node) && node->input_size() == 0) {
+    const std::set<NodeDef*> output_nodes = node_map_->GetOutputs(node->name());
+    for (NodeDef* fanout : output_nodes) {
+      bool optimize_fanout = false;
+      bool data_connection = false;
+      for (int i = fanout->input_size() - 1; i >= 0; --i) {
+        int pos;
+        string input_name = ParseNodeName(fanout->input(i), &pos);
+        if (input_name == node->name()) {
+          if (pos < 0) {
+            fanout->mutable_input()->SwapElements(i, fanout->input_size() - 1);
+            fanout->mutable_input()->RemoveLast();
+            optimize_fanout = true;
+          } else {
+            data_connection = true;
+          }
+        }
+      }
+      if (optimize_fanout) {
+        nodes_to_simplify->PushBack(node_to_idx_[fanout]);
+        if (!data_connection) {
+          node_map_->RemoveOutput(node->name(), fanout->name());
+        }
+      }
+    }
+    if (node_map_->GetOutputs(node->name()).empty() && fetch_nodes_known_ &&
+        nodes_to_preserve_.find(node->name()) == nodes_to_preserve_.end()) {
+      // Mark the node for deletion.
+      nodes_to_delete->insert(node_to_idx_[node]);
+    }
+
+    return;
+  }
+
   // Change ops that only have control dependencies as outputs to NoOps.
   if (node->op() != "NoOp" && SafeToConvertToNoOp(*node)) {
     VLOG(1) << "***** Replacing  " << node->name() << " (" << node->op()
@@ -237,7 +274,7 @@ Status DependencyOptimizer::OptimizeDependencies() {
   std::set<int> nodes_to_delete;
   for (int i = 0; i < optimized_graph_->node_size(); ++i) {
     const NodeDef& node = optimized_graph_->node(i);
-    if (node.op() == "NoOp" || SafeToConvertToNoOp(node)) {
+    if (node.op() == "NoOp" || IsConstant(node) || SafeToConvertToNoOp(node)) {
       nodes_to_simplify.PushBack(i);
     }
   }
diff --git a/tensorflow/core/grappler/optimizers/dependency_optimizer_test.cc b/tensorflow/core/grappler/optimizers/dependency_optimizer_test.cc
index 90f5ec8c3f..e714f5c042 100644
--- a/tensorflow/core/grappler/optimizers/dependency_optimizer_test.cc
+++ b/tensorflow/core/grappler/optimizers/dependency_optimizer_test.cc
@@ -59,10 +59,47 @@ TEST_F(DependencyOptimizerTest, NoOp) {
   VerifyGraphsEqual(item.graph, output, __FUNCTION__);
 }
 
-TEST_F(DependencyOptimizerTest, ChangeToNoop) {
+TEST_F(DependencyOptimizerTest, DependenciesDrivenByConstants) {
   tensorflow::Scope s = tensorflow::Scope::NewRootScope();
   Output x = ops::Const(s.WithOpName("x"), {1.0f, 2.0f}, {1, 2});
   Output y = ops::Const(s.WithOpName("y"), {1.0f, 2.0f}, {1, 2});
+  Output z = ops::Const(s.WithOpName("z"), {1.0f, 2.0f}, {1, 2});
+  Output add = ops::Add(s.WithOpName("add"), x, y);
+  Output id1 =
+      ops::Identity(s.WithOpName("id1").WithControlDependencies(x), add);
+  Output id2 = ops::Identity(
+      s.WithOpName("id2").WithControlDependencies(y).WithControlDependencies(z),
+      add);
+
+  GrapplerItem item;
+  TF_CHECK_OK(s.ToGraphDef(&item.graph));
+  item.fetch.push_back("id1");
+  item.fetch.push_back("id2");
+
+  DependencyOptimizer optimizer;
+  GraphDef output;
+  Status status = optimizer.Optimize(nullptr, item, &output);
+  TF_EXPECT_OK(status);
+  // Run the optimizer twice to make sure the rewrite is idempotent.
+  item.graph.Swap(&output);
+  status = optimizer.Optimize(nullptr, item, &output);
+  TF_EXPECT_OK(status);
+
+  // The 'z' node should have been optimized away leaving only 5 nodes.
+  EXPECT_EQ(5, output.node_size());
+
+  for (const NodeDef& node : item.graph.node()) {
+    if (node.name() == "id1" || node.name() == "id2") {
+      EXPECT_EQ(1, node.input_size());
+      EXPECT_EQ("add", node.input(0));
+    }
+  }
+}
+
+TEST_F(DependencyOptimizerTest, ChangeToNoop) {
+  tensorflow::Scope s = tensorflow::Scope::NewRootScope();
+  Output x = ops::RandomUniform(s.WithOpName("x"), {1, 2}, DT_FLOAT);
+  Output y = ops::RandomUniform(s.WithOpName("y"), {1, 2}, DT_FLOAT);
   Output add = ops::Add(s.WithOpName("add"), x, y);
   Output id1 =
       ops::Identity(s.WithOpName("id1").WithControlDependencies(add), x);
@@ -107,8 +144,8 @@ TEST_F(DependencyOptimizerTest, ChangeToNoop) {
 // TODO(rmlarsen): Add test to make sure we skip Switch and Merge.
 TEST_F(DependencyOptimizerTest, ChangeToNoop_NoFetch) {
   tensorflow::Scope s = tensorflow::Scope::NewRootScope();
-  Output x = ops::Const(s.WithOpName("x"), {1.0f, 2.0f}, {1, 2});
-  Output y = ops::Const(s.WithOpName("y"), {1.0f, 2.0f}, {1, 2});
+  Output x = ops::RandomUniform(s.WithOpName("x"), {1, 2}, DT_FLOAT);
+  Output y = ops::RandomUniform(s.WithOpName("y"), {1, 2}, DT_FLOAT);
   Output add = ops::Add(s.WithOpName("add"), x, y);
   Output id1 =
       ops::Identity(s.WithOpName("id1").WithControlDependencies(add), x);
@@ -128,7 +165,7 @@ TEST_F(DependencyOptimizerTest, ChangeToNoop_NoFetch) {
 
 TEST_F(DependencyOptimizerTest, RemoveNoOps_EmptyInputOrOutput) {
   tensorflow::Scope s = tensorflow::Scope::NewRootScope();
-  Output x = ops::Const(s, {1.0f, 2.0f}, {1, 2});
+  Output x = ops::RandomUniform(s, {1, 2}, DT_FLOAT);
   auto noop1 = ops::NoOp(s);
   auto noop2 = ops::NoOp(s.WithControlDependencies(x));
   Output id = ops::Identity(s.WithControlDependencies({noop1.operation}), x);
@@ -152,15 +189,15 @@ TEST_F(DependencyOptimizerTest, RemoveNoOps_EmptyInputOrOutput) {
       EXPECT_EQ(0, node.input_size());
     } else if (node.name() == "Identity") {
       EXPECT_EQ(1, node.input_size());
-      EXPECT_EQ("Const", node.input(0));
+      EXPECT_EQ("RandomUniform", node.input(0));
     }
   }
 }
 
 TEST_F(DependencyOptimizerTest, RemoveNoOps_SingleInputOrOutput) {
   tensorflow::Scope s = tensorflow::Scope::NewRootScope();
-  Output x = ops::Const(s.WithOpName("x"), {1.0f, 2.0f}, {1, 2});
-  Output y = ops::Const(s.WithOpName("y"), {1.0f, 2.0f}, {1, 2});
+  Output x = ops::RandomUniform(s.WithOpName("x"), {1, 2}, DT_FLOAT);
+  Output y = ops::RandomUniform(s.WithOpName("y"), {1, 2}, DT_FLOAT);
   // NoOp with a single input- and two output dependencies.
   auto noop = ops::NoOp(s.WithControlDependencies(x));
   // NoOp with a two input- and a single output dependency.
-- 
GitLab


From e35da0b306af78a08d9dc313aed2c9acbbab194c Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 7 Dec 2017 11:49:55 -0800
Subject: [PATCH 0758/1225] Add logging to explicitly report the op types of
 nodes that fail shape inference.

PiperOrigin-RevId: 178273752
---
 tensorflow/core/grappler/costs/graph_properties.cc | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/tensorflow/core/grappler/costs/graph_properties.cc b/tensorflow/core/grappler/costs/graph_properties.cc
index ec44d11bdd..34d5c48c31 100644
--- a/tensorflow/core/grappler/costs/graph_properties.cc
+++ b/tensorflow/core/grappler/costs/graph_properties.cc
@@ -292,6 +292,7 @@ void VerboseLogUnknownDimensionSources(
   // Find all nodes in the graph for which we
   // do not have any unknown dimensions in their inputs, but
   // we have some unknown dimensions in their outputs.
+  std::map<string, int> op_to_count;
   for (const Node* const node : graph.nodes()) {
     if (node->num_outputs() == 0) {
       continue;
@@ -331,11 +332,18 @@ void VerboseLogUnknownDimensionSources(
         VLOG(2) << "Node: " << node->name() << ", Op: " << node->def().op()
                 << ", " << inputs << ", " << outputs;
 
+        op_to_count[node->def().op()]++;
+
         // don't log again for this node
         break;
       }
     }
   }
+  VLOG(2) << "Op types with known inputs, but with unknown output dimensions "
+          << "(format: <op_type> (<count>)):";
+  for (const auto& p : op_to_count) {
+    VLOG(2) << p.first << " (" << p.second << ")";
+  }
 }
 
 }  // namespace
-- 
GitLab


From 9620b2df63854538357bf41f4d9761499e8e573d Mon Sep 17 00:00:00 2001
From: Igor Saprykin <isaprykin@google.com>
Date: Thu, 7 Dec 2017 12:10:17 -0800
Subject: [PATCH 0759/1225] Fix the issue with shared saver on GPU.

`ShardedFilename` and ``MergeV2Checkpoints/checkpoint_prefixes` operations were placed on GPU even though there are no GPU kernels for them.

PiperOrigin-RevId: 178276605
---
 tensorflow/python/BUILD                         | 15 ++++++++-------
 .../python/training/monitored_session_test.py   | 14 --------------
 tensorflow/python/training/saver.py             |  4 ++--
 tensorflow/python/training/saver_test.py        | 17 +++++++++++++++++
 4 files changed, 27 insertions(+), 23 deletions(-)

diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD
index bd8ef6944c..af99754776 100644
--- a/tensorflow/python/BUILD
+++ b/tensorflow/python/BUILD
@@ -3634,6 +3634,7 @@ cuda_py_test(
         "//tensorflow/core:protos_all_py",
         "//tensorflow/python/data/ops:dataset_ops",
     ],
+    tags = ["multi_gpu"],
 )
 
 py_test(
@@ -3787,11 +3788,16 @@ py_test(
     ],
 )
 
-cuda_py_test(
+py_test(
     name = "monitored_session_test",
     size = "medium",
     srcs = ["training/monitored_session_test.py"],
-    additional_deps = [
+    srcs_version = "PY2AND3",
+    tags = [
+        "no_windows",
+        "notsan",  # b/67945581
+    ],
+    deps = [
         ":array_ops",
         ":client_testlib",
         ":control_flow_ops",
@@ -3806,11 +3812,6 @@ cuda_py_test(
         "//tensorflow/contrib/testing:testing_py",
         "//tensorflow/core:protos_all_py",
     ],
-    tags = [
-        "multi_gpu",
-        "no_windows",
-        "notsan",  # b/67945581
-    ],
 )
 
 py_test(
diff --git a/tensorflow/python/training/monitored_session_test.py b/tensorflow/python/training/monitored_session_test.py
index 349d8537cb..159b2d5c16 100644
--- a/tensorflow/python/training/monitored_session_test.py
+++ b/tensorflow/python/training/monitored_session_test.py
@@ -36,7 +36,6 @@ from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import errors_impl
 from tensorflow.python.framework import ops
-from tensorflow.python.framework import test_util
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import control_flow_ops
 from tensorflow.python.ops import resource_variable_ops
@@ -1969,19 +1968,6 @@ class MonitoredSessionTest(test.TestCase):
           self.assertEqual(2, trace_the_exception['side_effect_counter'])
           self.assertNear(0.62, session.run(graph_state), 0.1)
 
-  def test_saver_on_a_gpu(self):
-    if not test_util.is_gpu_available():
-      return
-    with ops.Graph().as_default():
-      with self.test_session():
-        with ops.device('/gpu:0'):
-          variables.Variable(0)
-        saver_lib.Saver()
-
-        # TODO(b/36964652): Reproduces the issue that needs to be fixed.
-        with self.assertRaises(errors_impl.InvalidArgumentError):
-          monitored_session.MonitoredSession()
-
 
 class SingularMonitoredSessionTest(test.TestCase):
   """Tests SingularMonitoredSession."""
diff --git a/tensorflow/python/training/saver.py b/tensorflow/python/training/saver.py
index bd47736d4b..ba6301e785 100644
--- a/tensorflow/python/training/saver.py
+++ b/tensorflow/python/training/saver.py
@@ -349,7 +349,7 @@ class BaseSaverBuilder(object):
     last_device = None
     for shard, (device, saveables) in enumerate(per_device):
       last_device = device
-      with ops.device(device):
+      with ops.device(_set_cpu0(device)):
         sharded_filename = self.sharded_filename(tmp_checkpoint_prefix, shard,
                                                  num_shards_tensor)
         sharded_prefixes.append(sharded_filename)
@@ -357,7 +357,7 @@ class BaseSaverBuilder(object):
 
     with ops.control_dependencies([x.op for x in sharded_saves]):
       # Co-locates the merge step with the last device.
-      with ops.device(last_device):
+      with ops.device(_set_cpu0(last_device)):
         # V2 format write path consists of a metadata merge step.  Once merged,
         # attempts to delete the temporary directory, "<user-fed prefix>_temp".
         merge_step = gen_io_ops.merge_v2_checkpoints(
diff --git a/tensorflow/python/training/saver_test.py b/tensorflow/python/training/saver_test.py
index ffe933bb0f..207e4a2842 100644
--- a/tensorflow/python/training/saver_test.py
+++ b/tensorflow/python/training/saver_test.py
@@ -542,6 +542,23 @@ class SaverTest(test.TestCase):
       save = saver_module.Saver({"v0": v0_2})
       variables.global_variables_initializer().run()
 
+  def testSharedServerOnGPU(self):
+    if not test.is_gpu_available():
+      return
+    save_path = os.path.join(self.get_temp_dir(), "gpu")
+    with session.Session("", graph=ops_lib.Graph()) as sess:
+      with sess.graph.device(test.gpu_device_name()):
+        v0_1 = variables.Variable(123.45)
+      save = saver_module.Saver({"v0": v0_1}, sharded=True, allow_empty=True)
+      variables.global_variables_initializer().run()
+      save.save(sess, save_path)
+
+    with session.Session("", graph=ops_lib.Graph()) as sess:
+      with sess.graph.device(test.gpu_device_name()):
+        v0_2 = variables.Variable(543.21)
+      save = saver_module.Saver({"v0": v0_2}, sharded=True, allow_empty=True)
+      variables.global_variables_initializer().run()
+
   def testVariables(self):
     save_path = os.path.join(self.get_temp_dir(), "variables")
     with session.Session("", graph=ops_lib.Graph()) as sess:
-- 
GitLab


From a6d01af26616365a8ac80d635d7fb53b96a4ab78 Mon Sep 17 00:00:00 2001
From: Skye Wanderman-Milne <skyewm@google.com>
Date: Thu, 7 Dec 2017 12:20:39 -0800
Subject: [PATCH 0760/1225] Graph._create_op_from_tf_operation should update
 _names_in_use.

Otherwise new operations can be created with the same name as existing
Operations created with _create_op_from_tf_operation. This will result in
an error from the C API, while the Python code is supposed to automatically
dedup op names.

PiperOrigin-RevId: 178277940
---
 tensorflow/python/framework/ops.py      |  8 ++++++++
 tensorflow/python/framework/ops_test.py | 25 +++++++++++++++++++++++++
 2 files changed, 33 insertions(+)

diff --git a/tensorflow/python/framework/ops.py b/tensorflow/python/framework/ops.py
index 551aff9fa9..22c2cc7bb1 100644
--- a/tensorflow/python/framework/ops.py
+++ b/tensorflow/python/framework/ops.py
@@ -3181,6 +3181,14 @@ class Graph(object):
     input_ops = set(self._get_operation_by_tf_operation(output.oper)
                     for output in tf_outputs)
     control_inputs = self._control_dependencies_for_inputs(input_ops)
+
+    # Update _names_in_use before calling the Operation constructor since the
+    # control flow code may create more Operations, and we don't want the names
+    # to conflict.
+    op_name = c_api.TF_OperationName(c_op)
+    assert op_name not in self._names_in_use
+    self._names_in_use[op_name] = 1
+
     ret = Operation(c_op, self, control_inputs=control_inputs)
     self._create_op_helper(ret, compute_device=compute_device)
     return ret
diff --git a/tensorflow/python/framework/ops_test.py b/tensorflow/python/framework/ops_test.py
index 3ac9b10593..ae51125b39 100644
--- a/tensorflow/python/framework/ops_test.py
+++ b/tensorflow/python/framework/ops_test.py
@@ -743,6 +743,31 @@ class CreateOpFromTFOperationTest(test_util.TensorFlowTestCase):
     self.assertEqual(len(op.outputs), 1)
     self.assertEqual(op.outputs[0].shape, tensor_shape.matrix(2, 3))
 
+  def testUniqueName(self):
+    g = ops.Graph()
+    with g.as_default():
+      if ops._USE_C_API:
+        c_op = ops._create_c_op(
+            g, ops._NodeDef("IntOutput", "myop"), [], [])
+        c_op2 = ops._create_c_op(
+            g, ops._NodeDef("IntOutput", "myop_1"), [], [])
+        op = g._create_op_from_tf_operation(c_op)
+        op2 = g._create_op_from_tf_operation(c_op2)
+      else:
+        # Test pure-Python version to make sure C API has same behavior.
+        op = test_ops.int_output(name="myop").op
+        op2 = test_ops.int_output(name="myop_1").op
+
+      # Create ops with same names as op1 and op2. We expect the new names to be
+      # uniquified.
+      op3 = test_ops.int_output(name="myop").op
+      op4 = test_ops.int_output(name="myop_1").op
+
+    self.assertEqual(op.name, "myop")
+    self.assertEqual(op2.name, "myop_1")
+    self.assertEqual(op3.name, "myop_2")
+    self.assertEqual(op4.name, "myop_1_1")
+
   def testCond(self):
     g = ops.Graph()
     with g.as_default():
-- 
GitLab


From 507dc8c3b8d86f0db86cf21747810366f33c9efe Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 7 Dec 2017 12:23:59 -0800
Subject: [PATCH 0761/1225] Use size=large, shard_count=2 for
 tensorflow/python/kernel_tests:transpose_op_test

PiperOrigin-RevId: 178278370
---
 tensorflow/python/kernel_tests/BUILD | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tensorflow/python/kernel_tests/BUILD b/tensorflow/python/kernel_tests/BUILD
index a1fd062540..0660f40300 100644
--- a/tensorflow/python/kernel_tests/BUILD
+++ b/tensorflow/python/kernel_tests/BUILD
@@ -2069,7 +2069,7 @@ cuda_py_test(
 
 cuda_py_test(
     name = "transpose_op_test",
-    size = "medium",
+    size = "large",
     srcs = ["transpose_op_test.py"],
     additional_deps = [
         "//third_party/py/numpy",
@@ -2077,6 +2077,7 @@ cuda_py_test(
         "//tensorflow/python:client_testlib",
         "//tensorflow/python:framework_for_generated_wrappers",
     ],
+    shard_count = 2,
 )
 
 cuda_py_test(
-- 
GitLab


From ee1bd60e64a2d89b7374ee2e8d65d1620eabf568 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 7 Dec 2017 12:26:45 -0800
Subject: [PATCH 0762/1225] Add `discriminator_and_aux_fn` to `InfoGANModel`.

PiperOrigin-RevId: 178278681
---
 tensorflow/contrib/gan/python/namedtuples.py | 3 +++
 tensorflow/contrib/gan/python/train.py       | 3 ++-
 tensorflow/contrib/gan/python/train_test.py  | 6 ++++--
 3 files changed, 9 insertions(+), 3 deletions(-)

diff --git a/tensorflow/contrib/gan/python/namedtuples.py b/tensorflow/contrib/gan/python/namedtuples.py
index 48f5e8e47d..3d4e315ebd 100644
--- a/tensorflow/contrib/gan/python/namedtuples.py
+++ b/tensorflow/contrib/gan/python/namedtuples.py
@@ -79,6 +79,7 @@ class InfoGANModel(
     collections.namedtuple('InfoGANModel', GANModel._fields + (
         'structured_generator_inputs',
         'predicted_distributions',
+        'discriminator_and_aux_fn',
     ))):
   """An InfoGANModel contains all the pieces needed for InfoGAN training.
 
@@ -91,6 +92,8 @@ class InfoGANModel(
     predicted_distributions: A list of tf.Distributions. Predicted by the
       recognizer, and used to evaluate the likelihood of the structured noise.
       List length should match `structured_generator_inputs`.
+    discriminator_and_aux_fn: The original discriminator function that returns
+      a tuple of (logits, `predicted_distributions`).
   """
 
 
diff --git a/tensorflow/contrib/gan/python/train.py b/tensorflow/contrib/gan/python/train.py
index e9443f766b..27c1a22451 100644
--- a/tensorflow/contrib/gan/python/train.py
+++ b/tensorflow/contrib/gan/python/train.py
@@ -215,7 +215,8 @@ def infogan_model(
       disc_scope,
       lambda x, y: discriminator_fn(x, y)[0],  # conform to non-InfoGAN API
       structured_generator_inputs,
-      predicted_distributions)
+      predicted_distributions,
+      discriminator_fn)
 
 
 def acgan_model(
diff --git a/tensorflow/contrib/gan/python/train_test.py b/tensorflow/contrib/gan/python/train_test.py
index 6b27b69261..4d4ede706c 100644
--- a/tensorflow/contrib/gan/python/train_test.py
+++ b/tensorflow/contrib/gan/python/train_test.py
@@ -145,14 +145,16 @@ def get_infogan_model():
   return namedtuples.InfoGANModel(
       *get_gan_model(),
       structured_generator_inputs=[constant_op.constant(0)],
-      predicted_distributions=[categorical.Categorical([1.0])])
+      predicted_distributions=[categorical.Categorical([1.0])],
+      discriminator_and_aux_fn=infogan_discriminator_model)
 
 
 def get_callable_infogan_model():
   return namedtuples.InfoGANModel(
       *get_callable_gan_model(),
       structured_generator_inputs=[constant_op.constant(0)],
-      predicted_distributions=[categorical.Categorical([1.0])])
+      predicted_distributions=[categorical.Categorical([1.0])],
+      discriminator_and_aux_fn=infogan_discriminator_model)
 
 
 def create_infogan_model():
-- 
GitLab


From f28254428ff352ddb708df97545803aba97aa5b6 Mon Sep 17 00:00:00 2001
From: Skye Wanderman-Milne <skyewm@google.com>
Date: Thu, 7 Dec 2017 12:28:27 -0800
Subject: [PATCH 0763/1225] WhileContext.AddBackpropIndexedSlicesAccumulator
 shouldn't create invalid shapes

AddBackpropIndexedSlicesAccumulator effectively creates a while loop
where one of the loop variables changes shape on every iteration (it
concats together the accumulated values). This means the while loop
body should not propagate the input tensor shape, which is what this
change acheives. The current code works because
Operation._update_input doesn't check the new shape, but it will check
with the C API enabled.

PiperOrigin-RevId: 178278910
---
 tensorflow/python/ops/control_flow_ops.py | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/tensorflow/python/ops/control_flow_ops.py b/tensorflow/python/ops/control_flow_ops.py
index 3accfc835b..b091117d94 100644
--- a/tensorflow/python/ops/control_flow_ops.py
+++ b/tensorflow/python/ops/control_flow_ops.py
@@ -2493,9 +2493,17 @@ class WhileContext(ControlFlowContext):
     if shape_acc is not None:
       self.AddName(shape_acc.name)
       init_acc.append(shape_acc)
+
+    # Set use_input_shape=False since the accumulator tensors will grow in
+    # size. If use_input_shape=True, the _update_input call below will result in
+    # incompatible shapes.
     enter_acc = [_Enter(x, self._name, is_constant=False,
                         parallel_iterations=self._parallel_iterations,
-                        name="b_acc") for x in init_acc]
+                        use_input_shape=False, name="b_acc") for x in init_acc]
+    # Manually set appropriate partial shapes.
+    enter_acc[0].set_shape([None])
+    if values_acc.shape.dims is not None:
+      enter_acc[1].set_shape([None] + values_acc.shape.as_list()[1:])
     self.loop_enters.extend(enter_acc)
 
     merge_acc = [merge([x, x], name="b_acc")[0] for x in enter_acc]
-- 
GitLab


From 25adb9cd6a098f467023c5850040baf76b71c4ac Mon Sep 17 00:00:00 2001
From: Mark Daoust <markdaoust@google.com>
Date: Thu, 7 Dec 2017 12:46:18 -0800
Subject: [PATCH 0764/1225] Add "apple CROSSTOOL" to common problems

PiperOrigin-RevId: 178281082
---
 tensorflow/docs_src/install/install_sources.md | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/tensorflow/docs_src/install/install_sources.md b/tensorflow/docs_src/install/install_sources.md
index c01aa907a3..e187b0e51c 100644
--- a/tensorflow/docs_src/install/install_sources.md
+++ b/tensorflow/docs_src/install/install_sources.md
@@ -441,6 +441,15 @@ Stack Overflow and specify the `tensorflow` tag.
   <td>Invoking `python` or `ipython` generates the following error:
   <pre>ImportError: cannot import name pywrap_tensorflow</pre></td>
 </tr>
+
+<tr>
+  <td><a href="https://stackoverflow.com/questions/45276830">45276830</a></td>
+  <td><pre>external/local_config_cc/BUILD:50:5: in apple_cc_toolchain rule
+  @local_config_cc//:cc-compiler-darwin_x86_64: Xcode version must be specified
+  to use an Apple CROSSTOOL.</pre>
+  </td>
+</tr>
+
 </table>
 
 ## Tested source configurations
-- 
GitLab


From 30c10565f92ec0f506a4246bca837714e8c18a7f Mon Sep 17 00:00:00 2001
From: Ankur Taly <ataly@google.com>
Date: Thu, 7 Dec 2017 13:34:23 -0800
Subject: [PATCH 0765/1225] added no_mac tag to source_remote_test

---
 tensorflow/python/debug/BUILD | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tensorflow/python/debug/BUILD b/tensorflow/python/debug/BUILD
index 2fda463a77..aec0966b66 100644
--- a/tensorflow/python/debug/BUILD
+++ b/tensorflow/python/debug/BUILD
@@ -533,6 +533,7 @@ py_test(
     srcs = ["lib/source_remote_test.py"],
     srcs_version = "PY2AND3",
     tags = [
+        "no_mac",
         "no_windows",
         "oss_serial",
     ],
-- 
GitLab


From 7772360b9f9e466437a3ecadeb9c5ca28ccbbc50 Mon Sep 17 00:00:00 2001
From: Skye Wanderman-Milne <skyewm@google.com>
Date: Thu, 7 Dec 2017 13:34:55 -0800
Subject: [PATCH 0766/1225] Raise exception on bad while loop shapes sooner.

This change moves the _EnforceShapeInvariant call inside
_AddNextAndBackEdge, instead of after it. This is so we can do the
check before calling Operation._update_input, which throws a shape
error with the C API enabled.

PiperOrigin-RevId: 178287233
---
 .../python/kernel_tests/control_flow_ops_py_test.py  |  8 +++++++-
 tensorflow/python/ops/control_flow_ops.py            | 12 +++++++-----
 2 files changed, 14 insertions(+), 6 deletions(-)

diff --git a/tensorflow/python/kernel_tests/control_flow_ops_py_test.py b/tensorflow/python/kernel_tests/control_flow_ops_py_test.py
index 20eb923e72..38ea8f7de4 100644
--- a/tensorflow/python/kernel_tests/control_flow_ops_py_test.py
+++ b/tensorflow/python/kernel_tests/control_flow_ops_py_test.py
@@ -914,7 +914,13 @@ class ControlFlowTest(test.TestCase):
       self.assertTrue(r[1].get_shape()[0].value is None)
       self.assertEqual(r[1].get_shape()[1], tensor_shape.Dimension(2))
 
-      with self.assertRaisesRegexp(ValueError, "not an invariant for"):
+      with self.assertRaisesRegexp(
+          ValueError,
+          r"The shape for while_1/Merge_1:0 is not an invariant for the loop. "
+          r"It enters the loop with shape \(2, 2\), but has shape \(4, 2\) "
+          r"after one iteration. Provide shape invariants using either the "
+          r"`shape_invariants` argument of tf.while_loop or set_shape\(\) on "
+          r"the loop variables."):
         r = control_flow_ops.while_loop(c, b, [i, m])
 
   def testWhileShapeInferenceSparseTensor(self):
diff --git a/tensorflow/python/ops/control_flow_ops.py b/tensorflow/python/ops/control_flow_ops.py
index b091117d94..1e2202df64 100644
--- a/tensorflow/python/ops/control_flow_ops.py
+++ b/tensorflow/python/ops/control_flow_ops.py
@@ -590,6 +590,8 @@ def _EnforceShapeInvariant(merge_var, next_var):
     m_shape = merge_var.get_shape()
     n_shape = next_var.get_shape()
     if not _ShapeLessThanOrEqual(n_shape, m_shape):
+      # TODO(skyewm): get original loop input that caused the shape error and
+      # report its name instead of the merge node's.
       raise ValueError(
           "The shape for %s is not an invariant for the loop. It enters "
           "the loop with shape %s, but has shape %s after one iteration. "
@@ -646,6 +648,11 @@ def _AddNextAndBackEdge(m, v):
   if isinstance(m, ops.Tensor):
     v = ops.convert_to_tensor(v)
     v = _NextIteration(v)
+    # Make sure the shapes of loop outputs are correct. We do this before
+    # calling _update_input, which will raise a less-helpful error message if
+    # the types don't match.
+    # TODO(skyewm): call this for other cases below (needs testing)
+    _EnforceShapeInvariant(m, v)
     m.op._update_input(1, v)   # pylint: disable=protected-access
   elif isinstance(m, ops.IndexedSlices):
     # pylint: disable=protected-access
@@ -2657,11 +2664,6 @@ class WhileContext(ControlFlowContext):
     exit_vars = [exit(x[0]) for x in switch_vars]
     self._loop_exits = exit_vars
 
-    # Make sure the shapes of loop outputs are correct.
-    for m_var, n_var in zip(merge_vars, next_vars):
-      if isinstance(m_var, ops.Tensor):
-        _EnforceShapeInvariant(m_var, n_var)
-
     # Exit the loop.
     self.ExitResult(exit_vars)
 
-- 
GitLab


From 488f09179f6dd42640176af5af05a2e063c4c2be Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 7 Dec 2017 13:35:51 -0800
Subject: [PATCH 0767/1225] Remove unused BUILD dependencies

PiperOrigin-RevId: 178287366
---
 tensorflow/core/kernels/BUILD | 2 --
 tensorflow/python/BUILD       | 1 -
 2 files changed, 3 deletions(-)

diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD
index 9a150fef7c..0b5b4bfd3d 100644
--- a/tensorflow/core/kernels/BUILD
+++ b/tensorflow/core/kernels/BUILD
@@ -269,13 +269,11 @@ cc_library(
 cc_library(
     name = "conv_ops_gpu_hdrs",
     hdrs = ["conv_ops_gpu.h"],
-    deps = ["//third_party/eigen3"],
 )
 
 cc_library(
     name = "gpu_util_hdrs",
     hdrs = ["gpu_utils.h"],
-    deps = ["//third_party/eigen3"],
 )
 
 tf_cc_test(
diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD
index af99754776..868ffcb473 100644
--- a/tensorflow/python/BUILD
+++ b/tensorflow/python/BUILD
@@ -233,7 +233,6 @@ cc_library(
         ":safe_ptr",
         "//tensorflow/core:framework",
         "//tensorflow/core:lib",
-        "//third_party/py/numpy:headers",
         "//util/python:python_headers",
     ],
 )
-- 
GitLab


From 89804a9c680dca12b26a271644f76c91a3a4676a Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 7 Dec 2017 13:48:07 -0800
Subject: [PATCH 0768/1225] Eliminate redundant control dependencies by
 computing the transitive reduction of the graph G = (V, E). The graph is
 turned into a DAG by breaking loops. We sort the DAG topologically and apply,
 at each source of control dependencies, the linear time algorithm for
 computing longest paths in a DAG. We can eliminate redundant control
 dependencies when there exists a path of length > 1 from source to target.

Worst case time complexity is O(\sum_{v \in V_c} |{(v, u) : topo(v) < topo(u) <= max(topo(z)) , where (v,z) \in E_c}|),

V_c \subset V is the set of nodes with control outputs, E_c \subset E is the set of control edges and topo(u) is the index of node u in the topological ordering of V.

-------------------------------------------------------------------------------------

Results on learning/brain/experimental/grappler/data/inceptionv3.meta:

Runtime for pass: ~30 ms.
Removes 12% of control dependencies, removes 3.7% of nodes.

I1201 15:54:44.624856   38255 dependency_optimizer.cc:351] Finished deduping control inputs
I1201 15:54:44.673534   38255 dependency_optimizer.cc:354] Finished topo sort
I1201 15:54:44.719586   38255 dependency_optimizer.cc:286] Finished compression
I1201 15:54:44.729909   38255 dependency_optimizer.cc:334] Finished reduction
I1201 15:54:44.729917   38255 dependency_optimizer.cc:337] Removed 519 out of 4325 control dependencies
I1201 15:54:44.890641   38255 dependency_optimizer.cc:245] Deleted 499 out of 13535 nodes for deletion.

PiperOrigin-RevId: 178289073
---
 tensorflow/core/grappler/optimizers/BUILD     |   2 +
 .../optimizers/dependency_optimizer.cc        | 121 +++++++++++++++++-
 .../optimizers/dependency_optimizer.h         |   3 +
 .../optimizers/dependency_optimizer_test.cc   |  54 +++++---
 4 files changed, 159 insertions(+), 21 deletions(-)

diff --git a/tensorflow/core/grappler/optimizers/BUILD b/tensorflow/core/grappler/optimizers/BUILD
index 7b4ed10e7e..e557adc211 100644
--- a/tensorflow/core/grappler/optimizers/BUILD
+++ b/tensorflow/core/grappler/optimizers/BUILD
@@ -213,6 +213,7 @@ cc_library(
         "//tensorflow/core/grappler:op_types",
         "//tensorflow/core/grappler:utils",
         "//tensorflow/core/grappler/costs:graph_properties",
+        "//tensorflow/core/grappler/utils:topological_sort",
     ],
 )
 
@@ -231,6 +232,7 @@ tf_cc_test(
         "//tensorflow/core/grappler:grappler_item",
         "//tensorflow/core/grappler:utils",
         "//tensorflow/core/grappler/inputs:trivial_test_graph_input_yielder",
+        "//tensorflow/core/grappler/utils:topological_sort",
     ],
 )
 
diff --git a/tensorflow/core/grappler/optimizers/dependency_optimizer.cc b/tensorflow/core/grappler/optimizers/dependency_optimizer.cc
index 1e97d2d8d2..498a3a443f 100644
--- a/tensorflow/core/grappler/optimizers/dependency_optimizer.cc
+++ b/tensorflow/core/grappler/optimizers/dependency_optimizer.cc
@@ -23,8 +23,10 @@ limitations under the License.
 #include "tensorflow/core/grappler/grappler_item.h"
 #include "tensorflow/core/grappler/op_types.h"
 #include "tensorflow/core/grappler/optimizers/constant_folding.h"
+#include "tensorflow/core/grappler/utils/topological_sort.h"
 #include "tensorflow/core/lib/core/errors.h"
 #include "tensorflow/core/lib/core/stringpiece.h"
+#include "tensorflow/core/lib/gtl/inlined_vector.h"
 #include "tensorflow/core/lib/strings/strcat.h"
 #include "tensorflow/core/util/device_name_utils.h"
 
@@ -77,6 +79,7 @@ bool DependencyOptimizer::SafeToConvertToNoOp(const NodeDef& node) {
     return false;
   }
   if (!fetch_nodes_known_ || NumNonControlOutputs(node, *node_map_) > 0) {
+    // The output values of this node may be needed.
     return false;
   }
   if (IsMerge(node) || IsSwitch(node)) {
@@ -203,7 +206,7 @@ void DependencyOptimizer::OptimizeNode(int node_idx,
     if (num_inputs * num_outputs > num_inputs + num_outputs) {
       return;
     }
-    VLOG(1) << "***** Rerouting input around  " << node->name();
+    VLOG(1) << "***** Rerouting input around " << node->name();
     std::vector<NodeDef*> input_nodes;
     for (int i = 0; i < num_inputs; ++i) {
       NodeDef* tmp = node_map_->GetNode(node->input(i));
@@ -291,6 +294,94 @@ Status DependencyOptimizer::OptimizeDependencies() {
   return Status::OK();
 }
 
+Status DependencyOptimizer::TransitiveReduction() {
+  // PRECONDITION: optimized_graph_ must be sorted topologically.
+  const int num_nodes = optimized_graph_->node_size();
+  // Set up a compressed version of the graph to save a constant factor in the
+  // expensive algorithm below. Also cache the set of control outputs and the
+  // highest index of a target of any control output from each node.
+  int num_controls = 0;
+  std::vector<gtl::InlinedVector<int, 4>> inputs(num_nodes);
+  std::vector<gtl::InlinedVector<std::pair<int, int>, 2>> control_outputs(
+      num_nodes);
+  for (int node_idx = 0; node_idx < num_nodes; ++node_idx) {
+    const NodeDef& node = optimized_graph_->node(node_idx);
+    if (ModifiesFrameInfo(node)) {
+      // Ignore nodes that modify frame info.
+      continue;
+    }
+    for (int input_slot = 0; input_slot < node.input_size(); ++input_slot) {
+      const string& input = node.input(input_slot);
+      const NodeDef* input_node = node_map_->GetNode(input);
+      if (ModifiesFrameInfo(*input_node)) {
+        // Ignore edges from nodes that modify frame info.
+        continue;
+      }
+      const int input_node_idx = node_to_idx_[input_node];
+      inputs[node_idx].push_back(input_node_idx);
+      if (IsControlInput(input)) {
+        ++num_controls;
+        control_outputs[input_node_idx].emplace_back(node_idx, input_slot);
+      }
+    }
+  }
+
+  // Run the longest path in DAG algorithm for each source node that has control
+  // outputs. If, for any target node of a control output, there exists a path
+  // of length > 1, we can drop that control dependency.
+  int num_controls_removed = 0;
+  std::vector<int> longest_distance(num_nodes);
+  for (int source = 0; source < num_nodes; ++source) {
+    int highest_control_target = -1;
+    for (const auto& control_output : control_outputs[source]) {
+      if (control_output.first > highest_control_target) {
+        highest_control_target = control_output.first;
+      }
+    }
+    if (highest_control_target < source) {
+      continue;
+    }
+    std::fill(longest_distance.begin() + source,
+              longest_distance.begin() + highest_control_target + 1, 0);
+    for (int target = source + 1; target <= highest_control_target; ++target) {
+      for (int input : inputs[target]) {
+        // If the input node is before source in the topo order, no path
+        // source -> input -> target can exits and we can skip it.
+        if (input >= source) {
+          // If source -> input -> target is longer than the longest
+          // path so far from source -> target, update the longest_distance.
+          int candidate_longest_distance = longest_distance[input] + 1;
+          if (candidate_longest_distance > longest_distance[target]) {
+            longest_distance[target] = candidate_longest_distance;
+          }
+        }
+      }
+    }
+
+    // If the longest path from the source to the target of a control dependency
+    // is longer than 1, there exists an alternate path, and we can eliminate
+    // the control dependency since it is redundant.
+    for (const auto& control_output : control_outputs[source]) {
+      const int target = control_output.first;
+      if (longest_distance[target] > 1) {
+        const int input_slot = control_output.second;
+        // We modify the node inplace here. This is safe because there can
+        // only be one control edge from a given source to a given target.
+        const NodeDef& source_node = optimized_graph_->node(source);
+        NodeDef* target_node = optimized_graph_->mutable_node(target);
+        target_node->mutable_input()->SwapElements(
+            input_slot, target_node->input_size() - 1);
+        node_map_->RemoveOutput(source_node.name(), target_node->name());
+        target_node->mutable_input()->RemoveLast();
+        ++num_controls_removed;
+      }
+    }
+  }
+  VLOG(1) << "Removed " << num_controls_removed << " out of " << num_controls
+          << " control dependencies";
+  return Status::OK();
+}
+
 void DependencyOptimizer::BuildNodeToIdx() {
   // Set up &node -> index map.
   node_to_idx_.clear();
@@ -302,17 +393,35 @@ void DependencyOptimizer::BuildNodeToIdx() {
 
 Status DependencyOptimizer::Optimize(Cluster* cluster, const GrapplerItem& item,
                                      GraphDef* optimized_graph) {
+  VLOG(1) << "Graph before optimization:\n" << optimized_graph_->DebugString();
   optimized_graph_ = optimized_graph;
   *optimized_graph_ = item.graph;
   nodes_to_preserve_ = item.NodesToPreserve();
   fetch_nodes_known_ = !item.fetch.empty();
-  node_map_.reset(new NodeMap(optimized_graph_));
-  BuildNodeToIdx();
-
-  VLOG(1) << "Graph before optimization:\n" << optimized_graph_->DebugString();
-  TF_RETURN_IF_ERROR(OptimizeDependencies());
 
   CleanControlInputs();
+  const int num_iterations = opt_level_ == RewriterConfig::AGGRESSIVE ? 2 : 1;
+  for (int iteration = 0; iteration < num_iterations; ++iteration) {
+    Status topo_sort_status = TopologicalSort(optimized_graph_);
+    node_map_.reset(new NodeMap(optimized_graph_));
+    BuildNodeToIdx();
+
+    // Remove redundant control dependencies, iteration 1.
+    if (opt_level_ == RewriterConfig::AGGRESSIVE) {
+      if (topo_sort_status.ok()) {
+        TF_RETURN_IF_ERROR(TransitiveReduction());
+      } else {
+        LOG(ERROR) << topo_sort_status.error_message();
+      }
+      VLOG(1) << "Graph after transitive reduction:\n"
+              << optimized_graph_->DebugString();
+    }
+
+    // Turn nodes without non-control outputs into NoOps, prune NoOps.
+    TF_RETURN_IF_ERROR(OptimizeDependencies());
+    VLOG(1) << "Graph after NoOp conversion & pruning:\n"
+            << optimized_graph_->DebugString();
+  }
   VLOG(1) << "Graph after optimization:\n" << optimized_graph_->DebugString();
 
   return Status::OK();
diff --git a/tensorflow/core/grappler/optimizers/dependency_optimizer.h b/tensorflow/core/grappler/optimizers/dependency_optimizer.h
index f9d4d0b6c2..3f6f418bee 100644
--- a/tensorflow/core/grappler/optimizers/dependency_optimizer.h
+++ b/tensorflow/core/grappler/optimizers/dependency_optimizer.h
@@ -56,6 +56,9 @@ class DependencyOptimizer : public GraphOptimizer {
   // inserting them in nodes_to_delete.
   void OptimizeNode(int node_idx, SetVector<int>* nodes_to_simplify,
                     std::set<int>* nodes_to_delete);
+  // Eliminates redundant control dependencies by computing the transitive
+  // reduction of the graph.
+  Status TransitiveReduction();
   // Main driver of dependency optimizations.
   Status OptimizeDependencies();
 
diff --git a/tensorflow/core/grappler/optimizers/dependency_optimizer_test.cc b/tensorflow/core/grappler/optimizers/dependency_optimizer_test.cc
index e714f5c042..d91525f814 100644
--- a/tensorflow/core/grappler/optimizers/dependency_optimizer_test.cc
+++ b/tensorflow/core/grappler/optimizers/dependency_optimizer_test.cc
@@ -21,6 +21,7 @@ limitations under the License.
 #include "tensorflow/core/grappler/optimizers/constant_folding.h"
 #include "tensorflow/core/grappler/optimizers/model_pruner.h"
 #include "tensorflow/core/grappler/utils.h"
+#include "tensorflow/core/grappler/utils/topological_sort.h"
 #include "tensorflow/core/lib/core/status_test_util.h"
 #include "tensorflow/core/platform/test.h"
 
@@ -122,21 +123,22 @@ TEST_F(DependencyOptimizerTest, ChangeToNoop) {
 
   EXPECT_EQ(item.graph.node_size(), output.node_size());
   for (int i = 0; i < item.graph.node_size(); ++i) {
-    const NodeDef& original = item.graph.node(i);
-    const NodeDef& optimized = output.node(i);
-    EXPECT_EQ(original.name(), optimized.name());
-    if (original.name() == "add") {
-      EXPECT_EQ("NoOp", optimized.op());
-    } else {
-      EXPECT_EQ(original.op(), optimized.op());
-    }
-    EXPECT_EQ(original.input_size(), optimized.input_size());
-    for (int j = 0; j < original.input_size(); ++j) {
-      if (original.name() == "add") {
-        EXPECT_EQ(AsControlDependency(original.input(j)), optimized.input(j));
-      } else {
-        EXPECT_EQ(original.input(j), optimized.input(j));
-      }
+    const NodeDef& node = item.graph.node(i);
+    if (node.name() == "add") {
+      EXPECT_EQ("NoOp", node.op());
+      EXPECT_EQ(2, node.input_size());
+      EXPECT_EQ("^x", node.input(0));
+      EXPECT_EQ("^y", node.input(1));
+    } else if (node.name() == "id1") {
+      EXPECT_EQ("Identity", node.op());
+      EXPECT_EQ(2, node.input_size());
+      EXPECT_EQ("x", node.input(0));
+      EXPECT_EQ("^y", node.input(1));
+    } else if (node.name() == "id2") {
+      EXPECT_EQ("Identity", node.op());
+      EXPECT_EQ(2, node.input_size());
+      EXPECT_EQ("y", node.input(0));
+      EXPECT_EQ("^x", node.input(1));
     }
   }
 }
@@ -160,6 +162,7 @@ TEST_F(DependencyOptimizerTest, ChangeToNoop_NoFetch) {
   Status status = optimizer.Optimize(nullptr, item, &output);
   TF_EXPECT_OK(status);
 
+  TF_CHECK_OK(TopologicalSort(&item.graph));
   VerifyGraphsEqual(item.graph, output, __FUNCTION__);
 }
 
@@ -234,6 +237,27 @@ TEST_F(DependencyOptimizerTest, RemoveNoOps_SingleInputOrOutput) {
   }
 }
 
+TEST_F(DependencyOptimizerTest, Transitive_Reduction_Simple) {
+  tensorflow::Scope s = tensorflow::Scope::NewRootScope();
+  Output c = ops::Const(s.WithOpName("c"), {1.0f, 2.0f}, {1, 2});
+  Output x = ops::Square(s.WithOpName("x"), c);
+  Output id1 = ops::Identity(s.WithOpName("id1"), x);
+  Output id2 =
+      ops::Identity(s.WithOpName("id2").WithControlDependencies({x}), id1);
+
+  GrapplerItem item;
+  TF_CHECK_OK(s.ToGraphDef(&item.graph));
+  item.fetch.push_back("id2");
+  DependencyOptimizer optimizer(RewriterConfig::AGGRESSIVE);
+  GraphDef output;
+  Status status = optimizer.Optimize(nullptr, item, &output);
+  TF_EXPECT_OK(status);
+  EXPECT_EQ(4, output.node_size());
+  EXPECT_EQ("id2", output.node(3).name());
+  EXPECT_EQ(1, output.node(3).input_size());
+  EXPECT_EQ("id1", output.node(3).input(0));
+}
+
 }  // namespace
 }  // namespace grappler
 }  // namespace tensorflow
-- 
GitLab


From b02eae0997fcc85970ac842a14505a015d6ae53d Mon Sep 17 00:00:00 2001
From: Derek Murray <mrry@google.com>
Date: Thu, 7 Dec 2017 13:52:24 -0800
Subject: [PATCH 0769/1225] [tf.data] Add usage notes to the
 `Dataset.from_generator()` documentation.

PiperOrigin-RevId: 178289691
---
 tensorflow/python/data/ops/dataset_ops.py | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/tensorflow/python/data/ops/dataset_ops.py b/tensorflow/python/data/ops/dataset_ops.py
index 17d9510cc3..76398beaa8 100644
--- a/tensorflow/python/data/ops/dataset_ops.py
+++ b/tensorflow/python/data/ops/dataset_ops.py
@@ -286,6 +286,23 @@ class Dataset(object):
     sess.run(value)  # (2, array([1, 1]))
     ```
 
+    NOTE: The current implementation of `Dataset.from_generator()` uses
+    @{tf.py_func} and inherits the same constraints. In particular, it
+    requires the `Dataset`- and `Iterator`-related operations to be placed
+    on a device in the same process as the Python program that called
+    `Dataset.from_generator()`. The body of `generator` will not be
+    serialized in a `GraphDef`, and you should not use this method if you
+    need to serialize your model and restore it in a different environment.
+
+    NOTE: If `generator` depends on mutable global variables or other external
+    state, be aware that the runtime may invoke `generator` multiple times
+    (in order to support repeating the `Dataset`) and at any time
+    between the call to `Dataset.from_generator()` and the production of the
+    first element from the generator. Mutating global variables or external
+    state can cause undefined behavior, and we recommend that you explicitly
+    cache any external state in `generator` before calling
+    `Dataset.from_generator()`.
+
     Args:
       generator: A callable object that takes no arguments and returns an
         object that supports the `iter()` protocol.
-- 
GitLab


From 0509f07cc2c80f8623570094d52e0bc696059e76 Mon Sep 17 00:00:00 2001
From: Skye Wanderman-Milne <skyewm@google.com>
Date: Thu, 7 Dec 2017 14:01:51 -0800
Subject: [PATCH 0770/1225] Make BaseSession.make_callable work with the C API
 enabled.

This change also switches session_test.py to use the with_c_api decorator
so we get coverage with and without the C API enabled. This way we know
we're not breaking backwards compatibility with the C API enabled.

PiperOrigin-RevId: 178291189
---
 tensorflow/python/client/session.py      | 65 +++++++++++++++---------
 tensorflow/python/client/session_test.py | 27 +++++-----
 2 files changed, 57 insertions(+), 35 deletions(-)

diff --git a/tensorflow/python/client/session.py b/tensorflow/python/client/session.py
index 759c36ad72..017bef99ce 100644
--- a/tensorflow/python/client/session.py
+++ b/tensorflow/python/client/session.py
@@ -1160,9 +1160,6 @@ class BaseSession(SessionInterface):
       TypeError: If `fetches` or `feed_list` cannot be interpreted
         as arguments to @{tf.Session.run}.
     """
-    assert not self._created_with_new_api, ('session.make_callable() doesn\'t '
-                                            'work with C API')
-
     if feed_list is not None:
       if not isinstance(feed_list, (list, tuple)):
         raise TypeError('`feed_list` must be a list or tuple.')
@@ -1184,12 +1181,18 @@ class BaseSession(SessionInterface):
 
     # Create a fetch handler to take care of the structure of fetches.
     fetch_handler = _FetchHandler(self._graph, fetches, {})
-    fetch_list_as_strings = _name_list(fetch_handler.fetches())
-    target_list_as_strings = _name_list(fetch_handler.targets())
+    if self._created_with_new_api:
+      # pylint: disable=protected-access
+      fetch_list = [t._as_tf_output() for t in fetch_handler.fetches()]
+      target_list = [op._c_op for op in fetch_handler.targets()]
+      # pylint: enable=protected-access
+    else:
+      fetch_list = _name_list(fetch_handler.fetches())
+      target_list = _name_list(fetch_handler.targets())
 
     def _callable_template_with_options_and_metadata(
-        fetch_list_as_strings,
-        target_list_as_strings,
+        fetch_list,
+        target_list,
         fetch_handler,
         options=None,
         run_metadata=None):
@@ -1199,9 +1202,14 @@ class BaseSession(SessionInterface):
       run_metadata_ptr = tf_session.TF_NewBuffer() if run_metadata else None
       try:
         with errors.raise_exception_on_not_ok_status() as status:
-          results = tf_session.TF_Run(
-              self._session, options_ptr, {}, fetch_list_as_strings,
-              target_list_as_strings, status, run_metadata_ptr)
+          if self._created_with_new_api:
+            results = tf_session.TF_SessionRun_wrapper(
+                self._session, options_ptr, {}, fetch_list, target_list,
+                run_metadata_ptr, status)
+          else:
+            results = tf_session.TF_Run(
+                self._session, options_ptr, {}, fetch_list, target_list, status,
+                run_metadata_ptr)
           if fetch_handler:
             results = fetch_handler.build_results(self, results)
           else:
@@ -1218,27 +1226,35 @@ class BaseSession(SessionInterface):
 
     if accept_options:
       return functools.partial(
-          _callable_template_with_options_and_metadata, fetch_list_as_strings,
-          target_list_as_strings, fetch_handler)
+          _callable_template_with_options_and_metadata, fetch_list,
+          target_list, fetch_handler)
     elif isinstance(fetches, ops.Operation):
       # Special case for fetching a single operation, because the
       # function will have no return value.
-      assert not fetch_list_as_strings
-      assert len(target_list_as_strings) == 1
+      assert not fetch_list
+      assert len(target_list) == 1
       def _single_operation_run():
         with errors.raise_exception_on_not_ok_status() as status:
-          tf_session.TF_Run(self._session, None, {}, [],
-                            target_list_as_strings, status, None)
+          if self._created_with_new_api:
+            tf_session.TF_SessionRun_wrapper(
+                self._session, None, {}, [], target_list, None, status)
+          else:
+            tf_session.TF_Run(
+                self._session, None, {}, [], target_list, status, None)
       return _single_operation_run
     elif isinstance(fetches, ops.Tensor):
       # Special case for fetching a single tensor, because the
       # function can return the result of `TF_Run()` directly.
-      assert len(fetch_list_as_strings) == 1
-      assert not target_list_as_strings
+      assert len(fetch_list) == 1
+      assert not target_list
       def _single_tensor_run():
         with errors.raise_exception_on_not_ok_status() as status:
-          results = tf_session.TF_Run(self._session, None, {},
-                                      fetch_list_as_strings, [], status, None)
+          if self._created_with_new_api:
+            results = tf_session.TF_SessionRun_wrapper(
+                self._session, None, {}, fetch_list, [], None, status)
+          else:
+            results = tf_session.TF_Run(
+                self._session, None, {}, fetch_list, [], status, None)
         return results[0]
       return _single_tensor_run
     else:
@@ -1246,9 +1262,12 @@ class BaseSession(SessionInterface):
       # results for us.
       def _fetch_handler_run():
         with errors.raise_exception_on_not_ok_status() as status:
-          results = tf_session.TF_Run(self._session, None, {},
-                                      fetch_list_as_strings,
-                                      target_list_as_strings, status, None)
+          if self._created_with_new_api:
+            results = tf_session.TF_SessionRun_wrapper(
+                self._session, None, {}, fetch_list, target_list, None, status)
+          else:
+            results = tf_session.TF_Run(
+                self._session, None, {}, fetch_list, target_list, status, None)
         return fetch_handler.build_results(self, results)
       return _fetch_handler_run
 
diff --git a/tensorflow/python/client/session_test.py b/tensorflow/python/client/session_test.py
index e4545d287b..3da03a7b0f 100644
--- a/tensorflow/python/client/session_test.py
+++ b/tensorflow/python/client/session_test.py
@@ -57,13 +57,13 @@ from tensorflow.python.platform import googletest
 from tensorflow.python.training import server_lib
 from tensorflow.python.util import compat
 
-ops._USE_C_API = True
 
 # NOTE(mrry): Dummy shape registration for ops used in the tests, since they
 # don't have C++ op registrations on which to attach C++ shape fns.
 ops.RegisterShape('ConstructionFails')(common_shapes.unknown_shape)
 
 
+@test_util.with_c_api
 class SessionTest(test_util.TensorFlowTestCase):
 
   def testUseExistingGraph(self):
@@ -165,8 +165,9 @@ class SessionTest(test_util.TensorFlowTestCase):
         # Run with a bogus handle.
         s.partial_run('foo', r1, feed_dict={a: 1, b: 2})
 
-  @test_util.disable_c_api  # No shape registration for 'ConstructionFails'
   def testOpConstructionErrorPayload(self):
+    if ops._USE_C_API: return  # No shape registration for 'ConstructionFails'
+
     with session.Session():
       failing_op = ops.get_default_graph().create_op(
           'ConstructionFails', [], [], name='f')
@@ -208,7 +209,6 @@ class SessionTest(test_util.TensorFlowTestCase):
       with self.assertRaises(TypeError):
         s.run({'a': a, 'b': None})
 
-  @test_util.disable_c_api  # session.make_callable() doesn't work with C API
   def testFetchSingleton(self):
     with session.Session() as sess:
       a = constant_op.constant(42.0)
@@ -231,7 +231,6 @@ class SessionTest(test_util.TensorFlowTestCase):
       res = sess.run(a.op)  # An op, not a tensor.
       self.assertEqual(None, res)
 
-  @test_util.disable_c_api  # session.make_callable() doesn't work with C API
   def testFetchList(self):
     with session.Session() as sess:
       a = constant_op.constant(42.0)
@@ -247,7 +246,6 @@ class SessionTest(test_util.TensorFlowTestCase):
       self.assertTrue(isinstance(res, list))
       self.assertEqual([42.0, None, 44.0, 42.0, None], res)
 
-  @test_util.disable_c_api  # session.make_callable() doesn't work with C API
   def testFetchTuple(self):
     with session.Session() as sess:
       a = constant_op.constant(42.0)
@@ -261,7 +259,6 @@ class SessionTest(test_util.TensorFlowTestCase):
       self.assertTrue(isinstance(res, tuple))
       self.assertEqual((42.0, None, 44.0, 42.0), res)
 
-  @test_util.disable_c_api  # session.make_callable() doesn't work with C API
   def testFetchNamedTuple(self):
     # pylint: disable=invalid-name
     ABC = collections.namedtuple('ABC', ['a', 'b', 'c'])
@@ -1178,7 +1175,6 @@ class SessionTest(test_util.TensorFlowTestCase):
       self.assertAllEqual(b_val, [[2.0, 2.0, 2.0]])
       self.assertAllEqual(a2_val, [[1.0, 1.0]])
 
-  @test_util.disable_c_api  # session.make_callable() doesn't work with C API
   def testFeedAndFetch(self):
     with session.Session() as sess:
       for dtype in [dtypes.float16,
@@ -1225,7 +1221,6 @@ class SessionTest(test_util.TensorFlowTestCase):
           self.assertAllEqual(np_array, out_v)
           self.assertAllEqual(np_array, feed_v)
 
-  @test_util.disable_c_api  # session.make_callable() doesn't work with C API
   def testMakeCallableOnTensorWithRunOptions(self):
     with session.Session() as sess:
       a = constant_op.constant(42.0)
@@ -1238,7 +1233,6 @@ class SessionTest(test_util.TensorFlowTestCase):
       self.assertEqual(42.0, res)
       self.assertGreater(len(run_metadata.step_stats.dev_stats), 0)
 
-  @test_util.disable_c_api  # session.make_callable() doesn't work with C API
   def testMakeCallableOnOperationWithRunOptions(self):
     with session.Session() as sess:
       a = variables.Variable(42.0)
@@ -1253,7 +1247,6 @@ class SessionTest(test_util.TensorFlowTestCase):
       self.assertEqual(43.0, sess.run(a))
       self.assertGreater(len(run_metadata.step_stats.dev_stats), 0)
 
-  @test_util.disable_c_api  # session.make_callable() doesn't work with C API
   def testMakeCallableWithFeedListAndRunOptions(self):
     with session.Session() as sess:
       ph = array_ops.placeholder(dtypes.float32)
@@ -1460,9 +1453,10 @@ class SessionTest(test_util.TensorFlowTestCase):
         self.assertTrue(run_metadata.HasField('step_stats'))
         self.assertEquals(len(run_metadata.step_stats.dev_stats), 1)
 
-  # TODO(nolivia): C API doesn't yet handle marking nodes as not feedable.
-  @test_util.disable_c_api
   def testFeedShapeCompatibility(self):
+    # TODO(nolivia): C API doesn't yet handle marking nodes as not feedable.
+    if ops._USE_C_API: return
+
     with session.Session() as sess:
       some_tensor = constant_op.constant([2.0, 2.0, 2.0, 2.0])
       new_shape = constant_op.constant([2, 2])
@@ -1746,6 +1740,15 @@ class SessionTest(test_util.TensorFlowTestCase):
 
 class GraphMutationTest(test_util.TensorFlowTestCase):
 
+  def setUp(self):
+    self._original_use_c_api_value = ops._USE_C_API
+    ops._USE_C_API = True
+    super(GraphMutationTest, self).setUp()
+
+  def tearDown(self):
+    ops._USE_C_API = self._original_use_c_api_value
+    super(GraphMutationTest, self).tearDown()
+
   def testUpdateInputAfterRunning(self):
     with ops.Graph().as_default() as g:
       a = constant_op.constant(1.0)
-- 
GitLab


From 2ea11416c908041958f29a8fa513cf8f107fbe78 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 7 Dec 2017 14:12:44 -0800
Subject: [PATCH 0771/1225] Simplify and fix some bugs in constant folding of
 neutral/absorbing element optimizations.

PiperOrigin-RevId: 178293088
---
 .../grappler/optimizers/constant_folding.cc   |  61 +++++-----
 .../optimizers/constant_folding_test.cc       | 114 ++++++++++++++++++
 2 files changed, 146 insertions(+), 29 deletions(-)

diff --git a/tensorflow/core/grappler/optimizers/constant_folding.cc b/tensorflow/core/grappler/optimizers/constant_folding.cc
index 718aa69ebf..acd642044b 100644
--- a/tensorflow/core/grappler/optimizers/constant_folding.cc
+++ b/tensorflow/core/grappler/optimizers/constant_folding.cc
@@ -680,6 +680,7 @@ Status CreateConstantTensorAttrValue(DataType type, double value,
                                      const TensorShapeProto& shape,
                                      AttrValue* attr_tensor) {
   TensorProto* t = attr_tensor->mutable_tensor();
+  t->set_dtype(type);
   *t->mutable_tensor_shape() = shape;
   switch (type) {
     SET_TENSOR_VAL_CASE(DT_FLOAT, float, float);
@@ -1332,46 +1333,48 @@ Status ConstantFolding::SimplifyGraph(GraphDef* output,
       }
       const TensorShapeProto& output_shape =
           properties.GetOutputProperties(node.name())[0].shape();
-      const TensorShapeProto& x_shape =
-          properties.GetInputProperties(node.name())[0].shape();
+
+      // Simplify element-wise  multiplication by ones or addition of zeros.
       const TensorShapeProto& y_shape =
           properties.GetInputProperties(node.name())[1].shape();
       const bool x_is_zero = IsZeros(*x);
-      const bool x_matches_output_shape = ShapesEqual(output_shape, x_shape);
-      const bool y_is_zero = IsZeros(*y);
+      const bool x_is_one = IsOnes(*x);
       const bool y_matches_output_shape = ShapesEqual(output_shape, y_shape);
-
-      // Simplify addition of zeros.
-      if (is_add) {
-        if (x_is_zero && y_matches_output_shape) {
-          // 0 + y = y.
-          ReplaceAddOrMulWithIdentity(1, &node);
-          continue;
-        } else if (y_is_zero && x_matches_output_shape) {
-          // x + 0 = y.
-          ReplaceAddOrMulWithIdentity(0, &node);
-          continue;
-        }
+      if (y_matches_output_shape &&
+          ((is_mul && x_is_one) || (is_add && x_is_zero))) {
+        // 1 * y = y or 0 + y = y.
+        ReplaceAddOrMulWithIdentity(1, &node);
+        continue;
+      }
+      const TensorShapeProto& x_shape =
+          properties.GetInputProperties(node.name())[0].shape();
+      const bool y_is_zero = IsZeros(*y);
+      const bool y_is_one = IsOnes(*y);
+      const bool x_matches_output_shape = ShapesEqual(output_shape, x_shape);
+      if (x_matches_output_shape &&
+          ((is_mul && y_is_one) || (is_add && y_is_zero))) {
+        // x * 1 = x or x + 0 = x
+        ReplaceAddOrMulWithIdentity(0, &node);
+        continue;
       }
 
-      // Simplify element-wise multiplication by ones.
-      if (is_mul) {
-        if (IsOnes(*x) && y_matches_output_shape) {
-          // 1 * y = y.
-          ReplaceAddOrMulWithIdentity(1, &node);
+      // Simplify multiplication and matmul by zeros.
+      if (!is_add && (x_is_zero || y_is_zero)) {
+        const PartialTensorShape shp(output_shape);
+        if (shp.IsFullyDefined()) {
+          TF_RETURN_IF_ERROR(
+              ReplaceAddOrMulWithConstant(0, output_shape, &node));
           continue;
         }
-        if (IsOnes(*y) && x_matches_output_shape) {
-          // x * 1 = x.
+        // Even if an input shape is only partially known, we may known that it
+        // matches the output shape and thus forward the corresponding zero
+        // input.
+        if (is_mul && x_is_zero && x_matches_output_shape) {
           ReplaceAddOrMulWithIdentity(0, &node);
-          continue;
+        } else if (is_mul && y_is_zero && y_matches_output_shape) {
+          ReplaceAddOrMulWithIdentity(1, &node);
         }
       }
-
-      // Simplify multiplication and matmul by zeros.
-      if (x_is_zero || y_is_zero) {
-        TF_RETURN_IF_ERROR(ReplaceAddOrMulWithConstant(0, output_shape, &node));
-      }
     }
   }
   return Status::OK();
diff --git a/tensorflow/core/grappler/optimizers/constant_folding_test.cc b/tensorflow/core/grappler/optimizers/constant_folding_test.cc
index ffa09b8e29..21011eb790 100644
--- a/tensorflow/core/grappler/optimizers/constant_folding_test.cc
+++ b/tensorflow/core/grappler/optimizers/constant_folding_test.cc
@@ -198,6 +198,120 @@ TEST_F(ConstantFoldingTest, NeutralElement) {
   }
 }
 
+TEST_F(ConstantFoldingTest, NeutralElement_PartialShape_UnknownOutputShape) {
+  tensorflow::Scope s = tensorflow::Scope::NewRootScope();
+  Output x_known =
+      ops::Placeholder(s.WithOpName("x_known"), DT_FLOAT,
+                       ops::Placeholder::Shape(TensorShape({2, 2})));
+  Output x_partially_known =
+      ops::Placeholder(s.WithOpName("x_partially_unknown"), DT_FLOAT,
+                       ops::Placeholder::Shape(PartialTensorShape({-1, -1})));
+  Output x_unknown = ops::Placeholder(s.WithOpName("x_unknown"), DT_FLOAT);
+  Output zeros_known = ops::ZerosLike(s.WithOpName("zeros_known"), x_known);
+  Output zeros_partially_known =
+      ops::ZerosLike(s.WithOpName("zeros_partially_known"), x_partially_known);
+  Output zeros_unknown =
+      ops::ZerosLike(s.WithOpName("zeros_unknown"), x_unknown);
+
+  // Multiplies without any additional ops to supply the output shape.
+  int count = 0;
+  std::vector<Output> muls;
+  std::unordered_set<string> not_converted;
+  std::unordered_set<string> to_const;
+  std::unordered_set<string> to_identity;
+  for (const auto* x : {&x_known, &x_partially_known, &x_unknown}) {
+    for (const auto* zeros :
+         {&zeros_known, &zeros_partially_known, &zeros_unknown}) {
+      const string name = strings::StrCat("mul_", count++);
+      muls.push_back(ops::Mul(s.WithOpName(name), *x, *zeros));
+      if (x == &x_partially_known && zeros == &zeros_partially_known) {
+        to_identity.insert(name);
+      } else if (x == &x_unknown || zeros == &zeros_unknown) {
+        not_converted.insert(name);
+      } else {
+        to_const.insert(name);
+      }
+    }
+  }
+
+  GrapplerItem item;
+  TF_CHECK_OK(s.ToGraphDef(&item.graph));
+
+  ConstantFolding optimizer(RewriterConfig::AGGRESSIVE,
+                            nullptr /* cpu_device */);
+  GraphDef output;
+  Status status = optimizer.Optimize(nullptr, item, &output);
+  TF_EXPECT_OK(status);
+  LOG(INFO) << output.DebugString();
+
+  EXPECT_EQ(15, output.node_size());
+  for (int i = 0; i < output.node_size(); ++i) {
+    const NodeDef& node = output.node(i);
+    const string& name = node.name();
+    if (to_const.count(name) > 0) {
+      EXPECT_EQ("Const", node.op()) << node.name();
+    } else if (to_identity.count(name) > 0) {
+      EXPECT_EQ("Identity", node.op()) << node.name();
+    } else if (not_converted.count(name) > 0) {
+      EXPECT_EQ("Mul", node.op()) << node.name();
+    }
+  }
+}
+
+TEST_F(ConstantFoldingTest, NeutralElement_PartialShape_KnownOutputShape) {
+  tensorflow::Scope s = tensorflow::Scope::NewRootScope();
+  Output known_shape = ops::Const(s.WithOpName("known_shape"), 0.0f, {2, 2});
+  Output x_partially_known =
+      ops::Placeholder(s.WithOpName("x_partially_unknown"), DT_FLOAT,
+                       ops::Placeholder::Shape(PartialTensorShape({-1, -1})));
+  Output x_unknown = ops::Placeholder(s.WithOpName("x_unknown"), DT_FLOAT);
+  Output zeros_partially_known =
+      ops::ZerosLike(s.WithOpName("zeros_partially_known"), x_partially_known);
+  Output zeros_unknown =
+      ops::ZerosLike(s.WithOpName("zeros_unknown"), x_unknown);
+
+  // If at least one of the inputs to AddN has a known shape, shape inference
+  // will propagate the shape back to the inputs of AddN, making the
+  // output shapes of all its inputs known
+  std::vector<Output> muls_deduced_output_shape;
+  std::unordered_set<string> to_const;
+  int count = 0;
+  for (const auto& x : {x_partially_known, x_unknown}) {
+    for (const auto& zeros : {zeros_partially_known, zeros_unknown}) {
+      const string name = strings::StrCat("mul_", count++);
+      muls_deduced_output_shape.push_back(
+          ops::Mul(s.WithOpName(name), x, zeros));
+      to_const.insert(name);
+    }
+  }
+  // We add a known shape as input to AddN to propagate it back to the
+  // multiplies above, which means they can all be turned into Const nodes.
+  muls_deduced_output_shape.push_back(known_shape);
+  Output addn1 = ops::AddN(s.WithOpName("addn1"), muls_deduced_output_shape);
+
+  GrapplerItem item;
+  TF_CHECK_OK(s.ToGraphDef(&item.graph));
+
+  ConstantFolding optimizer(RewriterConfig::AGGRESSIVE,
+                            nullptr /* cpu_device */);
+  GraphDef output;
+  Status status = optimizer.Optimize(nullptr, item, &output);
+  TF_EXPECT_OK(status);
+  LOG(INFO) << output.DebugString();
+
+  EXPECT_EQ(10, output.node_size());
+  for (int i = 0; i < output.node_size(); ++i) {
+    const NodeDef& node = output.node(i);
+    const string& name = node.name();
+    if (to_const.count(name) > 0) {
+      EXPECT_EQ("Const", node.op()) << node.name();
+      EXPECT_EQ(2, node.input_size());
+      EXPECT_TRUE(IsControlInput(node.input(0)));
+      EXPECT_TRUE(IsControlInput(node.input(1)));
+    }
+  }
+}
+
 TEST_F(ConstantFoldingTest, CreateConstNodes) {
   tensorflow::Scope s = tensorflow::Scope::NewRootScope();
 
-- 
GitLab


From daab44f7af8773f4c84eb1e4e592b4d3a4d6309d Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 7 Dec 2017 14:32:28 -0800
Subject: [PATCH 0772/1225] Allow partial handling of some cyclic graphs, at
 least to get graph visualizations.

TensorFlow graphs may be cyclic, generally corresponding to the encoding
of control flow (e.g. TensorFlow while loops) in the graph. TensorFlow Lite
does not currently support such control flow, and toco currently relies
extensively on the assumption that graphs are acyclic. The case of RNNs
is handled by special-casing some edges as 'RNN back-edges' so as to
keep the graph itself (i.e. without these back-edges) acyclic.
This assumption is guarded by CheckInvariants, so cyclic graphs result
in early failures.

The issue with that is that at the moment, given a cyclic TensorFlow graph,
toco is not even useful to get a mere graph-visualization of it. Leaving
alone actually supporting control flow, it would be nice to at least
support getting a graphviz. Indeed, a good graphviz is often the first
step toward reasoning about what a graph really is doing and how that
could be modified to avoid involving control flow.

This change achieves that as follows.

In import_tensorflow.cc, NextIteration nodes are special-cased: instead
of being imported as (unsupported) operators, they are imported as
RNN back-edges. As NextIteration nodes are characteristic of control
flow graphs which we do not support anyway, special-casing them is
acceptable.

This alone results in imported graphs that are no longer cyclic,
the cycles being only closed by RNN back edges (maintained separately
from the graph). So that alone already removes the CheckInvariants
failures.

However, another problem appears at this point: the resulting graph
visualizations are too large, as the graphs are not correctly pruned.
This is because the cycles (involving RNN back-edges) keep themselves
alive from the point of view of graph-pruning transformations
(remove_unused_op). Our graph transformations, which are local,
cannot see that sometimes a whole connected component of the graph
is disconnected from --input_arrays and --output_arrays, thus should
be dropped. That can only be done by a global tranformation. So
we add such a global transformation, running once at the end of each
graph-transformations pass (i.e. infrequently):
DiscardUselessConnectedComponents

This, however, raises another question. Discarding unused cycles involving
RNN back-edges implies, in particular, discarding RNN back-edges.
So far, RNN back-edges were always explicitily specified by the user
on the command-line, and we never discard things explicitly specified
by the user (that would in particular make TF->TF transformations
not idempotent). What changes here is that now a RNN back-edge needs
not be explicitly specified by the user anymore, it may instead be
internally constructed by import_tensorflow encountering a NextIteration
node. So we need to distinguish between these two cases. We add a
'discardable' bool flag on RNN back-edges.

It is important to allow discarding an array if it only occurs as a
vertex touching a discardable RNN back-edge, otherwise graph pruning
stops prematurely. That implies that RNN back-edges may be dangling,
i.e. may point to an array name that doesn't actually exist (anymore).

PiperOrigin-RevId: 178296359
---
 .../graph_transformations.cc                  |  92 ++++++++++++++++
 .../remove_trivial_concatenation_input.cc     |   3 +-
 .../graph_transformations/remove_unused_op.cc |   7 +-
 .../contrib/lite/toco/import_tensorflow.cc    | 100 ++++++++++++++----
 .../contrib/lite/toco/model_flags.proto       |  33 +++---
 tensorflow/contrib/lite/toco/tooling_util.cc  |  48 +++++----
 6 files changed, 230 insertions(+), 53 deletions(-)

diff --git a/tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.cc b/tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.cc
index 323fec6cf8..3a7611a668 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.cc
@@ -41,6 +41,97 @@ void PrintModelStats(const string& label, const Model& model) {
             << " quantized)";
 }
 
+// Some graphs have RNN back-edges that are discardable, having been
+// created typically by TensorFlow import rather than specified by the user.
+// Such graphs might have cycles (closed by RNN back-edges) that may be pruned.
+// Local graph transformations can't identify such global features,
+// so this function performs this global transformation.
+//
+// The other (and related) thing that is peculiar about RNN back-edges
+// is that they do not prevent the arrays that they touch, from being
+// pruned. Thus, they may refer to array names which no longer exist.
+// The intent is for that to result in the eventual pruning of such
+// 'dangling' RNN back-edges. We perform this pruning at the end of this
+// function, as the pruning of connected components done here may leave
+// more RNN back-edges dangling.
+void DiscardUselessConnectedComponentsAndRNNBackEdges(Model* model) {
+  // Identify the set of arrays that are in 'useful' connected components
+  // of the graph, which means connected to output arrays.
+  std::unordered_set<string> useful_arrays;
+  for (const string& output_array : model->flags.output_arrays()) {
+    useful_arrays.insert(output_array);
+  }
+  bool found_new_useful_arrays;
+  do {
+    found_new_useful_arrays = false;
+    for (const auto& op : model->operators) {
+      bool op_touches_useful_arrays = false;
+      for (const string& output : op->outputs) {
+        op_touches_useful_arrays |= useful_arrays.count(output);
+      }
+      if (op_touches_useful_arrays) {
+        for (const string& input : op->inputs) {
+          found_new_useful_arrays |= !useful_arrays.count(input);
+          useful_arrays.insert(input);
+        }
+        for (const string& output : op->outputs) {
+          found_new_useful_arrays |= !useful_arrays.count(output);
+          useful_arrays.insert(output);
+        }
+      }
+    }
+    for (const auto& rnn_state : model->flags.rnn_states()) {
+      bool rnn_back_edge_touches_useful_arrays =
+          useful_arrays.count(rnn_state.state_array());
+      if (rnn_back_edge_touches_useful_arrays) {
+        found_new_useful_arrays |=
+            !useful_arrays.count(rnn_state.back_edge_source_array());
+        useful_arrays.insert(rnn_state.back_edge_source_array());
+      }
+    }
+  } while (found_new_useful_arrays);
+  // Erase arrays that aren't useful, and that are discardable.
+  for (auto it = model->arrays.begin(); it != model->arrays.end();) {
+    if (useful_arrays.count(it->first) ||
+        !IsDiscardableArray(*model, it->first)) {
+      ++it;
+    } else {
+      it = model->arrays.erase(it);
+    }
+  }
+  // Erase operators that do not produce a useful output array.
+  for (auto it = model->operators.begin(); it != model->operators.end();) {
+    // Only need to test the first output, as we simultaneously added all of
+    // an operator's outputs to the list of output arrays.
+    if (useful_arrays.count((*it)->outputs[0])) {
+      ++it;
+    } else {
+      for (const string& output : (*it)->outputs) {
+        CHECK(!useful_arrays.count(output));
+      }
+      it = model->operators.erase(it);
+    }
+  }
+  // Erase RNN back-edges that are 'dangling' i.e. that touch an array
+  // that no longer exists. This should only happen for discardable RNN
+  // back-edges.
+  std::vector<RnnState> rnn_states_to_keep;
+  for (const auto& rnn_state : model->flags.rnn_states()) {
+    const bool dangling =
+        !model->arrays.count(rnn_state.back_edge_source_array()) ||
+        !model->arrays.count(rnn_state.state_array());
+    if (dangling) {
+      CHECK(rnn_state.discardable());
+    } else {
+      rnn_states_to_keep.push_back(rnn_state);
+    }
+  }
+  model->flags.clear_rnn_states();
+  for (const auto& rnn_state : rnn_states_to_keep) {
+    *model->flags.add_rnn_states() = rnn_state;
+  }
+}
+
 bool GraphTransformationsPass(int increment, Model* model,
                               const GraphTransformationsSet& transformations) {
   CHECK(increment == 1 || increment == -1);
@@ -86,6 +177,7 @@ bool GraphTransformationsPass(int increment, Model* model,
       op_index += increment;
     }
   }
+  DiscardUselessConnectedComponentsAndRNNBackEdges(model);
   return changed;
 }
 
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_concatenation_input.cc b/tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_concatenation_input.cc
index b603735704..23a5c857e8 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_concatenation_input.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_concatenation_input.cc
@@ -57,7 +57,8 @@ bool RemoveTrivialConcatenationInput::Run(Model* model, std::size_t op_index) {
 
   // Drop trivial inputs.
   for (const string& input : trivial_inputs) {
-    if (CountOpsWithInput(*model, input) == 1) {
+    if (IsDiscardableArray(*model, input) &&
+        CountOpsWithInput(*model, input) == 1) {
       model->arrays.erase(input);
     }
   }
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/remove_unused_op.cc b/tensorflow/contrib/lite/toco/graph_transformations/remove_unused_op.cc
index 0ab301552f..674a46815b 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/remove_unused_op.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/remove_unused_op.cc
@@ -65,7 +65,12 @@ bool RemoveUnusedOp::Run(Model* model, std::size_t op_index) {
     }
     for (const auto& rnn_state : model->flags.rnn_states()) {
       if (output == rnn_state.back_edge_source_array()) {
-        return false;
+        // The output is consumed by a RNN back-edge..
+        if (!IsDiscardableArray(*model, rnn_state.back_edge_source_array()) ||
+            !IsDiscardableArray(*model, rnn_state.state_array()) ||
+            CountOpsWithInput(*model, rnn_state.state_array())) {
+          return false;
+        }
       }
     }
     if (CountOpsWithInput(*model, output)) {
diff --git a/tensorflow/contrib/lite/toco/import_tensorflow.cc b/tensorflow/contrib/lite/toco/import_tensorflow.cc
index cde5a936af..9f72f9a1d3 100644
--- a/tensorflow/contrib/lite/toco/import_tensorflow.cc
+++ b/tensorflow/contrib/lite/toco/import_tensorflow.cc
@@ -1380,6 +1380,31 @@ void ConvertSvdfOperator(const NodeDef& node,
   model->operators.emplace_back(op);
 }
 
+// Some TensorFlow ops only occur in graph cycles, representing
+// control flow. We do not currently support control flow, so we wouldn't
+// be able to fully support such graphs, including performing inference,
+// anyway. However, rather than erroring out early on graphs being cyclic,
+// it helps to at least support these just enough to allow getting a
+// graph visualization. This is not trivial, as we require graphs to be
+// acyclic aside from RNN back-edges. The solution is to special-case
+// such ops as RNN back-edges, which is technically incorrect (does not
+// allow representing the op's semantics) but good enough to get a
+// graph visualization.
+void ConvertOperatorSpecialCasedAsRNNBackEdge(
+    const NodeDef& node, const TensorFlowImportFlags& tf_import_flags,
+    Model* model) {
+  // At the moment, the only type of operator special-cased in this way is
+  // NextIteration, occuring only in control-flow cycles.
+  CHECK_EQ(node.op(), "NextIteration");
+  CHECK_EQ(node.input_size(), 1);
+  auto* rnn_state = model->flags.add_rnn_states();
+  // This RNN state is not explicitly created by the user, so it's
+  // OK for some later graph transformation to discard it.
+  rnn_state->set_discardable(true);
+  rnn_state->set_state_array(node.name());
+  rnn_state->set_back_edge_source_array(node.input(0));
+}
+
 void StripCaretFromArrayNames(Model* model) {
   for (auto& op : model->operators) {
     for (auto& input : op->inputs) {
@@ -1402,26 +1427,61 @@ void StripZeroOutputIndexFromInputs(NodeDef* node) {
   }
 }
 
-void AddExtraOutputsFedIntoOtherOps(Model* model) {
+// In TensorFlow GraphDef, when a node has multiple outputs, they are named
+// name:0, name:1, ...
+// where 'name' is the node's name(). Just 'name' is an equivalent shorthand
+// form for name:0.
+// A TensorFlow GraphDef does not explicitly list all the outputs of each node
+// (unlike inputs), it being implied by the node's name and operator type
+// (the latter implies the number of outputs).
+// This makes it non-trivial for us to reconstruct the list of all arrays
+// present in the graph and, for each operator, the list of its outputs.
+// We do that by taking advantage of the fact that
+// at least each node lists explicitly its inputs, so after we've loaded
+// all nodes, we can use that information.
+void AddExtraOutputs(Model* model) {
+  // Construct the list of all arrays consumed by anything in the graph.
+  std::vector<string> consumed_arrays;
+  // Add arrays consumed by an op.
   for (const auto& consumer_op : model->operators) {
     for (const string& input : consumer_op->inputs) {
-      const std::vector<string>& split = absl::StrSplit(input, ':');
-      if (split.size() != 2) {
-        continue;
-      }
-      int output_index = 0;
-      if (!absl::SimpleAtoi(split[1], &output_index)) {
-        continue;
-      }
-      auto* producer_op = GetOpWithOutput(*model, split[0]);
-      if (!producer_op) {
-        continue;
-      }
-      while (producer_op->outputs.size() <= output_index) {
-        using toco::port::StringF;
-        producer_op->outputs.push_back(
-            StringF("%s:%d", split[0], producer_op->outputs.size()));
-      }
+      consumed_arrays.push_back(input);
+    }
+  }
+  // Add global outputs of the model.
+  for (const string& output_array : model->flags.output_arrays()) {
+    consumed_arrays.push_back(output_array);
+  }
+  // Add arrays consumed by a RNN back-edge.
+  for (const auto& rnn_state : model->flags.rnn_states()) {
+    consumed_arrays.push_back(rnn_state.back_edge_source_array());
+  }
+  // Now add operator outputs so that all arrays that are consumed,
+  // are produced.
+  for (const string& consumed_array : consumed_arrays) {
+    // Split the consumed array name into the form name:output_index.
+    const std::vector<string>& split = absl::StrSplit(consumed_array, ':');
+    // If not of the form name:output_index, then this is not an additional
+    // output of a node with multiple outputs, so nothing to do here.
+    if (split.size() != 2) {
+      continue;
+    }
+    int output_index = 0;
+    if (!absl::SimpleAtoi(split[1], &output_index)) {
+      continue;
+    }
+    // Each op is initially recorded as producing at least the array that
+    // has its name. We use that to identify the producer node.
+    auto* producer_op = GetOpWithOutput(*model, split[0]);
+    if (!producer_op) {
+      continue;
+    }
+    // Add extra outputs to that producer node, all the way to the
+    // output_index.
+    while (producer_op->outputs.size() <= output_index) {
+      using toco::port::StringF;
+      producer_op->outputs.push_back(
+          StringF("%s:%d", split[0], producer_op->outputs.size()));
     }
   }
 }
@@ -1633,6 +1693,8 @@ std::unique_ptr<Model> ImportTensorFlowGraphDef(
       ConvertMeanOperator(node, tf_import_flags, model);
     } else if (node.op() == "Svdf") {
       ConvertSvdfOperator(node, tf_import_flags, model);
+    } else if (node.op() == "NextIteration") {
+      ConvertOperatorSpecialCasedAsRNNBackEdge(node, tf_import_flags, model);
     } else {
       ConvertUnsupportedOperator(node, tf_import_flags, model);
     }
@@ -1641,7 +1703,7 @@ std::unique_ptr<Model> ImportTensorFlowGraphDef(
   ResolveModelFlags(model_flags, model);
 
   StripCaretFromArrayNames(model);
-  AddExtraOutputsFedIntoOtherOps(model);
+  AddExtraOutputs(model);
   FixNoMissingArray(model);
   FixNoOrphanedArray(model);
   FixOperatorOrdering(model);
diff --git a/tensorflow/contrib/lite/toco/model_flags.proto b/tensorflow/contrib/lite/toco/model_flags.proto
index d818a3632d..05c48bc369 100644
--- a/tensorflow/contrib/lite/toco/model_flags.proto
+++ b/tensorflow/contrib/lite/toco/model_flags.proto
@@ -77,6 +77,25 @@ message InputArray {
   optional IODataType data_type = 5;
 }
 
+message RnnState {
+  optional string state_array = 1;
+  optional string back_edge_source_array = 2;
+  optional bool discardable = 5;
+  // TODO(benoitjacob): drop the 'size' field. Should be redundant with
+  // --input_shapes and shapes propagation.
+  optional int32 size = 3;
+  // TODO(benoitjacob): manually_create is a temporary hack:
+  // due to discrepancies between the current toco dims tracking and
+  // TensorFlow shapes, for some models we need to manually create RNN state
+  // arrays with a specified shape.
+  // Maybe we should actually implement back-edges as operators of their own,
+  // which would remove the need for much special-casing, including here,
+  // we could probably consistently let PropagateFixedSizes handle state
+  // arrays.
+  // TODO(benoitjacob): should really drop manually_create now.
+  optional bool manually_create = 4;
+}
+
 // ModelFlags encodes properties of a model that, depending on the file
 // format, may or may not be recorded in the model file. The purpose of
 // representing these properties in ModelFlags is to allow passing them
@@ -112,20 +131,6 @@ message ModelFlags {
   // the 'batch' field: at most one of these two fields can be set.
   optional bool variable_batch = 10;
 
-  message RnnState {
-    optional string state_array = 1;
-    optional string back_edge_source_array = 2;
-    optional int32 size = 3;
-    // TODO(benoitjacob): manually_create is a temporary hack:
-    // due to discrepancies between the current toco dims tracking and
-    // TensorFlow shapes, for some models we need to manually create RNN state
-    // arrays with a specified shape.
-    // Maybe we should actually implement back-edges as operators of their own,
-    // which would remove the need for much special-casing, including here,
-    // we could probably consistently let PropagateFixedSizes handle state
-    // arrays.
-    optional bool manually_create = 4;
-  }
   repeated RnnState rnn_states = 12;
 
   // Checks applied to the model, typically after toco's comprehensive
diff --git a/tensorflow/contrib/lite/toco/tooling_util.cc b/tensorflow/contrib/lite/toco/tooling_util.cc
index 637287a947..078afe79d0 100644
--- a/tensorflow/contrib/lite/toco/tooling_util.cc
+++ b/tensorflow/contrib/lite/toco/tooling_util.cc
@@ -573,8 +573,10 @@ void CheckNoMissingArray(const Model& model) {
         << "Output array not found: " << output_array;
   }
   for (const auto& rnn_state : model.flags.rnn_states()) {
-    CHECK(model.arrays.count(rnn_state.state_array()));
-    CHECK(model.arrays.count(rnn_state.back_edge_source_array()));
+    if (!rnn_state.discardable()) {
+      CHECK(model.arrays.count(rnn_state.state_array()));
+      CHECK(model.arrays.count(rnn_state.back_edge_source_array()));
+    }
   }
 }
 
@@ -596,12 +598,18 @@ void FixNoMissingArray(Model* model) {
       model->GetOrCreateArray(output_array);
     }
   }
+  for (const auto& rnn_state : model->flags.rnn_states()) {
+    model->GetOrCreateArray(rnn_state.state_array());
+    model->GetOrCreateArray(rnn_state.back_edge_source_array());
+  }
 }
 
 void CheckNoOrphanedArray(const Model& model) {
   std::unordered_set<string> arrays_without_known_use;
   for (const auto& array : model.arrays) {
-    arrays_without_known_use.insert(array.first);
+    if (IsDiscardableArray(model, array.first)) {
+      arrays_without_known_use.insert(array.first);
+    }
   }
   for (const auto& op : model.operators) {
     for (const auto& input : op->inputs) {
@@ -611,6 +619,10 @@ void CheckNoOrphanedArray(const Model& model) {
       arrays_without_known_use.erase(output);
     }
   }
+  for (const auto& rnn_state : model.flags.rnn_states()) {
+    arrays_without_known_use.erase(rnn_state.state_array());
+    arrays_without_known_use.erase(rnn_state.back_edge_source_array());
+  }
   if (!arrays_without_known_use.empty()) {
     for (const auto& array : arrays_without_known_use) {
       LOG(INFO) << "Error: Orphaned array: " << array;
@@ -632,8 +644,14 @@ void FixNoOrphanedArray(Model* model) {
       arrays_without_known_use.erase(output);
     }
   }
+  for (const auto& rnn_state : model->flags.rnn_states()) {
+    arrays_without_known_use.erase(rnn_state.state_array());
+    arrays_without_known_use.erase(rnn_state.back_edge_source_array());
+  }
   for (const auto& array : arrays_without_known_use) {
-    model->arrays.erase(array);
+    if (IsDiscardableArray(*model, array)) {
+      model->arrays.erase(array);
+    }
   }
 }
 
@@ -1042,16 +1060,8 @@ void ResolveModelFlags(const ModelFlags& model_flags, Model* model) {
 
 #undef RESOLVE_MODEL_FLAG
 
-  if (model->flags.rnn_states_size() == 0) {
+  if (!model_flags.rnn_states().empty()) {
     model->flags.mutable_rnn_states()->CopyFrom(model_flags.rnn_states());
-  } else {
-    CHECK_EQ(model->flags.rnn_states_size(), model_flags.rnn_states_size());
-    for (int i = 0; i < model->flags.rnn_states_size(); i++) {
-      CHECK_EQ(model->flags.rnn_states(i).state_array(),
-               model_flags.rnn_states(i).state_array());
-      CHECK_EQ(model->flags.rnn_states(i).back_edge_source_array(),
-               model_flags.rnn_states(i).back_edge_source_array());
-    }
   }
 
   if (model->flags.model_checks_size() == 0) {
@@ -1571,11 +1581,13 @@ bool IsDiscardableArray(const Model& model, const string& array_name) {
     }
   }
   for (const auto& rnn_state : model.flags.rnn_states()) {
-    if (array_name == rnn_state.state_array()) {
-      return false;
-    }
-    if (array_name == rnn_state.back_edge_source_array()) {
-      return false;
+    if (!rnn_state.discardable()) {
+      if (array_name == rnn_state.state_array()) {
+        return false;
+      }
+      if (array_name == rnn_state.back_edge_source_array()) {
+        return false;
+      }
     }
   }
   return true;
-- 
GitLab


From 2d4c29cd6a0627fdd71a752e6bd919204c7cb8bf Mon Sep 17 00:00:00 2001
From: Mustafa Ispir <ispir@google.com>
Date: Thu, 7 Dec 2017 14:34:25 -0800
Subject: [PATCH 0773/1225] Added __str__ to ClusterSpec. This will improve the
 logs users are attaching to github issues. For example : #14942

PiperOrigin-RevId: 178296636
---
 tensorflow/python/training/server_lib.py      |  6 ++++++
 tensorflow/python/training/server_lib_test.py | 11 +++++++++++
 2 files changed, 17 insertions(+)

diff --git a/tensorflow/python/training/server_lib.py b/tensorflow/python/training/server_lib.py
index 2091eca0b9..29da67a30a 100644
--- a/tensorflow/python/training/server_lib.py
+++ b/tensorflow/python/training/server_lib.py
@@ -307,6 +307,12 @@ class ClusterSpec(object):
   def __ne__(self, other):
     return self._cluster_spec != other
 
+  def __str__(self):
+    key_values = self.as_dict()
+    string_items = [
+        repr(k) + ": " + repr(key_values[k]) for k in sorted(key_values)]
+    return "ClusterSpec({" + ", ".join(string_items) + "})"
+
   def as_dict(self):
     """Returns a dictionary from job names to their tasks.
 
diff --git a/tensorflow/python/training/server_lib_test.py b/tensorflow/python/training/server_lib_test.py
index 26aac787ed..063044f0d0 100644
--- a/tensorflow/python/training/server_lib_test.py
+++ b/tensorflow/python/training/server_lib_test.py
@@ -421,6 +421,17 @@ class ServerDefTest(test.TestCase):
 
 class ClusterSpecTest(test.TestCase):
 
+  def testStringConversion(self):
+    cluster_spec = server_lib.ClusterSpec({
+        "ps": ["ps0:1111"],
+        "worker": ["worker0:3333", "worker1:4444"]
+    })
+
+    expected_str = (
+        "ClusterSpec({'ps': ['ps0:1111'], 'worker': ['worker0:3333', "
+        "'worker1:4444']})")
+    self.assertEqual(expected_str, str(cluster_spec))
+
   def testProtoDictDefEquivalences(self):
     cluster_spec = server_lib.ClusterSpec({
         "ps": ["ps0:2222", "ps1:2222"],
-- 
GitLab


From 364b4c83729480596447088d2ee81353bbc83f08 Mon Sep 17 00:00:00 2001
From: Yong Tang <yong.tang.github@outlook.com>
Date: Thu, 7 Dec 2017 22:42:05 +0000
Subject: [PATCH 0774/1225] Fix a typo in estimators.md

A small typo `similarily` -> `similarly`

Signed-off-by: Yong Tang <yong.tang.github@outlook.com>
---
 tensorflow/docs_src/programmers_guide/estimators.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/docs_src/programmers_guide/estimators.md b/tensorflow/docs_src/programmers_guide/estimators.md
index 6544a16f2b..8b6cbbcd17 100644
--- a/tensorflow/docs_src/programmers_guide/estimators.md
+++ b/tensorflow/docs_src/programmers_guide/estimators.md
@@ -187,7 +187,7 @@ est_inception_v3.train(input_fn=train_input_fn, steps=2000)
 Note that the names of feature columns and labels of a keras estimator come from
 the corresponding compiled keras model. For example, the input key names for
 @{$get_started/input_fn} in above `est_inception_v3` estimator can be obtained
-from `keras_inception_v3.input_names`, and similarily, the predicted output
+from `keras_inception_v3.input_names`, and similarly, the predicted output
 names can be obtained from `keras_inception_v3.output_names`.
 
 For more details, please refer to the documentation for
-- 
GitLab


From f37380b064948fb6dd45feef0e8d93130c2f9884 Mon Sep 17 00:00:00 2001
From: Akshay Agrawal <akshayka@google.com>
Date: Thu, 7 Dec 2017 15:18:46 -0800
Subject: [PATCH 0775/1225] Add tfe.py_func, a tf.py_func-like construct that
 wraps a Python function and executes it eagerly.

In particular, an EagerPyFunc op is added that wraps a Python function and executes it eagerly. The wrapped function should take Tensors as inputs and return Tensors as outputs. Because functions wrapped in an EagerPyFunc are executed eagerly, they can make use of TensorFlow operations.

EagerPyFunc should be differentiable, in principle; a gradient will be implemented and registered in a future change. Once a gradient is implemented, tfe.py_func will probably be the easiest mechanism for experimenting with custom ops.

tfe.py_func will also make it easier to translate python functions with side-effects into defun-able code.

PiperOrigin-RevId: 178303818
---
 tensorflow/contrib/eager/python/BUILD         |   1 +
 tensorflow/contrib/eager/python/tfe.py        |   3 +
 .../base_api/api_def_EagerPyFunc.pbtxt        |   8 +
 .../python_api/api_def_EagerPyFunc.pbtxt      |   4 +
 tensorflow/core/ops/script_ops.cc             |  14 ++
 tensorflow/python/BUILD                       |   4 +
 tensorflow/python/kernel_tests/BUILD          |   2 +
 .../python/kernel_tests/py_func_test.py       | 221 +++++++++++++-----
 tensorflow/python/lib/core/py_func.cc         |  72 +++++-
 tensorflow/python/lib/core/py_func.h          |  20 +-
 tensorflow/python/ops/hidden_ops.txt          |   1 +
 tensorflow/python/ops/script_ops.py           | 176 ++++++++++----
 12 files changed, 399 insertions(+), 127 deletions(-)
 create mode 100644 tensorflow/core/api_def/base_api/api_def_EagerPyFunc.pbtxt
 create mode 100644 tensorflow/core/api_def/python_api/api_def_EagerPyFunc.pbtxt

diff --git a/tensorflow/contrib/eager/python/BUILD b/tensorflow/contrib/eager/python/BUILD
index 6e9bb87d58..fb667cd91b 100644
--- a/tensorflow/contrib/eager/python/BUILD
+++ b/tensorflow/contrib/eager/python/BUILD
@@ -19,6 +19,7 @@ py_library(
         "//tensorflow/python:framework_test_lib",
         "//tensorflow/python:numerics",
         "//tensorflow/python:resource_variable_ops",
+        "//tensorflow/python:script_ops",
         "//tensorflow/python:util",
         "//tensorflow/python:variable_scope",
         "//tensorflow/python/eager:backprop",
diff --git a/tensorflow/contrib/eager/python/tfe.py b/tensorflow/contrib/eager/python/tfe.py
index 1697c879de..770a7e3e7a 100644
--- a/tensorflow/contrib/eager/python/tfe.py
+++ b/tensorflow/contrib/eager/python/tfe.py
@@ -23,6 +23,7 @@ To use, at program startup, call `tfe.enable_eager_execution()`.
 @@list_devices
 @@num_gpus
 
+@@py_func
 @@defun
 @@implicit_gradients
 @@implicit_value_and_gradients
@@ -101,8 +102,10 @@ from tensorflow.python.framework.test_util import IsolateTest
 from tensorflow.python.framework.test_util import run_in_graph_and_eager_modes as run_test_in_graph_and_eager_modes
 from tensorflow.python.ops.resource_variable_ops import ResourceVariable as Variable
 from tensorflow.python.ops.variable_scope import EagerVariableStore
+from tensorflow.python.ops import script_ops
 from tensorflow.python.util.all_util import remove_undocumented
 
+py_func = script_ops.eager_py_func
 defun = function.defun
 implicit_gradients = backprop.implicit_grad
 implicit_value_and_gradients = backprop.implicit_val_and_grad
diff --git a/tensorflow/core/api_def/base_api/api_def_EagerPyFunc.pbtxt b/tensorflow/core/api_def/base_api/api_def_EagerPyFunc.pbtxt
new file mode 100644
index 0000000000..9231368e16
--- /dev/null
+++ b/tensorflow/core/api_def/base_api/api_def_EagerPyFunc.pbtxt
@@ -0,0 +1,8 @@
+op {
+  graph_op_name: "EagerPyFunc"
+  summary: "Eagerly executes a python function to compute func(input)->output. The"
+  description: <<END
+semantics of the input, output, and attributes are the same as those for
+PyFunc.
+END
+}
diff --git a/tensorflow/core/api_def/python_api/api_def_EagerPyFunc.pbtxt b/tensorflow/core/api_def/python_api/api_def_EagerPyFunc.pbtxt
new file mode 100644
index 0000000000..ee0f95dacb
--- /dev/null
+++ b/tensorflow/core/api_def/python_api/api_def_EagerPyFunc.pbtxt
@@ -0,0 +1,4 @@
+op {
+  graph_op_name: "EagerPyFunc"
+  visibility: HIDDEN
+}
diff --git a/tensorflow/core/ops/script_ops.cc b/tensorflow/core/ops/script_ops.cc
index 8197327b56..c7c594a999 100644
--- a/tensorflow/core/ops/script_ops.cc
+++ b/tensorflow/core/ops/script_ops.cc
@@ -51,4 +51,18 @@ REGISTER_OP("PyFuncStateless")
 A stateless version of PyFunc.
 )doc");
 
+REGISTER_OP("EagerPyFunc")
+    .Input("input: Tin")
+    .Output("output: Tout")
+    .Attr("token: string")
+    .Attr("Tin: list(type) >= 0")
+    .Attr("Tout: list(type) >=0")
+    .SetIsStateful()
+    .SetShapeFn(shape_inference::UnknownShape)
+    .Doc(R"doc(
+Eagerly executes a python function to compute func(input)->output. The
+semantics of the input, output, and attributes are the same as those for
+PyFunc.
+)doc");
+
 }  // namespace tensorflow
diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD
index 868ffcb473..e5c4347833 100644
--- a/tensorflow/python/BUILD
+++ b/tensorflow/python/BUILD
@@ -280,10 +280,14 @@ cc_library(
         ":ndarray_tensor_bridge",
         ":numpy_lib",
         ":py_util",
+        ":safe_ptr",
+        "//tensorflow/c:tf_status_helper",
+        "//tensorflow/c/eager:c_api",
         "//tensorflow/core:framework",
         "//tensorflow/core:lib",
         "//tensorflow/core:protos_all_cc",
         "//tensorflow/core:script_ops_op_lib",
+        "//tensorflow/python/eager:pywrap_tfe_lib",
         "//third_party/py/numpy:headers",
         "//util/python:python_headers",
     ],
diff --git a/tensorflow/python/kernel_tests/BUILD b/tensorflow/python/kernel_tests/BUILD
index 0660f40300..f017004e1a 100644
--- a/tensorflow/python/kernel_tests/BUILD
+++ b/tensorflow/python/kernel_tests/BUILD
@@ -1645,6 +1645,8 @@ cuda_py_test(
         "//tensorflow/python:errors",
         "//tensorflow/python:framework_for_generated_wrappers",
         "//tensorflow/python:script_ops",
+        "//tensorflow/python/eager:context",
+        "//tensorflow/python/eager:function",
     ],
     tags = ["no_windows"],
 )
diff --git a/tensorflow/python/kernel_tests/py_func_test.py b/tensorflow/python/kernel_tests/py_func_test.py
index 7ed99c1be9..92fb68820e 100644
--- a/tensorflow/python/kernel_tests/py_func_test.py
+++ b/tensorflow/python/kernel_tests/py_func_test.py
@@ -23,82 +23,93 @@ from six.moves import queue
 from six.moves import xrange  # pylint: disable=redefined-builtin
 
 from tensorflow.python.client import session as session_lib
+from tensorflow.python.eager import context
+from tensorflow.python.eager import function
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import errors
 from tensorflow.python.framework import ops
+from tensorflow.python.framework import test_util
 from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import script_ops
 from tensorflow.python.platform import test
 
 
-class PyOpTest(test.TestCase):
+def np_func(x, y):
+  return np.sinh(x) + np.cosh(y)
 
-  def testBasic(self):
 
-    def my_func(x, y):
-      return np.sinh(x) + np.cosh(y)
+def matmul(x, y):
+  return math_ops.matmul(x, y)
 
-    # single type
+
+class PyFuncTest(test.TestCase):
+  """Encapsulates tests for py_func and eager_py_func."""
+
+  # ----- Tests for py_func -----
+  def testSingleType(self):
     with self.test_session():
       x = constant_op.constant(1.0, dtypes.float32)
       y = constant_op.constant(2.0, dtypes.float32)
-      z = script_ops.py_func(my_func, [x, y], dtypes.float32)
-      self.assertEqual(z.eval(), my_func(1.0, 2.0).astype(np.float32))
+      z = self.evaluate(script_ops.py_func(np_func, [x, y], dtypes.float32))
+      self.assertEqual(z, np_func(1.0, 2.0).astype(np.float32))
 
-    # scalar
+  def testScalar(self):
     with self.test_session():
       x = constant_op.constant(1.0, dtypes.float32)
       y = constant_op.constant(2.0, dtypes.float32)
-      z = script_ops.py_func(my_func, [x, y], [dtypes.float32])
-      self.assertEqual(z[0].eval(), my_func(1.0, 2.0).astype(np.float32))
+      z = self.evaluate(
+          script_ops.eager_py_func(np_func, [x, y], [dtypes.float32]))
+      self.assertEqual(z[0], np_func(1.0, 2.0).astype(np.float32))
 
-    # array
+  def testArray(self):
     with self.test_session():
       x = constant_op.constant([1.0, 2.0], dtypes.float64)
       y = constant_op.constant([2.0, 3.0], dtypes.float64)
-      z = script_ops.py_func(my_func, [x, y], [dtypes.float64])
-      self.assertAllEqual(z[0].eval(),
-                          my_func([1.0, 2.0], [2.0, 3.0]).astype(np.float64))
+      z = self.evaluate(script_ops.py_func(np_func, [x, y], [dtypes.float64]))
+      self.assertAllEqual(z[0],
+                          np_func([1.0, 2.0], [2.0, 3.0]).astype(np.float64))
 
-    # a bit exotic type (complex64)
+  def testComplexType(self):
     with self.test_session():
       x = constant_op.constant(1 + 2j, dtypes.complex64)
       y = constant_op.constant(3 + 4j, dtypes.complex64)
-      z, = script_ops.py_func(my_func, [x, y], [dtypes.complex64])
-      self.assertAllClose(z.eval(), my_func(1 + 2j, 3 + 4j))
+      z = self.evaluate(script_ops.py_func(np_func, [x, y], dtypes.complex64))
+      self.assertAllClose(z, np_func(1 + 2j, 3 + 4j))
 
-    # a bit excotic function (rfft)
+  def testRFFT(self):
     with self.test_session():
       x = constant_op.constant([1., 2., 3., 4.], dtypes.float32)
 
       def rfft(x):
         return np.fft.rfft(x).astype(np.complex64)
 
-      y, = script_ops.py_func(rfft, [x], [dtypes.complex64])
-      self.assertAllClose(y.eval(), np.fft.rfft([1., 2., 3., 4.]))
+      y = self.evaluate(script_ops.py_func(rfft, [x], dtypes.complex64))
+      self.assertAllClose(y, np.fft.rfft([1., 2., 3., 4.]))
 
-    # returns a python literal.
+  def testPythonLiteral(self):
     with self.test_session():
 
       def literal(x):
-        return 1.0 if x == 0.0 else 0.0
+        return 1.0 if float(x) == 0.0 else 0.0
 
       x = constant_op.constant(0.0, dtypes.float64)
-      y, = script_ops.py_func(literal, [x], [dtypes.float64])
-      self.assertAllClose(y.eval(), 1.0)
+      y = self.evaluate(script_ops.py_func(literal, [x], dtypes.float64))
+      self.assertAllClose(y, 1.0)
 
-    # returns a list
+  def testList(self):
     with self.test_session():
 
       def list_func(x):
         return [x, x + 1]
 
       x = constant_op.constant(0.0, dtypes.float64)
-      y, z = script_ops.py_func(list_func, [x], [dtypes.float64] * 2)
-      self.assertAllClose(y.eval(), 0.0)
-      self.assertAllClose(z.eval(), 1.0)
+      y = self.evaluate(
+          script_ops.py_func(list_func, [x], [dtypes.float64] * 2))
+      self.assertAllClose(y, [0.0, 1.0])
 
+  def testTuple(self):
     # returns a tuple
     with self.test_session():
 
@@ -106,17 +117,17 @@ class PyOpTest(test.TestCase):
         return x, x + 1
 
       x = constant_op.constant(0.0, dtypes.float64)
-      y, z = script_ops.py_func(tuple_func, [x], [dtypes.float64] * 2)
-      self.assertAllClose(y.eval(), 0.0)
-      self.assertAllClose(z.eval(), 1.0)
+      y = self.evaluate(
+          script_ops.py_func(tuple_func, [x], [dtypes.float64] * 2))
+      self.assertAllClose(y, [0.0, 1.0])
 
     # returns a tuple, Tout and inp a tuple
     with self.test_session():
       x = constant_op.constant(0.0, dtypes.float64)
-      y, z = script_ops.py_func(tuple_func, (x,), (dtypes.float64,
-                                                   dtypes.float64))
-      self.assertAllClose(y.eval(), 0.0)
-      self.assertAllClose(z.eval(), 1.0)
+      y = self.evaluate(
+          script_ops.py_func(tuple_func, (x,),
+                             (dtypes.float64, dtypes.float64)))
+      self.assertAllClose(y, [0.0, 1.0])
 
   def testStrings(self):
 
@@ -128,10 +139,12 @@ class PyOpTest(test.TestCase):
 
     with self.test_session():
       x = constant_op.constant([b"hello", b"hi"], dtypes.string)
-      y, = script_ops.py_func(read_fixed_length_numpy_strings, [],
-                              [dtypes.string])
-      z, = script_ops.py_func(read_and_return_strings, [x, y], [dtypes.string])
-      self.assertListEqual(list(z.eval()), [b"hello there", b"hi there"])
+      y = self.evaluate(
+          script_ops.py_func(read_fixed_length_numpy_strings, [],
+                             dtypes.string))
+      z = self.evaluate(
+          script_ops.py_func(read_and_return_strings, [x, y], dtypes.string))
+      self.assertAllEqual(z, [b"hello there", b"hi there"])
 
   def testStringsAreConvertedToBytes(self):
 
@@ -143,10 +156,12 @@ class PyOpTest(test.TestCase):
 
     with self.test_session():
       x = constant_op.constant(["hello", "hi"], dtypes.string)
-      y, = script_ops.py_func(read_fixed_length_numpy_strings, [],
-                              [dtypes.string])
-      z, = script_ops.py_func(read_and_return_strings, [x, y], [dtypes.string])
-      self.assertListEqual(list(z.eval()), [b"hello there", b"hi there"])
+      y = self.evaluate(
+          script_ops.py_func(read_fixed_length_numpy_strings, [],
+                             dtypes.string))
+      z = self.evaluate(
+          script_ops.py_func(read_and_return_strings, [x, y], dtypes.string))
+      self.assertAllEqual(z, [b"hello there", b"hi there"])
 
   def testObjectArraysAreConvertedToBytes(self):
 
@@ -186,16 +201,8 @@ class PyOpTest(test.TestCase):
 
   def testNoInput(self):
     with self.test_session():
-      x, = script_ops.py_func(lambda: 42.0, [], [dtypes.float64])
-      self.assertAllClose(x.eval(), 42.0)
-
-  def testCleanup(self):
-    for _ in xrange(1000):
-      g = ops.Graph()
-      with g.as_default():
-        c = constant_op.constant([1.], dtypes.float32)
-        _ = script_ops.py_func(lambda x: x + 1, [c], [dtypes.float32])
-    self.assertTrue(script_ops._py_funcs.size() < 100)
+      x = self.evaluate(script_ops.py_func(lambda: 42.0, [], dtypes.float64))
+      self.assertAllClose(x, 42.0)
 
   def testAlias(self):
     with self.test_session():
@@ -242,8 +249,8 @@ class PyOpTest(test.TestCase):
       # Create a numpy array aliasing a tensor and a tensor aliasing this array
       z, = script_ops.py_func(ident, [p], [dtypes.float32])
       z += 0.0  # Makes sure we release the tensor aliasing the numpy array x[0]
-                # above instead of using its memory as the return value of
-                # session.run
+      # above instead of using its memory as the return value of
+      # session.run
       self.assertEqual(0.0, z.eval(feed_dict={p: [0.0]}))
 
   def testStateful(self):
@@ -319,10 +326,10 @@ class PyOpTest(test.TestCase):
       def value(self):
         return self._value
 
-    with self.test_session() as sess:
+    with self.test_session():
       s = State()
       op = s.increment(constant_op.constant(2, dtypes.int64))
-      ret = sess.run(op)
+      ret = self.evaluate(op)
       self.assertIsNone(ret)
       self.assertAllEqual([3], s.value)
 
@@ -336,15 +343,24 @@ class PyOpTest(test.TestCase):
     with self.test_session() as sess:
       self.assertEqual(sess.run(f), [])
 
-  def _testExceptionHandling(self, py_exp, tf_exp):
+  def _testExceptionHandling(self, py_exp, tf_exp, eager=False):
 
     def raise_exception():
       raise py_exp("blah")  # pylint: disable=not-callable
 
-    f = script_ops.py_func(raise_exception, [], [])
-    with self.test_session() as sess:
+    if eager:
+      if context.in_eager_mode():
+        with self.assertRaisesRegexp(tf_exp, "blah"):
+          f = script_ops.eager_py_func(raise_exception, [], [])
+        return
+      else:
+        f = script_ops.eager_py_func(raise_exception, [], [])
+    else:
+      f = script_ops.py_func(raise_exception, [], [])
+
+    with self.test_session():
       with self.assertRaisesRegexp(tf_exp, "blah"):
-        sess.run(f)
+        self.evaluate(f)
 
   def testExceptionHandling(self):
     self._testExceptionHandling(ValueError, errors.InvalidArgumentError)
@@ -358,6 +374,89 @@ class PyOpTest(test.TestCase):
 
     self._testExceptionHandling(WeirdError, errors.UnknownError)
 
+  # ----- Tests shared by py_func and eager_py_func -----
+  def testCleanup(self):
+    for _ in xrange(1000):
+      g = ops.Graph()
+      with g.as_default():
+        c = constant_op.constant([1.], dtypes.float32)
+        _ = script_ops.py_func(lambda x: x + 1, [c], [dtypes.float32])
+        _ = script_ops.eager_py_func(lambda x: x + 1, [c], [dtypes.float32])
+    self.assertTrue(script_ops._py_funcs.size() < 100)
+
+  # ----- Tests for eager_py_func -----
+  @test_util.run_in_graph_and_eager_modes()
+  def testEagerSingleOutputInt32(self):
+    a = array_ops.ones((3, 3), dtype=dtypes.int32)
+    x = array_ops.ones((3, 1), dtype=dtypes.int32)
+    output = script_ops.eager_py_func(matmul, inp=[a, x], Tout=dtypes.int32)
+    with self.test_session():
+      ret = self.evaluate(output)
+      self.assertAllEqual(ret, [[3], [3], [3]])
+
+  @test_util.run_in_graph_and_eager_modes()
+  def testEagerSingleOutputFloat32(self):
+    a = array_ops.ones((3, 3), dtype=dtypes.float32)
+    x = array_ops.ones((3, 1), dtype=dtypes.float32)
+    output = script_ops.eager_py_func(matmul, inp=[a, x], Tout=dtypes.float32)
+    with self.test_session():
+      ret = self.evaluate(output)
+      self.assertAllClose(ret, [[3.0], [3.0], [3.0]])
+
+  @test_util.run_in_graph_and_eager_modes()
+  def testEagerArrayOutput(self):
+    a = array_ops.ones((3, 3), dtype=dtypes.int32)
+    x = array_ops.ones((3, 1), dtype=dtypes.int32)
+    output = script_ops.eager_py_func(
+        lambda a, x: [matmul(a, x)], inp=[a, x], Tout=[dtypes.int32])
+
+    with self.test_session():
+      ret = self.evaluate(output)
+      self.assertAllEqual(ret, [[[3], [3], [3]]])
+
+  @test_util.run_in_graph_and_eager_modes()
+  def testEagerReturnNone(self):
+
+    def no_return_value():
+      return
+
+    output = script_ops.eager_py_func(no_return_value, inp=[], Tout=[])
+    ret = self.evaluate(output)
+    if context.in_eager_mode():
+      self.assertEquals(len(ret), 0)
+    else:
+      self.assertIsNone(ret)
+
+  @test_util.run_in_graph_and_eager_modes()
+  def testEagerPyFuncInDefun(self):
+
+    def wrapper():
+      a = array_ops.ones((3, 3), dtype=dtypes.int32)
+      x = array_ops.ones((3, 1), dtype=dtypes.int32)
+      return script_ops.eager_py_func(matmul, inp=[a, x], Tout=dtypes.int32)
+
+    wrapped = function.defun(wrapper)
+    ret = self.evaluate(wrapped())
+    self.assertAllEqual(ret, [[3], [3], [3]])
+
+  @test_util.run_in_graph_and_eager_modes()
+  def testEagerExceptionHandling(self):
+    self._testExceptionHandling(
+        ValueError, errors.InvalidArgumentError, eager=True)
+    self._testExceptionHandling(
+        TypeError, errors.InvalidArgumentError, eager=True)
+    self._testExceptionHandling(
+        StopIteration, errors.OutOfRangeError, eager=True)
+    self._testExceptionHandling(
+        MemoryError, errors.ResourceExhaustedError, eager=True)
+    self._testExceptionHandling(
+        NotImplementedError, errors.UnimplementedError, eager=True)
+
+    class WeirdError(Exception):
+      pass
+
+    self._testExceptionHandling(WeirdError, errors.UnknownError, eager=True)
+
 
 if __name__ == "__main__":
   test.main()
diff --git a/tensorflow/python/lib/core/py_func.cc b/tensorflow/python/lib/core/py_func.cc
index a42282b055..eae1c2eea6 100644
--- a/tensorflow/python/lib/core/py_func.cc
+++ b/tensorflow/python/lib/core/py_func.cc
@@ -18,6 +18,8 @@ limitations under the License.
 #include <array>
 
 #include "numpy/arrayobject.h"
+#include "tensorflow/c/eager/c_api.h"
+#include "tensorflow/c/tf_status_helper.h"
 #include "tensorflow/core/framework/allocation_description.pb.h"
 #include "tensorflow/core/framework/op_kernel.h"
 #include "tensorflow/core/lib/core/errors.h"
@@ -25,8 +27,10 @@ limitations under the License.
 #include "tensorflow/core/platform/macros.h"
 #include "tensorflow/core/platform/mutex.h"
 #include "tensorflow/core/platform/types.h"
+#include "tensorflow/python/eager/pywrap_tfe.h"
 #include "tensorflow/python/lib/core/ndarray_tensor_bridge.h"
 #include "tensorflow/python/lib/core/py_util.h"
+#include "tensorflow/python/lib/core/safe_ptr.h"
 #include <Python.h>
 
 namespace tensorflow {
@@ -48,6 +52,9 @@ struct PyCall {
   // with this "token".
   string token;
 
+  // True if the call is associated with an EagerPyFunc.
+  bool eager;
+
   // Inputs and outputs of this function invocation.
   std::vector<Tensor> ins;
   std::vector<Tensor> out;
@@ -55,19 +62,26 @@ struct PyCall {
 
 // Givens the 'call', prepares the token and inputs as a python tuple
 // that is appropriate for calling the trampoline.
-Status MakeArgTuple(PyCall* call, PyObject** tuple) {
+Status MakeArgTuple(const PyCall* call, PyObject** tuple) {
   int64 n = call->ins.size();
   PyObject* lst = PyList_New(n);
   CHECK(lst);
   for (int64 i = 0; i < n; ++i) {
+    PyObject* arg = nullptr;
     const Tensor& t = call->ins[i];
-    PyObject* a = nullptr;
-    Status s = ConvertTensorToNdarray(t, &a);
-    if (!s.ok()) {
-      Py_DECREF(lst);
-      return s;
+    if (call->eager) {
+      arg = EagerTensorFromHandle(TFE_NewTensorHandle(t));
+      if (arg == nullptr) {
+        return errors::Internal("Unable to procure EagerTensor from Tensor.");
+      }
+    } else {
+      Status s = ConvertTensorToNdarray(t, &arg);
+      if (!s.ok()) {
+        Py_DECREF(lst);
+        return s;
+      }
     }
-    PyList_SetItem(lst, i, a);
+    PyList_SetItem(lst, i, arg);
   }
   *tuple = Py_BuildValue("(sN)", call->token.c_str(), lst);
   CHECK(*tuple);
@@ -133,6 +147,18 @@ bool IsSingleNone(PyObject* obj) {
   return item == Py_None;
 }
 
+// Retrieves a Tensor from `eager_tensor` and stores it in `output_tensor`.
+Status ExtractTensorFromEagerTensor(const PyObject* eager_tensor,
+                                    Tensor* output_tensor,
+                                    TF_Status* tf_status) {
+  // TODO(akshayka): Lift the restriction requiring output tensors to
+  // lie in host memory; EagerPyFunc should be able to dispatch ops on GPU
+  // tensors, so we should eventually implement a GPU kernel for EagerPyFunc.
+  *output_tensor = *TFE_TensorHandleUnderlyingTensorInHostMemory(
+      EagerTensor_Handle(eager_tensor), tf_status);
+  return StatusFromTF_Status(tf_status);
+}
+
 // Calls the registered py function through the trampoline.
 Status DoCallPyFunc(PyCall* call, bool* out_log_on_error) {
   *out_log_on_error = true;
@@ -172,21 +198,37 @@ Status DoCallPyFunc(PyCall* call, bool* out_log_on_error) {
     }
   }
 
-  // Process the return values and converts them to tf Tensors.
+  // Process the return values and convert them to TF Tensors.
   Status s;
   if (PyList_Check(result)) {
-    // 'result' is a list.
     call->out.clear();
     for (int i = 0; i < PyList_Size(result); ++i) {
       Tensor t;
-      s = ConvertNdarrayToTensor(PyList_GetItem(result, i), &t);
+      if (call->eager) {
+        auto tf_status = tensorflow::make_safe(TF_NewStatus());
+        s = ExtractTensorFromEagerTensor(PyList_GetItem(result, i), &t,
+                                         tf_status.get());
+      } else {
+        s = ConvertNdarrayToTensor(PyList_GetItem(result, i), &t);
+      }
+
       if (!s.ok()) {
         break;
       }
       call->out.push_back(t);
     }
+  } else if (EagerTensor_CheckExact(result) || result == Py_None) {
+    DCHECK(call->eager);
+    Tensor t;
+    if (result != Py_None) {
+      auto tf_status = tensorflow::make_safe(TF_NewStatus());
+      s = ExtractTensorFromEagerTensor(result, &t, tf_status.get());
+      if (s.ok()) {
+        call->out.push_back(t);
+      }
+    }
   } else if (PyArray_Check(result)) {
-    // 'result' is a single ndarray.
+    DCHECK(!call->eager);
     if (!IsSingleNone(result)) {
       Tensor t;
       s = ConvertNdarrayToTensor(result, &t);
@@ -375,11 +417,13 @@ class PyFuncOp : public OpKernel {
  public:
   explicit PyFuncOp(OpKernelConstruction* ctx) : OpKernel(ctx) {
     OP_REQUIRES_OK(ctx, ctx->GetAttr("token", &token_));
+    eager_ = type_string() == "EagerPyFunc";
   }
 
   void Compute(OpKernelContext* ctx) override {
     PyCall call;
     call.token = token_;
+    call.eager = eager_;
     for (int i = 0; i < ctx->num_inputs(); ++i) {
       call.ins.push_back(ctx->input(i));
     }
@@ -418,9 +462,15 @@ class PyFuncOp : public OpKernel {
  private:
   string token_;
 
+  // True if and only if this op should execute the python function eagerly,
+  // i.e., if and only if the eager attribute is set.
+  bool eager_;
+
   TF_DISALLOW_COPY_AND_ASSIGN(PyFuncOp);
 };
+
 REGISTER_KERNEL_BUILDER(Name("PyFunc").Device(DEVICE_CPU), PyFuncOp);
 REGISTER_KERNEL_BUILDER(Name("PyFuncStateless").Device(DEVICE_CPU), PyFuncOp);
+REGISTER_KERNEL_BUILDER(Name("EagerPyFunc").Device(DEVICE_CPU), PyFuncOp);
 
 }  // end namespace tensorflow
diff --git a/tensorflow/python/lib/core/py_func.h b/tensorflow/python/lib/core/py_func.h
index 5a451d5f43..3197a7ddfa 100644
--- a/tensorflow/python/lib/core/py_func.h
+++ b/tensorflow/python/lib/core/py_func.h
@@ -24,21 +24,27 @@ limitations under the License.
 
 namespace tensorflow {
 
-// Called by py code on initialization.
+// Called by python code on initialization.
 //
 // "trampoline" must represent a python function which has the
 // following signature:
-//   (string, list(ndarray)) -> ndarray | list(ndarray) | python scalar
+//   (string, list(ndarray)) | (string, list(EagerTensor)) ->
+//     ndarray | list(ndarray) | python scalar |
+//     EagerTensor | list(EagerTensor) | None
 //
 // The trampoline takes two arguments, the first is a string token
 // used by the python frontend's dispatching logic; the second is a
-// list of numpy ndarrays.
+// list of numpy ndarrays or EagerTensor objects. It can return a
+// single numpy ndarray, a list of numpy ndarrays, a python scalar, an
+// EagerTensor, a list of EagerTensors, or None.
 //
-// The trampoline can return a single numpy ndarray, a list of numpy
-// ndarrays, or a simply python scalar. The C++ runtime converts them,
-// if supported, back to Tensor objects.
+// PyFunc requires inputs and outputs to be ndarrays. EagerPyFunc requires
+// inputs to be a list of EagerTensors and outputs to be an EagerTensor, a list
+// of EagerTensors, or None.
 //
-// This is called by script_ops.py during its module initialization.
+// The C++ runtime converts outputs back to Tensor objects.
+//
+// This function is called by script_ops.py during its module initialization.
 //
 // TODO(zhifengc): Support distributed runtime.
 void InitializePyTrampoline(PyObject* trampoline);
diff --git a/tensorflow/python/ops/hidden_ops.txt b/tensorflow/python/ops/hidden_ops.txt
index af014a7e39..b3f7c26695 100644
--- a/tensorflow/python/ops/hidden_ops.txt
+++ b/tensorflow/python/ops/hidden_ops.txt
@@ -341,6 +341,7 @@ TruncatedNormal
 # script_ops
 PyFunc
 PyFuncStateless
+EagerPyFunc
 
 # sdca_ops
 
diff --git a/tensorflow/python/ops/script_ops.py b/tensorflow/python/ops/script_ops.py
index 2c3667dffe..c0c1ade495 100644
--- a/tensorflow/python/ops/script_ops.py
+++ b/tensorflow/python/ops/script_ops.py
@@ -29,11 +29,41 @@ import numpy as np
 import six
 
 from tensorflow.python import pywrap_tensorflow
+from tensorflow.python.eager import context
 from tensorflow.python.framework import function
 from tensorflow.python.framework import ops
 from tensorflow.python.ops import gen_script_ops
 
 
+class EagerFunc(object):
+  """A wrapper for a function owned by an EagerPyFunc."""
+
+  def __init__(self, func, Tout):
+    """Constructs an EagerFunc.
+
+    Args:
+      func: The function to wrap.
+      Tout: A list of datatypes for the output; an empty list if the output is
+            None.
+    """
+    self._func = func
+    self._out_dtypes = Tout
+
+  def __call__(self, *args, **kwargs):
+    """Passes args, kwargs to `self._func`, which is executed eagerly."""
+    with context.eager_mode():
+      ret = self._func(*args, **kwargs)
+      if isinstance(ret, (tuple, list)):
+        return [
+            ops.convert_to_tensor(x, dtype=dtype)
+            for (x, dtype) in zip(ret, self._out_dtypes)
+        ]
+      elif ret is None:
+        return ret
+      else:
+        return ops.convert_to_tensor(ret, dtype=self._out_dtypes[0])
+
+
 class FuncRegistry(object):
   """A helper class to keep track of registered py functions.
 
@@ -91,16 +121,20 @@ class FuncRegistry(object):
     if func is None:
       raise ValueError("callback %s is not found" % token)
     ret = func(*args)
-    # Strings seem to lead to a memory leak here if they're not wrapped in a
-    # list.
-    if isinstance(ret, six.binary_type):
-      ret = [ret]
-    # Ensures that we return either a single numpy array or a list of numpy
-    # arrays.
-    if isinstance(ret, (tuple, list)):
-      return [self._convert(x) for x in ret]
+
+    if isinstance(func, EagerFunc):
+      return ret
     else:
-      return self._convert(ret)
+      # Strings seem to lead to a memory leak here if they're not wrapped in a
+      # list.
+      if isinstance(ret, six.binary_type):
+        ret = [ret]
+      # Ensures that we return either a single numpy array or a list of numpy
+      # arrays.
+      if isinstance(ret, (tuple, list)):
+        return [self._convert(x) for x in ret]
+      else:
+        return self._convert(ret)
 
   def size(self):
     """Returns how many functions are currently registered."""
@@ -129,6 +163,86 @@ class CleanupFunc(object):
     _py_funcs.remove(self._token)
 
 
+def _internal_py_func(func, inp, Tout, stateful=None, eager=False, name=None):
+  """See documentation for py_func and eager_py_func."""
+
+  is_list_or_tuple = False
+  if isinstance(Tout, (list, tuple)):
+    is_list_or_tuple = True
+  else:
+    Tout = [Tout]
+
+  if eager:
+    func = EagerFunc(func, Tout)
+
+  token = _py_funcs.insert(func)
+  # We tie the registered function's lifetime with the current default graph,
+  # i.e., when the current graph is destroyed, we remove its py funcs.
+  graph = ops.get_default_graph()
+
+  # pylint: disable=protected-access
+  while isinstance(graph, function._FuncGraph):
+    # If the py_func was declared inside a _FuncGraph, its lifetime should be
+    # bound to that of the outer graph instead.
+    graph = graph._outer_graph
+
+  cleanup = CleanupFunc(token)
+
+  # TODO(zhifengc): Consider adding a Graph method to collect
+  # `cleanup` objects in one of its member.
+  if not hasattr(graph, "_cleanup_py_funcs_used_in_graph"):
+    graph._cleanup_py_funcs_used_in_graph = []
+
+  # When `graph` is destroyed, elements in _cleanup_py_funcs_used_in_graph
+  # will be destroyed and their __del__ will remove the 'token' from
+  # the funcs registry.
+  graph._cleanup_py_funcs_used_in_graph.append(cleanup)
+  # pylint: enable=protected-access
+
+  # pylint: disable=protected-access
+  if eager:
+    result = gen_script_ops._eager_py_func(
+        input=inp, token=token, Tout=Tout, name=name)
+  else:
+    if stateful:
+      result = gen_script_ops._py_func(
+          input=inp, token=token, Tout=Tout, name=name)
+    else:
+      result = gen_script_ops._py_func_stateless(
+          input=inp, token=token, Tout=Tout, name=name)
+  # pylint: enable=protected-access
+  return result if is_list_or_tuple else result[0]
+
+
+def eager_py_func(func, inp, Tout, name=None):
+  """Wraps a python function into a TensorFlow op.
+
+  When the returned op is executed, `func` is invoked with eager execution
+  enabled. Inputs are Tensor objects and func must return None or objects
+  that may be converted to Tensor objects.
+
+  This function has the same limitations as `py_func` with respect to
+  serialization and distribution.
+
+  Args:
+    func: A Python function which accepts a list of `Tensor` objects
+      having element types that match the corresponding `tf.Tensor` objects
+      in `inp` and returns a list of `Tensor` objects (or a single
+      `Tensor`, or `None`) having element types that match the
+      corresponding values in `Tout`.
+    inp: A list of `Tensor` objects.
+    Tout: A list or tuple of tensorflow data types or a single tensorflow data
+      type if there is only one, indicating what `func` returns; an empty list
+      if no value is returned (i.e., if the return value is `None`).
+    name: A name for the operation (optional).
+
+  Returns:
+    A list of `Tensor` or a single `Tensor` which `func` computes; an empty list
+    if `func` returns None.
+  """
+  return _internal_py_func(func=func, inp=inp, Tout=Tout, eager=True, name=name)
+
+
 def py_func(func, inp, Tout, stateful=True, name=None):
   """Wraps a python function and uses it as a TensorFlow op.
 
@@ -182,46 +296,12 @@ def py_func(func, inp, Tout, stateful=True, name=None):
   Returns:
     A list of `Tensor` or a single `Tensor` which `func` computes.
   """
-  token = _py_funcs.insert(func)
-  # We tie the registered function's life-time with the current
-  # default graph. I.e., when the current graph is destroyed, we
-  # should remove its py funcs.
-  g = ops.get_default_graph()
-
-  # pylint: disable=protected-access
-  while isinstance(g, function._FuncGraph):
-    # If the py_func was declared inside a _FuncGraph, its lifetime should be
-    # bound to that of the outer graph instead.
-    g = g._outer_graph
-
-  cleanup = CleanupFunc(token)
-
-  # TODO(zhifengc): Consider adding a Graph method to collect
-  # `cleanup` objects in one of its member.
-  if not hasattr(g, "_cleanup_py_funcs_used_in_graph"):
-    g._cleanup_py_funcs_used_in_graph = []
-
-  # When g is destroyed, elements in _cleanup_py_funcs_used_in_graph
-  # will be destroyed and their __del__ will remove the 'token' from
-  # the funcs registry.
-  g._cleanup_py_funcs_used_in_graph.append(cleanup)
-  # pylint: enable=protected-access
-
-  if isinstance(Tout, (list, tuple)):
-    is_list_or_tuple = True
-  else:
-    Tout = [Tout]
-    is_list_or_tuple = False
-  # pylint: disable=protected-access
-  if stateful:
-    result = gen_script_ops._py_func(
-        input=inp, token=token, Tout=Tout, name=name)
-  else:
-    result = gen_script_ops._py_func_stateless(
-        input=inp, token=token, Tout=Tout, name=name)
-  # pylint: enable=protected-access
-  return result if is_list_or_tuple else result[0]
+  return _internal_py_func(
+      func=func, inp=inp, Tout=Tout, stateful=stateful, eager=False, name=name)
 
 
+# TODO(akshayka): PyFuncs where the 'eager' attribute is set to True should be
+# differentiable, i.e., the gradient of PyFunc should propagate Nones if the
+# eager attribute is not set, and otherwise, it should return the gradient.
 ops.NotDifferentiable("PyFunc")
 ops.NotDifferentiable("PyFuncStateless")
-- 
GitLab


From 51fa3f7fef62339fe8f207a3e83048f9b4c2f0e5 Mon Sep 17 00:00:00 2001
From: Mustafa Ispir <ispir@google.com>
Date: Thu, 7 Dec 2017 15:28:37 -0800
Subject: [PATCH 0776/1225] Added unknown rank support to dense to sparse
 conversion withing categorical columns. Used same logic with
 contrib.layers.dense_to_sparse.

PiperOrigin-RevId: 178305360
---
 .../python/feature_column/feature_column.py   | 41 +++++++--------
 .../feature_column/feature_column_test.py     | 52 ++++++++++---------
 2 files changed, 46 insertions(+), 47 deletions(-)

diff --git a/tensorflow/python/feature_column/feature_column.py b/tensorflow/python/feature_column/feature_column.py
index 80c36dc4aa..060fa640d5 100644
--- a/tensorflow/python/feature_column/feature_column.py
+++ b/tensorflow/python/feature_column/feature_column.py
@@ -1988,29 +1988,26 @@ def _to_sparse_input(input_tensor, ignore_value=None):
   if isinstance(input_tensor, sparse_tensor_lib.SparseTensor):
     return input_tensor
   with ops.name_scope(None, 'to_sparse_input', (input_tensor, ignore_value,)):
-    input_rank = input_tensor.get_shape().ndims
-    if input_rank is None:
-      # TODO(b/32318825): Implement dense_to_sparse_tensor for undefined rank.
-      raise ValueError('Undefined input_tensor shape.')
     if ignore_value is None:
-      ignore_value = '' if input_tensor.dtype == dtypes.string else -1
-    dense_shape = math_ops.cast(array_ops.shape(input_tensor), dtypes.int64)
-    indices = array_ops.where(math_ops.not_equal(
-        input_tensor, math_ops.cast(ignore_value, input_tensor.dtype)))
-    # Flattens the tensor and indices for use with gather.
-    flat_tensor = array_ops.reshape(input_tensor, [-1])
-    flat_indices = indices[:, input_rank - 1]
-    # Computes the correct flattened indices for 2d (or higher) tensors.
-    if input_rank > 1:
-      higher_dims = indices[:, :input_rank - 1]
-      shape_offsets = array_ops.stack(
-          _shape_offsets(array_ops.unstack(dense_shape)[1:]))
-      offsets = math_ops.reduce_sum(
-          math_ops.multiply(higher_dims, shape_offsets),
-          reduction_indices=[1])
-      flat_indices = math_ops.add(flat_indices, offsets)
-    values = array_ops.gather(flat_tensor, flat_indices)
-    return sparse_tensor_lib.SparseTensor(indices, values, dense_shape)
+      if input_tensor.dtype == dtypes.string:
+        # Exception due to TF strings are converted to numpy objects by default.
+        ignore_value = ''
+      elif input_tensor.dtype.is_integer:
+        ignore_value = -1  # -1 has a special meaning of missing feature
+      else:
+        # NOTE: `as_numpy_dtype` is a property, so with the parentheses this is
+        # constructing a new numpy object of the given type, which yields the
+        # default value for that type.
+        ignore_value = input_tensor.dtype.as_numpy_dtype()
+    ignore_value = math_ops.cast(
+        ignore_value, input_tensor.dtype, name='ignore_value')
+    indices = array_ops.where(
+        math_ops.not_equal(input_tensor, ignore_value), name='indices')
+    return sparse_tensor_lib.SparseTensor(
+        indices=indices,
+        values=array_ops.gather_nd(input_tensor, indices, name='values'),
+        dense_shape=array_ops.shape(
+            input_tensor, out_type=dtypes.int64, name='dense_shape'))
 
 
 def _clean_feature_columns(feature_columns):
diff --git a/tensorflow/python/feature_column/feature_column_test.py b/tensorflow/python/feature_column/feature_column_test.py
index 5d9849951b..3651c41720 100644
--- a/tensorflow/python/feature_column/feature_column_test.py
+++ b/tensorflow/python/feature_column/feature_column_test.py
@@ -1650,8 +1650,9 @@ class LinearModelTest(test.TestCase):
         indices=((0,), (1,)),
         values=('sedan', 'hardtop'),
         dense_shape=(2,))
+    country_data = np.array(['US', 'CA'])
 
-    net = fc.linear_model(features, [price_buckets, body_style])
+    net = fc.linear_model(features, [price_buckets, body_style, country])
     bias = get_linear_model_bias()
     price_buckets_var = get_linear_model_column_var(price_buckets)
     body_style_var = get_linear_model_column_var(body_style)
@@ -1660,15 +1661,14 @@ class LinearModelTest(test.TestCase):
       sess.run(body_style_var.assign([[-10.], [-100.], [-1000.]]))
       sess.run(bias.assign([5.]))
 
-      self.assertAllClose(
-          [[10 - 1000 + 5.], [1000 - 10 + 5.]],
-          sess.run(net, feed_dict={
-              features['price']: price_data,
-              features['body-style']: body_style_data}))
-
-    # Dense categorical_column with unknown shape is not allowed.
-    with self.assertRaisesRegexp(ValueError, 'Undefined input_tensor shape.'):
-      fc.linear_model(features, [price_buckets, body_style, country])
+      self.assertAllClose([[10 - 1000 + 5.], [1000 - 10 + 5.]],
+                          sess.run(
+                              net,
+                              feed_dict={
+                                  features['price']: price_data,
+                                  features['body-style']: body_style_data,
+                                  features['country']: country_data
+                              }))
 
   def test_with_rank_0_feature(self):
     price = fc.numeric_column('price')
@@ -2119,9 +2119,9 @@ class FunctionalInputLayerTest(test.TestCase):
 
   def test_with_1d_unknown_shape_sparse_tensor(self):
     embedding_values = (
-        (1., 2., 3., 4., 5.),  # id 0
-        (6., 7., 8., 9., 10.),  # id 1
-        (11., 12., 13., 14., 15.)  # id 2
+        (1., 2.),  # id 0
+        (6., 7.),  # id 1
+        (11., 12.)  # id 2
     )
     def _initializer(shape, dtype, partition_info):
       del shape, dtype, partition_info
@@ -2138,8 +2138,8 @@ class FunctionalInputLayerTest(test.TestCase):
     # embedded_body_style has 5 dims in input_layer.
     country = fc.categorical_column_with_vocabulary_list(
         'country', vocabulary_list=['US', 'JP', 'CA'])
-    embedded_country = fc.embedding_column(country, dimension=5,
-                                           initializer=_initializer)
+    embedded_country = fc.embedding_column(
+        country, dimension=2, initializer=_initializer)
 
     # Provides 1-dim tensor and dense tensor.
     features = {
@@ -2157,22 +2157,24 @@ class FunctionalInputLayerTest(test.TestCase):
         indices=((0,), (1,)),
         values=('sedan', 'hardtop'),
         dense_shape=(2,))
+    country_data = np.array([['US'], ['CA']])
 
-    # Dense categorical_column with unknown shape is not allowed.
-    with self.assertRaisesRegexp(ValueError, 'Undefined input_tensor shape.'):
-      fc.input_layer(features, [price, one_hot_body_style, embedded_country])
-
-    net = fc.input_layer(features, [price, one_hot_body_style])
-    self.assertEqual(1 + 3, net.shape[1])
+    net = fc.input_layer(features,
+                         [price, one_hot_body_style, embedded_country])
+    self.assertEqual(1 + 3 + 2, net.shape[1])
     with _initialized_session() as sess:
 
       # Each row is formed by concatenating `embedded_body_style`,
       # `one_hot_body_style`, and `price` in order.
       self.assertAllEqual(
-          [[0., 0., 1., 11.], [1., 0., 0., 12.]],
-          sess.run(net, feed_dict={
-              features['price']: price_data,
-              features['body-style']: body_style_data}))
+          [[0., 0., 1., 1., 2., 11.], [1., 0., 0., 11., 12., 12.]],
+          sess.run(
+              net,
+              feed_dict={
+                  features['price']: price_data,
+                  features['body-style']: body_style_data,
+                  features['country']: country_data
+              }))
 
   def test_with_rank_0_feature(self):
     # price has 1 dimension in input_layer
-- 
GitLab


From 6bb91a0712aa9e312fafb8a81bf7d891b6e064dc Mon Sep 17 00:00:00 2001
From: Skye Wanderman-Milne <skyewm@google.com>
Date: Thu, 7 Dec 2017 15:36:40 -0800
Subject: [PATCH 0777/1225] Make import_graph_def import functions with C API
 enabled.

PiperOrigin-RevId: 178306667
---
 tensorflow/python/framework/importer.py      | 16 ++++++++++++++++
 tensorflow/python/framework/importer_test.py |  4 ----
 2 files changed, 16 insertions(+), 4 deletions(-)

diff --git a/tensorflow/python/framework/importer.py b/tensorflow/python/framework/importer.py
index 860e3fe715..62765aff00 100644
--- a/tensorflow/python/framework/importer.py
+++ b/tensorflow/python/framework/importer.py
@@ -462,6 +462,22 @@ def import_graph_def(graph_def, input_map=None, return_elements=None,
 
     _ProcessNewOps(graph)
 
+    # Create _DefinedFunctions for any imported functions.
+    #
+    # We do this by creating _DefinedFunctions directly from `graph_def`, and
+    # adding them to `graph`. Adding an existing function to a TF_Graph is a
+    # no-op, so this only has the effect of updating the Python state (usually
+    # _DefinedFunction.add_to_graph also adds the function to the TF_Graph).
+    #
+    # TODO(skyewm): fetch the TF_Functions directly from the TF_Graph
+    # TODO(skyewm): avoid sending serialized FunctionDefs back to the TF_Graph
+    if graph_def.library and graph_def.library.function:
+      # pylint: disable=protected-access
+      functions = function._from_library(graph_def.library)
+      for f in functions:
+        f.add_to_graph(graph)
+      # pylint: enable=protected-access
+
     # TODO(skyewm): error if unused input map key
 
     if return_elements is None:
diff --git a/tensorflow/python/framework/importer_test.py b/tensorflow/python/framework/importer_test.py
index ee3cfbbd05..7bf13ba93d 100644
--- a/tensorflow/python/framework/importer_test.py
+++ b/tensorflow/python/framework/importer_test.py
@@ -1110,8 +1110,6 @@ class ImportGraphDefTest(test.TestCase):
         self.assertEqual(987, a[0].get_attr("default_int"))
 
   def testFunctions(self):
-    if ops._USE_C_API: return  # TODO(skyewm): make this work with C API
-
     dtype = dtypes.float32
     @function.Defun(dtype, dtype, dtype, dtype)
     def Grad(x, y, dout1, dout2):  # pylint: disable=unused-argument
@@ -1189,8 +1187,6 @@ class ImportGraphDefTest(test.TestCase):
         self.assertEqual(sess.run("outer:0"), 21)
 
   def testImportInsideDefun(self):
-    if ops._USE_C_API: return  # TODO(skyewm): make this work with C API
-
     g = ops.Graph()
     with g.as_default():
       @function.Defun()
-- 
GitLab


From 7b0458a78922e7694987db2f49d7a74fea8ce2ca Mon Sep 17 00:00:00 2001
From: Ian Langmore <langmore@google.com>
Date: Thu, 7 Dec 2017 15:39:40 -0800
Subject: [PATCH 0778/1225] BUGFIX:  Ensure that rejected states don't
 propagate NaN.  Make float64 work.

PiperOrigin-RevId: 178307112
---
 .../bayesflow/python/kernel_tests/hmc_test.py | 69 ++++++++++++++++++-
 .../contrib/bayesflow/python/ops/hmc_impl.py  | 42 ++++++-----
 2 files changed, 92 insertions(+), 19 deletions(-)

diff --git a/tensorflow/contrib/bayesflow/python/kernel_tests/hmc_test.py b/tensorflow/contrib/bayesflow/python/kernel_tests/hmc_test.py
index 1ab819d797..cbc66b6dc1 100644
--- a/tensorflow/contrib/bayesflow/python/kernel_tests/hmc_test.py
+++ b/tensorflow/contrib/bayesflow/python/kernel_tests/hmc_test.py
@@ -12,8 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""Tests for Hamiltonian Monte Carlo.
-"""
+"""Tests for Hamiltonian Monte Carlo."""
 
 from __future__ import absolute_import
 from __future__ import division
@@ -27,6 +26,7 @@ from tensorflow.contrib.bayesflow.python.ops import hmc
 
 from tensorflow.python.framework import random_seed
 from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import gradients_impl
 from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import random_ops
 from tensorflow.python.platform import test
@@ -46,6 +46,9 @@ class HMCTest(test.TestCase):
     random_seed.set_random_seed(10003)
     np.random.seed(10003)
 
+  def assertAllFinite(self, x):
+    self.assertAllEqual(np.ones_like(x).astype(bool), np.isfinite(x))
+
   def _log_gamma_log_prob(self, x, event_dims=()):
     """Computes log-pdf of a log-gamma random variable.
 
@@ -375,5 +378,67 @@ class HMCTest(test.TestCase):
       self.assertAllEqual(initial_x_val, updated_x_val)
       self.assertEqual(acceptance_probs_val, 0.)
 
+  def testNanFromGradsDontPropagate(self):
+    """Test that update with NaN gradients does not cause NaN in results."""
+    def _nan_log_prob_with_nan_gradient(x):
+      return np.nan * math_ops.reduce_sum(x)
+
+    with self.test_session() as sess:
+      initial_x = math_ops.linspace(0.01, 5, 10)
+      updated_x, acceptance_probs, new_log_prob, new_grad = hmc.kernel(
+          2., 5, initial_x, _nan_log_prob_with_nan_gradient, [0])
+      initial_x_val, updated_x_val, acceptance_probs_val = sess.run(
+          [initial_x, updated_x, acceptance_probs])
+
+      logging.vlog(1, 'initial_x = {}'.format(initial_x_val))
+      logging.vlog(1, 'updated_x = {}'.format(updated_x_val))
+      logging.vlog(1, 'acceptance_probs = {}'.format(acceptance_probs_val))
+
+      self.assertAllEqual(initial_x_val, updated_x_val)
+      self.assertEqual(acceptance_probs_val, 0.)
+
+      self.assertAllFinite(
+          gradients_impl.gradients(updated_x, initial_x)[0].eval())
+      self.assertTrue(
+          gradients_impl.gradients(new_grad, initial_x)[0] is None)
+
+      # Gradients of the acceptance probs and new log prob are not finite.
+      _ = new_log_prob  # Prevent unused arg error.
+      # self.assertAllFinite(
+      #     gradients_impl.gradients(acceptance_probs, initial_x)[0].eval())
+      # self.assertAllFinite(
+      #     gradients_impl.gradients(new_log_prob, initial_x)[0].eval())
+
+  def testChainWorksIn64Bit(self):
+    def log_prob(x):
+      return - math_ops.reduce_sum(x * x, axis=-1)
+    states, acceptance_probs = hmc.chain(
+        n_iterations=10,
+        step_size=np.float64(0.01),
+        n_leapfrog_steps=10,
+        initial_x=np.zeros(5).astype(np.float64),
+        target_log_prob_fn=log_prob,
+        event_dims=[-1])
+    with self.test_session() as sess:
+      states_, acceptance_probs_ = sess.run([states, acceptance_probs])
+    self.assertEqual(np.float64, states_.dtype)
+    self.assertEqual(np.float64, acceptance_probs_.dtype)
+
+  def testChainWorksIn16Bit(self):
+    def log_prob(x):
+      return - math_ops.reduce_sum(x * x, axis=-1)
+    states, acceptance_probs = hmc.chain(
+        n_iterations=10,
+        step_size=np.float16(0.01),
+        n_leapfrog_steps=10,
+        initial_x=np.zeros(5).astype(np.float16),
+        target_log_prob_fn=log_prob,
+        event_dims=[-1])
+    with self.test_session() as sess:
+      states_, acceptance_probs_ = sess.run([states, acceptance_probs])
+    self.assertEqual(np.float16, states_.dtype)
+    self.assertEqual(np.float16, acceptance_probs_.dtype)
+
+
 if __name__ == '__main__':
   test.main()
diff --git a/tensorflow/contrib/bayesflow/python/ops/hmc_impl.py b/tensorflow/contrib/bayesflow/python/ops/hmc_impl.py
index da788be3db..5685a942e9 100644
--- a/tensorflow/contrib/bayesflow/python/ops/hmc_impl.py
+++ b/tensorflow/contrib/bayesflow/python/ops/hmc_impl.py
@@ -27,6 +27,7 @@ from __future__ import print_function
 
 import numpy as np
 
+from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import control_flow_ops
@@ -174,9 +175,11 @@ def chain(n_iterations, step_size, n_leapfrog_steps, initial_x,
 
     potential_and_grad = _make_potential_and_grad(target_log_prob_fn)
     potential, grad = potential_and_grad(initial_x)
-    return functional_ops.scan(body, array_ops.zeros(n_iterations),
-                               (initial_x, array_ops.zeros(non_event_shape),
-                                -potential, -grad))[:2]
+    return functional_ops.scan(
+        body, array_ops.zeros(n_iterations, dtype=initial_x.dtype),
+        (initial_x,
+         array_ops.zeros(non_event_shape, dtype=initial_x.dtype),
+         -potential, -grad))[:2]
 
 
 def ais_chain(n_iterations, step_size, n_leapfrog_steps, initial_x,
@@ -298,8 +301,9 @@ def ais_chain(n_iterations, step_size, n_leapfrog_steps, initial_x,
       return updated_x, acceptance_probs, w
 
     x, acceptance_probs, w = functional_ops.scan(
-        _body, beta_series, (initial_x, array_ops.zeros(non_event_shape),
-                             array_ops.zeros(non_event_shape)))
+        _body, beta_series,
+        (initial_x, array_ops.zeros(non_event_shape, dtype=initial_x.dtype),
+         array_ops.zeros(non_event_shape, dtype=initial_x.dtype)))
   return w[-1], x[-1], acceptance_probs[-1]
 
 
@@ -446,9 +450,10 @@ def kernel(step_size, n_leapfrog_steps, x, target_log_prob_fn, event_dims=(),
   """
   with ops.name_scope(name, 'hmc_kernel', [step_size, n_leapfrog_steps, x]):
     potential_and_grad = _make_potential_and_grad(target_log_prob_fn)
+    x = ops.convert_to_tensor(x, name='x')
 
     x_shape = array_ops.shape(x)
-    m = random_ops.random_normal(x_shape)
+    m = random_ops.random_normal(x_shape, dtype=x.dtype)
 
     kinetic_0 = 0.5 * math_ops.reduce_sum(math_ops.square(m), event_dims)
 
@@ -475,23 +480,26 @@ def kernel(step_size, n_leapfrog_steps, x, target_log_prob_fn, event_dims=(),
         array_ops.fill(array_ops.shape(energy_change),
                        energy_change.dtype.as_numpy_dtype(np.inf)),
         energy_change)
-    acceptance_probs = math_ops.exp(math_ops.minimum(0., -energy_change))
-    accepted = math_ops.cast(
-        random_ops.random_uniform(array_ops.shape(acceptance_probs)) <
-        acceptance_probs, log_potential_0.dtype)
-    new_log_prob = (-log_potential_0 * (1. - accepted) -
-                    log_potential_1 * accepted)
+    acceptance_probs = math_ops.exp(math_ops.minimum(-energy_change, 0.))
+    accepted = (
+        random_ops.random_uniform(
+            array_ops.shape(acceptance_probs), dtype=x.dtype)
+        < acceptance_probs)
+    new_log_prob = -array_ops.where(accepted, log_potential_1, log_potential_0)
 
     # TODO(b/65738010): This should work, but it doesn't for now.
     # reduced_shape = math_ops.reduced_shape(x_shape, event_dims)
     reduced_shape = array_ops.shape(math_ops.reduce_sum(x, event_dims,
                                                         keep_dims=True))
     accepted = array_ops.reshape(accepted, reduced_shape)
-    accepted = math_ops.cast(accepted, x.dtype)
-    new_x = x * (1. - accepted) + new_x * accepted
-    accepted = math_ops.cast(accepted, accepted.dtype)
-    new_grad = -grad_0 * (1. - accepted) - grad_1 * accepted
-
+    accepted = math_ops.logical_or(
+        accepted, math_ops.cast(array_ops.zeros_like(x), dtypes.bool))
+    new_x = array_ops.where(accepted, new_x, x)
+    new_grad = -array_ops.where(accepted, grad_1, grad_0)
+
+  # TODO(langmore) Gradients of acceptance_probs and new_log_prob with respect
+  # to initial_x will propagate NaNs (see testNanFromGradsDontPropagate).  This
+  # should be fixed.
   return new_x, acceptance_probs, new_log_prob, new_grad
 
 
-- 
GitLab


From 029109b4e1cfb4ccb43d0ef053261f3e12983aaf Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 7 Dec 2017 15:53:53 -0800
Subject: [PATCH 0779/1225] [XLA] Make hlo_test_base support running and
 comparing an hlo module.

Also,
- Add examples that shows how to use the hlo_test_base as to create text/file-based testcases.
- Change the behavior of GetDefaultPlatform: when only one platform is present, returns that one; when two platforms are present and one of them is the interpreter, returns the other one. This is because some tests included both hlo_test_base and client_library_test_base, but now the hlo_test_base is linked with interpreter by default, which makes client_library_test_base fail getting the default platform.

PiperOrigin-RevId: 178309022
---
 tensorflow/compiler/xla/service/hlo_runner.cc |   8 +
 tensorflow/compiler/xla/service/hlo_runner.h  |  12 +-
 .../compiler/xla/service/platform_util.cc     |  92 +++++--
 .../compiler/xla/service/platform_util.h      |  16 +-
 tensorflow/compiler/xla/tests/BUILD           |  44 ++++
 .../compiler/xla/tests/hlo_test_base.cc       | 226 +++++++++++++++++-
 tensorflow/compiler/xla/tests/hlo_test_base.h | 145 ++++++++++-
 .../xla/tests/isolated_convolution.hlo        |   8 +
 .../compiler/xla/tests/literal_test_util.cc   |  55 ++++-
 .../compiler/xla/tests/literal_test_util.h    |  18 ++
 .../compiler/xla/tests/sample_file_test.cc    |  51 ++++
 .../compiler/xla/tests/sample_text_test.cc    |  66 +++++
 12 files changed, 697 insertions(+), 44 deletions(-)
 create mode 100644 tensorflow/compiler/xla/tests/isolated_convolution.hlo
 create mode 100644 tensorflow/compiler/xla/tests/sample_file_test.cc
 create mode 100644 tensorflow/compiler/xla/tests/sample_text_test.cc

diff --git a/tensorflow/compiler/xla/service/hlo_runner.cc b/tensorflow/compiler/xla/service/hlo_runner.cc
index 6b6d48233a..4a7caf3ebd 100644
--- a/tensorflow/compiler/xla/service/hlo_runner.cc
+++ b/tensorflow/compiler/xla/service/hlo_runner.cc
@@ -39,6 +39,14 @@ namespace se = ::perftools::gputools;
 
 namespace xla {
 
+/*static*/ StatusOr<std::unique_ptr<HloModule>>
+HloRunner::CreateModuleFromString(const tensorflow::StringPiece hlo_string,
+                                  const DebugOptions& debug_options) {
+  HloModuleConfig config;
+  config.set_debug_options(debug_options);
+  return tools::Parse(hlo_string, config);
+}
+
 /*static*/ StatusOr<std::unique_ptr<HloModule>>
 HloRunner::ReadModuleFromHloProtoFile(const std::string& filename,
                                       const DebugOptions& debug_options) {
diff --git a/tensorflow/compiler/xla/service/hlo_runner.h b/tensorflow/compiler/xla/service/hlo_runner.h
index 95cddafc91..a65c66fd4b 100644
--- a/tensorflow/compiler/xla/service/hlo_runner.h
+++ b/tensorflow/compiler/xla/service/hlo_runner.h
@@ -35,7 +35,8 @@ namespace xla {
 
 // A base class for running an HloModule. This executes the given HloModule on a
 // certain backend directly without using the client interface. HloModule can be
-// explicitly built, or loaded from a serialization file (e.g., hlo proto file).
+// explicitly built, or loaded from a serialization file (e.g., hlo proto
+// file), or parsed from a hlo textual IR string.
 class HloRunner {
  public:
   HloRunner();
@@ -44,6 +45,12 @@ class HloRunner {
 
   ~HloRunner();
 
+  // Converts an HloModule from the given hlo textual IR string (in
+  // HloModule::ToString format).
+  static StatusOr<std::unique_ptr<HloModule>> CreateModuleFromString(
+      const tensorflow::StringPiece hlo_string,
+      const DebugOptions& debug_options);
+
   // Reads the proto file in xla.HloProto format, creates and returns the
   // HloModule. Will try to parse the filename as binary proto, then try as
   // text proto if that fails.
@@ -65,7 +72,8 @@ class HloRunner {
   // Executes the given module with given literals as input and returns the
   // result as a Literal. The LiteralPtr type accepts Literal* or
   // std::unique_ptr<Literal>.
-  // If run_hlo_passes is true, the module will be executed without Hlo
+  //
+  // If run_hlo_passes is false, the module will be executed without Hlo
   // optimization.
   template <typename LiteralPtr>
   StatusOr<std::unique_ptr<Literal>> Execute(
diff --git a/tensorflow/compiler/xla/service/platform_util.cc b/tensorflow/compiler/xla/service/platform_util.cc
index 63f3bfb36c..aa974ee61a 100644
--- a/tensorflow/compiler/xla/service/platform_util.cc
+++ b/tensorflow/compiler/xla/service/platform_util.cc
@@ -33,10 +33,32 @@ namespace se = ::perftools::gputools;
 
 namespace xla {
 
+using tensorflow::str_util::Lowercase;
+
 // Minimum supported CUDA compute capability is 3.5.
 constexpr int kMinCudaComputeCapabilityMajor = 3;
 constexpr int kMinCudaComputeCapabilityMinor = 5;
 
+// The name of the interpreter platform.
+constexpr char kInterpreter[] = "interpreter";
+
+namespace {
+
+string CanonicalPlatformName(const string& name) {
+  string platform_str = Lowercase(name);
+  // "cpu" and "host" mean the same thing.
+  if (platform_str == "cpu") {
+    platform_str = "host";
+  }
+  // "gpu" and "cuda" mean the same thing.
+  if (platform_str == "gpu") {
+    platform_str = "cuda";
+  }
+  return platform_str;
+}
+
+}  // namespace
+
 /* static */ StatusOr<std::vector<se::Platform*>>
 PlatformUtil::GetSupportedPlatforms() {
   se::MultiPlatformManager::PlatformMap platform_map;
@@ -78,7 +100,7 @@ PlatformUtil::GetSupportedPlatforms() {
   return platforms;
 }
 
-/* static */ StatusOr<se::Platform*> PlatformUtil::GetDefaultPlatform() {
+/* static */ StatusOr<se::Platform*> PlatformUtil::GetSolePlatform() {
   TF_ASSIGN_OR_RETURN(auto platforms, GetSupportedPlatforms());
   if (platforms.empty()) {
     return NotFound("no platforms found");
@@ -87,26 +109,42 @@ PlatformUtil::GetSupportedPlatforms() {
   }
 
   // Multiple platforms present and we can't pick a reasonable default.
-  auto l = [](string* out, const se::Platform* p) { out->append(p->Name()); };
-  string platforms_string = tensorflow::str_util::Join(platforms, ", ", l);
+  string platforms_string = tensorflow::str_util::Join(
+      platforms, ", ",
+      [](string* out, const se::Platform* p) { out->append(p->Name()); });
   return InvalidArgument(
       "must specify platform because more than one platform found: %s",
       platforms_string.c_str());
 }
 
-/*static*/ StatusOr<se::Platform*> PlatformUtil::GetPlatform(
-    const string& platform_name) {
-  using tensorflow::str_util::Lowercase;
-  string platform_str = Lowercase(platform_name);
-  // "cpu" and "host" mean the same thing.
-  if (platform_str == "cpu") {
-    platform_str = "host";
-  }
-  // "gpu" and "cuda" mean the same thing.
-  if (platform_str == "gpu") {
-    platform_str = "cuda";
+/* static */ StatusOr<se::Platform*> PlatformUtil::GetDefaultPlatform() {
+  TF_ASSIGN_OR_RETURN(auto platforms, GetSupportedPlatforms());
+  if (platforms.empty()) {
+    return NotFound("no platforms found");
+  } else if (platforms.size() == 1) {
+    return platforms[0];
+  } else if (platforms.size() == 2) {
+    for (int i = 0; i < 2; i++) {
+      if (Lowercase(platforms[i]->Name()) == kInterpreter &&
+          Lowercase(platforms[1 - i]->Name()) != kInterpreter) {
+        return platforms[1 - i];
+      }
+    }
   }
 
+  // Multiple platforms present and we can't pick a reasonable default.
+  string platforms_string = tensorflow::str_util::Join(
+      platforms, ", ",
+      [](string* out, const se::Platform* p) { out->append(p->Name()); });
+  return InvalidArgument(
+      "must specify platform because more than one platform (except for the "
+      "interpreter platform) found: %s",
+      platforms_string.c_str());
+}
+
+/*static*/ StatusOr<se::Platform*> PlatformUtil::GetPlatform(
+    const string& platform_name) {
+  string platform_str = CanonicalPlatformName(platform_name);
   TF_ASSIGN_OR_RETURN(auto platforms, PlatformUtil::GetSupportedPlatforms());
   for (se::Platform* platform : platforms) {
     if (Lowercase(platform->Name()) == platform_str) {
@@ -116,6 +154,32 @@ PlatformUtil::GetSupportedPlatforms() {
   return InvalidArgument("platform %s not found", platform_name.c_str());
 }
 
+/*static*/ StatusOr<se::Platform*> PlatformUtil::GetPlatformExceptFor(
+    const string& platform_name) {
+  string platform_str = CanonicalPlatformName(platform_name);
+
+  TF_ASSIGN_OR_RETURN(auto platforms, PlatformUtil::GetSupportedPlatforms());
+  std::vector<se::Platform*> matched;
+  for (se::Platform* platform : platforms) {
+    if (Lowercase(platform->Name()) != platform_name) {
+      matched.push_back(platform);
+    }
+  }
+  if (matched.empty()) {
+    return InvalidArgument("unable to find platform that is not %s",
+                           platform_name.c_str());
+  }
+  if (matched.size() == 1) {
+    return matched[0];
+  }
+  string matched_string = tensorflow::str_util::Join(
+      matched, ", ",
+      [](string* out, const se::Platform* p) { out->append(p->Name()); });
+  return InvalidArgument(
+      "found multiple platforms %s, but expected one platform except for %s",
+      matched_string.c_str(), platform_name.c_str());
+}
+
 // Returns whether the device underlying the given StreamExecutor is supported
 // by XLA.
 static bool IsDeviceSupported(se::StreamExecutor* executor) {
diff --git a/tensorflow/compiler/xla/service/platform_util.h b/tensorflow/compiler/xla/service/platform_util.h
index a59d4ffe87..69188820a7 100644
--- a/tensorflow/compiler/xla/service/platform_util.h
+++ b/tensorflow/compiler/xla/service/platform_util.h
@@ -37,16 +37,28 @@ class PlatformUtil {
   static StatusOr<std::vector<perftools::gputools::Platform*>>
   GetSupportedPlatforms();
 
-  // Convenience function which returns the default supported platform. If
+  // Convenience function which returns the default supported platform for
+  // tests. If exactly one supported platform is present, then this platform is
+  // the default platform. If exactly two platforms are present and one of them
+  // is the interpreter platform, then the other platform is the default
+  // platform. Otherwise returns an error.
+  static StatusOr<perftools::gputools::Platform*> GetDefaultPlatform();
+
+  // Convenience function which returns the sole supported platform. If
   // exactly one supported platform is present, then this platform is the
   // default platform. Otherwise returns an error.
-  static StatusOr<perftools::gputools::Platform*> GetDefaultPlatform();
+  static StatusOr<perftools::gputools::Platform*> GetSolePlatform();
 
   // Returns the platform according to the given name. Returns error if there is
   // no such platform.
   static StatusOr<perftools::gputools::Platform*> GetPlatform(
       const string& platform_name);
 
+  // Returns exactly one platform that does not have given name. Returns error
+  // if there is no such platform, or there are multiple such platforms.
+  static StatusOr<perftools::gputools::Platform*> GetPlatformExceptFor(
+      const string& platform_name);
+
   // Returns a vector of StreamExecutors for the given platform. The vector is
   // indexed by device ordinal (device numbering used by StreamExecutor). If an
   // element is nullptr, then the device is present by not supported by XLA.
diff --git a/tensorflow/compiler/xla/tests/BUILD b/tensorflow/compiler/xla/tests/BUILD
index b99e046b9b..24f4a9d05a 100644
--- a/tensorflow/compiler/xla/tests/BUILD
+++ b/tensorflow/compiler/xla/tests/BUILD
@@ -105,7 +105,9 @@ cc_library(
     hdrs = ["hlo_test_base.h"],
     deps = [
         ":literal_test_util",
+        ":test_utils",
         "//tensorflow/compiler/xla:shape_layout",
+        "//tensorflow/compiler/xla:shape_util",
         "//tensorflow/compiler/xla:statusor",
         "//tensorflow/compiler/xla:types",
         "//tensorflow/compiler/xla:util",
@@ -115,6 +117,9 @@ cc_library(
         "//tensorflow/compiler/xla/service:computation_layout",
         "//tensorflow/compiler/xla/service:hlo",
         "//tensorflow/compiler/xla/service:hlo_runner",
+        "//tensorflow/compiler/xla/service:interpreter_plugin",  # reference backend
+        "//tensorflow/compiler/xla/service:platform_util",
+        "//tensorflow/compiler/xla/tools/parser:hlo_parser",
         "//tensorflow/core:lib",
         "//tensorflow/core:stream_executor_no_cuda",
         "//tensorflow/core:test",
@@ -1678,6 +1683,45 @@ xla_test(
     ],
 )
 
+# A demo of textual IR based test.
+xla_test(
+    name = "sample_text_test",
+    srcs = ["sample_text_test.cc"],
+    # You can leave this empty if you want to test all supported backends.
+    backends = [
+        "cpu",
+        "gpu",
+    ],
+    deps = [
+        ":hlo_test_base",
+        "//tensorflow/compiler/xla:test",
+        "//tensorflow/compiler/xla:types",
+        "//tensorflow/compiler/xla/tests:literal_test_util",
+        "//tensorflow/compiler/xla/tests:xla_internal_test_main",
+        "//tensorflow/core:lib",
+    ],
+)
+
+# A demo of test that loads an hlo module from a file and compares results on gpu and cpu.
+tf_cc_test(
+    name = "sample_file_test",
+    srcs = ["sample_file_test.cc"],
+    data = ["isolated_convolution.hlo"],
+    tags = ["requires-gpu-sm35"],
+    deps = [
+        ":hlo_test_base",
+        "//tensorflow/compiler/xla:test",
+        "//tensorflow/compiler/xla:types",
+        "//tensorflow/compiler/xla/service:cpu_plugin",  # reference backend
+        "//tensorflow/compiler/xla/service:gpu_plugin",  # test backend
+        "//tensorflow/compiler/xla/service:platform_util",
+        "//tensorflow/compiler/xla/tests:literal_test_util",
+        "//tensorflow/compiler/xla/tests:xla_internal_test_main",  # fixdeps: keep
+        "//tensorflow/core:lib",
+        "//tensorflow/core:test",
+    ],
+)
+
 # -----------------------------------------------------------------------------
 
 filegroup(
diff --git a/tensorflow/compiler/xla/tests/hlo_test_base.cc b/tensorflow/compiler/xla/tests/hlo_test_base.cc
index d73c05ff92..e7a18828db 100644
--- a/tensorflow/compiler/xla/tests/hlo_test_base.cc
+++ b/tensorflow/compiler/xla/tests/hlo_test_base.cc
@@ -15,13 +15,22 @@ limitations under the License.
 
 #include "tensorflow/compiler/xla/tests/hlo_test_base.h"
 
+#include <memory>
 #include <set>
 #include <string>
 #include <utility>
 
+#include "tensorflow/compiler/xla/layout_util.h"
 #include "tensorflow/compiler/xla/legacy_flags/debug_options_flags.h"
 #include "tensorflow/compiler/xla/ptr_util.h"
+#include "tensorflow/compiler/xla/service/platform_util.h"
+#include "tensorflow/compiler/xla/shape_util.h"
+#include "tensorflow/compiler/xla/tests/literal_test_util.h"
+#include "tensorflow/compiler/xla/tests/test_utils.h"
+#include "tensorflow/compiler/xla/tools/parser/hlo_parser.h"
 #include "tensorflow/compiler/xla/types.h"
+#include "tensorflow/core/lib/core/status_test_util.h"
+#include "tensorflow/core/lib/gtl/array_slice.h"
 #include "tensorflow/core/platform/logging.h"
 #include "tensorflow/core/platform/test.h"
 #include "tensorflow/core/platform/types.h"
@@ -30,18 +39,72 @@ namespace se = ::perftools::gputools;
 
 namespace xla {
 
+namespace {
+
+using tensorflow::StringPiece;
+using tensorflow::gtl::ArraySlice;
+using tensorflow::gtl::optional;
+
+constexpr char kInterpreter[] = "interpreter";
+
+// Helper functions to get test and reference platforms.
+se::Platform* GetReferencePlatform() {
+  auto result = PlatformUtil::GetPlatform(kInterpreter);
+  TF_CHECK_OK(result.status()) << "could not get interpreter platform";
+  return result.ValueOrDie();
+}
+
+se::Platform* GetTestPlatform() {
+  auto result = PlatformUtil::GetDefaultPlatform();
+  TF_CHECK_OK(result.status()) << "could not get test platform";
+  return result.ValueOrDie();
+}
+
+bool ProgramShapesEqual(const ProgramShape& lhs, const ProgramShape& rhs) {
+  if (lhs.parameters_size() != rhs.parameters_size()) {
+    return false;
+  }
+  for (int i = 0; i < lhs.parameters_size(); i++) {
+    if (!ShapeUtil::Equal(lhs.parameters(i), rhs.parameters(i))) {
+      return false;
+    }
+  }
+  return ShapeUtil::Equal(lhs.result(), rhs.result());
+}
+
+ProgramShape GetProgramShapeWithLayout(const HloModule& module) {
+  ProgramShape program_shape;
+  const auto* entry = module.entry_computation();
+  for (const auto* param : entry->parameter_instructions()) {
+    *program_shape.add_parameters() = param->shape();
+    *program_shape.add_parameter_names() = param->name();
+  }
+  *program_shape.mutable_result() = entry->root_instruction()->shape();
+  return program_shape;
+}
+
+}  // namespace
+
+HloTestBase::HloTestBase()
+    : HloTestBase(GetTestPlatform(), GetReferencePlatform()) {}
+
+HloTestBase::HloTestBase(se::Platform* test_platform,
+                         se::Platform* reference_platform)
+    : test_runner_(test_platform), reference_runner_(reference_platform) {}
+
 /* static */
 std::unique_ptr<HloModule> HloTestBase::CreateNewModule() {
   HloModuleConfig config;
+  config.set_debug_options(GetDebugOptionsForTest());
+  return MakeUnique<HloModule>(TestName(), VersionedComputationHandle(),
+                               config);
+}
 
+/*static*/ DebugOptions HloTestBase::GetDebugOptionsForTest() {
   auto debug_options = legacy_flags::GetDebugOptionsFromFlags();
   // TODO(b/38354253): Change tests to use Parameters instead of Constants.
   debug_options.add_xla_disable_hlo_passes("constant_folding");
-
-  config.set_debug_options(debug_options);
-
-  return MakeUnique<HloModule>(TestName(), VersionedComputationHandle(),
-                               config);
+  return debug_options;
 }
 
 StatusOr<perftools::gputools::DeviceMemoryBase> HloTestBase::Execute(
@@ -49,25 +112,168 @@ StatusOr<perftools::gputools::DeviceMemoryBase> HloTestBase::Execute(
     tensorflow::gtl::ArraySlice<perftools::gputools::DeviceMemoryBase>
         arguments,
     Shape* result_shape) {
-  return runner_.Execute(std::move(module), arguments, result_shape);
+  return test_runner_.Execute(std::move(module), arguments, result_shape);
 }
 
 se::DeviceMemoryBase HloTestBase::TransferToDevice(const Literal& literal) {
-  return runner_.TransferToDevice(literal).ValueOrDie();
+  return test_runner_.TransferToDevice(literal).ValueOrDie();
 }
 
 std::unique_ptr<Literal> HloTestBase::TransferFromDevice(
     const Shape& shape, se::DeviceMemoryBase device_base) {
-  return runner_.TransferFromDevice(shape, device_base).ValueOrDie();
+  return test_runner_.TransferFromDevice(shape, device_base).ValueOrDie();
 }
 
 std::unique_ptr<Literal> HloTestBase::ExecuteAndTransfer(
     std::unique_ptr<HloModule> module,
     tensorflow::gtl::ArraySlice<se::DeviceMemoryBase> arguments) {
-  return runner_.ExecuteAndTransfer(std::move(module), arguments).ValueOrDie();
+  return test_runner_.ExecuteAndTransfer(std::move(module), arguments)
+      .ValueOrDie();
+}
+
+StatusOr<std::unique_ptr<HloModule>> HloTestBase::MakeReferenceModule(
+    const HloModule& test_module,
+    const std::function<void(HloModule*)>& reference_preprocessor) {
+  std::unique_ptr<HloModule> reference_module = test_module.Clone();
+  const auto& program_shape = GetProgramShapeWithLayout(test_module);
+
+  if (reference_preprocessor != nullptr) {
+    reference_preprocessor(reference_module.get());
+    if (!ProgramShapesEqual(program_shape,
+                            GetProgramShapeWithLayout(*reference_module))) {
+      return InvalidArgument(
+          "reference preprocessor must not modify the program shape");
+    }
+  }
+  TF_RETURN_IF_ERROR(VerifyHloModule(*reference_runner_.backend().platform(),
+                                     reference_module.get()));
+  return std::move(reference_module);
+}
+
+template <typename LiteralPtr>
+StatusOr<::testing::AssertionResult> HloTestBase::RunAndCompareInternal(
+    std::unique_ptr<HloModule> module, const ArraySlice<LiteralPtr> arguments,
+    const optional<ErrorSpec>& error, bool run_hlo_passes,
+    const std::function<void(HloModule*)>& reference_preprocessor) {
+  static_assert(
+      std::is_same<Literal*, LiteralPtr>::value ||
+          std::is_same<std::unique_ptr<Literal>, LiteralPtr>::value,
+      "The LiteralPtr type only accepts Literal* or std::unique_ptr<Literal>.");
+  TF_RETURN_IF_ERROR(
+      VerifyHloModule(*test_runner_.backend().platform(), module.get()));
+  TF_ASSIGN_OR_RETURN(auto reference_module,
+                      MakeReferenceModule(*module, reference_preprocessor));
+
+  // Execute on two backends.
+  TF_ASSIGN_OR_RETURN(
+      auto test,
+      test_runner_.Execute(std::move(module), arguments, run_hlo_passes));
+  TF_ASSIGN_OR_RETURN(auto reference,
+                      reference_runner_.Execute(std::move(reference_module),
+                                                arguments, run_hlo_passes));
+  return LiteralTestUtil::NearOrEqual(/*expected=*/*reference, /*actual=*/*test,
+                                      error);
+}
+
+template <typename LiteralPtr>
+::testing::AssertionResult HloTestBase::RunAndCompare(
+    std::unique_ptr<HloModule> module, const ArraySlice<LiteralPtr> arguments,
+    const optional<ErrorSpec>& error,
+    const std::function<void(HloModule*)>& reference_preprocessor) {
+  auto result =
+      RunAndCompareInternal(std::move(module), arguments, error,
+                            /*run_hlo_passes=*/true, reference_preprocessor);
+  if (!result.ok()) {
+    return ::testing::AssertionFailure() << result.status();
+  }
+  return result.ValueOrDie();
+}
+
+template <typename LiteralPtr>
+::testing::AssertionResult HloTestBase::RunAndCompareNoHloPasses(
+    std::unique_ptr<HloModule> module, const ArraySlice<LiteralPtr> arguments,
+    const optional<ErrorSpec>& error,
+    const std::function<void(HloModule*)>& reference_preprocessor) {
+  auto result =
+      RunAndCompareInternal(std::move(module), arguments, error,
+                            /*run_hlo_passes=*/false, reference_preprocessor);
+  if (!result.ok()) {
+    return ::testing::AssertionFailure() << result.status();
+  }
+  return result.ValueOrDie();
+}
+
+::testing::AssertionResult HloTestBase::RunAndCompare(
+    std::unique_ptr<HloModule> module, const optional<ErrorSpec>& error,
+    const std::function<void(HloModule*)>& reference_preprocessor) {
+  const auto& fake_arguments =
+      MakeFakeArguments(module.get()).ConsumeValueOrDie();
+  return RunAndCompare<std::unique_ptr<Literal>>(
+      std::move(module), fake_arguments, error, reference_preprocessor);
+}
+
+::testing::AssertionResult HloTestBase::RunAndCompareNoHloPasses(
+    std::unique_ptr<HloModule> module, const optional<ErrorSpec>& error,
+    const std::function<void(HloModule*)>& reference_preprocessor) {
+  const auto& fake_arguments =
+      MakeFakeArguments(module.get()).ConsumeValueOrDie();
+  return RunAndCompareNoHloPasses<std::unique_ptr<Literal>>(
+      std::move(module), fake_arguments, error, reference_preprocessor);
+}
+
+::testing::AssertionResult HloTestBase::RunAndCompare(
+    const StringPiece hlo_string,
+    const tensorflow::gtl::optional<ErrorSpec>& error,
+    const std::function<void(HloModule*)>& reference_preprocessor) {
+  auto module_or_status =
+      HloRunner::CreateModuleFromString(hlo_string, GetDebugOptionsForTest());
+  if (!module_or_status.ok()) {
+    return ::testing::AssertionFailure() << "failed parsing hlo textual IR";
+  }
+  return RunAndCompare(module_or_status.ConsumeValueOrDie(), error,
+                       reference_preprocessor);
+}
+
+::testing::AssertionResult HloTestBase::RunAndCompareFromFile(
+    const string& filename, const tensorflow::gtl::optional<ErrorSpec>& error,
+    const std::function<void(HloModule*)>& reference_preprocessor) {
+  auto module_or_status =
+      HloRunner::ReadModule(filename, GetDebugOptionsForTest());
+  if (!module_or_status.ok()) {
+    return ::testing::AssertionFailure()
+           << "failed reading hlo module from file";
+  }
+  return RunAndCompare(module_or_status.ConsumeValueOrDie(), error,
+                       reference_preprocessor);
+}
+
+::testing::AssertionResult HloTestBase::RunAndCompareNoHloPasses(
+    const StringPiece hlo_string,
+    const tensorflow::gtl::optional<ErrorSpec>& error,
+    const std::function<void(HloModule*)>& reference_preprocessor) {
+  auto module_or_status =
+      HloRunner::CreateModuleFromString(hlo_string, GetDebugOptionsForTest());
+  if (!module_or_status.ok()) {
+    return ::testing::AssertionFailure() << "failed parsing hlo textual IR";
+  }
+  return RunAndCompareNoHloPasses(module_or_status.ConsumeValueOrDie(), error,
+                                  reference_preprocessor);
+}
+
+::testing::AssertionResult HloTestBase::RunAndCompareNoHloPassesFromFile(
+    const string& filename, const tensorflow::gtl::optional<ErrorSpec>& error,
+    const std::function<void(HloModule*)>& reference_preprocessor) {
+  auto module_or_status =
+      HloRunner::ReadModule(filename, GetDebugOptionsForTest());
+  if (!module_or_status.ok()) {
+    return ::testing::AssertionFailure()
+           << "failed reading hlo module from file";
+  }
+  return RunAndCompareNoHloPasses(module_or_status.ConsumeValueOrDie(), error,
+                                  reference_preprocessor);
 }
 
-Backend& HloTestBase::backend() { return runner_.backend(); }
+Backend& HloTestBase::backend() { return test_runner_.backend(); }
 
 /* static */
 string HloTestBase::TestName() {
diff --git a/tensorflow/compiler/xla/tests/hlo_test_base.h b/tensorflow/compiler/xla/tests/hlo_test_base.h
index 7f068dce36..3cbbb7aa24 100644
--- a/tensorflow/compiler/xla/tests/hlo_test_base.h
+++ b/tensorflow/compiler/xla/tests/hlo_test_base.h
@@ -24,31 +24,74 @@ limitations under the License.
 #include "tensorflow/compiler/xla/service/computation_layout.h"
 #include "tensorflow/compiler/xla/service/hlo_module.h"
 #include "tensorflow/compiler/xla/service/hlo_runner.h"
+#include "tensorflow/compiler/xla/service/platform_util.h"
 #include "tensorflow/compiler/xla/shape_layout.h"
 #include "tensorflow/compiler/xla/statusor.h"
 #include "tensorflow/compiler/xla/tests/literal_test_util.h"
+#include "tensorflow/compiler/xla/types.h"
 #include "tensorflow/compiler/xla/xla_data.pb.h"
 #include "tensorflow/core/lib/gtl/array_slice.h"
+#include "tensorflow/core/lib/gtl/optional.h"
 #include "tensorflow/core/platform/stream_executor_no_cuda.h"
 #include "tensorflow/core/platform/test.h"
 
 namespace xla {
 
-// A base class for tests which build and run HLO code. This is a lower level of
-// abstraction than using the client interface and enables, for one, explicitly
-// building a graph of HLO instructions to run.
+// A base class for tests which build and/or run HLO code. The class includes
+// support for running an HLO module on two platforms and compare the results.
+// This is a lower level of abstraction than using the client interface and
+// enables, for one, explicitly building a graph of HLO instructions to run.
+//
+// This can also be used to write text/file-based test cases. Note that the test
+// target is responsible for linking the needed backends. A covenient way to do
+// this is to make it an xla_test: it will generate test targets linking with
+// the respective backends, which will be used as the test backend; the
+// interpreter backend is already linked with hlo_test_base so it will be the
+// default reference backend. For example, if you want to compare both cpu vs.
+// interpreter, and gpu vs. interpreter, you can:
+//
+//  xla_test (
+//    name = "sample_text_test",
+//    srcs = ["sample_text_test.cc"],
+//    backends = [
+//      "cpu",
+//      "gpu",
+//    ],
+//    deps = [
+//      "//third_party/tensorflow/compiler/xla/tests:hlo_test_base",
+//      ...
+//    ],
+//  )
+//
+// For a more detailed example, see "../tests/sample_text_test.cc".
 class HloTestBase : public ::testing::Test {
  protected:
-  HloTestBase() {}
+  // This uses the interpreter backend as the reference backend and
+  // automatically finds another supported backend as the test backend. If the
+  // interpreter is the only supported backend, it will be both the test backend
+  // and the reference backend.
+  HloTestBase();
+
+  // If your test doesn't use interpreter as the reference backend, you can use
+  // this constructor. Note that your test target is responsible for linking in
+  // both needed backends.
+  HloTestBase(::perftools::gputools::Platform* test_platform,
+              ::perftools::gputools::Platform* reference_platform);
 
   ~HloTestBase() override {}
 
   // Creates a new HLO module for a test. The module created will have
   // TestName() for its name; it will also automatically populate its debug
-  // options from command-line flags. It's recommended to use this method to
-  // create all HloModules for tests.
+  // options from command-line flags. If you want a fresh HloModule object and
+  // then add HloComputations to it, it's recommended to use this method in your
+  // tests.
   static std::unique_ptr<HloModule> CreateNewModule();
 
+  // Populates debug options from command-line flags and adjusts the options for
+  // testing. It is recommended to use this when you need to pass in
+  // DebugOptions, e.g. when creating a module from a string or a file.
+  static DebugOptions GetDebugOptionsForTest();
+
   // Executes the given module and returns a global data handle.
   StatusOr<perftools::gputools::DeviceMemoryBase> Execute(
       std::unique_ptr<HloModule> module,
@@ -71,6 +114,73 @@ class HloTestBase : public ::testing::Test {
       tensorflow::gtl::ArraySlice<perftools::gputools::DeviceMemoryBase>
           arguments);
 
+  // Executes the given hlo module on two backends and compares results.
+  //
+  // 'arguments': the input of the hlo module. The LiteralPtr type accepts
+  // Literal* or std::unique_ptr<Literal>.
+  //
+  // 'error': if has value, expects the results to be near (within the error
+  // bound). Otherwise, expects the results to be equal.
+  //
+  // 'reference_preprocessor': the module should be ready to run on the test
+  // backend, but it might need to be tailored so that it is able to run on the
+  // reference backend. Note that the program shape of the module must not be
+  // modified.
+  template <typename LiteralPtr>
+  ::testing::AssertionResult RunAndCompare(
+      std::unique_ptr<HloModule> module,
+      const tensorflow::gtl::ArraySlice<LiteralPtr> arguments,
+      const tensorflow::gtl::optional<ErrorSpec>& error,
+      const std::function<void(HloModule*)>& reference_preprocessor = nullptr)
+      TF_MUST_USE_RESULT;
+
+  // Same as above, except that the module will be executed without Hlo
+  // optimization.
+  template <typename LiteralPtr>
+  ::testing::AssertionResult RunAndCompareNoHloPasses(
+      std::unique_ptr<HloModule> module,
+      const tensorflow::gtl::ArraySlice<LiteralPtr> arguments,
+      const tensorflow::gtl::optional<ErrorSpec>& error,
+      const std::function<void(HloModule*)>& reference_preprocessor = nullptr)
+      TF_MUST_USE_RESULT;
+
+  // Executes an hlo module with fake inputs and compares the results.
+  ::testing::AssertionResult RunAndCompare(
+      std::unique_ptr<HloModule> module,
+      const tensorflow::gtl::optional<ErrorSpec>& error,
+      const std::function<void(HloModule*)>& reference_preprocessor = nullptr)
+      TF_MUST_USE_RESULT;
+
+  // Same as above, except that the module will be executed without Hlo
+  // optimization.
+  ::testing::AssertionResult RunAndCompareNoHloPasses(
+      std::unique_ptr<HloModule> module,
+      const tensorflow::gtl::optional<ErrorSpec>& error,
+      const std::function<void(HloModule*)>& reference_preprocessor = nullptr)
+      TF_MUST_USE_RESULT;
+
+  // Convenient wrappers for executing and comparing an hlo module with fake
+  // input. Module can be passed in directly, or parsed from an hlo_string,
+  // or loaded from a file.
+  ::testing::AssertionResult RunAndCompare(
+      const tensorflow::StringPiece hlo_string,
+      const tensorflow::gtl::optional<ErrorSpec>& error,
+      const std::function<void(HloModule*)>& reference_preprocessor = nullptr)
+      TF_MUST_USE_RESULT;
+  ::testing::AssertionResult RunAndCompareFromFile(
+      const string& filename, const tensorflow::gtl::optional<ErrorSpec>& error,
+      const std::function<void(HloModule*)>& reference_preprocessor = nullptr)
+      TF_MUST_USE_RESULT;
+  ::testing::AssertionResult RunAndCompareNoHloPasses(
+      const tensorflow::StringPiece hlo_string,
+      const tensorflow::gtl::optional<ErrorSpec>& error,
+      const std::function<void(HloModule*)>& reference_preprocessor = nullptr)
+      TF_MUST_USE_RESULT;
+  ::testing::AssertionResult RunAndCompareNoHloPassesFromFile(
+      const string& filename, const tensorflow::gtl::optional<ErrorSpec>& error,
+      const std::function<void(HloModule*)>& reference_preprocessor = nullptr)
+      TF_MUST_USE_RESULT;
+
   // Convenience method to force the layout of a given parameter in a module.
   // The layout of parameter number 'param_no' in the 'module' is set to
   // 'layout'.
@@ -101,12 +211,31 @@ class HloTestBase : public ::testing::Test {
 
   static string TestName();
 
-  // Returns the backend owned by the HloRunner.
+  // Returns the backend owned by the test runner.
   Backend& backend();
 
-  HloRunner runner_;
+  HloRunner test_runner_;
+  HloRunner reference_runner_;
 
   ErrorSpec error_spec_{0.0001};
+
+ private:
+  // Given the test module, makes a reference module that is ready to run on the
+  // reference platform. This assumes that the given module is ready to run on
+  // the test platform.
+  StatusOr<std::unique_ptr<HloModule>> MakeReferenceModule(
+      const HloModule& test_module,
+      const std::function<void(HloModule*)>& reference_preprocessor);
+
+  // Runs the module on two platforms with or without running hlo passes and
+  // compares the results. Returns whether the results are near or equal. If any
+  // error happens before the results are computed, returns the error status.
+  template <typename LiteralPtr>
+  StatusOr<::testing::AssertionResult> RunAndCompareInternal(
+      std::unique_ptr<HloModule> module,
+      const tensorflow::gtl::ArraySlice<LiteralPtr> arguments,
+      const tensorflow::gtl::optional<ErrorSpec>& error, bool run_hlo_passes,
+      const std::function<void(HloModule*)>& reference_preprocessor);
 };
 
 }  // namespace xla
diff --git a/tensorflow/compiler/xla/tests/isolated_convolution.hlo b/tensorflow/compiler/xla/tests/isolated_convolution.hlo
new file mode 100644
index 0000000000..9452780930
--- /dev/null
+++ b/tensorflow/compiler/xla/tests/isolated_convolution.hlo
@@ -0,0 +1,8 @@
+HloModule convolution.167:
+
+ENTRY %convolution.167 (parameter.0: f32[16,28,28,128], parameter.1: f32[3,3,128,128]) -> f32[16,28,28,128] {
+  %parameter.0 = f32[16,28,28,128]{3,0,2,1} parameter(0)
+  %parameter.1 = f32[3,3,128,128]{3,2,1,0} parameter(1)
+  ROOT %convolution.167 = f32[16,28,28,128]{3,0,2,1} convolution(f32[16,28,28,128]{3,0,2,1} %parameter.0, f32[3,3,128,128]{3,2,1,0} %parameter.1), window={size=3x3 pad=1_1x1_1}, dim_labels=b01f_01oi->b01f
+}
+
diff --git a/tensorflow/compiler/xla/tests/literal_test_util.cc b/tensorflow/compiler/xla/tests/literal_test_util.cc
index e1a948c096..bf6631a431 100644
--- a/tensorflow/compiler/xla/tests/literal_test_util.cc
+++ b/tensorflow/compiler/xla/tests/literal_test_util.cc
@@ -333,23 +333,37 @@ bool ExpectLiteralsEqual(const Literal& expected, const Literal& actual,
   return result;
 }
 
-/* static */ void LiteralTestUtil::ExpectEqualTuple(const Literal& expected,
-                                                    const Literal& actual) {
+/* static */ ::testing::AssertionResult LiteralTestUtil::EqualTuple(
+    const Literal& expected, const Literal& actual) {
   VLOG(1) << "expected: " << expected.ToString();
   VLOG(1) << "actual:   " << actual.ToString();
 
-  ASSERT_TRUE(ShapeUtil::IsTuple(expected.shape()));
-  ASSERT_TRUE(ShapeUtil::IsTuple(actual.shape()));
+  if (!ShapeUtil::IsTuple(expected.shape()) ||
+      !ShapeUtil::IsTuple(actual.shape())) {
+    return ::testing::AssertionFailure()
+           << "tuples expected shape = " << expected.shape().ShortDebugString()
+           << " actual shape = " << actual.shape().ShortDebugString();
+  }
   AssertEqualShapes(expected.shape(), actual.shape());
   for (uint64 i = 0; i < expected.tuple_literals_size(); ++i) {
     const auto& expected_element = expected.tuple_literals(i);
     const auto& actual_element = actual.tuple_literals(i);
     if (ShapeUtil::IsTuple(expected_element.shape())) {
-      ExpectEqualTuple(expected_element, actual_element);
+      auto ret = EqualTuple(expected_element, actual_element);
+      if (!ret) {
+        return ret;
+      }
     } else {
-      ExpectEqual(expected_element, actual_element);
+      return Equal(expected_element, actual_element);
     }
   }
+
+  return ::testing::AssertionSuccess();
+}
+
+/* static */ void LiteralTestUtil::ExpectEqualTuple(const Literal& expected,
+                                                    const Literal& actual) {
+  EXPECT_TRUE(EqualTuple(expected, actual));
 }
 
 namespace {
@@ -615,8 +629,7 @@ bool NearComparator::ExpectValuesNear<bfloat16>(bfloat16 expected,
   if (!ShapeUtil::IsTuple(expected.shape()) ||
       !ShapeUtil::IsTuple(actual.shape())) {
     return ::testing::AssertionFailure()
-           << "tuples expected expected shape = "
-           << expected.shape().ShortDebugString()
+           << "tuples expected shape = " << expected.shape().ShortDebugString()
            << " actual shape = " << actual.shape().ShortDebugString();
   }
   AssertEqualShapes(expected.shape(), actual.shape());
@@ -650,6 +663,32 @@ bool NearComparator::ExpectValuesNear<bfloat16>(bfloat16 expected,
   EXPECT_TRUE(NearTuple(expected, actual, error));
 }
 
+/*static*/ ::testing::AssertionResult LiteralTestUtil::NearOrEqual(
+    const Literal& expected, const Literal& actual,
+    const tensorflow::gtl::optional<ErrorSpec>& error) {
+  bool is_tuple = ShapeUtil::IsTuple(expected.shape());
+  if (error.has_value()) {
+    if (is_tuple) {
+      VLOG(1) << "Expects near tuple";
+      return NearTuple(expected, actual, *error);
+    }
+    VLOG(1) << "Expects near";
+    return Near(expected, actual, *error);
+  }
+  if (is_tuple) {
+    VLOG(1) << "Expects equal tuple";
+    return EqualTuple(expected, actual);
+  }
+  VLOG(1) << "Expects equal";
+  return Equal(expected, actual);
+}
+
+/*static*/ void LiteralTestUtil::ExpectNearOrEqual(
+    const Literal& expected, const Literal& actual,
+    const tensorflow::gtl::optional<ErrorSpec>& error) {
+  EXPECT_TRUE(NearOrEqual(expected, actual, error));
+}
+
 /* static */ string LiteralTestUtil::MultiIndexAsString(
     tensorflow::gtl::ArraySlice<int64> multi_index) {
   return tensorflow::strings::StrCat(
diff --git a/tensorflow/compiler/xla/tests/literal_test_util.h b/tensorflow/compiler/xla/tests/literal_test_util.h
index bf8c92f16d..f53553c701 100644
--- a/tensorflow/compiler/xla/tests/literal_test_util.h
+++ b/tensorflow/compiler/xla/tests/literal_test_util.h
@@ -31,6 +31,7 @@ limitations under the License.
 #include "tensorflow/compiler/xla/xla_data.pb.h"
 #include "tensorflow/core/lib/core/errors.h"
 #include "tensorflow/core/lib/gtl/array_slice.h"
+#include "tensorflow/core/lib/gtl/optional.h"
 #include "tensorflow/core/platform/macros.h"
 #include "tensorflow/core/platform/test.h"
 #include "tensorflow/core/platform/types.h"
@@ -110,6 +111,10 @@ class LiteralTestUtil {
   static void ExpectR4EqualArray4D(const Array4D<NativeT>& expected,
                                    const Literal& actual);
 
+  // Returns whether the two tuples are equal.
+  static ::testing::AssertionResult EqualTuple(
+      const Literal& expected, const Literal& actual) TF_MUST_USE_RESULT;
+
   // Expects that the values of the elements in the expected and actual tuples
   // are equal. Tuples are matched recursively.
   static void ExpectEqualTuple(const Literal& expected, const Literal& actual);
@@ -177,6 +182,19 @@ class LiteralTestUtil {
   static void ExpectNearTuple(const Literal& expected, const Literal& actual,
                               const ErrorSpec& error);
 
+  // If the error spec is given, returns whether the expected and the actual are
+  // within the error bound; otherwise, returns whether they are equal. Tuples
+  // will be compared recursively.
+  static ::testing::AssertionResult NearOrEqual(
+      const Literal& expected, const Literal& actual,
+      const tensorflow::gtl::optional<ErrorSpec>& error) TF_MUST_USE_RESULT;
+
+  // If the error spec is given, expects the expected and the actual to be near;
+  // otherwise, expects them to be equal. Tuples will be compared recursively.
+  static void ExpectNearOrEqual(
+      const Literal& expected, const Literal& actual,
+      const tensorflow::gtl::optional<ErrorSpec>& error);
+
   // Returns a multi-dimensional index as a string. For example: '{7, 8}' will
   // be returned for a 2-dimensional index with dimension 0 index equal to 7,
   // dimension 1 equal to 8.
diff --git a/tensorflow/compiler/xla/tests/sample_file_test.cc b/tensorflow/compiler/xla/tests/sample_file_test.cc
new file mode 100644
index 0000000000..31b104f4e3
--- /dev/null
+++ b/tensorflow/compiler/xla/tests/sample_file_test.cc
@@ -0,0 +1,51 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+// This demonstrates how to use hlo_test_base to create a file based testcase
+// and compare results on gpu and cpu.
+
+#include <string>
+#include <vector>
+
+#include "tensorflow/compiler/xla/service/platform_util.h"
+#include "tensorflow/compiler/xla/test.h"
+#include "tensorflow/compiler/xla/tests/hlo_test_base.h"
+#include "tensorflow/compiler/xla/tests/literal_test_util.h"
+#include "tensorflow/compiler/xla/types.h"
+#include "tensorflow/core/lib/io/path.h"
+#include "tensorflow/core/platform/test.h"
+#include "tensorflow/core/platform/types.h"
+
+namespace xla {
+namespace {
+
+class SampleFileTest : public HloTestBase {
+ protected:
+  SampleFileTest()
+      : HloTestBase(
+            /*test_platform=*/PlatformUtil::GetPlatform("gpu").ValueOrDie(),
+            /*reference_platform=*/PlatformUtil::GetPlatform("cpu")
+                .ValueOrDie()) {}
+};
+
+TEST_F(SampleFileTest, Convolution) {
+  const string& filename = "compiler/xla/tests/isolated_convolution.hlo";
+  string test_srcdir = tensorflow::testing::TensorFlowSrcRoot();
+  EXPECT_TRUE(RunAndCompareFromFile(
+      tensorflow::io::JoinPath(test_srcdir, filename), ErrorSpec{0.01}));
+}
+
+}  // namespace
+}  // namespace xla
diff --git a/tensorflow/compiler/xla/tests/sample_text_test.cc b/tensorflow/compiler/xla/tests/sample_text_test.cc
new file mode 100644
index 0000000000..b4f2b74e3d
--- /dev/null
+++ b/tensorflow/compiler/xla/tests/sample_text_test.cc
@@ -0,0 +1,66 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+// This demonstrates how to use hlo_test_base to create textual IR based
+// testcases.
+
+#include <string>
+#include <vector>
+
+#include "tensorflow/compiler/xla/test.h"
+#include "tensorflow/compiler/xla/tests/hlo_test_base.h"
+#include "tensorflow/compiler/xla/tests/literal_test_util.h"
+#include "tensorflow/compiler/xla/tests/test_macros.h"
+#include "tensorflow/compiler/xla/types.h"
+#include "tensorflow/core/lib/gtl/optional.h"
+#include "tensorflow/core/platform/types.h"
+
+namespace xla {
+namespace {
+
+using tensorflow::gtl::nullopt;
+
+class SampleTextTest : public HloTestBase {};
+
+TEST_F(SampleTextTest, Axpy) {
+  const string& hlo_string = R"(
+HloModule axpy_module:
+ENTRY %axpy.v5 (alpha: f32[], x: f32[2,4], y: f32[2,4]) -> f32[2,4] {
+  %alpha = f32[] parameter(0)
+  %broadcast = f32[2,4]{1,0} broadcast(f32[] %alpha), dimensions={}
+  %x = f32[2,4]{1,0} parameter(1)
+  %multiply = f32[2,4]{1,0} multiply(f32[2,4]{1,0} %broadcast, f32[2,4]{1,0} %x)
+  %y = f32[2,4]{1,0} parameter(2)
+  ROOT %add = f32[2,4]{1,0} add(f32[2,4]{1,0} %multiply, f32[2,4]{1,0} %y)
+}
+)";
+  EXPECT_TRUE(RunAndCompareNoHloPasses(hlo_string, ErrorSpec{0.0001}));
+}
+
+TEST_F(SampleTextTest, Tuple) {
+  const string& hlo_string = R"(
+HloModule TupleCreate_module:
+ENTRY %TupleCreate.v4 (v1: f32[], v2: f32[3], v3: f32[2,3]) -> (f32[], f32[3], f32[2,3]) {
+  %v1 = f32[] parameter(0)
+  %v2 = f32[3]{0} parameter(1)
+  %v3 = f32[2,3]{1,0} parameter(2)
+  ROOT %tuple = (f32[], f32[3]{0}, f32[2,3]{1,0}) tuple(f32[] %v1, f32[3]{0} %v2, f32[2,3]{1,0} %v3)
+}
+)";
+  EXPECT_TRUE(RunAndCompare(hlo_string, nullopt));
+}
+
+}  // namespace
+}  // namespace xla
-- 
GitLab


From 6e04085f90c5c0c2a49723cc682b16327c994957 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 7 Dec 2017 16:14:08 -0800
Subject: [PATCH 0780/1225] Change TraceListener::BlockHostUntilDoneComplete to
 pass Status* rather than bool.

Also fix the trace for StreamExecutor::SynchronousMemcpyD2H, which wasn't
updating the result correctly.

PiperOrigin-RevId: 178311956
---
 .../stream_executor/stream_executor_pimpl.cc  | 39 ++++++++-----------
 tensorflow/stream_executor/trace_listener.h   |  3 +-
 2 files changed, 19 insertions(+), 23 deletions(-)

diff --git a/tensorflow/stream_executor/stream_executor_pimpl.cc b/tensorflow/stream_executor/stream_executor_pimpl.cc
index 5630255b5d..719f292937 100644
--- a/tensorflow/stream_executor/stream_executor_pimpl.cc
+++ b/tensorflow/stream_executor/stream_executor_pimpl.cc
@@ -433,14 +433,11 @@ bool StreamExecutor::Launch(Stream *stream, const ThreadDim &thread_dims,
 }
 
 port::Status StreamExecutor::BlockHostUntilDoneWithStatus(Stream *stream) {
-  // TODO(toddw): Change TraceListener::BlockHostUntilDone to record Status
-  // rather than bool.
-  bool trace_result;
-  SCOPED_TRACE(TraceListener::BlockHostUntilDone, &trace_result, stream);
+  port::Status result;
+  SCOPED_TRACE(TraceListener::BlockHostUntilDone, &result, stream);
 
-  port::Status status = implementation_->BlockHostUntilDoneWithStatus(stream);
-  trace_result = status.ok();
-  return status;
+  result = implementation_->BlockHostUntilDoneWithStatus(stream);
+  return result;
 }
 
 void *StreamExecutor::Allocate(uint64 size) {
@@ -569,19 +566,18 @@ port::Status StreamExecutor::SynchronousMemcpyD2H(
           << device_src.opaque() << ", size=" << size
           << ", host_dst=" << host_dst << ")" << StackTraceIfVLOG10();
 
-  port::Status result{port::Status::OK()};
+  port::Status result;
   SCOPED_TRACE(TraceListener::SynchronousMemcpyD2H, &result, device_src, size,
                host_dst);
 
-  port::Status status =
-      implementation_->SynchronousMemcpy(host_dst, device_src, size);
-  if (!status.ok()) {
-    return port::Status{port::error::INTERNAL,
-                        port::Printf("failed to synchronously memcpy "
-                                     "device-to-host: device %p to host %p "
-                                     "size %lld: %s",
-                                     device_src.opaque(), host_dst, size,
-                                     status.ToString().c_str())};
+  result = implementation_->SynchronousMemcpy(host_dst, device_src, size);
+  if (!result.ok()) {
+    result = port::Status{port::error::INTERNAL,
+                          port::Printf("failed to synchronously memcpy "
+                                       "device-to-host: device %p to host %p "
+                                       "size %lld: %s",
+                                       device_src.opaque(), host_dst, size,
+                                       result.ToString().c_str())};
   }
 
   return result;
@@ -593,19 +589,18 @@ port::Status StreamExecutor::SynchronousMemcpyH2D(
           << ", size=" << size << ", device_dst" << device_dst->opaque() << ")"
           << StackTraceIfVLOG10();
 
-  port::Status result{port::Status::OK()};
+  port::Status result;
   SCOPED_TRACE(TraceListener::SynchronousMemcpyH2D, &result, host_src, size,
                device_dst);
 
-  port::Status status =
-      implementation_->SynchronousMemcpy(device_dst, host_src, size);
-  if (!status.ok()) {
+  result = implementation_->SynchronousMemcpy(device_dst, host_src, size);
+  if (!result.ok()) {
     result = port::Status{
         port::error::INTERNAL,
         port::Printf("failed to synchronously memcpy host-to-device: host "
                      "%p to device %p size %lld: %s",
                      host_src, device_dst->opaque(), size,
-                     status.ToString().c_str())};
+                     result.ToString().c_str())};
   }
 
   return result;
diff --git a/tensorflow/stream_executor/trace_listener.h b/tensorflow/stream_executor/trace_listener.h
index 88c54f982b..d1e87c348b 100644
--- a/tensorflow/stream_executor/trace_listener.h
+++ b/tensorflow/stream_executor/trace_listener.h
@@ -65,7 +65,8 @@ class TraceListener {
                                             const port::Status* result) {}
 
   virtual void BlockHostUntilDoneBegin(int64 correlation_id, Stream* stream) {}
-  virtual void BlockHostUntilDoneComplete(int64 correlation_id, bool result) {}
+  virtual void BlockHostUntilDoneComplete(int64 correlation_id,
+                                          const port::Status* result) {}
 };
 
 }  // namespace gputools
-- 
GitLab


From 6c4af6202c3984d7eabc8044c43579315c4b07a2 Mon Sep 17 00:00:00 2001
From: Justine Tunney <jart@google.com>
Date: Thu, 7 Dec 2017 16:16:36 -0800
Subject: [PATCH 0781/1225] Make SQLite random IDs unique across tables

We now have an invariant that no two IDs are the same across tables. We
also assume that only one instance will ever write tensors to a given
run. This approach to IDs also allows us to be more carefree about
garbage data, so we don't need the transactions anymore. It also brings
latency down from 16ms to 8ms.

Name                      Cold ?s   Average ?s  Flushing ?s       Size B
?i.i                       10,020          264            0            0
Scalar 1.0 FS               7,996          711        4,808       11,348
Scalar 1.0 TB FS           14,875          891        5,487       17,023
Scalar 2.0 FS              13,123          891        4,499       11,348
Scalar 2.0 DB              72,497        8,472        8,875      118,784
Tensor 1.0 FS 4            16,128          856        4,785       14,215
Tensor 2.0 FS 4            23,765        1,032        4,508       24,455
Tensor 2.0 DB 4            91,735        8,407        8,175      118,784
Tensor 1.0 FS 128          18,592          831        4,950       47,111
Tensor 2.0 FS 128          18,174        1,033        4,498       57,351
Tensor 2.0 DB 128          98,045       17,799        8,710      118,784
Tensor 1.0 FS 8192         19,225        1,164        5,217    2,119,816
Tensor 2.0 FS 8192         16,979          921        4,360    2,130,056
Tensor 2.0 DB 8192        108,704        8,470        8,543      126,976

PiperOrigin-RevId: 178312341
---
 .../contrib/summary/summary_ops_test.py       |   5 +-
 tensorflow/contrib/tensorboard/db/schema.cc   |  93 ++-
 .../tensorboard/db/summary_db_writer.cc       | 548 +++++++++++-------
 .../tensorboard/db/summary_db_writer_test.cc  |  61 +-
 4 files changed, 451 insertions(+), 256 deletions(-)

diff --git a/tensorflow/contrib/summary/summary_ops_test.py b/tensorflow/contrib/summary/summary_ops_test.py
index 0b8e0b967c..4ef03434b7 100644
--- a/tensorflow/contrib/summary/summary_ops_test.py
+++ b/tensorflow/contrib/summary/summary_ops_test.py
@@ -140,6 +140,7 @@ class DbTest(summary_test_util.SummaryDbTest):
 
   def testIntegerSummaries(self):
     step = training_util.create_global_step()
+    writer = self.create_db_writer()
 
     def adder(x, y):
       state_ops.assign_add(step, 1)
@@ -150,7 +151,7 @@ class DbTest(summary_test_util.SummaryDbTest):
       return sum_
 
     with summary_ops.always_record_summaries():
-      with self.create_db_writer().as_default():
+      with writer.as_default():
         self.assertEqual(5, adder(int64(2), int64(3)).numpy())
 
     six.assertCountEqual(self, [1, 1, 1],
@@ -162,7 +163,7 @@ class DbTest(summary_test_util.SummaryDbTest):
     sum_id = get_one(self.db, 'SELECT tag_id FROM Tags WHERE tag_name = "sum"')
 
     with summary_ops.always_record_summaries():
-      with self.create_db_writer().as_default():
+      with writer.as_default():
         self.assertEqual(9, adder(int64(4), int64(5)).numpy())
 
     six.assertCountEqual(self, [1, 1, 1, 2, 2, 2],
diff --git a/tensorflow/contrib/tensorboard/db/schema.cc b/tensorflow/contrib/tensorboard/db/schema.cc
index d63b2c6cc2..fd024d692c 100644
--- a/tensorflow/contrib/tensorboard/db/schema.cc
+++ b/tensorflow/contrib/tensorboard/db/schema.cc
@@ -21,6 +21,48 @@ class SqliteSchema {
  public:
   explicit SqliteSchema(std::shared_ptr<Sqlite> db) : db_(std::move(db)) {}
 
+  /// \brief Creates Ids table.
+  ///
+  /// This table must be used to randomly allocate Permanent IDs for
+  /// all top-level tables, in order to maintain an invariant where
+  /// foo_id != bar_id for all IDs of any two tables.
+  ///
+  /// A row should only be deleted from this table if it can be
+  /// guaranteed that it exists absolutely nowhere else in the entire
+  /// system.
+  ///
+  /// Fields:
+  ///   id: An ID that was allocated globally. This must be in the
+  ///     range [1,2**47). 0 is assigned the same meaning as NULL and
+  ///     shouldn't be stored; 2**63-1 is reserved for statically
+  ///     allocating space in a page to UPDATE later; and all other
+  ///     int64 values are reserved for future use.
+  Status CreateIdsTable() {
+    return Run(R"sql(
+      CREATE TABLE IF NOT EXISTS Ids (
+        id INTEGER PRIMARY KEY
+      )
+    )sql");
+  }
+
+  /// \brief Creates Descriptions table.
+  ///
+  /// This table allows TensorBoard to associate Markdown text with any
+  /// object in the database that has a Permanent ID.
+  ///
+  /// Fields:
+  ///   id: The Permanent ID of the associated object. This is also the
+  ///     SQLite rowid.
+  ///   description: Arbitrary Markdown text.
+  Status CreateDescriptionsTable() {
+    return Run(R"sql(
+      CREATE TABLE IF NOT EXISTS Descriptions (
+        id INTEGER PRIMARY KEY,
+        description TEXT
+      )
+    )sql");
+  }
+
   /// \brief Creates Tensors table.
   ///
   /// Fields:
@@ -83,15 +125,15 @@ class SqliteSchema {
   ///
   /// Fields:
   ///   rowid: Ephemeral b-tree ID dictating locality.
-  ///   tag_id: Permanent >0 unique ID.
+  ///   tag_id: The Permanent ID of the Tag.
   ///   run_id: Optional ID of associated Run.
   ///   tag_name: The tag field in summary.proto, unique across Run.
   ///   inserted_time: Float UNIX timestamp with µs precision. This is
   ///     always the wall time of when the row was inserted into the
   ///     DB. It may be used as a hint for an archival job.
-  ///   metadata: Optional BLOB of SummaryMetadata proto.
   ///   display_name: Optional for GUI and defaults to tag_name.
-  ///   summary_description: Optional markdown information.
+  ///   plugin_name: Arbitrary TensorBoard plugin name for dispatch.
+  ///   plugin_data: Arbitrary data that plugin wants.
   Status CreateTagsTable() {
     return Run(R"sql(
       CREATE TABLE IF NOT EXISTS Tags (
@@ -100,28 +142,31 @@ class SqliteSchema {
         tag_id INTEGER NOT NULL,
         tag_name TEXT,
         inserted_time DOUBLE,
-        metadata BLOB,
         display_name TEXT,
-        description TEXT
+        plugin_name TEXT,
+        plugin_data BLOB
       )
     )sql");
   }
 
   /// \brief Creates Runs table.
   ///
-  /// This table stores information about runs. Each row usually
+  /// This table stores information about Runs. Each row usually
   /// represents a single attempt at training or testing a TensorFlow
   /// model, with a given set of hyper-parameters, whose summaries are
   /// written out to a single event logs directory with a monotonic step
   /// counter.
   ///
-  /// When a run is deleted from this table, TensorBoard should treat all
-  /// information associated with it as deleted, even if those rows in
-  /// different tables still exist.
-  ///
   /// Fields:
   ///   rowid: Ephemeral b-tree ID dictating locality.
-  ///   run_id: Permanent >0 unique ID.
+  ///   run_id: The Permanent ID of the Run. This has a 1:1 mapping
+  ///     with a SummaryWriter instance. If two writers spawn for a
+  ///     given (user_name, run_name, run_name) then each should
+  ///     allocate its own run_id and whichever writer puts it in the
+  ///     database last wins. The Tags / Tensors associated with the
+  ///     previous invocations will then enter limbo, where they may be
+  ///     accessible for certain operations, but should be garbage
+  ///     collected eventually.
   ///   experiment_id: Optional ID of associated Experiment.
   ///   run_name: User-supplied string, unique across Experiment.
   ///   inserted_time: Float UNIX timestamp with µs precision. This is
@@ -134,7 +179,10 @@ class SqliteSchema {
   ///     started, from the perspective of whichever machine talks to
   ///     the database. This field will be mutated if the run is
   ///     restarted.
-  ///   description: Optional markdown information.
+  ///   finished_time: Float UNIX timestamp with µs precision of when
+  ///     SummaryWriter resource that created this run was destroyed.
+  ///     Once this value becomes non-NULL a Run and its Tags and
+  ///     Tensors should be regarded as immutable.
   ///   graph_id: ID of associated Graphs row.
   Status CreateRunsTable() {
     return Run(R"sql(
@@ -145,7 +193,7 @@ class SqliteSchema {
         run_name TEXT,
         inserted_time REAL,
         started_time REAL,
-        description TEXT,
+        finished_time REAL,
         graph_id INTEGER
       )
     )sql");
@@ -159,15 +207,15 @@ class SqliteSchema {
   /// Fields:
   ///   rowid: Ephemeral b-tree ID dictating locality.
   ///   user_id: Optional ID of associated User.
-  ///   experiment_id: Permanent >0 unique ID.
+  ///   experiment_id: The Permanent ID of the Experiment.
   ///   experiment_name: User-supplied string, unique across User.
   ///   inserted_time: Float UNIX timestamp with µs precision. This is
   ///     always the time the row was inserted into the database. It
   ///     does not change.
   ///   started_time: Float UNIX timestamp with µs precision. This is
   ///     the MIN(experiment.started_time, run.started_time) of each
-  ///     Run added to the database.
-  ///   description: Optional markdown information.
+  ///     Run added to the database, including Runs which have since
+  ///     been overwritten.
   Status CreateExperimentsTable() {
     return Run(R"sql(
       CREATE TABLE IF NOT EXISTS Experiments (
@@ -176,8 +224,7 @@ class SqliteSchema {
         experiment_id INTEGER NOT NULL,
         experiment_name TEXT,
         inserted_time REAL,
-        started_time REAL,
-        description TEXT
+        started_time REAL
       )
     )sql");
   }
@@ -186,7 +233,7 @@ class SqliteSchema {
   ///
   /// Fields:
   ///   rowid: Ephemeral b-tree ID dictating locality.
-  ///   user_id: Permanent >0 unique ID.
+  ///   user_id: The Permanent ID of the User.
   ///   user_name: Unique user name.
   ///   email: Optional unique email address.
   ///   inserted_time: Float UNIX timestamp with µs precision. This is
@@ -208,7 +255,7 @@ class SqliteSchema {
   ///
   /// Fields:
   ///   rowid: Ephemeral b-tree ID dictating locality.
-  ///   graph_id: Permanent >0 unique ID.
+  ///   graph_id: The Permanent ID of the Graph.
   ///   inserted_time: Float UNIX timestamp with µs precision. This is
   ///     always the wall time of when the row was inserted into the
   ///     DB. It may be used as a hint for an archival job.
@@ -229,7 +276,7 @@ class SqliteSchema {
   ///
   /// Fields:
   ///   rowid: Ephemeral b-tree ID dictating locality.
-  ///   graph_id: Permanent >0 unique ID.
+  ///   graph_id: The Permanent ID of the associated Graph.
   ///   node_id: ID for this node. This is more like a 0-index within
   ///     the Graph. Please note indexes are allowed to be removed.
   ///   node_name: Unique name for this Node within Graph. This is
@@ -258,7 +305,7 @@ class SqliteSchema {
   ///
   /// Fields:
   ///   rowid: Ephemeral b-tree ID dictating locality.
-  ///   graph_id: Permanent >0 unique ID.
+  ///   graph_id: The Permanent ID of the associated Graph.
   ///   node_id: Index of Node in question. This can be considered the
   ///     'to' vertex.
   ///   idx: Used for ordering inputs on a given Node.
@@ -420,6 +467,8 @@ class SqliteSchema {
 
 Status SetupTensorboardSqliteDb(std::shared_ptr<Sqlite> db) {
   SqliteSchema s(std::move(db));
+  TF_RETURN_IF_ERROR(s.CreateIdsTable());
+  TF_RETURN_IF_ERROR(s.CreateDescriptionsTable());
   TF_RETURN_IF_ERROR(s.CreateTensorsTable());
   TF_RETURN_IF_ERROR(s.CreateTensorChunksTable());
   TF_RETURN_IF_ERROR(s.CreateTagsTable());
diff --git a/tensorflow/contrib/tensorboard/db/summary_db_writer.cc b/tensorflow/contrib/tensorboard/db/summary_db_writer.cc
index 37a32acb1e..04b9c8e457 100644
--- a/tensorflow/contrib/tensorboard/db/summary_db_writer.cc
+++ b/tensorflow/contrib/tensorboard/db/summary_db_writer.cc
@@ -29,22 +29,25 @@ limitations under the License.
 namespace tensorflow {
 namespace {
 
+// https://www.sqlite.org/fileformat.html#record_format
+const uint64 kIdTiers[] = {
+    0x7fffffULL,        // 23-bit (3 bytes on disk)
+    0x7fffffffULL,      // 31-bit (4 bytes on disk)
+    0x7fffffffffffULL,  // 47-bit (5 bytes on disk)
+                        // Remaining bits reserved for future use.
+};
+const int kMaxIdTier = sizeof(kIdTiers) / sizeof(uint64);
+const int kIdCollisionDelayMicros = 10;
+const int kMaxIdCollisions = 21;  // sum(2**i*10µs for i in range(21))~=21s
+const int64 kAbsent = 0LL;
+const int64 kReserved = 0x7fffffffffffffffLL;
+
 double GetWallTime(Env* env) {
   // TODO(@jart): Follow precise definitions for time laid out in schema.
   // TODO(@jart): Use monotonic clock from gRPC codebase.
   return static_cast<double>(env->NowMicros()) / 1.0e6;
 }
 
-int64 MakeRandomId() {
-  // TODO(@jart): Try generating ID in 2^24 space, falling back to 2^63
-  //              https://sqlite.org/src4/doc/trunk/www/varint.wiki
-  int64 id = static_cast<int64>(random::New64() & ((1ULL << 63) - 1));
-  if (id == 0) {
-    ++id;
-  }
-  return id;
-}
-
 Status Serialize(const protobuf::MessageLite& proto, string* output) {
   output->clear();
   if (!proto.SerializeToString(output)) {
@@ -130,54 +133,69 @@ Status CoerceScalar(const Tensor& t, Tensor* out) {
   return Status::OK();
 }
 
-class Transactor {
+/// \brief Generates unique IDs randomly in the [1,2**63-2] range.
+///
+/// This class starts off generating IDs in the [1,2**23-1] range,
+/// because it's human friendly and occupies 4 bytes max on disk with
+/// SQLite's zigzag varint encoding. Then, each time a collision
+/// happens, the random space is increased by 8 bits.
+///
+/// This class uses exponential back-off so writes will slow down as
+/// the ID space becomes exhausted.
+class IdAllocator {
  public:
-  explicit Transactor(std::shared_ptr<Sqlite> db)
-      : db_(std::move(db)),
-        begin_(db_->Prepare("BEGIN TRANSACTION")),
-        commit_(db_->Prepare("COMMIT TRANSACTION")),
-        rollback_(db_->Prepare("ROLLBACK TRANSACTION")) {}
-
-  template <typename T, typename... Args>
-  Status Transact(T callback, Args&&... args) {
-    TF_RETURN_IF_ERROR(begin_.StepAndReset());
-    Status s = callback(std::forward<Args>(args)...);
-    if (s.ok()) {
-      TF_RETURN_IF_ERROR(commit_.StepAndReset());
-    } else {
-      TF_RETURN_WITH_CONTEXT_IF_ERROR(rollback_.StepAndReset(), s.ToString());
+  IdAllocator(Env* env, Sqlite* db)
+      : env_{env}, inserter_{db->Prepare("INSERT INTO Ids (id) VALUES (?)")} {}
+
+  Status CreateNewId(int64* id) {
+    Status s;
+    for (int i = 0; i < kMaxIdCollisions; ++i) {
+      int64 tid = MakeRandomId();
+      inserter_.BindInt(1, tid);
+      s = inserter_.StepAndReset();
+      if (s.ok()) {
+        *id = tid;
+        break;
+      }
+      // SQLITE_CONSTRAINT maps to INVALID_ARGUMENT in sqlite.cc
+      if (s.code() != error::INVALID_ARGUMENT) break;
+      if (tier_ < kMaxIdTier) {
+        LOG(INFO) << "IdAllocator collision at tier " << tier_ << " (of "
+                  << kMaxIdTier << ") so auto-adjusting to a higher tier";
+        ++tier_;
+      } else {
+        LOG(WARNING) << "IdAllocator (attempt #" << i << ") "
+                     << "resulted in a collision at the highest tier; this "
+                        "is problematic if it happens often; you can try "
+                        "pruning the Ids table; you can also file a bug "
+                        "asking for the ID space to be increased; otherwise "
+                        "writes will gradually slow down over time until they "
+                        "become impossible";
+      }
+      env_->SleepForMicroseconds((1 << i) * kIdCollisionDelayMicros);
     }
     return s;
   }
 
  private:
-  std::shared_ptr<Sqlite> db_;
-  SqliteStatement begin_;
-  SqliteStatement commit_;
-  SqliteStatement rollback_;
+  int64 MakeRandomId() {
+    int64 id = static_cast<int64>(random::New64() & kIdTiers[tier_]);
+    if (id == kAbsent) ++id;
+    if (id == kReserved) --id;
+    return id;
+  }
+
+  Env* env_;
+  SqliteStatement inserter_;
+  int tier_ = 0;
 };
 
 class GraphSaver {
  public:
-  static Status SaveToRun(Env* env, Sqlite* db, GraphDef* graph, int64 run_id) {
-    auto get = db->Prepare("SELECT graph_id FROM Runs WHERE run_id = ?");
-    get.BindInt(1, run_id);
-    bool is_done;
-    TF_RETURN_IF_ERROR(get.Step(&is_done));
-    int64 graph_id = is_done ? 0 : get.ColumnInt(0);
-    if (graph_id == 0) {
-      graph_id = MakeRandomId();
-      // TODO(@jart): Check for ID collision.
-      auto set = db->Prepare("UPDATE Runs SET graph_id = ? WHERE run_id = ?");
-      set.BindInt(1, graph_id);
-      set.BindInt(2, run_id);
-      TF_RETURN_IF_ERROR(set.StepAndReset());
-    }
-    return Save(env, db, graph, graph_id);
-  }
-
-  static Status Save(Env* env, Sqlite* db, GraphDef* graph, int64 graph_id) {
-    GraphSaver saver{env, db, graph, graph_id};
+  static Status Save(Env* env, Sqlite* db, IdAllocator* id_allocator,
+                     GraphDef* graph, int64* graph_id) {
+    TF_RETURN_IF_ERROR(id_allocator->CreateNewId(graph_id));
+    GraphSaver saver{env, db, graph, *graph_id};
     saver.MapNameToNodeId();
     TF_RETURN_IF_ERROR(saver.SaveNodeInputs());
     TF_RETURN_IF_ERROR(saver.SaveNodes());
@@ -202,9 +220,6 @@ class GraphSaver {
   }
 
   Status SaveNodeInputs() {
-    auto purge = db_->Prepare("DELETE FROM NodeInputs WHERE graph_id = ?");
-    purge.BindInt(1, graph_id_);
-    TF_RETURN_IF_ERROR(purge.StepAndReset());
     auto insert = db_->Prepare(R"sql(
       INSERT INTO NodeInputs (graph_id, node_id, idx, input_node_id, is_control)
       VALUES (?, ?, ?, ?, ?)
@@ -233,9 +248,6 @@ class GraphSaver {
   }
 
   Status SaveNodes() {
-    auto purge = db_->Prepare("DELETE FROM Nodes WHERE graph_id = ?");
-    purge.BindInt(1, graph_id_);
-    TF_RETURN_IF_ERROR(purge.StepAndReset());
     auto insert = db_->Prepare(R"sql(
       INSERT INTO Nodes (graph_id, node_id, node_name, op, device, node_def)
       VALUES (?, ?, ?, ?, ?, ?)
@@ -263,7 +275,7 @@ class GraphSaver {
 
   Status SaveGraph() {
     auto insert = db_->Prepare(R"sql(
-      INSERT OR REPLACE INTO Graphs (graph_id, inserted_time, graph_def)
+      INSERT INTO Graphs (graph_id, inserted_time, graph_def)
       VALUES (?, ?, ?)
     )sql");
     insert.BindInt(1, graph_id_);
@@ -281,62 +293,258 @@ class GraphSaver {
   std::unordered_map<StringPiece, int64, StringPieceHasher> name_to_node_id_;
 };
 
-class SummaryDbWriter : public SummaryWriterInterface {
+class RunWriter {
  public:
-  SummaryDbWriter(Env* env, std::shared_ptr<Sqlite> db)
-      : SummaryWriterInterface(),
-        env_(env),
-        db_(std::move(db)),
-        txn_(db_),
-        run_id_{0LL} {}
-  ~SummaryDbWriter() override {}
+  RunWriter(Env* env, std::shared_ptr<Sqlite> db, const string& experiment_name,
+            const string& run_name, const string& user_name)
+      : env_{env},
+        db_{std::move(db)},
+        id_allocator_{env_, db_.get()},
+        experiment_name_{experiment_name},
+        run_name_{run_name},
+        user_name_{user_name},
+        insert_tensor_{db_->Prepare(R"sql(
+          INSERT OR REPLACE INTO Tensors (tag_id, step, computed_time, tensor)
+          VALUES (?, ?, ?, ?)
+        )sql")} {}
+
+  ~RunWriter() {
+    if (run_id_ == kAbsent) return;
+    auto update = db_->Prepare(R"sql(
+      UPDATE Runs SET finished_time = ? WHERE run_id = ?
+    )sql");
+    update.BindDouble(1, GetWallTime(env_));
+    update.BindInt(2, run_id_);
+    Status s = update.StepAndReset();
+    if (!s.ok()) {
+      LOG(ERROR) << "Failed to set Runs[" << run_id_
+                 << "].finish_time: " << s.ToString();
+    }
+  }
 
-  Status Initialize(const string& experiment_name, const string& run_name,
-                    const string& user_name) {
-    mutex_lock ml(mu_);
-    insert_tensor_ = db_->Prepare(R"sql(
-      INSERT OR REPLACE INTO Tensors (tag_id, step, computed_time, tensor)
-      VALUES (?, ?, ?, ?)
+  Status InsertTensor(int64 tag_id, int64 step, double computed_time,
+                      Tensor t) {
+    insert_tensor_.BindInt(1, tag_id);
+    insert_tensor_.BindInt(2, step);
+    insert_tensor_.BindDouble(3, computed_time);
+    if (t.shape().dims() == 0 && t.dtype() == DT_INT64) {
+      insert_tensor_.BindInt(4, t.scalar<int64>()());
+    } else if (t.shape().dims() == 0 && t.dtype() == DT_DOUBLE) {
+      insert_tensor_.BindDouble(4, t.scalar<double>()());
+    } else {
+      TF_RETURN_IF_ERROR(BindTensor(&insert_tensor_, 4, t));
+    }
+    return insert_tensor_.StepAndReset();
+  }
+
+  Status InsertGraph(std::unique_ptr<GraphDef> g, double computed_time) {
+    TF_RETURN_IF_ERROR(InitializeRun(computed_time));
+    int64 graph_id;
+    TF_RETURN_IF_ERROR(
+        GraphSaver::Save(env_, db_.get(), &id_allocator_, g.get(), &graph_id));
+    if (run_id_ != kAbsent) {
+      auto set = db_->Prepare("UPDATE Runs SET graph_id = ? WHERE run_id = ?");
+      set.BindInt(1, graph_id);
+      set.BindInt(2, run_id_);
+      TF_RETURN_IF_ERROR(set.StepAndReset());
+    }
+    return Status::OK();
+  }
+
+  Status GetTagId(double computed_time, const string& tag_name,
+                  const SummaryMetadata& metadata, int64* tag_id) {
+    TF_RETURN_IF_ERROR(InitializeRun(computed_time));
+    auto e = tag_ids_.find(tag_name);
+    if (e != tag_ids_.end()) {
+      *tag_id = e->second;
+      return Status::OK();
+    }
+    TF_RETURN_IF_ERROR(id_allocator_.CreateNewId(tag_id));
+    tag_ids_[tag_name] = *tag_id;
+    if (!metadata.summary_description().empty()) {
+      SqliteStatement insert_description = db_->Prepare(R"sql(
+        INSERT INTO Descriptions (id, description) VALUES (?, ?)
+      )sql");
+      insert_description.BindInt(1, *tag_id);
+      insert_description.BindText(2, metadata.summary_description());
+      TF_RETURN_IF_ERROR(insert_description.StepAndReset());
+    }
+    SqliteStatement insert = db_->Prepare(R"sql(
+      INSERT INTO Tags (
+        run_id,
+        tag_id,
+        tag_name,
+        inserted_time,
+        display_name,
+        plugin_name,
+        plugin_data
+      ) VALUES (?, ?, ?, ?, ?, ?, ?)
     )sql");
-    update_metadata_ = db_->Prepare(R"sql(
-      UPDATE Tags SET metadata = ? WHERE tag_id = ?
+    if (run_id_ != kAbsent) insert.BindInt(1, run_id_);
+    insert.BindInt(2, *tag_id);
+    insert.BindText(3, tag_name);
+    insert.BindDouble(4, GetWallTime(env_));
+    if (!metadata.display_name().empty()) {
+      insert.BindText(5, metadata.display_name());
+    }
+    if (!metadata.plugin_data().plugin_name().empty()) {
+      insert.BindText(6, metadata.plugin_data().plugin_name());
+    }
+    if (!metadata.plugin_data().content().empty()) {
+      insert.BindBlob(7, metadata.plugin_data().content());
+    }
+    return insert.StepAndReset();
+  }
+
+ private:
+  Status InitializeUser() {
+    if (user_id_ != kAbsent || user_name_.empty()) return Status::OK();
+    SqliteStatement get = db_->Prepare(R"sql(
+      SELECT user_id FROM Users WHERE user_name = ?
     )sql");
-    experiment_name_ = experiment_name;
-    run_name_ = run_name;
-    user_name_ = user_name;
+    get.BindText(1, user_name_);
+    bool is_done;
+    TF_RETURN_IF_ERROR(get.Step(&is_done));
+    if (!is_done) {
+      user_id_ = get.ColumnInt(0);
+      return Status::OK();
+    }
+    TF_RETURN_IF_ERROR(id_allocator_.CreateNewId(&user_id_));
+    SqliteStatement insert = db_->Prepare(R"sql(
+      INSERT INTO Users (user_id, user_name, inserted_time) VALUES (?, ?, ?)
+    )sql");
+    insert.BindInt(1, user_id_);
+    insert.BindText(2, user_name_);
+    insert.BindDouble(3, GetWallTime(env_));
+    TF_RETURN_IF_ERROR(insert.StepAndReset());
+    return Status::OK();
+  }
+
+  Status InitializeExperiment(double computed_time) {
+    if (experiment_name_.empty()) return Status::OK();
+    if (experiment_id_ == kAbsent) {
+      TF_RETURN_IF_ERROR(InitializeUser());
+      SqliteStatement get = db_->Prepare(R"sql(
+        SELECT
+          experiment_id,
+          started_time
+        FROM
+          Experiments
+        WHERE
+          user_id IS ?
+          AND experiment_name = ?
+      )sql");
+      if (user_id_ != kAbsent) get.BindInt(1, user_id_);
+      get.BindText(2, experiment_name_);
+      bool is_done;
+      TF_RETURN_IF_ERROR(get.Step(&is_done));
+      if (!is_done) {
+        experiment_id_ = get.ColumnInt(0);
+        experiment_started_time_ = get.ColumnInt(1);
+      } else {
+        TF_RETURN_IF_ERROR(id_allocator_.CreateNewId(&experiment_id_));
+        experiment_started_time_ = computed_time;
+        SqliteStatement insert = db_->Prepare(R"sql(
+          INSERT INTO Experiments (
+            user_id,
+            experiment_id,
+            experiment_name,
+            inserted_time,
+            started_time
+          ) VALUES (?, ?, ?, ?, ?)
+        )sql");
+        if (user_id_ != kAbsent) insert.BindInt(1, user_id_);
+        insert.BindInt(2, experiment_id_);
+        insert.BindText(3, experiment_name_);
+        insert.BindDouble(4, GetWallTime(env_));
+        insert.BindDouble(5, computed_time);
+        TF_RETURN_IF_ERROR(insert.StepAndReset());
+      }
+    }
+    if (computed_time < experiment_started_time_) {
+      experiment_started_time_ = computed_time;
+      SqliteStatement update = db_->Prepare(R"sql(
+        UPDATE Experiments SET started_time = ? WHERE experiment_id = ?
+      )sql");
+      update.BindDouble(1, computed_time);
+      update.BindInt(2, experiment_id_);
+      TF_RETURN_IF_ERROR(update.StepAndReset());
+    }
     return Status::OK();
   }
 
-  // TODO(@jart): Use transactions that COMMIT on Flush()
-  // TODO(@jart): Retry Commit() on SQLITE_BUSY with exponential back-off.
+  Status InitializeRun(double computed_time) {
+    if (run_name_.empty()) return Status::OK();
+    TF_RETURN_IF_ERROR(InitializeExperiment(computed_time));
+    if (run_id_ == kAbsent) {
+      TF_RETURN_IF_ERROR(id_allocator_.CreateNewId(&run_id_));
+      run_started_time_ = computed_time;
+      SqliteStatement insert = db_->Prepare(R"sql(
+        INSERT OR REPLACE INTO Runs (
+          experiment_id,
+          run_id,
+          run_name,
+          inserted_time,
+          started_time
+        ) VALUES (?, ?, ?, ?, ?)
+      )sql");
+      if (experiment_id_ != kAbsent) insert.BindInt(1, experiment_id_);
+      insert.BindInt(2, run_id_);
+      insert.BindText(3, run_name_);
+      insert.BindDouble(4, GetWallTime(env_));
+      insert.BindDouble(5, computed_time);
+      TF_RETURN_IF_ERROR(insert.StepAndReset());
+    }
+    if (computed_time < run_started_time_) {
+      run_started_time_ = computed_time;
+      SqliteStatement update = db_->Prepare(R"sql(
+        UPDATE Runs SET started_time = ? WHERE run_id = ?
+      )sql");
+      update.BindDouble(1, computed_time);
+      update.BindInt(2, run_id_);
+      TF_RETURN_IF_ERROR(update.StepAndReset());
+    }
+    return Status::OK();
+  }
+
+  Env* env_;
+  std::shared_ptr<Sqlite> db_;
+  IdAllocator id_allocator_;
+  const string experiment_name_;
+  const string run_name_;
+  const string user_name_;
+  int64 experiment_id_ = kAbsent;
+  int64 run_id_ = kAbsent;
+  int64 user_id_ = kAbsent;
+  std::unordered_map<string, int64> tag_ids_;
+  double experiment_started_time_ = 0.0;
+  double run_started_time_ = 0.0;
+  SqliteStatement insert_tensor_;
+};
+
+class SummaryDbWriter : public SummaryWriterInterface {
+ public:
+  SummaryDbWriter(Env* env, std::shared_ptr<Sqlite> db,
+                  const string& experiment_name, const string& run_name,
+                  const string& user_name)
+      : SummaryWriterInterface(),
+        env_{env},
+        run_writer_{env, std::move(db), experiment_name, run_name, user_name} {}
+  ~SummaryDbWriter() override {}
+
   Status Flush() override { return Status::OK(); }
 
   Status WriteTensor(int64 global_step, Tensor t, const string& tag,
                      const string& serialized_metadata) override {
     mutex_lock ml(mu_);
-    TF_RETURN_IF_ERROR(InitializeParents());
-    // TODO(@jart): Memoize tag_id.
-    int64 tag_id;
-    TF_RETURN_IF_ERROR(GetTagId(run_id_, tag, &tag_id));
+    SummaryMetadata metadata;
     if (!serialized_metadata.empty()) {
-      // TODO(@jart): Only update metadata for first tensor.
-      update_metadata_.BindBlobUnsafe(1, serialized_metadata);
-      update_metadata_.BindInt(2, tag_id);
-      TF_RETURN_IF_ERROR(update_metadata_.StepAndReset());
+      metadata.ParseFromString(serialized_metadata);
     }
-    // TODO(@jart): Lease blocks of rowids and *_ids to minimize fragmentation.
-    // TODO(@jart): Check for random ID collisions without needing txn retry.
-    insert_tensor_.BindInt(1, tag_id);
-    insert_tensor_.BindInt(2, global_step);
-    insert_tensor_.BindDouble(3, GetWallTime(env_));
-    if (t.shape().dims() == 0 && t.dtype() == DT_INT64) {
-      insert_tensor_.BindInt(4, t.scalar<int64>()());
-    } else if (t.shape().dims() == 0 && t.dtype() == DT_DOUBLE) {
-      insert_tensor_.BindDouble(4, t.scalar<double>()());
-    } else {
-      TF_RETURN_IF_ERROR(BindTensor(&insert_tensor_, 4, t));
-    }
-    return insert_tensor_.StepAndReset();
+    double now = GetWallTime(env_);
+    int64 tag_id;
+    TF_RETURN_IF_ERROR(run_writer_.GetTagId(now, tag, metadata, &tag_id));
+    return run_writer_.InsertTensor(tag_id, global_step, now, t);
   }
 
   Status WriteScalar(int64 global_step, Tensor t, const string& tag) override {
@@ -348,28 +556,26 @@ class SummaryDbWriter : public SummaryWriterInterface {
 
   Status WriteGraph(int64 global_step, std::unique_ptr<GraphDef> g) override {
     mutex_lock ml(mu_);
-    TF_RETURN_IF_ERROR(InitializeParents());
-    return txn_.Transact(GraphSaver::SaveToRun, env_, db_.get(), g.get(),
-                         run_id_);
+    return run_writer_.InsertGraph(std::move(g), GetWallTime(env_));
   }
 
   Status WriteEvent(std::unique_ptr<Event> e) override {
     switch (e->what_case()) {
       case Event::WhatCase::kSummary: {
         mutex_lock ml(mu_);
-        TF_RETURN_IF_ERROR(InitializeParents());
-        const Summary& summary = e->summary();
-        for (int i = 0; i < summary.value_size(); ++i) {
-          TF_RETURN_IF_ERROR(WriteSummary(e.get(), summary.value(i)));
+        Status s;
+        for (const auto& value : e->summary().value()) {
+          s.Update(WriteSummary(e.get(), value));
         }
-        return Status::OK();
+        return s;
       }
       case Event::WhatCase::kGraphDef: {
+        mutex_lock ml(mu_);
         std::unique_ptr<GraphDef> graph{new GraphDef};
         if (!ParseProtoUnlimited(graph.get(), e->graph_def())) {
           return errors::DataLoss("parse event.graph_def failed");
         }
-        return WriteGraph(e->step(), std::move(graph));
+        return run_writer_.InsertGraph(std::move(graph), e->wall_time());
       }
       default:
         // TODO(@jart): Handle other stuff.
@@ -401,128 +607,26 @@ class SummaryDbWriter : public SummaryWriterInterface {
   string DebugString() override { return "SummaryDbWriter"; }
 
  private:
-  Status InitializeParents() EXCLUSIVE_LOCKS_REQUIRED(mu_) {
-    if (run_id_ > 0) {
-      return Status::OK();
-    }
-    int64 user_id;
-    TF_RETURN_IF_ERROR(GetUserId(user_name_, &user_id));
-    int64 experiment_id;
-    TF_RETURN_IF_ERROR(
-        GetExperimentId(user_id, experiment_name_, &experiment_id));
-    TF_RETURN_IF_ERROR(GetRunId(experiment_id, run_name_, &run_id_));
-    return Status::OK();
-  }
-
-  Status GetUserId(const string& user_name, int64* user_id)
-      EXCLUSIVE_LOCKS_REQUIRED(mu_) {
-    if (user_name.empty()) {
-      *user_id = 0LL;
-      return Status::OK();
-    }
-    SqliteStatement get_user_id = db_->Prepare(R"sql(
-      SELECT user_id FROM Users WHERE user_name = ?
-    )sql");
-    get_user_id.BindText(1, user_name);
-    bool is_done;
-    TF_RETURN_IF_ERROR(get_user_id.Step(&is_done));
-    if (!is_done) {
-      *user_id = get_user_id.ColumnInt(0);
-    } else {
-      *user_id = MakeRandomId();
-      SqliteStatement insert_user = db_->Prepare(R"sql(
-        INSERT INTO Users (user_id, user_name, inserted_time) VALUES (?, ?, ?)
-      )sql");
-      insert_user.BindInt(1, *user_id);
-      insert_user.BindText(2, user_name);
-      insert_user.BindDouble(3, GetWallTime(env_));
-      TF_RETURN_IF_ERROR(insert_user.StepAndReset());
-    }
-    return Status::OK();
-  }
-
-  Status GetExperimentId(int64 user_id, const string& experiment_name,
-                         int64* experiment_id) EXCLUSIVE_LOCKS_REQUIRED(mu_) {
-    // TODO(@jart): Compute started_time.
-    return GetId("Experiments", "user_id", user_id, "experiment_name",
-                 experiment_name, "experiment_id", experiment_id);
-  }
-
-  Status GetRunId(int64 experiment_id, const string& run_name, int64* run_id)
-      EXCLUSIVE_LOCKS_REQUIRED(mu_) {
-    // TODO(@jart): Compute started_time.
-    return GetId("Runs", "experiment_id", experiment_id, "run_name", run_name,
-                 "run_id", run_id);
-  }
-
-  Status GetTagId(int64 run_id, const string& tag_name, int64* tag_id)
-      EXCLUSIVE_LOCKS_REQUIRED(mu_) {
-    return GetId("Tags", "run_id", run_id, "tag_name", tag_name, "tag_id",
-                 tag_id);
-  }
-
-  Status GetId(const char* table, const char* parent_id_field, int64 parent_id,
-               const char* name_field, const string& name, const char* id_field,
-               int64* id) EXCLUSIVE_LOCKS_REQUIRED(mu_) {
-    if (name.empty()) {
-      *id = 0LL;
-      return Status::OK();
-    }
-    SqliteStatement select = db_->Prepare(
-        strings::Printf("SELECT %s FROM %s WHERE %s = ? AND %s = ?", id_field,
-                        table, parent_id_field, name_field));
-    if (parent_id > 0) {
-      select.BindInt(1, parent_id);
-    }
-    select.BindText(2, name);
-    bool is_done;
-    TF_RETURN_IF_ERROR(select.Step(&is_done));
-    if (!is_done) {
-      *id = select.ColumnInt(0);
-    } else {
-      *id = MakeRandomId();
-      SqliteStatement insert = db_->Prepare(strings::Printf(
-          "INSERT INTO %s (%s, %s, %s, inserted_time) VALUES (?, ?, ?, ?)",
-          table, parent_id_field, id_field, name_field));
-      if (parent_id > 0) {
-        insert.BindInt(1, parent_id);
-      }
-      insert.BindInt(2, *id);
-      insert.BindText(3, name);
-      insert.BindDouble(4, GetWallTime(env_));
-      TF_RETURN_IF_ERROR(insert.StepAndReset());
-    }
-    return Status::OK();
-  }
-
   Status WriteSummary(const Event* e, const Summary::Value& summary)
       EXCLUSIVE_LOCKS_REQUIRED(mu_) {
-    int64 tag_id;
-    TF_RETURN_IF_ERROR(GetTagId(run_id_, summary.tag(), &tag_id));
-    insert_tensor_.BindInt(1, tag_id);
-    insert_tensor_.BindInt(2, e->step());
-    insert_tensor_.BindDouble(3, e->wall_time());
     switch (summary.value_case()) {
-      case Summary::Value::ValueCase::kSimpleValue:
-        insert_tensor_.BindDouble(4, summary.simple_value());
-        break;
+      case Summary::Value::ValueCase::kSimpleValue: {
+        int64 tag_id;
+        TF_RETURN_IF_ERROR(run_writer_.GetTagId(e->wall_time(), summary.tag(),
+                                                summary.metadata(), &tag_id));
+        Tensor t{DT_DOUBLE, {}};
+        t.scalar<double>()() = summary.simple_value();
+        return run_writer_.InsertTensor(tag_id, e->step(), e->wall_time(), t);
+      }
       default:
         // TODO(@jart): Handle the rest.
         return Status::OK();
     }
-    return insert_tensor_.StepAndReset();
   }
 
   mutex mu_;
   Env* env_;
-  std::shared_ptr<Sqlite> db_ GUARDED_BY(mu_);
-  Transactor txn_ GUARDED_BY(mu_);
-  SqliteStatement insert_tensor_ GUARDED_BY(mu_);
-  SqliteStatement update_metadata_ GUARDED_BY(mu_);
-  string user_name_ GUARDED_BY(mu_);
-  string experiment_name_ GUARDED_BY(mu_);
-  string run_name_ GUARDED_BY(mu_);
-  int64 run_id_ GUARDED_BY(mu_);
+  RunWriter run_writer_ GUARDED_BY(mu_);
 };
 
 }  // namespace
@@ -532,14 +636,8 @@ Status CreateSummaryDbWriter(std::shared_ptr<Sqlite> db,
                              const string& run_name, const string& user_name,
                              Env* env, SummaryWriterInterface** result) {
   TF_RETURN_IF_ERROR(SetupTensorboardSqliteDb(db));
-  SummaryDbWriter* w = new SummaryDbWriter(env, std::move(db));
-  const Status s = w->Initialize(experiment_name, run_name, user_name);
-  if (!s.ok()) {
-    w->Unref();
-    *result = nullptr;
-    return s;
-  }
-  *result = w;
+  *result = new SummaryDbWriter(env, std::move(db), experiment_name, run_name,
+                                user_name);
   return Status::OK();
 }
 
diff --git a/tensorflow/contrib/tensorboard/db/summary_db_writer_test.cc b/tensorflow/contrib/tensorboard/db/summary_db_writer_test.cc
index 625861fa6b..5ea844b668 100644
--- a/tensorflow/contrib/tensorboard/db/summary_db_writer_test.cc
+++ b/tensorflow/contrib/tensorboard/db/summary_db_writer_test.cc
@@ -101,6 +101,7 @@ TEST_F(SummaryDbWriterTest, NothingWritten_NoRowsCreated) {
   TF_ASSERT_OK(writer_->Flush());
   writer_->Unref();
   writer_ = nullptr;
+  EXPECT_EQ(0LL, QueryInt("SELECT COUNT(*) FROM Ids"));
   EXPECT_EQ(0LL, QueryInt("SELECT COUNT(*) FROM Users"));
   EXPECT_EQ(0LL, QueryInt("SELECT COUNT(*) FROM Experiments"));
   EXPECT_EQ(0LL, QueryInt("SELECT COUNT(*) FROM Runs"));
@@ -109,13 +110,24 @@ TEST_F(SummaryDbWriterTest, NothingWritten_NoRowsCreated) {
 }
 
 TEST_F(SummaryDbWriterTest, TensorsWritten_RowsGetInitialized) {
+  SummaryMetadata metadata;
+  metadata.set_display_name("display_name");
+  metadata.set_summary_description("description");
+  metadata.mutable_plugin_data()->set_plugin_name("plugin_name");
+  metadata.mutable_plugin_data()->set_content("plugin_data");
+  SummaryMetadata metadata_nope;
+  metadata_nope.set_display_name("nope");
+  metadata_nope.set_summary_description("nope");
+  metadata_nope.mutable_plugin_data()->set_plugin_name("nope");
+  metadata_nope.mutable_plugin_data()->set_content("nope");
   TF_ASSERT_OK(CreateSummaryDbWriter(db_, "mad-science", "train", "jart", &env_,
                                      &writer_));
   env_.AdvanceByMillis(23);
   TF_ASSERT_OK(writer_->WriteTensor(1, MakeScalarInt64(123LL), "taggy",
-                                    "this-is-metaaa"));
+                                    metadata.SerializeAsString()));
   env_.AdvanceByMillis(23);
-  TF_ASSERT_OK(writer_->WriteTensor(2, MakeScalarInt64(314LL), "taggy", ""));
+  TF_ASSERT_OK(writer_->WriteTensor(2, MakeScalarInt64(314LL), "taggy",
+                                    metadata_nope.SerializeAsString()));
   TF_ASSERT_OK(writer_->Flush());
 
   ASSERT_EQ(1LL, QueryInt("SELECT COUNT(*) FROM Users"));
@@ -148,27 +160,28 @@ TEST_F(SummaryDbWriterTest, TensorsWritten_RowsGetInitialized) {
   EXPECT_EQ(run_id, QueryInt("SELECT run_id FROM Tags"));
   EXPECT_EQ("taggy", QueryString("SELECT tag_name FROM Tags"));
   EXPECT_EQ(0.023, QueryDouble("SELECT inserted_time FROM Tags"));
-  EXPECT_EQ("this-is-metaaa", QueryString("SELECT metadata FROM Tags"));
+
+  EXPECT_EQ("display_name", QueryString("SELECT display_name FROM Tags"));
+  EXPECT_EQ("plugin_name", QueryString("SELECT plugin_name FROM Tags"));
+  EXPECT_EQ("plugin_data", QueryString("SELECT plugin_data FROM Tags"));
+  EXPECT_EQ("description", QueryString("SELECT description FROM Descriptions"));
 
   EXPECT_EQ(tag_id, QueryInt("SELECT tag_id FROM Tensors WHERE step = 1"));
   EXPECT_EQ(0.023,
             QueryDouble("SELECT computed_time FROM Tensors WHERE step = 1"));
-  EXPECT_EQ("this-is-metaaa", QueryString("SELECT metadata FROM Tags"));
   EXPECT_FALSE(
       QueryString("SELECT tensor FROM Tensors WHERE step = 1").empty());
 
   EXPECT_EQ(tag_id, QueryInt("SELECT tag_id FROM Tensors WHERE step = 2"));
   EXPECT_EQ(0.046,
             QueryDouble("SELECT computed_time FROM Tensors WHERE step = 2"));
-  EXPECT_EQ("this-is-metaaa", QueryString("SELECT metadata FROM Tags"));
   EXPECT_FALSE(
       QueryString("SELECT tensor FROM Tensors WHERE step = 2").empty());
 }
 
 TEST_F(SummaryDbWriterTest, EmptyParentNames_NoParentsCreated) {
   TF_ASSERT_OK(CreateSummaryDbWriter(db_, "", "", "", &env_, &writer_));
-  TF_ASSERT_OK(writer_->WriteTensor(1, MakeScalarInt64(123LL), "taggy",
-                                    "this-is-metaaa"));
+  TF_ASSERT_OK(writer_->WriteTensor(1, MakeScalarInt64(123LL), "taggy", ""));
   TF_ASSERT_OK(writer_->Flush());
   ASSERT_EQ(0LL, QueryInt("SELECT COUNT(*) FROM Users"));
   ASSERT_EQ(0LL, QueryInt("SELECT COUNT(*) FROM Experiments"));
@@ -317,5 +330,39 @@ TEST_F(SummaryDbWriterTest, WriteScalarUint8_CoercesToInt64) {
   ASSERT_EQ(254LL, QueryInt("SELECT tensor FROM Tensors"));
 }
 
+TEST_F(SummaryDbWriterTest, UsesIdsTable) {
+  SummaryMetadata metadata;
+  TF_ASSERT_OK(CreateSummaryDbWriter(db_, "mad-science", "train", "jart", &env_,
+                                     &writer_));
+  env_.AdvanceByMillis(23);
+  TF_ASSERT_OK(writer_->WriteTensor(1, MakeScalarInt64(123LL), "taggy",
+                                    metadata.SerializeAsString()));
+  TF_ASSERT_OK(writer_->Flush());
+  ASSERT_EQ(4LL, QueryInt("SELECT COUNT(*) FROM Ids"));
+  EXPECT_EQ(4LL, QueryInt(strings::StrCat(
+                     "SELECT COUNT(*) FROM Ids WHERE id IN (",
+                     QueryInt("SELECT user_id FROM Users"), ", ",
+                     QueryInt("SELECT experiment_id FROM Experiments"), ", ",
+                     QueryInt("SELECT run_id FROM Runs"), ", ",
+                     QueryInt("SELECT tag_id FROM Tags"), ")")));
+}
+
+TEST_F(SummaryDbWriterTest, SetsRunFinishedTime) {
+  SummaryMetadata metadata;
+  TF_ASSERT_OK(CreateSummaryDbWriter(db_, "mad-science", "train", "jart", &env_,
+                                     &writer_));
+  env_.AdvanceByMillis(23);
+  TF_ASSERT_OK(writer_->WriteTensor(1, MakeScalarInt64(123LL), "taggy",
+                                    metadata.SerializeAsString()));
+  TF_ASSERT_OK(writer_->Flush());
+  ASSERT_EQ(0.023, QueryDouble("SELECT started_time FROM Runs"));
+  ASSERT_EQ(0.0, QueryDouble("SELECT finished_time FROM Runs"));
+  env_.AdvanceByMillis(23);
+  writer_->Unref();
+  writer_ = nullptr;
+  ASSERT_EQ(0.023, QueryDouble("SELECT started_time FROM Runs"));
+  ASSERT_EQ(0.046, QueryDouble("SELECT finished_time FROM Runs"));
+}
+
 }  // namespace
 }  // namespace tensorflow
-- 
GitLab


From b463cbaa874c390296fd23e9dac31cadc58211a3 Mon Sep 17 00:00:00 2001
From: Loo Rong Jie <loorongjie@gmail.com>
Date: Fri, 8 Dec 2017 08:45:29 +0800
Subject: [PATCH 0782/1225] Add tensorflow::ops prefix for {Read,Write}File

ReadFile and WriteFile collide with the functions in windows.h. Tell MSVC we want Tensorflow's ones.
---
 .../examples/wav_to_spectrogram/wav_to_spectrogram.cc      | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/tensorflow/examples/wav_to_spectrogram/wav_to_spectrogram.cc b/tensorflow/examples/wav_to_spectrogram/wav_to_spectrogram.cc
index 1e375ed48e..4a429837b7 100644
--- a/tensorflow/examples/wav_to_spectrogram/wav_to_spectrogram.cc
+++ b/tensorflow/examples/wav_to_spectrogram/wav_to_spectrogram.cc
@@ -53,7 +53,8 @@ tensorflow::Status WavToSpectrogram(const tensorflow::string& input_wav,
   //  - Scales, clamps, and converts that spectrogram to 0 to 255 uint8's.
   //  - Reshapes the tensor so that it's [height, width, 1] for imaging.
   //  - Encodes it as a PNG stream and saves it out to a file.
-  Output file_reader = ReadFile(root.WithOpName("input_wav"), input_wav);
+  Output file_reader =
+      tensorflow::ops::ReadFile(root.WithOpName("input_wav"), input_wav);
   DecodeWav wav_decoder =
       DecodeWav(root.WithOpName("wav_decoder"), file_reader);
   Output spectrogram = AudioSpectrogram(root.WithOpName("spectrogram"),
@@ -71,8 +72,8 @@ tensorflow::Status WavToSpectrogram(const tensorflow::string& input_wav,
   Output squeeze = Squeeze(root.WithOpName("squeeze"), expand_dims,
                            Squeeze::Attrs().Axis({0}));
   Output png_encoder = EncodePng(root.WithOpName("png_encoder"), squeeze);
-  WriteFile file_writer =
-      WriteFile(root.WithOpName("output_image"), output_image, png_encoder);
+  tensorflow::ops::WriteFile file_writer = tensorflow::ops::WriteFile(
+      root.WithOpName("output_image"), output_image, png_encoder);
   tensorflow::GraphDef graph;
   TF_RETURN_IF_ERROR(root.ToGraphDef(&graph));
 
-- 
GitLab


From 7ebe79f21d5f64f0b25c0b75d607f3cb00bb2c45 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 7 Dec 2017 16:43:40 -0800
Subject: [PATCH 0783/1225] Fix nullptr dereferencing bug in dependency
 optimizer in VLOG statement. Don't run topological sort if it is not needed.

PiperOrigin-RevId: 178315738
---
 .../core/grappler/optimizers/dependency_optimizer.cc     | 9 +++++++--
 .../grappler/optimizers/dependency_optimizer_test.cc     | 2 +-
 2 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/tensorflow/core/grappler/optimizers/dependency_optimizer.cc b/tensorflow/core/grappler/optimizers/dependency_optimizer.cc
index 498a3a443f..41d632d9e3 100644
--- a/tensorflow/core/grappler/optimizers/dependency_optimizer.cc
+++ b/tensorflow/core/grappler/optimizers/dependency_optimizer.cc
@@ -393,16 +393,21 @@ void DependencyOptimizer::BuildNodeToIdx() {
 
 Status DependencyOptimizer::Optimize(Cluster* cluster, const GrapplerItem& item,
                                      GraphDef* optimized_graph) {
-  VLOG(1) << "Graph before optimization:\n" << optimized_graph_->DebugString();
   optimized_graph_ = optimized_graph;
   *optimized_graph_ = item.graph;
   nodes_to_preserve_ = item.NodesToPreserve();
   fetch_nodes_known_ = !item.fetch.empty();
 
+  VLOG(1) << "Graph before optimization:\n" << optimized_graph_->DebugString();
   CleanControlInputs();
   const int num_iterations = opt_level_ == RewriterConfig::AGGRESSIVE ? 2 : 1;
   for (int iteration = 0; iteration < num_iterations; ++iteration) {
-    Status topo_sort_status = TopologicalSort(optimized_graph_);
+    Status topo_sort_status;
+    if (opt_level_ == RewriterConfig::AGGRESSIVE) {
+      // Prepare the graph for transitive reduction if enabled.
+      topo_sort_status = TopologicalSort(optimized_graph_);
+    }
+
     node_map_.reset(new NodeMap(optimized_graph_));
     BuildNodeToIdx();
 
diff --git a/tensorflow/core/grappler/optimizers/dependency_optimizer_test.cc b/tensorflow/core/grappler/optimizers/dependency_optimizer_test.cc
index d91525f814..e17a8eb1cf 100644
--- a/tensorflow/core/grappler/optimizers/dependency_optimizer_test.cc
+++ b/tensorflow/core/grappler/optimizers/dependency_optimizer_test.cc
@@ -157,7 +157,7 @@ TEST_F(DependencyOptimizerTest, ChangeToNoop_NoFetch) {
   GrapplerItem item;
   TF_CHECK_OK(s.ToGraphDef(&item.graph));
 
-  DependencyOptimizer optimizer;
+  DependencyOptimizer optimizer(RewriterConfig::AGGRESSIVE);
   GraphDef output;
   Status status = optimizer.Optimize(nullptr, item, &output);
   TF_EXPECT_OK(status);
-- 
GitLab


From bcae10f5f4f60c48f793b3a928eb9d3c7aa3eb2f Mon Sep 17 00:00:00 2001
From: Adam Zahran <adham.zahran@hotmail.com>
Date: Fri, 8 Dec 2017 03:07:34 +0200
Subject: [PATCH 0784/1225]  fix #15188 replaced isnan with std::isnan to avoid
 build error  (#15189)

* fix  #15188 replaced isnan with std::isnan to avoid build error

* Instead of std::isnan, use Eigen::numext::isnan

* Fix typo
---
 tensorflow/core/framework/numeric_types.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/core/framework/numeric_types.h b/tensorflow/core/framework/numeric_types.h
index 569a4c3756..7e8aeb5001 100644
--- a/tensorflow/core/framework/numeric_types.h
+++ b/tensorflow/core/framework/numeric_types.h
@@ -46,7 +46,7 @@ struct bfloat16 {
   EIGEN_DEVICE_FUNC bfloat16() {}
 
   EIGEN_DEVICE_FUNC explicit bfloat16(const float v) {
-    if (isnan(v)) {
+    if (Eigen::numext::isnan(v)) {
       value = NAN_VALUE;
       return;
     }
-- 
GitLab


From fa9ebbc2c43aa9cf805b650bbc4a58c3a8afdff3 Mon Sep 17 00:00:00 2001
From: Skye Wanderman-Milne <skyewm@google.com>
Date: Thu, 7 Dec 2017 17:12:00 -0800
Subject: [PATCH 0785/1225] Don't enforce shape invariance when creating
 backprop while loop.

I had trouble reasoning about the shapes in the backprop loop built in
control_flow_grad.py, but we know they work, so just don't check the
shapes here (as opposed to calling set_shape or similar at the
_AddNextAndBackEdge call site).

PiperOrigin-RevId: 178319140
---
 tensorflow/python/ops/control_flow_grad.py |  3 ++-
 tensorflow/python/ops/control_flow_ops.py  | 13 +++++++------
 2 files changed, 9 insertions(+), 7 deletions(-)

diff --git a/tensorflow/python/ops/control_flow_grad.py b/tensorflow/python/ops/control_flow_grad.py
index c3dd54a1ff..97b57177b2 100644
--- a/tensorflow/python/ops/control_flow_grad.py
+++ b/tensorflow/python/ops/control_flow_grad.py
@@ -53,7 +53,8 @@ def _SwitchGrad(op, *grad):
       # TODO(yuanbyu): Perform shape inference with this new input.
       if grad[1] is not None:
         # pylint: disable=protected-access
-        control_flow_ops._AddNextAndBackEdge(merge_grad, grad[1])
+        control_flow_ops._AddNextAndBackEdge(merge_grad, grad[1],
+                                             enforce_shape_invariant=False)
         # pylint: enable=protected-access
       return None, None
     elif grad[0] is not None:
diff --git a/tensorflow/python/ops/control_flow_ops.py b/tensorflow/python/ops/control_flow_ops.py
index 1e2202df64..7d505e141b 100644
--- a/tensorflow/python/ops/control_flow_ops.py
+++ b/tensorflow/python/ops/control_flow_ops.py
@@ -643,16 +643,17 @@ def _EnforceShapeInvariant(merge_var, next_var):
              n_values_shape, n_indices_shape, n_shape_shape))
 
 
-def _AddNextAndBackEdge(m, v):
+def _AddNextAndBackEdge(m, v, enforce_shape_invariant=True):
   """Add NextIteration and back edge from v to m."""
   if isinstance(m, ops.Tensor):
     v = ops.convert_to_tensor(v)
     v = _NextIteration(v)
-    # Make sure the shapes of loop outputs are correct. We do this before
-    # calling _update_input, which will raise a less-helpful error message if
-    # the types don't match.
-    # TODO(skyewm): call this for other cases below (needs testing)
-    _EnforceShapeInvariant(m, v)
+    if enforce_shape_invariant:
+      # Make sure the shapes of loop outputs are correct. We do this before
+      # calling _update_input, which will raise a less-helpful error message if
+      # the types don't match.
+      # TODO(skyewm): call this for other cases below (needs testing)
+      _EnforceShapeInvariant(m, v)
     m.op._update_input(1, v)   # pylint: disable=protected-access
   elif isinstance(m, ops.IndexedSlices):
     # pylint: disable=protected-access
-- 
GitLab


From affd9937b000501d73e64da0c393b4d8b32a4392 Mon Sep 17 00:00:00 2001
From: Sanjoy Das <sanjoy@google.com>
Date: Thu, 7 Dec 2017 17:12:03 -0800
Subject: [PATCH 0786/1225] [XLA:CPU] Rename cpu/layout_assignment to
 cpu/cpu_layout_assignment

This is more consistent with the rest of the files in service/cpu.

PiperOrigin-RevId: 178319147
---
 tensorflow/compiler/xla/service/cpu/BUILD          | 14 +++++++-------
 .../compiler/xla/service/cpu/cpu_compiler.cc       |  2 +-
 ...yout_assignment.cc => cpu_layout_assignment.cc} |  2 +-
 ...layout_assignment.h => cpu_layout_assignment.h} |  6 +++---
 ...nment_test.cc => cpu_layout_assignment_test.cc} |  2 +-
 tensorflow/compiler/xla/service/gpu/BUILD          | 14 +++++++-------
 .../compiler/xla/service/gpu/gpu_compiler.cc       |  2 +-
 ...yout_assignment.cc => gpu_layout_assignment.cc} |  2 +-
 ...layout_assignment.h => gpu_layout_assignment.h} |  6 +++---
 ...nment_test.cc => gpu_layout_assignment_test.cc} |  2 +-
 10 files changed, 26 insertions(+), 26 deletions(-)
 rename tensorflow/compiler/xla/service/cpu/{layout_assignment.cc => cpu_layout_assignment.cc} (98%)
 rename tensorflow/compiler/xla/service/cpu/{layout_assignment.h => cpu_layout_assignment.h} (86%)
 rename tensorflow/compiler/xla/service/cpu/{layout_assignment_test.cc => cpu_layout_assignment_test.cc} (99%)
 rename tensorflow/compiler/xla/service/gpu/{layout_assignment.cc => gpu_layout_assignment.cc} (98%)
 rename tensorflow/compiler/xla/service/gpu/{layout_assignment.h => gpu_layout_assignment.h} (86%)
 rename tensorflow/compiler/xla/service/gpu/{layout_assignment_test.cc => gpu_layout_assignment_test.cc} (97%)

diff --git a/tensorflow/compiler/xla/service/cpu/BUILD b/tensorflow/compiler/xla/service/cpu/BUILD
index 6e7b062280..32abb1b559 100644
--- a/tensorflow/compiler/xla/service/cpu/BUILD
+++ b/tensorflow/compiler/xla/service/cpu/BUILD
@@ -82,13 +82,13 @@ cc_library(
         ":cpu_copy_insertion",
         ":cpu_executable",
         ":cpu_instruction_fusion",
+        ":cpu_layout_assignment",
         ":cpu_options",
         ":cpu_parallelization_preparation",
         ":disassembler",
         ":dot_op_emitter",
         ":ir_emission_utils",
         ":ir_emitter",
-        ":layout_assignment",
         ":parallel_cpu_executable",
         ":parallel_task_assignment",
         ":simple_orc_jit",
@@ -656,9 +656,9 @@ cc_library(
 )
 
 cc_library(
-    name = "layout_assignment",
-    srcs = ["layout_assignment.cc"],
-    hdrs = ["layout_assignment.h"],
+    name = "cpu_layout_assignment",
+    srcs = ["cpu_layout_assignment.cc"],
+    hdrs = ["cpu_layout_assignment.h"],
     deps = [
         ":dot_op_emitter",
         ":ir_emission_utils",
@@ -670,11 +670,11 @@ cc_library(
 )
 
 tf_cc_test(
-    name = "layout_assignment_test",
+    name = "cpu_layout_assignment_test",
     size = "small",
-    srcs = ["layout_assignment_test.cc"],
+    srcs = ["cpu_layout_assignment_test.cc"],
     deps = [
-        ":layout_assignment",
+        ":cpu_layout_assignment",
         "//tensorflow/compiler/xla:literal_util",
         "//tensorflow/compiler/xla:shape_layout",
         "//tensorflow/compiler/xla:shape_util",
diff --git a/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc b/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc
index 988f632748..6c72ef6849 100644
--- a/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc
+++ b/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc
@@ -51,13 +51,13 @@ limitations under the License.
 #include "tensorflow/compiler/xla/service/cpu/cpu_copy_insertion.h"
 #include "tensorflow/compiler/xla/service/cpu/cpu_executable.h"
 #include "tensorflow/compiler/xla/service/cpu/cpu_instruction_fusion.h"
+#include "tensorflow/compiler/xla/service/cpu/cpu_layout_assignment.h"
 #include "tensorflow/compiler/xla/service/cpu/cpu_options.h"
 #include "tensorflow/compiler/xla/service/cpu/cpu_parallelization_preparation.h"
 #include "tensorflow/compiler/xla/service/cpu/disassembler.h"
 #include "tensorflow/compiler/xla/service/cpu/dot_op_emitter.h"
 #include "tensorflow/compiler/xla/service/cpu/ir_emission_utils.h"
 #include "tensorflow/compiler/xla/service/cpu/ir_emitter.h"
-#include "tensorflow/compiler/xla/service/cpu/layout_assignment.h"
 #include "tensorflow/compiler/xla/service/cpu/parallel_cpu_executable.h"
 #include "tensorflow/compiler/xla/service/cpu/parallel_task_assignment.h"
 #include "tensorflow/compiler/xla/service/cpu/simple_orc_jit.h"
diff --git a/tensorflow/compiler/xla/service/cpu/layout_assignment.cc b/tensorflow/compiler/xla/service/cpu/cpu_layout_assignment.cc
similarity index 98%
rename from tensorflow/compiler/xla/service/cpu/layout_assignment.cc
rename to tensorflow/compiler/xla/service/cpu/cpu_layout_assignment.cc
index 69466fd32e..7975eba399 100644
--- a/tensorflow/compiler/xla/service/cpu/layout_assignment.cc
+++ b/tensorflow/compiler/xla/service/cpu/cpu_layout_assignment.cc
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#include "tensorflow/compiler/xla/service/cpu/layout_assignment.h"
+#include "tensorflow/compiler/xla/service/cpu/cpu_layout_assignment.h"
 
 #include <numeric>
 
diff --git a/tensorflow/compiler/xla/service/cpu/layout_assignment.h b/tensorflow/compiler/xla/service/cpu/cpu_layout_assignment.h
similarity index 86%
rename from tensorflow/compiler/xla/service/cpu/layout_assignment.h
rename to tensorflow/compiler/xla/service/cpu/cpu_layout_assignment.h
index 4fd8d68dd6..c8edbb9e15 100644
--- a/tensorflow/compiler/xla/service/cpu/layout_assignment.h
+++ b/tensorflow/compiler/xla/service/cpu/cpu_layout_assignment.h
@@ -13,8 +13,8 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_CPU_LAYOUT_ASSIGNMENT_H_
-#define TENSORFLOW_COMPILER_XLA_SERVICE_CPU_LAYOUT_ASSIGNMENT_H_
+#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_CPU_CPU_LAYOUT_ASSIGNMENT_H_
+#define TENSORFLOW_COMPILER_XLA_SERVICE_CPU_CPU_LAYOUT_ASSIGNMENT_H_
 
 #include "tensorflow/compiler/xla/service/computation_layout.h"
 #include "tensorflow/compiler/xla/service/layout_assignment.h"
@@ -38,4 +38,4 @@ class CpuLayoutAssignment : public LayoutAssignment {
 }  // namespace cpu
 }  // namespace xla
 
-#endif  // TENSORFLOW_COMPILER_XLA_SERVICE_CPU_LAYOUT_ASSIGNMENT_H_
+#endif  // TENSORFLOW_COMPILER_XLA_SERVICE_CPU_CPU_LAYOUT_ASSIGNMENT_H_
diff --git a/tensorflow/compiler/xla/service/cpu/layout_assignment_test.cc b/tensorflow/compiler/xla/service/cpu/cpu_layout_assignment_test.cc
similarity index 99%
rename from tensorflow/compiler/xla/service/cpu/layout_assignment_test.cc
rename to tensorflow/compiler/xla/service/cpu/cpu_layout_assignment_test.cc
index 1ea5e8c7fc..401cf50717 100644
--- a/tensorflow/compiler/xla/service/cpu/layout_assignment_test.cc
+++ b/tensorflow/compiler/xla/service/cpu/cpu_layout_assignment_test.cc
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#include "tensorflow/compiler/xla/service/cpu/layout_assignment.h"
+#include "tensorflow/compiler/xla/service/cpu/cpu_layout_assignment.h"
 
 #include <initializer_list>
 #include <memory>
diff --git a/tensorflow/compiler/xla/service/gpu/BUILD b/tensorflow/compiler/xla/service/gpu/BUILD
index e57558b578..8f59a4b8a5 100644
--- a/tensorflow/compiler/xla/service/gpu/BUILD
+++ b/tensorflow/compiler/xla/service/gpu/BUILD
@@ -431,11 +431,11 @@ cc_library(
         ":fusion_merger",
         ":gpu_copy_insertion",
         ":gpu_executable",
+        ":gpu_layout_assignment",
         ":hlo_schedule",
         ":instruction_fusion",
         ":ir_emission_utils",
         ":ir_emitter",
-        ":layout_assignment",
         ":pad_insertion",
         ":partition_assignment",
         ":stream_assignment",
@@ -492,9 +492,9 @@ cc_library(
 )
 
 cc_library(
-    name = "layout_assignment",
-    srcs = ["layout_assignment.cc"],
-    hdrs = ["layout_assignment.h"],
+    name = "gpu_layout_assignment",
+    srcs = ["gpu_layout_assignment.cc"],
+    hdrs = ["gpu_layout_assignment.h"],
     deps = [
         ":ir_emission_utils",
         "//tensorflow/compiler/xla:shape_util",
@@ -508,10 +508,10 @@ cc_library(
 )
 
 tf_cc_test(
-    name = "layout_assignment_test",
-    srcs = ["layout_assignment_test.cc"],
+    name = "gpu_layout_assignment_test",
+    srcs = ["gpu_layout_assignment_test.cc"],
     deps = [
-        ":layout_assignment",
+        ":gpu_layout_assignment",
         "//tensorflow/compiler/xla:shape_layout",
         "//tensorflow/compiler/xla:shape_util",
         "//tensorflow/compiler/xla:xla_data_proto",
diff --git a/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc b/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc
index aa5f38ff58..464e770b37 100644
--- a/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc
+++ b/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc
@@ -36,12 +36,12 @@ limitations under the License.
 #include "tensorflow/compiler/xla/service/gpu/fusion_merger.h"
 #include "tensorflow/compiler/xla/service/gpu/gpu_copy_insertion.h"
 #include "tensorflow/compiler/xla/service/gpu/gpu_executable.h"
+#include "tensorflow/compiler/xla/service/gpu/gpu_layout_assignment.h"
 #include "tensorflow/compiler/xla/service/gpu/hlo_schedule.h"
 #include "tensorflow/compiler/xla/service/gpu/instruction_fusion.h"
 #include "tensorflow/compiler/xla/service/gpu/ir_emission_utils.h"
 #include "tensorflow/compiler/xla/service/gpu/ir_emitter.h"
 #include "tensorflow/compiler/xla/service/gpu/ir_emitter_context.h"
-#include "tensorflow/compiler/xla/service/gpu/layout_assignment.h"
 #include "tensorflow/compiler/xla/service/gpu/llvm_gpu_backend/gpu_backend_lib.h"
 #include "tensorflow/compiler/xla/service/gpu/pad_insertion.h"
 #include "tensorflow/compiler/xla/service/gpu/partition_assignment.h"
diff --git a/tensorflow/compiler/xla/service/gpu/layout_assignment.cc b/tensorflow/compiler/xla/service/gpu/gpu_layout_assignment.cc
similarity index 98%
rename from tensorflow/compiler/xla/service/gpu/layout_assignment.cc
rename to tensorflow/compiler/xla/service/gpu/gpu_layout_assignment.cc
index d475c4171b..50a249f448 100644
--- a/tensorflow/compiler/xla/service/gpu/layout_assignment.cc
+++ b/tensorflow/compiler/xla/service/gpu/gpu_layout_assignment.cc
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#include "tensorflow/compiler/xla/service/gpu/layout_assignment.h"
+#include "tensorflow/compiler/xla/service/gpu/gpu_layout_assignment.h"
 
 #include <memory>
 
diff --git a/tensorflow/compiler/xla/service/gpu/layout_assignment.h b/tensorflow/compiler/xla/service/gpu/gpu_layout_assignment.h
similarity index 86%
rename from tensorflow/compiler/xla/service/gpu/layout_assignment.h
rename to tensorflow/compiler/xla/service/gpu/gpu_layout_assignment.h
index 169041eb85..7655a3ebf4 100644
--- a/tensorflow/compiler/xla/service/gpu/layout_assignment.h
+++ b/tensorflow/compiler/xla/service/gpu/gpu_layout_assignment.h
@@ -13,8 +13,8 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_GPU_LAYOUT_ASSIGNMENT_H_
-#define TENSORFLOW_COMPILER_XLA_SERVICE_GPU_LAYOUT_ASSIGNMENT_H_
+#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_GPU_GPU_LAYOUT_ASSIGNMENT_H_
+#define TENSORFLOW_COMPILER_XLA_SERVICE_GPU_GPU_LAYOUT_ASSIGNMENT_H_
 
 #include "tensorflow/compiler/xla/service/computation_layout.h"
 #include "tensorflow/compiler/xla/service/layout_assignment.h"
@@ -38,4 +38,4 @@ class GpuLayoutAssignment : public LayoutAssignment {
 }  // namespace gpu
 }  // namespace xla
 
-#endif  // TENSORFLOW_COMPILER_XLA_SERVICE_GPU_LAYOUT_ASSIGNMENT_H_
+#endif  // TENSORFLOW_COMPILER_XLA_SERVICE_GPU_GPU_LAYOUT_ASSIGNMENT_H_
diff --git a/tensorflow/compiler/xla/service/gpu/layout_assignment_test.cc b/tensorflow/compiler/xla/service/gpu/gpu_layout_assignment_test.cc
similarity index 97%
rename from tensorflow/compiler/xla/service/gpu/layout_assignment_test.cc
rename to tensorflow/compiler/xla/service/gpu/gpu_layout_assignment_test.cc
index ac206b89d3..f68b23c8ce 100644
--- a/tensorflow/compiler/xla/service/gpu/layout_assignment_test.cc
+++ b/tensorflow/compiler/xla/service/gpu/gpu_layout_assignment_test.cc
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#include "tensorflow/compiler/xla/service/gpu/layout_assignment.h"
+#include "tensorflow/compiler/xla/service/gpu/gpu_layout_assignment.h"
 
 #include "tensorflow/compiler/xla/layout_util.h"
 #include "tensorflow/compiler/xla/service/computation_layout.h"
-- 
GitLab


From 1667d4dcd2c7c33a3bcade62014931a1f8d9a2e0 Mon Sep 17 00:00:00 2001
From: Jiri Simsa <jsimsa@google.com>
Date: Thu, 7 Dec 2017 17:41:46 -0800
Subject: [PATCH 0787/1225] Parallelizing the creation of asynchronous function
 calls in `tf.data.map_and_batch`. This change is expected to improve
 `tf.data` performance on inexpensive `map` functions with a large number of
 input/output arguments.

PiperOrigin-RevId: 178322009
---
 .../kernel_tests/batch_dataset_op_test.py     | 17 +++-
 .../core/kernels/map_and_batch_dataset_op.cc  | 97 +++++++++++--------
 2 files changed, 69 insertions(+), 45 deletions(-)

diff --git a/tensorflow/contrib/data/python/kernel_tests/batch_dataset_op_test.py b/tensorflow/contrib/data/python/kernel_tests/batch_dataset_op_test.py
index a939b3c841..d975a0167f 100644
--- a/tensorflow/contrib/data/python/kernel_tests/batch_dataset_op_test.py
+++ b/tensorflow/contrib/data/python/kernel_tests/batch_dataset_op_test.py
@@ -577,7 +577,7 @@ class BatchDatasetTest(test.TestCase):
     self.assertEqual([None], dataset.output_shapes[1][0].as_list())
     self.assertEqual([None, 30], dataset.output_shapes[1][1].as_list())
 
-  def testBatchAndMapDataset(self):
+  def _testBatchAndMapDatasetHelper(self, num_parallel_batches=1):
     """Test a dataset that maps a TF function across its input elements."""
     # The pipeline is TensorSliceDataset ->
     # RepeatDataset(count) -> BatchAndMapDataset(square_3, batch_size).
@@ -593,7 +593,10 @@ class BatchDatasetTest(test.TestCase):
 
     iterator = (
         dataset_ops.Dataset.from_tensor_slices(components).repeat(count).apply(
-            batching.map_and_batch(_map_fn, batch_size))
+            batching.map_and_batch(
+                map_func=_map_fn,
+                batch_size=batch_size,
+                num_parallel_batches=num_parallel_batches))
         .make_initializable_iterator())
     init_op = iterator.initializer
     get_next = iterator.get_next()
@@ -640,6 +643,16 @@ class BatchDatasetTest(test.TestCase):
       with self.assertRaises(errors.InvalidArgumentError):
         sess.run(init_op, feed_dict={count: 14, batch_size: 0})
 
+  def testBatchAndMapDataset(self):
+    return self._testBatchAndMapDatasetHelper()
+
+  def testBatchAndMapDatasetWithParallelBatching(self):
+    # TODO(b/70299909): This test surfaces a bug in the `map_and_batch`
+    # transformation, which manifests as premature EOF. Fix it.
+    #
+    # return self._testBatchAndMapDatasetHelper(num_parallel_batches=10)
+    pass
+
   def testMapAndBatchSparse(self):
 
     def _sparse(i):
diff --git a/tensorflow/core/kernels/map_and_batch_dataset_op.cc b/tensorflow/core/kernels/map_and_batch_dataset_op.cc
index 9bd66e681f..d17b4237d3 100644
--- a/tensorflow/core/kernels/map_and_batch_dataset_op.cc
+++ b/tensorflow/core/kernels/map_and_batch_dataset_op.cc
@@ -14,13 +14,13 @@ limitations under the License.
 ==============================================================================*/
 #define EIGEN_USE_THREADS
 
-#include "tensorflow/core/lib/core/blocking_counter.h"
 #include "tensorflow/core/common_runtime/function.h"
 #include "tensorflow/core/framework/partial_tensor_shape.h"
 #include "tensorflow/core/framework/tensor.h"
 #include "tensorflow/core/kernels/captured_function.h"
 #include "tensorflow/core/kernels/dataset.h"
 #include "tensorflow/core/kernels/inplace_ops_functor.h"
+#include "tensorflow/core/lib/core/blocking_counter.h"
 #include "tensorflow/core/lib/random/random.h"
 #include "tensorflow/core/lib/strings/strcat.h"
 #include "tensorflow/core/platform/tracing.h"
@@ -247,48 +247,59 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel {
                   .IgnoreError();
             });
         opts.step_container = step_container;
-        opts.runner = ctx->runner();
-        dataset()->captured_func_->RunAsync(
-            opts, input_element, &result->return_values,
-            [this, result, step_container, batch_result,
-             offset](Status ret_status) {
-              delete step_container;
-              result->status.Update(ret_status);
-              if (ret_status.ok()) {
-                EnsureOutputAllocated(batch_result, result->return_values);
-                const size_t num_components = result->return_values.size();
-                for (size_t i = 0; i < num_components; ++i) {
-                  const Tensor& tensor = result->return_values[i];
-                  Tensor* batch = &(batch_result->output)[i];
-                  if (tensor.NumElements() !=
-                      (batch->NumElements() / batch->dim_size(0))) {
-                    TensorShape batch_shape = batch->shape();
-                    batch_shape.RemoveDim(0);
-                    result->status.Update(errors::InvalidArgument(
-                        "Cannot add tensor to the batch: number of "
-                        "elements does not match. Shapes are: [tensor]: ",
-                        tensor.shape().DebugString(),
-                        ", [batch]: ", batch_shape.DebugString()));
-                    break;
-                  }
-                  // TODO(mrry): Add a version of DoParallelConcat that allows
-                  // us to move `tensor` where possible, to speed up string
-                  // tensor batching.
-                  Status copy_status = ::tensorflow::functor::DoParallelConcat(
-                      *dataset()->device_, tensor, offset, batch);
-                  if (!copy_status.ok()) {
-                    result->status.Update(copy_status);
-                    break;
-                  }
-                }
-              }
-              // NOTE(mrry): We clear the return values here to release any
-              // memory associated with them and to paralellize the destruction
-              // of the tensors (which can be surprisingly expensive for
-              // map functions with large numbers of return values).
-              result->return_values.clear();
-              batch_result->counter->DecrementCount();
-            });
+        std::function<void(std::function<void()>)>* runner =
+            new std::function<void(std::function<void()>)>(*ctx->runner());
+        opts.runner = runner;
+        (*ctx->runner())(std::bind(
+            [=](std::vector<Tensor> input_element) {
+              dataset()->captured_func_->RunAsync(
+                  opts, input_element, &result->return_values,
+                  [this, step_container, runner, result, batch_result,
+                   offset](Status ret_status) {
+                    delete step_container;
+                    delete runner;
+                    result->status.Update(ret_status);
+                    if (ret_status.ok()) {
+                      EnsureOutputAllocated(batch_result,
+                                            result->return_values);
+                      const size_t num_components =
+                          result->return_values.size();
+                      for (size_t i = 0; i < num_components; ++i) {
+                        const Tensor& tensor = result->return_values[i];
+                        Tensor* batch = &(batch_result->output)[i];
+                        if (tensor.NumElements() !=
+                            (batch->NumElements() / batch->dim_size(0))) {
+                          TensorShape batch_shape = batch->shape();
+                          batch_shape.RemoveDim(0);
+                          result->status.Update(errors::InvalidArgument(
+                              "Cannot add tensor to the batch: number of "
+                              "elements does not match. Shapes are: [tensor]: ",
+                              tensor.shape().DebugString(),
+                              ", [batch]: ", batch_shape.DebugString()));
+                          break;
+                        }
+                        // TODO(mrry): Add a version of DoParallelConcat that
+                        // allows us to move `tensor` where possible, to speed
+                        // up string tensor batching.
+                        Status copy_status =
+                            ::tensorflow::functor::DoParallelConcat(
+                                *dataset()->device_, tensor, offset, batch);
+                        if (!copy_status.ok()) {
+                          result->status.Update(copy_status);
+                          break;
+                        }
+                      }
+                    }
+                    // NOTE(mrry): We clear the return values here to release
+                    // any memory associated with them and to paralellize the
+                    // destruction of the tensors (which can be surprisingly
+                    // expensive for map functions with large numbers of return
+                    // values).
+                    result->return_values.clear();
+                    batch_result->counter->DecrementCount();
+                  });
+            },
+            std::move(input_element)));
       }
 
       void StartInvocationBatch(IteratorContext* ctx, int64 batch_index)
-- 
GitLab


From 0e9cc7f3113ade82436729bd541f6b501d023ac0 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 7 Dec 2017 17:46:37 -0800
Subject: [PATCH 0788/1225] [XLA] Implement Conditional in XLA service, client
 ComputationBuilder, and CPU backend.

PiperOrigin-RevId: 178322445
---
 .../xla/client/computation_builder.cc         |  28 ++
 .../compiler/xla/client/computation_builder.h |   7 +
 .../compiler/xla/service/buffer_assignment.cc |  28 +-
 .../xla/service/buffer_assignment_test.cc     |  59 ++++
 tensorflow/compiler/xla/service/call_graph.cc |   1 +
 .../compiler/xla/service/call_graph_test.cc   |  53 ++-
 .../compiler/xla/service/cpu/ir_emitter.cc    |  59 ++++
 .../compiler/xla/service/cpu/ir_emitter.h     |   1 +
 .../xla/service/flatten_call_graph.cc         |  14 +-
 .../xla/service/flatten_call_graph_test.cc    |  30 ++
 .../xla/service/hlo_dataflow_analysis.cc      |  79 ++++-
 .../xla/service/hlo_dataflow_analysis.h       |   1 +
 .../xla/service/hlo_dataflow_analysis_test.cc | 322 ++++++++++++++++++
 tensorflow/compiler/xla/service/service.cc    |  11 +
 .../compiler/xla/service/shape_inference.cc   |  58 ++++
 .../compiler/xla/service/shape_inference.h    |   6 +
 .../xla/service/shape_inference_test.cc       |  75 ++++
 .../compiler/xla/service/user_computation.cc  | 123 +++++++
 .../compiler/xla/service/user_computation.h   |   6 +
 tensorflow/compiler/xla/tests/BUILD           |  18 +
 .../compiler/xla/tests/conditional_test.cc    | 238 +++++++++++++
 tensorflow/compiler/xla/xla_data.proto        |  11 +-
 .../performance/xla/operation_semantics.md    |  29 ++
 23 files changed, 1238 insertions(+), 19 deletions(-)
 create mode 100644 tensorflow/compiler/xla/tests/conditional_test.cc

diff --git a/tensorflow/compiler/xla/client/computation_builder.cc b/tensorflow/compiler/xla/client/computation_builder.cc
index 9febea8dcf..317dcb4e41 100644
--- a/tensorflow/compiler/xla/client/computation_builder.cc
+++ b/tensorflow/compiler/xla/client/computation_builder.cc
@@ -1504,6 +1504,34 @@ ComputationDataHandle ComputationBuilder::While(
   return ParseOpResponse(s, &response);
 }
 
+ComputationDataHandle ComputationBuilder::Conditional(
+    const ComputationDataHandle& predicate,
+    const ComputationDataHandle& true_operand,
+    const Computation& true_computation,
+    const ComputationDataHandle& false_operand,
+    const Computation& false_computation) {
+  if (!first_error_.ok() || !PrepareComputation().ok()) {
+    return ComputationDataHandle();
+  }
+
+  ConditionalRequest request;
+  *request.mutable_predicate() = predicate;
+  *request.mutable_true_operand() = true_operand;
+  *request.mutable_true_computation() = true_computation.handle();
+  *request.mutable_false_operand() = false_operand;
+  *request.mutable_false_computation() = false_computation.handle();
+  OpRequest op_request;
+  *op_request.mutable_computation() = computation_.handle();
+  *op_request.mutable_conditional_request() = request;
+  AddCommonFieldsToOpRequest(&op_request);
+  OpResponse response;
+
+  VLOG(2) << "making conditional op request";
+  Status s = client_->stub()->Op(&op_request, &response);
+
+  return ParseOpResponse(s, &response);
+}
+
 ComputationDataHandle ComputationBuilder::Reduce(
     const ComputationDataHandle& operand,
     const ComputationDataHandle& init_value, const Computation& computation,
diff --git a/tensorflow/compiler/xla/client/computation_builder.h b/tensorflow/compiler/xla/client/computation_builder.h
index 531b98cfb9..97531cdc75 100644
--- a/tensorflow/compiler/xla/client/computation_builder.h
+++ b/tensorflow/compiler/xla/client/computation_builder.h
@@ -741,6 +741,13 @@ class ComputationBuilder {
                               const Computation& body,
                               const ComputationDataHandle& init);
 
+  // Enqueues a conditional node onto the computation.
+  ComputationDataHandle Conditional(const ComputationDataHandle& predicate,
+                                    const ComputationDataHandle& true_operand,
+                                    const Computation& true_computation,
+                                    const ComputationDataHandle& false_operand,
+                                    const Computation& false_computation);
+
   // Enqueues a ReducePrecision node onto the computation.
   ComputationDataHandle ReducePrecision(const ComputationDataHandle& operand,
                                         const int exponent_bits,
diff --git a/tensorflow/compiler/xla/service/buffer_assignment.cc b/tensorflow/compiler/xla/service/buffer_assignment.cc
index 19a9ff04de..7ece79d781 100644
--- a/tensorflow/compiler/xla/service/buffer_assignment.cc
+++ b/tensorflow/compiler/xla/service/buffer_assignment.cc
@@ -581,6 +581,7 @@ Status GatherComputationsByAllocationType(
            instruction->called_computations()) {
         switch (instruction->opcode()) {
           case HloOpcode::kCall:
+          case HloOpcode::kConditional:
           case HloOpcode::kWhile:
             // Call and while must be called from a computation with global
             // allocations as they may return references to buffers inside the
@@ -976,8 +977,8 @@ Status BufferAssigner::AssignBuffersWithSequentialOrdering(
   const HloOrdering& hlo_ordering = assignment->liveness().hlo_ordering();
   if (run_whole_module_heap_simulation) {
     // Run the heap simulation over the whole module. This reduces memory usage,
-    // since buffers for kCall and kWhile sub-computations are only live for the
-    // duration of their calling instructions.
+    // since buffers for kCall, kWhile, and kConditional sub-computations are
+    // only live for the duration of their calling instructions.
     VLOG(1) << "Running whole-module heap simulation";
     SequentialHloOrdering::HloModuleSequence module_sequence;
     FlatSet<const LogicalBuffer*> all_buffers_to_assign;
@@ -1272,7 +1273,8 @@ const LogicalBuffer* AddBufferToColocatedSet(
 }  // namespace
 
 // Builds sets of buffers in 'colocated_buffer_sets' which should be colocated
-// in the same allocation (currently just supports kWhile and kCall).
+// in the same allocation (currently just supports kWhile, kCall, and
+// kConditional).
 void BufferAssigner::BuildColocatedBufferSets(
     const HloModule* module, const BufferLiveness& buffer_liveness,
     const LogicalBuffer::SizeFunction& buffer_size,
@@ -1336,6 +1338,26 @@ void BufferAssigner::BuildColocatedBufferSets(
                                       &colocated_set);
               AddSetToColocatedBufferSets(colocated_set, colocated_buffer_sets);
             });
+      } else if (opcode == HloOpcode::kConditional) {
+        const HloInstruction* conditional_hlo = instruction;
+        ShapeUtil::ForEachSubshape(
+            conditional_hlo->shape(),
+            [this, conditional_hlo, &points_to_analysis, colocated_buffer_sets](
+                const Shape& /*subshape*/, const ShapeIndex& index) {
+              std::vector<const LogicalBuffer*> colocated_set;
+              // Add conditional.result.
+              AddBufferToColocatedSet(conditional_hlo, index,
+                                      points_to_analysis, &colocated_set);
+              // Add conditional.true_computation.root.
+              AddBufferToColocatedSet(
+                  conditional_hlo->true_computation()->root_instruction(),
+                  index, points_to_analysis, &colocated_set);
+              // Add conditional.false_computation.root.
+              AddBufferToColocatedSet(
+                  conditional_hlo->false_computation()->root_instruction(),
+                  index, points_to_analysis, &colocated_set);
+              AddSetToColocatedBufferSets(colocated_set, colocated_buffer_sets);
+            });
       }
     }
   }
diff --git a/tensorflow/compiler/xla/service/buffer_assignment_test.cc b/tensorflow/compiler/xla/service/buffer_assignment_test.cc
index 09681b34e7..6fc9d783f1 100644
--- a/tensorflow/compiler/xla/service/buffer_assignment_test.cc
+++ b/tensorflow/compiler/xla/service/buffer_assignment_test.cc
@@ -166,6 +166,15 @@ class BufferAssignmentTest : public HloTestBase {
     return builder.Build();
   }
 
+  std::unique_ptr<HloComputation> BuildR0F32UnaryOpComputation(
+      HloOpcode opcode, const string& name) {
+    auto builder = HloComputation::Builder(name);
+    auto param =
+        builder.AddInstruction(HloInstruction::CreateParameter(0, r0f32_, "x"));
+    builder.AddInstruction(HloInstruction::CreateUnary(r0f32_, opcode, param));
+    return builder.Build();
+  }
+
   // Verifies that the given instruction hlo has a valid input buffer assigned,
   // i.e., the parameter number matches the op's.
   const BufferAllocation& GetAssignedInputAllocation(
@@ -740,6 +749,56 @@ TEST_F(BufferAssignmentTest, ExampleWhile) {
             << " instructions; total buffer size " << size0 + sizec + sizeb;
 }
 
+TEST_F(BufferAssignmentTest, ExampleConditional) {
+  auto module = CreateNewModule();
+  auto true_computation = module->AddEmbeddedComputation(
+      BuildR0F32UnaryOpComputation(HloOpcode::kCeil, "Ceil"));
+  auto false_computation = module->AddEmbeddedComputation(
+      BuildR0F32UnaryOpComputation(HloOpcode::kFloor, "Floor"));
+
+  auto builder = HloComputation::Builder(TestName());
+  auto pred = builder.AddInstruction(
+      HloInstruction::CreateConstant(Literal::CreateR0<bool>(false)));
+  auto const1 = builder.AddInstruction(
+      HloInstruction::CreateConstant(Literal::CreateR0<float>(56.4f)));
+  auto const2 = builder.AddInstruction(
+      HloInstruction::CreateConstant(Literal::CreateR0<float>(12.4f)));
+  auto conditional = builder.AddInstruction(HloInstruction::CreateConditional(
+      r0f32_, pred, const1, true_computation, const2, false_computation));
+  module->AddEntryComputation(builder.Build());
+
+  const std::vector<const HloInstruction*> conditional_instrs =
+      GetInstructions(conditional);
+  const std::vector<const HloInstruction*> true_instrs =
+      GetInstructions(true_computation->root_instruction());
+  const std::vector<const HloInstruction*> false_instrs =
+      GetInstructions(false_computation->root_instruction());
+  EXPECT_EQ(4, conditional_instrs.size());
+  EXPECT_EQ(2, true_instrs.size());
+  EXPECT_EQ(2, false_instrs.size());
+
+  auto buffers = RunBufferAssignment(module.get());
+  ValidateBuffers(conditional_instrs, *buffers);
+  ValidateBuffers(true_instrs, *buffers);
+  ValidateBuffers(false_instrs, *buffers);
+
+  EXPECT_FALSE(BuffersDistinct(conditional_instrs, true_instrs, *buffers))
+      << "Should be reuse between conditional and true computation.";
+  EXPECT_FALSE(BuffersDistinct(conditional_instrs, false_instrs, *buffers))
+      << "Should be reuse between conditional and false computation.";
+  EXPECT_FALSE(BuffersDistinct(true_instrs, false_instrs, *buffers))
+      << "Should be reuse between true and false computations.";
+
+  const BufferAllocation& conditional_buffer =
+      GetTopLevelAllocation(*buffers, conditional);
+  const BufferAllocation& true_buffer =
+      GetTopLevelAllocation(*buffers, true_computation->root_instruction());
+  const BufferAllocation& false_buffer =
+      GetTopLevelAllocation(*buffers, false_computation->root_instruction());
+  EXPECT_EQ(conditional_buffer.size(), true_buffer.size());
+  EXPECT_EQ(conditional_buffer.size(), false_buffer.size());
+}
+
 TEST_F(BufferAssignmentTest, UnaryOpReuseChain) {
   // param0[100] ---> (exp) ---> (tanh) ---> (exp) ---> (neg)
   auto builder = HloComputation::Builder(TestName());
diff --git a/tensorflow/compiler/xla/service/call_graph.cc b/tensorflow/compiler/xla/service/call_graph.cc
index 1adecdb939..13eb02ca01 100644
--- a/tensorflow/compiler/xla/service/call_graph.cc
+++ b/tensorflow/compiler/xla/service/call_graph.cc
@@ -54,6 +54,7 @@ std::ostream& operator<<(std::ostream& out, const CallContext& context) {
 CallContext GetInstructionCallContext(const HloInstruction* instruction) {
   switch (instruction->opcode()) {
     case HloOpcode::kCall:
+    case HloOpcode::kConditional:
     case HloOpcode::kWhile:
       return CallContext::kSequential;
     case HloOpcode::kMap:
diff --git a/tensorflow/compiler/xla/service/call_graph_test.cc b/tensorflow/compiler/xla/service/call_graph_test.cc
index 0395ea8c8b..1ea7d538cd 100644
--- a/tensorflow/compiler/xla/service/call_graph_test.cc
+++ b/tensorflow/compiler/xla/service/call_graph_test.cc
@@ -34,12 +34,13 @@ using ::testing::UnorderedElementsAre;
 class CallGraphTest : public HloTestBase {
  protected:
   // Build and return a trivial computation taking and returning a scalar.
-  std::unique_ptr<HloComputation> MakeScalarComputation() {
+  std::unique_ptr<HloComputation> MakeScalarComputation(
+      HloOpcode opcode = HloOpcode::kNegate) {
     HloComputation::Builder builder(TestName() + ".ScalarComputation");
     HloInstruction* param0 = builder.AddInstruction(
         HloInstruction::CreateParameter(0, kScalarShape, "param0"));
     builder.AddInstruction(
-        HloInstruction::CreateUnary(kScalarShape, HloOpcode::kNegate, param0));
+        HloInstruction::CreateUnary(kScalarShape, opcode, param0));
     return builder.Build();
   }
 
@@ -236,6 +237,54 @@ TEST_F(CallGraphTest, ContextBothComputations) {
   EXPECT_EQ(CallContext::kBoth, sub_node.context());
 }
 
+TEST_F(CallGraphTest, ComputationWithConditional) {
+  // Test a call graph of a module with a conditional.
+  auto module = CreateNewModule();
+  HloComputation* true_computation =
+      module->AddEmbeddedComputation(MakeScalarComputation(HloOpcode::kCeil));
+  HloComputation* false_computation =
+      module->AddEmbeddedComputation(MakeScalarComputation(HloOpcode::kFloor));
+
+  HloComputation::Builder builder(TestName());
+  HloInstruction* pred = builder.AddInstruction(
+      HloInstruction::CreateConstant(Literal::CreateR0<bool>(false)));
+  HloInstruction* const1 = builder.AddInstruction(
+      HloInstruction::CreateConstant(Literal::CreateR0<float>(56.4f)));
+  HloInstruction* const2 = builder.AddInstruction(
+      HloInstruction::CreateConstant(Literal::CreateR0<float>(12.6f)));
+  HloInstruction* conditional =
+      builder.AddInstruction(HloInstruction::CreateConditional(
+          kScalarShape, pred, const1, true_computation, const2,
+          false_computation));
+  HloComputation* entry_computation =
+      module->AddEntryComputation(builder.Build());
+
+  std::unique_ptr<CallGraph> call_graph = CallGraph::Build(module.get());
+
+  EXPECT_EQ(3, call_graph->nodes().size());
+
+  const CallGraphNode& entry_node = call_graph->GetNode(entry_computation);
+  EXPECT_EQ(entry_computation, entry_node.computation());
+  EXPECT_EQ(1, entry_node.callsites().size());
+
+  const CallSite& conditional_callsite = entry_node.callsites()[0];
+  EXPECT_EQ(conditional, conditional_callsite.instruction());
+  EXPECT_THAT(conditional_callsite.called_computations(),
+              UnorderedElementsAre(true_computation, false_computation));
+  EXPECT_EQ(CallContext::kSequential, conditional_callsite.context());
+  EXPECT_EQ(entry_node.GetCallSite(conditional), &conditional_callsite);
+
+  const CallGraphNode& true_node = call_graph->GetNode(true_computation);
+  EXPECT_TRUE(true_node.callees().empty());
+  EXPECT_EQ(1, true_node.callers().size());
+  EXPECT_EQ(entry_computation, true_node.callers()[0]);
+
+  const CallGraphNode& false_node = call_graph->GetNode(false_computation);
+  EXPECT_TRUE(false_node.callees().empty());
+  EXPECT_EQ(1, false_node.callers().size());
+  EXPECT_EQ(entry_computation, false_node.callers()[0]);
+}
+
 TEST_F(CallGraphTest, ComplexGraph) {
   // Test a call graph of a module with several computation called in various
   // contexts. The call graph looks like:
diff --git a/tensorflow/compiler/xla/service/cpu/ir_emitter.cc b/tensorflow/compiler/xla/service/cpu/ir_emitter.cc
index 70e7aec5c5..85d9668f89 100644
--- a/tensorflow/compiler/xla/service/cpu/ir_emitter.cc
+++ b/tensorflow/compiler/xla/service/cpu/ir_emitter.cc
@@ -2511,6 +2511,65 @@ Status IrEmitter::HandleConcatenate(HloInstruction* concatenate) {
   return DefaultAction(concatenate);
 }
 
+Status IrEmitter::HandleConditional(HloInstruction* conditional) {
+  auto pred = conditional->operand(0);
+  auto true_arg = conditional->operand(1);
+  auto false_arg = conditional->operand(2);
+  TF_RET_CHECK(ShapeUtil::IsScalar(pred->shape()) &&
+               pred->shape().element_type() == PRED)
+      << "Predicate on a Conditional must be bool; got: "
+      << ShapeUtil::HumanString(pred->shape());
+
+  HloComputation* true_computation = conditional->true_computation();
+  HloComputation* false_computation = conditional->false_computation();
+  TF_RET_CHECK(ShapeUtil::Equal(conditional->shape(),
+                                true_computation->root_instruction()->shape()))
+      << "Shape of conditional should be same as the shape of the true "
+      << "computation; got: " << ShapeUtil::HumanString(conditional->shape())
+      << " and "
+      << ShapeUtil::HumanString(true_computation->root_instruction()->shape());
+
+  TF_RET_CHECK(ShapeUtil::Equal(conditional->shape(),
+                                false_computation->root_instruction()->shape()))
+      << "Shape of conditional should be same as the shape of the false "
+      << "computation; got: " << ShapeUtil::HumanString(conditional->shape())
+      << " and "
+      << ShapeUtil::HumanString(false_computation->root_instruction()->shape());
+
+  llvm::Function* true_function =
+      FindOrDie(emitted_functions_, true_computation);
+  llvm::Function* false_function =
+      FindOrDie(emitted_functions_, false_computation);
+
+  TF_RETURN_IF_ERROR(EmitTargetAddressForOp(conditional));
+  llvm::Value* conditional_result = GetEmittedValueFor(conditional);
+
+  // Generating:
+  //   if (pred)
+  //     cond_result = true_computation(true_operand)
+  //   else
+  //     cond_result = false_computation(false_operand)
+  llvm::LoadInst* pred_value = ir_builder_.CreateLoad(
+      GetIrArrayFor(pred).GetBasePointer(), "load_predicate_value");
+  llvm::Value* pred_cond = ir_builder_.CreateICmpNE(
+      pred_value,
+      llvm::ConstantInt::get(llvm_ir::PrimitiveTypeToIrType(PRED, module_), 0),
+      "boolean_predicate");
+  llvm_ir::LlvmIfData if_data =
+      llvm_ir::EmitIfThenElse(pred_cond, "conditional", &ir_builder_);
+
+  SetToFirstInsertPoint(if_data.true_block, &ir_builder_);
+  EmitArrayFunctionCallInto(true_function, {GetEmittedValueFor(true_arg)},
+                            conditional_result, IrName(conditional, "_true"));
+
+  SetToFirstInsertPoint(if_data.false_block, &ir_builder_);
+  EmitArrayFunctionCallInto(false_function, {GetEmittedValueFor(false_arg)},
+                            conditional_result, IrName(conditional, "_false"));
+
+  SetToFirstInsertPoint(if_data.after_block, &ir_builder_);
+  return Status::OK();
+}
+
 Status IrEmitter::FinishVisit(HloInstruction* root) {
   // When this method is called, we should have already emitted an IR value for
   // the root (return) op. The IR value holds the address of the buffer holding
diff --git a/tensorflow/compiler/xla/service/cpu/ir_emitter.h b/tensorflow/compiler/xla/service/cpu/ir_emitter.h
index 692e2b3877..9bc2d97397 100644
--- a/tensorflow/compiler/xla/service/cpu/ir_emitter.h
+++ b/tensorflow/compiler/xla/service/cpu/ir_emitter.h
@@ -191,6 +191,7 @@ class IrEmitter : public DfsHloVisitorWithDefault {
   Status HandleCustomCall(HloInstruction* custom_call) override;
   Status HandleWhile(HloInstruction* xla_while) override;
   Status HandleConcatenate(HloInstruction* concatenate) override;
+  Status HandleConditional(HloInstruction* conditional) override;
   Status FinishVisit(HloInstruction* root) override;
 
   Status Preprocess(HloInstruction* hlo) override;
diff --git a/tensorflow/compiler/xla/service/flatten_call_graph.cc b/tensorflow/compiler/xla/service/flatten_call_graph.cc
index dfba22a6c4..2b6caa1494 100644
--- a/tensorflow/compiler/xla/service/flatten_call_graph.cc
+++ b/tensorflow/compiler/xla/service/flatten_call_graph.cc
@@ -26,7 +26,10 @@ namespace xla {
 
 namespace {
 
-// Helper to replace the called computation at a while- or call-instruction.
+// Helper to replace the called computation at a while-, call-, or
+// conditional-instruction. This function replaces exactly one instance of
+// 'computation' with 'new_computation' even if 'instruction' calls
+// 'computation' more than once.
 void ReplaceCalledComputation(HloInstruction* instruction,
                               HloComputation* computation,
                               HloComputation* new_computation) {
@@ -45,6 +48,15 @@ void ReplaceCalledComputation(HloInstruction* instruction,
       instruction->set_to_apply(new_computation);
       break;
     }
+    case HloOpcode::kConditional: {
+      if (computation == instruction->true_computation()) {
+        instruction->set_true_computation(new_computation);
+      } else {
+        CHECK_EQ(computation, instruction->false_computation());
+        instruction->set_false_computation(new_computation);
+      }
+      break;
+    }
     default:
       LOG(FATAL) << "unexpected opcode: "
                  << HloOpcodeString(instruction->opcode());
diff --git a/tensorflow/compiler/xla/service/flatten_call_graph_test.cc b/tensorflow/compiler/xla/service/flatten_call_graph_test.cc
index a68e90b7d0..d3854b40de 100644
--- a/tensorflow/compiler/xla/service/flatten_call_graph_test.cc
+++ b/tensorflow/compiler/xla/service/flatten_call_graph_test.cc
@@ -223,5 +223,35 @@ TEST_F(FlattenCallGraphTest, FlattenCalls) {
   EXPECT_EQ(1, b_node.caller_callsites().size());
 }
 
+TEST_F(FlattenCallGraphTest, FlattenCallsInConditional) {
+  auto module = CreateNewModule();
+  HloComputation* sub_computation =
+      module->AddEmbeddedComputation(MakeScalarComputation());
+
+  // Create entry computation, which is a conditional that has the same
+  // computation in the true and false branch.
+  HloComputation::Builder builder(TestName());
+  auto pred = builder.AddInstruction(
+      HloInstruction::CreateConstant(Literal::CreateR0<bool>(true)));
+  auto constant1 = builder.AddInstruction(
+      HloInstruction::CreateConstant(Literal::CreateR0<float>(56.0f)));
+  auto constant2 = builder.AddInstruction(
+      HloInstruction::CreateConstant(Literal::CreateR0<float>(12.0f)));
+  builder.AddInstruction(HloInstruction::CreateConditional(
+      kScalarShape, pred, constant1, sub_computation, constant2,
+      sub_computation));
+  module->AddEntryComputation(builder.Build());
+  EXPECT_EQ(2, module->computation_count());
+
+  TF_ASSERT_OK_AND_ASSIGN(bool result, RunFlattenCallGraph(module.get()));
+  EXPECT_TRUE(result);
+  std::unique_ptr<CallGraph> call_graph = CallGraph::Build(module.get());
+  // The true and false computations must now be different.
+  EXPECT_EQ(3, module->computation_count());
+
+  const CallGraphNode& sub_node = call_graph->GetNode(sub_computation);
+  EXPECT_EQ(1, sub_node.caller_callsites().size());
+}
+
 }  // namespace
 }  // namespace xla
diff --git a/tensorflow/compiler/xla/service/hlo_dataflow_analysis.cc b/tensorflow/compiler/xla/service/hlo_dataflow_analysis.cc
index 3f34b9ceb3..2a335843f5 100644
--- a/tensorflow/compiler/xla/service/hlo_dataflow_analysis.cc
+++ b/tensorflow/compiler/xla/service/hlo_dataflow_analysis.cc
@@ -333,6 +333,21 @@ bool HloDataflowAnalysis::UpdateCallValueSet(HloInstruction* call) {
   return false;
 }
 
+bool HloDataflowAnalysis::UpdateConditionalValueSet(
+    HloInstruction* conditional) {
+  CHECK_EQ(conditional->opcode(), HloOpcode::kConditional);
+  std::vector<const InstructionValueSet*> inputs = {
+      &GetInstructionValueSet(
+          conditional->true_computation()->root_instruction()),
+      &GetInstructionValueSet(
+          conditional->false_computation()->root_instruction())};
+  // A phi-node is not defined for a kConditional instruction even though it
+  // represents a join point. This is because the current approach is to define
+  // a phi-node only for kWhile to account for the dataflow through back-edges
+  // and deal with the ambiguity in other cases.
+  return GetInstructionValueSet(conditional).AssignUnionOf(inputs);
+}
+
 bool HloDataflowAnalysis::UpdateCopyValueSet(HloInstruction* copy) {
   CHECK_EQ(copy->opcode(), HloOpcode::kCopy);
   bool changed = false;
@@ -394,7 +409,7 @@ bool HloDataflowAnalysis::UpdateParameterValueSet(HloInstruction* parameter) {
   CHECK_EQ(call_graph_node.context(), CallContext::kSequential);
 
   std::vector<const InstructionValueSet*> inputs;
-  bool called_from_while = false;
+  bool need_phi = false;
   for (const CallSite& callsite : call_graph_node.caller_callsites()) {
     if (callsite.instruction()->opcode() == HloOpcode::kCall) {
       // The operand values of a call instruction are forwarded to the
@@ -416,14 +431,32 @@ bool HloDataflowAnalysis::UpdateParameterValueSet(HloInstruction* parameter) {
         inputs.push_back(&GetInstructionValueSet(
             callsite.instruction()->while_body()->root_instruction()));
       }
-      called_from_while = true;
+      need_phi = true;
+    } else if (callsite.instruction()->opcode() == HloOpcode::kConditional) {
+      CHECK_EQ(parameter->parameter_number(), 0);
+      auto conditional = callsite.instruction();
+      // Conditional has 3 operands. Operand 0 is the predicate, operand 1 is
+      // the argument to the true computation and operand 2 is the argument to
+      // the false computation.
+      //
+      // If the parameter belongs to conditional's true computation, then
+      // operand 1 is forwarded to this parameter instruction. If the parameter
+      // belongs to conditional's false computation, then operand 2 is forwarded
+      // to this parameter instruction.
+      if (parameter->parent() == conditional->true_computation()) {
+        inputs.push_back(&GetInstructionValueSet(conditional->operand(1)));
+      } else {
+        CHECK_EQ(parameter->parent(), conditional->false_computation());
+        inputs.push_back(&GetInstructionValueSet(conditional->operand(2)));
+      }
+      need_phi = true;
     } else {
       LOG(FATAL) << "CallContext::kSequential computations should only be "
-                    "called from call or while instructions";
+                    "called from call, while, or conditional instructions";
     }
   }
 
-  if (ssa_form_ && called_from_while) {
+  if (ssa_form_ && need_phi) {
     return Phi(parameter, inputs);
   } else {
     return GetInstructionValueSet(parameter).AssignUnionOf(inputs);
@@ -512,6 +545,8 @@ bool HloDataflowAnalysis::UpdateInstructionValueSet(
       return UpdateSendValueSet(instruction);
     case HloOpcode::kRecvDone:
       return UpdateRecvDoneValueSet(instruction);
+    case HloOpcode::kConditional:
+      return UpdateConditionalValueSet(instruction);
     default:
       // Instruction does not forward HloValues (it defines all values in its
       // output). No update is necessary.
@@ -550,13 +585,31 @@ void HloDataflowAnalysis::Propagate() {
 
       // If user sequentially calls a computation, then the respective
       // parameter(s) of the computation need to be updated.
-      for (HloComputation* called_computation : user->called_computations()) {
-        const CallGraphNode& call_graph_node =
-            call_graph_->GetNode(called_computation);
-        if (call_graph_node.context() == CallContext::kSequential) {
-          for (int64 operand_number : user->OperandIndices(instruction)) {
-            worklist.push(
-                called_computation->parameter_instruction(operand_number));
+      if (user->opcode() == HloOpcode::kConditional) {
+        // If operand 0 is the use of instruction, then no parameters need to be
+        // updated, since that is the predicate of the conditional.
+        // If operand 1 is the use of instruction, then the true_computation's
+        // parameter need to be updated.
+        // If operand 2 is the use of instruction, then the false_computation's
+        // parameter need to be updated.
+        //
+        // Note that the same instruction can be used in both operand 1 and
+        // operand 2.
+        if (user->operand(1) == instruction) {
+          worklist.push(user->true_computation()->parameter_instruction(0));
+        }
+        if (user->operand(2) == instruction) {
+          worklist.push(user->false_computation()->parameter_instruction(0));
+        }
+      } else {
+        for (HloComputation* called_computation : user->called_computations()) {
+          const CallGraphNode& call_graph_node =
+              call_graph_->GetNode(called_computation);
+          if (call_graph_node.context() == CallContext::kSequential) {
+            for (int64 operand_number : user->OperandIndices(instruction)) {
+              worklist.push(
+                  called_computation->parameter_instruction(operand_number));
+            }
           }
         }
       }
@@ -568,7 +621,8 @@ void HloDataflowAnalysis::Propagate() {
       const CallGraphNode& call_graph_node =
           call_graph_->GetNode(instruction->parent());
       for (const CallSite& callsite : call_graph_node.caller_callsites()) {
-        if (callsite.instruction()->opcode() == HloOpcode::kCall) {
+        if ((callsite.instruction()->opcode() == HloOpcode::kCall) ||
+            (callsite.instruction()->opcode() == HloOpcode::kConditional)) {
           worklist.push(callsite.instruction());
         } else if (callsite.instruction()->opcode() == HloOpcode::kWhile) {
           // Add the while itself, and the body and condition parameters.
@@ -636,6 +690,7 @@ Status HloDataflowAnalysis::InitializeInstructionValueSets() {
           break;
         case HloOpcode::kWhile:
         case HloOpcode::kCall:
+        case HloOpcode::kConditional:
         case HloOpcode::kGetTupleElement:
           // These instructions define no values. The values in their output
           // flow from their operands or from cross computation dataflow.
diff --git a/tensorflow/compiler/xla/service/hlo_dataflow_analysis.h b/tensorflow/compiler/xla/service/hlo_dataflow_analysis.h
index dfd81ae951..469620d012 100644
--- a/tensorflow/compiler/xla/service/hlo_dataflow_analysis.h
+++ b/tensorflow/compiler/xla/service/hlo_dataflow_analysis.h
@@ -146,6 +146,7 @@ class HloDataflowAnalysis {
   // the instruction value set changed.
   bool UpdateBitcastValueSet(HloInstruction* bitcast);
   bool UpdateCallValueSet(HloInstruction* call);
+  bool UpdateConditionalValueSet(HloInstruction* conditional);
   bool UpdateCopyValueSet(HloInstruction* copy);
   bool UpdateGetTupleElementValueSet(HloInstruction* gte);
   bool UpdateParameterValueSet(HloInstruction* parameter);
diff --git a/tensorflow/compiler/xla/service/hlo_dataflow_analysis_test.cc b/tensorflow/compiler/xla/service/hlo_dataflow_analysis_test.cc
index f08f0b1d68..e714b2567f 100644
--- a/tensorflow/compiler/xla/service/hlo_dataflow_analysis_test.cc
+++ b/tensorflow/compiler/xla/service/hlo_dataflow_analysis_test.cc
@@ -34,6 +34,7 @@ limitations under the License.
 namespace xla {
 namespace {
 
+using ::testing::ElementsAre;
 using ::testing::UnorderedElementsAre;
 
 // Test is parameterized on a bool which is whether the dataflow analysis is
@@ -77,11 +78,23 @@ class HloDataflowAnalysisTest : public HloTestBase,
                                  analysis_->GetValueDefinedAt(b), *analysis_);
   }
 
+  std::unique_ptr<HloComputation> CreateR0F32UnaryOpComputation(
+      HloOpcode opcode) {
+    HloComputation::Builder builder(TestName() + "." + HloOpcodeString(opcode));
+    HloInstruction* param0 = builder.AddInstruction(
+        HloInstruction::CreateParameter(0, scalar_shape_, "param0"));
+    builder.AddInstruction(
+        HloInstruction::CreateUnary(scalar_shape_, opcode, param0));
+    return builder.Build();
+  }
+
   std::unique_ptr<HloModule> module_;
   std::unique_ptr<HloDataflowAnalysis> analysis_;
 
   const Shape scalar_shape_ = ShapeUtil::MakeShape(F32, {});
   const Shape vector_shape_ = ShapeUtil::MakeShape(F32, {42});
+  const Shape tuple_shape_ = ShapeUtil::MakeTupleShape(
+      {ShapeUtil::MakeShape(F32, {}), ShapeUtil::MakeShape(F32, {})});
 };
 
 TEST_P(HloDataflowAnalysisTest, BinaryOperation) {
@@ -1528,6 +1541,315 @@ TEST_P(HloDataflowAnalysisTest, EmbeddedComputationInterference) {
   EXPECT_TRUE(InstructionsMayInterfere(ordering, negate, embedded_log));
 }
 
+TEST_P(HloDataflowAnalysisTest, ConditionalWithIdentity) {
+  // Test conditional with identity computations in both true and false cases.
+  //
+  // true_computation(F32[] %true_param):
+  //   return %true_param
+  //
+  // false_computation(F32[] %false_param):
+  //   return %false_param
+  //
+  // entry:
+  //   %pred = Constant(true)
+  //   %constant1 = Constant(56.0)
+  //   %constant2 = Constant(12.0)
+  //   return Conditional(%pred, %constant1, true_computation,
+  //                      %constant2, false_computation)
+
+  auto true_builder = HloComputation::Builder(TestName() + "_true");
+  auto true_param = true_builder.AddInstruction(
+      HloInstruction::CreateParameter(0, scalar_shape_, "true_param"));
+  HloComputation* true_computation =
+      module_->AddEmbeddedComputation(true_builder.Build());
+
+  auto false_builder = HloComputation::Builder(TestName() + "_false");
+  auto false_param = false_builder.AddInstruction(
+      HloInstruction::CreateParameter(0, scalar_shape_, "false_param"));
+  HloComputation* false_computation =
+      module_->AddEmbeddedComputation(false_builder.Build());
+
+  auto builder = HloComputation::Builder(TestName());
+  auto pred = builder.AddInstruction(
+      HloInstruction::CreateConstant(Literal::CreateR0<bool>(true)));
+  auto constant1 = builder.AddInstruction(
+      HloInstruction::CreateConstant(Literal::CreateR0<float>(56.0f)));
+  auto constant2 = builder.AddInstruction(
+      HloInstruction::CreateConstant(Literal::CreateR0<float>(12.0f)));
+  auto conditional = builder.AddInstruction(HloInstruction::CreateConditional(
+      scalar_shape_, pred, constant1, true_computation, constant2,
+      false_computation));
+  module_->AddEntryComputation(builder.Build());
+
+  const HloDataflowAnalysis& analysis = RunAnalysis(GetParam());
+
+  EXPECT_TRUE(analysis.ValueIsDefinedAt(pred));
+  EXPECT_TRUE(analysis.ValueIsDefinedAt(constant1));
+  EXPECT_TRUE(analysis.ValueIsDefinedAt(constant2));
+
+  EXPECT_FALSE(analysis.ValueIsDefinedAt(true_param));
+  EXPECT_FALSE(analysis.ValueIsDefinedAt(false_param));
+
+  EXPECT_EQ(analysis.GetUniqueValueAt(true_param),
+            analysis.GetValueDefinedAt(constant1));
+  EXPECT_EQ(analysis.GetUniqueValueAt(false_param),
+            analysis.GetValueDefinedAt(constant2));
+
+  EXPECT_THAT(analysis.GetValueDefinedAt(pred).uses(),
+              ElementsAre(HloUse{conditional, 0, {}}));
+  EXPECT_THAT(analysis.GetValueDefinedAt(constant1).uses(),
+              ElementsAre(HloUse{conditional, 1, {}}));
+  EXPECT_THAT(analysis.GetValueDefinedAt(constant2).uses(),
+              ElementsAre(HloUse{conditional, 2, {}}));
+
+  EXPECT_EQ(analysis.values().size(), 3);
+  EXPECT_FALSE(analysis.ValueIsDefinedAt(conditional));
+  EXPECT_THAT(HloValuesAt(conditional),
+              UnorderedElementsAre(analysis.GetValueDefinedAt(constant1),
+                                   analysis.GetValueDefinedAt(constant2)));
+}
+
+TEST_P(HloDataflowAnalysisTest, ConditionalTakingTupleOperand) {
+  // Test conditional with true and false computations taking a tuple operand.
+  //
+  // true_computation((F32[], F32[]) %true_param):
+  //   %true_x = GetTupleElement(%true_param, 0)
+  //   %true_y = GetTupleElement(%true_param, 1)
+  //   return Add(%true_x, %true_y)
+  //
+  // false_computation((F32[], F32[]) %false_param):
+  //   %false_x = GetTupleElement(%false_param, 0)
+  //   %false_y = GetTupleElement(%false_param, 1)
+  //   return Subtract(%false_x, %false_y)
+  //
+  // entry:
+  //   %pred = Constant(true)
+  //   %constant1 = Constant(56.0)
+  //   %constant2 = Constant(12.0)
+  //   %tuple_operand = Tuple(%constant1, %constant2)
+  //   return Conditional(%pred, %tuple_operand, true_computation,
+  //                      %tuple_operand, false_computation)
+
+  auto true_builder = HloComputation::Builder(TestName() + "_true");
+  auto true_param = true_builder.AddInstruction(
+      HloInstruction::CreateParameter(0, tuple_shape_, "true_param"));
+  auto true_x = true_builder.AddInstruction(
+      HloInstruction::CreateGetTupleElement(scalar_shape_, true_param, 0));
+  auto true_y = true_builder.AddInstruction(
+      HloInstruction::CreateGetTupleElement(scalar_shape_, true_param, 1));
+  auto add = true_builder.AddInstruction(HloInstruction::CreateBinary(
+      scalar_shape_, HloOpcode::kAdd, true_x, true_y));
+  HloComputation* true_computation =
+      module_->AddEmbeddedComputation(true_builder.Build());
+
+  auto false_builder = HloComputation::Builder(TestName() + "_false");
+  auto false_param = false_builder.AddInstruction(
+      HloInstruction::CreateParameter(0, tuple_shape_, "false_param"));
+  auto false_x = false_builder.AddInstruction(
+      HloInstruction::CreateGetTupleElement(scalar_shape_, false_param, 0));
+  auto false_y = false_builder.AddInstruction(
+      HloInstruction::CreateGetTupleElement(scalar_shape_, false_param, 1));
+  auto sub = false_builder.AddInstruction(HloInstruction::CreateBinary(
+      scalar_shape_, HloOpcode::kSubtract, false_x, false_y));
+  HloComputation* false_computation =
+      module_->AddEmbeddedComputation(false_builder.Build());
+
+  auto builder = HloComputation::Builder(TestName());
+  auto pred = builder.AddInstruction(
+      HloInstruction::CreateConstant(Literal::CreateR0<bool>(true)));
+  auto constant1 = builder.AddInstruction(
+      HloInstruction::CreateConstant(Literal::CreateR0<float>(56.0f)));
+  auto constant2 = builder.AddInstruction(
+      HloInstruction::CreateConstant(Literal::CreateR0<float>(12.0f)));
+  auto tuple_operand = builder.AddInstruction(
+      HloInstruction::CreateTuple({constant1, constant2}));
+  auto conditional = builder.AddInstruction(HloInstruction::CreateConditional(
+      scalar_shape_, pred, tuple_operand, true_computation, tuple_operand,
+      false_computation));
+  module_->AddEntryComputation(builder.Build());
+
+  const HloDataflowAnalysis& analysis = RunAnalysis(GetParam());
+
+  EXPECT_TRUE(analysis.ValueIsDefinedAt(pred));
+  EXPECT_TRUE(analysis.ValueIsDefinedAt(constant1));
+  EXPECT_TRUE(analysis.ValueIsDefinedAt(constant2));
+  EXPECT_TRUE(analysis.ValueIsDefinedAt(tuple_operand));
+  EXPECT_TRUE(analysis.ValueIsDefinedAt(add));
+  EXPECT_TRUE(analysis.ValueIsDefinedAt(sub));
+
+  EXPECT_FALSE(analysis.ValueIsDefinedAt(true_param));
+  EXPECT_FALSE(analysis.ValueIsDefinedAt(false_param));
+  EXPECT_FALSE(analysis.ValueIsDefinedAt(true_x));
+  EXPECT_FALSE(analysis.ValueIsDefinedAt(true_y));
+  EXPECT_FALSE(analysis.ValueIsDefinedAt(false_x));
+  EXPECT_FALSE(analysis.ValueIsDefinedAt(false_y));
+
+  EXPECT_EQ(analysis.GetUniqueValueAt(true_param),
+            analysis.GetValueDefinedAt(tuple_operand));
+  EXPECT_EQ(analysis.GetUniqueValueAt(false_param),
+            analysis.GetValueDefinedAt(tuple_operand));
+  EXPECT_EQ(analysis.GetUniqueValueAt(true_x),
+            analysis.GetValueDefinedAt(constant1));
+  EXPECT_EQ(analysis.GetUniqueValueAt(true_y),
+            analysis.GetValueDefinedAt(constant2));
+  EXPECT_EQ(analysis.GetUniqueValueAt(false_x),
+            analysis.GetValueDefinedAt(constant1));
+  EXPECT_EQ(analysis.GetUniqueValueAt(false_y),
+            analysis.GetValueDefinedAt(constant2));
+
+  EXPECT_THAT(analysis.GetValueDefinedAt(pred).uses(),
+              ElementsAre(HloUse{conditional, 0, {}}));
+  EXPECT_THAT(analysis.GetValueDefinedAt(constant1).uses(),
+              UnorderedElementsAre(HloUse{conditional, 1, {0}},
+                                   HloUse{conditional, 2, {0}},
+                                   HloUse{add, 0, {}}, HloUse{sub, 0, {}}));
+  EXPECT_THAT(analysis.GetValueDefinedAt(constant2).uses(),
+              UnorderedElementsAre(HloUse{conditional, 1, {1}},
+                                   HloUse{conditional, 2, {1}},
+                                   HloUse{add, 1, {}}, HloUse{sub, 1, {}}));
+  EXPECT_THAT(analysis.GetValueDefinedAt(tuple_operand).uses(),
+              UnorderedElementsAre(
+                  HloUse{conditional, 1, {}}, HloUse{conditional, 2, {}},
+                  HloUse{true_x, 0, {}}, HloUse{true_y, 0, {}},
+                  HloUse{false_x, 0, {}}, HloUse{false_y, 0, {}}));
+
+  EXPECT_EQ(analysis.values().size(), 6);
+  EXPECT_FALSE(analysis.ValueIsDefinedAt(conditional));
+  EXPECT_THAT(HloValuesAt(conditional),
+              UnorderedElementsAre(analysis.GetValueDefinedAt(add),
+                                   analysis.GetValueDefinedAt(sub)));
+}
+
+TEST_P(HloDataflowAnalysisTest, NestedConditionals) {
+  // computation1(F32[] %param1):
+  //   %ceil = Ceil(%param1)
+  //   return %ceil
+  //
+  // computation2(F32[] %param2):
+  //   %floor = Floor(%param2)
+  //   return %floor
+  //
+  // computation3(F32[] %param3):
+  //   %negate = Negate(%param3)
+  //   return %negate
+  //
+  // inner_conditional((PRED, F32[], F32[]) %param_cond):
+  //   %pred_cond = GetTupleElement(%param_cond, 0)
+  //   %true_operand_cond = GetTupleElement(%param_cond, 1)
+  //   %false_opearnd_cond = GetTupleElement(%param_cond, 2)
+  //   return Conditional(%pred_cond, %true_operand_cond, computation1,
+  //                      %false_operand_cond, computation2)
+  //
+  // entry:
+  //   %pred1 = Constant(true)
+  //   %pred2 = Constant(false)
+  //   %constant1 = Constant(1.1);
+  //   %constant2 = Constant(2.2);
+  //   %constant3 = Constant(3.3);
+  //   return Conditional(%pred1, (%pred2, %constant1, %constant2),
+  //                      inner_conditional, %constant3, computation3)
+
+  auto computation1 = module_->AddEmbeddedComputation(
+      CreateR0F32UnaryOpComputation(HloOpcode::kCeil));
+  auto computation2 = module_->AddEmbeddedComputation(
+      CreateR0F32UnaryOpComputation(HloOpcode::kFloor));
+  auto computation3 = module_->AddEmbeddedComputation(
+      CreateR0F32UnaryOpComputation(HloOpcode::kNegate));
+
+  // Build inner_conditional computation.
+  const Shape scalar_bool_shape = ShapeUtil::MakeShape(PRED, {});
+  const Shape tuple_param_shape = ShapeUtil::MakeTupleShape(
+      {scalar_bool_shape, scalar_shape_, scalar_shape_});
+  auto inner_builder =
+      HloComputation::Builder(TestName() + "_inner_conditional");
+  auto param_cond = inner_builder.AddInstruction(
+      HloInstruction::CreateParameter(0, tuple_param_shape, "param_cond"));
+  auto pred_cond = inner_builder.AddInstruction(
+      HloInstruction::CreateGetTupleElement(scalar_bool_shape, param_cond, 0));
+  auto true_operand_cond = inner_builder.AddInstruction(
+      HloInstruction::CreateGetTupleElement(scalar_shape_, param_cond, 1));
+  auto false_operand_cond = inner_builder.AddInstruction(
+      HloInstruction::CreateGetTupleElement(scalar_shape_, param_cond, 2));
+  auto inner_conditional =
+      inner_builder.AddInstruction(HloInstruction::CreateConditional(
+          scalar_shape_, pred_cond, true_operand_cond, computation1,
+          false_operand_cond, computation2));
+  auto inner_conditional_computation =
+      module_->AddEmbeddedComputation(inner_builder.Build());
+
+  // Build entry computation.
+  auto builder = HloComputation::Builder(TestName());
+  auto pred1 = builder.AddInstruction(
+      HloInstruction::CreateConstant(Literal::CreateR0<bool>(true)));
+  auto pred2 = builder.AddInstruction(
+      HloInstruction::CreateConstant(Literal::CreateR0<bool>(false)));
+  auto constant1 = builder.AddInstruction(
+      HloInstruction::CreateConstant(Literal::CreateR0<float>(1.1f)));
+  auto constant2 = builder.AddInstruction(
+      HloInstruction::CreateConstant(Literal::CreateR0<float>(2.2f)));
+  auto constant3 = builder.AddInstruction(
+      HloInstruction::CreateConstant(Literal::CreateR0<float>(3.3f)));
+  auto tuple_operand = builder.AddInstruction(
+      HloInstruction::CreateTuple({pred2, constant1, constant2}));
+  auto conditional = builder.AddInstruction(HloInstruction::CreateConditional(
+      scalar_shape_, pred1, tuple_operand, inner_conditional_computation,
+      constant3, computation3));
+  module_->AddEntryComputation(builder.Build());
+
+  const HloDataflowAnalysis& analysis = RunAnalysis(GetParam());
+
+  EXPECT_TRUE(analysis.ValueIsDefinedAt(pred1));
+  EXPECT_TRUE(analysis.ValueIsDefinedAt(pred2));
+  EXPECT_TRUE(analysis.ValueIsDefinedAt(constant1));
+  EXPECT_TRUE(analysis.ValueIsDefinedAt(constant2));
+  EXPECT_TRUE(analysis.ValueIsDefinedAt(constant3));
+  EXPECT_TRUE(analysis.ValueIsDefinedAt(tuple_operand));
+  EXPECT_TRUE(analysis.ValueIsDefinedAt(computation1->root_instruction()));
+  EXPECT_TRUE(analysis.ValueIsDefinedAt(computation2->root_instruction()));
+  EXPECT_TRUE(analysis.ValueIsDefinedAt(computation3->root_instruction()));
+
+  auto computation1_param = computation1->parameter_instruction(0);
+  auto computation2_param = computation2->parameter_instruction(0);
+  auto computation3_param = computation3->parameter_instruction(0);
+  EXPECT_FALSE(analysis.ValueIsDefinedAt(computation1_param));
+  EXPECT_FALSE(analysis.ValueIsDefinedAt(computation2_param));
+  EXPECT_FALSE(analysis.ValueIsDefinedAt(computation3_param));
+  EXPECT_EQ(analysis.GetUniqueValueAt(computation1_param),
+            analysis.GetValueDefinedAt(constant1));
+  EXPECT_EQ(analysis.GetUniqueValueAt(computation2_param),
+            analysis.GetValueDefinedAt(constant2));
+  EXPECT_EQ(analysis.GetUniqueValueAt(computation3_param),
+            analysis.GetValueDefinedAt(constant3));
+
+  EXPECT_FALSE(analysis.ValueIsDefinedAt(param_cond));
+  EXPECT_FALSE(analysis.ValueIsDefinedAt(pred_cond));
+  EXPECT_FALSE(analysis.ValueIsDefinedAt(true_operand_cond));
+  EXPECT_FALSE(analysis.ValueIsDefinedAt(false_operand_cond));
+  EXPECT_EQ(analysis.GetUniqueValueAt(param_cond),
+            analysis.GetValueDefinedAt(tuple_operand));
+  EXPECT_EQ(analysis.GetUniqueValueAt(pred_cond),
+            analysis.GetValueDefinedAt(pred2));
+  EXPECT_EQ(analysis.GetUniqueValueAt(true_operand_cond),
+            analysis.GetValueDefinedAt(constant1));
+  EXPECT_EQ(analysis.GetUniqueValueAt(false_operand_cond),
+            analysis.GetValueDefinedAt(constant2));
+
+  EXPECT_EQ(analysis.values().size(), 9);
+  EXPECT_FALSE(analysis.ValueIsDefinedAt(inner_conditional));
+  EXPECT_FALSE(analysis.ValueIsDefinedAt(conditional));
+  EXPECT_THAT(
+      HloValuesAt(inner_conditional),
+      UnorderedElementsAre(
+          analysis.GetValueDefinedAt(computation1->root_instruction()),
+          analysis.GetValueDefinedAt(computation2->root_instruction())));
+  EXPECT_THAT(
+      HloValuesAt(conditional),
+      UnorderedElementsAre(
+          analysis.GetValueDefinedAt(computation1->root_instruction()),
+          analysis.GetValueDefinedAt(computation2->root_instruction()),
+          analysis.GetValueDefinedAt(computation3->root_instruction())));
+}
+
 INSTANTIATE_TEST_CASE_P(HloDataflowAnalysisInstantiation,
                         HloDataflowAnalysisTest,
                         ::testing::Values(false, true));
diff --git a/tensorflow/compiler/xla/service/service.cc b/tensorflow/compiler/xla/service/service.cc
index 8c287a6ab0..6696bdf329 100644
--- a/tensorflow/compiler/xla/service/service.cc
+++ b/tensorflow/compiler/xla/service/service.cc
@@ -1389,6 +1389,17 @@ tensorflow::Status Service::Op(const OpRequest* arg, OpResponse* result) {
       handle_status =
           computation->AddConcatenateInstruction(arg->concatenate_request());
       break;
+    case OpRequest::kConditionalRequest: {
+      TF_ASSIGN_OR_RETURN(UserComputation * true_computation,
+                          computation_tracker_.Resolve(
+                              arg->conditional_request().true_computation()));
+      TF_ASSIGN_OR_RETURN(UserComputation * false_computation,
+                          computation_tracker_.Resolve(
+                              arg->conditional_request().false_computation()));
+      handle_status = computation->AddConditionalInstruction(
+          arg->conditional_request(), *true_computation, *false_computation);
+      break;
+    }
     case OpRequest::kConstantRequest:
       handle_status =
           computation->AddConstantInstruction(arg->constant_request());
diff --git a/tensorflow/compiler/xla/service/shape_inference.cc b/tensorflow/compiler/xla/service/shape_inference.cc
index 1c0578ecc8..9c54654f0a 100644
--- a/tensorflow/compiler/xla/service/shape_inference.cc
+++ b/tensorflow/compiler/xla/service/shape_inference.cc
@@ -2079,6 +2079,64 @@ ShapeInference::InferDegenerateDimensionBroadcastShape(
   return init;
 }
 
+/* static */ StatusOr<Shape> ShapeInference::InferConditionalShape(
+    const Shape& predicate, const Shape& true_operand,
+    const Shape& false_operand, const ProgramShape& true_computation,
+    const ProgramShape& false_computation) {
+  if (!ShapeUtil::ShapeIs(predicate, PRED, {})) {
+    return InvalidArgument("predicate must be a boolean; got %s.",
+                           ShapeUtil::HumanString(predicate).c_str());
+  }
+
+  if (true_computation.parameters_size() != 1) {
+    return InvalidArgument("true_computation must take 1 argument; got %d.",
+                           true_computation.parameters_size());
+  }
+  if (!ShapeUtil::Compatible(true_computation.parameters(0), true_operand)) {
+    auto true_shape_string = [&]() {
+      return tensorflow::strings::Printf(
+          "true_operand: %s; true_computation: %s",
+          ShapeUtil::HumanString(true_operand).c_str(),
+          ShapeUtil::HumanString(true_computation).c_str());
+    };
+    return InvalidArgument(
+        "true_operand must match the shape of the only parameter of "
+        "true_computation: got %s.",
+        true_shape_string().c_str());
+  }
+
+  if (false_computation.parameters_size() != 1) {
+    return InvalidArgument("false_computation must take 1 argument; got %d.",
+                           false_computation.parameters_size());
+  }
+  if (!ShapeUtil::Compatible(false_computation.parameters(0), false_operand)) {
+    auto false_shape_string = [&]() {
+      return tensorflow::strings::Printf(
+          "false_operand: %s; false_computation: %s",
+          ShapeUtil::HumanString(false_operand).c_str(),
+          ShapeUtil::HumanString(false_computation).c_str());
+    };
+    return InvalidArgument(
+        "false_operand must match the shape of the only parameter of "
+        "false_computation: got %s.",
+        false_shape_string().c_str());
+  }
+  if (!ShapeUtil::Compatible(true_computation.result(),
+                             false_computation.result())) {
+    auto shape_string = [&]() {
+      return tensorflow::strings::Printf(
+          "true_computation result: %s; false_computation result: %s.",
+          ShapeUtil::HumanString(true_computation.result()).c_str(),
+          ShapeUtil::HumanString(false_computation.result()).c_str());
+    };
+    return InvalidArgument(
+        "the result of true_computation and false_computation must have the "
+        "same shape: got %s.",
+        shape_string().c_str());
+  }
+  return true_computation.result();
+}
+
 /* static */ StatusOr<Shape> ShapeInference::InferBroadcastShape(
     const Shape& operand, tensorflow::gtl::ArraySlice<int64> broadcast_sizes) {
   TF_RETURN_IF_ERROR(ExpectNotTupleOrOpaque(operand, "operand of broadcast"));
diff --git a/tensorflow/compiler/xla/service/shape_inference.h b/tensorflow/compiler/xla/service/shape_inference.h
index 8c5ac20244..c06340d2d5 100644
--- a/tensorflow/compiler/xla/service/shape_inference.h
+++ b/tensorflow/compiler/xla/service/shape_inference.h
@@ -180,6 +180,12 @@ class ShapeInference {
                                          const ProgramShape& body,
                                          const Shape& init);
 
+  // Infers the shape produced by a conditional operation.
+  static StatusOr<Shape> InferConditionalShape(
+      const Shape& predicate, const Shape& true_operand,
+      const Shape& false_operand, const ProgramShape& true_computation,
+      const ProgramShape& false_computation);
+
   // Infers the shape produced by a broadcast operation.
   static StatusOr<Shape> InferBroadcastShape(
       const Shape& operand, tensorflow::gtl::ArraySlice<int64> broadcast_sizes);
diff --git a/tensorflow/compiler/xla/service/shape_inference_test.cc b/tensorflow/compiler/xla/service/shape_inference_test.cc
index 6e53d2d609..7af2805f12 100644
--- a/tensorflow/compiler/xla/service/shape_inference_test.cc
+++ b/tensorflow/compiler/xla/service/shape_inference_test.cc
@@ -1437,5 +1437,80 @@ TEST_F(ShapeInferenceTest, Transpose) {
                                     ShapeUtil::MakeShape(F32, {3, 4, 5, 2})));
 }
 
+TEST_F(ShapeInferenceTest, Conditional) {
+  auto inferred_status0 = ShapeInference::InferConditionalShape(
+      pred_, vector_32_, vector_64_,
+      ShapeUtil::MakeProgramShape({vector_32_}, f32_),
+      ShapeUtil::MakeProgramShape({vector_64_}, f32_));
+  EXPECT_IS_OK(inferred_status0.status());
+  EXPECT_TRUE(ShapeUtil::Equal(f32_, inferred_status0.ValueOrDie()));
+
+  auto inferred_status1 = ShapeInference::InferConditionalShape(
+      pred_, matrix_32_48_, vector_32_,
+      ShapeUtil::MakeProgramShape({matrix_32_48_}, vector_64_),
+      ShapeUtil::MakeProgramShape({vector_32_}, vector_64_));
+  EXPECT_IS_OK(inferred_status1.status());
+  EXPECT_TRUE(ShapeUtil::Equal(vector_64_, inferred_status1.ValueOrDie()));
+
+  auto tuple_f32_v32 = ShapeUtil::MakeTupleShape({f32_, vector_32_});
+  auto inferred_status2 = ShapeInference::InferConditionalShape(
+      pred_, matrix_32_48_, tuple_f32_v32,
+      ShapeUtil::MakeProgramShape({matrix_32_48_}, vector_32_),
+      ShapeUtil::MakeProgramShape({tuple_f32_v32}, vector_32_));
+  EXPECT_IS_OK(inferred_status2.status());
+  EXPECT_TRUE(ShapeUtil::Equal(vector_32_, inferred_status2.ValueOrDie()));
+
+  auto inferred_status_error0 = ShapeInference::InferConditionalShape(
+      s32_, vector_32_, vector_64_,
+      ShapeUtil::MakeProgramShape({vector_32_}, f32_),
+      ShapeUtil::MakeProgramShape({vector_64_}, f32_));
+  EXPECT_FALSE(inferred_status_error0.ok());
+  EXPECT_THAT(inferred_status_error0.status().error_message(),
+              HasSubstr("predicate must be a boolean"));
+
+  auto inferred_status_error1 = ShapeInference::InferConditionalShape(
+      pred_, ShapeUtil::MakeTupleShape({f32_, vector_32_}), matrix_32_48_,
+      ShapeUtil::MakeProgramShape({f32_, vector_32_}, vector_32_),
+      ShapeUtil::MakeProgramShape({matrix_32_48_}, vector_32_));
+  EXPECT_FALSE(inferred_status_error1.ok());
+  EXPECT_THAT(inferred_status_error1.status().error_message(),
+              HasSubstr("true_computation must take 1 argument"));
+
+  auto inferred_status_error2 = ShapeInference::InferConditionalShape(
+      pred_, vector_32_, vector_64_,
+      ShapeUtil::MakeProgramShape({vector_64_}, f32_),
+      ShapeUtil::MakeProgramShape({vector_64_}, f32_));
+  EXPECT_FALSE(inferred_status_error2.ok());
+  EXPECT_THAT(inferred_status_error2.status().error_message(),
+              HasSubstr("true_operand must match the shape of the only "
+                        "parameter of true_computation"));
+
+  auto inferred_status_error3 = ShapeInference::InferConditionalShape(
+      pred_, matrix_32_48_, ShapeUtil::MakeTupleShape({f32_, vector_32_}),
+      ShapeUtil::MakeProgramShape({matrix_32_48_}, vector_32_),
+      ShapeUtil::MakeProgramShape({f32_, vector_32_}, vector_32_));
+  EXPECT_FALSE(inferred_status_error3.ok());
+  EXPECT_THAT(inferred_status_error3.status().error_message(),
+              HasSubstr("false_computation must take 1 argument"));
+
+  auto inferred_status_error4 = ShapeInference::InferConditionalShape(
+      pred_, vector_32_, vector_64_,
+      ShapeUtil::MakeProgramShape({vector_32_}, f32_),
+      ShapeUtil::MakeProgramShape({vector_32_}, f32_));
+  EXPECT_FALSE(inferred_status_error4.ok());
+  EXPECT_THAT(inferred_status_error4.status().error_message(),
+              HasSubstr("false_operand must match the shape of the only "
+                        "parameter of false_computation"));
+
+  auto inferred_status_error5 = ShapeInference::InferConditionalShape(
+      pred_, vector_32_, vector_64_,
+      ShapeUtil::MakeProgramShape({vector_32_}, f32_),
+      ShapeUtil::MakeProgramShape({vector_64_}, vector_32_));
+  EXPECT_FALSE(inferred_status_error5.ok());
+  EXPECT_THAT(inferred_status_error5.status().error_message(),
+              HasSubstr("the result of true_computation and false_computation "
+                        "must have the same shape"));
+}
+
 }  // namespace
 }  // namespace xla
diff --git a/tensorflow/compiler/xla/service/user_computation.cc b/tensorflow/compiler/xla/service/user_computation.cc
index 1ec21a0b51..e6893c8133 100644
--- a/tensorflow/compiler/xla/service/user_computation.cc
+++ b/tensorflow/compiler/xla/service/user_computation.cc
@@ -763,6 +763,54 @@ StatusOr<ComputationDataHandle> UserComputation::AddWhileInstruction(
   return handle;
 }
 
+StatusOr<ComputationDataHandle> UserComputation::AddConditionalInstruction(
+    const ConditionalRequest& conditional_request,
+    const UserComputation& true_computation,
+    const UserComputation& false_computation) {
+  tensorflow::mutex_lock lock(mutex_);
+
+  TF_ASSIGN_OR_RETURN(const OperationRequest* pred,
+                      LookUpRequest(conditional_request.predicate()));
+  TF_ASSIGN_OR_RETURN(const OperationRequest* true_operand,
+                      LookUpRequest(conditional_request.true_operand()));
+  TF_ASSIGN_OR_RETURN(const OperationRequest* false_operand,
+                      LookUpRequest(conditional_request.false_operand()));
+
+  VersionedComputationHandle::Version true_computation_version =
+      true_computation.version();
+  TF_ASSIGN_OR_RETURN(
+      std::shared_ptr<const ProgramShape> true_computation_shape,
+      true_computation.ComputeProgramShape(true_computation_version));
+
+  VersionedComputationHandle::Version false_computation_version =
+      false_computation.version();
+  TF_ASSIGN_OR_RETURN(
+      std::shared_ptr<const ProgramShape> false_computation_shape,
+      false_computation.ComputeProgramShape(false_computation_version));
+
+  TF_ASSIGN_OR_RETURN(Shape inferred_shape,
+                      ShapeInference::InferConditionalShape(
+                          pred->output_shape(), true_operand->output_shape(),
+                          false_operand->output_shape(),
+                          *true_computation_shape, *false_computation_shape));
+
+  ComputationDataHandle handle = CreateComputationDataHandle();
+
+  OperationRequest& request =
+      (*session_computation_.mutable_requests())[handle.handle()];
+  *request.mutable_output_handle() = handle;
+  *request.mutable_output_shape() = inferred_shape;
+  request.add_embedded_computation_versions(true_computation_version);
+  request.add_embedded_computation_versions(false_computation_version);
+  *request.mutable_request()->mutable_conditional_request() =
+      conditional_request;
+
+  VLOG(1) << "AddConditionalInstruction (" << GetVersionedHandleInternal()
+          << "), data handle " << handle.handle() << ": "
+          << conditional_request.ShortDebugString();
+  return handle;
+}
+
 StatusOr<ComputationDataHandle> UserComputation::AddBroadcastInstruction(
     const BroadcastRequest& broadcast_request) {
   tensorflow::mutex_lock lock(mutex_);
@@ -1791,6 +1839,23 @@ void PureFunctionalVisitor(const SessionComputation& session_computation,
       break;
     }
 
+    case OpRequest::kConditionalRequest: {
+      const ConditionalRequest& conditional_request =
+          request.request().conditional_request();
+      PureFunctionalVisitor(session_computation,
+                            conditional_request.predicate(), num_parameters,
+                            visited, is_functional);
+      PureFunctionalVisitor(session_computation,
+                            conditional_request.true_operand(), num_parameters,
+                            visited, is_functional);
+      PureFunctionalVisitor(session_computation,
+                            conditional_request.false_operand(), num_parameters,
+                            visited, is_functional);
+      // TODO(b/32495713): We aren't checking the true and false computations
+      // themselves.
+      break;
+    }
+
     case OpRequest::kTernaryOpRequest: {
       const TernaryOpRequest& ternary_op_request =
           request.request().ternary_op_request();
@@ -2019,6 +2084,21 @@ UserComputation::GetEmbeddedComputations(
           break;
         }
 
+        case OpRequest::kConditionalRequest: {
+          CHECK_EQ(2, request.embedded_computation_versions_size());
+          const ConditionalRequest& conditional_request =
+              request.request().conditional_request();
+          const VersionedComputationHandle true_computation_versioned_handle = {
+              conditional_request.true_computation(),
+              request.embedded_computation_versions(0)};
+          computations.push_back(true_computation_versioned_handle);
+          const VersionedComputationHandle false_computation_versioned_handle =
+              {conditional_request.false_computation(),
+               request.embedded_computation_versions(1)};
+          computations.push_back(false_computation_versioned_handle);
+          break;
+        }
+
         default:
           // No embedded computation.
           break;
@@ -2105,6 +2185,16 @@ Status UserComputation::RemapEmbeddedComputations(
         TF_RETURN_IF_ERROR(update(while_request->mutable_body()));
         break;
       }
+      case OpRequest::kConditionalRequest: {
+        TF_RET_CHECK(2 == request.embedded_computation_versions_size());
+        ConditionalRequest* conditional_request =
+            request.mutable_request()->mutable_conditional_request();
+        TF_RETURN_IF_ERROR(
+            update(conditional_request->mutable_true_computation()));
+        TF_RETURN_IF_ERROR(
+            update(conditional_request->mutable_false_computation()));
+        break;
+      }
       default:
         // No embedded computation.
         TF_RET_CHECK(0 == request.embedded_computation_versions_size());
@@ -2451,6 +2541,15 @@ static void ForEachOperand(
       break;
     }
 
+    case OpRequest::kConditionalRequest: {
+      const ConditionalRequest& conditional_request =
+          request.request().conditional_request();
+      apply(conditional_request.predicate());
+      apply(conditional_request.true_operand());
+      apply(conditional_request.false_operand());
+      break;
+    }
+
     case OpRequest::kTernaryOpRequest: {
       const TernaryOpRequest& ternary_op_request =
           request.request().ternary_op_request();
@@ -3071,6 +3170,30 @@ void ComputationLowerer::Visit(
       break;
     }
 
+    case OpRequest::kConditionalRequest: {
+      const ConditionalRequest& conditional_request =
+          request.request().conditional_request();
+      CHECK_EQ(2, request.embedded_computation_versions_size());
+      VersionedComputationHandle::Version true_computation_version =
+          request.embedded_computation_versions(0);
+      HloComputation* true_computation = ResolveComputation(
+          conditional_request.true_computation(), true_computation_version);
+      VersionedComputationHandle::Version false_computation_version =
+          request.embedded_computation_versions(1);
+      HloComputation* false_computation = ResolveComputation(
+          conditional_request.false_computation(), false_computation_version);
+      HloInstruction* predicate =
+          lookup_instruction(conditional_request.predicate());
+      HloInstruction* true_operand =
+          lookup_instruction(conditional_request.true_operand());
+      HloInstruction* false_operand =
+          lookup_instruction(conditional_request.false_operand());
+      hlo_instruction = add_instruction(HloInstruction::CreateConditional(
+          request.output_shape(), predicate, true_operand, true_computation,
+          false_operand, false_computation));
+      break;
+    }
+
     case OpRequest::kTernaryOpRequest: {
       const TernaryOpRequest& ternary_op_request =
           request.request().ternary_op_request();
diff --git a/tensorflow/compiler/xla/service/user_computation.h b/tensorflow/compiler/xla/service/user_computation.h
index b6686c3f1a..8a78d520e1 100644
--- a/tensorflow/compiler/xla/service/user_computation.h
+++ b/tensorflow/compiler/xla/service/user_computation.h
@@ -220,6 +220,12 @@ class UserComputation {
       const UserComputation& condition_computation,
       const UserComputation& body_computation);
 
+  // Enqueues a conditional instruction on this user computation.
+  StatusOr<ComputationDataHandle> AddConditionalInstruction(
+      const ConditionalRequest& conditional_request,
+      const UserComputation& true_computation,
+      const UserComputation& false_computation);
+
   // Enqueues a Send instruction onto this user computation.
   Status AddSendInstruction(const SendRequest& send_request);
 
diff --git a/tensorflow/compiler/xla/tests/BUILD b/tensorflow/compiler/xla/tests/BUILD
index 24f4a9d05a..63d8b30368 100644
--- a/tensorflow/compiler/xla/tests/BUILD
+++ b/tensorflow/compiler/xla/tests/BUILD
@@ -437,6 +437,24 @@ xla_test(
     ],
 )
 
+xla_test(
+    name = "conditional_test",
+    srcs = ["conditional_test.cc"],
+    # Currently, Conditional is supported only in the CPU backend.
+    backends = ["cpu"],
+    deps = [
+        "//tensorflow/compiler/xla:xla_data_proto",
+        "//tensorflow/compiler/xla/client:computation_builder",
+        "//tensorflow/compiler/xla/client:global_data",
+        "//tensorflow/compiler/xla/client:local_client",
+        "//tensorflow/compiler/xla/tests:client_library_test_base",
+        "//tensorflow/compiler/xla/tests:literal_test_util",
+        "//tensorflow/compiler/xla/tests:xla_internal_test_main",
+        "//tensorflow/core:lib",
+        "//tensorflow/core:test",
+    ],
+)
+
 xla_test(
     name = "unary_op_test",
     srcs = ["unary_op_test.cc"],
diff --git a/tensorflow/compiler/xla/tests/conditional_test.cc b/tensorflow/compiler/xla/tests/conditional_test.cc
new file mode 100644
index 0000000000..cbfacaea53
--- /dev/null
+++ b/tensorflow/compiler/xla/tests/conditional_test.cc
@@ -0,0 +1,238 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/compiler/xla/client/computation_builder.h"
+#include "tensorflow/compiler/xla/tests/client_library_test_base.h"
+#include "tensorflow/compiler/xla/tests/literal_test_util.h"
+#include "tensorflow/compiler/xla/tests/test_macros.h"
+
+namespace xla {
+namespace {
+
+class ConditionalOpTest : public ClientLibraryTestBase {
+ protected:
+  Computation CreateR0F32ConstantComputation(float value) {
+    ComputationBuilder builder(client_, "Constant");
+    builder.Parameter(0, empty_tuple_, "tuple");
+    builder.ConstantR0<float>(value);
+    auto build_status = builder.Build();
+    EXPECT_IS_OK(build_status.status());
+    return build_status.ConsumeValueOrDie();
+  }
+
+  Computation CreateR0F32IdentityComputation() {
+    ComputationBuilder builder(client_, "Identity");
+    builder.Parameter(0, r0f32_, "x");
+    auto build_status = builder.Build();
+    EXPECT_IS_OK(build_status.status());
+    return build_status.ConsumeValueOrDie();
+  }
+
+  Computation CreateR0F32CeilComputation() {
+    ComputationBuilder builder(client_, "Ceil");
+    auto param = builder.Parameter(0, r0f32_, "param");
+    builder.Ceil(param);
+    auto build_status = builder.Build();
+    EXPECT_IS_OK(build_status.status());
+    return build_status.ConsumeValueOrDie();
+  }
+
+  Computation CreateR0F32FloorComputation() {
+    ComputationBuilder builder(client_, "Ceil");
+    auto param = builder.Parameter(0, r0f32_, "param");
+    builder.Floor(param);
+    auto build_status = builder.Build();
+    EXPECT_IS_OK(build_status.status());
+    return build_status.ConsumeValueOrDie();
+  }
+
+  Computation CreateAddTupleComputation(const string& computation_name,
+                                        const Shape& tuple_shape) {
+    ComputationBuilder builder(client_, computation_name);
+    auto tuple = builder.Parameter(0, tuple_shape, "tuple");
+    auto x = builder.GetTupleElement(tuple, 0);
+    auto y = builder.GetTupleElement(tuple, 1);
+    builder.Add(x, y);
+    auto build_status = builder.Build();
+    EXPECT_IS_OK(build_status.status());
+    return build_status.ConsumeValueOrDie();
+  }
+
+  Computation CreateAddR0Computation() {
+    return CreateAddTupleComputation("AddR0", tuple_2_r0f32_);
+  }
+
+  Computation CreateAddR1Computation() {
+    return CreateAddTupleComputation("AddR1", tuple_2_r1s2f32_);
+  }
+
+  Computation CreateSubTupleComputation(const string& computation_name,
+                                        const Shape& tuple_shape) {
+    ComputationBuilder builder(client_, computation_name);
+    auto tuple = builder.Parameter(0, tuple_shape, "tuple");
+    auto x = builder.GetTupleElement(tuple, 0);
+    auto y = builder.GetTupleElement(tuple, 1);
+    builder.Sub(x, y);
+    auto build_status = builder.Build();
+    EXPECT_IS_OK(build_status.status());
+    return build_status.ConsumeValueOrDie();
+  }
+
+  Computation CreateSubR0Computation() {
+    return CreateSubTupleComputation("SubR0", tuple_2_r0f32_);
+  }
+
+  Computation CreateSubR1Computation() {
+    return CreateSubTupleComputation("SubR1", tuple_2_r1s2f32_);
+  }
+
+  Shape r0f32_ = ShapeUtil::MakeShape(F32, {});
+  Shape tuple_2_r0f32_ = ShapeUtil::MakeTupleShape(
+      {ShapeUtil::MakeShape(F32, {}), ShapeUtil::MakeShape(F32, {})});
+  Shape tuple_2_r1s2f32_ = ShapeUtil::MakeTupleShape(
+      {ShapeUtil::MakeShape(F32, {2}), ShapeUtil::MakeShape(F32, {2})});
+  Shape empty_tuple_ = ShapeUtil::MakeTupleShape({});
+  ErrorSpec error_spec_{0.001};
+};
+
+// Test true and false computations that do not take any parameters.
+XLA_TEST_F(ConditionalOpTest, Parameters0) {
+  ComputationBuilder builder(client_, TestName());
+  auto pred = builder.ConstantR0<bool>(true);
+  auto operands = builder.Tuple({});
+  auto true_computation = CreateR0F32ConstantComputation(56.0f);
+  auto false_computation = CreateR0F32ConstantComputation(12.0f);
+  auto result = builder.Conditional(pred, operands, true_computation, operands,
+                                    false_computation);
+
+  ComputeAndCompareR0<float>(&builder, 56.0f, {}, error_spec_);
+}
+
+// Test true and false computations that take in 1 parameter.
+XLA_TEST_F(ConditionalOpTest, Parameters1) {
+  ComputationBuilder builder(client_, TestName());
+  auto pred = builder.ConstantR0<bool>(false);
+  auto operand1 = builder.ConstantR0<float>(56.0f);
+  auto operand2 = builder.ConstantR0<float>(12.0f);
+  auto identity = CreateR0F32IdentityComputation();
+  auto result =
+      builder.Conditional(pred, operand1, identity, operand2, identity);
+
+  ComputeAndCompareR0<float>(&builder, 12.0f, {}, error_spec_);
+}
+
+// Test true and false computations that take in 2 parameters and predicate is
+// true.
+XLA_TEST_F(ConditionalOpTest, Parameters2TrueBranch) {
+  ComputationBuilder builder(client_, TestName());
+  auto pred = builder.ConstantR0<bool>(true);
+  auto operand1 = builder.ConstantR0<float>(56.0f);
+  auto operand2 = builder.ConstantR0<float>(12.0f);
+  auto operands = builder.Tuple({operand1, operand2});
+  auto result = builder.Conditional(pred, operands, CreateAddR0Computation(),
+                                    operands, CreateSubR0Computation());
+
+  ComputeAndCompareR0<float>(&builder, 68.0f, {}, error_spec_);
+}
+
+// Test true and false computations that take in 2 parameters and predicate is
+// false.
+XLA_TEST_F(ConditionalOpTest, Parameters2FalseBranch) {
+  ComputationBuilder builder(client_, TestName());
+  auto pred = builder.ConstantR0<bool>(false);
+  auto operand1 = builder.ConstantR0<float>(56.0f);
+  auto operand2 = builder.ConstantR0<float>(12.0f);
+  auto operands = builder.Tuple({operand1, operand2});
+  auto result = builder.Conditional(pred, operands, CreateAddR0Computation(),
+                                    operands, CreateSubR0Computation());
+
+  ComputeAndCompareR0<float>(&builder, 44.0f, {}, error_spec_);
+}
+
+// Test true and false computations that take in 2 array parameters and
+// predicate is true.
+XLA_TEST_F(ConditionalOpTest, Parameters2ArrayTrueBranch) {
+  ComputationBuilder builder(client_, TestName());
+  auto pred = builder.ConstantR0<bool>(true);
+  auto operand1 = builder.ConstantR1<float>({24.0f, 56.0f});
+  auto operand2 = builder.ConstantR1<float>({10.0f, 11.0f});
+  auto operands = builder.Tuple({operand1, operand2});
+  auto result = builder.Conditional(pred, operands, CreateAddR1Computation(),
+                                    operands, CreateSubR1Computation());
+
+  ComputeAndCompareR1<float>(&builder, {34.0f, 67.0f}, {}, error_spec_);
+}
+
+// Test true and false computations that take in 2 array parameters and
+// predicate is false.
+XLA_TEST_F(ConditionalOpTest, Parameters2ArrayFalseBranch) {
+  ComputationBuilder builder(client_, TestName());
+  auto pred = builder.ConstantR0<bool>(false);
+  auto operand1 = builder.ConstantR1<float>({24.0f, 56.0f});
+  auto operand2 = builder.ConstantR1<float>({10.0f, 11.0f});
+  auto operands = builder.Tuple({operand1, operand2});
+  auto result = builder.Conditional(pred, operands, CreateAddR1Computation(),
+                                    operands, CreateSubR1Computation());
+
+  ComputeAndCompareR1<float>(&builder, {14.0f, 45.0f}, {}, error_spec_);
+}
+
+// Test the case where one conditional is nested within another.
+XLA_TEST_F(ConditionalOpTest, NestedConditionals) {
+  Shape r0bool = ShapeUtil::MakeShape(PRED, {});
+  Shape tuple_shape = ShapeUtil::MakeTupleShape({r0bool, r0f32_, r0f32_});
+  ComputationBuilder inner_builder(client_, TestName() + ".inner_conditional");
+  auto param0 = inner_builder.Parameter(0, tuple_shape, "param0");
+  auto pred_cond = inner_builder.GetTupleElement(param0, 0);
+  auto true_operand = inner_builder.GetTupleElement(param0, 1);
+  auto false_operand = inner_builder.GetTupleElement(param0, 2);
+  inner_builder.Conditional(pred_cond, true_operand,
+                            CreateR0F32CeilComputation(), false_operand,
+                            CreateR0F32FloorComputation());
+  auto inner_builder_result = inner_builder.Build();
+
+  ComputationBuilder builder(client_, TestName());
+  auto pred1 = builder.ConstantR0<bool>(true);
+  auto pred2 = builder.ConstantR0<bool>(false);
+  auto operand1 = builder.ConstantR0<float>(1.1f);
+  auto operand2 = builder.ConstantR0<float>(12.2f);
+  auto operand3 = builder.ConstantR0<float>(43.3f);
+  auto tuple_operand = builder.Tuple({pred2, operand1, operand2});
+  builder.Conditional(pred1, tuple_operand,
+                      inner_builder_result.ConsumeValueOrDie(), operand3,
+                      CreateR0F32IdentityComputation());
+
+  ComputeAndCompareR0<float>(&builder, 12.0f, {}, error_spec_);
+}
+
+// Test a mismatch in the shape of the true operand and true computation.
+XLA_TEST_F(ConditionalOpTest, ShapeMismatch) {
+  ComputationBuilder builder(client_, TestName());
+  auto pred = builder.ConstantR0<bool>(true);
+  auto operand1 = builder.ConstantR0<float>(56.0f);
+  auto operand2 = builder.ConstantR0<float>(12.0f);
+  auto operands = builder.Tuple({operand1, operand2});
+  builder.Conditional(pred, operands, CreateAddR1Computation(), operands,
+                      CreateSubR0Computation());
+
+  auto result = builder.Build();
+  EXPECT_FALSE(result.ok());
+  EXPECT_THAT(result.status().error_message(),
+              ::testing::HasSubstr("true_operand must match the shape of the "
+                                   "only parameter of true_computation"));
+}
+
+}  // namespace
+}  // namespace xla
diff --git a/tensorflow/compiler/xla/xla_data.proto b/tensorflow/compiler/xla/xla_data.proto
index 6800c3d7fa..215707634b 100644
--- a/tensorflow/compiler/xla/xla_data.proto
+++ b/tensorflow/compiler/xla/xla_data.proto
@@ -668,6 +668,14 @@ message ConcatenateRequest {
   int64 dimension = 3;
 }
 
+message ConditionalRequest {
+  ComputationDataHandle predicate = 2;
+  ComputationDataHandle true_operand = 3;
+  ComputationHandle true_computation = 4;
+  ComputationDataHandle false_operand = 5;
+  ComputationHandle false_computation = 6;
+}
+
 message WhileRequest {
   ComputationHandle condition = 2;
   ComputationHandle body = 3;
@@ -929,7 +937,8 @@ message OpRequest {
     BatchNormInferenceRequest batch_norm_inference_request = 38;
     FftRequest fft_request = 41;
     ConvertRequest bitcast_convert_request = 42;
-    // Next: 44
+    ConditionalRequest conditional_request = 44;
+    // Next: 45
   }
 }
 
diff --git a/tensorflow/docs_src/performance/xla/operation_semantics.md b/tensorflow/docs_src/performance/xla/operation_semantics.md
index cd2a0cfbc6..93d38a8438 100644
--- a/tensorflow/docs_src/performance/xla/operation_semantics.md
+++ b/tensorflow/docs_src/performance/xla/operation_semantics.md
@@ -389,6 +389,35 @@ Diagram:
   <img style="width:100%" src="https://www.tensorflow.org/images/ops_concatenate.png">
 </div>
 
+## Conditional
+
+See also [`ComputationBuilder::Conditional`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/computation_builder.h).
+
+<b> `Conditional(pred, true_operand, true_computation, false_operand,
+    false_computation)` </b>
+
+| Arguments           | Type                    | Semantics                   |
+| ------------------- | ----------------------- | --------------------------- |
+| `pred`              | `ComputationDataHandle` | Scalar of type `PRED`       |
+| `true_operand`      | `ComputationDataHandle` | Argument of type `T_0`      |
+| `true_computation`  | `Computation`           | Computation of type `T_0 -> |
+:                     :                         : S`                          :
+| `false_operand`     | `ComputationDataHandle` | Argument of type `T_1`      |
+| `false_computation` | `Computation`           | Computation of type `T_1 -> |
+:                     :                         : S`                          :
+
+Executes `true_computation` if `pred` is `true`, `false_computation` if `pred`
+is `false`, and returns the result.
+
+The `true_computation` must take in a single argument of type `T_0` and will be
+invoked with `true_operand` which must be of the same type. The
+`false_computation` must take in a single argument of type `T_1` and will be
+invoked with `false_operand` which must be of the same type. The type of the
+returned value of `true_computation` and `false_computation` must be the same.
+
+Note that only one of `true_computation` and `false_computation` will be
+executed depending on the value of `pred`.
+
 ## Conv (convolution)
 
 See also
-- 
GitLab


From 327fe05ea7727fb8e27e2b4ec2c8b85493ec35b7 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 7 Dec 2017 17:53:43 -0800
Subject: [PATCH 0789/1225] boosted_trees: to name variables so that they can
 be distinguished in the graph.

PiperOrigin-RevId: 178323017
---
 .../boosted_trees/python/training/functions/gbdt_batch.py   | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/tensorflow/contrib/boosted_trees/python/training/functions/gbdt_batch.py b/tensorflow/contrib/boosted_trees/python/training/functions/gbdt_batch.py
index 6094dae6b5..b95956dae2 100644
--- a/tensorflow/contrib/boosted_trees/python/training/functions/gbdt_batch.py
+++ b/tensorflow/contrib/boosted_trees/python/training/functions/gbdt_batch.py
@@ -322,9 +322,11 @@ class GradientBoostedDecisionTreeModel(object):
     self._feature_columns = feature_columns
     self._learner_config_serialized = learner_config.SerializeToString()
     self._attempted_trees = variables.Variable(
-        initial_value=array_ops.zeros([], dtypes.int64), trainable=False)
+        initial_value=array_ops.zeros([], dtypes.int64), trainable=False,
+        name="attempted_trees")
     self._finalized_trees = variables.Variable(
-        initial_value=array_ops.zeros([], dtypes.int64), trainable=False)
+        initial_value=array_ops.zeros([], dtypes.int64), trainable=False,
+        name="finalized_trees")
     if not features:
       raise ValueError("Features dictionary must be specified.")
     (fc_names, dense_floats, sparse_float_indices, sparse_float_values,
-- 
GitLab


From 3edc4ee2e3e1c40ac80e32a6aed68265b08cb1e9 Mon Sep 17 00:00:00 2001
From: Chi Zeng <chizeng@google.com>
Date: Thu, 7 Dec 2017 18:11:53 -0800
Subject: [PATCH 0790/1225] Add ":grpc_debug_test_server" to the deps of
 ":debug_py".

PiperOrigin-RevId: 178324950
---
 tensorflow/python/debug/BUILD | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tensorflow/python/debug/BUILD b/tensorflow/python/debug/BUILD
index 2fda463a77..c52bf49b98 100644
--- a/tensorflow/python/debug/BUILD
+++ b/tensorflow/python/debug/BUILD
@@ -31,6 +31,7 @@ py_library(
         ":debug_graphs",
         ":debug_utils",
         ":grpc_debug_server",
+        ":grpc_debug_test_server",
         ":hooks",
         ":local_cli_wrapper",
         "//tensorflow/python:util",
-- 
GitLab


From 28b92cd5a9ed677b5a2148e24d72f49de3768b23 Mon Sep 17 00:00:00 2001
From: Shanqing Cai <cais@google.com>
Date: Thu, 7 Dec 2017 21:16:50 -0500
Subject: [PATCH 0791/1225] Fix tag in source_remote_test: no_mac --> nomac

---
 tensorflow/python/debug/BUILD | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/python/debug/BUILD b/tensorflow/python/debug/BUILD
index aec0966b66..baa79e1087 100644
--- a/tensorflow/python/debug/BUILD
+++ b/tensorflow/python/debug/BUILD
@@ -533,8 +533,8 @@ py_test(
     srcs = ["lib/source_remote_test.py"],
     srcs_version = "PY2AND3",
     tags = [
-        "no_mac",
         "no_windows",
+        "nomac",
         "oss_serial",
     ],
     deps = [
-- 
GitLab


From 09030980ea42dd1f7c0058c15c27fc74c7c505ec Mon Sep 17 00:00:00 2001
From: Benoit Steiner <bsteiner@google.com>
Date: Thu, 7 Dec 2017 19:08:37 -0800
Subject: [PATCH 0792/1225] Dont optimize away control triggers Added a small
 test

PiperOrigin-RevId: 178329604
---
 tensorflow/core/grappler/costs/graph_properties_test.cc   | 8 ++++++++
 .../core/grappler/optimizers/dependency_optimizer.cc      | 3 +++
 2 files changed, 11 insertions(+)

diff --git a/tensorflow/core/grappler/costs/graph_properties_test.cc b/tensorflow/core/grappler/costs/graph_properties_test.cc
index cc40ff2cfc..a50ae0898c 100644
--- a/tensorflow/core/grappler/costs/graph_properties_test.cc
+++ b/tensorflow/core/grappler/costs/graph_properties_test.cc
@@ -740,6 +740,10 @@ TEST_F(GraphPropertiesTest, FunctionStaticShapeInference) {
   EXPECT_EQ(2, prop.shape().dim_size());
   EXPECT_EQ(1, prop.shape().dim(0).size());
   EXPECT_EQ(2, prop.shape().dim(1).size());
+
+  PartialTensorShape shape(prop.shape());
+  EXPECT_TRUE(shape.IsFullyDefined());
+  EXPECT_FALSE(shape.unknown_rank());
 }
 
 TEST_F(GraphPropertiesTest, SymbolicShapes) {
@@ -776,6 +780,10 @@ TEST_F(GraphPropertiesTest, SymbolicShapes) {
   EXPECT_GE(-2, shape_a.dim(1).size());
   EXPECT_EQ(shape_a.dim(1).size(), shape_c.dim(1).size());
 
+  PartialTensorShape shape(shape_a);
+  EXPECT_FALSE(shape.IsFullyDefined());
+  EXPECT_FALSE(shape.unknown_rank());
+
   const auto shape_b = properties.GetOutputProperties("b").at(0).shape();
   const auto shape_d = properties.GetOutputProperties("d").at(0).shape();
   EXPECT_EQ(1, shape_b.dim_size());
diff --git a/tensorflow/core/grappler/optimizers/dependency_optimizer.cc b/tensorflow/core/grappler/optimizers/dependency_optimizer.cc
index 41d632d9e3..77525cc788 100644
--- a/tensorflow/core/grappler/optimizers/dependency_optimizer.cc
+++ b/tensorflow/core/grappler/optimizers/dependency_optimizer.cc
@@ -91,6 +91,9 @@ bool DependencyOptimizer::SafeToConvertToNoOp(const NodeDef& node) {
   if (!IsFreeOfSideEffect(node)) {
     return false;
   }
+  if (node.op() == "ControlTrigger") {
+    return false;
+  }
   if (node.op().rfind("Submodel", 0) == 0) {
     return false;
   }
-- 
GitLab


From 233aff82ad661b4792577690044b6a24132a2470 Mon Sep 17 00:00:00 2001
From: Eugene Brevdo <ebrevdo@google.com>
Date: Thu, 7 Dec 2017 20:29:06 -0800
Subject: [PATCH 0793/1225] Create a new Var-like object, LegacyVar, which
 allows access to its mutex.

Future changes will change how locking happens on the resource-specific Var
object.

Also hide any access to LegacyVar in the implementation file; and move other
ops into the .cc file where they belong.

PiperOrigin-RevId: 178334244
---
 tensorflow/core/kernels/variable_ops.cc | 211 +++++++++++++++++++++---
 tensorflow/core/kernels/variable_ops.h  | 158 +-----------------
 2 files changed, 200 insertions(+), 169 deletions(-)

diff --git a/tensorflow/core/kernels/variable_ops.cc b/tensorflow/core/kernels/variable_ops.cc
index 36b8ff09d7..1b7079dcba 100644
--- a/tensorflow/core/kernels/variable_ops.cc
+++ b/tensorflow/core/kernels/variable_ops.cc
@@ -23,6 +23,177 @@ limitations under the License.
 
 namespace tensorflow {
 
+// Resource stored by variables in the resource manager
+// (legacy, ref-style version).
+class LegacyVar : public ResourceBase {
+ public:
+  explicit LegacyVar(DataType dtype) : tensor_(dtype) {}
+  // Not copyable or movable.
+  LegacyVar(const LegacyVar&) = delete;
+  LegacyVar& operator=(const LegacyVar&) = delete;
+
+  mutex* mu() { return &mu_; }
+  Tensor* tensor() { return &tensor_; }
+
+  string DebugString() override {
+    return strings::StrCat(DataTypeString(tensor_.dtype()), "/",
+                           tensor_.shape().DebugString());
+  }
+
+ private:
+  mutex mu_;
+  Tensor tensor_;
+
+  ~LegacyVar() override {}
+};
+
+VariableOp::VariableOp(OpKernelConstruction* context) : OpKernel(context) {
+  OP_REQUIRES_OK(context, context->GetAttr("shape", &shape_));
+  dtype_ = RemoveRefType(context->output_type(0));
+}
+
+void VariableOp::Compute(OpKernelContext* ctx) {
+  mutex_lock l(init_mu_);
+  if (!initialized_) {
+    OP_REQUIRES_OK(ctx, cinfo_.Init(ctx->resource_manager(), def(),
+                                    true /* use name() */));
+    initialized_ = true;
+  }
+  auto creator = [this](LegacyVar** var) {
+    *var = new LegacyVar(dtype_);
+    (*var)->tensor()->set_shape(shape_);
+    return Status::OK();
+  };
+  LegacyVar* var;
+  OP_REQUIRES_OK(ctx, cinfo_.resource_manager()->LookupOrCreate<LegacyVar>(
+                          cinfo_.container(), cinfo_.name(), &var, creator));
+  // Output a reference to our tensor, so it may be updated.
+  //
+  // As long as the resource manager hasn't been cleared the ref we return
+  // here is valid because it owns a ref on var.
+  ctx->set_output_ref(0, var->mu(), var->tensor());
+  if (ctx->track_allocations() && var->tensor()->IsInitialized()) {
+    AllocatorAttributes attr;
+    attr.set_gpu_compatible(true);
+    attr.set_nic_compatible(true);
+    if (ctx->allocate_on_host(attr)) {
+      ctx->record_host_persistent_memory_allocation(
+          var->tensor()->AllocatedBytes());
+    } else {
+      ctx->record_device_persistent_memory_allocation(
+          var->tensor()->AllocatedBytes());
+    }
+  }
+  var->Unref();
+}
+
+class TemporaryVariableOp : public OpKernel {
+ public:
+  explicit TemporaryVariableOp(OpKernelConstruction* context)
+      : OpKernel(context) {
+    OP_REQUIRES_OK(context, context->GetAttr("shape", &shape_));
+    OP_REQUIRES_OK(context, context->GetAttr("dtype", &dtype_));
+    OP_REQUIRES_OK(context, context->GetAttr("var_name", &var_name_));
+    // Variable name defaults to op name if not specified explicitly.
+    if (var_name_.empty()) var_name_ = name();
+  }
+
+  void Compute(OpKernelContext* context) override {
+    Status s;
+    ResourceMgr* rm = context->resource_manager();
+    OP_REQUIRES(context, rm, errors::Internal("No per-step resource manager."));
+    auto* tmp_var = new TmpVar;
+    OP_REQUIRES(context, tmp_var,
+                errors::ResourceExhausted("Could not allocate TmpVar."));
+    tmp_var->name = var_name_;
+    s = context->allocate_temp(dtype_, shape_, &tmp_var->val);
+    if (!s.ok()) tmp_var->Unref();
+    OP_REQUIRES_OK(context, s);
+    OP_REQUIRES_OK(context, rm->Create(context->step_container()->name(),
+                                       var_name_, tmp_var));
+    context->set_output_ref(0, &tmp_var->mu, &tmp_var->val);
+    if (context->track_allocations()) {
+      AllocatorAttributes attr;
+      if (context->allocate_on_host(attr)) {
+        context->record_host_persistent_memory_allocation(
+            tmp_var->val.AllocatedBytes());
+      } else {
+        context->record_device_persistent_memory_allocation(
+            tmp_var->val.AllocatedBytes());
+      }
+    }
+  }
+
+ private:
+  // Refcounted temporary variable resource.
+  friend class DestroyTemporaryVariableOp;
+  struct TmpVar : public ResourceBase {
+    mutex mu;
+    Tensor val;
+    string name;
+    string DebugString() override { return name; }
+    ~TmpVar() override { VLOG(3) << "TmpVar " << name << " deleted"; }
+  };
+
+  TensorShape shape_;
+  DataType dtype_;
+  string var_name_;
+};
+
+class DestroyTemporaryVariableOp : public OpKernel {
+ public:
+  explicit DestroyTemporaryVariableOp(OpKernelConstruction* context)
+      : OpKernel(context) {
+    OP_REQUIRES(context, IsRefType(context->input_type(0)),
+                errors::InvalidArgument("lhs input needs to be a ref type"));
+    OP_REQUIRES_OK(context, context->GetAttr("var_name", &var_name_));
+    OP_REQUIRES(context, !var_name_.empty(),
+                errors::InvalidArgument("Missing var_name attribute"));
+  }
+
+  void Compute(OpKernelContext* context) override {
+    // NOTE(pbar): All other mutators of the Tensor Ref *must* have completed
+    // their execution before this DestroyTemporaryVariable op executes.
+    // This is typically achieved using control dependencies.
+    CHECK(IsRefType(context->input_dtype(0)));
+    Tensor tmpvar = context->mutable_input(0, false);
+    context->set_output(0, tmpvar);
+    ResourceMgr* rm = context->resource_manager();
+    OP_REQUIRES(context, rm, errors::Internal("No per-step resource manager."));
+    OP_REQUIRES_OK(context, rm->Delete<TemporaryVariableOp::TmpVar>(
+                                context->step_container()->name(), var_name_));
+    if (context->track_allocations()) {
+      if (context->allocate_on_host(AllocatorAttributes())) {
+        context->record_host_persistent_memory_allocation(
+            -static_cast<int64>(tmpvar.AllocatedBytes()));
+      } else {
+        context->record_device_persistent_memory_allocation(
+            -static_cast<int64>(tmpvar.AllocatedBytes()));
+      }
+    }
+  }
+
+ private:
+  string var_name_;
+};
+
+class IsVariableInitializedOp : public OpKernel {
+ public:
+  explicit IsVariableInitializedOp(OpKernelConstruction* context)
+      : OpKernel(context) {}
+
+  void Compute(OpKernelContext* context) override {
+    // Get a mutable input tensor of the Ref input.
+    const Tensor& input_tensor = context->mutable_input(0, false);
+    Tensor* output = nullptr;
+    OP_REQUIRES_OK(context,
+                   context->allocate_output(0, TensorShape({}), &output));
+    auto output_tensor = output->tensor<bool, 0>();
+    bool result = input_tensor.IsInitialized();
+    output_tensor() = result;
+  }
+};
+
 REGISTER_KERNEL_BUILDER(Name("Variable").Device(DEVICE_CPU), VariableOp);
 REGISTER_KERNEL_BUILDER(Name("VariableV2").Device(DEVICE_CPU), VariableOp);
 REGISTER_KERNEL_BUILDER(Name("TemporaryVariable").Device(DEVICE_CPU),
@@ -33,30 +204,30 @@ REGISTER_KERNEL_BUILDER(Name("IsVariableInitialized").Device(DEVICE_CPU),
                         IsVariableInitializedOp);
 
 #ifdef TENSORFLOW_USE_SYCL
-#define REGISTER_SYCL_KERNEL(type)                                         \
-  REGISTER_KERNEL_BUILDER(                                                 \
-      Name("Variable").Device(DEVICE_SYCL).TypeConstraint<type>("dtype"),  \
-      VariableOp);                                                         \
-  REGISTER_KERNEL_BUILDER(                                                 \
-      Name("VariableV2").Device(DEVICE_SYCL).TypeConstraint<type>("dtype"),\
-      VariableOp);                                                         \
-  REGISTER_KERNEL_BUILDER(Name("TemporaryVariable")                        \
-                              .Device(DEVICE_SYCL)                         \
-                              .TypeConstraint<type>("dtype"),              \
-                          TemporaryVariableOp);                            \
-  REGISTER_KERNEL_BUILDER(Name("DestroyTemporaryVariable")                 \
-                              .Device(DEVICE_SYCL)                         \
-                              .TypeConstraint<type>("T"),                  \
-                          DestroyTemporaryVariableOp);                     \
-  REGISTER_KERNEL_BUILDER(Name("IsVariableInitialized")                    \
-                              .Device(DEVICE_SYCL)                         \
-                              .TypeConstraint<type>("dtype")               \
-                              .HostMemory("is_initialized"),               \
+#define REGISTER_SYCL_KERNEL(type)                                          \
+  REGISTER_KERNEL_BUILDER(                                                  \
+      Name("Variable").Device(DEVICE_SYCL).TypeConstraint<type>("dtype"),   \
+      VariableOp);                                                          \
+  REGISTER_KERNEL_BUILDER(                                                  \
+      Name("VariableV2").Device(DEVICE_SYCL).TypeConstraint<type>("dtype"), \
+      VariableOp);                                                          \
+  REGISTER_KERNEL_BUILDER(Name("TemporaryVariable")                         \
+                              .Device(DEVICE_SYCL)                          \
+                              .TypeConstraint<type>("dtype"),               \
+                          TemporaryVariableOp);                             \
+  REGISTER_KERNEL_BUILDER(Name("DestroyTemporaryVariable")                  \
+                              .Device(DEVICE_SYCL)                          \
+                              .TypeConstraint<type>("T"),                   \
+                          DestroyTemporaryVariableOp);                      \
+  REGISTER_KERNEL_BUILDER(Name("IsVariableInitialized")                     \
+                              .Device(DEVICE_SYCL)                          \
+                              .TypeConstraint<type>("dtype")                \
+                              .HostMemory("is_initialized"),                \
                           IsVariableInitializedOp);
 
 TF_CALL_GPU_NUMBER_TYPES_NO_HALF(REGISTER_SYCL_KERNEL);
 #undef REGISTER_SYCL_KERNEL
-#endif // TENSORFLOW_USE_SYCL
+#endif  // TENSORFLOW_USE_SYCL
 
 #if GOOGLE_CUDA
 // Only register 'Variable' on GPU for the subset of types also supported by
diff --git a/tensorflow/core/kernels/variable_ops.h b/tensorflow/core/kernels/variable_ops.h
index 820b90d041..83134bad37 100644
--- a/tensorflow/core/kernels/variable_ops.h
+++ b/tensorflow/core/kernels/variable_ops.h
@@ -27,10 +27,16 @@ limitations under the License.
 
 namespace tensorflow {
 
-// Resource stored by variables in the resource manager.
+// Resource stored by variables in the resource manager
+// (new, resource-style version).
 class Var : public ResourceBase {
  public:
   explicit Var(DataType dtype) : tensor_(dtype) {}
+  // Not copyable or movable.
+  Var(const Var&) = delete;
+  Var& operator=(const Var&) = delete;
+
+  // TODO(ebrevdo): Use LockSet instead of exposing mu.
   mutex* mu() { return &mu_; }
   Tensor* tensor() { return &tensor_; }
 
@@ -44,52 +50,12 @@ class Var : public ResourceBase {
   Tensor tensor_;
 
   ~Var() override {}
-  TF_DISALLOW_COPY_AND_ASSIGN(Var);
 };
 
 class VariableOp : public OpKernel {
  public:
-  explicit VariableOp(OpKernelConstruction* context) : OpKernel(context) {
-    OP_REQUIRES_OK(context, context->GetAttr("shape", &shape_));
-    dtype_ = RemoveRefType(context->output_type(0));
-  }
-
-  void Compute(OpKernelContext* ctx) override {
-    mutex_lock l(init_mu_);
-    if (!initialized_) {
-      OP_REQUIRES_OK(
-          ctx,
-          cinfo_.Init(ctx->resource_manager(), def(), true /* use name() */));
-      initialized_ = true;
-    }
-    auto creator = [this](Var** var) {
-      *var = new Var(dtype_);
-      (*var)->tensor()->set_shape(shape_);
-      return Status::OK();
-    };
-    Var* var;
-    OP_REQUIRES_OK(ctx,
-                   cinfo_.resource_manager()->LookupOrCreate<Var>(
-                       cinfo_.container(), cinfo_.name(), &var, creator));
-    // Output a reference to our tensor, so it may be updated.
-    //
-    // As long as the resource manager hasn't been cleared the ref we return
-    // here is valid because it owns a ref on var.
-    ctx->set_output_ref(0, var->mu(), var->tensor());
-    if (ctx->track_allocations() && var->tensor()->IsInitialized()) {
-      AllocatorAttributes attr;
-      attr.set_gpu_compatible(true);
-      attr.set_nic_compatible(true);
-      if (ctx->allocate_on_host(attr)) {
-        ctx->record_host_persistent_memory_allocation(
-            var->tensor()->AllocatedBytes());
-      } else {
-        ctx->record_device_persistent_memory_allocation(
-            var->tensor()->AllocatedBytes());
-      }
-    }
-    var->Unref();
-  }
+  explicit VariableOp(OpKernelConstruction* context);
+  void Compute(OpKernelContext* ctx) override;
 
  private:
   DataType dtype_;
@@ -102,112 +68,6 @@ class VariableOp : public OpKernel {
   TF_DISALLOW_COPY_AND_ASSIGN(VariableOp);
 };
 
-class TemporaryVariableOp : public OpKernel {
- public:
-  explicit TemporaryVariableOp(OpKernelConstruction* context)
-      : OpKernel(context) {
-    OP_REQUIRES_OK(context, context->GetAttr("shape", &shape_));
-    OP_REQUIRES_OK(context, context->GetAttr("dtype", &dtype_));
-    OP_REQUIRES_OK(context, context->GetAttr("var_name", &var_name_));
-    // Variable name defaults to op name if not specified explicitly.
-    if (var_name_ == "") var_name_ = name();
-  }
-
-  void Compute(OpKernelContext* context) override {
-    Status s;
-    ResourceMgr* rm = context->resource_manager();
-    OP_REQUIRES(context, rm, errors::Internal("No per-step resource manager."));
-    auto* tmp_var = new TmpVar;
-    OP_REQUIRES(context, tmp_var,
-                errors::ResourceExhausted("Could not allocate TmpVar."));
-    tmp_var->name = var_name_;
-    s = context->allocate_temp(dtype_, shape_, &tmp_var->val);
-    if (!s.ok()) tmp_var->Unref();
-    OP_REQUIRES_OK(context, s);
-    OP_REQUIRES_OK(context, rm->Create(context->step_container()->name(),
-                                       var_name_, tmp_var));
-    context->set_output_ref(0, &tmp_var->mu, &tmp_var->val);
-    if (context->track_allocations()) {
-      AllocatorAttributes attr;
-      if (context->allocate_on_host(attr)) {
-        context->record_host_persistent_memory_allocation(
-            tmp_var->val.AllocatedBytes());
-      } else {
-        context->record_device_persistent_memory_allocation(
-            tmp_var->val.AllocatedBytes());
-      }
-    }
-  }
-
- private:
-  // Refcounted temporary variable resource.
-  friend class DestroyTemporaryVariableOp;
-  struct TmpVar : public ResourceBase {
-    mutex mu;
-    Tensor val;
-    string name;
-    string DebugString() override { return name; }
-    ~TmpVar() override { VLOG(3) << "TmpVar " << name << " deleted"; }
-  };
-
-  TensorShape shape_;
-  DataType dtype_;
-  string var_name_;
-};
-
-class DestroyTemporaryVariableOp : public OpKernel {
- public:
-  explicit DestroyTemporaryVariableOp(OpKernelConstruction* context)
-      : OpKernel(context) {
-    OP_REQUIRES(context, IsRefType(context->input_type(0)),
-                errors::InvalidArgument("lhs input needs to be a ref type"));
-    OP_REQUIRES_OK(context, context->GetAttr("var_name", &var_name_));
-    OP_REQUIRES(context, var_name_ != "",
-                errors::InvalidArgument("Missing var_name attribute"));
-  }
-
-  void Compute(OpKernelContext* context) override {
-    // NOTE(pbar): All other mutators of the Tensor Ref *must* have completed
-    // their execution before this DestroyTemporaryVariable op executes.
-    // This is typically achieved using control dependencies.
-    CHECK(IsRefType(context->input_dtype(0)));
-    Tensor tmpvar = context->mutable_input(0, false);
-    context->set_output(0, tmpvar);
-    ResourceMgr* rm = context->resource_manager();
-    OP_REQUIRES(context, rm, errors::Internal("No per-step resource manager."));
-    OP_REQUIRES_OK(context, rm->Delete<TemporaryVariableOp::TmpVar>(
-                                context->step_container()->name(), var_name_));
-    if (context->track_allocations()) {
-      if (context->allocate_on_host(AllocatorAttributes())) {
-        context->record_host_persistent_memory_allocation(
-            -static_cast<int64>(tmpvar.AllocatedBytes()));
-      } else {
-        context->record_device_persistent_memory_allocation(
-            -static_cast<int64>(tmpvar.AllocatedBytes()));
-      }
-    }
-  }
-
- private:
-  string var_name_;
-};
-
-class IsVariableInitializedOp : public OpKernel {
- public:
-  IsVariableInitializedOp(OpKernelConstruction* context) : OpKernel(context) {}
-
-  void Compute(OpKernelContext* context) override {
-    // Get a mutable input tensor of the Ref input.
-    const Tensor& input_tensor = context->mutable_input(0, false);
-    Tensor* output = nullptr;
-    OP_REQUIRES_OK(context,
-                   context->allocate_output(0, TensorShape({}), &output));
-    auto output_tensor = output->tensor<bool, 0>();
-    bool result = input_tensor.IsInitialized();
-    output_tensor() = result;
-  }
-};
-
 }  // namespace tensorflow
 
 #endif  // TENSORFLOW_KERNELS_VARIABLE_OPS_H_
-- 
GitLab


From 1c4f65b335b29a20511733612be6aed30e79b1a5 Mon Sep 17 00:00:00 2001
From: Gunhan Gulsoy <gunan@google.com>
Date: Thu, 7 Dec 2017 21:36:01 -0800
Subject: [PATCH 0794/1225] Fix a bug in string to number conversion code.

PiperOrigin-RevId: 178337974
---
 tensorflow/core/lib/strings/numbers.cc | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/tensorflow/core/lib/strings/numbers.cc b/tensorflow/core/lib/strings/numbers.cc
index 302a6967e3..f5822fad8e 100644
--- a/tensorflow/core/lib/strings/numbers.cc
+++ b/tensorflow/core/lib/strings/numbers.cc
@@ -81,10 +81,12 @@ T locale_independent_strtonum(const char* str, const char** endptr) {
   // number was outside the range, the stringstream sets the fail flag, but
   // returns the +/-max() value, whereas strto{f,d} functions return +/-INF.
   if (s.fail()) {
-    if (result == std::numeric_limits<T>::max()) {
+    if (result == std::numeric_limits<T>::max() ||
+        result == std::numeric_limits<T>::infinity()) {
       result = std::numeric_limits<T>::infinity();
       s.clear(s.rdstate() & ~std::ios::failbit);
-    } else if (result == -std::numeric_limits<T>::max()) {
+    } else if (result == -std::numeric_limits<T>::max() ||
+               result == -std::numeric_limits<T>::infinity()) {
       result = -std::numeric_limits<T>::infinity();
       s.clear(s.rdstate() & ~std::ios::failbit);
     }
-- 
GitLab


From 516f97cc24a6998492a1d58183f7d4e30a28efa6 Mon Sep 17 00:00:00 2001
From: Eugene Brevdo <ebrevdo@google.com>
Date: Thu, 7 Dec 2017 21:36:43 -0800
Subject: [PATCH 0795/1225] Add a maximum_iterations argument to tf.while_loop.

PiperOrigin-RevId: 178338024
---
 .../kernel_tests/control_flow_ops_py_test.py  | 17 ++++++++--
 tensorflow/python/ops/control_flow_ops.py     | 33 +++++++++++++++++--
 tensorflow/tools/api/golden/tensorflow.pbtxt  |  2 +-
 3 files changed, 46 insertions(+), 6 deletions(-)

diff --git a/tensorflow/python/kernel_tests/control_flow_ops_py_test.py b/tensorflow/python/kernel_tests/control_flow_ops_py_test.py
index 38ea8f7de4..51eb13b921 100644
--- a/tensorflow/python/kernel_tests/control_flow_ops_py_test.py
+++ b/tensorflow/python/kernel_tests/control_flow_ops_py_test.py
@@ -122,11 +122,12 @@ def opt_cfg():
               do_constant_folding=True)))
 
 
-def isum(s):
+def isum(s, maximum_iterations=None):
   i = constant_op.constant(0, name="i")
   c = lambda i, s: math_ops.less(i, 10)
   b = lambda i, s: [math_ops.add(i, 1), math_ops.add(i, s)]
-  _, r_s = control_flow_ops.while_loop(c, b, [i, s])
+  _, r_s = control_flow_ops.while_loop(
+      c, b, [i, s], maximum_iterations=maximum_iterations)
   return r_s
 
 
@@ -746,6 +747,12 @@ class ControlFlowTest(test.TestCase):
       r = isum(s)
       self.assertAllEqual(45, r.eval())
 
+  def testWhileWithMaximumIterations(self):
+    with self.test_session():
+      s = constant_op.constant([1, 2, 3, 4, 5])
+      r = isum(s, maximum_iterations=3)
+      self.assertAllEqual([1+3, 2+3, 3+3, 4+3, 5+3], r.eval())
+
   # Have more than 10 parallel iterations and hence exercise k-bound
   # most of the time.
   def testWhile_3(self):
@@ -3001,6 +3008,12 @@ class EagerTest(test.TestCase):
       self.assertAllEqual(isum(tensor).numpy(),
                           [46, 47, 48, 49, 50])
 
+  def testWhileLoopWithMaxIterations(self):
+    with context.eager_mode():
+      tensor = constant_op.constant([1, 2, 3, 4, 5])
+      self.assertAllEqual(isum(tensor, maximum_iterations=3).numpy(),
+                          [1+3, 2+3, 3+3, 4+3, 5+3])
+
   def testWithDependencies(self):
     with context.eager_mode():
       t1 = constant_op.constant(1)
diff --git a/tensorflow/python/ops/control_flow_ops.py b/tensorflow/python/ops/control_flow_ops.py
index 7d505e141b..f3ef2b9ac3 100644
--- a/tensorflow/python/ops/control_flow_ops.py
+++ b/tensorflow/python/ops/control_flow_ops.py
@@ -2726,7 +2726,7 @@ class WhileContext(ControlFlowContext):
 
 def while_loop(cond, body, loop_vars, shape_invariants=None,
                parallel_iterations=10, back_prop=True, swap_memory=False,
-               name=None):
+               name=None, maximum_iterations=None):
   """Repeat `body` while the condition `cond` is true.
 
   `cond` is a callable returning a boolean scalar tensor. `body` is a callable
@@ -2798,6 +2798,10 @@ def while_loop(cond, body, loop_vars, shape_invariants=None,
     back_prop: Whether backprop is enabled for this while loop.
     swap_memory: Whether GPU-CPU memory swap is enabled for this loop.
     name: Optional name prefix for the returned tensors.
+    maximum_iterations: Optional maximum number of iterations of the while loop
+      to run.  If provided, the `cond` output is AND-ed with an additional
+      condition ensuring the number of iterations executed is no greater than
+      `maximum_iterations`.
 
   Returns:
     The output tensors for the loop variables after the loop. When the length
@@ -2851,18 +2855,41 @@ def while_loop(cond, body, loop_vars, shape_invariants=None,
     if parallel_iterations < 1:
       raise TypeError("parallel_iterations must be a positive integer.")
 
+    if maximum_iterations is not None:
+      maximum_iterations = ops.convert_to_tensor(
+          maximum_iterations, name="maximum_iterations")
+      if maximum_iterations.shape.ndims != 0:
+        raise ValueError("maximum_iterations must be a scalar, saw shape: %s" %
+                         maximum_iterations.shape)
+      counter = constant_op.constant(
+          0, dtype=maximum_iterations.dtype, name="iteration_counter")
+      orig_cond = cond
+      orig_body = body
+      loop_vars = (counter, loop_vars)
+      cond = lambda i, lv: (  # pylint: disable=g-long-lambda
+          math_ops.logical_and(i < maximum_iterations, orig_cond(*lv)))
+      body = lambda i, lv: (i + 1, orig_body(*lv))
+
     if context.in_eager_mode():
       while cond(*loop_vars):
         loop_vars = body(*loop_vars)
-      return loop_vars
+      if maximum_iterations is not None:
+        return loop_vars[1]
+      else:
+        return loop_vars
 
     if shape_invariants is not None:
+      if maximum_iterations is not None:
+        shape_invariants = (tensor_shape.TensorShape([]), shape_invariants)
       nest.assert_same_structure(loop_vars, shape_invariants)
 
     loop_context = WhileContext(parallel_iterations, back_prop, swap_memory)  # pylint: disable=redefined-outer-name
     ops.add_to_collection(ops.GraphKeys.WHILE_CONTEXT, loop_context)
     result = loop_context.BuildLoop(cond, body, loop_vars, shape_invariants)
-    return result
+    if maximum_iterations is not None:
+      return result[1]
+    else:
+      return result
 
 
 def _AsTensorList(x, p):
diff --git a/tensorflow/tools/api/golden/tensorflow.pbtxt b/tensorflow/tools/api/golden/tensorflow.pbtxt
index 4b33aa218c..d6a7a2d19f 100644
--- a/tensorflow/tools/api/golden/tensorflow.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.pbtxt
@@ -2066,7 +2066,7 @@ tf_module {
   }
   member_method {
     name: "while_loop"
-    argspec: "args=[\'cond\', \'body\', \'loop_vars\', \'shape_invariants\', \'parallel_iterations\', \'back_prop\', \'swap_memory\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'10\', \'True\', \'False\', \'None\'], "
+    argspec: "args=[\'cond\', \'body\', \'loop_vars\', \'shape_invariants\', \'parallel_iterations\', \'back_prop\', \'swap_memory\', \'name\', \'maximum_iterations\'], varargs=None, keywords=None, defaults=[\'None\', \'10\', \'True\', \'False\', \'None\', \'None\'], "
   }
   member_method {
     name: "write_file"
-- 
GitLab


From 43d241193570c28f46a42b8f04a3814162662280 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 7 Dec 2017 22:19:50 -0800
Subject: [PATCH 0796/1225] [XLA] Fix a issue in ParseShapeString which made
 the hlo parser fail in parsing bf16 shapes. It used to ignore everything
 after TUPLE, so the newly added primitive types were not recognized.

Also support parsing bfloat16 literals.

PiperOrigin-RevId: 178340689
---
 tensorflow/compiler/xla/shape_util.cc         | 36 +++++++++++++------
 .../compiler/xla/tools/parser/hlo_parser.cc   |  5 ++-
 .../xla/tools/parser/hlo_parser_test.cc       | 11 ++++++
 3 files changed, 40 insertions(+), 12 deletions(-)

diff --git a/tensorflow/compiler/xla/shape_util.cc b/tensorflow/compiler/xla/shape_util.cc
index 9e3f06e527..8845d6930e 100644
--- a/tensorflow/compiler/xla/shape_util.cc
+++ b/tensorflow/compiler/xla/shape_util.cc
@@ -18,6 +18,7 @@ limitations under the License.
 #include <algorithm>
 #include <functional>
 #include <numeric>
+#include <unordered_map>
 #include <utility>
 #include <vector>
 
@@ -396,6 +397,26 @@ const string& LowercasePrimitiveTypeName(PrimitiveType s) {
   static PrimitiveTypeNameGenerator* gen = new PrimitiveTypeNameGenerator();
   return gen->LowercaseName(s);
 }
+
+StatusOr<PrimitiveType> StringToPrimitiveType(const string& name) {
+  static std::unordered_map<string, PrimitiveType>* name_to_type = [] {
+    static auto* map = new std::unordered_map<string, PrimitiveType>;
+    for (int i = 0; i < PrimitiveType_ARRAYSIZE; i++) {
+      if (PrimitiveType_IsValid(i)) {
+        auto value = static_cast<PrimitiveType>(i);
+        (*map)[LowercasePrimitiveTypeName(value)] = value;
+      }
+    }
+    return map;
+  }();
+  auto found = name_to_type->find(name);
+  if (found == name_to_type->end()) {
+    return InvalidArgument("Invalid element type string: \"%s\".",
+                           name.c_str());
+  }
+  return found->second;
+}
+
 }  // namespace
 
 /* static */ string ShapeUtil::HumanStringWithLayout(const Shape& shape) {
@@ -500,17 +521,10 @@ StatusOr<Shape> ParseShapeStringInternal(tensorflow::StringPiece* s) {
                         comma_list_to_int64s(dimensions_string));
 
     // Extract the primitive element type.
-    PrimitiveType primitive_type = PRIMITIVE_TYPE_INVALID;
-    for (PrimitiveType i =
-             static_cast<PrimitiveType>(PRIMITIVE_TYPE_INVALID + 1);
-         i < TUPLE; i = static_cast<PrimitiveType>(i + 1)) {
-      if (tensorflow::str_util::Lowercase(PrimitiveType_Name(i)) ==
-          element_type_string) {
-        primitive_type = i;
-        break;
-      }
-    }
-    if (primitive_type == PRIMITIVE_TYPE_INVALID) {
+    TF_ASSIGN_OR_RETURN(const PrimitiveType primitive_type,
+                        StringToPrimitiveType(element_type_string));
+    if (primitive_type == PRIMITIVE_TYPE_INVALID || primitive_type == TUPLE ||
+        primitive_type == OPAQUE) {
       return InvalidArgument("Invalid element type string: \"%s\".",
                              element_type_string.c_str());
     }
diff --git a/tensorflow/compiler/xla/tools/parser/hlo_parser.cc b/tensorflow/compiler/xla/tools/parser/hlo_parser.cc
index cc0461fc5b..6e55214cbe 100644
--- a/tensorflow/compiler/xla/tools/parser/hlo_parser.cc
+++ b/tensorflow/compiler/xla/tools/parser/hlo_parser.cc
@@ -1086,6 +1086,8 @@ bool HloParser::SetValueInLiteral(double value, int64 linear_index,
   switch (shape.element_type()) {
     case F16:
       return SetValueInLiteralHelper<half>(value, linear_index, literal);
+    case BF16:
+      return SetValueInLiteralHelper<bfloat16>(value, linear_index, literal);
     case F32:
       return SetValueInLiteralHelper<float>(value, linear_index, literal);
     case F64:
@@ -1124,7 +1126,8 @@ bool HloParser::SetValueInLiteralHelper(ParsedElemT value, int64 linear_index,
        (std::numeric_limits<ParsedElemT>::infinity() == value ||
         -std::numeric_limits<ParsedElemT>::infinity() == value))) {
     // Skip range checking for non-finite value.
-  } else if (literal->shape().element_type() == F16) {
+  } else if (literal->shape().element_type() == F16 ||
+             literal->shape().element_type() == BF16) {
     if (value > kF16max || value < -kF16max) {
       return TokenError(StrCat(
           "value ", value, " is out of range for literal's primitive type ",
diff --git a/tensorflow/compiler/xla/tools/parser/hlo_parser_test.cc b/tensorflow/compiler/xla/tools/parser/hlo_parser_test.cc
index 69d48d65bc..5b5326e7b7 100644
--- a/tensorflow/compiler/xla/tools/parser/hlo_parser_test.cc
+++ b/tensorflow/compiler/xla/tools/parser/hlo_parser_test.cc
@@ -146,6 +146,17 @@ ENTRY %ConstantF16.v4 () -> f16[] {
   ROOT %constant = f16[] constant(500)
 }
 
+)"
+},
+// bf16
+{
+"BF16",
+R"(HloModule BF16:
+
+ENTRY %BF16.v4 () -> bf16[] {
+  ROOT %constant = bf16[] constant(500)
+}
+
 )"
 },
 // constant + constant
-- 
GitLab


From 6605938c280590ee981470abe87386396cf0e438 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 7 Dec 2017 22:56:09 -0800
Subject: [PATCH 0797/1225] [XLA:GPU] Support atomic operations on small data
 types.

Implement atomic operations on less than 32 bit data types with atomicCAS(int).
This relies on an assumption that tensor buffers are 4 byte aligned and have a
size of 4N bytes in order not to access the buffers out of bound.

Enable the tests that requires atomic operations on small data types.

PiperOrigin-RevId: 178342413
---
 .../compiler/xla/service/gpu/ir_emitter.cc    | 233 ++++++++++++------
 .../compiler/xla/service/gpu/ir_emitter.h     |  13 +-
 tensorflow/compiler/xla/tests/reduce_test.cc  |  12 +-
 tensorflow/compiler/xla/tests/while_test.cc   |   6 +-
 4 files changed, 176 insertions(+), 88 deletions(-)

diff --git a/tensorflow/compiler/xla/service/gpu/ir_emitter.cc b/tensorflow/compiler/xla/service/gpu/ir_emitter.cc
index 6e2bd4e11d..44a314f8c4 100644
--- a/tensorflow/compiler/xla/service/gpu/ir_emitter.cc
+++ b/tensorflow/compiler/xla/service/gpu/ir_emitter.cc
@@ -173,7 +173,7 @@ Status IrEmitter::EmitCallToNestedComputation(
   return Status::OK();
 }
 
-bool IrEmitter::MaybeEmitSpecialAtomicOperation(
+bool IrEmitter::MaybeEmitDirectAtomicOperation(
     const HloComputation& computation, llvm::Value* output_address,
     llvm::Value* source_address) {
   CHECK_EQ(2, computation.num_parameters());
@@ -233,102 +233,189 @@ bool IrEmitter::MaybeEmitSpecialAtomicOperation(
   return false;
 }
 
-Status IrEmitter::EmitAtomicOperationForNestedComputation(
-    const HloComputation& computation, llvm::Value* output_address,
-    llvm::Value* source_address) {
-  if (computation.num_parameters() != 2) {
-    // TODO(b/30258929): We only accept binary computations so far.
-    return Unimplemented(
-        "We only support atomic functions with exactly two parameters, but "
-        "computation %s has %lld.",
-        computation.name().c_str(), computation.num_parameters());
-  }
+// Implements atomic binary operations using atomic compare-and-swap
+// (atomicCAS) as follows:
+//   1. Reads the value from the memory pointed to by output_address and
+//     records it as old_output.
+//   2. Uses old_output as one of the source operand to perform the binary
+//     operation and stores the result in new_output.
+//   3. Calls atomicCAS which implements compare-and-swap as an atomic
+//     operation. In particular, atomicCAS reads the value from the memory
+//     pointed to by output_address, and compares the value with old_output. If
+//     the two values equal, new_output is written to the same memory location
+//     and true is returned to indicate that the atomic operation succeeds.
+//     Otherwise, the new value read from the memory is returned. In this case,
+//     the new value is copied to old_output, and steps 2. and 3. are repeated
+//     until atomicCAS succeeds.
+//
+// On Nvidia GPUs, atomicCAS can only operate on 32 bit and 64 bit integers. If
+// the element type of the binary operation is 32 bits or 64 bits, the integer
+// type of the same size is used for the atomicCAS operation. On the other hand,
+// if the element type is smaller than 32 bits, int32 is used for the atomicCAS
+// operation. In this case, atomicCAS reads and writes 32 bit values from
+// the memory, which is larger than the memory size required by the original
+// atomic binary operation. We mask off the last two bits of the output_address
+// and use the result as an address to read the 32 bit values from the memory.
+// This can avoid out of bound memory accesses if tensor buffers are 4 byte
+// aligned and have a size of 4N, an assumption that the runtime can guarantee.
+//
+// The pseudo code is shown below. Variables *_address are pointers to a memory
+// region with a size equal to the size of the atomicCAS operation, with the
+// exception that new_output_address is a pointer to a memory region with a size
+// equal to the element size of the binary operation.
+//
+//   element_size = sizeof(element_type);
+//   atomic_size = max(32, element_size);
+//   cas_new_output_address = alloca(atomic_size);
+//   cas_old_output_address = alloca(atomic_size);
+//   if (atomic_size != element_size) {
+//     atomic_address = output_address & ((int64)(-2));
+//     new_output_address = cas_new_output_address + (output_address & 3);
+//   } else {
+//     atomic_address = output_address;
+//     new_output_address = cas_new_output_address;
+//   }
+//
+//   *cas_old_output_address = *atomic_address;
+//   do {
+//     *cas_new_output_address = *cas_old_output_address;
+//     *new_output_address = operation(*new_output_address, *source_address);
+//     (*cas_old_output_address, success) =
+//       atomicCAS(atomic_address, *cas_old_output_address,
+//       *cas_new_output_address);
+//   } while (!success);
+//
+Status IrEmitter::EmitAtomicOperationUsingCAS(const HloComputation& computation,
+                                              llvm::Value* output_address,
+                                              llvm::Value* source_address) {
+  llvm::PointerType* output_address_type =
+      llvm::dyn_cast<llvm::PointerType>(output_address->getType());
+  CHECK_NE(output_address_type, nullptr);
+
+  // element_type is the data type for the binary operation.
+  llvm::Type* element_type = output_address_type->getPointerElementType();
+  int element_size = llvm_ir::GetSizeInBits(element_type);
+  llvm::Type* element_address_type = element_type->getPointerTo();
+
+  int atomic_size = (element_size < 32) ? 32 : element_size;
+  llvm::Type* atomic_type = ir_builder_.getIntNTy(atomic_size);
+  llvm::Type* atomic_address_type =
+      atomic_type->getPointerTo(output_address_type->getPointerAddressSpace());
+
+  // cas_old_output_address and cas_new_output_address point to the scratch
+  // memory where we store the old and new values for the repeated atomicCAS
+  // operations.
+  llvm::Value* cas_old_output_address = ir_builder_.CreateAlloca(
+      atomic_type, /*ArraySize=*/nullptr, "cas_old_output_address");
+  llvm::Value* cas_new_output_address = ir_builder_.CreateAlloca(
+      atomic_type, /*ArraySize=*/nullptr, "cas_new_output_address");
 
-  if (MaybeEmitSpecialAtomicOperation(computation, output_address,
-                                      source_address)) {
-    return Status::OK();
-  }
-
-  // Other binary computations can be made atomic as following (labels are basic
-  // block names used in the IR emitting code later).
-  //
-  // atomic_op_loop_preheader:
-  //   ...
-  //   source = *source_address;
-  //   old_output = *output_address;
-  //   do {
-  // atomic_op_loop_body_entry:
-  //     new_output = computation(old_output, source);
-  //     (old_output, success) =
-  //         atomicCAS(output_address, old_output, new_output);
-  //   } while (!success);
-  //
-  // atomic_op_loop_exit:
-  //   ...
-  //
-  // TODO(jingyue): Consider encapsulate the logic of emitting control flow to
-  // something similar to llvm_ir::ForLoop.
-  //
   // Emit preparation code to the preheader.
   llvm::BasicBlock* loop_preheader_bb = ir_builder_.GetInsertBlock();
-  llvm::Type* element_ir_type =
-      output_address->getType()->getPointerElementType();
-  // old_output = *output_address;
-  llvm::Value* old_output_location = ir_builder_.CreateAlloca(
-      element_ir_type, /*ArraySize=*/nullptr, "old_output_location");
-  ir_builder_.CreateStore(ir_builder_.CreateLoad(output_address, "old_output"),
-                          old_output_location);
+
+  llvm::Value* atomic_memory_address;
+  // binop_output_address points to the scratch memory that stores the
+  // result of the binary operation.
+  llvm::Value* binop_output_address;
+  if (element_size < 32) {
+    // Assume the element size is an integer number of bytes.
+    CHECK_EQ((element_size % sizeof(char)), 0);
+    llvm::Type* address_int_type =
+        module_->getDataLayout().getIntPtrType(output_address_type);
+    atomic_memory_address =
+        ir_builder_.CreatePtrToInt(output_address, address_int_type);
+    llvm::Value* mask = llvm::ConstantInt::get(address_int_type, 3);
+    llvm::Value* offset = ir_builder_.CreateAnd(atomic_memory_address, mask);
+    mask = llvm::ConstantInt::get(address_int_type, -2);
+    atomic_memory_address = ir_builder_.CreateAnd(atomic_memory_address, mask);
+    atomic_memory_address =
+        ir_builder_.CreateIntToPtr(atomic_memory_address, atomic_address_type);
+    binop_output_address = ir_builder_.CreateAdd(
+        ir_builder_.CreatePtrToInt(cas_new_output_address, address_int_type),
+        offset);
+    binop_output_address =
+        ir_builder_.CreateIntToPtr(binop_output_address, element_address_type);
+  } else {
+    atomic_memory_address =
+        ir_builder_.CreateBitCast(output_address, atomic_address_type);
+    binop_output_address =
+        ir_builder_.CreateBitCast(cas_new_output_address, element_address_type);
+  }
+
+  // Use the value from the memory that atomicCAS operates on to initialize
+  // cas_old_output.
+  llvm::Value* cas_old_output =
+      ir_builder_.CreateLoad(atomic_memory_address, "cas_old_output");
+  ir_builder_.CreateStore(cas_old_output, cas_old_output_address);
+
   llvm::BasicBlock* loop_exit_bb = loop_preheader_bb->splitBasicBlock(
       ir_builder_.GetInsertPoint(), "atomic_op_loop_exit");
-
-  // Emit the body of the loop that repeatedly invokes atomicCAS.
   llvm::BasicBlock* loop_body_bb =
       llvm::BasicBlock::Create(ir_builder_.getContext(), "atomic_op_loop_body",
                                ir_builder_.GetInsertBlock()->getParent());
   ir_builder_.SetInsertPoint(loop_body_bb);
   // Change preheader's successor from loop_exit_bb to loop_body_bb.
   loop_preheader_bb->getTerminator()->setSuccessor(0, loop_body_bb);
-  // new_output = computation(old_output, source);
-  llvm::Value* new_output_location = ir_builder_.CreateAlloca(
-      element_ir_type, /*ArraySize=*/nullptr, "new_output_location");
+
+  // Emit the body of the loop that repeatedly invokes atomicCAS.
+  //
+  // Use cas_old_output to initialize cas_new_output.
+  cas_old_output =
+      ir_builder_.CreateLoad(cas_old_output_address, "cas_old_output");
+  ir_builder_.CreateStore(cas_old_output, cas_new_output_address);
+  // Emits code to calculate new_output = operation(old_output, source);
   TF_RETURN_IF_ERROR(EmitCallToNestedComputation(
-      computation, {old_output_location, source_address}, new_output_location));
-
-  // (old_output, success) = atomicCAS(output_address, old_output, new_output);
-  int num_bits = llvm_ir::GetSizeInBits(element_ir_type);
-  llvm::Type* element_int_ir_type = ir_builder_.getIntNTy(num_bits);
-  // cmpxchg accepts integer only, and bitcast refuses to operate on aggregate
-  // types, so we bitcast load and store addresses to intN* of the same bit
-  // width.
-  llvm::Value* old_output = ir_builder_.CreateLoad(
-      ir_builder_.CreateBitCast(old_output_location,
-                                element_int_ir_type->getPointerTo()),
-      "old_output");
-  llvm::Value* new_output = ir_builder_.CreateLoad(
-      ir_builder_.CreateBitCast(new_output_location,
-                                element_int_ir_type->getPointerTo()),
-      "new_output");
+      computation, {binop_output_address, source_address},
+      binop_output_address));
+
+  llvm::Value* cas_new_output =
+      ir_builder_.CreateLoad(cas_new_output_address, "cas_new_output");
+
+  // Emit code to perform the atomicCAS operation
+  // (cas_old_output, success) = atomicCAS(memory_address, cas_old_output,
+  //                                       cas_new_output);
   llvm::Value* ret_value = ir_builder_.CreateAtomicCmpXchg(
-      ir_builder_.CreateBitCast(output_address,
-                                element_int_ir_type->getPointerTo()),
-      old_output, new_output, llvm::AtomicOrdering::SequentiallyConsistent,
+      atomic_memory_address, cas_old_output, cas_new_output,
+      llvm::AtomicOrdering::SequentiallyConsistent,
       llvm::AtomicOrdering::SequentiallyConsistent);
-  // cmpxchg returns a pair. The first element is the original value at
-  // output_address and the second element is whether the swap is successful.
+
+  // Extract the memory value returned from atomicCAS and store it as
+  // cas_old_output.
   ir_builder_.CreateStore(
-      ir_builder_.CreateExtractValue(ret_value, 0, "old_output"),
-      ir_builder_.CreateBitCast(old_output_location,
-                                element_int_ir_type->getPointerTo()));
+      ir_builder_.CreateExtractValue(ret_value, 0, "cas_old_output"),
+      cas_old_output_address);
+  // Extract the success bit returned from atomicCAS and generate a
+  // conditional branch on the success bit.
   ir_builder_.CreateCondBr(
       ir_builder_.CreateExtractValue(ret_value, 1, "success"), loop_exit_bb,
       loop_body_bb);
 
-  // Restore the insertion point to the exit basic block so that the caller of
+  // Set the insertion point to the exit basic block so that the caller of
   // this method can continue emitting code to the right place.
   SetToFirstInsertPoint(loop_exit_bb, &ir_builder_);
   return Status::OK();
 }
 
+Status IrEmitter::EmitAtomicOperationForNestedComputation(
+    const HloComputation& computation, llvm::Value* output_address,
+    llvm::Value* source_address) {
+  if (computation.num_parameters() != 2) {
+    // TODO(b/30258929): We only accept binary computations so far.
+    return Unimplemented(
+        "We only support atomic functions with exactly two parameters, but "
+        "computation %s has %lld.",
+        computation.name().c_str(), computation.num_parameters());
+  }
+
+  if (MaybeEmitDirectAtomicOperation(computation, output_address,
+                                     source_address)) {
+    return Status::OK();
+  }
+
+  return EmitAtomicOperationUsingCAS(computation, output_address,
+                                     source_address);
+}
+
 Status IrEmitter::HandleSelect(HloInstruction* select) {
   auto pred = select->operand(0);
   auto on_true = select->operand(1);
diff --git a/tensorflow/compiler/xla/service/gpu/ir_emitter.h b/tensorflow/compiler/xla/service/gpu/ir_emitter.h
index 9c01f5b7c7..080e1f1fa4 100644
--- a/tensorflow/compiler/xla/service/gpu/ir_emitter.h
+++ b/tensorflow/compiler/xla/service/gpu/ir_emitter.h
@@ -185,9 +185,16 @@ class IrEmitter : public DfsHloVisitorWithDefault {
   // be simply implemented using an LLVM atomic instruction. If "computation" is
   // one of this kind, emits code to do that and returns true; otherwise,
   // returns false.
-  bool MaybeEmitSpecialAtomicOperation(const HloComputation& computation,
-                                       llvm::Value* output_address,
-                                       llvm::Value* source_address);
+  bool MaybeEmitDirectAtomicOperation(const HloComputation& computation,
+                                      llvm::Value* output_address,
+                                      llvm::Value* source_address);
+
+  // A helper method for EmitAtomicOperationForNestedComputation. It implements
+  // binary atomic operations using atomicCAS with special handling to support
+  // small data types.
+  Status EmitAtomicOperationUsingCAS(const HloComputation& computation,
+                                     llvm::Value* output_address,
+                                     llvm::Value* source_address);
 
   StatusOr<llvm::Value*> ComputeNestedElement(
       const HloComputation& computation,
diff --git a/tensorflow/compiler/xla/tests/reduce_test.cc b/tensorflow/compiler/xla/tests/reduce_test.cc
index 7bc3185c36..b09ccdd679 100644
--- a/tensorflow/compiler/xla/tests/reduce_test.cc
+++ b/tensorflow/compiler/xla/tests/reduce_test.cc
@@ -352,15 +352,13 @@ XLA_TEST_F(ReduceTest, ReduceR2_111x50_01_To_R1) {
 XLA_TEST_F(ReduceTest, ReduceR2_1024x1024_To_R1) { RunR2ToR1Test(1024, 1024); }
 XLA_TEST_F(ReduceTest, ReduceR2_1000x1500_To_R1) { RunR2ToR1Test(1000, 1500); }
 
-// TODO(b/34969189): Invalid CAS generated on GPU.
-XLA_TEST_F(ReduceTest, DISABLED_ON_GPU(AndReduceAllOnesR1_10_Pred)) {
+XLA_TEST_F(ReduceTest, AndReduceAllOnesR1_10_Pred) {
   constexpr int element_count = 10;
   std::vector<int> input(element_count, 1);
   RunR1ToR0PredTest(/*and_reduce=*/true, input);
 }
 
-// TODO(b/34969189): Invalid CAS generated on GPU.
-XLA_TEST_F(ReduceTest, DISABLED_ON_GPU(AndReduceOnesAndZerosR1_10_Pred)) {
+XLA_TEST_F(ReduceTest, AndReduceOnesAndZerosR1_10_Pred) {
   constexpr int element_count = 10;
   std::vector<int> input(element_count);
   for (int i = 0; i < element_count; ++i) {
@@ -369,15 +367,13 @@ XLA_TEST_F(ReduceTest, DISABLED_ON_GPU(AndReduceOnesAndZerosR1_10_Pred)) {
   RunR1ToR0PredTest(/*and_reduce=*/true, input);
 }
 
-// TODO(b/34969189): Invalid CAS generated on GPU.
-XLA_TEST_F(ReduceTest, DISABLED_ON_GPU(OrReduceAllOnesR1_10_Pred)) {
+XLA_TEST_F(ReduceTest, OrReduceAllOnesR1_10_Pred) {
   constexpr int element_count = 10;
   std::vector<int> input(element_count, 1);
   RunR1ToR0PredTest(/*and_reduce=*/false, input);
 }
 
-// TODO(b/34969189): Invalid CAS generated on GPU.
-XLA_TEST_F(ReduceTest, DISABLED_ON_GPU(OrReduceOnesAndZerosR1_10_Pred)) {
+XLA_TEST_F(ReduceTest, OrReduceOnesAndZerosR1_10_Pred) {
   constexpr int element_count = 10;
   std::vector<int> input(element_count);
   for (int i = 0; i < element_count; ++i) {
diff --git a/tensorflow/compiler/xla/tests/while_test.cc b/tensorflow/compiler/xla/tests/while_test.cc
index f3f10517e3..0b3430ee1e 100644
--- a/tensorflow/compiler/xla/tests/while_test.cc
+++ b/tensorflow/compiler/xla/tests/while_test.cc
@@ -911,8 +911,7 @@ TEST_F(WhileTest, WhileWithPrngScalarResult) {
   }
 }
 
-// TODO(b/34969189) Fails with bad AtomicCmpSwap on GPU on 2017-09-11.
-TEST_F(WhileTest, DISABLED_ON_GPU(WhileThatSwapsParameterWithTupleElement)) {
+TEST_F(WhileTest, WhileThatSwapsParameterWithTupleElement) {
   auto element_shape = ShapeUtil::MakeShape(F32, {2});
 
   ComputationBuilder outer(client_, "outer");
@@ -948,8 +947,7 @@ TEST_F(WhileTest, DISABLED_ON_GPU(WhileThatSwapsParameterWithTupleElement)) {
                          ErrorSpec(1e-6));
 }
 
-// TODO(b/34969189) Fails with bad AtomicCmpSwap on GPU on 2017-09-11.
-TEST_F(WhileTest, DISABLED_ON_GPU(WhileThatSwapsParameterWithBroadcast)) {
+TEST_F(WhileTest, WhileThatSwapsParameterWithBroadcast) {
   auto element_shape = ShapeUtil::MakeShape(F32, {2});
 
   ComputationBuilder outer(client_, "outer");
-- 
GitLab


From b43d0f3c98140edfebb8295ea4a4b661e2fc2a85 Mon Sep 17 00:00:00 2001
From: Gunhan Gulsoy <gunan@google.com>
Date: Thu, 7 Dec 2017 23:19:45 -0800
Subject: [PATCH 0798/1225] Upgrade cuda to 9 and cudnn version to 7. (#14773)

* Upgrade cuda to 9 and cudnn version to 7.

* Also update XLA build script.

* Fix typos.

* Fix botched conflict resolution.

* Disable transpose_op_test in opensource until the timeouts are fixed.
---
 configure.py                                        | 4 ++--
 tensorflow/python/kernel_tests/BUILD                | 4 ++++
 tensorflow/tools/ci_build/Dockerfile.gpu            | 4 ++--
 tensorflow/tools/ci_build/Dockerfile.gpu_clang      | 4 ++--
 tensorflow/tools/ci_build/linux/gpu/run_cc_core.sh  | 2 ++
 tensorflow/tools/ci_build/linux/gpu/run_py3_core.sh | 2 ++
 tensorflow/tools/ci_build/xla/linux/gpu/run_py3.sh  | 2 ++
 tensorflow/tools/docker/Dockerfile.devel-gpu        | 2 +-
 tensorflow/tools/docker/Dockerfile.gpu              | 2 +-
 9 files changed, 18 insertions(+), 8 deletions(-)

diff --git a/configure.py b/configure.py
index 7080f0acb6..7a9d315eb0 100644
--- a/configure.py
+++ b/configure.py
@@ -36,8 +36,8 @@ _TF_BAZELRC = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                            '.tf_configure.bazelrc')
 _TF_WORKSPACE = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                              'WORKSPACE')
-_DEFAULT_CUDA_VERSION = '8.0'
-_DEFAULT_CUDNN_VERSION = '6'
+_DEFAULT_CUDA_VERSION = '9.0'
+_DEFAULT_CUDNN_VERSION = '7'
 _DEFAULT_CUDA_COMPUTE_CAPABILITIES = '3.5,5.2'
 _DEFAULT_CUDA_PATH = '/usr/local/cuda'
 _DEFAULT_CUDA_PATH_LINUX = '/opt/cuda'
diff --git a/tensorflow/python/kernel_tests/BUILD b/tensorflow/python/kernel_tests/BUILD
index b4c202ea39..e1be0ee512 100644
--- a/tensorflow/python/kernel_tests/BUILD
+++ b/tensorflow/python/kernel_tests/BUILD
@@ -2077,6 +2077,10 @@ cuda_py_test(
         "//tensorflow/python:client_testlib",
         "//tensorflow/python:framework_for_generated_wrappers",
     ],
+    tags = [
+        "no_gpu",
+        "no_oss",
+    ],
 )
 
 cuda_py_test(
diff --git a/tensorflow/tools/ci_build/Dockerfile.gpu b/tensorflow/tools/ci_build/Dockerfile.gpu
index 2d46ccb6b1..7591ecc04e 100644
--- a/tensorflow/tools/ci_build/Dockerfile.gpu
+++ b/tensorflow/tools/ci_build/Dockerfile.gpu
@@ -1,8 +1,8 @@
-FROM nvidia/cuda:8.0-cudnn6-devel-ubuntu14.04
+FROM nvidia/cuda:9.0-cudnn7-devel-ubuntu16.04
 
 LABEL maintainer="Jan Prach <jendap@google.com>"
 
-# In the Ubuntu 14.04 images, cudnn is placed in system paths. Move them to
+# In the Ubuntu 16.04 images, cudnn is placed in system paths. Move them to
 # /usr/local/cuda
 RUN cp -P /usr/include/cudnn.h /usr/local/cuda/include
 RUN cp -P /usr/lib/x86_64-linux-gnu/libcudnn* /usr/local/cuda/lib64
diff --git a/tensorflow/tools/ci_build/Dockerfile.gpu_clang b/tensorflow/tools/ci_build/Dockerfile.gpu_clang
index 0ecd8c75e0..438a7ec532 100644
--- a/tensorflow/tools/ci_build/Dockerfile.gpu_clang
+++ b/tensorflow/tools/ci_build/Dockerfile.gpu_clang
@@ -1,8 +1,8 @@
-FROM nvidia/cuda:8.0-cudnn6-devel-ubuntu14.04
+FROM nvidia/cuda:9.0-cudnn7-devel-ubuntu16.04
 
 LABEL maintainer="Ilya Biryukov <ibiryukov@google.com>"
 
-# In the Ubuntu 14.04 images, cudnn is placed in system paths. Move them to
+# In the Ubuntu 16.04 images, cudnn is placed in system paths. Move them to
 # /usr/local/cuda
 RUN cp /usr/include/cudnn.h /usr/local/cuda/include
 RUN cp /usr/lib/x86_64-linux-gnu/libcudnn* /usr/local/cuda/lib64
diff --git a/tensorflow/tools/ci_build/linux/gpu/run_cc_core.sh b/tensorflow/tools/ci_build/linux/gpu/run_cc_core.sh
index df196f829c..ac83e90f76 100755
--- a/tensorflow/tools/ci_build/linux/gpu/run_cc_core.sh
+++ b/tensorflow/tools/ci_build/linux/gpu/run_cc_core.sh
@@ -28,6 +28,8 @@ echo ""
 export PYTHON_BIN_PATH=`which python3`
 
 export TF_NEED_CUDA=1
+export TF_CUDA_VERSION=8.0
+export TF_CUDNN_VERSION=6
 export TF_CUDA_COMPUTE_CAPABILITIES=3.7
 
 yes "" | $PYTHON_BIN_PATH configure.py
diff --git a/tensorflow/tools/ci_build/linux/gpu/run_py3_core.sh b/tensorflow/tools/ci_build/linux/gpu/run_py3_core.sh
index abd256a895..6b80f44729 100755
--- a/tensorflow/tools/ci_build/linux/gpu/run_py3_core.sh
+++ b/tensorflow/tools/ci_build/linux/gpu/run_py3_core.sh
@@ -28,6 +28,8 @@ echo ""
 export PYTHON_BIN_PATH=`which python3`
 
 export TF_NEED_CUDA=1
+export TF_CUDA_VERSION=8.0
+export TF_CUDNN_VERSION=6
 export TF_CUDA_COMPUTE_CAPABILITIES=3.7
 
 yes "" | $PYTHON_BIN_PATH configure.py
diff --git a/tensorflow/tools/ci_build/xla/linux/gpu/run_py3.sh b/tensorflow/tools/ci_build/xla/linux/gpu/run_py3.sh
index a94a627dfb..88333de856 100755
--- a/tensorflow/tools/ci_build/xla/linux/gpu/run_py3.sh
+++ b/tensorflow/tools/ci_build/xla/linux/gpu/run_py3.sh
@@ -28,6 +28,8 @@ echo ""
 export PYTHON_BIN_PATH=`which python3`
 
 export TF_NEED_CUDA=1
+export TF_CUDA_VERSION=8.0
+export TF_CUDNN_VERSION=6
 export TF_CUDA_COMPUTE_CAPABILITIES=3.7
 
 yes "" | $PYTHON_BIN_PATH configure.py
diff --git a/tensorflow/tools/docker/Dockerfile.devel-gpu b/tensorflow/tools/docker/Dockerfile.devel-gpu
index 041f45971b..339a39bcf3 100644
--- a/tensorflow/tools/docker/Dockerfile.devel-gpu
+++ b/tensorflow/tools/docker/Dockerfile.devel-gpu
@@ -1,4 +1,4 @@
-FROM nvidia/cuda:8.0-cudnn6-devel-ubuntu16.04
+FROM nvidia/cuda:9.0-cudnn7-devel-ubuntu16.04
 
 LABEL maintainer="Craig Citro <craigcitro@google.com>"
 
diff --git a/tensorflow/tools/docker/Dockerfile.gpu b/tensorflow/tools/docker/Dockerfile.gpu
index e212d10290..b6682cd681 100644
--- a/tensorflow/tools/docker/Dockerfile.gpu
+++ b/tensorflow/tools/docker/Dockerfile.gpu
@@ -1,4 +1,4 @@
-FROM nvidia/cuda:8.0-cudnn6-runtime-ubuntu16.04
+FROM nvidia/cuda:9.0-cudnn7-runtime-ubuntu16.04
 
 LABEL maintainer="Craig Citro <craigcitro@google.com>"
 
-- 
GitLab


From cae2e561003bd04aed0be95aec49fd02adcddeb6 Mon Sep 17 00:00:00 2001
From: Gunhan Gulsoy <gunan@google.com>
Date: Thu, 7 Dec 2017 23:28:25 -0800
Subject: [PATCH 0799/1225] Delete Dockerfile.devel-gpu-cuda9-cudnn7

our default dockerfiles now use cuda9-cudnn7.
No need for this file anymore.
---
 .../docker/Dockerfile.devel-gpu-cuda9-cudnn7  | 115 ------------------
 1 file changed, 115 deletions(-)
 delete mode 100644 tensorflow/tools/docker/Dockerfile.devel-gpu-cuda9-cudnn7

diff --git a/tensorflow/tools/docker/Dockerfile.devel-gpu-cuda9-cudnn7 b/tensorflow/tools/docker/Dockerfile.devel-gpu-cuda9-cudnn7
deleted file mode 100644
index 3bedc8cf34..0000000000
--- a/tensorflow/tools/docker/Dockerfile.devel-gpu-cuda9-cudnn7
+++ /dev/null
@@ -1,115 +0,0 @@
-FROM nvidia/cuda:9.0-cudnn7-devel-ubuntu16.04
-
-LABEL maintainer="Gunhan Gulsoy <gunan@google.com>"
-
-# It is possible to override these for releases.
-ARG TF_BRANCH=master
-ARG BAZEL_VERSION=0.5.4
-ARG TF_AVAILABLE_CPUS=32
-
-RUN apt-get update && apt-get install -y --no-install-recommends \
-        build-essential \
-        curl \
-        git \
-        golang \
-        libcurl3-dev \
-        libfreetype6-dev \
-        libpng12-dev \
-        libzmq3-dev \
-        pkg-config \
-        python-dev \
-        python-pip \
-        rsync \
-        software-properties-common \
-        unzip \
-        zip \
-        zlib1g-dev \
-        openjdk-8-jdk \
-        openjdk-8-jre-headless \
-        wget \
-        && \
-    apt-get clean && \
-    rm -rf /var/lib/apt/lists/*
-
-RUN pip --no-cache-dir install --upgrade \
-        pip setuptools
-
-RUN pip --no-cache-dir install \
-        ipykernel \
-        jupyter \
-        matplotlib \
-        numpy \
-        scipy \
-        sklearn \
-        pandas \
-        wheel \
-        && \
-    python -m ipykernel.kernelspec
-
-# Set up our notebook config.
-COPY jupyter_notebook_config.py /root/.jupyter/
-
-# Jupyter has issues with being run directly:
-#   https://github.com/ipython/ipython/issues/7062
-# We just add a little wrapper script.
-COPY run_jupyter.sh /
-
-# Set up Bazel.
-
-# Running bazel inside a `docker build` command causes trouble, cf:
-#   https://github.com/bazelbuild/bazel/issues/134
-# The easiest solution is to set up a bazelrc file forcing --batch.
-RUN echo "startup --batch" >>/etc/bazel.bazelrc
-# Similarly, we need to workaround sandboxing issues:
-#   https://github.com/bazelbuild/bazel/issues/418
-RUN echo "build --spawn_strategy=standalone --genrule_strategy=standalone" \
-    >>/etc/bazel.bazelrc
-WORKDIR /
-RUN mkdir /bazel && \
-    cd /bazel && \
-    wget --quiet https://github.com/bazelbuild/bazel/releases/download/$BAZEL_VERSION/bazel-$BAZEL_VERSION-installer-linux-x86_64.sh && \
-    wget --quiet https://raw.githubusercontent.com/bazelbuild/bazel/master/LICENSE && \
-    chmod +x bazel-*.sh && \
-    ./bazel-$BAZEL_VERSION-installer-linux-x86_64.sh && \
-    rm -f /bazel/bazel-$BAZEL_VERSION-installer-linux-x86_64.sh
-
-# Download and build TensorFlow.
-WORKDIR /
-RUN git clone https://github.com/tensorflow/tensorflow.git && \
-    cd tensorflow && \
-    git checkout ${TF_BRANCH}
-WORKDIR /tensorflow
-
-# Configure the build for our CUDA configuration.
-ENV CI_BUILD_PYTHON=python \
-    LD_LIBRARY_PATH=/usr/local/cuda/extras/CUPTI/lib64:${LD_LIBRARY_PATH} \
-    CUDNN_INSTALL_PATH=/usr/lib/x86_64-linux-gnu \
-    PYTHON_BIN_PATH=/usr/bin/python \
-    PYTHON_LIB_PATH=/usr/local/lib/python2.7/dist-packages \
-    TF_NEED_CUDA=1 \
-    TF_CUDA_VERSION=9.0 \
-    TF_CUDA_COMPUTE_CAPABILITIES=3.0,3.5,5.2,6.0,6.1,7.0 \
-    TF_CUDNN_VERSION=7
-RUN ./configure
-
-# Build and Install TensorFlow.
-RUN ln -s /usr/local/cuda/lib64/stubs/libcuda.so /usr/local/cuda/lib64/stubs/libcuda.so.1 && \
-    LD_LIBRARY_PATH=/usr/local/cuda/lib64/stubs:${LD_LIBRARY_PATH} \
-    bazel build -c opt \
-                --config=cuda \
-                --cxxopt="-D_GLIBCXX_USE_CXX11_ABI=0" \
-                --jobs=${TF_AVAILABLE_CPUS} \
-                tensorflow/tools/pip_package:build_pip_package && \
-    mkdir /pip_pkg && \
-    bazel-bin/tensorflow/tools/pip_package/build_pip_package /pip_pkg && \
-    pip --no-cache-dir install --upgrade /pip_pkg/tensorflow-*.whl && \
-    rm -rf /pip_pkg && \
-    rm -rf /root/.cache
-# Clean up pip wheel and Bazel cache when done.
-
-WORKDIR /root
-
-# TensorBoard
-EXPOSE 6006
-# IPython
-EXPOSE 8888
-- 
GitLab


From a1c2e20fe5cf04965ce206911ff1a7446a24fadf Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 8 Dec 2017 05:21:12 -0800
Subject: [PATCH 0800/1225] Introduce an experimental API to pass sharding
 information from tensorflow to XLA.

PiperOrigin-RevId: 178366566
---
 tensorflow/compiler/tf2xla/sharding_util.cc   | 55 ++++++++++++++++---
 tensorflow/compiler/tf2xla/sharding_util.h    | 13 ++++-
 .../compiler/tf2xla/xla_compilation_device.cc |  2 +-
 3 files changed, 58 insertions(+), 12 deletions(-)

diff --git a/tensorflow/compiler/tf2xla/sharding_util.cc b/tensorflow/compiler/tf2xla/sharding_util.cc
index d9c839b610..b08a7583cb 100644
--- a/tensorflow/compiler/tf2xla/sharding_util.cc
+++ b/tensorflow/compiler/tf2xla/sharding_util.cc
@@ -14,34 +14,59 @@ limitations under the License.
 ==============================================================================*/
 #include "tensorflow/compiler/tf2xla/sharding_util.h"
 
+#include "tensorflow/core/framework/node_def.pb.h"
 #include "tensorflow/core/lib/core/errors.h"
 #include "tensorflow/core/lib/strings/strcat.h"
 #include "tensorflow/core/util/device_name_utils.h"
 
 namespace tensorflow {
+namespace {
+const char kDeviceSuffixReplicatedCore[] = "REPLICATED_CORE";
+const char kShardingAttribute[] = "_XlaSharding";
+}  // namespace
 
-static const char DEVICE_SUFFIX_REPLICATED_CORE[] = "REPLICATED_CORE";
+namespace {
+xla::StatusOr<tensorflow::gtl::optional<xla::OpSharding>>
+GetShardingFromNodeDef(const NodeDef& node_def) {
+  if (!HasNodeAttr(node_def, kShardingAttribute)) {
+    return tensorflow::gtl::optional<xla::OpSharding>();
+  }
+  string value;
+  xla::OpSharding sharding;
+  TF_RETURN_IF_ERROR(GetNodeAttr(node_def, kShardingAttribute, &value));
+  if (!sharding.ParseFromString(value)) {
+    return xla::InvalidArgument(
+        "Experimental _XlaSharding attribute was not a valid encoded "
+        "xla::OpSharding proto.");
+  }
+  return tensorflow::gtl::optional<xla::OpSharding>(sharding);
+}
 
-static Status CoreOutOfRangeError(int core, int num_cores_per_replica) {
+Status CoreOutOfRangeError(int core, int num_cores_per_replica) {
   return errors::InvalidArgument(
       "Invalid replicated core id: ", core,
       "; num_cores_per_replica=", num_cores_per_replica);
 }
+}  // namespace
 
 xla::StatusOr<tensorflow::gtl::optional<xla::OpSharding>>
-ParseShardingFromDevice(const string& device_name, int num_cores_per_replica) {
+ParseShardingFromDevice(
+    const string& device_name, int num_cores_per_replica,
+    tensorflow::gtl::optional<xla::OpSharding> explicit_sharding) {
   if (device_name.empty()) {
     return tensorflow::gtl::optional<xla::OpSharding>();
   }
-
   DeviceNameUtils::ParsedName parsed_device;
   if (!DeviceNameUtils::ParseFullName(device_name, &parsed_device)) {
     return errors::InvalidArgument("Malformed assigned device '", device_name,
                                    "'");
   }
-  if (!parsed_device.has_type ||
-      !StringPiece(parsed_device.type)
-           .ends_with(DEVICE_SUFFIX_REPLICATED_CORE)) {
+
+  if (explicit_sharding.has_value()) {
+    return explicit_sharding;
+  } else if (!parsed_device.has_type || !parsed_device.has_id ||
+             !StringPiece(parsed_device.type)
+                  .contains(kDeviceSuffixReplicatedCore)) {
     return tensorflow::gtl::optional<xla::OpSharding>();
   } else {
     const int core = parsed_device.id;
@@ -53,20 +78,34 @@ ParseShardingFromDevice(const string& device_name, int num_cores_per_replica) {
   }
 }
 
+xla::StatusOr<tensorflow::gtl::optional<xla::OpSharding>>
+ParseShardingFromDevice(const NodeDef& node_def, int num_cores_per_replica) {
+  const string& device_name = node_def.device();
+  TF_ASSIGN_OR_RETURN(tensorflow::gtl::optional<xla::OpSharding> sharding,
+                      GetShardingFromNodeDef(node_def));
+  return ParseShardingFromDevice(device_name, num_cores_per_replica, sharding);
+}
+
 xla::StatusOr<tensorflow::gtl::optional<xla::OpSharding>>
 ParseShardingFromDevice(const Node& node, int num_cores_per_replica) {
   string device_name = node.assigned_device_name();
   if (device_name.empty()) {
     device_name = node.requested_device();
   }
-  return ParseShardingFromDevice(device_name, num_cores_per_replica);
+  TF_ASSIGN_OR_RETURN(tensorflow::gtl::optional<xla::OpSharding> sharding,
+                      GetShardingFromNodeDef(node.def()));
+  return ParseShardingFromDevice(device_name, num_cores_per_replica, sharding);
 }
+
 void SetShardingDeviceAssignmentFromNode(const Node& src, Node* dst) {
   string device_name = src.assigned_device_name();
   if (device_name.empty()) {
     device_name = src.requested_device();
   }
   dst->set_assigned_device_name(device_name);
+  if (const AttrValue* attr = src.attrs().Find(kShardingAttribute)) {
+    dst->AddAttr(kShardingAttribute, *attr);
+  }
 }
 
 }  // namespace tensorflow
diff --git a/tensorflow/compiler/tf2xla/sharding_util.h b/tensorflow/compiler/tf2xla/sharding_util.h
index f6468bba9f..9e430e30a1 100644
--- a/tensorflow/compiler/tf2xla/sharding_util.h
+++ b/tensorflow/compiler/tf2xla/sharding_util.h
@@ -29,14 +29,21 @@ namespace tensorflow {
 // - if the device name is invalid.
 // - the core is parsed and is out of the range [0, num_cores_per_replica).
 //
-// Otherwise, returns either a non-value or a sharding set as per
-// xla:ShardingBuilder::AssignDevice.
+// Otherwise, returns either:
+// - explicit_sharding if explicit_sharding.has_value()
+// - a non-value if there is no assigned core or
+// - a sharding set as per xla::ShardingBuilder::AssignDevice.
 xla::StatusOr<tensorflow::gtl::optional<xla::OpSharding>>
-ParseShardingFromDevice(const string& device_name, int num_cores_per_replica);
+ParseShardingFromDevice(const string& device_name, int num_cores_per_replica,
+                        tensorflow::gtl::optional<xla::OpSharding>
+                            explicit_sharding = tensorflow::gtl::nullopt);
 
 xla::StatusOr<tensorflow::gtl::optional<xla::OpSharding>>
 ParseShardingFromDevice(const Node& node, int num_cores_per_replica);
 
+xla::StatusOr<tensorflow::gtl::optional<xla::OpSharding>>
+ParseShardingFromDevice(const NodeDef& node_def, int num_cores_per_replica);
+
 void SetShardingDeviceAssignmentFromNode(const Node& src, Node* dst);
 
 }  // namespace tensorflow
diff --git a/tensorflow/compiler/tf2xla/xla_compilation_device.cc b/tensorflow/compiler/tf2xla/xla_compilation_device.cc
index 4f32c29954..cc459dc87c 100644
--- a/tensorflow/compiler/tf2xla/xla_compilation_device.cc
+++ b/tensorflow/compiler/tf2xla/xla_compilation_device.cc
@@ -100,7 +100,7 @@ void XlaCompilationDevice::Compute(OpKernel* op_kernel,
   b->SetOpMetadata(metadata);
 
   auto sharding_parse_result = ParseShardingFromDevice(
-      op_kernel->requested_device(), std::numeric_limits<int>::max());
+      op_kernel->def(), std::numeric_limits<int>::max());
   OP_REQUIRES_OK(context, sharding_parse_result.status());
   tensorflow::gtl::optional<xla::OpSharding> op_sharding =
       sharding_parse_result.ValueOrDie();
-- 
GitLab


From 2139d204f038ed944cecf05873a565f7f7d424b2 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 8 Dec 2017 06:53:22 -0800
Subject: [PATCH 0801/1225] Finishing the migration of the RNN / shared
 fully-connected layer blocks, and associated changes to LayerCollection.

Moved check_registration function to LayerCollection which is a more natural home for it.

Fixed major bug: make_inverse_update_ops was misspelled in FullyConnectedMultiKF meaning that inverses weren't being computed for these factors.

Many other minor tweaks and fixes to code and documentation.

PiperOrigin-RevId: 178372885
---
 .../kernel_tests/fisher_factors_test.py       |   3 +
 .../contrib/kfac/python/ops/estimator.py      |  52 +-----
 .../contrib/kfac/python/ops/fisher_blocks.py  |  68 +++++++-
 .../contrib/kfac/python/ops/fisher_factors.py |  66 +++++---
 .../kfac/python/ops/layer_collection.py       | 148 ++++++++++++++++--
 5 files changed, 242 insertions(+), 95 deletions(-)

diff --git a/tensorflow/contrib/kfac/python/kernel_tests/fisher_factors_test.py b/tensorflow/contrib/kfac/python/kernel_tests/fisher_factors_test.py
index f4a017fc77..70e56db055 100644
--- a/tensorflow/contrib/kfac/python/kernel_tests/fisher_factors_test.py
+++ b/tensorflow/contrib/kfac/python/kernel_tests/fisher_factors_test.py
@@ -77,6 +77,9 @@ class FisherFactorTestingDummy(ff.FisherFactor):
   def instantiate_covariance(self):
     pass
 
+  def make_inverse_update_ops(self):
+    return []
+
 
 class InverseProvidingFactorTestingDummy(ff.InverseProvidingFactor):
   """Dummy class to test the non-abstract methods on ff.InverseProvidingFactor.
diff --git a/tensorflow/contrib/kfac/python/ops/estimator.py b/tensorflow/contrib/kfac/python/ops/estimator.py
index 27ff951f16..5e1680967c 100644
--- a/tensorflow/contrib/kfac/python/ops/estimator.py
+++ b/tensorflow/contrib/kfac/python/ops/estimator.py
@@ -20,7 +20,6 @@ from __future__ import print_function
 
 import contextlib
 import itertools
-import math
 
 import numpy as np
 
@@ -128,7 +127,7 @@ class FisherEstimator(object):
     self._estimation_mode = estimation_mode
     self._layers = layer_collection
     self._layers.create_subgraph()
-    self._check_registration(variables)
+    self._layers.check_registration(variables)
     self._gradient_fns = {
         "gradients": self._get_grads_lists_gradients,
         "empirical": self._get_grads_lists_empirical,
@@ -203,49 +202,6 @@ class FisherEstimator(object):
     return self._apply_transformation(vecs_and_vars,
                                       lambda fb, vec: fb.multiply(vec))
 
-  def _check_registration(self, variables):
-    """Checks that all variable uses have been registered properly.
-
-    Args:
-      variables: List of variables.
-
-    Raises:
-      ValueError: If any registered variables are not included in the list.
-      ValueError: If any variable in the list is not registered.
-      ValueError: If any variable in the list is registered with the wrong
-          number of "uses" in the subgraph recorded (vs the number of times that
-          variable is actually used in the subgraph).
-    """
-    # Note that overlapping parameters (i.e. those that share variables) will
-    # be caught by layer_collection.LayerParametersDict during registration.
-
-    reg_use_map = self._layers.get_use_count_map()
-
-    error_messages = []
-
-    for var in variables:
-      total_uses = self._layers.subgraph.variable_uses(var)
-      reg_uses = reg_use_map[var]
-
-      if reg_uses == 0:
-        error_messages.append("Variable {} not registered.".format(var))
-      elif (not math.isinf(reg_uses)) and reg_uses != total_uses:
-        error_messages.append(
-            "Variable {} registered with wrong number of uses ({} "
-            "registrations vs {} uses).".format(var, reg_uses, total_uses))
-
-    num_get_vars = len(reg_use_map)
-
-    if num_get_vars > len(variables):
-      error_messages.append("{} registered variables were not included in list."
-                            .format(num_get_vars - len(variables)))
-
-    if error_messages:
-      error_messages = [
-          "Found the following errors with variable registration:"
-      ] + error_messages
-      raise ValueError("\n\t".join(error_messages))
-
   def _setup(self, cov_ema_decay):
     """Sets up the various operations.
 
@@ -333,11 +289,7 @@ class FisherEstimator(object):
     return tuple((grad,) for grad in grads_all)
 
   def _get_grads_lists_exact(self, tensors):
-    """Returns a list of all gradients, computing them exactly.
-
-    Args:
-      tensors: Tensors for which to compute gradients.
-    """
+    """No docstring required."""
     # Loop over all coordinates of all losses.
     grads_all = []
     for loss in self._layers.losses:
diff --git a/tensorflow/contrib/kfac/python/ops/fisher_blocks.py b/tensorflow/contrib/kfac/python/ops/fisher_blocks.py
index cdae7ddc21..1ccb9e040f 100644
--- a/tensorflow/contrib/kfac/python/ops/fisher_blocks.py
+++ b/tensorflow/contrib/kfac/python/ops/fisher_blocks.py
@@ -763,6 +763,63 @@ def _num_conv_locations(input_shape, strides):
   return input_shape[1] * input_shape[2] // (strides[1] * strides[2])
 
 
+class FullyConnectedMultiIndepFB(KroneckerProductFB):
+  """FisherBlock for fully-connected layers that share parameters.
+  """
+
+  def __init__(self, layer_collection, inputs, outputs, has_bias=False):
+    """Creates a FullyConnectedMultiIndepFB block.
+
+    Args:
+      layer_collection: LayerCollection instance.
+      inputs: list or tuple of Tensors. Each Tensor has shape [batch_size,
+        inputs_size].
+      outputs: list or tuple of Tensors. Each Tensor has shape [batch_size,
+        outputs_size].
+      has_bias: bool. If True, estimates Fisher with respect to a bias
+        parameter as well as the layer's parameters.
+    """
+
+    assert len(inputs) == len(outputs)
+    # We need to make sure inputs and outputs are tuples and not lists so that
+    # they get hashed by layer_collection.make_or_get_factor properly.
+    self._inputs = tuple(inputs)
+    self._outputs = tuple(outputs)
+    self._has_bias = has_bias
+    self._num_uses = len(inputs)
+
+    super(FullyConnectedMultiIndepFB, self).__init__(layer_collection)
+
+  @property
+  def num_registered_minibatches(self):
+    # TODO(b/69411207): Add support for registering additional minibatches.
+    return 1
+
+  def instantiate_factors(self, grads_list, damping):
+
+    self._input_factor = self._layer_collection.make_or_get_factor(
+        fisher_factors.FullyConnectedMultiKF,
+        ((self._inputs,), self._has_bias))
+
+    self._output_factor = self._layer_collection.make_or_get_factor(
+        fisher_factors.FullyConnectedMultiKF, (grads_list,))
+
+    if NORMALIZE_DAMPING_POWER:
+      damping /= self._num_uses**NORMALIZE_DAMPING_POWER
+
+    self._register_damped_input_and_output_inverses(damping)
+
+  @property
+  def _renorm_coeff(self):
+    return self._num_uses
+
+  def tensors_to_compute_grads(self):
+    return self._outputs
+
+  def num_inputs(self):
+    return len(self._inputs)
+
+
 class SeriesFBApproximation(enum.IntEnum):
   """See FullyConnectedSeriesFB.__init__ for description and usage."""
   option1 = 1
@@ -770,7 +827,7 @@ class SeriesFBApproximation(enum.IntEnum):
 
 
 class FullyConnectedSeriesFB(FisherBlock):
-  """FisherBlock for fully-connected RNN cells.
+  """FisherBlock for fully-connected layers that share parameters across time.
 
   See the following preprint for details:
     https://openreview.net/pdf?id=HyMTkQZAb
@@ -828,7 +885,8 @@ class FullyConnectedSeriesFB(FisherBlock):
     self._output_factor = self._layer_collection.make_or_get_factor(
         fisher_factors.FullyConnectedMultiKF, (grads_list,))
 
-    damping /= self._num_timesteps**NORMALIZE_DAMPING_POWER
+    if NORMALIZE_DAMPING_POWER:
+      damping /= self._num_timesteps**NORMALIZE_DAMPING_POWER
 
     self._damping_input, self._damping_output = _compute_pi_adjusted_damping(
         self._input_factor.get_cov(),
@@ -926,8 +984,8 @@ class FullyConnectedSeriesFB(FisherBlock):
       # Be careful with the outer product.  We don't want to accidentally
       # make it an inner-product instead.
       tmp = 1.0 - array_ops.reshape(mu_G, [int(mu_G.shape[0]), -1]) * mu_A
-      # Prevent some numerical issues by setting 0 eigs to 1.0
-      tmp += 1.0 * array_ops.cast(math_ops.equal(tmp, 0.0), dtype=tmp.dtype)
+      # Prevent some numerical issues by setting any 0.0 eigs to 1.0
+      tmp += 1.0 * math_ops.cast(math_ops.equal(tmp, 0.0), dtype=tmp.dtype)
       Z /= tmp
 
       # We now perform the transpose/reverse version of the operations
@@ -943,7 +1001,7 @@ class FullyConnectedSeriesFB(FisherBlock):
       # Note that this normalization is done because we compute the statistics
       # by averaging, not summing, over time. (And the gradient is presumably
       # summed over time, not averaged, and thus their scales are different.)
-      Z /= array_ops.cast(self._num_timesteps, Z.dtype)
+      Z /= math_ops.cast(self._num_timesteps, Z.dtype)
 
     # Convert back to the "batch_dim==0" orientation.
     Z = array_ops.transpose(Z)
diff --git a/tensorflow/contrib/kfac/python/ops/fisher_factors.py b/tensorflow/contrib/kfac/python/ops/fisher_factors.py
index ff8636785a..5a6d1a93ff 100644
--- a/tensorflow/contrib/kfac/python/ops/fisher_factors.py
+++ b/tensorflow/contrib/kfac/python/ops/fisher_factors.py
@@ -270,9 +270,10 @@ class FisherFactor(object):
     return moving_averages.assign_moving_average(
         self._cov, new_cov, ema_decay, zero_debias=ZERO_DEBIAS)
 
+  @abc.abstractmethod
   def make_inverse_update_ops(self):
     """Create and return update ops corresponding to registered computations."""
-    return []
+    pass
 
   def get_cov(self):
     return self._cov
@@ -304,6 +305,10 @@ class InverseProvidingFactor(FisherFactor):
   def register_damped_inverse(self, damping):
     """Registers a damped inverse needed by a FisherBlock.
 
+    This creates a variable and signals make_inverse_update_ops to make the
+    corresponding update op.  The variable can be read via the method
+    get_inverse.
+
     Args:
       damping: The damping value (float or Tensor) for this factor.
     """
@@ -321,6 +326,10 @@ class InverseProvidingFactor(FisherFactor):
   def register_matpower(self, exp, damping):
     """Registers a matrix power needed by a FisherBlock.
 
+    This creates a variable and signals make_inverse_update_ops to make the
+    corresponding update op.  The variable can be read via the method
+    get_matpower.
+
     Args:
       exp: The exponent (float or Tensor) to raise the matrix to.
       damping: The damping value (float or Tensor).
@@ -338,13 +347,26 @@ class InverseProvidingFactor(FisherFactor):
       self._matpower_by_exp_and_damping[(exp, damping)] = matpower
 
   def register_eigendecomp(self):
-    """Registers that an eigendecomposition is needed by a FisherBlock."""
+    """Registers an eigendecomposition.
+
+    Unlike register_damp_inverse and register_matpower this doesn't create
+    any variables or inverse ops.  Instead it merely makes tensors containing
+    the eigendecomposition available to anyone that wants them.  They will be
+    recomputed (once) for each session.run() call (when they needed by some op).
+    """
     if not self._eigendecomp:
-      self._eigendecomp = linalg_ops.self_adjoint_eig(self._cov)
+      eigenvalues, eigenvectors = linalg_ops.self_adjoint_eig(self._cov)
+
+      # The matrix self._cov is positive semidefinite by construction, but the
+      # numerical eigenvalues could be negative due to numerical errors, so here
+      # we clip them to be at least FLAGS.eigenvalue_clipping_threshold
+      clipped_eigenvalues = math_ops.maximum(eigenvalues,
+                                             EIGENVALUE_CLIPPING_THRESHOLD)
+      self._eigendecomp = (clipped_eigenvalues, eigenvectors)
 
   def make_inverse_update_ops(self):
     """Create and return update ops corresponding to registered computations."""
-    ops = super(InverseProvidingFactor, self).make_inverse_update_ops()
+    ops = []
 
     num_inverses = len(self._inverses_by_damping)
     matrix_power_registered = bool(self._matpower_by_exp_and_damping)
@@ -356,26 +378,20 @@ class InverseProvidingFactor(FisherFactor):
       self.register_eigendecomp()  # ensures self._eigendecomp is set
       eigenvalues, eigenvectors = self._eigendecomp  # pylint: disable=unpacking-non-sequence
 
-      # The matrix self._cov is positive semidefinite by construction, but the
-      # numerical eigenvalues could be negative due to numerical errors, so here
-      # we clip them to be at least EIGENVALUE_CLIPPING_THRESHOLD.
-      clipped_eigenvalues = math_ops.maximum(eigenvalues,
-                                             EIGENVALUE_CLIPPING_THRESHOLD)
-
       for damping, inv in self._inverses_by_damping.items():
         ops.append(
             inv.assign(
-                math_ops.matmul(eigenvectors / (clipped_eigenvalues + damping),
+                math_ops.matmul(eigenvectors / (eigenvalues + damping),
                                 array_ops.transpose(eigenvectors))))
 
       for (exp, damping), matpower in self._matpower_by_exp_and_damping.items():
         ops.append(
             matpower.assign(
                 math_ops.matmul(eigenvectors *
-                                (clipped_eigenvalues + damping)**exp,
+                                (eigenvalues + damping)**exp,
                                 array_ops.transpose(eigenvectors))))
       # These ops share computation and should be run on a single device.
-      ops = [control_flow_ops.group(ops)]
+      ops = [control_flow_ops.group(*ops)]
     else:
       for damping, inv in self._inverses_by_damping.items():
         ops.append(inv.assign(utils.posdef_inv(self._cov, damping)))
@@ -383,6 +399,9 @@ class InverseProvidingFactor(FisherFactor):
     return ops
 
   def get_damped_inverse(self, damping):
+    # Note that this function returns a variable which gets updated by the
+    # inverse ops.  It may be stale / inconsistent with the latest value of
+    # get_cov().
     return self._inverses_by_damping[damping]
 
   def get_matpower(self, exp, damping):
@@ -458,6 +477,9 @@ class DiagonalFactor(FisherFactor):
   def _cov_initializer(self):
     return diagonal_covariance_initializer
 
+  def make_inverse_update_ops(self):
+    return []
+
 
 class NaiveDiagonalFactor(DiagonalFactor):
   """FisherFactor for a diagonal approximation of any type of param's Fisher.
@@ -879,10 +901,8 @@ class FullyConnectedMultiKF(InverseProvidingFactor):
     """Constructs a new `FullyConnectedMultiKF`.
 
     Args:
-      tensor_lists: List of lists of  Tensors of shape [batch_size, n]. Layer
-        inputs at each timestep.
-      has_bias: bool. If True, assume this factor is for the layer's inputs and
-        append '1' to each row.
+      tensor_lists: List of lists of Tensors of shape [batch_size, n].
+      has_bias: bool. If True, '1' is appended to each row.
       colocate_cov_ops_with_inputs: Whether to colocate cov_update ops with
         their inputs.
     """
@@ -975,14 +995,14 @@ class FullyConnectedMultiKF(InverseProvidingFactor):
   def register_cov_dt1(self):
     """Create a variable representing temporal cross-covariance.
 
-    This is technically the second moment, not covariance, since it's
-    not mean subtracted.
+    (This is technically the second moment, not covariance, since it's
+    not mean subtracted.)
     """
     if self._cov_dt1 is None:
       with variable_scope.variable_scope(self._var_scope):
         self._cov_dt1 = variable_scope.get_variable(
             "cov_dt1",
-            initializer=self._cov_initializer,
+            initializer=init_ops.zeros_initializer,
             shape=self._cov_shape,
             trainable=False,
             dtype=self._dtype)
@@ -1045,7 +1065,7 @@ class FullyConnectedMultiKF(InverseProvidingFactor):
 
       self._option2quants_by_damping[damping] = (Pmat, Kmat, mu)
 
-  def make_inverse_updates_ops(self):
+  def make_inverse_update_ops(self):
     """Create and return update ops corresponding to registered computations."""
     # TODO(b/69918258): Add correctness tests for this method.
     # pylint: disable=invalid-name
@@ -1089,7 +1109,7 @@ class FullyConnectedMultiKF(InverseProvidingFactor):
         hPsi = math_ops.matmul(math_ops.matmul(invsqrtC0, C1), invsqrtC0)
 
         # Compute the decomposition U*diag(psi)*U^T = hPsi
-        psi, U = utils.psd_eig(hPsi)
+        psi, U = utils.posdef_eig(hPsi)
 
         # L = C0^(-1/2) * U
         Lmat = math_ops.matmul(invsqrtC0, U)
@@ -1139,6 +1159,6 @@ class FullyConnectedMultiKF(InverseProvidingFactor):
         ops.append(Kmat_var.assign(Kmat))
         ops.append(mu_var.assign(mu))
 
-    return [control_flow_ops.group(ops)]
+    return [control_flow_ops.group(*ops)]
 
     # pylint: enable=invalid-name
diff --git a/tensorflow/contrib/kfac/python/ops/layer_collection.py b/tensorflow/contrib/kfac/python/ops/layer_collection.py
index 275c88eafd..ca42afe6fb 100644
--- a/tensorflow/contrib/kfac/python/ops/layer_collection.py
+++ b/tensorflow/contrib/kfac/python/ops/layer_collection.py
@@ -26,7 +26,9 @@ from __future__ import print_function
 
 from collections import defaultdict
 from collections import OrderedDict
+from functools import partial
 
+import math
 import six
 
 from tensorflow.contrib.kfac.python.ops import fisher_blocks as fb
@@ -57,12 +59,22 @@ _CONV2D_APPROX_TO_BLOCK_TYPES = {
     APPROX_DIAGONAL_NAME: fb.ConvDiagonalFB,
 }
 
+APPROX_KRONECKER_INDEP_NAME = "kron_indep"
+APPROX_KRONECKER_SERIES_1_NAME = "kron_series_1"
+APPROX_KRONECKER_SERIES_2_NAME = "kron_series_2"
+
+_FULLY_CONNECTED_MULTI_APPROX_TO_BLOCK_TYPES = {
+    APPROX_KRONECKER_INDEP_NAME: fb.FullyConnectedMultiIndepFB,
+    APPROX_KRONECKER_SERIES_1_NAME: partial(fb.FullyConnectedSeriesFB,
+                                            option=1),
+    APPROX_KRONECKER_SERIES_2_NAME: partial(fb.FullyConnectedSeriesFB,
+                                            option=2)
+}
+
 # Possible value for 'reuse' keyword argument. Sets 'reuse' to
 # tf.get_variable_scope().reuse.
 VARIABLE_SCOPE = "VARIABLE_SCOPE"
 
-# TODO(jamesmartens): need to add find_canonical_output back into this somewhere
-
 
 def ensure_sequence(obj):
   """If `obj` isn't a tuple or list, return a tuple containing `obj`."""
@@ -142,6 +154,8 @@ class LayerCollection(object):
     self._default_generic_approximation = APPROX_FULL_NAME
     self._default_fully_connected_approximation = APPROX_KRONECKER_NAME
     self._default_convolution_2d_approximation = APPROX_KRONECKER_NAME
+    self._default_fully_connected_multi_approximation = (
+        APPROX_KRONECKER_SERIES_2_NAME)
     self._colocate_cov_ops_with_inputs = colocate_cov_ops_with_inputs
 
     with variable_scope.variable_scope(None, default_name=name) as scope:
@@ -207,6 +221,16 @@ class LayerCollection(object):
               value))
     self._default_convolution_2d_approximation = value
 
+  @property
+  def default_fully_connected_multi_approximation(self):
+    return self._default_fully_connected_multi_approximation
+
+  def set_default_fully_connected_multi_approximation(self, value):
+    if value not in _FULLY_CONNECTED_MULTI_APPROX_TO_BLOCK_TYPES:
+      raise ValueError("{} is not a valid approximation for a fully-connected "
+                       "multi layer.".format(value))
+    self._default_fully_connected_multi_approximation = value
+
   def register_block(self, layer_key, fisher_block, reuse=VARIABLE_SCOPE):
     """Validates and registers the layer_key associated with the fisher_block.
 
@@ -215,7 +239,7 @@ class LayerCollection(object):
           existing registrations and to register if valid.
       fisher_block: The associated `FisherBlock`.
       reuse: Method to use for inserting new `FisherBlock`s. One of True, False,
-        or VARIABLE_SCOPE.
+        or 'VARIABLE_SCOPE'.
 
     Raises:
       ValueError: If `layer_key` was already registered and reuse is `False`,
@@ -266,13 +290,65 @@ class LayerCollection(object):
 
   def get_use_count_map(self):
     """Returns a dict of variables to their number of registrations."""
+    # TODO(b/70283403): Reimplement this in the old way, where each
+    # registration function would be responsible for incrementing the count.
+    # Also, this version has a bug: it won't do the right thing for generic
+    # registration for parameters that are shared.  i.e. it won't set the use
+    # count to infinity.
     vars_to_uses = defaultdict(int)
     for key, block in six.iteritems(self.fisher_blocks):
-      key = key if isinstance(key, (tuple, list)) else (key,)
+      n = (
+          block.num_inputs()*block.num_registered_minibatches if isinstance(
+              block, (fb.FullyConnectedSeriesFB, fb.FullyConnectedMultiIndepFB))
+          else block.num_registered_minibatches)
+      key = ensure_sequence(key)
       for k in key:
-        vars_to_uses[k] += block.num_registered_minibatches
+        vars_to_uses[k] += n
     return vars_to_uses
 
+  def check_registration(self, variables):
+    """Checks that all variable uses have been registered properly.
+
+    Args:
+      variables: List of variables.
+
+    Raises:
+      ValueError: If any registered variables are not included in the list.
+      ValueError: If any variable in the list is not registered.
+      ValueError: If any variable in the list is registered with the wrong
+          number of "uses" in the subgraph recorded (vs the number of times that
+          variable is actually used in the subgraph).
+    """
+    # Note that overlapping parameters (i.e. those that share variables) will
+    # be caught by layer_collection.LayerParametersDict during registration.
+
+    reg_use_map = self.get_use_count_map()
+
+    error_messages = []
+
+    for var in variables:
+      total_uses = self.subgraph.variable_uses(var)
+      reg_uses = reg_use_map[var]
+
+      if reg_uses == 0:
+        error_messages.append("Variable {} not registered.".format(var))
+      elif (not math.isinf(reg_uses)) and reg_uses != total_uses:
+        error_messages.append(
+            "Variable {} registered with wrong number of uses ({} "
+            "registrations vs {} uses).".format(var, reg_uses, total_uses))
+
+    num_get_vars = len(reg_use_map)
+
+    if num_get_vars > len(variables):
+      error_messages.append("{} registered variables were not included in list."
+                            .format(num_get_vars - len(variables)))
+
+    if error_messages:
+      error_messages = [
+          "Found the following errors with variable registration:"
+      ] + error_messages
+      raise ValueError("\n\t".join(error_messages))
+
   def get_blocks(self):
     return self.fisher_blocks.values()
 
@@ -364,11 +440,11 @@ class LayerCollection(object):
         this layer. Weight matrix should have shape [input_size, output_size].
         Bias should have shape [output_size].
       inputs: Tensor of shape [batch_size, input_size]. Inputs to layer.
-      outputs: Tensor of shape [batch_size, output_size]. Preactivations
+      outputs: Tensor of shape [batch_size, output_size]. Outputs
         produced by layer.
-      approx: str. One of APPROX_KRONECKER_NAME or APPROX_DIAGONAL_NAME.
+      approx: str. One of "kron" or "diagonal".
       reuse: bool or str.  If True, reuse an existing FisherBlock. If False,
-        create a new FisherBlock.  If VARIABLE_SCOPE, use
+        create a new FisherBlock.  If "VARIABLE_SCOPE", use
         tf.get_variable_scope().reuse.
 
     Raises:
@@ -410,10 +486,10 @@ class LayerCollection(object):
       inputs: Tensor of shape [batch_size, height, width, in_channels]. Inputs
         to layer.
       outputs: Tensor of shape [batch_size, height, width, out_channels].
-        Preactivations produced by layer.
-      approx: str. One of APPROX_KRONECKER_NAME or APPROX_DIAGONAL_NAME.
+        Output produced by layer.
+      approx: str. One of "kron" or "diagonal".
       reuse: bool or str.  If True, reuse an existing FisherBlock. If False,
-        create a new FisherBlock.  If VARIABLE_SCOPE, use
+        create a new FisherBlock.  If "VARIABLE_SCOPE", use
         tf.get_variable_scope().reuse.
 
     Raises:
@@ -443,14 +519,11 @@ class LayerCollection(object):
     """Registers a generic layer.
 
     Args:
-      params: Tensor or 2-tuple of Tensors corresponding to weight and bias of
-        this layer. Weight matrix should have shape [kernel_height,
-        kernel_width, in_channels, out_channels].  Bias should have shape
-        [out_channels].
+      params: Tensor or tuple of Tensors corresponding to the parameters.
       batch_size: 0-D Tensor. Size of the minibatch.
-      approx: str. One of APPROX_KRONECKER_NAME or APPROX_DIAGONAL_NAME.
+      approx: str. One of "full" or "diagonal".
       reuse: bool or str.  If True, reuse an existing FisherBlock. If False,
-        create a new FisherBlock.  If VARIABLE_SCOPE, use
+        create a new FisherBlock.  If "VARIABLE_SCOPE", use
         tf.get_variable_scope().reuse.
 
     Raises:
@@ -471,6 +544,47 @@ class LayerCollection(object):
     block = self.register_block(params, block_type(self, params), reuse=reuse)
     block.register_additional_minibatch(batch_size)
 
+  def register_fully_connected_multi(self, params, inputs, outputs,
+                                     approx=None):
+    """Register fully connected layers with shared parameters.
+
+    This can handle general fully-connected layers with shared parameters, but
+    has specialized approximations to deal with the case where there is a
+    meaningful linear order to the share instances (such as in an RNN).
+
+    Args:
+      params: Tensor or 2-tuple of Tensors corresponding to weight and bias of
+        this layer. Weight matrix should have shape [input_size, output_size].
+        Bias should have shape [output_size].
+      inputs: A list of tensors, each of shape [batch_size, input_size]. Inputs
+        to layer. In the case of RNNs, one Tensor per time step.
+      outputs: A list of tensors, the same length as 'inputs', each of shape
+        [batch_size, output_size]. Outputs produced by layer. In the case of
+        RNNs, one Tensor per time step.
+      approx: str. One of "kron_indep", "kron_series_1", or "kron_series_2".
+
+    Raises:
+      ValueError: For improper value to 'approx'.
+    """
+    if approx is None:
+      approx = self._get_linked_approx(params)
+      if approx is None:
+        approx = self.default_fully_connected_multi_approximation
+    has_bias = isinstance(params, (tuple, list))
+
+    # TODO(b/70283649): something along the lines of find_canonical_output
+    # should be added back in here (and for the other block types, arguably).
+
+    if approx not in _FULLY_CONNECTED_MULTI_APPROX_TO_BLOCK_TYPES:
+      raise ValueError("Bad value {} for approx.".format(approx))
+    block_type = _FULLY_CONNECTED_MULTI_APPROX_TO_BLOCK_TYPES[approx]
+
+    # For now we don't support multiple minibatches for this type of layer, so
+    # we set reuse=False
+    self.register_block(params,
+                        block_type(self, inputs, outputs, has_bias=has_bias),
+                        reuse=False)
+
   def register_categorical_predictive_distribution(self,
                                                    logits,
                                                    seed=None,
-- 
GitLab


From ad1834db58c967ae28707173c78b34b428c9d0c2 Mon Sep 17 00:00:00 2001
From: Mark Daoust <markdaoust@google.com>
Date: Fri, 8 Dec 2017 09:34:13 -0800
Subject: [PATCH 0802/1225] Add a "file_pattern" argument to the @{} reference
 replacer.

This will allows to use it non ".md" files.

PiperOrigin-RevId: 178386823
---
 tensorflow/tools/docs/generate_lib.py | 37 +++++++++++++++++++--------
 1 file changed, 27 insertions(+), 10 deletions(-)

diff --git a/tensorflow/tools/docs/generate_lib.py b/tensorflow/tools/docs/generate_lib.py
index c0cde1d3bd..f950f19a7c 100644
--- a/tensorflow/tools/docs/generate_lib.py
+++ b/tensorflow/tools/docs/generate_lib.py
@@ -19,6 +19,7 @@ from __future__ import division
 from __future__ import print_function
 
 import argparse
+import fnmatch
 import os
 import sys
 
@@ -384,10 +385,26 @@ class _UpdateTags(py_guide_parser.PyGuideParser):
 EXCLUDED = set(['__init__.py', 'OWNERS', 'README.txt'])
 
 
-def _other_docs(src_dir, output_dir, reference_resolver):
-  """Convert all the files in `src_dir` and write results to `output_dir`."""
-  header = '<!-- DO NOT EDIT! Automatically generated file. -->\n'
+def _other_docs(src_dir, output_dir, reference_resolver, file_pattern='*.md'):
+  """Fix @{} references in all files under `src_dir` matching `file_pattern`.
 
+  A matching directory structure, with the modified files is
+  written to `output_dir`.
+
+  `{"__init__.py","OWNERS","README.txt"}` are skipped.
+
+  Files not matching `file_pattern` (using `fnmatch`) are copied with no change.
+
+  Also, files in the `api_guides/python` directory get explicit ids set on all
+  heading-2s to ensure back-links work.
+
+  Args:
+    src_dir: The directory to convert files from.
+    output_dir: The root directory to write the resulting files to.
+    reference_resolver: A `parser.ReferenceResolver` to make the replacements.
+    file_pattern: Only replace references in files matching file_patters,
+      using fnmatch. Non-matching files are copied unchanged.
+  """
   # Iterate through all the source files and process them.
   tag_updater = _UpdateTags()
   for dirpath, _, filenames in os.walk(src_dir):
@@ -415,21 +432,21 @@ def _other_docs(src_dir, output_dir, reference_resolver):
 
       suffix = os.path.relpath(path=full_in_path, start=src_dir)
       full_out_path = os.path.join(output_dir, suffix)
-      if not base_name.endswith('.md'):
-        print('Copying non-md file %s...' % suffix)
+      if not fnmatch.fnmatch(base_name, file_pattern):
+        print('Copying un-matched file %s...' % suffix)
         open(full_out_path, 'w').write(open(full_in_path).read())
         continue
       if dirpath.endswith('/api_guides/python'):
         print('Processing Python guide %s...' % base_name)
-        md_string = tag_updater.process(full_in_path)
+        content = tag_updater.process(full_in_path)
       else:
         print('Processing doc %s...' % suffix)
-        md_string = open(full_in_path).read()
+        content = open(full_in_path).read()
 
-      output = reference_resolver.replace_references(md_string,
-                                                     relative_path_to_root)
+      content = reference_resolver.replace_references(content,
+                                                      relative_path_to_root)
       with open(full_out_path, 'w') as f:
-        f.write(header + output)
+        f.write(content)
 
   print('Done.')
 
-- 
GitLab


From 8b86edcb9bc39d3f69ee94b6fec025a0a671dd36 Mon Sep 17 00:00:00 2001
From: Benoit Steiner <bsteiner@google.com>
Date: Fri, 8 Dec 2017 09:51:02 -0800
Subject: [PATCH 0803/1225] Return an error when shape inference fails to
 converge

PiperOrigin-RevId: 178388660
---
 tensorflow/core/grappler/costs/graph_properties.cc | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/tensorflow/core/grappler/costs/graph_properties.cc b/tensorflow/core/grappler/costs/graph_properties.cc
index 34d5c48c31..f188c10615 100644
--- a/tensorflow/core/grappler/costs/graph_properties.cc
+++ b/tensorflow/core/grappler/costs/graph_properties.cc
@@ -853,6 +853,10 @@ Status GraphProperties::PropagateShapes(
   } while (!new_shapes->empty() &&
            num_resource_iterations++ < max_resource_iterations);
 
+  if (!new_shapes->empty()) {
+    return errors::Internal("Shape inference failed to converge");
+  }
+
   return Status::OK();
 }
 
-- 
GitLab


From f186c481cfdf145a517bfbc396bc992618ed9ec5 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 8 Dec 2017 10:39:08 -0800
Subject: [PATCH 0804/1225] Relax the requirements on --input_arrays and
 --input_shapes.

Relaxing the requirement on --input_shapes is overdue: in many cases, the input nodes contains shape information, which we're reading from the input file anyway, so there is no point asking the user to pass it. Also, we want to eventually fully support arbitrary runtime input shapes, and that is a step in that direction.

Relaxing the requirement on --input_arrays is motivated by the need to get graph visualizations for strange graph where either we don't know beforehand what the inputs are, or they only have constant inputs (which makes no sense from an inference perspective, but is something that we've seen in some graphs).

Finally, this change also drops the requirement that --input_arrays be connected by a path to --output_arrays. This was a meaningful requirement to have, however:
 - it caused friction with some bad graphs that we need visualizations of, and getting a visualization is often useful to understanding how the inputs failed to be connected to outputs;
 - it rarely caught user mistakes in practice. I.e. the mistake that it was catching wasn't really a mistake that people make in practice.

PiperOrigin-RevId: 178395267
---
 tensorflow/contrib/lite/toco/tooling_util.cc | 70 +++++---------------
 1 file changed, 15 insertions(+), 55 deletions(-)

diff --git a/tensorflow/contrib/lite/toco/tooling_util.cc b/tensorflow/contrib/lite/toco/tooling_util.cc
index 078afe79d0..7413cb9dd0 100644
--- a/tensorflow/contrib/lite/toco/tooling_util.cc
+++ b/tensorflow/contrib/lite/toco/tooling_util.cc
@@ -809,52 +809,11 @@ void FixOperatorOrdering(Model* model) {
       << "the above code should have generated a FATAL error already!";
 }
 
-// Checks that the --input_arrays of the Model are actually used by at least
-// one of the --output_arrays or --rnn_states i.e. that the graph contains a
-// path from each one of the inputs to at least one of the outputs or RNN
-// states. This catches cases where the user passed the wrong --input_arrays or
-// --output_arrays or --rnn_states, which otherwise may result in cryptic error
-// messages.
-void CheckInputsActuallyUsed(const Model& model) {
-  std::set<string> used_arrays;
-  for (const string& output : model.flags.output_arrays()) {
-    used_arrays.insert(output);
-  }
-  for (const auto& rnn_state : model.flags.rnn_states()) {
-    used_arrays.insert(rnn_state.back_edge_source_array());
-  }
-  for (int i = model.operators.size() - 1; i >= 0; i--) {
-    bool is_op_used = false;
-    for (const string& op_output : model.operators[i]->outputs) {
-      if (used_arrays.count(op_output)) {
-        is_op_used = true;
-        break;
-      }
-    }
-    if (!is_op_used) {
-      continue;
-    }
-    for (const string& op_input : model.operators[i]->inputs) {
-      used_arrays.insert(op_input);
-    }
-  }
-  for (const auto& input_array : model.flags.input_arrays()) {
-    QCHECK(used_arrays.count(input_array.name()))
-        << "The graph does not connect the input (" << input_array.name()
-        << ") specified by --input_arrays to any of the specified "
-        << "--output_arrays ("
-        << absl::StrJoin(model.flags.output_arrays(), ", ")
-        << "). Did you pass the wrong flags for this model, "
-        << "or is that model's graph actually incomplete?";
-  }
-}
-
 void CheckInvariants(const Model& model) {
   CheckNoMissingArray(model);
   CheckNoOrphanedArray(model);
   CheckArrayFieldsConsistent(model);
   CheckOperatorOrdering(model);
-  CheckInputsActuallyUsed(model);
 }
 
 void CheckCountInRange(const ::toco::ModelFlags::ModelCheck& model_check,
@@ -1068,9 +1027,6 @@ void ResolveModelFlags(const ModelFlags& model_flags, Model* model) {
     model->flags.mutable_model_checks()->CopyFrom(model_flags.model_checks());
   }
 
-  QCHECK_GT(model->flags.input_arrays_size(), 0)
-      << "This model does not define input arrays, so a "
-         "--input_arrays flag must be given on the command-line.";
   QCHECK_GT(model->flags.output_arrays_size(), 0)
       << "This model does not define output arrays, so a "
          "--output_arrays flag must be given on the command-line.";
@@ -1098,21 +1054,19 @@ void ResolveModelFlags(const ModelFlags& model_flags, Model* model) {
       input_array.data_type = ArrayDataType::kFloat;
     }
 
-    if (!input_array.has_shape()) {
-      QCHECK(!input_array_proto.shape().dims().empty())
-          << "This model does not have shape defined for input array "
-          << input_array_proto.name();
-    }
-
     // Compare/merge the model->flags describing the input_shape with
     // the actual input array's shape.
-    auto& input_array_dims = *input_array.mutable_shape()->mutable_dims();
-    if (input_array_dims.empty()) {
-      for (auto dim : input_array_proto.shape().dims()) {
-        CHECK_GE(dim, 1);
-        input_array_dims.push_back(dim);
+    if (!input_array.has_shape()) {
+      if (input_array_proto.has_shape()) {
+        auto& input_array_dims = *input_array.mutable_shape()->mutable_dims();
+        for (auto dim : input_array_proto.shape().dims()) {
+          CHECK_GE(dim, 1);
+          input_array_dims.push_back(dim);
+        }
       }
     } else {
+      const auto& input_array_dims =
+          *input_array.mutable_shape()->mutable_dims();
       CHECK_EQ(input_array_dims.size(), input_array_proto.shape().dims_size());
       for (int i = 0; i < input_array_dims.size(); i++) {
         CHECK_EQ(input_array_dims[i], input_array_proto.shape().dims(i));
@@ -1144,6 +1098,12 @@ void ResolveModelFlags(const ModelFlags& model_flags, Model* model) {
     CreateOrCheckRnnStateArray(rnn_state.state_array(), rnn_state.size(),
                                model);
   }
+
+  for (const auto& input_array : model->flags.input_arrays()) {
+    if (input_array.has_shape()) {
+      CHECK(input_array.shape().dims_size());
+    }
+  }
 }
 
 void CheckIsReadyForQuantization(const Model& model) {
-- 
GitLab


From 29e72226e76b8221800e6bc7154007181d3edef7 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 8 Dec 2017 10:40:45 -0800
Subject: [PATCH 0805/1225] Misc improvements required for some internal model:
  - handle StopGradient as an Identity node. So far we had refrained from
 supporting it as a smoking gun for a graph being a training not inference
 graph, but the resulting friction is proving not worth it at the moment.  -
 Relax a couple of conditions that we were fatal-erroring on early (during
 graph transformations) even as they only were necessary preconditions for
 much later code (some internal export formats). Erroring early is nice, but
 the existence of multiple output formats with very diverse requirements
 makies it unnecessary friction to error early. Here, when the output format
 is GRAPHVIZ_DOT, we do want to continue in these cases.  - add some verbose
 logging in propagate_fixed_sizes.

PiperOrigin-RevId: 178395479
---
 .../graph_transformations/propagate_fixed_sizes.cc    |  3 +++
 .../resolve_strided_slice_attributes.cc               |  5 -----
 tensorflow/contrib/lite/toco/import_tensorflow.cc     | 11 +++--------
 3 files changed, 6 insertions(+), 13 deletions(-)

diff --git a/tensorflow/contrib/lite/toco/graph_transformations/propagate_fixed_sizes.cc b/tensorflow/contrib/lite/toco/graph_transformations/propagate_fixed_sizes.cc
index f6daad9020..f29127c6b0 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/propagate_fixed_sizes.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/propagate_fixed_sizes.cc
@@ -19,6 +19,7 @@ limitations under the License.
 #include <unordered_map>
 #include <vector>
 
+#include "absl/strings/str_join.h"
 #include "tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.h"
 #include "tensorflow/contrib/lite/toco/model.h"
 #include "tensorflow/contrib/lite/toco/tooling_util.h"
@@ -1114,6 +1115,8 @@ bool PropagateFixedSizes::Run(Model* model, std::size_t op_index) {
   for (const auto& output : op->outputs) {
     if (model->arrays[output]->has_shape() &&
         (old_output_dims[output] != model->arrays[output]->shape().dims())) {
+      AddMessageF("Set shape of %s to [%s]", output,
+                  absl::StrJoin(model->arrays[output]->shape().dims(), ","));
       return true;
     }
   }
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_strided_slice_attributes.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_strided_slice_attributes.cc
index 5fc3b25bc1..851511268d 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_strided_slice_attributes.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_strided_slice_attributes.cc
@@ -50,11 +50,6 @@ bool ResolveStridedSliceAttributes::Run(Model* model, std::size_t op_index) {
   op->stop_indices = stop_array.GetBuffer<ArrayDataType::kInt32>().data;
   op->strides = stride_array.GetBuffer<ArrayDataType::kInt32>().data;
 
-  // Only 4D arrays are supported for now.
-  CHECK_EQ(op->start_indices.size(), 4);
-  CHECK_EQ(op->stop_indices.size(), 4);
-  CHECK_EQ(op->strides.size(), 4);
-
   // TODO(dkalenichenko): Delete the extra inputs?
 
   return true;
diff --git a/tensorflow/contrib/lite/toco/import_tensorflow.cc b/tensorflow/contrib/lite/toco/import_tensorflow.cc
index 9f72f9a1d3..a021fafce6 100644
--- a/tensorflow/contrib/lite/toco/import_tensorflow.cc
+++ b/tensorflow/contrib/lite/toco/import_tensorflow.cc
@@ -495,7 +495,7 @@ void ConvertIdentityOperator(const NodeDef& node,
                              const TensorFlowImportFlags& tf_import_flags,
                              Model* model) {
   CHECK(node.op() == "Identity" || node.op() == "CheckNumerics" ||
-        node.op() == "PlaceholderWithDefault");
+        node.op() == "PlaceholderWithDefault" || node.op() == "StopGradient");
   auto* op = new TensorFlowIdentityOperator;
   // Amazingly, some TensorFlow graphs (at least rajeev_lstm.pb) have
   // identity nodes with multiple inputs, but the other inputs seem
@@ -1154,12 +1154,6 @@ void ConvertCastOperator(const NodeDef& node,
   CHECK_EQ(GetInputsCount(node, tf_import_flags), 1);
   const auto tf_src_dtype = GetDataTypeAttr(node, "SrcT");
   const auto tf_dst_dtype = GetDataTypeAttr(node, "DstT");
-  CHECK(tf_src_dtype == DT_UINT8 || tf_src_dtype == DT_INT32 ||
-        tf_src_dtype == DT_FLOAT);
-  CHECK(tf_dst_dtype == DT_UINT8 || tf_dst_dtype == DT_INT32 ||
-        tf_dst_dtype == DT_FLOAT);
-  CHECK_NE(tf_src_dtype, tf_dst_dtype)
-      << "Same input and output data type. No need to cast.";
   auto* op = new CastOperator;
   op->src_data_type = GetArrayDataType(tf_src_dtype);
   op->dst_data_type = GetArrayDataType(tf_dst_dtype);
@@ -1600,7 +1594,8 @@ std::unique_ptr<Model> ImportTensorFlowGraphDef(
       ConvertMatMulOperator(node, tf_import_flags, model);
     } else if (node.op() == "Div" || node.op() == "RealDiv") {
       ConvertDivOperator(node, tf_import_flags, model);
-    } else if (node.op() == "Identity" || node.op() == "CheckNumerics") {
+    } else if (node.op() == "Identity" || node.op() == "CheckNumerics" ||
+               node.op() == "StopGradient") {
       ConvertIdentityOperator(node, tf_import_flags, model);
     } else if (node.op() == "FakeQuantWithMinMaxVars") {
       ConvertFakeQuantWithMinMaxVars(node, tf_import_flags, model);
-- 
GitLab


From 3a0dd455f0612a104ec81afb847615d21f4ccce0 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 8 Dec 2017 10:43:37 -0800
Subject: [PATCH 0806/1225] Removes invariant that bitcasts must have the same
 shape byte size as their operand.

PiperOrigin-RevId: 178395859
---
 tensorflow/compiler/xla/service/hlo_verifier.cc | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/tensorflow/compiler/xla/service/hlo_verifier.cc b/tensorflow/compiler/xla/service/hlo_verifier.cc
index 515edd48b4..b8fd7a89ef 100644
--- a/tensorflow/compiler/xla/service/hlo_verifier.cc
+++ b/tensorflow/compiler/xla/service/hlo_verifier.cc
@@ -151,9 +151,6 @@ class ShapeVerifier : public DfsHloVisitor {
   }
 
   Status HandleBitcast(HloInstruction* bitcast) override {
-    // Bitcasts can be any shape, as long as the size matches the operand size.
-    TF_RET_CHECK(shape_size_fn_(bitcast->shape()) ==
-                 shape_size_fn_(bitcast->operand(0)->shape()));
     return tensorflow::Status::OK();
   }
 
-- 
GitLab


From ec0f20465e4cac9b45e6bf840c29487911c76d3f Mon Sep 17 00:00:00 2001
From: Eugene Brevdo <ebrevdo@google.com>
Date: Fri, 8 Dec 2017 10:46:46 -0800
Subject: [PATCH 0807/1225] Fix tf.while_loop with maximum_iterations != None
 and single loop_var.

PiperOrigin-RevId: 178396322
---
 .../kernel_tests/control_flow_ops_py_test.py  | 19 +++++++++++++++++++
 tensorflow/python/ops/control_flow_ops.py     | 14 ++++++++++----
 2 files changed, 29 insertions(+), 4 deletions(-)

diff --git a/tensorflow/python/kernel_tests/control_flow_ops_py_test.py b/tensorflow/python/kernel_tests/control_flow_ops_py_test.py
index 51eb13b921..3a61d76f58 100644
--- a/tensorflow/python/kernel_tests/control_flow_ops_py_test.py
+++ b/tensorflow/python/kernel_tests/control_flow_ops_py_test.py
@@ -753,6 +753,15 @@ class ControlFlowTest(test.TestCase):
       r = isum(s, maximum_iterations=3)
       self.assertAllEqual([1+3, 2+3, 3+3, 4+3, 5+3], r.eval())
 
+  def testWhileWithMaximumIterationsAndSingleArgument(self):
+    with self.test_session():
+      r = control_flow_ops.while_loop(
+          lambda i: i < 3,
+          lambda i: i + 1,
+          [0],
+          maximum_iterations=1)
+      self.assertEqual(1, r.eval())
+
   # Have more than 10 parallel iterations and hence exercise k-bound
   # most of the time.
   def testWhile_3(self):
@@ -3014,6 +3023,16 @@ class EagerTest(test.TestCase):
       self.assertAllEqual(isum(tensor, maximum_iterations=3).numpy(),
                           [1+3, 2+3, 3+3, 4+3, 5+3])
 
+  def testWhileWithMaximumIterationsAndSingleArgument(self):
+    with context.eager_mode():
+      tensor = constant_op.constant(0)
+      r = control_flow_ops.while_loop(
+          lambda i: i < 3,
+          lambda i: i + 1,
+          [tensor],
+          maximum_iterations=1)
+      self.assertEqual(1, r.numpy())
+
   def testWithDependencies(self):
     with context.eager_mode():
       t1 = constant_op.constant(1)
diff --git a/tensorflow/python/ops/control_flow_ops.py b/tensorflow/python/ops/control_flow_ops.py
index f3ef2b9ac3..12ee2e627b 100644
--- a/tensorflow/python/ops/control_flow_ops.py
+++ b/tensorflow/python/ops/control_flow_ops.py
@@ -2865,10 +2865,16 @@ def while_loop(cond, body, loop_vars, shape_invariants=None,
           0, dtype=maximum_iterations.dtype, name="iteration_counter")
       orig_cond = cond
       orig_body = body
-      loop_vars = (counter, loop_vars)
-      cond = lambda i, lv: (  # pylint: disable=g-long-lambda
-          math_ops.logical_and(i < maximum_iterations, orig_cond(*lv)))
-      body = lambda i, lv: (i + 1, orig_body(*lv))
+      if len(loop_vars) == 1:
+        loop_vars = (counter, loop_vars[0])
+        cond = lambda i, lv: (  # pylint: disable=g-long-lambda
+            math_ops.logical_and(i < maximum_iterations, orig_cond(lv)))
+        body = lambda i, lv: (i + 1, orig_body(lv))
+      else:
+        loop_vars = (counter, loop_vars)
+        cond = lambda i, lv: (  # pylint: disable=g-long-lambda
+            math_ops.logical_and(i < maximum_iterations, orig_cond(*lv)))
+        body = lambda i, lv: (i + 1, orig_body(*lv))
 
     if context.in_eager_mode():
       while cond(*loop_vars):
-- 
GitLab


From bb4ada759d9731a6591d6e2a10f72d0229c1549e Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 8 Dec 2017 11:20:42 -0800
Subject: [PATCH 0808/1225] [XLA:GPU] Remove the comment that says b/34969189
 blocking TruncateNormal.

The issue has been fixed and the GPU backend now supports atomic operations on
small data types.

PiperOrigin-RevId: 178401552
---
 tensorflow/compiler/tf2xla/xla_gpu_backend.cc | 2 --
 1 file changed, 2 deletions(-)

diff --git a/tensorflow/compiler/tf2xla/xla_gpu_backend.cc b/tensorflow/compiler/tf2xla/xla_gpu_backend.cc
index d504613d23..8ca757e723 100644
--- a/tensorflow/compiler/tf2xla/xla_gpu_backend.cc
+++ b/tensorflow/compiler/tf2xla/xla_gpu_backend.cc
@@ -21,8 +21,6 @@ namespace tensorflow {
 bool GpuOpFilter(KernelDef* kdef) {
   // TODO(b/31361304): The GPU backend does not parallelize PRNG ops, leading to
   // slow code.
-  // TODO(b/34969189) The implementation of TruncatedNormal generates illegal
-  // code on GPU.
   if (kdef->op() == "RandomStandardNormal" || kdef->op() == "RandomUniform" ||
       kdef->op() == "RandomUniformInt" || kdef->op() == "TruncatedNormal") {
     return false;
-- 
GitLab


From d8425f553b5e67bc1fb008b8719dd3f59b3e0957 Mon Sep 17 00:00:00 2001
From: Derek Murray <mrry@google.com>
Date: Fri, 8 Dec 2017 11:39:23 -0800
Subject: [PATCH 0809/1225] Add n=1 special case to the DeserializeSparse op.

This avoids excessive copying in the common case where the
sparse-format output of a `tf.data.Dataset` pipeline or the input to a
`Dataset.map()` or `Dataset.filter()` transformation contains a single
`tf.SparseTensor`.

As I was refactoring to add the special case, I ended up removing the
template parameter for the output values' tensor DataType, and
switching the sole reamining code that depends on it to use a `switch`
on the `"dtype"` attr. This will reduce the binary size for this op.

PiperOrigin-RevId: 178404305
---
 .../core/kernels/serialize_sparse_op.cc       | 212 ++++++++++--------
 tensorflow/core/util/sparse/sparse_tensor.h   |  15 ++
 2 files changed, 138 insertions(+), 89 deletions(-)

diff --git a/tensorflow/core/kernels/serialize_sparse_op.cc b/tensorflow/core/kernels/serialize_sparse_op.cc
index a3b573b5d9..61e40caef9 100644
--- a/tensorflow/core/kernels/serialize_sparse_op.cc
+++ b/tensorflow/core/kernels/serialize_sparse_op.cc
@@ -31,6 +31,7 @@ limitations under the License.
 #include "tensorflow/core/framework/variant_encode_decode.h"
 #include "tensorflow/core/kernels/reshape_util.h"
 #include "tensorflow/core/lib/gtl/inlined_vector.h"
+#include "tensorflow/core/lib/gtl/optional.h"
 #include "tensorflow/core/util/sparse/sparse_tensor.h"
 
 namespace tensorflow {
@@ -289,22 +290,12 @@ TF_CALL_ALL_TYPES(REGISTER_KERNELS);
 #undef REGISTER_KERNELS
 
 template <typename T>
-class DeserializeSparseOpBase : public OpKernel {
- public:
-  explicit DeserializeSparseOpBase(OpKernelConstruction* context)
-      : OpKernel(context) {}
-
-  void Compute(OpKernelContext* context) override {}
-
- protected:
-  Status Deserialize(const T& serialized, Tensor* result);
-};
-
-template <typename T, typename U>
-class DeserializeSparseOp : public DeserializeSparseOpBase<U> {
+class DeserializeSparseOp : public OpKernel {
  public:
   explicit DeserializeSparseOp(OpKernelConstruction* context)
-      : DeserializeSparseOpBase<U>(context) {}
+      : OpKernel(context) {
+    OP_REQUIRES_OK(context, context->GetAttr("dtype", &dtype_));
+  }
 
   void Compute(OpKernelContext* context) override {
     const Tensor& serialized_sparse = context->input(0);
@@ -332,63 +323,43 @@ class DeserializeSparseOp : public DeserializeSparseOpBase<U> {
             "but has a zero dimension ",
             serialized_sparse.shape().DebugString()));
 
+    if (num_sparse_tensors == 0 && serialized_sparse.shape().dims() == 1) {
+      // Special case with a single sparse tensor. We can avoid data
+      // motion in the Concat and Reshape.
+      const auto& serialized_sparse_t = serialized_sparse.vec<T>();
+
+      Tensor output_indices;
+      Tensor output_values;
+      Tensor output_shape;
+      OP_REQUIRES_OK(context,
+                     this->GetAndValidateSparseTensor(
+                         serialized_sparse_t(0), serialized_sparse_t(1),
+                         serialized_sparse_t(2), dtype_, 0 /* index */,
+                         &output_indices, &output_values, &output_shape));
+      context->set_output(0, output_indices);
+      context->set_output(1, output_values);
+      context->set_output(2, output_shape);
+      return;
+    }
+
     std::vector<Tensor> indices;
     std::vector<Tensor> values;
     TensorShape shape;
     indices.reserve(num_sparse_tensors);
     values.reserve(num_sparse_tensors);
 
-    const auto& serialized_sparse_t = serialized_sparse.flat_inner_dims<U, 2>();
-
+    const auto& serialized_sparse_t = serialized_sparse.flat_inner_dims<T, 2>();
     for (int i = 0; i < num_sparse_tensors; ++i) {
       Tensor output_indices;
-      OP_REQUIRES_OK(context, this->Deserialize(serialized_sparse_t(i, 0),
-                                                &output_indices));
-      OP_REQUIRES(context, TensorShapeUtils::IsMatrix(output_indices.shape()),
-                  errors::InvalidArgument(
-                      "Expected serialized_sparse[", i,
-                      ", 0] to represent an index matrix but received shape ",
-                      output_indices.shape().DebugString()));
-
       Tensor output_values;
-      OP_REQUIRES_OK(context, this->Deserialize(serialized_sparse_t(i, 1),
-                                                &output_values));
-      OP_REQUIRES(context, TensorShapeUtils::IsVector(output_values.shape()),
-                  errors::InvalidArgument(
-                      "Expected serialized_sparse[", i,
-                      ", 1] to represent a values vector but received shape ",
-                      output_values.shape().DebugString()));
-
       Tensor output_shape;
-      OP_REQUIRES_OK(
-          context, this->Deserialize(serialized_sparse_t(i, 2), &output_shape));
-      OP_REQUIRES(
-          context, TensorShapeUtils::IsVector(output_shape.shape()),
-          errors::InvalidArgument("Expected serialized_sparse[", i,
-                                  ", 1] to be a shape vector but its shape is ",
-                                  output_shape.shape().DebugString()));
-
-      OP_REQUIRES(
-          context, DataTypeToEnum<T>::value == output_values.dtype(),
-          errors::InvalidArgument(
-              "Requested SparseTensor of type ",
-              DataTypeString(DataTypeToEnum<T>::value), " but SparseTensor[", i,
-              "].values.dtype() == ", DataTypeString(output_values.dtype())));
-
+      OP_REQUIRES_OK(context,
+                     this->GetAndValidateSparseTensor(
+                         serialized_sparse_t(i, 0), serialized_sparse_t(i, 1),
+                         serialized_sparse_t(i, 2), dtype_, i, &output_indices,
+                         &output_values, &output_shape));
       int64 num_entries = output_indices.dim_size(0);
-      OP_REQUIRES(context, num_entries == output_values.dim_size(0),
-                  errors::InvalidArgument(
-                      "Expected row counts of SparseTensor[", i,
-                      "].indices and SparseTensor[", i,
-                      "].values to match but they do not: ", num_entries,
-                      " vs. ", output_values.dim_size(0)));
       int rank = output_indices.dim_size(1);
-      OP_REQUIRES(
-          context, rank == output_shape.dim_size(0),
-          errors::InvalidArgument("Expected column counts of SparseTensor[", i,
-                                  "].indices to match size of SparseTensor[", i,
-                                  "].shape but they do not: ", rank, " vs. ",
-                                  output_shape.dim_size(0)));
 
       // Now we expand each SparseTensors' indices and shape by
       // prefixing a dimension
@@ -445,7 +416,25 @@ class DeserializeSparseOp : public DeserializeSparseOpBase<U> {
       tensors.emplace_back(indices[i], values[i], shape, std_order);
     }
 
-    SparseTensor output = SparseTensor::Concat<T>(tensors);
+    gtl::optional<SparseTensor> maybe_output;
+#define HANDLE_TYPE(T)                               \
+  case DataTypeToEnum<T>::value: {                   \
+    maybe_output = SparseTensor::Concat<T>(tensors); \
+    break;                                           \
+  }
+
+    switch (dtype_) {
+      TF_CALL_ALL_TYPES(HANDLE_TYPE);
+      TF_CALL_QUANTIZED_TYPES(HANDLE_TYPE);
+      TF_CALL_variant(HANDLE_TYPE);
+#undef HANDLE_TYPE
+      default:
+        OP_REQUIRES(context, false,
+                    errors::Unimplemented(
+                        "DeserializeSparse Unhandled data type: ", dtype_));
+    }
+    DCHECK(maybe_output);
+    SparseTensor& output = maybe_output.value();
 
     // Compute the input shape for the reshape operation.
     Tensor input_shape(DT_INT64, TensorShape({output.dims()}));
@@ -467,11 +456,71 @@ class DeserializeSparseOp : public DeserializeSparseOpBase<U> {
             0 /* output indices index */, 2 /* output shape index */);
     context->set_output(1, output.values());
   }
+
+ protected:
+  Status Deserialize(const T& serialized, Tensor* result);
+
+  Status GetAndValidateSparseTensor(
+      const T& serialized_indices, const T& serialized_values,
+      const T& serialized_shape, DataType values_dtype, int index,
+      Tensor* output_indices, Tensor* output_values, Tensor* output_shape) {
+    // Deserialize and validate the indices.
+    TF_RETURN_IF_ERROR(this->Deserialize(serialized_indices, output_indices));
+    if (!TensorShapeUtils::IsMatrix(output_indices->shape())) {
+      return errors::InvalidArgument(
+          "Expected serialized_sparse[", index,
+          ", 0] to represent an index matrix but received shape ",
+          output_indices->shape().DebugString());
+    }
+    int64 num_entries = output_indices->dim_size(0);
+    int rank = output_indices->dim_size(1);
+
+    // Deserialize and validate the values.
+    TF_RETURN_IF_ERROR(this->Deserialize(serialized_values, output_values));
+    if (!TensorShapeUtils::IsVector(output_values->shape())) {
+      return errors::InvalidArgument(
+          "Expected serialized_sparse[", index,
+          ", 1] to represent a values vector but received shape ",
+          output_values->shape().DebugString());
+    }
+    if (values_dtype != output_values->dtype()) {
+      return errors::InvalidArgument(
+          "Requested SparseTensor of type ", DataTypeString(values_dtype),
+          " but SparseTensor[", index,
+          "].values.dtype() == ", DataTypeString(output_values->dtype()));
+    }
+    if (num_entries != output_values->dim_size(0)) {
+      return errors::InvalidArgument(
+          "Expected row counts of SparseTensor[", index,
+          "].indices and SparseTensor[", index,
+          "].values to match but they do not: ", num_entries, " vs. ",
+          output_values->dim_size(0));
+    }
+
+    // Deserialize and validate the shape.
+    TF_RETURN_IF_ERROR(this->Deserialize(serialized_shape, output_shape));
+    if (!TensorShapeUtils::IsVector(output_shape->shape())) {
+      return errors::InvalidArgument(
+          "Expected serialized_sparse[", index,
+          ", 1] to be a shape vector but its shape is ",
+          output_shape->shape().DebugString());
+    }
+    if (rank != output_shape->dim_size(0)) {
+      return errors::InvalidArgument("Expected column counts of SparseTensor[",
+                                     index,
+                                     "].indices to match size of SparseTensor[",
+                                     index, "].shape but they do not: ", rank,
+                                     " vs. ", output_shape->dim_size(0));
+    }
+    return Status::OK();
+  }
+
+  DataType dtype_;
 };
 
 template <>
-Status DeserializeSparseOpBase<string>::Deserialize(const string& serialized,
-                                                    Tensor* result) {
+Status DeserializeSparseOp<string>::Deserialize(const string& serialized,
+                                                Tensor* result) {
   TensorProto proto;
   if (!ParseProtoUnlimited(&proto, serialized)) {
     return errors::InvalidArgument("Could not parse serialized proto");
@@ -484,39 +533,24 @@ Status DeserializeSparseOpBase<string>::Deserialize(const string& serialized,
   return Status::OK();
 }
 
-#define REGISTER_KERNELS(type)                                        \
-  REGISTER_KERNEL_BUILDER(Name("DeserializeSparse")                   \
-                              .Device(DEVICE_CPU)                     \
-                              .TypeConstraint<type>("dtype")          \
-                              .TypeConstraint<string>("Tserialized"), \
-                          DeserializeSparseOp<type, string>)
-
-TF_CALL_ALL_TYPES(REGISTER_KERNELS);
-#undef REGISTER_KERNELS
+REGISTER_KERNEL_BUILDER(Name("DeserializeSparse")
+                            .Device(DEVICE_CPU)
+                            .TypeConstraint<string>("Tserialized"),
+                        DeserializeSparseOp<string>)
 
-#define REGISTER_KERNELS(type)                                \
-  REGISTER_KERNEL_BUILDER(Name("DeserializeManySparse")       \
-                              .Device(DEVICE_CPU)             \
-                              .TypeConstraint<type>("dtype"), \
-                          DeserializeSparseOp<type, string>)
-
-TF_CALL_ALL_TYPES(REGISTER_KERNELS);
-#undef REGISTER_KERNELS
+REGISTER_KERNEL_BUILDER(Name("DeserializeManySparse").Device(DEVICE_CPU),
+                        DeserializeSparseOp<string>)
 
 template <>
-Status DeserializeSparseOpBase<Variant>::Deserialize(const Variant& serialized,
-                                                     Tensor* result) {
+Status DeserializeSparseOp<Variant>::Deserialize(const Variant& serialized,
+                                                 Tensor* result) {
   *result = *serialized.get<Tensor>();
   return Status::OK();
 }
 
-#define REGISTER_KERNELS(type)                                         \
-  REGISTER_KERNEL_BUILDER(Name("DeserializeSparse")                    \
-                              .Device(DEVICE_CPU)                      \
-                              .TypeConstraint<type>("dtype")           \
-                              .TypeConstraint<Variant>("Tserialized"), \
-                          DeserializeSparseOp<type, Variant>)
+REGISTER_KERNEL_BUILDER(Name("DeserializeSparse")
+                            .Device(DEVICE_CPU)
+                            .TypeConstraint<Variant>("Tserialized"),
+                        DeserializeSparseOp<Variant>)
 
-TF_CALL_ALL_TYPES(REGISTER_KERNELS);
-#undef REGISTER_KERNELS
 }  // namespace tensorflow
diff --git a/tensorflow/core/util/sparse/sparse_tensor.h b/tensorflow/core/util/sparse/sparse_tensor.h
index 0ea74c38b1..e816c282c8 100644
--- a/tensorflow/core/util/sparse/sparse_tensor.h
+++ b/tensorflow/core/util/sparse/sparse_tensor.h
@@ -69,6 +69,21 @@ class SparseTensor {
     CHECK_EQ(shape.size(), dims_) << "Shape rank must be SparseTensor rank.";
   }
 
+  SparseTensor(const SparseTensor& other)
+      : SparseTensor(other.ix_, other.vals_, other.shape_, other.order_) {}
+
+  SparseTensor(SparseTensor&& other)
+      : SparseTensor(std::move(other.ix_), std::move(other.vals_),
+                     std::move(other.shape_), std::move(other.order_)) {}
+
+  SparseTensor& operator=(const SparseTensor& other) {
+    ix_ = other.ix_;
+    vals_ = other.vals_;
+    shape_ = other.shape_;
+    order_ = other.order_;
+    return *this;
+  }
+
   std::size_t num_entries() const { return ix_.dim_size(0); }
 
   int dims() const { return shape_.size(); }
-- 
GitLab


From f6bf99e0c240b070a5e67f1568bfa3e1504c4185 Mon Sep 17 00:00:00 2001
From: "Joshua V. Dillon" <jvdillon@google.com>
Date: Fri, 8 Dec 2017 12:03:57 -0800
Subject: [PATCH 0810/1225] Add unittest for
 `tf.contrib.distributions.MixtureSameFamily` `batch_shape`.

PiperOrigin-RevId: 178407717
---
 .../python/kernel_tests/mixture_same_family_test.py   | 11 +++++++++++
 .../distributions/python/ops/mixture_same_family.py   |  6 +++---
 2 files changed, 14 insertions(+), 3 deletions(-)

diff --git a/tensorflow/contrib/distributions/python/kernel_tests/mixture_same_family_test.py b/tensorflow/contrib/distributions/python/kernel_tests/mixture_same_family_test.py
index ece6bc077d..ff6092fc26 100644
--- a/tensorflow/contrib/distributions/python/kernel_tests/mixture_same_family_test.py
+++ b/tensorflow/contrib/distributions/python/kernel_tests/mixture_same_family_test.py
@@ -45,6 +45,17 @@ class MixtureSameFamilyTest(test_util.VectorDistributionTestHelpers,
       self.assertEqual([4, 5], x.shape)
       self.assertEqual([4, 5], log_prob_x.shape)
 
+  def testSampleAndLogProbBatch(self):
+    with self.test_session():
+      gm = mixture_same_family_lib.MixtureSameFamily(
+          mixture_distribution=categorical_lib.Categorical(probs=[[0.3, 0.7]]),
+          components_distribution=normal_lib.Normal(
+              loc=[[-1., 1]], scale=[[0.1, 0.5]]))
+      x = gm.sample([4, 5], seed=42)
+      log_prob_x = gm.log_prob(x)
+      self.assertEqual([4, 5, 1], x.shape)
+      self.assertEqual([4, 5, 1], log_prob_x.shape)
+
   def testSampleAndLogProbShapesBroadcastMix(self):
     mix_probs = np.float32([.3, .7])
     bern_probs = np.float32([[.4, .6], [.25, .75]])
diff --git a/tensorflow/contrib/distributions/python/ops/mixture_same_family.py b/tensorflow/contrib/distributions/python/ops/mixture_same_family.py
index 0623b2c726..0ca236c376 100644
--- a/tensorflow/contrib/distributions/python/ops/mixture_same_family.py
+++ b/tensorflow/contrib/distributions/python/ops/mixture_same_family.py
@@ -320,10 +320,10 @@ class MixtureSameFamily(distribution.Distribution):
         return array_ops.shape(d.batch_shape_tensor())[0]
       dist_batch_ndims = _get_ndims(self)
       cat_batch_ndims = _get_ndims(self.mixture_distribution)
-      pad_ndims = distribution_util.pick_vector(
+      pad_ndims = array_ops.where(
           self.mixture_distribution.is_scalar_batch(),
-          [dist_batch_ndims],
-          [dist_batch_ndims - cat_batch_ndims])[0]
+          dist_batch_ndims,
+          dist_batch_ndims - cat_batch_ndims)
       s = array_ops.shape(x)
       x = array_ops.reshape(x, shape=array_ops.concat([
           s[:-1],
-- 
GitLab


From 213d4be96d63ab92ef4b659f5881a10b450023db Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 8 Dec 2017 12:27:03 -0800
Subject: [PATCH 0811/1225] [XLA:GPU] Support conditional in GPU backend.

PiperOrigin-RevId: 178410596
---
 .../compiler/xla/service/gpu/ir_emitter.cc    | 31 +++++++++++++++++++
 .../compiler/xla/service/gpu/ir_emitter.h     |  2 ++
 .../xla/service/gpu/ir_emitter_unnested.cc    |  5 +++
 tensorflow/compiler/xla/tests/BUILD           |  7 +++--
 4 files changed, 43 insertions(+), 2 deletions(-)

diff --git a/tensorflow/compiler/xla/service/gpu/ir_emitter.cc b/tensorflow/compiler/xla/service/gpu/ir_emitter.cc
index 44a314f8c4..f64e93024f 100644
--- a/tensorflow/compiler/xla/service/gpu/ir_emitter.cc
+++ b/tensorflow/compiler/xla/service/gpu/ir_emitter.cc
@@ -727,6 +727,37 @@ Status IrEmitter::HandleRng(HloInstruction* random) {
       .EmitLoop(IrName(random));
 }
 
+Status IrEmitter::HandleConditional(HloInstruction* conditional) {
+  auto pred = conditional->operand(0);
+  auto true_arg = conditional->operand(1);
+  auto false_arg = conditional->operand(2);
+
+  llvm::Value* conditional_result = GetBasePointer(*conditional);
+
+  llvm::LoadInst* pred_value = ir_builder_.CreateLoad(
+      GetBasePointer(*pred),
+      llvm_ir::AsStringRef(IrName(conditional, "load_predicate_value")));
+  llvm::Value* pred_cond = ir_builder_.CreateICmpNE(
+      pred_value,
+      llvm::ConstantInt::get(llvm_ir::PrimitiveTypeToIrType(PRED, module_), 0),
+      llvm_ir::AsStringRef(IrName(conditional, "boolean_predicate")));
+  llvm_ir::LlvmIfData if_data = llvm_ir::EmitIfThenElse(
+      pred_cond, IrName(conditional, "if_then_else"), &ir_builder_);
+
+  SetToFirstInsertPoint(if_data.true_block, &ir_builder_);
+  TF_RETURN_IF_ERROR(EmitCallToNestedComputation(
+      *conditional->true_computation(), {GetBasePointer(*true_arg)},
+      conditional_result));
+
+  SetToFirstInsertPoint(if_data.false_block, &ir_builder_);
+  TF_RETURN_IF_ERROR(EmitCallToNestedComputation(
+      *conditional->false_computation(), {GetBasePointer(*false_arg)},
+      conditional_result));
+
+  SetToFirstInsertPoint(if_data.after_block, &ir_builder_);
+  return Status::OK();
+}
+
 llvm_ir::IrArray::Index IrEmitter::EmitOperandArrayLoopNest(
     const llvm_ir::IrArray& operand_array, int64 reduction_dimension,
     tensorflow::StringPiece name_suffix, llvm_ir::ForLoopNest* loop_nest) {
diff --git a/tensorflow/compiler/xla/service/gpu/ir_emitter.h b/tensorflow/compiler/xla/service/gpu/ir_emitter.h
index 080e1f1fa4..08bbbe36c7 100644
--- a/tensorflow/compiler/xla/service/gpu/ir_emitter.h
+++ b/tensorflow/compiler/xla/service/gpu/ir_emitter.h
@@ -95,6 +95,7 @@ class IrEmitter : public DfsHloVisitorWithDefault {
   Status HandleCall(HloInstruction* call) override;
   Status HandleCustomCall(HloInstruction* custom_call) override;
   Status HandleRng(HloInstruction* random) override;
+  Status HandleConditional(HloInstruction* conditional) override;
 
   Status FinishVisit(HloInstruction* root) override { return Status::OK(); }
 
@@ -234,6 +235,7 @@ class IrEmitterUnnested : public IrEmitter {
   // IrEmitterUnnested handles the following instructions differently from
   // IrEmitter.
   Status HandleCopy(HloInstruction* copy) override;
+  Status HandleConditional(HloInstruction* conditional) override;
   Status HandleConvolution(HloInstruction* convolution) override;
   Status HandleDot(HloInstruction* dot) override;
   Status HandleFusion(HloInstruction* fusion) override;
diff --git a/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc b/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc
index ec7f3c75c4..8dbc90ee1f 100644
--- a/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc
+++ b/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc
@@ -261,6 +261,11 @@ Status IrEmitterUnnested::HandleDot(HloInstruction* dot) {
   return IrEmitter::HandleDot(dot);
 }
 
+Status IrEmitterUnnested::HandleConditional(HloInstruction* conditional) {
+  thunk_sequence_->push_back(BuildKernelThunk(conditional));
+  return IrEmitter::HandleConditional(conditional);
+}
+
 Status IrEmitterUnnested::HandleConvolution(HloInstruction* convolution) {
   if (ImplementedAsDnnConvolution(*convolution)) {
     thunk_sequence_->emplace_back(BuildConvolutionThunk(convolution));
diff --git a/tensorflow/compiler/xla/tests/BUILD b/tensorflow/compiler/xla/tests/BUILD
index 63d8b30368..6f03f1a4e0 100644
--- a/tensorflow/compiler/xla/tests/BUILD
+++ b/tensorflow/compiler/xla/tests/BUILD
@@ -440,8 +440,11 @@ xla_test(
 xla_test(
     name = "conditional_test",
     srcs = ["conditional_test.cc"],
-    # Currently, Conditional is supported only in the CPU backend.
-    backends = ["cpu"],
+    # Currently, Conditional is supported only in CPU and GPU backends.
+    backends = [
+        "cpu",
+        "gpu",
+    ],
     deps = [
         "//tensorflow/compiler/xla:xla_data_proto",
         "//tensorflow/compiler/xla/client:computation_builder",
-- 
GitLab


From 1afc6149ed0649971d83fe8e9748056285dcf332 Mon Sep 17 00:00:00 2001
From: Ian Langmore <langmore@google.com>
Date: Fri, 8 Dec 2017 13:22:45 -0800
Subject: [PATCH 0812/1225] BUGFIX:  Call convert_to_tensor on input in
 fill_triangular.  Also change use placeholder_with_default in unit test.

PiperOrigin-RevId: 178417998
---
 tensorflow/python/kernel_tests/distributions/util_test.py | 2 +-
 tensorflow/python/ops/distributions/util.py               | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/tensorflow/python/kernel_tests/distributions/util_test.py b/tensorflow/python/kernel_tests/distributions/util_test.py
index 8fd26a1c9a..5950241141 100644
--- a/tensorflow/python/kernel_tests/distributions/util_test.py
+++ b/tensorflow/python/kernel_tests/distributions/util_test.py
@@ -587,7 +587,7 @@ class FillTriangularTest(test.TestCase):
     x_ = np.asarray(x_)
     with self.test_session() as sess:
       static_shape = None if use_deferred_shape else x_.shape
-      x_pl = array_ops.placeholder(dtype=x_.dtype, shape=static_shape)
+      x_pl = array_ops.placeholder_with_default(x_, shape=static_shape)
       # Add `zeros_like(x)` such that x's value and gradient are identical. We
       # do this so we can ensure each gradient value is mapped to the right
       # gradient location.  (Not doing this means the gradient wrt `x` is simple
diff --git a/tensorflow/python/ops/distributions/util.py b/tensorflow/python/ops/distributions/util.py
index 41b86f7940..28c74bf981 100644
--- a/tensorflow/python/ops/distributions/util.py
+++ b/tensorflow/python/ops/distributions/util.py
@@ -751,6 +751,7 @@ def fill_triangular(x, upper=False, name=None):
   """
 
   with ops.name_scope(name, "fill_triangular", values=[x]):
+    x = ops.convert_to_tensor(x, name="x")
     if x.shape.with_rank_at_least(1)[-1].value is not None:
       # Formula derived by solving for n: m = n(n+1)/2.
       m = np.int32(x.shape[-1].value)
-- 
GitLab


From dc04e89bc6f0421bf77ac69f21c1f2f57618f53c Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 8 Dec 2017 13:25:49 -0800
Subject: [PATCH 0813/1225] Adding support for new TensorFlow operators. Also
 adding a transformation to convert an ExpandDims into a Reshape op.

PiperOrigin-RevId: 178418377
---
 tensorflow/contrib/lite/toco/BUILD            |   1 +
 .../convert_expanddims_to_reshape.cc          | 101 +++++++++++
 .../graph_transformations.h                   |   1 +
 .../propagate_array_data_types.cc             |  11 +-
 .../propagate_fixed_sizes.cc                  |  19 +-
 .../graph_transformations/remove_unused_op.cc |   5 +-
 .../resolve_mean_attributes.cc                |  26 ++-
 .../resolve_strided_slice_attributes.cc       |   4 +
 .../contrib/lite/toco/import_tensorflow.cc    | 171 ++++++++++++++++--
 tensorflow/contrib/lite/toco/model.h          | 137 ++++++++++++++
 tensorflow/contrib/lite/toco/toco_tooling.cc  |   1 +
 tensorflow/contrib/lite/toco/tooling_util.cc  |   9 +
 12 files changed, 452 insertions(+), 34 deletions(-)
 create mode 100644 tensorflow/contrib/lite/toco/graph_transformations/convert_expanddims_to_reshape.cc

diff --git a/tensorflow/contrib/lite/toco/BUILD b/tensorflow/contrib/lite/toco/BUILD
index 0bad7ddb6e..78c036fa77 100644
--- a/tensorflow/contrib/lite/toco/BUILD
+++ b/tensorflow/contrib/lite/toco/BUILD
@@ -169,6 +169,7 @@ cc_library(
 cc_library(
     name = "graph_transformations",
     srcs = [
+        "graph_transformations/convert_expanddims_to_reshape.cc",
         "graph_transformations/convert_pure_conv_to_depthwise.cc",
         "graph_transformations/create_im2col_arrays.cc",
         "graph_transformations/dequantize.cc",
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/convert_expanddims_to_reshape.cc b/tensorflow/contrib/lite/toco/graph_transformations/convert_expanddims_to_reshape.cc
new file mode 100644
index 0000000000..3bde9b0169
--- /dev/null
+++ b/tensorflow/contrib/lite/toco/graph_transformations/convert_expanddims_to_reshape.cc
@@ -0,0 +1,101 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include <memory>
+#include <string>
+#include <unordered_map>
+#include <vector>
+
+#include "absl/strings/str_cat.h"
+#include "tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.h"
+#include "tensorflow/contrib/lite/toco/model.h"
+#include "tensorflow/contrib/lite/toco/tooling_util.h"
+#include "tensorflow/core/platform/logging.h"
+
+namespace toco {
+
+bool ConvertExpandDimsToReshape::Run(Model* model, std::size_t op_index) {
+  auto expand_it = model->operators.begin() + op_index;
+  if (expand_it->get()->type != OperatorType::kExpandDims) {
+    return false;
+  }
+  ExpandDimsOperator* expand_op =
+      static_cast<ExpandDimsOperator*>(expand_it->get());
+  CHECK_EQ(expand_op->inputs.size(), 2);
+  CHECK_EQ(expand_op->outputs.size(), 1);
+
+  const auto& input_array = *model->arrays[expand_op->inputs[0]];
+  if (!input_array.has_shape()) {
+    // Yield until input dims have been resolved.
+    return false;
+  }
+  if (input_array.shape().dimensions_count() == 0) {
+    // Input array cannot be 0-D.
+    // (Unsure if this is TF behavior, but was required to get a test to pass.)
+    return false;
+  }
+
+  const auto& axis_array = *model->arrays[expand_op->inputs[1]];
+  if (!axis_array.has_shape()) {
+    // Yield until input axis array shape has been resolved.
+    return false;
+  }
+  CHECK_EQ(RequiredBufferSizeForShape(axis_array.shape()), 1);
+  if (!axis_array.buffer) {
+    // Yield until the input axis array is constant
+    return false;
+  }
+  int axis = axis_array.GetBuffer<ArrayDataType::kInt32>().data[0];
+  std::vector<int> reshape_dims(input_array.shape().dims());
+  if (axis < 0) {
+    axis = reshape_dims.size();
+  }
+  reshape_dims.insert(reshape_dims.begin() + axis, 1);
+
+  // The input tensor has shape, and the axis input is constant. We can now
+  // replace ExpandDims with a Reshape.
+  auto* reshape_op = new TensorFlowReshapeOperator;
+
+  // Copy inputs
+  reshape_op->inputs.push_back(expand_op->inputs[0]);
+  reshape_op->outputs = expand_op->outputs;
+
+  // Create a new input array
+  string axis_array_name = expand_op->inputs[1];
+  string shape_array_name = toco::AvailableArrayName(*model, axis_array_name);
+  Array& shape_array = model->GetOrCreateArray(shape_array_name);
+  *(shape_array.mutable_shape()->mutable_dims()) = {
+      1, static_cast<int>(reshape_dims.size())};
+  reshape_op->inputs.push_back(shape_array_name);
+  shape_array.data_type = ArrayDataType::kInt32;
+  auto& shape_buffer = shape_array.GetMutableBuffer<ArrayDataType::kInt32>();
+  shape_buffer.data = reshape_dims;
+
+  // Delete axis array if unused
+  if (IsDiscardableArray(*model, axis_array_name) &&
+      CountOpsWithInput(*model, axis_array_name) == 1 &&
+      !GetOpWithOutput(*model, axis_array_name)) {
+    model->arrays.erase(axis_array_name);
+  }
+
+  // Replace the operator in the graph.
+  const auto reshape_it = model->operators.emplace(expand_it, reshape_op);
+  expand_it = reshape_it + 1;
+  CHECK_EQ(expand_it->get(), expand_op);
+  model->operators.erase(expand_it);
+
+  return true;
+}
+
+}  // namespace toco
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.h b/tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.h
index 9ad1b9622f..c1dc41170c 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.h
+++ b/tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.h
@@ -112,6 +112,7 @@ void RunGraphTransformations(Model* model, const string& message,
   };
 
 // List of all graph transformations
+DECLARE_GRAPH_TRANSFORMATION(ConvertExpandDimsToReshape)
 DECLARE_GRAPH_TRANSFORMATION(ConvertPureConvToDepthwise)
 DECLARE_GRAPH_TRANSFORMATION(EnsureBiasVectors)
 DECLARE_GRAPH_TRANSFORMATION(FuseActivationFunctions)
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/propagate_array_data_types.cc b/tensorflow/contrib/lite/toco/graph_transformations/propagate_array_data_types.cc
index 550e0408aa..1d92bcbccd 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/propagate_array_data_types.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/propagate_array_data_types.cc
@@ -59,13 +59,15 @@ bool PropagateArrayDataTypes::Run(Model* model, std::size_t op_index) {
              op->type == OperatorType::kTensorFlowGreaterEqual) {
     // These operators unconditionally produce bool outputs
     SetDataTypeForAllOutputs(model, op, ArrayDataType::kBool);
-  } else if (op->type == OperatorType::kTensorFlowShape) {
+  } else if (op->type == OperatorType::kRank ||
+             op->type == OperatorType::kTensorFlowShape) {
     // These operators are assumed to produce int32 outputs.
     SetDataTypeForAllOutputs(model, op, ArrayDataType::kInt32);
   } else if (op->type == OperatorType::kTensorFlowSplit ||
-             op->type == OperatorType::kTensorFlowConcat) {
+             op->type == OperatorType::kTensorFlowConcat ||
+             op->type == OperatorType::kFill) {
     // These operators produce an output with the same type as their 2nd input
-    CHECK_GT(op->inputs.size(), 1);
+    CHECK_GE(op->inputs.size(), 2);
     const ArrayDataType data_type = model->arrays[op->inputs[1]]->data_type;
     SetDataTypeForAllOutputs(model, op, data_type);
   } else if (op->type == OperatorType::kCast) {
@@ -83,6 +85,9 @@ bool PropagateArrayDataTypes::Run(Model* model, std::size_t op_index) {
       auto data_type = unsupported_op->output_data_types[i];
       model->arrays[output]->data_type = data_type;
     }
+  } else if (op->type == OperatorType::kExpandDims) {
+    // Yield on ExpandDim until it is converted to Reshape
+    return false;
   } else {
     // These operators produce outputs with the same type as their 1st input
     CHECK_GT(op->inputs.size(), 0);
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/propagate_fixed_sizes.cc b/tensorflow/contrib/lite/toco/graph_transformations/propagate_fixed_sizes.cc
index f29127c6b0..4530806ede 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/propagate_fixed_sizes.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/propagate_fixed_sizes.cc
@@ -699,7 +699,10 @@ void ProcessSpaceToBatchNDOperator(Model* model, SpaceToBatchNDOperator* op) {
     return;
   }
   const auto& input_shape = input_array.shape();
-  CHECK_EQ(input_shape.dimensions_count(), 4);
+  if (input_shape.dimensions_count() != 4) {
+    // This method only handles input dimensions of 4
+    return;
+  }
   const auto input_height = input_shape.dims(1);
   const auto input_width = input_shape.dims(2);
 
@@ -978,6 +981,8 @@ bool PropagateFixedSizes::Run(Model* model, std::size_t op_index) {
     case OperatorType::kSub:
     case OperatorType::kMul:
     case OperatorType::kDiv:
+    case OperatorType::kFloorDiv:
+    case OperatorType::kFloorMod:
     case OperatorType::kTensorFlowLess:
     case OperatorType::kTensorFlowLessEqual:
     case OperatorType::kTensorFlowGreater:
@@ -989,6 +994,10 @@ bool PropagateFixedSizes::Run(Model* model, std::size_t op_index) {
     case OperatorType::kConv:
       ProcessConvOperator(model, static_cast<ConvOperator*>(op));
       break;
+    case OperatorType::kTransposeConv:
+      // Unimplemented, hopefully another graph transformation will drop it or
+      // rewrite it.
+      break;
     case OperatorType::kDepthwiseConv:
       ProcessDepthwiseConvOperator(model,
                                    static_cast<DepthwiseConvOperator*>(op));
@@ -1063,8 +1072,14 @@ bool PropagateFixedSizes::Run(Model* model, std::size_t op_index) {
       // a more general non-depth concatenation that will hopefully be dropped,
       // or else at the moment we will abort.
       break;
+    case OperatorType::kExpandDims:
+    case OperatorType::kFill:
+    case OperatorType::kRange:
+    case OperatorType::kRank:
     case OperatorType::kTensorFlowShape:
-      // Unimplemented, hopefully another graph transformation will drop it or
+    case OperatorType::kStack:
+    case OperatorType::kTranspose:
+      // Unimplemented. Hopefully another graph transformation will drop it or
       // rewrite it.
       break;
     case OperatorType::kReorderAxes:
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/remove_unused_op.cc b/tensorflow/contrib/lite/toco/graph_transformations/remove_unused_op.cc
index 674a46815b..e6cca8acf3 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/remove_unused_op.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/remove_unused_op.cc
@@ -47,10 +47,7 @@ bool RemoveUnusedOp::Run(Model* model, std::size_t op_index) {
     bool found_output_as_rnn_state_array = false;
     for (const auto& rnn_state : model->flags.rnn_states()) {
       if (output == rnn_state.state_array()) {
-        CHECK(op->type == OperatorType::kTensorFlowUnsupported);
-        CHECK_EQ(static_cast<const TensorFlowUnsupportedOperator*>(op)
-                     ->tensorflow_op,
-                 "Fill");
+        CHECK(op->type == OperatorType::kFill);
         found_output_as_rnn_state_array = true;
         break;
       }
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_mean_attributes.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_mean_attributes.cc
index d25c773f19..5d6ac331be 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_mean_attributes.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_mean_attributes.cc
@@ -29,22 +29,28 @@ bool ResolveMeanAttributes::Run(Model* model, std::size_t op_index) {
   if (mean_op->type != OperatorType::kMean) return false;
   auto* op = static_cast<MeanOperator*>(mean_op);
 
-  if (!op->reduction_indices.empty()) return false;
+  if (!op->reduction_indices.empty()) {
+    // Attributes already resolved
+    return false;
+  }
   if (op->inputs.size() != 2) return false;
   if (!IsConstantParameterArray(*model, op->inputs[1])) return false;
 
   const auto& indices_array = *model->arrays[op->inputs[1]];
   if (!indices_array.has_shape()) return false;
 
-  op->reduction_indices = indices_array.GetBuffer<ArrayDataType::kInt32>().data;
-
-  // At the moment, we only support simultaneous reduction over width and
-  // height. This is mainly limited by the fact that currently, the runtime
-  // arrays are always 4-dimensional.
-  CHECK_EQ(op->reduction_indices.size(), 2);
-  CHECK((op->reduction_indices[0] == 1 && op->reduction_indices[1] == 2) ||
-        (op->reduction_indices[0] == 2 && op->reduction_indices[1] == 1));
-
+  // We only support simultaneous reduction over width and height.
+  std::vector<int> reduction_indices =
+      indices_array.GetBuffer<ArrayDataType::kInt32>().data;
+  if (reduction_indices.size() != 2) {
+    return false;
+  }
+  if (!((reduction_indices[0] == 1 && reduction_indices[1] == 2) ||
+        (reduction_indices[0] == 2 && reduction_indices[1] == 1))) {
+    return false;
+  }
+
+  op->reduction_indices = reduction_indices;
   return true;
 }
 
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_strided_slice_attributes.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_strided_slice_attributes.cc
index 851511268d..97946182ef 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_strided_slice_attributes.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_strided_slice_attributes.cc
@@ -39,6 +39,10 @@ bool ResolveStridedSliceAttributes::Run(Model* model, std::size_t op_index) {
 
   const auto& start_array = *model->arrays[op->inputs[1]];
   if (!start_array.has_shape()) return false;
+  if (toco::RequiredBufferSizeForShape(start_array.shape()) != 4) {
+    // Only 4D arrays are supported for now.
+    return false;
+  }
 
   const auto& stop_array = *model->arrays[op->inputs[2]];
   if (!stop_array.has_shape()) return false;
diff --git a/tensorflow/contrib/lite/toco/import_tensorflow.cc b/tensorflow/contrib/lite/toco/import_tensorflow.cc
index a021fafce6..34d38f1fcb 100644
--- a/tensorflow/contrib/lite/toco/import_tensorflow.cc
+++ b/tensorflow/contrib/lite/toco/import_tensorflow.cc
@@ -25,7 +25,6 @@ limitations under the License.
 #include "absl/strings/numbers.h"
 #include "absl/strings/str_cat.h"
 #include "absl/strings/str_split.h"
-//#include "absl/strings/string_view_utils.h"
 #include "absl/strings/strip.h"
 #include "tensorflow/contrib/lite/toco/model.h"
 #include "tensorflow/contrib/lite/toco/model_flags.pb.h"
@@ -1135,18 +1134,6 @@ void ConvertNoOpOperator(const NodeDef& node,
                          const TensorFlowImportFlags& tf_import_flags,
                          Model* model) {}
 
-ArrayDataType GetArrayDataType(tensorflow::DataType tf_data_type) {
-  if (tf_data_type == DT_UINT8) {
-    return ArrayDataType::kUint8;
-  } else if (tf_data_type == DT_INT32) {
-    return ArrayDataType::kInt32;
-  } else if (tf_data_type == DT_FLOAT) {
-    return ArrayDataType::kFloat;
-  } else {
-    return ArrayDataType::kNone;
-  }
-}
-
 void ConvertCastOperator(const NodeDef& node,
                          const TensorFlowImportFlags& tf_import_flags,
                          Model* model) {
@@ -1155,8 +1142,8 @@ void ConvertCastOperator(const NodeDef& node,
   const auto tf_src_dtype = GetDataTypeAttr(node, "SrcT");
   const auto tf_dst_dtype = GetDataTypeAttr(node, "DstT");
   auto* op = new CastOperator;
-  op->src_data_type = GetArrayDataType(tf_src_dtype);
-  op->dst_data_type = GetArrayDataType(tf_dst_dtype);
+  op->src_data_type = ConvertDataType(tf_src_dtype);
+  op->dst_data_type = ConvertDataType(tf_dst_dtype);
   op->inputs.push_back(node.input(0));
   op->outputs.push_back(node.name());
   model->operators.emplace_back(op);
@@ -1374,6 +1361,142 @@ void ConvertSvdfOperator(const NodeDef& node,
   model->operators.emplace_back(op);
 }
 
+// This is just bare bones support to get the shapes to propagate.
+void ConvertTransposeConvOperator(const NodeDef& node,
+                                  const TensorFlowImportFlags& tf_import_flags,
+                                  Model* model) {
+  CHECK_EQ(node.op(), "Conv2DBackpropInput");
+  CHECK_EQ(GetInputsCount(node, tf_import_flags), 3);
+  auto* op = new TransposeConvOperator;
+  op->inputs.push_back(node.input(2));
+  op->inputs.push_back(node.input(1));
+  op->inputs.push_back(node.input(0));
+  op->outputs.push_back(node.name());
+  const auto& strides = GetListAttr(node, "strides");
+  CHECK_EQ(strides.i_size(), 4);
+  CHECK_EQ(strides.i(0), 1);
+  op->stride_height = strides.i(1);
+  op->stride_width = strides.i(2);
+  CHECK_EQ(strides.i(3), 1);
+  auto const& padding = GetStringAttr(node, "padding");
+  if (padding == "SAME") {
+    op->padding.type = PaddingType::kSame;
+  } else if (padding == "VALID") {
+    op->padding.type = PaddingType::kValid;
+  } else {
+    LOG(FATAL) << "Only SAME and VALID padding supported on "
+                  "Conv2DBackpropInput nodes.";
+  }
+  model->operators.emplace_back(op);
+}
+
+void ConvertExpandDimsOperator(const NodeDef& node,
+                               const TensorFlowImportFlags& tf_import_flags,
+                               Model* model) {
+  CHECK_EQ(node.op(), "ExpandDims");
+  CHECK_EQ(GetInputsCount(node, tf_import_flags), 2);
+  auto* op = new ExpandDimsOperator;
+  op->inputs.push_back(node.input(0));
+  op->inputs.push_back(node.input(1));
+  op->outputs.push_back(node.name());
+  model->operators.emplace_back(op);
+}
+
+void ConvertFillOperator(const NodeDef& node,
+                         const TensorFlowImportFlags& tf_import_flags,
+                         Model* model) {
+  CHECK_EQ(node.op(), "Fill");
+  CHECK_EQ(GetInputsCount(node, tf_import_flags), 2);
+  auto* op = new FillOperator;
+  op->inputs.push_back(node.input(0));
+  op->inputs.push_back(node.input(1));
+  op->outputs.push_back(node.name());
+  model->operators.emplace_back(op);
+}
+
+void ConvertFloorDivOperator(const NodeDef& node,
+                             const TensorFlowImportFlags& tf_import_flags,
+                             Model* model) {
+  CHECK_EQ(node.op(), "FloorDiv");
+  CHECK_EQ(GetInputsCount(node, tf_import_flags), 2);
+  auto* op = new FloorDivOperator;
+  op->inputs.push_back(node.input(0));
+  op->inputs.push_back(node.input(1));
+  op->outputs.push_back(node.name());
+  model->operators.emplace_back(op);
+}
+
+void ConvertFloorModOperator(const NodeDef& node,
+                             const TensorFlowImportFlags& tf_import_flags,
+                             Model* model) {
+  CHECK(node.op() == "FloorMod");
+  CHECK_EQ(GetInputsCount(node, tf_import_flags), 2);
+  auto* op = new FloorModOperator;
+  op->inputs.push_back(node.input(0));
+  op->inputs.push_back(node.input(1));
+  op->outputs.push_back(node.name());
+  model->operators.emplace_back(op);
+}
+
+void ConvertRangeOperator(const NodeDef& node,
+                          const TensorFlowImportFlags& tf_import_flags,
+                          Model* model) {
+  CHECK_EQ(node.op(), "Range");
+  CHECK_EQ(GetInputsCount(node, tf_import_flags), 3);
+  auto* op = new RangeOperator;
+  if (HasAttr(node, "Tidx")) {
+    const auto dtype = toco::GetDataTypeAttr(node, "Tidx");
+    CHECK(dtype == DT_UINT8 || dtype == DT_INT32 || dtype == DT_INT64 ||
+          dtype == DT_FLOAT);
+    op->dtype = ConvertDataType(dtype);
+  }
+  op->inputs.push_back(node.input(0));
+  op->inputs.push_back(node.input(1));
+  op->inputs.push_back(node.input(2));
+  op->outputs.push_back(node.name());
+  model->operators.emplace_back(op);
+}
+
+void ConvertRankOperator(const NodeDef& node,
+                         const TensorFlowImportFlags& tf_import_flags,
+                         Model* model) {
+  CHECK_EQ(node.op(), "Rank");
+  CHECK_EQ(GetInputsCount(node, tf_import_flags), 1);
+  auto* op = new RankOperator;
+  op->inputs.push_back(node.input(0));
+  op->outputs.push_back(node.name());
+  model->operators.emplace_back(op);
+}
+
+void ConvertStackOperator(const NodeDef& node,
+                          const TensorFlowImportFlags& tf_import_flags,
+                          Model* model) {
+  CHECK((node.op() == "Stack") || (node.op() == "Pack"));
+  auto* op = new StackOperator;
+  const int num_inputs = GetInputsCount(node, tf_import_flags);
+  CHECK_GE(num_inputs, 1);
+  CHECK_EQ(num_inputs, GetIntAttr(node, "N"));
+  for (int i = 0; i < num_inputs; ++i) {
+    op->inputs.push_back(node.input(i));
+  }
+  // Both "Stack" and "Pack" have the "axis" attribute.
+  op->axis = GetIntAttr(node, "axis");
+  op->outputs.push_back(node.name());
+  model->operators.emplace_back(op);
+}
+
+void ConvertTransposeOperator(const NodeDef& node,
+                              const TensorFlowImportFlags& tf_import_flags,
+                              Model* model) {
+  CHECK_EQ(node.op(), "Transpose");
+  CHECK_EQ(GetInputsCount(node, tf_import_flags), 2);
+  auto* op = new TransposeOperator;
+  op->inputs.push_back(node.input(0));
+  op->inputs.push_back(node.input(1));
+  op->outputs.push_back(node.name());
+  model->operators.emplace_back(op);
+}
+
 // Some TensorFlow ops only occur in graph cycles, representing
 // control flow. We do not currently support control flow, so we wouldn't
 // be able to fully support such graphs, including performing inference,
@@ -1568,6 +1691,8 @@ std::unique_ptr<Model> ImportTensorFlowGraphDef(
       ConvertConstOperator(node, tf_import_flags, model);
     } else if (node.op() == "Conv2D") {
       ConvertConvOperator(node, tf_import_flags, model);
+    } else if (node.op() == "Conv2DBackpropInput") {
+      ConvertTransposeConvOperator(node, tf_import_flags, model);
     } else if (node.op() == "DepthwiseConv2dNative") {
       ConvertDepthwiseConvOperator(node, tf_import_flags, model);
     } else if (node.op() == "DepthToSpace") {
@@ -1690,6 +1815,22 @@ std::unique_ptr<Model> ImportTensorFlowGraphDef(
       ConvertSvdfOperator(node, tf_import_flags, model);
     } else if (node.op() == "NextIteration") {
       ConvertOperatorSpecialCasedAsRNNBackEdge(node, tf_import_flags, model);
+    } else if (node.op() == "ExpandDims") {
+      ConvertExpandDimsOperator(node, tf_import_flags, model);
+    } else if (node.op() == "Fill") {
+      ConvertFillOperator(node, tf_import_flags, model);
+    } else if (node.op() == "FloorDiv") {
+      ConvertFloorDivOperator(node, tf_import_flags, model);
+    } else if (node.op() == "FloorMod") {
+      ConvertFloorModOperator(node, tf_import_flags, model);
+    } else if (node.op() == "Range") {
+      ConvertRangeOperator(node, tf_import_flags, model);
+    } else if (node.op() == "Rank") {
+      ConvertRankOperator(node, tf_import_flags, model);
+    } else if (node.op() == "Stack" || node.op() == "Pack") {
+      ConvertStackOperator(node, tf_import_flags, model);
+    } else if (node.op() == "Transpose") {
+      ConvertTransposeOperator(node, tf_import_flags, model);
     } else {
       ConvertUnsupportedOperator(node, tf_import_flags, model);
     }
diff --git a/tensorflow/contrib/lite/toco/model.h b/tensorflow/contrib/lite/toco/model.h
index 04b0813523..57911b1e89 100644
--- a/tensorflow/contrib/lite/toco/model.h
+++ b/tensorflow/contrib/lite/toco/model.h
@@ -41,6 +41,10 @@ enum class OperatorType {
   kSpaceToDepth,
   kDequantize,
   kDiv,
+  kExpandDims,
+  kFill,
+  kFloorDiv,
+  kFloorMod,
   kFullyConnected,
   kL2Normalization,
   kL2Pool,
@@ -50,17 +54,21 @@ enum class OperatorType {
   kMaxPool,
   kFakeQuant,
   kMul,
+  kRange,
+  kRank,
   kRelu,
   kRelu1,
   kRelu6,
   kSoftmax,
   kSub,
   kTanh,
+  kTransposeConv,
   kCast,
   kFloor,
   kGather,
   kResizeBilinear,
   kSpaceToBatchND,
+  kStack,
   kBatchToSpaceND,
   kPad,
   kStridedSlice,
@@ -98,6 +106,7 @@ enum class OperatorType {
   kTensorFlowSum,
   kTensorFlowSwitch,
   kTensorFlowTile,
+  kTranspose,
   // An unsupported TF operation. It's only needed to be able to represent TF
   // graph internally and is expected to be dropped by graph transformations.
   kTensorFlowUnsupported,
@@ -302,6 +311,10 @@ struct ConvOperator : Operator {
   Padding padding;
   int stride_width = 0;
   int stride_height = 0;
+  // A dilation_rate of 0 is invalid and this field is an optional attribute.
+  // Thus initializing it to 1 to allow default conv behavior when the
+  // attribute is not present.
+  int dilation_rate = 1;
 };
 
 // Depthwise-separable convolution operator.
@@ -754,6 +767,102 @@ struct SqueezeOperator : Operator {
   std::vector<int> squeeze_dims;
 };
 
+// Inputs:
+//   inputs[0]: required: the input activations array
+//   inputs[1]: required: the Conv weights
+//   channel.
+//
+// Outputs:
+//   outputs[0]: required: the output activations array
+//
+// TensorFlow equivalent: Conv2DBackpropInput
+struct TransposeConvOperator : Operator {
+  TransposeConvOperator() : Operator(OperatorType::kTransposeConv) {}
+  Padding padding;
+  int stride_width = 0;
+  int stride_height = 0;
+};
+
+// Given a tensor input, this operation inserts a dimension of 1 at the
+// dimension index axis of input's shape. The dimension index axis starts at
+// zero; if you specify a negative number for axis it is counted backward from
+// the end.
+//
+// Inputs:
+//   inputs[0]: required: input tensor
+//   inputs[1]: required: 0-D (scalar). Specifies the dimension index at which
+//   to expand the shape of input
+//
+// TensorFlow equivalent: ExpandDims
+struct ExpandDimsOperator : Operator {
+  ExpandDimsOperator() : Operator(OperatorType::kExpandDims) {}
+};
+
+// Ceates a tensor of shape dims and fills it with the given scalar value.
+// Output type will be the same as the given scalar value.
+//
+// Inputs:
+//   inputs[0]: required: 1-D (int32) - the shape of the output tensor
+//   inputs[1]: required: 0-D (scalar) - value to fill the tensor with
+//
+// TensorFlow equivalent: Fill
+struct FillOperator : Operator {
+  FillOperator() : Operator(OperatorType::kFill) {}
+};
+
+// Element-wise floor division operator.
+//
+// Inputs:
+//   inputs[0]: required: the left-hand side array
+//   inputs[1]: required: the right-hand side array
+//
+// TensorFlow equivalent: FloorDiv
+struct FloorDivOperator : Operator {
+  FloorDivOperator() : Operator(OperatorType::kFloorDiv) {}
+};
+
+// Element-wise floor mod operator.
+//
+// Inputs:
+//   inputs[0]: required: the left-hand side array
+//   inputs[1]: required: the right-hand side array
+//
+// TensorFlow equivalent: FloorMod
+struct FloorModOperator : Operator {
+  FloorModOperator() : Operator(OperatorType::kFloorMod) {}
+};
+
+// Creates a sequence of numbers that begins at start and extends by increments
+// of delta up to but not including limit.
+//
+// The dtype of the resulting tensor is inferred from the inputs unless it is
+// provided explicitly.
+//
+// Inputs:
+//   inputs[0]: required: the start
+//   inputs[1]: required: the limit
+//   inputs[2]: required: the delta
+//
+// TensorFlow equivalent: Range
+struct RangeOperator : Operator {
+  RangeOperator() : Operator(OperatorType::kRange) {}
+  ArrayDataType dtype = ArrayDataType::kNone;
+};
+
+// Rank operator. Extracts the rank of the tensor.
+//
+// Inputs:
+//   inputs[0]: required: the input array
+//
+// This operation outputs a 0-D integer tensor representing the rank of
+// the input.
+//
+// TensorFlow equivalent: Rank.  We currently assume that the output is int32
+// and not int64.  The output type could be stored herein.
+struct RankOperator : Operator {
+  RankOperator() : Operator(OperatorType::kRank) {}
+};
+
 // Element-wise reciprocal-square-root (x^-0.5) operator.
 //
 // Inputs:
@@ -764,6 +873,21 @@ struct TensorFlowRsqrtOperator : Operator {
   TensorFlowRsqrtOperator() : Operator(OperatorType::kTensorFlowRsqrt) {}
 };
 
+// Stacks a list of rank-R tensors into one rank-(R+1) tensor.
+//
+// Packs the list of tensors in values into a tensor with rank one higher than
+// each tensor in values, by packing them along the axis dimension. Given a list
+// of length N of tensors of shape (A, B, C);.
+//
+// Inputs: this operator accepts any number >= 1 of inputs.
+//   inputs[i]: the i-th array to merge.
+//
+// TensorFlow equivalent: Stack or Pack
+struct StackOperator : Operator {
+  StackOperator() : Operator(OperatorType::kStack) {}
+  int axis = 0;
+};
+
 // Shape operator. Extracts the shape of the tensor.
 //
 // Inputs:
@@ -798,6 +922,19 @@ struct TensorFlowSquareOperator : Operator {
   TensorFlowSquareOperator() : Operator(OperatorType::kTensorFlowSquare) {}
 };
 
+// Transposes a tensor.
+//
+// By default, this operation performs a regular matrix transpose on 2-D input
+// tensors.
+//
+// Inputs:
+//   inputs[0]: required: the input array
+//
+// TensorFlow equivalent: Transpose
+struct TransposeOperator : Operator {
+  TransposeOperator() : Operator(OperatorType::kTranspose) {}
+};
+
 // Element-wise subtraction operator.
 //
 // Inputs:
diff --git a/tensorflow/contrib/lite/toco/toco_tooling.cc b/tensorflow/contrib/lite/toco/toco_tooling.cc
index 161b94f1d6..a93fade6af 100644
--- a/tensorflow/contrib/lite/toco/toco_tooling.cc
+++ b/tensorflow/contrib/lite/toco/toco_tooling.cc
@@ -51,6 +51,7 @@ void CheckUnsupportedOperations(const Model& model) {
 void MakeGeneralGraphTransformationsSet(
     GraphTransformationsSet* transformations) {
   CHECK(transformations->empty());
+  transformations->Add(new ConvertExpandDimsToReshape);
   transformations->Add(new ResolveReshapeAttributes);
   transformations->Add(new PropagateArrayDataTypes);
   transformations->Add(new PropagateFixedSizes);
diff --git a/tensorflow/contrib/lite/toco/tooling_util.cc b/tensorflow/contrib/lite/toco/tooling_util.cc
index 7413cb9dd0..65bf1f215a 100644
--- a/tensorflow/contrib/lite/toco/tooling_util.cc
+++ b/tensorflow/contrib/lite/toco/tooling_util.cc
@@ -223,6 +223,10 @@ const char* OperatorTypeName(OperatorType type) {
     HANDLE_OPERATORTYPENAME_CASE(Tanh)
     HANDLE_OPERATORTYPENAME_CASE(TensorFlowAll)
     HANDLE_OPERATORTYPENAME_CASE(TensorFlowAssert)
+    HANDLE_OPERATORTYPENAME_CASE(ExpandDims)
+    HANDLE_OPERATORTYPENAME_CASE(Fill)
+    HANDLE_OPERATORTYPENAME_CASE(FloorMod)
+    HANDLE_OPERATORTYPENAME_CASE(FloorDiv)
     HANDLE_OPERATORTYPENAME_CASE(TensorFlowGreater)
     HANDLE_OPERATORTYPENAME_CASE(TensorFlowGreaterEqual)
     HANDLE_OPERATORTYPENAME_CASE(TensorFlowIdentity)
@@ -236,6 +240,9 @@ const char* OperatorTypeName(OperatorType type) {
     HANDLE_OPERATORTYPENAME_CASE(TensorFlowMinimum)
     HANDLE_OPERATORTYPENAME_CASE(Pad)
     HANDLE_OPERATORTYPENAME_CASE(StridedSlice)
+    HANDLE_OPERATORTYPENAME_CASE(Stack)
+    HANDLE_OPERATORTYPENAME_CASE(Range)
+    HANDLE_OPERATORTYPENAME_CASE(Rank)
     HANDLE_OPERATORTYPENAME_CASE(TensorFlowReshape)
     HANDLE_OPERATORTYPENAME_CASE(Squeeze)
     HANDLE_OPERATORTYPENAME_CASE(TensorFlowRsqrt)
@@ -248,6 +255,8 @@ const char* OperatorTypeName(OperatorType type) {
     HANDLE_OPERATORTYPENAME_CASE(Sub)
     HANDLE_OPERATORTYPENAME_CASE(TensorFlowSum)
     HANDLE_OPERATORTYPENAME_CASE(TensorFlowTile)
+    HANDLE_OPERATORTYPENAME_CASE(Transpose)
+    HANDLE_OPERATORTYPENAME_CASE(TransposeConv)
     HANDLE_OPERATORTYPENAME_CASE(TensorFlowConcat)
     HANDLE_OPERATORTYPENAME_CASE(TensorFlowConcatV2)
     HANDLE_OPERATORTYPENAME_CASE(Cast)
-- 
GitLab


From 2f16f3afdcde16cf0de2f051c57b32cd61a12ec0 Mon Sep 17 00:00:00 2001
From: Bjarke Hammersholt Roune <broune@google.com>
Date: Fri, 8 Dec 2017 13:37:33 -0800
Subject: [PATCH 0814/1225] Add bfloat16 support to the CPU backend.

 * A few ops, in particular Convert, directly support bfloat16.
 * Added an HLO pass HloElementTypeConverter which converts graphs away from bfloat16
   without changing the numerics, using Convert ops.

This can be improved in many ways, but the feature here is that one can run XLA graphs that use bfloat16 on the CPU backend and get the correct result.

PiperOrigin-RevId: 178419829
---
 tensorflow/compiler/xla/primitive_util.h      |   7 +
 tensorflow/compiler/xla/service/BUILD         |  16 +
 tensorflow/compiler/xla/service/cpu/BUILD     |   1 +
 .../compiler/xla/service/cpu/cpu_compiler.cc  |   2 +
 .../compiler/xla/service/cpu/ir_emitter.cc    |   2 +-
 .../xla/service/elemental_ir_emitter.cc       | 289 +++++++++++-------
 .../xla/service/hlo_element_type_converter.cc | 137 +++++++++
 .../xla/service/hlo_element_type_converter.h  |  49 +++
 .../compiler/xla/service/llvm_ir/llvm_util.cc |  12 +
 tensorflow/compiler/xla/tests/BUILD           |   2 -
 .../compiler/xla/tests/reduce_window_test.cc  |   2 +-
 tensorflow/compiler/xla/tests/test_utils.cc   |  16 +
 12 files changed, 416 insertions(+), 119 deletions(-)
 create mode 100644 tensorflow/compiler/xla/service/hlo_element_type_converter.cc
 create mode 100644 tensorflow/compiler/xla/service/hlo_element_type_converter.h

diff --git a/tensorflow/compiler/xla/primitive_util.h b/tensorflow/compiler/xla/primitive_util.h
index 19c6a13888..cb4583d198 100644
--- a/tensorflow/compiler/xla/primitive_util.h
+++ b/tensorflow/compiler/xla/primitive_util.h
@@ -26,6 +26,13 @@ limitations under the License.
 namespace xla {
 namespace primitive_util {
 
+// The number of exponent bits in a BF16 value.
+const int kBFloat16ExponentBits = 8;
+
+// The number of mantissa bits in a BF16 value. There is an implicit leading
+// 1, so there is an implicit additional bit of precision.
+const int kBFloat16MantissaBits = 7;
+
 // Returns the XLA primitive type (eg, F32) corresponding to the given
 // template parameter native type (eg, float).
 template <typename NativeT>
diff --git a/tensorflow/compiler/xla/service/BUILD b/tensorflow/compiler/xla/service/BUILD
index 1023d3e5dc..baa4afde2d 100644
--- a/tensorflow/compiler/xla/service/BUILD
+++ b/tensorflow/compiler/xla/service/BUILD
@@ -1891,6 +1891,22 @@ tf_cc_test(
     ],
 )
 
+cc_library(
+    name = "hlo_element_type_converter",
+    srcs = ["hlo_element_type_converter.cc"],
+    hdrs = ["hlo_element_type_converter.h"],
+    deps = [
+        ":hlo",
+        ":hlo_evaluator",
+        ":hlo_pass",
+        ":hlo_query",
+        "//tensorflow/compiler/xla:literal_util",
+        "//tensorflow/compiler/xla:shape_util",
+        "//tensorflow/compiler/xla:types",
+        "//tensorflow/core:lib",
+    ],
+)
+
 cc_library(
     name = "device_memory_allocator",
     srcs = ["device_memory_allocator.cc"],
diff --git a/tensorflow/compiler/xla/service/cpu/BUILD b/tensorflow/compiler/xla/service/cpu/BUILD
index 32abb1b559..fe537dfdf2 100644
--- a/tensorflow/compiler/xla/service/cpu/BUILD
+++ b/tensorflow/compiler/xla/service/cpu/BUILD
@@ -110,6 +110,7 @@ cc_library(
         "//tensorflow/compiler/xla/service:hlo_constant_folding",
         "//tensorflow/compiler/xla/service:hlo_cse",
         "//tensorflow/compiler/xla/service:hlo_dce",
+        "//tensorflow/compiler/xla/service:hlo_element_type_converter",
         "//tensorflow/compiler/xla/service:hlo_ordering",
         "//tensorflow/compiler/xla/service:hlo_pass",
         "//tensorflow/compiler/xla/service:hlo_pass_pipeline",
diff --git a/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc b/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc
index 6c72ef6849..a476a75027 100644
--- a/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc
+++ b/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc
@@ -68,6 +68,7 @@ limitations under the License.
 #include "tensorflow/compiler/xla/service/hlo_constant_folding.h"
 #include "tensorflow/compiler/xla/service/hlo_cse.h"
 #include "tensorflow/compiler/xla/service/hlo_dce.h"
+#include "tensorflow/compiler/xla/service/hlo_element_type_converter.h"
 #include "tensorflow/compiler/xla/service/hlo_instruction.h"
 #include "tensorflow/compiler/xla/service/hlo_opcode.h"
 #include "tensorflow/compiler/xla/service/hlo_ordering.h"
@@ -318,6 +319,7 @@ Status CpuCompiler::RunHloPasses(HloModule* module, bool is_aot_compile) {
       [](const Shape&, const Shape&) { return true; },
       /*enable_dot_strength_reduction=*/false);
   pipeline.AddPass<HloCSE>(/*is_layout_sensitive=*/true);
+  pipeline.AddPass<HloElementTypeConverter>(BF16, F32);
   // Outline ops in the entry computation into calls to subcomputations.
   const int max_parallelism =
       module->config().intra_op_parallelism_threads() > 0
diff --git a/tensorflow/compiler/xla/service/cpu/ir_emitter.cc b/tensorflow/compiler/xla/service/cpu/ir_emitter.cc
index 85d9668f89..dd027986b2 100644
--- a/tensorflow/compiler/xla/service/cpu/ir_emitter.cc
+++ b/tensorflow/compiler/xla/service/cpu/ir_emitter.cc
@@ -516,7 +516,7 @@ Status IrEmitter::HandleReduceWindow(HloInstruction* reduce_window) {
   HloComputation* function = reduce_window->to_apply();
   TF_RETURN_IF_ERROR(ElementTypesSameAndSupported(
       /*instruction=*/*reduce_window, /*operands=*/{operand},
-      /*supported_types=*/{F32}));
+      /*supported_types=*/{F32, BF16}));
 
   // TODO(b/31410564): Implement dilation for reduce-window.
   if (window_util::HasDilation(window)) {
diff --git a/tensorflow/compiler/xla/service/elemental_ir_emitter.cc b/tensorflow/compiler/xla/service/elemental_ir_emitter.cc
index b9407818cd..7e88bbd631 100644
--- a/tensorflow/compiler/xla/service/elemental_ir_emitter.cc
+++ b/tensorflow/compiler/xla/service/elemental_ir_emitter.cc
@@ -50,11 +50,161 @@ using llvm_ir::IrName;
 using llvm_ir::SetToFirstInsertPoint;
 using tensorflow::strings::StrCat;
 
+namespace {
+
+llvm::Value* EmitReducePrecisionFloat(llvm::Value* x, int64 exponent_bits,
+                                      int64 mantissa_bits,
+                                      llvm::IRBuilder<>* ir_builder) {
+  // Integer and float types for casting and constant generation.
+  llvm::Type* float_type = x->getType();
+  llvm::IntegerType* int_type = ir_builder->getInt32Ty();
+
+  // Cast the input value to an integer for bitwise manipulation.
+  llvm::Value* x_as_int = ir_builder->CreateBitCast(x, int_type);
+
+  if (mantissa_bits < 23) {
+    // Last remaining mantissa bit.
+    const uint32_t last_mantissa_bit_mask = 1u << (23 - mantissa_bits);
+
+    // Compute rounding bias for round-to-nearest with ties to even.  This is
+    // equal to a base value of 0111... plus one bit if the last remaining
+    // mantissa bit is 1.
+    const uint32_t base_rounding_bias = (last_mantissa_bit_mask >> 1) - 1;
+    llvm::Value* x_last_mantissa_bit = ir_builder->CreateLShr(
+        ir_builder->CreateAnd(
+            x_as_int, llvm::ConstantInt::get(int_type, last_mantissa_bit_mask)),
+        (23 - mantissa_bits));
+    llvm::Value* x_rounding_bias = ir_builder->CreateAdd(
+        x_last_mantissa_bit,
+        llvm::ConstantInt::get(int_type, base_rounding_bias));
+
+    // Add rounding bias, and mask out truncated bits.  Note that the case
+    // where adding the rounding bias overflows into the exponent bits is
+    // correct; the non-masked mantissa bits will all be zero, and the
+    // exponent will be incremented by one.
+    const uint32_t truncation_mask = ~(last_mantissa_bit_mask - 1);
+    x_as_int = ir_builder->CreateAdd(x_as_int, x_rounding_bias);
+    x_as_int = ir_builder->CreateAnd(
+        x_as_int, llvm::ConstantInt::get(int_type, truncation_mask));
+  }
+
+  if (exponent_bits < 8) {
+    // Masks for f32 values.
+    const uint32_t f32_sign_bit_mask = 1u << 31;
+    const uint32_t f32_exp_bits_mask = 0xffu << 23;
+
+    // An exponent of 2^(n-1)-1 -- that is, 0111... with the zero in the most-
+    // significant bit -- is equal to 1.0f for all exponent sizes.  Adding
+    // 2^(n-1)-1 to this gives us the highest non-infinite exponent for a bit-
+    // size of n, and subtracting 2^(n-1)-1 from this gives us the lowest'
+    // exponent (corresponding to 0.0f).
+    //
+    // Thus, the f32 exponent corresponding to the highest non-infinite
+    // exponent for a bit size of n is (2^7-1) + 2^(n-1)-1, and the f32
+    // exponent corresponding to the lowest exponent for a bit size of n is
+    // (2^7-1) - 2^(n-1)-1.
+    //
+    // Note that we have already checked that exponents_bits >= 1.
+    const uint32_t f32_exponent_bias = (1 << 7) - 1;
+    const uint32_t reduced_exponent_bias = (1 << (exponent_bits - 1)) - 1;
+    const uint32_t reduced_max_exponent =
+        f32_exponent_bias + reduced_exponent_bias;
+    const uint32_t reduced_min_exponent =
+        f32_exponent_bias - reduced_exponent_bias;
+
+    // Do we overflow or underflow?
+    llvm::Value* x_exponent = ir_builder->CreateAnd(
+        x_as_int, llvm::ConstantInt::get(int_type, f32_exp_bits_mask));
+    llvm::Value* x_overflows = ir_builder->CreateICmpUGT(
+        x_exponent,
+        llvm::ConstantInt::get(int_type, reduced_max_exponent << 23));
+    llvm::Value* x_underflows = ir_builder->CreateICmpULE(
+        x_exponent,
+        llvm::ConstantInt::get(int_type, reduced_min_exponent << 23));
+
+    // Compute appropriately-signed values of zero and infinity.
+    llvm::Value* x_signed_zero = ir_builder->CreateAnd(
+        x_as_int, llvm::ConstantInt::get(int_type, f32_sign_bit_mask));
+    llvm::Value* x_signed_inf = ir_builder->CreateOr(
+        x_signed_zero, llvm::ConstantInt::get(int_type, f32_exp_bits_mask));
+
+    // Force to zero or infinity if overflow or underflow.  (Note that this
+    // truncates all denormal values to zero, rather than rounding them.)
+    x_as_int = ir_builder->CreateSelect(x_overflows, x_signed_inf, x_as_int);
+    x_as_int = ir_builder->CreateSelect(x_underflows, x_signed_zero, x_as_int);
+  }
+
+  // Cast the result back to a floating-point type.
+  llvm::Value* result = ir_builder->CreateBitCast(x_as_int, float_type);
+
+  // Correct result for NaN inputs.
+  //
+  // The exponent handling will "normalize" NaN values to infinities, which is
+  // undesirable (except in the case with no mantissa bits, in which case it
+  // is mandatory).  This logic also handles cases where mantissa-rounding
+  // causes a NaN's mantissa to overflow into the exponent bits, which would
+  // otherwise create an erroneous zero value.
+  //
+  // If the fast-math flags are set to assume no NaNs, the comparison is likely
+  // to be optimized away, so there's no point in even emitting it.
+  if (!ir_builder->getFastMathFlags().noNaNs()) {
+    llvm::Value* x_is_nan = ir_builder->CreateFCmpUNO(x, x);
+
+    if (mantissa_bits > 0) {
+      result = ir_builder->CreateSelect(x_is_nan, x, result);
+    } else {
+      result = ir_builder->CreateSelect(
+          x_is_nan, llvm::ConstantFP::getInfinity(float_type), result);
+    }
+  }
+  return result;
+}
+
+llvm::Value* EmitF32ToBF16(llvm::Value* f32_value,
+                           llvm::IRBuilder<>* ir_builder) {
+  auto reduced_precision = EmitReducePrecisionFloat(
+      f32_value,
+      /*exponent_bits=*/primitive_util::kBFloat16ExponentBits,
+      /*mantissa_bits=*/primitive_util::kBFloat16MantissaBits, ir_builder);
+  auto as_int32 =
+      ir_builder->CreateBitCast(reduced_precision, ir_builder->getInt32Ty());
+  auto shifted = ir_builder->CreateLShr(as_int32, 16);
+  auto truncated = ir_builder->CreateTrunc(shifted, ir_builder->getInt16Ty());
+  return ir_builder->CreateBitCast(truncated, ir_builder->getInt16Ty());
+}
+
+llvm::Value* EmitBF16ToF32(llvm::Value* bf16_value,
+                           llvm::IRBuilder<>* ir_builder) {
+  auto as_int16 =
+      ir_builder->CreateBitCast(bf16_value, ir_builder->getInt16Ty());
+  auto as_int32 = ir_builder->CreateZExt(as_int16, ir_builder->getInt32Ty());
+  auto shifted = ir_builder->CreateShl(as_int32, 16);
+  return ir_builder->CreateBitCast(shifted, ir_builder->getFloatTy());
+}
+
+llvm::Value* EmitIntegralToFloating(llvm::Value* integer_value,
+                                    PrimitiveType from_type,
+                                    PrimitiveType to_type, llvm::Module* module,
+                                    llvm::IRBuilder<>* ir_builder) {
+  if (primitive_util::IsSignedIntegralType(from_type)) {
+    return ir_builder->CreateSIToFP(
+        integer_value, llvm_ir::PrimitiveTypeToIrType(to_type, module));
+  } else {
+    CHECK(primitive_util::IsUnsignedIntegralType(from_type) ||
+          from_type == PRED);
+    return ir_builder->CreateUIToFP(
+        integer_value, llvm_ir::PrimitiveTypeToIrType(to_type, module));
+  }
+}
+
+}  // namespace
+
 StatusOr<llvm::Value*> ElementalIrEmitter::EmitUnaryOp(
     const HloInstruction* op, llvm::Value* operand_value) const {
   if (op->opcode() == HloOpcode::kCopy) {
     return operand_value;
-  } else if (operand_value->getType()->isIntegerTy()) {
+  } else if (ShapeUtil::ElementIsIntegral(op->operand(0)->shape()) ||
+             op->operand(0)->shape().element_type() == PRED) {
     return EmitIntegerUnaryOp(op, operand_value);
   } else if (ShapeUtil::ElementIsComplex(op->operand(0)->shape())) {
     return EmitComplexUnaryOp(op, operand_value);
@@ -79,15 +229,14 @@ StatusOr<llvm::Value*> ElementalIrEmitter::EmitIntegerUnaryOp(
             primitive_util::IsSignedIntegralType(to_type));
       }
       if (primitive_util::IsFloatingPointType(to_type)) {
-        if (primitive_util::IsSignedIntegralType(from_type)) {
-          return ir_builder_->CreateSIToFP(
-              operand_value, llvm_ir::PrimitiveTypeToIrType(to_type, module_));
-        }
-        if (primitive_util::IsUnsignedIntegralType(from_type) ||
-            from_type == PRED) {
-          return ir_builder_->CreateUIToFP(
-              operand_value, llvm_ir::PrimitiveTypeToIrType(to_type, module_));
+        if (to_type == BF16) {
+          return EmitF32ToBF16(
+              EmitIntegralToFloating(operand_value, from_type, F32, module_,
+                                     ir_builder_),
+              ir_builder_);
         }
+        return EmitIntegralToFloating(operand_value, from_type, to_type,
+                                      module_, ir_builder_);
       }
       if (primitive_util::IsComplexType(to_type)) {
         auto to_ir_component_type = llvm_ir::PrimitiveTypeToIrType(
@@ -207,6 +356,17 @@ StatusOr<llvm::Value*> ElementalIrEmitter::EmitFloatUnaryOp(
                 llvm_ir::PrimitiveTypeToIrType(to_component_type, module_)),
             nullptr);
       }
+      if (from_type == BF16) {
+        TF_RET_CHECK(to_type != BF16);
+        operand_value = EmitBF16ToF32(operand_value, ir_builder_);
+        from_type = F32;
+        if (from_type == to_type) {
+          return operand_value;
+        }
+      }
+      if (from_type == F32 && to_type == BF16) {
+        return EmitF32ToBF16(operand_value, ir_builder_);
+      }
       if (primitive_util::IsFloatingPointType(to_type)) {
         return ir_builder_->CreateFPCast(
             operand_value, llvm_ir::PrimitiveTypeToIrType(to_type, module_));
@@ -449,7 +609,8 @@ StatusOr<llvm::Value*> ElementalIrEmitter::EmitBinaryOp(
     const HloInstruction* op, llvm::Value* lhs_value,
     llvm::Value* rhs_value) const {
   PrimitiveType operand_type = op->operand(0)->shape().element_type();
-  if (lhs_value->getType()->isIntegerTy()) {
+  if (ShapeUtil::ElementIsIntegral(op->operand(0)->shape()) ||
+      operand_type == PRED) {
     return EmitIntegerBinaryOp(
         op, lhs_value, rhs_value,
         primitive_util::IsSignedIntegralType(operand_type));
@@ -717,111 +878,9 @@ StatusOr<llvm::Value*> ElementalIrEmitter::EmitReducePrecision(
   if (hlo->operand(0)->shape().element_type() != F32) {
     return Unimplemented("reduce-precision only implemented for F32");
   }
-
-  // Integer and float types for casting and constant generation.
-  llvm::Type* float_type = x->getType();
-  llvm::IntegerType* int_type = ir_builder_->getInt32Ty();
-
-  // Cast the input value to an integer for bitwise manipulation.
-  llvm::Value* x_as_int = ir_builder_->CreateBitCast(x, int_type);
-
-  if (hlo->mantissa_bits() < 23) {
-    // Last remaining mantissa bit.
-    const uint32_t last_mantissa_bit_mask = 1u << (23 - hlo->mantissa_bits());
-
-    // Compute rounding bias for round-to-nearest with ties to even.  This is
-    // equal to a base value of 0111... plus one bit if the last remaining
-    // mantissa bit is 1.
-    const uint32_t base_rounding_bias = (last_mantissa_bit_mask >> 1) - 1;
-    llvm::Value* x_last_mantissa_bit = ir_builder_->CreateLShr(
-        ir_builder_->CreateAnd(
-            x_as_int, llvm::ConstantInt::get(int_type, last_mantissa_bit_mask)),
-        (23 - hlo->mantissa_bits()));
-    llvm::Value* x_rounding_bias = ir_builder_->CreateAdd(
-        x_last_mantissa_bit,
-        llvm::ConstantInt::get(int_type, base_rounding_bias));
-
-    // Add rounding bias, and mask out truncated bits.  Note that the case
-    // where adding the rounding bias overflows into the exponent bits is
-    // correct; the non-masked mantissa bits will all be zero, and the
-    // exponent will be incremented by one.
-    const uint32_t truncation_mask = ~(last_mantissa_bit_mask - 1);
-    x_as_int = ir_builder_->CreateAdd(x_as_int, x_rounding_bias);
-    x_as_int = ir_builder_->CreateAnd(
-        x_as_int, llvm::ConstantInt::get(int_type, truncation_mask));
-  }
-
-  if (hlo->exponent_bits() < 8) {
-    // Masks for f32 values.
-    const uint32_t f32_sign_bit_mask = 1u << 31;
-    const uint32_t f32_exp_bits_mask = 0xffu << 23;
-
-    // An exponent of 2^(n-1)-1 -- that is, 0111... with the zero in the most-
-    // significant bit -- is equal to 1.0f for all exponent sizes.  Adding
-    // 2^(n-1)-1 to this gives us the highest non-infinite exponent for a bit-
-    // size of n, and subtracting 2^(n-1)-1 from this gives us the lowest'
-    // exponent (corresponding to 0.0f).
-    //
-    // Thus, the f32 exponent corresponding to the highest non-infinite
-    // exponent for a bit size of n is (2^7-1) + 2^(n-1)-1, and the f32
-    // exponent corresponding to the lowest exponent for a bit size of n is
-    // (2^7-1) - 2^(n-1)-1.
-    //
-    // Note that we have already checked that exponents_bits >= 1.
-    const uint32_t f32_exponent_bias = (1 << 7) - 1;
-    const uint32_t reduced_exponent_bias =
-        (1 << (hlo->exponent_bits() - 1)) - 1;
-    const uint32_t reduced_max_exponent =
-        f32_exponent_bias + reduced_exponent_bias;
-    const uint32_t reduced_min_exponent =
-        f32_exponent_bias - reduced_exponent_bias;
-
-    // Do we overflow or underflow?
-    llvm::Value* x_exponent = ir_builder_->CreateAnd(
-        x_as_int, llvm::ConstantInt::get(int_type, f32_exp_bits_mask));
-    llvm::Value* x_overflows = ir_builder_->CreateICmpUGT(
-        x_exponent,
-        llvm::ConstantInt::get(int_type, reduced_max_exponent << 23));
-    llvm::Value* x_underflows = ir_builder_->CreateICmpULE(
-        x_exponent,
-        llvm::ConstantInt::get(int_type, reduced_min_exponent << 23));
-
-    // Compute appropriately-signed values of zero and infinity.
-    llvm::Value* x_signed_zero = ir_builder_->CreateAnd(
-        x_as_int, llvm::ConstantInt::get(int_type, f32_sign_bit_mask));
-    llvm::Value* x_signed_inf = ir_builder_->CreateOr(
-        x_signed_zero, llvm::ConstantInt::get(int_type, f32_exp_bits_mask));
-
-    // Force to zero or infinity if overflow or underflow.  (Note that this
-    // truncates all denormal values to zero, rather than rounding them.)
-    x_as_int = ir_builder_->CreateSelect(x_overflows, x_signed_inf, x_as_int);
-    x_as_int = ir_builder_->CreateSelect(x_underflows, x_signed_zero, x_as_int);
-  }
-
-  // Cast the result back to a floating-point type.
-  llvm::Value* result = ir_builder_->CreateBitCast(x_as_int, float_type);
-
-  // Correct result for NaN inputs.
-  //
-  // The exponent handling will "normalize" NaN values to infinities, which is
-  // undesirable (except in the case with no mantissa bits, in which case it
-  // is mandatory).  This logic also handles cases where mantissa-rounding
-  // causes a NaN's mantissa to overflow into the exponent bits, which would
-  // otherwise create an erroneous zero value.
-  //
-  // If the fast-math flags are set to assume no NaNs, the comparison is likely
-  // to be optimized away, so there's no point in even emitting it.
-  if (!ir_builder_->getFastMathFlags().noNaNs()) {
-    llvm::Value* x_is_nan = ir_builder_->CreateFCmpUNO(x, x);
-
-    if (hlo->mantissa_bits() > 0) {
-      result = ir_builder_->CreateSelect(x_is_nan, x, result);
-    } else {
-      result = ir_builder_->CreateSelect(
-          x_is_nan, llvm::ConstantFP::getInfinity(float_type), result);
-    }
-  }
-  return result;
+  return EmitReducePrecisionFloat(x, /*exponent_bits=*/hlo->exponent_bits(),
+                                  /*mantissa_bits=*/hlo->mantissa_bits(),
+                                  ir_builder_);
 }
 
 StatusOr<llvm::Value*> ElementalIrEmitter::EmitIntegerBinaryOp(
diff --git a/tensorflow/compiler/xla/service/hlo_element_type_converter.cc b/tensorflow/compiler/xla/service/hlo_element_type_converter.cc
new file mode 100644
index 0000000000..1773bb401d
--- /dev/null
+++ b/tensorflow/compiler/xla/service/hlo_element_type_converter.cc
@@ -0,0 +1,137 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/compiler/xla/service/hlo_element_type_converter.h"
+
+#include <memory>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "tensorflow/compiler/xla/layout_util.h"
+#include "tensorflow/compiler/xla/literal_util.h"
+#include "tensorflow/compiler/xla/service/dfs_hlo_visitor_with_default.h"
+#include "tensorflow/compiler/xla/service/hlo_computation.h"
+#include "tensorflow/compiler/xla/service/hlo_evaluator.h"
+#include "tensorflow/compiler/xla/service/hlo_instruction.h"
+#include "tensorflow/compiler/xla/service/hlo_opcode.h"
+#include "tensorflow/compiler/xla/service/hlo_query.h"
+#include "tensorflow/compiler/xla/shape_util.h"
+#include "tensorflow/compiler/xla/types.h"
+#include "tensorflow/core/lib/core/errors.h"
+
+namespace xla {
+namespace {
+
+HloInstruction* ToElementType(HloInstruction* hlo, PrimitiveType type) {
+  if (hlo->shape().element_type() != type) {
+    Shape shape = ShapeUtil::ChangeElementType(hlo->shape(), type);
+    hlo = hlo->parent()->AddInstruction(
+        HloInstruction::CreateConvert(shape, hlo));
+  }
+  CHECK_EQ(hlo->shape().element_type(), type);
+  return hlo;
+}
+
+bool HasOperandType(HloInstruction* hlo, PrimitiveType type) {
+  for (HloInstruction* operand : hlo->operands()) {
+    if (operand->shape().element_type() == type) {
+      return true;
+    }
+  }
+  return false;
+}
+
+}  // namespace
+
+HloElementTypeConverter::HloElementTypeConverter(
+    PrimitiveType eliminate_type, PrimitiveType replace_with_type)
+    : eliminate_type_(eliminate_type), replace_with_type_(replace_with_type) {}
+
+StatusOr<bool> HloElementTypeConverter::Run(HloModule* module) {
+  XLA_VLOG_LINES(
+      3, "HloElementTypeConverter::Run(), before:\n" + module->ToString());
+  bool changed = false;
+  for (auto* computation : module->computations()) {
+    for (auto* hlo : computation->MakeInstructionPostOrder()) {
+      // These are ops where it does not make sense to convert them.
+      if (hlo->opcode() == HloOpcode::kParameter ||
+          hlo->opcode() == HloOpcode::kConstant ||
+          hlo->opcode() == HloOpcode::kTuple ||
+          hlo->opcode() == HloOpcode::kConvert ||
+          hlo->opcode() == HloOpcode::kGetTupleElement ||
+          hlo->opcode() == HloOpcode::kInfeed ||
+          hlo->opcode() == HloOpcode::kOutfeed) {
+        continue;
+      }
+
+      // We cannot change a CustomCall since we have no way of adjusting the
+      // called binary to expect the updated type.
+      if (hlo->opcode() == HloOpcode::kCustomCall) {
+        continue;
+      }
+
+      // These are ops with embedded computations where it suffices to convert
+      // the embedded computations instead of converting the ops themselves.
+      if (hlo->opcode() == HloOpcode::kWhile ||
+          hlo->opcode() == HloOpcode::kCall ||
+          hlo->opcode() == HloOpcode::kFusion ||
+          hlo->opcode() == HloOpcode::kMap ||
+          hlo->opcode() == HloOpcode::kReduce ||
+          hlo->opcode() == HloOpcode::kReduceWindow ||
+          hlo->opcode() == HloOpcode::kSelectAndScatter ||
+          hlo->opcode() == HloOpcode::kConditional) {
+        continue;
+      }
+      TF_RET_CHECK(hlo->called_computations().empty()) << hlo->ToString();
+
+      if (!HasOperandType(hlo, eliminate_type_)) {
+        // If this CHECK fires, then this was an instruction that does not take
+        // the elimination type as an operand but it does return it. This pass
+        // does not have a feature to change the output type in that case, so
+        // instead of silently failing to eliminate the type, it fails loudly.
+        TF_RET_CHECK(hlo->shape().element_type() != eliminate_type_);
+        continue;
+      }
+
+      std::vector<HloInstruction*> new_operands;
+      for (HloInstruction* operand : hlo->operands()) {
+        if (operand->shape().element_type() == eliminate_type_) {
+          operand = ToElementType(operand, replace_with_type_);
+        }
+        new_operands.push_back(operand);
+      }
+
+      HloInstruction* new_hlo;
+      if (hlo->shape().element_type() == eliminate_type_) {
+        Shape shape =
+            ShapeUtil::ChangeElementType(hlo->shape(), replace_with_type_);
+        new_hlo = computation->AddInstruction(
+            hlo->CloneWithNewOperands(shape, new_operands, hlo->GetModule()));
+        new_hlo = ToElementType(new_hlo, eliminate_type_);
+      } else {
+        new_hlo = computation->AddInstruction(hlo->CloneWithNewOperands(
+            hlo->shape(), new_operands, hlo->GetModule()));
+      }
+      TF_RETURN_IF_ERROR(computation->ReplaceInstruction(hlo, new_hlo));
+      changed = true;
+    }
+  }
+  XLA_VLOG_LINES(
+      2, "HloElementTypeConverter::Run(), after:\n" + module->ToString());
+  return changed;
+}
+
+}  // namespace xla
diff --git a/tensorflow/compiler/xla/service/hlo_element_type_converter.h b/tensorflow/compiler/xla/service/hlo_element_type_converter.h
new file mode 100644
index 0000000000..2b109225d0
--- /dev/null
+++ b/tensorflow/compiler/xla/service/hlo_element_type_converter.h
@@ -0,0 +1,49 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_HLO_ELEMENT_TYPE_CONVERTER_H_
+#define TENSORFLOW_COMPILER_XLA_SERVICE_HLO_ELEMENT_TYPE_CONVERTER_H_
+
+#include "tensorflow/compiler/xla/service/hlo_module.h"
+#include "tensorflow/compiler/xla/service/hlo_pass_interface.h"
+
+namespace xla {
+
+// A pass that eliminates certain element types as the input or output of ops by
+// inserting Convert ops. This allows a backend to support an element type while
+// only actually implementing the Convert op for that element type. This is
+// generally not the fastest approach, but it works.
+class HloElementTypeConverter : public HloPassInterface {
+ public:
+  // eliminate_type is the type to eliminate as the input or output of ops,
+  // using Convert ops to replace it with replace_with_type.
+  HloElementTypeConverter(PrimitiveType eliminate_type,
+                          PrimitiveType replace_with_type);
+
+  tensorflow::StringPiece name() const override {
+    return "element_type_converter";
+  }
+
+  // Returns the pass on the module and returns whether the module was modified.
+  StatusOr<bool> Run(HloModule* module) override;
+
+ private:
+  PrimitiveType eliminate_type_;
+  PrimitiveType replace_with_type_;
+};
+
+}  // namespace xla
+
+#endif  // TENSORFLOW_COMPILER_XLA_SERVICE_HLO_ELEMENT_TYPE_CONVERTER_H_
diff --git a/tensorflow/compiler/xla/service/llvm_ir/llvm_util.cc b/tensorflow/compiler/xla/service/llvm_ir/llvm_util.cc
index ef5b6ad90e..9a0c94b1c7 100644
--- a/tensorflow/compiler/xla/service/llvm_ir/llvm_util.cc
+++ b/tensorflow/compiler/xla/service/llvm_ir/llvm_util.cc
@@ -142,6 +142,13 @@ llvm::Type* PrimitiveTypeToIrType(PrimitiveType element_type,
       return llvm::Type::getInt8Ty(module->getContext());
     case S16:
     case U16:
+    case BF16:
+      // For BF16 we just need some type that is 16 bits wide so that it will
+      // take up the right amount of space in memory. LLVM does not have a BF16
+      // type (the LLVM half type is IEEE 16 bit floating point, not bfloat), so
+      // we can't map it directly to an LLVM type. We will not map a BF16
+      // addition to an addition on this type (int16) - this is just the type
+      // used for storage.
       return llvm::Type::getInt16Ty(module->getContext());
     case S32:
     case U32:
@@ -280,6 +287,11 @@ llvm::Constant* LiteralToConstant(const Literal& literal, int64 dimension_index,
         value = llvm::ConstantFP::get(ir_element_type,
                                       literal.Get<float>(*multi_index));
         break;
+      case BF16:
+        value = llvm::ConstantInt::get(
+            ir_element_type,
+            tensorflow::bit_cast<uint16>(literal.Get<bfloat16>(*multi_index)));
+        break;
       case F64:
         value = llvm::ConstantFP::get(ir_element_type,
                                       literal.Get<double>(*multi_index));
diff --git a/tensorflow/compiler/xla/tests/BUILD b/tensorflow/compiler/xla/tests/BUILD
index 6f03f1a4e0..6af01ae80d 100644
--- a/tensorflow/compiler/xla/tests/BUILD
+++ b/tensorflow/compiler/xla/tests/BUILD
@@ -802,8 +802,6 @@ xla_test(
     name = "bfloat16_test",
     srcs = ["bfloat16_test.cc"],
     blacklisted_backends = [
-        "cpu",
-        "cpu_parallel",
         "gpu",
     ],
     shard_count = 40,
diff --git a/tensorflow/compiler/xla/tests/reduce_window_test.cc b/tensorflow/compiler/xla/tests/reduce_window_test.cc
index 330575a02e..b32df74312 100644
--- a/tensorflow/compiler/xla/tests/reduce_window_test.cc
+++ b/tensorflow/compiler/xla/tests/reduce_window_test.cc
@@ -53,7 +53,7 @@ class ReduceWindowTestBase : public ClientLibraryTestBase {
  public:
   ErrorSpec DefaultErrorSpec() const {
     if (use_bfloat16()) {
-      return ErrorSpec(1e-1, 3e-2);
+      return ErrorSpec(1e-1, 5e-2);
     } else {
       return ErrorSpec(1e-3, 1e-3);
     }
diff --git a/tensorflow/compiler/xla/tests/test_utils.cc b/tensorflow/compiler/xla/tests/test_utils.cc
index 93bce97a3e..780b292d1a 100644
--- a/tensorflow/compiler/xla/tests/test_utils.cc
+++ b/tensorflow/compiler/xla/tests/test_utils.cc
@@ -35,6 +35,19 @@ void PopulateWithRandomFloatingPointData(Literal* literal) {
       }));
 }
 
+// The standard library does not have a case for bfloat16, unsurprisingly, so we
+// handle that one specially.
+template <>
+void PopulateWithRandomFloatingPointData<bfloat16>(Literal* literal) {
+  CHECK_EQ(literal->shape().element_type(), BF16);
+  std::minstd_rand0 engine;
+  std::uniform_real_distribution<float> generator(0.0f, 1.0f);
+  TF_CHECK_OK(literal->Populate<bfloat16>(
+      [&](tensorflow::gtl::ArraySlice<int64> /*indices*/) {
+        return static_cast<bfloat16>(generator(engine));
+      }));
+}
+
 template <typename IntT>
 void PopulateWithRandomIntegralData(Literal* literal) {
   CHECK_EQ(literal->shape().element_type(),
@@ -171,6 +184,9 @@ StatusOr<std::unique_ptr<Literal>> MakeFakeLiteral(const Shape& shape) {
   }
   std::unique_ptr<Literal> literal = Literal::CreateFromShape(shape);
   switch (shape.element_type()) {
+    case BF16:
+      PopulateWithRandomFloatingPointData<bfloat16>(literal.get());
+      break;
     case F32:
       PopulateWithRandomFloatingPointData<float>(literal.get());
       break;
-- 
GitLab


From d94d6c45a56bc0871bb152a18818a1def3e17a7d Mon Sep 17 00:00:00 2001
From: Yao Zhang <yaozhang@google.com>
Date: Fri, 8 Dec 2017 13:48:39 -0800
Subject: [PATCH 0815/1225] Add DataFormatVecPermute op.

PiperOrigin-RevId: 178421287
---
 .../api_def_DataFormatVecPermute.pbtxt        | 31 ++++++++
 tensorflow/core/kernels/data_format_ops.cc    | 71 +++++++++++++++++--
 tensorflow/core/kernels/data_format_ops.h     | 27 +++++++
 .../core/kernels/data_format_ops_gpu.cu.cc    |  2 +
 tensorflow/core/ops/nn_ops.cc                 | 17 +++++
 tensorflow/python/ops/nn_test.py              | 11 +++
 6 files changed, 154 insertions(+), 5 deletions(-)
 create mode 100644 tensorflow/core/api_def/base_api/api_def_DataFormatVecPermute.pbtxt

diff --git a/tensorflow/core/api_def/base_api/api_def_DataFormatVecPermute.pbtxt b/tensorflow/core/api_def/base_api/api_def_DataFormatVecPermute.pbtxt
new file mode 100644
index 0000000000..c2fa61aaed
--- /dev/null
+++ b/tensorflow/core/api_def/base_api/api_def_DataFormatVecPermute.pbtxt
@@ -0,0 +1,31 @@
+op {
+  graph_op_name: "DataFormatVecPermute"
+  in_arg {
+    name: "x"
+    description: <<END
+Vector in source data format. Must be of size 4.
+END
+  }
+  out_arg {
+    name: "y"
+    description: <<END
+Vector in destination data format. Must be of size 4.
+END
+  }
+  attr {
+    name: "src_format"
+    description: <<END
+source data format.
+END
+  }
+  attr {
+    name: "dst_format"
+    description: <<END
+destination data format.
+END
+  }
+  summary: "Returns the permuted vector in the destination data format given the one in"
+  description: <<END
+the source data format.
+END
+}
diff --git a/tensorflow/core/kernels/data_format_ops.cc b/tensorflow/core/kernels/data_format_ops.cc
index 047188f754..0d427eddf3 100644
--- a/tensorflow/core/kernels/data_format_ops.cc
+++ b/tensorflow/core/kernels/data_format_ops.cc
@@ -50,9 +50,10 @@ class DataFormatDimMapOp : public OpKernel {
 
   void Compute(OpKernelContext* context) override {
     const Tensor& input = context->input(0);
-    OP_REQUIRES(context, input.dims() == 0,
-                errors::InvalidArgument("input must be a scalar",
-                                        input.shape().DebugString()));
+    OP_REQUIRES(
+        context, input.dims() == 0,
+        errors::InvalidArgument("input must be a scalar, but got shape ",
+                                input.shape().DebugString()));
     Tensor* output = nullptr;
     OP_REQUIRES_OK(context,
                    context->allocate_output(0, input.shape(), &output));
@@ -62,11 +63,56 @@ class DataFormatDimMapOp : public OpKernel {
   }
 };
 
+template <typename Device, typename T>
+class DataFormatVecPermuteOp : public OpKernel {
+ public:
+  explicit DataFormatVecPermuteOp(OpKernelConstruction* context)
+      : OpKernel(context) {
+    string src_format;
+    OP_REQUIRES_OK(context, context->GetAttr("src_format", &src_format));
+    string dst_format;
+    OP_REQUIRES_OK(context, context->GetAttr("dst_format", &dst_format));
+    OP_REQUIRES(
+        context, src_format == "NHWC",
+        errors::InvalidArgument(strings::StrCat(
+            "Current implementation doesn't support source data format ",
+            src_format)));
+    OP_REQUIRES(context, dst_format == "NCHW",
+                errors::InvalidArgument(strings::StrCat(
+                    "Current implementation doesn't support dst data format ",
+                    dst_format)));
+  }
+
+  void Compute(OpKernelContext* context) override {
+    const Tensor& input = context->input(0);
+    OP_REQUIRES(
+        context, input.dims() == 1,
+        errors::InvalidArgument("input must be a vector, but got shape ",
+                                input.shape().DebugString()));
+    OP_REQUIRES(
+        context, input.NumElements() == 4,
+        errors::InvalidArgument("input must be of size 4, but got shape ",
+                                input.shape().DebugString()));
+    Tensor* output = nullptr;
+    OP_REQUIRES_OK(context,
+                   context->allocate_output(0, input.shape(), &output));
+    functor::DataFormatVecPermute<Device, T>()(
+        context->eigen_device<Device>(), input.vec<T>(), output->vec<T>());
+  }
+};
+
 #define REGISTER_KERNEL(T)                                                \
   REGISTER_KERNEL_BUILDER(                                                \
       Name("DataFormatDimMap").Device(DEVICE_CPU).TypeConstraint<T>("T"), \
       DataFormatDimMapOp<CPUDevice, T>);
+TF_CALL_int32(REGISTER_KERNEL);
+TF_CALL_int64(REGISTER_KERNEL);
+#undef REGISTER_KERNEL
 
+#define REGISTER_KERNEL(T)                                                    \
+  REGISTER_KERNEL_BUILDER(                                                    \
+      Name("DataFormatVecPermute").Device(DEVICE_CPU).TypeConstraint<T>("T"), \
+      DataFormatVecPermuteOp<CPUDevice, T>);
 TF_CALL_int32(REGISTER_KERNEL);
 TF_CALL_int64(REGISTER_KERNEL);
 #undef REGISTER_KERNEL
@@ -80,9 +126,18 @@ namespace functor {
       const GPUDevice& d, typename TTypes<T>::ConstScalar x, \
       typename TTypes<T>::Scalar y);                         \
   extern template struct DataFormatDimMap<GPUDevice, T>;
-
 #define DECLARE_GPU_SPECS(T) DECLARE_GPU_SPEC(T);
+TF_CALL_int32(DECLARE_GPU_SPECS);
+TF_CALL_int64(DECLARE_GPU_SPECS);
+#undef DECLARE_GPU_SPEC
 
+#define DECLARE_GPU_SPEC(T)                               \
+  template <>                                             \
+  void DataFormatVecPermute<GPUDevice, T>::operator()(    \
+      const GPUDevice& d, typename TTypes<T>::ConstVec x, \
+      typename TTypes<T>::Vec y);                         \
+  extern template struct DataFormatVecPermute<GPUDevice, T>;
+#define DECLARE_GPU_SPECS(T) DECLARE_GPU_SPEC(T);
 TF_CALL_int32(DECLARE_GPU_SPECS);
 TF_CALL_int64(DECLARE_GPU_SPECS);
 #undef DECLARE_GPU_SPEC
@@ -93,11 +148,17 @@ TF_CALL_int64(DECLARE_GPU_SPECS);
   REGISTER_KERNEL_BUILDER(                                                \
       Name("DataFormatDimMap").Device(DEVICE_GPU).TypeConstraint<T>("T"), \
       DataFormatDimMapOp<GPUDevice, T>);
-
 TF_CALL_int32(REGISTER_GPU_KERNEL);
 TF_CALL_int64(REGISTER_GPU_KERNEL);
 #undef REGISTER_GPU_KERNEL
 
+#define REGISTER_GPU_KERNEL(T)                                                \
+  REGISTER_KERNEL_BUILDER(                                                    \
+      Name("DataFormatVecPermute").Device(DEVICE_GPU).TypeConstraint<T>("T"), \
+      DataFormatVecPermuteOp<GPUDevice, T>);
+TF_CALL_int32(REGISTER_GPU_KERNEL);
+TF_CALL_int64(REGISTER_GPU_KERNEL);
+#undef REGISTER_GPU_KERNEL
 #endif  // GOOGLE_CUDA
 
 }  // namespace tensorflow
diff --git a/tensorflow/core/kernels/data_format_ops.h b/tensorflow/core/kernels/data_format_ops.h
index 079e76c0d9..54798cc6ab 100644
--- a/tensorflow/core/kernels/data_format_ops.h
+++ b/tensorflow/core/kernels/data_format_ops.h
@@ -39,6 +39,33 @@ struct DataFormatDimMap {
   }
 };
 
+template <typename T>
+struct VecPermute {
+  Eigen::DSizes<Eigen::DenseIndex, 1> dimensions(
+      typename TTypes<T>::ConstVec input) const {
+    Eigen::DSizes<Eigen::DenseIndex, 1> result;
+    result[0] = input.dimension(0);
+    return result;
+  }
+  template <typename Output, typename Device>
+  void eval(typename TTypes<T>::ConstVec input, Output& output,
+            const Device& d) const {
+    output.template chip<0>(0).device(d) = input.template chip<0>(0);
+    output.template chip<0>(1).device(d) = input.template chip<0>(3);
+    output.template chip<0>(2).device(d) = input.template chip<0>(1);
+    output.template chip<0>(3).device(d) = input.template chip<0>(2);
+  }
+};
+
+// Functor used by DataFormatVecPermuteOp to do the computations.
+template <typename Device, typename T>
+struct DataFormatVecPermute {
+  void operator()(const Device& d, typename TTypes<T>::ConstVec x,
+                  typename TTypes<T>::Vec y) {
+    y.device(d) = x.customOp(VecPermute<T>());
+  }
+};
+
 }  // namespace functor
 }  // namespace tensorflow
 
diff --git a/tensorflow/core/kernels/data_format_ops_gpu.cu.cc b/tensorflow/core/kernels/data_format_ops_gpu.cu.cc
index 09340a7d87..38ce7c28fe 100644
--- a/tensorflow/core/kernels/data_format_ops_gpu.cu.cc
+++ b/tensorflow/core/kernels/data_format_ops_gpu.cu.cc
@@ -25,6 +25,8 @@ namespace tensorflow {
 typedef Eigen::GpuDevice GPUDevice;
 template struct functor::DataFormatDimMap<GPUDevice, int32>;
 template struct functor::DataFormatDimMap<GPUDevice, int64>;
+template struct functor::DataFormatVecPermute<GPUDevice, int32>;
+template struct functor::DataFormatVecPermute<GPUDevice, int64>;
 
 }  // namespace tensorflow
 
diff --git a/tensorflow/core/ops/nn_ops.cc b/tensorflow/core/ops/nn_ops.cc
index f58425db0a..8c31be0c0d 100644
--- a/tensorflow/core/ops/nn_ops.cc
+++ b/tensorflow/core/ops/nn_ops.cc
@@ -768,6 +768,23 @@ src_format: source data format.
 dst_format: destination data format.
 )doc");
 
+REGISTER_OP("DataFormatVecPermute")
+    .Input("x: T")
+    .Output("y: T")
+    .Attr("T: {int32, int64} = DT_INT32")
+    .Attr("src_format: string = 'NHWC'")
+    .Attr("dst_format: string = 'NCHW'")
+    .SetShapeFn(shape_inference::UnchangedShape)
+    .Doc(R"doc(
+Returns the permuted vector in the destination data format given the one in
+the source data format.
+
+x: Vector in source data format. Must be of size 4.
+y: Vector in destination data format. Must be of size 4.
+src_format: source data format.
+dst_format: destination data format.
+)doc");
+
 REGISTER_OP("FusedResizeAndPadConv2D")
     .Input("input: T")
     .Input("size: int32")
diff --git a/tensorflow/python/ops/nn_test.py b/tensorflow/python/ops/nn_test.py
index ac79354fb7..8dfd0740bb 100644
--- a/tensorflow/python/ops/nn_test.py
+++ b/tensorflow/python/ops/nn_test.py
@@ -973,5 +973,16 @@ class DataFormatDimMapTest(test_lib.TestCase):
     self._test(-4, 0)
 
 
+class DataFormatVectorPermuteTest(test_lib.TestCase):
+
+  def test(self):
+    x_val = [7, 4, 9, 3]
+    x = constant_op.constant(x_val)
+    y = nn_ops.data_format_vec_permute(x)
+    with self.test_session(use_gpu=test_lib.is_gpu_available()) as sess:
+      y_val = sess.run(y)
+      self.assertAllEqual(y_val, [7, 3, 4, 9])
+
+
 if __name__ == "__main__":
   test_lib.main()
-- 
GitLab


From c470142122c47d4ea3d91b2204d5da4f581095dd Mon Sep 17 00:00:00 2001
From: Robin Richtsfeld <robin.richtsfeld@gmail.com>
Date: Fri, 8 Dec 2017 22:58:43 +0100
Subject: [PATCH 0816/1225] Update .gitignore

---
 .gitignore | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.gitignore b/.gitignore
index d11a504bdc..900ad921a4 100644
--- a/.gitignore
+++ b/.gitignore
@@ -6,7 +6,7 @@ node_modules
 /bazel-*
 /bazel_pip
 /tools/python_bin_path.sh
-/tools/git/gen
+/tensorflow/tools/git/gen
 /pip_test
 /_python_build
 *.pyc
@@ -26,4 +26,4 @@ Podfile.lock
 /tensorflow/contrib/lite/gen/**
 /tensorflow/contrib/lite/examples/ios/simple/data/*.txt
 /tensorflow/contrib/lite/examples/ios/simple/data/*.tflite
-xcuserdata/**
\ No newline at end of file
+xcuserdata/**
-- 
GitLab


From e8fcbf6cb20b9a0be49e131ee3a3bf41a9c022f3 Mon Sep 17 00:00:00 2001
From: Benoit Steiner <bsteiner@google.com>
Date: Fri, 8 Dec 2017 14:04:09 -0800
Subject: [PATCH 0817/1225] Always instantiate default attribute values when
 building a grappler item: this ensures that we can safely process graphs
 generated before attributes were added to an op.

PiperOrigin-RevId: 178423665
---
 tensorflow/core/grappler/grappler_item_builder.cc | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/tensorflow/core/grappler/grappler_item_builder.cc b/tensorflow/core/grappler/grappler_item_builder.cc
index da99777bbc..e069c0ddfd 100644
--- a/tensorflow/core/grappler/grappler_item_builder.cc
+++ b/tensorflow/core/grappler/grappler_item_builder.cc
@@ -126,9 +126,6 @@ Status OptimizeGraph(const GraphDef& graph_def_arg, GraphDef* output_graph_def,
   graph_ctor_opts.allow_internal_ops = true;
   graph_ctor_opts.expect_device_spec = false;
   std::unique_ptr<Graph> graphptr(new Graph(function_library));
-  // Populate default attrs to the NodeDefs in the GraphDef.
-  TF_RETURN_IF_ERROR(
-      AddDefaultAttrsToGraphDef(&graph_def, *graphptr->op_registry(), 0));
 
   TF_RETURN_IF_ERROR(
       ConvertGraphDefToGraph(graph_ctor_opts, graph_def, graphptr.get()));
@@ -449,6 +446,15 @@ std::unique_ptr<GrapplerItem> GrapplerItemFromMetaGraphDef(
     new_item->save_restore_loc_tensor = saver.filename_tensor_name();
   }
 
+  // Populate default attrs to the NodeDefs in the GraphDef.
+  Status attr_status =
+      AddDefaultAttrsToGraphDef(&new_item->graph, *OpRegistry::Global(), 0);
+  if (!attr_status.ok()) {
+    LOG(ERROR) << "Failed to instantiate default attribute values: "
+               << attr_status.error_message();
+    return nullptr;
+  }
+
   // Optimize the graph (function inlining, l1 optimizations, etc).
   VLOG(1) << "Number of nodes in graph before OptimizeGraph: "
           << new_item->graph.node_size();
-- 
GitLab


From b8368b721a2679fb68c014dc6fe890dc167dff14 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 8 Dec 2017 14:14:44 -0800
Subject: [PATCH 0818/1225] Extend neutral element optimization to handle   -
 division:      *  0 / y -> 0 (in aggressive mode only)      *  x / 1 -> x    
  *  1 / y -> Reciprocal(y)   - subtraction:      * x - 0 -> x   - BiasAdd:   
   * BiasAdd(x, 0) -> x

PiperOrigin-RevId: 178425319
---
 tensorflow/core/grappler/op_types.cc          |  9 +++
 tensorflow/core/grappler/op_types.h           |  2 +
 .../grappler/optimizers/constant_folding.cc   | 59 +++++++++++++------
 .../grappler/optimizers/constant_folding.h    |  9 +--
 .../optimizers/constant_folding_test.cc       | 54 +++++++++++++----
 5 files changed, 102 insertions(+), 31 deletions(-)

diff --git a/tensorflow/core/grappler/op_types.cc b/tensorflow/core/grappler/op_types.cc
index 631fe84201..e1935fa9b3 100644
--- a/tensorflow/core/grappler/op_types.cc
+++ b/tensorflow/core/grappler/op_types.cc
@@ -35,6 +35,10 @@ bool IsAvgPoolGrad(const NodeDef& node) { return node.op() == "AvgPoolGrad"; }
 
 bool IsAssert(const NodeDef& node) { return node.op() == "Assert"; }
 
+bool IsBiasAdd(const NodeDef& node) {
+  return node.op() == "BiasAdd" || node.op() == "BiasAddV1";
+}
+
 bool IsBiasAddGrad(const NodeDef& node) { return node.op() == "BiasAddGrad"; }
 
 bool IsConcatOffset(const NodeDef& node) { return node.op() == "ConcatOffset"; }
@@ -119,6 +123,11 @@ bool IsPlaceholder(const NodeDef& node) {
          op == "PlaceholderWithDefault";
 }
 
+bool IsAnyDiv(const NodeDef& node) {
+  return node.op() == "RealDiv" || node.op() == "Div" ||
+         node.op() == "FloorDiv" || node.op() == "TruncateDiv";
+}
+
 bool IsRealDiv(const NodeDef& node) { return node.op() == "RealDiv"; }
 
 bool IsReluGrad(const NodeDef& node) { return node.op() == "ReluGrad"; }
diff --git a/tensorflow/core/grappler/op_types.h b/tensorflow/core/grappler/op_types.h
index 47dd2c7faf..fc5279c1b8 100644
--- a/tensorflow/core/grappler/op_types.h
+++ b/tensorflow/core/grappler/op_types.h
@@ -26,6 +26,7 @@ bool IsAdd(const NodeDef& node);
 bool IsAddN(const NodeDef& node);
 bool IsAvgPoolGrad(const NodeDef& node);
 bool IsAssert(const NodeDef& node);
+bool IsBiasAdd(const NodeDef& node);
 bool IsBiasAddGrad(const NodeDef& node);
 bool IsConcatOffset(const NodeDef& node);
 bool IsConstant(const NodeDef& node);
@@ -48,6 +49,7 @@ bool IsNextIteration(const NodeDef& node);
 bool IsPad(const NodeDef& node);
 bool IsNoOp(const NodeDef& node);
 bool IsPlaceholder(const NodeDef& node);
+bool IsAnyDiv(const NodeDef& node);
 bool IsRealDiv(const NodeDef& node);
 bool IsReluGrad(const NodeDef& node);
 bool IsRecv(const NodeDef& node);
diff --git a/tensorflow/core/grappler/optimizers/constant_folding.cc b/tensorflow/core/grappler/optimizers/constant_folding.cc
index acd642044b..90b796a613 100644
--- a/tensorflow/core/grappler/optimizers/constant_folding.cc
+++ b/tensorflow/core/grappler/optimizers/constant_folding.cc
@@ -1254,8 +1254,8 @@ bool ConstantFolding::IsZeros(const NodeDef& node) const {
   return false;
 }
 
-void ConstantFolding::ReplaceAddOrMulWithIdentity(int input_to_forward,
-                                                  NodeDef* node) {
+void ConstantFolding::ReplaceOperationWithIdentity(int input_to_forward,
+                                                   NodeDef* node) {
   node->set_op("Identity");
   // Propagate the designated input through the identity.
   node->mutable_input()->SwapElements(0, input_to_forward);
@@ -1266,7 +1266,14 @@ void ConstantFolding::ReplaceAddOrMulWithIdentity(int input_to_forward,
   graph_modified_ = true;
 }
 
-Status ConstantFolding::ReplaceAddOrMulWithConstant(
+void ConstantFolding::ReplaceDivisionOfOnesByReciprocal(NodeDef* node) {
+  node->set_op("Reciprocal");
+  node->mutable_input()->SwapElements(0, 1);
+  node->set_input(1, AsControlDependency(node->input(1)));
+  graph_modified_ = true;
+}
+
+Status ConstantFolding::ReplaceOperationWithConstant(
     double value, const TensorShapeProto& shape, NodeDef* node) {
   AttrValue tensor_attr;
   AttrValue dtype_attr = node->attr().at("T");
@@ -1317,12 +1324,16 @@ Status ConstantFolding::SimplifyGraph(GraphDef* output,
       *node.mutable_input(1) = AsControlDependency(node.input(1));
     }
 
-    // Simplify multiplication by ones or zeros, and addition of zeros.
+    // Simplify multiplication by ones or zeros, and addition/subtraction of
+    // zeros.
+    // TODO(rmlarsen): Rewrite x / const  -> x * (1/const).
     bool is_mul = IsMul(node);
     bool is_matmul = IsMatMul(node);
-    bool is_add = IsAdd(node);
+    bool is_add = IsAdd(node) || IsBiasAdd(node);
+    bool is_sub = IsSub(node);
+    bool is_div = IsAnyDiv(node);
     if (opt_level_ == RewriterConfig::AGGRESSIVE && use_shape_info &&
-        (is_mul || is_matmul || is_add) &&
+        (is_mul || is_matmul || is_add || is_sub || is_div) &&
         properties.HasInputProperties(node.name()) &&
         properties.HasOutputProperties(node.name())) {
       const NodeDef* x = node_map_->GetNode(node.input(0));
@@ -1334,7 +1345,8 @@ Status ConstantFolding::SimplifyGraph(GraphDef* output,
       const TensorShapeProto& output_shape =
           properties.GetOutputProperties(node.name())[0].shape();
 
-      // Simplify element-wise  multiplication by ones or addition of zeros.
+      // Simplify element-wise  multiplication by ones or addition/subtraction
+      // of zeros.
       const TensorShapeProto& y_shape =
           properties.GetInputProperties(node.name())[1].shape();
       const bool x_is_zero = IsZeros(*x);
@@ -1342,37 +1354,50 @@ Status ConstantFolding::SimplifyGraph(GraphDef* output,
       const bool y_matches_output_shape = ShapesEqual(output_shape, y_shape);
       if (y_matches_output_shape &&
           ((is_mul && x_is_one) || (is_add && x_is_zero))) {
+        // TODO(rmlarsen): Handle subtraction 0 - y.
         // 1 * y = y or 0 + y = y.
-        ReplaceAddOrMulWithIdentity(1, &node);
+        ReplaceOperationWithIdentity(1, &node);
         continue;
       }
+
+      // Replace 1 / y with Reciprocal op.
+      if (y_matches_output_shape && is_div && x_is_one) {
+        ReplaceDivisionOfOnesByReciprocal(&node);
+        continue;
+      }
+
       const TensorShapeProto& x_shape =
           properties.GetInputProperties(node.name())[0].shape();
       const bool y_is_zero = IsZeros(*y);
       const bool y_is_one = IsOnes(*y);
       const bool x_matches_output_shape = ShapesEqual(output_shape, x_shape);
-      if (x_matches_output_shape &&
-          ((is_mul && y_is_one) || (is_add && y_is_zero))) {
-        // x * 1 = x or x + 0 = x
-        ReplaceAddOrMulWithIdentity(0, &node);
+      if (x_matches_output_shape && (((is_mul || is_div) && y_is_one) ||
+                                     ((is_add || is_sub) && y_is_zero))) {
+        // x * 1 = x or x / 1 = x or x +/- 0 = x
+        ReplaceOperationWithIdentity(0, &node);
         continue;
       }
 
       // Simplify multiplication and matmul by zeros.
-      if (!is_add && (x_is_zero || y_is_zero)) {
+      // Also optimize zeros divided by a tensor, but only if we are in
+      // aggressive mode, since we might get rid of divisions by zero.
+      bool optimize_zeros_divided_by_y =
+          is_div && x_is_zero && opt_level_ == RewriterConfig::AGGRESSIVE;
+      if ((x_is_zero || y_is_zero) &&
+          (is_mul || is_matmul || optimize_zeros_divided_by_y)) {
         const PartialTensorShape shp(output_shape);
         if (shp.IsFullyDefined()) {
           TF_RETURN_IF_ERROR(
-              ReplaceAddOrMulWithConstant(0, output_shape, &node));
+              ReplaceOperationWithConstant(0, output_shape, &node));
           continue;
         }
         // Even if an input shape is only partially known, we may known that it
         // matches the output shape and thus forward the corresponding zero
         // input.
-        if (is_mul && x_is_zero && x_matches_output_shape) {
-          ReplaceAddOrMulWithIdentity(0, &node);
+        if ((is_mul || is_div) && x_is_zero && x_matches_output_shape) {
+          ReplaceOperationWithIdentity(0, &node);
         } else if (is_mul && y_is_zero && y_matches_output_shape) {
-          ReplaceAddOrMulWithIdentity(1, &node);
+          ReplaceOperationWithIdentity(1, &node);
         }
       }
     }
diff --git a/tensorflow/core/grappler/optimizers/constant_folding.h b/tensorflow/core/grappler/optimizers/constant_folding.h
index 3bb9926338..db281dc98d 100644
--- a/tensorflow/core/grappler/optimizers/constant_folding.h
+++ b/tensorflow/core/grappler/optimizers/constant_folding.h
@@ -74,10 +74,11 @@ class ConstantFolding : public GraphOptimizer {
 
   bool IsOnes(const NodeDef& node) const;
   bool IsZeros(const NodeDef& node) const;
-  void ReplaceAddOrMulWithIdentity(int input_to_forward, NodeDef* node);
-  Status ReplaceAddOrMulWithConstant(double value,
-                                     const TensorShapeProto& shape,
-                                     NodeDef* node);
+  void ReplaceOperationWithIdentity(int input_to_forward, NodeDef* node);
+  Status ReplaceOperationWithConstant(double value,
+                                      const TensorShapeProto& shape,
+                                      NodeDef* node);
+  void ReplaceDivisionOfOnesByReciprocal(NodeDef* node);
   Status FoldGraph(GraphDef* output);
 
   bool IsSimplifiableReduction(const NodeDef& node) const;
diff --git a/tensorflow/core/grappler/optimizers/constant_folding_test.cc b/tensorflow/core/grappler/optimizers/constant_folding_test.cc
index 21011eb790..7fc88cd466 100644
--- a/tensorflow/core/grappler/optimizers/constant_folding_test.cc
+++ b/tensorflow/core/grappler/optimizers/constant_folding_test.cc
@@ -88,27 +88,35 @@ TEST_F(ConstantFoldingTest, NeutralElement) {
                                 ops::Placeholder::Shape(TensorShape({3, 2})));
     Output b = ops::Placeholder(s.WithOpName("b"), DT_FLOAT,
                                 ops::Placeholder::Shape(TensorShape({2, 3})));
+    Output bias = ops::Placeholder(s.WithOpName("bias"), DT_FLOAT,
+                                   ops::Placeholder::Shape(TensorShape({2})));
     Output zeros = !use_const ? ops::ZerosLike(s.WithOpName("zeros"), x)
                               : ops::Const(s.WithOpName("zeros"), 0.0f, {2, 2});
-    Output zeros_broadcast =
-        ops::Const(s.WithOpName("zeros_broadcast"), 0.0f, {1, 1});
+    Output zeros_1d = ops::Const(s.WithOpName("zeros_1d"), 0.0f, {2});
     Output ones = !use_const ? ops::OnesLike(s.WithOpName("ones"), x)
                              : ops::Const(s.WithOpName("ones"), 1.0f, {2, 2});
     Output mul1 = ops::Mul(s.WithOpName("mul1"), x, zeros);
     Output mul2 = ops::Mul(s.WithOpName("mul2"), zeros, y);
     Output mul3 = ops::Mul(s.WithOpName("mul3"), x, ones);
     Output mul4 = ops::Mul(s.WithOpName("mul4"), ones, y);
-    Output mul5 = ops::Mul(s.WithOpName("mul5"), x, zeros_broadcast);
-    Output mul6 = ops::Mul(s.WithOpName("mul6"), zeros_broadcast, y);
+    Output mul5 = ops::Mul(s.WithOpName("mul5"), x, zeros_1d);
+    Output mul6 = ops::Mul(s.WithOpName("mul6"), zeros_1d, y);
+    Output div1 = ops::Div(s.WithOpName("div1"), x, ones);
+    Output div2 = ops::Div(s.WithOpName("div2"), ones, y);
     Output matmul1 = ops::MatMul(s.WithOpName("matmul1"), x, zeros);
     Output matmul2 = ops::MatMul(s.WithOpName("matmul2"), zeros, y);
     Output matmul3 = ops::MatMul(s.WithOpName("matmul3"), a, zeros);
     Output matmul4 = ops::MatMul(s.WithOpName("matmul4"), zeros, b);
     Output add1 = ops::Add(s.WithOpName("add1"), x, zeros);
     Output add2 = ops::Add(s.WithOpName("add2"), zeros, y);
-    Output addn = ops::AddN(
-        s.WithOpName("addn"),
-        {mul1, mul2, mul3, mul4, mul5, mul6, matmul1, matmul2, add1, add2});
+    Output bias_add1 = ops::BiasAdd(s.WithOpName("bias_add1"), x, zeros_1d);
+    Output bias_add2 = ops::BiasAdd(s.WithOpName("bias_add2"), zeros, bias);
+    Output sub1 = ops::Sub(s.WithOpName("sub1"), x, zeros);
+    Output sub2 = ops::Sub(s.WithOpName("sub2"), zeros, y);
+    Output addn =
+        ops::AddN(s.WithOpName("addn"),
+                  {mul1, mul2, mul3, mul4, mul5, mul6, div1, div2, matmul1,
+                   matmul2, add1, add2, bias_add1, bias_add2, sub1, sub2});
     GrapplerItem item;
     TF_CHECK_OK(s.ToGraphDef(&item.graph));
     item.fetch = {"addn", "matmul3", "matmul4"};
@@ -119,7 +127,7 @@ TEST_F(ConstantFoldingTest, NeutralElement) {
     Status status = optimizer.Optimize(nullptr, item, &output);
     TF_EXPECT_OK(status);
 
-    EXPECT_EQ(20, output.node_size());
+    EXPECT_EQ(27, output.node_size());
     for (int i = 0; i < output.node_size(); ++i) {
       const NodeDef& node = output.node(i);
       const string& name = node.name();
@@ -142,11 +150,19 @@ TEST_F(ConstantFoldingTest, NeutralElement) {
       } else if (name == "mul5") {
         EXPECT_EQ("Const", node.op());
         EXPECT_EQ("^x", node.input(0));
-        EXPECT_EQ("^zeros_broadcast", node.input(1));
+        EXPECT_EQ("^zeros_1d", node.input(1));
       } else if (name == "mul6") {
         EXPECT_EQ("Const", node.op());
-        EXPECT_EQ("^zeros_broadcast", node.input(0));
+        EXPECT_EQ("^zeros_1d", node.input(0));
         EXPECT_EQ("^y", node.input(1));
+      } else if (name == "div1") {
+        EXPECT_EQ("Identity", node.op());
+        EXPECT_EQ("x", node.input(0));
+        EXPECT_EQ("^ones", node.input(1));
+      } else if (name == "div2") {
+        EXPECT_EQ("Reciprocal", node.op());
+        EXPECT_EQ("y", node.input(0));
+        EXPECT_EQ("^ones", node.input(1));
       } else if (name == "matmul1") {
         EXPECT_EQ("Const", node.op());
         EXPECT_EQ("^x", node.input(0));
@@ -183,6 +199,24 @@ TEST_F(ConstantFoldingTest, NeutralElement) {
         EXPECT_EQ("Identity", node.op());
         EXPECT_EQ("y", node.input(0));
         EXPECT_EQ("^zeros", node.input(1));
+      } else if (name == "bias_add1") {
+        EXPECT_EQ("Identity", node.op());
+        EXPECT_EQ("x", node.input(0));
+        EXPECT_EQ("^zeros_1d", node.input(1));
+      } else if (name == "bias_add2") {
+        // We don't eliminate this one, because it requires broadcasting.
+        EXPECT_EQ("BiasAdd", node.op());
+        EXPECT_EQ("zeros", node.input(0));
+        EXPECT_EQ("bias", node.input(1));
+      } else if (name == "sub1") {
+        EXPECT_EQ("Identity", node.op());
+        EXPECT_EQ("x", node.input(0));
+        EXPECT_EQ("^zeros", node.input(1));
+      } else if (name == "sub2") {
+        // We don't handle this case yet.
+        EXPECT_EQ("Sub", node.op());
+        EXPECT_EQ("zeros", node.input(0));
+        EXPECT_EQ("y", node.input(1));
       }
       const std::set<string> square_zero_const{"mul1", "mul2",    "mul5",
                                                "mul6", "matmul1", "matmul2"};
-- 
GitLab


From 76b22baabc9b9dfed1df6f43ff74ff595dbb15d0 Mon Sep 17 00:00:00 2001
From: Yao Zhang <yaozhang@google.com>
Date: Fri, 8 Dec 2017 14:59:28 -0800
Subject: [PATCH 0819/1225] Share constant creation code.

PiperOrigin-RevId: 178432213
---
 .../grappler/optimizers/layout_optimizer.cc   | 46 +++++++------------
 .../optimizers/layout_optimizer_test.cc       | 36 +++++++--------
 2 files changed, 35 insertions(+), 47 deletions(-)

diff --git a/tensorflow/core/grappler/optimizers/layout_optimizer.cc b/tensorflow/core/grappler/optimizers/layout_optimizer.cc
index 86e2610c00..f675f64cfc 100644
--- a/tensorflow/core/grappler/optimizers/layout_optimizer.cc
+++ b/tensorflow/core/grappler/optimizers/layout_optimizer.cc
@@ -354,7 +354,16 @@ class NodeProcessor : public GraphProcessor {
     if (!success) {
       LOG(ERROR) << "Failed to parse TensorProto.";
     }
-    if (tensor.dims() == 1) {
+    if (tensor.dims() == 0) {
+      int value = tensor.scalar<int>()();
+      value = (value >= 0) ? value : value + 4;
+      if (value == 1 || value == 2) {
+        value = value + 1;
+      } else if (value == 3) {
+        value = 1;
+      }
+      tensor.scalar<int>()() = value;
+    } else if (tensor.dims() == 1) {
       if (tensor.flat<int>().size() == 4) {
         int c = tensor.flat<int>()(3);
         tensor.flat<int>()(3) = tensor.flat<int>()(2);
@@ -381,8 +390,12 @@ class NodeProcessor : public GraphProcessor {
           error::INVALID_ARGUMENT,
           strings::StrCat("Unsupported dimension size: ", tensor.dims()));
     }
-    tensor.AsProtoTensorContent(
-        node->mutable_attr()->at({"value"}).mutable_tensor());
+    if (tensor.dims() == 0) {
+      tensor.AsProtoField(node->mutable_attr()->at({"value"}).mutable_tensor());
+    } else {
+      tensor.AsProtoTensorContent(
+          node->mutable_attr()->at({"value"}).mutable_tensor());
+    }
     return Status::OK();
   }
 
@@ -976,7 +989,7 @@ class ConcatProcessor : public AgnosticNodeProcessor {
   Status CustomizedProcessing() override {
     auto dim_node = node_map_->GetNode(node_->input(axis_node_pos_));
     if (IsConstant(*dim_node)) {
-      AddNodeDimConst();
+      TF_RETURN_IF_ERROR(UpdateAttrValueOfInput(axis_node_pos_));
     } else {
       AddNodeDataFormatDimMap();
     }
@@ -986,31 +999,6 @@ class ConcatProcessor : public AgnosticNodeProcessor {
   int axis_node_pos_;
 
  private:
-  void AddNodeDimConst() {
-    auto dim_node = node_map_->GetNode(node_->input(axis_node_pos_));
-    auto tensor = dim_node->attr().at({"value"}).tensor();
-    int value = tensor.int_val(0);
-    value = (value >= 0) ? value : value + 4;
-    if (value == 1 || value == 2) {
-      value = value + 1;
-    } else if (value == 3) {
-      value = 1;
-    }
-    // We created a copy of the node, so that we don't modify the original node,
-    // which might be used elsewhere. Note that this copy also copies the
-    // control dependency input in the case this node is inside a loop,
-    // to ensure added_node is in the same frame with node_.
-    NodeDef* added_node = graph_->add_node();
-    *added_node = *dim_node;
-    added_node->set_name(strings::StrCat(kDim, "-", node_->name()));
-    node_map_->AddNode(added_node->name(), added_node);
-    added_node->mutable_attr()->at({"value"}).mutable_tensor()->set_int_val(
-        0, value);
-    node_map_->RemoveOutput(node_->input(axis_node_pos_), node_->name());
-    *node_->mutable_input(axis_node_pos_) = added_node->name();
-    node_map_->AddOutput(added_node->name(), node_->name());
-  }
-
   void AddNodeDataFormatDimMap() {
     NodeDef* added_node = graph_->add_node();
     added_node->set_name(strings::StrCat(kDim, "-", node_->name()));
diff --git a/tensorflow/core/grappler/optimizers/layout_optimizer_test.cc b/tensorflow/core/grappler/optimizers/layout_optimizer_test.cc
index 0ac177d9f7..ef065b22c1 100644
--- a/tensorflow/core/grappler/optimizers/layout_optimizer_test.cc
+++ b/tensorflow/core/grappler/optimizers/layout_optimizer_test.cc
@@ -445,9 +445,9 @@ TEST_F(LayoutOptimizerTest, SplitDimC) {
   Status status = optimizer.Optimize(virtual_cluster_.get(), item, &output);
   NodeMap node_map(&output);
   auto split_node = node_map.GetNode("split");
-  EXPECT_EQ(split_node->input(0), "LayoutOptimizerDim-split");
+  EXPECT_EQ(split_node->input(0), "LayoutOptimizer-split-c");
   EXPECT_EQ(split_node->input(1), "Conv2D");
-  auto split_const = node_map.GetNode("LayoutOptimizerDim-split");
+  auto split_const = node_map.GetNode("LayoutOptimizer-split-c");
   EXPECT_EQ(split_const->op(), "Const");
   EXPECT_EQ(split_const->attr().at({"value"}).tensor().int_val(0), 1);
 }
@@ -465,9 +465,9 @@ TEST_F(LayoutOptimizerTest, SplitDimH) {
   Status status = optimizer.Optimize(virtual_cluster_.get(), item, &output);
   NodeMap node_map(&output);
   auto split_node = node_map.GetNode("split");
-  EXPECT_EQ(split_node->input(0), "LayoutOptimizerDim-split");
+  EXPECT_EQ(split_node->input(0), "LayoutOptimizer-split-c");
   EXPECT_EQ(split_node->input(1), "Conv2D");
-  auto split_const = node_map.GetNode("LayoutOptimizerDim-split");
+  auto split_const = node_map.GetNode("LayoutOptimizer-split-c");
   EXPECT_EQ(split_const->op(), "Const");
   EXPECT_EQ(split_const->attr().at({"value"}).tensor().int_val(0), 2);
 }
@@ -485,9 +485,9 @@ TEST_F(LayoutOptimizerTest, SplitDimW) {
   Status status = optimizer.Optimize(virtual_cluster_.get(), item, &output);
   NodeMap node_map(&output);
   auto split_node = node_map.GetNode("split");
-  EXPECT_EQ(split_node->input(0), "LayoutOptimizerDim-split");
+  EXPECT_EQ(split_node->input(0), "LayoutOptimizer-split-c");
   EXPECT_EQ(split_node->input(1), "Conv2D");
-  auto split_const = node_map.GetNode("LayoutOptimizerDim-split");
+  auto split_const = node_map.GetNode("LayoutOptimizer-split-c");
   EXPECT_EQ(split_const->op(), "Const");
   EXPECT_EQ(split_const->attr().at({"value"}).tensor().int_val(0), 3);
 }
@@ -505,9 +505,9 @@ TEST_F(LayoutOptimizerTest, SplitDimN) {
   Status status = optimizer.Optimize(virtual_cluster_.get(), item, &output);
   NodeMap node_map(&output);
   auto split_node = node_map.GetNode("split");
-  EXPECT_EQ(split_node->input(0), "LayoutOptimizerDim-split");
+  EXPECT_EQ(split_node->input(0), "LayoutOptimizer-split-c");
   EXPECT_EQ(split_node->input(1), "Conv2D");
-  auto split_const = node_map.GetNode("LayoutOptimizerDim-split");
+  auto split_const = node_map.GetNode("LayoutOptimizer-split-c");
   EXPECT_EQ(split_const->op(), "Const");
   EXPECT_EQ(split_const->attr().at({"value"}).tensor().int_val(0), 0);
 }
@@ -551,8 +551,8 @@ TEST_F(LayoutOptimizerTest, SplitSamePortToMultipleInputsOfSameNode) {
   EXPECT_EQ(concat_node->input(0), "split:1");
   EXPECT_EQ(concat_node->input(1), "split:1");
   EXPECT_EQ(concat_node->input(2), "split:1");
-  EXPECT_EQ(concat_node->input(3), "LayoutOptimizerDim-concat");
-  auto concat_dim = node_map.GetNode("LayoutOptimizerDim-concat");
+  EXPECT_EQ(concat_node->input(3), "LayoutOptimizer-concat-axis");
+  auto concat_dim = node_map.GetNode("LayoutOptimizer-concat-axis");
   EXPECT_EQ(concat_dim->attr().at({"value"}).tensor().int_val(0), 1);
 }
 
@@ -572,8 +572,8 @@ TEST_F(LayoutOptimizerTest, ConcatDimH) {
   auto concat_node = node_map.GetNode("concat");
   EXPECT_EQ(concat_node->input(0), "split");
   EXPECT_EQ(concat_node->input(1), "split:1");
-  EXPECT_EQ(concat_node->input(2), "LayoutOptimizerDim-concat");
-  auto concat_dim = node_map.GetNode("LayoutOptimizerDim-concat");
+  EXPECT_EQ(concat_node->input(2), "LayoutOptimizer-concat-axis");
+  auto concat_dim = node_map.GetNode("LayoutOptimizer-concat-axis");
   EXPECT_EQ(concat_dim->attr().at({"value"}).tensor().int_val(0), 2);
 }
 
@@ -616,8 +616,8 @@ TEST_F(LayoutOptimizerTest, ConcatDimW) {
   auto concat_node = node_map.GetNode("concat");
   EXPECT_EQ(concat_node->input(0), "split");
   EXPECT_EQ(concat_node->input(1), "split:1");
-  EXPECT_EQ(concat_node->input(2), "LayoutOptimizerDim-concat");
-  auto concat_dim = node_map.GetNode("LayoutOptimizerDim-concat");
+  EXPECT_EQ(concat_node->input(2), "LayoutOptimizer-concat-axis");
+  auto concat_dim = node_map.GetNode("LayoutOptimizer-concat-axis");
   EXPECT_EQ(concat_dim->attr().at({"value"}).tensor().int_val(0), 3);
 }
 
@@ -637,8 +637,8 @@ TEST_F(LayoutOptimizerTest, ConcatDimN) {
   auto concat_node = node_map.GetNode("concat");
   EXPECT_EQ(concat_node->input(0), "split");
   EXPECT_EQ(concat_node->input(1), "split:1");
-  EXPECT_EQ(concat_node->input(2), "LayoutOptimizerDim-concat");
-  auto concat_dim = node_map.GetNode("LayoutOptimizerDim-concat");
+  EXPECT_EQ(concat_node->input(2), "LayoutOptimizer-concat-axis");
+  auto concat_dim = node_map.GetNode("LayoutOptimizer-concat-axis");
   EXPECT_EQ(concat_dim->attr().at({"value"}).tensor().int_val(0), 0);
 }
 
@@ -658,8 +658,8 @@ TEST_F(LayoutOptimizerTest, ConcatDimC) {
   auto concat_node = node_map.GetNode("concat");
   EXPECT_EQ(concat_node->input(0), "split");
   EXPECT_EQ(concat_node->input(1), "split:1");
-  EXPECT_EQ(concat_node->input(2), "LayoutOptimizerDim-concat");
-  auto concat_dim = node_map.GetNode("LayoutOptimizerDim-concat");
+  EXPECT_EQ(concat_node->input(2), "LayoutOptimizer-concat-axis");
+  auto concat_dim = node_map.GetNode("LayoutOptimizer-concat-axis");
   EXPECT_EQ(concat_dim->attr().at({"value"}).tensor().int_val(0), 1);
 }
 
-- 
GitLab


From b1c7d177e2aa9a4e3989caf7cfb21a5591c3832f Mon Sep 17 00:00:00 2001
From: Guangda Lai <laigd@google.com>
Date: Fri, 8 Dec 2017 15:34:01 -0800
Subject: [PATCH 0820/1225] Add a test to simulate the environment where GPU
 binary running on non-GPU device, and make sure that device initialization
 still works.

PiperOrigin-RevId: 178437003
---
 tensorflow/core/BUILD                         | 14 ++++-
 .../gpu/gpu_device_on_non_gpu_machine_test.cc | 54 +++++++++++++++++++
 2 files changed, 67 insertions(+), 1 deletion(-)
 create mode 100644 tensorflow/core/common_runtime/gpu/gpu_device_on_non_gpu_machine_test.cc

diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD
index 6365791512..a280444121 100644
--- a/tensorflow/core/BUILD
+++ b/tensorflow/core/BUILD
@@ -2178,6 +2178,7 @@ GPU_RUNTIME_HEADERS = [
     "common_runtime/gpu/gpu_util.h",
     "common_runtime/gpu/pool_allocator.h",
     "common_runtime/gpu/process_state.h",
+    "common_runtime/gpu_device_context.h",
 ]
 
 tf_cuda_library(
@@ -2194,7 +2195,6 @@ tf_cuda_library(
         "common_runtime/gpu/gpu_util_platform_specific.cc",
         "common_runtime/gpu/pool_allocator.cc",
         "common_runtime/gpu/process_state.cc",
-        "common_runtime/gpu_device_context.h",
     ],
     hdrs = GPU_RUNTIME_HEADERS,
     copts = tf_copts(),
@@ -2783,6 +2783,18 @@ tf_cc_test_mkl(
     ]),
 )
 
+tf_cc_tests_gpu(
+    name = "gpu_device_on_non_gpu_machine_test",
+    size = "small",
+    srcs = ["common_runtime/gpu/gpu_device_on_non_gpu_machine_test.cc"],
+    linkstatic = tf_kernel_tests_linkstatic(),
+    deps = [
+        ":gpu_headers_lib",
+        ":gpu_runtime",
+        ":test",
+    ],
+)
+
 tf_cc_tests_gpu(
     name = "gpu_related_tests",
     size = "small",
diff --git a/tensorflow/core/common_runtime/gpu/gpu_device_on_non_gpu_machine_test.cc b/tensorflow/core/common_runtime/gpu/gpu_device_on_non_gpu_machine_test.cc
new file mode 100644
index 0000000000..75be6d60b8
--- /dev/null
+++ b/tensorflow/core/common_runtime/gpu/gpu_device_on_non_gpu_machine_test.cc
@@ -0,0 +1,54 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/core/platform/test.h"
+
+#if GOOGLE_CUDA
+
+#include "tensorflow/core/common_runtime/gpu/gpu_device.h"
+
+#include <algorithm>
+#include <iostream>
+#include <vector>
+
+#include "tensorflow/core/common_runtime/device.h"
+#include "tensorflow/core/lib/core/stringpiece.h"
+#include "tensorflow/core/platform/platform.h"
+#include "tensorflow/core/public/session_options.h"
+
+namespace tensorflow {
+namespace {
+
+TEST(GPUDeviceOnNonGPUMachineTest, CreateGPUDevicesOnNonGPUMachine) {
+  SessionOptions opts;
+  std::vector<tensorflow::Device*> devices;
+  TF_CHECK_OK(DeviceFactory::GetFactory("GPU")->CreateDevices(
+      opts, "/job:localhost/replica:0/task:0", &devices));
+  EXPECT_TRUE(devices.empty());
+}
+
+}  // namespace
+}  // namespace tensorflow
+
+#endif  // GOOGLE_CUDA
+
+int main(int argc, char** argv) {
+#if GOOGLE_CUDA
+  // Sets CUDA_VISIBLE_DEVICES to empty string to simulate non-gpu environment.
+  setenv("CUDA_VISIBLE_DEVICES", "", 1);
+#endif  // GOOGLE_CUDA
+  testing::InitGoogleTest(&argc, argv);
+  return RUN_ALL_TESTS();
+}
-- 
GitLab


From 28807c5666c9f574ef415fed7b18b99ebed41ecc Mon Sep 17 00:00:00 2001
From: Skye Wanderman-Milne <skyewm@google.com>
Date: Fri, 8 Dec 2017 15:45:25 -0800
Subject: [PATCH 0821/1225] Add Operation._remove_all_control_inputs and use in
 ControlFlowContext.

This allows while loop gradients to work with the C API. This change
also enables the C API for control flow tests.

PiperOrigin-RevId: 178438424
---
 tensorflow/c/python_api.cc                    | 12 ++++++++
 tensorflow/c/python_api.h                     |  2 ++
 tensorflow/python/framework/ops.py            |  7 +++++
 tensorflow/python/framework/ops_test.py       | 30 ++++++++++++++++---
 .../kernel_tests/control_flow_ops_py_test.py  |  7 +++++
 tensorflow/python/ops/control_flow_ops.py     |  2 +-
 .../python/ops/control_flow_ops_test.py       |  7 +++++
 tensorflow/python/ops/gradients_test.py       |  4 +--
 8 files changed, 63 insertions(+), 8 deletions(-)

diff --git a/tensorflow/c/python_api.cc b/tensorflow/c/python_api.cc
index 37629a74ba..6e37cdb5f4 100644
--- a/tensorflow/c/python_api.cc
+++ b/tensorflow/c/python_api.cc
@@ -87,4 +87,16 @@ void UpdateEdge(TF_Graph* graph, TF_Output new_src, TF_Input dst,
   }
 }
 
+void RemoveAllControlInputs(TF_Graph* graph, TF_Operation* op) {
+  mutex_lock l(graph->mu);
+  std::vector<const Edge*> control_edges;
+  for (const Edge* edge : op->node.in_edges()) {
+    if (!edge->IsControlEdge()) continue;
+    control_edges.push_back(edge);
+  }
+  for (const Edge* edge : control_edges) {
+    graph->graph.RemoveControlEdge(edge);
+  }
+}
+
 }  // namespace tensorflow
diff --git a/tensorflow/c/python_api.h b/tensorflow/c/python_api.h
index f54585b0a1..b51ef2b531 100644
--- a/tensorflow/c/python_api.h
+++ b/tensorflow/c/python_api.h
@@ -35,6 +35,8 @@ void SetRequestedDevice(TF_Graph* graph, TF_Operation* op, const char* device);
 void UpdateEdge(TF_Graph* graph, TF_Output new_src, TF_Input dst,
                 TF_Status* status);
 
+void RemoveAllControlInputs(TF_Graph* graph, TF_Operation* op);
+
 }  // namespace tensorflow
 
 #endif  // THIRD_PARTY_TENSORFLOW_C_PYTHON_API_H_
diff --git a/tensorflow/python/framework/ops.py b/tensorflow/python/framework/ops.py
index 22c2cc7bb1..6969924c70 100644
--- a/tensorflow/python/framework/ops.py
+++ b/tensorflow/python/framework/ops.py
@@ -1945,6 +1945,13 @@ class Operation(object):
     else:
       self._add_control_inputs([op])
 
+  def _remove_all_control_inputs(self):
+    """Removes any control inputs to this operation."""
+    if self._c_op:
+      c_api.RemoveAllControlInputs(self._graph._c_graph, self._c_op)  # pylint: disable=protected-access
+    else:
+      del self.control_inputs[:]
+
   # Methods below are used when building the NodeDef and Graph proto.
   def _recompute_node_def(self):
     # TODO(skyewm): remove this function when we switch to C API
diff --git a/tensorflow/python/framework/ops_test.py b/tensorflow/python/framework/ops_test.py
index ae51125b39..a69c0a1f82 100644
--- a/tensorflow/python/framework/ops_test.py
+++ b/tensorflow/python/framework/ops_test.py
@@ -485,6 +485,30 @@ class OperationTest(test_util.TensorFlowTestCase):
     z._add_control_inputs([x, y, y])  # pylint: disable=protected-access
     self.assertEqual(z.control_inputs, [x, y])
 
+  def testRemoveAllControlInputs(self):
+    a = constant_op.constant(1)
+    with ops.control_dependencies([a]):
+      b = constant_op.constant(2)
+    c = constant_op.constant(3)
+    d = constant_op.constant(4)
+    e = constant_op.constant(5)
+    with ops.control_dependencies([a, c]):
+      f = d + e
+
+    self.assertEqual(a.op.control_inputs, [])
+    self.assertEqual(b.op.control_inputs, [a.op])
+    self.assertEqual(f.op.control_inputs, [a.op, c.op])
+
+    a.op._remove_all_control_inputs()  # pylint: disable=protected-access
+    self.assertEqual(a.op.control_inputs, [])
+
+    b.op._remove_all_control_inputs()  # pylint: disable=protected-access
+    self.assertEqual(b.op.control_inputs, [])
+
+    f.op._remove_all_control_inputs()  # pylint: disable=protected-access
+    self.assertEqual(f.op.control_inputs, [])
+    self.assertEqual(list(f.op.inputs), [d, e])
+
   def testControlInputCycle(self):
     # Non-C API path has a different error message
     if not ops._USE_C_API: return
@@ -747,10 +771,8 @@ class CreateOpFromTFOperationTest(test_util.TensorFlowTestCase):
     g = ops.Graph()
     with g.as_default():
       if ops._USE_C_API:
-        c_op = ops._create_c_op(
-            g, ops._NodeDef("IntOutput", "myop"), [], [])
-        c_op2 = ops._create_c_op(
-            g, ops._NodeDef("IntOutput", "myop_1"), [], [])
+        c_op = ops._create_c_op(g, ops._NodeDef("IntOutput", "myop"), [], [])
+        c_op2 = ops._create_c_op(g, ops._NodeDef("IntOutput", "myop_1"), [], [])
         op = g._create_op_from_tf_operation(c_op)
         op2 = g._create_op_from_tf_operation(c_op2)
       else:
diff --git a/tensorflow/python/kernel_tests/control_flow_ops_py_test.py b/tensorflow/python/kernel_tests/control_flow_ops_py_test.py
index 3a61d76f58..e1d3f9a7d4 100644
--- a/tensorflow/python/kernel_tests/control_flow_ops_py_test.py
+++ b/tensorflow/python/kernel_tests/control_flow_ops_py_test.py
@@ -38,6 +38,7 @@ from tensorflow.python.framework import function
 from tensorflow.python.framework import ops
 from tensorflow.python.framework import sparse_tensor
 from tensorflow.python.framework import tensor_shape
+from tensorflow.python.framework import test_util
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import control_flow_ops
 from tensorflow.python.ops import data_flow_ops
@@ -131,6 +132,7 @@ def isum(s, maximum_iterations=None):
   return r_s
 
 
+@test_util.with_c_api
 class ControlFlowTest(test.TestCase):
 
   def testRefIdentity(self):
@@ -2648,6 +2650,7 @@ class ControlFlowTest(test.TestCase):
           1)
 
 
+@test_util.with_c_api
 class ControlFlowContextCheckTest(test.TestCase):
 
   def _getWhileTensor(self):
@@ -2764,6 +2767,7 @@ class ControlFlowContextCheckTest(test.TestCase):
                             lambda: constant_op.constant(0))
 
 
+@test_util.with_c_api
 class TupleTest(test.TestCase):
 
   def testTensors(self):
@@ -2849,6 +2853,7 @@ class TupleTest(test.TestCase):
       self.assertEquals(1, var.eval())
 
 
+@test_util.with_c_api
 class AssertTest(test.TestCase):
 
   def testGuardedAssertDoesNotCopyWhenTrue(self):
@@ -2886,6 +2891,7 @@ class AssertTest(test.TestCase):
       self.assertEqual([], guarded_memcpy_nodestat_names)
 
 
+@test_util.with_c_api
 class WhileOpBenchmark(test.Benchmark):
   """Evaluate the performance of while_loop op."""
 
@@ -2999,6 +3005,7 @@ class WhileOpBenchmark(test.Benchmark):
         name="unroll_same_device", iters=iters, wall_time=duration)
 
 
+@test_util.with_c_api
 class EagerTest(test.TestCase):
 
   def testCond(self):
diff --git a/tensorflow/python/ops/control_flow_ops.py b/tensorflow/python/ops/control_flow_ops.py
index 12ee2e627b..8e8e7d4f8c 100644
--- a/tensorflow/python/ops/control_flow_ops.py
+++ b/tensorflow/python/ops/control_flow_ops.py
@@ -1501,7 +1501,7 @@ class ControlFlowContext(object):
         if ctxt is not None and ctxt.GetWhileContext() == while_ctxt:
           internal_control_inputs.append(x)
     if len(internal_control_inputs) != len(op.control_inputs):
-      del op.control_inputs[:]
+      op._remove_all_control_inputs()
       op._add_control_inputs(internal_control_inputs)
     return internal_control_inputs
   # pylint: enable=protected-access
diff --git a/tensorflow/python/ops/control_flow_ops_test.py b/tensorflow/python/ops/control_flow_ops_test.py
index 923b26f958..cd3c02f562 100644
--- a/tensorflow/python/ops/control_flow_ops_test.py
+++ b/tensorflow/python/ops/control_flow_ops_test.py
@@ -51,6 +51,7 @@ TestTuple = collections.namedtuple("TestTuple", "a b")
 SingletonTestTuple = collections.namedtuple("SingletonTestTuple", "a")
 
 
+@test_util.with_c_api
 class GroupTestCase(test_util.TensorFlowTestCase):
 
   def _StripNode(self, nd):
@@ -132,6 +133,7 @@ class GroupTestCase(test_util.TensorFlowTestCase):
         control_flow_ops.group(1, 2)
 
 
+@test_util.with_c_api
 class ShapeTestCase(test_util.TensorFlowTestCase):
 
   def testShape(self):
@@ -143,6 +145,7 @@ class ShapeTestCase(test_util.TensorFlowTestCase):
                             [constant_op.constant(1.0)], tensor).get_shape())
 
 
+@test_util.with_c_api
 class WithDependenciesTestCase(test_util.TensorFlowTestCase):
 
   def testTupleDependencies(self):
@@ -174,6 +177,7 @@ class WithDependenciesTestCase(test_util.TensorFlowTestCase):
         self.assertEquals(1, counter.eval())
 
 
+@test_util.with_c_api
 class SwitchTestCase(test_util.TensorFlowTestCase):
 
   def testIndexedSlicesWithDenseShape(self):
@@ -431,6 +435,7 @@ class CondTest(test_util.TensorFlowTestCase):
           control_flow_ops.cond(True, lambda: x, lambda: x, fn2=lambda: x)
 
 
+@test_util.with_c_api
 class ContextTest(test_util.TensorFlowTestCase):
 
   def testCondContext(self):
@@ -516,6 +521,7 @@ def _RawNestedShape(nested_shape):
 
 
 # TODO(yori): Add tests for indexed slices.
+@test_util.with_c_api
 class DataTypesTest(test_util.TensorFlowTestCase):
 
   def assertAllEqualNested(self, a, b):
@@ -846,6 +852,7 @@ class DataTypesTest(test_util.TensorFlowTestCase):
     self.assertEqual(matrix.get_shape(), tensor_shape.TensorShape([2, 2]))
 
 
+@test_util.with_c_api
 class CaseTest(test_util.TensorFlowTestCase):
 
   def testCase_withDefault(self):
diff --git a/tensorflow/python/ops/gradients_test.py b/tensorflow/python/ops/gradients_test.py
index dacc2947fe..1211b2e923 100644
--- a/tensorflow/python/ops/gradients_test.py
+++ b/tensorflow/python/ops/gradients_test.py
@@ -573,9 +573,7 @@ class HessianVectorProductTest(test_util.TensorFlowTestCase):
       self.assertAllClose(hess_v_value, hess_v_actual)
 
 
-# TODO(skyewm): reenable C API once
-# ControlFlowContext._RemoveExternalControlEdges works with C API enabled
-# @test_util.with_c_api
+@test_util.with_c_api
 class HessianTest(test_util.TensorFlowTestCase):
 
   def testHessian1D(self):
-- 
GitLab


From 54c2584a20c56ecd3eefd2b781ccbe2a49db87b3 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 8 Dec 2017 15:52:40 -0800
Subject: [PATCH 0822/1225] Add a "Snapshot" kernel that always makes a copy of
 its input. This should be used in place of unreadable hacks like "x = y + 0"
 in places that need to force a copy of a tensor to be made.

PiperOrigin-RevId: 178439351
---
 .../api_def/base_api/api_def_Snapshot.pbtxt   |  4 ++
 .../api_def/python_api/api_def_Snapshot.pbtxt |  4 ++
 tensorflow/core/kernels/BUILD                 |  7 +++
 tensorflow/core/kernels/snapshot_op.cc        | 46 +++++++++++++++++
 tensorflow/core/kernels/snapshot_op.h         | 49 +++++++++++++++++++
 tensorflow/core/kernels/snapshot_op_gpu.cu.cc | 37 ++++++++++++++
 tensorflow/core/ops/array_ops.cc              | 14 ++++++
 .../python/kernel_tests/array_ops_test.py     | 11 +++++
 tensorflow/python/ops/hidden_ops.txt          |  1 +
 9 files changed, 173 insertions(+)
 create mode 100644 tensorflow/core/api_def/base_api/api_def_Snapshot.pbtxt
 create mode 100644 tensorflow/core/api_def/python_api/api_def_Snapshot.pbtxt
 create mode 100644 tensorflow/core/kernels/snapshot_op.cc
 create mode 100644 tensorflow/core/kernels/snapshot_op.h
 create mode 100644 tensorflow/core/kernels/snapshot_op_gpu.cu.cc

diff --git a/tensorflow/core/api_def/base_api/api_def_Snapshot.pbtxt b/tensorflow/core/api_def/base_api/api_def_Snapshot.pbtxt
new file mode 100644
index 0000000000..49b7f5798c
--- /dev/null
+++ b/tensorflow/core/api_def/base_api/api_def_Snapshot.pbtxt
@@ -0,0 +1,4 @@
+op {
+  graph_op_name: "Snapshot"
+  summary: "Returns a copy of the input tensor."
+}
diff --git a/tensorflow/core/api_def/python_api/api_def_Snapshot.pbtxt b/tensorflow/core/api_def/python_api/api_def_Snapshot.pbtxt
new file mode 100644
index 0000000000..ea9ccee397
--- /dev/null
+++ b/tensorflow/core/api_def/python_api/api_def_Snapshot.pbtxt
@@ -0,0 +1,4 @@
+op {
+  graph_op_name: "Snapshot"
+  visibility: HIDDEN
+}
diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD
index 0b5b4bfd3d..94dbd53607 100644
--- a/tensorflow/core/kernels/BUILD
+++ b/tensorflow/core/kernels/BUILD
@@ -605,6 +605,7 @@ cc_library(
         ":reverse_sequence_op",
         ":shape_ops",
         ":slice_op",
+        ":snapshot_op",
         ":split_op",
         ":split_v_op",
         ":strided_slice_op",
@@ -801,6 +802,12 @@ tf_kernel_library(
     deps = ARRAY_DEPS + [":strided_slice_op"],
 )
 
+tf_kernel_library(
+    name = "snapshot_op",
+    prefix = "snapshot_op",
+    deps = ARRAY_DEPS,
+)
+
 tf_kernel_library(
     name = "split_op",
     gpu_srcs = ["cuda_device_array.h"],
diff --git a/tensorflow/core/kernels/snapshot_op.cc b/tensorflow/core/kernels/snapshot_op.cc
new file mode 100644
index 0000000000..50157d5d48
--- /dev/null
+++ b/tensorflow/core/kernels/snapshot_op.cc
@@ -0,0 +1,46 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+// See docs in ../ops/array_ops.cc.
+#include "tensorflow/core/kernels/snapshot_op.h"
+
+#include "tensorflow/core/framework/op_kernel.h"
+#include "tensorflow/core/framework/register_types.h"
+#include "tensorflow/core/framework/types.h"
+
+namespace tensorflow {
+typedef Eigen::ThreadPoolDevice CPUDevice;
+
+#define REGISTER_KERNEL(TYPE)                                        \
+  REGISTER_KERNEL_BUILDER(                                           \
+      Name("Snapshot").Device(DEVICE_CPU).TypeConstraint<TYPE>("T"), \
+      SnapshotOp<CPUDevice, TYPE>);
+
+TF_CALL_POD_TYPES(REGISTER_KERNEL);
+#undef REGISTER_KERNEL
+
+#if TENSORFLOW_USE_SYCL
+typedef Eigen::SyclDevice SyclDevice;
+#define REGISTER_SYCL_KERNEL(TYPE)                                    \
+  REGISTER_KERNEL_BUILDER(                                            \
+      Name("Snapshot").Device(DEVICE_SYCL).TypeConstraint<TYPE>("T"), \
+      SnapshotOp<SyclDevice, TYPE>);
+
+TF_CALL_POD_TYPES(REGISTER_SYCL_KERNEL);
+
+#undef REGISTER_SYCL_KERNEL
+#endif  // TENSORFLOW_USE_SYCL
+
+}  // namespace tensorflow
diff --git a/tensorflow/core/kernels/snapshot_op.h b/tensorflow/core/kernels/snapshot_op.h
new file mode 100644
index 0000000000..2c79893b49
--- /dev/null
+++ b/tensorflow/core/kernels/snapshot_op.h
@@ -0,0 +1,49 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_KERNELS_SNAPSHOT_OP_H_
+#define TENSORFLOW_KERNELS_SNAPSHOT_OP_H_
+
+#if GOOGLE_CUDA
+#define EIGEN_USE_GPU
+#endif
+
+#define EIGEN_USE_THREADS
+
+#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
+#include "tensorflow/core/framework/op_kernel.h"
+
+namespace tensorflow {
+
+template <typename Device, typename Scalar>
+class SnapshotOp : public OpKernel {
+ public:
+  explicit SnapshotOp(OpKernelConstruction* context) : OpKernel(context) {}
+
+  void Compute(OpKernelContext* context) override {
+    const Tensor& input = context->input(0);
+    Tensor* output = nullptr;
+    OP_REQUIRES_OK(context,
+                   context->allocate_output(0, input.shape(), &output));
+    const Device& device = context->eigen_device<Device>();
+    device.memcpy(output->template flat<Scalar>().data(),
+                  input.template flat<Scalar>().data(),
+                  input.NumElements() * sizeof(Scalar));
+  }
+};
+
+}  // namespace tensorflow
+
+#endif  // TENSORFLOW_KERNELS_SNAPSHOT_OP_H_
diff --git a/tensorflow/core/kernels/snapshot_op_gpu.cu.cc b/tensorflow/core/kernels/snapshot_op_gpu.cu.cc
new file mode 100644
index 0000000000..52070be838
--- /dev/null
+++ b/tensorflow/core/kernels/snapshot_op_gpu.cu.cc
@@ -0,0 +1,37 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#if GOOGLE_CUDA
+
+// See docs in ../ops/array_ops.cc.
+#include "tensorflow/core/kernels/snapshot_op.h"
+
+#include "tensorflow/core/framework/op_kernel.h"
+#include "tensorflow/core/framework/register_types.h"
+#include "tensorflow/core/framework/types.h"
+
+namespace tensorflow {
+typedef Eigen::GpuDevice GPUDevice;
+
+#define REGISTER_KERNEL(TYPE)                                        \
+  REGISTER_KERNEL_BUILDER(                                           \
+      Name("Snapshot").Device(DEVICE_GPU).TypeConstraint<TYPE>("T"), \
+      SnapshotOp<GPUDevice, TYPE>);
+
+TF_CALL_POD_TYPES(REGISTER_KERNEL);
+#undef REGISTER_KERNEL
+
+}  // namespace tensorflow
+
+#endif  // GOOGLE_CUDA
diff --git a/tensorflow/core/ops/array_ops.cc b/tensorflow/core/ops/array_ops.cc
index 1fbd123515..5a31f433ce 100644
--- a/tensorflow/core/ops/array_ops.cc
+++ b/tensorflow/core/ops/array_ops.cc
@@ -1722,6 +1722,20 @@ REGISTER_OP("Identity")
 Return a tensor with the same shape and contents as the input tensor or value.
 )Doc");
 
+REGISTER_OP("Snapshot")
+    .Input("input: T")
+    .Output("output: T")
+    .Attr("T: type")
+    .SetShapeFn([](shape_inference::InferenceContext* c) {
+      c->set_output(0, c->input(0));
+      auto* handle_data = c->input_handle_shapes_and_types(0);
+      if (handle_data != nullptr) {
+        c->set_output_handle_shapes_and_types(0, *handle_data);
+      }
+      return Status::OK();
+    })
+    .Doc(R"Doc(Returns a copy of the input tensor.)Doc");
+
 #ifdef INTEL_MKL
 REGISTER_OP("_MklIdentity")
     .Input("input: T")
diff --git a/tensorflow/python/kernel_tests/array_ops_test.py b/tensorflow/python/kernel_tests/array_ops_test.py
index 6d649b1cac..17492e9255 100644
--- a/tensorflow/python/kernel_tests/array_ops_test.py
+++ b/tensorflow/python/kernel_tests/array_ops_test.py
@@ -33,6 +33,7 @@ from tensorflow.python.framework import tensor_shape
 from tensorflow.python.framework import test_ops
 from tensorflow.python.framework import test_util
 from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import gen_array_ops
 from tensorflow.python.ops import gradients_impl
 from tensorflow.python.ops import init_ops
 from tensorflow.python.ops import math_ops
@@ -1124,5 +1125,15 @@ class GuaranteeConstOpTest(test_util.TensorFlowTestCase):
         guarantee_a.eval()
 
 
+class SnapshotOpTest(test_util.TensorFlowTestCase):
+
+  def testInvertPermutation(self):
+    for dtype in [dtypes.int32, dtypes.int64, dtypes.float32, dtypes.float64]:
+      with self.test_session(use_gpu=True):
+        x = constant_op.constant([0, 1, 2, 3], dtype=dtype)
+        y = gen_array_ops._snapshot(x)
+        self.assertAllEqual(y.eval(), [0, 1, 2, 3])
+
+
 if __name__ == "__main__":
   test_lib.main()
diff --git a/tensorflow/python/ops/hidden_ops.txt b/tensorflow/python/ops/hidden_ops.txt
index b3f7c26695..ec0890c016 100644
--- a/tensorflow/python/ops/hidden_ops.txt
+++ b/tensorflow/python/ops/hidden_ops.txt
@@ -21,6 +21,7 @@ ParallelConcat
 Placeholder
 RefIdentity
 Reverse
+Snapshot
 SpaceToBatch
 Split
 SplitV
-- 
GitLab


From e70078e3baae89fb497cfb2e8c56eee52833cd31 Mon Sep 17 00:00:00 2001
From: Benoit Steiner <bsteiner@google.com>
Date: Fri, 8 Dec 2017 16:03:32 -0800
Subject: [PATCH 0823/1225] Add default values of attributes that might have
 been stripped by optimizations.

PiperOrigin-RevId: 178440738
---
 tensorflow/core/grappler/grappler_item_builder.cc | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/tensorflow/core/grappler/grappler_item_builder.cc b/tensorflow/core/grappler/grappler_item_builder.cc
index e069c0ddfd..ca3c1a6667 100644
--- a/tensorflow/core/grappler/grappler_item_builder.cc
+++ b/tensorflow/core/grappler/grappler_item_builder.cc
@@ -135,7 +135,10 @@ Status OptimizeGraph(const GraphDef& graph_def_arg, GraphDef* output_graph_def,
   optimizer.Optimize(flr, env, devices[0], &graphptr, /*shape_map=*/nullptr);
   graphptr->ToGraphDef(output_graph_def);
 
-  return Status::OK();
+  // The default values of attributes might have been stripped by the optimizer.
+  // Add them back.
+  return AddDefaultAttrsToGraphDef(output_graph_def, *graphptr->op_registry(),
+                                   0);
 }
 
 // Applies the same graph pruning logic to the graph as Session.Run in TF.
@@ -446,7 +449,7 @@ std::unique_ptr<GrapplerItem> GrapplerItemFromMetaGraphDef(
     new_item->save_restore_loc_tensor = saver.filename_tensor_name();
   }
 
-  // Populate default attrs to the NodeDefs in the GraphDef.
+  // Instantiate all the missing attributes with their default values.
   Status attr_status =
       AddDefaultAttrsToGraphDef(&new_item->graph, *OpRegistry::Global(), 0);
   if (!attr_status.ok()) {
-- 
GitLab


From d3e35f9d8a168d2fdb73b6fa9d7f4b4114cc508d Mon Sep 17 00:00:00 2001
From: Gunhan Gulsoy <gunan@google.com>
Date: Fri, 8 Dec 2017 16:14:39 -0800
Subject: [PATCH 0824/1225] Also install libssl-dev to make pip/py3.6 work
 properly. (#15226)

---
 tensorflow/tools/ci_build/install/install_deb_packages.sh | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tensorflow/tools/ci_build/install/install_deb_packages.sh b/tensorflow/tools/ci_build/install/install_deb_packages.sh
index 4ab307c925..9640810533 100755
--- a/tensorflow/tools/ci_build/install/install_deb_packages.sh
+++ b/tensorflow/tools/ci_build/install/install_deb_packages.sh
@@ -48,6 +48,7 @@ apt-get install -y --no-install-recommends \
     git \
     libcurl4-openssl-dev \
     libtool \
+    libssl-dev \
     mlocate \
     openjdk-8-jdk \
     openjdk-8-jre-headless \
-- 
GitLab


From ddfd6253fe0870779abc78be52c872d86b03f577 Mon Sep 17 00:00:00 2001
From: Max Galkin <maxgalkin@google.com>
Date: Fri, 8 Dec 2017 16:13:07 -0800
Subject: [PATCH 0825/1225] Adjust verbosity of per-node logging in
 graph_properties.

PiperOrigin-RevId: 178441933
---
 tensorflow/core/grappler/costs/graph_properties.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/core/grappler/costs/graph_properties.cc b/tensorflow/core/grappler/costs/graph_properties.cc
index f188c10615..0453ceb6d1 100644
--- a/tensorflow/core/grappler/costs/graph_properties.cc
+++ b/tensorflow/core/grappler/costs/graph_properties.cc
@@ -1033,7 +1033,7 @@ Status GraphProperties::InferStatically(bool assume_valid_feeds) {
   }
 
   for (const Node* const node : graph.nodes()) {
-    VLOG(1) << "<Node> " << node->name();
+    VLOG(3) << "Filling in graph properties for node: " << node->name();
     auto ctx = shape_refiner.GetContext(node);
     if (!ctx) {
       continue;
-- 
GitLab


From a0c21217fcf2993c5625a726c62a04b749afcddf Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 8 Dec 2017 16:30:02 -0800
Subject: [PATCH 0826/1225] Make profiler memory profiling work with
 tf.while_loop

PiperOrigin-RevId: 178443834
---
 .../core/profiler/internal/tfprof_node.cc     |  68 ++++++----
 .../core/profiler/internal/tfprof_node.h      | 123 +++++++++++-------
 .../core/profiler/internal/tfprof_timeline.cc |  16 ++-
 .../profiler/internal/tfprof_timeline_test.cc |   2 +-
 tensorflow/core/profiler/tfprof_log.proto     |  53 ++++----
 .../python/profiler/model_analyzer_test.py    |  40 ++++++
 6 files changed, 196 insertions(+), 106 deletions(-)

diff --git a/tensorflow/core/profiler/internal/tfprof_node.cc b/tensorflow/core/profiler/internal/tfprof_node.cc
index 5cd1050bcc..2945c9510f 100644
--- a/tensorflow/core/profiler/internal/tfprof_node.cc
+++ b/tensorflow/core/profiler/internal/tfprof_node.cc
@@ -80,10 +80,15 @@ void ExecStep::AddTimeStats(const string& dev, const NodeExecStats& step_stat) {
 
 void ExecStep::AddMemoryStats(const string& dev,
                               const NodeExecStats& step_stat) {
-  if (exec_.memory_intialized()) {
+  ExecMemory exec_mem;
+  if (step_stat.all_start_micros() > 0) {
+    exec_mem.set_memory_micros(step_stat.all_start_micros() +
+                               step_stat.op_end_rel_micros());
+  } else {
+    fprintf(stderr, "%s has no start time, skipping\n",
+            step_stat.node_name().c_str());
     return;
   }
-  exec_.set_memory_intialized(true);
 
   int accelerator_allocator_cnt = 0;
   for (const auto& mem : step_stat.memory()) {
@@ -93,14 +98,12 @@ void ExecStep::AddMemoryStats(const string& dev,
       continue;
     }
     ++accelerator_allocator_cnt;
-    exec_.set_allocator_bytes_in_use(
-        std::max(static_cast<int64>(exec_.allocator_bytes_in_use()),
+    exec_mem.set_allocator_bytes_in_use(
+        std::max(static_cast<int64>(exec_mem.allocator_bytes_in_use()),
                  static_cast<int64>(mem.allocator_bytes_in_use())));
-    Allocation allocation;
     for (const auto& alloc : mem.allocation_records()) {
-      allocation.add_allocation_records()->MergeFrom(alloc);
+      allocations_.push_back(alloc);
     }
-    allocations_.push_back(allocation);
   }
   if (accelerator_allocator_cnt > 1) {
     fprintf(stderr, "found %d gpu allocator for 1 node\n",
@@ -121,22 +124,26 @@ void ExecStep::AddMemoryStats(const string& dev,
       uint64 output_ptr =
           output.tensor_description().allocation_description().ptr();
       total_output_bytes += output_bytes;
-      output_memory_[output.slot()] = std::make_pair(output_bytes, output_ptr);
+
+      auto& mem = (*exec_mem.mutable_output_memory())[output.slot()];
+      mem.set_ptr(output_ptr);
+      mem.set_bytes(output_bytes);
     }
   }
-  exec_.set_output_bytes(total_output_bytes);
+  exec_mem.set_output_bytes(total_output_bytes);
 
   if (step_stat.has_memory_stats()) {
-    exec_.set_host_temp_bytes(exec_.host_temp_bytes() +
-                              step_stat.memory_stats().host_temp_memory_size());
-    exec_.set_host_persistent_bytes(
-        exec_.host_persistent_bytes() +
+    exec_mem.set_host_temp_bytes(
+        exec_mem.host_temp_bytes() +
+        step_stat.memory_stats().host_temp_memory_size());
+    exec_mem.set_host_persistent_bytes(
+        exec_mem.host_persistent_bytes() +
         step_stat.memory_stats().host_persistent_memory_size());
-    exec_.set_accelerator_temp_bytes(
-        exec_.accelerator_temp_bytes() +
+    exec_mem.set_accelerator_temp_bytes(
+        exec_mem.accelerator_temp_bytes() +
         step_stat.memory_stats().device_temp_memory_size());
-    exec_.set_accelerator_persistent_bytes(
-        exec_.accelerator_persistent_bytes() +
+    exec_mem.set_accelerator_persistent_bytes(
+        exec_mem.accelerator_persistent_bytes() +
         step_stat.memory_stats().device_persistent_memory_size());
   }
 
@@ -166,18 +173,20 @@ void ExecStep::AddMemoryStats(const string& dev,
     requested_bytes += mem.total_bytes();
     peak_bytes += mem.peak_bytes();
   }
-  residual_bytes +=
-      exec_.host_persistent_bytes() + exec_.accelerator_persistent_bytes();
-  requested_bytes += exec_.host_persistent_bytes() +
-                     exec_.accelerator_persistent_bytes() +
-                     exec_.host_temp_bytes() + exec_.accelerator_temp_bytes();
-  peak_bytes += exec_.host_persistent_bytes() +
-                exec_.accelerator_persistent_bytes() + exec_.host_temp_bytes() +
-                exec_.accelerator_temp_bytes();
+  residual_bytes += exec_mem.host_persistent_bytes() +
+                    exec_mem.accelerator_persistent_bytes();
+  requested_bytes += exec_mem.host_persistent_bytes() +
+                     exec_mem.accelerator_persistent_bytes() +
+                     exec_mem.host_temp_bytes() +
+                     exec_mem.accelerator_temp_bytes();
+  peak_bytes += exec_mem.host_persistent_bytes() +
+                exec_mem.accelerator_persistent_bytes() +
+                exec_mem.host_temp_bytes() + exec_mem.accelerator_temp_bytes();
 
-  exec_.set_requested_bytes(requested_bytes);
-  exec_.set_residual_bytes(residual_bytes);
-  exec_.set_peak_bytes(peak_bytes);
+  exec_mem.set_requested_bytes(requested_bytes);
+  exec_mem.set_residual_bytes(residual_bytes);
+  exec_mem.set_peak_bytes(peak_bytes);
+  memory_execs_.emplace_back(exec_mem);
 }
 
 void TFGraphNode::AddStepStat(int64 step, const string& device,
@@ -279,5 +288,8 @@ bool IsPlacedOnAccelerator(const string& device) {
   return device.find("gpu") != device.npos ||
          device.find("sycl") != device.npos;
 }
+bool IsPlacedOnCPU(const string& device) {
+  return device.find("cpu") != device.npos;
+}
 }  // namespace tfprof
 }  // namespace tensorflow
diff --git a/tensorflow/core/profiler/internal/tfprof_node.h b/tensorflow/core/profiler/internal/tfprof_node.h
index 77c14cb792..5bc2ea3c42 100644
--- a/tensorflow/core/profiler/internal/tfprof_node.h
+++ b/tensorflow/core/profiler/internal/tfprof_node.h
@@ -109,7 +109,6 @@ class ExecStep {
       const {
     return cpu_execs_;
   }
-
   int64 all_start_micros() const { return exec_.all_start_micros(); }
   int64 latest_end_micros() const { return exec_.latest_end_micros(); }
   int64 lastest_schedule_end_micros() const {
@@ -121,27 +120,73 @@ class ExecStep {
     }
     return ret;
   }
-
-  int64 requested_bytes() const { return exec_.requested_bytes(); }
-  int64 peak_bytes() const { return exec_.peak_bytes(); }
-  int64 residual_bytes() const { return exec_.residual_bytes(); }
-  int64 output_bytes() const { return exec_.output_bytes(); }
+  int64 requested_bytes() const {
+    int64 requested_bytes = 0;
+    for (const ExecMemory& exec : memory_execs_) {
+      requested_bytes += exec.requested_bytes();
+    }
+    return requested_bytes;
+  }
+  int64 peak_bytes() const {
+    int64 peak_bytes = 0;
+    for (const ExecMemory& exec : memory_execs_) {
+      peak_bytes += exec.peak_bytes();
+    }
+    return peak_bytes;
+  }
+  int64 residual_bytes() const {
+    int64 residual_bytes = 0;
+    for (const ExecMemory& exec : memory_execs_) {
+      residual_bytes += exec.residual_bytes();
+    }
+    return residual_bytes;
+  }
+  int64 output_bytes() const {
+    int64 output_bytes = 0;
+    for (const ExecMemory& exec : memory_execs_) {
+      output_bytes += exec.output_bytes();
+    }
+    return output_bytes;
+  }
   int64 accelerator_temp_bytes() const {
-    return exec_.accelerator_temp_bytes();
+    int64 accelerator_temp_bytes = 0;
+    for (const ExecMemory& exec : memory_execs_) {
+      accelerator_temp_bytes += exec.accelerator_temp_bytes();
+    }
+    return accelerator_temp_bytes;
+  }
+  int64 host_temp_bytes() const {
+    int64 host_temp_bytes = 0;
+    for (const ExecMemory& exec : memory_execs_) {
+      host_temp_bytes += exec.host_temp_bytes();
+    }
+    return host_temp_bytes;
   }
-  int64 host_temp_bytes() const { return exec_.host_temp_bytes(); }
   int64 accelerator_persistent_bytes() const {
-    return exec_.accelerator_persistent_bytes();
+    int64 accelerator_persistent_bytes = 0;
+    for (const ExecMemory& exec : memory_execs_) {
+      accelerator_persistent_bytes += exec.accelerator_persistent_bytes();
+    }
+    return accelerator_persistent_bytes;
   }
-  int64 host_persistent_bytes() const { return exec_.host_persistent_bytes(); }
-  const std::map<int32, std::pair<int64, uint64>>& output_memory() const {
-    return output_memory_;
+  int64 host_persistent_bytes() const {
+    int64 host_persistent_bytes = 0;
+    for (const ExecMemory& exec : memory_execs_) {
+      host_persistent_bytes += exec.host_persistent_bytes();
+    }
+    return host_persistent_bytes;
   }
-  int64 allocator_bytes_in_use() const {
-    return exec_.allocator_bytes_in_use();
+  std::map<int64, int64> allocator_bytes_in_use() const {
+    std::map<int64, int64> bytes_in_use;
+    for (const ExecMemory& exec : memory_execs_) {
+      bytes_in_use[exec.memory_micros()] = exec.allocator_bytes_in_use();
+    }
+    return bytes_in_use;
   }
 
-  const std::vector<Allocation>& allocations() const { return allocations_; }
+  const std::vector<AllocationRecord>& allocations() const {
+    return allocations_;
+  }
 
   const ExecProfile& ToProto() {
     exec_.mutable_accelerator_execs()->clear();
@@ -169,19 +214,15 @@ class ExecStep {
     for (const string& d : devices_) {
       exec_.add_devices(d);
     }
-
-    exec_.mutable_output_memory()->clear();
-    for (const auto& mem : output_memory_) {
-      auto& mem_pb = (*exec_.mutable_output_memory())[mem.first];
-      mem_pb.set_bytes(mem.second.first);
-      mem_pb.set_ptr(mem.second.second);
-    }
-
     exec_.mutable_allocations()->Clear();
     for (const auto& r : allocations_) {
       exec_.add_allocations()->MergeFrom(r);
     }
 
+    exec_.mutable_memory_execs()->Clear();
+    for (const auto& m : memory_execs_) {
+      exec_.add_memory_execs()->MergeFrom(m);
+    }
     return exec_;
   }
 
@@ -197,6 +238,7 @@ class ExecStep {
     op_execs_.clear();
 
     allocations_.clear();
+    memory_execs_.clear();
 
     for (const auto& exec_time : exec_.accelerator_execs()) {
       auto& exec = accelerator_execs_[exec_time.first];
@@ -214,15 +256,12 @@ class ExecStep {
         op_exec.push_back(std::make_pair(p.int64_values(0), p.int64_values(1)));
       }
     }
-    for (const auto& output_mem : exec_.output_memory()) {
-      auto& mem = output_memory_[output_mem.first];
-      mem.first = output_mem.second.bytes();
-      mem.second = output_mem.second.ptr();
-    }
-
     for (const auto& r : exec_.allocations()) {
       allocations_.push_back(r);
     }
+    for (const auto& m : exec_.memory_execs()) {
+      memory_execs_.push_back(m);
+    }
   }
 
  private:
@@ -237,14 +276,15 @@ class ExecStep {
   std::map<string, std::vector<std::pair<int64, int64>>> cpu_execs_;
   // combines accelerator_execs_ and cpu_execs_.
   std::map<string, std::vector<std::pair<int64, int64>>> op_execs_;
+  // Each ExecMemory corresponds to one scheduling of the op. Normally,
+  // there are multiple schedulings in while_loop.
+  std::vector<ExecMemory> memory_execs_;
   // All devices the op is associated with (e.g. gpu:0 (scheduling),
   // gpu:0:stream:xx (kernel exec), cpu:0 host)
   std::set<string> devices_;
-  // output_idx -> {output_bytes, memory_ptr}
-  std::map<int32, std::pair<int64, uint64>> output_memory_;
 
   // The history of accelerator allocations and deallocations of this step.
-  std::vector<Allocation> allocations_;
+  std::vector<AllocationRecord> allocations_;
 };
 
 #define GRAPH_NODE_BYTES(type)             \
@@ -598,23 +638,15 @@ class TFGraphNode {
     }
     return persistent_bytes;
   }
-  const std::map<int32, std::pair<int64, uint64>>& output_memory(
-      int64 step) const {
+  const std::map<int64, int64> allocator_bytes_in_use(int64 step) const {
     auto exec = execs_.find(step);
     if (exec == execs_.end()) {
-      return empty_output_memory_;
-    }
-    return exec->second.output_memory();
-  }
-  int64 allocator_bytes_in_use(int64 step) const {
-    auto exec = execs_.find(step);
-    if (exec == execs_.end()) {
-      return 0;
+      return empty_bytes_in_use_;
     }
     return exec->second.allocator_bytes_in_use();
   }
 
-  const std::vector<Allocation>& allocations(int64 step) const {
+  const std::vector<AllocationRecord>& allocations(int64 step) const {
     auto exec = execs_.find(step);
     if (exec == execs_.end()) {
       return empty_allocations_;
@@ -719,9 +751,9 @@ class TFGraphNode {
   std::map<int64, ExecStep> execs_;
 
   // Placeholder for empty cases.
-  std::map<int32, std::pair<int64, uint64>> empty_output_memory_;
+  std::map<int64, int64> empty_bytes_in_use_;
   std::map<string, std::vector<std::pair<int64, int64>>> empty_execs_;
-  std::vector<Allocation> empty_allocations_;
+  std::vector<AllocationRecord> empty_allocations_;
 };
 
 class TFMultiGraphNode {
@@ -874,6 +906,7 @@ class TFMultiGraphNode {
   std::map<string, const TFGraphNode*> nodes_;
 };
 
+bool IsPlacedOnCPU(const string& device);
 bool IsPlacedOnAccelerator(const string& device);
 bool CountAsAcceleratorTime(const string& device);
 bool CountAsCPUTime(const string& device);
diff --git a/tensorflow/core/profiler/internal/tfprof_timeline.cc b/tensorflow/core/profiler/internal/tfprof_timeline.cc
index bdb000747d..b0dd8ce5e0 100644
--- a/tensorflow/core/profiler/internal/tfprof_timeline.cc
+++ b/tensorflow/core/profiler/internal/tfprof_timeline.cc
@@ -153,10 +153,8 @@ void MemoryTracker::TrackNode(int64 step, const GraphNode* node) {
 
   std::map<int64, int64> allocs;
   for (const auto& alloc : node->node->allocations(step)) {
-    for (const auto& r : alloc.allocation_records()) {
-      allocs[r.alloc_micros()] += r.alloc_bytes();
-      dev.tracked_allocations[r.alloc_micros()] += r.alloc_bytes();
-    }
+    allocs[alloc.alloc_micros()] += alloc.alloc_bytes();
+    dev.tracked_allocations[alloc.alloc_micros()] += alloc.alloc_bytes();
   }
   dev.tracked_allocations[0] += node->node->accelerator_persistent_bytes();
   allocs[0] += node->node->accelerator_persistent_bytes();
@@ -167,9 +165,9 @@ void MemoryTracker::TrackNode(int64 step, const GraphNode* node) {
     last += it->second;
     aggregate_allocs[it->first] = last;
   }
-  int64 end_micros = node->node->lastest_schedule_end_micros(step);
-  if (end_micros > 0 && node->node->allocator_bytes_in_use(step) > 0) {
-    dev.allocations[end_micros] = node->node->allocator_bytes_in_use(step);
+  for (const auto& bytes_in_use : node->node->allocator_bytes_in_use(step)) {
+    if (bytes_in_use.first <= 0) continue;
+    dev.allocations[bytes_in_use.first] = bytes_in_use.second;
   }
 }
 
@@ -265,6 +263,10 @@ void Timeline::GenerateGraphTimeline(const std::vector<GraphNode*>& gnodes) {
     }
   }
   for (const auto& dev : mem_tracker_.devices()) {
+    if (IsPlacedOnCPU(dev.first)) {
+      // TODO(xpan): Maybe also support CPU allocator memory tracking.
+      continue;
+    }
     int64 pid = AllocatePID();
     chrome_formatter_.EmitPID(GetMemoryLaneName(dev.first), pid);
     int64 pid2 = AllocatePID();
diff --git a/tensorflow/core/profiler/internal/tfprof_timeline_test.cc b/tensorflow/core/profiler/internal/tfprof_timeline_test.cc
index 91eac0cf76..6a7ab01029 100644
--- a/tensorflow/core/profiler/internal/tfprof_timeline_test.cc
+++ b/tensorflow/core/profiler/internal/tfprof_timeline_test.cc
@@ -71,7 +71,7 @@ TEST_F(TFProfTimelineTest, GraphView) {
 
   string dump_str;
   TF_CHECK_OK(ReadFileToString(Env::Default(), dump_file + "_0", &dump_str));
-  EXPECT_EQ(7932146665024565912ull, Hash64(dump_str));
+  EXPECT_EQ(16556121177519539380ull, Hash64(dump_str));
 }
 
 TEST_F(TFProfTimelineTest, ScopeView) {
diff --git a/tensorflow/core/profiler/tfprof_log.proto b/tensorflow/core/profiler/tfprof_log.proto
index b49bdf64ac..0bf1b477ed 100644
--- a/tensorflow/core/profiler/tfprof_log.proto
+++ b/tensorflow/core/profiler/tfprof_log.proto
@@ -90,10 +90,6 @@ message ProfileNode {
   map<int64, ExecProfile> execs = 12;
 }
 
-message Allocation {
-  repeated AllocationRecord allocation_records = 1;
-}
-
 message ExecProfile {
   // Can be larger than 1 if run multiple times in loop.
   int64 run_count = 1;
@@ -110,35 +106,42 @@ message ExecProfile {
   // For cpu, vector size can be larger than 1 if in tf.while_loop.
   map<string, ExecTime> cpu_execs = 5;
 
-  map<int32, Memory> output_memory = 17;
+  // Each entry to memory information of a scheduling of the node.
+  // Normally, there will be multiple entries in while_loop.
+  repeated ExecMemory memory_execs = 7;
+  // The allocation and deallocation times and sizes throughout execution.
+  repeated AllocationRecord allocations = 11;
+  // The devices related to this execution.
+  repeated string devices = 6;
+}
 
-  repeated Allocation allocations = 18;
+message ExecTime {
+  repeated Tuple times = 1;
+}
 
-  repeated string devices = 6;
+message ExecMemory {
+  // This is the timestamp when the memory information was tracked.
+  int64 memory_micros = 1;
+  // NOTE: Please don't depend on the following 4 fields yet. Due to
+  // TensorFlow internal tracing issues, the numbers can be quite wrong.
+  // TODO(xpan): Fix the TensorFlow internal tracing.
+  int64 host_temp_bytes = 2;
+  int64 host_persistent_bytes = 3;
+  int64 accelerator_temp_bytes = 4;
+  int64 accelerator_persistent_bytes = 5;
 
   // Total bytes requested by the op.
-  int64 requested_bytes = 7;
+  int64 requested_bytes = 6;
   // Total bytes requested by the op and released before op end.
-  int64 peak_bytes = 8;
+  int64 peak_bytes = 7;
   // Total bytes requested by the op and not released after op end.
-  int64 residual_bytes = 9;
+  int64 residual_bytes = 8;
   // Total bytes output by the op (not necessarily requested by the op).
-  int64 output_bytes = 10;
-  // NOTE: Please don't depend on the following 4 fields yet. Due to
-  // TensorFlow internal tracing issues, the numbers can be quite wrong.
-  // TODO(xpan): Fix the TensorFlow internal tracing.
-  int64 host_temp_bytes = 11;
-  int64 host_persistent_bytes = 12;
-  int64 accelerator_temp_bytes = 13;
-  int64 accelerator_persistent_bytes = 14;
+  int64 output_bytes = 9;
   // The total number of bytes currently allocated by the allocator if >0.
-  int64 allocator_bytes_in_use = 15;
-
-  bool memory_intialized = 16;
-}
-
-message ExecTime {
-  repeated Tuple times = 1;
+  int64 allocator_bytes_in_use = 10;
+  // The memory of each output of the operation.
+  map<int32, Memory> output_memory = 11;
 }
 
 message Tuple {
diff --git a/tensorflow/python/profiler/model_analyzer_test.py b/tensorflow/python/profiler/model_analyzer_test.py
index ccfb9aac53..5d524c8c74 100644
--- a/tensorflow/python/profiler/model_analyzer_test.py
+++ b/tensorflow/python/profiler/model_analyzer_test.py
@@ -29,9 +29,12 @@ from tensorflow.core.profiler import profile_pb2
 from tensorflow.core.protobuf import config_pb2
 from tensorflow.core.protobuf import rewriter_config_pb2
 from tensorflow.python.client import session
+from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
 from tensorflow.python.framework import test_util
 from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import control_flow_ops
+from tensorflow.python.ops import gradients
 from tensorflow.python.ops import random_ops
 from tensorflow.python.ops import variables
 from tensorflow.python.platform import gfile
@@ -730,6 +733,43 @@ class PrintModelAnalysisTest(test.TestCase):
       self.assertEqual(n.output_bytes, n2.output_bytes)
       self.assertEqual(n.residual_bytes, n2.residual_bytes)
 
+  def testTraceLoopBytes(self):
+    if not test.is_gpu_available(): return
+    ops.reset_default_graph()
+    steps = 100
+
+    with ops.device('/gpu:0'):
+      x = array_ops.ones((100, 100), dtype=dtypes.float32)
+      n = array_ops.constant(steps, dtype=dtypes.int32)
+      x1 = array_ops.ones((100, 100))
+
+      x *= x1
+      def loop_body(i, x):
+        x *= x
+        return i + 1, x
+
+      _, y = control_flow_ops.while_loop(
+          lambda i, x: i < n, loop_body,
+          [array_ops.constant(0), x])
+
+    grad = gradients.gradients(y, [x1])
+
+    with session.Session() as sess:
+      run_options = config_pb2.RunOptions(
+          trace_level=config_pb2.RunOptions.FULL_TRACE)
+      run_metadata = config_pb2.RunMetadata()
+      sess.run(grad, options=run_options, run_metadata=run_metadata)
+
+      options = option_builder.ProfileOptionBuilder.time_and_memory()
+      options['min_bytes'] = 0
+      options['min_micros'] = 0
+      options['select'] = ('bytes', 'peak_bytes', 'output_bytes',
+                           'residual_bytes')
+      options['output'] = 'none'
+      ret_pb = model_analyzer.profile(
+          sess.graph, run_meta=run_metadata, cmd='scope', options=options)
+      self.assertGreater(ret_pb.total_requested_bytes, 1000000)
+
 
 if __name__ == '__main__':
   test.main()
-- 
GitLab


From b1c64c61ad15ee90ae64a0b49f79646508a180d2 Mon Sep 17 00:00:00 2001
From: Derek Murray <mrry@google.com>
Date: Fri, 8 Dec 2017 16:34:00 -0800
Subject: [PATCH 0827/1225] [tf.data] Move tests from python/kernel_tests to
 python/data/kernel_tests.

This makes it easier to run all core tf.data tests with a single command, i.e.:

bazel test //tensorflow/python/data/...

PiperOrigin-RevId: 178444314
---
 tensorflow/BUILD                              |   1 +
 tensorflow/contrib/cmake/tf_tests.cmake       |   9 +-
 tensorflow/python/data/kernel_tests/BUILD     | 375 ++++++++++++++++++
 .../kernel_tests/batch_dataset_op_test.py     |   0
 .../kernel_tests/cache_dataset_op_test.py     |   0
 .../concatenate_dataset_op_test.py            |   0
 .../dataset_constructor_op_test.py            |   0
 .../dataset_from_generator_op_test.py         |   0
 .../kernel_tests/filter_dataset_op_test.py    |   0
 .../kernel_tests/flat_map_dataset_op_test.py  |   0
 .../interleave_dataset_op_test.py             |   0
 .../kernel_tests/iterator_ops_cluster_test.py |   0
 .../kernel_tests/iterator_ops_test.py         |   0
 .../list_files_dataset_op_test.py             |   0
 .../kernel_tests/map_dataset_op_test.py       |   0
 .../kernel_tests/prefetch_dataset_op_test.py  |   0
 .../kernel_tests/range_dataset_op_test.py     |   0
 .../kernel_tests/reader_dataset_ops_test.py   |   0
 .../kernel_tests/sequence_dataset_op_test.py  |   0
 .../kernel_tests/shard_dataset_op_test.py     |   0
 .../kernel_tests/shuffle_dataset_op_test.py   |   0
 .../kernel_tests/zip_dataset_op_test.py       |   0
 tensorflow/python/kernel_tests/BUILD          | 352 ----------------
 23 files changed, 381 insertions(+), 356 deletions(-)
 create mode 100644 tensorflow/python/data/kernel_tests/BUILD
 rename tensorflow/python/{ => data}/kernel_tests/batch_dataset_op_test.py (100%)
 rename tensorflow/python/{ => data}/kernel_tests/cache_dataset_op_test.py (100%)
 rename tensorflow/python/{ => data}/kernel_tests/concatenate_dataset_op_test.py (100%)
 rename tensorflow/python/{ => data}/kernel_tests/dataset_constructor_op_test.py (100%)
 rename tensorflow/python/{ => data}/kernel_tests/dataset_from_generator_op_test.py (100%)
 rename tensorflow/python/{ => data}/kernel_tests/filter_dataset_op_test.py (100%)
 rename tensorflow/python/{ => data}/kernel_tests/flat_map_dataset_op_test.py (100%)
 rename tensorflow/python/{ => data}/kernel_tests/interleave_dataset_op_test.py (100%)
 rename tensorflow/python/{ => data}/kernel_tests/iterator_ops_cluster_test.py (100%)
 rename tensorflow/python/{ => data}/kernel_tests/iterator_ops_test.py (100%)
 rename tensorflow/python/{ => data}/kernel_tests/list_files_dataset_op_test.py (100%)
 rename tensorflow/python/{ => data}/kernel_tests/map_dataset_op_test.py (100%)
 rename tensorflow/python/{ => data}/kernel_tests/prefetch_dataset_op_test.py (100%)
 rename tensorflow/python/{ => data}/kernel_tests/range_dataset_op_test.py (100%)
 rename tensorflow/python/{ => data}/kernel_tests/reader_dataset_ops_test.py (100%)
 rename tensorflow/python/{ => data}/kernel_tests/sequence_dataset_op_test.py (100%)
 rename tensorflow/python/{ => data}/kernel_tests/shard_dataset_op_test.py (100%)
 rename tensorflow/python/{ => data}/kernel_tests/shuffle_dataset_op_test.py (100%)
 rename tensorflow/python/{ => data}/kernel_tests/zip_dataset_op_test.py (100%)

diff --git a/tensorflow/BUILD b/tensorflow/BUILD
index 48f594c954..5a12fd17ed 100644
--- a/tensorflow/BUILD
+++ b/tensorflow/BUILD
@@ -594,6 +594,7 @@ filegroup(
         "//tensorflow/java/src/main/native:all_files",
         "//tensorflow/python:all_files",
         "//tensorflow/python/data:all_files",
+        "//tensorflow/python/data/kernel_tests:all_files",
         "//tensorflow/python/data/ops:all_files",
         "//tensorflow/python/data/util:all_files",
         "//tensorflow/python/debug:all_files",
diff --git a/tensorflow/contrib/cmake/tf_tests.cmake b/tensorflow/contrib/cmake/tf_tests.cmake
index 1f4703023a..9ed5b4b9de 100644
--- a/tensorflow/contrib/cmake/tf_tests.cmake
+++ b/tensorflow/contrib/cmake/tf_tests.cmake
@@ -139,6 +139,7 @@ if (tensorflow_BUILD_PYTHON_TESTS)
 
   file(GLOB_RECURSE tf_test_src_py
     ${tf_test_rnn_src_py}
+    "${tensorflow_source_dir}/tensorflow/python/data/kernel_tests/*.py"
     "${tensorflow_source_dir}/tensorflow/python/debug/cli/*_test.py"
     "${tensorflow_source_dir}/tensorflow/python/debug/lib/*_test.py"
     "${tensorflow_source_dir}/tensorflow/python/debug/wrappers/*_test.py"
@@ -238,11 +239,11 @@ if (tensorflow_BUILD_PYTHON_TESTS)
       "${tensorflow_source_dir}/tensorflow/python/training/sync_replicas_optimizer_test.py"
       "${tensorflow_source_dir}/tensorflow/python/debug/lib/session_debug_grpc_test.py"
       "${tensorflow_source_dir}tensorflow/python/training/localhost_cluster_performance_test.py"
-      "${tensorflow_source_dir}/tensorflow/python/kernel_tests/iterator_ops_cluster_test.py"
+      "${tensorflow_source_dir}/tensorflow/python/data/kernel_tests/iterator_ops_cluster_test.py"
       "${tensorflow_source_dir}/tensorflow/python/kernel_tests/functional_ops_test.py"
       "${tensorflow_source_dir}/tensorflow/contrib/data/python/kernel_tests/iterator_ops_cluster_test.py"
       # Type error in testRemoteIteratorUsingRemoteCallOpDirectSessionGPUCPU.
-      "${tensorflow_source_dir}/tensorflow/python/kernel_tests/iterator_ops_test.py"
+      "${tensorflow_source_dir}/tensorflow/python/data/kernel_tests/iterator_ops_test.py"
       "${tensorflow_source_dir}/tensorflow/python/kernel_tests/self_adjoint_eig_op_test.py"
       "${tensorflow_source_dir}/tensorflow/contrib/data/python/kernel_tests/iterator_ops_test.py"
       # IteratorGetMax OutOfRangeError
@@ -266,9 +267,9 @@ if (tensorflow_BUILD_PYTHON_TESTS)
       "${tensorflow_source_dir}/tensorflow/python/kernel_tests/linalg_grad_test.py"  # cudaSolver handle creation fails.
       "${tensorflow_source_dir}/tensorflow/python/kernel_tests/array_ops_test.py"  # depends on python/framework/test_ops
       # Dataset tests
-      "${tensorflow_source_dir}/tensorflow/python/kernel_tests/dataset_constructor_op_test.py"  # Segfaults on windows
+      "${tensorflow_source_dir}/tensorflow/python/data/kernel_tests/dataset_constructor_op_test.py"  # Segfaults on windows
       "${tensorflow_source_dir}/tensorflow/contrib/data/python/kernel_tests/dataset_constructor_op_test.py"  # Segfaults on Windows.
-      "${tensorflow_source_dir}/tensorflow/python/kernel_tests/iterator_ops_cluster_test.py"
+      "${tensorflow_source_dir}/tensorflow/python/data/kernel_tests/iterator_ops_cluster_test.py"
       # Broken tensorboard test due to cmake issues.
       "${tensorflow_source_dir}/tensorflow/contrib/data/python/kernel_tests/iterator_ops_cluster_test.py"  # Needs portpicker
       "${tensorflow_source_dir}/tensorflow/contrib/data/python/kernel_tests/sloppy_transformation_dataset_op_test.py"  # b/65430561
diff --git a/tensorflow/python/data/kernel_tests/BUILD b/tensorflow/python/data/kernel_tests/BUILD
new file mode 100644
index 0000000000..4fd7c99ac6
--- /dev/null
+++ b/tensorflow/python/data/kernel_tests/BUILD
@@ -0,0 +1,375 @@
+# Tests of TensorFlow kernels written using the Python API.
+
+package(
+    default_visibility = ["//tensorflow:internal"],
+)
+
+licenses(["notice"])  # Apache 2.0
+
+load("//tensorflow:tensorflow.bzl", "tf_py_test")
+load("//tensorflow:tensorflow.bzl", "cuda_py_test")
+load("//tensorflow:tensorflow.bzl", "sycl_py_test")
+
+tf_py_test(
+    name = "batch_dataset_op_test",
+    size = "small",
+    srcs = ["batch_dataset_op_test.py"],
+    additional_deps = [
+        "//third_party/py/numpy",
+        "//tensorflow/python:array_ops",
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:constant_op",
+        "//tensorflow/python:dtypes",
+        "//tensorflow/python:errors",
+        "//tensorflow/python:math_ops",
+        "//tensorflow/python:string_ops",
+        "//tensorflow/python:tensor_shape",
+        "//tensorflow/python:util",
+        "//tensorflow/python/data/ops:dataset_ops",
+    ],
+)
+
+tf_py_test(
+    name = "dataset_constructor_op_test",
+    size = "small",
+    srcs = ["dataset_constructor_op_test.py"],
+    additional_deps = [
+        "//third_party/py/numpy",
+        "//tensorflow/core:protos_all_py",
+        "//tensorflow/python:array_ops",
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:dtypes",
+        "//tensorflow/python:errors",
+        "//tensorflow/python:framework_ops",
+        "//tensorflow/python:math_ops",
+        "//tensorflow/python:random_ops",
+        "//tensorflow/python:resource_variable_ops",
+        "//tensorflow/python:session",
+        "//tensorflow/python:sparse_tensor",
+        "//tensorflow/python:tensor_shape",
+        "//tensorflow/python/data/ops:dataset_ops",
+        "//tensorflow/python/data/util:nest",
+        "//tensorflow/python/data/util:sparse",
+    ],
+    tags = [
+        "manual",
+        "nomac",  # b/62040583
+    ],
+)
+
+tf_py_test(
+    name = "dataset_from_generator_op_test",
+    size = "small",
+    srcs = ["dataset_from_generator_op_test.py"],
+    additional_deps = [
+        "//third_party/py/numpy",
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:dtypes",
+        "//tensorflow/python:errors",
+        "//tensorflow/python:tensor_shape",
+        "//tensorflow/python/data/ops:dataset_ops",
+        "//tensorflow/python/data/util:sparse",
+    ],
+)
+
+tf_py_test(
+    name = "filter_dataset_op_test",
+    size = "small",
+    srcs = ["filter_dataset_op_test.py"],
+    additional_deps = [
+        "//third_party/py/numpy",
+        "//tensorflow/python:array_ops",
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:dtypes",
+        "//tensorflow/python:errors",
+        "//tensorflow/python:functional_ops",
+        "//tensorflow/python:math_ops",
+        "//tensorflow/python:sparse_tensor",
+        "//tensorflow/python/data/ops:dataset_ops",
+    ],
+)
+
+tf_py_test(
+    name = "flat_map_dataset_op_test",
+    size = "small",
+    srcs = ["flat_map_dataset_op_test.py"],
+    additional_deps = [
+        "//third_party/py/numpy",
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:errors",
+        "//tensorflow/python:session",
+        "//tensorflow/python:sparse_ops",
+        "//tensorflow/python:sparse_tensor",
+        "//tensorflow/python:training",
+        "//tensorflow/python/data/ops:dataset_ops",
+    ],
+)
+
+tf_py_test(
+    name = "list_files_dataset_op_test",
+    size = "small",
+    srcs = ["list_files_dataset_op_test.py"],
+    additional_deps = [
+        "//tensorflow/python:array_ops",
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:dtypes",
+        "//tensorflow/python:errors",
+        "//tensorflow/python:util",
+        "//tensorflow/python/data/ops:dataset_ops",
+    ],
+)
+
+tf_py_test(
+    name = "interleave_dataset_op_test",
+    size = "small",
+    srcs = ["interleave_dataset_op_test.py"],
+    additional_deps = [
+        "//tensorflow/python:array_ops",
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:dtypes",
+        "//tensorflow/python:errors",
+        "//tensorflow/python:session",
+        "//tensorflow/python:sparse_ops",
+        "//tensorflow/python:sparse_tensor",
+        "//tensorflow/python:training",
+        "//tensorflow/python/data/ops:dataset_ops",
+    ],
+)
+
+tf_py_test(
+    name = "map_dataset_op_test",
+    size = "small",
+    srcs = ["map_dataset_op_test.py"],
+    additional_deps = [
+        "//third_party/py/numpy",
+        "//tensorflow/python:array_ops",
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:constant_op",
+        "//tensorflow/python:data_flow_ops",
+        "//tensorflow/python:dtypes",
+        "//tensorflow/python:errors",
+        "//tensorflow/python:functional_ops",
+        "//tensorflow/python:lookup_ops",
+        "//tensorflow/python:math_ops",
+        "//tensorflow/python:random_ops",
+        "//tensorflow/python:script_ops",
+        "//tensorflow/python:sparse_ops",
+        "//tensorflow/python:sparse_tensor",
+        "//tensorflow/python:string_ops",
+        "//tensorflow/python:variable_scope",
+        "//tensorflow/python/data/ops:dataset_ops",
+    ],
+)
+
+tf_py_test(
+    name = "prefetch_dataset_op_test",
+    size = "small",
+    srcs = ["prefetch_dataset_op_test.py"],
+    additional_deps = [
+        "//tensorflow/python:array_ops",
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:dataset_ops_gen",
+        "//tensorflow/python:dtypes",
+        "//tensorflow/python:errors",
+        "//tensorflow/python/data/ops:dataset_ops",
+    ],
+)
+
+tf_py_test(
+    name = "range_dataset_op_test",
+    size = "small",
+    srcs = ["range_dataset_op_test.py"],
+    additional_deps = [
+        "//tensorflow/python:array_ops",
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:dataset_ops_gen",
+        "//tensorflow/python:dtypes",
+        "//tensorflow/python:errors",
+        "//tensorflow/python:io_ops",
+        "//tensorflow/python:framework_ops",
+        "//tensorflow/python:parsing_ops",
+        "//tensorflow/python:platform",
+        "//tensorflow/python:tensor_shape",
+        "//tensorflow/python:variables",
+        "//tensorflow/python/data/ops:dataset_ops",
+        "//tensorflow/python/data/ops:iterator_ops",
+    ],
+)
+
+tf_py_test(
+    name = "reader_dataset_ops_test",
+    size = "small",
+    srcs = ["reader_dataset_ops_test.py"],
+    additional_deps = [
+        "//tensorflow/python:array_ops",
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:constant_op",
+        "//tensorflow/python:dataset_ops_gen",
+        "//tensorflow/python:dtypes",
+        "//tensorflow/python:errors",
+        "//tensorflow/python:framework_ops",
+        "//tensorflow/python:io_ops",
+        "//tensorflow/python:lib",
+        "//tensorflow/python:parsing_ops",
+        "//tensorflow/python:tensor_shape",
+        "//tensorflow/python:util",
+        "//tensorflow/python/data/ops:iterator_ops",
+        "//tensorflow/python/data/ops:readers",
+    ],
+)
+
+tf_py_test(
+    name = "sequence_dataset_op_test",
+    size = "small",
+    srcs = ["sequence_dataset_op_test.py"],
+    additional_deps = [
+        "//third_party/py/numpy",
+        "//tensorflow/python:array_ops",
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:dtypes",
+        "//tensorflow/python:errors",
+        "//tensorflow/python/data/ops:dataset_ops",
+    ],
+)
+
+tf_py_test(
+    name = "shuffle_dataset_op_test",
+    size = "small",
+    srcs = ["shuffle_dataset_op_test.py"],
+    additional_deps = [
+        "//third_party/py/numpy",
+        "//tensorflow/python:array_ops",
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:constant_op",
+        "//tensorflow/python:dtypes",
+        "//tensorflow/python:errors",
+        "//tensorflow/python/data/ops:dataset_ops",
+        "//tensorflow/python/data/ops:iterator_ops",
+    ],
+)
+
+tf_py_test(
+    name = "shard_dataset_op_test",
+    size = "small",
+    srcs = ["shard_dataset_op_test.py"],
+    additional_deps = [
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:errors",
+        "//tensorflow/python/data/ops:dataset_ops",
+    ],
+)
+
+tf_py_test(
+    name = "cache_dataset_op_test",
+    size = "small",
+    srcs = ["cache_dataset_op_test.py"],
+    additional_deps = [
+        "//third_party/py/numpy",
+        "//tensorflow/python:array_ops",
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:constant_op",
+        "//tensorflow/python:dtypes",
+        "//tensorflow/python:errors",
+        "//tensorflow/python:variables",
+        "//tensorflow/python/data/ops:dataset_ops",
+        "//tensorflow/python/data/ops:iterator_ops",
+    ],
+)
+
+tf_py_test(
+    name = "zip_dataset_op_test",
+    size = "small",
+    srcs = ["zip_dataset_op_test.py"],
+    additional_deps = [
+        "//third_party/py/numpy",
+        "//tensorflow/python:array_ops",
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:dtypes",
+        "//tensorflow/python:errors",
+        "//tensorflow/python/data/ops:dataset_ops",
+    ],
+)
+
+tf_py_test(
+    name = "concatenate_dataset_op_test",
+    size = "small",
+    srcs = ["concatenate_dataset_op_test.py"],
+    additional_deps = [
+        "//third_party/py/numpy",
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:errors",
+        "//tensorflow/python:tensor_shape",
+        "//tensorflow/python/data/ops:dataset_ops",
+        "//tensorflow/python/data/util:nest",
+    ],
+)
+
+tf_py_test(
+    name = "iterator_ops_test",
+    size = "small",
+    srcs = ["iterator_ops_test.py"],
+    additional_deps = [
+        "//third_party/py/numpy",
+        "//tensorflow/python/data/ops:readers",
+        "//tensorflow/core:protos_all_py",
+        "//tensorflow/python/data/ops:dataset_ops",
+        "//tensorflow/python/data/ops:iterator_ops",
+        "//tensorflow/python/data/util:sparse",
+        "//tensorflow/python:array_ops",
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:constant_op",
+        "//tensorflow/python:dataset_ops_gen",
+        "//tensorflow/python:dtypes",
+        "//tensorflow/python:errors",
+        "//tensorflow/python:framework_ops",
+        "//tensorflow/python:framework_test_lib",
+        "//tensorflow/python:function",
+        "//tensorflow/python:functional_ops",
+        "//tensorflow/python:gradients",
+        "//tensorflow/python:io_ops",
+        "//tensorflow/python:math_ops",
+        "//tensorflow/python:parsing_ops",
+        "//tensorflow/python:random_ops",
+        "//tensorflow/python:script_ops",
+        "//tensorflow/python:session",
+        "//tensorflow/python:sparse_tensor",
+        "//tensorflow/python:tensor_shape",
+        "//tensorflow/python:training",
+    ],
+)
+
+tf_py_test(
+    name = "iterator_ops_cluster_test",
+    size = "small",
+    srcs = ["iterator_ops_cluster_test.py"],
+    additional_deps = [
+        "//tensorflow/core:protos_all_py",
+        "//tensorflow/python:array_ops",
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:dtypes",
+        "//tensorflow/python:errors",
+        "//tensorflow/python:framework_ops",
+        "//tensorflow/python:framework_test_lib",
+        "//tensorflow/python:function",
+        "//tensorflow/python:functional_ops",
+        "//tensorflow/python:session",
+        "//tensorflow/python/data/ops:dataset_ops",
+        "//tensorflow/python/data/ops:iterator_ops",
+    ],
+    tags = [
+        "no_oss",  # Test flaky due to port collisions.
+        "no_windows",
+    ],
+)
+
+filegroup(
+    name = "all_files",
+    srcs = glob(
+        ["**/*"],
+        exclude = [
+            "**/METADATA",
+            "**/OWNERS",
+        ],
+    ),
+    visibility = ["//tensorflow:__subpackages__"],
+)
diff --git a/tensorflow/python/kernel_tests/batch_dataset_op_test.py b/tensorflow/python/data/kernel_tests/batch_dataset_op_test.py
similarity index 100%
rename from tensorflow/python/kernel_tests/batch_dataset_op_test.py
rename to tensorflow/python/data/kernel_tests/batch_dataset_op_test.py
diff --git a/tensorflow/python/kernel_tests/cache_dataset_op_test.py b/tensorflow/python/data/kernel_tests/cache_dataset_op_test.py
similarity index 100%
rename from tensorflow/python/kernel_tests/cache_dataset_op_test.py
rename to tensorflow/python/data/kernel_tests/cache_dataset_op_test.py
diff --git a/tensorflow/python/kernel_tests/concatenate_dataset_op_test.py b/tensorflow/python/data/kernel_tests/concatenate_dataset_op_test.py
similarity index 100%
rename from tensorflow/python/kernel_tests/concatenate_dataset_op_test.py
rename to tensorflow/python/data/kernel_tests/concatenate_dataset_op_test.py
diff --git a/tensorflow/python/kernel_tests/dataset_constructor_op_test.py b/tensorflow/python/data/kernel_tests/dataset_constructor_op_test.py
similarity index 100%
rename from tensorflow/python/kernel_tests/dataset_constructor_op_test.py
rename to tensorflow/python/data/kernel_tests/dataset_constructor_op_test.py
diff --git a/tensorflow/python/kernel_tests/dataset_from_generator_op_test.py b/tensorflow/python/data/kernel_tests/dataset_from_generator_op_test.py
similarity index 100%
rename from tensorflow/python/kernel_tests/dataset_from_generator_op_test.py
rename to tensorflow/python/data/kernel_tests/dataset_from_generator_op_test.py
diff --git a/tensorflow/python/kernel_tests/filter_dataset_op_test.py b/tensorflow/python/data/kernel_tests/filter_dataset_op_test.py
similarity index 100%
rename from tensorflow/python/kernel_tests/filter_dataset_op_test.py
rename to tensorflow/python/data/kernel_tests/filter_dataset_op_test.py
diff --git a/tensorflow/python/kernel_tests/flat_map_dataset_op_test.py b/tensorflow/python/data/kernel_tests/flat_map_dataset_op_test.py
similarity index 100%
rename from tensorflow/python/kernel_tests/flat_map_dataset_op_test.py
rename to tensorflow/python/data/kernel_tests/flat_map_dataset_op_test.py
diff --git a/tensorflow/python/kernel_tests/interleave_dataset_op_test.py b/tensorflow/python/data/kernel_tests/interleave_dataset_op_test.py
similarity index 100%
rename from tensorflow/python/kernel_tests/interleave_dataset_op_test.py
rename to tensorflow/python/data/kernel_tests/interleave_dataset_op_test.py
diff --git a/tensorflow/python/kernel_tests/iterator_ops_cluster_test.py b/tensorflow/python/data/kernel_tests/iterator_ops_cluster_test.py
similarity index 100%
rename from tensorflow/python/kernel_tests/iterator_ops_cluster_test.py
rename to tensorflow/python/data/kernel_tests/iterator_ops_cluster_test.py
diff --git a/tensorflow/python/kernel_tests/iterator_ops_test.py b/tensorflow/python/data/kernel_tests/iterator_ops_test.py
similarity index 100%
rename from tensorflow/python/kernel_tests/iterator_ops_test.py
rename to tensorflow/python/data/kernel_tests/iterator_ops_test.py
diff --git a/tensorflow/python/kernel_tests/list_files_dataset_op_test.py b/tensorflow/python/data/kernel_tests/list_files_dataset_op_test.py
similarity index 100%
rename from tensorflow/python/kernel_tests/list_files_dataset_op_test.py
rename to tensorflow/python/data/kernel_tests/list_files_dataset_op_test.py
diff --git a/tensorflow/python/kernel_tests/map_dataset_op_test.py b/tensorflow/python/data/kernel_tests/map_dataset_op_test.py
similarity index 100%
rename from tensorflow/python/kernel_tests/map_dataset_op_test.py
rename to tensorflow/python/data/kernel_tests/map_dataset_op_test.py
diff --git a/tensorflow/python/kernel_tests/prefetch_dataset_op_test.py b/tensorflow/python/data/kernel_tests/prefetch_dataset_op_test.py
similarity index 100%
rename from tensorflow/python/kernel_tests/prefetch_dataset_op_test.py
rename to tensorflow/python/data/kernel_tests/prefetch_dataset_op_test.py
diff --git a/tensorflow/python/kernel_tests/range_dataset_op_test.py b/tensorflow/python/data/kernel_tests/range_dataset_op_test.py
similarity index 100%
rename from tensorflow/python/kernel_tests/range_dataset_op_test.py
rename to tensorflow/python/data/kernel_tests/range_dataset_op_test.py
diff --git a/tensorflow/python/kernel_tests/reader_dataset_ops_test.py b/tensorflow/python/data/kernel_tests/reader_dataset_ops_test.py
similarity index 100%
rename from tensorflow/python/kernel_tests/reader_dataset_ops_test.py
rename to tensorflow/python/data/kernel_tests/reader_dataset_ops_test.py
diff --git a/tensorflow/python/kernel_tests/sequence_dataset_op_test.py b/tensorflow/python/data/kernel_tests/sequence_dataset_op_test.py
similarity index 100%
rename from tensorflow/python/kernel_tests/sequence_dataset_op_test.py
rename to tensorflow/python/data/kernel_tests/sequence_dataset_op_test.py
diff --git a/tensorflow/python/kernel_tests/shard_dataset_op_test.py b/tensorflow/python/data/kernel_tests/shard_dataset_op_test.py
similarity index 100%
rename from tensorflow/python/kernel_tests/shard_dataset_op_test.py
rename to tensorflow/python/data/kernel_tests/shard_dataset_op_test.py
diff --git a/tensorflow/python/kernel_tests/shuffle_dataset_op_test.py b/tensorflow/python/data/kernel_tests/shuffle_dataset_op_test.py
similarity index 100%
rename from tensorflow/python/kernel_tests/shuffle_dataset_op_test.py
rename to tensorflow/python/data/kernel_tests/shuffle_dataset_op_test.py
diff --git a/tensorflow/python/kernel_tests/zip_dataset_op_test.py b/tensorflow/python/data/kernel_tests/zip_dataset_op_test.py
similarity index 100%
rename from tensorflow/python/kernel_tests/zip_dataset_op_test.py
rename to tensorflow/python/data/kernel_tests/zip_dataset_op_test.py
diff --git a/tensorflow/python/kernel_tests/BUILD b/tensorflow/python/kernel_tests/BUILD
index f017004e1a..feee202829 100644
--- a/tensorflow/python/kernel_tests/BUILD
+++ b/tensorflow/python/kernel_tests/BUILD
@@ -2793,101 +2793,6 @@ tf_py_test(
     ],
 )
 
-tf_py_test(
-    name = "batch_dataset_op_test",
-    size = "small",
-    srcs = ["batch_dataset_op_test.py"],
-    additional_deps = [
-        "//third_party/py/numpy",
-        "//tensorflow/python:array_ops",
-        "//tensorflow/python:client_testlib",
-        "//tensorflow/python:constant_op",
-        "//tensorflow/python:dtypes",
-        "//tensorflow/python:errors",
-        "//tensorflow/python:math_ops",
-        "//tensorflow/python:string_ops",
-        "//tensorflow/python:tensor_shape",
-        "//tensorflow/python:util",
-        "//tensorflow/python/data/ops:dataset_ops",
-    ],
-)
-
-tf_py_test(
-    name = "dataset_constructor_op_test",
-    size = "small",
-    srcs = ["dataset_constructor_op_test.py"],
-    additional_deps = [
-        "//third_party/py/numpy",
-        "//tensorflow/core:protos_all_py",
-        "//tensorflow/python:array_ops",
-        "//tensorflow/python:client_testlib",
-        "//tensorflow/python:dtypes",
-        "//tensorflow/python:errors",
-        "//tensorflow/python:framework_ops",
-        "//tensorflow/python:math_ops",
-        "//tensorflow/python:random_ops",
-        "//tensorflow/python:resource_variable_ops",
-        "//tensorflow/python:session",
-        "//tensorflow/python:sparse_tensor",
-        "//tensorflow/python:tensor_shape",
-        "//tensorflow/python/data/ops:dataset_ops",
-        "//tensorflow/python/data/util:nest",
-        "//tensorflow/python/data/util:sparse",
-    ],
-    tags = [
-        "manual",
-        "nomac",  # b/62040583
-    ],
-)
-
-tf_py_test(
-    name = "dataset_from_generator_op_test",
-    size = "small",
-    srcs = ["dataset_from_generator_op_test.py"],
-    additional_deps = [
-        "//third_party/py/numpy",
-        "//tensorflow/python:client_testlib",
-        "//tensorflow/python:dtypes",
-        "//tensorflow/python:errors",
-        "//tensorflow/python:tensor_shape",
-        "//tensorflow/python/data/ops:dataset_ops",
-        "//tensorflow/python/data/util:sparse",
-    ],
-)
-
-tf_py_test(
-    name = "filter_dataset_op_test",
-    size = "small",
-    srcs = ["filter_dataset_op_test.py"],
-    additional_deps = [
-        "//third_party/py/numpy",
-        "//tensorflow/python:array_ops",
-        "//tensorflow/python:client_testlib",
-        "//tensorflow/python:dtypes",
-        "//tensorflow/python:errors",
-        "//tensorflow/python:functional_ops",
-        "//tensorflow/python:math_ops",
-        "//tensorflow/python:sparse_tensor",
-        "//tensorflow/python/data/ops:dataset_ops",
-    ],
-)
-
-tf_py_test(
-    name = "flat_map_dataset_op_test",
-    size = "small",
-    srcs = ["flat_map_dataset_op_test.py"],
-    additional_deps = [
-        "//third_party/py/numpy",
-        "//tensorflow/python:client_testlib",
-        "//tensorflow/python:errors",
-        "//tensorflow/python:session",
-        "//tensorflow/python:sparse_ops",
-        "//tensorflow/python:sparse_tensor",
-        "//tensorflow/python:training",
-        "//tensorflow/python/data/ops:dataset_ops",
-    ],
-)
-
 tf_py_test(
     name = "garbage_collection_test",
     size = "small",
@@ -2902,263 +2807,6 @@ tf_py_test(
     ],
 )
 
-tf_py_test(
-    name = "list_files_dataset_op_test",
-    size = "small",
-    srcs = ["list_files_dataset_op_test.py"],
-    additional_deps = [
-        "//tensorflow/python:array_ops",
-        "//tensorflow/python:client_testlib",
-        "//tensorflow/python:dtypes",
-        "//tensorflow/python:errors",
-        "//tensorflow/python:util",
-        "//tensorflow/python/data/ops:dataset_ops",
-    ],
-)
-
-tf_py_test(
-    name = "interleave_dataset_op_test",
-    size = "small",
-    srcs = ["interleave_dataset_op_test.py"],
-    additional_deps = [
-        "//tensorflow/python:array_ops",
-        "//tensorflow/python:client_testlib",
-        "//tensorflow/python:dtypes",
-        "//tensorflow/python:errors",
-        "//tensorflow/python:session",
-        "//tensorflow/python:sparse_ops",
-        "//tensorflow/python:sparse_tensor",
-        "//tensorflow/python:training",
-        "//tensorflow/python/data/ops:dataset_ops",
-    ],
-)
-
-tf_py_test(
-    name = "map_dataset_op_test",
-    size = "small",
-    srcs = ["map_dataset_op_test.py"],
-    additional_deps = [
-        "//third_party/py/numpy",
-        "//tensorflow/python:array_ops",
-        "//tensorflow/python:client_testlib",
-        "//tensorflow/python:constant_op",
-        "//tensorflow/python:data_flow_ops",
-        "//tensorflow/python:dtypes",
-        "//tensorflow/python:errors",
-        "//tensorflow/python:functional_ops",
-        "//tensorflow/python:lookup_ops",
-        "//tensorflow/python:math_ops",
-        "//tensorflow/python:random_ops",
-        "//tensorflow/python:script_ops",
-        "//tensorflow/python:sparse_ops",
-        "//tensorflow/python:sparse_tensor",
-        "//tensorflow/python:string_ops",
-        "//tensorflow/python:variable_scope",
-        "//tensorflow/python/data/ops:dataset_ops",
-    ],
-)
-
-tf_py_test(
-    name = "prefetch_dataset_op_test",
-    size = "small",
-    srcs = ["prefetch_dataset_op_test.py"],
-    additional_deps = [
-        "//tensorflow/python:array_ops",
-        "//tensorflow/python:client_testlib",
-        "//tensorflow/python:dataset_ops_gen",
-        "//tensorflow/python:dtypes",
-        "//tensorflow/python:errors",
-        "//tensorflow/python/data/ops:dataset_ops",
-    ],
-)
-
-tf_py_test(
-    name = "range_dataset_op_test",
-    size = "small",
-    srcs = ["range_dataset_op_test.py"],
-    additional_deps = [
-        "//tensorflow/python:array_ops",
-        "//tensorflow/python:client_testlib",
-        "//tensorflow/python:dataset_ops_gen",
-        "//tensorflow/python:dtypes",
-        "//tensorflow/python:errors",
-        "//tensorflow/python:io_ops",
-        "//tensorflow/python:framework_ops",
-        "//tensorflow/python:parsing_ops",
-        "//tensorflow/python:platform",
-        "//tensorflow/python:tensor_shape",
-        "//tensorflow/python:variables",
-        "//tensorflow/python/data/ops:dataset_ops",
-        "//tensorflow/python/data/ops:iterator_ops",
-    ],
-)
-
-tf_py_test(
-    name = "reader_dataset_ops_test",
-    size = "small",
-    srcs = ["reader_dataset_ops_test.py"],
-    additional_deps = [
-        "//tensorflow/python:array_ops",
-        "//tensorflow/python:client_testlib",
-        "//tensorflow/python:constant_op",
-        "//tensorflow/python:dataset_ops_gen",
-        "//tensorflow/python:dtypes",
-        "//tensorflow/python:errors",
-        "//tensorflow/python:framework_ops",
-        "//tensorflow/python:io_ops",
-        "//tensorflow/python:lib",
-        "//tensorflow/python:parsing_ops",
-        "//tensorflow/python:tensor_shape",
-        "//tensorflow/python:util",
-        "//tensorflow/python/data/ops:iterator_ops",
-        "//tensorflow/python/data/ops:readers",
-    ],
-)
-
-tf_py_test(
-    name = "sequence_dataset_op_test",
-    size = "small",
-    srcs = ["sequence_dataset_op_test.py"],
-    additional_deps = [
-        "//third_party/py/numpy",
-        "//tensorflow/python:array_ops",
-        "//tensorflow/python:client_testlib",
-        "//tensorflow/python:dtypes",
-        "//tensorflow/python:errors",
-        "//tensorflow/python/data/ops:dataset_ops",
-    ],
-)
-
-tf_py_test(
-    name = "shuffle_dataset_op_test",
-    size = "small",
-    srcs = ["shuffle_dataset_op_test.py"],
-    additional_deps = [
-        "//third_party/py/numpy",
-        "//tensorflow/python:array_ops",
-        "//tensorflow/python:client_testlib",
-        "//tensorflow/python:constant_op",
-        "//tensorflow/python:dtypes",
-        "//tensorflow/python:errors",
-        "//tensorflow/python/data/ops:dataset_ops",
-        "//tensorflow/python/data/ops:iterator_ops",
-    ],
-)
-
-tf_py_test(
-    name = "shard_dataset_op_test",
-    size = "small",
-    srcs = ["shard_dataset_op_test.py"],
-    additional_deps = [
-        "//tensorflow/python:client_testlib",
-        "//tensorflow/python:errors",
-        "//tensorflow/python/data/ops:dataset_ops",
-    ],
-)
-
-tf_py_test(
-    name = "cache_dataset_op_test",
-    size = "small",
-    srcs = ["cache_dataset_op_test.py"],
-    additional_deps = [
-        "//third_party/py/numpy",
-        "//tensorflow/python:array_ops",
-        "//tensorflow/python:client_testlib",
-        "//tensorflow/python:constant_op",
-        "//tensorflow/python:dtypes",
-        "//tensorflow/python:errors",
-        "//tensorflow/python:variables",
-        "//tensorflow/python/data/ops:dataset_ops",
-        "//tensorflow/python/data/ops:iterator_ops",
-    ],
-)
-
-tf_py_test(
-    name = "zip_dataset_op_test",
-    size = "small",
-    srcs = ["zip_dataset_op_test.py"],
-    additional_deps = [
-        "//third_party/py/numpy",
-        "//tensorflow/python:array_ops",
-        "//tensorflow/python:client_testlib",
-        "//tensorflow/python:dtypes",
-        "//tensorflow/python:errors",
-        "//tensorflow/python/data/ops:dataset_ops",
-    ],
-)
-
-tf_py_test(
-    name = "concatenate_dataset_op_test",
-    size = "small",
-    srcs = ["concatenate_dataset_op_test.py"],
-    additional_deps = [
-        "//third_party/py/numpy",
-        "//tensorflow/python:client_testlib",
-        "//tensorflow/python:errors",
-        "//tensorflow/python:tensor_shape",
-        "//tensorflow/python/data/ops:dataset_ops",
-        "//tensorflow/python/data/util:nest",
-    ],
-)
-
-tf_py_test(
-    name = "iterator_ops_test",
-    size = "small",
-    srcs = ["iterator_ops_test.py"],
-    additional_deps = [
-        "//third_party/py/numpy",
-        "//tensorflow/python/data/ops:readers",
-        "//tensorflow/core:protos_all_py",
-        "//tensorflow/python/data/ops:dataset_ops",
-        "//tensorflow/python/data/ops:iterator_ops",
-        "//tensorflow/python/data/util:sparse",
-        "//tensorflow/python:array_ops",
-        "//tensorflow/python:client_testlib",
-        "//tensorflow/python:constant_op",
-        "//tensorflow/python:dataset_ops_gen",
-        "//tensorflow/python:dtypes",
-        "//tensorflow/python:errors",
-        "//tensorflow/python:framework_ops",
-        "//tensorflow/python:framework_test_lib",
-        "//tensorflow/python:function",
-        "//tensorflow/python:functional_ops",
-        "//tensorflow/python:gradients",
-        "//tensorflow/python:io_ops",
-        "//tensorflow/python:math_ops",
-        "//tensorflow/python:parsing_ops",
-        "//tensorflow/python:random_ops",
-        "//tensorflow/python:script_ops",
-        "//tensorflow/python:session",
-        "//tensorflow/python:sparse_tensor",
-        "//tensorflow/python:tensor_shape",
-        "//tensorflow/python:training",
-    ],
-)
-
-tf_py_test(
-    name = "iterator_ops_cluster_test",
-    size = "small",
-    srcs = ["iterator_ops_cluster_test.py"],
-    additional_deps = [
-        "//tensorflow/core:protos_all_py",
-        "//tensorflow/python:array_ops",
-        "//tensorflow/python:client_testlib",
-        "//tensorflow/python:dtypes",
-        "//tensorflow/python:errors",
-        "//tensorflow/python:framework_ops",
-        "//tensorflow/python:framework_test_lib",
-        "//tensorflow/python:function",
-        "//tensorflow/python:functional_ops",
-        "//tensorflow/python:session",
-        "//tensorflow/python/data/ops:dataset_ops",
-        "//tensorflow/python/data/ops:iterator_ops",
-    ],
-    tags = [
-        "no_oss",  # Test flaky due to port collisions.
-        "no_windows",
-    ],
-)
-
 filegroup(
     name = "all_files",
     srcs = glob(
-- 
GitLab


From 74780531e95b0e868403e5806e58a968e4563b13 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 8 Dec 2017 17:07:39 -0800
Subject: [PATCH 0828/1225] In MovingAverageOptimizer, delegate
 compute_gradients() to the wrapped optimizer, which is a bug fix in case the
 wrapper optimizer (or any other optimizer in the stack) does something
 non-standard in its compute_gradients() method.

PiperOrigin-RevId: 178447959
---
 .../training/moving_average_optimizer.py      |  3 ++
 .../training/moving_average_optimizer_test.py | 31 +++++++++++++++++++
 2 files changed, 34 insertions(+)

diff --git a/tensorflow/contrib/opt/python/training/moving_average_optimizer.py b/tensorflow/contrib/opt/python/training/moving_average_optimizer.py
index c48494585e..d68ad23d65 100644
--- a/tensorflow/contrib/opt/python/training/moving_average_optimizer.py
+++ b/tensorflow/contrib/opt/python/training/moving_average_optimizer.py
@@ -86,6 +86,9 @@ class MovingAverageOptimizer(optimizer.Optimizer):
     self._variable_map = None
     self._sequential_update = sequential_update
 
+  def compute_gradients(self, *args, **kwargs):
+    return self._optimizer.compute_gradients(*args, **kwargs)
+
   def apply_gradients(self, grads_and_vars, global_step=None, name=None):
     train_op = self._optimizer.apply_gradients(
         grads_and_vars, global_step=global_step, name=name)
diff --git a/tensorflow/contrib/opt/python/training/moving_average_optimizer_test.py b/tensorflow/contrib/opt/python/training/moving_average_optimizer_test.py
index a4ffbfe1c6..60929add19 100644
--- a/tensorflow/contrib/opt/python/training/moving_average_optimizer_test.py
+++ b/tensorflow/contrib/opt/python/training/moving_average_optimizer_test.py
@@ -116,6 +116,37 @@ class MovingAverageOptimizerTest(test.TestCase):
       with self.assertRaises(RuntimeError):
         _ = opt.swapping_saver([var])
 
+  def testCorrectOverride(self):
+
+    class WrapperOptimizer(gradient_descent.GradientDescentOptimizer):
+
+      def compute_gradients(self, *args, **kwargs):
+        self.compute_gradients_called = True
+        return super(WrapperOptimizer, self).compute_gradients(
+            *args, **kwargs)
+
+      def apply_gradients(self, *args, **kwargs):
+        self.apply_gradients_called = True
+        return super(WrapperOptimizer, self).apply_gradients(*args, **kwargs)
+
+    with self.test_session() as sess:
+      var = variables.Variable([1.2], name='var', dtype=dtypes.float32)
+      loss = var ** 2
+      wrapper_opt = WrapperOptimizer(learning_rate=2.0)
+      opt = moving_average_optimizer.MovingAverageOptimizer(wrapper_opt)
+      train_op = opt.minimize(loss)
+
+      # Check that both methods are called on the underlying optimizer.
+      self.assertTrue(wrapper_opt.compute_gradients_called)
+      self.assertTrue(wrapper_opt.apply_gradients_called)
+
+      # Run train_op once, and verify that we've updated the variable.
+      variables.global_variables_initializer().run()
+      sess.run(train_op)
+      var_value = sess.run(var)
+      # Started at 1.2, gradient is 2*1.2=2.4, lr=2, so should now be -3.6.
+      self.assertNear(-3.6, var_value, 1e-6)
+
 
 if __name__ == '__main__':
   test.main()
-- 
GitLab


From c697d96ec69333fcb5b2abe95a10cd53b68757fe Mon Sep 17 00:00:00 2001
From: Benoit Steiner <bsteiner@google.com>
Date: Fri, 8 Dec 2017 17:10:40 -0800
Subject: [PATCH 0829/1225] Preserve symbolic shape information as much as
 possible during shape creation

PiperOrigin-RevId: 178448208
---
 tensorflow/core/framework/shape_inference.cc   | 18 ++++++------------
 .../core/framework/shape_inference_test.cc     |  8 ++++----
 tensorflow/core/ops/array_ops_test.cc          |  6 +++---
 tensorflow/core/ops/math_ops_test.cc           |  2 +-
 tensorflow/core/ops/spectral_ops_test.cc       |  6 +++---
 5 files changed, 17 insertions(+), 23 deletions(-)

diff --git a/tensorflow/core/framework/shape_inference.cc b/tensorflow/core/framework/shape_inference.cc
index ee9192d4a1..c13f13a126 100644
--- a/tensorflow/core/framework/shape_inference.cc
+++ b/tensorflow/core/framework/shape_inference.cc
@@ -342,8 +342,8 @@ Status InferenceContext::WithRank(ShapeHandle shape, int64 rank,
     for (int i = 0; i < rank; ++i) {
       dims.push_back(UnknownDim());
     }
-    *out = shape_manager_.MakeShape(dims);
-    return Status::OK();
+    ShapeHandle shp = shape_manager_.MakeShape(dims);
+    return Merge(shape, shp, out);
   }
   *out = nullptr;
 
@@ -357,13 +357,10 @@ Status InferenceContext::WithRankAtLeast(ShapeHandle shape, int64 rank,
     return errors::InvalidArgument("Rank cannot exceed kint32max");
   }
   const int32 existing = Rank(shape);
-  if (existing >= rank) {
+  if (existing >= rank || existing == kUnknownRank) {
     *out = shape;
     return Status::OK();
   }
-  if (existing == kUnknownRank) {
-    return ReturnUnknownShape(out);
-  }
   *out = nullptr;
   return errors::InvalidArgument("Shape must be at least rank ", rank,
                                  " but is rank ", existing);
@@ -375,10 +372,7 @@ Status InferenceContext::WithRankAtMost(ShapeHandle shape, int64 rank,
     return errors::InvalidArgument("Rank cannot exceed kint32max");
   }
   const int32 existing = Rank(shape);
-  if (existing == kUnknownRank) {
-    return ReturnUnknownShape(out);
-  }
-  if (existing <= rank) {
+  if (existing <= rank || existing == kUnknownRank) {
     *out = shape;
     return Status::OK();
   }
@@ -395,8 +389,8 @@ Status InferenceContext::WithValue(DimensionHandle dim, int64 value,
     return Status::OK();
   }
   if (existing == kUnknownDim) {
-    *out = MakeDim(value);
-    return Status::OK();
+    DimensionHandle d = MakeDim(value);
+    return Merge(dim, d, out);
   }
   *out = nullptr;
   return errors::InvalidArgument("Dimension must be ", value, " but is ",
diff --git a/tensorflow/core/framework/shape_inference_test.cc b/tensorflow/core/framework/shape_inference_test.cc
index 68156e63ca..a9b63ca60e 100644
--- a/tensorflow/core/framework/shape_inference_test.cc
+++ b/tensorflow/core/framework/shape_inference_test.cc
@@ -359,11 +359,11 @@ TEST_F(ShapeInferenceTest, WithRankAtMost) {
   // WithRankAtMost on a shape with unknown dimensionality always succeeds.
   EXPECT_TRUE(c.WithRankAtMost(in0, 1, &s1).ok());
   EXPECT_EQ("?", c.DebugString(s1));
-  EXPECT_FALSE(SameHandle(in0, s1));
+  EXPECT_TRUE(SameHandle(in0, s1));
 
   EXPECT_TRUE(c.WithRankAtMost(in0, 2, &s2).ok());
   EXPECT_EQ("?", c.DebugString(s2));
-  EXPECT_FALSE(SameHandle(s1, s2));
+  EXPECT_TRUE(SameHandle(s1, s2));
 
   // WithRankAtMost on shape with known dimensionality.
   s1 = in1;
@@ -398,11 +398,11 @@ TEST_F(ShapeInferenceTest, WithRankAtLeast) {
   // WithRankAtLeast on a shape with unknown dimensionality always succeeds.
   EXPECT_TRUE(c.WithRankAtLeast(in0, 1, &s1).ok());
   EXPECT_EQ("?", c.DebugString(s1));
-  EXPECT_FALSE(SameHandle(in0, s1));
+  EXPECT_TRUE(SameHandle(in0, s1));
 
   EXPECT_TRUE(c.WithRankAtLeast(in0, 2, &s2).ok());
   EXPECT_EQ("?", c.DebugString(s2));
-  EXPECT_FALSE(SameHandle(s1, s2));
+  EXPECT_TRUE(SameHandle(s1, s2));
 
   // WithRankAtLeast on shape with known dimensionality.
   s1 = in1;
diff --git a/tensorflow/core/ops/array_ops_test.cc b/tensorflow/core/ops/array_ops_test.cc
index e010ecda8e..c8ea443613 100644
--- a/tensorflow/core/ops/array_ops_test.cc
+++ b/tensorflow/core/ops/array_ops_test.cc
@@ -521,7 +521,7 @@ TEST(ArrayOpsTest, MatrixSetDiag_ShapeFn) {
   INFER_ERROR("Dimensions must be equal, but are 2 and 3", op, "[2,3];[3]");
 
   // Output matches input.
-  INFER_OK(op, "?;?", "?");
+  INFER_OK(op, "?;?", "in0");
   INFER_OK(op, "[1,2,2];[1,2]", "in0");
   INFER_OK(op, "[1,2,3];?", "in0");
   INFER_OK(op, "[1,3,2];?", "in0");
@@ -1619,7 +1619,7 @@ TEST(ArrayOpsTest, UnchangedWithQuantizationScalars_ShapeFn) {
 TEST(ArrayOpsTest, FakeQuantWithMinMaxVarsPerChannel) {
   ShapeInferenceTestOp op("FakeQuantWithMinMaxVarsPerChannel");
 
-  INFER_OK(op, "?;?;?", "?");
+  INFER_OK(op, "?;?;?", "in0");
   INFER_OK(op, "[?];?;?", "in0");
   INFER_OK(op, "[1,?,3];[3];[3]", "in0");
   INFER_OK(op, "[3];[3];[3]", "in0");
@@ -1638,7 +1638,7 @@ TEST(ArrayOpsTest, FakeQuantWithMinMaxVarsPerChannel) {
 TEST(ArrayOpsTest, FakeQuantWithMinMaxVarsPerChannelGradient) {
   ShapeInferenceTestOp op("FakeQuantWithMinMaxVarsPerChannelGradient");
 
-  INFER_OK(op, "?;?;?;?", "?;[?];[?]");
+  INFER_OK(op, "?;?;?;?", "in0;[?];[?]");
   INFER_OK(op, "[3];[3];[3];[3]", "in0;in3;in3");
   INFER_OK(op, "[1,3];[1,3];[3];[3]", "in0;in3;in3");
   INFER_OK(op, "[1,2,3,4];[1,2,3,4];[4];[4]", "in0;in3;in3");
diff --git a/tensorflow/core/ops/math_ops_test.cc b/tensorflow/core/ops/math_ops_test.cc
index 3dfa776d26..ca3772e6f8 100644
--- a/tensorflow/core/ops/math_ops_test.cc
+++ b/tensorflow/core/ops/math_ops_test.cc
@@ -522,7 +522,7 @@ TEST(MathOpsTest, Cross_ShapeFn) {
   INFER_ERROR("Dimension 0 in both shapes must be equal, but", op, "[3];[5]");
   INFER_ERROR("Dimension must be 3 but", op, "[3,5];[3,5]");
 
-  INFER_OK(op, "?;?", "?");
+  INFER_OK(op, "?;?", "in0");
   INFER_OK(op, "[?];[?]", "in0");
   INFER_OK(op, "[1,?,3];[?,?,?]", "in0");
 }
diff --git a/tensorflow/core/ops/spectral_ops_test.cc b/tensorflow/core/ops/spectral_ops_test.cc
index 0f8a3e6ef1..b1c5e95fc5 100644
--- a/tensorflow/core/ops/spectral_ops_test.cc
+++ b/tensorflow/core/ops/spectral_ops_test.cc
@@ -22,7 +22,7 @@ namespace tensorflow {
 TEST(MathOpsTest, FFT_ShapeFn) {
   for (const auto* op_name : {"FFT", "IFFT"}) {
     ShapeInferenceTestOp op(op_name);
-    INFER_OK(op, "?", "?");
+    INFER_OK(op, "?", "in0");
     INFER_ERROR("Shape must be at least rank 1 but is rank 0", op, "[]");
     INFER_OK(op, "[?]", "in0");
     INFER_OK(op, "[1]", "in0");
@@ -31,7 +31,7 @@ TEST(MathOpsTest, FFT_ShapeFn) {
 
   for (const auto* op_name : {"FFT2D", "IFFT2D"}) {
     ShapeInferenceTestOp op(op_name);
-    INFER_OK(op, "?", "?");
+    INFER_OK(op, "?", "in0");
     INFER_ERROR("Shape must be at least rank 2 but is rank 1", op, "[1]");
     INFER_OK(op, "[?,1]", "in0");
     INFER_OK(op, "[1,2]", "in0");
@@ -40,7 +40,7 @@ TEST(MathOpsTest, FFT_ShapeFn) {
 
   for (const auto* op_name : {"FFT3D", "IFFT3D"}) {
     ShapeInferenceTestOp op(op_name);
-    INFER_OK(op, "?", "?");
+    INFER_OK(op, "?", "in0");
     INFER_ERROR("Shape must be at least rank 3 but is rank 2", op, "[1,2]");
     INFER_OK(op, "[?,1,?]", "in0");
     INFER_OK(op, "[1,2,3]", "in0");
-- 
GitLab


From 922fbb5ce22404e529860194b40d1c6e3bcaf93d Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 8 Dec 2017 17:23:00 -0800
Subject: [PATCH 0830/1225] Check that Rendezvous is not null.

PiperOrigin-RevId: 178449247
---
 tensorflow/core/common_runtime/rendezvous_util.cc | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/tensorflow/core/common_runtime/rendezvous_util.cc b/tensorflow/core/common_runtime/rendezvous_util.cc
index a1e31016c2..92dc03812e 100644
--- a/tensorflow/core/common_runtime/rendezvous_util.cc
+++ b/tensorflow/core/common_runtime/rendezvous_util.cc
@@ -32,6 +32,10 @@ Status SendTensorsToRendezvous(
         "; alloc_attrs.size() = ", alloc_attrs.size());
   }
 
+  if (!rendezvous) {
+    return errors::InvalidArgument("Rendezvous is null.");
+  }
+
   Rendezvous::ParsedKey parsed;
   for (int i = 0; i < keys.size(); ++i) {
     Rendezvous::Args rendez_args;
-- 
GitLab


From d85eec050f83e25b4d70fefd0f04dbf69c20f904 Mon Sep 17 00:00:00 2001
From: Derek Murray <mrry@google.com>
Date: Fri, 8 Dec 2017 17:26:49 -0800
Subject: [PATCH 0831/1225] Always create a Rendezvous in RemoteCallOp.

This change does not affect existing functionality, and enables RemoteCallOp to work in environments where a Rendezvous is not necessarily available (e.g. in a function called from an IteratorContext).

PiperOrigin-RevId: 178449551
---
 tensorflow/core/kernels/function_ops.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/core/kernels/function_ops.cc b/tensorflow/core/kernels/function_ops.cc
index f2290e87a5..9382ff7847 100644
--- a/tensorflow/core/kernels/function_ops.cc
+++ b/tensorflow/core/kernels/function_ops.cc
@@ -318,7 +318,7 @@ class RemoteCallOp : public AsyncOpKernel {
     if (opts.source_device != target_device) {
       opts.remote_execution = true;
     }
-    opts.rendezvous = ctx->rendezvous();
+    opts.create_rendezvous = true;
     std::vector<Tensor> args;
     args.reserve(arguments.size());
     for (const Tensor& argument : arguments) {
-- 
GitLab


From 3457d1222b7e3171153297fb522cc3f729fc11b2 Mon Sep 17 00:00:00 2001
From: HyoukJoong Lee <hyouklee@google.com>
Date: Fri, 8 Dec 2017 17:28:52 -0800
Subject: [PATCH 0832/1225] Clear existing layouts before running the layout
 assignment.

PiperOrigin-RevId: 178449701
---
 .../xla/service/cpu/cpu_layout_assignment.cc       |  4 ++++
 .../compiler/xla/service/layout_assignment.cc      | 14 ++++++++++++++
 2 files changed, 18 insertions(+)

diff --git a/tensorflow/compiler/xla/service/cpu/cpu_layout_assignment.cc b/tensorflow/compiler/xla/service/cpu/cpu_layout_assignment.cc
index 7975eba399..78732c31f9 100644
--- a/tensorflow/compiler/xla/service/cpu/cpu_layout_assignment.cc
+++ b/tensorflow/compiler/xla/service/cpu/cpu_layout_assignment.cc
@@ -139,6 +139,10 @@ Status CpuLayoutAssignment::AddBackendConstraints(
         if (constraints->OperandBufferForwarded(instruction, operand_no)) {
           continue;
         }
+        // Skip operands with non-array shapes.
+        if (!ShapeUtil::IsArray(instruction->operand(operand_no)->shape())) {
+          continue;
+        }
         Shape operand_shape(
             row_major_shape(instruction->operand(operand_no)->shape()));
         TF_RETURN_IF_ERROR(constraints->SetOperandLayout(
diff --git a/tensorflow/compiler/xla/service/layout_assignment.cc b/tensorflow/compiler/xla/service/layout_assignment.cc
index 7eda7c2284..af726271ae 100644
--- a/tensorflow/compiler/xla/service/layout_assignment.cc
+++ b/tensorflow/compiler/xla/service/layout_assignment.cc
@@ -1328,6 +1328,20 @@ Status LayoutAssignment::RunOnComputation(
           << ")";
   VLOG(2) << "  ComputationLayout = " << computation_layout.ToString();
 
+  // Clear existing layouts of the instructions. All layouts must be assigned by
+  // the LayoutAssignment pass, except for Infeed, Outfeed, Parameters and the
+  // computation result. The latter two are specified in computation_layout, so
+  // we only need to keep the existing layouts for Infeed and Outfeed. Clearing
+  // the layouts here avoids hiding potential bugs in the layout assignment pass
+  // that may accidently use the existing layout.
+  for (HloInstruction* instruction : computation->instructions()) {
+    if (instruction->opcode() == HloOpcode::kInfeed ||
+        instruction->opcode() == HloOpcode::kOutfeed) {
+      continue;
+    }
+    LayoutUtil::ClearLayout(instruction->mutable_shape());
+  }
+
   // Construct LayoutConstraints with all layout constraints of the computation.
   LayoutConstraints constraints(points_to_analysis, computation);
 
-- 
GitLab


From 826784467ecd0fed8e44645ef1032496fed88ba5 Mon Sep 17 00:00:00 2001
From: Changming Sun <chasun@microsoft.com>
Date: Sat, 9 Dec 2017 10:03:43 +0800
Subject: [PATCH 0833/1225] Enable GCS filesystem for Windows (#14856)

---
 tensorflow/core/platform/cloud/BUILD          | 14 +++++++++
 .../core/platform/cloud/gcs_dns_cache.cc      | 31 +++++++++++++------
 .../core/platform/cloud/gcs_file_system.cc    | 21 ++++++++++++-
 .../platform/cloud/google_auth_provider.cc    |  5 ++-
 .../core/platform/cloud/oauth_client.cc       |  4 +++
 tensorflow/core/platform/cloud/time_util.cc   |  3 ++
 .../core/platform/default/build_config.bzl    |  1 -
 .../ci_build/windows/bazel/bazel_test_lib.sh  |  2 +-
 third_party/curl.BUILD                        | 26 ++++++++++++++--
 9 files changed, 91 insertions(+), 16 deletions(-)

diff --git a/tensorflow/core/platform/cloud/BUILD b/tensorflow/core/platform/cloud/BUILD
index 624145da75..aaeccc8324 100644
--- a/tensorflow/core/platform/cloud/BUILD
+++ b/tensorflow/core/platform/cloud/BUILD
@@ -10,6 +10,7 @@ licenses(["notice"])  # Apache 2.0
 load(
     "//tensorflow:tensorflow.bzl",
     "tf_cc_test",
+    "tf_copts",
 )
 
 filegroup(
@@ -29,6 +30,7 @@ filegroup(
 cc_library(
     name = "expiring_lru_cache",
     hdrs = ["expiring_lru_cache.h"],
+    copts = tf_copts(),
     visibility = ["//tensorflow:__subpackages__"],
     deps = ["//tensorflow/core:lib"],
 )
@@ -37,6 +39,7 @@ cc_library(
     name = "file_block_cache",
     srcs = ["file_block_cache.cc"],
     hdrs = ["file_block_cache.h"],
+    copts = tf_copts(),
     visibility = ["//tensorflow:__subpackages__"],
     deps = ["//tensorflow/core:lib"],
 )
@@ -45,6 +48,7 @@ cc_library(
     name = "gcs_dns_cache",
     srcs = ["gcs_dns_cache.cc"],
     hdrs = ["gcs_dns_cache.h"],
+    copts = tf_copts(),
     visibility = ["//tensorflow:__subpackages__"],
     deps = [
         ":http_request",
@@ -56,6 +60,7 @@ cc_library(
     name = "gcs_file_system",
     srcs = ["gcs_file_system.cc"],
     hdrs = ["gcs_file_system.h"],
+    copts = tf_copts(),
     linkstatic = 1,  # Needed since alwayslink is broken in bazel b/27630669
     visibility = ["//visibility:public"],
     deps = [
@@ -78,6 +83,7 @@ cc_library(
 cc_library(
     name = "http_request",
     hdrs = ["http_request.h"],
+    copts = tf_copts(),
     visibility = ["//tensorflow:__subpackages__"],
     deps = [
         "//tensorflow/core:framework_headers_lib",
@@ -89,6 +95,7 @@ cc_library(
     name = "curl_http_request",
     srcs = ["curl_http_request.cc"],
     hdrs = ["curl_http_request.h"],
+    copts = tf_copts(),
     visibility = ["//tensorflow:__subpackages__"],
     deps = [
         ":http_request",
@@ -104,6 +111,7 @@ cc_library(
     hdrs = [
         "http_request_fake.h",
     ],
+    copts = tf_copts(),
     visibility = ["//tensorflow:__subpackages__"],
     deps = [
         ":curl_http_request",
@@ -121,6 +129,7 @@ cc_library(
         "auth_provider.h",
         "google_auth_provider.h",
     ],
+    copts = tf_copts(),
     visibility = ["//tensorflow:__subpackages__"],
     deps = [
         ":curl_http_request",
@@ -136,6 +145,7 @@ cc_library(
     name = "now_seconds_env",
     testonly = 1,
     hdrs = ["now_seconds_env.h"],
+    copts = tf_copts(),
     visibility = ["//tensorflow:__subpackages__"],
     deps = [
         "//tensorflow/core:lib",
@@ -151,6 +161,7 @@ cc_library(
     hdrs = [
         "oauth_client.h",
     ],
+    copts = tf_copts(),
     deps = [
         ":curl_http_request",
         ":http_request",
@@ -169,6 +180,7 @@ cc_library(
     hdrs = [
         "retrying_utils.h",
     ],
+    copts = tf_copts(),
     deps = [
         "//tensorflow/core:framework_headers_lib",
         "//tensorflow/core:lib_internal",
@@ -183,6 +195,7 @@ cc_library(
     hdrs = [
         "retrying_file_system.h",
     ],
+    copts = tf_copts(),
     deps = [
         ":retrying_utils",
         "//tensorflow/core:framework_headers_lib",
@@ -198,6 +211,7 @@ cc_library(
     hdrs = [
         "time_util.h",
     ],
+    copts = tf_copts(),
     deps = [
         "//tensorflow/core:framework_headers_lib",
         "//tensorflow/core:lib_internal",
diff --git a/tensorflow/core/platform/cloud/gcs_dns_cache.cc b/tensorflow/core/platform/cloud/gcs_dns_cache.cc
index 63f2da065d..840f2b21cd 100644
--- a/tensorflow/core/platform/cloud/gcs_dns_cache.cc
+++ b/tensorflow/core/platform/cloud/gcs_dns_cache.cc
@@ -14,9 +14,14 @@ limitations under the License.
 ==============================================================================*/
 
 #include "tensorflow/core/platform/cloud/gcs_dns_cache.h"
-
+#ifndef _WIN32
 #include <arpa/inet.h>
 #include <netdb.h>
+#else
+#include <winsock2.h>
+#include <ws2tcpip.h>
+#include <Windows.h>
+#endif
 #include <sys/types.h>
 
 namespace tensorflow {
@@ -26,6 +31,20 @@ namespace {
 constexpr char kStorageHost[] = "storage.googleapis.com";
 constexpr char kWwwHost[] = "www.googleapis.com";
 
+inline void print_getaddrinfo_error(const string& name, int error_code) {
+#ifndef _WIN32
+  if (error_code == EAI_SYSTEM) {
+    LOG(ERROR) << "Error resolving " << name
+               << " (EAI_SYSTEM): " << strerror(errno);
+  } else {
+    LOG(ERROR) << "Error resolving " << name << ": "
+               << gai_strerror(error_code);
+  }
+#else
+  // TODO:WSAGetLastError is better than gai_strerror
+  LOG(ERROR) << "Error resolving " << name << ": " << gai_strerror(error_code);
+#endif
+}
 }  // namespace
 
 GcsDnsCache::GcsDnsCache(Env* env, int64 refresh_rate_secs)
@@ -77,7 +96,7 @@ Status GcsDnsCache::AnnotateRequest(HttpRequest* request) {
 
   std::vector<string> output;
   if (return_code == 0) {
-    for (addrinfo* i = result; i != nullptr; i = i->ai_next) {
+    for (const addrinfo* i = result; i != nullptr; i = i->ai_next) {
       if (i->ai_family != AF_INET || i->ai_addr->sa_family != AF_INET) {
         LOG(WARNING) << "Non-IPv4 address returned. ai_family: " << i->ai_family
                      << ". sa_family: " << i->ai_addr->sa_family << ".";
@@ -96,13 +115,7 @@ Status GcsDnsCache::AnnotateRequest(HttpRequest* request) {
       }
     }
   } else {
-    if (return_code == EAI_SYSTEM) {
-      LOG(ERROR) << "Error resolving " << name
-                 << " (EAI_SYSTEM): " << strerror(errno);
-    } else {
-      LOG(ERROR) << "Error resolving " << name << ": "
-                 << gai_strerror(return_code);
-    }
+    print_getaddrinfo_error(name, return_code);
   }
   if (result != nullptr) {
     freeaddrinfo(result);
diff --git a/tensorflow/core/platform/cloud/gcs_file_system.cc b/tensorflow/core/platform/cloud/gcs_file_system.cc
index 45e9b05092..c44cad9fc8 100644
--- a/tensorflow/core/platform/cloud/gcs_file_system.cc
+++ b/tensorflow/core/platform/cloud/gcs_file_system.cc
@@ -22,6 +22,9 @@ limitations under the License.
 #include <cstring>
 #include <fstream>
 #include <vector>
+#ifdef _WIN32
+#include <io.h>  //for _mktemp
+#endif
 #include "include/json/json.h"
 #include "tensorflow/core/lib/core/errors.h"
 #include "tensorflow/core/lib/gtl/map_util.h"
@@ -40,6 +43,12 @@ limitations under the License.
 #include "tensorflow/core/platform/protobuf.h"
 #include "tensorflow/core/platform/thread_annotations.h"
 
+#ifdef _WIN32
+#ifdef DeleteFile
+#undef DeleteFile
+#endif
+#endif
+
 namespace tensorflow {
 
 namespace {
@@ -95,16 +104,25 @@ const FileStatistics DIRECTORY_STAT(0, 0, true);
 // userspace DNS cache.
 constexpr char kResolveCacheSecs[] = "GCS_RESOLVE_REFRESH_SECS";
 
+// TODO: DO NOT use a hardcoded path
 Status GetTmpFilename(string* filename) {
   if (!filename) {
     return errors::Internal("'filename' cannot be nullptr.");
   }
+#ifndef _WIN32
   char buffer[] = "/tmp/gcs_filesystem_XXXXXX";
   int fd = mkstemp(buffer);
   if (fd < 0) {
     return errors::Internal("Failed to create a temporary file.");
   }
   close(fd);
+#else
+  char buffer[] = "/tmp/gcs_filesystem_XXXXXX";
+  char* ret = _mktemp(buffer);
+  if (ret == nullptr) {
+    return errors::Internal("Failed to create a temporary file.");
+  }
+#endif
   *filename = buffer;
   return Status::OK();
 }
@@ -292,6 +310,7 @@ class GcsWritableFile : public WritableFile {
         file_cache_erase_(std::move(file_cache_erase)),
         sync_needed_(true),
         initial_retry_delay_usec_(initial_retry_delay_usec) {
+    // TODO: to make it safer, outfile_ should be constructed from an FD
     if (GetTmpFilename(&tmp_content_filename_).ok()) {
       outfile_.open(tmp_content_filename_,
                     std::ofstream::binary | std::ofstream::app);
@@ -416,7 +435,7 @@ class GcsWritableFile : public WritableFile {
       return errors::Internal("'size' cannot be nullptr");
     }
     const auto tellp = outfile_.tellp();
-    if (tellp == -1) {
+    if (tellp == static_cast<std::streampos>(-1)) {
       return errors::Internal(
           "Could not get the size of the internal temporary file.");
     }
diff --git a/tensorflow/core/platform/cloud/google_auth_provider.cc b/tensorflow/core/platform/cloud/google_auth_provider.cc
index f6fd8373cd..d77f439c5a 100644
--- a/tensorflow/core/platform/cloud/google_auth_provider.cc
+++ b/tensorflow/core/platform/cloud/google_auth_provider.cc
@@ -14,9 +14,12 @@ limitations under the License.
 ==============================================================================*/
 
 #include "tensorflow/core/platform/cloud/google_auth_provider.h"
+#ifndef _WIN32
 #include <pwd.h>
-#include <sys/types.h>
 #include <unistd.h>
+#else
+#include <sys/types.h>
+#endif
 #include <fstream>
 #include "include/json/json.h"
 #include "tensorflow/core/lib/core/errors.h"
diff --git a/tensorflow/core/platform/cloud/oauth_client.cc b/tensorflow/core/platform/cloud/oauth_client.cc
index c700b97dc9..3c2830ccd9 100644
--- a/tensorflow/core/platform/cloud/oauth_client.cc
+++ b/tensorflow/core/platform/cloud/oauth_client.cc
@@ -14,9 +14,13 @@ limitations under the License.
 ==============================================================================*/
 
 #include "tensorflow/core/platform/cloud/oauth_client.h"
+#ifndef _WIN32
 #include <pwd.h>
 #include <sys/types.h>
 #include <unistd.h>
+#else
+#include <sys/types.h>
+#endif
 #include <fstream>
 #include <openssl/bio.h>
 #include <openssl/evp.h>
diff --git a/tensorflow/core/platform/cloud/time_util.cc b/tensorflow/core/platform/cloud/time_util.cc
index 2f8643f3c7..0587a65c29 100644
--- a/tensorflow/core/platform/cloud/time_util.cc
+++ b/tensorflow/core/platform/cloud/time_util.cc
@@ -18,6 +18,9 @@ limitations under the License.
 #include <cmath>
 #include <cstdio>
 #include <ctime>
+#ifdef _WIN32
+#define timegm _mkgmtime
+#endif
 #include "tensorflow/core/lib/core/errors.h"
 
 namespace tensorflow {
diff --git a/tensorflow/core/platform/default/build_config.bzl b/tensorflow/core/platform/default/build_config.bzl
index 0f8cf8f122..948334d27b 100644
--- a/tensorflow/core/platform/default/build_config.bzl
+++ b/tensorflow/core/platform/default/build_config.bzl
@@ -458,7 +458,6 @@ def tf_additional_lib_deps():
 
 def tf_additional_core_deps():
   return select({
-      "//tensorflow:with_gcp_support_windows_override": [],
       "//tensorflow:with_gcp_support_android_override": [],
       "//tensorflow:with_gcp_support_ios_override": [],
       "//tensorflow:with_gcp_support": [
diff --git a/tensorflow/tools/ci_build/windows/bazel/bazel_test_lib.sh b/tensorflow/tools/ci_build/windows/bazel/bazel_test_lib.sh
index 1e455ddc99..8d50250c3a 100644
--- a/tensorflow/tools/ci_build/windows/bazel/bazel_test_lib.sh
+++ b/tensorflow/tools/ci_build/windows/bazel/bazel_test_lib.sh
@@ -111,7 +111,7 @@ function run_configure_for_cpu_build {
     export TF_NEED_MKL=0
   fi
   export TF_NEED_VERBS=0
-  export TF_NEED_GCP=0
+  export TF_NEED_GCP=1
   export TF_NEED_HDFS=0
   export TF_NEED_OPENCL_SYCL=0
   echo "" | ./configure
diff --git a/third_party/curl.BUILD b/third_party/curl.BUILD
index e311c7e758..4def6f9489 100644
--- a/third_party/curl.BUILD
+++ b/third_party/curl.BUILD
@@ -10,6 +10,7 @@ CURL_WIN_COPTS = [
     "/DHAVE_CONFIG_H",
     "/DCURL_DISABLE_FTP",
     "/DCURL_DISABLE_NTLM",
+    "/DCURL_DISABLE_PROXY",
     "/DHAVE_LIBZ",
     "/DHAVE_ZLIB_H",
     # Defining _USING_V110_SDK71_ is hackery to defeat curl's incorrect
@@ -23,6 +24,8 @@ CURL_WIN_SRCS = [
     "lib/asyn-thread.c",
     "lib/inet_ntop.c",
     "lib/system_win32.c",
+    "lib/vtls/schannel.c",
+    "lib/idn_win32.c",
 ]
 
 cc_library(
@@ -276,6 +279,7 @@ cc_library(
             "-DCURL_MAX_WRITE_SIZE=65536",
         ],
     }),
+    defines = ["CURL_STATICLIB"],
     includes = ["include"],
     linkopts = select({
         "@org_tensorflow//tensorflow:android": [
@@ -289,10 +293,16 @@ cc_library(
         ],
         "@org_tensorflow//tensorflow:ios": [],
         "@org_tensorflow//tensorflow:windows": [
-            "-Wl,ws2_32.lib",
+            "-DEFAULTLIB:ws2_32.lib",
+            "-DEFAULTLIB:advapi32.lib",
+            "-DEFAULTLIB:crypt32.lib",
+            "-DEFAULTLIB:Normaliz.lib",
         ],
         "@org_tensorflow//tensorflow:windows_msvc": [
-            "-Wl,ws2_32.lib",
+            "-DEFAULTLIB:ws2_32.lib",
+            "-DEFAULTLIB:advapi32.lib",
+            "-DEFAULTLIB:crypt32.lib",
+            "-DEFAULTLIB:Normaliz.lib",
         ],
         "//conditions:default": [
             "-lrt",
@@ -438,12 +448,22 @@ genrule(
         "#  include \"lib/config-win32.h\"",
         "#  define BUILDING_LIBCURL 1",
         "#  define CURL_DISABLE_CRYPTO_AUTH 1",
+        "#  define CURL_DISABLE_DICT 1",
+        "#  define CURL_DISABLE_FILE 1",
+        "#  define CURL_DISABLE_GOPHER 1",
         "#  define CURL_DISABLE_IMAP 1",
         "#  define CURL_DISABLE_LDAP 1",
         "#  define CURL_DISABLE_LDAPS 1",
         "#  define CURL_DISABLE_POP3 1",
         "#  define CURL_PULL_WS2TCPIP_H 1",
-        "#  define HTTP_ONLY 1",
+        "#  define CURL_DISABLE_SMTP 1",
+        "#  define CURL_DISABLE_TELNET 1",
+        "#  define CURL_DISABLE_TFTP 1",
+        "#  define CURL_PULL_WS2TCPIP_H 1",
+        "#  define USE_WINDOWS_SSPI 1",
+        "#  define USE_WIN32_IDN 1",
+        "#  define USE_SCHANNEL 1",
+        "#  define WANT_IDN_PROTOTYPES 1",
         "#elif defined(__APPLE__)",
         "#  define HAVE_FSETXATTR_6 1",
         "#  define HAVE_SETMODE 1",
-- 
GitLab


From 4a19c341457ff5b97455190506bf7b8d3bbe26c1 Mon Sep 17 00:00:00 2001
From: Yao Zhang <yaozhang@google.com>
Date: Fri, 8 Dec 2017 18:34:10 -0800
Subject: [PATCH 0834/1225] Support non-const input sizes for
 Conv2DBackpropInput.

PiperOrigin-RevId: 178454629
---
 .../grappler/optimizers/layout_optimizer.cc   | 58 ++++++++++---------
 .../optimizers/layout_optimizer_test.cc       | 53 ++++++++++++++---
 .../python/grappler/layout_optimizer_test.py  |  2 +-
 3 files changed, 77 insertions(+), 36 deletions(-)

diff --git a/tensorflow/core/grappler/optimizers/layout_optimizer.cc b/tensorflow/core/grappler/optimizers/layout_optimizer.cc
index f675f64cfc..7c177d8e98 100644
--- a/tensorflow/core/grappler/optimizers/layout_optimizer.cc
+++ b/tensorflow/core/grappler/optimizers/layout_optimizer.cc
@@ -37,7 +37,7 @@ namespace grappler {
 namespace {
 
 const char kPrefix[] = "LayoutOptimizer";
-const char kDim[] = "LayoutOptimizerDim";
+const char kDataFormatOp[] = "LayoutOptimizerDataFormatOp";
 const char kPermNHWCToNCHW[] = "LayoutOptimizerPermConstNHWCToNCHW";
 const char kPermNCHWToNHWC[] = "LayoutOptimizerPermConstNCHWToNHWC";
 const char kGatherAxisConst[] = "LayoutOptimizerGatherAxisConst";
@@ -544,6 +544,27 @@ class NodeProcessor : public GraphProcessor {
     return const_node;
   }
 
+  void AddNodeDataFormatOp(const string& op, int input_pos) {
+    NodeDef* added_node = graph_->add_node();
+    added_node->set_name(strings::StrCat(kDataFormatOp, "-", node_->name()));
+    added_node->set_op(op);
+    node_map_->AddNode(added_node->name(), added_node);
+    added_node->set_device(node_->device());
+    AttrValue attr_data_type;
+    attr_data_type.set_type(DT_INT32);
+    added_node->mutable_attr()->insert({"T", attr_data_type});
+    AttrValue attr_format;
+    attr_format.set_s("NHWC");
+    added_node->mutable_attr()->insert({"src_format", attr_format});
+    attr_format.set_s("NCHW");
+    added_node->mutable_attr()->insert({"dst_format", attr_format});
+    *added_node->add_input() = node_->input(input_pos);
+    *node_->mutable_input(input_pos) = added_node->name();
+    node_map_->UpdateOutput(added_node->input(0), node_->name(),
+                            added_node->name());
+    node_map_->AddOutput(added_node->name(), node_->name());
+  }
+
   NodeDef* node_;
   bool is_in_frame_;
 
@@ -730,7 +751,15 @@ class Conv2DBackpropInputProcessor : public Conv2DProcessor {
     return input_pos;
   }
 
-  Status CustomizedProcessing() override { return UpdateAttrValueOfInput(0); }
+  Status CustomizedProcessing() override {
+    auto input_size_node = node_map_->GetNode(node_->input(0));
+    if (IsConstant(*input_size_node)) {
+      TF_RETURN_IF_ERROR(UpdateAttrValueOfInput(0));
+    } else {
+      AddNodeDataFormatOp("DataFormatVecPermute", 0);
+    }
+    return Status::OK();
+  }
 };
 
 class FusedBatchNormGradProcessor : public NodeProcessor {
@@ -991,34 +1020,11 @@ class ConcatProcessor : public AgnosticNodeProcessor {
     if (IsConstant(*dim_node)) {
       TF_RETURN_IF_ERROR(UpdateAttrValueOfInput(axis_node_pos_));
     } else {
-      AddNodeDataFormatDimMap();
+      AddNodeDataFormatOp("DataFormatDimMap", axis_node_pos_);
     }
     return Status::OK();
   }
-
   int axis_node_pos_;
-
- private:
-  void AddNodeDataFormatDimMap() {
-    NodeDef* added_node = graph_->add_node();
-    added_node->set_name(strings::StrCat(kDim, "-", node_->name()));
-    added_node->set_op("DataFormatDimMap");
-    node_map_->AddNode(added_node->name(), added_node);
-    added_node->set_device(node_->device());
-    AttrValue attr_data_type;
-    attr_data_type.set_type(DT_INT32);
-    added_node->mutable_attr()->insert({"T", attr_data_type});
-    AttrValue attr_format;
-    attr_format.set_s("NHWC");
-    added_node->mutable_attr()->insert({"src_format", attr_format});
-    attr_format.set_s("NCHW");
-    added_node->mutable_attr()->insert({"dst_format", attr_format});
-    *added_node->add_input() = node_->input(axis_node_pos_);
-    *node_->mutable_input(axis_node_pos_) = added_node->name();
-    node_map_->UpdateOutput(added_node->input(0), node_->name(),
-                            added_node->name());
-    node_map_->AddOutput(added_node->name(), node_->name());
-  }
 };
 
 class PadProcessor : public AgnosticNodeProcessor {
diff --git a/tensorflow/core/grappler/optimizers/layout_optimizer_test.cc b/tensorflow/core/grappler/optimizers/layout_optimizer_test.cc
index ef065b22c1..59796d38a9 100644
--- a/tensorflow/core/grappler/optimizers/layout_optimizer_test.cc
+++ b/tensorflow/core/grappler/optimizers/layout_optimizer_test.cc
@@ -71,6 +71,12 @@ class LayoutOptimizerTest : public ::testing::Test {
 
   Output SimpleConv2DBackpropInput(tensorflow::Scope* s, int input_size,
                                    int filter_size, const string& padding) {
+    return SimpleConv2DBackpropInput(s, input_size, filter_size, padding, true);
+  }
+
+  Output SimpleConv2DBackpropInput(tensorflow::Scope* s, int input_size,
+                                   int filter_size, const string& padding,
+                                   bool const_input_size) {
     int batch_size = 128;
     int input_height = input_size;
     int input_width = input_size;
@@ -100,11 +106,18 @@ class LayoutOptimizerTest : public ::testing::Test {
     Output output =
         ops::Const(s->WithOpName("Output"), Input::Initializer(output_data));
 
-    Output conv_backprop_input = ops::Conv2DBackpropInput(
-        s->WithOpName("Conv2DBackpropInput"), input_sizes, filter, output,
-        {1, stride, stride, 1}, padding);
-    TensorShape input_shape(
-        {batch_size, input_height, input_width, input_depth});
+    Output conv_backprop_input;
+    Output input_sizes_i =
+        ops::Identity(s->WithOpName("InputSizesIdentity"), input_sizes);
+    if (const_input_size) {
+      conv_backprop_input = ops::Conv2DBackpropInput(
+          s->WithOpName("Conv2DBackpropInput"), input_sizes, filter, output,
+          {1, stride, stride, 1}, padding);
+    } else {
+      conv_backprop_input = ops::Conv2DBackpropInput(
+          s->WithOpName("Conv2DBackpropInput"), input_sizes_i, filter, output,
+          {1, stride, stride, 1}, padding);
+    }
     return conv_backprop_input;
   }
 
@@ -171,6 +184,28 @@ TEST_F(LayoutOptimizerTest, Conv2DBackpropInput) {
   test::ExpectTensorEqual<int>(input_sizes_expected, input_sizes);
 }
 
+TEST_F(LayoutOptimizerTest, Conv2DBackpropInputNonConstInputSizes) {
+  tensorflow::Scope s = tensorflow::Scope::NewRootScope();
+  auto conv = SimpleConv2DBackpropInput(&s, 7, 2, "SAME", false);
+  Output fetch = ops::Identity(s.WithOpName("Fetch"), {conv});
+  GrapplerItem item;
+  TF_CHECK_OK(s.ToGraphDef(&item.graph));
+  LayoutOptimizer optimizer;
+  GraphDef output;
+
+  Status status = optimizer.Optimize(virtual_cluster_.get(), item, &output);
+  NodeMap node_map(&output);
+  auto conv2d_backprop_node = node_map.GetNode("Conv2DBackpropInput");
+  CHECK(conv2d_backprop_node);
+  EXPECT_EQ(conv2d_backprop_node->input(0),
+            "LayoutOptimizerDataFormatOp-Conv2DBackpropInput");
+  auto input_sizes_node =
+      node_map.GetNode("LayoutOptimizerDataFormatOp-Conv2DBackpropInput");
+  CHECK(input_sizes_node);
+  EXPECT_EQ(input_sizes_node->input(0), "InputSizesIdentity");
+  EXPECT_EQ(input_sizes_node->op(), "DataFormatVecPermute");
+}
+
 TEST_F(LayoutOptimizerTest, FilterSizeIsOne) {
   tensorflow::Scope s = tensorflow::Scope::NewRootScope();
   auto conv = SimpleConv2D(&s, 2, 1, "SAME");
@@ -526,9 +561,9 @@ TEST_F(LayoutOptimizerTest, SplitNonConstDim) {
   Status status = optimizer.Optimize(virtual_cluster_.get(), item, &output);
   NodeMap node_map(&output);
   auto split_node = node_map.GetNode("split");
-  EXPECT_EQ(split_node->input(0), "LayoutOptimizerDim-split");
+  EXPECT_EQ(split_node->input(0), "LayoutOptimizerDataFormatOp-split");
   EXPECT_EQ(split_node->input(1), "Conv2D");
-  auto map_node = node_map.GetNode("LayoutOptimizerDim-split");
+  auto map_node = node_map.GetNode("LayoutOptimizerDataFormatOp-split");
   EXPECT_EQ(map_node->op(), "DataFormatDimMap");
   EXPECT_EQ(map_node->input(0), "i1");
 }
@@ -594,8 +629,8 @@ TEST_F(LayoutOptimizerTest, ConcatNonConst) {
   auto concat_node = node_map.GetNode("concat");
   EXPECT_EQ(concat_node->input(0), "split");
   EXPECT_EQ(concat_node->input(1), "split:1");
-  EXPECT_EQ(concat_node->input(2), "LayoutOptimizerDim-concat");
-  auto concat_dim = node_map.GetNode("LayoutOptimizerDim-concat");
+  EXPECT_EQ(concat_node->input(2), "LayoutOptimizerDataFormatOp-concat");
+  auto concat_dim = node_map.GetNode("LayoutOptimizerDataFormatOp-concat");
   EXPECT_EQ(concat_dim->op(), "DataFormatDimMap");
   EXPECT_EQ(concat_dim->input(0), "i");
 }
diff --git a/tensorflow/python/grappler/layout_optimizer_test.py b/tensorflow/python/grappler/layout_optimizer_test.py
index 8cad8a514f..749e6d8f59 100644
--- a/tensorflow/python/grappler/layout_optimizer_test.py
+++ b/tensorflow/python/grappler/layout_optimizer_test.py
@@ -225,7 +225,7 @@ class LayoutOptimizerTest(test.TestCase):
       self.assertIn('LayoutOptimizerTransposeNHWCToNCHW-Conv2D-Reshape-0',
                     nodes)
       self.assertIn('LayoutOptimizerTransposeNCHWToNHWC-split-Sum-0', nodes)
-      self.assertIn('LayoutOptimizerDim-split', nodes)
+      self.assertIn('LayoutOptimizerDataFormatOp-split', nodes)
       self.assertAllClose(output_val_ref, output_val, atol=1e-3)
 
   def testLoop(self):
-- 
GitLab


From dda6d1b9d0621dc76ec779604f566bedfc59b3d2 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 8 Dec 2017 18:53:56 -0800
Subject: [PATCH 0835/1225] [XLA] Hlo parser: support reporting error messages
 with locations pointed out. And fix the bug that some errors were reported at
 the token after the actual errors.

PiperOrigin-RevId: 178455738
---
 .../compiler/xla/tools/parser/hlo_lexer.cc    |  43 +++-
 .../compiler/xla/tools/parser/hlo_lexer.h     |  19 +-
 .../compiler/xla/tools/parser/hlo_parser.cc   | 183 +++++++++++-------
 .../xla/tools/parser/hlo_parser_test.cc       |   2 +-
 4 files changed, 161 insertions(+), 86 deletions(-)

diff --git a/tensorflow/compiler/xla/tools/parser/hlo_lexer.cc b/tensorflow/compiler/xla/tools/parser/hlo_lexer.cc
index 04247594ed..459d511e90 100644
--- a/tensorflow/compiler/xla/tools/parser/hlo_lexer.cc
+++ b/tensorflow/compiler/xla/tools/parser/hlo_lexer.cc
@@ -312,18 +312,43 @@ TokKind HloLexer::LexNumberOrPattern() {
   return TokKind::kError;
 }
 
-StringPiece HloLexer::GetCurrentLine() const {
-  const char* start = token_start_;
-  const char* end = current_ptr_;
-  if (!CanDereference(start) || !CanDereference(end)) {
-    return "LINE OUT OF RANGE";
+std::pair<unsigned, unsigned> HloLexer::GetLineAndColumn(LocTy location) const {
+  unsigned line_no = 1;
+  const char* start = buf_.begin();
+  const char* ptr = start;
+  if (line_no_cache_.last_query && CanDereference(line_no_cache_.last_query) &&
+      line_no_cache_.last_query <= location) {
+    ptr = line_no_cache_.last_query;
+    line_no = line_no_cache_.line_no_of_query;
+  }
+  for (; ptr != location; ptr++) {
+    if (*ptr == '\n') {
+      line_no++;
+    }
   }
-  while (start > buf_.begin() && *start != '\n') {
-    start--;
+
+  // Update the line number cache.
+  line_no_cache_.last_query = ptr;
+  line_no_cache_.line_no_of_query = line_no;
+  size_t line_offset = StringPieceFromPointers(start, ptr).rfind('\n');
+  if (line_offset == StringPiece::npos) {
+    line_offset = 0;
   }
-  while (end < buf_.end() && *end != '\n') {
-    end++;
+  return {line_no, ptr - start - line_offset};
+}
+
+StringPiece HloLexer::GetLine(LocTy loc) const {
+  if (!CanDereference(loc)) {
+    return "LINE OUT OF RANGE";
   }
+  size_t line_start =
+      StringPieceFromPointers(buf_.begin(), loc + 1).rfind('\n');
+  const char* start = line_start == StringPiece::npos
+                          ? buf_.begin()
+                          : buf_.begin() + line_start + 1;
+  size_t line_end = StringPieceFromPointers(loc, buf_.end()).find('\n');
+  const char* end = line_end == StringPiece::npos ? buf_.end() : loc + line_end;
+
   return StringPieceFromPointers(start, end);
 }
 
diff --git a/tensorflow/compiler/xla/tools/parser/hlo_lexer.h b/tensorflow/compiler/xla/tools/parser/hlo_lexer.h
index 9daf6a11d3..27880b9b8a 100644
--- a/tensorflow/compiler/xla/tools/parser/hlo_lexer.h
+++ b/tensorflow/compiler/xla/tools/parser/hlo_lexer.h
@@ -66,8 +66,16 @@ class HloLexer {
     return decimal_val_;
   }
 
-  // Returns the line of text that is currently being lexed.
-  tensorflow::StringPiece GetCurrentLine() const;
+  typedef const char* LocTy;
+
+  // Returns the location of the current token.
+  LocTy GetLoc() const { return token_start_; }
+
+  // Returns the line and column of a location in the buffer.
+  std::pair<unsigned, unsigned> GetLineAndColumn(LocTy location) const;
+
+  // Returns the whole line given the location.
+  tensorflow::StringPiece GetLine(LocTy loc) const;
 
  private:
   // Returns the current character. If it's neither the end of input buffer nor
@@ -108,6 +116,13 @@ class HloLexer {
   Shape shape_val_;
   int64 int64_val_;
   double decimal_val_;
+
+  struct LineNoCacheTy {
+    const char* last_query;
+    unsigned line_no_of_query;
+  };
+  // This caches the line number of the previous query.
+  mutable LineNoCacheTy line_no_cache_{nullptr, 0};
 };
 
 }  // namespace tools
diff --git a/tensorflow/compiler/xla/tools/parser/hlo_parser.cc b/tensorflow/compiler/xla/tools/parser/hlo_parser.cc
index 6e55214cbe..2017648f01 100644
--- a/tensorflow/compiler/xla/tools/parser/hlo_parser.cc
+++ b/tensorflow/compiler/xla/tools/parser/hlo_parser.cc
@@ -41,6 +41,8 @@ const double kF16max = 65504;
 // Parser for the HloModule::ToString() format text.
 class HloParser {
  public:
+  using LocTy = HloLexer::LocTy;
+
   explicit HloParser(StringPiece str, const HloModuleConfig& config)
       : lexer_(str), config_(config) {}
 
@@ -184,6 +186,7 @@ class HloParser {
 
   // Logs the current parsing line and the given message. Always returns false.
   bool TokenError(StringPiece msg);
+  bool Error(LocTy loc, StringPiece msg);
 
   // If the current token is 'kind', eats it (i.e. lexes the next token) and
   // returns true.
@@ -194,10 +197,12 @@ class HloParser {
 
   // Adds the instruction to the pool. Returns false and emits an error if the
   // instruction already exists.
-  bool AddInstruction(const string& name, HloInstruction* instruction);
+  bool AddInstruction(const string& name, HloInstruction* instruction,
+                      LocTy name_loc);
   // Adds the computation to the pool. Returns false and emits an error if the
   // computation already exists.
-  bool AddComputation(const string& name, HloComputation* computation);
+  bool AddComputation(const string& name, HloComputation* computation,
+                      LocTy name_loc);
 
   // The map from the instruction name to the instruction. This does not own the
   // instructions.
@@ -210,15 +215,25 @@ class HloParser {
   std::vector<string> error_;
 };
 
-bool HloParser::TokenError(StringPiece msg) {
-  const string error =
-      StrCat("was parsing \"", lexer_.GetCurrentLine(), "\"; token ",
-             TokKindToString(lexer_.GetKind()), "; ", msg);
-  VLOG(1) << "TokenError: " << error;
-  error_.push_back(error);
+bool HloParser::Error(LocTy loc, StringPiece msg) {
+  auto line_col = lexer_.GetLineAndColumn(loc);
+  const unsigned line = line_col.first;
+  const unsigned col = line_col.second;
+  std::vector<string> error_lines;
+  error_lines.push_back(
+      StrCat("was parsing ", line, ":", col, ": error: ", msg));
+  error_lines.push_back(lexer_.GetLine(loc).ToString());
+  error_lines.push_back(col == 0 ? "" : StrCat(string(col - 1, ' '), "^"));
+
+  error_.push_back(tensorflow::str_util::Join(error_lines, "\n"));
+  VLOG(1) << "Error: " << error_.back();
   return false;
 }
 
+bool HloParser::TokenError(StringPiece msg) {
+  return Error(lexer_.GetLoc(), msg);
+}
+
 bool HloParser::Run() {
   lexer_.Lex();
   return ParseHloModule();
@@ -256,6 +271,7 @@ bool HloParser::ParseComputations() {
 bool HloParser::ParseComputation() {
   const bool is_entry_computation = EatIfPresent(TokKind::kw_ENTRY);
   string name;
+  LocTy name_loc = lexer_.GetLoc();
   if (!ParseName(&name)) {
     return false;
   }
@@ -276,6 +292,7 @@ bool HloParser::ParseComputation() {
     LOG(FATAL) << "instruction " << root_name
                << " was marked as ROOT but the parser has not seen it before";
   }
+
   // Now root can be either an existing instruction or a nullptr. If it's a
   // nullptr, the implementation of Builder will set the last instruction as
   // root instruction.
@@ -283,7 +300,7 @@ bool HloParser::ParseComputation() {
       is_entry_computation
           ? module_->AddEntryComputation(builder->Build(root))
           : module_->AddEmbeddedComputation(builder->Build(root));
-  return AddComputation(name, computation);
+  return AddComputation(name, computation, name_loc);
 }
 
 // instruction_list ::= '{' instruction_list1 '}'
@@ -311,6 +328,8 @@ bool HloParser::ParseInstruction(HloComputation::Builder* builder,
   HloOpcode opcode;
   std::vector<HloInstruction*> operands;
   bool is_root = EatIfPresent(TokKind::kw_ROOT);
+
+  const LocTy name_loc = lexer_.GetLoc();
   if (!ParseName(&name) ||
       !ParseToken(TokKind::kEqual, "expects '=' in instruction") ||
       !ParseShape(&shape) || !ParseOpcode(&opcode)) {
@@ -863,15 +882,15 @@ bool HloParser::ParseInstruction(HloComputation::Builder* builder,
     for (auto* pre : *predecessors) {
       Status status = pre->AddControlDependencyTo(instruction);
       if (!status.ok()) {
-        return TokenError(StrCat("error adding control dependency for: ", name,
-                                 " status: ", status.ToString()));
+        return Error(name_loc, StrCat("error adding control dependency for: ",
+                                      name, " status: ", status.ToString()));
       }
     }
   }
   if (metadata) {
     instruction->set_metadata(*metadata);
   }
-  return AddInstruction(name, instruction);
+  return AddInstruction(name, instruction, name_loc);
 }  // NOLINT(readability/fn_size)
 
 // ::= '{' (single_sharding | tuple_sharding) '}'
@@ -917,6 +936,7 @@ bool HloParser::ParseSingleSharding(OpSharding* sharding,
     return false;
   }
 
+  LocTy loc = lexer_.GetLoc();
   bool maximal = false;
   bool replicated = false;
   std::vector<int64> devices;
@@ -984,34 +1004,35 @@ bool HloParser::ParseSingleSharding(OpSharding* sharding,
 
   if (replicated) {
     if (!devices.empty()) {
-      return TokenError(
-          "replicated shardings should not have any devices assigned");
+      return Error(loc,
+                   "replicated shardings should not have any devices assigned");
     }
     if (!ShapeUtil::Equal(tile_shape, Shape())) {
-      return TokenError(
-          "replicated shardings should not have any tile shape set");
+      return Error(loc,
+                   "replicated shardings should not have any tile shape set");
     }
     sharding->set_type(OpSharding::Type::OpSharding_Type_REPLICATED);
   } else if (maximal) {
     if (devices.size() != 1) {
-      return TokenError(
-          "maximal shardings should have exactly one device assigned");
+      return Error(loc,
+                   "maximal shardings should have exactly one device assigned");
     }
     if (!ShapeUtil::Equal(tile_shape, Shape())) {
-      return TokenError("maximal shardings should not have any tile shape set");
+      return Error(loc, "maximal shardings should not have any tile shape set");
     }
     sharding->set_type(OpSharding::Type::OpSharding_Type_MAXIMAL);
     sharding->add_tile_assignment_devices(devices[0]);
   } else {
     if (devices.size() <= 1) {
-      return TokenError(
-          "non-maximal shardings must have more than one device assigned");
+      return Error(
+          loc, "non-maximal shardings must have more than one device assigned");
     }
     if (ShapeUtil::Equal(tile_shape, Shape())) {
-      return TokenError("non-maximal shardings should have a tile shape set");
+      return Error(loc, "non-maximal shardings should have a tile shape set");
     }
     if (tile_assignment_dimensions.empty()) {
-      return TokenError(
+      return Error(
+          loc,
           "non-maximal shardings must have a tile assignment list including "
           "dimensions");
     }
@@ -1036,10 +1057,11 @@ bool HloParser::ParseInstructionNames(
                   "expects '{' at the beginning of instruction name list")) {
     return false;
   }
+  LocTy loc = lexer_.GetLoc();
   do {
     string name;
     if (!ParseName(&name)) {
-      return TokenError("expects a instruction name");
+      return Error(loc, "expects a instruction name");
     }
     HloInstruction* instr =
         tensorflow::gtl::FindPtrOrNull(instruction_pool_, name);
@@ -1051,7 +1073,7 @@ bool HloParser::ParseInstructionNames(
   } while (EatIfPresent(TokKind::kComma));
 
   return ParseToken(TokKind::kRbrace,
-                    "expects '}' at the end of control instructions");
+                    "expects '}' at the end of instruction name list");
 }
 
 bool HloParser::SetValueInLiteral(int64 value, int64 linear_index,
@@ -1313,20 +1335,22 @@ bool HloParser::ParseNonTupleLiteral(std::unique_ptr<Literal>* literal,
           }
           lexer_.Lex();
         } else if (primitive_util::IsIntegralType(shape.element_type())) {
+          LocTy loc = lexer_.GetLoc();
           int64 value;
           if (!ParseInt64(&value)) {
-            return TokenError(StrCat("expects integer for primitive type: ",
+            return Error(loc, StrCat("expects integer for primitive type: ",
                                      PrimitiveType_Name(shape.element_type())));
           }
           if (!SetValueInLiteral(value, linear_index++, literal->get())) {
             return false;
           }
         } else if (primitive_util::IsFloatingPointType(shape.element_type())) {
+          LocTy loc = lexer_.GetLoc();
           double value;
           if (!ParseDouble(&value)) {
-            return TokenError(
-                StrCat("expect floating point value for primitive type: ",
-                       PrimitiveType_Name(shape.element_type())));
+            return Error(
+                loc, StrCat("expect floating point value for primitive type: ",
+                            PrimitiveType_Name(shape.element_type())));
           }
           if (!SetValueInLiteral(value, linear_index++, literal->get())) {
             return false;
@@ -1358,6 +1382,7 @@ bool HloParser::ParseOperands(std::vector<HloInstruction*>* operands) {
     // empty
   } else {
     do {
+      LocTy loc = lexer_.GetLoc();
       Shape shape;
       string name;
       if (!ParseShape(&shape) || !ParseName(&name)) {
@@ -1366,7 +1391,7 @@ bool HloParser::ParseOperands(std::vector<HloInstruction*>* operands) {
       HloInstruction* instruction =
           tensorflow::gtl::FindPtrOrNull(instruction_pool_, name);
       if (!instruction) {
-        return TokenError(StrCat("instruction does not exist: ", name));
+        return Error(loc, StrCat("instruction does not exist: ", name));
       }
       operands->push_back(instruction);
     } while (EatIfPresent(TokKind::kComma));
@@ -1376,11 +1401,12 @@ bool HloParser::ParseOperands(std::vector<HloInstruction*>* operands) {
 
 bool HloParser::ParseOperands(std::vector<HloInstruction*>* operands,
                               const int expected_size) {
+  LocTy loc = lexer_.GetLoc();
   if (!ParseOperands(operands)) {
     return false;
   }
   if (expected_size != operands->size()) {
-    return TokenError(StrCat("expects ", expected_size, " operands, but has ",
+    return Error(loc, StrCat("expects ", expected_size, " operands, but has ",
                              operands->size(), " operands"));
   }
   return true;
@@ -1389,6 +1415,7 @@ bool HloParser::ParseOperands(std::vector<HloInstruction*>* operands,
 // sub_attributes ::= '{' (','? attribute)* '}'
 bool HloParser::ParseSubAttributes(
     const std::unordered_map<string, AttrConfig>& attrs) {
+  LocTy loc = lexer_.GetLoc();
   if (!ParseToken(TokKind::kLbrace, "expects '{' to start sub attributes")) {
     return false;
   }
@@ -1407,7 +1434,7 @@ bool HloParser::ParseSubAttributes(
   for (const auto& attr_it : attrs) {
     if (attr_it.second.required &&
         seen_attrs.find(attr_it.first) == seen_attrs.end()) {
-      return TokenError(Printf("sub-attribute %s is expected but not seen",
+      return Error(loc, Printf("sub-attribute %s is expected but not seen",
                                attr_it.first.c_str()));
     }
   }
@@ -1417,6 +1444,7 @@ bool HloParser::ParseSubAttributes(
 // attributes ::= (',' attribute)*
 bool HloParser::ParseAttributes(
     const std::unordered_map<string, AttrConfig>& attrs) {
+  LocTy loc = lexer_.GetLoc();
   std::unordered_set<string> seen_attrs;
   while (EatIfPresent(TokKind::kComma)) {
     if (!ParseAttributeHelper(attrs, &seen_attrs)) {
@@ -1427,7 +1455,7 @@ bool HloParser::ParseAttributes(
   for (const auto& attr_it : attrs) {
     if (attr_it.second.required &&
         seen_attrs.find(attr_it.first) == seen_attrs.end()) {
-      return TokenError(Printf("attribute %s is expected but not seen",
+      return Error(loc, Printf("attribute %s is expected but not seen",
                                attr_it.first.c_str()));
     }
   }
@@ -1437,21 +1465,23 @@ bool HloParser::ParseAttributes(
 bool HloParser::ParseAttributeHelper(
     const std::unordered_map<string, AttrConfig>& attrs,
     std::unordered_set<string>* seen_attrs) {
+  LocTy loc = lexer_.GetLoc();
   string name;
   if (!ParseAttributeName(&name)) {
-    return TokenError("error parsing attributes");
+    return Error(loc, "error parsing attributes");
   }
   VLOG(1) << "Parsing attribute " << name;
   if (!seen_attrs->insert(name).second) {
-    return TokenError(Printf("attribute %s already exists", name.c_str()));
+    return Error(loc, Printf("attribute %s already exists", name.c_str()));
   }
   auto attr_it = attrs.find(name);
   if (attr_it == attrs.end()) {
-    return TokenError(Printf("unexpected attribute %s", name.c_str()));
+    return Error(loc, Printf("unexpected attribute %s", name.c_str()));
   }
   AttrTy attr_type = attr_it->second.attr_type;
   void* attr_out_ptr = attr_it->second.result;
   bool success = [&] {
+    LocTy attr_loc = lexer_.GetLoc();
     switch (attr_type) {
       case AttrTy::kInt64: {
         int64 result;
@@ -1467,7 +1497,7 @@ bool HloParser::ParseAttributeHelper(
           return false;
         }
         if (result != static_cast<int32>(result)) {
-          return TokenError("value out of range for int32");
+          return Error(attr_loc, "value out of range for int32");
         }
         static_cast<optional<int32>*>(attr_out_ptr)
             ->emplace(static_cast<int32>(result));
@@ -1480,7 +1510,7 @@ bool HloParser::ParseAttributeHelper(
         }
         if (result > std::numeric_limits<float>::max() ||
             result < std::numeric_limits<float>::lowest()) {
-          return TokenError("value out of range for float");
+          return Error(attr_loc, "value out of range for float");
         }
         static_cast<optional<float>*>(attr_out_ptr)
             ->emplace(static_cast<float>(result));
@@ -1591,19 +1621,20 @@ bool HloParser::ParseAttributeHelper(
     }
   }();
   if (!success) {
-    return TokenError(Printf("error parsing attribute %s", name.c_str()));
+    return Error(loc, Printf("error parsing attribute %s", name.c_str()));
   }
   return true;
 }
 
 bool HloParser::ParseComputationName(HloComputation** value) {
   string name;
+  LocTy loc = lexer_.GetLoc();
   if (!ParseName(&name)) {
-    return TokenError("expects computation name");
+    return Error(loc, "expects computation name");
   }
   *value = tensorflow::gtl::FindPtrOrNull(computation_pool_, name);
   if (*value == nullptr) {
-    return TokenError(StrCat("computation does not exist: ", name));
+    return Error(loc, StrCat("computation does not exist: ", name));
   }
   return true;
 }
@@ -1612,6 +1643,7 @@ bool HloParser::ParseComputationName(HloComputation** value) {
 // The subattributes can appear in any order. 'size=' is required, others are
 // optional.
 bool HloParser::ParseWindow(Window* window) {
+  LocTy loc = lexer_.GetLoc();
   if (!ParseToken(TokKind::kLbrace, "expected '{' to start window attribute")) {
     return false;
   }
@@ -1622,9 +1654,10 @@ bool HloParser::ParseWindow(Window* window) {
   std::vector<int64> lhs_dilate;
   std::vector<int64> rhs_dilate;
   while (lexer_.GetKind() != TokKind::kRbrace) {
+    LocTy attr_loc = lexer_.GetLoc();
     string field_name;
     if (!ParseAttributeName(&field_name)) {
-      return TokenError("expects sub-attributes in window");
+      return Error(attr_loc, "expects sub-attributes in window");
     }
     bool ok = [&] {
       if (field_name == "size") {
@@ -1642,7 +1675,7 @@ bool HloParser::ParseWindow(Window* window) {
       if (field_name == "pad") {
         return ParseWindowPad(&pad);
       }
-      return TokenError(StrCat("unexpected attribute name: ", field_name));
+      return Error(loc, StrCat("unexpected attribute name: ", field_name));
     }();
     if (!ok) {
       return false;
@@ -1650,20 +1683,20 @@ bool HloParser::ParseWindow(Window* window) {
   }
 
   if (size.empty()) {
-    return TokenError(
-        "sub-attribute 'size=' is required in the window attribute");
+    return Error(loc,
+                 "sub-attribute 'size=' is required in the window attribute");
   }
   if (!stride.empty() && stride.size() != size.size()) {
-    return TokenError("expects 'stride=' has the same size as 'size='");
+    return Error(loc, "expects 'stride=' has the same size as 'size='");
   }
   if (!lhs_dilate.empty() && lhs_dilate.size() != size.size()) {
-    return TokenError("expects 'lhs_dilate=' has the same size as 'size='");
+    return Error(loc, "expects 'lhs_dilate=' has the same size as 'size='");
   }
   if (!rhs_dilate.empty() && rhs_dilate.size() != size.size()) {
-    return TokenError("expects 'rhs_dilate=' has the same size as 'size='");
+    return Error(loc, "expects 'rhs_dilate=' has the same size as 'size='");
   }
   if (!pad.empty() && pad.size() != size.size()) {
-    return TokenError("expects 'pad=' has the same size as 'size='");
+    return Error(loc, "expects 'pad=' has the same size as 'size='");
   }
 
   for (int i = 0; i < size.size(); i++) {
@@ -1823,20 +1856,19 @@ bool HloParser::ParseSliceRanges(SliceRanges* result) {
     return ParseToken(TokKind::kRbrace, "expects '}' to end ranges");
   }
   do {
+    LocTy loc = lexer_.GetLoc();
     ranges.emplace_back();
     if (!ParseInt64List(TokKind::kLsquare, TokKind::kRsquare, TokKind::kColon,
                         &ranges.back())) {
       return false;
     }
-  } while (EatIfPresent(TokKind::kComma));
-
-  for (const auto& range : ranges) {
+    const auto& range = ranges.back();
     if (range.size() != 2 && range.size() != 3) {
-      return TokenError(Printf(
-          "expects [start:limit:step] or [start:limit], but sees %ld elements.",
-          range.size()));
+      return Error(loc, Printf("expects [start:limit:step] or [start:limit], "
+                               "but sees %ld elements.",
+                               range.size()));
     }
-  }
+  } while (EatIfPresent(TokKind::kComma));
 
   for (const auto& range : ranges) {
     result->starts.push_back(range[0]);
@@ -1958,15 +1990,16 @@ bool HloParser::ParseString(string* result) {
 }
 
 bool HloParser::ParseDxD(const string& name, std::vector<int64>* result) {
+  LocTy loc = lexer_.GetLoc();
   if (!result->empty()) {
-    return TokenError(
-        Printf("sub-attribute '%s=' already exists", name.c_str()));
+    return Error(loc,
+                 Printf("sub-attribute '%s=' already exists", name.c_str()));
   }
   // 1D
   if (lexer_.GetKind() == TokKind::kInt) {
     int64 number;
     if (!ParseInt64(&number)) {
-      return TokenError(Printf("expects sub-attribute '%s=i'", name.c_str()));
+      return Error(loc, Printf("expects sub-attribute '%s=i'", name.c_str()));
     }
     result->push_back(number);
     return true;
@@ -1975,8 +2008,8 @@ bool HloParser::ParseDxD(const string& name, std::vector<int64>* result) {
   if (lexer_.GetKind() == TokKind::kDxD) {
     string str = lexer_.GetStrVal();
     if (!SplitAndParseAsInts(str, 'x', result)) {
-      return TokenError(
-          Printf("expects sub-attribute '%s=ixj...'", name.c_str()));
+      return Error(loc,
+                   Printf("expects sub-attribute '%s=ixj...'", name.c_str()));
     }
     lexer_.Lex();
     return true;
@@ -1985,8 +2018,9 @@ bool HloParser::ParseDxD(const string& name, std::vector<int64>* result) {
 }
 
 bool HloParser::ParseWindowPad(std::vector<std::vector<int64>>* pad) {
+  LocTy loc = lexer_.GetLoc();
   if (!pad->empty()) {
-    return TokenError("sub-attribute 'pad=' already exists");
+    return Error(loc, "sub-attribute 'pad=' already exists");
   }
   if (lexer_.GetKind() != TokKind::kPad) {
     return TokenError("expects window pad pattern, e.g., '0_0x3_3'");
@@ -1997,8 +2031,8 @@ bool HloParser::ParseWindowPad(std::vector<std::vector<int64>>* pad) {
     std::vector<int64> low_high;
     if (!SplitAndParseAsInts(padding_str[i], '_', &low_high) ||
         low_high.size() != 2) {
-      return TokenError(
-          "expects padding_low and padding_high separated by '_'");
+      return Error(loc,
+                   "expects padding_low and padding_high separated by '_'");
     }
     pad->push_back(low_high);
   }
@@ -2014,15 +2048,16 @@ bool HloParser::ParsePaddingConfig(PaddingConfig* padding) {
   if (lexer_.GetKind() != TokKind::kPad) {
     return TokenError("expects padding config, e.g., '0_0_0x3_3_1'");
   }
+  LocTy loc = lexer_.GetLoc();
   string str = lexer_.GetStrVal();
   std::vector<string> padding_str = Split(str, 'x');
   for (const auto& padding_dim_str : padding_str) {
     std::vector<int64> padding_dim;
     if (!SplitAndParseAsInts(padding_dim_str, '_', &padding_dim) ||
         (padding_dim.size() != 2 && padding_dim.size() != 3)) {
-      return TokenError(
-          "expects padding config pattern like 'low_high_interior' or "
-          "'low_high'");
+      return Error(loc,
+                   "expects padding config pattern like 'low_high_interior' or "
+                   "'low_high'");
     }
     auto* dim = padding->add_dimensions();
     dim->set_edge_padding_low(padding_dim[0]);
@@ -2174,20 +2209,20 @@ bool HloParser::EatIfPresent(TokKind kind) {
   return true;
 }
 
-bool HloParser::AddInstruction(const string& name,
-                               HloInstruction* instruction) {
+bool HloParser::AddInstruction(const string& name, HloInstruction* instruction,
+                               LocTy name_loc) {
   auto result = instruction_pool_.insert({name, instruction});
   if (!result.second) {
-    return TokenError(StrCat("instruction already exists: ", name));
+    return Error(name_loc, StrCat("instruction already exists: ", name));
   }
   return true;
 }
 
-bool HloParser::AddComputation(const string& name,
-                               HloComputation* computation) {
+bool HloParser::AddComputation(const string& name, HloComputation* computation,
+                               LocTy name_loc) {
   auto result = computation_pool_.insert({name, computation});
   if (!result.second) {
-    return TokenError(StrCat("computation already exists: ", name));
+    return Error(name_loc, StrCat("computation already exists: ", name));
   }
   return true;
 }
@@ -2198,7 +2233,7 @@ StatusOr<std::unique_ptr<HloModule>> Parse(StringPiece str,
                                            const HloModuleConfig& config) {
   HloParser parser(str, config);
   if (!parser.Run()) {
-    return InvalidArgument("Syntax error: %s", parser.GetError().c_str());
+    return InvalidArgument("Syntax error:\n%s", parser.GetError().c_str());
   }
   return parser.ConsumeHloModule();
 }
diff --git a/tensorflow/compiler/xla/tools/parser/hlo_parser_test.cc b/tensorflow/compiler/xla/tools/parser/hlo_parser_test.cc
index 5b5326e7b7..36b0c94327 100644
--- a/tensorflow/compiler/xla/tools/parser/hlo_parser_test.cc
+++ b/tensorflow/compiler/xla/tools/parser/hlo_parser_test.cc
@@ -710,7 +710,7 @@ class HloParserTest : public ::testing::Test,
   void ExpectEqual() {
     const string& original = GetParam().module_string;
     auto result = Parse(original);
-    TF_EXPECT_OK(result.status());
+    TF_ASSERT_OK(result.status());
     EXPECT_EQ(original,
               result.ValueOrDie()->ToString(/*include_large_constants=*/true));
   }
-- 
GitLab


From fefa1c222fb883ebf119d35151fd67f91a73a07f Mon Sep 17 00:00:00 2001
From: Skye Wanderman-Milne <skyewm@google.com>
Date: Fri, 8 Dec 2017 19:27:55 -0800
Subject: [PATCH 0836/1225] Set Operation._id_value before adding to control
 flow context.

There is a comment indicating that Operation IDs should be in
topological order, and thus the ID should be set after calling
ControlFlowContext.AddOp since it may add input ops. I believe the
comment is stale and this invariant on the IDs isn't necessary
(testing corroborates this, and also while loops cannot maintain this
since there's a cycle).

Changing this will make it easier to refactor control flow processing
in the future, since we don't have to worry about the ID not being
set.

PiperOrigin-RevId: 178457622
---
 tensorflow/python/framework/ops.py            |  7 +-----
 .../kernel_tests/control_flow_ops_py_test.py  | 23 -------------------
 2 files changed, 1 insertion(+), 29 deletions(-)

diff --git a/tensorflow/python/framework/ops.py b/tensorflow/python/framework/ops.py
index 6969924c70..343150024f 100644
--- a/tensorflow/python/framework/ops.py
+++ b/tensorflow/python/framework/ops.py
@@ -1619,6 +1619,7 @@ class Operation(object):
                           "a Tensor, or IndexedSlices: %s" % c)
         self._control_inputs.append(control_op)
 
+    self._id_value = self._graph._next_id()  # pylint: disable=protected-access
     self._original_op = original_op
     self._op_def = op_def
     self._traceback = self._graph._extract_stack()  # pylint: disable=protected-access
@@ -1671,12 +1672,6 @@ class Operation(object):
       control_flow_util.CheckInputFromValidContext(self, input_tensor.op)
     if self._control_flow_context is not None:
       self._control_flow_context.AddOp(self)
-    # NOTE(keveman): Control flow context's AddOp could be creating new ops and
-    # setting op.inputs[index] = new_op. Thus the new ops' id could be larger
-    # than this op's id even though this op depend on them. Therefore, delaying
-    # assigning id to this op until all ops this could be dependent on are
-    # created.
-    self._id_value = self._graph._next_id()  # pylint: disable=protected-access
     self._recompute_node_def()
 
   def _reconstruct_sequence_inputs(self, op_def, inputs, attrs):
diff --git a/tensorflow/python/kernel_tests/control_flow_ops_py_test.py b/tensorflow/python/kernel_tests/control_flow_ops_py_test.py
index e1d3f9a7d4..35ae89ed33 100644
--- a/tensorflow/python/kernel_tests/control_flow_ops_py_test.py
+++ b/tensorflow/python/kernel_tests/control_flow_ops_py_test.py
@@ -69,16 +69,6 @@ from tensorflow.python.training import gradient_descent
 from tensorflow.python.util import nest
 
 
-def check_op_order(graph):
-  """Sanity check on the ordering of op id."""
-
-  for op in graph.get_operations():
-    for v in op.inputs:
-      assert v.op._id < op._id or op.type == "Merge", (
-          "The id of %s must be less than the id of %s" % (v.op.name, op.name))
-  return True
-
-
 def check_consumers(graph):
   """Sanity check on the consumer list of the tensors."""
 
@@ -143,7 +133,6 @@ class ControlFlowTest(test.TestCase):
       op = state_ops.assign(v, 9)
       v2 = control_flow_ops.with_dependencies([op], v)
 
-      self.assertTrue(check_op_order(v.graph))
       self.assertTrue(isinstance(v2, ops.Tensor))
       variables.global_variables_initializer().run()
       self.assertEqual(9, v2.eval())
@@ -399,7 +388,6 @@ class ControlFlowTest(test.TestCase):
 
       val = r.values.eval()
       ind = r.indices.eval()
-    self.assertTrue(check_op_order(x.values.graph))
     self.assertAllEqual(11, val)
     self.assertAllEqual(0, ind)
 
@@ -446,7 +434,6 @@ class ControlFlowTest(test.TestCase):
 
       val = r.values.eval()
       ind = r.indices.eval()
-    self.assertTrue(check_op_order(x.values.graph))
     self.assertAllEqual(11, val)
     self.assertAllEqual(0, ind)
     self.assertTrue(ind.dtype == np.int64)
@@ -475,7 +462,6 @@ class ControlFlowTest(test.TestCase):
       r = control_flow_ops.cond(pred, fn1, fn2)
 
       result = r.eval()
-    self.assertTrue(check_op_order(x.graph))
     self.assertAllEqual(11, result)
 
   def testCond_1(self):
@@ -489,7 +475,6 @@ class ControlFlowTest(test.TestCase):
           math_ops.less(1, 0), lambda: math_ops.add(x, 1),
           lambda: math_ops.subtract(x, 1))
       result = r.eval()
-    self.assertTrue(check_op_order(x.graph))
     self.assertAllEqual(9, result)
 
   def testCond_3(self):
@@ -502,7 +487,6 @@ class ControlFlowTest(test.TestCase):
       r = control_flow_ops.cond(pred, fn3, fn2)
 
       result = r.eval()
-    self.assertTrue(check_op_order(x.graph))
     self.assertAllEqual(12, result)
 
   def testCond_4(self):
@@ -521,7 +505,6 @@ class ControlFlowTest(test.TestCase):
       variables.global_variables_initializer().run()
       self.assertEqual(len(r), 2)
       result = r[1].eval()
-      self.assertTrue(check_op_order(age.graph))
       self.assertAllEqual(True, result)
       self.assertAllEqual(7, v1.eval())
       self.assertAllEqual(2, v2.eval())
@@ -784,7 +767,6 @@ class ControlFlowTest(test.TestCase):
       r = control_flow_ops.while_loop(lambda i, m, c, o: math_ops.less(i, d),
                                       compute, [i, m, c, o])
       result = r[3].eval()
-    self.assertTrue(check_op_order(i.graph))
     self.assertAllEqual(10100, result)
 
   def testWhile_4(self):
@@ -806,7 +788,6 @@ class ControlFlowTest(test.TestCase):
       r = control_flow_ops.while_loop(lambda i, m, c, o: math_ops.less(i, s),
                                       compute, [i, m, c, o])
       result = r[3].eval()
-    self.assertTrue(check_op_order(i.graph))
     self.assertAllEqual(42, result)
 
   def testWhile_5(self):
@@ -831,7 +812,6 @@ class ControlFlowTest(test.TestCase):
               tensor_shape.unknown_shape()
           ])
       result = r[2].eval()
-    self.assertTrue(check_op_order(i.graph))
     self.assertAllEqual(np.array([0, 1, 2, 3, 4, 5, 6]), result)
 
   def testBufferForwarding(self):
@@ -1277,7 +1257,6 @@ class ControlFlowTest(test.TestCase):
 
       r = control_flow_ops.while_loop(
           loop_iterator, loop_body, [n], parallel_iterations=1)
-      self.assertTrue(check_op_order(n.graph))
       variables.global_variables_initializer().run()
       self.assertEqual(3, r.eval())
       result = select.eval()
@@ -1302,7 +1281,6 @@ class ControlFlowTest(test.TestCase):
 
       r = control_flow_ops.while_loop(
           loop_iterator, loop_body, [n], parallel_iterations=1)
-      self.assertTrue(check_op_order(n.graph))
       variables.global_variables_initializer().run()
       self.assertEqual(3, r.eval())
       result1 = select1.eval()
@@ -1329,7 +1307,6 @@ class ControlFlowTest(test.TestCase):
           parallel_iterations=1)
       variables.global_variables_initializer().run()
       result = r[1].eval()
-    self.assertTrue(check_op_order(n.graph))
     self.assertAllClose(np.array([10.0, 10.0, 10.0]), result)
 
   # b/24814703
-- 
GitLab


From 70644fc0427e77f9a34f538cce5badea2f34b4ae Mon Sep 17 00:00:00 2001
From: ManHyuk <manhyuk@kw.ac.kr>
Date: Sat, 9 Dec 2017 22:26:15 +0900
Subject: [PATCH 0837/1225] fix typo

---
 tensorflow/tools/docs/generate_lib.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tensorflow/tools/docs/generate_lib.py b/tensorflow/tools/docs/generate_lib.py
index c0cde1d3bd..e07a1c967e 100644
--- a/tensorflow/tools/docs/generate_lib.py
+++ b/tensorflow/tools/docs/generate_lib.py
@@ -198,12 +198,12 @@ def add_dict_to_dict(add_from, add_to):
       add_to[key] = add_from[key]
 
 
-# Exclude some libaries in contrib from the documentation altogether.
+# Exclude some libraries in contrib from the documentation altogether.
 def _get_default_private_map():
   return {'tf.test': ['mock']}
 
 
-# Exclude members of some libaries.
+# Exclude members of some libraries.
 def _get_default_do_not_descend_map():
   # TODO(wicke): Shrink this list once the modules get sealed.
   return {
-- 
GitLab


From 5a69dcc7c76d06cba323e993ddbaf55b6a3aa9cf Mon Sep 17 00:00:00 2001
From: ManHyuk <manhyuk@kw.ac.kr>
Date: Sat, 9 Dec 2017 22:58:44 +0900
Subject: [PATCH 0838/1225] fix typo

---
 tensorflow/compiler/xla/service/copy_insertion_test.cc | 2 +-
 tensorflow/compiler/xla/shape_tree.h                   | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/tensorflow/compiler/xla/service/copy_insertion_test.cc b/tensorflow/compiler/xla/service/copy_insertion_test.cc
index 3278fd5f06..8388574716 100644
--- a/tensorflow/compiler/xla/service/copy_insertion_test.cc
+++ b/tensorflow/compiler/xla/service/copy_insertion_test.cc
@@ -339,7 +339,7 @@ TEST_F(CopyInsertionTest, ElementOfNestedTupleParameter) {
            ShapeUtil::MakeShape(F32, {42})}),
       "param0"));
 
-  // The return value of the computation is the zero-th elemnt of the nested
+  // The return value of the computation is the zero-th element of the nested
   // tuple. This element is itself a tuple.
   auto gte = builder.AddInstruction(HloInstruction::CreateGetTupleElement(
       ShapeUtil::GetSubshape(param->shape(), {0}), param, 0));
diff --git a/tensorflow/compiler/xla/shape_tree.h b/tensorflow/compiler/xla/shape_tree.h
index bf8d190150..d752619bd6 100644
--- a/tensorflow/compiler/xla/shape_tree.h
+++ b/tensorflow/compiler/xla/shape_tree.h
@@ -238,7 +238,7 @@ class ShapeTree {
   //           (or compatible).
   //   index : the index of the element in the shape. See ShapeUtil::GetSubshape
   //           for definition of index.
-  //   data : The data value at this elemnt.
+  //   data : The data value at this element.
   template <typename Fn>
   void ForEachElement(const Fn& func) const;
 
-- 
GitLab


From 37641276d8e6ff9617478f78afaefaf1a5c28332 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Sat, 9 Dec 2017 10:07:17 -0800
Subject: [PATCH 0839/1225] Replace StreamExecutorInterface::BlockHostUntilDone
 with BlockHostUntilDoneWithStatus

All known overrides of StreamExecutorInterface::BlockHostUntilDone are changed
by this CL.

PiperOrigin-RevId: 178492517
---
 .../xla/service/interpreter/executor.cc       |  4 ++--
 .../xla/service/interpreter/executor.h        |  2 +-
 .../stream_executor/cuda/cuda_driver.cc       | 13 +++++++------
 tensorflow/stream_executor/cuda/cuda_driver.h |  2 +-
 .../stream_executor/cuda/cuda_gpu_executor.cc |  2 +-
 .../stream_executor/cuda/cuda_gpu_executor.h  |  2 +-
 .../stream_executor/host/host_gpu_executor.cc |  4 ++--
 .../stream_executor/host/host_gpu_executor.h  |  2 +-
 .../stream_executor_internal.cc               | 19 -------------------
 .../stream_executor_internal.h                |  3 +--
 10 files changed, 17 insertions(+), 36 deletions(-)

diff --git a/tensorflow/compiler/xla/service/interpreter/executor.cc b/tensorflow/compiler/xla/service/interpreter/executor.cc
index 0bb3259ef4..511de87b1b 100644
--- a/tensorflow/compiler/xla/service/interpreter/executor.cc
+++ b/tensorflow/compiler/xla/service/interpreter/executor.cc
@@ -100,9 +100,9 @@ bool InterpreterExecutor::StopTimer(Stream *stream, Timer *timer) {
   return true;
 }
 
-bool InterpreterExecutor::BlockHostUntilDone(Stream *stream) {
+port::Status InterpreterExecutor::BlockHostUntilDoneWithStatus(Stream *stream) {
   AsExecutorStream(stream)->BlockUntilDone();
-  return true;
+  return port::Status::OK();
 }
 
 DeviceDescription *InterpreterExecutor::PopulateDeviceDescription() const {
diff --git a/tensorflow/compiler/xla/service/interpreter/executor.h b/tensorflow/compiler/xla/service/interpreter/executor.h
index c59b2ccb15..d3753a6a65 100644
--- a/tensorflow/compiler/xla/service/interpreter/executor.h
+++ b/tensorflow/compiler/xla/service/interpreter/executor.h
@@ -157,7 +157,7 @@ class InterpreterExecutor : public internal::StreamExecutorInterface {
   bool StartTimer(Stream *stream, Timer *timer) override;
   bool StopTimer(Stream *stream, Timer *timer) override;
 
-  bool BlockHostUntilDone(Stream *stream) override;
+  port::Status BlockHostUntilDoneWithStatus(Stream *stream) override;
 
   int PlatformDeviceCount() override { return 1; }
 
diff --git a/tensorflow/stream_executor/cuda/cuda_driver.cc b/tensorflow/stream_executor/cuda/cuda_driver.cc
index b6a96ed3e5..a017ff64d4 100644
--- a/tensorflow/stream_executor/cuda/cuda_driver.cc
+++ b/tensorflow/stream_executor/cuda/cuda_driver.cc
@@ -1115,19 +1115,20 @@ CUDADriver::ContextGetSharedMemConfig(CudaContext* context) {
   return true;
 }
 
-/* static */ bool CUDADriver::SynchronizeStream(CudaContext* context,
-                                                CUstream stream) {
+/* static */ port::Status CUDADriver::SynchronizeStream(CudaContext *context,
+                                                        CUstream stream) {
   ScopedActivateContext activated{context};
   CHECK(stream != nullptr);
   CUresult res = cuStreamSynchronize(stream);
   if (res != CUDA_SUCCESS) {
-    LOG(ERROR) << "could not synchronize on CUDA stream: " << ToString(res)
-               << " :: " << port::CurrentStackTrace();
-    return false;
+    port::Status status = port::InternalError(
+        port::StrCat("could not synchronize on CUDA stream: ", ToString(res)));
+    LOG(ERROR) << status << " :: " << port::CurrentStackTrace();
+    return status;
   }
   VLOG(2) << "successfully synchronized stream " << stream << " on context "
           << context;
-  return true;
+  return port::Status::OK();
 }
 
 /* static */ bool CUDADriver::IsStreamIdle(CudaContext *context,
diff --git a/tensorflow/stream_executor/cuda/cuda_driver.h b/tensorflow/stream_executor/cuda/cuda_driver.h
index 68494aba65..4002ba2021 100644
--- a/tensorflow/stream_executor/cuda/cuda_driver.h
+++ b/tensorflow/stream_executor/cuda/cuda_driver.h
@@ -304,7 +304,7 @@ class CUDADriver {
   // amount of time?
   //
   // http://docs.nvidia.com/cuda/cuda-driver-api/group__CUDA__STREAM.html#group__CUDA__STREAM_1g15e49dd91ec15991eb7c0a741beb7dad
-  static bool SynchronizeStream(CudaContext* context, CUstream stream);
+  static port::Status SynchronizeStream(CudaContext* context, CUstream stream);
 
   // Blocks the calling thread until the operations associated with the context
   // have been completed, via cuCtxSynchronize.
diff --git a/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc b/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc
index 60eaaba21c..7f8a7ca7c7 100644
--- a/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc
+++ b/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc
@@ -664,7 +664,7 @@ bool CUDAExecutor::StopTimer(Stream *stream, Timer *timer) {
   return AsCUDATimer(timer)->Stop(AsCUDAStream(stream));
 }
 
-bool CUDAExecutor::BlockHostUntilDone(Stream *stream) {
+port::Status CUDAExecutor::BlockHostUntilDoneWithStatus(Stream *stream) {
   return CUDADriver::SynchronizeStream(context_, AsCUDAStreamValue(stream));
 }
 
diff --git a/tensorflow/stream_executor/cuda/cuda_gpu_executor.h b/tensorflow/stream_executor/cuda/cuda_gpu_executor.h
index 8ff4a30d62..5adbb59856 100644
--- a/tensorflow/stream_executor/cuda/cuda_gpu_executor.h
+++ b/tensorflow/stream_executor/cuda/cuda_gpu_executor.h
@@ -152,7 +152,7 @@ class CUDAExecutor : public internal::StreamExecutorInterface {
 
   Event::Status PollForEventStatus(Event *event) override;
 
-  bool BlockHostUntilDone(Stream *stream) override;
+  port::Status BlockHostUntilDoneWithStatus(Stream *stream) override;
 
   int PlatformDeviceCount() override { return CUDADriver::GetDeviceCount(); }
 
diff --git a/tensorflow/stream_executor/host/host_gpu_executor.cc b/tensorflow/stream_executor/host/host_gpu_executor.cc
index 0af2c8cc3d..1fd8eeb881 100644
--- a/tensorflow/stream_executor/host/host_gpu_executor.cc
+++ b/tensorflow/stream_executor/host/host_gpu_executor.cc
@@ -177,9 +177,9 @@ bool HostExecutor::StopTimer(Stream *stream, Timer *timer) {
   return true;
 }
 
-bool HostExecutor::BlockHostUntilDone(Stream *stream) {
+port::Status HostExecutor::BlockHostUntilDoneWithStatus(Stream *stream) {
   AsHostStream(stream)->BlockUntilDone();
-  return true;
+  return port::Status::OK();
 }
 
 DeviceDescription *HostExecutor::PopulateDeviceDescription() const {
diff --git a/tensorflow/stream_executor/host/host_gpu_executor.h b/tensorflow/stream_executor/host/host_gpu_executor.h
index 77b07e4a57..e884554a15 100644
--- a/tensorflow/stream_executor/host/host_gpu_executor.h
+++ b/tensorflow/stream_executor/host/host_gpu_executor.h
@@ -139,7 +139,7 @@ class HostExecutor : public internal::StreamExecutorInterface {
 
   bool StopTimer(Stream *stream, Timer *timer) override;
 
-  bool BlockHostUntilDone(Stream *stream) override;
+  port::Status BlockHostUntilDoneWithStatus(Stream *stream) override;
 
   int PlatformDeviceCount() override { return 1; }
 
diff --git a/tensorflow/stream_executor/stream_executor_internal.cc b/tensorflow/stream_executor/stream_executor_internal.cc
index 25b579fc16..273d970b6f 100644
--- a/tensorflow/stream_executor/stream_executor_internal.cc
+++ b/tensorflow/stream_executor/stream_executor_internal.cc
@@ -15,10 +15,6 @@ limitations under the License.
 
 #include "tensorflow/stream_executor/stream_executor_internal.h"
 
-#include "tensorflow/stream_executor/lib/error.h"
-#include "tensorflow/stream_executor/lib/statusor.h"
-#include "tensorflow/stream_executor/lib/stringprintf.h"
-
 namespace perftools {
 namespace gputools {
 namespace internal {
@@ -41,21 +37,6 @@ StreamExecutorFactory* MakeOpenCLExecutorImplementation() {
 
 StreamExecutorFactory MakeHostExecutorImplementation;
 
-// TODO(b/70298427) There are two similar methods:
-//   bool BlockHostUntilDone(Stream*);
-//   Status BlockHostUntilDoneWithStatus(Stream*);
-//
-// The intention is to replace all implementations of the bool version with the
-// Status version.  In the meantime, just implement one in terms of the other.
-port::Status StreamExecutorInterface::BlockHostUntilDoneWithStatus(
-    Stream* stream) {
-  if (!BlockHostUntilDone(stream)) {
-    return port::Status(port::error::INTERNAL,
-                        "Failed to block host until done.");
-  }
-  return port::Status::OK();
-}
-
 }  // namespace internal
 }  // namespace gputools
 }  // namespace perftools
diff --git a/tensorflow/stream_executor/stream_executor_internal.h b/tensorflow/stream_executor/stream_executor_internal.h
index d2426f46e2..0a9bef71d0 100644
--- a/tensorflow/stream_executor/stream_executor_internal.h
+++ b/tensorflow/stream_executor/stream_executor_internal.h
@@ -219,8 +219,7 @@ class StreamExecutorInterface {
   virtual void DeallocateTimer(Timer *timer) = 0;
   virtual bool StartTimer(Stream *stream, Timer *timer) = 0;
   virtual bool StopTimer(Stream *stream, Timer *timer) = 0;
-  virtual bool BlockHostUntilDone(Stream *stream) = 0;
-  virtual port::Status BlockHostUntilDoneWithStatus(Stream *stream);
+  virtual port::Status BlockHostUntilDoneWithStatus(Stream *stream) = 0;
   virtual int PlatformDeviceCount() = 0;
   virtual port::Status EnablePeerAccessTo(StreamExecutorInterface *other) = 0;
   virtual bool CanEnablePeerAccessTo(StreamExecutorInterface *other) = 0;
-- 
GitLab


From 2ecec0b86767e4fd9c56fb7ae2c13cf1f8366878 Mon Sep 17 00:00:00 2001
From: Shanqing Cai <cais@google.com>
Date: Fri, 8 Dec 2017 11:47:35 -0500
Subject: [PATCH 0840/1225] pip.sh: unify the way virtualenv is invoked

"python -m" also seems to be a more robust way of calling virtualenv
than relying on the virtualenv command on path.
---
 tensorflow/tools/ci_build/builds/pip.sh | 19 ++++++-------------
 1 file changed, 6 insertions(+), 13 deletions(-)

diff --git a/tensorflow/tools/ci_build/builds/pip.sh b/tensorflow/tools/ci_build/builds/pip.sh
index a37cf226f9..f5764531a0 100755
--- a/tensorflow/tools/ci_build/builds/pip.sh
+++ b/tensorflow/tools/ci_build/builds/pip.sh
@@ -296,19 +296,12 @@ create_activate_virtualenv_and_install_tensorflow() {
     die "FAILED to create virtualenv directory: ${VIRTUALENV_DIR}"
   fi
 
-  if [[ ${PYTHON_BIN_PATH} == *"python3.6"* ]]; then
-    "${PYTHON_BIN_PATH}" -m venv "${VIRTUALENV_FLAGS}" \
-      "${VIRTUALENV_DIR}" || \
-      die "FAILED: Unable to create virtualenv"
-  else
-    # Verify that virtualenv exists
-    if [[ -z $(which virtualenv) ]]; then
-      die "FAILED: virtualenv not available on path"
-    fi
-    virtualenv ${VIRTUALENV_FLAGS} \
-      -p "${PYTHON_BIN_PATH}" "${VIRTUALENV_DIR}" || \
-      die "FAILED: Unable to create virtualenv"
-  fi
+  # Use the virtualenv from the default python version (i.e., python-virtualenv)
+  # to create the virtualenv directory for testing. Use the -p flag to specify
+  # the python version inside the to-be-created virtualenv directory.
+  python -m virtualenv -p "${PYTHON_BIN_PATH}" ${VIRTUALENV_FLAGS} \
+    "${VIRTUALENV_DIR}" || \
+    die "FAILED: Unable to create virtualenv"
 
   source "${VIRTUALENV_DIR}/bin/activate" || \
     die "FAILED: Unable to activate virtualenv in ${VIRTUALENV_DIR}"
-- 
GitLab


From bbfe1d3fb4118790a35323c2474801a9273e40ea Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Sun, 10 Dec 2017 10:38:54 -0800
Subject: [PATCH 0841/1225] Add IsDataTypeComplex helper function. In numerous
 places, we needlessly depend on Eigen templates to determine if a datatype is
 complex. Cleaning up these instances will be done in a separate CL.

PiperOrigin-RevId: 178544917
---
 tensorflow/core/framework/types.cc      | 10 ++++++++++
 tensorflow/core/framework/types.h       |  3 +++
 tensorflow/core/framework/types_test.cc |  7 +++++++
 3 files changed, 20 insertions(+)

diff --git a/tensorflow/core/framework/types.cc b/tensorflow/core/framework/types.cc
index 02b2df448a..b082dfbd03 100644
--- a/tensorflow/core/framework/types.cc
+++ b/tensorflow/core/framework/types.cc
@@ -318,6 +318,16 @@ bool DataTypeIsFloating(DataType dt) {
   }
 }
 
+bool DataTypeIsComplex(DataType dt) {
+  switch (dt) {
+    case DT_COMPLEX64:
+    case DT_COMPLEX128:
+      return true;
+    default:
+      return false;
+  }
+}
+
 bool DataTypeIsQuantized(DataType dt) {
   switch (dt) {
     case DT_QINT8:
diff --git a/tensorflow/core/framework/types.h b/tensorflow/core/framework/types.h
index c27a4d4605..652985658a 100644
--- a/tensorflow/core/framework/types.h
+++ b/tensorflow/core/framework/types.h
@@ -225,6 +225,9 @@ bool DataTypeCanUseMemcpy(DataType dt);
 // Returns true iff 'dt' is a real, non-quantized floating point type.
 bool DataTypeIsFloating(DataType dt);
 
+// Returns true iff 'dt' is a complex type.
+bool DataTypeIsComplex(DataType dt);
+
 bool DataTypeIsQuantized(DataType dt);
 
 // Is the dtype nonquantized integral?
diff --git a/tensorflow/core/framework/types_test.cc b/tensorflow/core/framework/types_test.cc
index bc57740469..5ddc986563 100644
--- a/tensorflow/core/framework/types_test.cc
+++ b/tensorflow/core/framework/types_test.cc
@@ -130,6 +130,13 @@ TEST(TypesTest, QuantizedTypes) {
   EXPECT_FALSE(DataTypeIsQuantized(DT_BFLOAT16));
 }
 
+TEST(TypesTest, ComplexTypes) {
+  EXPECT_TRUE(DataTypeIsComplex(DT_COMPLEX64));
+  EXPECT_TRUE(DataTypeIsComplex(DT_COMPLEX128));
+  EXPECT_FALSE(DataTypeIsComplex(DT_FLOAT));
+  EXPECT_FALSE(DataTypeIsComplex(DT_DOUBLE));
+}
+
 TEST(TypesTest, IntegerTypes) {
   for (auto dt : AllTypes()) {
     const string name = DataTypeString(dt);
-- 
GitLab


From 261268e9f17bd42cf742781c29ef238b28b4b4f1 Mon Sep 17 00:00:00 2001
From: hannesa2 <hannes.achleitner@googlemail.com>
Date: Sun, 10 Dec 2017 21:12:39 +0100
Subject: [PATCH 0842/1225] update Android libs (#15055)

---
 .gitignore                                    |   7 +
 tensorflow/examples/android/build.gradle      |   6 +-
 .../android/gradle/wrapper/gradle-wrapper.jar | Bin 0 -> 53636 bytes
 .../gradle/wrapper/gradle-wrapper.properties  |   6 +
 tensorflow/examples/android/gradlew           | 160 ++++++++++++++++++
 tensorflow/examples/android/gradlew.bat       |  90 ++++++++++
 6 files changed, 266 insertions(+), 3 deletions(-)
 create mode 100644 tensorflow/examples/android/gradle/wrapper/gradle-wrapper.jar
 create mode 100644 tensorflow/examples/android/gradle/wrapper/gradle-wrapper.properties
 create mode 100644 tensorflow/examples/android/gradlew
 create mode 100644 tensorflow/examples/android/gradlew.bat

diff --git a/.gitignore b/.gitignore
index 900ad921a4..be75938ec4 100644
--- a/.gitignore
+++ b/.gitignore
@@ -27,3 +27,10 @@ Podfile.lock
 /tensorflow/contrib/lite/examples/ios/simple/data/*.txt
 /tensorflow/contrib/lite/examples/ios/simple/data/*.tflite
 xcuserdata/**
+
+# Android
+.gradle
+.idea
+*.iml
+local.properties
+gradleBuild
diff --git a/tensorflow/examples/android/build.gradle b/tensorflow/examples/android/build.gradle
index 48f566f825..f7bdf8b816 100644
--- a/tensorflow/examples/android/build.gradle
+++ b/tensorflow/examples/android/build.gradle
@@ -28,8 +28,8 @@ buildscript {
     }
 
     dependencies {
-        classpath 'com.android.tools.build:gradle:2.3.0'
-        classpath 'org.apache.httpcomponents:httpclient:4.5.2'
+        classpath 'com.android.tools.build:gradle:3.0.1'
+        classpath 'org.apache.httpcomponents:httpclient:4.5.4'
     }
 }
 
@@ -75,7 +75,7 @@ apply plugin: 'com.android.application'
 
 android {
     compileSdkVersion 23
-    buildToolsVersion "25.0.2"
+    buildToolsVersion '26.0.2'
 
     if (nativeBuildSystem == 'cmake') {
         defaultConfig {
diff --git a/tensorflow/examples/android/gradle/wrapper/gradle-wrapper.jar b/tensorflow/examples/android/gradle/wrapper/gradle-wrapper.jar
new file mode 100644
index 0000000000000000000000000000000000000000..13372aef5e24af05341d49695ee84e5f9b594659
GIT binary patch
literal 53636
zcmWIWW@h1HVBp|j(AqTBoq>UYfeAz~Ffed3FfjPKhB)ea`nl;dGoUKGK6OTrJp%(n
zC<6n72(m(7M?X(D*WeI6U$@V`XHNTg>*`(P_14uocjo-&AcHH$51xKHqkF>htnXQE
zPaQ_CS8XZNo-B#d+##;I?8%a(6Nk1+y_977`l*N!$wDzSm$5J~Fyt4dqc{p(4L4Lx
zdQoCZPAXod!l+8iixLY8Qj0LOWHhCuEoNX~xXQr5pp0FMOMZD?PJUvFilJU|PGWI!
zZI3V4Ap?Qd`x&ND+GYAp+}GRY9h5In)U$ESan9lN^jx)fHaGu+g-1jRU)wyhl{-_j
z{`+H21?NKtB$AwJwwX^qUAs~>ao5(h7sEted);A8+-AIU+dh+58najHN~pE8mUd~Y
zbLm#Tc8T>qUSGeuhry}Hz-?Er*gbE54{NFGhcxPTg&5^?e72uZA}L^7vs}LAf)bCD
zn*^JDd+%Z1QeD|vv`{{Jul1S;69Yp53j>1-0Y3&;7MG;v1{CENq!yKArWOYj<yTZX
zmX>6s=9Oe7Czj+FK>}fEaBjYkqd=XQM*H!Lk5(qEoqFq9Rmjt>{EG=voV}f#hQ6zO
zee81+nbX&mr{qukwEjzXuE|ICAB%f7J4{tz6n=mB+#8$EXKbu@e}DY^`g?{`6<JEX
zO`P1U%-NcE%r^G*ikJmQTze-g9K9o3iSPJ<ZH?7DC&l9)tc-2j5p0v<aplmmgostu
zjTM(|EyX_R+JDnbx_+4LJO9ovU+P4BR%{bDTqFNQS=H}b>J7o5N+qTDTjJlY+Qu>E
z`U9;gJl9?;2yA-xSwrYZWh8&<>0HlyUs`WZ-s2onuK$L$?!@fbpSi;%rbRow4c-~2
zF*|vNI2(_&-^PE9^-uB?rdZpqOnP9)?6J={*n8(9c46zgi5h1-Qw;bIt;^{MpC{-4
zj7cj~Q_Lu0iBxflq0Y9cuclc2I`Ljfk=JHJK$Ca*2lgEgvQmya+&tuLb4gZCy5i7=
z-}Q%6Py7wDWL$G@4uf-<!<WhnMeVf`mwwG_yZehL+OJl})l)wsw5z^dVxF((ANQ;>
zBR8dxi0tCXsgovmv;^C%GJLXpi^Zy!uj6JG`mDJapXOL!c;w64OUhA;=6X+y6A{~X
zUf{QnA5Ub|<YtBAsZnbanQ~&3XFKry6x`_**!y(9;=g--bCM7G-8yC&6kqU?=h^bk
zqNFu3brF3Bc=CEq*{VxRXC_|W`lO9hS@%}-!Ly9Hr4#D8!%yCNvNZ6B=!_kHVmGho
z2f#}mP`-WsCZc#10|Uct1_lNt?5WNZmbD;h%C+|(*C7L*w*O~dJ`)k#!xi^6>EoW&
zT+EIEU*fhKnR!{iDzTf*E`Ok1$a<<I$I0n`etxdGE6*VIqWVP@2S?o^ZB<i$j>#|8
zo`;{CqH!}(yfyxmNpyNp%VuT8h`ULFQ)Y&x^V^=Tt~K1a;(LqZmOqhaUppU)m_21_
zn|jcOQy&%yw3$jB`jD)@)9dE9Dcw6%dj2hb7ToSo5Wv2(MJ(oYu6OC%8((tXAE;+W
z^(o8Bs=F~v3=DxR3=F#1ed-P_utJMdi#+mkQ+@LDvr7vgp6(5f_Lp)L`R6t<_S)B=
zH5yyDWks_@-;8=2diludRh=Om3Y`8at(@!hcBe$<{0ZNyl9hGoxZM|bQ`^Hq+9#TK
zpWJ!2d|&aiGiTo0+t)Eg99q>Bc;;lqq6eDuJUcWw65`xqesl`BZBTf)vuOguUV*AW
zx1bE)L*AU;Jd>k;ef#C86@TmGy8NYIE;;(0pH_Bxeo0%ws~Z-kCZ{*7jyhNDyL;Cu
zgS)-jsheK*%sVUYU0pRXG2`+^>3NCOtcSCT9_??v{+-*-WJlVqW#1<*w4bM8z+yI+
zOSNZ-&+PZ++3k1EO1F!N*IJ|=){<DK`KQd_f$U7)HkVU{tD~-(YKdL9+wHdZTx{<0
zr*bAccE7$-ajMX7R%ur4j$4B1620nwbZ7N#%Z-|2@G0tMR{7E=bCyLvOKMcS)orXf
zDb&TlF~+WPlV`^hK}VH6K^uES*}kUnGS>Tkc(-vyMYE{rcY(ASFXsJrySzo~PAW(2
z<<%A4<yY2E`@+4tqMx7VbGZzA>4*Iuzdp>EzPB&n-I|9Vr4A=dt(Ls5DD}QaQEY#$
zs?grNlNI_3a|<WD*iq}%mT`8SfzG|oO|}e$4)Y$jOT0T?edU<W-Cu!KvVS|eGAD^1
zmB>6a`L27t$Yb+A3W_r=oF`2dx#(ehiJ4nGAwGJ7#>oV2i#~RhQ27Q;8x`3@CURRR
z`$wgGF*#g2{e}L!zfD!G4ii_f?Q3bCKEcZQ&=0oA8P{8tTzHhiI?8zZJ<ln$?R<Sm
zNK8ZRo9g9P``1m^jIId|=MdFj(89j*pXHSg0-eP?_3b*}4&}X3bt+mGTJ{YsL&`jg
zy?vCGf#DYy1A{U4Wa?UxoKsqyS(fUSnUm_6oSa%*?3<XEn4StP^{0k;7YL`y{Lh`G
zmboie(LpgFhoN;#K$C{p<)&#jk}{$e@GfN)-G0(6cGunB*}mSj3U;r4mwtFLcd?PO
zZUp-=Nq6_-%~p>aKOQ<NsXtFbU+&2L?X#!tljZv<_y76(|G&)a|NXwo%fSD5vck57
zo0$Zc9J%z=y<8wp=KEt80lSi})1HN|I2VM!$#k?-owiTGZ}I*9Muu}Nfyy1G7SczI
zE6hBC&$AZto&UX%#Xv*Em@%bwmXwQbo7Tj`1p-p%!jo8b$uF*xsGA{lZAqWH)%jAD
z;J44K{7Xwzeyg?wJ)df*x=h<m?^<^BtclqZ1Ew1EHm#K?o4-grlv8w@*{WHtyH_hX
zdU>rlH+$DSlbowk!6lAgwq~`Z$xjQA^Hr1nrBWfBHm%E4^K8KG?#0ckriQVVhW;=%
z?z*=;V%Co79dXqWi>F4M+ETP}&eD`pQ?oqHpL?>7-qKI`lH@2IY;6*odphNc(?V~<
zDRZ&}SNIol2;EwgCo@Y@xJdWi+KlWfon)z*i&9s%$$D$-(zDrSa;E#&j|Y{e$(Lu$
zSBx&3eO97haB^Ono641)lh}CE+e8lsd~JH%nAp?dv-0ypLu2m5jtf(m&R+S?ufWOp
z@Vx1!V?jM@EMKv`o@93Z=G(|4!Uw!hJiDXgIJ-c9%G%f&|2zNW@XE!=CT3gREt>Jm
z@tX75+fvdkwHH@E<vFSsb>)4Q@A2trUq94dJz9F{K<J@cfvHIWzAw@oZ6mLQM9)|F
zQGHFOE!unA?0yTM#-zhG9K{})Yn6<(^%L(Jxk*H<kKAIk)acs!JEhgFn|d@p8Oa^;
zlia-_xXH%eOH%*9qz#21Q)D%>s=~IWh$NiWNa}gJG5mbb2?_BQ4ZRaH4s_S;QF)xY
zM=9~Lm+SNg8;_;N99dl#GwGvL-1LbDg6sAuKKAw56#wq$J58yin&+Do7aG0oD?TFA
z#UYbl_(f^js+6{?IVmw0bCP;_W`?D#+o}9+j%$vwk<0hOReRq$)gA9IPu<i0{qGNl
zW9=t?f1KFUKQVpYbj{Y5Wxr<?@*RFOZ&HAmsOkDsE5C8Ne(}%DTEyGke%^m#Uh0%H
zUoW;jm9v`vAwQ*4YuA*$A&WyC4A*XZIZ5SB#I~d25<;?8R+Fx-X=46&rfEf$I@dO<
zQ*HXb%^H5wm%W~)eP+4F2?L?c%KLK7CWW@uyk5n0raRYn$(-kJ8%5VnmD^G2aQW~X
z!Dp-XELg4e`gO0I@WiXGlUJv%YI|^-jeA+#maD>Dwr3<`^{&@$UJ+g;{i2_L`o0}g
zir0UCHuZ4AfoZE&do{XU)0<h#_QZzoPlWpq=2HU4%HA;Cce!W7p`@ZV_xS{gGIyt>
z*X9MePjVZKQtxC-Emt(n+sWmhd^dDTY~hr0>#VqSw|C^Zv+{;t{va2mv`%W)2Jr&F
zL%mPtv?%kRH+-{L<Zi{Q9}A~5o-}5Df8d?wl=BD74S4PQ8#O~(nqrm(Sn}@N-FZhd
zWbU*_ahh>;yjRcKY07c1?{jC@-@5OZ{==Cec7J)!8n2Md;95KL*sUGrZ{|8_#K|pt
zW##_s-T6eV_59U41D57|k}`|gXRZ9k<m`g71uP0%CeCX;shPvoTB)|^?B;}RckUF+
zmI^%*O<Z*6m4u4gakYnciv`yGP?C#@Q>!qJv-IEaNzM4D<+8%=nY&u=&#jfOJ0$!3
z-}cSZEobiUs9Lvs#&?b48wY&MgEn!p&-mDEv-F4gTKV5U^Ir+_Otfk`KEd7i{*mS#
zEnz;71Z=ln3}=d8EPH<e^ZiE}bq6kgx!xI{@ldRC`^n$Ud+bmC?%rel<gfF*6-E3^
zTWXr;ecRl`Zs;7}Kk1m)xv+W5nZM{*E>AmE(ecAfPJL~U(SF4~v15Dq_X}NCobuRU
z>J!VK>Kgx5G~<F^b%uSpE4WQ}KgUur+tYU+Z+TI^B5O|{yGl%(d1-5kKI`fg&$eah
z_A;w`zE$1ZKgZ<u*87}GrWswmb6ax7?IrG#T80WQci4QBM{T^E;f-^f$i%?#lMQF%
zB^=Usa?a07%S<mVN-W9D&(i~Ug?cCF`U^V>9RGj&`L$-driMj}l-?@NS@6J^lRwlj
zlt<K~#(7=AH07s#cVbOH24^Z6er%OLz@Iv2Z}fsSw~v?`Z#Mos|L@zF-@nhFk7s!H
zL8IM!#>oV;ZkZmQcZK%H&)S%O-&uL!tdmrCrcKI)Z@Km#-h`QNh;2{YP?%9?#Cx2p
zXYJXq%XwPw{pxxd&9ijH@?@*r#h#|ib-(X5Zf0&<crWDhfjRLJIc5D4m*U?%&JgNf
zbnlAs?TyW<Z53O$ot+R-`hDfR{2P5=w^d)+wC8);YT<it-_4wxdwKKENEP9Ojc<*w
zNUghWc5B|w`x|@C&#SD-sXXTQBs}){;=Avv)Y292)ZFH}rnhHLz}l1f=YJOkZ>-wb
zzIocxz>Al)mOs{8xAM*ES?#O-e_Q3|zI#%-%T2+R+tVK^*WC~53z7G;cR%;u)@5qO
zjujowm)}-ZU*n$rzAj|zgUEkUyrMi8<Kpx5)|<ZC9ewuKvv;d|%}aTkPZ;yesZ;!?
z{n1op!u5bh%g*$+G4i_Jv-mnce(E}__(yK9FWllbVEot45Y+LpQ{?E6w+r<-;zjnU
z)L4FDPM9<QjltY9jq@k^U)vw}67+_paPJfgN28wL#5w=OPU%mUb(@sK%Gp&kMKfW_
zBEz15ux_@%v>uzoqA{Y=C7x93uB>-|_WI_ADFSo4C#OFuj1hjM`l|0^UhOZInZ;9L
zX3SMMd$!<E!4*}t4<6k2PZh|TizpOIrUY>sIX)IuD3n~{CbgHJ<G;b5N6}srmhc!Y
zX4$!x&GVE(@sgSEnS%rrCVNd-!?S4t%g)7YCr>H#dwyQR?-AJGqv;rAbfQ?*Bb0a2
zB*$r<n=i31QE|xfny`f@D7@)K)`TrQoBU>7uXQ-Zlwx(|&R=fSv>jM0oB5Q5fkBds
zfx!TKiwC{G>yen3k^^h*^oG4I77i8ppF2C{aM|g_0bX9M988(NIvPZCxU?>KE#+4U
z&}rJ`n?JcV#XQGsi>T=QA8vmS@$?-Ix3g?MEOFr6iSqv%U(3xGp6$MsR`qW0^V0J_
z=N6xTbN~OJpW+RFe?%VE>se9Z>@4hNGUv#OE_2f#s-LH@i6(zC&?^_#QVX3E_HkNU
zSPx5~+d59qb6!P$&3!8s&Sh6fU%vFliTmEcqhc|Kc2C-)u5Ycr^nJF0fwE`NVV#zc
zdD)I!nWD?OPIECiFH7C!E^6YwYq`jezT2Cf4z?Ly77}iK(^GIx%2=}K*0DEBCeAdH
z37&V_XZ~!n&s<f<bJRt>Esy269eF0JJYCpNXXA`=*Kb>rKR=kIRDWrMM%g1Sv*j((
z+MyeajmmEBNIT+VZI+bgvS)XR&W<!Sv%Du6;wEcbUUMj~RtQ;Yo%NI}&(p=UwQ;Nd
zX?OK@@kvV0XKizh{4Barqxj<z$-`bpGt#0oyRNKxsCM~KWZApBZ#t$#Jor1$<5+^l
zN~f@hysgtu&OEL3GU#5)z6A;1t<8bq7H!$yx4-UuT9SGtB_w^#f~9LVo@hxG%o4m%
zB-)U>R$x7YrtwKhz5K0GsYSa~y1YNHYSMc)PpovouUiYNHoUTQe_0TF$S1Nby5L1}
z#n+^DH<wS_!0i__<-w~eH}9|B!Lqv@Uzo4GQ+(;f^2+I5+Ezvhmvb^(rZ@GaZ<+KZ
z<K>2lmkUFG-Mh13xk$-6mbzsLJJZ5KIIOcaTZI`IZ{BpWjNiKNZOik^hq%&nlBaF@
z;8l9KxaVmHAA6-|j4W%+WX7}FH)h1QJxr=!Vt;FGlH+kDy9<S~@~eJ$96!Ee;Ui($
z`Av~EZRbm64v1^*yHXfjwC~V$t$i0i>h8Pz(Y7w2(EE7*4^PLExBN33uYB9jv{%TR
zPki&8moj?d5p!;>o51X@wm#u@*NRW3-n{ij$<LP>OZK_m-RDuex#arBQy#~&zns=D
zHT}CFdwN99Opm=vri*x$Ry>&FYyE9cjZfSk+4B|6H7h)=XMg?>sk*eS-~52(G=X(3
zTY`FZxT04}6y;4z7m%-=T6HuhYxQ%ZeOo5Z7vL^=xOvY~slKNR%QqPPdYpOKvtu=D
zqN;1NOqg4ytlndP_6_;xrj&kLF0=c9<qbK>+=$lRtu~J)35K2Wt>!iNnl<U?`@i#l
z=|2x-oMYZF`(_6hpSeN&WM`Z6_w`QfSJNo6oA~ab=CRO+ulns;KJ8Y&=PmVN#_f&$
zA^MsMO?m5H3%K%c%~)g8!0-6!Uxd@;FU}_YuH7I0GS#2|tNP<4M~}Ya+-1)nxa7wR
zzW%ZPZnNEs$+IWCKWW>dK5c=&{=%uZUf(to**H<pO!VTl?*g;uZ^^A}`PO+)X`7u}
zdy`}QiSH7(MeZEr+{r!h>3i;<-7j>KekUl1Xe@fo_pOC#pVB<(5a%pG>vxOxsxSHG
zwRUbuO;3UGx%<Dr2cEAr5uRB4BAw}P#l>i6ix$T%0ZNWDoUC8+E!!b1EpSZjReQ%B
zJ+JSAeR52f&VLoC$`o~-Gw1V@7^PL$Uw%}x-<DT+=k77bJ4d~D_@8CBc++Ub{xk84
z-)XyhV(#K^lkVJgQ`i0e`SNQuTe$_P4g1(KY!3)6Ddf_>-0k1w=fA#y_mTNMx%7!n
zKSZ4^4n2x_^!-}k9<TcKQ@JB9Rn6U<95=DQxcty<-v8_YD5Ew@D-Cvqfd-3N85rcT
z7nwf!>F^F=>*U}0mjXnN@1K5LYrWyI0-qVebGcl@+wu+7+EO?L<vG*Tey^P}d!2XY
zLXqD8UG=R}M~}$=Vg9qY=lM2Xp@_{B=FKa9Z#n<7R{g(!zjz-A_)qqDkj6B<K)dnw
z3bnvy9hcY+Yo|L?3ltnDUP$QZU(Qx8d4u!tE#t#Rcb@ZXeC7FeMqF<70m)6Vhaat9
ze%P-3-pbm^erCJ3Et_?GRWgUQMbFOV$66L%*(w`*QMNRp=;`9ceQtLGcWql&y!@zu
zY3|&G0dME6ln*%?q8s~iXJ6t=-H8EGpH8}5-OHoR7c##glDX|zQqAv>(yAJpr_+u_
zZ*A=spSNAZPvx3ga$8b_jI;5Sn=e~}9t($;9{FN5<)C)%@>_gg)1+*3+fvTYvy!qm
zI&vfEUk;y(kCdd$vWZSnPR6~}JM<qqO7RDJEbef3vJrd4`IK|6(cxqN|K$Hlk6FyL
z{hW4Z*1E2Z`aizg`Nr5S;jZ=yWZf@(IPBgfu9zs6@b)Vn|K=p$y5|wG?C64+E%QYm
zJv@3jzEks@``(gPuG+tx7Z$v{De`zpz1y5GJl8zaPFgv&)HwwG<5Zhor|@a&8V)m$
zb;@3!jFN7sc?nvc|Foxb8oP~eK<3;VPKpWpcxFWYv{srw@lRVuZQBi_9W0!wm)7ew
zY%VUC=C{qZK+u};M<~O&pAV++_wAp)UuU6q%RWEF-`p?m$jm(~>h*5%X~w9<ddXkT
z{eIEoFugoIBDyu4Gfvs#Zu9B78#43wA2{7!dQWNIHutD2-W9rgreET@lx5X=$y8&B
z<R$M`E784oK0NyNpAj`lo-2CYCCbjgkjKZspoTq31|*hbI2ISD<|e`G?x|rvlcmE2
z|6RK#S~=OGtN6y&<|OrAott8w!5W^zAFjHrUH?nV@79j&Y~_5d_q%qlKGIestns~;
zg=xxfHt{IUBU>2jp7b;AyBQn3P3!VE_PyrsGw+=-H@-i6`~QFE|C%$-Ikx|iMb{y@
zIY)zJ*z%5O?p1IOY<sHVb8}nDMxmqKg+~G>$VRKEvxPqkk_o&s<!!Xl8n=yN#~o`_
z`ub1Rr+*CMk?(%=xUhz4e$T_;!Wyah6OX35?vg$FJmp`F%<&JKc;Y)AbqCgHw&@?g
z*t-76htR-(vv}ggXWtiiIoq(t`&s_+4|65<Ngx0Iq4r>*dYk-_56+gA>Rr3J_<AC`
zqY6(H#N3Y1*x8n<6;Qlv<?UwIvxOz@2Pb+@{_CuK__J~F;mMz;A9rtmzI>T_aq@H#
z6}P+$^Q}37#@shGr3|>Ai@uTF;&c3}PkEU3v=Y|eYHG*l%gD{SXSH>GLiW!^c8_0t
zOU;fnsWEPwT6;+2m5E^G9RuF)i<*vwd|%UaVZqepxBdwpzH;2_H1{sswYnmuAN3!k
zo8F#Kx<RsNI?tkaG8^AGT`QluH#J*Zd)M5#7w#;rbKC0UJG+l>N!c#3{NLQWb{oro
zURkwm-p6#e>bn|Ka(qk9m+xPDT{z!!$CI4G-J-0eXRnm5P}sJ>^4AQt&Yq{4*TiS%
zoKBM1w!iC?d<^R)(PY<y6|dG_trjo5wz6hRinxCEIU&_&7pBXtd%X1B!@Z|>J<edQ
zJ)!ZUZ}o!1;YDALPRiYpee>em`e6G#8=i4lORwY82#|QZFgL6{T>R@)1$)8c#Rm`F
z`n9PvSU#?_^zz3`GDdsfXq;)Alzgt~)NjkzJQHT`nmu>snYTilmJ}b;%bQpcJo{L9
zL>X(Llx#xlo;T(1=Jjg*b`A=Vo3qL5+akT$DZlvmzMkY16)y-pT4^J9v_FLB@o}wr
zhfPI`E254DOFVYHmlt`||3g<7tIj8{>x(2bB#-XNTm56@i$6T}>28~}>J(RV|5(};
z-}xnb&!PuscBk*S@xxbSf2d*O^2lA2{l2MqUypG$+*jZnU$Xee+^+kN`S<)-?A-76
z(OtCu(EQLfk3&W455|jbsc@h6QqMX;q-fb{ah9*yiwYRmb3fYD)Lc4a!Gg8BtRszL
zlOykcOaHwfRO{K~Wlz<nd)!%)xNgdwCH`~mHpV8pY1}z{KjhBwwyyo&ZtuExKfA@Y
z@)7SI&mxw&{o<RCq+OqG{zgM?#gBH87fk;=^SF%9se0~R@=(S58TYc%?u1GC)00@2
z-+Sn*x+|ny*OdR-mK}`!1-dJDiL4Ku_aQ@8Fgz{J@xAq~2aNld#O}NnQlPjyYgfLk
z^X7l7NryifvT@5h*WcZ@XY=vvpEvBiz4~_W=H^MWHI%RG&Ta|*&}cO=PIl$fg1>ej
zFZ|C;t9yL1vj3S<vd!LVxAM1bhA)lkoOzb+TB_>irCoV;>CS>zuNqvWPO{%qS~+Kb
zvBuQZ+-9-wQ&+t6(Z6|3w=#P#i`hrx#s{B&z4}pV7|Cw^dDn4G-JEX^53h`=-Tx_z
z<;|sdolMu|$L^&yt$(xT@~p?NPS**k=X8q{TrXoZ>VEv^p4F-R-ELbR{jK0xqZQ73
zqM$bK_O9vnWnrfb-)+2D^hY7QzaY$_{j#XW`l?m(k0hS+lqei|zq!t9)%q4jo+2@3
z=f+0$6`H#W4JVx2)7Q3YYHbUvO6F6GZwoGOF1c}?Pp4C51IrPE+@EIuz5N5vZNEIz
zNJ^WdcUiR-NB`1lV^NoLt!4sqC+|-ZeRI>HW?hrox+_NC)V>vF@D^$$*R!5|erWbC
z_OcghWiP6N{J#dIFtb0uwD#dO4_4jR(GRyM9x^-{uxq1yrcB~A_KVG-Y2i-0-!$Lj
zdTXXv*dKGy`@}{uUYk;3|DdcVk(2#zCHtIJ`075R$>8mqqZ&&&On=^CJ-Opxz0;ij
zrJWl#_Be~m$a%|f?r&UN+&gpS&Vct<Y`3htKINT>ciA!Lso(tb7g{}246yP1U>acD
zSjZpRyeGZ!{%#v}_1{9d-#=W?n0(Fd+U01s<7|CrTRWcGimJ@^pHf&;Tf!V$+$p@~
z;z{;-Pao@)J?~rdwOGbM*wAFM)m)u2@zc6L&Kd3FxMkR>IWHqAZJD97PVAKQk3HhP
z_r-qM)!cBtuMp45cq#je*rKf|Lj2PUUpf7bZ83g5^Vm0|psRaS_GHXU>UGPgoU&8y
z`ychco1^8T3XXmjSb8kf=Jmzd^A#KQXWvR%-uv@~+*`qCb9UWuY`1-FE4%RdX^(JQ
zxwMpokVLm7M$6KUO;-4q6?$vio^6S8=P$cUv@fWP39PI=vOlEtzD48z>6Ly)MpHZm
z9M8)hxwL*^qV41tU&GEieRsKUo1$Iue%BP$2Q`mOUd|O<H~;37pUV7M6YRW~*}1R3
z9?@JmN1^XktL!Z&tCpGo#=F+_vhP1f9B-exwMOLBf3$gFi+kJdN((bEywPM}P{3ZX
zdgc|EB<AEmMtkN)a@UAF-D<DC`{qrZ!`cFePG!3&RkoW;3GjMOxFIm%gMq@Ul=4YE
zZ<ODio8+=Ks@v=8zJTb9VQc!6bVCC@FKFDzs@?W&;nuBNuZCskzka)S@Aq%_ZvWQ$
zUY}NGK08I=k^cU_Y0s+XJpVuEeNBAa-`|(rJ2MpP1s2$yn|@5aSE)CCRuhv6W9G4w
z%cT|=$*upu!vAshc~*@B!l^lvOeQnOoCsbrQ|f`zJh27VJ0fPi<YH$wnVNp$&rGiK
zpFY1~IG^bI@G^6W)Op?yk~aQTUP~;4AO2*Vb@(xJ%&AnyRga5)u9>hbXx6I2n6su_
z`X4?!wThqYIm$Tey!Vp&mGh^cEZ*5sJ*{W1?1H3qUq5qIuG4u?ToKxE{^@6?iq*jt
zr47@X#gDyOnmJ)vpvD?OuIIli*M2e4tZ6(n|5Nei`PV(&y&KLw{(32g<3sD7z=ziu
z);|4McTeT~*-Ig;72NruKa^T;r(D{`u=e>|#;k|kqW1(Bggv}{X!?(zK|go+%+hN8
z%CNO?_o0OhxzEe&+yCbL*y;S_t<D2IGuPFp|LzH%Cn%`4szAp<c7w{S9~SSPX)p2A
zn8S0yFK^1RNu8Rl<(hYd7v#;9ejXQjLA@(Z^1<pKSwHtg-LuoR+ZpvRdqxv)Z}uxQ
z^ZZ>~7OpJ4d~Dg(+2*Ee^LLgw-(9xVr}gag*So*W>X3WACUdJ$+L~h4%Rhcq=w!Y$
z%<h)+bo`V(vu0vN`tpehvo6Z`ms~4R`%-XZ#Y@i@ckj9_T6R<Jh*!PEJ>l*nUkxtj
zZ142teOdVM(%R_Q(vwRBbNe}-?#$yhdvfmJqRTz^0#v4&luKrx=?E0(l05mu%68f9
zO;>jWX4pJrn(%q8N%FF-T`wnH>c9B<v8m<G8>!JV_I&-g;h@RJn>~`cD>HUays;!X
zz(#=o<wV`8LsQ<DEWiHBXtIxStYvq;lAwn?hbqrqBdIe>j+Sg&%@#dZUFKF|*)pMB
zsmGGFPsuEw{ZVRT=MJSQ*5^XXPAypwDDg%(t=em!xw?{%u*@gJ!>exY*~l5(RkVCd
z*~{)}dfC3NGu7N(zP8PAY+dhM{`o|n+Ez_5=Jy?Ziw_xi`(=HpT-@Ox;L4I|ARH;#
z?z7<5rX1tlPo<Z!=Je!6P7C>A{6UD#c_qi1xyt|VTzVLC>Q2+i%c)<CHg?7f%iK<!
ze7;-BDb4coWH*he25a}tDs50JxS3EU<>?((#=+LL$!1p4T}|#1UfZs~w$PflEBp8y
z>+XHK%CYa8U4!HHl4VQ=r78)t?7UW8{ibzc#?8!#(v2U)loQLoJiV2bY{TcFE3xo?
z##cARl@`~p^12yKG1x83Uv)T*>6K4WSb|N^<(ykpe$sM`wRdM~TyQ%RYdPn(R^;x}
z6K!RCzQ(#`qzBb(-cqt<i~3cz!c{J^`Ul#oPI$c(ynUJVWQCTXQBFm_an<hL38^h_
z1?!JAiiBLg^KioIRpI_B&lJ<P^>3MNo^-@uLi!@j3{&gmT~oaz7RMS)TGs8kOr3Z2
zM)3ukj&w=)xLom3V?EKaX5y7?9H&#nlwXu@NNh2es=DDh(NkN-KQdC`)|%@_gahwx
zdc%2-BS`8+$#!Mi=MtsPN_r>DN>BAzOxdvT;*^WRDMuvQK7YOWAbKKe<K9J@Y|}bJ
z%zNfrt3FU^+WJxBMNhfitH&2t$T?4086oJu=5*CnpFGp#8_O1KUA`qpTBJukx+F4E
zz)Y!a$>o;rr3QhX%ig?Y{`Bay{HvxUPUZ{qHr+M-aM(%1UUc`RmPf@g$(q*%f4ZIV
z`jT3kcw5ptd%|R?RF_F2G84}-3Ry@hy)ORMy7lZOX|t5%fR=Tdk(({ri;{IjO*eVQ
z>@=GEqszV6-?=17<KErrlbdI(@{TQ@ygP*NT7qtWubRJ$j`)S#?sL%_5*ULwzvi2E
z^WCvIU7xDNJvIl-a&=xK#-;uJtkL09rN2*KvygP^Zn<!3qwuM<jYeC<=Fhq2S?t|*
zO=w}LsaVwQHZlFFN5$tIdbIJ`)jju)I>$`ooGzMNR-U&d#Y0zq^R`VpuKGRadvx{7
zo~)V6E`9#e<h6fcd8n1oU#-7$>K0y7uw8tq^o!)wbqljYt6JnjU%Ks+>p31$aB9){
zRa<7~1-z{KwdT<7(0z^hAwSjbE&lNQ$>X%C`46K*|JmH{di(Q;*IDK@o-b-0pXz^_
zzQA?eB#sx?ww8vj2p0Lb=~VSkGnTS<EprN31*;q%-VWW@ynoTB@SlEDzkiH9q4zGe
z`B3@lKc@e7o%;TvZtDA|`5wjdPkrwe;$QJc;V+N$!3&RiSI$Uv>&-cow(Ctzw7Ty0
zunW(ot31zsxMSj#q(X0J$E=rWK9`egXG}X#Cp}#zY^~`s^Z2t}*|Hu_W?k&AwYs!i
z_sqH1tKDWA9;vVW{q#lH&)GE&Jl`gqy8q~}kF}qM%dB!Cokvk$CVMXUbofv1)OU(A
zUMSSNKXp$KeYZ(l*EnT)tnR&qJ5OF%c-ZmbW$z@_jb;a~l=_9dzr5>9Y}>Uw3B$9O
z7T?{Jo^8AAYG|y`X74X9A)9xVaQ7}eTdsTHdG_5-3$dtLDV@9Odt^3*E<G4FSFZim
ztSf70&1@+YTBIx^JWX}hyoQXIEo?nE-dsApwa>+7)yr8^Jyegz`SFJ8c4h87x~y%}
zsxWQOX-AA^22Sn@Xq)qH%~9u<pVnO~6FHGlTGnfp<?z&<wYPs|O^1B=56-FcMUKlq
zt9x_JdFE!li5GsW)UL8|un+#pVZZd#<xgfD^DoYBdg8EK(wlwZf#;Gr7t~i3wjI<9
zu6y3TYf<49#d%M?r?2{>^IlNPJ=E^ldCfnuQ}?SU=*Y=tXLd!cPGn4+S>*RxZq@TG
zeHDwsCcmhTdng|A&(3~I=dHk2ncxeltILjs)J=>RS{XO(+0;a_Q({$G`%hH|1b5i0
zOp811BOSM4<sasmX1^b2n=SkFt3pd?o3m}D5bMV1{0@n#pn_#5cSa-$`MDpNDQm`7
z{bh#g#TkcW4#rz0nVBmJYJ5y7<#{^yl>L+IqW>nIvVS&z#h>6h=44wx)fJy+^UU9W
z)ZSh4Rn)%~Uljj4J=`ApZ|Q=^;eY&&U0Qs~WNJOH{O5cYYh!n-<i8t!<yPf+os$)^
z-=m#Z>A%$eqC40Bu&4Jwe%0E4zFcYhyozXtpR-?1oZx<Td&V2~r#<Pp-=F^AdAe!J
z%B8-(pXSteg|leL6rPgts}IV&dBVEsrLx*nna#_m?UGxwb#mshl9v(hT<k9PIz8mr
z`1Y`K*xrRnCJmK0Pq*HkzvGi+-qzr-b9+@jzkK=FXYT8pN8HT59SY!*v`@Qw#o40s
zLgICqbI-U~(`(MevYk5<aQ~XVSkpi5*mKWP)pzpDd^Bm_guV|P+-lp`>t#2&t$ep3
z&+}(9AN#U>1yL;dnKNT1`N++i`KJ1CVOY)E3$>nsnUl5e)mUsSiHa?g;n3f>S;ioJ
z%C-J^^Y<pLwAr{W@ABU;8|C9I4@K>!7)|cIn8)qz65l>|+S}qY8n14<?>-(nU&;LB
zlVwJkah$?c4>koJJF)9_dQ79p92rN|!+w5O&n4dK{I}~{ip``=Je|uQ&Q6inV*YWl
zSy$;hR|4}ANvm^yg)iGIcRkfs3iozfpILq5qs6{ASHcy-jhQljKU<u0W$tMU<C_yS
zZzwd$wHcqc_&HTP!l%jaYn6_iv!;0Alv!b~iZ~OVdoQp{j5fMH!MR+1vU};NV@qa>
zZHxPzQtb8N<eiNy{uM=2mCIuiQrnaKctoG4tbQ2YbNTN1?~ATZS-9|v@5C>^RApwr
zeZ636veUB_-l`|3-?WxKDJOktsn+G0%Tp$aO3txlR9P9Vx6*8T+W+ShjW--R$9VRm
zoQj`_l(S9I^Btc80vsjwzP9!g@bmrlHR4j?{@1#J8%(aV9L`VeieVQBimfV&wMjH_
z>(MYWW-!)E`x5o?{6y(Avoz5e7k9q=<XUdEed9qz^S)hcR-W8?a6|VB;fJZKq;*#|
zC>I)L)$tqcc3d7TKUFZ$j`gM2uC5vDj)lwB&f<Dn(fi%WGW)jwoJnuh`_5GO9lRs`
zQf*81hf-OMijQ-`?HpL2Hj89v%b3rMlT-L=(5t`R!pE!fY2v4;;j+s(+jIElJ6B8n
zym0QQSMRb*k`of;&Mp($GuP8RdC8rHp4%_1b@?{?#F1~aJ*KAb?~IZAQa5j3gWSYt
z9IPG(i$h)QePi0wg1moDHeuACoW0jE-(K|Ev(w35Yx(~^Zr<lO>sFZD7uL_(+N;=B
zteC&d*8OOI?}W`fR@0h`R8!V2k__QJDSo49UFnY0r!lVaOY|KV)mY8m=Tq&d>frst
z?8f!xtxrp=?{c0^b2wjHBA#}3>C6)Ido^E7*L+KQdVM3?^H6vBJg0aDWpfcfd#8h?
z%_ULMM;Xp$eAM~uH_zq#kL=Igdn$itriB{ppP#1pS*-HC<-h5t680_U@H%Hz@Td7y
z-K<z{-RE0)Cxor8nD(>#kK_G|zcUV|Sjl}6{V5(_+~T)hmqYB!<(T(-P4rgyL|yPt
z&*9a&WSajv&tBf*Usb@%<&zXQuUU99W4qIv?hK`^j8eBB9C~ZguD9&x7Us5HE$5VE
zpK)CLDYC3L<wEKeN43&Rq8Yz0l<T_Y2$wk=JiXypoW#loy#-6Z@b2pWv#8|S8uJT(
z4TO}~R?975S3K%+%k};eW`k@+)t8EEJ~X+t>~rNmyk_-*ueU2cKit{Hcd7irvKVH2
zr~YM(pLyI^vUHzr$gcCg8eF1w*!(hkW?gjR%I}d{*`GtU$tf<_{Yb)1d!pC3<?XB8
z_^RhEdS&Cqd}1>50__WG3dKSTd>QR_ub5X=+;DHw)h$Qa*ly?dA5^N0*g1{w;^PJ1
zSx%-3FRz~G;=j(XeWm@{%bPbe6y&a{n)vFpHv5WY*P>Nd#21L`vMGq=>BcO%u_%1Y
zw#thecWl+&ziR7Q@h@FkTYvUn7CLx2=)GW+naJki+~qO6ZPzbYSR`pF8Fob2a4(wQ
zE|J6VDSun?fp)v8YzlRDY4dtl>^T*-*>H85vTf>fnM&b5&u`}Y-!7cA`ig?g6OFIJ
zHmcWNv1M9rsF}v{{pRlsL%FTGq3+WwZBKvd7U2CpF)Vb-{uS56zkI#iRl6jNt=9kI
zDVtlntmQrGwilf+-<;dD-Er;hhqJ9$Oxf48;hz@EzUFhS5i=R|S#Q1H-dv&;dSI9P
z#yYElx-)Kfd47Dc-1D@%Ad>a<rMfT8vnurUU)wH!?)jzWy4l<BAFAiY6<pHX&#cG#
z;=e$LRl0m7KW|WBwClrFj8^5c+j%Zk=I%>hx$bQ64ykq8YuX<_*xz=IF{|X$ZK)^b
zJKp`-{enluB6Y&GLlM5M;$JQ=KXS3+OK`FMT5E@Fwv|hBCtTZp@((X_$wP+RDLxmw
zSZ8O68s2}lE$V-Fh)f9Ac8dW1U(vraUxz&r_2dxWDzhMy?{;p5?DIWwcbINWeQI>y
zaMDg5vACw`&KLG_eEcI&)L}E<fv@mD8K>ytyZo>3b9diu_kCVheJshPR`*ez>9sA&
zhpGjhUsv3h@96X1{fJG6Mg_;WI*BcEhkpxi_>%aCm#ObK!;x}^J@+4b*WBhU_@*GW
z-(UaGtLursD?8)Zj@z1a$1#fheK2i7_)F7@ozu!L@xN&QuqkEg^_T81taJANz0vlq
zJ@R}#$BM4!%P&ZO36{R)`l9l1TiLvrw58_fa(@Wjm>*X0QgGLs!b`tb?%lb_;EMml
zJ?t9yt1lEANLRI*^)Ehf-f$M%7GL%(m*z~qR+IdxY8Rr*BA%XXt(<kfc|y>JoqG=T
zwP)G%l+Fv?thJGIPF&vM)kW8o9%MJ{+0>X2{YmD|+ldheue3TlS$zDj#PdZcVsB4E
z1*=-MsOB3}l|R!n1+R!KoYoM_nYPz=f6EPF0l7&RY$J-*{&MjNOq`y6cI%wF+}Yax
zdA~33zUTTt#c@OV1gn(&uS+IeRegDL%9or+Pm9_V%d86e-z-kLxFpDK(S@b+lN?@3
zH;b?z*?qgMm-m7w^Arb0fiGU+MlYllydTW7a(FEJV0qK2jm*dM#h2|~ayfrL>)+#i
zj*DeWEE61mM1C`Fh^Y{sXMWJjy!VFE{X;Kqn{ZaKFZ^lqB7#%*{KD@O4DYiqz2AII
zUc>J{gIB#n-e2X6+7_$V@{TW+Lj>y2sV%I$k#&FZ{X^TVH=O;)w_5e+`?I$S^=l{G
zv-sAu>Oh{*tHu0|SD7WhH5`^#tXA8;E%A^4g9lHpMPE=C(46_k)%RjhU~AN(+?n6=
zW(7p1h6~xU_x+blc;PCt+)b>o-tl*Sz_;}pssDtZ-np;vsy?-mVNs-@_kwGT(MN<d
zzU|w){p`l_9R7qLpZ_V(FR$sC{O8O01@Ag;bXdySO#Q@_lksn1(2;cEIf=$s=4gJ=
z&uruRn|(`-%Vo-0lik}Je#F1Le_1i8dHI=B(~msyiz-#KShDel{essoj_wPN{$nn2
zA~I&SACukgKLzn`S$68Lo_hWwSINKG|Bm<eeSaYF@{j)C_*-YB6J$M2>t;xP-SB*Z
zw(9bXkLj8}9Z%jo`**|U&9i^Me1EvS_=DxSC$G-gl$*)FKexN+4(A$!;(4<TZ_NG1
z+2dHLWAph&x0S=EJh{6ET~}}ytl9MZ;(un;8LPVod^(S?FfdHz#JQ%*DL=oYxTGkt
zz&9~7FSsPJs3aA-#<n*sGWvFyfbF48tgD0$<QTFPFp11Q>KE)Ky}*RCb%VsDglSew
zx;N;%xlCK3Bm82TUgmG<U$>_3Gt+NdU*miKxBjmy^%u+5r8Jv=y2@WZbEb9ixifo{
z|NcCmug|!rQ9kuYVYr)qn#vyi#4tDQXDN3M@9AOH`IP9{#%?HiI^yBszg0F5CbfT!
zm?|*cKxMIDw$QcIoP^**>|Z?!%=>?AINE;v>yLnA?mr?9secXlSrqLiK1nEusU~>V
z!kd?wY!-fKV<<f4W60QRcs4<M=NYrj*RJHwRrxn#X2@6GtCxQ4QxJI@tE;UbW4yM+
zBzL9eT-W(gum0I>UR3UT`P{87kzuCFd8?<LUE|4kPbZ%11%K|*Z&#R2SFLullG?6&
z*2907(Al1InO3t)KCB3vrL^UWftbQ>#ph>v5_iVbML0CCjt$Xuns358TlSXmu32Y~
z`IsK{kSOq%IMcUo{V}hQKLu?8)=G?LRX55C@}K>B`pi93&GxyfY+|=BMqcXMWwj|s
z-~Z0VI>XAfQ}dR$E!+A0=3$lF+f6T@@OrTH#>JixpR<xDW78~Cs+>eCSEcB^%kV1=
zIsHnVJvy{%uH+H{F_tTtTP8eCJLk2@@_ERDu!l-_nwu2XX;!drXDNx9V8!UQ((NGM
zC809Wj2n|bxHR_lFg5c#&Jg-0TEpe-C|06g^+L+^&MMb0Opj04rk~%aapGaah0j-n
z*LJ<ptmtxP^nL#5_wDKsPEL0&yTi|2Q(8?fv|itu+xLi@HU5z{>-i6+ABzt6iyiKF
zYm*bM^gd+HweM(o$eYg$t1S;-GkUWofhjHh%$#{#Qp;qotbAj}eQUSQiy#iMS~l_b
zYuHQ|YxreO$<4JsZp*^XzNuM|_54TM_HWG%2mk-v@}hSRv$o@tt!KZ6YLx_Q#8llC
zS39k}HDE(eXymaIo7O5_KN_`t*-TmcQ@3=2|83}!)AF}iKj|iaNVjCj`+|+^BFeD^
zUFJ=bcCC9G<+C)sTyy5;9g83B+p4oh`{{$tXXdT^_{Z9LJ;(J0mv?>m$L@6X?DDRJ
z#;A>|ch(iAt$yY6BeeJNDOankTeAui>=s(rtTySXzJL0Vh|9yQi4FU@%z~PiOy<}%
zFXp^-qDIVBZxyAzi$1Z%%=@=l?QV$XVq?jx%i0~}Zf7n1CMbBvP}gXcLWFX^DaSn5
zbuVj}ufO<zjsM?ghX=a7$6jygzNB;EjjUDMhbFPo7mF`G=dixuzxa_<0>k>tKK;Uq
zn;y0QSF`SXc6Q;!<ST3E9GWap-#+6*&$EYT4~yG3PI3QhD5Z8p_ynV2|8=HC@h2Z&
zYB7KCHD2xYv6khmz8`I7sn;;hcMz&SWbE^M+F5bFD8A*#IfWM#%-!<-kiohnp1WVp
zUT~IY*vogOQrwGCExy(8iNk%B--1nV4m<Z;>iR9?+_IzF<oiRm+Nl*M!-ex6`EHWZ
zo={erKQV8PijwC&HT^IC0VnPnx_-I1r=;uJkq!IbO1!Ns(dpka=UY#AZqoPjFJv=?
z^NUth#(ZS|^x_ZazJ~ADpD+4wdVl@gMcNPdN2$~_y-WV`r!cIsjEkE~c*$K2!99+z
z3s2cDxAIMFW+`tvQBgSam0_o*)pegI_f^-n+cm6vY#Y@nb1%|s+8mb;P4~+l-TBFn
zS{!;@+pcn%i-AE|f`LH;`#eK9WQnP3MQU;>e2>}G@X7?4P|5$kn@;ALU2e&nv`te}
zM>Y4-S=ZBdrzv{|OP#oyb7|J1YbW2CMBaGQa`UDf2bYG1)*=}t5f|6B+Z|uBbu=sz
zJ1YJDx827>%=sU_pY<`@d^?x7{>|*~d%y2}|D*PJ_3tOe{PtW4dxbtd)~&lEcp$o2
z<KW!7JEAU<d~6FJ<VEg0-oKr-V7+`k|0CIobOF&F`W;>}UmnLghCEJSwY;x;xH`o`
z<#6@WFTYbN_HU?BXy8Bf-Fefe#F@1!4U#|a=^u7y{xPFQe!|<sMyJ^fwC}U0R%kS|
zKebjreEe^PN`w2+uH!9#g&pVg`FG9jKm8`y=ywUjd_noXA0Jyj$V{C3JLOxXy`A`x
zn-&rWCV%={!w}E$pqsHqvq7Kz!&HWSTBrSge%z|CtL5$MPk-BZ{6inj{xQwt=QjEK
zoQuToi>lt|e)RiC<BV^I73x~#**^;ZF=?2ux#IpMX<5m4Z>3~k%F5iZwYsytI-{as
zMS4YwkK>B>M!D;+O}r8$EE{;*XNj7bX2hl3C6}g6+f<fuvS(dF>b!<^=Q8@;o&G%w
zcz(j{{Ijk-XJ2ePcUStHO?G+7miU98V)r`lUaZ`F*tS*3)|&NE)8%D`YiEZ!`D;f!
zn=x;r(8<1qcdX8?k9u%xmdB-kTJ6S(B^<hZ?=GJasg*qwaamGZ{gm6n;;JbBqgxAS
zZ?s8U@=ZleWX)^iGo5{$!QJX#`)ZfDsclYP{`u{;N~2|Isy!n7SKk==NR`Z#+q_N1
zr?0Kj;o#A;Kf^@AD{X9Te}9c=JGA1^_1q&bBTD0ymhz`8J62lk(&uf{J7e0Ng)=!9
z-gRv}yR}VmVd=U93z_DeGFK1cd|ku(Ty*PJftSe@8=M=R5=-a3o%h$U_Ll4MbYC@z
zy~mDP?#j7!Ww~ka{Yf6i*2j&NQm*)DSS+lZyI5gqwCv);dxh3)nP`~I{W0za=QAFU
z-ye&Q-M_MRhwdc_)5&KZ+EjJ>Ej2uQ>9Msb+q$L(-^^)CXIn1MQQ3A*KiJW=#xWxF
z(uGIP^IUSHL}oQbD@;rhe03|tPx$<oWiu9^`=n-HcW{OOi)rHgO$Vp$(FwV5>5o&;
zwe`0r2=Dfut#7+aRb=uNZC$0AI=il?N{GF^yzIB>M-O9G)xrzQX9~M4S=PZPHL<`&
zE5l-*6YJK;u219_yLHbx)KhR{n`H2pos&N7oS3lZx|w|Lon1-aR^`pNG+59%rMPQO
z^qM6*_PL){dH(zD&hANDR<BsmurhvP;`PqWT;-Y?w_kUxNtTpcc6GkT@{`*i<QQaK
z?+O0DmG$d&y-%WR{{5foCB8><-GZGB_Z4r)Y^q)8yn4nF{gt~Sd{5k4pW?CNhiTnm
ztJs1Hi^-dWf~(eT5Z$uNtW~_F{riO(1(#=)1Y6`y@R1CQxPSgqX8Kv36H+qM-@Q1V
zA8ONkzw^-j%~|4+wpwM6Q?-t5@IJb<Vq@3z2TQg19(ajrN@)FSun&s76TCh)Dk}V=
zy3OK8r8_*2&UcNKJ3b-Jch!&Xf<K{VulLNp=-E3x!^C*$qx`;}*`YStIj^R6zSLN}
z&|2!B-tPY=ZWplp4>tR!DsAdoWis)NnbFA|z0<yI`mj&wz^r3acyo3i_?j;|?Z+>9
z!9Vdf%O6#q6FHEuz3chbcS|Jx{Q6l_7`>&dXM>*pndj?#lq9EL`j&p?iGg-xsL0Zm
znRf#jW&}5SN!xzXdw=2VmbVv;-e;8uC5yiC>_~{WUKltv$i|r8tu8<O!;iwtUGJH@
zdsSuop6m5)n72r)$aJTh`PR1CT)nz6#k#p%*()ploom|a`}W~PPusQa-&H4_{IEmL
z?5k$7zUVSu<*RaXUaxqnEhd-wPR=`(y|$J&_)F%|=i#SV_7;h5=lXYg@7Gs5lO+6(
zuGeB*dT2dM=+Ypr526Ntx>jj<Ptn>Rv{p-9!&|h3Pa<*m&I>ZxMm0O@4g}7gcP%+7
zZw{Nt**TSOBAV5!CM#4m1ibscB+>UBo6MIr^QzvIU#aPS>Kc4YCwXV=o6OmFriOpw
z)3%&*dT+(~ulIud`ZezDd);+cA|!sU-kufH&7Rx)wya+Ez-s!VROcMMRjbrudv*r2
zO!;|n&$(A!O!s1}ABO#!5$!nd*3=ngS6DB{9lBZAvfG+Z(DI+DnNg<H?;`(WTwR^V
z)Jj)ARxOhMd~y1wIj2>=UyH6dc>U>(#eVyaRx~8)Z031%YDd<$?hwb@i}H%MUp(<Q
zQ}Xl9jXznJ9tq4pbSvq6GVAinrDgtEX;Z|v&FgwunfCNAcVyIrPv5+~tc;zn#eA8Y
z@jP@QFO!J3l+d=R$LkUedkl@Dzb(GKuIH$oN<UZdM(-^*x_io&>RLDSy5679`tGsF
zzu#+F1YB&lCC><%62f_HvfRfD+8+H?FW4?w+k82>q`c}yu;>00&g)%0^2*LR7wx<q
zfA9Ub>DTS{hpuR7E1cIcQOd#6pHchyf}XPrQ*?y1=C<bZr6zCI$e-C-e{7oc^N4fC
z4#p*Yr{@IUNPazGW=3LYf|9y5<NmW(#ZD>}h=-k7n4}}}#kT3x#!DVw99L|P_%!21
zuSjTkYf#@>kxOn8RyJ9;W3Hz>ThY2_r_9z<W%D#1pUO!Q3l)j=y{Im8?2^ap#1<vp
z_CVoG>z-R;Ql;v7*_pKhr4!kA*mkEz>TF-WQ1sKuMRj7@n=LLzW*1xwyP2(SnXPV}
zotRYSCANC*(crm9-DVf|Zr*k+GJDhYS^NtV7oTraestiXQo)tqX%|1;_*;_9`|gl!
z#B-*NqTZHy4gIgLTF7p-Xb!X&R)46oZca1DIn}L#dvCZ{m>oV+xr6a}u$%hQn1<>?
z#Z51^G1@d=xYPY~2ha3iIY%d<V6!Qk%~o{TPUg|tn9h{1`MgH5Xv&IXm44r(I3u#<
z19PgGzLq_+X_%#RYv-kdJf|LQnK`XBp)DsjdgWiG6<bP7^**v}xwv&v&rjQ4?Tl8>
zQ^)gX{SNc{URKR3aKm}w*T@`;rlrLeb0c@xiKU-;bVf3&G%S&iH}GlMvKJj|gGya?
zXzlGXvUELfbo1#GUa$Ky=hB$w3Uki&=8eqZ$$ccinOnB<bIz+jHm{hb@0#E&Hdo}L
ztKi+gYZvOK`h62#^<swp7Q0!l=lz1Vu8qv8<q)5m@LVzE+wFHbw>9T&?FinozGYfk
zk><u}t!!d-oXl4b3w+(Y<FQWu<m9%ed#A_6_~swI&++$l`l2^S?LNIvE%o@o`b|29
z$LmAZo9(ktFiAQ7bbqkuqgRsR!)Vj?dxlIkh4tKT_qOcc=gh%(Vta38wA12M0q)1F
zj&0QwZ<)#xyvvdGYFJ=m$IX!A`OFW+m=Bv>SzYA1_tZK2m#SMUEZ1#{EkCnsmi@lB
zcm8c^jOR=}uyXCcLw-{~wpc&dwd%sm>^q-j-~BYQjIa2ib^hU|I}fu9=XQHea`T?f
zxz*|xyV<W>QLkoxYyK{ta^LjGr>JQkN_R3%DVm|bTOiK$i)eM3t#VM(`MBompK>Oo
z-S-Hnv1CyB_s9H^)rOn8M;(^9Ccd{=dZGQbt*Y3GOr8h4Wouk7`L0{Cc1p-Ak1Soa
zmu~(G`lhLDHZKZGI=$!S)JpD^M!N$yMBEfL%lUHkci)0N_slO>v81jwUj5F+wdLpD
zT(`&z+h=WZ*xfWG!Z%WJ&rG>Hhq^zSshxRyXaC{m<m7YHj?K~SU8ieQE$q+Tr>&WI
z!|nAonJVXZU#}#XKU!$CU9kV>n^z_4mov${`=)Sf!sWXUUWH{W{&4@FJ!-?aMQX|F
zO<W8NP7(|ZTG$)Lkd5Qe<}qlj0J;<>g1tiIxadE2zgw5HLM=F#9G=3=ePzPAhTe?_
z1R{kLCI~D@Ys}p<N9^T?mtl9_20ogWwERZrWsi-MEo}WBiOjK3zuD#cMnv7Oyz2eg
zmwT;$rk9sm-PLu9-^YLU?VifNdn@1b`rCc}u$bXmcdypAhkByYAB=YHiP2jh@WD%z
zRVbz*T+`z5@zsATdhhHFJNmj{yZrYbAFEjN0|M1Hp8DWcS+Ty${NY)ydy5ubV%#5k
zly8skr<&a{_Y|j>uiC-?KIlUp>-&$7>o}{{cRj6`-}HTTL@WP_h_=mKash$LJMXmf
zYyFrY&%bKL(e}_KhKl>uo_?ATBlj)&RDb`9503?R)`uLl6qP@ktR;8+cu0-R{D6<1
zHVYpFbJ?v;GM*RmQL^S*|9Q_@=YM}>WsRRQKYmiX|H;Gp-fsRsKZ>4wvAV9t_u%!V
zy5%RIO4r@dtKa|f`1X(*>HC2nV(s^-ckW;Ri2cXHn7Sp8j{oTFn*S)Bt8U#Q{`~?g
zZdESWx_9;Cg8Ka9jfZzv?Xu?k{^s@TH#fBfURW)gf8ns9t)0-jtwI-noDgfAWM({X
z%a<Ld3yuf1IcW6n*}HiC*3GLI+a6wi$o1y^`!_F6Z`7O3dRAHMj#RJv+FT~?*RR-F
zYI2_$ES*~F`uNzrD>np{FD{qwlnnATUUZ!8i%ZD6*ahdMdevC3mCfv&_`$<TzEASu
zWX{@}J030Nw{AP8UtjC~*yNP?vG_*S()Lfj+m^oW6#TUMgN0Z??77`vTO3w43z{Cb
zUS!z3eCcD($?aiRJiIy@CK>pyw)MJmsL*zz$&Zaz(dRvMr1Ff9AN%rSgGt%^US~H~
zLF2ln9+iWsJ6ZNFpFGWQo4&GEx8rRS$x4l9J493e*tpy(YTNvxBW~WAL~n7+Z%fx?
z=-4n%FSFY7Yvmig;+`_GV`nA`u6*Mf7x-cmzi0R}Co4b6iBnRA_8;S}FEE={Qhj`7
zhheT?tDVi9*UXJmOE)hztX5}UZIvX`8~$X$_2<WmCa(6~*7IqXjfrF2Y1xE38xH+t
zx2pC(uKPE{E%DKhWS{7tb3G-+Lw8*gHBL~OWajpCOV6Cu6^9%P-Z6)IN_ZMi?T}dO
z`AFi|j02B)^5%a0IOS=B=PFrG*W|<OJspCw+l?~Je&^_#Ux~Hw{dDHOo&KVoMNV(G
zo-KHHdeR>5+d8aSJJ_ygNgULb$~)#O)%Q>>Y%zDN3h(|SmrFv|SxcJ!n`*tpS2M}1
z*GSXk57+*%57NB*kG}6Hj#==aLM!V+y7d2k?G+Ed|7dQrf8@{gZ_PvfAKH_%o;kg}
zG<oseUpw0NKYTCsudzPM;}IY4@{c*ZV(&se==1(RR?jl6YRCHEYfmG3#Lqq6aMwn%
zf95ZP?1ti^pu?XFXMcHQy!oz~??(UaZqbuHlY2x>6+iwv-KyuXmi<%cnoQ4bY03F<
z-rIXcopcgbr7mYbTdFZT{nVnA-2sAjz9Jn?z8XC%J_MTvw%m?zTbH;)*p%t$O%*56
zqm%rSch2zd4R`5O^KzZ3bjoz22a8DH!cMP^5rW<(fvT%sZn1o){nvJ~K6_B1wpCgR
z-%=?@SDmXzT!Z582hLjAnVPioz&0~mIrFs_HaM2|7z#Q~oH5U*@ABq_i_?^ztddT8
z&LQhNBY7dm>^p+CnJrn@RkCNNFU>qZG1=|Nr6XJ$gS>b$lctJ@^V~MysrfMGX{240
zu3lQk_gRY4s!tZREL+VI>T$GPbLpkkOR^Wat+{w4Do}evr_G%x;jY_9)uzapD_vY1
zm~0`^JHNetv(hV*!2ayWdA6sxm9M8Q^Dw{deA%w>si}9ew8+I3ou!L9uLn-ZjS!yx
zz|3{c^zV<wtRC(;>JpSc;jN;WvH!Q@`5m97zWA8;&f4|sr^!91%U8b_o&T7?CuV!O
zw4`s-mB&W<a#ts>?VNaJhTwL+B~fkb=WzD9xd(B^tud`kHNP)$^Ap#bmC5_ARrIk&
zrj%Vw)|N6{^JZOy?cI%^*jbOxO=ej)%YE^QM~6=BSZbH9-@anXRjt)=3-rXNA6vKY
zgN9>jr8s-B_3q%cWz~KH+g`3c($9ao^78K4PyKZ^zdif1>gUD}a<1Y}`7~Q%&DE3F
zhZ)}Gv+++4t&46*x)F9kv3<{`W5F$#du`6|+10B1{L$y#)~8u{<#dX7zt>X>@Zob6
zJ#&WJzwKvbYWkLW4{}S47HVvsu6t#}w^v0eSA>pL_)K5yrF63SZ0TpN#n%nOt?h$m
z#hWqbPJVl2<HT!`IRb0A-!A(+E7G=>eR5B%Qr29)=Z^y3bnP^hu|KhWy78osH>Vvh
zeKzIKs`KT?xZM3>XRb6iHn-N@o_+euk_Yc1SLjS$Yr!AvcjD^JbFZX*XQ<DuwlJ9O
zxm<YLy%Q@E_j~j_*0go2*FC3JF7<lauV$A7MTyIj$F*&m3Qsg2)1J_6oAK5<MSX(l
ztCrUe_g2KccP*dy@$U1UqN}Czo(DyJ^l`L1+y3Ea<@vCEhnnS;1v_VMN;Nq%ckyQC
ziMKz0%Cc@gJkR{@f1A6<X5@Sg`RlaDShncxAA_F?ix1!Vf9Zo%LHW6=yJa(<@vba9
zw(woTUxu(m=53506gwDsid^>1+Rdu*s7@n$eNnwZcKo5g3)j?YYPQr|E7;pN!Qjab
zUJ(-?2b+Y&GS8win8P1bt((=%AnX1kx`m^y(>1BE?NJwJb&U}BE-Cem8)kRszZc<Z
zQwzB9i2K*20Ji3RP3{lmqa-w5$$nzJa?hme@`bbmszNbZ2b+U6lv69c9XB{!(&PFi
zb%|q(h~J_`uAEveVtxxJX>?zXnB*2ZLH(-kzXkg<g&cQA-U`~H7h%%b9d%{FErZrn
z=Yxwhb)HK_7<Nt-=I~9t8gRkB@v88S3NF(pTh)5E8c%!g*6>=&s!BI^?b5@q0$bU1
z+?Dl?TD@^H{&qPxJlS^dW<j=v%QtXtPcqJP=Z<SyYtgLzQ6eh$eXq5g(!D_In*L9g
zmR|j04;5X0sycsCke{|V!o%|WW40q5Tjh@%wpr}0v)!9F$y?Df#yf+FD|Kt3hHUHn
zOJ*N0PE*=$eC0KZpOMn?980FVf1a&A$6L~}N9@D)Y5V3ly$x_)kjM0O@`C2oF#*Y9
z8OIcthVW##vs%5FB)cupTPAGBM6Uz?tkR^Ho)X`r{lUn5bJxjr7k`$UsqeSi{Au-#
zZ`>yyJ>%&P6JLKS%%ZpSPd1kc`=QJjqsFg0-|X6aBQ*8K$FJ`;?Vi4Fa@G5F2ii=`
zmKA17O<%e(^L(ww%UOq>Ej-2<Q`lz|#T0$%Miu`mRfB@1?*((Dqi$TV?tCj;am_M(
z<~7ck(reQ~{qD!S%i7dr)DU-wjpMDe>aUqr%|44(SJv~V?Fm0Q;cnOS3H#=FevYx}
zFW6tByL*@2Spl&R<qf;oZe7Z_fB6H8S5B>!jB1%dX?dAx(B#<O#&afR<?Cl3yP<4&
z?d{XR@XI&FD%*E8?JKT+vw82H7q-t&)T(Z~aPZQ}+;93D=AOKN_Wj*&>yyv3{SyA8
zwdk3u4r8d#lBqsNrIr;d*Gey&zCPi~c7xn<<HYX<kxP4dr(E#x&R_m^lh@WqtQCq6
zwK;1ACr1cuo*`GUQFgJ)yMnj((wFP~y&<6Cm3;s1ijqlFXaCKVnP4~H<-O07xo@^;
zD7G&3`9G5<A~9b2<lRGa^}e6g`+iCD%fw7hRi6d+=KXnR9tFKB65Nv|^m-}!4B5?d
zf)xwc7#OzjG2lEL3bOPZcIw*P@a%HwtD<+WOGS3sc*-y`2DY;pC}`jC;^9>Ba}u_0
za5Pl(dvwe<`q(9fe&cz8?#k*5`4{Xtq<n#C>5OuDfedfys5KT#c>BxNuDe?I#=VZU
zenF~yZ_4RSkJP?K*?qrf{CwW#_m;oyzMTJGQzUo5?N6=buEK~T<t{bhN4~q*C>-io
z$et{ilH8#r-YDtJlWbHGQ1w&e*wU>o#)^+RIHHfF@GLG^RFE1l;o$ehACByjXzrhs
zvFuiX$nhf&cJRn&9p?XV*z6C7z1YVBk^YWf7bWVNjwe;bJnDJWsWGqTk><;J0w1@W
zsaJE`a^CGj42!yQMcl&LQyZ*v&9iST4twZ#+DtSoMXg6uId^BK<af<2oxbO^elbs5
zUJ#l4Br;j`e$ia!dA8;!4gY>=tZkmN#p}$bb)OeZ)vs*3p;?j|qd)&#wc9zf%kH*|
z0}S?l@wo8q`s|{*MTTcS?6|l)^K(IFkfI#RW?uFAa?2VgKW_=EzoS)_7w{rOEcJrs
zueRLoyJn|1{*mSCSD$VaGovMK@vSKRs=)5Ra@mjzzwXLx-e;6-`79u_=**XI(#b*X
z6>g`BE_~3D44*n{O%Nm3uLX|{nsP<D)l9GJY@BhfH)gwAx=u*-rqvtw8P;zy2>%=4
z*L5eUR4YbK!svHd)dlmNT-Sv^dmgq<s%rdMbljx)DC74P$-CYB9D9Xkz85okHYIbi
zzRR_^=W2(WxWvwv*0x^d4zJh~5dCylo91mXr+bPTW%8$Iv7XE}Y!c_#wr!nP-FxZT
z2AhR4Z*G`c&a?l?#n<PKvXvH|xSDhFv(It=@5aGQOCoP1Zhw+`HsNEH>3-X!V?yuB
zHf@VeN&G(J-;UG850+ciwTYMJ91N3{-Vo@%z9YI|b)WBzHIL2itXs^le=)pPLsma|
z`=P15EjL3qy_xg!<qylaiyyi7oT~b|!@b1k^bY>@FDqp8UsMDiiC$DK{a&M`nyc;k
z>mN&3)VIbPN3BgN+2v>RSFNeMbWeM*)xG2Ff`7t}zyDAzEB}b!>R-oY!I~wH{eKj*
z$iJQ;XrKD#d7?qWt!4Ut9@1)8zV=<dD|yDPAndrhh4{hvl0Tx)XYOP^=Nz_BVVlYU
ztNTkIF8|?k{P_>3<M)p|ZP3}3tSL9yx7RczVsnZhXHnfLnM{#&`wC>5&Q>jZIGg#&
zyhGELvAbT@-ItNyI%(7K>Cb)Z*DG#ddgNjuXQX!4?bbQ@v`4oe*1hsGvosZ3xoXdY
zBP-ssX|=wdeAaL6vB%5LYVlq13U55kdnZejd5(5x@$-m_TfO(~y#C?%ObO-LrrkE_
z<{y@vjrqiRD@F42&aAVXGtc}~kXH5h{N{R-$=)ENJiGXl$C}=RZvPW>(>vyZWN`9s
zpI4%1UlkU}6|Hx;SN3uOd$#nZ_e|Ca|2le~J=>K%nK%4mQxDfV4Nvz)Z_6u{%|AG5
z#WvTT{@-@jpz`CweMt-3Hm-fO^x$N>n^*rBb?RR$s_v0Ks`yzVHp6e}%7c4C6UySb
zeAazj)_rFW<6ZVw^JGf5KmBvqZN2<?=i4e5-tFhe_<CmN4Bja24UtS={@%}@Ru#TI
zwn*wJ4|m@Y#m_5NZ0O@ZDQD+=DI+(3`flEY+tIm|heM;{il$Ec9<%hR>xt(j9sg@@
z9J_h@=Iw4VmmBwHMA<V>-Brn^yYii6OAu!+Ta=dIJV%SE8FL-8U-2l(U);oMtr2Z0
z-&4cBH16g7j{HUPpDZ5vI%!wse-vD%dL{a~fbEoD61u56*6lYI8fS33RY{(HtgUe>
zqOz@DG|M@4&8%ayzNQ{ozHt_p>Zu2ZQ#bGUQ(|EJPfF>QX2!y|vX^H>T>39s{I7l2
z=?GgTyY#};s|IzV_Q6Z1PFxTw8P0xxzv8ZUn_qA&v@Y0|wxlgh>^jfK%TcXsqnxa{
z<5pe1p|>|%)Xupidxzq&(w>PN#f*$ovL?7xh+XfW^XvWc(ETPabRM&H9{YbSGAwoB
z?Ac5!bQ>2}7)i8A{SA{9NDGhsx8s2HjQwk)RJWLQixlsvQ&~87-3F&sR$W)8?Dd%B
z9DQOD&(7i*d)ZIS5l%Vl`RSQ+*f*QC{~PE2FfV(k{bjWUTL}Ao{_Mi(yJVZQn7*%g
zd}77q8;j!(PN<q|BfRX*<Mj)K-`w?F)@D&``z}epNB`u^-G&^#JA$4D2Khdes|&SQ
zQ}!?}DKl|TeIkFGz@q1&JNn~ItN0`&YIe`B`yRMB(D=^2$#vh^W1hqXiq2~bPQ6o8
z#i`W3n&H`<0J#-6<d1kP{VT>7y07=qZ6Ago#S^WirtV*lwr4p@A#1`GMh1p3W(Ece
z1_lPu!KTndOUofso%$$ymQ{T6lM{1XGK))!GLuS6;0NqX4T|=bb`<${F758slGTM<
zx7~`~QrVIH`qrapZ$}{(#VxI3@--ShlN9}S-Q=C@+x_3`V8Dlk_6OucZH(J4tq`cW
zF|+u$Uh$37`^D=S_8eVpc_v{k2fx4XmoqZwYxjH!RQr)@dZIsMzQO$$UGtT~BKNEp
z`dN2pmA3DPPsye$-UygU&3t!6R`*tCPkrXa_ujJgf<HgrOHimj>ymY>Qa)tWr~a0`
zufBZrym&I_^MnW2Hf>YUF$vMK=dJc#xu26w=#bkrp_2<QoO<$jQFf1zSnm3B-?nD)
z6;EB-@<73N={nV>rIjh=tG1rf6{?HAG4bbr)!p-gN<Tk)*JX1&+Eh|Ke)Gb-ldo0g
z2;4N=!Qg7cA&_8uKqig($BeHV9V^P~``;Xk@^rb?X>Y!J(^O%u(9NelnV7BlJpbWG
z`(TF*n*tFjXNQ$3I=A@ph4_EYdH%*)W=D;_d$e-E7iqsQRV_|m(pbLtyyg?rX`dN*
z=1S@nejP=}PVLmRWF4`e&AmO-l9d<dZIMe%T_gJ0x8}UdbB#Iwl)vW$Mjv`mmEhx1
zQKNoiFQ?s3<~I*IZNqQs{&;(2uJ|9JT}%Pqu)}vj$6!uM_L7$5U|{g(XJAmoo)TQ3
z2h2i-_FBU`>qXKf>(VywHn!%m6cdWjn6QjD=baMQq%BueCY0SaIjF-K8&J5(`lim^
zGdbyLoFTb;SAEUfDi^vWPBglI+O`~x3y(rWY@*hNMgI~1BYx%i?7MFS!_Mgc?EC)v
z-s9@|&#RxGo?1V@p7lW4pWxKw)(y;-7w_mxMW;U2C<uL0=yPll7whrRIR}`v>{|G{
zpD8DuZ`P7K<epmNbL@Di$zx%ySsy$_`5y--{k+U27x+=KCd_1h@W;v;$D`sORWI5|
zEqZ%gq$cuc_=g=m>$QH&>N@+Qis9aQm)%YID=L`ohkVqX6S5)LXP<7*9HB|UT=POc
z$g<uK{b0*_Kl}qTtN+O#3%f4=nAo#C)kgX8aUliA?wQLH`_oE~8_ICXeiZJ@)_Jpi
z?VQBjl7}C@_|f@rk9GcxJhhd(r|B*%{D0+%Z1C?#AC(uE`}yC|b?owtu+5s;w|e%w
zo_S}bd6m34SGz4ce(amCZ0B*lw~I5PH@75Q^IFaub<-m6=F7${YnN|2c3{!k=PwEq
z?nKK>x^Sd2%;HSMvzC1c9vuDhSNmTwN8a_}KN4%5ylkg#%+}mv8)VB$3bT)$*-@?1
z<Ffwo<EY~6Uc1s%ez>m{xoh5=?vt#v-yr)2`<I(*#3xL>ve|goT*1$iF1$aNKI6FW
zSDE))#m=r-x?JKrujlDGr<Z)*w$bR|Jx-}bhc-{r()73?q|25r_3przkToJ%$D|}q
zr||0gMEWnyIlS=wv0H0h6jLqoj_KUYTXS{lf_K*QOp1~&PW&j$dG>T(^0Mq3%jTup
zR9f#o^ufYZ`g30*|MFg;7ia$D)aCg#ytcMEG~vvjLsJwF<)6A47Ug1|^|UoG=$T&B
z)=%qNwiVe-t~8%sKC{X>enF{y@sWz_*DF*{`zUKRJ>`pUd%ZUG?~KT`a%vVQYr@KV
zUq01PUDI23b@4>6hErbxkG4r3_TgN&UdzwRTX*VGuG>1xJuXam8p7TF_K8jJ(Y1T#
znz*d0be|@9LH|w0jqXC;?T>Ax+6)&ZhM1Hm&*_|7emADxFTj~a|M7CEy0&!HH#_;(
z=KcF$9=@Z-@0cX-`Nyo1`dJp;O>L@kE*tc6O6q4?^m9LXUgYO)<Io#oYIbDFET^m9
zhYq@4+@teYly~}vVqVP;FIHK&&u?$)iO=|<TDSD!^@`AAsl59S=BNJgJ1qJq?y$Uz
zf&2manZJ7;bj>}#)6MJHe3LzG*HzbO9=45CTi0@;NbHt)>GHi^8{=J^O;^Srn{WDu
zHUGqq)ouG9`tsHve=YS-QRv;lJN0W=c7@HFdLT#3Ph3mrX3;NS{*tMSr8d4hxNqZS
z$rmNJ7SBC5;rs1S(<i*E0}6`1X}r{VtjnAKSW9$0vq9p@%~Gp%@^{HuZ_{6X?sN3i
zU8^#>c#^CxwM9R=$jKSKxKt~7Z}yeI?f1fzK5&Ib|Fl~FW6g@bhwn_EeW{ryUYZ<w
zaP`gSH!rOEdAwTn(AC^`n&)Ou&#zp1f>r#TcWTaY?X~UGc&B(RX=%vbxGh98EGx$Q
z%dIHs&TH!<r(W>-wdbVIqDajsA+t3Ffx#DUy;{FvhM{@zvA>mKpSbjHzj_e#U;5e!
zme5dl&eyAqLZZ#CUlH6ab~{d%--W-w<Y<;gal*bGTdZboXJtD5`IcAoluLhJ^$4$y
zIrr+--uV}=aW3}q`EO~sILy|KE8*Ys=ks_3f+y?cuMCr$d;RSWk2Nz*vOH|xycLr#
zb=$m6?ha4-#dABJ%s#W_NRm<m+rB&v|1{m>EA~dlP3TEFw@JK2F0p*BvW#4Oq{OSc
z#&=G9e_Xcj?!8V1vjlTCOSwn8FSJKn?q8d=k3I2Y?L!M4JHt5pS6><5-R!Q-Ic`zB
z^ZNO^+V4V3-p$+mq4fJaSLgEkkv}xK=6qap_h1=wQ;vDz_q3J`ti2h>rQEjg$?lk}
zeb?2x<DuUBwY%<p)~{3+Y&<XBDIsO??D#s7gA&h^uHP#7AoJvO#vTS$MP0=bzO19w
z%H@o6Yl`xhUSIJ%Q2W(X8?PPg!gq9Y{ED~VxPK-3z$&IK&3A40dPnTN6vKb^YtfzO
zGqd$qP2s=L;`!!#jLE~jGZv-INw!yIx0n}KruMg6GxqG%)t6dKBwlDPC^V>G^zG!%
z@Vk(GTIi7Q#017{HdnXne5VBKcbF=PUl98!z9G-x!lt!97a6T+DO)+w!9_EnHYxqY
zcNMksia`h3)Gz$9F>A}$NVv?J<JYoXXW`$V*|(&sBJMBUwlcf$vD1r>!5MGs1owUL
zWxZ2#L0D?{m3<wTE=XxD`23}Q-nX#rVjFaYcJQbQX(-$(m%LlcS~B;TZ*$b&(#SRL
zTfEaNt}yt0YGwEr`{h@XlDvZSk$N9HBjbkhMA5P&<+6#w@726^?=(7bQDA3*^u+sz
zTkiF-R4$w>{Cy%X-_9QjH-B&h|I|pd@JzFD&9iYfnXk&AAMn^Fb$w7->!#}()y<o>
zbG9+)y7#aCAdyxu;ZM(`^WQJ^`{=)4KHr(8V$N=U<1T@x?H}w;g))XoK3knSWBs&*
zZx40tockv5$yRX6r>v=)_ty7}(#d_ZSZW?jiO}&_u;KblkN*`rW{2!kzjL@u>CWl0
z3-a%TV-@b`?@Fj}C@K0eE$wM)j=xF3YQ=dUz8?{8^iY_`;nwuAMP}k-&ZXw_GWfo2
zRLp(!{6pXKQ!C$pkU4kZ<6a*}CS99*RdS`={5z)}fAd(;rE6JN<5WJG2d`v;5Bl4d
zUcPrUCz0vd-fqox`EAPfYkMAO{Ab!?>axOZ#yvZ+d26P}$&2l=@Nv@h>h8bro?({g
zuUn1>uKq`BR~(SEI~Ksez~BcxxE|8F)P=S#b-~9o79^Hr=oP@vat}Ut)?f3iXQ+p#
z@98tAbxxi+|2XK|+4I2#p1xrQMj-}9n~Y308yN-`l<@`{7#Wy6)zUb1-pf}{bDOWe
zm(NMhtG?$>KGD)Tt9|l}@7AYGNT&&KZJO$?a&ubhIR*v>8Ab*M2?hp+q|(fs6y2iK
z<kZZ95+qlc_V3L*Y{1h}zRye0aZ0^$#>ub+CoV0@cJ$t|_`oC{PqSY?w$5(3UHjzY
ziNw;)d)L)HSmMmXbC_}8oaZqo^rJj;0yIN9de_{45|^;(`?@{RYj>4xIJW7nmZh0!
zV7@^6F6;Tfr)4ZzmL6idwV-<^%cnKE`5Mby%F+Zcv*^{cMSap0v%NG&^4*q;=F8tC
zUC^*ntY6YJV}1M2!>wP-#n;!VZV>u(W!2>)jgyTxEU8Ej{-Vq{sj`kO``MRz2g5D=
z;w^7CK5?44RY)WL!Apw^NQY^H!T@y10_gB85Y3O;*h|jIM7ewbq<q4OFVk$87#M<C
z85j&Pl{+SZR){B;IOpf)Cg!F1Wagzh=jWBBBG!se4T;Scaul&;=I++s){(pR{ssQ0
zi%V|X&2;kMxR~LzO7PN=<EfI@a}%cXI4`b0bbpb)$;Bh`5BiPnMNSBqw&hFW^Lv)n
z?{^kIyLZ>VzWyIuL5DIk=bINdOa%`1G0L8w`rWD0Qnpf-Ro1nw;^^0y$kdK!pO*7P
z-F;uscD3#G$H|H7&$^z@D(CbxuetqdUeuw`-<&H>o`2V{@XCkQWUE83N_J+J$8NqR
z=aG1G)4T6g)2;`ml-{>~?o<?*c4NnxuUr4mfB!<wWop>!7*(^oa>_i%cOIKrnm04{
zbn%((B8wtU8{gRdO>%pxrF@%5x2nvgP1`z})_Gpa*1Kw}bE5aVuHl2G#O$~OXAa(H
zHx$|XV#-v>HJYujEgQFKWTqIz)o!ee-?h*D!p7qTt6y5pQ*Ccr_a-6JY|$z95_zq)
zk2iE>N$hg+S;6ggS)Ob5{<2W{&Ea2j<F9%iI*{9Tru6z$7tY_FGb}tF$?-{cna>bt
zyRt{E?X9Xy>*+HtS6htV6#DzkcH&(q_~(h0qjOpD_45tCE+rj2W3VU2V$09UNeY{d
zon6cSsfB3CY`yX&JMx#dii6XO`+|zzXBV1!?v{#lGYi|n@Y~T(q&%aSJwZx)i`sF<
z2-mJRcfCDk=`23CHgMje-)>>byMJ;|>=*BtC*q}?(Hd0Pa^oJy$M=`Hv{M`_IoDjS
zQ(h4w@!U^KGk@;A4F`7n`?o9#u4rhVvijUaP75>j=Mu;5XC1R&_Cx(ic8kr?%??#<
z^EOUD8kVpf`H(G8QZ$NB?O(>g!0>_rV~s8-<sv1)fW)HW)FRlhY-^7r=rRn~{~24O
zxXYgI>d1CJA0WcwdQkXC*$UlECeN=0dNvL1LcuY+3hX}H%ctFY{kMYYgg`rovrEg1
zoV;xha@Wq-aKiVtSM0m&v+9|$w@a1ZPcpdw!bvjx^{WGSuTD`+YgxK+>WVcwqTNnc
zA3d*}D(KKr9<}-G)RI0~#g`MEJYzc_FMYU3dC7kd&sUbrHR&%{?=$t(ut`+U|JD14
z_p9#A8nlus$ca;S1rr0qcQytF8_aBn=~v&(;^NG_bpHZS$K0_fJ-8&XBo)$In>#hu
z|8j!J@&D(|cCCG#vOr;<k@%Fo3dtP}4QfsLhJGdo6Pm*L*UU7%8+=LZzW$z%$E5!%
z#Ca@I{&<N0LA%ks<+{bv-p8h?y|wf||8ws3E#PzT7PN?p8a@d<64)`>MD^-wEuk6H
z(p@#rx)@3yHDx<`tCZK<#qhYnmm8Dk7u>%1mUsGYtLASG9Czc4cRqf>cR1$erqxGR
zoz-3zcgL{z(X7zbg@KyVPT&8k^*Kz_Jk_Ph^Ihz)(>8gByvd52(n2-nJ7(_K7k>NN
z+vPR4mrQ=RY3t#$zIoC?{CvvqxSKD@R;F0=DXy~nv@Mo*Rn(h#uI~O?s-chbx0hbZ
zxnA!cxasNiKcf1xVnVv5*ZJn1-l^MWuzj`Ber0{hNz*bMb4ndb8opis+oF0+w@^as
zqS5`Ueml$gFH0$;cB)O@y~t^6dhzVQm~@82pZB%hySw~}uG@>VJ$t4~sP#QQv>-2{
z`deS!`3c=dRY%gIzBwOO2>N8U-tj0WFSo?y4IR=_=N|59Q|)ngT=Vnaw~oYB)5VO>
ze7on+=JUPGz(+3GqHb|=Tk#I(RpuXJgQq8^ZL-zxd9^iZ{W9+9ig(XSX8vk8su-Rb
z;n%Ffs%WWbC8_k*E>p2e?b92Eg*6Y=5B*-!c0wjaPVIcTM8f-poA@m}(--fv^qa`y
zKPkfPdS(yng{2Iuo^Y4C>@%(4dJ<Hh^GJiYugTcrbzz1#(>doZe@TUz{~HTrnOj#{
zO?jWY!(UGCt+ZLeNd|S*sY?V-iW|r^<g;-eQ=L)Nv(4Gw{Ha8~!{c6=%R59Ch}qaW
z1ooZ(x7~m0yEoiPZ&tTou5p&1&g7e@)Nn`f^oIKzm^b|0mJ(+kdgIQ9({YcNmatBa
zbB#Q2>~6{|>5^fgnj!d8SUZvBlX%!6*GtA<T(rI+t;h%E_C0HSjX;M!H?uM@*b<Wi
zAW1*Cq$o2l-8nzEAh9SBzD$2^$a#NZN0EPNyHXQ#7k*ltu(c^_VL*zef`?bfl1|Qz
zg2FtV_m;g~#uHg~^4+u_>kpiq&0hVHsZ;IP=K3ig&z7xq%FePVxi|a!zRLHv^Y48*
zeqR4RgUH7-2Lzt+N@QK!xFGprL!0W>8CKImXXOMQ@9GO}Yt(&tB4M`TwE6Z47f$8f
zO_EsYalGoP&AO<2(t%qwjiL&(57`OK3;C_Fg0=lpy2tyuESE~ZG5cHfZmO<PvJu=N
z=bUuCP-lG(_x<f0+anA2w%z>Ta(COAS-QT5<hYlwo3&SLUVi1ydsDr5s&wxJUfx;r
z)MF0UWih|QIm+I5C;p7JdfaxktE*Og!Q?na!9UK|xjByC`+ILqO!k&=+f7-HE3Fqv
zef)kpyxPTbs^Ze?5(3j+|NeP&(yo>}{~09PzNH=(bPVp?^i_Y=S%+KC<W;xtYQ3>6
z=-9s7U$U-ougo*Odo|Bf)=Qx=g~@;8y5#1EmGOP452JVY<y-FKd9w1Vt=#2WfmM$e
zv#3o-^N4+Zc0=Le%w0iibj1!==x>#f`?jBRmc;EMse_3>+A^fle$72-Wb4%I_DIY&
zk~fCadw1fknJJl30`WUo{`>G9=UB_Ebt2ef;`|Ke0_n#A#jc-R+U*tH3KNzWG|u?O
zxa*r>R8IBB_jh+qvu!q9I9K^x^v&X@^Ay)@-RD$&T2D}4plZR@%$oWCoDaAgo;g0@
z>Ejtsg-?9;6!|F<R;i}4<yxMUV9-64Wloc<cR0(b7p*?=K=0ydrS|G4nJZ<L=Jj^$
zU*gk~Q?&4d>@TIidImRM{rqa2o3K1`l7?UV*-2AohZ&ff20rxNWHV{<*J;Lg*-E&M
ztxV|qWN5?paq|rBT)E^FV-a~L#>G!;6}qn|+)ZHgpYW3vF#rcj>o1gfKe;n9FnBXz
zZDb*p_KrpArManjC9W08so<g>Qp8W4c-D*AP^8uWm+|7Q+oE!{Umbe4C6xX8)TWLm
zgB=@|{w|SPHgTqlk@2hSomx9SFxF@odU}iMHbvffGVjjsyTYITef-7tfak0b*A1h@
z+pc$An-5q%Z(45K^lM?gzx+z^K*6fNA%%iv{cS9*-j-=Bq4T-RW*=2v##SPdx%AoT
z1~=zbJ5D}NQk?st<-w8ZT3?)8j|+Zo>ZqP>)VESX`a~<&<{6z|URfsaYffL}{ot66
zXGX@#{V@VtrdRx4#-rwCH}mJU0>cH7i#)$B6=hkv?@Y*nbvr-a{&o7sO2#RHEy+%;
z3qJRU3%;p4yZ^MyspM@tT(~R$a;H6YIKv_vJNIkJyvw^Uzw>d7d-J!ePj$6f=~KS2
zuPY5#U-~s`sf9*ztTE4Oqx8EkBrEssetYht$j<p2Z0~N#Ph{V`iS^9f!)I1@@n#oZ
zF1v1rG(ZlD6?^fF|Hs)G7}|x2EP=f9^2_re#So+gf>a6LjmWMLc`91BKV@>H8gpAK
zLu1(TSrbx<`5C(}Y*Adaq|HIvr)SHp#Bv`tbK~1HZ?e2O>J|O=Zrla4u<DH*x2hVw
zgq-$ViQBN~>$OGx|G!oKvZ?)bfBstA^t7_gv+r)`wEz8l_UF0p@9Zo7eeT`S|MTh@
z9jxz7-N;(LZh@-eV&P;VA4$cC9?wsoD&unEWa46$`Fl$6Cp#Syl)V061rKYv;g?o}
zeG!fN4^R7)Z1;Jz|IqDfm736FRW-If)u}m$YmN3b=6^c5J*7tae&V6`Z69rq-HC5}
z|CoKxpMXRCKYm!q+bcYlkel)$S+aD)p&y*}>XnrmJ@Y?(Pn7I`knZ#QGuI!c@7_OD
zS67A{TCXxkAYjds?1*O#+0xHCS|+#LoZh-HcYWroGJmIHnY)4+Z#Tc0cug?6Tm8$m
zh(j4~Hy%4?Vtg-s#;>}wWhJ}SX4khn>|F2o;MM63zieD$1!ud9C43AyvRR^Y>6t>)
z@aY9zJ8Fg5Bor?M-MOYyWmX#Fz2wKVskQA}3)h-HmFm1^cewCn&d~<Lq^!JCY6nfZ
z`^ta)Zb|Fg!uI`LQsKK553g*A{r7AitIyuc2E5z7A2`((eoA#)xZ`=ZbyelL90?JQ
zuMvfxu8I0wnlEL&<Tm>jXU;<|zt+r{CUGUF`8%W0k$XZB<&!ggXY32Lym%+>PQ#%!
zPoi%c?S4D)%8a~SOuJuwK74h<oe<7!$;yc}g-ok&e4q99+iUljAl(aL`*UPhyOosp
zKRXj07j?g2b-CmtAGMNiUNfg}Vp4VMJ#cHyo;RhX(Z#yw)^MGcz52G+QBuoOf6L3<
zxJ^%8?M?`A_H#Qtxc{I3W#pV)cfB7@QqJA8<@2S3r^Vu|DsP^uP2fz@NMH^-)%Vg|
zV$QSIpE9B>GPu4i2tUZXW8y+}mTPC)H{IU0b@83_A1|(Rl`V8X7?u-WAX}z!qqAT0
zh12#XM;=ZSy}V$n$z}4LFCBkuXE}bB(<=OL*xGAXtxB`*T#~)ICFbhYcUMBshFq4s
z@AS9O?(uh7t2M_@Y`Xuz)9k$_tN&xk#<_Qwt?F03yI9h!)T#K&lH6YDLfdus5^rYZ
z2wv3{^3%3xzOS`|)m*zm%$e(4lE<Mb2evKJyPs{rmpXO6Wy#Y8^&z51HlMw$vi1Hi
zg*L4j2Q4`_bDc}}J6K%dcIY!#+<|Vcy5k35+?wsRgY9aT+pfiaFShMV`KkZ#yU@P{
z5BlFU+dp*bd;j-S;0L4Kf6rIA9(3KKwV_V&=(;HrJ6@Z8y}`VA*Qy7me_E$sXEj@L
zRkBa&`s8hL%JYw0O;f63Uhh#6d@x$)Pwt=4L-$#(Bu!oQK<J*}j&IE0vKsqTxAn;^
zyUJQ{_Vg=9Cg0PU3SX9q>(@5!dbHOl!{o}XOx@)<5|>w8&e#9oCi^BjqV!9SXhqd3
zuD$s>HVbDfzgw|<_4K&ucO=^`MA__XuwL61H<!cK?QUm_>BP$Z7aOPXq??QXOcg%7
zfBAufsus%sKgO+M*4`4~Q$A<bw{vD$H*LMIO_gNeswjH6^i!Oy6!*!@m&?CwW9rlY
zx8-RsYq#xzkbQlH;xF&)F<~?6OEQz_*)s80+n<GH$!aTJ+D#R`vq7*&;7i6+gY;gp
zi8VT@M^`-iaL1W#_PzAnTIaf;!V`}^{(QKsD`%%&hGWX#32C$AVxJkMCr$4B9@h8$
zn<|fLqr~B?yZ%OZ6lcFb-n@O9&nva1b;Sn{7EDarmCX2ek*KA=eA(KP&5^+eM2cc2
zm2=8nIw5I(D@@BWrQqngb07aVoVyo2vspnbT7L5}<BjY0&T+rA(wt*z-K4Th{`ZPp
zDwZf{913pzzVzUs-<NV8)@Ht2@bi6DllO+*n<ll3_HS-U7q|MU@>b48GqGV=#$yq$
zMM-Q+FIu*=r3aX`e#*EkvLxfP$f1j`E{KG=tmf37|54?h%j>BVGLG0VM%r{~eUvck
zn!Lknj?>x-nchd7Uj%*CPes^$OOk)!m{Z=#zVYc&xg*=!Z93NKE5H8sI9O6zwSS+z
zRc}}OhD^Rw1u3a7D%VKq_0DTGDc}n~v*6RwW0`CG<lR5@bL!>KIXX*-XX1jb%Z<0+
zI3XwC?<nLy`RnqHk#iiUSBUL>w5Ghdbl<l#yuUtX@=VBOVlOC8cq|a<|JW?c)4M_{
zbn=R{+RYK_2AxxjBka$dyIWut{#9$=zJyhu1CAEYvpOm+vO?HKmCeh_X6b^AA71m$
zoX||)Q=!>yKj-0o)!Zc&4Kkj7VySOVcwR}3d{Mo2*MjR|AO6Xh{faXG`ZGs<(Zy0J
z`EL#9Tm6>g*QHu9g^Ttb(77vIJv;5AU|6*4fvZhv>$87J&t4u_{OWK0&!jB<1<ea4
z72mLFxAHpv%j2-gBAr~R-id1RX--Yc9QWw{+gB=<8_8w1M0Csd&%Pb&9$)$WUnO83
z|DjS5whrzH<+x>Q_Ihlr7v0`{-9Rz!c;QDjwWG5uBKEXSev}&{H2uT83ikhk?}K#y
zy|diYeW!j-QQ3@5i$uOB2<IJjob*?Y^`UWG+y7>*qsJ$HZaz?QQQ}AB9kxKuRqnbR
z8+|va@Fi{8xN@geNURxiVVdI%`Hx%PeSB#Yx$=R))$Jb^L<A&qJ#uW-ywJ=QT+kk#
z$kUr}<u!ZT@`na?uT&!~w>^|t*V@y6`Ma><Kj}9TskNMW%k{MmNBG}g-}pb%y6w2r
zC*;#fK`jKCY_W}JnHU&uvoSE36W>Qc@10By&Go+=AX1li{bHb%M1JrGRu>-W{L}^o
z9>F<DV&Vof6+b3Rp4E97b*b#g{D=GxyW<adB>i~PuQczrS@8z1htAdKZJ%47-}!yd
zmil#df7t>K^!1oLOZoCCod21s&Iy5y8p)ieh2p#-CQf|xTO`gi;_-)4PFGH8p*Z7`
zT#0E9bB$Dre%7*WOp$-ZQ!9G-Qu*v&=DR=F$j(?Ix#EWD**n)|n%_GweNk%~({|?G
z#`GIuC-Y>o`<~t8YYy6`b60cbtk<tvB39k-i(PAdYvIb(rk4XaZ(msBEca~kvmLvP
zD<>~Lp>Xw&EbFfcyIU1L=p0l2l=^5(?al)Rg6}Uh>0gSKX8SI+LMBsvR#sM6?Aofn
z#H-zny$ZoQ%^u%7I5F;X*IcPOu7|e|m9DyQOqZ22gzrbm#z<|E%ByPcnNnV?lPdN8
zJj-VB)c>+uUrH|fwVp@%W8^KBtesirb7$>K{;=&tS<C$ASGbH`gnwX*7TP8m_Wp*D
z<FwnewWV_({0SDir={^ww)E*)C&QehZ919j;<m-yE#f_IanU-t|EPjU&jZ`T6*1Q=
z@}?{-c)xQ;*~SG$JkEhj7mM}hb8E%Mes5ARS@CygO|*1K>AgwY@2DmuO62wD&nj8g
zpy$}^n0Y>Q(VL6B;b+`?k8Zul^?Y6MWc}pEeffJ$Y-+!hiYR`Vw)if?+mNWtbra^;
zwSQ=4VQl{L?vYnntKy`o3=N4kEb|?-0$==@dt%a-b!X-%7BYV1ws!xdzDrN>?YXiQ
z*N$>8`LVA$&&8&>=2q9WZWeJvjSF*G4)*VoF=!9m`GuqZ@8mTPI6d!tww#yLQ?vX<
z(qom*nYmVXH-9LRKk2l`!~cBDqV#F=j5U@|442_=SeuaPBVhS?spq5?o*xpeN3yqY
zC7z2YIk$;%f$I#FLozpoPyGIodFID!9k~luj+H*anwzHF6yKsWS*h2{epBEt$CVLB
zU&N#KY@Q@tRe#9Dz>vqzz+g{I4hhZ6#?y-48+tlO*ippR_M}RpkpqXo$JP#wlL8zM
z7b<lqEnONa64kmX;kruj;}<DyvwWt+e~6Fq5bf6zuWR`K;Mny;z0&5pHteyT$tCEd
zTKMqUy?3|2&)HY|?(CDl@3+5acv2Yc$n%)X%HX2o>Ny6J4?Q`0a7t6=8J9xSu56y7
zn~&$o9hGtZBUpZV*KRc?o>$DbuiZ`Uo_@M&W@6IO!rnO#KP_7NoNL<5=rS+WJn?X!
z==G1>UOo8CqxepW=j7D%Kdk2?@0b;3&&$5WCVP~-J)-U5;xl&s8~f%fOk}-mw%3)-
zwsy`&OULS?>DMF=o%Wm9^;%4G_0&tt-q}5yJxwp}iO7zv=VF83G$-7s&YI-G?EmKZ
zw$GR9c08Tg_jmdn-6;h^Gv?h-eQ~cgF0QE8%X7PJkonx33pOMNx4mBb>h`@!6RM5o
zW^VWIN|wKLFLOcO?H<Fp$I3;g?rkbDOfr`^@Zpuc$%e|Zd-<ET*lPO3&CpC-Cg~Q%
zc=`0TIbG2g-p^e0cV)7r@S|R>s*CDp_H^Z`gnUt0>Et-QE0M=-<AO(OVvBbiHTZu1
z@<D^EBDWc?TvnKt>z?xKp_zDBqRHJX<F;A6jLz!%V(;^+_2Qq0wSN2Cdu%J?)s5PQ
zF7k)?enqrR5aC@I8nora_baNO1U(kjnr!^LMf;+j&%XF%_6b`y+O}+YBX{W554k6@
zRud*D&2I8B{wHYko29kZ&HQ(h-8a$YFML)DB`~Bj)T`>H{PT*@{HY!2)3GSMP;ZBo
zcU+3+!|PkCm6Q(c`?)9UtI2~@i~fTQGr1>h+SY$!w(^7br3pLP_BosiIavH;>Er|U
zH3h3?1yoP$>HgO}L*jiy+QgzGqMW(YuiSFYkPj^Sq_|iq;)ef`o9lN?5(*7_<)N>~
z9wWU(;br#>vuU#S47V+VuCmM92cKOTDtdUT!*dBY=DEopzpc7MkG=0{F`Td4?f2=h
zNN2KGR>Ys=bB|0yD?4q3l5)}tZP)xk>wzj&os62q%D^y_7vow9v=MJeuM;vDFgGl^
z_;Q%QzbO)@H@XV(9C;|G(2y*AM~rF8k^qkF4jv6lJe?P~ZVYt~nHUrK?9%VOf4TR3
zS^IP;|CZoGOU{)1I^vxEOZ}JgtvdIxl$)Epmrd?GH}C4ayZPn&Z9mD^|NkZ3F#Th$
zjJ1JkvQ?j%Picbny%z^~(ptYPt2m=t!2Fr>&Jo>@72d~gbKYswN_KR!>pS;qWzz%`
z#l6Z)7FFmRTX4L<UE_Z5L4j4Oh1W#*z1=*IRSTu8e)3{k<g^Q>b?$ptKhx^Vd$P<V
zJ;LeulsJ>n<w@B>->&VKU75#wRp;3<nP~Iw!|Ihzfp3#iJ&otSWLK5DC9pxt^h(i8
zwb@do%94dfffr6yMwcJ-@Myam9C^WO+qCf3lWVeOU-9aAowje2!`vGl<{Y~+qNiS3
z^G?Uh;tt2HP2D>etrg?Zoo@c+&Wf#Dmdt7`yn99M?uk61i-LEr%$+P0^pc^+(0%h&
zR%dCs850F0w{p$Wko5NpnI>jZ9koaJz}h`oXI5;}`!>%gt1@%z%fEGuF#+p>k8hGR
zeWdg1&7pbX$!pdx?Dux^uPxYifcb(>znJWqbvMJVD8^lxb25}E*Mq-BW6S!$>q<FE
zfx7d*?NCnCkX1T&AnH|-O^M%xY+v`^m$Qw6nJi`&seiVYR(ZGVYR&7F(p$F{9Zg7!
zcFI2=QnGjfCtJy-^0}KAzud6ibfJ&XwamjC-mz}nww&EUF8XhHIERP6WYhtbeO8H4
zj)&6Bw|_XIc_n_u*&6{n4rkBWl(MT)r^(;_^8drry^m<FPI|ue^R5Ne{wH2d_^|4M
zLe8ZvVh<NZ#ifX_T5t4ri)?J(%k{0p-7Q0l>)oSyJ-@l+5_}H+{^8(Mp6s7ev*Lkr
z%^ALHG5xPow^?6Zberp2;{GQ84=bCPoqqAm?|v)(#V|hG>3)K|{?=%&OA-kWpJ$iL
zmc}QA91Q;vb<kX??0&aaXnRf3!@g<dF+pe6JSvMn6Jb>?KXcyhsy<t;cvhjLUFDe@
zFO=@(k4ZbZVcLADPt{Jl&#w@?9Q&$$DOYFv_3MER5?7lRBwOWZbI68ztDaqQFyV4$
zrfpuJvvGIR!8HeGw7h+J@&Bei%dk*YB{41;MZV$*YZp&T6qvkV3HRx1)7I>kD*OC-
zck}&U8k3q*6Bh5tUcAp&;OzbHpXbjhc*=b5)Vg9{F1=r6k#)yZg(@d}Qp~Dzw^oYL
zPo3v<%EtNjo!~9@jw+vxuh}|oSvGOAc8BMVq5@V~A-N>!)*VMacXpo9@8DVIka_sc
z;`)ETI`7?@__&xqspj31|026Cbjv;4G&`cL^Qp~J!>Bzj-8{N&6Cb>v`PXLVv)SPV
zGk$b%xf`(Txa23J`YCye=(^Ceh4$;iHRmpvo)u>KuH~#1<J*nd6N<Yvru@FMAXNSJ
z9)?hL<tuyJCeNFk9mC3DIazdS#hd4%<yV<)<`l+$4LMu0X4CbVQeg{Ue&0B+=<uF%
z%Q=dBq!KSryxDOiYr!^$1<p3}xD(!eVDhxMd*$S>G(W!GKW154HV2+RIX~{uE#s0+
zFJm9CiThrx5xG$>`LNFQC&f!PpT6-!_R-CE`%-geomDgME<dF4lIx`0d9$wqdwaL-
zU3b!G+ZBd+3aSUgj@>w-_pa(@d#uidy0WARl5&aj)(bw1{1<!7GDf}fS;55pPWr5;
z)>m>KE$A+L9>X-}III5F;Dd2x?W>Fr#`$e~v2=#k!#7Glqe{yTX_uK7_Z0c<YnQ2P
z@%Y`(>oS|`%I@3izb}1OcVoMJ)Q_3Dubvluells{lV>yUoE1AM`(cvzZy%{o4ku@x
z5MJVRF7NV8sRPQtD~{F5u`buYX;B+B`6cr@6WQ3EA2-}9Dp9{vopPyoQKhISr}~~9
z+dgFpC8e?6*0{EAh2OdZLi+NPuWn=t_4;`u=-$z2>GXvse?6|5ddW0=*FU67jzPVK
zIXN-smoqXjEW=!Qi&m;bMiEsYg*$|UG^98+C>FHv)^>B`<}zR3Ewdh#_h`Cl=843s
zC}lW^G;vL|`C;bS_Vz?tSem-(|Hb>4__x~qVE(nZtK5uHkUO(X?%dqn&*z=CE&uoD
z?_Zt=0`WB|8(tnhx~zRtdv4pCl_8hr7^bYM&Sl#!_?^XhpXSsZl3%Qy?tk`knRztr
z&eOoz*_rPjFUj6_vL*51x05F#?iSAFxNCIh!umT~gPhEj=BYGk96uY;?zo~-U&zsX
zlB4pw?>k@Jv|GIEl=Vcb!!>I(t~_(&IlGh3c8cV)dtX+DepmC}FZwmvHL7`EK-<Gb
zw`!i;uM{X0Wz$`}l<Re{_PO>$F(03VPCZlOAszTMSf%H~hQkbPcVDHvJ-q8~i}4!6
zkbmvIy-^<>%8xGH;;p&wvCIw5;u((DoLe7e{0cP?iHqI!)}~93_h+fpbcs`w-4iu@
zm3FFD9nyO<y)`*qDcp7!YfFk@+PC_HKRp|pZ%#?6{olwh^4yK(#}AGjTg-Hp?C9{<
z)JwEHy6CL9PI0PEozsh+qc`^-?N&bIVbdG7aoV)C%m46Z@t;b4k&}4t>qK=OfjTvp
z#BXRr{m~V9Gp4XJFdP@eTBU;{IR$CJA5x`H4Zm409WMIM?e>XFE*nFS_uW+sy=*Zp
z)v`_HLdDAN$7;SZw>1kr3!=D$vz7cOq))kcqBGjdd*OxFrAt|7O{+ETZd_u*sb)XT
z{Kv<0()oKngbVDmHkR_fw!={0`1w7{@7B+2(sy36|M&a2J@XHPd;SZA+jxu{1$xAu
z1Wqf|Y+SK>!;!^Ghy4~Dy}MFvW6NpJn1h=tY?L2IO3wFQAnL|+`(upI{Nt&T=hZ(l
z?GrfOZM1L3gN^#C7y72B)L1s=Nfg$&?uqYvEc&r7G-HpNU!7^^`iGk$>S7scoR7UO
z+%wUEf8Wd_vn#w`)Og$M)0N9V`J=DpZQ9LizKe@eDz9xTS$tyd%I;T_tRl94z4Ty9
zl-$G%Mythnuda)l)iSv^-u-T>r)gKS-hmTSGDX?Dr$uRGHh)&F{ZM#yRYogkabch-
zSMaWdbBlEPdIT*)w{7W4yYVh;>BS#w(t@8JJT{l1nQy{U-C5pI2id(c!*jf4w|8B2
zeN?=7D_^?TdX5)+rMH<K?CX8ime=q3;><P8IU7v9ZR0Pw{W|B-;WLBPx8nSVv{l{D
zO<!|<^mVE_H7DFVJ;Yl)JAd=hr*qU=oF@BnD%P%V`DXQACV5-t$Js$;KT4RVPPnUe
zx99Meb8l`|sCif?>A#rDa96G7)v2`JZ{HN!R7y-PZHQSHSZLu;|K;7g&MiGGsmb2y
zuE!p2i`4${Ehj!eXo`2}<!i@!4=q{Ay{LJze2G!m#|f$7D>&Y*F58*q{UXb%V9BcK
zT(dmZt_kmZ8P*zg$#UY&1s|BSOG-}fYW%lNXT{Vg+qWmovcFuMeU0_%>YbPKmQPfW
zlQy50t-gCxDXVqjG_PBG4sW;EyEjE_`4-i+Prj$SZ`jll^Pt_ANuFbyq|C3Kye?N4
z-L7KaDr)-sMcd9v>q7nirrf-~Wv3^@?JG_P9c>m*TXjqA%jTHH-tWGPy}Hg(Z{_g)
z-P8Z|Evt7pZ%tdjJo6gMg3H&Btcl$EY-{qaj1PTkKmH!dWNrUw#u=;raz|9;gVsd}
z48LdF6@1X%lh3bV(ZVeBF7m%sUgWDy3K0k0KLkpKi|%PIPpt_*`29mItNj6f)s%P7
z%#F91hKc;)u%Gr(*zAbgyVO4+2hV>f=49VnXffyI$|uIX;uCjYuZZs4%XT36x&EdJ
zqGujVX%hX@6n<VUxVhZbZt;WZKSbNz);DSTXwP8y`PVco^^e;@b;IQkw*QcIue{rn
zuw3M7(Am|C!=IV<x6fVYKl9!^ul!>MZ+#qhA9%t2+x3{<i5<%xScQr1X<sh#M{f1m
zCuWA~igQFuC+5uyoO5_r`PPjgQnQx5JLPTuz9^#T*O>{?>~q)38qRTjY*5~*;?gwt
z_4#?**qq;;@5$wr=FJXM?X|f4yI`B-o=Wu#%{N#tM}>46id@*{UuSUQ*V#qWPR|OA
zkgw@&&iTAr)h=IZ+p6vt<t#@OICA)BE#cXt@#2PfiH63RkjFP3SNiEZtXOCerxjS>
zEzkcUJWIXa#eIU9EI(&+OzQ{5bvIx9`=xNb-E0krp{VlHu1E3I;RcfigOqoBChca`
zESFxmGn(VRLrc<>q&I9&N)7!swkADdoAft3K*i_z^$os<Lgy@*FzG{h&itLdVXwCE
zy0hge?QN@8;c?%+XkMI~M_<zK0-f)6O0gT)oscSPDJ*+>ZhzRjJ>_%HwHO|H&ZtpS
zX8c6<&)WNrD<|8ZEjM~nZP@wSpgVTMMrr*=b$eQP;?z7B+Z4`Wyy@{Uu}86O`NeMc
z4~GAa&rp42(I=2U>4$K|lrp(P$LFqBbY$#%B=NwbU(ods=P9oH@6Ijpf8^;kN2X?y
zQ%zG?i(maN<!|aG=UR(<y!W-v-CQiFs3vQub!3OY|Auwhd_5eOY;QfbcsJ|S_d6V&
zh0oGB|39=o_WkAU7i!zT`?(*B`PZ)8WZQnz+3C}a-gi~(oi)Gq&Aw4;Hvit+J-Zf_
zuKsZ2)ZG2g^B?`ow!8a#?x`2fGL|YYT&IanPhTsWa^bK_?VQrGIU;^V>9b7LY{Xd7
zH?sE!cWt`lCS#<0D0u#v%S$3ANIpM#*~h8l#qp0UncUX}b+(-Q!7_j85yv-zHBHt^
zj=4Mk%|0<l^o`LkcBhE1nLZX=hvvG=9Jji|yiYjy%y)^0Dsz4_+^8^kqmt-<kM*0U
z&C{o^eD|?3*RtFWzOHz8D_`dPlN*iY_H<>Wls-MT|M2QME-Q~kE0*U4ZERP0vvmF5
zFA+1&32vFq$Jx9n)wu5Nx1aC6JPcm*{nsv0>-pa@|MIBDmBsxLUDVRrm*H%++uY@O
z)bh{uhtJ)9G1L2fx5d>v`dr_{s{T)kUw><haAKqHvcP=~>f$o$#(D{Jo%QTBx2?(8
zEAaI5jPEm~N@g%GTI6ZA&1K~})kSg(q+J{Rd)@A8$bH?kf<wS<V(QnJ`x6crdH)WI
zWfHyb$m)9B(SYHuv&kNocX<y|-KNCeo0(B-Vz}X5{wdBVpE+8~FHM@StMsF+%=)rN
z(Gt$%&qbnSy^h@M{;?+gH}Cmx0rk0MzrIdhyIOvi(T28|j@z1n+he}8A7T0Xyy``s
zq3vPCpE`Fd`?eqWJm<Imk9Th$Y1H1(T*gwpKg`E@-)ZN*#Y)bq?8P7JQ3txZ9Zhp;
z7#SG2nHd<YFbBE<it@8klS=~N3tf?_c(etRb0>NGA94_BJ<rPGQez)*c+r~!EK<Uy
zkCg*^v=^yJe0R2<W|-T~!_4{LD_l+Q592?L$0Dl?Z@4_xdG~UT<^3;Z%ktmruV?tS
zG1@^)x-#e4#;NX_?QdnuIBp6Ha@D$dglCtx_yq5ov*C`A;(j@{*mZf4*UisGd0U;?
zpX}xG`No+S^CSfNqJk>7EuZna`1Z%vyJ4Rit#5hE*Vg^9K(o1_HE&CHaGP>+QK?VF
z*B|K_HJ7hDwYBtSt^Iny?aa5x7dK_sy1bbA;#~g=)$2Y#?x;=?zQ)!k^s%qCPe!Xf
zzH`OXz4l+Pby#UAwc9k_YDud1y4w4VQ(Si4+8bRZpVD5(X?cI&aaES<{?=u@5ux`!
zNQLc{?OA*$(?HI!{zNwKp@a2&y#*G{bIaMIHr}@gF-U)BFC`i9C2`rg*)wye^qhNZ
z+dS`#GZ$;{X5nI4?qy=j85eg8*KnRTjOl;)CF)fnn{KW1<@gs~Ota>>p6+~+#WZWC
zt2XC~pXKT+zoIQcGRg_x=EcOoz{1MF;D#BeNKpuGwiT&3Cl;rA<`t*r6=#-YmZf4}
zi?lQtvLM#DM(07&#BDz!k9JMd-@Wu#l0vUnl9Ni7)8+4sY;)3X=9W!Aw*OK6gQL@D
z{Ya?!<}UeON7{#*P1t!x_49K(pP#cW|Mq-+{(VN14>ruq{ze`zRU2*hPT>tH*^#v%
z@4?E(+O`yLOD&6}j;Y$}JRi4NPvMb{eR`t&vS)nN@kRN;D)&p-57`OMyYyQ_g5OSS
z{zJ#;ADbTkxb!h}!MRlZ4^xcuc272b*m>Zq#F@Uw9_w?t?@yodVb|-b<m@+{3w75v
zuGKl@*J6An)Slz~^tepk)X4`WTC8^+i`!YPb*?p)J3Z&@#IsL5mO5L$i(C-1eBtj%
zCY?V1m75nz^-4rM=lcEo-%1&lvrjwHp3mpg6LwGkwIuOCl7q+gZ9m<D&oBC-@6cA_
z-zzKVvsGyA{@79}joZJJ<Fj}ER$g@K%bk_x6Y{6X<t47Xv(L|H;z3hk_KX=8ESfWS
zd!;4`N3+c{-1FzYVSLhj^CKI!u2OEh`!&lj$F2X_jki6r*C+3?JN&JSeF^)lxCN43
zuU~iyHQSk=TEuU_x2|;2Tu<w69bWm)SK%M@pA@8I`o2=|e`NLZnM?!klZEp+Jtb88
zIb_vmufM7<V14Un&yjOJzdzn=(p)cS)2iaHeAw7vr{t!v=Q9OYf7=$wq5ZUQvW;K4
zK_T~c!P9#e%_?6sD_?N2{DigF>J$Q(|7V@Fd8)YnxyEjx$U8^(Zg3H^e)3Vqqch3n
zPm@mnl!Vn&HO??{A7T--Tk=_=%=wS<6sO-3=7;PoPR2R+y{ntAQ?&9vY6;Z3bc=Yp
z5Cg+8Ev!8YjMNJ0S<H>(t_ZohHBS4p#pKD%Diax+ZIt*=CQo8;YWQH}5;BFwfTOLw
z!ozP;<4K<xGZ_L`EV`ogHE{K!gxc_xYr|YUI|A3NeV>%Oy7sPJ&il7t??vCP&ab{*
zl^y+Z|F@Yl&-n1EeS7@)jcxV%z1MEN{`h`t`Mp0sb_%<gbo)QM#P_TI=O0%)`IFDr
zOa0ZI{?AYT^^=9cR&Nt7t?Z0^EHk$xGuBG-&9$3{<7Qqn`Ib3n_HnD{FGCKr`TewU
zdOXoe^y3PfbI+S>^qv?$F^+nyG?(v#gna+iOq>48H`ZTPk$eBx`000+pFSd8o>sAk
zC)dX4D5sxH@9OdS`q3og<mK9$ZO4pDxD2|V2=#iGu1fk=ICIO>Pn;*ZxPFFkHETVz
z4z}Cu;=eVoOm6<+;wQliKE})yE8cqSb4k>x&!%w;3VZ!dS1k0~y2qnKEPId3^3O-P
zyi0clq=|jrDRuo-$wq^iW5qQw>S31tY14jZEcdXCw-bL+qqu&;Jy)stIqt73miE6k
z&71H}OK|6{<Jm6ny=(4h?7yB>v(hh|<8f(>!rrfsx*EPPNyR^VtakG{+uW$i#b3;R
zR&0)VUNm?62T`lpd#)ZoyNL78#YLNsuC>dXe6p;!+2=^g8jkk08L=mNj;@ioJR@}O
z#{`u-68&t4b4w@o`59*&Qx5FiZs@ztIQd#v!5OaOO&=flY0vJ^ydO7rU!L{l9o~m;
zAD$SSSY5PLvN!VLu^p1zP52xi9bG>2b%Iub1fQ|!RrX}Vj&14Li*vKMs#oUrMh8}#
zEGulu+#+RZFVTBcdiOS`^?b&6nAlFQx#n@@l+^Vd#;FarV$-gin$T@?;<Utpy55p(
z!?_!?zTOgDEw#<$$~FD_n|77Gz4dguiNVkCJmd0Np|KwmR%m5yjoiIDY<6LWXxs@C
ztt9Q;YphQuhe(y~y~#Q2&as5(8$IQ&8xKF7xw-qzd`06sVX4LuJB);KS7&w4(p6l&
z%2H&F*PGl_gPfbE^kSIiv8HK=+zge!le<;r%+Vv#8<<V!J=8xVvMpk6;Od(p`x-<v
z_@!@cYM!WNy2VcFh(W}BLtiJ&Rnz8)_sooT7SnkbE5gJ6*n38d>eAOU?vzdx^ETt&
zDz(dMThzs^P8@;S%4gFu=1TIOdVger^*y;3sbIUr{B7wjHJ)DUTfCI??W-r8ysYUx
z<Jr_4nVh4Sass>Ry=OTToXD2@!FiA`hhgsNa`Whu4M*gjO<xkYwn1&v;fI~ZQx0*~
z8=jTCBz2iBm48RY+5&DPws&!j7rIm)hwHpwu&<#dP5A7gTOLl^PK)qa9?U$kVM|BI
zZ^vu;KD_C9!iCpo?lolJZj~Ds&MjOSXZhutvAM09@v2?clP5lX!rQ)N(v(QcoLh;N
z#h)D5m>Pv9imc}4fAV<CB*&gPOmX2oVTyaB+ojL=<=ja#;VR$d946uGSZtP4Q}j!9
zfvTtulT>rW5u=N1FLos#w4ABj-PRsowLIb6Qp3%>eqWYXFWa_RSTHYLROo(3io%x{
z9$SufsZBm1C%Tm(f9)yJlN0}#Yz<>eUbHfM_f_5-H!|CHDO^czeH{36@14AEqluNX
zq^5;VT(^T|!Q!?P4s)DkUs;?;DXc4!Fi|$1(pEH2U`^w-tBLQ@X3ffVJpN(sw5<y-
zPtYy7)pptN@s6r@7miPUZ1sJchk)R=6)L&4u__{)W=%al*Xb7H8XF6>BPU;aY|DCW
zW*~Scc=E<6+M7}jh#G2#o~YPfwsl*V|1n;nEk0*Ec7$ol8=e2o*w%e2CE0s!xyt0j
zcdjfy_aa2=Ra)_8(Qe<_*2QX<zf4-_xIl!};oQ5K%-YXSpDJ$KToiC|Qp!e$h$pIb
zXM$GgNZ1{AJldKs_~NaTQ+`j0>+Hb8oPGV0M-~N1NR)`*&As5b>+%Bq3%P7}9m|b+
z{XfNB6@CAv>+rH^Hxs)rSma&c-*I}u;XT6gRRMP283fqPDi%s+A6F}UWc@;eyF7Ex
z?FV`LE-h$(X}Ijs!oFt5HxrLZ#ixFdyQj@#uRMSC%vJ3+-(+4#Mb`En`^~d&+WN~C
z`wz~4`kMEj%zcJAr~7o9^PezxG<R_|+n+Z#4CT=@+51W?()_W+iWN61em*+;l<oc@
zcAkAx_a}VF{o~r4|J<4(L)ANZDQDNwsi|!DPqMeZ&})AG%$XtARR6M(cl(jd%Fc~l
zXQ!rmoHnuW4V!&>@kNg-4-PN-a^qm6G|yHKC*yCW{PiavRR6SVelPB9bD&(}kAi*5
zhr=}|9_U}TWwSrjpH*|@!FrDEIqZC=%$nsNuUBJM65g*`w{Pyl2U`PAH#N^p<K5gK
zy~D4wbidmC<GgiE`{&(BXS#Cu_S$J71?QyIKELsvbI|`&uw?YgUbnM(mPRIZQjLNd
zW-q3H(QfYVVx0dlo!9P|f99R!3;sK=KX@E@I(T`<!CIlhcP}@-eAshfv*E1=&wsEr
z|NdFYF>~FXFHB6DVKF+~Ja&qi7RMxeP3r<*>~B<3UF*htYuijkk=?Pf7HfiAEwc2h
zUmrDIc05glJ^5%F2gij=v3Hu36mqtPbcyMA&fK5*VY`ZS)~hoG7DD=F(NSH>nc2Q;
zc-7r!<<5$@({rr5ob~bBqsCfkAyOsWcUOHp&9?v0`m8^*QjUD}k#0Y^(L#u?RNUPC
zP@%Cz+oHK|(<E+pvd!+Eef#0Yw3Y~-kD8l8HmABhRcBlCwx&1U{0DDfiqpwuORLRF
z<3gpG?(dZj-rJ=+`?MEt-DJT%!EHUNF?{D_CuN*?-puN}$>Of}p@K(uf6i^p{I-0*
zJ1={e|7SPD@`?hh7blZ;{G9W*;GoD4lj%zzKB#@R*!}0rm$E_nfA_rXJG!>dk)QKh
z;hus8YtCD(2#vI_x}aO%yz5!|La|e}>m`NXOybIT{9xCHl7gA`O3b?sIEemvV6tw>
zmy)H6|C_X~eqnO$=du2oZx3s<wC(fas}-NObcrb6HKwm228F5YThem$-)hBMREX@j
zlXpyN&E3MU3z~Oblnt|Yn?A)#^!`uTwC`a%_qsnmc{?Sr(>Zdt<307Vl9>x5#2hch
zDnuBb$ojv>*JRGcvu_{Wxf5~yT7=S~Zm%$lX^)guUR>U`;b0>7|1)oIf1I0^;xt1d
za_{vsGRKM?u5E4?<Jlgu`)TZfUfs0<^6Lxk&0446rK)||Nd0<E)XmCHW9_nycV_Kf
zw(AR1tW4$nWl@ux?BzV~-u0FVmA{#GH1I3WiuJcegHH&Vd~RD)5WOe1Rg-6eIH$SG
zAy4*a+NYkFP3x{(XSVXpxw0<X<p=zjxkE4QESRS}<JO{y>&|_3oESZWkN49P#X|Gs
z7iaukGQ>8iocA()-n!vdzzg9A>@OE3JFjeSZt9WQE}!6*rnDgU!sj&+#U?Xq-oBoq
z-t&{sOmf-OJ-659rOunkJo)Rgh~vtPocn)V$?jV8(eaPf@7|-`>tyY|X`ej6?O4UZ
z!}$88zNYXTqiTg4HftBIlh`8lHpN$F$>E;6M}8k@@?V;r7GS+dIcDAJS4L^=VNoCF
ze=KhmS?7B7ZEZuKtkyBN2dhQ%wi)H5=&#={u3PdTX8sd%t=`#x&T`DSlytNyaJ6bF
zzc9;-LoL^O3PQJk_V=i2-`{m1a!d29juYOGmsYsxyXU<>^2PP)e%b$p6T|;DuhN}T
zZ5Q~@VyE__?-~~#-RgTk{X@3l&WndLBJC?@^-bD7eM{UY+qDvRy4Uwj%h2pyQvA~H
z3(xF1@0PQ7h^%H8USl@d{i2`F>!mRpF3+ygd@r%{g6XVNclVk+lK9B**vUgJs_i|~
zhSiHXOXdj~97~%dps;uM3&oBb?);ah8>VN^UA&^`^r`;KC*-8QhIFkBf4N<&H#Dy+
zRL|C_Kj|OGuT5{)Klp$6i^R^Bm9G`PoM)CUw|{E>HP|uu=H@4_7CSAAvsyge?fUoN
zz~IP+>P#Q!O8MTSSC79f|9;V8nR880Ro$}u3qPd%Tkl_XPM3S6{%77HPFs_Y0#CnR
zUZNIhweumz{qBd4<g8u`^s+C#&-_T!)@19k*WbTgFcEmec(BAZ;N|xx`tKHQUcJ{W
zRZt_1W9_r+e||jO=d@P$;3Dn~D?)Uwyf5yu<qX>JwR(@{u9NO7-x!EC?vlCEuDO0b
z(}$3w-4RTSq}J!$e*bpyDH)eD5)YsHdW77%xSR8y>8wRpgr@w~`n8g)Cr#|!tA#h6
zY<9Pwn^>0Bd;b0Fg_i9rF1+{76JK?KQF7kx2P@C%o)VoLYVY!SqxPRkwP#mcG>&}5
zIjySe>7BN}J8Pa?IqjaIS#r`d+}yCfWf}j{=>jiLrGA#`tFhU<sQnqc%Dfn@d3`|)
z_j^tH%r1r;snCt}6Tc`H<sCkuv;KCbc4@G2tgmi~aHPk^FqcSaTkp4Dgjiqh-qKsg
zw(7fvi+m9K+(icc3po0h`pxsO-Z5iQj91}2_t-lg)|JX{ALU*-JGVsj{nFJx7~Vc~
z&(yc?yMH`G-roQE$JJZr-(M#EgL&7)dVllJ@-M^wY2Q62fBCKDq<=!cZ9O*Rr5p}_
znf>FQ=j4}#RZM%`C*&9{b2L6YuT{@%E?Z{si`QRNUdlM1vu_pMc_C}{tgnuFd5glm
z)?dE7;PQgzz1Q03{$;#YCA2^Lr)3qJE&tTd)*Fr(dVZBDH{!oo&GPHvwFCQI^JLwp
z`8|yfl(%J{{axMAEzoxU#I<He_BQ<EtTJr0DmGM-Tt9WE;<6kDj+_U(q`hRknDum7
zW7y|LKH*6!)Q`CH{6LcYq`#`OS$8ko*|yQQNqX;=1^f0aeB`qAcIaEpPa-?zoxH><
z)2>%ApS-i`<iRrWXBU3`l{@lYpQ(0B$I69=*o38`Cmxt|w8@}RE~@CsmEWn%hL)_?
z)D&j&Z=1=o`;1F8ul|}w`D=&IOFuBbST6j>l(R6Q+SOg9UU+rPflore#4_%LI9z@>
zUwy-2(*+eX;||_Pa#;7{%u)-Ljm5%yS8cf5exjcD-g(c;Q*xEgk{^A4ZOawBuJwU=
z)~D#dTehs+GyNQE%{8X5>86aVud80}Y3mZ%_w8cQj79U$wZGI`Q5dd~eTCt$&^HUs
zy)BM=%w}`9p5@vQ=+V)$U*qJHB!kG#GuLwTcK*A=GQISYx5E}ag`l@}4a<Ji`bk`N
z|C&(NqQ>vzFJV*rDscb$4G%5WHi@j*5OrgpS7q1t#7kV)6ArvR|MhikNvHRVbR~r+
z3o@qIc9+F*vMs%<@=|rn<u02h|EtF{bGNcTRP@%9-Lu4aYs%b+h5gr7KlS=Cv+>aB
zJNf~^lbJHE?CQ1MA}Adt;PEb9cv}$5h1)R|JNIh(vsF&EK6o*%;+0y<!)eQd6)RMk
zydBiUIp4T*O?TRT)8oNa4sPL0X}4MLoMw4VyUV}UPvOfyR>p6|Ef;@tOuw(#V&`<B
z#z#GJ`F4iR`wwfXW<S4B{xjy6-1Up!clO?U82d%<zL))z@E6DbSp9ABZ}Jwp$G0fS
zLNmejI>QXrlw!r-hE?2qOg;9`-RGRIY;%8kCc|!Z!{62|-@aezx_aTO#-Y33rYX)-
z*f;UA)=BvXWft;WxWTsLzun&MjiwJ)98*~_IkWK||BH5xpD}ea>s;*Jgns;t{KaWf
za4}~e`>lI!d2imZ{#xl}>^aLaBiFWm+NICW8A7A4=^Lbdu24Su*mlWMJ^j<2zn)Le
zSjZlG#XtPO%(T7Lo<Fmn=*Dc`w5EUCGI2E%+rSxDD%9kb-C;QQ=h;_A%QsO=o-=Lw
z!!d6w+kvG)m%_d@swow#ED^h~o>A^U--1echtKmhSpRcRy`7(7S-W7*MwVMF0eOG5
z|4M&*=5EjU%R;NZw{%I>`@`Fq#81DlKcLKhr}g~YmkZXvsBga3_KTlkll#l<oD*`}
zQ&X<1pL)%#K5fUUyRPp#`7f?>m{?ub>#n$3J+S-7@0D9uZqS{`Fz?voEBCw|Lo6me
zxjpIg?aj~ogFJLDUvz$Wm~)ng+1WaChTq5PqVA|HP1i5I#xC+>$>Z-!x<a&ePmO!$
zAjEi&n@2|fstMD5`->UBj1~Sm&+@Nt%(Yu|`bEBm(m%gygL2)TB5q^ba~6g-T+csV
z;py}{V~T|PRm0iMJOUZ#9qZROm5INpS=v-)&etNSweGG3@A9P!{<?78@>f~#SL{c4
zk&^oTcAuZ`kGza%4YN@eE3=;$njXG<ZnxYo`NWQ+|GoE@N4z<ucWJKp|AT===RT<X
zm;ZY)(U9NS=*`#VSJtkGIxEm?-(2=3KqTUwom1X*<B%{3Yrl`_8eg`oQe^9D$$E0a
z=F7h|DzzPJ#Cl|0V|T3YOcGyox7Rw_{Fe4bJ?CoCTecbBn~tYnh+F$-NAtIxrMtx5
zmP*WTDYL(Ie$igJ&#oEv?cdn8nC~in!Eoz*^FOsM=FNGs7rw~mzq3c%-5k$vcWE&b
z1A`_z1A{RG%F$!VIhpzxLscq{X(g#ekYy0Xka4TEQ^9MdkN@8;{rUx)v_{8y?j{HB
zDGL{M7zWmGoOBT`>D*&jo-KQIooTu2N97-}HHw{Hi#p{G@Tb~rU%hzK#3n=Es`HlL
z@7%Y1ey{lU{Qdj>G6Wp!ViUbAwQ0snb<c3a)r!59ylP@0Q$lr&p1i8teA0IHqYAUb
z6-UK+H!JRVdMoX2=^6EyEjihx_tRSyi%)-?^T6!zlafupeWohzeHG23?{WB7+2*wF
z<$In@O5O43`p)R5hbC5~_^?O&bt~3CPfh4Pp3ijr`;uMHDz`<HR<2H+oF&&I(=Dl{
zsP^c!=If-%``6!;W)ye4eZs&fw({uxqRUU?4&UKud2AY%W+oXFW0F!7JDG=hpSbgi
z9fu6AOuHSJX|zDfta?KA5ih+(-MZiJ_kTM7^WLuae^;t~4`(Z{zB)}tU(WMrD4W3=
z*@rVGpM1JObjn=TpIXK~GO3)++aBh3@A`XS;#$L7PmT4v&nUO=sfj(kSbkQQYTHwn
zU&imZKHvYjYR<hIiNQw4N=3~-MI7^O+nM}T;qaTnZSxa9{<!OCZc!O7xonr;mkpn_
zRDT-Qytw-M|H8yaYk#bNS$1@(^cmgP|NImxrtZG|O;Ns4%j?V0Gfvz7Z+N3Q>DPi2
zcFLz2GWk7&GtRus;qS_;am<<?!rJ^r&ZR5t+J)n<F3f0Uy6JZNoyH>;0WYD`FDyGJ
za_n+h{3R$z-g*ts6V*FihEw>Q6;H36VA!@<@Uh_91urye+<)y{SnuYQcH{d?h7FQT
z-nrNLHV7~-E$LHfee!vS#KQM(rrDFSYFyp!FV8A!<4lRvp0?_mVs>s(pqQQ9$9EI^
zd*)y4wcT)+S#n=T>Wr5KMmt3EW=~NwkW4GFjInf8XMIuPr^UlF@p`7z`6W;JFC1W%
z@-|sCV`=?@M$3%-Xlps$FQ&bj#Kgcbhn2`Q4@vHjIRr?WpBntu548IEGXJ(ITVqd6
z*O=85dP_G|^O1VTF)3kXMqwVd|GaM_rsb5+zI*h=;vX#krn)FxJi`8AVNdxoZKvBT
zKZ>8<vo3yT`~2LQ_xAR6j4K}9R9LrQidyoriH?zN#hM|PX0$I&ROIDN&0KX!XIVk`
zyx2<x@rRk)EWXd?*}2vB-^6IX^4ZtYUfj+ro|$+cyM5d9Nt=^)m+d%Z5dFsX`fSZy
zLzS#JXM>dMIpJ&GZ!c^4KkdDRQ`x6icWyq~r~6oACP(?(2FLpWyw<52HBWaPT5oB?
z!SXgLd*1ymv+VLM4z04@7Wz3#_nlcvxA`6ZmUxR^b;g;?IO1<O-hMd$dS&;o9x?B~
zEz6wNKhCZ=!O(VH{8FXe{8h7D-&}4o3C$Ff<Ptw<$n&u(mc?=5t}PB9qCQS~^_$B#
zzv_2g?4B)8FC4GdI^*5ZmndOXBr);X#6BIKq_~wC6>E|uwRwc^E}j4Wwx9D4t9c2b
z$9a7WH+1#Ao^k2Jl!Zl_uXv9A|0Qi*y-3dJ%p7~;s?)BPd5dSuzT|Uy*M892`cA6E
z>*AeDzkfMZxm))R+r_u#3)Dq3m#utu{&Abl-yJ-EuSxh$Ip!JX=Qs1(l2g4GW=k*4
zn7zniZRBT(!p$`uEtcgRWeXN0b4^?D-Li${vf+sp8mANHeEP}9BfNu6!+%@ismZ-b
zPcG-W<l3!}h%nr*JkeV&@d5X;I;Y!qZeDH8Q_{D{DE<|l{NJqYTuzPkLlcGUjfqE{
z-(TsiF!(95Xr9>GBK=G`$M<ag^O@AG6P&xwx<BBZ<bG)3%%?k(kMQk!Fso_yTxE6R
z3kI@h9S--*YW$M7uwlo<L%DZeCb~V^H8V4PhVSZ2hG(>*k1W|WN#gE1+Zb0V^UT(X
zzqnCnAS%=zg(R>rFi3GTFj!+2>#(z8h?xXk8x~zI6)N(NkH^Po=Ch`SrfO?Fc|@6b
z*KW(yHSxN9CE=v&%FayfvIfqXB54I_9WMWy->-XG9sim;H^w`B{$JUDe5Lan6;d`V
zjXd-y=G)Em_xbg|=hXgqygt63y@1u;!=nAR(lpgMM`iD=@z~MruewI*?DF{^FWPcS
zuJMdGeoVmZ<4=Ko4%5uPvwv91!oQ7egKbM;%~5shJ(UWN6z~7{IC5R7i09vuW!c|^
zn$2ZyCJ9XIU3AJ|(#4X}y<sO;KdV)ZSMyXz7Mb#CT2fZcJt3}d^U@4s&P$|e#u-gb
zitH6X6=gGZdrq+CjoHfrLw%!z*QPEG3jCsEzB}gyyGqDemctRAwqaIxR@|Dk(72~v
zd1mcXhtBDudqvLQTDEMuN9m!ZRqk6w*RBzhTvoLvk~KIxQbtGO*SfG(Gg&M}>^QeE
z-&&Rw)zU5eaJ^M>+s*5*CvUr0^i}Jvn$aaOZeK2myxkWp)VgJN3;7ylZ42vLuX)ww
z$&oom;iej#I=-G&i{$b&pU&D9w0_pARLcoWdvAQnC_YlszS2b1Wh1X#&kdequLOIg
zWt&5C(`U-hnf&5hnf#o?uRcE9d2Q{ilEWq5({}8iTIHClB=V6r)qM5TlA~ptem|D}
zd~^Bj6PgFted9fGbLYhS;oN0PkHi8`y_#NGywrN}+ozA}V-LlJ>sPKSkomUR?X1p<
za?^Q-GU~-^8*fkgXr?CN=6`GIN4;gKXA+{0Bul<`7MwUe&>`@cPJG}-?L|kzzqw=>
zef#L*66waei^JJX^!_P>gX<6IN}6TyOek!4pK_(}#O(Ky=Sq%hD>qeIH7?hR7WpP^
z@b=of3tlsqM!8;n`>iu=MqTXIg+<bJFV6WbnSbar*W=??!|w&JNse%N8M8zG@Zl}1
zS3l8f`>)=y?)9MwZ;o%vj=z2Q=s%N{8!oJV6cYSMQQ+Mg2FbIJ!t-`SA2L|5b(O}{
zz3X^oubMuX7Iaee-?gi|-r5}If01}~uIKmQ8~a`@S|)z=*D*ck4dSAi|5UDI{O;XQ
zT|WP4+J#jgoqfMmXuK>ruWl*vPE|bZ#s8z0ANo0t<=bw3e?iC3_O9=bXY-r?Ix+LB
zt^8_zNMYH5_iGn7eBQyp!KBaYXMg!sdZoYB`^0srTfA<?z0!B9-G6ZN<GJ|<X5STD
zA-swA$F(HhAJ>yiM5QHe{JWmbz_fKTOG($h=YmT8G6zd8M+r9?b6xD8c#k7=fx+9f
zJrxtgzW-CN{2n~NyJ$ZzkC}pOVn^cNOM)qF^EWs(8}sNt+^Olq(fE+fO{~`O{&|V+
zizQz;lH+phkE)#A;~3A}$f@{3SkC-ISoX{J;v3VS9DKcCpTm@<8yted+(!<zA93#b
z@K#`xdrP<b2A2*ofu;F97g`HMr)TIT@O;-`oj&(R(c2$IU-CaQw||w1-0k%-Hv4&_
zx0q+q++$3hc@LD^_s?Fe`Dy9`!RHB6b&6(RsxkQ{^q7}r{?{|PlOtEYjda@dH1d$H
z)7_WHFMs-CXjk~_3eqJ<pcT23=k5{R!^FU#z{<d&gIREaXZMj8WkRZENTu8x91T8N
zWZQP#w@OQ6L>x857wVqs65kWBn#E8t^T-mm=6q-C$Tm==tSea0bzW`Xj|H7x6I^P)
zxl5jZqqkJVvu5U8%inX~*W9->-v7VmKVyQRwq{B|K%kFelN0|hgZYb|wsp<+)LAm2
zVcs!a9ks&fy7wz~Xz&02WYdyI`?}b<gsRWv6oxx4+jRQdjC-G2R))8{*42KolIw6%
z-06p0X}jvCH*G$t<GjAAZ$r;hqopcdhlJuH9ZlkIga|}$eLdw)xS&}5#jKp-=c2dG
zG$*aSVVag_<bI-9qNmcjY04ArZOJd%4!=^np1tfsywUNeUqg3aObCk&$~Cb~j1JMu
zEXWU*3NV<VKWDm&oS%Tp9NWf{S#xZ~_Z*0tb@jD&-^Hwg)4>K)?E5G6O<u!$@}IYV
zQTxt?Nm&=ASy!&QTa@)@=LA`a-3MMZrCc&-Nn~YSo)s?B{?_}X)USzG3uHHQsrjv5
zH(SkoM&#ES$FJYJw&T2OM}tZD_MNjg=jd-g{H>xd^0(!GBb^gY!CG=z6)cl%qz-vj
zuDfVtd(ot_p@cK)Q1ULDyMCrqKDrzZ*39_!|5#;PYI61T>nqx)pX+OT*P(rnb#L_Y
zscqWs8}C{EQkYueo|zEa%=Q1|zSW0fvrYEa`dpfQ@<Q{X#*=!N${YLyd6fLBvn<z6
z*9>x-Qs%I%f@5h#&xvb_i{b@;zgfKgg4TUS4P`e=<t>+)Y}O@Rc`ko=>x9*kMY^K<
z73&)AC97=P^h$A(eRsTowG+Q&t$bUs`okB3mjwSbeOnn@IK!kVYh}pd`9F@YbdnR6
zy*mBY)#I9SA6LIFne_kBq_0zS9<ndDp7c1e=)we<qiTktQ59~2nc<UFKD)o@eNbAi
z{cM7orSiT2dFf9l@0z65NS35K9}CjZmj3c!QWNuvjr=KplBXa0$=7>x+d)hDssGWc
z?`;P*6whE{U|7Y*z(B%@JC5avMXBH{QiQZPeQoGlKjA<D+wHNt-j;0?U0C;kJxOqv
z2$$)J<y=CBD!j9<m>jI%{x(W{`|fA&uJyZr<oxrP-`AyA#jb%rbx!oOs7te4AI(2^
z?##^MW#!M_{rmIvFMC7f8ubZBczLZheaduNJb$*7n&DDKcITs8*hG!B(o9thmM6uz
zPE?F6p6&bYiubf+qn_z87d1>+*56KBm=@i7Jf<S8_A`IlRK;iZ+xNVQkuMQn7ptLC
zb+$<7zuw`)D+?-i)T}$SAZJ%;mYFKE_1d(nBK{Rumz@YWxwdfkp;pzu3M)>ONgDOX
z{hIbVXU{XG0PWv*mhaj3)?(R>EjypjY>DQ3Yjk^l=fCV-C+~;(erl=jNR*hV_4Z@<
zo`mA%ff~OL6-lk$xtu3ZPioV{c`Cg}O3PxNcPLb@x%vC=i$m6S_m+i~MK4Ori~Lk!
zWcwm=<pSl_Eq$9KE*N$gTjeE(99`6>s<S%c;>Ir1w$^~Bz3KZjx9L3m`83Sb!<gqg
zxA#_S9%J>#Ha7B7Gw!|h;=8r=Xz;lSivRV~S3Z(smQXt!Zf$)(?(UAKvb<$FIoBoG
zD);_-(5qk3dRzCi{^^jHOJ4;ppZPiJ?3X=n4#~vc=@gD}-MK{n-|4BgFK_02Uom%H
zwvEj4o{Q2!%gfVQ4i_D5EQ!*ybzKyY(chM_>G=t5K_xG<T%Y&Nl5ez?-0I{n`a9~b
znD$ABzxS-d(KJJ+jXY0Rw7qd%@K?oD=}0B#V#d#0KZQh%m+g1>dEsHV<GRp|-RjpK
zc0M#(7clRk)-k4ewme$`T^}1Saxrz7R_pvm)`&fze5tzemC8T1LF^uH*w%g9xOk-}
z+qMpi=vS9?%SyT%^H{FmHhsLq;hMrzmD>1o+JaW6W!7na(w^)*Pk?h1>!ZD{VlRK>
zqqbPpSv+_o85tPTL4y&P1wYb21ll6K-audP!wv$qm#-bWvVN8Nc_kArxAnf79)TiE
zT%7l|zucAMX};<1lz(RY{}}40?C@R4GTBLd+T6;`@AoD?d;a~k^a0y*vL&)}gyyIO
zbFbrFc%OTD&cQ!ZG#~DlSa;LnsUv5#tbb(^(|RsJtNzn1^-?oe?rb)0lJ;CU<D{wj
zovWPuUJ)r<LX;%f3S}z33C*icT-K^@GJDA#|Dc!q4Q-!K4m6u>Xv(?t*3QkZWtW)k
zTQ~PuyNK)UaM7gBpo|v>s@1=+74FZnSu;0i%2TU{{zhAWiL36sSj8E0e_ce&>9^-*
zuWxx*&wc1@*`sE$pFP@}Z-&fv$Ve!irRyZDJNxa;HK`xXWTZ_u#E0CO<#jxauOQ+`
z_7jfdN2@=vqlT~ZMX_hEm>C#qxEY8$#?C1<Ex#z$EwiY&B)BvQ8vc-q_GnmcuyD9Y
zUEZdG!p#{Tyi>Q`@Mw03<8WN<Aiym+L8vo#irJ0rz7dzNrd+?7v#Nff{lfo;&TVO)
zw(?ta`md$`7uzqoTwI>ap|;^*;j;L5r+1#WeZKSjyy|cNzP``bXIN4ge=PFM+(l_|
zZW6^Zl9EDVsX7m$7`%6!``9@{r+5v=4PHs3Fw1Lf8_pF5A7itdYS`i1H>XEno5d%Y
z?UKLVzW-WP(w3-`9yK-OXi)j;s8umngCjSG1^3>alb5kQboIJONhkeRZF%~JYy7JY
z$GclRyK5GcmNnb))U4f7hb0qlWL@!^RqC!AdQ#}r%+#&B)<*4`Vct7w!D)fGi|=Mn
z+qNvaboT`f(^?}9>9&QDH&;ci&t{qZSw-^gs}4UY4S9#%9hWP`+8i%y+*p<RT>j>+
z%z~P822c9#Mr_HLyngP<JIr6%O+!O_y;dbe?+QFw618m4%JLs-e05h>alEwpb8)+|
ztWD<=zB{b5p7Hwz?`6xgl*^i|b-$-D#%$*d9`{B@9{&4YT9upqtY1E>Hd3CoB_mC8
z(cU-76S&h3YX#5hkdb+Jz=wOM=$ykHO^JI>7q%-_DoGxFwwr6)N7m!hxw>v7c2sz-
zcp$egX~y@SlIs;2Gx~e=PFB2Ip~@<5pcYYkTEO1wz^2g5D<+5IYJ;xLn*Yz~a_W+q
zVkfm8D&?JXRXg@t_%hGI=09hq%Vng_er(OE<)xG~^Wm!J6V3W9E);xceK`Hq;i=}U
z+4l+*IqTVn>Ap2sxL7RBzwY46xXR_PH+DX5k}#{*a@rfG6{$OYYn82)x%2ebLHyq<
zPxW5i@HOkzv|k<~=bhQKQbf<D?YQB0`RtVN>f65TJkMC0KgLPCDVQ*?(fQTC8@^Mw
z2UOk4E?GZ|Ek4~zbWYIbWs`J1$5!04j$0YFm9PA!_M^X_*-~#jzW(4Z|BwAgeCHg|
zR{FAl>#}tZYxAN#mrwA|U-9kB_p1Ih^UQb4<2sMlx9W@ezd88d_-t*({9BikdSjjC
zqn#RJkF%#pFEc%qJf-HaWZ+g+>BH_jZhZM=&UV3l?>7FG68;-(zvLdRYkV*HOZfc3
z+8@l!+?P7Os9#$5^NK&mTW7svnNl5sah1FJ4<^0X^2Pd`$lsHPj9I=Ld?}J&xah`2
znTc_V_bYELs+ODcBCYckZ_lRCo{gp|H>`|un(uX6w*O0=9?`w8jnhXc*KpRNiTsRj
z6xvv1Yo|Q%Op*!`P>I}Cxx;3y$CYAt-LeH?`wI?>%{4F-zr=Oe(EF8fddbO)-aUGc
zlb=_|6xPHXZNIX9LRtDt%bIyQfv=9MevFB8m=kwY^Oc}XLxo<tqTFHb9j-eJzRvfw
ztMUD7KIsqIfwhwvtY?B&R{vpQV6en2qk}7pOHy-*8B>@$>9p5j2a&e(H&*9v-F7YO
zs_Pw7Ct;T(t>P2J#Dnw$Ue?W$yF5`vSnaaYJ+JU4ZutZ4Lh6|UD;<wS&N2Rc{{PuI
zJNMM@*~fT*F=x60f3H%G$im2$vrIXn3tScIPuZxQ=S;cvYl6eJ)5kp)+)9_b=W|!E
ztJ<n)O3?e_ZYyW42ot{}YvycdI1o6W`}P)AS?i1_K4bPog^PQI@3<>Uvc<mJbw%2}
zyKJX<uu_*Wlc{uS5?6lh$sO(sE?)cHU*hK5UbW3~LR00oP21`ssx>r=3w#pHQ}s5m
zc!%lm{s@VvIJv0n>;5TeF03I77+jf_xG#`75^y|Rr13GMSMIuux=m}3$(vX@zRjvw
zyY=D*-nEyD4`+Nk!<&9(=At|Si$BLrSKf|Ypy$>oa%Ix~;tlMUUmpaXX6ih*=+P#r
znOeeAi+}4o%nMED^wN5kypb<eRJ*wOl(ykMuB$BXR0@{u`?96MZ2!WU`X#!oXLS}?
zuguV8J*&4Uifff!_M52xEU49ju4qORI}-!LZWaaxBg`m;7W+P#dC+#H3P$sCYViI1
zO93MP{4_<lix1ohF%dSGF)({oV=UpTGHb?!T+TH9>fH{9cTI^dzODPq{^z4^HM<||
zKinnH=iW?dX34xXPyYLy_chPw+`eC5_m8Q7;kcXBZ8M`A=N4r>mMqh`qHDr2Gko5c
zMb}$G_7*xBSRcHgx~9zbrCeOk?{&X4e%V!f-Az#VBfQWi*MCl0$+Om$I~xNQuCAKc
zabxCxg{7;l_Hj5*=a0VY&^u?Mkn{agwtde_nm$IHSh#M&lhEatZnqqYsCl_NXXT3*
z!B1ExZHRk6uiN|K52wC4aZ7A0v&{WCBlY)id2*Uw(n){Bq1`ygY4Zke4Ygmq_m&rT
zozdLAW3FV*vkBZ!5*5~;*f;n3`AfxIPeM*uTW-0a(h#s(zs)M+b*}XM`NFII3;S-U
zKG-U?<Gt<PAH6eYOv}^Y=Z<U&trs@?7VXPjUHN8l#zNN{(aUV#y}RGg_v`sP1*JU^
zb)Q<+9zA<kV_HsgwM2JD!7}}VRKcdmhx@ge|3(;m;i%v&zflzN$!)^Dzl@Kiwb^q1
zrEQsf>cqx*{!9_;nAD6N4cW9d`Z(O0lbZ1&P4lblv+L0YvklL<2~9e1<d)RCMu`Gh
z{-g)(8qV>3?$_VU+IaSTZkl!Q8I4Arj1y-)A9+tZwq)(hC!a*-e=(hJGH-#<a@*d=
z+Y6?rN^07zOgy^9a&g;l9r?{JioeBCla<>xj_<b23=F3@v5r4tCM)o?5{|~k>!8a4
z0{{G?IhHVbu^zEbN?oA4g;j7vg=VN;kVb^-#rTCsq%=4uvwak;VYttnUJ@{~@Xzy}
z{zuLh?>xCgyk{RWcsze|b=mit^zzNSE%*Pc`OT-$<|=r$!{m~X=*6ZEk4=vHZ3W&3
zHZ<IhG1hWAFr&em>6mLpp?^!qZzeeo4}&YS^RGB5oVqagQg4h(`ia$*s*}#Hp15S+
zM#b#anxVmBDT^0K6mF6WH@&*&xWU!eIsewTs>bPV?EbE6STX6>szVnJv3`!0*6#Wk
z5_fX)k@=T47u9*1h3wPkvb_`Aw(UrEu%TM$;)JwqVO<k=F7+(abTiTy=i-X&wfH4r
z!PoxiOB%m!zo&1d3XkuuX@;qXee!3mTF2>OwB^>GT2W2kLsd6tM@_zeYT}+XPn9Gu
z2W*Ym6LabLn@UNupAV+>%J|1f%GFxl-K4(U%gI|j_3FO8xx(TfCNT26PXF|9?&0pp
zqYtaZpL+M`H}f<`oipr^tUY}~YqHk*8GS~x8RyAy*3Fsxz<G6pyO5fpYC_Z@9y#L@
z%hcw?GZ!bO-`ce-Kl}I;hji7cE#5oM$j)ATt8`DoHl`YPRoD7n)70xcrNJ+!?KV9>
z-Q@6xmA6(+oAE|k`1YoSGQBG&C4G2O>-6+pjB9B30kcI`lQ%~$4dCc;Tj|xFmC<9j
z+jzT=sr<~z=0C4kC@tVSr>W-b|8j!I`Zb!7#s70wdK_TtU8THmuYKO0IgZxT&drRP
zUA}EuV7T_J_rW~#UWvw?-DC0fU-+v->W;4Bf1gIbusu*?XLq5a`0&x^(q~_r-SWMB
z&o_jt&F-)$Piy(Ze;(E1w=#2|S#9gE`o<DB&5`}2m52K`i~jD0qZd<@BoB-3I_vhg
zfvuuLt!e&)Urhc#v|q*iX#Dl_-~12n9V7TAE>k>Fc5%mM_QILB-yYm8!!_CC)%|d@
zw=z2(J#Tnp8FiC;%jQ&;8``JmE~vfOXw4bpZoc#2PU#~%HC|{i?cm(rzxaz-tWR*p
zpCt)L&+sL$d)|K2iE~E*gWiJv2Q!Ycu3cd%?sVf+*8Wz*25uL14d%079=%dI8M^4Y
zjZ#&|q3aInNfM0pSw{ub_fCHi`@ubmM_5igt#g^pqj=O@nR%}kynDulfq_Aufq@~t
zC^01`RW~^&Q#Uy$vADQ^oErd)0x)j?c#3lFKO>U}GZzB`2M2@Jrm5}>3=9mgQ|~!J
z955DOc<Tsa__~HT>U#RQ=?8eDYP&vlMiA)OqEH3~1`&`-C|=U2$-uw>*XHZ!=jrAe
z9HQszhHL`pZY7Wgn8hq04j2nC{B@iKt@!Zi;0CJ(5g;A=pgJHSh~ijutAs&%z!;?Q
zC8|c~1q>JlWHhCuEoNX~xC*_m0j!^afniG{=)wdn_k+Q1g;FtubkWega`ay7H4!ET
zh5{A_22k`P3<O06J_FHPbpb{B6_t*qB^jxCC7H>HgPqZBeEue)cohQ!!)*r0rJV>H
z%UDp>8NkB86Xpl(K44i{bvK5IfgzBEfk78$7&u50Sh1St4o=;n#i>Oe`MIe+`T5zU
z1=x+2c@%s5C@TZQFD}T0FwFEVjqDs)jdra_&M7U<EK7CE%t>`jPEIW@_D#%7OiwMs
zZowJeI5*G<+CSOQ-7d?A)q-%ywQtV(d1;yHrA3J)@H^hngE_EPHuEV91A`<N1A_s~
z;oxBTDS*{z^!W5h%uC6^<?yAI2D`#QnT8coV!$le(x@Yb)pVcybey4ZuIP1_C_4i~
z9v=gP8q6fHQ};_?H4Np_JM1BAac|pQX<-J2H<}C#3MghuC}1_y6LL{3cGK=2@aa6l
z!oV<<6Fr^^jj@{Ml%HP$x^|<$H!(9WxFoTt1XoCUT-&a4nTvrzS%QH<1I3{Y7Fdmj
zCIQ!q)a25Vd>jd@MQX|FO<W8NP7(|ZS}3N@bjE5bBvU}m200pgK%n2a3p094<77Xq
z=0bxNSMh^>!y&@F;!v#Sp+vn3j{6SL^8)(qc?er-!?D_ezr;bmGzVea)p&U22`#;_
zR-nl415IwBUwVTuFfD-r=M^~Urhs&TvLuKGT^j?U85ls7OI9L^FF^~q&^1p$yF><}
z8C0>%#MBI1CV<pjM;m-aHy-_B5QOn6DHv`;G8WRS#FlzM;f8(%2f|c=R3c2pb0G)1
z`_ZpDK-eUdL4-|^*+|GjA@rj~(XB#1X&zzKzbt%K;W&35-FWmP(-Fo`$-!qlG$!yJ
zppI^p4BFYN2&=Z05MdQ0&<F(=`tg+rOKy}CVF~`jEYW?0e%c?xGO=oWmO;FNS)_sz
zG5T3k2s7U_;4>2%ArPY>9HhxTbeE$aYJ{-mWfO)iVAGIi)3Lj}8|^>^gqa7sFwG?X
zcm;H~8=-CTN7ys92g4rB!o!)EE&u3twxVs-M%d}xPuNcE;f21Y7GdVQg@nz-vceYK
z(dY{p5f(gJLWBj7qy#CYu=@ag`53~2qsuWYfYlPj4E&<|1bw*&!lJr07#3k9SBOWj
zdjWk49bt0IdJK~>6BEQ}>=Wwfo<N_vL|9e637=I2>QZO489Rir*Ei!c7931PDtPAa
z&^>@Yu8*)udIyG0==}$zup{6h^s#1yHScy}Sc7CN)I$&oPuYh)=!h`ibq^8d;~s)U
m53fwLaTJ6_xA(&vhPXym0=!w-KpG4g3>gFj85rUYfOr7Y2S`i+

literal 0
HcmV?d00001

diff --git a/tensorflow/examples/android/gradle/wrapper/gradle-wrapper.properties b/tensorflow/examples/android/gradle/wrapper/gradle-wrapper.properties
new file mode 100644
index 0000000000..bd9ee87db3
--- /dev/null
+++ b/tensorflow/examples/android/gradle/wrapper/gradle-wrapper.properties
@@ -0,0 +1,6 @@
+#Sat Nov 18 15:06:47 CET 2017
+distributionBase=GRADLE_USER_HOME
+distributionPath=wrapper/dists
+zipStoreBase=GRADLE_USER_HOME
+zipStorePath=wrapper/dists
+distributionUrl=https\://services.gradle.org/distributions/gradle-4.1-all.zip
diff --git a/tensorflow/examples/android/gradlew b/tensorflow/examples/android/gradlew
new file mode 100644
index 0000000000..9d82f78915
--- /dev/null
+++ b/tensorflow/examples/android/gradlew
@@ -0,0 +1,160 @@
+#!/usr/bin/env bash
+
+##############################################################################
+##
+##  Gradle start up script for UN*X
+##
+##############################################################################
+
+# Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
+DEFAULT_JVM_OPTS=""
+
+APP_NAME="Gradle"
+APP_BASE_NAME=`basename "$0"`
+
+# Use the maximum available, or set MAX_FD != -1 to use that value.
+MAX_FD="maximum"
+
+warn ( ) {
+    echo "$*"
+}
+
+die ( ) {
+    echo
+    echo "$*"
+    echo
+    exit 1
+}
+
+# OS specific support (must be 'true' or 'false').
+cygwin=false
+msys=false
+darwin=false
+case "`uname`" in
+  CYGWIN* )
+    cygwin=true
+    ;;
+  Darwin* )
+    darwin=true
+    ;;
+  MINGW* )
+    msys=true
+    ;;
+esac
+
+# Attempt to set APP_HOME
+# Resolve links: $0 may be a link
+PRG="$0"
+# Need this for relative symlinks.
+while [ -h "$PRG" ] ; do
+    ls=`ls -ld "$PRG"`
+    link=`expr "$ls" : '.*-> \(.*\)$'`
+    if expr "$link" : '/.*' > /dev/null; then
+        PRG="$link"
+    else
+        PRG=`dirname "$PRG"`"/$link"
+    fi
+done
+SAVED="`pwd`"
+cd "`dirname \"$PRG\"`/" >/dev/null
+APP_HOME="`pwd -P`"
+cd "$SAVED" >/dev/null
+
+CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar
+
+# Determine the Java command to use to start the JVM.
+if [ -n "$JAVA_HOME" ] ; then
+    if [ -x "$JAVA_HOME/jre/sh/java" ] ; then
+        # IBM's JDK on AIX uses strange locations for the executables
+        JAVACMD="$JAVA_HOME/jre/sh/java"
+    else
+        JAVACMD="$JAVA_HOME/bin/java"
+    fi
+    if [ ! -x "$JAVACMD" ] ; then
+        die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME
+
+Please set the JAVA_HOME variable in your environment to match the
+location of your Java installation."
+    fi
+else
+    JAVACMD="java"
+    which java >/dev/null 2>&1 || die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
+
+Please set the JAVA_HOME variable in your environment to match the
+location of your Java installation."
+fi
+
+# Increase the maximum file descriptors if we can.
+if [ "$cygwin" = "false" -a "$darwin" = "false" ] ; then
+    MAX_FD_LIMIT=`ulimit -H -n`
+    if [ $? -eq 0 ] ; then
+        if [ "$MAX_FD" = "maximum" -o "$MAX_FD" = "max" ] ; then
+            MAX_FD="$MAX_FD_LIMIT"
+        fi
+        ulimit -n $MAX_FD
+        if [ $? -ne 0 ] ; then
+            warn "Could not set maximum file descriptor limit: $MAX_FD"
+        fi
+    else
+        warn "Could not query maximum file descriptor limit: $MAX_FD_LIMIT"
+    fi
+fi
+
+# For Darwin, add options to specify how the application appears in the dock
+if $darwin; then
+    GRADLE_OPTS="$GRADLE_OPTS \"-Xdock:name=$APP_NAME\" \"-Xdock:icon=$APP_HOME/media/gradle.icns\""
+fi
+
+# For Cygwin, switch paths to Windows format before running java
+if $cygwin ; then
+    APP_HOME=`cygpath --path --mixed "$APP_HOME"`
+    CLASSPATH=`cygpath --path --mixed "$CLASSPATH"`
+    JAVACMD=`cygpath --unix "$JAVACMD"`
+
+    # We build the pattern for arguments to be converted via cygpath
+    ROOTDIRSRAW=`find -L / -maxdepth 1 -mindepth 1 -type d 2>/dev/null`
+    SEP=""
+    for dir in $ROOTDIRSRAW ; do
+        ROOTDIRS="$ROOTDIRS$SEP$dir"
+        SEP="|"
+    done
+    OURCYGPATTERN="(^($ROOTDIRS))"
+    # Add a user-defined pattern to the cygpath arguments
+    if [ "$GRADLE_CYGPATTERN" != "" ] ; then
+        OURCYGPATTERN="$OURCYGPATTERN|($GRADLE_CYGPATTERN)"
+    fi
+    # Now convert the arguments - kludge to limit ourselves to /bin/sh
+    i=0
+    for arg in "$@" ; do
+        CHECK=`echo "$arg"|egrep -c "$OURCYGPATTERN" -`
+        CHECK2=`echo "$arg"|egrep -c "^-"`                                 ### Determine if an option
+
+        if [ $CHECK -ne 0 ] && [ $CHECK2 -eq 0 ] ; then                    ### Added a condition
+            eval `echo args$i`=`cygpath --path --ignore --mixed "$arg"`
+        else
+            eval `echo args$i`="\"$arg\""
+        fi
+        i=$((i+1))
+    done
+    case $i in
+        (0) set -- ;;
+        (1) set -- "$args0" ;;
+        (2) set -- "$args0" "$args1" ;;
+        (3) set -- "$args0" "$args1" "$args2" ;;
+        (4) set -- "$args0" "$args1" "$args2" "$args3" ;;
+        (5) set -- "$args0" "$args1" "$args2" "$args3" "$args4" ;;
+        (6) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" ;;
+        (7) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" ;;
+        (8) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" ;;
+        (9) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" "$args8" ;;
+    esac
+fi
+
+# Split up the JVM_OPTS And GRADLE_OPTS values into an array, following the shell quoting and substitution rules
+function splitJvmOpts() {
+    JVM_OPTS=("$@")
+}
+eval splitJvmOpts $DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS
+JVM_OPTS[${#JVM_OPTS[*]}]="-Dorg.gradle.appname=$APP_BASE_NAME"
+
+exec "$JAVACMD" "${JVM_OPTS[@]}" -classpath "$CLASSPATH" org.gradle.wrapper.GradleWrapperMain "$@"
diff --git a/tensorflow/examples/android/gradlew.bat b/tensorflow/examples/android/gradlew.bat
new file mode 100644
index 0000000000..8a0b282aa6
--- /dev/null
+++ b/tensorflow/examples/android/gradlew.bat
@@ -0,0 +1,90 @@
+@if "%DEBUG%" == "" @echo off
+@rem ##########################################################################
+@rem
+@rem  Gradle startup script for Windows
+@rem
+@rem ##########################################################################
+
+@rem Set local scope for the variables with windows NT shell
+if "%OS%"=="Windows_NT" setlocal
+
+@rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
+set DEFAULT_JVM_OPTS=
+
+set DIRNAME=%~dp0
+if "%DIRNAME%" == "" set DIRNAME=.
+set APP_BASE_NAME=%~n0
+set APP_HOME=%DIRNAME%
+
+@rem Find java.exe
+if defined JAVA_HOME goto findJavaFromJavaHome
+
+set JAVA_EXE=java.exe
+%JAVA_EXE% -version >NUL 2>&1
+if "%ERRORLEVEL%" == "0" goto init
+
+echo.
+echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
+echo.
+echo Please set the JAVA_HOME variable in your environment to match the
+echo location of your Java installation.
+
+goto fail
+
+:findJavaFromJavaHome
+set JAVA_HOME=%JAVA_HOME:"=%
+set JAVA_EXE=%JAVA_HOME%/bin/java.exe
+
+if exist "%JAVA_EXE%" goto init
+
+echo.
+echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME%
+echo.
+echo Please set the JAVA_HOME variable in your environment to match the
+echo location of your Java installation.
+
+goto fail
+
+:init
+@rem Get command-line arguments, handling Windowz variants
+
+if not "%OS%" == "Windows_NT" goto win9xME_args
+if "%@eval[2+2]" == "4" goto 4NT_args
+
+:win9xME_args
+@rem Slurp the command line arguments.
+set CMD_LINE_ARGS=
+set _SKIP=2
+
+:win9xME_args_slurp
+if "x%~1" == "x" goto execute
+
+set CMD_LINE_ARGS=%*
+goto execute
+
+:4NT_args
+@rem Get arguments from the 4NT Shell from JP Software
+set CMD_LINE_ARGS=%$
+
+:execute
+@rem Setup the command line
+
+set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar
+
+@rem Execute Gradle
+"%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %CMD_LINE_ARGS%
+
+:end
+@rem End local scope for the variables with windows NT shell
+if "%ERRORLEVEL%"=="0" goto mainEnd
+
+:fail
+rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of
+rem the _cmd.exe /c_ return code!
+if  not "" == "%GRADLE_EXIT_CONSOLE%" exit 1
+exit /b 1
+
+:mainEnd
+if "%OS%"=="Windows_NT" endlocal
+
+:omega
-- 
GitLab


From f0beb65189379b62d12bd751a5cdd0686279f3c4 Mon Sep 17 00:00:00 2001
From: ManHyuk <manhyuk@kw.ac.kr>
Date: Mon, 11 Dec 2017 05:15:54 +0900
Subject: [PATCH 0843/1225] Fix typos (#14904)

* fix typos
---
 tensorflow/compiler/xla/service/layout_assignment.cc | 2 +-
 tensorflow/contrib/lite/nnapi/NeuralNetworksShim.h   | 4 ++--
 tensorflow/core/profiler/internal/tfprof_node.cc     | 4 ++--
 tensorflow/core/profiler/tfprof_log.proto            | 2 +-
 tensorflow/stream_executor/dnn.h                     | 2 +-
 tensorflow/tools/ci_build/builds/pip.sh              | 2 +-
 6 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/tensorflow/compiler/xla/service/layout_assignment.cc b/tensorflow/compiler/xla/service/layout_assignment.cc
index 7eda7c2284..8bd0ce982d 100644
--- a/tensorflow/compiler/xla/service/layout_assignment.cc
+++ b/tensorflow/compiler/xla/service/layout_assignment.cc
@@ -1303,7 +1303,7 @@ Status LayoutAssignment::AssignLayouts(const LayoutConstraints& constraints,
     TF_RET_CHECK(LayoutUtil::HasLayout(instruction->shape()));
   }
 
-  // Copy the root instrucion's result if the it does not match the result
+  // Copy the root instruction's result if the it does not match the result
   // layout constraint
   if (constraints.ResultLayout() != nullptr &&
       !constraints.ResultLayout()->MatchesLayoutInShape(
diff --git a/tensorflow/contrib/lite/nnapi/NeuralNetworksShim.h b/tensorflow/contrib/lite/nnapi/NeuralNetworksShim.h
index bdb5e01538..8066889078 100644
--- a/tensorflow/contrib/lite/nnapi/NeuralNetworksShim.h
+++ b/tensorflow/contrib/lite/nnapi/NeuralNetworksShim.h
@@ -108,7 +108,7 @@ enum {
  * The type of operations that can be added to a model.
  */
 enum {
-  /** Adds two tensors, elment-wise.
+  /** Adds two tensors, element-wise.
    *
    * Takes two input tensors of identical type and compatible dimensions. The
    * output is the sum of both input tensors, optionally modified by an
@@ -743,7 +743,7 @@ enum {
    */
   ANEURALNETWORKS_MAX_POOL_2D = 17,
 
-  /** Multiplies two tensors, elment-wise.
+  /** Multiplies two tensors, element-wise.
    *
    * Takes two input tensors of identical type and compatible dimensions. The
    * output is the product of both input tensors, optionally modified by an
diff --git a/tensorflow/core/profiler/internal/tfprof_node.cc b/tensorflow/core/profiler/internal/tfprof_node.cc
index 5cd1050bcc..7654eba0c8 100644
--- a/tensorflow/core/profiler/internal/tfprof_node.cc
+++ b/tensorflow/core/profiler/internal/tfprof_node.cc
@@ -80,10 +80,10 @@ void ExecStep::AddTimeStats(const string& dev, const NodeExecStats& step_stat) {
 
 void ExecStep::AddMemoryStats(const string& dev,
                               const NodeExecStats& step_stat) {
-  if (exec_.memory_intialized()) {
+  if (exec_.memory_initialized()) {
     return;
   }
-  exec_.set_memory_intialized(true);
+  exec_.set_memory_initialized(true);
 
   int accelerator_allocator_cnt = 0;
   for (const auto& mem : step_stat.memory()) {
diff --git a/tensorflow/core/profiler/tfprof_log.proto b/tensorflow/core/profiler/tfprof_log.proto
index b49bdf64ac..f258fd1246 100644
--- a/tensorflow/core/profiler/tfprof_log.proto
+++ b/tensorflow/core/profiler/tfprof_log.proto
@@ -134,7 +134,7 @@ message ExecProfile {
   // The total number of bytes currently allocated by the allocator if >0.
   int64 allocator_bytes_in_use = 15;
 
-  bool memory_intialized = 16;
+  bool memory_initialized = 16;
 }
 
 message ExecTime {
diff --git a/tensorflow/stream_executor/dnn.h b/tensorflow/stream_executor/dnn.h
index 0d2cd4a9f2..73b96de438 100644
--- a/tensorflow/stream_executor/dnn.h
+++ b/tensorflow/stream_executor/dnn.h
@@ -1132,7 +1132,7 @@ class DnnSupport {
   //    space in order to speed up the convolution operation.
   //  algorithm: an integer to specify which algorithm should be used for the
   //    operation. kDefaultAlgorithm means the system will pick an algorithm
-  //    by default. The coding of the algorithm is be interpretted by the
+  //    by default. The coding of the algorithm is be interpreted by the
   //    underlying implementation.
   //  output_profile_result: the output profile result for this call. The
   //    profiling is only enabled when this is not nullptr.
diff --git a/tensorflow/tools/ci_build/builds/pip.sh b/tensorflow/tools/ci_build/builds/pip.sh
index f5764531a0..c5756cb30d 100755
--- a/tensorflow/tools/ci_build/builds/pip.sh
+++ b/tensorflow/tools/ci_build/builds/pip.sh
@@ -343,7 +343,7 @@ do_clean_virtualenv_smoke_test() {
   then
     echo "Smoke test of tensorflow install in clean virtualenv PASSED."
   else
-    echo "Smoke test of tensroflow install in clean virtualenv FAILED."
+    echo "Smoke test of tensorflow install in clean virtualenv FAILED."
     return 1
   fi
 
-- 
GitLab


From c767489e18550ea1b1be2675a4fe78841d617c05 Mon Sep 17 00:00:00 2001
From: arixlin <linmingzxx@gmail.com>
Date: Sun, 10 Dec 2017 12:24:20 -0800
Subject: [PATCH 0844/1225] mobilenet forzen (#14823)

---
 tensorflow/contrib/lite/README.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tensorflow/contrib/lite/README.md b/tensorflow/contrib/lite/README.md
index b9828f7d31..2fb40070cb 100644
--- a/tensorflow/contrib/lite/README.md
+++ b/tensorflow/contrib/lite/README.md
@@ -167,6 +167,7 @@ graphviz, or [in tensorboard](https://codelabs.developers.google.com/codelabs/te
 This frozen Graphdef is now ready to be converted to flatbuffer format (.lite) for use on Android or iOS.  On Android users have the flexibility to use either the float or quantized versions of the frozen graphdef, if available, using the Tensorflow Optimizing Converter tool.
 
 Here is a sample command line to convert the frozen Graphdef to '.lite' format for  The Tensorflow Optimizing Converter supports both float and quantized models, however, different configuration parameters are needed depending on whether a FLOAT or QUANTIZED mode is being used.
+(Here is a link to the pb [file](https://storage.googleapis.com/download.tensorflow.org/models/mobilenet_v1_1.0_224_frozen.tgz)).
 
 ```
 bazel build tensorflow/contrib/lite/toco:toco
-- 
GitLab


From 0502939bdc070804477b9a08658aa9fdca2e27c0 Mon Sep 17 00:00:00 2001
From: Chris Filo Gorgolewski <krzysztof.gorgolewski@gmail.com>
Date: Sun, 10 Dec 2017 12:56:12 -0800
Subject: [PATCH 0845/1225] Improve variance_scaling_initializer description
 (#14781)

* Improve variance_scaling_initializer description

Added mention of the "MSRA initializer" atlas
---
 tensorflow/contrib/layers/python/layers/initializers.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tensorflow/contrib/layers/python/layers/initializers.py b/tensorflow/contrib/layers/python/layers/initializers.py
index b12a882d9a..51610f21b2 100644
--- a/tensorflow/contrib/layers/python/layers/initializers.py
+++ b/tensorflow/contrib/layers/python/layers/initializers.py
@@ -79,7 +79,8 @@ def variance_scaling_initializer(factor=2.0, mode='FAN_IN', uniform=False,
   ```
 
   * To get [Delving Deep into Rectifiers](
-     http://arxiv.org/pdf/1502.01852v1.pdf), use (Default):<br/>
+     http://arxiv.org/pdf/1502.01852v1.pdf) (also know as the "MSRA 
+     initialization"), use (Default):<br/>
     `factor=2.0 mode='FAN_IN' uniform=False`
   * To get [Convolutional Architecture for Fast Feature Embedding](
      http://arxiv.org/abs/1408.5093), use:<br/>
-- 
GitLab


From ec57ca65c46a45a24454c6a2d33bcce379d56627 Mon Sep 17 00:00:00 2001
From: Yong Tang <yong.tang.github@outlook.com>
Date: Sun, 10 Dec 2017 16:13:38 -0600
Subject: [PATCH 0846/1225] Remove `non-fused` version of `adjust_saturation`
 as GPU kernel already exists (#14794)

* Remove `non-fused` version of `adjust_saturation` as GPU kernel already exists

In the existing implementation for `adjust_saturation` the non-fused version
was still in place. As the non-fused is for non-GPU support of `adjust_saturation`
and GPU kernel already exists now (See commit https://github.com/tensorflow/tensorflow/commit/25c4f279402c3f9516314ee40ee398fc57ffb78e#diff-b53c223158b7c4fd248ef581da6566c2), it makes sense to remove the non-fused version.

In addition, with the removal of non-fused implementation of `adjust_saturation`,
now it is possible to provide batch support (in 4-D instead of previous 3-D).
This resolves issue raised in 8926.

This fix removed non-fused version of `adjust_saturation` and added additional
test cases for batch support.

Note: In PR 14187, non-fused version of `adjust_hue` has been removed so
batch support for `adjust_hue` has been enabled as well. This PR also
adds additional test cases for batch support of `adjust_hue`.

Signed-off-by: Yong Tang <yong.tang.github@outlook.com>

* Add batch support for `adjust_saturation` and ``adjust_hue``

Signed-off-by: Yong Tang <yong.tang.github@outlook.com>

* Update docstring for `random_hue`

Signed-off-by: Yong Tang <yong.tang.github@outlook.com>
---
 tensorflow/python/ops/image_ops_impl.py | 29 ++++--------------------
 tensorflow/python/ops/image_ops_test.py | 30 +++++++++++++++++++++++++
 2 files changed, 34 insertions(+), 25 deletions(-)

diff --git a/tensorflow/python/ops/image_ops_impl.py b/tensorflow/python/ops/image_ops_impl.py
index 4e77ef8fcf..46022e2e7f 100644
--- a/tensorflow/python/ops/image_ops_impl.py
+++ b/tensorflow/python/ops/image_ops_impl.py
@@ -1210,7 +1210,7 @@ def random_hue(image, max_delta, seed=None):
       set_random_seed for its interaction with the graph-level random seed.
 
   Returns:
-    3-D float tensor of shape `[height, width, channels]`.
+    Adjusted image(s), same shape and DType as `image`.
 
   Raises:
     ValueError: if `max_delta` is invalid.
@@ -1317,30 +1317,9 @@ def adjust_saturation(image, saturation_factor, name=None):
     orig_dtype = image.dtype
     flt_image = convert_image_dtype(image, dtypes.float32)
 
-    # TODO(zhengxq): we will switch to the fused version after we add a GPU
-    # kernel for that.
-    fused = os.environ.get('TF_ADJUST_SATURATION_FUSED', '')
-    fused = fused.lower() in ('true', 't', '1')
-
-    if fused:
-      return convert_image_dtype(
-          gen_image_ops.adjust_saturation(flt_image, saturation_factor),
-          orig_dtype)
-
-    hsv = gen_image_ops.rgb_to_hsv(flt_image)
-
-    hue = array_ops.slice(hsv, [0, 0, 0], [-1, -1, 1])
-    saturation = array_ops.slice(hsv, [0, 0, 1], [-1, -1, 1])
-    value = array_ops.slice(hsv, [0, 0, 2], [-1, -1, 1])
-
-    saturation *= saturation_factor
-    saturation = clip_ops.clip_by_value(saturation, 0.0, 1.0)
-
-    hsv_altered = array_ops.concat([hue, saturation, value], 2)
-    rgb_altered = gen_image_ops.hsv_to_rgb(hsv_altered)
-
-    return convert_image_dtype(rgb_altered, orig_dtype)
-
+    return convert_image_dtype(
+        gen_image_ops.adjust_saturation(flt_image, saturation_factor),
+        orig_dtype)
 
 def decode_image(contents, channels=None, name=None):
   """Convenience function for `decode_bmp`, `decode_gif`, `decode_jpeg`,
diff --git a/tensorflow/python/ops/image_ops_test.py b/tensorflow/python/ops/image_ops_test.py
index e18a47ff8a..f320b52b09 100644
--- a/tensorflow/python/ops/image_ops_test.py
+++ b/tensorflow/python/ops/image_ops_test.py
@@ -281,6 +281,21 @@ class AdjustHueTest(test_util.TensorFlowTestCase):
       y_tf = y.eval()
       self.assertAllEqual(y_tf, y_np)
 
+  def testBatchAdjustHue(self):
+    x_shape = [2, 1, 2, 3]
+    x_data = [0, 5, 13, 54, 135, 226, 37, 8, 234, 90, 255, 1]
+    x_np = np.array(x_data, dtype=np.uint8).reshape(x_shape)
+
+    delta = 0.25
+    y_data = [13, 0, 11, 226, 54, 221, 234, 8, 92, 1, 217, 255]
+    y_np = np.array(y_data, dtype=np.uint8).reshape(x_shape)
+
+    with self.test_session(use_gpu=True):
+      x = constant_op.constant(x_np, shape=x_shape)
+      y = image_ops.adjust_hue(x, delta)
+      y_tf = y.eval()
+      self.assertAllEqual(y_tf, y_np)
+
   def _adjustHueNp(self, x_np, delta_h):
     self.assertEqual(x_np.shape[-1], 3)
     x_v = x_np.reshape([-1, 3])
@@ -632,6 +647,21 @@ class AdjustSaturationTest(test_util.TensorFlowTestCase):
       y_tf = y.eval()
       self.assertAllEqual(y_tf, y_np)
 
+  def testBatchSaturation(self):
+    x_shape = [2, 1, 2, 3]
+    x_data = [0, 5, 13, 54, 135, 226, 37, 8, 234, 90, 255, 1]
+    x_np = np.array(x_data, dtype=np.uint8).reshape(x_shape)
+
+    saturation_factor = 0.5
+    y_data = [6, 9, 13, 140, 180, 226, 135, 121, 234, 172, 255, 128]
+    y_np = np.array(y_data, dtype=np.uint8).reshape(x_shape)
+
+    with self.test_session(use_gpu=True):
+      x = constant_op.constant(x_np, shape=x_shape)
+      y = image_ops.adjust_saturation(x, saturation_factor)
+      y_tf = y.eval()
+      self.assertAllEqual(y_tf, y_np)
+
   def _adjust_saturation(self, image, saturation_factor):
     image = ops.convert_to_tensor(image, name="image")
     orig_dtype = image.dtype
-- 
GitLab


From 373b4251a10d5b8622a28d89bebba457acfa1f70 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Sun, 10 Dec 2017 15:11:07 -0800
Subject: [PATCH 0847/1225] Add a library for the cwise ops headers and common
 source.

PiperOrigin-RevId: 178554846
---
 tensorflow/core/kernels/BUILD | 28 ++++++++++++++++++++++++++++
 1 file changed, 28 insertions(+)

diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD
index 94dbd53607..a3262bf06a 100644
--- a/tensorflow/core/kernels/BUILD
+++ b/tensorflow/core/kernels/BUILD
@@ -6424,3 +6424,31 @@ filegroup(
     ),
     visibility = ["//tensorflow:__subpackages__"],
 )
+
+# Library to link with when compiling the cwise_op kernels directly,
+# e.g. for selective registration.
+# should not be linked by projects that also link the cwise_op library.
+cc_library(
+    name = "cwise_lib",
+    srcs = [
+        "cwise_ops_common.cc",
+        "meta_support.cc",
+        "quantization_utils.cc",
+    ],
+    hdrs = [
+        "cwise_ops.h",
+        "cwise_ops_common.h",
+        "cwise_ops_gpu_common.cu.h",
+        "cwise_ops_gpu_gradients.cu.h",
+        "cwise_ops_gradients.h",
+        "meta_support.h",
+        "quantization_utils.h",
+    ],
+    deps = [
+        ":bounds_check",
+        "//tensorflow/core:framework",
+        "//tensorflow/core:lib",
+        "//third_party/eigen3",
+        "@gemmlowp//:gemmlowp",
+    ],
+)
-- 
GitLab


From 6b2f1d6394f5333fae172f898406f92a86874828 Mon Sep 17 00:00:00 2001
From: FirefoxMetzger <S.Wallkoetter@gmx.de>
Date: Mon, 11 Dec 2017 01:58:36 +0100
Subject: [PATCH 0848/1225] update how_tos/reading_data to use Dataset API
 (#14751)

* updated reading_data to use Dataset

Since the Dataset API moved from .contrib.data into .data (core) update
the MNIST example to use Dataset over queues.
---
 .../api_guides/python/reading_data.md         |  27 ++--
 .../reading_data/fully_connected_reader.py    | 125 ++++++++----------
 2 files changed, 76 insertions(+), 76 deletions(-)

diff --git a/tensorflow/docs_src/api_guides/python/reading_data.md b/tensorflow/docs_src/api_guides/python/reading_data.md
index 4594887349..f316cce953 100644
--- a/tensorflow/docs_src/api_guides/python/reading_data.md
+++ b/tensorflow/docs_src/api_guides/python/reading_data.md
@@ -175,14 +175,25 @@ For example,
 [`tensorflow/examples/how_tos/reading_data/convert_to_records.py`](https://www.tensorflow.org/code/tensorflow/examples/how_tos/reading_data/convert_to_records.py)
 converts MNIST data to this format.
 
-To read a file of TFRecords, use
-@{tf.TFRecordReader} with
-the @{tf.parse_single_example}
-decoder. The `parse_single_example` op decodes the example protocol buffers into
-tensors. An MNIST example using the data produced by `convert_to_records` can be
-found in
-[`tensorflow/examples/how_tos/reading_data/fully_connected_reader.py`](https://www.tensorflow.org/code/tensorflow/examples/how_tos/reading_data/fully_connected_reader.py),
-which you can compare with the `fully_connected_feed` version.
+The recommended way to read a TFRecord file is with a @{tf.data.TFRecordDataset}, [as in this example](https://www.tensorflow.org/code/tensorflow/examples/how_tos/reading_data/fully_connected_reader.py):
+
+``` python
+    dataset = tf.data.TFRecordDataset(filename)
+    dataset = dataset.repeat(num_epochs)
+
+    # map takes a python function and applies it to every sample
+    dataset = dataset.map(decode)
+```
+
+To acomplish the same task with a queue based input pipeline requires the following code 
+(using the same `decode` function from the above example): 
+
+``` python
+  filename_queue = tf.train.string_input_producer([filename], num_epochs=num_epochs)
+  reader = tf.TFRecordReader()
+  _, serialized_example = reader.read(filename_queue)
+  image,label = decode(serialized_example)
+```
 
 ### Preprocessing
 
diff --git a/tensorflow/examples/how_tos/reading_data/fully_connected_reader.py b/tensorflow/examples/how_tos/reading_data/fully_connected_reader.py
index a9ed02dd1a..9db8835d92 100644
--- a/tensorflow/examples/how_tos/reading_data/fully_connected_reader.py
+++ b/tensorflow/examples/how_tos/reading_data/fully_connected_reader.py
@@ -45,9 +45,7 @@ TRAIN_FILE = 'train.tfrecords'
 VALIDATION_FILE = 'validation.tfrecords'
 
 
-def read_and_decode(filename_queue):
-  reader = tf.TFRecordReader()
-  _, serialized_example = reader.read(filename_queue)
+def decode(serialized_example):
   features = tf.parse_single_example(
       serialized_example,
       # Defaults are not specified since both keys are required.
@@ -60,22 +58,26 @@ def read_and_decode(filename_queue):
   # length mnist.IMAGE_PIXELS) to a uint8 tensor with shape
   # [mnist.IMAGE_PIXELS].
   image = tf.decode_raw(features['image_raw'], tf.uint8)
-  image.set_shape([mnist.IMAGE_PIXELS])
+  image.set_shape((mnist.IMAGE_PIXELS))
 
+  # Convert label from a scalar uint8 tensor to an int32 scalar.
+  label = tf.cast(features['label'], tf.int32)
+  
+  return image, label
+
+def augment(image, label):
   # OPTIONAL: Could reshape into a 28x28 image and apply distortions
   # here.  Since we are not applying any distortions in this
   # example, and the next step expects the image to be flattened
   # into a vector, we don't bother.
+  return image, label
 
+def normalize(image, label):
   # Convert from [0, 255] -> [-0.5, 0.5] floats.
   image = tf.cast(image, tf.float32) * (1. / 255) - 0.5
 
-  # Convert label from a scalar uint8 tensor to an int32 scalar.
-  label = tf.cast(features['label'], tf.int32)
-
   return image, label
 
-
 def inputs(train, batch_size, num_epochs):
   """Reads input data num_epochs times.
 
@@ -91,31 +93,32 @@ def inputs(train, batch_size, num_epochs):
       in the range [-0.5, 0.5].
     * labels is an int32 tensor with shape [batch_size] with the true label,
       a number in the range [0, mnist.NUM_CLASSES).
-    Note that an tf.train.QueueRunner is added to the graph, which
-    must be run using e.g. tf.train.start_queue_runners().
+
+    This function creates a one_shot_iterator, meaning that it will only iterate
+    over the dataset once. On the other hand there is no special initialization
+    required.
   """
   if not num_epochs: num_epochs = None
   filename = os.path.join(FLAGS.train_dir,
                           TRAIN_FILE if train else VALIDATION_FILE)
 
   with tf.name_scope('input'):
-    filename_queue = tf.train.string_input_producer(
-        [filename], num_epochs=num_epochs)
+    # TFRecordDataset opens a protobuf and reads entries line by line
+    # could also be [list, of, filenames]
+    dataset = tf.data.TFRecordDataset(filename)
+    dataset = dataset.repeat(num_epochs)
 
-    # Even when reading in multiple threads, share the filename
-    # queue.
-    image, label = read_and_decode(filename_queue)
+    # map takes a python function and applies it to every sample
+    dataset = dataset.map(decode)
+    dataset = dataset.map(augment)
+    dataset = dataset.map(normalize)
 
-    # Shuffle the examples and collect them into batch_size batches.
-    # (Internally uses a RandomShuffleQueue.)
-    # We run this in two threads to avoid being a bottleneck.
-    images, sparse_labels = tf.train.shuffle_batch(
-        [image, label], batch_size=batch_size, num_threads=2,
-        capacity=1000 + 3 * batch_size,
-        # Ensures a minimum amount of shuffling of examples.
-        min_after_dequeue=1000)
+    #the parameter is the queue size
+    dataset = dataset.shuffle(1000 + 3 * batch_size)
+    dataset = dataset.batch(batch_size)
 
-    return images, sparse_labels
+    iterator = dataset.make_one_shot_iterator()
+  return iterator.get_next()
 
 
 def run_training():
@@ -124,16 +127,16 @@ def run_training():
   # Tell TensorFlow that the model will be built into the default Graph.
   with tf.Graph().as_default():
     # Input images and labels.
-    images, labels = inputs(train=True, batch_size=FLAGS.batch_size,
-                            num_epochs=FLAGS.num_epochs)
+    image_batch, label_batch = inputs(train=True, batch_size=FLAGS.batch_size,
+                               num_epochs=FLAGS.num_epochs)
 
     # Build a Graph that computes predictions from the inference model.
-    logits = mnist.inference(images,
+    logits = mnist.inference(image_batch,
                              FLAGS.hidden1,
                              FLAGS.hidden2)
 
     # Add to the Graph the loss calculation.
-    loss = mnist.loss(logits, labels)
+    loss = mnist.loss(logits, label_batch)
 
     # Add to the Graph operations that train the model.
     train_op = mnist.training(loss, FLAGS.learning_rate)
@@ -143,47 +146,33 @@ def run_training():
                        tf.local_variables_initializer())
 
     # Create a session for running operations in the Graph.
-    sess = tf.Session()
-
-    # Initialize the variables (the trained variables and the
-    # epoch counter).
-    sess.run(init_op)
-
-    # Start input enqueue threads.
-    coord = tf.train.Coordinator()
-    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
-
-    try:
-      step = 0
-      while not coord.should_stop():
-        start_time = time.time()
-
-        # Run one step of the model.  The return values are
-        # the activations from the `train_op` (which is
-        # discarded) and the `loss` op.  To inspect the values
-        # of your ops or variables, you may include them in
-        # the list passed to sess.run() and the value tensors
-        # will be returned in the tuple from the call.
-        _, loss_value = sess.run([train_op, loss])
-
-        duration = time.time() - start_time
-
-        # Print an overview fairly often.
-        if step % 100 == 0:
-          print('Step %d: loss = %.2f (%.3f sec)' % (step, loss_value,
+    with tf.Session() as sess:
+      # Initialize the variables (the trained variables and the
+      # epoch counter).
+      sess.run(init_op)
+      try:
+        step = 0
+        while True: #train until OutOfRangeError
+          start_time = time.time()
+
+          # Run one step of the model.  The return values are
+          # the activations from the `train_op` (which is
+          # discarded) and the `loss` op.  To inspect the values
+          # of your ops or variables, you may include them in
+          # the list passed to sess.run() and the value tensors
+          # will be returned in the tuple from the call.
+          _, loss_value = sess.run([train_op, loss])
+
+          duration = time.time() - start_time
+
+          # Print an overview fairly often.
+          if step % 100 == 0:
+            print('Step %d: loss = %.2f (%.3f sec)' % (step, loss_value,
                                                      duration))
-        step += 1
-    except tf.errors.OutOfRangeError:
-      print('Done training for %d epochs, %d steps.' % (FLAGS.num_epochs, step))
-    finally:
-      # When done, ask the threads to stop.
-      coord.request_stop()
-
-    # Wait for threads to finish.
-    coord.join(threads)
-    sess.close()
-
-
+          step += 1
+      except tf.errors.OutOfRangeError:
+        print('Done training for %d epochs, %d steps.' % (FLAGS.num_epochs, step))
+      
 def main(_):
   run_training()
 
-- 
GitLab


From e17ae378063b46c894a8c193823f029d7d87de81 Mon Sep 17 00:00:00 2001
From: Yong Tang <yong.tang.github@outlook.com>
Date: Sun, 10 Dec 2017 19:44:54 -0600
Subject: [PATCH 0849/1225] Fix several potential memory leaks (#14816)

* Fix several potential memory leaks

This fix fixes several potential memory leaks, mostly
caused by error return without proper deleting.

Note: The original issue was raised by @orpillar, thanks!

This fix fixes 14800.

Signed-off-by: Yong Tang <yong.tang.github@outlook.com>
---
 tensorflow/c/c_api.cc                                          | 2 ++
 tensorflow/core/lib/io/snappy/snappy_outputbuffer.cc           | 3 +--
 .../platform/profile_utils/android_armv7a_cpu_utils_helper.cc  | 3 ++-
 tensorflow/tools/proto_text/gen_proto_text_functions.cc        | 1 +
 4 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/tensorflow/c/c_api.cc b/tensorflow/c/c_api.cc
index 13253ced49..8a85eba5fc 100644
--- a/tensorflow/c/c_api.cc
+++ b/tensorflow/c/c_api.cc
@@ -579,6 +579,7 @@ TF_Tensor* TF_TensorFromTensor(const tensorflow::Tensor& src,
       status->status = InvalidArgument(
           "invalid string tensor encoding (string #", i, " of ",
           srcarray.size(), "): ", status->status.error_message());
+      delete[] base;
       return nullptr;
     }
     dst += consumed;
@@ -588,6 +589,7 @@ TF_Tensor* TF_TensorFromTensor(const tensorflow::Tensor& src,
     status->status = InvalidArgument(
         "invalid string tensor encoding (decoded ", (dst - base),
         " bytes, but the tensor is encoded in ", size, " bytes");
+    delete[] base;
     return nullptr;
   }
 
diff --git a/tensorflow/core/lib/io/snappy/snappy_outputbuffer.cc b/tensorflow/core/lib/io/snappy/snappy_outputbuffer.cc
index be1fa22c69..3c31016732 100644
--- a/tensorflow/core/lib/io/snappy/snappy_outputbuffer.cc
+++ b/tensorflow/core/lib/io/snappy/snappy_outputbuffer.cc
@@ -161,7 +161,7 @@ Status SnappyOutputBuffer::Deflate() {
   }
 
   // Write length of compressed block to output buffer.
-  char* compressed_length_array = new char[4];
+  char compressed_length_array[4];
   std::fill(compressed_length_array, compressed_length_array + 4, 0);
   for (int i = 0; i < 4; i++) {
     // Little endian.
@@ -173,7 +173,6 @@ Status SnappyOutputBuffer::Deflate() {
   TF_RETURN_IF_ERROR(AddToOutputBuffer(output.data(), output.size()));
   next_in_ += avail_in_;
   avail_in_ = 0;
-  delete[] compressed_length_array;
 
   return Status::OK();
 }
diff --git a/tensorflow/core/platform/profile_utils/android_armv7a_cpu_utils_helper.cc b/tensorflow/core/platform/profile_utils/android_armv7a_cpu_utils_helper.cc
index fb1955edde..12dc9c58b3 100644
--- a/tensorflow/core/platform/profile_utils/android_armv7a_cpu_utils_helper.cc
+++ b/tensorflow/core/platform/profile_utils/android_armv7a_cpu_utils_helper.cc
@@ -118,9 +118,10 @@ int64 AndroidArmV7ACpuUtilsHelper::ReadCpuFrequencyFile(
   const int retval = fscanf(fp, "%lld", &freq_in_khz);
   if (retval < 0) {
     LOG(WARNING) << "Failed to \"" << file_path << "\"";
+    fclose(fp);
     return INVALID_CPU_FREQUENCY;
   }
-  pclose(fp);
+  fclose(fp);
   return freq_in_khz * 1000;  // The file contains cpu frequency in khz
 }
 
diff --git a/tensorflow/tools/proto_text/gen_proto_text_functions.cc b/tensorflow/tools/proto_text/gen_proto_text_functions.cc
index ecb29a65a0..f0bb59acf8 100644
--- a/tensorflow/tools/proto_text/gen_proto_text_functions.cc
+++ b/tensorflow/tools/proto_text/gen_proto_text_functions.cc
@@ -132,6 +132,7 @@ int MainImpl(int argc, char** argv) {
       FILE* f = fopen(path.c_str(), "w");
       if (f == nullptr) return -1;
       if (fwrite(data.c_str(), 1, data.size(), f) != data.size()) {
+        fclose(f);
         return -1;
       }
       if (fclose(f) != 0) {
-- 
GitLab


From 22e0870f625adf4045bda8cc6c3879dea7736edb Mon Sep 17 00:00:00 2001
From: Skye Wanderman-Milne <skyewm@google.com>
Date: Sun, 10 Dec 2017 17:48:15 -0800
Subject: [PATCH 0850/1225] Make control_flow_op_py_test "medium" to avoid ASAN
 timeouts.

It takes longer to run now that it runs with and without the C API
enabled.

PiperOrigin-RevId: 178561206
---
 tensorflow/python/kernel_tests/BUILD | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/tensorflow/python/kernel_tests/BUILD b/tensorflow/python/kernel_tests/BUILD
index feee202829..2c73ecd8be 100644
--- a/tensorflow/python/kernel_tests/BUILD
+++ b/tensorflow/python/kernel_tests/BUILD
@@ -1231,7 +1231,9 @@ cuda_py_test(
 
 cuda_py_test(
     name = "control_flow_ops_py_test",
-    size = "small",
+    # TOOD(b/70473603): change this back to "small" once the C API is
+    # permanently enabled
+    size = "medium",
     srcs = ["control_flow_ops_py_test.py"],
     additional_deps = [
         "//third_party/py/numpy",
-- 
GitLab


From c9cc0551bac49a6bd18bbb4b2e14850854a0b0c8 Mon Sep 17 00:00:00 2001
From: Peng Yu <yupbank@users.noreply.github.com>
Date: Sun, 10 Dec 2017 21:11:51 -0500
Subject: [PATCH 0851/1225] improve int data type check in session (#14300)

* add data type check in session
---
 tensorflow/python/client/session.py      | 18 +++++++++++++-----
 tensorflow/python/client/session_test.py |  6 ++++++
 2 files changed, 19 insertions(+), 5 deletions(-)

diff --git a/tensorflow/python/client/session.py b/tensorflow/python/client/session.py
index 759c36ad72..cd71f1a54e 100644
--- a/tensorflow/python/client/session.py
+++ b/tensorflow/python/client/session.py
@@ -126,6 +126,12 @@ _REGISTERED_EXPANSIONS = [
      lambda feed: [feed])]
 # pylint: enable=g-long-lambda
 
+
+def _convert_to_numpy_obj(numpy_dtype, obj):
+  """Explicitly convert obj based on numpy type except for string type."""
+  return numpy_dtype(obj) if numpy_dtype is not object else str(obj)
+
+
 def register_session_run_conversion_functions(tensor_type, fetch_function,
     feed_function=None, feed_function_for_partial_run=None):
   """Register fetch and feed conversion functions for `tf.Session.run()`.
@@ -1072,12 +1078,14 @@ class BaseSession(SessionInterface):
                             'strings, lists, numpy ndarrays, or TensorHandles.')
 
           subfeed_dtype = subfeed_t.dtype.as_numpy_dtype
-          if isinstance(subfeed_val,
-                        int) and subfeed_dtype(subfeed_val) != subfeed_val:
+          if isinstance(subfeed_val, int) and _convert_to_numpy_obj(
+              subfeed_dtype, subfeed_val) != subfeed_val:
             raise TypeError(
-                'Type of feed value ' + str(subfeed_val) + ' is not'
-                ' compatible with Tensor type ' + str(subfeed_dtype) + '.'
-                ' Try explicitly setting the type of the feed tensor'
+                'Type of feed value ' + str(subfeed_val) + ' with type ' +
+                str(type(subfeed_val)) +
+                ' is not compatible with Tensor type ' +
+                str(subfeed_dtype) +
+                '. Try explicitly setting the type of the feed tensor'
                 ' to a larger type (e.g. int64).')
 
           is_tensor_handle_feed = isinstance(subfeed_val,
diff --git a/tensorflow/python/client/session_test.py b/tensorflow/python/client/session_test.py
index e4545d287b..3d38c4a182 100644
--- a/tensorflow/python/client/session_test.py
+++ b/tensorflow/python/client/session_test.py
@@ -1743,6 +1743,12 @@ class SessionTest(test_util.TensorFlowTestCase):
     server = server_lib.Server.create_local_server()
     self.runTestAddFunctionToSession(server.target)
 
+  def testAutoConvertAndCheckData(self):
+    with self.test_session() as sess:
+      a = array_ops.placeholder(dtype=dtypes.string)
+      with self.assertRaisesRegexp(
+          TypeError, 'Type of feed value 1 with type <(\w+) \'int\'> is not'):
+        sess.run(a, feed_dict={a: 1})
 
 class GraphMutationTest(test_util.TensorFlowTestCase):
 
-- 
GitLab


From fd1263fb9b9a81b4c8d7e7922308146b4f57428d Mon Sep 17 00:00:00 2001
From: Zhengsheng Wei <zhengsheng.wei.cn@gmail.com>
Date: Mon, 11 Dec 2017 10:44:44 +0800
Subject: [PATCH 0852/1225] fixed bug that Dropout support_masking gets reset
 to False (#14831)

* fixed initial function of dropout
---
 tensorflow/python/keras/_impl/keras/layers/core.py      | 2 +-
 tensorflow/python/keras/_impl/keras/layers/core_test.py | 5 +++++
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/tensorflow/python/keras/_impl/keras/layers/core.py b/tensorflow/python/keras/_impl/keras/layers/core.py
index 517129fab0..ef9b435322 100644
--- a/tensorflow/python/keras/_impl/keras/layers/core.py
+++ b/tensorflow/python/keras/_impl/keras/layers/core.py
@@ -104,13 +104,13 @@ class Dropout(tf_core_layers.Dropout, Layer):
   """
 
   def __init__(self, rate, noise_shape=None, seed=None, **kwargs):
-    self.supports_masking = True
     # Inheritance call order:
     # 1) tf.layers.Dropout, 2) keras.layers.Layer, 3) tf.layers.Layer
     super(Dropout, self).__init__(rate=rate,
                                   noise_shape=noise_shape,
                                   seed=seed,
                                   **kwargs)
+    self.supports_masking = True
 
   def call(self, inputs, training=None):
     if training is None:
diff --git a/tensorflow/python/keras/_impl/keras/layers/core_test.py b/tensorflow/python/keras/_impl/keras/layers/core_test.py
index dd768dc268..d606662c79 100644
--- a/tensorflow/python/keras/_impl/keras/layers/core_test.py
+++ b/tensorflow/python/keras/_impl/keras/layers/core_test.py
@@ -47,6 +47,11 @@ class CoreLayersTest(test.TestCase):
                   'noise_shape': [3, 1]},
           input_shape=(3, 2))
 
+    # https://github.com/tensorflow/tensorflow/issues/14819
+    with self.test_session():
+      dropout = keras.layers.Dropout(0.5)
+      self.assertEqual(True, dropout.supports_masking)
+
     with self.test_session():
       testing_utils.layer_test(
           keras.layers.SpatialDropout1D,
-- 
GitLab


From dcd820616d352d1e8844b1db504af4e859176cbf Mon Sep 17 00:00:00 2001
From: Julian Niedermeier <sleighsoft@users.noreply.github.com>
Date: Mon, 11 Dec 2017 04:40:04 +0100
Subject: [PATCH 0853/1225] Allow GANEstimator get_hooks_fn to be set manually
 (#14723)

* Allow GANEstimator get_hooks_fn to be set manually
---
 .../gan/python/estimator/python/gan_estimator_impl.py      | 7 ++++++-
 .../contrib/gan/python/estimator/python/head_impl.py       | 6 ++++--
 2 files changed, 10 insertions(+), 3 deletions(-)

diff --git a/tensorflow/contrib/gan/python/estimator/python/gan_estimator_impl.py b/tensorflow/contrib/gan/python/estimator/python/gan_estimator_impl.py
index 058dc1d1f8..9d14f39133 100644
--- a/tensorflow/contrib/gan/python/estimator/python/gan_estimator_impl.py
+++ b/tensorflow/contrib/gan/python/estimator/python/gan_estimator_impl.py
@@ -107,6 +107,7 @@ class GANEstimator(estimator.Estimator):
                discriminator_loss_fn=None,
                generator_optimizer=None,
                discriminator_optimizer=None,
+               get_hooks_fn=None,
                add_summaries=None,
                use_loss_summaries=True,
                config=None):
@@ -137,6 +138,10 @@ class GANEstimator(estimator.Estimator):
         work.
       discriminator_optimizer: Same as `generator_optimizer`, but for the
         discriminator updates.
+      get_hooks_fn: A function that takes a `GANTrainOps` tuple and returns a
+        list of hooks. These hooks are run on the generator and discriminator
+        train ops, and can be used to implement the GAN training scheme.
+        Defaults to `train.get_sequential_train_hooks()`.
       add_summaries: `None`, a single `SummaryType`, or a list of `SummaryType`.
       use_loss_summaries: If `True`, add loss summaries. If `False`, does not.
         If `None`, uses defaults.
@@ -151,7 +156,7 @@ class GANEstimator(estimator.Estimator):
               else discriminator_optimizer)
       gan_head = head_lib.gan_head(
           generator_loss_fn, discriminator_loss_fn, gopt, dopt,
-          use_loss_summaries)
+          use_loss_summaries, get_hooks_fn=get_hooks_fn)
       return _gan_model_fn(
           features, labels, mode, generator_fn, discriminator_fn, gan_head,
           add_summaries)
diff --git a/tensorflow/contrib/gan/python/estimator/python/head_impl.py b/tensorflow/contrib/gan/python/estimator/python/head_impl.py
index 204c646e19..a21358c50b 100644
--- a/tensorflow/contrib/gan/python/estimator/python/head_impl.py
+++ b/tensorflow/contrib/gan/python/estimator/python/head_impl.py
@@ -71,7 +71,7 @@ class GANHead(head._Head):  # pylint: disable=protected-access
   def __init__(self, generator_loss_fn, discriminator_loss_fn,
                generator_optimizer, discriminator_optimizer,
                use_loss_summaries=True,
-               get_hooks_fn=tfgan_train.get_sequential_train_hooks(),
+               get_hooks_fn=None,
                name=None):
     """`Head` for GAN training.
 
@@ -86,10 +86,12 @@ class GANHead(head._Head):  # pylint: disable=protected-access
       use_loss_summaries: If `True`, add loss summaries. If `False`, does not.
         If `None`, uses defaults.
       get_hooks_fn: A function that takes a GANTrainOps tuple and returns a list
-        of hooks.
+        of hooks. Defaults to `train.get_sequential_train_hooks()`
       name: name of the head. If provided, summary and metrics keys will be
         suffixed by `"/" + name`.
     """
+    if get_hooks_fn is None:
+      get_hooks_fn = tfgan_train.get_sequential_train_hooks()
     # TODO(joelshor): Validate inputs.
 
     if use_loss_summaries in [True, False]:
-- 
GitLab


From 441571a6e95efa45d473916fe9f78d7dc87169ae Mon Sep 17 00:00:00 2001
From: Gunhan Gulsoy <gunan@google.com>
Date: Sun, 10 Dec 2017 22:01:38 -0800
Subject: [PATCH 0854/1225] Instead of python, use PYTHON_BIN_PATH in pip.sh.
 (#15248)

* Instead of python, use PYTHON_BIN_PATH in pip.sh.

* Install virtualenv in py3.6 install script

* Install virtualenv in all python pip package install scripts.

* Bash style fix.
---
 tensorflow/tools/ci_build/builds/pip.sh                        | 2 +-
 tensorflow/tools/ci_build/install/install_pip_packages.sh      | 3 +++
 .../tools/ci_build/install/install_python3.5_pip_packages.sh   | 2 ++
 .../tools/ci_build/install/install_python3.6_pip_packages.sh   | 2 ++
 4 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/tensorflow/tools/ci_build/builds/pip.sh b/tensorflow/tools/ci_build/builds/pip.sh
index c5756cb30d..82042b93c0 100755
--- a/tensorflow/tools/ci_build/builds/pip.sh
+++ b/tensorflow/tools/ci_build/builds/pip.sh
@@ -299,7 +299,7 @@ create_activate_virtualenv_and_install_tensorflow() {
   # Use the virtualenv from the default python version (i.e., python-virtualenv)
   # to create the virtualenv directory for testing. Use the -p flag to specify
   # the python version inside the to-be-created virtualenv directory.
-  python -m virtualenv -p "${PYTHON_BIN_PATH}" ${VIRTUALENV_FLAGS} \
+  ${PYTHON_BIN_PATH} -m virtualenv -p "${PYTHON_BIN_PATH}" ${VIRTUALENV_FLAGS} \
     "${VIRTUALENV_DIR}" || \
     die "FAILED: Unable to create virtualenv"
 
diff --git a/tensorflow/tools/ci_build/install/install_pip_packages.sh b/tensorflow/tools/ci_build/install/install_pip_packages.sh
index b8ed1ab767..da58ac2407 100755
--- a/tensorflow/tools/ci_build/install/install_pip_packages.sh
+++ b/tensorflow/tools/ci_build/install/install_pip_packages.sh
@@ -27,6 +27,9 @@ easy_install3 -U pip
 pip2 install wheel
 pip3 install wheel
 
+pip2 install virtualenv
+pip3 install virtualenv
+
 # Install six.
 pip2 install --upgrade six==1.10.0
 pip3 install --upgrade six==1.10.0
diff --git a/tensorflow/tools/ci_build/install/install_python3.5_pip_packages.sh b/tensorflow/tools/ci_build/install/install_python3.5_pip_packages.sh
index 479242aa43..9881bd99c3 100755
--- a/tensorflow/tools/ci_build/install/install_python3.5_pip_packages.sh
+++ b/tensorflow/tools/ci_build/install/install_python3.5_pip_packages.sh
@@ -39,6 +39,8 @@ if [[ -z $pip35_version ]]; then
 fi
 
 set -e
+pip3.5 install --upgrade virtualenv
+
 # Install six.
 pip3.5 install --upgrade absl-py
 pip3.5 install --upgrade six==1.10.0
diff --git a/tensorflow/tools/ci_build/install/install_python3.6_pip_packages.sh b/tensorflow/tools/ci_build/install/install_python3.6_pip_packages.sh
index ec7d9bf195..1ca12c6c60 100755
--- a/tensorflow/tools/ci_build/install/install_python3.6_pip_packages.sh
+++ b/tensorflow/tools/ci_build/install/install_python3.6_pip_packages.sh
@@ -36,6 +36,8 @@ pip3.6 -V
 which pip3.6
 ln -s /usr/local/bin/pip3.6 /usr/local/bin/pip3
 
+pip3 install --upgrade virtualenv
+
 set -e
 # Install six.
 pip3 install --upgrade absl-py
-- 
GitLab


From 10e870116405b88ff318ca3a7861a3c45478d98f Mon Sep 17 00:00:00 2001
From: JxKing <jinxin900924@gmail.com>
Date: Mon, 11 Dec 2017 23:01:54 +0800
Subject: [PATCH 0855/1225] Elastic Average optimizer (#13012)

* easgd

* Add complete help document

Add complete help document for apply_gradients ,compute_gradients. Define a named constant.
---
 tensorflow/contrib/opt/BUILD                  |  19 +
 tensorflow/contrib/opt/__init__.py            |   3 +
 .../training/elastic_average_optimizer.py     | 345 ++++++++++++++++++
 .../elastic_average_optimizer_test.py         | 225 ++++++++++++
 4 files changed, 592 insertions(+)
 create mode 100644 tensorflow/contrib/opt/python/training/elastic_average_optimizer.py
 create mode 100644 tensorflow/contrib/opt/python/training/elastic_average_optimizer_test.py

diff --git a/tensorflow/contrib/opt/BUILD b/tensorflow/contrib/opt/BUILD
index a9a63cbce0..43fd65ea76 100644
--- a/tensorflow/contrib/opt/BUILD
+++ b/tensorflow/contrib/opt/BUILD
@@ -16,6 +16,7 @@ py_library(
         "__init__.py",
         "python/training/addsign.py",
         "python/training/drop_stale_gradient_optimizer.py",
+        "python/training/elastic_average_optimizer.py",
         "python/training/external_optimizer.py",
         "python/training/lazy_adam_optimizer.py",
         "python/training/moving_average_optimizer.py",
@@ -173,6 +174,24 @@ tf_py_test(
     ],
 )
 
+tf_py_test(
+    name = "elastic_average_optimizer_test",
+    srcs = ["python/training/elastic_average_optimizer_test.py"],
+    additional_deps = [
+        ":opt_py",
+        "//tensorflow/python:client",
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:array_ops",
+        "//tensorflow/python:variables",
+        "//tensorflow/python:framework",
+        "//tensorflow/python:platform",
+        "//tensorflow/python:training",
+        "//tensorflow/python:ops",
+        "//tensorflow/python:framework_for_generated_wrappers",
+        "//third_party/py/numpy",
+    ],
+)
+
 py_test(
     name = "sign_decay_test",
     srcs = ["python/training/sign_decay_test.py"],
diff --git a/tensorflow/contrib/opt/__init__.py b/tensorflow/contrib/opt/__init__.py
index 3275ad8239..2025e8b4fc 100644
--- a/tensorflow/contrib/opt/__init__.py
+++ b/tensorflow/contrib/opt/__init__.py
@@ -28,6 +28,7 @@ from tensorflow.contrib.opt.python.training.multitask_optimizer_wrapper import *
 from tensorflow.contrib.opt.python.training.nadam_optimizer import *
 from tensorflow.contrib.opt.python.training.powersign import *
 from tensorflow.contrib.opt.python.training.variable_clipping_optimizer import *
+from tensorflow.contrib.opt.python.training.elastic_average_optimizer import *
 # pylint: enable=wildcard-import
 
 from tensorflow.python.util.all_util import remove_undocumented
@@ -46,6 +47,8 @@ _allowed_symbols = [
     'VariableClippingOptimizer',
     'MultitaskOptimizerWrapper',
     'clip_gradients_by_global_norm',
+    'ElasticAverageOptimizer', 
+    'ElasticAverageCustomGetter'
 ]
 
 remove_undocumented(__name__, _allowed_symbols)
diff --git a/tensorflow/contrib/opt/python/training/elastic_average_optimizer.py b/tensorflow/contrib/opt/python/training/elastic_average_optimizer.py
new file mode 100644
index 0000000000..9941f22b1f
--- /dev/null
+++ b/tensorflow/contrib/opt/python/training/elastic_average_optimizer.py
@@ -0,0 +1,345 @@
+# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Wrapper optimizer for Elastic Average SGD """
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from tensorflow.python.framework import ops
+from tensorflow.python.ops import math_ops
+
+from tensorflow.python.ops import gen_nn_ops
+from tensorflow.python.ops import control_flow_ops
+from tensorflow.python.ops import variable_scope
+from tensorflow.python.ops import variables
+from tensorflow.python.training import optimizer
+from tensorflow.python.training import session_run_hook
+from tensorflow.python.ops import state_ops
+from tensorflow.python.ops import data_flow_ops
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import constant_op
+
+LOCAL_VARIABLE_NAME = 'local_center_variable'
+GLOBAL_VARIABLE_NAME = 'global_center_variable'
+
+
+class ElasticAverageCustomGetter(object):
+  """Custom_getter class is used to do:
+  1. Change trainable variables to local collection and place them at worker
+    device
+  2. Generate global variables(global center variables)
+  3. Generate local variables(local center variables) which record the global
+    variables and place them at worker device
+    Notice that the class should be used with tf.replica_device_setter,
+    so that the global center variables and global step variable can be placed
+    at ps device. Besides, use 'tf.get_variable' instead of 'tf.Variable' to
+    use this custom getter.
+
+  For example,
+  ea_custom_getter = ElasticAverageCustomGetter(worker_device)
+  with tf.device(
+    tf.train.replica_device_setter(
+      worker_device=worker_device,
+      ps_device="/job:ps/cpu:0",
+      cluster=cluster)),
+    tf.variable_scope('',custom_getter=ea_custom_getter):
+    hid_w = tf.get_variable(
+      initializer=tf.truncated_normal(
+          [IMAGE_PIXELS * IMAGE_PIXELS, FLAGS.hidden_units],
+          stddev=1.0 / IMAGE_PIXELS),
+      name="hid_w")
+    hid_b = tf.get_variable(initializer=tf.zeros([FLAGS.hidden_units]),
+                            name="hid_b")
+  """
+
+  def __init__(self, worker_device):
+    """Create a new `ElasticAverageCustomGetter`.
+
+    Args:
+      worker_device: String.  Name of the `worker` job.
+    """
+    self._worker_device = worker_device
+    self._local_map = {}
+    self._global_map = {}
+
+  def __call__(self, getter, name, trainable, collections, *args, **kwargs):
+    if trainable:
+      with ops.device(self._worker_device):
+        local_var = getter(name, trainable=True,
+                           collections=[ops.GraphKeys.LOCAL_VARIABLES], 
+                           *args, **kwargs)
+        
+      global_center_variable = variable_scope.variable(
+        name='%s/%s' %
+             (GLOBAL_VARIABLE_NAME,
+              name),
+        initial_value=local_var.initialized_value(),
+        trainable=False,
+        collections=[ops.GraphKeys.GLOBAL_VARIABLES])
+
+      with ops.device(self._worker_device):
+        local_center_variable = variable_scope.variable(
+          name='%s/%s' % (LOCAL_VARIABLE_NAME, name),
+          initial_value=local_var.initialized_value(),
+          trainable=False,
+          collections=[ops.GraphKeys.LOCAL_VARIABLES])
+        
+      self._local_map[local_var] = local_center_variable
+      self._global_map[local_var] = global_center_variable
+      return local_var
+    else:
+      return getter(name, trainable, collections, *args, **kwargs)
+
+
+class ElasticAverageOptimizer(optimizer.Optimizer):
+  """Wrapper optimizer that implements the Elastic Average SGD algorithm.
+  This is an async optimizer. During the training, Each worker will update
+  the local variables and maintains its own local_step, which starts from 0
+  and is incremented by 1 after each update of local variables. Whenever
+  the communication period divides the local step, the worker requests
+  the current global center variables and then computed the elastic difference
+  between global center variables and local variables. The elastic difference
+  then be used to update both local variables and global variables.
+  """
+
+  # Default value as paper described
+  BETA = 0.9
+
+  def __init__(
+      self,
+      opt,
+      num_worker,
+      ea_custom_getter,
+      communication_period=10,
+      moving_rate=None,
+      rho=None,
+      use_locking=True,
+      name="ElasticAverageOptimizer"):
+    """Construct a new gradient descent optimizer.
+
+    Args:
+      opt: The actual optimizer that will be used to update local variables.
+        Must be one of the Optimizer classes.
+      num_worker: The number of workers
+      ea_custom_getter: The ElasticAverageCustomGetter
+      communication_period: An int point value to controls the frequency
+        of the communication between every worker and the ps.
+      moving_rate: A floating point value to control the elastic difference.
+      rho: the amount of exploration we allow ine the model. The default
+        value is moving_rate/learning_rate
+      use_locking: If True use locks for update operations.
+      name: Optional name prefix for the operations created when applying
+        gradients. Defaults to "ElasticAverageOptimizer".
+    """
+    super(ElasticAverageOptimizer, self).__init__(use_locking, name)
+    self._opt = opt
+    self._num_worker = num_worker
+    self._period = communication_period
+    self._local_map = ea_custom_getter._local_map
+    self._global_map = ea_custom_getter._global_map
+
+    if moving_rate is None:
+      self._moving_rate = BETA / communication_period / num_worker
+    else:
+      self._moving_rate = moving_rate
+    if rho is None:
+      self._rho = self._moving_rate / self._opt._learning_rate
+    else:
+      self._rho = rho
+
+    self._local_step = variable_scope.get_variable(
+      initializer=0,
+      trainable=False,
+      collections=[ops.GraphKeys.LOCAL_VARIABLES],
+      name="local_step")
+    self._opt._prepare()
+
+  def compute_gradients(self, loss, var_list=None,
+                        gate_gradients=optimizer.Optimizer.GATE_OP,
+                        aggregation_method=None,
+                        colocate_gradients_with_ops=False,
+                        grad_loss=None):
+    """Compute gradients of `loss` for the variables in `var_list`.
+    
+    Add rho*elastic_difference to loss to control the exploration
+    This is the first part of `minimize()`.  It returns a list
+    of (gradient, variable) pairs where "gradient" is the gradient
+    for "variable".  Note that "gradient" can be a `Tensor`, an
+    `IndexedSlices`, or `None` if there is no gradient for the
+    given variable.
+
+    Args:
+      loss: A Tensor containing the value to minimize.
+      var_list: Optional list or tuple of `tf.Variable` to update to minimize
+        `loss`.  Defaults to the list of variables collected in the graph
+        under the key `GraphKey.TRAINABLE_VARIABLES`.
+      gate_gradients: How to gate the computation of gradients.  Can be
+        `GATE_NONE`, `GATE_OP`, or `GATE_GRAPH`.
+      aggregation_method: Specifies the method used to combine gradient terms.
+        Valid values are defined in the class `AggregationMethod`.
+      colocate_gradients_with_ops: If True, try colocating gradients with
+        the corresponding op.
+      grad_loss: Optional. A `Tensor` holding the gradient computed for `loss`.
+
+    Returns:
+      A list of (gradient, variable) pairs. Variable is always present, but
+      gradient can be `None`.
+
+    Raises:
+      TypeError: If `var_list` contains anything else than `Variable` objects.
+      ValueError: If some arguments are invalid.
+    """
+    if not var_list:
+      var_list = variables.trainable_variables()
+      
+    elastic_difference = [math_ops.subtract(v, lv) for v, lv in zip(
+      variables.trainable_variables(),
+      [self._local_map[var] for var in var_list])]
+
+    distance_loss = self._rho * math_ops.add_n(
+                      [gen_nn_ops.l2_loss(ed) for ed in elastic_difference])
+
+    total_loss = loss + distance_loss
+    return self._opt.compute_gradients(total_loss, var_list,
+                                       gate_gradients, aggregation_method,
+                                       colocate_gradients_with_ops, grad_loss)
+
+  def apply_gradients(self, grads_and_vars, global_step=None, name=None):
+    """Apply gradients to global variables.
+
+    This is the second part of `minimize()`. It returns an `Operation` that
+    applies gradients.
+
+    Args:
+      grads_and_vars: List of (gradient, variable) pairs as returned by
+        `compute_gradients()`.
+      global_step: Optional `Variable` to increment by one after the
+        variables have been updated.
+      name: Optional name for the returned operation.  Default to the
+        name passed to the `Optimizer` constructor.
+
+    Returns:
+      An `Operation` that applies the specified gradients. If `global_step`
+      was not None, that operation also increments `global_step`.
+
+    Raises:
+      TypeError: If `grads_and_vars` is malformed.
+      ValueError: If none of the variables have gradients.
+    """
+    apply_updates = self._opt.apply_gradients(grads_and_vars)
+    with ops.control_dependencies([apply_updates]):
+      local_update = state_ops.assign_add(
+        self._local_step, 1, name='local_step_update').op
+
+    # update global variables.
+    def _Update_global_variables():
+      local_vars = [v for g, v in grads_and_vars if g is not None]
+      global_center_vars = [self._global_map[var] for var in local_vars]
+      local_center_vars = [self._local_map[var] for var in local_vars]
+      local_center_vars_update = []
+      for lvar, var in zip(local_center_vars, global_center_vars):
+        local_center_vars_update.append(lvar.assign(var))
+      update_ops = []
+      differences = []
+      with ops.control_dependencies(local_center_vars_update):
+        for v, lv in zip(local_vars, local_center_vars):
+          with ops.device(v.device):
+            differences.append(math_ops.subtract(v, lv))
+        for lvar, diff in zip(local_vars, differences):
+          with ops.device(lvar.device):
+            update_ops.append(state_ops.assign_sub(lvar, math_ops.multiply(
+              self._moving_rate, diff)))
+        for var, diff in zip(global_center_vars, differences):
+          with ops.device(var.device):
+            update_ops.append(state_ops.assign_add(var, math_ops.multiply(
+              self._moving_rate, diff)))
+        if global_step:
+          with ops.colocate_with(global_step):
+            update_ops.append(state_ops.assign_add(global_step, 1))
+      variable_update = control_flow_ops.group(*(update_ops))
+      return variable_update
+
+    with ops.control_dependencies([local_update]):
+      condition = math_ops.equal(math_ops.mod(
+        self._local_step, self._period), 0)
+      conditional_update = control_flow_ops.cond(
+        condition, _Update_global_variables, control_flow_ops.no_op)
+    return conditional_update
+
+  def get_init_op(self, task_index):
+    """Returns the op to let all the local variables and local center
+    variables equal to the global center variables before the training begins"""
+
+    def _Add_sync_queues_and_barrier(enqueue_after_list):
+      """Adds ops to enqueu on all worker queues"""
+      sync_queues = [
+        data_flow_ops.FIFOQueue(self._num_worker, [dtypes.bool], shapes=[[]],
+                                shared_name='%s%s' % (
+                                  'variable_init_sync_queue', i)) for i in
+        range(self._num_worker)]
+      queue_ops = []
+      # For each other worker, add an entry in a queue
+      token = constant_op.constant(False)
+      with ops.control_dependencies(enqueue_after_list):
+        for i, q in enumerate(sync_queues):
+          if i == task_index:
+            queue_ops.append(control_flow_ops.no_op())
+          else:
+            queue_ops.append(q.enqueue(token))
+      queue_ops.append(
+        sync_queues[task_index].dequeue_many(len(sync_queues) - 1))
+      return control_flow_ops.group(*queue_ops)
+
+    init_ops = []
+    local_vars = variables.trainable_variables()
+    global_center_vars = [self._global_map[var] for var in local_vars]
+    local_center_vars = [self._local_map[var] for var in local_vars]
+    if not (local_vars and global_center_vars and local_center_vars):
+      raise ValueError(
+        'The lists of local_variables, global_center_variables, '
+        'local_center_variables should not be empty  ')
+    for lvar, gc_var, lc_var in zip(
+        local_vars, global_center_vars, local_center_vars):
+      init_ops.append(state_ops.assign(lvar, gc_var))
+      init_ops.append(state_ops.assign(lc_var, gc_var))
+
+    init_op = control_flow_ops.group(*(init_ops))
+    sync_queue_op = _Add_sync_queues_and_barrier([init_op])
+    return sync_queue_op
+
+  def make_session_run_hook(self, is_chief, task_index):
+    """Creates a hook to handle ElasticAverageOptimizerHook ops such as initialization."""
+    return _ElasticAverageOptimizerHook(self, is_chief, task_index)
+
+
+class _ElasticAverageOptimizerHook(session_run_hook.SessionRunHook):
+  def __init__(self, ea_optimizer, is_chief, task_index):
+    """Creates hook to handle ElasticAverageOptimizer initialization ops.
+
+    Args:
+      ea_optimizer: `ElasticAverageOptimizer` which this hook will initialize.
+      is_chief: `Bool`, whether is this a chief replica or not.
+    """
+    self._ea_optimizer = ea_optimizer
+    self._is_chief = is_chief
+    self._task_index = task_index
+
+  def begin(self):
+    self._local_init_op = variables.local_variables_initializer()
+    self._global_init_op = None
+    if self._is_chief:
+      self._global_init_op = variables.global_variables_initializer()
+    self._variable_init_op = self._ea_optimizer.get_init_op(self._task_index)
diff --git a/tensorflow/contrib/opt/python/training/elastic_average_optimizer_test.py b/tensorflow/contrib/opt/python/training/elastic_average_optimizer_test.py
new file mode 100644
index 0000000000..59e55fceee
--- /dev/null
+++ b/tensorflow/contrib/opt/python/training/elastic_average_optimizer_test.py
@@ -0,0 +1,225 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for ElasticAverageOptimizer."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import portpicker
+from tensorflow.python.framework import constant_op
+from tensorflow.python.framework import ops
+from tensorflow.python.ops import variables
+from tensorflow.python.platform import test
+from tensorflow.python.training import gradient_descent
+from tensorflow.python.training import server_lib
+from tensorflow.python.training import training
+from tensorflow.python.training import training_util
+from tensorflow.python.ops import variable_scope
+from tensorflow.python.training import device_setter
+
+from tensorflow.contrib.opt.python.training.elastic_average_optimizer import \
+  ElasticAverageOptimizer, ElasticAverageCustomGetter, GLOBAL_VARIABLE_NAME
+
+
+def create_local_cluster(num_workers, num_ps, protocol="grpc"):
+  """Create local GRPC servers and return them."""
+  worker_ports = [portpicker.pick_unused_port() for _ in range(num_workers)]
+  ps_ports = [portpicker.pick_unused_port() for _ in range(num_ps)]
+  cluster_dict = {
+    "worker": ["localhost:%s" % port for port in worker_ports],
+    "ps": ["localhost:%s" % port for port in ps_ports]
+  }
+  cs = server_lib.ClusterSpec(cluster_dict)
+
+  workers = [
+    server_lib.Server(
+      cs, job_name="worker", protocol=protocol, task_index=ix, start=True)
+    for ix in range(num_workers)
+  ]
+  ps_servers = [
+    server_lib.Server(
+      cs, job_name="ps", protocol=protocol, task_index=ix, start=True)
+    for ix in range(num_ps)
+  ]
+
+  return cluster_dict, workers, ps_servers
+
+
+# Creates the workers and return their sessions, graphs, train_ops.
+# Cheif worker will update at last
+def _get_workers(num_workers, period, workers, moving_rate):
+  sessions = []
+  graphs = []
+  train_ops = []
+  for worker_id in range(num_workers):
+    graph = ops.Graph()
+    is_chief = (worker_id == 0)
+    with graph.as_default():
+      worker_device = "/job:worker/task:%d/cpu:0" % (worker_id)
+      ea_coustom = ElasticAverageCustomGetter(
+        worker_device=worker_device)
+      with variable_scope.variable_scope('',
+                                         custom_getter=ea_coustom), ops.device(
+        device_setter.replica_device_setter(worker_device=worker_device,
+                                            ps_device="/job:ps/task:0/cpu:0",
+                                            ps_tasks=1)):
+        global_step = variables.Variable(0, name='global_step',
+                                         trainable=False)
+        var_0 = variable_scope.get_variable(initializer=0.0, name="v0")
+        var_1 = variable_scope.get_variable(initializer=1.0, name="v1")
+
+      with ops.device("/job:worker/task:" + str(worker_id)):
+        grads_0 = constant_op.constant(-1.0)
+        grads_1 = constant_op.constant(-1.0)
+
+        sgd_opt = gradient_descent.GradientDescentOptimizer(1.0)
+        opt = ElasticAverageOptimizer(
+          opt=sgd_opt,
+          num_worker=num_workers,
+          moving_rate=moving_rate,
+          communication_period=period,
+          ea_custom_getter=ea_coustom
+        )
+        train_op = [
+          opt.apply_gradients(
+            ([grads_0, var_0],
+             [grads_1, var_1]), global_step)
+        ]
+        easgd_hook = opt.make_session_run_hook(is_chief, worker_id)
+      # Creates MonitoredSession
+      sess = training.MonitoredTrainingSession(workers[worker_id].target,
+                                               hooks=[easgd_hook])
+
+    sessions.append(sess)
+    graphs.append(graph)
+    train_ops.append(train_op)
+
+  return sessions, graphs, train_ops
+
+
+class ElasticAverageOptimizerTest(test.TestCase):
+  def _run(self, train_op, sess):
+    sess.run(train_op)
+
+  def test1Workers2Period(self):
+    num_workers = 1
+    communication_period = 2
+    num_ps = 1
+    cluster, workers, _ = create_local_cluster(num_workers=num_workers,
+                                               num_ps=num_ps)
+
+    sessions, graphs, train_ops = _get_workers(num_workers,
+                                               communication_period,
+                                               workers, 1.0)
+
+    var_0 = graphs[0].get_tensor_by_name('v0:0')
+    var_1 = graphs[0].get_tensor_by_name('v1:0')
+    global_step = training_util.get_global_step(graphs[0])
+    var_0_g = graphs[0].get_tensor_by_name(GLOBAL_VARIABLE_NAME + "/v0:0")
+    var_1_g = graphs[0].get_tensor_by_name(GLOBAL_VARIABLE_NAME + "/v1:0")
+    # Verify the initialized value.
+    self.assertAllEqual(0.0, sessions[0].run(var_0))
+    self.assertAllEqual(1.0, sessions[0].run(var_1))
+    self.assertAllEqual(0.0, sessions[0].run(var_0_g))
+    self.assertAllEqual(1.0, sessions[0].run(var_1_g))
+    self.assertAllEqual(0, sessions[0].run(global_step))
+
+    sessions[0].run(train_ops[0])
+
+    self.assertAllEqual(1.0, sessions[0].run(var_0))
+    self.assertAllEqual(2.0, sessions[0].run(var_1))
+    self.assertAllEqual(0.0, sessions[0].run(var_0_g))
+    self.assertAllEqual(1.0, sessions[0].run(var_1_g))
+    self.assertAllEqual(0, sessions[0].run(global_step))
+
+    # iteration 2, global varibale update
+    sessions[0].run(train_ops[0])
+
+    self.assertAllEqual(0.0, sessions[0].run(var_0))
+    self.assertAllEqual(1.0, sessions[0].run(var_1))
+    self.assertAllEqual(2.0, sessions[0].run(var_0_g))
+    self.assertAllEqual(3.0, sessions[0].run(var_1_g))
+    self.assertAllEqual(1, sessions[0].run(global_step))
+
+    # iteration 3
+    sessions[0].run(train_ops[0])
+
+    self.assertAllEqual(1.0, sessions[0].run(var_0))
+    self.assertAllEqual(2.0, sessions[0].run(var_1))
+    self.assertAllEqual(2.0, sessions[0].run(var_0_g))
+    self.assertAllEqual(3.0, sessions[0].run(var_1_g))
+    self.assertAllEqual(1, sessions[0].run(global_step))
+
+  def test2Worker1Period(self):
+    num_workers = 2
+    communication_period = 1
+    num_ps = 2
+    cluster, workers, _ = create_local_cluster(num_workers=num_workers,
+                                               num_ps=num_ps)
+
+    sessions, graphs, train_ops = _get_workers(num_workers,
+                                               communication_period,
+                                               workers, 0.5)
+
+    var_0 = graphs[0].get_tensor_by_name('v0:0')
+    var_1 = graphs[0].get_tensor_by_name('v1:0')
+
+    var_0_1 = graphs[1].get_tensor_by_name('v0:0')
+    var_1_1 = graphs[1].get_tensor_by_name('v1:0')
+
+    var_0_g = graphs[0].get_tensor_by_name(GLOBAL_VARIABLE_NAME + "/v0:0")
+    var_1_g = graphs[0].get_tensor_by_name(GLOBAL_VARIABLE_NAME + "/v1:0")
+    # Verify the initialized value.
+    self.assertAllEqual(0.0, sessions[0].run(var_0))
+    self.assertAllEqual(1.0, sessions[0].run(var_1))
+    self.assertAllEqual(0.0, sessions[1].run(var_0_1))
+    self.assertAllEqual(1.0, sessions[1].run(var_1_1))
+    self.assertAllEqual(0.0, sessions[0].run(var_0_g))
+    self.assertAllEqual(1.0, sessions[0].run(var_1_g))
+
+    sessions[0].run(train_ops[0])
+    sessions[1].run(train_ops[1])
+
+    self.assertAllEqual(0.5, sessions[0].run(var_0))
+    self.assertAllEqual(1.5, sessions[0].run(var_1))
+    self.assertAllEqual(0.75, sessions[0].run(var_0_g))
+    self.assertAllEqual(1.75, sessions[0].run(var_1_g))
+    self.assertAllEqual(0.75, sessions[1].run(var_0_1))
+    self.assertAllEqual(1.75, sessions[1].run(var_1_1))
+
+  def testPS2TasksWithClusterSpecClass(self):
+    cluster_spec = server_lib.ClusterSpec({
+      "ps": ["ps0:2222", "ps1:2222"],
+      "worker": ["worker0:2222", "worker1:2222", "worker2:2222"]
+    })
+    ea_coustom = ElasticAverageCustomGetter(
+      worker_device="/job:worker/task:0")
+    from tensorflow.python.training import device_setter
+    with ops.device(
+        device_setter.replica_device_setter(cluster=cluster_spec,
+                                            worker_device="/job:worker/task:0",
+                                            ps_device="/job:ps")), \
+         variable_scope.variable_scope('', custom_getter=ea_coustom):
+      v = variable_scope.get_variable(initializer=[1, 2], name="v")
+      w = variable_scope.get_variable(initializer=[2, 1], name='w')
+      v_g, w_g = ea_coustom._global_map[v],ea_coustom._global_map[w]
+      self.assertDeviceEqual("/job:worker/task:0", v.device)
+      self.assertDeviceEqual("job:ps/task:0", v_g.device)
+      self.assertDeviceEqual("/job:worker/task:0", w.device)
+      self.assertDeviceEqual("job:ps/task:1", w_g.device)
+
+
+if __name__ == '__main__':
+  test.main()
-- 
GitLab


From 309f7e29a6f19ac060e9cf5f02e7de0eeac522de Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 11 Dec 2017 07:49:41 -0800
Subject: [PATCH 0856/1225] Fix mismatched argument comments to match parameter
 names

PiperOrigin-RevId: 178617543
---
 tensorflow/compiler/xla/literal_util_test.cc        |  2 +-
 tensorflow/compiler/xla/service/cpu/cpu_compiler.cc | 10 +++++-----
 tensorflow/core/kernels/conv_grad_ops_3d.cc         |  2 +-
 tensorflow/core/kernels/conv_ops_3d.cc              |  2 +-
 tensorflow/stream_executor/cuda/cuda_dnn.cc         |  2 +-
 5 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/tensorflow/compiler/xla/literal_util_test.cc b/tensorflow/compiler/xla/literal_util_test.cc
index 816bb3c549..7ff64c4134 100644
--- a/tensorflow/compiler/xla/literal_util_test.cc
+++ b/tensorflow/compiler/xla/literal_util_test.cc
@@ -515,7 +515,7 @@ TYPED_TEST(LiteralUtilTestTemplated, Relayout2x2) {
 
 TEST_F(LiteralUtilTest, ReshapeR0) {
   auto original = Literal::CreateR0<float>(1.7f);
-  auto reshape = original->Reshape(/*shape=*/{}).ConsumeValueOrDie();
+  auto reshape = original->Reshape(/*dimensions=*/{}).ConsumeValueOrDie();
   EXPECT_EQ(*original, *reshape);
 }
 
diff --git a/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc b/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc
index a476a75027..44d80d75f5 100644
--- a/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc
+++ b/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc
@@ -602,7 +602,7 @@ StatusOr<std::unique_ptr<Executable>> CpuCompiler::RunBackend(
           llvm::Function * ir_function,
           ir_emitter.EmitComputation(
               embedded_computation, embedded_computation->name(),
-              /*is_entry_computation=*/computation_is_parallel,
+              /*is_top_level_computation=*/computation_is_parallel,
               /*instruction_order=*/nullptr));
       // If this computation is parallel, remember it in the function name map.
       // This way we know what function to execute when we try to run code for
@@ -684,7 +684,7 @@ StatusOr<std::unique_ptr<Executable>> CpuCompiler::RunBackend(
           ir_emitter
               .EmitComputation(embedded_computation,
                                embedded_computation->name(),
-                               /*is_entry_computation=*/false,
+                               /*is_top_level_computation=*/false,
                                &module_sequence.at(embedded_computation))
               .status());
     }
@@ -693,7 +693,7 @@ StatusOr<std::unique_ptr<Executable>> CpuCompiler::RunBackend(
     TF_ASSIGN_OR_RETURN(
         llvm::Function * entry_function,
         ir_emitter.EmitComputation(computation, function_name_prefix,
-                                   /*is_entry_computation=*/true,
+                                   /*is_top_level_computation=*/true,
                                    &module_sequence.at(computation)));
 
     string function_name = llvm_ir::AsString(entry_function->getName());
@@ -858,7 +858,7 @@ CpuCompiler::CompileAheadOfTime(std::vector<std::unique_ptr<HloModule>> modules,
           ir_emitter
               .EmitComputation(embedded_computation,
                                embedded_computation->name(),
-                               /*is_entry_computation=*/false,
+                               /*is_top_level_computation=*/false,
                                &module_sequence.at(embedded_computation))
               .status());
     }
@@ -866,7 +866,7 @@ CpuCompiler::CompileAheadOfTime(std::vector<std::unique_ptr<HloModule>> modules,
     TF_ASSIGN_OR_RETURN(
         llvm::Function * entry_function,
         ir_emitter.EmitComputation(computation, entry_point_name,
-                                   /*is_entry_computation=*/true,
+                                   /*is_top_level_computation=*/true,
                                    &module_sequence.at(computation)));
 
     CHECK(entry_function->getName() == llvm_ir::AsStringRef(entry_point_name));
diff --git a/tensorflow/core/kernels/conv_grad_ops_3d.cc b/tensorflow/core/kernels/conv_grad_ops_3d.cc
index 4d0f1ab317..3650ab53b2 100644
--- a/tensorflow/core/kernels/conv_grad_ops_3d.cc
+++ b/tensorflow/core/kernels/conv_grad_ops_3d.cc
@@ -647,7 +647,7 @@ class Conv3DBackpropInputOp<GPUDevice, T> : public OpKernel {
         {{filter_size[0], filter_size[1], filter_size[2]}},
         // TODO(yangzihao): Send in arbitrary dilation rates after the dilated
         // conv is supported.
-        /*dilations=*/{{1, 1, 1}},
+        /*dilation=*/{{1, 1, 1}},
         {{strides[0], strides[1], strides[2]}},
         {{padding_planes, padding_rows, padding_cols}},
         dtype,
diff --git a/tensorflow/core/kernels/conv_ops_3d.cc b/tensorflow/core/kernels/conv_ops_3d.cc
index 39202d7334..21c84b2a0e 100644
--- a/tensorflow/core/kernels/conv_ops_3d.cc
+++ b/tensorflow/core/kernels/conv_ops_3d.cc
@@ -379,7 +379,7 @@ struct LaunchConvOp<GPUDevice, T> {
         {{filter_planes, filter_rows, filter_cols}},
         // TODO(yangzihao): Send in arbitrary dilation rates after the dilated
         // conv is supported.
-        /*dilations=*/{{1, 1, 1}},
+        /*dilation=*/{{1, 1, 1}},
         {{strides[0], strides[1], strides[2]}},
         {{pad_planes, pad_rows, pad_cols}},
         dtype,
diff --git a/tensorflow/stream_executor/cuda/cuda_dnn.cc b/tensorflow/stream_executor/cuda/cuda_dnn.cc
index d78362d4fb..1e26f53ae1 100644
--- a/tensorflow/stream_executor/cuda/cuda_dnn.cc
+++ b/tensorflow/stream_executor/cuda/cuda_dnn.cc
@@ -561,7 +561,7 @@ static bool TensorOpMathEnabled() {
   static bool is_enabled = [] {
     bool ret;
     TF_CHECK_OK(tensorflow::ReadBoolFromEnvVar("TF_DISABLE_TENSOR_OP_MATH",
-                                               /*default=*/false, &ret));
+                                               /*default_val=*/false, &ret));
     return !ret;
   }();
   return is_enabled;
-- 
GitLab


From 218caf995a1f1ea4883ed5575f7cd70536eff60c Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 11 Dec 2017 07:50:08 -0800
Subject: [PATCH 0857/1225] Cleanup: Remove unused declarations and unnecessary
 conversions

PiperOrigin-RevId: 178617606
---
 .../contrib/lite/testing/generated_examples_zip_test.cc    | 2 +-
 tensorflow/contrib/lite/toco/tooling_util.cc               | 4 +---
 .../core/grappler/optimizers/arithmetic_optimizer.cc       | 2 +-
 tensorflow/core/kernels/bucketize_op.cc                    | 7 +++----
 4 files changed, 6 insertions(+), 9 deletions(-)

diff --git a/tensorflow/contrib/lite/testing/generated_examples_zip_test.cc b/tensorflow/contrib/lite/testing/generated_examples_zip_test.cc
index f8f6044b62..3b3266738c 100644
--- a/tensorflow/contrib/lite/testing/generated_examples_zip_test.cc
+++ b/tensorflow/contrib/lite/testing/generated_examples_zip_test.cc
@@ -102,7 +102,7 @@ class ZipEnvironment : public ::testing::Environment {
     tensorflow::SubProcess proc;
     string unzip_binary =
         "/usr/bin/unzip";
-    proc.SetProgram(unzip_binary, {"unzip", "-d", dir, zip.c_str()});
+    proc.SetProgram(unzip_binary, {"unzip", "-d", dir, zip});
     proc.SetChannelAction(tensorflow::CHAN_STDOUT, tensorflow::ACTION_PIPE);
     proc.SetChannelAction(tensorflow::CHAN_STDERR, tensorflow::ACTION_PIPE);
     if (!proc.Start())
diff --git a/tensorflow/contrib/lite/toco/tooling_util.cc b/tensorflow/contrib/lite/toco/tooling_util.cc
index 65bf1f215a..21b85c86cc 100644
--- a/tensorflow/contrib/lite/toco/tooling_util.cc
+++ b/tensorflow/contrib/lite/toco/tooling_util.cc
@@ -30,7 +30,6 @@ limitations under the License.
 #include "tensorflow/contrib/lite/toco/toco_port.h"
 #include "tensorflow/core/platform/logging.h"
 
-
 namespace toco {
 
 string LogName(const Operator& op) {
@@ -996,8 +995,7 @@ void ResolveModelFlags(const ModelFlags& model_flags, Model* model) {
               << dst_input_array->shape().dims(i);
         }
       } else {
-        dst_input_array->mutable_shape()->CopyFrom(
-            specified_input_array.shape());
+        *dst_input_array->mutable_shape() = specified_input_array.shape();
       }
     }
 
diff --git a/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc b/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc
index 9629e074ee..d6bc8614f9 100644
--- a/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc
+++ b/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc
@@ -447,7 +447,7 @@ NodeDef* ArithmeticOptimizer::AddNode(const string& name,
       AddPrefixToNodeName(name, kArithmeticOptimizer);
   node_map_->AddNode(NodeName(name_with_prefix), new_node);
   if (node_to_copy != nullptr) {
-    new_node->CopyFrom(*node_to_copy);
+    *new_node = *node_to_copy;
   }
   new_node->set_name(name_with_prefix);
   return new_node;
diff --git a/tensorflow/core/kernels/bucketize_op.cc b/tensorflow/core/kernels/bucketize_op.cc
index c1693de538..4e4b6d5215 100644
--- a/tensorflow/core/kernels/bucketize_op.cc
+++ b/tensorflow/core/kernels/bucketize_op.cc
@@ -25,10 +25,8 @@ limitations under the License.
 
 namespace tensorflow {
 
-using thread::ThreadPool;
-
-typedef Eigen::ThreadPoolDevice CPUDevice;
-typedef Eigen::GpuDevice GPUDevice;
+using CPUDevice = Eigen::ThreadPoolDevice;
+using GPUDevice = Eigen::GpuDevice;
 
 namespace functor {
 
@@ -49,6 +47,7 @@ struct BucketizeFunctor<CPUDevice, T> {
     return Status::OK();
   }
 };
+
 }  // namespace functor
 
 template <typename Device, typename T>
-- 
GitLab


From 6c33765dd952ff0f9d1de9e3f81f972a9489f319 Mon Sep 17 00:00:00 2001
From: Derek Murray <mrry@google.com>
Date: Mon, 11 Dec 2017 07:58:48 -0800
Subject: [PATCH 0858/1225] Add `grpc_enabled` optional argument to various
 Python test rules.

PiperOrigin-RevId: 178618409
---
 .../contrib/data/python/kernel_tests/BUILD    |  34 ++---
 tensorflow/contrib/learn/BUILD                |  13 +-
 tensorflow/contrib/lookup/BUILD               |  12 +-
 tensorflow/contrib/opt/BUILD                  |  17 +--
 .../platform/default/build_config_root.bzl    |   3 +
 tensorflow/python/BUILD                       | 133 +++++++++---------
 tensorflow/python/data/kernel_tests/BUILD     |   3 +
 tensorflow/python/debug/BUILD                 |   1 +
 tensorflow/python/kernel_tests/BUILD          |   2 +
 tensorflow/tensorflow.bzl                     |  30 ++--
 10 files changed, 133 insertions(+), 115 deletions(-)

diff --git a/tensorflow/contrib/data/python/kernel_tests/BUILD b/tensorflow/contrib/data/python/kernel_tests/BUILD
index 2cb6b7e76c..9b6ad93294 100644
--- a/tensorflow/contrib/data/python/kernel_tests/BUILD
+++ b/tensorflow/contrib/data/python/kernel_tests/BUILD
@@ -4,7 +4,7 @@ licenses(["notice"])  # Apache 2.0
 
 exports_files(["LICENSE"])
 
-load("//tensorflow:tensorflow.bzl", "py_test")
+load("//tensorflow:tensorflow.bzl", "py_test", "tf_py_test")
 
 py_test(
     name = "batch_dataset_op_test",
@@ -157,14 +157,13 @@ py_test(
     ],
 )
 
-py_test(
+tf_py_test(
     name = "flat_map_dataset_op_test",
     size = "small",
     srcs = ["flat_map_dataset_op_test.py"],
-    srcs_version = "PY2AND3",
-    tags = ["no_pip"],
-    deps = [
+    additional_deps = [
         ":dataset_serialization_test",
+        "//third_party/py/numpy",
         "//tensorflow/contrib/data/python/ops:dataset_ops",
         "//tensorflow/python:array_ops",
         "//tensorflow/python:client_testlib",
@@ -177,8 +176,9 @@ py_test(
         "//tensorflow/python:session",
         "//tensorflow/python:training",
         "//tensorflow/python:variable_scope",
-        "//third_party/py/numpy",
     ],
+    grpc_enabled = True,
+    tags = ["no_pip"],
 )
 
 py_test(
@@ -207,16 +207,11 @@ py_test(
     ],
 )
 
-py_test(
+tf_py_test(
     name = "iterator_ops_cluster_test",
     size = "small",
     srcs = ["iterator_ops_cluster_test.py"],
-    srcs_version = "PY2AND3",
-    tags = [
-        "no_windows",
-        "oss_serial",
-    ],
-    deps = [
+    additional_deps = [
         "//tensorflow/contrib/data/python/ops:dataset_ops",
         "//tensorflow/core:protos_all_py",
         "//tensorflow/python:array_ops",
@@ -230,14 +225,19 @@ py_test(
         "//tensorflow/python:session",
         "//tensorflow/python/data/ops:iterator_ops",
     ],
+    grpc_enabled = True,
+    tags = [
+        "no_windows",
+        "oss_serial",
+    ],
 )
 
-py_test(
+tf_py_test(
     name = "iterator_ops_test",
     size = "small",
     srcs = ["iterator_ops_test.py"],
-    srcs_version = "PY2AND3",
-    deps = [
+    additional_deps = [
+        "//third_party/py/numpy",
         "//tensorflow/contrib/data/python/ops:dataset_ops",
         "//tensorflow/contrib/data/python/ops:readers",
         "//tensorflow/core:protos_all_py",
@@ -259,8 +259,8 @@ py_test(
         "//tensorflow/python:session",
         "//tensorflow/python:training",
         "//tensorflow/python/data/ops:iterator_ops",
-        "//third_party/py/numpy",
     ],
+    grpc_enabled = True,
 )
 
 py_test(
diff --git a/tensorflow/contrib/learn/BUILD b/tensorflow/contrib/learn/BUILD
index 26bbcab307..33f509ec12 100644
--- a/tensorflow/contrib/learn/BUILD
+++ b/tensorflow/contrib/learn/BUILD
@@ -10,7 +10,7 @@ package(default_visibility = [
     "//tensorflow:internal",
 ])
 
-load("//tensorflow:tensorflow.bzl", "py_test")
+load("//tensorflow:tensorflow.bzl", "py_test", "tf_py_test")
 
 py_library(
     name = "learn",
@@ -154,12 +154,11 @@ py_test(
     ],
 )
 
-py_test(
+tf_py_test(
     name = "experiment_test",
     size = "medium",
     srcs = ["python/learn/experiment_test.py"],
-    srcs_version = "PY2AND3",
-    deps = [
+    additional_deps = [
         ":learn",
         "//tensorflow/contrib/layers:layers_py",
         "//tensorflow/core:protos_all_py",
@@ -716,12 +715,11 @@ py_test(
     ],
 )
 
-py_test(
+tf_py_test(
     name = "graph_io_test",
     size = "small",
     srcs = ["python/learn/learn_io/graph_io_test.py"],
-    srcs_version = "PY2AND3",
-    deps = [
+    additional_deps = [
         ":learn",
         "//tensorflow/python:client",
         "//tensorflow/python:client_testlib",
@@ -737,6 +735,7 @@ py_test(
         "//tensorflow/python:training",
         "//tensorflow/python:variables",
     ],
+    grpc_enabled = True,
 )
 
 py_test(
diff --git a/tensorflow/contrib/lookup/BUILD b/tensorflow/contrib/lookup/BUILD
index b7b5418fe9..8ca03f4193 100644
--- a/tensorflow/contrib/lookup/BUILD
+++ b/tensorflow/contrib/lookup/BUILD
@@ -7,7 +7,7 @@ exports_files(["LICENSE"])
 
 package(default_visibility = ["//tensorflow:internal"])
 
-load("//tensorflow:tensorflow.bzl", "py_test")
+load("//tensorflow:tensorflow.bzl", "tf_py_test")
 
 # TODO(yleon): Refactor after one we switching to the V2 kernels.
 py_library(
@@ -26,13 +26,14 @@ py_library(
     ],
 )
 
-py_test(
+tf_py_test(
     name = "lookup_ops_test",
     size = "small",
     srcs = ["lookup_ops_test.py"],
-    srcs_version = "PY2AND3",
-    deps = [
+    additional_deps = [
         ":lookup_py",
+        "//third_party/py/numpy",
+        "@six_archive//:six",
         "//tensorflow/python:array_ops",
         "//tensorflow/python:client_testlib",
         "//tensorflow/python:errors",
@@ -43,9 +44,8 @@ py_test(
         "//tensorflow/python:sparse_tensor",
         "//tensorflow/python:training",
         "//tensorflow/python:variables",
-        "//third_party/py/numpy",
-        "@six_archive//:six",
     ],
+    grpc_enabled = True,
 )
 
 filegroup(
diff --git a/tensorflow/contrib/opt/BUILD b/tensorflow/contrib/opt/BUILD
index a9a63cbce0..d2811f21af 100644
--- a/tensorflow/contrib/opt/BUILD
+++ b/tensorflow/contrib/opt/BUILD
@@ -80,22 +80,22 @@ py_test(
     ],
 )
 
-py_test(
+tf_py_test(
     name = "variable_clipping_optimizer_test",
     srcs = ["python/training/variable_clipping_optimizer_test.py"],
-    srcs_version = "PY2AND3",
-    tags = [
-        "manual",  # Flaky: b/29892493
-        "notap",  # data race due to b/62910646
-    ],
-    deps = [
+    additional_deps = [
         ":opt_py",
+        "//third_party/py/numpy",
         "//tensorflow/python:client_testlib",
         "//tensorflow/python:framework_for_generated_wrappers",
         "//tensorflow/python:session",
         "//tensorflow/python:training",
         "//tensorflow/python:variables",
-        "//third_party/py/numpy",
+    ],
+    grpc_enabled = True,
+    tags = [
+        "manual",  # Flaky: b/29892493
+        "notap",  # data race due to b/62910646
     ],
 )
 
@@ -168,6 +168,7 @@ tf_py_test(
         "//tensorflow/python:training",
         "//tensorflow/python:variables",
     ],
+    grpc_enabled = True,
     tags = [
         "no_oss",  # Flaky due to port collisions
     ],
diff --git a/tensorflow/core/platform/default/build_config_root.bzl b/tensorflow/core/platform/default/build_config_root.bzl
index 6e98f12114..09029a4b25 100644
--- a/tensorflow/core/platform/default/build_config_root.bzl
+++ b/tensorflow/core/platform/default/build_config_root.bzl
@@ -19,6 +19,9 @@ def tf_additional_plugin_deps():
 def tf_additional_xla_deps_py():
   return []
 
+def tf_additional_grpc_deps_py():
+  return []
+
 def tf_additional_license_deps():
   return select({
       str(Label("//tensorflow:with_xla_support")): ["@llvm//:LICENSE.TXT"],
diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD
index e5c4347833..3566a36ddd 100644
--- a/tensorflow/python/BUILD
+++ b/tensorflow/python/BUILD
@@ -3147,130 +3147,124 @@ py_library(
     ],
 )
 
-py_test(
+tf_py_test(
     name = "server_lib_test",
     size = "small",
     srcs = ["training/server_lib_test.py"],
-    srcs_version = "PY2AND3",
-    deps = [
+    additional_deps = [
         ":array_ops",
         ":client",
         ":client_testlib",
         ":data_flow_ops",
         ":errors",
-        ":extra_py_tests_deps",
         ":framework_for_generated_wrappers",
         ":math_ops",
         ":training",
         ":variables",
-        "//tensorflow/core:protos_all_py",
         "//third_party/py/numpy",
+        "//tensorflow/core:protos_all_py",
     ],
+    grpc_enabled = True,
 )
 
-py_test(
+tf_py_test(
     name = "server_lib_multiple_containers_test",
     size = "small",
     srcs = ["training/server_lib_multiple_containers_test.py"],
-    srcs_version = "PY2AND3",
-    deps = [
+    additional_deps = [
         ":array_ops",
         ":client",
         ":client_testlib",
         ":data_flow_ops",
         ":errors",
-        ":extra_py_tests_deps",
         ":framework_for_generated_wrappers",
         ":math_ops",
         ":training",
         ":variables",
-        "//tensorflow/core:protos_all_py",
         "//third_party/py/numpy",
+        "//tensorflow/core:protos_all_py",
     ],
+    grpc_enabled = True,
 )
 
-py_test(
+tf_py_test(
     name = "server_lib_same_variables_clear_container_test",
     size = "small",
     srcs = ["training/server_lib_same_variables_clear_container_test.py"],
-    srcs_version = "PY2AND3",
-    deps = [
+    additional_deps = [
         ":array_ops",
         ":client",
         ":client_testlib",
         ":data_flow_ops",
         ":errors",
-        ":extra_py_tests_deps",
         ":framework_for_generated_wrappers",
         ":math_ops",
         ":training",
         ":variables",
-        "//tensorflow/core:protos_all_py",
         "//third_party/py/numpy",
+        "//tensorflow/core:protos_all_py",
     ],
+    grpc_enabled = True,
 )
 
-py_test(
+tf_py_test(
     name = "server_lib_same_variables_clear_test",
     size = "small",
     srcs = ["training/server_lib_same_variables_clear_test.py"],
-    srcs_version = "PY2AND3",
-    deps = [
+    additional_deps = [
         ":array_ops",
         ":client",
         ":client_testlib",
         ":data_flow_ops",
         ":errors",
-        ":extra_py_tests_deps",
         ":framework_for_generated_wrappers",
         ":math_ops",
         ":training",
         ":variables",
-        "//tensorflow/core:protos_all_py",
         "//third_party/py/numpy",
+        "//tensorflow/core:protos_all_py",
     ],
+    grpc_enabled = True,
 )
 
-py_test(
+tf_py_test(
     name = "server_lib_same_variables_no_clear_test",
     size = "small",
     srcs = ["training/server_lib_same_variables_no_clear_test.py"],
-    srcs_version = "PY2AND3",
-    deps = [
+    additional_deps = [
         ":array_ops",
         ":client",
         ":client_testlib",
         ":data_flow_ops",
         ":errors",
-        ":extra_py_tests_deps",
         ":framework_for_generated_wrappers",
         ":math_ops",
         ":training",
         ":variables",
-        "//tensorflow/core:protos_all_py",
         "//third_party/py/numpy",
+        "//tensorflow/core:protos_all_py",
     ],
+    grpc_enabled = True,
 )
 
-py_test(
+tf_py_test(
     name = "server_lib_sparse_job_test",
     size = "small",
     srcs = ["training/server_lib_sparse_job_test.py"],
-    srcs_version = "PY2AND3",
-    deps = [
+    additional_deps = [
         ":array_ops",
         ":client",
         ":client_testlib",
         ":data_flow_ops",
         ":errors",
-        ":extra_py_tests_deps",
         ":framework_for_generated_wrappers",
         ":math_ops",
         ":training",
         ":variables",
-        "//tensorflow/core:protos_all_py",
         "//third_party/py/numpy",
+        "//tensorflow/core:protos_all_py",
     ],
+    grpc_enabled = True,
 )
 
 cuda_py_test(
@@ -3290,6 +3284,7 @@ cuda_py_test(
         ":variables",
         "//third_party/py/numpy",
     ],
+    grpc_enabled = True,
     tags = [
         "no_oss",  # Test flaky due to port collisions.
         "oss_serial",
@@ -3308,6 +3303,7 @@ tf_py_test(
         ":training",
         ":variables",
     ],
+    grpc_enabled = True,
     tags = [
         "no_oss",  # Test flaky due to port collisions.
         "notsan",  # data race due to b/62910646
@@ -3338,17 +3334,11 @@ tf_cuda_library(
     alwayslink = 1,
 )
 
-py_test(
+tf_py_test(
     name = "session_test",
     size = "small",
     srcs = ["client/session_test.py"],
-    srcs_version = "PY2AND3",
-    tags = [
-        "no_gpu",
-        "no_pip_gpu",  # testInteractivePlacePrunedGraph fails on invalid assumption about GPU ops.
-        "no_windows",
-    ],
-    deps = [
+    additional_deps = [
         ":array_ops",
         ":client",
         ":control_flow_ops",
@@ -3366,21 +3356,19 @@ py_test(
         "//third_party/py/numpy",
         "@six_archive//:six",
     ],
+    grpc_enabled = True,
+    tags = [
+        "no_gpu",
+        "no_pip_gpu",  # testInteractivePlacePrunedGraph fails on invalid assumption about GPU ops.
+        "no_windows",
+    ],
 )
 
-py_test(
+tf_py_test(
     name = "session_clusterspec_prop_test",
     size = "small",
     srcs = ["client/session_clusterspec_prop_test.py"],
-    srcs_version = "PY2AND3",
-    tags = [
-        "no_gpu",
-        "no_oss",
-        "no_pip",
-        "no_pip_gpu",
-        "notap",
-    ],
-    deps = [
+    additional_deps = [
         ":array_ops",
         ":client",
         ":client_testlib",
@@ -3395,37 +3383,40 @@ py_test(
         ":variables",
         "//third_party/py/numpy",
     ],
+    grpc_enabled = True,
+    tags = [
+        "no_gpu",
+        "no_oss",
+        "no_pip",
+        "no_pip_gpu",
+        "notap",
+    ],
 )
 
-py_test(
+tf_py_test(
     name = "session_list_devices_test",
     size = "small",
     srcs = ["client/session_list_devices_test.py"],
-    srcs_version = "PY2AND3",
-    tags = [
-        "no_gpu",
-        "no_pip_gpu",
-        "notsan",  # data race due to b/62910646
-    ],
-    deps = [
+    additional_deps = [
         ":client",
         ":framework",
         ":framework_test_lib",
         ":platform_test",
         ":training",
     ],
+    grpc_enabled = True,
+    tags = [
+        "no_gpu",
+        "no_pip_gpu",
+        "notsan",  # data race due to b/62910646
+    ],
 )
 
-py_test(
+tf_py_test(
     name = "session_partial_run_test",
     size = "small",
     srcs = ["client/session_partial_run_test.py"],
-    srcs_version = "PY2AND3",
-    tags = [
-        "no_gpu",
-        "no_windows",
-    ],
-    deps = [
+    additional_deps = [
         ":array_ops",
         ":client",
         ":errors",
@@ -3438,6 +3429,11 @@ py_test(
         ":util",
         "@six_archive//:six",
     ],
+    grpc_enabled = True,
+    tags = [
+        "no_gpu",
+        "no_windows",
+    ],
 )
 
 cuda_py_test(
@@ -3694,20 +3690,18 @@ cuda_py_test(
         ":training",
         ":variables",
     ],
+    grpc_enabled = True,
     main = "training/session_manager_test.py",
 )
 
-py_test(
+tf_py_test(
     name = "supervisor_test",
     size = "small",
     srcs = ["training/supervisor_test.py"],
-    srcs_version = "PY2AND3",
-    tags = ["no_windows"],
-    deps = [
+    additional_deps = [
         ":array_ops",
         ":client_testlib",
         ":errors",
-        ":extra_py_tests_deps",
         ":framework",
         ":framework_for_generated_wrappers",
         ":io_ops",
@@ -3718,6 +3712,8 @@ py_test(
         ":variables",
         "//tensorflow/core:protos_all_py",
     ],
+    grpc_enabled = True,
+    tags = ["no_windows"],
 )
 
 py_test(
@@ -4331,6 +4327,7 @@ cuda_py_test(
         ":variables",
         "//third_party/py/numpy",
     ],
+    grpc_enabled = True,
     main = "client/session_benchmark.py",
 )
 
diff --git a/tensorflow/python/data/kernel_tests/BUILD b/tensorflow/python/data/kernel_tests/BUILD
index 4fd7c99ac6..5fb389cf92 100644
--- a/tensorflow/python/data/kernel_tests/BUILD
+++ b/tensorflow/python/data/kernel_tests/BUILD
@@ -103,6 +103,7 @@ tf_py_test(
         "//tensorflow/python:training",
         "//tensorflow/python/data/ops:dataset_ops",
     ],
+    grpc_enabled = True,
 )
 
 tf_py_test(
@@ -336,6 +337,7 @@ tf_py_test(
         "//tensorflow/python:tensor_shape",
         "//tensorflow/python:training",
     ],
+    grpc_enabled = True,
 )
 
 tf_py_test(
@@ -356,6 +358,7 @@ tf_py_test(
         "//tensorflow/python/data/ops:dataset_ops",
         "//tensorflow/python/data/ops:iterator_ops",
     ],
+    grpc_enabled = True,
     tags = [
         "no_oss",  # Test flaky due to port collisions.
         "no_windows",
diff --git a/tensorflow/python/debug/BUILD b/tensorflow/python/debug/BUILD
index c52bf49b98..2315ad4653 100644
--- a/tensorflow/python/debug/BUILD
+++ b/tensorflow/python/debug/BUILD
@@ -962,6 +962,7 @@ cuda_py_test(
         "//tensorflow/python:variables",
     ],
     data = ["//tensorflow/tools/dist_test/server:grpc_tensorflow_server"],
+    grpc_enabled = True,
     tags = [
         "no_oss",  # Incompatible with bazel_pip.
         "no_windows",
diff --git a/tensorflow/python/kernel_tests/BUILD b/tensorflow/python/kernel_tests/BUILD
index 2c73ecd8be..1d8ca99c07 100644
--- a/tensorflow/python/kernel_tests/BUILD
+++ b/tensorflow/python/kernel_tests/BUILD
@@ -483,6 +483,7 @@ tf_py_test(
         "//tensorflow/python:sparse_tensor",
         "//tensorflow/python:training",
     ],
+    grpc_enabled = True,
 )
 
 tf_py_test(
@@ -1443,6 +1444,7 @@ cuda_py_test(
         "//tensorflow/python:variable_scope",
         "//tensorflow/python:variables",
     ],
+    grpc_enabled = True,
     tags = ["no_windows"],
 )
 
diff --git a/tensorflow/tensorflow.bzl b/tensorflow/tensorflow.bzl
index 0db915f1b9..425997e41f 100644
--- a/tensorflow/tensorflow.bzl
+++ b/tensorflow/tensorflow.bzl
@@ -6,6 +6,7 @@ load(
     "//tensorflow/core:platform/default/build_config_root.bzl",
     "tf_cuda_tests_tags",
     "tf_sycl_tests_tags",
+    "tf_additional_grpc_deps_py",
     "tf_additional_xla_deps_py",
     "if_static",
 )
@@ -1340,9 +1341,12 @@ def tf_py_test(name,
                shard_count=1,
                additional_deps=[],
                flaky=0,
-               xla_enabled=False):
+               xla_enabled=False,
+               grpc_enabled=False):
   if xla_enabled:
     additional_deps = additional_deps + tf_additional_xla_deps_py()
+  if grpc_enabled:
+    additional_deps = additional_deps + tf_additional_grpc_deps_py()
   native.py_test(
       name=name,
       size=size,
@@ -1378,7 +1382,8 @@ def cuda_py_test(name,
                  additional_deps=[],
                  tags=[],
                  flaky=0,
-                 xla_enabled=False):
+                 xla_enabled=False,
+                 grpc_enabled=False):
   test_tags = tags + tf_cuda_tests_tags()
   tf_py_test(
       name=name,
@@ -1391,7 +1396,8 @@ def cuda_py_test(name,
       shard_count=shard_count,
       additional_deps=additional_deps,
       flaky=flaky,
-      xla_enabled=xla_enabled)
+      xla_enabled=xla_enabled,
+      grpc_enabled=grpc_enabled)
 
 register_extension_info(
     extension_name = "cuda_py_test",
@@ -1408,7 +1414,8 @@ def sycl_py_test(name,
                  additional_deps=[],
                  tags=[],
                  flaky=0,
-                 xla_enabled=False):
+                 xla_enabled=False,
+                 grpc_enabled=False):
   test_tags = tags + tf_sycl_tests_tags()
   tf_py_test(
       name=name,
@@ -1421,7 +1428,8 @@ def sycl_py_test(name,
       shard_count=shard_count,
       additional_deps=additional_deps,
       flaky=flaky,
-      xla_enabled=xla_enabled)
+      xla_enabled=xla_enabled,
+      grpc_enabled=grpc_enabled)
 
 register_extension_info(
     extension_name = "sycl_py_test",
@@ -1436,7 +1444,8 @@ def py_tests(name,
              tags=[],
              shard_count=1,
              prefix="",
-             xla_enabled=False):
+             xla_enabled=False,
+             grpc_enabled=False):
   for src in srcs:
     test_name = src.split("/")[-1].split(".")[0]
     if prefix:
@@ -1450,7 +1459,8 @@ def py_tests(name,
         shard_count=shard_count,
         data=data,
         additional_deps=additional_deps,
-        xla_enabled=xla_enabled)
+        xla_enabled=xla_enabled,
+        grpc_enabled=grpc_enabled)
 
 def cuda_py_tests(name,
                   srcs,
@@ -1460,7 +1470,8 @@ def cuda_py_tests(name,
                   shard_count=1,
                   tags=[],
                   prefix="",
-                  xla_enabled=False):
+                  xla_enabled=False,
+                  grpc_enabled=False):
   test_tags = tags + tf_cuda_tests_tags()
   py_tests(
       name=name,
@@ -1471,7 +1482,8 @@ def cuda_py_tests(name,
       tags=test_tags,
       shard_count=shard_count,
       prefix=prefix,
-      xla_enabled=xla_enabled)
+      xla_enabled=xla_enabled,
+      grpc_enabled=grpc_enabled)
 
 # Creates a genrule named <name> for running tools/proto_text's generator to
 # make the proto_text functions, for the protos passed in <srcs>.
-- 
GitLab


From 408029937b559300eb79168e9bd584d6d6c7c80c Mon Sep 17 00:00:00 2001
From: Blake Hechtman <blakehechtman@google.com>
Date: Mon, 11 Dec 2017 08:09:11 -0800
Subject: [PATCH 0859/1225] [XLA] Improve dot strength reductions to support
 transposes of the right and left hand side of a dot.

PiperOrigin-RevId: 178619673
---
 .../xla/service/algebraic_simplifier.cc       | 245 +++++++++++-------
 .../xla/service/algebraic_simplifier_test.cc  |  58 +++++
 2 files changed, 203 insertions(+), 100 deletions(-)

diff --git a/tensorflow/compiler/xla/service/algebraic_simplifier.cc b/tensorflow/compiler/xla/service/algebraic_simplifier.cc
index b1d0345e70..2c0d1900eb 100644
--- a/tensorflow/compiler/xla/service/algebraic_simplifier.cc
+++ b/tensorflow/compiler/xla/service/algebraic_simplifier.cc
@@ -193,6 +193,33 @@ class AlgebraicSimplifierVisitor : public DfsHloVisitorWithDefault {
         enable_dot_strength_reduction_(enable_dot_strength_reduction),
         enable_conv_simplification_(enable_conv_simplification) {}
 
+  // Transforms Dots where at least one input is a vector or has a degenerate
+  // dimension and converts it into a multiply and reduce. This should enable
+  // more fusion than leaving the nodes as Dot operations.
+  StatusOr<bool> HandleDotStrengthReduction(HloInstruction* dot);
+
+  // Reshapes an instruction to rank 1 if it is not already rank 1.
+  HloInstruction* Flatten(HloInstruction* hlo) {
+    if (ShapeUtil::Rank(hlo->shape()) == 1) {
+      return hlo;
+    }
+    return computation_->AddInstruction(HloInstruction::CreateReshape(
+        ShapeUtil::MakeShape(hlo->shape().element_type(),
+                             {ShapeUtil::ElementsIn(hlo->shape())}),
+        hlo));
+  }
+
+  // Helper method to perform and add reduction in a single dimension.
+  HloInstruction* AddReduce(HloInstruction* hlo, int64 dim) {
+    HloInstruction* zero = computation_->AddInstruction(
+        HloInstruction::CreateConstant(Literal::CreateR0(0.0f)));
+    HloComputation* AddReduce_computation = CreateScalarBinaryComputation(
+        computation_->parent(), F32, HloOpcode::kAdd);
+    Shape shape = ShapeUtil::DeleteDimension(dim, hlo->shape());
+    return computation_->AddInstruction(HloInstruction::CreateReduce(
+        shape, hlo, zero, {dim}, AddReduce_computation));
+  }
+
   // Convenience method for replacing an instruction with a bitcast.
   void ReplaceWithBitcast(HloInstruction* instruction);
 
@@ -574,6 +601,116 @@ Status AlgebraicSimplifierVisitor::HandleDivide(HloInstruction* divide) {
   return Status::OK();
 }
 
+StatusOr<bool> AlgebraicSimplifierVisitor::HandleDotStrengthReduction(
+    HloInstruction* dot) {
+  HloInstruction* lhs = dot->mutable_operand(0);
+  HloInstruction* rhs = dot->mutable_operand(1);
+  int64 lhs_collapsing_dim =
+      dot->dot_dimension_numbers().lhs_contracting_dimensions(0);
+  if (lhs->IsRank2Transpose()) {
+    lhs = lhs->mutable_operand(0);
+    lhs_collapsing_dim = 1 - lhs_collapsing_dim;
+  }
+  const int64 lhs_kept_dim = 1 - lhs_collapsing_dim;
+
+  int64 rhs_collapsing_dim =
+      dot->dot_dimension_numbers().rhs_contracting_dimensions(0);
+  if (rhs->IsRank2Transpose()) {
+    rhs = rhs->mutable_operand(0);
+    rhs_collapsing_dim = 1 - rhs_collapsing_dim;
+  }
+  const int64 rhs_kept_dim = 1 - rhs_collapsing_dim;
+
+  auto reshape_if_necessary = [&](HloInstruction* hlo) {
+    if (ShapeUtil::SameDimensions(hlo->shape(), dot->shape())) {
+      return hlo;
+    }
+    return computation_->AddInstruction(
+        HloInstruction::CreateReshape(dot->shape(), hlo));
+  };
+
+  auto broadcast_to_dim = [&](HloInstruction* hlo, const Shape& shape,
+                              int64 dim) {
+    return computation_->AddInstruction(
+        HloInstruction::CreateBroadcast(shape, hlo, {dim}));
+  };
+
+  auto multiply = [&](HloInstruction* local_lhs, HloInstruction* local_rhs) {
+    return computation_->AddInstruction(HloInstruction::CreateBinary(
+        local_lhs->shape(), HloOpcode::kMultiply, local_lhs, local_rhs));
+  };
+
+  // Strength reduce dot(a[K] , b[K]) =
+  //  reshape(result.shape,
+  //          reduce_sum(multiply(a, b), {0}))
+  if (ShapeUtil::Rank(rhs->shape()) == 1 &&
+      ShapeUtil::Rank(lhs->shape()) == 1) {
+    TF_RETURN_IF_ERROR(
+        ReplaceInstruction(dot, reshape_if_necessary(AddReduce(
+                                    multiply(Flatten(lhs), Flatten(rhs)), 0))));
+    return true;
+  }
+
+  if (ShapeUtil::IsEffectiveScalar(rhs->shape()) &&
+      ShapeUtil::IsEffectiveScalar(lhs->shape())) {
+    TF_RETURN_IF_ERROR(ReplaceInstruction(
+        dot, reshape_if_necessary(multiply(Flatten(lhs), Flatten(rhs)))));
+    return true;
+  }
+
+  // Simplify outer product into multiply with implicit broadcasting.
+  //
+  // A dot(a[M, 1], b[1, N]) = multiply(a [M,1], b [1, N])
+  if (ShapeUtil::Rank(rhs->shape()) == 2 &&
+      rhs->shape().dimensions(rhs_collapsing_dim) == 1) {
+    TF_RETURN_IF_ERROR(ReplaceInstruction(
+        dot, multiply(broadcast_to_dim(Flatten(lhs), dot->shape(), 0),
+                      broadcast_to_dim(Flatten(rhs), dot->shape(), 1))));
+    return true;
+  }
+
+  // Strength reduce dot(a[1, K], b) =
+  //    reshape(result.shape,
+  //      reduce_sum(
+  //        multiply(broadcast(reshape(a, [K]), {0}), b),
+  //        {0})
+  //      )
+  //    )
+  if (ShapeUtil::Rank(lhs->shape()) == 1 ||
+      (ShapeUtil::Rank(lhs->shape()) == 2 &&
+       lhs->shape().dimensions(lhs_kept_dim) == 1)) {
+    if (ShapeUtil::Rank(rhs->shape()) == 1) {
+      TF_RETURN_IF_ERROR(ReplaceInstruction(
+          dot,
+          reshape_if_necessary(AddReduce(multiply(Flatten(lhs), rhs), 0))));
+      return true;
+    }
+    TF_RETURN_IF_ERROR(ReplaceInstruction(
+        dot, reshape_if_necessary(
+                 AddReduce(multiply(broadcast_to_dim(Flatten(lhs), rhs->shape(),
+                                                     rhs_collapsing_dim),
+                                    rhs),
+                           rhs_collapsing_dim))));
+    return true;
+  }
+
+  // Strength reduce dot(a, b[K, 1]) =
+  //  reshape(result.shape,
+  //    reduce_sum(multiply(a, broadcast(reshape([K],b), {1})), {0})
+  //  )
+  if (ShapeUtil::Rank(rhs->shape()) == 1 ||
+      (ShapeUtil::Rank(rhs->shape()) == 2 &&
+       rhs->shape().dimensions(rhs_kept_dim) == 1)) {
+    TF_RETURN_IF_ERROR(ReplaceInstruction(
+        dot, reshape_if_necessary(AddReduce(
+                 multiply(lhs, broadcast_to_dim(Flatten(rhs), lhs->shape(),
+                                                lhs_collapsing_dim)),
+                 lhs_collapsing_dim))));
+    return true;
+  }
+  return false;
+}
+
 Status AlgebraicSimplifierVisitor::HandleDot(HloInstruction* dot) {
   auto lhs = dot->mutable_operand(0);
   auto rhs = dot->mutable_operand(1);
@@ -595,6 +732,14 @@ Status AlgebraicSimplifierVisitor::HandleDot(HloInstruction* dot) {
         dot, HloInstruction::CreateBroadcast(dot->shape(), zero, {}));
   }
 
+  if (enable_dot_strength_reduction_ && !is_layout_sensitive_) {
+    TF_ASSIGN_OR_RETURN(bool did_strength_reduction,
+                        HandleDotStrengthReduction(dot));
+    if (did_strength_reduction) {
+      return Status::OK();
+    }
+  }
+
   // Simplify dot(transpose(a), transpose(b)) to transpose(dot(b,a)).
   if (lhs->IsRank2Transpose() && rhs->IsRank2Transpose()) {
     DotDimensionNumbers dot_dimension_numbers;
@@ -608,106 +753,6 @@ Status AlgebraicSimplifierVisitor::HandleDot(HloInstruction* dot) {
         dot, HloInstruction::CreateTranspose(dot->shape(), new_dot, {1, 0}));
   }
 
-  if (!enable_dot_strength_reduction_) {
-    return Status::OK();
-  }
-
-  // Simplify outer product into multiply with implicit broadcasting.
-  //
-  // A dot(a[M, 1], b[1, N]) = multiply(a [M,1], b [1, N])
-  if (ShapeUtil::Rank(rhs->shape()) == 2 && rhs->shape().dimensions(0) == 1) {
-    return ReplaceWithNewInstruction(
-        dot, HloInstruction::CreateBinary(dot->shape(), HloOpcode::kMultiply,
-                                          lhs, rhs));
-  }
-
-  // The following graph transformations take Dots where at least one input is a
-  // vector or has a degenerate dimension and converts it into a multiply and
-  // reduce. This should enable more fusion than leaving the nodes as Dot
-  // operations.
-
-  // Strength reduce dot(a[K] , b[K]) =
-  //  reshape(result.shape,
-  //          reduce_sum(multiply(a, b), {0}))
-  if (ShapeUtil::Rank(rhs->shape()) == 1 &&
-      ShapeUtil::Rank(lhs->shape()) == 1) {
-    auto multiply = computation_->AddInstruction(HloInstruction::CreateBinary(
-        rhs->shape(), HloOpcode::kMultiply, lhs, rhs));
-    HloComputation* add_reduce_computation = CreateScalarBinaryComputation(
-        computation_->parent(), F32, HloOpcode::kAdd);
-    auto zero = computation_->AddInstruction(
-        HloInstruction::CreateConstant(Literal::CreateR0(0.0f)));
-    auto reduce = computation_->AddInstruction(HloInstruction::CreateReduce(
-        ShapeUtil::MakeShape(dot->shape().element_type(), {}), multiply, zero,
-        {0}, add_reduce_computation));
-    return ReplaceWithNewInstruction(
-        dot, HloInstruction::CreateReshape(dot->shape(), reduce));
-  }
-
-  // Strength reduce dot(a[1, K], b) =
-  //    reshape(result.shape,
-  //      reduce_sum(
-  //        multiply(broadcast(reshape(a, [K]), {0}), b),
-  //        {0})
-  //      )
-  //    )
-  if (ShapeUtil::Rank(lhs->shape()) == 1 ||
-      (ShapeUtil::Rank(lhs->shape()) == 2 && lhs->shape().dimensions(0) == 1)) {
-    auto new_lhs = computation_->AddInstruction(HloInstruction::CreateReshape(
-        ShapeUtil::MakeShape(lhs->shape().element_type(),
-                             {ShapeUtil::ElementsIn(lhs->shape())}),
-        lhs));
-    HloComputation* add_reduce_computation = CreateScalarBinaryComputation(
-        computation_->parent(), F32, HloOpcode::kAdd);
-    auto zero = computation_->AddInstruction(
-        HloInstruction::CreateConstant(Literal::CreateR0(0.0f)));
-    HloInstruction* reduce;
-    if (ShapeUtil::Rank(rhs->shape()) == 1) {
-      auto multiply = computation_->AddInstruction(HloInstruction::CreateBinary(
-          rhs->shape(), HloOpcode::kMultiply, new_lhs, rhs));
-      reduce = computation_->AddInstruction(HloInstruction::CreateReduce(
-          ShapeUtil::MakeShape(dot->shape().element_type(), {}), multiply, zero,
-          {0}, add_reduce_computation));
-    } else {
-      new_lhs = computation_->AddInstruction(
-          HloInstruction::CreateBroadcast(rhs->shape(), new_lhs, {0}));
-      auto multiply = computation_->AddInstruction(HloInstruction::CreateBinary(
-          rhs->shape(), HloOpcode::kMultiply, new_lhs, rhs));
-
-      reduce = computation_->AddInstruction(HloInstruction::CreateReduce(
-          ShapeUtil::MakeShape(dot->shape().element_type(),
-                               {rhs->shape().dimensions(1)}),
-          multiply, zero, {0}, add_reduce_computation));
-    }
-    return ReplaceWithNewInstruction(
-        dot, HloInstruction::CreateReshape(dot->shape(), reduce));
-  }
-
-  // Strength reduce dot(a, b[K, 1]) =
-  //  reshape(result.shape,
-  //    reduce_sum(multiply(a, broadcast(reshape([K],b), {1})), {0})
-  //  )
-  if (ShapeUtil::Rank(rhs->shape()) == 1 ||
-      (ShapeUtil::Rank(rhs->shape()) == 2 && rhs->shape().dimensions(1) == 1)) {
-    auto new_rhs = computation_->AddInstruction(HloInstruction::CreateReshape(
-        ShapeUtil::MakeShape(rhs->shape().element_type(),
-                             {ShapeUtil::ElementsIn(rhs->shape())}),
-        rhs));
-    new_rhs = computation_->AddInstruction(
-        HloInstruction::CreateBroadcast(lhs->shape(), new_rhs, {1}));
-    auto multiply = computation_->AddInstruction(HloInstruction::CreateBinary(
-        lhs->shape(), HloOpcode::kMultiply, lhs, new_rhs));
-    HloComputation* add_reduce_computation = CreateScalarBinaryComputation(
-        computation_->parent(), F32, HloOpcode::kAdd);
-    auto zero = computation_->AddInstruction(
-        HloInstruction::CreateConstant(Literal::CreateR0(0.0f)));
-    auto reduce = computation_->AddInstruction(HloInstruction::CreateReduce(
-        ShapeUtil::MakeShape(dot->shape().element_type(),
-                             {lhs->shape().dimensions(0)}),
-        multiply, zero, {1}, add_reduce_computation));
-    return ReplaceWithNewInstruction(
-        dot, HloInstruction::CreateReshape(dot->shape(), reduce));
-  }
   return Status::OK();
 }
 
diff --git a/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc b/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc
index 3d70505f6e..7462e397ff 100644
--- a/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc
+++ b/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc
@@ -2238,5 +2238,63 @@ TEST_F(AlgebraicSimplifierTest, TrivialDynamicUpdateSlice) {
               op::DynamicSlice(op::Parameter(), op::Parameter()));
 }
 
+class DotStrengthReductionTest
+    : public AlgebraicSimplifierTest,
+      public ::testing::WithParamInterface<
+          ::testing::tuple<int, int, int, bool, bool>> {};
+TEST_P(DotStrengthReductionTest, DotStrengthReduction) {
+  int m, k, n;
+  bool transpose_lhs, transpose_rhs;
+  std::tie(m, k, n, transpose_lhs, transpose_rhs) = GetParam();
+
+  Shape dot_shape = ShapeUtil::MakeShape(F32, {m, n});
+  Shape lhs_shape = ShapeUtil::MakeShape(F32, {m, k});
+  Shape transposed_lhs_shape = ShapeUtil::MakeShape(F32, {k, m});
+  Shape rhs_shape = ShapeUtil::MakeShape(F32, {k, n});
+  Shape transposed_rhs_shape = ShapeUtil::MakeShape(F32, {n, k});
+  HloComputation::Builder builder(TestName());
+
+  auto lhs = builder.AddInstruction(HloInstruction::CreateParameter(
+      0, transpose_lhs ? transposed_lhs_shape : lhs_shape, "lhs"));
+  if (transpose_lhs) {
+    lhs = builder.AddInstruction(
+        HloInstruction::CreateTranspose(lhs_shape, lhs, {1, 0}));
+  }
+  auto rhs = builder.AddInstruction(HloInstruction::CreateParameter(
+      1, transpose_rhs ? transposed_rhs_shape : rhs_shape, "rhs"));
+  if (transpose_rhs) {
+    rhs = builder.AddInstruction(
+        HloInstruction::CreateTranspose(rhs_shape, rhs, {1, 0}));
+  }
+  DotDimensionNumbers dot_dnums;
+  dot_dnums.add_lhs_contracting_dimensions(1);
+  dot_dnums.add_rhs_contracting_dimensions(0);
+  builder.AddInstruction(
+      HloInstruction::CreateDot(dot_shape, lhs, rhs, dot_dnums));
+  auto module = CreateNewModule();
+  auto computation = module->AddEntryComputation(builder.Build());
+  AlgebraicSimplifier simplifier(/*is_layout_sensitive=*/false,
+                                 non_bitcasting_callback());
+  TF_ASSERT_OK_AND_ASSIGN(bool changed, simplifier.Run(module.get()));
+  const bool dot_should_be_transformed = m == 1 || k == 1 || n == 1;
+  const bool computation_should_be_modified =
+      dot_should_be_transformed || (transpose_lhs && transpose_rhs);
+  EXPECT_EQ(changed, computation_should_be_modified);
+  bool has_no_dot = true;
+  for (const auto& hlo : computation->instructions()) {
+    if (hlo->opcode() == HloOpcode::kDot) {
+      has_no_dot = false;
+      break;
+    }
+  }
+  EXPECT_EQ(has_no_dot, dot_should_be_transformed);
+}
+
+INSTANTIATE_TEST_CASE_P(
+    DotStrengthReductionTestInstantiation, DotStrengthReductionTest,
+    ::testing::Combine(::testing::Values(1, 2), ::testing::Values(1, 2),
+                       ::testing::Values(1, 2), ::testing::Bool(),
+                       ::testing::Bool()));
+
 }  // namespace
 }  // namespace xla
-- 
GitLab


From 0683cdbd8701e4e6a582db1e71d58fcad628e070 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 11 Dec 2017 08:57:11 -0800
Subject: [PATCH 0860/1225] Enriching some C64 test coverage.

PiperOrigin-RevId: 178624364
---
 tensorflow/compiler/xla/literal_util.h        |  1 +
 tensorflow/compiler/xla/service/service.cc    |  3 +-
 .../compiler/xla/service/transfer_manager.h   | 12 +++-
 tensorflow/compiler/xla/shape_util.cc         |  8 +++
 tensorflow/compiler/xla/shape_util.h          |  4 ++
 .../compiler/xla/tests/dot_operation_test.cc  | 26 ++++++---
 tensorflow/compiler/xla/tests/tuple_test.cc   | 56 +++++++++++++++++++
 7 files changed, 100 insertions(+), 10 deletions(-)

diff --git a/tensorflow/compiler/xla/literal_util.h b/tensorflow/compiler/xla/literal_util.h
index 069d1b33ca..2981f9f875 100644
--- a/tensorflow/compiler/xla/literal_util.h
+++ b/tensorflow/compiler/xla/literal_util.h
@@ -99,6 +99,7 @@ class Literal {
     f16s_.clear();
     f32s_.clear();
     f64s_.clear();
+    c64s_.clear();
     tuple_literals_.clear();
   }
 
diff --git a/tensorflow/compiler/xla/service/service.cc b/tensorflow/compiler/xla/service/service.cc
index 6696bdf329..fe6993db98 100644
--- a/tensorflow/compiler/xla/service/service.cc
+++ b/tensorflow/compiler/xla/service/service.cc
@@ -1081,7 +1081,8 @@ tensorflow::Status Service::TransferToServer(const TransferToServerRequest* arg,
   TF_ASSIGN_OR_RETURN(
       std::unique_ptr<ShapedBuffer> shaped_buffer,
       ShapedBuffer::Allocate(
-          shape, execute_backend_->memory_allocator(), master_device_ordinal,
+          execute_backend_->transfer_manager()->HostShapeToDeviceShape(shape),
+          execute_backend_->memory_allocator(), master_device_ordinal,
           [this](const Shape& shape) {
             return execute_backend_->transfer_manager()->GetByteSizeRequirement(
                 shape);
diff --git a/tensorflow/compiler/xla/service/transfer_manager.h b/tensorflow/compiler/xla/service/transfer_manager.h
index fdc123e54e..be9b769ac8 100644
--- a/tensorflow/compiler/xla/service/transfer_manager.h
+++ b/tensorflow/compiler/xla/service/transfer_manager.h
@@ -62,10 +62,18 @@ class TransferManager {
       perftools::gputools::StreamExecutor* executor, const Literal& literal,
       perftools::gputools::DeviceMemoryBase* region) = 0;
 
+  // Returns the shape of the on-device representation for the given shape on
+  // the host. This is intended for use with ShapedBuffer where buffers are
+  // pre-allocated by the host, e.g. TransferLiteralToDevice, without the user
+  // needing to consider device-specific behaviors.
+  virtual Shape HostShapeToDeviceShape(const Shape& host_shape) const {
+    return host_shape;
+  }
+
   // Transfers the data held in the given ShapedBuffer into the provided literal
   // using the provided executor. literal_shape will be the shape for the
-  // literal. The shape of the ShapedBuffer and literal_shape must be
-  // compatible, but need not have the same layout.
+  // literal. The shape of the ShapedBuffer and DeviceShape(literal_shape) must
+  // be compatible, but need not have the same layout.
   virtual StatusOr<std::unique_ptr<Literal>> TransferLiteralFromDevice(
       perftools::gputools::StreamExecutor* executor,
       const ShapedBuffer& device_buffer) = 0;
diff --git a/tensorflow/compiler/xla/shape_util.cc b/tensorflow/compiler/xla/shape_util.cc
index 8845d6930e..fe5166643d 100644
--- a/tensorflow/compiler/xla/shape_util.cc
+++ b/tensorflow/compiler/xla/shape_util.cc
@@ -330,6 +330,14 @@ StatusOr<Shape> MakeShapeWithLayoutInternal(
   return MakeTupleShape(new_elements);
 }
 
+// Returns the shape of a real or imaginary component.
+/* static */ Shape ShapeUtil::ComplexComponentShape(
+    const Shape& complex_shape) {
+  CHECK(ElementIsComplex(complex_shape)) << HumanString(complex_shape);
+  return ChangeElementType(complex_shape, primitive_util::ComplexComponentType(
+                                              complex_shape.element_type()));
+}
+
 /* static */ bool ShapeUtil::ShapeIs(const Shape& shape,
                                      PrimitiveType element_type,
                                      std::initializer_list<int64> dimensions) {
diff --git a/tensorflow/compiler/xla/shape_util.h b/tensorflow/compiler/xla/shape_util.h
index df5b450438..666c7da697 100644
--- a/tensorflow/compiler/xla/shape_util.h
+++ b/tensorflow/compiler/xla/shape_util.h
@@ -351,6 +351,10 @@ class ShapeUtil {
   // shape. E.g. a tuple like (f32, s32, u32) would slice via 1,3 to (s32, u32).
   static Shape SliceTuple(const Shape& tuple, int64 start, int64 limit);
 
+  // Returns the shape of the real/imaginary components of the given complex
+  // shape.
+  static Shape ComplexComponentShape(const Shape& complex_shape);
+
   // Shorthand for testing whether a shape is of a given element type and
   // sequence of dimensions.
   //
diff --git a/tensorflow/compiler/xla/tests/dot_operation_test.cc b/tensorflow/compiler/xla/tests/dot_operation_test.cc
index 680d790b57..aea72417a8 100644
--- a/tensorflow/compiler/xla/tests/dot_operation_test.cc
+++ b/tensorflow/compiler/xla/tests/dot_operation_test.cc
@@ -334,9 +334,7 @@ XLA_TEST_F(DotOperationTest, MatrixVectorDotF32_259x258x1_FT) {
 }
 
 XLA_TEST_F(DotOperationTest, SquareMatrixDotF32MinorToMajorFF) {
-  constexpr bool kLhsRowMajor = false;
-  constexpr bool kRhsRowMajor = false;
-  TestSquareMatrixDot<float>(kLhsRowMajor, kRhsRowMajor);
+  TestSquareMatrixDot<float>(false, false);
 }
 
 XLA_TEST_F(DotOperationTest, SquareMatrixDotF32MinorToMajorFT) {
@@ -347,10 +345,24 @@ XLA_TEST_F(DotOperationTest, SquareMatrixDotF32MinorToMajorTF) {
   TestSquareMatrixDot<float>(true, false);
 }
 
-TEST_F(DotOperationTest, SquareMatrixDotF32MinorToMajorTT) {
-  constexpr bool kLhsRowMajor = true;
-  constexpr bool kRhsRowMajor = true;
-  TestSquareMatrixDot<float>(kLhsRowMajor, kRhsRowMajor);
+XLA_TEST_F(DotOperationTest, SquareMatrixDotF32MinorToMajorTT) {
+  TestSquareMatrixDot<float>(true, true);
+}
+
+XLA_TEST_F(DotOperationTest, SquareMatrixDotC64MinorToMajorFF) {
+  TestSquareMatrixDot<complex64>(false, false);
+}
+
+XLA_TEST_F(DotOperationTest, SquareMatrixDotC64MinorToMajorFT) {
+  TestSquareMatrixDot<complex64>(false, true);
+}
+
+XLA_TEST_F(DotOperationTest, SquareMatrixDotC64MinorToMajorTF) {
+  TestSquareMatrixDot<complex64>(true, false);
+}
+
+XLA_TEST_F(DotOperationTest, SquareMatrixDotC64MinorToMajorTT) {
+  TestSquareMatrixDot<complex64>(true, true);
 }
 
 XLA_TEST_F(DotOperationTest, SquareMatrixDotF64) {
diff --git a/tensorflow/compiler/xla/tests/tuple_test.cc b/tensorflow/compiler/xla/tests/tuple_test.cc
index 5a012c93d6..65489cfff1 100644
--- a/tensorflow/compiler/xla/tests/tuple_test.cc
+++ b/tensorflow/compiler/xla/tests/tuple_test.cc
@@ -445,5 +445,61 @@ XLA_TEST_F(TupleTest, GetTupleElementOfNestedTuple) {
   ComputeAndCompareR1<float>(&builder, expected, arguments, ErrorSpec(1e-5));
 }
 
+XLA_TEST_F(TupleTest, ComplexTuples) {
+  ComputationBuilder builder(client_, TestName());
+  {
+    Shape c64r0 = ShapeUtil::MakeShape(C64, {});
+    Shape c64r1 = ShapeUtil::MakeShape(C64, {2});
+    Shape c64r2 = ShapeUtil::MakeShape(C64, {3, 2});
+    Shape arg0_shape = ShapeUtil::MakeTupleShape(
+        {c64r0, ShapeUtil::MakeTupleShape({c64r1, c64r2})});
+    auto input0 = builder.Parameter(0, arg0_shape, "input0");
+    auto t0 = builder.GetTupleElement(input0, 0);
+    auto t1 = builder.GetTupleElement(input0, 1);
+    auto t10 = builder.GetTupleElement(t1, 0);
+    auto t11 = builder.GetTupleElement(t1, 1);
+    auto sum = builder.Add(builder.Add(t10, t11, {1}), t0);
+    auto input1 = builder.Parameter(1, c64r1, "input1");
+    auto prod = builder.Mul(input1, sum, {1});
+    builder.Tuple({builder.Tuple({prod, sum}),
+                   builder.ConstantR0<complex64>({123, 456})});
+  }
+
+  std::unique_ptr<GlobalData> arg0 =
+      client_
+          ->TransferToServer(*Literal::MakeTuple(
+              {Literal::CreateR0<complex64>({1, 2}).get(),
+               Literal::MakeTuple(
+                   {Literal::CreateR1<complex64>({{10, 20}, {30, 40}}).get(),
+                    Literal::CreateR2<complex64>(
+                        {{{100, 200}, {300, 400}},
+                         {{1000, 2000}, {3000, 4000}},
+                         {{10000, 20000}, {30000, 40000}}})
+                        .get()})
+                   .get()}))
+          .ConsumeValueOrDie();
+  std::unique_ptr<GlobalData> arg1 =
+      client_
+          ->TransferToServer(*Literal::CreateR1<complex64>({{1, 2}, {1, -2}}))
+          .ConsumeValueOrDie();
+  auto sum = Literal::CreateR2<complex64>({{{111, 222}, {331, 442}},
+                                           {{1011, 2022}, {3031, 4042}},
+                                           {{10011, 20022}, {30031, 40042}}});
+  auto prod = Literal::CreateFromShape(sum->shape());
+  ASSERT_TRUE(prod->Populate<complex64>(
+                      [&sum](tensorflow::gtl::ArraySlice<int64> indexes) {
+                        return sum->Get<complex64>(indexes) *
+                               (indexes[indexes.size() - 1] == 0
+                                    ? complex64(1, 2)
+                                    : complex64(1, -2));
+                      })
+                  .ok());
+  auto expected =
+      Literal::MakeTuple({Literal::MakeTuple({prod.get(), sum.get()}).get(),
+                          Literal::CreateR0<complex64>({123, 456}).get()});
+  ComputeAndCompareTuple(&builder, *expected, {arg0.get(), arg1.get()},
+                         error_spec_);
+}
+
 }  // namespace
 }  // namespace xla
-- 
GitLab


From 93a652ef5b635ffbd678d3992767c4862bffeb15 Mon Sep 17 00:00:00 2001
From: Yong Tang <yong.tang.github@outlook.com>
Date: Mon, 11 Dec 2017 12:00:02 -0600
Subject: [PATCH 0861/1225] Add annotations support for tf.estimator.Estimator
 (#12276)

* Add annotations support for tf.estimator.Estimator
This fix adds annotations support for tf.estimator.Estimator
so that the following works in python 3:
```
import tensorflow as tf

def model_fn(features: dict, labels: tf.Tensor, mode: str):
    pass

estimator = tf.estimator.Estimator(model_fn)
```

This fix fixes 12249.

Signed-off-by: Yong Tang <yong.tang.github@outlook.com>

* Update tf_inspect with getfullargspec

Signed-off-by: Yong Tang <yong.tang.github@outlook.com>
---
 tensorflow/python/estimator/util.py  |  2 +-
 tensorflow/python/util/tf_inspect.py | 20 ++++++++++++++++++++
 2 files changed, 21 insertions(+), 1 deletion(-)

diff --git a/tensorflow/python/estimator/util.py b/tensorflow/python/estimator/util.py
index 12f2592d84..b31486dfa1 100644
--- a/tensorflow/python/estimator/util.py
+++ b/tensorflow/python/estimator/util.py
@@ -52,7 +52,7 @@ def fn_args(fn):
   else:
     if _is_callable_object(fn):
       fn = fn.__call__
-    args = tf_inspect.getargspec(fn).args
+    args = tf_inspect.getfullargspec(fn).args
     if _is_bounded_method(fn):
       args.remove('self')
   return tuple(args)
diff --git a/tensorflow/python/util/tf_inspect.py b/tensorflow/python/util/tf_inspect.py
index 9ed125704b..d14e710388 100644
--- a/tensorflow/python/util/tf_inspect.py
+++ b/tensorflow/python/util/tf_inspect.py
@@ -45,6 +45,26 @@ def getargspec(object):  # pylint: disable=redefined-builtin
                if d.decorator_argspec is not None), _inspect.getargspec(target))
 
 
+def getfullargspec(obj):  # pylint: disable=redefined-builtin
+  """TFDecorator-aware replacement for inspect.getfullargspec and fallback to
+  inspect.getargspec in Python 2.
+
+  Args:
+    obj: A callable, possibly decorated.
+
+  Returns:
+    The `FullArgSpec` (`ArgSpec` in Python 2) that describes the signature of
+    the outermost decorator that changes the callable's signature. If the
+    callable is not decorated, `inspect.getfullargspec()`
+    (`inspect.getargspec()` in Python 2) will be called directly on the
+    callable.
+  """
+  spec_fn = getattr(_inspect, 'getfullargspec', getattr(_inspect, 'getargspec'))
+  decorators, target = tf_decorator.unwrap(obj)
+  return next((d.decorator_argspec for d in decorators
+               if d.decorator_argspec is not None), spec_fn(target))
+
+
 def getcallargs(func, *positional, **named):
   """TFDecorator-aware replacement for inspect.getcallargs.
 
-- 
GitLab


From d423542a78257aa32966d6fc26915874803bc166 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 11 Dec 2017 10:01:55 -0800
Subject: [PATCH 0862/1225] Remove using-directives

PiperOrigin-RevId: 178632103
---
 .../contrib/periodic_resample/ops/array_ops.cc |  4 +++-
 tensorflow/core/user_ops/fact.cc               | 18 +++++++++---------
 2 files changed, 12 insertions(+), 10 deletions(-)

diff --git a/tensorflow/contrib/periodic_resample/ops/array_ops.cc b/tensorflow/contrib/periodic_resample/ops/array_ops.cc
index 6029ad6a0d..c90fc06c7f 100644
--- a/tensorflow/contrib/periodic_resample/ops/array_ops.cc
+++ b/tensorflow/contrib/periodic_resample/ops/array_ops.cc
@@ -19,7 +19,7 @@
 #include "tensorflow/core/framework/op_kernel.h"
 #include "tensorflow/core/framework/shape_inference.h"
 
-using namespace tensorflow;
+namespace tensorflow {
 
 REGISTER_OP("PeriodicResample")
     .Attr("T: numbertype")
@@ -86,3 +86,5 @@ output: Periodically resampled tensor that has dimensions specified as in
   decreased as necessary.
 
 )doc");
+
+}  // namespace tensorflow
diff --git a/tensorflow/core/user_ops/fact.cc b/tensorflow/core/user_ops/fact.cc
index c512275506..800008e0b8 100644
--- a/tensorflow/core/user_ops/fact.cc
+++ b/tensorflow/core/user_ops/fact.cc
@@ -18,27 +18,27 @@ limitations under the License.
 #include "tensorflow/core/framework/op.h"
 #include "tensorflow/core/framework/op_kernel.h"
 
-using namespace tensorflow;
-
 REGISTER_OP("Fact")
     .Output("fact: string")
     .Doc(R"doc(
 Output a fact about factorials.
 )doc");
 
-class FactOp : public OpKernel {
+class FactOp : public tensorflow::OpKernel {
  public:
-  explicit FactOp(OpKernelConstruction* context) : OpKernel(context) {}
+  explicit FactOp(tensorflow::OpKernelConstruction* context)
+      : OpKernel(context) {}
 
-  void Compute(OpKernelContext* context) override {
+  void Compute(tensorflow::OpKernelContext* context) override {
     // Output a scalar string.
-    Tensor* output_tensor = nullptr;
-    OP_REQUIRES_OK(context,
-                   context->allocate_output(0, TensorShape(), &output_tensor));
+    tensorflow::Tensor* output_tensor = nullptr;
+    OP_REQUIRES_OK(context, context->allocate_output(
+                                0, tensorflow::TensorShape(), &output_tensor));
+    using tensorflow::string;
     auto output = output_tensor->template scalar<string>();
 
     output() = "0! == 1";
   }
 };
 
-REGISTER_KERNEL_BUILDER(Name("Fact").Device(DEVICE_CPU), FactOp);
+REGISTER_KERNEL_BUILDER(Name("Fact").Device(tensorflow::DEVICE_CPU), FactOp);
-- 
GitLab


From 865bef39bcd563bac6216807bdd4dfa06647adf1 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 11 Dec 2017 10:15:59 -0800
Subject: [PATCH 0863/1225] Use the Snapshot kernel to force a copy of global
 step instead of the ugly "x + 0" hack.

PiperOrigin-RevId: 178634559
---
 tensorflow/python/training/training_util.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/tensorflow/python/training/training_util.py b/tensorflow/python/training/training_util.py
index 89a9e12932..2a42ff2003 100644
--- a/tensorflow/python/training/training_util.py
+++ b/tensorflow/python/training/training_util.py
@@ -23,6 +23,7 @@ from tensorflow.python.eager import context
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import graph_io
 from tensorflow.python.framework import ops
+from tensorflow.python.ops import gen_array_ops
 from tensorflow.python.ops import init_ops
 from tensorflow.python.ops import resource_variable_ops
 from tensorflow.python.ops import state_ops
@@ -221,7 +222,6 @@ def _get_or_create_global_step_read(graph=None):
   global_step_tensor = get_global_step(graph)
   if global_step_tensor is None:
     return None
-  # add 'zero' so that it will create a copy of variable as Tensor.
   with graph.as_default() as g, g.name_scope(None):
     with g.name_scope(global_step_tensor.op.name + '/'):
       # using initialized_value to ensure that global_step is initialized before
@@ -229,7 +229,10 @@ def _get_or_create_global_step_read(graph=None):
       # under global_step_read_tensor dependency.
       global_step_value = global_step_tensor.initialized_value() if isinstance(
           global_step_tensor, variables.Variable) else global_step_tensor
-      global_step_read_tensor = global_step_value + 0
+      # pylint: disable=protected-access
+      # We use the snapshot kernel to make sure a copy is made of this tensor.
+      global_step_read_tensor = gen_array_ops._snapshot(global_step_value)
+      # pylint: enable=protected-access
       ops.add_to_collection(GLOBAL_STEP_READ_KEY, global_step_read_tensor)
   return _get_global_step_read(graph)
 
-- 
GitLab


From f022085110ff8eea0849c93c23269e923fa8a6a7 Mon Sep 17 00:00:00 2001
From: Blake Hechtman <blakehechtman@google.com>
Date: Mon, 11 Dec 2017 10:29:43 -0800
Subject: [PATCH 0864/1225] [XLA] And window reversal to the parser.

PiperOrigin-RevId: 178636821
---
 tensorflow/compiler/xla/tools/parser/hlo_parser.cc |  6 ++++++
 .../compiler/xla/tools/parser/hlo_parser_test.cc   | 13 +++++++++++++
 tensorflow/compiler/xla/window_util.cc             | 14 ++++++++++++++
 tensorflow/compiler/xla/window_util.h              |  2 ++
 4 files changed, 35 insertions(+)

diff --git a/tensorflow/compiler/xla/tools/parser/hlo_parser.cc b/tensorflow/compiler/xla/tools/parser/hlo_parser.cc
index 2017648f01..457b655783 100644
--- a/tensorflow/compiler/xla/tools/parser/hlo_parser.cc
+++ b/tensorflow/compiler/xla/tools/parser/hlo_parser.cc
@@ -1653,6 +1653,7 @@ bool HloParser::ParseWindow(Window* window) {
   std::vector<std::vector<int64>> pad;
   std::vector<int64> lhs_dilate;
   std::vector<int64> rhs_dilate;
+  std::vector<int64> rhs_reversal;
   while (lexer_.GetKind() != TokKind::kRbrace) {
     LocTy attr_loc = lexer_.GetLoc();
     string field_name;
@@ -1675,6 +1676,9 @@ bool HloParser::ParseWindow(Window* window) {
       if (field_name == "pad") {
         return ParseWindowPad(&pad);
       }
+      if (field_name == "rhs_reversal") {
+        return ParseDxD("rhs_reversal", &rhs_reversal);
+      }
       return Error(loc, StrCat("unexpected attribute name: ", field_name));
     }();
     if (!ok) {
@@ -1711,6 +1715,8 @@ bool HloParser::ParseWindow(Window* window) {
         lhs_dilate.empty() ? 1 : lhs_dilate[i]);
     window->mutable_dimensions(i)->set_window_dilation(
         rhs_dilate.empty() ? 1 : rhs_dilate[i]);
+    window->mutable_dimensions(i)->set_window_reversal(
+        rhs_reversal.empty() ? false : (rhs_reversal[i] == 1));
   }
   return ParseToken(TokKind::kRbrace, "expected '}' to end window attribute");
 }
diff --git a/tensorflow/compiler/xla/tools/parser/hlo_parser_test.cc b/tensorflow/compiler/xla/tools/parser/hlo_parser_test.cc
index 36b0c94327..7eebc5dc93 100644
--- a/tensorflow/compiler/xla/tools/parser/hlo_parser_test.cc
+++ b/tensorflow/compiler/xla/tools/parser/hlo_parser_test.cc
@@ -367,6 +367,19 @@ ENTRY %ConvolveR2.v3 (input: f32[1,2], filter: f32[1,1]) -> f32[1,2] {
   ROOT %convolution = f32[1,2]{0,1} convolution(f32[1,2]{1,0} %input, f32[1,1]{1,0} %filter), dim_labels=bf_io->bf
 }
 
+)"
+},
+// convolution backward
+{
+"ConvolutionBackward",
+R"(HloModule ConvolveBackward_module:
+
+ENTRY %ConvolveBackward (input: f32[128,7,7,512], filter: f32[3,3,512,512]) -> f32[128,14,14,512] {
+  %input = f32[128,7,7,512]{0,3,2,1} parameter(0)
+  %filter = f32[3,3,512,512]{3,2,1,0} parameter(1)
+  ROOT %convolution-base-dilated = f32[128,14,14,512]{0,3,2,1} convolution(f32[128,7,7,512]{0,3,2,1} %input, f32[3,3,512,512]{3,2,1,0} %filter), window={size=3x3 pad=1_2x1_2 lhs_dilate=2x2 rhs_reversal=1x1}, dim_labels=b01f_01oi->b01f
+}
+
 )"
 },
 // reverse(constant)
diff --git a/tensorflow/compiler/xla/window_util.cc b/tensorflow/compiler/xla/window_util.cc
index 2e0eba8de0..293f0781a2 100644
--- a/tensorflow/compiler/xla/window_util.cc
+++ b/tensorflow/compiler/xla/window_util.cc
@@ -88,6 +88,11 @@ string ToString(const Window& window) {
       return StrCat(dim.window_dilation());
     });
   }
+  if (HasWindowReversal(window)) {
+    add_field(" rhs_reversal", [](const WindowDimension& dim) {
+      return StrCat(dim.window_reversal() ? 1 : 0);
+    });
+  }
   return str;
 }
 
@@ -141,6 +146,15 @@ bool HasWindowDilation(const Window& window) {
   return false;
 }
 
+bool HasWindowReversal(const Window& window) {
+  for (const auto& dim : window.dimensions()) {
+    if (dim.window_reversal()) {
+      return true;
+    }
+  }
+  return false;
+}
+
 bool HasDilation(const Window& window) {
   return HasBaseDilation(window) || HasWindowDilation(window);
 }
diff --git a/tensorflow/compiler/xla/window_util.h b/tensorflow/compiler/xla/window_util.h
index 235cb2d59d..125900dac0 100644
--- a/tensorflow/compiler/xla/window_util.h
+++ b/tensorflow/compiler/xla/window_util.h
@@ -39,6 +39,8 @@ bool HasBaseDilation(const Window& window);
 bool HasWindowDilation(const Window& window);
 bool HasDilation(const Window& window);
 
+bool HasWindowReversal(const Window& window);
+
 // Returns the new bound after dilation.
 //
 // If a window with the given bound in some dimension is dilated with the given
-- 
GitLab


From bcdefca3364577ca9707ba118bddd2eec794558f Mon Sep 17 00:00:00 2001
From: Eli Bendersky <eliben@google.com>
Date: Mon, 11 Dec 2017 10:40:19 -0800
Subject: [PATCH 0865/1225] Fix-ups for XLA docs.

- Fix wording/grammar
- Remove obsolete "not implemented" notes on some ops

PiperOrigin-RevId: 178638405
---
 .../docs_src/performance/xla/broadcasting.md  | 10 +++----
 .../performance/xla/developing_new_backend.md |  6 ++--
 tensorflow/docs_src/performance/xla/index.md  | 17 ++++++-----
 .../performance/xla/operation_semantics.md    | 29 +++++++++----------
 4 files changed, 30 insertions(+), 32 deletions(-)

diff --git a/tensorflow/docs_src/performance/xla/broadcasting.md b/tensorflow/docs_src/performance/xla/broadcasting.md
index 8dbf0d0446..ca3bddf758 100644
--- a/tensorflow/docs_src/performance/xla/broadcasting.md
+++ b/tensorflow/docs_src/performance/xla/broadcasting.md
@@ -33,11 +33,11 @@ In Numpy, this is called [broadcasting]
 
 ## Principles
 
-XLA is a low-level infrastructure with a XLA language this is as strict and
-explicit as possible, avoiding implicit and "magical" features that may make
-some computations slightly easier to define, at the cost of more assumptions
-baked into user code that will be difficult to change in the long term. If
-necessary, implicit and magical features can be added in client-level wrappers.
+The XLA language is as strict and explicit as possible, avoiding implicit and
+"magical" features. Such features may make some computations slightly easier to
+define, at the cost of more assumptions baked into user code that will be
+difficult to change in the long term. If necessary, implicit and magical
+features can be added in client-level wrappers.
 
 In regards to broadcasting, explicit broadcasting specifications on operations
 between arrays of different ranks is required. This is different from Numpy,
diff --git a/tensorflow/docs_src/performance/xla/developing_new_backend.md b/tensorflow/docs_src/performance/xla/developing_new_backend.md
index 28010ff1b7..74ea15bb2b 100644
--- a/tensorflow/docs_src/performance/xla/developing_new_backend.md
+++ b/tensorflow/docs_src/performance/xla/developing_new_backend.md
@@ -62,11 +62,11 @@ If it is not possible to utilize LLVM, then the best option is to implement a
 new backend for XLA for the desired hardware. This option requires the most
 effort. The classes that need to be implemented are as follows:
 
-*   [StreamExecutor](https://www.tensorflow.org/code/tensorflow/stream_executor/stream_executor.h):
+*   [`StreamExecutor`](https://www.tensorflow.org/code/tensorflow/stream_executor/stream_executor.h):
     For many devices not all methods of `StreamExecutor` are needed. See
     existing `StreamExecutor` implementations for details.
-*   [xla::Compiler](https://www.tensorflow.org/code/tensorflow/compiler/xla/service/compiler.h):
-    This class encapsulates the compilation of a HLO computation into an
+*   [`xla::Compiler`](https://www.tensorflow.org/code/tensorflow/compiler/xla/service/compiler.h):
+    This class encapsulates the compilation of an HLO computation into an
     `xla::Executable`.
 *   [`xla::Executable`](https://www.tensorflow.org/code/tensorflow/compiler/xla/service/executable.h):
     This class is used to launch a compiled computation on the platform.
diff --git a/tensorflow/docs_src/performance/xla/index.md b/tensorflow/docs_src/performance/xla/index.md
index 19045b45d9..a884783074 100644
--- a/tensorflow/docs_src/performance/xla/index.md
+++ b/tensorflow/docs_src/performance/xla/index.md
@@ -65,18 +65,19 @@ The following diagram shows the compilation process in XLA:
   <img src="https://www.tensorflow.org/images/how-does-xla-work.png">
 </div>
 
-XLA comes with several optimizations and analyzes that are target-independent,
-such as [CSE](https://en.wikipedia.org/wiki/Common_subexpression_elimination),
+XLA comes with several optimizations and analysis passes that are
+target-independent, such as
+[CSE](https://en.wikipedia.org/wiki/Common_subexpression_elimination),
 target-independent operation fusion, and buffer analysis for allocating runtime
 memory for the computation.
 
 After the target-independent step, XLA sends the HLO computation to a backend.
-The backend can perform further HLO-level analyzes and optimizations, this time
-with target specific information and needs in mind. For example, the XLA GPU
-backend may perform operation fusion beneficial specifically for the GPU
-programming model and determine how to partition the computation into streams.
-At this stage, backends may also pattern-match certain operations or
-combinations thereof to optimized library calls.
+The backend can perform further HLO-level optimizations, this time with target
+specific information and needs in mind. For example, the XLA GPU backend may
+perform operation fusion beneficial specifically for the GPU programming model
+and determine how to partition the computation into streams. At this stage,
+backends may also pattern-match certain operations or combinations thereof to
+optimized library calls.
 
 The next step is target-specific code generation. The CPU and GPU backends
 included with XLA use [LLVM](http://llvm.org) for low-level IR, optimization,
diff --git a/tensorflow/docs_src/performance/xla/operation_semantics.md b/tensorflow/docs_src/performance/xla/operation_semantics.md
index 93d38a8438..d6f05f81bf 100644
--- a/tensorflow/docs_src/performance/xla/operation_semantics.md
+++ b/tensorflow/docs_src/performance/xla/operation_semantics.md
@@ -16,9 +16,9 @@ and familiar names; for example a *vector* is a 1-dimensional array and a
 ## BatchNormGrad
 
 See also
-[`ComputationBuilder::BatchNormGrad`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/computation_builder.h).
-
-<b> Warning: Not implemented yet. </b>
+[`ComputationBuilder::BatchNormGrad`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/computation_builder.h)
+and [the original batch normalization paper](https://arxiv.org/abs/1502.03167)
+for a detailed description of the algorithm.
 
 Calculates gradients of batch norm.
 
@@ -56,7 +56,7 @@ The three gradients are defined by the following formulas:
 The inputs `mean` and `variance` represents moments value
 across batch and spatial dimensions.
 
-The output type is a tuple of three ComputationDataHandles:
+The output type is a tuple of three handles:
 
 |Outputs       | Type                    | Semantics                           |
 |------------- | ----------------------- | ------------------------------------|
@@ -70,9 +70,9 @@ The output type is a tuple of three ComputationDataHandles:
 ## BatchNormInference
 
 See also
-[`ComputationBuilder::BatchNormInference`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/computation_builder.h).
-
-<b> Warning: Not implemented yet. </b>
+[`ComputationBuilder::BatchNormInference`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/computation_builder.h) and
+[the original batch normalization paper](https://arxiv.org/abs/1502.03167)
+for a detailed description of the algorithm.
 
 Normalizes an array across batch and spatial dimensions.
 
@@ -92,7 +92,7 @@ Normalizes an array across batch and spatial dimensions.
 
 For each feature in the feature dimension (`feature_index` is the index for the
 feature dimension in `operand`), the operation calculates the mean and variance
-across all the other dimensions and use the mean and variance to normalize each
+across all the other dimensions and uses the mean and variance to normalize each
 element in `operand`. The `feature_index` must be a valid index for the feature
 dimension in `operand`.
 
@@ -101,7 +101,7 @@ computing `mean` and `variance` for each batch. It uses the input `mean` and
 `variance` instead as estimated values. The purpose of this op is to reduce
 latency in inference, hence the name `BatchNormInference`.
 
-The output is a n dimensional, normalized array with the same shape as input
+The output is an n-dimensional, normalized array with the same shape as input
 `operand`.
 
 ## BatchNormTraining
@@ -111,8 +111,6 @@ See also
 [`the original batch normalization paper`](https://arxiv.org/abs/1502.03167)
 for a detailed description of the algorithm.
 
-<b> Warning: Not implemented on GPU backend yet. </b>
-
 Normalizes an array across batch and spatial dimensions.
 
 <b> `BatchNormTraining(operand, scale, offset, epsilon, feature_index)` </b>
@@ -129,10 +127,9 @@ Normalizes an array across batch and spatial dimensions.
 | `feature_index` | `int64`                 | Index to feature dimension       |
 :                 :                         : in `operand`                     :
 
-
 For each feature in the feature dimension (`feature_index` is the index for the
 feature dimension in `operand`), the operation calculates the mean and variance
-across all the other dimensions and use the mean and variance to normalize each
+across all the other dimensions and uses the mean and variance to normalize each
 element in `operand`. The `feature_index` must be a valid index for the feature
 dimension in `operand`.
 
@@ -151,7 +148,7 @@ assuming `operand` is an 4 dimensional array):
 
 The epsilon value, usually a small number, is added to avoid divide-by-zero errors.
 
-The output type is a tuple of three ComputationDataHandles:
+The output type is a tuple of three `ComputationDataHandle`s:
 
 | Outputs      | Type                    | Semantics                            |
 | ------------ | ----------------------- | -------------------------------------|
@@ -588,9 +585,9 @@ Computes a sum across replicas.
 | `operand`    | `ComputationDataHandle` | Array to sum across replicas.      |
 
 The output shape is the same as the input shape. For example, if there are two
-replicas and the operand has the value `(1.0, 2.5)` and `(3.0, 5.1)`
+replicas and the operand has the value `(1.0, 2.5)` and `(3.0, 5.25)`
 respectively on the two replicas, then the output value from this op will be
-`(4.0, 7.6)` on both replicas.
+`(4.0, 7.75)` on both replicas.
 
 Computing the result of CrossReplicaSum requires having one input from each
 replica, so if one replica executes a CrossReplicaSum node more times than
-- 
GitLab


From f7837905c0a24b18841198863a64e8a878c53b44 Mon Sep 17 00:00:00 2001
From: Benoit Steiner <bsteiner@google.com>
Date: Mon, 11 Dec 2017 10:43:52 -0800
Subject: [PATCH 0866/1225] Make sure we serialize the creation and deletion of
 clusters from python to avoid race conditions. Added a cluster context
 manager. Only check that we have a unique SingleMachine running during the
 provisioning phase.

PiperOrigin-RevId: 178638954
---
 .../core/grappler/clusters/single_machine.cc  | 80 ++++++++++---------
 .../core/grappler/clusters/single_machine.h   |  1 +
 .../grappler/clusters/single_machine_test.cc  | 11 ++-
 .../grappler/costs/graph_properties_test.cc   |  5 +-
 tensorflow/python/grappler/cluster.i          |  4 +
 tensorflow/python/grappler/cluster.py         | 19 ++++-
 tensorflow/python/grappler/cluster_test.py    | 17 ++++
 7 files changed, 94 insertions(+), 43 deletions(-)

diff --git a/tensorflow/core/grappler/clusters/single_machine.cc b/tensorflow/core/grappler/clusters/single_machine.cc
index ba27458104..b39d8c7526 100644
--- a/tensorflow/core/grappler/clusters/single_machine.cc
+++ b/tensorflow/core/grappler/clusters/single_machine.cc
@@ -31,20 +31,13 @@ limitations under the License.
 namespace tensorflow {
 namespace grappler {
 
-static std::atomic<bool> already_created(false);
+static std::atomic<bool> already_provisioned(false);
 
 SingleMachine::SingleMachine(int timeout_s, int num_cpu_cores, int num_gpus)
     : Cluster(timeout_s),
       num_gpus_(num_gpus),
       expected_init_time_s_(0),
       closing_(false) {
-  // This is really ugly: to avoid leaking variables, we need to reset the tf
-  // session every time we're done processing a grappler item. However,
-  // variables are global, and therefore we can't have more than 1 session alive
-  // at a time. This check detects when more that one cluster is created.
-  CHECK(!already_created);
-  already_created = true;
-
   VLOG(1) << "Number of CPU cores: " << num_cpu_cores
           << " Number of GPUs: " << num_gpus;
   thread_pool_.reset(new thread::ThreadPool(
@@ -71,17 +64,20 @@ SingleMachine::~SingleMachine() {
   // Reset the thread-pool so that there are no outstanding Session::Run(...)s
   // when we delete the session.
   thread_pool_.reset();
-
-  CHECK(already_created);
-  already_created = false;
 }
 
 Status SingleMachine::Provision() {
-  Status status = ResetSession();
-  if (!status.ok()) {
-    return status;
+  // This is really ugly: to avoid leaking variables, we need to reset the tf
+  // session every time we're done processing a grappler item. However,
+  // variables are global, and therefore we can't have more than 1 session alive
+  // at a time. This check detects when more that one cluster is provisioned.
+  if (already_provisioned) {
+    return errors::Unavailable(
+        "Can't provision more than one single cluster at a time");
   }
 
+  TF_RETURN_IF_ERROR(ResetSession());
+
   DeviceProperties attr = GetLocalCPUInfo();
   devices_["/job:localhost/replica:0/task:0/cpu:0"] = GetLocalCPUInfo();
 
@@ -92,8 +88,7 @@ Status SingleMachine::Provision() {
     VLOG(1) << "Adding GPU device " << device_name;
     devices_[device_name] = GetLocalGPUInfo(i);
   }
-  mutex_lock l(this->last_graph_mu_);
-  last_graph_ = nullptr;
+  already_provisioned = true;
   return Status::OK();
 }
 
@@ -110,27 +105,12 @@ Status SingleMachine::Initialize(const GrapplerItem& item) {
 }
 
 Status SingleMachine::Shutdown() {
-  TF_RETURN_IF_ERROR(CloseSession(true /*use_timeout*/));
+  TF_RETURN_IF_ERROR(ShutdownSession());
+
+  mutex_lock l(this->last_graph_mu_);
+  last_graph_ = nullptr;
+  already_provisioned = false;
 
-  // Delete the threadpool: this ensures that all the pending closures complete
-  // before we return. Note that if TF deadlocked on us, the closures will
-  // never complete, and the call to thread_pool_.reset() will never return:
-  // therefore we need to delete the threadpool with the background thread.
-  // That thread itself will also never complete, so the user should
-  // abort the process to avoid leaking too many resources.
-  auto n = std::make_shared<Notification>();
-  Env::Default()->SchedClosure([this, n]() {
-    thread_pool_.reset();
-    n->Notify();
-  });
-  int64 timeout_us = 1000000ll * timeout_s_;
-  const bool notified = WaitForNotificationWithTimeout(n.get(), timeout_us);
-  if (!notified) {
-    // Let the caller know that we can't shutdown the session properly since
-    // there are calls to Session::Run() still running.
-    return errors::Unavailable("The session is still running graphs after ",
-                               timeout_s_, " seconds");
-  }
   return Status::OK();
 }
 
@@ -276,12 +256,38 @@ Status SingleMachine::CloseSession(bool use_timeout) {
   return Status::OK();
 }
 
+Status SingleMachine::ShutdownSession() {
+  TF_RETURN_IF_ERROR(CloseSession(true /*use_timeout*/));
+
+  // Delete the threadpool: this ensures that all the pending closures complete
+  // before we return. Note that if TF deadlocked on us, the closures will
+  // never complete, and the call to thread_pool_.reset() will never return:
+  // therefore we need to delete the threadpool with the background thread.
+  // That thread itself will also never complete, so the user should
+  // abort the process to avoid leaking too many resources.
+  auto n = std::make_shared<Notification>();
+  Env::Default()->SchedClosure([this, n]() {
+    thread_pool_.reset();
+    n->Notify();
+  });
+  int64 timeout_us = 1000000ll * timeout_s_;
+  const bool notified = WaitForNotificationWithTimeout(n.get(), timeout_us);
+  if (!notified) {
+    // Let the caller know that we can't shutdown the session properly since
+    // there are calls to Session::Run() still running.
+    return errors::Unavailable("The session is still running graphs after ",
+                               timeout_s_, " seconds");
+  }
+
+  return Status::OK();
+}
+
 Status SingleMachine::ResetSession() {
   if (session_) {
     LOG(INFO) << "Cleaning up previous session";
 
     // Make sure the session is properly closed
-    TF_RETURN_IF_ERROR(Shutdown());
+    TF_RETURN_IF_ERROR(ShutdownSession());
 
     // Destroying the object deletes all its variables as well. This is only
     // true for DirectSession.
diff --git a/tensorflow/core/grappler/clusters/single_machine.h b/tensorflow/core/grappler/clusters/single_machine.h
index d3efbe3c61..be005a9509 100644
--- a/tensorflow/core/grappler/clusters/single_machine.h
+++ b/tensorflow/core/grappler/clusters/single_machine.h
@@ -49,6 +49,7 @@ class SingleMachine : public Cluster {
                         RunMetadata* run_metadata, int64 timeout_s);
   Status ResetSession();
   Status CloseSession(bool use_timeout);
+  Status ShutdownSession();
   void MergeCosts(CostGraphDef* graph_costs, const CostGraphDef& init_costs,
                   const CostGraphDef& queue_costs);
 
diff --git a/tensorflow/core/grappler/clusters/single_machine_test.cc b/tensorflow/core/grappler/clusters/single_machine_test.cc
index f6c325c2a4..df936efad1 100644
--- a/tensorflow/core/grappler/clusters/single_machine_test.cc
+++ b/tensorflow/core/grappler/clusters/single_machine_test.cc
@@ -48,6 +48,9 @@ class SingleMachineTest : public ::testing::Test {
   }
 
   void TearDown() override {
+    if (cluster_) {
+      TF_CHECK_OK(cluster_->Shutdown());
+    }
     cluster_.reset();
   }
 
@@ -178,8 +181,7 @@ TEST_F(SingleMachineTest, GraphOptimizations) {
   // With optimizations turned on, some nodes could have been optimized away,
   // and the cost model could be partial. Restart the cluster with optimizations
   // disabled and make sure we have all the information we're looking for.
-  cluster_.reset();
-  cluster_.reset(new SingleMachine(5, 3, 0));
+  TF_CHECK_OK(cluster_->Shutdown());
   cluster_->DisableOptimizer(true);
   TF_CHECK_OK(cluster_->Provision());
 
@@ -324,7 +326,7 @@ static void RunInfiniteTFLoop() {
 
 TEST_F(SingleMachineTest, InfiniteLoops) {
   // The RunInfiniteTFLoop function creates its own cluster.
-  cluster_.reset();
+  TF_CHECK_OK(cluster_->Shutdown());
 
   EXPECT_EXIT(RunInfiniteTFLoop(), ::testing::ExitedWithCode(0), ".*");
 }
@@ -578,7 +580,8 @@ TEST_F(SingleMachineTest, ReleaseMemoryAfterDestruction) {
   EXPECT_EQ(device_memory.size(), 1);
   EXPECT_GT(device_memory.begin()->second.bytes_in_use, 0);
 
-  // Reset cluster_ would release all memory.
+  // Shutting down the cluster_ would release all memory.
+  TF_CHECK_OK(cluster_->Shutdown());
   cluster_.reset();
   std::unordered_map<string, AllocatorStats> device_memory_after;
   TF_CHECK_OK(GetDeviceMemoryStats(options, &device_memory_after));
diff --git a/tensorflow/core/grappler/costs/graph_properties_test.cc b/tensorflow/core/grappler/costs/graph_properties_test.cc
index a50ae0898c..5f2ac0c652 100644
--- a/tensorflow/core/grappler/costs/graph_properties_test.cc
+++ b/tensorflow/core/grappler/costs/graph_properties_test.cc
@@ -43,7 +43,10 @@ class GraphPropertiesTest : public ::testing::Test {
     TF_CHECK_OK(cluster_->Provision());
   }
 
-  void TearDown() override { cluster_.reset(); }
+  void TearDown() override {
+    TF_CHECK_OK(cluster_->Shutdown());
+    cluster_.reset();
+  }
 
  protected:
   // Returns a string form of <p>, suitable for comparing type and shape.
diff --git a/tensorflow/python/grappler/cluster.i b/tensorflow/python/grappler/cluster.i
index 18fda345e6..c9bcfeb6e8 100644
--- a/tensorflow/python/grappler/cluster.i
+++ b/tensorflow/python/grappler/cluster.i
@@ -134,13 +134,17 @@ static GCluster TF_NewVirtualCluster(
   }
   tensorflow::grappler::Cluster*cluster_ =
       new tensorflow::grappler::VirtualCluster(devices);
+  PyGILState_STATE gstate = PyGILState_Ensure();
   tensorflow::Status status = cluster_->Provision();
+  PyGILState_Release(gstate);
   tensorflow::Set_TF_Status_from_Status(out_status, status);
   return GCluster(cluster_);
 }
 
 static void TF_ShutdownCluster(GCluster cluster) {
+  PyGILState_STATE gstate = PyGILState_Ensure();
   cluster->Shutdown();
+  PyGILState_Release(gstate);
 }
 
 tensorflow::Status _GetOpPerformanceDataAndRunTime(
diff --git a/tensorflow/python/grappler/cluster.py b/tensorflow/python/grappler/cluster.py
index cf795fddb7..60e1322050 100644
--- a/tensorflow/python/grappler/cluster.py
+++ b/tensorflow/python/grappler/cluster.py
@@ -18,6 +18,8 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
+import contextlib
+
 from tensorflow.core.framework import step_stats_pb2
 from tensorflow.core.grappler.costs import op_performance_data_pb2
 from tensorflow.core.protobuf import device_properties_pb2
@@ -56,9 +58,13 @@ class Cluster(object):
         self._tf_cluster = tf_cluster.TF_NewVirtualCluster(
             devices_serialized, status)
 
-  def __del__(self):
+  def Shutdown(self):
     if self._tf_cluster is not None:
       tf_cluster.TF_ShutdownCluster(self._tf_cluster)
+      self._tf_cluster = None
+
+  def __del__(self):
+    self.Shutdown()
 
   @property
   def tf_cluster(self):
@@ -108,3 +114,14 @@ class Cluster(object):
           item.tf_item, self._tf_cluster, status)
 
     return ret_from_swig
+
+
+@contextlib.contextmanager
+def Provision(allow_soft_placement=True,
+              disable_detailed_stats=True,
+              disable_timeline=True,
+              devices=None):
+  cluster = Cluster(allow_soft_placement, disable_detailed_stats,
+                    disable_timeline, devices)
+  yield cluster
+  cluster.Shutdown()
diff --git a/tensorflow/python/grappler/cluster_test.py b/tensorflow/python/grappler/cluster_test.py
index f1f02963de..77dd55981b 100644
--- a/tensorflow/python/grappler/cluster_test.py
+++ b/tensorflow/python/grappler/cluster_test.py
@@ -106,6 +106,23 @@ class ClusterTest(test.TestCase):
       self.assertGreater(run_time, 0)
       self.assertEqual(len(op_perfs), 15)
 
+  def testContext(self):
+    with ops.Graph().as_default() as g:
+      a = random_ops.random_uniform(shape=())
+      b = random_ops.random_uniform(shape=())
+      c = a + b
+      train_op = ops.get_collection_ref(ops.GraphKeys.TRAIN_OP)
+      train_op.append(c)
+      mg = meta_graph.create_meta_graph_def(graph=g)
+      grappler_item = item.Item(mg)
+
+    with cluster.Provision(
+        disable_detailed_stats=False, disable_timeline=False) as gcluster:
+      op_perfs, run_time, step_stats = gcluster.MeasureCosts(grappler_item)
+      self.assertTrue(run_time > 0)
+      self.assertEqual(len(op_perfs), 10)
+      self.assertTrue(step_stats.dev_stats)
+
 
 if __name__ == '__main__':
   test.main()
-- 
GitLab


From 0117c7a9fd9878b6a04f3affbee274cfd41138dd Mon Sep 17 00:00:00 2001
From: Yao Zhang <yaozhang@google.com>
Date: Mon, 11 Dec 2017 10:59:54 -0800
Subject: [PATCH 0867/1225] Use DataFormatVecPermute instead Gather, which is
 very slow.

PiperOrigin-RevId: 178641878
---
 .../grappler/optimizers/layout_optimizer.cc   | 136 +++---------------
 .../optimizers/layout_optimizer_test.cc       |  32 ++---
 .../python/grappler/layout_optimizer_test.py  |  40 +++++-
 3 files changed, 67 insertions(+), 141 deletions(-)

diff --git a/tensorflow/core/grappler/optimizers/layout_optimizer.cc b/tensorflow/core/grappler/optimizers/layout_optimizer.cc
index 7c177d8e98..196f00ba10 100644
--- a/tensorflow/core/grappler/optimizers/layout_optimizer.cc
+++ b/tensorflow/core/grappler/optimizers/layout_optimizer.cc
@@ -40,10 +40,8 @@ const char kPrefix[] = "LayoutOptimizer";
 const char kDataFormatOp[] = "LayoutOptimizerDataFormatOp";
 const char kPermNHWCToNCHW[] = "LayoutOptimizerPermConstNHWCToNCHW";
 const char kPermNCHWToNHWC[] = "LayoutOptimizerPermConstNCHWToNHWC";
-const char kGatherAxisConst[] = "LayoutOptimizerGatherAxisConst";
 const char kTransposeNHWCToNCHW[] = "LayoutOptimizerTransposeNHWCToNCHW";
 const char kTransposeNCHWToNHWC[] = "LayoutOptimizerTransposeNCHWToNHWC";
-const char kPermVecNHWCToNCHW[] = "LayoutOptimizerPermVecNHWCToNCHW";
 const char kReshapeNHWCToNCHW[] = "LayoutOptimizerReshapeNHWCToNCHW";
 const char kReshapeConst[] = "LayoutOptimizerReshapeConst";
 const char kReductionConst[] = "LayoutOptimizerReductionConst";
@@ -544,14 +542,15 @@ class NodeProcessor : public GraphProcessor {
     return const_node;
   }
 
-  void AddNodeDataFormatOp(const string& op, int input_pos) {
+  void AddNodeDataFormatOp(const string& op, int input_pos, DataType dtype) {
     NodeDef* added_node = graph_->add_node();
-    added_node->set_name(strings::StrCat(kDataFormatOp, "-", node_->name()));
+    added_node->set_name(
+        strings::StrCat(kDataFormatOp, "_", node_->name(), "_", input_pos));
     added_node->set_op(op);
     node_map_->AddNode(added_node->name(), added_node);
     added_node->set_device(node_->device());
     AttrValue attr_data_type;
-    attr_data_type.set_type(DT_INT32);
+    attr_data_type.set_type(dtype);
     added_node->mutable_attr()->insert({"T", attr_data_type});
     AttrValue attr_format;
     attr_format.set_s("NHWC");
@@ -756,7 +755,7 @@ class Conv2DBackpropInputProcessor : public Conv2DProcessor {
     if (IsConstant(*input_size_node)) {
       TF_RETURN_IF_ERROR(UpdateAttrValueOfInput(0));
     } else {
-      AddNodeDataFormatOp("DataFormatVecPermute", 0);
+      AddNodeDataFormatOp("DataFormatVecPermute", 0, DT_INT32);
     }
     return Status::OK();
   }
@@ -1020,7 +1019,9 @@ class ConcatProcessor : public AgnosticNodeProcessor {
     if (IsConstant(*dim_node)) {
       TF_RETURN_IF_ERROR(UpdateAttrValueOfInput(axis_node_pos_));
     } else {
-      AddNodeDataFormatOp("DataFormatDimMap", axis_node_pos_);
+      DataType dtype =
+          (IsSplit(*node_)) ? DT_INT32 : node_->attr().at("Tidx").type();
+      AddNodeDataFormatOp("DataFormatDimMap", axis_node_pos_, dtype);
     }
     return Status::OK();
   }
@@ -1103,107 +1104,13 @@ class SliceProcessor : public AgnosticNodeProcessor {
   Status CustomizedProcessing() override {
     // Skip the first input, which is the data to be sliced.
     for (int i = 1; i < node_->input_size(); i++) {
-      string base_name = strings::StrCat(node_->name(), "-input", i);
-      string node_name =
-          AddPrefixToNodeName(base_name, kPermVecNHWCToNCHW, "-");
-      TF_RETURN_IF_ERROR(HasAttribute(*node_, "Index"));
-      AddNodePermVec(node_name, node_->input(i), node_->device(),
-                     node_->attr().at("Index").type(), true);
-      node_map_->UpdateOutput(node_->input(i), node_->name(), node_name);
-      node_map_->AddOutput(node_name, node_->name());
-      *node_->mutable_input(i) = node_name;
-    }
-    return Status::OK();
-  }
-
- private:
-  NodeDef* AddNodeGatherAxisConst(const string& suffix,
-                                  const string& depended_node,
-                                  const string& device) {
-    auto const_node = AddNodeConstScalar(
-        strings::StrCat(kGatherAxisConst, "-", suffix), device, DT_INT32, 0);
-    // This is to ensure the Slice node and the const node are
-    // in the same frame.
-    *const_node->add_input() = AsControlDependency(depended_node);
-    return const_node;
-  }
-
-  string GetOrAddNodeGatherAxisConst() {
-    string const_name;
-    if (is_in_frame_) {
-      auto const_node = AddNodeGatherAxisConst(
-          node_->name(), NodeName(node_->input(0)), node_->device());
-      const_name = const_node->name();
-    } else {
-      const_name = kGatherAxisConst;
-    }
-    return const_name;
-  }
-
-  string GetOrAddNodePermNHWCToNCHW() {
-    string const_name;
-    if (is_in_frame_) {
-      auto const_node = AddNodePermNHWCToNCHW(
-          node_->name(), NodeName(node_->input(0)), node_->device());
-      const_name = const_node->name();
-    } else {
-      const_name = kPermNHWCToNCHW;
-    }
-    return const_name;
-  }
-
-  string GetOrAddNodePermNCHWToNHWC() {
-    string const_name;
-    if (is_in_frame_) {
-      auto const_node = AddNodePermNCHWToNHWC(
-          node_->name(), NodeName(node_->input(0)), node_->device());
-      const_name = const_node->name();
-    } else {
-      const_name = kPermNCHWToNHWC;
-    }
-    return const_name;
-  }
-
-  void AddNodePermVec(const string& node_name, const string& input_name,
-                      const string& device, DataType data_type,
-                      bool NHWCToNCHW) {
-    NodeDef* node = graph_->add_node();
-    node_map_->AddNode(node_name, node);
-    node->set_name(node_name);
-    node->set_device(device);
-    *node->add_input() = input_name;
-    *node->add_input() = NHWCToNCHW ? GetOrAddNodePermNHWCToNCHW()
-                                    : GetOrAddNodePermNCHWToNHWC();
-    *node->add_input() = GetOrAddNodeGatherAxisConst();
-    node->set_op("GatherV2");
-
-    AttrValue attr_type_indices;
-    attr_type_indices.set_type(DT_INT32);
-    node->mutable_attr()->insert({"Tindices", attr_type_indices});
-
-    AttrValue attr_type_axis;
-    attr_type_axis.set_type(DT_INT32);
-    node->mutable_attr()->insert({"Taxis", attr_type_axis});
-
-    AttrValue attr_type_params;
-    attr_type_params.set_type(data_type);
-    node->mutable_attr()->insert({"Tparams", attr_type_params});
-  }
-};
-
-// Specialized SliceProcessor, used if the second and third input are const
-// nodes, which could be the case if a constant folding pass is applied
-// before this optimization.
-class SliceProcessorConst : public AgnosticNodeProcessor {
- public:
-  explicit SliceProcessorConst(const OptimizeContext& opt_cxt)
-      : AgnosticNodeProcessor(opt_cxt) {}
-
- protected:
-  Status CustomizedProcessing() override {
-    // Skip the first input, which is the data to be sliced.
-    for (int i = 1; i < node_->input_size(); i++) {
-      TF_RETURN_IF_ERROR(UpdateAttrValueOfInput(i));
+      auto index_node = node_map_->GetNode(node_->input(i));
+      if (IsConstant(*index_node)) {
+        TF_RETURN_IF_ERROR(UpdateAttrValueOfInput(i));
+      } else {
+        AddNodeDataFormatOp("DataFormatVecPermute", i,
+                            node_->attr().at("Index").type());
+      }
     }
     return Status::OK();
   }
@@ -1332,10 +1239,6 @@ class DataLayoutOptimizer : GraphProcessor {
     return AddNodePermConst(kPermNCHWToNHWC, "", {0, 2, 3, 1});
   }
 
-  NodeDef* AddNodeGatherAxisConst() {
-    return AddNodeConstScalar(kGatherAxisConst, "", DT_INT32, 0);
-  }
-
   // Expand all nodes which is in NHWC, but supports NCHW or is layout agnostic.
   Status Expand() {
     int node_size_original = graph_->node_size();
@@ -1393,7 +1296,6 @@ class DataLayoutOptimizer : GraphProcessor {
     if (graph_->node_size() > node_size_original) {
       NodeDef* n = AddNodePermNHWCToNCHW();
       n = AddNodePermNCHWToNHWC();
-      n = AddNodeGatherAxisConst();
       std::set<string> ops_format_agnostic = GetOpsFormatAgnostic();
       for (int i = 0; i < graph_->node_size(); i++) {
         if (ops_format_agnostic.find(graph_->node(i).op()) !=
@@ -1415,13 +1317,7 @@ class DataLayoutOptimizer : GraphProcessor {
           } else if (IsReluGrad(*node)) {
             node_processor.reset(new ReluGradProcessor(opt_cxt));
           } else if (IsSlice(*node)) {
-            auto input1 = node_map_->GetNode(NodeName(node->input(1)));
-            auto input2 = node_map_->GetNode(NodeName(node->input(2)));
-            if (IsConstant(*input1) && IsConstant(*input2)) {
-              node_processor.reset(new SliceProcessorConst(opt_cxt));
-            } else {
-              node_processor.reset(new SliceProcessor(opt_cxt));
-            }
+            node_processor.reset(new SliceProcessor(opt_cxt));
           } else if (IsSplit(*node)) {
             node_processor.reset(new SplitProcessor(opt_cxt));
           } else if (IsSqueeze(*node)) {
diff --git a/tensorflow/core/grappler/optimizers/layout_optimizer_test.cc b/tensorflow/core/grappler/optimizers/layout_optimizer_test.cc
index 59796d38a9..6e1f47f0d3 100644
--- a/tensorflow/core/grappler/optimizers/layout_optimizer_test.cc
+++ b/tensorflow/core/grappler/optimizers/layout_optimizer_test.cc
@@ -198,9 +198,9 @@ TEST_F(LayoutOptimizerTest, Conv2DBackpropInputNonConstInputSizes) {
   auto conv2d_backprop_node = node_map.GetNode("Conv2DBackpropInput");
   CHECK(conv2d_backprop_node);
   EXPECT_EQ(conv2d_backprop_node->input(0),
-            "LayoutOptimizerDataFormatOp-Conv2DBackpropInput");
+            "LayoutOptimizerDataFormatOp_Conv2DBackpropInput_0");
   auto input_sizes_node =
-      node_map.GetNode("LayoutOptimizerDataFormatOp-Conv2DBackpropInput");
+      node_map.GetNode("LayoutOptimizerDataFormatOp_Conv2DBackpropInput_0");
   CHECK(input_sizes_node);
   EXPECT_EQ(input_sizes_node->input(0), "InputSizesIdentity");
   EXPECT_EQ(input_sizes_node->op(), "DataFormatVecPermute");
@@ -561,9 +561,9 @@ TEST_F(LayoutOptimizerTest, SplitNonConstDim) {
   Status status = optimizer.Optimize(virtual_cluster_.get(), item, &output);
   NodeMap node_map(&output);
   auto split_node = node_map.GetNode("split");
-  EXPECT_EQ(split_node->input(0), "LayoutOptimizerDataFormatOp-split");
+  EXPECT_EQ(split_node->input(0), "LayoutOptimizerDataFormatOp_split_0");
   EXPECT_EQ(split_node->input(1), "Conv2D");
-  auto map_node = node_map.GetNode("LayoutOptimizerDataFormatOp-split");
+  auto map_node = node_map.GetNode("LayoutOptimizerDataFormatOp_split_0");
   EXPECT_EQ(map_node->op(), "DataFormatDimMap");
   EXPECT_EQ(map_node->input(0), "i1");
 }
@@ -629,8 +629,8 @@ TEST_F(LayoutOptimizerTest, ConcatNonConst) {
   auto concat_node = node_map.GetNode("concat");
   EXPECT_EQ(concat_node->input(0), "split");
   EXPECT_EQ(concat_node->input(1), "split:1");
-  EXPECT_EQ(concat_node->input(2), "LayoutOptimizerDataFormatOp-concat");
-  auto concat_dim = node_map.GetNode("LayoutOptimizerDataFormatOp-concat");
+  EXPECT_EQ(concat_node->input(2), "LayoutOptimizerDataFormatOp_concat_2");
+  auto concat_dim = node_map.GetNode("LayoutOptimizerDataFormatOp_concat_2");
   EXPECT_EQ(concat_dim->op(), "DataFormatDimMap");
   EXPECT_EQ(concat_dim->input(0), "i");
 }
@@ -878,22 +878,14 @@ TEST_F(LayoutOptimizerTest, SliceNonConst) {
   NodeMap node_map(&output);
   auto slice_node = node_map.GetNode("slice");
   EXPECT_EQ(slice_node->input(0), "Conv2D");
-  EXPECT_EQ(slice_node->input(1),
-            "LayoutOptimizerPermVecNHWCToNCHW-slice-input1");
-  EXPECT_EQ(slice_node->input(2),
-            "LayoutOptimizerPermVecNHWCToNCHW-slice-input2");
-
-  auto perm1 =
-      node_map.GetNode("LayoutOptimizerPermVecNHWCToNCHW-slice-input1");
+  EXPECT_EQ(slice_node->input(1), "LayoutOptimizerDataFormatOp_slice_1");
+  EXPECT_EQ(slice_node->input(2), "LayoutOptimizerDataFormatOp_slice_2");
+  auto perm1 = node_map.GetNode("LayoutOptimizerDataFormatOp_slice_1");
+  EXPECT_EQ(perm1->op(), "DataFormatVecPermute");
   EXPECT_EQ(perm1->input(0), "ibegin");
-  EXPECT_EQ(perm1->input(1), "LayoutOptimizerPermConstNHWCToNCHW");
-  EXPECT_EQ(perm1->input(2), "LayoutOptimizerGatherAxisConst");
-
-  auto perm2 =
-      node_map.GetNode("LayoutOptimizerPermVecNHWCToNCHW-slice-input2");
+  auto perm2 = node_map.GetNode("LayoutOptimizerDataFormatOp_slice_2");
+  EXPECT_EQ(perm1->op(), "DataFormatVecPermute");
   EXPECT_EQ(perm2->input(0), "isize");
-  EXPECT_EQ(perm2->input(1), "LayoutOptimizerPermConstNHWCToNCHW");
-  EXPECT_EQ(perm2->input(2), "LayoutOptimizerGatherAxisConst");
 }
 
 TEST_F(LayoutOptimizerTest, DoNotApplyOptimizerTwice) {
diff --git a/tensorflow/python/grappler/layout_optimizer_test.py b/tensorflow/python/grappler/layout_optimizer_test.py
index 749e6d8f59..831f182009 100644
--- a/tensorflow/python/grappler/layout_optimizer_test.py
+++ b/tensorflow/python/grappler/layout_optimizer_test.py
@@ -225,7 +225,45 @@ class LayoutOptimizerTest(test.TestCase):
       self.assertIn('LayoutOptimizerTransposeNHWCToNCHW-Conv2D-Reshape-0',
                     nodes)
       self.assertIn('LayoutOptimizerTransposeNCHWToNHWC-split-Sum-0', nodes)
-      self.assertIn('LayoutOptimizerDataFormatOp-split', nodes)
+      self.assertIn('LayoutOptimizerDataFormatOp_split_0', nodes)
+      self.assertAllClose(output_val_ref, output_val, atol=1e-3)
+
+  def testSliceWithNonConstAxis(self):
+    if test.is_gpu_available(cuda_only=True):
+      random_seed.set_random_seed(0)
+      x = random_ops.truncated_normal([1, 784], seed=0)
+      conv = _two_layer_model(x)
+      size = array_ops.placeholder(dtype='int32')
+      s = array_ops.slice(conv, [0, 0, 0, 0], size)
+      output = array_ops.identity(s)
+
+      size_val = [1, 2, 3, 4]
+      with session.Session() as sess:
+        output_val_ref = sess.run(output, feed_dict={size: size_val})
+
+      with session.Session(config=_get_config()) as sess:
+        metadata = config_pb2.RunMetadata()
+        output_val = sess.run(
+            output, run_metadata=metadata, feed_dict={
+                size: size_val
+            })
+
+      nodes = []
+      num_transposes = 0
+      for node in metadata.cost_graph.node:
+        if node.name.startswith('LayoutOptimizerTranspose'):
+          num_transposes += 1
+        nodes.append(node.name)
+
+      # Four transposes were initially added in the Expand phase of
+      # LayoutOptimizer; two of them are cancelled out in the Collapse phase.
+      expected_num_transposes = 2
+      self.assertEqual(expected_num_transposes, num_transposes)
+      self.assertIn('LayoutOptimizerTransposeNHWCToNCHW-Conv2D-Reshape-0',
+                    nodes)
+      self.assertIn('LayoutOptimizerTransposeNCHWToNHWC-Slice-Identity-0',
+                    nodes)
+      self.assertIn('LayoutOptimizerDataFormatOp_Slice_2', nodes)
       self.assertAllClose(output_val_ref, output_val, atol=1e-3)
 
   def testLoop(self):
-- 
GitLab


From 1c46590d8b1431805e15e35beb98f24fe0a9dae5 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 11 Dec 2017 11:02:18 -0800
Subject: [PATCH 0868/1225] Fix incorrect parameter order in
 recall_at_precision.

PiperOrigin-RevId: 178642393
---
 tensorflow/contrib/metrics/python/ops/metric_ops.py    |  2 +-
 .../contrib/metrics/python/ops/metric_ops_test.py      | 10 +++++-----
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/tensorflow/contrib/metrics/python/ops/metric_ops.py b/tensorflow/contrib/metrics/python/ops/metric_ops.py
index 6b08b749f8..2f27985634 100644
--- a/tensorflow/contrib/metrics/python/ops/metric_ops.py
+++ b/tensorflow/contrib/metrics/python/ops/metric_ops.py
@@ -2268,7 +2268,7 @@ def recall_at_precision(labels,
     thresholds = [0.0 - _EPSILON] + thresholds + [1.0 + _EPSILON]
 
     values, update_ops = _streaming_confusion_matrix_at_thresholds(
-        labels, predictions, thresholds, weights)
+        predictions, labels, thresholds, weights)
 
     recall = _compute_recall_at_precision(values['tp'], values['fp'],
                                           values['fn'], precision, 'value')
diff --git a/tensorflow/contrib/metrics/python/ops/metric_ops_test.py b/tensorflow/contrib/metrics/python/ops/metric_ops_test.py
index 7db06609de..f05ae394e6 100644
--- a/tensorflow/contrib/metrics/python/ops/metric_ops_test.py
+++ b/tensorflow/contrib/metrics/python/ops/metric_ops_test.py
@@ -3162,7 +3162,7 @@ class RecallAtPrecisionTest(test.TestCase):
     labels = random_ops.random_uniform(
         (10, 3), maxval=2, dtype=dtypes_lib.int64, seed=2)
     recall, update_op = metrics.recall_at_precision(
-        predictions, labels, precision=0.7)
+        labels, predictions, precision=0.7)
 
     with self.test_session() as sess:
       sess.run(variables.local_variables_initializer())
@@ -3182,7 +3182,7 @@ class RecallAtPrecisionTest(test.TestCase):
     predictions = constant_op.constant(inputs, dtype=dtypes_lib.float32)
     labels = constant_op.constant(inputs)
     recall, update_op = metrics.recall_at_precision(
-        predictions, labels, precision=1.0)
+        labels, predictions, precision=1.0)
 
     with self.test_session() as sess:
       sess.run(variables.local_variables_initializer())
@@ -3197,7 +3197,7 @@ class RecallAtPrecisionTest(test.TestCase):
         predictions_values, dtype=dtypes_lib.float32)
     labels = constant_op.constant(labels_values)
     recall, update_op = metrics.recall_at_precision(
-        predictions, labels, precision=0.8)
+        labels, predictions, precision=0.8)
 
     with self.test_session() as sess:
       sess.run(variables.local_variables_initializer())
@@ -3212,7 +3212,7 @@ class RecallAtPrecisionTest(test.TestCase):
         predictions_values, dtype=dtypes_lib.float32)
     labels = constant_op.constant(labels_values)
     recall, update_op = metrics.recall_at_precision(
-        predictions, labels, precision=0.4)
+        labels, predictions, precision=0.4)
 
     with self.test_session() as sess:
       sess.run(variables.local_variables_initializer())
@@ -3230,7 +3230,7 @@ class RecallAtPrecisionTest(test.TestCase):
     labels = constant_op.constant(labels_values)
     weights = constant_op.constant(weights_values)
     recall, update_op = metrics.recall_at_precision(
-        predictions, labels, weights=weights, precision=0.4)
+        labels, predictions, weights=weights, precision=0.4)
 
     with self.test_session() as sess:
       sess.run(variables.local_variables_initializer())
-- 
GitLab


From c4ef1b96f89b2a3dd9515caff90a6be0ece8496f Mon Sep 17 00:00:00 2001
From: Zhixian Yan <zhixianyan@google.com>
Date: Mon, 11 Dec 2017 11:11:33 -0800
Subject: [PATCH 0869/1225] Add hvx/nnapi supports for Tensorflow Lite
 benchmark

PiperOrigin-RevId: 178643959
---
 tensorflow/contrib/lite/nnapi_delegate.cc | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/tensorflow/contrib/lite/nnapi_delegate.cc b/tensorflow/contrib/lite/nnapi_delegate.cc
index 6a199cc840..05853e853c 100644
--- a/tensorflow/contrib/lite/nnapi_delegate.cc
+++ b/tensorflow/contrib/lite/nnapi_delegate.cc
@@ -208,6 +208,11 @@ void AddOpsAndParams(tflite::Interpreter* interpreter,
       add_scalar_float32(builtin->beta);
     };
 
+    auto add_space_to_depth_params = [&add_scalar_int32](void* data) {
+      auto builtin = reinterpret_cast<TfLiteSpaceToDepthParams*>(data);
+      add_scalar_int32(builtin->block_size);
+    };
+
 #if 0
     auto add_reshape_params = [&](void* data) {
       auto builtin = reinterpret_cast<TfLiteReshapeParams*>(data);
@@ -280,6 +285,10 @@ void AddOpsAndParams(tflite::Interpreter* interpreter,
         nn_op_type = ANEURALNETWORKS_RESHAPE;
         // add_reshape_params(node.builtin_data);
         break;
+      case tflite::BuiltinOperator_SPACE_TO_DEPTH:
+        add_space_to_depth_params(node.builtin_data);
+        nn_op_type = ANEURALNETWORKS_SPACE_TO_DEPTH;
+        break;
       case tflite::BuiltinOperator_CONCAT_EMBEDDINGS:
       case tflite::BuiltinOperator_LSH_PROJECTION:
       case tflite::BuiltinOperator_SVDF:
@@ -295,7 +304,6 @@ void AddOpsAndParams(tflite::Interpreter* interpreter,
       case tflite::BuiltinOperator_CALL:
       case tflite::BuiltinOperator_SKIP_GRAM:
       case tflite::BuiltinOperator_RELU1:
-      case tflite::BuiltinOperator_SPACE_TO_DEPTH:
         FATAL("Op code %d is currently not delegated to NNAPI", builtin);
         nn_op_type = -1;  // set to invalid
         break;
-- 
GitLab


From db3ab6c555e63069b471f46ed216d1b8d7f9f271 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 11 Dec 2017 11:17:20 -0800
Subject: [PATCH 0870/1225] Adds remaining tests to _shared_embedding_column.

PiperOrigin-RevId: 178644910
---
 .../feature_column/feature_column_test.py     | 281 ++++++++++++++++--
 1 file changed, 257 insertions(+), 24 deletions(-)

diff --git a/tensorflow/python/feature_column/feature_column_test.py b/tensorflow/python/feature_column/feature_column_test.py
index 3651c41720..019415857e 100644
--- a/tensorflow/python/feature_column/feature_column_test.py
+++ b/tensorflow/python/feature_column/feature_column_test.py
@@ -4350,25 +4350,256 @@ class SharedEmbeddingColumnTest(test.TestCase):
               dense_shape=[1, 2]),
           features['bbb'].eval())
 
-  def test_input_layer(self):
+  def test_transform_feature(self):
+    a = fc.categorical_column_with_identity(key='aaa', num_buckets=3)
+    b = fc.categorical_column_with_identity(key='bbb', num_buckets=3)
+    a_embedded, b_embedded = fc_lib._shared_embedding_columns(
+        [a, b], dimension=2)
+    features = {
+        'aaa': sparse_tensor.SparseTensor(
+            indices=((0, 0), (1, 0), (1, 1)),
+            values=(0, 1, 0),
+            dense_shape=(2, 2)),
+        'bbb': sparse_tensor.SparseTensor(
+            indices=((0, 0), (1, 0), (1, 1)),
+            values=(1, 2, 1),
+            dense_shape=(2, 2)),
+    }
+    outputs = _transform_features(features, [a, a_embedded, b, b_embedded])
+    output_a = outputs[a]
+    output_a_embedded = outputs[a_embedded]
+    output_b = outputs[b]
+    output_b_embedded = outputs[b_embedded]
+    with _initialized_session():
+      _assert_sparse_tensor_value(
+          self, output_a.eval(), output_a_embedded.eval())
+      _assert_sparse_tensor_value(
+          self, output_b.eval(), output_b_embedded.eval())
+
+  def test_get_dense_tensor(self):
+    # Inputs.
+    vocabulary_size = 3
+    # -1 values are ignored.
+    input_a = np.array(
+        [[2, -1, -1],  # example 0, ids [2]
+         [0, 1, -1]])  # example 1, ids [0, 1]
+    input_b = np.array(
+        [[0, -1, -1],  # example 0, ids [0]
+         [-1, -1, -1]])  # example 1, ids []
+    input_features = {
+        'aaa': input_a,
+        'bbb': input_b
+    }
+
+    # Embedding variable.
+    embedding_dimension = 2
+    embedding_values = (
+        (1., 2.),  # id 0
+        (3., 5.),  # id 1
+        (7., 11.)  # id 2
+    )
+    def _initializer(shape, dtype, partition_info):
+      self.assertAllEqual((vocabulary_size, embedding_dimension), shape)
+      self.assertEqual(dtypes.float32, dtype)
+      self.assertIsNone(partition_info)
+      return embedding_values
+
+    # Expected lookup result, using combiner='mean'.
+    expected_lookups_a = (
+        # example 0:
+        (7., 11.),  # ids [2], embedding = [7, 11]
+        # example 1:
+        (2., 3.5),  # ids [0, 1], embedding = mean([1, 2] + [3, 5]) = [2, 3.5]
+    )
+    expected_lookups_b = (
+        # example 0:
+        (1., 2.),  # ids [0], embedding = [1, 2]
+        # example 1:
+        (0., 0.),  # ids [], embedding = [0, 0]
+    )
+
+    # Build columns.
+    categorical_column_a = fc.categorical_column_with_identity(
+        key='aaa', num_buckets=vocabulary_size)
+    categorical_column_b = fc.categorical_column_with_identity(
+        key='bbb', num_buckets=vocabulary_size)
+    embedding_column_a, embedding_column_b = fc_lib._shared_embedding_columns(
+        [categorical_column_a, categorical_column_b],
+        dimension=embedding_dimension, initializer=_initializer)
+
+    # Provide sparse input and get dense result.
+    embedding_lookup_a = embedding_column_a._get_dense_tensor(
+        _LazyBuilder(input_features))
+    embedding_lookup_b = embedding_column_b._get_dense_tensor(
+        _LazyBuilder(input_features))
+
+    # Assert expected embedding variable and lookups.
+    global_vars = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)
+    self.assertItemsEqual(
+        ('embedding_weights:0',), tuple([v.name for v in global_vars]))
+    embedding_var = global_vars[0]
+    with _initialized_session():
+      self.assertAllEqual(embedding_values, embedding_var.eval())
+      self.assertAllEqual(expected_lookups_a, embedding_lookup_a.eval())
+      self.assertAllEqual(expected_lookups_b, embedding_lookup_b.eval())
+
+  def test_get_dense_tensor_placeholder_inputs(self):
+    # Inputs.
+    vocabulary_size = 3
+    # -1 values are ignored.
+    input_a = np.array(
+        [[2, -1, -1],  # example 0, ids [2]
+         [0, 1, -1]])  # example 1, ids [0, 1]
+    input_b = np.array(
+        [[0, -1, -1],  # example 0, ids [0]
+         [-1, -1, -1]])  # example 1, ids []
+    # Specify shape, because dense input must have rank specified.
+    input_a_placeholder = array_ops.placeholder(
+        dtype=dtypes.int64, shape=[None, 3])
+    input_b_placeholder = array_ops.placeholder(
+        dtype=dtypes.int64, shape=[None, 3])
+    input_features = {
+        'aaa': input_a_placeholder,
+        'bbb': input_b_placeholder,
+    }
+    feed_dict = {
+        input_a_placeholder: input_a,
+        input_b_placeholder: input_b,
+    }
+
+    # Embedding variable.
+    embedding_dimension = 2
+    embedding_values = (
+        (1., 2.),  # id 0
+        (3., 5.),  # id 1
+        (7., 11.)  # id 2
+    )
+    def _initializer(shape, dtype, partition_info):
+      self.assertAllEqual((vocabulary_size, embedding_dimension), shape)
+      self.assertEqual(dtypes.float32, dtype)
+      self.assertIsNone(partition_info)
+      return embedding_values
+
+    # Build columns.
+    categorical_column_a = fc.categorical_column_with_identity(
+        key='aaa', num_buckets=vocabulary_size)
+    categorical_column_b = fc.categorical_column_with_identity(
+        key='bbb', num_buckets=vocabulary_size)
+    embedding_column_a, embedding_column_b = fc_lib._shared_embedding_columns(
+        [categorical_column_a, categorical_column_b],
+        dimension=embedding_dimension, initializer=_initializer)
+
+    # Provide sparse input and get dense result.
+    embedding_lookup_a = embedding_column_a._get_dense_tensor(
+        _LazyBuilder(input_features))
+    embedding_lookup_b = embedding_column_b._get_dense_tensor(
+        _LazyBuilder(input_features))
+
+    with _initialized_session() as sess:
+      sess.run([embedding_lookup_a, embedding_lookup_b], feed_dict=feed_dict)
+
+  def test_linear_model(self):
+    # Inputs.
+    batch_size = 2
+    vocabulary_size = 3
+    # -1 values are ignored.
+    input_a = np.array(
+        [[2, -1, -1],  # example 0, ids [2]
+         [0, 1, -1]])  # example 1, ids [0, 1]
+    input_b = np.array(
+        [[0, -1, -1],  # example 0, ids [0]
+         [-1, -1, -1]])  # example 1, ids []
+
+    # Embedding variable.
+    embedding_dimension = 2
+    embedding_shape = (vocabulary_size, embedding_dimension)
+    zeros_embedding_values = np.zeros(embedding_shape)
+    def _initializer(shape, dtype, partition_info):
+      self.assertAllEqual(embedding_shape, shape)
+      self.assertEqual(dtypes.float32, dtype)
+      self.assertIsNone(partition_info)
+      return zeros_embedding_values
+
+    # Build columns.
+    categorical_column_a = fc.categorical_column_with_identity(
+        key='aaa', num_buckets=vocabulary_size)
+    categorical_column_b = fc.categorical_column_with_identity(
+        key='bbb', num_buckets=vocabulary_size)
+    embedding_column_a, embedding_column_b = fc_lib._shared_embedding_columns(
+        [categorical_column_a, categorical_column_b],
+        dimension=embedding_dimension, initializer=_initializer)
+
+    with ops.Graph().as_default():
+      predictions = fc.linear_model({
+          categorical_column_a.name: input_a,
+          categorical_column_b.name: input_b,
+      }, (embedding_column_a, embedding_column_b))
+      # Linear weights name should follow the column name.
+      # TODO(roumposg): Fix that.
+      expected_var_names = (
+          'linear_model/bias_weights:0',
+          'linear_model/aaa_bbb_shared_embedding/weights:0',
+          'linear_model/aaa_bbb_shared_embedding/embedding_weights:0',
+          'linear_model/aaa_bbb_shared_embedding_1/weights:0',
+      )
+      self.assertItemsEqual(
+          expected_var_names,
+          [v.name for v in ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)])
+      trainable_vars = {
+          v.name: v for v in ops.get_collection(
+              ops.GraphKeys.TRAINABLE_VARIABLES)
+      }
+      self.assertItemsEqual(expected_var_names, trainable_vars.keys())
+      bias = trainable_vars['linear_model/bias_weights:0']
+      embedding_weights = trainable_vars[
+          'linear_model/aaa_bbb_shared_embedding/embedding_weights:0']
+      linear_weights_a = trainable_vars[
+          'linear_model/aaa_bbb_shared_embedding/weights:0']
+      linear_weights_b = trainable_vars[
+          'linear_model/aaa_bbb_shared_embedding_1/weights:0']
+      with _initialized_session():
+        # Predictions with all zero weights.
+        self.assertAllClose(np.zeros((1,)), bias.eval())
+        self.assertAllClose(zeros_embedding_values, embedding_weights.eval())
+        self.assertAllClose(
+            np.zeros((embedding_dimension, 1)), linear_weights_a.eval())
+        self.assertAllClose(
+            np.zeros((embedding_dimension, 1)), linear_weights_b.eval())
+        self.assertAllClose(np.zeros((batch_size, 1)), predictions.eval())
+
+        # Predictions with all non-zero weights.
+        embedding_weights.assign((
+            (1., 2.),  # id 0
+            (3., 5.),  # id 1
+            (7., 11.)  # id 2
+        )).eval()
+        linear_weights_a.assign(((4.,), (6.,))).eval()
+        # example 0, ids [2], embedding[0] = [7, 11]
+        # example 1, ids [0, 1], embedding[1] = mean([1, 2] + [3, 5]) = [2, 3.5]
+        # sum(embeddings * linear_weights)
+        # = [4*7 + 6*11, 4*2 + 6*3.5] = [94, 29]
+        linear_weights_b.assign(((3.,), (5.,))).eval()
+        # example 0, ids [0], embedding[0] = [1, 2]
+        # example 1, ids [], embedding[1] = 0, 0]
+        # sum(embeddings * linear_weights)
+        # = [3*1 + 5*2, 3*0 +5*0] = [13, 0]
+        self.assertAllClose([[94. + 13.], [29.]], predictions.eval())
+
+  def _test_input_layer(self, trainable=True):
     # Inputs.
     vocabulary_size = 3
     sparse_input_a = sparse_tensor.SparseTensorValue(
         # example 0, ids [2]
         # example 1, ids [0, 1]
-        # example 2, ids []
-        # example 3, ids [1]
-        indices=((0, 0), (1, 0), (1, 4), (3, 0)),
-        values=(2, 0, 1, 1),
-        dense_shape=(4, 5))
+        indices=((0, 0), (1, 0), (1, 4)),
+        values=(2, 0, 1),
+        dense_shape=(2, 5))
     sparse_input_b = sparse_tensor.SparseTensorValue(
         # example 0, ids [0]
         # example 1, ids []
-        # example 2, ids []
-        # example 3, ids [1]
-        indices=((0, 0), (3, 0)),
-        values=(0, 1),
-        dense_shape=(4, 5))
+        indices=((0, 0),),
+        values=(0,),
+        dense_shape=(2, 5))
 
     # Embedding variable.
     embedding_dimension = 2
@@ -4393,14 +4624,6 @@ class SharedEmbeddingColumnTest(test.TestCase):
         # A ids [0, 1], embedding = mean([1, 2] + [3, 5]) = [2, 3.5]
         # B ids [], embedding = [0, 0]
         (2., 3.5, 0., 0.),
-        # example 2:
-        # A ids [], embedding = [0, 0]
-        # B ids [], embedding = [0, 0]
-        (0., 0., 0., 0.),
-        # example 3:
-        # A ids [1], embedding = [3, 5]
-        # B ids [1], embedding = [3, 5]
-        (3., 5., 3., 5.),
     )
 
     # Build columns.
@@ -4410,7 +4633,8 @@ class SharedEmbeddingColumnTest(test.TestCase):
         key='bbb', num_buckets=vocabulary_size)
     embedding_column_a, embedding_column_b = fc_lib._shared_embedding_columns(
         [categorical_column_a, categorical_column_b],
-        dimension=embedding_dimension, initializer=_initializer)
+        dimension=embedding_dimension, initializer=_initializer,
+        trainable=trainable)
 
     # Provide sparse input and get dense result.
     input_layer = fc.input_layer(
@@ -4423,17 +4647,26 @@ class SharedEmbeddingColumnTest(test.TestCase):
         ['input_layer/aaa_bbb_shared_embedding/embedding_weights:0'],
         tuple([v.name for v in global_vars]))
     trainable_vars = ops.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES)
-    self.assertItemsEqual(
-        ['input_layer/aaa_bbb_shared_embedding/embedding_weights:0'],
-        tuple([v.name for v in trainable_vars]))
+    if trainable:
+      self.assertItemsEqual(
+          ['input_layer/aaa_bbb_shared_embedding/embedding_weights:0'],
+          tuple([v.name for v in trainable_vars]))
+    else:
+      self.assertItemsEqual([], tuple([v.name for v in trainable_vars]))
     shared_embedding_vars = ops.get_collection('aaa_bbb_shared_embedding')
     self.assertItemsEqual(
         ['input_layer/aaa_bbb_shared_embedding/embedding_weights:0'],
         tuple([v.name for v in shared_embedding_vars]))
     with _initialized_session():
-      self.assertAllEqual(embedding_values, trainable_vars[0].eval())
+      self.assertAllEqual(embedding_values, shared_embedding_vars[0].eval())
       self.assertAllEqual(expected_lookups, input_layer.eval())
 
+  def test_input_layer(self):
+    self._test_input_layer()
+
+  def test_input_layer_no_trainable(self):
+    self._test_input_layer(trainable=False)
+
 
 class WeightedCategoricalColumnTest(test.TestCase):
 
-- 
GitLab


From bc871585ff433aeb3ade59c78b144716f2daf12d Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 11 Dec 2017 11:35:35 -0800
Subject: [PATCH 0871/1225] Optimized specializations for 3-channel depthwise
 with multiplier 2 and 4.

PiperOrigin-RevId: 178647824
---
 .../internal/optimized/depthwiseconv_float.h  | 73 +++++++++++++++++++
 1 file changed, 73 insertions(+)

diff --git a/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_float.h b/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_float.h
index 974611f52a..da34c8aef9 100644
--- a/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_float.h
+++ b/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_float.h
@@ -311,6 +311,9 @@ struct FloatDepthwiseConvKernel<true, 0, 8> {
   }
 };
 
+// Note this implementation is very slow for input_depths < 8
+// (e.g. comparable to reference implementation) see, specializations for
+// input_depth=3 below.
 template <>
 struct FloatDepthwiseConvKernel<true, 0, 2> {
   static void Run(int num_output_pixels, int input_depth, int depth_multiplier,
@@ -417,6 +420,74 @@ struct FloatDepthwiseConvKernel<true, 0, 2> {
   }
 };
 
+template <>
+struct FloatDepthwiseConvKernel<true, 3, 2> {
+  static void Run(int num_output_pixels, int input_depth, int depth_multiplier,
+                  const float* input_ptr, int input_ptr_increment,
+                  const float* filter_ptr, float* acc_buffer_ptr) {
+    // Load the filters
+    float32x2_t filter[3];
+    for (int i = 0; i < 3; i++) {
+      filter[i] = vld1_f32(filter_ptr + 2 * i);
+    }
+    // Handle one output pixel at a time.
+    for (int outp = 0; outp < num_output_pixels; outp++) {
+      const float32x2_t input01 = vld1_f32(input_ptr);
+      const float32x2_t input2 = vld1_dup_f32(input_ptr + 2);
+      // Load the accumulators from acc_buffer
+      float32x2_t acc[3];
+      for (int i = 0; i < 3; i++) {
+        acc[i] = vld1_f32(acc_buffer_ptr + 2 * i);
+      }
+      // Multiply-accumulate for each input channel there 2 outputs
+      acc[0] = vmla_lane_f32(acc[0], filter[0], input01, 0);
+      acc[1] = vmla_lane_f32(acc[1], filter[1], input01, 1);
+      acc[2] = vmla_lane_f32(acc[2], filter[2], input2, 0);
+      // Store the accumulators back to acc_buffer
+      for (int i = 0; i < 3; i++) {
+        vst1_f32(acc_buffer_ptr + 2 * i, acc[i]);
+      }
+      acc_buffer_ptr += 6;
+      input_ptr += input_ptr_increment;
+    }
+  }
+};
+
+template <>
+struct FloatDepthwiseConvKernel<true, 3, 4> {
+  static void Run(int num_output_pixels, int input_depth, int depth_multiplier,
+                  const float* input_ptr, int input_ptr_increment,
+                  const float* filter_ptr, float* acc_buffer_ptr) {
+    // Load the filters
+    float32x4_t filter[3];
+    for (int i = 0; i < 3; i++) {
+      filter[i] = vld1q_f32(filter_ptr + 4 * i);
+    }
+    // Handle one output pixel at a time.
+    for (int outp = 0; outp < num_output_pixels; outp++) {
+      // NOTE: we only want 3 values, so we read it as two ops where
+      // the second op just duplicates the lane
+      const float32x2_t input01 = vld1_f32(input_ptr);
+      const float32x2_t input2 = vld1_dup_f32(input_ptr + 2);
+      // Load the accumulators from acc_buffer
+      float32x4_t acc[3];
+      for (int i = 0; i < 3; i++) {
+        acc[i] = vld1q_f32(acc_buffer_ptr + 4 * i);
+      }
+      // Multiply-accumulate all outputs.
+      acc[0] = vmlaq_lane_f32(acc[0], filter[0], input01, 0);
+      acc[1] = vmlaq_lane_f32(acc[1], filter[1], input01, 1);
+      acc[2] = vmlaq_lane_f32(acc[2], filter[2], input2, 0);
+      // Store the accumulators back to acc_buffer
+      for (int i = 0; i < 3; i++) {
+        vst1q_f32(acc_buffer_ptr + 4 * i, acc[i]);
+      }
+      acc_buffer_ptr += 12;
+      input_ptr += input_ptr_increment;
+    }
+  }
+};
+
 template <>
 struct FloatDepthwiseConvKernel<true, 1, 8> {
   static void Run(int num_output_pixels, int input_depth, int depth_multiplier,
@@ -857,6 +928,8 @@ inline void DepthwiseConv(const float* input_data, const Dims<4>& input_dims,
   TFMINI_USE_DEPTHWISECONV_KERNEL(true, 1, 8)
   TFMINI_USE_DEPTHWISECONV_KERNEL(true, 1, 32)
   TFMINI_USE_DEPTHWISECONV_KERNEL(true, 2, 1)
+  TFMINI_USE_DEPTHWISECONV_KERNEL(true, 3, 2)
+  TFMINI_USE_DEPTHWISECONV_KERNEL(true, 3, 4)
   TFMINI_USE_DEPTHWISECONV_KERNEL(true, 4, 1)
 
   // Finally, the kernels allowing a variable input depth,
-- 
GitLab


From b4c447ae27f03663eac22ad878914d9e2b9aba17 Mon Sep 17 00:00:00 2001
From: Benoit Steiner <bsteiner@google.com>
Date: Mon, 11 Dec 2017 12:04:39 -0800
Subject: [PATCH 0872/1225] Be more conservative when optimizing full
 reductions

PiperOrigin-RevId: 178652323
---
 .../core/grappler/optimizers/constant_folding.cc      | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/tensorflow/core/grappler/optimizers/constant_folding.cc b/tensorflow/core/grappler/optimizers/constant_folding.cc
index 90b796a613..7f2dcf6efc 100644
--- a/tensorflow/core/grappler/optimizers/constant_folding.cc
+++ b/tensorflow/core/grappler/optimizers/constant_folding.cc
@@ -396,6 +396,7 @@ Status ConstantFolding::MaterializeBroadcastGradientArgs(
       (shape_node2->op() != "Shape" && !IsReallyConstant(*shape_node2))) {
     return Status::OK();
   }
+
   int64 min_id = 0;
   BCast::Vec shape1;
   if (!ExtractShape(*shape_node1, properties, &shape1, &min_id)) {
@@ -498,13 +499,19 @@ Status ConstantFolding::MaterializeReductionIndices(
   if (output_props.size() != 1) {
     return Status::OK();
   }
+  const bool keep_dims =
+      node->attr().count("keep_dims") && node->attr().at("keep_dims").b();
   const OpInfo::TensorProperties& output_prop = output_props[0];
   PartialTensorShape output_shape(output_prop.shape());
   if (output_shape.num_elements() != 1) {
     bool full_reduction = false;
     for (const NodeDef* fanout : node_map_->GetOutputs(node->name())) {
-      if (!IsReshape(*fanout)) {
-        continue;
+      if (!IsReshape(*fanout) && !keep_dims) {
+        // Depending on how it's setup, a full reduction will generate a tensor
+        // of shape [], [1], [1, 1], [1, 1, ...]. If keep_dims isn't true, we
+        // rely on the existence of a reshape node following the reduction to
+        // ensure that the fanout is fed a scalar of the right shape.
+        return Status::OK();
       }
       const std::vector<OpInfo::TensorProperties>& reshape_props =
           properties.GetOutputProperties(fanout->name());
-- 
GitLab


From 916b0d5cbea02e85e42a7ca677b594351db36547 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 11 Dec 2017 12:24:41 -0800
Subject: [PATCH 0873/1225] Rename ABSL Macros

This LSC will rename ABSL macros. Most macro will be renamed with ABSL_ prefix.
Some might have completely new names. Please see the list of the macros
affected. For example, MUST_USE_RESULT will be renamed ABSL_MUST_USE_RESULT

The purpose of this LSC is to avoid name conflicts for the ABSL release. To see
the details go/absl-macros.

PiperOrigin-RevId: 178655181
---
 .../contrib/decision_trees/proto/generic_tree_model_proto.swig  | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/contrib/decision_trees/proto/generic_tree_model_proto.swig b/tensorflow/contrib/decision_trees/proto/generic_tree_model_proto.swig
index d3d201afd5..cafb9314ca 100644
--- a/tensorflow/contrib/decision_trees/proto/generic_tree_model_proto.swig
+++ b/tensorflow/contrib/decision_trees/proto/generic_tree_model_proto.swig
@@ -2,7 +2,7 @@
 
 %include "net/proto/swig/protofunc.swig"
 
-#ifndef MUST_USE_RESULT
+#ifndef ABSL_MUST_USE_RESULT
 #error Use this file only as a %include or %import after google.swig.
 #endif
 
-- 
GitLab


From b2d9b8f6d10466789f2828933d01101b17576b3e Mon Sep 17 00:00:00 2001
From: Nathan Luehr <nluehr@nvidia.com>
Date: Fri, 1 Dec 2017 10:53:33 -0800
Subject: [PATCH 0874/1225] Fixed memory_stats_ops_test

Added explicit dependency to avoid matrix free prior to stats op execution.
---
 .../memory_stats/python/kernel_tests/memory_stats_ops_test.py  | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tensorflow/contrib/memory_stats/python/kernel_tests/memory_stats_ops_test.py b/tensorflow/contrib/memory_stats/python/kernel_tests/memory_stats_ops_test.py
index d1b430b803..02c2ac06fb 100644
--- a/tensorflow/contrib/memory_stats/python/kernel_tests/memory_stats_ops_test.py
+++ b/tensorflow/contrib/memory_stats/python/kernel_tests/memory_stats_ops_test.py
@@ -77,8 +77,9 @@ class MemoryStatsOpsTest(test_util.TensorFlowTestCase):
         bytes_in_use_op = memory_stats_ops.BytesInUse()
       with ops.control_dependencies([bytes_in_use_op]):
         b = random_ops.random_uniform(matrix_shape, dtype=dtype)
+        c = math_ops.matmul(a, b)
 
-      _, bytes_in_use, max_bytes_in_use = sess.run([a, bytes_in_use_op,
+      _, bytes_in_use, max_bytes_in_use = sess.run([c, bytes_in_use_op,
                                                     max_bytes_in_use_op])
 
       # intermediate result allocates 1 matrix, max usage is at least 2
-- 
GitLab


From dae1f7af9530b6f5ac752b6a55a3a2275550befc Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 11 Dec 2017 12:51:27 -0800
Subject: [PATCH 0875/1225] Add ReverseDFSFrom variant that works with const
 Node*.

PiperOrigin-RevId: 178658907
---
 tensorflow/core/graph/algorithm.cc | 41 ++++++++++++++++++++++--------
 tensorflow/core/graph/algorithm.h  |  4 +++
 2 files changed, 34 insertions(+), 11 deletions(-)

diff --git a/tensorflow/core/graph/algorithm.cc b/tensorflow/core/graph/algorithm.cc
index 6ef51aa7df..4652fbe406 100644
--- a/tensorflow/core/graph/algorithm.cc
+++ b/tensorflow/core/graph/algorithm.cc
@@ -83,13 +83,16 @@ void ReverseDFS(const Graph& g, const std::function<void(Node*)>& enter,
   ReverseDFSFrom(g, {g.sink_node()}, enter, leave, stable_comparator);
 }
 
-void ReverseDFSFrom(const Graph& g, gtl::ArraySlice<Node*> start,
-                    const std::function<void(Node*)>& enter,
-                    const std::function<void(Node*)>& leave,
-                    const NodeComparator& stable_comparator) {
+namespace {
+
+template <typename T>
+void ReverseDFSFromHelper(const Graph& g, gtl::ArraySlice<T> start,
+                          const std::function<void(T)>& enter,
+                          const std::function<void(T)>& leave,
+                          const NodeComparator& stable_comparator) {
   // Stack of work to do.
   struct Work {
-    Node* node;
+    T node;
     bool leave;  // Are we entering or leaving n?
   };
   std::vector<Work> stack(start.size());
@@ -102,7 +105,7 @@ void ReverseDFSFrom(const Graph& g, gtl::ArraySlice<Node*> start,
     Work w = stack.back();
     stack.pop_back();
 
-    Node* n = w.node;
+    T n = w.node;
     if (w.leave) {
       leave(n);
       continue;
@@ -117,7 +120,7 @@ void ReverseDFSFrom(const Graph& g, gtl::ArraySlice<Node*> start,
 
     gtl::iterator_range<NeighborIter> nodes = n->in_nodes();
 
-    auto add_work = [&visited, &stack](Node* out) {
+    auto add_work = [&visited, &stack](T out) {
       if (!visited[out->id()]) {
         // Note; we must not mark as visited until we actually process it.
         stack.push_back(Work{out, false});
@@ -125,22 +128,38 @@ void ReverseDFSFrom(const Graph& g, gtl::ArraySlice<Node*> start,
     };
 
     if (stable_comparator) {
-      std::vector<Node*> nodes_sorted;
-      for (Node* in : nodes) {
+      std::vector<T> nodes_sorted;
+      for (T in : nodes) {
         nodes_sorted.emplace_back(in);
       }
       std::sort(nodes_sorted.begin(), nodes_sorted.end(), stable_comparator);
-      for (Node* in : nodes_sorted) {
+      for (T in : nodes_sorted) {
         add_work(in);
       }
     } else {
-      for (Node* in : nodes) {
+      for (T in : nodes) {
         add_work(in);
       }
     }
   }
 }
 
+}  // namespace
+
+void ReverseDFSFrom(const Graph& g, gtl::ArraySlice<const Node*> start,
+                    const std::function<void(const Node*)>& enter,
+                    const std::function<void(const Node*)>& leave,
+                    const NodeComparator& stable_comparator) {
+  ReverseDFSFromHelper(g, start, enter, leave, stable_comparator);
+}
+
+void ReverseDFSFrom(const Graph& g, gtl::ArraySlice<Node*> start,
+                    const std::function<void(Node*)>& enter,
+                    const std::function<void(Node*)>& leave,
+                    const NodeComparator& stable_comparator) {
+  ReverseDFSFromHelper(g, start, enter, leave, stable_comparator);
+}
+
 void GetPostOrder(const Graph& g, std::vector<Node*>* order,
                   const NodeComparator& stable_comparator) {
   order->clear();
diff --git a/tensorflow/core/graph/algorithm.h b/tensorflow/core/graph/algorithm.h
index 5bb6041d98..ac4a099013 100644
--- a/tensorflow/core/graph/algorithm.h
+++ b/tensorflow/core/graph/algorithm.h
@@ -69,6 +69,10 @@ extern void ReverseDFSFrom(const Graph& g, gtl::ArraySlice<Node*> start,
                            const std::function<void(Node*)>& enter,
                            const std::function<void(Node*)>& leave,
                            const NodeComparator& stable_comparator = {});
+extern void ReverseDFSFrom(const Graph& g, gtl::ArraySlice<const Node*> start,
+                           const std::function<void(const Node*)>& enter,
+                           const std::function<void(const Node*)>& leave,
+                           const NodeComparator& stable_comparator = {});
 
 // Stores in *order the post-order numbering of all nodes
 // in graph found via a depth first search starting at the source node.
-- 
GitLab


From 96caf514b79d2d74c7e1b1db8e9452473f5d9522 Mon Sep 17 00:00:00 2001
From: Pete Warden <pete@petewarden.com>
Date: Mon, 11 Dec 2017 12:55:15 -0800
Subject: [PATCH 0876/1225] Switched optimization mode for Pi builds to avoid
 internal compiler error (#15244)

---
 tensorflow/tools/ci_build/pi/build_raspberry_pi.sh | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tensorflow/tools/ci_build/pi/build_raspberry_pi.sh b/tensorflow/tools/ci_build/pi/build_raspberry_pi.sh
index 88116d9f24..1bd1852ffc 100755
--- a/tensorflow/tools/ci_build/pi/build_raspberry_pi.sh
+++ b/tensorflow/tools/ci_build/pi/build_raspberry_pi.sh
@@ -82,6 +82,7 @@ if [[ $1 == "PI_ONE" ]]; then
 else
   PI_COPTS='--copt=-march=armv7-a --copt=-mfpu=neon-vfpv4
   --copt=-std=gnu11 --copt=-DS_IREAD=S_IRUSR --copt=-DS_IWRITE=S_IWUSR
+  --copt=-O3
   --copt=-U__GCC_HAVE_SYNC_COMPARE_AND_SWAP_1
   --copt=-U__GCC_HAVE_SYNC_COMPARE_AND_SWAP_2
   --copt=-U__GCC_HAVE_SYNC_COMPARE_AND_SWAP_8'
-- 
GitLab


From c290c47a82f571d0aa8527f35e5042c32248778b Mon Sep 17 00:00:00 2001
From: Benoit Steiner <bsteiner@google.com>
Date: Mon, 11 Dec 2017 13:10:26 -0800
Subject: [PATCH 0877/1225] Always inline functions when creating an item.

PiperOrigin-RevId: 178661624
---
 tensorflow/python/grappler/item.i | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tensorflow/python/grappler/item.i b/tensorflow/python/grappler/item.i
index 8f72a425c3..8f75b827b6 100644
--- a/tensorflow/python/grappler/item.i
+++ b/tensorflow/python/grappler/item.i
@@ -80,6 +80,7 @@ static GItem TF_NewItem(
   tensorflow::grappler::ItemConfig cfg;
   cfg.ignore_user_placement = ignore_user_placement;
   cfg.ignore_colocation = ignore_colocation;
+  cfg.inline_functions = true;
   std::unique_ptr<tensorflow::grappler::GrapplerItem> item =
       tensorflow::grappler::GrapplerItemFromMetaGraphDef("item", meta_graph, cfg);
   if (!item) {
-- 
GitLab


From 0afb4bbcbb757fa20d99d2a7d74f88622f93c9da Mon Sep 17 00:00:00 2001
From: Yao Zhang <yaozhang@google.com>
Date: Mon, 11 Dec 2017 13:14:01 -0800
Subject: [PATCH 0878/1225] Support all unary ops.

PiperOrigin-RevId: 178662178
---
 .../grappler/optimizers/layout_optimizer.cc   | 40 ++++++++++++++++++-
 1 file changed, 39 insertions(+), 1 deletion(-)

diff --git a/tensorflow/core/grappler/optimizers/layout_optimizer.cc b/tensorflow/core/grappler/optimizers/layout_optimizer.cc
index 196f00ba10..e9112baaff 100644
--- a/tensorflow/core/grappler/optimizers/layout_optimizer.cc
+++ b/tensorflow/core/grappler/optimizers/layout_optimizer.cc
@@ -73,23 +73,61 @@ std::set<string> GetOpsFormatSupported() {
 std::set<string> GetOpsFormatAgnostic() {
   std::set<string> ops_format_agnostic = {"Add",
                                           "AddN",
+                                          "Acos",
+                                          "Acosh",
+                                          "Asin",
+                                          "Asinh",
+                                          "Atan",
+                                          "Atanh",
+                                          "Ceil",
+                                          "Cos",
+                                          "Cosh",
                                           "Concat",
                                           "ConcatV2",
+                                          "Digamma",
+                                          "Erf",
+                                          "Erfc",
+                                          "Exp",
+                                          "Expm1",
                                           "Floor",
                                           "Identity",
+                                          "Inv",
+                                          "InvGrad",
+                                          "IsFinite",
+                                          "IsInf",
+                                          "IsNan",
+                                          "Lgamma",
+                                          "Log",
+                                          "Log1p",
                                           "Mul",
                                           "Neg",
                                           "Pad",
                                           "RealDiv",
+                                          "Reciprocal",
+                                          "ReciprocalGrad",
                                           "Relu",
                                           "Relu6",
                                           "ReluGrad",
+                                          "Rint",
                                           "Sigmoid",
+                                          "SigmoidGrad",
+                                          "Sign",
+                                          "Sin",
+                                          "Sinh",
                                           "Slice",
                                           "Split",
+                                          "Round",
+                                          "Rsqrt",
+                                          "RsqrtGrad",
+                                          "Sqrt",
+                                          "SqrtGrad",
+                                          "Square",
                                           "SquaredDifference",
                                           "Squeeze",
-                                          /*"Sum",*/ "Sub"};
+                                          /*"Sum",*/ "Sub",
+                                          "Tan",
+                                          "Tanh",
+                                          "TanhGrad"};
   return ops_format_agnostic;
 }
 
-- 
GitLab


From c381794b2fc3227bfee9cf085e26bafb33da8f4b Mon Sep 17 00:00:00 2001
From: Xiaoqiang Zheng <zhengxq@google.com>
Date: Mon, 11 Dec 2017 13:14:17 -0800
Subject: [PATCH 0879/1225] Support different threading modes in GPU device.
 All modes are experimental for now. The goal is to find the best setting, and
 change the default to pick that.

PiperOrigin-RevId: 178662212
---
 .../core/common_runtime/direct_session.cc     | 22 ++++++++--
 .../core/common_runtime/direct_session.h      |  1 +
 .../core/common_runtime/gpu/gpu_device.cc     | 41 +++++++++++++++++++
 .../core/common_runtime/gpu/gpu_device.h      |  1 +
 tensorflow/core/framework/device_base.h       | 13 ++++++
 5 files changed, 75 insertions(+), 3 deletions(-)

diff --git a/tensorflow/core/common_runtime/direct_session.cc b/tensorflow/core/common_runtime/direct_session.cc
index 2d4f2a2d90..103b4b13c7 100644
--- a/tensorflow/core/common_runtime/direct_session.cc
+++ b/tensorflow/core/common_runtime/direct_session.cc
@@ -521,9 +521,7 @@ Status DirectSession::Run(const RunOptions& run_options,
 
   args.rendezvous = run_state.rendez;
   args.cancellation_manager = &step_cancellation_manager;
-  args.runner = [this, pool](Executor::Args::Closure c) {
-    SchedClosure(pool, std::move(c));
-  };
+
   args.session_state = &session_state_;
   args.tensor_store = &run_state.tensor_store;
   args.step_container = &run_state.step_container;
@@ -584,7 +582,24 @@ Status DirectSession::Run(const RunOptions& run_options,
     return errors::Cancelled("Run call was cancelled");
   }
 
+  Executor::Args::Runner default_runner = [this,
+                                           pool](Executor::Args::Closure c) {
+    SchedClosure(pool, std::move(c));
+  };
   for (const auto& item : executors_and_keys->items) {
+    // TODO(zhengxq): support partial run.
+    // TODO(zhengxq): support other session types.
+    // TODO(zhengxq): if the device picks its own threadpool, we need to assign
+    //     less threads to the main compute pool by default.
+    thread::ThreadPool* device_thread_pool =
+        item.device->tensorflow_device_thread_pool();
+    if (!device_thread_pool) {
+      args.runner = default_runner;
+    } else {
+      args.runner = [this, device_thread_pool](Executor::Args::Closure c) {
+        SchedClosure(device_thread_pool, std::move(c));
+      };
+    }
     item.executor->RunAsync(args, barrier->Get());
   }
 
@@ -1222,6 +1237,7 @@ Status DirectSession::GetOrCreateExecutors(
     // NewLocalExecutor takes ownership of partition_graph.
     item->graph = partition_graph.get();
     item->executor = nullptr;
+    item->device = device;
     Executor* executor;
     TF_RETURN_IF_ERROR(
         NewLocalExecutor(params, partition_graph.release(), &executor));
diff --git a/tensorflow/core/common_runtime/direct_session.h b/tensorflow/core/common_runtime/direct_session.h
index 780d0b46a8..ab768b97c4 100644
--- a/tensorflow/core/common_runtime/direct_session.h
+++ b/tensorflow/core/common_runtime/direct_session.h
@@ -112,6 +112,7 @@ class DirectSession : public Session {
   // every partition.
   struct PerPartitionExecutorsAndLib {
     Graph* graph = nullptr;                  // not owned.
+    Device* device = nullptr;                // not owned.
     FunctionLibraryRuntime* flib = nullptr;  // not owned.
     std::unique_ptr<Executor> executor;
   };
diff --git a/tensorflow/core/common_runtime/gpu/gpu_device.cc b/tensorflow/core/common_runtime/gpu/gpu_device.cc
index 0fcea8ffd4..5664977833 100644
--- a/tensorflow/core/common_runtime/gpu/gpu_device.cc
+++ b/tensorflow/core/common_runtime/gpu/gpu_device.cc
@@ -60,6 +60,7 @@ limitations under the License.
 #include "tensorflow/core/platform/types.h"
 #include "tensorflow/core/public/session_options.h"
 #include "tensorflow/core/util/device_name_utils.h"
+#include "tensorflow/core/util/env_var.h"
 #include "tensorflow/core/util/stream_executor_util.h"
 
 namespace tensorflow {
@@ -305,6 +306,46 @@ Status BaseGPUDevice::Init(const SessionOptions& options) {
   gpu_device_info_->gpu_id = gpu_id_;
   set_tensorflow_gpu_device_info(gpu_device_info_);
 
+  // Whether and how the GPU device uses its own threadpool.
+  // This option is experimental. Once we confirm the best setting, we
+  // may change the default behavior and completely remove this flag.
+  // Default values might change in future releases.
+  // Possible values:
+  //   * global: GPU uses threads shared with CPU in the main compute
+  //          thread-pool. This is currently the default.
+  //   * gpu_private: GPU uses threads dedicated to this device.
+  //   * gpu_shared: All GPUs share a dedicated thread pool.
+  string gpu_thread_mode;
+  TF_RETURN_IF_ERROR(
+      ReadStringFromEnvVar("TF_GPU_THREAD_MODE", "global", &gpu_thread_mode));
+  gpu_thread_mode = str_util::Lowercase(gpu_thread_mode);
+  if (gpu_thread_mode != "global") {
+    int64 gpu_thread_count = -1;
+    // Default to two threads. One for device compute and another for memory
+    // copies.
+    TF_RETURN_IF_ERROR(
+        ReadInt64FromEnvVar("TF_GPU_THREAD_COUNT", 2, &gpu_thread_count));
+    if (gpu_thread_mode == "gpu_private") {
+      // TODO(zhengxq): since these threads only serve a single GPU device,
+      //   we should set the device context once for each thread, and avoid
+      //   setting them for each kernel.
+      // TODO(zhengxq): pin the thread to the same socket of the target GPU.
+      thread_pool_.reset(new thread::ThreadPool(
+          options.env, strings::StrCat("gpu_private_", gpu_id_),
+          static_cast<int32>(gpu_thread_count)));
+      set_tensorflow_device_thread_pool(thread_pool_.get());
+    } else if (gpu_thread_mode == "gpu_shared") {
+      static thread::ThreadPool* thread_pool = new thread::ThreadPool(
+          options.env, "gpu_shared", static_cast<int32>(gpu_thread_count));
+      set_tensorflow_device_thread_pool(thread_pool);
+    } else {
+      string error_message =
+          strings::StrCat("Invalid gpu_thread_mode: ", gpu_thread_mode);
+      LOG(WARNING) << error_message;
+      return errors::InvalidArgument(error_message);
+    }
+  }
+
   return Status::OK();
 }
 
diff --git a/tensorflow/core/common_runtime/gpu/gpu_device.h b/tensorflow/core/common_runtime/gpu/gpu_device.h
index 442496437a..4585d5b04d 100644
--- a/tensorflow/core/common_runtime/gpu/gpu_device.h
+++ b/tensorflow/core/common_runtime/gpu/gpu_device.h
@@ -116,6 +116,7 @@ class BaseGPUDevice : public LocalDevice {
   const bool sync_every_op_ = false;
   const int32 max_streams_;
   std::unique_ptr<EventMgr> em_;
+  std::unique_ptr<thread::ThreadPool> thread_pool_;
 
   void ReinitializeDevice(OpKernelContext* context, PerOpGpuDevice* device,
                           int stream_id, Allocator* allocator);
diff --git a/tensorflow/core/framework/device_base.h b/tensorflow/core/framework/device_base.h
index 33bd5d250c..1838a8ad02 100644
--- a/tensorflow/core/framework/device_base.h
+++ b/tensorflow/core/framework/device_base.h
@@ -145,6 +145,12 @@ class DeviceBase {
     return gpu_device_info_;
   }
 
+  // The preferred thread pool for this device. If it is nullptr, the system
+  // automatically assigns a thread pool for execution.
+  virtual thread::ThreadPool* tensorflow_device_thread_pool() {
+    return device_thread_pool_;
+  }
+
   // Does not take ownership.
   void set_eigen_cpu_device(Eigen::ThreadPoolDevice* d) {
     eigen_cpu_device_ = d;
@@ -215,10 +221,17 @@ class DeviceBase {
     return errors::Internal("Device does not implement MakeTensorFromProto()");
   }
 
+ protected:
+  // Does not take ownership.
+  void set_tensorflow_device_thread_pool(thread::ThreadPool* thread_pool) {
+    device_thread_pool_ = thread_pool;
+  }
+
  private:
   Env* const env_;
   CpuWorkerThreads* cpu_worker_threads_ = nullptr;
   GpuDeviceInfo* gpu_device_info_ = nullptr;
+  thread::ThreadPool* device_thread_pool_ = nullptr;
   Eigen::ThreadPoolDevice* eigen_cpu_device_ = nullptr;
 #ifdef TENSORFLOW_USE_SYCL
   Eigen::SyclDevice* eigen_sycl_device_ = nullptr;
-- 
GitLab


From a4b68b60985dfb518e748868dd2a316c53e8c413 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 11 Dec 2017 13:38:28 -0800
Subject: [PATCH 0880/1225] Enable optimizations of operations with
 neutral/absorbing elements by default. We leave removal of addition and
 subtraction with zero out for now, since it is used as a "hack" to force a
 copy of a tensor in a few places. Once we have fixed this code, we can enable
 it.

PiperOrigin-RevId: 178665567
---
 .../core/grappler/optimizers/constant_folding.cc  | 15 +++++++--------
 tensorflow/python/grappler/cluster_test.py        |  4 ++--
 2 files changed, 9 insertions(+), 10 deletions(-)

diff --git a/tensorflow/core/grappler/optimizers/constant_folding.cc b/tensorflow/core/grappler/optimizers/constant_folding.cc
index 7f2dcf6efc..cb9a5fde2e 100644
--- a/tensorflow/core/grappler/optimizers/constant_folding.cc
+++ b/tensorflow/core/grappler/optimizers/constant_folding.cc
@@ -1304,6 +1304,7 @@ Status ConstantFolding::ReplaceOperationWithConstant(
 Status ConstantFolding::SimplifyGraph(GraphDef* output,
                                       const GraphProperties& properties,
                                       bool use_shape_info) {
+  const bool is_aggressive = opt_level_ == RewriterConfig::AGGRESSIVE;
   for (auto& node : *output->mutable_node()) {
     if (IsSimplifiableReduction(node)) {
       // Replace the reduction node with an identity node, that can be further
@@ -1321,8 +1322,7 @@ Status ConstantFolding::SimplifyGraph(GraphDef* output,
       *node.mutable_input(1) = AsControlDependency(node.input(1));
     }
     const bool safe_to_use_shapes =
-        use_shape_info &&
-        (feed_nodes_.empty() || opt_level_ == RewriterConfig::AGGRESSIVE);
+        use_shape_info && (feed_nodes_.empty() || is_aggressive);
     if (safe_to_use_shapes && IsSimplifiableReshape(node, properties)) {
       DataType output_type = node.attr().at("T").type();
       node.set_op("Identity");
@@ -1339,8 +1339,7 @@ Status ConstantFolding::SimplifyGraph(GraphDef* output,
     bool is_add = IsAdd(node) || IsBiasAdd(node);
     bool is_sub = IsSub(node);
     bool is_div = IsAnyDiv(node);
-    if (opt_level_ == RewriterConfig::AGGRESSIVE && use_shape_info &&
-        (is_mul || is_matmul || is_add || is_sub || is_div) &&
+    if (use_shape_info && (is_mul || is_matmul || is_add || is_sub || is_div) &&
         properties.HasInputProperties(node.name()) &&
         properties.HasOutputProperties(node.name())) {
       const NodeDef* x = node_map_->GetNode(node.input(0));
@@ -1378,8 +1377,9 @@ Status ConstantFolding::SimplifyGraph(GraphDef* output,
       const bool y_is_zero = IsZeros(*y);
       const bool y_is_one = IsOnes(*y);
       const bool x_matches_output_shape = ShapesEqual(output_shape, x_shape);
-      if (x_matches_output_shape && (((is_mul || is_div) && y_is_one) ||
-                                     ((is_add || is_sub) && y_is_zero))) {
+      if (x_matches_output_shape &&
+          (((is_mul || is_div) && y_is_one) ||
+           ((is_add || is_sub) && y_is_zero && is_aggressive))) {
         // x * 1 = x or x / 1 = x or x +/- 0 = x
         ReplaceOperationWithIdentity(0, &node);
         continue;
@@ -1388,8 +1388,7 @@ Status ConstantFolding::SimplifyGraph(GraphDef* output,
       // Simplify multiplication and matmul by zeros.
       // Also optimize zeros divided by a tensor, but only if we are in
       // aggressive mode, since we might get rid of divisions by zero.
-      bool optimize_zeros_divided_by_y =
-          is_div && x_is_zero && opt_level_ == RewriterConfig::AGGRESSIVE;
+      bool optimize_zeros_divided_by_y = is_div && x_is_zero && is_aggressive;
       if ((x_is_zero || y_is_zero) &&
           (is_mul || is_matmul || optimize_zeros_divided_by_y)) {
         const PartialTensorShape shp(output_shape);
diff --git a/tensorflow/python/grappler/cluster_test.py b/tensorflow/python/grappler/cluster_test.py
index 77dd55981b..3ddcb741b5 100644
--- a/tensorflow/python/grappler/cluster_test.py
+++ b/tensorflow/python/grappler/cluster_test.py
@@ -43,7 +43,7 @@ class ClusterTest(test.TestCase):
       op_perfs, run_time, step_stats = grappler_cluster.MeasureCosts(
           grappler_item)
       self.assertTrue(run_time > 0)
-      self.assertEqual(len(op_perfs), 10)
+      self.assertEqual(len(op_perfs), 9)
       self.assertTrue(step_stats.dev_stats)
 
   def testNoDetailedStats(self):
@@ -120,7 +120,7 @@ class ClusterTest(test.TestCase):
         disable_detailed_stats=False, disable_timeline=False) as gcluster:
       op_perfs, run_time, step_stats = gcluster.MeasureCosts(grappler_item)
       self.assertTrue(run_time > 0)
-      self.assertEqual(len(op_perfs), 10)
+      self.assertEqual(len(op_perfs), 9)
       self.assertTrue(step_stats.dev_stats)
 
 
-- 
GitLab


From 80ac330044db0f5b1bcf95a48ce115bc92186f49 Mon Sep 17 00:00:00 2001
From: Sanjoy Das <sanjoy@google.com>
Date: Mon, 11 Dec 2017 13:43:25 -0800
Subject: [PATCH 0881/1225] [XLA:CPU] Error on unsupported dot instructions

This is a stopgap measure to avoid silently miscompiling dot operations.

PiperOrigin-RevId: 178666218
---
 tensorflow/compiler/xla/service/cpu/ir_emitter.cc | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/tensorflow/compiler/xla/service/cpu/ir_emitter.cc b/tensorflow/compiler/xla/service/cpu/ir_emitter.cc
index dd027986b2..c82a0c7ef4 100644
--- a/tensorflow/compiler/xla/service/cpu/ir_emitter.cc
+++ b/tensorflow/compiler/xla/service/cpu/ir_emitter.cc
@@ -808,6 +808,19 @@ Status IrEmitter::HandleDot(HloInstruction* dot) {
     return Unimplemented("Dot with batch dimensions not implemented.");
   }
 
+  if (dnums.lhs_contracting_dimensions_size() != 1) {
+    // This is disallowed by ShapeInference today.
+    return Unimplemented(
+        "Dot with multiple contracting dimensions not implemented.");
+  }
+
+  if (dnums.lhs_contracting_dimensions(0) !=
+          std::min(lhs->shape().dimensions_size() - 1, 1) ||
+      dnums.rhs_contracting_dimensions(0) != 0) {
+    return Unimplemented(
+        "Dot with non-standard contracting dimensions not implemented.");
+  }
+
   llvm_ir::IrArray lhs_array(GetIrArrayFor(lhs));
   llvm_ir::IrArray rhs_array(GetIrArrayFor(rhs));
 
-- 
GitLab


From db198b8618776731cc2871369f057b176488a1fe Mon Sep 17 00:00:00 2001
From: Justine Tunney <jart@google.com>
Date: Mon, 11 Dec 2017 13:44:26 -0800
Subject: [PATCH 0882/1225] Tune SQLite

This change makes sure the b-tree page size isn't 1024 bytes. It also enables
WAL mode. This means TensorBoard can perform reads at the same time as
TensorFlow is performing writes.

We now also fsync() less often. This shouldn't carry any risk of database
corruption in WAL mode. Since WAL mode uses shared memory, writes become
immediately available to other processes, but they won't become durable until
after the OS decides to flush the FS cache.

This makes the DB writer faster than the file writer, at least in cases where
the DB is tiny. We probably make it go faster still, once we find a way to use
transactions.

Name                      Cold ?s   Average ?s  Flushing ?s       Size B
?i.i                        1,920           69            0            0
Scalar 1.0 FS               1,623          337        4,258       11,348
Scalar 1.0 TB FS            3,137          527        4,213       17,023
Scalar 2.0 FS               3,319          681        3,917       11,348
Scalar 2.0 DB               2,601          578          217      118,784
Tensor 1.0 FS 4             6,397          558        4,276       14,215
Tensor 2.0 FS 4             1,678          613        3,971       24,455
Tensor 2.0 DB 4             3,605          278          313      118,784
Tensor 1.0 FS 128           1,857          289        4,397       47,111
Tensor 2.0 FS 128           3,558          721       10,894       57,351
Tensor 2.0 DB 128           3,508          585          203      118,784
Tensor 1.0 FS 8192          2,677          525        4,400    2,119,816
Tensor 2.0 FS 8192          2,248          822        4,006    2,130,056
Tensor 2.0 DB 8192          4,346          370          449      126,976

PiperOrigin-RevId: 178666363
---
 tensorflow/core/kernels/summary_kernels.cc |  1 +
 tensorflow/core/lib/db/sqlite.cc           | 57 +++++++++++++++++++---
 tensorflow/core/lib/db/sqlite.h            | 14 ++++--
 3 files changed, 63 insertions(+), 9 deletions(-)

diff --git a/tensorflow/core/kernels/summary_kernels.cc b/tensorflow/core/kernels/summary_kernels.cc
index 7487e70acc..f092afe66c 100644
--- a/tensorflow/core/kernels/summary_kernels.cc
+++ b/tensorflow/core/kernels/summary_kernels.cc
@@ -67,6 +67,7 @@ class CreateSummaryDbWriterOp : public OpKernel {
     SummaryWriterInterface* s;
     auto db = Sqlite::Open(db_uri);
     OP_REQUIRES_OK(ctx, db.status());
+    db.ValueOrDie()->UseWriteAheadLogWithReducedDurabilityIfPossible();
     OP_REQUIRES_OK(
         ctx, CreateSummaryDbWriter(std::move(db.ValueOrDie()), experiment_name,
                                    run_name, user_name, ctx->env(), &s));
diff --git a/tensorflow/core/lib/db/sqlite.cc b/tensorflow/core/lib/db/sqlite.cc
index 701655f622..23361e6431 100644
--- a/tensorflow/core/lib/db/sqlite.cc
+++ b/tensorflow/core/lib/db/sqlite.cc
@@ -18,15 +18,36 @@ limitations under the License.
 #include "tensorflow/core/platform/logging.h"
 
 namespace tensorflow {
+namespace {
+
+void ExecuteOrLog(Sqlite* db, const char* sql) {
+  Status s = db->Prepare(sql).StepAndReset();
+  if (!s.ok()) {
+    LOG(WARNING) << s.ToString();
+  }
+}
+
+string ExecuteOrEmpty(Sqlite* db, const char* sql) {
+  auto stmt = db->Prepare(sql);
+  bool is_done = false;
+  if (stmt.Step(&is_done).ok() && !is_done) {
+    return stmt.ColumnString(0);
+  }
+  return "";
+}
+
+}  // namespace
 
 /* static */
 xla::StatusOr<std::shared_ptr<Sqlite>> Sqlite::Open(const string& uri) {
   sqlite3* sqlite = nullptr;
-  Status s = MakeStatus(sqlite3_open(uri.c_str(), &sqlite));
-  if (s.ok()) {
-    return std::shared_ptr<Sqlite>(new Sqlite(sqlite));
-  }
-  return s;
+  TF_RETURN_IF_ERROR(MakeStatus(sqlite3_open(uri.c_str(), &sqlite)));
+  Sqlite* db = new Sqlite(sqlite, uri);
+  // This is the SQLite default since 2016. However it's good to set
+  // this anyway, since we might get linked against an older version of
+  // the library, and it's pretty much impossible to change later.
+  ExecuteOrLog(db, "PRAGMA page_size=4096");
+  return std::shared_ptr<Sqlite>(db);
 }
 
 /* static */ Status Sqlite::MakeStatus(int resultCode) {
@@ -75,7 +96,7 @@ xla::StatusOr<std::shared_ptr<Sqlite>> Sqlite::Open(const string& uri) {
   }
 }
 
-Sqlite::Sqlite(sqlite3* db) : db_(db) {}
+Sqlite::Sqlite(sqlite3* db, const string& uri) : db_(db), uri_(uri) {}
 
 Sqlite::~Sqlite() {
   // close_v2 doesn't care if a stmt hasn't been GC'd yet
@@ -97,6 +118,30 @@ Status Sqlite::Close() {
   return s;
 }
 
+void Sqlite::UseWriteAheadLogWithReducedDurabilityIfPossible() {
+  // TensorFlow summaries are intensively write-heavy, cf. most apps.
+  // This pragma loves writes and means that TensorBoard can read the
+  // database even as the training job inserts stuff. In other words,
+  // this makes SQLite almost as powerful as MySQL or PostgreSQL.
+  // https://www.sqlite.org/wal.html
+  string journal = ExecuteOrEmpty(this, "PRAGMA journal_mode=wal");
+  if (journal != "wal") {
+    LOG(WARNING) << "Failed to set journal_mode=wal because SQLite wants "
+                 << uri_ << " to be in '" << journal << "' mode, which might "
+                 << "be bad since WAL is important for the performance of "
+                 << "write-intensive apps. This might only happen for memory "
+                 << "databases or old versions of SQLite, but is definitely "
+                 << "worth fixing if that's not the case";
+  } else {
+    // This setting means we might lose transactions due to power loss,
+    // but the database can't become corrupted. In exchange, we get the
+    // the performance of a NoSQL database. This is a trade-off most data
+    // scientists would consider acceptable.
+    // https://www.sqlite.org/pragma.html#pragma_synchronous
+    ExecuteOrLog(this, "PRAGMA synchronous=NORMAL");
+  }
+}
+
 SqliteStatement Sqlite::Prepare(const string& sql) {
   sqlite3_stmt* stmt = nullptr;
   int rc = sqlite3_prepare_v2(db_, sql.c_str(), sql.size() + 1, &stmt, nullptr);
diff --git a/tensorflow/core/lib/db/sqlite.h b/tensorflow/core/lib/db/sqlite.h
index 774852efea..12840bd42b 100644
--- a/tensorflow/core/lib/db/sqlite.h
+++ b/tensorflow/core/lib/db/sqlite.h
@@ -15,7 +15,7 @@ limitations under the License.
 #ifndef TENSORFLOW_CORE_LIB_DB_SQLITE_H_
 #define TENSORFLOW_CORE_LIB_DB_SQLITE_H_
 
-#include <stddef.h>
+#include <cstddef>
 #include <memory>
 #include <utility>
 
@@ -69,6 +69,13 @@ class Sqlite {
   /// beforehand. This is a no-op if already closed
   Status Close();
 
+  /// \brief Enables WAL mode with less fsync or log a warning.
+  ///
+  /// The synchronous pragma is only set to NORMAL if WAL mode was
+  /// successfully enabled. This must be called immediately after
+  /// creating the object.
+  void UseWriteAheadLogWithReducedDurabilityIfPossible();
+
   /// \brief Creates SQLite statement.
   ///
   /// Call result.status() to determine whether or not this operation
@@ -78,8 +85,9 @@ class Sqlite {
   SqliteStatement Prepare(const string& sql);
 
  private:
-  explicit Sqlite(sqlite3* db);
+  explicit Sqlite(sqlite3* db, const string& uri);
   sqlite3* db_;
+  string uri_;
   TF_DISALLOW_COPY_AND_ASSIGN(Sqlite);
 };
 
@@ -103,7 +111,7 @@ class SqliteStatement {
   SqliteStatement& operator=(SqliteStatement&& other);
 
   /// \brief Returns true if statement is not empty.
-  operator bool() const { return stmt_ != nullptr; }
+  explicit operator bool() const { return stmt_ != nullptr; }
 
   /// \brief Returns SQLite result code state.
   ///
-- 
GitLab


From dd77f385591c8b6ef7ab8dae7429c7eff7813a1e Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 11 Dec 2017 13:48:51 -0800
Subject: [PATCH 0883/1225] [XLA] Move BatchDot unrolling from TF2XLA bridge to
 AlgebraicSimplifier so that unrolling can be selectively enabled/disabled per
 backend (should be no performance change).

PiperOrigin-RevId: 178666990
---
 tensorflow/compiler/tf2xla/lib/batch_dot.cc   | 100 +++-------
 tensorflow/compiler/xla/service/BUILD         |  16 ++
 tensorflow/compiler/xla/service/cpu/BUILD     |   1 +
 .../compiler/xla/service/cpu/cpu_compiler.cc  |   3 +-
 .../compiler/xla/service/dot_decomposer.cc    | 185 ++++++++++++++++++
 .../compiler/xla/service/dot_decomposer.h     |  44 +++++
 tensorflow/compiler/xla/service/gpu/BUILD     |   1 +
 .../compiler/xla/service/gpu/gpu_compiler.cc  |   3 +-
 .../compiler/xla/service/shape_inference.cc   |  15 ++
 .../xla/service/shape_inference_test.cc       |   2 +-
 .../compiler/xla/tests/dot_operation_test.cc  |  20 --
 11 files changed, 299 insertions(+), 91 deletions(-)
 create mode 100644 tensorflow/compiler/xla/service/dot_decomposer.cc
 create mode 100644 tensorflow/compiler/xla/service/dot_decomposer.h

diff --git a/tensorflow/compiler/tf2xla/lib/batch_dot.cc b/tensorflow/compiler/tf2xla/lib/batch_dot.cc
index 28a5e6a58b..9b0e617447 100644
--- a/tensorflow/compiler/tf2xla/lib/batch_dot.cc
+++ b/tensorflow/compiler/tf2xla/lib/batch_dot.cc
@@ -27,7 +27,6 @@ namespace tensorflow {
 
 // The current implementation simply unrolls the computation along the batch
 // dimension.
-// TODO(andydavis): add batching support to XLA's Dot operator.
 xla::StatusOr<xla::ComputationDataHandle> BatchDot(
     xla::ComputationBuilder* builder, xla::ComputationDataHandle x,
     xla::ComputationDataHandle y, bool transpose_x, bool transpose_y) {
@@ -52,26 +51,20 @@ xla::StatusOr<xla::ComputationDataHandle> BatchDot(
 
   // The batch dimensions must be equal and the matrix dimensions must be
   // valid.
-  std::vector<int64> dimensions;
-  int64 batch_count = 1;
+  std::vector<int64> batch_dimension_numbers;
   for (int i = 0; i < ndims - 2; ++i) {
-    int64 x_size = x_shape->dimensions(i);
-    int64 y_size = y_shape->dimensions(i);
-    if (x_size != y_size) {
+    if (x_shape->dimensions(i) != y_shape->dimensions(i)) {
       return errors::InvalidArgument(
           "Dimension ", i, " of inputs to BatchedDot must be equal: ",
           xla::ShapeUtil::HumanString(*x_shape), " vs ",
           xla::ShapeUtil::HumanString(*y_shape));
     }
-    dimensions.push_back(x_size);
-    batch_count *= x_size;
+    batch_dimension_numbers.push_back(i);
   }
 
   int x_inner_dim = transpose_x ? (ndims - 2) : (ndims - 1);
   int y_inner_dim = transpose_y ? (ndims - 1) : (ndims - 2);
-  int64 x_inner_dim_size = x_shape->dimensions(x_inner_dim);
-  int64 y_inner_dim_size = y_shape->dimensions(y_inner_dim);
-  if (x_inner_dim_size != y_inner_dim_size) {
+  if (x_shape->dimensions(x_inner_dim) != y_shape->dimensions(y_inner_dim)) {
     return errors::InvalidArgument(
         "Dimensions ", x_inner_dim, " and ", y_inner_dim,
         " of arguments to BatchedDot must be equal: ",
@@ -80,19 +73,22 @@ xla::StatusOr<xla::ComputationDataHandle> BatchDot(
         " transpose: ", transpose_y);
   }
 
-  // If there are no batch dimensions, use a regular Dot. This case exists
-  // to improve the readability of the emitted graphs.
-  if (dimensions.empty()) {
-    auto lhs = transpose_x ? builder->Transpose(x, {1, 0}) : x;
-    auto rhs = transpose_y ? builder->Transpose(y, {1, 0}) : y;
-    return builder->Dot(lhs, rhs);
+  // Check for zero lhs/rhs dim size.
+  if (xla::ShapeUtil::HasZeroElements(*x_shape) ||
+      xla::ShapeUtil::HasZeroElements(*y_shape)) {
+    std::vector<int64> dimensions(batch_dimension_numbers.size());
+    for (int i = 0; i < batch_dimension_numbers.size(); ++i) {
+      dimensions[i] = x_shape->dimensions(batch_dimension_numbers[i]);
+    }
+    int x_outer_dim = transpose_x ? (ndims - 1) : (ndims - 2);
+    int y_outer_dim = transpose_y ? (ndims - 2) : (ndims - 1);
+    dimensions.push_back(x_shape->dimensions(x_outer_dim));
+    dimensions.push_back(y_shape->dimensions(y_outer_dim));
+    return builder->Broadcast(
+        builder->ConstantLiteral(xla::Literal::Zero(x_shape->element_type())),
+        dimensions);
   }
 
-  int x_outer_dim = transpose_x ? (ndims - 1) : (ndims - 2);
-  int y_outer_dim = transpose_y ? (ndims - 2) : (ndims - 1);
-  dimensions.push_back(x_shape->dimensions(x_outer_dim));
-  dimensions.push_back(y_shape->dimensions(y_outer_dim));
-
   if (x_shape->element_type() == xla::C64 && transpose_x) {
     x = builder->Conj(x);
   }
@@ -100,55 +96,23 @@ xla::StatusOr<xla::ComputationDataHandle> BatchDot(
     y = builder->Conj(y);
   }
 
-  // Reshape input tensors into 3D tensors by flattening the batch
-  // dimensions. This makes it easier to unroll the batch dimension.
-  auto x_flat =
-      builder->Reshape(x, {batch_count, x_shape->dimensions(ndims - 2),
-                           x_shape->dimensions(ndims - 1)});
-  auto y_flat =
-      builder->Reshape(y, {batch_count, y_shape->dimensions(ndims - 2),
-                           y_shape->dimensions(ndims - 1)});
-
-  // Slice batches into individual matrices and multiply them.
-  std::vector<xla::ComputationDataHandle> out_slices;
-  for (int64 i = 0; i < batch_count; ++i) {
-    // Slice off individual matrices and reshape to 2D tensors.
-    auto x_slice = builder->Slice(
-        x_flat, {i, 0, 0},
-        {i + 1, x_shape->dimensions(ndims - 2), x_shape->dimensions(ndims - 1)},
-        {1, 1, 1});
-    x_slice = builder->Reshape(x_slice, {x_shape->dimensions(ndims - 2),
-                                         x_shape->dimensions(ndims - 1)});
-    auto y_slice = builder->Slice(
-        y_flat, {i, 0, 0},
-        {i + 1, y_shape->dimensions(ndims - 2), y_shape->dimensions(ndims - 1)},
-        {1, 1, 1});
-    y_slice = builder->Reshape(y_slice, {y_shape->dimensions(ndims - 2),
-                                         y_shape->dimensions(ndims - 1)});
-
-    // Transpose if needed.
-    auto lhs = transpose_x ? builder->Transpose(x_slice, {1, 0}) : x_slice;
-    auto rhs = transpose_y ? builder->Transpose(y_slice, {1, 0}) : y_slice;
-
-    // Multiply matrices and add an outer singleton dimension to the output
-    // so we can concatenate along the flattened batch dimension later.
-    auto out = builder->Dot(lhs, rhs);
-    out = builder->Reshape(out,
-                           {1, dimensions[ndims - 2], dimensions[ndims - 1]});
-    out_slices.push_back(out);
+  // If there are no batch dimensions, use a regular Dot.
+  // TODO(b/69062148) Remove this code when Dot emitters can be passed
+  // dimensions to transpose directly (i.e. without requiring a Transpose HLO).
+  if (batch_dimension_numbers.empty()) {
+    auto lhs = transpose_x ? builder->Transpose(x, {1, 0}) : x;
+    auto rhs = transpose_y ? builder->Transpose(y, {1, 0}) : y;
+    return builder->Dot(lhs, rhs);
   }
 
-  // Concatenate output slices and reshape to original number of dimensions.
-  xla::ComputationDataHandle data;
-  if (out_slices.empty()) {
-    // It is illegal to pass an empty list to ConcatInDim.
-    // The batch count is empty, so both inputs must have zero elements.
-    // Arbitrarily use the left input as the argument to Reshape().
-    data = x;
-  } else {
-    data = builder->ConcatInDim(out_slices, 0);
+  xla::DotDimensionNumbers dot_dnums;
+  dot_dnums.add_lhs_contracting_dimensions(x_inner_dim);
+  dot_dnums.add_rhs_contracting_dimensions(y_inner_dim);
+  for (auto batch_dimension_number : batch_dimension_numbers) {
+    dot_dnums.add_lhs_batch_dimensions(batch_dimension_number);
+    dot_dnums.add_rhs_batch_dimensions(batch_dimension_number);
   }
-  return builder->Reshape(data, dimensions);
+  return builder->DotGeneral(x, y, dot_dnums);
 }
 
 }  // namespace tensorflow
diff --git a/tensorflow/compiler/xla/service/BUILD b/tensorflow/compiler/xla/service/BUILD
index baa4afde2d..179ab47ae7 100644
--- a/tensorflow/compiler/xla/service/BUILD
+++ b/tensorflow/compiler/xla/service/BUILD
@@ -1145,6 +1145,22 @@ tf_cc_test(
     ],
 )
 
+cc_library(
+    name = "dot_decomposer",
+    srcs = ["dot_decomposer.cc"],
+    hdrs = ["dot_decomposer.h"],
+    deps = [
+        ":hlo",
+        ":hlo_pass",
+        "//tensorflow/compiler/xla:shape_util",
+        "//tensorflow/compiler/xla:status_macros",
+        "//tensorflow/compiler/xla:types",
+        "//tensorflow/compiler/xla:util",
+        "//tensorflow/compiler/xla:xla_data_proto",
+        "//tensorflow/core:lib",
+    ],
+)
+
 cc_library(
     name = "tuple_simplifier",
     srcs = ["tuple_simplifier.cc"],
diff --git a/tensorflow/compiler/xla/service/cpu/BUILD b/tensorflow/compiler/xla/service/cpu/BUILD
index fe537dfdf2..b43597dca9 100644
--- a/tensorflow/compiler/xla/service/cpu/BUILD
+++ b/tensorflow/compiler/xla/service/cpu/BUILD
@@ -104,6 +104,7 @@ cc_library(
         "//tensorflow/compiler/xla/service:buffer_assignment",
         "//tensorflow/compiler/xla/service:buffer_liveness",
         "//tensorflow/compiler/xla/service:call_inliner",
+        "//tensorflow/compiler/xla/service:dot_decomposer",
         "//tensorflow/compiler/xla/service:executable",
         "//tensorflow/compiler/xla/service:flatten_call_graph",
         "//tensorflow/compiler/xla/service:hlo",
diff --git a/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc b/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc
index 44d80d75f5..55e7c7bc2c 100644
--- a/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc
+++ b/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc
@@ -62,6 +62,7 @@ limitations under the License.
 #include "tensorflow/compiler/xla/service/cpu/parallel_task_assignment.h"
 #include "tensorflow/compiler/xla/service/cpu/simple_orc_jit.h"
 #include "tensorflow/compiler/xla/service/dfs_hlo_visitor_with_default.h"
+#include "tensorflow/compiler/xla/service/dot_decomposer.h"
 #include "tensorflow/compiler/xla/service/flatten_call_graph.h"
 #include "tensorflow/compiler/xla/service/hlo.pb.h"
 #include "tensorflow/compiler/xla/service/hlo_computation.h"
@@ -273,7 +274,7 @@ Status CpuCompiler::RunHloPasses(HloModule* module, bool is_aot_compile) {
   // TODO(b/65775800): Fix wrong output bug in Call and remove the CallInliner
   // pass.
   pipeline.AddPass<CallInliner>();
-
+  pipeline.AddPass<DotDecomposer>();
   pipeline.AddPass<ConvCanonicalization>();
   {
     auto& pass =
diff --git a/tensorflow/compiler/xla/service/dot_decomposer.cc b/tensorflow/compiler/xla/service/dot_decomposer.cc
new file mode 100644
index 0000000000..12faed6967
--- /dev/null
+++ b/tensorflow/compiler/xla/service/dot_decomposer.cc
@@ -0,0 +1,185 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/compiler/xla/service/dot_decomposer.h"
+
+#include "tensorflow/compiler/xla/service/hlo_computation.h"
+#include "tensorflow/compiler/xla/service/hlo_instruction.h"
+#include "tensorflow/compiler/xla/service/hlo_opcode.h"
+#include "tensorflow/compiler/xla/shape_util.h"
+#include "tensorflow/compiler/xla/status_macros.h"
+#include "tensorflow/compiler/xla/types.h"
+#include "tensorflow/core/platform/logging.h"
+
+namespace xla {
+
+namespace {
+
+// TODO(b/69062148) Remove this code when all backends support BatchDot
+// natively.
+Status DecomposeBatchDot(HloInstruction* dot) {
+  auto computation = dot->parent();
+  const DotDimensionNumbers& dnums = dot->dot_dimension_numbers();
+  HloInstruction* lhs = dot->mutable_operand(0);
+  HloInstruction* rhs = dot->mutable_operand(1);
+  const Shape& lhs_shape = lhs->shape();
+  const Shape& rhs_shape = rhs->shape();
+  const Shape& dot_shape = dot->shape();
+
+  // ShapeInference should guarantee that lhs/rhs batch dimensions match.
+  CHECK_EQ(dnums.lhs_batch_dimensions_size(),
+           dnums.rhs_batch_dimensions_size());
+  const int64 num_batch_dims = dnums.lhs_batch_dimensions_size();
+  // Calculate total batch size (note that ShapeInference requires that
+  // the batch dimensions are most-major).
+  int64 batch_size = 1;
+  for (int i = 0; i < num_batch_dims; ++i) {
+    CHECK_EQ(lhs_shape.dimensions(dnums.lhs_batch_dimensions(i)),
+             rhs_shape.dimensions(dnums.rhs_batch_dimensions(i)));
+    batch_size *= lhs_shape.dimensions(dnums.lhs_batch_dimensions(i));
+  }
+
+  // Set lhs/rhs_transpose.
+  CHECK_EQ(1, dnums.lhs_contracting_dimensions_size());
+  const int64 lhs_contracting_dim_number = dnums.lhs_contracting_dimensions(0);
+  const bool lhs_transpose = (lhs_contracting_dim_number - num_batch_dims) == 0;
+
+  CHECK_EQ(1, dnums.rhs_contracting_dimensions_size());
+  const int64 rhs_contracting_dim_number = dnums.rhs_contracting_dimensions(0);
+  const bool rhs_transpose = (rhs_contracting_dim_number - num_batch_dims) == 1;
+
+  // Compute R3 and R3 shapes for lhs.
+  PrimitiveType lhs_type = lhs_shape.element_type();
+  const int64 lhs_rows = lhs_shape.dimensions(num_batch_dims + 0);
+  const int64 lhs_cols = lhs_shape.dimensions(num_batch_dims + 1);
+  Shape lhs_shape_r3 =
+      ShapeUtil::MakeShape(lhs_type, {batch_size, lhs_rows, lhs_cols});
+  Shape lhs_slice_shape_r3 =
+      ShapeUtil::MakeShape(lhs_type, {1, lhs_rows, lhs_cols});
+  Shape lhs_slice_shape_r2 =
+      ShapeUtil::MakeShape(lhs_type, {lhs_rows, lhs_cols});
+
+  // Compute R3 and R3 shapes for rhs.
+  PrimitiveType rhs_type = rhs_shape.element_type();
+  const int64 rhs_rows = rhs_shape.dimensions(num_batch_dims + 0);
+  const int64 rhs_cols = rhs_shape.dimensions(num_batch_dims + 1);
+  Shape rhs_shape_r3 =
+      ShapeUtil::MakeShape(rhs_type, {batch_size, rhs_rows, rhs_cols});
+  Shape rhs_slice_shape_r3 =
+      ShapeUtil::MakeShape(rhs_type, {1, rhs_rows, rhs_cols});
+  Shape rhs_slice_shape_r2 =
+      ShapeUtil::MakeShape(rhs_type, {rhs_rows, rhs_cols});
+
+  // Compute R3 and R3 shapes for dot output.
+  PrimitiveType dot_type = dot_shape.element_type();
+  const int64 dot_rows = dot_shape.dimensions(num_batch_dims + 0);
+  const int64 dot_cols = dot_shape.dimensions(num_batch_dims + 1);
+  Shape dot_shape_r2 = ShapeUtil::MakeShape(dot_type, {dot_rows, dot_cols});
+  Shape dot_shape_r3 = ShapeUtil::MakeShape(dot_type, {1, dot_rows, dot_cols});
+  Shape concat_shape_r3 =
+      ShapeUtil::MakeShape(dot_type, {batch_size, dot_rows, dot_cols});
+
+  // Reshape lhs/rhs into R3.
+  auto lhs_r3 = computation->AddInstruction(
+      HloInstruction::CreateReshape(lhs_shape_r3, lhs));
+  auto rhs_r3 = computation->AddInstruction(
+      HloInstruction::CreateReshape(rhs_shape_r3, rhs));
+
+  // Loop through batch size, slicing out required lhs/rhs to compute each Dot.
+  std::vector<HloInstruction*> output_slices(batch_size);
+  for (int64 i = 0; i < batch_size; ++i) {
+    // Slice R3 shape from 'lhs' and reshape to R2.
+    auto lhs_slice_r3 = computation->AddInstruction(
+        HloInstruction::CreateSlice(lhs_slice_shape_r3, lhs_r3, {i, 0, 0},
+                                    {i + 1, lhs_rows, lhs_cols}, {1, 1, 1}));
+    auto lhs_slice_r2 = computation->AddInstruction(
+        HloInstruction::CreateReshape(lhs_slice_shape_r2, lhs_slice_r3));
+
+    // Slice R3 shape from 'rhs' and reshape to R2.
+    auto rhs_slice_r3 = computation->AddInstruction(
+        HloInstruction::CreateSlice(rhs_slice_shape_r3, rhs_r3, {i, 0, 0},
+                                    {i + 1, rhs_rows, rhs_cols}, {1, 1, 1}));
+    auto rhs_slice_r2 = computation->AddInstruction(
+        HloInstruction::CreateReshape(rhs_slice_shape_r2, rhs_slice_r3));
+
+    // Transpose lhs/rhs (if needed).
+    if (lhs_transpose) {
+      Shape lhs_slice_shape_r2_transpose =
+          ShapeUtil::MakeShape(lhs_type, {lhs_cols, lhs_rows});
+      lhs_slice_r2 =
+          computation->AddInstruction(HloInstruction::CreateTranspose(
+              lhs_slice_shape_r2_transpose, lhs_slice_r2, {1, 0}));
+    }
+    if (rhs_transpose) {
+      Shape rhs_slice_shape_r2_transpose =
+          ShapeUtil::MakeShape(rhs_type, {rhs_cols, rhs_rows});
+      rhs_slice_r2 =
+          computation->AddInstruction(HloInstruction::CreateTranspose(
+              rhs_slice_shape_r2_transpose, rhs_slice_r2, {1, 0}));
+    }
+
+    // Compute Dot of lhs/rhs R2 slices.
+    DotDimensionNumbers dot_dnums;
+    dot_dnums.add_lhs_contracting_dimensions(1);
+    dot_dnums.add_rhs_contracting_dimensions(0);
+    auto dot_r2 = computation->AddInstruction(HloInstruction::CreateDot(
+        dot_shape_r2, lhs_slice_r2, rhs_slice_r2, dot_dnums));
+
+    // Reshape Dot to R3 so we can concat along batch dimension.
+    auto dot_r3 = computation->AddInstruction(
+        HloInstruction::CreateReshape(dot_shape_r3, dot_r2));
+
+    output_slices[i] = dot_r3;
+  }
+
+  // Concatenate slices from 'output_slices' along batch dimension.
+  auto concat = computation->AddInstruction(
+      HloInstruction::CreateConcatenate(concat_shape_r3, output_slices, 0));
+  // Reshape output 'new_dot' to original dimensions.
+  auto new_dot = computation->AddInstruction(
+      HloInstruction::CreateReshape(dot_shape, concat));
+
+  // Replace all uses of 'dot' in 'computation' with 'new_dot'.
+  return computation->ReplaceInstruction(dot, new_dot);
+}
+
+}  // namespace
+
+StatusOr<bool> DotDecomposer::Run(HloModule* module) {
+  XLA_VLOG_LINES(2, "DotDecomposer ENTRY\n" + module->ToString());
+  // Gather all batch Dot operations.
+  std::vector<HloInstruction*> batch_dots;
+  for (auto* computation : module->MakeNonfusionComputations()) {
+    for (auto* instruction : computation->instructions()) {
+      if (instruction->opcode() != HloOpcode::kDot) {
+        continue;
+      }
+      const DotDimensionNumbers& dnums = instruction->dot_dimension_numbers();
+      if (dnums.lhs_batch_dimensions_size() > 0 && decompose_batch_dot_) {
+        batch_dots.push_back(instruction);
+      }
+    }
+  }
+  // Decompose each batch Dot in 'batch_dots'.
+  bool changed = false;
+  for (auto* dot : batch_dots) {
+    TF_RETURN_IF_ERROR(DecomposeBatchDot(dot));
+    changed = true;
+  }
+  XLA_VLOG_LINES(2, "DotDecompose EXIT\n" + module->ToString());
+  return changed;
+}
+
+}  // namespace xla
diff --git a/tensorflow/compiler/xla/service/dot_decomposer.h b/tensorflow/compiler/xla/service/dot_decomposer.h
new file mode 100644
index 0000000000..5ff0ab34ea
--- /dev/null
+++ b/tensorflow/compiler/xla/service/dot_decomposer.h
@@ -0,0 +1,44 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef THIRD_PARTY_TENSORFLOW_COMPILER_XLA_SERVICE_DOT_DECOMPOSER_H_
+#define THIRD_PARTY_TENSORFLOW_COMPILER_XLA_SERVICE_DOT_DECOMPOSER_H_
+
+#include "tensorflow/compiler/xla/service/hlo_module.h"
+#include "tensorflow/compiler/xla/service/hlo_pass_interface.h"
+
+namespace xla {
+
+// DotDecomposer is a pass which decomposes batch Dot operations into a
+// sequence of smaller (R2) Dot operations.
+class DotDecomposer : public HloPassInterface {
+ public:
+  // Decomposes batch Dot operations when 'decompose_batch_dot' is true.
+  DotDecomposer(bool decompose_batch_dot = true)
+      : decompose_batch_dot_(decompose_batch_dot) {}
+  ~DotDecomposer() = default;
+  tensorflow::StringPiece name() const override { return "dot_decomposer"; }
+
+  // Run DotDecomposer pass on computations in 'module'.
+  // Returns whether the 'module' was changed.
+  StatusOr<bool> Run(HloModule* module) override;
+
+ private:
+  bool decompose_batch_dot_;
+};
+
+}  // namespace xla
+
+#endif  // THIRD_PARTY_TENSORFLOW_COMPILER_XLA_SERVICE_DOT_DECOMPOSER_H_
diff --git a/tensorflow/compiler/xla/service/gpu/BUILD b/tensorflow/compiler/xla/service/gpu/BUILD
index 8f59a4b8a5..4a72f87efd 100644
--- a/tensorflow/compiler/xla/service/gpu/BUILD
+++ b/tensorflow/compiler/xla/service/gpu/BUILD
@@ -449,6 +449,7 @@ cc_library(
         "//tensorflow/compiler/xla/service:buffer_assignment",
         "//tensorflow/compiler/xla/service:buffer_liveness",
         "//tensorflow/compiler/xla/service:call_inliner",
+        "//tensorflow/compiler/xla/service:dot_decomposer",
         "//tensorflow/compiler/xla/service:executable",
         "//tensorflow/compiler/xla/service:flatten_call_graph",
         "//tensorflow/compiler/xla/service:hlo",
diff --git a/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc b/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc
index 464e770b37..1ccfe323c5 100644
--- a/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc
+++ b/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc
@@ -31,6 +31,7 @@ limitations under the License.
 #include "tensorflow/compiler/xla/service/buffer_assignment.h"
 #include "tensorflow/compiler/xla/service/buffer_liveness.h"
 #include "tensorflow/compiler/xla/service/call_inliner.h"
+#include "tensorflow/compiler/xla/service/dot_decomposer.h"
 #include "tensorflow/compiler/xla/service/flatten_call_graph.h"
 #include "tensorflow/compiler/xla/service/gpu/convolution_folding.h"
 #include "tensorflow/compiler/xla/service/gpu/fusion_merger.h"
@@ -137,7 +138,7 @@ tensorflow::Status OptimizeHloModule(
 
     // TODO(b/64094172): make Call work on GPU instead of inlining.
     pipeline.AddPass<CallInliner>();
-
+    pipeline.AddPass<DotDecomposer>();
     {
       auto& pass =
           pipeline.AddPass<HloPassFix<HloPassPipeline>>("simplification");
diff --git a/tensorflow/compiler/xla/service/shape_inference.cc b/tensorflow/compiler/xla/service/shape_inference.cc
index 9c54654f0a..9c1b951d01 100644
--- a/tensorflow/compiler/xla/service/shape_inference.cc
+++ b/tensorflow/compiler/xla/service/shape_inference.cc
@@ -559,6 +559,8 @@ StatusOr<Shape> InferWindowOutputShape(const Shape& base_shape,
 // Batch Dimensions:
 // *) Same number of batch dimensions on both lhs and rhs.
 // *) Same batch dimension numbers (and sizes) on both lhs and rhs.
+// *) Batch dimension numbers must be ordered before contracting and
+//    non-contracting/non-batch dimension numbers.
 //
 // Non-Contracting-Non-Batch Dimensions:
 // *) Can be 0 (matrix-vector) or 1 (matrix-matrix).
@@ -632,6 +634,19 @@ Status ValidateDotDimensionNumbers(
         "with rank ");
   }
 
+  // Check that batch dimension numbers are ordered before all others, and
+  // that they are monotonically increasing.
+  std::vector<int64> batch_dim_numbers(lhs_batch_dimensions.size());
+  std::iota(batch_dim_numbers.begin(), batch_dim_numbers.end(), 0);
+  if (!std::equal(batch_dim_numbers.begin(), batch_dim_numbers.end(),
+                  lhs_batch_dimensions.begin()) ||
+      !std::equal(batch_dim_numbers.begin(), batch_dim_numbers.end(),
+                  rhs_batch_dimensions.begin())) {
+    return InvalidArgument(
+        "batch dimension numbers must precede non-batch dimensions and be"
+        "monotonically increasing.");
+  }
+
   return Status::OK();
 }
 
diff --git a/tensorflow/compiler/xla/service/shape_inference_test.cc b/tensorflow/compiler/xla/service/shape_inference_test.cc
index 7af2805f12..99d87f3b55 100644
--- a/tensorflow/compiler/xla/service/shape_inference_test.cc
+++ b/tensorflow/compiler/xla/service/shape_inference_test.cc
@@ -1062,7 +1062,7 @@ TEST_F(ShapeInferenceTest, DotWithMisatchedBatchDimNumbersFails) {
       ShapeInference::InferDotOpShape(lhs_shape, rhs_shape, dot_dnums);
   ASSERT_FALSE(inferred_status.ok());
   ASSERT_THAT(inferred_status.status().error_message(),
-              HasSubstr("batch dimension numbers and sizes must match"));
+              HasSubstr("batch dimension numbers must precede non-batch"));
 }
 
 // BatchMatMul with out-of-range dimension numbers fails.
diff --git a/tensorflow/compiler/xla/tests/dot_operation_test.cc b/tensorflow/compiler/xla/tests/dot_operation_test.cc
index aea72417a8..2058cd04a5 100644
--- a/tensorflow/compiler/xla/tests/dot_operation_test.cc
+++ b/tensorflow/compiler/xla/tests/dot_operation_test.cc
@@ -573,25 +573,5 @@ TEST_F(DotOperationTest, TransposeFolding) {
   }
 }
 
-XLA_TEST_F(DotOperationTest, DotGeneralUnimplemented) {
-  ComputationBuilder builder(client_, TestName());
-  auto lhs = builder.ConstantR3FromArray3D<float>(
-      {{{1.0, 2.0}, {3.0, 4.0}}, {{5.0, 6.0}, {7.0, 8.0}}});
-  auto rhs = builder.ConstantR3FromArray3D<float>(
-      {{{1.0, 0.0}, {0.0, 1.0}}, {{0.0, 1.0}, {1.0, 0.0}}});
-  DotDimensionNumbers dot_dnums;
-  dot_dnums.add_lhs_contracting_dimensions(2);
-  dot_dnums.add_rhs_contracting_dimensions(1);
-  dot_dnums.add_lhs_batch_dimensions(0);
-  dot_dnums.add_rhs_batch_dimensions(0);
-  builder.DotGeneral(lhs, rhs, dot_dnums);
-
-  auto status = Execute(&builder, {}).status();
-  EXPECT_FALSE(status.ok());
-  EXPECT_THAT(
-      status.error_message(),
-      ::testing::HasSubstr("Dot with batch dimensions not implemented."));
-}
-
 }  // namespace
 }  // namespace xla
-- 
GitLab


From 037f036b2c76ef363148276dce83b7dd1d79e878 Mon Sep 17 00:00:00 2001
From: Derek Murray <mrry@google.com>
Date: Mon, 11 Dec 2017 14:41:17 -0800
Subject: [PATCH 0884/1225] Mark a FunctionDef's signature as stateful when it
 contains a stateful node.

This fixes a bug where two calls to the same stateful function will erroneously be eliminated as common subexpressions. It is also a step towards pruning nodes from function bodies, which is necessary for a variety of `Dataset` optimizations.

PiperOrigin-RevId: 178675527
---
 tensorflow/c/c_api_function.cc                |  5 +++
 tensorflow/c/c_api_function_test.cc           | 45 +++++++++++++++++++
 tensorflow/c/c_test_util.cc                   |  9 ++++
 tensorflow/c/c_test_util.h                    |  3 ++
 tensorflow/python/framework/function_test.py  | 42 +++++++++++++++++
 .../python/framework/graph_to_function_def.py |  7 +++
 6 files changed, 111 insertions(+)

diff --git a/tensorflow/c/c_api_function.cc b/tensorflow/c/c_api_function.cc
index dcb818b88b..b9312c2974 100644
--- a/tensorflow/c/c_api_function.cc
+++ b/tensorflow/c/c_api_function.cc
@@ -226,6 +226,11 @@ Status FillFunctionBody(
       }
       node_def->add_input(strings::StrCat("^", normalized));
     }
+
+    // A function is stateful if any of its nodes are stateful.
+    if (node->op_def().is_stateful()) {
+      fdef->mutable_signature()->set_is_stateful(true);
+    }
   }
   return Status::OK();
 }
diff --git a/tensorflow/c/c_api_function_test.cc b/tensorflow/c/c_api_function_test.cc
index d5580b6589..4ffc9d6931 100644
--- a/tensorflow/c/c_api_function_test.cc
+++ b/tensorflow/c/c_api_function_test.cc
@@ -1482,6 +1482,51 @@ TEST_F(CApiFunctionTest, GetOpDef) {
   EXPECT_EQ(op_def.name(), func_name_);
   EXPECT_EQ(op_def.input_arg_size(), 1);
   EXPECT_EQ(op_def.output_arg_size(), 1);
+  EXPECT_FALSE(op_def.is_stateful());
+
+  TF_DeleteBuffer(buffer);
+}
+
+void DefineStatefulFunction(const char* name, TF_Function** func) {
+  std::unique_ptr<TF_Graph, decltype(&TF_DeleteGraph)> func_graph(
+      TF_NewGraph(), TF_DeleteGraph);
+  std::unique_ptr<TF_Status, decltype(&TF_DeleteStatus)> s(TF_NewStatus(),
+                                                           TF_DeleteStatus);
+
+  TF_Tensor* tensor_shape = Int32Tensor({37, 1});
+  TF_Operation* shape = Const(tensor_shape, func_graph.get(), s.get(), "shape");
+  TF_Operation* random =
+      RandomUniform(shape, TF_FLOAT, func_graph.get(), s.get());
+
+  TF_Output inputs[] = {};
+  TF_Output outputs[] = {{random, 0}};
+  *func = TF_GraphToFunction(func_graph.get(), name, /*append_hash=*/0, -1,
+                             /*opers=*/nullptr, 0, inputs, 1, outputs,
+                             /*output_names=*/nullptr,
+                             /*opts=*/nullptr, "", s.get());
+  ASSERT_EQ(TF_OK, TF_GetCode(s.get())) << TF_Message(s.get());
+  ASSERT_NE(*func, nullptr);
+  TF_DeleteTensor(tensor_shape);
+}
+
+TEST_F(CApiFunctionTest, StatefulOpDef) {
+  DefineStatefulFunction(func_name_, &func_);
+  TF_GraphCopyFunction(host_graph_, func_, nullptr, s_);
+  ASSERT_EQ(TF_OK, TF_GetCode(s_)) << TF_Message(s_);
+
+  // Test we can retrieve function OpDef from graph
+  TF_Buffer* buffer = TF_NewBuffer();
+  TF_GraphGetOpDef(host_graph_, func_name_, buffer, s_);
+  ASSERT_EQ(TF_OK, TF_GetCode(s_)) << TF_Message(s_);
+
+  // Sanity check returned OpDef
+  string data(static_cast<const char*>(buffer->data), buffer->length);
+  OpDef op_def;
+  op_def.ParseFromString(data);
+  EXPECT_EQ(op_def.name(), func_name_);
+  EXPECT_EQ(op_def.input_arg_size(), 0);
+  EXPECT_EQ(op_def.output_arg_size(), 1);
+  EXPECT_TRUE(op_def.is_stateful());
 
   TF_DeleteBuffer(buffer);
 }
diff --git a/tensorflow/c/c_test_util.cc b/tensorflow/c/c_test_util.cc
index c291a2e440..37439ff0be 100644
--- a/tensorflow/c/c_test_util.cc
+++ b/tensorflow/c/c_test_util.cc
@@ -193,6 +193,15 @@ TF_Operation* LessThan(TF_Output l, TF_Output r, TF_Graph* graph,
   return TF_FinishOperation(desc, s);
 }
 
+TF_Operation* RandomUniform(TF_Operation* shape, TF_DataType dtype,
+                            TF_Graph* graph, TF_Status* s) {
+  TF_OperationDescription* desc =
+      TF_NewOperation(graph, "RandomUniform", "random_uniform");
+  TF_AddInput(desc, {shape, 0});
+  TF_SetAttrType(desc, "dtype", dtype);
+  return TF_FinishOperation(desc, s);
+}
+
 void Split3Helper(TF_Operation* input, TF_Graph* graph, TF_Status* s,
                   const char* name, TF_Operation** op) {
   TF_Operation* zero = ScalarConst(
diff --git a/tensorflow/c/c_test_util.h b/tensorflow/c/c_test_util.h
index d547337492..96a93afef3 100644
--- a/tensorflow/c/c_test_util.h
+++ b/tensorflow/c/c_test_util.h
@@ -74,6 +74,9 @@ TF_Operation* Neg(TF_Operation* n, TF_Graph* graph, TF_Status* s,
 
 TF_Operation* LessThan(TF_Output l, TF_Output r, TF_Graph* graph, TF_Status* s);
 
+TF_Operation* RandomUniform(TF_Operation* shape, TF_DataType dtype,
+                            TF_Graph* graph, TF_Status* s);
+
 // Split `input` along the first dimention into 3 tensors
 TF_Operation* Split3(TF_Operation* input, TF_Graph* graph, TF_Status* s,
                      const char* name = "split3");
diff --git a/tensorflow/python/framework/function_test.py b/tensorflow/python/framework/function_test.py
index 11f343c579..8a7bf7a021 100644
--- a/tensorflow/python/framework/function_test.py
+++ b/tensorflow/python/framework/function_test.py
@@ -914,6 +914,48 @@ class FunctionTest(test.TestCase):
           np.array([1.0, 0.0]).astype(np.float32),
           sess.run(dinp, {inp: x}))
 
+  def testStatefulFunction(self):
+
+    @function.Defun()
+    def FunctionWithStatelessOp():
+      return constant_op.constant(42.0)
+
+    @function.Defun()
+    def FunctionWithStatefulOp():
+      return random_ops.random_uniform([100], maxval=10, dtype=dtypes.int32)
+
+    @function.Defun()
+    def FunctionWithStatelessFunctionCall():
+      return FunctionWithStatelessOp()
+
+    @function.Defun()
+    def FunctionWithStatefulFunctionCall():
+      return FunctionWithStatefulOp()
+
+    # Test that the `is_stateful` bit is propagated.
+    self.assertFalse(FunctionWithStatelessOp.definition.signature.is_stateful)
+    self.assertTrue(FunctionWithStatefulOp.definition.signature.is_stateful)
+    self.assertFalse(
+        FunctionWithStatelessFunctionCall.definition.signature.is_stateful)
+    self.assertTrue(
+        FunctionWithStatefulFunctionCall.definition.signature.is_stateful)
+
+    # Ensure that two invocations of the same random-number-generating
+    # function produce different results.
+    result1 = FunctionWithStatefulFunctionCall()
+    result2 = FunctionWithStatefulFunctionCall()
+
+    # Statefulness affects how the function is treated by the various
+    # optimization passes, so run the test in each optimizer
+    # configuration.
+    for config in _OptimizerOptions():
+      with session.Session(config=config) as sess:
+        val1, val2 = sess.run((result1, result2))
+        self.assertFalse(all(val1 == val2))
+        val3, val4 = sess.run((result1, result2))
+        self.assertFalse(all(val3 == val1))
+        self.assertFalse(all(val4 == val2))
+
 
 @test_util.with_c_api
 class FunctionsFromProtos(test.TestCase):
diff --git a/tensorflow/python/framework/graph_to_function_def.py b/tensorflow/python/framework/graph_to_function_def.py
index 448f87aa6e..625f31146b 100644
--- a/tensorflow/python/framework/graph_to_function_def.py
+++ b/tensorflow/python/framework/graph_to_function_def.py
@@ -110,6 +110,13 @@ def _add_op_node(op, func, input_dict):
                                                (node_def.input[i],
                                                 input_dict.items()))
       node_def.input[i] = input_dict[node_def.input[i]]
+  # The function is stateful if any of its operations are stateful.
+  # NOTE(mrry): The "Const" node typically does not have an `OpDef` associated
+  # with it, so we assume any nodes without an `OpDef` are stateless.
+  # TODO(skyewm): Remove the `is not None` test after we transition to the C
+  # API.
+  if op.op_def is not None and op.op_def.is_stateful:
+    func.signature.is_stateful = True
 
 
 def graph_to_function_def(graph, operations, inputs, outputs, out_names=None):
-- 
GitLab


From f86016ea29911d60643b0b606330ecbc792498d9 Mon Sep 17 00:00:00 2001
From: Justine Tunney <jart@google.com>
Date: Mon, 11 Dec 2017 14:41:46 -0800
Subject: [PATCH 0885/1225] Fix definition of tflite_smartreply

PiperOrigin-RevId: 178675580
---
 tensorflow/workspace.bzl | 20 ++++++++++----------
 third_party/repo.bzl     |  7 +++++--
 2 files changed, 15 insertions(+), 12 deletions(-)

diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl
index 20e1aaaf6e..b71f5dc4e5 100644
--- a/tensorflow/workspace.bzl
+++ b/tensorflow/workspace.bzl
@@ -651,7 +651,6 @@ def tf_workspace(path_prefix="", tf_repo_name=""):
       build_file = str(Label("//third_party/flatbuffers:flatbuffers.BUILD")),
   )
 
-
   tf_http_archive(
       name = "tflite_mobilenet",
       sha256 = "23f814d1c076bdf03715dfb6cab3713aa4fbdf040fd5448c43196bd2e97a4c1b",
@@ -662,6 +661,16 @@ def tf_workspace(path_prefix="", tf_repo_name=""):
       build_file = str(Label("//third_party:tflite_mobilenet.BUILD")),
   )
 
+  tf_http_archive(
+      name = "tflite_smartreply",
+      sha256 = "8980151b85a87a9c1a3bb1ed4748119e4a85abd3cb5744d83da4d4bd0fbeef7c",
+      urls = [
+          "https://mirror.bazel.build/storage.googleapis.com/download.tensorflow.org/models/tflite/smartreply_1.0_2017_11_01.zip",
+          "https://storage.googleapis.com/download.tensorflow.org/models/tflite/smartreply_1.0_2017_11_01.zip"
+      ],
+      build_file = str(Label("//third_party:tflite_smartreply.BUILD")),
+  )
+
   ##############################################################################
   # BIND DEFINITIONS
   #
@@ -737,12 +746,3 @@ def tf_workspace(path_prefix="", tf_repo_name=""):
       name = "zlib",
       actual = "@zlib_archive//:zlib",
   )
-
-  native.new_http_archive(
-      name = "tflite_smartreply",
-      build_file = str(Label("//third_party:tflite_smartreply.BUILD")),
-      sha256 = "8980151b85a87a9c1a3bb1ed4748119e4a85abd3cb5744d83da4d4bd0fbeef7c",
-      urls = [
-          "https://storage.googleapis.com/download.tensorflow.org/models/tflite/smartreply_1.0_2017_11_01.zip"
-      ],
-  )
diff --git a/third_party/repo.bzl b/third_party/repo.bzl
index d6e5dfced0..c29fef9629 100644
--- a/third_party/repo.bzl
+++ b/third_party/repo.bzl
@@ -67,8 +67,11 @@ def _tf_http_archive(ctx):
   if ("mirror.bazel.build" not in ctx.attr.urls[0] or
       (len(ctx.attr.urls) < 2 and
        ctx.attr.name not in _SINGLE_URL_WHITELIST)):
-    fail("tf_http_archive(urls) must have redundant URLs. The Bazel Mirror " +
-         "URL must come first. Please note mirroring happens after merge")
+    fail("tf_http_archive(urls) must have redundant URLs. The " +
+         "mirror.bazel.build URL must be present and it must come first. " +
+         "Even if you don't have permission to mirror the file, please " +
+         "put the correctly formatted mirror URL there anyway, because " +
+         "someone will come along shortly thereafter and mirror the file.")
   ctx.download_and_extract(
       ctx.attr.urls,
       "",
-- 
GitLab


From 4d1d1a1f2fc76f72903363f59853e346a7aa52aa Mon Sep 17 00:00:00 2001
From: Allen Lavoie <allenl@google.com>
Date: Mon, 11 Dec 2017 14:49:48 -0800
Subject: [PATCH 0886/1225] Fix for variable naming when executing eagerly

name_scope bypassed the Graph.name_scope slash-stripping logic (strip a trailing
slash if it exists, then add one back unconditionally) when executing eagerly,
leading to extra slashes at the end of some variable names and a failure to break
out of nested name scopes.

PiperOrigin-RevId: 178676873
---
 tensorflow/python/framework/ops.py      | 15 ++++++++----
 tensorflow/python/framework/ops_test.py | 31 +++++++++++++++++++++++++
 tensorflow/python/training/adam_test.py |  3 +++
 3 files changed, 45 insertions(+), 4 deletions(-)

diff --git a/tensorflow/python/framework/ops.py b/tensorflow/python/framework/ops.py
index 343150024f..95b1cefcbe 100644
--- a/tensorflow/python/framework/ops.py
+++ b/tensorflow/python/framework/ops.py
@@ -5352,11 +5352,18 @@ class name_scope(object):  # pylint: disable=invalid-name
     """
     if self._in_eager_mode:
       self._old_name = self._ctx.scope_name
-      if self._name:
-        scope_name = (self._old_name + self._name + "/"
-                      if self._old_name else self._name + "/")
-      else:
+      if not self._name:
         scope_name = ""
+      else:
+        if self._name[-1] == "/":
+          # A trailing slash breaks out of nested name scopes, indicating a
+          # fully specified scope name, for compatibility with Graph.name_scope.
+          scope_name = self._name
+        else:
+          name_with_trailing_slash = self._name + "/"
+          scope_name = (
+              self._old_name + name_with_trailing_slash
+              if self._old_name else name_with_trailing_slash)
       self._ctx.scope_name = scope_name
       return scope_name
     else:
diff --git a/tensorflow/python/framework/ops_test.py b/tensorflow/python/framework/ops_test.py
index a69c0a1f82..7d279760c8 100644
--- a/tensorflow/python/framework/ops_test.py
+++ b/tensorflow/python/framework/ops_test.py
@@ -1761,6 +1761,37 @@ class ControlDependenciesTest(test_util.TensorFlowTestCase):
 @test_util.with_c_api
 class OpScopeTest(test_util.TensorFlowTestCase):
 
+  @test_util.run_in_graph_and_eager_modes()
+  def testNames(self):
+    with ops.name_scope("foo") as foo:
+      self.assertEqual("foo/", foo)
+      with ops.name_scope("foo2") as foo2:
+        self.assertEqual("foo/foo2/", foo2)
+      with ops.name_scope(None) as empty1:
+        self.assertEqual("", empty1)
+        with ops.name_scope("foo3") as foo3:
+          self.assertEqual("foo3/", foo3)
+      with ops.name_scope("") as empty2:
+        self.assertEqual("", empty2)
+    with ops.name_scope("foo/") as outer_foo:
+      self.assertEqual("foo/", outer_foo)
+      with ops.name_scope("") as empty3:
+        self.assertEqual("", empty3)
+      with ops.name_scope("foo4") as foo4:
+        self.assertEqual("foo/foo4/", foo4)
+      with ops.name_scope("foo5//") as foo5:
+        self.assertEqual("foo5//", foo5)
+        with ops.name_scope("foo6") as foo6:
+          self.assertEqual("foo5//foo6/", foo6)
+      with ops.name_scope("/") as foo7:
+        self.assertEqual("/", foo7)
+      with ops.name_scope("//") as foo8:
+        self.assertEqual("//", foo8)
+      with ops.name_scope("a//b/c") as foo9:
+        self.assertEqual("foo/a//b/c/", foo9)
+    with ops.name_scope("a//b/c") as foo10:
+      self.assertEqual("a//b/c/", foo10)
+
   @test_util.run_in_graph_and_eager_modes()
   def testEagerDefaultScopeName(self):
     with ops.name_scope(None, "default") as scope:
diff --git a/tensorflow/python/training/adam_test.py b/tensorflow/python/training/adam_test.py
index 0d534db60d..ffb66abc4c 100644
--- a/tensorflow/python/training/adam_test.py
+++ b/tensorflow/python/training/adam_test.py
@@ -207,6 +207,9 @@ class AdamOptimizerTest(test.TestCase):
           # Validate updated params
           self.assertAllCloseAccordingToType(var0_np, self.evaluate(var0))
           self.assertAllCloseAccordingToType(var1_np, self.evaluate(var1))
+          if use_resource:
+            self.assertEqual("var0_%d/Adam:0" % (i,),
+                             opt.get_slot(var=var0, name="m").name)
 
   def testBasic(self):
     with self.test_session():
-- 
GitLab


From 5c0a9452fdbfa621f23af28ccc365de58e40f845 Mon Sep 17 00:00:00 2001
From: Nathan Luehr <nluehr@nvidia.com>
Date: Mon, 11 Dec 2017 15:06:35 -0800
Subject: [PATCH 0887/1225] Fix minor typo in CUDNN_VERSION check

Effectively enables CUDNN_CONVOLUTION_BWD_FILTER_ALGO_WINOGRAD_NONFUSED in
CudnnSupport::GetConvolveBackwardFilterAlgorithms() for cuDNN v5.1.
---
 tensorflow/stream_executor/cuda/cuda_dnn.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/stream_executor/cuda/cuda_dnn.cc b/tensorflow/stream_executor/cuda/cuda_dnn.cc
index d78362d4fb..02012f6ead 100644
--- a/tensorflow/stream_executor/cuda/cuda_dnn.cc
+++ b/tensorflow/stream_executor/cuda/cuda_dnn.cc
@@ -2677,7 +2677,7 @@ bool CudnnSupport::GetConvolveBackwardFilterAlgorithms(
       // CUDNN_CONVOLUTION_BWD_FILTER_ALGO_WINOGRAD,
       // clang-format on
   };
-#if CUDNN_VERSION >= 5110
+#if CUDNN_VERSION >= 5100
   if (CudnnEnvVar<WinogradNonfused>::IsEnabled() && with_winograd_nonfused) {
     algo_types.push_back(CUDNN_CONVOLUTION_BWD_FILTER_ALGO_WINOGRAD_NONFUSED);
   }
-- 
GitLab


From 1b4c6096e5024119dbed898ecaad63e3afd58ef0 Mon Sep 17 00:00:00 2001
From: Derek Murray <mrry@google.com>
Date: Mon, 11 Dec 2017 15:39:00 -0800
Subject: [PATCH 0888/1225] [tf.data] Use a more efficient dispatch mechanism
 for functions in datasets.

This change adds an overload of the `FunctionLibraryRuntime::Run()` method
that allows users to pass argument and return value containers in a
`CallFrameInterface` object, rather than using the current (and expensive for
large arities) default `FunctionCallFrame` implementation. It also specializes
`CapturedFunction` to use this interface.

Note that the new overload currently only supports local function execution,
and more restructuring will be required to take advantage of it in the remote
function execution case.

This change should especially benefit datasets where each element has a large
number of components (typically when training data have many features).

PiperOrigin-RevId: 178684431
---
 tensorflow/core/common_runtime/function.cc    |  81 +++++-
 .../core/common_runtime/function_test.cc      |  73 +++++-
 tensorflow/core/framework/function.h          |   8 +
 tensorflow/core/kernels/captured_function.cc  | 236 ++++++++++++++----
 tensorflow/core/kernels/captured_function.h   |  39 ++-
 tensorflow/core/kernels/dataset_utils.cc      |   3 +-
 tensorflow/core/kernels/filter_dataset_op.cc  |   3 +-
 .../kernels/group_by_window_dataset_op.cc     |  12 +-
 .../core/kernels/map_and_batch_dataset_op.cc  |   2 +-
 tensorflow/core/kernels/map_dataset_op.cc     |   3 +-
 .../core/kernels/parallel_map_dataset_op.cc   |   2 +-
 tensorflow/core/kernels/scan_dataset_op.cc    |   4 +-
 12 files changed, 392 insertions(+), 74 deletions(-)

diff --git a/tensorflow/core/common_runtime/function.cc b/tensorflow/core/common_runtime/function.cc
index b152529711..c51b172066 100644
--- a/tensorflow/core/common_runtime/function.cc
+++ b/tensorflow/core/common_runtime/function.cc
@@ -161,6 +161,12 @@ class FunctionLibraryRuntimeImpl : public FunctionLibraryRuntime {
 
   void Run(const Options& opts, Handle handle, gtl::ArraySlice<Tensor> args,
            std::vector<Tensor>* rets, DoneCallback done) override;
+  // NOTE(mrry): This overload is currently only implemented for local function
+  // execution.
+  // TODO(b/70346412): Implement support for remote function execution when
+  // passing a call frame.
+  void Run(const Options& opts, Handle handle, CallFrameInterface* frame,
+           DoneCallback done) override;
 
   bool IsStateful(const string& function) override;
 
@@ -689,12 +695,6 @@ void FunctionLibraryRuntimeImpl::Run(const Options& opts, Handle handle,
     return;
   }
 
-  Item* item = nullptr;
-  Status s = GetOrCreateItem(handle, &item);
-  if (!s.ok()) {
-    done(s);
-    return;
-  }
   DCHECK(run_opts.runner != nullptr);
 
   Executor::Args* exec_args = new Executor::Args;
@@ -706,7 +706,16 @@ void FunctionLibraryRuntimeImpl::Run(const Options& opts, Handle handle,
   exec_args->step_container = run_opts.step_container;
   exec_args->runner = *run_opts.runner;
 
+  Item* item = nullptr;
+  Status s = GetOrCreateItem(handle, &item);
+  if (!s.ok()) {
+    delete exec_args;
+    done(s);
+    return;
+  }
+
   if (run_opts.remote_execution) {
+    // NOTE(mrry): `RunRemote()` will set `exec_args->call_frame` for us.
     RunRemote(run_opts, handle, args, rets, exec_args, item, done);
     return;
   }
@@ -738,6 +747,66 @@ void FunctionLibraryRuntimeImpl::Run(const Options& opts, Handle handle,
       });
 }
 
+void FunctionLibraryRuntimeImpl::Run(const Options& opts, Handle handle,
+                                     CallFrameInterface* frame,
+                                     DoneCallback done) {
+  if (opts.cancellation_manager && opts.cancellation_manager->IsCancelled()) {
+    done(errors::Cancelled(""));
+    return;
+  }
+  if (!parent_->IsInstantiatedOnDevice(device_name_, handle) ||
+      opts.remote_execution) {
+    done(errors::Unimplemented("Remote calling with CallFrameInterface"));
+    return;
+  }
+
+  Options run_opts = opts;
+  if (opts.create_rendezvous) {
+    Rendezvous* rendezvous = new IntraProcessRendezvous(device_mgr_);
+    run_opts.rendezvous = rendezvous;
+    run_opts.create_rendezvous = false;
+    done = std::bind(
+        [rendezvous](DoneCallback done,
+                     // Begin unbound arguments.
+                     const Status& status) {
+          rendezvous->Unref();
+          done(status);
+        },
+        std::move(done), std::placeholders::_1);
+  }
+
+  Item* item = nullptr;
+  Status s = GetOrCreateItem(handle, &item);
+  if (!s.ok()) {
+    done(s);
+    return;
+  }
+  DCHECK(run_opts.runner != nullptr);
+
+  Executor::Args* exec_args = new Executor::Args;
+  // Inherit the step_id from the caller.
+  exec_args->step_id = run_opts.step_id;
+  exec_args->rendezvous = run_opts.rendezvous;
+  exec_args->stats_collector = run_opts.stats_collector;
+  exec_args->cancellation_manager = run_opts.cancellation_manager;
+  exec_args->step_container = run_opts.step_container;
+  exec_args->runner = *run_opts.runner;
+  exec_args->call_frame = frame;
+
+  item->exec->RunAsync(
+      // Executor args
+      *exec_args,
+      // Done callback.
+      std::bind(
+          [item, frame, exec_args](DoneCallback done,
+                                   // Start unbound arguments.
+                                   const Status& status) {
+            delete exec_args;
+            done(status);
+          },
+          std::move(done), std::placeholders::_1));
+}
+
 bool FunctionLibraryRuntimeImpl::IsStateful(const string& func) {
   const OpDef* op_def;
   const Status s = lib_def_->LookUpOpDef(func, &op_def);
diff --git a/tensorflow/core/common_runtime/function_test.cc b/tensorflow/core/common_runtime/function_test.cc
index 575af566d5..52bfb9e0ed 100644
--- a/tensorflow/core/common_runtime/function_test.cc
+++ b/tensorflow/core/common_runtime/function_test.cc
@@ -222,6 +222,70 @@ class FunctionLibraryRuntimeTest : public ::testing::Test {
     return status;
   }
 
+  Status Run(FunctionLibraryRuntime* flr, FunctionLibraryRuntime::Handle handle,
+             FunctionLibraryRuntime::Options opts, CallFrameInterface* frame) {
+    std::atomic<int32> call_count(0);
+    std::function<void(std::function<void()>)> runner =
+        [&call_count](std::function<void()> fn) {
+          ++call_count;
+          test::function::FunctionTestSchedClosure(fn);
+        };
+
+    Notification done;
+    opts.runner = &runner;
+    std::vector<Tensor> out;
+    Status status;
+    flr->Run(opts, handle, frame, [&status, &done](const Status& s) {
+      status = s;
+      done.Notify();
+    });
+    done.WaitForNotification();
+    if (!status.ok()) {
+      return status;
+    }
+
+    EXPECT_GE(call_count, 1);  // Test runner is used.
+
+    return Status::OK();
+  }
+
+  Status InstantiateAndRunViaCallFrameInterface(FunctionLibraryRuntime* flr,
+                                                const string& name,
+                                                test::function::Attrs attrs,
+                                                const std::vector<Tensor>& args,
+                                                std::vector<Tensor*> rets) {
+    FunctionLibraryRuntime::Handle handle;
+    Status status = flr->Instantiate(name, attrs, &handle);
+    if (!status.ok()) {
+      return status;
+    }
+    const FunctionBody* fbody = flr->GetFunctionBody(handle);
+    FunctionCallFrame frame(fbody->arg_types, fbody->ret_types);
+    TF_RETURN_IF_ERROR(frame.SetArgs(args));
+
+    FunctionLibraryRuntime::Options opts;
+    status = Run(flr, handle, opts, &frame);
+    if (!status.ok()) return status;
+
+    std::vector<Tensor> retvals;
+    TF_RETURN_IF_ERROR(frame.GetRetvals(&retvals));
+    CHECK_EQ(rets.size(), retvals.size());
+    for (size_t i = 0; i < rets.size(); ++i) {
+      *rets[i] = retvals[i];
+    }
+
+    // Release the handle and try running again. It should not succeed.
+    status = flr->ReleaseHandle(handle);
+    if (!status.ok()) return status;
+
+    Status status2 = Run(flr, handle, opts, args, std::move(rets));
+    EXPECT_TRUE(errors::IsInvalidArgument(status2));
+    EXPECT_TRUE(
+        StringPiece(status2.error_message()).contains("remote execution."));
+
+    return status;
+  }
+
   std::unique_ptr<Graph> GetFuncBody(FunctionLibraryRuntime* flr,
                                      const string& name,
                                      test::function::Attrs attrs) {
@@ -280,6 +344,9 @@ TEST_F(FunctionLibraryRuntimeTest, XTimesTwo) {
   TF_CHECK_OK(
       InstantiateAndRun(flr0_, "XTimesTwo", {{"T", DT_FLOAT}}, {x}, {&y}));
   test::ExpectTensorEqual<float>(y, test::AsTensor<float>({2, 4, 6, 8}));
+  TF_CHECK_OK(InstantiateAndRunViaCallFrameInterface(
+      flr0_, "XTimesTwo", {{"T", DT_FLOAT}}, {x}, {&y}));
+  test::ExpectTensorEqual<float>(y, test::AsTensor<float>({2, 4, 6, 8}));
 }
 
 TEST_F(FunctionLibraryRuntimeTest, XTimesN) {
@@ -510,7 +577,7 @@ TEST_F(FunctionLibraryRuntimeTest, OptimizeGraph) {
     Scope s = Scope::NewRootScope();
     auto x = ops::_Arg(s.WithOpName("x"), DT_FLOAT, 0);
     auto x4_x2_scale = ops::Const<float>(
-        s.WithOpName("x4/x2/scale/_12__cf__3")
+        s.WithOpName("x4/x2/scale/_12__cf__4")
             .WithDevice("/job:localhost/replica:0/task:0/device:CPU:0"),
         2.0f);
     auto x4_x2_y = ops::Mul(s.WithOpName("x4/x2/y"), x, x4_x2_scale);
@@ -706,13 +773,13 @@ TEST_F(FunctionLibraryRuntimeTest, Gradient_XTimesTwo) {
     auto x = ops::_Arg(s.WithOpName("x"), DT_FLOAT, 0);
     auto func0 = ops::_Arg(s.WithOpName("Func/_0"), DT_FLOAT, 1);
     auto scale = ops::Const(
-        s.WithOpName("scale/_5__cf__7")
+        s.WithOpName("scale/_5__cf__8")
             .WithDevice("/job:localhost/replica:0/task:0/device:CPU:0"),
         2.0f);
     auto func1_gx = ops::Mul(s.WithOpName("Func/_1/gx"), func0, scale);
     auto func1_sx = ops::Shape(s.WithOpName("Func/_1/sx"), x);
     auto const0 = ops::Const(
-        s.WithOpName("Func/_1/sy/_6__cf__8")
+        s.WithOpName("Func/_1/sy/_6__cf__9")
             .WithDevice("/job:localhost/replica:0/task:0/device:CPU:0"),
         0, {0});
     auto func1_rx = ops::internal::BroadcastGradientArgs(
diff --git a/tensorflow/core/framework/function.h b/tensorflow/core/framework/function.h
index 6c5cc1da98..1a579ab631 100644
--- a/tensorflow/core/framework/function.h
+++ b/tensorflow/core/framework/function.h
@@ -247,6 +247,9 @@ class CallFrameInterface {
  public:
   virtual ~CallFrameInterface() {}
 
+  virtual size_t num_args() const = 0;
+  virtual size_t num_retvals() const = 0;
+
   virtual Status GetArg(int index, Tensor* val) const = 0;
   virtual Status SetRetval(int index, const Tensor& val) = 0;
 };
@@ -267,6 +270,9 @@ class FunctionCallFrame : public CallFrameInterface {
   Status GetRetvals(std::vector<Tensor>* rets) const;
   Status ConsumeRetvals(std::vector<Tensor>* rets);
 
+  size_t num_args() const override { return arg_types_.size(); }
+  size_t num_retvals() const override { return ret_types_.size(); }
+
   // Callee methods.
   Status GetArg(int index, Tensor* val) const override;
   Status SetRetval(int index, const Tensor& val) override;
@@ -464,6 +470,8 @@ class FunctionLibraryRuntime {
   virtual void Run(const Options& opts, Handle handle,
                    gtl::ArraySlice<Tensor> args, std::vector<Tensor>* rets,
                    DoneCallback done) = 0;
+  virtual void Run(const Options& opts, Handle handle,
+                   CallFrameInterface* call_frame, DoneCallback done) = 0;
 
   // Creates a "kernel" for the given node def "ndef".
   //
diff --git a/tensorflow/core/kernels/captured_function.cc b/tensorflow/core/kernels/captured_function.cc
index 00cdc1eff2..5ef331e592 100644
--- a/tensorflow/core/kernels/captured_function.cc
+++ b/tensorflow/core/kernels/captured_function.cc
@@ -26,6 +26,7 @@ limitations under the License.
 #include "tensorflow/core/framework/resource_handle.pb_text.h"
 #include "tensorflow/core/kernels/dataset.h"
 #include "tensorflow/core/kernels/variable_ops.h"
+#include "tensorflow/core/lib/gtl/optional.h"
 #include "tensorflow/core/platform/notification.h"
 #include "tensorflow/core/public/session_options.h"
 
@@ -113,70 +114,220 @@ Status CapturedFunction::Create(
   FunctionLibraryRuntime::Handle f_handle;
   TF_RETURN_IF_ERROR(
       lib->Instantiate(func.name(), AttrSlice(&func.attr()), &f_handle));
+  const FunctionBody* fbody = lib->GetFunctionBody(f_handle);
+  if (fbody == nullptr) {
+    return errors::Internal("Failed to instantiate function body.");
+  }
 
   out_function->reset(new CapturedFunction(
       device, std::move(device_mgr), std::move(flib_def), std::move(pflr), lib,
-      f_handle, std::move(captured_inputs)));
+      f_handle, std::move(captured_inputs), fbody->ret_types));
   return Status::OK();
 }
 
+namespace {
+class CallFrameBase : public CallFrameInterface {
+ public:
+  explicit CallFrameBase(DataTypeSlice ret_types)
+      : ret_types_(ret_types), retvals_(ret_types.size()) {}
+
+  // Caller methods.
+  Status ConsumeRetvals(std::vector<Tensor>* retvals) {
+    retvals->reserve(retvals_.size());
+    int i = 0;
+    for (auto&& val : retvals_) {
+      if (!val) {
+        return errors::Internal("No return value for index ", i, ".");
+      }
+      retvals->emplace_back(std::move(val.value()));
+      ++i;
+    }
+    return Status::OK();
+  }
+
+  size_t num_retvals() const override { return retvals_.size(); }
+
+  // Callee methods.
+  Status SetRetval(int index, const Tensor& val) override {
+    if (index < retvals_.size() && val.dtype() == ret_types_[index] &&
+        !retvals_[index]) {
+      retvals_[index] = val;
+      return Status::OK();
+    } else if (index >= retvals_.size()) {
+      return errors::InvalidArgument("Return value ", index,
+                                     " is out of range.");
+    } else if (val.dtype() != ret_types_[index]) {
+      return errors::InvalidArgument("Expected type ",
+                                     DataTypeString(ret_types_[index]),
+                                     " for return value ", index, " but got ",
+                                     DataTypeString(val.dtype()), ".");
+    } else {
+      return errors::Internal("Attempted to set return value ", index,
+                              " more than once.");
+    }
+  }
+
+ private:
+  DataTypeSlice ret_types_;
+  std::vector<gtl::optional<Tensor>> retvals_;
+  TF_DISALLOW_COPY_AND_ASSIGN(CallFrameBase);
+};
+
+class OwnedArgsCallFrame : public CallFrameBase {
+ public:
+  OwnedArgsCallFrame(std::vector<Tensor>&& args,
+                     const std::vector<Tensor>* captured_inputs,
+                     DataTypeSlice ret_types)
+      : CallFrameBase(ret_types),
+        args_(std::move(args)),
+        captured_inputs_(captured_inputs) {}
+
+  size_t num_args() const override {
+    return args_.size() + captured_inputs_->size();
+  }
+
+  // Callee methods.
+  Status GetArg(int index, Tensor* val) const override {
+    if (index < args_.size() && args_[index].IsInitialized()) {
+      // TODO(mrry): Consider making `CallFrameInterface::GetArg` non-const in
+      // order to be able to `std::move(args_[index])` into `*val`.
+      *val = args_[index];
+      return Status::OK();
+    } else if (index < args_.size() + captured_inputs_->size()) {
+      *val = (*captured_inputs_)[index - args_.size()];
+      return Status::OK();
+    } else if (index >= args_.size() + captured_inputs_->size()) {
+      return errors::InvalidArgument("Argument ", index, " is out of range.");
+    } else {
+      return errors::Internal("Attempted to get argument ", index,
+                              " more than once.");
+    }
+  }
+
+ private:
+  std::vector<Tensor> args_;
+  const std::vector<Tensor>* const captured_inputs_;  // Not owned.
+};
+
+class BorrowedArgsCallFrame : public CallFrameBase {
+ public:
+  BorrowedArgsCallFrame(const std::vector<Tensor>& args,
+                        const std::vector<Tensor>* captured_inputs,
+                        DataTypeSlice ret_types)
+      : CallFrameBase(ret_types),
+        args_(args),
+        captured_inputs_(captured_inputs) {}
+
+  size_t num_args() const override {
+    return args_.size() + captured_inputs_->size();
+  }
+
+  // Callee methods.
+  Status GetArg(int index, Tensor* val) const override {
+    if (index < args_.size() && args_[index].IsInitialized()) {
+      *val = args_[index];
+      return Status::OK();
+    } else if (index < args_.size() + captured_inputs_->size()) {
+      *val = (*captured_inputs_)[index - args_.size()];
+      return Status::OK();
+    } else if (index >= args_.size() + captured_inputs_->size()) {
+      return errors::InvalidArgument("Argument ", index, " is out of range.");
+    } else {
+      return errors::Internal("Attempted to get argument ", index,
+                              " more than once.");
+    }
+  }
+
+ private:
+  const std::vector<Tensor>& args_;                   // Not owned.
+  const std::vector<Tensor>* const captured_inputs_;  // Not owned.
+};
+
+}  // namespace
+
 Status CapturedFunction::Run(FunctionLibraryRuntime::Options f_opts,
-                             gtl::ArraySlice<Tensor> args,
+                             std::vector<Tensor>&& args,
                              std::vector<Tensor>* rets) {
+  // TODO(mrry): Add cancellation manager support to IteratorContext
+  // so that we can cancel running map functions. The local
+  // cancellation manager here is created so that we can run kernels
+  // (such as queue kernels) that depend on the non-nullness of
+  // `OpKernelContext::cancellation_manager()`, but additional effort
+  // will be required to plumb it through the `IteratorContext`.
+  auto c_mgr = new CancellationManager;
+  auto frame =
+      new OwnedArgsCallFrame(std::move(args), &captured_inputs_, ret_types_);
+  f_opts.cancellation_manager = c_mgr;
   Notification n;
   Status s;
-  auto done_callback = [&n, &s](Status func_status) {
-    s.Update(func_status);
-    n.Notify();
-  };
+  lib_->Run(f_opts, f_handle_, frame,
+            [rets, c_mgr, frame, &n, &s](Status func_status) {
+              delete c_mgr;
+              s.Update(func_status);
+              if (s.ok()) {
+                s = frame->ConsumeRetvals(rets);
+              }
+              delete frame;
+              n.Notify();
+            });
+  n.WaitForNotification();
+  return s;
+}
+
+Status CapturedFunction::RunWithBorrowedArgs(
+    FunctionLibraryRuntime::Options f_opts, const std::vector<Tensor>& args,
+    std::vector<Tensor>* rets) {
   // TODO(mrry): Add cancellation manager support to IteratorContext
   // so that we can cancel running map functions. The local
   // cancellation manager here is created so that we can run kernels
-  // (such as queue kernels) that depend on the non-nullness
+  // (such as queue kernels) that depend on the non-nullness of
   // `OpKernelContext::cancellation_manager()`, but additional effort
   // will be required to plumb it through the `IteratorContext`.
-  CancellationManager c_mgr;
-  f_opts.cancellation_manager = &c_mgr;
-  RunHelper(std::move(f_opts), args, rets, std::move(done_callback));
+  auto c_mgr = new CancellationManager;
+  BorrowedArgsCallFrame frame(args, &captured_inputs_, ret_types_);
+  f_opts.cancellation_manager = c_mgr;
+  Notification n;
+  Status s;
+  lib_->Run(f_opts, f_handle_, &frame,
+            [rets, c_mgr, &frame, &n, &s](Status func_status) {
+              delete c_mgr;
+              s.Update(func_status);
+              if (s.ok()) {
+                s = frame.ConsumeRetvals(rets);
+              }
+              n.Notify();
+            });
   n.WaitForNotification();
   return s;
 }
 
 void CapturedFunction::RunAsync(FunctionLibraryRuntime::Options f_opts,
-                                gtl::ArraySlice<Tensor> args,
+                                std::vector<Tensor>&& args,
                                 std::vector<Tensor>* rets,
                                 FunctionLibraryRuntime::DoneCallback done) {
+  // TODO(mrry): Add cancellation manager support to IteratorContext
+  // so that we can cancel running map functions. The local
+  // cancellation manager here is created so that we can run kernels
+  // (such as queue kernels) that depend on the non-nullness of
+  // `OpKernelContext::cancellation_manager()`, but additional effort
+  // will be required to plumb it through the `IteratorContext`.
   auto c_mgr = new CancellationManager;
+  auto frame =
+      new OwnedArgsCallFrame(std::move(args), &captured_inputs_, ret_types_);
   f_opts.cancellation_manager = c_mgr;
-  FunctionLibraryRuntime::DoneCallback wrapped_done = std::bind(
-      [c_mgr](FunctionLibraryRuntime::DoneCallback done,
-              // Begin unbound arguments.
-              Status s) {
-        delete c_mgr;
-        done(s);
-      },
-      std::move(done), std::placeholders::_1);
-  RunHelper(std::move(f_opts), args, rets, std::move(wrapped_done));
-}
-
-void CapturedFunction::RunHelper(FunctionLibraryRuntime::Options f_opts,
-                                 gtl::ArraySlice<Tensor> args,
-                                 std::vector<Tensor>* rets,
-                                 FunctionLibraryRuntime::DoneCallback done) {
-  // TODO(mrry): Implement a synchronous version of
-  // FunctionLibraryRuntime::Run() that avoids a context switch for small
-  // functions.
-  if (captured_inputs_.empty()) {
-    lib_->Run(f_opts, f_handle_, args, rets, std::move(done));
-  } else {
-    std::vector<Tensor> args_with_captured;
-    args_with_captured.reserve(args.size() + captured_inputs_.size());
-    args_with_captured.insert(args_with_captured.end(), args.begin(),
-                              args.end());
-    args_with_captured.insert(args_with_captured.end(),
-                              captured_inputs_.begin(), captured_inputs_.end());
-    lib_->Run(f_opts, f_handle_, args_with_captured, rets, std::move(done));
-  }
+  lib_->Run(f_opts, f_handle_, frame,
+            std::bind(
+                [rets, c_mgr, frame](FunctionLibraryRuntime::DoneCallback done,
+                                     // Begin unbound arguments.
+                                     Status s) {
+                  delete c_mgr;
+                  if (s.ok()) {
+                    s = frame->ConsumeRetvals(rets);
+                  }
+                  delete frame;
+                  done(s);
+                },
+                std::move(done), std::placeholders::_1));
 }
 
 CapturedFunction::CapturedFunction(
@@ -184,13 +335,14 @@ CapturedFunction::CapturedFunction(
     std::unique_ptr<FunctionLibraryDefinition> flib_def,
     std::unique_ptr<ProcessFunctionLibraryRuntime> pflr,
     FunctionLibraryRuntime* lib, FunctionLibraryRuntime::Handle f_handle,
-    std::vector<Tensor> captured_inputs)
+    std::vector<Tensor> captured_inputs, DataTypeSlice ret_types)
     : device_(device),
       device_mgr_(std::move(device_mgr)),
       flib_def_(std::move(flib_def)),
       pflr_(std::move(pflr)),
       lib_(lib),
       f_handle_(f_handle),
-      captured_inputs_(std::move(captured_inputs)) {}
+      captured_inputs_(std::move(captured_inputs)),
+      ret_types_(ret_types) {}
 
 }  // namespace tensorflow
diff --git a/tensorflow/core/kernels/captured_function.h b/tensorflow/core/kernels/captured_function.h
index 9430127600..c10472dde0 100644
--- a/tensorflow/core/kernels/captured_function.h
+++ b/tensorflow/core/kernels/captured_function.h
@@ -60,19 +60,38 @@ class CapturedFunction {
                        std::vector<Tensor> captured_inputs,
                        std::unique_ptr<CapturedFunction>* out_function);
 
-  Status Run(FunctionLibraryRuntime::Options f_opts,
-             gtl::ArraySlice<Tensor> args, std::vector<Tensor>* rets);
-
+  // Synchronously runs the captured function on the given `args`, and stores
+  // the results in `*rets`. This method takes ownership of the tensors in
+  // `args`, in order to be able to deallocate them as early as possible.
+  // Use `RunWithBorrowedArgs()` if the caller needs to retain ownership of
+  // the `args`.
+  Status Run(FunctionLibraryRuntime::Options f_opts, std::vector<Tensor>&& args,
+             std::vector<Tensor>* rets);
+
+  // Synchronously runs the captured function on the given `args`, and stores
+  // the results in `*rets`. Prefer to use `Run()` or `RunAsync()` when
+  // possible.
+  Status RunWithBorrowedArgs(FunctionLibraryRuntime::Options f_opts,
+                             const std::vector<Tensor>& args,
+                             std::vector<Tensor>* rets);
+
+  // Asynchronously runs the captured function on the given `args`, stores
+  // the results in `*rets`, and calls the given `done` callback when the
+  // function returns. This method takes ownership of the tensors in `args`,
+  // in order to be able to deallocate them as early as possible.
   void RunAsync(FunctionLibraryRuntime::Options f_opts,
-                gtl::ArraySlice<Tensor> args, std::vector<Tensor>* rets,
+                std::vector<Tensor>&& args, std::vector<Tensor>* rets,
                 FunctionLibraryRuntime::DoneCallback done);
 
-  const Device* device() const { return device_; }
-
+  // Returns a borrowed pointer to the `ResourceManager` used when this
+  // function is run.
   ResourceMgr* resource_manager() const { return device_->resource_manager(); }
 
+  // Returns that additional captured inputs that will be passed to the function
+  // when `Run*()` is called.
   const std::vector<Tensor>& captured_inputs() { return captured_inputs_; }
 
+  // Returns a step ID for use when running a `CapturedFunction`.
   static int64 generate_step_id() {
     // Choose a step ID that is guaranteed not to clash with any
     // Session-generated step ID. DirectSession only generates
@@ -88,11 +107,8 @@ class CapturedFunction {
                    std::unique_ptr<ProcessFunctionLibraryRuntime> pflr,
                    FunctionLibraryRuntime* lib,
                    FunctionLibraryRuntime::Handle f_handle,
-                   std::vector<Tensor> captured_inputs);
-
-  void RunHelper(FunctionLibraryRuntime::Options f_opts,
-                 gtl::ArraySlice<Tensor> args, std::vector<Tensor>* rets,
-                 FunctionLibraryRuntime::DoneCallback done);
+                   std::vector<Tensor> captured_inputs,
+                   DataTypeSlice ret_types);
 
   Device* const device_;  // owned by device_mgr_.
   const std::unique_ptr<DeviceMgr> device_mgr_;
@@ -101,6 +117,7 @@ class CapturedFunction {
   FunctionLibraryRuntime* const lib_;  // owned by pflr_.
   const FunctionLibraryRuntime::Handle f_handle_;
   const std::vector<Tensor> captured_inputs_;
+  DataTypeSlice ret_types_;  // owned by pflr_.
 
   TF_DISALLOW_COPY_AND_ASSIGN(CapturedFunction);
 };
diff --git a/tensorflow/core/kernels/dataset_utils.cc b/tensorflow/core/kernels/dataset_utils.cc
index bd20e20cad..3ce64504d0 100644
--- a/tensorflow/core/kernels/dataset_utils.cc
+++ b/tensorflow/core/kernels/dataset_utils.cc
@@ -37,7 +37,8 @@ Status MakeIteratorFromInputElement(
       });
   opts.step_container = &step_container;
   std::vector<Tensor> return_values;
-  TF_RETURN_IF_ERROR(captured_func->Run(opts, input_element, &return_values));
+  TF_RETURN_IF_ERROR(
+      captured_func->RunWithBorrowedArgs(opts, input_element, &return_values));
 
   if (!(return_values.size() == 1 && return_values[0].dtype() == DT_VARIANT &&
         TensorShapeUtils::IsScalar(return_values[0].shape()))) {
diff --git a/tensorflow/core/kernels/filter_dataset_op.cc b/tensorflow/core/kernels/filter_dataset_op.cc
index 67417d467d..04427d296c 100644
--- a/tensorflow/core/kernels/filter_dataset_op.cc
+++ b/tensorflow/core/kernels/filter_dataset_op.cc
@@ -162,7 +162,8 @@ class FilterDatasetOp : public UnaryDatasetOpKernel {
           Notification n;
           Status ret;
           std::vector<Tensor> result;
-          ret = dataset()->captured_func_->Run(opts, *out_tensors, &result);
+          ret = dataset()->captured_func_->RunWithBorrowedArgs(
+              opts, *out_tensors, &result);
 
           if (!ret.ok()) {
             return ret;
diff --git a/tensorflow/core/kernels/group_by_window_dataset_op.cc b/tensorflow/core/kernels/group_by_window_dataset_op.cc
index 604555a560..c70a92d0d6 100644
--- a/tensorflow/core/kernels/group_by_window_dataset_op.cc
+++ b/tensorflow/core/kernels/group_by_window_dataset_op.cc
@@ -180,8 +180,9 @@ class GroupByWindowDatasetOp : public UnaryDatasetOpKernel {
               // Run the key function on the input element to identify its
               // group.
               std::vector<Tensor> key_func_output;
-              TF_RETURN_IF_ERROR(dataset()->captured_key_func_->Run(
-                  opts, next_input_element, &key_func_output));
+              TF_RETURN_IF_ERROR(
+                  dataset()->captured_key_func_->RunWithBorrowedArgs(
+                      opts, next_input_element, &key_func_output));
 
               if (key_func_output.size() != 1 ||
                   key_func_output[0].dtype() != DT_INT64 ||
@@ -210,7 +211,8 @@ class GroupByWindowDatasetOp : public UnaryDatasetOpKernel {
                 // window size.
                 std::vector<Tensor> window_size_func_output;
                 TF_RETURN_IF_ERROR(dataset()->captured_window_size_func_->Run(
-                    opts2, key_func_output, &window_size_func_output));
+                    opts2, std::move(key_func_output),
+                    &window_size_func_output));
 
                 if (window_size_func_output.size() != 1 ||
                     window_size_func_output[0].dtype() != DT_INT64 ||
@@ -282,8 +284,8 @@ class GroupByWindowDatasetOp : public UnaryDatasetOpKernel {
             {std::move(key_arg), std::move(group_dataset_arg)});
         std::vector<Tensor> return_values;
 
-        TF_RETURN_IF_ERROR(
-            dataset()->captured_reduce_func_->Run(opts, args, &return_values));
+        TF_RETURN_IF_ERROR(dataset()->captured_reduce_func_->Run(
+            opts, std::move(args), &return_values));
 
         if (!(return_values.size() == 1 &&
               return_values[0].dtype() == DT_VARIANT &&
diff --git a/tensorflow/core/kernels/map_and_batch_dataset_op.cc b/tensorflow/core/kernels/map_and_batch_dataset_op.cc
index d17b4237d3..65a3ee2a64 100644
--- a/tensorflow/core/kernels/map_and_batch_dataset_op.cc
+++ b/tensorflow/core/kernels/map_and_batch_dataset_op.cc
@@ -253,7 +253,7 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel {
         (*ctx->runner())(std::bind(
             [=](std::vector<Tensor> input_element) {
               dataset()->captured_func_->RunAsync(
-                  opts, input_element, &result->return_values,
+                  opts, std::move(input_element), &result->return_values,
                   [this, step_container, runner, result, batch_result,
                    offset](Status ret_status) {
                     delete step_container;
diff --git a/tensorflow/core/kernels/map_dataset_op.cc b/tensorflow/core/kernels/map_dataset_op.cc
index 29899a987e..f1b44beb7d 100644
--- a/tensorflow/core/kernels/map_dataset_op.cc
+++ b/tensorflow/core/kernels/map_dataset_op.cc
@@ -156,7 +156,8 @@ class MapDatasetOp : public UnaryDatasetOpKernel {
         opts.runner = ctx->runner();
         // TODO(mrry): Avoid blocking a threadpool thread. We will need to
         // stack-rip the iterators and use async kernels.
-        Status s = dataset()->captured_func_->Run(opts, args, out_tensors);
+        Status s =
+            dataset()->captured_func_->Run(opts, std::move(args), out_tensors);
         if (errors::IsOutOfRange(s)) {
           // `f` may deliberately raise `errors::OutOfRange` to indicate
           // that we should terminate the iteration early.
diff --git a/tensorflow/core/kernels/parallel_map_dataset_op.cc b/tensorflow/core/kernels/parallel_map_dataset_op.cc
index b9175fe904..5ba1ad222d 100644
--- a/tensorflow/core/kernels/parallel_map_dataset_op.cc
+++ b/tensorflow/core/kernels/parallel_map_dataset_op.cc
@@ -205,7 +205,7 @@ class ParallelMapDatasetOp : public UnaryDatasetOpKernel {
           opts.step_container = step_container;
           opts.runner = ctx->runner();
           dataset()->captured_func_->RunAsync(
-              opts, input_element, &result->return_values,
+              opts, std::move(input_element), &result->return_values,
               [result, step_container, result_index](Status ret_status) {
                 delete step_container;
                 result->status.Update(ret_status);
diff --git a/tensorflow/core/kernels/scan_dataset_op.cc b/tensorflow/core/kernels/scan_dataset_op.cc
index bc52322022..d0ba210a0c 100644
--- a/tensorflow/core/kernels/scan_dataset_op.cc
+++ b/tensorflow/core/kernels/scan_dataset_op.cc
@@ -143,8 +143,8 @@ class ScanDatasetOp : public UnaryDatasetOpKernel {
         std::vector<Tensor> state_and_output;
         state_and_output.reserve(dataset()->state_types_.size() +
                                  output_dtypes().size());
-        Status s =
-            dataset()->captured_func_->Run(opts, args, &state_and_output);
+        Status s = dataset()->captured_func_->Run(opts, std::move(args),
+                                                  &state_and_output);
         if (s.ok()) {
           state_.clear();
           size_t i = 0;
-- 
GitLab


From ab0866461e03480f5c5e8ae40b280d07b92639ae Mon Sep 17 00:00:00 2001
From: Sanjoy Das <sanjoy@google.com>
Date: Mon, 11 Dec 2017 15:57:19 -0800
Subject: [PATCH 0889/1225] [XLA:CPU] Minor refactor to the CPU layout
 assignment code

Lifts unnecessary lambdas to reduce clutter.  This will make a later change
more readable.

PiperOrigin-RevId: 178686976
---
 .../xla/service/cpu/cpu_layout_assignment.cc  | 118 ++++++++++--------
 1 file changed, 63 insertions(+), 55 deletions(-)

diff --git a/tensorflow/compiler/xla/service/cpu/cpu_layout_assignment.cc b/tensorflow/compiler/xla/service/cpu/cpu_layout_assignment.cc
index 78732c31f9..0df10f4af3 100644
--- a/tensorflow/compiler/xla/service/cpu/cpu_layout_assignment.cc
+++ b/tensorflow/compiler/xla/service/cpu/cpu_layout_assignment.cc
@@ -25,56 +25,64 @@ limitations under the License.
 namespace xla {
 namespace cpu {
 
-Status CpuLayoutAssignment::AddBackendConstraints(
-    LayoutConstraints* constraints) {
-  auto row_major_shape = [](const Shape& old_shape) {
-    Shape new_shape(old_shape);
-    std::vector<int64> dimension_order(new_shape.dimensions_size());
-    std::iota(dimension_order.rbegin(), dimension_order.rend(), 0);
-    *new_shape.mutable_layout() = LayoutUtil::MakeLayout(dimension_order);
-    return new_shape;
-  };
-  auto col_major_shape = [](const Shape& old_shape) {
-    Shape new_shape(old_shape);
-    std::vector<int64> dimension_order(new_shape.dimensions_size());
-    std::iota(dimension_order.begin(), dimension_order.end(), 0);
-    *new_shape.mutable_layout() = LayoutUtil::MakeLayout(dimension_order);
-    return new_shape;
-  };
-
-  // We want to change the layout of constant arrays to be column major when all
-  // of their users are dot operations that can be made faster with the flipped
-  // layout.  To avoid going quadriatic over the # of instructions, we cache
-  // this property in should_make_rhs_col_major -- it maps a constant to true if
-  // all of the users of said constant are dot operations that can be sped up.
-  // This cache is populated lazily as we encounter dot operations traversing
-  // the instruction stream.
-  tensorflow::gtl::FlatMap<const HloInstruction*, bool>
-      should_make_rhs_col_major_cache;
-  auto should_make_rhs_col_major = [&](const HloInstruction& instruction) {
-    if (!ProfitableToMakeDotRhsColumnMajor(instruction)) {
-      return false;
-    }
+// We want to change the layout of constant arrays to be column major when all
+// of their users are dot operations that can be made faster with the flipped
+// layout.  To avoid going quadriatic over the # of instructions, we cache this
+// property in should_make_rhs_col_major -- it maps a constant to true if all of
+// the users of said constant are dot operations that can be sped up.  This
+// cache is populated lazily as we encounter dot operations traversing the
+// instruction stream.
+
+namespace {
+using ShouldMakeRhsColMajorCache =
+    tensorflow::gtl::FlatMap<const HloInstruction*, bool>;
+}
 
-    const auto* rhs = instruction.operand(1);
-    if (rhs->opcode() != HloOpcode::kConstant) {
-      return false;
-    }
+static bool ShouldMakeRhsColMajor(ShouldMakeRhsColMajorCache* cache,
+                                  const HloInstruction& instruction) {
+  if (!ProfitableToMakeDotRhsColumnMajor(instruction)) {
+    return false;
+  }
 
-    auto it = should_make_rhs_col_major_cache.find(rhs);
-    if (it != should_make_rhs_col_major_cache.end()) {
-      return it->second;
-    }
+  const auto* rhs = instruction.operand(1);
+  if (rhs->opcode() != HloOpcode::kConstant) {
+    return false;
+  }
+
+  auto it = cache->find(rhs);
+  if (it != cache->end()) {
+    return it->second;
+  }
+
+  bool result = std::all_of(rhs->users().begin(), rhs->users().end(),
+                            [&](HloInstruction* user) {
+                              return ProfitableToMakeDotRhsColumnMajor(*user) &&
+                                     user->operand(0) != rhs;
+                            });
 
-    bool result = std::all_of(
-        rhs->users().begin(), rhs->users().end(), [&](HloInstruction* user) {
-          return ProfitableToMakeDotRhsColumnMajor(*user) &&
-                 user->operand(0) != rhs;
-        });
+  InsertOrDie(cache, rhs, result);
+  return result;
+}
+
+static Shape RowMajorShape(const Shape& old_shape) {
+  Shape new_shape(old_shape);
+  std::vector<int64> dimension_order(new_shape.dimensions_size());
+  std::iota(dimension_order.rbegin(), dimension_order.rend(), 0);
+  *new_shape.mutable_layout() = LayoutUtil::MakeLayout(dimension_order);
+  return new_shape;
+}
 
-    InsertOrDie(&should_make_rhs_col_major_cache, rhs, result);
-    return result;
-  };
+static Shape ColMajorShape(const Shape& old_shape) {
+  Shape new_shape(old_shape);
+  std::vector<int64> dimension_order(new_shape.dimensions_size());
+  std::iota(dimension_order.begin(), dimension_order.end(), 0);
+  *new_shape.mutable_layout() = LayoutUtil::MakeLayout(dimension_order);
+  return new_shape;
+}
+
+Status CpuLayoutAssignment::AddBackendConstraints(
+    LayoutConstraints* constraints) {
+  ShouldMakeRhsColMajorCache cache;
 
   const HloComputation* computation = constraints->computation();
   for (auto* instruction : computation->instructions()) {
@@ -89,9 +97,9 @@ Status CpuLayoutAssignment::AddBackendConstraints(
       //
       // These constraints are not hard constraints. Ideally, we should decide
       // which layouts to choose according to some cost model.
-      Shape output_shape(row_major_shape(convolution->shape()));
-      Shape input_shape(row_major_shape(lhs_instruction->shape()));
-      Shape filter_shape(row_major_shape(rhs_instruction->shape()));
+      Shape output_shape(RowMajorShape(convolution->shape()));
+      Shape input_shape(RowMajorShape(lhs_instruction->shape()));
+      Shape filter_shape(RowMajorShape(rhs_instruction->shape()));
 
       // Set layouts of the instructions' shapes.
       TF_RETURN_IF_ERROR(
@@ -100,11 +108,11 @@ Status CpuLayoutAssignment::AddBackendConstraints(
           constraints->SetOperandLayout(filter_shape, convolution, 1));
       TF_RETURN_IF_ERROR(
           constraints->SetInstructionLayout(output_shape, convolution));
-    } else if (should_make_rhs_col_major(*instruction)) {
+    } else if (ShouldMakeRhsColMajor(&cache, *instruction)) {
       auto* dot = instruction;
       const auto& rhs_shape = dot->operand(1)->shape();
       TF_RETURN_IF_ERROR(
-          constraints->SetOperandLayout(col_major_shape(rhs_shape), dot, 1));
+          constraints->SetOperandLayout(ColMajorShape(rhs_shape), dot, 1));
     } else if (PotentiallyImplementedAsEigenDot(*instruction)) {
       const HloInstruction* dot = instruction;
       // In order to implement `dot` with Eigen dot, the layouts of the lhs,
@@ -112,17 +120,17 @@ Status CpuLayoutAssignment::AddBackendConstraints(
       //
       // These constraints are not hard constraints. Ideally, we should decide
       // which layouts to choose according to some cost model.
-      Shape output_shape(row_major_shape(dot->shape()));
+      Shape output_shape(RowMajorShape(dot->shape()));
 
       const HloInstruction* lhs_instruction = dot->operand(0);
-      Shape lhs_shape(row_major_shape(lhs_instruction->shape()));
+      Shape lhs_shape(RowMajorShape(lhs_instruction->shape()));
       TF_RETURN_IF_ERROR(constraints->SetOperandLayout(lhs_shape, dot, 0));
 
       // dot is a kDot or a kTransposeDot fusion node.  In the latter case, if
       // it represents X @ X, it may have just one operand.
       if (dot->operand_count() > 1) {
         const HloInstruction* rhs_instruction = dot->operand(1);
-        Shape rhs_shape(row_major_shape(rhs_instruction->shape()));
+        Shape rhs_shape(RowMajorShape(rhs_instruction->shape()));
         TF_RETURN_IF_ERROR(constraints->SetOperandLayout(rhs_shape, dot, 1));
       }
 
@@ -144,7 +152,7 @@ Status CpuLayoutAssignment::AddBackendConstraints(
           continue;
         }
         Shape operand_shape(
-            row_major_shape(instruction->operand(operand_no)->shape()));
+            RowMajorShape(instruction->operand(operand_no)->shape()));
         TF_RETURN_IF_ERROR(constraints->SetOperandLayout(
             operand_shape, instruction, operand_no));
       }
-- 
GitLab


From da9ef31bfe82fad71721b3213891a491628736a0 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 11 Dec 2017 16:01:17 -0800
Subject: [PATCH 0890/1225] Fix the remote call graph construction so that the
 created _Send ops for returning the results point to the correct return
 value.

List available workers when the remote call target is not available.

PiperOrigin-RevId: 178687525
---
 .../cluster_function_library_runtime.cc       |   8 +-
 .../cluster_function_library_runtime_test.cc  | 120 +++++++++++++++---
 2 files changed, 109 insertions(+), 19 deletions(-)

diff --git a/tensorflow/core/distributed_runtime/cluster_function_library_runtime.cc b/tensorflow/core/distributed_runtime/cluster_function_library_runtime.cc
index 593fe0e363..d84b69d06b 100644
--- a/tensorflow/core/distributed_runtime/cluster_function_library_runtime.cc
+++ b/tensorflow/core/distributed_runtime/cluster_function_library_runtime.cc
@@ -105,6 +105,7 @@ Status ClusterFunctionLibraryRuntime::ConstructFunctionGraph(
         Rendezvous::CreateKey(target, 1 /* src_incarnation */, target,
                               out.name(), FrameAndIter(0, 0));
     recv_keys->push_back(key);
+    ++i;
   }
   return Status::OK();
 }
@@ -124,8 +125,11 @@ Status ClusterFunctionLibraryRuntime::Instantiate(
   WorkerInterface* wi = worker_session_->worker_cache->CreateWorker(target);
 
   if (wi == nullptr) {
-    return errors::InvalidArgument("Could not find worker with target: ",
-                                   target);
+    std::vector<string> workers;
+    worker_session_->worker_cache->ListWorkers(&workers);
+    return errors::InvalidArgument(
+        "Could not find worker with target: ", target,
+        " Available workers: ", str_util::Join(workers, ", "));
   }
 
   // Make RPC and obtain a graph handle.
diff --git a/tensorflow/core/distributed_runtime/cluster_function_library_runtime_test.cc b/tensorflow/core/distributed_runtime/cluster_function_library_runtime_test.cc
index 04587dd8ca..6dd8b9ec73 100644
--- a/tensorflow/core/distributed_runtime/cluster_function_library_runtime_test.cc
+++ b/tensorflow/core/distributed_runtime/cluster_function_library_runtime_test.cc
@@ -103,14 +103,54 @@ TEST_F(ClusterFunctionLibraryRuntimeTest, ConstructFunctionGraph) {
   GraphDef actual;
   std::vector<string> send_keys, recv_keys;
   TF_CHECK_OK(ConstructFunctionGraphHelper(
-      test::function::XTimesTwo().signature(),
+      test::function::Swap().signature(),
       {{"T", DT_FLOAT}, {"_target", "/job:a/replica:0/task:0/cpu:0"}}, &actual,
       &send_keys, &recv_keys));
-
   GraphDef expected;
   protobuf::TextFormat::ParseFromString(R"(
 node {
-  name: "_recv_x_0"
+  name: "_recv_i0_0"
+  op: "_Recv"
+  device: "/job:a/replica:0/task:0/device:CPU:0"
+  attr {
+    key: "client_terminated"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "recv_device"
+    value {
+      s: "/job:a/replica:0/task:0/device:CPU:0"
+    }
+  }
+  attr {
+    key: "send_device"
+    value {
+      s: "/job:a/replica:0/task:0/device:CPU:0"
+    }
+  }
+  attr {
+    key: "send_device_incarnation"
+    value {
+      i: 1
+    }
+  }
+  attr {
+    key: "tensor_name"
+    value {
+      s: "i0"
+    }
+  }
+  attr {
+    key: "tensor_type"
+    value {
+      type: DT_FLOAT
+    }
+  }
+}
+node {
+  name: "_recv_i1_1"
   op: "_Recv"
   device: "/job:a/replica:0/task:0/device:CPU:0"
   attr {
@@ -140,7 +180,7 @@ node {
   attr {
     key: "tensor_name"
     value {
-      s: "x"
+      s: "i1"
     }
   }
   attr {
@@ -151,9 +191,10 @@ node {
   }
 }
 node {
-  name: "XTimesTwo"
-  op: "XTimesTwo"
-  input: "_recv_x_0"
+  name: "Swap"
+  op: "Swap"
+  input: "_recv_i0_0"
+  input: "_recv_i1_1"
   device: "/job:a/replica:0/task:0/device:CPU:0"
   attr {
     key: "T"
@@ -163,15 +204,57 @@ node {
   }
   attr {
     key: "_target"
+    value {
+      s: "/job:a/replica:0/task:0/cpu:0"
+    }
+  }
+}
+node {
+  name: "_send_o0_0"
+  op: "_Send"
+  input: "Swap"
+  device: "/job:a/replica:0/task:0/device:CPU:0"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "client_terminated"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "recv_device"
+    value {
+      s: "/job:a/replica:0/task:0/device:CPU:0"
+    }
+  }
+  attr {
+    key: "send_device"
     value {
       s: "/job:a/replica:0/task:0/device:CPU:0"
     }
   }
+  attr {
+    key: "send_device_incarnation"
+    value {
+      i: 1
+    }
+  }
+  attr {
+    key: "tensor_name"
+    value {
+      s: "o0"
+    }
+  }
 }
 node {
-  name: "_send_y_0"
+  name: "_send_o1_1"
   op: "_Send"
-  input: "XTimesTwo"
+  input: "Swap:1"
   device: "/job:a/replica:0/task:0/device:CPU:0"
   attr {
     key: "T"
@@ -206,10 +289,11 @@ node {
   attr {
     key: "tensor_name"
     value {
-      s: "y"
+      s: "o1"
     }
   }
-})",
+}
+)",
                                         &expected);
   TF_EXPECT_GRAPH_EQ(expected, actual);
 }
@@ -234,16 +318,18 @@ TEST_F(ClusterFunctionLibraryRuntimeTest, DISABLED_InstantiateAndRun) {
 TEST_F(ClusterFunctionLibraryRuntimeTest,
        DISABLED_InstantiateAndRunAttrSubstitution) {
   FunctionDefLibrary proto;
-  *(proto.add_function()) = test::function::XTimesTwo();
+  *(proto.add_function()) = test::function::Swap();
   FunctionLibraryDefinition lib_def(OpRegistry::Global(), proto);
 
-  Tensor y;
-  auto x = test::AsTensor<float>({1, 2, 3, 4});
+  Tensor y1, y2;
+  auto x1 = test::AsTensor<float>({1, 2, 3, 4});
+  auto x2 = test::AsTensor<float>({4, 3, 2, 1});
   TF_EXPECT_OK(InstantiateAndRun(
-      "XTimesTwo", lib_def,
+      "Swap", lib_def,
       {{"T", DT_FLOAT}, {"_target", "/job:localhost/replica:0/task:1/cpu:0"}},
-      {x}, {&y}));
-  test::ExpectTensorEqual<float>(y, test::AsTensor<float>({2, 4, 6, 8}));
+      {x1, x2}, {&y1, &y2}));
+  test::ExpectTensorEqual<float>(y1, test::AsTensor<float>({4, 3, 2, 1}));
+  test::ExpectTensorEqual<float>(y2, test::AsTensor<float>({1, 2, 3, 4}));
 }
 
 }  // namespace tensorflow
-- 
GitLab


From 634515e14e8bf5aa4bdfe149b77c9aa53383891e Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 11 Dec 2017 16:11:26 -0800
Subject: [PATCH 0891/1225] Strength reduce division by a constant to
 multiplication by the reciprocal constant.

PiperOrigin-RevId: 178689056
---
 tensorflow/core/grappler/op_types.cc          |  18 +--
 tensorflow/core/grappler/op_types.h           |   3 +-
 .../grappler/optimizers/constant_folding.cc   | 122 ++++++++++++------
 .../optimizers/constant_folding_test.cc       |  68 ++++++++++
 4 files changed, 163 insertions(+), 48 deletions(-)

diff --git a/tensorflow/core/grappler/op_types.cc b/tensorflow/core/grappler/op_types.cc
index e1935fa9b3..ac94c3f81e 100644
--- a/tensorflow/core/grappler/op_types.cc
+++ b/tensorflow/core/grappler/op_types.cc
@@ -31,6 +31,11 @@ bool IsAdd(const NodeDef& node) {
 
 bool IsAddN(const NodeDef& node) { return node.op() == "AddN"; }
 
+bool IsAnyDiv(const NodeDef& node) {
+  return node.op() == "RealDiv" || node.op() == "Div" ||
+         node.op() == "FloorDiv" || node.op() == "TruncateDiv";
+}
+
 bool IsAvgPoolGrad(const NodeDef& node) { return node.op() == "AvgPoolGrad"; }
 
 bool IsAssert(const NodeDef& node) { return node.op() == "Assert"; }
@@ -74,6 +79,8 @@ bool IsDequeueOp(const NodeDef& node) {
          op == "QueueDequeueUpToV2" || op == "QueueDequeueUpTo";
 }
 
+bool IsDiv(const NodeDef& node) { return node.op() == "Div"; }
+
 bool IsEnter(const NodeDef& node) {
   const auto& op = node.op();
   return op == "Enter" || op == "RefEnter";
@@ -96,13 +103,13 @@ bool IsIdentity(const NodeDef& node) {
 }
 
 bool IsMatMul(const NodeDef& node) {
-  const auto op = node.op();
+  const auto& op = node.op();
   return op == "MatMul" || op == "BatchMatMul" || op == "QuantizedMatMul" ||
          op == "SparseMatMul";
 }
 
 bool IsMerge(const NodeDef& node) {
-  const auto op = node.op();
+  const auto& op = node.op();
   return op == "Merge" || op == "RefMerge";
 }
 
@@ -118,16 +125,11 @@ bool IsNextIteration(const NodeDef& node) {
 bool IsPad(const NodeDef& node) { return node.op() == "Pad"; }
 
 bool IsPlaceholder(const NodeDef& node) {
-  const auto op = node.op();
+  const auto& op = node.op();
   return op == "Placeholder" || op == "PlaceholderV2" ||
          op == "PlaceholderWithDefault";
 }
 
-bool IsAnyDiv(const NodeDef& node) {
-  return node.op() == "RealDiv" || node.op() == "Div" ||
-         node.op() == "FloorDiv" || node.op() == "TruncateDiv";
-}
-
 bool IsRealDiv(const NodeDef& node) { return node.op() == "RealDiv"; }
 
 bool IsReluGrad(const NodeDef& node) { return node.op() == "ReluGrad"; }
diff --git a/tensorflow/core/grappler/op_types.h b/tensorflow/core/grappler/op_types.h
index fc5279c1b8..b8031e011c 100644
--- a/tensorflow/core/grappler/op_types.h
+++ b/tensorflow/core/grappler/op_types.h
@@ -24,6 +24,7 @@ namespace grappler {
 
 bool IsAdd(const NodeDef& node);
 bool IsAddN(const NodeDef& node);
+bool IsAnyDiv(const NodeDef& node);
 bool IsAvgPoolGrad(const NodeDef& node);
 bool IsAssert(const NodeDef& node);
 bool IsBiasAdd(const NodeDef& node);
@@ -37,6 +38,7 @@ bool IsDepthwiseConv2dNative(const NodeDef& node);
 bool IsDepthwiseConv2dNativeBackpropFilter(const NodeDef& node);
 bool IsDepthwiseConv2dNativeBackpropInput(const NodeDef& node);
 bool IsDequeueOp(const NodeDef& node);
+bool IsDiv(const NodeDef& node);
 bool IsEnter(const NodeDef& node);
 bool IsExit(const NodeDef& node);
 bool IsFloorMod(const NodeDef& node);
@@ -49,7 +51,6 @@ bool IsNextIteration(const NodeDef& node);
 bool IsPad(const NodeDef& node);
 bool IsNoOp(const NodeDef& node);
 bool IsPlaceholder(const NodeDef& node);
-bool IsAnyDiv(const NodeDef& node);
 bool IsRealDiv(const NodeDef& node);
 bool IsReluGrad(const NodeDef& node);
 bool IsRecv(const NodeDef& node);
diff --git a/tensorflow/core/grappler/optimizers/constant_folding.cc b/tensorflow/core/grappler/optimizers/constant_folding.cc
index cb9a5fde2e..d90fe57040 100644
--- a/tensorflow/core/grappler/optimizers/constant_folding.cc
+++ b/tensorflow/core/grappler/optimizers/constant_folding.cc
@@ -1072,6 +1072,7 @@ Status ConstantFolding::FoldGraph(GraphDef* output) {
     }
     // We need to record a copy of output nodes before FoldNode() modifies it.
     std::set<NodeDef*> outputs = node_map_->GetOutputs(node->name());
+
     Status s = FoldNode(node, output);
     processed_nodes.insert(node->name());
     if (!s.ok()) {
@@ -1305,56 +1306,59 @@ Status ConstantFolding::SimplifyGraph(GraphDef* output,
                                       const GraphProperties& properties,
                                       bool use_shape_info) {
   const bool is_aggressive = opt_level_ == RewriterConfig::AGGRESSIVE;
-  for (auto& node : *output->mutable_node()) {
-    if (IsSimplifiableReduction(node)) {
+  for (int i = 0; i < output->node_size(); ++i) {
+    NodeDef* node = output->mutable_node(i);
+    if (IsSimplifiableReduction(*node)) {
       // Replace the reduction node with an identity node, that can be further
       // optimized by the model pruner.
       DataType output_type;
-      if (node.attr().count("T") > 0) {
-        output_type = node.attr().at("T").type();
+      if (node->attr().count("T") > 0) {
+        output_type = node->attr().at("T").type();
       } else {
         // This is an 'any' or 'all' reduction. The output is always boolean.
         output_type = DT_BOOL;
       }
-      node.set_op("Identity");
-      node.clear_attr();
-      (*node.mutable_attr())["T"].set_type(output_type);
-      *node.mutable_input(1) = AsControlDependency(node.input(1));
+      node->set_op("Identity");
+      node->clear_attr();
+      (*node->mutable_attr())["T"].set_type(output_type);
+      *node->mutable_input(1) = AsControlDependency(node->input(1));
+      continue;
     }
     const bool safe_to_use_shapes =
         use_shape_info && (feed_nodes_.empty() || is_aggressive);
-    if (safe_to_use_shapes && IsSimplifiableReshape(node, properties)) {
-      DataType output_type = node.attr().at("T").type();
-      node.set_op("Identity");
-      node.clear_attr();
-      (*node.mutable_attr())["T"].set_type(output_type);
-      *node.mutable_input(1) = AsControlDependency(node.input(1));
+    if (safe_to_use_shapes && IsSimplifiableReshape(*node, properties)) {
+      DataType output_type = node->attr().at("T").type();
+      node->set_op("Identity");
+      node->clear_attr();
+      (*node->mutable_attr())["T"].set_type(output_type);
+      *node->mutable_input(1) = AsControlDependency(node->input(1));
+      continue;
     }
 
+    const bool is_mul = IsMul(*node);
+    const bool is_matmul = IsMatMul(*node);
+    const bool is_add = IsAdd(*node) || IsBiasAdd(*node);
+    const bool is_sub = IsSub(*node);
+    const bool is_any_div = IsAnyDiv(*node);
     // Simplify multiplication by ones or zeros, and addition/subtraction of
     // zeros.
-    // TODO(rmlarsen): Rewrite x / const  -> x * (1/const).
-    bool is_mul = IsMul(node);
-    bool is_matmul = IsMatMul(node);
-    bool is_add = IsAdd(node) || IsBiasAdd(node);
-    bool is_sub = IsSub(node);
-    bool is_div = IsAnyDiv(node);
-    if (use_shape_info && (is_mul || is_matmul || is_add || is_sub || is_div) &&
-        properties.HasInputProperties(node.name()) &&
-        properties.HasOutputProperties(node.name())) {
-      const NodeDef* x = node_map_->GetNode(node.input(0));
-      const NodeDef* y = node_map_->GetNode(node.input(1));
+    if (use_shape_info &&
+        (is_mul || is_matmul || is_add || is_sub || is_any_div) &&
+        properties.HasInputProperties(node->name()) &&
+        properties.HasOutputProperties(node->name())) {
+      const NodeDef* x = node_map_->GetNode(node->input(0));
+      const NodeDef* y = node_map_->GetNode(node->input(1));
       if (x == nullptr || y == nullptr) {
         return errors::InvalidArgument("Invalid inputs to node: ",
-                                       node.DebugString());
+                                       node->DebugString());
       }
       const TensorShapeProto& output_shape =
-          properties.GetOutputProperties(node.name())[0].shape();
+          properties.GetOutputProperties(node->name())[0].shape();
 
       // Simplify element-wise  multiplication by ones or addition/subtraction
       // of zeros.
       const TensorShapeProto& y_shape =
-          properties.GetInputProperties(node.name())[1].shape();
+          properties.GetInputProperties(node->name())[1].shape();
       const bool x_is_zero = IsZeros(*x);
       const bool x_is_one = IsOnes(*x);
       const bool y_matches_output_shape = ShapesEqual(output_shape, y_shape);
@@ -1362,52 +1366,91 @@ Status ConstantFolding::SimplifyGraph(GraphDef* output,
           ((is_mul && x_is_one) || (is_add && x_is_zero))) {
         // TODO(rmlarsen): Handle subtraction 0 - y.
         // 1 * y = y or 0 + y = y.
-        ReplaceOperationWithIdentity(1, &node);
+        ReplaceOperationWithIdentity(1, node);
         continue;
       }
 
       // Replace 1 / y with Reciprocal op.
-      if (y_matches_output_shape && is_div && x_is_one) {
-        ReplaceDivisionOfOnesByReciprocal(&node);
+      if (y_matches_output_shape && is_any_div && x_is_one) {
+        ReplaceDivisionOfOnesByReciprocal(node);
         continue;
       }
 
       const TensorShapeProto& x_shape =
-          properties.GetInputProperties(node.name())[0].shape();
+          properties.GetInputProperties(node->name())[0].shape();
       const bool y_is_zero = IsZeros(*y);
       const bool y_is_one = IsOnes(*y);
       const bool x_matches_output_shape = ShapesEqual(output_shape, x_shape);
       if (x_matches_output_shape &&
-          (((is_mul || is_div) && y_is_one) ||
+          (((is_mul || is_any_div) && y_is_one) ||
            ((is_add || is_sub) && y_is_zero && is_aggressive))) {
         // x * 1 = x or x / 1 = x or x +/- 0 = x
-        ReplaceOperationWithIdentity(0, &node);
+        ReplaceOperationWithIdentity(0, node);
         continue;
       }
 
       // Simplify multiplication and matmul by zeros.
       // Also optimize zeros divided by a tensor, but only if we are in
       // aggressive mode, since we might get rid of divisions by zero.
-      bool optimize_zeros_divided_by_y = is_div && x_is_zero && is_aggressive;
+      bool optimize_zeros_divided_by_y =
+          is_any_div && x_is_zero && is_aggressive;
       if ((x_is_zero || y_is_zero) &&
           (is_mul || is_matmul || optimize_zeros_divided_by_y)) {
         const PartialTensorShape shp(output_shape);
         if (shp.IsFullyDefined()) {
           TF_RETURN_IF_ERROR(
-              ReplaceOperationWithConstant(0, output_shape, &node));
+              ReplaceOperationWithConstant(0, output_shape, node));
           continue;
         }
         // Even if an input shape is only partially known, we may known that it
         // matches the output shape and thus forward the corresponding zero
         // input.
-        if ((is_mul || is_div) && x_is_zero && x_matches_output_shape) {
-          ReplaceOperationWithIdentity(0, &node);
+        if ((is_mul || is_any_div) && x_is_zero && x_matches_output_shape) {
+          ReplaceOperationWithIdentity(0, node);
+          continue;
         } else if (is_mul && y_is_zero && y_matches_output_shape) {
-          ReplaceOperationWithIdentity(1, &node);
+          ReplaceOperationWithIdentity(1, node);
+          continue;
         }
       }
     }
+
+    // Strength reduce floating point division by a constant Div(x, const) to
+    // multiplication by the reciprocal Mul(x, Reciprocal(const)). This in turn
+    // will be constant folded to Mul(x, 1.0/const).
+    if (node->input_size() >= 2 && (IsRealDiv(*node) || IsDiv(*node))) {
+      const string& const_input = node->input(1);
+      const NodeDef* denom = node_map_->GetNode(const_input);
+      CHECK(denom != nullptr);
+      if (!IsReallyConstant(*denom)) {
+        continue;
+      }
+      if (node->attr().count("T") == 0) {
+        continue;
+      }
+      DataType type = node->attr().at("T").type();
+      if (IsDiv(*node) && !DataTypeIsFloating(type)) {
+        continue;
+      }
+      // Insert new reciprocal op and change node from Div to Mul.
+      NodeDef* reciprocal_node = output->add_node();
+      reciprocal_node->set_name(AddPrefixToNodeName(
+          strings::StrCat(node->name(), "_recip"), kConstantFoldingConst));
+      reciprocal_node->set_op("Reciprocal");
+      reciprocal_node->set_device(node->device());
+      node->set_op("Mul");
+      // Re-wire inputs and outputs.
+      reciprocal_node->add_input(const_input);
+      (*reciprocal_node->mutable_attr())["T"].set_type(type);
+      node->set_input(1, reciprocal_node->name());
+      node_map_->AddNode(reciprocal_node->name(), reciprocal_node);
+      node_map_->UpdateInput(node->name(), const_input,
+                             reciprocal_node->name());
+      node_map_->AddOutput(NodeName(const_input), reciprocal_node->name());
+      graph_modified_ = true;
+    }
   }
+
   return Status::OK();
 }
 
@@ -1444,6 +1487,7 @@ Status ConstantFolding::RunOptimizationPass(Cluster* cluster,
   }
 
   TF_RETURN_IF_ERROR(FoldGraph(output));
+  node_map_.reset(new NodeMap(output));
   TF_RETURN_IF_ERROR(SimplifyGraph(output, properties, can_use_shape_info));
 
   return Status::OK();
diff --git a/tensorflow/core/grappler/optimizers/constant_folding_test.cc b/tensorflow/core/grappler/optimizers/constant_folding_test.cc
index 7fc88cd466..813d0cdcb0 100644
--- a/tensorflow/core/grappler/optimizers/constant_folding_test.cc
+++ b/tensorflow/core/grappler/optimizers/constant_folding_test.cc
@@ -232,6 +232,74 @@ TEST_F(ConstantFoldingTest, NeutralElement) {
   }
 }
 
+TEST_F(ConstantFoldingTest, StrengthReduce_Reciprocal) {
+  tensorflow::Scope s = tensorflow::Scope::NewRootScope();
+  Output cf_half = ops::Const(s.WithOpName("cf_half"), 0.5f, {1});
+  Output xf = ops::Placeholder(s.WithOpName("xf"), DT_FLOAT,
+                               ops::Placeholder::Shape(TensorShape({2, 2})));
+  Output xi = ops::Placeholder(s.WithOpName("xi"), DT_INT32,
+                               ops::Placeholder::Shape(TensorShape({2, 2})));
+  Output ci = ops::Const(s.WithOpName("ci"), 2, {1});
+  Output cf = ops::Const(s.WithOpName("cf"), 2.0f, {1});
+  Output div_i = ops::Div(s.WithOpName("div_i"), xi, ci);
+  Output div_f = ops::Div(s.WithOpName("div_f"), xf, cf);
+  Output realdiv = ops::RealDiv(s.WithOpName("realdiv"), xf, cf);
+
+  GrapplerItem item;
+  TF_CHECK_OK(s.ToGraphDef(&item.graph));
+  item.fetch = {"div_f", "div_i", "realdiv"};
+  ConstantFolding optimizer(RewriterConfig::AGGRESSIVE,
+                            nullptr /* cpu_device */);
+  GraphDef output;
+  Status status = optimizer.Optimize(nullptr, item, &output);
+  TF_EXPECT_OK(status);
+
+  EXPECT_EQ(8, output.node_size());
+  for (int i = 0; i < output.node_size(); ++i) {
+    const NodeDef& node = output.node(i);
+    const string& name = node.name();
+    if (name == "div_i") {
+      // Integer division is unchanged.
+      EXPECT_EQ("Div", node.op());
+      EXPECT_EQ("xi", node.input(0));
+      EXPECT_EQ("ci", node.input(1));
+    } else if (name == "div_f") {
+      EXPECT_EQ("Mul", node.op());
+      EXPECT_EQ("xf", node.input(0));
+      EXPECT_EQ("ConstantFolding/div_f_recip", node.input(1));
+    } else if (name == "realdiv") {
+      EXPECT_EQ("Mul", node.op());
+      EXPECT_EQ("xf", node.input(0));
+      EXPECT_EQ("ConstantFolding/realdiv_recip", node.input(1));
+    } else if (name == "ConstantFolding/div_f_recip") {
+      EXPECT_EQ("Const", node.op());
+      EXPECT_EQ(DT_FLOAT, node.attr().at("dtype").type());
+      TensorProto t = node.attr().at("value").tensor();
+      EXPECT_EQ(DT_FLOAT, t.dtype());
+      EXPECT_EQ(1, t.tensor_shape().dim_size());
+      EXPECT_EQ(1, t.tensor_shape().dim(0).size());
+    } else if (name == "ConstantFolding/realdiv_recip") {
+      EXPECT_EQ("Const", node.op());
+      EXPECT_EQ(DT_FLOAT, node.attr().at("dtype").type());
+      TensorProto t = node.attr().at("value").tensor();
+      EXPECT_EQ(DT_FLOAT, t.dtype());
+      EXPECT_EQ(1, t.tensor_shape().dim_size());
+      EXPECT_EQ(1, t.tensor_shape().dim(0).size());
+    }
+  }
+
+  // Check that the reciprocals have the expected value.
+  std::vector<string> fetch = {"cf_half"};
+  auto tensor_expected = EvaluateNodes(item.graph, fetch);
+  EXPECT_EQ(fetch.size(), tensor_expected.size());
+  fetch = {"ConstantFolding/div_f_recip", "ConstantFolding/realdiv_recip"};
+  auto tensors = EvaluateNodes(output, fetch);
+  EXPECT_EQ(fetch.size(), tensors.size());
+  for (int i = 0; i < fetch.size(); i++) {
+    test::ExpectTensorEqual<float>(tensor_expected[0], tensors[i]);
+  }
+}
+
 TEST_F(ConstantFoldingTest, NeutralElement_PartialShape_UnknownOutputShape) {
   tensorflow::Scope s = tensorflow::Scope::NewRootScope();
   Output x_known =
-- 
GitLab


From 379d59d01e4419aadf9103338315624eadfd9f81 Mon Sep 17 00:00:00 2001
From: Sanjoy Das <sanjoy@google.com>
Date: Mon, 11 Dec 2017 16:31:19 -0800
Subject: [PATCH 0892/1225] [XLA] Optimize dot(concat(..), constant)

dot(concat(..), constant) and dot(constant, concat(..)) can be rewritten to
avoid the concatenate.  This can itself be a win, but can also help unlock other
optimization opportunities.

PiperOrigin-RevId: 178691585
---
 .../xla/service/algebraic_simplifier.cc       | 153 ++++++++++++++++++
 .../xla/service/algebraic_simplifier_test.cc  | 147 +++++++++++++++++
 .../compiler/xla/tests/dot_operation_test.cc  |  90 +++++++++++
 3 files changed, 390 insertions(+)

diff --git a/tensorflow/compiler/xla/service/algebraic_simplifier.cc b/tensorflow/compiler/xla/service/algebraic_simplifier.cc
index 2c0d1900eb..d7bf4f37af 100644
--- a/tensorflow/compiler/xla/service/algebraic_simplifier.cc
+++ b/tensorflow/compiler/xla/service/algebraic_simplifier.cc
@@ -279,6 +279,11 @@ class AlgebraicSimplifierVisitor : public DfsHloVisitorWithDefault {
     return Status::OK();
   }
 
+  StatusOr<HloInstruction*> OptimizeDotOfConcat(HloInstruction* dot);
+  StatusOr<HloInstruction*> OptimizeDotOfConcatHelper(
+      const Shape& dot_shape, HloInstruction* lhs, int64 lhs_contracting_dim,
+      HloInstruction* rhs, int64 rhs_contracting_dim, bool swapped);
+
   // Current HloComputation instance the AlgebraicSimplifierVisitor is
   // traversing.
   HloComputation* computation_;
@@ -711,6 +716,146 @@ StatusOr<bool> AlgebraicSimplifierVisitor::HandleDotStrengthReduction(
   return false;
 }
 
+StatusOr<HloInstruction*> AlgebraicSimplifierVisitor::OptimizeDotOfConcat(
+    HloInstruction* dot) {
+  const DotDimensionNumbers& dnums = dot->dot_dimension_numbers();
+  if (dnums.lhs_contracting_dimensions_size() != 1 ||
+      dnums.lhs_batch_dimensions_size() != 0) {
+    return nullptr;
+  }
+
+  const int64 lhs_contracting_dim = dnums.lhs_contracting_dimensions(0);
+  const int64 rhs_contracting_dim = dnums.rhs_contracting_dimensions(0);
+  HloInstruction* lhs = dot->mutable_operand(0);
+  HloInstruction* rhs = dot->mutable_operand(1);
+
+  TF_ASSIGN_OR_RETURN(
+      HloInstruction * optimized_lhs_concat,
+      OptimizeDotOfConcatHelper(dot->shape(), lhs, lhs_contracting_dim, rhs,
+                                rhs_contracting_dim, /*swapped=*/false));
+  if (optimized_lhs_concat) {
+    return optimized_lhs_concat;
+  }
+
+  return OptimizeDotOfConcatHelper(dot->shape(), rhs, rhs_contracting_dim, lhs,
+                                   lhs_contracting_dim, /*swapped=*/true);
+}
+
+StatusOr<HloInstruction*> AlgebraicSimplifierVisitor::OptimizeDotOfConcatHelper(
+    const Shape& dot_shape, HloInstruction* lhs, int64 lhs_contracting_dim,
+    HloInstruction* rhs, int64 rhs_contracting_dim, bool swapped) {
+  bool can_optimize = lhs->opcode() == HloOpcode::kConcatenate &&
+                      lhs->concatenate_dimension() == lhs_contracting_dim &&
+                      rhs->opcode() == HloOpcode::kConstant;
+  if (!can_optimize) {
+    return nullptr;
+  }
+
+  // We're replacing this:
+  //
+  //   +-----+-----+-----+      +-------------------+
+  //   |     |     |     |      |                   |
+  //   |     |     |     |      |        R_0        |
+  //   |     |     |     |      |                   |
+  //   |     |     |     |      +-------------------+
+  //   |     |     |     |      |                   |
+  //   | L_0 | L_1 | L_2 |   *  |        R_1        |
+  //   |     |     |     |      |                   |
+  //   |     |     |     |      +-------------------+
+  //   |     |     |     |      |                   |
+  //   |     |     |     |      |        R_2        |
+  //   |     |     |     |      |                   |
+  //   +-----+-----+-----+      +-------------------+
+  //
+  // with this:
+  //
+  // [Sum over i]
+  //
+  //   +-----+     +-------------------+
+  //   |     |     |                   |
+  //   |     |  *  |        R_i        |
+  //   |     |     |                   |
+  //   |     |     +-------------------+
+  //   |     |
+  //   | L_i |
+  //   |     |
+  //   |     |
+  //   |     |
+  //   |     |
+  //   |     |
+  //   +-----+
+  //
+  // where the LHS is a concatenate operation (so we can "split" the LHS tensor
+  // for free) and the RHS is a constant tensor (and thus can be split at
+  // compile time).  In the future, we may also want to do this when both the
+  // LHS and the RHS are concatenate operations that line up along the dimension
+  // being contracted over.
+  //
+  // We should be able to generalize this transform to work on a non-constant
+  // RHS when/if we have in-place slices or support input-fusing slices into
+  // Dots.
+
+  // Dimension numbers for the new dot instructions we'll create (L_i * R_i in
+  // the diagram above).
+  DotDimensionNumbers new_dot_dnums;
+  new_dot_dnums.add_lhs_contracting_dimensions(swapped ? rhs_contracting_dim
+                                                       : lhs_contracting_dim);
+  new_dot_dnums.add_rhs_contracting_dimensions(swapped ? lhs_contracting_dim
+                                                       : rhs_contracting_dim);
+
+  // Here we use the MKN notation, where the contracted dimension has K
+  // elements and the two non-contracted dimensions have M and N elements.
+  HloInstruction* add_result = nullptr;
+  int64 rhs_contracting_dim_offset = 0;
+  int64 n = rhs->shape().dimensions(1 - rhs_contracting_dim);
+  for (HloInstruction* concat_op : lhs->operands()) {
+    int64 sub_k = concat_op->shape().dimensions(lhs_contracting_dim);
+    Shape rhs_slice_shape(rhs->shape());
+    rhs_slice_shape.set_dimensions(rhs_contracting_dim, sub_k);
+
+    std::array<int64, 2> start_indices;
+    start_indices[rhs_contracting_dim] = rhs_contracting_dim_offset;
+    start_indices[1 - rhs_contracting_dim] = 0;
+
+    std::array<int64, 2> limit_indices;
+    limit_indices[rhs_contracting_dim] = rhs_contracting_dim_offset + sub_k;
+    limit_indices[1 - rhs_contracting_dim] = n;
+
+    HloInstruction* rhs_slice =
+        computation_->AddInstruction(HloInstruction::CreateSlice(
+            rhs_slice_shape, rhs, /*start_indices=*/start_indices,
+            /*limit_indices=*/limit_indices, /*strides=*/{1, 1}));
+
+    // TODO(b/69062148): We can get rid of `swapped` once all backends support
+    // "non-canonical" contraction dimensions (that contracts dimension 1 of the
+    // LHS with dimension 0 of the RHS).  But for now we keep the same
+    // contraction dimensions as the incoming dot operation to ensure the new
+    // dot operations can be lowered.
+    HloInstruction *new_dot_lhs, *new_dot_rhs;
+    if (swapped) {
+      new_dot_lhs = rhs_slice;
+      new_dot_rhs = concat_op;
+    } else {
+      new_dot_lhs = concat_op;
+      new_dot_rhs = rhs_slice;
+    }
+
+    auto* new_dot = computation_->AddInstruction(HloInstruction::CreateDot(
+        dot_shape, new_dot_lhs, new_dot_rhs, new_dot_dnums));
+
+    if (add_result) {
+      add_result = computation_->AddInstruction(HloInstruction::CreateBinary(
+          dot_shape, HloOpcode::kAdd, add_result, new_dot));
+    } else {
+      add_result = new_dot;
+    }
+
+    rhs_contracting_dim_offset += sub_k;
+  }
+
+  return add_result;
+}
+
 Status AlgebraicSimplifierVisitor::HandleDot(HloInstruction* dot) {
   auto lhs = dot->mutable_operand(0);
   auto rhs = dot->mutable_operand(1);
@@ -732,6 +877,14 @@ Status AlgebraicSimplifierVisitor::HandleDot(HloInstruction* dot) {
         dot, HloInstruction::CreateBroadcast(dot->shape(), zero, {}));
   }
 
+  TF_ASSIGN_OR_RETURN(HloInstruction * dot_of_concat_optimized,
+                      OptimizeDotOfConcat(dot));
+  if (dot_of_concat_optimized) {
+    VLOG(10) << "Replaced dot(concat(...), constant) with add(dot(..., "
+                "constant)...)";
+    return ReplaceInstruction(dot, dot_of_concat_optimized);
+  }
+
   if (enable_dot_strength_reduction_ && !is_layout_sensitive_) {
     TF_ASSIGN_OR_RETURN(bool did_strength_reduction,
                         HandleDotStrengthReduction(dot));
diff --git a/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc b/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc
index 7462e397ff..d0b659eec3 100644
--- a/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc
+++ b/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc
@@ -2296,5 +2296,152 @@ INSTANTIATE_TEST_CASE_P(
                        ::testing::Values(1, 2), ::testing::Bool(),
                        ::testing::Bool()));
 
+struct DotOfConcatTestSpec {
+  int64 m;
+  int64 k;
+  int64 n;
+};
+
+class DotOfConcatSimplificationTest
+    : public HloTestBase,
+      public ::testing::WithParamInterface<DotOfConcatTestSpec> {};
+
+// Test that we transform
+//  dot(const, concat(A, B, C))
+// to
+//  add(dot(const_0, A), dot(const_1, B),  dot(const_2, C))
+TEST_P(DotOfConcatSimplificationTest, ConstantLHS) {
+  HloComputation::Builder builder(TestName());
+
+  DotOfConcatTestSpec spec = GetParam();
+
+  ASSERT_GE(spec.k, 3);
+
+  int64 k0 = spec.k / 3;
+  int64 k1 = spec.k / 3;
+  int64 k2 = spec.k - k0 - k1;
+
+  Shape lhs_shape = ShapeUtil::MakeShape(F32, {spec.m, spec.k});
+  auto* lhs = builder.AddInstruction(
+      HloInstruction::CreateConstant(Literal::CreateR2F32Linspace(
+          /*from=*/10.0, /*to=*/10000.0, /*rows=*/spec.m, /*cols=*/spec.k)));
+
+  Shape rhs0_shape = ShapeUtil::MakeShape(F32, {k0, spec.n});
+  Shape rhs1_shape = ShapeUtil::MakeShape(F32, {k1, spec.n});
+  Shape rhs2_shape = ShapeUtil::MakeShape(F32, {k2, spec.n});
+
+  HloInstruction* rhs0 = builder.AddInstruction(
+      HloInstruction::CreateParameter(0, rhs0_shape, "rhs0"));
+  HloInstruction* rhs1 = builder.AddInstruction(
+      HloInstruction::CreateParameter(1, rhs1_shape, "rhs1"));
+  HloInstruction* rhs2 = builder.AddInstruction(
+      HloInstruction::CreateParameter(2, rhs2_shape, "rhs2"));
+
+  Shape rhs_shape = ShapeUtil::MakeShape(F32, {spec.k, spec.n});
+  HloInstruction* rhs = builder.AddInstruction(
+      HloInstruction::CreateConcatenate(rhs_shape, {rhs0, rhs1, rhs2}, 0));
+
+  DotDimensionNumbers dot_dnums;
+  dot_dnums.add_lhs_contracting_dimensions(1);
+  dot_dnums.add_rhs_contracting_dimensions(0);
+
+  Shape dot_shape = ShapeUtil::MakeShape(F32, {spec.m, spec.n});
+  builder.AddInstruction(
+      HloInstruction::CreateDot(dot_shape, lhs, rhs, dot_dnums));
+
+  auto module = CreateNewModule();
+  auto computation = module->AddEntryComputation(builder.Build());
+  AlgebraicSimplifier simplifier(/*is_layout_sensitive=*/false,
+                                 non_bitcasting_callback());
+  TF_ASSERT_OK_AND_ASSIGN(bool run_successful, simplifier.Run(module.get()));
+  ASSERT_TRUE(run_successful);
+
+  EXPECT_TRUE(
+      ShapeUtil::Equal(computation->root_instruction()->shape(), dot_shape));
+
+  auto match_dot_0 = op::Dot(op::Slice(op::Constant()), op::Parameter(0));
+  auto match_dot_1 = op::Dot(op::Slice(op::Constant()), op::Parameter(1));
+  auto match_dot_2 = op::Dot(op::Slice(op::Constant()), op::Parameter(2));
+  EXPECT_THAT(computation->root_instruction(),
+              op::Add(op::Add(match_dot_0, match_dot_1), match_dot_2));
+}
+
+// Test that we transform
+//  dot(concat(A, B, C), const)
+// to
+//  add(dot(A, const_0), dot(B, const_1),  dot(C, const_2))
+TEST_P(DotOfConcatSimplificationTest, ConstantRHS) {
+  HloComputation::Builder builder(TestName());
+
+  DotOfConcatTestSpec spec = GetParam();
+
+  ASSERT_GE(spec.k, 4);
+
+  int64 k0 = spec.k / 4;
+  int64 k1 = spec.k / 4;
+  int64 k2 = spec.k / 4;
+  int64 k3 = spec.k - k0 - k1 - k2;
+
+  Shape lhs0_shape = ShapeUtil::MakeShape(F32, {spec.m, k0});
+  Shape lhs1_shape = ShapeUtil::MakeShape(F32, {spec.m, k1});
+  Shape lhs2_shape = ShapeUtil::MakeShape(F32, {spec.m, k2});
+  Shape lhs3_shape = ShapeUtil::MakeShape(F32, {spec.m, k3});
+
+  HloInstruction* lhs0 = builder.AddInstruction(
+      HloInstruction::CreateParameter(0, lhs0_shape, "lhs0"));
+  HloInstruction* lhs1 = builder.AddInstruction(
+      HloInstruction::CreateParameter(1, lhs1_shape, "lhs1"));
+  HloInstruction* lhs2 = builder.AddInstruction(
+      HloInstruction::CreateParameter(2, lhs2_shape, "lhs2"));
+  HloInstruction* lhs3 = builder.AddInstruction(
+      HloInstruction::CreateParameter(3, lhs2_shape, "lhs3"));
+
+  Shape lhs_shape = ShapeUtil::MakeShape(F32, {spec.m, spec.k});
+  HloInstruction* lhs =
+      builder.AddInstruction(HloInstruction::CreateConcatenate(
+          lhs_shape, {lhs0, lhs1, lhs2, lhs3}, 1));
+
+  Shape rhs_shape = ShapeUtil::MakeShape(F32, {spec.k, spec.m});
+  auto* rhs = builder.AddInstruction(
+      HloInstruction::CreateConstant(Literal::CreateR2F32Linspace(
+          /*from=*/10.0, /*to=*/10000.0, /*rows=*/spec.k, /*cols=*/spec.m)));
+
+  DotDimensionNumbers dot_dnums;
+  dot_dnums.add_lhs_contracting_dimensions(1);
+  dot_dnums.add_rhs_contracting_dimensions(0);
+
+  Shape dot_shape = ShapeUtil::MakeShape(F32, {spec.m, spec.n});
+  builder.AddInstruction(
+      HloInstruction::CreateDot(dot_shape, lhs, rhs, dot_dnums));
+
+  auto module = CreateNewModule();
+  auto computation = module->AddEntryComputation(builder.Build());
+  AlgebraicSimplifier simplifier(/*is_layout_sensitive=*/false,
+                                 non_bitcasting_callback());
+  TF_ASSERT_OK_AND_ASSIGN(bool run_successful, simplifier.Run(module.get()));
+  ASSERT_TRUE(run_successful);
+  EXPECT_TRUE(
+      ShapeUtil::Equal(computation->root_instruction()->shape(), dot_shape));
+
+  auto match_dot_0 = op::Dot(op::Parameter(0), op::Slice(op::Constant()));
+  auto match_dot_1 = op::Dot(op::Parameter(1), op::Slice(op::Constant()));
+  auto match_dot_2 = op::Dot(op::Parameter(2), op::Slice(op::Constant()));
+  auto match_dot_3 = op::Dot(op::Parameter(3), op::Slice(op::Constant()));
+  EXPECT_THAT(computation->root_instruction(),
+              op::Add(op::Add(op::Add(match_dot_0, match_dot_1), match_dot_2),
+                      match_dot_3));
+}
+
+DotOfConcatTestSpec kDotOfConcatTestSpecs[] = {
+    {/*m=*/3, /*k=*/9, /*n=*/3},    //
+    {/*m=*/3, /*k=*/20, /*n=*/3},   //
+    {/*m=*/1, /*k=*/18, /*n=*/5},   //
+    {/*m=*/20, /*k=*/20, /*n=*/1},  //
+    {/*m=*/1, /*k=*/16, /*n=*/1},   //
+};
+
+INSTANTIATE_TEST_CASE_P(DotOfConcatSimplificationTestInstantiation,
+                        DotOfConcatSimplificationTest,
+                        ::testing::ValuesIn(kDotOfConcatTestSpecs));
 }  // namespace
 }  // namespace xla
diff --git a/tensorflow/compiler/xla/tests/dot_operation_test.cc b/tensorflow/compiler/xla/tests/dot_operation_test.cc
index 2058cd04a5..8f11029c10 100644
--- a/tensorflow/compiler/xla/tests/dot_operation_test.cc
+++ b/tensorflow/compiler/xla/tests/dot_operation_test.cc
@@ -573,5 +573,95 @@ TEST_F(DotOperationTest, TransposeFolding) {
   }
 }
 
+TEST_F(DotOperationTest, DotOfConcatOptimizationWithConstLHS) {
+  auto prim_type = primitive_util::NativeToPrimitiveType<float>();
+
+  std::unique_ptr<Array2D<float>> constant_lhs_array(new Array2D<float>(
+      {{1.0, 2.0, 3.0, 4.0, 5.0, 6.0}, {6.0, 5.0, 4.0, 3.0, 2.0, 1.0}}));
+
+  ComputationBuilder builder(client_, TestName());
+  auto lhs_constant = builder.ConstantR2FromArray2D(*constant_lhs_array);
+  auto rhs_arg_0 = builder.Parameter(0, ShapeUtil::MakeShape(prim_type, {2, 2}),
+                                     "rhs_arg_0");
+  auto rhs_arg_1 = builder.Parameter(1, ShapeUtil::MakeShape(prim_type, {3, 2}),
+                                     "rhs_arg_1");
+  auto rhs_arg_2 = builder.Parameter(2, ShapeUtil::MakeShape(prim_type, {1, 2}),
+                                     "rhs_arg_2");
+  auto result = builder.Dot(
+      lhs_constant, builder.ConcatInDim({rhs_arg_0, rhs_arg_1, rhs_arg_2}, 0));
+
+  std::unique_ptr<Array2D<float>> arg_0_value_array(
+      new Array2D<float>({{1.0, 2.0}, {3.0, 4.0}}));
+  std::unique_ptr<Array2D<float>> arg_1_value_array(
+      new Array2D<float>({{1.0, 2.0}, {3.0, 4.0}, {5.0, 6.0}}));
+  std::unique_ptr<Array2D<float>> arg_2_value_array(
+      new Array2D<float>({{1.0, 2.0}}));
+
+  TF_ASSERT_OK_AND_ASSIGN(
+      auto arg_0_value,
+      client_->TransferToServer(
+          *Literal::CreateR2FromArray2D<float>(*arg_0_value_array)));
+  TF_ASSERT_OK_AND_ASSIGN(
+      auto arg_1_value,
+      client_->TransferToServer(
+          *Literal::CreateR2FromArray2D<float>(*arg_1_value_array)));
+  TF_ASSERT_OK_AND_ASSIGN(
+      auto arg_2_value,
+      client_->TransferToServer(
+          *Literal::CreateR2FromArray2D<float>(*arg_2_value_array)));
+
+  Array2D<float> expected({{53.0, 74.0}, {45.0, 66.0}});
+  ComputeAndCompareR2<float>(
+      &builder, expected,
+      {arg_0_value.get(), arg_1_value.get(), arg_2_value.get()}, error_spec_);
+}
+
+TEST_F(DotOperationTest, DotOfConcatOptimizationWithConstRHS) {
+  auto prim_type = primitive_util::NativeToPrimitiveType<float>();
+
+  std::unique_ptr<Array2D<float>> constant_rhs_array(
+      new Array2D<float>({{1.0, 2.0},
+                          {3.0, 4.0},
+                          {5.0, 6.0},
+                          {6.0, 5.0},
+                          {4.0, 3.0},
+                          {2.0, 1.0}}));
+
+  ComputationBuilder builder(client_, TestName());
+  auto rhs_constant = builder.ConstantR2FromArray2D(*constant_rhs_array);
+  auto lhs_arg_0 = builder.Parameter(0, ShapeUtil::MakeShape(prim_type, {2, 2}),
+                                     "lhs_arg_0");
+  auto lhs_arg_1 = builder.Parameter(1, ShapeUtil::MakeShape(prim_type, {2, 3}),
+                                     "lhs_arg_1");
+  auto lhs_arg_2 = builder.Parameter(2, ShapeUtil::MakeShape(prim_type, {2, 1}),
+                                     "lhs_arg_2");
+  auto result = builder.Dot(
+      builder.ConcatInDim({lhs_arg_0, lhs_arg_1, lhs_arg_2}, 1), rhs_constant);
+
+  std::unique_ptr<Array2D<float>> arg_0_value_array(
+      new Array2D<float>({{1.0, 2.0}, {3.0, 4.0}}));
+  std::unique_ptr<Array2D<float>> arg_1_value_array(
+      new Array2D<float>({{1.0, 2.0, 3.0}, {4.0, 5.0, 6.0}}));
+  std::unique_ptr<Array2D<float>> arg_2_value_array(
+      new Array2D<float>({{1.0}, {2.0}}));
+
+  TF_ASSERT_OK_AND_ASSIGN(
+      auto arg_0_value,
+      client_->TransferToServer(
+          *Literal::CreateR2FromArray2D<float>(*arg_0_value_array)));
+  TF_ASSERT_OK_AND_ASSIGN(
+      auto arg_1_value,
+      client_->TransferToServer(
+          *Literal::CreateR2FromArray2D<float>(*arg_1_value_array)));
+  TF_ASSERT_OK_AND_ASSIGN(
+      auto arg_2_value,
+      client_->TransferToServer(
+          *Literal::CreateR2FromArray2D<float>(*arg_2_value_array)));
+
+  Array2D<float> expected({{38.0, 36.0}, {93.0, 91.0}});
+  ComputeAndCompareR2<float>(
+      &builder, expected,
+      {arg_0_value.get(), arg_1_value.get(), arg_2_value.get()}, error_spec_);
+}
 }  // namespace
 }  // namespace xla
-- 
GitLab


From c4a242f6d24378d722131b0cddf7d8700fb65f5a Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 11 Dec 2017 16:57:51 -0800
Subject: [PATCH 0893/1225] Initialize local_resources during session
 initialization.

PiperOrigin-RevId: 178694869
---
 tensorflow/python/training/monitored_session.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/tensorflow/python/training/monitored_session.py b/tensorflow/python/training/monitored_session.py
index f1cb81981a..b9bffa6b5c 100644
--- a/tensorflow/python/training/monitored_session.py
+++ b/tensorflow/python/training/monitored_session.py
@@ -265,8 +265,10 @@ class Scaffold(object):
 
   @staticmethod
   def _default_local_init_op():
-    return control_flow_ops.group(variables.local_variables_initializer(),
-                                  lookup_ops.tables_initializer())
+    return control_flow_ops.group(
+        variables.local_variables_initializer(),
+        lookup_ops.tables_initializer(),
+        resources.initialize_resources(resources.local_resources()))
 
 
 def MonitoredTrainingSession(master='',  # pylint: disable=invalid-name
-- 
GitLab


From aaf2eb05502e1a0e37f30017d79bb08a9a534711 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 11 Dec 2017 17:03:54 -0800
Subject: [PATCH 0894/1225] Automated g4 rollback of changelist 178634559

PiperOrigin-RevId: 178695724
---
 tensorflow/python/training/training_util.py | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/tensorflow/python/training/training_util.py b/tensorflow/python/training/training_util.py
index 2a42ff2003..89a9e12932 100644
--- a/tensorflow/python/training/training_util.py
+++ b/tensorflow/python/training/training_util.py
@@ -23,7 +23,6 @@ from tensorflow.python.eager import context
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import graph_io
 from tensorflow.python.framework import ops
-from tensorflow.python.ops import gen_array_ops
 from tensorflow.python.ops import init_ops
 from tensorflow.python.ops import resource_variable_ops
 from tensorflow.python.ops import state_ops
@@ -222,6 +221,7 @@ def _get_or_create_global_step_read(graph=None):
   global_step_tensor = get_global_step(graph)
   if global_step_tensor is None:
     return None
+  # add 'zero' so that it will create a copy of variable as Tensor.
   with graph.as_default() as g, g.name_scope(None):
     with g.name_scope(global_step_tensor.op.name + '/'):
       # using initialized_value to ensure that global_step is initialized before
@@ -229,10 +229,7 @@ def _get_or_create_global_step_read(graph=None):
       # under global_step_read_tensor dependency.
       global_step_value = global_step_tensor.initialized_value() if isinstance(
           global_step_tensor, variables.Variable) else global_step_tensor
-      # pylint: disable=protected-access
-      # We use the snapshot kernel to make sure a copy is made of this tensor.
-      global_step_read_tensor = gen_array_ops._snapshot(global_step_value)
-      # pylint: enable=protected-access
+      global_step_read_tensor = global_step_value + 0
       ops.add_to_collection(GLOBAL_STEP_READ_KEY, global_step_read_tensor)
   return _get_global_step_read(graph)
 
-- 
GitLab


From e5246fb0b0d279ed3a641b814448643b207b35cf Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Dandelion=20Man=C3=A9?= <dandelion@google.com>
Date: Mon, 11 Dec 2017 17:10:25 -0800
Subject: [PATCH 0895/1225] Satisfy buildifier

---
 tensorflow/python/kernel_tests/BUILD | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/python/kernel_tests/BUILD b/tensorflow/python/kernel_tests/BUILD
index 0f662f92ea..31d3bd1b74 100644
--- a/tensorflow/python/kernel_tests/BUILD
+++ b/tensorflow/python/kernel_tests/BUILD
@@ -2083,11 +2083,11 @@ cuda_py_test(
         "//tensorflow/python:client_testlib",
         "//tensorflow/python:framework_for_generated_wrappers",
     ],
+    shard_count = 2,
     tags = [
         "no_gpu",
         "no_oss",
     ],
-    shard_count = 2,
 )
 
 cuda_py_test(
-- 
GitLab


From a5300ebeb6141d8eddebcca5041f3cbc1289b62a Mon Sep 17 00:00:00 2001
From: Gunhan Gulsoy <gunan@google.com>
Date: Mon, 11 Dec 2017 17:50:44 -0800
Subject: [PATCH 0896/1225] closes #15281

PiperOrigin-RevId: 178701096
---
 tensorflow/tools/pip_package/setup.py | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/tensorflow/tools/pip_package/setup.py b/tensorflow/tools/pip_package/setup.py
index 5ddc688a4c..1b2e007f9d 100644
--- a/tensorflow/tools/pip_package/setup.py
+++ b/tensorflow/tools/pip_package/setup.py
@@ -33,12 +33,6 @@ _VERSION = '1.4.0'
 
 REQUIRED_PACKAGES = [
     'absl-py >= 0.1.6',
-    # weakref.finalize introduced in Python 3.4
-    'backports.weakref >= 1.0rc1; python_version < "3.4"',
-    # enum module introduced in Python 3.4
-    'enum34 >= 1.1.6; python_version < "3.4"',
-    # Needed for unittest.mock in Python 2
-    'mock >= 2.0.0; python_version < "3.0"',
     'numpy >= 1.12.1',
     'six >= 1.10.0',
     'protobuf >= 3.4.0',
-- 
GitLab


From ecdecc5a325482edac2cdef2d40e091ed8b5016d Mon Sep 17 00:00:00 2001
From: Benoit Steiner <bsteiner@google.com>
Date: Mon, 11 Dec 2017 18:09:51 -0800
Subject: [PATCH 0897/1225] Don't materialize BroadcastGradientArgs by default.

PiperOrigin-RevId: 178703180
---
 tensorflow/core/grappler/optimizers/constant_folding.cc | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/tensorflow/core/grappler/optimizers/constant_folding.cc b/tensorflow/core/grappler/optimizers/constant_folding.cc
index d90fe57040..45ba8d01e6 100644
--- a/tensorflow/core/grappler/optimizers/constant_folding.cc
+++ b/tensorflow/core/grappler/optimizers/constant_folding.cc
@@ -423,6 +423,9 @@ Status ConstantFolding::MaterializeBroadcastGradientArgs(
   if (!bcast.IsValid()) {
     return Status::OK();
   }
+  // Beware: the reduction dimensions are valid iff we assume that two distinct
+  // symbolic dimensions can't be equal. This is often but not always true, so
+  // this optimization isn't safe.
   BCast::Vec reduce_dims[2];
   reduce_dims[0] = bcast.grad_x_reduce_idx();
   reduce_dims[1] = bcast.grad_y_reduce_idx();
@@ -570,11 +573,12 @@ Status ConstantFolding::MaterializeReductionIndices(
 
 Status ConstantFolding::MaterializeConstants(
     const GraphProperties& properties) {
+  const bool is_aggressive = opt_level_ == RewriterConfig::AGGRESSIVE;
   const int node_count = graph_->node_size();
   for (int i = 0; i < node_count; ++i) {
     NodeDef& node = *graph_->mutable_node(i);
     const string& op = node.op();
-    if (op == "BroadcastGradientArgs") {
+    if (is_aggressive && op == "BroadcastGradientArgs") {
       TF_RETURN_IF_ERROR(MaterializeBroadcastGradientArgs(node, properties));
     } else if (IsReduction(node)) {
       TF_RETURN_IF_ERROR(MaterializeReductionIndices(&node, properties));
-- 
GitLab


From fc24c588dfb8cd5ff55537e1db79587538a1acff Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=BC=A0=E5=BF=97=E8=B1=AA?= <izhangzhihao@hotmail.com>
Date: Tue, 12 Dec 2017 10:54:10 +0800
Subject: [PATCH 0898/1225] Replace `variables.get_global_step()` use
 `training_util.get_global_step()` (#15247)

* Replace `variables.get_global_step()` use `training_util.get_global_step()`
---
 .../contrib/factorization/python/ops/gmm.py   |  4 +--
 .../learn/estimators/composable_model_test.py |  4 +--
 .../learn/python/learn/estimators/dnn.py      |  4 +--
 .../learn/estimators/estimator_input_test.py  | 10 +++----
 .../python/learn/estimators/estimator_test.py | 26 +++++++++----------
 .../learn/estimators/estimators_test.py       |  8 +++---
 .../learn/python/learn/estimators/kmeans.py   |  4 +--
 .../learn/python/learn/estimators/linear.py   |  6 ++---
 .../estimators/logistic_regressor_test.py     |  4 +--
 .../learn/python/learn/utils/export.py        |  6 ++---
 .../linear_optimizer/python/sdca_estimator.py |  4 +--
 11 files changed, 40 insertions(+), 40 deletions(-)

diff --git a/tensorflow/contrib/factorization/python/ops/gmm.py b/tensorflow/contrib/factorization/python/ops/gmm.py
index 0d67e09f81..f72280c4ec 100644
--- a/tensorflow/contrib/factorization/python/ops/gmm.py
+++ b/tensorflow/contrib/factorization/python/ops/gmm.py
@@ -24,7 +24,7 @@ import numpy as np
 from tensorflow.contrib import framework
 from tensorflow.contrib.factorization.python.ops import gmm_ops
 from tensorflow.contrib.framework.python.framework import checkpoint_utils
-from tensorflow.contrib.framework.python.ops import variables
+from tensorflow.python.training import training_util
 from tensorflow.contrib.learn.python.learn.estimators import estimator
 from tensorflow.contrib.learn.python.learn.estimators import model_fn as model_fn_lib
 from tensorflow.python.framework import constant_op
@@ -167,7 +167,7 @@ class GMM(estimator.Estimator):
                                      self._num_clusters, self._random_seed,
                                      self._covariance_type,
                                      self._params)
-      incr_step = state_ops.assign_add(variables.get_global_step(), 1)
+      incr_step = state_ops.assign_add(training_util.get_global_step(), 1)
       loss = math_ops.reduce_sum(losses)
       training_op = with_dependencies([training_op, incr_step], loss)
       training_hooks = [_InitializeClustersHook(
diff --git a/tensorflow/contrib/learn/python/learn/estimators/composable_model_test.py b/tensorflow/contrib/learn/python/learn/estimators/composable_model_test.py
index 14750961ef..ef5e620e8f 100644
--- a/tensorflow/contrib/learn/python/learn/estimators/composable_model_test.py
+++ b/tensorflow/contrib/learn/python/learn/estimators/composable_model_test.py
@@ -18,7 +18,7 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-from tensorflow.contrib.framework.python.ops import variables as contrib_variables
+from tensorflow.python.training import training_util
 from tensorflow.contrib.layers.python.layers import feature_column
 from tensorflow.contrib.learn.python.learn.datasets import base
 from tensorflow.contrib.learn.python.learn.estimators import composable_model
@@ -55,7 +55,7 @@ def _base_model_fn(features, labels, mode, params):
     raise NotImplementedError
 
   def _train_op_fn(loss):
-    global_step = contrib_variables.get_global_step()
+    global_step = training_util.get_global_step()
     assert global_step
     train_step = model.get_train_step(loss)
 
diff --git a/tensorflow/contrib/learn/python/learn/estimators/dnn.py b/tensorflow/contrib/learn/python/learn/estimators/dnn.py
index cb15ef23e9..c17b41c0f7 100644
--- a/tensorflow/contrib/learn/python/learn/estimators/dnn.py
+++ b/tensorflow/contrib/learn/python/learn/estimators/dnn.py
@@ -23,7 +23,7 @@ import six
 from tensorflow.contrib import layers
 from tensorflow.contrib.framework import deprecated
 from tensorflow.contrib.framework import deprecated_arg_values
-from tensorflow.contrib.framework.python.ops import variables as contrib_variables
+from tensorflow.python.training import training_util
 from tensorflow.contrib.layers.python.layers import feature_column
 from tensorflow.contrib.layers.python.layers import optimizers
 from tensorflow.contrib.learn.python.learn import metric_spec
@@ -189,7 +189,7 @@ def _dnn_model_fn(features, labels, mode, params, config=None):
       """Returns the op to optimize the loss."""
       return optimizers.optimize_loss(
           loss=loss,
-          global_step=contrib_variables.get_global_step(),
+          global_step=training_util.get_global_step(),
           learning_rate=_LEARNING_RATE,
           optimizer=_get_optimizer(optimizer),
           gradient_multipliers=(
diff --git a/tensorflow/contrib/learn/python/learn/estimators/estimator_input_test.py b/tensorflow/contrib/learn/python/learn/estimators/estimator_input_test.py
index 248c6c733f..9d7c1a099a 100644
--- a/tensorflow/contrib/learn/python/learn/estimators/estimator_input_test.py
+++ b/tensorflow/contrib/learn/python/learn/estimators/estimator_input_test.py
@@ -23,7 +23,7 @@ import tempfile
 
 import numpy as np
 
-from tensorflow.contrib.framework.python.ops import variables
+from tensorflow.python.training import training_util
 from tensorflow.contrib.layers.python.layers import optimizers
 from tensorflow.contrib.learn.python.learn import metric_spec
 from tensorflow.contrib.learn.python.learn import models
@@ -114,7 +114,7 @@ def linear_model_params_fn(features, labels, mode, params):
   prediction, loss = (models.linear_regression_zero_init(features, labels))
   train_op = optimizers.optimize_loss(
       loss,
-      variables.get_global_step(),
+      training_util.get_global_step(),
       optimizer='Adagrad',
       learning_rate=params['learning_rate'])
   return prediction, loss, train_op
@@ -129,7 +129,7 @@ def linear_model_fn(features, labels, mode):
     (_, features), = features.items()
   prediction, loss = (models.linear_regression_zero_init(features, labels))
   train_op = optimizers.optimize_loss(
-      loss, variables.get_global_step(), optimizer='Adagrad', learning_rate=0.1)
+      loss, training_util.get_global_step(), optimizer='Adagrad', learning_rate=0.1)
   return prediction, loss, train_op
 
 
@@ -139,7 +139,7 @@ def linear_model_fn_with_model_fn_ops(features, labels, mode):
                   model_fn.ModeKeys.INFER)
   prediction, loss = (models.linear_regression_zero_init(features, labels))
   train_op = optimizers.optimize_loss(
-      loss, variables.get_global_step(), optimizer='Adagrad', learning_rate=0.1)
+      loss, training_util.get_global_step(), optimizer='Adagrad', learning_rate=0.1)
   return model_fn.ModelFnOps(
       mode=mode, predictions=prediction, loss=loss, train_op=train_op)
 
@@ -150,7 +150,7 @@ def logistic_model_no_mode_fn(features, labels):
   labels = array_ops.one_hot(labels, 3, 1, 0)
   prediction, loss = (models.logistic_regression_zero_init(features, labels))
   train_op = optimizers.optimize_loss(
-      loss, variables.get_global_step(), optimizer='Adagrad', learning_rate=0.1)
+      loss, training_util.get_global_step(), optimizer='Adagrad', learning_rate=0.1)
   return {
       'class': math_ops.argmax(prediction, 1),
       'prob': prediction
diff --git a/tensorflow/contrib/learn/python/learn/estimators/estimator_test.py b/tensorflow/contrib/learn/python/learn/estimators/estimator_test.py
index be2b0cb3ca..2a13a84627 100644
--- a/tensorflow/contrib/learn/python/learn/estimators/estimator_test.py
+++ b/tensorflow/contrib/learn/python/learn/estimators/estimator_test.py
@@ -32,7 +32,7 @@ from google.protobuf import text_format
 
 from tensorflow.contrib import learn
 from tensorflow.contrib import lookup
-from tensorflow.contrib.framework.python.ops import variables
+from tensorflow.python.training import training_util
 from tensorflow.contrib.layers.python.layers import feature_column as feature_column_lib
 from tensorflow.contrib.layers.python.layers import optimizers
 from tensorflow.contrib.learn.python.learn import experiment
@@ -132,7 +132,7 @@ def linear_model_params_fn(features, labels, mode, params):
   prediction, loss = (models.linear_regression_zero_init(features, labels))
   train_op = optimizers.optimize_loss(
       loss,
-      variables.get_global_step(),
+      training_util.get_global_step(),
       optimizer='Adagrad',
       learning_rate=params['learning_rate'])
   return prediction, loss, train_op
@@ -147,7 +147,7 @@ def linear_model_fn(features, labels, mode):
     (_, features), = features.items()
   prediction, loss = (models.linear_regression_zero_init(features, labels))
   train_op = optimizers.optimize_loss(
-      loss, variables.get_global_step(), optimizer='Adagrad', learning_rate=0.1)
+      loss, training_util.get_global_step(), optimizer='Adagrad', learning_rate=0.1)
   return prediction, loss, train_op
 
 
@@ -157,7 +157,7 @@ def linear_model_fn_with_model_fn_ops(features, labels, mode):
                   model_fn.ModeKeys.INFER)
   prediction, loss = (models.linear_regression_zero_init(features, labels))
   train_op = optimizers.optimize_loss(
-      loss, variables.get_global_step(), optimizer='Adagrad', learning_rate=0.1)
+      loss, training_util.get_global_step(), optimizer='Adagrad', learning_rate=0.1)
   return model_fn.ModelFnOps(
       mode=mode, predictions=prediction, loss=loss, train_op=train_op)
 
@@ -168,7 +168,7 @@ def logistic_model_no_mode_fn(features, labels):
   labels = array_ops.one_hot(labels, 3, 1, 0)
   prediction, loss = (models.logistic_regression_zero_init(features, labels))
   train_op = optimizers.optimize_loss(
-      loss, variables.get_global_step(), optimizer='Adagrad', learning_rate=0.1)
+      loss, training_util.get_global_step(), optimizer='Adagrad', learning_rate=0.1)
   return {
       'class': math_ops.argmax(prediction, 1),
       'prob': prediction
@@ -241,7 +241,7 @@ def _build_estimator_for_resource_export_test():
     const = constant_op.constant(-1, dtype=dtypes.int64)
     table = lookup.MutableHashTable(
         dtypes.string, dtypes.int64, const, name='LookupTableModel')
-    update_global_step = variables.get_global_step().assign_add(1)
+    update_global_step = training_util.get_global_step().assign_add(1)
     if mode in (model_fn.ModeKeys.TRAIN, model_fn.ModeKeys.EVAL):
       key = constant_op.constant(['key'])
       value = constant_op.constant([42], dtype=dtypes.int64)
@@ -306,7 +306,7 @@ def _model_fn_ops(
         mode=mode,
         predictions=constant_op.constant(0.),
         loss=constant_op.constant(0.),
-        train_op=variables.get_global_step().assign_add(1))
+        train_op=training_util.get_global_step().assign_add(1))
 
 
 def _make_input_fn(features, labels):
@@ -389,7 +389,7 @@ class EstimatorModelFnTest(test.TestCase):
       self.assertEqual(expected_param, params)
       self.assertEqual(model_dir, expected_model_dir)
       return (constant_op.constant(0.), constant_op.constant(0.),
-              variables.get_global_step().assign_add(1))
+              training_util.get_global_step().assign_add(1))
     est = estimator.Estimator(model_fn=_argument_checker,
                               params=expected_param,
                               model_dir=expected_model_dir)
@@ -400,7 +400,7 @@ class EstimatorModelFnTest(test.TestCase):
     def _invalid_model_fn(features, labels):
       # pylint: disable=unused-argument
       w = variables_lib.Variable(42.0, 'weight')
-      update_global_step = variables.get_global_step().assign_add(1)
+      update_global_step = training_util.get_global_step().assign_add(1)
       with ops.control_dependencies([update_global_step]):
         loss = 100.0 - w
       return None, loss, None
@@ -415,7 +415,7 @@ class EstimatorModelFnTest(test.TestCase):
       # pylint: disable=unused-argument
       w = variables_lib.Variable(42.0, 'weight')
       loss = 100.0 - w
-      update_global_step = variables.get_global_step().assign_add(1)
+      update_global_step = training_util.get_global_step().assign_add(1)
       with ops.control_dependencies([update_global_step]):
         train_op = w.assign_add(loss / 100.0)
       predictions = loss
@@ -434,7 +434,7 @@ class EstimatorModelFnTest(test.TestCase):
       # pylint: disable=unused-argument
       w = variables_lib.Variable(42.0, 'weight')
       loss = 100.0 - w
-      update_global_step = variables.get_global_step().assign_add(1)
+      update_global_step = training_util.get_global_step().assign_add(1)
       with ops.control_dependencies([update_global_step]):
         train_op = w.assign_add(loss / 100.0)
       return None, loss, train_op
@@ -464,7 +464,7 @@ class EstimatorModelFnTest(test.TestCase):
           mode=mode,
           predictions=constant_op.constant(0.),
           loss=constant_op.constant(0.),
-          train_op=variables.get_global_step().assign_add(1),
+          train_op=training_util.get_global_step().assign_add(1),
           scaffold=monitored_session.Scaffold(init_fn=_init_fn))
 
     est = estimator.Estimator(model_fn=_model_fn_scaffold)
@@ -483,7 +483,7 @@ class EstimatorModelFnTest(test.TestCase):
           mode=mode,
           predictions=constant_op.constant([[1.]]),
           loss=constant_op.constant(0.),
-          train_op=variables.get_global_step().assign_add(1),
+          train_op=training_util.get_global_step().assign_add(1),
           scaffold=monitored_session.Scaffold(saver=self.mock_saver))
 
     def input_fn():
diff --git a/tensorflow/contrib/learn/python/learn/estimators/estimators_test.py b/tensorflow/contrib/learn/python/learn/estimators/estimators_test.py
index 1d89dfb55b..8131e0fde6 100644
--- a/tensorflow/contrib/learn/python/learn/estimators/estimators_test.py
+++ b/tensorflow/contrib/learn/python/learn/estimators/estimators_test.py
@@ -22,7 +22,7 @@ import random
 
 import numpy as np
 
-from tensorflow.contrib.framework.python.ops import variables
+from tensorflow.python.training import training_util
 from tensorflow.contrib.learn.python import learn
 from tensorflow.contrib.learn.python.learn import datasets
 from tensorflow.contrib.learn.python.learn import metric_spec
@@ -62,7 +62,7 @@ class FeatureEngineeringFunctionTest(test.TestCase):
       _ = labels
       predictions = features["transformed_x"]
       loss = constant_op.constant([2.])
-      update_global_step = variables.get_global_step().assign_add(1)
+      update_global_step = training_util.get_global_step().assign_add(1)
       return predictions, loss, update_global_step
 
     estimator = estimator_lib.Estimator(
@@ -100,7 +100,7 @@ class FeatureEngineeringFunctionTest(test.TestCase):
       _ = labels
       predictions = features["x"]
       loss = constant_op.constant([2.])
-      update_global_step = variables.get_global_step().assign_add(1)
+      update_global_step = training_util.get_global_step().assign_add(1)
       return predictions, loss, update_global_step
 
     estimator = estimator_lib.Estimator(
@@ -139,7 +139,7 @@ class FeatureEngineeringFunctionTest(test.TestCase):
       _ = labels
       predictions = features["x"]
       loss = constant_op.constant([2.])
-      update_global_step = variables.get_global_step().assign_add(1)
+      update_global_step = training_util.get_global_step().assign_add(1)
       return predictions, loss, update_global_step
 
     estimator_with_fe_fn = estimator_lib.Estimator(
diff --git a/tensorflow/contrib/learn/python/learn/estimators/kmeans.py b/tensorflow/contrib/learn/python/learn/estimators/kmeans.py
index 992b804f59..8f9d6fc318 100644
--- a/tensorflow/contrib/learn/python/learn/estimators/kmeans.py
+++ b/tensorflow/contrib/learn/python/learn/estimators/kmeans.py
@@ -28,7 +28,7 @@ import time
 import numpy as np
 
 from tensorflow.contrib.factorization.python.ops import clustering_ops
-from tensorflow.contrib.framework.python.ops import variables
+from tensorflow.python.training import training_util
 from tensorflow.contrib.learn.python.learn.estimators import estimator
 from tensorflow.contrib.learn.python.learn.estimators.model_fn import ModelFnOps
 from tensorflow.python.framework import ops
@@ -128,7 +128,7 @@ def _kmeans_clustering_model_fn(features, labels, mode, params, config):
        random_seed=params.get('random_seed'),
        kmeans_plus_plus_num_retries=params.get(
            'kmeans_plus_plus_num_retries')).training_graph()
-  incr_step = state_ops.assign_add(variables.get_global_step(), 1)
+  incr_step = state_ops.assign_add(training_util.get_global_step(), 1)
   loss = math_ops.reduce_sum(losses, name=KMeansClustering.LOSS_OP_NAME)
   summary.scalar('loss/raw', loss)
   training_op = with_dependencies([training_op, incr_step], loss)
diff --git a/tensorflow/contrib/learn/python/learn/estimators/linear.py b/tensorflow/contrib/learn/python/learn/estimators/linear.py
index f5445ad4e7..37aa8b3396 100644
--- a/tensorflow/contrib/learn/python/learn/estimators/linear.py
+++ b/tensorflow/contrib/learn/python/learn/estimators/linear.py
@@ -26,7 +26,7 @@ import six
 from tensorflow.contrib import layers
 from tensorflow.contrib.framework import deprecated
 from tensorflow.contrib.framework import deprecated_arg_values
-from tensorflow.contrib.framework.python.ops import variables as contrib_variables
+from tensorflow.python.training import training_util
 from tensorflow.contrib.layers.python.layers import feature_column
 from tensorflow.contrib.learn.python.learn.estimators import estimator
 from tensorflow.contrib.learn.python.learn.estimators import head as head_lib
@@ -170,7 +170,7 @@ def _linear_model_fn(features, labels, mode, params, config=None):
           weight_collections=[parent_scope])
 
     def _train_op_fn(loss):
-      global_step = contrib_variables.get_global_step()
+      global_step = training_util.get_global_step()
       my_vars = ops.get_collection(parent_scope)
       grads = gradients.gradients(loss, my_vars)
       if gradient_clip_norm:
@@ -252,7 +252,7 @@ def sdca_model_fn(features, labels, mode, params):
     _add_bias_column(feature_columns, features, bias, columns_to_variables)
 
   def _train_op_fn(unused_loss):
-    global_step = contrib_variables.get_global_step()
+    global_step = training_util.get_global_step()
     sdca_model, train_op = optimizer.get_train_step(columns_to_variables,
                                                     weight_column_name,
                                                     loss_type, features,
diff --git a/tensorflow/contrib/learn/python/learn/estimators/logistic_regressor_test.py b/tensorflow/contrib/learn/python/learn/estimators/logistic_regressor_test.py
index 93c62f87e8..656d68b768 100644
--- a/tensorflow/contrib/learn/python/learn/estimators/logistic_regressor_test.py
+++ b/tensorflow/contrib/learn/python/learn/estimators/logistic_regressor_test.py
@@ -21,7 +21,7 @@ from __future__ import print_function
 import numpy as np
 
 from tensorflow.contrib import layers
-from tensorflow.contrib.framework.python.ops import variables
+from tensorflow.python.training import training_util
 from tensorflow.contrib.layers.python.layers import optimizers
 from tensorflow.contrib.learn.python.learn.datasets import base
 from tensorflow.contrib.learn.python.learn.estimators import logistic_regressor
@@ -57,7 +57,7 @@ def _logistic_regression_model_fn(features, labels, mode):
   predictions = math_ops.sigmoid(logits)
   loss = losses.sigmoid_cross_entropy(labels, logits)
   train_op = optimizers.optimize_loss(
-      loss, variables.get_global_step(), optimizer='Adagrad', learning_rate=0.1)
+      loss, training_util.get_global_step(), optimizer='Adagrad', learning_rate=0.1)
   return predictions, loss, train_op
 
 
diff --git a/tensorflow/contrib/learn/python/learn/utils/export.py b/tensorflow/contrib/learn/python/learn/utils/export.py
index 6af2287761..cb34cb1d26 100644
--- a/tensorflow/contrib/learn/python/learn/utils/export.py
+++ b/tensorflow/contrib/learn/python/learn/utils/export.py
@@ -20,7 +20,7 @@ from __future__ import division
 from __future__ import print_function
 
 from tensorflow.contrib.framework import deprecated
-from tensorflow.contrib.framework.python.ops import variables as contrib_variables
+from tensorflow.python.training import training_util
 from tensorflow.contrib.session_bundle import exporter
 from tensorflow.contrib.session_bundle import gc
 from tensorflow.python.client import session as tf_session
@@ -78,7 +78,7 @@ def _export_graph(graph, saver, checkpoint_path, export_dir,
           default_graph_signature=default_graph_signature,
           named_graph_signatures=named_graph_signatures,
           assets_collection=ops.get_collection(ops.GraphKeys.ASSET_FILEPATHS))
-      return export.export(export_dir, contrib_variables.get_global_step(),
+      return export.export(export_dir, training_util.get_global_step(),
                            session, exports_to_keep=exports_to_keep)
 
 
@@ -295,7 +295,7 @@ def _export_estimator(estimator,
   checkpoint_path = (checkpoint_path or
                      tf_saver.latest_checkpoint(estimator._model_dir))
   with ops.Graph().as_default() as g:
-    contrib_variables.create_global_step(g)
+    training_util.create_global_step(g)
 
     if use_deprecated_input_fn:
       examples = array_ops.placeholder(dtype=dtypes.string,
diff --git a/tensorflow/contrib/linear_optimizer/python/sdca_estimator.py b/tensorflow/contrib/linear_optimizer/python/sdca_estimator.py
index 701fc1c059..05794a42c5 100644
--- a/tensorflow/contrib/linear_optimizer/python/sdca_estimator.py
+++ b/tensorflow/contrib/linear_optimizer/python/sdca_estimator.py
@@ -19,7 +19,7 @@ from __future__ import division
 from __future__ import print_function
 
 from tensorflow.contrib import layers
-from tensorflow.contrib.framework.python.ops import variables as contrib_variables
+from tensorflow.python.training import training_util
 from tensorflow.contrib.learn.python.learn.estimators import estimator
 from tensorflow.contrib.learn.python.learn.estimators import head as head_lib
 from tensorflow.contrib.learn.python.learn.estimators import prediction_key
@@ -154,7 +154,7 @@ def sdca_model_fn(features, labels, mode, params, config=None):
     _add_bias_column(feature_columns, features, bias, columns_to_variables)
 
   def _train_op_fn(unused_loss):
-    global_step = contrib_variables.get_global_step()
+    global_step = training_util.get_global_step()
     sdca_model, train_op = optimizer.get_train_step(
         columns_to_variables, weight_column_name, loss_type, features, labels,
         global_step)
-- 
GitLab


From c210009d0a8d40d458c21b1faf7b1adf8e4deaee Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 11 Dec 2017 19:19:26 -0800
Subject: [PATCH 0899/1225] Includes <cstdio> in the TF Lite
 kernels/op_macros.h to fix a compile errors when building externally using
 either the Makefile or Bazel.  The macros use stderr and fprintf which may
 not be defined depending on the order of headers included by the .cc files.

PiperOrigin-RevId: 178708839
---
 tensorflow/contrib/lite/kernels/op_macros.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tensorflow/contrib/lite/kernels/op_macros.h b/tensorflow/contrib/lite/kernels/op_macros.h
index 7535afaf8e..63670efcb1 100644
--- a/tensorflow/contrib/lite/kernels/op_macros.h
+++ b/tensorflow/contrib/lite/kernels/op_macros.h
@@ -15,6 +15,8 @@ limitations under the License.
 #ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_KERNELS_OP_UTIL_H_
 #define THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_KERNELS_OP_UTIL_H_
 
+#include <cstdio>
+
 #define TF_LITE_FATAL(msg)          \
   do {                              \
     fprintf(stderr, "%s\n", (msg)); \
-- 
GitLab


From 2a48746874d2043cddb632fe825d88f3da0c185d Mon Sep 17 00:00:00 2001
From: Yao Zhang <yaozhang@google.com>
Date: Mon, 11 Dec 2017 19:39:35 -0800
Subject: [PATCH 0900/1225] Fix the handling of unknown rank. Previous code
 would wrongly treat a tensor of unknown rank as a scalar.

PiperOrigin-RevId: 178710185
---
 .../grappler/optimizers/layout_optimizer.cc   |  3 +++
 .../optimizers/layout_optimizer_test.cc       | 23 +++++++++++++++++++
 2 files changed, 26 insertions(+)

diff --git a/tensorflow/core/grappler/optimizers/layout_optimizer.cc b/tensorflow/core/grappler/optimizers/layout_optimizer.cc
index e9112baaff..b584df0882 100644
--- a/tensorflow/core/grappler/optimizers/layout_optimizer.cc
+++ b/tensorflow/core/grappler/optimizers/layout_optimizer.cc
@@ -284,6 +284,9 @@ class NodeProcessor : public GraphProcessor {
   bool IsDimsN(const NodeDef& node, int n) const {
     if (node.attr().find("_output_shapes") != node.attr().end()) {
       auto shape = node.attr().at("_output_shapes").list().shape(0);
+      if (shape.unknown_rank()) {
+        return false;
+      }
       if (shape.dim_size() == n) {
         return true;
       }
diff --git a/tensorflow/core/grappler/optimizers/layout_optimizer_test.cc b/tensorflow/core/grappler/optimizers/layout_optimizer_test.cc
index 6e1f47f0d3..763e2d1b21 100644
--- a/tensorflow/core/grappler/optimizers/layout_optimizer_test.cc
+++ b/tensorflow/core/grappler/optimizers/layout_optimizer_test.cc
@@ -761,6 +761,29 @@ TEST_F(LayoutOptimizerTest, Mul4DAndScalar) {
   EXPECT_EQ(mul_node->input(1), "scalar");
 }
 
+TEST_F(LayoutOptimizerTest, Mul4DAndUnknownRank) {
+  tensorflow::Scope s = tensorflow::Scope::NewRootScope();
+  auto conv = SimpleConv2D(&s, 3, 2, "VALID");
+  auto unknown_rank =
+      ops::Placeholder(s.WithOpName("unknown"), DT_FLOAT,
+                       ops::Placeholder::Shape(PartialTensorShape()));
+  Output c = ops::Const(s.WithOpName("c"), 3.0f, {8, 2, 2, 2});
+  Output mul = ops::Mul(s.WithOpName("mul"), conv, unknown_rank);
+  auto o = ops::AddN(s.WithOpName("o"), {mul, c});
+  GrapplerItem item;
+  TF_CHECK_OK(s.ToGraphDef(&item.graph));
+  LayoutOptimizer optimizer;
+  GraphDef output;
+  Status status = optimizer.Optimize(virtual_cluster_.get(), item, &output);
+  NodeMap node_map(&output);
+  auto mul_node = node_map.GetNode("mul");
+  // Node mul should not be processed by layout optimizer, because one of its
+  // inputs is of unknown rank.
+  EXPECT_EQ(mul_node->input(0),
+            "LayoutOptimizerTransposeNCHWToNHWC-Conv2D-mul-0");
+  EXPECT_EQ(mul_node->input(1), "unknown");
+}
+
 TEST_F(LayoutOptimizerTest, Mul4DAnd4D) {
   tensorflow::Scope s = tensorflow::Scope::NewRootScope();
   auto conv = SimpleConv2D(&s, 3, 2, "VALID");
-- 
GitLab


From f18d23b90a48e51013008d51320f5aedd555cc02 Mon Sep 17 00:00:00 2001
From: Ian Langmore <langmore@google.com>
Date: Mon, 11 Dec 2017 19:44:08 -0800
Subject: [PATCH 0901/1225] prefer_static_* functions added to
 CORE/distributions/util.py

PiperOrigin-RevId: 178710439
---
 .../python/ops/distribution_util.py           |  48 +------
 .../kernel_tests/distributions/util_test.py   | 118 ++++++++++++++++++
 tensorflow/python/ops/distributions/util.py   |  83 ++++++++++++
 3 files changed, 204 insertions(+), 45 deletions(-)

diff --git a/tensorflow/contrib/distributions/python/ops/distribution_util.py b/tensorflow/contrib/distributions/python/ops/distribution_util.py
index 869b5698e5..a4d249d41e 100644
--- a/tensorflow/contrib/distributions/python/ops/distribution_util.py
+++ b/tensorflow/contrib/distributions/python/ops/distribution_util.py
@@ -19,9 +19,7 @@ from __future__ import division
 from __future__ import print_function
 
 from tensorflow.contrib import linalg
-from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
-from tensorflow.python.framework import tensor_shape
 from tensorflow.python.framework import tensor_util
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import check_ops
@@ -330,54 +328,14 @@ def shapes_from_loc_and_scale(loc, scale, name="shapes_from_loc_and_scale"):
       else:
         loc_batch_shape = ops.convert_to_tensor(loc_batch_shape,
                                                 name="loc_batch_shape")
+      # This is defined in the core util module.
+      # pylint: disable=undefined-variable
       batch_shape = prefer_static_broadcast_shape(batch_shape, loc_batch_shape)
+      # pylint: enable=undefined-variable
 
   return batch_shape, event_shape
 
 
-def prefer_static_broadcast_shape(
-    shape1, shape2, name="prefer_static_broadcast_shape"):
-  """Convenience function which statically broadcasts shape when possible.
-
-  Args:
-    shape1:  `1-D` integer `Tensor`.  Already converted to tensor!
-    shape2:  `1-D` integer `Tensor`.  Already converted to tensor!
-    name:  A string name to prepend to created ops.
-
-  Returns:
-    The broadcast shape, either as `TensorShape` (if broadcast can be done
-      statically), or as a `Tensor`.
-  """
-  with ops.name_scope(name, values=[shape1, shape2]):
-    def make_shape_tensor(x):
-      return ops.convert_to_tensor(x, name="shape", dtype=dtypes.int32)
-
-    def get_tensor_shape(s):
-      if isinstance(s, tensor_shape.TensorShape):
-        return s
-      s_ = tensor_util.constant_value(make_shape_tensor(s))
-      if s_ is not None:
-        return tensor_shape.TensorShape(s_)
-      return None
-
-    def get_shape_tensor(s):
-      if not isinstance(s, tensor_shape.TensorShape):
-        return make_shape_tensor(s)
-      if s.is_fully_defined():
-        return make_shape_tensor(s.as_list())
-      raise ValueError("Cannot broadcast from partially "
-                       "defined `TensorShape`.")
-
-    shape1_ = get_tensor_shape(shape1)
-    shape2_ = get_tensor_shape(shape2)
-    if shape1_ is not None and shape2_ is not None:
-      return array_ops.broadcast_static_shape(shape1_, shape2_)
-
-    shape1_ = get_shape_tensor(shape1)
-    shape2_ = get_shape_tensor(shape2)
-    return array_ops.broadcast_dynamic_shape(shape1_, shape2_)
-
-
 def get_broadcast_shape(*tensors):
   """Get broadcast shape as a Python list of integers (preferred) or `Tensor`.
 
diff --git a/tensorflow/python/kernel_tests/distributions/util_test.py b/tensorflow/python/kernel_tests/distributions/util_test.py
index 5950241141..00781d0150 100644
--- a/tensorflow/python/kernel_tests/distributions/util_test.py
+++ b/tensorflow/python/kernel_tests/distributions/util_test.py
@@ -557,6 +557,124 @@ class PickVectorTest(test.TestCase):
                               constant_op.constant(False), x, y))  # No eval.
 
 
+class PreferStaticRankTest(test.TestCase):
+
+  def testNonEmptyConstantTensor(self):
+    x = array_ops.zeros((2, 3, 4))
+    rank = du.prefer_static_rank(x)
+    self.assertIsInstance(rank, np.ndarray)
+    self.assertEqual(3, rank)
+
+  def testEmptyConstantTensor(self):
+    x = constant_op.constant([])
+    rank = du.prefer_static_rank(x)
+    self.assertIsInstance(rank, np.ndarray)
+    self.assertEqual(1, rank)
+
+  def testScalarTensor(self):
+    x = constant_op.constant(1.)
+    rank = du.prefer_static_rank(x)
+    self.assertIsInstance(rank, np.ndarray)
+    self.assertEqual(0, rank)
+
+  def testDynamicRankEndsUpBeingNonEmpty(self):
+    x = array_ops.placeholder(np.float64, shape=None)
+    rank = du.prefer_static_rank(x)
+    with self.test_session():
+      self.assertAllEqual(2, rank.eval(feed_dict={x: np.zeros((2, 3))}))
+
+  def testDynamicRankEndsUpBeingEmpty(self):
+    x = array_ops.placeholder(np.int32, shape=None)
+    rank = du.prefer_static_rank(x)
+    with self.test_session():
+      self.assertAllEqual(1, rank.eval(feed_dict={x: []}))
+
+  def testDynamicRankEndsUpBeingScalar(self):
+    x = array_ops.placeholder(np.int32, shape=None)
+    rank = du.prefer_static_rank(x)
+    with self.test_session():
+      self.assertAllEqual(0, rank.eval(feed_dict={x: 1}))
+
+
+class PreferStaticShapeTest(test.TestCase):
+
+  def testNonEmptyConstantTensor(self):
+    x = array_ops.zeros((2, 3, 4))
+    shape = du.prefer_static_shape(x)
+    self.assertIsInstance(shape, np.ndarray)
+    self.assertAllEqual(np.array([2, 3, 4]), shape)
+
+  def testEmptyConstantTensor(self):
+    x = constant_op.constant([])
+    shape = du.prefer_static_shape(x)
+    self.assertIsInstance(shape, np.ndarray)
+    self.assertAllEqual(np.array([0]), shape)
+
+  def testScalarTensor(self):
+    x = constant_op.constant(1.)
+    shape = du.prefer_static_shape(x)
+    self.assertIsInstance(shape, np.ndarray)
+    self.assertAllEqual(np.array([]), shape)
+
+  def testDynamicShapeEndsUpBeingNonEmpty(self):
+    x = array_ops.placeholder(np.float64, shape=None)
+    shape = du.prefer_static_shape(x)
+    with self.test_session():
+      self.assertAllEqual((2, 3), shape.eval(feed_dict={x: np.zeros((2, 3))}))
+
+  def testDynamicShapeEndsUpBeingEmpty(self):
+    x = array_ops.placeholder(np.int32, shape=None)
+    shape = du.prefer_static_shape(x)
+    with self.test_session():
+      self.assertAllEqual(np.array([0]), shape.eval(feed_dict={x: []}))
+
+  def testDynamicShapeEndsUpBeingScalar(self):
+    x = array_ops.placeholder(np.int32, shape=None)
+    shape = du.prefer_static_shape(x)
+    with self.test_session():
+      self.assertAllEqual(np.array([]), shape.eval(feed_dict={x: 1}))
+
+
+class PreferStaticValueTest(test.TestCase):
+
+  def testNonEmptyConstantTensor(self):
+    x = array_ops.zeros((2, 3, 4))
+    value = du.prefer_static_value(x)
+    self.assertIsInstance(value, np.ndarray)
+    self.assertAllEqual(np.zeros((2, 3, 4)), value)
+
+  def testEmptyConstantTensor(self):
+    x = constant_op.constant([])
+    value = du.prefer_static_value(x)
+    self.assertIsInstance(value, np.ndarray)
+    self.assertAllEqual(np.array([]), value)
+
+  def testScalarTensor(self):
+    x = constant_op.constant(1.)
+    value = du.prefer_static_value(x)
+    self.assertIsInstance(value, np.ndarray)
+    self.assertAllEqual(np.array(1.), value)
+
+  def testDynamicValueEndsUpBeingNonEmpty(self):
+    x = array_ops.placeholder(np.float64, shape=None)
+    value = du.prefer_static_value(x)
+    with self.test_session():
+      self.assertAllEqual(np.zeros((2, 3)),
+                          value.eval(feed_dict={x: np.zeros((2, 3))}))
+
+  def testDynamicValueEndsUpBeingEmpty(self):
+    x = array_ops.placeholder(np.int32, shape=None)
+    value = du.prefer_static_value(x)
+    with self.test_session():
+      self.assertAllEqual(np.array([]), value.eval(feed_dict={x: []}))
+
+  def testDynamicValueEndsUpBeingScalar(self):
+    x = array_ops.placeholder(np.int32, shape=None)
+    value = du.prefer_static_value(x)
+    with self.test_session():
+      self.assertAllEqual(np.array(1), value.eval(feed_dict={x: 1}))
+
+
 class FillTriangularTest(test.TestCase):
 
   def setUp(self):
diff --git a/tensorflow/python/ops/distributions/util.py b/tensorflow/python/ops/distributions/util.py
index 28c74bf981..baca477eb7 100644
--- a/tensorflow/python/ops/distributions/util.py
+++ b/tensorflow/python/ops/distributions/util.py
@@ -25,6 +25,7 @@ import numpy as np
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
+from tensorflow.python.framework import tensor_shape
 from tensorflow.python.framework import tensor_util
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import check_ops
@@ -699,6 +700,88 @@ def pick_vector(cond,
         [array_ops.where(cond, 0, n)], [array_ops.where(cond, n, -1)])
 
 
+def prefer_static_broadcast_shape(
+    shape1, shape2, name="prefer_static_broadcast_shape"):
+  """Convenience function which statically broadcasts shape when possible.
+
+  Args:
+    shape1:  `1-D` integer `Tensor`.  Already converted to tensor!
+    shape2:  `1-D` integer `Tensor`.  Already converted to tensor!
+    name:  A string name to prepend to created ops.
+
+  Returns:
+    The broadcast shape, either as `TensorShape` (if broadcast can be done
+      statically), or as a `Tensor`.
+  """
+  with ops.name_scope(name, values=[shape1, shape2]):
+    def make_shape_tensor(x):
+      return ops.convert_to_tensor(x, name="shape", dtype=dtypes.int32)
+
+    def get_tensor_shape(s):
+      if isinstance(s, tensor_shape.TensorShape):
+        return s
+      s_ = tensor_util.constant_value(make_shape_tensor(s))
+      if s_ is not None:
+        return tensor_shape.TensorShape(s_)
+      return None
+
+    def get_shape_tensor(s):
+      if not isinstance(s, tensor_shape.TensorShape):
+        return make_shape_tensor(s)
+      if s.is_fully_defined():
+        return make_shape_tensor(s.as_list())
+      raise ValueError("Cannot broadcast from partially "
+                       "defined `TensorShape`.")
+
+    shape1_ = get_tensor_shape(shape1)
+    shape2_ = get_tensor_shape(shape2)
+    if shape1_ is not None and shape2_ is not None:
+      return array_ops.broadcast_static_shape(shape1_, shape2_)
+
+    shape1_ = get_shape_tensor(shape1)
+    shape2_ = get_shape_tensor(shape2)
+    return array_ops.broadcast_dynamic_shape(shape1_, shape2_)
+
+
+def prefer_static_rank(x):
+  """Return static rank of tensor `x` if available, else `tf.rank(x)`.
+
+  Args:
+    x: `Tensor` (already converted).
+
+  Returns:
+    Numpy array (if static rank is obtainable), else `Tensor`.
+  """
+  return prefer_static_value(array_ops.rank(x))
+
+
+def prefer_static_shape(x):
+  """Return static shape of tensor `x` if available, else `tf.shape(x)`.
+
+  Args:
+    x: `Tensor` (already converted).
+
+  Returns:
+    Numpy array (if static shape is obtainable), else `Tensor`.
+  """
+  return prefer_static_value(array_ops.shape(x))
+
+
+def prefer_static_value(x):
+  """Return static value of tensor `x` if available, else `x`.
+
+  Args:
+    x: `Tensor` (already converted).
+
+  Returns:
+    Numpy array (if static value is obtainable), else `Tensor`.
+  """
+  static_x = tensor_util.constant_value(x)
+  if static_x is not None:
+    return static_x
+  return x
+
+
 def gen_new_seed(seed, salt):
   """Generate a new seed, from the given seed and salt."""
   if seed is None:
-- 
GitLab


From 2adbc217b3eeed329d077050e0f1f7d88edd86d7 Mon Sep 17 00:00:00 2001
From: Sanjoy Das <sanjoy@google.com>
Date: Mon, 11 Dec 2017 20:34:08 -0800
Subject: [PATCH 0902/1225] [XLA:CPU] Teach the CPU layout assignment about dot
 dimension numbers

There is no great need for this yet, but I noticed that the test cases were
broken (they were constructing dots with unset dimension numbers), and one thing
led to another.

PiperOrigin-RevId: 178713597
---
 .../service/cpu/cpu_layout_assignment_test.cc | 28 +++++++++----------
 .../xla/service/cpu/dot_op_emitter.cc         |  3 +-
 .../compiler/xla/service/hlo_instruction.cc   | 14 ++++++++++
 .../compiler/xla/service/hlo_instruction.h    |  6 ++++
 4 files changed, 36 insertions(+), 15 deletions(-)

diff --git a/tensorflow/compiler/xla/service/cpu/cpu_layout_assignment_test.cc b/tensorflow/compiler/xla/service/cpu/cpu_layout_assignment_test.cc
index 401cf50717..5d37a41571 100644
--- a/tensorflow/compiler/xla/service/cpu/cpu_layout_assignment_test.cc
+++ b/tensorflow/compiler/xla/service/cpu/cpu_layout_assignment_test.cc
@@ -61,8 +61,8 @@ TEST_F(CpuLayoutAssignmentTest, DotWithConstantRhsTensor) {
       HloInstruction::CreateParameter(0, lhs_shape, "param0"));
   auto dot_rhs = builder.AddInstruction(
       HloInstruction::CreateConstant(Literal::CreateFromShape(rhs_shape)));
-  auto result = builder.AddInstruction(HloInstruction::CreateBinary(
-      result_shape, HloOpcode::kDot, dot_lhs, dot_rhs));
+  auto result = builder.AddInstruction(
+      HloInstruction::CreateCanonicalDot(result_shape, dot_lhs, dot_rhs));
 
   auto module = CreateNewModule();
   HloComputation* computation = module->AddEntryComputation(builder.Build());
@@ -98,10 +98,10 @@ TEST_F(CpuLayoutAssignmentTest, MultipleDotsWithSameConstantRhsTensor0) {
       HloInstruction::CreateParameter(1, lhs_shape, "param1"));
   auto dot_rhs = builder.AddInstruction(
       HloInstruction::CreateConstant(Literal::CreateFromShape(rhs_shape)));
-  auto dot_a_result = builder.AddInstruction(HloInstruction::CreateBinary(
-      result_shape, HloOpcode::kDot, dot_a_lhs, dot_rhs));
-  auto dot_b_result = builder.AddInstruction(HloInstruction::CreateBinary(
-      result_shape, HloOpcode::kDot, dot_b_lhs, dot_rhs));
+  auto dot_a_result = builder.AddInstruction(
+      HloInstruction::CreateCanonicalDot(result_shape, dot_a_lhs, dot_rhs));
+  auto dot_b_result = builder.AddInstruction(
+      HloInstruction::CreateCanonicalDot(result_shape, dot_b_lhs, dot_rhs));
   builder.AddInstruction(HloInstruction::CreateBinary(
       result_shape, HloOpcode::kAdd, dot_a_result, dot_b_result));
 
@@ -142,10 +142,10 @@ TEST_F(CpuLayoutAssignmentTest, MultipleDotsWithSameConstantRhsTensor1) {
       HloInstruction::CreateParameter(1, lhs_b_shape, "param1"));
   auto dot_rhs = builder.AddInstruction(
       HloInstruction::CreateConstant(Literal::CreateFromShape(rhs_shape)));
-  auto dot_a_result = builder.AddInstruction(HloInstruction::CreateBinary(
-      result_a_shape, HloOpcode::kDot, dot_a_lhs, dot_rhs));
-  auto dot_b_result = builder.AddInstruction(HloInstruction::CreateBinary(
-      result_b_shape, HloOpcode::kDot, dot_b_lhs, dot_rhs));
+  auto dot_a_result = builder.AddInstruction(
+      HloInstruction::CreateCanonicalDot(result_a_shape, dot_a_lhs, dot_rhs));
+  auto dot_b_result = builder.AddInstruction(
+      HloInstruction::CreateCanonicalDot(result_b_shape, dot_b_lhs, dot_rhs));
   auto tuple_result = builder.AddInstruction(
       HloInstruction::CreateTuple({dot_a_result, dot_b_result}));
 
@@ -180,8 +180,8 @@ TEST_F(CpuLayoutAssignmentTest, DotWithConstantLhsTensor) {
       HloInstruction::CreateConstant(Literal::CreateFromShape(lhs_shape)));
   auto dot_rhs = builder.AddInstruction(
       HloInstruction::CreateParameter(0, rhs_shape, "param0"));
-  auto dot_result = builder.AddInstruction(HloInstruction::CreateBinary(
-      result_shape, HloOpcode::kDot, dot_lhs, dot_rhs));
+  auto dot_result = builder.AddInstruction(
+      HloInstruction::CreateCanonicalDot(result_shape, dot_lhs, dot_rhs));
 
   auto module = CreateNewModule();
   HloComputation* computation = module->AddEntryComputation(builder.Build());
@@ -220,8 +220,8 @@ TEST_F(CpuLayoutAssignmentTest, DotWithConstantRhsTensorThroughGTE) {
       HloInstruction::CreateParameter(0, lhs_shape, "param0"));
   auto dot_rhs = builder.AddInstruction(
       HloInstruction::CreateGetTupleElement(rhs_shape, constant, 1));
-  auto dot_result = builder.AddInstruction(HloInstruction::CreateBinary(
-      result_shape, HloOpcode::kDot, dot_lhs, dot_rhs));
+  auto dot_result = builder.AddInstruction(
+      HloInstruction::CreateCanonicalDot(result_shape, dot_lhs, dot_rhs));
 
   auto module = CreateNewModule();
   HloComputation* computation = module->AddEntryComputation(builder.Build());
diff --git a/tensorflow/compiler/xla/service/cpu/dot_op_emitter.cc b/tensorflow/compiler/xla/service/cpu/dot_op_emitter.cc
index 7f0bf2c8e4..296e018c6f 100644
--- a/tensorflow/compiler/xla/service/cpu/dot_op_emitter.cc
+++ b/tensorflow/compiler/xla/service/cpu/dot_op_emitter.cc
@@ -1048,7 +1048,8 @@ bool PotentiallyImplementedAsEigenDot(const HloInstruction& hlo) {
 // column major.
 bool ProfitableToMakeDotRhsColumnMajor(const HloInstruction& hlo) {
   return hlo.opcode() == HloOpcode::kDot &&
-         hlo.shape().dimensions_size() == 2 && hlo.shape().dimensions(0) == 1;
+         hlo.shape().dimensions_size() == 2 && hlo.shape().dimensions(0) == 1 &&
+         hlo.dot_dimension_numbers().rhs_contracting_dimensions(0) == 0;
 }
 
 bool ProfitableToImplementDotInTiledLlvmIr(const HloInstruction& dot) {
diff --git a/tensorflow/compiler/xla/service/hlo_instruction.cc b/tensorflow/compiler/xla/service/hlo_instruction.cc
index 7849301957..10ac665083 100644
--- a/tensorflow/compiler/xla/service/hlo_instruction.cc
+++ b/tensorflow/compiler/xla/service/hlo_instruction.cc
@@ -347,6 +347,20 @@ HloInstruction::CreateGetTupleElement(const Shape& shape,
   return instruction;
 }
 
+/* static */ std::unique_ptr<HloInstruction> HloInstruction::CreateCanonicalDot(
+    const Shape& shape, HloInstruction* lhs, HloInstruction* rhs) {
+  CHECK_EQ(ShapeUtil::Rank(lhs->shape()), 2);
+  CHECK_EQ(ShapeUtil::Rank(rhs->shape()), 2);
+
+  auto instruction = WrapUnique(new HloInstruction(HloOpcode::kDot, shape));
+  instruction->AppendOperand(lhs);
+  instruction->AppendOperand(rhs);
+  instruction->dot_dimension_numbers_ = MakeUnique<DotDimensionNumbers>();
+  instruction->dot_dimension_numbers_->add_lhs_contracting_dimensions(1);
+  instruction->dot_dimension_numbers_->add_rhs_contracting_dimensions(0);
+  return instruction;
+}
+
 /* static */ std::unique_ptr<HloInstruction>
 HloInstruction::CreateReducePrecision(const Shape& shape,
                                       HloInstruction* operand,
diff --git a/tensorflow/compiler/xla/service/hlo_instruction.h b/tensorflow/compiler/xla/service/hlo_instruction.h
index 03cf9aaf90..092105582e 100644
--- a/tensorflow/compiler/xla/service/hlo_instruction.h
+++ b/tensorflow/compiler/xla/service/hlo_instruction.h
@@ -166,6 +166,12 @@ class HloInstruction {
       const Shape& shape, HloInstruction* lhs, HloInstruction* rhs,
       const DotDimensionNumbers& dimension_numbers);
 
+  // Creates a dot op with operands 'lhs' and 'rhs' that contracts dimension 1
+  // of the LHS with dimension 0 of the RHS with no batch dimensions.  Both LHS
+  // and the RHS must be of rank 2.
+  static std::unique_ptr<HloInstruction> CreateCanonicalDot(
+      const Shape& shape, HloInstruction* lhs, HloInstruction* rhs);
+
   // Creates a reduce-precision op, where operand is the data to reduce in
   // precision, and exponent_bits and mantissa_bits describe the precision to
   // reduce it to.
-- 
GitLab


From 5ff2b9d1c76fa64969238d798b5df94980341988 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 11 Dec 2017 21:04:20 -0800
Subject: [PATCH 0903/1225] Update ops-related pbtxt files.

PiperOrigin-RevId: 178715353
---
 .../core/ops/compat/ops_history.v1.pbtxt      | 48043 ++++++++++------
 tensorflow/core/ops/ops.pbtxt                 |  1239 +-
 2 files changed, 31647 insertions(+), 17635 deletions(-)

diff --git a/tensorflow/core/ops/compat/ops_history.v1.pbtxt b/tensorflow/core/ops/compat/ops_history.v1.pbtxt
index c7a296d938..713f6842d9 100644
--- a/tensorflow/core/ops/compat/ops_history.v1.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history.v1.pbtxt
@@ -39,6 +39,79 @@ op {
     }
   }
 }
+op {
+  name: "Abs"
+  input_arg {
+    name: "x"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "y"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_BFLOAT16
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+}
+op {
+  name: "AccumulateNV2"
+  input_arg {
+    name: "inputs"
+    type_attr: "T"
+    number_attr: "N"
+  }
+  output_arg {
+    name: "sum"
+    type_attr: "T"
+  }
+  attr {
+    name: "N"
+    type: "int"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+      }
+    }
+  }
+  attr {
+    name: "shape"
+    type: "shape"
+  }
+  is_aggregate: true
+  is_commutative: true
+}
 op {
   name: "AccumulateNV2"
   input_arg {
@@ -77,6 +150,7 @@ op {
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
@@ -165,6 +239,47 @@ op {
     }
   }
 }
+op {
+  name: "AccumulatorApplyGradient"
+  input_arg {
+    name: "handle"
+    type: DT_STRING
+    is_ref: true
+  }
+  input_arg {
+    name: "local_step"
+    type: DT_INT64
+  }
+  input_arg {
+    name: "gradient"
+    type_attr: "dtype"
+  }
+  attr {
+    name: "dtype"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
+      }
+    }
+  }
+}
 op {
   name: "AccumulatorNumAccumulated"
   input_arg {
@@ -267,6 +382,47 @@ op {
     }
   }
 }
+op {
+  name: "AccumulatorTakeGradient"
+  input_arg {
+    name: "handle"
+    type: DT_STRING
+    is_ref: true
+  }
+  input_arg {
+    name: "num_required"
+    type: DT_INT32
+  }
+  output_arg {
+    name: "average"
+    type_attr: "dtype"
+  }
+  attr {
+    name: "dtype"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
+      }
+    }
+  }
+}
 op {
   name: "Acos"
   input_arg {
@@ -293,6 +449,33 @@ op {
     }
   }
 }
+op {
+  name: "Acos"
+  input_arg {
+    name: "x"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "y"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_BFLOAT16
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+      }
+    }
+  }
+}
 op {
   name: "Acosh"
   input_arg {
@@ -317,6 +500,65 @@ op {
     }
   }
 }
+op {
+  name: "Acosh"
+  input_arg {
+    name: "x"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "y"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_BFLOAT16
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+      }
+    }
+  }
+}
+op {
+  name: "Add"
+  input_arg {
+    name: "x"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "y"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "z"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_UINT8
+        type: DT_INT8
+        type: DT_INT16
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_STRING
+      }
+    }
+  }
+}
 op {
   name: "Add"
   input_arg {
@@ -337,6 +579,7 @@ op {
     allowed_values {
       list {
         type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
         type: DT_UINT8
@@ -519,6 +762,52 @@ op {
   is_aggregate: true
   is_commutative: true
 }
+op {
+  name: "AddN"
+  input_arg {
+    name: "inputs"
+    type_attr: "T"
+    number_attr: "N"
+  }
+  output_arg {
+    name: "sum"
+    type_attr: "T"
+  }
+  attr {
+    name: "N"
+    type: "int"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
+        type: DT_VARIANT
+      }
+    }
+  }
+  is_aggregate: true
+  is_commutative: true
+}
 op {
   name: "AddSparseToTensorsMap"
   input_arg {
@@ -592,6 +881,42 @@ op {
   is_aggregate: true
   is_commutative: true
 }
+op {
+  name: "AddV2"
+  input_arg {
+    name: "x"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "y"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "z"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_BFLOAT16
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_UINT8
+        type: DT_INT8
+        type: DT_INT16
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+      }
+    }
+  }
+  is_aggregate: true
+  is_commutative: true
+}
 op {
   name: "AdjustContrast"
   input_arg {
@@ -1022,6 +1347,77 @@ op {
     }
   }
 }
+op {
+  name: "ApplyAdadelta"
+  input_arg {
+    name: "var"
+    type_attr: "T"
+    is_ref: true
+  }
+  input_arg {
+    name: "accum"
+    type_attr: "T"
+    is_ref: true
+  }
+  input_arg {
+    name: "accum_update"
+    type_attr: "T"
+    is_ref: true
+  }
+  input_arg {
+    name: "lr"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "rho"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "epsilon"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "grad"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "out"
+    type_attr: "T"
+    is_ref: true
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
+      }
+    }
+  }
+  attr {
+    name: "use_locking"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+}
 op {
   name: "ApplyAdagrad"
   input_arg {
@@ -1134,6 +1530,64 @@ op {
     }
   }
 }
+op {
+  name: "ApplyAdagrad"
+  input_arg {
+    name: "var"
+    type_attr: "T"
+    is_ref: true
+  }
+  input_arg {
+    name: "accum"
+    type_attr: "T"
+    is_ref: true
+  }
+  input_arg {
+    name: "lr"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "grad"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "out"
+    type_attr: "T"
+    is_ref: true
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
+      }
+    }
+  }
+  attr {
+    name: "use_locking"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+}
 op {
   name: "ApplyAdagradDA"
   input_arg {
@@ -1281,28 +1735,24 @@ op {
   }
 }
 op {
-  name: "ApplyAdam"
+  name: "ApplyAdagradDA"
   input_arg {
     name: "var"
     type_attr: "T"
     is_ref: true
   }
   input_arg {
-    name: "m"
+    name: "gradient_accumulator"
     type_attr: "T"
     is_ref: true
   }
   input_arg {
-    name: "v"
+    name: "gradient_squared_accumulator"
     type_attr: "T"
     is_ref: true
   }
   input_arg {
-    name: "beta1_power"
-    type_attr: "T"
-  }
-  input_arg {
-    name: "beta2_power"
+    name: "grad"
     type_attr: "T"
   }
   input_arg {
@@ -1310,20 +1760,16 @@ op {
     type_attr: "T"
   }
   input_arg {
-    name: "beta1"
-    type_attr: "T"
-  }
-  input_arg {
-    name: "beta2"
+    name: "l1"
     type_attr: "T"
   }
   input_arg {
-    name: "epsilon"
+    name: "l2"
     type_attr: "T"
   }
   input_arg {
-    name: "grad"
-    type_attr: "T"
+    name: "global_step"
+    type: DT_INT64
   }
   output_arg {
     name: "out"
@@ -1349,6 +1795,9 @@ op {
         type: DT_QUINT8
         type: DT_QINT32
         type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
@@ -1439,13 +1888,6 @@ op {
       b: false
     }
   }
-  attr {
-    name: "use_nesterov"
-    type: "bool"
-    default_value {
-      b: false
-    }
-  }
 }
 op {
   name: "ApplyAdam"
@@ -1516,8 +1958,6 @@ op {
         type: DT_QUINT8
         type: DT_QINT32
         type: DT_HALF
-        type: DT_UINT32
-        type: DT_UINT64
       }
     }
   }
@@ -1537,7 +1977,7 @@ op {
   }
 }
 op {
-  name: "ApplyAddSign"
+  name: "ApplyAdam"
   input_arg {
     name: "var"
     type_attr: "T"
@@ -1548,20 +1988,33 @@ op {
     type_attr: "T"
     is_ref: true
   }
+  input_arg {
+    name: "v"
+    type_attr: "T"
+    is_ref: true
+  }
+  input_arg {
+    name: "beta1_power"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "beta2_power"
+    type_attr: "T"
+  }
   input_arg {
     name: "lr"
     type_attr: "T"
   }
   input_arg {
-    name: "alpha"
+    name: "beta1"
     type_attr: "T"
   }
   input_arg {
-    name: "sign_decay"
+    name: "beta2"
     type_attr: "T"
   }
   input_arg {
-    name: "beta"
+    name: "epsilon"
     type_attr: "T"
   }
   input_arg {
@@ -1604,39 +2057,49 @@ op {
       b: false
     }
   }
+  attr {
+    name: "use_nesterov"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
 }
 op {
-  name: "ApplyCenteredRMSProp"
+  name: "ApplyAdam"
   input_arg {
     name: "var"
     type_attr: "T"
     is_ref: true
   }
   input_arg {
-    name: "mg"
+    name: "m"
     type_attr: "T"
     is_ref: true
   }
   input_arg {
-    name: "ms"
+    name: "v"
     type_attr: "T"
     is_ref: true
   }
   input_arg {
-    name: "mom"
+    name: "beta1_power"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "beta2_power"
     type_attr: "T"
-    is_ref: true
   }
   input_arg {
     name: "lr"
     type_attr: "T"
   }
   input_arg {
-    name: "rho"
+    name: "beta1"
     type_attr: "T"
   }
   input_arg {
-    name: "momentum"
+    name: "beta2"
     type_attr: "T"
   }
   input_arg {
@@ -1671,6 +2134,9 @@ op {
         type: DT_QUINT8
         type: DT_QINT32
         type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
@@ -1681,26 +2147,23 @@ op {
       b: false
     }
   }
+  attr {
+    name: "use_nesterov"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
 }
 op {
-  name: "ApplyCenteredRMSProp"
+  name: "ApplyAddSign"
   input_arg {
     name: "var"
     type_attr: "T"
     is_ref: true
   }
   input_arg {
-    name: "mg"
-    type_attr: "T"
-    is_ref: true
-  }
-  input_arg {
-    name: "ms"
-    type_attr: "T"
-    is_ref: true
-  }
-  input_arg {
-    name: "mom"
+    name: "m"
     type_attr: "T"
     is_ref: true
   }
@@ -1709,15 +2172,15 @@ op {
     type_attr: "T"
   }
   input_arg {
-    name: "rho"
+    name: "alpha"
     type_attr: "T"
   }
   input_arg {
-    name: "momentum"
+    name: "sign_decay"
     type_attr: "T"
   }
   input_arg {
-    name: "epsilon"
+    name: "beta"
     type_attr: "T"
   }
   input_arg {
@@ -1762,40 +2225,35 @@ op {
   }
 }
 op {
-  name: "ApplyFtrl"
+  name: "ApplyAddSign"
   input_arg {
     name: "var"
     type_attr: "T"
     is_ref: true
   }
   input_arg {
-    name: "accum"
-    type_attr: "T"
-    is_ref: true
-  }
-  input_arg {
-    name: "linear"
+    name: "m"
     type_attr: "T"
     is_ref: true
   }
   input_arg {
-    name: "grad"
+    name: "lr"
     type_attr: "T"
   }
   input_arg {
-    name: "lr"
+    name: "alpha"
     type_attr: "T"
   }
   input_arg {
-    name: "l1"
+    name: "sign_decay"
     type_attr: "T"
   }
   input_arg {
-    name: "l2"
+    name: "beta"
     type_attr: "T"
   }
   input_arg {
-    name: "lr_power"
+    name: "grad"
     type_attr: "T"
   }
   output_arg {
@@ -1822,6 +2280,9 @@ op {
         type: DT_QUINT8
         type: DT_QINT32
         type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
@@ -1834,40 +2295,45 @@ op {
   }
 }
 op {
-  name: "ApplyFtrl"
+  name: "ApplyCenteredRMSProp"
   input_arg {
     name: "var"
     type_attr: "T"
     is_ref: true
   }
   input_arg {
-    name: "accum"
+    name: "mg"
     type_attr: "T"
     is_ref: true
   }
   input_arg {
-    name: "linear"
+    name: "ms"
     type_attr: "T"
     is_ref: true
   }
   input_arg {
-    name: "grad"
+    name: "mom"
     type_attr: "T"
+    is_ref: true
   }
   input_arg {
     name: "lr"
     type_attr: "T"
   }
   input_arg {
-    name: "l1"
+    name: "rho"
     type_attr: "T"
   }
   input_arg {
-    name: "l2"
+    name: "momentum"
     type_attr: "T"
   }
   input_arg {
-    name: "lr_power"
+    name: "epsilon"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "grad"
     type_attr: "T"
   }
   output_arg {
@@ -1894,8 +2360,6 @@ op {
         type: DT_QUINT8
         type: DT_QINT32
         type: DT_HALF
-        type: DT_UINT32
-        type: DT_UINT64
       }
     }
   }
@@ -1908,44 +2372,45 @@ op {
   }
 }
 op {
-  name: "ApplyFtrlV2"
+  name: "ApplyCenteredRMSProp"
   input_arg {
     name: "var"
     type_attr: "T"
     is_ref: true
   }
   input_arg {
-    name: "accum"
+    name: "mg"
     type_attr: "T"
     is_ref: true
   }
   input_arg {
-    name: "linear"
+    name: "ms"
     type_attr: "T"
     is_ref: true
   }
   input_arg {
-    name: "grad"
+    name: "mom"
     type_attr: "T"
+    is_ref: true
   }
   input_arg {
     name: "lr"
     type_attr: "T"
   }
   input_arg {
-    name: "l1"
+    name: "rho"
     type_attr: "T"
   }
   input_arg {
-    name: "l2"
+    name: "momentum"
     type_attr: "T"
   }
   input_arg {
-    name: "l2_shrinkage"
+    name: "epsilon"
     type_attr: "T"
   }
   input_arg {
-    name: "lr_power"
+    name: "grad"
     type_attr: "T"
   }
   output_arg {
@@ -1972,6 +2437,8 @@ op {
         type: DT_QUINT8
         type: DT_QINT32
         type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
       }
     }
   }
@@ -1984,44 +2451,45 @@ op {
   }
 }
 op {
-  name: "ApplyFtrlV2"
+  name: "ApplyCenteredRMSProp"
   input_arg {
     name: "var"
     type_attr: "T"
     is_ref: true
   }
   input_arg {
-    name: "accum"
+    name: "mg"
     type_attr: "T"
     is_ref: true
   }
   input_arg {
-    name: "linear"
+    name: "ms"
     type_attr: "T"
     is_ref: true
   }
   input_arg {
-    name: "grad"
+    name: "mom"
     type_attr: "T"
+    is_ref: true
   }
   input_arg {
     name: "lr"
     type_attr: "T"
   }
   input_arg {
-    name: "l1"
+    name: "rho"
     type_attr: "T"
   }
   input_arg {
-    name: "l2"
+    name: "momentum"
     type_attr: "T"
   }
   input_arg {
-    name: "l2_shrinkage"
+    name: "epsilon"
     type_attr: "T"
   }
   input_arg {
-    name: "lr_power"
+    name: "grad"
     type_attr: "T"
   }
   output_arg {
@@ -2050,6 +2518,7 @@ op {
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
@@ -2062,18 +2531,40 @@ op {
   }
 }
 op {
-  name: "ApplyGradientDescent"
+  name: "ApplyFtrl"
   input_arg {
     name: "var"
     type_attr: "T"
     is_ref: true
   }
   input_arg {
-    name: "alpha"
+    name: "accum"
     type_attr: "T"
+    is_ref: true
   }
   input_arg {
-    name: "delta"
+    name: "linear"
+    type_attr: "T"
+    is_ref: true
+  }
+  input_arg {
+    name: "grad"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "lr"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "l1"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "l2"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "lr_power"
     type_attr: "T"
   }
   output_arg {
@@ -2112,18 +2603,40 @@ op {
   }
 }
 op {
-  name: "ApplyGradientDescent"
+  name: "ApplyFtrl"
   input_arg {
     name: "var"
     type_attr: "T"
     is_ref: true
   }
   input_arg {
-    name: "alpha"
+    name: "accum"
     type_attr: "T"
+    is_ref: true
   }
   input_arg {
-    name: "delta"
+    name: "linear"
+    type_attr: "T"
+    is_ref: true
+  }
+  input_arg {
+    name: "grad"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "lr"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "l1"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "l2"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "lr_power"
     type_attr: "T"
   }
   output_arg {
@@ -2164,7 +2677,7 @@ op {
   }
 }
 op {
-  name: "ApplyMomentum"
+  name: "ApplyFtrl"
   input_arg {
     name: "var"
     type_attr: "T"
@@ -2176,15 +2689,28 @@ op {
     is_ref: true
   }
   input_arg {
-    name: "lr"
+    name: "linear"
     type_attr: "T"
+    is_ref: true
   }
   input_arg {
     name: "grad"
     type_attr: "T"
   }
   input_arg {
-    name: "momentum"
+    name: "lr"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "l1"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "l2"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "lr_power"
     type_attr: "T"
   }
   output_arg {
@@ -2211,6 +2737,9 @@ op {
         type: DT_QUINT8
         type: DT_QINT32
         type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
@@ -2221,8 +2750,77 @@ op {
       b: false
     }
   }
+}
+op {
+  name: "ApplyFtrlV2"
+  input_arg {
+    name: "var"
+    type_attr: "T"
+    is_ref: true
+  }
+  input_arg {
+    name: "accum"
+    type_attr: "T"
+    is_ref: true
+  }
+  input_arg {
+    name: "linear"
+    type_attr: "T"
+    is_ref: true
+  }
+  input_arg {
+    name: "grad"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "lr"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "l1"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "l2"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "l2_shrinkage"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "lr_power"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "out"
+    type_attr: "T"
+    is_ref: true
+  }
   attr {
-    name: "use_nesterov"
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+      }
+    }
+  }
+  attr {
+    name: "use_locking"
     type: "bool"
     default_value {
       b: false
@@ -2230,7 +2828,7 @@ op {
   }
 }
 op {
-  name: "ApplyMomentum"
+  name: "ApplyFtrlV2"
   input_arg {
     name: "var"
     type_attr: "T"
@@ -2242,15 +2840,32 @@ op {
     is_ref: true
   }
   input_arg {
-    name: "lr"
+    name: "linear"
     type_attr: "T"
+    is_ref: true
   }
   input_arg {
     name: "grad"
     type_attr: "T"
   }
   input_arg {
-    name: "momentum"
+    name: "lr"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "l1"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "l2"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "l2_shrinkage"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "lr_power"
     type_attr: "T"
   }
   output_arg {
@@ -2289,44 +2904,46 @@ op {
       b: false
     }
   }
-  attr {
-    name: "use_nesterov"
-    type: "bool"
-    default_value {
-      b: false
-    }
-  }
 }
 op {
-  name: "ApplyPowerSign"
+  name: "ApplyFtrlV2"
   input_arg {
     name: "var"
     type_attr: "T"
     is_ref: true
   }
   input_arg {
-    name: "m"
+    name: "accum"
+    type_attr: "T"
+    is_ref: true
+  }
+  input_arg {
+    name: "linear"
     type_attr: "T"
     is_ref: true
   }
+  input_arg {
+    name: "grad"
+    type_attr: "T"
+  }
   input_arg {
     name: "lr"
     type_attr: "T"
   }
   input_arg {
-    name: "logbase"
+    name: "l1"
     type_attr: "T"
   }
   input_arg {
-    name: "sign_decay"
+    name: "l2"
     type_attr: "T"
   }
   input_arg {
-    name: "beta"
+    name: "l2_shrinkage"
     type_attr: "T"
   }
   input_arg {
-    name: "grad"
+    name: "lr_power"
     type_attr: "T"
   }
   output_arg {
@@ -2355,6 +2972,7 @@ op {
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
@@ -2367,31 +2985,18 @@ op {
   }
 }
 op {
-  name: "ApplyProximalAdagrad"
+  name: "ApplyGradientDescent"
   input_arg {
     name: "var"
     type_attr: "T"
     is_ref: true
   }
   input_arg {
-    name: "accum"
-    type_attr: "T"
-    is_ref: true
-  }
-  input_arg {
-    name: "lr"
-    type_attr: "T"
-  }
-  input_arg {
-    name: "l1"
-    type_attr: "T"
-  }
-  input_arg {
-    name: "l2"
+    name: "alpha"
     type_attr: "T"
   }
   input_arg {
-    name: "grad"
+    name: "delta"
     type_attr: "T"
   }
   output_arg {
@@ -2430,31 +3035,18 @@ op {
   }
 }
 op {
-  name: "ApplyProximalAdagrad"
+  name: "ApplyGradientDescent"
   input_arg {
     name: "var"
     type_attr: "T"
     is_ref: true
   }
   input_arg {
-    name: "accum"
-    type_attr: "T"
-    is_ref: true
-  }
-  input_arg {
-    name: "lr"
-    type_attr: "T"
-  }
-  input_arg {
-    name: "l1"
-    type_attr: "T"
-  }
-  input_arg {
-    name: "l2"
+    name: "alpha"
     type_attr: "T"
   }
   input_arg {
-    name: "grad"
+    name: "delta"
     type_attr: "T"
   }
   output_arg {
@@ -2495,7 +3087,7 @@ op {
   }
 }
 op {
-  name: "ApplyProximalGradientDescent"
+  name: "ApplyGradientDescent"
   input_arg {
     name: "var"
     type_attr: "T"
@@ -2505,14 +3097,6 @@ op {
     name: "alpha"
     type_attr: "T"
   }
-  input_arg {
-    name: "l1"
-    type_attr: "T"
-  }
-  input_arg {
-    name: "l2"
-    type_attr: "T"
-  }
   input_arg {
     name: "delta"
     type_attr: "T"
@@ -2541,6 +3125,9 @@ op {
         type: DT_QUINT8
         type: DT_QINT32
         type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
@@ -2553,26 +3140,27 @@ op {
   }
 }
 op {
-  name: "ApplyProximalGradientDescent"
+  name: "ApplyMomentum"
   input_arg {
     name: "var"
     type_attr: "T"
     is_ref: true
   }
   input_arg {
-    name: "alpha"
+    name: "accum"
     type_attr: "T"
+    is_ref: true
   }
   input_arg {
-    name: "l1"
+    name: "lr"
     type_attr: "T"
   }
   input_arg {
-    name: "l2"
+    name: "grad"
     type_attr: "T"
   }
   input_arg {
-    name: "delta"
+    name: "momentum"
     type_attr: "T"
   }
   output_arg {
@@ -2599,8 +3187,6 @@ op {
         type: DT_QUINT8
         type: DT_QINT32
         type: DT_HALF
-        type: DT_UINT32
-        type: DT_UINT64
       }
     }
   }
@@ -2611,21 +3197,23 @@ op {
       b: false
     }
   }
+  attr {
+    name: "use_nesterov"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
 }
 op {
-  name: "ApplyRMSProp"
+  name: "ApplyMomentum"
   input_arg {
     name: "var"
     type_attr: "T"
     is_ref: true
   }
   input_arg {
-    name: "ms"
-    type_attr: "T"
-    is_ref: true
-  }
-  input_arg {
-    name: "mom"
+    name: "accum"
     type_attr: "T"
     is_ref: true
   }
@@ -2634,21 +3222,13 @@ op {
     type_attr: "T"
   }
   input_arg {
-    name: "rho"
+    name: "grad"
     type_attr: "T"
   }
   input_arg {
     name: "momentum"
     type_attr: "T"
   }
-  input_arg {
-    name: "epsilon"
-    type_attr: "T"
-  }
-  input_arg {
-    name: "grad"
-    type_attr: "T"
-  }
   output_arg {
     name: "out"
     type_attr: "T"
@@ -2673,6 +3253,8 @@ op {
         type: DT_QUINT8
         type: DT_QINT32
         type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
       }
     }
   }
@@ -2683,21 +3265,23 @@ op {
       b: false
     }
   }
+  attr {
+    name: "use_nesterov"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
 }
 op {
-  name: "ApplyRMSProp"
+  name: "ApplyMomentum"
   input_arg {
     name: "var"
     type_attr: "T"
     is_ref: true
   }
   input_arg {
-    name: "ms"
-    type_attr: "T"
-    is_ref: true
-  }
-  input_arg {
-    name: "mom"
+    name: "accum"
     type_attr: "T"
     is_ref: true
   }
@@ -2706,21 +3290,13 @@ op {
     type_attr: "T"
   }
   input_arg {
-    name: "rho"
+    name: "grad"
     type_attr: "T"
   }
   input_arg {
     name: "momentum"
     type_attr: "T"
   }
-  input_arg {
-    name: "epsilon"
-    type_attr: "T"
-  }
-  input_arg {
-    name: "grad"
-    type_attr: "T"
-  }
   output_arg {
     name: "out"
     type_attr: "T"
@@ -2747,6 +3323,7 @@ op {
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
@@ -2757,65 +3334,50 @@ op {
       b: false
     }
   }
+  attr {
+    name: "use_nesterov"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
 }
 op {
-  name: "ApproximateEqual"
+  name: "ApplyPowerSign"
   input_arg {
-    name: "x"
+    name: "var"
     type_attr: "T"
+    is_ref: true
   }
   input_arg {
-    name: "y"
+    name: "m"
     type_attr: "T"
+    is_ref: true
   }
-  output_arg {
-    name: "z"
-    type: DT_BOOL
+  input_arg {
+    name: "lr"
+    type_attr: "T"
   }
-  attr {
-    name: "T"
-    type: "type"
-    allowed_values {
-      list {
-        type: DT_FLOAT
-        type: DT_DOUBLE
-        type: DT_INT64
-        type: DT_INT32
-        type: DT_UINT8
-        type: DT_UINT16
-        type: DT_INT16
-        type: DT_INT8
-        type: DT_COMPLEX64
-        type: DT_COMPLEX128
-        type: DT_QINT8
-        type: DT_QUINT8
-        type: DT_QINT32
-        type: DT_HALF
-      }
-    }
+  input_arg {
+    name: "logbase"
+    type_attr: "T"
   }
-  attr {
-    name: "tolerance"
-    type: "float"
-    default_value {
-      f: 1e-05
-    }
+  input_arg {
+    name: "sign_decay"
+    type_attr: "T"
   }
-  is_commutative: true
-}
-op {
-  name: "ApproximateEqual"
   input_arg {
-    name: "x"
+    name: "beta"
     type_attr: "T"
   }
   input_arg {
-    name: "y"
+    name: "grad"
     type_attr: "T"
   }
   output_arg {
-    name: "z"
-    type: DT_BOOL
+    name: "out"
+    type_attr: "T"
+    is_ref: true
   }
   attr {
     name: "T"
@@ -2842,27 +3404,49 @@ op {
     }
   }
   attr {
-    name: "tolerance"
-    type: "float"
+    name: "use_locking"
+    type: "bool"
     default_value {
-      f: 1e-05
+      b: false
     }
   }
-  is_commutative: true
 }
 op {
-  name: "ArgMax"
+  name: "ApplyPowerSign"
   input_arg {
-    name: "input"
+    name: "var"
     type_attr: "T"
+    is_ref: true
   }
   input_arg {
-    name: "dimension"
-    type_attr: "Tidx"
+    name: "m"
+    type_attr: "T"
+    is_ref: true
+  }
+  input_arg {
+    name: "lr"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "logbase"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "sign_decay"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "beta"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "grad"
+    type_attr: "T"
   }
   output_arg {
-    name: "output"
-    type: DT_INT64
+    name: "out"
+    type_attr: "T"
+    is_ref: true
   }
   attr {
     name: "T"
@@ -2883,36 +3467,52 @@ op {
         type: DT_QUINT8
         type: DT_QINT32
         type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
   attr {
-    name: "Tidx"
-    type: "type"
+    name: "use_locking"
+    type: "bool"
     default_value {
-      type: DT_INT32
-    }
-    allowed_values {
-      list {
-        type: DT_INT32
-        type: DT_INT64
-      }
+      b: false
     }
   }
 }
 op {
-  name: "ArgMax"
+  name: "ApplyProximalAdagrad"
   input_arg {
-    name: "input"
+    name: "var"
     type_attr: "T"
+    is_ref: true
   }
   input_arg {
-    name: "dimension"
-    type_attr: "Tidx"
+    name: "accum"
+    type_attr: "T"
+    is_ref: true
+  }
+  input_arg {
+    name: "lr"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "l1"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "l2"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "grad"
+    type_attr: "T"
   }
   output_arg {
-    name: "output"
-    type_attr: "output_type"
+    name: "out"
+    type_attr: "T"
+    is_ref: true
   }
   attr {
     name: "T"
@@ -2937,45 +3537,45 @@ op {
     }
   }
   attr {
-    name: "Tidx"
-    type: "type"
-    default_value {
-      type: DT_INT32
-    }
-    allowed_values {
-      list {
-        type: DT_INT32
-        type: DT_INT64
-      }
-    }
-  }
-  attr {
-    name: "output_type"
-    type: "type"
+    name: "use_locking"
+    type: "bool"
     default_value {
-      type: DT_INT64
-    }
-    allowed_values {
-      list {
-        type: DT_INT32
-        type: DT_INT64
-      }
+      b: false
     }
   }
 }
 op {
-  name: "ArgMax"
+  name: "ApplyProximalAdagrad"
   input_arg {
-    name: "input"
+    name: "var"
     type_attr: "T"
+    is_ref: true
   }
   input_arg {
-    name: "dimension"
-    type_attr: "Tidx"
+    name: "accum"
+    type_attr: "T"
+    is_ref: true
+  }
+  input_arg {
+    name: "lr"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "l1"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "l2"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "grad"
+    type_attr: "T"
   }
   output_arg {
-    name: "output"
-    type_attr: "output_type"
+    name: "out"
+    type_attr: "T"
+    is_ref: true
   }
   attr {
     name: "T"
@@ -3002,45 +3602,45 @@ op {
     }
   }
   attr {
-    name: "Tidx"
-    type: "type"
-    default_value {
-      type: DT_INT32
-    }
-    allowed_values {
-      list {
-        type: DT_INT32
-        type: DT_INT64
-      }
-    }
-  }
-  attr {
-    name: "output_type"
-    type: "type"
+    name: "use_locking"
+    type: "bool"
     default_value {
-      type: DT_INT64
-    }
-    allowed_values {
-      list {
-        type: DT_INT32
-        type: DT_INT64
-      }
+      b: false
     }
   }
 }
 op {
-  name: "ArgMin"
+  name: "ApplyProximalAdagrad"
   input_arg {
-    name: "input"
+    name: "var"
     type_attr: "T"
+    is_ref: true
   }
   input_arg {
-    name: "dimension"
-    type_attr: "Tidx"
+    name: "accum"
+    type_attr: "T"
+    is_ref: true
+  }
+  input_arg {
+    name: "lr"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "l1"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "l2"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "grad"
+    type_attr: "T"
   }
   output_arg {
-    name: "output"
-    type: DT_INT64
+    name: "out"
+    type_attr: "T"
+    is_ref: true
   }
   attr {
     name: "T"
@@ -3061,36 +3661,47 @@ op {
         type: DT_QUINT8
         type: DT_QINT32
         type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
   attr {
-    name: "Tidx"
-    type: "type"
+    name: "use_locking"
+    type: "bool"
     default_value {
-      type: DT_INT32
-    }
-    allowed_values {
-      list {
-        type: DT_INT32
-        type: DT_INT64
-      }
+      b: false
     }
   }
 }
 op {
-  name: "ArgMin"
+  name: "ApplyProximalGradientDescent"
   input_arg {
-    name: "input"
+    name: "var"
     type_attr: "T"
+    is_ref: true
   }
   input_arg {
-    name: "dimension"
-    type_attr: "Tidx"
+    name: "alpha"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "l1"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "l2"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "delta"
+    type_attr: "T"
   }
   output_arg {
-    name: "output"
-    type_attr: "output_type"
+    name: "out"
+    type_attr: "T"
+    is_ref: true
   }
   attr {
     name: "T"
@@ -3115,45 +3726,40 @@ op {
     }
   }
   attr {
-    name: "Tidx"
-    type: "type"
-    default_value {
-      type: DT_INT32
-    }
-    allowed_values {
-      list {
-        type: DT_INT32
-        type: DT_INT64
-      }
-    }
-  }
-  attr {
-    name: "output_type"
-    type: "type"
+    name: "use_locking"
+    type: "bool"
     default_value {
-      type: DT_INT64
-    }
-    allowed_values {
-      list {
-        type: DT_INT32
-        type: DT_INT64
-      }
+      b: false
     }
   }
 }
 op {
-  name: "ArgMin"
+  name: "ApplyProximalGradientDescent"
   input_arg {
-    name: "input"
+    name: "var"
     type_attr: "T"
+    is_ref: true
   }
   input_arg {
-    name: "dimension"
-    type_attr: "Tidx"
+    name: "alpha"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "l1"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "l2"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "delta"
+    type_attr: "T"
   }
   output_arg {
-    name: "output"
-    type_attr: "output_type"
+    name: "out"
+    type_attr: "T"
+    is_ref: true
   }
   attr {
     name: "T"
@@ -3180,217 +3786,113 @@ op {
     }
   }
   attr {
-    name: "Tidx"
-    type: "type"
-    default_value {
-      type: DT_INT32
-    }
-    allowed_values {
-      list {
-        type: DT_INT32
-        type: DT_INT64
-      }
-    }
-  }
-  attr {
-    name: "output_type"
-    type: "type"
+    name: "use_locking"
+    type: "bool"
     default_value {
-      type: DT_INT64
-    }
-    allowed_values {
-      list {
-        type: DT_INT32
-        type: DT_INT64
-      }
+      b: false
     }
   }
 }
 op {
-  name: "AsString"
+  name: "ApplyProximalGradientDescent"
   input_arg {
-    name: "input"
+    name: "var"
     type_attr: "T"
+    is_ref: true
   }
-  output_arg {
-    name: "output"
-    type: DT_STRING
-  }
-  attr {
-    name: "T"
-    type: "type"
-    allowed_values {
-      list {
-        type: DT_INT32
-        type: DT_INT64
-        type: DT_COMPLEX64
-        type: DT_FLOAT
-        type: DT_DOUBLE
-        type: DT_BOOL
-        type: DT_INT8
-      }
-    }
-  }
-  attr {
-    name: "precision"
-    type: "int"
-    default_value {
-      i: -1
-    }
-  }
-  attr {
-    name: "scientific"
-    type: "bool"
-    default_value {
-      b: false
-    }
-  }
-  attr {
-    name: "shortest"
-    type: "bool"
-    default_value {
-      b: false
-    }
-  }
-  attr {
-    name: "width"
-    type: "int"
-    default_value {
-      i: -1
-    }
-  }
-  attr {
-    name: "fill"
-    type: "string"
-    default_value {
-      s: ""
-    }
-  }
-}
-op {
-  name: "Asin"
   input_arg {
-    name: "x"
+    name: "alpha"
     type_attr: "T"
   }
-  output_arg {
-    name: "y"
+  input_arg {
+    name: "l1"
     type_attr: "T"
   }
-  attr {
-    name: "T"
-    type: "type"
-    allowed_values {
-      list {
-        type: DT_HALF
-        type: DT_FLOAT
-        type: DT_DOUBLE
-        type: DT_INT32
-        type: DT_INT64
-        type: DT_COMPLEX64
-        type: DT_COMPLEX128
-      }
-    }
+  input_arg {
+    name: "l2"
+    type_attr: "T"
   }
-}
-op {
-  name: "Asinh"
   input_arg {
-    name: "x"
+    name: "delta"
     type_attr: "T"
   }
   output_arg {
-    name: "y"
+    name: "out"
     type_attr: "T"
+    is_ref: true
   }
   attr {
     name: "T"
     type: "type"
     allowed_values {
       list {
-        type: DT_HALF
         type: DT_FLOAT
         type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
         type: DT_COMPLEX64
         type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
-}
-op {
-  name: "Assert"
-  input_arg {
-    name: "condition"
-    type: DT_BOOL
-  }
-  input_arg {
-    name: "data"
-    type_list_attr: "T"
-  }
-  attr {
-    name: "T"
-    type: "list(type)"
-    has_minimum: true
-    minimum: 1
-  }
   attr {
-    name: "summarize"
-    type: "int"
+    name: "use_locking"
+    type: "bool"
     default_value {
-      i: 3
+      b: false
     }
   }
-  is_stateful: true
 }
 op {
-  name: "Assign"
+  name: "ApplyRMSProp"
   input_arg {
-    name: "ref"
+    name: "var"
     type_attr: "T"
     is_ref: true
   }
   input_arg {
-    name: "value"
+    name: "ms"
     type_attr: "T"
+    is_ref: true
   }
-  output_arg {
-    name: "output_ref"
+  input_arg {
+    name: "mom"
     type_attr: "T"
     is_ref: true
   }
-  attr {
-    name: "T"
-    type: "type"
+  input_arg {
+    name: "lr"
+    type_attr: "T"
   }
-  attr {
-    name: "validate_shape"
-    type: "bool"
-    default_value {
-      b: true
-    }
+  input_arg {
+    name: "rho"
+    type_attr: "T"
   }
-  attr {
-    name: "use_locking"
-    type: "bool"
-    default_value {
-      b: true
-    }
+  input_arg {
+    name: "momentum"
+    type_attr: "T"
   }
-  allows_uninitialized_input: true
-}
-op {
-  name: "AssignAdd"
   input_arg {
-    name: "ref"
+    name: "epsilon"
     type_attr: "T"
-    is_ref: true
   }
   input_arg {
-    name: "value"
+    name: "grad"
     type_attr: "T"
   }
   output_arg {
-    name: "output_ref"
+    name: "out"
     type_attr: "T"
     is_ref: true
   }
@@ -3425,18 +3927,44 @@ op {
   }
 }
 op {
-  name: "AssignAdd"
+  name: "ApplyRMSProp"
   input_arg {
-    name: "ref"
+    name: "var"
     type_attr: "T"
     is_ref: true
   }
   input_arg {
-    name: "value"
+    name: "ms"
+    type_attr: "T"
+    is_ref: true
+  }
+  input_arg {
+    name: "mom"
+    type_attr: "T"
+    is_ref: true
+  }
+  input_arg {
+    name: "lr"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "rho"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "momentum"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "epsilon"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "grad"
     type_attr: "T"
   }
   output_arg {
-    name: "output_ref"
+    name: "out"
     type_attr: "T"
     is_ref: true
   }
@@ -3473,34 +4001,44 @@ op {
   }
 }
 op {
-  name: "AssignAddVariableOp"
+  name: "ApplyRMSProp"
   input_arg {
-    name: "resource"
-    type: DT_RESOURCE
+    name: "var"
+    type_attr: "T"
+    is_ref: true
   }
   input_arg {
-    name: "value"
-    type_attr: "dtype"
-  }
-  attr {
-    name: "dtype"
-    type: "type"
+    name: "ms"
+    type_attr: "T"
+    is_ref: true
   }
-  is_stateful: true
-}
-op {
-  name: "AssignSub"
   input_arg {
-    name: "ref"
+    name: "mom"
     type_attr: "T"
     is_ref: true
   }
   input_arg {
-    name: "value"
+    name: "lr"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "rho"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "momentum"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "epsilon"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "grad"
     type_attr: "T"
   }
   output_arg {
-    name: "output_ref"
+    name: "out"
     type_attr: "T"
     is_ref: true
   }
@@ -3523,6 +4061,9 @@ op {
         type: DT_QUINT8
         type: DT_QINT32
         type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
@@ -3535,20 +4076,18 @@ op {
   }
 }
 op {
-  name: "AssignSub"
+  name: "ApproximateEqual"
   input_arg {
-    name: "ref"
+    name: "x"
     type_attr: "T"
-    is_ref: true
   }
   input_arg {
-    name: "value"
+    name: "y"
     type_attr: "T"
   }
   output_arg {
-    name: "output_ref"
-    type_attr: "T"
-    is_ref: true
+    name: "z"
+    type: DT_BOOL
   }
   attr {
     name: "T"
@@ -3569,90 +4108,78 @@ op {
         type: DT_QUINT8
         type: DT_QINT32
         type: DT_HALF
-        type: DT_UINT32
-        type: DT_UINT64
       }
     }
   }
   attr {
-    name: "use_locking"
-    type: "bool"
+    name: "tolerance"
+    type: "float"
     default_value {
-      b: false
+      f: 1e-05
     }
   }
+  is_commutative: true
 }
 op {
-  name: "AssignSubVariableOp"
+  name: "ApproximateEqual"
   input_arg {
-    name: "resource"
-    type: DT_RESOURCE
+    name: "x"
+    type_attr: "T"
   }
   input_arg {
-    name: "value"
-    type_attr: "dtype"
-  }
-  attr {
-    name: "dtype"
-    type: "type"
-  }
-  is_stateful: true
-}
-op {
-  name: "AssignVariableOp"
-  input_arg {
-    name: "resource"
-    type: DT_RESOURCE
-  }
-  input_arg {
-    name: "value"
-    type_attr: "dtype"
-  }
-  attr {
-    name: "dtype"
-    type: "type"
-  }
-  is_stateful: true
-}
-op {
-  name: "Atan"
-  input_arg {
-    name: "x"
+    name: "y"
     type_attr: "T"
   }
   output_arg {
-    name: "y"
-    type_attr: "T"
+    name: "z"
+    type: DT_BOOL
   }
   attr {
     name: "T"
     type: "type"
     allowed_values {
       list {
-        type: DT_HALF
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT32
         type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
         type: DT_COMPLEX64
         type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
       }
     }
   }
+  attr {
+    name: "tolerance"
+    type: "float"
+    default_value {
+      f: 1e-05
+    }
+  }
+  is_commutative: true
 }
 op {
-  name: "Atan2"
+  name: "ApproximateEqual"
   input_arg {
-    name: "y"
+    name: "x"
     type_attr: "T"
   }
   input_arg {
-    name: "x"
+    name: "y"
     type_attr: "T"
   }
   output_arg {
     name: "z"
-    type_attr: "T"
+    type: DT_BOOL
   }
   attr {
     name: "T"
@@ -3661,666 +4188,668 @@ op {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
+  attr {
+    name: "tolerance"
+    type: "float"
+    default_value {
+      f: 1e-05
+    }
+  }
+  is_commutative: true
 }
 op {
-  name: "Atanh"
+  name: "ArgMax"
   input_arg {
-    name: "x"
+    name: "input"
     type_attr: "T"
   }
+  input_arg {
+    name: "dimension"
+    type_attr: "Tidx"
+  }
   output_arg {
-    name: "y"
-    type_attr: "T"
+    name: "output"
+    type: DT_INT64
   }
   attr {
     name: "T"
     type: "type"
     allowed_values {
       list {
-        type: DT_HALF
         type: DT_FLOAT
         type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
         type: DT_COMPLEX64
         type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
       }
     }
   }
-}
-op {
-  name: "AudioSpectrogram"
-  input_arg {
-    name: "input"
-    type: DT_FLOAT
-  }
-  output_arg {
-    name: "spectrogram"
-    type: DT_FLOAT
-  }
-  attr {
-    name: "window_size"
-    type: "int"
-  }
-  attr {
-    name: "stride"
-    type: "int"
-  }
   attr {
-    name: "magnitude_squared"
-    type: "bool"
+    name: "Tidx"
+    type: "type"
     default_value {
-      b: false
+      type: DT_INT32
     }
-  }
-}
-op {
-  name: "AudioSummary"
-  input_arg {
-    name: "tag"
-    type: DT_STRING
-  }
-  input_arg {
-    name: "tensor"
-    type: DT_FLOAT
-  }
-  output_arg {
-    name: "summary"
-    type: DT_STRING
-  }
-  attr {
-    name: "sample_rate"
-    type: "float"
-  }
-  attr {
-    name: "max_outputs"
-    type: "int"
-    default_value {
-      i: 3
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
     }
-    has_minimum: true
-    minimum: 1
-  }
-  deprecation {
-    version: 15
   }
 }
 op {
-  name: "AudioSummaryV2"
-  input_arg {
-    name: "tag"
-    type: DT_STRING
-  }
-  input_arg {
-    name: "tensor"
-    type: DT_FLOAT
-  }
+  name: "ArgMax"
   input_arg {
-    name: "sample_rate"
-    type: DT_FLOAT
-  }
-  output_arg {
-    name: "summary"
-    type: DT_STRING
-  }
-  attr {
-    name: "max_outputs"
-    type: "int"
-    default_value {
-      i: 3
-    }
-    has_minimum: true
-    minimum: 1
+    name: "input"
+    type_attr: "T"
   }
-}
-op {
-  name: "AvgPool"
   input_arg {
-    name: "value"
-    type_attr: "T"
+    name: "dimension"
+    type_attr: "Tidx"
   }
   output_arg {
     name: "output"
-    type_attr: "T"
-  }
-  attr {
-    name: "ksize"
-    type: "list(int)"
-    has_minimum: true
-    minimum: 4
-  }
-  attr {
-    name: "strides"
-    type: "list(int)"
-    has_minimum: true
-    minimum: 4
+    type_attr: "output_type"
   }
   attr {
-    name: "padding"
-    type: "string"
+    name: "T"
+    type: "type"
     allowed_values {
       list {
-        s: "SAME"
-        s: "VALID"
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
       }
     }
   }
   attr {
-    name: "data_format"
-    type: "string"
+    name: "Tidx"
+    type: "type"
     default_value {
-      s: "NHWC"
+      type: DT_INT32
     }
     allowed_values {
       list {
-        s: "NHWC"
-        s: "NCHW"
+        type: DT_INT32
+        type: DT_INT64
       }
     }
   }
   attr {
-    name: "T"
+    name: "output_type"
     type: "type"
+    default_value {
+      type: DT_INT64
+    }
     allowed_values {
       list {
-        type: DT_FLOAT
-        type: DT_HALF
-        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
       }
     }
   }
 }
 op {
-  name: "AvgPool"
+  name: "ArgMax"
   input_arg {
-    name: "value"
+    name: "input"
     type_attr: "T"
   }
+  input_arg {
+    name: "dimension"
+    type_attr: "Tidx"
+  }
   output_arg {
     name: "output"
-    type_attr: "T"
-  }
-  attr {
-    name: "ksize"
-    type: "list(int)"
-    has_minimum: true
-    minimum: 4
-  }
-  attr {
-    name: "strides"
-    type: "list(int)"
-    has_minimum: true
-    minimum: 4
+    type_attr: "output_type"
   }
   attr {
-    name: "padding"
-    type: "string"
+    name: "T"
+    type: "type"
     allowed_values {
       list {
-        s: "SAME"
-        s: "VALID"
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
       }
     }
   }
   attr {
-    name: "data_format"
-    type: "string"
+    name: "Tidx"
+    type: "type"
     default_value {
-      s: "NHWC"
+      type: DT_INT32
     }
     allowed_values {
       list {
-        s: "NHWC"
-        s: "NCHW"
+        type: DT_INT32
+        type: DT_INT64
       }
     }
   }
   attr {
-    name: "T"
+    name: "output_type"
     type: "type"
+    default_value {
+      type: DT_INT64
+    }
     allowed_values {
       list {
-        type: DT_FLOAT
-        type: DT_DOUBLE
-        type: DT_HALF
+        type: DT_INT32
+        type: DT_INT64
       }
     }
   }
 }
 op {
-  name: "AvgPool"
+  name: "ArgMax"
   input_arg {
-    name: "value"
+    name: "input"
     type_attr: "T"
   }
+  input_arg {
+    name: "dimension"
+    type_attr: "Tidx"
+  }
   output_arg {
     name: "output"
-    type_attr: "T"
-  }
-  attr {
-    name: "ksize"
-    type: "list(int)"
-    has_minimum: true
-    minimum: 4
-  }
-  attr {
-    name: "strides"
-    type: "list(int)"
-    has_minimum: true
-    minimum: 4
+    type_attr: "output_type"
   }
   attr {
-    name: "padding"
-    type: "string"
+    name: "T"
+    type: "type"
     allowed_values {
       list {
-        s: "SAME"
-        s: "VALID"
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
   attr {
-    name: "data_format"
-    type: "string"
+    name: "Tidx"
+    type: "type"
     default_value {
-      s: "NHWC"
+      type: DT_INT32
     }
     allowed_values {
       list {
-        s: "NHWC"
-        s: "NCHW"
+        type: DT_INT32
+        type: DT_INT64
       }
     }
   }
   attr {
-    name: "T"
+    name: "output_type"
     type: "type"
+    default_value {
+      type: DT_INT64
+    }
     allowed_values {
       list {
-        type: DT_HALF
-        type: DT_FLOAT
-        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
       }
     }
   }
 }
 op {
-  name: "AvgPool3D"
+  name: "ArgMin"
   input_arg {
     name: "input"
     type_attr: "T"
   }
+  input_arg {
+    name: "dimension"
+    type_attr: "Tidx"
+  }
   output_arg {
     name: "output"
-    type_attr: "T"
-  }
-  attr {
-    name: "ksize"
-    type: "list(int)"
-    has_minimum: true
-    minimum: 5
-  }
-  attr {
-    name: "strides"
-    type: "list(int)"
-    has_minimum: true
-    minimum: 5
+    type: DT_INT64
   }
   attr {
-    name: "padding"
-    type: "string"
+    name: "T"
+    type: "type"
     allowed_values {
       list {
-        s: "SAME"
-        s: "VALID"
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
       }
     }
   }
   attr {
-    name: "T"
+    name: "Tidx"
     type: "type"
+    default_value {
+      type: DT_INT32
+    }
     allowed_values {
       list {
-        type: DT_FLOAT
-        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
       }
     }
   }
 }
 op {
-  name: "AvgPool3D"
+  name: "ArgMin"
   input_arg {
     name: "input"
     type_attr: "T"
   }
+  input_arg {
+    name: "dimension"
+    type_attr: "Tidx"
+  }
   output_arg {
     name: "output"
-    type_attr: "T"
-  }
-  attr {
-    name: "ksize"
-    type: "list(int)"
-    has_minimum: true
-    minimum: 5
-  }
-  attr {
-    name: "strides"
-    type: "list(int)"
-    has_minimum: true
-    minimum: 5
+    type_attr: "output_type"
   }
   attr {
-    name: "padding"
-    type: "string"
+    name: "T"
+    type: "type"
     allowed_values {
       list {
-        s: "SAME"
-        s: "VALID"
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
       }
     }
   }
   attr {
-    name: "data_format"
-    type: "string"
+    name: "Tidx"
+    type: "type"
     default_value {
-      s: "NDHWC"
+      type: DT_INT32
     }
     allowed_values {
       list {
-        s: "NDHWC"
-        s: "NCDHW"
+        type: DT_INT32
+        type: DT_INT64
       }
     }
   }
   attr {
-    name: "T"
+    name: "output_type"
     type: "type"
+    default_value {
+      type: DT_INT64
+    }
     allowed_values {
       list {
-        type: DT_FLOAT
-        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
       }
     }
   }
 }
 op {
-  name: "AvgPool3DGrad"
+  name: "ArgMin"
   input_arg {
-    name: "orig_input_shape"
-    type: DT_INT32
+    name: "input"
+    type_attr: "T"
   }
   input_arg {
-    name: "grad"
-    type_attr: "T"
+    name: "dimension"
+    type_attr: "Tidx"
   }
   output_arg {
     name: "output"
-    type_attr: "T"
-  }
-  attr {
-    name: "ksize"
-    type: "list(int)"
-    has_minimum: true
-    minimum: 5
+    type_attr: "output_type"
   }
   attr {
-    name: "strides"
-    type: "list(int)"
-    has_minimum: true
-    minimum: 5
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+      }
+    }
   }
   attr {
-    name: "padding"
-    type: "string"
+    name: "Tidx"
+    type: "type"
+    default_value {
+      type: DT_INT32
+    }
     allowed_values {
       list {
-        s: "SAME"
-        s: "VALID"
+        type: DT_INT32
+        type: DT_INT64
       }
     }
   }
   attr {
-    name: "T"
+    name: "output_type"
     type: "type"
+    default_value {
+      type: DT_INT64
+    }
     allowed_values {
       list {
-        type: DT_FLOAT
-        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
       }
     }
   }
 }
 op {
-  name: "AvgPool3DGrad"
+  name: "ArgMin"
   input_arg {
-    name: "orig_input_shape"
-    type: DT_INT32
+    name: "input"
+    type_attr: "T"
   }
   input_arg {
-    name: "grad"
-    type_attr: "T"
+    name: "dimension"
+    type_attr: "Tidx"
   }
   output_arg {
     name: "output"
-    type_attr: "T"
-  }
-  attr {
-    name: "ksize"
-    type: "list(int)"
-    has_minimum: true
-    minimum: 5
-  }
-  attr {
-    name: "strides"
-    type: "list(int)"
-    has_minimum: true
-    minimum: 5
+    type_attr: "output_type"
   }
   attr {
-    name: "padding"
-    type: "string"
+    name: "T"
+    type: "type"
     allowed_values {
       list {
-        s: "SAME"
-        s: "VALID"
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
   attr {
-    name: "data_format"
-    type: "string"
+    name: "Tidx"
+    type: "type"
     default_value {
-      s: "NDHWC"
+      type: DT_INT32
     }
     allowed_values {
       list {
-        s: "NDHWC"
-        s: "NCDHW"
+        type: DT_INT32
+        type: DT_INT64
       }
     }
   }
   attr {
-    name: "T"
+    name: "output_type"
     type: "type"
+    default_value {
+      type: DT_INT64
+    }
     allowed_values {
       list {
-        type: DT_FLOAT
-        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
       }
     }
   }
 }
 op {
-  name: "AvgPoolGrad"
-  input_arg {
-    name: "orig_input_shape"
-    type: DT_INT32
-  }
+  name: "AsString"
   input_arg {
-    name: "grad"
+    name: "input"
     type_attr: "T"
   }
   output_arg {
     name: "output"
-    type_attr: "T"
+    type: DT_STRING
   }
   attr {
-    name: "ksize"
-    type: "list(int)"
-    has_minimum: true
-    minimum: 4
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_COMPLEX64
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_BOOL
+        type: DT_INT8
+      }
+    }
   }
   attr {
-    name: "strides"
-    type: "list(int)"
-    has_minimum: true
-    minimum: 4
+    name: "precision"
+    type: "int"
+    default_value {
+      i: -1
+    }
   }
   attr {
-    name: "padding"
-    type: "string"
-    allowed_values {
-      list {
-        s: "SAME"
-        s: "VALID"
-      }
+    name: "scientific"
+    type: "bool"
+    default_value {
+      b: false
     }
   }
   attr {
-    name: "data_format"
-    type: "string"
+    name: "shortest"
+    type: "bool"
     default_value {
-      s: "NHWC"
+      b: false
     }
-    allowed_values {
-      list {
-        s: "NHWC"
-        s: "NCHW"
-      }
+  }
+  attr {
+    name: "width"
+    type: "int"
+    default_value {
+      i: -1
     }
   }
   attr {
-    name: "T"
-    type: "type"
-    allowed_values {
-      list {
-        type: DT_FLOAT
-        type: DT_HALF
-        type: DT_DOUBLE
-      }
+    name: "fill"
+    type: "string"
+    default_value {
+      s: ""
     }
   }
 }
 op {
-  name: "AvgPoolGrad"
-  input_arg {
-    name: "orig_input_shape"
-    type: DT_INT32
-  }
+  name: "Asin"
   input_arg {
-    name: "grad"
+    name: "x"
     type_attr: "T"
   }
   output_arg {
-    name: "output"
+    name: "y"
     type_attr: "T"
   }
   attr {
-    name: "ksize"
-    type: "list(int)"
-    has_minimum: true
-    minimum: 4
-  }
-  attr {
-    name: "strides"
-    type: "list(int)"
-    has_minimum: true
-    minimum: 4
-  }
-  attr {
-    name: "padding"
-    type: "string"
+    name: "T"
+    type: "type"
     allowed_values {
       list {
-        s: "SAME"
-        s: "VALID"
+        type: DT_HALF
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
       }
     }
   }
-  attr {
-    name: "data_format"
-    type: "string"
-    default_value {
-      s: "NHWC"
-    }
-    allowed_values {
-      list {
-        s: "NHWC"
-        s: "NCHW"
-      }
-    }
+}
+op {
+  name: "Asin"
+  input_arg {
+    name: "x"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "y"
+    type_attr: "T"
   }
   attr {
     name: "T"
     type: "type"
     allowed_values {
       list {
+        type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_HALF
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
       }
     }
   }
 }
 op {
-  name: "AvgPoolGrad"
-  input_arg {
-    name: "orig_input_shape"
-    type: DT_INT32
-  }
+  name: "Asinh"
   input_arg {
-    name: "grad"
+    name: "x"
     type_attr: "T"
   }
   output_arg {
-    name: "output"
+    name: "y"
     type_attr: "T"
   }
   attr {
-    name: "ksize"
-    type: "list(int)"
-    has_minimum: true
-    minimum: 4
-  }
-  attr {
-    name: "strides"
-    type: "list(int)"
-    has_minimum: true
-    minimum: 4
-  }
-  attr {
-    name: "padding"
-    type: "string"
+    name: "T"
+    type: "type"
     allowed_values {
       list {
-        s: "SAME"
-        s: "VALID"
+        type: DT_HALF
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
       }
     }
   }
-  attr {
-    name: "data_format"
-    type: "string"
-    default_value {
-      s: "NHWC"
-    }
-    allowed_values {
-      list {
-        s: "NHWC"
-        s: "NCHW"
-      }
-    }
+}
+op {
+  name: "Asinh"
+  input_arg {
+    name: "x"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "y"
+    type_attr: "T"
   }
   attr {
     name: "T"
@@ -4328,208 +4857,137 @@ op {
     allowed_values {
       list {
         type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
       }
     }
   }
 }
 op {
-  name: "Barrier"
-  output_arg {
-    name: "handle"
-    type: DT_STRING
-    is_ref: true
+  name: "Assert"
+  input_arg {
+    name: "condition"
+    type: DT_BOOL
+  }
+  input_arg {
+    name: "data"
+    type_list_attr: "T"
   }
   attr {
-    name: "component_types"
+    name: "T"
     type: "list(type)"
     has_minimum: true
     minimum: 1
   }
   attr {
-    name: "shapes"
-    type: "list(shape)"
-    default_value {
-      list {
-      }
-    }
-    has_minimum: true
-  }
-  attr {
-    name: "capacity"
+    name: "summarize"
     type: "int"
     default_value {
-      i: -1
-    }
-  }
-  attr {
-    name: "container"
-    type: "string"
-    default_value {
-      s: ""
-    }
-  }
-  attr {
-    name: "shared_name"
-    type: "string"
-    default_value {
-      s: ""
+      i: 3
     }
   }
   is_stateful: true
 }
 op {
-  name: "BarrierClose"
+  name: "Assign"
   input_arg {
-    name: "handle"
-    type: DT_STRING
+    name: "ref"
+    type_attr: "T"
     is_ref: true
   }
-  attr {
-    name: "cancel_pending_enqueues"
-    type: "bool"
-    default_value {
-      b: false
-    }
-  }
-}
-op {
-  name: "BarrierIncompleteSize"
   input_arg {
-    name: "handle"
-    type: DT_STRING
-    is_ref: true
+    name: "value"
+    type_attr: "T"
   }
   output_arg {
-    name: "size"
-    type: DT_INT32
-  }
-}
-op {
-  name: "BarrierInsertMany"
-  input_arg {
-    name: "handle"
-    type: DT_STRING
-    is_ref: true
-  }
-  input_arg {
-    name: "keys"
-    type: DT_STRING
-  }
-  input_arg {
-    name: "values"
+    name: "output_ref"
     type_attr: "T"
+    is_ref: true
   }
   attr {
     name: "T"
     type: "type"
   }
   attr {
-    name: "component_index"
-    type: "int"
-  }
-}
-op {
-  name: "BarrierReadySize"
-  input_arg {
-    name: "handle"
-    type: DT_STRING
-    is_ref: true
-  }
-  output_arg {
-    name: "size"
-    type: DT_INT32
-  }
-}
-op {
-  name: "BarrierTakeMany"
-  input_arg {
-    name: "handle"
-    type: DT_STRING
-    is_ref: true
-  }
-  input_arg {
-    name: "num_elements"
-    type: DT_INT32
-  }
-  output_arg {
-    name: "indices"
-    type: DT_INT64
-  }
-  output_arg {
-    name: "keys"
-    type: DT_STRING
-  }
-  output_arg {
-    name: "values"
-    type_list_attr: "component_types"
-  }
-  attr {
-    name: "component_types"
-    type: "list(type)"
-    has_minimum: true
-    minimum: 1
-  }
-  attr {
-    name: "allow_small_batch"
+    name: "validate_shape"
     type: "bool"
     default_value {
-      b: false
+      b: true
     }
   }
   attr {
-    name: "wait_for_incomplete"
+    name: "use_locking"
     type: "bool"
     default_value {
-      b: false
-    }
-  }
-  attr {
-    name: "timeout_ms"
-    type: "int"
-    default_value {
-      i: -1
+      b: true
     }
   }
+  allows_uninitialized_input: true
 }
 op {
-  name: "BatchCholesky"
+  name: "AssignAdd"
   input_arg {
-    name: "input"
+    name: "ref"
+    type_attr: "T"
+    is_ref: true
+  }
+  input_arg {
+    name: "value"
     type_attr: "T"
   }
   output_arg {
-    name: "output"
+    name: "output_ref"
     type_attr: "T"
+    is_ref: true
   }
   attr {
     name: "T"
     type: "type"
     allowed_values {
       list {
-        type: DT_DOUBLE
         type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
       }
     }
   }
-  deprecation {
-    version: 13
+  attr {
+    name: "use_locking"
+    type: "bool"
+    default_value {
+      b: false
+    }
   }
 }
 op {
-  name: "BatchCholeskyGrad"
+  name: "AssignAdd"
   input_arg {
-    name: "l"
+    name: "ref"
     type_attr: "T"
+    is_ref: true
   }
   input_arg {
-    name: "grad"
+    name: "value"
     type_attr: "T"
   }
   output_arg {
-    name: "output"
+    name: "output_ref"
     type_attr: "T"
+    is_ref: true
   }
   attr {
     name: "T"
@@ -4538,189 +4996,136 @@ op {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
       }
     }
   }
-  deprecation {
-    version: 13
-  }
-}
-op {
-  name: "BatchDataset"
-  input_arg {
-    name: "input_dataset"
-    type: DT_VARIANT
-  }
-  input_arg {
-    name: "batch_size"
-    type: DT_INT64
-  }
-  output_arg {
-    name: "handle"
-    type: DT_VARIANT
-  }
-  attr {
-    name: "output_types"
-    type: "list(type)"
-    has_minimum: true
-    minimum: 1
-  }
   attr {
-    name: "output_shapes"
-    type: "list(shape)"
-    has_minimum: true
-    minimum: 1
+    name: "use_locking"
+    type: "bool"
+    default_value {
+      b: false
+    }
   }
-  is_stateful: true
 }
 op {
-  name: "BatchDataset"
+  name: "AssignAdd"
   input_arg {
-    name: "input_dataset"
-    type: DT_VARIANT
+    name: "ref"
+    type_attr: "T"
+    is_ref: true
   }
   input_arg {
-    name: "batch_size"
-    type: DT_INT64
+    name: "value"
+    type_attr: "T"
   }
   output_arg {
-    name: "handle"
-    type: DT_VARIANT
+    name: "output_ref"
+    type_attr: "T"
+    is_ref: true
   }
   attr {
-    name: "output_types"
-    type: "list(type)"
-    has_minimum: true
-    minimum: 1
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
+      }
+    }
   }
   attr {
-    name: "output_shapes"
-    type: "list(shape)"
-    has_minimum: true
-    minimum: 1
-  }
-}
-op {
-  name: "BatchFFT"
-  input_arg {
-    name: "input"
-    type: DT_COMPLEX64
-  }
-  output_arg {
-    name: "output"
-    type: DT_COMPLEX64
-  }
-  deprecation {
-    version: 15
-  }
-}
-op {
-  name: "BatchFFT2D"
-  input_arg {
-    name: "input"
-    type: DT_COMPLEX64
-  }
-  output_arg {
-    name: "output"
-    type: DT_COMPLEX64
-  }
-  deprecation {
-    version: 15
-  }
-}
-op {
-  name: "BatchFFT3D"
-  input_arg {
-    name: "input"
-    type: DT_COMPLEX64
-  }
-  output_arg {
-    name: "output"
-    type: DT_COMPLEX64
-  }
-  deprecation {
-    version: 15
-  }
-}
-op {
-  name: "BatchIFFT"
-  input_arg {
-    name: "input"
-    type: DT_COMPLEX64
-  }
-  output_arg {
-    name: "output"
-    type: DT_COMPLEX64
-  }
-  deprecation {
-    version: 15
+    name: "use_locking"
+    type: "bool"
+    default_value {
+      b: false
+    }
   }
 }
 op {
-  name: "BatchIFFT2D"
+  name: "AssignAddVariableOp"
   input_arg {
-    name: "input"
-    type: DT_COMPLEX64
-  }
-  output_arg {
-    name: "output"
-    type: DT_COMPLEX64
-  }
-  deprecation {
-    version: 15
+    name: "resource"
+    type: DT_RESOURCE
   }
-}
-op {
-  name: "BatchIFFT3D"
   input_arg {
-    name: "input"
-    type: DT_COMPLEX64
-  }
-  output_arg {
-    name: "output"
-    type: DT_COMPLEX64
+    name: "value"
+    type_attr: "dtype"
   }
-  deprecation {
-    version: 15
+  attr {
+    name: "dtype"
+    type: "type"
   }
+  is_stateful: true
 }
 op {
-  name: "BatchMatMul"
+  name: "AssignSub"
   input_arg {
-    name: "x"
+    name: "ref"
     type_attr: "T"
+    is_ref: true
   }
   input_arg {
-    name: "y"
+    name: "value"
     type_attr: "T"
   }
   output_arg {
-    name: "output"
+    name: "output_ref"
     type_attr: "T"
+    is_ref: true
   }
   attr {
     name: "T"
     type: "type"
     allowed_values {
       list {
-        type: DT_HALF
         type: DT_FLOAT
         type: DT_DOUBLE
+        type: DT_INT64
         type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
         type: DT_COMPLEX64
         type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
       }
     }
   }
   attr {
-    name: "adj_x"
-    type: "bool"
-    default_value {
-      b: false
-    }
-  }
-  attr {
-    name: "adj_y"
+    name: "use_locking"
     type: "bool"
     default_value {
       b: false
@@ -4728,40 +5133,20 @@ op {
   }
 }
 op {
-  name: "BatchMatrixBandPart"
-  input_arg {
-    name: "input"
-    type_attr: "T"
-  }
-  input_arg {
-    name: "num_lower"
-    type: DT_INT64
-  }
+  name: "AssignSub"
   input_arg {
-    name: "num_upper"
-    type: DT_INT64
-  }
-  output_arg {
-    name: "band"
+    name: "ref"
     type_attr: "T"
+    is_ref: true
   }
-  attr {
-    name: "T"
-    type: "type"
-  }
-  deprecation {
-    version: 14
-  }
-}
-op {
-  name: "BatchMatrixDeterminant"
   input_arg {
-    name: "input"
+    name: "value"
     type_attr: "T"
   }
   output_arg {
-    name: "output"
+    name: "output_ref"
     type_attr: "T"
+    is_ref: true
   }
   attr {
     name: "T"
@@ -4770,22 +5155,46 @@ op {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
       }
     }
   }
-  deprecation {
-    version: 13
+  attr {
+    name: "use_locking"
+    type: "bool"
+    default_value {
+      b: false
+    }
   }
 }
 op {
-  name: "BatchMatrixDeterminant"
+  name: "AssignSub"
   input_arg {
-    name: "input"
+    name: "ref"
+    type_attr: "T"
+    is_ref: true
+  }
+  input_arg {
+    name: "value"
     type_attr: "T"
   }
   output_arg {
-    name: "output"
+    name: "output_ref"
     type_attr: "T"
+    is_ref: true
   }
   attr {
     name: "T"
@@ -4794,155 +5203,154 @@ op {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
         type: DT_COMPLEX64
         type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
-  deprecation {
-    version: 13
+  attr {
+    name: "use_locking"
+    type: "bool"
+    default_value {
+      b: false
+    }
   }
 }
 op {
-  name: "BatchMatrixDiag"
+  name: "AssignSubVariableOp"
   input_arg {
-    name: "diagonal"
-    type_attr: "T"
+    name: "resource"
+    type: DT_RESOURCE
   }
-  output_arg {
-    name: "output"
-    type_attr: "T"
+  input_arg {
+    name: "value"
+    type_attr: "dtype"
   }
   attr {
-    name: "T"
+    name: "dtype"
     type: "type"
   }
-  deprecation {
-    version: 14
-  }
+  is_stateful: true
 }
 op {
-  name: "BatchMatrixDiagPart"
+  name: "AssignVariableOp"
   input_arg {
-    name: "input"
-    type_attr: "T"
+    name: "resource"
+    type: DT_RESOURCE
   }
-  output_arg {
-    name: "diagonal"
-    type_attr: "T"
+  input_arg {
+    name: "value"
+    type_attr: "dtype"
   }
   attr {
-    name: "T"
+    name: "dtype"
     type: "type"
   }
-  deprecation {
-    version: 14
-  }
+  is_stateful: true
 }
 op {
-  name: "BatchMatrixInverse"
+  name: "Atan"
   input_arg {
-    name: "input"
+    name: "x"
     type_attr: "T"
   }
   output_arg {
-    name: "output"
+    name: "y"
     type_attr: "T"
   }
-  attr {
-    name: "adjoint"
-    type: "bool"
-    default_value {
-      b: false
-    }
-  }
   attr {
     name: "T"
     type: "type"
     allowed_values {
       list {
-        type: DT_DOUBLE
+        type: DT_HALF
         type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
       }
     }
   }
-  deprecation {
-    version: 13
-  }
 }
 op {
-  name: "BatchMatrixSetDiag"
-  input_arg {
-    name: "input"
-    type_attr: "T"
-  }
+  name: "Atan"
   input_arg {
-    name: "diagonal"
+    name: "x"
     type_attr: "T"
   }
   output_arg {
-    name: "output"
+    name: "y"
     type_attr: "T"
   }
   attr {
     name: "T"
     type: "type"
-  }
-  deprecation {
-    version: 14
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_BFLOAT16
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+      }
+    }
   }
 }
 op {
-  name: "BatchMatrixSolve"
+  name: "Atan2"
   input_arg {
-    name: "matrix"
+    name: "y"
     type_attr: "T"
   }
   input_arg {
-    name: "rhs"
+    name: "x"
     type_attr: "T"
   }
   output_arg {
-    name: "output"
+    name: "z"
     type_attr: "T"
   }
-  attr {
-    name: "adjoint"
-    type: "bool"
-    default_value {
-      b: false
-    }
-  }
   attr {
     name: "T"
     type: "type"
     allowed_values {
       list {
-        type: DT_DOUBLE
         type: DT_FLOAT
+        type: DT_DOUBLE
       }
     }
   }
-  deprecation {
-    version: 13
-  }
 }
 op {
-  name: "BatchMatrixSolveLs"
+  name: "Atan2"
   input_arg {
-    name: "matrix"
+    name: "y"
     type_attr: "T"
   }
   input_arg {
-    name: "rhs"
+    name: "x"
     type_attr: "T"
   }
-  input_arg {
-    name: "l2_regularizer"
-    type: DT_DOUBLE
-  }
   output_arg {
-    name: "output"
+    name: "z"
     type_attr: "T"
   }
   attr {
@@ -4950,88 +5358,45 @@ op {
     type: "type"
     allowed_values {
       list {
-        type: DT_DOUBLE
+        type: DT_BFLOAT16
         type: DT_FLOAT
+        type: DT_DOUBLE
       }
     }
   }
-  attr {
-    name: "fast"
-    type: "bool"
-    default_value {
-      b: true
-    }
-  }
-  deprecation {
-    version: 13
-  }
 }
 op {
-  name: "BatchMatrixTriangularSolve"
-  input_arg {
-    name: "matrix"
-    type_attr: "T"
-  }
+  name: "Atanh"
   input_arg {
-    name: "rhs"
+    name: "x"
     type_attr: "T"
   }
   output_arg {
-    name: "output"
+    name: "y"
     type_attr: "T"
   }
-  attr {
-    name: "lower"
-    type: "bool"
-    default_value {
-      b: true
-    }
-  }
-  attr {
-    name: "adjoint"
-    type: "bool"
-    default_value {
-      b: false
-    }
-  }
   attr {
     name: "T"
     type: "type"
     allowed_values {
       list {
-        type: DT_DOUBLE
+        type: DT_HALF
         type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
       }
     }
   }
-  deprecation {
-    version: 13
-  }
 }
 op {
-  name: "BatchNormWithGlobalNormalization"
-  input_arg {
-    name: "t"
-    type_attr: "T"
-  }
-  input_arg {
-    name: "m"
-    type_attr: "T"
-  }
-  input_arg {
-    name: "v"
-    type_attr: "T"
-  }
-  input_arg {
-    name: "beta"
-    type_attr: "T"
-  }
+  name: "Atanh"
   input_arg {
-    name: "gamma"
+    name: "x"
     type_attr: "T"
   }
   output_arg {
-    name: "result"
+    name: "y"
     type_attr: "T"
   }
   attr {
@@ -5039,138 +5404,145 @@ op {
     type: "type"
     allowed_values {
       list {
+        type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT64
-        type: DT_INT32
-        type: DT_UINT8
-        type: DT_UINT16
-        type: DT_INT16
-        type: DT_INT8
         type: DT_COMPLEX64
         type: DT_COMPLEX128
-        type: DT_QINT8
-        type: DT_QUINT8
-        type: DT_QINT32
-        type: DT_HALF
       }
     }
   }
+}
+op {
+  name: "AudioSpectrogram"
+  input_arg {
+    name: "input"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "spectrogram"
+    type: DT_FLOAT
+  }
   attr {
-    name: "variance_epsilon"
-    type: "float"
+    name: "window_size"
+    type: "int"
   }
   attr {
-    name: "scale_after_normalization"
-    type: "bool"
+    name: "stride"
+    type: "int"
   }
-  deprecation {
-    version: 9
+  attr {
+    name: "magnitude_squared"
+    type: "bool"
+    default_value {
+      b: false
+    }
   }
 }
 op {
-  name: "BatchNormWithGlobalNormalization"
+  name: "AudioSummary"
   input_arg {
-    name: "t"
-    type_attr: "T"
-  }
-  input_arg {
-    name: "m"
-    type_attr: "T"
-  }
-  input_arg {
-    name: "v"
-    type_attr: "T"
-  }
-  input_arg {
-    name: "beta"
-    type_attr: "T"
+    name: "tag"
+    type: DT_STRING
   }
   input_arg {
-    name: "gamma"
-    type_attr: "T"
+    name: "tensor"
+    type: DT_FLOAT
   }
   output_arg {
-    name: "result"
-    type_attr: "T"
-  }
-  attr {
-    name: "T"
-    type: "type"
-    allowed_values {
-      list {
-        type: DT_FLOAT
-        type: DT_DOUBLE
-        type: DT_INT64
-        type: DT_INT32
-        type: DT_UINT8
-        type: DT_UINT16
-        type: DT_INT16
-        type: DT_INT8
-        type: DT_COMPLEX64
-        type: DT_COMPLEX128
-        type: DT_QINT8
-        type: DT_QUINT8
-        type: DT_QINT32
-        type: DT_HALF
-        type: DT_UINT32
-        type: DT_UINT64
-      }
-    }
+    name: "summary"
+    type: DT_STRING
   }
   attr {
-    name: "variance_epsilon"
+    name: "sample_rate"
     type: "float"
   }
   attr {
-    name: "scale_after_normalization"
-    type: "bool"
+    name: "max_outputs"
+    type: "int"
+    default_value {
+      i: 3
+    }
+    has_minimum: true
+    minimum: 1
   }
   deprecation {
-    version: 9
+    version: 15
   }
 }
 op {
-  name: "BatchNormWithGlobalNormalizationGrad"
+  name: "AudioSummaryV2"
   input_arg {
-    name: "t"
-    type_attr: "T"
+    name: "tag"
+    type: DT_STRING
   }
   input_arg {
-    name: "m"
-    type_attr: "T"
+    name: "tensor"
+    type: DT_FLOAT
   }
   input_arg {
-    name: "v"
-    type_attr: "T"
+    name: "sample_rate"
+    type: DT_FLOAT
   }
-  input_arg {
-    name: "gamma"
-    type_attr: "T"
+  output_arg {
+    name: "summary"
+    type: DT_STRING
+  }
+  attr {
+    name: "max_outputs"
+    type: "int"
+    default_value {
+      i: 3
+    }
+    has_minimum: true
+    minimum: 1
   }
+}
+op {
+  name: "AvgPool"
   input_arg {
-    name: "backprop"
+    name: "value"
     type_attr: "T"
   }
   output_arg {
-    name: "dx"
+    name: "output"
     type_attr: "T"
   }
-  output_arg {
-    name: "dm"
-    type_attr: "T"
+  attr {
+    name: "ksize"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 4
   }
-  output_arg {
-    name: "dv"
-    type_attr: "T"
+  attr {
+    name: "strides"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 4
   }
-  output_arg {
-    name: "db"
-    type_attr: "T"
+  attr {
+    name: "padding"
+    type: "string"
+    allowed_values {
+      list {
+        s: "SAME"
+        s: "VALID"
+      }
+    }
   }
-  output_arg {
-    name: "dg"
-    type_attr: "T"
+  attr {
+    name: "data_format"
+    type: "string"
+    default_value {
+      s: "NHWC"
+    }
+    allowed_values {
+      list {
+        s: "NHWC"
+        s: "NCHW"
+      }
+    }
   }
   attr {
     name: "T"
@@ -5178,75 +5550,56 @@ op {
     allowed_values {
       list {
         type: DT_FLOAT
-        type: DT_DOUBLE
-        type: DT_INT64
-        type: DT_INT32
-        type: DT_UINT8
-        type: DT_UINT16
-        type: DT_INT16
-        type: DT_INT8
-        type: DT_COMPLEX64
-        type: DT_COMPLEX128
-        type: DT_QINT8
-        type: DT_QUINT8
-        type: DT_QINT32
         type: DT_HALF
+        type: DT_DOUBLE
       }
     }
   }
-  attr {
-    name: "variance_epsilon"
-    type: "float"
-  }
-  attr {
-    name: "scale_after_normalization"
-    type: "bool"
-  }
-  deprecation {
-    version: 9
-  }
 }
 op {
-  name: "BatchNormWithGlobalNormalizationGrad"
-  input_arg {
-    name: "t"
-    type_attr: "T"
-  }
-  input_arg {
-    name: "m"
-    type_attr: "T"
-  }
-  input_arg {
-    name: "v"
-    type_attr: "T"
-  }
-  input_arg {
-    name: "gamma"
-    type_attr: "T"
-  }
+  name: "AvgPool"
   input_arg {
-    name: "backprop"
+    name: "value"
     type_attr: "T"
   }
   output_arg {
-    name: "dx"
+    name: "output"
     type_attr: "T"
   }
-  output_arg {
-    name: "dm"
-    type_attr: "T"
+  attr {
+    name: "ksize"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 4
   }
-  output_arg {
-    name: "dv"
-    type_attr: "T"
+  attr {
+    name: "strides"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 4
   }
-  output_arg {
-    name: "db"
-    type_attr: "T"
+  attr {
+    name: "padding"
+    type: "string"
+    allowed_values {
+      list {
+        s: "SAME"
+        s: "VALID"
+      }
+    }
   }
-  output_arg {
-    name: "dg"
-    type_attr: "T"
+  attr {
+    name: "data_format"
+    type: "string"
+    default_value {
+      s: "NHWC"
+    }
+    allowed_values {
+      list {
+        s: "NHWC"
+        s: "NCHW"
+      }
+    }
   }
   attr {
     name: "T"
@@ -5255,39 +5608,15 @@ op {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT64
-        type: DT_INT32
-        type: DT_UINT8
-        type: DT_UINT16
-        type: DT_INT16
-        type: DT_INT8
-        type: DT_COMPLEX64
-        type: DT_COMPLEX128
-        type: DT_QINT8
-        type: DT_QUINT8
-        type: DT_QINT32
         type: DT_HALF
-        type: DT_UINT32
-        type: DT_UINT64
       }
     }
   }
-  attr {
-    name: "variance_epsilon"
-    type: "float"
-  }
-  attr {
-    name: "scale_after_normalization"
-    type: "bool"
-  }
-  deprecation {
-    version: 9
-  }
 }
 op {
-  name: "BatchSelfAdjointEig"
+  name: "AvgPool"
   input_arg {
-    name: "input"
+    name: "value"
     type_attr: "T"
   }
   output_arg {
@@ -5295,38 +5624,38 @@ op {
     type_attr: "T"
   }
   attr {
-    name: "T"
-    type: "type"
+    name: "ksize"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 4
+  }
+  attr {
+    name: "strides"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 4
+  }
+  attr {
+    name: "padding"
+    type: "string"
     allowed_values {
       list {
-        type: DT_DOUBLE
-        type: DT_FLOAT
+        s: "SAME"
+        s: "VALID"
       }
     }
   }
-  deprecation {
-    version: 11
-  }
-}
-op {
-  name: "BatchSelfAdjointEigV2"
-  input_arg {
-    name: "input"
-    type_attr: "T"
-  }
-  output_arg {
-    name: "e"
-    type_attr: "T"
-  }
-  output_arg {
-    name: "v"
-    type_attr: "T"
-  }
   attr {
-    name: "compute_v"
-    type: "bool"
+    name: "data_format"
+    type: "string"
     default_value {
-      b: true
+      s: "NHWC"
+    }
+    allowed_values {
+      list {
+        s: "NHWC"
+        s: "NCHW"
+      }
     }
   }
   attr {
@@ -5334,45 +5663,56 @@ op {
     type: "type"
     allowed_values {
       list {
-        type: DT_DOUBLE
+        type: DT_HALF
         type: DT_FLOAT
+        type: DT_DOUBLE
       }
     }
   }
-  deprecation {
-    version: 13
-  }
 }
 op {
-  name: "BatchSvd"
+  name: "AvgPool"
   input_arg {
-    name: "input"
+    name: "value"
     type_attr: "T"
   }
   output_arg {
-    name: "s"
+    name: "output"
     type_attr: "T"
   }
-  output_arg {
-    name: "u"
-    type_attr: "T"
+  attr {
+    name: "ksize"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 4
   }
-  output_arg {
-    name: "v"
-    type_attr: "T"
+  attr {
+    name: "strides"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 4
   }
   attr {
-    name: "compute_uv"
-    type: "bool"
-    default_value {
-      b: true
+    name: "padding"
+    type: "string"
+    allowed_values {
+      list {
+        s: "SAME"
+        s: "VALID"
+      }
     }
   }
   attr {
-    name: "full_matrices"
-    type: "bool"
+    name: "data_format"
+    type: "string"
     default_value {
-      b: false
+      s: "NHWC"
+    }
+    allowed_values {
+      list {
+        s: "NHWC"
+        s: "NCHW"
+      }
     }
   }
   attr {
@@ -5380,122 +5720,102 @@ op {
     type: "type"
     allowed_values {
       list {
-        type: DT_DOUBLE
+        type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
-        type: DT_COMPLEX64
-        type: DT_COMPLEX128
+        type: DT_DOUBLE
       }
     }
   }
-  deprecation {
-    version: 13
-  }
 }
 op {
-  name: "BatchToSpace"
+  name: "AvgPool3D"
   input_arg {
     name: "input"
     type_attr: "T"
   }
-  input_arg {
-    name: "crops"
-    type_attr: "Tidx"
-  }
   output_arg {
     name: "output"
     type_attr: "T"
   }
   attr {
-    name: "T"
-    type: "type"
+    name: "ksize"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 5
   }
   attr {
-    name: "block_size"
-    type: "int"
+    name: "strides"
+    type: "list(int)"
     has_minimum: true
-    minimum: 2
+    minimum: 5
   }
   attr {
-    name: "Tidx"
-    type: "type"
-    default_value {
-      type: DT_INT32
+    name: "padding"
+    type: "string"
+    allowed_values {
+      list {
+        s: "SAME"
+        s: "VALID"
+      }
     }
+  }
+  attr {
+    name: "T"
+    type: "type"
     allowed_values {
       list {
-        type: DT_INT32
-        type: DT_INT64
+        type: DT_FLOAT
+        type: DT_DOUBLE
       }
     }
   }
 }
 op {
-  name: "BatchToSpaceND"
+  name: "AvgPool3D"
   input_arg {
     name: "input"
     type_attr: "T"
   }
-  input_arg {
-    name: "block_shape"
-    type_attr: "Tblock_shape"
-  }
-  input_arg {
-    name: "crops"
-    type_attr: "Tcrops"
-  }
   output_arg {
     name: "output"
     type_attr: "T"
   }
   attr {
-    name: "T"
-    type: "type"
+    name: "ksize"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 5
   }
   attr {
-    name: "Tblock_shape"
-    type: "type"
-    default_value {
-      type: DT_INT32
-    }
+    name: "strides"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 5
+  }
+  attr {
+    name: "padding"
+    type: "string"
     allowed_values {
       list {
-        type: DT_INT32
-        type: DT_INT64
+        s: "SAME"
+        s: "VALID"
       }
     }
   }
   attr {
-    name: "Tcrops"
-    type: "type"
+    name: "data_format"
+    type: "string"
     default_value {
-      type: DT_INT32
+      s: "NDHWC"
     }
     allowed_values {
       list {
-        type: DT_INT32
-        type: DT_INT64
+        s: "NDHWC"
+        s: "NCDHW"
       }
     }
   }
-}
-op {
-  name: "Betainc"
-  input_arg {
-    name: "a"
-    type_attr: "T"
-  }
-  input_arg {
-    name: "b"
-    type_attr: "T"
-  }
-  input_arg {
-    name: "x"
-    type_attr: "T"
-  }
-  output_arg {
-    name: "z"
-    type_attr: "T"
-  }
   attr {
     name: "T"
     type: "type"
@@ -5508,13 +5828,9 @@ op {
   }
 }
 op {
-  name: "BiasAdd"
-  input_arg {
-    name: "value"
-    type_attr: "T"
-  }
+  name: "AvgPool3D"
   input_arg {
-    name: "bias"
+    name: "input"
     type_attr: "T"
   }
   output_arg {
@@ -5522,24 +5838,24 @@ op {
     type_attr: "T"
   }
   attr {
-    name: "T"
-    type: "type"
+    name: "ksize"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 5
+  }
+  attr {
+    name: "strides"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 5
+  }
+  attr {
+    name: "padding"
+    type: "string"
     allowed_values {
       list {
-        type: DT_FLOAT
-        type: DT_DOUBLE
-        type: DT_INT64
-        type: DT_INT32
-        type: DT_UINT8
-        type: DT_UINT16
-        type: DT_INT16
-        type: DT_INT8
-        type: DT_COMPLEX64
-        type: DT_COMPLEX128
-        type: DT_QINT8
-        type: DT_QUINT8
-        type: DT_QINT32
-        type: DT_HALF
+        s: "SAME"
+        s: "VALID"
       }
     }
   }
@@ -5547,24 +5863,35 @@ op {
     name: "data_format"
     type: "string"
     default_value {
-      s: "NHWC"
+      s: "NDHWC"
     }
     allowed_values {
       list {
-        s: "NHWC"
-        s: "NCHW"
+        s: "NDHWC"
+        s: "NCDHW"
+      }
+    }
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_BFLOAT16
+        type: DT_FLOAT
+        type: DT_DOUBLE
       }
     }
   }
 }
 op {
-  name: "BiasAdd"
+  name: "AvgPool3DGrad"
   input_arg {
-    name: "value"
-    type_attr: "T"
+    name: "orig_input_shape"
+    type: DT_INT32
   }
   input_arg {
-    name: "bias"
+    name: "grad"
     type_attr: "T"
   }
   output_arg {
@@ -5572,47 +5899,46 @@ op {
     type_attr: "T"
   }
   attr {
-    name: "T"
-    type: "type"
+    name: "ksize"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 5
+  }
+  attr {
+    name: "strides"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 5
+  }
+  attr {
+    name: "padding"
+    type: "string"
     allowed_values {
       list {
-        type: DT_FLOAT
-        type: DT_DOUBLE
-        type: DT_INT64
-        type: DT_INT32
-        type: DT_UINT8
-        type: DT_UINT16
-        type: DT_INT16
-        type: DT_INT8
-        type: DT_COMPLEX64
-        type: DT_COMPLEX128
-        type: DT_QINT8
-        type: DT_QUINT8
-        type: DT_QINT32
-        type: DT_HALF
-        type: DT_UINT32
-        type: DT_UINT64
+        s: "SAME"
+        s: "VALID"
       }
     }
   }
   attr {
-    name: "data_format"
-    type: "string"
-    default_value {
-      s: "NHWC"
-    }
+    name: "T"
+    type: "type"
     allowed_values {
       list {
-        s: "NHWC"
-        s: "NCHW"
+        type: DT_FLOAT
+        type: DT_DOUBLE
       }
     }
   }
 }
 op {
-  name: "BiasAddGrad"
+  name: "AvgPool3DGrad"
   input_arg {
-    name: "out_backprop"
+    name: "orig_input_shape"
+    type: DT_INT32
+  }
+  input_arg {
+    name: "grad"
     type_attr: "T"
   }
   output_arg {
@@ -5620,24 +5946,24 @@ op {
     type_attr: "T"
   }
   attr {
-    name: "T"
-    type: "type"
+    name: "ksize"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 5
+  }
+  attr {
+    name: "strides"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 5
+  }
+  attr {
+    name: "padding"
+    type: "string"
     allowed_values {
       list {
-        type: DT_FLOAT
-        type: DT_DOUBLE
-        type: DT_INT64
-        type: DT_INT32
-        type: DT_UINT8
-        type: DT_UINT16
-        type: DT_INT16
-        type: DT_INT8
-        type: DT_COMPLEX64
-        type: DT_COMPLEX128
-        type: DT_QINT8
-        type: DT_QUINT8
-        type: DT_QINT32
-        type: DT_HALF
+        s: "SAME"
+        s: "VALID"
       }
     }
   }
@@ -5645,20 +5971,34 @@ op {
     name: "data_format"
     type: "string"
     default_value {
-      s: "NHWC"
+      s: "NDHWC"
     }
     allowed_values {
       list {
-        s: "NHWC"
-        s: "NCHW"
+        s: "NDHWC"
+        s: "NCDHW"
+      }
+    }
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
       }
     }
   }
 }
 op {
-  name: "BiasAddGrad"
+  name: "AvgPool3DGrad"
   input_arg {
-    name: "out_backprop"
+    name: "orig_input_shape"
+    type: DT_INT32
+  }
+  input_arg {
+    name: "grad"
     type_attr: "T"
   }
   output_arg {
@@ -5666,26 +6006,24 @@ op {
     type_attr: "T"
   }
   attr {
-    name: "T"
-    type: "type"
+    name: "ksize"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 5
+  }
+  attr {
+    name: "strides"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 5
+  }
+  attr {
+    name: "padding"
+    type: "string"
     allowed_values {
       list {
-        type: DT_FLOAT
-        type: DT_DOUBLE
-        type: DT_INT64
-        type: DT_INT32
-        type: DT_UINT8
-        type: DT_UINT16
-        type: DT_INT16
-        type: DT_INT8
-        type: DT_COMPLEX64
-        type: DT_COMPLEX128
-        type: DT_QINT8
-        type: DT_QUINT8
-        type: DT_QINT32
-        type: DT_HALF
-        type: DT_UINT32
-        type: DT_UINT64
+        s: "SAME"
+        s: "VALID"
       }
     }
   }
@@ -5693,718 +6031,492 @@ op {
     name: "data_format"
     type: "string"
     default_value {
-      s: "NHWC"
+      s: "NDHWC"
     }
     allowed_values {
       list {
-        s: "NHWC"
-        s: "NCHW"
+        s: "NDHWC"
+        s: "NCDHW"
       }
     }
   }
-}
-op {
-  name: "BiasAddV1"
-  input_arg {
-    name: "value"
-    type_attr: "T"
-  }
-  input_arg {
-    name: "bias"
-    type_attr: "T"
-  }
-  output_arg {
-    name: "output"
-    type_attr: "T"
-  }
   attr {
     name: "T"
     type: "type"
     allowed_values {
       list {
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT64
-        type: DT_INT32
-        type: DT_UINT8
-        type: DT_UINT16
-        type: DT_INT16
-        type: DT_INT8
-        type: DT_COMPLEX64
-        type: DT_COMPLEX128
-        type: DT_QINT8
-        type: DT_QUINT8
-        type: DT_QINT32
-        type: DT_HALF
       }
     }
   }
 }
 op {
-  name: "BiasAddV1"
+  name: "AvgPoolGrad"
   input_arg {
-    name: "value"
-    type_attr: "T"
+    name: "orig_input_shape"
+    type: DT_INT32
   }
   input_arg {
-    name: "bias"
+    name: "grad"
     type_attr: "T"
   }
   output_arg {
     name: "output"
     type_attr: "T"
   }
+  attr {
+    name: "ksize"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 4
+  }
+  attr {
+    name: "strides"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 4
+  }
+  attr {
+    name: "padding"
+    type: "string"
+    allowed_values {
+      list {
+        s: "SAME"
+        s: "VALID"
+      }
+    }
+  }
+  attr {
+    name: "data_format"
+    type: "string"
+    default_value {
+      s: "NHWC"
+    }
+    allowed_values {
+      list {
+        s: "NHWC"
+        s: "NCHW"
+      }
+    }
+  }
   attr {
     name: "T"
     type: "type"
     allowed_values {
       list {
         type: DT_FLOAT
-        type: DT_DOUBLE
-        type: DT_INT64
-        type: DT_INT32
-        type: DT_UINT8
-        type: DT_UINT16
-        type: DT_INT16
-        type: DT_INT8
-        type: DT_COMPLEX64
-        type: DT_COMPLEX128
-        type: DT_QINT8
-        type: DT_QUINT8
-        type: DT_QINT32
         type: DT_HALF
-        type: DT_UINT32
-        type: DT_UINT64
+        type: DT_DOUBLE
       }
     }
   }
 }
 op {
-  name: "Bincount"
-  input_arg {
-    name: "arr"
-    type: DT_INT32
-  }
+  name: "AvgPoolGrad"
   input_arg {
-    name: "size"
+    name: "orig_input_shape"
     type: DT_INT32
   }
   input_arg {
-    name: "weights"
+    name: "grad"
     type_attr: "T"
   }
   output_arg {
-    name: "bins"
+    name: "output"
     type_attr: "T"
   }
   attr {
-    name: "T"
-    type: "type"
+    name: "ksize"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 4
+  }
+  attr {
+    name: "strides"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 4
+  }
+  attr {
+    name: "padding"
+    type: "string"
     allowed_values {
       list {
-        type: DT_INT32
-        type: DT_INT64
-        type: DT_FLOAT
-        type: DT_DOUBLE
+        s: "SAME"
+        s: "VALID"
       }
     }
   }
-}
-op {
-  name: "Bitcast"
-  input_arg {
-    name: "input"
-    type_attr: "T"
-  }
-  output_arg {
-    name: "output"
-    type_attr: "type"
-  }
   attr {
-    name: "T"
-    type: "type"
+    name: "data_format"
+    type: "string"
+    default_value {
+      s: "NHWC"
+    }
     allowed_values {
       list {
-        type: DT_FLOAT
-        type: DT_DOUBLE
-        type: DT_INT64
-        type: DT_INT32
-        type: DT_UINT8
-        type: DT_UINT16
-        type: DT_INT16
-        type: DT_INT8
-        type: DT_COMPLEX64
-        type: DT_COMPLEX128
-        type: DT_QINT8
-        type: DT_QUINT8
-        type: DT_QINT32
-        type: DT_HALF
+        s: "NHWC"
+        s: "NCHW"
       }
     }
   }
   attr {
-    name: "type"
+    name: "T"
     type: "type"
     allowed_values {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT64
-        type: DT_INT32
-        type: DT_UINT8
-        type: DT_UINT16
-        type: DT_INT16
-        type: DT_INT8
-        type: DT_COMPLEX64
-        type: DT_COMPLEX128
-        type: DT_QINT8
-        type: DT_QUINT8
-        type: DT_QINT32
         type: DT_HALF
       }
     }
   }
 }
 op {
-  name: "Bitcast"
+  name: "AvgPoolGrad"
   input_arg {
-    name: "input"
+    name: "orig_input_shape"
+    type: DT_INT32
+  }
+  input_arg {
+    name: "grad"
     type_attr: "T"
   }
   output_arg {
     name: "output"
-    type_attr: "type"
+    type_attr: "T"
   }
   attr {
-    name: "T"
-    type: "type"
+    name: "ksize"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 4
+  }
+  attr {
+    name: "strides"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 4
+  }
+  attr {
+    name: "padding"
+    type: "string"
     allowed_values {
       list {
-        type: DT_FLOAT
-        type: DT_DOUBLE
-        type: DT_INT64
-        type: DT_INT32
-        type: DT_UINT8
-        type: DT_UINT16
-        type: DT_INT8
-        type: DT_INT16
-        type: DT_COMPLEX64
-        type: DT_COMPLEX128
-        type: DT_QINT8
-        type: DT_QUINT8
-        type: DT_QINT16
-        type: DT_QUINT16
-        type: DT_QINT32
-        type: DT_HALF
+        s: "SAME"
+        s: "VALID"
       }
     }
   }
   attr {
-    name: "type"
-    type: "type"
+    name: "data_format"
+    type: "string"
+    default_value {
+      s: "NHWC"
+    }
     allowed_values {
       list {
-        type: DT_FLOAT
-        type: DT_DOUBLE
-        type: DT_INT64
-        type: DT_INT32
-        type: DT_UINT8
-        type: DT_UINT16
-        type: DT_INT8
-        type: DT_INT16
-        type: DT_COMPLEX64
-        type: DT_COMPLEX128
-        type: DT_QINT8
-        type: DT_QUINT8
-        type: DT_QINT16
-        type: DT_QUINT16
-        type: DT_QINT32
-        type: DT_HALF
+        s: "NHWC"
+        s: "NCHW"
       }
     }
   }
-}
-op {
-  name: "BitwiseAnd"
-  input_arg {
-    name: "x"
-    type_attr: "T"
-  }
-  input_arg {
-    name: "y"
-    type_attr: "T"
-  }
-  output_arg {
-    name: "z"
-    type_attr: "T"
-  }
   attr {
     name: "T"
     type: "type"
     allowed_values {
       list {
-        type: DT_INT8
-        type: DT_INT16
-        type: DT_INT32
-        type: DT_INT64
-        type: DT_UINT8
-        type: DT_UINT16
+        type: DT_HALF
+        type: DT_FLOAT
+        type: DT_DOUBLE
       }
     }
   }
-  is_commutative: true
 }
 op {
-  name: "BitwiseAnd"
+  name: "AvgPoolGrad"
   input_arg {
-    name: "x"
-    type_attr: "T"
+    name: "orig_input_shape"
+    type: DT_INT32
   }
   input_arg {
-    name: "y"
+    name: "grad"
     type_attr: "T"
   }
   output_arg {
-    name: "z"
+    name: "output"
     type_attr: "T"
   }
   attr {
-    name: "T"
-    type: "type"
+    name: "ksize"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 4
+  }
+  attr {
+    name: "strides"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 4
+  }
+  attr {
+    name: "padding"
+    type: "string"
     allowed_values {
       list {
-        type: DT_INT8
-        type: DT_INT16
-        type: DT_INT32
-        type: DT_INT64
-        type: DT_UINT8
-        type: DT_UINT16
-        type: DT_UINT32
-        type: DT_UINT64
+        s: "SAME"
+        s: "VALID"
       }
     }
   }
-  is_commutative: true
-}
-op {
-  name: "BitwiseOr"
-  input_arg {
-    name: "x"
-    type_attr: "T"
-  }
-  input_arg {
-    name: "y"
-    type_attr: "T"
-  }
-  output_arg {
-    name: "z"
-    type_attr: "T"
+  attr {
+    name: "data_format"
+    type: "string"
+    default_value {
+      s: "NHWC"
+    }
+    allowed_values {
+      list {
+        s: "NHWC"
+        s: "NCHW"
+      }
+    }
   }
   attr {
     name: "T"
     type: "type"
     allowed_values {
       list {
-        type: DT_INT8
-        type: DT_INT16
-        type: DT_INT32
-        type: DT_INT64
-        type: DT_UINT8
-        type: DT_UINT16
+        type: DT_HALF
+        type: DT_BFLOAT16
+        type: DT_FLOAT
+        type: DT_DOUBLE
       }
     }
   }
-  is_commutative: true
 }
 op {
-  name: "BitwiseOr"
-  input_arg {
-    name: "x"
-    type_attr: "T"
+  name: "Barrier"
+  output_arg {
+    name: "handle"
+    type: DT_STRING
+    is_ref: true
   }
-  input_arg {
-    name: "y"
-    type_attr: "T"
-  }
-  output_arg {
-    name: "z"
-    type_attr: "T"
+  attr {
+    name: "component_types"
+    type: "list(type)"
+    has_minimum: true
+    minimum: 1
   }
   attr {
-    name: "T"
-    type: "type"
-    allowed_values {
+    name: "shapes"
+    type: "list(shape)"
+    default_value {
       list {
-        type: DT_INT8
-        type: DT_INT16
-        type: DT_INT32
-        type: DT_INT64
-        type: DT_UINT8
-        type: DT_UINT16
-        type: DT_UINT32
-        type: DT_UINT64
       }
     }
+    has_minimum: true
   }
-  is_commutative: true
-}
-op {
-  name: "BitwiseXor"
-  input_arg {
-    name: "x"
-    type_attr: "T"
-  }
-  input_arg {
-    name: "y"
-    type_attr: "T"
+  attr {
+    name: "capacity"
+    type: "int"
+    default_value {
+      i: -1
+    }
   }
-  output_arg {
-    name: "z"
-    type_attr: "T"
+  attr {
+    name: "container"
+    type: "string"
+    default_value {
+      s: ""
+    }
   }
   attr {
-    name: "T"
-    type: "type"
-    allowed_values {
-      list {
-        type: DT_INT8
-        type: DT_INT16
-        type: DT_INT32
-        type: DT_INT64
-        type: DT_UINT8
-        type: DT_UINT16
-      }
+    name: "shared_name"
+    type: "string"
+    default_value {
+      s: ""
     }
   }
-  is_commutative: true
+  is_stateful: true
 }
 op {
-  name: "BitwiseXor"
-  input_arg {
-    name: "x"
-    type_attr: "T"
-  }
+  name: "BarrierClose"
   input_arg {
-    name: "y"
-    type_attr: "T"
-  }
-  output_arg {
-    name: "z"
-    type_attr: "T"
+    name: "handle"
+    type: DT_STRING
+    is_ref: true
   }
   attr {
-    name: "T"
-    type: "type"
-    allowed_values {
-      list {
-        type: DT_INT8
-        type: DT_INT16
-        type: DT_INT32
-        type: DT_INT64
-        type: DT_UINT8
-        type: DT_UINT16
-        type: DT_UINT32
-        type: DT_UINT64
-      }
+    name: "cancel_pending_enqueues"
+    type: "bool"
+    default_value {
+      b: false
     }
   }
-  is_commutative: true
 }
 op {
-  name: "BroadcastArgs"
-  input_arg {
-    name: "s0"
-    type_attr: "T"
-  }
+  name: "BarrierIncompleteSize"
   input_arg {
-    name: "s1"
-    type_attr: "T"
+    name: "handle"
+    type: DT_STRING
+    is_ref: true
   }
   output_arg {
-    name: "r0"
-    type_attr: "T"
-  }
-  attr {
-    name: "T"
-    type: "type"
-    default_value {
-      type: DT_INT32
-    }
-    allowed_values {
-      list {
-        type: DT_INT32
-        type: DT_INT64
-      }
-    }
+    name: "size"
+    type: DT_INT32
   }
 }
 op {
-  name: "BroadcastGradientArgs"
+  name: "BarrierInsertMany"
   input_arg {
-    name: "s0"
-    type_attr: "T"
+    name: "handle"
+    type: DT_STRING
+    is_ref: true
   }
   input_arg {
-    name: "s1"
-    type_attr: "T"
-  }
-  output_arg {
-    name: "r0"
-    type_attr: "T"
-  }
-  output_arg {
-    name: "r1"
-    type_attr: "T"
-  }
-  attr {
-    name: "T"
-    type: "type"
-    default_value {
-      type: DT_INT32
-    }
-    allowed_values {
-      list {
-        type: DT_INT32
-        type: DT_INT64
-      }
-    }
+    name: "keys"
+    type: DT_STRING
   }
-}
-op {
-  name: "Bucketize"
   input_arg {
-    name: "input"
+    name: "values"
     type_attr: "T"
   }
-  output_arg {
-    name: "output"
-    type: DT_INT32
-  }
   attr {
     name: "T"
     type: "type"
-    allowed_values {
-      list {
-        type: DT_INT32
-        type: DT_INT64
-        type: DT_FLOAT
-        type: DT_DOUBLE
-      }
-    }
   }
   attr {
-    name: "boundaries"
-    type: "list(float)"
+    name: "component_index"
+    type: "int"
   }
 }
 op {
-  name: "BytesProducedStatsDataset"
-  input_arg {
-    name: "input_dataset"
-    type: DT_VARIANT
-  }
+  name: "BarrierReadySize"
   input_arg {
-    name: "tag"
+    name: "handle"
     type: DT_STRING
+    is_ref: true
   }
   output_arg {
-    name: "handle"
-    type: DT_VARIANT
-  }
-  attr {
-    name: "output_types"
-    type: "list(type)"
-    has_minimum: true
-    minimum: 1
-  }
-  attr {
-    name: "output_shapes"
-    type: "list(shape)"
-    has_minimum: true
-    minimum: 1
+    name: "size"
+    type: DT_INT32
   }
 }
 op {
-  name: "CTCBeamSearchDecoder"
+  name: "BarrierTakeMany"
   input_arg {
-    name: "inputs"
-    type: DT_FLOAT
+    name: "handle"
+    type: DT_STRING
+    is_ref: true
   }
   input_arg {
-    name: "sequence_length"
+    name: "num_elements"
     type: DT_INT32
   }
   output_arg {
-    name: "decoded_indices"
-    type: DT_INT64
-    number_attr: "top_paths"
-  }
-  output_arg {
-    name: "decoded_values"
+    name: "indices"
     type: DT_INT64
-    number_attr: "top_paths"
   }
   output_arg {
-    name: "decoded_shape"
-    type: DT_INT64
-    number_attr: "top_paths"
+    name: "keys"
+    type: DT_STRING
   }
   output_arg {
-    name: "log_probability"
-    type: DT_FLOAT
-  }
-  attr {
-    name: "beam_width"
-    type: "int"
-    has_minimum: true
-    minimum: 1
+    name: "values"
+    type_list_attr: "component_types"
   }
   attr {
-    name: "top_paths"
-    type: "int"
+    name: "component_types"
+    type: "list(type)"
     has_minimum: true
     minimum: 1
   }
   attr {
-    name: "merge_repeated"
+    name: "allow_small_batch"
     type: "bool"
     default_value {
-      b: true
+      b: false
     }
   }
-}
-op {
-  name: "CTCGreedyDecoder"
-  input_arg {
-    name: "inputs"
-    type: DT_FLOAT
-  }
-  input_arg {
-    name: "sequence_length"
-    type: DT_INT32
-  }
-  output_arg {
-    name: "decoded_indices"
-    type: DT_INT64
-  }
-  output_arg {
-    name: "decoded_values"
-    type: DT_INT64
-  }
-  output_arg {
-    name: "decoded_shape"
-    type: DT_INT64
-  }
-  output_arg {
-    name: "log_probability"
-    type: DT_FLOAT
-  }
   attr {
-    name: "merge_repeated"
+    name: "wait_for_incomplete"
     type: "bool"
     default_value {
       b: false
     }
   }
+  attr {
+    name: "timeout_ms"
+    type: "int"
+    default_value {
+      i: -1
+    }
+  }
 }
 op {
-  name: "CTCLoss"
-  input_arg {
-    name: "inputs"
-    type: DT_FLOAT
-  }
-  input_arg {
-    name: "labels_indices"
-    type: DT_INT64
-  }
-  input_arg {
-    name: "labels_values"
-    type: DT_INT32
-  }
+  name: "BatchCholesky"
   input_arg {
-    name: "sequence_length"
-    type: DT_INT32
-  }
-  output_arg {
-    name: "loss"
-    type: DT_FLOAT
+    name: "input"
+    type_attr: "T"
   }
   output_arg {
-    name: "gradient"
-    type: DT_FLOAT
+    name: "output"
+    type_attr: "T"
   }
   attr {
-    name: "preprocess_collapse_repeated"
-    type: "bool"
-    default_value {
-      b: false
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_DOUBLE
+        type: DT_FLOAT
+      }
     }
   }
-  attr {
-    name: "ctc_merge_repeated"
-    type: "bool"
-    default_value {
-      b: true
-    }
+  deprecation {
+    version: 13
   }
 }
 op {
-  name: "CTCLoss"
-  input_arg {
-    name: "inputs"
-    type: DT_FLOAT
-  }
-  input_arg {
-    name: "labels_indices"
-    type: DT_INT64
-  }
+  name: "BatchCholeskyGrad"
   input_arg {
-    name: "labels_values"
-    type: DT_INT32
+    name: "l"
+    type_attr: "T"
   }
   input_arg {
-    name: "sequence_length"
-    type: DT_INT32
-  }
-  output_arg {
-    name: "loss"
-    type: DT_FLOAT
+    name: "grad"
+    type_attr: "T"
   }
   output_arg {
-    name: "gradient"
-    type: DT_FLOAT
-  }
-  attr {
-    name: "preprocess_collapse_repeated"
-    type: "bool"
-    default_value {
-      b: false
-    }
+    name: "output"
+    type_attr: "T"
   }
   attr {
-    name: "ctc_merge_repeated"
-    type: "bool"
-    default_value {
-      b: true
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+      }
     }
   }
-  attr {
-    name: "ignore_longer_outputs_than_inputs"
-    type: "bool"
-    default_value {
-      b: false
-    }
+  deprecation {
+    version: 13
   }
 }
 op {
-  name: "CacheDataset"
+  name: "BatchDataset"
   input_arg {
     name: "input_dataset"
     type: DT_VARIANT
   }
   input_arg {
-    name: "filename"
-    type: DT_STRING
+    name: "batch_size"
+    type: DT_INT64
   }
   output_arg {
     name: "handle"
@@ -6425,14 +6537,14 @@ op {
   is_stateful: true
 }
 op {
-  name: "CacheDataset"
+  name: "BatchDataset"
   input_arg {
     name: "input_dataset"
     type: DT_VARIANT
   }
   input_arg {
-    name: "filename"
-    type: DT_STRING
+    name: "batch_size"
+    type: DT_INT64
   }
   output_arg {
     name: "handle"
@@ -6452,97 +6564,97 @@ op {
   }
 }
 op {
-  name: "Cast"
+  name: "BatchFFT"
   input_arg {
-    name: "x"
-    type_attr: "SrcT"
+    name: "input"
+    type: DT_COMPLEX64
   }
   output_arg {
-    name: "y"
-    type_attr: "DstT"
-  }
-  attr {
-    name: "SrcT"
-    type: "type"
+    name: "output"
+    type: DT_COMPLEX64
   }
-  attr {
-    name: "DstT"
-    type: "type"
+  deprecation {
+    version: 15
   }
 }
 op {
-  name: "Ceil"
+  name: "BatchFFT2D"
   input_arg {
-    name: "x"
-    type_attr: "T"
+    name: "input"
+    type: DT_COMPLEX64
   }
   output_arg {
-    name: "y"
-    type_attr: "T"
+    name: "output"
+    type: DT_COMPLEX64
   }
-  attr {
-    name: "T"
-    type: "type"
-    allowed_values {
-      list {
-        type: DT_HALF
-        type: DT_FLOAT
-        type: DT_DOUBLE
-      }
-    }
+  deprecation {
+    version: 15
   }
 }
 op {
-  name: "CheckNumerics"
+  name: "BatchFFT3D"
   input_arg {
-    name: "tensor"
-    type_attr: "T"
+    name: "input"
+    type: DT_COMPLEX64
   }
   output_arg {
     name: "output"
-    type_attr: "T"
+    type: DT_COMPLEX64
   }
-  attr {
-    name: "T"
-    type: "type"
-    allowed_values {
-      list {
-        type: DT_HALF
-        type: DT_FLOAT
-        type: DT_DOUBLE
-      }
-    }
+  deprecation {
+    version: 15
   }
-  attr {
-    name: "message"
-    type: "string"
+}
+op {
+  name: "BatchIFFT"
+  input_arg {
+    name: "input"
+    type: DT_COMPLEX64
+  }
+  output_arg {
+    name: "output"
+    type: DT_COMPLEX64
+  }
+  deprecation {
+    version: 15
   }
 }
 op {
-  name: "Cholesky"
+  name: "BatchIFFT2D"
   input_arg {
     name: "input"
-    type_attr: "T"
+    type: DT_COMPLEX64
   }
   output_arg {
     name: "output"
-    type_attr: "T"
+    type: DT_COMPLEX64
   }
-  attr {
-    name: "T"
-    type: "type"
-    allowed_values {
-      list {
-        type: DT_DOUBLE
-        type: DT_FLOAT
-      }
-    }
+  deprecation {
+    version: 15
   }
 }
 op {
-  name: "Cholesky"
+  name: "BatchIFFT3D"
   input_arg {
     name: "input"
+    type: DT_COMPLEX64
+  }
+  output_arg {
+    name: "output"
+    type: DT_COMPLEX64
+  }
+  deprecation {
+    version: 15
+  }
+}
+op {
+  name: "BatchMatMul"
+  input_arg {
+    name: "x"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "y"
     type_attr: "T"
   }
   output_arg {
@@ -6554,22 +6666,38 @@ op {
     type: "type"
     allowed_values {
       list {
-        type: DT_DOUBLE
+        type: DT_HALF
         type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
         type: DT_COMPLEX64
         type: DT_COMPLEX128
       }
     }
   }
+  attr {
+    name: "adj_x"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  attr {
+    name: "adj_y"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
 }
 op {
-  name: "CholeskyGrad"
+  name: "BatchMatMul"
   input_arg {
-    name: "l"
+    name: "x"
     type_attr: "T"
   }
   input_arg {
-    name: "grad"
+    name: "y"
     type_attr: "T"
   }
   output_arg {
@@ -6581,63 +6709,70 @@ op {
     type: "type"
     allowed_values {
       list {
+        type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
       }
     }
   }
+  attr {
+    name: "adj_x"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  attr {
+    name: "adj_y"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
 }
 op {
-  name: "CompareAndBitpack"
+  name: "BatchMatrixBandPart"
   input_arg {
     name: "input"
     type_attr: "T"
   }
   input_arg {
-    name: "threshold"
-    type_attr: "T"
+    name: "num_lower"
+    type: DT_INT64
+  }
+  input_arg {
+    name: "num_upper"
+    type: DT_INT64
   }
   output_arg {
-    name: "output"
-    type: DT_UINT8
+    name: "band"
+    type_attr: "T"
   }
   attr {
     name: "T"
     type: "type"
-    allowed_values {
-      list {
-        type: DT_BOOL
-        type: DT_HALF
-        type: DT_FLOAT
-        type: DT_DOUBLE
-        type: DT_INT8
-        type: DT_INT16
-        type: DT_INT32
-        type: DT_INT64
-      }
-    }
+  }
+  deprecation {
+    version: 14
   }
 }
 op {
-  name: "Complex"
-  input_arg {
-    name: "real"
-    type_attr: "T"
-  }
+  name: "BatchMatrixDeterminant"
   input_arg {
-    name: "imag"
+    name: "input"
     type_attr: "T"
   }
   output_arg {
-    name: "out"
-    type_attr: "Tout"
+    name: "output"
+    type_attr: "T"
   }
   attr {
     name: "T"
     type: "type"
-    default_value {
-      type: DT_FLOAT
-    }
     allowed_values {
       list {
         type: DT_FLOAT
@@ -6645,250 +6780,269 @@ op {
       }
     }
   }
-  attr {
-    name: "Tout"
-    type: "type"
-    default_value {
-      type: DT_COMPLEX64
-    }
-    allowed_values {
-      list {
-        type: DT_COMPLEX64
-        type: DT_COMPLEX128
-      }
-    }
+  deprecation {
+    version: 13
   }
 }
 op {
-  name: "ComplexAbs"
+  name: "BatchMatrixDeterminant"
   input_arg {
-    name: "x"
+    name: "input"
     type_attr: "T"
   }
   output_arg {
-    name: "y"
-    type_attr: "Tout"
+    name: "output"
+    type_attr: "T"
   }
   attr {
     name: "T"
     type: "type"
-    default_value {
-      type: DT_COMPLEX64
-    }
     allowed_values {
       list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
         type: DT_COMPLEX64
         type: DT_COMPLEX128
       }
     }
   }
-  attr {
-    name: "Tout"
-    type: "type"
-    default_value {
-      type: DT_FLOAT
-    }
-    allowed_values {
-      list {
-        type: DT_FLOAT
-        type: DT_DOUBLE
-      }
-    }
+  deprecation {
+    version: 13
   }
 }
 op {
-  name: "ComputeAccidentalHits"
-  input_arg {
-    name: "true_classes"
-    type: DT_INT64
-  }
+  name: "BatchMatrixDiag"
   input_arg {
-    name: "sampled_candidates"
-    type: DT_INT64
+    name: "diagonal"
+    type_attr: "T"
   }
   output_arg {
-    name: "indices"
-    type: DT_INT32
+    name: "output"
+    type_attr: "T"
   }
-  output_arg {
-    name: "ids"
-    type: DT_INT64
+  attr {
+    name: "T"
+    type: "type"
   }
-  output_arg {
-    name: "weights"
-    type: DT_FLOAT
+  deprecation {
+    version: 14
   }
-  attr {
-    name: "num_true"
-    type: "int"
+}
+op {
+  name: "BatchMatrixDiagPart"
+  input_arg {
+    name: "input"
+    type_attr: "T"
   }
-  attr {
-    name: "seed"
-    type: "int"
-    default_value {
-      i: 0
-    }
+  output_arg {
+    name: "diagonal"
+    type_attr: "T"
   }
   attr {
-    name: "seed2"
-    type: "int"
-    default_value {
-      i: 0
-    }
+    name: "T"
+    type: "type"
+  }
+  deprecation {
+    version: 14
   }
 }
 op {
-  name: "Concat"
-  input_arg {
-    name: "concat_dim"
-    type: DT_INT32
-  }
+  name: "BatchMatrixInverse"
   input_arg {
-    name: "values"
+    name: "input"
     type_attr: "T"
-    number_attr: "N"
   }
   output_arg {
     name: "output"
     type_attr: "T"
   }
   attr {
-    name: "N"
-    type: "int"
-    has_minimum: true
-    minimum: 2
+    name: "adjoint"
+    type: "bool"
+    default_value {
+      b: false
+    }
   }
   attr {
     name: "T"
     type: "type"
+    allowed_values {
+      list {
+        type: DT_DOUBLE
+        type: DT_FLOAT
+      }
+    }
+  }
+  deprecation {
+    version: 13
   }
 }
 op {
-  name: "ConcatOffset"
+  name: "BatchMatrixSetDiag"
   input_arg {
-    name: "concat_dim"
-    type: DT_INT32
+    name: "input"
+    type_attr: "T"
   }
   input_arg {
-    name: "shape"
-    type: DT_INT32
-    number_attr: "N"
+    name: "diagonal"
+    type_attr: "T"
   }
   output_arg {
-    name: "offset"
-    type: DT_INT32
-    number_attr: "N"
+    name: "output"
+    type_attr: "T"
   }
   attr {
-    name: "N"
-    type: "int"
-    has_minimum: true
-    minimum: 2
+    name: "T"
+    type: "type"
+  }
+  deprecation {
+    version: 14
   }
 }
 op {
-  name: "ConcatV2"
+  name: "BatchMatrixSolve"
   input_arg {
-    name: "values"
+    name: "matrix"
     type_attr: "T"
-    number_attr: "N"
   }
   input_arg {
-    name: "axis"
-    type_attr: "Tidx"
+    name: "rhs"
+    type_attr: "T"
   }
   output_arg {
     name: "output"
     type_attr: "T"
   }
   attr {
-    name: "N"
-    type: "int"
-    has_minimum: true
-    minimum: 2
+    name: "adjoint"
+    type: "bool"
+    default_value {
+      b: false
+    }
   }
   attr {
     name: "T"
     type: "type"
-  }
-  attr {
-    name: "Tidx"
-    type: "type"
-    default_value {
-      type: DT_INT32
-    }
     allowed_values {
       list {
-        type: DT_INT32
-        type: DT_INT64
+        type: DT_DOUBLE
+        type: DT_FLOAT
       }
     }
   }
+  deprecation {
+    version: 13
+  }
 }
 op {
-  name: "ConcatenateDataset"
+  name: "BatchMatrixSolveLs"
   input_arg {
-    name: "input_dataset"
-    type: DT_VARIANT
+    name: "matrix"
+    type_attr: "T"
   }
   input_arg {
-    name: "another_dataset"
-    type: DT_VARIANT
+    name: "rhs"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "l2_regularizer"
+    type: DT_DOUBLE
   }
   output_arg {
-    name: "handle"
-    type: DT_VARIANT
+    name: "output"
+    type_attr: "T"
   }
   attr {
-    name: "output_types"
-    type: "list(type)"
-    has_minimum: true
-    minimum: 1
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_DOUBLE
+        type: DT_FLOAT
+      }
+    }
   }
   attr {
-    name: "output_shapes"
-    type: "list(shape)"
-    has_minimum: true
-    minimum: 1
+    name: "fast"
+    type: "bool"
+    default_value {
+      b: true
+    }
+  }
+  deprecation {
+    version: 13
   }
-  is_stateful: true
 }
 op {
-  name: "ConcatenateDataset"
+  name: "BatchMatrixTriangularSolve"
   input_arg {
-    name: "input_dataset"
-    type: DT_VARIANT
+    name: "matrix"
+    type_attr: "T"
   }
   input_arg {
-    name: "another_dataset"
-    type: DT_VARIANT
+    name: "rhs"
+    type_attr: "T"
   }
   output_arg {
-    name: "handle"
-    type: DT_VARIANT
+    name: "output"
+    type_attr: "T"
   }
   attr {
-    name: "output_types"
-    type: "list(type)"
-    has_minimum: true
-    minimum: 1
+    name: "lower"
+    type: "bool"
+    default_value {
+      b: true
+    }
   }
   attr {
-    name: "output_shapes"
-    type: "list(shape)"
-    has_minimum: true
-    minimum: 1
+    name: "adjoint"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_DOUBLE
+        type: DT_FLOAT
+      }
+    }
+  }
+  deprecation {
+    version: 13
   }
 }
 op {
-  name: "ConditionalAccumulator"
+  name: "BatchNormWithGlobalNormalization"
+  input_arg {
+    name: "t"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "m"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "v"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "beta"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "gamma"
+    type_attr: "T"
+  }
   output_arg {
-    name: "handle"
-    type: DT_STRING
-    is_ref: true
+    name: "result"
+    type_attr: "T"
   }
   attr {
-    name: "dtype"
+    name: "T"
     type: "type"
     allowed_values {
       list {
@@ -6910,34 +7064,45 @@ op {
     }
   }
   attr {
-    name: "shape"
-    type: "shape"
+    name: "variance_epsilon"
+    type: "float"
   }
   attr {
-    name: "container"
-    type: "string"
-    default_value {
-      s: ""
-    }
+    name: "scale_after_normalization"
+    type: "bool"
   }
-  attr {
-    name: "shared_name"
-    type: "string"
-    default_value {
-      s: ""
-    }
+  deprecation {
+    version: 9
   }
-  is_stateful: true
 }
 op {
-  name: "ConditionalAccumulator"
+  name: "BatchNormWithGlobalNormalization"
+  input_arg {
+    name: "t"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "m"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "v"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "beta"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "gamma"
+    type_attr: "T"
+  }
   output_arg {
-    name: "handle"
-    type: DT_STRING
-    is_ref: true
+    name: "result"
+    type_attr: "T"
   }
   attr {
-    name: "dtype"
+    name: "T"
     type: "type"
     allowed_values {
       list {
@@ -6961,136 +7126,196 @@ op {
     }
   }
   attr {
-    name: "shape"
-    type: "shape"
+    name: "variance_epsilon"
+    type: "float"
   }
   attr {
-    name: "container"
-    type: "string"
-    default_value {
-      s: ""
-    }
+    name: "scale_after_normalization"
+    type: "bool"
   }
-  attr {
-    name: "shared_name"
-    type: "string"
-    default_value {
-      s: ""
-    }
+  deprecation {
+    version: 9
   }
-  is_stateful: true
 }
 op {
-  name: "Conj"
+  name: "BatchNormWithGlobalNormalization"
   input_arg {
-    name: "input"
+    name: "t"
     type_attr: "T"
   }
-  output_arg {
-    name: "output"
+  input_arg {
+    name: "m"
     type_attr: "T"
   }
-  attr {
-    name: "T"
-    type: "type"
-    default_value {
-      type: DT_COMPLEX64
-    }
-    allowed_values {
-      list {
-        type: DT_COMPLEX64
-        type: DT_COMPLEX128
-      }
-    }
+  input_arg {
+    name: "v"
+    type_attr: "T"
   }
-}
-op {
-  name: "Conj"
   input_arg {
-    name: "input"
+    name: "beta"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "gamma"
     type_attr: "T"
   }
   output_arg {
-    name: "output"
+    name: "result"
     type_attr: "T"
   }
   attr {
     name: "T"
     type: "type"
-    default_value {
-      type: DT_COMPLEX64
-    }
     allowed_values {
       list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
         type: DT_COMPLEX64
         type: DT_COMPLEX128
-        type: DT_VARIANT
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
+  attr {
+    name: "variance_epsilon"
+    type: "float"
+  }
+  attr {
+    name: "scale_after_normalization"
+    type: "bool"
+  }
+  deprecation {
+    version: 9
+  }
 }
 op {
-  name: "ConjugateTranspose"
+  name: "BatchNormWithGlobalNormalizationGrad"
   input_arg {
-    name: "x"
+    name: "t"
     type_attr: "T"
   }
   input_arg {
-    name: "perm"
-    type_attr: "Tperm"
+    name: "m"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "v"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "gamma"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "backprop"
+    type_attr: "T"
   }
   output_arg {
-    name: "y"
+    name: "dx"
     type_attr: "T"
   }
-  attr {
-    name: "T"
-    type: "type"
+  output_arg {
+    name: "dm"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "dv"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "db"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "dg"
+    type_attr: "T"
   }
   attr {
-    name: "Tperm"
+    name: "T"
     type: "type"
-    default_value {
-      type: DT_INT32
-    }
     allowed_values {
       list {
-        type: DT_INT32
+        type: DT_FLOAT
+        type: DT_DOUBLE
         type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
       }
     }
   }
-}
-op {
-  name: "Const"
-  output_arg {
-    name: "output"
-    type_attr: "dtype"
-  }
   attr {
-    name: "value"
-    type: "tensor"
+    name: "variance_epsilon"
+    type: "float"
   }
   attr {
-    name: "dtype"
-    type: "type"
+    name: "scale_after_normalization"
+    type: "bool"
+  }
+  deprecation {
+    version: 9
   }
 }
 op {
-  name: "ControlTrigger"
-}
-op {
-  name: "Conv2D"
+  name: "BatchNormWithGlobalNormalizationGrad"
   input_arg {
-    name: "input"
+    name: "t"
     type_attr: "T"
   }
   input_arg {
-    name: "filter"
+    name: "m"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "v"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "gamma"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "backprop"
     type_attr: "T"
   }
   output_arg {
-    name: "output"
+    name: "dx"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "dm"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "dv"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "db"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "dg"
     type_attr: "T"
   }
   attr {
@@ -7098,62 +7323,77 @@ op {
     type: "type"
     allowed_values {
       list {
-        type: DT_HALF
         type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
       }
     }
   }
   attr {
-    name: "strides"
-    type: "list(int)"
+    name: "variance_epsilon"
+    type: "float"
   }
   attr {
-    name: "use_cudnn_on_gpu"
+    name: "scale_after_normalization"
     type: "bool"
-    default_value {
-      b: true
-    }
-  }
-  attr {
-    name: "padding"
-    type: "string"
-    allowed_values {
-      list {
-        s: "SAME"
-        s: "VALID"
-      }
-    }
   }
-  attr {
-    name: "data_format"
-    type: "string"
-    default_value {
-      s: "NHWC"
-    }
-    allowed_values {
-      list {
-        s: "NHWC"
-        s: "NCHW"
-      }
-    }
+  deprecation {
+    version: 9
   }
 }
 op {
-  name: "Conv2DBackpropFilter"
+  name: "BatchNormWithGlobalNormalizationGrad"
   input_arg {
-    name: "input"
+    name: "t"
     type_attr: "T"
   }
   input_arg {
-    name: "filter_sizes"
-    type: DT_INT32
+    name: "m"
+    type_attr: "T"
   }
   input_arg {
-    name: "out_backprop"
+    name: "v"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "gamma"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "backprop"
     type_attr: "T"
   }
   output_arg {
-    name: "output"
+    name: "dx"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "dm"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "dv"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "db"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "dg"
     type_attr: "T"
   }
   attr {
@@ -7161,118 +7401,154 @@ op {
     type: "type"
     allowed_values {
       list {
-        type: DT_HALF
         type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
   attr {
-    name: "strides"
-    type: "list(int)"
+    name: "variance_epsilon"
+    type: "float"
   }
   attr {
-    name: "use_cudnn_on_gpu"
+    name: "scale_after_normalization"
     type: "bool"
-    default_value {
-      b: true
-    }
   }
-  attr {
-    name: "padding"
-    type: "string"
-    allowed_values {
-      list {
-        s: "SAME"
-        s: "VALID"
-      }
-    }
+  deprecation {
+    version: 9
+  }
+}
+op {
+  name: "BatchSelfAdjointEig"
+  input_arg {
+    name: "input"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
   }
   attr {
-    name: "data_format"
-    type: "string"
-    default_value {
-      s: "NHWC"
-    }
+    name: "T"
+    type: "type"
     allowed_values {
       list {
-        s: "NHWC"
-        s: "NCHW"
+        type: DT_DOUBLE
+        type: DT_FLOAT
       }
     }
   }
+  deprecation {
+    version: 11
+  }
 }
 op {
-  name: "Conv2DBackpropInput"
-  input_arg {
-    name: "input_sizes"
-    type: DT_INT32
-  }
+  name: "BatchSelfAdjointEigV2"
   input_arg {
-    name: "filter"
+    name: "input"
     type_attr: "T"
   }
-  input_arg {
-    name: "out_backprop"
+  output_arg {
+    name: "e"
     type_attr: "T"
   }
   output_arg {
-    name: "output"
+    name: "v"
     type_attr: "T"
   }
+  attr {
+    name: "compute_v"
+    type: "bool"
+    default_value {
+      b: true
+    }
+  }
   attr {
     name: "T"
     type: "type"
     allowed_values {
       list {
-        type: DT_HALF
+        type: DT_DOUBLE
         type: DT_FLOAT
       }
     }
   }
-  attr {
-    name: "strides"
-    type: "list(int)"
+  deprecation {
+    version: 13
+  }
+}
+op {
+  name: "BatchSvd"
+  input_arg {
+    name: "input"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "s"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "u"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "v"
+    type_attr: "T"
   }
   attr {
-    name: "use_cudnn_on_gpu"
+    name: "compute_uv"
     type: "bool"
     default_value {
       b: true
     }
   }
   attr {
-    name: "padding"
-    type: "string"
-    allowed_values {
-      list {
-        s: "SAME"
-        s: "VALID"
-      }
+    name: "full_matrices"
+    type: "bool"
+    default_value {
+      b: false
     }
   }
   attr {
-    name: "data_format"
-    type: "string"
-    default_value {
-      s: "NHWC"
-    }
+    name: "T"
+    type: "type"
     allowed_values {
       list {
-        s: "NHWC"
-        s: "NCHW"
+        type: DT_DOUBLE
+        type: DT_FLOAT
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
       }
     }
   }
+  deprecation {
+    version: 13
+  }
 }
 op {
-  name: "Conv3D"
+  name: "BatchToSpace"
   input_arg {
     name: "input"
     type_attr: "T"
   }
   input_arg {
-    name: "filter"
-    type_attr: "T"
+    name: "crops"
+    type_attr: "Tidx"
   }
   output_arg {
     name: "output"
@@ -7281,39 +7557,40 @@ op {
   attr {
     name: "T"
     type: "type"
-    allowed_values {
-      list {
-        type: DT_FLOAT
-        type: DT_DOUBLE
-      }
-    }
   }
   attr {
-    name: "strides"
-    type: "list(int)"
+    name: "block_size"
+    type: "int"
     has_minimum: true
-    minimum: 5
+    minimum: 2
   }
   attr {
-    name: "padding"
-    type: "string"
+    name: "Tidx"
+    type: "type"
+    default_value {
+      type: DT_INT32
+    }
     allowed_values {
       list {
-        s: "SAME"
-        s: "VALID"
+        type: DT_INT32
+        type: DT_INT64
       }
     }
   }
 }
 op {
-  name: "Conv3D"
+  name: "BatchToSpaceND"
   input_arg {
     name: "input"
     type_attr: "T"
   }
   input_arg {
-    name: "filter"
-    type_attr: "T"
+    name: "block_shape"
+    type_attr: "Tblock_shape"
+  }
+  input_arg {
+    name: "crops"
+    type_attr: "Tcrops"
   }
   output_arg {
     name: "output"
@@ -7322,59 +7599,50 @@ op {
   attr {
     name: "T"
     type: "type"
-    allowed_values {
-      list {
-        type: DT_FLOAT
-        type: DT_DOUBLE
-      }
-    }
-  }
-  attr {
-    name: "strides"
-    type: "list(int)"
-    has_minimum: true
-    minimum: 5
   }
   attr {
-    name: "padding"
-    type: "string"
+    name: "Tblock_shape"
+    type: "type"
+    default_value {
+      type: DT_INT32
+    }
     allowed_values {
       list {
-        s: "SAME"
-        s: "VALID"
+        type: DT_INT32
+        type: DT_INT64
       }
     }
   }
   attr {
-    name: "data_format"
-    type: "string"
+    name: "Tcrops"
+    type: "type"
     default_value {
-      s: "NDHWC"
+      type: DT_INT32
     }
     allowed_values {
       list {
-        s: "NDHWC"
-        s: "NCDHW"
+        type: DT_INT32
+        type: DT_INT64
       }
     }
   }
 }
 op {
-  name: "Conv3DBackpropFilter"
+  name: "Betainc"
   input_arg {
-    name: "input"
+    name: "a"
     type_attr: "T"
   }
   input_arg {
-    name: "filter"
+    name: "b"
     type_attr: "T"
   }
   input_arg {
-    name: "out_backprop"
+    name: "x"
     type_attr: "T"
   }
   output_arg {
-    name: "output"
+    name: "z"
     type_attr: "T"
   }
   attr {
@@ -7387,38 +7655,15 @@ op {
       }
     }
   }
-  attr {
-    name: "strides"
-    type: "list(int)"
-    has_minimum: true
-    minimum: 5
-  }
-  attr {
-    name: "padding"
-    type: "string"
-    allowed_values {
-      list {
-        s: "SAME"
-        s: "VALID"
-      }
-    }
-  }
-  deprecation {
-    version: 10
-  }
 }
 op {
-  name: "Conv3DBackpropFilterV2"
+  name: "BiasAdd"
   input_arg {
-    name: "input"
+    name: "value"
     type_attr: "T"
   }
   input_arg {
-    name: "filter_sizes"
-    type: DT_INT32
-  }
-  input_arg {
-    name: "out_backprop"
+    name: "bias"
     type_attr: "T"
   }
   output_arg {
@@ -7432,38 +7677,43 @@ op {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
       }
     }
   }
   attr {
-    name: "strides"
-    type: "list(int)"
-    has_minimum: true
-    minimum: 5
-  }
-  attr {
-    name: "padding"
+    name: "data_format"
     type: "string"
+    default_value {
+      s: "NHWC"
+    }
     allowed_values {
       list {
-        s: "SAME"
-        s: "VALID"
+        s: "NHWC"
+        s: "NCHW"
       }
     }
   }
 }
 op {
-  name: "Conv3DBackpropFilterV2"
+  name: "BiasAdd"
   input_arg {
-    name: "input"
+    name: "value"
     type_attr: "T"
   }
   input_arg {
-    name: "filter_sizes"
-    type: DT_INT32
-  }
-  input_arg {
-    name: "out_backprop"
+    name: "bias"
     type_attr: "T"
   }
   output_arg {
@@ -7477,51 +7727,45 @@ op {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
       }
     }
   }
   attr {
-    name: "strides"
-    type: "list(int)"
-    has_minimum: true
-    minimum: 5
-  }
-  attr {
-    name: "padding"
+    name: "data_format"
     type: "string"
+    default_value {
+      s: "NHWC"
+    }
     allowed_values {
       list {
-        s: "SAME"
-        s: "VALID"
-      }
-    }
-  }
-  attr {
-    name: "data_format"
-    type: "string"
-    default_value {
-      s: "NDHWC"
-    }
-    allowed_values {
-      list {
-        s: "NDHWC"
-        s: "NCDHW"
+        s: "NHWC"
+        s: "NCHW"
       }
     }
   }
 }
 op {
-  name: "Conv3DBackpropInput"
-  input_arg {
-    name: "input"
-    type_attr: "T"
-  }
+  name: "BiasAdd"
   input_arg {
-    name: "filter"
+    name: "value"
     type_attr: "T"
   }
   input_arg {
-    name: "out_backprop"
+    name: "bias"
     type_attr: "T"
   }
   output_arg {
@@ -7535,39 +7779,40 @@ op {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
   attr {
-    name: "strides"
-    type: "list(int)"
-    has_minimum: true
-    minimum: 5
-  }
-  attr {
-    name: "padding"
+    name: "data_format"
     type: "string"
+    default_value {
+      s: "NHWC"
+    }
     allowed_values {
       list {
-        s: "SAME"
-        s: "VALID"
+        s: "NHWC"
+        s: "NCHW"
       }
     }
   }
-  deprecation {
-    version: 10
-  }
 }
 op {
-  name: "Conv3DBackpropInputV2"
-  input_arg {
-    name: "input_sizes"
-    type: DT_INT32
-  }
-  input_arg {
-    name: "filter"
-    type_attr: "T"
-  }
+  name: "BiasAddGrad"
   input_arg {
     name: "out_backprop"
     type_attr: "T"
@@ -7583,36 +7828,37 @@ op {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
       }
     }
   }
   attr {
-    name: "strides"
-    type: "list(int)"
-    has_minimum: true
-    minimum: 5
-  }
-  attr {
-    name: "padding"
+    name: "data_format"
     type: "string"
+    default_value {
+      s: "NHWC"
+    }
     allowed_values {
       list {
-        s: "SAME"
-        s: "VALID"
+        s: "NHWC"
+        s: "NCHW"
       }
     }
   }
 }
 op {
-  name: "Conv3DBackpropInputV2"
-  input_arg {
-    name: "input_sizes"
-    type: DT_INT32
-  }
-  input_arg {
-    name: "filter"
-    type_attr: "T"
-  }
+  name: "BiasAddGrad"
   input_arg {
     name: "out_backprop"
     type_attr: "T"
@@ -7628,22 +7874,20 @@ op {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
-      }
-    }
-  }
-  attr {
-    name: "strides"
-    type: "list(int)"
-    has_minimum: true
-    minimum: 5
-  }
-  attr {
-    name: "padding"
-    type: "string"
-    allowed_values {
-      list {
-        s: "SAME"
-        s: "VALID"
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
       }
     }
   }
@@ -7651,20 +7895,20 @@ op {
     name: "data_format"
     type: "string"
     default_value {
-      s: "NDHWC"
+      s: "NHWC"
     }
     allowed_values {
       list {
-        s: "NDHWC"
-        s: "NCDHW"
+        s: "NHWC"
+        s: "NCHW"
       }
     }
   }
 }
 op {
-  name: "Copy"
+  name: "BiasAddGrad"
   input_arg {
-    name: "input"
+    name: "out_backprop"
     type_attr: "T"
   }
   output_arg {
@@ -7674,133 +7918,91 @@ op {
   attr {
     name: "T"
     type: "type"
-  }
-  attr {
-    name: "tensor_name"
-    type: "string"
-    default_value {
-      s: ""
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
+      }
     }
   }
-  allows_uninitialized_input: true
-}
-op {
-  name: "Copy"
-  input_arg {
-    name: "input"
-    type_attr: "T"
-  }
-  output_arg {
-    name: "output"
-    type_attr: "T"
-  }
   attr {
-    name: "T"
-    type: "type"
-  }
-  attr {
-    name: "tensor_name"
+    name: "data_format"
     type: "string"
     default_value {
-      s: ""
+      s: "NHWC"
     }
-  }
-  attr {
-    name: "debug_ops_spec"
-    type: "list(string)"
-    default_value {
+    allowed_values {
       list {
+        s: "NHWC"
+        s: "NCHW"
       }
     }
   }
-  allows_uninitialized_input: true
 }
 op {
-  name: "CopyHost"
+  name: "BiasAddV1"
   input_arg {
-    name: "input"
-    type_attr: "T"
-  }
-  output_arg {
-    name: "output"
+    name: "value"
     type_attr: "T"
   }
-  attr {
-    name: "T"
-    type: "type"
-  }
-  attr {
-    name: "tensor_name"
-    type: "string"
-    default_value {
-      s: ""
-    }
-  }
-  allows_uninitialized_input: true
-}
-op {
-  name: "CopyHost"
   input_arg {
-    name: "input"
+    name: "bias"
     type_attr: "T"
   }
   output_arg {
     name: "output"
     type_attr: "T"
   }
-  attr {
-    name: "T"
-    type: "type"
-  }
-  attr {
-    name: "tensor_name"
-    type: "string"
-    default_value {
-      s: ""
-    }
-  }
-  attr {
-    name: "debug_ops_spec"
-    type: "list(string)"
-    default_value {
-      list {
-      }
-    }
-  }
-  allows_uninitialized_input: true
-}
-op {
-  name: "Cos"
-  input_arg {
-    name: "x"
-    type_attr: "T"
-  }
-  output_arg {
-    name: "y"
-    type_attr: "T"
-  }
   attr {
     name: "T"
     type: "type"
     allowed_values {
       list {
-        type: DT_HALF
         type: DT_FLOAT
         type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
         type: DT_COMPLEX64
         type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
       }
     }
   }
 }
 op {
-  name: "Cosh"
+  name: "BiasAddV1"
   input_arg {
-    name: "x"
+    name: "value"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "bias"
     type_attr: "T"
   }
   output_arg {
-    name: "y"
+    name: "output"
     type_attr: "T"
   }
   attr {
@@ -7808,281 +8010,284 @@ op {
     type: "type"
     allowed_values {
       list {
-        type: DT_HALF
         type: DT_FLOAT
         type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
         type: DT_COMPLEX64
         type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
       }
     }
   }
 }
 op {
-  name: "CountUpTo"
+  name: "BiasAddV1"
   input_arg {
-    name: "ref"
+    name: "value"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "bias"
     type_attr: "T"
-    is_ref: true
   }
   output_arg {
     name: "output"
     type_attr: "T"
   }
-  attr {
-    name: "limit"
-    type: "int"
-  }
   attr {
     name: "T"
     type: "type"
     allowed_values {
       list {
-        type: DT_INT32
+        type: DT_FLOAT
+        type: DT_DOUBLE
         type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
 }
 op {
-  name: "CropAndResize"
-  input_arg {
-    name: "image"
-    type_attr: "T"
-  }
+  name: "Bincount"
   input_arg {
-    name: "boxes"
-    type: DT_FLOAT
+    name: "arr"
+    type: DT_INT32
   }
   input_arg {
-    name: "box_ind"
+    name: "size"
     type: DT_INT32
   }
   input_arg {
-    name: "crop_size"
-    type: DT_INT32
+    name: "weights"
+    type_attr: "T"
   }
   output_arg {
-    name: "crops"
-    type: DT_FLOAT
+    name: "bins"
+    type_attr: "T"
   }
   attr {
     name: "T"
     type: "type"
     allowed_values {
       list {
-        type: DT_UINT8
-        type: DT_INT8
-        type: DT_INT16
         type: DT_INT32
         type: DT_INT64
-        type: DT_HALF
         type: DT_FLOAT
         type: DT_DOUBLE
       }
     }
   }
-  attr {
-    name: "method"
-    type: "string"
-    default_value {
-      s: "bilinear"
-    }
-    allowed_values {
-      list {
-        s: "bilinear"
-      }
-    }
-  }
-  attr {
-    name: "extrapolation_value"
-    type: "float"
-    default_value {
-      f: 0
-    }
-  }
 }
 op {
-  name: "CropAndResize"
+  name: "Bitcast"
   input_arg {
-    name: "image"
+    name: "input"
     type_attr: "T"
   }
-  input_arg {
-    name: "boxes"
-    type: DT_FLOAT
-  }
-  input_arg {
-    name: "box_ind"
-    type: DT_INT32
-  }
-  input_arg {
-    name: "crop_size"
-    type: DT_INT32
-  }
   output_arg {
-    name: "crops"
-    type: DT_FLOAT
+    name: "output"
+    type_attr: "type"
   }
   attr {
     name: "T"
     type: "type"
     allowed_values {
       list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
         type: DT_UINT8
         type: DT_UINT16
-        type: DT_INT8
         type: DT_INT16
-        type: DT_INT32
-        type: DT_INT64
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
         type: DT_HALF
-        type: DT_FLOAT
-        type: DT_DOUBLE
       }
     }
   }
   attr {
-    name: "method"
-    type: "string"
-    default_value {
-      s: "bilinear"
-    }
+    name: "type"
+    type: "type"
     allowed_values {
       list {
-        s: "bilinear"
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
       }
     }
   }
-  attr {
-    name: "extrapolation_value"
-    type: "float"
-    default_value {
-      f: 0
-    }
-  }
 }
 op {
-  name: "CropAndResizeGradBoxes"
-  input_arg {
-    name: "grads"
-    type: DT_FLOAT
-  }
+  name: "Bitcast"
   input_arg {
-    name: "image"
+    name: "input"
     type_attr: "T"
   }
-  input_arg {
-    name: "boxes"
-    type: DT_FLOAT
-  }
-  input_arg {
-    name: "box_ind"
-    type: DT_INT32
-  }
   output_arg {
     name: "output"
-    type: DT_FLOAT
+    type_attr: "type"
   }
   attr {
     name: "T"
     type: "type"
     allowed_values {
       list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
         type: DT_UINT8
+        type: DT_UINT16
         type: DT_INT8
         type: DT_INT16
-        type: DT_INT32
-        type: DT_INT64
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT16
+        type: DT_QUINT16
+        type: DT_QINT32
         type: DT_HALF
-        type: DT_FLOAT
-        type: DT_DOUBLE
       }
     }
   }
   attr {
-    name: "method"
-    type: "string"
-    default_value {
-      s: "bilinear"
-    }
+    name: "type"
+    type: "type"
     allowed_values {
       list {
-        s: "bilinear"
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT8
+        type: DT_INT16
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT16
+        type: DT_QUINT16
+        type: DT_QINT32
+        type: DT_HALF
       }
     }
   }
 }
 op {
-  name: "CropAndResizeGradBoxes"
-  input_arg {
-    name: "grads"
-    type: DT_FLOAT
-  }
+  name: "Bitcast"
   input_arg {
-    name: "image"
+    name: "input"
     type_attr: "T"
   }
-  input_arg {
-    name: "boxes"
-    type: DT_FLOAT
-  }
-  input_arg {
-    name: "box_ind"
-    type: DT_INT32
-  }
   output_arg {
     name: "output"
-    type: DT_FLOAT
+    type_attr: "type"
   }
   attr {
     name: "T"
     type: "type"
     allowed_values {
       list {
+        type: DT_BFLOAT16
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
         type: DT_UINT8
         type: DT_UINT16
         type: DT_INT8
         type: DT_INT16
-        type: DT_INT32
-        type: DT_INT64
-        type: DT_HALF
-        type: DT_FLOAT
-        type: DT_DOUBLE
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT16
+        type: DT_QUINT16
+        type: DT_QINT32
+        type: DT_HALF
       }
     }
   }
   attr {
-    name: "method"
-    type: "string"
-    default_value {
-      s: "bilinear"
-    }
+    name: "type"
+    type: "type"
     allowed_values {
       list {
-        s: "bilinear"
+        type: DT_BFLOAT16
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT8
+        type: DT_INT16
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT16
+        type: DT_QUINT16
+        type: DT_QINT32
+        type: DT_HALF
       }
     }
   }
 }
 op {
-  name: "CropAndResizeGradImage"
-  input_arg {
-    name: "grads"
-    type: DT_FLOAT
-  }
-  input_arg {
-    name: "boxes"
-    type: DT_FLOAT
-  }
+  name: "BitwiseAnd"
   input_arg {
-    name: "box_ind"
-    type: DT_INT32
+    name: "x"
+    type_attr: "T"
   }
   input_arg {
-    name: "image_size"
-    type: DT_INT32
+    name: "y"
+    type_attr: "T"
   }
   output_arg {
-    name: "output"
+    name: "z"
     type_attr: "T"
   }
   attr {
@@ -8090,37 +8295,29 @@ op {
     type: "type"
     allowed_values {
       list {
-        type: DT_FLOAT
-        type: DT_HALF
-        type: DT_DOUBLE
-      }
-    }
-  }
-  attr {
-    name: "method"
-    type: "string"
-    default_value {
-      s: "bilinear"
-    }
-    allowed_values {
-      list {
-        s: "bilinear"
+        type: DT_INT8
+        type: DT_INT16
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_UINT8
+        type: DT_UINT16
       }
     }
   }
+  is_commutative: true
 }
 op {
-  name: "Cross"
+  name: "BitwiseAnd"
   input_arg {
-    name: "a"
+    name: "x"
     type_attr: "T"
   }
   input_arg {
-    name: "b"
+    name: "y"
     type_attr: "T"
   }
   output_arg {
-    name: "product"
+    name: "z"
     type_attr: "T"
   }
   attr {
@@ -8128,31 +8325,31 @@ op {
     type: "type"
     allowed_values {
       list {
-        type: DT_FLOAT
-        type: DT_DOUBLE
+        type: DT_INT8
+        type: DT_INT16
         type: DT_INT32
         type: DT_INT64
         type: DT_UINT8
-        type: DT_INT16
-        type: DT_INT8
         type: DT_UINT16
-        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
       }
     }
   }
+  is_commutative: true
 }
 op {
-  name: "Cross"
+  name: "BitwiseOr"
   input_arg {
-    name: "a"
+    name: "x"
     type_attr: "T"
   }
   input_arg {
-    name: "b"
+    name: "y"
     type_attr: "T"
   }
   output_arg {
-    name: "product"
+    name: "z"
     type_attr: "T"
   }
   attr {
@@ -8160,139 +8357,127 @@ op {
     type: "type"
     allowed_values {
       list {
-        type: DT_FLOAT
-        type: DT_DOUBLE
+        type: DT_INT8
+        type: DT_INT16
         type: DT_INT32
         type: DT_INT64
         type: DT_UINT8
-        type: DT_INT16
-        type: DT_INT8
         type: DT_UINT16
-        type: DT_HALF
-        type: DT_UINT32
-        type: DT_UINT64
       }
     }
   }
+  is_commutative: true
 }
 op {
-  name: "Cumprod"
+  name: "BitwiseOr"
   input_arg {
     name: "x"
     type_attr: "T"
   }
   input_arg {
-    name: "axis"
-    type_attr: "Tidx"
+    name: "y"
+    type_attr: "T"
   }
   output_arg {
-    name: "out"
+    name: "z"
     type_attr: "T"
   }
-  attr {
-    name: "exclusive"
-    type: "bool"
-    default_value {
-      b: false
-    }
-  }
-  attr {
-    name: "reverse"
-    type: "bool"
-    default_value {
-      b: false
-    }
-  }
   attr {
     name: "T"
     type: "type"
     allowed_values {
       list {
-        type: DT_FLOAT
-        type: DT_DOUBLE
-        type: DT_INT64
+        type: DT_INT8
+        type: DT_INT16
         type: DT_INT32
+        type: DT_INT64
         type: DT_UINT8
         type: DT_UINT16
-        type: DT_INT16
-        type: DT_INT8
-        type: DT_COMPLEX64
-        type: DT_COMPLEX128
-        type: DT_QINT8
-        type: DT_QUINT8
-        type: DT_QINT32
-        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
       }
     }
   }
+  is_commutative: true
+}
+op {
+  name: "BitwiseXor"
+  input_arg {
+    name: "x"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "y"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "z"
+    type_attr: "T"
+  }
   attr {
-    name: "Tidx"
+    name: "T"
     type: "type"
-    default_value {
-      type: DT_INT32
-    }
     allowed_values {
       list {
+        type: DT_INT8
+        type: DT_INT16
         type: DT_INT32
         type: DT_INT64
+        type: DT_UINT8
+        type: DT_UINT16
       }
     }
   }
+  is_commutative: true
 }
 op {
-  name: "Cumprod"
+  name: "BitwiseXor"
   input_arg {
     name: "x"
     type_attr: "T"
   }
   input_arg {
-    name: "axis"
-    type_attr: "Tidx"
+    name: "y"
+    type_attr: "T"
   }
   output_arg {
-    name: "out"
+    name: "z"
     type_attr: "T"
   }
-  attr {
-    name: "exclusive"
-    type: "bool"
-    default_value {
-      b: false
-    }
-  }
-  attr {
-    name: "reverse"
-    type: "bool"
-    default_value {
-      b: false
-    }
-  }
   attr {
     name: "T"
     type: "type"
     allowed_values {
       list {
-        type: DT_FLOAT
-        type: DT_DOUBLE
-        type: DT_INT64
+        type: DT_INT8
+        type: DT_INT16
         type: DT_INT32
+        type: DT_INT64
         type: DT_UINT8
         type: DT_UINT16
-        type: DT_INT16
-        type: DT_INT8
-        type: DT_COMPLEX64
-        type: DT_COMPLEX128
-        type: DT_QINT8
-        type: DT_QUINT8
-        type: DT_QINT32
-        type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
       }
     }
   }
+  is_commutative: true
+}
+op {
+  name: "BroadcastArgs"
+  input_arg {
+    name: "s0"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "s1"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "r0"
+    type_attr: "T"
+  }
   attr {
-    name: "Tidx"
+    name: "T"
     type: "type"
     default_value {
       type: DT_INT32
@@ -8306,58 +8491,26 @@ op {
   }
 }
 op {
-  name: "Cumsum"
+  name: "BroadcastGradientArgs"
   input_arg {
-    name: "x"
+    name: "s0"
     type_attr: "T"
   }
   input_arg {
-    name: "axis"
-    type_attr: "Tidx"
+    name: "s1"
+    type_attr: "T"
   }
   output_arg {
-    name: "out"
+    name: "r0"
     type_attr: "T"
   }
-  attr {
-    name: "exclusive"
-    type: "bool"
-    default_value {
-      b: false
-    }
-  }
-  attr {
-    name: "reverse"
-    type: "bool"
-    default_value {
-      b: false
-    }
+  output_arg {
+    name: "r1"
+    type_attr: "T"
   }
   attr {
     name: "T"
     type: "type"
-    allowed_values {
-      list {
-        type: DT_FLOAT
-        type: DT_DOUBLE
-        type: DT_INT64
-        type: DT_INT32
-        type: DT_UINT8
-        type: DT_UINT16
-        type: DT_INT16
-        type: DT_INT8
-        type: DT_COMPLEX64
-        type: DT_COMPLEX128
-        type: DT_QINT8
-        type: DT_QUINT8
-        type: DT_QINT32
-        type: DT_HALF
-      }
-    }
-  }
-  attr {
-    name: "Tidx"
-    type: "type"
     default_value {
       type: DT_INT32
     }
@@ -8370,80 +8523,45 @@ op {
   }
 }
 op {
-  name: "Cumsum"
+  name: "Bucketize"
   input_arg {
-    name: "x"
+    name: "input"
     type_attr: "T"
   }
-  input_arg {
-    name: "axis"
-    type_attr: "Tidx"
-  }
   output_arg {
-    name: "out"
-    type_attr: "T"
-  }
-  attr {
-    name: "exclusive"
-    type: "bool"
-    default_value {
-      b: false
-    }
-  }
-  attr {
-    name: "reverse"
-    type: "bool"
-    default_value {
-      b: false
-    }
+    name: "output"
+    type: DT_INT32
   }
   attr {
     name: "T"
     type: "type"
     allowed_values {
       list {
+        type: DT_INT32
+        type: DT_INT64
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT64
-        type: DT_INT32
-        type: DT_UINT8
-        type: DT_UINT16
-        type: DT_INT16
-        type: DT_INT8
-        type: DT_COMPLEX64
-        type: DT_COMPLEX128
-        type: DT_QINT8
-        type: DT_QUINT8
-        type: DT_QINT32
-        type: DT_HALF
-        type: DT_UINT32
-        type: DT_UINT64
       }
     }
   }
   attr {
-    name: "Tidx"
-    type: "type"
-    default_value {
-      type: DT_INT32
-    }
-    allowed_values {
-      list {
-        type: DT_INT32
-        type: DT_INT64
-      }
-    }
+    name: "boundaries"
+    type: "list(float)"
   }
 }
 op {
-  name: "DatasetToSingleElement"
+  name: "BytesProducedStatsDataset"
   input_arg {
-    name: "dataset"
+    name: "input_dataset"
     type: DT_VARIANT
   }
+  input_arg {
+    name: "tag"
+    type: DT_STRING
+  }
   output_arg {
-    name: "components"
-    type_list_attr: "output_types"
+    name: "handle"
+    type: DT_VARIANT
   }
   attr {
     name: "output_types"
@@ -8459,996 +8577,665 @@ op {
   }
 }
 op {
-  name: "DebugGradientIdentity"
+  name: "CTCBeamSearchDecoder"
   input_arg {
-    name: "input"
-    type_attr: "T"
+    name: "inputs"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "sequence_length"
+    type: DT_INT32
   }
   output_arg {
-    name: "output"
-    type_attr: "T"
+    name: "decoded_indices"
+    type: DT_INT64
+    number_attr: "top_paths"
   }
-  attr {
-    name: "T"
-    type: "type"
+  output_arg {
+    name: "decoded_values"
+    type: DT_INT64
+    number_attr: "top_paths"
   }
-  allows_uninitialized_input: true
-}
-op {
-  name: "DebugIdentity"
-  input_arg {
-    name: "input"
-    type_attr: "T"
+  output_arg {
+    name: "decoded_shape"
+    type: DT_INT64
+    number_attr: "top_paths"
   }
   output_arg {
-    name: "output"
-    type_attr: "T"
+    name: "log_probability"
+    type: DT_FLOAT
   }
   attr {
-    name: "T"
-    type: "type"
+    name: "beam_width"
+    type: "int"
+    has_minimum: true
+    minimum: 1
   }
   attr {
-    name: "tensor_name"
-    type: "string"
-    default_value {
-      s: ""
-    }
+    name: "top_paths"
+    type: "int"
+    has_minimum: true
+    minimum: 1
   }
   attr {
-    name: "debug_urls"
-    type: "list(string)"
+    name: "merge_repeated"
+    type: "bool"
     default_value {
-      list {
-      }
+      b: true
     }
   }
-  allows_uninitialized_input: true
 }
 op {
-  name: "DebugIdentity"
+  name: "CTCGreedyDecoder"
   input_arg {
-    name: "input"
-    type_attr: "T"
+    name: "inputs"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "sequence_length"
+    type: DT_INT32
   }
   output_arg {
-    name: "output"
-    type_attr: "T"
+    name: "decoded_indices"
+    type: DT_INT64
   }
-  attr {
-    name: "T"
-    type: "type"
+  output_arg {
+    name: "decoded_values"
+    type: DT_INT64
   }
-  attr {
-    name: "tensor_name"
-    type: "string"
-    default_value {
-      s: ""
-    }
+  output_arg {
+    name: "decoded_shape"
+    type: DT_INT64
   }
-  attr {
-    name: "debug_urls"
-    type: "list(string)"
-    default_value {
-      list {
-      }
-    }
+  output_arg {
+    name: "log_probability"
+    type: DT_FLOAT
   }
   attr {
-    name: "gated_grpc"
+    name: "merge_repeated"
     type: "bool"
     default_value {
       b: false
     }
   }
-  allows_uninitialized_input: true
 }
 op {
-  name: "DebugIdentity"
+  name: "CTCLoss"
   input_arg {
-    name: "input"
-    type_attr: "T"
+    name: "inputs"
+    type: DT_FLOAT
   }
-  output_arg {
-    name: "output"
-    type_attr: "T"
+  input_arg {
+    name: "labels_indices"
+    type: DT_INT64
   }
-  attr {
-    name: "T"
-    type: "type"
+  input_arg {
+    name: "labels_values"
+    type: DT_INT32
   }
-  attr {
-    name: "device_name"
-    type: "string"
-    default_value {
-      s: ""
-    }
+  input_arg {
+    name: "sequence_length"
+    type: DT_INT32
   }
-  attr {
-    name: "tensor_name"
-    type: "string"
-    default_value {
-      s: ""
-    }
+  output_arg {
+    name: "loss"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "gradient"
+    type: DT_FLOAT
   }
   attr {
-    name: "debug_urls"
-    type: "list(string)"
+    name: "preprocess_collapse_repeated"
+    type: "bool"
     default_value {
-      list {
-      }
+      b: false
     }
   }
   attr {
-    name: "gated_grpc"
+    name: "ctc_merge_repeated"
     type: "bool"
     default_value {
-      b: false
+      b: true
     }
   }
-  allows_uninitialized_input: true
 }
 op {
-  name: "DebugNanCount"
+  name: "CTCLoss"
   input_arg {
-    name: "input"
-    type_attr: "T"
+    name: "inputs"
+    type: DT_FLOAT
   }
-  output_arg {
-    name: "output"
+  input_arg {
+    name: "labels_indices"
     type: DT_INT64
   }
-  attr {
-    name: "T"
-    type: "type"
-  }
-  attr {
-    name: "tensor_name"
-    type: "string"
-    default_value {
-      s: ""
-    }
-  }
-  attr {
-    name: "debug_urls"
-    type: "list(string)"
-    default_value {
-      list {
-      }
-    }
+  input_arg {
+    name: "labels_values"
+    type: DT_INT32
   }
-  allows_uninitialized_input: true
-}
-op {
-  name: "DebugNanCount"
   input_arg {
-    name: "input"
-    type_attr: "T"
+    name: "sequence_length"
+    type: DT_INT32
   }
   output_arg {
-    name: "output"
-    type: DT_INT64
+    name: "loss"
+    type: DT_FLOAT
   }
-  attr {
-    name: "T"
-    type: "type"
+  output_arg {
+    name: "gradient"
+    type: DT_FLOAT
   }
   attr {
-    name: "tensor_name"
-    type: "string"
+    name: "preprocess_collapse_repeated"
+    type: "bool"
     default_value {
-      s: ""
+      b: false
     }
   }
   attr {
-    name: "debug_urls"
-    type: "list(string)"
+    name: "ctc_merge_repeated"
+    type: "bool"
     default_value {
-      list {
-      }
+      b: true
     }
   }
   attr {
-    name: "gated_grpc"
+    name: "ignore_longer_outputs_than_inputs"
     type: "bool"
     default_value {
       b: false
     }
   }
-  allows_uninitialized_input: true
 }
 op {
-  name: "DebugNanCount"
+  name: "CacheDataset"
   input_arg {
-    name: "input"
-    type_attr: "T"
+    name: "input_dataset"
+    type: DT_VARIANT
+  }
+  input_arg {
+    name: "filename"
+    type: DT_STRING
   }
   output_arg {
-    name: "output"
-    type: DT_INT64
+    name: "handle"
+    type: DT_VARIANT
   }
   attr {
-    name: "T"
-    type: "type"
+    name: "output_types"
+    type: "list(type)"
+    has_minimum: true
+    minimum: 1
   }
   attr {
-    name: "device_name"
-    type: "string"
-    default_value {
-      s: ""
-    }
+    name: "output_shapes"
+    type: "list(shape)"
+    has_minimum: true
+    minimum: 1
   }
-  attr {
-    name: "tensor_name"
-    type: "string"
-    default_value {
-      s: ""
-    }
+  is_stateful: true
+}
+op {
+  name: "CacheDataset"
+  input_arg {
+    name: "input_dataset"
+    type: DT_VARIANT
+  }
+  input_arg {
+    name: "filename"
+    type: DT_STRING
+  }
+  output_arg {
+    name: "handle"
+    type: DT_VARIANT
   }
   attr {
-    name: "debug_urls"
-    type: "list(string)"
-    default_value {
-      list {
-      }
-    }
+    name: "output_types"
+    type: "list(type)"
+    has_minimum: true
+    minimum: 1
   }
   attr {
-    name: "gated_grpc"
-    type: "bool"
-    default_value {
-      b: false
-    }
+    name: "output_shapes"
+    type: "list(shape)"
+    has_minimum: true
+    minimum: 1
   }
-  allows_uninitialized_input: true
 }
 op {
-  name: "DebugNumericSummary"
+  name: "Cast"
   input_arg {
-    name: "input"
-    type_attr: "T"
+    name: "x"
+    type_attr: "SrcT"
   }
   output_arg {
-    name: "output"
-    type: DT_DOUBLE
+    name: "y"
+    type_attr: "DstT"
   }
   attr {
-    name: "T"
+    name: "SrcT"
     type: "type"
   }
   attr {
-    name: "tensor_name"
-    type: "string"
-    default_value {
-      s: ""
-    }
-  }
-  attr {
-    name: "debug_urls"
-    type: "list(string)"
-    default_value {
-      list {
-      }
-    }
+    name: "DstT"
+    type: "type"
   }
-  allows_uninitialized_input: true
 }
 op {
-  name: "DebugNumericSummary"
+  name: "Ceil"
   input_arg {
-    name: "input"
+    name: "x"
     type_attr: "T"
   }
   output_arg {
-    name: "output"
-    type: DT_DOUBLE
+    name: "y"
+    type_attr: "T"
   }
   attr {
     name: "T"
     type: "type"
-  }
-  attr {
-    name: "tensor_name"
-    type: "string"
-    default_value {
-      s: ""
-    }
-  }
-  attr {
-    name: "debug_urls"
-    type: "list(string)"
-    default_value {
+    allowed_values {
       list {
+        type: DT_HALF
+        type: DT_FLOAT
+        type: DT_DOUBLE
       }
     }
   }
-  attr {
-    name: "lower_bound"
-    type: "float"
-    default_value {
-      f: -inf
-    }
-  }
-  attr {
-    name: "upper_bound"
-    type: "float"
-    default_value {
-      f: inf
-    }
-  }
-  attr {
-    name: "mute_if_healthy"
-    type: "bool"
-    default_value {
-      b: false
-    }
-  }
-  allows_uninitialized_input: true
 }
 op {
-  name: "DebugNumericSummary"
+  name: "Ceil"
   input_arg {
-    name: "input"
+    name: "x"
     type_attr: "T"
   }
   output_arg {
-    name: "output"
-    type: DT_DOUBLE
+    name: "y"
+    type_attr: "T"
   }
   attr {
     name: "T"
     type: "type"
-  }
-  attr {
-    name: "tensor_name"
-    type: "string"
-    default_value {
-      s: ""
-    }
-  }
-  attr {
-    name: "debug_urls"
-    type: "list(string)"
-    default_value {
+    allowed_values {
       list {
+        type: DT_HALF
+        type: DT_BFLOAT16
+        type: DT_FLOAT
+        type: DT_DOUBLE
       }
     }
   }
-  attr {
-    name: "lower_bound"
-    type: "float"
-    default_value {
-      f: -inf
-    }
-  }
-  attr {
-    name: "upper_bound"
-    type: "float"
-    default_value {
-      f: inf
-    }
-  }
-  attr {
-    name: "mute_if_healthy"
-    type: "bool"
-    default_value {
-      b: false
-    }
-  }
-  attr {
-    name: "gated_grpc"
-    type: "bool"
-    default_value {
-      b: false
-    }
-  }
-  allows_uninitialized_input: true
 }
 op {
-  name: "DebugNumericSummary"
+  name: "CheckNumerics"
   input_arg {
-    name: "input"
+    name: "tensor"
     type_attr: "T"
   }
   output_arg {
     name: "output"
-    type: DT_DOUBLE
+    type_attr: "T"
   }
   attr {
     name: "T"
     type: "type"
-  }
-  attr {
-    name: "device_name"
-    type: "string"
-    default_value {
-      s: ""
-    }
-  }
-  attr {
-    name: "tensor_name"
-    type: "string"
-    default_value {
-      s: ""
-    }
-  }
-  attr {
-    name: "debug_urls"
-    type: "list(string)"
-    default_value {
+    allowed_values {
       list {
+        type: DT_HALF
+        type: DT_FLOAT
+        type: DT_DOUBLE
       }
     }
   }
   attr {
-    name: "lower_bound"
-    type: "float"
-    default_value {
-      f: -inf
-    }
-  }
-  attr {
-    name: "upper_bound"
-    type: "float"
-    default_value {
-      f: inf
-    }
-  }
-  attr {
-    name: "mute_if_healthy"
-    type: "bool"
-    default_value {
-      b: false
-    }
-  }
-  attr {
-    name: "gated_grpc"
-    type: "bool"
-    default_value {
-      b: false
-    }
+    name: "message"
+    type: "string"
   }
-  allows_uninitialized_input: true
 }
 op {
-  name: "DecodeAndCropJpeg"
-  input_arg {
-    name: "contents"
-    type: DT_STRING
-  }
+  name: "CheckNumerics"
   input_arg {
-    name: "crop_window"
-    type: DT_INT32
+    name: "tensor"
+    type_attr: "T"
   }
   output_arg {
-    name: "image"
-    type: DT_UINT8
-  }
-  attr {
-    name: "channels"
-    type: "int"
-    default_value {
-      i: 0
-    }
-  }
-  attr {
-    name: "ratio"
-    type: "int"
-    default_value {
-      i: 1
-    }
-  }
-  attr {
-    name: "fancy_upscaling"
-    type: "bool"
-    default_value {
-      b: true
-    }
-  }
-  attr {
-    name: "try_recover_truncated"
-    type: "bool"
-    default_value {
-      b: false
-    }
+    name: "output"
+    type_attr: "T"
   }
   attr {
-    name: "acceptable_fraction"
-    type: "float"
-    default_value {
-      f: 1
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_BFLOAT16
+        type: DT_FLOAT
+        type: DT_DOUBLE
+      }
     }
   }
   attr {
-    name: "dct_method"
+    name: "message"
     type: "string"
-    default_value {
-      s: ""
-    }
   }
 }
 op {
-  name: "DecodeBase64"
+  name: "Cholesky"
   input_arg {
     name: "input"
-    type: DT_STRING
+    type_attr: "T"
   }
   output_arg {
     name: "output"
-    type: DT_STRING
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_DOUBLE
+        type: DT_FLOAT
+      }
+    }
   }
 }
 op {
-  name: "DecodeBmp"
+  name: "Cholesky"
   input_arg {
-    name: "contents"
-    type: DT_STRING
+    name: "input"
+    type_attr: "T"
   }
   output_arg {
-    name: "image"
-    type: DT_UINT8
+    name: "output"
+    type_attr: "T"
   }
   attr {
-    name: "channels"
-    type: "int"
-    default_value {
-      i: 0
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_DOUBLE
+        type: DT_FLOAT
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+      }
     }
   }
 }
 op {
-  name: "DecodeCSV"
+  name: "CholeskyGrad"
   input_arg {
-    name: "records"
-    type: DT_STRING
+    name: "l"
+    type_attr: "T"
   }
   input_arg {
-    name: "record_defaults"
-    type_list_attr: "OUT_TYPE"
+    name: "grad"
+    type_attr: "T"
   }
   output_arg {
     name: "output"
-    type_list_attr: "OUT_TYPE"
+    type_attr: "T"
   }
   attr {
-    name: "OUT_TYPE"
-    type: "list(type)"
-    has_minimum: true
-    minimum: 1
+    name: "T"
+    type: "type"
     allowed_values {
       list {
         type: DT_FLOAT
-        type: DT_INT32
-        type: DT_INT64
-        type: DT_STRING
+        type: DT_DOUBLE
       }
     }
   }
-  attr {
-    name: "field_delim"
-    type: "string"
-    default_value {
-      s: ","
-    }
-  }
 }
 op {
-  name: "DecodeCSV"
+  name: "CompareAndBitpack"
   input_arg {
-    name: "records"
-    type: DT_STRING
+    name: "input"
+    type_attr: "T"
   }
   input_arg {
-    name: "record_defaults"
-    type_list_attr: "OUT_TYPE"
+    name: "threshold"
+    type_attr: "T"
   }
   output_arg {
     name: "output"
-    type_list_attr: "OUT_TYPE"
+    type: DT_UINT8
   }
   attr {
-    name: "OUT_TYPE"
-    type: "list(type)"
-    has_minimum: true
-    minimum: 1
+    name: "T"
+    type: "type"
     allowed_values {
       list {
+        type: DT_BOOL
+        type: DT_HALF
         type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT8
+        type: DT_INT16
         type: DT_INT32
         type: DT_INT64
-        type: DT_STRING
       }
     }
   }
-  attr {
-    name: "field_delim"
-    type: "string"
-    default_value {
-      s: ","
-    }
-  }
-  attr {
-    name: "use_quote_delim"
-    type: "bool"
-    default_value {
-      b: true
-    }
-  }
 }
 op {
-  name: "DecodeCSV"
+  name: "Complex"
   input_arg {
-    name: "records"
-    type: DT_STRING
+    name: "real"
+    type_attr: "T"
   }
   input_arg {
-    name: "record_defaults"
-    type_list_attr: "OUT_TYPE"
+    name: "imag"
+    type_attr: "T"
   }
   output_arg {
-    name: "output"
-    type_list_attr: "OUT_TYPE"
+    name: "out"
+    type_attr: "Tout"
   }
   attr {
-    name: "OUT_TYPE"
-    type: "list(type)"
-    has_minimum: true
-    minimum: 1
+    name: "T"
+    type: "type"
+    default_value {
+      type: DT_FLOAT
+    }
     allowed_values {
       list {
         type: DT_FLOAT
-        type: DT_INT32
-        type: DT_INT64
-        type: DT_STRING
+        type: DT_DOUBLE
       }
     }
   }
   attr {
-    name: "field_delim"
-    type: "string"
+    name: "Tout"
+    type: "type"
     default_value {
-      s: ","
+      type: DT_COMPLEX64
     }
-  }
-  attr {
-    name: "use_quote_delim"
-    type: "bool"
-    default_value {
-      b: true
-    }
-  }
-  attr {
-    name: "na_value"
-    type: "string"
-    default_value {
-      s: ""
+    allowed_values {
+      list {
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+      }
     }
   }
 }
 op {
-  name: "DecodeCSV"
-  input_arg {
-    name: "records"
-    type: DT_STRING
-  }
+  name: "ComplexAbs"
   input_arg {
-    name: "record_defaults"
-    type_list_attr: "OUT_TYPE"
+    name: "x"
+    type_attr: "T"
   }
   output_arg {
-    name: "output"
-    type_list_attr: "OUT_TYPE"
+    name: "y"
+    type_attr: "Tout"
   }
   attr {
-    name: "OUT_TYPE"
-    type: "list(type)"
-    has_minimum: true
-    minimum: 1
+    name: "T"
+    type: "type"
+    default_value {
+      type: DT_COMPLEX64
+    }
     allowed_values {
       list {
-        type: DT_FLOAT
-        type: DT_DOUBLE
-        type: DT_INT32
-        type: DT_INT64
-        type: DT_STRING
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
       }
     }
   }
   attr {
-    name: "field_delim"
-    type: "string"
-    default_value {
-      s: ","
-    }
-  }
-  attr {
-    name: "use_quote_delim"
-    type: "bool"
+    name: "Tout"
+    type: "type"
     default_value {
-      b: true
+      type: DT_FLOAT
     }
-  }
-  attr {
-    name: "na_value"
-    type: "string"
-    default_value {
-      s: ""
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+      }
     }
   }
 }
 op {
-  name: "DecodeGif"
+  name: "ComputeAccidentalHits"
   input_arg {
-    name: "contents"
-    type: DT_STRING
-  }
-  output_arg {
-    name: "image"
-    type: DT_UINT8
+    name: "true_classes"
+    type: DT_INT64
   }
-}
-op {
-  name: "DecodeJSONExample"
   input_arg {
-    name: "json_examples"
-    type: DT_STRING
+    name: "sampled_candidates"
+    type: DT_INT64
   }
   output_arg {
-    name: "binary_examples"
-    type: DT_STRING
+    name: "indices"
+    type: DT_INT32
   }
-}
-op {
-  name: "DecodeJpeg"
-  input_arg {
-    name: "contents"
-    type: DT_STRING
+  output_arg {
+    name: "ids"
+    type: DT_INT64
   }
   output_arg {
-    name: "image"
-    type: DT_UINT8
+    name: "weights"
+    type: DT_FLOAT
   }
   attr {
-    name: "channels"
+    name: "num_true"
     type: "int"
-    default_value {
-      i: 0
-    }
   }
   attr {
-    name: "ratio"
+    name: "seed"
     type: "int"
     default_value {
-      i: 1
-    }
-  }
-  attr {
-    name: "fancy_upscaling"
-    type: "bool"
-    default_value {
-      b: true
-    }
-  }
-  attr {
-    name: "try_recover_truncated"
-    type: "bool"
-    default_value {
-      b: false
-    }
-  }
-  attr {
-    name: "acceptable_fraction"
-    type: "float"
-    default_value {
-      f: 1
-    }
-  }
-  attr {
-    name: "dct_method"
-    type: "string"
-    default_value {
-      s: ""
+      i: 0
     }
   }
-}
-op {
-  name: "DecodePng"
-  input_arg {
-    name: "contents"
-    type: DT_STRING
-  }
-  output_arg {
-    name: "image"
-    type_attr: "dtype"
-  }
   attr {
-    name: "channels"
+    name: "seed2"
     type: "int"
     default_value {
       i: 0
     }
   }
-  attr {
-    name: "dtype"
-    type: "type"
-    default_value {
-      type: DT_UINT8
-    }
-    allowed_values {
-      list {
-        type: DT_UINT8
-        type: DT_UINT16
-      }
-    }
-  }
 }
 op {
-  name: "DecodeRaw"
+  name: "Concat"
   input_arg {
-    name: "bytes"
-    type: DT_STRING
-  }
-  output_arg {
-    name: "output"
-    type_attr: "out_type"
-  }
-  attr {
-    name: "out_type"
-    type: "type"
-    allowed_values {
-      list {
-        type: DT_HALF
-        type: DT_FLOAT
-        type: DT_DOUBLE
-        type: DT_INT32
-        type: DT_UINT8
-        type: DT_INT16
-        type: DT_INT8
-        type: DT_INT64
-      }
-    }
-  }
-  attr {
-    name: "little_endian"
-    type: "bool"
-    default_value {
-      b: true
-    }
+    name: "concat_dim"
+    type: DT_INT32
   }
-}
-op {
-  name: "DecodeRaw"
   input_arg {
-    name: "bytes"
-    type: DT_STRING
+    name: "values"
+    type_attr: "T"
+    number_attr: "N"
   }
   output_arg {
     name: "output"
-    type_attr: "out_type"
+    type_attr: "T"
   }
   attr {
-    name: "out_type"
-    type: "type"
-    allowed_values {
-      list {
-        type: DT_HALF
-        type: DT_FLOAT
-        type: DT_DOUBLE
-        type: DT_INT32
-        type: DT_UINT16
-        type: DT_UINT8
-        type: DT_INT16
-        type: DT_INT8
-        type: DT_INT64
-      }
-    }
+    name: "N"
+    type: "int"
+    has_minimum: true
+    minimum: 2
   }
   attr {
-    name: "little_endian"
-    type: "bool"
-    default_value {
-      b: true
-    }
+    name: "T"
+    type: "type"
   }
 }
 op {
-  name: "DecodeWav"
+  name: "ConcatOffset"
   input_arg {
-    name: "contents"
-    type: DT_STRING
+    name: "concat_dim"
+    type: DT_INT32
   }
-  output_arg {
-    name: "audio"
-    type: DT_FLOAT
+  input_arg {
+    name: "shape"
+    type: DT_INT32
+    number_attr: "N"
   }
   output_arg {
-    name: "sample_rate"
+    name: "offset"
     type: DT_INT32
+    number_attr: "N"
   }
   attr {
-    name: "desired_channels"
-    type: "int"
-    default_value {
-      i: -1
-    }
-  }
-  attr {
-    name: "desired_samples"
+    name: "N"
     type: "int"
-    default_value {
-      i: -1
-    }
-  }
-}
-op {
-  name: "DeleteSessionTensor"
-  input_arg {
-    name: "handle"
-    type: DT_STRING
+    has_minimum: true
+    minimum: 2
   }
 }
 op {
-  name: "DenseToDenseSetOperation"
+  name: "ConcatV2"
   input_arg {
-    name: "set1"
+    name: "values"
     type_attr: "T"
+    number_attr: "N"
   }
   input_arg {
-    name: "set2"
-    type_attr: "T"
-  }
-  output_arg {
-    name: "result_indices"
-    type: DT_INT64
+    name: "axis"
+    type_attr: "Tidx"
   }
   output_arg {
-    name: "result_values"
+    name: "output"
     type_attr: "T"
   }
-  output_arg {
-    name: "result_shape"
-    type: DT_INT64
-  }
   attr {
-    name: "set_operation"
-    type: "string"
+    name: "N"
+    type: "int"
+    has_minimum: true
+    minimum: 2
   }
   attr {
-    name: "validate_indices"
-    type: "bool"
-    default_value {
-      b: true
-    }
+    name: "T"
+    type: "type"
   }
   attr {
-    name: "T"
+    name: "Tidx"
     type: "type"
+    default_value {
+      type: DT_INT32
+    }
     allowed_values {
       list {
-        type: DT_INT8
-        type: DT_INT16
         type: DT_INT32
         type: DT_INT64
-        type: DT_UINT8
-        type: DT_UINT16
-        type: DT_STRING
       }
     }
   }
 }
 op {
-  name: "DenseToSparseBatchDataset"
+  name: "ConcatenateDataset"
   input_arg {
     name: "input_dataset"
     type: DT_VARIANT
   }
   input_arg {
-    name: "batch_size"
-    type: DT_INT64
-  }
-  input_arg {
-    name: "row_shape"
-    type: DT_INT64
+    name: "another_dataset"
+    type: DT_VARIANT
   }
   output_arg {
     name: "handle"
@@ -9469,18 +9256,14 @@ op {
   is_stateful: true
 }
 op {
-  name: "DenseToSparseBatchDataset"
+  name: "ConcatenateDataset"
   input_arg {
     name: "input_dataset"
     type: DT_VARIANT
   }
   input_arg {
-    name: "batch_size"
-    type: DT_INT64
-  }
-  input_arg {
-    name: "row_shape"
-    type: DT_INT64
+    name: "another_dataset"
+    type: DT_VARIANT
   }
   output_arg {
     name: "handle"
@@ -9500,64 +9283,159 @@ op {
   }
 }
 op {
-  name: "DenseToSparseSetOperation"
-  input_arg {
-    name: "set1"
-    type_attr: "T"
+  name: "ConditionalAccumulator"
+  output_arg {
+    name: "handle"
+    type: DT_STRING
+    is_ref: true
   }
-  input_arg {
-    name: "set2_indices"
-    type: DT_INT64
+  attr {
+    name: "dtype"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+      }
+    }
   }
-  input_arg {
-    name: "set2_values"
-    type_attr: "T"
+  attr {
+    name: "shape"
+    type: "shape"
   }
-  input_arg {
-    name: "set2_shape"
-    type: DT_INT64
+  attr {
+    name: "container"
+    type: "string"
+    default_value {
+      s: ""
+    }
   }
-  output_arg {
-    name: "result_indices"
-    type: DT_INT64
+  attr {
+    name: "shared_name"
+    type: "string"
+    default_value {
+      s: ""
+    }
   }
+  is_stateful: true
+}
+op {
+  name: "ConditionalAccumulator"
   output_arg {
-    name: "result_values"
-    type_attr: "T"
+    name: "handle"
+    type: DT_STRING
+    is_ref: true
   }
-  output_arg {
-    name: "result_shape"
-    type: DT_INT64
+  attr {
+    name: "dtype"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+      }
+    }
   }
   attr {
-    name: "set_operation"
+    name: "shape"
+    type: "shape"
+  }
+  attr {
+    name: "container"
     type: "string"
+    default_value {
+      s: ""
+    }
   }
   attr {
-    name: "validate_indices"
-    type: "bool"
+    name: "shared_name"
+    type: "string"
     default_value {
-      b: true
+      s: ""
     }
   }
+  is_stateful: true
+}
+op {
+  name: "ConditionalAccumulator"
+  output_arg {
+    name: "handle"
+    type: DT_STRING
+    is_ref: true
+  }
   attr {
-    name: "T"
+    name: "dtype"
     type: "type"
     allowed_values {
       list {
-        type: DT_INT8
-        type: DT_INT16
-        type: DT_INT32
+        type: DT_FLOAT
+        type: DT_DOUBLE
         type: DT_INT64
+        type: DT_INT32
         type: DT_UINT8
         type: DT_UINT16
-        type: DT_STRING
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
+  attr {
+    name: "shape"
+    type: "shape"
+  }
+  attr {
+    name: "container"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  attr {
+    name: "shared_name"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  is_stateful: true
 }
 op {
-  name: "DepthToSpace"
+  name: "Conj"
   input_arg {
     name: "input"
     type_attr: "T"
@@ -9569,16 +9447,19 @@ op {
   attr {
     name: "T"
     type: "type"
-  }
-  attr {
-    name: "block_size"
-    type: "int"
-    has_minimum: true
-    minimum: 2
+    default_value {
+      type: DT_COMPLEX64
+    }
+    allowed_values {
+      list {
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+      }
+    }
   }
 }
 op {
-  name: "DepthToSpace"
+  name: "Conj"
   input_arg {
     name: "input"
     type_attr: "T"
@@ -9590,30 +9471,70 @@ op {
   attr {
     name: "T"
     type: "type"
+    default_value {
+      type: DT_COMPLEX64
+    }
+    allowed_values {
+      list {
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_VARIANT
+      }
+    }
+  }
+}
+op {
+  name: "ConjugateTranspose"
+  input_arg {
+    name: "x"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "perm"
+    type_attr: "Tperm"
+  }
+  output_arg {
+    name: "y"
+    type_attr: "T"
   }
   attr {
-    name: "block_size"
-    type: "int"
-    has_minimum: true
-    minimum: 2
-  }
-  attr {
-    name: "data_format"
-    type: "string"
+    name: "T"
+    type: "type"
+  }
+  attr {
+    name: "Tperm"
+    type: "type"
     default_value {
-      s: "NHWC"
+      type: DT_INT32
     }
     allowed_values {
       list {
-        s: "NHWC"
-        s: "NCHW"
-        s: "NCHW_VECT_C"
+        type: DT_INT32
+        type: DT_INT64
       }
     }
   }
 }
 op {
-  name: "DepthwiseConv2dNative"
+  name: "Const"
+  output_arg {
+    name: "output"
+    type_attr: "dtype"
+  }
+  attr {
+    name: "value"
+    type: "tensor"
+  }
+  attr {
+    name: "dtype"
+    type: "type"
+  }
+}
+op {
+  name: "ControlTrigger"
+}
+op {
+  name: "Conv2D"
   input_arg {
     name: "input"
     type_attr: "T"
@@ -9631,8 +9552,8 @@ op {
     type: "type"
     allowed_values {
       list {
+        type: DT_HALF
         type: DT_FLOAT
-        type: DT_DOUBLE
       }
     }
   }
@@ -9640,6 +9561,13 @@ op {
     name: "strides"
     type: "list(int)"
   }
+  attr {
+    name: "use_cudnn_on_gpu"
+    type: "bool"
+    default_value {
+      b: true
+    }
+  }
   attr {
     name: "padding"
     type: "string"
@@ -9650,9 +9578,22 @@ op {
       }
     }
   }
+  attr {
+    name: "data_format"
+    type: "string"
+    default_value {
+      s: "NHWC"
+    }
+    allowed_values {
+      list {
+        s: "NHWC"
+        s: "NCHW"
+      }
+    }
+  }
 }
 op {
-  name: "DepthwiseConv2dNative"
+  name: "Conv2D"
   input_arg {
     name: "input"
     type_attr: "T"
@@ -9670,8 +9611,9 @@ op {
     type: "type"
     allowed_values {
       list {
+        type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
-        type: DT_DOUBLE
       }
     }
   }
@@ -9679,6 +9621,13 @@ op {
     name: "strides"
     type: "list(int)"
   }
+  attr {
+    name: "use_cudnn_on_gpu"
+    type: "bool"
+    default_value {
+      b: true
+    }
+  }
   attr {
     name: "padding"
     type: "string"
@@ -9702,9 +9651,21 @@ op {
       }
     }
   }
+  attr {
+    name: "dilations"
+    type: "list(int)"
+    default_value {
+      list {
+        i: 1
+        i: 1
+        i: 1
+        i: 1
+      }
+    }
+  }
 }
 op {
-  name: "DepthwiseConv2dNativeBackpropFilter"
+  name: "Conv2DBackpropFilter"
   input_arg {
     name: "input"
     type_attr: "T"
@@ -9726,8 +9687,8 @@ op {
     type: "type"
     allowed_values {
       list {
+        type: DT_HALF
         type: DT_FLOAT
-        type: DT_DOUBLE
       }
     }
   }
@@ -9735,6 +9696,13 @@ op {
     name: "strides"
     type: "list(int)"
   }
+  attr {
+    name: "use_cudnn_on_gpu"
+    type: "bool"
+    default_value {
+      b: true
+    }
+  }
   attr {
     name: "padding"
     type: "string"
@@ -9745,9 +9713,22 @@ op {
       }
     }
   }
+  attr {
+    name: "data_format"
+    type: "string"
+    default_value {
+      s: "NHWC"
+    }
+    allowed_values {
+      list {
+        s: "NHWC"
+        s: "NCHW"
+      }
+    }
+  }
 }
 op {
-  name: "DepthwiseConv2dNativeBackpropFilter"
+  name: "Conv2DBackpropFilter"
   input_arg {
     name: "input"
     type_attr: "T"
@@ -9769,8 +9750,9 @@ op {
     type: "type"
     allowed_values {
       list {
+        type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
-        type: DT_DOUBLE
       }
     }
   }
@@ -9778,6 +9760,13 @@ op {
     name: "strides"
     type: "list(int)"
   }
+  attr {
+    name: "use_cudnn_on_gpu"
+    type: "bool"
+    default_value {
+      b: true
+    }
+  }
   attr {
     name: "padding"
     type: "string"
@@ -9801,9 +9790,21 @@ op {
       }
     }
   }
+  attr {
+    name: "dilations"
+    type: "list(int)"
+    default_value {
+      list {
+        i: 1
+        i: 1
+        i: 1
+        i: 1
+      }
+    }
+  }
 }
 op {
-  name: "DepthwiseConv2dNativeBackpropInput"
+  name: "Conv2DBackpropInput"
   input_arg {
     name: "input_sizes"
     type: DT_INT32
@@ -9825,8 +9826,8 @@ op {
     type: "type"
     allowed_values {
       list {
+        type: DT_HALF
         type: DT_FLOAT
-        type: DT_DOUBLE
       }
     }
   }
@@ -9834,6 +9835,13 @@ op {
     name: "strides"
     type: "list(int)"
   }
+  attr {
+    name: "use_cudnn_on_gpu"
+    type: "bool"
+    default_value {
+      b: true
+    }
+  }
   attr {
     name: "padding"
     type: "string"
@@ -9844,9 +9852,22 @@ op {
       }
     }
   }
+  attr {
+    name: "data_format"
+    type: "string"
+    default_value {
+      s: "NHWC"
+    }
+    allowed_values {
+      list {
+        s: "NHWC"
+        s: "NCHW"
+      }
+    }
+  }
 }
 op {
-  name: "DepthwiseConv2dNativeBackpropInput"
+  name: "Conv2DBackpropInput"
   input_arg {
     name: "input_sizes"
     type: DT_INT32
@@ -9868,8 +9889,9 @@ op {
     type: "type"
     allowed_values {
       list {
+        type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
-        type: DT_DOUBLE
       }
     }
   }
@@ -9877,6 +9899,13 @@ op {
     name: "strides"
     type: "list(int)"
   }
+  attr {
+    name: "use_cudnn_on_gpu"
+    type: "bool"
+    default_value {
+      b: true
+    }
+  }
   attr {
     name: "padding"
     type: "string"
@@ -9900,195 +9929,122 @@ op {
       }
     }
   }
+  attr {
+    name: "dilations"
+    type: "list(int)"
+    default_value {
+      list {
+        i: 1
+        i: 1
+        i: 1
+        i: 1
+      }
+    }
+  }
 }
 op {
-  name: "Dequantize"
+  name: "Conv3D"
   input_arg {
     name: "input"
     type_attr: "T"
   }
   input_arg {
-    name: "min_range"
-    type: DT_FLOAT
-  }
-  input_arg {
-    name: "max_range"
-    type: DT_FLOAT
+    name: "filter"
+    type_attr: "T"
   }
   output_arg {
     name: "output"
-    type: DT_FLOAT
+    type_attr: "T"
   }
   attr {
     name: "T"
     type: "type"
     allowed_values {
       list {
-        type: DT_QINT8
-        type: DT_QUINT8
-        type: DT_QINT16
-        type: DT_QUINT16
-        type: DT_QINT32
+        type: DT_FLOAT
+        type: DT_DOUBLE
       }
     }
   }
   attr {
-    name: "mode"
+    name: "strides"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 5
+  }
+  attr {
+    name: "padding"
     type: "string"
-    default_value {
-      s: "MIN_COMBINED"
-    }
     allowed_values {
       list {
-        s: "MIN_COMBINED"
-        s: "MIN_FIRST"
+        s: "SAME"
+        s: "VALID"
       }
     }
   }
 }
 op {
-  name: "Dequantize"
+  name: "Conv3D"
   input_arg {
     name: "input"
     type_attr: "T"
   }
   input_arg {
-    name: "min_range"
-    type: DT_FLOAT
-  }
-  input_arg {
-    name: "max_range"
-    type: DT_FLOAT
+    name: "filter"
+    type_attr: "T"
   }
   output_arg {
     name: "output"
-    type: DT_FLOAT
+    type_attr: "T"
   }
   attr {
     name: "T"
     type: "type"
     allowed_values {
       list {
-        type: DT_QINT8
-        type: DT_QUINT8
-        type: DT_QINT16
-        type: DT_QUINT16
-        type: DT_QINT32
+        type: DT_FLOAT
+        type: DT_DOUBLE
       }
     }
   }
   attr {
-    name: "mode"
+    name: "strides"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 5
+  }
+  attr {
+    name: "padding"
     type: "string"
-    default_value {
-      s: "MIN_COMBINED"
-    }
     allowed_values {
       list {
-        s: "MIN_COMBINED"
-        s: "MIN_FIRST"
-        s: "SCALED"
+        s: "SAME"
+        s: "VALID"
       }
     }
   }
-}
-op {
-  name: "DeserializeIterator"
-  input_arg {
-    name: "resource_handle"
-    type: DT_RESOURCE
-  }
-  input_arg {
-    name: "serialized"
-    type: DT_VARIANT
-  }
-  is_stateful: true
-}
-op {
-  name: "DeserializeManySparse"
-  input_arg {
-    name: "serialized_sparse"
-    type: DT_STRING
-  }
-  output_arg {
-    name: "sparse_indices"
-    type: DT_INT64
-  }
-  output_arg {
-    name: "sparse_values"
-    type_attr: "dtype"
-  }
-  output_arg {
-    name: "sparse_shape"
-    type: DT_INT64
-  }
-  attr {
-    name: "dtype"
-    type: "type"
-  }
-}
-op {
-  name: "DeserializeSparse"
-  input_arg {
-    name: "serialized_sparse"
-    type: DT_STRING
-  }
-  output_arg {
-    name: "sparse_indices"
-    type: DT_INT64
-  }
-  output_arg {
-    name: "sparse_values"
-    type_attr: "dtype"
-  }
-  output_arg {
-    name: "sparse_shape"
-    type: DT_INT64
-  }
-  attr {
-    name: "dtype"
-    type: "type"
-  }
-}
-op {
-  name: "DestroyResourceOp"
-  input_arg {
-    name: "resource"
-    type: DT_RESOURCE
-  }
   attr {
-    name: "ignore_lookup_error"
-    type: "bool"
+    name: "data_format"
+    type: "string"
     default_value {
-      b: true
+      s: "NDHWC"
+    }
+    allowed_values {
+      list {
+        s: "NDHWC"
+        s: "NCDHW"
+      }
     }
   }
-  is_stateful: true
 }
 op {
-  name: "DestroyTemporaryVariable"
+  name: "Conv3D"
   input_arg {
-    name: "ref"
-    type_attr: "T"
-    is_ref: true
-  }
-  output_arg {
-    name: "value"
+    name: "input"
     type_attr: "T"
   }
-  attr {
-    name: "T"
-    type: "type"
-  }
-  attr {
-    name: "var_name"
-    type: "string"
-  }
-}
-op {
-  name: "Diag"
   input_arg {
-    name: "diagonal"
+    name: "filter"
     type_attr: "T"
   }
   output_arg {
@@ -10100,65 +10056,58 @@ op {
     type: "type"
     allowed_values {
       list {
+        type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT32
-        type: DT_INT64
-        type: DT_COMPLEX64
-        type: DT_COMPLEX128
       }
     }
   }
-}
-op {
-  name: "DiagPart"
-  input_arg {
-    name: "input"
-    type_attr: "T"
-  }
-  output_arg {
-    name: "diagonal"
-    type_attr: "T"
+  attr {
+    name: "strides"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 5
   }
   attr {
-    name: "T"
-    type: "type"
+    name: "padding"
+    type: "string"
     allowed_values {
       list {
-        type: DT_FLOAT
-        type: DT_DOUBLE
-        type: DT_INT32
-        type: DT_INT64
-        type: DT_COMPLEX64
-        type: DT_COMPLEX128
+        s: "SAME"
+        s: "VALID"
       }
     }
   }
-}
-op {
-  name: "Digamma"
-  input_arg {
-    name: "x"
-    type_attr: "T"
-  }
-  output_arg {
-    name: "y"
-    type_attr: "T"
-  }
   attr {
-    name: "T"
-    type: "type"
+    name: "data_format"
+    type: "string"
+    default_value {
+      s: "NDHWC"
+    }
     allowed_values {
       list {
-        type: DT_HALF
-        type: DT_FLOAT
-        type: DT_DOUBLE
+        s: "NDHWC"
+        s: "NCDHW"
+      }
+    }
+  }
+  attr {
+    name: "dilations"
+    type: "list(int)"
+    default_value {
+      list {
+        i: 1
+        i: 1
+        i: 1
+        i: 1
+        i: 1
       }
     }
   }
 }
 op {
-  name: "Dilation2D"
+  name: "Conv3DBackpropFilter"
   input_arg {
     name: "input"
     type_attr: "T"
@@ -10167,6 +10116,10 @@ op {
     name: "filter"
     type_attr: "T"
   }
+  input_arg {
+    name: "out_backprop"
+    type_attr: "T"
+  }
   output_arg {
     name: "output"
     type_attr: "T"
@@ -10178,13 +10131,6 @@ op {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT32
-        type: DT_INT64
-        type: DT_UINT8
-        type: DT_INT16
-        type: DT_INT8
-        type: DT_UINT16
-        type: DT_HALF
       }
     }
   }
@@ -10192,13 +10138,7 @@ op {
     name: "strides"
     type: "list(int)"
     has_minimum: true
-    minimum: 4
-  }
-  attr {
-    name: "rates"
-    type: "list(int)"
-    has_minimum: true
-    minimum: 4
+    minimum: 5
   }
   attr {
     name: "padding"
@@ -10210,9 +10150,12 @@ op {
       }
     }
   }
+  deprecation {
+    version: 10
+  }
 }
 op {
-  name: "Dilation2D"
+  name: "Conv3DBackpropFilter"
   input_arg {
     name: "input"
     type_attr: "T"
@@ -10221,6 +10164,10 @@ op {
     name: "filter"
     type_attr: "T"
   }
+  input_arg {
+    name: "out_backprop"
+    type_attr: "T"
+  }
   output_arg {
     name: "output"
     type_attr: "T"
@@ -10230,17 +10177,9 @@ op {
     type: "type"
     allowed_values {
       list {
+        type: DT_HALF
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT32
-        type: DT_INT64
-        type: DT_UINT8
-        type: DT_INT16
-        type: DT_INT8
-        type: DT_UINT16
-        type: DT_HALF
-        type: DT_UINT32
-        type: DT_UINT64
       }
     }
   }
@@ -10248,13 +10187,7 @@ op {
     name: "strides"
     type: "list(int)"
     has_minimum: true
-    minimum: 4
-  }
-  attr {
-    name: "rates"
-    type: "list(int)"
-    has_minimum: true
-    minimum: 4
+    minimum: 5
   }
   attr {
     name: "padding"
@@ -10266,23 +10199,26 @@ op {
       }
     }
   }
+  deprecation {
+    version: 10
+  }
 }
 op {
-  name: "Dilation2DBackpropFilter"
+  name: "Conv3DBackpropFilterV2"
   input_arg {
     name: "input"
     type_attr: "T"
   }
   input_arg {
-    name: "filter"
-    type_attr: "T"
+    name: "filter_sizes"
+    type: DT_INT32
   }
   input_arg {
     name: "out_backprop"
     type_attr: "T"
   }
   output_arg {
-    name: "filter_backprop"
+    name: "output"
     type_attr: "T"
   }
   attr {
@@ -10292,13 +10228,6 @@ op {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT32
-        type: DT_INT64
-        type: DT_UINT8
-        type: DT_INT16
-        type: DT_INT8
-        type: DT_UINT16
-        type: DT_HALF
       }
     }
   }
@@ -10306,13 +10235,7 @@ op {
     name: "strides"
     type: "list(int)"
     has_minimum: true
-    minimum: 4
-  }
-  attr {
-    name: "rates"
-    type: "list(int)"
-    has_minimum: true
-    minimum: 4
+    minimum: 5
   }
   attr {
     name: "padding"
@@ -10326,21 +10249,21 @@ op {
   }
 }
 op {
-  name: "Dilation2DBackpropFilter"
+  name: "Conv3DBackpropFilterV2"
   input_arg {
     name: "input"
     type_attr: "T"
   }
   input_arg {
-    name: "filter"
-    type_attr: "T"
+    name: "filter_sizes"
+    type: DT_INT32
   }
   input_arg {
     name: "out_backprop"
     type_attr: "T"
   }
   output_arg {
-    name: "filter_backprop"
+    name: "output"
     type_attr: "T"
   }
   attr {
@@ -10350,15 +10273,6 @@ op {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT32
-        type: DT_INT64
-        type: DT_UINT8
-        type: DT_INT16
-        type: DT_INT8
-        type: DT_UINT16
-        type: DT_HALF
-        type: DT_UINT32
-        type: DT_UINT64
       }
     }
   }
@@ -10366,13 +10280,7 @@ op {
     name: "strides"
     type: "list(int)"
     has_minimum: true
-    minimum: 4
-  }
-  attr {
-    name: "rates"
-    type: "list(int)"
-    has_minimum: true
-    minimum: 4
+    minimum: 5
   }
   attr {
     name: "padding"
@@ -10384,23 +10292,36 @@ op {
       }
     }
   }
+  attr {
+    name: "data_format"
+    type: "string"
+    default_value {
+      s: "NDHWC"
+    }
+    allowed_values {
+      list {
+        s: "NDHWC"
+        s: "NCDHW"
+      }
+    }
+  }
 }
 op {
-  name: "Dilation2DBackpropInput"
+  name: "Conv3DBackpropFilterV2"
   input_arg {
     name: "input"
     type_attr: "T"
   }
   input_arg {
-    name: "filter"
-    type_attr: "T"
+    name: "filter_sizes"
+    type: DT_INT32
   }
   input_arg {
     name: "out_backprop"
     type_attr: "T"
   }
   output_arg {
-    name: "in_backprop"
+    name: "output"
     type_attr: "T"
   }
   attr {
@@ -10408,15 +10329,10 @@ op {
     type: "type"
     allowed_values {
       list {
+        type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT32
-        type: DT_INT64
-        type: DT_UINT8
-        type: DT_INT16
-        type: DT_INT8
-        type: DT_UINT16
-        type: DT_HALF
       }
     }
   }
@@ -10424,13 +10340,7 @@ op {
     name: "strides"
     type: "list(int)"
     has_minimum: true
-    minimum: 4
-  }
-  attr {
-    name: "rates"
-    type: "list(int)"
-    has_minimum: true
-    minimum: 4
+    minimum: 5
   }
   attr {
     name: "padding"
@@ -10442,9 +10352,35 @@ op {
       }
     }
   }
+  attr {
+    name: "data_format"
+    type: "string"
+    default_value {
+      s: "NDHWC"
+    }
+    allowed_values {
+      list {
+        s: "NDHWC"
+        s: "NCDHW"
+      }
+    }
+  }
+  attr {
+    name: "dilations"
+    type: "list(int)"
+    default_value {
+      list {
+        i: 1
+        i: 1
+        i: 1
+        i: 1
+        i: 1
+      }
+    }
+  }
 }
 op {
-  name: "Dilation2DBackpropInput"
+  name: "Conv3DBackpropInput"
   input_arg {
     name: "input"
     type_attr: "T"
@@ -10458,7 +10394,7 @@ op {
     type_attr: "T"
   }
   output_arg {
-    name: "in_backprop"
+    name: "output"
     type_attr: "T"
   }
   attr {
@@ -10468,15 +10404,6 @@ op {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT32
-        type: DT_INT64
-        type: DT_UINT8
-        type: DT_INT16
-        type: DT_INT8
-        type: DT_UINT16
-        type: DT_HALF
-        type: DT_UINT32
-        type: DT_UINT64
       }
     }
   }
@@ -10484,13 +10411,7 @@ op {
     name: "strides"
     type: "list(int)"
     has_minimum: true
-    minimum: 4
-  }
-  attr {
-    name: "rates"
-    type: "list(int)"
-    has_minimum: true
-    minimum: 4
+    minimum: 5
   }
   attr {
     name: "padding"
@@ -10502,19 +10423,26 @@ op {
       }
     }
   }
+  deprecation {
+    version: 10
+  }
 }
 op {
-  name: "Div"
+  name: "Conv3DBackpropInput"
   input_arg {
-    name: "x"
+    name: "input"
     type_attr: "T"
   }
   input_arg {
-    name: "y"
+    name: "filter"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "out_backprop"
     type_attr: "T"
   }
   output_arg {
-    name: "z"
+    name: "output"
     type_attr: "T"
   }
   attr {
@@ -10525,149 +10453,90 @@ op {
         type: DT_HALF
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_UINT8
-        type: DT_INT8
-        type: DT_UINT16
-        type: DT_INT16
-        type: DT_INT32
-        type: DT_INT64
-        type: DT_COMPLEX64
-        type: DT_COMPLEX128
       }
     }
   }
-}
-op {
-  name: "DrawBoundingBoxes"
-  input_arg {
-    name: "images"
-    type_attr: "T"
-  }
-  input_arg {
-    name: "boxes"
-    type: DT_FLOAT
-  }
-  output_arg {
-    name: "output"
-    type_attr: "T"
+  attr {
+    name: "strides"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 5
   }
   attr {
-    name: "T"
-    type: "type"
-    default_value {
-      type: DT_FLOAT
-    }
+    name: "padding"
+    type: "string"
     allowed_values {
       list {
-        type: DT_FLOAT
-        type: DT_HALF
+        s: "SAME"
+        s: "VALID"
       }
     }
   }
+  deprecation {
+    version: 10
+  }
 }
 op {
-  name: "DynamicPartition"
+  name: "Conv3DBackpropInputV2"
   input_arg {
-    name: "data"
+    name: "input_sizes"
+    type: DT_INT32
+  }
+  input_arg {
+    name: "filter"
     type_attr: "T"
   }
   input_arg {
-    name: "partitions"
-    type: DT_INT32
+    name: "out_backprop"
+    type_attr: "T"
   }
   output_arg {
-    name: "outputs"
+    name: "output"
     type_attr: "T"
-    number_attr: "num_partitions"
-  }
-  attr {
-    name: "num_partitions"
-    type: "int"
-    has_minimum: true
-    minimum: 1
   }
   attr {
     name: "T"
     type: "type"
-  }
-}
-op {
-  name: "DynamicStitch"
-  input_arg {
-    name: "indices"
-    type: DT_INT32
-    number_attr: "N"
-  }
-  input_arg {
-    name: "data"
-    type_attr: "T"
-    number_attr: "N"
-  }
-  output_arg {
-    name: "merged"
-    type_attr: "T"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+      }
+    }
   }
   attr {
-    name: "N"
-    type: "int"
+    name: "strides"
+    type: "list(int)"
     has_minimum: true
-    minimum: 1
+    minimum: 5
   }
   attr {
-    name: "T"
-    type: "type"
+    name: "padding"
+    type: "string"
+    allowed_values {
+      list {
+        s: "SAME"
+        s: "VALID"
+      }
+    }
   }
 }
 op {
-  name: "EditDistance"
+  name: "Conv3DBackpropInputV2"
   input_arg {
-    name: "hypothesis_indices"
-    type: DT_INT64
+    name: "input_sizes"
+    type: DT_INT32
   }
   input_arg {
-    name: "hypothesis_values"
+    name: "filter"
     type_attr: "T"
   }
   input_arg {
-    name: "hypothesis_shape"
-    type: DT_INT64
-  }
-  input_arg {
-    name: "truth_indices"
-    type: DT_INT64
-  }
-  input_arg {
-    name: "truth_values"
+    name: "out_backprop"
     type_attr: "T"
   }
-  input_arg {
-    name: "truth_shape"
-    type: DT_INT64
-  }
   output_arg {
     name: "output"
-    type: DT_FLOAT
-  }
-  attr {
-    name: "normalize"
-    type: "bool"
-    default_value {
-      b: true
-    }
-  }
-  attr {
-    name: "T"
-    type: "type"
-  }
-}
-op {
-  name: "Elu"
-  input_arg {
-    name: "features"
-    type_attr: "T"
-  }
-  output_arg {
-    name: "activations"
     type_attr: "T"
   }
   attr {
@@ -10677,71 +10546,55 @@ op {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_HALF
       }
     }
   }
-}
-op {
-  name: "Elu"
-  input_arg {
-    name: "features"
-    type_attr: "T"
-  }
-  output_arg {
-    name: "activations"
-    type_attr: "T"
+  attr {
+    name: "strides"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 5
   }
   attr {
-    name: "T"
-    type: "type"
+    name: "padding"
+    type: "string"
     allowed_values {
       list {
-        type: DT_HALF
-        type: DT_FLOAT
-        type: DT_DOUBLE
+        s: "SAME"
+        s: "VALID"
       }
     }
   }
-}
-op {
-  name: "EluGrad"
-  input_arg {
-    name: "gradients"
-    type_attr: "T"
-  }
-  input_arg {
-    name: "outputs"
-    type_attr: "T"
-  }
-  output_arg {
-    name: "backprops"
-    type_attr: "T"
-  }
   attr {
-    name: "T"
-    type: "type"
+    name: "data_format"
+    type: "string"
+    default_value {
+      s: "NDHWC"
+    }
     allowed_values {
       list {
-        type: DT_FLOAT
-        type: DT_DOUBLE
-        type: DT_HALF
+        s: "NDHWC"
+        s: "NCDHW"
       }
     }
   }
 }
 op {
-  name: "EluGrad"
+  name: "Conv3DBackpropInputV2"
   input_arg {
-    name: "gradients"
+    name: "input_sizes"
+    type: DT_INT32
+  }
+  input_arg {
+    name: "filter"
     type_attr: "T"
   }
   input_arg {
-    name: "outputs"
+    name: "out_backprop"
     type_attr: "T"
   }
   output_arg {
-    name: "backprops"
+    name: "output"
     type_attr: "T"
   }
   attr {
@@ -10750,167 +10603,136 @@ op {
     allowed_values {
       list {
         type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
       }
     }
   }
-}
-op {
-  name: "EncodeBase64"
-  input_arg {
-    name: "input"
-    type: DT_STRING
-  }
-  output_arg {
-    name: "output"
-    type: DT_STRING
-  }
   attr {
-    name: "pad"
-    type: "bool"
-    default_value {
-      b: false
-    }
-  }
-}
-op {
-  name: "EncodeJpeg"
-  input_arg {
-    name: "image"
-    type: DT_UINT8
-  }
-  output_arg {
-    name: "contents"
-    type: DT_STRING
+    name: "strides"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 5
   }
   attr {
-    name: "format"
+    name: "padding"
     type: "string"
-    default_value {
-      s: ""
-    }
     allowed_values {
       list {
-        s: ""
-        s: "grayscale"
-        s: "rgb"
+        s: "SAME"
+        s: "VALID"
       }
     }
   }
   attr {
-    name: "quality"
-    type: "int"
-    default_value {
-      i: 95
-    }
-  }
-  attr {
-    name: "progressive"
-    type: "bool"
-    default_value {
-      b: false
-    }
-  }
-  attr {
-    name: "optimize_size"
-    type: "bool"
-    default_value {
-      b: false
-    }
-  }
-  attr {
-    name: "chroma_downsampling"
-    type: "bool"
-    default_value {
-      b: true
-    }
-  }
-  attr {
-    name: "density_unit"
+    name: "data_format"
     type: "string"
     default_value {
-      s: "in"
+      s: "NDHWC"
     }
     allowed_values {
       list {
-        s: "in"
-        s: "cm"
+        s: "NDHWC"
+        s: "NCDHW"
       }
     }
   }
   attr {
-    name: "x_density"
-    type: "int"
+    name: "dilations"
+    type: "list(int)"
     default_value {
-      i: 300
+      list {
+        i: 1
+        i: 1
+        i: 1
+        i: 1
+        i: 1
+      }
     }
   }
+}
+op {
+  name: "Copy"
+  input_arg {
+    name: "input"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
   attr {
-    name: "y_density"
-    type: "int"
-    default_value {
-      i: 300
-    }
+    name: "T"
+    type: "type"
   }
   attr {
-    name: "xmp_metadata"
+    name: "tensor_name"
     type: "string"
     default_value {
       s: ""
     }
   }
+  allows_uninitialized_input: true
 }
 op {
-  name: "EncodePng"
+  name: "Copy"
   input_arg {
-    name: "image"
+    name: "input"
     type_attr: "T"
   }
   output_arg {
-    name: "contents"
-    type: DT_STRING
+    name: "output"
+    type_attr: "T"
   }
   attr {
-    name: "compression"
-    type: "int"
+    name: "T"
+    type: "type"
+  }
+  attr {
+    name: "tensor_name"
+    type: "string"
     default_value {
-      i: -1
+      s: ""
     }
   }
   attr {
-    name: "T"
-    type: "type"
+    name: "debug_ops_spec"
+    type: "list(string)"
     default_value {
-      type: DT_UINT8
-    }
-    allowed_values {
       list {
-        type: DT_UINT8
-        type: DT_UINT16
       }
     }
   }
+  allows_uninitialized_input: true
 }
 op {
-  name: "EncodeWav"
-  input_arg {
-    name: "audio"
-    type: DT_FLOAT
-  }
+  name: "CopyHost"
   input_arg {
-    name: "sample_rate"
-    type: DT_INT32
+    name: "input"
+    type_attr: "T"
   }
   output_arg {
-    name: "contents"
-    type: DT_STRING
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+  }
+  attr {
+    name: "tensor_name"
+    type: "string"
+    default_value {
+      s: ""
+    }
   }
+  allows_uninitialized_input: true
 }
 op {
-  name: "Enter"
+  name: "CopyHost"
   input_arg {
-    name: "data"
+    name: "input"
     type_attr: "T"
   }
   output_arg {
@@ -10922,38 +10744,32 @@ op {
     type: "type"
   }
   attr {
-    name: "frame_name"
+    name: "tensor_name"
     type: "string"
-  }
-  attr {
-    name: "is_constant"
-    type: "bool"
     default_value {
-      b: false
+      s: ""
     }
   }
   attr {
-    name: "parallel_iterations"
-    type: "int"
+    name: "debug_ops_spec"
+    type: "list(string)"
     default_value {
-      i: 10
+      list {
+      }
     }
   }
+  allows_uninitialized_input: true
 }
 op {
-  name: "Equal"
+  name: "Cos"
   input_arg {
     name: "x"
     type_attr: "T"
   }
-  input_arg {
+  output_arg {
     name: "y"
     type_attr: "T"
   }
-  output_arg {
-    name: "z"
-    type: DT_BOOL
-  }
   attr {
     name: "T"
     type: "type"
@@ -10962,25 +10778,14 @@ op {
         type: DT_HALF
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_UINT8
-        type: DT_INT8
-        type: DT_INT16
-        type: DT_INT32
-        type: DT_INT64
         type: DT_COMPLEX64
-        type: DT_QUINT8
-        type: DT_QINT8
-        type: DT_QINT32
-        type: DT_STRING
-        type: DT_BOOL
         type: DT_COMPLEX128
       }
     }
   }
-  is_commutative: true
 }
 op {
-  name: "Erf"
+  name: "Cos"
   input_arg {
     name: "x"
     type_attr: "T"
@@ -10995,14 +10800,17 @@ op {
     allowed_values {
       list {
         type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
-      }
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+      }
     }
   }
 }
 op {
-  name: "Erfc"
+  name: "Cosh"
   input_arg {
     name: "x"
     type_attr: "T"
@@ -11019,27 +10827,14 @@ op {
         type: DT_HALF
         type: DT_FLOAT
         type: DT_DOUBLE
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
       }
     }
   }
 }
 op {
-  name: "Exit"
-  input_arg {
-    name: "data"
-    type_attr: "T"
-  }
-  output_arg {
-    name: "output"
-    type_attr: "T"
-  }
-  attr {
-    name: "T"
-    type: "type"
-  }
-}
-op {
-  name: "Exp"
+  name: "Cosh"
   input_arg {
     name: "x"
     type_attr: "T"
@@ -11054,6 +10849,7 @@ op {
     allowed_values {
       list {
         type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
         type: DT_COMPLEX64
@@ -11063,29 +10859,23 @@ op {
   }
 }
 op {
-  name: "ExpandDims"
+  name: "CountUpTo"
   input_arg {
-    name: "input"
+    name: "ref"
     type_attr: "T"
-  }
-  input_arg {
-    name: "dim"
-    type_attr: "Tdim"
+    is_ref: true
   }
   output_arg {
     name: "output"
     type_attr: "T"
   }
   attr {
-    name: "T"
-    type: "type"
+    name: "limit"
+    type: "int"
   }
   attr {
-    name: "Tdim"
+    name: "T"
     type: "type"
-    default_value {
-      type: DT_INT32
-    }
     allowed_values {
       list {
         type: DT_INT32
@@ -11095,1745 +10885,1825 @@ op {
   }
 }
 op {
-  name: "Expm1"
+  name: "CropAndResize"
   input_arg {
-    name: "x"
+    name: "image"
     type_attr: "T"
   }
+  input_arg {
+    name: "boxes"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "box_ind"
+    type: DT_INT32
+  }
+  input_arg {
+    name: "crop_size"
+    type: DT_INT32
+  }
   output_arg {
-    name: "y"
-    type_attr: "T"
+    name: "crops"
+    type: DT_FLOAT
   }
   attr {
     name: "T"
     type: "type"
     allowed_values {
       list {
+        type: DT_UINT8
+        type: DT_INT8
+        type: DT_INT16
+        type: DT_INT32
+        type: DT_INT64
         type: DT_HALF
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_COMPLEX64
-        type: DT_COMPLEX128
       }
     }
   }
+  attr {
+    name: "method"
+    type: "string"
+    default_value {
+      s: "bilinear"
+    }
+    allowed_values {
+      list {
+        s: "bilinear"
+      }
+    }
+  }
+  attr {
+    name: "extrapolation_value"
+    type: "float"
+    default_value {
+      f: 0
+    }
+  }
 }
 op {
-  name: "ExtractGlimpse"
+  name: "CropAndResize"
   input_arg {
-    name: "input"
+    name: "image"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "boxes"
     type: DT_FLOAT
   }
   input_arg {
-    name: "size"
+    name: "box_ind"
     type: DT_INT32
   }
   input_arg {
-    name: "offsets"
-    type: DT_FLOAT
+    name: "crop_size"
+    type: DT_INT32
   }
   output_arg {
-    name: "glimpse"
+    name: "crops"
     type: DT_FLOAT
   }
   attr {
-    name: "centered"
-    type: "bool"
-    default_value {
-      b: true
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT8
+        type: DT_INT16
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_HALF
+        type: DT_FLOAT
+        type: DT_DOUBLE
+      }
     }
   }
   attr {
-    name: "normalized"
-    type: "bool"
+    name: "method"
+    type: "string"
     default_value {
-      b: true
+      s: "bilinear"
+    }
+    allowed_values {
+      list {
+        s: "bilinear"
+      }
     }
   }
   attr {
-    name: "uniform_noise"
-    type: "bool"
+    name: "extrapolation_value"
+    type: "float"
     default_value {
-      b: true
+      f: 0
     }
   }
 }
 op {
-  name: "ExtractImagePatches"
+  name: "CropAndResizeGradBoxes"
   input_arg {
-    name: "images"
-    type_attr: "T"
+    name: "grads"
+    type: DT_FLOAT
   }
-  output_arg {
-    name: "patches"
+  input_arg {
+    name: "image"
     type_attr: "T"
   }
-  attr {
-    name: "ksizes"
-    type: "list(int)"
-    has_minimum: true
-    minimum: 4
+  input_arg {
+    name: "boxes"
+    type: DT_FLOAT
   }
-  attr {
-    name: "strides"
-    type: "list(int)"
-    has_minimum: true
-    minimum: 4
+  input_arg {
+    name: "box_ind"
+    type: DT_INT32
   }
-  attr {
-    name: "rates"
-    type: "list(int)"
-    has_minimum: true
-    minimum: 4
+  output_arg {
+    name: "output"
+    type: DT_FLOAT
   }
   attr {
     name: "T"
     type: "type"
     allowed_values {
       list {
-        type: DT_FLOAT
-        type: DT_DOUBLE
-        type: DT_INT32
-        type: DT_INT64
         type: DT_UINT8
-        type: DT_INT16
         type: DT_INT8
-        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT32
+        type: DT_INT64
         type: DT_HALF
+        type: DT_FLOAT
+        type: DT_DOUBLE
       }
     }
   }
   attr {
-    name: "padding"
+    name: "method"
     type: "string"
+    default_value {
+      s: "bilinear"
+    }
     allowed_values {
       list {
-        s: "SAME"
-        s: "VALID"
+        s: "bilinear"
       }
     }
   }
 }
 op {
-  name: "ExtractImagePatches"
+  name: "CropAndResizeGradBoxes"
   input_arg {
-    name: "images"
-    type_attr: "T"
+    name: "grads"
+    type: DT_FLOAT
   }
-  output_arg {
-    name: "patches"
+  input_arg {
+    name: "image"
     type_attr: "T"
   }
-  attr {
-    name: "ksizes"
-    type: "list(int)"
-    has_minimum: true
-    minimum: 4
+  input_arg {
+    name: "boxes"
+    type: DT_FLOAT
   }
-  attr {
-    name: "strides"
-    type: "list(int)"
-    has_minimum: true
-    minimum: 4
+  input_arg {
+    name: "box_ind"
+    type: DT_INT32
   }
-  attr {
-    name: "rates"
-    type: "list(int)"
-    has_minimum: true
-    minimum: 4
+  output_arg {
+    name: "output"
+    type: DT_FLOAT
   }
   attr {
     name: "T"
     type: "type"
     allowed_values {
       list {
-        type: DT_FLOAT
-        type: DT_DOUBLE
-        type: DT_INT32
-        type: DT_INT64
         type: DT_UINT8
-        type: DT_INT16
-        type: DT_INT8
         type: DT_UINT16
+        type: DT_INT8
+        type: DT_INT16
+        type: DT_INT32
+        type: DT_INT64
         type: DT_HALF
-        type: DT_UINT32
-        type: DT_UINT64
+        type: DT_FLOAT
+        type: DT_DOUBLE
       }
     }
   }
   attr {
-    name: "padding"
+    name: "method"
     type: "string"
+    default_value {
+      s: "bilinear"
+    }
     allowed_values {
       list {
-        s: "SAME"
-        s: "VALID"
+        s: "bilinear"
       }
     }
   }
 }
 op {
-  name: "ExtractJpegShape"
+  name: "CropAndResizeGradImage"
   input_arg {
-    name: "contents"
-    type: DT_STRING
+    name: "grads"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "boxes"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "box_ind"
+    type: DT_INT32
+  }
+  input_arg {
+    name: "image_size"
+    type: DT_INT32
   }
   output_arg {
-    name: "image_shape"
-    type_attr: "output_type"
+    name: "output"
+    type_attr: "T"
   }
   attr {
-    name: "output_type"
+    name: "T"
     type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_HALF
+        type: DT_DOUBLE
+      }
+    }
+  }
+  attr {
+    name: "method"
+    type: "string"
     default_value {
-      type: DT_INT32
+      s: "bilinear"
     }
     allowed_values {
       list {
-        type: DT_INT32
-        type: DT_INT64
+        s: "bilinear"
       }
     }
   }
 }
 op {
-  name: "FFT"
+  name: "Cross"
   input_arg {
-    name: "input"
-    type: DT_COMPLEX64
-  }
-  output_arg {
-    name: "output"
-    type: DT_COMPLEX64
+    name: "a"
+    type_attr: "T"
   }
-}
-op {
-  name: "FFT2D"
   input_arg {
-    name: "input"
-    type: DT_COMPLEX64
+    name: "b"
+    type_attr: "T"
   }
   output_arg {
-    name: "output"
-    type: DT_COMPLEX64
+    name: "product"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_UINT16
+        type: DT_HALF
+      }
+    }
   }
 }
 op {
-  name: "FFT3D"
+  name: "Cross"
   input_arg {
-    name: "input"
-    type: DT_COMPLEX64
+    name: "a"
+    type_attr: "T"
   }
-  output_arg {
-    name: "output"
-    type: DT_COMPLEX64
+  input_arg {
+    name: "b"
+    type_attr: "T"
   }
-}
-op {
-  name: "FIFOQueue"
   output_arg {
-    name: "handle"
-    type: DT_STRING
-    is_ref: true
-  }
-  attr {
-    name: "component_types"
-    type: "list(type)"
-    has_minimum: true
-    minimum: 1
+    name: "product"
+    type_attr: "T"
   }
   attr {
-    name: "shapes"
-    type: "list(shape)"
-    default_value {
+    name: "T"
+    type: "type"
+    allowed_values {
       list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_UINT16
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
       }
     }
-    has_minimum: true
   }
-  attr {
-    name: "capacity"
-    type: "int"
-    default_value {
-      i: -1
-    }
+}
+op {
+  name: "Cross"
+  input_arg {
+    name: "a"
+    type_attr: "T"
   }
-  attr {
-    name: "container"
-    type: "string"
-    default_value {
-      s: ""
-    }
+  input_arg {
+    name: "b"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "product"
+    type_attr: "T"
   }
   attr {
-    name: "shared_name"
-    type: "string"
-    default_value {
-      s: ""
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_UINT16
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
+      }
     }
   }
-  is_stateful: true
 }
 op {
-  name: "FIFOQueueV2"
-  output_arg {
-    name: "handle"
-    type: DT_RESOURCE
+  name: "Cumprod"
+  input_arg {
+    name: "x"
+    type_attr: "T"
   }
-  attr {
-    name: "component_types"
-    type: "list(type)"
-    has_minimum: true
-    minimum: 1
+  input_arg {
+    name: "axis"
+    type_attr: "Tidx"
+  }
+  output_arg {
+    name: "out"
+    type_attr: "T"
   }
   attr {
-    name: "shapes"
-    type: "list(shape)"
+    name: "exclusive"
+    type: "bool"
     default_value {
-      list {
-      }
+      b: false
     }
-    has_minimum: true
   }
   attr {
-    name: "capacity"
-    type: "int"
+    name: "reverse"
+    type: "bool"
     default_value {
-      i: -1
+      b: false
     }
   }
   attr {
-    name: "container"
-    type: "string"
-    default_value {
-      s: ""
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+      }
     }
   }
   attr {
-    name: "shared_name"
-    type: "string"
+    name: "Tidx"
+    type: "type"
     default_value {
-      s: ""
+      type: DT_INT32
+    }
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
     }
   }
-  is_stateful: true
 }
 op {
-  name: "Fact"
-  output_arg {
-    name: "fact"
-    type: DT_STRING
+  name: "Cumprod"
+  input_arg {
+    name: "x"
+    type_attr: "T"
   }
-}
-op {
-  name: "FakeQuantWithMinMaxArgs"
   input_arg {
-    name: "inputs"
-    type: DT_FLOAT
+    name: "axis"
+    type_attr: "Tidx"
   }
   output_arg {
-    name: "outputs"
-    type: DT_FLOAT
+    name: "out"
+    type_attr: "T"
   }
   attr {
-    name: "min"
-    type: "float"
+    name: "exclusive"
+    type: "bool"
     default_value {
-      f: -6
+      b: false
     }
   }
   attr {
-    name: "max"
-    type: "float"
+    name: "reverse"
+    type: "bool"
     default_value {
-      f: 6
+      b: false
     }
   }
-}
-op {
-  name: "FakeQuantWithMinMaxArgs"
-  input_arg {
-    name: "inputs"
-    type: DT_FLOAT
-  }
-  output_arg {
-    name: "outputs"
-    type: DT_FLOAT
-  }
   attr {
-    name: "min"
-    type: "float"
-    default_value {
-      f: -6
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+      }
     }
   }
   attr {
-    name: "max"
-    type: "float"
+    name: "Tidx"
+    type: "type"
     default_value {
-      f: 6
+      type: DT_INT32
     }
-  }
-  attr {
-    name: "num_bits"
-    type: "int"
-    default_value {
-      i: 8
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
     }
   }
 }
 op {
-  name: "FakeQuantWithMinMaxArgs"
+  name: "Cumprod"
   input_arg {
-    name: "inputs"
-    type: DT_FLOAT
+    name: "x"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "axis"
+    type_attr: "Tidx"
   }
   output_arg {
-    name: "outputs"
-    type: DT_FLOAT
+    name: "out"
+    type_attr: "T"
   }
   attr {
-    name: "min"
-    type: "float"
+    name: "exclusive"
+    type: "bool"
     default_value {
-      f: -6
+      b: false
     }
   }
   attr {
-    name: "max"
-    type: "float"
+    name: "reverse"
+    type: "bool"
     default_value {
-      f: 6
+      b: false
     }
   }
   attr {
-    name: "num_bits"
-    type: "int"
-    default_value {
-      i: 8
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
+      }
     }
   }
   attr {
-    name: "narrow_range"
-    type: "bool"
+    name: "Tidx"
+    type: "type"
     default_value {
-      b: false
+      type: DT_INT32
+    }
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
     }
   }
 }
 op {
-  name: "FakeQuantWithMinMaxArgsGradient"
+  name: "Cumsum"
   input_arg {
-    name: "gradients"
-    type: DT_FLOAT
+    name: "x"
+    type_attr: "T"
   }
   input_arg {
-    name: "inputs"
-    type: DT_FLOAT
+    name: "axis"
+    type_attr: "Tidx"
   }
   output_arg {
-    name: "backprops"
-    type: DT_FLOAT
+    name: "out"
+    type_attr: "T"
   }
   attr {
-    name: "min"
-    type: "float"
+    name: "exclusive"
+    type: "bool"
     default_value {
-      f: -6
+      b: false
     }
   }
   attr {
-    name: "max"
-    type: "float"
+    name: "reverse"
+    type: "bool"
     default_value {
-      f: 6
+      b: false
+    }
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+      }
+    }
+  }
+  attr {
+    name: "Tidx"
+    type: "type"
+    default_value {
+      type: DT_INT32
+    }
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
     }
   }
 }
 op {
-  name: "FakeQuantWithMinMaxArgsGradient"
+  name: "Cumsum"
   input_arg {
-    name: "gradients"
-    type: DT_FLOAT
+    name: "x"
+    type_attr: "T"
   }
   input_arg {
-    name: "inputs"
-    type: DT_FLOAT
+    name: "axis"
+    type_attr: "Tidx"
   }
   output_arg {
-    name: "backprops"
-    type: DT_FLOAT
+    name: "out"
+    type_attr: "T"
   }
   attr {
-    name: "min"
-    type: "float"
+    name: "exclusive"
+    type: "bool"
     default_value {
-      f: -6
+      b: false
     }
   }
   attr {
-    name: "max"
-    type: "float"
+    name: "reverse"
+    type: "bool"
     default_value {
-      f: 6
+      b: false
     }
   }
   attr {
-    name: "num_bits"
-    type: "int"
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+      }
+    }
+  }
+  attr {
+    name: "Tidx"
+    type: "type"
     default_value {
-      i: 8
+      type: DT_INT32
+    }
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
     }
   }
 }
 op {
-  name: "FakeQuantWithMinMaxArgsGradient"
+  name: "Cumsum"
   input_arg {
-    name: "gradients"
-    type: DT_FLOAT
+    name: "x"
+    type_attr: "T"
   }
   input_arg {
-    name: "inputs"
-    type: DT_FLOAT
+    name: "axis"
+    type_attr: "Tidx"
   }
   output_arg {
-    name: "backprops"
-    type: DT_FLOAT
+    name: "out"
+    type_attr: "T"
   }
   attr {
-    name: "min"
-    type: "float"
+    name: "exclusive"
+    type: "bool"
     default_value {
-      f: -6
+      b: false
     }
   }
   attr {
-    name: "max"
-    type: "float"
+    name: "reverse"
+    type: "bool"
     default_value {
-      f: 6
+      b: false
     }
   }
   attr {
-    name: "num_bits"
-    type: "int"
-    default_value {
-      i: 8
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
+      }
     }
   }
   attr {
-    name: "narrow_range"
-    type: "bool"
+    name: "Tidx"
+    type: "type"
     default_value {
-      b: false
+      type: DT_INT32
+    }
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
     }
   }
 }
 op {
-  name: "FakeQuantWithMinMaxVars"
-  input_arg {
-    name: "inputs"
-    type: DT_FLOAT
-  }
-  input_arg {
-    name: "min"
-    type: DT_FLOAT
-  }
+  name: "DataFormatDimMap"
   input_arg {
-    name: "max"
-    type: DT_FLOAT
+    name: "x"
+    type_attr: "T"
   }
   output_arg {
-    name: "outputs"
-    type: DT_FLOAT
-  }
-}
-op {
-  name: "FakeQuantWithMinMaxVars"
-  input_arg {
-    name: "inputs"
-    type: DT_FLOAT
-  }
-  input_arg {
-    name: "min"
-    type: DT_FLOAT
+    name: "y"
+    type_attr: "T"
   }
-  input_arg {
-    name: "max"
-    type: DT_FLOAT
+  attr {
+    name: "T"
+    type: "type"
+    default_value {
+      type: DT_INT32
+    }
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
   }
-  output_arg {
-    name: "outputs"
-    type: DT_FLOAT
+  attr {
+    name: "src_format"
+    type: "string"
+    default_value {
+      s: "NHWC"
+    }
   }
   attr {
-    name: "num_bits"
-    type: "int"
+    name: "dst_format"
+    type: "string"
     default_value {
-      i: 8
+      s: "NCHW"
     }
   }
 }
 op {
-  name: "FakeQuantWithMinMaxVars"
-  input_arg {
-    name: "inputs"
-    type: DT_FLOAT
-  }
-  input_arg {
-    name: "min"
-    type: DT_FLOAT
-  }
+  name: "DataFormatVecPermute"
   input_arg {
-    name: "max"
-    type: DT_FLOAT
+    name: "x"
+    type_attr: "T"
   }
   output_arg {
-    name: "outputs"
-    type: DT_FLOAT
+    name: "y"
+    type_attr: "T"
   }
   attr {
-    name: "num_bits"
-    type: "int"
+    name: "T"
+    type: "type"
     default_value {
-      i: 8
+      type: DT_INT32
+    }
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
     }
   }
   attr {
-    name: "narrow_range"
-    type: "bool"
+    name: "src_format"
+    type: "string"
     default_value {
-      b: false
+      s: "NHWC"
+    }
+  }
+  attr {
+    name: "dst_format"
+    type: "string"
+    default_value {
+      s: "NCHW"
     }
   }
 }
 op {
-  name: "FakeQuantWithMinMaxVarsGradient"
-  input_arg {
-    name: "gradients"
-    type: DT_FLOAT
-  }
-  input_arg {
-    name: "inputs"
-    type: DT_FLOAT
-  }
-  input_arg {
-    name: "min"
-    type: DT_FLOAT
-  }
+  name: "DatasetToSingleElement"
   input_arg {
-    name: "max"
-    type: DT_FLOAT
+    name: "dataset"
+    type: DT_VARIANT
   }
   output_arg {
-    name: "backprops_wrt_input"
-    type: DT_FLOAT
+    name: "components"
+    type_list_attr: "output_types"
   }
-  output_arg {
-    name: "backprop_wrt_min"
-    type: DT_FLOAT
+  attr {
+    name: "output_types"
+    type: "list(type)"
+    has_minimum: true
+    minimum: 1
   }
-  output_arg {
-    name: "backprop_wrt_max"
-    type: DT_FLOAT
+  attr {
+    name: "output_shapes"
+    type: "list(shape)"
+    has_minimum: true
+    minimum: 1
   }
 }
 op {
-  name: "FakeQuantWithMinMaxVarsGradient"
+  name: "DebugGradientIdentity"
   input_arg {
-    name: "gradients"
-    type: DT_FLOAT
+    name: "input"
+    type_attr: "T"
   }
-  input_arg {
-    name: "inputs"
-    type: DT_FLOAT
+  output_arg {
+    name: "output"
+    type_attr: "T"
   }
-  input_arg {
-    name: "min"
-    type: DT_FLOAT
+  attr {
+    name: "T"
+    type: "type"
   }
+  allows_uninitialized_input: true
+}
+op {
+  name: "DebugIdentity"
   input_arg {
-    name: "max"
-    type: DT_FLOAT
+    name: "input"
+    type_attr: "T"
   }
   output_arg {
-    name: "backprops_wrt_input"
-    type: DT_FLOAT
+    name: "output"
+    type_attr: "T"
   }
-  output_arg {
-    name: "backprop_wrt_min"
-    type: DT_FLOAT
+  attr {
+    name: "T"
+    type: "type"
   }
-  output_arg {
-    name: "backprop_wrt_max"
-    type: DT_FLOAT
+  attr {
+    name: "tensor_name"
+    type: "string"
+    default_value {
+      s: ""
+    }
   }
   attr {
-    name: "num_bits"
-    type: "int"
+    name: "debug_urls"
+    type: "list(string)"
     default_value {
-      i: 8
+      list {
+      }
     }
   }
+  allows_uninitialized_input: true
 }
 op {
-  name: "FakeQuantWithMinMaxVarsGradient"
-  input_arg {
-    name: "gradients"
-    type: DT_FLOAT
-  }
+  name: "DebugIdentity"
   input_arg {
-    name: "inputs"
-    type: DT_FLOAT
+    name: "input"
+    type_attr: "T"
   }
-  input_arg {
-    name: "min"
-    type: DT_FLOAT
+  output_arg {
+    name: "output"
+    type_attr: "T"
   }
-  input_arg {
-    name: "max"
-    type: DT_FLOAT
-  }
-  output_arg {
-    name: "backprops_wrt_input"
-    type: DT_FLOAT
-  }
-  output_arg {
-    name: "backprop_wrt_min"
-    type: DT_FLOAT
+  attr {
+    name: "T"
+    type: "type"
   }
-  output_arg {
-    name: "backprop_wrt_max"
-    type: DT_FLOAT
+  attr {
+    name: "tensor_name"
+    type: "string"
+    default_value {
+      s: ""
+    }
   }
   attr {
-    name: "num_bits"
-    type: "int"
+    name: "debug_urls"
+    type: "list(string)"
     default_value {
-      i: 8
+      list {
+      }
     }
   }
   attr {
-    name: "narrow_range"
+    name: "gated_grpc"
     type: "bool"
     default_value {
       b: false
     }
   }
+  allows_uninitialized_input: true
 }
 op {
-  name: "FakeQuantWithMinMaxVarsPerChannel"
-  input_arg {
-    name: "inputs"
-    type: DT_FLOAT
-  }
-  input_arg {
-    name: "min"
-    type: DT_FLOAT
-  }
+  name: "DebugIdentity"
   input_arg {
-    name: "max"
-    type: DT_FLOAT
+    name: "input"
+    type_attr: "T"
   }
   output_arg {
-    name: "outputs"
-    type: DT_FLOAT
-  }
-}
-op {
-  name: "FakeQuantWithMinMaxVarsPerChannel"
-  input_arg {
-    name: "inputs"
-    type: DT_FLOAT
-  }
-  input_arg {
-    name: "min"
-    type: DT_FLOAT
-  }
-  input_arg {
-    name: "max"
-    type: DT_FLOAT
+    name: "output"
+    type_attr: "T"
   }
-  output_arg {
-    name: "outputs"
-    type: DT_FLOAT
+  attr {
+    name: "T"
+    type: "type"
   }
   attr {
-    name: "num_bits"
-    type: "int"
+    name: "device_name"
+    type: "string"
     default_value {
-      i: 8
+      s: ""
     }
   }
-}
-op {
-  name: "FakeQuantWithMinMaxVarsPerChannel"
-  input_arg {
-    name: "inputs"
-    type: DT_FLOAT
-  }
-  input_arg {
-    name: "min"
-    type: DT_FLOAT
-  }
-  input_arg {
-    name: "max"
-    type: DT_FLOAT
-  }
-  output_arg {
-    name: "outputs"
-    type: DT_FLOAT
+  attr {
+    name: "tensor_name"
+    type: "string"
+    default_value {
+      s: ""
+    }
   }
   attr {
-    name: "num_bits"
-    type: "int"
+    name: "debug_urls"
+    type: "list(string)"
     default_value {
-      i: 8
+      list {
+      }
     }
   }
   attr {
-    name: "narrow_range"
+    name: "gated_grpc"
     type: "bool"
     default_value {
       b: false
     }
   }
+  allows_uninitialized_input: true
 }
 op {
-  name: "FakeQuantWithMinMaxVarsPerChannelGradient"
-  input_arg {
-    name: "gradients"
-    type: DT_FLOAT
-  }
-  input_arg {
-    name: "inputs"
-    type: DT_FLOAT
-  }
-  input_arg {
-    name: "min"
-    type: DT_FLOAT
-  }
-  input_arg {
-    name: "max"
-    type: DT_FLOAT
-  }
-  output_arg {
-    name: "backprops_wrt_input"
-    type: DT_FLOAT
-  }
-  output_arg {
-    name: "backprop_wrt_min"
-    type: DT_FLOAT
-  }
-  output_arg {
-    name: "backprop_wrt_max"
-    type: DT_FLOAT
-  }
-}
-op {
-  name: "FakeQuantWithMinMaxVarsPerChannelGradient"
-  input_arg {
-    name: "gradients"
-    type: DT_FLOAT
-  }
-  input_arg {
-    name: "inputs"
-    type: DT_FLOAT
-  }
-  input_arg {
-    name: "min"
-    type: DT_FLOAT
-  }
+  name: "DebugNanCount"
   input_arg {
-    name: "max"
-    type: DT_FLOAT
+    name: "input"
+    type_attr: "T"
   }
   output_arg {
-    name: "backprops_wrt_input"
-    type: DT_FLOAT
+    name: "output"
+    type: DT_INT64
   }
-  output_arg {
-    name: "backprop_wrt_min"
-    type: DT_FLOAT
+  attr {
+    name: "T"
+    type: "type"
   }
-  output_arg {
-    name: "backprop_wrt_max"
-    type: DT_FLOAT
+  attr {
+    name: "tensor_name"
+    type: "string"
+    default_value {
+      s: ""
+    }
   }
   attr {
-    name: "num_bits"
-    type: "int"
+    name: "debug_urls"
+    type: "list(string)"
     default_value {
-      i: 8
+      list {
+      }
     }
   }
+  allows_uninitialized_input: true
 }
 op {
-  name: "FakeQuantWithMinMaxVarsPerChannelGradient"
-  input_arg {
-    name: "gradients"
-    type: DT_FLOAT
-  }
-  input_arg {
-    name: "inputs"
-    type: DT_FLOAT
-  }
-  input_arg {
-    name: "min"
-    type: DT_FLOAT
-  }
+  name: "DebugNanCount"
   input_arg {
-    name: "max"
-    type: DT_FLOAT
+    name: "input"
+    type_attr: "T"
   }
   output_arg {
-    name: "backprops_wrt_input"
-    type: DT_FLOAT
+    name: "output"
+    type: DT_INT64
   }
-  output_arg {
-    name: "backprop_wrt_min"
-    type: DT_FLOAT
+  attr {
+    name: "T"
+    type: "type"
   }
-  output_arg {
-    name: "backprop_wrt_max"
-    type: DT_FLOAT
+  attr {
+    name: "tensor_name"
+    type: "string"
+    default_value {
+      s: ""
+    }
   }
   attr {
-    name: "num_bits"
-    type: "int"
+    name: "debug_urls"
+    type: "list(string)"
     default_value {
-      i: 8
+      list {
+      }
     }
   }
   attr {
-    name: "narrow_range"
+    name: "gated_grpc"
     type: "bool"
     default_value {
       b: false
     }
   }
+  allows_uninitialized_input: true
 }
 op {
-  name: "FakeQueue"
-  input_arg {
-    name: "resource"
-    type: DT_RESOURCE
-  }
-  output_arg {
-    name: "handle"
-    type: DT_STRING
-    is_ref: true
-  }
-  is_stateful: true
-}
-op {
-  name: "Fill"
-  input_arg {
-    name: "dims"
-    type: DT_INT32
-  }
+  name: "DebugNanCount"
   input_arg {
-    name: "value"
+    name: "input"
     type_attr: "T"
   }
   output_arg {
     name: "output"
-    type_attr: "T"
+    type: DT_INT64
   }
   attr {
     name: "T"
     type: "type"
   }
-}
-op {
-  name: "FilterDataset"
-  input_arg {
-    name: "input_dataset"
-    type: DT_VARIANT
-  }
-  input_arg {
-    name: "other_arguments"
-    type_list_attr: "Targuments"
-  }
-  output_arg {
-    name: "handle"
-    type: DT_VARIANT
-  }
   attr {
-    name: "predicate"
-    type: "func"
+    name: "device_name"
+    type: "string"
+    default_value {
+      s: ""
+    }
   }
   attr {
-    name: "Targuments"
-    type: "list(type)"
-    has_minimum: true
+    name: "tensor_name"
+    type: "string"
+    default_value {
+      s: ""
+    }
   }
   attr {
-    name: "output_types"
-    type: "list(type)"
-    has_minimum: true
-    minimum: 1
+    name: "debug_urls"
+    type: "list(string)"
+    default_value {
+      list {
+      }
+    }
   }
   attr {
-    name: "output_shapes"
-    type: "list(shape)"
-    has_minimum: true
-    minimum: 1
+    name: "gated_grpc"
+    type: "bool"
+    default_value {
+      b: false
+    }
   }
-  is_stateful: true
+  allows_uninitialized_input: true
 }
 op {
-  name: "FilterDataset"
-  input_arg {
-    name: "input_dataset"
-    type: DT_VARIANT
-  }
+  name: "DebugNumericSummary"
   input_arg {
-    name: "other_arguments"
-    type_list_attr: "Targuments"
+    name: "input"
+    type_attr: "T"
   }
   output_arg {
-    name: "handle"
-    type: DT_VARIANT
-  }
-  attr {
-    name: "predicate"
-    type: "func"
+    name: "output"
+    type: DT_DOUBLE
   }
   attr {
-    name: "Targuments"
-    type: "list(type)"
-    has_minimum: true
+    name: "T"
+    type: "type"
   }
   attr {
-    name: "output_types"
-    type: "list(type)"
-    has_minimum: true
-    minimum: 1
+    name: "tensor_name"
+    type: "string"
+    default_value {
+      s: ""
+    }
   }
   attr {
-    name: "output_shapes"
-    type: "list(shape)"
-    has_minimum: true
-    minimum: 1
+    name: "debug_urls"
+    type: "list(string)"
+    default_value {
+      list {
+      }
+    }
   }
+  allows_uninitialized_input: true
 }
 op {
-  name: "FixedLengthRecordDataset"
-  input_arg {
-    name: "filenames"
-    type: DT_STRING
-  }
-  input_arg {
-    name: "header_bytes"
-    type: DT_INT64
-  }
-  input_arg {
-    name: "record_bytes"
-    type: DT_INT64
-  }
-  input_arg {
-    name: "footer_bytes"
-    type: DT_INT64
-  }
+  name: "DebugNumericSummary"
   input_arg {
-    name: "buffer_size"
-    type: DT_INT64
+    name: "input"
+    type_attr: "T"
   }
   output_arg {
-    name: "handle"
-    type: DT_VARIANT
+    name: "output"
+    type: DT_DOUBLE
   }
-  is_stateful: true
-}
-op {
-  name: "FixedLengthRecordReader"
-  output_arg {
-    name: "reader_handle"
-    type: DT_STRING
-    is_ref: true
+  attr {
+    name: "T"
+    type: "type"
   }
   attr {
-    name: "header_bytes"
-    type: "int"
+    name: "tensor_name"
+    type: "string"
     default_value {
-      i: 0
+      s: ""
     }
   }
   attr {
-    name: "record_bytes"
-    type: "int"
+    name: "debug_urls"
+    type: "list(string)"
+    default_value {
+      list {
+      }
+    }
   }
   attr {
-    name: "footer_bytes"
-    type: "int"
+    name: "lower_bound"
+    type: "float"
     default_value {
-      i: 0
+      f: -inf
     }
   }
   attr {
-    name: "container"
-    type: "string"
+    name: "upper_bound"
+    type: "float"
     default_value {
-      s: ""
+      f: inf
     }
   }
   attr {
-    name: "shared_name"
-    type: "string"
+    name: "mute_if_healthy"
+    type: "bool"
     default_value {
-      s: ""
+      b: false
     }
   }
-  is_stateful: true
+  allows_uninitialized_input: true
 }
 op {
-  name: "FixedLengthRecordReader"
+  name: "DebugNumericSummary"
+  input_arg {
+    name: "input"
+    type_attr: "T"
+  }
   output_arg {
-    name: "reader_handle"
-    type: DT_STRING
-    is_ref: true
+    name: "output"
+    type: DT_DOUBLE
   }
   attr {
-    name: "header_bytes"
-    type: "int"
+    name: "T"
+    type: "type"
+  }
+  attr {
+    name: "tensor_name"
+    type: "string"
     default_value {
-      i: 0
+      s: ""
     }
   }
   attr {
-    name: "record_bytes"
-    type: "int"
+    name: "debug_urls"
+    type: "list(string)"
+    default_value {
+      list {
+      }
+    }
   }
   attr {
-    name: "footer_bytes"
-    type: "int"
+    name: "lower_bound"
+    type: "float"
     default_value {
-      i: 0
+      f: -inf
     }
   }
   attr {
-    name: "hop_bytes"
-    type: "int"
+    name: "upper_bound"
+    type: "float"
     default_value {
-      i: 0
+      f: inf
     }
   }
   attr {
-    name: "container"
-    type: "string"
+    name: "mute_if_healthy"
+    type: "bool"
     default_value {
-      s: ""
+      b: false
     }
   }
   attr {
-    name: "shared_name"
-    type: "string"
+    name: "gated_grpc"
+    type: "bool"
     default_value {
-      s: ""
+      b: false
     }
   }
-  is_stateful: true
+  allows_uninitialized_input: true
 }
 op {
-  name: "FixedLengthRecordReaderV2"
-  output_arg {
-    name: "reader_handle"
-    type: DT_RESOURCE
-  }
-  attr {
-    name: "header_bytes"
-    type: "int"
-    default_value {
-      i: 0
-    }
+  name: "DebugNumericSummary"
+  input_arg {
+    name: "input"
+    type_attr: "T"
   }
-  attr {
-    name: "record_bytes"
-    type: "int"
+  output_arg {
+    name: "output"
+    type: DT_DOUBLE
   }
   attr {
-    name: "footer_bytes"
-    type: "int"
-    default_value {
-      i: 0
-    }
+    name: "T"
+    type: "type"
   }
   attr {
-    name: "container"
+    name: "device_name"
     type: "string"
     default_value {
       s: ""
     }
   }
   attr {
-    name: "shared_name"
+    name: "tensor_name"
     type: "string"
     default_value {
       s: ""
     }
   }
-  is_stateful: true
-}
-op {
-  name: "FixedLengthRecordReaderV2"
-  output_arg {
-    name: "reader_handle"
-    type: DT_RESOURCE
-  }
   attr {
-    name: "header_bytes"
-    type: "int"
+    name: "debug_urls"
+    type: "list(string)"
     default_value {
-      i: 0
+      list {
+      }
     }
   }
   attr {
-    name: "record_bytes"
-    type: "int"
-  }
-  attr {
-    name: "footer_bytes"
-    type: "int"
+    name: "lower_bound"
+    type: "float"
     default_value {
-      i: 0
+      f: -inf
     }
   }
   attr {
-    name: "hop_bytes"
-    type: "int"
+    name: "upper_bound"
+    type: "float"
     default_value {
-      i: 0
+      f: inf
     }
   }
   attr {
-    name: "container"
-    type: "string"
+    name: "mute_if_healthy"
+    type: "bool"
     default_value {
-      s: ""
+      b: false
     }
   }
   attr {
-    name: "shared_name"
-    type: "string"
+    name: "gated_grpc"
+    type: "bool"
     default_value {
-      s: ""
+      b: false
     }
   }
-  is_stateful: true
+  allows_uninitialized_input: true
 }
 op {
-  name: "FixedLengthRecordReaderV2"
+  name: "DecodeAndCropJpeg"
+  input_arg {
+    name: "contents"
+    type: DT_STRING
+  }
+  input_arg {
+    name: "crop_window"
+    type: DT_INT32
+  }
   output_arg {
-    name: "reader_handle"
-    type: DT_RESOURCE
+    name: "image"
+    type: DT_UINT8
   }
   attr {
-    name: "header_bytes"
+    name: "channels"
     type: "int"
     default_value {
       i: 0
     }
   }
   attr {
-    name: "record_bytes"
-    type: "int"
-  }
-  attr {
-    name: "footer_bytes"
+    name: "ratio"
     type: "int"
     default_value {
-      i: 0
+      i: 1
     }
   }
   attr {
-    name: "hop_bytes"
-    type: "int"
+    name: "fancy_upscaling"
+    type: "bool"
     default_value {
-      i: 0
+      b: true
     }
   }
   attr {
-    name: "container"
-    type: "string"
+    name: "try_recover_truncated"
+    type: "bool"
     default_value {
-      s: ""
+      b: false
     }
   }
   attr {
-    name: "shared_name"
-    type: "string"
+    name: "acceptable_fraction"
+    type: "float"
     default_value {
-      s: ""
+      f: 1
     }
   }
   attr {
-    name: "encoding"
+    name: "dct_method"
     type: "string"
     default_value {
       s: ""
     }
   }
-  is_stateful: true
 }
 op {
-  name: "FixedUnigramCandidateSampler"
+  name: "DecodeBase64"
   input_arg {
-    name: "true_classes"
-    type: DT_INT64
+    name: "input"
+    type: DT_STRING
   }
   output_arg {
-    name: "sampled_candidates"
-    type: DT_INT64
+    name: "output"
+    type: DT_STRING
   }
-  output_arg {
-    name: "true_expected_count"
-    type: DT_FLOAT
+}
+op {
+  name: "DecodeBmp"
+  input_arg {
+    name: "contents"
+    type: DT_STRING
   }
   output_arg {
-    name: "sampled_expected_count"
-    type: DT_FLOAT
+    name: "image"
+    type: DT_UINT8
   }
   attr {
-    name: "num_true"
+    name: "channels"
     type: "int"
-    has_minimum: true
-    minimum: 1
+    default_value {
+      i: 0
+    }
   }
-  attr {
-    name: "num_sampled"
-    type: "int"
-    has_minimum: true
-    minimum: 1
+}
+op {
+  name: "DecodeCSV"
+  input_arg {
+    name: "records"
+    type: DT_STRING
   }
-  attr {
-    name: "unique"
-    type: "bool"
+  input_arg {
+    name: "record_defaults"
+    type_list_attr: "OUT_TYPE"
+  }
+  output_arg {
+    name: "output"
+    type_list_attr: "OUT_TYPE"
   }
   attr {
-    name: "range_max"
-    type: "int"
+    name: "OUT_TYPE"
+    type: "list(type)"
     has_minimum: true
     minimum: 1
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_STRING
+      }
+    }
   }
   attr {
-    name: "vocab_file"
+    name: "field_delim"
     type: "string"
     default_value {
-      s: ""
+      s: ","
     }
   }
+}
+op {
+  name: "DecodeCSV"
+  input_arg {
+    name: "records"
+    type: DT_STRING
+  }
+  input_arg {
+    name: "record_defaults"
+    type_list_attr: "OUT_TYPE"
+  }
+  output_arg {
+    name: "output"
+    type_list_attr: "OUT_TYPE"
+  }
   attr {
-    name: "distortion"
-    type: "float"
-    default_value {
-      f: 1
+    name: "OUT_TYPE"
+    type: "list(type)"
+    has_minimum: true
+    minimum: 1
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_STRING
+      }
     }
   }
   attr {
-    name: "num_reserved_ids"
-    type: "int"
+    name: "field_delim"
+    type: "string"
     default_value {
-      i: 0
+      s: ","
     }
   }
   attr {
-    name: "num_shards"
-    type: "int"
+    name: "use_quote_delim"
+    type: "bool"
     default_value {
-      i: 1
+      b: true
     }
-    has_minimum: true
-    minimum: 1
+  }
+}
+op {
+  name: "DecodeCSV"
+  input_arg {
+    name: "records"
+    type: DT_STRING
+  }
+  input_arg {
+    name: "record_defaults"
+    type_list_attr: "OUT_TYPE"
+  }
+  output_arg {
+    name: "output"
+    type_list_attr: "OUT_TYPE"
   }
   attr {
-    name: "shard"
-    type: "int"
-    default_value {
-      i: 0
-    }
+    name: "OUT_TYPE"
+    type: "list(type)"
     has_minimum: true
+    minimum: 1
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_STRING
+      }
+    }
   }
   attr {
-    name: "unigrams"
-    type: "list(float)"
+    name: "field_delim"
+    type: "string"
     default_value {
-      list {
-      }
+      s: ","
     }
   }
   attr {
-    name: "seed"
-    type: "int"
+    name: "use_quote_delim"
+    type: "bool"
     default_value {
-      i: 0
+      b: true
     }
   }
   attr {
-    name: "seed2"
-    type: "int"
+    name: "na_value"
+    type: "string"
     default_value {
-      i: 0
+      s: ""
     }
   }
 }
 op {
-  name: "FixedUnigramCandidateSampler"
+  name: "DecodeCSV"
   input_arg {
-    name: "true_classes"
-    type: DT_INT64
-  }
-  output_arg {
-    name: "sampled_candidates"
-    type: DT_INT64
+    name: "records"
+    type: DT_STRING
   }
-  output_arg {
-    name: "true_expected_count"
-    type: DT_FLOAT
+  input_arg {
+    name: "record_defaults"
+    type_list_attr: "OUT_TYPE"
   }
   output_arg {
-    name: "sampled_expected_count"
-    type: DT_FLOAT
+    name: "output"
+    type_list_attr: "OUT_TYPE"
   }
   attr {
-    name: "num_true"
-    type: "int"
+    name: "OUT_TYPE"
+    type: "list(type)"
     has_minimum: true
     minimum: 1
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_STRING
+      }
+    }
   }
   attr {
-    name: "num_sampled"
-    type: "int"
-    has_minimum: true
-    minimum: 1
+    name: "field_delim"
+    type: "string"
+    default_value {
+      s: ","
+    }
   }
   attr {
-    name: "unique"
+    name: "use_quote_delim"
     type: "bool"
+    default_value {
+      b: true
+    }
   }
   attr {
-    name: "range_max"
-    type: "int"
-    has_minimum: true
-    minimum: 1
-  }
-  attr {
-    name: "vocab_file"
+    name: "na_value"
     type: "string"
     default_value {
       s: ""
     }
   }
-  attr {
-    name: "distortion"
-    type: "float"
-    default_value {
-      f: 1
-    }
+}
+op {
+  name: "DecodeGif"
+  input_arg {
+    name: "contents"
+    type: DT_STRING
+  }
+  output_arg {
+    name: "image"
+    type: DT_UINT8
+  }
+}
+op {
+  name: "DecodeJSONExample"
+  input_arg {
+    name: "json_examples"
+    type: DT_STRING
+  }
+  output_arg {
+    name: "binary_examples"
+    type: DT_STRING
+  }
+}
+op {
+  name: "DecodeJpeg"
+  input_arg {
+    name: "contents"
+    type: DT_STRING
+  }
+  output_arg {
+    name: "image"
+    type: DT_UINT8
   }
   attr {
-    name: "num_reserved_ids"
+    name: "channels"
     type: "int"
     default_value {
       i: 0
     }
   }
   attr {
-    name: "num_shards"
+    name: "ratio"
     type: "int"
     default_value {
       i: 1
     }
-    has_minimum: true
-    minimum: 1
   }
   attr {
-    name: "shard"
-    type: "int"
+    name: "fancy_upscaling"
+    type: "bool"
     default_value {
-      i: 0
+      b: true
     }
-    has_minimum: true
   }
   attr {
-    name: "unigrams"
-    type: "list(float)"
+    name: "try_recover_truncated"
+    type: "bool"
     default_value {
-      list {
-      }
+      b: false
     }
   }
   attr {
-    name: "seed"
-    type: "int"
+    name: "acceptable_fraction"
+    type: "float"
     default_value {
-      i: 0
+      f: 1
     }
   }
   attr {
-    name: "seed2"
-    type: "int"
+    name: "dct_method"
+    type: "string"
     default_value {
-      i: 0
+      s: ""
     }
   }
-  is_stateful: true
-}
-op {
-  name: "FlatMapDataset"
-  input_arg {
-    name: "input_dataset"
-    type: DT_VARIANT
-  }
-  input_arg {
-    name: "other_arguments"
-    type_list_attr: "Targuments"
-  }
-  output_arg {
-    name: "handle"
-    type: DT_VARIANT
-  }
-  attr {
-    name: "f"
-    type: "func"
-  }
-  attr {
-    name: "Targuments"
-    type: "list(type)"
-    has_minimum: true
-  }
-  attr {
-    name: "output_types"
-    type: "list(type)"
-    has_minimum: true
-    minimum: 1
-  }
-  attr {
-    name: "output_shapes"
-    type: "list(shape)"
-    has_minimum: true
-    minimum: 1
-  }
-  is_stateful: true
 }
 op {
-  name: "FlatMapDataset"
-  input_arg {
-    name: "input_dataset"
-    type: DT_VARIANT
-  }
+  name: "DecodePng"
   input_arg {
-    name: "other_arguments"
-    type_list_attr: "Targuments"
+    name: "contents"
+    type: DT_STRING
   }
   output_arg {
-    name: "handle"
-    type: DT_VARIANT
-  }
-  attr {
-    name: "f"
-    type: "func"
-  }
-  attr {
-    name: "Targuments"
-    type: "list(type)"
-    has_minimum: true
+    name: "image"
+    type_attr: "dtype"
   }
   attr {
-    name: "output_types"
-    type: "list(type)"
-    has_minimum: true
-    minimum: 1
+    name: "channels"
+    type: "int"
+    default_value {
+      i: 0
+    }
   }
   attr {
-    name: "output_shapes"
-    type: "list(shape)"
-    has_minimum: true
-    minimum: 1
+    name: "dtype"
+    type: "type"
+    default_value {
+      type: DT_UINT8
+    }
+    allowed_values {
+      list {
+        type: DT_UINT8
+        type: DT_UINT16
+      }
+    }
   }
 }
 op {
-  name: "Floor"
+  name: "DecodeRaw"
   input_arg {
-    name: "x"
-    type_attr: "T"
+    name: "bytes"
+    type: DT_STRING
   }
   output_arg {
-    name: "y"
-    type_attr: "T"
+    name: "output"
+    type_attr: "out_type"
   }
   attr {
-    name: "T"
+    name: "out_type"
     type: "type"
     allowed_values {
       list {
         type: DT_HALF
         type: DT_FLOAT
         type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_INT64
       }
     }
   }
+  attr {
+    name: "little_endian"
+    type: "bool"
+    default_value {
+      b: true
+    }
+  }
 }
 op {
-  name: "FloorDiv"
-  input_arg {
-    name: "x"
-    type_attr: "T"
-  }
+  name: "DecodeRaw"
   input_arg {
-    name: "y"
-    type_attr: "T"
+    name: "bytes"
+    type: DT_STRING
   }
   output_arg {
-    name: "z"
-    type_attr: "T"
+    name: "output"
+    type_attr: "out_type"
   }
   attr {
-    name: "T"
+    name: "out_type"
     type: "type"
     allowed_values {
       list {
         type: DT_HALF
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_UINT8
-        type: DT_INT8
+        type: DT_INT32
         type: DT_UINT16
+        type: DT_UINT8
         type: DT_INT16
-        type: DT_INT32
+        type: DT_INT8
         type: DT_INT64
-        type: DT_COMPLEX64
-        type: DT_COMPLEX128
       }
     }
   }
-}
-op {
-  name: "FloorMod"
-  input_arg {
-    name: "x"
-    type_attr: "T"
-  }
-  input_arg {
-    name: "y"
-    type_attr: "T"
-  }
-  output_arg {
-    name: "z"
-    type_attr: "T"
-  }
   attr {
-    name: "T"
-    type: "type"
-    allowed_values {
-      list {
-        type: DT_INT32
-        type: DT_INT64
-        type: DT_FLOAT
-        type: DT_DOUBLE
-      }
+    name: "little_endian"
+    type: "bool"
+    default_value {
+      b: true
     }
   }
 }
 op {
-  name: "FractionalAvgPool"
+  name: "DecodeWav"
   input_arg {
-    name: "value"
-    type_attr: "T"
-  }
-  output_arg {
-    name: "output"
-    type_attr: "T"
+    name: "contents"
+    type: DT_STRING
   }
   output_arg {
-    name: "row_pooling_sequence"
-    type: DT_INT64
+    name: "audio"
+    type: DT_FLOAT
   }
   output_arg {
-    name: "col_pooling_sequence"
-    type: DT_INT64
-  }
-  attr {
-    name: "pooling_ratio"
-    type: "list(float)"
-    has_minimum: true
-    minimum: 4
-  }
-  attr {
-    name: "pseudo_random"
-    type: "bool"
-    default_value {
-      b: false
-    }
-  }
-  attr {
-    name: "overlapping"
-    type: "bool"
-    default_value {
-      b: false
-    }
-  }
-  attr {
-    name: "deterministic"
-    type: "bool"
-    default_value {
-      b: false
-    }
+    name: "sample_rate"
+    type: DT_INT32
   }
   attr {
-    name: "seed"
+    name: "desired_channels"
     type: "int"
     default_value {
-      i: 0
+      i: -1
     }
   }
   attr {
-    name: "seed2"
+    name: "desired_samples"
     type: "int"
     default_value {
-      i: 0
-    }
-  }
-  attr {
-    name: "T"
-    type: "type"
-    allowed_values {
-      list {
-        type: DT_FLOAT
-        type: DT_DOUBLE
-        type: DT_INT32
-        type: DT_INT64
-      }
+      i: -1
     }
   }
 }
 op {
-  name: "FractionalAvgPoolGrad"
+  name: "DeleteSessionTensor"
   input_arg {
-    name: "orig_input_tensor_shape"
-    type: DT_INT64
+    name: "handle"
+    type: DT_STRING
   }
+}
+op {
+  name: "DenseToDenseSetOperation"
   input_arg {
-    name: "out_backprop"
+    name: "set1"
     type_attr: "T"
   }
   input_arg {
-    name: "row_pooling_sequence"
-    type: DT_INT64
+    name: "set2"
+    type_attr: "T"
   }
-  input_arg {
-    name: "col_pooling_sequence"
+  output_arg {
+    name: "result_indices"
     type: DT_INT64
   }
   output_arg {
-    name: "output"
+    name: "result_values"
     type_attr: "T"
   }
+  output_arg {
+    name: "result_shape"
+    type: DT_INT64
+  }
   attr {
-    name: "overlapping"
+    name: "set_operation"
+    type: "string"
+  }
+  attr {
+    name: "validate_indices"
     type: "bool"
     default_value {
-      b: false
+      b: true
     }
   }
   attr {
@@ -12841,117 +12711,119 @@ op {
     type: "type"
     allowed_values {
       list {
-        type: DT_FLOAT
-        type: DT_DOUBLE
+        type: DT_INT8
+        type: DT_INT16
         type: DT_INT32
         type: DT_INT64
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_STRING
       }
     }
   }
 }
 op {
-  name: "FractionalMaxPool"
+  name: "DenseToSparseBatchDataset"
   input_arg {
-    name: "value"
-    type_attr: "T"
+    name: "input_dataset"
+    type: DT_VARIANT
   }
-  output_arg {
-    name: "output"
-    type_attr: "T"
+  input_arg {
+    name: "batch_size"
+    type: DT_INT64
   }
-  output_arg {
-    name: "row_pooling_sequence"
+  input_arg {
+    name: "row_shape"
     type: DT_INT64
   }
   output_arg {
-    name: "col_pooling_sequence"
-    type: DT_INT64
+    name: "handle"
+    type: DT_VARIANT
   }
   attr {
-    name: "pooling_ratio"
-    type: "list(float)"
+    name: "output_types"
+    type: "list(type)"
     has_minimum: true
-    minimum: 4
+    minimum: 1
   }
   attr {
-    name: "pseudo_random"
-    type: "bool"
-    default_value {
-      b: false
-    }
+    name: "output_shapes"
+    type: "list(shape)"
+    has_minimum: true
+    minimum: 1
   }
-  attr {
-    name: "overlapping"
-    type: "bool"
-    default_value {
-      b: false
-    }
+  is_stateful: true
+}
+op {
+  name: "DenseToSparseBatchDataset"
+  input_arg {
+    name: "input_dataset"
+    type: DT_VARIANT
   }
-  attr {
-    name: "deterministic"
-    type: "bool"
-    default_value {
-      b: false
-    }
+  input_arg {
+    name: "batch_size"
+    type: DT_INT64
   }
-  attr {
-    name: "seed"
-    type: "int"
-    default_value {
-      i: 0
-    }
+  input_arg {
+    name: "row_shape"
+    type: DT_INT64
+  }
+  output_arg {
+    name: "handle"
+    type: DT_VARIANT
   }
   attr {
-    name: "seed2"
-    type: "int"
-    default_value {
-      i: 0
-    }
+    name: "output_types"
+    type: "list(type)"
+    has_minimum: true
+    minimum: 1
   }
   attr {
-    name: "T"
-    type: "type"
-    allowed_values {
-      list {
-        type: DT_FLOAT
-        type: DT_DOUBLE
-        type: DT_INT32
-        type: DT_INT64
-      }
-    }
+    name: "output_shapes"
+    type: "list(shape)"
+    has_minimum: true
+    minimum: 1
   }
 }
 op {
-  name: "FractionalMaxPoolGrad"
+  name: "DenseToSparseSetOperation"
   input_arg {
-    name: "orig_input"
+    name: "set1"
     type_attr: "T"
   }
   input_arg {
-    name: "orig_output"
-    type_attr: "T"
+    name: "set2_indices"
+    type: DT_INT64
   }
   input_arg {
-    name: "out_backprop"
+    name: "set2_values"
     type_attr: "T"
   }
   input_arg {
-    name: "row_pooling_sequence"
+    name: "set2_shape"
     type: DT_INT64
   }
-  input_arg {
-    name: "col_pooling_sequence"
+  output_arg {
+    name: "result_indices"
     type: DT_INT64
   }
   output_arg {
-    name: "output"
+    name: "result_values"
     type_attr: "T"
   }
+  output_arg {
+    name: "result_shape"
+    type: DT_INT64
+  }
   attr {
-    name: "overlapping"
+    name: "set_operation"
+    type: "string"
+  }
+  attr {
+    name: "validate_indices"
     type: "bool"
     default_value {
-      b: false
+      b: true
     }
   }
   attr {
@@ -12959,71 +12831,57 @@ op {
     type: "type"
     allowed_values {
       list {
-        type: DT_FLOAT
-        type: DT_DOUBLE
+        type: DT_INT8
+        type: DT_INT16
         type: DT_INT32
         type: DT_INT64
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_STRING
       }
     }
   }
 }
 op {
-  name: "FusedBatchNorm"
-  input_arg {
-    name: "x"
-    type_attr: "T"
-  }
-  input_arg {
-    name: "scale"
-    type_attr: "T"
-  }
-  input_arg {
-    name: "offset"
-    type_attr: "T"
-  }
-  input_arg {
-    name: "mean"
-    type_attr: "T"
-  }
+  name: "DepthToSpace"
   input_arg {
-    name: "variance"
+    name: "input"
     type_attr: "T"
   }
   output_arg {
-    name: "y"
+    name: "output"
     type_attr: "T"
   }
-  output_arg {
-    name: "batch_mean"
-    type_attr: "T"
+  attr {
+    name: "T"
+    type: "type"
   }
-  output_arg {
-    name: "batch_variance"
-    type_attr: "T"
+  attr {
+    name: "block_size"
+    type: "int"
+    has_minimum: true
+    minimum: 2
   }
-  output_arg {
-    name: "reserve_space_1"
+}
+op {
+  name: "DepthToSpace"
+  input_arg {
+    name: "input"
     type_attr: "T"
   }
   output_arg {
-    name: "reserve_space_2"
+    name: "output"
     type_attr: "T"
   }
   attr {
     name: "T"
     type: "type"
-    allowed_values {
-      list {
-        type: DT_FLOAT
-      }
-    }
   }
   attr {
-    name: "epsilon"
-    type: "float"
-    default_value {
-      f: 0.0001
-    }
+    name: "block_size"
+    type: "int"
+    has_minimum: true
+    minimum: 2
   }
   attr {
     name: "data_format"
@@ -13031,55 +12889,66 @@ op {
     default_value {
       s: "NHWC"
     }
-  }
-  attr {
-    name: "is_training"
-    type: "bool"
-    default_value {
-      b: true
+    allowed_values {
+      list {
+        s: "NHWC"
+        s: "NCHW"
+        s: "NCHW_VECT_C"
+      }
     }
   }
 }
 op {
-  name: "FusedBatchNormGrad"
-  input_arg {
-    name: "y_backprop"
-    type_attr: "T"
-  }
+  name: "DepthwiseConv2dNative"
   input_arg {
-    name: "x"
+    name: "input"
     type_attr: "T"
   }
   input_arg {
-    name: "scale"
+    name: "filter"
     type_attr: "T"
   }
-  input_arg {
-    name: "reserve_space_1"
+  output_arg {
+    name: "output"
     type_attr: "T"
   }
-  input_arg {
-    name: "reserve_space_2"
-    type_attr: "T"
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+      }
+    }
   }
-  output_arg {
-    name: "x_backprop"
-    type_attr: "T"
+  attr {
+    name: "strides"
+    type: "list(int)"
   }
-  output_arg {
-    name: "scale_backprop"
-    type_attr: "T"
+  attr {
+    name: "padding"
+    type: "string"
+    allowed_values {
+      list {
+        s: "SAME"
+        s: "VALID"
+      }
+    }
   }
-  output_arg {
-    name: "offset_backprop"
+}
+op {
+  name: "DepthwiseConv2dNative"
+  input_arg {
+    name: "input"
     type_attr: "T"
   }
-  output_arg {
-    name: "reserve_space_3"
+  input_arg {
+    name: "filter"
     type_attr: "T"
   }
   output_arg {
-    name: "reserve_space_4"
+    name: "output"
     type_attr: "T"
   }
   attr {
@@ -13088,14 +12957,22 @@ op {
     allowed_values {
       list {
         type: DT_FLOAT
+        type: DT_DOUBLE
       }
     }
   }
   attr {
-    name: "epsilon"
-    type: "float"
-    default_value {
-      f: 0.0001
+    name: "strides"
+    type: "list(int)"
+  }
+  attr {
+    name: "padding"
+    type: "string"
+    allowed_values {
+      list {
+        s: "SAME"
+        s: "VALID"
+      }
     }
   }
   attr {
@@ -13104,164 +12981,163 @@ op {
     default_value {
       s: "NHWC"
     }
-  }
-  attr {
-    name: "is_training"
-    type: "bool"
-    default_value {
-      b: true
+    allowed_values {
+      list {
+        s: "NHWC"
+        s: "NCHW"
+      }
     }
   }
 }
 op {
-  name: "FusedBatchNormGradV2"
+  name: "DepthwiseConv2dNative"
   input_arg {
-    name: "y_backprop"
+    name: "input"
     type_attr: "T"
   }
   input_arg {
-    name: "x"
+    name: "filter"
     type_attr: "T"
   }
-  input_arg {
-    name: "scale"
-    type: DT_FLOAT
-  }
-  input_arg {
-    name: "reserve_space_1"
-    type_attr: "U"
-  }
-  input_arg {
-    name: "reserve_space_2"
-    type_attr: "U"
-  }
   output_arg {
-    name: "x_backprop"
+    name: "output"
     type_attr: "T"
   }
-  output_arg {
-    name: "scale_backprop"
-    type_attr: "U"
-  }
-  output_arg {
-    name: "offset_backprop"
-    type_attr: "U"
-  }
-  output_arg {
-    name: "reserve_space_3"
-    type_attr: "U"
-  }
-  output_arg {
-    name: "reserve_space_4"
-    type_attr: "U"
-  }
   attr {
     name: "T"
     type: "type"
     allowed_values {
       list {
         type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
+        type: DT_DOUBLE
       }
     }
   }
   attr {
-    name: "U"
-    type: "type"
+    name: "strides"
+    type: "list(int)"
+  }
+  attr {
+    name: "padding"
+    type: "string"
     allowed_values {
       list {
-        type: DT_FLOAT
+        s: "SAME"
+        s: "VALID"
       }
     }
   }
-  attr {
-    name: "epsilon"
-    type: "float"
-    default_value {
-      f: 0.0001
-    }
-  }
   attr {
     name: "data_format"
     type: "string"
     default_value {
       s: "NHWC"
     }
+    allowed_values {
+      list {
+        s: "NHWC"
+        s: "NCHW"
+      }
+    }
   }
   attr {
-    name: "is_training"
-    type: "bool"
+    name: "dilations"
+    type: "list(int)"
     default_value {
-      b: true
+      list {
+        i: 1
+        i: 1
+        i: 1
+        i: 1
+      }
     }
   }
 }
 op {
-  name: "FusedBatchNormV2"
+  name: "DepthwiseConv2dNativeBackpropFilter"
   input_arg {
-    name: "x"
+    name: "input"
     type_attr: "T"
   }
   input_arg {
-    name: "scale"
-    type_attr: "U"
-  }
-  input_arg {
-    name: "offset"
-    type_attr: "U"
-  }
-  input_arg {
-    name: "mean"
-    type_attr: "U"
+    name: "filter_sizes"
+    type: DT_INT32
   }
   input_arg {
-    name: "variance"
-    type_attr: "U"
-  }
-  output_arg {
-    name: "y"
+    name: "out_backprop"
     type_attr: "T"
   }
   output_arg {
-    name: "batch_mean"
-    type_attr: "U"
-  }
-  output_arg {
-    name: "batch_variance"
-    type_attr: "U"
-  }
-  output_arg {
-    name: "reserve_space_1"
-    type_attr: "U"
-  }
-  output_arg {
-    name: "reserve_space_2"
-    type_attr: "U"
+    name: "output"
+    type_attr: "T"
   }
   attr {
     name: "T"
     type: "type"
     allowed_values {
       list {
-        type: DT_HALF
         type: DT_FLOAT
+        type: DT_DOUBLE
       }
     }
   }
   attr {
-    name: "U"
+    name: "strides"
+    type: "list(int)"
+  }
+  attr {
+    name: "padding"
+    type: "string"
+    allowed_values {
+      list {
+        s: "SAME"
+        s: "VALID"
+      }
+    }
+  }
+}
+op {
+  name: "DepthwiseConv2dNativeBackpropFilter"
+  input_arg {
+    name: "input"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "filter_sizes"
+    type: DT_INT32
+  }
+  input_arg {
+    name: "out_backprop"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
     type: "type"
     allowed_values {
       list {
         type: DT_FLOAT
+        type: DT_DOUBLE
       }
     }
   }
   attr {
-    name: "epsilon"
-    type: "float"
-    default_value {
-      f: 0.0001
+    name: "strides"
+    type: "list(int)"
+  }
+  attr {
+    name: "padding"
+    type: "string"
+    allowed_values {
+      list {
+        s: "SAME"
+        s: "VALID"
+      }
     }
   }
   attr {
@@ -13270,27 +13146,26 @@ op {
     default_value {
       s: "NHWC"
     }
-  }
-  attr {
-    name: "is_training"
-    type: "bool"
-    default_value {
-      b: true
+    allowed_values {
+      list {
+        s: "NHWC"
+        s: "NCHW"
+      }
     }
   }
 }
 op {
-  name: "FusedPadConv2D"
+  name: "DepthwiseConv2dNativeBackpropFilter"
   input_arg {
     name: "input"
     type_attr: "T"
   }
   input_arg {
-    name: "paddings"
+    name: "filter_sizes"
     type: DT_INT32
   }
   input_arg {
-    name: "filter"
+    name: "out_backprop"
     type_attr: "T"
   }
   output_arg {
@@ -13302,17 +13177,9 @@ op {
     type: "type"
     allowed_values {
       list {
+        type: DT_BFLOAT16
         type: DT_FLOAT
-      }
-    }
-  }
-  attr {
-    name: "mode"
-    type: "string"
-    allowed_values {
-      list {
-        s: "REFLECT"
-        s: "SYMMETRIC"
+        type: DT_DOUBLE
       }
     }
   }
@@ -13330,23 +13197,44 @@ op {
       }
     }
   }
+  attr {
+    name: "data_format"
+    type: "string"
+    default_value {
+      s: "NHWC"
+    }
+    allowed_values {
+      list {
+        s: "NHWC"
+        s: "NCHW"
+      }
+    }
+  }
+  attr {
+    name: "dilations"
+    type: "list(int)"
+    default_value {
+      list {
+        i: 1
+        i: 1
+        i: 1
+        i: 1
+      }
+    }
+  }
 }
 op {
-  name: "FusedResizeAndPadConv2D"
-  input_arg {
-    name: "input"
-    type_attr: "T"
-  }
+  name: "DepthwiseConv2dNativeBackpropInput"
   input_arg {
-    name: "size"
+    name: "input_sizes"
     type: DT_INT32
   }
   input_arg {
-    name: "paddings"
-    type: DT_INT32
+    name: "filter"
+    type_attr: "T"
   }
   input_arg {
-    name: "filter"
+    name: "out_backprop"
     type_attr: "T"
   }
   output_arg {
@@ -13359,23 +13247,50 @@ op {
     allowed_values {
       list {
         type: DT_FLOAT
+        type: DT_DOUBLE
       }
     }
   }
   attr {
-    name: "resize_align_corners"
-    type: "bool"
-    default_value {
-      b: false
-    }
+    name: "strides"
+    type: "list(int)"
   }
   attr {
-    name: "mode"
+    name: "padding"
     type: "string"
     allowed_values {
       list {
-        s: "REFLECT"
-        s: "SYMMETRIC"
+        s: "SAME"
+        s: "VALID"
+      }
+    }
+  }
+}
+op {
+  name: "DepthwiseConv2dNativeBackpropInput"
+  input_arg {
+    name: "input_sizes"
+    type: DT_INT32
+  }
+  input_arg {
+    name: "filter"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "out_backprop"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
       }
     }
   }
@@ -13393,274 +13308,318 @@ op {
       }
     }
   }
+  attr {
+    name: "data_format"
+    type: "string"
+    default_value {
+      s: "NHWC"
+    }
+    allowed_values {
+      list {
+        s: "NHWC"
+        s: "NCHW"
+      }
+    }
+  }
 }
 op {
-  name: "Gather"
+  name: "DepthwiseConv2dNativeBackpropInput"
   input_arg {
-    name: "params"
-    type_attr: "Tparams"
+    name: "input_sizes"
+    type: DT_INT32
   }
   input_arg {
-    name: "indices"
-    type_attr: "Tindices"
+    name: "filter"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "out_backprop"
+    type_attr: "T"
   }
   output_arg {
     name: "output"
-    type_attr: "Tparams"
+    type_attr: "T"
   }
   attr {
-    name: "validate_indices"
-    type: "bool"
-    default_value {
-      b: true
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_BFLOAT16
+        type: DT_FLOAT
+        type: DT_DOUBLE
+      }
     }
   }
   attr {
-    name: "Tparams"
-    type: "type"
+    name: "strides"
+    type: "list(int)"
   }
   attr {
-    name: "Tindices"
-    type: "type"
+    name: "padding"
+    type: "string"
     allowed_values {
       list {
-        type: DT_INT32
-        type: DT_INT64
+        s: "SAME"
+        s: "VALID"
+      }
+    }
+  }
+  attr {
+    name: "data_format"
+    type: "string"
+    default_value {
+      s: "NHWC"
+    }
+    allowed_values {
+      list {
+        s: "NHWC"
+        s: "NCHW"
+      }
+    }
+  }
+  attr {
+    name: "dilations"
+    type: "list(int)"
+    default_value {
+      list {
+        i: 1
+        i: 1
+        i: 1
+        i: 1
       }
     }
   }
 }
 op {
-  name: "GatherNd"
+  name: "Dequantize"
   input_arg {
-    name: "params"
-    type_attr: "Tparams"
+    name: "input"
+    type_attr: "T"
   }
   input_arg {
-    name: "indices"
-    type_attr: "Tindices"
+    name: "min_range"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "max_range"
+    type: DT_FLOAT
   }
   output_arg {
     name: "output"
-    type_attr: "Tparams"
+    type: DT_FLOAT
   }
   attr {
-    name: "Tparams"
+    name: "T"
     type: "type"
+    allowed_values {
+      list {
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT16
+        type: DT_QUINT16
+        type: DT_QINT32
+      }
+    }
   }
   attr {
-    name: "Tindices"
-    type: "type"
+    name: "mode"
+    type: "string"
+    default_value {
+      s: "MIN_COMBINED"
+    }
     allowed_values {
       list {
-        type: DT_INT32
-        type: DT_INT64
+        s: "MIN_COMBINED"
+        s: "MIN_FIRST"
       }
     }
   }
 }
 op {
-  name: "GatherV2"
+  name: "Dequantize"
   input_arg {
-    name: "params"
-    type_attr: "Tparams"
+    name: "input"
+    type_attr: "T"
   }
   input_arg {
-    name: "indices"
-    type_attr: "Tindices"
+    name: "min_range"
+    type: DT_FLOAT
   }
   input_arg {
-    name: "axis"
-    type_attr: "Taxis"
+    name: "max_range"
+    type: DT_FLOAT
   }
   output_arg {
     name: "output"
-    type_attr: "Tparams"
-  }
-  attr {
-    name: "Tparams"
-    type: "type"
+    type: DT_FLOAT
   }
   attr {
-    name: "Tindices"
+    name: "T"
     type: "type"
     allowed_values {
       list {
-        type: DT_INT32
-        type: DT_INT64
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT16
+        type: DT_QUINT16
+        type: DT_QINT32
       }
     }
   }
   attr {
-    name: "Taxis"
-    type: "type"
+    name: "mode"
+    type: "string"
+    default_value {
+      s: "MIN_COMBINED"
+    }
     allowed_values {
       list {
-        type: DT_INT32
-        type: DT_INT64
+        s: "MIN_COMBINED"
+        s: "MIN_FIRST"
+        s: "SCALED"
       }
     }
   }
 }
 op {
-  name: "GenerateVocabRemapping"
+  name: "DeserializeIterator"
   input_arg {
-    name: "new_vocab_file"
-    type: DT_STRING
+    name: "resource_handle"
+    type: DT_RESOURCE
   }
   input_arg {
-    name: "old_vocab_file"
+    name: "serialized"
+    type: DT_VARIANT
+  }
+  is_stateful: true
+}
+op {
+  name: "DeserializeManySparse"
+  input_arg {
+    name: "serialized_sparse"
     type: DT_STRING
   }
   output_arg {
-    name: "remapping"
+    name: "sparse_indices"
     type: DT_INT64
   }
   output_arg {
-    name: "num_present"
-    type: DT_INT32
+    name: "sparse_values"
+    type_attr: "dtype"
   }
-  attr {
-    name: "new_vocab_offset"
-    type: "int"
-    has_minimum: true
+  output_arg {
+    name: "sparse_shape"
+    type: DT_INT64
   }
   attr {
-    name: "num_new_vocab"
-    type: "int"
-    has_minimum: true
+    name: "dtype"
+    type: "type"
   }
 }
 op {
-  name: "GenerateVocabRemapping"
-  input_arg {
-    name: "new_vocab_file"
-    type: DT_STRING
-  }
+  name: "DeserializeSparse"
   input_arg {
-    name: "old_vocab_file"
+    name: "serialized_sparse"
     type: DT_STRING
   }
   output_arg {
-    name: "remapping"
+    name: "sparse_indices"
     type: DT_INT64
   }
   output_arg {
-    name: "num_present"
-    type: DT_INT32
-  }
-  attr {
-    name: "new_vocab_offset"
-    type: "int"
-    has_minimum: true
+    name: "sparse_values"
+    type_attr: "dtype"
   }
-  attr {
-    name: "num_new_vocab"
-    type: "int"
-    has_minimum: true
+  output_arg {
+    name: "sparse_shape"
+    type: DT_INT64
   }
   attr {
-    name: "old_vocab_size"
-    type: "int"
-    default_value {
-      i: -1
-    }
-    has_minimum: true
-    minimum: -1
+    name: "dtype"
+    type: "type"
   }
 }
 op {
-  name: "GetSessionHandle"
+  name: "DeserializeSparse"
   input_arg {
-    name: "value"
-    type_attr: "T"
+    name: "serialized_sparse"
+    type_attr: "Tserialized"
   }
   output_arg {
-    name: "handle"
-    type: DT_STRING
-  }
-  attr {
-    name: "T"
-    type: "type"
+    name: "sparse_indices"
+    type: DT_INT64
   }
-}
-op {
-  name: "GetSessionHandle"
-  input_arg {
-    name: "value"
-    type_attr: "T"
+  output_arg {
+    name: "sparse_values"
+    type_attr: "dtype"
   }
   output_arg {
-    name: "handle"
-    type: DT_STRING
+    name: "sparse_shape"
+    type: DT_INT64
   }
   attr {
-    name: "T"
+    name: "dtype"
     type: "type"
   }
-  deprecation {
-    version: 23
-  }
-}
-op {
-  name: "GetSessionHandle"
-  input_arg {
-    name: "value"
-    type_attr: "T"
-  }
-  output_arg {
-    name: "handle"
-    type: DT_STRING
-  }
   attr {
-    name: "T"
+    name: "Tserialized"
     type: "type"
+    default_value {
+      type: DT_STRING
+    }
+    allowed_values {
+      list {
+        type: DT_STRING
+        type: DT_VARIANT
+      }
+    }
   }
 }
 op {
-  name: "GetSessionHandleV2"
+  name: "DestroyResourceOp"
   input_arg {
-    name: "value"
-    type_attr: "T"
-  }
-  output_arg {
-    name: "handle"
+    name: "resource"
     type: DT_RESOURCE
   }
   attr {
-    name: "T"
-    type: "type"
+    name: "ignore_lookup_error"
+    type: "bool"
+    default_value {
+      b: true
+    }
   }
   is_stateful: true
 }
 op {
-  name: "GetSessionTensor"
+  name: "DestroyTemporaryVariable"
   input_arg {
-    name: "handle"
-    type: DT_STRING
+    name: "ref"
+    type_attr: "T"
+    is_ref: true
   }
   output_arg {
     name: "value"
-    type_attr: "dtype"
+    type_attr: "T"
   }
   attr {
-    name: "dtype"
+    name: "T"
     type: "type"
   }
+  attr {
+    name: "var_name"
+    type: "string"
+  }
 }
 op {
-  name: "Greater"
-  input_arg {
-    name: "x"
-    type_attr: "T"
-  }
+  name: "Diag"
   input_arg {
-    name: "y"
+    name: "diagonal"
     type_attr: "T"
   }
   output_arg {
-    name: "z"
-    type: DT_BOOL
+    name: "output"
+    type_attr: "T"
   }
   attr {
     name: "T"
@@ -13671,62 +13630,47 @@ op {
         type: DT_DOUBLE
         type: DT_INT32
         type: DT_INT64
-        type: DT_UINT8
-        type: DT_INT16
-        type: DT_INT8
-        type: DT_UINT16
-        type: DT_HALF
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
       }
     }
   }
 }
 op {
-  name: "Greater"
-  input_arg {
-    name: "x"
-    type_attr: "T"
-  }
+  name: "Diag"
   input_arg {
-    name: "y"
+    name: "diagonal"
     type_attr: "T"
   }
   output_arg {
-    name: "z"
-    type: DT_BOOL
+    name: "output"
+    type_attr: "T"
   }
   attr {
     name: "T"
     type: "type"
     allowed_values {
       list {
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
         type: DT_INT32
         type: DT_INT64
-        type: DT_UINT8
-        type: DT_INT16
-        type: DT_INT8
-        type: DT_UINT16
-        type: DT_HALF
-        type: DT_UINT32
-        type: DT_UINT64
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
       }
     }
   }
 }
 op {
-  name: "GreaterEqual"
+  name: "DiagPart"
   input_arg {
-    name: "x"
-    type_attr: "T"
-  }
-  input_arg {
-    name: "y"
+    name: "input"
     type_attr: "T"
   }
   output_arg {
-    name: "z"
-    type: DT_BOOL
+    name: "diagonal"
+    type_attr: "T"
   }
   attr {
     name: "T"
@@ -13737,178 +13681,201 @@ op {
         type: DT_DOUBLE
         type: DT_INT32
         type: DT_INT64
-        type: DT_UINT8
-        type: DT_INT16
-        type: DT_INT8
-        type: DT_UINT16
-        type: DT_HALF
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
       }
     }
   }
 }
 op {
-  name: "GreaterEqual"
-  input_arg {
-    name: "x"
-    type_attr: "T"
-  }
+  name: "DiagPart"
   input_arg {
-    name: "y"
+    name: "input"
     type_attr: "T"
   }
   output_arg {
-    name: "z"
-    type: DT_BOOL
+    name: "diagonal"
+    type_attr: "T"
   }
   attr {
     name: "T"
     type: "type"
     allowed_values {
       list {
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
         type: DT_INT32
         type: DT_INT64
-        type: DT_UINT8
-        type: DT_INT16
-        type: DT_INT8
-        type: DT_UINT16
-        type: DT_HALF
-        type: DT_UINT32
-        type: DT_UINT64
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
       }
     }
   }
 }
 op {
-  name: "GroupByWindowDataset"
+  name: "Digamma"
   input_arg {
-    name: "input_dataset"
-    type: DT_VARIANT
+    name: "x"
+    type_attr: "T"
   }
-  input_arg {
-    name: "key_func_other_arguments"
-    type_list_attr: "Tkey_func_other_arguments"
+  output_arg {
+    name: "y"
+    type_attr: "T"
   }
-  input_arg {
-    name: "reduce_func_other_arguments"
-    type_list_attr: "Treduce_func_other_arguments"
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_FLOAT
+        type: DT_DOUBLE
+      }
+    }
   }
+}
+op {
+  name: "Digamma"
   input_arg {
-    name: "window_size_func_other_arguments"
-    type_list_attr: "Twindow_size_func_other_arguments"
+    name: "x"
+    type_attr: "T"
   }
   output_arg {
-    name: "handle"
-    type: DT_VARIANT
+    name: "y"
+    type_attr: "T"
   }
   attr {
-    name: "key_func"
-    type: "func"
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_BFLOAT16
+        type: DT_FLOAT
+        type: DT_DOUBLE
+      }
+    }
   }
-  attr {
-    name: "reduce_func"
-    type: "func"
+}
+op {
+  name: "Dilation2D"
+  input_arg {
+    name: "input"
+    type_attr: "T"
   }
-  attr {
-    name: "window_size_func"
-    type: "func"
+  input_arg {
+    name: "filter"
+    type_attr: "T"
   }
-  attr {
-    name: "Tkey_func_other_arguments"
-    type: "list(type)"
-    has_minimum: true
+  output_arg {
+    name: "output"
+    type_attr: "T"
   }
   attr {
-    name: "Treduce_func_other_arguments"
-    type: "list(type)"
-    has_minimum: true
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_UINT16
+        type: DT_HALF
+      }
+    }
   }
   attr {
-    name: "Twindow_size_func_other_arguments"
-    type: "list(type)"
+    name: "strides"
+    type: "list(int)"
     has_minimum: true
+    minimum: 4
   }
   attr {
-    name: "output_types"
-    type: "list(type)"
+    name: "rates"
+    type: "list(int)"
     has_minimum: true
-    minimum: 1
+    minimum: 4
   }
   attr {
-    name: "output_shapes"
-    type: "list(shape)"
-    has_minimum: true
-    minimum: 1
+    name: "padding"
+    type: "string"
+    allowed_values {
+      list {
+        s: "SAME"
+        s: "VALID"
+      }
+    }
   }
-  is_stateful: true
 }
 op {
-  name: "GroupByWindowDataset"
-  input_arg {
-    name: "input_dataset"
-    type: DT_VARIANT
-  }
-  input_arg {
-    name: "key_func_other_arguments"
-    type_list_attr: "Tkey_func_other_arguments"
-  }
+  name: "Dilation2D"
   input_arg {
-    name: "reduce_func_other_arguments"
-    type_list_attr: "Treduce_func_other_arguments"
+    name: "input"
+    type_attr: "T"
   }
   input_arg {
-    name: "window_size_func_other_arguments"
-    type_list_attr: "Twindow_size_func_other_arguments"
+    name: "filter"
+    type_attr: "T"
   }
   output_arg {
-    name: "handle"
-    type: DT_VARIANT
-  }
-  attr {
-    name: "key_func"
-    type: "func"
-  }
-  attr {
-    name: "reduce_func"
-    type: "func"
-  }
-  attr {
-    name: "window_size_func"
-    type: "func"
-  }
-  attr {
-    name: "Tkey_func_other_arguments"
-    type: "list(type)"
-    has_minimum: true
+    name: "output"
+    type_attr: "T"
   }
   attr {
-    name: "Treduce_func_other_arguments"
-    type: "list(type)"
-    has_minimum: true
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_UINT16
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+      }
+    }
   }
   attr {
-    name: "Twindow_size_func_other_arguments"
-    type: "list(type)"
+    name: "strides"
+    type: "list(int)"
     has_minimum: true
+    minimum: 4
   }
   attr {
-    name: "output_types"
-    type: "list(type)"
+    name: "rates"
+    type: "list(int)"
     has_minimum: true
-    minimum: 1
+    minimum: 4
   }
   attr {
-    name: "output_shapes"
-    type: "list(shape)"
-    has_minimum: true
-    minimum: 1
+    name: "padding"
+    type: "string"
+    allowed_values {
+      list {
+        s: "SAME"
+        s: "VALID"
+      }
+    }
   }
 }
 op {
-  name: "HSVToRGB"
+  name: "Dilation2D"
   input_arg {
-    name: "images"
+    name: "input"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "filter"
     type_attr: "T"
   }
   output_arg {
@@ -13918,156 +13885,185 @@ op {
   attr {
     name: "T"
     type: "type"
-    default_value {
-      type: DT_FLOAT
-    }
     allowed_values {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_UINT16
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
-}
-op {
-  name: "HashTable"
-  output_arg {
-    name: "table_handle"
-    type: DT_STRING
-    is_ref: true
+  attr {
+    name: "strides"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 4
   }
   attr {
-    name: "container"
-    type: "string"
-    default_value {
-      s: ""
-    }
+    name: "rates"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 4
   }
   attr {
-    name: "shared_name"
+    name: "padding"
     type: "string"
-    default_value {
-      s: ""
+    allowed_values {
+      list {
+        s: "SAME"
+        s: "VALID"
+      }
     }
   }
-  attr {
-    name: "use_node_name_sharing"
-    type: "bool"
-    default_value {
-      b: false
-    }
+}
+op {
+  name: "Dilation2DBackpropFilter"
+  input_arg {
+    name: "input"
+    type_attr: "T"
   }
-  attr {
-    name: "key_dtype"
-    type: "type"
+  input_arg {
+    name: "filter"
+    type_attr: "T"
   }
-  attr {
-    name: "value_dtype"
-    type: "type"
+  input_arg {
+    name: "out_backprop"
+    type_attr: "T"
   }
-  is_stateful: true
-}
-op {
-  name: "HashTableV2"
   output_arg {
-    name: "table_handle"
-    type: DT_RESOURCE
-  }
-  attr {
-    name: "container"
-    type: "string"
-    default_value {
-      s: ""
-    }
+    name: "filter_backprop"
+    type_attr: "T"
   }
   attr {
-    name: "shared_name"
-    type: "string"
-    default_value {
-      s: ""
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_UINT16
+        type: DT_HALF
+      }
     }
   }
   attr {
-    name: "use_node_name_sharing"
-    type: "bool"
-    default_value {
-      b: false
-    }
+    name: "strides"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 4
   }
   attr {
-    name: "key_dtype"
-    type: "type"
+    name: "rates"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 4
   }
   attr {
-    name: "value_dtype"
-    type: "type"
+    name: "padding"
+    type: "string"
+    allowed_values {
+      list {
+        s: "SAME"
+        s: "VALID"
+      }
+    }
   }
-  is_stateful: true
 }
 op {
-  name: "HistogramFixedWidth"
+  name: "Dilation2DBackpropFilter"
   input_arg {
-    name: "values"
+    name: "input"
     type_attr: "T"
   }
   input_arg {
-    name: "value_range"
+    name: "filter"
     type_attr: "T"
   }
   input_arg {
-    name: "nbins"
-    type: DT_INT32
+    name: "out_backprop"
+    type_attr: "T"
   }
   output_arg {
-    name: "out"
-    type_attr: "dtype"
+    name: "filter_backprop"
+    type_attr: "T"
   }
   attr {
     name: "T"
     type: "type"
     allowed_values {
       list {
-        type: DT_INT32
-        type: DT_INT64
         type: DT_FLOAT
         type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_UINT16
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
       }
     }
   }
   attr {
-    name: "dtype"
-    type: "type"
-    default_value {
-      type: DT_INT32
-    }
+    name: "strides"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 4
+  }
+  attr {
+    name: "rates"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 4
+  }
+  attr {
+    name: "padding"
+    type: "string"
     allowed_values {
       list {
-        type: DT_INT32
-        type: DT_INT64
+        s: "SAME"
+        s: "VALID"
       }
     }
   }
 }
 op {
-  name: "HistogramSummary"
+  name: "Dilation2DBackpropFilter"
   input_arg {
-    name: "tag"
-    type: DT_STRING
+    name: "input"
+    type_attr: "T"
   }
   input_arg {
-    name: "values"
+    name: "filter"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "out_backprop"
     type_attr: "T"
   }
   output_arg {
-    name: "summary"
-    type: DT_STRING
+    name: "filter_backprop"
+    type_attr: "T"
   }
   attr {
     name: "T"
     type: "type"
-    default_value {
-      type: DT_FLOAT
-    }
     allowed_values {
       list {
         type: DT_FLOAT
@@ -14079,30 +14075,56 @@ op {
         type: DT_INT8
         type: DT_UINT16
         type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
+      }
+    }
+  }
+  attr {
+    name: "strides"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 4
+  }
+  attr {
+    name: "rates"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 4
+  }
+  attr {
+    name: "padding"
+    type: "string"
+    allowed_values {
+      list {
+        s: "SAME"
+        s: "VALID"
       }
     }
   }
 }
 op {
-  name: "HistogramSummary"
+  name: "Dilation2DBackpropInput"
   input_arg {
-    name: "tag"
-    type: DT_STRING
+    name: "input"
+    type_attr: "T"
   }
   input_arg {
-    name: "values"
+    name: "filter"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "out_backprop"
     type_attr: "T"
   }
   output_arg {
-    name: "summary"
-    type: DT_STRING
+    name: "in_backprop"
+    type_attr: "T"
   }
   attr {
     name: "T"
     type: "type"
-    default_value {
-      type: DT_FLOAT
-    }
     allowed_values {
       list {
         type: DT_FLOAT
@@ -14114,175 +14136,195 @@ op {
         type: DT_INT8
         type: DT_UINT16
         type: DT_HALF
-        type: DT_UINT32
-        type: DT_UINT64
       }
     }
   }
-}
-op {
-  name: "IFFT"
-  input_arg {
-    name: "input"
-    type: DT_COMPLEX64
+  attr {
+    name: "strides"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 4
   }
-  output_arg {
-    name: "output"
-    type: DT_COMPLEX64
+  attr {
+    name: "rates"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 4
+  }
+  attr {
+    name: "padding"
+    type: "string"
+    allowed_values {
+      list {
+        s: "SAME"
+        s: "VALID"
+      }
+    }
   }
 }
 op {
-  name: "IFFT2D"
+  name: "Dilation2DBackpropInput"
   input_arg {
     name: "input"
-    type: DT_COMPLEX64
+    type_attr: "T"
   }
-  output_arg {
-    name: "output"
-    type: DT_COMPLEX64
-  }
-}
-op {
-  name: "IFFT3D"
-  input_arg {
-    name: "input"
-    type: DT_COMPLEX64
-  }
-  output_arg {
-    name: "output"
-    type: DT_COMPLEX64
-  }
-}
-op {
-  name: "IRFFT"
   input_arg {
-    name: "input"
-    type: DT_COMPLEX64
+    name: "filter"
+    type_attr: "T"
   }
   input_arg {
-    name: "fft_length"
-    type: DT_INT32
+    name: "out_backprop"
+    type_attr: "T"
   }
   output_arg {
-    name: "output"
-    type: DT_FLOAT
+    name: "in_backprop"
+    type_attr: "T"
   }
-}
-op {
-  name: "IRFFT2D"
-  input_arg {
-    name: "input"
-    type: DT_COMPLEX64
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_UINT16
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+      }
+    }
   }
-  input_arg {
-    name: "fft_length"
-    type: DT_INT32
+  attr {
+    name: "strides"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 4
   }
-  output_arg {
-    name: "output"
-    type: DT_FLOAT
+  attr {
+    name: "rates"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 4
+  }
+  attr {
+    name: "padding"
+    type: "string"
+    allowed_values {
+      list {
+        s: "SAME"
+        s: "VALID"
+      }
+    }
   }
 }
 op {
-  name: "IRFFT3D"
+  name: "Dilation2DBackpropInput"
   input_arg {
     name: "input"
-    type: DT_COMPLEX64
+    type_attr: "T"
   }
   input_arg {
-    name: "fft_length"
-    type: DT_INT32
-  }
-  output_arg {
-    name: "output"
-    type: DT_FLOAT
+    name: "filter"
+    type_attr: "T"
   }
-}
-op {
-  name: "Identity"
   input_arg {
-    name: "input"
+    name: "out_backprop"
     type_attr: "T"
   }
   output_arg {
-    name: "output"
+    name: "in_backprop"
     type_attr: "T"
   }
   attr {
     name: "T"
     type: "type"
-  }
-}
-op {
-  name: "IdentityN"
-  input_arg {
-    name: "input"
-    type_list_attr: "T"
-  }
-  output_arg {
-    name: "output"
-    type_list_attr: "T"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_UINT16
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
+      }
+    }
   }
   attr {
-    name: "T"
-    type: "list(type)"
+    name: "strides"
+    type: "list(int)"
     has_minimum: true
-    minimum: 1
-  }
-}
-op {
-  name: "IdentityReader"
-  output_arg {
-    name: "reader_handle"
-    type: DT_STRING
-    is_ref: true
+    minimum: 4
   }
   attr {
-    name: "container"
-    type: "string"
-    default_value {
-      s: ""
-    }
+    name: "rates"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 4
   }
   attr {
-    name: "shared_name"
+    name: "padding"
     type: "string"
-    default_value {
-      s: ""
+    allowed_values {
+      list {
+        s: "SAME"
+        s: "VALID"
+      }
     }
   }
-  is_stateful: true
 }
 op {
-  name: "IdentityReaderV2"
-  output_arg {
-    name: "reader_handle"
-    type: DT_RESOURCE
+  name: "Div"
+  input_arg {
+    name: "x"
+    type_attr: "T"
   }
-  attr {
-    name: "container"
-    type: "string"
-    default_value {
-      s: ""
-    }
+  input_arg {
+    name: "y"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "z"
+    type_attr: "T"
   }
   attr {
-    name: "shared_name"
-    type: "string"
-    default_value {
-      s: ""
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_UINT8
+        type: DT_INT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+      }
     }
   }
-  is_stateful: true
 }
 op {
-  name: "Igamma"
+  name: "Div"
   input_arg {
-    name: "a"
+    name: "x"
     type_attr: "T"
   }
   input_arg {
-    name: "x"
+    name: "y"
     type_attr: "T"
   }
   output_arg {
@@ -14294,544 +14336,515 @@ op {
     type: "type"
     allowed_values {
       list {
+        type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
+        type: DT_UINT8
+        type: DT_INT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
       }
     }
   }
 }
 op {
-  name: "Igammac"
+  name: "DrawBoundingBoxes"
   input_arg {
-    name: "a"
+    name: "images"
     type_attr: "T"
   }
   input_arg {
-    name: "x"
-    type_attr: "T"
+    name: "boxes"
+    type: DT_FLOAT
   }
   output_arg {
-    name: "z"
+    name: "output"
     type_attr: "T"
   }
   attr {
     name: "T"
     type: "type"
+    default_value {
+      type: DT_FLOAT
+    }
     allowed_values {
       list {
         type: DT_FLOAT
-        type: DT_DOUBLE
+        type: DT_HALF
       }
     }
   }
 }
 op {
-  name: "IgnoreErrorsDataset"
+  name: "DynamicPartition"
   input_arg {
-    name: "input_dataset"
-    type: DT_VARIANT
+    name: "data"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "partitions"
+    type: DT_INT32
   }
   output_arg {
-    name: "handle"
-    type: DT_VARIANT
+    name: "outputs"
+    type_attr: "T"
+    number_attr: "num_partitions"
   }
   attr {
-    name: "output_types"
-    type: "list(type)"
+    name: "num_partitions"
+    type: "int"
     has_minimum: true
     minimum: 1
   }
   attr {
-    name: "output_shapes"
-    type: "list(shape)"
-    has_minimum: true
-    minimum: 1
+    name: "T"
+    type: "type"
   }
-  is_stateful: true
 }
 op {
-  name: "IgnoreErrorsDataset"
+  name: "DynamicStitch"
   input_arg {
-    name: "input_dataset"
-    type: DT_VARIANT
+    name: "indices"
+    type: DT_INT32
+    number_attr: "N"
+  }
+  input_arg {
+    name: "data"
+    type_attr: "T"
+    number_attr: "N"
   }
   output_arg {
-    name: "handle"
-    type: DT_VARIANT
+    name: "merged"
+    type_attr: "T"
   }
   attr {
-    name: "output_types"
-    type: "list(type)"
+    name: "N"
+    type: "int"
     has_minimum: true
     minimum: 1
   }
   attr {
-    name: "output_shapes"
-    type: "list(shape)"
-    has_minimum: true
-    minimum: 1
+    name: "T"
+    type: "type"
   }
 }
 op {
-  name: "Imag"
+  name: "EagerPyFunc"
   input_arg {
     name: "input"
-    type_attr: "T"
+    type_list_attr: "Tin"
   }
   output_arg {
     name: "output"
-    type_attr: "Tout"
+    type_list_attr: "Tout"
   }
   attr {
-    name: "T"
-    type: "type"
-    default_value {
-      type: DT_COMPLEX64
-    }
-    allowed_values {
-      list {
-        type: DT_COMPLEX64
-        type: DT_COMPLEX128
-      }
-    }
+    name: "token"
+    type: "string"
+  }
+  attr {
+    name: "Tin"
+    type: "list(type)"
+    has_minimum: true
   }
   attr {
     name: "Tout"
-    type: "type"
-    default_value {
-      type: DT_FLOAT
-    }
-    allowed_values {
-      list {
-        type: DT_FLOAT
-        type: DT_DOUBLE
-      }
-    }
+    type: "list(type)"
+    has_minimum: true
   }
+  is_stateful: true
 }
 op {
-  name: "ImageSummary"
+  name: "EditDistance"
   input_arg {
-    name: "tag"
-    type: DT_STRING
+    name: "hypothesis_indices"
+    type: DT_INT64
   }
   input_arg {
-    name: "tensor"
+    name: "hypothesis_values"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "hypothesis_shape"
+    type: DT_INT64
+  }
+  input_arg {
+    name: "truth_indices"
+    type: DT_INT64
+  }
+  input_arg {
+    name: "truth_values"
     type_attr: "T"
   }
+  input_arg {
+    name: "truth_shape"
+    type: DT_INT64
+  }
   output_arg {
-    name: "summary"
-    type: DT_STRING
+    name: "output"
+    type: DT_FLOAT
   }
   attr {
-    name: "max_images"
-    type: "int"
+    name: "normalize"
+    type: "bool"
     default_value {
-      i: 3
+      b: true
     }
-    has_minimum: true
-    minimum: 1
   }
   attr {
     name: "T"
     type: "type"
-    default_value {
-      type: DT_FLOAT
-    }
-    allowed_values {
-      list {
-        type: DT_UINT8
-        type: DT_FLOAT
-        type: DT_HALF
-      }
-    }
-  }
-  attr {
-    name: "bad_color"
-    type: "tensor"
-    default_value {
-      tensor {
-        dtype: DT_UINT8
-        tensor_shape {
-          dim {
-            size: 4
-          }
-        }
-        int_val: 255
-        int_val: 0
-        int_val: 0
-        int_val: 255
-      }
-    }
   }
 }
 op {
-  name: "ImageSummary"
-  input_arg {
-    name: "tag"
-    type: DT_STRING
-  }
+  name: "Elu"
   input_arg {
-    name: "tensor"
+    name: "features"
     type_attr: "T"
   }
   output_arg {
-    name: "summary"
-    type: DT_STRING
-  }
-  attr {
-    name: "max_images"
-    type: "int"
-    default_value {
-      i: 3
-    }
-    has_minimum: true
-    minimum: 1
+    name: "activations"
+    type_attr: "T"
   }
   attr {
     name: "T"
     type: "type"
-    default_value {
-      type: DT_FLOAT
-    }
     allowed_values {
       list {
-        type: DT_UINT8
         type: DT_FLOAT
-        type: DT_HALF
         type: DT_DOUBLE
-      }
-    }
-  }
-  attr {
-    name: "bad_color"
-    type: "tensor"
-    default_value {
-      tensor {
-        dtype: DT_UINT8
-        tensor_shape {
-          dim {
-            size: 4
-          }
-        }
-        int_val: 255
-        int_val: 0
-        int_val: 0
-        int_val: 255
+        type: DT_HALF
       }
     }
   }
 }
 op {
-  name: "ImmutableConst"
+  name: "Elu"
+  input_arg {
+    name: "features"
+    type_attr: "T"
+  }
   output_arg {
-    name: "tensor"
-    type_attr: "dtype"
+    name: "activations"
+    type_attr: "T"
   }
   attr {
-    name: "dtype"
+    name: "T"
     type: "type"
-  }
-  attr {
-    name: "shape"
-    type: "shape"
-  }
-  attr {
-    name: "memory_region_name"
-    type: "string"
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_FLOAT
+        type: DT_DOUBLE
+      }
+    }
   }
 }
 op {
-  name: "InTopK"
-  input_arg {
-    name: "predictions"
-    type: DT_FLOAT
-  }
+  name: "Elu"
   input_arg {
-    name: "targets"
+    name: "features"
     type_attr: "T"
   }
   output_arg {
-    name: "precision"
-    type: DT_BOOL
-  }
-  attr {
-    name: "k"
-    type: "int"
+    name: "activations"
+    type_attr: "T"
   }
   attr {
     name: "T"
     type: "type"
-    default_value {
-      type: DT_INT32
-    }
     allowed_values {
       list {
-        type: DT_INT32
-        type: DT_INT64
+        type: DT_HALF
+        type: DT_BFLOAT16
+        type: DT_FLOAT
+        type: DT_DOUBLE
       }
     }
   }
 }
 op {
-  name: "InTopKV2"
-  input_arg {
-    name: "predictions"
-    type: DT_FLOAT
-  }
+  name: "EluGrad"
   input_arg {
-    name: "targets"
+    name: "gradients"
     type_attr: "T"
   }
   input_arg {
-    name: "k"
+    name: "outputs"
     type_attr: "T"
   }
   output_arg {
-    name: "precision"
-    type: DT_BOOL
+    name: "backprops"
+    type_attr: "T"
   }
   attr {
     name: "T"
     type: "type"
-    default_value {
-      type: DT_INT32
-    }
     allowed_values {
       list {
-        type: DT_INT32
-        type: DT_INT64
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_HALF
       }
     }
   }
 }
 op {
-  name: "InitializeTable"
+  name: "EluGrad"
   input_arg {
-    name: "table_handle"
-    type: DT_STRING
-    is_ref: true
+    name: "gradients"
+    type_attr: "T"
   }
   input_arg {
-    name: "keys"
-    type_attr: "Tkey"
+    name: "outputs"
+    type_attr: "T"
   }
-  input_arg {
-    name: "values"
-    type_attr: "Tval"
+  output_arg {
+    name: "backprops"
+    type_attr: "T"
   }
   attr {
-    name: "Tkey"
+    name: "T"
     type: "type"
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_FLOAT
+        type: DT_DOUBLE
+      }
+    }
+  }
+}
+op {
+  name: "EluGrad"
+  input_arg {
+    name: "gradients"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "outputs"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "backprops"
+    type_attr: "T"
   }
   attr {
-    name: "Tval"
+    name: "T"
     type: "type"
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_BFLOAT16
+        type: DT_FLOAT
+        type: DT_DOUBLE
+      }
+    }
   }
 }
 op {
-  name: "InitializeTableFromTextFile"
+  name: "EncodeBase64"
   input_arg {
-    name: "table_handle"
+    name: "input"
     type: DT_STRING
-    is_ref: true
   }
-  input_arg {
-    name: "filename"
+  output_arg {
+    name: "output"
     type: DT_STRING
   }
   attr {
-    name: "key_index"
-    type: "int"
-    has_minimum: true
-    minimum: -2
+    name: "pad"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+}
+op {
+  name: "EncodeJpeg"
+  input_arg {
+    name: "image"
+    type: DT_UINT8
+  }
+  output_arg {
+    name: "contents"
+    type: DT_STRING
   }
   attr {
-    name: "value_index"
-    type: "int"
-    has_minimum: true
-    minimum: -2
+    name: "format"
+    type: "string"
+    default_value {
+      s: ""
+    }
+    allowed_values {
+      list {
+        s: ""
+        s: "grayscale"
+        s: "rgb"
+      }
+    }
   }
   attr {
-    name: "vocab_size"
+    name: "quality"
     type: "int"
     default_value {
-      i: -1
+      i: 95
     }
-    has_minimum: true
-    minimum: -1
   }
   attr {
-    name: "delimiter"
-    type: "string"
+    name: "progressive"
+    type: "bool"
     default_value {
-      s: "\t"
+      b: false
     }
   }
-}
-op {
-  name: "InitializeTableFromTextFileV2"
-  input_arg {
-    name: "table_handle"
-    type: DT_RESOURCE
+  attr {
+    name: "optimize_size"
+    type: "bool"
+    default_value {
+      b: false
+    }
   }
-  input_arg {
-    name: "filename"
-    type: DT_STRING
+  attr {
+    name: "chroma_downsampling"
+    type: "bool"
+    default_value {
+      b: true
+    }
   }
   attr {
-    name: "key_index"
-    type: "int"
-    has_minimum: true
-    minimum: -2
+    name: "density_unit"
+    type: "string"
+    default_value {
+      s: "in"
+    }
+    allowed_values {
+      list {
+        s: "in"
+        s: "cm"
+      }
+    }
   }
   attr {
-    name: "value_index"
+    name: "x_density"
     type: "int"
-    has_minimum: true
-    minimum: -2
+    default_value {
+      i: 300
+    }
   }
   attr {
-    name: "vocab_size"
+    name: "y_density"
     type: "int"
     default_value {
-      i: -1
+      i: 300
     }
-    has_minimum: true
-    minimum: -1
   }
   attr {
-    name: "delimiter"
+    name: "xmp_metadata"
     type: "string"
     default_value {
-      s: "\t"
+      s: ""
     }
   }
-  is_stateful: true
 }
 op {
-  name: "InitializeTableV2"
-  input_arg {
-    name: "table_handle"
-    type: DT_RESOURCE
-  }
+  name: "EncodePng"
   input_arg {
-    name: "keys"
-    type_attr: "Tkey"
+    name: "image"
+    type_attr: "T"
   }
-  input_arg {
-    name: "values"
-    type_attr: "Tval"
+  output_arg {
+    name: "contents"
+    type: DT_STRING
   }
   attr {
-    name: "Tkey"
-    type: "type"
+    name: "compression"
+    type: "int"
+    default_value {
+      i: -1
+    }
   }
   attr {
-    name: "Tval"
+    name: "T"
     type: "type"
+    default_value {
+      type: DT_UINT8
+    }
+    allowed_values {
+      list {
+        type: DT_UINT8
+        type: DT_UINT16
+      }
+    }
   }
-  is_stateful: true
 }
 op {
-  name: "InterleaveDataset"
-  input_arg {
-    name: "input_dataset"
-    type: DT_VARIANT
-  }
-  input_arg {
-    name: "other_arguments"
-    type_list_attr: "Targuments"
-  }
+  name: "EncodeWav"
   input_arg {
-    name: "cycle_length"
-    type: DT_INT64
+    name: "audio"
+    type: DT_FLOAT
   }
   input_arg {
-    name: "block_length"
-    type: DT_INT64
+    name: "sample_rate"
+    type: DT_INT32
   }
   output_arg {
-    name: "handle"
-    type: DT_VARIANT
-  }
-  attr {
-    name: "f"
-    type: "func"
-  }
-  attr {
-    name: "Targuments"
-    type: "list(type)"
-    has_minimum: true
-  }
-  attr {
-    name: "output_types"
-    type: "list(type)"
-    has_minimum: true
-    minimum: 1
-  }
-  attr {
-    name: "output_shapes"
-    type: "list(shape)"
-    has_minimum: true
-    minimum: 1
+    name: "contents"
+    type: DT_STRING
   }
-  is_stateful: true
 }
 op {
-  name: "InterleaveDataset"
-  input_arg {
-    name: "input_dataset"
-    type: DT_VARIANT
-  }
-  input_arg {
-    name: "other_arguments"
-    type_list_attr: "Targuments"
-  }
-  input_arg {
-    name: "cycle_length"
-    type: DT_INT64
-  }
+  name: "Enter"
   input_arg {
-    name: "block_length"
-    type: DT_INT64
+    name: "data"
+    type_attr: "T"
   }
   output_arg {
-    name: "handle"
-    type: DT_VARIANT
+    name: "output"
+    type_attr: "T"
   }
   attr {
-    name: "f"
-    type: "func"
+    name: "T"
+    type: "type"
   }
   attr {
-    name: "Targuments"
-    type: "list(type)"
-    has_minimum: true
+    name: "frame_name"
+    type: "string"
   }
   attr {
-    name: "output_types"
-    type: "list(type)"
-    has_minimum: true
-    minimum: 1
+    name: "is_constant"
+    type: "bool"
+    default_value {
+      b: false
+    }
   }
   attr {
-    name: "output_shapes"
-    type: "list(shape)"
-    has_minimum: true
-    minimum: 1
+    name: "parallel_iterations"
+    type: "int"
+    default_value {
+      i: 10
+    }
   }
 }
 op {
-  name: "Inv"
+  name: "Equal"
   input_arg {
     name: "x"
     type_attr: "T"
   }
-  output_arg {
+  input_arg {
     name: "y"
     type_attr: "T"
   }
+  output_arg {
+    name: "z"
+    type: DT_BOOL
+  }
   attr {
     name: "T"
     type: "type"
@@ -14840,19 +14853,25 @@ op {
         type: DT_HALF
         type: DT_FLOAT
         type: DT_DOUBLE
+        type: DT_UINT8
+        type: DT_INT8
+        type: DT_INT16
         type: DT_INT32
         type: DT_INT64
         type: DT_COMPLEX64
+        type: DT_QUINT8
+        type: DT_QINT8
+        type: DT_QINT32
+        type: DT_STRING
+        type: DT_BOOL
         type: DT_COMPLEX128
       }
     }
   }
-  deprecation {
-    version: 17
-  }
+  is_commutative: true
 }
 op {
-  name: "InvGrad"
+  name: "Equal"
   input_arg {
     name: "x"
     type_attr: "T"
@@ -14863,7 +14882,7 @@ op {
   }
   output_arg {
     name: "z"
-    type_attr: "T"
+    type: DT_BOOL
   }
   attr {
     name: "T"
@@ -14871,29 +14890,34 @@ op {
     allowed_values {
       list {
         type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
+        type: DT_UINT8
+        type: DT_INT8
+        type: DT_INT16
+        type: DT_INT32
+        type: DT_INT64
         type: DT_COMPLEX64
+        type: DT_QUINT8
+        type: DT_QINT8
+        type: DT_QINT32
+        type: DT_STRING
+        type: DT_BOOL
         type: DT_COMPLEX128
       }
     }
   }
-  deprecation {
-    version: 17
-  }
+  is_commutative: true
 }
 op {
-  name: "InvGrad"
-  input_arg {
-    name: "y"
-    type_attr: "T"
-  }
+  name: "Erf"
   input_arg {
-    name: "dy"
+    name: "x"
     type_attr: "T"
   }
   output_arg {
-    name: "z"
+    name: "y"
     type_attr: "T"
   }
   attr {
@@ -14904,17 +14928,12 @@ op {
         type: DT_HALF
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_COMPLEX64
-        type: DT_COMPLEX128
       }
     }
   }
-  deprecation {
-    version: 17
-  }
 }
 op {
-  name: "Invert"
+  name: "Erf"
   input_arg {
     name: "x"
     type_attr: "T"
@@ -14928,18 +14947,16 @@ op {
     type: "type"
     allowed_values {
       list {
-        type: DT_INT8
-        type: DT_INT16
-        type: DT_INT32
-        type: DT_INT64
-        type: DT_UINT8
-        type: DT_UINT16
+        type: DT_HALF
+        type: DT_BFLOAT16
+        type: DT_FLOAT
+        type: DT_DOUBLE
       }
     }
   }
 }
 op {
-  name: "Invert"
+  name: "Erfc"
   input_arg {
     name: "x"
     type_attr: "T"
@@ -14953,20 +14970,15 @@ op {
     type: "type"
     allowed_values {
       list {
-        type: DT_INT8
-        type: DT_INT16
-        type: DT_INT32
-        type: DT_INT64
-        type: DT_UINT8
-        type: DT_UINT16
-        type: DT_UINT32
-        type: DT_UINT64
+        type: DT_HALF
+        type: DT_FLOAT
+        type: DT_DOUBLE
       }
     }
   }
 }
 op {
-  name: "InvertPermutation"
+  name: "Erfc"
   input_arg {
     name: "x"
     type_attr: "T"
@@ -14978,26 +14990,40 @@ op {
   attr {
     name: "T"
     type: "type"
-    default_value {
-      type: DT_INT32
-    }
     allowed_values {
       list {
-        type: DT_INT32
-        type: DT_INT64
+        type: DT_HALF
+        type: DT_BFLOAT16
+        type: DT_FLOAT
+        type: DT_DOUBLE
       }
     }
   }
 }
 op {
-  name: "IsFinite"
+  name: "Exit"
+  input_arg {
+    name: "data"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+  }
+}
+op {
+  name: "Exp"
   input_arg {
     name: "x"
     type_attr: "T"
   }
   output_arg {
     name: "y"
-    type: DT_BOOL
+    type_attr: "T"
   }
   attr {
     name: "T"
@@ -15007,19 +15033,21 @@ op {
         type: DT_HALF
         type: DT_FLOAT
         type: DT_DOUBLE
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
       }
     }
   }
 }
 op {
-  name: "IsInf"
+  name: "Exp"
   input_arg {
     name: "x"
     type_attr: "T"
   }
   output_arg {
     name: "y"
-    type: DT_BOOL
+    type_attr: "T"
   }
   attr {
     name: "T"
@@ -15027,21 +15055,56 @@ op {
     allowed_values {
       list {
         type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
       }
     }
   }
 }
 op {
-  name: "IsNan"
+  name: "ExpandDims"
+  input_arg {
+    name: "input"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "dim"
+    type_attr: "Tdim"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+  }
+  attr {
+    name: "Tdim"
+    type: "type"
+    default_value {
+      type: DT_INT32
+    }
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+}
+op {
+  name: "Expm1"
   input_arg {
     name: "x"
     type_attr: "T"
   }
   output_arg {
     name: "y"
-    type: DT_BOOL
+    type_attr: "T"
   }
   attr {
     name: "T"
@@ -15051,155 +15114,219 @@ op {
         type: DT_HALF
         type: DT_FLOAT
         type: DT_DOUBLE
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
       }
     }
   }
 }
 op {
-  name: "IsVariableInitialized"
+  name: "Expm1"
   input_arg {
-    name: "ref"
-    type_attr: "dtype"
-    is_ref: true
+    name: "x"
+    type_attr: "T"
   }
   output_arg {
-    name: "is_initialized"
-    type: DT_BOOL
+    name: "y"
+    type_attr: "T"
   }
   attr {
-    name: "dtype"
+    name: "T"
     type: "type"
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_BFLOAT16
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+      }
+    }
   }
-  allows_uninitialized_input: true
 }
 op {
-  name: "Iterator"
-  output_arg {
-    name: "handle"
-    type: DT_RESOURCE
+  name: "ExtractGlimpse"
+  input_arg {
+    name: "input"
+    type: DT_FLOAT
   }
-  attr {
-    name: "shared_name"
-    type: "string"
+  input_arg {
+    name: "size"
+    type: DT_INT32
+  }
+  input_arg {
+    name: "offsets"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "glimpse"
+    type: DT_FLOAT
   }
   attr {
-    name: "container"
-    type: "string"
+    name: "centered"
+    type: "bool"
+    default_value {
+      b: true
+    }
   }
   attr {
-    name: "output_types"
-    type: "list(type)"
-    has_minimum: true
-    minimum: 1
+    name: "normalized"
+    type: "bool"
+    default_value {
+      b: true
+    }
   }
   attr {
-    name: "output_shapes"
-    type: "list(shape)"
-    has_minimum: true
-    minimum: 1
+    name: "uniform_noise"
+    type: "bool"
+    default_value {
+      b: true
+    }
   }
-  is_stateful: true
 }
 op {
-  name: "IteratorFromStringHandle"
+  name: "ExtractImagePatches"
   input_arg {
-    name: "string_handle"
-    type: DT_STRING
+    name: "images"
+    type_attr: "T"
   }
   output_arg {
-    name: "resource_handle"
-    type: DT_RESOURCE
+    name: "patches"
+    type_attr: "T"
   }
-  is_stateful: true
-}
-op {
-  name: "IteratorFromStringHandle"
-  input_arg {
-    name: "string_handle"
-    type: DT_STRING
+  attr {
+    name: "ksizes"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 4
   }
-  output_arg {
-    name: "resource_handle"
-    type: DT_RESOURCE
+  attr {
+    name: "strides"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 4
   }
   attr {
-    name: "output_types"
-    type: "list(type)"
-    default_value {
+    name: "rates"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 4
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
       list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_UINT16
+        type: DT_HALF
       }
     }
-    has_minimum: true
   }
   attr {
-    name: "output_shapes"
-    type: "list(shape)"
-    default_value {
+    name: "padding"
+    type: "string"
+    allowed_values {
       list {
+        s: "SAME"
+        s: "VALID"
       }
     }
-    has_minimum: true
   }
-  is_stateful: true
 }
 op {
-  name: "IteratorGetNext"
+  name: "ExtractImagePatches"
   input_arg {
-    name: "iterator"
-    type: DT_RESOURCE
+    name: "images"
+    type_attr: "T"
   }
   output_arg {
-    name: "components"
-    type_list_attr: "output_types"
+    name: "patches"
+    type_attr: "T"
   }
   attr {
-    name: "output_types"
-    type: "list(type)"
+    name: "ksizes"
+    type: "list(int)"
     has_minimum: true
-    minimum: 1
+    minimum: 4
   }
   attr {
-    name: "output_shapes"
-    type: "list(shape)"
+    name: "strides"
+    type: "list(int)"
     has_minimum: true
-    minimum: 1
-  }
-  is_stateful: true
-}
-op {
-  name: "IteratorSetStatsAggregator"
-  input_arg {
-    name: "iterator_handle"
-    type: DT_RESOURCE
+    minimum: 4
   }
-  input_arg {
-    name: "stats_aggregator_handle"
-    type: DT_RESOURCE
+  attr {
+    name: "rates"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 4
   }
-  is_stateful: true
-}
-op {
-  name: "IteratorToStringHandle"
-  input_arg {
-    name: "resource_handle"
-    type: DT_RESOURCE
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_UINT16
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+      }
+    }
   }
-  output_arg {
-    name: "string_handle"
-    type: DT_STRING
+  attr {
+    name: "padding"
+    type: "string"
+    allowed_values {
+      list {
+        s: "SAME"
+        s: "VALID"
+      }
+    }
   }
-  is_stateful: true
 }
 op {
-  name: "L2Loss"
+  name: "ExtractImagePatches"
   input_arg {
-    name: "t"
+    name: "images"
     type_attr: "T"
   }
   output_arg {
-    name: "output"
+    name: "patches"
     type_attr: "T"
   }
+  attr {
+    name: "ksizes"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 4
+  }
+  attr {
+    name: "strides"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 4
+  }
+  attr {
+    name: "rates"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 4
+  }
   attr {
     name: "T"
     type: "type"
@@ -15207,40 +15334,116 @@ op {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_UINT16
         type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
+      }
+    }
+  }
+  attr {
+    name: "padding"
+    type: "string"
+    allowed_values {
+      list {
+        s: "SAME"
+        s: "VALID"
       }
     }
   }
 }
 op {
-  name: "L2Loss"
+  name: "ExtractJpegShape"
   input_arg {
-    name: "t"
-    type_attr: "T"
+    name: "contents"
+    type: DT_STRING
   }
   output_arg {
-    name: "output"
-    type_attr: "T"
+    name: "image_shape"
+    type_attr: "output_type"
   }
   attr {
-    name: "T"
+    name: "output_type"
     type: "type"
+    default_value {
+      type: DT_INT32
+    }
     allowed_values {
       list {
-        type: DT_HALF
-        type: DT_FLOAT
-        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
       }
     }
   }
 }
 op {
-  name: "LMDBReader"
+  name: "FFT"
+  input_arg {
+    name: "input"
+    type: DT_COMPLEX64
+  }
   output_arg {
-    name: "reader_handle"
+    name: "output"
+    type: DT_COMPLEX64
+  }
+}
+op {
+  name: "FFT2D"
+  input_arg {
+    name: "input"
+    type: DT_COMPLEX64
+  }
+  output_arg {
+    name: "output"
+    type: DT_COMPLEX64
+  }
+}
+op {
+  name: "FFT3D"
+  input_arg {
+    name: "input"
+    type: DT_COMPLEX64
+  }
+  output_arg {
+    name: "output"
+    type: DT_COMPLEX64
+  }
+}
+op {
+  name: "FIFOQueue"
+  output_arg {
+    name: "handle"
     type: DT_STRING
     is_ref: true
   }
+  attr {
+    name: "component_types"
+    type: "list(type)"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "shapes"
+    type: "list(shape)"
+    default_value {
+      list {
+      }
+    }
+    has_minimum: true
+  }
+  attr {
+    name: "capacity"
+    type: "int"
+    default_value {
+      i: -1
+    }
+  }
   attr {
     name: "container"
     type: "string"
@@ -15258,1065 +15461,678 @@ op {
   is_stateful: true
 }
 op {
-  name: "LRN"
-  input_arg {
-    name: "input"
-    type_attr: "T"
-  }
+  name: "FIFOQueueV2"
   output_arg {
-    name: "output"
-    type_attr: "T"
+    name: "handle"
+    type: DT_RESOURCE
   }
   attr {
-    name: "depth_radius"
+    name: "component_types"
+    type: "list(type)"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "shapes"
+    type: "list(shape)"
+    default_value {
+      list {
+      }
+    }
+    has_minimum: true
+  }
+  attr {
+    name: "capacity"
     type: "int"
     default_value {
-      i: 5
+      i: -1
     }
   }
   attr {
-    name: "bias"
-    type: "float"
+    name: "container"
+    type: "string"
     default_value {
-      f: 1
+      s: ""
     }
   }
   attr {
-    name: "alpha"
-    type: "float"
+    name: "shared_name"
+    type: "string"
     default_value {
-      f: 1
+      s: ""
     }
   }
+  is_stateful: true
+}
+op {
+  name: "Fact"
+  output_arg {
+    name: "fact"
+    type: DT_STRING
+  }
+}
+op {
+  name: "FakeQuantWithMinMaxArgs"
+  input_arg {
+    name: "inputs"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "outputs"
+    type: DT_FLOAT
+  }
   attr {
-    name: "beta"
+    name: "min"
     type: "float"
     default_value {
-      f: 0.5
+      f: -6
     }
   }
   attr {
-    name: "T"
-    type: "type"
+    name: "max"
+    type: "float"
     default_value {
-      type: DT_FLOAT
-    }
-    allowed_values {
-      list {
-        type: DT_FLOAT
-        type: DT_HALF
-      }
+      f: 6
     }
   }
 }
 op {
-  name: "LRNGrad"
-  input_arg {
-    name: "input_grads"
-    type_attr: "T"
-  }
-  input_arg {
-    name: "input_image"
-    type_attr: "T"
-  }
+  name: "FakeQuantWithMinMaxArgs"
   input_arg {
-    name: "output_image"
-    type_attr: "T"
+    name: "inputs"
+    type: DT_FLOAT
   }
   output_arg {
-    name: "output"
-    type_attr: "T"
+    name: "outputs"
+    type: DT_FLOAT
   }
   attr {
-    name: "depth_radius"
-    type: "int"
+    name: "min"
+    type: "float"
     default_value {
-      i: 5
+      f: -6
     }
   }
   attr {
-    name: "bias"
+    name: "max"
     type: "float"
     default_value {
-      f: 1
+      f: 6
     }
   }
   attr {
-    name: "alpha"
+    name: "num_bits"
+    type: "int"
+    default_value {
+      i: 8
+    }
+  }
+}
+op {
+  name: "FakeQuantWithMinMaxArgs"
+  input_arg {
+    name: "inputs"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "outputs"
+    type: DT_FLOAT
+  }
+  attr {
+    name: "min"
     type: "float"
     default_value {
-      f: 1
+      f: -6
     }
   }
   attr {
-    name: "beta"
+    name: "max"
     type: "float"
     default_value {
-      f: 0.5
+      f: 6
     }
   }
   attr {
-    name: "T"
-    type: "type"
+    name: "num_bits"
+    type: "int"
     default_value {
-      type: DT_FLOAT
+      i: 8
     }
-    allowed_values {
-      list {
-        type: DT_FLOAT
-        type: DT_HALF
-      }
+  }
+  attr {
+    name: "narrow_range"
+    type: "bool"
+    default_value {
+      b: false
     }
   }
 }
 op {
-  name: "LatencyStatsDataset"
+  name: "FakeQuantWithMinMaxArgsGradient"
   input_arg {
-    name: "input_dataset"
-    type: DT_VARIANT
+    name: "gradients"
+    type: DT_FLOAT
   }
   input_arg {
-    name: "tag"
-    type: DT_STRING
+    name: "inputs"
+    type: DT_FLOAT
   }
   output_arg {
-    name: "handle"
-    type: DT_VARIANT
+    name: "backprops"
+    type: DT_FLOAT
   }
   attr {
-    name: "output_types"
-    type: "list(type)"
-    has_minimum: true
-    minimum: 1
+    name: "min"
+    type: "float"
+    default_value {
+      f: -6
+    }
   }
   attr {
-    name: "output_shapes"
-    type: "list(shape)"
-    has_minimum: true
-    minimum: 1
+    name: "max"
+    type: "float"
+    default_value {
+      f: 6
+    }
   }
 }
 op {
-  name: "LearnedUnigramCandidateSampler"
+  name: "FakeQuantWithMinMaxArgsGradient"
   input_arg {
-    name: "true_classes"
-    type: DT_INT64
-  }
-  output_arg {
-    name: "sampled_candidates"
-    type: DT_INT64
+    name: "gradients"
+    type: DT_FLOAT
   }
-  output_arg {
-    name: "true_expected_count"
+  input_arg {
+    name: "inputs"
     type: DT_FLOAT
   }
   output_arg {
-    name: "sampled_expected_count"
+    name: "backprops"
     type: DT_FLOAT
   }
   attr {
-    name: "num_true"
-    type: "int"
-    has_minimum: true
-    minimum: 1
-  }
-  attr {
-    name: "num_sampled"
-    type: "int"
-    has_minimum: true
-    minimum: 1
-  }
-  attr {
-    name: "unique"
-    type: "bool"
-  }
-  attr {
-    name: "range_max"
-    type: "int"
-    has_minimum: true
-    minimum: 1
+    name: "min"
+    type: "float"
+    default_value {
+      f: -6
+    }
   }
   attr {
-    name: "seed"
-    type: "int"
+    name: "max"
+    type: "float"
     default_value {
-      i: 0
+      f: 6
     }
   }
   attr {
-    name: "seed2"
+    name: "num_bits"
     type: "int"
     default_value {
-      i: 0
+      i: 8
     }
   }
 }
 op {
-  name: "LearnedUnigramCandidateSampler"
+  name: "FakeQuantWithMinMaxArgsGradient"
   input_arg {
-    name: "true_classes"
-    type: DT_INT64
-  }
-  output_arg {
-    name: "sampled_candidates"
-    type: DT_INT64
+    name: "gradients"
+    type: DT_FLOAT
   }
-  output_arg {
-    name: "true_expected_count"
+  input_arg {
+    name: "inputs"
     type: DT_FLOAT
   }
   output_arg {
-    name: "sampled_expected_count"
+    name: "backprops"
     type: DT_FLOAT
   }
   attr {
-    name: "num_true"
-    type: "int"
-    has_minimum: true
-    minimum: 1
+    name: "min"
+    type: "float"
+    default_value {
+      f: -6
+    }
   }
   attr {
-    name: "num_sampled"
-    type: "int"
-    has_minimum: true
-    minimum: 1
-  }
-  attr {
-    name: "unique"
-    type: "bool"
-  }
-  attr {
-    name: "range_max"
-    type: "int"
-    has_minimum: true
-    minimum: 1
+    name: "max"
+    type: "float"
+    default_value {
+      f: 6
+    }
   }
   attr {
-    name: "seed"
+    name: "num_bits"
     type: "int"
     default_value {
-      i: 0
+      i: 8
     }
   }
   attr {
-    name: "seed2"
-    type: "int"
+    name: "narrow_range"
+    type: "bool"
     default_value {
-      i: 0
+      b: false
     }
   }
-  is_stateful: true
 }
 op {
-  name: "LeftShift"
-  input_arg {
-    name: "x"
-    type_attr: "T"
-  }
+  name: "FakeQuantWithMinMaxVars"
   input_arg {
-    name: "y"
-    type_attr: "T"
-  }
-  output_arg {
-    name: "z"
-    type_attr: "T"
-  }
-  attr {
-    name: "T"
-    type: "type"
-    allowed_values {
-      list {
-        type: DT_INT8
-        type: DT_INT16
-        type: DT_INT32
-        type: DT_INT64
-        type: DT_UINT8
-        type: DT_UINT16
-        type: DT_UINT32
-        type: DT_UINT64
-      }
-    }
+    name: "inputs"
+    type: DT_FLOAT
   }
-  is_commutative: true
-}
-op {
-  name: "Less"
   input_arg {
-    name: "x"
-    type_attr: "T"
+    name: "min"
+    type: DT_FLOAT
   }
   input_arg {
-    name: "y"
-    type_attr: "T"
+    name: "max"
+    type: DT_FLOAT
   }
   output_arg {
-    name: "z"
-    type: DT_BOOL
-  }
-  attr {
-    name: "T"
-    type: "type"
-    allowed_values {
-      list {
-        type: DT_FLOAT
-        type: DT_DOUBLE
-        type: DT_INT32
-        type: DT_INT64
-        type: DT_UINT8
-        type: DT_INT16
-        type: DT_INT8
-        type: DT_UINT16
-        type: DT_HALF
-      }
-    }
+    name: "outputs"
+    type: DT_FLOAT
   }
 }
 op {
-  name: "Less"
-  input_arg {
-    name: "x"
-    type_attr: "T"
-  }
+  name: "FakeQuantWithMinMaxVars"
   input_arg {
-    name: "y"
-    type_attr: "T"
-  }
-  output_arg {
-    name: "z"
-    type: DT_BOOL
-  }
-  attr {
-    name: "T"
-    type: "type"
-    allowed_values {
-      list {
-        type: DT_FLOAT
-        type: DT_DOUBLE
-        type: DT_INT32
-        type: DT_INT64
-        type: DT_UINT8
-        type: DT_INT16
-        type: DT_INT8
-        type: DT_UINT16
-        type: DT_HALF
-        type: DT_UINT32
-        type: DT_UINT64
-      }
-    }
+    name: "inputs"
+    type: DT_FLOAT
   }
-}
-op {
-  name: "LessEqual"
   input_arg {
-    name: "x"
-    type_attr: "T"
+    name: "min"
+    type: DT_FLOAT
   }
   input_arg {
-    name: "y"
-    type_attr: "T"
+    name: "max"
+    type: DT_FLOAT
   }
   output_arg {
-    name: "z"
-    type: DT_BOOL
+    name: "outputs"
+    type: DT_FLOAT
   }
   attr {
-    name: "T"
-    type: "type"
-    allowed_values {
-      list {
-        type: DT_FLOAT
-        type: DT_DOUBLE
-        type: DT_INT32
-        type: DT_INT64
-        type: DT_UINT8
-        type: DT_INT16
-        type: DT_INT8
-        type: DT_UINT16
-        type: DT_HALF
-      }
+    name: "num_bits"
+    type: "int"
+    default_value {
+      i: 8
     }
   }
 }
 op {
-  name: "LessEqual"
+  name: "FakeQuantWithMinMaxVars"
   input_arg {
-    name: "x"
-    type_attr: "T"
+    name: "inputs"
+    type: DT_FLOAT
   }
   input_arg {
-    name: "y"
-    type_attr: "T"
+    name: "min"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "max"
+    type: DT_FLOAT
   }
   output_arg {
-    name: "z"
-    type: DT_BOOL
+    name: "outputs"
+    type: DT_FLOAT
   }
   attr {
-    name: "T"
-    type: "type"
-    allowed_values {
-      list {
-        type: DT_FLOAT
-        type: DT_DOUBLE
-        type: DT_INT32
-        type: DT_INT64
-        type: DT_UINT8
-        type: DT_INT16
-        type: DT_INT8
-        type: DT_UINT16
-        type: DT_HALF
-        type: DT_UINT32
-        type: DT_UINT64
-      }
+    name: "num_bits"
+    type: "int"
+    default_value {
+      i: 8
     }
   }
-}
-op {
-  name: "Lgamma"
-  input_arg {
-    name: "x"
-    type_attr: "T"
-  }
-  output_arg {
-    name: "y"
-    type_attr: "T"
-  }
   attr {
-    name: "T"
-    type: "type"
-    allowed_values {
-      list {
-        type: DT_HALF
-        type: DT_FLOAT
-        type: DT_DOUBLE
-      }
+    name: "narrow_range"
+    type: "bool"
+    default_value {
+      b: false
     }
   }
 }
 op {
-  name: "LinSpace"
+  name: "FakeQuantWithMinMaxVarsGradient"
   input_arg {
-    name: "start"
-    type_attr: "T"
+    name: "gradients"
+    type: DT_FLOAT
   }
   input_arg {
-    name: "stop"
-    type_attr: "T"
+    name: "inputs"
+    type: DT_FLOAT
   }
   input_arg {
-    name: "num"
-    type_attr: "Tidx"
+    name: "min"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "max"
+    type: DT_FLOAT
   }
   output_arg {
-    name: "output"
-    type_attr: "T"
+    name: "backprops_wrt_input"
+    type: DT_FLOAT
   }
-  attr {
-    name: "T"
-    type: "type"
-    allowed_values {
-      list {
-        type: DT_FLOAT
-        type: DT_DOUBLE
-      }
-    }
+  output_arg {
+    name: "backprop_wrt_min"
+    type: DT_FLOAT
   }
-  attr {
-    name: "Tidx"
-    type: "type"
-    default_value {
-      type: DT_INT32
-    }
-    allowed_values {
-      list {
-        type: DT_INT32
-        type: DT_INT64
-      }
-    }
+  output_arg {
+    name: "backprop_wrt_max"
+    type: DT_FLOAT
   }
 }
 op {
-  name: "ListDiff"
+  name: "FakeQuantWithMinMaxVarsGradient"
   input_arg {
-    name: "x"
-    type_attr: "T"
+    name: "gradients"
+    type: DT_FLOAT
   }
   input_arg {
-    name: "y"
-    type_attr: "T"
+    name: "inputs"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "min"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "max"
+    type: DT_FLOAT
   }
   output_arg {
-    name: "out"
-    type_attr: "T"
+    name: "backprops_wrt_input"
+    type: DT_FLOAT
   }
   output_arg {
-    name: "idx"
-    type_attr: "out_idx"
+    name: "backprop_wrt_min"
+    type: DT_FLOAT
   }
-  attr {
-    name: "T"
-    type: "type"
+  output_arg {
+    name: "backprop_wrt_max"
+    type: DT_FLOAT
   }
   attr {
-    name: "out_idx"
-    type: "type"
+    name: "num_bits"
+    type: "int"
     default_value {
-      type: DT_INT32
-    }
-    allowed_values {
-      list {
-        type: DT_INT32
-        type: DT_INT64
-      }
+      i: 8
     }
   }
 }
 op {
-  name: "LoadAndRemapMatrix"
+  name: "FakeQuantWithMinMaxVarsGradient"
   input_arg {
-    name: "ckpt_path"
-    type: DT_STRING
+    name: "gradients"
+    type: DT_FLOAT
   }
   input_arg {
-    name: "old_tensor_name"
-    type: DT_STRING
+    name: "inputs"
+    type: DT_FLOAT
   }
   input_arg {
-    name: "row_remapping"
-    type: DT_INT64
+    name: "min"
+    type: DT_FLOAT
   }
   input_arg {
-    name: "col_remapping"
-    type: DT_INT64
+    name: "max"
+    type: DT_FLOAT
   }
-  input_arg {
-    name: "initializing_values"
+  output_arg {
+    name: "backprops_wrt_input"
     type: DT_FLOAT
   }
   output_arg {
-    name: "output_matrix"
+    name: "backprop_wrt_min"
     type: DT_FLOAT
   }
-  attr {
-    name: "num_rows"
-    type: "int"
-    has_minimum: true
+  output_arg {
+    name: "backprop_wrt_max"
+    type: DT_FLOAT
   }
   attr {
-    name: "num_cols"
+    name: "num_bits"
     type: "int"
-    has_minimum: true
-    minimum: 1
+    default_value {
+      i: 8
+    }
   }
   attr {
-    name: "max_rows_in_memory"
-    type: "int"
+    name: "narrow_range"
+    type: "bool"
     default_value {
-      i: -1
+      b: false
     }
   }
-  is_stateful: true
 }
 op {
-  name: "Log"
+  name: "FakeQuantWithMinMaxVarsPerChannel"
   input_arg {
-    name: "x"
-    type_attr: "T"
-  }
-  output_arg {
-    name: "y"
-    type_attr: "T"
+    name: "inputs"
+    type: DT_FLOAT
   }
-  attr {
-    name: "T"
-    type: "type"
-    allowed_values {
-      list {
-        type: DT_HALF
-        type: DT_FLOAT
-        type: DT_DOUBLE
-        type: DT_COMPLEX64
-        type: DT_COMPLEX128
-      }
-    }
+  input_arg {
+    name: "min"
+    type: DT_FLOAT
   }
-}
-op {
-  name: "Log1p"
   input_arg {
-    name: "x"
-    type_attr: "T"
+    name: "max"
+    type: DT_FLOAT
   }
   output_arg {
-    name: "y"
-    type_attr: "T"
-  }
-  attr {
-    name: "T"
-    type: "type"
-    allowed_values {
-      list {
-        type: DT_HALF
-        type: DT_FLOAT
-        type: DT_DOUBLE
-        type: DT_COMPLEX64
-        type: DT_COMPLEX128
-      }
-    }
+    name: "outputs"
+    type: DT_FLOAT
   }
 }
 op {
-  name: "LogMatrixDeterminant"
+  name: "FakeQuantWithMinMaxVarsPerChannel"
   input_arg {
-    name: "input"
-    type_attr: "T"
-  }
-  output_arg {
-    name: "sign"
-    type_attr: "T"
-  }
-  output_arg {
-    name: "log_abs_determinant"
-    type_attr: "T"
+    name: "inputs"
+    type: DT_FLOAT
   }
-  attr {
-    name: "T"
-    type: "type"
-    allowed_values {
-      list {
-        type: DT_FLOAT
-        type: DT_DOUBLE
-        type: DT_COMPLEX64
-        type: DT_COMPLEX128
-      }
-    }
+  input_arg {
+    name: "min"
+    type: DT_FLOAT
   }
-}
-op {
-  name: "LogSoftmax"
   input_arg {
-    name: "logits"
-    type_attr: "T"
+    name: "max"
+    type: DT_FLOAT
   }
   output_arg {
-    name: "logsoftmax"
-    type_attr: "T"
+    name: "outputs"
+    type: DT_FLOAT
   }
   attr {
-    name: "T"
-    type: "type"
-    allowed_values {
-      list {
-        type: DT_HALF
-        type: DT_FLOAT
-        type: DT_DOUBLE
-      }
+    name: "num_bits"
+    type: "int"
+    default_value {
+      i: 8
     }
   }
 }
 op {
-  name: "LogUniformCandidateSampler"
+  name: "FakeQuantWithMinMaxVarsPerChannel"
   input_arg {
-    name: "true_classes"
-    type: DT_INT64
+    name: "inputs"
+    type: DT_FLOAT
   }
-  output_arg {
-    name: "sampled_candidates"
-    type: DT_INT64
+  input_arg {
+    name: "min"
+    type: DT_FLOAT
   }
-  output_arg {
-    name: "true_expected_count"
+  input_arg {
+    name: "max"
     type: DT_FLOAT
   }
   output_arg {
-    name: "sampled_expected_count"
+    name: "outputs"
     type: DT_FLOAT
   }
   attr {
-    name: "num_true"
-    type: "int"
-    has_minimum: true
-    minimum: 1
-  }
-  attr {
-    name: "num_sampled"
-    type: "int"
-    has_minimum: true
-    minimum: 1
-  }
-  attr {
-    name: "unique"
-    type: "bool"
-  }
-  attr {
-    name: "range_max"
-    type: "int"
-    has_minimum: true
-    minimum: 1
-  }
-  attr {
-    name: "seed"
+    name: "num_bits"
     type: "int"
     default_value {
-      i: 0
+      i: 8
     }
   }
   attr {
-    name: "seed2"
-    type: "int"
+    name: "narrow_range"
+    type: "bool"
     default_value {
-      i: 0
+      b: false
     }
   }
 }
 op {
-  name: "LogUniformCandidateSampler"
+  name: "FakeQuantWithMinMaxVarsPerChannelGradient"
   input_arg {
-    name: "true_classes"
-    type: DT_INT64
-  }
-  output_arg {
-    name: "sampled_candidates"
-    type: DT_INT64
-  }
-  output_arg {
-    name: "true_expected_count"
+    name: "gradients"
     type: DT_FLOAT
   }
-  output_arg {
-    name: "sampled_expected_count"
+  input_arg {
+    name: "inputs"
     type: DT_FLOAT
   }
-  attr {
-    name: "num_true"
-    type: "int"
-    has_minimum: true
-    minimum: 1
-  }
-  attr {
-    name: "num_sampled"
-    type: "int"
-    has_minimum: true
-    minimum: 1
-  }
-  attr {
-    name: "unique"
-    type: "bool"
-  }
-  attr {
-    name: "range_max"
-    type: "int"
-    has_minimum: true
-    minimum: 1
-  }
-  attr {
-    name: "seed"
-    type: "int"
-    default_value {
-      i: 0
-    }
-  }
-  attr {
-    name: "seed2"
-    type: "int"
-    default_value {
-      i: 0
-    }
-  }
-  is_stateful: true
-}
-op {
-  name: "LogicalAnd"
   input_arg {
-    name: "x"
-    type: DT_BOOL
+    name: "min"
+    type: DT_FLOAT
   }
   input_arg {
-    name: "y"
-    type: DT_BOOL
+    name: "max"
+    type: DT_FLOAT
   }
   output_arg {
-    name: "z"
-    type: DT_BOOL
+    name: "backprops_wrt_input"
+    type: DT_FLOAT
   }
-  is_commutative: true
-}
-op {
-  name: "LogicalNot"
-  input_arg {
-    name: "x"
-    type: DT_BOOL
+  output_arg {
+    name: "backprop_wrt_min"
+    type: DT_FLOAT
   }
   output_arg {
-    name: "y"
-    type: DT_BOOL
+    name: "backprop_wrt_max"
+    type: DT_FLOAT
   }
 }
 op {
-  name: "LogicalOr"
+  name: "FakeQuantWithMinMaxVarsPerChannelGradient"
   input_arg {
-    name: "x"
-    type: DT_BOOL
+    name: "gradients"
+    type: DT_FLOAT
   }
   input_arg {
-    name: "y"
-    type: DT_BOOL
-  }
-  output_arg {
-    name: "z"
-    type: DT_BOOL
+    name: "inputs"
+    type: DT_FLOAT
   }
-  is_commutative: true
-}
-op {
-  name: "LookupTableExport"
   input_arg {
-    name: "table_handle"
-    type: DT_STRING
-    is_ref: true
-  }
-  output_arg {
-    name: "keys"
-    type_attr: "Tkeys"
-  }
-  output_arg {
-    name: "values"
-    type_attr: "Tvalues"
-  }
-  attr {
-    name: "Tkeys"
-    type: "type"
-  }
-  attr {
-    name: "Tvalues"
-    type: "type"
+    name: "min"
+    type: DT_FLOAT
   }
-}
-op {
-  name: "LookupTableExportV2"
   input_arg {
-    name: "table_handle"
-    type: DT_RESOURCE
+    name: "max"
+    type: DT_FLOAT
   }
   output_arg {
-    name: "keys"
-    type_attr: "Tkeys"
+    name: "backprops_wrt_input"
+    type: DT_FLOAT
   }
   output_arg {
-    name: "values"
-    type_attr: "Tvalues"
+    name: "backprop_wrt_min"
+    type: DT_FLOAT
   }
-  attr {
-    name: "Tkeys"
-    type: "type"
+  output_arg {
+    name: "backprop_wrt_max"
+    type: DT_FLOAT
   }
   attr {
-    name: "Tvalues"
-    type: "type"
+    name: "num_bits"
+    type: "int"
+    default_value {
+      i: 8
+    }
   }
-  is_stateful: true
 }
 op {
-  name: "LookupTableFind"
-  input_arg {
-    name: "table_handle"
-    type: DT_STRING
-    is_ref: true
-  }
-  input_arg {
-    name: "keys"
-    type_attr: "Tin"
-  }
+  name: "FakeQuantWithMinMaxVarsPerChannelGradient"
   input_arg {
-    name: "default_value"
-    type_attr: "Tout"
-  }
-  output_arg {
-    name: "values"
-    type_attr: "Tout"
-  }
-  attr {
-    name: "Tin"
-    type: "type"
-  }
-  attr {
-    name: "Tout"
-    type: "type"
+    name: "gradients"
+    type: DT_FLOAT
   }
-}
-op {
-  name: "LookupTableFindV2"
   input_arg {
-    name: "table_handle"
-    type: DT_RESOURCE
+    name: "inputs"
+    type: DT_FLOAT
   }
   input_arg {
-    name: "keys"
-    type_attr: "Tin"
+    name: "min"
+    type: DT_FLOAT
   }
   input_arg {
-    name: "default_value"
-    type_attr: "Tout"
+    name: "max"
+    type: DT_FLOAT
   }
   output_arg {
-    name: "values"
-    type_attr: "Tout"
-  }
-  attr {
-    name: "Tin"
-    type: "type"
-  }
-  attr {
-    name: "Tout"
-    type: "type"
-  }
-  is_stateful: true
-}
-op {
-  name: "LookupTableImport"
-  input_arg {
-    name: "table_handle"
-    type: DT_STRING
-    is_ref: true
+    name: "backprops_wrt_input"
+    type: DT_FLOAT
   }
-  input_arg {
-    name: "keys"
-    type_attr: "Tin"
+  output_arg {
+    name: "backprop_wrt_min"
+    type: DT_FLOAT
   }
-  input_arg {
-    name: "values"
-    type_attr: "Tout"
+  output_arg {
+    name: "backprop_wrt_max"
+    type: DT_FLOAT
   }
   attr {
-    name: "Tin"
-    type: "type"
+    name: "num_bits"
+    type: "int"
+    default_value {
+      i: 8
+    }
   }
   attr {
-    name: "Tout"
-    type: "type"
+    name: "narrow_range"
+    type: "bool"
+    default_value {
+      b: false
+    }
   }
 }
 op {
-  name: "LookupTableImportV2"
+  name: "FakeQueue"
   input_arg {
-    name: "table_handle"
+    name: "resource"
     type: DT_RESOURCE
   }
-  input_arg {
-    name: "keys"
-    type_attr: "Tin"
-  }
-  input_arg {
-    name: "values"
-    type_attr: "Tout"
-  }
-  attr {
-    name: "Tin"
-    type: "type"
-  }
-  attr {
-    name: "Tout"
-    type: "type"
-  }
-  is_stateful: true
-}
-op {
-  name: "LookupTableInsert"
-  input_arg {
-    name: "table_handle"
+  output_arg {
+    name: "handle"
     type: DT_STRING
     is_ref: true
   }
-  input_arg {
-    name: "keys"
-    type_attr: "Tin"
-  }
-  input_arg {
-    name: "values"
-    type_attr: "Tout"
-  }
-  attr {
-    name: "Tin"
-    type: "type"
-  }
-  attr {
-    name: "Tout"
-    type: "type"
-  }
-}
-op {
-  name: "LookupTableInsertV2"
-  input_arg {
-    name: "table_handle"
-    type: DT_RESOURCE
-  }
-  input_arg {
-    name: "keys"
-    type_attr: "Tin"
-  }
-  input_arg {
-    name: "values"
-    type_attr: "Tout"
-  }
-  attr {
-    name: "Tin"
-    type: "type"
-  }
-  attr {
-    name: "Tout"
-    type: "type"
-  }
   is_stateful: true
 }
 op {
-  name: "LookupTableSize"
-  input_arg {
-    name: "table_handle"
-    type: DT_STRING
-    is_ref: true
-  }
-  output_arg {
-    name: "size"
-    type: DT_INT64
-  }
-}
-op {
-  name: "LookupTableSizeV2"
+  name: "Fill"
   input_arg {
-    name: "table_handle"
-    type: DT_RESOURCE
-  }
-  output_arg {
-    name: "size"
-    type: DT_INT64
+    name: "dims"
+    type: DT_INT32
   }
-  is_stateful: true
-}
-op {
-  name: "LoopCond"
   input_arg {
-    name: "input"
-    type: DT_BOOL
+    name: "value"
+    type_attr: "T"
   }
   output_arg {
     name: "output"
-    type: DT_BOOL
-  }
-}
-op {
-  name: "MakeIterator"
-  input_arg {
-    name: "dataset"
-    type: DT_VARIANT
+    type_attr: "T"
   }
-  input_arg {
-    name: "iterator"
-    type: DT_RESOURCE
+  attr {
+    name: "T"
+    type: "type"
   }
-  is_stateful: true
 }
 op {
-  name: "MapAndBatchDataset"
+  name: "FilterDataset"
   input_arg {
     name: "input_dataset"
     type: DT_VARIANT
@@ -16325,20 +16141,12 @@ op {
     name: "other_arguments"
     type_list_attr: "Targuments"
   }
-  input_arg {
-    name: "batch_size"
-    type: DT_INT64
-  }
-  input_arg {
-    name: "num_parallel_batches"
-    type: DT_INT64
-  }
   output_arg {
     name: "handle"
     type: DT_VARIANT
   }
   attr {
-    name: "f"
+    name: "predicate"
     type: "func"
   }
   attr {
@@ -16358,47 +16166,10 @@ op {
     has_minimum: true
     minimum: 1
   }
-}
-op {
-  name: "MapClear"
-  attr {
-    name: "capacity"
-    type: "int"
-    default_value {
-      i: 0
-    }
-    has_minimum: true
-  }
-  attr {
-    name: "memory_limit"
-    type: "int"
-    default_value {
-      i: 0
-    }
-    has_minimum: true
-  }
-  attr {
-    name: "dtypes"
-    type: "list(type)"
-  }
-  attr {
-    name: "container"
-    type: "string"
-    default_value {
-      s: ""
-    }
-  }
-  attr {
-    name: "shared_name"
-    type: "string"
-    default_value {
-      s: ""
-    }
-  }
   is_stateful: true
 }
 op {
-  name: "MapDataset"
+  name: "FilterDataset"
   input_arg {
     name: "input_dataset"
     type: DT_VARIANT
@@ -16412,7 +16183,7 @@ op {
     type: DT_VARIANT
   }
   attr {
-    name: "f"
+    name: "predicate"
     type: "func"
   }
   attr {
@@ -16432,69 +16203,59 @@ op {
     has_minimum: true
     minimum: 1
   }
-  is_stateful: true
 }
 op {
-  name: "MapDataset"
+  name: "FixedLengthRecordDataset"
   input_arg {
-    name: "input_dataset"
-    type: DT_VARIANT
+    name: "filenames"
+    type: DT_STRING
   }
   input_arg {
-    name: "other_arguments"
-    type_list_attr: "Targuments"
-  }
-  output_arg {
-    name: "handle"
-    type: DT_VARIANT
+    name: "header_bytes"
+    type: DT_INT64
   }
-  attr {
-    name: "f"
-    type: "func"
+  input_arg {
+    name: "record_bytes"
+    type: DT_INT64
   }
-  attr {
-    name: "Targuments"
-    type: "list(type)"
-    has_minimum: true
+  input_arg {
+    name: "footer_bytes"
+    type: DT_INT64
   }
-  attr {
-    name: "output_types"
-    type: "list(type)"
-    has_minimum: true
-    minimum: 1
+  input_arg {
+    name: "buffer_size"
+    type: DT_INT64
   }
-  attr {
-    name: "output_shapes"
-    type: "list(shape)"
-    has_minimum: true
-    minimum: 1
+  output_arg {
+    name: "handle"
+    type: DT_VARIANT
   }
+  is_stateful: true
 }
 op {
-  name: "MapIncompleteSize"
+  name: "FixedLengthRecordReader"
   output_arg {
-    name: "size"
-    type: DT_INT32
+    name: "reader_handle"
+    type: DT_STRING
+    is_ref: true
   }
   attr {
-    name: "capacity"
+    name: "header_bytes"
     type: "int"
     default_value {
       i: 0
     }
-    has_minimum: true
   }
   attr {
-    name: "memory_limit"
+    name: "record_bytes"
+    type: "int"
+  }
+  attr {
+    name: "footer_bytes"
     type: "int"
     default_value {
       i: 0
     }
-    has_minimum: true
-  }
-  attr {
-    name: "dtypes"
-    type: "list(type)"
   }
   attr {
     name: "container"
@@ -16513,40 +16274,36 @@ op {
   is_stateful: true
 }
 op {
-  name: "MapPeek"
-  input_arg {
-    name: "key"
-    type: DT_INT64
-  }
-  input_arg {
-    name: "indices"
-    type: DT_INT32
-  }
+  name: "FixedLengthRecordReader"
   output_arg {
-    name: "values"
-    type_list_attr: "dtypes"
+    name: "reader_handle"
+    type: DT_STRING
+    is_ref: true
   }
   attr {
-    name: "capacity"
+    name: "header_bytes"
     type: "int"
     default_value {
       i: 0
     }
-    has_minimum: true
   }
   attr {
-    name: "memory_limit"
+    name: "record_bytes"
+    type: "int"
+  }
+  attr {
+    name: "footer_bytes"
     type: "int"
     default_value {
       i: 0
     }
-    has_minimum: true
   }
   attr {
-    name: "dtypes"
-    type: "list(type)"
-    has_minimum: true
-    minimum: 1
+    name: "hop_bytes"
+    type: "int"
+    default_value {
+      i: 0
+    }
   }
   attr {
     name: "container"
@@ -16565,30 +16322,28 @@ op {
   is_stateful: true
 }
 op {
-  name: "MapSize"
+  name: "FixedLengthRecordReaderV2"
   output_arg {
-    name: "size"
-    type: DT_INT32
+    name: "reader_handle"
+    type: DT_RESOURCE
   }
   attr {
-    name: "capacity"
+    name: "header_bytes"
     type: "int"
     default_value {
       i: 0
     }
-    has_minimum: true
   }
   attr {
-    name: "memory_limit"
+    name: "record_bytes"
+    type: "int"
+  }
+  attr {
+    name: "footer_bytes"
     type: "int"
     default_value {
       i: 0
     }
-    has_minimum: true
-  }
-  attr {
-    name: "dtypes"
-    type: "list(type)"
   }
   attr {
     name: "container"
@@ -16607,44 +16362,35 @@ op {
   is_stateful: true
 }
 op {
-  name: "MapStage"
-  input_arg {
-    name: "key"
-    type: DT_INT64
-  }
-  input_arg {
-    name: "indices"
-    type: DT_INT32
-  }
-  input_arg {
-    name: "values"
-    type_list_attr: "fake_dtypes"
+  name: "FixedLengthRecordReaderV2"
+  output_arg {
+    name: "reader_handle"
+    type: DT_RESOURCE
   }
   attr {
-    name: "capacity"
+    name: "header_bytes"
     type: "int"
     default_value {
       i: 0
     }
-    has_minimum: true
   }
   attr {
-    name: "memory_limit"
+    name: "record_bytes"
+    type: "int"
+  }
+  attr {
+    name: "footer_bytes"
     type: "int"
     default_value {
       i: 0
     }
-    has_minimum: true
-  }
-  attr {
-    name: "dtypes"
-    type: "list(type)"
   }
   attr {
-    name: "fake_dtypes"
-    type: "list(type)"
-    has_minimum: true
-    minimum: 1
+    name: "hop_bytes"
+    type: "int"
+    default_value {
+      i: 0
+    }
   }
   attr {
     name: "container"
@@ -16663,40 +16409,35 @@ op {
   is_stateful: true
 }
 op {
-  name: "MapUnstage"
-  input_arg {
-    name: "key"
-    type: DT_INT64
-  }
-  input_arg {
-    name: "indices"
-    type: DT_INT32
-  }
+  name: "FixedLengthRecordReaderV2"
   output_arg {
-    name: "values"
-    type_list_attr: "dtypes"
+    name: "reader_handle"
+    type: DT_RESOURCE
   }
   attr {
-    name: "capacity"
+    name: "header_bytes"
     type: "int"
     default_value {
       i: 0
     }
-    has_minimum: true
   }
   attr {
-    name: "memory_limit"
+    name: "record_bytes"
+    type: "int"
+  }
+  attr {
+    name: "footer_bytes"
     type: "int"
     default_value {
       i: 0
     }
-    has_minimum: true
   }
   attr {
-    name: "dtypes"
-    type: "list(type)"
-    has_minimum: true
-    minimum: 1
+    name: "hop_bytes"
+    type: "int"
+    default_value {
+      i: 0
+    }
   }
   attr {
     name: "container"
@@ -16712,219 +16453,299 @@ op {
       s: ""
     }
   }
+  attr {
+    name: "encoding"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
   is_stateful: true
 }
 op {
-  name: "MapUnstageNoKey"
+  name: "FixedUnigramCandidateSampler"
   input_arg {
-    name: "indices"
-    type: DT_INT32
+    name: "true_classes"
+    type: DT_INT64
   }
   output_arg {
-    name: "key"
+    name: "sampled_candidates"
     type: DT_INT64
   }
   output_arg {
-    name: "values"
-    type_list_attr: "dtypes"
+    name: "true_expected_count"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "sampled_expected_count"
+    type: DT_FLOAT
   }
   attr {
-    name: "capacity"
+    name: "num_true"
     type: "int"
-    default_value {
-      i: 0
-    }
     has_minimum: true
+    minimum: 1
   }
   attr {
-    name: "memory_limit"
+    name: "num_sampled"
     type: "int"
-    default_value {
-      i: 0
-    }
     has_minimum: true
+    minimum: 1
   }
   attr {
-    name: "dtypes"
-    type: "list(type)"
+    name: "unique"
+    type: "bool"
+  }
+  attr {
+    name: "range_max"
+    type: "int"
     has_minimum: true
     minimum: 1
   }
   attr {
-    name: "container"
+    name: "vocab_file"
     type: "string"
     default_value {
       s: ""
     }
   }
   attr {
-    name: "shared_name"
-    type: "string"
+    name: "distortion"
+    type: "float"
     default_value {
-      s: ""
+      f: 1
     }
   }
-  is_stateful: true
-}
-op {
-  name: "MatMul"
-  input_arg {
-    name: "a"
-    type_attr: "T"
-  }
-  input_arg {
-    name: "b"
-    type_attr: "T"
-  }
-  output_arg {
-    name: "product"
-    type_attr: "T"
+  attr {
+    name: "num_reserved_ids"
+    type: "int"
+    default_value {
+      i: 0
+    }
   }
   attr {
-    name: "transpose_a"
-    type: "bool"
+    name: "num_shards"
+    type: "int"
     default_value {
-      b: false
+      i: 1
     }
+    has_minimum: true
+    minimum: 1
   }
   attr {
-    name: "transpose_b"
-    type: "bool"
+    name: "shard"
+    type: "int"
     default_value {
-      b: false
+      i: 0
     }
+    has_minimum: true
   }
   attr {
-    name: "T"
-    type: "type"
-    allowed_values {
+    name: "unigrams"
+    type: "list(float)"
+    default_value {
       list {
-        type: DT_HALF
-        type: DT_FLOAT
-        type: DT_DOUBLE
-        type: DT_INT32
-        type: DT_COMPLEX64
-        type: DT_COMPLEX128
       }
     }
   }
-}
-op {
-  name: "MatchingFiles"
-  input_arg {
-    name: "pattern"
-    type: DT_STRING
+  attr {
+    name: "seed"
+    type: "int"
+    default_value {
+      i: 0
+    }
   }
-  output_arg {
-    name: "filenames"
-    type: DT_STRING
+  attr {
+    name: "seed2"
+    type: "int"
+    default_value {
+      i: 0
+    }
   }
 }
 op {
-  name: "MatrixBandPart"
-  input_arg {
-    name: "input"
-    type_attr: "T"
-  }
+  name: "FixedUnigramCandidateSampler"
   input_arg {
-    name: "num_lower"
+    name: "true_classes"
     type: DT_INT64
   }
-  input_arg {
-    name: "num_upper"
+  output_arg {
+    name: "sampled_candidates"
     type: DT_INT64
   }
   output_arg {
-    name: "band"
-    type_attr: "T"
+    name: "true_expected_count"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "sampled_expected_count"
+    type: DT_FLOAT
   }
   attr {
-    name: "T"
-    type: "type"
+    name: "num_true"
+    type: "int"
+    has_minimum: true
+    minimum: 1
   }
-}
-op {
-  name: "MatrixDeterminant"
-  input_arg {
-    name: "input"
-    type_attr: "T"
+  attr {
+    name: "num_sampled"
+    type: "int"
+    has_minimum: true
+    minimum: 1
   }
-  output_arg {
-    name: "output"
-    type_attr: "T"
+  attr {
+    name: "unique"
+    type: "bool"
   }
   attr {
-    name: "T"
-    type: "type"
-    allowed_values {
-      list {
-        type: DT_FLOAT
-        type: DT_DOUBLE
-      }
+    name: "range_max"
+    type: "int"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "vocab_file"
+    type: "string"
+    default_value {
+      s: ""
     }
   }
-}
-op {
-  name: "MatrixDeterminant"
-  input_arg {
-    name: "input"
-    type_attr: "T"
+  attr {
+    name: "distortion"
+    type: "float"
+    default_value {
+      f: 1
+    }
   }
-  output_arg {
-    name: "output"
-    type_attr: "T"
+  attr {
+    name: "num_reserved_ids"
+    type: "int"
+    default_value {
+      i: 0
+    }
   }
   attr {
-    name: "T"
-    type: "type"
-    allowed_values {
+    name: "num_shards"
+    type: "int"
+    default_value {
+      i: 1
+    }
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "shard"
+    type: "int"
+    default_value {
+      i: 0
+    }
+    has_minimum: true
+  }
+  attr {
+    name: "unigrams"
+    type: "list(float)"
+    default_value {
       list {
-        type: DT_FLOAT
-        type: DT_DOUBLE
-        type: DT_COMPLEX64
-        type: DT_COMPLEX128
       }
     }
   }
+  attr {
+    name: "seed"
+    type: "int"
+    default_value {
+      i: 0
+    }
+  }
+  attr {
+    name: "seed2"
+    type: "int"
+    default_value {
+      i: 0
+    }
+  }
+  is_stateful: true
 }
 op {
-  name: "MatrixDiag"
+  name: "FlatMapDataset"
   input_arg {
-    name: "diagonal"
-    type_attr: "T"
+    name: "input_dataset"
+    type: DT_VARIANT
+  }
+  input_arg {
+    name: "other_arguments"
+    type_list_attr: "Targuments"
   }
   output_arg {
-    name: "output"
-    type_attr: "T"
+    name: "handle"
+    type: DT_VARIANT
   }
   attr {
-    name: "T"
-    type: "type"
+    name: "f"
+    type: "func"
+  }
+  attr {
+    name: "Targuments"
+    type: "list(type)"
+    has_minimum: true
+  }
+  attr {
+    name: "output_types"
+    type: "list(type)"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "output_shapes"
+    type: "list(shape)"
+    has_minimum: true
+    minimum: 1
   }
+  is_stateful: true
 }
 op {
-  name: "MatrixDiagPart"
+  name: "FlatMapDataset"
   input_arg {
-    name: "input"
-    type_attr: "T"
+    name: "input_dataset"
+    type: DT_VARIANT
+  }
+  input_arg {
+    name: "other_arguments"
+    type_list_attr: "Targuments"
   }
   output_arg {
-    name: "diagonal"
-    type_attr: "T"
+    name: "handle"
+    type: DT_VARIANT
   }
   attr {
-    name: "T"
-    type: "type"
+    name: "f"
+    type: "func"
+  }
+  attr {
+    name: "Targuments"
+    type: "list(type)"
+    has_minimum: true
+  }
+  attr {
+    name: "output_types"
+    type: "list(type)"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "output_shapes"
+    type: "list(shape)"
+    has_minimum: true
+    minimum: 1
   }
 }
 op {
-  name: "MatrixExponential"
+  name: "Floor"
   input_arg {
-    name: "input"
+    name: "x"
     type_attr: "T"
   }
   output_arg {
-    name: "output"
+    name: "y"
     type_attr: "T"
   }
   attr {
@@ -16932,66 +16753,64 @@ op {
     type: "type"
     allowed_values {
       list {
-        type: DT_DOUBLE
+        type: DT_HALF
         type: DT_FLOAT
-        type: DT_COMPLEX64
-        type: DT_COMPLEX128
+        type: DT_DOUBLE
       }
     }
   }
 }
 op {
-  name: "MatrixInverse"
+  name: "Floor"
   input_arg {
-    name: "input"
+    name: "x"
     type_attr: "T"
   }
   output_arg {
-    name: "output"
+    name: "y"
     type_attr: "T"
   }
-  attr {
-    name: "adjoint"
-    type: "bool"
-    default_value {
-      b: false
-    }
-  }
   attr {
     name: "T"
     type: "type"
     allowed_values {
       list {
-        type: DT_DOUBLE
+        type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
+        type: DT_DOUBLE
       }
     }
   }
 }
 op {
-  name: "MatrixInverse"
+  name: "FloorDiv"
   input_arg {
-    name: "input"
+    name: "x"
     type_attr: "T"
   }
-  output_arg {
-    name: "output"
+  input_arg {
+    name: "y"
     type_attr: "T"
   }
-  attr {
-    name: "adjoint"
-    type: "bool"
-    default_value {
-      b: false
-    }
+  output_arg {
+    name: "z"
+    type_attr: "T"
   }
   attr {
     name: "T"
     type: "type"
     allowed_values {
       list {
-        type: DT_DOUBLE
+        type: DT_HALF
         type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_UINT8
+        type: DT_INT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT32
+        type: DT_INT64
         type: DT_COMPLEX64
         type: DT_COMPLEX128
       }
@@ -16999,74 +16818,79 @@ op {
   }
 }
 op {
-  name: "MatrixSetDiag"
+  name: "FloorDiv"
   input_arg {
-    name: "input"
+    name: "x"
     type_attr: "T"
   }
   input_arg {
-    name: "diagonal"
+    name: "y"
     type_attr: "T"
   }
   output_arg {
-    name: "output"
+    name: "z"
     type_attr: "T"
   }
   attr {
     name: "T"
     type: "type"
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_BFLOAT16
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_UINT8
+        type: DT_INT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+      }
+    }
   }
 }
 op {
-  name: "MatrixSolve"
+  name: "FloorMod"
   input_arg {
-    name: "matrix"
+    name: "x"
     type_attr: "T"
   }
   input_arg {
-    name: "rhs"
+    name: "y"
     type_attr: "T"
   }
   output_arg {
-    name: "output"
+    name: "z"
     type_attr: "T"
   }
-  attr {
-    name: "adjoint"
-    type: "bool"
-    default_value {
-      b: false
-    }
-  }
   attr {
     name: "T"
     type: "type"
     allowed_values {
       list {
-        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
         type: DT_FLOAT
-        type: DT_COMPLEX64
-        type: DT_COMPLEX128
+        type: DT_DOUBLE
       }
     }
   }
 }
 op {
-  name: "MatrixSolveLs"
+  name: "FloorMod"
   input_arg {
-    name: "matrix"
+    name: "x"
     type_attr: "T"
   }
   input_arg {
-    name: "rhs"
+    name: "y"
     type_attr: "T"
   }
-  input_arg {
-    name: "l2_regularizer"
-    type: DT_DOUBLE
-  }
   output_arg {
-    name: "output"
+    name: "z"
     type_attr: "T"
   }
   attr {
@@ -17074,119 +16898,111 @@ op {
     type: "type"
     allowed_values {
       list {
-        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_BFLOAT16
         type: DT_FLOAT
+        type: DT_DOUBLE
       }
     }
   }
-  attr {
-    name: "fast"
-    type: "bool"
-    default_value {
-      b: true
-    }
-  }
 }
 op {
-  name: "MatrixSolveLs"
+  name: "FractionalAvgPool"
   input_arg {
-    name: "matrix"
+    name: "value"
     type_attr: "T"
   }
-  input_arg {
-    name: "rhs"
+  output_arg {
+    name: "output"
     type_attr: "T"
   }
-  input_arg {
-    name: "l2_regularizer"
-    type: DT_DOUBLE
+  output_arg {
+    name: "row_pooling_sequence"
+    type: DT_INT64
   }
   output_arg {
-    name: "output"
-    type_attr: "T"
+    name: "col_pooling_sequence"
+    type: DT_INT64
   }
   attr {
-    name: "T"
-    type: "type"
-    allowed_values {
-      list {
-        type: DT_DOUBLE
-        type: DT_FLOAT
-        type: DT_COMPLEX64
-        type: DT_COMPLEX128
-      }
-    }
+    name: "pooling_ratio"
+    type: "list(float)"
+    has_minimum: true
+    minimum: 4
   }
   attr {
-    name: "fast"
+    name: "pseudo_random"
     type: "bool"
     default_value {
-      b: true
+      b: false
     }
   }
-}
-op {
-  name: "MatrixTriangularSolve"
-  input_arg {
-    name: "matrix"
-    type_attr: "T"
-  }
-  input_arg {
-    name: "rhs"
-    type_attr: "T"
-  }
-  output_arg {
-    name: "output"
-    type_attr: "T"
-  }
   attr {
-    name: "lower"
+    name: "overlapping"
     type: "bool"
     default_value {
-      b: true
+      b: false
     }
   }
   attr {
-    name: "adjoint"
+    name: "deterministic"
     type: "bool"
     default_value {
       b: false
     }
   }
+  attr {
+    name: "seed"
+    type: "int"
+    default_value {
+      i: 0
+    }
+  }
+  attr {
+    name: "seed2"
+    type: "int"
+    default_value {
+      i: 0
+    }
+  }
   attr {
     name: "T"
     type: "type"
     allowed_values {
       list {
-        type: DT_DOUBLE
         type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
       }
     }
   }
 }
 op {
-  name: "MatrixTriangularSolve"
+  name: "FractionalAvgPoolGrad"
   input_arg {
-    name: "matrix"
-    type_attr: "T"
+    name: "orig_input_tensor_shape"
+    type: DT_INT64
   }
   input_arg {
-    name: "rhs"
+    name: "out_backprop"
     type_attr: "T"
   }
+  input_arg {
+    name: "row_pooling_sequence"
+    type: DT_INT64
+  }
+  input_arg {
+    name: "col_pooling_sequence"
+    type: DT_INT64
+  }
   output_arg {
     name: "output"
     type_attr: "T"
   }
   attr {
-    name: "lower"
-    type: "bool"
-    default_value {
-      b: true
-    }
-  }
-  attr {
-    name: "adjoint"
+    name: "overlapping"
     type: "bool"
     default_value {
       b: false
@@ -17197,65 +17013,80 @@ op {
     type: "type"
     allowed_values {
       list {
-        type: DT_DOUBLE
         type: DT_FLOAT
-        type: DT_COMPLEX64
-        type: DT_COMPLEX128
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
       }
     }
   }
 }
 op {
-  name: "Max"
+  name: "FractionalMaxPool"
   input_arg {
-    name: "input"
+    name: "value"
     type_attr: "T"
   }
-  input_arg {
-    name: "reduction_indices"
-    type_attr: "Tidx"
-  }
   output_arg {
     name: "output"
     type_attr: "T"
   }
+  output_arg {
+    name: "row_pooling_sequence"
+    type: DT_INT64
+  }
+  output_arg {
+    name: "col_pooling_sequence"
+    type: DT_INT64
+  }
   attr {
-    name: "keep_dims"
+    name: "pooling_ratio"
+    type: "list(float)"
+    has_minimum: true
+    minimum: 4
+  }
+  attr {
+    name: "pseudo_random"
     type: "bool"
     default_value {
       b: false
     }
   }
   attr {
-    name: "T"
-    type: "type"
-    allowed_values {
-      list {
-        type: DT_FLOAT
-        type: DT_DOUBLE
-        type: DT_INT64
-        type: DT_INT32
-        type: DT_UINT8
-        type: DT_UINT16
-        type: DT_INT16
-        type: DT_INT8
-        type: DT_COMPLEX64
-        type: DT_COMPLEX128
-        type: DT_QINT8
-        type: DT_QUINT8
-        type: DT_QINT32
-        type: DT_HALF
-      }
+    name: "overlapping"
+    type: "bool"
+    default_value {
+      b: false
     }
   }
   attr {
-    name: "Tidx"
-    type: "type"
+    name: "deterministic"
+    type: "bool"
     default_value {
-      type: DT_INT32
+      b: false
+    }
+  }
+  attr {
+    name: "seed"
+    type: "int"
+    default_value {
+      i: 0
+    }
+  }
+  attr {
+    name: "seed2"
+    type: "int"
+    default_value {
+      i: 0
     }
+  }
+  attr {
+    name: "T"
+    type: "type"
     allowed_values {
       list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
         type: DT_INT32
         type: DT_INT64
       }
@@ -17263,21 +17094,33 @@ op {
   }
 }
 op {
-  name: "Max"
+  name: "FractionalMaxPoolGrad"
   input_arg {
-    name: "input"
+    name: "orig_input"
     type_attr: "T"
   }
   input_arg {
-    name: "reduction_indices"
-    type_attr: "Tidx"
+    name: "orig_output"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "out_backprop"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "row_pooling_sequence"
+    type: DT_INT64
+  }
+  input_arg {
+    name: "col_pooling_sequence"
+    type: DT_INT64
   }
   output_arg {
     name: "output"
     type_attr: "T"
   }
   attr {
-    name: "keep_dims"
+    name: "overlapping"
     type: "bool"
     default_value {
       b: false
@@ -17290,31 +17133,6 @@ op {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT64
-        type: DT_INT32
-        type: DT_UINT8
-        type: DT_UINT16
-        type: DT_INT16
-        type: DT_INT8
-        type: DT_COMPLEX64
-        type: DT_COMPLEX128
-        type: DT_QINT8
-        type: DT_QUINT8
-        type: DT_QINT32
-        type: DT_HALF
-        type: DT_UINT32
-        type: DT_UINT64
-      }
-    }
-  }
-  attr {
-    name: "Tidx"
-    type: "type"
-    default_value {
-      type: DT_INT32
-    }
-    allowed_values {
-      list {
         type: DT_INT32
         type: DT_INT64
       }
@@ -17322,48 +17140,61 @@ op {
   }
 }
 op {
-  name: "MaxPool"
+  name: "FusedBatchNorm"
   input_arg {
-    name: "input"
+    name: "x"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "scale"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "offset"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "mean"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "variance"
     type_attr: "T"
   }
   output_arg {
-    name: "output"
+    name: "y"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "batch_mean"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "batch_variance"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "reserve_space_1"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "reserve_space_2"
     type_attr: "T"
   }
   attr {
     name: "T"
     type: "type"
-    default_value {
-      type: DT_FLOAT
-    }
     allowed_values {
       list {
         type: DT_FLOAT
-        type: DT_HALF
       }
     }
   }
   attr {
-    name: "ksize"
-    type: "list(int)"
-    has_minimum: true
-    minimum: 4
-  }
-  attr {
-    name: "strides"
-    type: "list(int)"
-    has_minimum: true
-    minimum: 4
-  }
-  attr {
-    name: "padding"
-    type: "string"
-    allowed_values {
-      list {
-        s: "SAME"
-        s: "VALID"
-      }
+    name: "epsilon"
+    type: "float"
+    default_value {
+      f: 0.0001
     }
   }
   attr {
@@ -17372,64 +17203,71 @@ op {
     default_value {
       s: "NHWC"
     }
-    allowed_values {
-      list {
-        s: "NHWC"
-        s: "NCHW"
-      }
+  }
+  attr {
+    name: "is_training"
+    type: "bool"
+    default_value {
+      b: true
     }
   }
 }
 op {
-  name: "MaxPool"
+  name: "FusedBatchNormGrad"
   input_arg {
-    name: "input"
+    name: "y_backprop"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "x"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "scale"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "reserve_space_1"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "reserve_space_2"
     type_attr: "T"
   }
   output_arg {
-    name: "output"
+    name: "x_backprop"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "scale_backprop"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "offset_backprop"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "reserve_space_3"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "reserve_space_4"
     type_attr: "T"
   }
   attr {
     name: "T"
     type: "type"
-    default_value {
-      type: DT_FLOAT
-    }
     allowed_values {
       list {
         type: DT_FLOAT
-        type: DT_DOUBLE
-        type: DT_INT32
-        type: DT_INT64
-        type: DT_UINT8
-        type: DT_INT16
-        type: DT_INT8
-        type: DT_UINT16
-        type: DT_HALF
       }
     }
   }
   attr {
-    name: "ksize"
-    type: "list(int)"
-    has_minimum: true
-    minimum: 4
-  }
-  attr {
-    name: "strides"
-    type: "list(int)"
-    has_minimum: true
-    minimum: 4
-  }
-  attr {
-    name: "padding"
-    type: "string"
-    allowed_values {
-      list {
-        s: "SAME"
-        s: "VALID"
-      }
+    name: "epsilon"
+    type: "float"
+    default_value {
+      f: 0.0001
     }
   }
   attr {
@@ -17438,221 +17276,236 @@ op {
     default_value {
       s: "NHWC"
     }
-    allowed_values {
-      list {
-        s: "NHWC"
-        s: "NCHW"
-      }
+  }
+  attr {
+    name: "is_training"
+    type: "bool"
+    default_value {
+      b: true
     }
   }
 }
 op {
-  name: "MaxPool"
+  name: "FusedBatchNormGradV2"
   input_arg {
-    name: "input"
+    name: "y_backprop"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "x"
     type_attr: "T"
   }
+  input_arg {
+    name: "scale"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "reserve_space_1"
+    type_attr: "U"
+  }
+  input_arg {
+    name: "reserve_space_2"
+    type_attr: "U"
+  }
   output_arg {
-    name: "output"
+    name: "x_backprop"
     type_attr: "T"
   }
+  output_arg {
+    name: "scale_backprop"
+    type_attr: "U"
+  }
+  output_arg {
+    name: "offset_backprop"
+    type_attr: "U"
+  }
+  output_arg {
+    name: "reserve_space_3"
+    type_attr: "U"
+  }
+  output_arg {
+    name: "reserve_space_4"
+    type_attr: "U"
+  }
   attr {
     name: "T"
     type: "type"
-    default_value {
-      type: DT_FLOAT
-    }
     allowed_values {
       list {
-        type: DT_FLOAT
-        type: DT_DOUBLE
-        type: DT_INT32
-        type: DT_INT64
-        type: DT_UINT8
-        type: DT_INT16
-        type: DT_INT8
-        type: DT_UINT16
         type: DT_HALF
-        type: DT_QINT8
+        type: DT_FLOAT
       }
     }
   }
   attr {
-    name: "ksize"
-    type: "list(int)"
-    has_minimum: true
-    minimum: 4
-  }
-  attr {
-    name: "strides"
-    type: "list(int)"
-    has_minimum: true
-    minimum: 4
-  }
-  attr {
-    name: "padding"
-    type: "string"
+    name: "U"
+    type: "type"
     allowed_values {
       list {
-        s: "SAME"
-        s: "VALID"
+        type: DT_FLOAT
       }
     }
   }
+  attr {
+    name: "epsilon"
+    type: "float"
+    default_value {
+      f: 0.0001
+    }
+  }
   attr {
     name: "data_format"
     type: "string"
     default_value {
       s: "NHWC"
     }
-    allowed_values {
-      list {
-        s: "NHWC"
-        s: "NCHW"
-        s: "NCHW_VECT_C"
-      }
+  }
+  attr {
+    name: "is_training"
+    type: "bool"
+    default_value {
+      b: true
     }
   }
 }
 op {
-  name: "MaxPool3D"
+  name: "FusedBatchNormGradV2"
   input_arg {
-    name: "input"
+    name: "y_backprop"
     type_attr: "T"
   }
-  output_arg {
-    name: "output"
+  input_arg {
+    name: "x"
     type_attr: "T"
   }
-  attr {
-    name: "ksize"
-    type: "list(int)"
-    has_minimum: true
-    minimum: 5
+  input_arg {
+    name: "scale"
+    type: DT_FLOAT
   }
-  attr {
-    name: "strides"
-    type: "list(int)"
-    has_minimum: true
-    minimum: 5
+  input_arg {
+    name: "reserve_space_1"
+    type_attr: "U"
   }
-  attr {
-    name: "padding"
-    type: "string"
-    allowed_values {
-      list {
-        s: "SAME"
-        s: "VALID"
-      }
-    }
+  input_arg {
+    name: "reserve_space_2"
+    type_attr: "U"
+  }
+  output_arg {
+    name: "x_backprop"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "scale_backprop"
+    type_attr: "U"
+  }
+  output_arg {
+    name: "offset_backprop"
+    type_attr: "U"
+  }
+  output_arg {
+    name: "reserve_space_3"
+    type_attr: "U"
+  }
+  output_arg {
+    name: "reserve_space_4"
+    type_attr: "U"
   }
   attr {
     name: "T"
     type: "type"
     allowed_values {
       list {
+        type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
       }
     }
   }
-}
-op {
-  name: "MaxPool3D"
-  input_arg {
-    name: "input"
-    type_attr: "T"
-  }
-  output_arg {
-    name: "output"
-    type_attr: "T"
-  }
-  attr {
-    name: "ksize"
-    type: "list(int)"
-    has_minimum: true
-    minimum: 5
-  }
-  attr {
-    name: "strides"
-    type: "list(int)"
-    has_minimum: true
-    minimum: 5
-  }
   attr {
-    name: "padding"
-    type: "string"
+    name: "U"
+    type: "type"
     allowed_values {
       list {
-        s: "SAME"
-        s: "VALID"
+        type: DT_FLOAT
       }
     }
   }
+  attr {
+    name: "epsilon"
+    type: "float"
+    default_value {
+      f: 0.0001
+    }
+  }
   attr {
     name: "data_format"
     type: "string"
     default_value {
-      s: "NDHWC"
-    }
-    allowed_values {
-      list {
-        s: "NDHWC"
-        s: "NCDHW"
-      }
+      s: "NHWC"
     }
   }
   attr {
-    name: "T"
-    type: "type"
-    allowed_values {
-      list {
-        type: DT_FLOAT
-      }
+    name: "is_training"
+    type: "bool"
+    default_value {
+      b: true
     }
   }
 }
 op {
-  name: "MaxPool3DGrad"
+  name: "FusedBatchNormV2"
   input_arg {
-    name: "orig_input"
-    type: DT_FLOAT
+    name: "x"
+    type_attr: "T"
   }
   input_arg {
-    name: "orig_output"
-    type: DT_FLOAT
+    name: "scale"
+    type_attr: "U"
   }
   input_arg {
-    name: "grad"
-    type_attr: "T"
+    name: "offset"
+    type_attr: "U"
+  }
+  input_arg {
+    name: "mean"
+    type_attr: "U"
+  }
+  input_arg {
+    name: "variance"
+    type_attr: "U"
   }
   output_arg {
-    name: "output"
+    name: "y"
     type_attr: "T"
   }
-  attr {
-    name: "ksize"
-    type: "list(int)"
-    has_minimum: true
-    minimum: 5
+  output_arg {
+    name: "batch_mean"
+    type_attr: "U"
   }
-  attr {
-    name: "strides"
-    type: "list(int)"
-    has_minimum: true
-    minimum: 5
+  output_arg {
+    name: "batch_variance"
+    type_attr: "U"
+  }
+  output_arg {
+    name: "reserve_space_1"
+    type_attr: "U"
+  }
+  output_arg {
+    name: "reserve_space_2"
+    type_attr: "U"
   }
   attr {
-    name: "padding"
-    type: "string"
+    name: "T"
+    type: "type"
     allowed_values {
       list {
-        s: "SAME"
-        s: "VALID"
+        type: DT_HALF
+        type: DT_FLOAT
       }
     }
   }
   attr {
-    name: "T"
+    name: "U"
     type: "type"
     allowed_values {
       list {
@@ -17660,82 +17513,124 @@ op {
       }
     }
   }
+  attr {
+    name: "epsilon"
+    type: "float"
+    default_value {
+      f: 0.0001
+    }
+  }
+  attr {
+    name: "data_format"
+    type: "string"
+    default_value {
+      s: "NHWC"
+    }
+  }
+  attr {
+    name: "is_training"
+    type: "bool"
+    default_value {
+      b: true
+    }
+  }
 }
 op {
-  name: "MaxPool3DGrad"
+  name: "FusedBatchNormV2"
   input_arg {
-    name: "orig_input"
-    type: DT_FLOAT
+    name: "x"
+    type_attr: "T"
   }
   input_arg {
-    name: "orig_output"
-    type: DT_FLOAT
+    name: "scale"
+    type_attr: "U"
   }
   input_arg {
-    name: "grad"
-    type_attr: "T"
+    name: "offset"
+    type_attr: "U"
+  }
+  input_arg {
+    name: "mean"
+    type_attr: "U"
+  }
+  input_arg {
+    name: "variance"
+    type_attr: "U"
   }
   output_arg {
-    name: "output"
+    name: "y"
     type_attr: "T"
   }
-  attr {
-    name: "ksize"
-    type: "list(int)"
-    has_minimum: true
-    minimum: 5
+  output_arg {
+    name: "batch_mean"
+    type_attr: "U"
+  }
+  output_arg {
+    name: "batch_variance"
+    type_attr: "U"
+  }
+  output_arg {
+    name: "reserve_space_1"
+    type_attr: "U"
+  }
+  output_arg {
+    name: "reserve_space_2"
+    type_attr: "U"
   }
   attr {
-    name: "strides"
-    type: "list(int)"
-    has_minimum: true
-    minimum: 5
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_BFLOAT16
+        type: DT_FLOAT
+      }
+    }
   }
   attr {
-    name: "padding"
-    type: "string"
+    name: "U"
+    type: "type"
     allowed_values {
       list {
-        s: "SAME"
-        s: "VALID"
+        type: DT_FLOAT
       }
     }
   }
+  attr {
+    name: "epsilon"
+    type: "float"
+    default_value {
+      f: 0.0001
+    }
+  }
   attr {
     name: "data_format"
     type: "string"
     default_value {
-      s: "NDHWC"
-    }
-    allowed_values {
-      list {
-        s: "NDHWC"
-        s: "NCDHW"
-      }
+      s: "NHWC"
     }
   }
   attr {
-    name: "T"
-    type: "type"
-    allowed_values {
-      list {
-        type: DT_FLOAT
-      }
+    name: "is_training"
+    type: "bool"
+    default_value {
+      b: true
     }
   }
 }
 op {
-  name: "MaxPool3DGrad"
+  name: "FusedPadConv2D"
   input_arg {
-    name: "orig_input"
-    type_attr: "TInput"
+    name: "input"
+    type_attr: "T"
   }
   input_arg {
-    name: "orig_output"
-    type_attr: "TInput"
+    name: "paddings"
+    type: DT_INT32
   }
   input_arg {
-    name: "grad"
+    name: "filter"
     type_attr: "T"
   }
   output_arg {
@@ -17743,77 +17638,55 @@ op {
     type_attr: "T"
   }
   attr {
-    name: "ksize"
-    type: "list(int)"
-    has_minimum: true
-    minimum: 5
-  }
-  attr {
-    name: "strides"
-    type: "list(int)"
-    has_minimum: true
-    minimum: 5
-  }
-  attr {
-    name: "padding"
-    type: "string"
+    name: "T"
+    type: "type"
     allowed_values {
       list {
-        s: "SAME"
-        s: "VALID"
+        type: DT_FLOAT
       }
     }
   }
   attr {
-    name: "data_format"
+    name: "mode"
     type: "string"
-    default_value {
-      s: "NDHWC"
-    }
     allowed_values {
       list {
-        s: "NDHWC"
-        s: "NCDHW"
+        s: "REFLECT"
+        s: "SYMMETRIC"
       }
     }
   }
   attr {
-    name: "T"
-    type: "type"
-    default_value {
-      type: DT_FLOAT
-    }
-    allowed_values {
-      list {
-        type: DT_FLOAT
-      }
-    }
+    name: "strides"
+    type: "list(int)"
   }
   attr {
-    name: "TInput"
-    type: "type"
-    default_value {
-      type: DT_FLOAT
-    }
+    name: "padding"
+    type: "string"
     allowed_values {
       list {
-        type: DT_FLOAT
+        s: "SAME"
+        s: "VALID"
       }
     }
   }
 }
 op {
-  name: "MaxPool3DGradGrad"
+  name: "FusedResizeAndPadConv2D"
   input_arg {
-    name: "orig_input"
+    name: "input"
     type_attr: "T"
   }
   input_arg {
-    name: "orig_output"
-    type_attr: "T"
+    name: "size"
+    type: DT_INT32
   }
   input_arg {
-    name: "grad"
+    name: "paddings"
+    type: DT_INT32
+  }
+  input_arg {
+    name: "filter"
     type_attr: "T"
   }
   output_arg {
@@ -17821,390 +17694,313 @@ op {
     type_attr: "T"
   }
   attr {
-    name: "ksize"
-    type: "list(int)"
-    has_minimum: true
-    minimum: 5
-  }
-  attr {
-    name: "strides"
-    type: "list(int)"
-    has_minimum: true
-    minimum: 5
-  }
-  attr {
-    name: "padding"
-    type: "string"
+    name: "T"
+    type: "type"
     allowed_values {
       list {
-        s: "SAME"
-        s: "VALID"
+        type: DT_FLOAT
       }
     }
   }
   attr {
-    name: "data_format"
-    type: "string"
+    name: "resize_align_corners"
+    type: "bool"
     default_value {
-      s: "NDHWC"
+      b: false
     }
+  }
+  attr {
+    name: "mode"
+    type: "string"
     allowed_values {
       list {
-        s: "NDHWC"
-        s: "NCDHW"
+        s: "REFLECT"
+        s: "SYMMETRIC"
       }
     }
   }
   attr {
-    name: "T"
-    type: "type"
+    name: "strides"
+    type: "list(int)"
+  }
+  attr {
+    name: "padding"
+    type: "string"
     allowed_values {
       list {
-        type: DT_FLOAT
+        s: "SAME"
+        s: "VALID"
       }
     }
   }
 }
 op {
-  name: "MaxPoolGrad"
-  input_arg {
-    name: "orig_input"
-    type_attr: "T"
-  }
+  name: "Gather"
   input_arg {
-    name: "orig_output"
-    type_attr: "T"
+    name: "params"
+    type_attr: "Tparams"
   }
   input_arg {
-    name: "grad"
-    type_attr: "T"
+    name: "indices"
+    type_attr: "Tindices"
   }
   output_arg {
     name: "output"
-    type_attr: "T"
+    type_attr: "Tparams"
   }
   attr {
-    name: "ksize"
-    type: "list(int)"
-    has_minimum: true
-    minimum: 4
+    name: "validate_indices"
+    type: "bool"
+    default_value {
+      b: true
+    }
   }
   attr {
-    name: "strides"
-    type: "list(int)"
-    has_minimum: true
-    minimum: 4
+    name: "Tparams"
+    type: "type"
   }
   attr {
-    name: "padding"
-    type: "string"
+    name: "Tindices"
+    type: "type"
     allowed_values {
       list {
-        s: "SAME"
-        s: "VALID"
+        type: DT_INT32
+        type: DT_INT64
       }
     }
   }
+}
+op {
+  name: "GatherNd"
+  input_arg {
+    name: "params"
+    type_attr: "Tparams"
+  }
+  input_arg {
+    name: "indices"
+    type_attr: "Tindices"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "Tparams"
+  }
   attr {
-    name: "data_format"
-    type: "string"
-    default_value {
-      s: "NHWC"
-    }
-    allowed_values {
-      list {
-        s: "NHWC"
-        s: "NCHW"
-      }
-    }
+    name: "Tparams"
+    type: "type"
   }
   attr {
-    name: "T"
+    name: "Tindices"
     type: "type"
-    default_value {
-      type: DT_FLOAT
-    }
     allowed_values {
       list {
-        type: DT_FLOAT
-        type: DT_HALF
+        type: DT_INT32
+        type: DT_INT64
       }
     }
   }
 }
 op {
-  name: "MaxPoolGrad"
+  name: "GatherV2"
   input_arg {
-    name: "orig_input"
-    type_attr: "T"
+    name: "params"
+    type_attr: "Tparams"
   }
   input_arg {
-    name: "orig_output"
-    type_attr: "T"
+    name: "indices"
+    type_attr: "Tindices"
   }
   input_arg {
-    name: "grad"
-    type_attr: "T"
+    name: "axis"
+    type_attr: "Taxis"
   }
   output_arg {
     name: "output"
-    type_attr: "T"
-  }
-  attr {
-    name: "ksize"
-    type: "list(int)"
-    has_minimum: true
-    minimum: 4
-  }
-  attr {
-    name: "strides"
-    type: "list(int)"
-    has_minimum: true
-    minimum: 4
+    type_attr: "Tparams"
   }
   attr {
-    name: "padding"
-    type: "string"
-    allowed_values {
-      list {
-        s: "SAME"
-        s: "VALID"
-      }
-    }
+    name: "Tparams"
+    type: "type"
   }
   attr {
-    name: "data_format"
-    type: "string"
-    default_value {
-      s: "NHWC"
-    }
+    name: "Tindices"
+    type: "type"
     allowed_values {
       list {
-        s: "NHWC"
-        s: "NCHW"
+        type: DT_INT32
+        type: DT_INT64
       }
     }
   }
   attr {
-    name: "T"
+    name: "Taxis"
     type: "type"
-    default_value {
-      type: DT_FLOAT
-    }
     allowed_values {
       list {
-        type: DT_FLOAT
-        type: DT_DOUBLE
         type: DT_INT32
         type: DT_INT64
-        type: DT_UINT8
-        type: DT_INT16
-        type: DT_INT8
-        type: DT_UINT16
-        type: DT_HALF
       }
     }
   }
 }
 op {
-  name: "MaxPoolGrad"
+  name: "GenerateVocabRemapping"
   input_arg {
-    name: "orig_input"
-    type_attr: "T"
+    name: "new_vocab_file"
+    type: DT_STRING
   }
   input_arg {
-    name: "orig_output"
-    type_attr: "T"
+    name: "old_vocab_file"
+    type: DT_STRING
   }
-  input_arg {
-    name: "grad"
-    type_attr: "T"
+  output_arg {
+    name: "remapping"
+    type: DT_INT64
   }
   output_arg {
-    name: "output"
-    type_attr: "T"
+    name: "num_present"
+    type: DT_INT32
   }
   attr {
-    name: "ksize"
-    type: "list(int)"
+    name: "new_vocab_offset"
+    type: "int"
     has_minimum: true
-    minimum: 4
   }
   attr {
-    name: "strides"
-    type: "list(int)"
+    name: "num_new_vocab"
+    type: "int"
     has_minimum: true
-    minimum: 4
+  }
+}
+op {
+  name: "GenerateVocabRemapping"
+  input_arg {
+    name: "new_vocab_file"
+    type: DT_STRING
+  }
+  input_arg {
+    name: "old_vocab_file"
+    type: DT_STRING
+  }
+  output_arg {
+    name: "remapping"
+    type: DT_INT64
+  }
+  output_arg {
+    name: "num_present"
+    type: DT_INT32
   }
   attr {
-    name: "padding"
-    type: "string"
-    allowed_values {
-      list {
-        s: "SAME"
-        s: "VALID"
-      }
-    }
+    name: "new_vocab_offset"
+    type: "int"
+    has_minimum: true
   }
   attr {
-    name: "data_format"
-    type: "string"
-    default_value {
-      s: "NHWC"
-    }
-    allowed_values {
-      list {
-        s: "NHWC"
-        s: "NCHW"
-      }
-    }
+    name: "num_new_vocab"
+    type: "int"
+    has_minimum: true
   }
   attr {
-    name: "T"
-    type: "type"
+    name: "old_vocab_size"
+    type: "int"
     default_value {
-      type: DT_FLOAT
-    }
-    allowed_values {
-      list {
-        type: DT_FLOAT
-        type: DT_DOUBLE
-        type: DT_INT32
-        type: DT_INT64
-        type: DT_UINT8
-        type: DT_INT16
-        type: DT_INT8
-        type: DT_UINT16
-        type: DT_HALF
-        type: DT_UINT32
-        type: DT_UINT64
-      }
+      i: -1
     }
+    has_minimum: true
+    minimum: -1
   }
 }
 op {
-  name: "MaxPoolGradGrad"
+  name: "GetSessionHandle"
   input_arg {
-    name: "orig_input"
+    name: "value"
     type_attr: "T"
   }
-  input_arg {
-    name: "orig_output"
-    type_attr: "T"
+  output_arg {
+    name: "handle"
+    type: DT_STRING
+  }
+  attr {
+    name: "T"
+    type: "type"
   }
+}
+op {
+  name: "GetSessionHandle"
   input_arg {
-    name: "grad"
+    name: "value"
     type_attr: "T"
   }
   output_arg {
-    name: "output"
-    type_attr: "T"
+    name: "handle"
+    type: DT_STRING
   }
   attr {
-    name: "ksize"
-    type: "list(int)"
-    has_minimum: true
-    minimum: 4
+    name: "T"
+    type: "type"
   }
-  attr {
-    name: "strides"
-    type: "list(int)"
-    has_minimum: true
-    minimum: 4
+  deprecation {
+    version: 23
   }
-  attr {
-    name: "padding"
-    type: "string"
-    allowed_values {
-      list {
-        s: "SAME"
-        s: "VALID"
-      }
-    }
+}
+op {
+  name: "GetSessionHandle"
+  input_arg {
+    name: "value"
+    type_attr: "T"
   }
-  attr {
-    name: "data_format"
-    type: "string"
-    default_value {
-      s: "NHWC"
-    }
-    allowed_values {
-      list {
-        s: "NHWC"
-        s: "NCHW"
-      }
-    }
+  output_arg {
+    name: "handle"
+    type: DT_STRING
   }
   attr {
     name: "T"
     type: "type"
-    allowed_values {
-      list {
-        type: DT_FLOAT
-        type: DT_DOUBLE
-        type: DT_INT32
-        type: DT_INT64
-        type: DT_UINT8
-        type: DT_INT16
-        type: DT_INT8
-        type: DT_UINT16
-        type: DT_HALF
-      }
-    }
   }
 }
 op {
-  name: "MaxPoolGradGrad"
+  name: "GetSessionHandleV2"
   input_arg {
-    name: "orig_input"
+    name: "value"
     type_attr: "T"
   }
-  input_arg {
-    name: "orig_output"
-    type_attr: "T"
+  output_arg {
+    name: "handle"
+    type: DT_RESOURCE
+  }
+  attr {
+    name: "T"
+    type: "type"
   }
+  is_stateful: true
+}
+op {
+  name: "GetSessionTensor"
   input_arg {
-    name: "grad"
-    type_attr: "T"
+    name: "handle"
+    type: DT_STRING
   }
   output_arg {
-    name: "output"
-    type_attr: "T"
+    name: "value"
+    type_attr: "dtype"
   }
   attr {
-    name: "ksize"
-    type: "list(int)"
-    has_minimum: true
-    minimum: 4
+    name: "dtype"
+    type: "type"
   }
-  attr {
-    name: "strides"
-    type: "list(int)"
-    has_minimum: true
-    minimum: 4
+}
+op {
+  name: "Greater"
+  input_arg {
+    name: "x"
+    type_attr: "T"
   }
-  attr {
-    name: "padding"
-    type: "string"
-    allowed_values {
-      list {
-        s: "SAME"
-        s: "VALID"
-      }
-    }
+  input_arg {
+    name: "y"
+    type_attr: "T"
   }
-  attr {
-    name: "data_format"
-    type: "string"
-    default_value {
-      s: "NHWC"
-    }
-    allowed_values {
-      list {
-        s: "NHWC"
-        s: "NCHW"
-      }
-    }
+  output_arg {
+    name: "z"
+    type: DT_BOOL
   }
   attr {
     name: "T"
@@ -18220,60 +18016,23 @@ op {
         type: DT_INT8
         type: DT_UINT16
         type: DT_HALF
-        type: DT_UINT32
-        type: DT_UINT64
       }
     }
   }
 }
 op {
-  name: "MaxPoolGradGradV2"
-  input_arg {
-    name: "orig_input"
-    type_attr: "T"
-  }
+  name: "Greater"
   input_arg {
-    name: "orig_output"
+    name: "x"
     type_attr: "T"
   }
   input_arg {
-    name: "grad"
+    name: "y"
     type_attr: "T"
   }
-  input_arg {
-    name: "ksize"
-    type: DT_INT32
-  }
-  input_arg {
-    name: "strides"
-    type: DT_INT32
-  }
   output_arg {
-    name: "output"
-    type_attr: "T"
-  }
-  attr {
-    name: "padding"
-    type: "string"
-    allowed_values {
-      list {
-        s: "SAME"
-        s: "VALID"
-      }
-    }
-  }
-  attr {
-    name: "data_format"
-    type: "string"
-    default_value {
-      s: "NHWC"
-    }
-    allowed_values {
-      list {
-        s: "NHWC"
-        s: "NCHW"
-      }
-    }
+    name: "z"
+    type: DT_BOOL
   }
   attr {
     name: "T"
@@ -18289,58 +18048,25 @@ op {
         type: DT_INT8
         type: DT_UINT16
         type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
       }
     }
   }
 }
 op {
-  name: "MaxPoolGradGradV2"
-  input_arg {
-    name: "orig_input"
-    type_attr: "T"
-  }
+  name: "Greater"
   input_arg {
-    name: "orig_output"
+    name: "x"
     type_attr: "T"
   }
   input_arg {
-    name: "grad"
+    name: "y"
     type_attr: "T"
   }
-  input_arg {
-    name: "ksize"
-    type: DT_INT32
-  }
-  input_arg {
-    name: "strides"
-    type: DT_INT32
-  }
   output_arg {
-    name: "output"
-    type_attr: "T"
-  }
-  attr {
-    name: "padding"
-    type: "string"
-    allowed_values {
-      list {
-        s: "SAME"
-        s: "VALID"
-      }
-    }
-  }
-  attr {
-    name: "data_format"
-    type: "string"
-    default_value {
-      s: "NHWC"
-    }
-    allowed_values {
-      list {
-        s: "NHWC"
-        s: "NCHW"
-      }
-    }
+    name: "z"
+    type: DT_BOOL
   }
   attr {
     name: "T"
@@ -18358,59 +18084,24 @@ op {
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
 }
 op {
-  name: "MaxPoolGradGradWithArgmax"
+  name: "GreaterEqual"
   input_arg {
-    name: "input"
+    name: "x"
     type_attr: "T"
   }
   input_arg {
-    name: "grad"
+    name: "y"
     type_attr: "T"
   }
-  input_arg {
-    name: "argmax"
-    type_attr: "Targmax"
-  }
   output_arg {
-    name: "output"
-    type_attr: "T"
-  }
-  attr {
-    name: "ksize"
-    type: "list(int)"
-    has_minimum: true
-    minimum: 4
-  }
-  attr {
-    name: "strides"
-    type: "list(int)"
-    has_minimum: true
-    minimum: 4
-  }
-  attr {
-    name: "padding"
-    type: "string"
-    allowed_values {
-      list {
-        s: "SAME"
-        s: "VALID"
-      }
-    }
-  }
-  attr {
-    name: "Targmax"
-    type: "type"
-    allowed_values {
-      list {
-        type: DT_INT32
-        type: DT_INT64
-      }
-    }
+    name: "z"
+    type: DT_BOOL
   }
   attr {
     name: "T"
@@ -18431,54 +18122,18 @@ op {
   }
 }
 op {
-  name: "MaxPoolGradGradWithArgmax"
+  name: "GreaterEqual"
   input_arg {
-    name: "input"
+    name: "x"
     type_attr: "T"
   }
   input_arg {
-    name: "grad"
+    name: "y"
     type_attr: "T"
   }
-  input_arg {
-    name: "argmax"
-    type_attr: "Targmax"
-  }
   output_arg {
-    name: "output"
-    type_attr: "T"
-  }
-  attr {
-    name: "ksize"
-    type: "list(int)"
-    has_minimum: true
-    minimum: 4
-  }
-  attr {
-    name: "strides"
-    type: "list(int)"
-    has_minimum: true
-    minimum: 4
-  }
-  attr {
-    name: "padding"
-    type: "string"
-    allowed_values {
-      list {
-        s: "SAME"
-        s: "VALID"
-      }
-    }
-  }
-  attr {
-    name: "Targmax"
-    type: "type"
-    allowed_values {
-      list {
-        type: DT_INT32
-        type: DT_INT64
-      }
-    }
+    name: "z"
+    type: DT_BOOL
   }
   attr {
     name: "T"
@@ -18501,60 +18156,22 @@ op {
   }
 }
 op {
-  name: "MaxPoolGradV2"
-  input_arg {
-    name: "orig_input"
-    type_attr: "T"
-  }
+  name: "GreaterEqual"
   input_arg {
-    name: "orig_output"
+    name: "x"
     type_attr: "T"
   }
   input_arg {
-    name: "grad"
+    name: "y"
     type_attr: "T"
   }
-  input_arg {
-    name: "ksize"
-    type: DT_INT32
-  }
-  input_arg {
-    name: "strides"
-    type: DT_INT32
-  }
   output_arg {
-    name: "output"
-    type_attr: "T"
-  }
-  attr {
-    name: "padding"
-    type: "string"
-    allowed_values {
-      list {
-        s: "SAME"
-        s: "VALID"
-      }
-    }
-  }
-  attr {
-    name: "data_format"
-    type: "string"
-    default_value {
-      s: "NHWC"
-    }
-    allowed_values {
-      list {
-        s: "NHWC"
-        s: "NCHW"
-      }
-    }
+    name: "z"
+    type: DT_BOOL
   }
   attr {
     name: "T"
     type: "type"
-    default_value {
-      type: DT_FLOAT
-    }
     allowed_values {
       list {
         type: DT_FLOAT
@@ -18566,363 +18183,310 @@ op {
         type: DT_INT8
         type: DT_UINT16
         type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
 }
 op {
-  name: "MaxPoolGradV2"
-  input_arg {
-    name: "orig_input"
-    type_attr: "T"
-  }
+  name: "GroupByWindowDataset"
   input_arg {
-    name: "orig_output"
-    type_attr: "T"
+    name: "input_dataset"
+    type: DT_VARIANT
   }
   input_arg {
-    name: "grad"
-    type_attr: "T"
+    name: "key_func_other_arguments"
+    type_list_attr: "Tkey_func_other_arguments"
   }
   input_arg {
-    name: "ksize"
-    type: DT_INT32
+    name: "reduce_func_other_arguments"
+    type_list_attr: "Treduce_func_other_arguments"
   }
   input_arg {
-    name: "strides"
-    type: DT_INT32
+    name: "window_size_func_other_arguments"
+    type_list_attr: "Twindow_size_func_other_arguments"
   }
   output_arg {
-    name: "output"
-    type_attr: "T"
+    name: "handle"
+    type: DT_VARIANT
   }
   attr {
-    name: "padding"
-    type: "string"
-    allowed_values {
-      list {
-        s: "SAME"
-        s: "VALID"
-      }
-    }
+    name: "key_func"
+    type: "func"
   }
   attr {
-    name: "data_format"
-    type: "string"
-    default_value {
-      s: "NHWC"
-    }
-    allowed_values {
-      list {
-        s: "NHWC"
-        s: "NCHW"
-      }
-    }
+    name: "reduce_func"
+    type: "func"
   }
   attr {
-    name: "T"
-    type: "type"
-    default_value {
-      type: DT_FLOAT
-    }
-    allowed_values {
-      list {
-        type: DT_FLOAT
-        type: DT_DOUBLE
-        type: DT_INT32
-        type: DT_INT64
-        type: DT_UINT8
-        type: DT_INT16
-        type: DT_INT8
-        type: DT_UINT16
-        type: DT_HALF
-        type: DT_UINT32
-        type: DT_UINT64
-      }
-    }
+    name: "window_size_func"
+    type: "func"
+  }
+  attr {
+    name: "Tkey_func_other_arguments"
+    type: "list(type)"
+    has_minimum: true
+  }
+  attr {
+    name: "Treduce_func_other_arguments"
+    type: "list(type)"
+    has_minimum: true
+  }
+  attr {
+    name: "Twindow_size_func_other_arguments"
+    type: "list(type)"
+    has_minimum: true
+  }
+  attr {
+    name: "output_types"
+    type: "list(type)"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "output_shapes"
+    type: "list(shape)"
+    has_minimum: true
+    minimum: 1
   }
+  is_stateful: true
 }
 op {
-  name: "MaxPoolGradWithArgmax"
+  name: "GroupByWindowDataset"
   input_arg {
-    name: "input"
-    type_attr: "T"
+    name: "input_dataset"
+    type: DT_VARIANT
   }
   input_arg {
-    name: "grad"
-    type_attr: "T"
+    name: "key_func_other_arguments"
+    type_list_attr: "Tkey_func_other_arguments"
   }
   input_arg {
-    name: "argmax"
-    type_attr: "Targmax"
+    name: "reduce_func_other_arguments"
+    type_list_attr: "Treduce_func_other_arguments"
+  }
+  input_arg {
+    name: "window_size_func_other_arguments"
+    type_list_attr: "Twindow_size_func_other_arguments"
   }
   output_arg {
-    name: "output"
-    type_attr: "T"
+    name: "handle"
+    type: DT_VARIANT
   }
   attr {
-    name: "ksize"
-    type: "list(int)"
+    name: "key_func"
+    type: "func"
+  }
+  attr {
+    name: "reduce_func"
+    type: "func"
+  }
+  attr {
+    name: "window_size_func"
+    type: "func"
+  }
+  attr {
+    name: "Tkey_func_other_arguments"
+    type: "list(type)"
     has_minimum: true
-    minimum: 4
   }
   attr {
-    name: "strides"
-    type: "list(int)"
+    name: "Treduce_func_other_arguments"
+    type: "list(type)"
     has_minimum: true
-    minimum: 4
   }
   attr {
-    name: "padding"
-    type: "string"
-    allowed_values {
-      list {
-        s: "SAME"
-        s: "VALID"
-      }
-    }
+    name: "Twindow_size_func_other_arguments"
+    type: "list(type)"
+    has_minimum: true
   }
   attr {
-    name: "Targmax"
-    type: "type"
-    allowed_values {
-      list {
-        type: DT_INT32
-        type: DT_INT64
-      }
-    }
+    name: "output_types"
+    type: "list(type)"
+    has_minimum: true
+    minimum: 1
   }
   attr {
-    name: "T"
-    type: "type"
-    default_value {
-      type: DT_FLOAT
-    }
-    allowed_values {
-      list {
-        type: DT_FLOAT
-        type: DT_HALF
-      }
-    }
+    name: "output_shapes"
+    type: "list(shape)"
+    has_minimum: true
+    minimum: 1
   }
 }
 op {
-  name: "MaxPoolGradWithArgmax"
+  name: "GuaranteeConst"
   input_arg {
     name: "input"
     type_attr: "T"
   }
-  input_arg {
-    name: "grad"
+  output_arg {
+    name: "output"
     type_attr: "T"
   }
+  attr {
+    name: "T"
+    type: "type"
+  }
+  is_stateful: true
+}
+op {
+  name: "HSVToRGB"
   input_arg {
-    name: "argmax"
-    type_attr: "Targmax"
+    name: "images"
+    type_attr: "T"
   }
   output_arg {
     name: "output"
     type_attr: "T"
   }
   attr {
-    name: "ksize"
-    type: "list(int)"
-    has_minimum: true
-    minimum: 4
+    name: "T"
+    type: "type"
+    default_value {
+      type: DT_FLOAT
+    }
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+      }
+    }
+  }
+}
+op {
+  name: "HashTable"
+  output_arg {
+    name: "table_handle"
+    type: DT_STRING
+    is_ref: true
   }
   attr {
-    name: "strides"
-    type: "list(int)"
-    has_minimum: true
-    minimum: 4
+    name: "container"
+    type: "string"
+    default_value {
+      s: ""
+    }
   }
   attr {
-    name: "padding"
+    name: "shared_name"
     type: "string"
-    allowed_values {
-      list {
-        s: "SAME"
-        s: "VALID"
-      }
+    default_value {
+      s: ""
     }
   }
   attr {
-    name: "Targmax"
-    type: "type"
-    allowed_values {
-      list {
-        type: DT_INT32
-        type: DT_INT64
-      }
+    name: "use_node_name_sharing"
+    type: "bool"
+    default_value {
+      b: false
     }
   }
   attr {
-    name: "T"
+    name: "key_dtype"
+    type: "type"
+  }
+  attr {
+    name: "value_dtype"
     type: "type"
-    allowed_values {
-      list {
-        type: DT_FLOAT
-        type: DT_DOUBLE
-        type: DT_INT32
-        type: DT_INT64
-        type: DT_UINT8
-        type: DT_INT16
-        type: DT_INT8
-        type: DT_UINT16
-        type: DT_HALF
-      }
-    }
   }
+  is_stateful: true
 }
 op {
-  name: "MaxPoolGradWithArgmax"
-  input_arg {
-    name: "input"
-    type_attr: "T"
-  }
-  input_arg {
-    name: "grad"
-    type_attr: "T"
-  }
-  input_arg {
-    name: "argmax"
-    type_attr: "Targmax"
-  }
+  name: "HashTableV2"
   output_arg {
-    name: "output"
-    type_attr: "T"
+    name: "table_handle"
+    type: DT_RESOURCE
   }
   attr {
-    name: "ksize"
-    type: "list(int)"
-    has_minimum: true
-    minimum: 4
+    name: "container"
+    type: "string"
+    default_value {
+      s: ""
+    }
   }
   attr {
-    name: "strides"
-    type: "list(int)"
-    has_minimum: true
-    minimum: 4
+    name: "shared_name"
+    type: "string"
+    default_value {
+      s: ""
+    }
   }
   attr {
-    name: "padding"
-    type: "string"
-    allowed_values {
-      list {
-        s: "SAME"
-        s: "VALID"
-      }
+    name: "use_node_name_sharing"
+    type: "bool"
+    default_value {
+      b: false
     }
   }
   attr {
-    name: "Targmax"
+    name: "key_dtype"
     type: "type"
-    allowed_values {
-      list {
-        type: DT_INT32
-        type: DT_INT64
-      }
-    }
   }
   attr {
-    name: "T"
+    name: "value_dtype"
     type: "type"
-    allowed_values {
-      list {
-        type: DT_FLOAT
-        type: DT_DOUBLE
-        type: DT_INT32
-        type: DT_INT64
-        type: DT_UINT8
-        type: DT_INT16
-        type: DT_INT8
-        type: DT_UINT16
-        type: DT_HALF
-        type: DT_UINT32
-        type: DT_UINT64
-      }
-    }
   }
+  is_stateful: true
 }
 op {
-  name: "MaxPoolV2"
+  name: "HistogramFixedWidth"
   input_arg {
-    name: "input"
+    name: "values"
     type_attr: "T"
   }
   input_arg {
-    name: "ksize"
-    type: DT_INT32
+    name: "value_range"
+    type_attr: "T"
   }
   input_arg {
-    name: "strides"
+    name: "nbins"
     type: DT_INT32
   }
   output_arg {
-    name: "output"
-    type_attr: "T"
+    name: "out"
+    type_attr: "dtype"
   }
   attr {
     name: "T"
     type: "type"
-    default_value {
-      type: DT_FLOAT
-    }
     allowed_values {
       list {
-        type: DT_FLOAT
-        type: DT_DOUBLE
         type: DT_INT32
         type: DT_INT64
-        type: DT_UINT8
-        type: DT_INT16
-        type: DT_INT8
-        type: DT_UINT16
-        type: DT_HALF
-      }
-    }
-  }
-  attr {
-    name: "padding"
-    type: "string"
-    allowed_values {
-      list {
-        s: "SAME"
-        s: "VALID"
+        type: DT_FLOAT
+        type: DT_DOUBLE
       }
     }
   }
   attr {
-    name: "data_format"
-    type: "string"
+    name: "dtype"
+    type: "type"
     default_value {
-      s: "NHWC"
+      type: DT_INT32
     }
     allowed_values {
       list {
-        s: "NHWC"
-        s: "NCHW"
+        type: DT_INT32
+        type: DT_INT64
       }
     }
   }
 }
 op {
-  name: "MaxPoolV2"
-  input_arg {
-    name: "input"
-    type_attr: "T"
-  }
+  name: "HistogramSummary"
   input_arg {
-    name: "ksize"
-    type: DT_INT32
+    name: "tag"
+    type: DT_STRING
   }
   input_arg {
-    name: "strides"
-    type: DT_INT32
+    name: "values"
+    type_attr: "T"
   }
   output_arg {
-    name: "output"
-    type_attr: "T"
+    name: "summary"
+    type: DT_STRING
   }
   attr {
     name: "T"
@@ -18941,83 +18505,23 @@ op {
         type: DT_INT8
         type: DT_UINT16
         type: DT_HALF
-        type: DT_QINT8
-      }
-    }
-  }
-  attr {
-    name: "padding"
-    type: "string"
-    allowed_values {
-      list {
-        s: "SAME"
-        s: "VALID"
-      }
-    }
-  }
-  attr {
-    name: "data_format"
-    type: "string"
-    default_value {
-      s: "NHWC"
-    }
-    allowed_values {
-      list {
-        s: "NHWC"
-        s: "NCHW"
-        s: "NCHW_VECT_C"
       }
     }
   }
 }
 op {
-  name: "MaxPoolWithArgmax"
+  name: "HistogramSummary"
   input_arg {
-    name: "input"
-    type_attr: "T"
+    name: "tag"
+    type: DT_STRING
   }
-  output_arg {
-    name: "output"
+  input_arg {
+    name: "values"
     type_attr: "T"
   }
   output_arg {
-    name: "argmax"
-    type_attr: "Targmax"
-  }
-  attr {
-    name: "ksize"
-    type: "list(int)"
-    has_minimum: true
-    minimum: 4
-  }
-  attr {
-    name: "strides"
-    type: "list(int)"
-    has_minimum: true
-    minimum: 4
-  }
-  attr {
-    name: "Targmax"
-    type: "type"
-    default_value {
-      type: DT_INT64
-    }
-    allowed_values {
-      list {
-        type: DT_INT32
-        type: DT_INT64
-      }
-    }
-  }
-  attr {
-    name: "padding"
-    type: "string"
-    allowed_values {
-      list {
-        s: "SAME"
-        s: "VALID"
-      }
-    }
+    name: "summary"
+    type: DT_STRING
   }
   attr {
     name: "T"
@@ -19028,63 +18532,40 @@ op {
     allowed_values {
       list {
         type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_UINT16
         type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
       }
     }
   }
 }
 op {
-  name: "MaxPoolWithArgmax"
+  name: "HistogramSummary"
   input_arg {
-    name: "input"
-    type_attr: "T"
+    name: "tag"
+    type: DT_STRING
   }
-  output_arg {
-    name: "output"
+  input_arg {
+    name: "values"
     type_attr: "T"
   }
   output_arg {
-    name: "argmax"
-    type_attr: "Targmax"
-  }
-  attr {
-    name: "ksize"
-    type: "list(int)"
-    has_minimum: true
-    minimum: 4
-  }
-  attr {
-    name: "strides"
-    type: "list(int)"
-    has_minimum: true
-    minimum: 4
+    name: "summary"
+    type: DT_STRING
   }
   attr {
-    name: "Targmax"
+    name: "T"
     type: "type"
     default_value {
-      type: DT_INT64
-    }
-    allowed_values {
-      list {
-        type: DT_INT32
-        type: DT_INT64
-      }
-    }
-  }
-  attr {
-    name: "padding"
-    type: "string"
-    allowed_values {
-      list {
-        s: "SAME"
-        s: "VALID"
-      }
+      type: DT_FLOAT
     }
-  }
-  attr {
-    name: "T"
-    type: "type"
     allowed_values {
       list {
         type: DT_FLOAT
@@ -19096,369 +18577,182 @@ op {
         type: DT_INT8
         type: DT_UINT16
         type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
 }
 op {
-  name: "MaxPoolWithArgmax"
+  name: "IFFT"
   input_arg {
     name: "input"
-    type_attr: "T"
+    type: DT_COMPLEX64
   }
   output_arg {
     name: "output"
-    type_attr: "T"
-  }
-  output_arg {
-    name: "argmax"
-    type_attr: "Targmax"
-  }
-  attr {
-    name: "ksize"
-    type: "list(int)"
-    has_minimum: true
-    minimum: 4
+    type: DT_COMPLEX64
   }
-  attr {
-    name: "strides"
-    type: "list(int)"
-    has_minimum: true
-    minimum: 4
+}
+op {
+  name: "IFFT2D"
+  input_arg {
+    name: "input"
+    type: DT_COMPLEX64
   }
-  attr {
-    name: "Targmax"
-    type: "type"
-    default_value {
-      type: DT_INT64
-    }
-    allowed_values {
-      list {
-        type: DT_INT32
-        type: DT_INT64
-      }
-    }
+  output_arg {
+    name: "output"
+    type: DT_COMPLEX64
   }
-  attr {
-    name: "padding"
-    type: "string"
-    allowed_values {
-      list {
-        s: "SAME"
-        s: "VALID"
-      }
-    }
+}
+op {
+  name: "IFFT3D"
+  input_arg {
+    name: "input"
+    type: DT_COMPLEX64
   }
-  attr {
-    name: "T"
-    type: "type"
-    allowed_values {
-      list {
-        type: DT_FLOAT
-        type: DT_DOUBLE
-        type: DT_INT32
-        type: DT_INT64
-        type: DT_UINT8
-        type: DT_INT16
-        type: DT_INT8
-        type: DT_UINT16
-        type: DT_HALF
-        type: DT_UINT32
-        type: DT_UINT64
-      }
-    }
+  output_arg {
+    name: "output"
+    type: DT_COMPLEX64
   }
 }
 op {
-  name: "Maximum"
+  name: "IRFFT"
   input_arg {
-    name: "x"
-    type_attr: "T"
+    name: "input"
+    type: DT_COMPLEX64
   }
   input_arg {
-    name: "y"
-    type_attr: "T"
+    name: "fft_length"
+    type: DT_INT32
   }
   output_arg {
-    name: "z"
-    type_attr: "T"
-  }
-  attr {
-    name: "T"
-    type: "type"
-    allowed_values {
-      list {
-        type: DT_HALF
-        type: DT_FLOAT
-        type: DT_DOUBLE
-        type: DT_INT32
-        type: DT_INT64
-      }
-    }
+    name: "output"
+    type: DT_FLOAT
   }
-  is_commutative: true
 }
 op {
-  name: "Mean"
+  name: "IRFFT2D"
   input_arg {
     name: "input"
-    type_attr: "T"
+    type: DT_COMPLEX64
   }
   input_arg {
-    name: "reduction_indices"
-    type_attr: "Tidx"
+    name: "fft_length"
+    type: DT_INT32
   }
   output_arg {
     name: "output"
-    type_attr: "T"
-  }
-  attr {
-    name: "keep_dims"
-    type: "bool"
-    default_value {
-      b: false
-    }
-  }
-  attr {
-    name: "T"
-    type: "type"
-    allowed_values {
-      list {
-        type: DT_FLOAT
-        type: DT_DOUBLE
-        type: DT_INT64
-        type: DT_INT32
-        type: DT_UINT8
-        type: DT_UINT16
-        type: DT_INT16
-        type: DT_INT8
-        type: DT_COMPLEX64
-        type: DT_COMPLEX128
-        type: DT_QINT8
-        type: DT_QUINT8
-        type: DT_QINT32
-        type: DT_HALF
-      }
-    }
-  }
-  attr {
-    name: "Tidx"
-    type: "type"
-    default_value {
-      type: DT_INT32
-    }
-    allowed_values {
-      list {
-        type: DT_INT32
-        type: DT_INT64
-      }
-    }
+    type: DT_FLOAT
   }
 }
 op {
-  name: "Mean"
+  name: "IRFFT3D"
   input_arg {
     name: "input"
-    type_attr: "T"
+    type: DT_COMPLEX64
   }
   input_arg {
-    name: "reduction_indices"
-    type_attr: "Tidx"
+    name: "fft_length"
+    type: DT_INT32
   }
   output_arg {
     name: "output"
-    type_attr: "T"
-  }
-  attr {
-    name: "keep_dims"
-    type: "bool"
-    default_value {
-      b: false
-    }
-  }
-  attr {
-    name: "T"
-    type: "type"
-    allowed_values {
-      list {
-        type: DT_FLOAT
-        type: DT_DOUBLE
-        type: DT_INT64
-        type: DT_INT32
-        type: DT_UINT8
-        type: DT_UINT16
-        type: DT_INT16
-        type: DT_INT8
-        type: DT_COMPLEX64
-        type: DT_COMPLEX128
-        type: DT_QINT8
-        type: DT_QUINT8
-        type: DT_QINT32
-        type: DT_HALF
-        type: DT_UINT32
-        type: DT_UINT64
-      }
-    }
-  }
-  attr {
-    name: "Tidx"
-    type: "type"
-    default_value {
-      type: DT_INT32
-    }
-    allowed_values {
-      list {
-        type: DT_INT32
-        type: DT_INT64
-      }
-    }
+    type: DT_FLOAT
   }
 }
 op {
-  name: "Merge"
+  name: "Identity"
   input_arg {
-    name: "inputs"
+    name: "input"
     type_attr: "T"
-    number_attr: "N"
   }
   output_arg {
     name: "output"
     type_attr: "T"
   }
-  output_arg {
-    name: "value_index"
-    type: DT_INT32
-  }
   attr {
     name: "T"
     type: "type"
   }
-  attr {
-    name: "N"
-    type: "int"
-    has_minimum: true
-    minimum: 1
-  }
 }
 op {
-  name: "MergeSummary"
+  name: "IdentityN"
   input_arg {
-    name: "inputs"
-    type: DT_STRING
-    number_attr: "N"
+    name: "input"
+    type_list_attr: "T"
   }
   output_arg {
-    name: "summary"
-    type: DT_STRING
+    name: "output"
+    type_list_attr: "T"
   }
   attr {
-    name: "N"
-    type: "int"
+    name: "T"
+    type: "list(type)"
     has_minimum: true
     minimum: 1
   }
 }
 op {
-  name: "MergeV2Checkpoints"
-  input_arg {
-    name: "checkpoint_prefixes"
-    type: DT_STRING
-  }
-  input_arg {
-    name: "destination_prefix"
+  name: "IdentityReader"
+  output_arg {
+    name: "reader_handle"
     type: DT_STRING
+    is_ref: true
   }
   attr {
-    name: "delete_old_dirs"
-    type: "bool"
+    name: "container"
+    type: "string"
     default_value {
-      b: true
+      s: ""
     }
   }
-}
-op {
-  name: "MergeV2Checkpoints"
-  input_arg {
-    name: "checkpoint_prefixes"
-    type: DT_STRING
-  }
-  input_arg {
-    name: "destination_prefix"
-    type: DT_STRING
-  }
   attr {
-    name: "delete_old_dirs"
-    type: "bool"
+    name: "shared_name"
+    type: "string"
     default_value {
-      b: true
+      s: ""
     }
   }
   is_stateful: true
 }
 op {
-  name: "Mfcc"
-  input_arg {
-    name: "spectrogram"
-    type: DT_FLOAT
-  }
-  input_arg {
-    name: "sample_rate"
-    type: DT_INT32
-  }
+  name: "IdentityReaderV2"
   output_arg {
-    name: "output"
-    type: DT_FLOAT
-  }
-  attr {
-    name: "upper_frequency_limit"
-    type: "float"
-    default_value {
-      f: 4000
-    }
-  }
-  attr {
-    name: "lower_frequency_limit"
-    type: "float"
-    default_value {
-      f: 20
-    }
+    name: "reader_handle"
+    type: DT_RESOURCE
   }
   attr {
-    name: "filterbank_channel_count"
-    type: "int"
+    name: "container"
+    type: "string"
     default_value {
-      i: 40
+      s: ""
     }
   }
   attr {
-    name: "dct_coefficient_count"
-    type: "int"
+    name: "shared_name"
+    type: "string"
     default_value {
-      i: 13
+      s: ""
     }
   }
+  is_stateful: true
 }
 op {
-  name: "Min"
+  name: "Igamma"
   input_arg {
-    name: "input"
+    name: "a"
     type_attr: "T"
   }
   input_arg {
-    name: "reduction_indices"
-    type_attr: "Tidx"
+    name: "x"
+    type_attr: "T"
   }
   output_arg {
-    name: "output"
+    name: "z"
     type_attr: "T"
   }
-  attr {
-    name: "keep_dims"
-    type: "bool"
-    default_value {
-      b: false
-    }
-  }
   attr {
     name: "T"
     type: "type"
@@ -19466,56 +18760,24 @@ op {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT64
-        type: DT_INT32
-        type: DT_UINT8
-        type: DT_UINT16
-        type: DT_INT16
-        type: DT_INT8
-        type: DT_COMPLEX64
-        type: DT_COMPLEX128
-        type: DT_QINT8
-        type: DT_QUINT8
-        type: DT_QINT32
-        type: DT_HALF
-      }
-    }
-  }
-  attr {
-    name: "Tidx"
-    type: "type"
-    default_value {
-      type: DT_INT32
-    }
-    allowed_values {
-      list {
-        type: DT_INT32
-        type: DT_INT64
       }
     }
   }
 }
 op {
-  name: "Min"
+  name: "Igammac"
   input_arg {
-    name: "input"
+    name: "a"
     type_attr: "T"
   }
   input_arg {
-    name: "reduction_indices"
-    type_attr: "Tidx"
+    name: "x"
+    type_attr: "T"
   }
   output_arg {
-    name: "output"
+    name: "z"
     type_attr: "T"
   }
-  attr {
-    name: "keep_dims"
-    type: "bool"
-    default_value {
-      b: false
-    }
-  }
   attr {
     name: "T"
     type: "type"
@@ -19523,673 +18785,12932 @@ op {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT64
-        type: DT_INT32
-        type: DT_UINT8
-        type: DT_UINT16
-        type: DT_INT16
-        type: DT_INT8
-        type: DT_COMPLEX64
-        type: DT_COMPLEX128
-        type: DT_QINT8
-        type: DT_QUINT8
-        type: DT_QINT32
-        type: DT_HALF
-        type: DT_UINT32
-        type: DT_UINT64
-      }
-    }
-  }
-  attr {
-    name: "Tidx"
-    type: "type"
-    default_value {
-      type: DT_INT32
-    }
-    allowed_values {
-      list {
-        type: DT_INT32
-        type: DT_INT64
       }
     }
   }
 }
 op {
-  name: "Minimum"
+  name: "IgnoreErrorsDataset"
   input_arg {
-    name: "x"
-    type_attr: "T"
+    name: "input_dataset"
+    type: DT_VARIANT
+  }
+  output_arg {
+    name: "handle"
+    type: DT_VARIANT
+  }
+  attr {
+    name: "output_types"
+    type: "list(type)"
+    has_minimum: true
+    minimum: 1
   }
+  attr {
+    name: "output_shapes"
+    type: "list(shape)"
+    has_minimum: true
+    minimum: 1
+  }
+  is_stateful: true
+}
+op {
+  name: "IgnoreErrorsDataset"
   input_arg {
-    name: "y"
-    type_attr: "T"
+    name: "input_dataset"
+    type: DT_VARIANT
   }
   output_arg {
-    name: "z"
-    type_attr: "T"
+    name: "handle"
+    type: DT_VARIANT
   }
   attr {
-    name: "T"
-    type: "type"
-    allowed_values {
-      list {
-        type: DT_HALF
-        type: DT_FLOAT
-        type: DT_DOUBLE
-        type: DT_INT32
-        type: DT_INT64
-      }
-    }
+    name: "output_types"
+    type: "list(type)"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "output_shapes"
+    type: "list(shape)"
+    has_minimum: true
+    minimum: 1
   }
-  is_commutative: true
 }
 op {
-  name: "MirrorPad"
+  name: "Imag"
   input_arg {
     name: "input"
     type_attr: "T"
   }
-  input_arg {
-    name: "paddings"
-    type_attr: "Tpaddings"
-  }
   output_arg {
     name: "output"
-    type_attr: "T"
+    type_attr: "Tout"
   }
   attr {
     name: "T"
     type: "type"
-  }
-  attr {
-    name: "Tpaddings"
-    type: "type"
     default_value {
-      type: DT_INT32
+      type: DT_COMPLEX64
     }
     allowed_values {
       list {
-        type: DT_INT32
-        type: DT_INT64
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
       }
     }
   }
   attr {
-    name: "mode"
-    type: "string"
+    name: "Tout"
+    type: "type"
+    default_value {
+      type: DT_FLOAT
+    }
     allowed_values {
       list {
-        s: "REFLECT"
-        s: "SYMMETRIC"
+        type: DT_FLOAT
+        type: DT_DOUBLE
       }
     }
   }
 }
 op {
-  name: "MirrorPadGrad"
+  name: "ImageSummary"
   input_arg {
-    name: "input"
-    type_attr: "T"
+    name: "tag"
+    type: DT_STRING
   }
   input_arg {
-    name: "paddings"
-    type_attr: "Tpaddings"
+    name: "tensor"
+    type_attr: "T"
   }
   output_arg {
-    name: "output"
-    type_attr: "T"
+    name: "summary"
+    type: DT_STRING
   }
   attr {
-    name: "T"
-    type: "type"
+    name: "max_images"
+    type: "int"
+    default_value {
+      i: 3
+    }
+    has_minimum: true
+    minimum: 1
   }
   attr {
-    name: "Tpaddings"
+    name: "T"
     type: "type"
     default_value {
-      type: DT_INT32
+      type: DT_FLOAT
     }
     allowed_values {
       list {
-        type: DT_INT32
-        type: DT_INT64
+        type: DT_UINT8
+        type: DT_FLOAT
+        type: DT_HALF
       }
     }
   }
   attr {
-    name: "mode"
-    type: "string"
-    allowed_values {
-      list {
-        s: "REFLECT"
-        s: "SYMMETRIC"
+    name: "bad_color"
+    type: "tensor"
+    default_value {
+      tensor {
+        dtype: DT_UINT8
+        tensor_shape {
+          dim {
+            size: 4
+          }
+        }
+        int_val: 255
+        int_val: 0
+        int_val: 0
+        int_val: 255
       }
     }
   }
 }
 op {
-  name: "Mod"
+  name: "ImageSummary"
   input_arg {
-    name: "x"
-    type_attr: "T"
+    name: "tag"
+    type: DT_STRING
   }
   input_arg {
-    name: "y"
+    name: "tensor"
     type_attr: "T"
   }
   output_arg {
-    name: "z"
-    type_attr: "T"
+    name: "summary"
+    type: DT_STRING
+  }
+  attr {
+    name: "max_images"
+    type: "int"
+    default_value {
+      i: 3
+    }
+    has_minimum: true
+    minimum: 1
   }
   attr {
     name: "T"
     type: "type"
+    default_value {
+      type: DT_FLOAT
+    }
     allowed_values {
       list {
-        type: DT_INT32
-        type: DT_INT64
+        type: DT_UINT8
         type: DT_FLOAT
+        type: DT_HALF
         type: DT_DOUBLE
       }
     }
   }
+  attr {
+    name: "bad_color"
+    type: "tensor"
+    default_value {
+      tensor {
+        dtype: DT_UINT8
+        tensor_shape {
+          dim {
+            size: 4
+          }
+        }
+        int_val: 255
+        int_val: 0
+        int_val: 0
+        int_val: 255
+      }
+    }
+  }
 }
 op {
-  name: "Mul"
-  input_arg {
-    name: "x"
-    type_attr: "T"
-  }
-  input_arg {
-    name: "y"
-    type_attr: "T"
-  }
+  name: "ImmutableConst"
   output_arg {
-    name: "z"
-    type_attr: "T"
+    name: "tensor"
+    type_attr: "dtype"
   }
   attr {
-    name: "T"
+    name: "dtype"
     type: "type"
-    allowed_values {
-      list {
-        type: DT_HALF
-        type: DT_FLOAT
-        type: DT_DOUBLE
-        type: DT_UINT8
-        type: DT_INT8
-        type: DT_UINT16
-        type: DT_INT16
-        type: DT_INT32
-        type: DT_INT64
-        type: DT_COMPLEX64
-        type: DT_COMPLEX128
-      }
-    }
   }
-  is_commutative: true
+  attr {
+    name: "shape"
+    type: "shape"
+  }
+  attr {
+    name: "memory_region_name"
+    type: "string"
+  }
 }
 op {
-  name: "Multinomial"
+  name: "InTopK"
   input_arg {
-    name: "logits"
-    type_attr: "T"
+    name: "predictions"
+    type: DT_FLOAT
   }
   input_arg {
-    name: "num_samples"
-    type: DT_INT32
+    name: "targets"
+    type_attr: "T"
   }
   output_arg {
-    name: "output"
-    type: DT_INT64
-  }
-  attr {
-    name: "seed"
-    type: "int"
-    default_value {
-      i: 0
-    }
+    name: "precision"
+    type: DT_BOOL
   }
   attr {
-    name: "seed2"
+    name: "k"
     type: "int"
-    default_value {
-      i: 0
-    }
   }
   attr {
     name: "T"
     type: "type"
+    default_value {
+      type: DT_INT32
+    }
     allowed_values {
       list {
-        type: DT_FLOAT
-        type: DT_DOUBLE
         type: DT_INT32
         type: DT_INT64
-        type: DT_UINT8
-        type: DT_INT16
-        type: DT_INT8
-        type: DT_UINT16
-        type: DT_HALF
       }
     }
   }
-  is_stateful: true
 }
 op {
-  name: "Multinomial"
+  name: "InTopKV2"
   input_arg {
-    name: "logits"
+    name: "predictions"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "targets"
     type_attr: "T"
   }
   input_arg {
-    name: "num_samples"
-    type: DT_INT32
+    name: "k"
+    type_attr: "T"
   }
   output_arg {
-    name: "output"
-    type: DT_INT64
-  }
-  attr {
-    name: "seed"
-    type: "int"
-    default_value {
-      i: 0
-    }
-  }
-  attr {
-    name: "seed2"
-    type: "int"
-    default_value {
-      i: 0
-    }
+    name: "precision"
+    type: DT_BOOL
   }
   attr {
     name: "T"
     type: "type"
+    default_value {
+      type: DT_INT32
+    }
     allowed_values {
       list {
-        type: DT_FLOAT
-        type: DT_DOUBLE
         type: DT_INT32
         type: DT_INT64
-        type: DT_UINT8
-        type: DT_INT16
-        type: DT_INT8
-        type: DT_UINT16
-        type: DT_HALF
-        type: DT_UINT32
-        type: DT_UINT64
       }
     }
   }
-  is_stateful: true
 }
 op {
-  name: "MutableDenseHashTable"
+  name: "InitializeTable"
   input_arg {
-    name: "empty_key"
-    type_attr: "key_dtype"
-  }
-  output_arg {
     name: "table_handle"
     type: DT_STRING
     is_ref: true
   }
-  attr {
-    name: "container"
-    type: "string"
-    default_value {
-      s: ""
-    }
+  input_arg {
+    name: "keys"
+    type_attr: "Tkey"
   }
-  attr {
-    name: "shared_name"
-    type: "string"
-    default_value {
-      s: ""
-    }
+  input_arg {
+    name: "values"
+    type_attr: "Tval"
   }
   attr {
-    name: "use_node_name_sharing"
-    type: "bool"
-    default_value {
-      b: false
-    }
+    name: "Tkey"
+    type: "type"
   }
   attr {
-    name: "key_dtype"
+    name: "Tval"
     type: "type"
   }
+}
+op {
+  name: "InitializeTableFromTextFile"
+  input_arg {
+    name: "table_handle"
+    type: DT_STRING
+    is_ref: true
+  }
+  input_arg {
+    name: "filename"
+    type: DT_STRING
+  }
   attr {
-    name: "value_dtype"
-    type: "type"
+    name: "key_index"
+    type: "int"
+    has_minimum: true
+    minimum: -2
   }
   attr {
-    name: "value_shape"
-    type: "shape"
-    default_value {
-      shape {
-      }
-    }
+    name: "value_index"
+    type: "int"
+    has_minimum: true
+    minimum: -2
   }
   attr {
-    name: "initial_num_buckets"
+    name: "vocab_size"
     type: "int"
     default_value {
-      i: 131072
+      i: -1
     }
+    has_minimum: true
+    minimum: -1
   }
   attr {
-    name: "max_load_factor"
-    type: "float"
+    name: "delimiter"
+    type: "string"
     default_value {
-      f: 0.8
+      s: "\t"
     }
   }
-  is_stateful: true
 }
 op {
-  name: "MutableDenseHashTableV2"
+  name: "InitializeTableFromTextFileV2"
   input_arg {
-    name: "empty_key"
-    type_attr: "key_dtype"
-  }
-  output_arg {
     name: "table_handle"
     type: DT_RESOURCE
   }
-  attr {
-    name: "container"
-    type: "string"
-    default_value {
-      s: ""
-    }
-  }
-  attr {
-    name: "shared_name"
-    type: "string"
-    default_value {
-      s: ""
-    }
-  }
-  attr {
-    name: "use_node_name_sharing"
-    type: "bool"
-    default_value {
-      b: false
-    }
-  }
-  attr {
-    name: "key_dtype"
-    type: "type"
+  input_arg {
+    name: "filename"
+    type: DT_STRING
   }
   attr {
-    name: "value_dtype"
-    type: "type"
+    name: "key_index"
+    type: "int"
+    has_minimum: true
+    minimum: -2
   }
   attr {
-    name: "value_shape"
-    type: "shape"
-    default_value {
-      shape {
-      }
-    }
+    name: "value_index"
+    type: "int"
+    has_minimum: true
+    minimum: -2
   }
   attr {
-    name: "initial_num_buckets"
+    name: "vocab_size"
     type: "int"
     default_value {
-      i: 131072
+      i: -1
     }
+    has_minimum: true
+    minimum: -1
   }
   attr {
-    name: "max_load_factor"
-    type: "float"
+    name: "delimiter"
+    type: "string"
     default_value {
-      f: 0.8
+      s: "\t"
     }
   }
   is_stateful: true
 }
 op {
-  name: "MutableHashTable"
-  output_arg {
+  name: "InitializeTableV2"
+  input_arg {
     name: "table_handle"
-    type: DT_STRING
-    is_ref: true
-  }
-  attr {
-    name: "container"
-    type: "string"
-    default_value {
-      s: ""
-    }
+    type: DT_RESOURCE
   }
-  attr {
-    name: "shared_name"
-    type: "string"
-    default_value {
-      s: ""
-    }
+  input_arg {
+    name: "keys"
+    type_attr: "Tkey"
   }
-  attr {
-    name: "use_node_name_sharing"
-    type: "bool"
-    default_value {
-      b: false
-    }
+  input_arg {
+    name: "values"
+    type_attr: "Tval"
   }
   attr {
-    name: "key_dtype"
+    name: "Tkey"
     type: "type"
   }
   attr {
-    name: "value_dtype"
+    name: "Tval"
     type: "type"
   }
   is_stateful: true
 }
 op {
-  name: "MutableHashTableOfTensors"
-  output_arg {
-    name: "table_handle"
-    type: DT_STRING
-    is_ref: true
+  name: "InterleaveDataset"
+  input_arg {
+    name: "input_dataset"
+    type: DT_VARIANT
   }
-  attr {
-    name: "container"
-    type: "string"
-    default_value {
-      s: ""
-    }
+  input_arg {
+    name: "other_arguments"
+    type_list_attr: "Targuments"
   }
-  attr {
-    name: "shared_name"
-    type: "string"
-    default_value {
-      s: ""
-    }
+  input_arg {
+    name: "cycle_length"
+    type: DT_INT64
+  }
+  input_arg {
+    name: "block_length"
+    type: DT_INT64
+  }
+  output_arg {
+    name: "handle"
+    type: DT_VARIANT
   }
   attr {
-    name: "use_node_name_sharing"
-    type: "bool"
-    default_value {
-      b: false
-    }
+    name: "f"
+    type: "func"
   }
   attr {
-    name: "key_dtype"
-    type: "type"
+    name: "Targuments"
+    type: "list(type)"
+    has_minimum: true
   }
   attr {
-    name: "value_dtype"
-    type: "type"
+    name: "output_types"
+    type: "list(type)"
+    has_minimum: true
+    minimum: 1
   }
   attr {
-    name: "value_shape"
-    type: "shape"
-    default_value {
-      shape {
-      }
-    }
+    name: "output_shapes"
+    type: "list(shape)"
+    has_minimum: true
+    minimum: 1
   }
   is_stateful: true
 }
 op {
-  name: "MutableHashTableOfTensorsV2"
+  name: "InterleaveDataset"
+  input_arg {
+    name: "input_dataset"
+    type: DT_VARIANT
+  }
+  input_arg {
+    name: "other_arguments"
+    type_list_attr: "Targuments"
+  }
+  input_arg {
+    name: "cycle_length"
+    type: DT_INT64
+  }
+  input_arg {
+    name: "block_length"
+    type: DT_INT64
+  }
   output_arg {
-    name: "table_handle"
-    type: DT_RESOURCE
+    name: "handle"
+    type: DT_VARIANT
   }
   attr {
-    name: "container"
-    type: "string"
-    default_value {
-      s: ""
-    }
+    name: "f"
+    type: "func"
   }
   attr {
-    name: "shared_name"
-    type: "string"
-    default_value {
-      s: ""
-    }
+    name: "Targuments"
+    type: "list(type)"
+    has_minimum: true
   }
   attr {
-    name: "use_node_name_sharing"
-    type: "bool"
-    default_value {
-      b: false
-    }
+    name: "output_types"
+    type: "list(type)"
+    has_minimum: true
+    minimum: 1
   }
   attr {
-    name: "key_dtype"
-    type: "type"
+    name: "output_shapes"
+    type: "list(shape)"
+    has_minimum: true
+    minimum: 1
   }
-  attr {
-    name: "value_dtype"
-    type: "type"
+}
+op {
+  name: "Inv"
+  input_arg {
+    name: "x"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "y"
+    type_attr: "T"
   }
   attr {
-    name: "value_shape"
-    type: "shape"
-    default_value {
-      shape {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
       }
     }
   }
-  is_stateful: true
+  deprecation {
+    version: 17
+  }
 }
 op {
-  name: "MutableHashTableV2"
-  output_arg {
-    name: "table_handle"
+  name: "Inv"
+  input_arg {
+    name: "x"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "y"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_BFLOAT16
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+      }
+    }
+  }
+  deprecation {
+    version: 17
+  }
+}
+op {
+  name: "InvGrad"
+  input_arg {
+    name: "x"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "y"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "z"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+      }
+    }
+  }
+  deprecation {
+    version: 17
+  }
+}
+op {
+  name: "InvGrad"
+  input_arg {
+    name: "y"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "dy"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "z"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+      }
+    }
+  }
+  deprecation {
+    version: 17
+  }
+}
+op {
+  name: "InvGrad"
+  input_arg {
+    name: "y"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "dy"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "z"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_BFLOAT16
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+      }
+    }
+  }
+  deprecation {
+    version: 17
+  }
+}
+op {
+  name: "Invert"
+  input_arg {
+    name: "x"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "y"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_INT8
+        type: DT_INT16
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_UINT8
+        type: DT_UINT16
+      }
+    }
+  }
+}
+op {
+  name: "Invert"
+  input_arg {
+    name: "x"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "y"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_INT8
+        type: DT_INT16
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_UINT32
+        type: DT_UINT64
+      }
+    }
+  }
+}
+op {
+  name: "InvertPermutation"
+  input_arg {
+    name: "x"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "y"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    default_value {
+      type: DT_INT32
+    }
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+}
+op {
+  name: "IsFinite"
+  input_arg {
+    name: "x"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "y"
+    type: DT_BOOL
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_FLOAT
+        type: DT_DOUBLE
+      }
+    }
+  }
+}
+op {
+  name: "IsFinite"
+  input_arg {
+    name: "x"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "y"
+    type: DT_BOOL
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_BFLOAT16
+        type: DT_FLOAT
+        type: DT_DOUBLE
+      }
+    }
+  }
+}
+op {
+  name: "IsInf"
+  input_arg {
+    name: "x"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "y"
+    type: DT_BOOL
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_FLOAT
+        type: DT_DOUBLE
+      }
+    }
+  }
+}
+op {
+  name: "IsInf"
+  input_arg {
+    name: "x"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "y"
+    type: DT_BOOL
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_BFLOAT16
+        type: DT_FLOAT
+        type: DT_DOUBLE
+      }
+    }
+  }
+}
+op {
+  name: "IsNan"
+  input_arg {
+    name: "x"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "y"
+    type: DT_BOOL
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_FLOAT
+        type: DT_DOUBLE
+      }
+    }
+  }
+}
+op {
+  name: "IsNan"
+  input_arg {
+    name: "x"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "y"
+    type: DT_BOOL
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_BFLOAT16
+        type: DT_FLOAT
+        type: DT_DOUBLE
+      }
+    }
+  }
+}
+op {
+  name: "IsVariableInitialized"
+  input_arg {
+    name: "ref"
+    type_attr: "dtype"
+    is_ref: true
+  }
+  output_arg {
+    name: "is_initialized"
+    type: DT_BOOL
+  }
+  attr {
+    name: "dtype"
+    type: "type"
+  }
+  allows_uninitialized_input: true
+}
+op {
+  name: "Iterator"
+  output_arg {
+    name: "handle"
+    type: DT_RESOURCE
+  }
+  attr {
+    name: "shared_name"
+    type: "string"
+  }
+  attr {
+    name: "container"
+    type: "string"
+  }
+  attr {
+    name: "output_types"
+    type: "list(type)"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "output_shapes"
+    type: "list(shape)"
+    has_minimum: true
+    minimum: 1
+  }
+  is_stateful: true
+}
+op {
+  name: "IteratorFromStringHandle"
+  input_arg {
+    name: "string_handle"
+    type: DT_STRING
+  }
+  output_arg {
+    name: "resource_handle"
+    type: DT_RESOURCE
+  }
+  is_stateful: true
+}
+op {
+  name: "IteratorFromStringHandle"
+  input_arg {
+    name: "string_handle"
+    type: DT_STRING
+  }
+  output_arg {
+    name: "resource_handle"
+    type: DT_RESOURCE
+  }
+  attr {
+    name: "output_types"
+    type: "list(type)"
+    default_value {
+      list {
+      }
+    }
+    has_minimum: true
+  }
+  attr {
+    name: "output_shapes"
+    type: "list(shape)"
+    default_value {
+      list {
+      }
+    }
+    has_minimum: true
+  }
+  is_stateful: true
+}
+op {
+  name: "IteratorGetNext"
+  input_arg {
+    name: "iterator"
+    type: DT_RESOURCE
+  }
+  output_arg {
+    name: "components"
+    type_list_attr: "output_types"
+  }
+  attr {
+    name: "output_types"
+    type: "list(type)"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "output_shapes"
+    type: "list(shape)"
+    has_minimum: true
+    minimum: 1
+  }
+  is_stateful: true
+}
+op {
+  name: "IteratorSetStatsAggregator"
+  input_arg {
+    name: "iterator_handle"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "stats_aggregator_handle"
+    type: DT_RESOURCE
+  }
+  is_stateful: true
+}
+op {
+  name: "IteratorToStringHandle"
+  input_arg {
+    name: "resource_handle"
+    type: DT_RESOURCE
+  }
+  output_arg {
+    name: "string_handle"
+    type: DT_STRING
+  }
+  is_stateful: true
+}
+op {
+  name: "L2Loss"
+  input_arg {
+    name: "t"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_HALF
+      }
+    }
+  }
+}
+op {
+  name: "L2Loss"
+  input_arg {
+    name: "t"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_FLOAT
+        type: DT_DOUBLE
+      }
+    }
+  }
+}
+op {
+  name: "L2Loss"
+  input_arg {
+    name: "t"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_BFLOAT16
+        type: DT_FLOAT
+        type: DT_DOUBLE
+      }
+    }
+  }
+}
+op {
+  name: "LMDBReader"
+  output_arg {
+    name: "reader_handle"
+    type: DT_STRING
+    is_ref: true
+  }
+  attr {
+    name: "container"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  attr {
+    name: "shared_name"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  is_stateful: true
+}
+op {
+  name: "LRN"
+  input_arg {
+    name: "input"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "depth_radius"
+    type: "int"
+    default_value {
+      i: 5
+    }
+  }
+  attr {
+    name: "bias"
+    type: "float"
+    default_value {
+      f: 1
+    }
+  }
+  attr {
+    name: "alpha"
+    type: "float"
+    default_value {
+      f: 1
+    }
+  }
+  attr {
+    name: "beta"
+    type: "float"
+    default_value {
+      f: 0.5
+    }
+  }
+  attr {
+    name: "T"
+    type: "type"
+    default_value {
+      type: DT_FLOAT
+    }
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_HALF
+      }
+    }
+  }
+}
+op {
+  name: "LRN"
+  input_arg {
+    name: "input"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "depth_radius"
+    type: "int"
+    default_value {
+      i: 5
+    }
+  }
+  attr {
+    name: "bias"
+    type: "float"
+    default_value {
+      f: 1
+    }
+  }
+  attr {
+    name: "alpha"
+    type: "float"
+    default_value {
+      f: 1
+    }
+  }
+  attr {
+    name: "beta"
+    type: "float"
+    default_value {
+      f: 0.5
+    }
+  }
+  attr {
+    name: "T"
+    type: "type"
+    default_value {
+      type: DT_FLOAT
+    }
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_BFLOAT16
+        type: DT_FLOAT
+      }
+    }
+  }
+}
+op {
+  name: "LRNGrad"
+  input_arg {
+    name: "input_grads"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "input_image"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "output_image"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "depth_radius"
+    type: "int"
+    default_value {
+      i: 5
+    }
+  }
+  attr {
+    name: "bias"
+    type: "float"
+    default_value {
+      f: 1
+    }
+  }
+  attr {
+    name: "alpha"
+    type: "float"
+    default_value {
+      f: 1
+    }
+  }
+  attr {
+    name: "beta"
+    type: "float"
+    default_value {
+      f: 0.5
+    }
+  }
+  attr {
+    name: "T"
+    type: "type"
+    default_value {
+      type: DT_FLOAT
+    }
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_HALF
+      }
+    }
+  }
+}
+op {
+  name: "LRNGrad"
+  input_arg {
+    name: "input_grads"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "input_image"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "output_image"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "depth_radius"
+    type: "int"
+    default_value {
+      i: 5
+    }
+  }
+  attr {
+    name: "bias"
+    type: "float"
+    default_value {
+      f: 1
+    }
+  }
+  attr {
+    name: "alpha"
+    type: "float"
+    default_value {
+      f: 1
+    }
+  }
+  attr {
+    name: "beta"
+    type: "float"
+    default_value {
+      f: 0.5
+    }
+  }
+  attr {
+    name: "T"
+    type: "type"
+    default_value {
+      type: DT_FLOAT
+    }
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_BFLOAT16
+        type: DT_FLOAT
+      }
+    }
+  }
+}
+op {
+  name: "LatencyStatsDataset"
+  input_arg {
+    name: "input_dataset"
+    type: DT_VARIANT
+  }
+  input_arg {
+    name: "tag"
+    type: DT_STRING
+  }
+  output_arg {
+    name: "handle"
+    type: DT_VARIANT
+  }
+  attr {
+    name: "output_types"
+    type: "list(type)"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "output_shapes"
+    type: "list(shape)"
+    has_minimum: true
+    minimum: 1
+  }
+}
+op {
+  name: "LearnedUnigramCandidateSampler"
+  input_arg {
+    name: "true_classes"
+    type: DT_INT64
+  }
+  output_arg {
+    name: "sampled_candidates"
+    type: DT_INT64
+  }
+  output_arg {
+    name: "true_expected_count"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "sampled_expected_count"
+    type: DT_FLOAT
+  }
+  attr {
+    name: "num_true"
+    type: "int"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "num_sampled"
+    type: "int"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "unique"
+    type: "bool"
+  }
+  attr {
+    name: "range_max"
+    type: "int"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "seed"
+    type: "int"
+    default_value {
+      i: 0
+    }
+  }
+  attr {
+    name: "seed2"
+    type: "int"
+    default_value {
+      i: 0
+    }
+  }
+}
+op {
+  name: "LearnedUnigramCandidateSampler"
+  input_arg {
+    name: "true_classes"
+    type: DT_INT64
+  }
+  output_arg {
+    name: "sampled_candidates"
+    type: DT_INT64
+  }
+  output_arg {
+    name: "true_expected_count"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "sampled_expected_count"
+    type: DT_FLOAT
+  }
+  attr {
+    name: "num_true"
+    type: "int"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "num_sampled"
+    type: "int"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "unique"
+    type: "bool"
+  }
+  attr {
+    name: "range_max"
+    type: "int"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "seed"
+    type: "int"
+    default_value {
+      i: 0
+    }
+  }
+  attr {
+    name: "seed2"
+    type: "int"
+    default_value {
+      i: 0
+    }
+  }
+  is_stateful: true
+}
+op {
+  name: "LeftShift"
+  input_arg {
+    name: "x"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "y"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "z"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_INT8
+        type: DT_INT16
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_UINT32
+        type: DT_UINT64
+      }
+    }
+  }
+  is_commutative: true
+}
+op {
+  name: "Less"
+  input_arg {
+    name: "x"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "y"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "z"
+    type: DT_BOOL
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_UINT16
+        type: DT_HALF
+      }
+    }
+  }
+}
+op {
+  name: "Less"
+  input_arg {
+    name: "x"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "y"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "z"
+    type: DT_BOOL
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_UINT16
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+      }
+    }
+  }
+}
+op {
+  name: "Less"
+  input_arg {
+    name: "x"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "y"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "z"
+    type: DT_BOOL
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_UINT16
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
+      }
+    }
+  }
+}
+op {
+  name: "LessEqual"
+  input_arg {
+    name: "x"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "y"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "z"
+    type: DT_BOOL
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_UINT16
+        type: DT_HALF
+      }
+    }
+  }
+}
+op {
+  name: "LessEqual"
+  input_arg {
+    name: "x"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "y"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "z"
+    type: DT_BOOL
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_UINT16
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+      }
+    }
+  }
+}
+op {
+  name: "LessEqual"
+  input_arg {
+    name: "x"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "y"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "z"
+    type: DT_BOOL
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_UINT16
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
+      }
+    }
+  }
+}
+op {
+  name: "Lgamma"
+  input_arg {
+    name: "x"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "y"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_FLOAT
+        type: DT_DOUBLE
+      }
+    }
+  }
+}
+op {
+  name: "Lgamma"
+  input_arg {
+    name: "x"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "y"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_BFLOAT16
+        type: DT_FLOAT
+        type: DT_DOUBLE
+      }
+    }
+  }
+}
+op {
+  name: "LinSpace"
+  input_arg {
+    name: "start"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "stop"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "num"
+    type_attr: "Tidx"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+      }
+    }
+  }
+  attr {
+    name: "Tidx"
+    type: "type"
+    default_value {
+      type: DT_INT32
+    }
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+}
+op {
+  name: "LinSpace"
+  input_arg {
+    name: "start"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "stop"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "num"
+    type_attr: "Tidx"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_BFLOAT16
+        type: DT_FLOAT
+        type: DT_DOUBLE
+      }
+    }
+  }
+  attr {
+    name: "Tidx"
+    type: "type"
+    default_value {
+      type: DT_INT32
+    }
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+}
+op {
+  name: "ListDiff"
+  input_arg {
+    name: "x"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "y"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "out"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "idx"
+    type_attr: "out_idx"
+  }
+  attr {
+    name: "T"
+    type: "type"
+  }
+  attr {
+    name: "out_idx"
+    type: "type"
+    default_value {
+      type: DT_INT32
+    }
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+}
+op {
+  name: "LoadAndRemapMatrix"
+  input_arg {
+    name: "ckpt_path"
+    type: DT_STRING
+  }
+  input_arg {
+    name: "old_tensor_name"
+    type: DT_STRING
+  }
+  input_arg {
+    name: "row_remapping"
+    type: DT_INT64
+  }
+  input_arg {
+    name: "col_remapping"
+    type: DT_INT64
+  }
+  input_arg {
+    name: "initializing_values"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "output_matrix"
+    type: DT_FLOAT
+  }
+  attr {
+    name: "num_rows"
+    type: "int"
+    has_minimum: true
+  }
+  attr {
+    name: "num_cols"
+    type: "int"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "max_rows_in_memory"
+    type: "int"
+    default_value {
+      i: -1
+    }
+  }
+  is_stateful: true
+}
+op {
+  name: "Log"
+  input_arg {
+    name: "x"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "y"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+      }
+    }
+  }
+}
+op {
+  name: "Log"
+  input_arg {
+    name: "x"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "y"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_BFLOAT16
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+      }
+    }
+  }
+}
+op {
+  name: "Log1p"
+  input_arg {
+    name: "x"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "y"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+      }
+    }
+  }
+}
+op {
+  name: "Log1p"
+  input_arg {
+    name: "x"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "y"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_BFLOAT16
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+      }
+    }
+  }
+}
+op {
+  name: "LogMatrixDeterminant"
+  input_arg {
+    name: "input"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "sign"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "log_abs_determinant"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+      }
+    }
+  }
+}
+op {
+  name: "LogSoftmax"
+  input_arg {
+    name: "logits"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "logsoftmax"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_FLOAT
+        type: DT_DOUBLE
+      }
+    }
+  }
+}
+op {
+  name: "LogSoftmax"
+  input_arg {
+    name: "logits"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "logsoftmax"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_BFLOAT16
+        type: DT_FLOAT
+        type: DT_DOUBLE
+      }
+    }
+  }
+}
+op {
+  name: "LogUniformCandidateSampler"
+  input_arg {
+    name: "true_classes"
+    type: DT_INT64
+  }
+  output_arg {
+    name: "sampled_candidates"
+    type: DT_INT64
+  }
+  output_arg {
+    name: "true_expected_count"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "sampled_expected_count"
+    type: DT_FLOAT
+  }
+  attr {
+    name: "num_true"
+    type: "int"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "num_sampled"
+    type: "int"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "unique"
+    type: "bool"
+  }
+  attr {
+    name: "range_max"
+    type: "int"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "seed"
+    type: "int"
+    default_value {
+      i: 0
+    }
+  }
+  attr {
+    name: "seed2"
+    type: "int"
+    default_value {
+      i: 0
+    }
+  }
+}
+op {
+  name: "LogUniformCandidateSampler"
+  input_arg {
+    name: "true_classes"
+    type: DT_INT64
+  }
+  output_arg {
+    name: "sampled_candidates"
+    type: DT_INT64
+  }
+  output_arg {
+    name: "true_expected_count"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "sampled_expected_count"
+    type: DT_FLOAT
+  }
+  attr {
+    name: "num_true"
+    type: "int"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "num_sampled"
+    type: "int"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "unique"
+    type: "bool"
+  }
+  attr {
+    name: "range_max"
+    type: "int"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "seed"
+    type: "int"
+    default_value {
+      i: 0
+    }
+  }
+  attr {
+    name: "seed2"
+    type: "int"
+    default_value {
+      i: 0
+    }
+  }
+  is_stateful: true
+}
+op {
+  name: "LogicalAnd"
+  input_arg {
+    name: "x"
+    type: DT_BOOL
+  }
+  input_arg {
+    name: "y"
+    type: DT_BOOL
+  }
+  output_arg {
+    name: "z"
+    type: DT_BOOL
+  }
+  is_commutative: true
+}
+op {
+  name: "LogicalNot"
+  input_arg {
+    name: "x"
+    type: DT_BOOL
+  }
+  output_arg {
+    name: "y"
+    type: DT_BOOL
+  }
+}
+op {
+  name: "LogicalOr"
+  input_arg {
+    name: "x"
+    type: DT_BOOL
+  }
+  input_arg {
+    name: "y"
+    type: DT_BOOL
+  }
+  output_arg {
+    name: "z"
+    type: DT_BOOL
+  }
+  is_commutative: true
+}
+op {
+  name: "LookupTableExport"
+  input_arg {
+    name: "table_handle"
+    type: DT_STRING
+    is_ref: true
+  }
+  output_arg {
+    name: "keys"
+    type_attr: "Tkeys"
+  }
+  output_arg {
+    name: "values"
+    type_attr: "Tvalues"
+  }
+  attr {
+    name: "Tkeys"
+    type: "type"
+  }
+  attr {
+    name: "Tvalues"
+    type: "type"
+  }
+}
+op {
+  name: "LookupTableExportV2"
+  input_arg {
+    name: "table_handle"
+    type: DT_RESOURCE
+  }
+  output_arg {
+    name: "keys"
+    type_attr: "Tkeys"
+  }
+  output_arg {
+    name: "values"
+    type_attr: "Tvalues"
+  }
+  attr {
+    name: "Tkeys"
+    type: "type"
+  }
+  attr {
+    name: "Tvalues"
+    type: "type"
+  }
+  is_stateful: true
+}
+op {
+  name: "LookupTableFind"
+  input_arg {
+    name: "table_handle"
+    type: DT_STRING
+    is_ref: true
+  }
+  input_arg {
+    name: "keys"
+    type_attr: "Tin"
+  }
+  input_arg {
+    name: "default_value"
+    type_attr: "Tout"
+  }
+  output_arg {
+    name: "values"
+    type_attr: "Tout"
+  }
+  attr {
+    name: "Tin"
+    type: "type"
+  }
+  attr {
+    name: "Tout"
+    type: "type"
+  }
+}
+op {
+  name: "LookupTableFindV2"
+  input_arg {
+    name: "table_handle"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "keys"
+    type_attr: "Tin"
+  }
+  input_arg {
+    name: "default_value"
+    type_attr: "Tout"
+  }
+  output_arg {
+    name: "values"
+    type_attr: "Tout"
+  }
+  attr {
+    name: "Tin"
+    type: "type"
+  }
+  attr {
+    name: "Tout"
+    type: "type"
+  }
+  is_stateful: true
+}
+op {
+  name: "LookupTableImport"
+  input_arg {
+    name: "table_handle"
+    type: DT_STRING
+    is_ref: true
+  }
+  input_arg {
+    name: "keys"
+    type_attr: "Tin"
+  }
+  input_arg {
+    name: "values"
+    type_attr: "Tout"
+  }
+  attr {
+    name: "Tin"
+    type: "type"
+  }
+  attr {
+    name: "Tout"
+    type: "type"
+  }
+}
+op {
+  name: "LookupTableImportV2"
+  input_arg {
+    name: "table_handle"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "keys"
+    type_attr: "Tin"
+  }
+  input_arg {
+    name: "values"
+    type_attr: "Tout"
+  }
+  attr {
+    name: "Tin"
+    type: "type"
+  }
+  attr {
+    name: "Tout"
+    type: "type"
+  }
+  is_stateful: true
+}
+op {
+  name: "LookupTableInsert"
+  input_arg {
+    name: "table_handle"
+    type: DT_STRING
+    is_ref: true
+  }
+  input_arg {
+    name: "keys"
+    type_attr: "Tin"
+  }
+  input_arg {
+    name: "values"
+    type_attr: "Tout"
+  }
+  attr {
+    name: "Tin"
+    type: "type"
+  }
+  attr {
+    name: "Tout"
+    type: "type"
+  }
+}
+op {
+  name: "LookupTableInsertV2"
+  input_arg {
+    name: "table_handle"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "keys"
+    type_attr: "Tin"
+  }
+  input_arg {
+    name: "values"
+    type_attr: "Tout"
+  }
+  attr {
+    name: "Tin"
+    type: "type"
+  }
+  attr {
+    name: "Tout"
+    type: "type"
+  }
+  is_stateful: true
+}
+op {
+  name: "LookupTableSize"
+  input_arg {
+    name: "table_handle"
+    type: DT_STRING
+    is_ref: true
+  }
+  output_arg {
+    name: "size"
+    type: DT_INT64
+  }
+}
+op {
+  name: "LookupTableSizeV2"
+  input_arg {
+    name: "table_handle"
+    type: DT_RESOURCE
+  }
+  output_arg {
+    name: "size"
+    type: DT_INT64
+  }
+  is_stateful: true
+}
+op {
+  name: "LoopCond"
+  input_arg {
+    name: "input"
+    type: DT_BOOL
+  }
+  output_arg {
+    name: "output"
+    type: DT_BOOL
+  }
+}
+op {
+  name: "MakeIterator"
+  input_arg {
+    name: "dataset"
+    type: DT_VARIANT
+  }
+  input_arg {
+    name: "iterator"
+    type: DT_RESOURCE
+  }
+  is_stateful: true
+}
+op {
+  name: "MapAndBatchDataset"
+  input_arg {
+    name: "input_dataset"
+    type: DT_VARIANT
+  }
+  input_arg {
+    name: "other_arguments"
+    type_list_attr: "Targuments"
+  }
+  input_arg {
+    name: "batch_size"
+    type: DT_INT64
+  }
+  input_arg {
+    name: "num_parallel_batches"
+    type: DT_INT64
+  }
+  output_arg {
+    name: "handle"
+    type: DT_VARIANT
+  }
+  attr {
+    name: "f"
+    type: "func"
+  }
+  attr {
+    name: "Targuments"
+    type: "list(type)"
+    has_minimum: true
+  }
+  attr {
+    name: "output_types"
+    type: "list(type)"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "output_shapes"
+    type: "list(shape)"
+    has_minimum: true
+    minimum: 1
+  }
+}
+op {
+  name: "MapClear"
+  attr {
+    name: "capacity"
+    type: "int"
+    default_value {
+      i: 0
+    }
+    has_minimum: true
+  }
+  attr {
+    name: "memory_limit"
+    type: "int"
+    default_value {
+      i: 0
+    }
+    has_minimum: true
+  }
+  attr {
+    name: "dtypes"
+    type: "list(type)"
+  }
+  attr {
+    name: "container"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  attr {
+    name: "shared_name"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  is_stateful: true
+}
+op {
+  name: "MapDataset"
+  input_arg {
+    name: "input_dataset"
+    type: DT_VARIANT
+  }
+  input_arg {
+    name: "other_arguments"
+    type_list_attr: "Targuments"
+  }
+  output_arg {
+    name: "handle"
+    type: DT_VARIANT
+  }
+  attr {
+    name: "f"
+    type: "func"
+  }
+  attr {
+    name: "Targuments"
+    type: "list(type)"
+    has_minimum: true
+  }
+  attr {
+    name: "output_types"
+    type: "list(type)"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "output_shapes"
+    type: "list(shape)"
+    has_minimum: true
+    minimum: 1
+  }
+  is_stateful: true
+}
+op {
+  name: "MapDataset"
+  input_arg {
+    name: "input_dataset"
+    type: DT_VARIANT
+  }
+  input_arg {
+    name: "other_arguments"
+    type_list_attr: "Targuments"
+  }
+  output_arg {
+    name: "handle"
+    type: DT_VARIANT
+  }
+  attr {
+    name: "f"
+    type: "func"
+  }
+  attr {
+    name: "Targuments"
+    type: "list(type)"
+    has_minimum: true
+  }
+  attr {
+    name: "output_types"
+    type: "list(type)"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "output_shapes"
+    type: "list(shape)"
+    has_minimum: true
+    minimum: 1
+  }
+}
+op {
+  name: "MapIncompleteSize"
+  output_arg {
+    name: "size"
+    type: DT_INT32
+  }
+  attr {
+    name: "capacity"
+    type: "int"
+    default_value {
+      i: 0
+    }
+    has_minimum: true
+  }
+  attr {
+    name: "memory_limit"
+    type: "int"
+    default_value {
+      i: 0
+    }
+    has_minimum: true
+  }
+  attr {
+    name: "dtypes"
+    type: "list(type)"
+  }
+  attr {
+    name: "container"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  attr {
+    name: "shared_name"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  is_stateful: true
+}
+op {
+  name: "MapPeek"
+  input_arg {
+    name: "key"
+    type: DT_INT64
+  }
+  input_arg {
+    name: "indices"
+    type: DT_INT32
+  }
+  output_arg {
+    name: "values"
+    type_list_attr: "dtypes"
+  }
+  attr {
+    name: "capacity"
+    type: "int"
+    default_value {
+      i: 0
+    }
+    has_minimum: true
+  }
+  attr {
+    name: "memory_limit"
+    type: "int"
+    default_value {
+      i: 0
+    }
+    has_minimum: true
+  }
+  attr {
+    name: "dtypes"
+    type: "list(type)"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "container"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  attr {
+    name: "shared_name"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  is_stateful: true
+}
+op {
+  name: "MapSize"
+  output_arg {
+    name: "size"
+    type: DT_INT32
+  }
+  attr {
+    name: "capacity"
+    type: "int"
+    default_value {
+      i: 0
+    }
+    has_minimum: true
+  }
+  attr {
+    name: "memory_limit"
+    type: "int"
+    default_value {
+      i: 0
+    }
+    has_minimum: true
+  }
+  attr {
+    name: "dtypes"
+    type: "list(type)"
+  }
+  attr {
+    name: "container"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  attr {
+    name: "shared_name"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  is_stateful: true
+}
+op {
+  name: "MapStage"
+  input_arg {
+    name: "key"
+    type: DT_INT64
+  }
+  input_arg {
+    name: "indices"
+    type: DT_INT32
+  }
+  input_arg {
+    name: "values"
+    type_list_attr: "fake_dtypes"
+  }
+  attr {
+    name: "capacity"
+    type: "int"
+    default_value {
+      i: 0
+    }
+    has_minimum: true
+  }
+  attr {
+    name: "memory_limit"
+    type: "int"
+    default_value {
+      i: 0
+    }
+    has_minimum: true
+  }
+  attr {
+    name: "dtypes"
+    type: "list(type)"
+  }
+  attr {
+    name: "fake_dtypes"
+    type: "list(type)"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "container"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  attr {
+    name: "shared_name"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  is_stateful: true
+}
+op {
+  name: "MapUnstage"
+  input_arg {
+    name: "key"
+    type: DT_INT64
+  }
+  input_arg {
+    name: "indices"
+    type: DT_INT32
+  }
+  output_arg {
+    name: "values"
+    type_list_attr: "dtypes"
+  }
+  attr {
+    name: "capacity"
+    type: "int"
+    default_value {
+      i: 0
+    }
+    has_minimum: true
+  }
+  attr {
+    name: "memory_limit"
+    type: "int"
+    default_value {
+      i: 0
+    }
+    has_minimum: true
+  }
+  attr {
+    name: "dtypes"
+    type: "list(type)"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "container"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  attr {
+    name: "shared_name"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  is_stateful: true
+}
+op {
+  name: "MapUnstageNoKey"
+  input_arg {
+    name: "indices"
+    type: DT_INT32
+  }
+  output_arg {
+    name: "key"
+    type: DT_INT64
+  }
+  output_arg {
+    name: "values"
+    type_list_attr: "dtypes"
+  }
+  attr {
+    name: "capacity"
+    type: "int"
+    default_value {
+      i: 0
+    }
+    has_minimum: true
+  }
+  attr {
+    name: "memory_limit"
+    type: "int"
+    default_value {
+      i: 0
+    }
+    has_minimum: true
+  }
+  attr {
+    name: "dtypes"
+    type: "list(type)"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "container"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  attr {
+    name: "shared_name"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  is_stateful: true
+}
+op {
+  name: "MatMul"
+  input_arg {
+    name: "a"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "b"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "product"
+    type_attr: "T"
+  }
+  attr {
+    name: "transpose_a"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  attr {
+    name: "transpose_b"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+      }
+    }
+  }
+}
+op {
+  name: "MatMul"
+  input_arg {
+    name: "a"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "b"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "product"
+    type_attr: "T"
+  }
+  attr {
+    name: "transpose_a"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  attr {
+    name: "transpose_b"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_BFLOAT16
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+      }
+    }
+  }
+}
+op {
+  name: "MatchingFiles"
+  input_arg {
+    name: "pattern"
+    type: DT_STRING
+  }
+  output_arg {
+    name: "filenames"
+    type: DT_STRING
+  }
+}
+op {
+  name: "MatrixBandPart"
+  input_arg {
+    name: "input"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "num_lower"
+    type: DT_INT64
+  }
+  input_arg {
+    name: "num_upper"
+    type: DT_INT64
+  }
+  output_arg {
+    name: "band"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+  }
+}
+op {
+  name: "MatrixDeterminant"
+  input_arg {
+    name: "input"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+      }
+    }
+  }
+}
+op {
+  name: "MatrixDeterminant"
+  input_arg {
+    name: "input"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+      }
+    }
+  }
+}
+op {
+  name: "MatrixDiag"
+  input_arg {
+    name: "diagonal"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+  }
+}
+op {
+  name: "MatrixDiagPart"
+  input_arg {
+    name: "input"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "diagonal"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+  }
+}
+op {
+  name: "MatrixExponential"
+  input_arg {
+    name: "input"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_DOUBLE
+        type: DT_FLOAT
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+      }
+    }
+  }
+}
+op {
+  name: "MatrixInverse"
+  input_arg {
+    name: "input"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "adjoint"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_DOUBLE
+        type: DT_FLOAT
+      }
+    }
+  }
+}
+op {
+  name: "MatrixInverse"
+  input_arg {
+    name: "input"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "adjoint"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_DOUBLE
+        type: DT_FLOAT
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+      }
+    }
+  }
+}
+op {
+  name: "MatrixSetDiag"
+  input_arg {
+    name: "input"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "diagonal"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+  }
+}
+op {
+  name: "MatrixSolve"
+  input_arg {
+    name: "matrix"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "rhs"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "adjoint"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_DOUBLE
+        type: DT_FLOAT
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+      }
+    }
+  }
+}
+op {
+  name: "MatrixSolveLs"
+  input_arg {
+    name: "matrix"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "rhs"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "l2_regularizer"
+    type: DT_DOUBLE
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_DOUBLE
+        type: DT_FLOAT
+      }
+    }
+  }
+  attr {
+    name: "fast"
+    type: "bool"
+    default_value {
+      b: true
+    }
+  }
+}
+op {
+  name: "MatrixSolveLs"
+  input_arg {
+    name: "matrix"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "rhs"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "l2_regularizer"
+    type: DT_DOUBLE
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_DOUBLE
+        type: DT_FLOAT
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+      }
+    }
+  }
+  attr {
+    name: "fast"
+    type: "bool"
+    default_value {
+      b: true
+    }
+  }
+}
+op {
+  name: "MatrixTriangularSolve"
+  input_arg {
+    name: "matrix"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "rhs"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "lower"
+    type: "bool"
+    default_value {
+      b: true
+    }
+  }
+  attr {
+    name: "adjoint"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_DOUBLE
+        type: DT_FLOAT
+      }
+    }
+  }
+}
+op {
+  name: "MatrixTriangularSolve"
+  input_arg {
+    name: "matrix"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "rhs"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "lower"
+    type: "bool"
+    default_value {
+      b: true
+    }
+  }
+  attr {
+    name: "adjoint"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_DOUBLE
+        type: DT_FLOAT
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+      }
+    }
+  }
+}
+op {
+  name: "Max"
+  input_arg {
+    name: "input"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "reduction_indices"
+    type_attr: "Tidx"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "keep_dims"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+      }
+    }
+  }
+  attr {
+    name: "Tidx"
+    type: "type"
+    default_value {
+      type: DT_INT32
+    }
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+}
+op {
+  name: "Max"
+  input_arg {
+    name: "input"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "reduction_indices"
+    type_attr: "Tidx"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "keep_dims"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+      }
+    }
+  }
+  attr {
+    name: "Tidx"
+    type: "type"
+    default_value {
+      type: DT_INT32
+    }
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+}
+op {
+  name: "Max"
+  input_arg {
+    name: "input"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "reduction_indices"
+    type_attr: "Tidx"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "keep_dims"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
+      }
+    }
+  }
+  attr {
+    name: "Tidx"
+    type: "type"
+    default_value {
+      type: DT_INT32
+    }
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+}
+op {
+  name: "MaxPool"
+  input_arg {
+    name: "input"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    default_value {
+      type: DT_FLOAT
+    }
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_HALF
+      }
+    }
+  }
+  attr {
+    name: "ksize"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 4
+  }
+  attr {
+    name: "strides"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 4
+  }
+  attr {
+    name: "padding"
+    type: "string"
+    allowed_values {
+      list {
+        s: "SAME"
+        s: "VALID"
+      }
+    }
+  }
+  attr {
+    name: "data_format"
+    type: "string"
+    default_value {
+      s: "NHWC"
+    }
+    allowed_values {
+      list {
+        s: "NHWC"
+        s: "NCHW"
+      }
+    }
+  }
+}
+op {
+  name: "MaxPool"
+  input_arg {
+    name: "input"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    default_value {
+      type: DT_FLOAT
+    }
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_UINT16
+        type: DT_HALF
+      }
+    }
+  }
+  attr {
+    name: "ksize"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 4
+  }
+  attr {
+    name: "strides"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 4
+  }
+  attr {
+    name: "padding"
+    type: "string"
+    allowed_values {
+      list {
+        s: "SAME"
+        s: "VALID"
+      }
+    }
+  }
+  attr {
+    name: "data_format"
+    type: "string"
+    default_value {
+      s: "NHWC"
+    }
+    allowed_values {
+      list {
+        s: "NHWC"
+        s: "NCHW"
+      }
+    }
+  }
+}
+op {
+  name: "MaxPool"
+  input_arg {
+    name: "input"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    default_value {
+      type: DT_FLOAT
+    }
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_UINT16
+        type: DT_HALF
+        type: DT_QINT8
+      }
+    }
+  }
+  attr {
+    name: "ksize"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 4
+  }
+  attr {
+    name: "strides"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 4
+  }
+  attr {
+    name: "padding"
+    type: "string"
+    allowed_values {
+      list {
+        s: "SAME"
+        s: "VALID"
+      }
+    }
+  }
+  attr {
+    name: "data_format"
+    type: "string"
+    default_value {
+      s: "NHWC"
+    }
+    allowed_values {
+      list {
+        s: "NHWC"
+        s: "NCHW"
+        s: "NCHW_VECT_C"
+      }
+    }
+  }
+}
+op {
+  name: "MaxPool"
+  input_arg {
+    name: "input"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    default_value {
+      type: DT_FLOAT
+    }
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_BFLOAT16
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_UINT16
+        type: DT_QINT8
+      }
+    }
+  }
+  attr {
+    name: "ksize"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 4
+  }
+  attr {
+    name: "strides"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 4
+  }
+  attr {
+    name: "padding"
+    type: "string"
+    allowed_values {
+      list {
+        s: "SAME"
+        s: "VALID"
+      }
+    }
+  }
+  attr {
+    name: "data_format"
+    type: "string"
+    default_value {
+      s: "NHWC"
+    }
+    allowed_values {
+      list {
+        s: "NHWC"
+        s: "NCHW"
+        s: "NCHW_VECT_C"
+      }
+    }
+  }
+}
+op {
+  name: "MaxPool3D"
+  input_arg {
+    name: "input"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "ksize"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 5
+  }
+  attr {
+    name: "strides"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 5
+  }
+  attr {
+    name: "padding"
+    type: "string"
+    allowed_values {
+      list {
+        s: "SAME"
+        s: "VALID"
+      }
+    }
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+      }
+    }
+  }
+}
+op {
+  name: "MaxPool3D"
+  input_arg {
+    name: "input"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "ksize"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 5
+  }
+  attr {
+    name: "strides"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 5
+  }
+  attr {
+    name: "padding"
+    type: "string"
+    allowed_values {
+      list {
+        s: "SAME"
+        s: "VALID"
+      }
+    }
+  }
+  attr {
+    name: "data_format"
+    type: "string"
+    default_value {
+      s: "NDHWC"
+    }
+    allowed_values {
+      list {
+        s: "NDHWC"
+        s: "NCDHW"
+      }
+    }
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+      }
+    }
+  }
+}
+op {
+  name: "MaxPool3D"
+  input_arg {
+    name: "input"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "ksize"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 5
+  }
+  attr {
+    name: "strides"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 5
+  }
+  attr {
+    name: "padding"
+    type: "string"
+    allowed_values {
+      list {
+        s: "SAME"
+        s: "VALID"
+      }
+    }
+  }
+  attr {
+    name: "data_format"
+    type: "string"
+    default_value {
+      s: "NDHWC"
+    }
+    allowed_values {
+      list {
+        s: "NDHWC"
+        s: "NCDHW"
+      }
+    }
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_BFLOAT16
+        type: DT_FLOAT
+      }
+    }
+  }
+}
+op {
+  name: "MaxPool3DGrad"
+  input_arg {
+    name: "orig_input"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "orig_output"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "grad"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "ksize"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 5
+  }
+  attr {
+    name: "strides"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 5
+  }
+  attr {
+    name: "padding"
+    type: "string"
+    allowed_values {
+      list {
+        s: "SAME"
+        s: "VALID"
+      }
+    }
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+      }
+    }
+  }
+}
+op {
+  name: "MaxPool3DGrad"
+  input_arg {
+    name: "orig_input"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "orig_output"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "grad"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "ksize"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 5
+  }
+  attr {
+    name: "strides"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 5
+  }
+  attr {
+    name: "padding"
+    type: "string"
+    allowed_values {
+      list {
+        s: "SAME"
+        s: "VALID"
+      }
+    }
+  }
+  attr {
+    name: "data_format"
+    type: "string"
+    default_value {
+      s: "NDHWC"
+    }
+    allowed_values {
+      list {
+        s: "NDHWC"
+        s: "NCDHW"
+      }
+    }
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+      }
+    }
+  }
+}
+op {
+  name: "MaxPool3DGrad"
+  input_arg {
+    name: "orig_input"
+    type_attr: "TInput"
+  }
+  input_arg {
+    name: "orig_output"
+    type_attr: "TInput"
+  }
+  input_arg {
+    name: "grad"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "ksize"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 5
+  }
+  attr {
+    name: "strides"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 5
+  }
+  attr {
+    name: "padding"
+    type: "string"
+    allowed_values {
+      list {
+        s: "SAME"
+        s: "VALID"
+      }
+    }
+  }
+  attr {
+    name: "data_format"
+    type: "string"
+    default_value {
+      s: "NDHWC"
+    }
+    allowed_values {
+      list {
+        s: "NDHWC"
+        s: "NCDHW"
+      }
+    }
+  }
+  attr {
+    name: "T"
+    type: "type"
+    default_value {
+      type: DT_FLOAT
+    }
+    allowed_values {
+      list {
+        type: DT_FLOAT
+      }
+    }
+  }
+  attr {
+    name: "TInput"
+    type: "type"
+    default_value {
+      type: DT_FLOAT
+    }
+    allowed_values {
+      list {
+        type: DT_FLOAT
+      }
+    }
+  }
+}
+op {
+  name: "MaxPool3DGrad"
+  input_arg {
+    name: "orig_input"
+    type_attr: "TInput"
+  }
+  input_arg {
+    name: "orig_output"
+    type_attr: "TInput"
+  }
+  input_arg {
+    name: "grad"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "ksize"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 5
+  }
+  attr {
+    name: "strides"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 5
+  }
+  attr {
+    name: "padding"
+    type: "string"
+    allowed_values {
+      list {
+        s: "SAME"
+        s: "VALID"
+      }
+    }
+  }
+  attr {
+    name: "data_format"
+    type: "string"
+    default_value {
+      s: "NDHWC"
+    }
+    allowed_values {
+      list {
+        s: "NDHWC"
+        s: "NCDHW"
+      }
+    }
+  }
+  attr {
+    name: "T"
+    type: "type"
+    default_value {
+      type: DT_FLOAT
+    }
+    allowed_values {
+      list {
+        type: DT_BFLOAT16
+        type: DT_FLOAT
+      }
+    }
+  }
+  attr {
+    name: "TInput"
+    type: "type"
+    default_value {
+      type: DT_FLOAT
+    }
+    allowed_values {
+      list {
+        type: DT_BFLOAT16
+        type: DT_FLOAT
+      }
+    }
+  }
+}
+op {
+  name: "MaxPool3DGradGrad"
+  input_arg {
+    name: "orig_input"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "orig_output"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "grad"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "ksize"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 5
+  }
+  attr {
+    name: "strides"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 5
+  }
+  attr {
+    name: "padding"
+    type: "string"
+    allowed_values {
+      list {
+        s: "SAME"
+        s: "VALID"
+      }
+    }
+  }
+  attr {
+    name: "data_format"
+    type: "string"
+    default_value {
+      s: "NDHWC"
+    }
+    allowed_values {
+      list {
+        s: "NDHWC"
+        s: "NCDHW"
+      }
+    }
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+      }
+    }
+  }
+}
+op {
+  name: "MaxPoolGrad"
+  input_arg {
+    name: "orig_input"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "orig_output"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "grad"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "ksize"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 4
+  }
+  attr {
+    name: "strides"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 4
+  }
+  attr {
+    name: "padding"
+    type: "string"
+    allowed_values {
+      list {
+        s: "SAME"
+        s: "VALID"
+      }
+    }
+  }
+  attr {
+    name: "data_format"
+    type: "string"
+    default_value {
+      s: "NHWC"
+    }
+    allowed_values {
+      list {
+        s: "NHWC"
+        s: "NCHW"
+      }
+    }
+  }
+  attr {
+    name: "T"
+    type: "type"
+    default_value {
+      type: DT_FLOAT
+    }
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_HALF
+      }
+    }
+  }
+}
+op {
+  name: "MaxPoolGrad"
+  input_arg {
+    name: "orig_input"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "orig_output"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "grad"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "ksize"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 4
+  }
+  attr {
+    name: "strides"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 4
+  }
+  attr {
+    name: "padding"
+    type: "string"
+    allowed_values {
+      list {
+        s: "SAME"
+        s: "VALID"
+      }
+    }
+  }
+  attr {
+    name: "data_format"
+    type: "string"
+    default_value {
+      s: "NHWC"
+    }
+    allowed_values {
+      list {
+        s: "NHWC"
+        s: "NCHW"
+      }
+    }
+  }
+  attr {
+    name: "T"
+    type: "type"
+    default_value {
+      type: DT_FLOAT
+    }
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_UINT16
+        type: DT_HALF
+      }
+    }
+  }
+}
+op {
+  name: "MaxPoolGrad"
+  input_arg {
+    name: "orig_input"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "orig_output"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "grad"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "ksize"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 4
+  }
+  attr {
+    name: "strides"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 4
+  }
+  attr {
+    name: "padding"
+    type: "string"
+    allowed_values {
+      list {
+        s: "SAME"
+        s: "VALID"
+      }
+    }
+  }
+  attr {
+    name: "data_format"
+    type: "string"
+    default_value {
+      s: "NHWC"
+    }
+    allowed_values {
+      list {
+        s: "NHWC"
+        s: "NCHW"
+      }
+    }
+  }
+  attr {
+    name: "T"
+    type: "type"
+    default_value {
+      type: DT_FLOAT
+    }
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_UINT16
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+      }
+    }
+  }
+}
+op {
+  name: "MaxPoolGrad"
+  input_arg {
+    name: "orig_input"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "orig_output"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "grad"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "ksize"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 4
+  }
+  attr {
+    name: "strides"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 4
+  }
+  attr {
+    name: "padding"
+    type: "string"
+    allowed_values {
+      list {
+        s: "SAME"
+        s: "VALID"
+      }
+    }
+  }
+  attr {
+    name: "data_format"
+    type: "string"
+    default_value {
+      s: "NHWC"
+    }
+    allowed_values {
+      list {
+        s: "NHWC"
+        s: "NCHW"
+      }
+    }
+  }
+  attr {
+    name: "T"
+    type: "type"
+    default_value {
+      type: DT_FLOAT
+    }
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_UINT16
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
+      }
+    }
+  }
+}
+op {
+  name: "MaxPoolGradGrad"
+  input_arg {
+    name: "orig_input"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "orig_output"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "grad"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "ksize"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 4
+  }
+  attr {
+    name: "strides"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 4
+  }
+  attr {
+    name: "padding"
+    type: "string"
+    allowed_values {
+      list {
+        s: "SAME"
+        s: "VALID"
+      }
+    }
+  }
+  attr {
+    name: "data_format"
+    type: "string"
+    default_value {
+      s: "NHWC"
+    }
+    allowed_values {
+      list {
+        s: "NHWC"
+        s: "NCHW"
+      }
+    }
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_UINT16
+        type: DT_HALF
+      }
+    }
+  }
+}
+op {
+  name: "MaxPoolGradGrad"
+  input_arg {
+    name: "orig_input"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "orig_output"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "grad"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "ksize"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 4
+  }
+  attr {
+    name: "strides"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 4
+  }
+  attr {
+    name: "padding"
+    type: "string"
+    allowed_values {
+      list {
+        s: "SAME"
+        s: "VALID"
+      }
+    }
+  }
+  attr {
+    name: "data_format"
+    type: "string"
+    default_value {
+      s: "NHWC"
+    }
+    allowed_values {
+      list {
+        s: "NHWC"
+        s: "NCHW"
+      }
+    }
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_UINT16
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+      }
+    }
+  }
+}
+op {
+  name: "MaxPoolGradGrad"
+  input_arg {
+    name: "orig_input"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "orig_output"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "grad"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "ksize"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 4
+  }
+  attr {
+    name: "strides"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 4
+  }
+  attr {
+    name: "padding"
+    type: "string"
+    allowed_values {
+      list {
+        s: "SAME"
+        s: "VALID"
+      }
+    }
+  }
+  attr {
+    name: "data_format"
+    type: "string"
+    default_value {
+      s: "NHWC"
+    }
+    allowed_values {
+      list {
+        s: "NHWC"
+        s: "NCHW"
+      }
+    }
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_UINT16
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
+      }
+    }
+  }
+}
+op {
+  name: "MaxPoolGradGradV2"
+  input_arg {
+    name: "orig_input"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "orig_output"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "grad"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "ksize"
+    type: DT_INT32
+  }
+  input_arg {
+    name: "strides"
+    type: DT_INT32
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "padding"
+    type: "string"
+    allowed_values {
+      list {
+        s: "SAME"
+        s: "VALID"
+      }
+    }
+  }
+  attr {
+    name: "data_format"
+    type: "string"
+    default_value {
+      s: "NHWC"
+    }
+    allowed_values {
+      list {
+        s: "NHWC"
+        s: "NCHW"
+      }
+    }
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_UINT16
+        type: DT_HALF
+      }
+    }
+  }
+}
+op {
+  name: "MaxPoolGradGradV2"
+  input_arg {
+    name: "orig_input"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "orig_output"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "grad"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "ksize"
+    type: DT_INT32
+  }
+  input_arg {
+    name: "strides"
+    type: DT_INT32
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "padding"
+    type: "string"
+    allowed_values {
+      list {
+        s: "SAME"
+        s: "VALID"
+      }
+    }
+  }
+  attr {
+    name: "data_format"
+    type: "string"
+    default_value {
+      s: "NHWC"
+    }
+    allowed_values {
+      list {
+        s: "NHWC"
+        s: "NCHW"
+      }
+    }
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_UINT16
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+      }
+    }
+  }
+}
+op {
+  name: "MaxPoolGradGradV2"
+  input_arg {
+    name: "orig_input"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "orig_output"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "grad"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "ksize"
+    type: DT_INT32
+  }
+  input_arg {
+    name: "strides"
+    type: DT_INT32
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "padding"
+    type: "string"
+    allowed_values {
+      list {
+        s: "SAME"
+        s: "VALID"
+      }
+    }
+  }
+  attr {
+    name: "data_format"
+    type: "string"
+    default_value {
+      s: "NHWC"
+    }
+    allowed_values {
+      list {
+        s: "NHWC"
+        s: "NCHW"
+      }
+    }
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_UINT16
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
+      }
+    }
+  }
+}
+op {
+  name: "MaxPoolGradGradWithArgmax"
+  input_arg {
+    name: "input"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "grad"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "argmax"
+    type_attr: "Targmax"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "ksize"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 4
+  }
+  attr {
+    name: "strides"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 4
+  }
+  attr {
+    name: "padding"
+    type: "string"
+    allowed_values {
+      list {
+        s: "SAME"
+        s: "VALID"
+      }
+    }
+  }
+  attr {
+    name: "Targmax"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_UINT16
+        type: DT_HALF
+      }
+    }
+  }
+}
+op {
+  name: "MaxPoolGradGradWithArgmax"
+  input_arg {
+    name: "input"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "grad"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "argmax"
+    type_attr: "Targmax"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "ksize"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 4
+  }
+  attr {
+    name: "strides"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 4
+  }
+  attr {
+    name: "padding"
+    type: "string"
+    allowed_values {
+      list {
+        s: "SAME"
+        s: "VALID"
+      }
+    }
+  }
+  attr {
+    name: "Targmax"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_UINT16
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+      }
+    }
+  }
+}
+op {
+  name: "MaxPoolGradGradWithArgmax"
+  input_arg {
+    name: "input"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "grad"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "argmax"
+    type_attr: "Targmax"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "ksize"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 4
+  }
+  attr {
+    name: "strides"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 4
+  }
+  attr {
+    name: "padding"
+    type: "string"
+    allowed_values {
+      list {
+        s: "SAME"
+        s: "VALID"
+      }
+    }
+  }
+  attr {
+    name: "Targmax"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_UINT16
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
+      }
+    }
+  }
+}
+op {
+  name: "MaxPoolGradV2"
+  input_arg {
+    name: "orig_input"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "orig_output"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "grad"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "ksize"
+    type: DT_INT32
+  }
+  input_arg {
+    name: "strides"
+    type: DT_INT32
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "padding"
+    type: "string"
+    allowed_values {
+      list {
+        s: "SAME"
+        s: "VALID"
+      }
+    }
+  }
+  attr {
+    name: "data_format"
+    type: "string"
+    default_value {
+      s: "NHWC"
+    }
+    allowed_values {
+      list {
+        s: "NHWC"
+        s: "NCHW"
+      }
+    }
+  }
+  attr {
+    name: "T"
+    type: "type"
+    default_value {
+      type: DT_FLOAT
+    }
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_UINT16
+        type: DT_HALF
+      }
+    }
+  }
+}
+op {
+  name: "MaxPoolGradV2"
+  input_arg {
+    name: "orig_input"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "orig_output"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "grad"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "ksize"
+    type: DT_INT32
+  }
+  input_arg {
+    name: "strides"
+    type: DT_INT32
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "padding"
+    type: "string"
+    allowed_values {
+      list {
+        s: "SAME"
+        s: "VALID"
+      }
+    }
+  }
+  attr {
+    name: "data_format"
+    type: "string"
+    default_value {
+      s: "NHWC"
+    }
+    allowed_values {
+      list {
+        s: "NHWC"
+        s: "NCHW"
+      }
+    }
+  }
+  attr {
+    name: "T"
+    type: "type"
+    default_value {
+      type: DT_FLOAT
+    }
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_UINT16
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+      }
+    }
+  }
+}
+op {
+  name: "MaxPoolGradV2"
+  input_arg {
+    name: "orig_input"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "orig_output"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "grad"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "ksize"
+    type: DT_INT32
+  }
+  input_arg {
+    name: "strides"
+    type: DT_INT32
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "padding"
+    type: "string"
+    allowed_values {
+      list {
+        s: "SAME"
+        s: "VALID"
+      }
+    }
+  }
+  attr {
+    name: "data_format"
+    type: "string"
+    default_value {
+      s: "NHWC"
+    }
+    allowed_values {
+      list {
+        s: "NHWC"
+        s: "NCHW"
+      }
+    }
+  }
+  attr {
+    name: "T"
+    type: "type"
+    default_value {
+      type: DT_FLOAT
+    }
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_UINT16
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
+      }
+    }
+  }
+}
+op {
+  name: "MaxPoolGradWithArgmax"
+  input_arg {
+    name: "input"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "grad"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "argmax"
+    type_attr: "Targmax"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "ksize"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 4
+  }
+  attr {
+    name: "strides"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 4
+  }
+  attr {
+    name: "padding"
+    type: "string"
+    allowed_values {
+      list {
+        s: "SAME"
+        s: "VALID"
+      }
+    }
+  }
+  attr {
+    name: "Targmax"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+  attr {
+    name: "T"
+    type: "type"
+    default_value {
+      type: DT_FLOAT
+    }
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_HALF
+      }
+    }
+  }
+}
+op {
+  name: "MaxPoolGradWithArgmax"
+  input_arg {
+    name: "input"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "grad"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "argmax"
+    type_attr: "Targmax"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "ksize"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 4
+  }
+  attr {
+    name: "strides"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 4
+  }
+  attr {
+    name: "padding"
+    type: "string"
+    allowed_values {
+      list {
+        s: "SAME"
+        s: "VALID"
+      }
+    }
+  }
+  attr {
+    name: "Targmax"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_UINT16
+        type: DT_HALF
+      }
+    }
+  }
+}
+op {
+  name: "MaxPoolGradWithArgmax"
+  input_arg {
+    name: "input"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "grad"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "argmax"
+    type_attr: "Targmax"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "ksize"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 4
+  }
+  attr {
+    name: "strides"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 4
+  }
+  attr {
+    name: "padding"
+    type: "string"
+    allowed_values {
+      list {
+        s: "SAME"
+        s: "VALID"
+      }
+    }
+  }
+  attr {
+    name: "Targmax"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_UINT16
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+      }
+    }
+  }
+}
+op {
+  name: "MaxPoolGradWithArgmax"
+  input_arg {
+    name: "input"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "grad"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "argmax"
+    type_attr: "Targmax"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "ksize"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 4
+  }
+  attr {
+    name: "strides"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 4
+  }
+  attr {
+    name: "padding"
+    type: "string"
+    allowed_values {
+      list {
+        s: "SAME"
+        s: "VALID"
+      }
+    }
+  }
+  attr {
+    name: "Targmax"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_UINT16
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
+      }
+    }
+  }
+}
+op {
+  name: "MaxPoolV2"
+  input_arg {
+    name: "input"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "ksize"
+    type: DT_INT32
+  }
+  input_arg {
+    name: "strides"
+    type: DT_INT32
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    default_value {
+      type: DT_FLOAT
+    }
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_UINT16
+        type: DT_HALF
+      }
+    }
+  }
+  attr {
+    name: "padding"
+    type: "string"
+    allowed_values {
+      list {
+        s: "SAME"
+        s: "VALID"
+      }
+    }
+  }
+  attr {
+    name: "data_format"
+    type: "string"
+    default_value {
+      s: "NHWC"
+    }
+    allowed_values {
+      list {
+        s: "NHWC"
+        s: "NCHW"
+      }
+    }
+  }
+}
+op {
+  name: "MaxPoolV2"
+  input_arg {
+    name: "input"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "ksize"
+    type: DT_INT32
+  }
+  input_arg {
+    name: "strides"
+    type: DT_INT32
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    default_value {
+      type: DT_FLOAT
+    }
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_UINT16
+        type: DT_HALF
+        type: DT_QINT8
+      }
+    }
+  }
+  attr {
+    name: "padding"
+    type: "string"
+    allowed_values {
+      list {
+        s: "SAME"
+        s: "VALID"
+      }
+    }
+  }
+  attr {
+    name: "data_format"
+    type: "string"
+    default_value {
+      s: "NHWC"
+    }
+    allowed_values {
+      list {
+        s: "NHWC"
+        s: "NCHW"
+        s: "NCHW_VECT_C"
+      }
+    }
+  }
+}
+op {
+  name: "MaxPoolV2"
+  input_arg {
+    name: "input"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "ksize"
+    type: DT_INT32
+  }
+  input_arg {
+    name: "strides"
+    type: DT_INT32
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    default_value {
+      type: DT_FLOAT
+    }
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_BFLOAT16
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_UINT16
+        type: DT_QINT8
+      }
+    }
+  }
+  attr {
+    name: "padding"
+    type: "string"
+    allowed_values {
+      list {
+        s: "SAME"
+        s: "VALID"
+      }
+    }
+  }
+  attr {
+    name: "data_format"
+    type: "string"
+    default_value {
+      s: "NHWC"
+    }
+    allowed_values {
+      list {
+        s: "NHWC"
+        s: "NCHW"
+        s: "NCHW_VECT_C"
+      }
+    }
+  }
+}
+op {
+  name: "MaxPoolWithArgmax"
+  input_arg {
+    name: "input"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "argmax"
+    type_attr: "Targmax"
+  }
+  attr {
+    name: "ksize"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 4
+  }
+  attr {
+    name: "strides"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 4
+  }
+  attr {
+    name: "Targmax"
+    type: "type"
+    default_value {
+      type: DT_INT64
+    }
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+  attr {
+    name: "padding"
+    type: "string"
+    allowed_values {
+      list {
+        s: "SAME"
+        s: "VALID"
+      }
+    }
+  }
+  attr {
+    name: "T"
+    type: "type"
+    default_value {
+      type: DT_FLOAT
+    }
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_HALF
+      }
+    }
+  }
+}
+op {
+  name: "MaxPoolWithArgmax"
+  input_arg {
+    name: "input"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "argmax"
+    type_attr: "Targmax"
+  }
+  attr {
+    name: "ksize"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 4
+  }
+  attr {
+    name: "strides"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 4
+  }
+  attr {
+    name: "Targmax"
+    type: "type"
+    default_value {
+      type: DT_INT64
+    }
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+  attr {
+    name: "padding"
+    type: "string"
+    allowed_values {
+      list {
+        s: "SAME"
+        s: "VALID"
+      }
+    }
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_UINT16
+        type: DT_HALF
+      }
+    }
+  }
+}
+op {
+  name: "MaxPoolWithArgmax"
+  input_arg {
+    name: "input"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "argmax"
+    type_attr: "Targmax"
+  }
+  attr {
+    name: "ksize"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 4
+  }
+  attr {
+    name: "strides"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 4
+  }
+  attr {
+    name: "Targmax"
+    type: "type"
+    default_value {
+      type: DT_INT64
+    }
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+  attr {
+    name: "padding"
+    type: "string"
+    allowed_values {
+      list {
+        s: "SAME"
+        s: "VALID"
+      }
+    }
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_UINT16
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+      }
+    }
+  }
+}
+op {
+  name: "MaxPoolWithArgmax"
+  input_arg {
+    name: "input"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "argmax"
+    type_attr: "Targmax"
+  }
+  attr {
+    name: "ksize"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 4
+  }
+  attr {
+    name: "strides"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 4
+  }
+  attr {
+    name: "Targmax"
+    type: "type"
+    default_value {
+      type: DT_INT64
+    }
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+  attr {
+    name: "padding"
+    type: "string"
+    allowed_values {
+      list {
+        s: "SAME"
+        s: "VALID"
+      }
+    }
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_UINT16
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
+      }
+    }
+  }
+}
+op {
+  name: "Maximum"
+  input_arg {
+    name: "x"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "y"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "z"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+  is_commutative: true
+}
+op {
+  name: "Maximum"
+  input_arg {
+    name: "x"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "y"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "z"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_BFLOAT16
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+  is_commutative: true
+}
+op {
+  name: "Mean"
+  input_arg {
+    name: "input"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "reduction_indices"
+    type_attr: "Tidx"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "keep_dims"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+      }
+    }
+  }
+  attr {
+    name: "Tidx"
+    type: "type"
+    default_value {
+      type: DT_INT32
+    }
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+}
+op {
+  name: "Mean"
+  input_arg {
+    name: "input"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "reduction_indices"
+    type_attr: "Tidx"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "keep_dims"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+      }
+    }
+  }
+  attr {
+    name: "Tidx"
+    type: "type"
+    default_value {
+      type: DT_INT32
+    }
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+}
+op {
+  name: "Mean"
+  input_arg {
+    name: "input"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "reduction_indices"
+    type_attr: "Tidx"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "keep_dims"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
+      }
+    }
+  }
+  attr {
+    name: "Tidx"
+    type: "type"
+    default_value {
+      type: DT_INT32
+    }
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+}
+op {
+  name: "Merge"
+  input_arg {
+    name: "inputs"
+    type_attr: "T"
+    number_attr: "N"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "value_index"
+    type: DT_INT32
+  }
+  attr {
+    name: "T"
+    type: "type"
+  }
+  attr {
+    name: "N"
+    type: "int"
+    has_minimum: true
+    minimum: 1
+  }
+}
+op {
+  name: "MergeSummary"
+  input_arg {
+    name: "inputs"
+    type: DT_STRING
+    number_attr: "N"
+  }
+  output_arg {
+    name: "summary"
+    type: DT_STRING
+  }
+  attr {
+    name: "N"
+    type: "int"
+    has_minimum: true
+    minimum: 1
+  }
+}
+op {
+  name: "MergeV2Checkpoints"
+  input_arg {
+    name: "checkpoint_prefixes"
+    type: DT_STRING
+  }
+  input_arg {
+    name: "destination_prefix"
+    type: DT_STRING
+  }
+  attr {
+    name: "delete_old_dirs"
+    type: "bool"
+    default_value {
+      b: true
+    }
+  }
+}
+op {
+  name: "MergeV2Checkpoints"
+  input_arg {
+    name: "checkpoint_prefixes"
+    type: DT_STRING
+  }
+  input_arg {
+    name: "destination_prefix"
+    type: DT_STRING
+  }
+  attr {
+    name: "delete_old_dirs"
+    type: "bool"
+    default_value {
+      b: true
+    }
+  }
+  is_stateful: true
+}
+op {
+  name: "Mfcc"
+  input_arg {
+    name: "spectrogram"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "sample_rate"
+    type: DT_INT32
+  }
+  output_arg {
+    name: "output"
+    type: DT_FLOAT
+  }
+  attr {
+    name: "upper_frequency_limit"
+    type: "float"
+    default_value {
+      f: 4000
+    }
+  }
+  attr {
+    name: "lower_frequency_limit"
+    type: "float"
+    default_value {
+      f: 20
+    }
+  }
+  attr {
+    name: "filterbank_channel_count"
+    type: "int"
+    default_value {
+      i: 40
+    }
+  }
+  attr {
+    name: "dct_coefficient_count"
+    type: "int"
+    default_value {
+      i: 13
+    }
+  }
+}
+op {
+  name: "Min"
+  input_arg {
+    name: "input"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "reduction_indices"
+    type_attr: "Tidx"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "keep_dims"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+      }
+    }
+  }
+  attr {
+    name: "Tidx"
+    type: "type"
+    default_value {
+      type: DT_INT32
+    }
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+}
+op {
+  name: "Min"
+  input_arg {
+    name: "input"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "reduction_indices"
+    type_attr: "Tidx"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "keep_dims"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+      }
+    }
+  }
+  attr {
+    name: "Tidx"
+    type: "type"
+    default_value {
+      type: DT_INT32
+    }
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+}
+op {
+  name: "Min"
+  input_arg {
+    name: "input"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "reduction_indices"
+    type_attr: "Tidx"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "keep_dims"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
+      }
+    }
+  }
+  attr {
+    name: "Tidx"
+    type: "type"
+    default_value {
+      type: DT_INT32
+    }
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+}
+op {
+  name: "Minimum"
+  input_arg {
+    name: "x"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "y"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "z"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+  is_commutative: true
+}
+op {
+  name: "Minimum"
+  input_arg {
+    name: "x"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "y"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "z"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_BFLOAT16
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+  is_commutative: true
+}
+op {
+  name: "MirrorPad"
+  input_arg {
+    name: "input"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "paddings"
+    type_attr: "Tpaddings"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+  }
+  attr {
+    name: "Tpaddings"
+    type: "type"
+    default_value {
+      type: DT_INT32
+    }
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+  attr {
+    name: "mode"
+    type: "string"
+    allowed_values {
+      list {
+        s: "REFLECT"
+        s: "SYMMETRIC"
+      }
+    }
+  }
+}
+op {
+  name: "MirrorPadGrad"
+  input_arg {
+    name: "input"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "paddings"
+    type_attr: "Tpaddings"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+  }
+  attr {
+    name: "Tpaddings"
+    type: "type"
+    default_value {
+      type: DT_INT32
+    }
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+  attr {
+    name: "mode"
+    type: "string"
+    allowed_values {
+      list {
+        s: "REFLECT"
+        s: "SYMMETRIC"
+      }
+    }
+  }
+}
+op {
+  name: "Mod"
+  input_arg {
+    name: "x"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "y"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "z"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_FLOAT
+        type: DT_DOUBLE
+      }
+    }
+  }
+}
+op {
+  name: "Mod"
+  input_arg {
+    name: "x"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "y"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "z"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_BFLOAT16
+        type: DT_FLOAT
+        type: DT_DOUBLE
+      }
+    }
+  }
+}
+op {
+  name: "Mul"
+  input_arg {
+    name: "x"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "y"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "z"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_UINT8
+        type: DT_INT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+      }
+    }
+  }
+  is_commutative: true
+}
+op {
+  name: "Mul"
+  input_arg {
+    name: "x"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "y"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "z"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_BFLOAT16
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_UINT8
+        type: DT_INT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+      }
+    }
+  }
+  is_commutative: true
+}
+op {
+  name: "Multinomial"
+  input_arg {
+    name: "logits"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "num_samples"
+    type: DT_INT32
+  }
+  output_arg {
+    name: "output"
+    type: DT_INT64
+  }
+  attr {
+    name: "seed"
+    type: "int"
+    default_value {
+      i: 0
+    }
+  }
+  attr {
+    name: "seed2"
+    type: "int"
+    default_value {
+      i: 0
+    }
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_UINT16
+        type: DT_HALF
+      }
+    }
+  }
+  is_stateful: true
+}
+op {
+  name: "Multinomial"
+  input_arg {
+    name: "logits"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "num_samples"
+    type: DT_INT32
+  }
+  output_arg {
+    name: "output"
+    type: DT_INT64
+  }
+  attr {
+    name: "seed"
+    type: "int"
+    default_value {
+      i: 0
+    }
+  }
+  attr {
+    name: "seed2"
+    type: "int"
+    default_value {
+      i: 0
+    }
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_UINT16
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+      }
+    }
+  }
+  is_stateful: true
+}
+op {
+  name: "Multinomial"
+  input_arg {
+    name: "logits"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "num_samples"
+    type: DT_INT32
+  }
+  output_arg {
+    name: "output"
+    type_attr: "output_dtype"
+  }
+  attr {
+    name: "seed"
+    type: "int"
+    default_value {
+      i: 0
+    }
+  }
+  attr {
+    name: "seed2"
+    type: "int"
+    default_value {
+      i: 0
+    }
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_UINT16
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
+      }
+    }
+  }
+  attr {
+    name: "output_dtype"
+    type: "type"
+    default_value {
+      type: DT_INT64
+    }
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+  is_stateful: true
+}
+op {
+  name: "MutableDenseHashTable"
+  input_arg {
+    name: "empty_key"
+    type_attr: "key_dtype"
+  }
+  output_arg {
+    name: "table_handle"
+    type: DT_STRING
+    is_ref: true
+  }
+  attr {
+    name: "container"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  attr {
+    name: "shared_name"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  attr {
+    name: "use_node_name_sharing"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  attr {
+    name: "key_dtype"
+    type: "type"
+  }
+  attr {
+    name: "value_dtype"
+    type: "type"
+  }
+  attr {
+    name: "value_shape"
+    type: "shape"
+    default_value {
+      shape {
+      }
+    }
+  }
+  attr {
+    name: "initial_num_buckets"
+    type: "int"
+    default_value {
+      i: 131072
+    }
+  }
+  attr {
+    name: "max_load_factor"
+    type: "float"
+    default_value {
+      f: 0.8
+    }
+  }
+  is_stateful: true
+}
+op {
+  name: "MutableDenseHashTableV2"
+  input_arg {
+    name: "empty_key"
+    type_attr: "key_dtype"
+  }
+  output_arg {
+    name: "table_handle"
+    type: DT_RESOURCE
+  }
+  attr {
+    name: "container"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  attr {
+    name: "shared_name"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  attr {
+    name: "use_node_name_sharing"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  attr {
+    name: "key_dtype"
+    type: "type"
+  }
+  attr {
+    name: "value_dtype"
+    type: "type"
+  }
+  attr {
+    name: "value_shape"
+    type: "shape"
+    default_value {
+      shape {
+      }
+    }
+  }
+  attr {
+    name: "initial_num_buckets"
+    type: "int"
+    default_value {
+      i: 131072
+    }
+  }
+  attr {
+    name: "max_load_factor"
+    type: "float"
+    default_value {
+      f: 0.8
+    }
+  }
+  is_stateful: true
+}
+op {
+  name: "MutableHashTable"
+  output_arg {
+    name: "table_handle"
+    type: DT_STRING
+    is_ref: true
+  }
+  attr {
+    name: "container"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  attr {
+    name: "shared_name"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  attr {
+    name: "use_node_name_sharing"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  attr {
+    name: "key_dtype"
+    type: "type"
+  }
+  attr {
+    name: "value_dtype"
+    type: "type"
+  }
+  is_stateful: true
+}
+op {
+  name: "MutableHashTableOfTensors"
+  output_arg {
+    name: "table_handle"
+    type: DT_STRING
+    is_ref: true
+  }
+  attr {
+    name: "container"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  attr {
+    name: "shared_name"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  attr {
+    name: "use_node_name_sharing"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  attr {
+    name: "key_dtype"
+    type: "type"
+  }
+  attr {
+    name: "value_dtype"
+    type: "type"
+  }
+  attr {
+    name: "value_shape"
+    type: "shape"
+    default_value {
+      shape {
+      }
+    }
+  }
+  is_stateful: true
+}
+op {
+  name: "MutableHashTableOfTensorsV2"
+  output_arg {
+    name: "table_handle"
+    type: DT_RESOURCE
+  }
+  attr {
+    name: "container"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  attr {
+    name: "shared_name"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  attr {
+    name: "use_node_name_sharing"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  attr {
+    name: "key_dtype"
+    type: "type"
+  }
+  attr {
+    name: "value_dtype"
+    type: "type"
+  }
+  attr {
+    name: "value_shape"
+    type: "shape"
+    default_value {
+      shape {
+      }
+    }
+  }
+  is_stateful: true
+}
+op {
+  name: "MutableHashTableV2"
+  output_arg {
+    name: "table_handle"
+    type: DT_RESOURCE
+  }
+  attr {
+    name: "container"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  attr {
+    name: "shared_name"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  attr {
+    name: "use_node_name_sharing"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  attr {
+    name: "key_dtype"
+    type: "type"
+  }
+  attr {
+    name: "value_dtype"
+    type: "type"
+  }
+  is_stateful: true
+}
+op {
+  name: "Neg"
+  input_arg {
+    name: "x"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "y"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+      }
+    }
+  }
+}
+op {
+  name: "Neg"
+  input_arg {
+    name: "x"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "y"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_BFLOAT16
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+      }
+    }
+  }
+}
+op {
+  name: "NegTrain"
+  input_arg {
+    name: "w_in"
+    type: DT_FLOAT
+    is_ref: true
+  }
+  input_arg {
+    name: "w_out"
+    type: DT_FLOAT
+    is_ref: true
+  }
+  input_arg {
+    name: "examples"
+    type: DT_INT32
+  }
+  input_arg {
+    name: "labels"
+    type: DT_INT32
+  }
+  input_arg {
+    name: "lr"
+    type: DT_FLOAT
+  }
+  attr {
+    name: "vocab_count"
+    type: "list(int)"
+  }
+  attr {
+    name: "num_negative_samples"
+    type: "int"
+  }
+  deprecation {
+    version: 19
+  }
+  is_stateful: true
+}
+op {
+  name: "NextIteration"
+  input_arg {
+    name: "data"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+  }
+}
+op {
+  name: "NoOp"
+}
+op {
+  name: "NonMaxSuppression"
+  input_arg {
+    name: "boxes"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "scores"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "max_output_size"
+    type: DT_INT32
+  }
+  output_arg {
+    name: "selected_indices"
+    type: DT_INT32
+  }
+  attr {
+    name: "iou_threshold"
+    type: "float"
+    default_value {
+      f: 0.5
+    }
+  }
+}
+op {
+  name: "NonMaxSuppressionV2"
+  input_arg {
+    name: "boxes"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "scores"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "max_output_size"
+    type: DT_INT32
+  }
+  input_arg {
+    name: "iou_threshold"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "selected_indices"
+    type: DT_INT32
+  }
+}
+op {
+  name: "NotEqual"
+  input_arg {
+    name: "x"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "y"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "z"
+    type: DT_BOOL
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_UINT8
+        type: DT_INT8
+        type: DT_INT16
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_COMPLEX64
+        type: DT_QUINT8
+        type: DT_QINT8
+        type: DT_QINT32
+        type: DT_STRING
+        type: DT_BOOL
+        type: DT_COMPLEX128
+      }
+    }
+  }
+  is_commutative: true
+}
+op {
+  name: "NotEqual"
+  input_arg {
+    name: "x"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "y"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "z"
+    type: DT_BOOL
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_BFLOAT16
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_UINT8
+        type: DT_INT8
+        type: DT_INT16
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_COMPLEX64
+        type: DT_QUINT8
+        type: DT_QINT8
+        type: DT_QINT32
+        type: DT_STRING
+        type: DT_BOOL
+        type: DT_COMPLEX128
+      }
+    }
+  }
+  is_commutative: true
+}
+op {
+  name: "NthElement"
+  input_arg {
+    name: "input"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "n"
+    type: DT_INT32
+  }
+  output_arg {
+    name: "values"
+    type_attr: "T"
+  }
+  attr {
+    name: "reverse"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_UINT16
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+      }
+    }
+  }
+}
+op {
+  name: "NthElement"
+  input_arg {
+    name: "input"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "n"
+    type: DT_INT32
+  }
+  output_arg {
+    name: "values"
+    type_attr: "T"
+  }
+  attr {
+    name: "reverse"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_UINT16
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
+      }
+    }
+  }
+}
+op {
+  name: "OneHot"
+  input_arg {
+    name: "indices"
+    type_attr: "TI"
+  }
+  input_arg {
+    name: "depth"
+    type: DT_INT32
+  }
+  input_arg {
+    name: "on_value"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "off_value"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "axis"
+    type: "int"
+    default_value {
+      i: -1
+    }
+  }
+  attr {
+    name: "T"
+    type: "type"
+  }
+  attr {
+    name: "TI"
+    type: "type"
+    default_value {
+      type: DT_INT64
+    }
+    allowed_values {
+      list {
+        type: DT_UINT8
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+}
+op {
+  name: "OneShotIterator"
+  output_arg {
+    name: "handle"
+    type: DT_RESOURCE
+  }
+  attr {
+    name: "dataset_factory"
+    type: "func"
+  }
+  attr {
+    name: "output_types"
+    type: "list(type)"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "output_shapes"
+    type: "list(shape)"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "container"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  attr {
+    name: "shared_name"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  is_stateful: true
+}
+op {
+  name: "OnesLike"
+  input_arg {
+    name: "x"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "y"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+      }
+    }
+  }
+}
+op {
+  name: "OnesLike"
+  input_arg {
+    name: "x"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "y"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_BFLOAT16
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT8
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_UINT16
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_BOOL
+      }
+    }
+  }
+}
+op {
+  name: "OrderedMapClear"
+  attr {
+    name: "capacity"
+    type: "int"
+    default_value {
+      i: 0
+    }
+    has_minimum: true
+  }
+  attr {
+    name: "memory_limit"
+    type: "int"
+    default_value {
+      i: 0
+    }
+    has_minimum: true
+  }
+  attr {
+    name: "dtypes"
+    type: "list(type)"
+  }
+  attr {
+    name: "container"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  attr {
+    name: "shared_name"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  is_stateful: true
+}
+op {
+  name: "OrderedMapIncompleteSize"
+  output_arg {
+    name: "size"
+    type: DT_INT32
+  }
+  attr {
+    name: "capacity"
+    type: "int"
+    default_value {
+      i: 0
+    }
+    has_minimum: true
+  }
+  attr {
+    name: "memory_limit"
+    type: "int"
+    default_value {
+      i: 0
+    }
+    has_minimum: true
+  }
+  attr {
+    name: "dtypes"
+    type: "list(type)"
+  }
+  attr {
+    name: "container"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  attr {
+    name: "shared_name"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  is_stateful: true
+}
+op {
+  name: "OrderedMapPeek"
+  input_arg {
+    name: "key"
+    type: DT_INT64
+  }
+  input_arg {
+    name: "indices"
+    type: DT_INT32
+  }
+  output_arg {
+    name: "values"
+    type_list_attr: "dtypes"
+  }
+  attr {
+    name: "capacity"
+    type: "int"
+    default_value {
+      i: 0
+    }
+    has_minimum: true
+  }
+  attr {
+    name: "memory_limit"
+    type: "int"
+    default_value {
+      i: 0
+    }
+    has_minimum: true
+  }
+  attr {
+    name: "dtypes"
+    type: "list(type)"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "container"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  attr {
+    name: "shared_name"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  is_stateful: true
+}
+op {
+  name: "OrderedMapSize"
+  output_arg {
+    name: "size"
+    type: DT_INT32
+  }
+  attr {
+    name: "capacity"
+    type: "int"
+    default_value {
+      i: 0
+    }
+    has_minimum: true
+  }
+  attr {
+    name: "memory_limit"
+    type: "int"
+    default_value {
+      i: 0
+    }
+    has_minimum: true
+  }
+  attr {
+    name: "dtypes"
+    type: "list(type)"
+  }
+  attr {
+    name: "container"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  attr {
+    name: "shared_name"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  is_stateful: true
+}
+op {
+  name: "OrderedMapStage"
+  input_arg {
+    name: "key"
+    type: DT_INT64
+  }
+  input_arg {
+    name: "indices"
+    type: DT_INT32
+  }
+  input_arg {
+    name: "values"
+    type_list_attr: "fake_dtypes"
+  }
+  attr {
+    name: "capacity"
+    type: "int"
+    default_value {
+      i: 0
+    }
+    has_minimum: true
+  }
+  attr {
+    name: "memory_limit"
+    type: "int"
+    default_value {
+      i: 0
+    }
+    has_minimum: true
+  }
+  attr {
+    name: "dtypes"
+    type: "list(type)"
+  }
+  attr {
+    name: "fake_dtypes"
+    type: "list(type)"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "container"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  attr {
+    name: "shared_name"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  is_stateful: true
+}
+op {
+  name: "OrderedMapUnstage"
+  input_arg {
+    name: "key"
+    type: DT_INT64
+  }
+  input_arg {
+    name: "indices"
+    type: DT_INT32
+  }
+  output_arg {
+    name: "values"
+    type_list_attr: "dtypes"
+  }
+  attr {
+    name: "capacity"
+    type: "int"
+    default_value {
+      i: 0
+    }
+    has_minimum: true
+  }
+  attr {
+    name: "memory_limit"
+    type: "int"
+    default_value {
+      i: 0
+    }
+    has_minimum: true
+  }
+  attr {
+    name: "dtypes"
+    type: "list(type)"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "container"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  attr {
+    name: "shared_name"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  is_stateful: true
+}
+op {
+  name: "OrderedMapUnstageNoKey"
+  input_arg {
+    name: "indices"
+    type: DT_INT32
+  }
+  output_arg {
+    name: "key"
+    type: DT_INT64
+  }
+  output_arg {
+    name: "values"
+    type_list_attr: "dtypes"
+  }
+  attr {
+    name: "capacity"
+    type: "int"
+    default_value {
+      i: 0
+    }
+    has_minimum: true
+  }
+  attr {
+    name: "memory_limit"
+    type: "int"
+    default_value {
+      i: 0
+    }
+    has_minimum: true
+  }
+  attr {
+    name: "dtypes"
+    type: "list(type)"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "container"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  attr {
+    name: "shared_name"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  is_stateful: true
+}
+op {
+  name: "Pack"
+  input_arg {
+    name: "values"
+    type_attr: "T"
+    number_attr: "N"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "N"
+    type: "int"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "T"
+    type: "type"
+  }
+  attr {
+    name: "axis"
+    type: "int"
+    default_value {
+      i: 0
+    }
+  }
+}
+op {
+  name: "Pad"
+  input_arg {
+    name: "input"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "paddings"
+    type_attr: "Tpaddings"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+  }
+  attr {
+    name: "Tpaddings"
+    type: "type"
+    default_value {
+      type: DT_INT32
+    }
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+}
+op {
+  name: "PadV2"
+  input_arg {
+    name: "input"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "paddings"
+    type_attr: "Tpaddings"
+  }
+  input_arg {
+    name: "constant_values"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+  }
+  attr {
+    name: "Tpaddings"
+    type: "type"
+    default_value {
+      type: DT_INT32
+    }
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+}
+op {
+  name: "PaddedBatchDataset"
+  input_arg {
+    name: "input_dataset"
+    type: DT_VARIANT
+  }
+  input_arg {
+    name: "batch_size"
+    type: DT_INT64
+  }
+  input_arg {
+    name: "padded_shapes"
+    type: DT_INT64
+    number_attr: "N"
+  }
+  input_arg {
+    name: "padding_values"
+    type_list_attr: "Toutput_types"
+  }
+  output_arg {
+    name: "handle"
+    type: DT_VARIANT
+  }
+  attr {
+    name: "Toutput_types"
+    type: "list(type)"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "output_shapes"
+    type: "list(shape)"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "N"
+    type: "int"
+    has_minimum: true
+    minimum: 1
+  }
+  is_stateful: true
+}
+op {
+  name: "PaddedBatchDataset"
+  input_arg {
+    name: "input_dataset"
+    type: DT_VARIANT
+  }
+  input_arg {
+    name: "batch_size"
+    type: DT_INT64
+  }
+  input_arg {
+    name: "padded_shapes"
+    type: DT_INT64
+    number_attr: "N"
+  }
+  input_arg {
+    name: "padding_values"
+    type_list_attr: "Toutput_types"
+  }
+  output_arg {
+    name: "handle"
+    type: DT_VARIANT
+  }
+  attr {
+    name: "Toutput_types"
+    type: "list(type)"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "output_shapes"
+    type: "list(shape)"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "N"
+    type: "int"
+    has_minimum: true
+    minimum: 1
+  }
+}
+op {
+  name: "PaddingFIFOQueue"
+  output_arg {
+    name: "handle"
+    type: DT_STRING
+    is_ref: true
+  }
+  attr {
+    name: "component_types"
+    type: "list(type)"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "shapes"
+    type: "list(shape)"
+    default_value {
+      list {
+      }
+    }
+    has_minimum: true
+  }
+  attr {
+    name: "capacity"
+    type: "int"
+    default_value {
+      i: -1
+    }
+  }
+  attr {
+    name: "container"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  attr {
+    name: "shared_name"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  is_stateful: true
+}
+op {
+  name: "PaddingFIFOQueueV2"
+  output_arg {
+    name: "handle"
+    type: DT_RESOURCE
+  }
+  attr {
+    name: "component_types"
+    type: "list(type)"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "shapes"
+    type: "list(shape)"
+    default_value {
+      list {
+      }
+    }
+    has_minimum: true
+  }
+  attr {
+    name: "capacity"
+    type: "int"
+    default_value {
+      i: -1
+    }
+  }
+  attr {
+    name: "container"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  attr {
+    name: "shared_name"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  is_stateful: true
+}
+op {
+  name: "ParallelConcat"
+  input_arg {
+    name: "values"
+    type_attr: "T"
+    number_attr: "N"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "N"
+    type: "int"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "T"
+    type: "type"
+  }
+  attr {
+    name: "shape"
+    type: "shape"
+  }
+}
+op {
+  name: "ParallelDynamicStitch"
+  input_arg {
+    name: "indices"
+    type: DT_INT32
+    number_attr: "N"
+  }
+  input_arg {
+    name: "data"
+    type_attr: "T"
+    number_attr: "N"
+  }
+  output_arg {
+    name: "merged"
+    type_attr: "T"
+  }
+  attr {
+    name: "N"
+    type: "int"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "T"
+    type: "type"
+  }
+}
+op {
+  name: "ParallelInterleaveDataset"
+  input_arg {
+    name: "input_dataset"
+    type: DT_VARIANT
+  }
+  input_arg {
+    name: "other_arguments"
+    type_list_attr: "Targuments"
+  }
+  input_arg {
+    name: "cycle_length"
+    type: DT_INT64
+  }
+  input_arg {
+    name: "block_length"
+    type: DT_INT64
+  }
+  input_arg {
+    name: "sloppy"
+    type: DT_BOOL
+  }
+  output_arg {
+    name: "handle"
+    type: DT_VARIANT
+  }
+  attr {
+    name: "f"
+    type: "func"
+  }
+  attr {
+    name: "Targuments"
+    type: "list(type)"
+    has_minimum: true
+  }
+  attr {
+    name: "output_types"
+    type: "list(type)"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "output_shapes"
+    type: "list(shape)"
+    has_minimum: true
+    minimum: 1
+  }
+}
+op {
+  name: "ParallelMapDataset"
+  input_arg {
+    name: "input_dataset"
+    type: DT_VARIANT
+  }
+  input_arg {
+    name: "other_arguments"
+    type_list_attr: "Targuments"
+  }
+  input_arg {
+    name: "num_parallel_calls"
+    type: DT_INT32
+  }
+  output_arg {
+    name: "handle"
+    type: DT_VARIANT
+  }
+  attr {
+    name: "f"
+    type: "func"
+  }
+  attr {
+    name: "Targuments"
+    type: "list(type)"
+    has_minimum: true
+  }
+  attr {
+    name: "output_types"
+    type: "list(type)"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "output_shapes"
+    type: "list(shape)"
+    has_minimum: true
+    minimum: 1
+  }
+  is_stateful: true
+}
+op {
+  name: "ParallelMapDataset"
+  input_arg {
+    name: "input_dataset"
+    type: DT_VARIANT
+  }
+  input_arg {
+    name: "other_arguments"
+    type_list_attr: "Targuments"
+  }
+  input_arg {
+    name: "num_parallel_calls"
+    type: DT_INT32
+  }
+  output_arg {
+    name: "handle"
+    type: DT_VARIANT
+  }
+  attr {
+    name: "f"
+    type: "func"
+  }
+  attr {
+    name: "Targuments"
+    type: "list(type)"
+    has_minimum: true
+  }
+  attr {
+    name: "output_types"
+    type: "list(type)"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "output_shapes"
+    type: "list(shape)"
+    has_minimum: true
+    minimum: 1
+  }
+}
+op {
+  name: "ParameterizedTruncatedNormal"
+  input_arg {
+    name: "shape"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "means"
+    type_attr: "dtype"
+  }
+  input_arg {
+    name: "stdevs"
+    type_attr: "dtype"
+  }
+  input_arg {
+    name: "minvals"
+    type_attr: "dtype"
+  }
+  input_arg {
+    name: "maxvals"
+    type_attr: "dtype"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "dtype"
+  }
+  attr {
+    name: "seed"
+    type: "int"
+    default_value {
+      i: 0
+    }
+  }
+  attr {
+    name: "seed2"
+    type: "int"
+    default_value {
+      i: 0
+    }
+  }
+  attr {
+    name: "dtype"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_FLOAT
+        type: DT_DOUBLE
+      }
+    }
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+  is_stateful: true
+}
+op {
+  name: "ParameterizedTruncatedNormal"
+  input_arg {
+    name: "shape"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "means"
+    type_attr: "dtype"
+  }
+  input_arg {
+    name: "stdevs"
+    type_attr: "dtype"
+  }
+  input_arg {
+    name: "minvals"
+    type_attr: "dtype"
+  }
+  input_arg {
+    name: "maxvals"
+    type_attr: "dtype"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "dtype"
+  }
+  attr {
+    name: "seed"
+    type: "int"
+    default_value {
+      i: 0
+    }
+  }
+  attr {
+    name: "seed2"
+    type: "int"
+    default_value {
+      i: 0
+    }
+  }
+  attr {
+    name: "dtype"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_BFLOAT16
+        type: DT_FLOAT
+        type: DT_DOUBLE
+      }
+    }
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+  is_stateful: true
+}
+op {
+  name: "ParseExample"
+  input_arg {
+    name: "serialized"
+    type: DT_STRING
+  }
+  input_arg {
+    name: "names"
+    type: DT_STRING
+  }
+  input_arg {
+    name: "sparse_keys"
+    type: DT_STRING
+    number_attr: "Nsparse"
+  }
+  input_arg {
+    name: "dense_keys"
+    type: DT_STRING
+    number_attr: "Ndense"
+  }
+  input_arg {
+    name: "dense_defaults"
+    type_list_attr: "Tdense"
+  }
+  output_arg {
+    name: "sparse_indices"
+    type: DT_INT64
+    number_attr: "Nsparse"
+  }
+  output_arg {
+    name: "sparse_values"
+    type_list_attr: "sparse_types"
+  }
+  output_arg {
+    name: "sparse_shapes"
+    type: DT_INT64
+    number_attr: "Nsparse"
+  }
+  output_arg {
+    name: "dense_values"
+    type_list_attr: "Tdense"
+  }
+  attr {
+    name: "Nsparse"
+    type: "int"
+    has_minimum: true
+  }
+  attr {
+    name: "Ndense"
+    type: "int"
+    has_minimum: true
+  }
+  attr {
+    name: "sparse_types"
+    type: "list(type)"
+    has_minimum: true
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_INT64
+        type: DT_STRING
+      }
+    }
+  }
+  attr {
+    name: "Tdense"
+    type: "list(type)"
+    has_minimum: true
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_INT64
+        type: DT_STRING
+      }
+    }
+  }
+  attr {
+    name: "dense_shapes"
+    type: "list(shape)"
+    has_minimum: true
+  }
+}
+op {
+  name: "ParseSingleSequenceExample"
+  input_arg {
+    name: "serialized"
+    type: DT_STRING
+  }
+  input_arg {
+    name: "feature_list_dense_missing_assumed_empty"
+    type: DT_STRING
+  }
+  input_arg {
+    name: "context_sparse_keys"
+    type: DT_STRING
+    number_attr: "Ncontext_sparse"
+  }
+  input_arg {
+    name: "context_dense_keys"
+    type: DT_STRING
+    number_attr: "Ncontext_dense"
+  }
+  input_arg {
+    name: "feature_list_sparse_keys"
+    type: DT_STRING
+    number_attr: "Nfeature_list_sparse"
+  }
+  input_arg {
+    name: "feature_list_dense_keys"
+    type: DT_STRING
+    number_attr: "Nfeature_list_dense"
+  }
+  input_arg {
+    name: "context_dense_defaults"
+    type_list_attr: "Tcontext_dense"
+  }
+  input_arg {
+    name: "debug_name"
+    type: DT_STRING
+  }
+  output_arg {
+    name: "context_sparse_indices"
+    type: DT_INT64
+    number_attr: "Ncontext_sparse"
+  }
+  output_arg {
+    name: "context_sparse_values"
+    type_list_attr: "context_sparse_types"
+  }
+  output_arg {
+    name: "context_sparse_shapes"
+    type: DT_INT64
+    number_attr: "Ncontext_sparse"
+  }
+  output_arg {
+    name: "context_dense_values"
+    type_list_attr: "Tcontext_dense"
+  }
+  output_arg {
+    name: "feature_list_sparse_indices"
+    type: DT_INT64
+    number_attr: "Nfeature_list_sparse"
+  }
+  output_arg {
+    name: "feature_list_sparse_values"
+    type_list_attr: "feature_list_sparse_types"
+  }
+  output_arg {
+    name: "feature_list_sparse_shapes"
+    type: DT_INT64
+    number_attr: "Nfeature_list_sparse"
+  }
+  output_arg {
+    name: "feature_list_dense_values"
+    type_list_attr: "feature_list_dense_types"
+  }
+  attr {
+    name: "Ncontext_sparse"
+    type: "int"
+    default_value {
+      i: 0
+    }
+    has_minimum: true
+  }
+  attr {
+    name: "Ncontext_dense"
+    type: "int"
+    default_value {
+      i: 0
+    }
+    has_minimum: true
+  }
+  attr {
+    name: "Nfeature_list_sparse"
+    type: "int"
+    default_value {
+      i: 0
+    }
+    has_minimum: true
+  }
+  attr {
+    name: "Nfeature_list_dense"
+    type: "int"
+    default_value {
+      i: 0
+    }
+    has_minimum: true
+  }
+  attr {
+    name: "context_sparse_types"
+    type: "list(type)"
+    default_value {
+      list {
+      }
+    }
+    has_minimum: true
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_INT64
+        type: DT_STRING
+      }
+    }
+  }
+  attr {
+    name: "Tcontext_dense"
+    type: "list(type)"
+    default_value {
+      list {
+      }
+    }
+    has_minimum: true
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_INT64
+        type: DT_STRING
+      }
+    }
+  }
+  attr {
+    name: "feature_list_dense_types"
+    type: "list(type)"
+    default_value {
+      list {
+      }
+    }
+    has_minimum: true
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_INT64
+        type: DT_STRING
+      }
+    }
+  }
+  attr {
+    name: "context_dense_shapes"
+    type: "list(shape)"
+    default_value {
+      list {
+      }
+    }
+    has_minimum: true
+  }
+  attr {
+    name: "feature_list_sparse_types"
+    type: "list(type)"
+    default_value {
+      list {
+      }
+    }
+    has_minimum: true
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_INT64
+        type: DT_STRING
+      }
+    }
+  }
+  attr {
+    name: "feature_list_dense_shapes"
+    type: "list(shape)"
+    default_value {
+      list {
+      }
+    }
+    has_minimum: true
+  }
+}
+op {
+  name: "ParseTensor"
+  input_arg {
+    name: "serialized"
+    type: DT_STRING
+  }
+  output_arg {
+    name: "output"
+    type_attr: "out_type"
+  }
+  attr {
+    name: "out_type"
+    type: "type"
+  }
+}
+op {
+  name: "Placeholder"
+  output_arg {
+    name: "output"
+    type_attr: "dtype"
+  }
+  attr {
+    name: "dtype"
+    type: "type"
+  }
+  attr {
+    name: "shape"
+    type: "shape"
+    default_value {
+      shape {
+      }
+    }
+  }
+}
+op {
+  name: "Placeholder"
+  output_arg {
+    name: "output"
+    type_attr: "dtype"
+  }
+  attr {
+    name: "dtype"
+    type: "type"
+  }
+  attr {
+    name: "shape"
+    type: "shape"
+    default_value {
+      shape {
+        unknown_rank: true
+      }
+    }
+  }
+}
+op {
+  name: "PlaceholderV2"
+  output_arg {
+    name: "output"
+    type_attr: "dtype"
+  }
+  attr {
+    name: "dtype"
+    type: "type"
+  }
+  attr {
+    name: "shape"
+    type: "shape"
+  }
+}
+op {
+  name: "PlaceholderV2"
+  output_arg {
+    name: "output"
+    type_attr: "dtype"
+  }
+  attr {
+    name: "dtype"
+    type: "type"
+  }
+  attr {
+    name: "shape"
+    type: "shape"
+  }
+  deprecation {
+    version: 23
+  }
+}
+op {
+  name: "PlaceholderWithDefault"
+  input_arg {
+    name: "input"
+    type_attr: "dtype"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "dtype"
+  }
+  attr {
+    name: "dtype"
+    type: "type"
+  }
+  attr {
+    name: "shape"
+    type: "shape"
+  }
+}
+op {
+  name: "Polygamma"
+  input_arg {
+    name: "a"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "x"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "z"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+      }
+    }
+  }
+}
+op {
+  name: "PopulationCount"
+  input_arg {
+    name: "x"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "y"
+    type: DT_UINT8
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_INT8
+        type: DT_INT16
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_UINT8
+        type: DT_UINT16
+      }
+    }
+  }
+}
+op {
+  name: "PopulationCount"
+  input_arg {
+    name: "x"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "y"
+    type: DT_UINT8
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_INT8
+        type: DT_INT16
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_UINT32
+        type: DT_UINT64
+      }
+    }
+  }
+}
+op {
+  name: "Pow"
+  input_arg {
+    name: "x"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "y"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "z"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+      }
+    }
+  }
+}
+op {
+  name: "Pow"
+  input_arg {
+    name: "x"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "y"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "z"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_BFLOAT16
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+      }
+    }
+  }
+}
+op {
+  name: "PrefetchDataset"
+  input_arg {
+    name: "input_dataset"
+    type: DT_VARIANT
+  }
+  input_arg {
+    name: "buffer_size"
+    type: DT_INT64
+  }
+  output_arg {
+    name: "handle"
+    type: DT_VARIANT
+  }
+  attr {
+    name: "output_types"
+    type: "list(type)"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "output_shapes"
+    type: "list(shape)"
+    has_minimum: true
+    minimum: 1
+  }
+  is_stateful: true
+}
+op {
+  name: "PrefetchDataset"
+  input_arg {
+    name: "input_dataset"
+    type: DT_VARIANT
+  }
+  input_arg {
+    name: "buffer_size"
+    type: DT_INT64
+  }
+  output_arg {
+    name: "handle"
+    type: DT_VARIANT
+  }
+  attr {
+    name: "output_types"
+    type: "list(type)"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "output_shapes"
+    type: "list(shape)"
+    has_minimum: true
+    minimum: 1
+  }
+}
+op {
+  name: "PreventGradient"
+  input_arg {
+    name: "input"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+  }
+  attr {
+    name: "message"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+}
+op {
+  name: "Print"
+  input_arg {
+    name: "input"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "data"
+    type_list_attr: "U"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+  }
+  attr {
+    name: "U"
+    type: "list(type)"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "message"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  attr {
+    name: "first_n"
+    type: "int"
+    default_value {
+      i: -1
+    }
+  }
+  attr {
+    name: "summarize"
+    type: "int"
+    default_value {
+      i: 3
+    }
+  }
+  is_stateful: true
+}
+op {
+  name: "Print"
+  input_arg {
+    name: "input"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "data"
+    type_list_attr: "U"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+  }
+  attr {
+    name: "U"
+    type: "list(type)"
+    has_minimum: true
+  }
+  attr {
+    name: "message"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  attr {
+    name: "first_n"
+    type: "int"
+    default_value {
+      i: -1
+    }
+  }
+  attr {
+    name: "summarize"
+    type: "int"
+    default_value {
+      i: 3
+    }
+  }
+  is_stateful: true
+}
+op {
+  name: "PriorityQueue"
+  output_arg {
+    name: "handle"
+    type: DT_STRING
+    is_ref: true
+  }
+  attr {
+    name: "component_types"
+    type: "list(type)"
+    default_value {
+      list {
+      }
+    }
+    has_minimum: true
+  }
+  attr {
+    name: "shapes"
+    type: "list(shape)"
+    has_minimum: true
+  }
+  attr {
+    name: "capacity"
+    type: "int"
+    default_value {
+      i: -1
+    }
+  }
+  attr {
+    name: "container"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  attr {
+    name: "shared_name"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  is_stateful: true
+}
+op {
+  name: "PriorityQueueV2"
+  output_arg {
+    name: "handle"
+    type: DT_RESOURCE
+  }
+  attr {
+    name: "component_types"
+    type: "list(type)"
+    default_value {
+      list {
+      }
+    }
+    has_minimum: true
+  }
+  attr {
+    name: "shapes"
+    type: "list(shape)"
+    has_minimum: true
+  }
+  attr {
+    name: "capacity"
+    type: "int"
+    default_value {
+      i: -1
+    }
+  }
+  attr {
+    name: "container"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  attr {
+    name: "shared_name"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  is_stateful: true
+}
+op {
+  name: "Prod"
+  input_arg {
+    name: "input"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "reduction_indices"
+    type_attr: "Tidx"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "keep_dims"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+      }
+    }
+  }
+  attr {
+    name: "Tidx"
+    type: "type"
+    default_value {
+      type: DT_INT32
+    }
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+}
+op {
+  name: "Prod"
+  input_arg {
+    name: "input"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "reduction_indices"
+    type_attr: "Tidx"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "keep_dims"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+      }
+    }
+  }
+  attr {
+    name: "Tidx"
+    type: "type"
+    default_value {
+      type: DT_INT32
+    }
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+}
+op {
+  name: "Prod"
+  input_arg {
+    name: "input"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "reduction_indices"
+    type_attr: "Tidx"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "keep_dims"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
+      }
+    }
+  }
+  attr {
+    name: "Tidx"
+    type: "type"
+    default_value {
+      type: DT_INT32
+    }
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+}
+op {
+  name: "PyFunc"
+  input_arg {
+    name: "input"
+    type_list_attr: "Tin"
+  }
+  output_arg {
+    name: "output"
+    type_list_attr: "Tout"
+  }
+  attr {
+    name: "token"
+    type: "string"
+  }
+  attr {
+    name: "Tin"
+    type: "list(type)"
+    has_minimum: true
+  }
+  attr {
+    name: "Tout"
+    type: "list(type)"
+    has_minimum: true
+  }
+  is_stateful: true
+}
+op {
+  name: "PyFuncStateless"
+  input_arg {
+    name: "input"
+    type_list_attr: "Tin"
+  }
+  output_arg {
+    name: "output"
+    type_list_attr: "Tout"
+  }
+  attr {
+    name: "token"
+    type: "string"
+  }
+  attr {
+    name: "Tin"
+    type: "list(type)"
+    has_minimum: true
+  }
+  attr {
+    name: "Tout"
+    type: "list(type)"
+    has_minimum: true
+  }
+}
+op {
+  name: "Qr"
+  input_arg {
+    name: "input"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "q"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "r"
+    type_attr: "T"
+  }
+  attr {
+    name: "full_matrices"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_DOUBLE
+        type: DT_FLOAT
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+      }
+    }
+  }
+}
+op {
+  name: "QuantizeAndDequantize"
+  input_arg {
+    name: "input"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "signed_input"
+    type: "bool"
+    default_value {
+      b: true
+    }
+  }
+  attr {
+    name: "num_bits"
+    type: "int"
+    default_value {
+      i: 8
+    }
+  }
+  attr {
+    name: "range_given"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  attr {
+    name: "input_min"
+    type: "float"
+    default_value {
+      f: 0
+    }
+  }
+  attr {
+    name: "input_max"
+    type: "float"
+    default_value {
+      f: 0
+    }
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+      }
+    }
+  }
+}
+op {
+  name: "QuantizeAndDequantize"
+  input_arg {
+    name: "input"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "signed_input"
+    type: "bool"
+    default_value {
+      b: true
+    }
+  }
+  attr {
+    name: "num_bits"
+    type: "int"
+    default_value {
+      i: 8
+    }
+  }
+  attr {
+    name: "range_given"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  attr {
+    name: "input_min"
+    type: "float"
+    default_value {
+      f: 0
+    }
+  }
+  attr {
+    name: "input_max"
+    type: "float"
+    default_value {
+      f: 0
+    }
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+      }
+    }
+  }
+  deprecation {
+    version: 21
+  }
+}
+op {
+  name: "QuantizeAndDequantize"
+  input_arg {
+    name: "input"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "signed_input"
+    type: "bool"
+    default_value {
+      b: true
+    }
+  }
+  attr {
+    name: "num_bits"
+    type: "int"
+    default_value {
+      i: 8
+    }
+  }
+  attr {
+    name: "range_given"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  attr {
+    name: "input_min"
+    type: "float"
+    default_value {
+      f: 0
+    }
+  }
+  attr {
+    name: "input_max"
+    type: "float"
+    default_value {
+      f: 0
+    }
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+      }
+    }
+  }
+  deprecation {
+    version: 22
+  }
+}
+op {
+  name: "QuantizeAndDequantize"
+  input_arg {
+    name: "input"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "signed_input"
+    type: "bool"
+    default_value {
+      b: true
+    }
+  }
+  attr {
+    name: "num_bits"
+    type: "int"
+    default_value {
+      i: 8
+    }
+  }
+  attr {
+    name: "range_given"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  attr {
+    name: "input_min"
+    type: "float"
+    default_value {
+      f: 0
+    }
+  }
+  attr {
+    name: "input_max"
+    type: "float"
+    default_value {
+      f: 0
+    }
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_BFLOAT16
+        type: DT_FLOAT
+        type: DT_DOUBLE
+      }
+    }
+  }
+  deprecation {
+    version: 22
+  }
+}
+op {
+  name: "QuantizeAndDequantizeV2"
+  input_arg {
+    name: "input"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "input_min"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "input_max"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "signed_input"
+    type: "bool"
+    default_value {
+      b: true
+    }
+  }
+  attr {
+    name: "num_bits"
+    type: "int"
+    default_value {
+      i: 8
+    }
+  }
+  attr {
+    name: "range_given"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+      }
+    }
+  }
+}
+op {
+  name: "QuantizeAndDequantizeV2"
+  input_arg {
+    name: "input"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "input_min"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "input_max"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "signed_input"
+    type: "bool"
+    default_value {
+      b: true
+    }
+  }
+  attr {
+    name: "num_bits"
+    type: "int"
+    default_value {
+      i: 8
+    }
+  }
+  attr {
+    name: "range_given"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_BFLOAT16
+        type: DT_FLOAT
+        type: DT_DOUBLE
+      }
+    }
+  }
+}
+op {
+  name: "QuantizeAndDequantizeV3"
+  input_arg {
+    name: "input"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "input_min"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "input_max"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "num_bits"
+    type: DT_INT32
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "signed_input"
+    type: "bool"
+    default_value {
+      b: true
+    }
+  }
+  attr {
+    name: "range_given"
+    type: "bool"
+    default_value {
+      b: true
+    }
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+      }
+    }
+  }
+}
+op {
+  name: "QuantizeAndDequantizeV3"
+  input_arg {
+    name: "input"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "input_min"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "input_max"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "num_bits"
+    type: DT_INT32
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "signed_input"
+    type: "bool"
+    default_value {
+      b: true
+    }
+  }
+  attr {
+    name: "range_given"
+    type: "bool"
+    default_value {
+      b: true
+    }
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_BFLOAT16
+        type: DT_FLOAT
+        type: DT_DOUBLE
+      }
+    }
+  }
+}
+op {
+  name: "QuantizeDownAndShrinkRange"
+  input_arg {
+    name: "input"
+    type_attr: "Tinput"
+  }
+  input_arg {
+    name: "input_min"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "input_max"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "output"
+    type_attr: "out_type"
+  }
+  output_arg {
+    name: "output_min"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "output_max"
+    type: DT_FLOAT
+  }
+  attr {
+    name: "Tinput"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT16
+        type: DT_QUINT16
+        type: DT_QINT32
+      }
+    }
+  }
+  attr {
+    name: "out_type"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT16
+        type: DT_QUINT16
+        type: DT_QINT32
+      }
+    }
+  }
+}
+op {
+  name: "QuantizeV2"
+  input_arg {
+    name: "input"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "min_range"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "max_range"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "output_min"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "output_max"
+    type: DT_FLOAT
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT16
+        type: DT_QUINT16
+        type: DT_QINT32
+      }
+    }
+  }
+  attr {
+    name: "mode"
+    type: "string"
+    default_value {
+      s: "MIN_COMBINED"
+    }
+    allowed_values {
+      list {
+        s: "MIN_COMBINED"
+        s: "MIN_FIRST"
+      }
+    }
+  }
+}
+op {
+  name: "QuantizeV2"
+  input_arg {
+    name: "input"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "min_range"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "max_range"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "output_min"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "output_max"
+    type: DT_FLOAT
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT16
+        type: DT_QUINT16
+        type: DT_QINT32
+      }
+    }
+  }
+  attr {
+    name: "mode"
+    type: "string"
+    default_value {
+      s: "MIN_COMBINED"
+    }
+    allowed_values {
+      list {
+        s: "MIN_COMBINED"
+        s: "MIN_FIRST"
+        s: "SCALED"
+      }
+    }
+  }
+}
+op {
+  name: "QuantizeV2"
+  input_arg {
+    name: "input"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "min_range"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "max_range"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "output_min"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "output_max"
+    type: DT_FLOAT
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT16
+        type: DT_QUINT16
+        type: DT_QINT32
+      }
+    }
+  }
+  attr {
+    name: "mode"
+    type: "string"
+    default_value {
+      s: "MIN_COMBINED"
+    }
+    allowed_values {
+      list {
+        s: "MIN_COMBINED"
+        s: "MIN_FIRST"
+        s: "SCALED"
+      }
+    }
+  }
+  attr {
+    name: "round_mode"
+    type: "string"
+    default_value {
+      s: "HALF_AWAY_FROM_ZERO"
+    }
+    allowed_values {
+      list {
+        s: "HALF_AWAY_FROM_ZERO"
+        s: "HALF_TO_EVEN"
+      }
+    }
+  }
+}
+op {
+  name: "QuantizedAdd"
+  input_arg {
+    name: "x"
+    type_attr: "T1"
+  }
+  input_arg {
+    name: "y"
+    type_attr: "T2"
+  }
+  input_arg {
+    name: "min_x"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "max_x"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "min_y"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "max_y"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "z"
+    type_attr: "Toutput"
+  }
+  output_arg {
+    name: "min_z"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "max_z"
+    type: DT_FLOAT
+  }
+  attr {
+    name: "T1"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT16
+        type: DT_QUINT16
+        type: DT_QINT32
+      }
+    }
+  }
+  attr {
+    name: "T2"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT16
+        type: DT_QUINT16
+        type: DT_QINT32
+      }
+    }
+  }
+  attr {
+    name: "Toutput"
+    type: "type"
+    default_value {
+      type: DT_QINT32
+    }
+    allowed_values {
+      list {
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT16
+        type: DT_QUINT16
+        type: DT_QINT32
+      }
+    }
+  }
+  is_commutative: true
+}
+op {
+  name: "QuantizedAvgPool"
+  input_arg {
+    name: "input"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "min_input"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "max_input"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "min_output"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "max_output"
+    type: DT_FLOAT
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT16
+        type: DT_QUINT16
+        type: DT_QINT32
+      }
+    }
+  }
+  attr {
+    name: "ksize"
+    type: "list(int)"
+  }
+  attr {
+    name: "strides"
+    type: "list(int)"
+  }
+  attr {
+    name: "padding"
+    type: "string"
+    allowed_values {
+      list {
+        s: "SAME"
+        s: "VALID"
+      }
+    }
+  }
+}
+op {
+  name: "QuantizedBatchNormWithGlobalNormalization"
+  input_arg {
+    name: "t"
+    type_attr: "Tinput"
+  }
+  input_arg {
+    name: "t_min"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "t_max"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "m"
+    type_attr: "Tinput"
+  }
+  input_arg {
+    name: "m_min"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "m_max"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "v"
+    type_attr: "Tinput"
+  }
+  input_arg {
+    name: "v_min"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "v_max"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "beta"
+    type_attr: "Tinput"
+  }
+  input_arg {
+    name: "beta_min"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "beta_max"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "gamma"
+    type_attr: "Tinput"
+  }
+  input_arg {
+    name: "gamma_min"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "gamma_max"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "result"
+    type_attr: "out_type"
+  }
+  output_arg {
+    name: "result_min"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "result_max"
+    type: DT_FLOAT
+  }
+  attr {
+    name: "Tinput"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT16
+        type: DT_QUINT16
+        type: DT_QINT32
+      }
+    }
+  }
+  attr {
+    name: "out_type"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT16
+        type: DT_QUINT16
+        type: DT_QINT32
+      }
+    }
+  }
+  attr {
+    name: "variance_epsilon"
+    type: "float"
+  }
+  attr {
+    name: "scale_after_normalization"
+    type: "bool"
+  }
+}
+op {
+  name: "QuantizedBiasAdd"
+  input_arg {
+    name: "input"
+    type_attr: "T1"
+  }
+  input_arg {
+    name: "bias"
+    type_attr: "T2"
+  }
+  input_arg {
+    name: "min_input"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "max_input"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "min_bias"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "max_bias"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "output"
+    type_attr: "out_type"
+  }
+  output_arg {
+    name: "min_out"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "max_out"
+    type: DT_FLOAT
+  }
+  attr {
+    name: "T1"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT16
+        type: DT_QUINT16
+        type: DT_QINT32
+      }
+    }
+  }
+  attr {
+    name: "T2"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT16
+        type: DT_QUINT16
+        type: DT_QINT32
+      }
+    }
+  }
+  attr {
+    name: "out_type"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT16
+        type: DT_QUINT16
+        type: DT_QINT32
+      }
+    }
+  }
+}
+op {
+  name: "QuantizedConcat"
+  input_arg {
+    name: "concat_dim"
+    type: DT_INT32
+  }
+  input_arg {
+    name: "values"
+    type_attr: "T"
+    number_attr: "N"
+  }
+  input_arg {
+    name: "input_mins"
+    type: DT_FLOAT
+    number_attr: "N"
+  }
+  input_arg {
+    name: "input_maxes"
+    type: DT_FLOAT
+    number_attr: "N"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "output_min"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "output_max"
+    type: DT_FLOAT
+  }
+  attr {
+    name: "N"
+    type: "int"
+    has_minimum: true
+    minimum: 2
+  }
+  attr {
+    name: "T"
+    type: "type"
+  }
+}
+op {
+  name: "QuantizedConv2D"
+  input_arg {
+    name: "input"
+    type_attr: "Tinput"
+  }
+  input_arg {
+    name: "filter"
+    type_attr: "Tfilter"
+  }
+  input_arg {
+    name: "min_input"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "max_input"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "min_filter"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "max_filter"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "output"
+    type_attr: "out_type"
+  }
+  output_arg {
+    name: "min_output"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "max_output"
+    type: DT_FLOAT
+  }
+  attr {
+    name: "Tinput"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT16
+        type: DT_QUINT16
+        type: DT_QINT32
+      }
+    }
+  }
+  attr {
+    name: "Tfilter"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT16
+        type: DT_QUINT16
+        type: DT_QINT32
+      }
+    }
+  }
+  attr {
+    name: "out_type"
+    type: "type"
+    default_value {
+      type: DT_QINT32
+    }
+    allowed_values {
+      list {
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT16
+        type: DT_QUINT16
+        type: DT_QINT32
+      }
+    }
+  }
+  attr {
+    name: "strides"
+    type: "list(int)"
+  }
+  attr {
+    name: "padding"
+    type: "string"
+    allowed_values {
+      list {
+        s: "SAME"
+        s: "VALID"
+      }
+    }
+  }
+}
+op {
+  name: "QuantizedConv2D"
+  input_arg {
+    name: "input"
+    type_attr: "Tinput"
+  }
+  input_arg {
+    name: "filter"
+    type_attr: "Tfilter"
+  }
+  input_arg {
+    name: "min_input"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "max_input"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "min_filter"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "max_filter"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "output"
+    type_attr: "out_type"
+  }
+  output_arg {
+    name: "min_output"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "max_output"
+    type: DT_FLOAT
+  }
+  attr {
+    name: "Tinput"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT16
+        type: DT_QUINT16
+        type: DT_QINT32
+      }
+    }
+  }
+  attr {
+    name: "Tfilter"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT16
+        type: DT_QUINT16
+        type: DT_QINT32
+      }
+    }
+  }
+  attr {
+    name: "out_type"
+    type: "type"
+    default_value {
+      type: DT_QINT32
+    }
+    allowed_values {
+      list {
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT16
+        type: DT_QUINT16
+        type: DT_QINT32
+      }
+    }
+  }
+  attr {
+    name: "strides"
+    type: "list(int)"
+  }
+  attr {
+    name: "padding"
+    type: "string"
+    allowed_values {
+      list {
+        s: "SAME"
+        s: "VALID"
+      }
+    }
+  }
+  attr {
+    name: "dilations"
+    type: "list(int)"
+    default_value {
+      list {
+        i: 1
+        i: 1
+        i: 1
+        i: 1
+      }
+    }
+  }
+}
+op {
+  name: "QuantizedInstanceNorm"
+  input_arg {
+    name: "x"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "x_min"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "x_max"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "y"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "y_min"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "y_max"
+    type: DT_FLOAT
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT16
+        type: DT_QUINT16
+        type: DT_QINT32
+      }
+    }
+  }
+  attr {
+    name: "output_range_given"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  attr {
+    name: "given_y_min"
+    type: "float"
+    default_value {
+      f: 0
+    }
+  }
+  attr {
+    name: "given_y_max"
+    type: "float"
+    default_value {
+      f: 0
+    }
+  }
+  attr {
+    name: "variance_epsilon"
+    type: "float"
+    default_value {
+      f: 1e-05
+    }
+  }
+  attr {
+    name: "min_separation"
+    type: "float"
+    default_value {
+      f: 0.001
+    }
+  }
+}
+op {
+  name: "QuantizedMatMul"
+  input_arg {
+    name: "a"
+    type_attr: "T1"
+  }
+  input_arg {
+    name: "b"
+    type_attr: "T2"
+  }
+  input_arg {
+    name: "min_a"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "max_a"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "min_b"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "max_b"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "out"
+    type_attr: "Toutput"
+  }
+  output_arg {
+    name: "min_out"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "max_out"
+    type: DT_FLOAT
+  }
+  attr {
+    name: "T1"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT16
+        type: DT_QUINT16
+        type: DT_QINT32
+      }
+    }
+  }
+  attr {
+    name: "T2"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT16
+        type: DT_QUINT16
+        type: DT_QINT32
+      }
+    }
+  }
+  attr {
+    name: "Toutput"
+    type: "type"
+    default_value {
+      type: DT_QINT32
+    }
+    allowed_values {
+      list {
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT16
+        type: DT_QUINT16
+        type: DT_QINT32
+      }
+    }
+  }
+  attr {
+    name: "transpose_a"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  attr {
+    name: "transpose_b"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  attr {
+    name: "Tactivation"
+    type: "type"
+    default_value {
+      type: DT_QUINT8
+    }
+    allowed_values {
+      list {
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT16
+        type: DT_QUINT16
+        type: DT_QINT32
+      }
+    }
+  }
+}
+op {
+  name: "QuantizedMaxPool"
+  input_arg {
+    name: "input"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "min_input"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "max_input"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "min_output"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "max_output"
+    type: DT_FLOAT
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT16
+        type: DT_QUINT16
+        type: DT_QINT32
+      }
+    }
+  }
+  attr {
+    name: "ksize"
+    type: "list(int)"
+  }
+  attr {
+    name: "strides"
+    type: "list(int)"
+  }
+  attr {
+    name: "padding"
+    type: "string"
+    allowed_values {
+      list {
+        s: "SAME"
+        s: "VALID"
+      }
+    }
+  }
+}
+op {
+  name: "QuantizedMul"
+  input_arg {
+    name: "x"
+    type_attr: "T1"
+  }
+  input_arg {
+    name: "y"
+    type_attr: "T2"
+  }
+  input_arg {
+    name: "min_x"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "max_x"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "min_y"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "max_y"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "z"
+    type_attr: "Toutput"
+  }
+  output_arg {
+    name: "min_z"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "max_z"
+    type: DT_FLOAT
+  }
+  attr {
+    name: "T1"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT16
+        type: DT_QUINT16
+        type: DT_QINT32
+      }
+    }
+  }
+  attr {
+    name: "T2"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT16
+        type: DT_QUINT16
+        type: DT_QINT32
+      }
+    }
+  }
+  attr {
+    name: "Toutput"
+    type: "type"
+    default_value {
+      type: DT_QINT32
+    }
+    allowed_values {
+      list {
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT16
+        type: DT_QUINT16
+        type: DT_QINT32
+      }
+    }
+  }
+  is_commutative: true
+}
+op {
+  name: "QuantizedRelu"
+  input_arg {
+    name: "features"
+    type_attr: "Tinput"
+  }
+  input_arg {
+    name: "min_features"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "max_features"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "activations"
+    type_attr: "out_type"
+  }
+  output_arg {
+    name: "min_activations"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "max_activations"
+    type: DT_FLOAT
+  }
+  attr {
+    name: "Tinput"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT16
+        type: DT_QUINT16
+        type: DT_QINT32
+      }
+    }
+  }
+  attr {
+    name: "out_type"
+    type: "type"
+    default_value {
+      type: DT_QUINT8
+    }
+    allowed_values {
+      list {
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT16
+        type: DT_QUINT16
+        type: DT_QINT32
+      }
+    }
+  }
+}
+op {
+  name: "QuantizedRelu6"
+  input_arg {
+    name: "features"
+    type_attr: "Tinput"
+  }
+  input_arg {
+    name: "min_features"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "max_features"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "activations"
+    type_attr: "out_type"
+  }
+  output_arg {
+    name: "min_activations"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "max_activations"
+    type: DT_FLOAT
+  }
+  attr {
+    name: "Tinput"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT16
+        type: DT_QUINT16
+        type: DT_QINT32
+      }
+    }
+  }
+  attr {
+    name: "out_type"
+    type: "type"
+    default_value {
+      type: DT_QUINT8
+    }
+    allowed_values {
+      list {
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT16
+        type: DT_QUINT16
+        type: DT_QINT32
+      }
+    }
+  }
+}
+op {
+  name: "QuantizedReluX"
+  input_arg {
+    name: "features"
+    type_attr: "Tinput"
+  }
+  input_arg {
+    name: "max_value"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "min_features"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "max_features"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "activations"
+    type_attr: "out_type"
+  }
+  output_arg {
+    name: "min_activations"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "max_activations"
+    type: DT_FLOAT
+  }
+  attr {
+    name: "Tinput"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT16
+        type: DT_QUINT16
+        type: DT_QINT32
+      }
+    }
+  }
+  attr {
+    name: "out_type"
+    type: "type"
+    default_value {
+      type: DT_QUINT8
+    }
+    allowed_values {
+      list {
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT16
+        type: DT_QUINT16
+        type: DT_QINT32
+      }
+    }
+  }
+}
+op {
+  name: "QuantizedReshape"
+  input_arg {
+    name: "tensor"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "shape"
+    type_attr: "Tshape"
+  }
+  input_arg {
+    name: "input_min"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "input_max"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "output_min"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "output_max"
+    type: DT_FLOAT
+  }
+  attr {
+    name: "T"
+    type: "type"
+  }
+  attr {
+    name: "Tshape"
+    type: "type"
+    default_value {
+      type: DT_INT32
+    }
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+}
+op {
+  name: "QuantizedResizeBilinear"
+  input_arg {
+    name: "images"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "size"
+    type: DT_INT32
+  }
+  input_arg {
+    name: "min"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "max"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "resized_images"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "out_min"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "out_max"
+    type: DT_FLOAT
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_FLOAT
+      }
+    }
+  }
+  attr {
+    name: "align_corners"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+}
+op {
+  name: "QueueClose"
+  input_arg {
+    name: "handle"
+    type: DT_STRING
+    is_ref: true
+  }
+  attr {
+    name: "cancel_pending_enqueues"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+}
+op {
+  name: "QueueCloseV2"
+  input_arg {
+    name: "handle"
+    type: DT_RESOURCE
+  }
+  attr {
+    name: "cancel_pending_enqueues"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  is_stateful: true
+}
+op {
+  name: "QueueDequeue"
+  input_arg {
+    name: "handle"
+    type: DT_STRING
+    is_ref: true
+  }
+  output_arg {
+    name: "components"
+    type_list_attr: "component_types"
+  }
+  attr {
+    name: "component_types"
+    type: "list(type)"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "timeout_ms"
+    type: "int"
+    default_value {
+      i: -1
+    }
+  }
+}
+op {
+  name: "QueueDequeueMany"
+  input_arg {
+    name: "handle"
+    type: DT_STRING
+    is_ref: true
+  }
+  input_arg {
+    name: "n"
+    type: DT_INT32
+  }
+  output_arg {
+    name: "components"
+    type_list_attr: "component_types"
+  }
+  attr {
+    name: "component_types"
+    type: "list(type)"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "timeout_ms"
+    type: "int"
+    default_value {
+      i: -1
+    }
+  }
+}
+op {
+  name: "QueueDequeueManyV2"
+  input_arg {
+    name: "handle"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "n"
+    type: DT_INT32
+  }
+  output_arg {
+    name: "components"
+    type_list_attr: "component_types"
+  }
+  attr {
+    name: "component_types"
+    type: "list(type)"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "timeout_ms"
+    type: "int"
+    default_value {
+      i: -1
+    }
+  }
+  is_stateful: true
+}
+op {
+  name: "QueueDequeueUpTo"
+  input_arg {
+    name: "handle"
+    type: DT_STRING
+    is_ref: true
+  }
+  input_arg {
+    name: "n"
+    type: DT_INT32
+  }
+  output_arg {
+    name: "components"
+    type_list_attr: "component_types"
+  }
+  attr {
+    name: "component_types"
+    type: "list(type)"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "timeout_ms"
+    type: "int"
+    default_value {
+      i: -1
+    }
+  }
+}
+op {
+  name: "QueueDequeueUpToV2"
+  input_arg {
+    name: "handle"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "n"
+    type: DT_INT32
+  }
+  output_arg {
+    name: "components"
+    type_list_attr: "component_types"
+  }
+  attr {
+    name: "component_types"
+    type: "list(type)"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "timeout_ms"
+    type: "int"
+    default_value {
+      i: -1
+    }
+  }
+  is_stateful: true
+}
+op {
+  name: "QueueDequeueV2"
+  input_arg {
+    name: "handle"
+    type: DT_RESOURCE
+  }
+  output_arg {
+    name: "components"
+    type_list_attr: "component_types"
+  }
+  attr {
+    name: "component_types"
+    type: "list(type)"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "timeout_ms"
+    type: "int"
+    default_value {
+      i: -1
+    }
+  }
+  is_stateful: true
+}
+op {
+  name: "QueueEnqueue"
+  input_arg {
+    name: "handle"
+    type: DT_STRING
+    is_ref: true
+  }
+  input_arg {
+    name: "components"
+    type_list_attr: "Tcomponents"
+  }
+  attr {
+    name: "Tcomponents"
+    type: "list(type)"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "timeout_ms"
+    type: "int"
+    default_value {
+      i: -1
+    }
+  }
+}
+op {
+  name: "QueueEnqueueMany"
+  input_arg {
+    name: "handle"
+    type: DT_STRING
+    is_ref: true
+  }
+  input_arg {
+    name: "components"
+    type_list_attr: "Tcomponents"
+  }
+  attr {
+    name: "Tcomponents"
+    type: "list(type)"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "timeout_ms"
+    type: "int"
+    default_value {
+      i: -1
+    }
+  }
+}
+op {
+  name: "QueueEnqueueManyV2"
+  input_arg {
+    name: "handle"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "components"
+    type_list_attr: "Tcomponents"
+  }
+  attr {
+    name: "Tcomponents"
+    type: "list(type)"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "timeout_ms"
+    type: "int"
+    default_value {
+      i: -1
+    }
+  }
+  is_stateful: true
+}
+op {
+  name: "QueueEnqueueV2"
+  input_arg {
+    name: "handle"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "components"
+    type_list_attr: "Tcomponents"
+  }
+  attr {
+    name: "Tcomponents"
+    type: "list(type)"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "timeout_ms"
+    type: "int"
+    default_value {
+      i: -1
+    }
+  }
+  is_stateful: true
+}
+op {
+  name: "QueueIsClosed"
+  input_arg {
+    name: "handle"
+    type: DT_STRING
+    is_ref: true
+  }
+  output_arg {
+    name: "is_closed"
+    type: DT_BOOL
+  }
+}
+op {
+  name: "QueueIsClosedV2"
+  input_arg {
+    name: "handle"
+    type: DT_RESOURCE
+  }
+  output_arg {
+    name: "is_closed"
+    type: DT_BOOL
+  }
+  is_stateful: true
+}
+op {
+  name: "QueueSize"
+  input_arg {
+    name: "handle"
+    type: DT_STRING
+    is_ref: true
+  }
+  output_arg {
+    name: "size"
+    type: DT_INT32
+  }
+}
+op {
+  name: "QueueSizeV2"
+  input_arg {
+    name: "handle"
+    type: DT_RESOURCE
+  }
+  output_arg {
+    name: "size"
+    type: DT_INT32
+  }
+  is_stateful: true
+}
+op {
+  name: "RFFT"
+  input_arg {
+    name: "input"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "fft_length"
+    type: DT_INT32
+  }
+  output_arg {
+    name: "output"
+    type: DT_COMPLEX64
+  }
+}
+op {
+  name: "RFFT2D"
+  input_arg {
+    name: "input"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "fft_length"
+    type: DT_INT32
+  }
+  output_arg {
+    name: "output"
+    type: DT_COMPLEX64
+  }
+}
+op {
+  name: "RFFT3D"
+  input_arg {
+    name: "input"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "fft_length"
+    type: DT_INT32
+  }
+  output_arg {
+    name: "output"
+    type: DT_COMPLEX64
+  }
+}
+op {
+  name: "RGBToHSV"
+  input_arg {
+    name: "images"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    default_value {
+      type: DT_FLOAT
+    }
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+      }
+    }
+  }
+}
+op {
+  name: "RandomCrop"
+  input_arg {
+    name: "image"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "size"
+    type: DT_INT64
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_UINT8
+        type: DT_INT8
+        type: DT_INT16
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_FLOAT
+        type: DT_DOUBLE
+      }
+    }
+  }
+  attr {
+    name: "seed"
+    type: "int"
+    default_value {
+      i: 0
+    }
+  }
+  attr {
+    name: "seed2"
+    type: "int"
+    default_value {
+      i: 0
+    }
+  }
+  deprecation {
+    version: 8
+  }
+  is_stateful: true
+}
+op {
+  name: "RandomDataset"
+  input_arg {
+    name: "seed"
+    type: DT_INT64
+  }
+  input_arg {
+    name: "seed2"
+    type: DT_INT64
+  }
+  output_arg {
+    name: "handle"
+    type: DT_VARIANT
+  }
+  attr {
+    name: "output_types"
+    type: "list(type)"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "output_shapes"
+    type: "list(shape)"
+    has_minimum: true
+    minimum: 1
+  }
+  is_stateful: true
+}
+op {
+  name: "RandomGamma"
+  input_arg {
+    name: "shape"
+    type_attr: "S"
+  }
+  input_arg {
+    name: "alpha"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "seed"
+    type: "int"
+    default_value {
+      i: 0
+    }
+  }
+  attr {
+    name: "seed2"
+    type: "int"
+    default_value {
+      i: 0
+    }
+  }
+  attr {
+    name: "S"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_FLOAT
+        type: DT_DOUBLE
+      }
+    }
+  }
+  is_stateful: true
+}
+op {
+  name: "RandomPoisson"
+  input_arg {
+    name: "shape"
+    type_attr: "S"
+  }
+  input_arg {
+    name: "rate"
+    type_attr: "dtype"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "dtype"
+  }
+  attr {
+    name: "seed"
+    type: "int"
+    default_value {
+      i: 0
+    }
+  }
+  attr {
+    name: "seed2"
+    type: "int"
+    default_value {
+      i: 0
+    }
+  }
+  attr {
+    name: "S"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+  attr {
+    name: "dtype"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_FLOAT
+        type: DT_DOUBLE
+      }
+    }
+  }
+  is_stateful: true
+}
+op {
+  name: "RandomPoisson"
+  input_arg {
+    name: "shape"
+    type_attr: "S"
+  }
+  input_arg {
+    name: "rate"
+    type_attr: "dtype"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "dtype"
+  }
+  attr {
+    name: "seed"
+    type: "int"
+    default_value {
+      i: 0
+    }
+  }
+  attr {
+    name: "seed2"
+    type: "int"
+    default_value {
+      i: 0
+    }
+  }
+  attr {
+    name: "S"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+  attr {
+    name: "dtype"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_FLOAT
+        type: DT_DOUBLE
+      }
+    }
+  }
+  deprecation {
+    version: 25
+  }
+  is_stateful: true
+}
+op {
+  name: "RandomPoissonV2"
+  input_arg {
+    name: "shape"
+    type_attr: "S"
+  }
+  input_arg {
+    name: "rate"
+    type_attr: "R"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "dtype"
+  }
+  attr {
+    name: "seed"
+    type: "int"
+    default_value {
+      i: 0
+    }
+  }
+  attr {
+    name: "seed2"
+    type: "int"
+    default_value {
+      i: 0
+    }
+  }
+  attr {
+    name: "S"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+  attr {
+    name: "R"
+    type: "type"
+    default_value {
+      type: DT_DOUBLE
+    }
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+  attr {
+    name: "dtype"
+    type: "type"
+    default_value {
+      type: DT_INT64
+    }
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+  is_stateful: true
+}
+op {
+  name: "RandomShuffle"
+  input_arg {
+    name: "value"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "seed"
+    type: "int"
+    default_value {
+      i: 0
+    }
+  }
+  attr {
+    name: "seed2"
+    type: "int"
+    default_value {
+      i: 0
+    }
+  }
+  attr {
+    name: "T"
+    type: "type"
+  }
+  is_stateful: true
+}
+op {
+  name: "RandomShuffleQueue"
+  output_arg {
+    name: "handle"
+    type: DT_STRING
+    is_ref: true
+  }
+  attr {
+    name: "component_types"
+    type: "list(type)"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "shapes"
+    type: "list(shape)"
+    default_value {
+      list {
+      }
+    }
+    has_minimum: true
+  }
+  attr {
+    name: "capacity"
+    type: "int"
+    default_value {
+      i: -1
+    }
+  }
+  attr {
+    name: "min_after_dequeue"
+    type: "int"
+    default_value {
+      i: 0
+    }
+  }
+  attr {
+    name: "seed"
+    type: "int"
+    default_value {
+      i: 0
+    }
+  }
+  attr {
+    name: "seed2"
+    type: "int"
+    default_value {
+      i: 0
+    }
+  }
+  attr {
+    name: "container"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  attr {
+    name: "shared_name"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  is_stateful: true
+}
+op {
+  name: "RandomShuffleQueueV2"
+  output_arg {
+    name: "handle"
     type: DT_RESOURCE
   }
-  attr {
-    name: "container"
-    type: "string"
-    default_value {
-      s: ""
-    }
+  attr {
+    name: "component_types"
+    type: "list(type)"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "shapes"
+    type: "list(shape)"
+    default_value {
+      list {
+      }
+    }
+    has_minimum: true
+  }
+  attr {
+    name: "capacity"
+    type: "int"
+    default_value {
+      i: -1
+    }
+  }
+  attr {
+    name: "min_after_dequeue"
+    type: "int"
+    default_value {
+      i: 0
+    }
+  }
+  attr {
+    name: "seed"
+    type: "int"
+    default_value {
+      i: 0
+    }
+  }
+  attr {
+    name: "seed2"
+    type: "int"
+    default_value {
+      i: 0
+    }
+  }
+  attr {
+    name: "container"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  attr {
+    name: "shared_name"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  is_stateful: true
+}
+op {
+  name: "RandomStandardNormal"
+  input_arg {
+    name: "shape"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "dtype"
+  }
+  attr {
+    name: "seed"
+    type: "int"
+    default_value {
+      i: 0
+    }
+  }
+  attr {
+    name: "seed2"
+    type: "int"
+    default_value {
+      i: 0
+    }
+  }
+  attr {
+    name: "dtype"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_FLOAT
+        type: DT_DOUBLE
+      }
+    }
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+  is_stateful: true
+}
+op {
+  name: "RandomStandardNormal"
+  input_arg {
+    name: "shape"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "dtype"
+  }
+  attr {
+    name: "seed"
+    type: "int"
+    default_value {
+      i: 0
+    }
+  }
+  attr {
+    name: "seed2"
+    type: "int"
+    default_value {
+      i: 0
+    }
+  }
+  attr {
+    name: "dtype"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_BFLOAT16
+        type: DT_FLOAT
+        type: DT_DOUBLE
+      }
+    }
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+  is_stateful: true
+}
+op {
+  name: "RandomUniform"
+  input_arg {
+    name: "shape"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "dtype"
+  }
+  attr {
+    name: "seed"
+    type: "int"
+    default_value {
+      i: 0
+    }
+  }
+  attr {
+    name: "seed2"
+    type: "int"
+    default_value {
+      i: 0
+    }
+  }
+  attr {
+    name: "dtype"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_FLOAT
+        type: DT_DOUBLE
+      }
+    }
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+  is_stateful: true
+}
+op {
+  name: "RandomUniform"
+  input_arg {
+    name: "shape"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "dtype"
+  }
+  attr {
+    name: "seed"
+    type: "int"
+    default_value {
+      i: 0
+    }
+  }
+  attr {
+    name: "seed2"
+    type: "int"
+    default_value {
+      i: 0
+    }
+  }
+  attr {
+    name: "dtype"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_BFLOAT16
+        type: DT_FLOAT
+        type: DT_DOUBLE
+      }
+    }
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+  is_stateful: true
+}
+op {
+  name: "RandomUniformInt"
+  input_arg {
+    name: "shape"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "minval"
+    type_attr: "Tout"
+  }
+  input_arg {
+    name: "maxval"
+    type_attr: "Tout"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "Tout"
   }
   attr {
-    name: "shared_name"
-    type: "string"
+    name: "seed"
+    type: "int"
     default_value {
-      s: ""
+      i: 0
     }
   }
   attr {
-    name: "use_node_name_sharing"
-    type: "bool"
+    name: "seed2"
+    type: "int"
     default_value {
-      b: false
+      i: 0
     }
   }
   attr {
-    name: "key_dtype"
+    name: "Tout"
     type: "type"
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
   }
   attr {
-    name: "value_dtype"
+    name: "T"
     type: "type"
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
   }
   is_stateful: true
 }
 op {
-  name: "Neg"
+  name: "Range"
   input_arg {
-    name: "x"
-    type_attr: "T"
+    name: "start"
+    type_attr: "Tidx"
+  }
+  input_arg {
+    name: "limit"
+    type_attr: "Tidx"
+  }
+  input_arg {
+    name: "delta"
+    type_attr: "Tidx"
   }
   output_arg {
-    name: "y"
-    type_attr: "T"
+    name: "output"
+    type_attr: "Tidx"
   }
   attr {
-    name: "T"
+    name: "Tidx"
     type: "type"
+    default_value {
+      type: DT_INT32
+    }
     allowed_values {
       list {
-        type: DT_HALF
         type: DT_FLOAT
         type: DT_DOUBLE
         type: DT_INT32
         type: DT_INT64
-        type: DT_COMPLEX64
-        type: DT_COMPLEX128
       }
     }
   }
 }
 op {
-  name: "NegTrain"
+  name: "Range"
   input_arg {
-    name: "w_in"
-    type: DT_FLOAT
-    is_ref: true
+    name: "start"
+    type_attr: "Tidx"
   }
   input_arg {
-    name: "w_out"
-    type: DT_FLOAT
-    is_ref: true
+    name: "limit"
+    type_attr: "Tidx"
   }
   input_arg {
-    name: "examples"
-    type: DT_INT32
+    name: "delta"
+    type_attr: "Tidx"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "Tidx"
+  }
+  attr {
+    name: "Tidx"
+    type: "type"
+    default_value {
+      type: DT_INT32
+    }
+    allowed_values {
+      list {
+        type: DT_BFLOAT16
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
   }
+}
+op {
+  name: "RangeDataset"
   input_arg {
-    name: "labels"
-    type: DT_INT32
+    name: "start"
+    type: DT_INT64
   }
   input_arg {
-    name: "lr"
-    type: DT_FLOAT
+    name: "stop"
+    type: DT_INT64
   }
-  attr {
-    name: "vocab_count"
-    type: "list(int)"
+  input_arg {
+    name: "step"
+    type: DT_INT64
+  }
+  output_arg {
+    name: "handle"
+    type: DT_VARIANT
   }
   attr {
-    name: "num_negative_samples"
-    type: "int"
+    name: "output_types"
+    type: "list(type)"
+    has_minimum: true
+    minimum: 1
   }
-  deprecation {
-    version: 19
+  attr {
+    name: "output_shapes"
+    type: "list(shape)"
+    has_minimum: true
+    minimum: 1
   }
   is_stateful: true
 }
 op {
-  name: "NextIteration"
+  name: "Rank"
   input_arg {
-    name: "data"
+    name: "input"
     type_attr: "T"
   }
   output_arg {
     name: "output"
-    type_attr: "T"
+    type: DT_INT32
   }
   attr {
     name: "T"
@@ -20197,59 +31718,273 @@ op {
   }
 }
 op {
-  name: "NoOp"
+  name: "ReadFile"
+  input_arg {
+    name: "filename"
+    type: DT_STRING
+  }
+  output_arg {
+    name: "contents"
+    type: DT_STRING
+  }
 }
 op {
-  name: "NonMaxSuppression"
+  name: "ReadVariableOp"
   input_arg {
-    name: "boxes"
-    type: DT_FLOAT
+    name: "resource"
+    type: DT_RESOURCE
+  }
+  output_arg {
+    name: "value"
+    type_attr: "dtype"
+  }
+  attr {
+    name: "dtype"
+    type: "type"
   }
+  is_stateful: true
+}
+op {
+  name: "ReaderNumRecordsProduced"
   input_arg {
-    name: "scores"
-    type: DT_FLOAT
+    name: "reader_handle"
+    type: DT_STRING
+    is_ref: true
+  }
+  output_arg {
+    name: "records_produced"
+    type: DT_INT64
   }
+}
+op {
+  name: "ReaderNumRecordsProducedV2"
   input_arg {
-    name: "max_output_size"
-    type: DT_INT32
+    name: "reader_handle"
+    type: DT_RESOURCE
   }
   output_arg {
-    name: "selected_indices"
-    type: DT_INT32
+    name: "records_produced"
+    type: DT_INT64
   }
-  attr {
-    name: "iou_threshold"
-    type: "float"
-    default_value {
-      f: 0.5
-    }
+  is_stateful: true
+}
+op {
+  name: "ReaderNumWorkUnitsCompleted"
+  input_arg {
+    name: "reader_handle"
+    type: DT_STRING
+    is_ref: true
+  }
+  output_arg {
+    name: "units_completed"
+    type: DT_INT64
   }
 }
 op {
-  name: "NonMaxSuppressionV2"
+  name: "ReaderNumWorkUnitsCompletedV2"
   input_arg {
-    name: "boxes"
-    type: DT_FLOAT
+    name: "reader_handle"
+    type: DT_RESOURCE
+  }
+  output_arg {
+    name: "units_completed"
+    type: DT_INT64
+  }
+  is_stateful: true
+}
+op {
+  name: "ReaderRead"
+  input_arg {
+    name: "reader_handle"
+    type: DT_STRING
+    is_ref: true
   }
   input_arg {
-    name: "scores"
-    type: DT_FLOAT
+    name: "queue_handle"
+    type: DT_STRING
+    is_ref: true
+  }
+  output_arg {
+    name: "key"
+    type: DT_STRING
   }
+  output_arg {
+    name: "value"
+    type: DT_STRING
+  }
+}
+op {
+  name: "ReaderReadUpTo"
   input_arg {
-    name: "max_output_size"
-    type: DT_INT32
+    name: "reader_handle"
+    type: DT_STRING
+    is_ref: true
   }
   input_arg {
-    name: "iou_threshold"
-    type: DT_FLOAT
+    name: "queue_handle"
+    type: DT_STRING
+    is_ref: true
+  }
+  input_arg {
+    name: "num_records"
+    type: DT_INT64
   }
   output_arg {
-    name: "selected_indices"
-    type: DT_INT32
+    name: "keys"
+    type: DT_STRING
+  }
+  output_arg {
+    name: "values"
+    type: DT_STRING
   }
 }
 op {
-  name: "NotEqual"
+  name: "ReaderReadUpToV2"
+  input_arg {
+    name: "reader_handle"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "queue_handle"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "num_records"
+    type: DT_INT64
+  }
+  output_arg {
+    name: "keys"
+    type: DT_STRING
+  }
+  output_arg {
+    name: "values"
+    type: DT_STRING
+  }
+  is_stateful: true
+}
+op {
+  name: "ReaderReadV2"
+  input_arg {
+    name: "reader_handle"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "queue_handle"
+    type: DT_RESOURCE
+  }
+  output_arg {
+    name: "key"
+    type: DT_STRING
+  }
+  output_arg {
+    name: "value"
+    type: DT_STRING
+  }
+  is_stateful: true
+}
+op {
+  name: "ReaderReset"
+  input_arg {
+    name: "reader_handle"
+    type: DT_STRING
+    is_ref: true
+  }
+}
+op {
+  name: "ReaderResetV2"
+  input_arg {
+    name: "reader_handle"
+    type: DT_RESOURCE
+  }
+  is_stateful: true
+}
+op {
+  name: "ReaderRestoreState"
+  input_arg {
+    name: "reader_handle"
+    type: DT_STRING
+    is_ref: true
+  }
+  input_arg {
+    name: "state"
+    type: DT_STRING
+  }
+}
+op {
+  name: "ReaderRestoreStateV2"
+  input_arg {
+    name: "reader_handle"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "state"
+    type: DT_STRING
+  }
+  is_stateful: true
+}
+op {
+  name: "ReaderSerializeState"
+  input_arg {
+    name: "reader_handle"
+    type: DT_STRING
+    is_ref: true
+  }
+  output_arg {
+    name: "state"
+    type: DT_STRING
+  }
+}
+op {
+  name: "ReaderSerializeStateV2"
+  input_arg {
+    name: "reader_handle"
+    type: DT_RESOURCE
+  }
+  output_arg {
+    name: "state"
+    type: DT_STRING
+  }
+  is_stateful: true
+}
+op {
+  name: "Real"
+  input_arg {
+    name: "input"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "Tout"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    default_value {
+      type: DT_COMPLEX64
+    }
+    allowed_values {
+      list {
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+      }
+    }
+  }
+  attr {
+    name: "Tout"
+    type: "type"
+    default_value {
+      type: DT_FLOAT
+    }
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+      }
+    }
+  }
+}
+op {
+  name: "RealDiv"
   input_arg {
     name: "x"
     type_attr: "T"
@@ -20260,7 +31995,7 @@ op {
   }
   output_arg {
     name: "z"
-    type: DT_BOOL
+    type_attr: "T"
   }
   attr {
     name: "T"
@@ -20272,156 +32007,172 @@ op {
         type: DT_DOUBLE
         type: DT_UINT8
         type: DT_INT8
+        type: DT_UINT16
         type: DT_INT16
         type: DT_INT32
         type: DT_INT64
         type: DT_COMPLEX64
-        type: DT_QUINT8
-        type: DT_QINT8
-        type: DT_QINT32
-        type: DT_STRING
-        type: DT_BOOL
         type: DT_COMPLEX128
       }
     }
   }
-  is_commutative: true
 }
 op {
-  name: "NthElement"
+  name: "RealDiv"
+  input_arg {
+    name: "x"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "y"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "z"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_BFLOAT16
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_UINT8
+        type: DT_INT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+      }
+    }
+  }
+}
+op {
+  name: "Reciprocal"
+  input_arg {
+    name: "x"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "y"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+      }
+    }
+  }
+}
+op {
+  name: "Reciprocal"
+  input_arg {
+    name: "x"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "y"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_BFLOAT16
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+      }
+    }
+  }
+}
+op {
+  name: "ReciprocalGrad"
   input_arg {
-    name: "input"
+    name: "x"
     type_attr: "T"
   }
   input_arg {
-    name: "n"
-    type: DT_INT32
+    name: "y"
+    type_attr: "T"
   }
   output_arg {
-    name: "values"
+    name: "z"
     type_attr: "T"
   }
-  attr {
-    name: "reverse"
-    type: "bool"
-    default_value {
-      b: false
-    }
-  }
   attr {
     name: "T"
     type: "type"
     allowed_values {
       list {
+        type: DT_HALF
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT32
-        type: DT_INT64
-        type: DT_UINT8
-        type: DT_INT16
-        type: DT_INT8
-        type: DT_UINT16
-        type: DT_HALF
-        type: DT_UINT32
-        type: DT_UINT64
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
       }
     }
   }
 }
 op {
-  name: "OneHot"
-  input_arg {
-    name: "indices"
-    type_attr: "TI"
-  }
-  input_arg {
-    name: "depth"
-    type: DT_INT32
-  }
+  name: "ReciprocalGrad"
   input_arg {
-    name: "on_value"
+    name: "y"
     type_attr: "T"
   }
   input_arg {
-    name: "off_value"
+    name: "dy"
     type_attr: "T"
   }
   output_arg {
-    name: "output"
+    name: "z"
     type_attr: "T"
   }
-  attr {
-    name: "axis"
-    type: "int"
-    default_value {
-      i: -1
-    }
-  }
   attr {
     name: "T"
     type: "type"
-  }
-  attr {
-    name: "TI"
-    type: "type"
-    default_value {
-      type: DT_INT64
-    }
     allowed_values {
       list {
-        type: DT_UINT8
-        type: DT_INT32
-        type: DT_INT64
+        type: DT_HALF
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
       }
     }
   }
 }
 op {
-  name: "OneShotIterator"
-  output_arg {
-    name: "handle"
-    type: DT_RESOURCE
-  }
-  attr {
-    name: "dataset_factory"
-    type: "func"
-  }
-  attr {
-    name: "output_types"
-    type: "list(type)"
-    has_minimum: true
-    minimum: 1
-  }
-  attr {
-    name: "output_shapes"
-    type: "list(shape)"
-    has_minimum: true
-    minimum: 1
-  }
-  attr {
-    name: "container"
-    type: "string"
-    default_value {
-      s: ""
-    }
-  }
-  attr {
-    name: "shared_name"
-    type: "string"
-    default_value {
-      s: ""
-    }
+  name: "ReciprocalGrad"
+  input_arg {
+    name: "y"
+    type_attr: "T"
   }
-  is_stateful: true
-}
-op {
-  name: "OnesLike"
   input_arg {
-    name: "x"
+    name: "dy"
     type_attr: "T"
   }
   output_arg {
-    name: "y"
+    name: "z"
     type_attr: "T"
   }
   attr {
@@ -20429,10 +32180,10 @@ op {
     type: "type"
     allowed_values {
       list {
+        type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT32
-        type: DT_INT64
         type: DT_COMPLEX64
         type: DT_COMPLEX128
       }
@@ -20440,738 +32191,701 @@ op {
   }
 }
 op {
-  name: "OrderedMapClear"
-  attr {
-    name: "capacity"
-    type: "int"
-    default_value {
-      i: 0
-    }
-    has_minimum: true
-  }
-  attr {
-    name: "memory_limit"
-    type: "int"
-    default_value {
-      i: 0
-    }
-    has_minimum: true
-  }
-  attr {
-    name: "dtypes"
-    type: "list(type)"
+  name: "RecordInput"
+  output_arg {
+    name: "records"
+    type: DT_STRING
   }
   attr {
-    name: "container"
+    name: "file_pattern"
     type: "string"
-    default_value {
-      s: ""
-    }
   }
   attr {
-    name: "shared_name"
-    type: "string"
+    name: "file_random_seed"
+    type: "int"
     default_value {
-      s: ""
+      i: 301
     }
   }
-  is_stateful: true
-}
-op {
-  name: "OrderedMapIncompleteSize"
-  output_arg {
-    name: "size"
-    type: DT_INT32
-  }
   attr {
-    name: "capacity"
-    type: "int"
+    name: "file_shuffle_shift_ratio"
+    type: "float"
     default_value {
-      i: 0
+      f: 0
     }
-    has_minimum: true
   }
   attr {
-    name: "memory_limit"
+    name: "file_buffer_size"
     type: "int"
     default_value {
-      i: 0
+      i: 10000
     }
-    has_minimum: true
-  }
-  attr {
-    name: "dtypes"
-    type: "list(type)"
   }
   attr {
-    name: "container"
-    type: "string"
+    name: "file_parallelism"
+    type: "int"
     default_value {
-      s: ""
+      i: 16
     }
   }
   attr {
-    name: "shared_name"
-    type: "string"
+    name: "batch_size"
+    type: "int"
     default_value {
-      s: ""
+      i: 32
     }
   }
   is_stateful: true
 }
 op {
-  name: "OrderedMapPeek"
+  name: "ReduceJoin"
   input_arg {
-    name: "key"
-    type: DT_INT64
+    name: "inputs"
+    type: DT_STRING
   }
   input_arg {
-    name: "indices"
+    name: "reduction_indices"
     type: DT_INT32
   }
   output_arg {
-    name: "values"
-    type_list_attr: "dtypes"
-  }
-  attr {
-    name: "capacity"
-    type: "int"
-    default_value {
-      i: 0
-    }
-    has_minimum: true
-  }
-  attr {
-    name: "memory_limit"
-    type: "int"
-    default_value {
-      i: 0
-    }
-    has_minimum: true
-  }
-  attr {
-    name: "dtypes"
-    type: "list(type)"
-    has_minimum: true
-    minimum: 1
+    name: "output"
+    type: DT_STRING
   }
   attr {
-    name: "container"
-    type: "string"
+    name: "keep_dims"
+    type: "bool"
     default_value {
-      s: ""
+      b: false
     }
   }
   attr {
-    name: "shared_name"
+    name: "separator"
     type: "string"
     default_value {
       s: ""
     }
   }
-  is_stateful: true
 }
 op {
-  name: "OrderedMapSize"
-  output_arg {
-    name: "size"
-    type: DT_INT32
+  name: "RefEnter"
+  input_arg {
+    name: "data"
+    type_attr: "T"
+    is_ref: true
   }
-  attr {
-    name: "capacity"
-    type: "int"
-    default_value {
-      i: 0
-    }
-    has_minimum: true
+  output_arg {
+    name: "output"
+    type_attr: "T"
+    is_ref: true
   }
   attr {
-    name: "memory_limit"
-    type: "int"
-    default_value {
-      i: 0
-    }
-    has_minimum: true
+    name: "T"
+    type: "type"
   }
   attr {
-    name: "dtypes"
-    type: "list(type)"
+    name: "frame_name"
+    type: "string"
   }
   attr {
-    name: "container"
-    type: "string"
+    name: "is_constant"
+    type: "bool"
     default_value {
-      s: ""
+      b: false
     }
   }
   attr {
-    name: "shared_name"
-    type: "string"
+    name: "parallel_iterations"
+    type: "int"
     default_value {
-      s: ""
+      i: 10
     }
   }
-  is_stateful: true
 }
 op {
-  name: "OrderedMapStage"
+  name: "RefExit"
   input_arg {
-    name: "key"
-    type: DT_INT64
+    name: "data"
+    type_attr: "T"
+    is_ref: true
   }
-  input_arg {
-    name: "indices"
-    type: DT_INT32
+  output_arg {
+    name: "output"
+    type_attr: "T"
+    is_ref: true
+  }
+  attr {
+    name: "T"
+    type: "type"
   }
+}
+op {
+  name: "RefIdentity"
   input_arg {
-    name: "values"
-    type_list_attr: "fake_dtypes"
+    name: "input"
+    type_attr: "T"
+    is_ref: true
   }
-  attr {
-    name: "capacity"
-    type: "int"
-    default_value {
-      i: 0
-    }
-    has_minimum: true
+  output_arg {
+    name: "output"
+    type_attr: "T"
+    is_ref: true
   }
   attr {
-    name: "memory_limit"
-    type: "int"
-    default_value {
-      i: 0
-    }
-    has_minimum: true
+    name: "T"
+    type: "type"
+  }
+  allows_uninitialized_input: true
+}
+op {
+  name: "RefMerge"
+  input_arg {
+    name: "inputs"
+    type_attr: "T"
+    number_attr: "N"
+    is_ref: true
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+    is_ref: true
+  }
+  output_arg {
+    name: "value_index"
+    type: DT_INT32
   }
   attr {
-    name: "dtypes"
-    type: "list(type)"
+    name: "T"
+    type: "type"
   }
   attr {
-    name: "fake_dtypes"
-    type: "list(type)"
+    name: "N"
+    type: "int"
     has_minimum: true
     minimum: 1
   }
-  attr {
-    name: "container"
-    type: "string"
-    default_value {
-      s: ""
-    }
+}
+op {
+  name: "RefNextIteration"
+  input_arg {
+    name: "data"
+    type_attr: "T"
+    is_ref: true
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+    is_ref: true
   }
   attr {
-    name: "shared_name"
-    type: "string"
-    default_value {
-      s: ""
-    }
+    name: "T"
+    type: "type"
   }
-  is_stateful: true
 }
 op {
-  name: "OrderedMapUnstage"
+  name: "RefSelect"
   input_arg {
-    name: "key"
-    type: DT_INT64
+    name: "index"
+    type: DT_INT32
   }
   input_arg {
-    name: "indices"
-    type: DT_INT32
+    name: "inputs"
+    type_attr: "T"
+    number_attr: "N"
+    is_ref: true
   }
   output_arg {
-    name: "values"
-    type_list_attr: "dtypes"
+    name: "output"
+    type_attr: "T"
+    is_ref: true
   }
   attr {
-    name: "capacity"
-    type: "int"
-    default_value {
-      i: 0
-    }
-    has_minimum: true
+    name: "T"
+    type: "type"
   }
   attr {
-    name: "memory_limit"
+    name: "N"
     type: "int"
-    default_value {
-      i: 0
-    }
-    has_minimum: true
-  }
-  attr {
-    name: "dtypes"
-    type: "list(type)"
     has_minimum: true
     minimum: 1
   }
-  attr {
-    name: "container"
-    type: "string"
-    default_value {
-      s: ""
-    }
-  }
-  attr {
-    name: "shared_name"
-    type: "string"
-    default_value {
-      s: ""
-    }
-  }
-  is_stateful: true
 }
 op {
-  name: "OrderedMapUnstageNoKey"
+  name: "RefSwitch"
   input_arg {
-    name: "indices"
-    type: DT_INT32
+    name: "data"
+    type_attr: "T"
+    is_ref: true
+  }
+  input_arg {
+    name: "pred"
+    type: DT_BOOL
   }
   output_arg {
-    name: "key"
-    type: DT_INT64
+    name: "output_false"
+    type_attr: "T"
+    is_ref: true
   }
   output_arg {
-    name: "values"
-    type_list_attr: "dtypes"
+    name: "output_true"
+    type_attr: "T"
+    is_ref: true
   }
   attr {
-    name: "capacity"
-    type: "int"
-    default_value {
-      i: 0
-    }
-    has_minimum: true
+    name: "T"
+    type: "type"
   }
-  attr {
-    name: "memory_limit"
-    type: "int"
-    default_value {
-      i: 0
-    }
-    has_minimum: true
+  allows_uninitialized_input: true
+}
+op {
+  name: "Relu"
+  input_arg {
+    name: "features"
+    type_attr: "T"
   }
-  attr {
-    name: "dtypes"
-    type: "list(type)"
-    has_minimum: true
-    minimum: 1
+  output_arg {
+    name: "activations"
+    type_attr: "T"
   }
   attr {
-    name: "container"
-    type: "string"
-    default_value {
-      s: ""
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_UINT16
+        type: DT_HALF
+      }
     }
   }
+}
+op {
+  name: "Relu"
+  input_arg {
+    name: "features"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "activations"
+    type_attr: "T"
+  }
   attr {
-    name: "shared_name"
-    type: "string"
-    default_value {
-      s: ""
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_UINT16
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+      }
     }
   }
-  is_stateful: true
 }
 op {
-  name: "Pack"
+  name: "Relu"
   input_arg {
-    name: "values"
+    name: "features"
     type_attr: "T"
-    number_attr: "N"
   }
   output_arg {
-    name: "output"
+    name: "activations"
     type_attr: "T"
   }
-  attr {
-    name: "N"
-    type: "int"
-    has_minimum: true
-    minimum: 1
-  }
   attr {
     name: "T"
     type: "type"
-  }
-  attr {
-    name: "axis"
-    type: "int"
-    default_value {
-      i: 0
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_UINT16
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
+      }
     }
   }
 }
 op {
-  name: "Pad"
+  name: "Relu6"
   input_arg {
-    name: "input"
+    name: "features"
     type_attr: "T"
   }
-  input_arg {
-    name: "paddings"
-    type_attr: "Tpaddings"
-  }
   output_arg {
-    name: "output"
+    name: "activations"
     type_attr: "T"
   }
   attr {
     name: "T"
     type: "type"
-  }
-  attr {
-    name: "Tpaddings"
-    type: "type"
-    default_value {
-      type: DT_INT32
-    }
     allowed_values {
       list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
         type: DT_INT32
         type: DT_INT64
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_UINT16
+        type: DT_HALF
       }
     }
   }
 }
 op {
-  name: "PadV2"
+  name: "Relu6"
   input_arg {
-    name: "input"
+    name: "features"
     type_attr: "T"
   }
-  input_arg {
-    name: "paddings"
-    type_attr: "Tpaddings"
+  output_arg {
+    name: "activations"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_UINT16
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+      }
+    }
   }
+}
+op {
+  name: "Relu6"
   input_arg {
-    name: "constant_values"
+    name: "features"
     type_attr: "T"
   }
   output_arg {
-    name: "output"
+    name: "activations"
     type_attr: "T"
   }
   attr {
     name: "T"
     type: "type"
-  }
-  attr {
-    name: "Tpaddings"
-    type: "type"
-    default_value {
-      type: DT_INT32
-    }
     allowed_values {
       list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
         type: DT_INT32
         type: DT_INT64
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_UINT16
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
 }
 op {
-  name: "PaddedBatchDataset"
-  input_arg {
-    name: "input_dataset"
-    type: DT_VARIANT
-  }
-  input_arg {
-    name: "batch_size"
-    type: DT_INT64
-  }
+  name: "Relu6Grad"
   input_arg {
-    name: "padded_shapes"
-    type: DT_INT64
-    number_attr: "N"
+    name: "gradients"
+    type_attr: "T"
   }
   input_arg {
-    name: "padding_values"
-    type_list_attr: "Toutput_types"
+    name: "features"
+    type_attr: "T"
   }
   output_arg {
-    name: "handle"
-    type: DT_VARIANT
-  }
-  attr {
-    name: "Toutput_types"
-    type: "list(type)"
-    has_minimum: true
-    minimum: 1
-  }
-  attr {
-    name: "output_shapes"
-    type: "list(shape)"
-    has_minimum: true
-    minimum: 1
+    name: "backprops"
+    type_attr: "T"
   }
   attr {
-    name: "N"
-    type: "int"
-    has_minimum: true
-    minimum: 1
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_UINT16
+        type: DT_HALF
+      }
+    }
   }
-  is_stateful: true
 }
 op {
-  name: "PaddedBatchDataset"
-  input_arg {
-    name: "input_dataset"
-    type: DT_VARIANT
-  }
-  input_arg {
-    name: "batch_size"
-    type: DT_INT64
-  }
+  name: "Relu6Grad"
   input_arg {
-    name: "padded_shapes"
-    type: DT_INT64
-    number_attr: "N"
+    name: "gradients"
+    type_attr: "T"
   }
   input_arg {
-    name: "padding_values"
-    type_list_attr: "Toutput_types"
+    name: "features"
+    type_attr: "T"
   }
   output_arg {
-    name: "handle"
-    type: DT_VARIANT
-  }
-  attr {
-    name: "Toutput_types"
-    type: "list(type)"
-    has_minimum: true
-    minimum: 1
-  }
-  attr {
-    name: "output_shapes"
-    type: "list(shape)"
-    has_minimum: true
-    minimum: 1
+    name: "backprops"
+    type_attr: "T"
   }
   attr {
-    name: "N"
-    type: "int"
-    has_minimum: true
-    minimum: 1
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_UINT16
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+      }
+    }
   }
 }
 op {
-  name: "PaddingFIFOQueue"
-  output_arg {
-    name: "handle"
-    type: DT_STRING
-    is_ref: true
+  name: "Relu6Grad"
+  input_arg {
+    name: "gradients"
+    type_attr: "T"
   }
-  attr {
-    name: "component_types"
-    type: "list(type)"
-    has_minimum: true
-    minimum: 1
+  input_arg {
+    name: "features"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "backprops"
+    type_attr: "T"
   }
   attr {
-    name: "shapes"
-    type: "list(shape)"
-    default_value {
+    name: "T"
+    type: "type"
+    allowed_values {
       list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_UINT16
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
-    has_minimum: true
-  }
-  attr {
-    name: "capacity"
-    type: "int"
-    default_value {
-      i: -1
-    }
-  }
-  attr {
-    name: "container"
-    type: "string"
-    default_value {
-      s: ""
-    }
-  }
-  attr {
-    name: "shared_name"
-    type: "string"
-    default_value {
-      s: ""
-    }
   }
-  is_stateful: true
 }
 op {
-  name: "PaddingFIFOQueueV2"
-  output_arg {
-    name: "handle"
-    type: DT_RESOURCE
+  name: "ReluGrad"
+  input_arg {
+    name: "gradients"
+    type_attr: "T"
   }
-  attr {
-    name: "component_types"
-    type: "list(type)"
-    has_minimum: true
-    minimum: 1
+  input_arg {
+    name: "features"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "backprops"
+    type_attr: "T"
   }
   attr {
-    name: "shapes"
-    type: "list(shape)"
-    default_value {
+    name: "T"
+    type: "type"
+    allowed_values {
       list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_UINT16
+        type: DT_HALF
       }
     }
-    has_minimum: true
-  }
-  attr {
-    name: "capacity"
-    type: "int"
-    default_value {
-      i: -1
-    }
-  }
-  attr {
-    name: "container"
-    type: "string"
-    default_value {
-      s: ""
-    }
-  }
-  attr {
-    name: "shared_name"
-    type: "string"
-    default_value {
-      s: ""
-    }
   }
-  is_stateful: true
 }
 op {
-  name: "ParallelConcat"
+  name: "ReluGrad"
   input_arg {
-    name: "values"
+    name: "gradients"
     type_attr: "T"
-    number_attr: "N"
   }
-  output_arg {
-    name: "output"
+  input_arg {
+    name: "features"
     type_attr: "T"
   }
-  attr {
-    name: "N"
-    type: "int"
-    has_minimum: true
-    minimum: 1
+  output_arg {
+    name: "backprops"
+    type_attr: "T"
   }
   attr {
     name: "T"
     type: "type"
-  }
-  attr {
-    name: "shape"
-    type: "shape"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_UINT16
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+      }
+    }
   }
 }
 op {
-  name: "ParallelDynamicStitch"
+  name: "ReluGrad"
   input_arg {
-    name: "indices"
-    type: DT_INT32
-    number_attr: "N"
+    name: "gradients"
+    type_attr: "T"
   }
   input_arg {
-    name: "data"
+    name: "features"
     type_attr: "T"
-    number_attr: "N"
   }
   output_arg {
-    name: "merged"
+    name: "backprops"
     type_attr: "T"
   }
-  attr {
-    name: "N"
-    type: "int"
-    has_minimum: true
-    minimum: 1
-  }
   attr {
     name: "T"
     type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_UINT16
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
+      }
+    }
   }
 }
 op {
-  name: "ParallelInterleaveDataset"
-  input_arg {
-    name: "input_dataset"
-    type: DT_VARIANT
-  }
+  name: "RemoteCall"
   input_arg {
-    name: "other_arguments"
-    type_list_attr: "Targuments"
+    name: "target"
+    type: DT_STRING
   }
   input_arg {
-    name: "cycle_length"
-    type: DT_INT64
+    name: "args"
+    type_list_attr: "Tin"
   }
-  input_arg {
-    name: "block_length"
-    type: DT_INT64
+  output_arg {
+    name: "output"
+    type_list_attr: "Tout"
   }
-  input_arg {
-    name: "sloppy"
-    type: DT_BOOL
+  attr {
+    name: "Tin"
+    type: "list(type)"
+    has_minimum: true
+    minimum: 1
   }
-  output_arg {
-    name: "handle"
-    type: DT_VARIANT
+  attr {
+    name: "Tout"
+    type: "list(type)"
+    has_minimum: true
+    minimum: 1
   }
   attr {
     name: "f"
     type: "func"
   }
+}
+op {
+  name: "RemoteFusedGraphExecute"
+  input_arg {
+    name: "inputs"
+    type_list_attr: "Tinputs"
+  }
+  output_arg {
+    name: "outputs"
+    type_list_attr: "Toutputs"
+  }
   attr {
-    name: "Targuments"
+    name: "Tinputs"
     type: "list(type)"
     has_minimum: true
   }
   attr {
-    name: "output_types"
+    name: "Toutputs"
     type: "list(type)"
     has_minimum: true
-    minimum: 1
   }
   attr {
-    name: "output_shapes"
-    type: "list(shape)"
-    has_minimum: true
-    minimum: 1
+    name: "serialized_remote_fused_graph_execute_info"
+    type: "string"
   }
 }
 op {
-  name: "ParallelMapDataset"
+  name: "RepeatDataset"
   input_arg {
     name: "input_dataset"
     type: DT_VARIANT
   }
   input_arg {
-    name: "other_arguments"
-    type_list_attr: "Targuments"
-  }
-  input_arg {
-    name: "num_parallel_calls"
-    type: DT_INT32
+    name: "count"
+    type: DT_INT64
   }
   output_arg {
     name: "handle"
     type: DT_VARIANT
   }
-  attr {
-    name: "f"
-    type: "func"
-  }
-  attr {
-    name: "Targuments"
-    type: "list(type)"
-    has_minimum: true
-  }
   attr {
     name: "output_types"
     type: "list(type)"
@@ -21187,32 +32901,19 @@ op {
   is_stateful: true
 }
 op {
-  name: "ParallelMapDataset"
+  name: "RepeatDataset"
   input_arg {
     name: "input_dataset"
     type: DT_VARIANT
   }
-  input_arg {
-    name: "other_arguments"
-    type_list_attr: "Targuments"
-  }
-  input_arg {
-    name: "num_parallel_calls"
-    type: DT_INT32
+  input_arg {
+    name: "count"
+    type: DT_INT64
   }
   output_arg {
     name: "handle"
     type: DT_VARIANT
   }
-  attr {
-    name: "f"
-    type: "func"
-  }
-  attr {
-    name: "Targuments"
-    type: "list(type)"
-    has_minimum: true
-  }
   attr {
     name: "output_types"
     type: "list(type)"
@@ -21227,457 +32928,409 @@ op {
   }
 }
 op {
-  name: "ParameterizedTruncatedNormal"
-  input_arg {
-    name: "shape"
-    type_attr: "T"
-  }
-  input_arg {
-    name: "means"
-    type_attr: "dtype"
-  }
+  name: "RequantizationRange"
   input_arg {
-    name: "stdevs"
-    type_attr: "dtype"
+    name: "input"
+    type_attr: "Tinput"
   }
   input_arg {
-    name: "minvals"
-    type_attr: "dtype"
+    name: "input_min"
+    type: DT_FLOAT
   }
   input_arg {
-    name: "maxvals"
-    type_attr: "dtype"
+    name: "input_max"
+    type: DT_FLOAT
   }
   output_arg {
-    name: "output"
-    type_attr: "dtype"
-  }
-  attr {
-    name: "seed"
-    type: "int"
-    default_value {
-      i: 0
-    }
-  }
-  attr {
-    name: "seed2"
-    type: "int"
-    default_value {
-      i: 0
-    }
+    name: "output_min"
+    type: DT_FLOAT
   }
-  attr {
-    name: "dtype"
-    type: "type"
-    allowed_values {
-      list {
-        type: DT_HALF
-        type: DT_FLOAT
-        type: DT_DOUBLE
-      }
-    }
+  output_arg {
+    name: "output_max"
+    type: DT_FLOAT
   }
   attr {
-    name: "T"
+    name: "Tinput"
     type: "type"
     allowed_values {
       list {
-        type: DT_INT32
-        type: DT_INT64
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT16
+        type: DT_QUINT16
+        type: DT_QINT32
       }
     }
   }
-  is_stateful: true
 }
 op {
-  name: "ParseExample"
+  name: "Requantize"
   input_arg {
-    name: "serialized"
-    type: DT_STRING
+    name: "input"
+    type_attr: "Tinput"
   }
   input_arg {
-    name: "names"
-    type: DT_STRING
+    name: "input_min"
+    type: DT_FLOAT
   }
   input_arg {
-    name: "sparse_keys"
-    type: DT_STRING
-    number_attr: "Nsparse"
+    name: "input_max"
+    type: DT_FLOAT
   }
   input_arg {
-    name: "dense_keys"
-    type: DT_STRING
-    number_attr: "Ndense"
+    name: "requested_output_min"
+    type: DT_FLOAT
   }
   input_arg {
-    name: "dense_defaults"
-    type_list_attr: "Tdense"
-  }
-  output_arg {
-    name: "sparse_indices"
-    type: DT_INT64
-    number_attr: "Nsparse"
+    name: "requested_output_max"
+    type: DT_FLOAT
   }
   output_arg {
-    name: "sparse_values"
-    type_list_attr: "sparse_types"
+    name: "output"
+    type_attr: "out_type"
   }
   output_arg {
-    name: "sparse_shapes"
-    type: DT_INT64
-    number_attr: "Nsparse"
+    name: "output_min"
+    type: DT_FLOAT
   }
   output_arg {
-    name: "dense_values"
-    type_list_attr: "Tdense"
-  }
-  attr {
-    name: "Nsparse"
-    type: "int"
-    has_minimum: true
-  }
-  attr {
-    name: "Ndense"
-    type: "int"
-    has_minimum: true
+    name: "output_max"
+    type: DT_FLOAT
   }
   attr {
-    name: "sparse_types"
-    type: "list(type)"
-    has_minimum: true
+    name: "Tinput"
+    type: "type"
     allowed_values {
       list {
-        type: DT_FLOAT
-        type: DT_INT64
-        type: DT_STRING
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT16
+        type: DT_QUINT16
+        type: DT_QINT32
       }
     }
   }
   attr {
-    name: "Tdense"
-    type: "list(type)"
-    has_minimum: true
+    name: "out_type"
+    type: "type"
     allowed_values {
       list {
-        type: DT_FLOAT
-        type: DT_INT64
-        type: DT_STRING
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT16
+        type: DT_QUINT16
+        type: DT_QINT32
       }
     }
   }
-  attr {
-    name: "dense_shapes"
-    type: "list(shape)"
-    has_minimum: true
-  }
 }
 op {
-  name: "ParseSingleSequenceExample"
-  input_arg {
-    name: "serialized"
-    type: DT_STRING
-  }
-  input_arg {
-    name: "feature_list_dense_missing_assumed_empty"
-    type: DT_STRING
-  }
-  input_arg {
-    name: "context_sparse_keys"
-    type: DT_STRING
-    number_attr: "Ncontext_sparse"
-  }
-  input_arg {
-    name: "context_dense_keys"
-    type: DT_STRING
-    number_attr: "Ncontext_dense"
-  }
-  input_arg {
-    name: "feature_list_sparse_keys"
-    type: DT_STRING
-    number_attr: "Nfeature_list_sparse"
-  }
-  input_arg {
-    name: "feature_list_dense_keys"
-    type: DT_STRING
-    number_attr: "Nfeature_list_dense"
-  }
+  name: "Reshape"
   input_arg {
-    name: "context_dense_defaults"
-    type_list_attr: "Tcontext_dense"
+    name: "tensor"
+    type_attr: "T"
   }
   input_arg {
-    name: "debug_name"
-    type: DT_STRING
-  }
-  output_arg {
-    name: "context_sparse_indices"
-    type: DT_INT64
-    number_attr: "Ncontext_sparse"
-  }
-  output_arg {
-    name: "context_sparse_values"
-    type_list_attr: "context_sparse_types"
-  }
-  output_arg {
-    name: "context_sparse_shapes"
-    type: DT_INT64
-    number_attr: "Ncontext_sparse"
-  }
-  output_arg {
-    name: "context_dense_values"
-    type_list_attr: "Tcontext_dense"
-  }
-  output_arg {
-    name: "feature_list_sparse_indices"
-    type: DT_INT64
-    number_attr: "Nfeature_list_sparse"
-  }
-  output_arg {
-    name: "feature_list_sparse_values"
-    type_list_attr: "feature_list_sparse_types"
-  }
-  output_arg {
-    name: "feature_list_sparse_shapes"
-    type: DT_INT64
-    number_attr: "Nfeature_list_sparse"
+    name: "shape"
+    type_attr: "Tshape"
   }
   output_arg {
-    name: "feature_list_dense_values"
-    type_list_attr: "feature_list_dense_types"
-  }
-  attr {
-    name: "Ncontext_sparse"
-    type: "int"
-    default_value {
-      i: 0
-    }
-    has_minimum: true
-  }
-  attr {
-    name: "Ncontext_dense"
-    type: "int"
-    default_value {
-      i: 0
-    }
-    has_minimum: true
-  }
-  attr {
-    name: "Nfeature_list_sparse"
-    type: "int"
-    default_value {
-      i: 0
-    }
-    has_minimum: true
+    name: "output"
+    type_attr: "T"
   }
   attr {
-    name: "Nfeature_list_dense"
-    type: "int"
-    default_value {
-      i: 0
-    }
-    has_minimum: true
+    name: "T"
+    type: "type"
   }
   attr {
-    name: "context_sparse_types"
-    type: "list(type)"
+    name: "Tshape"
+    type: "type"
     default_value {
-      list {
-      }
+      type: DT_INT32
     }
-    has_minimum: true
     allowed_values {
       list {
-        type: DT_FLOAT
+        type: DT_INT32
         type: DT_INT64
-        type: DT_STRING
       }
     }
   }
+}
+op {
+  name: "ResizeArea"
+  input_arg {
+    name: "images"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "size"
+    type: DT_INT32
+  }
+  output_arg {
+    name: "resized_images"
+    type: DT_FLOAT
+  }
   attr {
-    name: "Tcontext_dense"
-    type: "list(type)"
-    default_value {
-      list {
-      }
-    }
-    has_minimum: true
+    name: "T"
+    type: "type"
     allowed_values {
       list {
-        type: DT_FLOAT
+        type: DT_UINT8
+        type: DT_INT8
+        type: DT_INT16
+        type: DT_INT32
         type: DT_INT64
-        type: DT_STRING
+        type: DT_HALF
+        type: DT_FLOAT
+        type: DT_DOUBLE
       }
     }
   }
   attr {
-    name: "feature_list_dense_types"
-    type: "list(type)"
+    name: "align_corners"
+    type: "bool"
     default_value {
-      list {
-      }
+      b: false
     }
-    has_minimum: true
+  }
+}
+op {
+  name: "ResizeArea"
+  input_arg {
+    name: "images"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "size"
+    type: DT_INT32
+  }
+  output_arg {
+    name: "resized_images"
+    type: DT_FLOAT
+  }
+  attr {
+    name: "T"
+    type: "type"
     allowed_values {
       list {
-        type: DT_FLOAT
+        type: DT_INT8
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_UINT16
+        type: DT_INT32
         type: DT_INT64
-        type: DT_STRING
+        type: DT_HALF
+        type: DT_FLOAT
+        type: DT_DOUBLE
       }
     }
   }
   attr {
-    name: "context_dense_shapes"
-    type: "list(shape)"
+    name: "align_corners"
+    type: "bool"
     default_value {
-      list {
-      }
+      b: false
     }
-    has_minimum: true
+  }
+}
+op {
+  name: "ResizeBicubic"
+  input_arg {
+    name: "images"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "size"
+    type: DT_INT32
+  }
+  output_arg {
+    name: "resized_images"
+    type: DT_FLOAT
   }
   attr {
-    name: "feature_list_sparse_types"
-    type: "list(type)"
-    default_value {
-      list {
-      }
-    }
-    has_minimum: true
+    name: "T"
+    type: "type"
     allowed_values {
       list {
-        type: DT_FLOAT
+        type: DT_UINT8
+        type: DT_INT8
+        type: DT_INT16
+        type: DT_INT32
         type: DT_INT64
-        type: DT_STRING
+        type: DT_HALF
+        type: DT_FLOAT
+        type: DT_DOUBLE
       }
     }
   }
   attr {
-    name: "feature_list_dense_shapes"
-    type: "list(shape)"
+    name: "align_corners"
+    type: "bool"
     default_value {
-      list {
-      }
+      b: false
     }
-    has_minimum: true
   }
 }
 op {
-  name: "ParseTensor"
+  name: "ResizeBicubic"
   input_arg {
-    name: "serialized"
-    type: DT_STRING
-  }
-  output_arg {
-    name: "output"
-    type_attr: "out_type"
+    name: "images"
+    type_attr: "T"
   }
-  attr {
-    name: "out_type"
-    type: "type"
+  input_arg {
+    name: "size"
+    type: DT_INT32
   }
-}
-op {
-  name: "Placeholder"
   output_arg {
-    name: "output"
-    type_attr: "dtype"
+    name: "resized_images"
+    type: DT_FLOAT
   }
   attr {
-    name: "dtype"
+    name: "T"
     type: "type"
+    allowed_values {
+      list {
+        type: DT_INT8
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_UINT16
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_HALF
+        type: DT_FLOAT
+        type: DT_DOUBLE
+      }
+    }
   }
   attr {
-    name: "shape"
-    type: "shape"
+    name: "align_corners"
+    type: "bool"
     default_value {
-      shape {
-      }
+      b: false
     }
   }
 }
 op {
-  name: "Placeholder"
+  name: "ResizeBicubicGrad"
+  input_arg {
+    name: "grads"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "original_image"
+    type_attr: "T"
+  }
   output_arg {
     name: "output"
-    type_attr: "dtype"
+    type_attr: "T"
   }
   attr {
-    name: "dtype"
+    name: "T"
     type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+      }
+    }
   }
   attr {
-    name: "shape"
-    type: "shape"
+    name: "align_corners"
+    type: "bool"
     default_value {
-      shape {
-        unknown_rank: true
-      }
+      b: false
     }
   }
 }
 op {
-  name: "PlaceholderV2"
-  output_arg {
-    name: "output"
-    type_attr: "dtype"
-  }
-  attr {
-    name: "dtype"
-    type: "type"
+  name: "ResizeBilinear"
+  input_arg {
+    name: "images"
+    type_attr: "T"
   }
-  attr {
-    name: "shape"
-    type: "shape"
+  input_arg {
+    name: "size"
+    type: DT_INT32
   }
-}
-op {
-  name: "PlaceholderV2"
   output_arg {
-    name: "output"
-    type_attr: "dtype"
+    name: "resized_images"
+    type: DT_FLOAT
   }
   attr {
-    name: "dtype"
+    name: "T"
     type: "type"
+    allowed_values {
+      list {
+        type: DT_UINT8
+        type: DT_INT8
+        type: DT_INT16
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_HALF
+        type: DT_FLOAT
+        type: DT_DOUBLE
+      }
+    }
   }
   attr {
-    name: "shape"
-    type: "shape"
-  }
-  deprecation {
-    version: 23
+    name: "align_corners"
+    type: "bool"
+    default_value {
+      b: false
+    }
   }
 }
 op {
-  name: "PlaceholderWithDefault"
+  name: "ResizeBilinear"
   input_arg {
-    name: "input"
-    type_attr: "dtype"
+    name: "images"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "size"
+    type: DT_INT32
   }
   output_arg {
-    name: "output"
-    type_attr: "dtype"
+    name: "resized_images"
+    type: DT_FLOAT
   }
   attr {
-    name: "dtype"
+    name: "T"
     type: "type"
+    allowed_values {
+      list {
+        type: DT_INT8
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_UINT16
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_HALF
+        type: DT_FLOAT
+        type: DT_DOUBLE
+      }
+    }
   }
   attr {
-    name: "shape"
-    type: "shape"
+    name: "align_corners"
+    type: "bool"
+    default_value {
+      b: false
+    }
   }
 }
 op {
-  name: "Polygamma"
+  name: "ResizeBilinearGrad"
   input_arg {
-    name: "a"
-    type_attr: "T"
+    name: "grads"
+    type: DT_FLOAT
   }
   input_arg {
-    name: "x"
+    name: "original_image"
     type_attr: "T"
   }
   output_arg {
-    name: "z"
+    name: "output"
     type_attr: "T"
   }
   attr {
@@ -21686,45 +33339,70 @@ op {
     allowed_values {
       list {
         type: DT_FLOAT
+        type: DT_HALF
         type: DT_DOUBLE
       }
     }
   }
+  attr {
+    name: "align_corners"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
 }
 op {
-  name: "PopulationCount"
+  name: "ResizeNearestNeighbor"
   input_arg {
-    name: "x"
+    name: "images"
     type_attr: "T"
   }
+  input_arg {
+    name: "size"
+    type: DT_INT32
+  }
   output_arg {
-    name: "y"
-    type: DT_UINT8
+    name: "resized_images"
+    type_attr: "T"
   }
   attr {
     name: "T"
     type: "type"
     allowed_values {
       list {
+        type: DT_UINT8
         type: DT_INT8
         type: DT_INT16
         type: DT_INT32
         type: DT_INT64
-        type: DT_UINT8
-        type: DT_UINT16
+        type: DT_HALF
+        type: DT_FLOAT
+        type: DT_DOUBLE
       }
     }
   }
+  attr {
+    name: "align_corners"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
 }
 op {
-  name: "PopulationCount"
+  name: "ResizeNearestNeighbor"
   input_arg {
-    name: "x"
+    name: "images"
     type_attr: "T"
   }
+  input_arg {
+    name: "size"
+    type: DT_INT32
+  }
   output_arg {
-    name: "y"
-    type: DT_UINT8
+    name: "resized_images"
+    type_attr: "T"
   }
   attr {
     name: "T"
@@ -21732,29 +33410,37 @@ op {
     allowed_values {
       list {
         type: DT_INT8
+        type: DT_UINT8
         type: DT_INT16
+        type: DT_UINT16
         type: DT_INT32
         type: DT_INT64
-        type: DT_UINT8
-        type: DT_UINT16
-        type: DT_UINT32
-        type: DT_UINT64
+        type: DT_HALF
+        type: DT_FLOAT
+        type: DT_DOUBLE
       }
     }
   }
+  attr {
+    name: "align_corners"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
 }
 op {
-  name: "Pow"
+  name: "ResizeNearestNeighborGrad"
   input_arg {
-    name: "x"
+    name: "grads"
     type_attr: "T"
   }
   input_arg {
-    name: "y"
-    type_attr: "T"
+    name: "size"
+    type: DT_INT32
   }
   output_arg {
-    name: "z"
+    name: "output"
     type_attr: "T"
   }
   attr {
@@ -21762,294 +33448,328 @@ op {
     type: "type"
     allowed_values {
       list {
+        type: DT_UINT8
+        type: DT_INT8
+        type: DT_INT32
         type: DT_HALF
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT32
-        type: DT_INT64
-        type: DT_COMPLEX64
-        type: DT_COMPLEX128
       }
     }
   }
-}
-op {
-  name: "PrefetchDataset"
-  input_arg {
-    name: "input_dataset"
-    type: DT_VARIANT
-  }
-  input_arg {
-    name: "buffer_size"
-    type: DT_INT64
-  }
-  output_arg {
-    name: "handle"
-    type: DT_VARIANT
-  }
-  attr {
-    name: "output_types"
-    type: "list(type)"
-    has_minimum: true
-    minimum: 1
-  }
   attr {
-    name: "output_shapes"
-    type: "list(shape)"
-    has_minimum: true
-    minimum: 1
+    name: "align_corners"
+    type: "bool"
+    default_value {
+      b: false
+    }
   }
-  is_stateful: true
 }
 op {
-  name: "PrefetchDataset"
+  name: "ResourceApplyAdadelta"
   input_arg {
-    name: "input_dataset"
-    type: DT_VARIANT
+    name: "var"
+    type: DT_RESOURCE
   }
   input_arg {
-    name: "buffer_size"
-    type: DT_INT64
+    name: "accum"
+    type: DT_RESOURCE
   }
-  output_arg {
-    name: "handle"
-    type: DT_VARIANT
+  input_arg {
+    name: "accum_update"
+    type: DT_RESOURCE
   }
-  attr {
-    name: "output_types"
-    type: "list(type)"
-    has_minimum: true
-    minimum: 1
+  input_arg {
+    name: "lr"
+    type_attr: "T"
   }
-  attr {
-    name: "output_shapes"
-    type: "list(shape)"
-    has_minimum: true
-    minimum: 1
+  input_arg {
+    name: "rho"
+    type_attr: "T"
   }
-}
-op {
-  name: "PreventGradient"
   input_arg {
-    name: "input"
+    name: "epsilon"
     type_attr: "T"
   }
-  output_arg {
-    name: "output"
+  input_arg {
+    name: "grad"
     type_attr: "T"
   }
   attr {
     name: "T"
     type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+      }
+    }
   }
   attr {
-    name: "message"
-    type: "string"
+    name: "use_locking"
+    type: "bool"
     default_value {
-      s: ""
+      b: false
     }
   }
+  is_stateful: true
 }
 op {
-  name: "Print"
+  name: "ResourceApplyAdadelta"
   input_arg {
-    name: "input"
-    type_attr: "T"
+    name: "var"
+    type: DT_RESOURCE
   }
   input_arg {
-    name: "data"
-    type_list_attr: "U"
+    name: "accum"
+    type: DT_RESOURCE
   }
-  output_arg {
-    name: "output"
+  input_arg {
+    name: "accum_update"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "lr"
     type_attr: "T"
   }
-  attr {
-    name: "T"
-    type: "type"
+  input_arg {
+    name: "rho"
+    type_attr: "T"
   }
-  attr {
-    name: "U"
-    type: "list(type)"
-    has_minimum: true
-    minimum: 1
+  input_arg {
+    name: "epsilon"
+    type_attr: "T"
   }
-  attr {
-    name: "message"
-    type: "string"
-    default_value {
-      s: ""
-    }
+  input_arg {
+    name: "grad"
+    type_attr: "T"
   }
   attr {
-    name: "first_n"
-    type: "int"
-    default_value {
-      i: -1
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+      }
     }
   }
   attr {
-    name: "summarize"
-    type: "int"
+    name: "use_locking"
+    type: "bool"
     default_value {
-      i: 3
+      b: false
     }
   }
   is_stateful: true
 }
 op {
-  name: "Print"
+  name: "ResourceApplyAdadelta"
   input_arg {
-    name: "input"
-    type_attr: "T"
+    name: "var"
+    type: DT_RESOURCE
   }
   input_arg {
-    name: "data"
-    type_list_attr: "U"
+    name: "accum"
+    type: DT_RESOURCE
   }
-  output_arg {
-    name: "output"
+  input_arg {
+    name: "accum_update"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "lr"
     type_attr: "T"
   }
-  attr {
-    name: "T"
-    type: "type"
+  input_arg {
+    name: "rho"
+    type_attr: "T"
   }
-  attr {
-    name: "U"
-    type: "list(type)"
-    has_minimum: true
+  input_arg {
+    name: "epsilon"
+    type_attr: "T"
   }
-  attr {
-    name: "message"
-    type: "string"
-    default_value {
-      s: ""
-    }
+  input_arg {
+    name: "grad"
+    type_attr: "T"
   }
   attr {
-    name: "first_n"
-    type: "int"
-    default_value {
-      i: -1
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
+      }
     }
   }
   attr {
-    name: "summarize"
-    type: "int"
+    name: "use_locking"
+    type: "bool"
     default_value {
-      i: 3
+      b: false
     }
   }
   is_stateful: true
 }
 op {
-  name: "PriorityQueue"
-  output_arg {
-    name: "handle"
-    type: DT_STRING
-    is_ref: true
+  name: "ResourceApplyAdagrad"
+  input_arg {
+    name: "var"
+    type: DT_RESOURCE
   }
-  attr {
-    name: "component_types"
-    type: "list(type)"
-    default_value {
-      list {
-      }
-    }
-    has_minimum: true
+  input_arg {
+    name: "accum"
+    type: DT_RESOURCE
   }
-  attr {
-    name: "shapes"
-    type: "list(shape)"
-    has_minimum: true
+  input_arg {
+    name: "lr"
+    type_attr: "T"
   }
-  attr {
-    name: "capacity"
-    type: "int"
-    default_value {
-      i: -1
-    }
+  input_arg {
+    name: "grad"
+    type_attr: "T"
   }
   attr {
-    name: "container"
-    type: "string"
-    default_value {
-      s: ""
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+      }
     }
   }
   attr {
-    name: "shared_name"
-    type: "string"
+    name: "use_locking"
+    type: "bool"
     default_value {
-      s: ""
+      b: false
     }
   }
   is_stateful: true
 }
 op {
-  name: "PriorityQueueV2"
-  output_arg {
-    name: "handle"
+  name: "ResourceApplyAdagrad"
+  input_arg {
+    name: "var"
     type: DT_RESOURCE
   }
-  attr {
-    name: "component_types"
-    type: "list(type)"
-    default_value {
-      list {
-      }
-    }
-    has_minimum: true
+  input_arg {
+    name: "accum"
+    type: DT_RESOURCE
   }
-  attr {
-    name: "shapes"
-    type: "list(shape)"
-    has_minimum: true
+  input_arg {
+    name: "lr"
+    type_attr: "T"
   }
-  attr {
-    name: "capacity"
-    type: "int"
-    default_value {
-      i: -1
-    }
+  input_arg {
+    name: "grad"
+    type_attr: "T"
   }
   attr {
-    name: "container"
-    type: "string"
-    default_value {
-      s: ""
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+      }
     }
   }
   attr {
-    name: "shared_name"
-    type: "string"
+    name: "use_locking"
+    type: "bool"
     default_value {
-      s: ""
+      b: false
     }
   }
   is_stateful: true
 }
 op {
-  name: "Prod"
+  name: "ResourceApplyAdagrad"
   input_arg {
-    name: "input"
-    type_attr: "T"
+    name: "var"
+    type: DT_RESOURCE
   }
   input_arg {
-    name: "reduction_indices"
-    type_attr: "Tidx"
+    name: "accum"
+    type: DT_RESOURCE
   }
-  output_arg {
-    name: "output"
+  input_arg {
+    name: "lr"
     type_attr: "T"
   }
-  attr {
-    name: "keep_dims"
-    type: "bool"
-    default_value {
-      b: false
-    }
+  input_arg {
+    name: "grad"
+    type_attr: "T"
   }
   attr {
     name: "T"
@@ -22070,43 +33790,54 @@ op {
         type: DT_QUINT8
         type: DT_QINT32
         type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
   attr {
-    name: "Tidx"
-    type: "type"
+    name: "use_locking"
+    type: "bool"
     default_value {
-      type: DT_INT32
-    }
-    allowed_values {
-      list {
-        type: DT_INT32
-        type: DT_INT64
-      }
+      b: false
     }
   }
+  is_stateful: true
 }
 op {
-  name: "Prod"
+  name: "ResourceApplyAdagradDA"
   input_arg {
-    name: "input"
+    name: "var"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "gradient_accumulator"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "gradient_squared_accumulator"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "grad"
     type_attr: "T"
   }
   input_arg {
-    name: "reduction_indices"
-    type_attr: "Tidx"
+    name: "lr"
+    type_attr: "T"
   }
-  output_arg {
-    name: "output"
+  input_arg {
+    name: "l1"
     type_attr: "T"
   }
-  attr {
-    name: "keep_dims"
-    type: "bool"
-    default_value {
-      b: false
-    }
+  input_arg {
+    name: "l2"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "global_step"
+    type: DT_INT64
   }
   attr {
     name: "T"
@@ -22127,210 +33858,118 @@ op {
         type: DT_QUINT8
         type: DT_QINT32
         type: DT_HALF
-        type: DT_UINT32
-        type: DT_UINT64
       }
     }
   }
   attr {
-    name: "Tidx"
-    type: "type"
+    name: "use_locking"
+    type: "bool"
     default_value {
-      type: DT_INT32
-    }
-    allowed_values {
-      list {
-        type: DT_INT32
-        type: DT_INT64
-      }
+      b: false
     }
   }
-}
-op {
-  name: "PyFunc"
-  input_arg {
-    name: "input"
-    type_list_attr: "Tin"
-  }
-  output_arg {
-    name: "output"
-    type_list_attr: "Tout"
-  }
-  attr {
-    name: "token"
-    type: "string"
-  }
-  attr {
-    name: "Tin"
-    type: "list(type)"
-    has_minimum: true
-  }
-  attr {
-    name: "Tout"
-    type: "list(type)"
-    has_minimum: true
-  }
   is_stateful: true
 }
 op {
-  name: "PyFuncStateless"
+  name: "ResourceApplyAdagradDA"
   input_arg {
-    name: "input"
-    type_list_attr: "Tin"
-  }
-  output_arg {
-    name: "output"
-    type_list_attr: "Tout"
+    name: "var"
+    type: DT_RESOURCE
   }
-  attr {
-    name: "token"
-    type: "string"
+  input_arg {
+    name: "gradient_accumulator"
+    type: DT_RESOURCE
   }
-  attr {
-    name: "Tin"
-    type: "list(type)"
-    has_minimum: true
+  input_arg {
+    name: "gradient_squared_accumulator"
+    type: DT_RESOURCE
   }
-  attr {
-    name: "Tout"
-    type: "list(type)"
-    has_minimum: true
+  input_arg {
+    name: "grad"
+    type_attr: "T"
   }
-}
-op {
-  name: "Qr"
   input_arg {
-    name: "input"
+    name: "lr"
     type_attr: "T"
   }
-  output_arg {
-    name: "q"
+  input_arg {
+    name: "l1"
     type_attr: "T"
   }
-  output_arg {
-    name: "r"
+  input_arg {
+    name: "l2"
     type_attr: "T"
   }
-  attr {
-    name: "full_matrices"
-    type: "bool"
-    default_value {
-      b: false
-    }
+  input_arg {
+    name: "global_step"
+    type: DT_INT64
   }
   attr {
     name: "T"
     type: "type"
     allowed_values {
       list {
-        type: DT_DOUBLE
         type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
         type: DT_COMPLEX64
         type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
       }
     }
   }
-}
-op {
-  name: "QuantizeAndDequantize"
-  input_arg {
-    name: "input"
-    type_attr: "T"
-  }
-  output_arg {
-    name: "output"
-    type_attr: "T"
-  }
-  attr {
-    name: "signed_input"
-    type: "bool"
-    default_value {
-      b: true
-    }
-  }
-  attr {
-    name: "num_bits"
-    type: "int"
-    default_value {
-      i: 8
-    }
-  }
   attr {
-    name: "range_given"
+    name: "use_locking"
     type: "bool"
     default_value {
       b: false
     }
   }
-  attr {
-    name: "input_min"
-    type: "float"
-    default_value {
-      f: 0
-    }
+  is_stateful: true
+}
+op {
+  name: "ResourceApplyAdagradDA"
+  input_arg {
+    name: "var"
+    type: DT_RESOURCE
   }
-  attr {
-    name: "input_max"
-    type: "float"
-    default_value {
-      f: 0
-    }
+  input_arg {
+    name: "gradient_accumulator"
+    type: DT_RESOURCE
   }
-  attr {
-    name: "T"
-    type: "type"
-    allowed_values {
-      list {
-        type: DT_FLOAT
-        type: DT_DOUBLE
-      }
-    }
+  input_arg {
+    name: "gradient_squared_accumulator"
+    type: DT_RESOURCE
   }
-}
-op {
-  name: "QuantizeAndDequantize"
   input_arg {
-    name: "input"
+    name: "grad"
     type_attr: "T"
   }
-  output_arg {
-    name: "output"
+  input_arg {
+    name: "lr"
     type_attr: "T"
   }
-  attr {
-    name: "signed_input"
-    type: "bool"
-    default_value {
-      b: true
-    }
-  }
-  attr {
-    name: "num_bits"
-    type: "int"
-    default_value {
-      i: 8
-    }
-  }
-  attr {
-    name: "range_given"
-    type: "bool"
-    default_value {
-      b: false
-    }
+  input_arg {
+    name: "l1"
+    type_attr: "T"
   }
-  attr {
-    name: "input_min"
-    type: "float"
-    default_value {
-      f: 0
-    }
+  input_arg {
+    name: "l2"
+    type_attr: "T"
   }
-  attr {
-    name: "input_max"
-    type: "float"
-    default_value {
-      f: 0
-    }
+  input_arg {
+    name: "global_step"
+    type: DT_INT64
   }
   attr {
     name: "T"
@@ -22339,157 +33978,74 @@ op {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
-  deprecation {
-    version: 21
-  }
-}
-op {
-  name: "QuantizeAndDequantize"
-  input_arg {
-    name: "input"
-    type_attr: "T"
-  }
-  output_arg {
-    name: "output"
-    type_attr: "T"
-  }
-  attr {
-    name: "signed_input"
-    type: "bool"
-    default_value {
-      b: true
-    }
-  }
-  attr {
-    name: "num_bits"
-    type: "int"
-    default_value {
-      i: 8
-    }
-  }
   attr {
-    name: "range_given"
+    name: "use_locking"
     type: "bool"
     default_value {
       b: false
     }
   }
-  attr {
-    name: "input_min"
-    type: "float"
-    default_value {
-      f: 0
-    }
-  }
-  attr {
-    name: "input_max"
-    type: "float"
-    default_value {
-      f: 0
-    }
-  }
-  attr {
-    name: "T"
-    type: "type"
-    allowed_values {
-      list {
-        type: DT_FLOAT
-        type: DT_DOUBLE
-      }
-    }
-  }
-  deprecation {
-    version: 22
-  }
+  is_stateful: true
 }
 op {
-  name: "QuantizeAndDequantizeV2"
+  name: "ResourceApplyAdam"
   input_arg {
-    name: "input"
-    type_attr: "T"
+    name: "var"
+    type: DT_RESOURCE
   }
   input_arg {
-    name: "input_min"
-    type_attr: "T"
+    name: "m"
+    type: DT_RESOURCE
   }
   input_arg {
-    name: "input_max"
-    type_attr: "T"
+    name: "v"
+    type: DT_RESOURCE
   }
-  output_arg {
-    name: "output"
+  input_arg {
+    name: "beta1_power"
     type_attr: "T"
   }
-  attr {
-    name: "signed_input"
-    type: "bool"
-    default_value {
-      b: true
-    }
-  }
-  attr {
-    name: "num_bits"
-    type: "int"
-    default_value {
-      i: 8
-    }
-  }
-  attr {
-    name: "range_given"
-    type: "bool"
-    default_value {
-      b: false
-    }
-  }
-  attr {
-    name: "T"
-    type: "type"
-    allowed_values {
-      list {
-        type: DT_FLOAT
-        type: DT_DOUBLE
-      }
-    }
-  }
-}
-op {
-  name: "QuantizeAndDequantizeV3"
   input_arg {
-    name: "input"
+    name: "beta2_power"
     type_attr: "T"
   }
   input_arg {
-    name: "input_min"
+    name: "lr"
     type_attr: "T"
   }
   input_arg {
-    name: "input_max"
+    name: "beta1"
     type_attr: "T"
   }
   input_arg {
-    name: "num_bits"
-    type: DT_INT32
-  }
-  output_arg {
-    name: "output"
+    name: "beta2"
     type_attr: "T"
   }
-  attr {
-    name: "signed_input"
-    type: "bool"
-    default_value {
-      b: true
-    }
+  input_arg {
+    name: "epsilon"
+    type_attr: "T"
   }
-  attr {
-    name: "range_given"
-    type: "bool"
-    default_value {
-      b: true
-    }
+  input_arg {
+    name: "grad"
+    type_attr: "T"
   }
   attr {
     name: "T"
@@ -22498,1723 +34054,1727 @@ op {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
       }
     }
   }
+  attr {
+    name: "use_locking"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  is_stateful: true
 }
 op {
-  name: "QuantizeDownAndShrinkRange"
+  name: "ResourceApplyAdam"
   input_arg {
-    name: "input"
-    type_attr: "Tinput"
+    name: "var"
+    type: DT_RESOURCE
   }
   input_arg {
-    name: "input_min"
-    type: DT_FLOAT
+    name: "m"
+    type: DT_RESOURCE
   }
   input_arg {
-    name: "input_max"
-    type: DT_FLOAT
-  }
-  output_arg {
-    name: "output"
-    type_attr: "out_type"
-  }
-  output_arg {
-    name: "output_min"
-    type: DT_FLOAT
-  }
-  output_arg {
-    name: "output_max"
-    type: DT_FLOAT
-  }
-  attr {
-    name: "Tinput"
-    type: "type"
-    allowed_values {
-      list {
-        type: DT_QINT8
-        type: DT_QUINT8
-        type: DT_QINT16
-        type: DT_QUINT16
-        type: DT_QINT32
-      }
-    }
+    name: "v"
+    type: DT_RESOURCE
   }
-  attr {
-    name: "out_type"
-    type: "type"
-    allowed_values {
-      list {
-        type: DT_QINT8
-        type: DT_QUINT8
-        type: DT_QINT16
-        type: DT_QUINT16
-        type: DT_QINT32
-      }
-    }
+  input_arg {
+    name: "beta1_power"
+    type_attr: "T"
   }
-}
-op {
-  name: "QuantizeV2"
   input_arg {
-    name: "input"
-    type: DT_FLOAT
+    name: "beta2_power"
+    type_attr: "T"
   }
   input_arg {
-    name: "min_range"
-    type: DT_FLOAT
+    name: "lr"
+    type_attr: "T"
   }
   input_arg {
-    name: "max_range"
-    type: DT_FLOAT
+    name: "beta1"
+    type_attr: "T"
   }
-  output_arg {
-    name: "output"
+  input_arg {
+    name: "beta2"
     type_attr: "T"
   }
-  output_arg {
-    name: "output_min"
-    type: DT_FLOAT
+  input_arg {
+    name: "epsilon"
+    type_attr: "T"
   }
-  output_arg {
-    name: "output_max"
-    type: DT_FLOAT
+  input_arg {
+    name: "grad"
+    type_attr: "T"
   }
   attr {
     name: "T"
     type: "type"
     allowed_values {
       list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
         type: DT_QINT8
         type: DT_QUINT8
-        type: DT_QINT16
-        type: DT_QUINT16
         type: DT_QINT32
+        type: DT_HALF
       }
     }
   }
   attr {
-    name: "mode"
-    type: "string"
+    name: "use_locking"
+    type: "bool"
     default_value {
-      s: "MIN_COMBINED"
+      b: false
     }
-    allowed_values {
-      list {
-        s: "MIN_COMBINED"
-        s: "MIN_FIRST"
-      }
+  }
+  attr {
+    name: "use_nesterov"
+    type: "bool"
+    default_value {
+      b: false
     }
   }
+  is_stateful: true
 }
 op {
-  name: "QuantizeV2"
+  name: "ResourceApplyAdam"
   input_arg {
-    name: "input"
-    type: DT_FLOAT
+    name: "var"
+    type: DT_RESOURCE
   }
   input_arg {
-    name: "min_range"
-    type: DT_FLOAT
+    name: "m"
+    type: DT_RESOURCE
   }
   input_arg {
-    name: "max_range"
-    type: DT_FLOAT
+    name: "v"
+    type: DT_RESOURCE
   }
-  output_arg {
-    name: "output"
+  input_arg {
+    name: "beta1_power"
     type_attr: "T"
   }
-  output_arg {
-    name: "output_min"
-    type: DT_FLOAT
+  input_arg {
+    name: "beta2_power"
+    type_attr: "T"
   }
-  output_arg {
-    name: "output_max"
-    type: DT_FLOAT
+  input_arg {
+    name: "lr"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "beta1"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "beta2"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "epsilon"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "grad"
+    type_attr: "T"
   }
   attr {
     name: "T"
     type: "type"
     allowed_values {
       list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
         type: DT_QINT8
         type: DT_QUINT8
-        type: DT_QINT16
-        type: DT_QUINT16
         type: DT_QINT32
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
       }
     }
   }
   attr {
-    name: "mode"
-    type: "string"
+    name: "use_locking"
+    type: "bool"
     default_value {
-      s: "MIN_COMBINED"
+      b: false
     }
-    allowed_values {
-      list {
-        s: "MIN_COMBINED"
-        s: "MIN_FIRST"
-        s: "SCALED"
-      }
+  }
+  attr {
+    name: "use_nesterov"
+    type: "bool"
+    default_value {
+      b: false
     }
   }
+  is_stateful: true
 }
 op {
-  name: "QuantizeV2"
+  name: "ResourceApplyAdam"
   input_arg {
-    name: "input"
-    type: DT_FLOAT
+    name: "var"
+    type: DT_RESOURCE
   }
   input_arg {
-    name: "min_range"
-    type: DT_FLOAT
+    name: "m"
+    type: DT_RESOURCE
   }
   input_arg {
-    name: "max_range"
-    type: DT_FLOAT
+    name: "v"
+    type: DT_RESOURCE
   }
-  output_arg {
-    name: "output"
+  input_arg {
+    name: "beta1_power"
     type_attr: "T"
   }
-  output_arg {
-    name: "output_min"
-    type: DT_FLOAT
+  input_arg {
+    name: "beta2_power"
+    type_attr: "T"
   }
-  output_arg {
-    name: "output_max"
-    type: DT_FLOAT
+  input_arg {
+    name: "lr"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "beta1"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "beta2"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "epsilon"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "grad"
+    type_attr: "T"
   }
   attr {
     name: "T"
     type: "type"
     allowed_values {
       list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
         type: DT_QINT8
         type: DT_QUINT8
-        type: DT_QINT16
-        type: DT_QUINT16
         type: DT_QINT32
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
   attr {
-    name: "mode"
-    type: "string"
+    name: "use_locking"
+    type: "bool"
     default_value {
-      s: "MIN_COMBINED"
-    }
-    allowed_values {
-      list {
-        s: "MIN_COMBINED"
-        s: "MIN_FIRST"
-        s: "SCALED"
-      }
+      b: false
     }
   }
   attr {
-    name: "round_mode"
-    type: "string"
+    name: "use_nesterov"
+    type: "bool"
     default_value {
-      s: "HALF_AWAY_FROM_ZERO"
-    }
-    allowed_values {
-      list {
-        s: "HALF_AWAY_FROM_ZERO"
-        s: "HALF_TO_EVEN"
-      }
+      b: false
     }
   }
+  is_stateful: true
 }
 op {
-  name: "QuantizedAdd"
+  name: "ResourceApplyAddSign"
   input_arg {
-    name: "x"
-    type_attr: "T1"
+    name: "var"
+    type: DT_RESOURCE
   }
   input_arg {
-    name: "y"
-    type_attr: "T2"
+    name: "m"
+    type: DT_RESOURCE
   }
   input_arg {
-    name: "min_x"
-    type: DT_FLOAT
+    name: "lr"
+    type_attr: "T"
   }
   input_arg {
-    name: "max_x"
-    type: DT_FLOAT
+    name: "alpha"
+    type_attr: "T"
   }
   input_arg {
-    name: "min_y"
-    type: DT_FLOAT
+    name: "sign_decay"
+    type_attr: "T"
   }
   input_arg {
-    name: "max_y"
-    type: DT_FLOAT
-  }
-  output_arg {
-    name: "z"
-    type_attr: "Toutput"
-  }
-  output_arg {
-    name: "min_z"
-    type: DT_FLOAT
-  }
-  output_arg {
-    name: "max_z"
-    type: DT_FLOAT
+    name: "beta"
+    type_attr: "T"
   }
-  attr {
-    name: "T1"
-    type: "type"
-    allowed_values {
-      list {
-        type: DT_QINT8
-        type: DT_QUINT8
-        type: DT_QINT16
-        type: DT_QUINT16
-        type: DT_QINT32
-      }
-    }
+  input_arg {
+    name: "grad"
+    type_attr: "T"
   }
   attr {
-    name: "T2"
+    name: "T"
     type: "type"
     allowed_values {
       list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
         type: DT_QINT8
         type: DT_QUINT8
-        type: DT_QINT16
-        type: DT_QUINT16
         type: DT_QINT32
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
       }
     }
   }
   attr {
-    name: "Toutput"
-    type: "type"
+    name: "use_locking"
+    type: "bool"
     default_value {
-      type: DT_QINT32
-    }
-    allowed_values {
-      list {
-        type: DT_QINT8
-        type: DT_QUINT8
-        type: DT_QINT16
-        type: DT_QUINT16
-        type: DT_QINT32
-      }
+      b: false
     }
   }
-  is_commutative: true
+  is_stateful: true
 }
 op {
-  name: "QuantizedAvgPool"
+  name: "ResourceApplyAddSign"
   input_arg {
-    name: "input"
-    type_attr: "T"
+    name: "var"
+    type: DT_RESOURCE
   }
   input_arg {
-    name: "min_input"
-    type: DT_FLOAT
+    name: "m"
+    type: DT_RESOURCE
   }
   input_arg {
-    name: "max_input"
-    type: DT_FLOAT
+    name: "lr"
+    type_attr: "T"
   }
-  output_arg {
-    name: "output"
+  input_arg {
+    name: "alpha"
     type_attr: "T"
   }
-  output_arg {
-    name: "min_output"
-    type: DT_FLOAT
+  input_arg {
+    name: "sign_decay"
+    type_attr: "T"
   }
-  output_arg {
-    name: "max_output"
-    type: DT_FLOAT
+  input_arg {
+    name: "beta"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "grad"
+    type_attr: "T"
   }
   attr {
     name: "T"
     type: "type"
     allowed_values {
       list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
         type: DT_QINT8
         type: DT_QUINT8
-        type: DT_QINT16
-        type: DT_QUINT16
         type: DT_QINT32
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
   attr {
-    name: "ksize"
-    type: "list(int)"
-  }
-  attr {
-    name: "strides"
-    type: "list(int)"
-  }
-  attr {
-    name: "padding"
-    type: "string"
-    allowed_values {
-      list {
-        s: "SAME"
-        s: "VALID"
-      }
+    name: "use_locking"
+    type: "bool"
+    default_value {
+      b: false
     }
   }
+  is_stateful: true
 }
 op {
-  name: "QuantizedBatchNormWithGlobalNormalization"
+  name: "ResourceApplyCenteredRMSProp"
   input_arg {
-    name: "t"
-    type_attr: "Tinput"
+    name: "var"
+    type: DT_RESOURCE
   }
   input_arg {
-    name: "t_min"
-    type: DT_FLOAT
+    name: "mg"
+    type: DT_RESOURCE
   }
   input_arg {
-    name: "t_max"
-    type: DT_FLOAT
+    name: "ms"
+    type: DT_RESOURCE
   }
   input_arg {
-    name: "m"
-    type_attr: "Tinput"
+    name: "mom"
+    type: DT_RESOURCE
   }
   input_arg {
-    name: "m_min"
-    type: DT_FLOAT
+    name: "lr"
+    type_attr: "T"
   }
   input_arg {
-    name: "m_max"
-    type: DT_FLOAT
+    name: "rho"
+    type_attr: "T"
   }
   input_arg {
-    name: "v"
-    type_attr: "Tinput"
+    name: "momentum"
+    type_attr: "T"
   }
   input_arg {
-    name: "v_min"
-    type: DT_FLOAT
+    name: "epsilon"
+    type_attr: "T"
   }
   input_arg {
-    name: "v_max"
-    type: DT_FLOAT
+    name: "grad"
+    type_attr: "T"
   }
-  input_arg {
-    name: "beta"
-    type_attr: "Tinput"
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+      }
+    }
+  }
+  attr {
+    name: "use_locking"
+    type: "bool"
+    default_value {
+      b: false
+    }
   }
+  is_stateful: true
+}
+op {
+  name: "ResourceApplyCenteredRMSProp"
   input_arg {
-    name: "beta_min"
-    type: DT_FLOAT
+    name: "var"
+    type: DT_RESOURCE
   }
   input_arg {
-    name: "beta_max"
-    type: DT_FLOAT
+    name: "mg"
+    type: DT_RESOURCE
   }
   input_arg {
-    name: "gamma"
-    type_attr: "Tinput"
+    name: "ms"
+    type: DT_RESOURCE
   }
   input_arg {
-    name: "gamma_min"
-    type: DT_FLOAT
+    name: "mom"
+    type: DT_RESOURCE
   }
   input_arg {
-    name: "gamma_max"
-    type: DT_FLOAT
+    name: "lr"
+    type_attr: "T"
   }
-  output_arg {
-    name: "result"
-    type_attr: "out_type"
+  input_arg {
+    name: "rho"
+    type_attr: "T"
   }
-  output_arg {
-    name: "result_min"
-    type: DT_FLOAT
+  input_arg {
+    name: "momentum"
+    type_attr: "T"
   }
-  output_arg {
-    name: "result_max"
-    type: DT_FLOAT
+  input_arg {
+    name: "epsilon"
+    type_attr: "T"
   }
-  attr {
-    name: "Tinput"
-    type: "type"
-    allowed_values {
-      list {
-        type: DT_QINT8
-        type: DT_QUINT8
-        type: DT_QINT16
-        type: DT_QUINT16
-        type: DT_QINT32
-      }
-    }
+  input_arg {
+    name: "grad"
+    type_attr: "T"
   }
   attr {
-    name: "out_type"
+    name: "T"
     type: "type"
     allowed_values {
       list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
         type: DT_QINT8
         type: DT_QUINT8
-        type: DT_QINT16
-        type: DT_QUINT16
         type: DT_QINT32
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
       }
     }
   }
   attr {
-    name: "variance_epsilon"
-    type: "float"
-  }
-  attr {
-    name: "scale_after_normalization"
+    name: "use_locking"
     type: "bool"
+    default_value {
+      b: false
+    }
   }
+  is_stateful: true
 }
 op {
-  name: "QuantizedBiasAdd"
+  name: "ResourceApplyCenteredRMSProp"
   input_arg {
-    name: "input"
-    type_attr: "T1"
+    name: "var"
+    type: DT_RESOURCE
   }
   input_arg {
-    name: "bias"
-    type_attr: "T2"
+    name: "mg"
+    type: DT_RESOURCE
   }
   input_arg {
-    name: "min_input"
-    type: DT_FLOAT
+    name: "ms"
+    type: DT_RESOURCE
   }
   input_arg {
-    name: "max_input"
-    type: DT_FLOAT
+    name: "mom"
+    type: DT_RESOURCE
   }
   input_arg {
-    name: "min_bias"
-    type: DT_FLOAT
+    name: "lr"
+    type_attr: "T"
   }
   input_arg {
-    name: "max_bias"
-    type: DT_FLOAT
-  }
-  output_arg {
-    name: "output"
-    type_attr: "out_type"
+    name: "rho"
+    type_attr: "T"
   }
-  output_arg {
-    name: "min_out"
-    type: DT_FLOAT
+  input_arg {
+    name: "momentum"
+    type_attr: "T"
   }
-  output_arg {
-    name: "max_out"
-    type: DT_FLOAT
+  input_arg {
+    name: "epsilon"
+    type_attr: "T"
   }
-  attr {
-    name: "T1"
-    type: "type"
-    allowed_values {
-      list {
-        type: DT_QINT8
-        type: DT_QUINT8
-        type: DT_QINT16
-        type: DT_QUINT16
-        type: DT_QINT32
-      }
-    }
+  input_arg {
+    name: "grad"
+    type_attr: "T"
   }
   attr {
-    name: "T2"
+    name: "T"
     type: "type"
     allowed_values {
       list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
         type: DT_QINT8
         type: DT_QUINT8
-        type: DT_QINT16
-        type: DT_QUINT16
         type: DT_QINT32
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
   attr {
-    name: "out_type"
-    type: "type"
-    allowed_values {
-      list {
-        type: DT_QINT8
-        type: DT_QUINT8
-        type: DT_QINT16
-        type: DT_QUINT16
-        type: DT_QINT32
-      }
+    name: "use_locking"
+    type: "bool"
+    default_value {
+      b: false
     }
   }
+  is_stateful: true
 }
 op {
-  name: "QuantizedConcat"
+  name: "ResourceApplyFtrl"
   input_arg {
-    name: "concat_dim"
-    type: DT_INT32
+    name: "var"
+    type: DT_RESOURCE
   }
   input_arg {
-    name: "values"
-    type_attr: "T"
-    number_attr: "N"
+    name: "accum"
+    type: DT_RESOURCE
   }
   input_arg {
-    name: "input_mins"
-    type: DT_FLOAT
-    number_attr: "N"
+    name: "linear"
+    type: DT_RESOURCE
   }
   input_arg {
-    name: "input_maxes"
-    type: DT_FLOAT
-    number_attr: "N"
+    name: "grad"
+    type_attr: "T"
   }
-  output_arg {
-    name: "output"
+  input_arg {
+    name: "lr"
     type_attr: "T"
   }
-  output_arg {
-    name: "output_min"
-    type: DT_FLOAT
+  input_arg {
+    name: "l1"
+    type_attr: "T"
   }
-  output_arg {
-    name: "output_max"
-    type: DT_FLOAT
+  input_arg {
+    name: "l2"
+    type_attr: "T"
   }
-  attr {
-    name: "N"
-    type: "int"
-    has_minimum: true
-    minimum: 2
+  input_arg {
+    name: "lr_power"
+    type_attr: "T"
   }
   attr {
     name: "T"
     type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+      }
+    }
   }
+  attr {
+    name: "use_locking"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  is_stateful: true
 }
 op {
-  name: "QuantizedConv2D"
+  name: "ResourceApplyFtrl"
   input_arg {
-    name: "input"
-    type_attr: "Tinput"
+    name: "var"
+    type: DT_RESOURCE
   }
   input_arg {
-    name: "filter"
-    type_attr: "Tfilter"
+    name: "accum"
+    type: DT_RESOURCE
   }
   input_arg {
-    name: "min_input"
-    type: DT_FLOAT
+    name: "linear"
+    type: DT_RESOURCE
   }
   input_arg {
-    name: "max_input"
-    type: DT_FLOAT
+    name: "grad"
+    type_attr: "T"
   }
   input_arg {
-    name: "min_filter"
-    type: DT_FLOAT
+    name: "lr"
+    type_attr: "T"
   }
   input_arg {
-    name: "max_filter"
-    type: DT_FLOAT
-  }
-  output_arg {
-    name: "output"
-    type_attr: "out_type"
-  }
-  output_arg {
-    name: "min_output"
-    type: DT_FLOAT
+    name: "l1"
+    type_attr: "T"
   }
-  output_arg {
-    name: "max_output"
-    type: DT_FLOAT
+  input_arg {
+    name: "l2"
+    type_attr: "T"
   }
-  attr {
-    name: "Tinput"
-    type: "type"
-    allowed_values {
-      list {
-        type: DT_QINT8
-        type: DT_QUINT8
-        type: DT_QINT16
-        type: DT_QUINT16
-        type: DT_QINT32
-      }
-    }
+  input_arg {
+    name: "lr_power"
+    type_attr: "T"
   }
   attr {
-    name: "Tfilter"
+    name: "T"
     type: "type"
     allowed_values {
       list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
         type: DT_QINT8
         type: DT_QUINT8
-        type: DT_QINT16
-        type: DT_QUINT16
         type: DT_QINT32
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
       }
     }
   }
   attr {
-    name: "out_type"
-    type: "type"
+    name: "use_locking"
+    type: "bool"
     default_value {
-      type: DT_QINT32
-    }
-    allowed_values {
-      list {
-        type: DT_QINT8
-        type: DT_QUINT8
-        type: DT_QINT16
-        type: DT_QUINT16
-        type: DT_QINT32
-      }
-    }
-  }
-  attr {
-    name: "strides"
-    type: "list(int)"
-  }
-  attr {
-    name: "padding"
-    type: "string"
-    allowed_values {
-      list {
-        s: "SAME"
-        s: "VALID"
-      }
+      b: false
     }
   }
+  is_stateful: true
 }
 op {
-  name: "QuantizedInstanceNorm"
+  name: "ResourceApplyFtrl"
   input_arg {
-    name: "x"
-    type_attr: "T"
+    name: "var"
+    type: DT_RESOURCE
   }
   input_arg {
-    name: "x_min"
-    type: DT_FLOAT
+    name: "accum"
+    type: DT_RESOURCE
   }
   input_arg {
-    name: "x_max"
-    type: DT_FLOAT
+    name: "linear"
+    type: DT_RESOURCE
   }
-  output_arg {
-    name: "y"
+  input_arg {
+    name: "grad"
     type_attr: "T"
   }
-  output_arg {
-    name: "y_min"
-    type: DT_FLOAT
+  input_arg {
+    name: "lr"
+    type_attr: "T"
   }
-  output_arg {
-    name: "y_max"
-    type: DT_FLOAT
+  input_arg {
+    name: "l1"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "l2"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "lr_power"
+    type_attr: "T"
   }
   attr {
     name: "T"
     type: "type"
     allowed_values {
       list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
         type: DT_QINT8
         type: DT_QUINT8
-        type: DT_QINT16
-        type: DT_QUINT16
         type: DT_QINT32
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
   attr {
-    name: "output_range_given"
+    name: "use_locking"
     type: "bool"
     default_value {
       b: false
     }
   }
-  attr {
-    name: "given_y_min"
-    type: "float"
-    default_value {
-      f: 0
-    }
-  }
-  attr {
-    name: "given_y_max"
-    type: "float"
-    default_value {
-      f: 0
-    }
-  }
-  attr {
-    name: "variance_epsilon"
-    type: "float"
-    default_value {
-      f: 1e-05
-    }
-  }
-  attr {
-    name: "min_separation"
-    type: "float"
-    default_value {
-      f: 0.001
-    }
-  }
+  is_stateful: true
 }
 op {
-  name: "QuantizedMatMul"
+  name: "ResourceApplyFtrlV2"
   input_arg {
-    name: "a"
-    type_attr: "T1"
+    name: "var"
+    type: DT_RESOURCE
   }
   input_arg {
-    name: "b"
-    type_attr: "T2"
+    name: "accum"
+    type: DT_RESOURCE
   }
   input_arg {
-    name: "min_a"
-    type: DT_FLOAT
+    name: "linear"
+    type: DT_RESOURCE
   }
   input_arg {
-    name: "max_a"
-    type: DT_FLOAT
+    name: "grad"
+    type_attr: "T"
   }
   input_arg {
-    name: "min_b"
-    type: DT_FLOAT
+    name: "lr"
+    type_attr: "T"
   }
   input_arg {
-    name: "max_b"
-    type: DT_FLOAT
-  }
-  output_arg {
-    name: "out"
-    type_attr: "Toutput"
-  }
-  output_arg {
-    name: "min_out"
-    type: DT_FLOAT
+    name: "l1"
+    type_attr: "T"
   }
-  output_arg {
-    name: "max_out"
-    type: DT_FLOAT
+  input_arg {
+    name: "l2"
+    type_attr: "T"
   }
-  attr {
-    name: "T1"
-    type: "type"
-    allowed_values {
-      list {
-        type: DT_QINT8
-        type: DT_QUINT8
-        type: DT_QINT16
-        type: DT_QUINT16
-        type: DT_QINT32
-      }
-    }
+  input_arg {
+    name: "l2_shrinkage"
+    type_attr: "T"
   }
-  attr {
-    name: "T2"
-    type: "type"
-    allowed_values {
-      list {
-        type: DT_QINT8
-        type: DT_QUINT8
-        type: DT_QINT16
-        type: DT_QUINT16
-        type: DT_QINT32
-      }
-    }
+  input_arg {
+    name: "lr_power"
+    type_attr: "T"
   }
   attr {
-    name: "Toutput"
+    name: "T"
     type: "type"
-    default_value {
-      type: DT_QINT32
-    }
     allowed_values {
       list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
         type: DT_QINT8
         type: DT_QUINT8
-        type: DT_QINT16
-        type: DT_QUINT16
         type: DT_QINT32
+        type: DT_HALF
       }
     }
   }
   attr {
-    name: "transpose_a"
-    type: "bool"
-    default_value {
-      b: false
-    }
-  }
-  attr {
-    name: "transpose_b"
+    name: "use_locking"
     type: "bool"
     default_value {
       b: false
     }
   }
-  attr {
-    name: "Tactivation"
-    type: "type"
-    default_value {
-      type: DT_QUINT8
-    }
-    allowed_values {
-      list {
-        type: DT_QINT8
-        type: DT_QUINT8
-        type: DT_QINT16
-        type: DT_QUINT16
-        type: DT_QINT32
-      }
-    }
-  }
+  is_stateful: true
 }
 op {
-  name: "QuantizedMaxPool"
+  name: "ResourceApplyFtrlV2"
   input_arg {
-    name: "input"
-    type_attr: "T"
+    name: "var"
+    type: DT_RESOURCE
   }
   input_arg {
-    name: "min_input"
-    type: DT_FLOAT
+    name: "accum"
+    type: DT_RESOURCE
   }
   input_arg {
-    name: "max_input"
-    type: DT_FLOAT
+    name: "linear"
+    type: DT_RESOURCE
   }
-  output_arg {
-    name: "output"
+  input_arg {
+    name: "grad"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "lr"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "l1"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "l2"
     type_attr: "T"
   }
-  output_arg {
-    name: "min_output"
-    type: DT_FLOAT
+  input_arg {
+    name: "l2_shrinkage"
+    type_attr: "T"
   }
-  output_arg {
-    name: "max_output"
-    type: DT_FLOAT
+  input_arg {
+    name: "lr_power"
+    type_attr: "T"
   }
   attr {
     name: "T"
     type: "type"
     allowed_values {
       list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
         type: DT_QINT8
         type: DT_QUINT8
-        type: DT_QINT16
-        type: DT_QUINT16
         type: DT_QINT32
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
       }
     }
   }
   attr {
-    name: "ksize"
-    type: "list(int)"
-  }
-  attr {
-    name: "strides"
-    type: "list(int)"
-  }
-  attr {
-    name: "padding"
-    type: "string"
-    allowed_values {
-      list {
-        s: "SAME"
-        s: "VALID"
-      }
+    name: "use_locking"
+    type: "bool"
+    default_value {
+      b: false
     }
   }
+  is_stateful: true
 }
 op {
-  name: "QuantizedMul"
+  name: "ResourceApplyFtrlV2"
   input_arg {
-    name: "x"
-    type_attr: "T1"
+    name: "var"
+    type: DT_RESOURCE
   }
   input_arg {
-    name: "y"
-    type_attr: "T2"
+    name: "accum"
+    type: DT_RESOURCE
   }
   input_arg {
-    name: "min_x"
-    type: DT_FLOAT
+    name: "linear"
+    type: DT_RESOURCE
   }
   input_arg {
-    name: "max_x"
-    type: DT_FLOAT
+    name: "grad"
+    type_attr: "T"
   }
   input_arg {
-    name: "min_y"
-    type: DT_FLOAT
+    name: "lr"
+    type_attr: "T"
   }
   input_arg {
-    name: "max_y"
-    type: DT_FLOAT
-  }
-  output_arg {
-    name: "z"
-    type_attr: "Toutput"
+    name: "l1"
+    type_attr: "T"
   }
-  output_arg {
-    name: "min_z"
-    type: DT_FLOAT
+  input_arg {
+    name: "l2"
+    type_attr: "T"
   }
-  output_arg {
-    name: "max_z"
-    type: DT_FLOAT
+  input_arg {
+    name: "l2_shrinkage"
+    type_attr: "T"
   }
-  attr {
-    name: "T1"
-    type: "type"
-    allowed_values {
-      list {
-        type: DT_QINT8
-        type: DT_QUINT8
-        type: DT_QINT16
-        type: DT_QUINT16
-        type: DT_QINT32
-      }
-    }
+  input_arg {
+    name: "lr_power"
+    type_attr: "T"
   }
   attr {
-    name: "T2"
+    name: "T"
     type: "type"
     allowed_values {
       list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
         type: DT_QINT8
         type: DT_QUINT8
-        type: DT_QINT16
-        type: DT_QUINT16
         type: DT_QINT32
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
   attr {
-    name: "Toutput"
-    type: "type"
+    name: "use_locking"
+    type: "bool"
     default_value {
-      type: DT_QINT32
-    }
-    allowed_values {
-      list {
-        type: DT_QINT8
-        type: DT_QUINT8
-        type: DT_QINT16
-        type: DT_QUINT16
-        type: DT_QINT32
-      }
+      b: false
     }
   }
-  is_commutative: true
+  is_stateful: true
 }
 op {
-  name: "QuantizedRelu"
+  name: "ResourceApplyGradientDescent"
   input_arg {
-    name: "features"
-    type_attr: "Tinput"
+    name: "var"
+    type: DT_RESOURCE
   }
   input_arg {
-    name: "min_features"
-    type: DT_FLOAT
+    name: "alpha"
+    type_attr: "T"
   }
   input_arg {
-    name: "max_features"
-    type: DT_FLOAT
-  }
-  output_arg {
-    name: "activations"
-    type_attr: "out_type"
-  }
-  output_arg {
-    name: "min_activations"
-    type: DT_FLOAT
-  }
-  output_arg {
-    name: "max_activations"
-    type: DT_FLOAT
+    name: "delta"
+    type_attr: "T"
   }
   attr {
-    name: "Tinput"
+    name: "T"
     type: "type"
     allowed_values {
       list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
         type: DT_QINT8
         type: DT_QUINT8
-        type: DT_QINT16
-        type: DT_QUINT16
         type: DT_QINT32
+        type: DT_HALF
       }
     }
   }
   attr {
-    name: "out_type"
-    type: "type"
+    name: "use_locking"
+    type: "bool"
     default_value {
-      type: DT_QUINT8
-    }
-    allowed_values {
-      list {
-        type: DT_QINT8
-        type: DT_QUINT8
-        type: DT_QINT16
-        type: DT_QUINT16
-        type: DT_QINT32
-      }
+      b: false
     }
   }
+  is_stateful: true
 }
 op {
-  name: "QuantizedRelu6"
+  name: "ResourceApplyGradientDescent"
   input_arg {
-    name: "features"
-    type_attr: "Tinput"
+    name: "var"
+    type: DT_RESOURCE
   }
   input_arg {
-    name: "min_features"
-    type: DT_FLOAT
+    name: "alpha"
+    type_attr: "T"
   }
   input_arg {
-    name: "max_features"
-    type: DT_FLOAT
-  }
-  output_arg {
-    name: "activations"
-    type_attr: "out_type"
-  }
-  output_arg {
-    name: "min_activations"
-    type: DT_FLOAT
-  }
-  output_arg {
-    name: "max_activations"
-    type: DT_FLOAT
+    name: "delta"
+    type_attr: "T"
   }
   attr {
-    name: "Tinput"
+    name: "T"
     type: "type"
     allowed_values {
       list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
         type: DT_QINT8
         type: DT_QUINT8
-        type: DT_QINT16
-        type: DT_QUINT16
         type: DT_QINT32
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
       }
     }
   }
   attr {
-    name: "out_type"
-    type: "type"
+    name: "use_locking"
+    type: "bool"
     default_value {
-      type: DT_QUINT8
-    }
-    allowed_values {
-      list {
-        type: DT_QINT8
-        type: DT_QUINT8
-        type: DT_QINT16
-        type: DT_QUINT16
-        type: DT_QINT32
-      }
+      b: false
     }
   }
+  is_stateful: true
 }
 op {
-  name: "QuantizedReluX"
-  input_arg {
-    name: "features"
-    type_attr: "Tinput"
-  }
+  name: "ResourceApplyGradientDescent"
   input_arg {
-    name: "max_value"
-    type: DT_FLOAT
+    name: "var"
+    type: DT_RESOURCE
   }
   input_arg {
-    name: "min_features"
-    type: DT_FLOAT
+    name: "alpha"
+    type_attr: "T"
   }
   input_arg {
-    name: "max_features"
-    type: DT_FLOAT
-  }
-  output_arg {
-    name: "activations"
-    type_attr: "out_type"
-  }
-  output_arg {
-    name: "min_activations"
-    type: DT_FLOAT
-  }
-  output_arg {
-    name: "max_activations"
-    type: DT_FLOAT
+    name: "delta"
+    type_attr: "T"
   }
   attr {
-    name: "Tinput"
+    name: "T"
     type: "type"
     allowed_values {
       list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
         type: DT_QINT8
         type: DT_QUINT8
-        type: DT_QINT16
-        type: DT_QUINT16
         type: DT_QINT32
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
   attr {
-    name: "out_type"
-    type: "type"
+    name: "use_locking"
+    type: "bool"
     default_value {
-      type: DT_QUINT8
-    }
-    allowed_values {
-      list {
-        type: DT_QINT8
-        type: DT_QUINT8
-        type: DT_QINT16
-        type: DT_QUINT16
-        type: DT_QINT32
-      }
+      b: false
     }
   }
+  is_stateful: true
 }
 op {
-  name: "QuantizedReshape"
+  name: "ResourceApplyMomentum"
   input_arg {
-    name: "tensor"
-    type_attr: "T"
+    name: "var"
+    type: DT_RESOURCE
   }
   input_arg {
-    name: "shape"
-    type_attr: "Tshape"
+    name: "accum"
+    type: DT_RESOURCE
   }
   input_arg {
-    name: "input_min"
-    type: DT_FLOAT
+    name: "lr"
+    type_attr: "T"
   }
   input_arg {
-    name: "input_max"
-    type: DT_FLOAT
-  }
-  output_arg {
-    name: "output"
+    name: "grad"
     type_attr: "T"
   }
-  output_arg {
-    name: "output_min"
-    type: DT_FLOAT
-  }
-  output_arg {
-    name: "output_max"
-    type: DT_FLOAT
+  input_arg {
+    name: "momentum"
+    type_attr: "T"
   }
   attr {
     name: "T"
     type: "type"
-  }
-  attr {
-    name: "Tshape"
-    type: "type"
-    default_value {
-      type: DT_INT32
-    }
     allowed_values {
       list {
-        type: DT_INT32
+        type: DT_FLOAT
+        type: DT_DOUBLE
         type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
       }
     }
   }
+  attr {
+    name: "use_locking"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  attr {
+    name: "use_nesterov"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  is_stateful: true
 }
 op {
-  name: "QuantizedResizeBilinear"
+  name: "ResourceApplyMomentum"
   input_arg {
-    name: "images"
-    type_attr: "T"
+    name: "var"
+    type: DT_RESOURCE
   }
   input_arg {
-    name: "size"
-    type: DT_INT32
+    name: "accum"
+    type: DT_RESOURCE
   }
   input_arg {
-    name: "min"
-    type: DT_FLOAT
+    name: "lr"
+    type_attr: "T"
   }
   input_arg {
-    name: "max"
-    type: DT_FLOAT
-  }
-  output_arg {
-    name: "resized_images"
+    name: "grad"
     type_attr: "T"
   }
-  output_arg {
-    name: "out_min"
-    type: DT_FLOAT
-  }
-  output_arg {
-    name: "out_max"
-    type: DT_FLOAT
+  input_arg {
+    name: "momentum"
+    type_attr: "T"
   }
   attr {
     name: "T"
     type: "type"
     allowed_values {
       list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
         type: DT_QUINT8
         type: DT_QINT32
-        type: DT_FLOAT
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
       }
     }
   }
   attr {
-    name: "align_corners"
+    name: "use_locking"
     type: "bool"
     default_value {
       b: false
     }
   }
-}
-op {
-  name: "QueueClose"
-  input_arg {
-    name: "handle"
-    type: DT_STRING
-    is_ref: true
-  }
   attr {
-    name: "cancel_pending_enqueues"
+    name: "use_nesterov"
     type: "bool"
     default_value {
       b: false
     }
   }
+  is_stateful: true
 }
 op {
-  name: "QueueCloseV2"
+  name: "ResourceApplyMomentum"
   input_arg {
-    name: "handle"
+    name: "var"
     type: DT_RESOURCE
   }
-  attr {
-    name: "cancel_pending_enqueues"
-    type: "bool"
-    default_value {
-      b: false
-    }
-  }
-  is_stateful: true
-}
-op {
-  name: "QueueDequeue"
   input_arg {
-    name: "handle"
-    type: DT_STRING
-    is_ref: true
-  }
-  output_arg {
-    name: "components"
-    type_list_attr: "component_types"
-  }
-  attr {
-    name: "component_types"
-    type: "list(type)"
-    has_minimum: true
-    minimum: 1
+    name: "accum"
+    type: DT_RESOURCE
   }
-  attr {
-    name: "timeout_ms"
-    type: "int"
-    default_value {
-      i: -1
-    }
+  input_arg {
+    name: "lr"
+    type_attr: "T"
   }
-}
-op {
-  name: "QueueDequeueMany"
   input_arg {
-    name: "handle"
-    type: DT_STRING
-    is_ref: true
+    name: "grad"
+    type_attr: "T"
   }
   input_arg {
-    name: "n"
-    type: DT_INT32
+    name: "momentum"
+    type_attr: "T"
   }
-  output_arg {
-    name: "components"
-    type_list_attr: "component_types"
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
+      }
+    }
   }
   attr {
-    name: "component_types"
-    type: "list(type)"
-    has_minimum: true
-    minimum: 1
+    name: "use_locking"
+    type: "bool"
+    default_value {
+      b: false
+    }
   }
   attr {
-    name: "timeout_ms"
-    type: "int"
+    name: "use_nesterov"
+    type: "bool"
     default_value {
-      i: -1
+      b: false
     }
   }
+  is_stateful: true
 }
 op {
-  name: "QueueDequeueManyV2"
+  name: "ResourceApplyPowerSign"
   input_arg {
-    name: "handle"
+    name: "var"
     type: DT_RESOURCE
   }
   input_arg {
-    name: "n"
-    type: DT_INT32
-  }
-  output_arg {
-    name: "components"
-    type_list_attr: "component_types"
+    name: "m"
+    type: DT_RESOURCE
   }
-  attr {
-    name: "component_types"
-    type: "list(type)"
-    has_minimum: true
-    minimum: 1
+  input_arg {
+    name: "lr"
+    type_attr: "T"
   }
-  attr {
-    name: "timeout_ms"
-    type: "int"
-    default_value {
-      i: -1
-    }
+  input_arg {
+    name: "logbase"
+    type_attr: "T"
   }
-  is_stateful: true
-}
-op {
-  name: "QueueDequeueUpTo"
   input_arg {
-    name: "handle"
-    type: DT_STRING
-    is_ref: true
+    name: "sign_decay"
+    type_attr: "T"
   }
   input_arg {
-    name: "n"
-    type: DT_INT32
+    name: "beta"
+    type_attr: "T"
   }
-  output_arg {
-    name: "components"
-    type_list_attr: "component_types"
+  input_arg {
+    name: "grad"
+    type_attr: "T"
   }
   attr {
-    name: "component_types"
-    type: "list(type)"
-    has_minimum: true
-    minimum: 1
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+      }
+    }
   }
   attr {
-    name: "timeout_ms"
-    type: "int"
+    name: "use_locking"
+    type: "bool"
     default_value {
-      i: -1
+      b: false
     }
   }
+  is_stateful: true
 }
 op {
-  name: "QueueDequeueUpToV2"
+  name: "ResourceApplyPowerSign"
   input_arg {
-    name: "handle"
+    name: "var"
     type: DT_RESOURCE
   }
   input_arg {
-    name: "n"
-    type: DT_INT32
+    name: "m"
+    type: DT_RESOURCE
   }
-  output_arg {
-    name: "components"
-    type_list_attr: "component_types"
+  input_arg {
+    name: "lr"
+    type_attr: "T"
   }
-  attr {
-    name: "component_types"
-    type: "list(type)"
-    has_minimum: true
-    minimum: 1
+  input_arg {
+    name: "logbase"
+    type_attr: "T"
   }
-  attr {
-    name: "timeout_ms"
-    type: "int"
-    default_value {
-      i: -1
-    }
+  input_arg {
+    name: "sign_decay"
+    type_attr: "T"
   }
-  is_stateful: true
-}
-op {
-  name: "QueueDequeueV2"
   input_arg {
-    name: "handle"
-    type: DT_RESOURCE
+    name: "beta"
+    type_attr: "T"
   }
-  output_arg {
-    name: "components"
-    type_list_attr: "component_types"
+  input_arg {
+    name: "grad"
+    type_attr: "T"
   }
   attr {
-    name: "component_types"
-    type: "list(type)"
-    has_minimum: true
-    minimum: 1
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
+      }
+    }
   }
   attr {
-    name: "timeout_ms"
-    type: "int"
+    name: "use_locking"
+    type: "bool"
     default_value {
-      i: -1
+      b: false
     }
   }
   is_stateful: true
 }
 op {
-  name: "QueueEnqueue"
+  name: "ResourceApplyProximalAdagrad"
   input_arg {
-    name: "handle"
-    type: DT_STRING
-    is_ref: true
+    name: "var"
+    type: DT_RESOURCE
   }
   input_arg {
-    name: "components"
-    type_list_attr: "Tcomponents"
+    name: "accum"
+    type: DT_RESOURCE
   }
-  attr {
-    name: "Tcomponents"
-    type: "list(type)"
-    has_minimum: true
-    minimum: 1
+  input_arg {
+    name: "lr"
+    type_attr: "T"
   }
-  attr {
-    name: "timeout_ms"
-    type: "int"
-    default_value {
-      i: -1
-    }
+  input_arg {
+    name: "l1"
+    type_attr: "T"
   }
-}
-op {
-  name: "QueueEnqueueMany"
   input_arg {
-    name: "handle"
-    type: DT_STRING
-    is_ref: true
+    name: "l2"
+    type_attr: "T"
   }
   input_arg {
-    name: "components"
-    type_list_attr: "Tcomponents"
+    name: "grad"
+    type_attr: "T"
   }
   attr {
-    name: "Tcomponents"
-    type: "list(type)"
-    has_minimum: true
-    minimum: 1
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+      }
+    }
   }
   attr {
-    name: "timeout_ms"
-    type: "int"
+    name: "use_locking"
+    type: "bool"
     default_value {
-      i: -1
+      b: false
     }
   }
+  is_stateful: true
 }
 op {
-  name: "QueueEnqueueManyV2"
+  name: "ResourceApplyProximalAdagrad"
   input_arg {
-    name: "handle"
+    name: "var"
     type: DT_RESOURCE
   }
   input_arg {
-    name: "components"
-    type_list_attr: "Tcomponents"
+    name: "accum"
+    type: DT_RESOURCE
   }
-  attr {
-    name: "Tcomponents"
-    type: "list(type)"
-    has_minimum: true
-    minimum: 1
+  input_arg {
+    name: "lr"
+    type_attr: "T"
   }
-  attr {
-    name: "timeout_ms"
-    type: "int"
-    default_value {
-      i: -1
-    }
+  input_arg {
+    name: "l1"
+    type_attr: "T"
   }
-  is_stateful: true
-}
-op {
-  name: "QueueEnqueueV2"
   input_arg {
-    name: "handle"
-    type: DT_RESOURCE
+    name: "l2"
+    type_attr: "T"
   }
   input_arg {
-    name: "components"
-    type_list_attr: "Tcomponents"
+    name: "grad"
+    type_attr: "T"
   }
   attr {
-    name: "Tcomponents"
-    type: "list(type)"
-    has_minimum: true
-    minimum: 1
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+      }
+    }
   }
   attr {
-    name: "timeout_ms"
-    type: "int"
+    name: "use_locking"
+    type: "bool"
     default_value {
-      i: -1
+      b: false
     }
   }
   is_stateful: true
 }
 op {
-  name: "QueueIsClosed"
-  input_arg {
-    name: "handle"
-    type: DT_STRING
-    is_ref: true
-  }
-  output_arg {
-    name: "is_closed"
-    type: DT_BOOL
-  }
-}
-op {
-  name: "QueueIsClosedV2"
+  name: "ResourceApplyProximalAdagrad"
   input_arg {
-    name: "handle"
+    name: "var"
     type: DT_RESOURCE
   }
-  output_arg {
-    name: "is_closed"
-    type: DT_BOOL
-  }
-  is_stateful: true
-}
-op {
-  name: "QueueSize"
-  input_arg {
-    name: "handle"
-    type: DT_STRING
-    is_ref: true
-  }
-  output_arg {
-    name: "size"
-    type: DT_INT32
-  }
-}
-op {
-  name: "QueueSizeV2"
   input_arg {
-    name: "handle"
+    name: "accum"
     type: DT_RESOURCE
   }
-  output_arg {
-    name: "size"
-    type: DT_INT32
-  }
-  is_stateful: true
-}
-op {
-  name: "RFFT"
   input_arg {
-    name: "input"
-    type: DT_FLOAT
+    name: "lr"
+    type_attr: "T"
   }
   input_arg {
-    name: "fft_length"
-    type: DT_INT32
-  }
-  output_arg {
-    name: "output"
-    type: DT_COMPLEX64
+    name: "l1"
+    type_attr: "T"
   }
-}
-op {
-  name: "RFFT2D"
   input_arg {
-    name: "input"
-    type: DT_FLOAT
+    name: "l2"
+    type_attr: "T"
   }
   input_arg {
-    name: "fft_length"
-    type: DT_INT32
+    name: "grad"
+    type_attr: "T"
   }
-  output_arg {
-    name: "output"
-    type: DT_COMPLEX64
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
+      }
+    }
+  }
+  attr {
+    name: "use_locking"
+    type: "bool"
+    default_value {
+      b: false
+    }
   }
+  is_stateful: true
 }
 op {
-  name: "RFFT3D"
+  name: "ResourceApplyProximalGradientDescent"
   input_arg {
-    name: "input"
-    type: DT_FLOAT
+    name: "var"
+    type: DT_RESOURCE
   }
   input_arg {
-    name: "fft_length"
-    type: DT_INT32
+    name: "alpha"
+    type_attr: "T"
   }
-  output_arg {
-    name: "output"
-    type: DT_COMPLEX64
+  input_arg {
+    name: "l1"
+    type_attr: "T"
   }
-}
-op {
-  name: "RGBToHSV"
   input_arg {
-    name: "images"
+    name: "l2"
     type_attr: "T"
   }
-  output_arg {
-    name: "output"
+  input_arg {
+    name: "delta"
     type_attr: "T"
   }
   attr {
     name: "T"
     type: "type"
-    default_value {
-      type: DT_FLOAT
-    }
     allowed_values {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
       }
     }
   }
+  attr {
+    name: "use_locking"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  is_stateful: true
 }
 op {
-  name: "RandomCrop"
+  name: "ResourceApplyProximalGradientDescent"
   input_arg {
-    name: "image"
+    name: "var"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "alpha"
     type_attr: "T"
   }
   input_arg {
-    name: "size"
-    type: DT_INT64
+    name: "l1"
+    type_attr: "T"
   }
-  output_arg {
-    name: "output"
+  input_arg {
+    name: "l2"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "delta"
     type_attr: "T"
   }
   attr {
@@ -24222,461 +35782,391 @@ op {
     type: "type"
     allowed_values {
       list {
-        type: DT_UINT8
-        type: DT_INT8
-        type: DT_INT16
-        type: DT_INT32
-        type: DT_INT64
         type: DT_FLOAT
         type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
       }
     }
   }
   attr {
-    name: "seed"
-    type: "int"
-    default_value {
-      i: 0
-    }
-  }
-  attr {
-    name: "seed2"
-    type: "int"
+    name: "use_locking"
+    type: "bool"
     default_value {
-      i: 0
+      b: false
     }
   }
-  deprecation {
-    version: 8
-  }
   is_stateful: true
 }
 op {
-  name: "RandomGamma"
+  name: "ResourceApplyProximalGradientDescent"
   input_arg {
-    name: "shape"
-    type_attr: "S"
+    name: "var"
+    type: DT_RESOURCE
   }
   input_arg {
     name: "alpha"
     type_attr: "T"
   }
-  output_arg {
-    name: "output"
+  input_arg {
+    name: "l1"
     type_attr: "T"
   }
-  attr {
-    name: "seed"
-    type: "int"
-    default_value {
-      i: 0
-    }
+  input_arg {
+    name: "l2"
+    type_attr: "T"
   }
-  attr {
-    name: "seed2"
-    type: "int"
-    default_value {
-      i: 0
-    }
+  input_arg {
+    name: "delta"
+    type_attr: "T"
   }
   attr {
-    name: "S"
+    name: "T"
     type: "type"
     allowed_values {
       list {
-        type: DT_INT32
+        type: DT_FLOAT
+        type: DT_DOUBLE
         type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
   attr {
-    name: "T"
-    type: "type"
-    allowed_values {
-      list {
-        type: DT_HALF
-        type: DT_FLOAT
-        type: DT_DOUBLE
-      }
+    name: "use_locking"
+    type: "bool"
+    default_value {
+      b: false
     }
   }
   is_stateful: true
 }
 op {
-  name: "RandomPoisson"
+  name: "ResourceApplyRMSProp"
   input_arg {
-    name: "shape"
-    type_attr: "S"
+    name: "var"
+    type: DT_RESOURCE
   }
   input_arg {
-    name: "rate"
-    type_attr: "dtype"
+    name: "ms"
+    type: DT_RESOURCE
   }
-  output_arg {
-    name: "output"
-    type_attr: "dtype"
+  input_arg {
+    name: "mom"
+    type: DT_RESOURCE
   }
-  attr {
-    name: "seed"
-    type: "int"
-    default_value {
-      i: 0
-    }
+  input_arg {
+    name: "lr"
+    type_attr: "T"
   }
-  attr {
-    name: "seed2"
-    type: "int"
-    default_value {
-      i: 0
-    }
+  input_arg {
+    name: "rho"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "momentum"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "epsilon"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "grad"
+    type_attr: "T"
   }
   attr {
-    name: "S"
+    name: "T"
     type: "type"
     allowed_values {
       list {
-        type: DT_INT32
+        type: DT_FLOAT
+        type: DT_DOUBLE
         type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
       }
     }
   }
   attr {
-    name: "dtype"
-    type: "type"
-    allowed_values {
-      list {
-        type: DT_HALF
-        type: DT_FLOAT
-        type: DT_DOUBLE
-      }
+    name: "use_locking"
+    type: "bool"
+    default_value {
+      b: false
     }
   }
   is_stateful: true
 }
 op {
-  name: "RandomPoisson"
+  name: "ResourceApplyRMSProp"
   input_arg {
-    name: "shape"
-    type_attr: "S"
+    name: "var"
+    type: DT_RESOURCE
   }
   input_arg {
-    name: "rate"
-    type_attr: "dtype"
+    name: "ms"
+    type: DT_RESOURCE
   }
-  output_arg {
-    name: "output"
-    type_attr: "dtype"
+  input_arg {
+    name: "mom"
+    type: DT_RESOURCE
   }
-  attr {
-    name: "seed"
-    type: "int"
-    default_value {
-      i: 0
-    }
+  input_arg {
+    name: "lr"
+    type_attr: "T"
   }
-  attr {
-    name: "seed2"
-    type: "int"
-    default_value {
-      i: 0
-    }
+  input_arg {
+    name: "rho"
+    type_attr: "T"
   }
-  attr {
-    name: "S"
-    type: "type"
-    allowed_values {
-      list {
-        type: DT_INT32
-        type: DT_INT64
-      }
-    }
+  input_arg {
+    name: "momentum"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "epsilon"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "grad"
+    type_attr: "T"
   }
   attr {
-    name: "dtype"
+    name: "T"
     type: "type"
     allowed_values {
       list {
-        type: DT_HALF
         type: DT_FLOAT
         type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
       }
     }
   }
-  deprecation {
-    version: 25
+  attr {
+    name: "use_locking"
+    type: "bool"
+    default_value {
+      b: false
+    }
   }
   is_stateful: true
 }
 op {
-  name: "RandomPoissonV2"
+  name: "ResourceApplyRMSProp"
   input_arg {
-    name: "shape"
-    type_attr: "S"
+    name: "var"
+    type: DT_RESOURCE
   }
   input_arg {
-    name: "rate"
-    type_attr: "R"
+    name: "ms"
+    type: DT_RESOURCE
   }
-  output_arg {
-    name: "output"
-    type_attr: "dtype"
+  input_arg {
+    name: "mom"
+    type: DT_RESOURCE
   }
-  attr {
-    name: "seed"
-    type: "int"
-    default_value {
-      i: 0
-    }
+  input_arg {
+    name: "lr"
+    type_attr: "T"
   }
-  attr {
-    name: "seed2"
-    type: "int"
-    default_value {
-      i: 0
-    }
+  input_arg {
+    name: "rho"
+    type_attr: "T"
   }
-  attr {
-    name: "S"
-    type: "type"
-    allowed_values {
-      list {
-        type: DT_INT32
-        type: DT_INT64
-      }
-    }
+  input_arg {
+    name: "momentum"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "epsilon"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "grad"
+    type_attr: "T"
   }
   attr {
-    name: "R"
+    name: "T"
     type: "type"
-    default_value {
-      type: DT_DOUBLE
-    }
     allowed_values {
       list {
-        type: DT_HALF
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT32
         type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
   attr {
-    name: "dtype"
-    type: "type"
+    name: "use_locking"
+    type: "bool"
     default_value {
-      type: DT_INT64
-    }
-    allowed_values {
-      list {
-        type: DT_HALF
-        type: DT_FLOAT
-        type: DT_DOUBLE
-        type: DT_INT32
-        type: DT_INT64
-      }
+      b: false
     }
   }
   is_stateful: true
 }
 op {
-  name: "RandomShuffle"
+  name: "ResourceCountUpTo"
   input_arg {
-    name: "value"
-    type_attr: "T"
+    name: "resource"
+    type: DT_RESOURCE
   }
   output_arg {
     name: "output"
     type_attr: "T"
   }
   attr {
-    name: "seed"
-    type: "int"
-    default_value {
-      i: 0
-    }
-  }
-  attr {
-    name: "seed2"
+    name: "limit"
     type: "int"
-    default_value {
-      i: 0
-    }
   }
   attr {
     name: "T"
     type: "type"
-  }
-  is_stateful: true
-}
-op {
-  name: "RandomShuffleQueue"
-  output_arg {
-    name: "handle"
-    type: DT_STRING
-    is_ref: true
-  }
-  attr {
-    name: "component_types"
-    type: "list(type)"
-    has_minimum: true
-    minimum: 1
-  }
-  attr {
-    name: "shapes"
-    type: "list(shape)"
-    default_value {
+    allowed_values {
       list {
+        type: DT_INT32
+        type: DT_INT64
       }
     }
-    has_minimum: true
-  }
-  attr {
-    name: "capacity"
-    type: "int"
-    default_value {
-      i: -1
-    }
-  }
-  attr {
-    name: "min_after_dequeue"
-    type: "int"
-    default_value {
-      i: 0
-    }
-  }
-  attr {
-    name: "seed"
-    type: "int"
-    default_value {
-      i: 0
-    }
-  }
-  attr {
-    name: "seed2"
-    type: "int"
-    default_value {
-      i: 0
-    }
-  }
-  attr {
-    name: "container"
-    type: "string"
-    default_value {
-      s: ""
-    }
-  }
-  attr {
-    name: "shared_name"
-    type: "string"
-    default_value {
-      s: ""
-    }
   }
   is_stateful: true
 }
 op {
-  name: "RandomShuffleQueueV2"
-  output_arg {
-    name: "handle"
+  name: "ResourceGather"
+  input_arg {
+    name: "resource"
     type: DT_RESOURCE
   }
-  attr {
-    name: "component_types"
-    type: "list(type)"
-    has_minimum: true
-    minimum: 1
-  }
-  attr {
-    name: "shapes"
-    type: "list(shape)"
-    default_value {
-      list {
-      }
-    }
-    has_minimum: true
-  }
-  attr {
-    name: "capacity"
-    type: "int"
-    default_value {
-      i: -1
-    }
-  }
-  attr {
-    name: "min_after_dequeue"
-    type: "int"
-    default_value {
-      i: 0
-    }
+  input_arg {
+    name: "indices"
+    type_attr: "Tindices"
   }
-  attr {
-    name: "seed"
-    type: "int"
-    default_value {
-      i: 0
-    }
+  output_arg {
+    name: "output"
+    type_attr: "dtype"
   }
   attr {
-    name: "seed2"
-    type: "int"
+    name: "validate_indices"
+    type: "bool"
     default_value {
-      i: 0
+      b: true
     }
   }
   attr {
-    name: "container"
-    type: "string"
-    default_value {
-      s: ""
-    }
+    name: "dtype"
+    type: "type"
   }
   attr {
-    name: "shared_name"
-    type: "string"
-    default_value {
-      s: ""
+    name: "Tindices"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
     }
   }
   is_stateful: true
 }
 op {
-  name: "RandomStandardNormal"
+  name: "ResourceScatterAdd"
   input_arg {
-    name: "shape"
-    type_attr: "T"
-  }
-  output_arg {
-    name: "output"
-    type_attr: "dtype"
+    name: "resource"
+    type: DT_RESOURCE
   }
-  attr {
-    name: "seed"
-    type: "int"
-    default_value {
-      i: 0
-    }
+  input_arg {
+    name: "indices"
+    type_attr: "Tindices"
   }
-  attr {
-    name: "seed2"
-    type: "int"
-    default_value {
-      i: 0
-    }
+  input_arg {
+    name: "updates"
+    type_attr: "dtype"
   }
   attr {
     name: "dtype"
     type: "type"
     allowed_values {
       list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
         type: DT_HALF
-        type: DT_FLOAT
-        type: DT_DOUBLE
       }
     }
   }
   attr {
-    name: "T"
+    name: "Tindices"
     type: "type"
     allowed_values {
       list {
@@ -24688,42 +36178,45 @@ op {
   is_stateful: true
 }
 op {
-  name: "RandomUniform"
+  name: "ResourceScatterAdd"
   input_arg {
-    name: "shape"
-    type_attr: "T"
-  }
-  output_arg {
-    name: "output"
-    type_attr: "dtype"
+    name: "resource"
+    type: DT_RESOURCE
   }
-  attr {
-    name: "seed"
-    type: "int"
-    default_value {
-      i: 0
-    }
+  input_arg {
+    name: "indices"
+    type_attr: "Tindices"
   }
-  attr {
-    name: "seed2"
-    type: "int"
-    default_value {
-      i: 0
-    }
+  input_arg {
+    name: "updates"
+    type_attr: "dtype"
   }
   attr {
     name: "dtype"
     type: "type"
     allowed_values {
       list {
-        type: DT_HALF
         type: DT_FLOAT
         type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
       }
     }
   }
   attr {
-    name: "T"
+    name: "Tindices"
     type: "type"
     allowed_values {
       list {
@@ -24735,49 +36228,46 @@ op {
   is_stateful: true
 }
 op {
-  name: "RandomUniformInt"
+  name: "ResourceScatterAdd"
   input_arg {
-    name: "shape"
-    type_attr: "T"
+    name: "resource"
+    type: DT_RESOURCE
   }
   input_arg {
-    name: "minval"
-    type_attr: "Tout"
+    name: "indices"
+    type_attr: "Tindices"
   }
   input_arg {
-    name: "maxval"
-    type_attr: "Tout"
-  }
-  output_arg {
-    name: "output"
-    type_attr: "Tout"
-  }
-  attr {
-    name: "seed"
-    type: "int"
-    default_value {
-      i: 0
-    }
-  }
-  attr {
-    name: "seed2"
-    type: "int"
-    default_value {
-      i: 0
-    }
+    name: "updates"
+    type_attr: "dtype"
   }
   attr {
-    name: "Tout"
+    name: "dtype"
     type: "type"
     allowed_values {
       list {
-        type: DT_INT32
+        type: DT_FLOAT
+        type: DT_DOUBLE
         type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
   attr {
-    name: "T"
+    name: "Tindices"
     type: "type"
     allowed_values {
       list {
@@ -24789,723 +36279,604 @@ op {
   is_stateful: true
 }
 op {
-  name: "Range"
+  name: "ResourceScatterNdUpdate"
   input_arg {
-    name: "start"
-    type_attr: "Tidx"
+    name: "ref"
+    type: DT_RESOURCE
   }
   input_arg {
-    name: "limit"
-    type_attr: "Tidx"
+    name: "indices"
+    type_attr: "Tindices"
   }
   input_arg {
-    name: "delta"
-    type_attr: "Tidx"
+    name: "updates"
+    type_attr: "T"
   }
-  output_arg {
-    name: "output"
-    type_attr: "Tidx"
+  attr {
+    name: "T"
+    type: "type"
   }
   attr {
-    name: "Tidx"
+    name: "Tindices"
     type: "type"
-    default_value {
-      type: DT_INT32
-    }
     allowed_values {
       list {
-        type: DT_FLOAT
-        type: DT_DOUBLE
         type: DT_INT32
         type: DT_INT64
       }
     }
   }
-}
-op {
-  name: "RangeDataset"
-  input_arg {
-    name: "start"
-    type: DT_INT64
-  }
-  input_arg {
-    name: "stop"
-    type: DT_INT64
-  }
-  input_arg {
-    name: "step"
-    type: DT_INT64
-  }
-  output_arg {
-    name: "handle"
-    type: DT_VARIANT
-  }
-  attr {
-    name: "output_types"
-    type: "list(type)"
-    has_minimum: true
-    minimum: 1
-  }
   attr {
-    name: "output_shapes"
-    type: "list(shape)"
-    has_minimum: true
-    minimum: 1
+    name: "use_locking"
+    type: "bool"
+    default_value {
+      b: true
+    }
   }
   is_stateful: true
 }
 op {
-  name: "Rank"
+  name: "ResourceScatterUpdate"
   input_arg {
-    name: "input"
-    type_attr: "T"
-  }
-  output_arg {
-    name: "output"
-    type: DT_INT32
-  }
-  attr {
-    name: "T"
-    type: "type"
+    name: "resource"
+    type: DT_RESOURCE
   }
-}
-op {
-  name: "ReadFile"
   input_arg {
-    name: "filename"
-    type: DT_STRING
-  }
-  output_arg {
-    name: "contents"
-    type: DT_STRING
+    name: "indices"
+    type_attr: "Tindices"
   }
-}
-op {
-  name: "ReadVariableOp"
   input_arg {
-    name: "resource"
-    type: DT_RESOURCE
-  }
-  output_arg {
-    name: "value"
+    name: "updates"
     type_attr: "dtype"
   }
   attr {
     name: "dtype"
     type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+      }
+    }
   }
-  is_stateful: true
-}
-op {
-  name: "ReaderNumRecordsProduced"
-  input_arg {
-    name: "reader_handle"
-    type: DT_STRING
-    is_ref: true
-  }
-  output_arg {
-    name: "records_produced"
-    type: DT_INT64
-  }
-}
-op {
-  name: "ReaderNumRecordsProducedV2"
-  input_arg {
-    name: "reader_handle"
-    type: DT_RESOURCE
-  }
-  output_arg {
-    name: "records_produced"
-    type: DT_INT64
-  }
-  is_stateful: true
-}
-op {
-  name: "ReaderNumWorkUnitsCompleted"
-  input_arg {
-    name: "reader_handle"
-    type: DT_STRING
-    is_ref: true
-  }
-  output_arg {
-    name: "units_completed"
-    type: DT_INT64
-  }
-}
-op {
-  name: "ReaderNumWorkUnitsCompletedV2"
-  input_arg {
-    name: "reader_handle"
-    type: DT_RESOURCE
-  }
-  output_arg {
-    name: "units_completed"
-    type: DT_INT64
+  attr {
+    name: "Tindices"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
   }
   is_stateful: true
 }
 op {
-  name: "ReaderRead"
-  input_arg {
-    name: "reader_handle"
-    type: DT_STRING
-    is_ref: true
-  }
-  input_arg {
-    name: "queue_handle"
-    type: DT_STRING
-    is_ref: true
-  }
-  output_arg {
-    name: "key"
-    type: DT_STRING
-  }
-  output_arg {
-    name: "value"
-    type: DT_STRING
-  }
-}
-op {
-  name: "ReaderReadUpTo"
-  input_arg {
-    name: "reader_handle"
-    type: DT_STRING
-    is_ref: true
-  }
-  input_arg {
-    name: "queue_handle"
-    type: DT_STRING
-    is_ref: true
-  }
-  input_arg {
-    name: "num_records"
-    type: DT_INT64
-  }
-  output_arg {
-    name: "keys"
-    type: DT_STRING
-  }
-  output_arg {
-    name: "values"
-    type: DT_STRING
-  }
-}
-op {
-  name: "ReaderReadUpToV2"
+  name: "ResourceScatterUpdate"
   input_arg {
-    name: "reader_handle"
+    name: "resource"
     type: DT_RESOURCE
   }
   input_arg {
-    name: "queue_handle"
-    type: DT_RESOURCE
+    name: "indices"
+    type_attr: "Tindices"
   }
   input_arg {
-    name: "num_records"
-    type: DT_INT64
+    name: "updates"
+    type_attr: "dtype"
   }
-  output_arg {
-    name: "keys"
-    type: DT_STRING
+  attr {
+    name: "dtype"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
+      }
+    }
   }
-  output_arg {
-    name: "values"
-    type: DT_STRING
+  attr {
+    name: "Tindices"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
   }
   is_stateful: true
 }
 op {
-  name: "ReaderReadV2"
-  input_arg {
-    name: "reader_handle"
-    type: DT_RESOURCE
-  }
+  name: "ResourceSparseApplyAdadelta"
   input_arg {
-    name: "queue_handle"
+    name: "var"
     type: DT_RESOURCE
   }
-  output_arg {
-    name: "key"
-    type: DT_STRING
-  }
-  output_arg {
-    name: "value"
-    type: DT_STRING
-  }
-  is_stateful: true
-}
-op {
-  name: "ReaderReset"
-  input_arg {
-    name: "reader_handle"
-    type: DT_STRING
-    is_ref: true
-  }
-}
-op {
-  name: "ReaderResetV2"
   input_arg {
-    name: "reader_handle"
+    name: "accum"
     type: DT_RESOURCE
   }
-  is_stateful: true
-}
-op {
-  name: "ReaderRestoreState"
-  input_arg {
-    name: "reader_handle"
-    type: DT_STRING
-    is_ref: true
-  }
-  input_arg {
-    name: "state"
-    type: DT_STRING
-  }
-}
-op {
-  name: "ReaderRestoreStateV2"
   input_arg {
-    name: "reader_handle"
+    name: "accum_update"
     type: DT_RESOURCE
   }
   input_arg {
-    name: "state"
-    type: DT_STRING
+    name: "lr"
+    type_attr: "T"
   }
-  is_stateful: true
-}
-op {
-  name: "ReaderSerializeState"
   input_arg {
-    name: "reader_handle"
-    type: DT_STRING
-    is_ref: true
-  }
-  output_arg {
-    name: "state"
-    type: DT_STRING
+    name: "rho"
+    type_attr: "T"
   }
-}
-op {
-  name: "ReaderSerializeStateV2"
   input_arg {
-    name: "reader_handle"
-    type: DT_RESOURCE
-  }
-  output_arg {
-    name: "state"
-    type: DT_STRING
+    name: "epsilon"
+    type_attr: "T"
   }
-  is_stateful: true
-}
-op {
-  name: "Real"
   input_arg {
-    name: "input"
+    name: "grad"
     type_attr: "T"
   }
-  output_arg {
-    name: "output"
-    type_attr: "Tout"
+  input_arg {
+    name: "indices"
+    type_attr: "Tindices"
   }
   attr {
     name: "T"
     type: "type"
-    default_value {
-      type: DT_COMPLEX64
-    }
     allowed_values {
       list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
         type: DT_COMPLEX64
         type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
       }
     }
   }
   attr {
-    name: "Tout"
+    name: "Tindices"
     type: "type"
-    default_value {
-      type: DT_FLOAT
-    }
     allowed_values {
       list {
-        type: DT_FLOAT
-        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
       }
     }
   }
+  attr {
+    name: "use_locking"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  is_stateful: true
 }
 op {
-  name: "RealDiv"
+  name: "ResourceSparseApplyAdadelta"
+  input_arg {
+    name: "var"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "accum"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "accum_update"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "lr"
+    type_attr: "T"
+  }
   input_arg {
-    name: "x"
+    name: "rho"
     type_attr: "T"
   }
   input_arg {
-    name: "y"
+    name: "epsilon"
     type_attr: "T"
   }
-  output_arg {
-    name: "z"
+  input_arg {
+    name: "grad"
     type_attr: "T"
   }
+  input_arg {
+    name: "indices"
+    type_attr: "Tindices"
+  }
   attr {
     name: "T"
     type: "type"
     allowed_values {
       list {
-        type: DT_HALF
         type: DT_FLOAT
         type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
         type: DT_UINT8
-        type: DT_INT8
         type: DT_UINT16
         type: DT_INT16
-        type: DT_INT32
-        type: DT_INT64
+        type: DT_INT8
         type: DT_COMPLEX64
         type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
       }
     }
   }
-}
-op {
-  name: "Reciprocal"
-  input_arg {
-    name: "x"
-    type_attr: "T"
-  }
-  output_arg {
-    name: "y"
-    type_attr: "T"
-  }
   attr {
-    name: "T"
+    name: "Tindices"
     type: "type"
     allowed_values {
       list {
-        type: DT_HALF
-        type: DT_FLOAT
-        type: DT_DOUBLE
         type: DT_INT32
         type: DT_INT64
-        type: DT_COMPLEX64
-        type: DT_COMPLEX128
       }
     }
   }
+  attr {
+    name: "use_locking"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  is_stateful: true
 }
 op {
-  name: "ReciprocalGrad"
+  name: "ResourceSparseApplyAdadelta"
   input_arg {
-    name: "x"
+    name: "var"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "accum"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "accum_update"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "lr"
     type_attr: "T"
   }
   input_arg {
-    name: "y"
+    name: "rho"
     type_attr: "T"
   }
-  output_arg {
-    name: "z"
+  input_arg {
+    name: "epsilon"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "grad"
     type_attr: "T"
   }
+  input_arg {
+    name: "indices"
+    type_attr: "Tindices"
+  }
   attr {
     name: "T"
     type: "type"
     allowed_values {
       list {
-        type: DT_HALF
         type: DT_FLOAT
         type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
         type: DT_COMPLEX64
         type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
+      }
+    }
+  }
+  attr {
+    name: "Tindices"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
       }
     }
   }
+  attr {
+    name: "use_locking"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  is_stateful: true
 }
 op {
-  name: "ReciprocalGrad"
+  name: "ResourceSparseApplyAdagrad"
   input_arg {
-    name: "y"
-    type_attr: "T"
+    name: "var"
+    type: DT_RESOURCE
   }
   input_arg {
-    name: "dy"
+    name: "accum"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "lr"
     type_attr: "T"
   }
-  output_arg {
-    name: "z"
+  input_arg {
+    name: "grad"
     type_attr: "T"
   }
+  input_arg {
+    name: "indices"
+    type_attr: "Tindices"
+  }
   attr {
     name: "T"
     type: "type"
     allowed_values {
       list {
-        type: DT_HALF
         type: DT_FLOAT
         type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
         type: DT_COMPLEX64
         type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
       }
     }
   }
-}
-op {
-  name: "RecordInput"
-  output_arg {
-    name: "records"
-    type: DT_STRING
-  }
-  attr {
-    name: "file_pattern"
-    type: "string"
-  }
-  attr {
-    name: "file_random_seed"
-    type: "int"
-    default_value {
-      i: 301
-    }
-  }
-  attr {
-    name: "file_shuffle_shift_ratio"
-    type: "float"
-    default_value {
-      f: 0
-    }
-  }
-  attr {
-    name: "file_buffer_size"
-    type: "int"
-    default_value {
-      i: 10000
-    }
-  }
   attr {
-    name: "file_parallelism"
-    type: "int"
-    default_value {
-      i: 16
+    name: "Tindices"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
     }
   }
   attr {
-    name: "batch_size"
-    type: "int"
+    name: "use_locking"
+    type: "bool"
     default_value {
-      i: 32
+      b: false
     }
   }
   is_stateful: true
 }
 op {
-  name: "ReduceJoin"
+  name: "ResourceSparseApplyAdagrad"
   input_arg {
-    name: "inputs"
-    type: DT_STRING
+    name: "var"
+    type: DT_RESOURCE
   }
   input_arg {
-    name: "reduction_indices"
-    type: DT_INT32
-  }
-  output_arg {
-    name: "output"
-    type: DT_STRING
-  }
-  attr {
-    name: "keep_dims"
-    type: "bool"
-    default_value {
-      b: false
-    }
-  }
-  attr {
-    name: "separator"
-    type: "string"
-    default_value {
-      s: ""
-    }
+    name: "accum"
+    type: DT_RESOURCE
   }
-}
-op {
-  name: "RefEnter"
   input_arg {
-    name: "data"
+    name: "lr"
     type_attr: "T"
-    is_ref: true
   }
-  output_arg {
-    name: "output"
+  input_arg {
+    name: "grad"
     type_attr: "T"
-    is_ref: true
+  }
+  input_arg {
+    name: "indices"
+    type_attr: "Tindices"
   }
   attr {
     name: "T"
     type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+      }
+    }
   }
   attr {
-    name: "frame_name"
-    type: "string"
+    name: "Tindices"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
   }
   attr {
-    name: "is_constant"
+    name: "use_locking"
     type: "bool"
     default_value {
       b: false
     }
   }
-  attr {
-    name: "parallel_iterations"
-    type: "int"
-    default_value {
-      i: 10
-    }
-  }
+  is_stateful: true
 }
 op {
-  name: "RefExit"
+  name: "ResourceSparseApplyAdagrad"
   input_arg {
-    name: "data"
-    type_attr: "T"
-    is_ref: true
-  }
-  output_arg {
-    name: "output"
-    type_attr: "T"
-    is_ref: true
-  }
-  attr {
-    name: "T"
-    type: "type"
+    name: "var"
+    type: DT_RESOURCE
   }
-}
-op {
-  name: "RefIdentity"
   input_arg {
-    name: "input"
-    type_attr: "T"
-    is_ref: true
-  }
-  output_arg {
-    name: "output"
-    type_attr: "T"
-    is_ref: true
-  }
-  attr {
-    name: "T"
-    type: "type"
+    name: "accum"
+    type: DT_RESOURCE
   }
-  allows_uninitialized_input: true
-}
-op {
-  name: "RefMerge"
   input_arg {
-    name: "inputs"
+    name: "lr"
     type_attr: "T"
-    number_attr: "N"
-    is_ref: true
   }
-  output_arg {
-    name: "output"
+  input_arg {
+    name: "grad"
     type_attr: "T"
-    is_ref: true
   }
-  output_arg {
-    name: "value_index"
-    type: DT_INT32
+  input_arg {
+    name: "indices"
+    type_attr: "Tindices"
   }
   attr {
     name: "T"
     type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
+      }
+    }
   }
   attr {
-    name: "N"
-    type: "int"
-    has_minimum: true
-    minimum: 1
-  }
-}
-op {
-  name: "RefNextIteration"
-  input_arg {
-    name: "data"
-    type_attr: "T"
-    is_ref: true
-  }
-  output_arg {
-    name: "output"
-    type_attr: "T"
-    is_ref: true
+    name: "Tindices"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
   }
   attr {
-    name: "T"
-    type: "type"
+    name: "use_locking"
+    type: "bool"
+    default_value {
+      b: false
+    }
   }
+  is_stateful: true
 }
 op {
-  name: "RefSelect"
+  name: "ResourceSparseApplyAdagradDA"
   input_arg {
-    name: "index"
-    type: DT_INT32
+    name: "var"
+    type: DT_RESOURCE
   }
   input_arg {
-    name: "inputs"
-    type_attr: "T"
-    number_attr: "N"
-    is_ref: true
-  }
-  output_arg {
-    name: "output"
-    type_attr: "T"
-    is_ref: true
-  }
-  attr {
-    name: "T"
-    type: "type"
+    name: "gradient_accumulator"
+    type: DT_RESOURCE
   }
-  attr {
-    name: "N"
-    type: "int"
-    has_minimum: true
-    minimum: 1
+  input_arg {
+    name: "gradient_squared_accumulator"
+    type: DT_RESOURCE
   }
-}
-op {
-  name: "RefSwitch"
   input_arg {
-    name: "data"
+    name: "grad"
     type_attr: "T"
-    is_ref: true
   }
   input_arg {
-    name: "pred"
-    type: DT_BOOL
+    name: "indices"
+    type_attr: "Tindices"
   }
-  output_arg {
-    name: "output_false"
+  input_arg {
+    name: "lr"
     type_attr: "T"
-    is_ref: true
   }
-  output_arg {
-    name: "output_true"
+  input_arg {
+    name: "l1"
     type_attr: "T"
-    is_ref: true
-  }
-  attr {
-    name: "T"
-    type: "type"
   }
-  allows_uninitialized_input: true
-}
-op {
-  name: "Relu"
   input_arg {
-    name: "features"
+    name: "l2"
     type_attr: "T"
   }
-  output_arg {
-    name: "activations"
-    type_attr: "T"
+  input_arg {
+    name: "global_step"
+    type: DT_INT64
   }
   attr {
     name: "T"
@@ -25514,27 +36885,78 @@ op {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT32
         type: DT_INT64
+        type: DT_INT32
         type: DT_UINT8
+        type: DT_UINT16
         type: DT_INT16
         type: DT_INT8
-        type: DT_UINT16
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
         type: DT_HALF
       }
     }
   }
+  attr {
+    name: "Tindices"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+  attr {
+    name: "use_locking"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  is_stateful: true
 }
 op {
-  name: "Relu"
+  name: "ResourceSparseApplyAdagradDA"
   input_arg {
-    name: "features"
+    name: "var"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "gradient_accumulator"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "gradient_squared_accumulator"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "grad"
     type_attr: "T"
   }
-  output_arg {
-    name: "activations"
+  input_arg {
+    name: "indices"
+    type_attr: "Tindices"
+  }
+  input_arg {
+    name: "lr"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "l1"
     type_attr: "T"
   }
+  input_arg {
+    name: "l2"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "global_step"
+    type: DT_INT64
+  }
   attr {
     name: "T"
     type: "type"
@@ -25542,57 +36964,80 @@ op {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT32
         type: DT_INT64
+        type: DT_INT32
         type: DT_UINT8
+        type: DT_UINT16
         type: DT_INT16
         type: DT_INT8
-        type: DT_UINT16
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
       }
     }
   }
-}
-op {
-  name: "Relu6"
-  input_arg {
-    name: "features"
-    type_attr: "T"
-  }
-  output_arg {
-    name: "activations"
-    type_attr: "T"
-  }
   attr {
-    name: "T"
+    name: "Tindices"
     type: "type"
     allowed_values {
       list {
-        type: DT_FLOAT
-        type: DT_DOUBLE
         type: DT_INT32
         type: DT_INT64
-        type: DT_UINT8
-        type: DT_INT16
-        type: DT_INT8
-        type: DT_UINT16
-        type: DT_HALF
       }
     }
   }
+  attr {
+    name: "use_locking"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  is_stateful: true
 }
 op {
-  name: "Relu6"
+  name: "ResourceSparseApplyAdagradDA"
   input_arg {
-    name: "features"
+    name: "var"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "gradient_accumulator"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "gradient_squared_accumulator"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "grad"
     type_attr: "T"
   }
-  output_arg {
-    name: "activations"
+  input_arg {
+    name: "indices"
+    type_attr: "Tindices"
+  }
+  input_arg {
+    name: "lr"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "l1"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "l2"
     type_attr: "T"
   }
+  input_arg {
+    name: "global_step"
+    type: DT_INT64
+  }
   attr {
     name: "T"
     type: "type"
@@ -25600,33 +37045,85 @@ op {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT32
         type: DT_INT64
+        type: DT_INT32
         type: DT_UINT8
+        type: DT_UINT16
         type: DT_INT16
         type: DT_INT8
-        type: DT_UINT16
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
+        type: DT_BFLOAT16
+      }
+    }
+  }
+  attr {
+    name: "Tindices"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
       }
     }
   }
+  attr {
+    name: "use_locking"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  is_stateful: true
 }
 op {
-  name: "Relu6Grad"
+  name: "ResourceSparseApplyCenteredRMSProp"
   input_arg {
-    name: "gradients"
+    name: "var"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "mg"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "ms"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "mom"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "lr"
     type_attr: "T"
   }
   input_arg {
-    name: "features"
+    name: "rho"
     type_attr: "T"
   }
-  output_arg {
-    name: "backprops"
+  input_arg {
+    name: "momentum"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "epsilon"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "grad"
     type_attr: "T"
   }
+  input_arg {
+    name: "indices"
+    type_attr: "Tindices"
+  }
   attr {
     name: "T"
     type: "type"
@@ -25634,31 +37131,82 @@ op {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT32
         type: DT_INT64
+        type: DT_INT32
         type: DT_UINT8
+        type: DT_UINT16
         type: DT_INT16
         type: DT_INT8
-        type: DT_UINT16
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
         type: DT_HALF
       }
     }
   }
+  attr {
+    name: "Tindices"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+  attr {
+    name: "use_locking"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  is_stateful: true
 }
 op {
-  name: "Relu6Grad"
+  name: "ResourceSparseApplyCenteredRMSProp"
   input_arg {
-    name: "gradients"
+    name: "var"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "mg"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "ms"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "mom"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "lr"
     type_attr: "T"
   }
   input_arg {
-    name: "features"
+    name: "rho"
     type_attr: "T"
   }
-  output_arg {
-    name: "backprops"
+  input_arg {
+    name: "momentum"
     type_attr: "T"
   }
+  input_arg {
+    name: "epsilon"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "grad"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "indices"
+    type_attr: "Tindices"
+  }
   attr {
     name: "T"
     type: "type"
@@ -25666,65 +37214,84 @@ op {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT32
         type: DT_INT64
+        type: DT_INT32
         type: DT_UINT8
+        type: DT_UINT16
         type: DT_INT16
         type: DT_INT8
-        type: DT_UINT16
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
       }
     }
   }
-}
-op {
-  name: "ReluGrad"
-  input_arg {
-    name: "gradients"
-    type_attr: "T"
-  }
-  input_arg {
-    name: "features"
-    type_attr: "T"
-  }
-  output_arg {
-    name: "backprops"
-    type_attr: "T"
-  }
   attr {
-    name: "T"
+    name: "Tindices"
     type: "type"
     allowed_values {
       list {
-        type: DT_FLOAT
-        type: DT_DOUBLE
         type: DT_INT32
         type: DT_INT64
-        type: DT_UINT8
-        type: DT_INT16
-        type: DT_INT8
-        type: DT_UINT16
-        type: DT_HALF
       }
     }
   }
+  attr {
+    name: "use_locking"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  is_stateful: true
 }
 op {
-  name: "ReluGrad"
+  name: "ResourceSparseApplyCenteredRMSProp"
   input_arg {
-    name: "gradients"
+    name: "var"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "mg"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "ms"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "mom"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "lr"
     type_attr: "T"
   }
   input_arg {
-    name: "features"
+    name: "rho"
     type_attr: "T"
   }
-  output_arg {
-    name: "backprops"
+  input_arg {
+    name: "momentum"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "epsilon"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "grad"
     type_attr: "T"
   }
+  input_arg {
+    name: "indices"
+    type_attr: "Tindices"
+  }
   attr {
     name: "T"
     type: "type"
@@ -25732,425 +37299,563 @@ op {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT32
         type: DT_INT64
+        type: DT_INT32
         type: DT_UINT8
+        type: DT_UINT16
         type: DT_INT16
         type: DT_INT8
-        type: DT_UINT16
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
-}
-op {
-  name: "RemoteCall"
-  input_arg {
-    name: "target"
-    type: DT_STRING
-  }
-  input_arg {
-    name: "args"
-    type_list_attr: "Tin"
-  }
-  output_arg {
-    name: "output"
-    type_list_attr: "Tout"
-  }
-  attr {
-    name: "Tin"
-    type: "list(type)"
-    has_minimum: true
-    minimum: 1
-  }
   attr {
-    name: "Tout"
-    type: "list(type)"
-    has_minimum: true
-    minimum: 1
+    name: "Tindices"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
   }
   attr {
-    name: "f"
-    type: "func"
+    name: "use_locking"
+    type: "bool"
+    default_value {
+      b: false
+    }
   }
+  is_stateful: true
 }
 op {
-  name: "RemoteFusedGraphExecute"
+  name: "ResourceSparseApplyFtrl"
   input_arg {
-    name: "inputs"
-    type_list_attr: "Tinputs"
-  }
-  output_arg {
-    name: "outputs"
-    type_list_attr: "Toutputs"
-  }
-  attr {
-    name: "Tinputs"
-    type: "list(type)"
-    has_minimum: true
-  }
-  attr {
-    name: "Toutputs"
-    type: "list(type)"
-    has_minimum: true
-  }
-  attr {
-    name: "serialized_remote_fused_graph_execute_info"
-    type: "string"
+    name: "var"
+    type: DT_RESOURCE
   }
-}
-op {
-  name: "RepeatDataset"
   input_arg {
-    name: "input_dataset"
-    type: DT_VARIANT
+    name: "accum"
+    type: DT_RESOURCE
   }
   input_arg {
-    name: "count"
-    type: DT_INT64
-  }
-  output_arg {
-    name: "handle"
-    type: DT_VARIANT
-  }
-  attr {
-    name: "output_types"
-    type: "list(type)"
-    has_minimum: true
-    minimum: 1
-  }
-  attr {
-    name: "output_shapes"
-    type: "list(shape)"
-    has_minimum: true
-    minimum: 1
+    name: "linear"
+    type: DT_RESOURCE
   }
-  is_stateful: true
-}
-op {
-  name: "RepeatDataset"
   input_arg {
-    name: "input_dataset"
-    type: DT_VARIANT
+    name: "grad"
+    type_attr: "T"
   }
   input_arg {
-    name: "count"
-    type: DT_INT64
-  }
-  output_arg {
-    name: "handle"
-    type: DT_VARIANT
-  }
-  attr {
-    name: "output_types"
-    type: "list(type)"
-    has_minimum: true
-    minimum: 1
-  }
-  attr {
-    name: "output_shapes"
-    type: "list(shape)"
-    has_minimum: true
-    minimum: 1
+    name: "indices"
+    type_attr: "Tindices"
   }
-}
-op {
-  name: "RequantizationRange"
   input_arg {
-    name: "input"
-    type_attr: "Tinput"
+    name: "lr"
+    type_attr: "T"
   }
   input_arg {
-    name: "input_min"
-    type: DT_FLOAT
+    name: "l1"
+    type_attr: "T"
   }
   input_arg {
-    name: "input_max"
-    type: DT_FLOAT
-  }
-  output_arg {
-    name: "output_min"
-    type: DT_FLOAT
+    name: "l2"
+    type_attr: "T"
   }
-  output_arg {
-    name: "output_max"
-    type: DT_FLOAT
+  input_arg {
+    name: "lr_power"
+    type_attr: "T"
   }
   attr {
-    name: "Tinput"
+    name: "T"
     type: "type"
     allowed_values {
       list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
         type: DT_QINT8
         type: DT_QUINT8
-        type: DT_QINT16
-        type: DT_QUINT16
         type: DT_QINT32
+        type: DT_HALF
+      }
+    }
+  }
+  attr {
+    name: "Tindices"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
       }
     }
   }
+  attr {
+    name: "use_locking"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  is_stateful: true
 }
 op {
-  name: "Requantize"
+  name: "ResourceSparseApplyFtrl"
   input_arg {
-    name: "input"
-    type_attr: "Tinput"
+    name: "var"
+    type: DT_RESOURCE
   }
   input_arg {
-    name: "input_min"
-    type: DT_FLOAT
+    name: "accum"
+    type: DT_RESOURCE
   }
   input_arg {
-    name: "input_max"
-    type: DT_FLOAT
+    name: "linear"
+    type: DT_RESOURCE
   }
   input_arg {
-    name: "requested_output_min"
-    type: DT_FLOAT
+    name: "grad"
+    type_attr: "T"
   }
   input_arg {
-    name: "requested_output_max"
-    type: DT_FLOAT
+    name: "indices"
+    type_attr: "Tindices"
   }
-  output_arg {
-    name: "output"
-    type_attr: "out_type"
+  input_arg {
+    name: "lr"
+    type_attr: "T"
   }
-  output_arg {
-    name: "output_min"
-    type: DT_FLOAT
+  input_arg {
+    name: "l1"
+    type_attr: "T"
   }
-  output_arg {
-    name: "output_max"
-    type: DT_FLOAT
+  input_arg {
+    name: "l2"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "lr_power"
+    type_attr: "T"
   }
   attr {
-    name: "Tinput"
+    name: "T"
     type: "type"
     allowed_values {
       list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
         type: DT_QINT8
         type: DT_QUINT8
-        type: DT_QINT16
-        type: DT_QUINT16
         type: DT_QINT32
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
       }
     }
   }
   attr {
-    name: "out_type"
+    name: "Tindices"
     type: "type"
     allowed_values {
       list {
-        type: DT_QINT8
-        type: DT_QUINT8
-        type: DT_QINT16
-        type: DT_QUINT16
-        type: DT_QINT32
+        type: DT_INT32
+        type: DT_INT64
       }
     }
   }
+  attr {
+    name: "use_locking"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  is_stateful: true
 }
 op {
-  name: "Reshape"
+  name: "ResourceSparseApplyFtrl"
   input_arg {
-    name: "tensor"
-    type_attr: "T"
+    name: "var"
+    type: DT_RESOURCE
   }
   input_arg {
-    name: "shape"
-    type_attr: "Tshape"
+    name: "accum"
+    type: DT_RESOURCE
   }
-  output_arg {
-    name: "output"
+  input_arg {
+    name: "linear"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "grad"
     type_attr: "T"
   }
-  attr {
-    name: "T"
-    type: "type"
+  input_arg {
+    name: "indices"
+    type_attr: "Tindices"
   }
-  attr {
-    name: "Tshape"
-    type: "type"
-    default_value {
-      type: DT_INT32
-    }
-    allowed_values {
-      list {
-        type: DT_INT32
-        type: DT_INT64
-      }
-    }
+  input_arg {
+    name: "lr"
+    type_attr: "T"
   }
-}
-op {
-  name: "ResizeArea"
   input_arg {
-    name: "images"
+    name: "l1"
     type_attr: "T"
   }
   input_arg {
-    name: "size"
-    type: DT_INT32
+    name: "l2"
+    type_attr: "T"
   }
-  output_arg {
-    name: "resized_images"
-    type: DT_FLOAT
+  input_arg {
+    name: "lr_power"
+    type_attr: "T"
   }
   attr {
     name: "T"
     type: "type"
     allowed_values {
       list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
         type: DT_UINT8
-        type: DT_INT8
+        type: DT_UINT16
         type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
+      }
+    }
+  }
+  attr {
+    name: "Tindices"
+    type: "type"
+    allowed_values {
+      list {
         type: DT_INT32
         type: DT_INT64
-        type: DT_HALF
-        type: DT_FLOAT
-        type: DT_DOUBLE
       }
     }
   }
   attr {
-    name: "align_corners"
+    name: "use_locking"
     type: "bool"
     default_value {
       b: false
     }
   }
+  is_stateful: true
 }
 op {
-  name: "ResizeArea"
+  name: "ResourceSparseApplyFtrlV2"
   input_arg {
-    name: "images"
+    name: "var"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "accum"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "linear"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "grad"
     type_attr: "T"
   }
   input_arg {
-    name: "size"
-    type: DT_INT32
+    name: "indices"
+    type_attr: "Tindices"
   }
-  output_arg {
-    name: "resized_images"
-    type: DT_FLOAT
+  input_arg {
+    name: "lr"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "l1"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "l2"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "l2_shrinkage"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "lr_power"
+    type_attr: "T"
   }
   attr {
     name: "T"
     type: "type"
     allowed_values {
       list {
-        type: DT_INT8
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
         type: DT_UINT8
-        type: DT_INT16
         type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+      }
+    }
+  }
+  attr {
+    name: "Tindices"
+    type: "type"
+    allowed_values {
+      list {
         type: DT_INT32
         type: DT_INT64
-        type: DT_HALF
-        type: DT_FLOAT
-        type: DT_DOUBLE
       }
     }
   }
   attr {
-    name: "align_corners"
+    name: "use_locking"
     type: "bool"
     default_value {
       b: false
     }
   }
+  is_stateful: true
 }
 op {
-  name: "ResizeBicubic"
+  name: "ResourceSparseApplyFtrlV2"
   input_arg {
-    name: "images"
+    name: "var"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "accum"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "linear"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "grad"
     type_attr: "T"
   }
   input_arg {
-    name: "size"
-    type: DT_INT32
+    name: "indices"
+    type_attr: "Tindices"
   }
-  output_arg {
-    name: "resized_images"
-    type: DT_FLOAT
+  input_arg {
+    name: "lr"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "l1"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "l2"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "l2_shrinkage"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "lr_power"
+    type_attr: "T"
   }
   attr {
     name: "T"
     type: "type"
     allowed_values {
       list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
         type: DT_UINT8
-        type: DT_INT8
+        type: DT_UINT16
         type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+      }
+    }
+  }
+  attr {
+    name: "Tindices"
+    type: "type"
+    allowed_values {
+      list {
         type: DT_INT32
         type: DT_INT64
-        type: DT_HALF
-        type: DT_FLOAT
-        type: DT_DOUBLE
       }
     }
   }
   attr {
-    name: "align_corners"
+    name: "use_locking"
     type: "bool"
     default_value {
       b: false
     }
   }
+  is_stateful: true
 }
 op {
-  name: "ResizeBicubic"
+  name: "ResourceSparseApplyFtrlV2"
   input_arg {
-    name: "images"
+    name: "var"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "accum"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "linear"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "grad"
     type_attr: "T"
   }
   input_arg {
-    name: "size"
-    type: DT_INT32
+    name: "indices"
+    type_attr: "Tindices"
   }
-  output_arg {
-    name: "resized_images"
-    type: DT_FLOAT
+  input_arg {
+    name: "lr"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "l1"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "l2"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "l2_shrinkage"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "lr_power"
+    type_attr: "T"
   }
   attr {
     name: "T"
     type: "type"
     allowed_values {
       list {
-        type: DT_INT8
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
         type: DT_UINT8
-        type: DT_INT16
         type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
+      }
+    }
+  }
+  attr {
+    name: "Tindices"
+    type: "type"
+    allowed_values {
+      list {
         type: DT_INT32
         type: DT_INT64
-        type: DT_HALF
-        type: DT_FLOAT
-        type: DT_DOUBLE
       }
     }
   }
   attr {
-    name: "align_corners"
+    name: "use_locking"
     type: "bool"
     default_value {
       b: false
     }
   }
+  is_stateful: true
 }
 op {
-  name: "ResizeBicubicGrad"
+  name: "ResourceSparseApplyMomentum"
   input_arg {
-    name: "grads"
-    type: DT_FLOAT
+    name: "var"
+    type: DT_RESOURCE
   }
   input_arg {
-    name: "original_image"
+    name: "accum"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "lr"
     type_attr: "T"
   }
-  output_arg {
-    name: "output"
+  input_arg {
+    name: "grad"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "indices"
+    type_attr: "Tindices"
+  }
+  input_arg {
+    name: "momentum"
     type_attr: "T"
   }
   attr {
@@ -26160,270 +37865,444 @@ op {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
       }
     }
   }
   attr {
-    name: "align_corners"
+    name: "Tindices"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+  attr {
+    name: "use_locking"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  attr {
+    name: "use_nesterov"
     type: "bool"
     default_value {
       b: false
     }
   }
+  is_stateful: true
 }
 op {
-  name: "ResizeBilinear"
+  name: "ResourceSparseApplyMomentum"
   input_arg {
-    name: "images"
+    name: "var"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "accum"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "lr"
     type_attr: "T"
   }
   input_arg {
-    name: "size"
-    type: DT_INT32
+    name: "grad"
+    type_attr: "T"
   }
-  output_arg {
-    name: "resized_images"
-    type: DT_FLOAT
+  input_arg {
+    name: "indices"
+    type_attr: "Tindices"
+  }
+  input_arg {
+    name: "momentum"
+    type_attr: "T"
   }
   attr {
     name: "T"
     type: "type"
     allowed_values {
       list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
         type: DT_UINT8
-        type: DT_INT8
+        type: DT_UINT16
         type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+      }
+    }
+  }
+  attr {
+    name: "Tindices"
+    type: "type"
+    allowed_values {
+      list {
         type: DT_INT32
         type: DT_INT64
-        type: DT_HALF
-        type: DT_FLOAT
-        type: DT_DOUBLE
       }
     }
   }
   attr {
-    name: "align_corners"
+    name: "use_locking"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  attr {
+    name: "use_nesterov"
     type: "bool"
     default_value {
       b: false
     }
   }
+  is_stateful: true
 }
 op {
-  name: "ResizeBilinear"
+  name: "ResourceSparseApplyMomentum"
   input_arg {
-    name: "images"
+    name: "var"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "accum"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "lr"
     type_attr: "T"
   }
   input_arg {
-    name: "size"
-    type: DT_INT32
+    name: "grad"
+    type_attr: "T"
   }
-  output_arg {
-    name: "resized_images"
-    type: DT_FLOAT
+  input_arg {
+    name: "indices"
+    type_attr: "Tindices"
+  }
+  input_arg {
+    name: "momentum"
+    type_attr: "T"
   }
   attr {
     name: "T"
     type: "type"
     allowed_values {
       list {
-        type: DT_INT8
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
         type: DT_UINT8
-        type: DT_INT16
         type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
+      }
+    }
+  }
+  attr {
+    name: "Tindices"
+    type: "type"
+    allowed_values {
+      list {
         type: DT_INT32
         type: DT_INT64
-        type: DT_HALF
-        type: DT_FLOAT
-        type: DT_DOUBLE
       }
     }
   }
   attr {
-    name: "align_corners"
+    name: "use_locking"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  attr {
+    name: "use_nesterov"
     type: "bool"
     default_value {
       b: false
     }
   }
+  is_stateful: true
 }
 op {
-  name: "ResizeBilinearGrad"
+  name: "ResourceSparseApplyProximalAdagrad"
   input_arg {
-    name: "grads"
-    type: DT_FLOAT
+    name: "var"
+    type: DT_RESOURCE
   }
   input_arg {
-    name: "original_image"
+    name: "accum"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "lr"
     type_attr: "T"
   }
-  output_arg {
-    name: "output"
+  input_arg {
+    name: "l1"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "l2"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "grad"
     type_attr: "T"
   }
+  input_arg {
+    name: "indices"
+    type_attr: "Tindices"
+  }
   attr {
     name: "T"
     type: "type"
     allowed_values {
       list {
         type: DT_FLOAT
-        type: DT_HALF
         type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
       }
     }
   }
   attr {
-    name: "align_corners"
-    type: "bool"
-    default_value {
-      b: false
-    }
-  }
-}
-op {
-  name: "ResizeNearestNeighbor"
-  input_arg {
-    name: "images"
-    type_attr: "T"
-  }
-  input_arg {
-    name: "size"
-    type: DT_INT32
-  }
-  output_arg {
-    name: "resized_images"
-    type_attr: "T"
-  }
-  attr {
-    name: "T"
+    name: "Tindices"
     type: "type"
     allowed_values {
       list {
-        type: DT_UINT8
-        type: DT_INT8
-        type: DT_INT16
         type: DT_INT32
         type: DT_INT64
-        type: DT_HALF
-        type: DT_FLOAT
-        type: DT_DOUBLE
       }
     }
   }
   attr {
-    name: "align_corners"
+    name: "use_locking"
     type: "bool"
     default_value {
       b: false
     }
   }
+  is_stateful: true
 }
 op {
-  name: "ResizeNearestNeighbor"
+  name: "ResourceSparseApplyProximalAdagrad"
   input_arg {
-    name: "images"
+    name: "var"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "accum"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "lr"
     type_attr: "T"
   }
   input_arg {
-    name: "size"
-    type: DT_INT32
+    name: "l1"
+    type_attr: "T"
   }
-  output_arg {
-    name: "resized_images"
+  input_arg {
+    name: "l2"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "grad"
     type_attr: "T"
   }
+  input_arg {
+    name: "indices"
+    type_attr: "Tindices"
+  }
   attr {
     name: "T"
     type: "type"
     allowed_values {
       list {
-        type: DT_INT8
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
         type: DT_UINT8
-        type: DT_INT16
         type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+      }
+    }
+  }
+  attr {
+    name: "Tindices"
+    type: "type"
+    allowed_values {
+      list {
         type: DT_INT32
         type: DT_INT64
-        type: DT_HALF
-        type: DT_FLOAT
-        type: DT_DOUBLE
       }
     }
   }
   attr {
-    name: "align_corners"
+    name: "use_locking"
     type: "bool"
     default_value {
       b: false
     }
   }
+  is_stateful: true
 }
 op {
-  name: "ResizeNearestNeighborGrad"
+  name: "ResourceSparseApplyProximalAdagrad"
   input_arg {
-    name: "grads"
+    name: "var"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "accum"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "lr"
     type_attr: "T"
   }
   input_arg {
-    name: "size"
-    type: DT_INT32
+    name: "l1"
+    type_attr: "T"
   }
-  output_arg {
-    name: "output"
+  input_arg {
+    name: "l2"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "grad"
     type_attr: "T"
   }
+  input_arg {
+    name: "indices"
+    type_attr: "Tindices"
+  }
   attr {
     name: "T"
     type: "type"
     allowed_values {
       list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
         type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
         type: DT_INT8
-        type: DT_INT32
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
         type: DT_HALF
-        type: DT_FLOAT
-        type: DT_DOUBLE
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
   attr {
-    name: "align_corners"
+    name: "Tindices"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+  attr {
+    name: "use_locking"
     type: "bool"
     default_value {
       b: false
     }
   }
+  is_stateful: true
 }
 op {
-  name: "ResourceApplyAdadelta"
+  name: "ResourceSparseApplyProximalGradientDescent"
   input_arg {
     name: "var"
     type: DT_RESOURCE
   }
   input_arg {
-    name: "accum"
-    type: DT_RESOURCE
-  }
-  input_arg {
-    name: "accum_update"
-    type: DT_RESOURCE
-  }
-  input_arg {
-    name: "lr"
+    name: "alpha"
     type_attr: "T"
   }
   input_arg {
-    name: "rho"
+    name: "l1"
     type_attr: "T"
   }
   input_arg {
-    name: "epsilon"
+    name: "l2"
     type_attr: "T"
   }
   input_arg {
     name: "grad"
     type_attr: "T"
   }
+  input_arg {
+    name: "indices"
+    type_attr: "Tindices"
+  }
   attr {
     name: "T"
     type: "type"
@@ -26446,6 +38325,16 @@ op {
       }
     }
   }
+  attr {
+    name: "Tindices"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
   attr {
     name: "use_locking"
     type: "bool"
@@ -26456,35 +38345,31 @@ op {
   is_stateful: true
 }
 op {
-  name: "ResourceApplyAdadelta"
+  name: "ResourceSparseApplyProximalGradientDescent"
   input_arg {
     name: "var"
     type: DT_RESOURCE
   }
   input_arg {
-    name: "accum"
-    type: DT_RESOURCE
-  }
-  input_arg {
-    name: "accum_update"
-    type: DT_RESOURCE
-  }
-  input_arg {
-    name: "lr"
+    name: "alpha"
     type_attr: "T"
   }
   input_arg {
-    name: "rho"
+    name: "l1"
     type_attr: "T"
   }
   input_arg {
-    name: "epsilon"
+    name: "l2"
     type_attr: "T"
   }
   input_arg {
     name: "grad"
     type_attr: "T"
   }
+  input_arg {
+    name: "indices"
+    type_attr: "Tindices"
+  }
   attr {
     name: "T"
     type: "type"
@@ -26509,6 +38394,16 @@ op {
       }
     }
   }
+  attr {
+    name: "Tindices"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
   attr {
     name: "use_locking"
     type: "bool"
@@ -26519,23 +38414,31 @@ op {
   is_stateful: true
 }
 op {
-  name: "ResourceApplyAdagrad"
+  name: "ResourceSparseApplyProximalGradientDescent"
   input_arg {
     name: "var"
     type: DT_RESOURCE
   }
   input_arg {
-    name: "accum"
-    type: DT_RESOURCE
+    name: "alpha"
+    type_attr: "T"
   }
   input_arg {
-    name: "lr"
+    name: "l1"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "l2"
     type_attr: "T"
   }
   input_arg {
     name: "grad"
     type_attr: "T"
   }
+  input_arg {
+    name: "indices"
+    type_attr: "Tindices"
+  }
   attr {
     name: "T"
     type: "type"
@@ -26555,6 +38458,19 @@ op {
         type: DT_QUINT8
         type: DT_QINT32
         type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
+      }
+    }
+  }
+  attr {
+    name: "Tindices"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
       }
     }
   }
@@ -26568,23 +38484,43 @@ op {
   is_stateful: true
 }
 op {
-  name: "ResourceApplyAdagrad"
+  name: "ResourceSparseApplyRMSProp"
   input_arg {
     name: "var"
     type: DT_RESOURCE
   }
   input_arg {
-    name: "accum"
+    name: "ms"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "mom"
     type: DT_RESOURCE
   }
   input_arg {
     name: "lr"
     type_attr: "T"
   }
+  input_arg {
+    name: "rho"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "momentum"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "epsilon"
+    type_attr: "T"
+  }
   input_arg {
     name: "grad"
     type_attr: "T"
   }
+  input_arg {
+    name: "indices"
+    type_attr: "Tindices"
+  }
   attr {
     name: "T"
     type: "type"
@@ -26604,8 +38540,16 @@ op {
         type: DT_QUINT8
         type: DT_QINT32
         type: DT_HALF
-        type: DT_UINT32
-        type: DT_UINT64
+      }
+    }
+  }
+  attr {
+    name: "Tindices"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
       }
     }
   }
@@ -26619,38 +38563,42 @@ op {
   is_stateful: true
 }
 op {
-  name: "ResourceApplyAdagradDA"
+  name: "ResourceSparseApplyRMSProp"
   input_arg {
     name: "var"
     type: DT_RESOURCE
   }
   input_arg {
-    name: "gradient_accumulator"
+    name: "ms"
     type: DT_RESOURCE
   }
   input_arg {
-    name: "gradient_squared_accumulator"
+    name: "mom"
     type: DT_RESOURCE
   }
   input_arg {
-    name: "grad"
+    name: "lr"
     type_attr: "T"
   }
   input_arg {
-    name: "lr"
+    name: "rho"
     type_attr: "T"
   }
   input_arg {
-    name: "l1"
+    name: "momentum"
     type_attr: "T"
   }
   input_arg {
-    name: "l2"
+    name: "epsilon"
     type_attr: "T"
   }
   input_arg {
-    name: "global_step"
-    type: DT_INT64
+    name: "grad"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "indices"
+    type_attr: "Tindices"
   }
   attr {
     name: "T"
@@ -26671,6 +38619,18 @@ op {
         type: DT_QUINT8
         type: DT_QINT32
         type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+      }
+    }
+  }
+  attr {
+    name: "Tindices"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
       }
     }
   }
@@ -26684,38 +38644,42 @@ op {
   is_stateful: true
 }
 op {
-  name: "ResourceApplyAdagradDA"
+  name: "ResourceSparseApplyRMSProp"
   input_arg {
     name: "var"
     type: DT_RESOURCE
   }
   input_arg {
-    name: "gradient_accumulator"
+    name: "ms"
     type: DT_RESOURCE
   }
   input_arg {
-    name: "gradient_squared_accumulator"
+    name: "mom"
     type: DT_RESOURCE
   }
   input_arg {
-    name: "grad"
+    name: "lr"
     type_attr: "T"
   }
   input_arg {
-    name: "lr"
+    name: "rho"
     type_attr: "T"
   }
   input_arg {
-    name: "l1"
+    name: "momentum"
     type_attr: "T"
   }
   input_arg {
-    name: "l2"
+    name: "epsilon"
     type_attr: "T"
   }
   input_arg {
-    name: "global_step"
-    type: DT_INT64
+    name: "grad"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "indices"
+    type_attr: "Tindices"
   }
   attr {
     name: "T"
@@ -26738,6 +38702,17 @@ op {
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
+        type: DT_BFLOAT16
+      }
+    }
+  }
+  attr {
+    name: "Tindices"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
       }
     }
   }
@@ -26745,124 +38720,261 @@ op {
     name: "use_locking"
     type: "bool"
     default_value {
-      b: false
+      b: false
+    }
+  }
+  is_stateful: true
+}
+op {
+  name: "ResourceStridedSliceAssign"
+  input_arg {
+    name: "ref"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "begin"
+    type_attr: "Index"
+  }
+  input_arg {
+    name: "end"
+    type_attr: "Index"
+  }
+  input_arg {
+    name: "strides"
+    type_attr: "Index"
+  }
+  input_arg {
+    name: "value"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+  }
+  attr {
+    name: "Index"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+  attr {
+    name: "begin_mask"
+    type: "int"
+    default_value {
+      i: 0
+    }
+  }
+  attr {
+    name: "end_mask"
+    type: "int"
+    default_value {
+      i: 0
+    }
+  }
+  attr {
+    name: "ellipsis_mask"
+    type: "int"
+    default_value {
+      i: 0
+    }
+  }
+  attr {
+    name: "new_axis_mask"
+    type: "int"
+    default_value {
+      i: 0
+    }
+  }
+  attr {
+    name: "shrink_axis_mask"
+    type: "int"
+    default_value {
+      i: 0
+    }
+  }
+  is_stateful: true
+}
+op {
+  name: "Restore"
+  input_arg {
+    name: "file_pattern"
+    type: DT_STRING
+  }
+  input_arg {
+    name: "tensor_name"
+    type: DT_STRING
+  }
+  output_arg {
+    name: "tensor"
+    type_attr: "dt"
+  }
+  attr {
+    name: "dt"
+    type: "type"
+  }
+  attr {
+    name: "preferred_shard"
+    type: "int"
+    default_value {
+      i: -1
+    }
+  }
+}
+op {
+  name: "Restore"
+  input_arg {
+    name: "file_pattern"
+    type: DT_STRING
+  }
+  input_arg {
+    name: "tensor_name"
+    type: DT_STRING
+  }
+  output_arg {
+    name: "tensor"
+    type_attr: "dt"
+  }
+  attr {
+    name: "dt"
+    type: "type"
+  }
+  attr {
+    name: "preferred_shard"
+    type: "int"
+    default_value {
+      i: -1
     }
   }
   is_stateful: true
 }
 op {
-  name: "ResourceApplyAdam"
+  name: "RestoreSlice"
   input_arg {
-    name: "var"
-    type: DT_RESOURCE
+    name: "file_pattern"
+    type: DT_STRING
   }
   input_arg {
-    name: "m"
-    type: DT_RESOURCE
+    name: "tensor_name"
+    type: DT_STRING
   }
   input_arg {
-    name: "v"
-    type: DT_RESOURCE
+    name: "shape_and_slice"
+    type: DT_STRING
   }
-  input_arg {
-    name: "beta1_power"
-    type_attr: "T"
+  output_arg {
+    name: "tensor"
+    type_attr: "dt"
   }
-  input_arg {
-    name: "beta2_power"
-    type_attr: "T"
+  attr {
+    name: "dt"
+    type: "type"
   }
-  input_arg {
-    name: "lr"
-    type_attr: "T"
+  attr {
+    name: "preferred_shard"
+    type: "int"
+    default_value {
+      i: -1
+    }
   }
+}
+op {
+  name: "RestoreSlice"
   input_arg {
-    name: "beta1"
-    type_attr: "T"
+    name: "file_pattern"
+    type: DT_STRING
   }
   input_arg {
-    name: "beta2"
-    type_attr: "T"
+    name: "tensor_name"
+    type: DT_STRING
   }
   input_arg {
-    name: "epsilon"
-    type_attr: "T"
+    name: "shape_and_slice"
+    type: DT_STRING
   }
-  input_arg {
-    name: "grad"
-    type_attr: "T"
+  output_arg {
+    name: "tensor"
+    type_attr: "dt"
   }
   attr {
-    name: "T"
+    name: "dt"
     type: "type"
-    allowed_values {
-      list {
-        type: DT_FLOAT
-        type: DT_DOUBLE
-        type: DT_INT64
-        type: DT_INT32
-        type: DT_UINT8
-        type: DT_UINT16
-        type: DT_INT16
-        type: DT_INT8
-        type: DT_COMPLEX64
-        type: DT_COMPLEX128
-        type: DT_QINT8
-        type: DT_QUINT8
-        type: DT_QINT32
-        type: DT_HALF
-      }
-    }
   }
   attr {
-    name: "use_locking"
-    type: "bool"
+    name: "preferred_shard"
+    type: "int"
     default_value {
-      b: false
+      i: -1
     }
   }
   is_stateful: true
 }
 op {
-  name: "ResourceApplyAdam"
+  name: "RestoreV2"
   input_arg {
-    name: "var"
-    type: DT_RESOURCE
+    name: "prefix"
+    type: DT_STRING
   }
   input_arg {
-    name: "m"
-    type: DT_RESOURCE
+    name: "tensor_names"
+    type: DT_STRING
   }
   input_arg {
-    name: "v"
-    type: DT_RESOURCE
+    name: "shape_and_slices"
+    type: DT_STRING
   }
-  input_arg {
-    name: "beta1_power"
-    type_attr: "T"
+  output_arg {
+    name: "tensors"
+    type_list_attr: "dtypes"
   }
-  input_arg {
-    name: "beta2_power"
-    type_attr: "T"
+  attr {
+    name: "dtypes"
+    type: "list(type)"
+    has_minimum: true
+    minimum: 1
   }
+}
+op {
+  name: "RestoreV2"
   input_arg {
-    name: "lr"
-    type_attr: "T"
+    name: "prefix"
+    type: DT_STRING
   }
   input_arg {
-    name: "beta1"
-    type_attr: "T"
+    name: "tensor_names"
+    type: DT_STRING
   }
   input_arg {
-    name: "beta2"
-    type_attr: "T"
+    name: "shape_and_slices"
+    type: DT_STRING
   }
+  output_arg {
+    name: "tensors"
+    type_list_attr: "dtypes"
+  }
+  attr {
+    name: "dtypes"
+    type: "list(type)"
+    has_minimum: true
+    minimum: 1
+  }
+  is_stateful: true
+}
+op {
+  name: "Reverse"
   input_arg {
-    name: "epsilon"
+    name: "tensor"
     type_attr: "T"
   }
   input_arg {
-    name: "grad"
+    name: "dims"
+    type: DT_BOOL
+  }
+  output_arg {
+    name: "output"
     type_attr: "T"
   }
   attr {
@@ -26870,79 +38982,66 @@ op {
     type: "type"
     allowed_values {
       list {
-        type: DT_FLOAT
-        type: DT_DOUBLE
-        type: DT_INT64
-        type: DT_INT32
         type: DT_UINT8
-        type: DT_UINT16
-        type: DT_INT16
         type: DT_INT8
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_BOOL
+        type: DT_HALF
+        type: DT_FLOAT
+        type: DT_DOUBLE
         type: DT_COMPLEX64
         type: DT_COMPLEX128
-        type: DT_QINT8
-        type: DT_QUINT8
-        type: DT_QINT32
-        type: DT_HALF
       }
     }
   }
-  attr {
-    name: "use_locking"
-    type: "bool"
-    default_value {
-      b: false
-    }
-  }
-  attr {
-    name: "use_nesterov"
-    type: "bool"
-    default_value {
-      b: false
-    }
-  }
-  is_stateful: true
 }
 op {
-  name: "ResourceApplyAdam"
-  input_arg {
-    name: "var"
-    type: DT_RESOURCE
-  }
-  input_arg {
-    name: "m"
-    type: DT_RESOURCE
-  }
-  input_arg {
-    name: "v"
-    type: DT_RESOURCE
-  }
+  name: "Reverse"
   input_arg {
-    name: "beta1_power"
+    name: "tensor"
     type_attr: "T"
   }
   input_arg {
-    name: "beta2_power"
-    type_attr: "T"
+    name: "dims"
+    type: DT_BOOL
   }
-  input_arg {
-    name: "lr"
+  output_arg {
+    name: "output"
     type_attr: "T"
   }
-  input_arg {
-    name: "beta1"
-    type_attr: "T"
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_UINT8
+        type: DT_INT8
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_BOOL
+        type: DT_HALF
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_STRING
+      }
+    }
   }
+}
+op {
+  name: "Reverse"
   input_arg {
-    name: "beta2"
+    name: "tensor"
     type_attr: "T"
   }
   input_arg {
-    name: "epsilon"
-    type_attr: "T"
+    name: "dims"
+    type: DT_BOOL
   }
-  input_arg {
-    name: "grad"
+  output_arg {
+    name: "output"
     type_attr: "T"
   }
   attr {
@@ -26950,209 +39049,270 @@ op {
     type: "type"
     allowed_values {
       list {
-        type: DT_FLOAT
-        type: DT_DOUBLE
-        type: DT_INT64
-        type: DT_INT32
         type: DT_UINT8
+        type: DT_INT8
         type: DT_UINT16
         type: DT_INT16
-        type: DT_INT8
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_BOOL
+        type: DT_HALF
+        type: DT_FLOAT
+        type: DT_DOUBLE
         type: DT_COMPLEX64
         type: DT_COMPLEX128
-        type: DT_QINT8
-        type: DT_QUINT8
-        type: DT_QINT32
-        type: DT_HALF
-        type: DT_UINT32
-        type: DT_UINT64
+        type: DT_STRING
       }
     }
   }
+}
+op {
+  name: "ReverseSequence"
+  input_arg {
+    name: "input"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "seq_lengths"
+    type_attr: "Tlen"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
   attr {
-    name: "use_locking"
-    type: "bool"
+    name: "seq_dim"
+    type: "int"
+  }
+  attr {
+    name: "batch_dim"
+    type: "int"
     default_value {
-      b: false
+      i: 0
     }
   }
   attr {
-    name: "use_nesterov"
-    type: "bool"
+    name: "T"
+    type: "type"
+  }
+  attr {
+    name: "Tlen"
+    type: "type"
     default_value {
-      b: false
+      type: DT_INT64
+    }
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
     }
   }
-  is_stateful: true
 }
 op {
-  name: "ResourceApplyAddSign"
-  input_arg {
-    name: "var"
-    type: DT_RESOURCE
-  }
-  input_arg {
-    name: "m"
-    type: DT_RESOURCE
-  }
-  input_arg {
-    name: "lr"
-    type_attr: "T"
-  }
+  name: "ReverseV2"
   input_arg {
-    name: "alpha"
+    name: "tensor"
     type_attr: "T"
   }
   input_arg {
-    name: "sign_decay"
-    type_attr: "T"
+    name: "axis"
+    type_attr: "Tidx"
   }
-  input_arg {
-    name: "beta"
+  output_arg {
+    name: "output"
     type_attr: "T"
   }
-  input_arg {
-    name: "grad"
-    type_attr: "T"
+  attr {
+    name: "Tidx"
+    type: "type"
+    default_value {
+      type: DT_INT32
+    }
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
   }
   attr {
     name: "T"
     type: "type"
     allowed_values {
       list {
-        type: DT_FLOAT
-        type: DT_DOUBLE
-        type: DT_INT64
-        type: DT_INT32
         type: DT_UINT8
-        type: DT_UINT16
-        type: DT_INT16
         type: DT_INT8
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_BOOL
+        type: DT_HALF
+        type: DT_FLOAT
+        type: DT_DOUBLE
         type: DT_COMPLEX64
         type: DT_COMPLEX128
-        type: DT_QINT8
-        type: DT_QUINT8
-        type: DT_QINT32
-        type: DT_HALF
-        type: DT_UINT32
-        type: DT_UINT64
       }
     }
   }
-  attr {
-    name: "use_locking"
-    type: "bool"
-    default_value {
-      b: false
-    }
-  }
-  is_stateful: true
 }
 op {
-  name: "ResourceApplyCenteredRMSProp"
+  name: "ReverseV2"
   input_arg {
-    name: "var"
-    type: DT_RESOURCE
+    name: "tensor"
+    type_attr: "T"
   }
   input_arg {
-    name: "mg"
-    type: DT_RESOURCE
+    name: "axis"
+    type_attr: "Tidx"
   }
-  input_arg {
-    name: "ms"
-    type: DT_RESOURCE
+  output_arg {
+    name: "output"
+    type_attr: "T"
   }
-  input_arg {
-    name: "mom"
-    type: DT_RESOURCE
+  attr {
+    name: "Tidx"
+    type: "type"
+    default_value {
+      type: DT_INT32
+    }
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
   }
-  input_arg {
-    name: "lr"
-    type_attr: "T"
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_UINT8
+        type: DT_INT8
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_BOOL
+        type: DT_HALF
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_STRING
+      }
+    }
   }
+}
+op {
+  name: "ReverseV2"
   input_arg {
-    name: "rho"
+    name: "tensor"
     type_attr: "T"
   }
   input_arg {
-    name: "momentum"
-    type_attr: "T"
+    name: "axis"
+    type_attr: "Tidx"
   }
-  input_arg {
-    name: "epsilon"
+  output_arg {
+    name: "output"
     type_attr: "T"
   }
-  input_arg {
-    name: "grad"
-    type_attr: "T"
+  attr {
+    name: "Tidx"
+    type: "type"
+    default_value {
+      type: DT_INT32
+    }
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
   }
   attr {
     name: "T"
     type: "type"
     allowed_values {
       list {
-        type: DT_FLOAT
-        type: DT_DOUBLE
-        type: DT_INT64
-        type: DT_INT32
         type: DT_UINT8
+        type: DT_INT8
         type: DT_UINT16
         type: DT_INT16
-        type: DT_INT8
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_BOOL
+        type: DT_HALF
+        type: DT_FLOAT
+        type: DT_DOUBLE
         type: DT_COMPLEX64
         type: DT_COMPLEX128
-        type: DT_QINT8
-        type: DT_QUINT8
-        type: DT_QINT32
-        type: DT_HALF
+        type: DT_STRING
       }
     }
   }
-  attr {
-    name: "use_locking"
-    type: "bool"
-    default_value {
-      b: false
-    }
-  }
-  is_stateful: true
 }
 op {
-  name: "ResourceApplyCenteredRMSProp"
-  input_arg {
-    name: "var"
-    type: DT_RESOURCE
-  }
-  input_arg {
-    name: "mg"
-    type: DT_RESOURCE
-  }
+  name: "ReverseV2"
   input_arg {
-    name: "ms"
-    type: DT_RESOURCE
+    name: "tensor"
+    type_attr: "T"
   }
   input_arg {
-    name: "mom"
-    type: DT_RESOURCE
+    name: "axis"
+    type_attr: "Tidx"
   }
-  input_arg {
-    name: "lr"
+  output_arg {
+    name: "output"
     type_attr: "T"
   }
-  input_arg {
-    name: "rho"
-    type_attr: "T"
+  attr {
+    name: "Tidx"
+    type: "type"
+    default_value {
+      type: DT_INT32
+    }
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_UINT8
+        type: DT_INT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_BOOL
+        type: DT_HALF
+        type: DT_BFLOAT16
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_STRING
+      }
+    }
   }
+}
+op {
+  name: "RightShift"
   input_arg {
-    name: "momentum"
+    name: "x"
     type_attr: "T"
   }
   input_arg {
-    name: "epsilon"
+    name: "y"
     type_attr: "T"
   }
-  input_arg {
-    name: "grad"
+  output_arg {
+    name: "z"
     type_attr: "T"
   }
   attr {
@@ -27160,66 +39320,70 @@ op {
     type: "type"
     allowed_values {
       list {
-        type: DT_FLOAT
-        type: DT_DOUBLE
-        type: DT_INT64
+        type: DT_INT8
+        type: DT_INT16
         type: DT_INT32
+        type: DT_INT64
         type: DT_UINT8
         type: DT_UINT16
-        type: DT_INT16
-        type: DT_INT8
-        type: DT_COMPLEX64
-        type: DT_COMPLEX128
-        type: DT_QINT8
-        type: DT_QUINT8
-        type: DT_QINT32
-        type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
       }
     }
   }
-  attr {
-    name: "use_locking"
-    type: "bool"
-    default_value {
-      b: false
-    }
-  }
-  is_stateful: true
+  is_commutative: true
 }
 op {
-  name: "ResourceApplyFtrl"
+  name: "Rint"
   input_arg {
-    name: "var"
-    type: DT_RESOURCE
+    name: "x"
+    type_attr: "T"
   }
-  input_arg {
-    name: "accum"
-    type: DT_RESOURCE
+  output_arg {
+    name: "y"
+    type_attr: "T"
   }
-  input_arg {
-    name: "linear"
-    type: DT_RESOURCE
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+      }
+    }
   }
+}
+op {
+  name: "Rint"
   input_arg {
-    name: "grad"
+    name: "x"
     type_attr: "T"
   }
-  input_arg {
-    name: "lr"
+  output_arg {
+    name: "y"
     type_attr: "T"
   }
-  input_arg {
-    name: "l1"
-    type_attr: "T"
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_BFLOAT16
+        type: DT_FLOAT
+        type: DT_DOUBLE
+      }
+    }
   }
+}
+op {
+  name: "Round"
   input_arg {
-    name: "l2"
+    name: "x"
     type_attr: "T"
   }
-  input_arg {
-    name: "lr_power"
+  output_arg {
+    name: "y"
     type_attr: "T"
   }
   attr {
@@ -27227,64 +39391,25 @@ op {
     type: "type"
     allowed_values {
       list {
+        type: DT_HALF
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT64
         type: DT_INT32
-        type: DT_UINT8
-        type: DT_UINT16
-        type: DT_INT16
-        type: DT_INT8
+        type: DT_INT64
         type: DT_COMPLEX64
         type: DT_COMPLEX128
-        type: DT_QINT8
-        type: DT_QUINT8
-        type: DT_QINT32
-        type: DT_HALF
       }
     }
   }
-  attr {
-    name: "use_locking"
-    type: "bool"
-    default_value {
-      b: false
-    }
-  }
-  is_stateful: true
 }
 op {
-  name: "ResourceApplyFtrl"
-  input_arg {
-    name: "var"
-    type: DT_RESOURCE
-  }
-  input_arg {
-    name: "accum"
-    type: DT_RESOURCE
-  }
-  input_arg {
-    name: "linear"
-    type: DT_RESOURCE
-  }
-  input_arg {
-    name: "grad"
-    type_attr: "T"
-  }
-  input_arg {
-    name: "lr"
-    type_attr: "T"
-  }
-  input_arg {
-    name: "l1"
-    type_attr: "T"
-  }
+  name: "Round"
   input_arg {
-    name: "l2"
+    name: "x"
     type_attr: "T"
   }
-  input_arg {
-    name: "lr_power"
+  output_arg {
+    name: "y"
     type_attr: "T"
   }
   attr {
@@ -27292,70 +39417,50 @@ op {
     type: "type"
     allowed_values {
       list {
+        type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT64
         type: DT_INT32
-        type: DT_UINT8
-        type: DT_UINT16
-        type: DT_INT16
-        type: DT_INT8
+        type: DT_INT64
         type: DT_COMPLEX64
         type: DT_COMPLEX128
-        type: DT_QINT8
-        type: DT_QUINT8
-        type: DT_QINT32
-        type: DT_HALF
-        type: DT_UINT32
-        type: DT_UINT64
       }
     }
   }
-  attr {
-    name: "use_locking"
-    type: "bool"
-    default_value {
-      b: false
-    }
-  }
-  is_stateful: true
 }
 op {
-  name: "ResourceApplyFtrlV2"
-  input_arg {
-    name: "var"
-    type: DT_RESOURCE
-  }
-  input_arg {
-    name: "accum"
-    type: DT_RESOURCE
-  }
-  input_arg {
-    name: "linear"
-    type: DT_RESOURCE
-  }
-  input_arg {
-    name: "grad"
-    type_attr: "T"
-  }
+  name: "Rsqrt"
   input_arg {
-    name: "lr"
+    name: "x"
     type_attr: "T"
   }
-  input_arg {
-    name: "l1"
+  output_arg {
+    name: "y"
     type_attr: "T"
   }
-  input_arg {
-    name: "l2"
-    type_attr: "T"
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+      }
+    }
   }
+}
+op {
+  name: "Rsqrt"
   input_arg {
-    name: "l2_shrinkage"
+    name: "x"
     type_attr: "T"
   }
-  input_arg {
-    name: "lr_power"
+  output_arg {
+    name: "y"
     type_attr: "T"
   }
   attr {
@@ -27363,68 +39468,56 @@ op {
     type: "type"
     allowed_values {
       list {
+        type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT64
-        type: DT_INT32
-        type: DT_UINT8
-        type: DT_UINT16
-        type: DT_INT16
-        type: DT_INT8
         type: DT_COMPLEX64
         type: DT_COMPLEX128
-        type: DT_QINT8
-        type: DT_QUINT8
-        type: DT_QINT32
-        type: DT_HALF
       }
     }
   }
-  attr {
-    name: "use_locking"
-    type: "bool"
-    default_value {
-      b: false
-    }
-  }
-  is_stateful: true
 }
 op {
-  name: "ResourceApplyFtrlV2"
-  input_arg {
-    name: "var"
-    type: DT_RESOURCE
-  }
-  input_arg {
-    name: "accum"
-    type: DT_RESOURCE
-  }
-  input_arg {
-    name: "linear"
-    type: DT_RESOURCE
-  }
+  name: "RsqrtGrad"
   input_arg {
-    name: "grad"
+    name: "x"
     type_attr: "T"
   }
   input_arg {
-    name: "lr"
+    name: "y"
     type_attr: "T"
   }
-  input_arg {
-    name: "l1"
+  output_arg {
+    name: "z"
     type_attr: "T"
   }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+      }
+    }
+  }
+}
+op {
+  name: "RsqrtGrad"
   input_arg {
-    name: "l2"
+    name: "y"
     type_attr: "T"
   }
   input_arg {
-    name: "l2_shrinkage"
+    name: "dy"
     type_attr: "T"
   }
-  input_arg {
-    name: "lr_power"
+  output_arg {
+    name: "z"
     type_attr: "T"
   }
   attr {
@@ -27432,46 +39525,27 @@ op {
     type: "type"
     allowed_values {
       list {
+        type: DT_HALF
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT64
-        type: DT_INT32
-        type: DT_UINT8
-        type: DT_UINT16
-        type: DT_INT16
-        type: DT_INT8
         type: DT_COMPLEX64
         type: DT_COMPLEX128
-        type: DT_QINT8
-        type: DT_QUINT8
-        type: DT_QINT32
-        type: DT_HALF
-        type: DT_UINT32
-        type: DT_UINT64
       }
     }
   }
-  attr {
-    name: "use_locking"
-    type: "bool"
-    default_value {
-      b: false
-    }
-  }
-  is_stateful: true
 }
 op {
-  name: "ResourceApplyGradientDescent"
+  name: "RsqrtGrad"
   input_arg {
-    name: "var"
-    type: DT_RESOURCE
+    name: "y"
+    type_attr: "T"
   }
   input_arg {
-    name: "alpha"
+    name: "dy"
     type_attr: "T"
   }
-  input_arg {
-    name: "delta"
+  output_arg {
+    name: "z"
     type_attr: "T"
   }
   attr {
@@ -27479,72 +39553,101 @@ op {
     type: "type"
     allowed_values {
       list {
+        type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT64
-        type: DT_INT32
-        type: DT_UINT8
-        type: DT_UINT16
-        type: DT_INT16
-        type: DT_INT8
         type: DT_COMPLEX64
         type: DT_COMPLEX128
-        type: DT_QINT8
-        type: DT_QUINT8
-        type: DT_QINT32
-        type: DT_HALF
       }
     }
   }
-  attr {
-    name: "use_locking"
-    type: "bool"
-    default_value {
-      b: false
-    }
-  }
-  is_stateful: true
 }
 op {
-  name: "ResourceApplyGradientDescent"
+  name: "SampleDistortedBoundingBox"
   input_arg {
-    name: "var"
-    type: DT_RESOURCE
+    name: "image_size"
+    type_attr: "T"
   }
   input_arg {
-    name: "alpha"
+    name: "bounding_boxes"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "begin"
     type_attr: "T"
   }
-  input_arg {
-    name: "delta"
+  output_arg {
+    name: "size"
     type_attr: "T"
   }
+  output_arg {
+    name: "bboxes"
+    type: DT_FLOAT
+  }
   attr {
     name: "T"
     type: "type"
     allowed_values {
       list {
-        type: DT_FLOAT
-        type: DT_DOUBLE
-        type: DT_INT64
-        type: DT_INT32
         type: DT_UINT8
-        type: DT_UINT16
-        type: DT_INT16
         type: DT_INT8
-        type: DT_COMPLEX64
-        type: DT_COMPLEX128
-        type: DT_QINT8
-        type: DT_QUINT8
-        type: DT_QINT32
-        type: DT_HALF
-        type: DT_UINT32
-        type: DT_UINT64
+        type: DT_INT16
+        type: DT_INT32
+        type: DT_INT64
       }
     }
   }
   attr {
-    name: "use_locking"
+    name: "seed"
+    type: "int"
+    default_value {
+      i: 0
+    }
+  }
+  attr {
+    name: "seed2"
+    type: "int"
+    default_value {
+      i: 0
+    }
+  }
+  attr {
+    name: "min_object_covered"
+    type: "float"
+    default_value {
+      f: 0.1
+    }
+  }
+  attr {
+    name: "aspect_ratio_range"
+    type: "list(float)"
+    default_value {
+      list {
+        f: 0.75
+        f: 1.33
+      }
+    }
+  }
+  attr {
+    name: "area_range"
+    type: "list(float)"
+    default_value {
+      list {
+        f: 0.05
+        f: 1
+      }
+    }
+  }
+  attr {
+    name: "max_attempts"
+    type: "int"
+    default_value {
+      i: 100
+    }
+  }
+  attr {
+    name: "use_image_if_no_bounding_boxes"
     type: "bool"
     default_value {
       b: false
@@ -27553,58 +39656,87 @@ op {
   is_stateful: true
 }
 op {
-  name: "ResourceApplyMomentum"
+  name: "SampleDistortedBoundingBoxV2"
   input_arg {
-    name: "var"
-    type: DT_RESOURCE
+    name: "image_size"
+    type_attr: "T"
   }
   input_arg {
-    name: "accum"
-    type: DT_RESOURCE
+    name: "bounding_boxes"
+    type: DT_FLOAT
   }
   input_arg {
-    name: "lr"
-    type_attr: "T"
+    name: "min_object_covered"
+    type: DT_FLOAT
   }
-  input_arg {
-    name: "grad"
+  output_arg {
+    name: "begin"
     type_attr: "T"
   }
-  input_arg {
-    name: "momentum"
+  output_arg {
+    name: "size"
     type_attr: "T"
   }
+  output_arg {
+    name: "bboxes"
+    type: DT_FLOAT
+  }
   attr {
     name: "T"
     type: "type"
     allowed_values {
       list {
-        type: DT_FLOAT
-        type: DT_DOUBLE
-        type: DT_INT64
-        type: DT_INT32
         type: DT_UINT8
-        type: DT_UINT16
-        type: DT_INT16
         type: DT_INT8
-        type: DT_COMPLEX64
-        type: DT_COMPLEX128
-        type: DT_QINT8
-        type: DT_QUINT8
-        type: DT_QINT32
-        type: DT_HALF
+        type: DT_INT16
+        type: DT_INT32
+        type: DT_INT64
       }
     }
   }
   attr {
-    name: "use_locking"
-    type: "bool"
+    name: "seed"
+    type: "int"
+    default_value {
+      i: 0
+    }
+  }
+  attr {
+    name: "seed2"
+    type: "int"
+    default_value {
+      i: 0
+    }
+  }
+  attr {
+    name: "aspect_ratio_range"
+    type: "list(float)"
+    default_value {
+      list {
+        f: 0.75
+        f: 1.33
+      }
+    }
+  }
+  attr {
+    name: "area_range"
+    type: "list(float)"
+    default_value {
+      list {
+        f: 0.05
+        f: 1
+      }
+    }
+  }
+  attr {
+    name: "max_attempts"
+    type: "int"
     default_value {
-      b: false
+      i: 100
     }
   }
   attr {
-    name: "use_nesterov"
+    name: "use_image_if_no_bounding_boxes"
     type: "bool"
     default_value {
       b: false
@@ -27613,213 +39745,164 @@ op {
   is_stateful: true
 }
 op {
-  name: "ResourceApplyMomentum"
+  name: "Save"
   input_arg {
-    name: "var"
-    type: DT_RESOURCE
+    name: "filename"
+    type: DT_STRING
   }
   input_arg {
-    name: "accum"
-    type: DT_RESOURCE
+    name: "tensor_names"
+    type: DT_STRING
   }
   input_arg {
-    name: "lr"
-    type_attr: "T"
+    name: "data"
+    type_list_attr: "T"
   }
-  input_arg {
-    name: "grad"
-    type_attr: "T"
+  attr {
+    name: "T"
+    type: "list(type)"
+    has_minimum: true
+    minimum: 1
   }
+}
+op {
+  name: "Save"
   input_arg {
-    name: "momentum"
-    type_attr: "T"
+    name: "filename"
+    type: DT_STRING
   }
-  attr {
-    name: "T"
-    type: "type"
-    allowed_values {
-      list {
-        type: DT_FLOAT
-        type: DT_DOUBLE
-        type: DT_INT64
-        type: DT_INT32
-        type: DT_UINT8
-        type: DT_UINT16
-        type: DT_INT16
-        type: DT_INT8
-        type: DT_COMPLEX64
-        type: DT_COMPLEX128
-        type: DT_QINT8
-        type: DT_QUINT8
-        type: DT_QINT32
-        type: DT_HALF
-        type: DT_UINT32
-        type: DT_UINT64
-      }
-    }
+  input_arg {
+    name: "tensor_names"
+    type: DT_STRING
   }
-  attr {
-    name: "use_locking"
-    type: "bool"
-    default_value {
-      b: false
-    }
+  input_arg {
+    name: "data"
+    type_list_attr: "T"
   }
   attr {
-    name: "use_nesterov"
-    type: "bool"
-    default_value {
-      b: false
-    }
+    name: "T"
+    type: "list(type)"
+    has_minimum: true
+    minimum: 1
   }
   is_stateful: true
 }
 op {
-  name: "ResourceApplyPowerSign"
+  name: "SaveSlices"
   input_arg {
-    name: "var"
-    type: DT_RESOURCE
+    name: "filename"
+    type: DT_STRING
   }
   input_arg {
-    name: "m"
-    type: DT_RESOURCE
+    name: "tensor_names"
+    type: DT_STRING
   }
   input_arg {
-    name: "lr"
-    type_attr: "T"
+    name: "shapes_and_slices"
+    type: DT_STRING
   }
   input_arg {
-    name: "logbase"
-    type_attr: "T"
+    name: "data"
+    type_list_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "list(type)"
+    has_minimum: true
+    minimum: 1
   }
+}
+op {
+  name: "SaveSlices"
   input_arg {
-    name: "sign_decay"
-    type_attr: "T"
+    name: "filename"
+    type: DT_STRING
   }
   input_arg {
-    name: "beta"
-    type_attr: "T"
+    name: "tensor_names"
+    type: DT_STRING
   }
   input_arg {
-    name: "grad"
-    type_attr: "T"
+    name: "shapes_and_slices"
+    type: DT_STRING
   }
-  attr {
-    name: "T"
-    type: "type"
-    allowed_values {
-      list {
-        type: DT_FLOAT
-        type: DT_DOUBLE
-        type: DT_INT64
-        type: DT_INT32
-        type: DT_UINT8
-        type: DT_UINT16
-        type: DT_INT16
-        type: DT_INT8
-        type: DT_COMPLEX64
-        type: DT_COMPLEX128
-        type: DT_QINT8
-        type: DT_QUINT8
-        type: DT_QINT32
-        type: DT_HALF
-        type: DT_UINT32
-        type: DT_UINT64
-      }
-    }
+  input_arg {
+    name: "data"
+    type_list_attr: "T"
   }
   attr {
-    name: "use_locking"
-    type: "bool"
-    default_value {
-      b: false
-    }
+    name: "T"
+    type: "list(type)"
+    has_minimum: true
+    minimum: 1
   }
   is_stateful: true
 }
 op {
-  name: "ResourceApplyProximalAdagrad"
-  input_arg {
-    name: "var"
-    type: DT_RESOURCE
-  }
-  input_arg {
-    name: "accum"
-    type: DT_RESOURCE
-  }
+  name: "SaveV2"
   input_arg {
-    name: "lr"
-    type_attr: "T"
+    name: "prefix"
+    type: DT_STRING
   }
   input_arg {
-    name: "l1"
-    type_attr: "T"
+    name: "tensor_names"
+    type: DT_STRING
   }
   input_arg {
-    name: "l2"
-    type_attr: "T"
+    name: "shape_and_slices"
+    type: DT_STRING
   }
   input_arg {
-    name: "grad"
-    type_attr: "T"
-  }
-  attr {
-    name: "T"
-    type: "type"
-    allowed_values {
-      list {
-        type: DT_FLOAT
-        type: DT_DOUBLE
-        type: DT_INT64
-        type: DT_INT32
-        type: DT_UINT8
-        type: DT_UINT16
-        type: DT_INT16
-        type: DT_INT8
-        type: DT_COMPLEX64
-        type: DT_COMPLEX128
-        type: DT_QINT8
-        type: DT_QUINT8
-        type: DT_QINT32
-        type: DT_HALF
-      }
-    }
+    name: "tensors"
+    type_list_attr: "dtypes"
   }
   attr {
-    name: "use_locking"
-    type: "bool"
-    default_value {
-      b: false
-    }
+    name: "dtypes"
+    type: "list(type)"
+    has_minimum: true
+    minimum: 1
   }
-  is_stateful: true
 }
 op {
-  name: "ResourceApplyProximalAdagrad"
+  name: "SaveV2"
   input_arg {
-    name: "var"
-    type: DT_RESOURCE
+    name: "prefix"
+    type: DT_STRING
   }
   input_arg {
-    name: "accum"
-    type: DT_RESOURCE
+    name: "tensor_names"
+    type: DT_STRING
   }
   input_arg {
-    name: "lr"
-    type_attr: "T"
+    name: "shape_and_slices"
+    type: DT_STRING
   }
   input_arg {
-    name: "l1"
-    type_attr: "T"
+    name: "tensors"
+    type_list_attr: "dtypes"
+  }
+  attr {
+    name: "dtypes"
+    type: "list(type)"
+    has_minimum: true
+    minimum: 1
   }
+  is_stateful: true
+}
+op {
+  name: "ScalarSummary"
   input_arg {
-    name: "l2"
-    type_attr: "T"
+    name: "tags"
+    type: DT_STRING
   }
   input_arg {
-    name: "grad"
+    name: "values"
     type_attr: "T"
   }
+  output_arg {
+    name: "summary"
+    type: DT_STRING
+  }
   attr {
     name: "T"
     type: "type"
@@ -27827,53 +39910,30 @@ op {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT64
         type: DT_INT32
+        type: DT_INT64
         type: DT_UINT8
-        type: DT_UINT16
         type: DT_INT16
         type: DT_INT8
-        type: DT_COMPLEX64
-        type: DT_COMPLEX128
-        type: DT_QINT8
-        type: DT_QUINT8
-        type: DT_QINT32
+        type: DT_UINT16
         type: DT_HALF
-        type: DT_UINT32
-        type: DT_UINT64
       }
     }
   }
-  attr {
-    name: "use_locking"
-    type: "bool"
-    default_value {
-      b: false
-    }
-  }
-  is_stateful: true
 }
 op {
-  name: "ResourceApplyProximalGradientDescent"
-  input_arg {
-    name: "var"
-    type: DT_RESOURCE
-  }
-  input_arg {
-    name: "alpha"
-    type_attr: "T"
-  }
+  name: "ScalarSummary"
   input_arg {
-    name: "l1"
-    type_attr: "T"
+    name: "tags"
+    type: DT_STRING
   }
   input_arg {
-    name: "l2"
+    name: "values"
     type_attr: "T"
   }
-  input_arg {
-    name: "delta"
-    type_attr: "T"
+  output_arg {
+    name: "summary"
+    type: DT_STRING
   }
   attr {
     name: "T"
@@ -27882,51 +39942,32 @@ op {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT64
         type: DT_INT32
+        type: DT_INT64
         type: DT_UINT8
-        type: DT_UINT16
         type: DT_INT16
         type: DT_INT8
-        type: DT_COMPLEX64
-        type: DT_COMPLEX128
-        type: DT_QINT8
-        type: DT_QUINT8
-        type: DT_QINT32
+        type: DT_UINT16
         type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
       }
     }
   }
-  attr {
-    name: "use_locking"
-    type: "bool"
-    default_value {
-      b: false
-    }
-  }
-  is_stateful: true
 }
 op {
-  name: "ResourceApplyProximalGradientDescent"
-  input_arg {
-    name: "var"
-    type: DT_RESOURCE
-  }
-  input_arg {
-    name: "alpha"
-    type_attr: "T"
-  }
+  name: "ScalarSummary"
   input_arg {
-    name: "l1"
-    type_attr: "T"
+    name: "tags"
+    type: DT_STRING
   }
   input_arg {
-    name: "l2"
+    name: "values"
     type_attr: "T"
   }
-  input_arg {
-    name: "delta"
-    type_attr: "T"
+  output_arg {
+    name: "summary"
+    type: DT_STRING
   }
   attr {
     name: "T"
@@ -27935,65 +39976,85 @@ op {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT64
         type: DT_INT32
+        type: DT_INT64
         type: DT_UINT8
-        type: DT_UINT16
         type: DT_INT16
         type: DT_INT8
-        type: DT_COMPLEX64
-        type: DT_COMPLEX128
-        type: DT_QINT8
-        type: DT_QUINT8
-        type: DT_QINT32
+        type: DT_UINT16
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
-  attr {
-    name: "use_locking"
-    type: "bool"
-    default_value {
-      b: false
-    }
-  }
-  is_stateful: true
 }
 op {
-  name: "ResourceApplyRMSProp"
+  name: "ScanDataset"
   input_arg {
-    name: "var"
-    type: DT_RESOURCE
+    name: "input_dataset"
+    type: DT_VARIANT
   }
   input_arg {
-    name: "ms"
-    type: DT_RESOURCE
+    name: "initial_state"
+    type_list_attr: "Tstate"
   }
   input_arg {
-    name: "mom"
-    type: DT_RESOURCE
+    name: "other_arguments"
+    type_list_attr: "Targuments"
   }
-  input_arg {
-    name: "lr"
-    type_attr: "T"
+  output_arg {
+    name: "handle"
+    type: DT_VARIANT
+  }
+  attr {
+    name: "f"
+    type: "func"
+  }
+  attr {
+    name: "Tstate"
+    type: "list(type)"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "Targuments"
+    type: "list(type)"
+    has_minimum: true
+  }
+  attr {
+    name: "output_types"
+    type: "list(type)"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "output_shapes"
+    type: "list(shape)"
+    has_minimum: true
+    minimum: 1
   }
+}
+op {
+  name: "ScatterAdd"
   input_arg {
-    name: "rho"
+    name: "ref"
     type_attr: "T"
+    is_ref: true
   }
   input_arg {
-    name: "momentum"
-    type_attr: "T"
+    name: "indices"
+    type_attr: "Tindices"
   }
   input_arg {
-    name: "epsilon"
+    name: "updates"
     type_attr: "T"
   }
-  input_arg {
-    name: "grad"
+  output_arg {
+    name: "output_ref"
     type_attr: "T"
+    is_ref: true
   }
   attr {
     name: "T"
@@ -28017,6 +40078,16 @@ op {
       }
     }
   }
+  attr {
+    name: "Tindices"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
   attr {
     name: "use_locking"
     type: "bool"
@@ -28024,41 +40095,26 @@ op {
       b: false
     }
   }
-  is_stateful: true
 }
 op {
-  name: "ResourceApplyRMSProp"
-  input_arg {
-    name: "var"
-    type: DT_RESOURCE
-  }
-  input_arg {
-    name: "ms"
-    type: DT_RESOURCE
-  }
-  input_arg {
-    name: "mom"
-    type: DT_RESOURCE
-  }
-  input_arg {
-    name: "lr"
-    type_attr: "T"
-  }
+  name: "ScatterAdd"
   input_arg {
-    name: "rho"
+    name: "ref"
     type_attr: "T"
+    is_ref: true
   }
   input_arg {
-    name: "momentum"
-    type_attr: "T"
+    name: "indices"
+    type_attr: "Tindices"
   }
   input_arg {
-    name: "epsilon"
+    name: "updates"
     type_attr: "T"
   }
-  input_arg {
-    name: "grad"
+  output_arg {
+    name: "output_ref"
     type_attr: "T"
+    is_ref: true
   }
   attr {
     name: "T"
@@ -28085,30 +40141,7 @@ op {
     }
   }
   attr {
-    name: "use_locking"
-    type: "bool"
-    default_value {
-      b: false
-    }
-  }
-  is_stateful: true
-}
-op {
-  name: "ResourceCountUpTo"
-  input_arg {
-    name: "resource"
-    type: DT_RESOURCE
-  }
-  output_arg {
-    name: "output"
-    type_attr: "T"
-  }
-  attr {
-    name: "limit"
-    type: "int"
-  }
-  attr {
-    name: "T"
+    name: "Tindices"
     type: "type"
     allowed_values {
       list {
@@ -28117,50 +40150,20 @@ op {
       }
     }
   }
-  is_stateful: true
-}
-op {
-  name: "ResourceGather"
-  input_arg {
-    name: "resource"
-    type: DT_RESOURCE
-  }
-  input_arg {
-    name: "indices"
-    type_attr: "Tindices"
-  }
-  output_arg {
-    name: "output"
-    type_attr: "dtype"
-  }
   attr {
-    name: "validate_indices"
+    name: "use_locking"
     type: "bool"
     default_value {
-      b: true
-    }
-  }
-  attr {
-    name: "dtype"
-    type: "type"
-  }
-  attr {
-    name: "Tindices"
-    type: "type"
-    allowed_values {
-      list {
-        type: DT_INT32
-        type: DT_INT64
-      }
+      b: false
     }
   }
-  is_stateful: true
 }
 op {
-  name: "ResourceScatterAdd"
+  name: "ScatterAdd"
   input_arg {
-    name: "resource"
-    type: DT_RESOURCE
+    name: "ref"
+    type_attr: "T"
+    is_ref: true
   }
   input_arg {
     name: "indices"
@@ -28168,10 +40171,15 @@ op {
   }
   input_arg {
     name: "updates"
-    type_attr: "dtype"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "output_ref"
+    type_attr: "T"
+    is_ref: true
   }
   attr {
-    name: "dtype"
+    name: "T"
     type: "type"
     allowed_values {
       list {
@@ -28189,6 +40197,9 @@ op {
         type: DT_QUINT8
         type: DT_QINT32
         type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
@@ -28202,13 +40213,20 @@ op {
       }
     }
   }
-  is_stateful: true
+  attr {
+    name: "use_locking"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
 }
 op {
-  name: "ResourceScatterAdd"
+  name: "ScatterDiv"
   input_arg {
-    name: "resource"
-    type: DT_RESOURCE
+    name: "ref"
+    type_attr: "T"
+    is_ref: true
   }
   input_arg {
     name: "indices"
@@ -28216,10 +40234,15 @@ op {
   }
   input_arg {
     name: "updates"
-    type_attr: "dtype"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "output_ref"
+    type_attr: "T"
+    is_ref: true
   }
   attr {
-    name: "dtype"
+    name: "T"
     type: "type"
     allowed_values {
       list {
@@ -28237,8 +40260,6 @@ op {
         type: DT_QUINT8
         type: DT_QINT32
         type: DT_HALF
-        type: DT_UINT32
-        type: DT_UINT64
       }
     }
   }
@@ -28252,13 +40273,20 @@ op {
       }
     }
   }
-  is_stateful: true
+  attr {
+    name: "use_locking"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
 }
 op {
-  name: "ResourceScatterUpdate"
+  name: "ScatterDiv"
   input_arg {
-    name: "resource"
-    type: DT_RESOURCE
+    name: "ref"
+    type_attr: "T"
+    is_ref: true
   }
   input_arg {
     name: "indices"
@@ -28266,10 +40294,15 @@ op {
   }
   input_arg {
     name: "updates"
-    type_attr: "dtype"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "output_ref"
+    type_attr: "T"
+    is_ref: true
   }
   attr {
-    name: "dtype"
+    name: "T"
     type: "type"
     allowed_values {
       list {
@@ -28302,41 +40335,33 @@ op {
       }
     }
   }
-  is_stateful: true
+  attr {
+    name: "use_locking"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
 }
 op {
-  name: "ResourceSparseApplyAdadelta"
-  input_arg {
-    name: "var"
-    type: DT_RESOURCE
-  }
-  input_arg {
-    name: "accum"
-    type: DT_RESOURCE
-  }
-  input_arg {
-    name: "accum_update"
-    type: DT_RESOURCE
-  }
+  name: "ScatterDiv"
   input_arg {
-    name: "lr"
+    name: "ref"
     type_attr: "T"
+    is_ref: true
   }
   input_arg {
-    name: "rho"
-    type_attr: "T"
+    name: "indices"
+    type_attr: "Tindices"
   }
   input_arg {
-    name: "epsilon"
+    name: "updates"
     type_attr: "T"
   }
-  input_arg {
-    name: "grad"
+  output_arg {
+    name: "output_ref"
     type_attr: "T"
-  }
-  input_arg {
-    name: "indices"
-    type_attr: "Tindices"
+    is_ref: true
   }
   attr {
     name: "T"
@@ -28357,6 +40382,9 @@ op {
         type: DT_QUINT8
         type: DT_QINT32
         type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
@@ -28377,41 +40405,26 @@ op {
       b: false
     }
   }
-  is_stateful: true
 }
 op {
-  name: "ResourceSparseApplyAdadelta"
-  input_arg {
-    name: "var"
-    type: DT_RESOURCE
-  }
-  input_arg {
-    name: "accum"
-    type: DT_RESOURCE
-  }
-  input_arg {
-    name: "accum_update"
-    type: DT_RESOURCE
-  }
+  name: "ScatterMul"
   input_arg {
-    name: "lr"
+    name: "ref"
     type_attr: "T"
+    is_ref: true
   }
   input_arg {
-    name: "rho"
-    type_attr: "T"
+    name: "indices"
+    type_attr: "Tindices"
   }
   input_arg {
-    name: "epsilon"
+    name: "updates"
     type_attr: "T"
   }
-  input_arg {
-    name: "grad"
+  output_arg {
+    name: "output_ref"
     type_attr: "T"
-  }
-  input_arg {
-    name: "indices"
-    type_attr: "Tindices"
+    is_ref: true
   }
   attr {
     name: "T"
@@ -28432,8 +40445,6 @@ op {
         type: DT_QUINT8
         type: DT_QINT32
         type: DT_HALF
-        type: DT_UINT32
-        type: DT_UINT64
       }
     }
   }
@@ -28454,29 +40465,26 @@ op {
       b: false
     }
   }
-  is_stateful: true
 }
 op {
-  name: "ResourceSparseApplyAdagrad"
+  name: "ScatterMul"
   input_arg {
-    name: "var"
-    type: DT_RESOURCE
+    name: "ref"
+    type_attr: "T"
+    is_ref: true
   }
   input_arg {
-    name: "accum"
-    type: DT_RESOURCE
+    name: "indices"
+    type_attr: "Tindices"
   }
   input_arg {
-    name: "lr"
+    name: "updates"
     type_attr: "T"
   }
-  input_arg {
-    name: "grad"
+  output_arg {
+    name: "output_ref"
     type_attr: "T"
-  }
-  input_arg {
-    name: "indices"
-    type_attr: "Tindices"
+    is_ref: true
   }
   attr {
     name: "T"
@@ -28497,6 +40505,8 @@ op {
         type: DT_QUINT8
         type: DT_QINT32
         type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
       }
     }
   }
@@ -28517,29 +40527,26 @@ op {
       b: false
     }
   }
-  is_stateful: true
 }
 op {
-  name: "ResourceSparseApplyAdagrad"
+  name: "ScatterMul"
   input_arg {
-    name: "var"
-    type: DT_RESOURCE
+    name: "ref"
+    type_attr: "T"
+    is_ref: true
   }
   input_arg {
-    name: "accum"
-    type: DT_RESOURCE
+    name: "indices"
+    type_attr: "Tindices"
   }
   input_arg {
-    name: "lr"
+    name: "updates"
     type_attr: "T"
   }
-  input_arg {
-    name: "grad"
+  output_arg {
+    name: "output_ref"
     type_attr: "T"
-  }
-  input_arg {
-    name: "indices"
-    type_attr: "Tindices"
+    is_ref: true
   }
   attr {
     name: "T"
@@ -28562,6 +40569,7 @@ op {
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
@@ -28582,45 +40590,59 @@ op {
       b: false
     }
   }
-  is_stateful: true
 }
 op {
-  name: "ResourceSparseApplyAdagradDA"
+  name: "ScatterNd"
   input_arg {
-    name: "var"
-    type: DT_RESOURCE
+    name: "indices"
+    type_attr: "Tindices"
   }
   input_arg {
-    name: "gradient_accumulator"
-    type: DT_RESOURCE
+    name: "updates"
+    type_attr: "T"
   }
   input_arg {
-    name: "gradient_squared_accumulator"
-    type: DT_RESOURCE
+    name: "shape"
+    type_attr: "Tindices"
   }
-  input_arg {
-    name: "grad"
+  output_arg {
+    name: "output"
     type_attr: "T"
   }
-  input_arg {
-    name: "indices"
-    type_attr: "Tindices"
+  attr {
+    name: "T"
+    type: "type"
+  }
+  attr {
+    name: "Tindices"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
   }
+}
+op {
+  name: "ScatterNdAdd"
   input_arg {
-    name: "lr"
+    name: "ref"
     type_attr: "T"
+    is_ref: true
   }
   input_arg {
-    name: "l1"
-    type_attr: "T"
+    name: "indices"
+    type_attr: "Tindices"
   }
   input_arg {
-    name: "l2"
+    name: "updates"
     type_attr: "T"
   }
-  input_arg {
-    name: "global_step"
-    type: DT_INT64
+  output_arg {
+    name: "output_ref"
+    type_attr: "T"
+    is_ref: true
   }
   attr {
     name: "T"
@@ -28661,45 +40683,26 @@ op {
       b: false
     }
   }
-  is_stateful: true
 }
 op {
-  name: "ResourceSparseApplyAdagradDA"
-  input_arg {
-    name: "var"
-    type: DT_RESOURCE
-  }
-  input_arg {
-    name: "gradient_accumulator"
-    type: DT_RESOURCE
-  }
-  input_arg {
-    name: "gradient_squared_accumulator"
-    type: DT_RESOURCE
-  }
+  name: "ScatterNdAdd"
   input_arg {
-    name: "grad"
+    name: "ref"
     type_attr: "T"
+    is_ref: true
   }
   input_arg {
     name: "indices"
     type_attr: "Tindices"
   }
   input_arg {
-    name: "lr"
-    type_attr: "T"
-  }
-  input_arg {
-    name: "l1"
+    name: "updates"
     type_attr: "T"
   }
-  input_arg {
-    name: "l2"
+  output_arg {
+    name: "output_ref"
     type_attr: "T"
-  }
-  input_arg {
-    name: "global_step"
-    type: DT_INT64
+    is_ref: true
   }
   attr {
     name: "T"
@@ -28742,49 +40745,26 @@ op {
       b: false
     }
   }
-  is_stateful: true
 }
 op {
-  name: "ResourceSparseApplyCenteredRMSProp"
-  input_arg {
-    name: "var"
-    type: DT_RESOURCE
-  }
-  input_arg {
-    name: "mg"
-    type: DT_RESOURCE
-  }
-  input_arg {
-    name: "ms"
-    type: DT_RESOURCE
-  }
-  input_arg {
-    name: "mom"
-    type: DT_RESOURCE
-  }
-  input_arg {
-    name: "lr"
-    type_attr: "T"
-  }
+  name: "ScatterNdAdd"
   input_arg {
-    name: "rho"
+    name: "ref"
     type_attr: "T"
+    is_ref: true
   }
   input_arg {
-    name: "momentum"
-    type_attr: "T"
+    name: "indices"
+    type_attr: "Tindices"
   }
   input_arg {
-    name: "epsilon"
+    name: "updates"
     type_attr: "T"
   }
-  input_arg {
-    name: "grad"
+  output_arg {
+    name: "output_ref"
     type_attr: "T"
-  }
-  input_arg {
-    name: "indices"
-    type_attr: "Tindices"
+    is_ref: true
   }
   attr {
     name: "T"
@@ -28805,6 +40785,9 @@ op {
         type: DT_QUINT8
         type: DT_QINT32
         type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
@@ -28825,50 +40808,76 @@ op {
       b: false
     }
   }
-  is_stateful: true
 }
 op {
-  name: "ResourceSparseApplyCenteredRMSProp"
-  input_arg {
-    name: "var"
-    type: DT_RESOURCE
-  }
-  input_arg {
-    name: "mg"
-    type: DT_RESOURCE
-  }
+  name: "ScatterNdNonAliasingAdd"
   input_arg {
-    name: "ms"
-    type: DT_RESOURCE
+    name: "input"
+    type_attr: "T"
   }
   input_arg {
-    name: "mom"
-    type: DT_RESOURCE
+    name: "indices"
+    type_attr: "Tindices"
   }
   input_arg {
-    name: "lr"
+    name: "updates"
     type_attr: "T"
   }
-  input_arg {
-    name: "rho"
+  output_arg {
+    name: "output"
     type_attr: "T"
   }
-  input_arg {
-    name: "momentum"
-    type_attr: "T"
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+      }
+    }
   }
-  input_arg {
-    name: "epsilon"
-    type_attr: "T"
+  attr {
+    name: "Tindices"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
   }
+}
+op {
+  name: "ScatterNdNonAliasingAdd"
   input_arg {
-    name: "grad"
+    name: "input"
     type_attr: "T"
   }
   input_arg {
     name: "indices"
     type_attr: "Tindices"
   }
+  input_arg {
+    name: "updates"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
   attr {
     name: "T"
     type: "type"
@@ -28903,31 +40912,11 @@ op {
       }
     }
   }
-  attr {
-    name: "use_locking"
-    type: "bool"
-    default_value {
-      b: false
-    }
-  }
-  is_stateful: true
 }
 op {
-  name: "ResourceSparseApplyFtrl"
-  input_arg {
-    name: "var"
-    type: DT_RESOURCE
-  }
-  input_arg {
-    name: "accum"
-    type: DT_RESOURCE
-  }
-  input_arg {
-    name: "linear"
-    type: DT_RESOURCE
-  }
+  name: "ScatterNdNonAliasingAdd"
   input_arg {
-    name: "grad"
+    name: "input"
     type_attr: "T"
   }
   input_arg {
@@ -28935,19 +40924,11 @@ op {
     type_attr: "Tindices"
   }
   input_arg {
-    name: "lr"
-    type_attr: "T"
-  }
-  input_arg {
-    name: "l1"
-    type_attr: "T"
-  }
-  input_arg {
-    name: "l2"
+    name: "updates"
     type_attr: "T"
   }
-  input_arg {
-    name: "lr_power"
+  output_arg {
+    name: "output"
     type_attr: "T"
   }
   attr {
@@ -28969,6 +40950,9 @@ op {
         type: DT_QUINT8
         type: DT_QINT32
         type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
@@ -28982,52 +40966,26 @@ op {
       }
     }
   }
-  attr {
-    name: "use_locking"
-    type: "bool"
-    default_value {
-      b: false
-    }
-  }
-  is_stateful: true
 }
 op {
-  name: "ResourceSparseApplyFtrl"
-  input_arg {
-    name: "var"
-    type: DT_RESOURCE
-  }
-  input_arg {
-    name: "accum"
-    type: DT_RESOURCE
-  }
-  input_arg {
-    name: "linear"
-    type: DT_RESOURCE
-  }
+  name: "ScatterNdSub"
   input_arg {
-    name: "grad"
+    name: "ref"
     type_attr: "T"
+    is_ref: true
   }
   input_arg {
     name: "indices"
     type_attr: "Tindices"
   }
   input_arg {
-    name: "lr"
-    type_attr: "T"
-  }
-  input_arg {
-    name: "l1"
-    type_attr: "T"
-  }
-  input_arg {
-    name: "l2"
+    name: "updates"
     type_attr: "T"
   }
-  input_arg {
-    name: "lr_power"
+  output_arg {
+    name: "output_ref"
     type_attr: "T"
+    is_ref: true
   }
   attr {
     name: "T"
@@ -29048,8 +41006,6 @@ op {
         type: DT_QUINT8
         type: DT_QINT32
         type: DT_HALF
-        type: DT_UINT32
-        type: DT_UINT64
       }
     }
   }
@@ -29070,49 +41026,26 @@ op {
       b: false
     }
   }
-  is_stateful: true
 }
 op {
-  name: "ResourceSparseApplyFtrlV2"
-  input_arg {
-    name: "var"
-    type: DT_RESOURCE
-  }
-  input_arg {
-    name: "accum"
-    type: DT_RESOURCE
-  }
-  input_arg {
-    name: "linear"
-    type: DT_RESOURCE
-  }
-  input_arg {
-    name: "grad"
-    type_attr: "T"
-  }
-  input_arg {
-    name: "indices"
-    type_attr: "Tindices"
-  }
-  input_arg {
-    name: "lr"
-    type_attr: "T"
-  }
+  name: "ScatterNdSub"
   input_arg {
-    name: "l1"
+    name: "ref"
     type_attr: "T"
+    is_ref: true
   }
   input_arg {
-    name: "l2"
-    type_attr: "T"
+    name: "indices"
+    type_attr: "Tindices"
   }
   input_arg {
-    name: "l2_shrinkage"
+    name: "updates"
     type_attr: "T"
   }
-  input_arg {
-    name: "lr_power"
+  output_arg {
+    name: "output_ref"
     type_attr: "T"
+    is_ref: true
   }
   attr {
     name: "T"
@@ -29133,6 +41066,8 @@ op {
         type: DT_QUINT8
         type: DT_QINT32
         type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
       }
     }
   }
@@ -29153,49 +41088,26 @@ op {
       b: false
     }
   }
-  is_stateful: true
 }
 op {
-  name: "ResourceSparseApplyFtrlV2"
-  input_arg {
-    name: "var"
-    type: DT_RESOURCE
-  }
-  input_arg {
-    name: "accum"
-    type: DT_RESOURCE
-  }
-  input_arg {
-    name: "linear"
-    type: DT_RESOURCE
-  }
+  name: "ScatterNdSub"
   input_arg {
-    name: "grad"
+    name: "ref"
     type_attr: "T"
+    is_ref: true
   }
   input_arg {
     name: "indices"
     type_attr: "Tindices"
   }
   input_arg {
-    name: "lr"
-    type_attr: "T"
-  }
-  input_arg {
-    name: "l1"
-    type_attr: "T"
-  }
-  input_arg {
-    name: "l2"
-    type_attr: "T"
-  }
-  input_arg {
-    name: "l2_shrinkage"
+    name: "updates"
     type_attr: "T"
   }
-  input_arg {
-    name: "lr_power"
+  output_arg {
+    name: "output_ref"
     type_attr: "T"
+    is_ref: true
   }
   attr {
     name: "T"
@@ -29218,6 +41130,7 @@ op {
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
@@ -29238,33 +41151,68 @@ op {
       b: false
     }
   }
-  is_stateful: true
 }
 op {
-  name: "ResourceSparseApplyMomentum"
+  name: "ScatterNdUpdate"
   input_arg {
-    name: "var"
-    type: DT_RESOURCE
+    name: "ref"
+    type_attr: "T"
+    is_ref: true
   }
   input_arg {
-    name: "accum"
-    type: DT_RESOURCE
+    name: "indices"
+    type_attr: "Tindices"
   }
   input_arg {
-    name: "lr"
+    name: "updates"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "output_ref"
     type_attr: "T"
+    is_ref: true
+  }
+  attr {
+    name: "T"
+    type: "type"
+  }
+  attr {
+    name: "Tindices"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
   }
+  attr {
+    name: "use_locking"
+    type: "bool"
+    default_value {
+      b: true
+    }
+  }
+}
+op {
+  name: "ScatterSub"
   input_arg {
-    name: "grad"
+    name: "ref"
     type_attr: "T"
+    is_ref: true
   }
   input_arg {
     name: "indices"
     type_attr: "Tindices"
   }
   input_arg {
-    name: "momentum"
+    name: "updates"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "output_ref"
     type_attr: "T"
+    is_ref: true
   }
   attr {
     name: "T"
@@ -29305,40 +41253,26 @@ op {
       b: false
     }
   }
-  attr {
-    name: "use_nesterov"
-    type: "bool"
-    default_value {
-      b: false
-    }
-  }
-  is_stateful: true
 }
 op {
-  name: "ResourceSparseApplyMomentum"
-  input_arg {
-    name: "var"
-    type: DT_RESOURCE
-  }
-  input_arg {
-    name: "accum"
-    type: DT_RESOURCE
-  }
-  input_arg {
-    name: "lr"
-    type_attr: "T"
-  }
+  name: "ScatterSub"
   input_arg {
-    name: "grad"
+    name: "ref"
     type_attr: "T"
+    is_ref: true
   }
   input_arg {
     name: "indices"
     type_attr: "Tindices"
   }
   input_arg {
-    name: "momentum"
+    name: "updates"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "output_ref"
     type_attr: "T"
+    is_ref: true
   }
   attr {
     name: "T"
@@ -29381,44 +41315,26 @@ op {
       b: false
     }
   }
-  attr {
-    name: "use_nesterov"
-    type: "bool"
-    default_value {
-      b: false
-    }
-  }
-  is_stateful: true
 }
 op {
-  name: "ResourceSparseApplyProximalAdagrad"
-  input_arg {
-    name: "var"
-    type: DT_RESOURCE
-  }
-  input_arg {
-    name: "accum"
-    type: DT_RESOURCE
-  }
+  name: "ScatterSub"
   input_arg {
-    name: "lr"
+    name: "ref"
     type_attr: "T"
+    is_ref: true
   }
   input_arg {
-    name: "l1"
-    type_attr: "T"
+    name: "indices"
+    type_attr: "Tindices"
   }
   input_arg {
-    name: "l2"
+    name: "updates"
     type_attr: "T"
   }
-  input_arg {
-    name: "grad"
+  output_arg {
+    name: "output_ref"
     type_attr: "T"
-  }
-  input_arg {
-    name: "indices"
-    type_attr: "Tindices"
+    is_ref: true
   }
   attr {
     name: "T"
@@ -29439,6 +41355,9 @@ op {
         type: DT_QUINT8
         type: DT_QINT32
         type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
@@ -29459,61 +41378,30 @@ op {
       b: false
     }
   }
-  is_stateful: true
 }
 op {
-  name: "ResourceSparseApplyProximalAdagrad"
-  input_arg {
-    name: "var"
-    type: DT_RESOURCE
-  }
-  input_arg {
-    name: "accum"
-    type: DT_RESOURCE
-  }
+  name: "ScatterUpdate"
   input_arg {
-    name: "lr"
+    name: "ref"
     type_attr: "T"
+    is_ref: true
   }
   input_arg {
-    name: "l1"
-    type_attr: "T"
+    name: "indices"
+    type_attr: "Tindices"
   }
   input_arg {
-    name: "l2"
+    name: "updates"
     type_attr: "T"
   }
-  input_arg {
-    name: "grad"
+  output_arg {
+    name: "output_ref"
     type_attr: "T"
-  }
-  input_arg {
-    name: "indices"
-    type_attr: "Tindices"
+    is_ref: true
   }
   attr {
     name: "T"
     type: "type"
-    allowed_values {
-      list {
-        type: DT_FLOAT
-        type: DT_DOUBLE
-        type: DT_INT64
-        type: DT_INT32
-        type: DT_UINT8
-        type: DT_UINT16
-        type: DT_INT16
-        type: DT_INT8
-        type: DT_COMPLEX64
-        type: DT_COMPLEX128
-        type: DT_QINT8
-        type: DT_QUINT8
-        type: DT_QINT32
-        type: DT_HALF
-        type: DT_UINT32
-        type: DT_UINT64
-      }
-    }
   }
   attr {
     name: "Tindices"
@@ -29529,37 +41417,175 @@ op {
     name: "use_locking"
     type: "bool"
     default_value {
-      b: false
+      b: true
     }
   }
-  is_stateful: true
 }
 op {
-  name: "ResourceSparseApplyProximalGradientDescent"
+  name: "SdcaFprint"
   input_arg {
-    name: "var"
-    type: DT_RESOURCE
+    name: "input"
+    type: DT_STRING
+  }
+  output_arg {
+    name: "output"
+    type: DT_INT64
   }
+}
+op {
+  name: "SdcaOptimizer"
   input_arg {
-    name: "alpha"
-    type_attr: "T"
+    name: "sparse_example_indices"
+    type: DT_INT64
+    number_attr: "num_sparse_features"
+  }
+  input_arg {
+    name: "sparse_feature_indices"
+    type: DT_INT64
+    number_attr: "num_sparse_features"
+  }
+  input_arg {
+    name: "sparse_feature_values"
+    type: DT_FLOAT
+    number_attr: "num_sparse_features_with_values"
+  }
+  input_arg {
+    name: "dense_features"
+    type: DT_FLOAT
+    number_attr: "num_dense_features"
+  }
+  input_arg {
+    name: "example_weights"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "example_labels"
+    type: DT_FLOAT
   }
   input_arg {
+    name: "sparse_indices"
+    type: DT_INT64
+    number_attr: "num_sparse_features"
+  }
+  input_arg {
+    name: "sparse_weights"
+    type: DT_FLOAT
+    number_attr: "num_sparse_features"
+  }
+  input_arg {
+    name: "dense_weights"
+    type: DT_FLOAT
+    number_attr: "num_dense_features"
+  }
+  input_arg {
+    name: "example_state_data"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "out_example_state_data"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "out_delta_sparse_weights"
+    type: DT_FLOAT
+    number_attr: "num_sparse_features"
+  }
+  output_arg {
+    name: "out_delta_dense_weights"
+    type: DT_FLOAT
+    number_attr: "num_dense_features"
+  }
+  attr {
+    name: "loss_type"
+    type: "string"
+    allowed_values {
+      list {
+        s: "logistic_loss"
+        s: "squared_loss"
+        s: "hinge_loss"
+        s: "smooth_hinge_loss"
+      }
+    }
+  }
+  attr {
+    name: "adaptative"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  attr {
+    name: "num_sparse_features"
+    type: "int"
+    has_minimum: true
+  }
+  attr {
+    name: "num_sparse_features_with_values"
+    type: "int"
+    has_minimum: true
+  }
+  attr {
+    name: "num_dense_features"
+    type: "int"
+    has_minimum: true
+  }
+  attr {
     name: "l1"
-    type_attr: "T"
+    type: "float"
+  }
+  attr {
+    name: "l2"
+    type: "float"
+  }
+  attr {
+    name: "num_loss_partitions"
+    type: "int"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "num_inner_iterations"
+    type: "int"
+    has_minimum: true
+    minimum: 1
   }
+}
+op {
+  name: "SdcaShrinkL1"
   input_arg {
+    name: "weights"
+    type: DT_FLOAT
+    number_attr: "num_features"
+    is_ref: true
+  }
+  attr {
+    name: "num_features"
+    type: "int"
+    has_minimum: true
+  }
+  attr {
+    name: "l1"
+    type: "float"
+  }
+  attr {
     name: "l2"
-    type_attr: "T"
+    type: "float"
   }
+}
+op {
+  name: "SegmentMax"
   input_arg {
-    name: "grad"
+    name: "data"
     type_attr: "T"
   }
   input_arg {
-    name: "indices"
+    name: "segment_ids"
     type_attr: "Tindices"
   }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
   attr {
     name: "T"
     type: "type"
@@ -29567,17 +41593,12 @@ op {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT64
         type: DT_INT32
+        type: DT_INT64
         type: DT_UINT8
-        type: DT_UINT16
         type: DT_INT16
         type: DT_INT8
-        type: DT_COMPLEX64
-        type: DT_COMPLEX128
-        type: DT_QINT8
-        type: DT_QUINT8
-        type: DT_QINT32
+        type: DT_UINT16
         type: DT_HALF
       }
     }
@@ -29592,41 +41613,21 @@ op {
       }
     }
   }
-  attr {
-    name: "use_locking"
-    type: "bool"
-    default_value {
-      b: false
-    }
-  }
-  is_stateful: true
 }
 op {
-  name: "ResourceSparseApplyProximalGradientDescent"
-  input_arg {
-    name: "var"
-    type: DT_RESOURCE
-  }
-  input_arg {
-    name: "alpha"
-    type_attr: "T"
-  }
+  name: "SegmentMax"
   input_arg {
-    name: "l1"
+    name: "data"
     type_attr: "T"
   }
   input_arg {
-    name: "l2"
-    type_attr: "T"
+    name: "segment_ids"
+    type_attr: "Tindices"
   }
-  input_arg {
-    name: "grad"
+  output_arg {
+    name: "output"
     type_attr: "T"
   }
-  input_arg {
-    name: "indices"
-    type_attr: "Tindices"
-  }
   attr {
     name: "T"
     type: "type"
@@ -29634,17 +41635,12 @@ op {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT64
         type: DT_INT32
+        type: DT_INT64
         type: DT_UINT8
-        type: DT_UINT16
         type: DT_INT16
         type: DT_INT8
-        type: DT_COMPLEX64
-        type: DT_COMPLEX128
-        type: DT_QINT8
-        type: DT_QUINT8
-        type: DT_QINT32
+        type: DT_UINT16
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
@@ -29661,53 +41657,66 @@ op {
       }
     }
   }
-  attr {
-    name: "use_locking"
-    type: "bool"
-    default_value {
-      b: false
-    }
-  }
-  is_stateful: true
 }
 op {
-  name: "ResourceSparseApplyRMSProp"
-  input_arg {
-    name: "var"
-    type: DT_RESOURCE
-  }
-  input_arg {
-    name: "ms"
-    type: DT_RESOURCE
-  }
-  input_arg {
-    name: "mom"
-    type: DT_RESOURCE
-  }
+  name: "SegmentMax"
   input_arg {
-    name: "lr"
+    name: "data"
     type_attr: "T"
   }
   input_arg {
-    name: "rho"
-    type_attr: "T"
+    name: "segment_ids"
+    type_attr: "Tindices"
   }
-  input_arg {
-    name: "momentum"
+  output_arg {
+    name: "output"
     type_attr: "T"
   }
-  input_arg {
-    name: "epsilon"
-    type_attr: "T"
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_UINT16
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
+      }
+    }
+  }
+  attr {
+    name: "Tindices"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
   }
+}
+op {
+  name: "SegmentMean"
   input_arg {
-    name: "grad"
+    name: "data"
     type_attr: "T"
   }
   input_arg {
-    name: "indices"
+    name: "segment_ids"
     type_attr: "Tindices"
   }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
   attr {
     name: "T"
     type: "type"
@@ -29715,17 +41724,12 @@ op {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT64
         type: DT_INT32
+        type: DT_INT64
         type: DT_UINT8
-        type: DT_UINT16
         type: DT_INT16
         type: DT_INT8
-        type: DT_COMPLEX64
-        type: DT_COMPLEX128
-        type: DT_QINT8
-        type: DT_QUINT8
-        type: DT_QINT32
+        type: DT_UINT16
         type: DT_HALF
       }
     }
@@ -29740,53 +41744,21 @@ op {
       }
     }
   }
-  attr {
-    name: "use_locking"
-    type: "bool"
-    default_value {
-      b: false
-    }
-  }
-  is_stateful: true
 }
 op {
-  name: "ResourceSparseApplyRMSProp"
-  input_arg {
-    name: "var"
-    type: DT_RESOURCE
-  }
-  input_arg {
-    name: "ms"
-    type: DT_RESOURCE
-  }
-  input_arg {
-    name: "mom"
-    type: DT_RESOURCE
-  }
-  input_arg {
-    name: "lr"
-    type_attr: "T"
-  }
-  input_arg {
-    name: "rho"
-    type_attr: "T"
-  }
+  name: "SegmentMean"
   input_arg {
-    name: "momentum"
+    name: "data"
     type_attr: "T"
   }
   input_arg {
-    name: "epsilon"
-    type_attr: "T"
+    name: "segment_ids"
+    type_attr: "Tindices"
   }
-  input_arg {
-    name: "grad"
+  output_arg {
+    name: "output"
     type_attr: "T"
   }
-  input_arg {
-    name: "indices"
-    type_attr: "Tindices"
-  }
   attr {
     name: "T"
     type: "type"
@@ -29794,17 +41766,12 @@ op {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT64
         type: DT_INT32
+        type: DT_INT64
         type: DT_UINT8
-        type: DT_UINT16
         type: DT_INT16
         type: DT_INT8
-        type: DT_COMPLEX64
-        type: DT_COMPLEX128
-        type: DT_QINT8
-        type: DT_QUINT8
-        type: DT_QINT32
+        type: DT_UINT16
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
@@ -29821,262 +41788,147 @@ op {
       }
     }
   }
-  attr {
-    name: "use_locking"
-    type: "bool"
-    default_value {
-      b: false
-    }
-  }
-  is_stateful: true
 }
 op {
-  name: "ResourceStridedSliceAssign"
-  input_arg {
-    name: "ref"
-    type: DT_RESOURCE
-  }
-  input_arg {
-    name: "begin"
-    type_attr: "Index"
-  }
+  name: "SegmentMean"
   input_arg {
-    name: "end"
-    type_attr: "Index"
+    name: "data"
+    type_attr: "T"
   }
   input_arg {
-    name: "strides"
-    type_attr: "Index"
+    name: "segment_ids"
+    type_attr: "Tindices"
   }
-  input_arg {
-    name: "value"
+  output_arg {
+    name: "output"
     type_attr: "T"
   }
   attr {
     name: "T"
     type: "type"
-  }
-  attr {
-    name: "Index"
-    type: "type"
     allowed_values {
       list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
         type: DT_INT32
         type: DT_INT64
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_UINT16
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
   attr {
-    name: "begin_mask"
-    type: "int"
-    default_value {
-      i: 0
-    }
-  }
-  attr {
-    name: "end_mask"
-    type: "int"
-    default_value {
-      i: 0
-    }
-  }
-  attr {
-    name: "ellipsis_mask"
-    type: "int"
-    default_value {
-      i: 0
-    }
-  }
-  attr {
-    name: "new_axis_mask"
-    type: "int"
-    default_value {
-      i: 0
-    }
-  }
-  attr {
-    name: "shrink_axis_mask"
-    type: "int"
-    default_value {
-      i: 0
+    name: "Tindices"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
     }
   }
-  is_stateful: true
 }
 op {
-  name: "Restore"
+  name: "SegmentMin"
   input_arg {
-    name: "file_pattern"
-    type: DT_STRING
+    name: "data"
+    type_attr: "T"
   }
   input_arg {
-    name: "tensor_name"
-    type: DT_STRING
+    name: "segment_ids"
+    type_attr: "Tindices"
   }
   output_arg {
-    name: "tensor"
-    type_attr: "dt"
+    name: "output"
+    type_attr: "T"
   }
   attr {
-    name: "dt"
+    name: "T"
     type: "type"
-  }
-  attr {
-    name: "preferred_shard"
-    type: "int"
-    default_value {
-      i: -1
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_UINT16
+        type: DT_HALF
+      }
     }
   }
-}
-op {
-  name: "Restore"
-  input_arg {
-    name: "file_pattern"
-    type: DT_STRING
-  }
-  input_arg {
-    name: "tensor_name"
-    type: DT_STRING
-  }
-  output_arg {
-    name: "tensor"
-    type_attr: "dt"
-  }
   attr {
-    name: "dt"
+    name: "Tindices"
     type: "type"
-  }
-  attr {
-    name: "preferred_shard"
-    type: "int"
-    default_value {
-      i: -1
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
     }
   }
-  is_stateful: true
 }
 op {
-  name: "RestoreSlice"
-  input_arg {
-    name: "file_pattern"
-    type: DT_STRING
-  }
+  name: "SegmentMin"
   input_arg {
-    name: "tensor_name"
-    type: DT_STRING
+    name: "data"
+    type_attr: "T"
   }
   input_arg {
-    name: "shape_and_slice"
-    type: DT_STRING
+    name: "segment_ids"
+    type_attr: "Tindices"
   }
   output_arg {
-    name: "tensor"
-    type_attr: "dt"
+    name: "output"
+    type_attr: "T"
   }
   attr {
-    name: "dt"
+    name: "T"
     type: "type"
-  }
-  attr {
-    name: "preferred_shard"
-    type: "int"
-    default_value {
-      i: -1
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_UINT16
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+      }
     }
   }
-}
-op {
-  name: "RestoreSlice"
-  input_arg {
-    name: "file_pattern"
-    type: DT_STRING
-  }
-  input_arg {
-    name: "tensor_name"
-    type: DT_STRING
-  }
-  input_arg {
-    name: "shape_and_slice"
-    type: DT_STRING
-  }
-  output_arg {
-    name: "tensor"
-    type_attr: "dt"
-  }
   attr {
-    name: "dt"
+    name: "Tindices"
     type: "type"
-  }
-  attr {
-    name: "preferred_shard"
-    type: "int"
-    default_value {
-      i: -1
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
     }
   }
-  is_stateful: true
-}
-op {
-  name: "RestoreV2"
-  input_arg {
-    name: "prefix"
-    type: DT_STRING
-  }
-  input_arg {
-    name: "tensor_names"
-    type: DT_STRING
-  }
-  input_arg {
-    name: "shape_and_slices"
-    type: DT_STRING
-  }
-  output_arg {
-    name: "tensors"
-    type_list_attr: "dtypes"
-  }
-  attr {
-    name: "dtypes"
-    type: "list(type)"
-    has_minimum: true
-    minimum: 1
-  }
-}
-op {
-  name: "RestoreV2"
-  input_arg {
-    name: "prefix"
-    type: DT_STRING
-  }
-  input_arg {
-    name: "tensor_names"
-    type: DT_STRING
-  }
-  input_arg {
-    name: "shape_and_slices"
-    type: DT_STRING
-  }
-  output_arg {
-    name: "tensors"
-    type_list_attr: "dtypes"
-  }
-  attr {
-    name: "dtypes"
-    type: "list(type)"
-    has_minimum: true
-    minimum: 1
-  }
-  is_stateful: true
 }
 op {
-  name: "Reverse"
+  name: "SegmentMin"
   input_arg {
-    name: "tensor"
+    name: "data"
     type_attr: "T"
   }
   input_arg {
-    name: "dims"
-    type: DT_BOOL
+    name: "segment_ids"
+    type_attr: "Tindices"
   }
   output_arg {
     name: "output"
@@ -30087,29 +41939,41 @@ op {
     type: "type"
     allowed_values {
       list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
         type: DT_UINT8
+        type: DT_INT16
         type: DT_INT8
+        type: DT_UINT16
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
+      }
+    }
+  }
+  attr {
+    name: "Tindices"
+    type: "type"
+    allowed_values {
+      list {
         type: DT_INT32
         type: DT_INT64
-        type: DT_BOOL
-        type: DT_HALF
-        type: DT_FLOAT
-        type: DT_DOUBLE
-        type: DT_COMPLEX64
-        type: DT_COMPLEX128
       }
     }
   }
 }
 op {
-  name: "Reverse"
+  name: "SegmentProd"
   input_arg {
-    name: "tensor"
+    name: "data"
     type_attr: "T"
   }
   input_arg {
-    name: "dims"
-    type: DT_BOOL
+    name: "segment_ids"
+    type_attr: "Tindices"
   }
   output_arg {
     name: "output"
@@ -30120,30 +41984,43 @@ op {
     type: "type"
     allowed_values {
       list {
-        type: DT_UINT8
-        type: DT_INT8
-        type: DT_INT32
-        type: DT_INT64
-        type: DT_BOOL
-        type: DT_HALF
         type: DT_FLOAT
         type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
         type: DT_COMPLEX64
         type: DT_COMPLEX128
-        type: DT_STRING
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+      }
+    }
+  }
+  attr {
+    name: "Tindices"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
       }
     }
   }
 }
 op {
-  name: "Reverse"
+  name: "SegmentProd"
   input_arg {
-    name: "tensor"
+    name: "data"
     type_attr: "T"
   }
   input_arg {
-    name: "dims"
-    type: DT_BOOL
+    name: "segment_ids"
+    type_attr: "Tindices"
   }
   output_arg {
     name: "output"
@@ -30154,58 +42031,78 @@ op {
     type: "type"
     allowed_values {
       list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
         type: DT_UINT8
-        type: DT_INT8
         type: DT_UINT16
         type: DT_INT16
-        type: DT_INT32
-        type: DT_INT64
-        type: DT_BOOL
-        type: DT_HALF
-        type: DT_FLOAT
-        type: DT_DOUBLE
+        type: DT_INT8
         type: DT_COMPLEX64
         type: DT_COMPLEX128
-        type: DT_STRING
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+      }
+    }
+  }
+  attr {
+    name: "Tindices"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
       }
     }
   }
 }
 op {
-  name: "ReverseSequence"
+  name: "SegmentProd"
   input_arg {
-    name: "input"
+    name: "data"
     type_attr: "T"
   }
   input_arg {
-    name: "seq_lengths"
-    type_attr: "Tlen"
+    name: "segment_ids"
+    type_attr: "Tindices"
   }
   output_arg {
     name: "output"
     type_attr: "T"
   }
-  attr {
-    name: "seq_dim"
-    type: "int"
-  }
-  attr {
-    name: "batch_dim"
-    type: "int"
-    default_value {
-      i: 0
-    }
-  }
   attr {
     name: "T"
     type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
+      }
+    }
   }
   attr {
-    name: "Tlen"
+    name: "Tindices"
     type: "type"
-    default_value {
-      type: DT_INT64
-    }
     allowed_values {
       list {
         type: DT_INT32
@@ -30215,187 +42112,182 @@ op {
   }
 }
 op {
-  name: "ReverseV2"
+  name: "SegmentSum"
   input_arg {
-    name: "tensor"
+    name: "data"
     type_attr: "T"
   }
   input_arg {
-    name: "axis"
-    type_attr: "Tidx"
+    name: "segment_ids"
+    type_attr: "Tindices"
   }
   output_arg {
     name: "output"
     type_attr: "T"
   }
   attr {
-    name: "Tidx"
+    name: "T"
     type: "type"
-    default_value {
-      type: DT_INT32
-    }
     allowed_values {
       list {
-        type: DT_INT32
+        type: DT_FLOAT
+        type: DT_DOUBLE
         type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
       }
     }
   }
   attr {
-    name: "T"
+    name: "Tindices"
     type: "type"
     allowed_values {
       list {
-        type: DT_UINT8
-        type: DT_INT8
         type: DT_INT32
         type: DT_INT64
-        type: DT_BOOL
-        type: DT_HALF
-        type: DT_FLOAT
-        type: DT_DOUBLE
-        type: DT_COMPLEX64
-        type: DT_COMPLEX128
       }
     }
   }
 }
 op {
-  name: "ReverseV2"
+  name: "SegmentSum"
   input_arg {
-    name: "tensor"
+    name: "data"
     type_attr: "T"
   }
   input_arg {
-    name: "axis"
-    type_attr: "Tidx"
+    name: "segment_ids"
+    type_attr: "Tindices"
   }
   output_arg {
     name: "output"
     type_attr: "T"
   }
   attr {
-    name: "Tidx"
+    name: "T"
     type: "type"
-    default_value {
-      type: DT_INT32
-    }
     allowed_values {
       list {
-        type: DT_INT32
+        type: DT_FLOAT
+        type: DT_DOUBLE
         type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
       }
     }
   }
   attr {
-    name: "T"
+    name: "Tindices"
     type: "type"
     allowed_values {
       list {
-        type: DT_UINT8
-        type: DT_INT8
         type: DT_INT32
         type: DT_INT64
-        type: DT_BOOL
-        type: DT_HALF
-        type: DT_FLOAT
-        type: DT_DOUBLE
-        type: DT_COMPLEX64
-        type: DT_COMPLEX128
-        type: DT_STRING
       }
     }
   }
 }
 op {
-  name: "ReverseV2"
+  name: "SegmentSum"
   input_arg {
-    name: "tensor"
+    name: "data"
     type_attr: "T"
   }
   input_arg {
-    name: "axis"
-    type_attr: "Tidx"
+    name: "segment_ids"
+    type_attr: "Tindices"
   }
   output_arg {
     name: "output"
     type_attr: "T"
   }
   attr {
-    name: "Tidx"
+    name: "T"
     type: "type"
-    default_value {
-      type: DT_INT32
-    }
     allowed_values {
       list {
-        type: DT_INT32
+        type: DT_FLOAT
+        type: DT_DOUBLE
         type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
   attr {
-    name: "T"
+    name: "Tindices"
     type: "type"
     allowed_values {
       list {
-        type: DT_UINT8
-        type: DT_INT8
-        type: DT_UINT16
-        type: DT_INT16
         type: DT_INT32
         type: DT_INT64
-        type: DT_BOOL
-        type: DT_HALF
-        type: DT_FLOAT
-        type: DT_DOUBLE
-        type: DT_COMPLEX64
-        type: DT_COMPLEX128
-        type: DT_STRING
       }
     }
   }
 }
 op {
-  name: "RightShift"
+  name: "Select"
   input_arg {
-    name: "x"
+    name: "condition"
+    type: DT_BOOL
+  }
+  input_arg {
+    name: "t"
     type_attr: "T"
   }
   input_arg {
-    name: "y"
+    name: "e"
     type_attr: "T"
   }
   output_arg {
-    name: "z"
+    name: "output"
     type_attr: "T"
   }
   attr {
     name: "T"
     type: "type"
-    allowed_values {
-      list {
-        type: DT_INT8
-        type: DT_INT16
-        type: DT_INT32
-        type: DT_INT64
-        type: DT_UINT8
-        type: DT_UINT16
-        type: DT_UINT32
-        type: DT_UINT64
-      }
-    }
   }
-  is_commutative: true
 }
 op {
-  name: "Rint"
+  name: "SelfAdjointEig"
   input_arg {
-    name: "x"
+    name: "input"
     type_attr: "T"
   }
   output_arg {
-    name: "y"
+    name: "output"
     type_attr: "T"
   }
   attr {
@@ -30403,56 +42295,75 @@ op {
     type: "type"
     allowed_values {
       list {
-        type: DT_FLOAT
         type: DT_DOUBLE
+        type: DT_FLOAT
       }
     }
   }
+  deprecation {
+    version: 11
+  }
 }
 op {
-  name: "Round"
+  name: "SelfAdjointEigV2"
   input_arg {
-    name: "x"
+    name: "input"
     type_attr: "T"
   }
   output_arg {
-    name: "y"
+    name: "e"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "v"
     type_attr: "T"
   }
+  attr {
+    name: "compute_v"
+    type: "bool"
+    default_value {
+      b: true
+    }
+  }
   attr {
     name: "T"
     type: "type"
     allowed_values {
       list {
-        type: DT_HALF
-        type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT32
-        type: DT_INT64
-        type: DT_COMPLEX64
-        type: DT_COMPLEX128
+        type: DT_FLOAT
       }
     }
   }
 }
 op {
-  name: "Rsqrt"
+  name: "SelfAdjointEigV2"
   input_arg {
-    name: "x"
+    name: "input"
     type_attr: "T"
   }
   output_arg {
-    name: "y"
+    name: "e"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "v"
     type_attr: "T"
   }
+  attr {
+    name: "compute_v"
+    type: "bool"
+    default_value {
+      b: true
+    }
+  }
   attr {
     name: "T"
     type: "type"
     allowed_values {
       list {
-        type: DT_HALF
-        type: DT_FLOAT
         type: DT_DOUBLE
+        type: DT_FLOAT
         type: DT_COMPLEX64
         type: DT_COMPLEX128
       }
@@ -30460,17 +42371,13 @@ op {
   }
 }
 op {
-  name: "RsqrtGrad"
-  input_arg {
-    name: "x"
-    type_attr: "T"
-  }
+  name: "Selu"
   input_arg {
-    name: "y"
+    name: "features"
     type_attr: "T"
   }
   output_arg {
-    name: "z"
+    name: "activations"
     type_attr: "T"
   }
   attr {
@@ -30481,24 +42388,18 @@ op {
         type: DT_HALF
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_COMPLEX64
-        type: DT_COMPLEX128
       }
     }
   }
 }
 op {
-  name: "RsqrtGrad"
-  input_arg {
-    name: "y"
-    type_attr: "T"
-  }
+  name: "Selu"
   input_arg {
-    name: "dy"
+    name: "features"
     type_attr: "T"
   }
   output_arg {
-    name: "z"
+    name: "activations"
     type_attr: "T"
   }
   attr {
@@ -30507,559 +42408,272 @@ op {
     allowed_values {
       list {
         type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_COMPLEX64
-        type: DT_COMPLEX128
       }
     }
   }
 }
 op {
-  name: "SampleDistortedBoundingBox"
+  name: "SeluGrad"
   input_arg {
-    name: "image_size"
+    name: "gradients"
     type_attr: "T"
   }
   input_arg {
-    name: "bounding_boxes"
-    type: DT_FLOAT
-  }
-  output_arg {
-    name: "begin"
+    name: "outputs"
     type_attr: "T"
   }
   output_arg {
-    name: "size"
+    name: "backprops"
     type_attr: "T"
   }
-  output_arg {
-    name: "bboxes"
-    type: DT_FLOAT
-  }
   attr {
     name: "T"
     type: "type"
     allowed_values {
       list {
-        type: DT_UINT8
-        type: DT_INT8
-        type: DT_INT16
-        type: DT_INT32
-        type: DT_INT64
-      }
-    }
-  }
-  attr {
-    name: "seed"
-    type: "int"
-    default_value {
-      i: 0
-    }
-  }
-  attr {
-    name: "seed2"
-    type: "int"
-    default_value {
-      i: 0
-    }
-  }
-  attr {
-    name: "min_object_covered"
-    type: "float"
-    default_value {
-      f: 0.1
-    }
-  }
-  attr {
-    name: "aspect_ratio_range"
-    type: "list(float)"
-    default_value {
-      list {
-        f: 0.75
-        f: 1.33
-      }
-    }
-  }
-  attr {
-    name: "area_range"
-    type: "list(float)"
-    default_value {
-      list {
-        f: 0.05
-        f: 1
+        type: DT_HALF
+        type: DT_FLOAT
+        type: DT_DOUBLE
       }
     }
   }
-  attr {
-    name: "max_attempts"
-    type: "int"
-    default_value {
-      i: 100
-    }
-  }
-  attr {
-    name: "use_image_if_no_bounding_boxes"
-    type: "bool"
-    default_value {
-      b: false
-    }
-  }
-  is_stateful: true
 }
 op {
-  name: "SampleDistortedBoundingBoxV2"
+  name: "SeluGrad"
   input_arg {
-    name: "image_size"
+    name: "gradients"
     type_attr: "T"
   }
   input_arg {
-    name: "bounding_boxes"
-    type: DT_FLOAT
-  }
-  input_arg {
-    name: "min_object_covered"
-    type: DT_FLOAT
-  }
-  output_arg {
-    name: "begin"
+    name: "outputs"
     type_attr: "T"
   }
   output_arg {
-    name: "size"
+    name: "backprops"
     type_attr: "T"
   }
-  output_arg {
-    name: "bboxes"
-    type: DT_FLOAT
-  }
   attr {
     name: "T"
     type: "type"
     allowed_values {
       list {
-        type: DT_UINT8
-        type: DT_INT8
-        type: DT_INT16
-        type: DT_INT32
-        type: DT_INT64
-      }
-    }
-  }
-  attr {
-    name: "seed"
-    type: "int"
-    default_value {
-      i: 0
-    }
-  }
-  attr {
-    name: "seed2"
-    type: "int"
-    default_value {
-      i: 0
-    }
-  }
-  attr {
-    name: "aspect_ratio_range"
-    type: "list(float)"
-    default_value {
-      list {
-        f: 0.75
-        f: 1.33
-      }
-    }
-  }
-  attr {
-    name: "area_range"
-    type: "list(float)"
-    default_value {
-      list {
-        f: 0.05
-        f: 1
+        type: DT_HALF
+        type: DT_BFLOAT16
+        type: DT_FLOAT
+        type: DT_DOUBLE
       }
     }
   }
-  attr {
-    name: "max_attempts"
-    type: "int"
-    default_value {
-      i: 100
-    }
-  }
-  attr {
-    name: "use_image_if_no_bounding_boxes"
-    type: "bool"
-    default_value {
-      b: false
-    }
-  }
-  is_stateful: true
-}
-op {
-  name: "Save"
-  input_arg {
-    name: "filename"
-    type: DT_STRING
-  }
-  input_arg {
-    name: "tensor_names"
-    type: DT_STRING
-  }
-  input_arg {
-    name: "data"
-    type_list_attr: "T"
-  }
-  attr {
-    name: "T"
-    type: "list(type)"
-    has_minimum: true
-    minimum: 1
-  }
 }
 op {
-  name: "Save"
-  input_arg {
-    name: "filename"
-    type: DT_STRING
-  }
-  input_arg {
-    name: "tensor_names"
-    type: DT_STRING
-  }
+  name: "SerializeIterator"
   input_arg {
-    name: "data"
-    type_list_attr: "T"
+    name: "resource_handle"
+    type: DT_RESOURCE
   }
-  attr {
-    name: "T"
-    type: "list(type)"
-    has_minimum: true
-    minimum: 1
+  output_arg {
+    name: "serialized"
+    type: DT_VARIANT
   }
   is_stateful: true
 }
 op {
-  name: "SaveSlices"
-  input_arg {
-    name: "filename"
-    type: DT_STRING
-  }
-  input_arg {
-    name: "tensor_names"
-    type: DT_STRING
-  }
-  input_arg {
-    name: "shapes_and_slices"
-    type: DT_STRING
-  }
+  name: "SerializeManySparse"
   input_arg {
-    name: "data"
-    type_list_attr: "T"
-  }
-  attr {
-    name: "T"
-    type: "list(type)"
-    has_minimum: true
-    minimum: 1
+    name: "sparse_indices"
+    type: DT_INT64
   }
-}
-op {
-  name: "SaveSlices"
   input_arg {
-    name: "filename"
-    type: DT_STRING
+    name: "sparse_values"
+    type_attr: "T"
   }
   input_arg {
-    name: "tensor_names"
-    type: DT_STRING
+    name: "sparse_shape"
+    type: DT_INT64
   }
-  input_arg {
-    name: "shapes_and_slices"
+  output_arg {
+    name: "serialized_sparse"
     type: DT_STRING
   }
-  input_arg {
-    name: "data"
-    type_list_attr: "T"
-  }
   attr {
     name: "T"
-    type: "list(type)"
-    has_minimum: true
-    minimum: 1
-  }
-  is_stateful: true
-}
-op {
-  name: "SaveV2"
-  input_arg {
-    name: "prefix"
-    type: DT_STRING
-  }
-  input_arg {
-    name: "tensor_names"
-    type: DT_STRING
-  }
-  input_arg {
-    name: "shape_and_slices"
-    type: DT_STRING
-  }
-  input_arg {
-    name: "tensors"
-    type_list_attr: "dtypes"
-  }
-  attr {
-    name: "dtypes"
-    type: "list(type)"
-    has_minimum: true
-    minimum: 1
+    type: "type"
   }
 }
 op {
-  name: "SaveV2"
+  name: "SerializeManySparse"
   input_arg {
-    name: "prefix"
-    type: DT_STRING
+    name: "sparse_indices"
+    type: DT_INT64
   }
   input_arg {
-    name: "tensor_names"
-    type: DT_STRING
+    name: "sparse_values"
+    type_attr: "T"
   }
   input_arg {
-    name: "shape_and_slices"
-    type: DT_STRING
+    name: "sparse_shape"
+    type: DT_INT64
   }
-  input_arg {
-    name: "tensors"
-    type_list_attr: "dtypes"
+  output_arg {
+    name: "serialized_sparse"
+    type_attr: "out_type"
   }
   attr {
-    name: "dtypes"
-    type: "list(type)"
-    has_minimum: true
-    minimum: 1
-  }
-  is_stateful: true
-}
-op {
-  name: "ScalarSummary"
-  input_arg {
-    name: "tags"
-    type: DT_STRING
-  }
-  input_arg {
-    name: "values"
-    type_attr: "T"
-  }
-  output_arg {
-    name: "summary"
-    type: DT_STRING
+    name: "T"
+    type: "type"
   }
   attr {
-    name: "T"
+    name: "out_type"
     type: "type"
+    default_value {
+      type: DT_STRING
+    }
     allowed_values {
       list {
-        type: DT_FLOAT
-        type: DT_DOUBLE
-        type: DT_INT32
-        type: DT_INT64
-        type: DT_UINT8
-        type: DT_INT16
-        type: DT_INT8
-        type: DT_UINT16
-        type: DT_HALF
+        type: DT_STRING
+        type: DT_VARIANT
       }
     }
   }
 }
 op {
-  name: "ScalarSummary"
+  name: "SerializeSparse"
   input_arg {
-    name: "tags"
-    type: DT_STRING
+    name: "sparse_indices"
+    type: DT_INT64
   }
   input_arg {
-    name: "values"
+    name: "sparse_values"
     type_attr: "T"
   }
+  input_arg {
+    name: "sparse_shape"
+    type: DT_INT64
+  }
   output_arg {
-    name: "summary"
+    name: "serialized_sparse"
     type: DT_STRING
   }
   attr {
     name: "T"
     type: "type"
-    allowed_values {
-      list {
-        type: DT_FLOAT
-        type: DT_DOUBLE
-        type: DT_INT32
-        type: DT_INT64
-        type: DT_UINT8
-        type: DT_INT16
-        type: DT_INT8
-        type: DT_UINT16
-        type: DT_HALF
-        type: DT_UINT32
-        type: DT_UINT64
-      }
-    }
   }
 }
 op {
-  name: "ScanDataset"
+  name: "SerializeSparse"
   input_arg {
-    name: "input_dataset"
-    type: DT_VARIANT
+    name: "sparse_indices"
+    type: DT_INT64
   }
   input_arg {
-    name: "initial_state"
-    type_list_attr: "Tstate"
+    name: "sparse_values"
+    type_attr: "T"
   }
   input_arg {
-    name: "other_arguments"
-    type_list_attr: "Targuments"
+    name: "sparse_shape"
+    type: DT_INT64
   }
   output_arg {
-    name: "handle"
-    type: DT_VARIANT
+    name: "serialized_sparse"
+    type_attr: "out_type"
   }
   attr {
-    name: "f"
-    type: "func"
+    name: "T"
+    type: "type"
   }
   attr {
-    name: "Tstate"
-    type: "list(type)"
-    has_minimum: true
-    minimum: 1
+    name: "out_type"
+    type: "type"
+    default_value {
+      type: DT_STRING
+    }
+    allowed_values {
+      list {
+        type: DT_STRING
+        type: DT_VARIANT
+      }
+    }
   }
-  attr {
-    name: "Targuments"
-    type: "list(type)"
-    has_minimum: true
+}
+op {
+  name: "SerializeTensor"
+  input_arg {
+    name: "tensor"
+    type_attr: "T"
   }
-  attr {
-    name: "output_types"
-    type: "list(type)"
-    has_minimum: true
-    minimum: 1
+  output_arg {
+    name: "serialized"
+    type: DT_STRING
   }
   attr {
-    name: "output_shapes"
-    type: "list(shape)"
-    has_minimum: true
-    minimum: 1
+    name: "T"
+    type: "type"
   }
 }
 op {
-  name: "ScatterAdd"
+  name: "SetSize"
   input_arg {
-    name: "ref"
-    type_attr: "T"
-    is_ref: true
+    name: "set_indices"
+    type: DT_INT64
   }
   input_arg {
-    name: "indices"
-    type_attr: "Tindices"
+    name: "set_values"
+    type_attr: "T"
   }
   input_arg {
-    name: "updates"
-    type_attr: "T"
+    name: "set_shape"
+    type: DT_INT64
   }
   output_arg {
-    name: "output_ref"
-    type_attr: "T"
-    is_ref: true
+    name: "size"
+    type: DT_INT32
   }
   attr {
-    name: "T"
-    type: "type"
-    allowed_values {
-      list {
-        type: DT_FLOAT
-        type: DT_DOUBLE
-        type: DT_INT64
-        type: DT_INT32
-        type: DT_UINT8
-        type: DT_UINT16
-        type: DT_INT16
-        type: DT_INT8
-        type: DT_COMPLEX64
-        type: DT_COMPLEX128
-        type: DT_QINT8
-        type: DT_QUINT8
-        type: DT_QINT32
-        type: DT_HALF
-      }
+    name: "validate_indices"
+    type: "bool"
+    default_value {
+      b: true
     }
   }
   attr {
-    name: "Tindices"
+    name: "T"
     type: "type"
     allowed_values {
       list {
+        type: DT_INT8
+        type: DT_INT16
         type: DT_INT32
         type: DT_INT64
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_STRING
       }
     }
   }
-  attr {
-    name: "use_locking"
-    type: "bool"
-    default_value {
-      b: false
-    }
-  }
 }
 op {
-  name: "ScatterAdd"
-  input_arg {
-    name: "ref"
-    type_attr: "T"
-    is_ref: true
-  }
-  input_arg {
-    name: "indices"
-    type_attr: "Tindices"
-  }
+  name: "Shape"
   input_arg {
-    name: "updates"
+    name: "input"
     type_attr: "T"
   }
   output_arg {
-    name: "output_ref"
-    type_attr: "T"
-    is_ref: true
+    name: "output"
+    type_attr: "out_type"
   }
   attr {
     name: "T"
     type: "type"
-    allowed_values {
-      list {
-        type: DT_FLOAT
-        type: DT_DOUBLE
-        type: DT_INT64
-        type: DT_INT32
-        type: DT_UINT8
-        type: DT_UINT16
-        type: DT_INT16
-        type: DT_INT8
-        type: DT_COMPLEX64
-        type: DT_COMPLEX128
-        type: DT_QINT8
-        type: DT_QUINT8
-        type: DT_QINT32
-        type: DT_HALF
-        type: DT_UINT32
-        type: DT_UINT64
-      }
-    }
   }
   attr {
-    name: "Tindices"
+    name: "out_type"
     type: "type"
+    default_value {
+      type: DT_INT32
+    }
     allowed_values {
       list {
         type: DT_INT32
@@ -31067,59 +42681,35 @@ op {
       }
     }
   }
-  attr {
-    name: "use_locking"
-    type: "bool"
-    default_value {
-      b: false
-    }
-  }
 }
 op {
-  name: "ScatterDiv"
-  input_arg {
-    name: "ref"
-    type_attr: "T"
-    is_ref: true
-  }
-  input_arg {
-    name: "indices"
-    type_attr: "Tindices"
-  }
+  name: "ShapeN"
   input_arg {
-    name: "updates"
+    name: "input"
     type_attr: "T"
+    number_attr: "N"
   }
   output_arg {
-    name: "output_ref"
-    type_attr: "T"
-    is_ref: true
+    name: "output"
+    type_attr: "out_type"
+    number_attr: "N"
+  }
+  attr {
+    name: "N"
+    type: "int"
+    has_minimum: true
+    minimum: 1
   }
   attr {
     name: "T"
     type: "type"
-    allowed_values {
-      list {
-        type: DT_FLOAT
-        type: DT_DOUBLE
-        type: DT_INT64
-        type: DT_INT32
-        type: DT_UINT8
-        type: DT_UINT16
-        type: DT_INT16
-        type: DT_INT8
-        type: DT_COMPLEX64
-        type: DT_COMPLEX128
-        type: DT_QINT8
-        type: DT_QUINT8
-        type: DT_QINT32
-        type: DT_HALF
-      }
-    }
   }
   attr {
-    name: "Tindices"
+    name: "out_type"
     type: "type"
+    default_value {
+      type: DT_INT32
+    }
     allowed_values {
       list {
         type: DT_INT32
@@ -31127,369 +42717,243 @@ op {
       }
     }
   }
-  attr {
-    name: "use_locking"
-    type: "bool"
-    default_value {
-      b: false
-    }
-  }
 }
 op {
-  name: "ScatterDiv"
+  name: "ShardedFilename"
   input_arg {
-    name: "ref"
-    type_attr: "T"
-    is_ref: true
+    name: "basename"
+    type: DT_STRING
   }
   input_arg {
-    name: "indices"
-    type_attr: "Tindices"
+    name: "shard"
+    type: DT_INT32
   }
   input_arg {
-    name: "updates"
-    type_attr: "T"
+    name: "num_shards"
+    type: DT_INT32
   }
   output_arg {
-    name: "output_ref"
-    type_attr: "T"
-    is_ref: true
+    name: "filename"
+    type: DT_STRING
   }
-  attr {
-    name: "T"
-    type: "type"
-    allowed_values {
-      list {
-        type: DT_FLOAT
-        type: DT_DOUBLE
-        type: DT_INT64
-        type: DT_INT32
-        type: DT_UINT8
-        type: DT_UINT16
-        type: DT_INT16
-        type: DT_INT8
-        type: DT_COMPLEX64
-        type: DT_COMPLEX128
-        type: DT_QINT8
-        type: DT_QUINT8
-        type: DT_QINT32
-        type: DT_HALF
-        type: DT_UINT32
-        type: DT_UINT64
-      }
-    }
+}
+op {
+  name: "ShardedFilespec"
+  input_arg {
+    name: "basename"
+    type: DT_STRING
   }
-  attr {
-    name: "Tindices"
-    type: "type"
-    allowed_values {
-      list {
-        type: DT_INT32
-        type: DT_INT64
-      }
-    }
+  input_arg {
+    name: "num_shards"
+    type: DT_INT32
   }
-  attr {
-    name: "use_locking"
-    type: "bool"
-    default_value {
-      b: false
-    }
+  output_arg {
+    name: "filename"
+    type: DT_STRING
   }
 }
 op {
-  name: "ScatterMul"
+  name: "ShuffleDataset"
   input_arg {
-    name: "ref"
-    type_attr: "T"
-    is_ref: true
+    name: "input_dataset"
+    type: DT_VARIANT
   }
   input_arg {
-    name: "indices"
-    type_attr: "Tindices"
+    name: "buffer_size"
+    type: DT_INT64
   }
   input_arg {
-    name: "updates"
-    type_attr: "T"
+    name: "seed"
+    type: DT_INT64
   }
-  output_arg {
-    name: "output_ref"
-    type_attr: "T"
-    is_ref: true
+  input_arg {
+    name: "seed2"
+    type: DT_INT64
   }
-  attr {
-    name: "T"
-    type: "type"
-    allowed_values {
-      list {
-        type: DT_FLOAT
-        type: DT_DOUBLE
-        type: DT_INT64
-        type: DT_INT32
-        type: DT_UINT8
-        type: DT_UINT16
-        type: DT_INT16
-        type: DT_INT8
-        type: DT_COMPLEX64
-        type: DT_COMPLEX128
-        type: DT_QINT8
-        type: DT_QUINT8
-        type: DT_QINT32
-        type: DT_HALF
-      }
-    }
+  output_arg {
+    name: "handle"
+    type: DT_VARIANT
   }
   attr {
-    name: "Tindices"
-    type: "type"
-    allowed_values {
-      list {
-        type: DT_INT32
-        type: DT_INT64
-      }
-    }
+    name: "output_types"
+    type: "list(type)"
+    has_minimum: true
+    minimum: 1
   }
   attr {
-    name: "use_locking"
-    type: "bool"
-    default_value {
-      b: false
-    }
+    name: "output_shapes"
+    type: "list(shape)"
+    has_minimum: true
+    minimum: 1
   }
+  is_stateful: true
 }
 op {
-  name: "ScatterMul"
+  name: "ShuffleDataset"
   input_arg {
-    name: "ref"
-    type_attr: "T"
-    is_ref: true
+    name: "input_dataset"
+    type: DT_VARIANT
   }
   input_arg {
-    name: "indices"
-    type_attr: "Tindices"
+    name: "buffer_size"
+    type: DT_INT64
   }
   input_arg {
-    name: "updates"
-    type_attr: "T"
+    name: "seed"
+    type: DT_INT64
+  }
+  input_arg {
+    name: "seed2"
+    type: DT_INT64
   }
   output_arg {
-    name: "output_ref"
-    type_attr: "T"
-    is_ref: true
+    name: "handle"
+    type: DT_VARIANT
   }
   attr {
-    name: "T"
-    type: "type"
-    allowed_values {
-      list {
-        type: DT_FLOAT
-        type: DT_DOUBLE
-        type: DT_INT64
-        type: DT_INT32
-        type: DT_UINT8
-        type: DT_UINT16
-        type: DT_INT16
-        type: DT_INT8
-        type: DT_COMPLEX64
-        type: DT_COMPLEX128
-        type: DT_QINT8
-        type: DT_QUINT8
-        type: DT_QINT32
-        type: DT_HALF
-        type: DT_UINT32
-        type: DT_UINT64
-      }
-    }
+    name: "output_types"
+    type: "list(type)"
+    has_minimum: true
+    minimum: 1
   }
   attr {
-    name: "Tindices"
-    type: "type"
-    allowed_values {
-      list {
-        type: DT_INT32
-        type: DT_INT64
-      }
-    }
+    name: "output_shapes"
+    type: "list(shape)"
+    has_minimum: true
+    minimum: 1
+  }
+}
+op {
+  name: "ShuffleDataset"
+  input_arg {
+    name: "input_dataset"
+    type: DT_VARIANT
+  }
+  input_arg {
+    name: "buffer_size"
+    type: DT_INT64
+  }
+  input_arg {
+    name: "seed"
+    type: DT_INT64
+  }
+  input_arg {
+    name: "seed2"
+    type: DT_INT64
+  }
+  output_arg {
+    name: "handle"
+    type: DT_VARIANT
   }
   attr {
-    name: "use_locking"
+    name: "reshuffle_each_iteration"
     type: "bool"
     default_value {
-      b: false
+      b: true
     }
   }
+  attr {
+    name: "output_types"
+    type: "list(type)"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "output_shapes"
+    type: "list(shape)"
+    has_minimum: true
+    minimum: 1
+  }
 }
 op {
-  name: "ScatterNd"
-  input_arg {
-    name: "indices"
-    type_attr: "Tindices"
-  }
+  name: "Sigmoid"
   input_arg {
-    name: "updates"
+    name: "x"
     type_attr: "T"
   }
-  input_arg {
-    name: "shape"
-    type_attr: "Tindices"
-  }
   output_arg {
-    name: "output"
+    name: "y"
     type_attr: "T"
   }
   attr {
     name: "T"
     type: "type"
-  }
-  attr {
-    name: "Tindices"
-    type: "type"
     allowed_values {
       list {
-        type: DT_INT32
-        type: DT_INT64
+        type: DT_HALF
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
       }
     }
   }
 }
 op {
-  name: "ScatterNdAdd"
-  input_arg {
-    name: "ref"
-    type_attr: "T"
-    is_ref: true
-  }
-  input_arg {
-    name: "indices"
-    type_attr: "Tindices"
-  }
+  name: "Sigmoid"
   input_arg {
-    name: "updates"
+    name: "x"
     type_attr: "T"
   }
   output_arg {
-    name: "output_ref"
+    name: "y"
     type_attr: "T"
-    is_ref: true
   }
   attr {
     name: "T"
     type: "type"
     allowed_values {
       list {
+        type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT64
-        type: DT_INT32
-        type: DT_UINT8
-        type: DT_UINT16
-        type: DT_INT16
-        type: DT_INT8
         type: DT_COMPLEX64
         type: DT_COMPLEX128
-        type: DT_QINT8
-        type: DT_QUINT8
-        type: DT_QINT32
-        type: DT_HALF
-      }
-    }
-  }
-  attr {
-    name: "Tindices"
-    type: "type"
-    allowed_values {
-      list {
-        type: DT_INT32
-        type: DT_INT64
       }
     }
   }
-  attr {
-    name: "use_locking"
-    type: "bool"
-    default_value {
-      b: false
-    }
-  }
 }
 op {
-  name: "ScatterNdAdd"
+  name: "SigmoidGrad"
   input_arg {
-    name: "ref"
+    name: "x"
     type_attr: "T"
-    is_ref: true
-  }
-  input_arg {
-    name: "indices"
-    type_attr: "Tindices"
   }
   input_arg {
-    name: "updates"
+    name: "y"
     type_attr: "T"
   }
   output_arg {
-    name: "output_ref"
+    name: "z"
     type_attr: "T"
-    is_ref: true
   }
   attr {
     name: "T"
     type: "type"
     allowed_values {
       list {
+        type: DT_HALF
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT64
-        type: DT_INT32
-        type: DT_UINT8
-        type: DT_UINT16
-        type: DT_INT16
-        type: DT_INT8
         type: DT_COMPLEX64
         type: DT_COMPLEX128
-        type: DT_QINT8
-        type: DT_QUINT8
-        type: DT_QINT32
-        type: DT_HALF
-        type: DT_UINT32
-        type: DT_UINT64
-      }
-    }
-  }
-  attr {
-    name: "Tindices"
-    type: "type"
-    allowed_values {
-      list {
-        type: DT_INT32
-        type: DT_INT64
       }
     }
   }
-  attr {
-    name: "use_locking"
-    type: "bool"
-    default_value {
-      b: false
-    }
-  }
 }
 op {
-  name: "ScatterNdNonAliasingAdd"
+  name: "SigmoidGrad"
   input_arg {
-    name: "input"
+    name: "y"
     type_attr: "T"
   }
   input_arg {
-    name: "indices"
-    type_attr: "Tindices"
-  }
-  input_arg {
-    name: "updates"
+    name: "dy"
     type_attr: "T"
   }
   output_arg {
-    name: "output"
+    name: "z"
     type_attr: "T"
   }
   attr {
@@ -31497,50 +42961,27 @@ op {
     type: "type"
     allowed_values {
       list {
+        type: DT_HALF
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT64
-        type: DT_INT32
-        type: DT_UINT8
-        type: DT_UINT16
-        type: DT_INT16
-        type: DT_INT8
         type: DT_COMPLEX64
         type: DT_COMPLEX128
-        type: DT_QINT8
-        type: DT_QUINT8
-        type: DT_QINT32
-        type: DT_HALF
-      }
-    }
-  }
-  attr {
-    name: "Tindices"
-    type: "type"
-    allowed_values {
-      list {
-        type: DT_INT32
-        type: DT_INT64
       }
     }
   }
 }
 op {
-  name: "ScatterNdNonAliasingAdd"
+  name: "SigmoidGrad"
   input_arg {
-    name: "input"
+    name: "y"
     type_attr: "T"
   }
   input_arg {
-    name: "indices"
-    type_attr: "Tindices"
-  }
-  input_arg {
-    name: "updates"
+    name: "dy"
     type_attr: "T"
   }
   output_arg {
-    name: "output"
+    name: "z"
     type_attr: "T"
   }
   attr {
@@ -31548,524 +42989,327 @@ op {
     type: "type"
     allowed_values {
       list {
+        type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT64
-        type: DT_INT32
-        type: DT_UINT8
-        type: DT_UINT16
-        type: DT_INT16
-        type: DT_INT8
         type: DT_COMPLEX64
         type: DT_COMPLEX128
-        type: DT_QINT8
-        type: DT_QUINT8
-        type: DT_QINT32
-        type: DT_HALF
-        type: DT_UINT32
-        type: DT_UINT64
-      }
-    }
-  }
-  attr {
-    name: "Tindices"
-    type: "type"
-    allowed_values {
-      list {
-        type: DT_INT32
-        type: DT_INT64
       }
     }
   }
 }
 op {
-  name: "ScatterNdSub"
-  input_arg {
-    name: "ref"
-    type_attr: "T"
-    is_ref: true
-  }
-  input_arg {
-    name: "indices"
-    type_attr: "Tindices"
-  }
+  name: "Sign"
   input_arg {
-    name: "updates"
+    name: "x"
     type_attr: "T"
   }
   output_arg {
-    name: "output_ref"
+    name: "y"
     type_attr: "T"
-    is_ref: true
   }
   attr {
     name: "T"
     type: "type"
     allowed_values {
       list {
+        type: DT_HALF
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT64
         type: DT_INT32
-        type: DT_UINT8
-        type: DT_UINT16
-        type: DT_INT16
-        type: DT_INT8
+        type: DT_INT64
         type: DT_COMPLEX64
         type: DT_COMPLEX128
-        type: DT_QINT8
-        type: DT_QUINT8
-        type: DT_QINT32
-        type: DT_HALF
-      }
-    }
-  }
-  attr {
-    name: "Tindices"
-    type: "type"
-    allowed_values {
-      list {
-        type: DT_INT32
-        type: DT_INT64
       }
     }
   }
-  attr {
-    name: "use_locking"
-    type: "bool"
-    default_value {
-      b: false
-    }
-  }
 }
 op {
-  name: "ScatterNdSub"
-  input_arg {
-    name: "ref"
-    type_attr: "T"
-    is_ref: true
-  }
-  input_arg {
-    name: "indices"
-    type_attr: "Tindices"
-  }
+  name: "Sign"
   input_arg {
-    name: "updates"
+    name: "x"
     type_attr: "T"
   }
   output_arg {
-    name: "output_ref"
+    name: "y"
     type_attr: "T"
-    is_ref: true
   }
   attr {
     name: "T"
     type: "type"
     allowed_values {
       list {
+        type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT64
         type: DT_INT32
-        type: DT_UINT8
-        type: DT_UINT16
-        type: DT_INT16
-        type: DT_INT8
+        type: DT_INT64
         type: DT_COMPLEX64
         type: DT_COMPLEX128
-        type: DT_QINT8
-        type: DT_QUINT8
-        type: DT_QINT32
-        type: DT_HALF
-        type: DT_UINT32
-        type: DT_UINT64
-      }
-    }
-  }
-  attr {
-    name: "Tindices"
-    type: "type"
-    allowed_values {
-      list {
-        type: DT_INT32
-        type: DT_INT64
       }
     }
   }
-  attr {
-    name: "use_locking"
-    type: "bool"
-    default_value {
-      b: false
-    }
-  }
 }
 op {
-  name: "ScatterNdUpdate"
-  input_arg {
-    name: "ref"
-    type_attr: "T"
-    is_ref: true
-  }
-  input_arg {
-    name: "indices"
-    type_attr: "Tindices"
-  }
+  name: "Sin"
   input_arg {
-    name: "updates"
+    name: "x"
     type_attr: "T"
   }
   output_arg {
-    name: "output_ref"
+    name: "y"
     type_attr: "T"
-    is_ref: true
   }
   attr {
     name: "T"
     type: "type"
-  }
-  attr {
-    name: "Tindices"
-    type: "type"
     allowed_values {
       list {
-        type: DT_INT32
-        type: DT_INT64
+        type: DT_HALF
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
       }
     }
   }
-  attr {
-    name: "use_locking"
-    type: "bool"
-    default_value {
-      b: true
-    }
-  }
 }
 op {
-  name: "ScatterSub"
-  input_arg {
-    name: "ref"
-    type_attr: "T"
-    is_ref: true
-  }
-  input_arg {
-    name: "indices"
-    type_attr: "Tindices"
-  }
+  name: "Sin"
   input_arg {
-    name: "updates"
+    name: "x"
     type_attr: "T"
   }
   output_arg {
-    name: "output_ref"
+    name: "y"
     type_attr: "T"
-    is_ref: true
   }
   attr {
     name: "T"
     type: "type"
     allowed_values {
       list {
+        type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT64
-        type: DT_INT32
-        type: DT_UINT8
-        type: DT_UINT16
-        type: DT_INT16
-        type: DT_INT8
         type: DT_COMPLEX64
         type: DT_COMPLEX128
-        type: DT_QINT8
-        type: DT_QUINT8
-        type: DT_QINT32
-        type: DT_HALF
-      }
-    }
-  }
-  attr {
-    name: "Tindices"
-    type: "type"
-    allowed_values {
-      list {
-        type: DT_INT32
-        type: DT_INT64
       }
     }
   }
-  attr {
-    name: "use_locking"
-    type: "bool"
-    default_value {
-      b: false
-    }
-  }
 }
 op {
-  name: "ScatterSub"
-  input_arg {
-    name: "ref"
-    type_attr: "T"
-    is_ref: true
-  }
-  input_arg {
-    name: "indices"
-    type_attr: "Tindices"
-  }
+  name: "Sinh"
   input_arg {
-    name: "updates"
+    name: "x"
     type_attr: "T"
   }
   output_arg {
-    name: "output_ref"
+    name: "y"
     type_attr: "T"
-    is_ref: true
   }
   attr {
     name: "T"
     type: "type"
     allowed_values {
       list {
+        type: DT_HALF
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT64
-        type: DT_INT32
-        type: DT_UINT8
-        type: DT_UINT16
-        type: DT_INT16
-        type: DT_INT8
         type: DT_COMPLEX64
         type: DT_COMPLEX128
-        type: DT_QINT8
-        type: DT_QUINT8
-        type: DT_QINT32
-        type: DT_HALF
-        type: DT_UINT32
-        type: DT_UINT64
-      }
-    }
-  }
-  attr {
-    name: "Tindices"
-    type: "type"
-    allowed_values {
-      list {
-        type: DT_INT32
-        type: DT_INT64
       }
     }
   }
-  attr {
-    name: "use_locking"
-    type: "bool"
-    default_value {
-      b: false
-    }
-  }
 }
 op {
-  name: "ScatterUpdate"
-  input_arg {
-    name: "ref"
-    type_attr: "T"
-    is_ref: true
-  }
-  input_arg {
-    name: "indices"
-    type_attr: "Tindices"
-  }
+  name: "Sinh"
   input_arg {
-    name: "updates"
+    name: "x"
     type_attr: "T"
   }
   output_arg {
-    name: "output_ref"
+    name: "y"
     type_attr: "T"
-    is_ref: true
   }
   attr {
     name: "T"
     type: "type"
-  }
-  attr {
-    name: "Tindices"
-    type: "type"
     allowed_values {
       list {
-        type: DT_INT32
-        type: DT_INT64
+        type: DT_HALF
+        type: DT_BFLOAT16
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
       }
     }
   }
-  attr {
-    name: "use_locking"
-    type: "bool"
-    default_value {
-      b: true
-    }
-  }
 }
 op {
-  name: "SdcaFprint"
+  name: "Size"
   input_arg {
     name: "input"
-    type: DT_STRING
+    type_attr: "T"
   }
   output_arg {
     name: "output"
-    type: DT_INT64
-  }
-}
-op {
-  name: "SdcaOptimizer"
-  input_arg {
-    name: "sparse_example_indices"
-    type: DT_INT64
-    number_attr: "num_sparse_features"
-  }
-  input_arg {
-    name: "sparse_feature_indices"
-    type: DT_INT64
-    number_attr: "num_sparse_features"
-  }
-  input_arg {
-    name: "sparse_feature_values"
-    type: DT_FLOAT
-    number_attr: "num_sparse_features_with_values"
-  }
-  input_arg {
-    name: "dense_features"
-    type: DT_FLOAT
-    number_attr: "num_dense_features"
-  }
-  input_arg {
-    name: "example_weights"
-    type: DT_FLOAT
-  }
-  input_arg {
-    name: "example_labels"
-    type: DT_FLOAT
-  }
-  input_arg {
-    name: "sparse_indices"
-    type: DT_INT64
-    number_attr: "num_sparse_features"
-  }
-  input_arg {
-    name: "sparse_weights"
-    type: DT_FLOAT
-    number_attr: "num_sparse_features"
-  }
-  input_arg {
-    name: "dense_weights"
-    type: DT_FLOAT
-    number_attr: "num_dense_features"
-  }
-  input_arg {
-    name: "example_state_data"
-    type: DT_FLOAT
-  }
-  output_arg {
-    name: "out_example_state_data"
-    type: DT_FLOAT
-  }
-  output_arg {
-    name: "out_delta_sparse_weights"
-    type: DT_FLOAT
-    number_attr: "num_sparse_features"
+    type_attr: "out_type"
   }
-  output_arg {
-    name: "out_delta_dense_weights"
-    type: DT_FLOAT
-    number_attr: "num_dense_features"
+  attr {
+    name: "T"
+    type: "type"
   }
   attr {
-    name: "loss_type"
-    type: "string"
+    name: "out_type"
+    type: "type"
+    default_value {
+      type: DT_INT32
+    }
     allowed_values {
-      list {
-        s: "logistic_loss"
-        s: "squared_loss"
-        s: "hinge_loss"
-        s: "smooth_hinge_loss"
+      list {
+        type: DT_INT32
+        type: DT_INT64
       }
     }
   }
-  attr {
-    name: "adaptative"
-    type: "bool"
-    default_value {
-      b: false
-    }
+}
+op {
+  name: "SkipDataset"
+  input_arg {
+    name: "input_dataset"
+    type: DT_VARIANT
   }
-  attr {
-    name: "num_sparse_features"
-    type: "int"
-    has_minimum: true
+  input_arg {
+    name: "count"
+    type: DT_INT64
+  }
+  output_arg {
+    name: "handle"
+    type: DT_VARIANT
   }
   attr {
-    name: "num_sparse_features_with_values"
-    type: "int"
+    name: "output_types"
+    type: "list(type)"
     has_minimum: true
+    minimum: 1
   }
   attr {
-    name: "num_dense_features"
-    type: "int"
+    name: "output_shapes"
+    type: "list(shape)"
     has_minimum: true
+    minimum: 1
   }
-  attr {
-    name: "l1"
-    type: "float"
+  is_stateful: true
+}
+op {
+  name: "SkipDataset"
+  input_arg {
+    name: "input_dataset"
+    type: DT_VARIANT
   }
-  attr {
-    name: "l2"
-    type: "float"
+  input_arg {
+    name: "count"
+    type: DT_INT64
+  }
+  output_arg {
+    name: "handle"
+    type: DT_VARIANT
   }
   attr {
-    name: "num_loss_partitions"
-    type: "int"
+    name: "output_types"
+    type: "list(type)"
     has_minimum: true
     minimum: 1
   }
   attr {
-    name: "num_inner_iterations"
-    type: "int"
+    name: "output_shapes"
+    type: "list(shape)"
     has_minimum: true
     minimum: 1
   }
 }
 op {
-  name: "SdcaShrinkL1"
-  input_arg {
-    name: "weights"
-    type: DT_FLOAT
-    number_attr: "num_features"
-    is_ref: true
+  name: "Skipgram"
+  output_arg {
+    name: "vocab_word"
+    type: DT_STRING
+  }
+  output_arg {
+    name: "vocab_freq"
+    type: DT_INT32
+  }
+  output_arg {
+    name: "words_per_epoch"
+    type: DT_INT64
+  }
+  output_arg {
+    name: "current_epoch"
+    type: DT_INT32
+  }
+  output_arg {
+    name: "total_words_processed"
+    type: DT_INT64
+  }
+  output_arg {
+    name: "examples"
+    type: DT_INT32
+  }
+  output_arg {
+    name: "labels"
+    type: DT_INT32
   }
   attr {
-    name: "num_features"
+    name: "filename"
+    type: "string"
+  }
+  attr {
+    name: "batch_size"
     type: "int"
-    has_minimum: true
   }
   attr {
-    name: "l1"
-    type: "float"
+    name: "window_size"
+    type: "int"
+    default_value {
+      i: 5
+    }
   }
   attr {
-    name: "l2"
+    name: "min_count"
+    type: "int"
+    default_value {
+      i: 5
+    }
+  }
+  attr {
+    name: "subsample"
     type: "float"
+    default_value {
+      f: 0.001
+    }
   }
+  deprecation {
+    version: 19
+  }
+  is_stateful: true
 }
 op {
-  name: "SegmentMax"
+  name: "Slice"
   input_arg {
-    name: "data"
+    name: "input"
     type_attr: "T"
   }
   input_arg {
-    name: "segment_ids"
-    type_attr: "Tindices"
+    name: "begin"
+    type_attr: "Index"
+  }
+  input_arg {
+    name: "size"
+    type_attr: "Index"
   }
   output_arg {
     name: "output"
@@ -32074,22 +43318,9 @@ op {
   attr {
     name: "T"
     type: "type"
-    allowed_values {
-      list {
-        type: DT_FLOAT
-        type: DT_DOUBLE
-        type: DT_INT32
-        type: DT_INT64
-        type: DT_UINT8
-        type: DT_INT16
-        type: DT_INT8
-        type: DT_UINT16
-        type: DT_HALF
-      }
-    }
   }
   attr {
-    name: "Tindices"
+    name: "Index"
     type: "type"
     allowed_values {
       list {
@@ -32100,15 +43331,11 @@ op {
   }
 }
 op {
-  name: "SegmentMax"
+  name: "Snapshot"
   input_arg {
-    name: "data"
+    name: "input"
     type_attr: "T"
   }
-  input_arg {
-    name: "segment_ids"
-    type_attr: "Tindices"
-  }
   output_arg {
     name: "output"
     type_attr: "T"
@@ -32116,45 +43343,16 @@ op {
   attr {
     name: "T"
     type: "type"
-    allowed_values {
-      list {
-        type: DT_FLOAT
-        type: DT_DOUBLE
-        type: DT_INT32
-        type: DT_INT64
-        type: DT_UINT8
-        type: DT_INT16
-        type: DT_INT8
-        type: DT_UINT16
-        type: DT_HALF
-        type: DT_UINT32
-        type: DT_UINT64
-      }
-    }
-  }
-  attr {
-    name: "Tindices"
-    type: "type"
-    allowed_values {
-      list {
-        type: DT_INT32
-        type: DT_INT64
-      }
-    }
   }
 }
 op {
-  name: "SegmentMean"
+  name: "Softmax"
   input_arg {
-    name: "data"
+    name: "logits"
     type_attr: "T"
   }
-  input_arg {
-    name: "segment_ids"
-    type_attr: "Tindices"
-  }
   output_arg {
-    name: "output"
+    name: "softmax"
     type_attr: "T"
   }
   attr {
@@ -32162,41 +43360,21 @@ op {
     type: "type"
     allowed_values {
       list {
+        type: DT_HALF
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT32
-        type: DT_INT64
-        type: DT_UINT8
-        type: DT_INT16
-        type: DT_INT8
-        type: DT_UINT16
-        type: DT_HALF
-      }
-    }
-  }
-  attr {
-    name: "Tindices"
-    type: "type"
-    allowed_values {
-      list {
-        type: DT_INT32
-        type: DT_INT64
       }
     }
   }
 }
 op {
-  name: "SegmentMean"
+  name: "Softmax"
   input_arg {
-    name: "data"
+    name: "logits"
     type_attr: "T"
   }
-  input_arg {
-    name: "segment_ids"
-    type_attr: "Tindices"
-  }
   output_arg {
-    name: "output"
+    name: "softmax"
     type_attr: "T"
   }
   attr {
@@ -32204,43 +43382,30 @@ op {
     type: "type"
     allowed_values {
       list {
+        type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT32
-        type: DT_INT64
-        type: DT_UINT8
-        type: DT_INT16
-        type: DT_INT8
-        type: DT_UINT16
-        type: DT_HALF
-        type: DT_UINT32
-        type: DT_UINT64
-      }
-    }
-  }
-  attr {
-    name: "Tindices"
-    type: "type"
-    allowed_values {
-      list {
-        type: DT_INT32
-        type: DT_INT64
       }
     }
   }
 }
 op {
-  name: "SegmentMin"
+  name: "SoftmaxCrossEntropyWithLogits"
   input_arg {
-    name: "data"
+    name: "features"
     type_attr: "T"
   }
   input_arg {
-    name: "segment_ids"
-    type_attr: "Tindices"
+    name: "labels"
+    type_attr: "T"
   }
   output_arg {
-    name: "output"
+    name: "loss"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "backprop"
     type_attr: "T"
   }
   attr {
@@ -32248,41 +43413,29 @@ op {
     type: "type"
     allowed_values {
       list {
+        type: DT_HALF
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT32
-        type: DT_INT64
-        type: DT_UINT8
-        type: DT_INT16
-        type: DT_INT8
-        type: DT_UINT16
-        type: DT_HALF
-      }
-    }
-  }
-  attr {
-    name: "Tindices"
-    type: "type"
-    allowed_values {
-      list {
-        type: DT_INT32
-        type: DT_INT64
       }
     }
   }
 }
 op {
-  name: "SegmentMin"
+  name: "SoftmaxCrossEntropyWithLogits"
   input_arg {
-    name: "data"
+    name: "features"
     type_attr: "T"
   }
   input_arg {
-    name: "segment_ids"
-    type_attr: "Tindices"
+    name: "labels"
+    type_attr: "T"
   }
   output_arg {
-    name: "output"
+    name: "loss"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "backprop"
     type_attr: "T"
   }
   attr {
@@ -32290,43 +43443,22 @@ op {
     type: "type"
     allowed_values {
       list {
+        type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT32
-        type: DT_INT64
-        type: DT_UINT8
-        type: DT_INT16
-        type: DT_INT8
-        type: DT_UINT16
-        type: DT_HALF
-        type: DT_UINT32
-        type: DT_UINT64
-      }
-    }
-  }
-  attr {
-    name: "Tindices"
-    type: "type"
-    allowed_values {
-      list {
-        type: DT_INT32
-        type: DT_INT64
       }
     }
   }
 }
 op {
-  name: "SegmentProd"
+  name: "Softplus"
   input_arg {
-    name: "data"
+    name: "features"
     type_attr: "T"
   }
-  input_arg {
-    name: "segment_ids"
-    type_attr: "Tindices"
-  }
   output_arg {
-    name: "output"
+    name: "activations"
     type_attr: "T"
   }
   attr {
@@ -32336,44 +43468,25 @@ op {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT64
         type: DT_INT32
+        type: DT_INT64
         type: DT_UINT8
-        type: DT_UINT16
         type: DT_INT16
         type: DT_INT8
-        type: DT_COMPLEX64
-        type: DT_COMPLEX128
-        type: DT_QINT8
-        type: DT_QUINT8
-        type: DT_QINT32
+        type: DT_UINT16
         type: DT_HALF
       }
     }
   }
-  attr {
-    name: "Tindices"
-    type: "type"
-    allowed_values {
-      list {
-        type: DT_INT32
-        type: DT_INT64
-      }
-    }
-  }
 }
 op {
-  name: "SegmentProd"
+  name: "Softplus"
   input_arg {
-    name: "data"
+    name: "features"
     type_attr: "T"
   }
-  input_arg {
-    name: "segment_ids"
-    type_attr: "Tindices"
-  }
   output_arg {
-    name: "output"
+    name: "activations"
     type_attr: "T"
   }
   attr {
@@ -32383,46 +43496,27 @@ op {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT64
         type: DT_INT32
+        type: DT_INT64
         type: DT_UINT8
-        type: DT_UINT16
         type: DT_INT16
         type: DT_INT8
-        type: DT_COMPLEX64
-        type: DT_COMPLEX128
-        type: DT_QINT8
-        type: DT_QUINT8
-        type: DT_QINT32
+        type: DT_UINT16
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
       }
     }
   }
-  attr {
-    name: "Tindices"
-    type: "type"
-    allowed_values {
-      list {
-        type: DT_INT32
-        type: DT_INT64
-      }
-    }
-  }
 }
 op {
-  name: "SegmentSum"
+  name: "Softplus"
   input_arg {
-    name: "data"
+    name: "features"
     type_attr: "T"
   }
-  input_arg {
-    name: "segment_ids"
-    type_attr: "Tindices"
-  }
   output_arg {
-    name: "output"
+    name: "activations"
     type_attr: "T"
   }
   attr {
@@ -32432,44 +43526,32 @@ op {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT64
         type: DT_INT32
+        type: DT_INT64
         type: DT_UINT8
-        type: DT_UINT16
         type: DT_INT16
         type: DT_INT8
-        type: DT_COMPLEX64
-        type: DT_COMPLEX128
-        type: DT_QINT8
-        type: DT_QUINT8
-        type: DT_QINT32
+        type: DT_UINT16
         type: DT_HALF
-      }
-    }
-  }
-  attr {
-    name: "Tindices"
-    type: "type"
-    allowed_values {
-      list {
-        type: DT_INT32
-        type: DT_INT64
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
 }
 op {
-  name: "SegmentSum"
+  name: "SoftplusGrad"
   input_arg {
-    name: "data"
+    name: "gradients"
     type_attr: "T"
   }
   input_arg {
-    name: "segment_ids"
-    type_attr: "Tindices"
+    name: "features"
+    type_attr: "T"
   }
   output_arg {
-    name: "output"
+    name: "backprops"
     type_attr: "T"
   }
   attr {
@@ -32479,65 +43561,29 @@ op {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT64
         type: DT_INT32
+        type: DT_INT64
         type: DT_UINT8
-        type: DT_UINT16
         type: DT_INT16
         type: DT_INT8
-        type: DT_COMPLEX64
-        type: DT_COMPLEX128
-        type: DT_QINT8
-        type: DT_QUINT8
-        type: DT_QINT32
+        type: DT_UINT16
         type: DT_HALF
-        type: DT_UINT32
-        type: DT_UINT64
-      }
-    }
-  }
-  attr {
-    name: "Tindices"
-    type: "type"
-    allowed_values {
-      list {
-        type: DT_INT32
-        type: DT_INT64
       }
     }
   }
 }
 op {
-  name: "Select"
-  input_arg {
-    name: "condition"
-    type: DT_BOOL
-  }
-  input_arg {
-    name: "t"
-    type_attr: "T"
-  }
+  name: "SoftplusGrad"
   input_arg {
-    name: "e"
-    type_attr: "T"
-  }
-  output_arg {
-    name: "output"
+    name: "gradients"
     type_attr: "T"
   }
-  attr {
-    name: "T"
-    type: "type"
-  }
-}
-op {
-  name: "SelfAdjointEig"
   input_arg {
-    name: "input"
+    name: "features"
     type_attr: "T"
   }
   output_arg {
-    name: "output"
+    name: "backprops"
     type_attr: "T"
   }
   attr {
@@ -32545,83 +43591,86 @@ op {
     type: "type"
     allowed_values {
       list {
-        type: DT_DOUBLE
         type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_UINT16
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
       }
     }
   }
-  deprecation {
-    version: 11
-  }
 }
 op {
-  name: "SelfAdjointEigV2"
+  name: "SoftplusGrad"
   input_arg {
-    name: "input"
+    name: "gradients"
     type_attr: "T"
   }
-  output_arg {
-    name: "e"
+  input_arg {
+    name: "features"
     type_attr: "T"
   }
   output_arg {
-    name: "v"
+    name: "backprops"
     type_attr: "T"
   }
-  attr {
-    name: "compute_v"
-    type: "bool"
-    default_value {
-      b: true
-    }
-  }
   attr {
     name: "T"
     type: "type"
     allowed_values {
       list {
-        type: DT_DOUBLE
         type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_UINT16
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
 }
 op {
-  name: "SelfAdjointEigV2"
+  name: "Softsign"
   input_arg {
-    name: "input"
-    type_attr: "T"
-  }
-  output_arg {
-    name: "e"
+    name: "features"
     type_attr: "T"
   }
   output_arg {
-    name: "v"
+    name: "activations"
     type_attr: "T"
   }
-  attr {
-    name: "compute_v"
-    type: "bool"
-    default_value {
-      b: true
-    }
-  }
   attr {
     name: "T"
     type: "type"
     allowed_values {
       list {
-        type: DT_DOUBLE
         type: DT_FLOAT
-        type: DT_COMPLEX64
-        type: DT_COMPLEX128
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_UINT16
+        type: DT_HALF
       }
     }
   }
 }
 op {
-  name: "Selu"
+  name: "Softsign"
   input_arg {
     name: "features"
     type_attr: "T"
@@ -32635,25 +43684,29 @@ op {
     type: "type"
     allowed_values {
       list {
-        type: DT_HALF
         type: DT_FLOAT
         type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_UINT16
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
       }
     }
   }
 }
 op {
-  name: "SeluGrad"
-  input_arg {
-    name: "gradients"
-    type_attr: "T"
-  }
+  name: "Softsign"
   input_arg {
-    name: "outputs"
+    name: "features"
     type_attr: "T"
   }
   output_arg {
-    name: "backprops"
+    name: "activations"
     type_attr: "T"
   }
   attr {
@@ -32661,143 +43714,143 @@ op {
     type: "type"
     allowed_values {
       list {
-        type: DT_HALF
         type: DT_FLOAT
         type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_UINT16
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
 }
 op {
-  name: "SerializeIterator"
-  input_arg {
-    name: "resource_handle"
-    type: DT_RESOURCE
-  }
-  output_arg {
-    name: "serialized"
-    type: DT_VARIANT
-  }
-  is_stateful: true
-}
-op {
-  name: "SerializeManySparse"
-  input_arg {
-    name: "sparse_indices"
-    type: DT_INT64
-  }
+  name: "SoftsignGrad"
   input_arg {
-    name: "sparse_values"
+    name: "gradients"
     type_attr: "T"
   }
   input_arg {
-    name: "sparse_shape"
-    type: DT_INT64
+    name: "features"
+    type_attr: "T"
   }
   output_arg {
-    name: "serialized_sparse"
-    type: DT_STRING
+    name: "backprops"
+    type_attr: "T"
   }
   attr {
     name: "T"
     type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_UINT16
+        type: DT_HALF
+      }
+    }
   }
 }
 op {
-  name: "SerializeSparse"
-  input_arg {
-    name: "sparse_indices"
-    type: DT_INT64
-  }
+  name: "SoftsignGrad"
   input_arg {
-    name: "sparse_values"
+    name: "gradients"
     type_attr: "T"
   }
   input_arg {
-    name: "sparse_shape"
-    type: DT_INT64
-  }
-  output_arg {
-    name: "serialized_sparse"
-    type: DT_STRING
-  }
-  attr {
-    name: "T"
-    type: "type"
-  }
-}
-op {
-  name: "SerializeTensor"
-  input_arg {
-    name: "tensor"
+    name: "features"
     type_attr: "T"
   }
   output_arg {
-    name: "serialized"
-    type: DT_STRING
+    name: "backprops"
+    type_attr: "T"
   }
   attr {
     name: "T"
     type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_UINT16
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+      }
+    }
   }
 }
 op {
-  name: "SetSize"
-  input_arg {
-    name: "set_indices"
-    type: DT_INT64
-  }
+  name: "SoftsignGrad"
   input_arg {
-    name: "set_values"
+    name: "gradients"
     type_attr: "T"
   }
   input_arg {
-    name: "set_shape"
-    type: DT_INT64
+    name: "features"
+    type_attr: "T"
   }
   output_arg {
-    name: "size"
-    type: DT_INT32
-  }
-  attr {
-    name: "validate_indices"
-    type: "bool"
-    default_value {
-      b: true
-    }
+    name: "backprops"
+    type_attr: "T"
   }
   attr {
     name: "T"
     type: "type"
     allowed_values {
       list {
-        type: DT_INT8
-        type: DT_INT16
+        type: DT_FLOAT
+        type: DT_DOUBLE
         type: DT_INT32
         type: DT_INT64
         type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
         type: DT_UINT16
-        type: DT_STRING
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
 }
 op {
-  name: "Shape"
+  name: "SpaceToBatch"
   input_arg {
     name: "input"
     type_attr: "T"
   }
+  input_arg {
+    name: "paddings"
+    type_attr: "Tpaddings"
+  }
   output_arg {
     name: "output"
-    type_attr: "out_type"
+    type_attr: "T"
   }
   attr {
     name: "T"
     type: "type"
   }
   attr {
-    name: "out_type"
+    name: "Tpaddings"
     type: "type"
     default_value {
       type: DT_INT32
@@ -32809,31 +43862,37 @@ op {
       }
     }
   }
+  attr {
+    name: "block_size"
+    type: "int"
+    has_minimum: true
+    minimum: 2
+  }
 }
 op {
-  name: "ShapeN"
+  name: "SpaceToBatchND"
   input_arg {
     name: "input"
     type_attr: "T"
-    number_attr: "N"
+  }
+  input_arg {
+    name: "block_shape"
+    type_attr: "Tblock_shape"
+  }
+  input_arg {
+    name: "paddings"
+    type_attr: "Tpaddings"
   }
   output_arg {
     name: "output"
-    type_attr: "out_type"
-    number_attr: "N"
-  }
-  attr {
-    name: "N"
-    type: "int"
-    has_minimum: true
-    minimum: 1
+    type_attr: "T"
   }
   attr {
     name: "T"
     type: "type"
   }
   attr {
-    name: "out_type"
+    name: "Tblock_shape"
     type: "type"
     default_value {
       type: DT_INT32
@@ -32845,552 +43904,440 @@ op {
       }
     }
   }
-}
-op {
-  name: "ShardedFilename"
-  input_arg {
-    name: "basename"
-    type: DT_STRING
-  }
-  input_arg {
-    name: "shard"
-    type: DT_INT32
-  }
-  input_arg {
-    name: "num_shards"
-    type: DT_INT32
-  }
-  output_arg {
-    name: "filename"
-    type: DT_STRING
-  }
-}
-op {
-  name: "ShardedFilespec"
-  input_arg {
-    name: "basename"
-    type: DT_STRING
-  }
-  input_arg {
-    name: "num_shards"
-    type: DT_INT32
-  }
-  output_arg {
-    name: "filename"
-    type: DT_STRING
+  attr {
+    name: "Tpaddings"
+    type: "type"
+    default_value {
+      type: DT_INT32
+    }
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
   }
 }
 op {
-  name: "ShuffleDataset"
-  input_arg {
-    name: "input_dataset"
-    type: DT_VARIANT
-  }
-  input_arg {
-    name: "buffer_size"
-    type: DT_INT64
-  }
-  input_arg {
-    name: "seed"
-    type: DT_INT64
-  }
+  name: "SpaceToDepth"
   input_arg {
-    name: "seed2"
-    type: DT_INT64
+    name: "input"
+    type_attr: "T"
   }
   output_arg {
-    name: "handle"
-    type: DT_VARIANT
+    name: "output"
+    type_attr: "T"
   }
   attr {
-    name: "output_types"
-    type: "list(type)"
-    has_minimum: true
-    minimum: 1
+    name: "T"
+    type: "type"
   }
   attr {
-    name: "output_shapes"
-    type: "list(shape)"
+    name: "block_size"
+    type: "int"
     has_minimum: true
-    minimum: 1
+    minimum: 2
   }
-  is_stateful: true
 }
 op {
-  name: "ShuffleDataset"
-  input_arg {
-    name: "input_dataset"
-    type: DT_VARIANT
-  }
-  input_arg {
-    name: "buffer_size"
-    type: DT_INT64
-  }
-  input_arg {
-    name: "seed"
-    type: DT_INT64
-  }
+  name: "SpaceToDepth"
   input_arg {
-    name: "seed2"
-    type: DT_INT64
+    name: "input"
+    type_attr: "T"
   }
   output_arg {
-    name: "handle"
-    type: DT_VARIANT
+    name: "output"
+    type_attr: "T"
   }
   attr {
-    name: "output_types"
-    type: "list(type)"
-    has_minimum: true
-    minimum: 1
+    name: "T"
+    type: "type"
   }
   attr {
-    name: "output_shapes"
-    type: "list(shape)"
+    name: "block_size"
+    type: "int"
     has_minimum: true
-    minimum: 1
-  }
-}
-op {
-  name: "ShuffleDataset"
-  input_arg {
-    name: "input_dataset"
-    type: DT_VARIANT
-  }
-  input_arg {
-    name: "buffer_size"
-    type: DT_INT64
-  }
-  input_arg {
-    name: "seed"
-    type: DT_INT64
-  }
-  input_arg {
-    name: "seed2"
-    type: DT_INT64
-  }
-  output_arg {
-    name: "handle"
-    type: DT_VARIANT
+    minimum: 2
   }
   attr {
-    name: "reshuffle_each_iteration"
-    type: "bool"
+    name: "data_format"
+    type: "string"
     default_value {
-      b: true
+      s: "NHWC"
     }
-  }
-  attr {
-    name: "output_types"
-    type: "list(type)"
-    has_minimum: true
-    minimum: 1
-  }
-  attr {
-    name: "output_shapes"
-    type: "list(shape)"
-    has_minimum: true
-    minimum: 1
-  }
-}
-op {
-  name: "Sigmoid"
-  input_arg {
-    name: "x"
-    type_attr: "T"
-  }
-  output_arg {
-    name: "y"
-    type_attr: "T"
-  }
-  attr {
-    name: "T"
-    type: "type"
     allowed_values {
       list {
-        type: DT_HALF
-        type: DT_FLOAT
-        type: DT_DOUBLE
-        type: DT_COMPLEX64
-        type: DT_COMPLEX128
+        s: "NHWC"
+        s: "NCHW"
+        s: "NCHW_VECT_C"
       }
     }
   }
 }
 op {
-  name: "SigmoidGrad"
+  name: "SparseAccumulatorApplyGradient"
   input_arg {
-    name: "x"
-    type_attr: "T"
+    name: "handle"
+    type: DT_STRING
+    is_ref: true
   }
   input_arg {
-    name: "y"
-    type_attr: "T"
-  }
-  output_arg {
-    name: "z"
-    type_attr: "T"
-  }
-  attr {
-    name: "T"
-    type: "type"
-    allowed_values {
-      list {
-        type: DT_HALF
-        type: DT_FLOAT
-        type: DT_DOUBLE
-        type: DT_COMPLEX64
-        type: DT_COMPLEX128
-      }
-    }
+    name: "local_step"
+    type: DT_INT64
   }
-}
-op {
-  name: "SigmoidGrad"
   input_arg {
-    name: "y"
-    type_attr: "T"
+    name: "gradient_indices"
+    type: DT_INT64
   }
   input_arg {
-    name: "dy"
-    type_attr: "T"
-  }
-  output_arg {
-    name: "z"
-    type_attr: "T"
-  }
-  attr {
-    name: "T"
-    type: "type"
-    allowed_values {
-      list {
-        type: DT_HALF
-        type: DT_FLOAT
-        type: DT_DOUBLE
-        type: DT_COMPLEX64
-        type: DT_COMPLEX128
-      }
-    }
+    name: "gradient_values"
+    type_attr: "dtype"
   }
-}
-op {
-  name: "Sign"
   input_arg {
-    name: "x"
-    type_attr: "T"
-  }
-  output_arg {
-    name: "y"
-    type_attr: "T"
+    name: "gradient_shape"
+    type: DT_INT64
   }
   attr {
-    name: "T"
+    name: "dtype"
     type: "type"
     allowed_values {
       list {
-        type: DT_HALF
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT32
         type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
         type: DT_COMPLEX64
         type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
       }
     }
   }
+  attr {
+    name: "has_known_shape"
+    type: "bool"
+  }
 }
 op {
-  name: "Sin"
+  name: "SparseAccumulatorApplyGradient"
   input_arg {
-    name: "x"
-    type_attr: "T"
+    name: "handle"
+    type: DT_STRING
+    is_ref: true
   }
-  output_arg {
-    name: "y"
-    type_attr: "T"
+  input_arg {
+    name: "local_step"
+    type: DT_INT64
   }
-  attr {
-    name: "T"
-    type: "type"
-    allowed_values {
-      list {
-        type: DT_HALF
-        type: DT_FLOAT
-        type: DT_DOUBLE
-        type: DT_COMPLEX64
-        type: DT_COMPLEX128
-      }
-    }
+  input_arg {
+    name: "gradient_indices"
+    type: DT_INT64
   }
-}
-op {
-  name: "Sinh"
   input_arg {
-    name: "x"
-    type_attr: "T"
+    name: "gradient_values"
+    type_attr: "dtype"
   }
-  output_arg {
-    name: "y"
-    type_attr: "T"
+  input_arg {
+    name: "gradient_shape"
+    type: DT_INT64
   }
   attr {
-    name: "T"
+    name: "dtype"
     type: "type"
     allowed_values {
       list {
-        type: DT_HALF
         type: DT_FLOAT
         type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
         type: DT_COMPLEX64
         type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
       }
     }
   }
-}
-op {
-  name: "Size"
-  input_arg {
-    name: "input"
-    type_attr: "T"
-  }
-  output_arg {
-    name: "output"
-    type_attr: "out_type"
-  }
-  attr {
-    name: "T"
-    type: "type"
-  }
   attr {
-    name: "out_type"
-    type: "type"
-    default_value {
-      type: DT_INT32
-    }
-    allowed_values {
-      list {
-        type: DT_INT32
-        type: DT_INT64
-      }
-    }
+    name: "has_known_shape"
+    type: "bool"
   }
 }
 op {
-  name: "SkipDataset"
+  name: "SparseAccumulatorApplyGradient"
   input_arg {
-    name: "input_dataset"
-    type: DT_VARIANT
+    name: "handle"
+    type: DT_STRING
+    is_ref: true
   }
   input_arg {
-    name: "count"
+    name: "local_step"
     type: DT_INT64
   }
-  output_arg {
-    name: "handle"
-    type: DT_VARIANT
-  }
-  attr {
-    name: "output_types"
-    type: "list(type)"
-    has_minimum: true
-    minimum: 1
-  }
-  attr {
-    name: "output_shapes"
-    type: "list(shape)"
-    has_minimum: true
-    minimum: 1
+  input_arg {
+    name: "gradient_indices"
+    type: DT_INT64
   }
-  is_stateful: true
-}
-op {
-  name: "SkipDataset"
   input_arg {
-    name: "input_dataset"
-    type: DT_VARIANT
+    name: "gradient_values"
+    type_attr: "dtype"
   }
   input_arg {
-    name: "count"
+    name: "gradient_shape"
     type: DT_INT64
   }
-  output_arg {
-    name: "handle"
-    type: DT_VARIANT
-  }
   attr {
-    name: "output_types"
-    type: "list(type)"
-    has_minimum: true
-    minimum: 1
+    name: "dtype"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
+      }
+    }
   }
   attr {
-    name: "output_shapes"
-    type: "list(shape)"
-    has_minimum: true
-    minimum: 1
+    name: "has_known_shape"
+    type: "bool"
   }
 }
 op {
-  name: "Skipgram"
-  output_arg {
-    name: "vocab_word"
+  name: "SparseAccumulatorTakeGradient"
+  input_arg {
+    name: "handle"
     type: DT_STRING
+    is_ref: true
   }
-  output_arg {
-    name: "vocab_freq"
+  input_arg {
+    name: "num_required"
     type: DT_INT32
   }
   output_arg {
-    name: "words_per_epoch"
+    name: "indices"
     type: DT_INT64
   }
   output_arg {
-    name: "current_epoch"
-    type: DT_INT32
+    name: "values"
+    type_attr: "dtype"
   }
   output_arg {
-    name: "total_words_processed"
+    name: "shape"
     type: DT_INT64
   }
-  output_arg {
-    name: "examples"
-    type: DT_INT32
-  }
-  output_arg {
-    name: "labels"
-    type: DT_INT32
-  }
-  attr {
-    name: "filename"
-    type: "string"
-  }
-  attr {
-    name: "batch_size"
-    type: "int"
-  }
-  attr {
-    name: "window_size"
-    type: "int"
-    default_value {
-      i: 5
-    }
-  }
   attr {
-    name: "min_count"
-    type: "int"
-    default_value {
-      i: 5
-    }
-  }
-  attr {
-    name: "subsample"
-    type: "float"
-    default_value {
-      f: 0.001
+    name: "dtype"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+      }
     }
   }
-  deprecation {
-    version: 19
-  }
-  is_stateful: true
 }
 op {
-  name: "Slice"
+  name: "SparseAccumulatorTakeGradient"
   input_arg {
-    name: "input"
-    type_attr: "T"
+    name: "handle"
+    type: DT_STRING
+    is_ref: true
   }
   input_arg {
-    name: "begin"
-    type_attr: "Index"
+    name: "num_required"
+    type: DT_INT32
   }
-  input_arg {
-    name: "size"
-    type_attr: "Index"
+  output_arg {
+    name: "indices"
+    type: DT_INT64
   }
   output_arg {
-    name: "output"
-    type_attr: "T"
+    name: "values"
+    type_attr: "dtype"
   }
-  attr {
-    name: "T"
-    type: "type"
+  output_arg {
+    name: "shape"
+    type: DT_INT64
   }
   attr {
-    name: "Index"
+    name: "dtype"
     type: "type"
     allowed_values {
       list {
-        type: DT_INT32
+        type: DT_FLOAT
+        type: DT_DOUBLE
         type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
       }
     }
   }
 }
 op {
-  name: "Softmax"
+  name: "SparseAccumulatorTakeGradient"
   input_arg {
-    name: "logits"
-    type_attr: "T"
+    name: "handle"
+    type: DT_STRING
+    is_ref: true
+  }
+  input_arg {
+    name: "num_required"
+    type: DT_INT32
   }
   output_arg {
-    name: "softmax"
-    type_attr: "T"
+    name: "indices"
+    type: DT_INT64
+  }
+  output_arg {
+    name: "values"
+    type_attr: "dtype"
+  }
+  output_arg {
+    name: "shape"
+    type: DT_INT64
   }
   attr {
-    name: "T"
+    name: "dtype"
     type: "type"
     allowed_values {
       list {
-        type: DT_HALF
         type: DT_FLOAT
         type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
 }
 op {
-  name: "SoftmaxCrossEntropyWithLogits"
+  name: "SparseAdd"
   input_arg {
-    name: "features"
+    name: "a_indices"
+    type: DT_INT64
+  }
+  input_arg {
+    name: "a_values"
     type_attr: "T"
   }
   input_arg {
-    name: "labels"
+    name: "a_shape"
+    type: DT_INT64
+  }
+  input_arg {
+    name: "b_indices"
+    type: DT_INT64
+  }
+  input_arg {
+    name: "b_values"
     type_attr: "T"
   }
+  input_arg {
+    name: "b_shape"
+    type: DT_INT64
+  }
+  input_arg {
+    name: "thresh"
+    type_attr: "Treal"
+  }
   output_arg {
-    name: "loss"
-    type_attr: "T"
+    name: "sum_indices"
+    type: DT_INT64
   }
   output_arg {
-    name: "backprop"
+    name: "sum_values"
     type_attr: "T"
   }
+  output_arg {
+    name: "sum_shape"
+    type: DT_INT64
+  }
   attr {
     name: "T"
     type: "type"
     allowed_values {
       list {
-        type: DT_HALF
         type: DT_FLOAT
         type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
       }
     }
   }
-}
-op {
-  name: "Softplus"
-  input_arg {
-    name: "features"
-    type_attr: "T"
-  }
-  output_arg {
-    name: "activations"
-    type_attr: "T"
-  }
   attr {
-    name: "T"
+    name: "Treal"
     type: "type"
     allowed_values {
       list {
@@ -33408,15 +44355,47 @@ op {
   }
 }
 op {
-  name: "Softplus"
+  name: "SparseAdd"
   input_arg {
-    name: "features"
+    name: "a_indices"
+    type: DT_INT64
+  }
+  input_arg {
+    name: "a_values"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "a_shape"
+    type: DT_INT64
+  }
+  input_arg {
+    name: "b_indices"
+    type: DT_INT64
+  }
+  input_arg {
+    name: "b_values"
     type_attr: "T"
   }
+  input_arg {
+    name: "b_shape"
+    type: DT_INT64
+  }
+  input_arg {
+    name: "thresh"
+    type_attr: "Treal"
+  }
   output_arg {
-    name: "activations"
+    name: "sum_indices"
+    type: DT_INT64
+  }
+  output_arg {
+    name: "sum_values"
     type_attr: "T"
   }
+  output_arg {
+    name: "sum_shape"
+    type: DT_INT64
+  }
   attr {
     name: "T"
     type: "type"
@@ -33424,35 +44403,25 @@ op {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT32
         type: DT_INT64
+        type: DT_INT32
         type: DT_UINT8
+        type: DT_UINT16
         type: DT_INT16
         type: DT_INT8
-        type: DT_UINT16
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
       }
     }
   }
-}
-op {
-  name: "SoftplusGrad"
-  input_arg {
-    name: "gradients"
-    type_attr: "T"
-  }
-  input_arg {
-    name: "features"
-    type_attr: "T"
-  }
-  output_arg {
-    name: "backprops"
-    type_attr: "T"
-  }
   attr {
-    name: "T"
+    name: "Treal"
     type: "type"
     allowed_values {
       list {
@@ -33465,24 +44434,54 @@ op {
         type: DT_INT8
         type: DT_UINT16
         type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
       }
     }
   }
 }
 op {
-  name: "SoftplusGrad"
+  name: "SparseAdd"
   input_arg {
-    name: "gradients"
+    name: "a_indices"
+    type: DT_INT64
+  }
+  input_arg {
+    name: "a_values"
     type_attr: "T"
   }
   input_arg {
-    name: "features"
+    name: "a_shape"
+    type: DT_INT64
+  }
+  input_arg {
+    name: "b_indices"
+    type: DT_INT64
+  }
+  input_arg {
+    name: "b_values"
     type_attr: "T"
   }
+  input_arg {
+    name: "b_shape"
+    type: DT_INT64
+  }
+  input_arg {
+    name: "thresh"
+    type_attr: "Treal"
+  }
   output_arg {
-    name: "backprops"
+    name: "sum_indices"
+    type: DT_INT64
+  }
+  output_arg {
+    name: "sum_values"
     type_attr: "T"
   }
+  output_arg {
+    name: "sum_shape"
+    type: DT_INT64
+  }
   attr {
     name: "T"
     type: "type"
@@ -33490,31 +44489,26 @@ op {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT32
         type: DT_INT64
+        type: DT_INT32
         type: DT_UINT8
+        type: DT_UINT16
         type: DT_INT16
         type: DT_INT8
-        type: DT_UINT16
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
-}
-op {
-  name: "Softsign"
-  input_arg {
-    name: "features"
-    type_attr: "T"
-  }
-  output_arg {
-    name: "activations"
-    type_attr: "T"
-  }
   attr {
-    name: "T"
+    name: "Treal"
     type: "type"
     allowed_values {
       list {
@@ -33527,18 +44521,37 @@ op {
         type: DT_INT8
         type: DT_UINT16
         type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
 }
 op {
-  name: "Softsign"
+  name: "SparseAddGrad"
   input_arg {
-    name: "features"
+    name: "backprop_val_grad"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "a_indices"
+    type: DT_INT64
+  }
+  input_arg {
+    name: "b_indices"
+    type: DT_INT64
+  }
+  input_arg {
+    name: "sum_indices"
+    type: DT_INT64
+  }
+  output_arg {
+    name: "a_val_grad"
     type_attr: "T"
   }
   output_arg {
-    name: "activations"
+    name: "b_val_grad"
     type_attr: "T"
   }
   attr {
@@ -33548,31 +44561,46 @@ op {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT32
         type: DT_INT64
+        type: DT_INT32
         type: DT_UINT8
+        type: DT_UINT16
         type: DT_INT16
         type: DT_INT8
-        type: DT_UINT16
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
         type: DT_HALF
-        type: DT_UINT32
-        type: DT_UINT64
       }
     }
   }
-}
-op {
-  name: "SoftsignGrad"
+}
+op {
+  name: "SparseAddGrad"
+  input_arg {
+    name: "backprop_val_grad"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "a_indices"
+    type: DT_INT64
+  }
   input_arg {
-    name: "gradients"
-    type_attr: "T"
+    name: "b_indices"
+    type: DT_INT64
   }
   input_arg {
-    name: "features"
+    name: "sum_indices"
+    type: DT_INT64
+  }
+  output_arg {
+    name: "a_val_grad"
     type_attr: "T"
   }
   output_arg {
-    name: "backprops"
+    name: "b_val_grad"
     type_attr: "T"
   }
   attr {
@@ -33582,29 +44610,48 @@ op {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT32
         type: DT_INT64
+        type: DT_INT32
         type: DT_UINT8
+        type: DT_UINT16
         type: DT_INT16
         type: DT_INT8
-        type: DT_UINT16
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
         type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
       }
     }
   }
 }
 op {
-  name: "SoftsignGrad"
+  name: "SparseAddGrad"
   input_arg {
-    name: "gradients"
+    name: "backprop_val_grad"
     type_attr: "T"
   }
   input_arg {
-    name: "features"
+    name: "a_indices"
+    type: DT_INT64
+  }
+  input_arg {
+    name: "b_indices"
+    type: DT_INT64
+  }
+  input_arg {
+    name: "sum_indices"
+    type: DT_INT64
+  }
+  output_arg {
+    name: "a_val_grad"
     type_attr: "T"
   }
   output_arg {
-    name: "backprops"
+    name: "b_val_grad"
     type_attr: "T"
   }
   attr {
@@ -33614,98 +44661,92 @@ op {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT32
         type: DT_INT64
+        type: DT_INT32
         type: DT_UINT8
+        type: DT_UINT16
         type: DT_INT16
         type: DT_INT8
-        type: DT_UINT16
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
 }
 op {
-  name: "SpaceToBatch"
+  name: "SparseApplyAdadelta"
   input_arg {
-    name: "input"
+    name: "var"
     type_attr: "T"
+    is_ref: true
   }
   input_arg {
-    name: "paddings"
-    type_attr: "Tpaddings"
-  }
-  output_arg {
-    name: "output"
+    name: "accum"
     type_attr: "T"
+    is_ref: true
   }
-  attr {
-    name: "T"
-    type: "type"
+  input_arg {
+    name: "accum_update"
+    type_attr: "T"
+    is_ref: true
   }
-  attr {
-    name: "Tpaddings"
-    type: "type"
-    default_value {
-      type: DT_INT32
-    }
-    allowed_values {
-      list {
-        type: DT_INT32
-        type: DT_INT64
-      }
-    }
+  input_arg {
+    name: "lr"
+    type_attr: "T"
   }
-  attr {
-    name: "block_size"
-    type: "int"
-    has_minimum: true
-    minimum: 2
+  input_arg {
+    name: "rho"
+    type_attr: "T"
   }
-}
-op {
-  name: "SpaceToBatchND"
   input_arg {
-    name: "input"
+    name: "epsilon"
     type_attr: "T"
   }
   input_arg {
-    name: "block_shape"
-    type_attr: "Tblock_shape"
+    name: "grad"
+    type_attr: "T"
   }
   input_arg {
-    name: "paddings"
-    type_attr: "Tpaddings"
+    name: "indices"
+    type_attr: "Tindices"
   }
   output_arg {
-    name: "output"
+    name: "out"
     type_attr: "T"
+    is_ref: true
   }
   attr {
     name: "T"
     type: "type"
-  }
-  attr {
-    name: "Tblock_shape"
-    type: "type"
-    default_value {
-      type: DT_INT32
-    }
     allowed_values {
       list {
-        type: DT_INT32
+        type: DT_FLOAT
+        type: DT_DOUBLE
         type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
       }
     }
   }
   attr {
-    name: "Tpaddings"
+    name: "Tindices"
     type: "type"
-    default_value {
-      type: DT_INT32
-    }
     allowed_values {
       list {
         type: DT_INT32
@@ -33713,88 +44754,58 @@ op {
       }
     }
   }
+  attr {
+    name: "use_locking"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
 }
 op {
-  name: "SpaceToDepth"
+  name: "SparseApplyAdadelta"
   input_arg {
-    name: "input"
+    name: "var"
     type_attr: "T"
+    is_ref: true
   }
-  output_arg {
-    name: "output"
+  input_arg {
+    name: "accum"
     type_attr: "T"
+    is_ref: true
   }
-  attr {
-    name: "T"
-    type: "type"
-  }
-  attr {
-    name: "block_size"
-    type: "int"
-    has_minimum: true
-    minimum: 2
-  }
-}
-op {
-  name: "SpaceToDepth"
   input_arg {
-    name: "input"
+    name: "accum_update"
     type_attr: "T"
+    is_ref: true
   }
-  output_arg {
-    name: "output"
+  input_arg {
+    name: "lr"
     type_attr: "T"
   }
-  attr {
-    name: "T"
-    type: "type"
-  }
-  attr {
-    name: "block_size"
-    type: "int"
-    has_minimum: true
-    minimum: 2
-  }
-  attr {
-    name: "data_format"
-    type: "string"
-    default_value {
-      s: "NHWC"
-    }
-    allowed_values {
-      list {
-        s: "NHWC"
-        s: "NCHW"
-        s: "NCHW_VECT_C"
-      }
-    }
-  }
-}
-op {
-  name: "SparseAccumulatorApplyGradient"
   input_arg {
-    name: "handle"
-    type: DT_STRING
-    is_ref: true
+    name: "rho"
+    type_attr: "T"
   }
   input_arg {
-    name: "local_step"
-    type: DT_INT64
+    name: "epsilon"
+    type_attr: "T"
   }
   input_arg {
-    name: "gradient_indices"
-    type: DT_INT64
+    name: "grad"
+    type_attr: "T"
   }
   input_arg {
-    name: "gradient_values"
-    type_attr: "dtype"
+    name: "indices"
+    type_attr: "Tindices"
   }
-  input_arg {
-    name: "gradient_shape"
-    type: DT_INT64
+  output_arg {
+    name: "out"
+    type_attr: "T"
+    is_ref: true
   }
   attr {
-    name: "dtype"
+    name: "T"
     type: "type"
     allowed_values {
       list {
@@ -33812,39 +44823,73 @@ op {
         type: DT_QUINT8
         type: DT_QINT32
         type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
       }
     }
   }
   attr {
-    name: "has_known_shape"
+    name: "Tindices"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+  attr {
+    name: "use_locking"
     type: "bool"
+    default_value {
+      b: false
+    }
   }
 }
 op {
-  name: "SparseAccumulatorApplyGradient"
+  name: "SparseApplyAdadelta"
   input_arg {
-    name: "handle"
-    type: DT_STRING
+    name: "var"
+    type_attr: "T"
     is_ref: true
   }
   input_arg {
-    name: "local_step"
-    type: DT_INT64
+    name: "accum"
+    type_attr: "T"
+    is_ref: true
   }
   input_arg {
-    name: "gradient_indices"
-    type: DT_INT64
+    name: "accum_update"
+    type_attr: "T"
+    is_ref: true
   }
   input_arg {
-    name: "gradient_values"
-    type_attr: "dtype"
+    name: "lr"
+    type_attr: "T"
   }
   input_arg {
-    name: "gradient_shape"
-    type: DT_INT64
+    name: "rho"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "epsilon"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "grad"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "indices"
+    type_attr: "Tindices"
+  }
+  output_arg {
+    name: "out"
+    type_attr: "T"
+    is_ref: true
   }
   attr {
-    name: "dtype"
+    name: "T"
     type: "type"
     allowed_values {
       list {
@@ -33864,39 +44909,59 @@ op {
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
   attr {
-    name: "has_known_shape"
+    name: "Tindices"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+  attr {
+    name: "use_locking"
     type: "bool"
+    default_value {
+      b: false
+    }
   }
 }
 op {
-  name: "SparseAccumulatorTakeGradient"
+  name: "SparseApplyAdagrad"
   input_arg {
-    name: "handle"
-    type: DT_STRING
+    name: "var"
+    type_attr: "T"
     is_ref: true
   }
   input_arg {
-    name: "num_required"
-    type: DT_INT32
+    name: "accum"
+    type_attr: "T"
+    is_ref: true
   }
-  output_arg {
-    name: "indices"
-    type: DT_INT64
+  input_arg {
+    name: "lr"
+    type_attr: "T"
   }
-  output_arg {
-    name: "values"
-    type_attr: "dtype"
+  input_arg {
+    name: "grad"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "indices"
+    type_attr: "Tindices"
   }
   output_arg {
-    name: "shape"
-    type: DT_INT64
+    name: "out"
+    type_attr: "T"
+    is_ref: true
   }
   attr {
-    name: "dtype"
+    name: "T"
     type: "type"
     allowed_values {
       list {
@@ -33917,32 +44982,55 @@ op {
       }
     }
   }
+  attr {
+    name: "Tindices"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+  attr {
+    name: "use_locking"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
 }
 op {
-  name: "SparseAccumulatorTakeGradient"
+  name: "SparseApplyAdagrad"
   input_arg {
-    name: "handle"
-    type: DT_STRING
+    name: "var"
+    type_attr: "T"
     is_ref: true
   }
   input_arg {
-    name: "num_required"
-    type: DT_INT32
+    name: "accum"
+    type_attr: "T"
+    is_ref: true
   }
-  output_arg {
-    name: "indices"
-    type: DT_INT64
+  input_arg {
+    name: "lr"
+    type_attr: "T"
   }
-  output_arg {
-    name: "values"
-    type_attr: "dtype"
+  input_arg {
+    name: "grad"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "indices"
+    type_attr: "Tindices"
   }
   output_arg {
-    name: "shape"
-    type: DT_INT64
+    name: "out"
+    type_attr: "T"
+    is_ref: true
   }
   attr {
-    name: "dtype"
+    name: "T"
     type: "type"
     allowed_values {
       list {
@@ -33965,48 +45053,52 @@ op {
       }
     }
   }
+  attr {
+    name: "Tindices"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+  attr {
+    name: "use_locking"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
 }
 op {
-  name: "SparseAdd"
-  input_arg {
-    name: "a_indices"
-    type: DT_INT64
-  }
+  name: "SparseApplyAdagrad"
   input_arg {
-    name: "a_values"
+    name: "var"
     type_attr: "T"
+    is_ref: true
   }
   input_arg {
-    name: "a_shape"
-    type: DT_INT64
-  }
-  input_arg {
-    name: "b_indices"
-    type: DT_INT64
+    name: "accum"
+    type_attr: "T"
+    is_ref: true
   }
   input_arg {
-    name: "b_values"
+    name: "lr"
     type_attr: "T"
   }
   input_arg {
-    name: "b_shape"
-    type: DT_INT64
+    name: "grad"
+    type_attr: "T"
   }
   input_arg {
-    name: "thresh"
-    type_attr: "Treal"
-  }
-  output_arg {
-    name: "sum_indices"
-    type: DT_INT64
+    name: "indices"
+    type_attr: "Tindices"
   }
   output_arg {
-    name: "sum_values"
+    name: "out"
     type_attr: "T"
-  }
-  output_arg {
-    name: "sum_shape"
-    type: DT_INT64
+    is_ref: true
   }
   attr {
     name: "T"
@@ -34027,68 +45119,75 @@ op {
         type: DT_QUINT8
         type: DT_QINT32
         type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
   attr {
-    name: "Treal"
+    name: "Tindices"
     type: "type"
     allowed_values {
       list {
-        type: DT_FLOAT
-        type: DT_DOUBLE
         type: DT_INT32
         type: DT_INT64
-        type: DT_UINT8
-        type: DT_INT16
-        type: DT_INT8
-        type: DT_UINT16
-        type: DT_HALF
       }
     }
   }
+  attr {
+    name: "use_locking"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
 }
 op {
-  name: "SparseAdd"
+  name: "SparseApplyAdagradDA"
   input_arg {
-    name: "a_indices"
-    type: DT_INT64
+    name: "var"
+    type_attr: "T"
+    is_ref: true
   }
   input_arg {
-    name: "a_values"
+    name: "gradient_accumulator"
     type_attr: "T"
+    is_ref: true
   }
   input_arg {
-    name: "a_shape"
-    type: DT_INT64
+    name: "gradient_squared_accumulator"
+    type_attr: "T"
+    is_ref: true
   }
   input_arg {
-    name: "b_indices"
-    type: DT_INT64
+    name: "grad"
+    type_attr: "T"
   }
   input_arg {
-    name: "b_values"
+    name: "indices"
+    type_attr: "Tindices"
+  }
+  input_arg {
+    name: "lr"
     type_attr: "T"
   }
   input_arg {
-    name: "b_shape"
-    type: DT_INT64
+    name: "l1"
+    type_attr: "T"
   }
   input_arg {
-    name: "thresh"
-    type_attr: "Treal"
+    name: "l2"
+    type_attr: "T"
   }
-  output_arg {
-    name: "sum_indices"
+  input_arg {
+    name: "global_step"
     type: DT_INT64
   }
   output_arg {
-    name: "sum_values"
+    name: "out"
     type_attr: "T"
-  }
-  output_arg {
-    name: "sum_shape"
-    type: DT_INT64
+    is_ref: true
   }
   attr {
     name: "T"
@@ -34109,105 +45208,72 @@ op {
         type: DT_QUINT8
         type: DT_QINT32
         type: DT_HALF
-        type: DT_UINT32
-        type: DT_UINT64
       }
     }
   }
   attr {
-    name: "Treal"
+    name: "Tindices"
     type: "type"
     allowed_values {
       list {
-        type: DT_FLOAT
-        type: DT_DOUBLE
         type: DT_INT32
         type: DT_INT64
-        type: DT_UINT8
-        type: DT_INT16
-        type: DT_INT8
-        type: DT_UINT16
-        type: DT_HALF
-        type: DT_UINT32
-        type: DT_UINT64
       }
     }
   }
+  attr {
+    name: "use_locking"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
 }
 op {
-  name: "SparseAddGrad"
+  name: "SparseApplyAdagradDA"
   input_arg {
-    name: "backprop_val_grad"
+    name: "var"
     type_attr: "T"
+    is_ref: true
   }
   input_arg {
-    name: "a_indices"
-    type: DT_INT64
-  }
-  input_arg {
-    name: "b_indices"
-    type: DT_INT64
+    name: "gradient_accumulator"
+    type_attr: "T"
+    is_ref: true
   }
   input_arg {
-    name: "sum_indices"
-    type: DT_INT64
-  }
-  output_arg {
-    name: "a_val_grad"
+    name: "gradient_squared_accumulator"
     type_attr: "T"
+    is_ref: true
   }
-  output_arg {
-    name: "b_val_grad"
+  input_arg {
+    name: "grad"
     type_attr: "T"
   }
-  attr {
-    name: "T"
-    type: "type"
-    allowed_values {
-      list {
-        type: DT_FLOAT
-        type: DT_DOUBLE
-        type: DT_INT64
-        type: DT_INT32
-        type: DT_UINT8
-        type: DT_UINT16
-        type: DT_INT16
-        type: DT_INT8
-        type: DT_COMPLEX64
-        type: DT_COMPLEX128
-        type: DT_QINT8
-        type: DT_QUINT8
-        type: DT_QINT32
-        type: DT_HALF
-      }
-    }
+  input_arg {
+    name: "indices"
+    type_attr: "Tindices"
   }
-}
-op {
-  name: "SparseAddGrad"
   input_arg {
-    name: "backprop_val_grad"
+    name: "lr"
     type_attr: "T"
   }
   input_arg {
-    name: "a_indices"
-    type: DT_INT64
+    name: "l1"
+    type_attr: "T"
   }
   input_arg {
-    name: "b_indices"
-    type: DT_INT64
+    name: "l2"
+    type_attr: "T"
   }
   input_arg {
-    name: "sum_indices"
+    name: "global_step"
     type: DT_INT64
   }
   output_arg {
-    name: "a_val_grad"
-    type_attr: "T"
-  }
-  output_arg {
-    name: "b_val_grad"
+    name: "out"
     type_attr: "T"
+    is_ref: true
   }
   attr {
     name: "T"
@@ -34233,43 +45299,64 @@ op {
       }
     }
   }
+  attr {
+    name: "Tindices"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+  attr {
+    name: "use_locking"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
 }
 op {
-  name: "SparseApplyAdadelta"
+  name: "SparseApplyAdagradDA"
   input_arg {
     name: "var"
     type_attr: "T"
     is_ref: true
   }
   input_arg {
-    name: "accum"
+    name: "gradient_accumulator"
     type_attr: "T"
     is_ref: true
   }
   input_arg {
-    name: "accum_update"
+    name: "gradient_squared_accumulator"
     type_attr: "T"
     is_ref: true
   }
   input_arg {
-    name: "lr"
+    name: "grad"
     type_attr: "T"
   }
   input_arg {
-    name: "rho"
+    name: "indices"
+    type_attr: "Tindices"
+  }
+  input_arg {
+    name: "lr"
     type_attr: "T"
   }
   input_arg {
-    name: "epsilon"
+    name: "l1"
     type_attr: "T"
   }
   input_arg {
-    name: "grad"
+    name: "l2"
     type_attr: "T"
   }
   input_arg {
-    name: "indices"
-    type_attr: "Tindices"
+    name: "global_step"
+    type: DT_INT64
   }
   output_arg {
     name: "out"
@@ -34295,6 +45382,9 @@ op {
         type: DT_QUINT8
         type: DT_QINT32
         type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
@@ -34317,19 +45407,24 @@ op {
   }
 }
 op {
-  name: "SparseApplyAdadelta"
+  name: "SparseApplyCenteredRMSProp"
   input_arg {
     name: "var"
     type_attr: "T"
     is_ref: true
   }
   input_arg {
-    name: "accum"
+    name: "mg"
     type_attr: "T"
     is_ref: true
   }
   input_arg {
-    name: "accum_update"
+    name: "ms"
+    type_attr: "T"
+    is_ref: true
+  }
+  input_arg {
+    name: "mom"
     type_attr: "T"
     is_ref: true
   }
@@ -34341,6 +45436,10 @@ op {
     name: "rho"
     type_attr: "T"
   }
+  input_arg {
+    name: "momentum"
+    type_attr: "T"
+  }
   input_arg {
     name: "epsilon"
     type_attr: "T"
@@ -34377,8 +45476,6 @@ op {
         type: DT_QUINT8
         type: DT_QINT32
         type: DT_HALF
-        type: DT_UINT32
-        type: DT_UINT64
       }
     }
   }
@@ -34401,14 +45498,24 @@ op {
   }
 }
 op {
-  name: "SparseApplyAdagrad"
+  name: "SparseApplyCenteredRMSProp"
   input_arg {
     name: "var"
     type_attr: "T"
     is_ref: true
   }
   input_arg {
-    name: "accum"
+    name: "mg"
+    type_attr: "T"
+    is_ref: true
+  }
+  input_arg {
+    name: "ms"
+    type_attr: "T"
+    is_ref: true
+  }
+  input_arg {
+    name: "mom"
     type_attr: "T"
     is_ref: true
   }
@@ -34416,6 +45523,18 @@ op {
     name: "lr"
     type_attr: "T"
   }
+  input_arg {
+    name: "rho"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "momentum"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "epsilon"
+    type_attr: "T"
+  }
   input_arg {
     name: "grad"
     type_attr: "T"
@@ -34448,6 +45567,8 @@ op {
         type: DT_QUINT8
         type: DT_QINT32
         type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
       }
     }
   }
@@ -34470,14 +45591,24 @@ op {
   }
 }
 op {
-  name: "SparseApplyAdagrad"
+  name: "SparseApplyCenteredRMSProp"
   input_arg {
     name: "var"
     type_attr: "T"
     is_ref: true
   }
   input_arg {
-    name: "accum"
+    name: "mg"
+    type_attr: "T"
+    is_ref: true
+  }
+  input_arg {
+    name: "ms"
+    type_attr: "T"
+    is_ref: true
+  }
+  input_arg {
+    name: "mom"
     type_attr: "T"
     is_ref: true
   }
@@ -34485,6 +45616,18 @@ op {
     name: "lr"
     type_attr: "T"
   }
+  input_arg {
+    name: "rho"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "momentum"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "epsilon"
+    type_attr: "T"
+  }
   input_arg {
     name: "grad"
     type_attr: "T"
@@ -34519,6 +45662,7 @@ op {
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
@@ -34541,19 +45685,19 @@ op {
   }
 }
 op {
-  name: "SparseApplyAdagradDA"
+  name: "SparseApplyFtrl"
   input_arg {
     name: "var"
     type_attr: "T"
     is_ref: true
   }
   input_arg {
-    name: "gradient_accumulator"
+    name: "accum"
     type_attr: "T"
     is_ref: true
   }
   input_arg {
-    name: "gradient_squared_accumulator"
+    name: "linear"
     type_attr: "T"
     is_ref: true
   }
@@ -34578,8 +45722,8 @@ op {
     type_attr: "T"
   }
   input_arg {
-    name: "global_step"
-    type: DT_INT64
+    name: "lr_power"
+    type_attr: "T"
   }
   output_arg {
     name: "out"
@@ -34627,19 +45771,19 @@ op {
   }
 }
 op {
-  name: "SparseApplyAdagradDA"
+  name: "SparseApplyFtrl"
   input_arg {
     name: "var"
     type_attr: "T"
     is_ref: true
   }
   input_arg {
-    name: "gradient_accumulator"
+    name: "accum"
     type_attr: "T"
     is_ref: true
   }
   input_arg {
-    name: "gradient_squared_accumulator"
+    name: "linear"
     type_attr: "T"
     is_ref: true
   }
@@ -34664,8 +45808,8 @@ op {
     type_attr: "T"
   }
   input_arg {
-    name: "global_step"
-    type: DT_INT64
+    name: "lr_power"
+    type_attr: "T"
   }
   output_arg {
     name: "out"
@@ -34715,51 +45859,46 @@ op {
   }
 }
 op {
-  name: "SparseApplyCenteredRMSProp"
+  name: "SparseApplyFtrl"
   input_arg {
     name: "var"
     type_attr: "T"
     is_ref: true
   }
   input_arg {
-    name: "mg"
+    name: "accum"
     type_attr: "T"
     is_ref: true
   }
   input_arg {
-    name: "ms"
+    name: "linear"
     type_attr: "T"
     is_ref: true
   }
   input_arg {
-    name: "mom"
+    name: "grad"
     type_attr: "T"
-    is_ref: true
   }
   input_arg {
-    name: "lr"
-    type_attr: "T"
+    name: "indices"
+    type_attr: "Tindices"
   }
   input_arg {
-    name: "rho"
+    name: "lr"
     type_attr: "T"
   }
   input_arg {
-    name: "momentum"
+    name: "l1"
     type_attr: "T"
   }
   input_arg {
-    name: "epsilon"
+    name: "l2"
     type_attr: "T"
   }
   input_arg {
-    name: "grad"
+    name: "lr_power"
     type_attr: "T"
   }
-  input_arg {
-    name: "indices"
-    type_attr: "Tindices"
-  }
   output_arg {
     name: "out"
     type_attr: "T"
@@ -34784,6 +45923,9 @@ op {
         type: DT_QUINT8
         type: DT_QINT32
         type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
@@ -34806,50 +45948,49 @@ op {
   }
 }
 op {
-  name: "SparseApplyCenteredRMSProp"
+  name: "SparseApplyFtrlV2"
   input_arg {
     name: "var"
     type_attr: "T"
     is_ref: true
   }
   input_arg {
-    name: "mg"
+    name: "accum"
     type_attr: "T"
     is_ref: true
   }
   input_arg {
-    name: "ms"
+    name: "linear"
     type_attr: "T"
     is_ref: true
   }
   input_arg {
-    name: "mom"
+    name: "grad"
     type_attr: "T"
-    is_ref: true
   }
   input_arg {
-    name: "lr"
-    type_attr: "T"
+    name: "indices"
+    type_attr: "Tindices"
   }
   input_arg {
-    name: "rho"
+    name: "lr"
     type_attr: "T"
   }
   input_arg {
-    name: "momentum"
+    name: "l1"
     type_attr: "T"
   }
   input_arg {
-    name: "epsilon"
+    name: "l2"
     type_attr: "T"
   }
   input_arg {
-    name: "grad"
+    name: "l2_shrinkage"
     type_attr: "T"
   }
   input_arg {
-    name: "indices"
-    type_attr: "Tindices"
+    name: "lr_power"
+    type_attr: "T"
   }
   output_arg {
     name: "out"
@@ -34875,8 +46016,6 @@ op {
         type: DT_QUINT8
         type: DT_QINT32
         type: DT_HALF
-        type: DT_UINT32
-        type: DT_UINT64
       }
     }
   }
@@ -34899,7 +46038,7 @@ op {
   }
 }
 op {
-  name: "SparseApplyFtrl"
+  name: "SparseApplyFtrlV2"
   input_arg {
     name: "var"
     type_attr: "T"
@@ -34935,6 +46074,10 @@ op {
     name: "l2"
     type_attr: "T"
   }
+  input_arg {
+    name: "l2_shrinkage"
+    type_attr: "T"
+  }
   input_arg {
     name: "lr_power"
     type_attr: "T"
@@ -34963,6 +46106,8 @@ op {
         type: DT_QUINT8
         type: DT_QINT32
         type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
       }
     }
   }
@@ -34985,7 +46130,7 @@ op {
   }
 }
 op {
-  name: "SparseApplyFtrl"
+  name: "SparseApplyFtrlV2"
   input_arg {
     name: "var"
     type_attr: "T"
@@ -35021,6 +46166,10 @@ op {
     name: "l2"
     type_attr: "T"
   }
+  input_arg {
+    name: "l2_shrinkage"
+    type_attr: "T"
+  }
   input_arg {
     name: "lr_power"
     type_attr: "T"
@@ -35051,6 +46200,7 @@ op {
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
@@ -35073,7 +46223,7 @@ op {
   }
 }
 op {
-  name: "SparseApplyFtrlV2"
+  name: "SparseApplyMomentum"
   input_arg {
     name: "var"
     type_attr: "T"
@@ -35085,9 +46235,8 @@ op {
     is_ref: true
   }
   input_arg {
-    name: "linear"
+    name: "lr"
     type_attr: "T"
-    is_ref: true
   }
   input_arg {
     name: "grad"
@@ -35098,23 +46247,7 @@ op {
     type_attr: "Tindices"
   }
   input_arg {
-    name: "lr"
-    type_attr: "T"
-  }
-  input_arg {
-    name: "l1"
-    type_attr: "T"
-  }
-  input_arg {
-    name: "l2"
-    type_attr: "T"
-  }
-  input_arg {
-    name: "l2_shrinkage"
-    type_attr: "T"
-  }
-  input_arg {
-    name: "lr_power"
+    name: "momentum"
     type_attr: "T"
   }
   output_arg {
@@ -35161,9 +46294,16 @@ op {
       b: false
     }
   }
+  attr {
+    name: "use_nesterov"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
 }
 op {
-  name: "SparseApplyFtrlV2"
+  name: "SparseApplyMomentum"
   input_arg {
     name: "var"
     type_attr: "T"
@@ -35175,9 +46315,8 @@ op {
     is_ref: true
   }
   input_arg {
-    name: "linear"
+    name: "lr"
     type_attr: "T"
-    is_ref: true
   }
   input_arg {
     name: "grad"
@@ -35188,23 +46327,7 @@ op {
     type_attr: "Tindices"
   }
   input_arg {
-    name: "lr"
-    type_attr: "T"
-  }
-  input_arg {
-    name: "l1"
-    type_attr: "T"
-  }
-  input_arg {
-    name: "l2"
-    type_attr: "T"
-  }
-  input_arg {
-    name: "l2_shrinkage"
-    type_attr: "T"
-  }
-  input_arg {
-    name: "lr_power"
+    name: "momentum"
     type_attr: "T"
   }
   output_arg {
@@ -35253,6 +46376,13 @@ op {
       b: false
     }
   }
+  attr {
+    name: "use_nesterov"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
 }
 op {
   name: "SparseApplyMomentum"
@@ -35306,6 +46436,9 @@ op {
         type: DT_QUINT8
         type: DT_QINT32
         type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
@@ -35335,7 +46468,7 @@ op {
   }
 }
 op {
-  name: "SparseApplyMomentum"
+  name: "SparseApplyProximalAdagrad"
   input_arg {
     name: "var"
     type_attr: "T"
@@ -35351,17 +46484,21 @@ op {
     type_attr: "T"
   }
   input_arg {
-    name: "grad"
+    name: "l1"
     type_attr: "T"
   }
   input_arg {
-    name: "indices"
-    type_attr: "Tindices"
+    name: "l2"
+    type_attr: "T"
   }
   input_arg {
-    name: "momentum"
+    name: "grad"
     type_attr: "T"
   }
+  input_arg {
+    name: "indices"
+    type_attr: "Tindices"
+  }
   output_arg {
     name: "out"
     type_attr: "T"
@@ -35386,8 +46523,6 @@ op {
         type: DT_QUINT8
         type: DT_QINT32
         type: DT_HALF
-        type: DT_UINT32
-        type: DT_UINT64
       }
     }
   }
@@ -35408,13 +46543,6 @@ op {
       b: false
     }
   }
-  attr {
-    name: "use_nesterov"
-    type: "bool"
-    default_value {
-      b: false
-    }
-  }
 }
 op {
   name: "SparseApplyProximalAdagrad"
@@ -35472,6 +46600,8 @@ op {
         type: DT_QUINT8
         type: DT_QINT32
         type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
       }
     }
   }
@@ -35551,6 +46681,7 @@ op {
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
@@ -35718,6 +46849,167 @@ op {
     }
   }
 }
+op {
+  name: "SparseApplyProximalGradientDescent"
+  input_arg {
+    name: "var"
+    type_attr: "T"
+    is_ref: true
+  }
+  input_arg {
+    name: "alpha"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "l1"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "l2"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "grad"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "indices"
+    type_attr: "Tindices"
+  }
+  output_arg {
+    name: "out"
+    type_attr: "T"
+    is_ref: true
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
+      }
+    }
+  }
+  attr {
+    name: "Tindices"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+  attr {
+    name: "use_locking"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+}
+op {
+  name: "SparseApplyRMSProp"
+  input_arg {
+    name: "var"
+    type_attr: "T"
+    is_ref: true
+  }
+  input_arg {
+    name: "ms"
+    type_attr: "T"
+    is_ref: true
+  }
+  input_arg {
+    name: "mom"
+    type_attr: "T"
+    is_ref: true
+  }
+  input_arg {
+    name: "lr"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "rho"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "momentum"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "epsilon"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "grad"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "indices"
+    type_attr: "Tindices"
+  }
+  output_arg {
+    name: "out"
+    type_attr: "T"
+    is_ref: true
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+      }
+    }
+  }
+  attr {
+    name: "Tindices"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+  attr {
+    name: "use_locking"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+}
 op {
   name: "SparseApplyRMSProp"
   input_arg {
@@ -35783,6 +47075,8 @@ op {
         type: DT_QUINT8
         type: DT_QINT32
         type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
       }
     }
   }
@@ -35871,6 +47165,7 @@ op {
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
@@ -36036,6 +47331,58 @@ op {
   }
   is_stateful: true
 }
+op {
+  name: "SparseConditionalAccumulator"
+  output_arg {
+    name: "handle"
+    type: DT_STRING
+    is_ref: true
+  }
+  attr {
+    name: "dtype"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
+      }
+    }
+  }
+  attr {
+    name: "shape"
+    type: "shape"
+  }
+  attr {
+    name: "container"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  attr {
+    name: "shared_name"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  is_stateful: true
+}
 op {
   name: "SparseCross"
   input_arg {
@@ -36221,6 +47568,54 @@ op {
     }
   }
 }
+op {
+  name: "SparseDenseCwiseAdd"
+  input_arg {
+    name: "sp_indices"
+    type: DT_INT64
+  }
+  input_arg {
+    name: "sp_values"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "sp_shape"
+    type: DT_INT64
+  }
+  input_arg {
+    name: "dense"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
+      }
+    }
+  }
+}
 op {
   name: "SparseDenseCwiseDiv"
   input_arg {
@@ -36313,6 +47708,99 @@ op {
     }
   }
 }
+op {
+  name: "SparseDenseCwiseDiv"
+  input_arg {
+    name: "sp_indices"
+    type: DT_INT64
+  }
+  input_arg {
+    name: "sp_values"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "sp_shape"
+    type: DT_INT64
+  }
+  input_arg {
+    name: "dense"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
+      }
+    }
+  }
+}
+op {
+  name: "SparseDenseCwiseMul"
+  input_arg {
+    name: "sp_indices"
+    type: DT_INT64
+  }
+  input_arg {
+    name: "sp_values"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "sp_shape"
+    type: DT_INT64
+  }
+  input_arg {
+    name: "dense"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+      }
+    }
+  }
+}
 op {
   name: "SparseDenseCwiseMul"
   input_arg {
@@ -36354,6 +47842,8 @@ op {
         type: DT_QUINT8
         type: DT_QINT32
         type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
       }
     }
   }
@@ -36401,6 +47891,7 @@ op {
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
@@ -36584,7 +48075,218 @@ op {
   }
 }
 op {
-  name: "SparseReduceMax"
+  name: "SparseReduceMax"
+  input_arg {
+    name: "input_indices"
+    type: DT_INT64
+  }
+  input_arg {
+    name: "input_values"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "input_shape"
+    type: DT_INT64
+  }
+  input_arg {
+    name: "reduction_axes"
+    type: DT_INT32
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "keep_dims"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_UINT16
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+      }
+    }
+  }
+}
+op {
+  name: "SparseReduceMax"
+  input_arg {
+    name: "input_indices"
+    type: DT_INT64
+  }
+  input_arg {
+    name: "input_values"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "input_shape"
+    type: DT_INT64
+  }
+  input_arg {
+    name: "reduction_axes"
+    type: DT_INT32
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "keep_dims"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_UINT16
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
+      }
+    }
+  }
+}
+op {
+  name: "SparseReduceMaxSparse"
+  input_arg {
+    name: "input_indices"
+    type: DT_INT64
+  }
+  input_arg {
+    name: "input_values"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "input_shape"
+    type: DT_INT64
+  }
+  input_arg {
+    name: "reduction_axes"
+    type: DT_INT32
+  }
+  output_arg {
+    name: "output_indices"
+    type: DT_INT64
+  }
+  output_arg {
+    name: "output_values"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "output_shape"
+    type: DT_INT64
+  }
+  attr {
+    name: "keep_dims"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_UINT16
+        type: DT_HALF
+      }
+    }
+  }
+}
+op {
+  name: "SparseReduceMaxSparse"
+  input_arg {
+    name: "input_indices"
+    type: DT_INT64
+  }
+  input_arg {
+    name: "input_values"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "input_shape"
+    type: DT_INT64
+  }
+  input_arg {
+    name: "reduction_axes"
+    type: DT_INT32
+  }
+  output_arg {
+    name: "output_indices"
+    type: DT_INT64
+  }
+  output_arg {
+    name: "output_values"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "output_shape"
+    type: DT_INT64
+  }
+  attr {
+    name: "keep_dims"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_UINT16
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+      }
+    }
+  }
+}
+op {
+  name: "SparseReduceMaxSparse"
   input_arg {
     name: "input_indices"
     type: DT_INT64
@@ -36602,9 +48304,17 @@ op {
     type: DT_INT32
   }
   output_arg {
-    name: "output"
+    name: "output_indices"
+    type: DT_INT64
+  }
+  output_arg {
+    name: "output_values"
     type_attr: "T"
   }
+  output_arg {
+    name: "output_shape"
+    type: DT_INT64
+  }
   attr {
     name: "keep_dims"
     type: "bool"
@@ -36628,12 +48338,13 @@ op {
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
 }
 op {
-  name: "SparseReduceMaxSparse"
+  name: "SparseReduceSum"
   input_arg {
     name: "input_indices"
     type: DT_INT64
@@ -36651,17 +48362,9 @@ op {
     type: DT_INT32
   }
   output_arg {
-    name: "output_indices"
-    type: DT_INT64
-  }
-  output_arg {
-    name: "output_values"
+    name: "output"
     type_attr: "T"
   }
-  output_arg {
-    name: "output_shape"
-    type: DT_INT64
-  }
   attr {
     name: "keep_dims"
     type: "bool"
@@ -36676,19 +48379,24 @@ op {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT32
         type: DT_INT64
+        type: DT_INT32
         type: DT_UINT8
+        type: DT_UINT16
         type: DT_INT16
         type: DT_INT8
-        type: DT_UINT16
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
         type: DT_HALF
       }
     }
   }
 }
 op {
-  name: "SparseReduceMaxSparse"
+  name: "SparseReduceSum"
   input_arg {
     name: "input_indices"
     type: DT_INT64
@@ -36706,17 +48414,9 @@ op {
     type: DT_INT32
   }
   output_arg {
-    name: "output_indices"
-    type: DT_INT64
-  }
-  output_arg {
-    name: "output_values"
+    name: "output"
     type_attr: "T"
   }
-  output_arg {
-    name: "output_shape"
-    type: DT_INT64
-  }
   attr {
     name: "keep_dims"
     type: "bool"
@@ -36731,12 +48431,17 @@ op {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT32
         type: DT_INT64
+        type: DT_INT32
         type: DT_UINT8
+        type: DT_UINT16
         type: DT_INT16
         type: DT_INT8
-        type: DT_UINT16
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
@@ -36792,12 +48497,15 @@ op {
         type: DT_QUINT8
         type: DT_QINT32
         type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
 }
 op {
-  name: "SparseReduceSum"
+  name: "SparseReduceSumSparse"
   input_arg {
     name: "input_indices"
     type: DT_INT64
@@ -36815,9 +48523,17 @@ op {
     type: DT_INT32
   }
   output_arg {
-    name: "output"
+    name: "output_indices"
+    type: DT_INT64
+  }
+  output_arg {
+    name: "output_values"
     type_attr: "T"
   }
+  output_arg {
+    name: "output_shape"
+    type: DT_INT64
+  }
   attr {
     name: "keep_dims"
     type: "bool"
@@ -36844,8 +48560,6 @@ op {
         type: DT_QUINT8
         type: DT_QINT32
         type: DT_HALF
-        type: DT_UINT32
-        type: DT_UINT64
       }
     }
   }
@@ -36906,6 +48620,8 @@ op {
         type: DT_QUINT8
         type: DT_QINT32
         type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
       }
     }
   }
@@ -36968,6 +48684,7 @@ op {
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
@@ -37110,6 +48827,65 @@ op {
     }
   }
 }
+op {
+  name: "SparseSegmentMeanWithNumSegments"
+  input_arg {
+    name: "data"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "indices"
+    type_attr: "Tidx"
+  }
+  input_arg {
+    name: "segment_ids"
+    type: DT_INT32
+  }
+  input_arg {
+    name: "num_segments"
+    type_attr: "Tnumsegments"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+      }
+    }
+  }
+  attr {
+    name: "Tidx"
+    type: "type"
+    default_value {
+      type: DT_INT32
+    }
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+  attr {
+    name: "Tnumsegments"
+    type: "type"
+    default_value {
+      type: DT_INT32
+    }
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+}
 op {
   name: "SparseSegmentSqrtN"
   input_arg {
@@ -37153,9 +48929,163 @@ op {
   }
 }
 op {
-  name: "SparseSegmentSqrtNGrad"
+  name: "SparseSegmentSqrtNGrad"
+  input_arg {
+    name: "grad"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "indices"
+    type_attr: "Tidx"
+  }
+  input_arg {
+    name: "segment_ids"
+    type: DT_INT32
+  }
+  input_arg {
+    name: "output_dim0"
+    type: DT_INT32
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+      }
+    }
+  }
+  attr {
+    name: "Tidx"
+    type: "type"
+    default_value {
+      type: DT_INT32
+    }
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+}
+op {
+  name: "SparseSegmentSqrtNWithNumSegments"
+  input_arg {
+    name: "data"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "indices"
+    type_attr: "Tidx"
+  }
+  input_arg {
+    name: "segment_ids"
+    type: DT_INT32
+  }
+  input_arg {
+    name: "num_segments"
+    type_attr: "Tnumsegments"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+      }
+    }
+  }
+  attr {
+    name: "Tidx"
+    type: "type"
+    default_value {
+      type: DT_INT32
+    }
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+  attr {
+    name: "Tnumsegments"
+    type: "type"
+    default_value {
+      type: DT_INT32
+    }
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+}
+op {
+  name: "SparseSegmentSum"
+  input_arg {
+    name: "data"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "indices"
+    type_attr: "Tidx"
+  }
+  input_arg {
+    name: "segment_ids"
+    type: DT_INT32
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_UINT16
+        type: DT_HALF
+      }
+    }
+  }
+  attr {
+    name: "Tidx"
+    type: "type"
+    default_value {
+      type: DT_INT32
+    }
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+}
+op {
+  name: "SparseSegmentSum"
   input_arg {
-    name: "grad"
+    name: "data"
     type_attr: "T"
   }
   input_arg {
@@ -37166,10 +49096,6 @@ op {
     name: "segment_ids"
     type: DT_INT32
   }
-  input_arg {
-    name: "output_dim0"
-    type: DT_INT32
-  }
   output_arg {
     name: "output"
     type_attr: "T"
@@ -37181,6 +49107,15 @@ op {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_UINT16
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
       }
     }
   }
@@ -37230,6 +49165,9 @@ op {
         type: DT_INT8
         type: DT_UINT16
         type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
@@ -37248,7 +49186,7 @@ op {
   }
 }
 op {
-  name: "SparseSegmentSum"
+  name: "SparseSegmentSumWithNumSegments"
   input_arg {
     name: "data"
     type_attr: "T"
@@ -37261,6 +49199,10 @@ op {
     name: "segment_ids"
     type: DT_INT32
   }
+  input_arg {
+    name: "num_segments"
+    type_attr: "Tnumsegments"
+  }
   output_arg {
     name: "output"
     type_attr: "T"
@@ -37281,6 +49223,7 @@ op {
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
@@ -37297,6 +49240,19 @@ op {
       }
     }
   }
+  attr {
+    name: "Tnumsegments"
+    type: "type"
+    default_value {
+      type: DT_INT32
+    }
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
 }
 op {
   name: "SparseSlice"
@@ -37409,6 +49365,50 @@ op {
     }
   }
 }
+op {
+  name: "SparseSoftmaxCrossEntropyWithLogits"
+  input_arg {
+    name: "features"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "labels"
+    type_attr: "Tlabels"
+  }
+  output_arg {
+    name: "loss"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "backprop"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_BFLOAT16
+        type: DT_FLOAT
+        type: DT_DOUBLE
+      }
+    }
+  }
+  attr {
+    name: "Tlabels"
+    type: "type"
+    default_value {
+      type: DT_INT64
+    }
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+}
 op {
   name: "SparseSparseMaximum"
   input_arg {
@@ -37515,6 +49515,118 @@ op {
     }
   }
 }
+op {
+  name: "SparseSparseMaximum"
+  input_arg {
+    name: "a_indices"
+    type: DT_INT64
+  }
+  input_arg {
+    name: "a_values"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "a_shape"
+    type: DT_INT64
+  }
+  input_arg {
+    name: "b_indices"
+    type: DT_INT64
+  }
+  input_arg {
+    name: "b_values"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "b_shape"
+    type: DT_INT64
+  }
+  output_arg {
+    name: "output_indices"
+    type: DT_INT64
+  }
+  output_arg {
+    name: "output_values"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_UINT16
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
+      }
+    }
+  }
+}
+op {
+  name: "SparseSparseMinimum"
+  input_arg {
+    name: "a_indices"
+    type: DT_INT64
+  }
+  input_arg {
+    name: "a_values"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "a_shape"
+    type: DT_INT64
+  }
+  input_arg {
+    name: "b_indices"
+    type: DT_INT64
+  }
+  input_arg {
+    name: "b_values"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "b_shape"
+    type: DT_INT64
+  }
+  output_arg {
+    name: "output_indices"
+    type: DT_INT64
+  }
+  output_arg {
+    name: "output_values"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+      }
+    }
+  }
+}
 op {
   name: "SparseSparseMinimum"
   input_arg {
@@ -37568,6 +49680,8 @@ op {
         type: DT_QUINT8
         type: DT_QINT32
         type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
       }
     }
   }
@@ -37627,6 +49741,7 @@ op {
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
@@ -37787,6 +49902,64 @@ op {
     }
   }
 }
+op {
+  name: "SparseTensorDenseAdd"
+  input_arg {
+    name: "a_indices"
+    type_attr: "Tindices"
+  }
+  input_arg {
+    name: "a_values"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "a_shape"
+    type_attr: "Tindices"
+  }
+  input_arg {
+    name: "b"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
+      }
+    }
+  }
+  attr {
+    name: "Tindices"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+}
 op {
   name: "SparseTensorDenseMatMul"
   input_arg {
@@ -38140,6 +50313,31 @@ op {
     }
   }
 }
+op {
+  name: "Sqrt"
+  input_arg {
+    name: "x"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "y"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_BFLOAT16
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+      }
+    }
+  }
+}
 op {
   name: "SqrtGrad"
   input_arg {
@@ -38196,6 +50394,61 @@ op {
     }
   }
 }
+op {
+  name: "SqrtGrad"
+  input_arg {
+    name: "y"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "dy"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "z"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_BFLOAT16
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+      }
+    }
+  }
+}
+op {
+  name: "Square"
+  input_arg {
+    name: "x"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "y"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+      }
+    }
+  }
+}
 op {
   name: "Square"
   input_arg {
@@ -38212,6 +50465,7 @@ op {
     allowed_values {
       list {
         type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
         type: DT_INT32
@@ -38253,6 +50507,38 @@ op {
   }
   is_commutative: true
 }
+op {
+  name: "SquaredDifference"
+  input_arg {
+    name: "x"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "y"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "z"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_BFLOAT16
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+      }
+    }
+  }
+  is_commutative: true
+}
 op {
   name: "Squeeze"
   input_arg {
@@ -38664,6 +50950,61 @@ op {
     }
   }
 }
+op {
+  name: "StatelessRandomNormal"
+  input_arg {
+    name: "shape"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "seed"
+    type_attr: "Tseed"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "dtype"
+  }
+  attr {
+    name: "dtype"
+    type: "type"
+    default_value {
+      type: DT_FLOAT
+    }
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_FLOAT
+        type: DT_DOUBLE
+      }
+    }
+  }
+  attr {
+    name: "T"
+    type: "type"
+    default_value {
+      type: DT_INT32
+    }
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+  attr {
+    name: "Tseed"
+    type: "type"
+    default_value {
+      type: DT_INT64
+    }
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+}
 op {
   name: "StatelessRandomUniform"
   input_arg {
@@ -38706,6 +51047,61 @@ op {
     }
   }
 }
+op {
+  name: "StatelessRandomUniform"
+  input_arg {
+    name: "shape"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "seed"
+    type_attr: "Tseed"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "dtype"
+  }
+  attr {
+    name: "dtype"
+    type: "type"
+    default_value {
+      type: DT_FLOAT
+    }
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_FLOAT
+        type: DT_DOUBLE
+      }
+    }
+  }
+  attr {
+    name: "T"
+    type: "type"
+    default_value {
+      type: DT_INT32
+    }
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+  attr {
+    name: "Tseed"
+    type: "type"
+    default_value {
+      type: DT_INT64
+    }
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+}
 op {
   name: "StatelessTruncatedNormal"
   input_arg {
@@ -38748,6 +51144,61 @@ op {
     }
   }
 }
+op {
+  name: "StatelessTruncatedNormal"
+  input_arg {
+    name: "shape"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "seed"
+    type_attr: "Tseed"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "dtype"
+  }
+  attr {
+    name: "dtype"
+    type: "type"
+    default_value {
+      type: DT_FLOAT
+    }
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_FLOAT
+        type: DT_DOUBLE
+      }
+    }
+  }
+  attr {
+    name: "T"
+    type: "type"
+    default_value {
+      type: DT_INT32
+    }
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+  attr {
+    name: "Tseed"
+    type: "type"
+    default_value {
+      type: DT_INT64
+    }
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+}
 op {
   name: "StatsAggregatorHandle"
   output_arg {
@@ -39270,6 +51721,41 @@ op {
     }
   }
 }
+op {
+  name: "Sub"
+  input_arg {
+    name: "x"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "y"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "z"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_BFLOAT16
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_UINT8
+        type: DT_INT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+      }
+    }
+  }
+}
 op {
   name: "Substr"
   input_arg {
@@ -39415,6 +51901,66 @@ op {
     }
   }
 }
+op {
+  name: "Sum"
+  input_arg {
+    name: "input"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "reduction_indices"
+    type_attr: "Tidx"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "keep_dims"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
+      }
+    }
+  }
+  attr {
+    name: "Tidx"
+    type: "type"
+    default_value {
+      type: DT_INT32
+    }
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+}
 op {
   name: "Svd"
   input_arg {
@@ -39708,6 +52254,57 @@ op {
     }
   }
 }
+op {
+  name: "Tan"
+  input_arg {
+    name: "x"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "y"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_BFLOAT16
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+      }
+    }
+  }
+}
+op {
+  name: "Tanh"
+  input_arg {
+    name: "x"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "y"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+      }
+    }
+  }
+}
 op {
   name: "Tanh"
   input_arg {
@@ -39724,6 +52321,7 @@ op {
     allowed_values {
       list {
         type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
         type: DT_COMPLEX64
@@ -39788,6 +52386,35 @@ op {
     }
   }
 }
+op {
+  name: "TanhGrad"
+  input_arg {
+    name: "y"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "dy"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "z"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_BFLOAT16
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+      }
+    }
+  }
+}
 op {
   name: "TemporaryVariable"
   output_arg {
@@ -41200,6 +53827,56 @@ op {
     version: 7
   }
 }
+op {
+  name: "TopK"
+  input_arg {
+    name: "input"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "values"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "indices"
+    type: DT_INT32
+  }
+  attr {
+    name: "k"
+    type: "int"
+    has_minimum: true
+  }
+  attr {
+    name: "sorted"
+    type: "bool"
+    default_value {
+      b: true
+    }
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_UINT16
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
+      }
+    }
+  }
+  deprecation {
+    version: 7
+  }
+}
 op {
   name: "TopKV2"
   input_arg {
@@ -41288,6 +53965,52 @@ op {
     }
   }
 }
+op {
+  name: "TopKV2"
+  input_arg {
+    name: "input"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "k"
+    type: DT_INT32
+  }
+  output_arg {
+    name: "values"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "indices"
+    type: DT_INT32
+  }
+  attr {
+    name: "sorted"
+    type: "bool"
+    default_value {
+      b: true
+    }
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_UINT16
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
+      }
+    }
+  }
+}
 op {
   name: "Transpose"
   input_arg {
@@ -41354,6 +54077,41 @@ op {
     }
   }
 }
+op {
+  name: "TruncateDiv"
+  input_arg {
+    name: "x"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "y"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "z"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_BFLOAT16
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_UINT8
+        type: DT_INT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+      }
+    }
+  }
+}
 op {
   name: "TruncateMod"
   input_arg {
@@ -41381,6 +54139,34 @@ op {
     }
   }
 }
+op {
+  name: "TruncateMod"
+  input_arg {
+    name: "x"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "y"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "z"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_BFLOAT16
+        type: DT_FLOAT
+        type: DT_DOUBLE
+      }
+    }
+  }
+}
 op {
   name: "TruncatedNormal"
   input_arg {
@@ -41428,6 +54214,54 @@ op {
   }
   is_stateful: true
 }
+op {
+  name: "TruncatedNormal"
+  input_arg {
+    name: "shape"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "dtype"
+  }
+  attr {
+    name: "seed"
+    type: "int"
+    default_value {
+      i: 0
+    }
+  }
+  attr {
+    name: "seed2"
+    type: "int"
+    default_value {
+      i: 0
+    }
+  }
+  attr {
+    name: "dtype"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_BFLOAT16
+        type: DT_FLOAT
+        type: DT_DOUBLE
+      }
+    }
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+  is_stateful: true
+}
 op {
   name: "UniformCandidateSampler"
   input_arg {
@@ -41571,6 +54405,42 @@ op {
     }
   }
 }
+op {
+  name: "UniqueV2"
+  input_arg {
+    name: "x"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "axis"
+    type: DT_INT64
+  }
+  output_arg {
+    name: "y"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "idx"
+    type_attr: "out_idx"
+  }
+  attr {
+    name: "T"
+    type: "type"
+  }
+  attr {
+    name: "out_idx"
+    type: "type"
+    default_value {
+      type: DT_INT32
+    }
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+}
 op {
   name: "UniqueWithCounts"
   input_arg {
@@ -41729,6 +54599,68 @@ op {
     }
   }
 }
+op {
+  name: "UnsortedSegmentMax"
+  input_arg {
+    name: "data"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "segment_ids"
+    type_attr: "Tindices"
+  }
+  input_arg {
+    name: "num_segments"
+    type_attr: "Tnumsegments"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_UINT16
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
+      }
+    }
+  }
+  attr {
+    name: "Tindices"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+  attr {
+    name: "Tnumsegments"
+    type: "type"
+    default_value {
+      type: DT_INT32
+    }
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+}
 op {
   name: "UnsortedSegmentSum"
   input_arg {
@@ -41833,6 +54765,73 @@ op {
     }
   }
 }
+op {
+  name: "UnsortedSegmentSum"
+  input_arg {
+    name: "data"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "segment_ids"
+    type_attr: "Tindices"
+  }
+  input_arg {
+    name: "num_segments"
+    type_attr: "Tnumsegments"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
+      }
+    }
+  }
+  attr {
+    name: "Tindices"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+  attr {
+    name: "Tnumsegments"
+    type: "type"
+    default_value {
+      type: DT_INT32
+    }
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+}
 op {
   name: "Unstage"
   output_arg {
@@ -42084,6 +55083,46 @@ op {
     }
   }
 }
+op {
+  name: "Where"
+  input_arg {
+    name: "input"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "index"
+    type: DT_INT64
+  }
+  attr {
+    name: "T"
+    type: "type"
+    default_value {
+      type: DT_BOOL
+    }
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
+        type: DT_BOOL
+      }
+    }
+  }
+}
 op {
   name: "WholeFileReader"
   output_arg {
diff --git a/tensorflow/core/ops/ops.pbtxt b/tensorflow/core/ops/ops.pbtxt
index 9c41957ae6..438c2dc13b 100644
--- a/tensorflow/core/ops/ops.pbtxt
+++ b/tensorflow/core/ops/ops.pbtxt
@@ -34,6 +34,7 @@ op {
     allowed_values {
       list {
         type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
         type: DT_INT32
@@ -83,6 +84,7 @@ op {
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
@@ -136,6 +138,7 @@ op {
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
@@ -213,6 +216,7 @@ op {
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
@@ -235,6 +239,7 @@ op {
     allowed_values {
       list {
         type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
         type: DT_INT32
@@ -262,6 +267,7 @@ op {
     allowed_values {
       list {
         type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
         type: DT_COMPLEX64
@@ -291,6 +297,7 @@ op {
     allowed_values {
       list {
         type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
         type: DT_UINT8
@@ -392,6 +399,7 @@ op {
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
+        type: DT_BFLOAT16
         type: DT_VARIANT
       }
     }
@@ -466,6 +474,7 @@ op {
     allowed_values {
       list {
         type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
         type: DT_UINT8
@@ -835,6 +844,7 @@ op {
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
@@ -900,6 +910,7 @@ op {
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
@@ -986,6 +997,7 @@ op {
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
@@ -1081,6 +1093,7 @@ op {
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
@@ -1169,6 +1182,7 @@ op {
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
@@ -1260,6 +1274,7 @@ op {
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
@@ -1346,6 +1361,7 @@ op {
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
@@ -1436,6 +1452,7 @@ op {
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
@@ -1495,6 +1512,7 @@ op {
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
@@ -1564,6 +1582,7 @@ op {
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
@@ -1652,6 +1671,7 @@ op {
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
@@ -1727,6 +1747,7 @@ op {
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
@@ -1796,6 +1817,7 @@ op {
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
@@ -1881,6 +1903,7 @@ op {
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
@@ -1930,6 +1953,7 @@ op {
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
@@ -1979,6 +2003,7 @@ op {
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
@@ -2047,6 +2072,7 @@ op {
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
@@ -2163,6 +2189,7 @@ op {
     allowed_values {
       list {
         type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
         type: DT_INT32
@@ -2190,6 +2217,7 @@ op {
     allowed_values {
       list {
         type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
         type: DT_COMPLEX64
@@ -2312,6 +2340,7 @@ op {
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
@@ -2387,6 +2416,7 @@ op {
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
@@ -2459,6 +2489,7 @@ op {
     allowed_values {
       list {
         type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
         type: DT_INT32
@@ -2489,6 +2520,7 @@ op {
     type: "type"
     allowed_values {
       list {
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
       }
@@ -2513,6 +2545,7 @@ op {
     allowed_values {
       list {
         type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
         type: DT_COMPLEX64
@@ -2686,6 +2719,7 @@ op {
     allowed_values {
       list {
         type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
       }
@@ -2750,6 +2784,7 @@ op {
     type: "type"
     allowed_values {
       list {
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
       }
@@ -2818,6 +2853,7 @@ op {
     type: "type"
     allowed_values {
       list {
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
       }
@@ -2887,6 +2923,7 @@ op {
     allowed_values {
       list {
         type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
       }
@@ -3283,6 +3320,7 @@ op {
     allowed_values {
       list {
         type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
         type: DT_INT32
@@ -3628,6 +3666,7 @@ op {
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
@@ -3721,6 +3760,7 @@ op {
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
@@ -4017,6 +4057,7 @@ op {
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
@@ -4070,6 +4111,7 @@ op {
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
@@ -4128,6 +4170,7 @@ op {
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
@@ -4186,6 +4229,7 @@ op {
     type: "type"
     allowed_values {
       list {
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
         type: DT_INT64
@@ -4210,6 +4254,7 @@ op {
     type: "type"
     allowed_values {
       list {
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
         type: DT_INT64
@@ -4686,6 +4731,7 @@ op {
     allowed_values {
       list {
         type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
       }
@@ -4709,6 +4755,7 @@ op {
     allowed_values {
       list {
         type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
       }
@@ -5108,6 +5155,7 @@ op {
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
@@ -5242,6 +5290,7 @@ op {
     allowed_values {
       list {
         type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
       }
     }
@@ -5249,7 +5298,7 @@ op {
   attr {
     name: "strides"
     type: "list(int)"
-    description: "1-D tensor of length 4.  The stride of the sliding window for each\ndimension of `input`. The dimension order is determined by the value of\n  `data_format`, see below for details."
+    description: "1-D tensor of length 4.  The stride of the sliding window for each\ndimension of `input`. The dimension order is determined by the value of\n`data_format`, see below for details."
   }
   attr {
     name: "use_cudnn_on_gpu"
@@ -5283,6 +5332,19 @@ op {
       }
     }
   }
+  attr {
+    name: "dilations"
+    type: "list(int)"
+    default_value {
+      list {
+        i: 1
+        i: 1
+        i: 1
+        i: 1
+      }
+    }
+    description: "1-D tensor of length 4.  The dilation factor for each dimension of\n`input`. If set to k > 1, there will be k-1 skipped cells between each\nfilter element on that dimension. The dimension order is determined by the\nvalue of `data_format`, see above for details. Dilations in the batch and\ndepth dimensions must be 1."
+  }
   summary: "Computes a 2-D convolution given 4-D `input` and `filter` tensors."
   description: "Given an input tensor of shape `[batch, in_height, in_width, in_channels]`\nand a filter / kernel tensor of shape\n`[filter_height, filter_width, in_channels, out_channels]`, this op\nperforms the following:\n\n1. Flattens the filter to a 2-D matrix with shape\n   `[filter_height * filter_width * in_channels, output_channels]`.\n2. Extracts image patches from the input tensor to form a *virtual*\n   tensor of shape `[batch, out_height, out_width,\n   filter_height * filter_width * in_channels]`.\n3. For each patch, right-multiplies the filter matrix and the image patch\n   vector.\n\nIn detail, with the default NHWC format,\n\n    output[b, i, j, k] =\n        sum_{di, dj, q} input[b, strides[1] * i + di, strides[2] * j + dj, q] *\n                        filter[di, dj, q, k]\n\nMust have `strides[0] = strides[3] = 1`.  For the most common case of the same\nhorizontal and vertices strides, `strides = [1, stride, stride, 1]`."
 }
@@ -5314,6 +5376,7 @@ op {
     allowed_values {
       list {
         type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
       }
     }
@@ -5355,6 +5418,19 @@ op {
       }
     }
   }
+  attr {
+    name: "dilations"
+    type: "list(int)"
+    default_value {
+      list {
+        i: 1
+        i: 1
+        i: 1
+        i: 1
+      }
+    }
+    description: "1-D tensor of length 4.  The dilation factor for each dimension of\n`input`. If set to k > 1, there will be k-1 skipped cells between each filter\nelement on that dimension. The dimension order is determined by the value of\n`data_format`, see above for details. Dilations in the batch and depth\ndimensions must be 1."
+  }
   summary: "Computes the gradients of convolution with respect to the filter."
 }
 op {
@@ -5385,6 +5461,7 @@ op {
     allowed_values {
       list {
         type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
       }
     }
@@ -5426,6 +5503,19 @@ op {
       }
     }
   }
+  attr {
+    name: "dilations"
+    type: "list(int)"
+    default_value {
+      list {
+        i: 1
+        i: 1
+        i: 1
+        i: 1
+      }
+    }
+    description: "1-D tensor of length 4.  The dilation factor for each dimension of\n`input`. If set to k > 1, there will be k-1 skipped cells between each filter\nelement on that dimension. The dimension order is determined by the value of\n`data_format`, see above for details. Dilations in the batch and depth\ndimensions must be 1."
+  }
   summary: "Computes the gradients of convolution with respect to the input."
 }
 op {
@@ -5450,6 +5540,7 @@ op {
     allowed_values {
       list {
         type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
       }
@@ -5487,6 +5578,20 @@ op {
       }
     }
   }
+  attr {
+    name: "dilations"
+    type: "list(int)"
+    default_value {
+      list {
+        i: 1
+        i: 1
+        i: 1
+        i: 1
+        i: 1
+      }
+    }
+    description: "1-D tensor of length 5.  The dilation factor for each dimension of\n`input`. If set to k > 1, there will be k-1 skipped cells between each\nfilter element on that dimension. The dimension order is determined by the\nvalue of `data_format`, see above for details. Dilations in the batch and\ndepth dimensions must be 1."
+  }
   summary: "Computes a 3-D convolution given 5-D `input` and `filter` tensors."
   description: "In signal processing, cross-correlation is a measure of similarity of\ntwo waveforms as a function of a time-lag applied to one of them. This\nis also known as a sliding dot product or sliding inner-product.\n\nOur Conv3D implements a form of cross-correlation."
 }
@@ -5573,6 +5678,7 @@ op {
     allowed_values {
       list {
         type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
       }
@@ -5610,6 +5716,20 @@ op {
       }
     }
   }
+  attr {
+    name: "dilations"
+    type: "list(int)"
+    default_value {
+      list {
+        i: 1
+        i: 1
+        i: 1
+        i: 1
+        i: 1
+      }
+    }
+    description: "1-D tensor of length 5.  The dilation factor for each dimension of\n`input`. If set to k > 1, there will be k-1 skipped cells between each\nfilter element on that dimension. The dimension order is determined by the\nvalue of `data_format`, see above for details. Dilations in the batch and\ndepth dimensions must be 1."
+  }
   summary: "Computes the gradients of 3-D convolution with respect to the filter."
 }
 op {
@@ -5695,6 +5815,7 @@ op {
     allowed_values {
       list {
         type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
       }
@@ -5732,6 +5853,20 @@ op {
       }
     }
   }
+  attr {
+    name: "dilations"
+    type: "list(int)"
+    default_value {
+      list {
+        i: 1
+        i: 1
+        i: 1
+        i: 1
+        i: 1
+      }
+    }
+    description: "1-D tensor of length 5.  The dilation factor for each dimension of\n`input`. If set to k > 1, there will be k-1 skipped cells between each\nfilter element on that dimension. The dimension order is determined by the\nvalue of `data_format`, see above for details. Dilations in the batch and\ndepth dimensions must be 1."
+  }
   summary: "Computes the gradients of 3-D convolution with respect to the input."
 }
 op {
@@ -5824,6 +5959,7 @@ op {
     allowed_values {
       list {
         type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
         type: DT_COMPLEX64
@@ -5849,6 +5985,7 @@ op {
     allowed_values {
       list {
         type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
         type: DT_COMPLEX64
@@ -6101,6 +6238,7 @@ op {
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
@@ -6160,6 +6298,7 @@ op {
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
@@ -6232,6 +6371,7 @@ op {
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
@@ -6252,164 +6392,252 @@ op {
   description: "By default, this op performs an inclusive cumsum, which means that the first\nelement of the input is identical to the first element of the output:\n\n```python\ntf.cumsum([a, b, c])  # => [a, a + b, a + b + c]\n```\n\nBy setting the `exclusive` kwarg to `True`, an exclusive cumsum is\nperformed instead:\n\n```python\ntf.cumsum([a, b, c], exclusive=True)  # => [0, a, a + b]\n```\n\nBy setting the `reverse` kwarg to `True`, the cumsum is performed in the\nopposite direction:\n\n```python\ntf.cumsum([a, b, c], reverse=True)  # => [a + b + c, b + c, c]\n```\n\nThis is more efficient than using separate `tf.reverse` ops.\n\nThe `reverse` and `exclusive` kwargs can also be combined:\n\n```python\ntf.cumsum([a, b, c], exclusive=True, reverse=True)  # => [b + c, c, 0]\n```"
 }
 op {
-  name: "DatasetToSingleElement"
+  name: "DataFormatDimMap"
   input_arg {
-    name: "dataset"
-    description: "A handle to a dataset that contains a single element."
-    type: DT_VARIANT
-  }
-  output_arg {
-    name: "components"
-    description: "The components of the single element of `input`."
-    type_list_attr: "output_types"
-  }
-  attr {
-    name: "output_types"
-    type: "list(type)"
-    has_minimum: true
-    minimum: 1
-  }
-  attr {
-    name: "output_shapes"
-    type: "list(shape)"
-    has_minimum: true
-    minimum: 1
-  }
-  summary: "Outputs the single element from the given dataset."
-}
-op {
-  name: "DebugGradientIdentity"
-  input_arg {
-    name: "input"
+    name: "x"
+    description: "Scalar. Dimension index in source data format. Must be in the range [-4, 4)."
     type_attr: "T"
   }
   output_arg {
-    name: "output"
+    name: "y"
+    description: "Scalar. Dimension index in destination data format."
     type_attr: "T"
   }
   attr {
     name: "T"
     type: "type"
+    default_value {
+      type: DT_INT32
+    }
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
   }
-  summary: "Identity op for gradient debugging."
-  description: "This op is hidden from public in Python. It is used by TensorFlow Debugger to\nregister gradient tensors for gradient debugging."
-  allows_uninitialized_input: true
+  attr {
+    name: "src_format"
+    type: "string"
+    default_value {
+      s: "NHWC"
+    }
+    description: "source data format."
+  }
+  attr {
+    name: "dst_format"
+    type: "string"
+    default_value {
+      s: "NCHW"
+    }
+    description: "destination data format."
+  }
+  summary: "Returns the dimension index in the destination data format given the one in"
+  description: "the source data format."
 }
 op {
-  name: "DebugIdentity"
+  name: "DataFormatVecPermute"
   input_arg {
-    name: "input"
-    description: "Input tensor, non-Reference type."
+    name: "x"
+    description: "Vector in source data format. Must be of size 4."
     type_attr: "T"
   }
   output_arg {
-    name: "output"
-    description: "Output tensor that equals the input tensor."
+    name: "y"
+    description: "Vector in destination data format. Must be of size 4."
     type_attr: "T"
   }
   attr {
     name: "T"
     type: "type"
+    default_value {
+      type: DT_INT32
+    }
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
   }
   attr {
-    name: "device_name"
+    name: "src_format"
     type: "string"
     default_value {
-      s: ""
+      s: "NHWC"
     }
+    description: "source data format."
   }
   attr {
-    name: "tensor_name"
+    name: "dst_format"
     type: "string"
     default_value {
-      s: ""
+      s: "NCHW"
     }
-    description: "Name of the input tensor."
+    description: "destination data format."
+  }
+  summary: "Returns the permuted vector in the destination data format given the one in"
+  description: "the source data format."
+}
+op {
+  name: "DatasetToSingleElement"
+  input_arg {
+    name: "dataset"
+    description: "A handle to a dataset that contains a single element."
+    type: DT_VARIANT
+  }
+  output_arg {
+    name: "components"
+    description: "The components of the single element of `input`."
+    type_list_attr: "output_types"
   }
   attr {
-    name: "debug_urls"
-    type: "list(string)"
-    default_value {
-      list {
-      }
-    }
-    description: "List of URLs to debug targets, e.g.,\nfile:///foo/tfdbg_dump, grpc:://localhost:11011"
+    name: "output_types"
+    type: "list(type)"
+    has_minimum: true
+    minimum: 1
   }
   attr {
-    name: "gated_grpc"
-    type: "bool"
-    default_value {
-      b: false
-    }
-    description: "Whether this op will be gated. If any of the debug_urls of this\ndebug node is of the grpc:// scheme, when the value of this attribute is set\nto True, the data will not actually be sent via the grpc stream unless this\ndebug op has been enabled at the debug_url. If all of the debug_urls of this\ndebug node are of the grpc:// scheme and the debug op is enabled at none of\nthem, the output will be an empty Tensor."
+    name: "output_shapes"
+    type: "list(shape)"
+    has_minimum: true
+    minimum: 1
   }
-  summary: "Debug Identity Op."
-  description: "Provides an identity mapping of the non-Ref type input tensor for debugging."
-  allows_uninitialized_input: true
+  summary: "Outputs the single element from the given dataset."
 }
 op {
-  name: "DebugNanCount"
+  name: "DebugGradientIdentity"
   input_arg {
     name: "input"
-    description: "Input tensor, non-Reference type."
     type_attr: "T"
   }
   output_arg {
     name: "output"
-    description: "An integer output tensor that is the number of NaNs in the input."
-    type: DT_INT64
+    type_attr: "T"
   }
   attr {
     name: "T"
     type: "type"
   }
-  attr {
-    name: "device_name"
-    type: "string"
-    default_value {
-      s: ""
-    }
-  }
-  attr {
-    name: "tensor_name"
-    type: "string"
-    default_value {
-      s: ""
-    }
-    description: "Name of the input tensor."
-  }
-  attr {
-    name: "debug_urls"
-    type: "list(string)"
-    default_value {
-      list {
-      }
-    }
-    description: "List of URLs to debug targets, e.g.,\nfile:///foo/tfdbg_dump, grpc:://localhost:11011."
-  }
-  attr {
-    name: "gated_grpc"
-    type: "bool"
-    default_value {
-      b: false
-    }
-    description: "Whether this op will be gated. If any of the debug_urls of this\ndebug node is of the grpc:// scheme, when the value of this attribute is set\nto True, the data will not actually be sent via the grpc stream unless this\ndebug op has been enabled at the debug_url. If all of the debug_urls of this\ndebug node are of the grpc:// scheme and the debug op is enabled at none of\nthem, the output will be an empty Tensor."
-  }
-  summary: "Debug NaN Value Counter Op"
-  description: "Counts number of NaNs in the input tensor, for debugging."
+  summary: "Identity op for gradient debugging."
+  description: "This op is hidden from public in Python. It is used by TensorFlow Debugger to\nregister gradient tensors for gradient debugging."
   allows_uninitialized_input: true
 }
 op {
-  name: "DebugNumericSummary"
+  name: "DebugIdentity"
   input_arg {
     name: "input"
-    description: "Input tensor, non-Reference type, float or double."
+    description: "Input tensor, non-Reference type."
     type_attr: "T"
   }
   output_arg {
     name: "output"
-    description: "A double tensor of shape [14 + nDimensions], where nDimensions is the\n  the number of dimensions of the tensor\'s shape. The elements of output are:\n  [0]: is initialized (1.0) or not (0.0).\n  [1]: total number of elements\n  [2]: NaN element count\n  [3]: generalized -inf count: elements <= lower_bound. lower_bound is -inf by\n    default.\n  [4]: negative element count (excluding -inf), if lower_bound is the default\n    -inf. Otherwise, this is the count of elements > lower_bound and < 0.\n  [5]: zero element count\n  [6]: positive element count (excluding +inf), if upper_bound is the default\n    -inf. Otherwise, this is the count of elements < upper_bound and > 0.\n  [7]: generalized +inf count, elements >= upper_bound. upper_bound is +inf by\n    default.\nOutput elements [1:8] are all zero, if the tensor is uninitialized.\n  [8]: minimum of all non-inf and non-NaN elements.\n       If uninitialized or no such element exists: +inf.\n  [9]: maximum of all non-inf and non-NaN elements.\n       If uninitialized or no such element exists: -inf.\n  [10]: mean of all non-inf and non-NaN elements.\n        If uninitialized or no such element exists: NaN.\n  [11]: variance of all non-inf and non-NaN elements.\n        If uninitialized or no such element exists: NaN.\n  [12]: Data type of the tensor encoded as an enum integer. See the DataType\n        proto for more details.\n  [13]: Number of dimensions of the tensor (ndims).\n  [14+]: Sizes of the dimensions."
-    type: DT_DOUBLE
+    description: "Output tensor that equals the input tensor."
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+  }
+  attr {
+    name: "device_name"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  attr {
+    name: "tensor_name"
+    type: "string"
+    default_value {
+      s: ""
+    }
+    description: "Name of the input tensor."
+  }
+  attr {
+    name: "debug_urls"
+    type: "list(string)"
+    default_value {
+      list {
+      }
+    }
+    description: "List of URLs to debug targets, e.g.,\nfile:///foo/tfdbg_dump, grpc:://localhost:11011"
+  }
+  attr {
+    name: "gated_grpc"
+    type: "bool"
+    default_value {
+      b: false
+    }
+    description: "Whether this op will be gated. If any of the debug_urls of this\ndebug node is of the grpc:// scheme, when the value of this attribute is set\nto True, the data will not actually be sent via the grpc stream unless this\ndebug op has been enabled at the debug_url. If all of the debug_urls of this\ndebug node are of the grpc:// scheme and the debug op is enabled at none of\nthem, the output will be an empty Tensor."
+  }
+  summary: "Debug Identity Op."
+  description: "Provides an identity mapping of the non-Ref type input tensor for debugging."
+  allows_uninitialized_input: true
+}
+op {
+  name: "DebugNanCount"
+  input_arg {
+    name: "input"
+    description: "Input tensor, non-Reference type."
+    type_attr: "T"
+  }
+  output_arg {
+    name: "output"
+    description: "An integer output tensor that is the number of NaNs in the input."
+    type: DT_INT64
+  }
+  attr {
+    name: "T"
+    type: "type"
+  }
+  attr {
+    name: "device_name"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  attr {
+    name: "tensor_name"
+    type: "string"
+    default_value {
+      s: ""
+    }
+    description: "Name of the input tensor."
+  }
+  attr {
+    name: "debug_urls"
+    type: "list(string)"
+    default_value {
+      list {
+      }
+    }
+    description: "List of URLs to debug targets, e.g.,\nfile:///foo/tfdbg_dump, grpc:://localhost:11011."
+  }
+  attr {
+    name: "gated_grpc"
+    type: "bool"
+    default_value {
+      b: false
+    }
+    description: "Whether this op will be gated. If any of the debug_urls of this\ndebug node is of the grpc:// scheme, when the value of this attribute is set\nto True, the data will not actually be sent via the grpc stream unless this\ndebug op has been enabled at the debug_url. If all of the debug_urls of this\ndebug node are of the grpc:// scheme and the debug op is enabled at none of\nthem, the output will be an empty Tensor."
+  }
+  summary: "Debug NaN Value Counter Op"
+  description: "Counts number of NaNs in the input tensor, for debugging."
+  allows_uninitialized_input: true
+}
+op {
+  name: "DebugNumericSummary"
+  input_arg {
+    name: "input"
+    description: "Input tensor, non-Reference type, float or double."
+    type_attr: "T"
+  }
+  output_arg {
+    name: "output"
+    description: "A double tensor of shape [14 + nDimensions], where nDimensions is the\n  the number of dimensions of the tensor\'s shape. The elements of output are:\n  [0]: is initialized (1.0) or not (0.0).\n  [1]: total number of elements\n  [2]: NaN element count\n  [3]: generalized -inf count: elements <= lower_bound. lower_bound is -inf by\n    default.\n  [4]: negative element count (excluding -inf), if lower_bound is the default\n    -inf. Otherwise, this is the count of elements > lower_bound and < 0.\n  [5]: zero element count\n  [6]: positive element count (excluding +inf), if upper_bound is the default\n    -inf. Otherwise, this is the count of elements < upper_bound and > 0.\n  [7]: generalized +inf count, elements >= upper_bound. upper_bound is +inf by\n    default.\nOutput elements [1:8] are all zero, if the tensor is uninitialized.\n  [8]: minimum of all non-inf and non-NaN elements.\n       If uninitialized or no such element exists: +inf.\n  [9]: maximum of all non-inf and non-NaN elements.\n       If uninitialized or no such element exists: -inf.\n  [10]: mean of all non-inf and non-NaN elements.\n        If uninitialized or no such element exists: NaN.\n  [11]: variance of all non-inf and non-NaN elements.\n        If uninitialized or no such element exists: NaN.\n  [12]: Data type of the tensor encoded as an enum integer. See the DataType\n        proto for more details.\n  [13]: Number of dimensions of the tensor (ndims).\n  [14+]: Sizes of the dimensions."
+    type: DT_DOUBLE
   }
   attr {
     name: "T"
@@ -7045,7 +7273,7 @@ op {
     }
   }
   summary: "DepthToSpace for tensors of type T."
-  description: "Rearranges data from depth into blocks of spatial data.\nThis is the reverse transformation of SpaceToDepth. More specifically,\nthis op outputs a copy of the input tensor where values from the `depth`\ndimension are moved in spatial blocks to the `height` and `width` dimensions.\nThe attr `block_size` indicates the input block size and how the data is moved.\n\n  * Chunks of data of size `block_size * block_size` from depth are rearranged\n    into non-overlapping blocks of size `block_size x block_size`\n  * The width the output tensor is `input_depth * block_size`, whereas the\n    height is `input_height * block_size`.\n  * The Y, X coordinates within each block of the output image are determined\n    by the high order component of the input channel index.\n  * The depth of the input tensor must be divisible by\n    `block_size * block_size`.\n\nThe `data_format` attr specifies the layout of the input and output tensors\nwith the following options:\n  \"NHWC\": `[ batch, height, width, channels ]`\n  \"NCHW\": `[ batch, channels, height, width ]`\n  \"NCHW_VECT_C\":\n      `qint8 [ batch, channels / 4, height, width, channels % 4 ]`\n\nIt is useful to consider the operation as transforming a 6-D Tensor.\ne.g. for data_format = NHWC,\n     Each element in the input tensor can be specified via 6 coordinates,\n     ordered by decreasing memory layout significance as:\n     n,iY,iX,bY,bX,oC  (where n=batch index, iX, iY means X or Y coordinates\n                        within the input image, bX, bY means coordinates\n                        within the output block, oC means output channels).\n     The output would be the input transposed to the following layout:\n     n,iY,bY,iX,bX,oC\n\nThis operation is useful for resizing the activations between convolutions\n(but keeping all data), e.g. instead of pooling. It is also useful for training\npurely convolutional models.\n\nFor example, given an input of shape `[1, 1, 1, 4]`, data_format = \"NHWC\" and\nblock_size = 2:\n\n```\nx = [[[[1, 2, 3, 4]]]]\n\n```\n\nThis operation will output a tensor of shape `[1, 2, 2, 1]`:\n\n```\n   [[[[1], [2]],\n     [[3], [4]]]]\n```\n\nHere, the input has a batch of 1 and each batch element has shape `[1, 1, 4]`,\nthe corresponding output will have 2x2 elements and will have a depth of\n1 channel (1 = `4 / (block_size * block_size)`).\nThe output element shape is `[2, 2, 1]`.\n\nFor an input tensor with larger depth, here of shape `[1, 1, 1, 12]`, e.g.\n\n```\nx = [[[[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]]]]\n```\n\nThis operation, for block size of 2, will return the following tensor of shape\n`[1, 2, 2, 3]`\n\n```\n   [[[[1, 2, 3], [4, 5, 6]],\n     [[7, 8, 9], [10, 11, 12]]]]\n\n```\n\nSimilarly, for the following input of shape `[1 2 2 4]`, and a block size of 2:\n\n```\nx =  [[[[1, 2, 3, 4],\n       [5, 6, 7, 8]],\n      [[9, 10, 11, 12],\n       [13, 14, 15, 16]]]]\n```\n\nthe operator will return the following tensor of shape `[1 4 4 1]`:\n\n```\nx = [[[ [1],   [2],  [5],  [6]],\n      [ [3],   [4],  [7],  [8]],\n      [ [9],  [10], [13],  [14]],\n      [ [11], [12], [15],  [16]]]]\n\n```"
+  description: "Rearranges data from depth into blocks of spatial data.\nThis is the reverse transformation of SpaceToDepth. More specifically,\nthis op outputs a copy of the input tensor where values from the `depth`\ndimension are moved in spatial blocks to the `height` and `width` dimensions.\nThe attr `block_size` indicates the input block size and how the data is moved.\n\n  * Chunks of data of size `block_size * block_size` from depth are rearranged\n    into non-overlapping blocks of size `block_size x block_size`\n  * The width the output tensor is `input_depth * block_size`, whereas the\n    height is `input_height * block_size`.\n  * The Y, X coordinates within each block of the output image are determined\n    by the high order component of the input channel index.\n  * The depth of the input tensor must be divisible by\n    `block_size * block_size`.\n\nThe `data_format` attr specifies the layout of the input and output tensors\nwith the following options:\n  \"NHWC\": `[ batch, height, width, channels ]`\n  \"NCHW\": `[ batch, channels, height, width ]`\n  \"NCHW_VECT_C\":\n      `qint8 [ batch, channels / 4, height, width, 4 ]`\n\nIt is useful to consider the operation as transforming a 6-D Tensor.\ne.g. for data_format = NHWC,\n     Each element in the input tensor can be specified via 6 coordinates,\n     ordered by decreasing memory layout significance as:\n     n,iY,iX,bY,bX,oC  (where n=batch index, iX, iY means X or Y coordinates\n                        within the input image, bX, bY means coordinates\n                        within the output block, oC means output channels).\n     The output would be the input transposed to the following layout:\n     n,iY,bY,iX,bX,oC\n\nThis operation is useful for resizing the activations between convolutions\n(but keeping all data), e.g. instead of pooling. It is also useful for training\npurely convolutional models.\n\nFor example, given an input of shape `[1, 1, 1, 4]`, data_format = \"NHWC\" and\nblock_size = 2:\n\n```\nx = [[[[1, 2, 3, 4]]]]\n\n```\n\nThis operation will output a tensor of shape `[1, 2, 2, 1]`:\n\n```\n   [[[[1], [2]],\n     [[3], [4]]]]\n```\n\nHere, the input has a batch of 1 and each batch element has shape `[1, 1, 4]`,\nthe corresponding output will have 2x2 elements and will have a depth of\n1 channel (1 = `4 / (block_size * block_size)`).\nThe output element shape is `[2, 2, 1]`.\n\nFor an input tensor with larger depth, here of shape `[1, 1, 1, 12]`, e.g.\n\n```\nx = [[[[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]]]]\n```\n\nThis operation, for block size of 2, will return the following tensor of shape\n`[1, 2, 2, 3]`\n\n```\n   [[[[1, 2, 3], [4, 5, 6]],\n     [[7, 8, 9], [10, 11, 12]]]]\n\n```\n\nSimilarly, for the following input of shape `[1 2 2 4]`, and a block size of 2:\n\n```\nx =  [[[[1, 2, 3, 4],\n       [5, 6, 7, 8]],\n      [[9, 10, 11, 12],\n       [13, 14, 15, 16]]]]\n```\n\nthe operator will return the following tensor of shape `[1 4 4 1]`:\n\n```\nx = [[[ [1],   [2],  [5],  [6]],\n      [ [3],   [4],  [7],  [8]],\n      [ [9],  [10], [13],  [14]],\n      [ [11], [12], [15],  [16]]]]\n\n```"
 }
 op {
   name: "DepthwiseConv2dNative"
@@ -7066,6 +7294,8 @@ op {
     type: "type"
     allowed_values {
       list {
+        type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
       }
@@ -7101,6 +7331,19 @@ op {
       }
     }
   }
+  attr {
+    name: "dilations"
+    type: "list(int)"
+    default_value {
+      list {
+        i: 1
+        i: 1
+        i: 1
+        i: 1
+      }
+    }
+    description: "1-D tensor of length 4.  The dilation factor for each dimension of\n`input`. If set to k > 1, there will be k-1 skipped cells between each filter\nelement on that dimension. The dimension order is determined by the value of\n`data_format`, see above for details. Dilations in the batch and depth\ndimensions must be 1."
+  }
   summary: "Computes a 2-D depthwise convolution given 4-D `input` and `filter` tensors."
   description: "Given an input tensor of shape `[batch, in_height, in_width, in_channels]`\nand a filter / kernel tensor of shape\n`[filter_height, filter_width, in_channels, channel_multiplier]`, containing\n`in_channels` convolutional filters of depth 1, `depthwise_conv2d` applies\na different filter to each input channel (expanding from 1 channel to\n`channel_multiplier` channels for each), then concatenates the results\ntogether. Thus, the output has `in_channels * channel_multiplier` channels.\n\n```\nfor k in 0..in_channels-1\n  for q in 0..channel_multiplier-1\n    output[b, i, j, k * channel_multiplier + q] =\n      sum_{di, dj} input[b, strides[1] * i + di, strides[2] * j + dj, k] *\n                        filter[di, dj, k, q]\n```\n\nMust have `strides[0] = strides[3] = 1`.  For the most common case of the same\nhorizontal and vertices strides, `strides = [1, stride, stride, 1]`."
 }
@@ -7131,6 +7374,7 @@ op {
     type: "type"
     allowed_values {
       list {
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
       }
@@ -7166,6 +7410,19 @@ op {
       }
     }
   }
+  attr {
+    name: "dilations"
+    type: "list(int)"
+    default_value {
+      list {
+        i: 1
+        i: 1
+        i: 1
+        i: 1
+      }
+    }
+    description: "1-D tensor of length 4.  The dilation factor for each dimension of\n`input`. If set to k > 1, there will be k-1 skipped cells between each filter\nelement on that dimension. The dimension order is determined by the value of\n`data_format`, see above for details. Dilations in the batch and depth\ndimensions must be 1."
+  }
   summary: "Computes the gradients of depthwise convolution with respect to the filter."
 }
 op {
@@ -7195,6 +7452,7 @@ op {
     type: "type"
     allowed_values {
       list {
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
       }
@@ -7230,6 +7488,19 @@ op {
       }
     }
   }
+  attr {
+    name: "dilations"
+    type: "list(int)"
+    default_value {
+      list {
+        i: 1
+        i: 1
+        i: 1
+        i: 1
+      }
+    }
+    description: "1-D tensor of length 4.  The dilation factor for each dimension of\n`input`. If set to k > 1, there will be k-1 skipped cells between each filter\nelement on that dimension. The dimension order is determined by the value of\n`data_format`, see above for details. Dilations in the batch and depth\ndimensions must be 1."
+  }
   summary: "Computes the gradients of depthwise convolution with respect to the input."
 }
 op {
@@ -7329,7 +7600,7 @@ op {
   input_arg {
     name: "serialized_sparse"
     description: "The serialized `SparseTensor` objects. The last dimension\nmust have 3 columns."
-    type: DT_STRING
+    type_attr: "Tserialized"
   }
   output_arg {
     name: "sparse_indices"
@@ -7348,7 +7619,21 @@ op {
     type: "type"
     description: "The `dtype` of the serialized `SparseTensor` objects."
   }
+  attr {
+    name: "Tserialized"
+    type: "type"
+    default_value {
+      type: DT_STRING
+    }
+    allowed_values {
+      list {
+        type: DT_STRING
+        type: DT_VARIANT
+      }
+    }
+  }
   summary: "Deserialize `SparseTensor` objects."
+  description: "The input `serialized_sparse` must have the shape `[?, ?, ..., ?, 3]` where\nthe last dimension stores serialized `SparseTensor` objects and the other N\ndimensions (N >= 0) correspond to a batch. The ranks of the original\n`SparseTensor` objects must all match. When the final `SparseTensor` is\ncreated, its rank is the rank of the incoming `SparseTensor` objects plus N;\nthe sparse tensors have been concatenated along new dimensions, one for each\nbatch.\n\nThe output `SparseTensor` object\'s shape values for the original dimensions\nare the max across the input `SparseTensor` objects\' shape values for the\ncorresponding dimensions. The new dimensions match the size of the batch.\n\nThe input `SparseTensor` objects\' indices are assumed ordered in\nstandard lexicographic order.  If this is not the case, after this\nstep run `SparseReorder` to restore index ordering.\n\nFor example, if the serialized input is a `[2 x 3]` matrix representing two\noriginal `SparseTensor` objects:\n\n    index = [ 0]\n            [10]\n            [20]\n    values = [1, 2, 3]\n    shape = [50]\n\nand\n\n    index = [ 2]\n            [10]\n    values = [4, 5]\n    shape = [30]\n\nthen the final deserialized `SparseTensor` will be:\n\n    index = [0  0]\n            [0 10]\n            [0 20]\n            [1  2]\n            [1 10]\n    values = [1, 2, 3, 4, 5]\n    shape = [2 50]"
 }
 op {
   name: "DestroyResourceOp"
@@ -7409,6 +7694,7 @@ op {
     type: "type"
     allowed_values {
       list {
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
         type: DT_INT32
@@ -7438,6 +7724,7 @@ op {
     type: "type"
     allowed_values {
       list {
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
         type: DT_INT32
@@ -7466,6 +7753,7 @@ op {
     allowed_values {
       list {
         type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
       }
@@ -7507,6 +7795,7 @@ op {
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
@@ -7576,6 +7865,7 @@ op {
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
@@ -7644,6 +7934,7 @@ op {
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
@@ -7694,6 +7985,7 @@ op {
     allowed_values {
       list {
         type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
         type: DT_UINT8
@@ -7802,6 +8094,34 @@ op {
   summary: "Interleave the values from the `data` tensors into a single tensor."
   description: "Builds a merged tensor such that\n\n```python\n    merged[indices[m][i, ..., j], ...] = data[m][i, ..., j, ...]\n```\n\nFor example, if each `indices[m]` is scalar or vector, we have\n\n```python\n    # Scalar indices:\n    merged[indices[m], ...] = data[m][...]\n\n    # Vector indices:\n    merged[indices[m][i], ...] = data[m][i, ...]\n```\n\nEach `data[i].shape` must start with the corresponding `indices[i].shape`,\nand the rest of `data[i].shape` must be constant w.r.t. `i`.  That is, we\nmust have `data[i].shape = indices[i].shape + constant`.  In terms of this\n`constant`, the output shape is\n\n    merged.shape = [max(indices)] + constant\n\nValues are merged in order, so if an index appears in both `indices[m][i]` and\n`indices[n][j]` for `(m,i) < (n,j)` the slice `data[n][j]` will appear in the\nmerged result. If you do not need this guarantee, ParallelDynamicStitch might\nperform better on some devices.\n\nFor example:\n\n```python\n    indices[0] = 6\n    indices[1] = [4, 1]\n    indices[2] = [[5, 2], [0, 3]]\n    data[0] = [61, 62]\n    data[1] = [[41, 42], [11, 12]]\n    data[2] = [[[51, 52], [21, 22]], [[1, 2], [31, 32]]]\n    merged = [[1, 2], [11, 12], [21, 22], [31, 32], [41, 42],\n              [51, 52], [61, 62]]\n```\n\nThis method can be used to merge partitions created by `dynamic_partition`\nas illustrated on the following example:\n\n```python\n    # Apply function (increments x_i) on elements for which a certain condition\n    # apply (x_i != -1 in this example).\n    x=tf.constant([0.1, -1., 5.2, 4.3, -1., 7.4])\n    condition_mask=tf.not_equal(x,tf.constant(-1.))\n    partitioned_data = tf.dynamic_partition(\n        x, tf.cast(condition_mask, tf.int32) , 2)\n    partitioned_data[1] = partitioned_data[1] + 1.0\n    condition_indices = tf.dynamic_partition(\n        tf.range(tf.shape(x)[0]), tf.cast(condition_mask, tf.int32) , 2)\n    x = tf.dynamic_stitch(condition_indices, partitioned_data)\n    # Here x=[1.1, -1., 6.2, 5.3, -1, 8.4], the -1. values remain\n    # unchanged.\n```\n\n<div style=\"width:70%; margin:auto; margin-bottom:10px; margin-top:20px;\">\n<img style=\"width:100%\" src=\"https://www.tensorflow.org/images/DynamicStitch.png\" alt>\n</div>"
 }
+op {
+  name: "EagerPyFunc"
+  input_arg {
+    name: "input"
+    type_list_attr: "Tin"
+  }
+  output_arg {
+    name: "output"
+    type_list_attr: "Tout"
+  }
+  attr {
+    name: "token"
+    type: "string"
+  }
+  attr {
+    name: "Tin"
+    type: "list(type)"
+    has_minimum: true
+  }
+  attr {
+    name: "Tout"
+    type: "list(type)"
+    has_minimum: true
+  }
+  summary: "Eagerly executes a python function to compute func(input)->output. The"
+  description: "semantics of the input, output, and attributes are the same as those for\nPyFunc."
+  is_stateful: true
+}
 op {
   name: "EditDistance"
   input_arg {
@@ -7870,6 +8190,7 @@ op {
     allowed_values {
       list {
         type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
       }
@@ -7901,6 +8222,7 @@ op {
     allowed_values {
       list {
         type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
       }
@@ -8147,6 +8469,7 @@ op {
     allowed_values {
       list {
         type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
         type: DT_UINT8
@@ -8184,6 +8507,7 @@ op {
     allowed_values {
       list {
         type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
       }
@@ -8207,6 +8531,7 @@ op {
     allowed_values {
       list {
         type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
       }
@@ -8249,6 +8574,7 @@ op {
     allowed_values {
       list {
         type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
         type: DT_COMPLEX64
@@ -8310,6 +8636,7 @@ op {
     allowed_values {
       list {
         type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
         type: DT_COMPLEX64
@@ -8418,6 +8745,7 @@ op {
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
@@ -9288,6 +9616,7 @@ op {
     allowed_values {
       list {
         type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
       }
@@ -9315,6 +9644,7 @@ op {
     allowed_values {
       list {
         type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
         type: DT_UINT8
@@ -9352,6 +9682,7 @@ op {
       list {
         type: DT_INT32
         type: DT_INT64
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
       }
@@ -9869,6 +10200,7 @@ op {
     allowed_values {
       list {
         type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
       }
     }
@@ -9969,6 +10301,7 @@ op {
     allowed_values {
       list {
         type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
       }
     }
@@ -10395,6 +10728,7 @@ op {
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
@@ -10431,6 +10765,7 @@ op {
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
@@ -10502,6 +10837,24 @@ op {
   summary: "Creates a dataset that computes a windowed group-by on `input_dataset`."
   description: "// TODO(mrry): Support non-int64 keys."
 }
+op {
+  name: "GuaranteeConst"
+  input_arg {
+    name: "input"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+  }
+  summary: "Gives a guarantee to the TF runtime that the input tensor is a constant."
+  description: "The runtime is then free to make optimizations based on this.\n\nOnly accepts value typed tensors as inputs and rejects resource variable handles\nas input.\n\nReturns the input tensor without modification."
+  is_stateful: true
+}
 op {
   name: "HSVToRGB"
   input_arg {
@@ -10707,6 +11060,7 @@ op {
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
@@ -11404,6 +11758,7 @@ op {
     allowed_values {
       list {
         type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
         type: DT_INT32
@@ -11440,6 +11795,7 @@ op {
     allowed_values {
       list {
         type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
         type: DT_COMPLEX64
@@ -11527,6 +11883,7 @@ op {
     allowed_values {
       list {
         type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
       }
@@ -11551,6 +11908,7 @@ op {
     allowed_values {
       list {
         type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
       }
@@ -11575,6 +11933,7 @@ op {
     allowed_values {
       list {
         type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
       }
@@ -11740,6 +12099,7 @@ op {
     allowed_values {
       list {
         type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
       }
@@ -11826,8 +12186,9 @@ op {
     }
     allowed_values {
       list {
-        type: DT_FLOAT
         type: DT_HALF
+        type: DT_BFLOAT16
+        type: DT_FLOAT
       }
     }
   }
@@ -11896,8 +12257,9 @@ op {
     }
     allowed_values {
       list {
-        type: DT_FLOAT
         type: DT_HALF
+        type: DT_BFLOAT16
+        type: DT_FLOAT
       }
     }
   }
@@ -12063,6 +12425,7 @@ op {
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
@@ -12099,6 +12462,7 @@ op {
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
@@ -12121,6 +12485,7 @@ op {
     allowed_values {
       list {
         type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
       }
@@ -12155,6 +12520,7 @@ op {
     type: "type"
     allowed_values {
       list {
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
       }
@@ -12291,6 +12657,7 @@ op {
     allowed_values {
       list {
         type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
         type: DT_COMPLEX64
@@ -12317,6 +12684,7 @@ op {
     allowed_values {
       list {
         type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
         type: DT_COMPLEX64
@@ -12377,6 +12745,7 @@ op {
     allowed_values {
       list {
         type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
       }
@@ -13268,6 +13637,7 @@ op {
     allowed_values {
       list {
         type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
         type: DT_INT32
@@ -13651,6 +14021,7 @@ op {
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
@@ -13690,6 +14061,8 @@ op {
     }
     allowed_values {
       list {
+        type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
         type: DT_INT32
@@ -13698,7 +14071,6 @@ op {
         type: DT_INT16
         type: DT_INT8
         type: DT_UINT16
-        type: DT_HALF
         type: DT_QINT8
       }
     }
@@ -13801,6 +14173,7 @@ op {
     type: "type"
     allowed_values {
       list {
+        type: DT_BFLOAT16
         type: DT_FLOAT
       }
     }
@@ -13875,6 +14248,7 @@ op {
     }
     allowed_values {
       list {
+        type: DT_BFLOAT16
         type: DT_FLOAT
       }
     }
@@ -13887,6 +14261,7 @@ op {
     }
     allowed_values {
       list {
+        type: DT_BFLOAT16
         type: DT_FLOAT
       }
     }
@@ -14045,6 +14420,7 @@ op {
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
@@ -14127,6 +14503,7 @@ op {
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
@@ -14205,6 +14582,7 @@ op {
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
@@ -14283,6 +14661,7 @@ op {
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
@@ -14364,6 +14743,7 @@ op {
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
@@ -14442,6 +14822,7 @@ op {
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
@@ -14477,6 +14858,8 @@ op {
     }
     allowed_values {
       list {
+        type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
         type: DT_INT32
@@ -14485,7 +14868,6 @@ op {
         type: DT_INT16
         type: DT_INT8
         type: DT_UINT16
-        type: DT_HALF
         type: DT_QINT8
       }
     }
@@ -14589,6 +14971,7 @@ op {
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
@@ -14615,6 +14998,7 @@ op {
     allowed_values {
       list {
         type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
         type: DT_INT32
@@ -14672,6 +15056,7 @@ op {
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
@@ -14865,6 +15250,7 @@ op {
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
@@ -14904,6 +15290,7 @@ op {
     allowed_values {
       list {
         type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
         type: DT_INT32
@@ -15032,6 +15419,7 @@ op {
       list {
         type: DT_INT32
         type: DT_INT64
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
       }
@@ -15060,6 +15448,7 @@ op {
     allowed_values {
       list {
         type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
         type: DT_UINT8
@@ -15092,7 +15481,7 @@ op {
   output_arg {
     name: "output"
     description: "2-D Tensor with shape `[batch_size, num_samples]`.  Each slice `[i, :]`\ncontains the drawn class labels with range `[0, num_classes)`."
-    type: DT_INT64
+    type_attr: "output_dtype"
   }
   attr {
     name: "seed"
@@ -15126,6 +15515,20 @@ op {
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
+        type: DT_BFLOAT16
+      }
+    }
+  }
+  attr {
+    name: "output_dtype"
+    type: "type"
+    default_value {
+      type: DT_INT64
+    }
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
       }
     }
   }
@@ -15493,6 +15896,7 @@ op {
     allowed_values {
       list {
         type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
         type: DT_INT32
@@ -15655,6 +16059,7 @@ op {
     allowed_values {
       list {
         type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
         type: DT_UINT8
@@ -15717,6 +16122,7 @@ op {
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
@@ -15838,12 +16244,18 @@ op {
     type: "type"
     allowed_values {
       list {
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
+        type: DT_INT8
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_UINT16
         type: DT_INT32
         type: DT_INT64
         type: DT_COMPLEX64
         type: DT_COMPLEX128
+        type: DT_BOOL
       }
     }
   }
@@ -16665,6 +17077,7 @@ op {
     allowed_values {
       list {
         type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
       }
@@ -17144,6 +17557,7 @@ op {
     allowed_values {
       list {
         type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
         type: DT_INT32
@@ -17415,6 +17829,7 @@ op {
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
@@ -17583,6 +17998,7 @@ op {
     type: "type"
     allowed_values {
       list {
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
       }
@@ -17644,6 +18060,7 @@ op {
     type: "type"
     allowed_values {
       list {
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
       }
@@ -17693,6 +18110,7 @@ op {
     type: "type"
     allowed_values {
       list {
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
       }
@@ -18362,6 +18780,19 @@ op {
       }
     }
   }
+  attr {
+    name: "dilations"
+    type: "list(int)"
+    default_value {
+      list {
+        i: 1
+        i: 1
+        i: 1
+        i: 1
+      }
+    }
+    description: "1-D tensor of length 4.  The dilation factor for each dimension of\n`input`. If set to k > 1, there will be k-1 skipped cells between each\nfilter element on that dimension. The dimension order is determined by the\nvalue of `data_format`, see above for details. Dilations in the batch and\ndepth dimensions must be 1."
+  }
   summary: "Computes a 2D convolution given quantized 4D input and filter tensors."
   description: "The inputs are quantized tensors where the lowest value represents the real\nnumber of the associated minimum, and the highest represents the maximum.\nThis means that you can only interpret the quantized output in the same way, by\ntaking the returned minimum and maximum values into account."
 }
@@ -19600,6 +20031,37 @@ op {
   }
   is_stateful: true
 }
+op {
+  name: "RandomDataset"
+  input_arg {
+    name: "seed"
+    description: "A scalar seed for the random number generator. If either seed or\nseed2 is set to be non-zero, the random number generator is seeded\nby the given seed.  Otherwise, a random seed is used."
+    type: DT_INT64
+  }
+  input_arg {
+    name: "seed2"
+    description: "A second scalar seed to avoid seed collision."
+    type: DT_INT64
+  }
+  output_arg {
+    name: "handle"
+    type: DT_VARIANT
+  }
+  attr {
+    name: "output_types"
+    type: "list(type)"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "output_shapes"
+    type: "list(shape)"
+    has_minimum: true
+    minimum: 1
+  }
+  summary: "Creates a Dataset that returns pseudorandom numbers."
+  is_stateful: true
+}
 op {
   name: "RandomGamma"
   input_arg {
@@ -20015,6 +20477,7 @@ op {
     allowed_values {
       list {
         type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
       }
@@ -20069,6 +20532,7 @@ op {
     allowed_values {
       list {
         type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
       }
@@ -20180,6 +20644,7 @@ op {
     }
     allowed_values {
       list {
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
         type: DT_INT32
@@ -20590,6 +21055,7 @@ op {
     allowed_values {
       list {
         type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
         type: DT_UINT8
@@ -20622,6 +21088,7 @@ op {
     allowed_values {
       list {
         type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
         type: DT_INT32
@@ -20654,6 +21121,7 @@ op {
     allowed_values {
       list {
         type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
         type: DT_COMPLEX64
@@ -20981,6 +21449,7 @@ op {
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
@@ -21012,6 +21481,7 @@ op {
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
@@ -21050,6 +21520,7 @@ op {
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
@@ -21088,6 +21559,7 @@ op {
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
@@ -21692,6 +22164,7 @@ op {
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
@@ -21750,6 +22223,7 @@ op {
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
@@ -21828,6 +22302,7 @@ op {
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
@@ -21915,6 +22390,7 @@ op {
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
@@ -21996,6 +22472,7 @@ op {
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
@@ -22078,6 +22555,7 @@ op {
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
@@ -22156,6 +22634,7 @@ op {
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
@@ -22238,6 +22717,7 @@ op {
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
@@ -22291,6 +22771,7 @@ op {
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
@@ -22353,6 +22834,7 @@ op {
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
@@ -22434,6 +22916,7 @@ op {
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
@@ -22502,6 +22985,7 @@ op {
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
@@ -22565,6 +23049,7 @@ op {
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
@@ -22642,6 +23127,7 @@ op {
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
@@ -22764,6 +23250,7 @@ op {
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
@@ -22781,6 +23268,49 @@ op {
   description: "This operation computes\n\n    # Scalar indices\n    ref[indices, ...] += updates[...]\n\n    # Vector indices (for each i)\n    ref[indices[i], ...] += updates[i, ...]\n\n    # High rank indices (for each i, ..., j)\n    ref[indices[i, ..., j], ...] += updates[i, ..., j, ...]\n\nDuplicate entries are handled correctly: if multiple `indices` reference\nthe same location, their contributions add.\n\nRequires `updates.shape = indices.shape + ref.shape[1:]`.\n\n<div style=\"width:70%; margin:auto; margin-bottom:10px; margin-top:20px;\">\n<img style=\"width:100%\" src=\'https://www.tensorflow.org/images/ScatterAdd.png\' alt>\n</div>"
   is_stateful: true
 }
+op {
+  name: "ResourceScatterNdUpdate"
+  input_arg {
+    name: "ref"
+    description: "A resource handle. Must be from a VarHandleOp."
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "indices"
+    description: "A Tensor. Must be one of the following types: int32, int64.\nA tensor of indices into ref."
+    type_attr: "Tindices"
+  }
+  input_arg {
+    name: "updates"
+    description: "A Tensor. Must have the same type as ref. A tensor of updated\nvalues to add to ref."
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+  }
+  attr {
+    name: "Tindices"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+  attr {
+    name: "use_locking"
+    type: "bool"
+    default_value {
+      b: true
+    }
+    description: "An optional bool. Defaults to True. If True, the assignment will\nbe protected by a lock; otherwise the behavior is undefined,\nbut may exhibit less contention."
+  }
+  summary: "Applies sparse `updates` to individual values or slices within a given"
+  description: "variable according to `indices`.\n\n`ref` is a `Tensor` with rank `P` and `indices` is a `Tensor` of rank `Q`.\n\n`indices` must be integer tensor, containing indices into `ref`.\nIt must be shape `[d_0, ..., d_{Q-2}, K]` where `0 < K <= P`.\n\nThe innermost dimension of `indices` (with length `K`) corresponds to\nindices into elements (if `K = P`) or slices (if `K < P`) along the `K`th\ndimension of `ref`.\n\n`updates` is `Tensor` of rank `Q-1+P-K` with shape:\n\n```\n[d_0, ..., d_{Q-2}, ref.shape[K], ..., ref.shape[P-1]].\n```\n\nFor example, say we want to update 4 scattered elements to a rank-1 tensor to\n8 elements. In Python, that update would look like this:\n\n```python\n    ref = tfe.Variable([1, 2, 3, 4, 5, 6, 7, 8])\n    indices = tf.constant([[4], [3], [1] ,[7]])\n    updates = tf.constant([9, 10, 11, 12])\n    update = tf.scatter_nd_update(ref, indices, updates)\n    with tf.Session() as sess:\n      print sess.run(update)\n```\n\nThe resulting update to ref would look like this:\n\n    [1, 11, 3, 10, 9, 6, 7, 12]\n\nSee @{tf.scatter_nd} for more details about how to make updates to\nslices."
+  is_stateful: true
+}
 op {
   name: "ResourceScatterUpdate"
   input_arg {
@@ -22819,6 +23349,7 @@ op {
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
@@ -22898,6 +23429,7 @@ op {
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
@@ -22970,6 +23502,7 @@ op {
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
@@ -23063,6 +23596,7 @@ op {
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
@@ -23159,6 +23693,7 @@ op {
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
@@ -23252,6 +23787,7 @@ op {
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
@@ -23349,6 +23885,7 @@ op {
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
@@ -23427,6 +23964,7 @@ op {
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
@@ -23518,6 +24056,7 @@ op {
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
@@ -23596,6 +24135,7 @@ op {
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
@@ -23688,6 +24228,7 @@ op {
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
@@ -24028,6 +24569,7 @@ op {
         type: DT_INT64
         type: DT_BOOL
         type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
         type: DT_COMPLEX64
@@ -24088,6 +24630,7 @@ op {
     type: "type"
     allowed_values {
       list {
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
       }
@@ -24112,6 +24655,7 @@ op {
     allowed_values {
       list {
         type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
         type: DT_INT32
@@ -24140,6 +24684,7 @@ op {
     allowed_values {
       list {
         type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
         type: DT_COMPLEX64
@@ -24170,6 +24715,7 @@ op {
     allowed_values {
       list {
         type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
         type: DT_COMPLEX64
@@ -24513,6 +25059,7 @@ op {
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
@@ -24611,6 +25158,7 @@ op {
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
@@ -24680,6 +25228,7 @@ op {
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
@@ -24749,6 +25298,7 @@ op {
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
@@ -24857,6 +25407,7 @@ op {
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
@@ -24924,6 +25475,7 @@ op {
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
@@ -24985,6 +25537,7 @@ op {
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
@@ -25103,6 +25656,7 @@ op {
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
@@ -25391,6 +25945,7 @@ op {
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
@@ -25439,6 +25994,7 @@ op {
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
@@ -25487,6 +26043,7 @@ op {
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
@@ -25540,6 +26097,7 @@ op {
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
@@ -25593,6 +26151,7 @@ op {
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
@@ -25722,6 +26281,7 @@ op {
     allowed_values {
       list {
         type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
       }
@@ -25753,6 +26313,7 @@ op {
     allowed_values {
       list {
         type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
       }
@@ -25794,13 +26355,27 @@ op {
   }
   output_arg {
     name: "serialized_sparse"
-    type: DT_STRING
+    type_attr: "out_type"
   }
   attr {
     name: "T"
     type: "type"
   }
-  summary: "Serialize an `N`-minibatch `SparseTensor` into an `[N, 3]` string `Tensor`."
+  attr {
+    name: "out_type"
+    type: "type"
+    default_value {
+      type: DT_STRING
+    }
+    description: "The `dtype` to use for serialization; the supported types are `string`\n(default) and `variant`."
+    allowed_values {
+      list {
+        type: DT_STRING
+        type: DT_VARIANT
+      }
+    }
+  }
+  summary: "Serialize an `N`-minibatch `SparseTensor` into an `[N, 3]` `Tensor` object."
   description: "The `SparseTensor` must have rank `R` greater than 1, and the first dimension\nis treated as the minibatch dimension.  Elements of the `SparseTensor`\nmust be sorted in increasing order of this first dimension.  The serialized\n`SparseTensor` objects going into each row of `serialized_sparse` will have\nrank `R-1`.\n\nThe minibatch size `N` is extracted from `sparse_shape[0]`."
 }
 op {
@@ -25822,13 +26397,27 @@ op {
   }
   output_arg {
     name: "serialized_sparse"
-    type: DT_STRING
+    type_attr: "out_type"
   }
   attr {
     name: "T"
     type: "type"
   }
-  summary: "Serialize a `SparseTensor` into a string 3-vector (1-D `Tensor`) object."
+  attr {
+    name: "out_type"
+    type: "type"
+    default_value {
+      type: DT_STRING
+    }
+    description: "The `dtype` to use for serialization; the supported types are `string`\n(default) and `variant`."
+    allowed_values {
+      list {
+        type: DT_STRING
+        type: DT_VARIANT
+      }
+    }
+  }
+  summary: "Serialize a `SparseTensor` into a `[3]` `Tensor` object."
 }
 op {
   name: "SerializeTensor"
@@ -26064,6 +26653,7 @@ op {
     allowed_values {
       list {
         type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
         type: DT_COMPLEX64
@@ -26094,6 +26684,7 @@ op {
     allowed_values {
       list {
         type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
         type: DT_COMPLEX64
@@ -26120,6 +26711,7 @@ op {
     allowed_values {
       list {
         type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
         type: DT_INT32
@@ -26148,6 +26740,7 @@ op {
     allowed_values {
       list {
         type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
         type: DT_COMPLEX64
@@ -26173,6 +26766,7 @@ op {
     allowed_values {
       list {
         type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
         type: DT_COMPLEX64
@@ -26356,6 +26950,22 @@ op {
   summary: "Return a slice from \'input\'."
   description: "The output tensor is a tensor with dimensions described by \'size\'\nwhose values are extracted from \'input\' starting at the offsets in\n\'begin\'.\n\n*Requirements*:\n  0 <= begin[i] <= begin[i] + size[i] <= Di  for i in [0, n)"
 }
+op {
+  name: "Snapshot"
+  input_arg {
+    name: "input"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+  }
+  summary: "Returns a copy of the input tensor."
+}
 op {
   name: "Softmax"
   input_arg {
@@ -26374,6 +26984,7 @@ op {
     allowed_values {
       list {
         type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
       }
@@ -26410,6 +27021,7 @@ op {
     allowed_values {
       list {
         type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
       }
@@ -26444,6 +27056,7 @@ op {
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
@@ -26482,6 +27095,7 @@ op {
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
@@ -26513,6 +27127,7 @@ op {
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
@@ -26551,6 +27166,7 @@ op {
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
@@ -26688,7 +27304,7 @@ op {
     }
   }
   summary: "SpaceToDepth for tensors of type T."
-  description: "Rearranges blocks of spatial data, into depth. More specifically,\nthis op outputs a copy of the input tensor where values from the `height`\nand `width` dimensions are moved to the `depth` dimension.\nThe attr `block_size` indicates the input block size.\n\n  * Non-overlapping blocks of size `block_size x block size` are rearranged\n    into depth at each location.\n  * The depth of the output tensor is `block_size * block_size * input_depth`.\n  * The Y, X coordinates within each block of the input become the high order\n    component of the output channel index.\n  * The input tensor\'s height and width must be divisible by block_size.\n\nThe `data_format` attr specifies the layout of the input and output tensors\nwith the following options:\n  \"NHWC\": `[ batch, height, width, channels ]`\n  \"NCHW\": `[ batch, channels, height, width ]`\n  \"NCHW_VECT_C\":\n      `qint8 [ batch, channels / 4, height, width, channels % 4 ]`\n\nIt is useful to consider the operation as transforming a 6-D Tensor.\ne.g. for data_format = NHWC,\n     Each element in the input tensor can be specified via 6 coordinates,\n     ordered by decreasing memory layout significance as:\n     n,oY,bY,oX,bX,iC  (where n=batch index, oX, oY means X or Y coordinates\n                        within the output image, bX, bY means coordinates\n                        within the input block, iC means input channels).\n     The output would be a transpose to the following layout:\n     n,oY,oX,bY,bX,iC\n\nThis operation is useful for resizing the activations between convolutions\n(but keeping all data), e.g. instead of pooling. It is also useful for training\npurely convolutional models.\n\nFor example, given an input of shape `[1, 2, 2, 1]`, data_format = \"NHWC\" and\nblock_size = 2:\n\n```\nx = [[[[1], [2]],\n      [[3], [4]]]]\n```\n\nThis operation will output a tensor of shape `[1, 1, 1, 4]`:\n\n```\n[[[[1, 2, 3, 4]]]]\n```\n\nHere, the input has a batch of 1 and each batch element has shape `[2, 2, 1]`,\nthe corresponding output will have a single element (i.e. width and height are\nboth 1) and will have a depth of 4 channels (1 * block_size * block_size).\nThe output element shape is `[1, 1, 4]`.\n\nFor an input tensor with larger depth, here of shape `[1, 2, 2, 3]`, e.g.\n\n```\nx = [[[[1, 2, 3], [4, 5, 6]],\n      [[7, 8, 9], [10, 11, 12]]]]\n```\n\nThis operation, for block_size of 2, will return the following tensor of shape\n`[1, 1, 1, 12]`\n\n```\n[[[[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]]]]\n```\n\nSimilarly, for the following input of shape `[1 4 4 1]`, and a block size of 2:\n\n```\nx = [[[[1],   [2],  [5],  [6]],\n      [[3],   [4],  [7],  [8]],\n      [[9],  [10], [13],  [14]],\n      [[11], [12], [15],  [16]]]]\n```\n\nthe operator will return the following tensor of shape `[1 2 2 4]`:\n\n```\nx = [[[[1, 2, 3, 4],\n       [5, 6, 7, 8]],\n      [[9, 10, 11, 12],\n       [13, 14, 15, 16]]]]\n```"
+  description: "Rearranges blocks of spatial data, into depth. More specifically,\nthis op outputs a copy of the input tensor where values from the `height`\nand `width` dimensions are moved to the `depth` dimension.\nThe attr `block_size` indicates the input block size.\n\n  * Non-overlapping blocks of size `block_size x block size` are rearranged\n    into depth at each location.\n  * The depth of the output tensor is `block_size * block_size * input_depth`.\n  * The Y, X coordinates within each block of the input become the high order\n    component of the output channel index.\n  * The input tensor\'s height and width must be divisible by block_size.\n\nThe `data_format` attr specifies the layout of the input and output tensors\nwith the following options:\n  \"NHWC\": `[ batch, height, width, channels ]`\n  \"NCHW\": `[ batch, channels, height, width ]`\n  \"NCHW_VECT_C\":\n      `qint8 [ batch, channels / 4, height, width, 4 ]`\n\nIt is useful to consider the operation as transforming a 6-D Tensor.\ne.g. for data_format = NHWC,\n     Each element in the input tensor can be specified via 6 coordinates,\n     ordered by decreasing memory layout significance as:\n     n,oY,bY,oX,bX,iC  (where n=batch index, oX, oY means X or Y coordinates\n                        within the output image, bX, bY means coordinates\n                        within the input block, iC means input channels).\n     The output would be a transpose to the following layout:\n     n,oY,oX,bY,bX,iC\n\nThis operation is useful for resizing the activations between convolutions\n(but keeping all data), e.g. instead of pooling. It is also useful for training\npurely convolutional models.\n\nFor example, given an input of shape `[1, 2, 2, 1]`, data_format = \"NHWC\" and\nblock_size = 2:\n\n```\nx = [[[[1], [2]],\n      [[3], [4]]]]\n```\n\nThis operation will output a tensor of shape `[1, 1, 1, 4]`:\n\n```\n[[[[1, 2, 3, 4]]]]\n```\n\nHere, the input has a batch of 1 and each batch element has shape `[2, 2, 1]`,\nthe corresponding output will have a single element (i.e. width and height are\nboth 1) and will have a depth of 4 channels (1 * block_size * block_size).\nThe output element shape is `[1, 1, 4]`.\n\nFor an input tensor with larger depth, here of shape `[1, 2, 2, 3]`, e.g.\n\n```\nx = [[[[1, 2, 3], [4, 5, 6]],\n      [[7, 8, 9], [10, 11, 12]]]]\n```\n\nThis operation, for block_size of 2, will return the following tensor of shape\n`[1, 1, 1, 12]`\n\n```\n[[[[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]]]]\n```\n\nSimilarly, for the following input of shape `[1 4 4 1]`, and a block size of 2:\n\n```\nx = [[[[1],   [2],  [5],  [6]],\n      [[3],   [4],  [7],  [8]],\n      [[9],  [10], [13],  [14]],\n      [[11], [12], [15],  [16]]]]\n```\n\nthe operator will return the following tensor of shape `[1 2 2 4]`:\n\n```\nx = [[[[1, 2, 3, 4],\n       [5, 6, 7, 8]],\n      [[9, 10, 11, 12],\n       [13, 14, 15, 16]]]]\n```"
 }
 op {
   name: "SparseAccumulatorApplyGradient"
@@ -26740,6 +27356,7 @@ op {
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
@@ -26801,6 +27418,7 @@ op {
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
@@ -26877,6 +27495,7 @@ op {
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
@@ -26896,6 +27515,7 @@ op {
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
@@ -26955,6 +27575,7 @@ op {
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
@@ -27032,6 +27653,7 @@ op {
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
@@ -27111,6 +27733,7 @@ op {
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
@@ -27212,6 +27835,7 @@ op {
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
@@ -27317,6 +27941,7 @@ op {
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
@@ -27418,6 +28043,7 @@ op {
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
@@ -27523,6 +28149,7 @@ op {
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
@@ -27608,6 +28235,7 @@ op {
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
@@ -27706,6 +28334,7 @@ op {
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
@@ -27790,6 +28419,7 @@ op {
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
@@ -27890,6 +28520,7 @@ op {
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
@@ -27997,6 +28628,7 @@ op {
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
@@ -28178,6 +28810,7 @@ op {
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
@@ -28232,6 +28865,7 @@ op {
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
@@ -28286,6 +28920,7 @@ op {
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
@@ -28491,6 +29126,7 @@ op {
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
@@ -28555,6 +29191,7 @@ op {
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
@@ -28617,6 +29254,7 @@ op {
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
@@ -28686,6 +29324,7 @@ op {
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
@@ -28856,7 +29495,7 @@ op {
   description: "Returns tensor \"output\" with same shape as grad, except for dimension 0 whose\nvalue is output_dim0."
 }
 op {
-  name: "SparseSegmentSqrtN"
+  name: "SparseSegmentMeanWithNumSegments"
   input_arg {
     name: "data"
     type_attr: "T"
@@ -28871,9 +29510,14 @@ op {
     description: "A 1-D tensor. Values should be sorted and can be repeated."
     type: DT_INT32
   }
+  input_arg {
+    name: "num_segments"
+    description: "Should equal the number of distinct segment IDs."
+    type_attr: "Tnumsegments"
+  }
   output_arg {
     name: "output"
-    description: "Has same shape as data, except for dimension 0 which\nhas size `k`, the number of segments."
+    description: "Has same shape as data, except for dimension 0 which has size\n`num_segments`."
     type_attr: "T"
   }
   attr {
@@ -28899,29 +29543,89 @@ op {
       }
     }
   }
-  summary: "Computes the sum along sparse segments of a tensor divided by the sqrt of N."
-  description: "N is the size of the segment being reduced.\n\nRead @{$math_ops#segmentation$the section on segmentation} for an explanation of\nsegments."
+  attr {
+    name: "Tnumsegments"
+    type: "type"
+    default_value {
+      type: DT_INT32
+    }
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+  summary: "Computes the mean along sparse segments of a tensor."
+  description: "Like `SparseSegmentMean`, but allows missing ids in `segment_ids`. If an id is\nmisisng, the `output` tensor at that position will be zeroed.\n\nRead @{$math_ops#segmentation$the section on segmentation} for an explanation of\nsegments."
 }
 op {
-  name: "SparseSegmentSqrtNGrad"
+  name: "SparseSegmentSqrtN"
   input_arg {
-    name: "grad"
-    description: "gradient propagated to the SparseSegmentSqrtN op."
+    name: "data"
     type_attr: "T"
   }
   input_arg {
     name: "indices"
-    description: "indices passed to the corresponding SparseSegmentSqrtN op."
+    description: "A 1-D tensor. Has same rank as `segment_ids`."
     type_attr: "Tidx"
   }
   input_arg {
     name: "segment_ids"
-    description: "segment_ids passed to the corresponding SparseSegmentSqrtN op."
-    type: DT_INT32
-  }
-  input_arg {
-    name: "output_dim0"
-    description: "dimension 0 of \"data\" passed to SparseSegmentSqrtN op."
+    description: "A 1-D tensor. Values should be sorted and can be repeated."
+    type: DT_INT32
+  }
+  output_arg {
+    name: "output"
+    description: "Has same shape as data, except for dimension 0 which\nhas size `k`, the number of segments."
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+      }
+    }
+  }
+  attr {
+    name: "Tidx"
+    type: "type"
+    default_value {
+      type: DT_INT32
+    }
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+  summary: "Computes the sum along sparse segments of a tensor divided by the sqrt of N."
+  description: "N is the size of the segment being reduced.\n\nRead @{$math_ops#segmentation$the section on segmentation} for an explanation of\nsegments."
+}
+op {
+  name: "SparseSegmentSqrtNGrad"
+  input_arg {
+    name: "grad"
+    description: "gradient propagated to the SparseSegmentSqrtN op."
+    type_attr: "T"
+  }
+  input_arg {
+    name: "indices"
+    description: "indices passed to the corresponding SparseSegmentSqrtN op."
+    type_attr: "Tidx"
+  }
+  input_arg {
+    name: "segment_ids"
+    description: "segment_ids passed to the corresponding SparseSegmentSqrtN op."
+    type: DT_INT32
+  }
+  input_arg {
+    name: "output_dim0"
+    description: "dimension 0 of \"data\" passed to SparseSegmentSqrtN op."
     type: DT_INT32
   }
   output_arg {
@@ -28954,6 +29658,71 @@ op {
   summary: "Computes gradients for SparseSegmentSqrtN."
   description: "Returns tensor \"output\" with same shape as grad, except for dimension 0 whose\nvalue is output_dim0."
 }
+op {
+  name: "SparseSegmentSqrtNWithNumSegments"
+  input_arg {
+    name: "data"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "indices"
+    description: "A 1-D tensor. Has same rank as `segment_ids`."
+    type_attr: "Tidx"
+  }
+  input_arg {
+    name: "segment_ids"
+    description: "A 1-D tensor. Values should be sorted and can be repeated."
+    type: DT_INT32
+  }
+  input_arg {
+    name: "num_segments"
+    description: "Should equal the number of distinct segment IDs."
+    type_attr: "Tnumsegments"
+  }
+  output_arg {
+    name: "output"
+    description: "Has same shape as data, except for dimension 0 which\nhas size `k`, the number of segments."
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+      }
+    }
+  }
+  attr {
+    name: "Tidx"
+    type: "type"
+    default_value {
+      type: DT_INT32
+    }
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+  attr {
+    name: "Tnumsegments"
+    type: "type"
+    default_value {
+      type: DT_INT32
+    }
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+  summary: "Computes the sum along sparse segments of a tensor divided by the sqrt of N."
+  description: "N is the size of the segment being reduced.\n\nLike `SparseSegmentSqrtN`, but allows missing ids in `segment_ids`. If an id is\nmisisng, the `output` tensor at that position will be zeroed.\n\nRead @{$math_ops#segmentation$the section on segmentation} for an explanation of\nsegments."
+}
 op {
   name: "SparseSegmentSum"
   input_arg {
@@ -28991,6 +29760,7 @@ op {
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
@@ -29010,6 +29780,81 @@ op {
   summary: "Computes the sum along sparse segments of a tensor."
   description: "Read @{$math_ops#segmentation$the section on segmentation} for an explanation of\nsegments.\n\nLike `SegmentSum`, but `segment_ids` can have rank less than `data`\'s first\ndimension, selecting a subset of dimension 0, specified by `indices`.\n\nFor example:\n\n```python\nc = tf.constant([[1,2,3,4], [-1,-2,-3,-4], [5,6,7,8]])\n\n# Select two rows, one segment.\ntf.sparse_segment_sum(c, tf.constant([0, 1]), tf.constant([0, 0]))\n# => [[0 0 0 0]]\n\n# Select two rows, two segment.\ntf.sparse_segment_sum(c, tf.constant([0, 1]), tf.constant([0, 1]))\n# => [[ 1  2  3  4]\n#     [-1 -2 -3 -4]]\n\n# Select all rows, two segments.\ntf.sparse_segment_sum(c, tf.constant([0, 1, 2]), tf.constant([0, 0, 1]))\n# => [[0 0 0 0]\n#     [5 6 7 8]]\n\n# Which is equivalent to:\ntf.segment_sum(c, tf.constant([0, 0, 1]))\n```"
 }
+op {
+  name: "SparseSegmentSumWithNumSegments"
+  input_arg {
+    name: "data"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "indices"
+    description: "A 1-D tensor. Has same rank as `segment_ids`."
+    type_attr: "Tidx"
+  }
+  input_arg {
+    name: "segment_ids"
+    description: "A 1-D tensor. Values should be sorted and can be repeated."
+    type: DT_INT32
+  }
+  input_arg {
+    name: "num_segments"
+    description: "Should equal the number of distinct segment IDs."
+    type_attr: "Tnumsegments"
+  }
+  output_arg {
+    name: "output"
+    description: "Has same shape as data, except for dimension 0 which\nhas size `num_segments`."
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_UINT16
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
+      }
+    }
+  }
+  attr {
+    name: "Tidx"
+    type: "type"
+    default_value {
+      type: DT_INT32
+    }
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+  attr {
+    name: "Tnumsegments"
+    type: "type"
+    default_value {
+      type: DT_INT32
+    }
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+  summary: "Computes the sum along sparse segments of a tensor."
+  description: "Like `SparseSegmentSum`, but allows missing ids in `segment_ids`. If an id is\nmisisng, the `output` tensor at that position will be zeroed.\n\nRead @{$math_ops#segmentation$the section on segmentation} for an explanation of\nsegments.\n\nFor example:\n\n```python\nc = tf.constant([[1,2,3,4], [-1,-2,-3,-4], [5,6,7,8]])\n\ntf.sparse_segment_sum_with_num_segments(\n    c, tf.constant([0, 1]), tf.constant([0, 0]), num_segments=3)\n# => [[0 0 0 0]\n#     [0 0 0 0]\n#     [0 0 0 0]]\n\ntf.sparse_segment_sum_with_num_segments(c,\n                                        tf.constant([0, 1]),\n                                        tf.constant([0, 2],\n                                        num_segments=4))\n# => [[ 1  2  3  4]\n#     [ 0  0  0  0]\n#     [-1 -2 -3 -4]\n#     [ 0  0  0  0]]\n```"
+}
 op {
   name: "SparseSlice"
   input_arg {
@@ -29121,6 +29966,7 @@ op {
     allowed_values {
       list {
         type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
       }
@@ -29200,6 +30046,7 @@ op {
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
@@ -29269,6 +30116,7 @@ op {
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
@@ -29375,6 +30223,7 @@ op {
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
@@ -29737,6 +30586,7 @@ op {
     allowed_values {
       list {
         type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
         type: DT_COMPLEX64
@@ -29767,6 +30617,7 @@ op {
     allowed_values {
       list {
         type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
         type: DT_COMPLEX64
@@ -29793,6 +30644,7 @@ op {
     allowed_values {
       list {
         type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
         type: DT_INT32
@@ -29825,6 +30677,7 @@ op {
     allowed_values {
       list {
         type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
         type: DT_INT32
@@ -30225,7 +31078,7 @@ op {
   input_arg {
     name: "seed"
     description: "2 seeds (shape [2])."
-    type: DT_INT64
+    type_attr: "Tseed"
   }
   output_arg {
     name: "output"
@@ -30260,6 +31113,19 @@ op {
       }
     }
   }
+  attr {
+    name: "Tseed"
+    type: "type"
+    default_value {
+      type: DT_INT64
+    }
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
   summary: "Outputs deterministic pseudorandom values from a normal distribution."
   description: "The generated values will have mean 0 and standard deviation 1.\n\nThe outputs are a deterministic function of `shape` and `seed`."
 }
@@ -30273,7 +31139,7 @@ op {
   input_arg {
     name: "seed"
     description: "2 seeds (shape [2])."
-    type: DT_INT64
+    type_attr: "Tseed"
   }
   output_arg {
     name: "output"
@@ -30308,6 +31174,19 @@ op {
       }
     }
   }
+  attr {
+    name: "Tseed"
+    type: "type"
+    default_value {
+      type: DT_INT64
+    }
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
   summary: "Outputs deterministic pseudorandom random values from a uniform distribution."
   description: "The generated values follow a uniform distribution in the range `[0, 1)`. The\nlower bound 0 is included in the range, while the upper bound 1 is excluded.\n\nThe outputs are a deterministic function of `shape` and `seed`."
 }
@@ -30321,7 +31200,7 @@ op {
   input_arg {
     name: "seed"
     description: "2 seeds (shape [2])."
-    type: DT_INT64
+    type_attr: "Tseed"
   }
   output_arg {
     name: "output"
@@ -30356,6 +31235,19 @@ op {
       }
     }
   }
+  attr {
+    name: "Tseed"
+    type: "type"
+    default_value {
+      type: DT_INT64
+    }
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
   summary: "Outputs deterministic pseudorandom values from a truncated normal distribution."
   description: "The generated values follow a normal distribution with mean 0 and standard\ndeviation 1, except that values whose magnitude is more than 2 standard\ndeviations from the mean are dropped and re-picked.\n\nThe outputs are a deterministic function of `shape` and `seed`."
 }
@@ -30839,6 +31731,7 @@ op {
     allowed_values {
       list {
         type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
         type: DT_UINT8
@@ -30936,6 +31829,7 @@ op {
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
@@ -31253,6 +32147,7 @@ op {
     allowed_values {
       list {
         type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
         type: DT_INT32
@@ -31280,6 +32175,7 @@ op {
     allowed_values {
       list {
         type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
         type: DT_COMPLEX64
@@ -31309,6 +32205,7 @@ op {
     allowed_values {
       list {
         type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
         type: DT_COMPLEX64
@@ -32714,6 +33611,7 @@ op {
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
@@ -32770,6 +33668,7 @@ op {
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
@@ -32830,6 +33729,7 @@ op {
     allowed_values {
       list {
         type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
         type: DT_UINT8
@@ -32867,6 +33767,7 @@ op {
       list {
         type: DT_INT32
         type: DT_INT64
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
       }
@@ -32910,6 +33811,7 @@ op {
     allowed_values {
       list {
         type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
       }
@@ -33034,6 +33936,48 @@ op {
   summary: "Finds unique elements in a 1-D tensor."
   description: "This operation returns a tensor `y` containing all of the unique elements of `x`\nsorted in the same order that they occur in `x`. This operation also returns a\ntensor `idx` the same size as `x` that contains the index of each value of `x`\nin the unique output `y`. In other words:\n\n`y[idx[i]] = x[i] for i in [0, 1,...,rank(x) - 1]`\n\nFor example:\n\n```\n# tensor \'x\' is [1, 1, 2, 4, 4, 4, 7, 8, 8]\ny, idx = unique(x)\ny ==> [1, 2, 4, 7, 8]\nidx ==> [0, 0, 1, 2, 2, 2, 3, 4, 4]\n```"
 }
+op {
+  name: "UniqueV2"
+  input_arg {
+    name: "x"
+    description: "A `Tensor`."
+    type_attr: "T"
+  }
+  input_arg {
+    name: "axis"
+    description: "A `Tensor` of type `int64` (default: 0). The axis of the Tensor to\nfind the unique elements."
+    type: DT_INT64
+  }
+  output_arg {
+    name: "y"
+    description: "A `Tensor`. Unique elements along the `axis` of `Tensor` x."
+    type_attr: "T"
+  }
+  output_arg {
+    name: "idx"
+    description: "A 1-D Tensor. Has the same type as x that contains the index of each\nvalue of x in the output y."
+    type_attr: "out_idx"
+  }
+  attr {
+    name: "T"
+    type: "type"
+  }
+  attr {
+    name: "out_idx"
+    type: "type"
+    default_value {
+      type: DT_INT32
+    }
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+  summary: "Finds unique elements in a 1-D tensor."
+  description: "This operation returns a tensor `y` containing all of the unique elements of `x`\nsorted in the same order that they occur in `x`. This operation also returns a\ntensor `idx` the same size as `x` that contains the index of each value of `x`\nin the unique output `y`. In other words:\n\n`y[idx[i]] = x[i] for i in [0, 1,...,rank(x) - 1]`\n\nFor example:\n\n```\n# tensor \'x\' is [1, 1, 2, 4, 4, 4, 7, 8, 8]\ny, idx = unique(x)\ny ==> [1, 2, 4, 7, 8]\nidx ==> [0, 0, 1, 2, 2, 2, 3, 4, 4]\n```"
+}
 op {
   name: "UniqueWithCounts"
   input_arg {
@@ -33122,7 +34066,7 @@ op {
   }
   input_arg {
     name: "num_segments"
-    type: DT_INT32
+    type_attr: "Tnumsegments"
   }
   output_arg {
     name: "output"
@@ -33145,6 +34089,7 @@ op {
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
@@ -33158,6 +34103,19 @@ op {
       }
     }
   }
+  attr {
+    name: "Tnumsegments"
+    type: "type"
+    default_value {
+      type: DT_INT32
+    }
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
   summary: "Computes the Max along segments of a tensor."
   description: "Read @{$math_ops#segmentation$the section on segmentation} for an explanation of\nsegments.\n\nThis operator is similar to the [unsorted segment sum operator](../../../api_docs/python/math_ops.md#UnsortedSegmentSum).\nInstead of computing the sum over segments, it computes the maximum\nsuch that:\n\n\\\\(output_i = \\max_j data_j\\\\) where max is over `j` such\nthat `segment_ids[j] == i`.\n\nIf the maximum is empty for a given segment ID `i`, it outputs the smallest possible value for specific numeric type,\n `output[i] = numeric_limits<T>::min()`.\n\n<div style=\"width:70%; margin:auto; margin-bottom:10px; margin-top:20px;\">\n<img style=\"width:100%\" src=\"https://www.tensorflow.org/images/UnsortedSegmentMax.png\" alt>\n</div>"
 }
@@ -33174,7 +34132,7 @@ op {
   }
   input_arg {
     name: "num_segments"
-    type: DT_INT32
+    type_attr: "Tnumsegments"
   }
   output_arg {
     name: "output"
@@ -33202,6 +34160,7 @@ op {
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
@@ -33215,8 +34174,21 @@ op {
       }
     }
   }
+  attr {
+    name: "Tnumsegments"
+    type: "type"
+    default_value {
+      type: DT_INT32
+    }
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
   summary: "Computes the sum along segments of a tensor."
-  description: "Read @{$math_ops#segmentation$the section on segmentation} for an explanation of\nsegments.\n\nComputes a tensor such that\n`(output[i] = sum_{j...} data[j...]` where the sum is over tuples `j...` such\nthat `segment_ids[j...] == i`.  Unlike `SegmentSum`, `segment_ids`\nneed not be sorted and need not cover all values in the full\nrange of valid values.\n\nIf the sum is empty for a given segment ID `i`, `output[i] = 0`.\n\n`num_segments` should equal the number of distinct segment IDs.\n\n<div style=\"width:70%; margin:auto; margin-bottom:10px; margin-top:20px;\">\n<img style=\"width:100%\" src=\"https://www.tensorflow.org/images/UnsortedSegmentSum.png\" alt>\n</div>"
+  description: "Read @{$math_ops#segmentation$the section on segmentation} for an explanation of\nsegments.\n\nComputes a tensor such that\n`(output[i] = sum_{j...} data[j...]` where the sum is over tuples `j...` such\nthat `segment_ids[j...] == i`.  Unlike `SegmentSum`, `segment_ids`\nneed not be sorted and need not cover all values in the full\nrange of valid values.\n\nIf the sum is empty for a given segment ID `i`, `output[i] = 0`.\nIf the given segment ID `i` is negative, the value is dropped and will not be\nadded to the sum of the segment.\n\n`num_segments` should equal the number of distinct segment IDs.\n\n<div style=\"width:70%; margin:auto; margin-bottom:10px; margin-top:20px;\">\n<img style=\"width:100%\" src=\"https://www.tensorflow.org/images/UnsortedSegmentSum.png\" alt>\n</div>"
 }
 op {
   name: "Unstage"
@@ -33445,6 +34417,7 @@ op {
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
+        type: DT_BFLOAT16
         type: DT_BOOL
       }
     }
-- 
GitLab


From 228b3ebc91ff351befcef641d01d1a82d532a6d1 Mon Sep 17 00:00:00 2001
From: Justin Lebar <jlebar@google.com>
Date: Mon, 11 Dec 2017 21:38:05 -0800
Subject: [PATCH 0904/1225] [XLA] Don't call timer->Nanoseconds() on a not-ok
 stream.

If the stream is not OK, the timer might not have been initialized and
finalized, in which case calling timer->Nanoseconds() is illegal and
will crash.

PiperOrigin-RevId: 178717089
---
 tensorflow/compiler/xla/service/executable.h | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/tensorflow/compiler/xla/service/executable.h b/tensorflow/compiler/xla/service/executable.h
index 08862308c9..145af45fb0 100644
--- a/tensorflow/compiler/xla/service/executable.h
+++ b/tensorflow/compiler/xla/service/executable.h
@@ -230,7 +230,11 @@ StatusOr<ReturnT> Executable::ExecuteOnStreamWrapper(
     profile->MergeFrom(execution_profile());
 
     // Overall execution time (in nanoseconds) from the executor timer.
-    profile->set_compute_and_transfer_time_ns(timer->Nanoseconds());
+    if (stream->ok()) {
+      // Don't read timer->Nanoseconds() if the stream isn't OK -- that's
+      // illegal.
+      profile->set_compute_and_transfer_time_ns(timer->Nanoseconds());
+    }
 
     // TODO(b/28123297): On GPU we end up including transfer time in
     // the compute time this way. Instead, we should get the correct
-- 
GitLab


From 913175c2bd38f6e97de399b29cfe1195bffbaa25 Mon Sep 17 00:00:00 2001
From: Justin Lebar <jlebar@google.com>
Date: Mon, 11 Dec 2017 22:26:00 -0800
Subject: [PATCH 0905/1225] [XLA] Add stringification to BatchNormTestParam.

This way when a test fails, it prints out useful information about the
failure, instead of

  "<48-byte object with these bytes: de ad be ef ...>"

PiperOrigin-RevId: 178719733
---
 .../compiler/xla/tests/batch_normalization_test.cc     | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/tensorflow/compiler/xla/tests/batch_normalization_test.cc b/tensorflow/compiler/xla/tests/batch_normalization_test.cc
index 028d1251b4..7525bc4bdf 100644
--- a/tensorflow/compiler/xla/tests/batch_normalization_test.cc
+++ b/tensorflow/compiler/xla/tests/batch_normalization_test.cc
@@ -39,6 +39,7 @@ limitations under the License.
 #include "tensorflow/compiler/xla/tests/test_utils.h"
 #include "tensorflow/compiler/xla/util.h"
 #include "tensorflow/compiler/xla/xla_data.pb.h"
+#include "tensorflow/core/lib/strings/str_util.h"
 #include "tensorflow/core/platform/logging.h"
 #include "tensorflow/core/platform/test.h"
 #include "tensorflow/core/platform/types.h"
@@ -203,6 +204,15 @@ struct BatchNormTestParam {
   int64 feature_index;
   float random_value_mean;
   float random_value_var;
+
+  friend ::std::ostream& operator<<(::std::ostream& os,
+                                    const BatchNormTestParam& p) {
+    os << "bounds={" << tensorflow::str_util::Join(p.bounds, ", ") << "}, ";
+    os << "feature_index=" << p.feature_index << ", ";
+    os << "random_value_mean=" << p.random_value_mean << ", ";
+    os << "random_value_var=" << p.random_value_var;
+    return os;
+  }
 };
 
 // Tests to test the fused operation of BatchNorm.
-- 
GitLab


From f379d3369505457928ccda5adf72583f95709026 Mon Sep 17 00:00:00 2001
From: Bjarke Hammersholt Roune <broune@google.com>
Date: Mon, 11 Dec 2017 22:31:27 -0800
Subject: [PATCH 0906/1225]  * HloTestBase now prints out the HLO parser error
 message when there is one.  * TestUtils now supports generating random
 literals with more than one constraint.      There is still an error if the
 constraints conflict.

PiperOrigin-RevId: 178720092
---
 .../compiler/xla/tests/hlo_test_base.cc       |  8 ++-
 tensorflow/compiler/xla/tests/test_utils.cc   | 56 ++++++++++++-------
 2 files changed, 43 insertions(+), 21 deletions(-)

diff --git a/tensorflow/compiler/xla/tests/hlo_test_base.cc b/tensorflow/compiler/xla/tests/hlo_test_base.cc
index e7a18828db..2b38f9c719 100644
--- a/tensorflow/compiler/xla/tests/hlo_test_base.cc
+++ b/tensorflow/compiler/xla/tests/hlo_test_base.cc
@@ -228,7 +228,9 @@ template <typename LiteralPtr>
   auto module_or_status =
       HloRunner::CreateModuleFromString(hlo_string, GetDebugOptionsForTest());
   if (!module_or_status.ok()) {
-    return ::testing::AssertionFailure() << "failed parsing hlo textual IR";
+    return ::testing::AssertionFailure()
+           << "Error while parsing HLO text format: "
+           << module_or_status.status().ToString();
   }
   return RunAndCompare(module_or_status.ConsumeValueOrDie(), error,
                        reference_preprocessor);
@@ -254,7 +256,9 @@ template <typename LiteralPtr>
   auto module_or_status =
       HloRunner::CreateModuleFromString(hlo_string, GetDebugOptionsForTest());
   if (!module_or_status.ok()) {
-    return ::testing::AssertionFailure() << "failed parsing hlo textual IR";
+    return ::testing::AssertionFailure()
+           << "Error while parsing HLO text format: "
+           << module_or_status.status().ToString();
   }
   return RunAndCompareNoHloPasses(module_or_status.ConsumeValueOrDie(), error,
                                   reference_preprocessor);
diff --git a/tensorflow/compiler/xla/tests/test_utils.cc b/tensorflow/compiler/xla/tests/test_utils.cc
index 780b292d1a..56859542a9 100644
--- a/tensorflow/compiler/xla/tests/test_utils.cc
+++ b/tensorflow/compiler/xla/tests/test_utils.cc
@@ -138,27 +138,45 @@ std::vector<HloInstruction*> FindConstrainedUses(
 StatusOr<std::unique_ptr<Literal>> CreateLiteralForConstrainedUses(
     const tensorflow::gtl::ArraySlice<HloInstruction*> constrained_uses,
     const HloInstruction& param) {
-  const auto count = constrained_uses.size();
-  if (count > 1) {
-    return Unimplemented("multiple constrained uses not yet supported");
-  }
+  HloInstruction* needs_index = nullptr;
+  HloInstruction* needs_zero = nullptr;
+  for (HloInstruction* use : constrained_uses) {
+    switch (use->opcode()) {
+      case HloOpcode::kDynamicSlice:
+      case HloOpcode::kDynamicUpdateSlice:
+        TF_RET_CHECK(ShapeUtil::Equal(param.shape(), use->operand(0)->shape()));
+        if (needs_index != nullptr &&
+            !ShapeUtil::Equal(needs_index->shape(), use->shape())) {
+          return Unimplemented(
+              "Conflicting operand generation slice index constraints\n");
+        }
+        needs_index = use;
+        break;
 
-  if (count == 0) {
-    return MakeFakeLiteral(param.shape());
-  }
+      case HloOpcode::kReduce:
+      case HloOpcode::kReduceWindow:
+      case HloOpcode::kSelectAndScatter:
+        needs_zero = use;
+        break;
 
-  const HloInstruction* const use = constrained_uses[0];
-  switch (use->opcode()) {
-    case HloOpcode::kDynamicSlice:
-    case HloOpcode::kDynamicUpdateSlice:
-      return MakeRandomNonwrappingSliceIndex(use->operand(0)->shape(),
-                                             use->shape());
-    case HloOpcode::kReduce:
-    case HloOpcode::kReduceWindow:
-    case HloOpcode::kSelectAndScatter:
-      return Literal::CreateFromShape(param.shape());
-    default:
-      return Unimplemented("constrained use given; no equivalent literal");
+      default:
+        return Unimplemented(
+            "Constrained operand generation not implemented for %s.",
+            use->ToString().c_str());
+    }
+  }
+  if (needs_index != nullptr && needs_zero != nullptr) {
+    return Unimplemented(
+        "Conflicting operand generation constraints.\nNeeds index: %s\nNeeds "
+        "zero: %s\n",
+        needs_index->ToString().c_str(), needs_zero->ToString().c_str());
+  }
+  if (needs_index != nullptr) {
+    return MakeRandomNonwrappingSliceIndex(param.shape(), needs_index->shape());
+  } else if (needs_zero != nullptr) {
+    return Literal::CreateFromShape(param.shape());
+  } else {
+    return MakeFakeLiteral(param.shape());
   }
 }
 
-- 
GitLab


From fcca16c5e3d8168ea95616956fdcf777798b164b Mon Sep 17 00:00:00 2001
From: Gunhan Gulsoy <gunan@google.com>
Date: Mon, 11 Dec 2017 23:26:35 -0800
Subject: [PATCH 0907/1225] Disable flaky random ops test.

PiperOrigin-RevId: 178723108
---
 tensorflow/compiler/tests/BUILD | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/tensorflow/compiler/tests/BUILD b/tensorflow/compiler/tests/BUILD
index 8ace678daa..a251dd1a3c 100644
--- a/tensorflow/compiler/tests/BUILD
+++ b/tensorflow/compiler/tests/BUILD
@@ -381,6 +381,11 @@ tf_xla_py_test(
     srcs = ["random_ops_test.py"],
     # TODO(b/31361304): enable RNG ops on GPU when parallelized.
     disabled_backends = ["gpu"],
+    tags = [
+        "manual",
+        "no_oss",
+        "notap",
+    ],
     deps = [
         ":xla_test",
         "//tensorflow/python:framework_for_generated_wrappers",
-- 
GitLab


From 383a3226a9ad08ac507a3fbd6c220c5c1e15a540 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 11 Dec 2017 23:35:29 -0800
Subject: [PATCH 0908/1225] Use BlockHostUntilDoneWithStatus in various places.

PiperOrigin-RevId: 178723711
---
 tensorflow/compiler/jit/xla_device_context.cc  | 16 ++++++++++------
 tensorflow/compiler/xla/service/executable.cc  |  2 +-
 tensorflow/compiler/xla/service/executable.h   |  3 ++-
 .../compiler/xla/service/execution_tracker.cc  |  4 +---
 .../compiler/xla/service/gpu/gpu_executable.cc | 16 ++++++++++------
 .../xla/service/gpu/gpu_transfer_manager.cc    |  7 ++++---
 .../compiler/xla/service/gpu/infeed_thunk.cc   | 14 +++++++-------
 .../compiler/xla/service/gpu/infeed_thunk.h    |  5 ++---
 .../compiler/xla/service/gpu/while_thunk.cc    | 18 +++++++++---------
 .../compiler/xla/service/gpu/while_thunk.h     |  7 +++----
 tensorflow/compiler/xla/service/hlo_runner.cc  |  2 +-
 .../xla/service/interpreter/executor.cc        |  2 +-
 tensorflow/compiler/xla/service/service.cc     |  6 ++++--
 .../contrib/nccl/kernels/nccl_manager_test.cc  |  4 ++--
 tensorflow/core/common_runtime/gpu/gpu_util.cc |  6 +-----
 tensorflow/stream_executor/cuda/cuda_dnn.cc    |  7 ++++++-
 .../stream_executor/host/host_gpu_executor.cc  |  2 +-
 17 files changed, 65 insertions(+), 56 deletions(-)

diff --git a/tensorflow/compiler/jit/xla_device_context.cc b/tensorflow/compiler/jit/xla_device_context.cc
index fed2c92d76..be5a288b37 100644
--- a/tensorflow/compiler/jit/xla_device_context.cc
+++ b/tensorflow/compiler/jit/xla_device_context.cc
@@ -71,12 +71,14 @@ void XlaTransferManager::CopyCPUTensorToDevice(const Tensor* cpu_tensor,
     void* dst_ptr = DMAHelper::base(device_tensor);
     se::DeviceMemoryBase dev_dst_ptr(dst_ptr, total_bytes);
 
-    Status status = Status::OK();
+    Status status;
     stream_->ThenMemcpy(&dev_dst_ptr, src_ptr, total_bytes);
     // TODO(hpucha): Make this asynchronous.
-    if (!stream_->BlockHostUntilDone()) {
+    Status block_status = stream_->BlockHostUntilDoneWithStatus();
+    if (!block_status.ok()) {
       status = xla::InternalError(
-          "Failed to complete data transfer on stream %p", stream_);
+          "Failed to complete data transfer on stream %p: %s", stream_,
+          block_status.error_message().c_str());
     }
 
     done(status);
@@ -105,12 +107,14 @@ void XlaTransferManager::CopyDeviceTensorToCPU(const Tensor* device_tensor,
     se::DeviceMemoryBase dev_src_ptr(src_ptr, total_bytes);
     void* dst_ptr = DMAHelper::base(cpu_tensor);
 
-    Status status = Status::OK();
+    Status status;
     stream_->ThenMemcpy(dst_ptr, dev_src_ptr, total_bytes);
     // TODO(hpucha): Make this asynchronous.
-    if (!stream_->BlockHostUntilDone()) {
+    Status block_status = stream_->BlockHostUntilDoneWithStatus();
+    if (!block_status.ok()) {
       status = xla::InternalError(
-          "Failed to complete data transfer on stream %p", stream_);
+          "Failed to complete data transfer on stream %p: %s", stream_,
+          block_status.error_message().c_str());
     }
 
     done(status);
diff --git a/tensorflow/compiler/xla/service/executable.cc b/tensorflow/compiler/xla/service/executable.cc
index 9c96d9eb30..8b8991b540 100644
--- a/tensorflow/compiler/xla/service/executable.cc
+++ b/tensorflow/compiler/xla/service/executable.cc
@@ -52,7 +52,7 @@ Executable::ExecuteOnStreams(
   }
   for (const auto& options : run_options) {
     TF_RET_CHECK(options.stream() != nullptr);
-    options.stream()->BlockHostUntilDone();
+    TF_RETURN_IF_ERROR(options.stream()->BlockHostUntilDoneWithStatus());
   }
   return return_values;
 }
diff --git a/tensorflow/compiler/xla/service/executable.h b/tensorflow/compiler/xla/service/executable.h
index 145af45fb0..12faf28d02 100644
--- a/tensorflow/compiler/xla/service/executable.h
+++ b/tensorflow/compiler/xla/service/executable.h
@@ -223,7 +223,8 @@ StatusOr<ReturnT> Executable::ExecuteOnStreamWrapper(
 
   if (profile != nullptr) {
     VLOG(1) << "enqueueing 'stop timer' and blocking host until done...";
-    stream->ThenStopTimer(timer.get()).BlockHostUntilDone();
+    stream->ThenStopTimer(timer.get());
+    SE_CHECK_OK(stream->BlockHostUntilDoneWithStatus());
     VLOG(1) << "done with block-host-until-done";
 
     // Merge in run-time profile information from execution_profile.
diff --git a/tensorflow/compiler/xla/service/execution_tracker.cc b/tensorflow/compiler/xla/service/execution_tracker.cc
index c225e62e3e..6a34c2638b 100644
--- a/tensorflow/compiler/xla/service/execution_tracker.cc
+++ b/tensorflow/compiler/xla/service/execution_tracker.cc
@@ -39,9 +39,7 @@ AsyncExecution::AsyncExecution(Backend* backend,
 
 tensorflow::Status AsyncExecution::BlockUntilDone() const {
   for (auto& stream : streams_) {
-    if (!stream->BlockHostUntilDone()) {
-      return InternalError("failed to block until done");
-    }
+    TF_RETURN_IF_ERROR(stream->BlockHostUntilDoneWithStatus());
   }
   return tensorflow::Status::OK();
 }
diff --git a/tensorflow/compiler/xla/service/gpu/gpu_executable.cc b/tensorflow/compiler/xla/service/gpu/gpu_executable.cc
index 21e9fc96f6..061707f32b 100644
--- a/tensorflow/compiler/xla/service/gpu/gpu_executable.cc
+++ b/tensorflow/compiler/xla/service/gpu/gpu_executable.cc
@@ -69,7 +69,7 @@ class HloExecutionProfiler {
   ~HloExecutionProfiler() {
     if (do_profile_) {
       stream_->ThenStopTimer(execution_timer_.get());
-      stream_->BlockHostUntilDone();
+      stream_->BlockHostUntilDoneWithStatus().IgnoreError();
       profile_->set_total_cycles_executed(
           *computation_, execution_timer_->Nanoseconds() * clock_rate_ghz_);
     }
@@ -87,7 +87,7 @@ class HloExecutionProfiler {
   void FinishOperation(const HloInstruction* hlo_instruction) {
     if (do_profile_) {
       stream_->ThenStopTimer(per_op_timer_.get());
-      stream_->BlockHostUntilDone();
+      stream_->BlockHostUntilDoneWithStatus().IgnoreError();
       profile_->SetCyclesTakenBy(
           hlo_instruction, per_op_timer_->Nanoseconds() * clock_rate_ghz_);
     }
@@ -170,7 +170,7 @@ Status GpuExecutable::ExecuteThunks(
     // If this thunk requests it, wait for all currently-executing thunks to
     // finish.  This is useful e.g. if the thunk is about to perform autotuning.
     if (thunk->ShouldHaltAllActivityBeforeRunning(stream)) {
-      main_stream->BlockHostUntilDone();
+      TF_RETURN_IF_ERROR(main_stream->BlockHostUntilDoneWithStatus());
     }
 
     profiler.StartOperation();
@@ -191,9 +191,13 @@ Status GpuExecutable::ExecuteThunks(
   // Make sure kernels are completed before deallocating temporary buffers.
   // TODO(b/30100571): we could potentially postpone deallocating the temp
   // buffers until a different computation is executed.
-  if (block_host_until_done && !main_stream->BlockHostUntilDone()) {
-    return InternalError("Failed to complete all kernels launched on stream %p",
-                         main_stream);
+  if (block_host_until_done) {
+    Status block_status = main_stream->BlockHostUntilDoneWithStatus();
+    if (!block_status.ok()) {
+      return InternalError(
+          "Failed to complete all kernels launched on stream %p: %s",
+          main_stream, block_status.error_message().c_str());
+    }
   }
 
   return Status::OK();
diff --git a/tensorflow/compiler/xla/service/gpu/gpu_transfer_manager.cc b/tensorflow/compiler/xla/service/gpu/gpu_transfer_manager.cc
index 4cf49d4a72..550df33576 100644
--- a/tensorflow/compiler/xla/service/gpu/gpu_transfer_manager.cc
+++ b/tensorflow/compiler/xla/service/gpu/gpu_transfer_manager.cc
@@ -105,12 +105,13 @@ Status GpuTransferManager::EnqueueBuffersToInfeed(
   // infeed requests, blocking on the stream might be
   // heavy-handed. Figure out if finer-grained acknowledgement is
   // possible.
-  if (!stream->BlockHostUntilDone()) {
+  Status block_status = stream->BlockHostUntilDoneWithStatus();
+  if (!block_status.ok()) {
     for (gpu::InfeedBuffer* b : buffers) {
       b->Done();
     }
-    return InternalError("Failed to complete data transfer on stream %p",
-                         stream);
+    return InternalError("Failed to complete data transfer on stream %p: %s",
+                         stream, block_status.error_message().c_str());
   }
 
   infeed_manager->EnqueueBuffers(buffers);
diff --git a/tensorflow/compiler/xla/service/gpu/infeed_thunk.cc b/tensorflow/compiler/xla/service/gpu/infeed_thunk.cc
index e33e904692..db8659a8b9 100644
--- a/tensorflow/compiler/xla/service/gpu/infeed_thunk.cc
+++ b/tensorflow/compiler/xla/service/gpu/infeed_thunk.cc
@@ -30,9 +30,8 @@ InfeedThunk::InfeedThunk(
                              tuple_element_buffers.end()),
       destination_buffer_(destination_buffer) {}
 
-tensorflow::Status InfeedThunk::ExecuteOnStream(
-    const BufferAllocations& buffer_allocations,
-    perftools::gputools::Stream* stream) {
+Status InfeedThunk::ExecuteOnStream(const BufferAllocations& buffer_allocations,
+                                    perftools::gputools::Stream* stream) {
   VLOG(2) << "Infeeding to GPU ";
 
   perftools::gputools::DeviceMemoryBase destination_address =
@@ -66,15 +65,16 @@ tensorflow::Status InfeedThunk::ExecuteOnStream(
                        buffer->length());
   }
 
-  if (!stream->BlockHostUntilDone()) {
-    return InternalError("Failed to complete data transfer on stream %p",
-                         stream);
+  Status block_status = stream->BlockHostUntilDoneWithStatus();
+  if (!block_status.ok()) {
+    return InternalError("Failed to complete data transfer on stream %p: %s",
+                         stream, block_status.error_message().c_str());
   }
 
   infeed_manager->ReleaseBuffers(infeed_buffers);
 
   VLOG(2) << "Infeeding to GPU complete";
-  return tensorflow::Status::OK();
+  return Status::OK();
 }
 
 }  // namespace gpu
diff --git a/tensorflow/compiler/xla/service/gpu/infeed_thunk.h b/tensorflow/compiler/xla/service/gpu/infeed_thunk.h
index 371d71f9db..86918705fa 100644
--- a/tensorflow/compiler/xla/service/gpu/infeed_thunk.h
+++ b/tensorflow/compiler/xla/service/gpu/infeed_thunk.h
@@ -43,9 +43,8 @@ class InfeedThunk : public Thunk {
   InfeedThunk(const InfeedThunk&) = delete;
   InfeedThunk& operator=(const InfeedThunk&) = delete;
 
-  tensorflow::Status ExecuteOnStream(
-      const BufferAllocations& buffer_allocations,
-      perftools::gputools::Stream* stream) override;
+  Status ExecuteOnStream(const BufferAllocations& buffer_allocations,
+                         perftools::gputools::Stream* stream) override;
 
  private:
   const std::vector<BufferAllocation::Slice> tuple_element_buffers_;
diff --git a/tensorflow/compiler/xla/service/gpu/while_thunk.cc b/tensorflow/compiler/xla/service/gpu/while_thunk.cc
index 0d2412096a..e3ecb784dd 100644
--- a/tensorflow/compiler/xla/service/gpu/while_thunk.cc
+++ b/tensorflow/compiler/xla/service/gpu/while_thunk.cc
@@ -34,16 +34,14 @@ WhileThunk::WhileThunk(
       body_thunk_sequence_(
           MakeUnique<SequentialThunk>(std::move(*body_thunk_sequence), hlo)) {}
 
-tensorflow::Status WhileThunk::Initialize(const GpuExecutable& executable) {
+Status WhileThunk::Initialize(const GpuExecutable& executable) {
   TF_RETURN_IF_ERROR(condition_thunk_sequence_->Initialize(executable));
   TF_RETURN_IF_ERROR(body_thunk_sequence_->Initialize(executable));
-  return tensorflow::Status::OK();
+  return Status::OK();
 }
 
-tensorflow::Status WhileThunk::ExecuteOnStream(
-    const BufferAllocations& buffer_allocations,
-    perftools::gputools::Stream* stream) {
-
+Status WhileThunk::ExecuteOnStream(const BufferAllocations& buffer_allocations,
+                                   perftools::gputools::Stream* stream) {
   perftools::gputools::DeviceMemoryBase condition_result_data =
       buffer_allocations.GetDeviceAddress(condition_result_buffer_index_);
 
@@ -55,9 +53,11 @@ tensorflow::Status WhileThunk::ExecuteOnStream(
     // Copy the result of condition computation and break the loop if 'false'.
     bool condition_result;
     stream->ThenMemcpy(&condition_result, condition_result_data, sizeof(bool));
-    if (!stream->BlockHostUntilDone()) {
+    Status block_status = stream->BlockHostUntilDoneWithStatus();
+    if (!block_status.ok()) {
       return InternalError(
-          "Failed to complete all kernels launched on stream %p", stream);
+          "Failed to complete all kernels launched on stream %p: %s", stream,
+          block_status.error_message().c_str());
     }
 
     if (!condition_result) {
@@ -68,7 +68,7 @@ tensorflow::Status WhileThunk::ExecuteOnStream(
     TF_RETURN_IF_ERROR(
         body_thunk_sequence_->ExecuteOnStream(buffer_allocations, stream));
   }
-  return tensorflow::Status::OK();
+  return Status::OK();
 }
 
 }  // namespace gpu
diff --git a/tensorflow/compiler/xla/service/gpu/while_thunk.h b/tensorflow/compiler/xla/service/gpu/while_thunk.h
index 95ed5497ce..4c9f45de9e 100644
--- a/tensorflow/compiler/xla/service/gpu/while_thunk.h
+++ b/tensorflow/compiler/xla/service/gpu/while_thunk.h
@@ -45,10 +45,9 @@ class WhileThunk : public Thunk {
   WhileThunk(const WhileThunk&) = delete;
   WhileThunk& operator=(const WhileThunk&) = delete;
 
-  tensorflow::Status Initialize(const GpuExecutable& executable) override;
-  tensorflow::Status ExecuteOnStream(
-      const BufferAllocations& buffer_allocations,
-      perftools::gputools::Stream* stream) override;
+  Status Initialize(const GpuExecutable& executable) override;
+  Status ExecuteOnStream(const BufferAllocations& buffer_allocations,
+                         perftools::gputools::Stream* stream) override;
 
  private:
   const BufferAllocation::Slice condition_result_buffer_index_;
diff --git a/tensorflow/compiler/xla/service/hlo_runner.cc b/tensorflow/compiler/xla/service/hlo_runner.cc
index 4a7caf3ebd..b4ca8d12a1 100644
--- a/tensorflow/compiler/xla/service/hlo_runner.cc
+++ b/tensorflow/compiler/xla/service/hlo_runner.cc
@@ -150,7 +150,7 @@ StatusOr<se::DeviceMemoryBase> HloRunner::Execute(
       se::DeviceMemoryBase result,
       executable->ExecuteOnStream(&service_run_options, arguments,
                                   /*hlo_execution_profile=*/nullptr));
-  TF_RET_CHECK(stream.BlockHostUntilDone());
+  TF_RETURN_IF_ERROR(stream.BlockHostUntilDoneWithStatus());
 
   allocations_.push_back(result);
 
diff --git a/tensorflow/compiler/xla/service/interpreter/executor.cc b/tensorflow/compiler/xla/service/interpreter/executor.cc
index 511de87b1b..f16651c969 100644
--- a/tensorflow/compiler/xla/service/interpreter/executor.cc
+++ b/tensorflow/compiler/xla/service/interpreter/executor.cc
@@ -85,7 +85,7 @@ bool InterpreterExecutor::HostCallback(Stream *stream,
 bool InterpreterExecutor::CreateStreamDependency(Stream *dependent,
                                                  Stream *other) {
   AsExecutorStream(dependent)->EnqueueTask(
-      [other]() { other->BlockHostUntilDone(); });
+      [other]() { SE_CHECK_OK(other->BlockHostUntilDoneWithStatus()); });
   AsExecutorStream(dependent)->BlockUntilDone();
   return true;
 }
diff --git a/tensorflow/compiler/xla/service/service.cc b/tensorflow/compiler/xla/service/service.cc
index fe6993db98..462b76e9b4 100644
--- a/tensorflow/compiler/xla/service/service.cc
+++ b/tensorflow/compiler/xla/service/service.cc
@@ -566,8 +566,10 @@ Service::ExecuteParallelAndRegisterResult(
 
   // Wait for all executions to complete.
   for (int64 i = 0; i < streams.size(); ++i) {
-    if (!streams[i]->BlockHostUntilDone()) {
-      return InternalError("failed to complete execution for stream %lld", i);
+    Status block_status = streams[i]->BlockHostUntilDoneWithStatus();
+    if (!block_status.ok()) {
+      return InternalError("failed to complete execution for stream %lld: %s",
+                           i, block_status.error_message().c_str());
     }
   }
 
diff --git a/tensorflow/contrib/nccl/kernels/nccl_manager_test.cc b/tensorflow/contrib/nccl/kernels/nccl_manager_test.cc
index abafe4b407..ef76df6b0d 100644
--- a/tensorflow/contrib/nccl/kernels/nccl_manager_test.cc
+++ b/tensorflow/contrib/nccl/kernels/nccl_manager_test.cc
@@ -175,7 +175,7 @@ class NcclManagerTest : public ::testing::Test {
       auto out_gpu_mem = AsDeviceMemory(out_gpu.flat<float>().data());
       stream->ThenMemcpy(out_cpu.flat<float>().data(), out_gpu_mem,
                          out_cpu.TotalBytes());
-      stream->BlockHostUntilDone();
+      SE_ASSERT_OK(stream->BlockHostUntilDoneWithStatus());
       test::ExpectTensorEqual<float>(test_case->expected, out_cpu);
     }
   }
@@ -236,7 +236,7 @@ TEST_F(NcclManagerTest, MultipleCallers) {
     for (int i = 0; i < num_ranks; ++i) {
       auto* device = devices->at(i % devices->size());
       auto* stream = device->tensorflow_gpu_device_info()->stream;
-      stream->BlockHostUntilDone();
+      SE_ASSERT_OK(stream->BlockHostUntilDoneWithStatus());
     }
 
     std::random_shuffle(case_and_device_num.begin(), case_and_device_num.end());
diff --git a/tensorflow/core/common_runtime/gpu/gpu_util.cc b/tensorflow/core/common_runtime/gpu/gpu_util.cc
index 657bdf0601..9d8210a877 100644
--- a/tensorflow/core/common_runtime/gpu/gpu_util.cc
+++ b/tensorflow/core/common_runtime/gpu/gpu_util.cc
@@ -352,11 +352,7 @@ Status GPUUtil::Sync(Device* gpu_device) {
   if (!dev_info) {
     return errors::Internal("Failed to find dest device GPUDeviceInfo");
   }
-  dev_info->stream->BlockHostUntilDone();
-  if (!dev_info->stream->ok()) {
-    return errors::Internal("GPU sync failed");
-  }
-  return Status::OK();
+  return dev_info->stream->BlockHostUntilDoneWithStatus();
 }
 
 Status GPUUtil::SyncAll(Device* gpu_device) {
diff --git a/tensorflow/stream_executor/cuda/cuda_dnn.cc b/tensorflow/stream_executor/cuda/cuda_dnn.cc
index 1e26f53ae1..60c889c053 100644
--- a/tensorflow/stream_executor/cuda/cuda_dnn.cc
+++ b/tensorflow/stream_executor/cuda/cuda_dnn.cc
@@ -4251,7 +4251,12 @@ bool CudnnSupport::DoDepthConcatenate(
   for (size_t i = 0; i < input_data.size(); ++i) {
     const auto& dimensions = input_dimensions[i];
     tmp.resize(dimensions.ElementCount());
-    stream->ThenMemcpyD2H<float>(*input_data[i], &tmp).BlockHostUntilDone();
+    stream->ThenMemcpyD2H<float>(*input_data[i], &tmp);
+    port::Status block_status = stream->BlockHostUntilDoneWithStatus();
+    if (!block_status.ok()) {
+      LOG(ERROR) << "BlockHostUntilDone failed: " << block_status;
+      return false;
+    }
 
     for (int64 batch = 0; batch < output_dimensions.count(); ++batch) {
       for (int64 yx = 0; yx < area; ++yx) {
diff --git a/tensorflow/stream_executor/host/host_gpu_executor.cc b/tensorflow/stream_executor/host/host_gpu_executor.cc
index 1fd8eeb881..d103dcd033 100644
--- a/tensorflow/stream_executor/host/host_gpu_executor.cc
+++ b/tensorflow/stream_executor/host/host_gpu_executor.cc
@@ -162,7 +162,7 @@ void HostExecutor::DeallocateStream(Stream *stream) {}
 
 bool HostExecutor::CreateStreamDependency(Stream *dependent, Stream *other) {
   AsHostStream(dependent)->EnqueueTask(
-      [other]() { other->BlockHostUntilDone(); });
+      [other]() { SE_CHECK_OK(other->BlockHostUntilDoneWithStatus()); });
   AsHostStream(dependent)->BlockUntilDone();
   return true;
 }
-- 
GitLab


From 1ffba992df5159cf88c438a62ede4ede3dc5dc2d Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 11 Dec 2017 23:47:10 -0800
Subject: [PATCH 0909/1225] [XLA] Properly set layout requirements in Hlo
 parser.

PiperOrigin-RevId: 178724659
---
 .../compiler/xla/tools/parser/hlo_parser.cc   | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

diff --git a/tensorflow/compiler/xla/tools/parser/hlo_parser.cc b/tensorflow/compiler/xla/tools/parser/hlo_parser.cc
index 457b655783..78372dedd8 100644
--- a/tensorflow/compiler/xla/tools/parser/hlo_parser.cc
+++ b/tensorflow/compiler/xla/tools/parser/hlo_parser.cc
@@ -300,6 +300,25 @@ bool HloParser::ParseComputation() {
       is_entry_computation
           ? module_->AddEntryComputation(builder->Build(root))
           : module_->AddEmbeddedComputation(builder->Build(root));
+
+  // The parameters and result layouts were set to default layout. Here we set
+  // the layouts to what the hlo text says.
+  if (is_entry_computation) {
+    for (int i = 0; i < computation->num_parameters(); i++) {
+      const Shape& param_shape = computation->parameter_instruction(i)->shape();
+      if (param_shape.has_layout()) {
+        module_->mutable_entry_computation_layout()
+            ->mutable_parameter_layout(i)
+            ->ResetLayout(param_shape.layout());
+      }
+    }
+    const Shape& result_shape = computation->root_instruction()->shape();
+    if (result_shape.has_layout()) {
+      module_->mutable_entry_computation_layout()
+          ->mutable_result_layout()
+          ->ResetLayout(result_shape.layout());
+    }
+  }
   return AddComputation(name, computation, name_loc);
 }
 
-- 
GitLab


From 95390fd43d19c7a22a2057393de2a95171095fde Mon Sep 17 00:00:00 2001
From: Rajendra arora <rajendraarora16@yahoo.com>
Date: Tue, 12 Dec 2017 13:25:05 +0530
Subject: [PATCH 0910/1225] Removing extra "d" after close() method in
 SessionTest.java

---
 tensorflow/java/src/test/java/org/tensorflow/SessionTest.java | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/java/src/test/java/org/tensorflow/SessionTest.java b/tensorflow/java/src/test/java/org/tensorflow/SessionTest.java
index a86b4dd117..28e8628a85 100644
--- a/tensorflow/java/src/test/java/org/tensorflow/SessionTest.java
+++ b/tensorflow/java/src/test/java/org/tensorflow/SessionTest.java
@@ -151,7 +151,7 @@ public class SessionTest {
       s.close();
       try {
         s.runner().run();
-        fail("methods on a close()d session should fail");
+        fail("methods on a close() session should fail");
       } catch (IllegalStateException e) {
         // expected exception
       }
-- 
GitLab


From 3d4aa4eacd4beca250ed52cc284e9caa7fb45e22 Mon Sep 17 00:00:00 2001
From: Yifei Feng <yifeif@google.com>
Date: Tue, 12 Dec 2017 02:02:28 -0800
Subject: [PATCH 0911/1225] Disable flaky
 //tensorflow/contrib/learn:dnn_linear_combined_test

PiperOrigin-RevId: 178734940
---
 tensorflow/contrib/learn/BUILD | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tensorflow/contrib/learn/BUILD b/tensorflow/contrib/learn/BUILD
index 33f509ec12..5df2c77249 100644
--- a/tensorflow/contrib/learn/BUILD
+++ b/tensorflow/contrib/learn/BUILD
@@ -345,6 +345,7 @@ py_test(
     srcs = ["python/learn/estimators/dnn_linear_combined_test.py"],
     shard_count = 4,
     srcs_version = "PY2AND3",
+    tags = ["no_oss"],  # flaky b/70524820
     deps = [
         ":learn",
         "//tensorflow/contrib/layers:layers_py",
-- 
GitLab


From 65d23686c6404025d0554960d4cc4eda1bbd3c9d Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 12 Dec 2017 02:33:55 -0800
Subject: [PATCH 0912/1225] Remove real-data shape check in GANEstimator. Fixes
 github issue #14257.

PiperOrigin-RevId: 178737278
---
 .../gan/python/estimator/python/gan_estimator_impl.py      | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/tensorflow/contrib/gan/python/estimator/python/gan_estimator_impl.py b/tensorflow/contrib/gan/python/estimator/python/gan_estimator_impl.py
index 058dc1d1f8..eef66af7f9 100644
--- a/tensorflow/contrib/gan/python/estimator/python/gan_estimator_impl.py
+++ b/tensorflow/contrib/gan/python/estimator/python/gan_estimator_impl.py
@@ -160,11 +160,6 @@ class GANEstimator(estimator.Estimator):
         model_fn=_model_fn, model_dir=model_dir, config=config)
 
 
-def _use_check_shapes(real_data):
-  """Determines whether TFGAN should check Tensor shapes."""
-  return isinstance(real_data, ops.Tensor)
-
-
 def _gan_model_fn(
     features,
     labels,
@@ -242,7 +237,7 @@ def _make_gan_model(generator_fn, discriminator_fn, real_data,
       real_data,
       generator_inputs,
       generator_scope=generator_scope,
-      check_shapes=_use_check_shapes(real_data))
+      check_shapes=False)
   if add_summaries:
     if not isinstance(add_summaries, (tuple, list)):
       add_summaries = [add_summaries]
-- 
GitLab


From 3f5445b3432fdf775bdef289c10338cd20d10edc Mon Sep 17 00:00:00 2001
From: Alexandre Passos <apassos@google.com>
Date: Tue, 12 Dec 2017 03:24:43 -0800
Subject: [PATCH 0913/1225] Simplifying tfe function.py

PiperOrigin-RevId: 178740804
---
 tensorflow/python/eager/function.py       | 169 ++++++++++------------
 tensorflow/python/eager/graph_callable.py |  25 +---
 2 files changed, 77 insertions(+), 117 deletions(-)

diff --git a/tensorflow/python/eager/function.py b/tensorflow/python/eager/function.py
index cadabb3a24..a8f3516f79 100644
--- a/tensorflow/python/eager/function.py
+++ b/tensorflow/python/eager/function.py
@@ -51,43 +51,6 @@ _scoped_captures = threading.local()
 _scoped_captures.tensors = None
 
 
-def make_function_def(name, graph, operations, inputs, outputs):
-  """Makes FunctionDef proto and defined function.
-
-  Args:
-    name: the function name
-    graph: the graph from which to build the function
-    operations: the operations in the function body
-    inputs: tensors to be used as function arguments
-    outputs: tensors to be returned from the function
-
-  Returns:
-   fdef: a FunctionDef protocol buffer for the function
-   fn: a wrapped TF_Function for the function
-  """
-  with errors.raise_exception_on_not_ok_status() as status:
-    fn = pywrap_tensorflow.TF_GraphToFunction_wrapper(
-        graph._c_graph,  # pylint: disable=protected-access
-        compat.as_str(name),
-        False,
-        [o._c_op for o in operations],  # pylint: disable=protected-access
-        [t._as_tf_output() for t in inputs],  # pylint: disable=protected-access
-        [t._as_tf_output() for t in outputs],  # pylint: disable=protected-access
-        [],
-        None,
-        compat.as_str(""),
-        status)
-  # TODO(apassos) avoid creating a FunctionDef (specially to grab the signature,
-  # but also in general it's nice not to depend on it.
-  with c_api_util.tf_buffer() as buffer_:
-    with errors.raise_exception_on_not_ok_status() as status:
-      pywrap_tensorflow.TF_FunctionToFunctionDef(fn, buffer_, status)
-    proto_data = pywrap_tensorflow.TF_GetBuffer(buffer_)
-  fdef = function_pb2.FunctionDef()
-  fdef.ParseFromString(compat.as_bytes(proto_data))
-  return fdef, fn
-
-
 @contextlib.contextmanager
 def capture_tensors(captures):
   old = _scoped_captures.__dict__.get("tensors", None)
@@ -120,7 +83,7 @@ def _convert_to_graph_tensor(value, dtype=None, name=None, as_ref=False):
   Arguments:
     value: A Tensor object.
     dtype: The datatype of the value produced by the node in the graph.
-    name:  Name of the node in the graph.
+    name:  str, Name of the node in the graph.
     as_ref: Ignored (required by register_tensor_conversion_function).
 
   Returns:
@@ -249,13 +212,44 @@ def _inference_name(n):
 # TODO(apassos) get rid of this by splitting framework.function._DefinedFunction
 # so it doesn't have the definition-generating logic and is just a container for
 # an already-defined function.
-class _DefinedFunction(object):
-  """Mocks the interface of tf _DefinedFunction."""
+class _EagerDefinedFunction(object):
+  """Function object with the interface of tf _DefinedFunction."""
+
+  def __init__(self, name, graph, operations, inputs, outputs):
+    """Initializes an eager defined function.
 
-  def __init__(self, fdef, fn):
-    self.definition = fdef
-    self.name = fdef.signature.name
-    self.signature = fdef.signature
+    Args:
+      name: str, the name for the created function.
+      graph: Graph, the graph containing the operations in the function
+      operations: list of Operation; the subset of operations in the graph
+        which will be in the function
+      inputs: the tensors in the graph to be used as inputs to the function
+      outputs: the tensors in the graph which will be outputs to the function
+    """
+    with errors.raise_exception_on_not_ok_status() as status:
+      fn = pywrap_tensorflow.TF_GraphToFunction_wrapper(
+          graph._c_graph,  # pylint: disable=protected-access
+          compat.as_str(name),
+          False,
+          [o._c_op for o in operations],  # pylint: disable=protected-access
+          [t._as_tf_output() for t in inputs],  # pylint: disable=protected-access
+          [t._as_tf_output() for t in outputs],  # pylint: disable=protected-access
+          [],
+          None,
+          compat.as_str(""),
+          status)
+    # TODO(apassos) avoid creating a FunctionDef (specially to grab the
+    # signature, but also in general it's nice not to depend on it.
+    with c_api_util.tf_buffer() as buffer_:
+      with errors.raise_exception_on_not_ok_status() as status:
+        pywrap_tensorflow.TF_FunctionToFunctionDef(fn, buffer_, status)
+      proto_data = pywrap_tensorflow.TF_GetBuffer(buffer_)
+    function_def = function_pb2.FunctionDef()
+    function_def.ParseFromString(compat.as_bytes(proto_data))
+    _register(fn)
+    self.definition = function_def
+    self.name = function_def.signature.name
+    self.signature = function_def.signature
     self.grad_func_name = None
     self.python_grad_func = None
     self._c_func = fn
@@ -271,51 +265,51 @@ class GraphModeFunction(object):
   """Callable object representing a graph-mode function.
 
   Args:
-    input_placeholders: list of placeholder values to feed when calling
-      the wrapped function.
+    name: str the name of the created function
+    input_placeholders: list of placeholder values (tensors) to feed when
+      calling the wrapped function.
     extra_inputs: Tensor inputs this function definition closed over which
       are passed as arguments. Need to track so gradients are supported
       correctly.
-    fdef: the function definition we want to call.
-    graph: the graph from which the fdef operations were pulled. Used as
+    graph: the Graph from which the operations will be pulled. Used as
       a context when computing gradients.
-    operations: the subset of operations in the graph used in the function
+    operations: the subset of Operations in the graph used in the function
       definition.
-    func_outputs: the python outputs of the graph-mode function, with
-      tensorflow.Tensor objects to be replaced by tfe values when called.
-    func_outputs_to_fdef_outputs: Maps id(obj) in func_outputs to index of
-      fdef's outputs. It allows mapping fdef output tensors to nested
-      func_outputs structure.
-    output_shapes: List of shapes of all tensors which are output by the
-      internal function.
+    outputs: a flat list of the Tensors in the graph used as outputs to the
+      function
+    func_outputs: a possibly nested python object which will be returned by
+      this function. The Tensors in this structure will be replaced by their
+      corresponding values in outputs.
+    output_shapes: List of shapes of all tensors in outputs
     variables: (optional) List of variables to watch during function execution.
   """
 
   def __init__(self,
+               name,
                input_placeholders,
                extra_inputs,
-               fdef,
-               fn,
                graph,
                operations,
+               outputs,
                func_outputs,
-               func_outputs_to_fdef_outputs,
                output_shapes,
                variables=None):
-    assert len(input_placeholders) == len(fdef.signature.input_arg), "%s %s" % (
-        len(input_placeholders), len(fdef.signature.input_arg))
+    defined_function = _EagerDefinedFunction(
+        name, graph, operations, input_placeholders, outputs)
+    if  len(input_placeholders) != len(defined_function.signature.input_arg):
+      raise ValueError("Internal error: invalid lengths. %s %s" % (
+          len(input_placeholders), len(defined_function.signature.input_arg)))
     self._input_placeholders = input_placeholders
     self._extra_inputs = list(extra_inputs)
     self._graph = graph
     self._has_backprop = False
-    self._func_name = fdef.signature.name
-    self._fdef = _DefinedFunction(fdef, fn)
-    self._num_outputs = len(fdef.signature.output_arg)
+    self._func_name = name
+    self._function_def = defined_function
+    self._num_outputs = len(defined_function.signature.output_arg)
     self._ops = operations
     self._func_outputs = func_outputs
     self._returns = [func_outputs] if isinstance(
         func_outputs, (ops.Tensor, type(None))) else list(func_outputs)
-    self._returns_to_fedf_outputs = func_outputs_to_fdef_outputs
     self._output_shapes = output_shapes
     self._variables = variables if variables is not None else []
 
@@ -339,11 +333,9 @@ class GraphModeFunction(object):
         shapes = tuple(x.shape for x in in_gradients if x is not None)
     captures = list(sorted(c.captured_tensors, key=lambda x: x.name))
     forward_name = _forward_name(self._func_name)
-    forward_function_def, forward_fn = make_function_def(
+    self._forward_fdef = _EagerDefinedFunction(
         forward_name, self._graph, self._ops, self._input_placeholders,
         filtered_outputs + captures)
-    self._forward_fdef = _DefinedFunction(forward_function_def, forward_fn)
-    _register(forward_fn)
     backward_outputs = tuple(x for x in in_gradients if x is not None)
     all_inputs = self._out_grad_placeholders + captures
     # Excluding input ops from the body as we do not intend to execute these
@@ -352,17 +344,13 @@ class GraphModeFunction(object):
     # Enforce a deterministic order of operations in the generated graph. This
     # means rerunning the function-defining code will always define the same
     # function, which is useful if we serialize this etc.
-    fdef_ops = tuple(x for x in sorted(c.known_ops, key=lambda x: x.name)
-                     if x not in all_ignored_ops)
+    function_def_ops = tuple(x
+                             for x in sorted(c.known_ops, key=lambda x: x.name)
+                             if x not in all_ignored_ops)
     bname = _backward_name(self._func_name)
-    backward_function_def, backward_fn = make_function_def(
-        bname, self._graph, fdef_ops,
-        all_inputs, backward_outputs)
-    _register(backward_fn)
     self._backward_function = GraphModeFunction(
-        all_inputs, [], backward_function_def, backward_fn, self._graph,
-        c.known_ops, in_gradients, _map_sequence_obj_to_idx(backward_outputs),
-        shapes)
+        bname, all_inputs, [], self._graph, function_def_ops,
+        backward_outputs, in_gradients, shapes)
 
   def _backprop_call(self, args):
     """Calls the wrapped function and records the result on a tape."""
@@ -398,7 +386,7 @@ class GraphModeFunction(object):
     side_outputs = outputs[len(self._returns):]
 
     def backward_function(*args):
-      return self._backward_function(*(list(args) + side_outputs))
+      return self._backward_function(*(list(args) + side_outputs))  # pylint: disable=not-callable
 
     tape.record_operation(
         signature.name,
@@ -425,12 +413,12 @@ class GraphModeFunction(object):
     ctx = context.context()
     if ctx.in_graph_mode():
       g = ops.get_default_graph()
-      if self._fdef.name not in g._functions:  # pylint: disable=protected-access
-        g._add_function(self._fdef)  # pylint: disable=protected-access
+      if self._function_def.name not in g._functions:  # pylint: disable=protected-access
+        g._add_function(self._function_def)  # pylint: disable=protected-access
       for f in self._graph._functions.values():  # pylint: disable=protected-access
         if f.name not in g._functions:  # pylint: disable=protected-access
           g._add_function(f)  # pylint: disable=protected-access
-      signature = self._fdef.definition.signature
+      signature = self._function_def.definition.signature
       args = list(tensor_inputs) + self._extra_inputs
       op = g.create_op(
           signature.name, [ops.convert_to_tensor(x) for x in args],
@@ -529,26 +517,14 @@ def _defun_internal(name, func, args, kwds):
   fname = _inference_name(name)
   operations = tuple(x for x in tmp_graph.get_operations()
                      if x not in all_ignored_ops)
-  inference_function_def, fn = make_function_def(
-      fname, tmp_graph, operations, all_inputs, func_def_outputs)
   # Register any other functions defined in the graph
   # TODO(ashankar): Oh lord, forgive me for this lint travesty.
   for f in tmp_graph._functions.values():  # pylint: disable=protected-access
     # TODO(ashankar): What about the gradient registry?
     _register(f._c_func)  # pylint: disable=protected-access
-  _register(fn)
-
   return GraphModeFunction(
-      all_inputs,
-      extra_inputs,
-      inference_function_def,
-      fn,
-      tmp_graph,
-      operations,
-      func_outputs,
-      _map_sequence_obj_to_idx(func_def_outputs),
-      output_shapes,
-      variables=variables)
+      fname, all_inputs, extra_inputs, tmp_graph, operations, func_def_outputs,
+      func_outputs, output_shapes, variables)
 
 
 # Defun uses this instead of Tensor as a cache key. Using dtype because
@@ -595,7 +571,8 @@ def named_defun(func, name):
     """Decorated version of func."""
     # Macroexpand on non-Tensor arguments
     cache_key = tuple(_cache_key(x) for x in args)
-    assert all(not isinstance(x, ops.EagerTensor) for x in kwds.values())
+    if not all(not isinstance(x, ops.EagerTensor) for x in kwds.values()):
+      raise ValueError("Tensor keyword arguments are not supported.")
     cache_key = (cache_key, tuple(kwds.items()))
 
     if cache_key not in arguments_to_functions:
diff --git a/tensorflow/python/eager/graph_callable.py b/tensorflow/python/eager/graph_callable.py
index 3da100d800..5c13ea8908 100644
--- a/tensorflow/python/eager/graph_callable.py
+++ b/tensorflow/python/eager/graph_callable.py
@@ -319,49 +319,32 @@ def _graph_callable_internal(func, shape_and_dtypes):
 
   func_def_outputs = [x for x in outputs_list if isinstance(x, tf_ops.Tensor)]
   initialization_name = function._inference_name(func.__name__)  # pylint: disable=protected-access
-  initializer_function_def, initializer_fn = function.make_function_def(
-      initialization_name,
-      tmp_graph,
-      initializing_operations,
-      placeholder_inputs,
-      func_def_outputs)
   # TODO(ashankar): Oh lord, forgive me for this lint travesty.
   # Also, what about the gradient registry of these functions? Those need to be
   # addressed as well.
   for f in tmp_graph._functions.values():  # pylint: disable=protected-access
     function._register(f._c_func)  # pylint: disable=protected-access
-  function._register(initializer_fn)  # pylint: disable=protected-access
   initializer_function = function.GraphModeFunction(
+      initialization_name,
       placeholder_inputs,
       extra_inputs,
-      initializer_function_def,
-      initializer_fn,
       tmp_graph,
       initializing_operations,
+      func_def_outputs,
       func_outputs,
-      function._map_sequence_obj_to_idx(func_def_outputs),  # pylint: disable=protected-access
       output_shapes)
 
   capture_func_def_outputs = [
       x for x in captured_outlist if isinstance(x, tf_ops.Tensor)]
   captured_function_name = function._inference_name(func.__name__)  # pylint: disable=protected-access
-  captured_function_def, capturing_fn = function.make_function_def(
-      captured_function_name,
-      tmp_graph,
-      capturing_operations,
-      placeholder_inputs,
-      capture_func_def_outputs)
-  function._register(capturing_fn)  # pylint: disable=protected-access
-
   captured_function = function.GraphModeFunction(
+      captured_function_name,
       placeholder_inputs,
       extra_inputs,
-      captured_function_def,
-      capturing_fn,
       tmp_graph,
       capturing_operations,
+      capture_func_def_outputs,
       captured_outputs,
-      function._map_sequence_obj_to_idx(capture_func_def_outputs),  # pylint: disable=protected-access
       output_shapes,
       variables=[x.variable for x in sorted_variables])
 
-- 
GitLab


From 444e319256de3229aef688c39b85689d3e2eab4f Mon Sep 17 00:00:00 2001
From: Rajendra arora <rajendraarora16@yahoo.com>
Date: Tue, 12 Dec 2017 19:10:23 +0530
Subject: [PATCH 0914/1225] Fixing typo

---
 .../eigen3/unsupported/Eigen/CXX11/src/NeuralNetworks/Patch3d.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/third_party/eigen3/unsupported/Eigen/CXX11/src/NeuralNetworks/Patch3d.h b/third_party/eigen3/unsupported/Eigen/CXX11/src/NeuralNetworks/Patch3d.h
index 89190eb1af..2864f83299 100644
--- a/third_party/eigen3/unsupported/Eigen/CXX11/src/NeuralNetworks/Patch3d.h
+++ b/third_party/eigen3/unsupported/Eigen/CXX11/src/NeuralNetworks/Patch3d.h
@@ -151,7 +151,7 @@ Extract3DPatches(
 
   // TODO(mjanusz): Consider getting rid of pad(), and stride() and extend
   // extract_patches to take additional parameters for padding/striding,
-  // similarly to etract_image_patches.
+  // similarly to extract_image_patches.
   return input.pad(paddings, padding_value).extract_patches(patch_dims).reshape(pre_stride_dims).stride(strides);
 }
 
-- 
GitLab


From 5f470cf9b0b8adf9cf74da33e30024c62647165c Mon Sep 17 00:00:00 2001
From: Mark Daoust <markdaoust@google.com>
Date: Tue, 12 Dec 2017 06:02:03 -0800
Subject: [PATCH 0915/1225] Add get started `Datasets` doc

PiperOrigin-RevId: 178751067
---
 .../get_started/datasets_quickstart.md        | 398 ++++++++++++++++++
 .../get_started/premade_estimators.md         |  12 +-
 2 files changed, 406 insertions(+), 4 deletions(-)
 create mode 100644 tensorflow/docs_src/get_started/datasets_quickstart.md

diff --git a/tensorflow/docs_src/get_started/datasets_quickstart.md b/tensorflow/docs_src/get_started/datasets_quickstart.md
new file mode 100644
index 0000000000..7daa08454c
--- /dev/null
+++ b/tensorflow/docs_src/get_started/datasets_quickstart.md
@@ -0,0 +1,398 @@
+# Datasets Quick Start
+
+The @{tf.data} module contains a collection of classes that allows you to
+easily load data, manipulate it, and pipe it into your model. This document
+introduces the API by walking through two simple examples:
+
+* Reading in-memory data from numpy arrays.
+* Reading lines from a csv file.
+
+<!-- TODO(markdaoust): Add links to an example reading from multiple-files
+(image_retraining), and a from_generator example. -->
+
+## Basic input
+
+Taking slices from an array is the simplest way to get started with `tf.data`.
+
+The @{$get_started/premade_estimators$Premade Estimators} chapter describes
+the following `train_input_fn`, from
+[`iris_data.py`](https://github.com/tensorflow/models/blob/master/samples/core/get_started/iris_data.py),
+to pipe the data into the Estimator:
+
+``` python
+def train_input_fn(features, labels, batch_size):
+    """An input function for training"""
+    # Convert the inputs to a Dataset.
+    dataset = tf.data.Dataset.from_tensor_slices((dict(features), labels))
+
+    # Shuffle, repeat, and batch the examples.
+    dataset = dataset.shuffle(1000).repeat().batch(batch_size)
+
+    # Build the Iterator, and return the read end of the pipeline.
+    return dataset.make_one_shot_iterator().get_next()
+```
+
+Let's look at this more closely.
+
+### Arguments
+
+This function expects three arguments. Arguments expecting an "array" can
+accept nearly anything that can be converted to an array with `numpy.array`.
+One exception is
+[`tuple`](https://docs.python.org/3/tutorial/datastructures.html#tuples-and-sequences)
+which has special meaning for `Datasets`.
+
+* `features`: A `{'feature_name':array}` dictionary (or
+  [`DataFrame`](https://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.html))
+  containing the raw input features.
+* `labels` : An array containing the
+  [label](https://developers.google.com/machine-learning/glossary/#label)
+  for each example.
+* `batch_size` : An integer indicating the desired batch size.
+
+In [`premade_estimator.py`](https://github.com/tensorflow/models/blob/master/samples/core/get_started/premade_estimator.py)
+we retrieved the Iris data using the `iris_data.load_data()` function.
+You can run it, and unpack the results as follows:
+
+``` python
+import iris_data
+
+# Fetch the data
+train, test = iris_data.load_data()
+features, labels = train
+```
+
+Then we passed this data to the input function, with a line similar to this:
+
+``` python
+batch_size=100
+iris_data.train_input_fn(features, labels, batch_size)
+```
+
+Let's walk through the `train_input_fn()`.
+
+### Slices
+
+In the simplest cases, @{tf.data.Dataset.from_tensor_slices} function takes an
+array and returns a @{tf.data.Dataset} representing slices of the array. For
+example, an array containing the @{$mnist/beginners$mnist training data}
+has a shape of `(60000, 28, 28)`. Passing this to `from_tensor_slices` returns
+a `Dataset` object containing 60000 slices, each one a 28x28 image.
+
+The code that returns this `Dataset` is as follows:
+
+``` python
+train, test = tf.keras.datasets.mnist.load_data()
+mnist_x, mnist_y = train
+
+mnist_ds = tf.data.Dataset.from_tensor_slices(mnist_x)
+print(mnist_ds)
+```
+
+This will print the following line, showing the @{$programmers_guide/tensors#shapes$shapes} and @{$programmers_guide/tensors#data_types$types} of the items in
+the dataset. Note that the dataset does not know how many items it contains.
+
+``` None
+<TensorSliceDataset shapes: (28,28), types: tf.uint8>
+```
+
+The dataset above represents a collection of simple arrays, but datasets are
+much more powerful than this. Datasets transparently handle any nested
+combination of dictionaries or tuples. For example, ensuring that `features`
+is a standard dictionary, you can then convert the dictionary of arrays to
+a `Dataset` of dictionaries as follows:
+
+``` python
+dataset = tf.data.Dataset.from_tensor_slices(dict(features))
+print(dataset)
+```
+``` None
+<TensorSliceDataset
+
+  shapes: {
+    SepalLength: (), PetalWidth: (),
+    PetalLength: (), SepalWidth: ()},
+
+  types: {
+      SepalLength: tf.float64, PetalWidth: tf.float64,
+      PetalLength: tf.float64, SepalWidth: tf.float64}
+>
+```
+
+Here we see that when a `Dataset` contains structured elements, the `shapes`
+and `types` of the `Dataset` take on the same structure. This dataset contains
+dictionaries of @{$programmers_guide/tensors#rank$scalars}, all of type
+`tf.float64`.
+
+The first line of `train_input_fn` uses the same functionality, but adds
+another level of structure. It creates a dataset containing
+`(features, labels)` pairs.
+
+The following code shows that the label is a scalar with type `int64`:
+
+``` python
+# Convert the inputs to a Dataset.
+dataset = tf.data.Dataset.from_tensor_slices((dict(features), labels))
+print(dataset)
+```
+```
+<TensorSliceDataset
+    shapes: (
+        {
+          SepalLength: (), PetalWidth: (),
+          PetalLength: (), SepalWidth: ()},
+        ()),
+
+    types: (
+        {
+          SepalLength: tf.float64, PetalWidth: tf.float64,
+          PetalLength: tf.float64, SepalWidth: tf.float64},
+        tf.int64)>
+```
+
+### Manipulation
+
+Currently the `Dataset` would iterate over the data once, in a fixed order, and
+only produce a single element at a time. It needs further processing before it
+can be used for training. Fortunately, the `tf.data.Dataset` class provides
+methods to better prepare the data for training. The next line of the input
+function takes advantage of several of these methods:
+
+``` python
+# Shuffle, repeat, and batch the examples.
+dataset = dataset.shuffle(1000).repeat().batch(batch_size)
+```
+
+The @{tf.data.Dataset.shuffle$`shuffle`} method uses a fixed-size buffer to
+shuffle the items as they pass through. Setting a `buffer_size` greater than
+the number of examples in the `Dataset` ensures that the data is completely
+shuffled. The Iris data set only contains 150 examples.
+
+The @{tf.data.Dataset.repeat$`repeat`} method has the `Dataset` restart when
+it reaches the end. To limit the number of epochss, set the `count` argument.
+
+The @{tf.data.Dataset.repeat$`batch`} method collects a number of examples and
+stacks them, to create batches. This adds a dimension to their shape. The new
+dimension is added as the first dimension. The following code uses
+the `batch` method on the MNIST `Dataset`, from earlier. This results in a
+`Dataset` containing 3D arrays representing stacks of `(28,28)` images:
+
+``` python
+print(mnist_ds.batch(100))
+```
+
+``` none
+<BatchDataset
+  shapes: (?, 28, 28),
+  types: tf.uint8>
+```
+Note that the dataset has an unknown batch size because the last batch will
+have fewer elements.
+
+In `train_input_fn`, after batching the `Dataset` contains 1D vectors of
+elements where each scalar was previously:
+
+```python
+print(dataset)
+```
+```
+<TensorSliceDataset
+    shapes: (
+        {
+          SepalLength: (?,), PetalWidth: (?,),
+          PetalLength: (?,), SepalWidth: (?,)},
+        (?,)),
+
+    types: (
+        {
+          SepalLength: tf.float64, PetalWidth: tf.float64,
+          PetalLength: tf.float64, SepalWidth: tf.float64},
+        tf.int64)>
+```
+
+
+### Return
+
+<!-- TODO(markdaoust) This line can be simplified to "return dataset" -->
+
+The `train`, `evaluate`, and `predict` methods of every Estimator require
+input functions to return a `(features, label)` pair containing
+@{$programmers_guide/tensors$tensorflow tensors}. The `train_input_fn` uses
+the following line to convert the Dataset into the expected format:
+
+```python
+# Build the Iterator, and return the read end of the pipeline.
+features_result, labels_result = dataset.make_one_shot_iterator().get_next()
+```
+
+The result is a structure of @{$programmers_guide/tensors$TensorFlow tensors},
+matching the layout of the items in the `Dataset`.
+For an introduction to what these objects are and how to work with them,
+see @{$get_started/get_started}.
+
+``` python
+print((features_result, labels_result))
+```
+
+```None
+({
+    'SepalLength': <tf.Tensor 'IteratorGetNext:2' shape=(?,) dtype=float64>,
+    'PetalWidth': <tf.Tensor 'IteratorGetNext:1' shape=(?,) dtype=float64>,
+    'PetalLength': <tf.Tensor 'IteratorGetNext:0' shape=(?,) dtype=float64>,
+    'SepalWidth': <tf.Tensor 'IteratorGetNext:3' shape=(?,) dtype=float64>},
+Tensor("IteratorGetNext_1:4", shape=(?,), dtype=int64))
+```
+
+## Reading a CSV File
+
+The most common real-world use case for the `Dataset` class is to stream data
+from files on disk. The @{tf.data} module includes a variety of
+file readers. Let's see how parsing the Iris dataset from the csv file looks
+using a `Dataset`.
+
+The following call to the `iris_data.maybe_download` function downloads the
+data if necessary, and returns the pathnames of the resulting files:
+
+``` python
+import iris_data
+train_path, test_path = iris_data.maybe_download()
+```
+
+The [`iris_data.csv_input_fn`](https://github.com/tensorflow/models/blob/master/samples/core/get_started/iris_data.py)
+function contains an alternative implementation that parses the csv files using
+a `Dataset`.
+
+Let's look at how to build an Estimator-compatible input function that reads
+from the local files.
+
+### Build the `Dataset`
+
+We start by building a @{tf.data.TextLineDataset$`TextLineDataset`} object to
+read the file one line at a time. Then, we call the
+@{tf.data.Dataset.skip$`skip`} method to skip over the first line of the file, which contains a header, not an example:
+
+``` python
+ds = tf.data.TextLineDataset(train_path).skip(1)
+```
+
+### Build a csv line parser
+
+Ultimately we will need to parse each of the lines in the dataset, to
+produce the necessary `(features, label)` pairs.
+
+We will start by building a function to parse a single line.
+
+The following `iris_data.parse_line` function acomplishes this taks using the
+@{tf.decode_csv} function, and some simple python code:
+
+We must parse each of the lines in the dataset in order to generate the
+necessary `(features, label)` pairs. The following `_parse_line` function
+calls @{tf.decode_csv} to parse a single line into its features
+and the label. Since Estimators require that features be represented as a
+dictionary, we rely on Python's built-in `dict` and `zip` functions to build
+that dictionary.  The feature names are the keys of that dictionary.
+We then then call the dictionary's `pop` method to remove the label field from
+the features dictionary:
+
+``` python
+# Metadata describing the text columns
+COLUMNS = ['SepalLength', 'SepalWidth',
+           'PetalLength', 'PetalWidth',
+           'label']
+FIELD_DEFAULTS = [[0.0], [0.0], [0.0], [0.0], [0]]
+def _parse_line(line):
+    # Decode the line into its fields
+    fields = tf.decode_csv(line, FIELD_DEFAULTS)
+
+    # Pack the result into a dictionary
+    features = dict(zip(COLUMNS,fields))
+
+    # Separate the label from the features
+    label = features.pop('label')
+
+    return features, label
+```
+
+### Parse the lines
+
+Datasets have many methods for manipulating the data while it is being piped
+to a model. The most heavily-used method is @{tf.data.Dataset.map$`map`}, which
+applies a transformation to each element of the `Dataset`.
+
+The `map` method takes a `map_func` argument that describes how each item in the
+`Dataset` should be transformed.
+
+<div style="width:80%; margin:auto; margin-bottom:10px; margin-top:20px;">
+<img style="width:100%" src="../images/datasets/map.png">
+</div>
+<div style="text-align: center">
+The @{tf.data.Dataset.map$`map`} method applies the `map_func` to
+transform each item in the <code>Dataset</code>.
+</div>
+
+So to parse the lines as they are streamed out of the csv file, we pass our
+`_parse_line` function to the `map` method:
+
+``` python
+ds = ds.map(_parse_line)
+print(ds)
+```
+``` None
+<MapDataset
+shapes: (
+    {SepalLength: (), PetalWidth: (), ...},
+    ()),
+types: (
+    {SepalLength: tf.float32, PetalWidth: tf.float32, ...},
+    tf.int32)>
+```
+
+Now instead of simple scalar strings, the dataset contains `(features, label)`
+pairs.
+
+the remainder of the `iris_data.csv_input_fn` function is identical
+to `iris_data.train_input_fn` which was covered in the in the
+[Basic input](#basic_input) section.
+
+### Try it out
+
+This function can be used as a replacement for
+`iris_data.train_input_fn`. It can be used to feed an estimator as follows:
+
+``` python
+train_path, test_path = iris_data.maybe_download()
+
+# All the inputs are numeric
+feature_columns = [
+    tf.feature_column.numeric_column(name)
+    for name in iris_data.CSV_COLUMN_NAMES[:-1]]
+
+# Build the estimator
+est = tf.estimator.LinearClassifier(feature_columns,
+                                    n_classes=3)
+# Train the estimator
+batch_size = 100
+est.train(
+    steps=1000,
+    input_fn=lambda : iris_data.csv_input_fn(train_path, batch_size))
+```
+
+Estimators expect an `input_fn` to take no arguments. To work around this
+restriction, we use `lambda` to capture the arguments and provide the expected
+interface.
+
+## Summary
+
+The `tf.data` module provides a collection of classes and functions for easily
+reading data from a variety of sources. Furthermore, `tf.data` has simple
+powerful methods for applying a wide variety of standard and custom
+transformations.
+
+Now that you have the basic idea of how to efficiently load data for an
+Estimator. The next step is to learn how to build your own custom estimator in:
+
+* @{$get_started/custom_estimators}
+
+If you'd like to learn more about additional functionality of `Datasets` see:
+
+* @{$programmers_guide/datasets}
diff --git a/tensorflow/docs_src/get_started/premade_estimators.md b/tensorflow/docs_src/get_started/premade_estimators.md
index ff839fd040..d6fc1643f0 100644
--- a/tensorflow/docs_src/get_started/premade_estimators.md
+++ b/tensorflow/docs_src/get_started/premade_estimators.md
@@ -106,8 +106,10 @@ and [*Iris virginica*](https://www.flickr.com/photos/33397993@N05/3352169862)
 
 ### The data set
 
-The Iris data set contains four features and one label.  The four features
-identify the following botanical characteristics of individual Iris flowers:
+The Iris data set contains four features and one
+[label](https://developers.google.com/machine-learning/glossary/#label).
+The four features identify the following botanical characteristics of
+individual Iris flowers:
 
 * sepal length
 * sepal width
@@ -207,7 +209,9 @@ tuple:
 * "features" - A Python dictionary in which:
     * Each key is the name of a feature.
     * Each value is an array containing all of that feature's values.
-* "label" - An array containing the values of the label for every example.
+* "label" - An array containing the values of the
+  [label](https://developers.google.com/machine-learning/glossary/#label) for
+  every example.
 
 Just to demonstrate the format of the input function here's a simple
 implementation:
@@ -256,7 +260,7 @@ Here is the input function used for training in this program:
 def train_input_fn(features, labels, batch_size):
     """An input function for training"""
     # Convert the inputs to a Dataset.
-    dataset = tf.data.Dataset.from_tensor_slices((features, labels))
+    dataset = tf.data.Dataset.from_tensor_slices((dict(features), labels))
 
     # Shuffle, repeat, and batch the examples.
     dataset = dataset.shuffle(1000).repeat().batch(batch_size)
-- 
GitLab


From 8903e318bf1840421162dab916c55e8fe1d1bcbc Mon Sep 17 00:00:00 2001
From: Robin Richtsfeld <robin.richtsfeld@gmail.com>
Date: Tue, 12 Dec 2017 15:05:35 +0100
Subject: [PATCH 0916/1225] Exclude tests from contrib_py

---
 tensorflow/contrib/BUILD | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/tensorflow/contrib/BUILD b/tensorflow/contrib/BUILD
index 61f7821519..604c41bf8a 100644
--- a/tensorflow/contrib/BUILD
+++ b/tensorflow/contrib/BUILD
@@ -9,7 +9,12 @@ load("//third_party/mpi:mpi.bzl", "if_mpi")
 
 py_library(
     name = "contrib_py",
-    srcs = glob(["**/*.py"]),
+    srcs = glob(
+        ["**/*.py"],
+        exclude = [
+            "**/*_test.py",
+        ],
+    ),
     srcs_version = "PY2AND3",
     visibility = ["//visibility:public"],
     deps = [
-- 
GitLab


From 94686be226fc999437618959a791d7bda18e1806 Mon Sep 17 00:00:00 2001
From: Derek Murray <mrry@google.com>
Date: Tue, 12 Dec 2017 07:43:24 -0800
Subject: [PATCH 0917/1225] Automated g4 rollback of changelist 178675527

PiperOrigin-RevId: 178759398
---
 tensorflow/c/c_api_function.cc                |  5 ---
 tensorflow/c/c_api_function_test.cc           | 45 -------------------
 tensorflow/c/c_test_util.cc                   |  9 ----
 tensorflow/c/c_test_util.h                    |  3 --
 tensorflow/python/framework/function_test.py  | 42 -----------------
 .../python/framework/graph_to_function_def.py |  7 ---
 6 files changed, 111 deletions(-)

diff --git a/tensorflow/c/c_api_function.cc b/tensorflow/c/c_api_function.cc
index b9312c2974..dcb818b88b 100644
--- a/tensorflow/c/c_api_function.cc
+++ b/tensorflow/c/c_api_function.cc
@@ -226,11 +226,6 @@ Status FillFunctionBody(
       }
       node_def->add_input(strings::StrCat("^", normalized));
     }
-
-    // A function is stateful if any of its nodes are stateful.
-    if (node->op_def().is_stateful()) {
-      fdef->mutable_signature()->set_is_stateful(true);
-    }
   }
   return Status::OK();
 }
diff --git a/tensorflow/c/c_api_function_test.cc b/tensorflow/c/c_api_function_test.cc
index 4ffc9d6931..d5580b6589 100644
--- a/tensorflow/c/c_api_function_test.cc
+++ b/tensorflow/c/c_api_function_test.cc
@@ -1482,51 +1482,6 @@ TEST_F(CApiFunctionTest, GetOpDef) {
   EXPECT_EQ(op_def.name(), func_name_);
   EXPECT_EQ(op_def.input_arg_size(), 1);
   EXPECT_EQ(op_def.output_arg_size(), 1);
-  EXPECT_FALSE(op_def.is_stateful());
-
-  TF_DeleteBuffer(buffer);
-}
-
-void DefineStatefulFunction(const char* name, TF_Function** func) {
-  std::unique_ptr<TF_Graph, decltype(&TF_DeleteGraph)> func_graph(
-      TF_NewGraph(), TF_DeleteGraph);
-  std::unique_ptr<TF_Status, decltype(&TF_DeleteStatus)> s(TF_NewStatus(),
-                                                           TF_DeleteStatus);
-
-  TF_Tensor* tensor_shape = Int32Tensor({37, 1});
-  TF_Operation* shape = Const(tensor_shape, func_graph.get(), s.get(), "shape");
-  TF_Operation* random =
-      RandomUniform(shape, TF_FLOAT, func_graph.get(), s.get());
-
-  TF_Output inputs[] = {};
-  TF_Output outputs[] = {{random, 0}};
-  *func = TF_GraphToFunction(func_graph.get(), name, /*append_hash=*/0, -1,
-                             /*opers=*/nullptr, 0, inputs, 1, outputs,
-                             /*output_names=*/nullptr,
-                             /*opts=*/nullptr, "", s.get());
-  ASSERT_EQ(TF_OK, TF_GetCode(s.get())) << TF_Message(s.get());
-  ASSERT_NE(*func, nullptr);
-  TF_DeleteTensor(tensor_shape);
-}
-
-TEST_F(CApiFunctionTest, StatefulOpDef) {
-  DefineStatefulFunction(func_name_, &func_);
-  TF_GraphCopyFunction(host_graph_, func_, nullptr, s_);
-  ASSERT_EQ(TF_OK, TF_GetCode(s_)) << TF_Message(s_);
-
-  // Test we can retrieve function OpDef from graph
-  TF_Buffer* buffer = TF_NewBuffer();
-  TF_GraphGetOpDef(host_graph_, func_name_, buffer, s_);
-  ASSERT_EQ(TF_OK, TF_GetCode(s_)) << TF_Message(s_);
-
-  // Sanity check returned OpDef
-  string data(static_cast<const char*>(buffer->data), buffer->length);
-  OpDef op_def;
-  op_def.ParseFromString(data);
-  EXPECT_EQ(op_def.name(), func_name_);
-  EXPECT_EQ(op_def.input_arg_size(), 0);
-  EXPECT_EQ(op_def.output_arg_size(), 1);
-  EXPECT_TRUE(op_def.is_stateful());
 
   TF_DeleteBuffer(buffer);
 }
diff --git a/tensorflow/c/c_test_util.cc b/tensorflow/c/c_test_util.cc
index 37439ff0be..c291a2e440 100644
--- a/tensorflow/c/c_test_util.cc
+++ b/tensorflow/c/c_test_util.cc
@@ -193,15 +193,6 @@ TF_Operation* LessThan(TF_Output l, TF_Output r, TF_Graph* graph,
   return TF_FinishOperation(desc, s);
 }
 
-TF_Operation* RandomUniform(TF_Operation* shape, TF_DataType dtype,
-                            TF_Graph* graph, TF_Status* s) {
-  TF_OperationDescription* desc =
-      TF_NewOperation(graph, "RandomUniform", "random_uniform");
-  TF_AddInput(desc, {shape, 0});
-  TF_SetAttrType(desc, "dtype", dtype);
-  return TF_FinishOperation(desc, s);
-}
-
 void Split3Helper(TF_Operation* input, TF_Graph* graph, TF_Status* s,
                   const char* name, TF_Operation** op) {
   TF_Operation* zero = ScalarConst(
diff --git a/tensorflow/c/c_test_util.h b/tensorflow/c/c_test_util.h
index 96a93afef3..d547337492 100644
--- a/tensorflow/c/c_test_util.h
+++ b/tensorflow/c/c_test_util.h
@@ -74,9 +74,6 @@ TF_Operation* Neg(TF_Operation* n, TF_Graph* graph, TF_Status* s,
 
 TF_Operation* LessThan(TF_Output l, TF_Output r, TF_Graph* graph, TF_Status* s);
 
-TF_Operation* RandomUniform(TF_Operation* shape, TF_DataType dtype,
-                            TF_Graph* graph, TF_Status* s);
-
 // Split `input` along the first dimention into 3 tensors
 TF_Operation* Split3(TF_Operation* input, TF_Graph* graph, TF_Status* s,
                      const char* name = "split3");
diff --git a/tensorflow/python/framework/function_test.py b/tensorflow/python/framework/function_test.py
index 8a7bf7a021..11f343c579 100644
--- a/tensorflow/python/framework/function_test.py
+++ b/tensorflow/python/framework/function_test.py
@@ -914,48 +914,6 @@ class FunctionTest(test.TestCase):
           np.array([1.0, 0.0]).astype(np.float32),
           sess.run(dinp, {inp: x}))
 
-  def testStatefulFunction(self):
-
-    @function.Defun()
-    def FunctionWithStatelessOp():
-      return constant_op.constant(42.0)
-
-    @function.Defun()
-    def FunctionWithStatefulOp():
-      return random_ops.random_uniform([100], maxval=10, dtype=dtypes.int32)
-
-    @function.Defun()
-    def FunctionWithStatelessFunctionCall():
-      return FunctionWithStatelessOp()
-
-    @function.Defun()
-    def FunctionWithStatefulFunctionCall():
-      return FunctionWithStatefulOp()
-
-    # Test that the `is_stateful` bit is propagated.
-    self.assertFalse(FunctionWithStatelessOp.definition.signature.is_stateful)
-    self.assertTrue(FunctionWithStatefulOp.definition.signature.is_stateful)
-    self.assertFalse(
-        FunctionWithStatelessFunctionCall.definition.signature.is_stateful)
-    self.assertTrue(
-        FunctionWithStatefulFunctionCall.definition.signature.is_stateful)
-
-    # Ensure that two invocations of the same random-number-generating
-    # function produce different results.
-    result1 = FunctionWithStatefulFunctionCall()
-    result2 = FunctionWithStatefulFunctionCall()
-
-    # Statefulness affects how the function is treated by the various
-    # optimization passes, so run the test in each optimizer
-    # configuration.
-    for config in _OptimizerOptions():
-      with session.Session(config=config) as sess:
-        val1, val2 = sess.run((result1, result2))
-        self.assertFalse(all(val1 == val2))
-        val3, val4 = sess.run((result1, result2))
-        self.assertFalse(all(val3 == val1))
-        self.assertFalse(all(val4 == val2))
-
 
 @test_util.with_c_api
 class FunctionsFromProtos(test.TestCase):
diff --git a/tensorflow/python/framework/graph_to_function_def.py b/tensorflow/python/framework/graph_to_function_def.py
index 625f31146b..448f87aa6e 100644
--- a/tensorflow/python/framework/graph_to_function_def.py
+++ b/tensorflow/python/framework/graph_to_function_def.py
@@ -110,13 +110,6 @@ def _add_op_node(op, func, input_dict):
                                                (node_def.input[i],
                                                 input_dict.items()))
       node_def.input[i] = input_dict[node_def.input[i]]
-  # The function is stateful if any of its operations are stateful.
-  # NOTE(mrry): The "Const" node typically does not have an `OpDef` associated
-  # with it, so we assume any nodes without an `OpDef` are stateless.
-  # TODO(skyewm): Remove the `is not None` test after we transition to the C
-  # API.
-  if op.op_def is not None and op.op_def.is_stateful:
-    func.signature.is_stateful = True
 
 
 def graph_to_function_def(graph, operations, inputs, outputs, out_names=None):
-- 
GitLab


From 8693bf519399495cedd91293ec82b492ea401f6f Mon Sep 17 00:00:00 2001
From: Jiri Simsa <jsimsa@google.com>
Date: Tue, 12 Dec 2017 08:31:48 -0800
Subject: [PATCH 0918/1225] This CL makes two improvements to the
 `map_and_batch` transformation:

1) It fixes a bug that manifested as `OutOfRange` being returned prematurely.

2) It changes the behavior on sequences of elements whose size is not a multiple of `batch_size`. Previously, the implementation would drop the last small batch (similar to `batch_and_drop_remainder). Newly, the implementation returns the last small batch (similar to `batch`).

PiperOrigin-RevId: 178764508
---
 .../kernel_tests/batch_dataset_op_test.py     | 12 +--
 .../contrib/data/python/ops/batching.py       | 17 ++---
 .../core/kernels/map_and_batch_dataset_op.cc  | 73 +++++++++++++++----
 3 files changed, 72 insertions(+), 30 deletions(-)

diff --git a/tensorflow/contrib/data/python/kernel_tests/batch_dataset_op_test.py b/tensorflow/contrib/data/python/kernel_tests/batch_dataset_op_test.py
index d975a0167f..506eefbef0 100644
--- a/tensorflow/contrib/data/python/kernel_tests/batch_dataset_op_test.py
+++ b/tensorflow/contrib/data/python/kernel_tests/batch_dataset_op_test.py
@@ -630,7 +630,11 @@ class BatchDatasetTest(test.TestCase):
           for j in range(8):
             self.assertAllEqual(component[(i * 8 + j) % 7]**2,
                                 result_component[j])
-      # The last batch should fail with `OutOfRange`.
+      result = sess.run(get_next)
+      for component, result_component in zip(components, result):
+        for j in range((14 * 7) % 8):
+          self.assertAllEqual(component[((num_batches - 1) * 8 + j) % 7]**2,
+                              result_component[j])
       with self.assertRaises(errors.OutOfRangeError):
         sess.run(get_next)
 
@@ -647,11 +651,7 @@ class BatchDatasetTest(test.TestCase):
     return self._testBatchAndMapDatasetHelper()
 
   def testBatchAndMapDatasetWithParallelBatching(self):
-    # TODO(b/70299909): This test surfaces a bug in the `map_and_batch`
-    # transformation, which manifests as premature EOF. Fix it.
-    #
-    # return self._testBatchAndMapDatasetHelper(num_parallel_batches=10)
-    pass
+    return self._testBatchAndMapDatasetHelper(num_parallel_batches=10)
 
   def testMapAndBatchSparse(self):
 
diff --git a/tensorflow/contrib/data/python/ops/batching.py b/tensorflow/contrib/data/python/ops/batching.py
index 63782d229e..e8b2d44a8b 100644
--- a/tensorflow/contrib/data/python/ops/batching.py
+++ b/tensorflow/contrib/data/python/ops/batching.py
@@ -390,17 +390,12 @@ def map_and_batch(map_func, batch_size, num_parallel_batches=1):
   """Fused implementation of `map` and `batch`.
 
   Maps `map_func` across `batch_size` consecutive elements of this dataset
-  and then combines them into a batch. Similarly to `batch_and_drop_remainder`,
-  if the batch size does not evenly divide the input dataset size, this
-  transformation will drop the final smaller element.
-
-
-  Functionally, it is equivalent to `map` followed by
-  `batch_and_drop_remainder`. However, by fusing the two transformations
-  together, the implementation can be more efficient. This transformation is a
-  stop gap solution for performance critical workloads. Once automatic input
-  pipeline optimization are implemented, the fusing of map and batch will not
-  need to be exposed at the API level and this method will be removed.
+  and then combines them into a batch. Functionally, it is equivalent to `map`
+  followed by `batch`. However, by fusing the two transformations together, the
+  implementation can be more efficient. Surfacing this transformation in the API
+  is temporary. Once automatic input pipeline optimization is implemented,
+  the fusing of `map` and `batch` will happen automatically and this API will be
+  deprecated.
 
   Args:
     map_func: A function mapping a nested structure of tensors to another
diff --git a/tensorflow/core/kernels/map_and_batch_dataset_op.cc b/tensorflow/core/kernels/map_and_batch_dataset_op.cc
index 65a3ee2a64..7cd9ab60ab 100644
--- a/tensorflow/core/kernels/map_and_batch_dataset_op.cc
+++ b/tensorflow/core/kernels/map_and_batch_dataset_op.cc
@@ -132,7 +132,8 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel {
         if (current_batch_index_ != -1) {
           for (size_t batch_index = 0;
                batch_index < dataset()->num_parallel_batches_; ++batch_index) {
-            WaitForBatch(batch_index).IgnoreError();
+            int64 num_elements;
+            WaitForBatch(batch_index, &num_elements).IgnoreError();
             // Deallocate tensors allocated for the output.
             batch_results_[batch_index].output.clear();
           }
@@ -166,17 +167,35 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel {
           }
         }
 
-        if (end_of_input_) {
+        int64 num_elements = 0;
+        Status status = WaitForBatch(current_batch_index_, &num_elements);
+        if (num_elements == 0) {
           *end_of_sequence = true;
           return Status::OK();
         }
-
-        Status status = WaitForBatch(current_batch_index_);
         if (!status.ok()) {
           // Deallocate tensors allocated for the output.
           batch_results_[current_batch_index_].output.clear();
         } else {
-          *out_tensors = std::move(batch_results_[current_batch_index_].output);
+          if (num_elements < dataset()->batch_size_) {
+            const std::vector<Tensor>& output =
+                batch_results_[current_batch_index_].output;
+            for (size_t i = 0; i < output.size(); ++i) {
+              TensorShape component_shape(
+                  batch_results_[current_batch_index_].output[i].shape());
+              component_shape.set_dim(0, num_elements);
+              Tensor component(cpu_allocator(), output[i].dtype(),
+                               component_shape);
+              TF_RETURN_IF_ERROR(
+                  CopyPartialBatch(&component, output[i], num_elements));
+              out_tensors->emplace_back(std::move(component));
+            }
+            // Deallocate tensors allocated for the output.
+            batch_results_[current_batch_index_].output.clear();
+          } else {
+            *out_tensors =
+                std::move(batch_results_[current_batch_index_].output);
+          }
           *end_of_sequence = false;
         }
         StartInvocationBatch(ctx, current_batch_index_);
@@ -195,6 +214,7 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel {
 
       struct InvocationResult {
         Status status;
+        bool end_of_input;
         std::vector<Tensor> return_values;
       };
 
@@ -202,6 +222,29 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel {
         return batch_index * dataset()->batch_size_ + offset;
       }
 
+      Status CopyPartialBatch(Tensor* output, const Tensor& value,
+                              int64 num_elements) {
+        switch (value.dtype()) {
+#define CASE(type)                                                \
+  case DataTypeToEnum<type>::value: {                             \
+    auto output_t = output->flat_outer_dims<type>();              \
+    auto value_t = value.flat_outer_dims<type>();                 \
+    for (size_t i = 0; i < num_elements; i++) {                   \
+      output_t.template chip<0>(i) = value_t.template chip<0>(i); \
+    }                                                             \
+    return Status::OK();                                          \
+  }
+          TF_CALL_NUMBER_TYPES(CASE);
+          TF_CALL_string(CASE);
+          TF_CALL_variant(CASE);
+#undef CASE
+          default:
+            return errors::InvalidArgument("Unsupported data type: ",
+                                           value.dtype());
+        }
+        return Status::OK();
+      }
+
       void EnsureOutputAllocated(BatchResult* batch_result,
                                  const std::vector<Tensor>& return_values) {
         mutex_lock l(batch_result->mu);
@@ -228,8 +271,8 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel {
         // Get the next input element.
         std::vector<Tensor> input_element;
         result->status =
-            input_impl_->GetNext(ctx, &input_element, &end_of_input_);
-        if (end_of_input_ || !result->status.ok()) {
+            input_impl_->GetNext(ctx, &input_element, &result->end_of_input);
+        if (result->end_of_input || !result->status.ok()) {
           batch_result->counter->DecrementCount();
           return;
         }
@@ -316,9 +359,9 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel {
         for (size_t i = 0; i < dataset()->batch_size_; ++i) {
           size_t index = ComputeInvocationIndex(batch_index, i);
           InvocationResult* result = &invocation_results_[index];
-          // Reset the state of `result`.
-          // NOTE(mrry): `result->return_values` were cleared when the previous
-          // invocation completed.
+          // Reset the state of `result`; `result->return_values` was cleared
+          // when the previous invocation completed.
+          result->end_of_input = false;
           result->status = Status::OK();
         }
         // Start individual invocations.
@@ -327,13 +370,18 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel {
         }
       }
 
-      Status WaitForBatch(int64 batch_index) EXCLUSIVE_LOCKS_REQUIRED(mu_) {
+      Status WaitForBatch(int64 batch_index, int64* num_elements)
+          EXCLUSIVE_LOCKS_REQUIRED(mu_) {
         port::Tracing::TraceMe activity(strings::StrCat(prefix(), "::Wait"));
         batch_results_[batch_index].counter->Wait();
         Status status = Status::OK();
-        for (size_t i = 0; i < dataset()->batch_size_; ++i) {
+        for (size_t i = 0; i < dataset()->batch_size_; ++i, ++*num_elements) {
           size_t index = ComputeInvocationIndex(batch_index, i);
           InvocationResult* result = &invocation_results_[index];
+          if (result->end_of_input) {
+            VLOG(3) << "end of input encountered at element[" << i << "]: ";
+            return Status::OK();
+          }
           if (!result->status.ok()) {
             VLOG(3) << "failed to process element[" << i
                     << "]: " << result->status;
@@ -348,7 +396,6 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel {
       const std::unique_ptr<IteratorBase> input_impl_ GUARDED_BY(mu_);
       std::vector<InvocationResult> invocation_results_ GUARDED_BY(mu_);
       std::vector<BatchResult> batch_results_ GUARDED_BY(mu_);
-      bool end_of_input_ GUARDED_BY(mu_) = false;
     };
 
     const DatasetBase* const input_;
-- 
GitLab


From 0e1c5577da2c9f4f8bdf3346a12a739e3752db03 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 12 Dec 2017 09:01:37 -0800
Subject: [PATCH 0919/1225] Disable neutral element and reciprocal
 optimizations again.

PiperOrigin-RevId: 178767676
---
 tensorflow/core/grappler/optimizers/constant_folding.cc | 5 +++--
 tensorflow/python/grappler/cluster_test.py              | 4 ++--
 2 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/tensorflow/core/grappler/optimizers/constant_folding.cc b/tensorflow/core/grappler/optimizers/constant_folding.cc
index 45ba8d01e6..3658594edd 100644
--- a/tensorflow/core/grappler/optimizers/constant_folding.cc
+++ b/tensorflow/core/grappler/optimizers/constant_folding.cc
@@ -1346,7 +1346,7 @@ Status ConstantFolding::SimplifyGraph(GraphDef* output,
     const bool is_any_div = IsAnyDiv(*node);
     // Simplify multiplication by ones or zeros, and addition/subtraction of
     // zeros.
-    if (use_shape_info &&
+    if (is_aggressive && use_shape_info &&
         (is_mul || is_matmul || is_add || is_sub || is_any_div) &&
         properties.HasInputProperties(node->name()) &&
         properties.HasOutputProperties(node->name())) {
@@ -1422,7 +1422,8 @@ Status ConstantFolding::SimplifyGraph(GraphDef* output,
     // Strength reduce floating point division by a constant Div(x, const) to
     // multiplication by the reciprocal Mul(x, Reciprocal(const)). This in turn
     // will be constant folded to Mul(x, 1.0/const).
-    if (node->input_size() >= 2 && (IsRealDiv(*node) || IsDiv(*node))) {
+    if (is_aggressive && node->input_size() >= 2 &&
+        (IsRealDiv(*node) || IsDiv(*node))) {
       const string& const_input = node->input(1);
       const NodeDef* denom = node_map_->GetNode(const_input);
       CHECK(denom != nullptr);
diff --git a/tensorflow/python/grappler/cluster_test.py b/tensorflow/python/grappler/cluster_test.py
index 3ddcb741b5..77dd55981b 100644
--- a/tensorflow/python/grappler/cluster_test.py
+++ b/tensorflow/python/grappler/cluster_test.py
@@ -43,7 +43,7 @@ class ClusterTest(test.TestCase):
       op_perfs, run_time, step_stats = grappler_cluster.MeasureCosts(
           grappler_item)
       self.assertTrue(run_time > 0)
-      self.assertEqual(len(op_perfs), 9)
+      self.assertEqual(len(op_perfs), 10)
       self.assertTrue(step_stats.dev_stats)
 
   def testNoDetailedStats(self):
@@ -120,7 +120,7 @@ class ClusterTest(test.TestCase):
         disable_detailed_stats=False, disable_timeline=False) as gcluster:
       op_perfs, run_time, step_stats = gcluster.MeasureCosts(grappler_item)
       self.assertTrue(run_time > 0)
-      self.assertEqual(len(op_perfs), 9)
+      self.assertEqual(len(op_perfs), 10)
       self.assertTrue(step_stats.dev_stats)
 
 
-- 
GitLab


From c8a5ffdeb2a17df2d2822c7a6df8a44f3ab85614 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 12 Dec 2017 09:19:42 -0800
Subject: [PATCH 0920/1225] Integrate tensor pool feature to `gan_loss`
 function.

PiperOrigin-RevId: 178769850
---
 tensorflow/contrib/gan/BUILD                  |  1 +
 .../python/random_tensor_pool_impl.py         |  5 +-
 tensorflow/contrib/gan/python/train.py        | 56 ++++++++++++++++++-
 tensorflow/contrib/gan/python/train_test.py   | 46 +++++++++++++++
 4 files changed, 105 insertions(+), 3 deletions(-)

diff --git a/tensorflow/contrib/gan/BUILD b/tensorflow/contrib/gan/BUILD
index a2e6fa51f1..fa3f42d6af 100644
--- a/tensorflow/contrib/gan/BUILD
+++ b/tensorflow/contrib/gan/BUILD
@@ -56,6 +56,7 @@ py_test(
     srcs = ["python/train_test.py"],
     srcs_version = "PY2AND3",
     deps = [
+        ":features",
         ":namedtuples",
         ":train",
         "//tensorflow/contrib/framework:framework_py",
diff --git a/tensorflow/contrib/gan/python/features/python/random_tensor_pool_impl.py b/tensorflow/contrib/gan/python/features/python/random_tensor_pool_impl.py
index 9d733b6ff9..9d10db0f5a 100644
--- a/tensorflow/contrib/gan/python/features/python/random_tensor_pool_impl.py
+++ b/tensorflow/contrib/gan/python/features/python/random_tensor_pool_impl.py
@@ -49,7 +49,7 @@ def _to_tuple(x):
 
 
 def tensor_pool(input_values,
-                pool_size,
+                pool_size=50,
                 pooling_probability=0.5,
                 name='tensor_pool'):
   """Queue storing input values and returning random previously stored ones.
@@ -65,7 +65,8 @@ def tensor_pool(input_values,
   Args:
     input_values: A `Tensor`, or a list or tuple of `Tensor`s from which to read
       values to be pooled.
-    pool_size: An integer specifying the maximum size of the pool.
+    pool_size: An integer specifying the maximum size of the pool. Defaults to
+      50.
     pooling_probability: A float `Tensor` specifying the probability of getting
       a value from the pool, as opposed to just the current input.
     name: A string prefix for the name scope for all tensorflow ops.
diff --git a/tensorflow/contrib/gan/python/train.py b/tensorflow/contrib/gan/python/train.py
index 27c1a22451..edd0113977 100644
--- a/tensorflow/contrib/gan/python/train.py
+++ b/tensorflow/contrib/gan/python/train.py
@@ -327,6 +327,53 @@ def _use_aux_loss(aux_loss_weight):
     return False
 
 
+def _tensor_pool_adjusted_model(model, tensor_pool_fn):
+  """Adjusts model using `tensor_pool_fn`.
+
+  Args:
+    model: A GANModel tuple.
+    tensor_pool_fn: A function that takes (generated_data, generator_inputs),
+      stores them in an internal pool and returns a previously stored
+      (generated_data, generator_inputs) with some probability. For example
+      tfgan.features.tensor_pool.
+
+  Returns:
+    A new GANModel tuple where discriminator outputs are adjusted by taking
+    pooled generator outputs as inputs. Returns the original model if
+    `tensor_pool_fn` is None.
+
+  Raises:
+    ValueError: If tensor pool does not suport the `model`.
+  """
+  if tensor_pool_fn is None:
+    return model
+
+  pooled_generated_data, pooled_generator_inputs = tensor_pool_fn(
+      (model.generated_data, model.generator_inputs))
+
+  if isinstance(model, namedtuples.GANModel):
+    dis_gen_outputs = model.discriminator_fn(pooled_generated_data,
+                                             pooled_generator_inputs)
+    return model._replace(discriminator_gen_outputs=dis_gen_outputs)
+  elif isinstance(model, namedtuples.ACGANModel):
+    (dis_pooled_gen_outputs,
+     dis_pooled_gen_classification_logits) = model.discriminator_fn(
+         pooled_generated_data, pooled_generator_inputs)
+    return model._replace(
+        discriminator_gen_outputs=dis_pooled_gen_outputs,
+        discriminator_gen_classification_logits=
+        dis_pooled_gen_classification_logits)
+  elif isinstance(model, namedtuples.InfoGANModel):
+    (dis_pooled_gen_outputs,
+     pooled_predicted_distributions) = model.discriminator_and_aux_fn(
+         pooled_generated_data, pooled_generator_inputs)
+    return model._replace(
+        discriminator_gen_outputs=dis_pooled_gen_outputs,
+        predicted_distributions=pooled_predicted_distributions)
+  else:
+    raise ValueError('Tensor pool does not support `model`: %s.' % type(model))
+
+
 def gan_loss(
     # GANModel.
     model,
@@ -339,6 +386,7 @@ def gan_loss(
     mutual_information_penalty_weight=None,
     aux_cond_generator_weight=None,
     aux_cond_discriminator_weight=None,
+    tensor_pool_fn=None,
     # Options.
     add_summaries=True):
   """Returns losses necessary to train generator and discriminator.
@@ -364,6 +412,10 @@ def gan_loss(
       https://arxiv.org/abs/1610.09585
     aux_cond_discriminator_weight: If not None: add a classification loss as in
       https://arxiv.org/abs/1610.09585
+    tensor_pool_fn: A function that takes (generated_data, generator_inputs),
+      stores them in an internal pool and returns previous stored
+      (generated_data, generator_inputs). For example
+      `tf.gan.features.tensor_pool`. Defaults to None (not using tensor pool).
     add_summaries: Whether or not to add summaries for the losses.
 
   Returns:
@@ -403,7 +455,9 @@ def gan_loss(
 
   # Create standard losses.
   gen_loss = generator_loss_fn(model, add_summaries=add_summaries)
-  dis_loss = discriminator_loss_fn(model, add_summaries=add_summaries)
+  dis_loss = discriminator_loss_fn(
+      _tensor_pool_adjusted_model(model, tensor_pool_fn),
+      add_summaries=add_summaries)
 
   # Add optional extra losses.
   if _use_aux_loss(gradient_penalty_weight):
diff --git a/tensorflow/contrib/gan/python/train_test.py b/tensorflow/contrib/gan/python/train_test.py
index 4d4ede706c..519d101e07 100644
--- a/tensorflow/contrib/gan/python/train_test.py
+++ b/tensorflow/contrib/gan/python/train_test.py
@@ -23,6 +23,7 @@ import numpy as np
 from tensorflow.contrib.framework.python.ops import variables as variables_lib
 from tensorflow.contrib.gan.python import namedtuples
 from tensorflow.contrib.gan.python import train
+from tensorflow.contrib.gan.python.features.python import random_tensor_pool
 from tensorflow.contrib.slim.python.slim import learning as slim_learning
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
@@ -411,6 +412,51 @@ class GANLossTest(test.TestCase):
   def test_callable_acgan(self):
     self._test_acgan_helper(create_callable_acgan_model)
 
+  # Test tensor pool.
+  def _test_tensor_pool_helper(self, create_gan_model_fn):
+    model = create_gan_model_fn()
+    if isinstance(model, namedtuples.InfoGANModel):
+
+      def tensor_pool_fn_impl(input_values):
+        generated_data, generator_inputs = input_values
+        output_values = random_tensor_pool.tensor_pool(
+            [generated_data] + generator_inputs, pool_size=5)
+        return output_values[0], output_values[1:]
+
+      tensor_pool_fn = tensor_pool_fn_impl
+    else:
+
+      def tensor_pool_fn_impl(input_values):
+        return random_tensor_pool.tensor_pool(input_values, pool_size=5)
+
+      tensor_pool_fn = tensor_pool_fn_impl
+    loss = train.gan_loss(model, tensor_pool_fn=tensor_pool_fn)
+    self.assertTrue(isinstance(loss, namedtuples.GANLoss))
+
+    # Check values.
+    with self.test_session(use_gpu=True) as sess:
+      variables.global_variables_initializer().run()
+      for _ in range(10):
+        sess.run([loss.generator_loss, loss.discriminator_loss])
+
+  def test_tensor_pool_gan(self):
+    self._test_tensor_pool_helper(create_gan_model)
+
+  def test_tensor_pool_callable_gan(self):
+    self._test_tensor_pool_helper(create_callable_gan_model)
+
+  def test_tensor_pool_infogan(self):
+    self._test_tensor_pool_helper(create_infogan_model)
+
+  def test_tensor_pool_callable_infogan(self):
+    self._test_tensor_pool_helper(create_callable_infogan_model)
+
+  def test_tensor_pool_acgan(self):
+    self._test_tensor_pool_helper(create_acgan_model)
+
+  def test_tensor_pool_callable_acgan(self):
+    self._test_tensor_pool_helper(create_callable_acgan_model)
+
   def test_doesnt_crash_when_in_nested_scope(self):
     with variable_scope.variable_scope('outer_scope'):
       gan_model = train.gan_model(
-- 
GitLab


From 968da4bf2722b1303cc223e8342357d62c27dfc1 Mon Sep 17 00:00:00 2001
From: Skye Wanderman-Milne <skyewm@google.com>
Date: Tue, 12 Dec 2017 10:58:31 -0800
Subject: [PATCH 0921/1225] Raise exception on missing unused input_map keys
 with C API enabled.

Without this change, the C++ ImportGraphDef API returns unused
input_map keys (which are plumbed through to the C API as
well). However, the Python import_graph_def API requires slightly
different semantics: it throws an error for unused input_map keys that
are missing from the GraphDef.

This change modifies the C and C++ APIs to limit the returned keys to
those missing from the GraphDef, and plumbs this through to the C
API-enabled import_graph_def implementation.

Note that this is a change to the existing C API. Luckily the modified
method hasn't been released yet, so it's ok to change it.

PiperOrigin-RevId: 178783957
---
 tensorflow/c/c_api.cc                         | 37 +++++-----
 tensorflow/c/c_api.h                          | 14 ++--
 tensorflow/c/c_api_internal.h                 |  8 +--
 tensorflow/c/c_api_test.cc                    |  4 +-
 tensorflow/core/graph/graph_constructor.cc    | 71 ++++++++++++-------
 tensorflow/core/graph/graph_constructor.h     |  7 +-
 .../core/graph/graph_constructor_test.cc      | 54 ++++++++++----
 tensorflow/python/BUILD                       |  1 +
 tensorflow/python/client/tf_session.i         | 17 +++++
 tensorflow/python/client/tf_session_helper.cc | 15 ++++
 tensorflow/python/client/tf_session_helper.h  |  4 ++
 tensorflow/python/framework/importer.py       | 12 +++-
 tensorflow/python/framework/importer_test.py  | 14 ++--
 13 files changed, 179 insertions(+), 79 deletions(-)

diff --git a/tensorflow/c/c_api.cc b/tensorflow/c/c_api.cc
index 13253ced49..6f5abd074c 100644
--- a/tensorflow/c/c_api.cc
+++ b/tensorflow/c/c_api.cc
@@ -1917,12 +1917,12 @@ void TF_ImportGraphDefResultsReturnOperations(TF_ImportGraphDefResults* results,
   *opers = results->return_nodes.data();
 }
 
-void TF_ImportGraphDefResultsUnusedInputMappings(
-    TF_ImportGraphDefResults* results, int* num_unused_input_mappings,
+void TF_ImportGraphDefResultsMissingUnusedInputMappings(
+    TF_ImportGraphDefResults* results, int* num_missing_unused_input_mappings,
     const char*** src_names, int** src_indexes) {
-  *num_unused_input_mappings = results->unused_key_names.size();
-  *src_names = results->unused_key_names.data();
-  *src_indexes = results->unused_key_indexes.data();
+  *num_missing_unused_input_mappings = results->missing_unused_key_names.size();
+  *src_names = results->missing_unused_key_names.data();
+  *src_indexes = results->missing_unused_key_indexes.data();
 }
 
 void TF_DeleteImportGraphDefResults(TF_ImportGraphDefResults* results) {
@@ -1962,18 +1962,21 @@ static void GraphImportGraphDefLocked(TF_Graph* graph, const GraphDef& def,
     tf_results->return_nodes[i] = ToOperation(results.return_nodes[i]);
   }
 
-  // Populate unused map keys
-  DCHECK(tf_results->unused_key_names.empty());
-  DCHECK(tf_results->unused_key_indexes.empty());
-  DCHECK(tf_results->unused_key_names_data.empty());
-  tf_results->unused_key_names.resize(results.unused_input_map_keys.size());
-  tf_results->unused_key_indexes.resize(results.unused_input_map_keys.size());
-  for (int i = 0; i < results.unused_input_map_keys.size(); ++i) {
-    TensorId id = results.unused_input_map_keys[i];
-    tf_results->unused_key_names_data.push_back(id.first.ToString());
-    tf_results->unused_key_names[i] =
-        tf_results->unused_key_names_data.back().c_str();
-    tf_results->unused_key_indexes[i] = id.second;
+  // Populate missing unused map keys
+  DCHECK(tf_results->missing_unused_key_names.empty());
+  DCHECK(tf_results->missing_unused_key_indexes.empty());
+  DCHECK(tf_results->missing_unused_key_names_data.empty());
+
+  size_t size = results.missing_unused_input_map_keys.size();
+  tf_results->missing_unused_key_names.resize(size);
+  tf_results->missing_unused_key_indexes.resize(size);
+
+  for (int i = 0; i < size; ++i) {
+    TensorId id = results.missing_unused_input_map_keys[i];
+    tf_results->missing_unused_key_names_data.push_back(id.first.ToString());
+    tf_results->missing_unused_key_names[i] =
+        tf_results->missing_unused_key_names_data.back().c_str();
+    tf_results->missing_unused_key_indexes[i] = id.second;
   }
 }
 
diff --git a/tensorflow/c/c_api.h b/tensorflow/c/c_api.h
index df7fe222b1..de9527f86d 100644
--- a/tensorflow/c/c_api.h
+++ b/tensorflow/c/c_api.h
@@ -962,16 +962,16 @@ TF_CAPI_EXPORT extern void TF_ImportGraphDefResultsReturnOperations(
     TF_ImportGraphDefResults* results, int* num_opers, TF_Operation*** opers);
 
 // Fetches any input mappings requested via
-// TF_ImportGraphDefOptionsAddInputMapping() that weren't used as input to any
-// node in the imported graph def. The number of fetched mappings is returned in
-// `num_unused_input_mappings`. The array of each mapping's source node name is
-// returned in `src_names`, and the array of each mapping's source index is
-// returned in `src_indexes`.
+// TF_ImportGraphDefOptionsAddInputMapping() that didn't appear in the GraphDef
+// and weren't used as input to any node in the imported graph def. The number
+// of fetched mappings is returned in `num_missing_unused_input_mappings`. The
+// array of each mapping's source node name is returned in `src_names`, and the
+// array of each mapping's source index is returned in `src_indexes`.
 //
 // `*src_names`, `*src_indexes`, and the memory backing each string in
 // `src_names` are owned by and have the lifetime of `results`.
-TF_CAPI_EXPORT extern void TF_ImportGraphDefResultsUnusedInputMappings(
-    TF_ImportGraphDefResults* results, int* num_unused_input_mappings,
+TF_CAPI_EXPORT extern void TF_ImportGraphDefResultsMissingUnusedInputMappings(
+    TF_ImportGraphDefResults* results, int* num_missing_unused_input_mappings,
     const char*** src_names, int** src_indexes);
 
 // Deletes a results object returned by TF_GraphImportGraphDefWithResults().
diff --git a/tensorflow/c/c_api_internal.h b/tensorflow/c/c_api_internal.h
index aac333d9e2..6df77a7f9b 100644
--- a/tensorflow/c/c_api_internal.h
+++ b/tensorflow/c/c_api_internal.h
@@ -143,11 +143,11 @@ struct TF_ImportGraphDefOptions {
 struct TF_ImportGraphDefResults {
   std::vector<TF_Output> return_tensors;
   std::vector<TF_Operation*> return_nodes;
-  std::vector<const char*> unused_key_names;
-  std::vector<int> unused_key_indexes;
+  std::vector<const char*> missing_unused_key_names;
+  std::vector<int> missing_unused_key_indexes;
 
-  // Backing memory for unused_key_names values.
-  std::list<tensorflow::string> unused_key_names_data;
+  // Backing memory for missing_unused_key_names values.
+  std::list<tensorflow::string> missing_unused_key_names_data;
 };
 
 struct TF_DeviceList {
diff --git a/tensorflow/c/c_api_test.cc b/tensorflow/c/c_api_test.cc
index 6ec1db8ccf..4e89b4fc43 100644
--- a/tensorflow/c/c_api_test.cc
+++ b/tensorflow/c/c_api_test.cc
@@ -773,7 +773,7 @@ TEST(CAPI, ImportGraphDef_WithReturnOutputs) {
   TF_DeleteStatus(s);
 }
 
-TEST(CAPI, ImportGraphDef_UnusedInputMappings) {
+TEST(CAPI, ImportGraphDef_MissingUnusedInputMappings) {
   TF_Status* s = TF_NewStatus();
   TF_Graph* graph = TF_NewGraph();
 
@@ -816,7 +816,7 @@ TEST(CAPI, ImportGraphDef_UnusedInputMappings) {
   int num_unused_input_mappings;
   const char** src_names;
   int* src_indexes;
-  TF_ImportGraphDefResultsUnusedInputMappings(
+  TF_ImportGraphDefResultsMissingUnusedInputMappings(
       results, &num_unused_input_mappings, &src_names, &src_indexes);
   ASSERT_EQ(1, num_unused_input_mappings);
   EXPECT_EQ(string("fake"), string(src_names[0]));
diff --git a/tensorflow/core/graph/graph_constructor.cc b/tensorflow/core/graph/graph_constructor.cc
index 6e72d73918..e19f4aebba 100644
--- a/tensorflow/core/graph/graph_constructor.cc
+++ b/tensorflow/core/graph/graph_constructor.cc
@@ -113,20 +113,20 @@ class GraphConstructor {
   typedef gtl::ArraySlice<const NodeDef*> NodeDefSlice;
 
   // versions and library may be nullptr
-  static Status Construct(const Options& opts, NodeDefSlice node_defs,
-                          const VersionDef* versions,
-                          const FunctionDefLibrary* library, Graph* g,
-                          ShapeRefiner* refiner,
-                          std::vector<std::pair<Node*, int>>* return_tensors,
-                          std::vector<Node*>* return_nodes,
-                          std::vector<TensorId>* unused_input_map_keys) {
+  static Status Construct(
+      const Options& opts, NodeDefSlice node_defs, const VersionDef* versions,
+      const FunctionDefLibrary* library, Graph* g, ShapeRefiner* refiner,
+      std::vector<std::pair<Node*, int>>* return_tensors,
+      std::vector<Node*>* return_nodes,
+      std::vector<TensorId>* missing_unused_input_map_keys) {
     if (versions) {
       TF_RETURN_IF_ERROR(CheckVersions(*versions, TF_GRAPH_DEF_VERSION,
                                        TF_GRAPH_DEF_VERSION_MIN_PRODUCER,
                                        "GraphDef", "graph"));
     }
     GraphConstructor c(opts, node_defs, versions, library, g, refiner,
-                       return_tensors, return_nodes, unused_input_map_keys);
+                       return_tensors, return_nodes,
+                       missing_unused_input_map_keys);
     const Status s = c.TryImport();
     if (!s.ok()) c.Undo();
     return s;
@@ -139,7 +139,7 @@ class GraphConstructor {
                    ShapeRefiner* refiner,
                    std::vector<std::pair<Node*, int>>* return_tensors,
                    std::vector<Node*>* return_nodes,
-                   std::vector<TensorId>* unused_input_map_keys)
+                   std::vector<TensorId>* missing_unused_input_map_keys)
       : opts_(opts),
         node_defs_(node_defs),
         versions_(versions),
@@ -150,7 +150,7 @@ class GraphConstructor {
         refiner_(refiner),
         return_tensors_(return_tensors),
         return_nodes_(return_nodes),
-        unused_input_map_keys_(unused_input_map_keys) {}
+        missing_unused_input_map_keys_(missing_unused_input_map_keys) {}
 
   Status TryImport() {
     TF_RETURN_IF_ERROR(EnsureNoNameCollisions());
@@ -162,6 +162,7 @@ class GraphConstructor {
     TF_RETURN_IF_ERROR(UpdateVersionDef());
     TF_RETURN_IF_ERROR(PopulateReturnTensors());
     TF_RETURN_IF_ERROR(PopulateReturnNodes());
+    TF_RETURN_IF_ERROR(PopulateMissingUnusedInputMapKeys());
     UpdateUniquifiedColocationNames();
     FixupSourceAndSinkEdges(g_);
     return Status::OK();
@@ -176,6 +177,7 @@ class GraphConstructor {
   Status UpdateVersionDef();
   Status PopulateReturnTensors();
   Status PopulateReturnNodes();
+  Status PopulateMissingUnusedInputMapKeys();
 
   void Undo();
 
@@ -242,9 +244,10 @@ class GraphConstructor {
   std::vector<Node*>* return_nodes_;
 
   // May be null. Not owned.
-  std::vector<TensorId>* unused_input_map_keys_;
+  std::vector<TensorId>* missing_unused_input_map_keys_;
 
-  // Intermediate datastructure used to populate `unused_input_map_keys_`.
+  // Intermediate datastructure used to populate
+  // `missing_unused_input_map_keys_`.
   std::set<TensorId> used_input_map_keys_;
 
   // Mapping from node name to the index within node_defs_.
@@ -1024,15 +1027,6 @@ Status GraphConstructor::Convert() {
                                    " nodes in a cycle");
   }
 
-  // Update unused_input_map_keys_
-  if (unused_input_map_keys_ != nullptr) {
-    for (const auto& pair : opts_.input_map) {
-      if (used_input_map_keys_.find(pair.first) == used_input_map_keys_.end()) {
-        unused_input_map_keys_->push_back(pair.first);
-      }
-    }
-  }
-
   return Status::OK();
 }
 
@@ -1122,6 +1116,33 @@ Status GraphConstructor::PopulateReturnNodes() {
   return Status::OK();
 }
 
+Status GraphConstructor::PopulateMissingUnusedInputMapKeys() {
+  if (missing_unused_input_map_keys_ == nullptr) return Status::OK();
+  for (const auto& input_map_pair : opts_.input_map) {
+    TensorId key = input_map_pair.first;
+    if (used_input_map_keys_.count(key) > 0) continue;
+
+    auto pair = gdef_nodes_.find(key.first);
+    if (pair == gdef_nodes_.end()) {
+      // key's node doesn't exist in GraphDef
+      missing_unused_input_map_keys_->push_back(key);
+      continue;
+    }
+
+    // Check that key's index is in bounds. Get the number of outputs from the
+    // NodeDef, rather than the imported Node, since the Node may not exist if
+    // opts_.skip_mapped_nodes is true.
+    const NodeDef* node_def = node_defs_[pair->second.gdef_index];
+    const OpDef* op_def;
+    TF_RETURN_IF_ERROR(g_->op_registry()->LookUpOpDef(node_def->op(), &op_def));
+    if (key.second >= op_def->output_arg_size()) {
+      // key's index out of bounds
+      missing_unused_input_map_keys_->push_back(key);
+    }
+  }
+  return Status::OK();
+}
+
 void GraphConstructor::Undo() {
   for (const auto& iter : gdef_nodes_) {
     if (iter.second.node != nullptr) {
@@ -1153,7 +1174,7 @@ Status ConvertGraphDefToGraph(const GraphConstructorOptions& opts,
   return GraphConstructor::Construct(
       opts, gdef.node(), &gdef.versions(), &gdef.library(), g, &refiner,
       /*return_tensors=*/nullptr, /*return_nodes=*/nullptr,
-      /*unused_input_map_keys=*/nullptr);
+      /*missing_unused_input_map_keys=*/nullptr);
 }
 
 Status ConvertNodeDefsToGraph(const GraphConstructorOptions& opts,
@@ -1167,7 +1188,7 @@ Status ConvertNodeDefsToGraph(const GraphConstructorOptions& opts,
   return GraphConstructor::Construct(opts, node_defs, nullptr, nullptr, g,
                                      &refiner, /*return_tensors=*/nullptr,
                                      /*return_nodes=*/nullptr,
-                                     /*unused_input_map_keys=*/nullptr);
+                                     /*missing_unused_input_map_keys=*/nullptr);
 }
 
 Status ImportGraphDef(const ImportGraphDefOptions& opts, const GraphDef& gdef,
@@ -1196,7 +1217,7 @@ Status ImportGraphDef(const ImportGraphDefOptions& opts, const GraphDef& gdef,
 
   if (results != nullptr) {
     if (!results->return_tensors.empty() || !results->return_nodes.empty() ||
-        !results->unused_input_map_keys.empty()) {
+        !results->missing_unused_input_map_keys.empty()) {
       return errors::InvalidArgument(
           "All fields in results argument to ImportGraphDef() must be empty.");
     }
@@ -1239,7 +1260,7 @@ Status ImportGraphDef(const ImportGraphDefOptions& opts, const GraphDef& gdef,
     return GraphConstructor::Construct(
         opts, gdef.node(), &gdef.versions(), &gdef.library(), g, refiner,
         &results->return_tensors, &results->return_nodes,
-        &results->unused_input_map_keys);
+        &results->missing_unused_input_map_keys);
   }
 }
 
diff --git a/tensorflow/core/graph/graph_constructor.h b/tensorflow/core/graph/graph_constructor.h
index b4dd2ba51a..07814b2ef7 100644
--- a/tensorflow/core/graph/graph_constructor.h
+++ b/tensorflow/core/graph/graph_constructor.h
@@ -148,9 +148,10 @@ struct ImportGraphDefResults {
   // The requested nodes associated with ImportGraphDefOptions::return_nodes.
   std::vector<Node*> return_nodes;
 
-  // Keys in ImportGraphDefOptions::input_map that weren't used as an input to
-  // any node in`gdef`.
-  std::vector<TensorId> unused_input_map_keys;
+  // Keys in ImportGraphDefOptions::input_map that don't appear in `gdef` and
+  // weren't used as an input to any node in `gdef`. These keys are likely due
+  // to typos, and callers may wish to treat their existence as an error.
+  std::vector<TensorId> missing_unused_input_map_keys;
 };
 
 // Adds the graph in GraphDef `gdef` into an existing Graph `*g`.
diff --git a/tensorflow/core/graph/graph_constructor_test.cc b/tensorflow/core/graph/graph_constructor_test.cc
index 9be3de2388..01bb1ac748 100644
--- a/tensorflow/core/graph/graph_constructor_test.cc
+++ b/tensorflow/core/graph/graph_constructor_test.cc
@@ -1433,7 +1433,7 @@ TEST_F(GraphConstructorTest, ImportGraphDef_InputMapDuplicateNodeNames) {
       &refiner);
 }
 
-TEST_F(GraphConstructorTest, ImportGraphDef_InputMapUnusedKeys) {
+TEST_F(GraphConstructorTest, ImportGraphDef_InputMapMissingUnusedKeys) {
   ShapeRefiner refiner(TF_GRAPH_DEF_VERSION, graph_.op_registry());
 
   // No input map
@@ -1443,10 +1443,10 @@ TEST_F(GraphConstructorTest, ImportGraphDef_InputMapUnusedKeys) {
       "node { name: 'W1' op: 'TestParams' }"
       "node { name: 'input' op: 'TestInput' }",
       opts, &refiner, &results);
-  EXPECT_TRUE(results.unused_input_map_keys.empty());
+  EXPECT_TRUE(results.missing_unused_input_map_keys.empty());
 
-  // Non-empty unused_input_map_keys
-  results.unused_input_map_keys.push_back(TensorId());
+  // Non-empty missing_unused_input_map_keys
+  results.missing_unused_input_map_keys.push_back(TensorId());
   ExpectError(
       "node { name: 'W2' op: 'TestParams' }", opts,
       {"All fields in results argument to ImportGraphDef() must be empty."},
@@ -1454,13 +1454,16 @@ TEST_F(GraphConstructorTest, ImportGraphDef_InputMapUnusedKeys) {
 
   // Input map with some used, some unused keys
   const int kControlSlot = Graph::kControlSlot;
-  results.unused_input_map_keys.clear();
+  results.missing_unused_input_map_keys.clear();
   opts.input_map[TensorId("W2", kControlSlot)] = TensorId("W1", kControlSlot);
   opts.input_map[TensorId("new_input", 0)] = TensorId("input", 0);
   opts.input_map[TensorId("new_input", 1)] = TensorId("input", 0);
-  opts.input_map[TensorId("new_input", kControlSlot)] =
-      TensorId("input", kControlSlot);
-  opts.input_map[TensorId("t1", 1)] = TensorId("input", 0);
+  // Unused and missing (nonexistent index)
+  opts.input_map[TensorId("new_input", 3)] = TensorId("input", 0);
+  // Unused and missing (nonexistent node)
+  opts.input_map[TensorId("DNE", 0)] = TensorId("input", 0);
+  // Unused but not missing
+  opts.input_map[TensorId("t1", 0)] = TensorId("W1", 0);
   ExpectOK(
       R"EOF(
       node { name: 'W2' op: 'TestParams' }
@@ -1470,9 +1473,36 @@ TEST_F(GraphConstructorTest, ImportGraphDef_InputMapUnusedKeys) {
       )EOF",
       opts, &refiner, &results);
 
-  std::vector<TensorId> expected_unused_keys = {
-      TensorId("new_input", kControlSlot), TensorId("t1", 1)};
-  EXPECT_EQ(results.unused_input_map_keys, expected_unused_keys);
+  std::set<TensorId> expected_unused_keys = {TensorId("new_input", 3),
+                                             TensorId("DNE", 0)};
+  ASSERT_EQ(results.missing_unused_input_map_keys.size(),
+            expected_unused_keys.size());
+
+  std::set<TensorId> actual_unused_keys(
+      results.missing_unused_input_map_keys.begin(),
+      results.missing_unused_input_map_keys.end());
+  EXPECT_EQ(actual_unused_keys, expected_unused_keys);
+
+  // Test edge case: node isn't imported due to skip_mapped_nodes, but we still
+  // have a bad input_map key involving it.
+  opts = ImportGraphDefOptions();
+  opts.input_map[TensorId("new_input", 0)] = TensorId("input", 0);
+  opts.input_map[TensorId("new_input", 1)] = TensorId("input", 1);
+  // Index out of bounds
+  opts.input_map[TensorId("new_input", 2)] = TensorId("input", 1);
+  opts.skip_mapped_nodes = true;
+  opts.prefix = "import";
+  results = ImportGraphDefResults();
+  ExpectOK(
+      R"EOF(
+      node { name: 'W2' op: 'TestParams' }
+      node { name: 'new_input' op: 'TestInput' input: [ '^W2' ] }
+      node { name: 't1' op: 'TestMul' input: [ 'new_input:0', 'new_input:1' ] }
+      )EOF",
+      opts, &refiner, &results);
+
+  ASSERT_EQ(results.missing_unused_input_map_keys.size(), 1);
+  EXPECT_EQ(results.missing_unused_input_map_keys[0], TensorId("new_input", 2));
 }
 
 TEST_F(GraphConstructorTest, ImportGraphDef_InputMapWithUnboundInput) {
@@ -1709,7 +1739,7 @@ TEST_F(GraphConstructorTest, ImportGraphDef_ReturnNodes) {
   // Check return tensors
   ASSERT_EQ(results.return_nodes.size(), 2);
   EXPECT_EQ(results.return_tensors.size(), 0);
-  EXPECT_EQ(results.unused_input_map_keys.size(), 0);
+  EXPECT_EQ(results.missing_unused_input_map_keys.size(), 0);
   EXPECT_EQ(results.return_nodes[0]->name(), "input");
   EXPECT_EQ(results.return_nodes[1]->name(), "t1");
 
diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD
index 3566a36ddd..20944d1678 100644
--- a/tensorflow/python/BUILD
+++ b/tensorflow/python/BUILD
@@ -3018,6 +3018,7 @@ tf_cuda_library(
         "//tensorflow/core:direct_session",
         "//tensorflow/core:framework",
         "//tensorflow/core:framework_internal",
+        "//tensorflow/core:graph",
         "//tensorflow/core:lib",
         "//tensorflow/core:protos_all_cc",
         "//third_party/py/numpy:headers",
diff --git a/tensorflow/python/client/tf_session.i b/tensorflow/python/client/tf_session.i
index f57c5d73bc..e424e19c77 100644
--- a/tensorflow/python/client/tf_session.i
+++ b/tensorflow/python/client/tf_session.i
@@ -183,6 +183,23 @@ tensorflow::TF_OperationOutputConsumers_wrapper {
   }
 }
 
+%ignore TF_ImportGraphDefResultsMissingUnusedInputMappings;
+%unignore TF_ImportGraphDefResultsMissingUnusedInputMappings_wrapper;
+// See comment for "%noexception TF_SessionRun_wrapper;"
+%noexception TF_ImportGraphDefResultsMissingUnusedInputMappings_wrapper;
+
+%typemap(out) std::vector<string>
+TF_ImportGraphDefResultsMissingUnusedInputMappings_wrapper{
+  $result = PyList_New($1.size());
+  if (!$result) {
+    SWIG_exception_fail(SWIG_MemoryError, "$symname: couldn't create list");
+  }
+  for (size_t i = 0; i < $1.size(); ++i) {
+    const string& input_str = $1[i];
+    PyList_SET_ITEM($result, i, PyBytes_FromStringAndSize(input_str.data(),
+                                                          input_str.size()));
+  }
+}
 
 ////////////////////////////////////////////////////////////////////////////////
 // BEGIN TYPEMAPS FOR tensorflow::TF_Run_wrapper()
diff --git a/tensorflow/python/client/tf_session_helper.cc b/tensorflow/python/client/tf_session_helper.cc
index a00fade7ac..efe50dc247 100644
--- a/tensorflow/python/client/tf_session_helper.cc
+++ b/tensorflow/python/client/tf_session_helper.cc
@@ -22,6 +22,7 @@ limitations under the License.
 #include "tensorflow/core/framework/allocator.h"
 #include "tensorflow/core/framework/log_memory.h"
 #include "tensorflow/core/framework/op_kernel.h"
+#include "tensorflow/core/graph/tensor_id.h"
 #include "tensorflow/core/lib/core/coding.h"
 #include "tensorflow/core/platform/types.h"
 #include "tensorflow/core/util/equal_graph_def.h"
@@ -439,4 +440,18 @@ std::vector<int64_t> TF_GraphGetTensorShape_wrapper(TF_Graph* graph,
   return dims;
 }
 
+std::vector<string> TF_ImportGraphDefResultsMissingUnusedInputMappings_wrapper(
+    TF_ImportGraphDefResults* results) {
+  int num_missing_unused_input_mappings;
+  const char** src_names;
+  int* src_indexes;
+  TF_ImportGraphDefResultsMissingUnusedInputMappings(
+      results, &num_missing_unused_input_mappings, &src_names, &src_indexes);
+  std::vector<string> input_strs(num_missing_unused_input_mappings);
+  for (int i = 0; i < num_missing_unused_input_mappings; ++i) {
+    input_strs[i] = TensorId(src_names[i], src_indexes[i]).ToString();
+  }
+  return input_strs;
+}
+
 }  // namespace tensorflow
diff --git a/tensorflow/python/client/tf_session_helper.h b/tensorflow/python/client/tf_session_helper.h
index 3a8506de4d..cdb68d2a23 100644
--- a/tensorflow/python/client/tf_session_helper.h
+++ b/tensorflow/python/client/tf_session_helper.h
@@ -187,6 +187,10 @@ std::vector<int64_t> TF_GraphGetTensorShape_wrapper(TF_Graph* graph,
                                                     int num_dims,
                                                     TF_Status* status);
 
+// Returns the string representations of the missing unused input mappings.
+std::vector<string> TF_ImportGraphDefResultsMissingUnusedInputMappings_wrapper(
+    TF_ImportGraphDefResults* results);
+
 }  // namespace tensorflow
 
 #endif  // TENSORFLOW_PYTHON_CLIENT_TF_SESSION_HELPER_H_
diff --git a/tensorflow/python/framework/importer.py b/tensorflow/python/framework/importer.py
index 62765aff00..d74fb25bb3 100644
--- a/tensorflow/python/framework/importer.py
+++ b/tensorflow/python/framework/importer.py
@@ -478,7 +478,17 @@ def import_graph_def(graph_def, input_map=None, return_elements=None,
         f.add_to_graph(graph)
       # pylint: enable=protected-access
 
-    # TODO(skyewm): error if unused input map key
+    # Treat input mappings that don't appear in the graph as an error, because
+    # they are likely to be due to a typo.
+    missing_unused_input_keys = (
+        c_api.TF_ImportGraphDefResultsMissingUnusedInputMappings_wrapper(
+            results))
+    if missing_unused_input_keys:
+      missing_unused_input_keys = [compat.as_str(s)
+                                   for s in missing_unused_input_keys]
+      raise ValueError(
+          'Attempted to map inputs that were not found in graph_def: [%s]'
+          % ', '.join(missing_unused_input_keys))
 
     if return_elements is None:
       return None
diff --git a/tensorflow/python/framework/importer_test.py b/tensorflow/python/framework/importer_test.py
index 7bf13ba93d..0da651c607 100644
--- a/tensorflow/python/framework/importer_test.py
+++ b/tensorflow/python/framework/importer_test.py
@@ -570,20 +570,17 @@ class ImportGraphDefTest(test.TestCase):
             return_elements=["A:B:0"])
 
   def testMissingInputMap(self):
-    if ops._USE_C_API: return  # TODO(skyewm): make this work with C API
-
     with ops.Graph().as_default():
-      with self.assertRaises(ValueError) as e:
+      with self.assertRaisesRegexp(
+          ValueError,
+          r"Attempted to map inputs that were not found in graph_def: \[B:0\]"):
         importer.import_graph_def(
             self._MakeGraphDef("""
             node { name: 'A' op: 'None' }
             """),
             input_map={"B:0": constant_op.constant(5.0)})
-      self.assertTrue("not found in graph_def: [B:0]" in str(e.exception))
 
   def testInputMapUnusedAsInput(self):
-    if ops._USE_C_API: return  # TODO(skyewm): make this work with C API
-
     with ops.Graph().as_default():
       # Mapping an unused node output should succeed.
       importer.import_graph_def(
@@ -593,13 +590,14 @@ class ImportGraphDefTest(test.TestCase):
           input_map={"A:0": constant_op.constant(5.0)})
 
       # Mapping a non-existent output of an existing node should fail.
-      with self.assertRaises(ValueError) as e:
+      with self.assertRaisesRegexp(
+          ValueError,
+          r"Attempted to map inputs that were not found in graph_def: \[A:2\]"):
         importer.import_graph_def(
             self._MakeGraphDef("""
             node { name: 'A' op: 'IntOutput' }
             """),
             input_map={"A:2": constant_op.constant(5.0)})
-      self.assertTrue("not found in graph_def: [A:2]" in str(e.exception))
 
   def testInputMapTypeMismatch(self):
     if ops._USE_C_API:
-- 
GitLab


From 2bf344a6ab8f5465b6d5ed8a98bd7af0083dbe8b Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 12 Dec 2017 11:04:58 -0800
Subject: [PATCH 0922/1225] Associative operator optimization:

Push constants down add/mul to canonicalize chains and possibly create constant nodes at the bottom. Example:

      +                +             +
     / \              / \           / \
    c1   +     -->   x   +    -->  x c1+c2
        / \             / \
       c2  x           c2 c1

Small cleanup: Consolidate code for manipulating names of nodes added or modified during constant folding.

PiperOrigin-RevId: 178785218
---
 .../grappler/optimizers/constant_folding.cc   | 105 +++++++++++++++---
 .../grappler/optimizers/constant_folding.h    |   4 +
 .../optimizers/constant_folding_test.cc       |  77 +++++++++++++
 3 files changed, 171 insertions(+), 15 deletions(-)

diff --git a/tensorflow/core/grappler/optimizers/constant_folding.cc b/tensorflow/core/grappler/optimizers/constant_folding.cc
index 3658594edd..fa3039d3f6 100644
--- a/tensorflow/core/grappler/optimizers/constant_folding.cc
+++ b/tensorflow/core/grappler/optimizers/constant_folding.cc
@@ -224,6 +224,21 @@ Status ConvertShapeToConstant(const string& op, const DataType& type,
   return Status::OK();
 }
 
+// TODO(rmlarsen): Perhaps we should move this to the GraphOptimizer base class.
+bool ConstantFolding::OptimizedNodeExists(const NodeDef& node,
+                                          StringPiece suffix) const {
+  return node_map_->NodeExists(OptimizedNodeName(node, suffix));
+}
+
+string ConstantFolding::OptimizedNodeName(const NodeDef& node) const {
+  return OptimizedNodeName(node, "");
+}
+string ConstantFolding::OptimizedNodeName(const NodeDef& node,
+                                          StringPiece suffix) const {
+  return AddPrefixToNodeName(strings::StrCat(node.name(), suffix),
+                             kConstantFoldingConst);
+}
+
 bool ConstantFolding::IsReallyConstant(const NodeDef& node) const {
   if (!IsConstant(node)) {
     return false;
@@ -296,9 +311,8 @@ Status ConstantFolding::MaterializeShapes(const GraphProperties& properties) {
               string node_name = ParseNodeName(output->input(k), &port);
               if (node_name == node.name() && port == j) {
                 // Create a const node as ShapeN's output if not already.
-                string const_name =
-                    AddPrefixToNodeName(strings::StrCat(node.name(), "-", j),
-                                        kConstantFoldingConst);
+                const string const_name =
+                    OptimizedNodeName(node, strings::StrCat("-", j));
                 if (node_map_->GetNode(const_name) == nullptr) {
                   NodeDef* added_node = graph_->add_node();
                   added_node->set_name(const_name);
@@ -439,8 +453,7 @@ Status ConstantFolding::MaterializeBroadcastGradientArgs(
       // which case there would be no reduction.
       out[j] = nullptr;
     } else {
-      string const_name = AddPrefixToNodeName(
-          strings::StrCat(node.name(), "-", j), kConstantFoldingConst);
+      string const_name = OptimizedNodeName(node, strings::StrCat("-", j));
       out[j] = node_map_->GetNode(const_name);
       if (out[j] == nullptr) {
         out[j] = graph_->add_node();
@@ -541,9 +554,7 @@ Status ConstantFolding::MaterializeReductionIndices(
   }
   // We know it's a full reduction. We can generate the set of indices to
   // reduce.
-  string const_name =
-      AddPrefixToNodeName(strings::StrCat(node->name(), "-reduction_indices"),
-                          kConstantFoldingConst);
+  string const_name = OptimizedNodeName(*node, "-reduction_indices");
   if (node_map_->GetNode(const_name)) {
     return Status::OK();
   }
@@ -844,7 +855,7 @@ Status ConstantFolding::EvaluateOneFoldable(const NodeDef& node,
   }
 
   for (size_t i = 0; i < output_tensors.size(); i++) {
-    string node_name = AddPrefixToNodeName(node.name(), kConstantFoldingConst);
+    string node_name = OptimizedNodeName(node, "");
     if (output_tensors.size() > 1) {
       node_name = strings::StrCat(node_name, "-", i);
     }
@@ -896,10 +907,8 @@ Status ConstantFolding::FoldNode(NodeDef* node, GraphDef* output_graph) {
         continue;
       }
 
-      string const_out_name =
-          AddPrefixToNodeName(node->name(), kConstantFoldingConst);
-      string const_index_name = AddPrefixToNodeName(
-          strings::StrCat(node->name(), "_index"), kConstantFoldingConst);
+      string const_out_name = OptimizedNodeName(*node);
+      string const_index_name = OptimizedNodeName(*node, "_index");
       if (node_map_->GetNode(const_out_name) ||
           node_map_->GetNode(const_index_name)) {
         // Intended name already exists.
@@ -1326,6 +1335,7 @@ Status ConstantFolding::SimplifyGraph(GraphDef* output,
       node->clear_attr();
       (*node->mutable_attr())["T"].set_type(output_type);
       *node->mutable_input(1) = AsControlDependency(node->input(1));
+      graph_modified_ = true;
       continue;
     }
     const bool safe_to_use_shapes =
@@ -1336,6 +1346,7 @@ Status ConstantFolding::SimplifyGraph(GraphDef* output,
       node->clear_attr();
       (*node->mutable_attr())["T"].set_type(output_type);
       *node->mutable_input(1) = AsControlDependency(node->input(1));
+      graph_modified_ = true;
       continue;
     }
 
@@ -1454,8 +1465,73 @@ Status ConstantFolding::SimplifyGraph(GraphDef* output,
       node_map_->AddOutput(NodeName(const_input), reciprocal_node->name());
       graph_modified_ = true;
     }
-  }
 
+    // Consider the transformation
+    //
+    //                      +                +       = parent
+    //                     / \              / \
+    //                  Const +    -- >    X   +     = children
+    //                       / \              / \
+    //                      X   Y          Const Y   = leaves
+    //
+    // where '+' denotes an associative and commutative operator like addition
+    // or multiplication. This optimization pushes constants down in the tree
+    // to canonicalize it. Moreoever, in cases where the child node has a
+    // constant input we will create a node that can be folded, e.g.
+    //
+    //    Add(C1, Add(C2, X)) -> Add(X, Add(C1, C2)) -> Add(X, C1 + C2)
+    //
+    // TODO(rmlarsen): Handle non-associative/non-commutative operators like
+    // subtraction and division, as well as mixed subtraction/addition,
+    // division/multiplication.
+    if (is_aggressive && (is_add || is_mul) &&
+        NumNonControlInputs(*node) == 2) {
+      NodeDef* left_child = node_map_->GetNode(node->input(0));
+      NodeDef* right_child = node_map_->GetNode(node->input(1));
+      // One child must be constant, and the other the same op as the parent.
+      if (node->op() != left_child->op() && node->op() != right_child->op()) {
+        continue;
+      }
+      const bool left_child_is_constant = IsReallyConstant(*left_child);
+      const bool right_child_is_constant = IsReallyConstant(*right_child);
+      if (!left_child_is_constant && !right_child_is_constant) {
+        continue;
+      }
+      if (node->device() != left_child->device() ||
+          node->device() != right_child->device()) {
+        continue;
+      }
+      NodeDef* child_node = left_child_is_constant ? right_child : left_child;
+      // Make sure that it is safe to change the value of the child node->
+      if (child_node->input_size() < 2 ||
+          NumNonControlOutputs(*child_node, *node_map_) > 1 || !has_fetch_ ||
+          nodes_to_preserve_.find(child_node->name()) !=
+              nodes_to_preserve_.end()) {
+        continue;
+      }
+
+      const int parent_const_input = left_child_is_constant ? 0 : 1;
+      const NodeDef* left_leaf = node_map_->GetNode(child_node->input(0));
+      const NodeDef* right_leaf = node_map_->GetNode(child_node->input(1));
+      const bool left_leaf_is_constant = IsReallyConstant(*left_leaf);
+      const bool right_leaf_is_constant = IsReallyConstant(*right_leaf);
+      if (left_leaf_is_constant && right_leaf_is_constant) {
+        // Child is already foldable, leave it alone.
+        continue;
+      }
+      int non_const_leaf_input = left_leaf_is_constant ? 1 : 0;
+
+      // Swap the constant child with a non-constant leaf node.
+      node_map_->UpdateInput(node->name(), node->input(parent_const_input),
+                             child_node->input(non_const_leaf_input));
+      node_map_->UpdateInput(child_node->name(),
+                             child_node->input(non_const_leaf_input),
+                             node->input(parent_const_input));
+      std::swap(*node->mutable_input(parent_const_input),
+                *child_node->mutable_input(non_const_leaf_input));
+      graph_modified_ = true;
+    }
+  }
   return Status::OK();
 }
 
@@ -1494,7 +1570,6 @@ Status ConstantFolding::RunOptimizationPass(Cluster* cluster,
   TF_RETURN_IF_ERROR(FoldGraph(output));
   node_map_.reset(new NodeMap(output));
   TF_RETURN_IF_ERROR(SimplifyGraph(output, properties, can_use_shape_info));
-
   return Status::OK();
 }
 
diff --git a/tensorflow/core/grappler/optimizers/constant_folding.h b/tensorflow/core/grappler/optimizers/constant_folding.h
index db281dc98d..87f275c1c0 100644
--- a/tensorflow/core/grappler/optimizers/constant_folding.h
+++ b/tensorflow/core/grappler/optimizers/constant_folding.h
@@ -51,6 +51,10 @@ class ConstantFolding : public GraphOptimizer {
                 const GraphDef& optimize_output, double result) override;
 
  private:
+  string OptimizedNodeName(const NodeDef& node, StringPiece suffix) const;
+  string OptimizedNodeName(const NodeDef& node) const;
+  bool OptimizedNodeExists(const NodeDef& node, StringPiece suffix) const;
+
   bool IsReallyConstant(const NodeDef& node) const;
 
   Status MaterializeShapes(const GraphProperties& properties);
diff --git a/tensorflow/core/grappler/optimizers/constant_folding_test.cc b/tensorflow/core/grappler/optimizers/constant_folding_test.cc
index 813d0cdcb0..31e52c7a4e 100644
--- a/tensorflow/core/grappler/optimizers/constant_folding_test.cc
+++ b/tensorflow/core/grappler/optimizers/constant_folding_test.cc
@@ -77,6 +77,83 @@ TEST_F(ConstantFoldingTest, SimpleFolding) {
   test::ExpectTensorEqual<float>(tensors_expected[0], tensors[0]);
 }
 
+TEST_F(ConstantFoldingTest, AddTree) {
+  tensorflow::Scope s = tensorflow::Scope::NewRootScope();
+
+  Output c1 = ops::Const(s.WithOpName("c1"), 2.0f, {1});
+  Output c2 = ops::Const(s.WithOpName("c2"), 2.0f, {2});
+  Output c4 = ops::Const(s.WithOpName("c4"), 4.0f, {2});
+  Output x = ops::Placeholder(s.WithOpName("x"), DT_FLOAT,
+                              ops::Placeholder::Shape(TensorShape({2, 2})));
+  Output add_child = ops::Add(s.WithOpName("add_child"), c2, x);
+  Output add_parent = ops::Add(s.WithOpName("add_parent"), c1, add_child);
+  Output mul_child = ops::Mul(s.WithOpName("mul_child"), c2, x);
+  Output mul_parent = ops::Mul(s.WithOpName("mul_parent"), c1, mul_child);
+  Output addmul_child = ops::Add(s.WithOpName("addmul_child"), c2, x);
+  Output addmul_parent =
+      ops::Mul(s.WithOpName("addmul_parent"), c1, addmul_child);
+
+  GrapplerItem item;
+  item.fetch = {"add_parent", "mul_parent", "addmul_parent"};
+  TF_CHECK_OK(s.ToGraphDef(&item.graph));
+
+  ConstantFolding fold(RewriterConfig::AGGRESSIVE, nullptr /* cpu_device */);
+  GraphDef output;
+  Status status = fold.Optimize(nullptr, item, &output);
+  TF_EXPECT_OK(status);
+  LOG(INFO) << "Final results =\n" << output.DebugString();
+
+  EXPECT_EQ(9, output.node_size());
+
+  // We expect the following rewrite(s) to occur (for both Add and Mul):
+  //    +                +             +
+  //   / \              / \           / \
+  // 2.0   +     -->   x   +    -->  x  4.0
+  //      / \             / \
+  //    2.0  x          2.0 2.0
+
+  for (const auto& node : output.node()) {
+    if (node.name() == "add_child") {
+      EXPECT_EQ("Const", node.op());
+      TensorProto t = node.attr().at("value").tensor();
+      EXPECT_EQ(1, t.tensor_shape().dim_size());
+      EXPECT_EQ(2, t.tensor_shape().dim(0).size());
+    } else if (node.name() == "add_parent") {
+      EXPECT_EQ("Add", node.op());
+      EXPECT_EQ(2, node.input_size());
+      EXPECT_EQ("x", node.input(0));
+      EXPECT_EQ("add_child", node.input(1));
+    } else if (node.name() == "mul_child") {
+      EXPECT_EQ("Const", node.op());
+      TensorProto t = node.attr().at("value").tensor();
+      EXPECT_EQ(1, t.tensor_shape().dim_size());
+      EXPECT_EQ(2, t.tensor_shape().dim(0).size());
+    } else if (node.name() == "mul_parent") {
+      EXPECT_EQ("Mul", node.op());
+      EXPECT_EQ(2, node.input_size());
+      EXPECT_EQ("x", node.input(0));
+      EXPECT_EQ("mul_child", node.input(1));
+    } else if (node.name() == "addmul_child") {
+      // Unchanged.
+      EXPECT_EQ("Add", node.op());
+      EXPECT_EQ(2, node.input_size());
+      EXPECT_EQ("c2", node.input(0));
+      EXPECT_EQ("x", node.input(1));
+    }
+  }
+
+  // Check that the reciprocals have the expected value.
+  std::vector<string> fetch = {"c4"};
+  auto tensor_expected = EvaluateNodes(item.graph, fetch);
+  EXPECT_EQ(fetch.size(), tensor_expected.size());
+  fetch = {"add_child", "mul_child"};
+  auto tensors = EvaluateNodes(output, fetch);
+  EXPECT_EQ(fetch.size(), tensors.size());
+  for (int i = 0; i < fetch.size(); i++) {
+    test::ExpectTensorEqual<float>(tensor_expected[0], tensors[i]);
+  }
+}
+
 TEST_F(ConstantFoldingTest, NeutralElement) {
   for (bool use_const : {true, false}) {
     tensorflow::Scope s = tensorflow::Scope::NewRootScope();
-- 
GitLab


From a7c11aa3cafea286947a8131523fe1c6dc0b7174 Mon Sep 17 00:00:00 2001
From: Olivia Nordquist <nolivia@google.com>
Date: Tue, 12 Dec 2017 11:17:00 -0800
Subject: [PATCH 0923/1225] disabling flaky test

PiperOrigin-RevId: 178787158
---
 tensorflow/compiler/tests/BUILD | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/tensorflow/compiler/tests/BUILD b/tensorflow/compiler/tests/BUILD
index a251dd1a3c..4f458ecff8 100644
--- a/tensorflow/compiler/tests/BUILD
+++ b/tensorflow/compiler/tests/BUILD
@@ -380,7 +380,10 @@ tf_xla_py_test(
     size = "small",
     srcs = ["random_ops_test.py"],
     # TODO(b/31361304): enable RNG ops on GPU when parallelized.
-    disabled_backends = ["gpu"],
+    disabled_backends = [
+        "gpu",
+        "cpu",
+    ],
     tags = [
         "manual",
         "no_oss",
-- 
GitLab


From 4e1a7a74b61aa02bc9c3104706afb2153faefddf Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 12 Dec 2017 11:18:28 -0800
Subject: [PATCH 0924/1225] Add CompositeNodeManager for Grappler
 VirtualScheduler.

CompositeNodeManager has per-device LIFO manager, FirstReadyManagers for _Send
and _Recv ops, and chooses FirstReady among the ops from per-device LIFOManager
and _Send and _Recv FirstReadyManagers.

This one can maximizes producer-consumer locality within a device (with LIFO),
but does not introduce previously reported scheduling inefficiency w.r.t.
multi-device execution with separately managing _Send and _Recv ops and global
FirstReady policy across devices.

It's implemented, but not enabled; VirtualScheduler still uses
FirstReadyManager.

PiperOrigin-RevId: 178787352
---
 .../core/grappler/costs/virtual_scheduler.cc  | 148 +++++++++++++
 .../core/grappler/costs/virtual_scheduler.h   | 104 ++++------
 .../grappler/costs/virtual_scheduler_test.cc  | 196 ++++++++++++++----
 3 files changed, 350 insertions(+), 98 deletions(-)

diff --git a/tensorflow/core/grappler/costs/virtual_scheduler.cc b/tensorflow/core/grappler/costs/virtual_scheduler.cc
index 1554aeb3c0..1e3da6f525 100644
--- a/tensorflow/core/grappler/costs/virtual_scheduler.cc
+++ b/tensorflow/core/grappler/costs/virtual_scheduler.cc
@@ -91,6 +91,152 @@ struct RecvNodeDescriptorEqual {
 };
 }  // namespace
 
+// ReadyNodeManager
+const NodeDef* LIFOManager::GetCurrNode() {
+  CHECK(!nodes_.empty()) << "GetCurrNode(), but there's no ready node";
+  if (curr_pos_ == nodes_.end()) {
+    curr_pos_ = --(nodes_.rbegin().base());  // Last one in the list.
+  }
+  // Once curr_pos_ is set to a valid entry in the list, we keep using the
+  // cached curr_pos_ until RemoveCurrNode() is called. AddNode() will not
+  // change the GetCurrNode() return value.
+  return *curr_pos_;
+}
+
+void LIFOManager::RemoveCurrNode() {
+  // Make sure we have curr_pos_ ready to be removed.
+  GetCurrNode();
+  // Note curr_pos_ may not be pointing the last element if some nodes are
+  // added.
+  nodes_.erase(curr_pos_);
+
+  curr_pos_ = nodes_.end();  // Reset curr_pos_.
+}
+
+FirstReadyManager::FirstReadyManager(
+    const std::unordered_map<const NodeDef*, NodeState>* node_state)
+    : ReadyNodeManager(), node_state_(node_state) {
+  std::make_heap(nodes_.begin(), nodes_.end());
+  greater_ = [this](const NodeDef* a, const NodeDef* b) -> bool {
+    // Note: we need a node with minimum time_ready, not
+    // maximum; hence, using a > b for comparison function.
+    return node_state_->at(a).time_ready > node_state_->at(b).time_ready;
+  };
+}
+
+const NodeDef* FirstReadyManager::GetCurrNode() {
+  if (nodes_.empty()) {
+    // Nothing in the node_; probably, the very first call. Move
+    // waiting_queue_ to node_.
+    DrainWaitingQueue();
+    CHECK(!nodes_.empty()) << "GetCurrNode(), but there's no ready node";
+  }
+  return nodes_.front();
+}
+
+void FirstReadyManager::RemoveCurrNode() {
+  if (nodes_.empty()) {
+    // Make sure that there is a node to be removed at the front of nodes_.
+    GetCurrNode();
+  }
+  std::pop_heap(nodes_.begin(), nodes_.end(), greater_);
+  nodes_.pop_back();
+  DrainWaitingQueue();
+}
+
+bool FirstReadyManager::Empty() const {
+  return nodes_.empty() && waiting_queue_.empty();
+}
+
+void FirstReadyManager::DrainWaitingQueue() {
+  for (const auto* node : waiting_queue_) {
+    // push_heap in AddNode() and pop_heap in RemoveCurrNode() guarantees that
+    // the first element is the node with minimum time_ready.
+    nodes_.push_back(node);
+    std::push_heap(nodes_.begin(), nodes_.end(), greater_);
+  }
+  waiting_queue_.clear();
+}
+
+CompositeNodeManager::CompositeNodeManager(
+    const std::unordered_map<const NodeDef*, NodeState>* node_state)
+    : ReadyNodeManager(),
+      send_manager_(node_state),
+      recv_manager_(node_state),
+      node_state_(node_state) {
+  curr_node_ = nullptr;
+}
+
+void CompositeNodeManager::AddNode(const NodeDef* node) {
+  if (IsSend(*node)) {
+    send_manager_.AddNode(node);
+  } else if (IsRecv(*node)) {
+    recv_manager_.AddNode(node);
+  } else {
+    const auto& device = node_state_->at(node).device_name;
+    ops_lifo_map_[device].AddNode(node);
+  }
+}
+
+const NodeDef* CompositeNodeManager::GetCurrNode() {
+  if (curr_node_) return curr_node_;
+
+  // Locally (normal ops, not _Send / _Recv) LIFO,
+  // Globally (among the LIFO-selected ops from each device and _Send and
+  // _Recv) FirstReady.
+  std::vector<std::pair<const NodeDef*, Costs::Duration>> candidates;
+  for (auto& ops_lifo : ops_lifo_map_) {
+    if (!ops_lifo.second.Empty()) {
+      const auto* op = ops_lifo.second.GetCurrNode();
+      candidates.emplace_back(op, node_state_->at(op).time_ready);
+    }
+  }
+  if (!send_manager_.Empty()) {
+    const auto* send = send_manager_.GetCurrNode();
+    candidates.emplace_back(send, node_state_->at(send).time_ready);
+  }
+  if (!recv_manager_.Empty()) {
+    const auto* recv = recv_manager_.GetCurrNode();
+    candidates.emplace_back(recv, node_state_->at(recv).time_ready);
+  }
+  CHECK(!candidates.empty());
+  auto first_ready =
+      std::min_element(candidates.begin(), candidates.end(),
+                       [](const std::pair<const NodeDef*, Costs::Duration>& a,
+                          const std::pair<const NodeDef*, Costs::Duration>& b) {
+                         return a.second < b.second;
+                       });
+  // Next time we call GetCurrNode(), it just returns the cached one,
+  // curr_node_ until we call RemovCurrNode().
+  curr_node_ = first_ready->first;
+
+  return curr_node_;
+}
+
+void CompositeNodeManager::RemoveCurrNode() {
+  const auto* node = GetCurrNode();
+  if (IsSend(*node)) {
+    send_manager_.RemoveCurrNode();
+  } else if (IsRecv(*node)) {
+    recv_manager_.RemoveCurrNode();
+  } else {
+    const auto device = node_state_->at(node).device_name;
+    ops_lifo_map_[device].RemoveCurrNode();
+  }
+  // Reset curr_node_ so that GetCurrNode() finds another node.
+  curr_node_ = nullptr;
+}
+
+bool CompositeNodeManager::Empty() const {
+  // Empty if all the ready managers are empty.
+  bool empty = true;
+  for (const auto& ops_lifo : ops_lifo_map_) {
+    empty &= ops_lifo.second.Empty();
+  }
+  return empty && send_manager_.Empty() && recv_manager_.Empty();
+}
+
+// VirtualScheduler
 VirtualScheduler::VirtualScheduler(const GrapplerItem* grappler_item,
                                    const bool use_static_shapes,
                                    Cluster* cluster)
@@ -112,6 +258,8 @@ ReadyNodeManager* VirtualScheduler::ReadyNodeManagerFactory(
     return new LIFOManager();
   } else if (ready_node_manager == "FirstReady") {
     return new FirstReadyManager(GetNodeStates());
+  } else if (ready_node_manager == "Composite") {
+    return new CompositeNodeManager(GetNodeStates());
   }
   LOG(FATAL) << "Not a valid ready node manager: " << ready_node_manager;
 }
diff --git a/tensorflow/core/grappler/costs/virtual_scheduler.h b/tensorflow/core/grappler/costs/virtual_scheduler.h
index 3018e3509a..74088780cb 100644
--- a/tensorflow/core/grappler/costs/virtual_scheduler.h
+++ b/tensorflow/core/grappler/costs/virtual_scheduler.h
@@ -158,25 +158,8 @@ class LIFOManager : public ReadyNodeManager {
   LIFOManager() : ReadyNodeManager() {}
   ~LIFOManager() override {}
   void AddNode(const NodeDef* node) override { nodes_.push_back(node); }
-  const NodeDef* GetCurrNode() override {
-    CHECK(!nodes_.empty()) << "GetCurrNode(), but there's no ready node";
-    if (curr_pos_ == nodes_.end()) {
-      curr_pos_ = --(nodes_.rbegin().base());  // Last one in the list.
-    }
-    // Once curr_pos_ is set to a valid entry in the list, we keep using the
-    // cached curr_pos_ until RemoveCurrNode() is called. AddNode() will not
-    // change the GetCurrNode() return value.
-    return *curr_pos_;
-  }
-  void RemoveCurrNode() override {
-    // Make sure we have curr_pos_ ready to be removed.
-    GetCurrNode();
-    // Note curr_pos_ may not be pointing the last element if some nodes are
-    // added.
-    nodes_.erase(curr_pos_);
-
-    curr_pos_ = nodes_.end();  // Reset curr_pos_.
-  }
+  const NodeDef* GetCurrNode() override;
+  void RemoveCurrNode() override;
   bool Empty() const override { return nodes_.empty(); }
 
  private:
@@ -194,54 +177,16 @@ class LIFOManager : public ReadyNodeManager {
 class FirstReadyManager : public ReadyNodeManager {
  public:
   FirstReadyManager(
-      const std::unordered_map<const NodeDef*, NodeState>* node_state)
-      : ReadyNodeManager(), node_state_(node_state) {
-    std::make_heap(nodes_.begin(), nodes_.end());
-    greater_ = [this](const NodeDef* a, const NodeDef* b) -> bool {
-      // Note: we need a node with minimum time_ready, not
-      // maximum; hence, using a > b for comparison function.
-      return node_state_->at(a).time_ready > node_state_->at(b).time_ready;
-    };
-  }
+      const std::unordered_map<const NodeDef*, NodeState>* node_state);
   ~FirstReadyManager() override {}
-
   void AddNode(const NodeDef* node) override { waiting_queue_.push_back(node); }
-
-  const NodeDef* GetCurrNode() override {
-    if (nodes_.empty()) {
-      // Nothing in the node_; probably, the very first call. Move
-      // waiting_queue_ to node_.
-      _DrainWaitingQueue();
-      CHECK(!nodes_.empty()) << "GetCurrNode(), but there's no ready node";
-    }
-    return nodes_.front();
-  }
-
-  void RemoveCurrNode() override {
-    if (nodes_.empty()) {
-      // Make sure that there is a node to be removed at the front of nodes_.
-      GetCurrNode();
-    }
-    std::pop_heap(nodes_.begin(), nodes_.end(), greater_);
-    nodes_.pop_back();
-    _DrainWaitingQueue();
-  }
-
-  bool Empty() const override {
-    return nodes_.empty() && waiting_queue_.empty();
-  }
+  const NodeDef* GetCurrNode() override;
+  void RemoveCurrNode() override;
+  bool Empty() const override;
 
  private:
   // Move all the nodes in the waiting_queue_ to nodes_.
-  void _DrainWaitingQueue() {
-    for (const auto* node : waiting_queue_) {
-      // push_heap in AddNode() and pop_heap in RemoveCurrNode() guarantees that
-      // the first element is the node with minimum time_ready.
-      nodes_.push_back(node);
-      std::push_heap(nodes_.begin(), nodes_.end(), greater_);
-    }
-    waiting_queue_.clear();
-  }
+  void DrainWaitingQueue();
 
   // nodes_ is the main queue, where we construct heap, and the front is the
   // current node.
@@ -259,6 +204,41 @@ class FirstReadyManager : public ReadyNodeManager {
   const std::unordered_map<const NodeDef*, NodeState>* node_state_;
 };
 
+// CompositeNodeManager has a few other NodeManagers: per-device LIFO for normal
+// ops (neither _Send nor _Recv) and FirstyReadyManagers for _Send ops and _Recv
+// ops, and then it chooses FirstReady among the ops chosen from each
+// internal NodeManagers. The objective is to maximize producer-consumer
+// locality within device, while processing nodes across devices, including
+// _Send and _Recv, fairly, in terms of their time_ready.
+class CompositeNodeManager : public ReadyNodeManager {
+ public:
+  CompositeNodeManager(
+      const std::unordered_map<const NodeDef*, NodeState>* node_state);
+  ~CompositeNodeManager() override {}
+
+  void AddNode(const NodeDef* node) override;
+  const NodeDef* GetCurrNode() override;
+  void RemoveCurrNode() override;
+  bool Empty() const override;
+
+ private:
+  // Internal ready node managers:
+  // LIFO for normal ops to maximize producer consumer locality.
+  // One LIFO per device.
+  std::unordered_map<string, LIFOManager> ops_lifo_map_;
+  // FirstReady for send and recv. Handle send and recv separately ensures that
+  // send and recv do not block previously read ops with LIFO schedule.
+  FirstReadyManager send_manager_;
+  FirstReadyManager recv_manager_;
+
+  // NodeState structure from VirtualScheduler to get time_ready of ready nodes.
+  // Not owned by FirstReadyManager.
+  const std::unordered_map<const NodeDef*, NodeState>* node_state_;
+
+  // Cached curr node. Set back to nullptr from RemoveCurrNode().
+  const NodeDef* curr_node_;
+};
+
 // The virtual scheduler emulates execution of nodes in a graph, considering
 // dependencies, device, etc.
 class VirtualScheduler {
diff --git a/tensorflow/core/grappler/costs/virtual_scheduler_test.cc b/tensorflow/core/grappler/costs/virtual_scheduler_test.cc
index 412b494be7..c5e6aa8989 100644
--- a/tensorflow/core/grappler/costs/virtual_scheduler_test.cc
+++ b/tensorflow/core/grappler/costs/virtual_scheduler_test.cc
@@ -44,8 +44,15 @@ class VirtualSchedulerTest : public ::testing::Test {
   NodeDef node1_, node2_, node3_, node4_, node5_, node6_;
   std::unordered_map<const NodeDef*, NodeState> node_states_;
 
+  // Device names:
   const string kCPU0 = "/job:localhost/replica:0/task:0/cpu:0";
   const string kCPU1 = "/job:localhost/replica:0/task:0/cpu:1";
+  const string kChannelFrom0To1 = "Channel from CPU0 to CPU1";
+  const string kChannelFrom1To0 = "Channel from CPU1 to CPU0";
+  // Op names:
+  const string kSend = "_Send";
+  const string kRecv = "_Recv";
+  const string kConv2D = "Conv2D";
 
   DeviceProperties GetDummyCPUDevice() {
     // Create CPU with 2 cores, 4 Ghz freq, 2 GB/s mem bandwidth.
@@ -59,29 +66,26 @@ class VirtualSchedulerTest : public ::testing::Test {
     return cpu_device;
   }
 
+  void NodeSetUp(const string& name, const string& op_name,
+                 const string& device_name, const uint64 time_ready,
+                 NodeDef* node) {
+    node->set_name(name);
+    node->set_op(op_name);
+    node->set_device(device_name);
+
+    node_states_[node] = NodeState();
+    node_states_[node].time_ready = time_ready;
+    node_states_[node].device_name = device_name;
+  }
+
   void SetUp() override {
-    // Initializes nodes for manager
-    node1_.set_name("Node1");
-    node2_.set_name("Node2");
-    node3_.set_name("Node3");
-    node4_.set_name("Node4");
-    node5_.set_name("Node5");
-    node6_.set_name("Node6");
-
-    // Initialize node_states, with time_ready in reverse order.
-    node_states_[&node1_] = NodeState();
-    node_states_[&node2_] = NodeState();
-    node_states_[&node3_] = NodeState();
-    node_states_[&node4_] = NodeState();
-    node_states_[&node5_] = NodeState();
-    node_states_[&node6_] = NodeState();
-
-    node_states_[&node6_].time_ready = 1000;
-    node_states_[&node5_].time_ready = 2000;
-    node_states_[&node4_].time_ready = 3000;
-    node_states_[&node3_].time_ready = 4000;
-    node_states_[&node2_].time_ready = 5000;
-    node_states_[&node1_].time_ready = 6000;
+    // node1_ to node6_ on kCPU0, with time_ready in reverse_order.
+    NodeSetUp("Node1", kConv2D, kCPU0, 6000, &node1_);
+    NodeSetUp("Node2", kConv2D, kCPU0, 5000, &node2_);
+    NodeSetUp("Node3", kConv2D, kCPU0, 4000, &node3_);
+    NodeSetUp("Node4", kConv2D, kCPU0, 3000, &node4_);
+    NodeSetUp("Node5", kConv2D, kCPU0, 2000, &node5_);
+    NodeSetUp("Node6", kConv2D, kCPU0, 1000, &node6_);
 
     // Initializes cluster_ and placer_.
     std::unordered_map<string, DeviceProperties> devices;
@@ -1207,15 +1211,9 @@ TEST_F(VirtualSchedulerTest, GetCurrNodeFirstReadyManager) {
   NodeDef node7;
   NodeDef node8;
   NodeDef node9;
-  node7.set_name("Node7");
-  node8.set_name("Node8");
-  node9.set_name("Node9");
-  node_states_[&node7] = NodeState();
-  node_states_[&node8] = NodeState();
-  node_states_[&node9] = NodeState();
-  node_states_[&node7].time_ready = 5;
-  node_states_[&node8].time_ready = 4;
-  node_states_[&node9].time_ready = 3;
+  NodeSetUp("Node7", kConv2D, kCPU0, 5, &node7);
+  NodeSetUp("Node8", kConv2D, kCPU0, 4, &node8);
+  NodeSetUp("Node9", kConv2D, kCPU0, 3, &node9);
 
   manager.AddNode(&node7);
   EXPECT_EQ("Node6", manager.GetCurrNode()->name());
@@ -1249,6 +1247,132 @@ TEST_F(VirtualSchedulerTest, GetCurrNodeFirstReadyManager) {
   EXPECT_TRUE(manager.Empty());
 }
 
+TEST_F(VirtualSchedulerTest, RemoveSingleNodeCompositeNodeManager) {
+  CompositeNodeManager manager = CompositeNodeManager(&node_states_);
+
+  manager.AddNode(&node1_);
+  manager.RemoveCurrNode();
+  EXPECT_TRUE(manager.Empty());
+}
+
+TEST_F(VirtualSchedulerTest, RemoveSingleNodeComopsiteNodeManager) {
+  CompositeNodeManager manager = CompositeNodeManager(&node_states_);
+
+  manager.AddNode(&node1_);
+  manager.RemoveCurrNode();
+  EXPECT_TRUE(manager.Empty());
+}
+
+TEST_F(VirtualSchedulerTest, GetAndRemoveMultipleComopsiteNodeManager) {
+  CompositeNodeManager manager = CompositeNodeManager(&node_states_);
+
+  // Add the nodes to LIFOManager.
+  manager.AddNode(&node1_);
+  manager.AddNode(&node2_);
+  manager.AddNode(&node3_);
+  manager.AddNode(&node4_);
+
+  // Keep checking current node as nodes are removed and added.
+  EXPECT_EQ("Node4", manager.GetCurrNode()->name());
+  manager.RemoveCurrNode();
+  EXPECT_EQ("Node3", manager.GetCurrNode()->name());
+  manager.AddNode(&node5_);
+  // GetCurrNode()  should return the same node even if some nodes are added,
+  // until RemoveCurrNode() is called.
+  EXPECT_EQ("Node3", manager.GetCurrNode()->name());
+  manager.RemoveCurrNode();
+  EXPECT_EQ("Node5", manager.GetCurrNode()->name());
+  manager.RemoveCurrNode();
+  EXPECT_EQ("Node2", manager.GetCurrNode()->name());
+  manager.AddNode(&node6_);
+  EXPECT_EQ("Node2", manager.GetCurrNode()->name());
+  manager.RemoveCurrNode();
+  EXPECT_EQ("Node6", manager.GetCurrNode()->name());
+  manager.RemoveCurrNode();
+  EXPECT_EQ("Node1", manager.GetCurrNode()->name());
+  manager.RemoveCurrNode();
+  EXPECT_TRUE(manager.Empty());
+}
+
+TEST_F(VirtualSchedulerTest, MultiDeviceSendRecvComopsiteNodeManager) {
+  CompositeNodeManager manager = CompositeNodeManager(&node_states_);
+
+  // Additional nodes on kCPU1
+  NodeDef node7;
+  NodeDef node8;
+  NodeDef node9;
+  NodeSetUp("Node7", kConv2D, kCPU1, 1001, &node7);
+  NodeSetUp("Node8", kConv2D, kCPU1, 2001, &node8);
+  NodeSetUp("Node9", kConv2D, kCPU1, 3001, &node9);
+
+  // Send and Recv nodes.
+  NodeDef send1;
+  NodeDef send2;
+  NodeDef recv1;
+  NodeDef recv2;
+  NodeSetUp("Send1", kSend, kChannelFrom0To1, 2002, &send1);
+  NodeSetUp("Send2", kSend, kChannelFrom1To0, 2005, &send2);
+  NodeSetUp("Recv1", kRecv, kCPU0, 2003, &recv1);
+  NodeSetUp("Recv2", kRecv, kCPU1, 2003, &recv2);
+
+  // Insert nodes.
+  manager.AddNode(&node1_);
+  manager.AddNode(&node2_);
+  manager.AddNode(&node3_);
+  manager.AddNode(&node4_);
+  manager.AddNode(&node5_);
+  manager.AddNode(&node6_);
+  manager.AddNode(&node7);
+  manager.AddNode(&node8);
+  manager.AddNode(&node9);
+  manager.AddNode(&send1);
+  manager.AddNode(&send2);
+  manager.AddNode(&recv1);
+  manager.AddNode(&recv2);
+
+  // on kCPU0; last one is node6_, on kCPU1: last one is node9;
+  // so choose one that has earliest time_ready among node6_, node9,
+  // Send1, Send2, Recv1, and Recv2.
+  EXPECT_EQ("Node6", manager.GetCurrNode()->name());
+  manager.RemoveCurrNode();
+  // Then, the next one on kCPU0 is node5_; choose the earliest time_ready node
+  // among node5_, node9, Send1, Send2, Recv1, and Recv2.
+  EXPECT_EQ("Node5", manager.GetCurrNode()->name());
+  manager.RemoveCurrNode();
+  // Next, choose among node4_, node9, Send1, Send2, Recv1, and Recv2.
+  EXPECT_EQ("Send1", manager.GetCurrNode()->name());
+  manager.RemoveCurrNode();
+  // Next, choose among node4_, node9, Sen2, Recv1, and Recv2.
+  EXPECT_EQ("Recv1", manager.GetCurrNode()->name());
+  manager.RemoveCurrNode();
+  // Next, choose among node4_, node9, Send2, and Recv2.
+  EXPECT_EQ("Recv2", manager.GetCurrNode()->name());
+  manager.RemoveCurrNode();
+  // Next, choose among node4_, node9, and Send2.
+  EXPECT_EQ("Send2", manager.GetCurrNode()->name());
+  manager.RemoveCurrNode();
+  // Next, choose between node4_, node9.
+  EXPECT_EQ("Node4", manager.GetCurrNode()->name());
+  manager.RemoveCurrNode();
+  // Next, choose between node3_, node9.
+  EXPECT_EQ("Node9", manager.GetCurrNode()->name());
+  manager.RemoveCurrNode();
+  // Next, choose between node3_, node8.
+  EXPECT_EQ("Node8", manager.GetCurrNode()->name());
+  manager.RemoveCurrNode();
+  // Next, choose between node3_, node7.
+  EXPECT_EQ("Node7", manager.GetCurrNode()->name());
+  manager.RemoveCurrNode();
+  // Then, just the nodes on kCPU1 -- LIFO.
+  EXPECT_EQ("Node3", manager.GetCurrNode()->name());
+  manager.RemoveCurrNode();
+  EXPECT_EQ("Node2", manager.GetCurrNode()->name());
+  manager.RemoveCurrNode();
+  EXPECT_EQ("Node1", manager.GetCurrNode()->name());
+  manager.RemoveCurrNode();
+  EXPECT_TRUE(manager.Empty());
+}
+
 // Create small graph, run predict costs on it, make sure the costs from the
 // summary match the hand-calculated costs.
 TEST_F(VirtualSchedulerTest, SummaryCostTest) {
@@ -1634,20 +1758,20 @@ TEST_F(VirtualSchedulerTest, InterDeviceTransfer) {
     const auto& name = x.first;
     const auto& node_info = x.second;
     const auto& op = node_info.op_info.op();
-    if (op == "_Recv") {
+    if (op == kRecv) {
       recv_op_names[get_port_num(name)] = name;
-    } else if (op == "_Send") {
+    } else if (op == kSend) {
       send_op_names[get_port_num(name)] = name;
     }
     op_count[op]++;
   }
 
   // Same number of _Send and _Recv.
-  EXPECT_EQ(op_count.at("_Send"), op_count.at("_Recv"));
+  EXPECT_EQ(op_count.at(kSend), op_count.at(kRecv));
 
   // Expect 4 Send and Recvs each: port 0, 1, and, 2, and control dependency.
-  EXPECT_EQ(op_count.at("_Recv"), 4);
-  EXPECT_EQ(op_count.at("_Send"), 4);
+  EXPECT_EQ(op_count.at(kRecv), 4);
+  EXPECT_EQ(op_count.at(kSend), 4);
 
   // Helper lambda for extracting output Tensor size.
   auto get_output_size = [this, ops_executed](const string& name) -> int64 {
-- 
GitLab


From a6af4dc3a3c116638f97df01b9e1972fcddea488 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 12 Dec 2017 11:26:41 -0800
Subject: [PATCH 0925/1225] Sliced Wasserstein Distance metric for GANs
 evaluation.

PiperOrigin-RevId: 178788810
---
 tensorflow/contrib/gan/BUILD                  |  36 +++
 .../contrib/gan/python/eval/__init__.py       |   7 +-
 .../python/eval/python/sliced_wasserstein.py  |  28 ++
 .../eval/python/sliced_wasserstein_impl.py    | 282 ++++++++++++++++++
 .../eval/python/sliced_wasserstein_test.py    | 131 ++++++++
 5 files changed, 483 insertions(+), 1 deletion(-)
 create mode 100644 tensorflow/contrib/gan/python/eval/python/sliced_wasserstein.py
 create mode 100644 tensorflow/contrib/gan/python/eval/python/sliced_wasserstein_impl.py
 create mode 100644 tensorflow/contrib/gan/python/eval/python/sliced_wasserstein_test.py

diff --git a/tensorflow/contrib/gan/BUILD b/tensorflow/contrib/gan/BUILD
index fa3f42d6af..b355a79b1a 100644
--- a/tensorflow/contrib/gan/BUILD
+++ b/tensorflow/contrib/gan/BUILD
@@ -83,6 +83,7 @@ py_library(
     deps = [
         ":classifier_metrics",
         ":eval_utils",
+        ":sliced_wasserstein",
         ":summaries",
         "//tensorflow/python:util",
     ],
@@ -503,6 +504,41 @@ py_test(
     ],
 )
 
+py_library(
+    name = "sliced_wasserstein",
+    srcs = [
+        "python/eval/python/sliced_wasserstein.py",
+        "python/eval/python/sliced_wasserstein_impl.py",
+    ],
+    srcs_version = "PY2AND3",
+    deps = [
+        "//tensorflow/python:array_ops",
+        "//tensorflow/python:constant_op",
+        "//tensorflow/python:linalg_ops",
+        "//tensorflow/python:math_ops",
+        "//tensorflow/python:nn",
+        "//tensorflow/python:nn_ops",
+        "//tensorflow/python:random_ops",
+        "//tensorflow/python:script_ops",
+        "//tensorflow/python:util",
+        "//third_party/py/numpy",
+    ],
+)
+
+py_test(
+    name = "sliced_wasserstein_test",
+    srcs = ["python/eval/python/sliced_wasserstein_test.py"],
+    srcs_version = "PY2AND3",
+    deps = [
+        ":sliced_wasserstein",
+        "//tensorflow/python:array_ops",
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:dtypes",
+        "//tensorflow/python:random_ops",
+        "//third_party/py/numpy",
+    ],
+)
+
 filegroup(
     name = "all_files",
     srcs = glob(
diff --git a/tensorflow/contrib/gan/python/eval/__init__.py b/tensorflow/contrib/gan/python/eval/__init__.py
index 7daf78bc5d..f86b851305 100644
--- a/tensorflow/contrib/gan/python/eval/__init__.py
+++ b/tensorflow/contrib/gan/python/eval/__init__.py
@@ -26,10 +26,12 @@ from __future__ import print_function
 # Collapse eval into a single namespace.
 from tensorflow.contrib.gan.python.eval.python import classifier_metrics
 from tensorflow.contrib.gan.python.eval.python import eval_utils
+from tensorflow.contrib.gan.python.eval.python import sliced_wasserstein
 from tensorflow.contrib.gan.python.eval.python import summaries
 
 from tensorflow.contrib.gan.python.eval.python.classifier_metrics import *
 from tensorflow.contrib.gan.python.eval.python.eval_utils import *
+from tensorflow.contrib.gan.python.eval.python.sliced_wasserstein import *
 from tensorflow.contrib.gan.python.eval.python.summaries import *
 # pylint: enable=wildcard-import,unused-import
 
@@ -37,7 +39,10 @@ from tensorflow.python.util.all_util import remove_undocumented
 
 _allowed_symbols = [
     'classifier_metrics',
+    'sliced_wasserstein_distance',
     'summaries',
     'eval_utils',
-] + classifier_metrics.__all__ + summaries.__all__ + eval_utils.__all__
+] + (
+    classifier_metrics.__all__ + sliced_wasserstein.__all__ +
+    summaries.__all__ + eval_utils.__all__)
 remove_undocumented(__name__, _allowed_symbols)
diff --git a/tensorflow/contrib/gan/python/eval/python/sliced_wasserstein.py b/tensorflow/contrib/gan/python/eval/python/sliced_wasserstein.py
new file mode 100644
index 0000000000..523968bed9
--- /dev/null
+++ b/tensorflow/contrib/gan/python/eval/python/sliced_wasserstein.py
@@ -0,0 +1,28 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Model evaluation tools for TFGAN."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from tensorflow.contrib.gan.python.eval.python import sliced_wasserstein_impl
+# pylint: disable=wildcard-import
+from tensorflow.contrib.gan.python.eval.python.sliced_wasserstein_impl import *
+# pylint: enable=wildcard-import
+from tensorflow.python.util.all_util import remove_undocumented
+
+__all__ = sliced_wasserstein_impl.__all__
+remove_undocumented(__name__, __all__)
diff --git a/tensorflow/contrib/gan/python/eval/python/sliced_wasserstein_impl.py b/tensorflow/contrib/gan/python/eval/python/sliced_wasserstein_impl.py
new file mode 100644
index 0000000000..9bebcacbe4
--- /dev/null
+++ b/tensorflow/contrib/gan/python/eval/python/sliced_wasserstein_impl.py
@@ -0,0 +1,282 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Implementation of Sliced Wasserstein Distance.
+
+Proposed in https://arxiv.org/abs/1710.10196 and the official Theano
+implementation that we used as reference can be found here:
+https://github.com/tkarras/progressive_growing_of_gans
+
+Note: this is not an exact distance but an approximation through random
+projections.
+"""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import numpy as np
+from tensorflow.python.framework import constant_op
+from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import linalg_ops
+from tensorflow.python.ops import math_ops
+from tensorflow.python.ops import nn
+from tensorflow.python.ops import nn_ops
+from tensorflow.python.ops import random_ops
+from tensorflow.python.ops import script_ops
+
+__all__ = ['sliced_wasserstein_distance']
+_GAUSSIAN_FILTER = np.float32([[1, 4, 6, 4, 1], [4, 16, 24, 16, 4], [
+    6, 24, 36, 24, 6
+], [4, 16, 24, 16, 4], [1, 4, 6, 4, 1]]).reshape([5, 5, 1, 1]) / 256.0
+
+
+def _laplacian_pyramid(batch, num_levels):
+  """Compute a Laplacian pyramid.
+
+  Args:
+      batch: (tensor) The batch of images (batch, height, width, channels).
+      num_levels: (int) Desired number of hierarchical levels.
+  Returns:
+      List of tensors from the highest to lowest resolution.
+  """
+  gaussian_filter = constant_op.constant(_GAUSSIAN_FILTER)
+
+  def spatial_conv(batch, gain):
+    s = array_ops.shape(batch)
+    padded = array_ops.pad(batch, [[0, 0], [2, 2], [2, 2], [0, 0]], 'REFLECT')
+    xt = array_ops.transpose(padded, [0, 3, 1, 2])
+    xt = array_ops.reshape(xt, [s[0] * s[3], s[1] + 4, s[2] + 4, 1])
+    conv_out = nn_ops.conv2d(xt, gaussian_filter * gain, [1] * 4, 'VALID')
+    conv_xt = array_ops.reshape(conv_out, [s[0], s[3], s[1], s[2]])
+    conv_xt = array_ops.transpose(conv_xt, [0, 2, 3, 1])
+    return conv_xt
+
+  def pyr_down(batch):  # matches cv2.pyrDown()
+    return spatial_conv(batch, 1)[:, ::2, ::2]
+
+  def pyr_up(batch):  # matches cv2.pyrUp()
+    s = array_ops.shape(batch)
+    zeros = array_ops.zeros([3 * s[0], s[1], s[2], s[3]])
+    res = array_ops.concat([batch, zeros], 0)
+    res = array_ops.batch_to_space(res, crops=[[0, 0], [0, 0]], block_size=2)
+    res = spatial_conv(res, 4)
+    return res
+
+  pyramid = [math_ops.to_float(batch)]
+  for _ in range(1, num_levels):
+    pyramid.append(pyr_down(pyramid[-1]))
+    pyramid[-2] -= pyr_up(pyramid[-1])
+  return pyramid
+
+
+def _batch_to_patches(batch, patches_per_image, patch_size):
+  """Extract patches from a batch.
+
+  Args:
+      batch: (tensor) The batch of images (batch, height, width, channels).
+      patches_per_image: (int) Number of patches to extract per image.
+      patch_size: (int) Size of the patches (size, size, channels) to extract.
+  Returns:
+      Tensor (batch*patches_per_image, patch_size, patch_size, channels) of
+      patches.
+  """
+
+  def py_func_random_patches(batch):
+    """Numpy wrapper."""
+    batch_size, height, width, channels = batch.shape
+    patch_count = patches_per_image * batch_size
+    hs = patch_size // 2
+    # Randomly pick patches.
+    patch_id, y, x, chan = np.ogrid[0:patch_count, -hs:hs + 1, -hs:hs + 1, 0:3]
+    img_id = patch_id // patches_per_image
+    # pylint: disable=g-no-augmented-assignment
+    # Need explicit addition for broadcast to work properly.
+    y = y + np.random.randint(hs, height - hs, size=(patch_count, 1, 1, 1))
+    x = x + np.random.randint(hs, width - hs, size=(patch_count, 1, 1, 1))
+    # pylint: enable=g-no-augmented-assignment
+    idx = ((img_id * height + y) * width + x) * channels + chan
+    patches = batch.flat[idx]
+    return patches
+
+  patches = script_ops.py_func(
+      py_func_random_patches, [batch], batch.dtype, stateful=False)
+  return patches
+
+
+def _normalize_patches(patches):
+  """Normalize patches by their mean and standard deviation.
+
+  Args:
+      patches: (tensor) The batch of patches (batch, size, size, channels).
+  Returns:
+      Tensor (batch, size, size, channels) of the normalized patches.
+  """
+  patches = array_ops.concat(patches, 0)
+  mean, variance = nn.moments(patches, [1, 2, 3], keep_dims=True)
+  patches = (patches - mean) / math_ops.sqrt(variance)
+  return array_ops.reshape(patches, [array_ops.shape(patches)[0], -1])
+
+
+def _sort_rows(matrix, num_rows):
+  """Sort matrix rows by the last column.
+
+  Args:
+      matrix: a matrix of values (row,col).
+      num_rows: (int) number of sorted rows to return from the matrix.
+  Returns:
+      Tensor (num_rows, col) of the sorted matrix top K rows.
+  """
+  tmatrix = array_ops.transpose(matrix, [1, 0])
+  sorted_tmatrix = nn_ops.top_k(tmatrix, num_rows)[0]
+  return array_ops.transpose(sorted_tmatrix, [1, 0])
+
+
+def _sliced_wasserstein(a, b, random_sampling_count, random_projection_dim):
+  """Compute the approximate sliced Wasserstein distance.
+
+  Args:
+      a: (matrix) Distribution "a" of samples (row, col).
+      b: (matrix) Distribution "b" of samples (row, col).
+      random_sampling_count: (int) Number of random projections to average.
+      random_projection_dim: (int) Dimension of the random projection space.
+  Returns:
+      Float containing the approximate distance between "a" and "b".
+  """
+  s = array_ops.shape(a)
+  means = []
+  for _ in range(random_sampling_count):
+    # Random projection matrix.
+    proj = random_ops.random_normal(
+        [array_ops.shape(a)[1], random_projection_dim])
+    proj *= math_ops.rsqrt(
+        math_ops.reduce_sum(math_ops.square(proj), 0, keep_dims=True))
+    # Project both distributions and sort them.
+    proj_a = math_ops.matmul(a, proj)
+    proj_b = math_ops.matmul(b, proj)
+    proj_a = _sort_rows(proj_a, s[0])
+    proj_b = _sort_rows(proj_b, s[0])
+    # Pairwise Wasserstein distance.
+    wdist = math_ops.reduce_mean(math_ops.abs(proj_a - proj_b))
+    means.append(wdist)
+  return math_ops.reduce_mean(means)
+
+
+def _sliced_wasserstein_svd(a, b):
+  """Compute the approximate sliced Wasserstein distance using an SVD.
+
+  This is not part of the paper, it's a variant with possibly more accurate
+  measure.
+
+  Args:
+      a: (matrix) Distribution "a" of samples (row, col).
+      b: (matrix) Distribution "b" of samples (row, col).
+  Returns:
+      Float containing the approximate distance between "a" and "b".
+  """
+  s = array_ops.shape(a)
+  # Random projection matrix.
+  sig, u = linalg_ops.svd(array_ops.concat([a, b], 0))[:2]
+  proj_a, proj_b = array_ops.split(u * sig, 2, axis=0)
+  proj_a = _sort_rows(proj_a[:, ::-1], s[0])
+  proj_b = _sort_rows(proj_b[:, ::-1], s[0])
+  # Pairwise Wasserstein distance.
+  wdist = math_ops.reduce_mean(math_ops.abs(proj_a - proj_b))
+  return wdist
+
+
+def sliced_wasserstein_distance(real_images,
+                                fake_images,
+                                resolution_min=16,
+                                patches_per_image=64,
+                                patch_size=7,
+                                random_sampling_count=1,
+                                random_projection_dim=7 * 7 * 3,
+                                use_svd=False):
+  """Compute the Wasserstein distance between two distributions of images.
+
+  Note that measure vary with the number of images. Use 8192 images to get
+  numbers comparable to the ones in the original paper.
+
+  Args:
+      real_images: (tensor) Real images (batch, height, width, channels).
+      fake_images: (tensor) Fake images (batch, height, width, channels).
+      resolution_min: (int) Minimum resolution for the Laplacion pyramid.
+      patches_per_image: (int) Number of patches to extract per image per
+        Laplacian level.
+      patch_size: (int) Width of a square patch.
+      random_sampling_count: (int) Number of random projections to average.
+      random_projection_dim: (int) Dimension of the random projection space.
+      use_svd: experimental method to compute a more accurate distance.
+  Returns:
+      List of tuples (distance_real, distance_fake) for each level of the
+      Laplacian pyramid from the highest resoluion to the lowest.
+        distance_real is the Wasserstein distance between real images
+        distance_fake is the Wasserstein distance between real and fake images.
+  Raises:
+      ValueError: If the inputs shapes are incorrect. Input tensor dimensions
+      (batch, height, width, channels) are expected to be known at graph
+      construction time. In addition height and width must be the same and the
+      number of colors should be exactly 3. Real and fake images must have the
+      same size.
+  """
+  height = real_images.shape[1]
+  real_images.shape.assert_is_compatible_with([None, None, height, 3])
+  fake_images.shape.assert_is_compatible_with(real_images.shape)
+
+  # Select resolutions.
+  resolution_full = int(height)
+  resolution_min = min(resolution_min, resolution_full)
+  resolution_max = resolution_full
+  # Base loss of detail.
+  resolutions = [
+      2**i
+      for i in range(
+          int(np.log2(resolution_max)),
+          int(np.log2(resolution_min)) - 1, -1)
+  ]
+
+  # Gather patches for each level of the Laplacian pyramids.
+  patches_real, patches_fake, patches_test = (
+      [[] for _ in resolutions] for _ in range(3))
+  for lod, level in enumerate(
+      _laplacian_pyramid(real_images, len(resolutions))):
+    patches_real[lod].append(
+        _batch_to_patches(level, patches_per_image, patch_size))
+    patches_test[lod].append(
+        _batch_to_patches(level, patches_per_image, patch_size))
+
+  for lod, level in enumerate(
+      _laplacian_pyramid(fake_images, len(resolutions))):
+    patches_fake[lod].append(
+        _batch_to_patches(level, patches_per_image, patch_size))
+
+  for lod in range(len(resolutions)):
+    for patches in [patches_real, patches_test, patches_fake]:
+      patches[lod] = _normalize_patches(patches[lod])
+
+  # Evaluate scores.
+  scores = []
+  for lod in range(len(resolutions)):
+    if not use_svd:
+      scores.append(
+          (_sliced_wasserstein(patches_real[lod], patches_test[lod],
+                               random_sampling_count, random_projection_dim),
+           _sliced_wasserstein(patches_real[lod], patches_fake[lod],
+                               random_sampling_count, random_projection_dim)))
+    else:
+      scores.append(
+          (_sliced_wasserstein_svd(patches_real[lod], patches_test[lod]),
+           _sliced_wasserstein_svd(patches_real[lod], patches_fake[lod])))
+  return scores
diff --git a/tensorflow/contrib/gan/python/eval/python/sliced_wasserstein_test.py b/tensorflow/contrib/gan/python/eval/python/sliced_wasserstein_test.py
new file mode 100644
index 0000000000..b960af28ea
--- /dev/null
+++ b/tensorflow/contrib/gan/python/eval/python/sliced_wasserstein_test.py
@@ -0,0 +1,131 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for Sliced Wasserstein Distance."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import numpy as np
+from scipy import ndimage
+from tensorflow.contrib.gan.python.eval.python import sliced_wasserstein_impl as swd
+from tensorflow.python.framework import dtypes
+from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import random_ops
+from tensorflow.python.platform import test
+
+
+class ClassifierMetricsTest(test.TestCase):
+
+  def test_laplacian_pyramid(self):
+    # The numpy/scipy code for reference estimation comes from:
+    # https://github.com/tkarras/progressive_growing_of_gans
+    gaussian_filter = np.float32([[1, 4, 6, 4, 1], [4, 16, 24, 16, 4], [
+        6, 24, 36, 24, 6
+    ], [4, 16, 24, 16, 4], [1, 4, 6, 4, 1]]) / 256.0
+
+    def np_pyr_down(minibatch):  # matches cv2.pyrDown()
+      assert minibatch.ndim == 4
+      return ndimage.convolve(
+          minibatch,
+          gaussian_filter[np.newaxis, np.newaxis, :, :],
+          mode='mirror')[:, :, ::2, ::2]
+
+    def np_pyr_up(minibatch):  # matches cv2.pyrUp()
+      assert minibatch.ndim == 4
+      s = minibatch.shape
+      res = np.zeros((s[0], s[1], s[2] * 2, s[3] * 2), minibatch.dtype)
+      res[:, :, ::2, ::2] = minibatch
+      return ndimage.convolve(
+          res,
+          gaussian_filter[np.newaxis, np.newaxis, :, :] * 4.0,
+          mode='mirror')
+
+    def np_laplacian_pyramid(minibatch, num_levels):
+      # Note: there's a bug in the original SWD, fixed repeatability.
+      pyramid = [minibatch.astype('f').copy()]
+      for _ in range(1, num_levels):
+        pyramid.append(np_pyr_down(pyramid[-1]))
+        pyramid[-2] -= np_pyr_up(pyramid[-1])
+      return pyramid
+
+    data = np.random.normal(size=[256, 3, 32, 32]).astype('f')
+    pyramid = np_laplacian_pyramid(data, 3)
+    data_tf = array_ops.placeholder(dtypes.float32, [256, 32, 32, 3])
+    pyramid_tf = swd._laplacian_pyramid(data_tf, 3)
+    with self.test_session() as sess:
+      pyramid_tf = sess.run(
+          pyramid_tf, feed_dict={
+              data_tf: data.transpose(0, 2, 3, 1)
+          })
+    for x in range(3):
+      self.assertAllClose(
+          pyramid[x].transpose(0, 2, 3, 1), pyramid_tf[x], atol=1e-6)
+
+  def test_sliced_wasserstein_distance(self):
+    """Test the distance."""
+    d1 = random_ops.random_uniform([256, 32, 32, 3])
+    d2 = random_ops.random_normal([256, 32, 32, 3])
+    wfunc = swd.sliced_wasserstein_distance(d1, d2)
+    with self.test_session() as sess:
+      wscores = [sess.run(x) for x in wfunc]
+    self.assertAllClose(
+        np.array([0.014, 0.014], 'f'),
+        np.array([x[0] for x in wscores], 'f'),
+        rtol=0.1)
+    self.assertAllClose(
+        np.array([0.014, 0.020], 'f'),
+        np.array([x[1] for x in wscores], 'f'),
+        rtol=0.1)
+
+  def test_sliced_wasserstein_distance_svd(self):
+    """Test the distance."""
+    d1 = random_ops.random_uniform([256, 32, 32, 3])
+    d2 = random_ops.random_normal([256, 32, 32, 3])
+    wfunc = swd.sliced_wasserstein_distance(d1, d2, use_svd=True)
+    with self.test_session() as sess:
+      wscores = [sess.run(x) for x in wfunc]
+    self.assertAllClose(
+        np.array([0.013, 0.013], 'f'),
+        np.array([x[0] for x in wscores], 'f'),
+        rtol=0.15)
+    self.assertAllClose(
+        np.array([0.014, 0.019], 'f'),
+        np.array([x[1] for x in wscores], 'f'),
+        rtol=0.15)
+
+  def test_swd_mismatched(self):
+    """Test the inputs mismatched shapes are detected."""
+    d1 = random_ops.random_uniform([256, 32, 32, 3])
+    d2 = random_ops.random_normal([256, 32, 31, 3])
+    d3 = random_ops.random_normal([256, 31, 32, 3])
+    d4 = random_ops.random_normal([255, 32, 32, 3])
+    with self.assertRaises(ValueError):
+      swd.sliced_wasserstein_distance(d1, d2)
+    with self.assertRaises(ValueError):
+      swd.sliced_wasserstein_distance(d1, d3)
+    with self.assertRaises(ValueError):
+      swd.sliced_wasserstein_distance(d1, d4)
+
+  def test_swd_not_rgb(self):
+    """Test that only RGB is supported."""
+    d1 = random_ops.random_uniform([256, 32, 32, 1])
+    d2 = random_ops.random_normal([256, 32, 32, 1])
+    with self.assertRaises(ValueError):
+      swd.sliced_wasserstein_distance(d1, d2)
+
+
+if __name__ == '__main__':
+  test.main()
-- 
GitLab


From bb70caa0fb79c9f6cc8756816849d09a690fe198 Mon Sep 17 00:00:00 2001
From: David Majnemer <majnemer@google.com>
Date: Tue, 12 Dec 2017 11:35:20 -0800
Subject: [PATCH 0926/1225] [XLA] Always fold transposes into convs or dots
 regardless of use count

PiperOrigin-RevId: 178790193
---
 tensorflow/compiler/xla/service/transpose_folding.cc | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/tensorflow/compiler/xla/service/transpose_folding.cc b/tensorflow/compiler/xla/service/transpose_folding.cc
index 42b616f4c3..83185ac49e 100644
--- a/tensorflow/compiler/xla/service/transpose_folding.cc
+++ b/tensorflow/compiler/xla/service/transpose_folding.cc
@@ -42,7 +42,7 @@ TransposeFolding::OperandIndices CanFoldOperandsIntoDot(
   TransposeFolding::OperandIndices operand_set;
   for (int64 i = 0; i < dot.operand_count(); ++i) {
     auto& operand = *dot.operand(i);
-    if (operand.IsRank2Transpose() && operand.user_count() == 1) {
+    if (operand.IsRank2Transpose()) {
       operand_set.push_back(i);
     }
   }
@@ -61,8 +61,7 @@ TransposeFolding::OperandIndices CanFoldOperandsIntoConvolution(
   TransposeFolding::OperandIndices operand_set;
   for (int64 i = 0; i < convolution.operand_count(); ++i) {
     auto& operand = *convolution.operand(i);
-    if (operand.opcode() == HloOpcode::kTranspose &&
-        operand.user_count() == 1) {
+    if (operand.opcode() == HloOpcode::kTranspose) {
       operand_set.push_back(i);
     }
   }
-- 
GitLab


From 10c97ff4dafffcb5c407c205d55c56f3d7dd7635 Mon Sep 17 00:00:00 2001
From: Gunhan Gulsoy <gunan@google.com>
Date: Tue, 12 Dec 2017 12:26:15 -0800
Subject: [PATCH 0927/1225] Parameterize tensorflow CUDA and cudnn versions in
 cmake build. (#15298)

Also upgrade the defaults to cuda9 and cudnn 7.
---
 tensorflow/contrib/cmake/CMakeLists.txt | 28 ++++++++++++++++---------
 1 file changed, 18 insertions(+), 10 deletions(-)

diff --git a/tensorflow/contrib/cmake/CMakeLists.txt b/tensorflow/contrib/cmake/CMakeLists.txt
index c83c2bc8a2..481caf6bb0 100644
--- a/tensorflow/contrib/cmake/CMakeLists.txt
+++ b/tensorflow/contrib/cmake/CMakeLists.txt
@@ -18,7 +18,6 @@ cmake_policy(SET CMP0022 NEW)
 
 # Options
 option(tensorflow_VERBOSE "Enable for verbose output" OFF)
-option(tensorflow_ENABLE_GPU "Enable GPU support" OFF)
 option(tensorflow_ENABLE_SSL_SUPPORT "Enable boringssl support" OFF)
 option(tensorflow_ENABLE_GRPC_SUPPORT "Enable gRPC support" ON)
 option(tensorflow_ENABLE_HDFS_SUPPORT "Enable HDFS support" OFF)
@@ -34,6 +33,12 @@ option(tensorflow_BUILD_SHARED_LIB "Build TensorFlow as a shared library" OFF)
 option(tensorflow_OPTIMIZE_FOR_NATIVE_ARCH "Enable compiler optimizations for the native processor architecture (if available)" ON)
 option(tensorflow_WIN_CPU_SIMD_OPTIONS "Enables CPU SIMD instructions")
 option(tensorflow_ENABLE_SNAPPY_SUPPORT "Enable SNAPPY compression support" ON)
+
+# GPU, CUDA and cuDNN options
+option(tensorflow_ENABLE_GPU "Enable GPU support" OFF)
+option(tensorflow_CUDA_VERSION "CUDA version to build against" 9.0)
+option(tensorflow_CUDNN_VERSION "cuDNN version to build against" 7)
+
 if(HAIKU)
 	option(tensorflow_ENABLE_POSITION_INDEPENDENT_CODE "Enable PIE support" OFF)
 else()
@@ -262,7 +267,7 @@ if (tensorflow_ENABLE_GPU)
     list(APPEND CMAKE_LIBRARY_PATH "${tensorflow_CUDA_LIBRARY_PATH}/stubs")
   endif (NOT WIN32)
 
-  find_package(CUDA 8.0 REQUIRED)
+  find_package(CUDA ${tensorflow_CUDA_VERSION} REQUIRED)
 
   # by default we assume compute cabability 3.5 and 5.2. If you change this change it in
   # CUDA_NVCC_FLAGS and cuda_config.h below
@@ -316,13 +321,16 @@ if (tensorflow_ENABLE_GPU)
       ${CUDA_curand_LIBRARY} ${CUDA_cupti_LIBRARY} ${CUDA_cusolver_LIBRARY} ${cudnn_STATIC_LIBRARY} ${culibos_STATIC_LIBRARY} ${nccl_STATIC_LIBRARY})
   endif (WIN32)
 
+  # Remove "." from CUDA version variable.
+  string(REPLACE "." "" short_CUDA_VER ${tensorflow_CUDA_VERSION})
+
   # create cuda_config.h
   FILE(WRITE ${tensorflow_source_dir}/third_party/gpus/cuda/cuda_config.h
     "#ifndef CUDA_CUDA_CONFIG_H_\n"
     "#define CUDA_CUDA_CONFIG_H_\n"
     "#define TF_CUDA_CAPABILITIES CudaVersion(\"3.0\"),CudaVersion(\"3.5\"),CudaVersion(\"5.2\")\n"
-    "#define TF_CUDA_VERSION \"64_80\"\n"
-    "#define TF_CUDNN_VERSION \"64_6\"\n"
+    "#define TF_CUDA_VERSION \"64_${short_CUDA_VER}\"\n"
+    "#define TF_CUDNN_VERSION \"64_${tensorflow_CUDNN_VERSION}\"\n"
     "#define TF_CUDA_TOOLKIT_PATH \"${CUDA_TOOLKIT_ROOT_DIR}\"\n"
     "#endif  // CUDA_CUDA_CONFIG_H_\n"
   )
@@ -360,15 +368,15 @@ if (tensorflow_ENABLE_GPU)
   if(WIN32)
     set(tensorflow_BUILD_INFO_FLAGS --build_config cuda --key_value
       msvcp_dll_name=msvcp140.dll
-      cudart_dll_name=cudart64_80.dll
-      cuda_version_number=8.0
+      cudart_dll_name=cudart64_${short_CUDA_VER}.dll
+      cuda_version_number=${tensorflow_CUDA_VERSION}
       nvcuda_dll_name=nvcuda.dll
-      cudnn_dll_name=cudnn64_6.dll
-      cudnn_version_number=6)
+      cudnn_dll_name=cudnn64_${tensorflow_CUDNN_VERSION}.dll
+      cudnn_version_number=${tensorflow_CUDNN_VERSION})
   else(WIN32)
     set(tensorflow_BUILD_INFO_FLAGS --build_config cuda --key_value
-      cuda_version_number=8.0
-      cudnn_version_number=6)
+	    cuda_version_number=${tensorflow_CUDA_VERSION}
+	    cudnn_version_number=${tensorflow_CUDNN_VERSION})
   endif(WIN32)
 else(tensorflow_ENABLE_GPU)
   set(tensorflow_BUILD_INFO_FLAGS --build_config cpu --key_value
-- 
GitLab


From 50784e4ef8d920ed5fe7b3a9d0bafbac073ebb26 Mon Sep 17 00:00:00 2001
From: Brennan Saeta <saeta@google.com>
Date: Tue, 12 Dec 2017 12:51:41 -0800
Subject: [PATCH 0928/1225] Automated g4 rollback of changelist 177619402

PiperOrigin-RevId: 178800980
---
 .../core/platform/cloud/curl_http_request.cc  |  29 +-
 .../core/platform/cloud/curl_http_request.h   |  12 +
 .../core/platform/cloud/gcs_dns_cache_test.cc |   5 +
 .../core/platform/cloud/gcs_file_system.cc    |  65 +-
 .../core/platform/cloud/gcs_file_system.h     |  42 +-
 .../platform/cloud/gcs_file_system_test.cc    | 641 +++++++++++-------
 tensorflow/core/platform/cloud/http_request.h |  10 +
 .../core/platform/cloud/http_request_fake.h   |  10 +-
 8 files changed, 560 insertions(+), 254 deletions(-)

diff --git a/tensorflow/core/platform/cloud/curl_http_request.cc b/tensorflow/core/platform/cloud/curl_http_request.cc
index 6575ee8c97..c2533b4314 100644
--- a/tensorflow/core/platform/cloud/curl_http_request.cc
+++ b/tensorflow/core/platform/cloud/curl_http_request.cc
@@ -29,16 +29,6 @@ namespace {
 // Set to 1 to enable verbose debug output from curl.
 constexpr uint64 kVerboseOutput = 0;
 
-// Timeout for the whole request. Set only to prevent hanging indefinitely.
-constexpr uint32 kRequestTimeoutSeconds = 3600;  // 1 hour
-
-// Timeout for the connection phase.
-constexpr uint32 kConnectTimeoutSeconds = 120;  // 2 minutes
-
-// The maximum period of request inactivity, after which the request
-// is terminated.
-constexpr uint64 kInactivityTimeoutSeconds = 60;  // 1 minute
-
 // Proxy to the real libcurl implementation.
 class LibCurlProxy : public LibCurl {
  public:
@@ -165,9 +155,6 @@ Status CurlHttpRequest::Init() {
       strings::StrCat("TensorFlow/", TF_VERSION_STRING).c_str());
   // Do not use signals for timeouts - does not work in multi-threaded programs.
   libcurl_->curl_easy_setopt(curl_, CURLOPT_NOSIGNAL, 1L);
-  libcurl_->curl_easy_setopt(curl_, CURLOPT_TIMEOUT, kRequestTimeoutSeconds);
-  libcurl_->curl_easy_setopt(curl_, CURLOPT_CONNECTTIMEOUT,
-                             kConnectTimeoutSeconds);
   libcurl_->curl_easy_setopt(curl_, CURLOPT_HTTP_VERSION,
                              CURL_HTTP_VERSION_2_0);
 
@@ -340,6 +327,16 @@ Status CurlHttpRequest::SetResultBuffer(std::vector<char>* out_buffer) {
   return Status::OK();
 }
 
+Status CurlHttpRequest::SetTimeouts(uint32 connection, uint32 inactivity,
+                                    uint32 total) {
+  TF_RETURN_IF_ERROR(CheckInitialized());
+  TF_RETURN_IF_ERROR(CheckNotSent());
+  connect_timeout_secs_ = connection;
+  inactivity_timeout_secs_ = inactivity;
+  request_timeout_secs_ = total;
+  return Status::OK();
+}
+
 size_t CurlHttpRequest::WriteCallback(const void* ptr, size_t size,
                                       size_t nmemb, void* this_object) {
   CHECK(ptr);
@@ -403,6 +400,10 @@ Status CurlHttpRequest::Send() {
   libcurl_->curl_easy_setopt(curl_, CURLOPT_HEADERFUNCTION,
                              &CurlHttpRequest::HeaderCallback);
 
+  libcurl_->curl_easy_setopt(curl_, CURLOPT_TIMEOUT, request_timeout_secs_);
+  libcurl_->curl_easy_setopt(curl_, CURLOPT_CONNECTTIMEOUT,
+                             connect_timeout_secs_);
+
   char error_buffer[CURL_ERROR_SIZE] = {0};
   libcurl_->curl_easy_setopt(curl_, CURLOPT_ERRORBUFFER, error_buffer);
 
@@ -533,7 +534,7 @@ int CurlHttpRequest::ProgressCallback(void* this_object, curl_off_t dltotal,
     return 0;
   }
 
-  if (now - that->last_progress_timestamp_ > kInactivityTimeoutSeconds) {
+  if (now - that->last_progress_timestamp_ > that->inactivity_timeout_secs_) {
     double lookup_time = -1;
     const auto lookup_time_status = that->libcurl_->curl_easy_getinfo(
         that->curl_, CURLINFO_NAMELOOKUP_TIME, &lookup_time);
diff --git a/tensorflow/core/platform/cloud/curl_http_request.h b/tensorflow/core/platform/cloud/curl_http_request.h
index b2a5870cf7..e4c91dac8d 100644
--- a/tensorflow/core/platform/cloud/curl_http_request.h
+++ b/tensorflow/core/platform/cloud/curl_http_request.h
@@ -120,6 +120,9 @@ class CurlHttpRequest : public HttpRequest {
   // Url encodes str and returns a new string.
   string EscapeString(const string& str) override;
 
+  Status SetTimeouts(uint32 connection, uint32 inactivity,
+                     uint32 total) override;
+
  private:
   /// A write callback in the form which can be accepted by libcurl.
   static size_t WriteCallback(const void* ptr, size_t size, size_t nmemb,
@@ -162,6 +165,15 @@ class CurlHttpRequest : public HttpRequest {
   // The last progress in terms of bytes transmitted.
   curl_off_t last_progress_bytes_ = 0;
 
+  // The maximum period of request inactivity.
+  uint32 inactivity_timeout_secs_ = 60;  // 1 minute
+
+  // Timeout for the connection phase.
+  uint32 connect_timeout_secs_ = 120;  // 2 minutes
+
+  // Tiemout for the whole request. Set only to prevent hanging indefinitely.
+  uint32 request_timeout_secs_ = 3600;  // 1 hour
+
   // Members to enforce the usage flow.
   bool is_initialized_ = false;
   bool is_uri_set_ = false;
diff --git a/tensorflow/core/platform/cloud/gcs_dns_cache_test.cc b/tensorflow/core/platform/cloud/gcs_dns_cache_test.cc
index 8d1a108f30..266879ddf5 100644
--- a/tensorflow/core/platform/cloud/gcs_dns_cache_test.cc
+++ b/tensorflow/core/platform/cloud/gcs_dns_cache_test.cc
@@ -64,6 +64,11 @@ class TestHttpRequest : public HttpRequest {
   Status Send() override { return Status::OK(); }
   string EscapeString(const string& str) override { return ""; }
 
+  Status SetTimeouts(uint32 connection, uint32 inactivity,
+                     uint32 total) override {
+    return Status::OK();
+  }
+
   std::map<string, string> resolve_overrides_;
 };
 
diff --git a/tensorflow/core/platform/cloud/gcs_file_system.cc b/tensorflow/core/platform/cloud/gcs_file_system.cc
index 45e9b05092..ab82643ad5 100644
--- a/tensorflow/core/platform/cloud/gcs_file_system.cc
+++ b/tensorflow/core/platform/cloud/gcs_file_system.cc
@@ -94,6 +94,20 @@ const FileStatistics DIRECTORY_STAT(0, 0, true);
 // variable to a positive integer describing the frequency used to refresh the
 // userspace DNS cache.
 constexpr char kResolveCacheSecs[] = "GCS_RESOLVE_REFRESH_SECS";
+// The environment variable to configure the http request's connection timeout.
+constexpr char kRequestConnectionTimeout[] =
+    "GCS_REQUEST_CONNECTION_TIMEOUT_SECS";
+// The environment varaible to configure the http request's idle timeout.
+constexpr char kRequestIdleTimeout[] = "GCS_REQUEST_IDLE_TIMEOUT_SECS";
+// The environment variable to configure the overall request timeout for
+// metadata requests.
+constexpr char kMetadataRequestTimeout[] = "GCS_METADATA_REQUEST_TIMEOUT_SECS";
+// The environment variable to configure the overall request timeout for
+// block reads requests.
+constexpr char kReadRequestTimeout[] = "GCS_READ_REQUEST_TIMEOUT_SECS";
+// The environment variable to configure the overall request timeout for
+// upload requests.
+constexpr char kWriteRequestTimeout[] = "GCS_WRITE_REQUEST_TIMEOUT_SECS";
 
 Status GetTmpFilename(string* filename) {
   if (!filename) {
@@ -283,12 +297,14 @@ class GcsWritableFile : public WritableFile {
   GcsWritableFile(const string& bucket, const string& object,
                   AuthProvider* auth_provider,
                   HttpRequest::Factory* http_request_factory,
+                  GcsFileSystem::TimeoutConfig* timeouts,
                   std::function<void()> file_cache_erase,
                   int64 initial_retry_delay_usec)
       : bucket_(bucket),
         object_(object),
         auth_provider_(auth_provider),
         http_request_factory_(http_request_factory),
+        timeouts_(timeouts),
         file_cache_erase_(std::move(file_cache_erase)),
         sync_needed_(true),
         initial_retry_delay_usec_(initial_retry_delay_usec) {
@@ -307,12 +323,14 @@ class GcsWritableFile : public WritableFile {
                   AuthProvider* auth_provider,
                   const string& tmp_content_filename,
                   HttpRequest::Factory* http_request_factory,
+                  GcsFileSystem::TimeoutConfig* timeouts,
                   std::function<void()> file_cache_erase,
                   int64 initial_retry_delay_usec)
       : bucket_(bucket),
         object_(object),
         auth_provider_(auth_provider),
         http_request_factory_(http_request_factory),
+        timeouts_(timeouts),
         file_cache_erase_(std::move(file_cache_erase)),
         sync_needed_(true),
         initial_retry_delay_usec_(initial_retry_delay_usec) {
@@ -446,6 +464,8 @@ class GcsWritableFile : public WritableFile {
                                           std::to_string(file_size)));
     TF_RETURN_IF_ERROR(request->SetPostEmptyBody());
     TF_RETURN_IF_ERROR(request->SetResultBuffer(&output_buffer));
+    TF_RETURN_IF_ERROR(request->SetTimeouts(timeouts_->connect, timeouts_->idle,
+                                            timeouts_->metadata));
     TF_RETURN_WITH_CONTEXT_IF_ERROR(
         request->Send(), " when initiating an upload to ", GetGcsPath());
     *session_uri = request->GetResponseHeader("Location");
@@ -477,6 +497,8 @@ class GcsWritableFile : public WritableFile {
     TF_RETURN_IF_ERROR(request->Init());
     TF_RETURN_IF_ERROR(request->SetUri(session_uri));
     TF_RETURN_IF_ERROR(request->AddAuthBearerHeader(auth_token));
+    TF_RETURN_IF_ERROR(request->SetTimeouts(timeouts_->connect, timeouts_->idle,
+                                            timeouts_->metadata));
     TF_RETURN_IF_ERROR(request->AddHeader(
         "Content-Range", strings::StrCat("bytes */", file_size)));
     TF_RETURN_IF_ERROR(request->SetPutEmptyBody());
@@ -531,6 +553,9 @@ class GcsWritableFile : public WritableFile {
           "Content-Range", strings::StrCat("bytes ", start_offset, "-",
                                            file_size - 1, "/", file_size)));
     }
+    TF_RETURN_IF_ERROR(request->SetTimeouts(timeouts_->connect, timeouts_->idle,
+                                            timeouts_->write));
+
     TF_RETURN_IF_ERROR(
         request->SetPutFromFile(tmp_content_filename_, start_offset));
     TF_RETURN_WITH_CONTEXT_IF_ERROR(request->Send(), " when uploading ",
@@ -550,6 +575,7 @@ class GcsWritableFile : public WritableFile {
   string tmp_content_filename_;
   std::ofstream outfile_;
   HttpRequest::Factory* http_request_factory_;
+  GcsFileSystem::TimeoutConfig* timeouts_;
   std::function<void()> file_cache_erase_;
   bool sync_needed_;  // whether there is buffered data that needs to be synced
   int64 initial_retry_delay_usec_;
@@ -635,6 +661,25 @@ GcsFileSystem::GcsFileSystem()
                 &resolve_frequency_secs)) {
     dns_cache_.reset(new GcsDnsCache(resolve_frequency_secs));
   }
+  // Apply the overrides for request timeouts
+  uint32 timeout_value;
+  if (GetEnvVar(kRequestConnectionTimeout, strings::safe_strtou32,
+                &timeout_value)) {
+    timeouts_.connect = timeout_value;
+  }
+  if (GetEnvVar(kRequestIdleTimeout, strings::safe_strtou32, &timeout_value)) {
+    timeouts_.idle = timeout_value;
+  }
+  if (GetEnvVar(kMetadataRequestTimeout, strings::safe_strtou32,
+                &timeout_value)) {
+    timeouts_.metadata = timeout_value;
+  }
+  if (GetEnvVar(kReadRequestTimeout, strings::safe_strtou32, &timeout_value)) {
+    timeouts_.read = timeout_value;
+  }
+  if (GetEnvVar(kWriteRequestTimeout, strings::safe_strtou32, &timeout_value)) {
+    timeouts_.write = timeout_value;
+  }
 }
 
 GcsFileSystem::GcsFileSystem(
@@ -643,7 +688,8 @@ GcsFileSystem::GcsFileSystem(
     size_t block_size, size_t max_bytes, uint64 max_staleness,
     uint64 stat_cache_max_age, size_t stat_cache_max_entries,
     uint64 matching_paths_cache_max_age,
-    size_t matching_paths_cache_max_entries, int64 initial_retry_delay_usec)
+    size_t matching_paths_cache_max_entries, int64 initial_retry_delay_usec,
+    TimeoutConfig timeouts)
     : auth_provider_(std::move(auth_provider)),
       http_request_factory_(std::move(http_request_factory)),
       file_block_cache_(
@@ -651,6 +697,7 @@ GcsFileSystem::GcsFileSystem(
       stat_cache_(new StatCache(stat_cache_max_age, stat_cache_max_entries)),
       matching_paths_cache_(new MatchingPathsCache(
           matching_paths_cache_max_age, matching_paths_cache_max_entries)),
+      timeouts_(timeouts),
       initial_retry_delay_usec_(initial_retry_delay_usec) {}
 
 Status GcsFileSystem::NewRandomAccessFile(
@@ -689,6 +736,8 @@ Status GcsFileSystem::LoadBufferFromGCS(const string& filename, size_t offset,
   TF_RETURN_IF_ERROR(request->AddAuthBearerHeader(auth_token));
   TF_RETURN_IF_ERROR(request->SetRange(offset, offset + n - 1));
   TF_RETURN_IF_ERROR(request->SetResultBuffer(out));
+  TF_RETURN_IF_ERROR(
+      request->SetTimeouts(timeouts_.connect, timeouts_.idle, timeouts_.read));
 
   if (dns_cache_) {
     TF_RETURN_IF_ERROR(dns_cache_->AnnotateRequest(request.get()));
@@ -723,7 +772,7 @@ Status GcsFileSystem::NewWritableFile(const string& fname,
   TF_RETURN_IF_ERROR(ParseGcsPath(fname, false, &bucket, &object));
   result->reset(new GcsWritableFile(
       bucket, object, auth_provider_.get(), http_request_factory_.get(),
-      [this, fname]() { file_block_cache_->RemoveFile(fname); },
+      &timeouts_, [this, fname]() { file_block_cache_->RemoveFile(fname); },
       initial_retry_delay_usec_));
   return Status::OK();
 }
@@ -764,7 +813,7 @@ Status GcsFileSystem::NewAppendableFile(const string& fname,
   TF_RETURN_IF_ERROR(ParseGcsPath(fname, false, &bucket, &object));
   result->reset(new GcsWritableFile(
       bucket, object, auth_provider_.get(), old_content_filename,
-      http_request_factory_.get(),
+      http_request_factory_.get(), &timeouts_,
       [this, fname]() { file_block_cache_->RemoveFile(fname); },
       initial_retry_delay_usec_));
   return Status::OK();
@@ -852,6 +901,8 @@ Status GcsFileSystem::StatForObject(const string& fname, const string& bucket,
             "?fields=size%2Cupdated")));
         TF_RETURN_IF_ERROR(request->AddAuthBearerHeader(auth_token));
         TF_RETURN_IF_ERROR(request->SetResultBuffer(&output_buffer));
+        TF_RETURN_IF_ERROR(request->SetTimeouts(
+            timeouts_.connect, timeouts_.idle, timeouts_.metadata));
 
         if (dns_cache_) {
           TF_RETURN_IF_ERROR(dns_cache_->AnnotateRequest(request.get()));
@@ -902,6 +953,8 @@ Status GcsFileSystem::BucketExists(const string& bucket, bool* result) {
   TF_RETURN_IF_ERROR(
       request->SetUri(strings::StrCat(kGcsUriBase, "b/", bucket)));
   TF_RETURN_IF_ERROR(request->AddAuthBearerHeader(auth_token));
+  TF_RETURN_IF_ERROR(request->SetTimeouts(timeouts_.connect, timeouts_.idle,
+                                          timeouts_.metadata));
   const Status status = request->Send();
   switch (status.code()) {
     case errors::Code::OK:
@@ -1033,6 +1086,8 @@ Status GcsFileSystem::GetChildrenBounded(const string& dirname,
     TF_RETURN_IF_ERROR(request->SetUri(uri));
     TF_RETURN_IF_ERROR(request->AddAuthBearerHeader(auth_token));
     TF_RETURN_IF_ERROR(request->SetResultBuffer(&output_buffer));
+    TF_RETURN_IF_ERROR(request->SetTimeouts(timeouts_.connect, timeouts_.idle,
+                                            timeouts_.metadata));
 
     if (dns_cache_) {
       TF_RETURN_IF_ERROR(dns_cache_->AnnotateRequest(request.get()));
@@ -1157,6 +1212,8 @@ Status GcsFileSystem::DeleteFile(const string& fname) {
   TF_RETURN_IF_ERROR(request->SetUri(strings::StrCat(
       kGcsUriBase, "b/", bucket, "/o/", request->EscapeString(object))));
   TF_RETURN_IF_ERROR(request->AddAuthBearerHeader(auth_token));
+  TF_RETURN_IF_ERROR(request->SetTimeouts(timeouts_.connect, timeouts_.idle,
+                                          timeouts_.metadata));
   TF_RETURN_IF_ERROR(request->SetDeleteRequest());
   TF_RETURN_WITH_CONTEXT_IF_ERROR(request->Send(), " when deleting ", fname);
   file_block_cache_->RemoveFile(fname);
@@ -1251,6 +1308,8 @@ Status GcsFileSystem::RenameObject(const string& src, const string& target) {
       request->EscapeString(target_object))));
   TF_RETURN_IF_ERROR(request->AddAuthBearerHeader(auth_token));
   TF_RETURN_IF_ERROR(request->SetPostEmptyBody());
+  TF_RETURN_IF_ERROR(request->SetTimeouts(timeouts_.connect, timeouts_.idle,
+                                          timeouts_.metadata));
   std::vector<char> output_buffer;
   TF_RETURN_IF_ERROR(request->SetResultBuffer(&output_buffer));
   TF_RETURN_WITH_CONTEXT_IF_ERROR(request->Send(), " when renaming ", src,
diff --git a/tensorflow/core/platform/cloud/gcs_file_system.h b/tensorflow/core/platform/cloud/gcs_file_system.h
index 4b4853c838..7cfcebd5c9 100644
--- a/tensorflow/core/platform/cloud/gcs_file_system.h
+++ b/tensorflow/core/platform/cloud/gcs_file_system.h
@@ -35,6 +35,8 @@ namespace tensorflow {
 /// which adds retry logic to GCS operations.
 class GcsFileSystem : public FileSystem {
  public:
+  struct TimeoutConfig;
+
   GcsFileSystem();
   GcsFileSystem(std::unique_ptr<AuthProvider> auth_provider,
                 std::unique_ptr<HttpRequest::Factory> http_request_factory,
@@ -42,7 +44,7 @@ class GcsFileSystem : public FileSystem {
                 uint64 stat_cache_max_age, size_t stat_cache_max_entries,
                 uint64 matching_paths_cache_max_age,
                 size_t matching_paths_cache_max_entries,
-                int64 initial_retry_delay_usec);
+                int64 initial_retry_delay_usec, TimeoutConfig timeouts);
 
   Status NewRandomAccessFile(
       const string& filename,
@@ -87,6 +89,7 @@ class GcsFileSystem : public FileSystem {
   size_t block_size() const { return file_block_cache_->block_size(); }
   size_t max_bytes() const { return file_block_cache_->max_bytes(); }
   uint64 max_staleness() const { return file_block_cache_->max_staleness(); }
+  TimeoutConfig timeouts() const { return timeouts_; }
 
   uint64 stat_cache_max_age() const { return stat_cache_->max_age(); }
   size_t stat_cache_max_entries() const { return stat_cache_->max_entries(); }
@@ -98,6 +101,41 @@ class GcsFileSystem : public FileSystem {
     return matching_paths_cache_->max_entries();
   }
 
+  /// Structure containing the information for timeouts related to accessing the
+  /// GCS APIs.
+  ///
+  /// All values are in seconds.
+  struct TimeoutConfig {
+    // The request connection timeout. If a connection cannot be established
+    // within `connect` seconds, abort the request.
+    uint32 connect = 120;  // 2 minutes
+
+    // The request idle timeout. If a request has seen no activity in `idle`
+    // seconds, abort the request.
+    uint32 idle = 60;  // 1 minute
+
+    // The maximum total time a metadata request can take. If a request has not
+    // completed within `metadata` seconds, the request is aborted.
+    uint32 metadata = 3600;  // 1 hour
+
+    // The maximum total time a block read request can take. If a request has
+    // not completed within `read` seconds, the request is aborted.
+    uint32 read = 3600;  // 1 hour
+
+    // The maximum total time an upload request can take. If a request has not
+    // completed within `write` seconds, the request is aborted.
+    uint32 write = 3600;  // 1 hour
+
+    TimeoutConfig() {}
+    TimeoutConfig(uint32 connect, uint32 idle, uint32 metadata, uint32 read,
+                  uint32 write)
+        : connect(connect),
+          idle(idle),
+          metadata(metadata),
+          read(read),
+          write(write) {}
+  };
+
  private:
   /// \brief Checks if the bucket exists. Returns OK if the check succeeded.
   ///
@@ -150,6 +188,8 @@ class GcsFileSystem : public FileSystem {
   using MatchingPathsCache = ExpiringLRUCache<std::vector<string>>;
   std::unique_ptr<MatchingPathsCache> matching_paths_cache_;
 
+  TimeoutConfig timeouts_;
+
   /// The initial delay for exponential backoffs when retrying failed calls.
   const int64 initial_retry_delay_usec_ = 1000000L;
 
diff --git a/tensorflow/core/platform/cloud/gcs_file_system_test.cc b/tensorflow/core/platform/cloud/gcs_file_system_test.cc
index 7614ec4d7f..385e0fda02 100644
--- a/tensorflow/core/platform/cloud/gcs_file_system_test.cc
+++ b/tensorflow/core/platform/cloud/gcs_file_system_test.cc
@@ -22,6 +22,8 @@ limitations under the License.
 namespace tensorflow {
 namespace {
 
+static GcsFileSystem::TimeoutConfig kTestTimeoutConfig(5, 1, 10, 20, 30);
+
 class FakeAuthProvider : public AuthProvider {
  public:
   Status GetToken(string* token) override {
@@ -35,12 +37,14 @@ TEST(GcsFileSystemTest, NewRandomAccessFile_NoBlockCache) {
       {new FakeHttpRequest(
            "Uri: https://storage.googleapis.com/bucket/random_access.txt\n"
            "Auth Token: fake_token\n"
-           "Range: 0-5\n",
+           "Range: 0-5\n"
+           "Timeouts: 5 1 20\n",
            "012345"),
        new FakeHttpRequest(
            "Uri: https://storage.googleapis.com/bucket/random_access.txt\n"
            "Auth Token: fake_token\n"
-           "Range: 6-11\n",
+           "Range: 6-11\n"
+           "Timeouts: 5 1 20\n",
            "6789")});
   GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
                    std::unique_ptr<HttpRequest::Factory>(
@@ -49,7 +53,7 @@ TEST(GcsFileSystemTest, NewRandomAccessFile_NoBlockCache) {
                    0 /* stat cache max age */, 0 /* stat cache max entries */,
                    0 /* matching paths cache max age */,
                    0 /* matching paths cache max entries */,
-                   0 /* initial retry delay */);
+                   0 /* initial retry delay */, kTestTimeoutConfig);
 
   std::unique_ptr<RandomAccessFile> file;
   TF_EXPECT_OK(fs.NewRandomAccessFile("gs://bucket/random_access.txt", &file));
@@ -73,12 +77,14 @@ TEST(GcsFileSystemTest, NewRandomAccessFile_NoBlockCache_differentN) {
       {new FakeHttpRequest(
            "Uri: https://storage.googleapis.com/bucket/random_access.txt\n"
            "Auth Token: fake_token\n"
-           "Range: 0-2\n",
+           "Range: 0-2\n"
+           "Timeouts: 5 1 20\n",
            "012"),
        new FakeHttpRequest(
            "Uri: https://storage.googleapis.com/bucket/random_access.txt\n"
            "Auth Token: fake_token\n"
-           "Range: 3-12\n",
+           "Range: 3-12\n"
+           "Timeouts: 5 1 20\n",
            "3456789")});
   GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
                    std::unique_ptr<HttpRequest::Factory>(
@@ -87,7 +93,7 @@ TEST(GcsFileSystemTest, NewRandomAccessFile_NoBlockCache_differentN) {
                    0 /* stat cache max age */, 0 /* stat cache max entries */,
                    0 /* matching paths cache max age */,
                    0 /* matching paths cache max entries */,
-                   0 /* initial retry delay */);
+                   0 /* initial retry delay */, kTestTimeoutConfig);
 
   std::unique_ptr<RandomAccessFile> file;
   TF_EXPECT_OK(fs.NewRandomAccessFile("gs://bucket/random_access.txt", &file));
@@ -116,26 +122,30 @@ TEST(GcsFileSystemTest, NewRandomAccessFile_WithBlockCache) {
       {new FakeHttpRequest(
            "Uri: https://storage.googleapis.com/bucket/random_access.txt\n"
            "Auth Token: fake_token\n"
-           "Range: 0-8\n",
+           "Range: 0-8\n"
+           "Timeouts: 5 1 20\n",
            "012345678"),
        new FakeHttpRequest(
            "Uri: https://storage.googleapis.com/bucket/random_access.txt\n"
            "Auth Token: fake_token\n"
-           "Range: 9-17\n",
+           "Range: 9-17\n"
+           "Timeouts: 5 1 20\n",
            "9abcde"),
        new FakeHttpRequest(
            "Uri: https://storage.googleapis.com/bucket/random_access.txt\n"
            "Auth Token: fake_token\n"
-           "Range: 18-26\n",
+           "Range: 18-26\n"
+           "Timeouts: 5 1 20\n",
            "")});
-  GcsFileSystem fs(
-      std::unique_ptr<AuthProvider>(new FakeAuthProvider),
-      std::unique_ptr<HttpRequest::Factory>(
-          new FakeHttpRequestFactory(&requests)),
-      9 /* block size */, 18 /* max bytes */, 0 /* max staleness */,
-      0 /* stat cache max age */, 0 /* stat cache max entries */,
-      0 /* matching paths cache max age */,
-      0 /* matching paths cache max entries */, 0 /* initial retry delay */);
+  GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
+                   std::unique_ptr<HttpRequest::Factory>(
+                       new FakeHttpRequestFactory(&requests)),
+                   9 /* block size */, 18 /* max bytes */,
+                   0 /* max staleness */, 0 /* stat cache max age */,
+                   0 /* stat cache max entries */,
+                   0 /* matching paths cache max age */,
+                   0 /* matching paths cache max entries */,
+                   0 /* initial retry delay */, kTestTimeoutConfig);
 
   char scratch[100];
   StringPiece result;
@@ -191,20 +201,23 @@ TEST(GcsFileSystemTest, NewRandomAccessFile_WithBlockCache_MaxStaleness) {
   std::vector<HttpRequest*> requests(
       {new FakeHttpRequest("Uri: https://storage.googleapis.com/bucket/object\n"
                            "Auth Token: fake_token\n"
-                           "Range: 0-7\n",
+                           "Range: 0-7\n"
+                           "Timeouts: 5 1 20\n",
                            "01234567"),
        new FakeHttpRequest("Uri: https://storage.googleapis.com/bucket/object\n"
                            "Auth Token: fake_token\n"
-                           "Range: 8-15\n",
+                           "Range: 8-15\n"
+                           "Timeouts: 5 1 20\n",
                            "89abcdef")});
-  GcsFileSystem fs(
-      std::unique_ptr<AuthProvider>(new FakeAuthProvider),
-      std::unique_ptr<HttpRequest::Factory>(
-          new FakeHttpRequestFactory(&requests)),
-      8 /* block size */, 16 /* max bytes */, 3600 /* max staleness */,
-      0 /* stat cache max age */, 0 /* stat cache max entries */,
-      0 /* matching paths cache max age */,
-      0 /* matching paths cache max entries */, 0 /* initial retry delay */);
+  GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
+                   std::unique_ptr<HttpRequest::Factory>(
+                       new FakeHttpRequestFactory(&requests)),
+                   8 /* block size */, 16 /* max bytes */,
+                   3600 /* max staleness */, 0 /* stat cache max age */,
+                   0 /* stat cache max entries */,
+                   0 /* matching paths cache max age */,
+                   0 /* matching paths cache max entries */,
+                   0 /* initial retry delay */, kTestTimeoutConfig);
   char scratch[100];
   StringPiece result;
   // There should only be two HTTP requests issued to GCS even though we iterate
@@ -238,14 +251,15 @@ TEST(GcsFileSystemTest, NewRandomAccessFile_WithBlockCache_MaxStaleness) {
 
 TEST(GcsFileSystemTest, NewRandomAccessFile_NoObjectName) {
   std::vector<HttpRequest*> requests;
-  GcsFileSystem fs(
-      std::unique_ptr<AuthProvider>(new FakeAuthProvider),
-      std::unique_ptr<HttpRequest::Factory>(
-          new FakeHttpRequestFactory(&requests)),
-      0 /* read ahead bytes */, 0 /* max bytes */, 0 /* max staleness */,
-      0 /* stat cache max age */, 0 /* stat cache max entries */,
-      0 /* matching paths cache max age */,
-      0 /* matching paths cache max entries */, 0 /* initial retry delay */);
+  GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
+                   std::unique_ptr<HttpRequest::Factory>(
+                       new FakeHttpRequestFactory(&requests)),
+                   0 /* read ahead bytes */, 0 /* max bytes */,
+                   0 /* max staleness */, 0 /* stat cache max age */,
+                   0 /* stat cache max entries */,
+                   0 /* matching paths cache max age */,
+                   0 /* matching paths cache max entries */,
+                   0 /* initial retry delay */, kTestTimeoutConfig);
 
   std::unique_ptr<RandomAccessFile> file;
   EXPECT_EQ(errors::Code::INVALID_ARGUMENT,
@@ -257,24 +271,28 @@ TEST(GcsFileSystemTest, NewWritableFile) {
       {new FakeHttpRequest(
            "Uri: https://storage.googleapis.com/bucket/path%2Fwriteable\n"
            "Auth Token: fake_token\n"
-           "Range: 0-7\n",
+           "Range: 0-7\n"
+           "Timeouts: 5 1 20\n",
            "01234567"),
        new FakeHttpRequest(
            "Uri: https://www.googleapis.com/upload/storage/v1/b/bucket/o?"
            "uploadType=resumable&name=path%2Fwriteable\n"
            "Auth Token: fake_token\n"
            "Header X-Upload-Content-Length: 17\n"
-           "Post: yes\n",
+           "Post: yes\n"
+           "Timeouts: 5 1 10\n",
            "", {{"Location", "https://custom/upload/location"}}),
        new FakeHttpRequest("Uri: https://custom/upload/location\n"
                            "Auth Token: fake_token\n"
                            "Header Content-Range: bytes 0-16/17\n"
+                           "Timeouts: 5 1 30\n"
                            "Put body: content1,content2\n",
                            ""),
        new FakeHttpRequest(
            "Uri: https://storage.googleapis.com/bucket/path%2Fwriteable\n"
            "Auth Token: fake_token\n"
-           "Range: 0-7\n",
+           "Range: 0-7\n"
+           "Timeouts: 5 1 20\n",
            "01234567")});
   GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
                    std::unique_ptr<HttpRequest::Factory>(
@@ -283,7 +301,7 @@ TEST(GcsFileSystemTest, NewWritableFile) {
                    0 /* stat cache max age */, 0 /* stat cache max entries */,
                    0 /* matching paths cache max age */,
                    0 /* matching paths cache max entries */,
-                   0 /* initial retry delay */);
+                   0 /* initial retry delay */, kTestTimeoutConfig);
 
   // Read from the file first, to fill the block cache.
   std::unique_ptr<RandomAccessFile> rfile;
@@ -315,15 +333,18 @@ TEST(GcsFileSystemTest, NewWritableFile_ResumeUploadSucceeds) {
            "uploadType=resumable&name=path%2Fwriteable.txt\n"
            "Auth Token: fake_token\n"
            "Header X-Upload-Content-Length: 17\n"
-           "Post: yes\n",
+           "Post: yes\n"
+           "Timeouts: 5 1 10\n",
            "", {{"Location", "https://custom/upload/location"}}),
        new FakeHttpRequest("Uri: https://custom/upload/location\n"
                            "Auth Token: fake_token\n"
                            "Header Content-Range: bytes 0-16/17\n"
+                           "Timeouts: 5 1 30\n"
                            "Put body: content1,content2\n",
                            "", errors::Unavailable("503"), 503),
        new FakeHttpRequest("Uri: https://custom/upload/location\n"
                            "Auth Token: fake_token\n"
+                           "Timeouts: 5 1 10\n"
                            "Header Content-Range: bytes */17\n"
                            "Put: yes\n",
                            "", errors::FailedPrecondition("308"), nullptr,
@@ -331,10 +352,12 @@ TEST(GcsFileSystemTest, NewWritableFile_ResumeUploadSucceeds) {
        new FakeHttpRequest("Uri: https://custom/upload/location\n"
                            "Auth Token: fake_token\n"
                            "Header Content-Range: bytes 11-16/17\n"
+                           "Timeouts: 5 1 30\n"
                            "Put body: ntent2\n",
                            "", errors::Unavailable("503"), 503),
        new FakeHttpRequest("Uri: https://custom/upload/location\n"
                            "Auth Token: fake_token\n"
+                           "Timeouts: 5 1 10\n"
                            "Header Content-Range: bytes */17\n"
                            "Put: yes\n",
                            "", errors::FailedPrecondition("308"), nullptr,
@@ -342,6 +365,7 @@ TEST(GcsFileSystemTest, NewWritableFile_ResumeUploadSucceeds) {
        new FakeHttpRequest("Uri: https://custom/upload/location\n"
                            "Auth Token: fake_token\n"
                            "Header Content-Range: bytes 13-16/17\n"
+                           "Timeouts: 5 1 30\n"
                            "Put body: ent2\n",
                            "")});
   GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
@@ -351,7 +375,7 @@ TEST(GcsFileSystemTest, NewWritableFile_ResumeUploadSucceeds) {
                    0 /* stat cache max age */, 0 /* stat cache max entries */,
                    0 /* matching paths cache max age */,
                    0 /* matching paths cache max entries */,
-                   0 /* initial retry delay */);
+                   0 /* initial retry delay */, kTestTimeoutConfig);
 
   std::unique_ptr<WritableFile> file;
   TF_EXPECT_OK(fs.NewWritableFile("gs://bucket/path/writeable.txt", &file));
@@ -369,38 +393,44 @@ TEST(GcsFileSystemTest, NewWritableFile_ResumeUploadSucceedsOnGetStatus) {
       {new FakeHttpRequest(
            "Uri: https://storage.googleapis.com/bucket/path%2Fwriteable\n"
            "Auth Token: fake_token\n"
-           "Range: 0-7\n",
+           "Range: 0-7\n"
+           "Timeouts: 5 1 20\n",
            "01234567"),
        new FakeHttpRequest(
            "Uri: https://www.googleapis.com/upload/storage/v1/b/bucket/o?"
            "uploadType=resumable&name=path%2Fwriteable\n"
            "Auth Token: fake_token\n"
            "Header X-Upload-Content-Length: 17\n"
-           "Post: yes\n",
+           "Post: yes\n"
+           "Timeouts: 5 1 10\n",
            "", {{"Location", "https://custom/upload/location"}}),
        new FakeHttpRequest("Uri: https://custom/upload/location\n"
                            "Auth Token: fake_token\n"
                            "Header Content-Range: bytes 0-16/17\n"
+                           "Timeouts: 5 1 30\n"
                            "Put body: content1,content2\n",
                            "", errors::Unavailable("503"), 503),
        new FakeHttpRequest("Uri: https://custom/upload/location\n"
                            "Auth Token: fake_token\n"
+                           "Timeouts: 5 1 10\n"
                            "Header Content-Range: bytes */17\n"
                            "Put: yes\n",
                            "", Status::OK(), nullptr, {}, 201),
        new FakeHttpRequest(
            "Uri: https://storage.googleapis.com/bucket/path%2Fwriteable\n"
            "Auth Token: fake_token\n"
-           "Range: 0-7\n",
+           "Range: 0-7\n"
+           "Timeouts: 5 1 20\n",
            "01234567")});
-  GcsFileSystem fs(
-      std::unique_ptr<AuthProvider>(new FakeAuthProvider),
-      std::unique_ptr<HttpRequest::Factory>(
-          new FakeHttpRequestFactory(&requests)),
-      8 /* block size */, 8 /* max bytes */, 3600 /* max staleness */,
-      0 /* stat cache max age */, 0 /* stat cache max entries */,
-      0 /* matching paths cache max age */,
-      0 /* matching paths cache max entries */, 0 /* initial retry delay */);
+  GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
+                   std::unique_ptr<HttpRequest::Factory>(
+                       new FakeHttpRequestFactory(&requests)),
+                   8 /* block size */, 8 /* max bytes */,
+                   3600 /* max staleness */, 0 /* stat cache max age */,
+                   0 /* stat cache max entries */,
+                   0 /* matching paths cache max age */,
+                   0 /* matching paths cache max entries */,
+                   0 /* initial retry delay */, kTestTimeoutConfig);
   // Pull the file's first block into the cache. This will trigger the first
   // HTTP request to GCS.
   std::unique_ptr<RandomAccessFile> rfile;
@@ -434,17 +464,20 @@ TEST(GcsFileSystemTest, NewWritableFile_ResumeUploadAllAttemptsFail) {
            "uploadType=resumable&name=path%2Fwriteable.txt\n"
            "Auth Token: fake_token\n"
            "Header X-Upload-Content-Length: 17\n"
-           "Post: yes\n",
+           "Post: yes\n"
+           "Timeouts: 5 1 10\n",
            "", {{"Location", "https://custom/upload/location"}}),
        new FakeHttpRequest("Uri: https://custom/upload/location\n"
                            "Auth Token: fake_token\n"
                            "Header Content-Range: bytes 0-16/17\n"
+                           "Timeouts: 5 1 30\n"
                            "Put body: content1,content2\n",
                            "", errors::Unavailable("503"), 503)});
   for (int i = 0; i < 10; i++) {
     requests.emplace_back(new FakeHttpRequest(
         "Uri: https://custom/upload/location\n"
         "Auth Token: fake_token\n"
+        "Timeouts: 5 1 10\n"
         "Header Content-Range: bytes */17\n"
         "Put: yes\n",
         "", errors::FailedPrecondition("important HTTP error 308"), nullptr,
@@ -453,6 +486,7 @@ TEST(GcsFileSystemTest, NewWritableFile_ResumeUploadAllAttemptsFail) {
         "Uri: https://custom/upload/location\n"
         "Auth Token: fake_token\n"
         "Header Content-Range: bytes 11-16/17\n"
+        "Timeouts: 5 1 30\n"
         "Put body: ntent2\n",
         "", errors::Unavailable("important HTTP error 503"), 503));
   }
@@ -463,12 +497,14 @@ TEST(GcsFileSystemTest, NewWritableFile_ResumeUploadAllAttemptsFail) {
       "uploadType=resumable&name=path%2Fwriteable.txt\n"
       "Auth Token: fake_token\n"
       "Header X-Upload-Content-Length: 17\n"
-      "Post: yes\n",
+      "Post: yes\n"
+      "Timeouts: 5 1 10\n",
       "", {{"Location", "https://custom/upload/location"}}));
   requests.emplace_back(
       new FakeHttpRequest("Uri: https://custom/upload/location\n"
                           "Auth Token: fake_token\n"
                           "Header Content-Range: bytes 0-16/17\n"
+                          "Timeouts: 5 1 30\n"
                           "Put body: content1,content2\n",
                           ""));
   GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
@@ -478,7 +514,7 @@ TEST(GcsFileSystemTest, NewWritableFile_ResumeUploadAllAttemptsFail) {
                    0 /* stat cache max age */, 0 /* stat cache max entries */,
                    0 /* matching paths cache max age */,
                    0 /* matching paths cache max entries */,
-                   2 /* initial retry delay */);
+                   2 /* initial retry delay */, kTestTimeoutConfig);
 
   std::unique_ptr<WritableFile> file;
   TF_EXPECT_OK(fs.NewWritableFile("gs://bucket/path/writeable.txt", &file));
@@ -500,11 +536,13 @@ TEST(GcsFileSystemTest, NewWritableFile_UploadReturns410) {
            "uploadType=resumable&name=path%2Fwriteable.txt\n"
            "Auth Token: fake_token\n"
            "Header X-Upload-Content-Length: 17\n"
-           "Post: yes\n",
+           "Post: yes\n"
+           "Timeouts: 5 1 10\n",
            "", {{"Location", "https://custom/upload/location"}}),
        new FakeHttpRequest("Uri: https://custom/upload/location\n"
                            "Auth Token: fake_token\n"
                            "Header Content-Range: bytes 0-16/17\n"
+                           "Timeouts: 5 1 30\n"
                            "Put body: content1,content2\n",
                            "", errors::NotFound("important HTTP error 410"),
                            410),
@@ -515,11 +553,13 @@ TEST(GcsFileSystemTest, NewWritableFile_UploadReturns410) {
            "uploadType=resumable&name=path%2Fwriteable.txt\n"
            "Auth Token: fake_token\n"
            "Header X-Upload-Content-Length: 17\n"
-           "Post: yes\n",
+           "Post: yes\n"
+           "Timeouts: 5 1 10\n",
            "", {{"Location", "https://custom/upload/location"}}),
        new FakeHttpRequest("Uri: https://custom/upload/location\n"
                            "Auth Token: fake_token\n"
                            "Header Content-Range: bytes 0-16/17\n"
+                           "Timeouts: 5 1 30\n"
                            "Put body: content1,content2\n",
                            "")});
   GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
@@ -529,7 +569,7 @@ TEST(GcsFileSystemTest, NewWritableFile_UploadReturns410) {
                    0 /* stat cache max age */, 0 /* stat cache max entries */,
                    0 /* matching paths cache max age */,
                    0 /* matching paths cache max entries */,
-                   0 /* initial retry delay */);
+                   0 /* initial retry delay */, kTestTimeoutConfig);
 
   std::unique_ptr<WritableFile> file;
   TF_EXPECT_OK(fs.NewWritableFile("gs://bucket/path/writeable.txt", &file));
@@ -558,7 +598,7 @@ TEST(GcsFileSystemTest, NewWritableFile_NoObjectName) {
                    0 /* stat cache max age */, 0 /* stat cache max entries */,
                    0 /* matching paths cache max age */,
                    0 /* matching paths cache max entries */,
-                   0 /* initial retry delay */);
+                   0 /* initial retry delay */, kTestTimeoutConfig);
 
   std::unique_ptr<WritableFile> file;
   EXPECT_EQ(errors::Code::INVALID_ARGUMENT,
@@ -570,33 +610,38 @@ TEST(GcsFileSystemTest, NewAppendableFile) {
       {new FakeHttpRequest(
            "Uri: https://storage.googleapis.com/bucket/path%2Fappendable\n"
            "Auth Token: fake_token\n"
-           "Range: 0-31\n",
+           "Range: 0-31\n"
+           "Timeouts: 5 1 20\n",
            "content1,"),
        new FakeHttpRequest(
            "Uri: https://www.googleapis.com/upload/storage/v1/b/bucket/o?"
            "uploadType=resumable&name=path%2Fappendable\n"
            "Auth Token: fake_token\n"
            "Header X-Upload-Content-Length: 17\n"
-           "Post: yes\n",
+           "Post: yes\n"
+           "Timeouts: 5 1 10\n",
            "", {{"Location", "https://custom/upload/location"}}),
        new FakeHttpRequest("Uri: https://custom/upload/location\n"
                            "Auth Token: fake_token\n"
                            "Header Content-Range: bytes 0-16/17\n"
+                           "Timeouts: 5 1 30\n"
                            "Put body: content1,content2\n",
                            ""),
        new FakeHttpRequest(
            "Uri: https://storage.googleapis.com/bucket/path%2Fappendable\n"
            "Auth Token: fake_token\n"
-           "Range: 0-31\n",
+           "Range: 0-31\n"
+           "Timeouts: 5 1 20\n",
            "01234567")});
-  GcsFileSystem fs(
-      std::unique_ptr<AuthProvider>(new FakeAuthProvider),
-      std::unique_ptr<HttpRequest::Factory>(
-          new FakeHttpRequestFactory(&requests)),
-      32 /* block size */, 32 /* max bytes */, 0 /* max staleness */,
-      0 /* stat cache max age */, 0 /* stat cache max entries */,
-      0 /* matching paths cache max age */,
-      0 /* matching paths cache max entries */, 0 /* initial retry delay */);
+  GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
+                   std::unique_ptr<HttpRequest::Factory>(
+                       new FakeHttpRequestFactory(&requests)),
+                   32 /* block size */, 32 /* max bytes */,
+                   0 /* max staleness */, 0 /* stat cache max age */,
+                   0 /* stat cache max entries */,
+                   0 /* matching paths cache max age */,
+                   0 /* matching paths cache max entries */,
+                   0 /* initial retry delay */, kTestTimeoutConfig);
 
   // Create an appendable file. This should read the file from GCS, and pull its
   // contents into the block cache.
@@ -629,7 +674,7 @@ TEST(GcsFileSystemTest, NewAppendableFile_NoObjectName) {
                    0 /* stat cache max age */, 0 /* stat cache max entries */,
                    0 /* matching paths cache max age */,
                    0 /* matching paths cache max entries */,
-                   0 /* initial retry delay */);
+                   0 /* initial retry delay */, kTestTimeoutConfig);
 
   std::unique_ptr<WritableFile> file;
   EXPECT_EQ(errors::Code::INVALID_ARGUMENT,
@@ -642,7 +687,8 @@ TEST(GcsFileSystemTest, NewReadOnlyMemoryRegionFromFile) {
       {new FakeHttpRequest(
            "Uri: https://www.googleapis.com/storage/v1/b/bucket/o/"
            "path%2Frandom_access.txt?fields=size%2Cupdated\n"
-           "Auth Token: fake_token\n",
+           "Auth Token: fake_token\n"
+           "Timeouts: 5 1 10\n",
            strings::StrCat("{\"size\": \"", content.size(),
                            "\", \"updated\": \"2016-04-29T23:15:24.896Z\"}")),
        new FakeHttpRequest(
@@ -650,7 +696,7 @@ TEST(GcsFileSystemTest, NewReadOnlyMemoryRegionFromFile) {
                            "path%2Frandom_access.txt\n"
                            "Auth Token: fake_token\n"
                            "Range: 0-",
-                           content.size() - 1, "\n"),
+                           content.size() - 1, "\n", "Timeouts: 5 1 20\n"),
            content)});
   GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
                    std::unique_ptr<HttpRequest::Factory>(
@@ -659,7 +705,7 @@ TEST(GcsFileSystemTest, NewReadOnlyMemoryRegionFromFile) {
                    0 /* stat cache max age */, 0 /* stat cache max entries */,
                    0 /* matching paths cache max age */,
                    0 /* matching paths cache max entries */,
-                   0 /* initial retry delay */);
+                   0 /* initial retry delay */, kTestTimeoutConfig);
 
   std::unique_ptr<ReadOnlyMemoryRegion> region;
   TF_EXPECT_OK(fs.NewReadOnlyMemoryRegionFromFile(
@@ -678,7 +724,7 @@ TEST(GcsFileSystemTest, NewReadOnlyMemoryRegionFromFile_NoObjectName) {
                    0 /* stat cache max age */, 0 /* stat cache max entries */,
                    0 /* matching paths cache max age */,
                    0 /* matching paths cache max entries */,
-                   0 /* initial retry delay */);
+                   0 /* initial retry delay */, kTestTimeoutConfig);
 
   std::unique_ptr<ReadOnlyMemoryRegion> region;
   EXPECT_EQ(errors::Code::INVALID_ARGUMENT,
@@ -689,7 +735,8 @@ TEST(GcsFileSystemTest, FileExists_YesAsObject) {
   std::vector<HttpRequest*> requests({new FakeHttpRequest(
       "Uri: https://www.googleapis.com/storage/v1/b/bucket/o/"
       "path%2Ffile1.txt?fields=size%2Cupdated\n"
-      "Auth Token: fake_token\n",
+      "Auth Token: fake_token\n"
+      "Timeouts: 5 1 10\n",
       strings::StrCat("{\"size\": \"1010\","
                       "\"updated\": \"2016-04-29T23:15:24.896Z\"}"))});
   GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
@@ -699,7 +746,7 @@ TEST(GcsFileSystemTest, FileExists_YesAsObject) {
                    0 /* stat cache max age */, 0 /* stat cache max entries */,
                    0 /* matching paths cache max age */,
                    0 /* matching paths cache max entries */,
-                   0 /* initial retry delay */);
+                   0 /* initial retry delay */, kTestTimeoutConfig);
 
   TF_EXPECT_OK(fs.FileExists("gs://bucket/path/file1.txt"));
 }
@@ -709,13 +756,15 @@ TEST(GcsFileSystemTest, FileExists_YesAsFolder) {
       {new FakeHttpRequest(
            "Uri: https://www.googleapis.com/storage/v1/b/bucket/o/"
            "path%2Fsubfolder?fields=size%2Cupdated\n"
-           "Auth Token: fake_token\n",
+           "Auth Token: fake_token\n"
+           "Timeouts: 5 1 10\n",
            "", errors::NotFound("404"), 404),
        new FakeHttpRequest(
            "Uri: https://www.googleapis.com/storage/v1/b/bucket/o?"
            "fields=items%2Fname%2CnextPageToken&prefix=path%2Fsubfolder%2F"
            "&maxResults=1\n"
-           "Auth Token: fake_token\n",
+           "Auth Token: fake_token\n"
+           "Timeouts: 5 1 10\n",
            "{\"items\": [ "
            "  { \"name\": \"path/subfolder/\" }]}")});
   GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
@@ -725,7 +774,7 @@ TEST(GcsFileSystemTest, FileExists_YesAsFolder) {
                    0 /* stat cache max age */, 0 /* stat cache max entries */,
                    0 /* matching paths cache max age */,
                    0 /* matching paths cache max entries */,
-                   0 /* initial retry delay */);
+                   0 /* initial retry delay */, kTestTimeoutConfig);
 
   TF_EXPECT_OK(fs.FileExists("gs://bucket/path/subfolder"));
 }
@@ -734,11 +783,13 @@ TEST(GcsFileSystemTest, FileExists_YesAsBucket) {
   std::vector<HttpRequest*> requests(
       {new FakeHttpRequest(
            "Uri: https://www.googleapis.com/storage/v1/b/bucket1\n"
-           "Auth Token: fake_token\n",
+           "Auth Token: fake_token\n"
+           "Timeouts: 5 1 10\n",
            "{\"size\": \"100\"}"),
        new FakeHttpRequest(
            "Uri: https://www.googleapis.com/storage/v1/b/bucket1\n"
-           "Auth Token: fake_token\n",
+           "Auth Token: fake_token\n"
+           "Timeouts: 5 1 10\n",
            "{\"size\": \"100\"}")});
   GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
                    std::unique_ptr<HttpRequest::Factory>(
@@ -747,7 +798,7 @@ TEST(GcsFileSystemTest, FileExists_YesAsBucket) {
                    0 /* stat cache max age */, 0 /* stat cache max entries */,
                    0 /* matching paths cache max age */,
                    0 /* matching paths cache max entries */,
-                   0 /* initial retry delay */);
+                   0 /* initial retry delay */, kTestTimeoutConfig);
 
   TF_EXPECT_OK(fs.FileExists("gs://bucket1"));
   TF_EXPECT_OK(fs.FileExists("gs://bucket1/"));
@@ -758,13 +809,15 @@ TEST(GcsFileSystemTest, FileExists_NotAsObjectOrFolder) {
       {new FakeHttpRequest(
            "Uri: https://www.googleapis.com/storage/v1/b/bucket/o/"
            "path%2Ffile1.txt?fields=size%2Cupdated\n"
-           "Auth Token: fake_token\n",
+           "Auth Token: fake_token\n"
+           "Timeouts: 5 1 10\n",
            "", errors::NotFound("404"), 404),
        new FakeHttpRequest(
            "Uri: https://www.googleapis.com/storage/v1/b/bucket/o?"
            "fields=items%2Fname%2CnextPageToken&prefix=path%2Ffile1.txt%2F"
            "&maxResults=1\n"
-           "Auth Token: fake_token\n",
+           "Auth Token: fake_token\n"
+           "Timeouts: 5 1 10\n",
            "{\"items\": []}")});
   GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
                    std::unique_ptr<HttpRequest::Factory>(
@@ -773,7 +826,7 @@ TEST(GcsFileSystemTest, FileExists_NotAsObjectOrFolder) {
                    0 /* stat cache max age */, 0 /* stat cache max entries */,
                    0 /* matching paths cache max age */,
                    0 /* matching paths cache max entries */,
-                   0 /* initial retry delay */);
+                   0 /* initial retry delay */, kTestTimeoutConfig);
 
   EXPECT_EQ(errors::Code::NOT_FOUND,
             fs.FileExists("gs://bucket/path/file1.txt").code());
@@ -783,11 +836,13 @@ TEST(GcsFileSystemTest, FileExists_NotAsBucket) {
   std::vector<HttpRequest*> requests(
       {new FakeHttpRequest(
            "Uri: https://www.googleapis.com/storage/v1/b/bucket2\n"
-           "Auth Token: fake_token\n",
+           "Auth Token: fake_token\n"
+           "Timeouts: 5 1 10\n",
            "", errors::NotFound("404"), 404),
        new FakeHttpRequest(
            "Uri: https://www.googleapis.com/storage/v1/b/bucket2\n"
-           "Auth Token: fake_token\n",
+           "Auth Token: fake_token\n"
+           "Timeouts: 5 1 10\n",
            "", errors::NotFound("404"), 404)});
   GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
                    std::unique_ptr<HttpRequest::Factory>(
@@ -796,7 +851,7 @@ TEST(GcsFileSystemTest, FileExists_NotAsBucket) {
                    0 /* stat cache max age */, 0 /* stat cache max entries */,
                    0 /* matching paths cache max age */,
                    0 /* matching paths cache max entries */,
-                   0 /* initial retry delay */);
+                   0 /* initial retry delay */, kTestTimeoutConfig);
   EXPECT_EQ(errors::Code::INVALID_ARGUMENT,
             fs.FileExists("gs://bucket2/").code());
   EXPECT_EQ(errors::Code::INVALID_ARGUMENT,
@@ -808,29 +863,33 @@ TEST(GcsFileSystemTest, FileExists_StatCache) {
       {new FakeHttpRequest(
            "Uri: https://www.googleapis.com/storage/v1/b/bucket/o/"
            "path%2Ffile1.txt?fields=size%2Cupdated\n"
-           "Auth Token: fake_token\n",
+           "Auth Token: fake_token\n"
+           "Timeouts: 5 1 10\n",
            strings::StrCat("{\"size\": \"1010\","
                            "\"updated\": \"2016-04-29T23:15:24.896Z\"}")),
        new FakeHttpRequest(
            "Uri: https://www.googleapis.com/storage/v1/b/bucket/o/"
            "path%2Fsubfolder?fields=size%2Cupdated\n"
-           "Auth Token: fake_token\n",
+           "Auth Token: fake_token\n"
+           "Timeouts: 5 1 10\n",
            "", errors::NotFound("404"), 404),
        new FakeHttpRequest(
            "Uri: https://www.googleapis.com/storage/v1/b/bucket/o?"
            "fields=items%2Fname%2CnextPageToken&prefix=path%2Fsubfolder%2F"
            "&maxResults=1\n"
-           "Auth Token: fake_token\n",
+           "Auth Token: fake_token\n"
+           "Timeouts: 5 1 10\n",
            "{\"items\": [ "
            "  { \"name\": \"path/subfolder/\" }]}")});
-  GcsFileSystem fs(
-      std::unique_ptr<AuthProvider>(new FakeAuthProvider),
-      std::unique_ptr<HttpRequest::Factory>(
-          new FakeHttpRequestFactory(&requests)),
-      0 /* block size */, 0 /* max bytes */, 0 /* max staleness */,
-      3600 /* stat cache max age */, 0 /* stat cache max entries */,
-      0 /* matching paths cache max age */,
-      0 /* matching paths cache max entries */, 0 /* initial retry delay */);
+  GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
+                   std::unique_ptr<HttpRequest::Factory>(
+                       new FakeHttpRequestFactory(&requests)),
+                   0 /* block size */, 0 /* max bytes */, 0 /* max staleness */,
+                   3600 /* stat cache max age */,
+                   0 /* stat cache max entries */,
+                   0 /* matching paths cache max age */,
+                   0 /* matching paths cache max entries */,
+                   0 /* initial retry delay */, kTestTimeoutConfig);
 
   // The stat cache will ensure that repeated lookups don't trigger additional
   // HTTP requests.
@@ -845,7 +904,8 @@ TEST(GcsFileSystemTest, GetChildren_NoItems) {
       "Uri: https://www.googleapis.com/storage/v1/b/bucket/o?"
       "fields=items%2Fname%2Cprefixes%2CnextPageToken&delimiter=%2F&prefix="
       "path%2F\n"
-      "Auth Token: fake_token\n",
+      "Auth Token: fake_token\n"
+      "Timeouts: 5 1 10\n",
       "{\"prefixes\": [\"path/subpath/\"]}")});
   GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
                    std::unique_ptr<HttpRequest::Factory>(
@@ -854,7 +914,7 @@ TEST(GcsFileSystemTest, GetChildren_NoItems) {
                    0 /* stat cache max age */, 0 /* stat cache max entries */,
                    0 /* matching paths cache max age */,
                    0 /* matching paths cache max entries */,
-                   0 /* initial retry delay */);
+                   0 /* initial retry delay */, kTestTimeoutConfig);
 
   std::vector<string> children;
   TF_EXPECT_OK(fs.GetChildren("gs://bucket/path/", &children));
@@ -867,7 +927,8 @@ TEST(GcsFileSystemTest, GetChildren_ThreeFiles) {
       "Uri: https://www.googleapis.com/storage/v1/b/bucket/o?"
       "fields=items%2Fname%2Cprefixes%2CnextPageToken&delimiter=%2F&prefix="
       "path%2F\n"
-      "Auth Token: fake_token\n",
+      "Auth Token: fake_token\n"
+      "Timeouts: 5 1 10\n",
       "{\"items\": [ "
       "  { \"name\": \"path/file1.txt\" },"
       "  { \"name\": \"path/file3.txt\" }],"
@@ -879,7 +940,7 @@ TEST(GcsFileSystemTest, GetChildren_ThreeFiles) {
                    0 /* stat cache max age */, 0 /* stat cache max entries */,
                    0 /* matching paths cache max age */,
                    0 /* matching paths cache max entries */,
-                   0 /* initial retry delay */);
+                   0 /* initial retry delay */, kTestTimeoutConfig);
 
   std::vector<string> children;
   TF_EXPECT_OK(fs.GetChildren("gs://bucket/path/", &children));
@@ -893,7 +954,8 @@ TEST(GcsFileSystemTest, GetChildren_SelfDirectoryMarker) {
       "Uri: https://www.googleapis.com/storage/v1/b/bucket/o?"
       "fields=items%2Fname%2Cprefixes%2CnextPageToken&delimiter=%2F&prefix="
       "path%2F\n"
-      "Auth Token: fake_token\n",
+      "Auth Token: fake_token\n"
+      "Timeouts: 5 1 10\n",
       "{\"items\": [ "
       "  { \"name\": \"path/\" },"
       "  { \"name\": \"path/file3.txt\" }],"
@@ -905,7 +967,7 @@ TEST(GcsFileSystemTest, GetChildren_SelfDirectoryMarker) {
                    0 /* stat cache max age */, 0 /* stat cache max entries */,
                    0 /* matching paths cache max age */,
                    0 /* matching paths cache max entries */,
-                   0 /* initial retry delay */);
+                   0 /* initial retry delay */, kTestTimeoutConfig);
 
   std::vector<string> children;
   TF_EXPECT_OK(fs.GetChildren("gs://bucket/path/", &children));
@@ -918,7 +980,8 @@ TEST(GcsFileSystemTest, GetChildren_ThreeFiles_NoSlash) {
       "Uri: https://www.googleapis.com/storage/v1/b/bucket/o?"
       "fields=items%2Fname%2Cprefixes%2CnextPageToken&delimiter=%2F&prefix="
       "path%2F\n"
-      "Auth Token: fake_token\n",
+      "Auth Token: fake_token\n"
+      "Timeouts: 5 1 10\n",
       "{\"items\": [ "
       "  { \"name\": \"path/file1.txt\" },"
       "  { \"name\": \"path/file3.txt\" }],"
@@ -930,7 +993,7 @@ TEST(GcsFileSystemTest, GetChildren_ThreeFiles_NoSlash) {
                    0 /* stat cache max age */, 0 /* stat cache max entries */,
                    0 /* matching paths cache max age */,
                    0 /* matching paths cache max entries */,
-                   0 /* initial retry delay */);
+                   0 /* initial retry delay*/, kTestTimeoutConfig);
 
   std::vector<string> children;
   TF_EXPECT_OK(fs.GetChildren("gs://bucket/path", &children));
@@ -943,7 +1006,8 @@ TEST(GcsFileSystemTest, GetChildren_Root) {
   std::vector<HttpRequest*> requests({new FakeHttpRequest(
       "Uri: https://www.googleapis.com/storage/v1/b/bucket-a-b-c/o?"
       "fields=items%2Fname%2Cprefixes%2CnextPageToken&delimiter=%2F\n"
-      "Auth Token: fake_token\n",
+      "Auth Token: fake_token\n"
+      "Timeouts: 5 1 10\n",
       "{}")});
   GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
                    std::unique_ptr<HttpRequest::Factory>(
@@ -952,7 +1016,7 @@ TEST(GcsFileSystemTest, GetChildren_Root) {
                    0 /* stat cache max age */, 0 /* stat cache max entries */,
                    0 /* matching paths cache max age */,
                    0 /* matching paths cache max entries */,
-                   0 /* initial retry delay */);
+                   0 /* initial retry delay*/, kTestTimeoutConfig);
 
   std::vector<string> children;
   TF_EXPECT_OK(fs.GetChildren("gs://bucket-a-b-c", &children));
@@ -965,7 +1029,8 @@ TEST(GcsFileSystemTest, GetChildren_Empty) {
       "Uri: https://www.googleapis.com/storage/v1/b/bucket/o?"
       "fields=items%2Fname%2Cprefixes%2CnextPageToken&delimiter=%2F&prefix="
       "path%2F\n"
-      "Auth Token: fake_token\n",
+      "Auth Token: fake_token\n"
+      "Timeouts: 5 1 10\n",
       "{}")});
   GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
                    std::unique_ptr<HttpRequest::Factory>(
@@ -974,7 +1039,7 @@ TEST(GcsFileSystemTest, GetChildren_Empty) {
                    0 /* stat cache max age */, 0 /* stat cache max entries */,
                    0 /* matching paths cache max age */,
                    0 /* matching paths cache max entries */,
-                   0 /* initial retry delay */);
+                   0 /* initial retry delay*/, kTestTimeoutConfig);
 
   std::vector<string> children;
   TF_EXPECT_OK(fs.GetChildren("gs://bucket/path/", &children));
@@ -988,7 +1053,8 @@ TEST(GcsFileSystemTest, GetChildren_Pagination) {
            "Uri: https://www.googleapis.com/storage/v1/b/bucket/o?"
            "fields=items%2Fname%2Cprefixes%2CnextPageToken&delimiter=%2F&"
            "prefix=path%2F\n"
-           "Auth Token: fake_token\n",
+           "Auth Token: fake_token\n"
+           "Timeouts: 5 1 10\n",
            "{\"nextPageToken\": \"ABCD==\", "
            "\"items\": [ "
            "  { \"name\": \"path/file1.txt\" },"
@@ -999,7 +1065,8 @@ TEST(GcsFileSystemTest, GetChildren_Pagination) {
            "fields=items%2Fname%2Cprefixes%2CnextPageToken&delimiter=%2F&"
            "prefix=path%2F"
            "&pageToken=ABCD==\n"
-           "Auth Token: fake_token\n",
+           "Auth Token: fake_token\n"
+           "Timeouts: 5 1 10\n",
            "{\"items\": [ "
            "  { \"name\": \"path/file4.txt\" },"
            "  { \"name\": \"path/file5.txt\" }]}")});
@@ -1011,7 +1078,7 @@ TEST(GcsFileSystemTest, GetChildren_Pagination) {
                    0 /* stat cache max age */, 0 /* stat cache max entries */,
                    0 /* matching paths cache max age */,
                    0 /* matching paths cache max entries */,
-                   0 /* initial retry delay */);
+                   0 /* initial retry delay*/, kTestTimeoutConfig);
 
   std::vector<string> children;
   TF_EXPECT_OK(fs.GetChildren("gs://bucket/path", &children));
@@ -1025,7 +1092,8 @@ TEST(GcsFileSystemTest, GetMatchingPaths_NoWildcard) {
   std::vector<HttpRequest*> requests({new FakeHttpRequest(
       "Uri: https://www.googleapis.com/storage/v1/b/bucket/o?"
       "fields=items%2Fname%2CnextPageToken&prefix=path%2Fsubpath%2F\n"
-      "Auth Token: fake_token\n",
+      "Auth Token: fake_token\n"
+      "Timeouts: 5 1 10\n",
       "{\"items\": [ "
       "  { \"name\": \"path/subpath/file2.txt\" }]}")});
   GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
@@ -1035,7 +1103,7 @@ TEST(GcsFileSystemTest, GetMatchingPaths_NoWildcard) {
                    0 /* stat cache max age */, 0 /* stat cache max entries */,
                    0 /* matching paths cache max age */,
                    0 /* matching paths cache max entries */,
-                   0 /* initial retry delay */);
+                   0 /* initial retry delay*/, kTestTimeoutConfig);
 
   std::vector<string> result;
   TF_EXPECT_OK(
@@ -1048,7 +1116,8 @@ TEST(GcsFileSystemTest, GetMatchingPaths_BucketAndWildcard) {
   std::vector<HttpRequest*> requests({new FakeHttpRequest(
       "Uri: https://www.googleapis.com/storage/v1/b/bucket/o?"
       "fields=items%2Fname%2CnextPageToken\n"
-      "Auth Token: fake_token\n",
+      "Auth Token: fake_token\n"
+      "Timeouts: 5 1 10\n",
       "{\"items\": [ "
       "  { \"name\": \"path/file1.txt\" },"
       "  { \"name\": \"path/subpath/file2.txt\" },"
@@ -1060,7 +1129,7 @@ TEST(GcsFileSystemTest, GetMatchingPaths_BucketAndWildcard) {
                    0 /* stat cache max age */, 0 /* stat cache max entries */,
                    0 /* matching paths cache max age */,
                    0 /* matching paths cache max entries */,
-                   0 /* initial retry delay */);
+                   0 /* initial retry delay*/, kTestTimeoutConfig);
 
   std::vector<string> result;
   TF_EXPECT_OK(fs.GetMatchingPaths("gs://bucket/*/*", &result));
@@ -1074,7 +1143,8 @@ TEST(GcsFileSystemTest, GetMatchingPaths_FolderAndWildcard_Matches) {
   std::vector<HttpRequest*> requests({new FakeHttpRequest(
       "Uri: https://www.googleapis.com/storage/v1/b/bucket/o?"
       "fields=items%2Fname%2CnextPageToken&prefix=path%2F\n"
-      "Auth Token: fake_token\n",
+      "Auth Token: fake_token\n"
+      "Timeouts: 5 1 10\n",
       "{\"items\": [ "
       "  { \"name\": \"path/file1.txt\" },"
       "  { \"name\": \"path/subpath/file2.txt\" },"
@@ -1086,7 +1156,7 @@ TEST(GcsFileSystemTest, GetMatchingPaths_FolderAndWildcard_Matches) {
                    0 /* stat cache max age */, 0 /* stat cache max entries */,
                    0 /* matching paths cache max age */,
                    0 /* matching paths cache max entries */,
-                   0 /* initial retry delay */);
+                   0 /* initial retry delay*/, kTestTimeoutConfig);
 
   std::vector<string> result;
   TF_EXPECT_OK(fs.GetMatchingPaths("gs://bucket/path/*/file2.txt", &result));
@@ -1098,7 +1168,8 @@ TEST(GcsFileSystemTest, GetMatchingPaths_SelfDirectoryMarker) {
   std::vector<HttpRequest*> requests({new FakeHttpRequest(
       "Uri: https://www.googleapis.com/storage/v1/b/bucket/o?"
       "fields=items%2Fname%2CnextPageToken&prefix=path%2F\n"
-      "Auth Token: fake_token\n",
+      "Auth Token: fake_token\n"
+      "Timeouts: 5 1 10\n",
       "{\"items\": [ "
       "  { \"name\": \"path/\" },"
       "  { \"name\": \"path/file3.txt\" }]}")});
@@ -1109,7 +1180,7 @@ TEST(GcsFileSystemTest, GetMatchingPaths_SelfDirectoryMarker) {
                    0 /* stat cache max age */, 0 /* stat cache max entries */,
                    0 /* matching paths cache max age */,
                    0 /* matching paths cache max entries */,
-                   0 /* initial retry delay */);
+                   0 /* initial retry delay*/, kTestTimeoutConfig);
 
   std::vector<string> result;
   TF_EXPECT_OK(fs.GetMatchingPaths("gs://bucket/path/*", &result));
@@ -1120,7 +1191,8 @@ TEST(GcsFileSystemTest, GetMatchingPaths_FolderAndWildcard_NoMatches) {
   std::vector<HttpRequest*> requests({new FakeHttpRequest(
       "Uri: https://www.googleapis.com/storage/v1/b/bucket/o?"
       "fields=items%2Fname%2CnextPageToken&prefix=path%2F\n"
-      "Auth Token: fake_token\n",
+      "Auth Token: fake_token\n"
+      "Timeouts: 5 1 10\n",
       "{\"items\": [ "
       "  { \"name\": \"path/file1.txt\" },"
       "  { \"name\": \"path/subpath/file2.txt\" },"
@@ -1132,7 +1204,7 @@ TEST(GcsFileSystemTest, GetMatchingPaths_FolderAndWildcard_NoMatches) {
                    0 /* stat cache max age */, 0 /* stat cache max entries */,
                    0 /* matching paths cache max age */,
                    0 /* matching paths cache max entries */,
-                   0 /* initial retry delay */);
+                   0 /* initial retry delay*/, kTestTimeoutConfig);
 
   std::vector<string> result;
   TF_EXPECT_OK(fs.GetMatchingPaths("gs://bucket/path/*/file3.txt", &result));
@@ -1148,7 +1220,7 @@ TEST(GcsFileSystemTest, GetMatchingPaths_OnlyWildcard) {
                    0 /* stat cache max age */, 0 /* stat cache max entries */,
                    0 /* matching paths cache max age */,
                    0 /* matching paths cache max entries */,
-                   0 /* initial retry delay */);
+                   0 /* initial retry delay*/, kTestTimeoutConfig);
 
   std::vector<string> result;
   EXPECT_EQ(errors::Code::INVALID_ARGUMENT,
@@ -1160,13 +1232,15 @@ TEST(GcsFileSystemTest, GetMatchingPaths_Cache) {
       {new FakeHttpRequest(
            "Uri: https://www.googleapis.com/storage/v1/b/bucket/o?"
            "fields=items%2Fname%2CnextPageToken&prefix=path%2Fsubpath%2F\n"
-           "Auth Token: fake_token\n",
+           "Auth Token: fake_token\n"
+           "Timeouts: 5 1 10\n",
            "{\"items\": [ "
            "  { \"name\": \"path/subpath/file2.txt\" }]}"),
        new FakeHttpRequest(
            "Uri: https://www.googleapis.com/storage/v1/b/bucket/o?"
            "fields=items%2Fname%2CnextPageToken\n"
-           "Auth Token: fake_token\n",
+           "Auth Token: fake_token\n"
+           "Timeouts: 5 1 10\n",
            "{\"items\": [ "
            "  { \"name\": \"path/file1.txt\" },"
            "  { \"name\": \"path/subpath/file2.txt\" },"
@@ -1178,7 +1252,7 @@ TEST(GcsFileSystemTest, GetMatchingPaths_Cache) {
                    0 /* stat cache max age */, 0 /* stat cache max entries */,
                    3600 /* matching paths cache max age */,
                    0 /* matching paths cache max entries */,
-                   0 /* initial retry delay */);
+                   0 /* initial retry delay*/, kTestTimeoutConfig);
 
   // Repeated calls to fs.GetMatchingPaths on these patterns should not lead to
   // any additional HTTP requests to GCS.
@@ -1201,26 +1275,30 @@ TEST(GcsFileSystemTest, DeleteFile) {
       {new FakeHttpRequest(
            "Uri: https://storage.googleapis.com/bucket/path%2Ffile1.txt\n"
            "Auth Token: fake_token\n"
-           "Range: 0-15\n",
+           "Range: 0-15\n"
+           "Timeouts: 5 1 20\n",
            "01234567"),
        new FakeHttpRequest("Uri: https://www.googleapis.com/storage/v1/b"
                            "/bucket/o/path%2Ffile1.txt\n"
                            "Auth Token: fake_token\n"
+                           "Timeouts: 5 1 10\n"
                            "Delete: yes\n",
                            ""),
        new FakeHttpRequest(
            "Uri: https://storage.googleapis.com/bucket/path%2Ffile1.txt\n"
            "Auth Token: fake_token\n"
-           "Range: 0-15\n",
+           "Range: 0-15\n"
+           "Timeouts: 5 1 20\n",
            "76543210")});
-  GcsFileSystem fs(
-      std::unique_ptr<AuthProvider>(new FakeAuthProvider),
-      std::unique_ptr<HttpRequest::Factory>(
-          new FakeHttpRequestFactory(&requests)),
-      16 /* block size */, 16 /* max bytes */, 0 /* max staleness */,
-      0 /* stat cache max age */, 0 /* stat cache max entries */,
-      0 /* matching paths cache max age */,
-      0 /* matching paths cache max entries */, 0 /* initial retry delay */);
+  GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
+                   std::unique_ptr<HttpRequest::Factory>(
+                       new FakeHttpRequestFactory(&requests)),
+                   16 /* block size */, 16 /* max bytes */,
+                   0 /* max staleness */, 0 /* stat cache max age */,
+                   0 /* stat cache max entries */,
+                   0 /* matching paths cache max age */,
+                   0 /* matching paths cache max entries */,
+                   0 /* initial retry delay*/, kTestTimeoutConfig);
 
   // Do an initial read of the file to load its contents into the block cache.
   char scratch[100];
@@ -1246,7 +1324,7 @@ TEST(GcsFileSystemTest, DeleteFile_NoObjectName) {
                    0 /* stat cache max age */, 0 /* stat cache max entries */,
                    0 /* matching paths cache max age */,
                    0 /* matching paths cache max entries */,
-                   0 /* initial retry delay */);
+                   0 /* initial retry delay*/, kTestTimeoutConfig);
 
   EXPECT_EQ(errors::Code::INVALID_ARGUMENT,
             fs.DeleteFile("gs://bucket/").code());
@@ -1256,7 +1334,8 @@ TEST(GcsFileSystemTest, DeleteDir_Empty) {
   std::vector<HttpRequest*> requests({new FakeHttpRequest(
       "Uri: https://www.googleapis.com/storage/v1/b/bucket/o?"
       "fields=items%2Fname%2CnextPageToken&prefix=path%2F&maxResults=2\n"
-      "Auth Token: fake_token\n",
+      "Auth Token: fake_token\n"
+      "Timeouts: 5 1 10\n",
       "{}")});
   GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
                    std::unique_ptr<HttpRequest::Factory>(
@@ -1265,7 +1344,7 @@ TEST(GcsFileSystemTest, DeleteDir_Empty) {
                    0 /* stat cache max age */, 0 /* stat cache max entries */,
                    0 /* matching paths cache max age */,
                    0 /* matching paths cache max entries */,
-                   0 /* initial retry delay */);
+                   0 /* initial retry delay*/, kTestTimeoutConfig);
 
   TF_EXPECT_OK(fs.DeleteDir("gs://bucket/path/"));
 }
@@ -1275,12 +1354,14 @@ TEST(GcsFileSystemTest, DeleteDir_OnlyDirMarkerLeft) {
       {new FakeHttpRequest(
            "Uri: https://www.googleapis.com/storage/v1/b/bucket/o?"
            "fields=items%2Fname%2CnextPageToken&prefix=path%2F&maxResults=2\n"
-           "Auth Token: fake_token\n",
+           "Auth Token: fake_token\n"
+           "Timeouts: 5 1 10\n",
            "{\"items\": [ "
            "  { \"name\": \"path/\" }]}"),
        new FakeHttpRequest("Uri: https://www.googleapis.com/storage/v1/b"
                            "/bucket/o/path%2F\n"
                            "Auth Token: fake_token\n"
+                           "Timeouts: 5 1 10\n"
                            "Delete: yes\n",
                            "")});
   GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
@@ -1290,7 +1371,7 @@ TEST(GcsFileSystemTest, DeleteDir_OnlyDirMarkerLeft) {
                    0 /* stat cache max age */, 0 /* stat cache max entries */,
                    0 /* matching paths cache max age */,
                    0 /* matching paths cache max entries */,
-                   0 /* initial retry delay */);
+                   0 /* initial retry delay*/, kTestTimeoutConfig);
 
   TF_EXPECT_OK(fs.DeleteDir("gs://bucket/path/"));
 }
@@ -1298,7 +1379,8 @@ TEST(GcsFileSystemTest, DeleteDir_OnlyDirMarkerLeft) {
 TEST(GcsFileSystemTest, DeleteDir_BucketOnly) {
   std::vector<HttpRequest*> requests({new FakeHttpRequest(
       "Uri: https://www.googleapis.com/storage/v1/b/bucket/o?fields=items%2F"
-      "name%2CnextPageToken&maxResults=2\nAuth Token: fake_token\n",
+      "name%2CnextPageToken&maxResults=2\nAuth Token: fake_token\n"
+      "Timeouts: 5 1 10\n",
       "{}")});
   GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
                    std::unique_ptr<HttpRequest::Factory>(
@@ -1307,7 +1389,7 @@ TEST(GcsFileSystemTest, DeleteDir_BucketOnly) {
                    0 /* stat cache max age */, 0 /* stat cache max entries */,
                    0 /* matching paths cache max age */,
                    0 /* matching paths cache max entries */,
-                   0 /* initial retry delay */);
+                   0 /* initial retry delay*/, kTestTimeoutConfig);
 
   TF_EXPECT_OK(fs.DeleteDir("gs://bucket"));
 }
@@ -1316,7 +1398,8 @@ TEST(GcsFileSystemTest, DeleteDir_NonEmpty) {
   std::vector<HttpRequest*> requests({new FakeHttpRequest(
       "Uri: https://www.googleapis.com/storage/v1/b/bucket/o?"
       "fields=items%2Fname%2CnextPageToken&prefix=path%2F&maxResults=2\n"
-      "Auth Token: fake_token\n",
+      "Auth Token: fake_token\n"
+      "Timeouts: 5 1 10\n",
       "{\"items\": [ "
       "  { \"name\": \"path/file1.txt\" }]}")});
   GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
@@ -1326,7 +1409,7 @@ TEST(GcsFileSystemTest, DeleteDir_NonEmpty) {
                    0 /* stat cache max age */, 0 /* stat cache max entries */,
                    0 /* matching paths cache max age */,
                    0 /* matching paths cache max entries */,
-                   0 /* initial retry delay */);
+                   0 /* initial retry delay*/, kTestTimeoutConfig);
 
   EXPECT_EQ(error::Code::FAILED_PRECONDITION,
             fs.DeleteDir("gs://bucket/path/").code());
@@ -1336,7 +1419,8 @@ TEST(GcsFileSystemTest, GetFileSize) {
   std::vector<HttpRequest*> requests({new FakeHttpRequest(
       "Uri: https://www.googleapis.com/storage/v1/b/bucket/o/"
       "file.txt?fields=size%2Cupdated\n"
-      "Auth Token: fake_token\n",
+      "Auth Token: fake_token\n"
+      "Timeouts: 5 1 10\n",
       strings::StrCat("{\"size\": \"1010\","
                       "\"updated\": \"2016-04-29T23:15:24.896Z\"}"))});
   GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
@@ -1346,7 +1430,7 @@ TEST(GcsFileSystemTest, GetFileSize) {
                    0 /* stat cache max age */, 0 /* stat cache max entries */,
                    0 /* matching paths cache max age */,
                    0 /* matching paths cache max entries */,
-                   0 /* initial retry delay */);
+                   0 /* initial retry delay*/, kTestTimeoutConfig);
 
   uint64 size;
   TF_EXPECT_OK(fs.GetFileSize("gs://bucket/file.txt", &size));
@@ -1362,7 +1446,7 @@ TEST(GcsFileSystemTest, GetFileSize_NoObjectName) {
                    0 /* stat cache max age */, 0 /* stat cache max entries */,
                    0 /* matching paths cache max age */,
                    0 /* matching paths cache max entries */,
-                   0 /* initial retry delay */);
+                   0 /* initial retry delay*/, kTestTimeoutConfig);
 
   uint64 size;
   EXPECT_EQ(errors::Code::INVALID_ARGUMENT,
@@ -1376,14 +1460,16 @@ TEST(GcsFileSystemTest, RenameFile_Folder) {
            "Uri: https://www.googleapis.com/storage/v1/b/bucket/o?"
            "fields=items%2Fname%2CnextPageToken&prefix=path1%2F"
            "&maxResults=1\n"
-           "Auth Token: fake_token\n",
+           "Auth Token: fake_token\n"
+           "Timeouts: 5 1 10\n",
            "{\"items\": [ "
            "  { \"name\": \"path1/subfolder/file1.txt\" }]}"),
        // Requesting the full list of files in the folder.
        new FakeHttpRequest(
            "Uri: https://www.googleapis.com/storage/v1/b/bucket/o?"
            "fields=items%2Fname%2CnextPageToken&prefix=path1%2F\n"
-           "Auth Token: fake_token\n",
+           "Auth Token: fake_token\n"
+           "Timeouts: 5 1 10\n",
            "{\"items\": [ "
            "  { \"name\": \"path1/\" },"  // A directory marker.
            "  { \"name\": \"path1/subfolder/file1.txt\" },"
@@ -1393,13 +1479,15 @@ TEST(GcsFileSystemTest, RenameFile_Folder) {
            "Uri: https://www.googleapis.com/storage/v1/b/bucket/o/"
            "path1%2F/rewriteTo/b/bucket/o/path2%2F\n"
            "Auth Token: fake_token\n"
-           "Post: yes\n",
+           "Post: yes\n"
+           "Timeouts: 5 1 10\n",
            "{\"done\": true}"),
        // Deleting the original directory marker.
        new FakeHttpRequest(
            "Uri: https://www.googleapis.com/storage/v1/b/bucket/o/"
            "path1%2F\n"
            "Auth Token: fake_token\n"
+           "Timeouts: 5 1 10\n"
            "Delete: yes\n",
            ""),
        // Copying the first file.
@@ -1408,13 +1496,15 @@ TEST(GcsFileSystemTest, RenameFile_Folder) {
            "path1%2Fsubfolder%2Ffile1.txt/rewriteTo/b/bucket/o/"
            "path2%2Fsubfolder%2Ffile1.txt\n"
            "Auth Token: fake_token\n"
-           "Post: yes\n",
+           "Post: yes\n"
+           "Timeouts: 5 1 10\n",
            "{\"done\": true}"),
        // Deleting the first original file.
        new FakeHttpRequest(
            "Uri: https://www.googleapis.com/storage/v1/b/bucket/o/"
            "path1%2Fsubfolder%2Ffile1.txt\n"
            "Auth Token: fake_token\n"
+           "Timeouts: 5 1 10\n"
            "Delete: yes\n",
            ""),
        // Copying the second file.
@@ -1422,13 +1512,15 @@ TEST(GcsFileSystemTest, RenameFile_Folder) {
            "Uri: https://www.googleapis.com/storage/v1/b/bucket/o/"
            "path1%2Ffile2.txt/rewriteTo/b/bucket/o/path2%2Ffile2.txt\n"
            "Auth Token: fake_token\n"
-           "Post: yes\n",
+           "Post: yes\n"
+           "Timeouts: 5 1 10\n",
            "{\"done\": true}"),
        // Deleting the second original file.
        new FakeHttpRequest(
            "Uri: https://www.googleapis.com/storage/v1/b/bucket/o/"
            "path1%2Ffile2.txt\n"
            "Auth Token: fake_token\n"
+           "Timeouts: 5 1 10\n"
            "Delete: yes\n",
            "")});
   GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
@@ -1438,7 +1530,7 @@ TEST(GcsFileSystemTest, RenameFile_Folder) {
                    0 /* stat cache max age */, 0 /* stat cache max entries */,
                    0 /* matching paths cache max age */,
                    0 /* matching paths cache max entries */,
-                   0 /* initial retry delay */);
+                   0 /* initial retry delay*/, kTestTimeoutConfig);
 
   TF_EXPECT_OK(fs.RenameFile("gs://bucket/path1", "gs://bucket/path2/"));
 }
@@ -1448,25 +1540,29 @@ TEST(GcsFileSystemTest, RenameFile_Object) {
       {new FakeHttpRequest(
            "Uri: https://storage.googleapis.com/bucket/path%2Fsrc.txt\n"
            "Auth Token: fake_token\n"
-           "Range: 0-15\n",
+           "Range: 0-15\n"
+           "Timeouts: 5 1 20\n",
            "01234567"),
        new FakeHttpRequest(
            "Uri: https://storage.googleapis.com/bucket/path%2Fdst.txt\n"
            "Auth Token: fake_token\n"
-           "Range: 0-15\n",
+           "Range: 0-15\n"
+           "Timeouts: 5 1 20\n",
            "76543210"),
        // IsDirectory is checking whether there are children objects.
        new FakeHttpRequest(
            "Uri: https://www.googleapis.com/storage/v1/b/bucket/o?"
            "fields=items%2Fname%2CnextPageToken&prefix=path%2Fsrc.txt%2F"
            "&maxResults=1\n"
-           "Auth Token: fake_token\n",
+           "Auth Token: fake_token\n"
+           "Timeouts: 5 1 10\n",
            "{}"),
        // IsDirectory is checking if the path exists as an object.
        new FakeHttpRequest(
            "Uri: https://www.googleapis.com/storage/v1/b/bucket/o/"
            "path%2Fsrc.txt?fields=size%2Cupdated\n"
-           "Auth Token: fake_token\n",
+           "Auth Token: fake_token\n"
+           "Timeouts: 5 1 10\n",
            strings::StrCat("{\"size\": \"1010\","
                            "\"updated\": \"2016-04-29T23:15:24.896Z\"}")),
        // Copying to the new location.
@@ -1474,33 +1570,38 @@ TEST(GcsFileSystemTest, RenameFile_Object) {
            "Uri: https://www.googleapis.com/storage/v1/b/bucket/o/"
            "path%2Fsrc.txt/rewriteTo/b/bucket/o/path%2Fdst.txt\n"
            "Auth Token: fake_token\n"
-           "Post: yes\n",
+           "Post: yes\n"
+           "Timeouts: 5 1 10\n",
            "{\"done\": true}"),
        // Deleting the original file.
        new FakeHttpRequest(
            "Uri: https://www.googleapis.com/storage/v1/b/bucket/o/"
            "path%2Fsrc.txt\n"
            "Auth Token: fake_token\n"
+           "Timeouts: 5 1 10\n"
            "Delete: yes\n",
            ""),
        new FakeHttpRequest(
            "Uri: https://storage.googleapis.com/bucket/path%2Fsrc.txt\n"
            "Auth Token: fake_token\n"
-           "Range: 0-15\n",
+           "Range: 0-15\n"
+           "Timeouts: 5 1 20\n",
            "89abcdef"),
        new FakeHttpRequest(
            "Uri: https://storage.googleapis.com/bucket/path%2Fdst.txt\n"
            "Auth Token: fake_token\n"
-           "Range: 0-15\n",
+           "Range: 0-15\n"
+           "Timeouts: 5 1 20\n",
            "fedcba98")});
-  GcsFileSystem fs(
-      std::unique_ptr<AuthProvider>(new FakeAuthProvider),
-      std::unique_ptr<HttpRequest::Factory>(
-          new FakeHttpRequestFactory(&requests)),
-      16 /* block size */, 64 /* max bytes */, 0 /* max staleness */,
-      0 /* stat cache max age */, 0 /* stat cache max entries */,
-      0 /* matching paths cache max age */,
-      0 /* matching paths cache max entries */, 0 /* initial retry delay */);
+  GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
+                   std::unique_ptr<HttpRequest::Factory>(
+                       new FakeHttpRequestFactory(&requests)),
+                   16 /* block size */, 64 /* max bytes */,
+                   0 /* max staleness */, 0 /* stat cache max age */,
+                   0 /* stat cache max entries */,
+                   0 /* matching paths cache max age */,
+                   0 /* matching paths cache max entries */,
+                   0 /* initial retry delay*/, kTestTimeoutConfig);
   // Do an initial read of the source and destination files to load their
   // contents into the block cache.
   char scratch[100];
@@ -1531,13 +1632,15 @@ TEST(GcsFileSystemTest, RenameFile_Object_DeletionRetried) {
            "Uri: https://www.googleapis.com/storage/v1/b/bucket/o?"
            "fields=items%2Fname%2CnextPageToken&prefix=path%2Fsrc.txt%2F"
            "&maxResults=1\n"
-           "Auth Token: fake_token\n",
+           "Auth Token: fake_token\n"
+           "Timeouts: 5 1 10\n",
            "{}"),
        // IsDirectory is checking if the path exists as an object.
        new FakeHttpRequest(
            "Uri: https://www.googleapis.com/storage/v1/b/bucket/o/"
            "path%2Fsrc.txt?fields=size%2Cupdated\n"
-           "Auth Token: fake_token\n",
+           "Auth Token: fake_token\n"
+           "Timeouts: 5 1 10\n",
            strings::StrCat("{\"size\": \"1010\","
                            "\"updated\": \"2016-04-29T23:15:24.896Z\"}")),
        // Copying to the new location.
@@ -1545,13 +1648,15 @@ TEST(GcsFileSystemTest, RenameFile_Object_DeletionRetried) {
            "Uri: https://www.googleapis.com/storage/v1/b/bucket/o/"
            "path%2Fsrc.txt/rewriteTo/b/bucket/o/path%2Fdst.txt\n"
            "Auth Token: fake_token\n"
-           "Post: yes\n",
+           "Post: yes\n"
+           "Timeouts: 5 1 10\n",
            "{\"done\": true}"),
        // Deleting the original file - the deletion returns a failure.
        new FakeHttpRequest(
            "Uri: https://www.googleapis.com/storage/v1/b/bucket/o/"
            "path%2Fsrc.txt\n"
            "Auth Token: fake_token\n"
+           "Timeouts: 5 1 10\n"
            "Delete: yes\n",
            "", errors::Unavailable("503"), 503),
        // Deleting the original file again - the deletion returns NOT_FOUND.
@@ -1559,6 +1664,7 @@ TEST(GcsFileSystemTest, RenameFile_Object_DeletionRetried) {
            "Uri: https://www.googleapis.com/storage/v1/b/bucket/o/"
            "path%2Fsrc.txt\n"
            "Auth Token: fake_token\n"
+           "Timeouts: 5 1 10\n"
            "Delete: yes\n",
            "", errors::NotFound("404"), 404)});
   GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
@@ -1568,7 +1674,7 @@ TEST(GcsFileSystemTest, RenameFile_Object_DeletionRetried) {
                    0 /* stat cache max age */, 0 /* stat cache max entries */,
                    0 /* matching paths cache max age */,
                    0 /* matching paths cache max entries */,
-                   0 /* initial retry delay */);
+                   0 /* initial retry delay*/, kTestTimeoutConfig);
 
   TF_EXPECT_OK(
       fs.RenameFile("gs://bucket/path/src.txt", "gs://bucket/path/dst.txt"));
@@ -1582,13 +1688,15 @@ TEST(GcsFileSystemTest, RenameFile_Object_Incomplete) {
            "Uri: https://www.googleapis.com/storage/v1/b/bucket/o?"
            "fields=items%2Fname%2CnextPageToken&prefix=path%2Fsrc.txt%2F"
            "&maxResults=1\n"
-           "Auth Token: fake_token\n",
+           "Auth Token: fake_token\n"
+           "Timeouts: 5 1 10\n",
            "{}"),
        // IsDirectory is checking if the path exists as an object.
        new FakeHttpRequest(
            "Uri: https://www.googleapis.com/storage/v1/b/bucket/o/"
            "path%2Fsrc.txt?fields=size%2Cupdated\n"
-           "Auth Token: fake_token\n",
+           "Auth Token: fake_token\n"
+           "Timeouts: 5 1 10\n",
            strings::StrCat("{\"size\": \"1010\","
                            "\"updated\": \"2016-04-29T23:15:24.896Z\"}")),
        // Copying to the new location.
@@ -1596,7 +1704,8 @@ TEST(GcsFileSystemTest, RenameFile_Object_Incomplete) {
            "Uri: https://www.googleapis.com/storage/v1/b/bucket/o/"
            "path%2Fsrc.txt/rewriteTo/b/bucket/o/path%2Fdst.txt\n"
            "Auth Token: fake_token\n"
-           "Post: yes\n",
+           "Post: yes\n"
+           "Timeouts: 5 1 10\n",
            "{\"done\": false}")});
   GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
                    std::unique_ptr<HttpRequest::Factory>(
@@ -1605,7 +1714,7 @@ TEST(GcsFileSystemTest, RenameFile_Object_Incomplete) {
                    0 /* stat cache max age */, 0 /* stat cache max entries */,
                    0 /* matching paths cache max age */,
                    0 /* matching paths cache max entries */,
-                   0 /* initial retry delay */);
+                   0 /* initial retry delay*/, kTestTimeoutConfig);
 
   EXPECT_EQ(
       errors::Code::UNIMPLEMENTED,
@@ -1617,7 +1726,8 @@ TEST(GcsFileSystemTest, Stat_Object) {
   std::vector<HttpRequest*> requests({new FakeHttpRequest(
       "Uri: https://www.googleapis.com/storage/v1/b/bucket/o/"
       "file.txt?fields=size%2Cupdated\n"
-      "Auth Token: fake_token\n",
+      "Auth Token: fake_token\n"
+      "Timeouts: 5 1 10\n",
       strings::StrCat("{\"size\": \"1010\","
                       "\"updated\": \"2016-04-29T23:15:24.896Z\"}"))});
   GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
@@ -1627,7 +1737,7 @@ TEST(GcsFileSystemTest, Stat_Object) {
                    0 /* stat cache max age */, 0 /* stat cache max entries */,
                    0 /* matching paths cache max age */,
                    0 /* matching paths cache max entries */,
-                   0 /* initial retry delay */);
+                   0 /* initial retry delay*/, kTestTimeoutConfig);
 
   FileStatistics stat;
   TF_EXPECT_OK(fs.Stat("gs://bucket/file.txt", &stat));
@@ -1641,13 +1751,15 @@ TEST(GcsFileSystemTest, Stat_Folder) {
       {new FakeHttpRequest(
            "Uri: https://www.googleapis.com/storage/v1/b/bucket/o/"
            "subfolder?fields=size%2Cupdated\n"
-           "Auth Token: fake_token\n",
+           "Auth Token: fake_token\n"
+           "Timeouts: 5 1 10\n",
            "", errors::NotFound("404"), 404),
        new FakeHttpRequest(
            "Uri: https://www.googleapis.com/storage/v1/b/bucket/o?"
            "fields=items%2Fname%2CnextPageToken&prefix=subfolder%2F"
            "&maxResults=1\n"
-           "Auth Token: fake_token\n",
+           "Auth Token: fake_token\n"
+           "Timeouts: 5 1 10\n",
            "{\"items\": [ "
            "  { \"name\": \"subfolder/\" }]}")});
   GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
@@ -1657,7 +1769,7 @@ TEST(GcsFileSystemTest, Stat_Folder) {
                    0 /* stat cache max age */, 0 /* stat cache max entries */,
                    0 /* matching paths cache max age */,
                    0 /* matching paths cache max entries */,
-                   0 /* initial retry delay */);
+                   0 /* initial retry delay*/, kTestTimeoutConfig);
 
   FileStatistics stat;
   TF_EXPECT_OK(fs.Stat("gs://bucket/subfolder", &stat));
@@ -1671,13 +1783,15 @@ TEST(GcsFileSystemTest, Stat_ObjectOrFolderNotFound) {
       {new FakeHttpRequest(
            "Uri: https://www.googleapis.com/storage/v1/b/bucket/o/"
            "path?fields=size%2Cupdated\n"
-           "Auth Token: fake_token\n",
+           "Auth Token: fake_token\n"
+           "Timeouts: 5 1 10\n",
            "", errors::NotFound("404"), 404),
        new FakeHttpRequest(
            "Uri: https://www.googleapis.com/storage/v1/b/bucket/o?"
            "fields=items%2Fname%2CnextPageToken&prefix=path%2F"
            "&maxResults=1\n"
-           "Auth Token: fake_token\n",
+           "Auth Token: fake_token\n"
+           "Timeouts: 5 1 10\n",
            "{}")});
   GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
                    std::unique_ptr<HttpRequest::Factory>(
@@ -1686,7 +1800,7 @@ TEST(GcsFileSystemTest, Stat_ObjectOrFolderNotFound) {
                    0 /* stat cache max age */, 0 /* stat cache max entries */,
                    0 /* matching paths cache max age */,
                    0 /* matching paths cache max entries */,
-                   0 /* initial retry delay */);
+                   0 /* initial retry delay*/, kTestTimeoutConfig);
 
   FileStatistics stat;
   EXPECT_EQ(error::Code::NOT_FOUND, fs.Stat("gs://bucket/path", &stat).code());
@@ -1695,7 +1809,8 @@ TEST(GcsFileSystemTest, Stat_ObjectOrFolderNotFound) {
 TEST(GcsFileSystemTest, Stat_Bucket) {
   std::vector<HttpRequest*> requests({new FakeHttpRequest(
       "Uri: https://www.googleapis.com/storage/v1/b/bucket\n"
-      "Auth Token: fake_token\n",
+      "Auth Token: fake_token\n"
+      "Timeouts: 5 1 10\n",
       "{}")});
   GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
                    std::unique_ptr<HttpRequest::Factory>(
@@ -1704,7 +1819,7 @@ TEST(GcsFileSystemTest, Stat_Bucket) {
                    0 /* stat cache max age */, 0 /* stat cache max entries */,
                    0 /* matching paths cache max age */,
                    0 /* matching paths cache max entries */,
-                   0 /* initial retry delay */);
+                   0 /* initial retry delay*/, kTestTimeoutConfig);
 
   FileStatistics stat;
   TF_EXPECT_OK(fs.Stat("gs://bucket/", &stat));
@@ -1716,7 +1831,8 @@ TEST(GcsFileSystemTest, Stat_Bucket) {
 TEST(GcsFileSystemTest, Stat_BucketNotFound) {
   std::vector<HttpRequest*> requests({new FakeHttpRequest(
       "Uri: https://www.googleapis.com/storage/v1/b/bucket\n"
-      "Auth Token: fake_token\n",
+      "Auth Token: fake_token\n"
+      "Timeouts: 5 1 10\n",
       "", errors::NotFound("404"), 404)});
   GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
                    std::unique_ptr<HttpRequest::Factory>(
@@ -1725,7 +1841,7 @@ TEST(GcsFileSystemTest, Stat_BucketNotFound) {
                    0 /* stat cache max age */, 0 /* stat cache max entries */,
                    0 /* matching paths cache max age */,
                    0 /* matching paths cache max entries */,
-                   0 /* initial retry delay */);
+                   0 /* initial retry delay*/, kTestTimeoutConfig);
 
   FileStatistics stat;
   EXPECT_EQ(error::Code::NOT_FOUND, fs.Stat("gs://bucket/", &stat).code());
@@ -1736,29 +1852,33 @@ TEST(GcsFileSystemTest, Stat_Cache) {
       {new FakeHttpRequest(
            "Uri: https://www.googleapis.com/storage/v1/b/bucket/o/"
            "file.txt?fields=size%2Cupdated\n"
-           "Auth Token: fake_token\n",
+           "Auth Token: fake_token\n"
+           "Timeouts: 5 1 10\n",
            strings::StrCat("{\"size\": \"1010\","
                            "\"updated\": \"2016-04-29T23:15:24.896Z\"}")),
        new FakeHttpRequest(
            "Uri: https://www.googleapis.com/storage/v1/b/bucket/o/"
            "subfolder?fields=size%2Cupdated\n"
-           "Auth Token: fake_token\n",
+           "Auth Token: fake_token\n"
+           "Timeouts: 5 1 10\n",
            "", errors::NotFound("404"), 404),
        new FakeHttpRequest(
            "Uri: https://www.googleapis.com/storage/v1/b/bucket/o?"
            "fields=items%2Fname%2CnextPageToken&prefix=subfolder%2F"
            "&maxResults=1\n"
-           "Auth Token: fake_token\n",
+           "Auth Token: fake_token\n"
+           "Timeouts: 5 1 10\n",
            "{\"items\": [ "
            "  { \"name\": \"subfolder/\" }]}")});
-  GcsFileSystem fs(
-      std::unique_ptr<AuthProvider>(new FakeAuthProvider),
-      std::unique_ptr<HttpRequest::Factory>(
-          new FakeHttpRequestFactory(&requests)),
-      0 /* block size */, 0 /* max bytes */, 0 /* max staleness */,
-      3600 /* stat cache max age */, 0 /* stat cache max entries */,
-      0 /* matching paths cache max age */,
-      0 /* matching paths cache max entries */, 0 /* initial retry delay */);
+  GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
+                   std::unique_ptr<HttpRequest::Factory>(
+                       new FakeHttpRequestFactory(&requests)),
+                   0 /* block size */, 0 /* max bytes */, 0 /* max staleness */,
+                   3600 /* stat cache max age */,
+                   0 /* stat cache max entries */,
+                   0 /* matching paths cache max age */,
+                   0 /* matching paths cache max entries */,
+                   0 /* initial retry delay*/, kTestTimeoutConfig);
 
   // Repeated calls to fs.Stat on these paths should not lead to any additional
   // HTTP requests to GCS.
@@ -1781,12 +1901,14 @@ TEST(GcsFileSystemTest, IsDirectory_NotFound) {
            "Uri: https://www.googleapis.com/storage/v1/b/bucket/o?"
            "fields=items%2Fname%2CnextPageToken&prefix=file.txt%2F"
            "&maxResults=1\n"
-           "Auth Token: fake_token\n",
+           "Auth Token: fake_token\n"
+           "Timeouts: 5 1 10\n",
            "{}"),
        new FakeHttpRequest(
            "Uri: https://www.googleapis.com/storage/v1/b/bucket/o/"
            "file.txt?fields=size%2Cupdated\n"
-           "Auth Token: fake_token\n",
+           "Auth Token: fake_token\n"
+           "Timeouts: 5 1 10\n",
            "", errors::NotFound("404"), 404)});
   GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
                    std::unique_ptr<HttpRequest::Factory>(
@@ -1795,7 +1917,7 @@ TEST(GcsFileSystemTest, IsDirectory_NotFound) {
                    0 /* stat cache max age */, 0 /* stat cache max entries */,
                    0 /* matching paths cache max age */,
                    0 /* matching paths cache max entries */,
-                   0 /* initial retry delay */);
+                   0 /* initial retry delay*/, kTestTimeoutConfig);
 
   EXPECT_EQ(error::Code::NOT_FOUND,
             fs.IsDirectory("gs://bucket/file.txt").code());
@@ -1807,12 +1929,14 @@ TEST(GcsFileSystemTest, IsDirectory_NotDirectoryButObject) {
            "Uri: https://www.googleapis.com/storage/v1/b/bucket/o?"
            "fields=items%2Fname%2CnextPageToken&prefix=file.txt%2F"
            "&maxResults=1\n"
-           "Auth Token: fake_token\n",
+           "Auth Token: fake_token\n"
+           "Timeouts: 5 1 10\n",
            "{}"),
        new FakeHttpRequest(
            "Uri: https://www.googleapis.com/storage/v1/b/bucket/o/"
            "file.txt?fields=size%2Cupdated\n"
-           "Auth Token: fake_token\n",
+           "Auth Token: fake_token\n"
+           "Timeouts: 5 1 10\n",
            strings::StrCat("{\"size\": \"1010\","
                            "\"updated\": \"2016-04-29T23:15:24.896Z\"}"))});
   GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
@@ -1822,7 +1946,7 @@ TEST(GcsFileSystemTest, IsDirectory_NotDirectoryButObject) {
                    0 /* stat cache max age */, 0 /* stat cache max entries */,
                    0 /* matching paths cache max age */,
                    0 /* matching paths cache max entries */,
-                   0 /* initial retry delay */);
+                   0 /* initial retry delay*/, kTestTimeoutConfig);
 
   EXPECT_EQ(error::Code::FAILED_PRECONDITION,
             fs.IsDirectory("gs://bucket/file.txt").code());
@@ -1834,13 +1958,15 @@ TEST(GcsFileSystemTest, IsDirectory_Yes) {
            "Uri: https://www.googleapis.com/storage/v1/b/bucket/o?"
            "fields=items%2Fname%2CnextPageToken&prefix=subfolder%2F"
            "&maxResults=1\n"
-           "Auth Token: fake_token\n",
+           "Auth Token: fake_token\n"
+           "Timeouts: 5 1 10\n",
            "{\"items\": [{\"name\": \"subfolder/\"}]}"),
        new FakeHttpRequest(
            "Uri: https://www.googleapis.com/storage/v1/b/bucket/o?"
            "fields=items%2Fname%2CnextPageToken&prefix=subfolder%2F"
            "&maxResults=1\n"
-           "Auth Token: fake_token\n",
+           "Auth Token: fake_token\n"
+           "Timeouts: 5 1 10\n",
            "{\"items\": [{\"name\": \"subfolder/\"}]}")});
   GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
                    std::unique_ptr<HttpRequest::Factory>(
@@ -1849,7 +1975,7 @@ TEST(GcsFileSystemTest, IsDirectory_Yes) {
                    0 /* stat cache max age */, 0 /* stat cache max entries */,
                    0 /* matching paths cache max age */,
                    0 /* matching paths cache max entries */,
-                   0 /* initial retry delay */);
+                   0 /* initial retry delay*/, kTestTimeoutConfig);
 
   TF_EXPECT_OK(fs.IsDirectory("gs://bucket/subfolder"));
   TF_EXPECT_OK(fs.IsDirectory("gs://bucket/subfolder/"));
@@ -1859,11 +1985,13 @@ TEST(GcsFileSystemTest, IsDirectory_Bucket) {
   std::vector<HttpRequest*> requests(
       {new FakeHttpRequest(
            "Uri: https://www.googleapis.com/storage/v1/b/bucket\n"
-           "Auth Token: fake_token\n",
+           "Auth Token: fake_token\n"
+           "Timeouts: 5 1 10\n",
            "{}"),
        new FakeHttpRequest(
            "Uri: https://www.googleapis.com/storage/v1/b/bucket\n"
-           "Auth Token: fake_token\n",
+           "Auth Token: fake_token\n"
+           "Timeouts: 5 1 10\n",
            "{}")});
   GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
                    std::unique_ptr<HttpRequest::Factory>(
@@ -1872,7 +2000,7 @@ TEST(GcsFileSystemTest, IsDirectory_Bucket) {
                    0 /* stat cache max age */, 0 /* stat cache max entries */,
                    0 /* matching paths cache max age */,
                    0 /* matching paths cache max entries */,
-                   0 /* initial retry delay */);
+                   0 /* initial retry delay*/, kTestTimeoutConfig);
 
   TF_EXPECT_OK(fs.IsDirectory("gs://bucket"));
   TF_EXPECT_OK(fs.IsDirectory("gs://bucket/"));
@@ -1881,7 +2009,8 @@ TEST(GcsFileSystemTest, IsDirectory_Bucket) {
 TEST(GcsFileSystemTest, IsDirectory_BucketNotFound) {
   std::vector<HttpRequest*> requests({new FakeHttpRequest(
       "Uri: https://www.googleapis.com/storage/v1/b/bucket\n"
-      "Auth Token: fake_token\n",
+      "Auth Token: fake_token\n"
+      "Timeouts: 5 1 10\n",
       "", errors::NotFound("404"), 404)});
   GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
                    std::unique_ptr<HttpRequest::Factory>(
@@ -1890,7 +2019,7 @@ TEST(GcsFileSystemTest, IsDirectory_BucketNotFound) {
                    0 /* stat cache max age */, 0 /* stat cache max entries */,
                    0 /* matching paths cache max age */,
                    0 /* matching paths cache max entries */,
-                   0 /* initial retry delay */);
+                   0 /* initial retry delay*/, kTestTimeoutConfig);
 
   EXPECT_EQ(error::Code::NOT_FOUND, fs.IsDirectory("gs://bucket/").code());
 }
@@ -1902,10 +2031,12 @@ TEST(GcsFileSystemTest, CreateDir_Folder) {
            "uploadType=resumable&name=subpath%2F\n"
            "Auth Token: fake_token\n"
            "Header X-Upload-Content-Length: 0\n"
-           "Post: yes\n",
+           "Post: yes\n"
+           "Timeouts: 5 1 10\n",
            "", {{"Location", "https://custom/upload/location"}}),
        new FakeHttpRequest("Uri: https://custom/upload/location\n"
                            "Auth Token: fake_token\n"
+                           "Timeouts: 5 1 30\n"
                            "Put body: \n",
                            ""),
        new FakeHttpRequest(
@@ -1913,10 +2044,12 @@ TEST(GcsFileSystemTest, CreateDir_Folder) {
            "uploadType=resumable&name=subpath%2F\n"
            "Auth Token: fake_token\n"
            "Header X-Upload-Content-Length: 0\n"
-           "Post: yes\n",
+           "Post: yes\n"
+           "Timeouts: 5 1 10\n",
            "", {{"Location", "https://custom/upload/location"}}),
        new FakeHttpRequest("Uri: https://custom/upload/location\n"
                            "Auth Token: fake_token\n"
+                           "Timeouts: 5 1 30\n"
                            "Put body: \n",
                            "")});
   GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
@@ -1926,7 +2059,7 @@ TEST(GcsFileSystemTest, CreateDir_Folder) {
                    0 /* stat cache max age */, 0 /* stat cache max entries */,
                    0 /* matching paths cache max age */,
                    0 /* matching paths cache max entries */,
-                   0 /* initial retry delay */);
+                   0 /* initial retry delay*/, kTestTimeoutConfig);
 
   TF_EXPECT_OK(fs.CreateDir("gs://bucket/subpath"));
   TF_EXPECT_OK(fs.CreateDir("gs://bucket/subpath/"));
@@ -1936,11 +2069,13 @@ TEST(GcsFileSystemTest, CreateDir_Bucket) {
   std::vector<HttpRequest*> requests(
       {new FakeHttpRequest(
            "Uri: https://www.googleapis.com/storage/v1/b/bucket\n"
-           "Auth Token: fake_token\n",
+           "Auth Token: fake_token\n"
+           "Timeouts: 5 1 10\n",
            ""),
        new FakeHttpRequest(
            "Uri: https://www.googleapis.com/storage/v1/b/bucket\n"
-           "Auth Token: fake_token\n",
+           "Auth Token: fake_token\n"
+           "Timeouts: 5 1 10\n",
            "")});
   GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
                    std::unique_ptr<HttpRequest::Factory>(
@@ -1949,7 +2084,7 @@ TEST(GcsFileSystemTest, CreateDir_Bucket) {
                    0 /* stat cache max age */, 0 /* stat cache max entries */,
                    0 /* matching paths cache max age */,
                    0 /* matching paths cache max entries */,
-                   0 /* initial retry delay */);
+                   0 /* initial retry delay*/, kTestTimeoutConfig);
 
   TF_EXPECT_OK(fs.CreateDir("gs://bucket/"));
   TF_EXPECT_OK(fs.CreateDir("gs://bucket"));
@@ -1962,14 +2097,16 @@ TEST(GcsFileSystemTest, DeleteRecursively_Ok) {
            "Uri: https://www.googleapis.com/storage/v1/b/bucket/o?"
            "fields=items%2Fname%2CnextPageToken&prefix=path%2F"
            "&maxResults=1\n"
-           "Auth Token: fake_token\n",
+           "Auth Token: fake_token\n"
+           "Timeouts: 5 1 10\n",
            "{\"items\": [ "
            "  { \"name\": \"path/file1.txt\" }]}"),
        // GetChildren recursively.
        new FakeHttpRequest(
            "Uri: https://www.googleapis.com/storage/v1/b/bucket/o?"
            "fields=items%2Fname%2CnextPageToken&prefix=path%2F\n"
-           "Auth Token: fake_token\n",
+           "Auth Token: fake_token\n"
+           "Timeouts: 5 1 10\n",
            "{\"items\": [ "
            "  { \"name\": \"path/\" },"  // The current directory's marker.
            "  { \"name\": \"path/file1.txt\" },"
@@ -1979,30 +2116,35 @@ TEST(GcsFileSystemTest, DeleteRecursively_Ok) {
        new FakeHttpRequest("Uri: https://www.googleapis.com/storage/v1/b"
                            "/bucket/o/path%2F\n"
                            "Auth Token: fake_token\n"
+                           "Timeouts: 5 1 10\n"
                            "Delete: yes\n",
                            ""),
        // Delete the object - fails and will be retried.
        new FakeHttpRequest("Uri: https://www.googleapis.com/storage/v1/b"
                            "/bucket/o/path%2Ffile1.txt\n"
                            "Auth Token: fake_token\n"
+                           "Timeouts: 5 1 10\n"
                            "Delete: yes\n",
                            "", errors::Unavailable("500"), 500),
        // Delete the object again.
        new FakeHttpRequest("Uri: https://www.googleapis.com/storage/v1/b"
                            "/bucket/o/path%2Ffile1.txt\n"
                            "Auth Token: fake_token\n"
+                           "Timeouts: 5 1 10\n"
                            "Delete: yes\n",
                            ""),
        // Delete the object.
        new FakeHttpRequest("Uri: https://www.googleapis.com/storage/v1/b"
                            "/bucket/o/path%2Fsubpath%2Ffile2.txt\n"
                            "Auth Token: fake_token\n"
+                           "Timeouts: 5 1 10\n"
                            "Delete: yes\n",
                            ""),
        // Delete the object.
        new FakeHttpRequest("Uri: https://www.googleapis.com/storage/v1/b"
                            "/bucket/o/path%2Ffile3.txt\n"
                            "Auth Token: fake_token\n"
+                           "Timeouts: 5 1 10\n"
                            "Delete: yes\n",
                            "")});
   GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
@@ -2012,7 +2154,7 @@ TEST(GcsFileSystemTest, DeleteRecursively_Ok) {
                    0 /* stat cache max age */, 0 /* stat cache max entries */,
                    0 /* matching paths cache max age */,
                    0 /* matching paths cache max entries */,
-                   0 /* initial retry delay */);
+                   0 /* initial retry delay*/, kTestTimeoutConfig);
 
   int64 undeleted_files, undeleted_dirs;
   TF_EXPECT_OK(fs.DeleteRecursively("gs://bucket/path", &undeleted_files,
@@ -2028,14 +2170,16 @@ TEST(GcsFileSystemTest, DeleteRecursively_DeletionErrors) {
            "Uri: https://www.googleapis.com/storage/v1/b/bucket/o?"
            "fields=items%2Fname%2CnextPageToken&prefix=path%2F"
            "&maxResults=1\n"
-           "Auth Token: fake_token\n",
+           "Auth Token: fake_token\n"
+           "Timeouts: 5 1 10\n",
            "{\"items\": [ "
            "  { \"name\": \"path/file1.txt\" }]}"),
        // Calling GetChildren recursively.
        new FakeHttpRequest(
            "Uri: https://www.googleapis.com/storage/v1/b/bucket/o?"
            "fields=items%2Fname%2CnextPageToken&prefix=path%2F\n"
-           "Auth Token: fake_token\n",
+           "Auth Token: fake_token\n"
+           "Timeouts: 5 1 10\n",
            "{\"items\": [ "
            "  { \"name\": \"path/file1.txt\" },"
            "  { \"name\": \"path/subpath/\" },"
@@ -2045,12 +2189,14 @@ TEST(GcsFileSystemTest, DeleteRecursively_DeletionErrors) {
        new FakeHttpRequest("Uri: https://www.googleapis.com/storage/v1/b"
                            "/bucket/o/path%2Ffile1.txt\n"
                            "Auth Token: fake_token\n"
+                           "Timeouts: 5 1 10\n"
                            "Delete: yes\n",
                            ""),
        // Deleting the directory marker gs://bucket/path/ - fails with 404.
        new FakeHttpRequest("Uri: https://www.googleapis.com/storage/v1/b"
                            "/bucket/o/path%2Fsubpath%2F\n"
                            "Auth Token: fake_token\n"
+                           "Timeouts: 5 1 10\n"
                            "Delete: yes\n",
                            "", errors::NotFound("404"), 404),
        // Checking if gs://bucket/path/subpath/ is a folder - it is.
@@ -2058,19 +2204,22 @@ TEST(GcsFileSystemTest, DeleteRecursively_DeletionErrors) {
            "Uri: https://www.googleapis.com/storage/v1/b/bucket/o?"
            "fields=items%2Fname%2CnextPageToken&prefix=path%2Fsubpath%2F"
            "&maxResults=1\n"
-           "Auth Token: fake_token\n",
+           "Auth Token: fake_token\n"
+           "Timeouts: 5 1 10\n",
            strings::StrCat("{\"items\": [ "
                            "    { \"name\": \"path/subpath/\" }]}")),
        // Deleting the object gs://bucket/path/subpath/file2.txt
        new FakeHttpRequest("Uri: https://www.googleapis.com/storage/v1/b"
                            "/bucket/o/path%2Fsubpath%2Ffile2.txt\n"
                            "Auth Token: fake_token\n"
+                           "Timeouts: 5 1 10\n"
                            "Delete: yes\n",
                            ""),
        // Deleting the object s://bucket/path/file3.txt - fails with 404.
        new FakeHttpRequest("Uri: https://www.googleapis.com/storage/v1/b"
                            "/bucket/o/path%2Ffile3.txt\n"
                            "Auth Token: fake_token\n"
+                           "Timeouts: 5 1 10\n"
                            "Delete: yes\n",
                            "", errors::NotFound("404"), 404),
        // Checking if gs://bucket/path/file3.txt/ is a folder - it's not.
@@ -2078,13 +2227,15 @@ TEST(GcsFileSystemTest, DeleteRecursively_DeletionErrors) {
            "Uri: https://www.googleapis.com/storage/v1/b/bucket/o?"
            "fields=items%2Fname%2CnextPageToken&prefix=path%2Ffile3.txt%2F"
            "&maxResults=1\n"
-           "Auth Token: fake_token\n",
+           "Auth Token: fake_token\n"
+           "Timeouts: 5 1 10\n",
            "{}"),
        // Checking if gs://bucket/path/file3.txt is an object - fails with 404.
        new FakeHttpRequest(
            "Uri: https://www.googleapis.com/storage/v1/b/bucket/o/"
            "path%2Ffile3.txt?fields=size%2Cupdated\n"
-           "Auth Token: fake_token\n",
+           "Auth Token: fake_token\n"
+           "Timeouts: 5 1 10\n",
            "", errors::NotFound("404"), 404)});
 
   GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
@@ -2094,7 +2245,7 @@ TEST(GcsFileSystemTest, DeleteRecursively_DeletionErrors) {
                    0 /* stat cache max age */, 0 /* stat cache max entries */,
                    0 /* matching paths cache max age */,
                    0 /* matching paths cache max entries */,
-                   0 /* initial retry delay */);
+                   0 /* initial retry delay*/, kTestTimeoutConfig);
 
   int64 undeleted_files, undeleted_dirs;
   TF_EXPECT_OK(fs.DeleteRecursively("gs://bucket/path", &undeleted_files,
@@ -2110,13 +2261,15 @@ TEST(GcsFileSystemTest, DeleteRecursively_NotAFolder) {
            "Uri: https://www.googleapis.com/storage/v1/b/bucket/o?"
            "fields=items%2Fname%2CnextPageToken&prefix=path%2F"
            "&maxResults=1\n"
-           "Auth Token: fake_token\n",
+           "Auth Token: fake_token\n"
+           "Timeouts: 5 1 10\n",
            "{}"),
        // IsDirectory is checking if the path exists as an object.
        new FakeHttpRequest(
            "Uri: https://www.googleapis.com/storage/v1/b/bucket/o/"
            "path?fields=size%2Cupdated\n"
-           "Auth Token: fake_token\n",
+           "Auth Token: fake_token\n"
+           "Timeouts: 5 1 10\n",
            "", errors::NotFound("404"), 404)});
   GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
                    std::unique_ptr<HttpRequest::Factory>(
@@ -2125,7 +2278,7 @@ TEST(GcsFileSystemTest, DeleteRecursively_NotAFolder) {
                    0 /* stat cache max age */, 0 /* stat cache max entries */,
                    0 /* matching paths cache max age */,
                    0 /* matching paths cache max entries */,
-                   0 /* initial retry delay */);
+                   0 /* initial retry delay*/, kTestTimeoutConfig);
 
   int64 undeleted_files, undeleted_dirs;
   EXPECT_EQ(error::Code::NOT_FOUND,
@@ -2142,6 +2295,11 @@ TEST(GcsFileSystemTest, OverrideCacheParameters) {
   EXPECT_EQ(128 * 1024 * 1024, fs1.block_size());
   EXPECT_EQ(2 * fs1.block_size(), fs1.max_bytes());
   EXPECT_EQ(0, fs1.max_staleness());
+  EXPECT_EQ(120, fs1.timeouts().connect);
+  EXPECT_EQ(60, fs1.timeouts().idle);
+  EXPECT_EQ(3600, fs1.timeouts().metadata);
+  EXPECT_EQ(3600, fs1.timeouts().read);
+  EXPECT_EQ(3600, fs1.timeouts().write);
 
   // Verify legacy readahead buffer override sets block size.
   setenv("GCS_READAHEAD_BUFFER_SIZE_BYTES", "123456789", 1);
@@ -2167,6 +2325,19 @@ TEST(GcsFileSystemTest, OverrideCacheParameters) {
   EXPECT_EQ(32, fs4.stat_cache_max_entries());
   EXPECT_EQ(30, fs4.matching_paths_cache_max_age());
   EXPECT_EQ(64, fs4.matching_paths_cache_max_entries());
+
+  // Verify timeout overrides.
+  setenv("GCS_REQUEST_CONNECTION_TIMEOUT_SECS", "10", 1);
+  setenv("GCS_REQUEST_IDLE_TIMEOUT_SECS", "5", 1);
+  setenv("GCS_METADATA_REQUEST_TIMEOUT_SECS", "20", 1);
+  setenv("GCS_READ_REQUEST_TIMEOUT_SECS", "30", 1);
+  setenv("GCS_WRITE_REQUEST_TIMEOUT_SECS", "40", 1);
+  GcsFileSystem fs5;
+  EXPECT_EQ(10, fs5.timeouts().connect);
+  EXPECT_EQ(5, fs5.timeouts().idle);
+  EXPECT_EQ(20, fs5.timeouts().metadata);
+  EXPECT_EQ(30, fs5.timeouts().read);
+  EXPECT_EQ(40, fs5.timeouts().write);
 }
 
 }  // namespace
diff --git a/tensorflow/core/platform/cloud/http_request.h b/tensorflow/core/platform/cloud/http_request.h
index 02d9e9054a..95a436c622 100644
--- a/tensorflow/core/platform/cloud/http_request.h
+++ b/tensorflow/core/platform/cloud/http_request.h
@@ -118,6 +118,16 @@ class HttpRequest {
   // Url encodes str and returns a new string.
   virtual string EscapeString(const string& str) = 0;
 
+  /// \brief Set timeouts for this request.
+  ///
+  /// The connection parameter controls how long we should wait for the
+  /// connection to be established. The inactivity parameter controls how long
+  /// we should wait between additional responses from the server. Finally the
+  /// total parameter controls the maximum total connection time to prevent
+  /// hanging indefinitely.
+  virtual Status SetTimeouts(uint32 connection, uint32 inactivity,
+                             uint32 total) = 0;
+
   TF_DISALLOW_COPY_AND_ASSIGN(HttpRequest);
 };
 
diff --git a/tensorflow/core/platform/cloud/http_request_fake.h b/tensorflow/core/platform/cloud/http_request_fake.h
index bfe04f6363..8161ee6e25 100644
--- a/tensorflow/core/platform/cloud/http_request_fake.h
+++ b/tensorflow/core/platform/cloud/http_request_fake.h
@@ -37,7 +37,8 @@ class FakeHttpRequest : public CurlHttpRequest {
  public:
   /// Return the response for the given request.
   FakeHttpRequest(const string& request, const string& response)
-      : FakeHttpRequest(request, response, Status::OK(), nullptr, {}, 200) {}
+      : FakeHttpRequest(request, response, Status::OK(), nullptr, {}, 200) {
+  }
 
   /// Return the response with headers for the given request.
   FakeHttpRequest(const string& request, const string& response,
@@ -160,6 +161,13 @@ class FakeHttpRequest : public CurlHttpRequest {
 
   virtual uint64 GetResponseCode() const override { return response_code_; }
 
+  Status SetTimeouts(uint32 connection, uint32 inactivity,
+                     uint32 total) override {
+    actual_request_ += strings::StrCat("Timeouts: ", connection, " ",
+                                       inactivity, " ", total, "\n");
+    return Status::OK();
+  }
+
  private:
   std::vector<char>* buffer_ = nullptr;
   string expected_request_;
-- 
GitLab


From 9810da1b87aae689cac42bae754e7e4cb5a99d57 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 12 Dec 2017 13:03:52 -0800
Subject: [PATCH 0929/1225] Adds XLA support for tf.nn.dynamic_rnn

Changes tf.nn.dynamic_rnn to specify `maximum_iterations` argument for the while_loop.

When `maximum_iterations` argument is supplied to tf.while_loop, use this to provide an upper bound on the size of Stacks used for gradient computation.
By specifying the stack limit we can generate gradient code for while loops that uses fixed shape TensorArrays and hence can be compiled with XLA.

PiperOrigin-RevId: 178802710
---
 tensorflow/core/protobuf/control_flow.proto   |  5 ++
 tensorflow/python/ops/control_flow_ops.py     | 78 ++++++++++++++-----
 .../python/ops/control_flow_ops_test.py       | 21 +++--
 tensorflow/python/ops/rnn.py                  | 10 ++-
 .../python/profiler/model_analyzer_test.py    |  4 +-
 5 files changed, 88 insertions(+), 30 deletions(-)

diff --git a/tensorflow/core/protobuf/control_flow.proto b/tensorflow/core/protobuf/control_flow.proto
index 48f5032254..2c9476a08a 100644
--- a/tensorflow/core/protobuf/control_flow.proto
+++ b/tensorflow/core/protobuf/control_flow.proto
@@ -66,4 +66,9 @@ message WhileContextDef {
 
   // Values and external values in control flow context.
   ValuesDef values_def = 9;
+
+  // Optional name of the maximum_iterations tensor.
+  string maximum_iterations_name = 11;
+
+  // Next available id: 12.
 }
diff --git a/tensorflow/python/ops/control_flow_ops.py b/tensorflow/python/ops/control_flow_ops.py
index 8e8e7d4f8c..6e97fe00bd 100644
--- a/tensorflow/python/ops/control_flow_ops.py
+++ b/tensorflow/python/ops/control_flow_ops.py
@@ -748,22 +748,26 @@ class GradLoopState(object):
 
       outer_grad_ctxt = outer_grad_state.grad_context
       outer_grad_ctxt.Enter()
-      self._grad_context = WhileContext(forward_ctxt.parallel_iterations,
-                                        forward_ctxt.back_prop,
-                                        forward_ctxt.swap_memory,
-                                        forward_ctxt.name,
-                                        self)
+      self._grad_context = WhileContext(
+          maximum_iterations=forward_ctxt.maximum_iterations,
+          parallel_iterations=forward_ctxt.parallel_iterations,
+          back_prop=forward_ctxt.back_prop,
+          swap_memory=forward_ctxt.swap_memory,
+          name=forward_ctxt.name,
+          grad_state=self)
       real_cnt = outer_grad_state.AddBackpropAccumulatedValue(history_cnt, cnt)
       self._grad_index = self._grad_context.AddBackpropLoopCounter(
           real_cnt, outer_grad_state)
       outer_grad_ctxt.Exit()
     else:
       if outer_forward_ctxt: outer_forward_ctxt.Enter()
-      self._grad_context = WhileContext(forward_ctxt.parallel_iterations,
-                                        forward_ctxt.back_prop,
-                                        forward_ctxt.swap_memory,
-                                        forward_ctxt.name,
-                                        self)
+      self._grad_context = WhileContext(
+          maximum_iterations=forward_ctxt.maximum_iterations,
+          parallel_iterations=forward_ctxt.parallel_iterations,
+          back_prop=forward_ctxt.back_prop,
+          swap_memory=forward_ctxt.swap_memory,
+          name=forward_ctxt.name,
+          grad_state=self)
       self._grad_index = self._grad_context.AddBackpropLoopCounter(
           cnt, outer_grad_state)
       if outer_forward_ctxt: outer_forward_ctxt.Exit()
@@ -893,9 +897,14 @@ class GradLoopState(object):
     with ops.control_dependencies(None):
       if curr_ctxt: curr_ctxt.Enter()
       with ops.colocate_with(value):
+        maximum_iterations = self.forward_context.maximum_iterations
+        if maximum_iterations is None:
+          maximum_iterations = constant_op.constant(-1, dtypes.int32)
         # pylint: disable=protected-access
-        acc = gen_data_flow_ops._stack_v2(-1, value.dtype.base_dtype,
-                                          name="f_acc")
+        acc = gen_data_flow_ops._stack_v2(
+            max_size=maximum_iterations,
+            elem_type=value.dtype.base_dtype,
+            name="f_acc")
         # pylint: enable=protected-access
       if curr_ctxt: curr_ctxt.Exit()
 
@@ -1767,6 +1776,7 @@ def _UnpackIfSingleton(res):
     return res
 
 
+# pylint: disable=redefined-outer-name
 # pylint: disable=g-doc-args
 @deprecation.deprecated_args(
     None,
@@ -1943,6 +1953,7 @@ def cond(pred, true_fn=None, false_fn=None, strict=False, name=None,
       merges = _UnpackIfSingleton(merges)
     return merges
 # pylint: enable=g-doc-args
+# pylint: enable=redefined-outer-name
 
 
 def _resource_safe_shape(t):
@@ -1960,12 +1971,19 @@ def _resource_safe_shape(t):
 class WhileContext(ControlFlowContext):
   """The context for the loop construct."""
 
-  def __init__(self, parallel_iterations=10, back_prop=True, swap_memory=False,
-               name="while_context", grad_state=None, context_def=None,
+  def __init__(self,
+               maximum_iterations=None,
+               parallel_iterations=10,
+               back_prop=True,
+               swap_memory=False,
+               name="while_context",
+               grad_state=None,
+               context_def=None,
                import_scope=None):
     """"Creates a `WhileContext`.
 
     Args:
+      maximum_iterations: Optional upper bound on number of loop iterations.
       parallel_iterations: The number of iterations allowed to run in parallel.
       back_prop: Whether backprop is enabled for this while loop.
       swap_memory: Whether GPU-CPU memory swap is enabled for this loop.
@@ -1980,16 +1998,17 @@ class WhileContext(ControlFlowContext):
       self._init_from_proto(context_def, import_scope=import_scope)
     else:
       ControlFlowContext.__init__(self)
-      self._init_from_args(parallel_iterations, back_prop, swap_memory,
-                           name)
+      self._init_from_args(maximum_iterations, parallel_iterations, back_prop,
+                           swap_memory, name)
     # The gradient loop state.
     self._grad_state = grad_state
 
-  def _init_from_args(self, parallel_iterations, back_prop, swap_memory,
-                      name):
+  def _init_from_args(self, maximum_iterations, parallel_iterations, back_prop,
+                      swap_memory, name):
     """Creates a new `WhileContext` from arguments.
 
     Args:
+      maximum_iterations: Optional upper bound on number of loop iterations.
       parallel_iterations: The number of iterations allowed to run in parallel.
       back_prop: Whether backprop is enabled for this while loop.
       swap_memory: Whether GPU-CPU memory swap is enabled for this loop.
@@ -2002,6 +2021,7 @@ class WhileContext(ControlFlowContext):
       raise ValueError("`parallel_iterations` must be a positive integer: "
                        "%s" % parallel_iterations)
     self._name = ops.get_default_graph().unique_name(name)
+    self._maximum_iterations = maximum_iterations
     self._parallel_iterations = parallel_iterations
     self._back_prop = back_prop
     self._swap_memory = swap_memory
@@ -2029,6 +2049,12 @@ class WhileContext(ControlFlowContext):
     g = ops.get_default_graph()
     self._name = ops.prepend_name_scope(
         context_def.context_name, import_scope)
+    if context_def.maximum_iterations_name:
+      self._maximum_iterations = g.as_graph_element(
+          ops.prepend_name_scope(context_def.maximum_iterations_name,
+                                 import_scope))
+    else:
+      self._maximum_iterations = None
     self._parallel_iterations = context_def.parallel_iterations
     self._back_prop = context_def.back_prop
     self._swap_memory = context_def.swap_memory
@@ -2056,6 +2082,11 @@ class WhileContext(ControlFlowContext):
   def name(self):
     return self._name
 
+  @property
+  def maximum_iterations(self):
+    """The maximum number of iterations that will be executed."""
+    return self._maximum_iterations
+
   @property
   def parallel_iterations(self):
     """The number of iterations allowed to run in parallel."""
@@ -2106,6 +2137,9 @@ class WhileContext(ControlFlowContext):
       context_def.context_name = ops.strip_name_scope(
           self.name, export_scope)
       context_def.parallel_iterations = self._parallel_iterations
+      if self._maximum_iterations is not None:
+        context_def.maximum_iterations_name = ops.strip_name_scope(
+            self._maximum_iterations.name, export_scope)
       context_def.back_prop = self._back_prop
       context_def.swap_memory = self._swap_memory
       context_def.pivot_for_pred_name = ops.strip_name_scope(
@@ -2724,6 +2758,7 @@ class WhileContext(ControlFlowContext):
     return True
 
 
+# pylint: disable=redefined-outer-name
 def while_loop(cond, body, loop_vars, shape_invariants=None,
                parallel_iterations=10, back_prop=True, swap_memory=False,
                name=None, maximum_iterations=None):
@@ -2889,13 +2924,18 @@ def while_loop(cond, body, loop_vars, shape_invariants=None,
         shape_invariants = (tensor_shape.TensorShape([]), shape_invariants)
       nest.assert_same_structure(loop_vars, shape_invariants)
 
-    loop_context = WhileContext(parallel_iterations, back_prop, swap_memory)  # pylint: disable=redefined-outer-name
+    loop_context = WhileContext(
+        maximum_iterations=maximum_iterations,
+        parallel_iterations=parallel_iterations,
+        back_prop=back_prop,
+        swap_memory=swap_memory)
     ops.add_to_collection(ops.GraphKeys.WHILE_CONTEXT, loop_context)
     result = loop_context.BuildLoop(cond, body, loop_vars, shape_invariants)
     if maximum_iterations is not None:
       return result[1]
     else:
       return result
+# pylint: enable=redefined-outer-name
 
 
 def _AsTensorList(x, p):
diff --git a/tensorflow/python/ops/control_flow_ops_test.py b/tensorflow/python/ops/control_flow_ops_test.py
index cd3c02f562..a666fd33a2 100644
--- a/tensorflow/python/ops/control_flow_ops_test.py
+++ b/tensorflow/python/ops/control_flow_ops_test.py
@@ -452,18 +452,25 @@ class ContextTest(test_util.TensorFlowTestCase):
               c.to_proto(),
               control_flow_ops.CondContext.from_proto(c.to_proto()).to_proto())
 
-  def testWhileContext(self):
+  def _testWhileContextHelper(self, maximum_iterations=None):
     with self.test_session() as sess:
       i = constant_op.constant(0)
       c = lambda i: math_ops.less(i, 10)
       b = lambda i: math_ops.add(i, 1)
-      control_flow_ops.while_loop(c, b, [i])
+      control_flow_ops.while_loop(
+          c, b, [i], maximum_iterations=maximum_iterations)
       for op in sess.graph.get_operations():
-        c = op._get_control_flow_context()
-        if c:
-          self.assertProtoEquals(
-              c.to_proto(),
-              control_flow_ops.WhileContext.from_proto(c.to_proto()).to_proto())
+        context = op._get_control_flow_context()
+        if context:
+          self.assertProtoEquals(context.to_proto(),
+                                 control_flow_ops.WhileContext.from_proto(
+                                     context.to_proto()).to_proto())
+
+  def testWhileContext(self):
+    self._testWhileContextHelper()
+
+  def testWhileContextWithMaximumIterations(self):
+    self._testWhileContextHelper(maximum_iterations=10)
 
   def testControlContextImportScope(self):
     with self.test_session():
diff --git a/tensorflow/python/ops/rnn.py b/tensorflow/python/ops/rnn.py
index e30b19842f..fa48297672 100644
--- a/tensorflow/python/ops/rnn.py
+++ b/tensorflow/python/ops/rnn.py
@@ -665,7 +665,7 @@ def _dynamic_rnn_loop(cell,
     final_outputs:
       A `Tensor` of shape `[time, batch_size, cell.output_size]`.  If
       `cell.output_size` is a (possibly nested) tuple of ints or `TensorShape`
-      objects, then this returns a (possibly nsted) tuple of Tensors matching
+      objects, then this returns a (possibly nested) tuple of Tensors matching
       the corresponding shapes.
     final_state:
       A `Tensor`, or possibly nested tuple of Tensors, matching in length
@@ -806,11 +806,17 @@ def _dynamic_rnn_loop(cell,
 
     return (time + 1, output_ta_t, new_state)
 
+  # TODO(pbar) `loop_bound` can be reduced to `max_sequence_length` once
+  # TensorArray shape inference is working.  When sequence lengths are highly
+  # variable, this will reduce the performance overheads of padding to a fixed
+  # maximum length.
+  loop_bound = time_steps
   _, output_final_ta, final_state = control_flow_ops.while_loop(
-      cond=lambda time, *_: time < time_steps,
+      cond=lambda time, *_: time < loop_bound,
       body=_time_step,
       loop_vars=(time, output_ta, state),
       parallel_iterations=parallel_iterations,
+      maximum_iterations=time_steps,
       swap_memory=swap_memory)
 
   # Unpack final output if not using output tuples.
diff --git a/tensorflow/python/profiler/model_analyzer_test.py b/tensorflow/python/profiler/model_analyzer_test.py
index 5d524c8c74..a379bd5236 100644
--- a/tensorflow/python/profiler/model_analyzer_test.py
+++ b/tensorflow/python/profiler/model_analyzer_test.py
@@ -230,12 +230,12 @@ class PrintModelAnalysisTest(test.TestCase):
         with gfile.Open(outfile, 'r') as f:
           lines = f.read().split('\n')
           result = '\n'.join([l[:min(len(l), 80)] for l in lines])
-          self.assertEqual(compat.as_bytes('node name | # parameters | # float_ops\n_TFProfRoot (--/2.84k params, --/168.85k flops)\n  model_analyzer_testlib.py:63:BuildFullModel (0/1.80k params, 0/45.37k flops)\n    model_analyzer_testlib.py:40:BuildSmallModel (0/0 params, 0/0 flops)\n    model_analyzer_testlib.py:44:BuildSmallModel (0/4 params, 0/8 flops)\n    model_analyzer_testlib.py:48:BuildSmallModel (0/648 params, 0/1.30k flops)\n    model_analyzer_testlib.py:49:BuildSmallModel (0/0 params, 0/23.33k flops)\n    model_analyzer_testlib.py:53:BuildSmallModel (0/1.15k params, 0/2.30k flops)\n    model_analyzer_testlib.py:54:BuildSmallModel (0/0 params, 0/18.43k flops)\n  model_analyzer_testlib.py:63:BuildFullModel (gradient) (0/0 params, 0/67.39k f\n    model_analyzer_testlib.py:49:BuildSmallModel (gradient) (0/0 params, 0/46.66\n    model_analyzer_testlib.py:54:BuildSmallModel (gradient) (0/0 params, 0/20.74\n  model_analyzer_testlib.py:67:BuildFullModel (0/1.04k params, 0/18.57k flops)\n  model_analyzer_testlib.py:67:BuildFullModel (gradient) (0/0 params, 0/37.00k f\n  model_analyzer_testlib.py:69:BuildFullModel (0/0 params, 0/0 flops)\n  model_analyzer_testlib.py:70:BuildFullModel (0/0 params, 0/258 flops)\n  model_analyzer_testlib.py:70:BuildFullModel (gradient) (0/0 params, 0/129 flop\n  model_analyzer_testlib.py:72:BuildFullModel (0/0 params, 0/141 flops)\n'),
+          self.assertEqual(compat.as_bytes('node name | # parameters | # float_ops\n_TFProfRoot (--/2.84k params, --/168.86k flops)\n  model_analyzer_testlib.py:63:BuildFullModel (0/1.80k params, 0/45.37k flops)\n    model_analyzer_testlib.py:40:BuildSmallModel (0/0 params, 0/0 flops)\n    model_analyzer_testlib.py:44:BuildSmallModel (0/4 params, 0/8 flops)\n    model_analyzer_testlib.py:48:BuildSmallModel (0/648 params, 0/1.30k flops)\n    model_analyzer_testlib.py:49:BuildSmallModel (0/0 params, 0/23.33k flops)\n    model_analyzer_testlib.py:53:BuildSmallModel (0/1.15k params, 0/2.30k flops)\n    model_analyzer_testlib.py:54:BuildSmallModel (0/0 params, 0/18.43k flops)\n  model_analyzer_testlib.py:63:BuildFullModel (gradient) (0/0 params, 0/67.39k f\n    model_analyzer_testlib.py:49:BuildSmallModel (gradient) (0/0 params, 0/46.66\n    model_analyzer_testlib.py:54:BuildSmallModel (gradient) (0/0 params, 0/20.74\n  model_analyzer_testlib.py:67:BuildFullModel (0/1.04k params, 0/18.58k flops)\n  model_analyzer_testlib.py:67:BuildFullModel (gradient) (0/0 params, 0/37.00k f\n  model_analyzer_testlib.py:69:BuildFullModel (0/0 params, 0/0 flops)\n  model_analyzer_testlib.py:70:BuildFullModel (0/0 params, 0/258 flops)\n  model_analyzer_testlib.py:70:BuildFullModel (gradient) (0/0 params, 0/129 flop\n  model_analyzer_testlib.py:72:BuildFullModel (0/0 params, 0/141 flops)\n'),
                            compat.as_bytes(result))
 
         self.assertLess(0, tfprof_node.total_exec_micros)
         self.assertEqual(2844, tfprof_node.total_parameters)
-        self.assertEqual(168854, tfprof_node.total_float_ops)
+        self.assertEqual(168863, tfprof_node.total_float_ops)
         self.assertEqual(8, len(tfprof_node.children))
         self.assertEqual('_TFProfRoot', tfprof_node.name)
         self.assertEqual(
-- 
GitLab


From 389ffa842b0c0e7344ac721bbee8639de96cc81e Mon Sep 17 00:00:00 2001
From: Benoit Steiner <bsteiner@google.com>
Date: Tue, 12 Dec 2017 14:15:09 -0800
Subject: [PATCH 0930/1225] Added a debug mode to the model analyzer to make it
 easier to figure out why shapes are missing.

PiperOrigin-RevId: 178813305
---
 tensorflow/python/BUILD                       |  2 +-
 tensorflow/python/grappler/model_analyzer.cc  | 32 ++++++++++++++++---
 tensorflow/python/grappler/model_analyzer.h   |  4 +--
 tensorflow/python/grappler/model_analyzer.i   |  6 ++--
 tensorflow/python/grappler/model_analyzer.py  |  6 ++--
 .../python/grappler/model_analyzer_test.py    | 18 +++++++++++
 6 files changed, 55 insertions(+), 13 deletions(-)

diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD
index 20944d1678..4012197bce 100644
--- a/tensorflow/python/BUILD
+++ b/tensorflow/python/BUILD
@@ -204,11 +204,11 @@ cc_library(
     srcs = ["grappler/model_analyzer.cc"],
     hdrs = ["grappler/model_analyzer.h"],
     deps = [
+        "//tensorflow/core:framework",
         "//tensorflow/core:lib",
         "//tensorflow/core:protos_all_cc",
         "//tensorflow/core/grappler:grappler_item",
         "//tensorflow/core/grappler/costs:graph_properties",
-        "//tensorflow/core/grappler/costs:utils",
     ],
 )
 
diff --git a/tensorflow/python/grappler/model_analyzer.cc b/tensorflow/python/grappler/model_analyzer.cc
index da5b03234e..d23eb811ac 100644
--- a/tensorflow/python/grappler/model_analyzer.cc
+++ b/tensorflow/python/grappler/model_analyzer.cc
@@ -16,6 +16,7 @@ limitations under the License.
 #include "tensorflow/python/grappler/model_analyzer.h"
 
 #include <iomanip>
+#include "tensorflow/core/framework/op.h"
 #include "tensorflow/core/framework/tensor_shape.pb.h"
 #include "tensorflow/core/grappler/costs/graph_properties.h"
 #include "tensorflow/core/grappler/grappler_item.h"
@@ -25,26 +26,26 @@ namespace grappler {
 
 ModelAnalyzer::ModelAnalyzer(const GrapplerItem& item) : item_(item) {}
 
-Status ModelAnalyzer::GenerateReport(std::ostream& os) {
+Status ModelAnalyzer::GenerateReport(bool debug, std::ostream& os) {
   GraphProperties properties(item_);
   TF_RETURN_IF_ERROR(properties.InferStatically(false));
 
   for (const auto& node : item_.MainOpsFanin()) {
-    PrintNodeInfo(node, properties, os);
+    PrintNodeInfo(node, properties, debug, os);
   }
   for (const auto& node : item_.EnqueueOpsFanin()) {
-    PrintNodeInfo(node, properties, os);
+    PrintNodeInfo(node, properties, debug, os);
   }
 
   return Status::OK();
 }
 
 void ModelAnalyzer::PrintNodeInfo(const NodeDef* node,
-                                  const GraphProperties& properties,
+                                  const GraphProperties& properties, bool debug,
                                   std::ostream& os) const {
   os << node->name() << " [" << node->op() << "]" << std::endl;
   if (properties.HasOutputProperties(node->name())) {
-    std::vector<OpInfo::TensorProperties> props =
+    const std::vector<OpInfo::TensorProperties>& props =
         properties.GetOutputProperties(node->name());
     for (int i = 0; i < props.size(); ++i) {
       const OpInfo::TensorProperties& prop = props[i];
@@ -75,6 +76,27 @@ void ModelAnalyzer::PrintNodeInfo(const NodeDef* node,
       os << std::endl;
     }
   }
+
+  if (debug) {
+    const OpRegistrationData* op_reg_data;
+    Status status = OpRegistry::Global()->LookUp(node->op(), &op_reg_data);
+    if (!status.ok()) {
+      os << "\tCouldn't find op registration for " << node->op() << std::endl;
+    } else if (!op_reg_data->shape_inference_fn) {
+      os << "\tCouldn't find shape function for op " << node->op() << std::endl;
+    } else if (properties.HasInputProperties(node->name())) {
+      const std::vector<OpInfo::TensorProperties>& props =
+          properties.GetInputProperties(node->name());
+      for (int i = 0; i < props.size(); ++i) {
+        const OpInfo::TensorProperties& prop = props[i];
+        if (prop.has_value()) {
+          os << "\t"
+             << "input " << i << " (" << DataTypeString(prop.dtype())
+             << ") has known value" << std::endl;
+        }
+      }
+    }
+  }
 }
 
 }  // end namespace grappler
diff --git a/tensorflow/python/grappler/model_analyzer.h b/tensorflow/python/grappler/model_analyzer.h
index a14034103c..5bc551927d 100644
--- a/tensorflow/python/grappler/model_analyzer.h
+++ b/tensorflow/python/grappler/model_analyzer.h
@@ -31,11 +31,11 @@ class GraphProperties;
 class ModelAnalyzer {
  public:
   explicit ModelAnalyzer(const GrapplerItem& item);
-  Status GenerateReport(std::ostream& os);
+  Status GenerateReport(bool debug, std::ostream& os);
 
  private:
   void PrintNodeInfo(const NodeDef* node, const GraphProperties& properties,
-                     std::ostream& os) const;
+                     bool debug, std::ostream& os) const;
 
   const GrapplerItem& item_;
 };
diff --git a/tensorflow/python/grappler/model_analyzer.i b/tensorflow/python/grappler/model_analyzer.i
index 726143a0bb..7c3a692d0e 100644
--- a/tensorflow/python/grappler/model_analyzer.i
+++ b/tensorflow/python/grappler/model_analyzer.i
@@ -40,7 +40,7 @@ limitations under the License.
 %}
 
 %{
-string GenerateModelReport(const tensorflow::MetaGraphDef& metagraph) {
+string GenerateModelReport(const tensorflow::MetaGraphDef& metagraph, bool debug) {
   tensorflow::grappler::ItemConfig cfg;
   cfg.apply_optimizations = false;
   std::unique_ptr<tensorflow::grappler::GrapplerItem> item =
@@ -53,10 +53,10 @@ string GenerateModelReport(const tensorflow::MetaGraphDef& metagraph) {
   tensorflow::grappler::ModelAnalyzer analyzer(*item);
 
   std::stringstream os;
-  analyzer.GenerateReport(os);
+  analyzer.GenerateReport(debug, os);
   return os.str();
 }
 
 %}
 
-string GenerateModelReport(const tensorflow::MetaGraphDef& metagraph);
+string GenerateModelReport(const tensorflow::MetaGraphDef& metagraph, bool debug);
diff --git a/tensorflow/python/grappler/model_analyzer.py b/tensorflow/python/grappler/model_analyzer.py
index c852d71ad8..535889e1c4 100644
--- a/tensorflow/python/grappler/model_analyzer.py
+++ b/tensorflow/python/grappler/model_analyzer.py
@@ -22,16 +22,18 @@ from tensorflow.python import pywrap_tensorflow as tf_wrap
 from tensorflow.python.framework import errors
 
 
-def GenerateModelReport(metagraph):
+def GenerateModelReport(metagraph, debug=False):
   """Report what's known statically about each node in the provided metagraph.
 
   Args:
     metagraph: A TensorFlow MetaGraphDef.
+    debug: Add some information useful for debugging.
 
   Returns:
     A string containing the report.
   """
   with errors.raise_exception_on_not_ok_status():
-    ret_from_swig = tf_wrap.GenerateModelReport(metagraph.SerializeToString())
+    ret_from_swig = tf_wrap.GenerateModelReport(metagraph.SerializeToString(),
+                                                debug)
 
   return ret_from_swig
diff --git a/tensorflow/python/grappler/model_analyzer_test.py b/tensorflow/python/grappler/model_analyzer_test.py
index b59d1650f4..ec172755f1 100644
--- a/tensorflow/python/grappler/model_analyzer_test.py
+++ b/tensorflow/python/grappler/model_analyzer_test.py
@@ -49,6 +49,24 @@ class PyWrapOptimizeGraphTest(test.TestCase):
     # Also print the report to make it easier to debug
     print("{}".format(report))
 
+  def testDebugMode(self):
+    """Make sure arguments can be passed correctly."""
+    a = constant_op.constant([10, 11], name="a")
+    b = constant_op.constant([10], name="b")
+    c = math_ops.add(a, b, name="c")
+    train_op = ops.get_collection_ref(ops.GraphKeys.TRAIN_OP)
+    train_op.append(c)
+    mg = meta_graph.create_meta_graph_def(graph=ops.get_default_graph())
+
+    report = model_analyzer.GenerateModelReport(mg, debug=True)
+
+    # Check the report headers
+    self.assertTrue(b"input 0 (int32) has known value" in report)
+    self.assertTrue(b"input 1 (int32) has known value" in report)
+
+    # Also print the report to make it easier to debug
+    print("{}".format(report))
+
 
 if __name__ == "__main__":
   test.main()
-- 
GitLab


From d0a4a79c02b8c2a64763e1ef02c878f34e9defff Mon Sep 17 00:00:00 2001
From: Igor Ganichev <iga@google.com>
Date: Tue, 12 Dec 2017 14:33:55 -0800
Subject: [PATCH 0931/1225] Add test case for
 record_summaries_every_n_global_steps

This test case illustrates how to use
record_summaries_every_n_global_steps and tf.all_summaries()
in graph mode. There are no tests using
record_summaries_every_n_global_steps. All existing graph
based tests don't use tf.all_summaries() creating the impression
that summary ops will somehow always run, which is not the case.

PiperOrigin-RevId: 178816316
---
 .../contrib/summary/summary_ops_graph_test.py | 32 +++++++++++++++++++
 1 file changed, 32 insertions(+)

diff --git a/tensorflow/contrib/summary/summary_ops_graph_test.py b/tensorflow/contrib/summary/summary_ops_graph_test.py
index f8da790188..42ebb7ab9d 100644
--- a/tensorflow/contrib/summary/summary_ops_graph_test.py
+++ b/tensorflow/contrib/summary/summary_ops_graph_test.py
@@ -29,6 +29,7 @@ from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import control_flow_ops
+from tensorflow.python.ops import state_ops
 from tensorflow.python.platform import test
 from tensorflow.python.training import training_util
 
@@ -53,6 +54,37 @@ class DbTest(summary_test_util.SummaryDbTest):
     six.assertCountEqual(self, [name],
                          get_all(self.db, 'SELECT node_name FROM Nodes'))
 
+  def testScalarSummary(self):
+    """Test record_summaries_every_n_global_steps and all_summaries()."""
+    with ops.Graph().as_default(), self.test_session() as sess:
+      global_step = training_util.get_or_create_global_step()
+      global_step.initializer.run()
+      with ops.device('/cpu:0'):
+        step_increment = state_ops.assign_add(global_step, 1)
+      sess.run(step_increment)  # Increment global step from 0 to 1
+
+      logdir = tempfile.mkdtemp()
+      with summary_ops.create_file_writer(logdir, max_queue=0,
+                                          name='t2').as_default():
+        with summary_ops.record_summaries_every_n_global_steps(2):
+          summary_ops.initialize()
+          summary_op = summary_ops.scalar('my_scalar', 2.0)
+
+          # Neither of these should produce a summary because
+          # global_step is 1 and "1 % 2 != 0"
+          sess.run(summary_ops.all_summary_ops())
+          sess.run(summary_op)
+          events = summary_test_util.events_from_logdir(logdir)
+          self.assertEqual(len(events), 1)
+
+          # Increment global step from 1 to 2 and check that the summary
+          # is now written
+          sess.run(step_increment)
+          sess.run(summary_ops.all_summary_ops())
+          events = summary_test_util.events_from_logdir(logdir)
+          self.assertEqual(len(events), 2)
+          self.assertEqual(events[1].summary.value[0].tag, 'cond/my_scalar')
+
   def testSummaryGraphModeCond(self):
     with ops.Graph().as_default(), self.test_session():
       training_util.get_or_create_global_step()
-- 
GitLab


From 1c2bcf947f2e192512857887fb1301d13fe332ec Mon Sep 17 00:00:00 2001
From: Derek Murray <mrry@google.com>
Date: Tue, 12 Dec 2017 14:57:55 -0800
Subject: [PATCH 0932/1225] Fix bug in kernel creation with functions marked
 "stateful".

The CallOp kernel caches a handle for invoking the function. This
handle is only valid in a single subgraph (it is scoped to the
FunctionLibraryRuntime). Marking a function as stateful causes its
CallOp kernel to be shared between multiple subgraphs. Therefore, this
change overrides the kernel creation logic to ensure that each
subgraph gets its own CallOp.

PiperOrigin-RevId: 178820064
---
 .../core/common_runtime/direct_session.cc     | 10 +++++--
 .../core/distributed_runtime/graph_mgr.cc     | 10 +++++--
 tensorflow/python/framework/function_test.py  | 27 +++++++++++++++++++
 3 files changed, 43 insertions(+), 4 deletions(-)

diff --git a/tensorflow/core/common_runtime/direct_session.cc b/tensorflow/core/common_runtime/direct_session.cc
index 103b4b13c7..875a3ac841 100644
--- a/tensorflow/core/common_runtime/direct_session.cc
+++ b/tensorflow/core/common_runtime/direct_session.cc
@@ -1201,8 +1201,14 @@ Status DirectSession::GetOrCreateExecutors(
     auto opseg = device->op_segment();
     params.create_kernel = [this, lib, opseg](const NodeDef& ndef,
                                               OpKernel** kernel) {
-      // Caches the kernel only if the node is stateful.
-      if (!lib->IsStateful(ndef.op())) {
+      // We do not share the kernel via the OpSegment if the node is
+      // stateless, or a function.
+      // NOTE(mrry): We must not share function kernels (implemented
+      // using `CallOp`) between subgraphs, because `CallOp::handle_`
+      // is tied to a particular subgraph. Even if the function itself
+      // is stateful, the `CallOp` that invokes it is not.
+      if (!lib->IsStateful(ndef.op()) ||
+          lib->GetFunctionLibraryDefinition()->Find(ndef.op()) != nullptr) {
         return lib->CreateKernel(ndef, kernel);
       }
       auto create_fn = [lib, &ndef](OpKernel** kernel) {
diff --git a/tensorflow/core/distributed_runtime/graph_mgr.cc b/tensorflow/core/distributed_runtime/graph_mgr.cc
index 60d58af61d..45dfb7b2eb 100644
--- a/tensorflow/core/distributed_runtime/graph_mgr.cc
+++ b/tensorflow/core/distributed_runtime/graph_mgr.cc
@@ -228,8 +228,14 @@ Status GraphMgr::InitItem(const string& session, const GraphDef& gdef,
     params.function_library = lib;
     params.create_kernel = [session, lib, opseg](const NodeDef& ndef,
                                                  OpKernel** kernel) {
-      // Caches the kernel only if the node is stateful.
-      if (!lib->IsStateful(ndef.op())) {
+      // We do not share the kernel via the OpSegment if the node is
+      // stateless, or a function.
+      // NOTE(mrry): We must not share function kernels (implemented
+      // using `CallOp`) between subgraphs, because `CallOp::handle_`
+      // is tied to a particular subgraph. Even if the function itself
+      // is stateful, the `CallOp` that invokes it is not.
+      if (!lib->IsStateful(ndef.op()) ||
+          lib->GetFunctionLibraryDefinition()->Find(ndef.op()) != nullptr) {
         return lib->CreateKernel(ndef, kernel);
       }
       auto create_fn = [lib, &ndef](OpKernel** kernel) {
diff --git a/tensorflow/python/framework/function_test.py b/tensorflow/python/framework/function_test.py
index 11f343c579..886c6f04b9 100644
--- a/tensorflow/python/framework/function_test.py
+++ b/tensorflow/python/framework/function_test.py
@@ -914,6 +914,33 @@ class FunctionTest(test.TestCase):
           np.array([1.0, 0.0]).astype(np.float32),
           sess.run(dinp, {inp: x}))
 
+  def testFunctionMarkedStateful(self):
+
+    @function.Defun(dtypes.int32, dtypes.float32)
+    def Foo(t, x):
+      return x[t]
+
+    @function.Defun(dtypes.int64)
+    def Bar(x):
+      return x
+
+    # NOTE(mrry): All functions are currently considered stateless by the
+    # runtime, so we simulate a "stateful" function.
+    # TODO(b/70565970): Remove this hack when we are able to build stateful
+    # functions using the API.
+    # pylint: disable=protected-access
+    Foo._signature.is_stateful = True
+    Bar._signature.is_stateful = True
+    # pylint: enable=protected-access
+
+    result_1 = Foo(3, [1.0, 2.0, 3.0, 4.0])
+    result_2 = Bar(constant_op.constant(100, dtype=dtypes.int64))
+
+    with session.Session() as sess:
+      self.assertEqual(4.0, sess.run(result_1))
+      self.assertEqual(100, sess.run(result_2))
+      self.assertEqual((4.0, 100), sess.run((result_1, result_2)))
+
 
 @test_util.with_c_api
 class FunctionsFromProtos(test.TestCase):
-- 
GitLab


From a94506b831aac707979aa615b0f424f2d4b2f316 Mon Sep 17 00:00:00 2001
From: Yao Zhang <yaozhang@google.com>
Date: Tue, 12 Dec 2017 15:29:16 -0800
Subject: [PATCH 0933/1225] Support permutation from NCHW to NHWC.

PiperOrigin-RevId: 178824999
---
 tensorflow/core/kernels/data_format_ops.cc | 25 ++++++++++---------
 tensorflow/core/kernels/data_format_ops.h  | 28 +++++++++++++++++++---
 tensorflow/python/ops/nn_test.py           | 10 +++++++-
 3 files changed, 48 insertions(+), 15 deletions(-)

diff --git a/tensorflow/core/kernels/data_format_ops.cc b/tensorflow/core/kernels/data_format_ops.cc
index 0d427eddf3..a6ac119002 100644
--- a/tensorflow/core/kernels/data_format_ops.cc
+++ b/tensorflow/core/kernels/data_format_ops.cc
@@ -72,15 +72,14 @@ class DataFormatVecPermuteOp : public OpKernel {
     OP_REQUIRES_OK(context, context->GetAttr("src_format", &src_format));
     string dst_format;
     OP_REQUIRES_OK(context, context->GetAttr("dst_format", &dst_format));
-    OP_REQUIRES(
-        context, src_format == "NHWC",
-        errors::InvalidArgument(strings::StrCat(
-            "Current implementation doesn't support source data format ",
-            src_format)));
-    OP_REQUIRES(context, dst_format == "NCHW",
+    OP_REQUIRES(context,
+                (src_format == "NHWC" && dst_format == "NCHW") ||
+                    (src_format == "NCHW" && dst_format == "NHWC"),
                 errors::InvalidArgument(strings::StrCat(
-                    "Current implementation doesn't support dst data format ",
-                    dst_format)));
+                    "Current implementation only supports NCHW-to-NHWC and "
+                    "NHWC-to-NCHW format conversion; got source format ",
+                    src_format, " and destination format ", dst_format)));
+    nhwc_to_nchw_ = (src_format == "NHWC") ? true : false;
   }
 
   void Compute(OpKernelContext* context) override {
@@ -96,9 +95,13 @@ class DataFormatVecPermuteOp : public OpKernel {
     Tensor* output = nullptr;
     OP_REQUIRES_OK(context,
                    context->allocate_output(0, input.shape(), &output));
-    functor::DataFormatVecPermute<Device, T>()(
-        context->eigen_device<Device>(), input.vec<T>(), output->vec<T>());
+    functor::DataFormatVecPermute<Device, T>()(context->eigen_device<Device>(),
+                                               input.vec<T>(), output->vec<T>(),
+                                               nhwc_to_nchw_);
   }
+
+ private:
+  bool nhwc_to_nchw_;
 };
 
 #define REGISTER_KERNEL(T)                                                \
@@ -135,7 +138,7 @@ TF_CALL_int64(DECLARE_GPU_SPECS);
   template <>                                             \
   void DataFormatVecPermute<GPUDevice, T>::operator()(    \
       const GPUDevice& d, typename TTypes<T>::ConstVec x, \
-      typename TTypes<T>::Vec y);                         \
+      typename TTypes<T>::Vec y, bool nhwc_to_nchw);      \
   extern template struct DataFormatVecPermute<GPUDevice, T>;
 #define DECLARE_GPU_SPECS(T) DECLARE_GPU_SPEC(T);
 TF_CALL_int32(DECLARE_GPU_SPECS);
diff --git a/tensorflow/core/kernels/data_format_ops.h b/tensorflow/core/kernels/data_format_ops.h
index 54798cc6ab..6a25823c73 100644
--- a/tensorflow/core/kernels/data_format_ops.h
+++ b/tensorflow/core/kernels/data_format_ops.h
@@ -40,7 +40,7 @@ struct DataFormatDimMap {
 };
 
 template <typename T>
-struct VecPermute {
+struct VecPermuteNHWCToNCHW {
   Eigen::DSizes<Eigen::DenseIndex, 1> dimensions(
       typename TTypes<T>::ConstVec input) const {
     Eigen::DSizes<Eigen::DenseIndex, 1> result;
@@ -57,12 +57,34 @@ struct VecPermute {
   }
 };
 
+template <typename T>
+struct VecPermuteNCHWToNHWC {
+  Eigen::DSizes<Eigen::DenseIndex, 1> dimensions(
+      typename TTypes<T>::ConstVec input) const {
+    Eigen::DSizes<Eigen::DenseIndex, 1> result;
+    result[0] = input.dimension(0);
+    return result;
+  }
+  template <typename Output, typename Device>
+  void eval(typename TTypes<T>::ConstVec input, Output& output,
+            const Device& d) const {
+    output.template chip<0>(0).device(d) = input.template chip<0>(0);
+    output.template chip<0>(1).device(d) = input.template chip<0>(2);
+    output.template chip<0>(2).device(d) = input.template chip<0>(3);
+    output.template chip<0>(3).device(d) = input.template chip<0>(1);
+  }
+};
+
 // Functor used by DataFormatVecPermuteOp to do the computations.
 template <typename Device, typename T>
 struct DataFormatVecPermute {
   void operator()(const Device& d, typename TTypes<T>::ConstVec x,
-                  typename TTypes<T>::Vec y) {
-    y.device(d) = x.customOp(VecPermute<T>());
+                  typename TTypes<T>::Vec y, bool nhwc_to_nchw) {
+    if (nhwc_to_nchw) {
+      y.device(d) = x.customOp(VecPermuteNHWCToNCHW<T>());
+    } else {
+      y.device(d) = x.customOp(VecPermuteNCHWToNHWC<T>());
+    }
   }
 };
 
diff --git a/tensorflow/python/ops/nn_test.py b/tensorflow/python/ops/nn_test.py
index 8dfd0740bb..38b8430996 100644
--- a/tensorflow/python/ops/nn_test.py
+++ b/tensorflow/python/ops/nn_test.py
@@ -975,7 +975,7 @@ class DataFormatDimMapTest(test_lib.TestCase):
 
 class DataFormatVectorPermuteTest(test_lib.TestCase):
 
-  def test(self):
+  def testNHWCToNCHW(self):
     x_val = [7, 4, 9, 3]
     x = constant_op.constant(x_val)
     y = nn_ops.data_format_vec_permute(x)
@@ -983,6 +983,14 @@ class DataFormatVectorPermuteTest(test_lib.TestCase):
       y_val = sess.run(y)
       self.assertAllEqual(y_val, [7, 3, 4, 9])
 
+  def testNCHWToNHWC(self):
+    x_val = [7, 4, 9, 3]
+    x = constant_op.constant(x_val)
+    y = nn_ops.data_format_vec_permute(x, src_format="NCHW", dst_format="NHWC")
+    with self.test_session(use_gpu=test_lib.is_gpu_available()) as sess:
+      y_val = sess.run(y)
+      self.assertAllEqual(y_val, [7, 9, 3, 4])
+
 
 if __name__ == "__main__":
   test_lib.main()
-- 
GitLab


From 87cfa5696122c2173902accd47418ee4f25995d7 Mon Sep 17 00:00:00 2001
From: Guangda Lai <laigd@google.com>
Date: Tue, 12 Dec 2017 15:39:52 -0800
Subject: [PATCH 0934/1225] Refactor helper functions a bit for virtual gpu
 changes later.

PiperOrigin-RevId: 178826426
---
 .../core/common_runtime/gpu/gpu_device.cc     | 176 +++++++++---------
 .../core/common_runtime/gpu/gpu_device.h      |   9 +-
 2 files changed, 94 insertions(+), 91 deletions(-)

diff --git a/tensorflow/core/common_runtime/gpu/gpu_device.cc b/tensorflow/core/common_runtime/gpu/gpu_device.cc
index 5664977833..646568a3e5 100644
--- a/tensorflow/core/common_runtime/gpu/gpu_device.cc
+++ b/tensorflow/core/common_runtime/gpu/gpu_device.cc
@@ -646,6 +646,79 @@ class ConcretePerOpGpuDevice : public PerOpGpuDevice {
   EigenCudaStreamDevice stream_device_;
   Eigen::GpuDevice device_;
 };
+
+Status ParseVisibleDeviceList(const string& visible_device_list,
+                              std::vector<int>* visible_gpu_order) {
+  visible_gpu_order->clear();
+  gpu::Platform* gpu_manager = GPUMachineManager();
+
+  // If the user wants to remap the visible to virtual GPU mapping,
+  // check for that here.
+  if (visible_device_list.empty()) {
+    visible_gpu_order->resize(gpu_manager->VisibleDeviceCount());
+    // By default, visible to virtual mapping is unchanged.
+    int deviceNo = 0;
+    std::generate(visible_gpu_order->begin(), visible_gpu_order->end(),
+                  [&deviceNo] { return deviceNo++; });
+  } else {
+    const std::vector<string> order_str =
+        str_util::Split(visible_device_list, ',');
+    for (const string& cuda_gpu_id_str : order_str) {
+      int32 cuda_gpu_id;
+      if (!strings::safe_strto32(cuda_gpu_id_str, &cuda_gpu_id)) {
+        return errors::InvalidArgument(
+            "Could not parse entry in 'visible_device_list': '",
+            cuda_gpu_id_str, "'. visible_device_list = ", visible_device_list);
+      }
+      if (cuda_gpu_id < 0 || cuda_gpu_id >= gpu_manager->VisibleDeviceCount()) {
+        return errors::InvalidArgument(
+            "'visible_device_list' listed an invalid GPU id '", cuda_gpu_id,
+            "' but visible device count is ",
+            gpu_manager->VisibleDeviceCount());
+      }
+      visible_gpu_order->push_back(cuda_gpu_id);
+    }
+  }
+
+  // Validate no repeats.
+  std::set<int> visible_device_set(visible_gpu_order->begin(),
+                                   visible_gpu_order->end());
+  if (visible_device_set.size() != visible_gpu_order->size()) {
+    return errors::InvalidArgument(
+        "visible_device_list contained a duplicate entry: ",
+        visible_device_list);
+  }
+  return Status::OK();
+}
+
+int64 MinSystemMemory(int64 available_memory) {
+  // We use the following heuristic for now:
+  //
+  // If the available_memory is < 2GiB, we allocate 225MiB to system memory.
+  // Otherwise, allocate max(300MiB, 0.05 * available_memory) to system memory.
+  //
+  // In the future we could be more sophisticated by using a table of devices.
+  int64 min_system_memory;
+  if (available_memory < (1LL << 31)) {
+    // 225MiB
+    min_system_memory = 225 * 1024 * 1024;
+  } else {
+    // max(300 MiB, 0.05 * available_memory)
+    min_system_memory =
+        std::max(314572800LL, static_cast<int64>(available_memory * 0.05));
+  }
+#if defined(__GNUC__) && defined(__OPTIMIZE__)
+// Do nothing
+#elif !defined(__GNUC__) && defined(NDEBUG)
+// Do nothing
+#else
+  // Double the amount of available GPU memory in non-opt builds (debug
+  // builds in windows); because in non-opt builds more system memory
+  // is necessary.
+  min_system_memory *= 2;
+#endif
+  return min_system_memory;
+}
 }  // namespace
 
 void BaseGPUDevice::ReinitializeDevice(OpKernelContext* context,
@@ -683,14 +756,27 @@ void BaseGPUDevice::ReinitializeGpuDevice(OpKernelContext* context,
 Status BaseGPUDeviceFactory::CreateDevices(const SessionOptions& options,
                                            const string& name_prefix,
                                            std::vector<Device*>* devices) {
+  TF_RETURN_IF_ERROR(ValidateGPUMachineManager());
+  gpu::Platform* gpu_manager = GPUMachineManager();
+  if (gpu_manager == nullptr) {
+    return Status::OK();
+  }
+  // If there are no GPUs visible, do nothing.
+  if (gpu_manager->VisibleDeviceCount() <= 0) {
+    return Status::OK();
+  }
+
   size_t n = INT_MAX;
   auto iter = options.config.device_count().find("GPU");
   if (iter != options.config.device_count().end()) {
     n = iter->second;
   }
+  const auto& gpu_options = options.config.gpu_options();
+  std::vector<int> visible_gpu_order;
+  TF_RETURN_IF_ERROR(ParseVisibleDeviceList(gpu_options.visible_device_list(),
+                                            &visible_gpu_order));
   std::vector<int> valid_gpu_ids;
-  TF_RETURN_IF_ERROR(GetValidDeviceIds(
-      options.config.gpu_options().visible_device_list(), &valid_gpu_ids));
+  TF_RETURN_IF_ERROR(GetValidDeviceIds(visible_gpu_order, &valid_gpu_ids));
   if (static_cast<size_t>(n) > valid_gpu_ids.size()) {
     n = valid_gpu_ids.size();
   }
@@ -736,38 +822,6 @@ Status BaseGPUDeviceFactory::CreateDevices(const SessionOptions& options,
   return Status::OK();
 }
 
-namespace {
-int64 MinSystemMemory(int64 available_memory) {
-  // We use the following heuristic for now:
-  //
-  // If the available_memory is < 2GiB, we allocate 225MiB to system memory.
-  // Otherwise, allocate max(300MiB, 0.05 * available_memory) to system memory.
-  //
-  // In the future we could be more sophisticated by using a table of devices.
-  int64 min_system_memory;
-  if (available_memory < (1LL << 31)) {
-    // 225MiB
-    min_system_memory = 225 * 1024 * 1024;
-  } else {
-    // max(300 MiB, 0.05 * available_memory)
-    min_system_memory =
-        std::max(314572800LL, static_cast<int64>(available_memory * 0.05));
-  }
-#if defined(__GNUC__) && defined(__OPTIMIZE__)
-// Do nothing
-#elif !defined(__GNUC__) && defined(NDEBUG)
-// Do nothing
-#else
-  // Double the amount of available GPU memory in non-opt builds (debug
-  // builds in windows); because in non-opt builds more system memory
-  // is necessary.
-  min_system_memory *= 2;
-#endif
-  return min_system_memory;
-}
-
-}  // namespace
-
 static string GetShortDeviceDescription(int device_id,
                                         const gpu::DeviceDescription& desc) {
   int cc_major;
@@ -1013,60 +1067,8 @@ Status EnablePeerAccess(gpu::Platform* platform,
 }  // namespace
 
 Status BaseGPUDeviceFactory::GetValidDeviceIds(
-    const string& visible_device_list, std::vector<int>* ids) {
-  TF_RETURN_IF_ERROR(ValidateGPUMachineManager());
-
+    const std::vector<int>& visible_gpu_order, std::vector<int>* ids) {
   gpu::Platform* gpu_manager = GPUMachineManager();
-  if (gpu_manager == nullptr) {
-    return Status::OK();
-  }
-
-  // If there are no GPUs visible, do nothing.
-  if (gpu_manager->VisibleDeviceCount() <= 0) {
-    return Status::OK();
-  }
-
-  // If the user wants to remap the visible to virtual GPU mapping,
-  // check for that here.
-  std::vector<int> visible_gpu_order;
-  if (visible_device_list.empty()) {
-    visible_gpu_order.resize(gpu_manager->VisibleDeviceCount());
-    // By default, visible to virtual mapping is unchanged.
-    int deviceNo = 0;
-    std::generate(visible_gpu_order.begin(), visible_gpu_order.end(),
-                  [&deviceNo] { return deviceNo++; });
-  } else {
-    std::vector<string> order_str = str_util::Split(visible_device_list, ',');
-    for (int i = 0; i < order_str.size(); ++i) {
-      const string& gpu_id_str = order_str[i];
-      int32 gpu_id;
-      if (!strings::safe_strto32(gpu_id_str, &gpu_id)) {
-        return errors::InvalidArgument(
-            "Could not parse entry in 'visible_device_list': '", gpu_id_str,
-            "'.  visible_device_list = ", visible_device_list);
-      }
-
-      if (gpu_id < 0 || gpu_id >= gpu_manager->VisibleDeviceCount()) {
-        return errors::InvalidArgument(
-            "'visible_device_list' listed an invalid GPU id '", gpu_id,
-            "' but visible device count is ",
-            gpu_manager->VisibleDeviceCount());
-      }
-
-      visible_gpu_order.push_back(gpu_id);
-    }
-  }
-
-  // Validate no repeats.
-  std::set<int> visible_device_set(visible_gpu_order.begin(),
-                                   visible_gpu_order.end());
-  if (visible_device_set.size() != visible_gpu_order.size()) {
-    return errors::InvalidArgument(
-        "visible_device_list contained "
-        "a duplicate entry: ",
-        visible_device_list);
-  }
-
   bool new_gpu_found = false;
   for (int i = 0; i < visible_gpu_order.size(); ++i) {
     int gpu_id = visible_gpu_order[i];
diff --git a/tensorflow/core/common_runtime/gpu/gpu_device.h b/tensorflow/core/common_runtime/gpu/gpu_device.h
index 4585d5b04d..74176cd448 100644
--- a/tensorflow/core/common_runtime/gpu/gpu_device.h
+++ b/tensorflow/core/common_runtime/gpu/gpu_device.h
@@ -151,10 +151,11 @@ class BaseGPUDeviceFactory : public DeviceFactory {
                                          Allocator* cpu_allocator) = 0;
 
   // Returns into 'ids' the list of valid GPU ids, in the order that
-  // they should map to logical gpu ids "/device:GPU:0", "/device:GPU:1", etc, based
-  // upon 'visible_device_list', a comma-separated list of 'visible
-  // gpu ids'.
-  Status GetValidDeviceIds(const string& visible_device_list,
+  // they should map to logical gpu ids "/device:GPU:0", "/device:GPU:1", etc,
+  // based upon 'visible_gpu_order' which was generated by parsing
+  // GPUOptions::visible_device_list which is a comma-separated list of
+  // 'visible gpu ids'.
+  Status GetValidDeviceIds(const std::vector<int>& visible_gpu_order,
                            std::vector<int>* ids);
 
   // visible_gpu_initialized_[gpu_id] is true if visible GPU gpu_id
-- 
GitLab


From 82afab2e9c9fd0f5da7bf6c27c597a9320a14d84 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 12 Dec 2017 15:58:15 -0800
Subject: [PATCH 0935/1225] Only require validation that a fetch is requested
 for tf2xla::Config. It is legitimate to convert a graph with only fetches, eg
 in that case where the inputs to the graph are supplied by the infeed rather
 than by a feed node.

PiperOrigin-RevId: 178828952
---
 tensorflow/compiler/tf2xla/tf2xla_util.cc      |  4 ++--
 tensorflow/compiler/tf2xla/tf2xla_util_test.cc | 14 ++------------
 2 files changed, 4 insertions(+), 14 deletions(-)

diff --git a/tensorflow/compiler/tf2xla/tf2xla_util.cc b/tensorflow/compiler/tf2xla/tf2xla_util.cc
index 55f2f3149c..f428a19432 100644
--- a/tensorflow/compiler/tf2xla/tf2xla_util.cc
+++ b/tensorflow/compiler/tf2xla/tf2xla_util.cc
@@ -88,8 +88,8 @@ Status ValidateConfig(const tf2xla::Config& config) {
     TF_RETURN_IF_ERROR(CheckNameDuplicates("fetch", fetch.name(), &names));
   }
   TF_RETURN_IF_ERROR(CheckFeedFetchNameConflicts("fetch", names));
-  if (config.feed().empty() || config.fetch().empty()) {
-    return errors::InvalidArgument("feeds and fetches must be specified");
+  if (config.fetch().empty()) {
+    return errors::InvalidArgument("fetches must be specified");
   }
   return Status::OK();
 }
diff --git a/tensorflow/compiler/tf2xla/tf2xla_util_test.cc b/tensorflow/compiler/tf2xla/tf2xla_util_test.cc
index 436039e154..ed10d80609 100644
--- a/tensorflow/compiler/tf2xla/tf2xla_util_test.cc
+++ b/tensorflow/compiler/tf2xla/tf2xla_util_test.cc
@@ -58,24 +58,14 @@ TEST(ValidateConfig, Good) {
 
 TEST(ValidateConfig, BadEmpty) {
   tf2xla::Config config;
-  ExpectErrorContains(ValidateConfig(config),
-                      "feeds and fetches must be specified");
-}
-
-TEST(ValidateConfig, BadNoFeed) {
-  tf2xla::Config config;
-  tf2xla::Fetch* fetch = config.add_fetch();
-  fetch->mutable_id()->set_node_name("foo");
-  ExpectErrorContains(ValidateConfig(config),
-                      "feeds and fetches must be specified");
+  ExpectErrorContains(ValidateConfig(config), "fetches must be specified");
 }
 
 TEST(ValidateConfig, BadNoFetch) {
   tf2xla::Config config;
   tf2xla::Feed* feed = config.add_feed();
   feed->mutable_id()->set_node_name("foo");
-  ExpectErrorContains(ValidateConfig(config),
-                      "feeds and fetches must be specified");
+  ExpectErrorContains(ValidateConfig(config), "fetches must be specified");
 }
 
 TEST(ValidateConfig, BadFeedNodeName) {
-- 
GitLab


From 7d8ab0e440cf895dceb31f260ce5ae177b84c05d Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 12 Dec 2017 16:06:31 -0800
Subject: [PATCH 0936/1225] For many requests, the GCS filesystem client did
 not provide DNS lookup hints.  This change allows all GCS HTTP requests to
 use the GCS DNS cache.  It also simplifies the code, and eliminates a lot of
 redundant code.

The GCS DNS cache has been simplified and made more general. It is now easy to add more DNS names, simply by adding an entry to the GcsDnsCache::names_ list.

PiperOrigin-RevId: 178830317
---
 .../core/platform/cloud/gcs_dns_cache.cc      |  82 +++++++----
 .../core/platform/cloud/gcs_dns_cache.h       |   7 +-
 .../core/platform/cloud/gcs_dns_cache_test.cc |   3 +-
 .../core/platform/cloud/gcs_file_system.cc    | 132 +++++++-----------
 .../core/platform/cloud/gcs_file_system.h     |   2 +
 .../platform/cloud/gcs_file_system_test.cc    |  23 +++
 .../core/platform/cloud/http_request_fake.h   |  13 +-
 7 files changed, 149 insertions(+), 113 deletions(-)

diff --git a/tensorflow/core/platform/cloud/gcs_dns_cache.cc b/tensorflow/core/platform/cloud/gcs_dns_cache.cc
index 63f2da065d..78bf680317 100644
--- a/tensorflow/core/platform/cloud/gcs_dns_cache.cc
+++ b/tensorflow/core/platform/cloud/gcs_dns_cache.cc
@@ -23,9 +23,19 @@ namespace tensorflow {
 
 namespace {
 
-constexpr char kStorageHost[] = "storage.googleapis.com";
-constexpr char kWwwHost[] = "www.googleapis.com";
-
+const std::vector<string>& kCachedDomainNames =
+    *new std::vector<string>{"www.googleapis.com", "storage.googleapis.com"};
+
+// Selects one item at random from a vector of items, using a uniform
+// distribution.
+template <typename T>
+const T& SelectRandomItemUniform(std::default_random_engine* random,
+                                 const std::vector<T>& items) {
+  CHECK_GT(items.size(), 0);
+  std::uniform_int_distribution<size_t> distribution(0u, items.size() - 1u);
+  size_t choice_index = distribution(*random);
+  return items[choice_index];
+}
 }  // namespace
 
 GcsDnsCache::GcsDnsCache(Env* env, int64 refresh_rate_secs)
@@ -35,39 +45,38 @@ Status GcsDnsCache::AnnotateRequest(HttpRequest* request) {
   // TODO(saeta): Blacklist failing IP addresses.
   mutex_lock l(mu_);
   if (!started_) {
+    VLOG(1) << "Starting GCS DNS cache.";
     DCHECK(!worker_) << "Worker thread already exists!";
     // Perform DNS resolutions to warm the cache.
-    std::vector<string> www_addresses = ResolveName(kWwwHost);
-    std::vector<string> storage_addresses = ResolveName(kStorageHost);
-    www_addresses.swap(www_addresses_);
-    storage_addresses.swap(storage_addresses_);
+    addresses_ = ResolveNames(kCachedDomainNames);
 
     // Note: we opt to use a thread instead of a delayed closure.
     worker_.reset(env_->StartThread(
         {}, "gcs_dns_worker", std::bind(&GcsDnsCache::WorkerThread, this)));
     started_ = true;
   }
-  if (!storage_addresses_.empty()) {
-    std::uniform_int_distribution<> storage_dist(0,
-                                                 storage_addresses_.size() - 1);
-    size_t index = storage_dist(random_);
-    TF_RETURN_IF_ERROR(request->AddResolveOverride(kStorageHost, 443,
-                                                   storage_addresses_[index]));
-  } else {
-    LOG(WARNING) << "No IP addresses available for " << kStorageHost;
-  }
-  if (!www_addresses_.empty()) {
-    std::uniform_int_distribution<> www_dist(0, www_addresses_.size() - 1);
-    size_t index = www_dist(random_);
-    TF_RETURN_IF_ERROR(
-        request->AddResolveOverride(kWwwHost, 443, www_addresses_[index]));
-  } else {
-    LOG(WARNING) << "No IP addresses available for " << kWwwHost;
+
+  CHECK_EQ(kCachedDomainNames.size(), addresses_.size());
+  for (size_t i = 0; i < kCachedDomainNames.size(); ++i) {
+    const string& name = kCachedDomainNames[i];
+    const std::vector<string>& addresses = addresses_[i];
+    if (!addresses.empty()) {
+      const string& chosen_address =
+          SelectRandomItemUniform(&random_, addresses);
+      TF_RETURN_IF_ERROR(
+          request->AddResolveOverride(name, 443, chosen_address));
+      VLOG(1) << "Annotated DNS mapping: " << name << " --> " << chosen_address;
+    } else {
+      LOG(WARNING) << "No IP addresses available for " << name;
+    }
   }
+
   return Status::OK();
 }
 
 /* static */ std::vector<string> GcsDnsCache::ResolveName(const string& name) {
+  VLOG(1) << "Resolving DNS name: " << name;
+
   addrinfo hints;
   memset(&hints, 0, sizeof(hints));
   hints.ai_family = AF_INET;  // Only use IPv4 for now.
@@ -93,6 +102,7 @@ Status GcsDnsCache::AnnotateRequest(HttpRequest* request) {
                    << ": " << strerror(errno);
       } else {
         output.emplace_back(buf);
+        VLOG(1) << "... address: " << buf;
       }
     }
   } else {
@@ -110,6 +120,25 @@ Status GcsDnsCache::AnnotateRequest(HttpRequest* request) {
   return output;
 }
 
+// Performs DNS resolution for a set of DNS names. The return vector contains
+// one element for each element in 'names', and each element is itself a
+// vector of IP addresses (in textual form).
+//
+// If DNS resolution fails for any name, then that slot in the return vector
+// will still be present, but will be an empty vector.
+//
+// Ensures: names.size() == return_value.size()
+
+std::vector<std::vector<string>> GcsDnsCache::ResolveNames(
+    const std::vector<string>& names) {
+  std::vector<std::vector<string>> all_addresses;
+  all_addresses.reserve(names.size());
+  for (const string& name : names) {
+    all_addresses.push_back(ResolveName(name));
+  }
+  return all_addresses;
+}
+
 void GcsDnsCache::WorkerThread() {
   while (true) {
     {
@@ -119,15 +148,14 @@ void GcsDnsCache::WorkerThread() {
       cond_var_.wait_for(l, std::chrono::seconds(refresh_rate_secs_));
       if (cancelled_) return;
     }
+
     // Resolve DNS values
-    std::vector<string> www_addresses = ResolveName(kWwwHost);
-    std::vector<string> storage_addresses = ResolveName(kStorageHost);
+    auto new_addresses = ResolveNames(kCachedDomainNames);
 
     {
       mutex_lock l(mu_);
       // Update instance variables.
-      www_addresses.swap(www_addresses_);
-      storage_addresses.swap(storage_addresses_);
+      addresses_.swap(new_addresses);
     }
   }
 }
diff --git a/tensorflow/core/platform/cloud/gcs_dns_cache.h b/tensorflow/core/platform/cloud/gcs_dns_cache.h
index 7a4d3847a5..2ef7c9bdbe 100644
--- a/tensorflow/core/platform/cloud/gcs_dns_cache.h
+++ b/tensorflow/core/platform/cloud/gcs_dns_cache.h
@@ -52,6 +52,8 @@ class GcsDnsCache {
 
  private:
   static std::vector<string> ResolveName(const string& name);
+  static std::vector<std::vector<string>> ResolveNames(
+      const std::vector<string>& names);
   void WorkerThread();
 
   // Define a friend class for testing.
@@ -63,10 +65,11 @@ class GcsDnsCache {
   std::default_random_engine random_ GUARDED_BY(mu_);
   bool started_ GUARDED_BY(mu_) = false;
   bool cancelled_ GUARDED_BY(mu_) = false;
-  std::vector<string> www_addresses_ GUARDED_BY(mu_);
-  std::vector<string> storage_addresses_ GUARDED_BY(mu_);
   std::unique_ptr<Thread> worker_ GUARDED_BY(mu_);  // After mutable vars.
   const int64 refresh_rate_secs_;
+
+  // Entries in this vector correspond to entries in kCachedDomainNames.
+  std::vector<std::vector<string>> addresses_ GUARDED_BY(mu_);
 };
 
 }  // namespace tensorflow
diff --git a/tensorflow/core/platform/cloud/gcs_dns_cache_test.cc b/tensorflow/core/platform/cloud/gcs_dns_cache_test.cc
index 266879ddf5..2c3819f1e2 100644
--- a/tensorflow/core/platform/cloud/gcs_dns_cache_test.cc
+++ b/tensorflow/core/platform/cloud/gcs_dns_cache_test.cc
@@ -88,8 +88,7 @@ class GcsDnsCacheTest : public ::testing::Test {
     {
       mutex_lock l(d.mu_);
       d.started_ = true;  // Avoid creating a thread.
-      d.www_addresses_ = {"192.168.1.1"};
-      d.storage_addresses_ = {"172.134.1.1"};
+      d.addresses_ = {{"192.168.1.1"}, {"172.134.1.1"}};
     }
 
     TestHttpRequest req;
diff --git a/tensorflow/core/platform/cloud/gcs_file_system.cc b/tensorflow/core/platform/cloud/gcs_file_system.cc
index ab82643ad5..f80cbf7626 100644
--- a/tensorflow/core/platform/cloud/gcs_file_system.cc
+++ b/tensorflow/core/platform/cloud/gcs_file_system.cc
@@ -295,15 +295,13 @@ class GcsRandomAccessFile : public RandomAccessFile {
 class GcsWritableFile : public WritableFile {
  public:
   GcsWritableFile(const string& bucket, const string& object,
-                  AuthProvider* auth_provider,
-                  HttpRequest::Factory* http_request_factory,
+                  GcsFileSystem* filesystem,
                   GcsFileSystem::TimeoutConfig* timeouts,
                   std::function<void()> file_cache_erase,
                   int64 initial_retry_delay_usec)
       : bucket_(bucket),
         object_(object),
-        auth_provider_(auth_provider),
-        http_request_factory_(http_request_factory),
+        filesystem_(filesystem),
         timeouts_(timeouts),
         file_cache_erase_(std::move(file_cache_erase)),
         sync_needed_(true),
@@ -320,16 +318,13 @@ class GcsWritableFile : public WritableFile {
   /// with the content to be appended. The class takes onwnership of the
   /// specified tmp file and deletes it on close.
   GcsWritableFile(const string& bucket, const string& object,
-                  AuthProvider* auth_provider,
-                  const string& tmp_content_filename,
-                  HttpRequest::Factory* http_request_factory,
+                  GcsFileSystem* filesystem, const string& tmp_content_filename,
                   GcsFileSystem::TimeoutConfig* timeouts,
                   std::function<void()> file_cache_erase,
                   int64 initial_retry_delay_usec)
       : bucket_(bucket),
         object_(object),
-        auth_provider_(auth_provider),
-        http_request_factory_(http_request_factory),
+        filesystem_(filesystem),
         timeouts_(timeouts),
         file_cache_erase_(std::move(file_cache_erase)),
         sync_needed_(true),
@@ -450,16 +445,13 @@ class GcsWritableFile : public WritableFile {
     uint64 file_size;
     TF_RETURN_IF_ERROR(GetCurrentFileSize(&file_size));
 
-    string auth_token;
-    TF_RETURN_IF_ERROR(AuthProvider::GetToken(auth_provider_, &auth_token));
-
     std::vector<char> output_buffer;
-    std::unique_ptr<HttpRequest> request(http_request_factory_->Create());
-    TF_RETURN_IF_ERROR(request->Init());
+    std::unique_ptr<HttpRequest> request;
+    TF_RETURN_IF_ERROR(filesystem_->CreateHttpRequest(&request));
+
     TF_RETURN_IF_ERROR(request->SetUri(strings::StrCat(
         kGcsUploadUriBase, "b/", bucket_,
         "/o?uploadType=resumable&name=", request->EscapeString(object_))));
-    TF_RETURN_IF_ERROR(request->AddAuthBearerHeader(auth_token));
     TF_RETURN_IF_ERROR(request->AddHeader("X-Upload-Content-Length",
                                           std::to_string(file_size)));
     TF_RETURN_IF_ERROR(request->SetPostEmptyBody());
@@ -490,13 +482,9 @@ class GcsWritableFile : public WritableFile {
     uint64 file_size;
     TF_RETURN_IF_ERROR(GetCurrentFileSize(&file_size));
 
-    string auth_token;
-    TF_RETURN_IF_ERROR(AuthProvider::GetToken(auth_provider_, &auth_token));
-
-    std::unique_ptr<HttpRequest> request(http_request_factory_->Create());
-    TF_RETURN_IF_ERROR(request->Init());
+    std::unique_ptr<HttpRequest> request;
+    TF_RETURN_IF_ERROR(filesystem_->CreateHttpRequest(&request));
     TF_RETURN_IF_ERROR(request->SetUri(session_uri));
-    TF_RETURN_IF_ERROR(request->AddAuthBearerHeader(auth_token));
     TF_RETURN_IF_ERROR(request->SetTimeouts(timeouts_->connect, timeouts_->idle,
                                             timeouts_->metadata));
     TF_RETURN_IF_ERROR(request->AddHeader(
@@ -541,13 +529,9 @@ class GcsWritableFile : public WritableFile {
     uint64 file_size;
     TF_RETURN_IF_ERROR(GetCurrentFileSize(&file_size));
 
-    string auth_token;
-    TF_RETURN_IF_ERROR(AuthProvider::GetToken(auth_provider_, &auth_token));
-
-    std::unique_ptr<HttpRequest> request(http_request_factory_->Create());
-    TF_RETURN_IF_ERROR(request->Init());
+    std::unique_ptr<HttpRequest> request;
+    TF_RETURN_IF_ERROR(filesystem_->CreateHttpRequest(&request));
     TF_RETURN_IF_ERROR(request->SetUri(session_uri));
-    TF_RETURN_IF_ERROR(request->AddAuthBearerHeader(auth_token));
     if (file_size > 0) {
       TF_RETURN_IF_ERROR(request->AddHeader(
           "Content-Range", strings::StrCat("bytes ", start_offset, "-",
@@ -571,10 +555,9 @@ class GcsWritableFile : public WritableFile {
 
   string bucket_;
   string object_;
-  AuthProvider* auth_provider_;
+  GcsFileSystem* const filesystem_;  // Not owned.
   string tmp_content_filename_;
   std::ofstream outfile_;
-  HttpRequest::Factory* http_request_factory_;
   GcsFileSystem::TimeoutConfig* timeouts_;
   std::function<void()> file_cache_erase_;
   bool sync_needed_;  // whether there is buffered data that needs to be synced
@@ -660,6 +643,11 @@ GcsFileSystem::GcsFileSystem()
   if (GetEnvVar(kResolveCacheSecs, strings::safe_strto64,
                 &resolve_frequency_secs)) {
     dns_cache_.reset(new GcsDnsCache(resolve_frequency_secs));
+    VLOG(1) << "GCS DNS cache is enabled.  " << kResolveCacheSecs << " = "
+            << resolve_frequency_secs;
+  } else {
+    VLOG(1) << "GCS DNS cache is disabled, because " << kResolveCacheSecs
+            << " = 0 (or is not set)";
   }
   // Apply the overrides for request timeouts
   uint32 timeout_value;
@@ -725,24 +713,17 @@ Status GcsFileSystem::LoadBufferFromGCS(const string& filename, size_t offset,
                                         size_t n, std::vector<char>* out) {
   string bucket, object;
   TF_RETURN_IF_ERROR(ParseGcsPath(filename, false, &bucket, &object));
-  string auth_token;
-  TF_RETURN_IF_ERROR(AuthProvider::GetToken(auth_provider_.get(), &auth_token));
 
-  std::unique_ptr<HttpRequest> request(http_request_factory_->Create());
-  TF_RETURN_IF_ERROR(request->Init());
+  std::unique_ptr<HttpRequest> request;
+  TF_RETURN_IF_ERROR(CreateHttpRequest(&request));
   TF_RETURN_IF_ERROR(
       request->SetUri(strings::StrCat("https://", kStorageHost, "/", bucket,
                                       "/", request->EscapeString(object))));
-  TF_RETURN_IF_ERROR(request->AddAuthBearerHeader(auth_token));
   TF_RETURN_IF_ERROR(request->SetRange(offset, offset + n - 1));
   TF_RETURN_IF_ERROR(request->SetResultBuffer(out));
   TF_RETURN_IF_ERROR(
       request->SetTimeouts(timeouts_.connect, timeouts_.idle, timeouts_.read));
 
-  if (dns_cache_) {
-    TF_RETURN_IF_ERROR(dns_cache_->AnnotateRequest(request.get()));
-  }
-
   TF_RETURN_WITH_CONTEXT_IF_ERROR(request->Send(), " when reading gs://",
                                   bucket, "/", object);
 
@@ -771,8 +752,8 @@ Status GcsFileSystem::NewWritableFile(const string& fname,
   string bucket, object;
   TF_RETURN_IF_ERROR(ParseGcsPath(fname, false, &bucket, &object));
   result->reset(new GcsWritableFile(
-      bucket, object, auth_provider_.get(), http_request_factory_.get(),
-      &timeouts_, [this, fname]() { file_block_cache_->RemoveFile(fname); },
+      bucket, object, this, &timeouts_,
+      [this, fname]() { file_block_cache_->RemoveFile(fname); },
       initial_retry_delay_usec_));
   return Status::OK();
 }
@@ -812,8 +793,7 @@ Status GcsFileSystem::NewAppendableFile(const string& fname,
   string bucket, object;
   TF_RETURN_IF_ERROR(ParseGcsPath(fname, false, &bucket, &object));
   result->reset(new GcsWritableFile(
-      bucket, object, auth_provider_.get(), old_content_filename,
-      http_request_factory_.get(), &timeouts_,
+      bucket, object, this, old_content_filename, &timeouts_,
       [this, fname]() { file_block_cache_->RemoveFile(fname); },
       initial_retry_delay_usec_));
   return Status::OK();
@@ -889,24 +869,16 @@ Status GcsFileSystem::StatForObject(const string& fname, const string& bucket,
 
   StatCache::ComputeFunc compute_func =
       [this, &bucket, &object](const string& fname, FileStatistics* stat) {
-        string auth_token;
-        TF_RETURN_IF_ERROR(
-            AuthProvider::GetToken(auth_provider_.get(), &auth_token));
-
         std::vector<char> output_buffer;
-        std::unique_ptr<HttpRequest> request(http_request_factory_->Create());
-        TF_RETURN_IF_ERROR(request->Init());
+        std::unique_ptr<HttpRequest> request;
+        TF_RETURN_IF_ERROR(CreateHttpRequest(&request));
         TF_RETURN_IF_ERROR(request->SetUri(strings::StrCat(
             kGcsUriBase, "b/", bucket, "/o/", request->EscapeString(object),
             "?fields=size%2Cupdated")));
-        TF_RETURN_IF_ERROR(request->AddAuthBearerHeader(auth_token));
         TF_RETURN_IF_ERROR(request->SetResultBuffer(&output_buffer));
         TF_RETURN_IF_ERROR(request->SetTimeouts(
             timeouts_.connect, timeouts_.idle, timeouts_.metadata));
 
-        if (dns_cache_) {
-          TF_RETURN_IF_ERROR(dns_cache_->AnnotateRequest(request.get()));
-        }
         TF_RETURN_WITH_CONTEXT_IF_ERROR(request->Send(),
                                         " when reading metadata of gs://",
                                         bucket, "/", object);
@@ -945,14 +917,11 @@ Status GcsFileSystem::BucketExists(const string& bucket, bool* result) {
   if (!result) {
     return errors::Internal("'result' cannot be nullptr.");
   }
-  string auth_token;
-  TF_RETURN_IF_ERROR(AuthProvider::GetToken(auth_provider_.get(), &auth_token));
 
-  std::unique_ptr<HttpRequest> request(http_request_factory_->Create());
-  TF_RETURN_IF_ERROR(request->Init());
+  std::unique_ptr<HttpRequest> request;
+  TF_RETURN_IF_ERROR(CreateHttpRequest(&request));
   TF_RETURN_IF_ERROR(
       request->SetUri(strings::StrCat(kGcsUriBase, "b/", bucket)));
-  TF_RETURN_IF_ERROR(request->AddAuthBearerHeader(auth_token));
   TF_RETURN_IF_ERROR(request->SetTimeouts(timeouts_.connect, timeouts_.idle,
                                           timeouts_.metadata));
   const Status status = request->Send();
@@ -1054,13 +1023,9 @@ Status GcsFileSystem::GetChildrenBounded(const string& dirname,
   string nextPageToken;
   uint64 retrieved_results = 0;
   while (true) {  // A loop over multiple result pages.
-    string auth_token;
-    TF_RETURN_IF_ERROR(
-        AuthProvider::GetToken(auth_provider_.get(), &auth_token));
-
     std::vector<char> output_buffer;
-    std::unique_ptr<HttpRequest> request(http_request_factory_->Create());
-    TF_RETURN_IF_ERROR(request->Init());
+    std::unique_ptr<HttpRequest> request;
+    TF_RETURN_IF_ERROR(CreateHttpRequest(&request));
     auto uri = strings::StrCat(kGcsUriBase, "b/", bucket, "/o");
     if (recursive) {
       uri = strings::StrCat(uri, "?fields=items%2Fname%2CnextPageToken");
@@ -1084,15 +1049,10 @@ Status GcsFileSystem::GetChildrenBounded(const string& dirname,
           strings::StrCat(uri, "&maxResults=", max_results - retrieved_results);
     }
     TF_RETURN_IF_ERROR(request->SetUri(uri));
-    TF_RETURN_IF_ERROR(request->AddAuthBearerHeader(auth_token));
     TF_RETURN_IF_ERROR(request->SetResultBuffer(&output_buffer));
     TF_RETURN_IF_ERROR(request->SetTimeouts(timeouts_.connect, timeouts_.idle,
                                             timeouts_.metadata));
 
-    if (dns_cache_) {
-      TF_RETURN_IF_ERROR(dns_cache_->AnnotateRequest(request.get()));
-    }
-
     TF_RETURN_WITH_CONTEXT_IF_ERROR(request->Send(), " when reading ", dirname);
     Json::Value root;
     StringPiece response_piece =
@@ -1204,17 +1164,14 @@ Status GcsFileSystem::DeleteFile(const string& fname) {
   string bucket, object;
   TF_RETURN_IF_ERROR(ParseGcsPath(fname, false, &bucket, &object));
 
-  string auth_token;
-  TF_RETURN_IF_ERROR(AuthProvider::GetToken(auth_provider_.get(), &auth_token));
-
-  std::unique_ptr<HttpRequest> request(http_request_factory_->Create());
-  TF_RETURN_IF_ERROR(request->Init());
+  std::unique_ptr<HttpRequest> request;
+  TF_RETURN_IF_ERROR(CreateHttpRequest(&request));
   TF_RETURN_IF_ERROR(request->SetUri(strings::StrCat(
       kGcsUriBase, "b/", bucket, "/o/", request->EscapeString(object))));
-  TF_RETURN_IF_ERROR(request->AddAuthBearerHeader(auth_token));
   TF_RETURN_IF_ERROR(request->SetTimeouts(timeouts_.connect, timeouts_.idle,
                                           timeouts_.metadata));
   TF_RETURN_IF_ERROR(request->SetDeleteRequest());
+
   TF_RETURN_WITH_CONTEXT_IF_ERROR(request->Send(), " when deleting ", fname);
   file_block_cache_->RemoveFile(fname);
   return Status::OK();
@@ -1297,16 +1254,12 @@ Status GcsFileSystem::RenameObject(const string& src, const string& target) {
   TF_RETURN_IF_ERROR(
       ParseGcsPath(target, false, &target_bucket, &target_object));
 
-  string auth_token;
-  TF_RETURN_IF_ERROR(AuthProvider::GetToken(auth_provider_.get(), &auth_token));
-
-  std::unique_ptr<HttpRequest> request(http_request_factory_->Create());
-  TF_RETURN_IF_ERROR(request->Init());
+  std::unique_ptr<HttpRequest> request;
+  TF_RETURN_IF_ERROR(CreateHttpRequest(&request));
   TF_RETURN_IF_ERROR(request->SetUri(strings::StrCat(
       kGcsUriBase, "b/", src_bucket, "/o/", request->EscapeString(src_object),
       "/rewriteTo/b/", target_bucket, "/o/",
       request->EscapeString(target_object))));
-  TF_RETURN_IF_ERROR(request->AddAuthBearerHeader(auth_token));
   TF_RETURN_IF_ERROR(request->SetPostEmptyBody());
   TF_RETURN_IF_ERROR(request->SetTimeouts(timeouts_.connect, timeouts_.idle,
                                           timeouts_.metadata));
@@ -1409,6 +1362,25 @@ Status GcsFileSystem::DeleteRecursively(const string& dirname,
   return Status::OK();
 }
 
+// Creates an HttpRequest and sets several parameters that are common to all
+// requests.  All code (in GcsFileSystem) that creates an HttpRequest should
+// go through this method, rather than directly using http_request_factory_.
+Status GcsFileSystem::CreateHttpRequest(std::unique_ptr<HttpRequest>* request) {
+  std::unique_ptr<HttpRequest> new_request{http_request_factory_->Create()};
+  TF_RETURN_IF_ERROR(new_request->Init());
+  if (dns_cache_) {
+    TF_RETURN_IF_ERROR(dns_cache_->AnnotateRequest(new_request.get()));
+  }
+
+  string auth_token;
+  TF_RETURN_IF_ERROR(AuthProvider::GetToken(auth_provider_.get(), &auth_token));
+
+  TF_RETURN_IF_ERROR(new_request->AddAuthBearerHeader(auth_token));
+
+  *request = std::move(new_request);
+  return Status::OK();
+}
+
 REGISTER_FILE_SYSTEM("gs", RetryingGcsFileSystem);
 
 }  // namespace tensorflow
diff --git a/tensorflow/core/platform/cloud/gcs_file_system.h b/tensorflow/core/platform/cloud/gcs_file_system.h
index 7cfcebd5c9..f4190b3f1e 100644
--- a/tensorflow/core/platform/cloud/gcs_file_system.h
+++ b/tensorflow/core/platform/cloud/gcs_file_system.h
@@ -136,6 +136,8 @@ class GcsFileSystem : public FileSystem {
           write(write) {}
   };
 
+  Status CreateHttpRequest(std::unique_ptr<HttpRequest>* request);
+
  private:
   /// \brief Checks if the bucket exists. Returns OK if the check succeeded.
   ///
diff --git a/tensorflow/core/platform/cloud/gcs_file_system_test.cc b/tensorflow/core/platform/cloud/gcs_file_system_test.cc
index 385e0fda02..cdf6c2b97e 100644
--- a/tensorflow/core/platform/cloud/gcs_file_system_test.cc
+++ b/tensorflow/core/platform/cloud/gcs_file_system_test.cc
@@ -2340,5 +2340,28 @@ TEST(GcsFileSystemTest, OverrideCacheParameters) {
   EXPECT_EQ(40, fs5.timeouts().write);
 }
 
+TEST(GcsFileSystemTest, CreateHttpRequest) {
+  std::vector<HttpRequest*> requests(
+      {// IsDirectory is checking whether there are children objects.
+       new FakeHttpRequest("Uri: https://www.googleapis.com/fake\n"
+                           "Auth Token: fake_token\n"
+                           "Header Hello: world\n",
+                           "{}")});
+  GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
+                   std::unique_ptr<HttpRequest::Factory>(
+                       new FakeHttpRequestFactory(&requests)),
+                   0 /* block size */, 0 /* max bytes */, 0 /* max staleness */,
+                   0 /* stat cache max age */, 0 /* stat cache max entries */,
+                   0 /* matching paths cache max age */,
+                   0 /* matching paths cache max entries */,
+                   0 /* initial retry delay */, kTestTimeoutConfig);
+
+  std::unique_ptr<HttpRequest> request;
+  TF_EXPECT_OK(fs.CreateHttpRequest(&request));
+  TF_EXPECT_OK(request->SetUri("https://www.googleapis.com/fake"));
+  TF_EXPECT_OK(request->AddHeader("Hello", "world"));
+  TF_EXPECT_OK(request->Send());
+}
+
 }  // namespace
 }  // namespace tensorflow
diff --git a/tensorflow/core/platform/cloud/http_request_fake.h b/tensorflow/core/platform/cloud/http_request_fake.h
index 8161ee6e25..f65c15dac7 100644
--- a/tensorflow/core/platform/cloud/http_request_fake.h
+++ b/tensorflow/core/platform/cloud/http_request_fake.h
@@ -77,7 +77,7 @@ class FakeHttpRequest : public CurlHttpRequest {
 
   Status Init() override { return Status::OK(); }
   Status SetUri(const string& uri) override {
-    actual_request_ += "Uri: " + uri + "\n";
+    actual_uri_ += "Uri: " + uri + "\n";
     return Status::OK();
   }
   Status SetRange(uint64 start, uint64 end) override {
@@ -131,7 +131,8 @@ class FakeHttpRequest : public CurlHttpRequest {
     return Status::OK();
   }
   Status Send() override {
-    EXPECT_EQ(expected_request_, actual_request_) << "Unexpected HTTP request.";
+    EXPECT_EQ(expected_request_, actual_request())
+        << "Unexpected HTTP request.";
     if (buffer_) {
       buffer_->insert(buffer_->begin(), response_.c_str(),
                       response_.c_str() + response_.size());
@@ -169,8 +170,16 @@ class FakeHttpRequest : public CurlHttpRequest {
   }
 
  private:
+  string actual_request() const {
+    string s;
+    s.append(actual_uri_);
+    s.append(actual_request_);
+    return s;
+  }
+
   std::vector<char>* buffer_ = nullptr;
   string expected_request_;
+  string actual_uri_;
   string actual_request_;
   string response_;
   Status response_status_;
-- 
GitLab


From 47b674c938a38c6d88f27244a12ce3944c2f0464 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 12 Dec 2017 16:09:47 -0800
Subject: [PATCH 0937/1225] [XLA] Remove a source of nondeterminism in HLO
 clustering.

Record the HLO clusters with std::set instead of std::unordered_set to ensure
that the algorithm to assign each cluster a sequence number during a set
traversal is deterministic.

PiperOrigin-RevId: 178830794
---
 tensorflow/compiler/jit/mark_for_compilation_pass.cc | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/tensorflow/compiler/jit/mark_for_compilation_pass.cc b/tensorflow/compiler/jit/mark_for_compilation_pass.cc
index aceedeb823..1f311a3aed 100644
--- a/tensorflow/compiler/jit/mark_for_compilation_pass.cc
+++ b/tensorflow/compiler/jit/mark_for_compilation_pass.cc
@@ -172,10 +172,15 @@ bool HasResourceInputOrOutput(const Node& node) {
                    DT_RESOURCE) != node.output_types().end();
 }
 
+struct NodeCompare {
+  bool operator()(const Node* a, const Node* b) { return a->id() < b->id(); }
+};
+using OrderedNodeSet = std::set<Node*, NodeCompare>;
+
 Status FindCompilationCandidates(
     const Graph& graph, FunctionLibraryDefinition* flib_def, Env* env,
     const std::function<bool(const Node*, const DeviceType&)>& is_compilable_fn,
-    std::unordered_set<Node*>* candidates) {
+    OrderedNodeSet* candidates) {
   OptimizerOptions opts;
   std::unique_ptr<ProcessFunctionLibraryRuntime> pflr(
       new ProcessFunctionLibraryRuntime(nullptr, env, TF_GRAPH_DEF_VERSION,
@@ -354,7 +359,7 @@ Status MarkForCompilationPass::RunImpl(
 
   Graph* graph = options.graph->get();
 
-  std::unordered_set<Node*> compilation_candidates;
+  OrderedNodeSet compilation_candidates;
   TF_RETURN_IF_ERROR(FindCompilationCandidates(
       *graph, options.flib_def,
       (options.session_options != nullptr) ? options.session_options->env
-- 
GitLab


From c373a16f61bff835181163dc07417e3cba6f47bc Mon Sep 17 00:00:00 2001
From: Yangzihao Wang <yangzihao@google.com>
Date: Tue, 12 Dec 2017 16:21:26 -0800
Subject: [PATCH 0938/1225] Return unimplemented error when trying to use
 dilated rate > 1 combined with NHWC format on the CPU. Add test for
 unimplemented errors in Conv2D op.

PiperOrigin-RevId: 178832407
---
 tensorflow/core/kernels/conv_ops.cc           | 13 ++++--
 .../python/kernel_tests/conv_ops_test.py      | 46 +++++++++++++++++--
 2 files changed, 53 insertions(+), 6 deletions(-)

diff --git a/tensorflow/core/kernels/conv_ops.cc b/tensorflow/core/kernels/conv_ops.cc
index ba40c428e4..985586d626 100644
--- a/tensorflow/core/kernels/conv_ops.cc
+++ b/tensorflow/core/kernels/conv_ops.cc
@@ -112,9 +112,9 @@ struct LaunchGeneric {
 template <typename T>
 struct LaunchConv2DOp<CPUDevice, T> {
   void operator()(OpKernelContext* ctx, bool use_cudnn, bool cudnn_use_autotune,
-                  const Tensor& input, const Tensor& filter,
-                  int /*row_dilation*/, int /*col_dilation*/, int row_stride,
-                  int col_stride, const Padding& padding, Tensor* output,
+                  const Tensor& input, const Tensor& filter, int row_dilation,
+                  int col_dilation, int row_stride, int col_stride,
+                  const Padding& padding, Tensor* output,
                   TensorFormat data_format) {
     if (data_format != FORMAT_NHWC) {
       ctx->SetStatus(
@@ -122,6 +122,13 @@ struct LaunchConv2DOp<CPUDevice, T> {
                                 "NHWC tensor format for now."));
       return;
     }
+    // TODO(yangzihao): Add the CPU implementation of dilated conv 2D.
+    if (row_dilation > 1 || col_dilation > 1) {
+      ctx->SetStatus(
+          errors::Unimplemented("Generic conv implementation only supports "
+                                "dilated rate of 1 for now."));
+      return;
+    }
     LaunchGeneric<CPUDevice, T>()(ctx, input, filter, row_stride, col_stride,
                                   padding, output, data_format);
   }
diff --git a/tensorflow/python/kernel_tests/conv_ops_test.py b/tensorflow/python/kernel_tests/conv_ops_test.py
index a85134c288..a7cbc76b87 100644
--- a/tensorflow/python/kernel_tests/conv_ops_test.py
+++ b/tensorflow/python/kernel_tests/conv_ops_test.py
@@ -164,8 +164,8 @@ class Conv2DTest(test.TestCase):
       # as we will be using its gradients as reference for fp16 gradients.
       return [dtypes.float32, dtypes.float16]
 
-  def _SetupValuesForDevice(self, tensor_in_sizes, filter_in_sizes, strides,
-                            padding, data_format, dtype, use_gpu):
+  def _SetupValuesForDevice(self, tensor_in_sizes, filter_in_sizes, dilations,
+                            strides, padding, data_format, dtype, use_gpu):
     """Verifies the output values of the convolution function.
 
     Args:
@@ -173,6 +173,7 @@ class Conv2DTest(test.TestCase):
         [batch, input_rows, input_cols, input_depth].
       filter_in_sizes: Filter tensor dimensions in
         [kernel_rows, kernel_cols, input_depth, output_depth].
+      dilations: Dilated rate: [col_dilation, row_dilation]
       strides: Stride: [col_stride, row_stride]
       padding: Padding type.
       data_format: Format of the data tensors.
@@ -196,11 +197,18 @@ class Conv2DTest(test.TestCase):
       t1 = constant_op.constant(x1, shape=tensor_in_sizes, dtype=dtype)
       t2 = constant_op.constant(x2, shape=filter_in_sizes, dtype=dtype)
       strides = [1] + strides + [1]
+      dilations = [1] + dilations + [1]
       if data_format == "NCHW":
         t1 = test_util.NHWCToNCHW(t1)
         strides = test_util.NHWCToNCHW(strides)
+        dilations = test_util.NHWCToNCHW(dilations)
       conv = nn_ops.conv2d(
-          t1, t2, strides=strides, padding=padding, data_format=data_format)
+          t1,
+          t2,
+          dilations=dilations,
+          strides=strides,
+          padding=padding,
+          data_format=data_format)
       if data_format == "NCHW":
         conv = test_util.NCHWToNHWC(conv)
 
@@ -316,11 +324,13 @@ class Conv2DTest(test.TestCase):
   def _VerifyValues(self, tensor_in_sizes, filter_in_sizes, strides, padding,
                     expected):
     tensors = []
+    dilations = [1, 1]
     for (data_format, use_gpu) in GetTestConfigs():
       for dtype in self._DtypesToTest(use_gpu):
         result = self._SetupValuesForDevice(
             tensor_in_sizes,
             filter_in_sizes,
+            dilations,
             strides,
             padding,
             data_format,
@@ -1498,6 +1508,36 @@ class Conv2DTest(test.TestCase):
                 strides=[1, 1, 1, 1],
                 padding="VALID"))
 
+  def testCPUConv2DNCHWUnimplemented(self):
+    with self.test_session(use_gpu=False):
+      with self.assertRaisesRegexp(errors_impl.UnimplementedError,
+                                   "NHWC tensor format for now"):
+        conv = self._SetupValuesForDevice(
+            tensor_in_sizes=[1, 4, 4, 1],
+            filter_in_sizes=[2, 2, 1, 1],
+            dilations=[1, 1],
+            strides=[1, 1],
+            padding="VALID",
+            data_format="NCHW",
+            dtype=dtypes.float32,
+            use_gpu=False)
+        self.evaluate(conv)
+
+  def testCPUConv2DDilatedUnimplemented(self):
+    with self.test_session(use_gpu=False):
+      with self.assertRaisesRegexp(errors_impl.UnimplementedError,
+                                   "dilated rate of 1 for now"):
+        conv = self._SetupValuesForDevice(
+            tensor_in_sizes=[1, 4, 4, 1],
+            filter_in_sizes=[2, 2, 1, 1],
+            dilations=[2, 1],
+            strides=[1, 1],
+            padding="VALID",
+            data_format="NHWC",
+            dtype=dtypes.float32,
+            use_gpu=False)
+        self.evaluate(conv)
+
 
 class DepthwiseConv2DTest(test.TestCase):
 
-- 
GitLab


From 618d5c5fad4f70456856625322db104b851a399d Mon Sep 17 00:00:00 2001
From: Ian Langmore <langmore@google.com>
Date: Tue, 12 Dec 2017 16:30:09 -0800
Subject: [PATCH 0939/1225] BUGFIX: MVN Full Covariance:  Use dtype dependent
 tolerance to verify symmetric.

PiperOrigin-RevId: 178833453
---
 .../distributions/python/ops/mvn_full_covariance.py  | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/tensorflow/contrib/distributions/python/ops/mvn_full_covariance.py b/tensorflow/contrib/distributions/python/ops/mvn_full_covariance.py
index 8e69dadfb4..00a18569fc 100644
--- a/tensorflow/contrib/distributions/python/ops/mvn_full_covariance.py
+++ b/tensorflow/contrib/distributions/python/ops/mvn_full_covariance.py
@@ -18,12 +18,15 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
+import numpy as np
+
 from tensorflow.contrib.distributions.python.ops import mvn_tril
 from tensorflow.python.framework import ops
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import check_ops
 from tensorflow.python.ops import control_flow_ops
 from tensorflow.python.ops import linalg_ops
+from tensorflow.python.ops import math_ops
 
 
 __all__ = [
@@ -167,9 +170,12 @@ class MultivariateNormalFullCovariance(mvn_tril.MultivariateNormalTriL):
           covariance_matrix = ops.convert_to_tensor(
               covariance_matrix, name="covariance_matrix")
           if validate_args:
-            assert_symmetric = check_ops.assert_equal(
-                covariance_matrix,
-                array_ops.matrix_transpose(covariance_matrix),
+            tol = np.finfo(covariance_matrix.dtype.as_numpy_dtype).eps * 10
+            diff = math_ops.abs(
+                covariance_matrix
+                - array_ops.matrix_transpose(covariance_matrix))
+            assert_symmetric = check_ops.assert_less(
+                diff, tol + tol * math_ops.abs(covariance_matrix),
                 message="Matrix was not symmetric.")
             covariance_matrix = control_flow_ops.with_dependencies(
                 [assert_symmetric], covariance_matrix)
-- 
GitLab


From e115b064f57f5c373f1acdb56b210c541ccf63fb Mon Sep 17 00:00:00 2001
From: Eugene Brevdo <ebrevdo@google.com>
Date: Tue, 12 Dec 2017 17:01:02 -0800
Subject: [PATCH 0940/1225] [TF] Mark DT_STRING and DT_RESOURCE types as always
 sitting on host memory.

This is important when these arguments may appear in op input lists or output lists,
where the signature may not be able to declare them as sitting on host.

For DT_RESOURCE types, just the handles are marked as sitting on host memory;
the actual data may reside on GPU.

PiperOrigin-RevId: 178837213
---
 tensorflow/core/framework/memory_types.cc     | 17 +++++++++++++++-
 .../core/framework/memory_types_test.cc       | 20 +++++++++++++------
 tensorflow/core/framework/types.cc            | 12 +++++++++++
 tensorflow/core/framework/types.h             |  5 +++++
 4 files changed, 47 insertions(+), 7 deletions(-)

diff --git a/tensorflow/core/framework/memory_types.cc b/tensorflow/core/framework/memory_types.cc
index 6a2eed94b9..270118bb67 100644
--- a/tensorflow/core/framework/memory_types.cc
+++ b/tensorflow/core/framework/memory_types.cc
@@ -61,7 +61,8 @@ void MemoryTypesHelper(const NameRangeMap& name_map,
 }
 
 MemoryType MTypeFromDType(const DataType dtype) {
-  return (dtype == DT_INT32) ? HOST_MEMORY : DEVICE_MEMORY;
+  return (dtype == DT_INT32 || DataTypeAlwaysOnHost(dtype)) ? HOST_MEMORY
+                                                            : DEVICE_MEMORY;
 }
 
 }  // namespace
@@ -118,6 +119,20 @@ Status MemoryTypesForNode(const OpRegistryInterface* op_registry,
         "HostMemory args '", str_util::Join(host_memory_args, "', '"),
         "' not found in OpDef: ", SummarizeOpDef(*op_def));
   }
+  CHECK_LE(inp_mtypes->size(), inp_dtypes.size());
+  CHECK_LE(out_mtypes->size(), out_dtypes.size());
+
+  // Mark e.g. all resource and string types as host memory.
+  for (int i = 0; i < inp_mtypes->size(); ++i) {
+    if (DataTypeAlwaysOnHost(inp_dtypes[i])) {
+      (*inp_mtypes)[i] = HOST_MEMORY;
+    }
+  }
+  for (int i = 0; i < out_mtypes->size(); ++i) {
+    if (DataTypeAlwaysOnHost(out_dtypes[i])) {
+      (*out_mtypes)[i] = HOST_MEMORY;
+    }
+  }
 
   std::vector<int32> hostmem_attr;
   if (GetNodeAttr(ndef, "_input_hostmem", &hostmem_attr).ok()) {
diff --git a/tensorflow/core/framework/memory_types_test.cc b/tensorflow/core/framework/memory_types_test.cc
index 4704da9a11..3126ea8e5f 100644
--- a/tensorflow/core/framework/memory_types_test.cc
+++ b/tensorflow/core/framework/memory_types_test.cc
@@ -36,11 +36,13 @@ REGISTER_OP("HostMemoryTest")
     .Input("b: T")
     .Input("c: N * string")
     .Input("d: Tlist")
+    .Input("e: Rlist")
     .Output("o: N * T")
     .Output("p: Tlist")
     .Attr("T: type")
     .Attr("N: int")
-    .Attr("Tlist: list(type)");
+    .Attr("Tlist: list(type)")
+    .Attr("Rlist: list(type)");
 REGISTER_KERNEL_BUILDER(Name("HostMemoryTest").Device(DEVICE_CPU), DummyKernel);
 REGISTER_KERNEL_BUILDER(Name("HostMemoryTest")
                             .Device(DEVICE_GPU)
@@ -57,15 +59,20 @@ TEST(MemoryTypesForNode, Simple) {
                    .Input(FakeInput(DT_BOOL))
                    .Input(FakeInput(3))
                    .Input(FakeInput({DT_INT32, DT_FLOAT, DT_INT32}))
+                   .Input(FakeInput({DT_RESOURCE, DT_STRING, DT_RESOURCE}))
                    .Finalize(&node_def));
   MemoryTypeVector input, output;
 
   TF_EXPECT_OK(MemoryTypesForNode(OpRegistry::Global(), DEVICE_CPU, node_def,
                                   &input, &output));
-  EXPECT_EQ(MemoryTypeVector({DEVICE_MEMORY, DEVICE_MEMORY, DEVICE_MEMORY,
-                              DEVICE_MEMORY, DEVICE_MEMORY, DEVICE_MEMORY,
-                              DEVICE_MEMORY, DEVICE_MEMORY}),
-            input);
+  // a:float, b:bool, c:3*string, d:(int32, float, int32),
+  // e:(resource, string, resource)
+  EXPECT_EQ(
+      MemoryTypeVector({DEVICE_MEMORY, DEVICE_MEMORY, HOST_MEMORY, HOST_MEMORY,
+                        HOST_MEMORY, DEVICE_MEMORY, DEVICE_MEMORY,
+                        DEVICE_MEMORY, HOST_MEMORY, HOST_MEMORY, HOST_MEMORY}),
+      input);
+  // o:3*bool, p:(int32, float, int32)
   EXPECT_EQ(MemoryTypeVector({DEVICE_MEMORY, DEVICE_MEMORY, DEVICE_MEMORY,
                               DEVICE_MEMORY, DEVICE_MEMORY, DEVICE_MEMORY}),
             output);
@@ -74,7 +81,8 @@ TEST(MemoryTypesForNode, Simple) {
                                   &input, &output));
   EXPECT_EQ(
       MemoryTypeVector({HOST_MEMORY, DEVICE_MEMORY, HOST_MEMORY, HOST_MEMORY,
-                        HOST_MEMORY, HOST_MEMORY, HOST_MEMORY, HOST_MEMORY}),
+                        HOST_MEMORY, HOST_MEMORY, HOST_MEMORY, HOST_MEMORY,
+                        HOST_MEMORY, HOST_MEMORY, HOST_MEMORY}),
       input);
   EXPECT_EQ(MemoryTypeVector({HOST_MEMORY, HOST_MEMORY, HOST_MEMORY,
                               DEVICE_MEMORY, DEVICE_MEMORY, DEVICE_MEMORY}),
diff --git a/tensorflow/core/framework/types.cc b/tensorflow/core/framework/types.cc
index b082dfbd03..58354d6f4e 100644
--- a/tensorflow/core/framework/types.cc
+++ b/tensorflow/core/framework/types.cc
@@ -306,6 +306,18 @@ bool DataTypeCanUseMemcpy(DataType dt) {
   }
 }
 
+bool DataTypeAlwaysOnHost(DataType dt) {
+  // Includes DT_STRING and DT_RESOURCE.
+  switch (dt) {
+    case DT_STRING:
+    case DT_STRING_REF:
+    case DT_RESOURCE:
+      return true;
+    default:
+      return false;
+  }
+}
+
 bool DataTypeIsFloating(DataType dt) {
   switch (dt) {
     case DT_HALF:
diff --git a/tensorflow/core/framework/types.h b/tensorflow/core/framework/types.h
index 652985658a..27005c0e93 100644
--- a/tensorflow/core/framework/types.h
+++ b/tensorflow/core/framework/types.h
@@ -239,6 +239,11 @@ bool DataTypeIsUnsigned(DataType dt);
 // Returns a 0 on failure
 int DataTypeSize(DataType dt);
 
+// Types that always sit on host: DT_STRING, DT_STRING_REF, DT_RESOURCE.
+// For DT_RESOURCE, the handle always sits on host (even if the underlying
+// object has device-allocated resources).
+bool DataTypeAlwaysOnHost(DataType dt);
+
 }  // namespace tensorflow
 
 #endif  // TENSORFLOW_FRAMEWORK_TYPES_H_
-- 
GitLab


From dd6f9d5f43870dc39dbed91c6897dc4bb22ca495 Mon Sep 17 00:00:00 2001
From: Eugene Brevdo <ebrevdo@google.com>
Date: Tue, 12 Dec 2017 17:45:50 -0800
Subject: [PATCH 0941/1225] Move more contrib RNN objects to be Layers.

PiperOrigin-RevId: 178842373
---
 .../rnn/python/kernel_tests/core_rnn_test.py  |   4 +-
 .../rnn/python/kernel_tests/lstm_ops_test.py  |  50 +--
 tensorflow/contrib/rnn/python/ops/gru_ops.py  | 139 ++++----
 tensorflow/contrib/rnn/python/ops/lstm_ops.py | 296 ++++++++++--------
 tensorflow/contrib/rnn/python/ops/rnn_cell.py |   2 +-
 tensorflow/python/ops/rnn_cell_impl.py        |  15 +-
 6 files changed, 259 insertions(+), 247 deletions(-)

diff --git a/tensorflow/contrib/rnn/python/kernel_tests/core_rnn_test.py b/tensorflow/contrib/rnn/python/kernel_tests/core_rnn_test.py
index 9cea2ec79a..0258d7202d 100644
--- a/tensorflow/contrib/rnn/python/kernel_tests/core_rnn_test.py
+++ b/tensorflow/contrib/rnn/python/kernel_tests/core_rnn_test.py
@@ -2175,9 +2175,9 @@ class DeviceWrapperCell(rnn_cell.RNNCell):
   def __call__(self, input_, state, scope=None):
     if self._device is not None:
       with ops_lib.device(self._device):
-        return self._cell(input_, state, scope)
+        return self._cell(input_, state, scope=scope)
     else:
-      return self._cell(input_, state, scope)
+      return self._cell(input_, state, scope=scope)
 
 
 class TensorArrayOnCorrectDeviceTest(test.TestCase):
diff --git a/tensorflow/contrib/rnn/python/kernel_tests/lstm_ops_test.py b/tensorflow/contrib/rnn/python/kernel_tests/lstm_ops_test.py
index a288072ae5..4ada2118ba 100644
--- a/tensorflow/contrib/rnn/python/kernel_tests/lstm_ops_test.py
+++ b/tensorflow/contrib/rnn/python/kernel_tests/lstm_ops_test.py
@@ -49,6 +49,7 @@ def blocks_match(sess, use_peephole):
     inp = ops.convert_to_tensor(
         np.random.randn(batch_size, input_size), dtype=dtypes.float32)
     inputs.append(inp)
+  stacked_inputs = array_ops.stack(inputs)
 
   initializer = init_ops.random_uniform_initializer(-0.01, 0.01, seed=19890212)
 
@@ -72,23 +73,6 @@ def blocks_match(sess, use_peephole):
         dtype=dtypes.float32,
         initializer=init_ops.zeros_initializer())
 
-    if use_peephole:
-      wci_block = variable_scope.get_variable(
-          "rnn/lstm_cell/lstm_block_wrapper/w_i_diag",
-          initializer=wci.initialized_value())
-      wcf_block = variable_scope.get_variable(
-          "rnn/lstm_cell/lstm_block_wrapper/w_f_diag",
-          initializer=wcf.initialized_value())
-      wco_block = variable_scope.get_variable(
-          "rnn/lstm_cell/lstm_block_wrapper/w_o_diag",
-          initializer=wco.initialized_value())
-    w_block = variable_scope.get_variable(
-        "rnn/lstm_cell/lstm_block_wrapper/kernel",
-        initializer=w.initialized_value())
-    b_block = variable_scope.get_variable(
-        "rnn/lstm_cell/lstm_block_wrapper/bias",
-        initializer=b.initialized_value())
-
     basic_cell = rnn_cell.LSTMCell(
         cell_size, use_peepholes=use_peephole, state_is_tuple=True, reuse=True)
     basic_outputs_op, basic_state_op = rnn.static_rnn(
@@ -113,11 +97,10 @@ def blocks_match(sess, use_peephole):
           b,
           cell_clip=0)
 
-    with variable_scope.variable_scope("rnn/lstm_cell", reuse=True):
-      fused_cell = lstm_ops.LSTMBlockFusedCell(
-          cell_size, cell_clip=0, use_peephole=use_peephole)
-      fused_outputs_op, fused_state_op = fused_cell(
-          inputs, dtype=dtypes.float32)
+    fused_cell = lstm_ops.LSTMBlockFusedCell(
+        cell_size, cell_clip=0, use_peephole=use_peephole, reuse=True)
+    fused_outputs_op, fused_state_op = fused_cell(
+        stacked_inputs, dtype=dtypes.float32)
 
     sess.run([variables.global_variables_initializer()])
     basic_outputs, basic_state = sess.run([basic_outputs_op, basic_state_op[0]])
@@ -131,9 +114,9 @@ def blocks_match(sess, use_peephole):
     block_grads = sess.run(gradients_impl.gradients(block_outputs_op, inputs))
     block_wgrads = sess.run(gradients_impl.gradients(block_outputs_op, xs))
 
-    xs = [w_block, b_block]
+    xs = [w, b]
     if use_peephole:
-      xs += [wci_block, wcf_block, wco_block]
+      xs += [wci, wcf, wco]
     fused_outputs, fused_state = sess.run([fused_outputs_op, fused_state_op[0]])
     fused_grads = sess.run(gradients_impl.gradients(fused_outputs_op, inputs))
     fused_wgrads = sess.run(gradients_impl.gradients(fused_outputs_op, xs))
@@ -216,7 +199,7 @@ class LSTMBlockCellTest(test.TestCase):
     with self.test_session(use_gpu=True, graph=ops.Graph()):
       cell = lstm_ops.LSTMBlockFusedCell(10)
       pcell = lstm_ops.LSTMBlockFusedCell(10, use_peephole=True)
-      inputs = [array_ops.zeros([4, 5])] * 6
+      inputs = array_ops.stack([array_ops.zeros([4, 5])] * 6)
       cell(inputs, dtype=dtypes.float32, scope="basic/lstm_cell")
       pcell(inputs, dtype=dtypes.float32, scope="peephole/lstm_cell")
       fused_names = {
@@ -380,13 +363,14 @@ class LSTMBlockCellTest(test.TestCase):
             np.random.randn(batch_size, input_size), dtype=dtypes.float32)
         inputs.append(inp)
       seq_lengths = constant_op.constant([3, 4, 5])
+      cell_inputs = array_ops.stack(inputs)
 
       initializer = init_ops.random_uniform_initializer(
           -0.01, 0.01, seed=19890213)
 
       with variable_scope.variable_scope(
-          "lstm_block_wrapper", initializer=initializer):
-        # magic naming so that the cells pick up these variables and resuse them
+          "rnn/lstm_cell", initializer=initializer):
+        # magic naming so that the cells pick up these variables and reuse them
         variable_scope.get_variable(
             "kernel",
             shape=[input_size + cell_size, cell_size * 4],
@@ -398,13 +382,11 @@ class LSTMBlockCellTest(test.TestCase):
             dtype=dtypes.float32,
             initializer=init_ops.zeros_initializer())
 
-      with variable_scope.variable_scope(
-          variable_scope.get_variable_scope(), reuse=True):
-        cell = lstm_ops.LSTMBlockFusedCell(
-            cell_size, cell_clip=0, use_peephole=False)
+      cell = lstm_ops.LSTMBlockFusedCell(
+          cell_size, cell_clip=0, use_peephole=False, reuse=True)
 
-        fused_outputs_op, fused_state_op = cell(
-            inputs, dtype=dtypes.float32, sequence_length=seq_lengths)
+      fused_outputs_op, fused_state_op = cell(
+          cell_inputs, dtype=dtypes.float32, sequence_length=seq_lengths)
 
       cell_vars = [
           v for v in variables.trainable_variables()
@@ -420,7 +402,7 @@ class LSTMBlockCellTest(test.TestCase):
         for i, inp in enumerate(inputs):
           lengths = [int(i < l) for l in seq_lengths.eval()]
           output, state = cell(
-              [inp],
+              array_ops.expand_dims(inp, 0),
               initial_state=state,
               dtype=dtypes.float32,
               sequence_length=lengths)
diff --git a/tensorflow/contrib/rnn/python/ops/gru_ops.py b/tensorflow/contrib/rnn/python/ops/gru_ops.py
index 75536e3f5f..cbe53bb4ce 100644
--- a/tensorflow/contrib/rnn/python/ops/gru_ops.py
+++ b/tensorflow/contrib/rnn/python/ops/gru_ops.py
@@ -20,6 +20,7 @@ from __future__ import print_function
 from tensorflow.contrib.rnn.ops import gen_gru_ops
 from tensorflow.contrib.util import loader
 from tensorflow.python.framework import ops
+from tensorflow.python.layers import base as base_layer
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import init_ops
 from tensorflow.python.ops import math_ops
@@ -32,6 +33,8 @@ from tensorflow.python.util.deprecation import deprecated_args
 _gru_ops_so = loader.load_op_library(
     resource_loader.get_path_to_datafile("_gru_ops.so"))
 
+LayerRNNCell = rnn_cell_impl._LayerRNNCell  # pylint: disable=invalid-name,protected-access
+
 
 @ops.RegisterGradient("GRUBlockCell")
 def _GRUBlockCellGrad(op, *grad):
@@ -95,7 +98,7 @@ def _GRUBlockCellGrad(op, *grad):
   return d_x, d_h_prev, d_w_ru, d_w_c, d_b_ru, d_b_c
 
 
-class GRUBlockCell(rnn_cell_impl.RNNCell):
+class GRUBlockCell(LayerRNNCell):
   r"""Block GRU cell implementation.
 
   Deprecated: use GRUBlockCellV2 instead.
@@ -132,22 +135,37 @@ class GRUBlockCell(rnn_cell_impl.RNNCell):
 
   @deprecated_args(None, "cell_size is deprecated, use num_units instead",
                    "cell_size")
-  def __init__(self, num_units=None, cell_size=None):
+  def __init__(self,
+               num_units=None,
+               cell_size=None,
+               reuse=None,
+               name="gru_cell"):
     """Initialize the Block GRU cell.
 
     Args:
       num_units: int, The number of units in the GRU cell.
       cell_size: int, The old (deprecated) name for `num_units`.
+      reuse: (optional) boolean describing whether to reuse variables in an
+        existing scope.  If not `True`, and the existing scope already has the
+        given variables, an error is raised.
+      name: String, the name of the layer. Layers with the same name will
+        share weights, but to avoid mistakes we require reuse=True in such
+        cases.  By default this is "lstm_cell", for variable-name compatibility
+        with `tf.nn.rnn_cell.GRUCell`.
 
     Raises:
       ValueError: if both cell_size and num_units are not None;
         or both are None.
     """
+    super(GRUBlockCell, self).__init__(_reuse=reuse, name=name)
     if (cell_size is None) == (num_units is None):
-      raise ValueError("Exactly one of num_units or cell_size must be provided.")
+      raise ValueError(
+          "Exactly one of num_units or cell_size must be provided.")
     if num_units is None:
       num_units = cell_size
     self._cell_size = num_units
+    # Inputs must be 2-dimensional.
+    self.input_spec = base_layer.InputSpec(ndim=2)
 
   @property
   def state_size(self):
@@ -157,40 +175,41 @@ class GRUBlockCell(rnn_cell_impl.RNNCell):
   def output_size(self):
     return self._cell_size
 
-  def __call__(self, x, h_prev, scope=None):
+  def build(self, input_shape):
+    # Check if the input size exist.
+    input_size = input_shape[1].value
+    if input_size is None:
+      raise ValueError("Expecting input_size to be set.")
+
+    self._gate_kernel = vs.get_variable(
+        "w_c", [input_size + self._cell_size, self._cell_size * 2])
+    self._gate_bias = vs.get_variable(
+        "b_ru", [self._cell_size * 2],
+        initializer=init_ops.constant_initializer(1.0))
+    self._candidate_kernel = vs.get_variable(
+        "w_ru", [input_size + self._cell_size, self._cell_size])
+    self._candidate_bias = vs.get_variable(
+        "b_c", [self._cell_size],
+        initializer=init_ops.constant_initializer(0.0))
+
+  def call(self, inputs, h_prev):
     """GRU cell."""
-    with vs.variable_scope(scope or type(self).__name__):
-      input_size = x.get_shape().with_rank(2)[1]
-
-      # Check if the input size exist.
-      if input_size is None:
-        raise ValueError("Expecting input_size to be set.")
-
-      # Check cell_size == state_size from h_prev.
-      cell_size = h_prev.get_shape().with_rank(2)[1]
-      if cell_size != self._cell_size:
-        raise ValueError("Shape of h_prev[1] incorrect: cell_size %i vs %s" %
-                         (self._cell_size, cell_size))
-
-      if cell_size is None:
-        raise ValueError("cell_size from `h_prev` should not be None.")
-
-      w_ru = vs.get_variable("w_ru", [input_size + self._cell_size,
-                                      self._cell_size * 2])
-      b_ru = vs.get_variable(
-          "b_ru", [self._cell_size * 2],
-          initializer=init_ops.constant_initializer(1.0))
-      w_c = vs.get_variable("w_c",
-                            [input_size + self._cell_size, self._cell_size])
-      b_c = vs.get_variable(
-          "b_c", [self._cell_size],
-          initializer=init_ops.constant_initializer(0.0))
+    # Check cell_size == state_size from h_prev.
+    cell_size = h_prev.get_shape().with_rank(2)[1]
+    if cell_size != self._cell_size:
+      raise ValueError("Shape of h_prev[1] incorrect: cell_size %i vs %s" %
+                       (self._cell_size, cell_size))
 
-      _gru_block_cell = gen_gru_ops.gru_block_cell  # pylint: disable=invalid-name
-      _, _, _, new_h = _gru_block_cell(
-          x=x, h_prev=h_prev, w_ru=w_ru, w_c=w_c, b_ru=b_ru, b_c=b_c)
+    _gru_block_cell = gen_gru_ops.gru_block_cell  # pylint: disable=invalid-name
+    _, _, _, new_h = _gru_block_cell(
+        x=inputs,
+        h_prev=h_prev,
+        w_ru=self._gate_kernel,
+        w_c=self._candidate_kernel,
+        b_ru=self._gate_bias,
+        b_c=self._candidate_bias)
 
-      return new_h, new_h
+    return new_h, new_h
 
 
 class GRUBlockCellV2(GRUBlockCell):
@@ -199,39 +218,21 @@ class GRUBlockCellV2(GRUBlockCell):
   Only differs from GRUBlockCell by variable names.
   """
 
-  def __call__(self, x, h_prev, scope=None):
+  def build(self, input_shape):
     """GRU cell."""
-    with vs.variable_scope(scope or type(self).__name__):
-      input_size = x.get_shape().with_rank(2)[1]
-
-      # Check if the input size exist.
-      if input_size is None:
-        raise ValueError("Expecting input_size to be set.")
-
-      # Check cell_size == state_size from h_prev.
-      cell_size = h_prev.get_shape().with_rank(2)[1]
-      if cell_size != self._cell_size:
-        raise ValueError("Shape of h_prev[1] incorrect: cell_size %i vs %s" %
-                         (self._cell_size, cell_size))
-
-      if cell_size is None:
-        raise ValueError("cell_size from `h_prev` should not be None.")
-
-      with vs.variable_scope("gates"):
-        w_ru = vs.get_variable("kernel", [input_size + self._cell_size,
-                                          self._cell_size * 2])
-        b_ru = vs.get_variable(
-            "bias", [self._cell_size * 2],
-            initializer=init_ops.constant_initializer(1.0))
-      with vs.variable_scope("candidate"):
-        w_c = vs.get_variable("kernel",
-                              [input_size + self._cell_size, self._cell_size])
-        b_c = vs.get_variable(
-            "bias", [self._cell_size],
-            initializer=init_ops.constant_initializer(0.0))
-
-      _gru_block_cell = gen_gru_ops.gru_block_cell  # pylint: disable=invalid-name
-      _, _, _, new_h = _gru_block_cell(
-          x=x, h_prev=h_prev, w_ru=w_ru, w_c=w_c, b_ru=b_ru, b_c=b_c)
-
-      return new_h, new_h
+    input_size = input_shape[1].value
+    if input_size is None:
+      raise ValueError("Expecting input_size to be set.")
+
+    with vs.variable_scope("gates"):
+      self._gate_kernel = vs.get_variable(
+          "kernel", [input_size + self._cell_size, self._cell_size * 2])
+      self._gate_bias = vs.get_variable(
+          "bias", [self._cell_size * 2],
+          initializer=init_ops.constant_initializer(1.0))
+    with vs.variable_scope("candidate"):
+      self._candidate_kernel = vs.get_variable(
+          "kernel", [input_size + self._cell_size, self._cell_size])
+      self._candidate_bias = vs.get_variable(
+          "bias", [self._cell_size],
+          initializer=init_ops.constant_initializer(0.0))
diff --git a/tensorflow/contrib/rnn/python/ops/lstm_ops.py b/tensorflow/contrib/rnn/python/ops/lstm_ops.py
index df910a3423..805121a8f1 100644
--- a/tensorflow/contrib/rnn/python/ops/lstm_ops.py
+++ b/tensorflow/contrib/rnn/python/ops/lstm_ops.py
@@ -20,10 +20,10 @@ from __future__ import print_function
 import abc
 
 from tensorflow.contrib.rnn.ops import gen_lstm_ops
-from tensorflow.contrib.rnn.python.ops import fused_rnn_cell
 from tensorflow.contrib.util import loader
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
+from tensorflow.python.layers import base as base_layer
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import init_ops
 from tensorflow.python.ops import math_ops
@@ -35,6 +35,8 @@ from tensorflow.python.platform import resource_loader
 _lstm_ops_so = loader.load_op_library(
     resource_loader.get_path_to_datafile("_lstm_ops.so"))
 
+LayerRNNCell = rnn_cell_impl._LayerRNNCell  # pylint: disable=invalid-name,protected-access
+
 
 # pylint: disable=invalid-name
 def _lstm_block_cell(x,
@@ -327,7 +329,7 @@ def _BlockLSTMGrad(op, *grad):
   ]
 
 
-class LSTMBlockCell(rnn_cell_impl.RNNCell):
+class LSTMBlockCell(LayerRNNCell):
   """Basic LSTM recurrent network cell.
 
   The implementation is based on: http://arxiv.org/abs/1409.2329.
@@ -345,7 +347,8 @@ class LSTMBlockCell(rnn_cell_impl.RNNCell):
                forget_bias=1.0,
                cell_clip=None,
                use_peephole=False,
-               reuse=None):
+               reuse=None,
+               name="lstm_cell"):
     """Initialize the basic LSTM cell.
 
     Args:
@@ -356,11 +359,15 @@ class LSTMBlockCell(rnn_cell_impl.RNNCell):
       reuse: (optional) boolean describing whether to reuse variables in an
         existing scope.  If not `True`, and the existing scope already has the
         given variables, an error is raised.
+      name: String, the name of the layer. Layers with the same name will
+        share weights, but to avoid mistakes we require reuse=True in such
+        cases.  By default this is "lstm_cell", for variable-name compatibility
+        with `tf.nn.rnn_cell.LSTMCell`.
 
       When restoring from CudnnLSTM-trained checkpoints, must use
       CudnnCompatibleLSTMBlockCell instead.
     """
-    super(LSTMBlockCell, self).__init__(_reuse=reuse)
+    super(LSTMBlockCell, self).__init__(_reuse=reuse, name=name)
     self._num_units = num_units
     self._forget_bias = forget_bias
     self._use_peephole = use_peephole
@@ -373,6 +380,8 @@ class LSTMBlockCell(rnn_cell_impl.RNNCell):
         "wco": "w_o_diag",
         "scope": "lstm_cell"
     }
+    # Inputs must be 2-dimensional.
+    self.input_spec = base_layer.InputSpec(ndim=2)
 
   @property
   def state_size(self):
@@ -382,45 +391,52 @@ class LSTMBlockCell(rnn_cell_impl.RNNCell):
   def output_size(self):
     return self._num_units
 
-  def __call__(self, x, states_prev, scope=None):
+  def build(self, inputs_shape):
+    if not inputs_shape[1].value:
+      raise ValueError(
+          "Expecting inputs_shape[1] to be set: %s" % str(inputs_shape))
+    input_size = inputs_shape[1].value
+    self._kernel = vs.get_variable(
+        self._names["W"], [input_size + self._num_units, self._num_units * 4])
+    self._bias = vs.get_variable(
+        self._names["b"], [self._num_units * 4],
+        initializer=init_ops.constant_initializer(0.0))
+    if self._use_peephole:
+      self._w_i_diag = vs.get_variable(self._names["wci"], [self._num_units])
+      self._w_f_diag = vs.get_variable(self._names["wcf"], [self._num_units])
+      self._w_o_diag = vs.get_variable(self._names["wco"], [self._num_units])
+
+  def call(self, inputs, state):
     """Long short-term memory cell (LSTM)."""
-    with vs.variable_scope(scope or self._names["scope"]):
-      x_shape = x.get_shape().with_rank(2)
-      if not x_shape[1].value:
-        raise ValueError("Expecting x_shape[1] to be set: %s" % str(x_shape))
-      if len(states_prev) != 2:
-        raise ValueError("Expecting states_prev to be a tuple with length 2.")
-      input_size = x_shape[1].value
-      w = vs.get_variable(self._names["W"], [input_size + self._num_units,
-                                             self._num_units * 4])
-      b = vs.get_variable(
-          self._names["b"], [w.get_shape().with_rank(2)[1].value],
-          initializer=init_ops.constant_initializer(0.0))
-      if self._use_peephole:
-        wci = vs.get_variable(self._names["wci"], [self._num_units])
-        wcf = vs.get_variable(self._names["wcf"], [self._num_units])
-        wco = vs.get_variable(self._names["wco"], [self._num_units])
-      else:
-        wci = wcf = wco = array_ops.zeros([self._num_units])
-      (cs_prev, h_prev) = states_prev
-      (_, cs, _, _, _, _, h) = _lstm_block_cell(
-          x,
-          cs_prev,
-          h_prev,
-          w,
-          b,
-          wci=wci,
-          wcf=wcf,
-          wco=wco,
-          forget_bias=self._forget_bias,
-          cell_clip=self._cell_clip,
-          use_peephole=self._use_peephole)
-
-      new_state = rnn_cell_impl.LSTMStateTuple(cs, h)
-      return h, new_state
-
-
-class LSTMBlockWrapper(fused_rnn_cell.FusedRNNCell):
+    if len(state) != 2:
+      raise ValueError("Expecting state to be a tuple with length 2.")
+
+    if self._use_peephole:
+      wci = self._w_i_diag
+      wcf = self._w_f_diag
+      wco = self._w_o_diag
+    else:
+      wci = wcf = wco = array_ops.zeros([self._num_units])
+
+    (cs_prev, h_prev) = state
+    (_, cs, _, _, _, _, h) = _lstm_block_cell(
+        inputs,
+        cs_prev,
+        h_prev,
+        self._kernel,
+        self._bias,
+        wci=wci,
+        wcf=wcf,
+        wco=wco,
+        forget_bias=self._forget_bias,
+        cell_clip=self._cell_clip,
+        use_peephole=self._use_peephole)
+
+    new_state = rnn_cell_impl.LSTMStateTuple(cs, h)
+    return h, new_state
+
+
+class LSTMBlockWrapper(LayerRNNCell):
   """This is a helper class that provides housekeeping for LSTM cells.
 
   This may be useful for alternative LSTM and similar type of cells.
@@ -459,12 +475,7 @@ class LSTMBlockWrapper(fused_rnn_cell.FusedRNNCell):
     """
     pass
 
-  def __call__(self,
-               inputs,
-               initial_state=None,
-               dtype=None,
-               sequence_length=None,
-               scope=None):
+  def call(self, inputs, initial_state=None, dtype=None, sequence_length=None):
     """Run this LSTM on inputs, starting from the given state.
 
     Args:
@@ -480,7 +491,6 @@ class LSTMBlockWrapper(fused_rnn_cell.FusedRNNCell):
         `int32` or `int64` vector (tensor) size `[batch_size]`, values in `[0,
         time_len).`
         Defaults to `time_len` for each element.
-      scope: `VariableScope` for the created subgraph; defaults to class name.
 
     Returns:
       A pair containing:
@@ -493,75 +503,71 @@ class LSTMBlockWrapper(fused_rnn_cell.FusedRNNCell):
     Raises:
       ValueError: in case of shape mismatches
     """
-    with vs.variable_scope(scope or "lstm_block_wrapper"):
-      is_list = isinstance(inputs, list)
-      if is_list:
-        inputs = array_ops.stack(inputs)
-      inputs_shape = inputs.get_shape().with_rank(3)
-      if not inputs_shape[2]:
-        raise ValueError("Expecting inputs_shape[2] to be set: %s" %
-                         inputs_shape)
-      batch_size = inputs_shape[1].value
-      if batch_size is None:
-        batch_size = array_ops.shape(inputs)[1]
-      time_len = inputs_shape[0].value
-      if time_len is None:
-        time_len = array_ops.shape(inputs)[0]
-
-      # Provide default values for initial_state and dtype
-      if initial_state is None:
-        if dtype is None:
-          raise ValueError(
-              "Either initial_state or dtype needs to be specified")
-        z = array_ops.zeros(
-            array_ops.stack([batch_size, self.num_units]), dtype=dtype)
-        initial_state = z, z
-      else:
-        if len(initial_state) != 2:
-          raise ValueError(
-              "Expecting initial_state to be a tuple with length 2 or None")
-        if dtype is None:
-          dtype = initial_state[0].dtype
-
-      # create the actual cell
-      if sequence_length is not None:
-        sequence_length = ops.convert_to_tensor(sequence_length)
-      initial_cell_state, initial_output = initial_state  # pylint: disable=unpacking-non-sequence
-      cell_states, outputs = self._call_cell(inputs, initial_cell_state,
-                                             initial_output, dtype,
-                                             sequence_length)
-
-      if sequence_length is not None:
-        # Mask out the part beyond sequence_length
-        mask = array_ops.transpose(
-            array_ops.sequence_mask(
-                sequence_length, time_len, dtype=dtype), [1, 0])
-        mask = array_ops.tile(
-            array_ops.expand_dims(mask, [-1]), [1, 1, self.num_units])
-        outputs *= mask
-        # Prepend initial states to cell_states and outputs for indexing to work
-        # correctly,since we want to access the last valid state at
-        # sequence_length - 1, which can even be -1, corresponding to the
-        # initial state.
-        mod_cell_states = array_ops.concat(
-            [array_ops.expand_dims(initial_cell_state, [0]), cell_states], 0)
-        mod_outputs = array_ops.concat(
-            [array_ops.expand_dims(initial_output, [0]), outputs], 0)
-        final_cell_state = self._gather_states(mod_cell_states, sequence_length,
-                                               batch_size)
-        final_output = self._gather_states(mod_outputs, sequence_length,
-                                           batch_size)
-      else:
-        # No sequence_lengths used: final state is the last state
-        final_cell_state = cell_states[-1]
-        final_output = outputs[-1]
-
-      if is_list:
-        # Input was a list, so return a list
-        outputs = array_ops.unstack(outputs)
-
-      final_state = rnn_cell_impl.LSTMStateTuple(final_cell_state, final_output)
-      return outputs, final_state
+    is_list = isinstance(inputs, list)
+    if is_list:
+      inputs = array_ops.stack(inputs)
+    inputs_shape = inputs.get_shape().with_rank(3)
+    if not inputs_shape[2]:
+      raise ValueError("Expecting inputs_shape[2] to be set: %s" % inputs_shape)
+    batch_size = inputs_shape[1].value
+    if batch_size is None:
+      batch_size = array_ops.shape(inputs)[1]
+    time_len = inputs_shape[0].value
+    if time_len is None:
+      time_len = array_ops.shape(inputs)[0]
+
+    # Provide default values for initial_state and dtype
+    if initial_state is None:
+      if dtype is None:
+        raise ValueError("Either initial_state or dtype needs to be specified")
+      z = array_ops.zeros(
+          array_ops.stack([batch_size, self.num_units]), dtype=dtype)
+      initial_state = z, z
+    else:
+      if len(initial_state) != 2:
+        raise ValueError(
+            "Expecting initial_state to be a tuple with length 2 or None")
+      if dtype is None:
+        dtype = initial_state[0].dtype
+
+    # create the actual cell
+    if sequence_length is not None:
+      sequence_length = ops.convert_to_tensor(sequence_length)
+    initial_cell_state, initial_output = initial_state  # pylint: disable=unpacking-non-sequence
+    cell_states, outputs = self._call_cell(
+        inputs, initial_cell_state, initial_output, dtype, sequence_length)
+
+    if sequence_length is not None:
+      # Mask out the part beyond sequence_length
+      mask = array_ops.transpose(
+          array_ops.sequence_mask(sequence_length, time_len, dtype=dtype),
+          [1, 0])
+      mask = array_ops.tile(
+          array_ops.expand_dims(mask, [-1]), [1, 1, self.num_units])
+      outputs *= mask
+      # Prepend initial states to cell_states and outputs for indexing to work
+      # correctly,since we want to access the last valid state at
+      # sequence_length - 1, which can even be -1, corresponding to the
+      # initial state.
+      mod_cell_states = array_ops.concat(
+          [array_ops.expand_dims(initial_cell_state, [0]), cell_states], 0)
+      mod_outputs = array_ops.concat(
+          [array_ops.expand_dims(initial_output, [0]), outputs], 0)
+      final_cell_state = self._gather_states(mod_cell_states, sequence_length,
+                                             batch_size)
+      final_output = self._gather_states(mod_outputs, sequence_length,
+                                         batch_size)
+    else:
+      # No sequence_lengths used: final state is the last state
+      final_cell_state = cell_states[-1]
+      final_output = outputs[-1]
+
+    if is_list:
+      # Input was a list, so return a list
+      outputs = array_ops.unstack(outputs)
+
+    final_state = rnn_cell_impl.LSTMStateTuple(final_cell_state, final_output)
+    return outputs, final_state
 
   def _gather_states(self, data, indices, batch_size):
     """Produce `out`, s.t. out(i, j) = data(indices(i), i, j)."""
@@ -589,7 +595,9 @@ class LSTMBlockFusedCell(LSTMBlockWrapper):
                num_units,
                forget_bias=1.0,
                cell_clip=None,
-               use_peephole=False):
+               use_peephole=False,
+               reuse=None,
+               name="rnn/lstm_cell"):
     """Initialize the LSTM cell.
 
     Args:
@@ -597,19 +605,46 @@ class LSTMBlockFusedCell(LSTMBlockWrapper):
       forget_bias: float, The bias added to forget gates (see above).
       cell_clip: clip the cell to this value. Default is no cell clipping.
       use_peephole: Whether to use peephole connections or not.
+      reuse: (optional) boolean describing whether to reuse variables in an
+        existing scope.  If not `True`, and the existing scope already has the
+        given variables, an error is raised.
+      name: String, the name of the layer. Layers with the same name will
+        share weights, but to avoid mistakes we require reuse=True in such
+        cases.  By default this is "lstm_cell", for variable-name compatibility
+        with `tf.nn.rnn_cell.LSTMCell`.
     """
+    super(LSTMBlockFusedCell, self).__init__(_reuse=reuse, name=name)
     self._num_units = num_units
     self._forget_bias = forget_bias
     self._cell_clip = cell_clip if cell_clip is not None else -1
     self._use_peephole = use_peephole
 
+    # Inputs must be 3-dimensional.
+    self.input_spec = base_layer.InputSpec(ndim=3)
+
   @property
   def num_units(self):
     """Number of units in this cell (output dimension)."""
     return self._num_units
 
-  def _call_cell(self, inputs, initial_cell_state, initial_output, dtype,
-                 sequence_length):
+  def build(self, input_shape):
+    input_size = input_shape[2].value
+    self._kernel = vs.get_variable(
+        "kernel", [input_size + self._num_units, self._num_units * 4])
+    self._bias = vs.get_variable(
+        "bias", [self._num_units * 4],
+        initializer=init_ops.constant_initializer(0.0))
+    if self._use_peephole:
+      self._w_i_diag = vs.get_variable("w_i_diag", [self._num_units])
+      self._w_f_diag = vs.get_variable("w_f_diag", [self._num_units])
+      self._w_o_diag = vs.get_variable("w_o_diag", [self._num_units])
+
+  def _call_cell(self,
+                 inputs,
+                 initial_cell_state=None,
+                 initial_output=None,
+                 dtype=None,
+                 sequence_length=None):
     """Run this LSTM on inputs, starting from the given state.
 
     Args:
@@ -636,18 +671,11 @@ class LSTMBlockFusedCell(LSTMBlockWrapper):
     time_len = inputs_shape[0].value
     if time_len is None:
       time_len = array_ops.shape(inputs)[0]
-    input_size = inputs_shape[2].value
-    w = vs.get_variable(
-        "kernel",
-        [input_size + self._num_units, self._num_units * 4], dtype=dtype)
-    b = vs.get_variable(
-        "bias", [w.get_shape().with_rank(2)[1]],
-        initializer=init_ops.constant_initializer(0.0),
-        dtype=dtype)
+
     if self._use_peephole:
-      wci = vs.get_variable("w_i_diag", [self._num_units], dtype=dtype)
-      wcf = vs.get_variable("w_f_diag", [self._num_units], dtype=dtype)
-      wco = vs.get_variable("w_o_diag", [self._num_units], dtype=dtype)
+      wci = self._w_i_diag
+      wco = self._w_o_diag
+      wcf = self._w_f_diag
     else:
       wci = wcf = wco = array_ops.zeros([self._num_units], dtype=dtype)
 
@@ -661,11 +689,11 @@ class LSTMBlockFusedCell(LSTMBlockWrapper):
         x=inputs,
         cs_prev=initial_cell_state,
         h_prev=initial_output,
-        w=w,
+        w=self._kernel,
         wci=wci,
         wcf=wcf,
         wco=wco,
-        b=b,
+        b=self._bias,
         forget_bias=self._forget_bias,
         cell_clip=self._cell_clip,
         use_peephole=self._use_peephole)
diff --git a/tensorflow/contrib/rnn/python/ops/rnn_cell.py b/tensorflow/contrib/rnn/python/ops/rnn_cell.py
index 9685b58392..5a6d287c68 100644
--- a/tensorflow/contrib/rnn/python/ops/rnn_cell.py
+++ b/tensorflow/contrib/rnn/python/ops/rnn_cell.py
@@ -1821,7 +1821,7 @@ class CompiledWrapper(rnn_cell_impl.RNNCell):
         return not _REGISTERED_OPS[node_def.op].is_stateful
 
     with jit.experimental_jit_scope(compile_ops=compile_ops):
-      return self._cell(inputs, state, scope)
+      return self._cell(inputs, state, scope=scope)
 
 
 def _random_exp_initializer(minval,
diff --git a/tensorflow/python/ops/rnn_cell_impl.py b/tensorflow/python/ops/rnn_cell_impl.py
index 8aaf77f173..7c759d852c 100644
--- a/tensorflow/python/ops/rnn_cell_impl.py
+++ b/tensorflow/python/ops/rnn_cell_impl.py
@@ -265,17 +265,18 @@ class _LayerRNNCell(RNNCell):
   `call` methods do not access Variables `tf.get_variable`.
   """
 
-  def __call__(self, inputs, state, scope=None):
+  def __call__(self, inputs, *args, **kwargs):
     """Run this RNN cell on inputs, starting from the given state.
 
     Args:
       inputs: `2-D` tensor with shape `[batch_size, input_size]`.
-      state: if `self.state_size` is an integer, this should be a `2-D Tensor`
-        with shape `[batch_size, self.state_size]`.  Otherwise, if
+      *args: Additional positional arguments.
+        Usually composesed of `[state]`: if `self.state_size` is an integer,
+        this should be a `2-D Tensor` with shape
+        `[batch_size, self.state_size]`.  Otherwise, if
         `self.state_size` is a tuple of integers, this should be a tuple
         with shapes `[batch_size, s] for s in self.state_size`.
-      scope: `VariableScope` for the created subgraph; if not provided,
-        defaults to standard `tf.layers.Layer` behavior.
+      **kwargs: Additional keyword arguments.  Common keys include `scope`.
 
     Returns:
       A pair containing:
@@ -287,7 +288,7 @@ class _LayerRNNCell(RNNCell):
     # Bypass RNNCell's variable capturing semantics for LayerRNNCell.
     # Instead, it is up to subclasses to provide a proper build
     # method.  See the class docstring for more details.
-    return base_layer.Layer.__call__(self, inputs, state, scope=scope)
+    return base_layer.Layer.__call__(self, inputs, *args, **kwargs)
 
 
 class BasicRNNCell(_LayerRNNCell):
@@ -1037,7 +1038,7 @@ class DropoutWrapper(RNNCell):
       inputs = self._dropout(inputs, "input",
                              self._recurrent_input_noise,
                              self._input_keep_prob)
-    output, new_state = self._cell(inputs, state, scope)
+    output, new_state = self._cell(inputs, state, scope=scope)
     if _should_dropout(self._state_keep_prob):
       # Identify which subsets of the state to perform dropout on and
       # which ones to keep.
-- 
GitLab


From 24fda91f22e3df55187199c99b3e4a6bf5c0d1a1 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 12 Dec 2017 18:19:29 -0800
Subject: [PATCH 0942/1225] Allow Tensor::bit_casted_shaped() to take type
 parameter T with different size from the buffer data type size.

PiperOrigin-RevId: 178845870
---
 tensorflow/core/framework/tensor.h       |  76 +++++++-----
 tensorflow/core/framework/tensor_test.cc | 145 ++++++++++++++++++-----
 2 files changed, 160 insertions(+), 61 deletions(-)

diff --git a/tensorflow/core/framework/tensor.h b/tensorflow/core/framework/tensor.h
index c195623b27..92d10f0d8c 100644
--- a/tensorflow/core/framework/tensor.h
+++ b/tensorflow/core/framework/tensor.h
@@ -424,7 +424,8 @@ class Tensor {
   typename TTypes<T, NDIMS>::ConstTensor flat_outer_dims() const;
 
   template <typename T, size_t NDIMS = 3>
-  typename TTypes<T, NDIMS>::ConstTensor flat_inner_outer_dims(int64 begin) const;
+  typename TTypes<T, NDIMS>::ConstTensor flat_inner_outer_dims(
+      int64 begin) const;
 
   /// Render the first `max_entries` values in `*this` into a string.
   string SummarizeValue(int64 max_entries) const;
@@ -464,10 +465,6 @@ class Tensor {
   void CheckTypeAndIsAligned(DataType expected_dtype) const;
   void CheckIsAlignedAndSingleElement() const;
   void set_dtype(DataType t) { shape_.set_data_type(t); }
-  template <size_t NDIMS>
-  void FillDimsAndValidateCompatibleShape(
-      gtl::ArraySlice<int64> new_sizes,
-      Eigen::array<Eigen::DenseIndex, NDIMS>* dims) const;
 
   // TensorShape's InlineVector.
   static gtl::InlinedVector<int64, 4> ComputeFlatInnerDims(
@@ -520,8 +517,13 @@ class Tensor {
 
   template <size_t NDIMS>
   void FillDimsAndValidateCompatibleShape(
-      Eigen::array<Eigen::DenseIndex, NDIMS>* dims,
-      gtl::ArraySlice<int64> new_sizes) const;
+      gtl::ArraySlice<int64> new_sizes,
+      Eigen::array<Eigen::DenseIndex, NDIMS>* dims) const;
+
+  template <typename T, size_t NDIMS>
+  void FillDimsAndValidateCompatibleShape(
+      gtl::ArraySlice<int64> new_sizes,
+      Eigen::array<Eigen::DenseIndex, NDIMS>* dims) const;
 };
 
 // Implementation details
@@ -631,12 +633,36 @@ void Tensor::FillDimsAndValidateCompatibleShape(
   CHECK_EQ(new_num_elements, NumElements());
 }
 
+template <typename T, size_t NDIMS>
+void Tensor::FillDimsAndValidateCompatibleShape(
+    gtl::ArraySlice<int64> new_sizes,
+    Eigen::array<Eigen::DenseIndex, NDIMS>* dims) const {
+  CHECK_EQ(NDIMS, new_sizes.size());
+  int64 new_num_elements = 1;
+  for (size_t d = 0; d < NDIMS; d++) {
+    new_num_elements *= new_sizes[d];
+    (*dims)[d] = new_sizes[d];
+  }
+  const int element_size = DataTypeSize(BaseType(dtype()));
+  if (element_size > 0) {
+    CHECK_EQ(new_num_elements * sizeof(T), NumElements() * element_size);
+  } else {
+    // DataTypeSize() returns 0 for some data types. In this case, assume that T
+    // has the same size as the buffer type.
+    // NOTE: If we can be sure that DataTypeSize() does not return 0 for all POD
+    // types, then we should check DataTypeToEnum<T>::v() == dtype(). Or simply
+    // check if `element_size > 0` to err when bit cast is attempted on Tensor
+    // of unknown data type size.
+    CHECK_EQ(new_num_elements, NumElements());
+  }
+}
+
 template <typename T, size_t NDIMS>
 typename TTypes<T, NDIMS>::Tensor Tensor::shaped(
     gtl::ArraySlice<int64> new_sizes) {
   CheckTypeAndIsAligned(DataTypeToEnum<T>::v());
   Eigen::array<Eigen::DenseIndex, NDIMS> dims;
-  FillDimsAndValidateCompatibleShape<NDIMS>(new_sizes, &dims);
+  FillDimsAndValidateCompatibleShape(new_sizes, &dims);
   return typename TTypes<T, NDIMS>::Tensor(base<T>(), dims);
 }
 
@@ -645,7 +671,7 @@ typename TTypes<T, NDIMS>::Tensor Tensor::bit_casted_shaped(
     gtl::ArraySlice<int64> new_sizes) {
   CHECK(IsAligned());
   Eigen::array<Eigen::DenseIndex, NDIMS> dims;
-  FillDimsAndValidateCompatibleShape<NDIMS>(new_sizes, &dims);
+  FillDimsAndValidateCompatibleShape<T>(new_sizes, &dims);
   return typename TTypes<T, NDIMS>::Tensor(base<T>(), dims);
 }
 
@@ -654,29 +680,16 @@ typename TTypes<T, NDIMS>::UnalignedTensor Tensor::unaligned_shaped(
     gtl::ArraySlice<int64> new_sizes) {
   CheckType(DataTypeToEnum<T>::v());
   Eigen::array<Eigen::DenseIndex, NDIMS> dims;
-  FillDimsAndValidateCompatibleShape<NDIMS>(new_sizes, &dims);
+  FillDimsAndValidateCompatibleShape(new_sizes, &dims);
   return typename TTypes<T, NDIMS>::UnalignedTensor(base<T>(), dims);
 }
 
-template <size_t NDIMS>
-void Tensor::FillDimsAndValidateCompatibleShape(
-    Eigen::array<Eigen::DenseIndex, NDIMS>* dims,
-    gtl::ArraySlice<int64> new_sizes) const {
-  CHECK_EQ(NDIMS, new_sizes.size());
-  int64 new_num_elements = 1;
-  for (size_t d = 0; d < NDIMS; d++) {
-    new_num_elements *= new_sizes[d];
-    (*dims)[d] = new_sizes[d];
-  }
-  CHECK_EQ(new_num_elements, NumElements());
-}
-
 template <typename T, size_t NDIMS>
 typename TTypes<T, NDIMS>::ConstTensor Tensor::shaped(
     gtl::ArraySlice<int64> new_sizes) const {
   CheckTypeAndIsAligned(DataTypeToEnum<T>::v());
   Eigen::array<Eigen::DenseIndex, NDIMS> dims;
-  FillDimsAndValidateCompatibleShape(&dims, new_sizes);
+  FillDimsAndValidateCompatibleShape(new_sizes, &dims);
   return typename TTypes<T, NDIMS>::ConstTensor(base<T>(), dims);
 }
 
@@ -685,7 +698,7 @@ typename TTypes<T, NDIMS>::ConstTensor Tensor::bit_casted_shaped(
     gtl::ArraySlice<int64> new_sizes) const {
   CHECK(IsAligned());
   Eigen::array<Eigen::DenseIndex, NDIMS> dims;
-  FillDimsAndValidateCompatibleShape(&dims, new_sizes);
+  FillDimsAndValidateCompatibleShape<T>(new_sizes, &dims);
   return typename TTypes<T, NDIMS>::ConstTensor(base<T>(), dims);
 }
 
@@ -694,7 +707,7 @@ typename TTypes<T, NDIMS>::UnalignedConstTensor Tensor::unaligned_shaped(
     gtl::ArraySlice<int64> new_sizes) const {
   CheckType(DataTypeToEnum<T>::v());
   Eigen::array<Eigen::DenseIndex, NDIMS> dims;
-  FillDimsAndValidateCompatibleShape(&dims, new_sizes);
+  FillDimsAndValidateCompatibleShape(new_sizes, &dims);
   return typename TTypes<T, NDIMS>::UnalignedConstTensor(base<T>(), dims);
 }
 
@@ -722,8 +735,8 @@ typename TTypes<T, NDIMS>::Tensor Tensor::flat_outer_dims() {
 
 template <typename T, size_t NDIMS>
 typename TTypes<T, NDIMS>::Tensor Tensor::flat_inner_outer_dims(int64 begin) {
-  gtl::InlinedVector<int64,4> flat_outer = ComputeFlatOuterDims(
-      shape_.dim_sizes(), begin + NDIMS);
+  gtl::InlinedVector<int64, 4> flat_outer =
+      ComputeFlatOuterDims(shape_.dim_sizes(), begin + NDIMS);
   return shaped<T, NDIMS>(ComputeFlatInnerDims(flat_outer, NDIMS));
 }
 
@@ -738,9 +751,10 @@ typename TTypes<T, NDIMS>::ConstTensor Tensor::flat_outer_dims() const {
 }
 
 template <typename T, size_t NDIMS>
-typename TTypes<T, NDIMS>::ConstTensor Tensor::flat_inner_outer_dims(int64 begin) const {
-  gtl::InlinedVector<int64,4> flat_outer = ComputeFlatOuterDims(
-      shape_.dim_sizes(), begin + NDIMS);
+typename TTypes<T, NDIMS>::ConstTensor Tensor::flat_inner_outer_dims(
+    int64 begin) const {
+  gtl::InlinedVector<int64, 4> flat_outer =
+      ComputeFlatOuterDims(shape_.dim_sizes(), begin + NDIMS);
   return shaped<T, NDIMS>(ComputeFlatInnerDims(flat_outer, NDIMS));
 }
 
diff --git a/tensorflow/core/framework/tensor_test.cc b/tensorflow/core/framework/tensor_test.cc
index 47ff29fbe1..cbc921ccd0 100644
--- a/tensorflow/core/framework/tensor_test.cc
+++ b/tensorflow/core/framework/tensor_test.cc
@@ -334,41 +334,126 @@ class TensorReshapeTest : public ::testing::Test {
     tensor(0, 0, 0, 0) = 0.01f;
     tensor(1, 2, 3, 4) = 0.02f;
   }
-};
 
-TEST_F(TensorReshapeTest, Reshape) {
-  LOG(INFO) << "shaped";
-  {
-    auto shaped = t.shaped<float, 1>({120});
-    EXPECT_EQ(120, shaped.dimension(0));
-    EXPECT_EQ(shaped(0), 0.01f);
-    EXPECT_EQ(shaped(119), 0.02f);
-  }
-  {
-    auto shaped = t.shaped<float, 2>({6, 20});
-    EXPECT_EQ(6, shaped.dimension(0));
-    EXPECT_EQ(20, shaped.dimension(1));
-    EXPECT_EQ(shaped(0, 0), 0.01f);
-    EXPECT_EQ(shaped(5, 19), 0.02f);
+  template <typename T>
+  using ReshapeFunc = T (Tensor::*)(gtl::ArraySlice<int64>);
+  template <typename T>
+  using ConstReshapeFunc = T (Tensor::*)(gtl::ArraySlice<int64>) const;
+
+  template <typename T, ReshapeFunc<T> Func>
+  void TestReshape(std::initializer_list<int64> sizes) {
+    T shaped = (t.*Func)(sizes);
+    TestReshapeImpl(shaped, sizes);
   }
-  {
-    auto shaped = t.shaped<float, 3>({6, 4, 5});
-    EXPECT_EQ(6, shaped.dimension(0));
-    EXPECT_EQ(4, shaped.dimension(1));
-    EXPECT_EQ(5, shaped.dimension(2));
-    EXPECT_EQ(shaped(0, 0, 0), 0.01f);
-    EXPECT_EQ(shaped(5, 3, 4), 0.02f);
+
+  template <typename T, ConstReshapeFunc<T> Func>
+  void TestReshape(std::initializer_list<int64> sizes) {
+    T shaped = (static_cast<const Tensor&>(t).*Func)(sizes);
+    TestReshapeImpl(shaped, sizes);
   }
-  {
-    auto shaped = t.shaped<float, 4>({2, 3, 4, 5});
-    EXPECT_EQ(2, shaped.dimension(0));
-    EXPECT_EQ(3, shaped.dimension(1));
-    EXPECT_EQ(4, shaped.dimension(2));
-    EXPECT_EQ(5, shaped.dimension(3));
 
-    EXPECT_EQ(shaped(0, 0, 0, 0), 0.01f);
-    EXPECT_EQ(shaped(1, 2, 3, 4), 0.02f);
+  template <typename T>
+  void TestReshapeImpl(T shaped, std::initializer_list<int64> sizes) {
+    auto iter = sizes.begin();
+    for (int i = 0; i < shaped.rank(); ++i, ++iter) {
+      EXPECT_EQ(*iter, shaped.dimension(i));
+    }
+
+    using Index = typename T::Index;
+    using Scalar = typename T::Scalar;
+    constexpr int N = T::NumIndices;
+
+    // To handle the cast when `shaped` is bit casted into a different type.
+    const float expected_first = 0.01f;
+    Eigen::DSizes<Index, N> coord;
+    EXPECT_EQ(shaped(coord), *reinterpret_cast<const Scalar*>(&expected_first));
+
+    for (int i = 0; i < N; ++i) {
+      coord[i] = shaped.dimension(i) - 1;
+    }
+    const float expected_last = 0.02f;
+    constexpr int kNumScalarPerFloat =
+        sizeof(float) / sizeof(Scalar);  // Assuming even divide.
+    EXPECT_EQ(shaped(coord), reinterpret_cast<const Scalar*>(
+                                 &expected_last)[kNumScalarPerFloat - 1]);
   }
+};
+
+TEST_F(TensorReshapeTest, Reshape) {
+  LOG(INFO) << "shaped";
+
+#define TEST_RESHAPE(...)                                                  \
+  {                                                                        \
+    constexpr int N = (sizeof((int[]){__VA_ARGS__}) / sizeof(int));        \
+    TestReshape<TTypes<float, N>::Tensor, &Tensor::shaped<float, N>>(      \
+        {__VA_ARGS__});                                                    \
+    TestReshape<TTypes<float, N>::ConstTensor, &Tensor::shaped<float, N>>( \
+        {__VA_ARGS__});                                                    \
+    TestReshape<TTypes<float, N>::UnalignedTensor,                         \
+                &Tensor::unaligned_shaped<float, N>>({__VA_ARGS__});       \
+    TestReshape<TTypes<float, N>::UnalignedConstTensor,                    \
+                &Tensor::unaligned_shaped<float, N>>({__VA_ARGS__});       \
+    TestReshape<TTypes<float, N>::Tensor,                                  \
+                &Tensor::bit_casted_shaped<float, N>>({__VA_ARGS__});      \
+    TestReshape<TTypes<float, N>::ConstTensor,                             \
+                &Tensor::bit_casted_shaped<float, N>>({__VA_ARGS__});      \
+    TestReshape<TTypes<int32, N>::Tensor,                                  \
+                &Tensor::bit_casted_shaped<int32, N>>({__VA_ARGS__});      \
+    TestReshape<TTypes<int32, N>::ConstTensor,                             \
+                &Tensor::bit_casted_shaped<int32, N>>({__VA_ARGS__});      \
+  }
+
+  TEST_RESHAPE(120);
+  TEST_RESHAPE(6, 20);
+  TEST_RESHAPE(6, 4, 5);
+  TEST_RESHAPE(2, 3, 4, 5);
+#undef TEST_RESHAPE
+}
+
+TEST_F(TensorReshapeTest, BitcastReshapeDifferentSize) {
+#define TEST_BITCAST8_RESHAPE(...)                                    \
+  {                                                                   \
+    constexpr int N = (sizeof((int[]){__VA_ARGS__}) / sizeof(int));   \
+    TestReshape<TTypes<uint8, N>::Tensor,                             \
+                &Tensor::bit_casted_shaped<uint8, N>>({__VA_ARGS__}); \
+  }
+
+  TEST_BITCAST8_RESHAPE(480);
+  TEST_BITCAST8_RESHAPE(24, 20);
+  TEST_BITCAST8_RESHAPE(6, 16, 5);
+  TEST_BITCAST8_RESHAPE(2, 3, 4, 20);
+#undef TEST_BITCAST8_RESHAPE
+#define TEST_BITCAST16_RESHAPE(...)                                   \
+  {                                                                   \
+    constexpr int N = (sizeof((int[]){__VA_ARGS__}) / sizeof(int));   \
+    TestReshape<TTypes<int16, N>::Tensor,                             \
+                &Tensor::bit_casted_shaped<int16, N>>({__VA_ARGS__}); \
+  }
+
+  TEST_BITCAST16_RESHAPE(240);
+  TEST_BITCAST16_RESHAPE(6, 40);
+  TEST_BITCAST16_RESHAPE(12, 4, 5);
+  TEST_BITCAST16_RESHAPE(2, 3, 8, 5);
+  TEST_BITCAST16_RESHAPE(2, 3, 4, 1, 10);
+#undef TEST_BITCAST16_RESHAPE
+}
+
+TEST_F(TensorReshapeTest, ReshapeError) {
+  EXPECT_DEATH((t.shaped<float, 0>({})), "1 vs. 120");
+  EXPECT_DEATH((t.shaped<float, 1>({119})), "119 vs. 120");
+  EXPECT_DEATH((t.shaped<float, 4>({2, 3, 4, 6})), "144 vs. 120");
+
+  EXPECT_DEATH((t.unaligned_shaped<float, 0>({})), "1 vs. 120");
+  EXPECT_DEATH((t.unaligned_shaped<float, 1>({119})), "119 vs. 120");
+  EXPECT_DEATH((t.unaligned_shaped<float, 4>({2, 3, 4, 6})), "144 vs. 120");
+
+  EXPECT_DEATH((t.bit_casted_shaped<float, 0>({})), "4 vs. 480");
+  EXPECT_DEATH((t.bit_casted_shaped<float, 1>({119})), "476 vs. 480");
+  EXPECT_DEATH((t.bit_casted_shaped<float, 4>({2, 3, 4, 6})), "576 vs. 480");
+
+  Tensor string_tensor{DT_STRING, {10}};
+  // Note that the error message compare # of elements, not # of bytes.
+  EXPECT_DEATH((string_tensor.bit_casted_shaped<string, 1>({9})), "9 vs. 10");
 }
 
 TEST_F(TensorReshapeTest, Flat) {
-- 
GitLab


From 192bcb86d9e988d4f3b12bd2677e3638a9a0cae3 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 12 Dec 2017 18:44:25 -0800
Subject: [PATCH 0943/1225] Fully-qualify function call in TF_CHECK_OK macro
 implementation, so that it can be safely used outside of the tensorflow
 namespace.

Note that the StreamExecutor SE_CHECK_OK simply uses TF_CHECK_OK, so this helps
those cases.

PiperOrigin-RevId: 178847904
---
 tensorflow/core/lib/core/status.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tensorflow/core/lib/core/status.h b/tensorflow/core/lib/core/status.h
index 3b8a322854..58a50a70c2 100644
--- a/tensorflow/core/lib/core/status.h
+++ b/tensorflow/core/lib/core/status.h
@@ -127,9 +127,9 @@ inline tensorflow::string* TfCheckOpHelper(::tensorflow::Status v,
   return TfCheckOpHelperOutOfLine(v, msg);
 }
 
-#define TF_DO_CHECK_OK(val, level)                  \
-  while (auto _result = TfCheckOpHelper(val, #val)) \
-    LOG(level) << *(_result)
+#define TF_DO_CHECK_OK(val, level)                                \
+  while (auto _result = ::tensorflow::TfCheckOpHelper(val, #val)) \
+  LOG(level) << *(_result)
 
 #define TF_CHECK_OK(val)  TF_DO_CHECK_OK(val, FATAL)
 #define TF_QCHECK_OK(val) TF_DO_CHECK_OK(val, QFATAL)
-- 
GitLab


From b7308e3bd69349e9023497948a6bf55d3b0895d9 Mon Sep 17 00:00:00 2001
From: Igor Ganichev <iga@google.com>
Date: Tue, 12 Dec 2017 19:28:44 -0800
Subject: [PATCH 0944/1225] Correctly pass name in layers.util.smart_cond

Before this change arguments were passed positionally and "name"
argument was wrongly mapped to "strict" argument of tf.cond instead
of the itended "name". Such a fix could potentially change operation
names and cause an error when restoring a graph, but it seems like
this particular change is safe for the following reasons.

 - smart_cond is not a public API. So users should not be calling it
 directly.

 - smart_cond is used in 38 places internally. All of them, except for
 in tf.contrib.summary don't use the "name" parameter. Such usage leads
 to the same names before and after this change. The names will change
 for users of tf.contrib.summary. Luckily, this is a very recent
 addition and has utility only in context of eager execution, which is
 in pre-alpha stage yet.

Because this change reroutes the wrong "name" -> "strict" mapping to
"name" -> "name", the value of "strict" is changing from "None" to
"False". Luckily, this has no effect on the function's behavior.

PiperOrigin-RevId: 178850766
---
 tensorflow/contrib/summary/summary_ops_graph_test.py | 2 +-
 tensorflow/python/layers/utils.py                    | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/tensorflow/contrib/summary/summary_ops_graph_test.py b/tensorflow/contrib/summary/summary_ops_graph_test.py
index 42ebb7ab9d..2b7806f80d 100644
--- a/tensorflow/contrib/summary/summary_ops_graph_test.py
+++ b/tensorflow/contrib/summary/summary_ops_graph_test.py
@@ -83,7 +83,7 @@ class DbTest(summary_test_util.SummaryDbTest):
           sess.run(summary_ops.all_summary_ops())
           events = summary_test_util.events_from_logdir(logdir)
           self.assertEqual(len(events), 2)
-          self.assertEqual(events[1].summary.value[0].tag, 'cond/my_scalar')
+          self.assertEqual(events[1].summary.value[0].tag, 'my_scalar')
 
   def testSummaryGraphModeCond(self):
     with ops.Graph().as_default(), self.test_session():
diff --git a/tensorflow/python/layers/utils.py b/tensorflow/python/layers/utils.py
index 766a6800d4..64c7124a43 100644
--- a/tensorflow/python/layers/utils.py
+++ b/tensorflow/python/layers/utils.py
@@ -208,7 +208,7 @@ def smart_cond(pred, fn1, fn2, name=None):
     else:
       return fn2()
   else:
-    return control_flow_ops.cond(pred, fn1, fn2, name)
+    return control_flow_ops.cond(pred, true_fn=fn1, false_fn=fn2, name=name)
 
 
 def constant_value(pred):
-- 
GitLab


From 216878ea3dafdc5fbe6a15d389edb003ad2fd4b4 Mon Sep 17 00:00:00 2001
From: Alexander Gorban <gorban@google.com>
Date: Tue, 12 Dec 2017 20:09:45 -0800
Subject: [PATCH 0945/1225] Simplify tf.case implementation.

PiperOrigin-RevId: 178853258
---
 .../kernel_tests/control_flow_ops_py_test.py  |   4 +-
 tensorflow/python/ops/control_flow_ops.py     | 268 ++++++++----------
 .../python/ops/control_flow_ops_test.py       |  14 +-
 3 files changed, 118 insertions(+), 168 deletions(-)

diff --git a/tensorflow/python/kernel_tests/control_flow_ops_py_test.py b/tensorflow/python/kernel_tests/control_flow_ops_py_test.py
index 35ae89ed33..5b0abaa2eb 100644
--- a/tensorflow/python/kernel_tests/control_flow_ops_py_test.py
+++ b/tensorflow/python/kernel_tests/control_flow_ops_py_test.py
@@ -2279,8 +2279,7 @@ class ControlFlowTest(test.TestCase):
       # Duplicate events cause an error if exclusive = True
       r4 = control_flow_ops.case(
           [(x < y, f1), (x < y, f2)], default=f3, exclusive=True)
-      with self.assertRaisesOpError(
-          "More than one condition evaluated as True but exclusive=True."):
+      with self.assertRaisesOpError("Input error:"):
         r4.eval()
 
       # Check that the default is called if none of the others are
@@ -3045,5 +3044,6 @@ class EagerTest(test.TestCase):
                                  default=f3, exclusive=True)
       self.assertAllEqual(r1.numpy(), 17)
 
+
 if __name__ == "__main__":
   test.main()
diff --git a/tensorflow/python/ops/control_flow_ops.py b/tensorflow/python/ops/control_flow_ops.py
index 6e97fe00bd..3418f33717 100644
--- a/tensorflow/python/ops/control_flow_ops.py
+++ b/tensorflow/python/ops/control_flow_ops.py
@@ -52,6 +52,7 @@ from __future__ import division
 from __future__ import print_function
 
 import collections
+import functools
 
 import six
 from six.moves import xrange  # pylint: disable=redefined-builtin
@@ -3148,23 +3149,105 @@ def tuple(tensors, name=None, control_inputs=None):
     return tpl
 
 
-def _assert_exclusive(preds):
-  """Returns an Assert op that checks that the predicates are exclusive."""
-  preds_c = array_ops.stack(preds, name="preds_c")
+def _assert_at_most_n_true(predicates, n, msg):
+  """Returns an Assert op that checks that at most n predicates are True.
+
+  Args:
+    predicates: list of bool scalar tensors.
+    n: maximum number of true predicates allowed.
+    msg: Error message.
+  """
+  preds_c = array_ops.stack(predicates, name="preds_c")
   num_true_conditions = math_ops.reduce_sum(
       math_ops.cast(preds_c, dtypes.int32), name="num_true_conds")
-  at_most_one_true_condition = math_ops.less(
-      num_true_conditions, constant_op.constant(2, name="two_true_conds"))
+  condition = math_ops.less_equal(num_true_conditions,
+                                  constant_op.constant(n, name="n_true_conds"))
+  preds_names = ", ".join(getattr(p, "name", "?") for p in predicates)
+  error_msg = [
+      "%s: more than %d conditions (%s) evaluated as True:" %
+      (msg, n, preds_names), preds_c
+  ]
+  return Assert(condition, data=error_msg, summarize=len(predicates))
+
 
-  error_msg = [("More than one condition evaluated as True but "
-                "exclusive=True.  Conditions: (%s), Values:"
-                % ", ".join([p.name for p in preds])),
-               preds_c]
-  return Assert(condition=at_most_one_true_condition, data=error_msg,
-                summarize=len(preds))
+def _case_create_default_action(predicates, actions):
+  """Creates default action for a list of actions and their predicates.
 
+  It uses the input actions to select an arbitrary as default and makes sure
+  that corresponding predicates have valid values.
 
-def case(pred_fn_pairs, default=None, exclusive=False, strict=False,
+  Args:
+    predicates: a list of bool scalar tensors
+    actions: a list of callable objects which return tensors.
+
+  Returns:
+    a callable
+  """
+  k = len(predicates) - 1  # could pick any
+  predicate, action = predicates[k], actions[k]
+  other_predicates, other_actions = predicates[:k], actions[:k]
+
+  def default_action():
+    others_msg = ("Implementation error: "
+                  "selected default action #%d was called, but some of other "
+                  "predicates are True: " % k)
+    default_msg = ("Input error: "
+                   "None of conditions evaluated as True:",
+                   array_ops.stack(predicates, name="preds_c"))
+    with ops.control_dependencies([
+        _assert_at_most_n_true(other_predicates, n=0, msg=others_msg),
+        Assert(predicate, data=default_msg)
+    ]):
+      return action()
+
+  return default_action, other_predicates, other_actions
+
+
+def _case_verify_and_canonicalize_args(pred_fn_pairs, exclusive, name):
+  """Verifies input arguments for the case function.
+
+  Args:
+    pred_fn_pairs: Dict or list of pairs of a boolean scalar tensor and a
+                   callable which returns a list of tensors.
+    exclusive: True iff at most one predicate is allowed to evaluate to `True`.
+    name: A name for the case operation.
+
+  Raises:
+    TypeError: If `pred_fn_pairs` is not a list/dictionary.
+    TypeError: If `pred_fn_pairs` is a list but does not contain 2-tuples.
+    TypeError: If `fns[i]` is not callable for any i, or `default` is not
+               callable.
+
+  Returns:
+    a tuple <list of scalar bool tensors, list of callables>.
+  """
+  if not isinstance(pred_fn_pairs, (list, _basetuple, dict)):
+    raise TypeError("fns must be a list, tuple, or dict")
+
+  if isinstance(pred_fn_pairs, collections.OrderedDict):
+    pred_fn_pairs = pred_fn_pairs.items()
+  elif isinstance(pred_fn_pairs, dict):
+    pred_fn_pairs = sorted(pred_fn_pairs.items(), key=lambda item: item[0].name)
+    if not exclusive:
+      logging.warn("%s: An unordered dictionary of predicate/fn pairs was "
+                   "provided, but exclusive=False. The order of conditional "
+                   "tests is deterministic but not guaranteed.", name)
+  for pred_fn_pair in pred_fn_pairs:
+    if not isinstance(pred_fn_pair, _basetuple) or len(pred_fn_pair) != 2:
+      raise TypeError("Each entry in pred_fn_pairs must be a 2-tuple")
+    pred, fn = pred_fn_pair
+    if pred.dtype != dtypes.bool:
+      raise TypeError("pred must be of type bool: %s", pred.name)
+    if not callable(fn):
+      raise TypeError("fn for pred %s must be callable." % pred.name)
+  predicates, actions = zip(*pred_fn_pairs)
+  return predicates, actions
+
+
+def case(pred_fn_pairs,
+         default=None,
+         exclusive=False,
+         strict=False,
          name="case"):
   """Create a case operation.
 
@@ -3249,152 +3332,27 @@ def case(pred_fn_pairs, default=None, exclusive=False, strict=False,
     TypeError: If `pred_fn_pairs` is a list but does not contain 2-tuples.
     TypeError: If `fns[i]` is not callable for any i, or `default` is not
                callable.
-    ValueError: If in eager mode and all predicates are false and no
-               default is provided.
-    ValueError: If in eager mode and is passed a dictionary.
   """
-  pfp = pred_fn_pairs  # For readability
-  if not (isinstance(pfp, list) or isinstance(pfp, _basetuple)
-          or isinstance(pfp, dict)):
-    raise TypeError("fns must be a list, tuple, or dict")
-  if isinstance(pfp, dict):
-    if context.in_eager_mode():
-      raise ValueError(
-          "In eager mode the predicates must be a list, not a dictionary.")
-    if isinstance(pfp, collections.OrderedDict):
-      pfp = pfp.items()
-    else:
-      pfp = sorted(pfp.items(), key=lambda item: item[0].name)
-      if not exclusive:
-        logging.warn("%s: An unordered dictionary of predicate/fn pairs was "
-                     "provided, but exclusive=False. The order of conditional "
-                     "tests is deterministic but not guaranteed.", name)
-  for tup in pfp:
-    if not isinstance(tup, _basetuple) or len(tup) != 2:
-      raise TypeError("Each entry in pred_fn_pairs must be a 2-tuple")
-    pred, fn = tup
-    if pred.dtype != dtypes.bool:
-      raise TypeError("pred must be of type bool: %s", pred.name)
-    if not callable(fn):
-      raise TypeError("fn for pred %s must be callable." % pred.name)
-
-  if default is not None and not callable(default):
-    raise TypeError("default must be callable.")
-
-  if context.in_eager_mode():
-    for pred, fn in pfp:
-      if pred:
-        return fn()
-    if default is None:
-      raise ValueError("tf.case received all false predicates and no default.")
-    return default()
-
-  preds, fns = map(list, zip(*pfp))
-  del pfp  # From now on, preds and fns form the source of truth.
-
-  with ops.name_scope(name, "case", [preds]):
-    exclusivity_assert = _assert_exclusive(preds) if exclusive else None
-    # If no default is provided, then we remove one of the (predicate, function)
-    # pairs and define the default to be the removed function with an additional
-    # control dependency that asserts that the removed predicate holds.
+  predicates, actions = _case_verify_and_canonicalize_args(
+      pred_fn_pairs, exclusive, name)
+  with ops.name_scope(name, "case", [predicates]):
     if default is None:
-      all_preds = _basetuple(preds)  # For the error message.
-      last_pred, last_fn = preds.pop(), fns.pop()
-      def new_default():
-        preds_c = array_ops.stack(all_preds, name="preds_c")
-        error_msg = [
-            ("None of the conditions evaluated as True. Conditions: (%s), "
-             "Values:" % ", ".join([p.name for p in all_preds])),
-            preds_c]
-        assertion = Assert(condition=last_pred,
-                           data=error_msg, summarize=len(all_preds))
-        with ops.control_dependencies([assertion]):
-          return last_fn()
-      default = new_default
-
-    if not preds:
-      return default()
-    not_preds = []
-    for i, p in enumerate(preds):
-      with ops.name_scope("not_%d" % i):
-        not_preds.append(math_ops.logical_not(p))
-    and_not_preds = [constant_op.constant(True, name="always_true")]
-    for i, notp in enumerate(not_preds):
-      with ops.name_scope("and_not_%d" % i):
-        and_not_preds.append(math_ops.logical_and(and_not_preds[-1], notp))
-
-    # preds = [p1, p2, p3]
-    # fns = [f1, f2, f3]
-    # not_preds = [~p1, ~p2, ~p3]
-    # and_not_preds = [True, ~p1, ~p1 & ~p2, ~p1 & ~p2 & ~p3]
-    # case_preds = [p1,
-    #               p2 & ~p1,
-    #               p3 & ~p2 & ~p1,
-    #              ~p3 & ~p2 & ~p1]
-
-    case_preds = []
-    for i, (p, and_not_p_prev) in enumerate(zip(preds, and_not_preds[:-1])):
-      with ops.name_scope("case_%d" % i):
-        case_preds.append(math_ops.logical_and(p, and_not_p_prev))
-    with ops.name_scope("case_none_are_true"):
-      case_preds.append(and_not_preds[-1])
-
-    # Create an empty tensor, or list, with the right type and shape
-    with ops.name_scope("case_create_empty"):
-      def _create_empty_constant(dtype, shape):
-        value = ("" if dtype == dtypes.string else dtype.as_numpy_dtype())
-        if shape.ndims is None:
-          return array_ops.constant(value, dtype=dtype)
-        else:
-          temp_shape = [1 if x.value is None else x.value for x in shape]
-          result = array_ops.constant(value, shape=temp_shape, dtype=dtype)
-          result._shape = shape  # pylint: disable=protected-access
-          return result
-
-      def _correct_empty(v):
-        if isinstance(v, ops.Operation):
-          return no_op()
-        elif isinstance(v, tensor_array_ops.TensorArray):
-          return v
-        elif not hasattr(v, "dtype"):
-          return ops.convert_to_tensor(v)
-        elif isinstance(v, sparse_tensor.SparseTensor):
-          return sparse_tensor.SparseTensor(indices=[[0] * len(v.get_shape())],
-                                            values=[v.dtype.as_numpy_dtype()],
-                                            dense_shape=v.get_shape())
-        else:
-          return _create_empty_constant(v.dtype, v.get_shape())
-
-      empty = lambda: nest.map_structure(_correct_empty, default())
-
-    # case_sequence = [
-    #   cond(~p3 & ~p2 & ~p1, default, empty),
-    #   cond(p3 & ~p2 & ~p1, f3, lambda: case_sequence[0]),
-    #   cond(p2 & ~p1, f2, lambda: case_sequence[1]),
-    #   cond(p1, f1, lambda: case_sequence[2])
-    # ]
-    #
-    # And the return value will be case_sequence[-1]
-    def _build_case():
-      all_fns = [fn for fn in fns]
-      all_fns.append(default)
-      prev_case = None
-      for i, (cp, fn) in enumerate(list(zip(case_preds, all_fns))[::-1]):
-        prev_case = cond(
-            cp, fn,
-            empty if i == 0 else lambda: prev_case,
-            strict=strict, name="If_%d" % i)
-      return prev_case
-
-    if exclusivity_assert is not None:
-      with ops.control_dependencies([exclusivity_assert]):
-        case_seq = _build_case()
+      default, predicates, actions = _case_create_default_action(
+          predicates, actions)
+    fn = default
+    # To eval conditions in direct order we create nested conditions in reverse:
+    #   cond(c[0], true_fn=.., false_fn=cond(c[1], ...))
+    for predicate, action in reversed(list(zip(predicates, actions))):
+      fn = functools.partial(
+          cond, predicate, true_fn=action, false_fn=fn, strict=strict)
+    if exclusive:
+      with ops.control_dependencies([
+          _assert_at_most_n_true(
+              predicates, n=1, msg="Input error: exclusive=True")
+      ]):
+        return fn()
     else:
-      case_seq = _build_case()
-
-    if not strict:
-      case_seq = _UnpackIfSingleton(case_seq)
-    return case_seq
+      return fn()
 
 
 ops.register_proto_function(ops.GraphKeys.COND_CONTEXT,
diff --git a/tensorflow/python/ops/control_flow_ops_test.py b/tensorflow/python/ops/control_flow_ops_test.py
index a666fd33a2..cc5a42bf3d 100644
--- a/tensorflow/python/ops/control_flow_ops_test.py
+++ b/tensorflow/python/ops/control_flow_ops_test.py
@@ -883,8 +883,7 @@ class CaseTest(test_util.TensorFlowTestCase):
     with self.test_session() as sess:
       self.assertEqual(sess.run(output, feed_dict={x: 1}), 2)
       self.assertEqual(sess.run(output, feed_dict={x: 3}), 8)
-      with self.assertRaisesRegexp(errors.InvalidArgumentError,
-                                   "More than one condition evaluated as True"):
+      with self.assertRaisesRegexp(errors.InvalidArgumentError, "Input error:"):
         sess.run(output, feed_dict={x: 2})
 
   def testCase_multiple_matches_non_exclusive(self):
@@ -909,11 +908,7 @@ class CaseTest(test_util.TensorFlowTestCase):
       self.assertEqual(sess.run(output, feed_dict={x: 1}), 2)
       self.assertEqual(sess.run(output, feed_dict={x: 2}), 4)
       self.assertEqual(sess.run(output, feed_dict={x: 3}), 6)
-      with self.assertRaisesRegexp(
-          errors.InvalidArgumentError,
-          r"\[None of the conditions evaluated as True. "
-          r"Conditions: \(Equal:0, Equal_1:0, Equal_2:0\), Values:\] "
-          r"\[0 0 0\]"):
+      with self.assertRaisesRegexp(errors.InvalidArgumentError, "Input error:"):
         sess.run(output, feed_dict={x: 4})
 
   def testCase_withoutDefault_oneCondition(self):
@@ -922,10 +917,7 @@ class CaseTest(test_util.TensorFlowTestCase):
     output = control_flow_ops.case(conditions, exclusive=True)
     with self.test_session() as sess:
       self.assertEqual(sess.run(output, feed_dict={x: 1}), 2)
-      with self.assertRaisesRegexp(
-          errors.InvalidArgumentError,
-          r"\[None of the conditions evaluated as True. "
-          r"Conditions: \(Equal:0\), Values:\] \[0\]"):
+      with self.assertRaisesRegexp(errors.InvalidArgumentError, "Input error:"):
         sess.run(output, feed_dict={x: 4})
 
 
-- 
GitLab


From d109bd2f3fc6a018629469a6497dfe953bf5e305 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 12 Dec 2017 20:19:12 -0800
Subject: [PATCH 0946/1225] Disable a test case in params_test for CPU.

This test has thousands of parameters, and the resulting graph takes take too long
to compile on the CPU backend.

PiperOrigin-RevId: 178853687
---
 tensorflow/compiler/xla/tests/params_test.cc | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/tensorflow/compiler/xla/tests/params_test.cc b/tensorflow/compiler/xla/tests/params_test.cc
index 24c5daed3d..c260258d6e 100644
--- a/tensorflow/compiler/xla/tests/params_test.cc
+++ b/tensorflow/compiler/xla/tests/params_test.cc
@@ -349,10 +349,10 @@ XLA_TEST_F(ParamsTest, DISABLED_ON_CPU(DISABLED_ON_GPU(
 // }
 // result = {p0, p1, ..., pN}
 //
-// TODO(b/70173746): Times out during compilation on GPU and CPU-parallel
-// backend as of 2017-12-03.
-XLA_TEST_F(ParamsTest, DISABLED_ON_CPU_PARALLEL(
-                           DISABLED_ON_GPU(ManyParametersIntoWhileLoop))) {
+// TODO(b/70173746): Times out during compilation on GPU and CPU backends as of
+// 2017-12-12.
+XLA_TEST_F(ParamsTest,
+           DISABLED_ON_CPU(DISABLED_ON_GPU(ManyParametersIntoWhileLoop))) {
   ComputationBuilder builder(client_, TestName());
 
   std::vector<std::unique_ptr<GlobalData>> param_data_owner;
-- 
GitLab


From 8f19188a14b62f2612783f3ebba0cd1c9d08aba8 Mon Sep 17 00:00:00 2001
From: Igor Ganichev <iga@google.com>
Date: Tue, 12 Dec 2017 21:12:14 -0800
Subject: [PATCH 0947/1225] Create global_step when recording summaries if
 needed

User might have not created global_step prior to using
some summary method.

PiperOrigin-RevId: 178857144
---
 tensorflow/contrib/summary/summary_ops.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tensorflow/contrib/summary/summary_ops.py b/tensorflow/contrib/summary/summary_ops.py
index 4556162bfe..ee661dfdc1 100644
--- a/tensorflow/contrib/summary/summary_ops.py
+++ b/tensorflow/contrib/summary/summary_ops.py
@@ -71,7 +71,7 @@ def should_record_summaries():
 def record_summaries_every_n_global_steps(n, global_step=None):
   """Sets the should_record_summaries Tensor to true if global_step % n == 0."""
   if global_step is None:
-    global_step = training_util.get_global_step()
+    global_step = training_util.get_or_create_global_step()
   collection_ref = ops.get_collection_ref(_SHOULD_RECORD_SUMMARIES_NAME)
   old = collection_ref[:]
   with ops.device("cpu:0"):
@@ -560,7 +560,7 @@ def _serialize_graph(arbitrary_graph):
 
 def _choose_step(step):
   if step is None:
-    return training_util.get_global_step()
+    return training_util.get_or_create_global_step()
   if not isinstance(step, ops.Tensor):
     return ops.convert_to_tensor(step, dtypes.int64)
   return step
-- 
GitLab


From d7dbf21bda3fae90ced99db1bb5c592264c02526 Mon Sep 17 00:00:00 2001
From: Austin Anderson <angerson@google.com>
Date: Tue, 12 Dec 2017 21:24:11 -0800
Subject: [PATCH 0948/1225] Explicitly specify CUDA and CUDNN versions (#15328)

A workaround to the base problem causing https://github.com/tensorflow/tensorflow/issues/15291
---
 tensorflow/tools/docker/Dockerfile.devel-gpu | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tensorflow/tools/docker/Dockerfile.devel-gpu b/tensorflow/tools/docker/Dockerfile.devel-gpu
index 339a39bcf3..9f4cc74a66 100644
--- a/tensorflow/tools/docker/Dockerfile.devel-gpu
+++ b/tensorflow/tools/docker/Dockerfile.devel-gpu
@@ -81,7 +81,8 @@ ENV CI_BUILD_PYTHON python
 ENV LD_LIBRARY_PATH /usr/local/cuda/extras/CUPTI/lib64:$LD_LIBRARY_PATH
 ENV TF_NEED_CUDA 1
 ENV TF_CUDA_COMPUTE_CAPABILITIES=3.0,3.5,5.2,6.0,6.1
-
+ENV TF_CUDA_VERSION=9.0
+ENV TF_CUDNN_VERSION=7
 
 RUN ln -s /usr/local/cuda/lib64/stubs/libcuda.so /usr/local/cuda/lib64/stubs/libcuda.so.1 && \
     LD_LIBRARY_PATH=/usr/local/cuda/lib64/stubs:${LD_LIBRARY_PATH} \
-- 
GitLab


From c94f1fb57f884e479f177b818b335e6dcab0ab3c Mon Sep 17 00:00:00 2001
From: Rajendra arora <rajendraarora16@yahoo.com>
Date: Wed, 13 Dec 2017 12:19:27 +0530
Subject: [PATCH 0949/1225] Updating phrase

---
 tensorflow/java/src/test/java/org/tensorflow/SessionTest.java | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/java/src/test/java/org/tensorflow/SessionTest.java b/tensorflow/java/src/test/java/org/tensorflow/SessionTest.java
index 28e8628a85..e8cc76c2a6 100644
--- a/tensorflow/java/src/test/java/org/tensorflow/SessionTest.java
+++ b/tensorflow/java/src/test/java/org/tensorflow/SessionTest.java
@@ -151,7 +151,7 @@ public class SessionTest {
       s.close();
       try {
         s.runner().run();
-        fail("methods on a close() session should fail");
+        fail("methods on a session should fail after close() is called");
       } catch (IllegalStateException e) {
         // expected exception
       }
-- 
GitLab


From d2185de9703d9a1b98f0e41bcc78ee493639bdaa Mon Sep 17 00:00:00 2001
From: Taehoon Lee <me@taehoonlee.com>
Date: Wed, 13 Dec 2017 16:58:06 +0900
Subject: [PATCH 0950/1225] Fix typos

---
 tensorflow/c/eager/tape.h                                       | 2 +-
 .../lite/kernels/internal/reference/portable_tensor_utils.h     | 2 +-
 tensorflow/contrib/lite/kernels/internal/tensor_utils.h         | 2 +-
 .../toco/graph_transformations/resolve_tensorflow_switch.cc     | 2 +-
 tensorflow/contrib/lite/toco/import_tensorflow.cc               | 2 +-
 .../opt/python/training/elastic_average_optimizer_test.py       | 2 +-
 tensorflow/core/grappler/costs/virtual_placer.h                 | 2 +-
 tensorflow/core/grappler/costs/virtual_scheduler_test.cc        | 2 +-
 tensorflow/docs_src/get_started/feature_columns.md              | 2 +-
 tensorflow/python/util/util.cc                                  | 2 +-
 10 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/tensorflow/c/eager/tape.h b/tensorflow/c/eager/tape.h
index 20ed037c52..c81b8058cb 100644
--- a/tensorflow/c/eager/tape.h
+++ b/tensorflow/c/eager/tape.h
@@ -350,7 +350,7 @@ BackpropInitialState<BackwardFunction> PrepareBackprop(
     // Call destructors for all unneeded gradient functions and
     // clear the op_tape. We can clear the tape because ownership of
     // backward functions that will be used for gradient computation
-    // has been transfered to `result`.
+    // has been transferred to `result`.
     for (const auto& op_pair : *op_tape) {
       op_pair.second.backward_function_deleter();
     }
diff --git a/tensorflow/contrib/lite/kernels/internal/reference/portable_tensor_utils.h b/tensorflow/contrib/lite/kernels/internal/reference/portable_tensor_utils.h
index c2ab78000b..7f90d731b8 100644
--- a/tensorflow/contrib/lite/kernels/internal/reference/portable_tensor_utils.h
+++ b/tensorflow/contrib/lite/kernels/internal/reference/portable_tensor_utils.h
@@ -22,7 +22,7 @@ limitations under the License.
 namespace tflite {
 namespace tensor_utils {
 
-// Limit a float input f betweeen +abs_limit and -abs_limit.
+// Limit a float input f between +abs_limit and -abs_limit.
 float PortableClip(float f, float abs_limit);
 
 // Multiply a matrix by a batch vector, and store results in a batch-size
diff --git a/tensorflow/contrib/lite/kernels/internal/tensor_utils.h b/tensorflow/contrib/lite/kernels/internal/tensor_utils.h
index 0e69ef5982..e7e2994397 100644
--- a/tensorflow/contrib/lite/kernels/internal/tensor_utils.h
+++ b/tensorflow/contrib/lite/kernels/internal/tensor_utils.h
@@ -20,7 +20,7 @@ limitations under the License.
 namespace tflite {
 namespace tensor_utils {
 
-// Limit a float input f betweeen +abs_limit and -abs_limit.
+// Limit a float input f between +abs_limit and -abs_limit.
 float Clip(float f, float abs_limit);
 
 // Multiply a matrix by a batch vector, and store results in a batch-size
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_tensorflow_switch.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_tensorflow_switch.cc
index 55adfca037..150cf53da3 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_tensorflow_switch.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_tensorflow_switch.cc
@@ -59,7 +59,7 @@ bool ResolveTensorFlowSwitch::Run(Model* model, std::size_t op_index) {
   // From the TensorFlow docs on .switch() in
   // third_party/tensorflow/python/ops/control_flow_ops.py
   //
-  //    If `pred` is false, the `data` input is forwared to the first output.
+  //    If `pred` is false, the `data` input is forwarded to the first output.
   //    Otherwise, the data goes to the second output.
   //
   // Note that this comment used to say the opposite and was recently fixed:
diff --git a/tensorflow/contrib/lite/toco/import_tensorflow.cc b/tensorflow/contrib/lite/toco/import_tensorflow.cc
index 34d38f1fcb..b79f506269 100644
--- a/tensorflow/contrib/lite/toco/import_tensorflow.cc
+++ b/tensorflow/contrib/lite/toco/import_tensorflow.cc
@@ -1511,7 +1511,7 @@ void ConvertOperatorSpecialCasedAsRNNBackEdge(
     const NodeDef& node, const TensorFlowImportFlags& tf_import_flags,
     Model* model) {
   // At the moment, the only type of operator special-cased in this way is
-  // NextIteration, occuring only in control-flow cycles.
+  // NextIteration, occurring only in control-flow cycles.
   CHECK_EQ(node.op(), "NextIteration");
   CHECK_EQ(node.input_size(), 1);
   auto* rnn_state = model->flags.add_rnn_states();
diff --git a/tensorflow/contrib/opt/python/training/elastic_average_optimizer_test.py b/tensorflow/contrib/opt/python/training/elastic_average_optimizer_test.py
index 59e55fceee..446e91018d 100644
--- a/tensorflow/contrib/opt/python/training/elastic_average_optimizer_test.py
+++ b/tensorflow/contrib/opt/python/training/elastic_average_optimizer_test.py
@@ -144,7 +144,7 @@ class ElasticAverageOptimizerTest(test.TestCase):
     self.assertAllEqual(1.0, sessions[0].run(var_1_g))
     self.assertAllEqual(0, sessions[0].run(global_step))
 
-    # iteration 2, global varibale update
+    # iteration 2, global variable update
     sessions[0].run(train_ops[0])
 
     self.assertAllEqual(0.0, sessions[0].run(var_0))
diff --git a/tensorflow/core/grappler/costs/virtual_placer.h b/tensorflow/core/grappler/costs/virtual_placer.h
index 7ccb1ebb99..fee5ce0f51 100644
--- a/tensorflow/core/grappler/costs/virtual_placer.h
+++ b/tensorflow/core/grappler/costs/virtual_placer.h
@@ -41,7 +41,7 @@ class VirtualPlacer {
  private:
   // Converts given device name to Lowercase Fully-Qualified Name (LFQN) string.
   // This helps us disambiguate device names internally and simplify matching.
-  // If device_name couldn't be parsed succesfully, returns empty string.
+  // If device_name couldn't be parsed successfully, returns empty string.
   string to_lfqn_or_empty(const string& device_name) const;
 
   // Map based on the cluster info: cluster device name -> device properties.
diff --git a/tensorflow/core/grappler/costs/virtual_scheduler_test.cc b/tensorflow/core/grappler/costs/virtual_scheduler_test.cc
index 412b494be7..08a31646da 100644
--- a/tensorflow/core/grappler/costs/virtual_scheduler_test.cc
+++ b/tensorflow/core/grappler/costs/virtual_scheduler_test.cc
@@ -1580,7 +1580,7 @@ TEST_F(VirtualSchedulerTest, WhileLoop) {
   EXPECT_NE(exit_start_micro, exit_1_start_micro);
 
   // Check dependency among the nodes; no matter what scheduling mechanism we
-  // use, the scheduled ops should follow these depedency chains.
+  // use, the scheduled ops should follow these dependency chains.
   // Note that currently, VirtualScheduler executes while/Merge twice; hence,
   // we're not testing dependency chains related to while/Merge.
   // TODO(dyoon): after fixing while loop behavior correctly (run nodes in the
diff --git a/tensorflow/docs_src/get_started/feature_columns.md b/tensorflow/docs_src/get_started/feature_columns.md
index f9537927b7..e034483508 100644
--- a/tensorflow/docs_src/get_started/feature_columns.md
+++ b/tensorflow/docs_src/get_started/feature_columns.md
@@ -419,7 +419,7 @@ still adds significant value to the learning capability of your models.
 Somewhat counterintuitively, when creating feature crosses, you typically still
 should include the original (uncrossed) features in your model (as in the
 preceding code snippet). The independent latitude and longitude features help the
-model distinguish between examples where a hash collision has occured in the
+model distinguish between examples where a hash collision has occurred in the
 crossed feature.
 
 ## Indicator and embedding columns
diff --git a/tensorflow/python/util/util.cc b/tensorflow/python/util/util.cc
index c3d7611ad4..a41fa7df25 100644
--- a/tensorflow/python/util/util.cc
+++ b/tensorflow/python/util/util.cc
@@ -29,7 +29,7 @@ bool WarnedThatSetIsNotSequence = false;
 
 // Returns 1 if `o` is considered a sequence for the purposes of Flatten().
 // Returns 0 otherwise.
-// Returns -1 if an error occured.
+// Returns -1 if an error occurred.
 int IsSequenceHelper(PyObject* o) {
   if (PyDict_Check(o)) return true;
   if (PySet_Check(o) && !WarnedThatSetIsNotSequence) {
-- 
GitLab


From 2eae1ac21ce28f3b2cafe9e12a25b3bddc475847 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 13 Dec 2017 08:03:03 -0800
Subject: [PATCH 0951/1225] Standardize attribute naming for operators
 specifying a dimension to "axis". This mirrors TensorFlow's attribute naming.

PiperOrigin-RevId: 178903728
---
 .../contrib/lite/toco/export_tensorflow.cc    | 25 ++++++-------
 .../propagate_fixed_sizes.cc                  |  8 ++---
 .../resolve_constant_concatenation.cc         |  2 +-
 .../resolve_mean_attributes.cc                | 12 +++----
 .../resolve_tensorflow_concat.cc              | 35 +++++++++----------
 .../resolve_constant_concatenation_test.cc    | 18 +++++-----
 tensorflow/contrib/lite/toco/model.h          |  6 ++--
 .../contrib/lite/toco/tflite/operator.cc      |  4 +--
 .../contrib/lite/toco/tflite/operator_test.cc |  4 +--
 9 files changed, 54 insertions(+), 60 deletions(-)

diff --git a/tensorflow/contrib/lite/toco/export_tensorflow.cc b/tensorflow/contrib/lite/toco/export_tensorflow.cc
index e18cf46c69..bddb83206b 100644
--- a/tensorflow/contrib/lite/toco/export_tensorflow.cc
+++ b/tensorflow/contrib/lite/toco/export_tensorflow.cc
@@ -780,13 +780,12 @@ void ConvertConcatenationOperator(const Model& model,
   auto* dc_op = tensorflow_graph->add_node();
   dc_op->set_op("ConcatV2");
   dc_op->set_name(src_op.outputs[0]);
-  const string dummy_concat_dim = src_op.outputs[0] + "/concat_dim";
-  CreateDummyConcatDimTensorConst(dummy_concat_dim, src_op.concat_dim,
-                                  tensorflow_graph);
+  const string dummy_axis = src_op.outputs[0] + "/axis";
+  CreateDummyConcatDimTensorConst(dummy_axis, src_op.axis, tensorflow_graph);
   for (const auto& input : src_op.inputs) {
     *dc_op->add_input() = input;
   }
-  *dc_op->add_input() = dummy_concat_dim;
+  *dc_op->add_input() = dummy_axis;
   (*dc_op->mutable_attr())["T"].set_type(DT_FLOAT);
   (*dc_op->mutable_attr())["Tidx"].set_type(DT_INT32);
   (*dc_op->mutable_attr())["N"].set_i(src_op.inputs.size());
@@ -993,22 +992,21 @@ void ConvertLstmCellOperator(const Model& model, const LstmCellOperator& src_op,
   const string concat_output = base + "basic_lstm_cell/concat";
   // Op names have been chosen to match the tf.slim LSTM naming
   // as closely as possible.
-  const int concat_dim =
+  const int axis =
       model.arrays.at(src_op.inputs[LstmCellOperator::PREV_ACTIV_INPUT])
           ->shape()
           .dimensions_count() -
       1;
   // Note that DATA_INPUT may have extra size 1 dimensions, but TF concat
   // works the same since the tensor has the same underlying data layout.
-  const string concat_dim_output = concat_output + "/concat_dim";
-  CreateDummyConcatDimTensorConst(concat_dim_output, concat_dim,
-                                  tensorflow_graph);
+  const string axis_output = concat_output + "/axis";
+  CreateDummyConcatDimTensorConst(axis_output, axis, tensorflow_graph);
   auto* concat_op = tensorflow_graph->add_node();
   concat_op->set_op("ConcatV2");
   concat_op->set_name(concat_output);
   *concat_op->add_input() = src_op.inputs[LstmCellOperator::DATA_INPUT];
   *concat_op->add_input() = src_op.inputs[LstmCellOperator::PREV_ACTIV_INPUT];
-  *concat_op->add_input() = concat_dim_output;
+  *concat_op->add_input() = axis_output;
   (*concat_op->mutable_attr())["T"].set_type(DT_FLOAT);
   (*concat_op->mutable_attr())["Tidx"].set_type(DT_INT32);
   (*concat_op->mutable_attr())["N"].set_i(2);  // Number of inputs
@@ -1069,8 +1067,7 @@ void ConvertLstmCellOperator(const Model& model, const LstmCellOperator& src_op,
   // Split
   string split_dim_output = base + "split/split_dim";
   // The dimension is the same as the concatenation dimension
-  CreateDummyConcatDimTensorConst(split_dim_output, concat_dim,
-                                  tensorflow_graph);
+  CreateDummyConcatDimTensorConst(split_dim_output, axis, tensorflow_graph);
   string split_output = base + "split";
   auto* split_op = tensorflow_graph->add_node();
   split_op->set_op("Split");
@@ -1298,11 +1295,11 @@ void ConvertMeanOperator(const Model& model, const MeanOperator& src_op,
   auto* tensor = (*params_op->mutable_attr())["value"].mutable_tensor();
   tensor->set_dtype(DT_INT32);
 
-  for (int i = 0; i < src_op.reduction_indices.size(); ++i) {
-    tensor->add_int_val(src_op.reduction_indices[i]);
+  for (int i = 0; i < src_op.axis.size(); ++i) {
+    tensor->add_int_val(src_op.axis[i]);
   }
   auto* shape = tensor->mutable_tensor_shape();
-  shape->add_dim()->set_size(src_op.reduction_indices.size());
+  shape->add_dim()->set_size(src_op.axis.size());
 }
 
 void ConvertSqueezeOperator(const Model& model, const SqueezeOperator& src_op,
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/propagate_fixed_sizes.cc b/tensorflow/contrib/lite/toco/graph_transformations/propagate_fixed_sizes.cc
index 4530806ede..308dadfdeb 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/propagate_fixed_sizes.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/propagate_fixed_sizes.cc
@@ -498,14 +498,14 @@ void ProcessConcatenationOperator(Model* model, ConcatenationOperator* op) {
     CHECK_EQ(input_array.shape().dimensions_count(),
              output_array.shape().dimensions_count());
     const std::vector<int>& input_dims = input_array.shape().dims();
-    CHECK_LT(op->concat_dim, input_dims.size());
-    concat_size += input_dims[op->concat_dim];
+    CHECK_LT(op->axis, input_dims.size());
+    concat_size += input_dims[op->axis];
   }
   // Write out the concat_size on the output array shape.
   auto& output_shape = *output_array.mutable_shape();
   auto& output_dims = *output_shape.mutable_dims();
-  CHECK_LT(op->concat_dim, output_shape.dimensions_count());
-  output_dims[op->concat_dim] = concat_size;
+  CHECK_LT(op->axis, output_shape.dimensions_count());
+  output_dims[op->axis] = concat_size;
 }
 
 void ProcessTensorFlowSplitOperator(Model* model, TensorFlowSplitOperator* op) {
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_concatenation.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_concatenation.cc
index 0983c43849..86033275a0 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_concatenation.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_concatenation.cc
@@ -151,7 +151,7 @@ bool ResolveConstantConcatenation::Run(Model* model, std::size_t op_index) {
     if (!IsDiscardableArray(*model, input_name)) return false;
   }
 
-  const int concatenation_axis = concat_op->concat_dim;
+  const int concatenation_axis = concat_op->axis;
 
   CHECK_EQ(concat_op->outputs.size(), 1);
   string concatenated_array_name = concat_op->outputs[0];
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_mean_attributes.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_mean_attributes.cc
index 5d6ac331be..444f59d14b 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_mean_attributes.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_mean_attributes.cc
@@ -29,7 +29,7 @@ bool ResolveMeanAttributes::Run(Model* model, std::size_t op_index) {
   if (mean_op->type != OperatorType::kMean) return false;
   auto* op = static_cast<MeanOperator*>(mean_op);
 
-  if (!op->reduction_indices.empty()) {
+  if (!op->axis.empty()) {
     // Attributes already resolved
     return false;
   }
@@ -40,17 +40,15 @@ bool ResolveMeanAttributes::Run(Model* model, std::size_t op_index) {
   if (!indices_array.has_shape()) return false;
 
   // We only support simultaneous reduction over width and height.
-  std::vector<int> reduction_indices =
-      indices_array.GetBuffer<ArrayDataType::kInt32>().data;
-  if (reduction_indices.size() != 2) {
+  std::vector<int> axis = indices_array.GetBuffer<ArrayDataType::kInt32>().data;
+  if (axis.size() != 2) {
     return false;
   }
-  if (!((reduction_indices[0] == 1 && reduction_indices[1] == 2) ||
-        (reduction_indices[0] == 2 && reduction_indices[1] == 1))) {
+  if (!((axis[0] == 1 && axis[1] == 2) || (axis[0] == 2 && axis[1] == 1))) {
     return false;
   }
 
-  op->reduction_indices = reduction_indices;
+  op->axis = axis;
   return true;
 }
 
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_tensorflow_concat.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_tensorflow_concat.cc
index b482f5cf51..c6723a880e 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_tensorflow_concat.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_tensorflow_concat.cc
@@ -35,37 +35,36 @@ bool ResolveTensorFlowConcat::Run(Model* model, std::size_t op_index) {
 
   CHECK_GE(tf_concat_op->inputs.size(), 2);
   // TensorFlow Concat and ConcatV2 nodes only differ by the ordering
-  // of inputs: in Concat, the concat_dim is the first input, while in
+  // of inputs: in Concat,the axis is the first input, while in
   // ConcatV2, it is the last input.
-  std::size_t concat_dim_pos = 0;
+  std::size_t axis_pos = 0;
   if (tf_concat_op->type == OperatorType::kTensorFlowConcatV2) {
-    concat_dim_pos = tf_concat_op->inputs.size() - 1;
+    axis_pos = tf_concat_op->inputs.size() - 1;
   }
-  const string concat_dim_name = tf_concat_op->inputs[concat_dim_pos];
+  const string axis_name = tf_concat_op->inputs[axis_pos];
   std::vector<string> concat_input_names;
   for (std::size_t i = 0; i < tf_concat_op->inputs.size(); i++) {
-    if (i != concat_dim_pos) {
+    if (i != axis_pos) {
       concat_input_names.push_back(tf_concat_op->inputs[i]);
     }
   }
-  // If the concat_dim array hasn't been resolved to a constant yet,
+  // If the axis array hasn't been resolved to a constant yet,
   // we need to yield.
-  const auto& concat_dim_array = model->GetArray(concat_dim_name);
-  if (!concat_dim_array.buffer) {
-    AddMessageF("Waiting for the concat_dim of %s to be resolved to a constant",
+  const auto& axis_array = model->GetArray(axis_name);
+  if (!axis_array.buffer) {
+    AddMessageF("Waiting for the axis of %s to be resolved to a constant",
                 LogName(*tf_concat_op));
     return false;
   }
 
-  CHECK(concat_dim_array.data_type == ArrayDataType::kInt32);
-  const auto& concat_dim_data =
-      concat_dim_array.GetBuffer<ArrayDataType::kInt32>().data;
-  CHECK_EQ(concat_dim_data.size(), 1);
-  const int concat_dim = concat_dim_data[0];
+  CHECK(axis_array.data_type == ArrayDataType::kInt32);
+  const auto& axis_data = axis_array.GetBuffer<ArrayDataType::kInt32>().data;
+  CHECK_EQ(axis_data.size(), 1);
+  const int axis = axis_data[0];
 
   // Create the Concatenation op replacing the TensorFlowConcat op.
   auto* concatenation_op = new ConcatenationOperator;
-  concatenation_op->concat_dim = concat_dim;
+  concatenation_op->axis = axis;
   concatenation_op->inputs = concat_input_names;
   concatenation_op->outputs = {tf_concat_op->outputs[0]};
   auto depth_concat_it = model->operators.emplace(concat_it, concatenation_op);
@@ -74,9 +73,9 @@ bool ResolveTensorFlowConcat::Run(Model* model, std::size_t op_index) {
   concat_it = depth_concat_it + 1;
   CHECK_EQ(concat_it->get(), tf_concat_op);
 
-  // Remove the concat_dim array if it is not used by anything else.
-  if (CountOpsWithInput(*model, concat_dim_name) == 1) {
-    model->arrays.erase(concat_dim_name);
+  // Remove the axis array if it is not used by anything else.
+  if (CountOpsWithInput(*model, axis_name) == 1) {
+    model->arrays.erase(axis_name);
   }
   // Remove the TensorFlowConcat op
   model->operators.erase(concat_it);
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/tests/resolve_constant_concatenation_test.cc b/tensorflow/contrib/lite/toco/graph_transformations/tests/resolve_constant_concatenation_test.cc
index c6705ad305..a14016e8e2 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/tests/resolve_constant_concatenation_test.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/tests/resolve_constant_concatenation_test.cc
@@ -109,7 +109,7 @@ class ResolveConstantConcatenationTest : public ::testing::Test {
   // Prepare a hypothetical TOCO model with one Concatenation operator in it
   // together with 4 arrays as its inputs.
   // It receives the dimension of concatenation as input.
-  void PrepareModel(Model* model, int concat_dim) {
+  void PrepareModel(Model* model, int axis) {
     std::vector<string> concat_input_names = {"array0", "array1", "array2",
                                               "array3"};
 
@@ -142,7 +142,7 @@ class ResolveConstantConcatenationTest : public ::testing::Test {
       cnt++;
     }
     auto* concatenation_op = new ConcatenationOperator;
-    concatenation_op->concat_dim = concat_dim;
+    concatenation_op->axis = axis;
     concatenation_op->inputs = concat_input_names;
     concatenation_op->outputs = {"concat_op_outputs"};
     Array& out_array = model->GetOrCreateArray(concatenation_op->outputs[0]);
@@ -151,7 +151,7 @@ class ResolveConstantConcatenationTest : public ::testing::Test {
     std::vector<int>* out_array_shape_dim = out_array_shape->mutable_dims();
     out_array_shape_dim->resize(kDim);
     for (int i = 0; i < kDim; i++) {
-      if (i == concat_dim) {
+      if (i == axis) {
         (*out_array_shape_dim)[i] = kNumArrays * kElementPerDim;
       } else {
         (*out_array_shape_dim)[i] = kElementPerDim;
@@ -163,8 +163,8 @@ class ResolveConstantConcatenationTest : public ::testing::Test {
 
 TEST_F(ResolveConstantConcatenationTest, ConcatAtAxis0) {
   Model model;
-  const int concat_dim = 0;
-  PrepareModel(&model, concat_dim);
+  const int axis = 0;
+  PrepareModel(&model, axis);
 
   GraphTransformationsSet graph_transformation_set;
   graph_transformation_set.Add(new toco::ResolveConstantConcatenation);
@@ -182,8 +182,8 @@ TEST_F(ResolveConstantConcatenationTest, ConcatAtAxis0) {
 
 TEST_F(ResolveConstantConcatenationTest, ConcatAtAxis1) {
   Model model;
-  const int concat_dim = 1;
-  PrepareModel(&model, concat_dim);
+  const int axis = 1;
+  PrepareModel(&model, axis);
 
   GraphTransformationsSet graph_transformation_set;
   graph_transformation_set.Add(new toco::ResolveConstantConcatenation);
@@ -201,8 +201,8 @@ TEST_F(ResolveConstantConcatenationTest, ConcatAtAxis1) {
 
 TEST_F(ResolveConstantConcatenationTest, ConcatAtAxis2) {
   Model model;
-  const int concat_dim = 2;
-  PrepareModel(&model, concat_dim);
+  const int axis = 2;
+  PrepareModel(&model, axis);
 
   GraphTransformationsSet graph_transformation_set;
   graph_transformation_set.Add(new toco::ResolveConstantConcatenation);
diff --git a/tensorflow/contrib/lite/toco/model.h b/tensorflow/contrib/lite/toco/model.h
index 57911b1e89..a481d8801c 100644
--- a/tensorflow/contrib/lite/toco/model.h
+++ b/tensorflow/contrib/lite/toco/model.h
@@ -546,7 +546,7 @@ struct AddOperator : Operator {
 };
 
 // Concatenation operator: concatenates its inputs
-// along the concat_dim dimension.
+// along the axis.
 //
 // Inputs: this operator accepts any number >= 1 of inputs.
 //   inputs[i]: the i-th array to concatenate.
@@ -554,7 +554,7 @@ struct AddOperator : Operator {
 // TensorFlow equivalent: Concat.
 struct ConcatenationOperator : Operator {
   ConcatenationOperator() : Operator(OperatorType::kConcatenation) {}
-  int concat_dim = 0;
+  int axis = 0;
 };
 
 // Reordering dimensions. Used only during tooling to transform graphs from
@@ -1260,7 +1260,7 @@ struct BatchToSpaceNDOperator : Operator {
 struct MeanOperator : Operator {
   MeanOperator() : Operator(OperatorType::kMean) {}
 
-  std::vector<int> reduction_indices;
+  std::vector<int> axis;
   bool keep_dims = false;
 };
 
diff --git a/tensorflow/contrib/lite/toco/tflite/operator.cc b/tensorflow/contrib/lite/toco/tflite/operator.cc
index 8a33500ddc..37f0378231 100644
--- a/tensorflow/contrib/lite/toco/tflite/operator.cc
+++ b/tensorflow/contrib/lite/toco/tflite/operator.cc
@@ -153,12 +153,12 @@ class Concatenation
   flatbuffers::Offset<TfLiteOptions> WriteOptions(
       const TocoOperator& op,
       flatbuffers::FlatBufferBuilder* builder) const override {
-    return ::tflite::CreateConcatenationOptions(*builder, op.concat_dim);
+    return ::tflite::CreateConcatenationOptions(*builder, op.axis);
   }
 
   void ReadOptions(const TfLiteOptions& options,
                    TocoOperator* op) const override {
-    op->concat_dim = options.axis();
+    op->axis = options.axis();
   }
 };
 
diff --git a/tensorflow/contrib/lite/toco/tflite/operator_test.cc b/tensorflow/contrib/lite/toco/tflite/operator_test.cc
index 8e77c56d8a..7301c6fb42 100644
--- a/tensorflow/contrib/lite/toco/tflite/operator_test.cc
+++ b/tensorflow/contrib/lite/toco/tflite/operator_test.cc
@@ -132,10 +132,10 @@ TEST_F(OperatorTest, CustomCast) {
 
 TEST_F(OperatorTest, CustomConcatenation) {
   ConcatenationOperator op;
-  op.concat_dim = 123;
+  op.axis = 123;
   auto output_toco_op = SerializeAndDeserialize(
       GetOperator("CONCATENATION", OperatorType::kConcatenation), op);
-  EXPECT_EQ(op.concat_dim, output_toco_op->concat_dim);
+  EXPECT_EQ(op.axis, output_toco_op->axis);
 }
 
 TEST_F(OperatorTest, CustomDepthToSpace) {
-- 
GitLab


From 9e993d2dc641c4fef0837b1109d99ceddc2a1c21 Mon Sep 17 00:00:00 2001
From: Johan Ju <johanj93@gmail.com>
Date: Wed, 13 Dec 2017 17:33:03 +0100
Subject: [PATCH 0952/1225] Always use std::cerr

---
 tensorflow/core/kernels/logging_ops.cc | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tensorflow/core/kernels/logging_ops.cc b/tensorflow/core/kernels/logging_ops.cc
index 67d603dd0a..bacf3e7740 100644
--- a/tensorflow/core/kernels/logging_ops.cc
+++ b/tensorflow/core/kernels/logging_ops.cc
@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
+#include <iostream>
 #include "tensorflow/core/framework/op_kernel.h"
 #include "tensorflow/core/lib/core/status.h"
 #include "tensorflow/core/lib/strings/str_util.h"
@@ -76,7 +77,7 @@ class PrintOp : public OpKernel {
       strings::StrAppend(&msg, "[", ctx->input(i).SummarizeValue(summarize_),
                          "]");
     }
-    LOG(INFO) << msg;
+    std::cerr << msg << std::endl;
   }
 
  private:
-- 
GitLab


From 246d5b95723a275b80f4b803024182b4221b4e43 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 13 Dec 2017 08:43:09 -0800
Subject: [PATCH 0953/1225] Stream::BlockHostUntilDone now returns Status
 rather than bool.

The now-deprecated Stream::BlockHostUntilDoneWithStatus remains, to facilitate a
multi-CL renaming transition.  Once all callers have been renamed to
BlockHostUntilDone, *WithStatus will be removed.

The StreamExecutor (private) method has also been renamed to BlockHostUntilDone.
It's only used by Stream.

The StreamExecutorInterface method will be renamed in a separate atomic CL.
It's harder to perform that transition gradually, and we've already performed an
atomic change previously, so we might as well fix it up in one shot.

PiperOrigin-RevId: 178907807
---
 tensorflow/stream_executor/stream.cc                | 10 +++++-----
 tensorflow/stream_executor/stream.h                 | 11 ++++-------
 tensorflow/stream_executor/stream_executor_pimpl.cc |  2 +-
 tensorflow/stream_executor/stream_executor_pimpl.h  |  2 +-
 4 files changed, 11 insertions(+), 14 deletions(-)

diff --git a/tensorflow/stream_executor/stream.cc b/tensorflow/stream_executor/stream.cc
index de65038d17..0512f4c79a 100644
--- a/tensorflow/stream_executor/stream.cc
+++ b/tensorflow/stream_executor/stream.cc
@@ -5055,7 +5055,7 @@ Stream &Stream::ThenEnqueueOnBackgroundThread(
   });
 }
 
-port::Status Stream::BlockHostUntilDoneWithStatus() {
+port::Status Stream::BlockHostUntilDone() {
   VLOG_CALL();
 
   if (!ok()) {
@@ -5072,7 +5072,7 @@ port::Status Stream::BlockHostUntilDoneWithStatus() {
     mutex_lock lock{mu_};
     for (auto &stream : sub_streams_) {
       if (!stream.second) {
-        first_error.Update(stream.first->BlockHostUntilDoneWithStatus());
+        first_error.Update(stream.first->BlockHostUntilDone());
         // Set this sub-stream as available.
         stream.second = true;
       }
@@ -5081,13 +5081,13 @@ port::Status Stream::BlockHostUntilDoneWithStatus() {
 
   temporary_memory_manager_.DeallocateFinalizedTemporaries();
 
-  first_error.Update(parent_->BlockHostUntilDoneWithStatus(this));
+  first_error.Update(parent_->BlockHostUntilDone(this));
   CheckError(first_error.ok());
   return first_error;
 }
 
-bool Stream::BlockHostUntilDone() {
-  return BlockHostUntilDoneWithStatus().ok();
+port::Status Stream::BlockHostUntilDoneWithStatus() {
+  return BlockHostUntilDone();
 }
 
 }  // namespace gputools
diff --git a/tensorflow/stream_executor/stream.h b/tensorflow/stream_executor/stream.h
index 15a5a2b6cb..4c34452048 100644
--- a/tensorflow/stream_executor/stream.h
+++ b/tensorflow/stream_executor/stream.h
@@ -1905,15 +1905,12 @@ class Stream {
   //
   // Returns an OK status if the blocking was successful and the stream is ok().
   // Otherwise returns an error describing why the blocking failed.
-  //
-  // TODO(b/70298427): Rename to BlockHostUntilDone, once all callers have been
-  // converted from the bool form.
-  port::Status BlockHostUntilDoneWithStatus() LOCKS_EXCLUDED(mu_);
+  port::Status BlockHostUntilDone() LOCKS_EXCLUDED(mu_);
 
-  // DEPRECATED(b/70298427) - new code should use BlockHostUntilDoneWithStatus()
+  // DEPRECATED(b/70298427) - new code should use BlockHostUntilDone()
   //
-  // Equivalent to BlockHostUntilDoneWithStatus().ok().
-  bool BlockHostUntilDone() LOCKS_EXCLUDED(mu_);
+  // Equivalent to BlockHostUntilDone()
+  port::Status BlockHostUntilDoneWithStatus() LOCKS_EXCLUDED(mu_);
 
   // Warning! This method interacts with internal threads in
   // sometimes-unpredictable ways and is intended for GPU-Executor-internal
diff --git a/tensorflow/stream_executor/stream_executor_pimpl.cc b/tensorflow/stream_executor/stream_executor_pimpl.cc
index 719f292937..c4b248657e 100644
--- a/tensorflow/stream_executor/stream_executor_pimpl.cc
+++ b/tensorflow/stream_executor/stream_executor_pimpl.cc
@@ -432,7 +432,7 @@ bool StreamExecutor::Launch(Stream *stream, const ThreadDim &thread_dims,
   return implementation_->Launch(stream, thread_dims, block_dims, kernel, args);
 }
 
-port::Status StreamExecutor::BlockHostUntilDoneWithStatus(Stream *stream) {
+port::Status StreamExecutor::BlockHostUntilDone(Stream *stream) {
   port::Status result;
   SCOPED_TRACE(TraceListener::BlockHostUntilDone, &result, stream);
 
diff --git a/tensorflow/stream_executor/stream_executor_pimpl.h b/tensorflow/stream_executor/stream_executor_pimpl.h
index d2965dbfd7..a2a77218cb 100644
--- a/tensorflow/stream_executor/stream_executor_pimpl.h
+++ b/tensorflow/stream_executor/stream_executor_pimpl.h
@@ -481,7 +481,7 @@ class StreamExecutor {
   // Causes the host code to synchronously wait for operations entrained onto
   // stream to complete. Effectively a join on the asynchronous device
   // operations enqueued on the stream before this program point.
-  port::Status BlockHostUntilDoneWithStatus(Stream *stream);
+  port::Status BlockHostUntilDone(Stream *stream);
 
   // Synchronously allocates size bytes on the underlying platform and returns
   // an opaque void* representing that allocation. In the case of failure,
-- 
GitLab


From 2b1b7dffcd2c76876efdbcfc431424e259da3bf4 Mon Sep 17 00:00:00 2001
From: Benoit Steiner <bsteiner@google.com>
Date: Wed, 13 Dec 2017 08:52:40 -0800
Subject: [PATCH 0954/1225] Check that all the inputs to a Concat op are of the
 same rank.

PiperOrigin-RevId: 178908773
---
 tensorflow/core/framework/common_shape_fns.cc | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/tensorflow/core/framework/common_shape_fns.cc b/tensorflow/core/framework/common_shape_fns.cc
index 036e3473b1..7ab8e3ec18 100644
--- a/tensorflow/core/framework/common_shape_fns.cc
+++ b/tensorflow/core/framework/common_shape_fns.cc
@@ -1125,16 +1125,20 @@ Status ConcatShapeHelper(InferenceContext* c, int start_value_index,
     for (int i = start_value_index; i < end_value_index; ++i) {
       if (rank == InferenceContext::kUnknownRank) rank = c->Rank(c->input(i));
       if (rank != InferenceContext::kUnknownRank) {
-        TF_RETURN_IF_ERROR(c->WithRank(c->input(i), rank, &unused));
+        break;
       }
     }
     if (rank == InferenceContext::kUnknownRank) {
       c->set_output(0, c->UnknownShape());
       return Status::OK();
-    }
-    if (rank == 0) {
+    } else if (rank == 0) {
       return errors::InvalidArgument(
           "Can't concatenate scalars (use tf.stack instead)");
+    } else {
+      for (int i = start_value_index; i < end_value_index; ++i) {
+        // Check that all the inputs are of the correct rank.
+        TF_RETURN_IF_ERROR(c->WithRank(c->input(i), rank, &unused));
+      }
     }
     // Build result of <rank> different unknown dims.
     std::vector<DimensionHandle> dims;
-- 
GitLab


From 185c593cb71cb6d8116ba05c97e9385642648f1b Mon Sep 17 00:00:00 2001
From: Derek Murray <mrry@google.com>
Date: Wed, 13 Dec 2017 08:56:20 -0800
Subject: [PATCH 0955/1225] Automated g4 rollback of changelist 178759398

PiperOrigin-RevId: 178909147
---
 tensorflow/c/c_api_function.cc                |  5 +++
 tensorflow/c/c_api_function_test.cc           | 45 +++++++++++++++++++
 tensorflow/c/c_test_util.cc                   |  9 ++++
 tensorflow/c/c_test_util.h                    |  3 ++
 tensorflow/python/framework/function_test.py  | 42 +++++++++++++++++
 .../python/framework/graph_to_function_def.py |  7 +++
 6 files changed, 111 insertions(+)

diff --git a/tensorflow/c/c_api_function.cc b/tensorflow/c/c_api_function.cc
index dcb818b88b..b9312c2974 100644
--- a/tensorflow/c/c_api_function.cc
+++ b/tensorflow/c/c_api_function.cc
@@ -226,6 +226,11 @@ Status FillFunctionBody(
       }
       node_def->add_input(strings::StrCat("^", normalized));
     }
+
+    // A function is stateful if any of its nodes are stateful.
+    if (node->op_def().is_stateful()) {
+      fdef->mutable_signature()->set_is_stateful(true);
+    }
   }
   return Status::OK();
 }
diff --git a/tensorflow/c/c_api_function_test.cc b/tensorflow/c/c_api_function_test.cc
index d5580b6589..2e2293ca85 100644
--- a/tensorflow/c/c_api_function_test.cc
+++ b/tensorflow/c/c_api_function_test.cc
@@ -1482,6 +1482,51 @@ TEST_F(CApiFunctionTest, GetOpDef) {
   EXPECT_EQ(op_def.name(), func_name_);
   EXPECT_EQ(op_def.input_arg_size(), 1);
   EXPECT_EQ(op_def.output_arg_size(), 1);
+  EXPECT_FALSE(op_def.is_stateful());
+
+  TF_DeleteBuffer(buffer);
+}
+
+void DefineStatefulFunction(const char* name, TF_Function** func) {
+  std::unique_ptr<TF_Graph, decltype(&TF_DeleteGraph)> func_graph(
+      TF_NewGraph(), TF_DeleteGraph);
+  std::unique_ptr<TF_Status, decltype(&TF_DeleteStatus)> s(TF_NewStatus(),
+                                                           TF_DeleteStatus);
+
+  TF_Tensor* tensor_shape = Int32Tensor({37, 1});
+  TF_Operation* shape = Const(tensor_shape, func_graph.get(), s.get(), "shape");
+  TF_Operation* random =
+      RandomUniform(shape, TF_FLOAT, func_graph.get(), s.get());
+
+  TF_Output inputs[] = {};
+  TF_Output outputs[] = {{random, 0}};
+  *func = TF_GraphToFunction(func_graph.get(), name, /*append_hash=*/false, -1,
+                             /*opers=*/nullptr, 0, inputs, 1, outputs,
+                             /*output_names=*/nullptr,
+                             /*opts=*/nullptr, "", s.get());
+  ASSERT_EQ(TF_OK, TF_GetCode(s.get())) << TF_Message(s.get());
+  ASSERT_NE(*func, nullptr);
+  TF_DeleteTensor(tensor_shape);
+}
+
+TEST_F(CApiFunctionTest, StatefulOpDef) {
+  DefineStatefulFunction(func_name_, &func_);
+  TF_GraphCopyFunction(host_graph_, func_, nullptr, s_);
+  ASSERT_EQ(TF_OK, TF_GetCode(s_)) << TF_Message(s_);
+
+  // Test we can retrieve function OpDef from graph
+  TF_Buffer* buffer = TF_NewBuffer();
+  TF_GraphGetOpDef(host_graph_, func_name_, buffer, s_);
+  ASSERT_EQ(TF_OK, TF_GetCode(s_)) << TF_Message(s_);
+
+  // Sanity check returned OpDef
+  string data(static_cast<const char*>(buffer->data), buffer->length);
+  OpDef op_def;
+  op_def.ParseFromString(data);
+  EXPECT_EQ(op_def.name(), func_name_);
+  EXPECT_EQ(op_def.input_arg_size(), 0);
+  EXPECT_EQ(op_def.output_arg_size(), 1);
+  EXPECT_TRUE(op_def.is_stateful());
 
   TF_DeleteBuffer(buffer);
 }
diff --git a/tensorflow/c/c_test_util.cc b/tensorflow/c/c_test_util.cc
index c291a2e440..37439ff0be 100644
--- a/tensorflow/c/c_test_util.cc
+++ b/tensorflow/c/c_test_util.cc
@@ -193,6 +193,15 @@ TF_Operation* LessThan(TF_Output l, TF_Output r, TF_Graph* graph,
   return TF_FinishOperation(desc, s);
 }
 
+TF_Operation* RandomUniform(TF_Operation* shape, TF_DataType dtype,
+                            TF_Graph* graph, TF_Status* s) {
+  TF_OperationDescription* desc =
+      TF_NewOperation(graph, "RandomUniform", "random_uniform");
+  TF_AddInput(desc, {shape, 0});
+  TF_SetAttrType(desc, "dtype", dtype);
+  return TF_FinishOperation(desc, s);
+}
+
 void Split3Helper(TF_Operation* input, TF_Graph* graph, TF_Status* s,
                   const char* name, TF_Operation** op) {
   TF_Operation* zero = ScalarConst(
diff --git a/tensorflow/c/c_test_util.h b/tensorflow/c/c_test_util.h
index d547337492..96a93afef3 100644
--- a/tensorflow/c/c_test_util.h
+++ b/tensorflow/c/c_test_util.h
@@ -74,6 +74,9 @@ TF_Operation* Neg(TF_Operation* n, TF_Graph* graph, TF_Status* s,
 
 TF_Operation* LessThan(TF_Output l, TF_Output r, TF_Graph* graph, TF_Status* s);
 
+TF_Operation* RandomUniform(TF_Operation* shape, TF_DataType dtype,
+                            TF_Graph* graph, TF_Status* s);
+
 // Split `input` along the first dimention into 3 tensors
 TF_Operation* Split3(TF_Operation* input, TF_Graph* graph, TF_Status* s,
                      const char* name = "split3");
diff --git a/tensorflow/python/framework/function_test.py b/tensorflow/python/framework/function_test.py
index 886c6f04b9..f5a97eb197 100644
--- a/tensorflow/python/framework/function_test.py
+++ b/tensorflow/python/framework/function_test.py
@@ -941,6 +941,48 @@ class FunctionTest(test.TestCase):
       self.assertEqual(100, sess.run(result_2))
       self.assertEqual((4.0, 100), sess.run((result_1, result_2)))
 
+  def testStatefulFunction(self):
+
+    @function.Defun()
+    def FunctionWithStatelessOp():
+      return constant_op.constant(42.0)
+
+    @function.Defun()
+    def FunctionWithStatefulOp():
+      return random_ops.random_uniform([100], maxval=10, dtype=dtypes.int32)
+
+    @function.Defun()
+    def FunctionWithStatelessFunctionCall():
+      return FunctionWithStatelessOp()
+
+    @function.Defun()
+    def FunctionWithStatefulFunctionCall():
+      return FunctionWithStatefulOp()
+
+    # Test that the `is_stateful` bit is propagated.
+    self.assertFalse(FunctionWithStatelessOp.definition.signature.is_stateful)
+    self.assertTrue(FunctionWithStatefulOp.definition.signature.is_stateful)
+    self.assertFalse(
+        FunctionWithStatelessFunctionCall.definition.signature.is_stateful)
+    self.assertTrue(
+        FunctionWithStatefulFunctionCall.definition.signature.is_stateful)
+
+    # Ensure that two invocations of the same random-number-generating
+    # function produce different results.
+    result1 = FunctionWithStatefulFunctionCall()
+    result2 = FunctionWithStatefulFunctionCall()
+
+    # Statefulness affects how the function is treated by the various
+    # optimization passes, so run the test in each optimizer
+    # configuration.
+    for config in _OptimizerOptions():
+      with session.Session(config=config) as sess:
+        val1, val2 = sess.run((result1, result2))
+        self.assertFalse(all(val1 == val2))
+        val3, val4 = sess.run((result1, result2))
+        self.assertFalse(all(val3 == val1))
+        self.assertFalse(all(val4 == val2))
+
 
 @test_util.with_c_api
 class FunctionsFromProtos(test.TestCase):
diff --git a/tensorflow/python/framework/graph_to_function_def.py b/tensorflow/python/framework/graph_to_function_def.py
index 448f87aa6e..625f31146b 100644
--- a/tensorflow/python/framework/graph_to_function_def.py
+++ b/tensorflow/python/framework/graph_to_function_def.py
@@ -110,6 +110,13 @@ def _add_op_node(op, func, input_dict):
                                                (node_def.input[i],
                                                 input_dict.items()))
       node_def.input[i] = input_dict[node_def.input[i]]
+  # The function is stateful if any of its operations are stateful.
+  # NOTE(mrry): The "Const" node typically does not have an `OpDef` associated
+  # with it, so we assume any nodes without an `OpDef` are stateless.
+  # TODO(skyewm): Remove the `is not None` test after we transition to the C
+  # API.
+  if op.op_def is not None and op.op_def.is_stateful:
+    func.signature.is_stateful = True
 
 
 def graph_to_function_def(graph, operations, inputs, outputs, out_names=None):
-- 
GitLab


From e31f38913d4018c2cee094e05a04833ac96f8b68 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 13 Dec 2017 09:54:52 -0800
Subject: [PATCH 0956/1225] Fix 'tags' parameter in
 predictor_factories.load_from_model.

tags was incorrectly being mapped to inputs.
Added basic unit tests.

PiperOrigin-RevId: 178916192
---
 tensorflow/contrib/predictor/BUILD            | 11 ++++
 .../contrib/predictor/predictor_factories.py  | 23 ++++-----
 .../predictor/predictor_factories_test.py     | 51 +++++++++++++++++++
 3 files changed, 72 insertions(+), 13 deletions(-)
 create mode 100644 tensorflow/contrib/predictor/predictor_factories_test.py

diff --git a/tensorflow/contrib/predictor/BUILD b/tensorflow/contrib/predictor/BUILD
index 82cd7b4c8a..d7c3d6c3be 100644
--- a/tensorflow/contrib/predictor/BUILD
+++ b/tensorflow/contrib/predictor/BUILD
@@ -136,6 +136,17 @@ py_test(
     ],
 )
 
+py_test(
+    name = "predictor_factories_test",
+    srcs = ["predictor_factories_test.py"],
+    data = [":test_export_dir"],
+    srcs_version = "PY2AND3",
+    tags = ["no_pip"],
+    deps = [
+        ":predictor_factories",
+    ],
+)
+
 py_test(
     name = "core_estimator_predictor_test",
     srcs = ["core_estimator_predictor_test.py"],
diff --git a/tensorflow/contrib/predictor/predictor_factories.py b/tensorflow/contrib/predictor/predictor_factories.py
index e3f30d917d..9485187c5d 100644
--- a/tensorflow/contrib/predictor/predictor_factories.py
+++ b/tensorflow/contrib/predictor/predictor_factories.py
@@ -12,7 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-
 """Factory functions for `Predictor`s."""
 
 from __future__ import absolute_import
@@ -59,9 +58,9 @@ def from_contrib_estimator(estimator,
   return contrib_estimator_predictor.ContribEstimatorPredictor(
       estimator,
       prediction_input_fn,
-      input_alternative_key,
-      output_alternative_key,
-      graph)
+      input_alternative_key=input_alternative_key,
+      output_alternative_key=output_alternative_key,
+      graph=graph)
 
 
 def from_estimator(estimator,
@@ -92,10 +91,7 @@ def from_estimator(estimator,
                     'tf.contrib.learn.Estimator. You likely want to call '
                     'from_contrib_estimator.')
   return core_estimator_predictor.CoreEstimatorPredictor(
-      estimator,
-      serving_input_receiver_fn,
-      output_key,
-      graph)
+      estimator, serving_input_receiver_fn, output_key=output_key, graph=graph)
 
 
 def from_saved_model(export_dir,
@@ -125,8 +121,9 @@ def from_saved_model(export_dir,
     ValueError: More than one of `signature_def_key` and `signature_def` is
       specified.
   """
-  return saved_model_predictor.SavedModelPredictor(export_dir,
-                                                   signature_def_key,
-                                                   signature_def,
-                                                   tags,
-                                                   graph)
+  return saved_model_predictor.SavedModelPredictor(
+      export_dir,
+      signature_def_key=signature_def_key,
+      signature_def=signature_def,
+      tags=tags,
+      graph=graph)
diff --git a/tensorflow/contrib/predictor/predictor_factories_test.py b/tensorflow/contrib/predictor/predictor_factories_test.py
new file mode 100644
index 0000000000..60ffeec653
--- /dev/null
+++ b/tensorflow/contrib/predictor/predictor_factories_test.py
@@ -0,0 +1,51 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for predictor.predictor_factories."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from tensorflow.contrib.predictor import predictor_factories
+from tensorflow.python.platform import test
+
+MODEL_DIR_NAME = 'contrib/predictor/test_export_dir'
+
+
+class PredictorFactoriesTest(test.TestCase):
+
+  @classmethod
+  def setUpClass(cls):
+    # Load a saved model exported from the arithmetic `Estimator`.
+    # See `testing_common.py`.
+    cls._export_dir = test.test_src_dir_path(MODEL_DIR_NAME)
+
+  def testFromSavedModel(self):
+    """Test loading from_saved_model."""
+    predictor_factories.from_saved_model(self._export_dir)
+
+  def testFromSavedModelWithTags(self):
+    """Test loading from_saved_model with tags."""
+    predictor_factories.from_saved_model(self._export_dir, tags='serve')
+
+  def testFromSavedModelWithBadTags(self):
+    """Test that loading fails for bad tags."""
+    bad_tags_regex = ('.*? could not be found in SavedModel')
+    with self.assertRaisesRegexp(RuntimeError, bad_tags_regex):
+      predictor_factories.from_saved_model(self._export_dir, tags='bad_tag')
+
+
+if __name__ == '__main__':
+  test.main()
-- 
GitLab


From 84f01b93f9e55b22c0edaebd4acb9c7be79f4a9c Mon Sep 17 00:00:00 2001
From: Yong Tang <yong.tang.github@outlook.com>
Date: Wed, 8 Nov 2017 16:13:29 +0000
Subject: [PATCH 0957/1225] Change `NHWC`/`NCHW` to `NWC`/`NCW` for conv1d

While working on 13105 I noticed that in the current code
base `conv1d` uses `NHWC`/`NCHW` which should really
be `NWC`/`NCW`.

This fix addresses this issue and keep `NHWC`/`NCHW`
compatible internally so that users will not be impacted.

Signed-off-by: Yong Tang <yong.tang.github@outlook.com>
---
 tensorflow/python/ops/nn_ops.py | 17 +++++++++--------
 1 file changed, 9 insertions(+), 8 deletions(-)

diff --git a/tensorflow/python/ops/nn_ops.py b/tensorflow/python/ops/nn_ops.py
index b3c0a22efc..0a1a276565 100644
--- a/tensorflow/python/ops/nn_ops.py
+++ b/tensorflow/python/ops/nn_ops.py
@@ -2260,9 +2260,9 @@ def conv1d(value, filters, stride, padding,
 
   Given an input tensor of shape
     [batch, in_width, in_channels]
-  if data_format is "NHWC", or
+  if data_format is "NWC", or
     [batch, in_channels, in_width]
-  if data_format is "NCHW",
+  if data_format is "NCW",
   and a filter / kernel tensor of shape
   [filter_width, in_channels, out_channels], this op reshapes
   the arguments to pass them to conv2d to perform the equivalent
@@ -2287,9 +2287,9 @@ def conv1d(value, filters, stride, padding,
       the filter is moved right at each step.
     padding: 'SAME' or 'VALID'
     use_cudnn_on_gpu: An optional `bool`.  Defaults to `True`.
-    data_format: An optional `string` from `"NHWC", "NCHW"`.  Defaults
-      to `"NHWC"`, the data is stored in the order of
-      [batch, in_width, in_channels].  The `"NCHW"` format stores
+    data_format: An optional `string` from `"NWC", "NCW"`.  Defaults
+      to `"NWC"`, the data is stored in the order of
+      [batch, in_width, in_channels].  The `"NCW"` format stores
       data as [batch, in_channels, in_width].
     name: A name for the operation (optional).
 
@@ -2301,15 +2301,16 @@ def conv1d(value, filters, stride, padding,
   """
   with ops.name_scope(name, "conv1d", [value, filters]) as name:
     # Reshape the input tensor to [batch, 1, in_width, in_channels]
-    if data_format is None or data_format == "NHWC":
+    if data_format is None or data_format == "NHWC" or data_format == "NWC":
       data_format = "NHWC"
       spatial_start_dim = 1
       strides = [1, 1, stride, 1]
-    elif data_format == "NCHW":
+    elif data_format == "NCHW" or data_format == "NCW":
+      data_format = "NCHW"
       spatial_start_dim = 2
       strides = [1, 1, 1, stride]
     else:
-      raise ValueError("data_format must be \"NHWC\" or \"NCHW\".")
+      raise ValueError("data_format must be \"NWC\" or \"NCW\".")
     value = array_ops.expand_dims(value, spatial_start_dim)
     filters = array_ops.expand_dims(filters, 0)
     result = gen_nn_ops.conv2d(value, filters, strides, padding,
-- 
GitLab


From 10d45f9ca118ed37b190140f9310e58f95d4d52c Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 13 Dec 2017 10:01:47 -0800
Subject: [PATCH 0958/1225] Make bfloat16 works with complex

PiperOrigin-RevId: 178917043
---
 tensorflow/core/framework/numeric_types.h   | 16 ++++++++++++++++
 tensorflow/python/lib/core/bfloat16.cc      | 10 +++++++++-
 tensorflow/python/lib/core/bfloat16_test.py | 15 ++++++++++++++-
 3 files changed, 39 insertions(+), 2 deletions(-)

diff --git a/tensorflow/core/framework/numeric_types.h b/tensorflow/core/framework/numeric_types.h
index 569a4c3756..70563d53ef 100644
--- a/tensorflow/core/framework/numeric_types.h
+++ b/tensorflow/core/framework/numeric_types.h
@@ -58,6 +58,14 @@ struct bfloat16 {
 #endif
   }
 
+  // Following the convention of numpy, converting between complex and
+  // float will lead to loss of imag value.
+  explicit EIGEN_DEVICE_FUNC bfloat16(const complex64& val)
+      : bfloat16(val.real()) {}
+
+  explicit EIGEN_DEVICE_FUNC bfloat16(const complex128& val)
+      : bfloat16(static_cast<float>(val.real())) {}
+
   template <class T>
   explicit EIGEN_DEVICE_FUNC bfloat16(const T& val)
       : bfloat16(static_cast<float>(val)) {}
@@ -129,6 +137,14 @@ struct bfloat16 {
     return static_cast<double>(float(*this));
   }
 
+  EIGEN_DEVICE_FUNC explicit operator complex64() const {
+    return complex64(float(*this), float(0.0));
+  }
+
+  EIGEN_DEVICE_FUNC explicit operator complex128() const {
+    return complex128(double(*this), double(0.0));
+  }
+
   static bfloat16 epsilon() {
     bfloat16 x;
     x.value = 0x3c00;  // 0x1.0p-7
diff --git a/tensorflow/python/lib/core/bfloat16.cc b/tensorflow/python/lib/core/bfloat16.cc
index dfe9eba03d..4902978e2d 100644
--- a/tensorflow/python/lib/core/bfloat16.cc
+++ b/tensorflow/python/lib/core/bfloat16.cc
@@ -527,7 +527,15 @@ bool Initialize() {
   if (!RegisterBfloat16Cast<int64>(NPY_INT64, /*cast_is_safe=*/false)) {
     return false;
   }
-
+  // Following the numpy convention. imag part is dropped when converting to
+  // float.
+  if (!RegisterBfloat16Cast<complex64>(NPY_COMPLEX64, /*cast_is_safe=*/true)) {
+    return false;
+  }
+  if (!RegisterBfloat16Cast<complex128>(NPY_COMPLEX128,
+                                        /*cast_is_safe=*/true)) {
+    return false;
+  }
   return true;
 }
 
diff --git a/tensorflow/python/lib/core/bfloat16_test.py b/tensorflow/python/lib/core/bfloat16_test.py
index 02af33d98b..0872348c51 100644
--- a/tensorflow/python/lib/core/bfloat16_test.py
+++ b/tensorflow/python/lib/core/bfloat16_test.py
@@ -174,7 +174,9 @@ class Bfloat16NumPyTest(test.TestCase):
     self.assertAllClose(x, x)
 
   def testCasts(self):
-    for dtype in [np.float16, np.float32, np.float64, np.int32, np.int64]:
+    for dtype in [
+        np.float16, np.float32, np.float64, np.int32, np.int64,
+        np.complex64, np.complex128]:
       x = np.array([[1, 2, 3]], dtype=dtype)
       y = x.astype(bfloat16)
       z = y.astype(dtype)
@@ -183,6 +185,17 @@ class Bfloat16NumPyTest(test.TestCase):
       self.assertTrue(np.all(x == z))
       self.assertEqual(dtype, z.dtype)
 
+  def testConformNumpyComplex(self):
+    for dtype in [np.complex64, np.complex128]:
+      x = np.array([1.1, 2.2 + 2.2j, 3.3], dtype=dtype)
+      y_np = x.astype(np.float32)
+      y_tf = x.astype(bfloat16)
+      self.assertAllClose(y_np, y_tf, atol=2e-2)
+
+      z_np = y_np.astype(dtype)
+      z_tf = y_tf.astype(dtype)
+      self.assertAllClose(z_np, z_tf, atol=2e-2)
+
   def testAdd(self):
     x = np.array([[1, 2, 3]], dtype=bfloat16)
     y = np.array([[4, 5, 6]], dtype=bfloat16)
-- 
GitLab


From 5ecc3bf148f3e260352638c40f0787f57e4be382 Mon Sep 17 00:00:00 2001
From: Yao Zhang <yaozhang@google.com>
Date: Wed, 13 Dec 2017 10:12:42 -0800
Subject: [PATCH 0959/1225] Simplify the naming of added transpose node.
 Refactor dimension check and the node creation for data format transform for
 better code reuse (data format transform to be used for both inputs and
 outputs).

PiperOrigin-RevId: 178918771
---
 .../grappler/optimizers/layout_optimizer.cc   | 95 +++++++++++--------
 .../optimizers/layout_optimizer_test.cc       |  8 +-
 .../python/grappler/layout_optimizer_test.py  | 17 ++--
 3 files changed, 65 insertions(+), 55 deletions(-)

diff --git a/tensorflow/core/grappler/optimizers/layout_optimizer.cc b/tensorflow/core/grappler/optimizers/layout_optimizer.cc
index b584df0882..f6af1f0c3e 100644
--- a/tensorflow/core/grappler/optimizers/layout_optimizer.cc
+++ b/tensorflow/core/grappler/optimizers/layout_optimizer.cc
@@ -281,9 +281,9 @@ class NodeProcessor : public GraphProcessor {
   }
 
  protected:
-  bool IsDimsN(const NodeDef& node, int n) const {
+  bool IsPortDimsN(const NodeDef& node, int port, int n) const {
     if (node.attr().find("_output_shapes") != node.attr().end()) {
-      auto shape = node.attr().at("_output_shapes").list().shape(0);
+      auto shape = node.attr().at("_output_shapes").list().shape(port);
       if (shape.unknown_rank()) {
         return false;
       }
@@ -294,7 +294,13 @@ class NodeProcessor : public GraphProcessor {
     return false;
   }
 
-  bool IsDimsFour(const NodeDef& node) const { return IsDimsN(node, 4); }
+  bool IsDimsN(const NodeDef& node, int n) const {
+    return IsPortDimsN(node, 0, n);
+  }
+
+  bool IsDimsFour(const NodeDef& node) const {
+    return NodeProcessor::IsDimsN(node, 4) || IsNodeNCHWToNHWC(node.name());
+  }
 
   bool IsNHWC() const {
     if (node_->attr().find("data_format") != node_->attr().end()) {
@@ -507,16 +513,14 @@ class NodeProcessor : public GraphProcessor {
   virtual Status AddLayoutTransposeToInputs() {
     std::vector<int> input_pos = GetInputPos();
     for (const auto& pos : input_pos) {
-      int output_pos;
-      string input_node_name = ParseNodeName(node_->input(pos), &output_pos);
-      string base_name =
-          strings::StrCat(node_->name(), "-", input_node_name, "-", output_pos);
       string node_name =
-          AddPrefixToNodeName(base_name, kTransposeNHWCToNCHW, "-");
-      auto input_node = node_map_->GetNode(node_->input(pos));
+          strings::StrCat(kTransposeNHWCToNCHW, "-", node_->name(), "-", pos);
       TF_RETURN_IF_ERROR(HasAttribute(*node_, "T"));
+      auto input_node = node_map_->GetNode(node_->input(pos));
       TF_RETURN_IF_ERROR(HasAttribute(*input_node, "_output_shapes"));
       string const_name = GetOrAddNodePermNHWCToNCHW(pos);
+      int output_pos;
+      ParseNodeName(node_->input(pos), &output_pos);
       AddNodeTranspose(
           node_name, node_->input(pos), const_name,
           node_->attr().at("T").type(),
@@ -532,29 +536,38 @@ class NodeProcessor : public GraphProcessor {
   virtual Status AddLayoutTransposeToOutputs() {
     auto outputs = node_map_->GetOutputs(node_->name());
     string const_name = GetOrAddNodePermNCHWToNHWC();
+    int output_count = 0;
     for (const auto& output : outputs) {
+      int connections = 0;
+      int connections_removed = 0;
       for (int i = 0; i < output->input_size(); i++) {
         auto& input = *output->mutable_input(i);
         int input_port;
         string input_name = ParseNodeName(input, &input_port);
         auto output_pos = GetOutputPos();
-        if (input_name == node_->name() &&
-            output_pos.find(input_port) != output_pos.end()) {
-          string base_name =
-              strings::StrCat(node_->name(), "-", output->name(), "-", i);
-          string node_name =
-              AddPrefixToNodeName(base_name, kTransposeNCHWToNHWC, "-");
-          TF_RETURN_IF_ERROR(HasAttribute(*node_, "T"));
-          TF_RETURN_IF_ERROR(HasAttribute(*node_, "_output_shapes"));
-          AddNodeTranspose(
-              node_name, input, const_name, node_->attr().at("T").type(),
-              node_->attr().at("_output_shapes").list().shape(0), false);
-          input = node_name;
-          node_map_->AddOutput(node_->name(), node_name);
-          node_map_->AddOutput(node_name, output->name());
+        if (input_name == node_->name()) {
+          connections++;
+          if (output_pos.find(input_port) != output_pos.end()) {
+            connections_removed++;
+            string added_node_name =
+                strings::StrCat(kTransposeNCHWToNHWC, "-", node_->name(), "-",
+                                output_count, "-", i);
+            TF_RETURN_IF_ERROR(HasAttribute(*node_, "T"));
+            TF_RETURN_IF_ERROR(HasAttribute(*node_, "_output_shapes"));
+            AddNodeTranspose(added_node_name, input, const_name,
+                             node_->attr().at("T").type(),
+                             node_->attr().at("_output_shapes").list().shape(0),
+                             false);
+            input = added_node_name;
+            node_map_->AddOutput(node_->name(), added_node_name);
+            node_map_->AddOutput(added_node_name, output->name());
+          }
         }
       }
-      node_map_->RemoveOutput(node_->name(), output->name());
+      if (connections == connections_removed) {
+        node_map_->RemoveOutput(node_->name(), output->name());
+      }
+      output_count++;
     }
     return Status::OK();
   }
@@ -583,10 +596,10 @@ class NodeProcessor : public GraphProcessor {
     return const_node;
   }
 
-  void AddNodeDataFormatOp(const string& op, int input_pos, DataType dtype) {
+  NodeDef* AddNodeDataFormatOp(const string& name, const string& input_name,
+                               const string& op, DataType dtype) {
     NodeDef* added_node = graph_->add_node();
-    added_node->set_name(
-        strings::StrCat(kDataFormatOp, "_", node_->name(), "_", input_pos));
+    added_node->set_name(name);
     added_node->set_op(op);
     node_map_->AddNode(added_node->name(), added_node);
     added_node->set_device(node_->device());
@@ -598,7 +611,16 @@ class NodeProcessor : public GraphProcessor {
     added_node->mutable_attr()->insert({"src_format", attr_format});
     attr_format.set_s("NCHW");
     added_node->mutable_attr()->insert({"dst_format", attr_format});
-    *added_node->add_input() = node_->input(input_pos);
+    *added_node->add_input() = input_name;
+    return added_node;
+  }
+
+  void AddDataFormatTranformToInput(const string& op, int input_pos,
+                                    DataType dtype) {
+    string name =
+        strings::StrCat(kDataFormatOp, "_", node_->name(), "_", input_pos);
+    auto added_node =
+        AddNodeDataFormatOp(name, node_->input(input_pos), op, dtype);
     *node_->mutable_input(input_pos) = added_node->name();
     node_map_->UpdateOutput(added_node->input(0), node_->name(),
                             added_node->name());
@@ -671,7 +693,7 @@ class BiasAddGradProcessor : public NodeProcessor {
     }
     auto input = node_map_->GetNode(node_->input(0));
     if (input) {
-      if ((IsNHWC() && IsDimsFour(*input)) || IsNodeNCHWToNHWC(input->name())) {
+      if (IsNHWC() && IsDimsFour(*input)) {
         return true;
       }
     }
@@ -796,7 +818,7 @@ class Conv2DBackpropInputProcessor : public Conv2DProcessor {
     if (IsConstant(*input_size_node)) {
       TF_RETURN_IF_ERROR(UpdateAttrValueOfInput(0));
     } else {
-      AddNodeDataFormatOp("DataFormatVecPermute", 0, DT_INT32);
+      AddDataFormatTranformToInput("DataFormatVecPermute", 0, DT_INT32);
     }
     return Status::OK();
   }
@@ -945,10 +967,6 @@ class BinaryOpProcessor : public AgnosticNodeProcessor {
     return input_pos;
   }
 
-  bool IsDimsFour(const NodeDef& node) const {
-    return NodeProcessor::IsDimsFour(node) || IsNodeNCHWToNHWC(node.name());
-  }
-
   bool IsNDOperateWithMD(int n, int m) const {
     auto input0 = node_map_->GetNode(node_->input(0));
     auto input1 = node_map_->GetNode(node_->input(1));
@@ -1062,7 +1080,7 @@ class ConcatProcessor : public AgnosticNodeProcessor {
     } else {
       DataType dtype =
           (IsSplit(*node_)) ? DT_INT32 : node_->attr().at("Tidx").type();
-      AddNodeDataFormatOp("DataFormatDimMap", axis_node_pos_, dtype);
+      AddDataFormatTranformToInput("DataFormatDimMap", axis_node_pos_, dtype);
     }
     return Status::OK();
   }
@@ -1149,8 +1167,8 @@ class SliceProcessor : public AgnosticNodeProcessor {
       if (IsConstant(*index_node)) {
         TF_RETURN_IF_ERROR(UpdateAttrValueOfInput(i));
       } else {
-        AddNodeDataFormatOp("DataFormatVecPermute", i,
-                            node_->attr().at("Index").type());
+        AddDataFormatTranformToInput("DataFormatVecPermute", i,
+                                     node_->attr().at("Index").type());
       }
     }
     return Status::OK();
@@ -1216,8 +1234,7 @@ class SumProcessor : public AgnosticNodeProcessor {
   bool ShouldProcess() const override {
     auto input0 = node_map_->GetNode(node_->input(0));
     return !MustPreserve() && HasOutputs() && IsNodeAfterNCHWToNHWC() &&
-           (IsDimsFour(*input0) || IsNodeNCHWToNHWC(input0->name())) &&
-           IsAlongDimNHW() && IsOnGPU();
+           IsDimsFour(*input0) && IsAlongDimNHW() && IsOnGPU();
   }
 
   Status AddLayoutTransposeToOutputs() override { return Status::OK(); }
diff --git a/tensorflow/core/grappler/optimizers/layout_optimizer_test.cc b/tensorflow/core/grappler/optimizers/layout_optimizer_test.cc
index 763e2d1b21..05df1bf918 100644
--- a/tensorflow/core/grappler/optimizers/layout_optimizer_test.cc
+++ b/tensorflow/core/grappler/optimizers/layout_optimizer_test.cc
@@ -258,8 +258,7 @@ TEST_F(LayoutOptimizerTest, EqualSizeWithSamePadding) {
   GraphDef output;
   Status status = optimizer.Optimize(virtual_cluster_.get(), item, &output);
   NodeMap node_map(&output);
-  EXPECT_TRUE(
-      node_map.GetNode("LayoutOptimizerTransposeNHWCToNCHW-Conv2D-Input-0"));
+  EXPECT_TRUE(node_map.GetNode("LayoutOptimizerTransposeNHWCToNCHW-Conv2D-0"));
 }
 
 TEST_F(LayoutOptimizerTest, NotEqualSizeWithValidPadding) {
@@ -272,8 +271,7 @@ TEST_F(LayoutOptimizerTest, NotEqualSizeWithValidPadding) {
   GraphDef output;
   Status status = optimizer.Optimize(virtual_cluster_.get(), item, &output);
   NodeMap node_map(&output);
-  EXPECT_TRUE(
-      node_map.GetNode("LayoutOptimizerTransposeNHWCToNCHW-Conv2D-Input-0"));
+  EXPECT_TRUE(node_map.GetNode("LayoutOptimizerTransposeNHWCToNCHW-Conv2D-0"));
 }
 
 TEST_F(LayoutOptimizerTest, Pad) {
@@ -780,7 +778,7 @@ TEST_F(LayoutOptimizerTest, Mul4DAndUnknownRank) {
   // Node mul should not be processed by layout optimizer, because one of its
   // inputs is of unknown rank.
   EXPECT_EQ(mul_node->input(0),
-            "LayoutOptimizerTransposeNCHWToNHWC-Conv2D-mul-0");
+            "LayoutOptimizerTransposeNCHWToNHWC-Conv2D-0-0");
   EXPECT_EQ(mul_node->input(1), "unknown");
 }
 
diff --git a/tensorflow/python/grappler/layout_optimizer_test.py b/tensorflow/python/grappler/layout_optimizer_test.py
index 831f182009..5d8bc12ac4 100644
--- a/tensorflow/python/grappler/layout_optimizer_test.py
+++ b/tensorflow/python/grappler/layout_optimizer_test.py
@@ -188,10 +188,8 @@ class LayoutOptimizerTest(test.TestCase):
       # LayoutOptimizer; two of them are cancelled out in the Collapse phase.
       expected_num_transposes = 2
       self.assertEqual(expected_num_transposes, num_transposes)
-      self.assertIn('LayoutOptimizerTransposeNHWCToNCHW-Conv2D-Reshape-0',
-                    nodes)
-      self.assertIn('LayoutOptimizerTransposeNCHWToNHWC-Relu_1-MaxPool_1-0',
-                    nodes)
+      self.assertIn('LayoutOptimizerTransposeNHWCToNCHW-Conv2D-0', nodes)
+      self.assertIn('LayoutOptimizerTransposeNCHWToNHWC-Relu_1-0-0', nodes)
 
       self.assertAllClose(output_val_ref, output_val, atol=1e-3)
 
@@ -222,9 +220,8 @@ class LayoutOptimizerTest(test.TestCase):
       # LayoutOptimizer; two of them are cancelled out in the Collapse phase.
       expected_num_transposes = 2
       self.assertEqual(expected_num_transposes, num_transposes)
-      self.assertIn('LayoutOptimizerTransposeNHWCToNCHW-Conv2D-Reshape-0',
-                    nodes)
-      self.assertIn('LayoutOptimizerTransposeNCHWToNHWC-split-Sum-0', nodes)
+      self.assertIn('LayoutOptimizerTransposeNHWCToNCHW-Conv2D-0', nodes)
+      self.assertIn('LayoutOptimizerTransposeNCHWToNHWC-split-0-0', nodes)
       self.assertIn('LayoutOptimizerDataFormatOp_split_0', nodes)
       self.assertAllClose(output_val_ref, output_val, atol=1e-3)
 
@@ -259,10 +256,8 @@ class LayoutOptimizerTest(test.TestCase):
       # LayoutOptimizer; two of them are cancelled out in the Collapse phase.
       expected_num_transposes = 2
       self.assertEqual(expected_num_transposes, num_transposes)
-      self.assertIn('LayoutOptimizerTransposeNHWCToNCHW-Conv2D-Reshape-0',
-                    nodes)
-      self.assertIn('LayoutOptimizerTransposeNCHWToNHWC-Slice-Identity-0',
-                    nodes)
+      self.assertIn('LayoutOptimizerTransposeNHWCToNCHW-Conv2D-0', nodes)
+      self.assertIn('LayoutOptimizerTransposeNCHWToNHWC-Slice-0-0', nodes)
       self.assertIn('LayoutOptimizerDataFormatOp_Slice_2', nodes)
       self.assertAllClose(output_val_ref, output_val, atol=1e-3)
 
-- 
GitLab


From c25bf74a3ebb13e4fd8a46022bc4e231edd43869 Mon Sep 17 00:00:00 2001
From: Javier Luraschi <javierluraschi@hotmail.com>
Date: Wed, 13 Dec 2017 10:19:14 -0800
Subject: [PATCH 0960/1225] Fix link in tensorflow lite readme

---
 tensorflow/contrib/lite/README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/contrib/lite/README.md b/tensorflow/contrib/lite/README.md
index 2fb40070cb..852284cbc7 100644
--- a/tensorflow/contrib/lite/README.md
+++ b/tensorflow/contrib/lite/README.md
@@ -186,7 +186,7 @@ bazel-bin/tensorflow/contrib/lite/toco/toco -- \
 - Setting the input_array, output_array and input_shape arguments are a bit trickier. The easiest way to find these values is to explore the graph in tensorboard .  The user should reuse the arguments that were used for specifying the output nodes for inference in the `freeze_graph`step.
 
 Note, it is also possible to use the Tensorflow Optimizing Converter through protos either from Python or from the command line see the
-documentation [here](https://github.com/tensorflow/tensorflow/tree/mastertensorflow/contrib/lite/python:toco_from_protos target) A developer can then integrate the conversion step into their model design workflow to ensure that a model will be easily convertible to a mobile inference graph. For example,
+documentation [here](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/lite/toco/python/toco_from_protos.py). A developer can then integrate the conversion step into their model design workflow to ensure that a model will be easily convertible to a mobile inference graph. For example,
 
 ```
 import tensorflow as tf
-- 
GitLab


From dcbf6c972d7b4203735bca04f4d33d575ef7b22b Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 13 Dec 2017 11:06:55 -0800
Subject: [PATCH 0961/1225] Run HardcodeMinMax graph transformation again after
 applying --default_ranges_{min,max}. The specific issue that this fixes was
 that with a Concatenation op involving one constant and one non-constant
 input, the non-constant input got its minmax from --default_ranges_{min,max},
 but then it was too late for HardcodeMinMax to propagate that to the other
 constant input of this Concatenation.

PiperOrigin-RevId: 178928533
---
 tensorflow/contrib/lite/toco/toco_tooling.cc | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/tensorflow/contrib/lite/toco/toco_tooling.cc b/tensorflow/contrib/lite/toco/toco_tooling.cc
index a93fade6af..7e50c2207f 100644
--- a/tensorflow/contrib/lite/toco/toco_tooling.cc
+++ b/tensorflow/contrib/lite/toco/toco_tooling.cc
@@ -226,6 +226,10 @@ void Transform(const TocoFlags& toco_flags, Model* model) {
         toco_flags.has_default_ranges_max()) {
       UseDefaultMinMaxRangeValues(model, toco_flags.default_ranges_min(),
                                   toco_flags.default_ranges_max());
+      // The new MinMax info may need to be propagated a bit.
+      RunGraphTransformations(
+          model, "default min-max range propagation graph transformations",
+          {new HardcodeMinMax});
     }
     CheckIsReadyForQuantization(*model);
     RunGraphTransformations(
-- 
GitLab


From 1f699bfac94360fdf4cedfee0c823bffd352d94d Mon Sep 17 00:00:00 2001
From: Yong Tang <yong.tang.github@outlook.com>
Date: Wed, 13 Dec 2017 19:19:25 +0000
Subject: [PATCH 0962/1225] Add @deprecated_arg_values to conv1d for
 `NHWC/NCHW`

This commit adds @deprecated_arg_values to conv1d for `NHWC/NCHW`
(use `NWC/NCW` instead)

Signed-off-by: Yong Tang <yong.tang.github@outlook.com>
---
 tensorflow/python/ops/nn_ops.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/tensorflow/python/ops/nn_ops.py b/tensorflow/python/ops/nn_ops.py
index 0a1a276565..3a77d89760 100644
--- a/tensorflow/python/ops/nn_ops.py
+++ b/tensorflow/python/ops/nn_ops.py
@@ -2253,6 +2253,12 @@ def nth_element(input, n, reverse=False, name=None):
   return gen_nn_ops.nth_element(input, n, reverse=reverse, name=name)
 
 
+@deprecation.deprecated_arg_values(
+    None, '`NCHW` for data_format is deprecated, use `NCW` instead',
+    warn_once=True, data_format="NCHW")
+@deprecation.deprecated_arg_values(
+    None, '`NHWC` for data_format is deprecated, use `NWC` instead',
+    warn_once=True, data_format="NHWC")
 def conv1d(value, filters, stride, padding,
            use_cudnn_on_gpu=None, data_format=None,
            name=None):
-- 
GitLab


From 52a44f28174f3a08fa92c3d43a9531c7c1101666 Mon Sep 17 00:00:00 2001
From: Eugene Brevdo <ebrevdo@google.com>
Date: Wed, 13 Dec 2017 11:17:07 -0800
Subject: [PATCH 0963/1225] Convert LSTMFusedBlockCell to a plain Layer; it is
 not really an RNNCell.

This allows us to revert a change to the public API for most RNNCells.
That breaking change was introduced yesterday (wherein scope argument had to be
passed by keyword arg).

PiperOrigin-RevId: 178930316
---
 tensorflow/contrib/rnn/python/ops/lstm_ops.py |  2 +-
 tensorflow/python/ops/rnn_cell_impl.py        | 15 ++++++++-------
 2 files changed, 9 insertions(+), 8 deletions(-)

diff --git a/tensorflow/contrib/rnn/python/ops/lstm_ops.py b/tensorflow/contrib/rnn/python/ops/lstm_ops.py
index 805121a8f1..9217617e77 100644
--- a/tensorflow/contrib/rnn/python/ops/lstm_ops.py
+++ b/tensorflow/contrib/rnn/python/ops/lstm_ops.py
@@ -436,7 +436,7 @@ class LSTMBlockCell(LayerRNNCell):
     return h, new_state
 
 
-class LSTMBlockWrapper(LayerRNNCell):
+class LSTMBlockWrapper(base_layer.Layer):
   """This is a helper class that provides housekeeping for LSTM cells.
 
   This may be useful for alternative LSTM and similar type of cells.
diff --git a/tensorflow/python/ops/rnn_cell_impl.py b/tensorflow/python/ops/rnn_cell_impl.py
index 7c759d852c..7cb9f7762d 100644
--- a/tensorflow/python/ops/rnn_cell_impl.py
+++ b/tensorflow/python/ops/rnn_cell_impl.py
@@ -265,18 +265,18 @@ class _LayerRNNCell(RNNCell):
   `call` methods do not access Variables `tf.get_variable`.
   """
 
-  def __call__(self, inputs, *args, **kwargs):
+  def __call__(self, inputs, state, scope=None, *args, **kwargs):
     """Run this RNN cell on inputs, starting from the given state.
 
     Args:
       inputs: `2-D` tensor with shape `[batch_size, input_size]`.
-      *args: Additional positional arguments.
-        Usually composesed of `[state]`: if `self.state_size` is an integer,
-        this should be a `2-D Tensor` with shape
-        `[batch_size, self.state_size]`.  Otherwise, if
+      state: if `self.state_size` is an integer, this should be a `2-D Tensor`
+        with shape `[batch_size, self.state_size]`.  Otherwise, if
         `self.state_size` is a tuple of integers, this should be a tuple
         with shapes `[batch_size, s] for s in self.state_size`.
-      **kwargs: Additional keyword arguments.  Common keys include `scope`.
+      scope: optional cell scope.
+      *args: Additional positional arguments.
+      **kwargs: Additional keyword arguments.
 
     Returns:
       A pair containing:
@@ -288,7 +288,8 @@ class _LayerRNNCell(RNNCell):
     # Bypass RNNCell's variable capturing semantics for LayerRNNCell.
     # Instead, it is up to subclasses to provide a proper build
     # method.  See the class docstring for more details.
-    return base_layer.Layer.__call__(self, inputs, *args, **kwargs)
+    return base_layer.Layer.__call__(self, inputs, state, scope=scope,
+                                     *args, **kwargs)
 
 
 class BasicRNNCell(_LayerRNNCell):
-- 
GitLab


From 3425ae40aa54f16776bfcfe3be9c42bdb426cc20 Mon Sep 17 00:00:00 2001
From: Austin Anderson <angerson@google.com>
Date: Wed, 13 Dec 2017 11:27:29 -0800
Subject: [PATCH 0964/1225] Update docs as per #15325

PiperOrigin-RevId: 178932328
---
 tensorflow/docs_src/programmers_guide/tensors.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tensorflow/docs_src/programmers_guide/tensors.md b/tensorflow/docs_src/programmers_guide/tensors.md
index 47d4db2a56..58a80d5339 100644
--- a/tensorflow/docs_src/programmers_guide/tensors.md
+++ b/tensorflow/docs_src/programmers_guide/tensors.md
@@ -112,8 +112,8 @@ For example, the following method programmatically determines the rank
 of the `tf.Tensor` defined in the previous section:
 
 ```python
-r = tf.rank(my3d)
-# After the graph runs, r will hold the value 3.
+r = tf.rank(my_image)
+# After the graph runs, r will hold the value 4.
 ```
 
 ### Referring to `tf.Tensor` slices
-- 
GitLab


From f9a88f8a2b9fc371bc5dbd7a9404494c29857f6e Mon Sep 17 00:00:00 2001
From: Austin Anderson <angerson@google.com>
Date: Wed, 13 Dec 2017 11:49:40 -0800
Subject: [PATCH 0965/1225] Cap prompt attempts before aborting, like other
 prompts

PiperOrigin-RevId: 178936195
---
 configure.py | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/configure.py b/configure.py
index 680448d7b6..589f6c9501 100644
--- a/configure.py
+++ b/configure.py
@@ -806,7 +806,7 @@ def set_tf_cuda_version(environ_cp):
       'Please specify the CUDA SDK version you want to use, '
       'e.g. 7.0. [Leave empty to default to CUDA %s]: ') % _DEFAULT_CUDA_VERSION
 
-  while True:
+  for _ in range(_DEFAULT_PROMPT_ASK_ATTEMPTS):
     # Configure the Cuda SDK version to use.
     tf_cuda_version = get_from_env_or_user_or_default(
         environ_cp, 'TF_CUDA_VERSION', ask_cuda_version, _DEFAULT_CUDA_VERSION)
@@ -844,6 +844,11 @@ def set_tf_cuda_version(environ_cp):
     environ_cp['TF_CUDA_VERSION'] = ''
     environ_cp['CUDA_TOOLKIT_PATH'] = ''
 
+  else:
+    raise UserInputError('Invalid TF_CUDA_SETTING setting was provided %d '
+                         'times in a row. Assuming to be a scripting mistake.' %
+                         _DEFAULT_PROMPT_ASK_ATTEMPTS)
+
   # Set CUDA_TOOLKIT_PATH and TF_CUDA_VERSION
   environ_cp['CUDA_TOOLKIT_PATH'] = cuda_toolkit_path
   write_action_env_to_bazelrc('CUDA_TOOLKIT_PATH', cuda_toolkit_path)
@@ -857,7 +862,7 @@ def set_tf_cudnn_version(environ_cp):
       'Please specify the cuDNN version you want to use. '
       '[Leave empty to default to cuDNN %s.0]: ') % _DEFAULT_CUDNN_VERSION
 
-  while True:
+  for _ in range(_DEFAULT_PROMPT_ASK_ATTEMPTS):
     tf_cudnn_version = get_from_env_or_user_or_default(
         environ_cp, 'TF_CUDNN_VERSION', ask_cudnn_version,
         _DEFAULT_CUDNN_VERSION)
@@ -916,6 +921,10 @@ def set_tf_cudnn_version(environ_cp):
       print('%s.%s' % (cudnn_path_from_ldconfig, tf_cudnn_version))
 
     environ_cp['TF_CUDNN_VERSION'] = ''
+  else:
+    raise UserInputError('Invalid TF_CUDNN setting was provided %d '
+                         'times in a row. Assuming to be a scripting mistake.' %
+                         _DEFAULT_PROMPT_ASK_ATTEMPTS)
 
   # Set CUDNN_INSTALL_PATH and TF_CUDNN_VERSION
   environ_cp['CUDNN_INSTALL_PATH'] = cudnn_install_path
-- 
GitLab


From e256c813f1d1cdb857014a8617628c7c812d98c6 Mon Sep 17 00:00:00 2001
From: Allen Lavoie <allenl@google.com>
Date: Wed, 13 Dec 2017 12:03:20 -0800
Subject: [PATCH 0966/1225] A start on a prototype for object-based variable
 saving.

Doesn't do anything useful yet, but has an API for Checkpointable:
  - add_variable (compatible with Layer)
  - track_checkpointable (kinda like tfe.Network's track_layer)
  - a property indicating checkpoint dependencies and their names

Also has some logic for collecting variables (using a private attribute).

Main TODOs:
 - Prototype checkpointing a full object DAG (to eventually be added to BundleHeaderProto?). This is needed in case one path to an object with multiple paths is broken, and it happens to be the path we used in the checkpoint keys for that object's variables.
 - Logic to attribute human-readable checkpoint names to objects in this full checkpointed DAG, and from the checkpointed DAG to the Python DAG being loaded into. Need some escaping when writing checkpoint names for this to work.
 - Deferred loading (copy from tfe.Network?), including some management of the creation of slot variables
 - APIs for saving/loading, some end-to-end examples

PiperOrigin-RevId: 178938256
---
 tensorflow/contrib/eager/python/BUILD         |  31 ++
 .../contrib/eager/python/checkpointable.py    | 303 ++++++++++++++++++
 .../eager/python/checkpointable_test.py       | 204 ++++++++++++
 tensorflow/tools/pip_package/BUILD            |   1 +
 4 files changed, 539 insertions(+)
 create mode 100644 tensorflow/contrib/eager/python/checkpointable.py
 create mode 100644 tensorflow/contrib/eager/python/checkpointable_test.py

diff --git a/tensorflow/contrib/eager/python/BUILD b/tensorflow/contrib/eager/python/BUILD
index fb667cd91b..21efc107b9 100644
--- a/tensorflow/contrib/eager/python/BUILD
+++ b/tensorflow/contrib/eager/python/BUILD
@@ -217,6 +217,37 @@ py_test(
     ],
 )
 
+py_library(
+    name = "checkpointable",
+    srcs = ["checkpointable.py"],
+    srcs_version = "PY2AND3",
+    visibility = ["//tensorflow:internal"],
+    deps = [
+        "//tensorflow/python:training",
+        "//tensorflow/python:variable_scope",
+    ],
+)
+
+py_test(
+    name = "checkpointable_test",
+    srcs = ["checkpointable_test.py"],
+    srcs_version = "PY2AND3",
+    deps = [
+        ":checkpointable",
+        ":network",
+        "//tensorflow/python:constant_op",
+        "//tensorflow/python:framework_ops",
+        "//tensorflow/python:framework_test_lib",
+        "//tensorflow/python:layers",
+        "//tensorflow/python:training",
+        "//tensorflow/python:variable_scope",
+        "//tensorflow/python:variables",
+        "//tensorflow/python/eager:context",
+        "//tensorflow/python/eager:test",
+        "@six_archive//:six",
+    ],
+)
+
 filegroup(
     name = "all_files",
     srcs = glob(
diff --git a/tensorflow/contrib/eager/python/checkpointable.py b/tensorflow/contrib/eager/python/checkpointable.py
new file mode 100644
index 0000000000..bc868a47bf
--- /dev/null
+++ b/tensorflow/contrib/eager/python/checkpointable.py
@@ -0,0 +1,303 @@
+"""An object-local variable management scheme."""
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import collections
+import re
+
+from tensorflow.python.ops import variable_scope
+from tensorflow.python.training import optimizer
+
+
+_CheckpointableReference = collections.namedtuple("_CheckpointableReference", [
+    "name",  # The local name if explicitly specified, else None.
+    "local_uid",  # 0 for the first dependency, 1 for the next, ... Used for
+                  # routing checkpointed variables to their correct
+                  # Checkpointables when "name" is not set (see docstring of
+                  # `track_checkpointable`).
+    "ref"  # The Checkpointable object being referenced.
+])
+
+
+_OwnedVariable = collections.namedtuple("_OwnedVariable", [
+    "name",  # The variable's (local) name.
+    "variable"  # The owned variable object.
+])
+
+
+# Validation regular expression for the local names of Checkpointable
+# objects. In particular, disallows "/" in names, and ensures that the
+# checkpoint names of variables are valid Operation names .
+_VALID_LOCAL_NAME = re.compile(r"^[A-Za-z0-9.][A-Za-z0-9_.-]*$")
+
+
+# Keyword for identifying that the next bit of a checkpoint variable
+# name is a slot name. May not be the local name of a checkpointable. Checkpoint
+# names for slot variables look like:
+#
+#   <path to variable>/<_OPTIMIZER_SLOTS_NAME>/<path to optimizer>/<slot name>
+#
+# Where <path to variable> is a full path from the checkpoint root to the
+# variable being slotted for.
+_OPTIMIZER_SLOTS_NAME = "_OPTIMIZER_SLOT"
+
+
+class Checkpointable(object):
+  """Manages variables and dependencies on other objects.
+
+  To make reliable checkpoints, all `Checkpointable`s on which this object
+  depends must be registered in the constructor using `track_checkpointable` in
+  a deterministic order, and if possible they should be named. Variables may be
+  created using `add_variable` outside of the constructor and in any order, but
+  only these variables will be saved.
+  """
+
+  def __init__(self):
+    # Basically less useful OrderedDicts but without the reference cycles.
+    # TODO(allenl): Switch these to OrderedDict once TensorFlow supports only
+    # Python 3.6+.
+    self._checkpoint_dependencies = []  # A list of _CheckpointableReference
+                                        # objects.
+    self._dependency_names = set()
+    self._owned_variables = []  # A list of _OwnedVariable objects.
+    self._owned_variable_names = set()
+
+  def add_variable(self, name, shape, dtype=None, initializer=None, **kwargs):
+    """Create a new variable object to be saved with this `Checkpointable`.
+
+    If the user has requested that this object or another `Checkpointable` which
+    depends on this object be restored from a checkpoint (deferred loading
+    before variable object creation), `initializer` may be ignored and the value
+    from the checkpoint used instead.
+
+    Args:
+      name: A name for the variable. Must be unique within this object.
+      shape: The shape of the variable.
+      dtype: The data type of the variable.
+      initializer: The initializer to use. Ignored if deferred loading has been
+        requested.
+      **kwargs: Passed to get_variable.
+
+    Returns:
+      The new variable object.
+
+    Raises:
+      ValueError: If the variable name is not unique.
+    """
+    if name in self._owned_variable_names:
+      raise ValueError(
+          ("A variable named '%s' already exists in this Checkpointable, but "
+           "Checkpointable.add_variable called to create another with "
+           "that name. Variable names must be unique within a Checkpointable "
+           "object.")
+          % (name,))
+    if "getter" in kwargs:
+      # Allow the getter to be overridden, typically because there is a need for
+      # compatibility with some other variable creation mechanism. This should
+      # be relatively uncommon in user code.
+      getter = kwargs.pop("getter")
+    else:
+      getter = variable_scope.get_variable
+    # TODO(allenl): handle deferred loading
+    new_variable = getter(
+        name=name, shape=shape, dtype=dtype, initializer=initializer, **kwargs)
+    self._owned_variables.append(
+        _OwnedVariable(name=name, variable=new_variable))
+    self._owned_variable_names.add(name)
+    return new_variable
+
+  def track_checkpointable(self, checkpointable, name=None):
+    """Declare a dependency on another `Checkpointable` object.
+
+    Indicates that checkpoints for this object should include variables from
+    `checkpointable`.
+
+    Variables in a checkpoint are mapped to `Checkpointable`s based on names if
+    provided when the checkpoint was written, but otherwise use the order those
+    `Checkpointable`s were declared as dependencies. Both `name` arguments and
+    the dependency declaration order should be deterministic.
+
+    There are two sufficient conditions to avoid breaking existing checkpoints
+    when modifying a class: (1) New dependencies must be declared after existing
+    dependencies, and (2) dependencies which were previously declared may never
+    be removed (a trivial placeholder with the same name may be used instead).
+
+    Args:
+      checkpointable: A `Checkpointable` which this object depends on.
+      name: A local name for `checkpointable`, used for loading checkpoints into
+        the correct objects. If provided, it must be unique within this
+        `Checkpointable`. If None, dependency declaration order is used instead.
+
+    Returns:
+      `checkpointable`, for convenience when declaring a dependency and
+      assigning to a member variable in one statement.
+
+    Raises:
+      RuntimeError: If __init__ was not called.
+      TypeError: If `checkpointable` does not inherit from `Checkpointable`.
+      ValueError: For invalid names.
+    """
+    if not hasattr(self, "_checkpoint_dependencies"):
+      raise RuntimeError(
+          "Need to call Checkpointable.__init__ before calling "
+          "Checkpointable.track_checkpointable().")
+    if not isinstance(checkpointable, Checkpointable):
+      raise TypeError(
+          ("Checkpointable.track_checkpointable() passed type %s, not a "
+           "Checkpointable.") % (type(checkpointable),))
+    if name is not None:
+      if not _VALID_LOCAL_NAME.match(name):
+        raise ValueError(
+            ("Checkpointable names must match the regular expression '%s', but "
+             "got an invalid name '%s' instead.")
+            % (_VALID_LOCAL_NAME.pattern, name))
+      if name in self._dependency_names:
+        raise ValueError(
+            ("Called Checkpointable.track_checkpointable() with name='%s', but "
+             "a Checkpointable with this name is already declared as a "
+             "dependency. If provided, names must be unique.")
+            % (name,))
+      self._dependency_names.add(name)
+    self._checkpoint_dependencies.append(_CheckpointableReference(
+        name=name,
+        ref=checkpointable,
+        # TODO(allenl): Should this be exposed to allow users to stop depending
+        # on things and still load checkpoints when not using names?
+        local_uid=len(self._checkpoint_dependencies)))
+    return checkpointable
+
+  @property
+  def checkpoint_dependencies(self):
+    """Other `Checkpointable` objects on which this object depends."""
+    return self._checkpoint_dependencies
+
+
+def _breadth_first_checkpointable_traversal(root_checkpointable):
+  """Find shortest paths to all variables owned by dependencies of root."""
+  bfs_sorted = []
+  root_checkpointable_reference = _CheckpointableReference(
+      name=None, local_uid=0, ref=root_checkpointable)
+  to_visit = collections.deque([root_checkpointable_reference])
+  path_to_root = {root_checkpointable_reference: ()}
+  while to_visit:
+    current_checkpointable = to_visit.popleft()
+    bfs_sorted.append(current_checkpointable)
+    for child_checkpointable in (
+        current_checkpointable.ref.checkpoint_dependencies):
+      if child_checkpointable not in path_to_root:
+        path_to_root[child_checkpointable] = (
+            path_to_root[current_checkpointable] + (child_checkpointable,))
+        to_visit.append(child_checkpointable)
+  return bfs_sorted, path_to_root
+
+
+# TODO(allenl): Save the Checkpointable graph with the checkpoint so that a
+# redundant path to a Checkpointable can be removed from the Python program
+# without breaking the checkpoint (e.g. a graph with root -> b -> c and root ->
+# d -> c, edge "root -> b" gets removed and we should be able to still load
+# variables into c since it's referenced through d, even if our names were
+# "b/c/variable_name").
+#
+# TODO(allenl): Convenience utility for saving multiple objects (i.e. construct
+# a root Checkpointable if passed a list of Checkpointables).
+def _name_variables(root_checkpointable):
+  """Determine checkpoint keys for variables.
+
+  Non-slot variables are keyed based on a shortest path from the root saveable
+  to the object which owns the variable (i.e. the one which called
+  `Checkpointable.add_variable` to create it).
+
+  Slot variables are keyed based on a shortest path to the variable being
+  slotted for, a shortest path to their optimizer, and the slot name.
+
+  Args:
+    root_checkpointable: A `Checkpointable` object whose variables (including
+      the variables of dependencies, recursively) should be saved.
+
+  Returns:
+    A dictionary mapping names to variable objects.
+
+  Raises:
+    ValueError: If there are invalid characters in an optimizer's slot names.
+  """
+  bfs_sorted, path_to_root = _breadth_first_checkpointable_traversal(
+      root_checkpointable)
+
+  # Gather non-slot variables, name them:
+  #
+  #   <path to node>/<local variable name>
+  #
+  # <path to node> is not necessarily unique, but this is fine since we also
+  # save the graph of `Checkpointable`s with the checkpoint. Even if this path
+  # no longer exists because of a change in the Python program, we can look up
+  # the `Checkpointable` which owns the variable in the checkpoint's graph and
+  # use another path if one still exists.
+  named_variables = {}
+
+  def _name_from_path(path):
+    return "/".join(checkpointable.name or "%d" % (checkpointable.local_uid,)
+                    for checkpointable in path)
+
+  for checkpointable in bfs_sorted:
+    human_readable_prefix = _name_from_path(path_to_root[checkpointable])
+    for owned_variable in checkpointable.ref._owned_variables:  # pylint: disable=protected-access
+      # TODO(allenl): Escape names/with/slashes. We need to accept them for
+      # variables at least to maintain compatibility with
+      # e.g. Layer.add_variable, but need to escape them before writing
+      # checkpoints if we want the human readable names to be parsable. Also
+      # need to escape local Checkpointable names which look like they're
+      # positional (name="1").
+      if human_readable_prefix:
+        variable_name = human_readable_prefix + "/" + owned_variable.name
+      else:
+        variable_name = owned_variable.name
+      named_variables[variable_name] = owned_variable.variable
+
+  # Gather slot variables, name them:
+  #
+  #   <variable name>/<_OPTIMIZER_SLOTS_NAME>/<optimizer path>/<slot name>
+  #
+  # where <variable name> is exactly the name used for the original variable
+  # above, including the path from the checkpoint root and the local name in the
+  # object which owns it. Note that we only save slot variables if the variable
+  # it's slotting for is also being saved.
+  non_slot_variables = list(named_variables.items())
+  for checkpointable_ref in bfs_sorted:
+    if isinstance(checkpointable_ref.ref, optimizer.Optimizer):
+      slot_names = checkpointable_ref.ref.get_slot_names()
+      for slot_name in slot_names:
+        if not _VALID_LOCAL_NAME.match(slot_name):
+          # Slot variable names include the name of the slot. We need to
+          # validate that part of the name to be sure that the checkpoint name
+          # is a valid name scope name.
+          raise ValueError(
+              ("Could not save slot variables for optimizer %s, because its "
+               "slot name has invalid characters (got '%s', was expecting it "
+               "to match the regular expression '%s').")
+              % (checkpointable_ref.ref, slot_name,
+                 _VALID_LOCAL_NAME.pattern))
+        suffix = "/".join((
+            _OPTIMIZER_SLOTS_NAME,
+            _name_from_path(path_to_root[checkpointable_ref]),
+            slot_name))
+        for variable_name, variable in non_slot_variables:
+          slot_variable = checkpointable_ref.ref.get_slot(
+              variable, slot_name)
+          if slot_variable is not None:
+            named_variables[variable_name + "/" + suffix] = slot_variable
+  return named_variables
diff --git a/tensorflow/contrib/eager/python/checkpointable_test.py b/tensorflow/contrib/eager/python/checkpointable_test.py
new file mode 100644
index 0000000000..5427b35c0b
--- /dev/null
+++ b/tensorflow/contrib/eager/python/checkpointable_test.py
@@ -0,0 +1,204 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import functools
+import six
+
+from tensorflow.contrib.eager.python import checkpointable
+from tensorflow.contrib.eager.python import network as network_lib
+from tensorflow.python.eager import context
+from tensorflow.python.eager import test
+from tensorflow.python.framework import constant_op
+from tensorflow.python.framework import ops
+from tensorflow.python.framework import test_util
+from tensorflow.python.layers import core
+from tensorflow.python.ops import variable_scope
+from tensorflow.python.ops import variables
+from tensorflow.python.training import adam
+from tensorflow.python.training import training_util
+
+
+class CheckpointableDenseLayer(core.Dense, checkpointable.Checkpointable):
+
+  def __init__(self, *args, **kwargs):
+    checkpointable.Checkpointable.__init__(self)
+    core.Dense.__init__(self, *args, **kwargs)
+
+  def add_variable(self, name, shape, **kwargs):
+    # Calls both Checkpointable.add_variable and Layer.add_variable. Eventually
+    # Layer.add_variable should inherit from Checkpointable and simply call
+    # super and then do post-processing.
+    return checkpointable.Checkpointable.add_variable(
+        self, name=name, shape=shape,
+        getter=functools.partial(core.Dense.add_variable, self), **kwargs)
+
+
+# pylint: disable=not-callable
+class CheckpointableNetwork(network_lib.Network, checkpointable.Checkpointable):
+
+  def __init__(self):
+    network_lib.Network.__init__(self)
+    checkpointable.Checkpointable.__init__(self)
+
+  def track_layer(self, layer, name=None):
+    self.track_checkpointable(layer, name=name)
+    return super(CheckpointableNetwork, self).track_layer(layer)
+
+
+class CheckpointableAdam(adam.AdamOptimizer, checkpointable.Checkpointable):
+
+  def __init__(self, *args, **kwargs):
+    checkpointable.Checkpointable.__init__(self)
+    adam.AdamOptimizer.__init__(self, *args, **kwargs)
+
+  # NOTE: Copied from AdamOptimizer with modifications to use add_variable
+  # for non-slot variables. These contortions are necessary to maintain
+  # checkpoint compatibility with variable.name based saving.
+  def _create_slots(self, var_list):
+    # Create the beta1 and beta2 accumulators on the same device as the first
+    # variable. Sort the var_list to make sure this device is consistent across
+    # workers (these need to go on the same PS, otherwise some updates are
+    # silently ignored).
+    first_var = min(var_list, key=lambda x: x.name)
+
+    create_new = self._beta1_power is None
+    if not create_new and context.in_graph_mode():
+      create_new = (self._beta1_power.graph is not first_var.graph)
+
+    if create_new:
+      with ops.colocate_with(first_var):
+        def _variable_getter(name, shape, dtype, initializer):
+          del shape, dtype  # not used, but there for compatibility
+          return variable_scope.variable(
+              name=name, initial_value=initializer, trainable=False)
+        self._beta1_power = self.add_variable(
+            name="beta1_power", shape=[], initializer=self._beta1,
+            getter=_variable_getter)
+        self._beta2_power = self.add_variable(
+            name="beta2_power", shape=[], initializer=self._beta2,
+            getter=_variable_getter)
+    # Create slots for the first and second moments.
+    for v in var_list:
+      self._zeros_slot(v, "m", self._name)
+      self._zeros_slot(v, "v", self._name)
+
+  # TODO(allenl): Override slot variable creation (_get_or_make_slot,
+  # _get_or_make_slot_with_initializer, _zeros_slot) to allow deferred
+  # loading. Likely no need to run this through add_variable, since gathering
+  # slot variables is special cased anyway.
+
+
+class MyNetwork(CheckpointableNetwork):
+  """A concrete Network for testing."""
+
+  def __init__(self):
+    super(MyNetwork, self).__init__()
+    self._named = self.track_layer(
+        CheckpointableDenseLayer(1, use_bias=True),
+        name="named_dense")
+    self._unnamed = self.track_layer(
+        CheckpointableDenseLayer(1, use_bias=False))
+
+  def call(self, values):
+    return self._unnamed(self._named(values))
+
+
+class Root(checkpointable.Checkpointable):
+  """A stand-in for a Trainer class."""
+
+  def __init__(self, optimizer, network):
+    super(Root, self).__init__()
+    self.track_checkpointable(optimizer, name="optimizer")
+    self.track_checkpointable(network, name="network")
+    self._global_step = None
+
+  @property
+  def global_step(self):
+    if self._global_step is None:
+      # Get the default create_global_step utility to actually call
+      # self.add_variable, by setting a custom getter.
+      def _owned_variable_as_custom_getter(getter, *args, **kwargs):
+        return self.add_variable(*args, getter=getter, **kwargs)
+      with variable_scope.variable_scope(
+          "", custom_getter=_owned_variable_as_custom_getter):
+        self._global_step = training_util.create_global_step()
+    return self._global_step
+
+
+class CheckpointNamingTests(test.TestCase):
+
+  @test_util.run_in_graph_and_eager_modes(assert_no_eager_garbage=True)
+  def testNamingWithOptimizer(self):
+    input_value = constant_op.constant([[3.]])
+    network = MyNetwork()
+    # A nuisance Network using the same optimizer. Its slot variables should not
+    # go in the checkpoint, since it is never depended on.
+    other_network = MyNetwork()
+    optimizer = CheckpointableAdam(0.001)
+    root_checkpointable = Root(optimizer=optimizer, network=network)
+    if context.in_eager_mode():
+      optimizer.minimize(lambda: network(input_value),
+                         global_step=root_checkpointable.global_step)
+      optimizer.minimize(lambda: other_network(input_value),
+                         global_step=root_checkpointable.global_step)
+    else:
+      train_op = optimizer.minimize(
+          network(input_value),
+          global_step=root_checkpointable.global_step)
+      optimizer.minimize(
+          other_network(input_value),
+          global_step=root_checkpointable.global_step)
+      self.evaluate(variables.global_variables_initializer())
+      self.evaluate(train_op)
+    named_variables = checkpointable._name_variables(root_checkpointable)
+    expected_checkpoint_names = (
+        # Created in the root node, so no prefix.
+        "global_step",
+        # No name provided to track_checkpointable(), so the position (1, after
+        # the named track_checkpointable() which is 0) is used instead.
+        "network/1/kernel",
+        # track_checkpointable() with a name provided, so that's used
+        "network/named_dense/kernel",
+        "network/named_dense/bias",
+        # The optimizer creates two non-slot variables
+        "optimizer/beta1_power",
+        "optimizer/beta2_power",
+        # Slot variables
+        "network/1/kernel/_OPTIMIZER_SLOT/optimizer/m",
+        "network/1/kernel/_OPTIMIZER_SLOT/optimizer/v",
+        "network/named_dense/kernel/_OPTIMIZER_SLOT/optimizer/m",
+        "network/named_dense/kernel/_OPTIMIZER_SLOT/optimizer/v",
+        "network/named_dense/bias/_OPTIMIZER_SLOT/optimizer/m",
+        "network/named_dense/bias/_OPTIMIZER_SLOT/optimizer/v",
+    )
+    six.assertCountEqual(
+        self, expected_checkpoint_names, named_variables.keys())
+    # Check that we've mapped to the right variable objects (not exhaustive)
+    self.assertEqual("global_step:0", named_variables["global_step"].name)
+    self.assertEqual("my_network/checkpointable_dense_layer_1/kernel:0",
+                     named_variables["network/1/kernel"].name)
+    self.assertEqual("my_network/checkpointable_dense_layer/kernel:0",
+                     named_variables["network/named_dense/kernel"].name)
+    self.assertEqual("beta1_power:0",
+                     named_variables["optimizer/beta1_power"].name)
+    self.assertEqual("beta2_power:0",
+                     named_variables["optimizer/beta2_power"].name)
+
+
+if __name__ == "__main__":
+  test.main()
diff --git a/tensorflow/tools/pip_package/BUILD b/tensorflow/tools/pip_package/BUILD
index 48fc4c91be..33af4532c8 100644
--- a/tensorflow/tools/pip_package/BUILD
+++ b/tensorflow/tools/pip_package/BUILD
@@ -155,6 +155,7 @@ sh_binary(
             "//tensorflow/contrib/cluster_resolver:cluster_resolver_pip",
             "//tensorflow/contrib/data/python/ops:prefetching_py",
             "//tensorflow/contrib/eager/python/examples:examples_pip",
+            "//tensorflow/contrib/eager/python:checkpointable",
             "//tensorflow/contrib/eager/python:evaluator",
             "//tensorflow/contrib/gan:gan",
             "//tensorflow/contrib/graph_editor:graph_editor_pip",
-- 
GitLab


From ac4d418e3cd1d3236037508b815db4cff82bcfda Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 13 Dec 2017 12:10:18 -0800
Subject: [PATCH 0967/1225] Test consistently that the strings passed in
 input_arrays and output_arrays consist of printable ASCII characters (this is
 motivated by a user having unwittingly passed unicode zero-width characters,
 probably by copy-pasting), and are names of arrays actually existing in the
 model. Centralize these tests in CheckInvariants.

This can be overridden with new model flags: --allow_nonascii_arrays,
--allow_nonexistent_arrays. These are model flags because this is about
self-consistency of the model and its existing modelflags.

This CL partly undoes a recent relaxation of checks on input arrays that
was done to support getting graphviz out of incorrectly specified graphs.
Such users will now have to pass --allow_nonexistent_arrays.

PiperOrigin-RevId: 178939235
---
 tensorflow/contrib/lite/toco/args.h           |  2 +
 .../contrib/lite/toco/model_cmdline_flags.cc  | 31 +++---
 .../contrib/lite/toco/model_cmdline_flags.h   |  2 -
 .../contrib/lite/toco/model_flags.proto       | 15 ++-
 tensorflow/contrib/lite/toco/tooling_util.cc  | 94 ++++++++++++++++---
 5 files changed, 117 insertions(+), 27 deletions(-)

diff --git a/tensorflow/contrib/lite/toco/args.h b/tensorflow/contrib/lite/toco/args.h
index 5268902346..a2f80fae9b 100644
--- a/tensorflow/contrib/lite/toco/args.h
+++ b/tensorflow/contrib/lite/toco/args.h
@@ -203,6 +203,8 @@ struct ParsedModelFlags {
   Arg<string> graphviz_last_array;
   Arg<string> dump_graphviz;
   Arg<bool> dump_graphviz_video = Arg<bool>(false);
+  Arg<bool> allow_nonexistent_arrays = Arg<bool>(false);
+  Arg<bool> allow_nonascii_arrays = Arg<bool>(false);
 };
 
 // Flags that describe the operation you would like to do (what conversion
diff --git a/tensorflow/contrib/lite/toco/model_cmdline_flags.cc b/tensorflow/contrib/lite/toco/model_cmdline_flags.cc
index 29802da9fe..790b3443ce 100644
--- a/tensorflow/contrib/lite/toco/model_cmdline_flags.cc
+++ b/tensorflow/contrib/lite/toco/model_cmdline_flags.cc
@@ -17,7 +17,6 @@ limitations under the License.
 #include <string>
 #include <vector>
 
-#include "absl/strings/ascii.h"
 #include "absl/strings/numbers.h"
 #include "absl/strings/str_join.h"
 #include "absl/strings/str_split.h"
@@ -28,6 +27,7 @@ limitations under the License.
 #include "tensorflow/contrib/lite/toco/toco_port.h"
 #include "tensorflow/core/platform/logging.h"
 #include "tensorflow/core/util/command_line_flags.h"
+
 // "batch" flag only exists internally
 #ifdef PLATFORM_GOOGLE
 #include "base/commandlineflags.h"
@@ -134,6 +134,20 @@ bool ParseModelFlagsFromCommandLineFlags(
            parsed_flags.dump_graphviz_video.default_value(),
            "If true, will dump graphviz at each "
            "graph transformation, which may be used to generate a video."),
+      Flag("allow_nonexistent_arrays",
+           parsed_flags.allow_nonexistent_arrays.bind(),
+           parsed_flags.allow_nonexistent_arrays.default_value(),
+           "If true, will allow passing inexistent arrays in --input_arrays "
+           "and --output_arrays. This makes little sense, is only useful to "
+           "more easily get graph visualizations."),
+      Flag("allow_nonascii_arrays", parsed_flags.allow_nonascii_arrays.bind(),
+           parsed_flags.allow_nonascii_arrays.default_value(),
+           "If true, will allow passing non-ascii-printable characters in "
+           "--input_arrays and --output_arrays. By default (if false), only "
+           "ascii printable characters are allowed, i.e. character codes "
+           "ranging from 32 to 127. This is disallowed by default so as to "
+           "catch common copy-and-paste issues where invisible unicode "
+           "characters are unwittingly added to these strings."),
   };
   bool asked_for_help =
       *argc == 2 && (!strcmp(argv[1], "--help") || !strcmp(argv[1], "-help"));
@@ -350,7 +364,10 @@ void ReadModelFlagsFromCommandLineFlags(
     }
   }
 
-  CheckInputArraysAreNotOutputArrays(*model_flags);
+  model_flags->set_allow_nonascii_arrays(
+      parsed_model_flags.allow_nonascii_arrays.value());
+  model_flags->set_allow_nonexistent_arrays(
+      parsed_model_flags.allow_nonexistent_arrays.value());
 }
 
 ParsedModelFlags* UncheckedGlobalParsedModelFlags(bool must_already_exist) {
@@ -386,14 +403,4 @@ void ParseModelFlagsOrDie(int* argc, char* argv[]) {
   }
 }
 
-void CheckInputArraysAreNotOutputArrays(const ModelFlags& model_flags) {
-  for (const auto& input_array : model_flags.input_arrays()) {
-    for (const string& output_array : model_flags.output_arrays()) {
-      QCHECK_NE(input_array.name(), output_array)
-          << "The array " << output_array
-          << " is listed in both --input_arrays and --output_arrays.";
-    }
-  }
-}
-
 }  // namespace toco
diff --git a/tensorflow/contrib/lite/toco/model_cmdline_flags.h b/tensorflow/contrib/lite/toco/model_cmdline_flags.h
index 61bcde234e..027d7ae1aa 100644
--- a/tensorflow/contrib/lite/toco/model_cmdline_flags.h
+++ b/tensorflow/contrib/lite/toco/model_cmdline_flags.h
@@ -38,8 +38,6 @@ void ParseModelFlagsOrDie(int* argc, char* argv[]);
 // Get the global parsed model flags
 ParsedModelFlags* GlobalParsedModelFlags();
 
-void CheckInputArraysAreNotOutputArrays(const ModelFlags& model_flags);
-
 }  // namespace toco
 
 
diff --git a/tensorflow/contrib/lite/toco/model_flags.proto b/tensorflow/contrib/lite/toco/model_flags.proto
index 05c48bc369..13fea29a07 100644
--- a/tensorflow/contrib/lite/toco/model_flags.proto
+++ b/tensorflow/contrib/lite/toco/model_flags.proto
@@ -117,7 +117,7 @@ message RnnState {
 //   optional int32 input_dims = 11 [ default = 4];
 //   repeated int32 input_shape = 13;
 //
-// Next ID to USE: 16.
+// Next ID to USE: 18.
 message ModelFlags {
   // Information about the input arrays, i.e. the arrays from which input
   // activations will be read.
@@ -147,4 +147,17 @@ message ModelFlags {
     optional int32 count_max = 3 [default = -1];
   }
   repeated ModelCheck model_checks = 14;
+
+  // If true, will allow passing inexistent arrays in --input_arrays
+  // and --output_arrays. This makes little sense, is only useful to
+  // more easily get graph visualizations.
+  optional bool allow_nonexistent_arrays = 16;
+
+  // If true, will allow passing non-ascii-printable characters in
+  // --input_arrays and --output_arrays. By default (if false), only
+  // ascii printable characters are allowed, i.e. character codes
+  // ranging from 32 to 127. This is disallowed by default so as to
+  // catch common copy-and-paste issues where invisible unicode
+  // characters are unwittingly added to these strings.
+  optional bool allow_nonascii_arrays = 17;
 }
diff --git a/tensorflow/contrib/lite/toco/tooling_util.cc b/tensorflow/contrib/lite/toco/tooling_util.cc
index 21b85c86cc..f3daac175e 100644
--- a/tensorflow/contrib/lite/toco/tooling_util.cc
+++ b/tensorflow/contrib/lite/toco/tooling_util.cc
@@ -21,6 +21,7 @@ limitations under the License.
 #include <unordered_set>
 #include <utility>
 
+#include "absl/strings/ascii.h"
 #include "absl/strings/str_cat.h"
 #include "absl/strings/str_join.h"
 #include "absl/strings/str_replace.h"
@@ -563,15 +564,65 @@ bool IsConstantParameterArray(const Model& model, const string& name) {
   return !!model.arrays.at(name)->buffer;
 }
 
-void CheckNoMissingArray(const Model& model) {
-  for (const auto& op : model.operators) {
-    for (const auto& input : op->inputs) {
-      CHECK(model.arrays.count(input));
+namespace {
+void CheckInputArraysAreNotOutputArrays(const ModelFlags& model_flags) {
+  for (const auto& input_array : model_flags.input_arrays()) {
+    for (const string& output_array : model_flags.output_arrays()) {
+      QCHECK_NE(input_array.name(), output_array)
+          << "The array " << output_array
+          << " is listed in both --input_arrays and --output_arrays.";
     }
-    for (const auto& output : op->outputs) {
-      CHECK(model.arrays.count(output));
+  }
+}
+
+bool IsAsciiPrintable(const string& name) {
+  for (char c : name) {
+    if (!absl::ascii_isprint(c)) {
+      return false;
     }
   }
+  return true;
+}
+
+string DumpAscii(const string& name) {
+  string result;
+  port::AppendF(&result, "ASCII | Hex\n");
+  port::AppendF(&result, "------+----\n");
+  for (char c : name) {
+    if (absl::ascii_isprint(c)) {
+      port::AppendF(&result, "%c     | %x\n", c, c);
+    } else {
+      port::AppendF(&result, "      | %x   Not ASCII printable!\n", c);
+    }
+  }
+  return result;
+}
+
+void CheckNonAsciiIOArrays(const ModelFlags& model_flags) {
+  if (model_flags.allow_nonascii_arrays()) {
+    return;
+  }
+  for (const auto& input_array : model_flags.input_arrays()) {
+    QCHECK(IsAsciiPrintable(input_array.name()))
+        << "Non-ASCII-printable character found in --input_arrays: "
+        << input_array.name()
+        << ". Pass --allow_nonascii_arrays to allow that. "
+        << "Here is a dump of the string:\n\n"
+        << DumpAscii(input_array.name());
+  }
+  for (const string& output_array : model_flags.output_arrays()) {
+    QCHECK(IsAsciiPrintable(output_array))
+        << "Non-ASCII-printable character found in --output_arrays: "
+        << output_array << ". Pass --allow_nonascii_arrays to allow that. "
+        << "Here is a dump of the string:\n\n"
+        << DumpAscii(output_array);
+  }
+}
+
+void CheckNonExistentIOArrays(const Model& model) {
+  if (model.flags.allow_nonexistent_arrays()) {
+    return;
+  }
   for (const auto& input_array : model.flags.input_arrays()) {
     CHECK(model.arrays.count(input_array.name()))
         << "Input array not found: " << input_array.name();
@@ -587,6 +638,19 @@ void CheckNoMissingArray(const Model& model) {
     }
   }
 }
+}  // namespace
+
+void CheckNoMissingArray(const Model& model) {
+  for (const auto& op : model.operators) {
+    for (const auto& input : op->inputs) {
+      CHECK(model.arrays.count(input));
+    }
+    for (const auto& output : op->outputs) {
+      CHECK(model.arrays.count(output));
+    }
+  }
+  CheckNonExistentIOArrays(model);
+}
 
 void FixNoMissingArray(Model* model) {
   for (const auto& op : model->operators) {
@@ -601,14 +665,14 @@ void FixNoMissingArray(Model* model) {
       }
     }
   }
-  for (const string& output_array : model->flags.output_arrays()) {
-    if (!model->arrays.count(output_array)) {
+  if (model->flags.allow_nonexistent_arrays()) {
+    for (const string& output_array : model->flags.output_arrays()) {
       model->GetOrCreateArray(output_array);
     }
-  }
-  for (const auto& rnn_state : model->flags.rnn_states()) {
-    model->GetOrCreateArray(rnn_state.state_array());
-    model->GetOrCreateArray(rnn_state.back_edge_source_array());
+    for (const auto& rnn_state : model->flags.rnn_states()) {
+      model->GetOrCreateArray(rnn_state.state_array());
+      model->GetOrCreateArray(rnn_state.back_edge_source_array());
+    }
   }
 }
 
@@ -818,6 +882,8 @@ void FixOperatorOrdering(Model* model) {
 }
 
 void CheckInvariants(const Model& model) {
+  CheckInputArraysAreNotOutputArrays(model.flags);
+  CheckNonAsciiIOArrays(model.flags);
   CheckNoMissingArray(model);
   CheckNoOrphanedArray(model);
   CheckArrayFieldsConsistent(model);
@@ -1111,6 +1177,10 @@ void ResolveModelFlags(const ModelFlags& model_flags, Model* model) {
       CHECK(input_array.shape().dims_size());
     }
   }
+
+  model->flags.set_allow_nonascii_arrays(model_flags.allow_nonascii_arrays());
+  model->flags.set_allow_nonexistent_arrays(
+      model_flags.allow_nonexistent_arrays());
 }
 
 void CheckIsReadyForQuantization(const Model& model) {
-- 
GitLab


From ea78050f20f7508537ccd1dee6dd60f0f40829cc Mon Sep 17 00:00:00 2001
From: Yao Zhang <yaozhang@google.com>
Date: Wed, 13 Dec 2017 12:11:59 -0800
Subject: [PATCH 0968/1225] Support Shape and ShapeN ops.

PiperOrigin-RevId: 178939498
---
 tensorflow/core/grappler/op_types.cc          |   4 +
 tensorflow/core/grappler/op_types.h           |   2 +
 .../grappler/optimizers/layout_optimizer.cc   | 105 +++++++++++++++---
 .../optimizers/layout_optimizer_test.cc       |  82 ++++++++++++--
 .../python/grappler/layout_optimizer_test.py  |  35 +++++-
 5 files changed, 197 insertions(+), 31 deletions(-)

diff --git a/tensorflow/core/grappler/op_types.cc b/tensorflow/core/grappler/op_types.cc
index ac94c3f81e..75a11a4d36 100644
--- a/tensorflow/core/grappler/op_types.cc
+++ b/tensorflow/core/grappler/op_types.cc
@@ -151,6 +151,10 @@ bool IsRestore(const NodeDef& node) {
 
 bool IsSend(const NodeDef& node) { return node.op() == "_Send"; }
 
+bool IsShape(const NodeDef& node) { return node.op() == "Shape"; }
+
+bool IsShapeN(const NodeDef& node) { return node.op() == "ShapeN"; }
+
 bool IsSlice(const NodeDef& node) { return node.op() == "Slice"; }
 
 bool IsSplit(const NodeDef& node) { return node.op() == "Split"; }
diff --git a/tensorflow/core/grappler/op_types.h b/tensorflow/core/grappler/op_types.h
index b8031e011c..6a88dc21e0 100644
--- a/tensorflow/core/grappler/op_types.h
+++ b/tensorflow/core/grappler/op_types.h
@@ -59,6 +59,8 @@ bool IsReshape(const NodeDef& node);
 bool IsRestore(const NodeDef& node);
 bool IsSend(const NodeDef& node);
 bool IsSlice(const NodeDef& node);
+bool IsShape(const NodeDef& node);
+bool IsShapeN(const NodeDef& node);
 bool IsSplit(const NodeDef& node);
 bool IsSquaredDifference(const NodeDef& node);
 bool IsSqueeze(const NodeDef& node);
diff --git a/tensorflow/core/grappler/optimizers/layout_optimizer.cc b/tensorflow/core/grappler/optimizers/layout_optimizer.cc
index f6af1f0c3e..aabdb670a2 100644
--- a/tensorflow/core/grappler/optimizers/layout_optimizer.cc
+++ b/tensorflow/core/grappler/optimizers/layout_optimizer.cc
@@ -37,11 +37,12 @@ namespace grappler {
 namespace {
 
 const char kPrefix[] = "LayoutOptimizer";
-const char kDataFormatOp[] = "LayoutOptimizerDataFormatOp";
 const char kPermNHWCToNCHW[] = "LayoutOptimizerPermConstNHWCToNCHW";
 const char kPermNCHWToNHWC[] = "LayoutOptimizerPermConstNCHWToNHWC";
 const char kTransposeNHWCToNCHW[] = "LayoutOptimizerTransposeNHWCToNCHW";
 const char kTransposeNCHWToNHWC[] = "LayoutOptimizerTransposeNCHWToNHWC";
+const char kVecPermuteNHWCToNCHW[] = "LayoutOptimizerVecPermuteNHWCToNCHW";
+const char kVecPermuteNCHWToNHWC[] = "LayoutOptimizerVecPermuteNCHWToNHWC";
 const char kReshapeNHWCToNCHW[] = "LayoutOptimizerReshapeNHWCToNCHW";
 const char kReshapeConst[] = "LayoutOptimizerReshapeConst";
 const char kReductionConst[] = "LayoutOptimizerReductionConst";
@@ -109,6 +110,8 @@ std::set<string> GetOpsFormatAgnostic() {
                                           "Relu6",
                                           "ReluGrad",
                                           "Rint",
+                                          "Shape",
+                                          "ShapeN",
                                           "Sigmoid",
                                           "SigmoidGrad",
                                           "Sign",
@@ -533,7 +536,7 @@ class NodeProcessor : public GraphProcessor {
     return Status::OK();
   }
 
-  virtual Status AddLayoutTransposeToOutputs() {
+  Status AddTransformToOutputs(const string& op) {
     auto outputs = node_map_->GetOutputs(node_->name());
     string const_name = GetOrAddNodePermNCHWToNHWC();
     int output_count = 0;
@@ -549,15 +552,29 @@ class NodeProcessor : public GraphProcessor {
           connections++;
           if (output_pos.find(input_port) != output_pos.end()) {
             connections_removed++;
-            string added_node_name =
-                strings::StrCat(kTransposeNCHWToNHWC, "-", node_->name(), "-",
-                                output_count, "-", i);
-            TF_RETURN_IF_ERROR(HasAttribute(*node_, "T"));
-            TF_RETURN_IF_ERROR(HasAttribute(*node_, "_output_shapes"));
-            AddNodeTranspose(added_node_name, input, const_name,
-                             node_->attr().at("T").type(),
-                             node_->attr().at("_output_shapes").list().shape(0),
-                             false);
+            string added_node_base_name =
+                strings::StrCat(node_->name(), "-", output_count, "-", i);
+            string added_node_name;
+            if (op == "Transpose") {
+              added_node_name = AddPrefixToNodeName(added_node_base_name,
+                                                    kTransposeNCHWToNHWC, "-");
+              TF_RETURN_IF_ERROR(HasAttribute(*node_, "T"));
+              TF_RETURN_IF_ERROR(HasAttribute(*node_, "_output_shapes"));
+              AddNodeTranspose(
+                  added_node_name, input, const_name,
+                  node_->attr().at("T").type(),
+                  node_->attr().at("_output_shapes").list().shape(0), false);
+            } else if (op == "DataFormatVecPermute") {
+              added_node_name = AddPrefixToNodeName(added_node_base_name,
+                                                    kVecPermuteNCHWToNHWC, "-");
+              TF_RETURN_IF_ERROR(HasAttribute(*node_, "out_type"));
+              DataType dtype = (IsSplit(*node_))
+                                   ? DT_INT32
+                                   : node_->attr().at("out_type").type();
+              AddNodeDataFormatOp(added_node_name, input, op, dtype, false);
+            } else {
+              return errors::InvalidArgument("Unsupported op type: ", op);
+            }
             input = added_node_name;
             node_map_->AddOutput(node_->name(), added_node_name);
             node_map_->AddOutput(added_node_name, output->name());
@@ -572,6 +589,10 @@ class NodeProcessor : public GraphProcessor {
     return Status::OK();
   }
 
+  virtual Status AddLayoutTransposeToOutputs() {
+    return AddTransformToOutputs("Transpose");
+  }
+
   virtual Status CustomizedProcessing() { return Status::OK(); }
 
   NodeDef* AddNodePermNHWCToNCHW(const string& suffix,
@@ -597,7 +618,8 @@ class NodeProcessor : public GraphProcessor {
   }
 
   NodeDef* AddNodeDataFormatOp(const string& name, const string& input_name,
-                               const string& op, DataType dtype) {
+                               const string& op, DataType dtype,
+                               bool nhwc_to_nchw) {
     NodeDef* added_node = graph_->add_node();
     added_node->set_name(name);
     added_node->set_op(op);
@@ -606,10 +628,12 @@ class NodeProcessor : public GraphProcessor {
     AttrValue attr_data_type;
     attr_data_type.set_type(dtype);
     added_node->mutable_attr()->insert({"T", attr_data_type});
+    string src_format = (nhwc_to_nchw) ? "NHWC" : "NCHW";
+    string dst_format = (nhwc_to_nchw) ? "NCHW" : "NHWC";
     AttrValue attr_format;
-    attr_format.set_s("NHWC");
+    attr_format.set_s(src_format);
     added_node->mutable_attr()->insert({"src_format", attr_format});
-    attr_format.set_s("NCHW");
+    attr_format.set_s(dst_format);
     added_node->mutable_attr()->insert({"dst_format", attr_format});
     *added_node->add_input() = input_name;
     return added_node;
@@ -617,10 +641,10 @@ class NodeProcessor : public GraphProcessor {
 
   void AddDataFormatTranformToInput(const string& op, int input_pos,
                                     DataType dtype) {
-    string name =
-        strings::StrCat(kDataFormatOp, "_", node_->name(), "_", input_pos);
+    string name = strings::StrCat(kVecPermuteNHWCToNCHW, "_", node_->name(),
+                                  "_", input_pos);
     auto added_node =
-        AddNodeDataFormatOp(name, node_->input(input_pos), op, dtype);
+        AddNodeDataFormatOp(name, node_->input(input_pos), op, dtype, true);
     *node_->mutable_input(input_pos) = added_node->name();
     node_map_->UpdateOutput(added_node->input(0), node_->name(),
                             added_node->name());
@@ -905,7 +929,6 @@ class AgnosticNodeProcessor : public NodeProcessor {
 
  private:
   std::vector<int> DataInputPos(const NodeDef& node) const {
-    std::vector<int> pos;
     if (IsSplit(node)) {
       return {1};
     }
@@ -916,6 +939,13 @@ class AgnosticNodeProcessor : public NodeProcessor {
         IsSquaredDifference(node) || IsSub(node)) {
       return {0, 1};
     }
+    if (IsShapeN(node)) {
+      std::vector<int> pos;
+      for (int i = 0; i < node.input_size(); i++) {
+        pos.push_back(i);
+      }
+      return pos;
+    }
     if (node.input_size() > 0 && !IsControlInput(node.input(0))) {
       return {0};
     }
@@ -1154,6 +1184,43 @@ class ReluGradProcessor : public AgnosticNodeProcessor {
   }
 };
 
+class ShapeProcessor : public AgnosticNodeProcessor {
+ public:
+  explicit ShapeProcessor(const OptimizeContext& opt_cxt)
+      : AgnosticNodeProcessor(opt_cxt) {}
+
+ protected:
+  bool ShouldProcess() const override {
+    return !MustPreserve() && HasOutputs() && IsNodeAfterNCHWToNHWC() &&
+           IsOnGPU();
+  }
+
+  std::vector<int> GetInputPos() const override {
+    std::vector<int> input_pos;
+    for (int i = 0; i < node_->input_size(); i++) {
+      auto input = node_map_->GetNode(node_->input(i));
+      if (IsDimsFour(*input)) {
+        input_pos.push_back(i);
+      }
+    }
+    return input_pos;
+  }
+
+  std::set<int> GetOutputPos() const override {
+    std::set<int> output_pos{};
+    for (const auto& input_pos : GetInputPos()) {
+      output_pos.insert(input_pos);
+    }
+    return output_pos;
+  }
+
+  Status AddLayoutTransposeToOutputs() override { return Status::OK(); }
+
+  Status CustomizedProcessing() override {
+    return AddTransformToOutputs("DataFormatVecPermute");
+  }
+};
+
 class SliceProcessor : public AgnosticNodeProcessor {
  public:
   explicit SliceProcessor(const OptimizeContext& opt_cxt)
@@ -1376,6 +1443,8 @@ class DataLayoutOptimizer : GraphProcessor {
             node_processor.reset(new ReluGradProcessor(opt_cxt));
           } else if (IsSlice(*node)) {
             node_processor.reset(new SliceProcessor(opt_cxt));
+          } else if (IsShape(*node) || IsShapeN(*node)) {
+            node_processor.reset(new ShapeProcessor(opt_cxt));
           } else if (IsSplit(*node)) {
             node_processor.reset(new SplitProcessor(opt_cxt));
           } else if (IsSqueeze(*node)) {
diff --git a/tensorflow/core/grappler/optimizers/layout_optimizer_test.cc b/tensorflow/core/grappler/optimizers/layout_optimizer_test.cc
index 05df1bf918..d45f4ae9b9 100644
--- a/tensorflow/core/grappler/optimizers/layout_optimizer_test.cc
+++ b/tensorflow/core/grappler/optimizers/layout_optimizer_test.cc
@@ -198,9 +198,9 @@ TEST_F(LayoutOptimizerTest, Conv2DBackpropInputNonConstInputSizes) {
   auto conv2d_backprop_node = node_map.GetNode("Conv2DBackpropInput");
   CHECK(conv2d_backprop_node);
   EXPECT_EQ(conv2d_backprop_node->input(0),
-            "LayoutOptimizerDataFormatOp_Conv2DBackpropInput_0");
-  auto input_sizes_node =
-      node_map.GetNode("LayoutOptimizerDataFormatOp_Conv2DBackpropInput_0");
+            "LayoutOptimizerVecPermuteNHWCToNCHW_Conv2DBackpropInput_0");
+  auto input_sizes_node = node_map.GetNode(
+      "LayoutOptimizerVecPermuteNHWCToNCHW_Conv2DBackpropInput_0");
   CHECK(input_sizes_node);
   EXPECT_EQ(input_sizes_node->input(0), "InputSizesIdentity");
   EXPECT_EQ(input_sizes_node->op(), "DataFormatVecPermute");
@@ -559,9 +559,11 @@ TEST_F(LayoutOptimizerTest, SplitNonConstDim) {
   Status status = optimizer.Optimize(virtual_cluster_.get(), item, &output);
   NodeMap node_map(&output);
   auto split_node = node_map.GetNode("split");
-  EXPECT_EQ(split_node->input(0), "LayoutOptimizerDataFormatOp_split_0");
+  EXPECT_EQ(split_node->input(0),
+            "LayoutOptimizerVecPermuteNHWCToNCHW_split_0");
   EXPECT_EQ(split_node->input(1), "Conv2D");
-  auto map_node = node_map.GetNode("LayoutOptimizerDataFormatOp_split_0");
+  auto map_node =
+      node_map.GetNode("LayoutOptimizerVecPermuteNHWCToNCHW_split_0");
   EXPECT_EQ(map_node->op(), "DataFormatDimMap");
   EXPECT_EQ(map_node->input(0), "i1");
 }
@@ -627,8 +629,10 @@ TEST_F(LayoutOptimizerTest, ConcatNonConst) {
   auto concat_node = node_map.GetNode("concat");
   EXPECT_EQ(concat_node->input(0), "split");
   EXPECT_EQ(concat_node->input(1), "split:1");
-  EXPECT_EQ(concat_node->input(2), "LayoutOptimizerDataFormatOp_concat_2");
-  auto concat_dim = node_map.GetNode("LayoutOptimizerDataFormatOp_concat_2");
+  EXPECT_EQ(concat_node->input(2),
+            "LayoutOptimizerVecPermuteNHWCToNCHW_concat_2");
+  auto concat_dim =
+      node_map.GetNode("LayoutOptimizerVecPermuteNHWCToNCHW_concat_2");
   EXPECT_EQ(concat_dim->op(), "DataFormatDimMap");
   EXPECT_EQ(concat_dim->input(0), "i");
 }
@@ -899,12 +903,14 @@ TEST_F(LayoutOptimizerTest, SliceNonConst) {
   NodeMap node_map(&output);
   auto slice_node = node_map.GetNode("slice");
   EXPECT_EQ(slice_node->input(0), "Conv2D");
-  EXPECT_EQ(slice_node->input(1), "LayoutOptimizerDataFormatOp_slice_1");
-  EXPECT_EQ(slice_node->input(2), "LayoutOptimizerDataFormatOp_slice_2");
-  auto perm1 = node_map.GetNode("LayoutOptimizerDataFormatOp_slice_1");
+  EXPECT_EQ(slice_node->input(1),
+            "LayoutOptimizerVecPermuteNHWCToNCHW_slice_1");
+  EXPECT_EQ(slice_node->input(2),
+            "LayoutOptimizerVecPermuteNHWCToNCHW_slice_2");
+  auto perm1 = node_map.GetNode("LayoutOptimizerVecPermuteNHWCToNCHW_slice_1");
   EXPECT_EQ(perm1->op(), "DataFormatVecPermute");
   EXPECT_EQ(perm1->input(0), "ibegin");
-  auto perm2 = node_map.GetNode("LayoutOptimizerDataFormatOp_slice_2");
+  auto perm2 = node_map.GetNode("LayoutOptimizerVecPermuteNHWCToNCHW_slice_2");
   EXPECT_EQ(perm1->op(), "DataFormatVecPermute");
   EXPECT_EQ(perm2->input(0), "isize");
 }
@@ -923,6 +929,60 @@ TEST_F(LayoutOptimizerTest, DoNotApplyOptimizerTwice) {
   EXPECT_TRUE(errors::IsInvalidArgument(status));
 }
 
+TEST_F(LayoutOptimizerTest, ShapeNWithInputs4DAnd4D) {
+  tensorflow::Scope s = tensorflow::Scope::NewRootScope();
+  auto conv = SimpleConv2D(&s, 3, 2, "VALID");
+  auto shapen = ops::ShapeN(s.WithOpName("shapen"), {conv, conv});
+  auto add = ops::Add(s.WithOpName("add"), shapen[0], shapen[1]);
+  GrapplerItem item;
+  TF_CHECK_OK(s.ToGraphDef(&item.graph));
+  LayoutOptimizer optimizer;
+  GraphDef output;
+  Status status = optimizer.Optimize(virtual_cluster_.get(), item, &output);
+  NodeMap node_map(&output);
+  auto shapen_node = node_map.GetNode("shapen");
+  EXPECT_EQ(shapen_node->input(0), "Conv2D");
+  EXPECT_EQ(shapen_node->input(1), "Conv2D");
+  auto add_node = node_map.GetNode("add");
+  EXPECT_EQ(add_node->input(0),
+            "LayoutOptimizerVecPermuteNCHWToNHWC-shapen-0-0");
+  EXPECT_EQ(add_node->input(1),
+            "LayoutOptimizerVecPermuteNCHWToNHWC-shapen-0-1");
+  auto vec_permute1 =
+      node_map.GetNode("LayoutOptimizerVecPermuteNCHWToNHWC-shapen-0-0");
+  EXPECT_EQ(vec_permute1->input(0), "shapen");
+  EXPECT_EQ(vec_permute1->op(), "DataFormatVecPermute");
+  auto vec_permute2 =
+      node_map.GetNode("LayoutOptimizerVecPermuteNCHWToNHWC-shapen-0-1");
+  EXPECT_EQ(vec_permute2->input(0), "shapen:1");
+  EXPECT_EQ(vec_permute2->op(), "DataFormatVecPermute");
+}
+
+TEST_F(LayoutOptimizerTest, ShapeNWithInputsVectorAnd4D) {
+  tensorflow::Scope s = tensorflow::Scope::NewRootScope();
+  auto conv = SimpleConv2D(&s, 3, 2, "VALID");
+  auto vector = ops::Const(s.WithOpName("vector"), 3.0f, {7});
+  auto shapen = ops::ShapeN(s.WithOpName("shapen"), {vector, conv});
+  auto add = ops::Add(s.WithOpName("add"), shapen[0], shapen[1]);
+  GrapplerItem item;
+  TF_CHECK_OK(s.ToGraphDef(&item.graph));
+  LayoutOptimizer optimizer;
+  GraphDef output;
+  Status status = optimizer.Optimize(virtual_cluster_.get(), item, &output);
+  NodeMap node_map(&output);
+  auto shapen_node = node_map.GetNode("shapen");
+  EXPECT_EQ(shapen_node->input(0), "vector");
+  EXPECT_EQ(shapen_node->input(1), "Conv2D");
+  auto add_node = node_map.GetNode("add");
+  EXPECT_EQ(add_node->input(0), "shapen");
+  EXPECT_EQ(add_node->input(1),
+            "LayoutOptimizerVecPermuteNCHWToNHWC-shapen-0-1");
+  auto vec_permute =
+      node_map.GetNode("LayoutOptimizerVecPermuteNCHWToNHWC-shapen-0-1");
+  EXPECT_EQ(vec_permute->input(0), "shapen:1");
+  EXPECT_EQ(vec_permute->op(), "DataFormatVecPermute");
+}
+
 }  // namespace
 }  // namespace grappler
 }  // namespace tensorflow
diff --git a/tensorflow/python/grappler/layout_optimizer_test.py b/tensorflow/python/grappler/layout_optimizer_test.py
index 5d8bc12ac4..5d339ec697 100644
--- a/tensorflow/python/grappler/layout_optimizer_test.py
+++ b/tensorflow/python/grappler/layout_optimizer_test.py
@@ -222,7 +222,7 @@ class LayoutOptimizerTest(test.TestCase):
       self.assertEqual(expected_num_transposes, num_transposes)
       self.assertIn('LayoutOptimizerTransposeNHWCToNCHW-Conv2D-0', nodes)
       self.assertIn('LayoutOptimizerTransposeNCHWToNHWC-split-0-0', nodes)
-      self.assertIn('LayoutOptimizerDataFormatOp_split_0', nodes)
+      self.assertIn('LayoutOptimizerVecPermuteNHWCToNCHW_split_0', nodes)
       self.assertAllClose(output_val_ref, output_val, atol=1e-3)
 
   def testSliceWithNonConstAxis(self):
@@ -258,9 +258,40 @@ class LayoutOptimizerTest(test.TestCase):
       self.assertEqual(expected_num_transposes, num_transposes)
       self.assertIn('LayoutOptimizerTransposeNHWCToNCHW-Conv2D-0', nodes)
       self.assertIn('LayoutOptimizerTransposeNCHWToNHWC-Slice-0-0', nodes)
-      self.assertIn('LayoutOptimizerDataFormatOp_Slice_2', nodes)
+      self.assertIn('LayoutOptimizerVecPermuteNHWCToNCHW_Slice_2', nodes)
       self.assertAllClose(output_val_ref, output_val, atol=1e-3)
 
+  def testShapeN(self):
+    if test.is_gpu_available(cuda_only=True):
+      x = array_ops.placeholder(dtype='float32')
+      conv = _two_layer_model(x)
+      shapen = array_ops.shape_n([conv, conv])
+      output = math_ops.add(shapen[0], shapen[1])
+
+      x_val = [1.7] * 784
+      with session.Session() as sess:
+        output_val_ref = sess.run(output, feed_dict={x: x_val})
+
+      with session.Session(config=_get_config()) as sess:
+        metadata = config_pb2.RunMetadata()
+        output_val = sess.run(
+            output, run_metadata=metadata, feed_dict={
+                x: x_val
+            })
+
+      nodes = []
+      num_transposes = 0
+      for node in metadata.cost_graph.node:
+        if node.name.startswith('LayoutOptimizerTranspose'):
+          num_transposes += 1
+        nodes.append(node.name)
+
+      expected_num_transposes = 1
+      self.assertEqual(expected_num_transposes, num_transposes)
+      self.assertIn('LayoutOptimizerTransposeNHWCToNCHW-Conv2D-0', nodes)
+      self.assertIn('LayoutOptimizerVecPermuteNCHWToNHWC-ShapeN-0-0', nodes)
+      self.assertAllEqual(output_val_ref, output_val)
+
   def testLoop(self):
     if test.is_gpu_available(cuda_only=True):
       output = _loop()
-- 
GitLab


From df9189cc4671facfecd3e8249c9e8b01b11c0df5 Mon Sep 17 00:00:00 2001
From: Austin Anderson <angerson@google.com>
Date: Wed, 13 Dec 2017 12:41:38 -0800
Subject: [PATCH 0969/1225] Update location for x86_64 android build (#15346)

See https://github.com/tensorflow/tensorflow/issues/15345
---
 tensorflow/contrib/makefile/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/contrib/makefile/Makefile b/tensorflow/contrib/makefile/Makefile
index e2e6c05591..ee84b5b4c8 100644
--- a/tensorflow/contrib/makefile/Makefile
+++ b/tensorflow/contrib/makefile/Makefile
@@ -300,7 +300,7 @@ ifeq ($(TARGET),ANDROID)
 	ifeq ($(ANDROID_ARCH),x86_64)
 		TOOLCHAIN := x86_64-4.9
 		SYSROOT_ARCH := x86_64
-		BIN_PREFIX := x86-64-linux-android
+		BIN_PREFIX := x86_64-linux-android
 		MARCH_OPTION :=
 	endif
     
-- 
GitLab


From 2971a85370358789fa07a075be5e265e20dd8c34 Mon Sep 17 00:00:00 2001
From: Allen Lavoie <allenl@google.com>
Date: Wed, 13 Dec 2017 13:10:04 -0800
Subject: [PATCH 0970/1225] Prototype for serializing the Checkpointable graph.

Includes protocol buffers which will likely be added to BundleHeaderProto. We'll pass this protocol buffer plus the standard map of variable names to variable objects to the Saver, both of which will then be stored along with the checkpoint. This extra information means that the path we stored to a variable in its checkpoint name can be removed as long as there is another in the checkpointed graph.

PiperOrigin-RevId: 178946669
---
 tensorflow/BUILD                              |   1 +
 tensorflow/contrib/eager/proto/BUILD          |  24 ++
 .../proto/checkpointable_object_graph.proto   |  56 ++++
 tensorflow/contrib/eager/python/BUILD         |   2 +
 .../contrib/eager/python/checkpointable.py    | 317 +++++++++++-------
 .../eager/python/checkpointable_test.py       | 111 ++++--
 6 files changed, 378 insertions(+), 133 deletions(-)
 create mode 100644 tensorflow/contrib/eager/proto/BUILD
 create mode 100644 tensorflow/contrib/eager/proto/checkpointable_object_graph.proto

diff --git a/tensorflow/BUILD b/tensorflow/BUILD
index 5a12fd17ed..5167ebe473 100644
--- a/tensorflow/BUILD
+++ b/tensorflow/BUILD
@@ -446,6 +446,7 @@ filegroup(
         "//tensorflow/contrib/data/python/ops:all_files",
         "//tensorflow/contrib/decision_trees/proto:all_files",
         "//tensorflow/contrib/distributions:all_files",
+        "//tensorflow/contrib/eager/proto:all_files",
         "//tensorflow/contrib/eager/python:all_files",
         "//tensorflow/contrib/estimator:all_files",
         "//tensorflow/contrib/factorization:all_files",
diff --git a/tensorflow/contrib/eager/proto/BUILD b/tensorflow/contrib/eager/proto/BUILD
new file mode 100644
index 0000000000..aedfec8924
--- /dev/null
+++ b/tensorflow/contrib/eager/proto/BUILD
@@ -0,0 +1,24 @@
+licenses(["notice"])  # Apache 2.0
+
+exports_files(["LICENSE"])
+
+load("//tensorflow/core:platform/default/build_config.bzl", "tf_proto_library")
+
+filegroup(
+    name = "all_files",
+    srcs = glob(
+        ["**/*"],
+        exclude = [
+            "**/OWNERS",
+        ],
+    ),
+    visibility = ["//tensorflow:__subpackages__"],
+)
+
+tf_proto_library(
+    name = "checkpointable_object_graph_proto",
+    srcs = [
+        "checkpointable_object_graph.proto",
+    ],
+    visibility = ["//tensorflow/contrib/eager/python:__subpackages__"],
+)
diff --git a/tensorflow/contrib/eager/proto/checkpointable_object_graph.proto b/tensorflow/contrib/eager/proto/checkpointable_object_graph.proto
new file mode 100644
index 0000000000..c962638aa1
--- /dev/null
+++ b/tensorflow/contrib/eager/proto/checkpointable_object_graph.proto
@@ -0,0 +1,56 @@
+syntax = "proto3";
+
+option cc_enable_arenas = true;
+
+package tensorflow.contrib.eager;
+
+// Prototype for an addition to BundleHeaderProto which saves extra information
+// about the objects which own variables, allowing for more robust checkpoint
+// loading into modified programs.
+
+message CheckpointableObjectGraph {
+  message Object {
+    message ObjectReference {
+      // An index into `CheckpointableObjectGraph.nodes`, indicating the object
+      // being referenced.
+      int32 node_id = 1;
+      // A numeric identifier for this object within its parent.
+      int32 local_uid = 2;
+      // A user-provided name for the edge. May be blank/omitted, in which case
+      // there is no explicitly provided local name; fall back on local_uid.
+      string local_name = 3;
+    }
+
+    message VariableReference {
+      // A name for the variable which is unique within the object which owns
+      // it. Does not include a name_scope or variable_scope prefix.
+      string local_name = 1;
+      // The full name of the variable. Used to allow name-based loading of
+      // checkpoints which were saved using an object-based API.
+      string full_name = 2;
+    }
+
+    message SlotVariableReference {
+      // An index into `CheckpointableObjectGraph.nodes`, indicating the object
+      // which created the variable that this variable is slotting for.
+      int32 original_variable_node_id = 1;
+      // The local name of the variable being slotted for within the object that
+      // owns it.
+      string original_variable_local_name = 2;
+      // The name of the slot (e.g. "m"/"v").
+      string slot_name = 3;
+      // The full name of the slot variable. Used to allow name-based loading of
+      // checkpoints which were saved using an object-based API.
+      string full_name = 4;
+    }
+
+    // Objects which this object depends on.
+    repeated ObjectReference children = 1;
+    // Non-slot variables owned by this object.
+    repeated VariableReference variables = 2;
+    // Slot variables owned by this object.
+    repeated SlotVariableReference slot_variables = 3;
+  }
+
+  repeated Object nodes = 1;
+}
diff --git a/tensorflow/contrib/eager/python/BUILD b/tensorflow/contrib/eager/python/BUILD
index 21efc107b9..086315464c 100644
--- a/tensorflow/contrib/eager/python/BUILD
+++ b/tensorflow/contrib/eager/python/BUILD
@@ -223,6 +223,7 @@ py_library(
     srcs_version = "PY2AND3",
     visibility = ["//tensorflow:internal"],
     deps = [
+        "//tensorflow/contrib/eager/proto:checkpointable_object_graph_proto_py",
         "//tensorflow/python:training",
         "//tensorflow/python:variable_scope",
     ],
@@ -236,6 +237,7 @@ py_test(
         ":checkpointable",
         ":network",
         "//tensorflow/python:constant_op",
+        "//tensorflow/python:dtypes",
         "//tensorflow/python:framework_ops",
         "//tensorflow/python:framework_test_lib",
         "//tensorflow/python:layers",
diff --git a/tensorflow/contrib/eager/python/checkpointable.py b/tensorflow/contrib/eager/python/checkpointable.py
index bc868a47bf..b141ffb2bc 100644
--- a/tensorflow/contrib/eager/python/checkpointable.py
+++ b/tensorflow/contrib/eager/python/checkpointable.py
@@ -20,35 +20,37 @@ from __future__ import print_function
 import collections
 import re
 
+from tensorflow.contrib.eager.proto import checkpointable_object_graph_pb2
 from tensorflow.python.ops import variable_scope
-from tensorflow.python.training import optimizer
-
-
-_CheckpointableReference = collections.namedtuple("_CheckpointableReference", [
-    "name",  # The local name if explicitly specified, else None.
-    "local_uid",  # 0 for the first dependency, 1 for the next, ... Used for
-                  # routing checkpointed variables to their correct
-                  # Checkpointables when "name" is not set (see docstring of
-                  # `track_checkpointable`).
-    "ref"  # The Checkpointable object being referenced.
-])
-
-
-_OwnedVariable = collections.namedtuple("_OwnedVariable", [
-    "name",  # The variable's (local) name.
-    "variable"  # The owned variable object.
-])
-
+from tensorflow.python.training import optimizer as optimizer_lib
+from tensorflow.python.training import saver as saver_lib
+
+_CheckpointableReference = collections.namedtuple(
+    "_CheckpointableReference",
+    [
+        "name",  # The local name if explicitly specified, else None.
+        "local_uid",  # 0 for the first dependency, 1 for the next, ... Used for
+        # routing checkpointed variables to their correct
+        # Checkpointables when "name" is not set (see docstring of
+        # `track_checkpointable`).
+        "ref"  # The Checkpointable object being referenced.
+    ])
+
+_OwnedVariable = collections.namedtuple(
+    "_OwnedVariable",
+    [
+        "name",  # The variable's (local) name.
+        "variable"  # The owned variable object.
+    ])
 
 # Validation regular expression for the local names of Checkpointable
-# objects. In particular, disallows "/" in names, and ensures that the
-# checkpoint names of variables are valid Operation names .
+# objects. In particular, disallows "/" in names, and reserves
+# underscore-prefixed names.
 _VALID_LOCAL_NAME = re.compile(r"^[A-Za-z0-9.][A-Za-z0-9_.-]*$")
 
-
-# Keyword for identifying that the next bit of a checkpoint variable
-# name is a slot name. May not be the local name of a checkpointable. Checkpoint
-# names for slot variables look like:
+# Keyword for identifying that the next bit of a checkpoint variable name is a
+# slot name. May not be the local name of a checkpointable. Checkpoint names for
+# slot variables look like:
 #
 #   <path to variable>/<_OPTIMIZER_SLOTS_NAME>/<path to optimizer>/<slot name>
 #
@@ -72,7 +74,7 @@ class Checkpointable(object):
     # TODO(allenl): Switch these to OrderedDict once TensorFlow supports only
     # Python 3.6+.
     self._checkpoint_dependencies = []  # A list of _CheckpointableReference
-                                        # objects.
+    # objects.
     self._dependency_names = set()
     self._owned_variables = []  # A list of _OwnedVariable objects.
     self._owned_variable_names = set()
@@ -104,8 +106,7 @@ class Checkpointable(object):
           ("A variable named '%s' already exists in this Checkpointable, but "
            "Checkpointable.add_variable called to create another with "
            "that name. Variable names must be unique within a Checkpointable "
-           "object.")
-          % (name,))
+           "object.") % (name,))
     if "getter" in kwargs:
       # Allow the getter to be overridden, typically because there is a need for
       # compatibility with some other variable creation mechanism. This should
@@ -153,9 +154,8 @@ class Checkpointable(object):
       ValueError: For invalid names.
     """
     if not hasattr(self, "_checkpoint_dependencies"):
-      raise RuntimeError(
-          "Need to call Checkpointable.__init__ before calling "
-          "Checkpointable.track_checkpointable().")
+      raise RuntimeError("Need to call Checkpointable.__init__ before calling "
+                         "Checkpointable.track_checkpointable().")
     if not isinstance(checkpointable, Checkpointable):
       raise TypeError(
           ("Checkpointable.track_checkpointable() passed type %s, not a "
@@ -164,21 +164,22 @@ class Checkpointable(object):
       if not _VALID_LOCAL_NAME.match(name):
         raise ValueError(
             ("Checkpointable names must match the regular expression '%s', but "
-             "got an invalid name '%s' instead.")
-            % (_VALID_LOCAL_NAME.pattern, name))
+             "got an invalid name '%s' instead.") % (_VALID_LOCAL_NAME.pattern,
+                                                     name))
       if name in self._dependency_names:
         raise ValueError(
             ("Called Checkpointable.track_checkpointable() with name='%s', but "
              "a Checkpointable with this name is already declared as a "
-             "dependency. If provided, names must be unique.")
-            % (name,))
+             "dependency. If provided, names must be unique.") % (name,))
       self._dependency_names.add(name)
-    self._checkpoint_dependencies.append(_CheckpointableReference(
-        name=name,
-        ref=checkpointable,
-        # TODO(allenl): Should this be exposed to allow users to stop depending
-        # on things and still load checkpoints when not using names?
-        local_uid=len(self._checkpoint_dependencies)))
+    self._checkpoint_dependencies.append(
+        _CheckpointableReference(
+            name=name,
+            ref=checkpointable,
+            # TODO(allenl): Should this be exposed to allow users to stop
+            # depending on things and still load checkpoints when not using
+            # names?
+            local_uid=len(self._checkpoint_dependencies)))
     return checkpointable
 
   @property
@@ -206,39 +207,23 @@ def _breadth_first_checkpointable_traversal(root_checkpointable):
   return bfs_sorted, path_to_root
 
 
-# TODO(allenl): Save the Checkpointable graph with the checkpoint so that a
-# redundant path to a Checkpointable can be removed from the Python program
-# without breaking the checkpoint (e.g. a graph with root -> b -> c and root ->
-# d -> c, edge "root -> b" gets removed and we should be able to still load
-# variables into c since it's referenced through d, even if our names were
-# "b/c/variable_name").
-#
-# TODO(allenl): Convenience utility for saving multiple objects (i.e. construct
-# a root Checkpointable if passed a list of Checkpointables).
-def _name_variables(root_checkpointable):
-  """Determine checkpoint keys for variables.
+def _object_prefix_from_path(path_to_root):
+  return "/".join((checkpointable.name if checkpointable.name else "_%d" % (
+      checkpointable.local_uid,)) for checkpointable in path_to_root)
 
-  Non-slot variables are keyed based on a shortest path from the root saveable
-  to the object which owns the variable (i.e. the one which called
-  `Checkpointable.add_variable` to create it).
 
-  Slot variables are keyed based on a shortest path to the variable being
-  slotted for, a shortest path to their optimizer, and the slot name.
+def _escape_variable_name(variable_name):
+  # We need to support slashes in variable names for compatibility, since this
+  # naming scheme is being patched in to things like Layer.add_variable where
+  # slashes were previously accepted. We also want to use slashes to indicate
+  # edges traversed to reach the variable, so we escape forward slashes in
+  # variable names.
+  return variable_name.replace("_S_", "_S_.").replace(r"/", r"_S__")
 
-  Args:
-    root_checkpointable: A `Checkpointable` object whose variables (including
-      the variables of dependencies, recursively) should be saved.
-
-  Returns:
-    A dictionary mapping names to variable objects.
-
-  Raises:
-    ValueError: If there are invalid characters in an optimizer's slot names.
-  """
-  bfs_sorted, path_to_root = _breadth_first_checkpointable_traversal(
-      root_checkpointable)
 
-  # Gather non-slot variables, name them:
+def _variable_naming_for_object(path_to_root):
+  """Make a function for naming variables in an object."""
+  # Name non-slot variables:
   #
   #   <path to node>/<local variable name>
   #
@@ -247,57 +232,161 @@ def _name_variables(root_checkpointable):
   # no longer exists because of a change in the Python program, we can look up
   # the `Checkpointable` which owns the variable in the checkpoint's graph and
   # use another path if one still exists.
-  named_variables = {}
 
-  def _name_from_path(path):
-    return "/".join(checkpointable.name or "%d" % (checkpointable.local_uid,)
-                    for checkpointable in path)
+  object_prefix = _object_prefix_from_path(path_to_root)
+  if object_prefix:
+    object_prefix += "/"
+
+  def _name_single_variable(owned_variable):
+    """Names a variable within an object."""
+    return object_prefix + _escape_variable_name(owned_variable.name)
+
+  return _name_single_variable
 
-  for checkpointable in bfs_sorted:
-    human_readable_prefix = _name_from_path(path_to_root[checkpointable])
-    for owned_variable in checkpointable.ref._owned_variables:  # pylint: disable=protected-access
-      # TODO(allenl): Escape names/with/slashes. We need to accept them for
-      # variables at least to maintain compatibility with
-      # e.g. Layer.add_variable, but need to escape them before writing
-      # checkpoints if we want the human readable names to be parsable. Also
-      # need to escape local Checkpointable names which look like they're
-      # positional (name="1").
-      if human_readable_prefix:
-        variable_name = human_readable_prefix + "/" + owned_variable.name
-      else:
-        variable_name = owned_variable.name
-      named_variables[variable_name] = owned_variable.variable
 
-  # Gather slot variables, name them:
+def _slot_variable_naming_for_optimizer(optimizer, path_to_root):
+  """Make a function for naming slot variables in an optimizer."""
+  # Name slot variables:
   #
   #   <variable name>/<_OPTIMIZER_SLOTS_NAME>/<optimizer path>/<slot name>
   #
-  # where <variable name> is exactly the name used for the original variable
-  # above, including the path from the checkpoint root and the local name in the
-  # object which owns it. Note that we only save slot variables if the variable
-  # it's slotting for is also being saved.
-  non_slot_variables = list(named_variables.items())
-  for checkpointable_ref in bfs_sorted:
-    if isinstance(checkpointable_ref.ref, optimizer.Optimizer):
+  # where <variable name> is exactly the checkpoint name used for the original
+  # variable, including the path from the checkpoint root and the local name in
+  # the object which owns it. Note that we only save slot variables if the
+  # variable it's slotting for is also being saved.
+
+  optimizer_identifier = "/%s/%s/" % (_OPTIMIZER_SLOTS_NAME,
+                                      _object_prefix_from_path(path_to_root))
+
+  def _name_slot_variable(variable_path, slot_name):
+    """With an optimizer specified, name a slot variable."""
+
+    if not _VALID_LOCAL_NAME.match(slot_name):
+      # Slot variable names include the name of the slot. We need to
+      # validate that part of the name to be sure that the checkpoint name
+      # is a valid name scope name.
+      raise ValueError(
+          ("Could not save slot variables for optimizer %s, because its "
+           "slot name has invalid characters (got '%s', was expecting it "
+           "to match the regular expression '%s').") %
+          (optimizer, slot_name, _VALID_LOCAL_NAME.pattern))
+
+    return variable_path + optimizer_identifier + slot_name
+
+  return _name_slot_variable
+
+
+def _serialize_non_slot_variables(checkpointable_objects, path_to_root,
+                                  object_graph_proto):
+  """Name non-slot variables and add them to `object_graph_proto`."""
+  named_variables = {}
+  non_slot_variables = []
+  checkpoint_node_ids = {}
+
+  for checkpoint_id, checkpointable in enumerate(checkpointable_objects):
+    checkpoint_node_ids[checkpointable] = checkpoint_id
+
+  for checkpoint_id, checkpointable in enumerate(checkpointable_objects):
+    naming_scheme = _variable_naming_for_object(path_to_root[checkpointable])
+    object_proto = object_graph_proto.nodes.add()
+    for owned_variable in checkpointable.ref._owned_variables:  # pylint: disable=protected-access
+      variable_name = naming_scheme(owned_variable)
+      named_variables[variable_name] = owned_variable.variable
+      non_slot_variables.append((
+          variable_name,  # The variable's full checkpoint name
+          owned_variable,  # The variable's _OwnedVariable object
+          checkpoint_id))  # The checkpoint ID of the node which owns this
+      # variable.
+      variable_proto = object_proto.variables.add()
+      variable_proto.local_name = owned_variable.name
+      # Figure out the name-based Saver's name for this variable.
+      saver_dict = saver_lib.BaseSaverBuilder.OpListToDict(
+          [owned_variable.variable], convert_variable_to_tensor=False)
+      variable_full_name, = saver_dict.keys()
+      variable_proto.full_name = variable_full_name
+
+    for child in checkpointable.ref.checkpoint_dependencies:
+      child_proto = object_proto.children.add()
+      child_proto.node_id = checkpoint_node_ids[child]
+      child_proto.local_uid = child.local_uid
+      if child.name is not None:
+        child_proto.local_name = child.name
+  return named_variables, non_slot_variables
+
+
+def _serialize_slot_variables(checkpointable_objects, path_to_root,
+                              non_slot_variables, object_graph_proto):
+  """Name slot variables and add them to `object_graph_proto`."""
+  named_slot_variables = {}
+  for optimizer_checkpoint_id, checkpointable_ref in enumerate(
+      checkpointable_objects):
+    if isinstance(checkpointable_ref.ref, optimizer_lib.Optimizer):
+      optimizer_object_proto = object_graph_proto.nodes[optimizer_checkpoint_id]
+      naming_scheme = _slot_variable_naming_for_optimizer(
+          optimizer=checkpointable_ref.ref,
+          path_to_root=path_to_root[checkpointable_ref])
       slot_names = checkpointable_ref.ref.get_slot_names()
-      for slot_name in slot_names:
-        if not _VALID_LOCAL_NAME.match(slot_name):
-          # Slot variable names include the name of the slot. We need to
-          # validate that part of the name to be sure that the checkpoint name
-          # is a valid name scope name.
-          raise ValueError(
-              ("Could not save slot variables for optimizer %s, because its "
-               "slot name has invalid characters (got '%s', was expecting it "
-               "to match the regular expression '%s').")
-              % (checkpointable_ref.ref, slot_name,
-                 _VALID_LOCAL_NAME.pattern))
-        suffix = "/".join((
-            _OPTIMIZER_SLOTS_NAME,
-            _name_from_path(path_to_root[checkpointable_ref]),
-            slot_name))
-        for variable_name, variable in non_slot_variables:
+      for (variable_path, owned_variable,
+           original_node_checkpoint_id) in non_slot_variables:
+        for slot_name in slot_names:
           slot_variable = checkpointable_ref.ref.get_slot(
-              variable, slot_name)
+              owned_variable.variable, slot_name)
           if slot_variable is not None:
-            named_variables[variable_name + "/" + suffix] = slot_variable
-  return named_variables
+            checkpoint_name = naming_scheme(
+                variable_path=variable_path, slot_name=slot_name)
+            named_slot_variables[checkpoint_name] = slot_variable
+            slot_variable_proto = optimizer_object_proto.slot_variables.add()
+            slot_variable_proto.slot_name = slot_name
+            # Figure out the name-based Saver's name for this variable.
+            saver_dict = saver_lib.BaseSaverBuilder.OpListToDict(
+                [slot_variable], convert_variable_to_tensor=False)
+            slot_variable_full_name, = saver_dict.keys()
+            slot_variable_proto.full_name = slot_variable_full_name
+            slot_variable_proto.original_variable_local_name = (
+                owned_variable.name)
+            slot_variable_proto.original_variable_node_id = (
+                original_node_checkpoint_id)
+  return named_slot_variables
+
+
+# TODO(allenl): Convenience utility for saving multiple objects (i.e. construct
+# a root Checkpointable if passed a list of Checkpointables).
+def _serialize_object_graph(root_checkpointable):
+  """Determine checkpoint keys for variables and build a serialized graph.
+
+  Non-slot variables are keyed based on a shortest path from the root saveable
+  to the object which owns the variable (i.e. the one which called
+  `Checkpointable.add_variable` to create it).
+
+  Slot variables are keyed based on a shortest path to the variable being
+  slotted for, a shortest path to their optimizer, and the slot name.
+
+  Args:
+    root_checkpointable: A `Checkpointable` object whose variables (including
+      the variables of dependencies, recursively) should be saved.
+
+  Returns:
+    A tuple of (named_variables, object_graph_proto):
+      named_variables: A dictionary mapping names to variable objects.
+      object_graph_proto: A CheckpointableObjectGraph protocol buffer containing
+        the serialized object graph and variable references.
+
+  Raises:
+    ValueError: If there are invalid characters in an optimizer's slot names.
+  """
+  checkpointable_objects, path_to_root = (
+      _breadth_first_checkpointable_traversal(root_checkpointable))
+  object_graph_proto = (
+      checkpointable_object_graph_pb2.CheckpointableObjectGraph())
+
+  # Gather non-slot variables.
+  named_variables, non_slot_variables = _serialize_non_slot_variables(
+      checkpointable_objects, path_to_root, object_graph_proto)
+
+  # Gather slot variables which are associated with variables gathered above.
+  named_slot_variables = _serialize_slot_variables(
+      checkpointable_objects, path_to_root, non_slot_variables,
+      object_graph_proto)
+
+  named_variables.update(named_slot_variables)
+  return named_variables, object_graph_proto
diff --git a/tensorflow/contrib/eager/python/checkpointable_test.py b/tensorflow/contrib/eager/python/checkpointable_test.py
index 5427b35c0b..f820990bbe 100644
--- a/tensorflow/contrib/eager/python/checkpointable_test.py
+++ b/tensorflow/contrib/eager/python/checkpointable_test.py
@@ -24,6 +24,7 @@ from tensorflow.contrib.eager.python import network as network_lib
 from tensorflow.python.eager import context
 from tensorflow.python.eager import test
 from tensorflow.python.framework import constant_op
+from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
 from tensorflow.python.framework import test_util
 from tensorflow.python.layers import core
@@ -44,8 +45,11 @@ class CheckpointableDenseLayer(core.Dense, checkpointable.Checkpointable):
     # Layer.add_variable should inherit from Checkpointable and simply call
     # super and then do post-processing.
     return checkpointable.Checkpointable.add_variable(
-        self, name=name, shape=shape,
-        getter=functools.partial(core.Dense.add_variable, self), **kwargs)
+        self,
+        name=name,
+        shape=shape,
+        getter=functools.partial(core.Dense.add_variable, self),
+        **kwargs)
 
 
 # pylint: disable=not-callable
@@ -82,15 +86,21 @@ class CheckpointableAdam(adam.AdamOptimizer, checkpointable.Checkpointable):
 
     if create_new:
       with ops.colocate_with(first_var):
+
         def _variable_getter(name, shape, dtype, initializer):
           del shape, dtype  # not used, but there for compatibility
           return variable_scope.variable(
               name=name, initial_value=initializer, trainable=False)
+
         self._beta1_power = self.add_variable(
-            name="beta1_power", shape=[], initializer=self._beta1,
+            name="beta1_power",
+            shape=[],
+            initializer=self._beta1,
             getter=_variable_getter)
         self._beta2_power = self.add_variable(
-            name="beta2_power", shape=[], initializer=self._beta2,
+            name="beta2_power",
+            shape=[],
+            initializer=self._beta2,
             getter=_variable_getter)
     # Create slots for the first and second moments.
     for v in var_list:
@@ -109,8 +119,7 @@ class MyNetwork(CheckpointableNetwork):
   def __init__(self):
     super(MyNetwork, self).__init__()
     self._named = self.track_layer(
-        CheckpointableDenseLayer(1, use_bias=True),
-        name="named_dense")
+        CheckpointableDenseLayer(1, use_bias=True), name="named_dense")
     self._unnamed = self.track_layer(
         CheckpointableDenseLayer(1, use_bias=False))
 
@@ -134,6 +143,7 @@ class Root(checkpointable.Checkpointable):
       # self.add_variable, by setting a custom getter.
       def _owned_variable_as_custom_getter(getter, *args, **kwargs):
         return self.add_variable(*args, getter=getter, **kwargs)
+
       with variable_scope.variable_scope(
           "", custom_getter=_owned_variable_as_custom_getter):
         self._global_step = training_util.create_global_step()
@@ -152,26 +162,28 @@ class CheckpointNamingTests(test.TestCase):
     optimizer = CheckpointableAdam(0.001)
     root_checkpointable = Root(optimizer=optimizer, network=network)
     if context.in_eager_mode():
-      optimizer.minimize(lambda: network(input_value),
-                         global_step=root_checkpointable.global_step)
-      optimizer.minimize(lambda: other_network(input_value),
-                         global_step=root_checkpointable.global_step)
+      optimizer.minimize(
+          lambda: network(input_value),
+          global_step=root_checkpointable.global_step)
+      optimizer.minimize(
+          lambda: other_network(input_value),
+          global_step=root_checkpointable.global_step)
     else:
       train_op = optimizer.minimize(
-          network(input_value),
-          global_step=root_checkpointable.global_step)
+          network(input_value), global_step=root_checkpointable.global_step)
       optimizer.minimize(
           other_network(input_value),
           global_step=root_checkpointable.global_step)
       self.evaluate(variables.global_variables_initializer())
       self.evaluate(train_op)
-    named_variables = checkpointable._name_variables(root_checkpointable)
+    named_variables, serialized_graph = checkpointable._serialize_object_graph(
+        root_checkpointable)
     expected_checkpoint_names = (
         # Created in the root node, so no prefix.
         "global_step",
         # No name provided to track_checkpointable(), so the position (1, after
         # the named track_checkpointable() which is 0) is used instead.
-        "network/1/kernel",
+        "network/_1/kernel",
         # track_checkpointable() with a name provided, so that's used
         "network/named_dense/kernel",
         "network/named_dense/bias",
@@ -179,25 +191,86 @@ class CheckpointNamingTests(test.TestCase):
         "optimizer/beta1_power",
         "optimizer/beta2_power",
         # Slot variables
-        "network/1/kernel/_OPTIMIZER_SLOT/optimizer/m",
-        "network/1/kernel/_OPTIMIZER_SLOT/optimizer/v",
+        "network/_1/kernel/_OPTIMIZER_SLOT/optimizer/m",
+        "network/_1/kernel/_OPTIMIZER_SLOT/optimizer/v",
         "network/named_dense/kernel/_OPTIMIZER_SLOT/optimizer/m",
         "network/named_dense/kernel/_OPTIMIZER_SLOT/optimizer/v",
         "network/named_dense/bias/_OPTIMIZER_SLOT/optimizer/m",
         "network/named_dense/bias/_OPTIMIZER_SLOT/optimizer/v",
     )
-    six.assertCountEqual(
-        self, expected_checkpoint_names, named_variables.keys())
+    six.assertCountEqual(self, expected_checkpoint_names,
+                         named_variables.keys())
     # Check that we've mapped to the right variable objects (not exhaustive)
     self.assertEqual("global_step:0", named_variables["global_step"].name)
     self.assertEqual("my_network/checkpointable_dense_layer_1/kernel:0",
-                     named_variables["network/1/kernel"].name)
+                     named_variables["network/_1/kernel"].name)
     self.assertEqual("my_network/checkpointable_dense_layer/kernel:0",
                      named_variables["network/named_dense/kernel"].name)
     self.assertEqual("beta1_power:0",
                      named_variables["optimizer/beta1_power"].name)
     self.assertEqual("beta2_power:0",
                      named_variables["optimizer/beta2_power"].name)
+    # Spot check the generated protocol buffers.
+    self.assertEqual(0, serialized_graph.nodes[0].children[0].local_uid)
+    self.assertEqual("optimizer",
+                     serialized_graph.nodes[0].children[0].local_name)
+    optimizer_node = serialized_graph.nodes[serialized_graph.nodes[0].children[
+        0].node_id]
+    self.assertEqual("beta1_power", optimizer_node.variables[0].local_name)
+    self.assertEqual("beta1_power", optimizer_node.variables[0].full_name)
+    self.assertEqual(
+        "kernel", optimizer_node.slot_variables[0].original_variable_local_name)
+    original_variable_owner = serialized_graph.nodes[
+        optimizer_node.slot_variables[0].original_variable_node_id]
+    self.assertEqual("kernel", original_variable_owner.variables[0].local_name)
+    self.assertEqual("m", optimizer_node.slot_variables[0].slot_name)
+    # We strip off the :0 suffix, as variable.name-based saving does.
+    self.assertEqual("my_network/checkpointable_dense_layer/kernel/Adam",
+                     optimizer_node.slot_variables[0].full_name)
+    self.assertEqual("my_network/checkpointable_dense_layer/kernel/Adam:0",
+                     optimizer.get_slot(
+                         var=named_variables["network/named_dense/kernel"],
+                         name="m").name)
+
+  def _get_checkpoint_name(self, name):
+    root = checkpointable.Checkpointable()
+    with variable_scope.variable_scope("get_checkpoint_name"):
+      # Create the variable in a variable scope so that we get more relaxed
+      # naming rules (variables outside a scope may not start with "_", "/" or
+      # "-"). Since we don't use the scope part of the name, these cases are
+      # somewhat annoying.
+      root.add_variable(name=name, shape=[1, 2], dtype=dtypes.float64)
+    named_variables, _ = checkpointable._serialize_object_graph(root)
+    checkpoint_name, = named_variables.keys()
+    with ops.name_scope("root/" + checkpoint_name):
+      pass  # Make sure we can use this as an op name if we prefix it.
+    return checkpoint_name
+
+  @test_util.run_in_graph_and_eager_modes(assert_no_eager_garbage=True)
+  def testVariableNameEscaping(self):
+    self.assertEqual(r"a_S__b_S__c", self._get_checkpoint_name(r"a/b/c"))
+    self.assertEqual(r"", self._get_checkpoint_name(r""))
+    self.assertEqual(r"_S__", self._get_checkpoint_name(r"/"))
+    self.assertEqual(r"_S___S_._", self._get_checkpoint_name(r"/_S__"))
+
+  @test_util.run_in_graph_and_eager_modes(assert_no_eager_garbage=True)
+  def testNumberedPath(self):
+    root = checkpointable.Checkpointable()
+    leaf = checkpointable.Checkpointable()
+    root.track_checkpointable(leaf)
+    leaf.add_variable(name="v", shape=[])
+    named_variables, _ = checkpointable._serialize_object_graph(root)
+    variable_name, = named_variables.keys()
+    self.assertEqual(r"_0/v", variable_name)
+
+  @test_util.run_in_graph_and_eager_modes()
+  def testLocalNameValidation(self):
+    root = checkpointable.Checkpointable()
+    leaf = checkpointable.Checkpointable()
+    with self.assertRaisesRegexp(ValueError, "invalid name"):
+      # Leading underscores are reserved, which avoids conflicts with
+      # un-named edges in paths and the optimizer slots identifier.
+      root.track_checkpointable(leaf, name="_12")
 
 
 if __name__ == "__main__":
-- 
GitLab


From 3d74c715a2ed66ced2bd87c90befb9e70ba8a3d1 Mon Sep 17 00:00:00 2001
From: Yao Zhang <yaozhang@google.com>
Date: Wed, 13 Dec 2017 13:21:49 -0800
Subject: [PATCH 0971/1225] Generate a unique name for const used by transpose
 node, so that we don't use a name that is previously used already. Add
 dependency for const in binary op if in a loop/frame.

PiperOrigin-RevId: 178948174
---
 .../grappler/optimizers/layout_optimizer.cc   |  31 +++--
 .../optimizers/layout_optimizer_test.cc       |   8 +-
 .../python/grappler/layout_optimizer_test.py  | 111 ++++++++++++++++++
 3 files changed, 138 insertions(+), 12 deletions(-)

diff --git a/tensorflow/core/grappler/optimizers/layout_optimizer.cc b/tensorflow/core/grappler/optimizers/layout_optimizer.cc
index aabdb670a2..40c5cbe3b8 100644
--- a/tensorflow/core/grappler/optimizers/layout_optimizer.cc
+++ b/tensorflow/core/grappler/optimizers/layout_optimizer.cc
@@ -598,8 +598,8 @@ class NodeProcessor : public GraphProcessor {
   NodeDef* AddNodePermNHWCToNCHW(const string& suffix,
                                  const string& depended_node,
                                  const string& device) {
-    auto const_node = AddNodePermConst(
-        strings::StrCat(kPermNHWCToNCHW, "-", suffix), device, {0, 3, 1, 2});
+    string name = strings::StrCat(kPermNHWCToNCHW, "-", suffix);
+    auto const_node = AddNodePermConst(name, device, {0, 3, 1, 2});
     // This is to ensure the transpose node and the const node are in the
     // same frame.
     *const_node->add_input() = AsControlDependency(depended_node);
@@ -658,8 +658,17 @@ class NodeProcessor : public GraphProcessor {
   string GetOrAddNodePermNHWCToNCHW(int pos) {
     string const_name;
     if (is_in_frame_) {
-      auto const_node = AddNodePermNHWCToNCHW(
-          node_->input(pos), NodeName(node_->input(pos)), node_->device());
+      string suffix = strings::StrCat(node_->name(), "_", pos);
+      string input = NodeName(node_->input(pos));
+      string depended_node;
+      if (!IsNodeNCHWToNHWC(input)) {
+        depended_node = input;
+      } else {
+        auto input_node = node_map_->GetNode(input);
+        depended_node = NodeName(input_node->input(0));
+      }
+      auto const_node =
+          AddNodePermNHWCToNCHW(suffix, depended_node, node_->device());
       const_name = const_node->name();
     } else {
       const_name = kPermNHWCToNCHW;
@@ -1008,7 +1017,8 @@ class BinaryOpProcessor : public AgnosticNodeProcessor {
     return false;
   }
 
-  NodeDef* AddNodeShapeConst(const string& name, int num_channels) {
+  NodeDef* AddNodeShapeConst(const string& name, int num_channels,
+                             const string& depended_node) {
     NodeDef* node = graph_->add_node();
     node_map_->AddNode(name, node);
     node->set_name(name);
@@ -1026,6 +1036,11 @@ class BinaryOpProcessor : public AgnosticNodeProcessor {
     }
     tensor.AsProtoTensorContent(attr_tensor.mutable_tensor());
     node->mutable_attr()->insert({"value", attr_tensor});
+    if (is_in_frame_) {
+      // This is to ensure the transpose node and the const node are in the
+      // same frame.
+      *node->add_input() = AsControlDependency(depended_node);
+    }
     return node;
   }
 
@@ -1058,8 +1073,7 @@ class BinaryOpProcessor : public AgnosticNodeProcessor {
       vector_index = 0;
     }
     if (vector_index != -1) {
-      string base_name =
-          strings::StrCat(node_->name(), "-", node_->input(vector_index));
+      string base_name = strings::StrCat(node_->name(), "-", vector_index);
       string reshape_node_name =
           AddPrefixToNodeName(base_name, kReshapeNHWCToNCHW, "-");
       string shape_const_node_name =
@@ -1068,7 +1082,8 @@ class BinaryOpProcessor : public AgnosticNodeProcessor {
       TF_RETURN_IF_ERROR(HasAttribute(*input_node, "_output_shapes"));
       int vector_size =
           input_node->attr().at("_output_shapes").list().shape(0).dim(0).size();
-      AddNodeShapeConst(shape_const_node_name, vector_size);
+      AddNodeShapeConst(shape_const_node_name, vector_size,
+                        NodeName(node_->input(vector_index)));
       TF_RETURN_IF_ERROR(HasAttribute(*node_, "T"));
       AddNodeReshape(reshape_node_name, node_->input(vector_index),
                      shape_const_node_name, node_->attr().at("T").type());
diff --git a/tensorflow/core/grappler/optimizers/layout_optimizer_test.cc b/tensorflow/core/grappler/optimizers/layout_optimizer_test.cc
index d45f4ae9b9..b1d841a99b 100644
--- a/tensorflow/core/grappler/optimizers/layout_optimizer_test.cc
+++ b/tensorflow/core/grappler/optimizers/layout_optimizer_test.cc
@@ -817,8 +817,8 @@ TEST_F(LayoutOptimizerTest, Mul4DAndVector) {
   NodeMap node_map(&output);
   auto mul_node = node_map.GetNode("mul");
   EXPECT_EQ(mul_node->input(0), "Conv2D");
-  EXPECT_EQ(mul_node->input(1), "LayoutOptimizerReshapeNHWCToNCHW-mul-vector");
-  auto mul_const = node_map.GetNode("LayoutOptimizerReshapeConst-mul-vector");
+  EXPECT_EQ(mul_node->input(1), "LayoutOptimizerReshapeNHWCToNCHW-mul-1");
+  auto mul_const = node_map.GetNode("LayoutOptimizerReshapeConst-mul-1");
   Tensor tensor;
   EXPECT_TRUE(
       tensor.FromProto(mul_const->mutable_attr()->at({"value"}).tensor()));
@@ -840,9 +840,9 @@ TEST_F(LayoutOptimizerTest, MulVectorAnd4D) {
   Status status = optimizer.Optimize(virtual_cluster_.get(), item, &output);
   NodeMap node_map(&output);
   auto mul_node = node_map.GetNode("mul");
-  EXPECT_EQ(mul_node->input(0), "LayoutOptimizerReshapeNHWCToNCHW-mul-vector");
+  EXPECT_EQ(mul_node->input(0), "LayoutOptimizerReshapeNHWCToNCHW-mul-0");
   EXPECT_EQ(mul_node->input(1), "Conv2D");
-  auto mul_const = node_map.GetNode("LayoutOptimizerReshapeConst-mul-vector");
+  auto mul_const = node_map.GetNode("LayoutOptimizerReshapeConst-mul-0");
   Tensor tensor;
   EXPECT_TRUE(
       tensor.FromProto(mul_const->mutable_attr()->at({"value"}).tensor()));
diff --git a/tensorflow/python/grappler/layout_optimizer_test.py b/tensorflow/python/grappler/layout_optimizer_test.py
index 5d339ec697..3b7941f7c3 100644
--- a/tensorflow/python/grappler/layout_optimizer_test.py
+++ b/tensorflow/python/grappler/layout_optimizer_test.py
@@ -78,6 +78,25 @@ def _two_layer_model(x):
   return h_pool2
 
 
+def _model_with_branch(x):
+  x_image = array_ops.reshape(x, [-1, 28, 28, 1])
+  w_conv1 = _weight([5, 5, 1, 32])
+  w_conv2 = _weight([5, 5, 1, 32])
+  c_conv1 = _conv2d(x_image, w_conv1)
+  c_conv2 = _conv2d(x_image, w_conv2)
+  add = math_ops.add(c_conv1, c_conv2)
+  return add
+
+
+def _model_with_vec_and_4d(x):
+  x_image = array_ops.reshape(x, [-1, 28, 28, 1])
+  w_conv1 = _weight([5, 5, 1, 32])
+  c_conv1 = _conv2d(x_image, w_conv1)
+  vector = constant_op.constant(6.4, shape=[32])
+  add = math_ops.add(c_conv1, vector)
+  return add
+
+
 def _loop():
   random_seed.set_random_seed(0)
   x1 = random_ops.truncated_normal([1, 784], seed=0)
@@ -89,6 +108,30 @@ def _loop():
   return outputs
 
 
+def _loop_with_branch():
+  random_seed.set_random_seed(0)
+  x1 = random_ops.truncated_normal([1, 784], seed=0)
+  x2 = random_ops.truncated_normal([1, 784], seed=0)
+  x3 = random_ops.truncated_normal([1, 784], seed=0)
+  x4 = random_ops.truncated_normal([1, 784], seed=0)
+  elems = (x1, x2, x3, x4)
+  outputs = functional_ops.map_fn(
+      _model_with_branch, elems, dtype=dtypes.float32)
+  return outputs
+
+
+def _loop_with_vec_and_4d():
+  random_seed.set_random_seed(0)
+  x1 = random_ops.truncated_normal([1, 784], seed=0)
+  x2 = random_ops.truncated_normal([1, 784], seed=0)
+  x3 = random_ops.truncated_normal([1, 784], seed=0)
+  x4 = random_ops.truncated_normal([1, 784], seed=0)
+  elems = (x1, x2, x3, x4)
+  outputs = functional_ops.map_fn(
+      _model_with_vec_and_4d, elems, dtype=dtypes.float32)
+  return outputs
+
+
 def _get_config(layout_optimizer=True):
   if layout_optimizer:
     rewrite_options = rewriter_config_pb2.RewriterConfig(
@@ -303,6 +346,74 @@ class LayoutOptimizerTest(test.TestCase):
         metadata = config_pb2.RunMetadata()
         output_val = sess.run(output, run_metadata=metadata)
 
+      nodes = []
+      num_transposes = 0
+      for node in metadata.cost_graph.node:
+        if node.name.startswith('LayoutOptimizerTranspose'):
+          num_transposes += 1
+        nodes.append(node.name)
+
+      # Four transposes were initially added in the Expand phase of
+      # LayoutOptimizer; two of them are cancelled out in the Collapse phase.
+      expected_num_transposes = 2
+      self.assertEqual(expected_num_transposes, num_transposes)
+      self.assertEqual(expected_num_transposes, num_transposes)
+      self.assertIn('LayoutOptimizerTransposeNHWCToNCHW-map/while/Conv2D-0',
+                    nodes)
+      self.assertIn(
+          'LayoutOptimizerTransposeNCHWToNHWC-map/while/MaxPool_1-0-2', nodes)
+      self.assertAllClose(output_val_ref, output_val, atol=1e-3)
+
+  def testLoopWithBranch(self):
+    if test.is_gpu_available(cuda_only=True):
+      output = _loop_with_branch()
+
+      with session.Session() as sess:
+        output_val_ref = sess.run(output)
+
+      with session.Session(config=_get_config()) as sess:
+        metadata = config_pb2.RunMetadata()
+        output_val = sess.run(output, run_metadata=metadata)
+
+      nodes = []
+      num_transposes = 0
+      for node in metadata.cost_graph.node:
+        if node.name.startswith('LayoutOptimizerTranspose'):
+          num_transposes += 1
+        nodes.append(node.name)
+
+      expected_num_transposes = 2
+      self.assertEqual(expected_num_transposes, num_transposes)
+      self.assertIn('LayoutOptimizerTransposeNHWCToNCHW-map/while/Conv2D-0',
+                    nodes)
+      self.assertIn('LayoutOptimizerTransposeNCHWToNHWC-map/while/Add-0-2',
+                    nodes)
+      self.assertAllClose(output_val_ref, output_val, atol=1e-3)
+
+  def testLoopWithVecAnd4D(self):
+    if test.is_gpu_available(cuda_only=True):
+      output = _loop_with_vec_and_4d()
+
+      with session.Session() as sess:
+        output_val_ref = sess.run(output)
+
+      with session.Session(config=_get_config()) as sess:
+        metadata = config_pb2.RunMetadata()
+        output_val = sess.run(output, run_metadata=metadata)
+
+      nodes = []
+      num_transposes = 0
+      for node in metadata.cost_graph.node:
+        if node.name.startswith('LayoutOptimizerTranspose'):
+          num_transposes += 1
+        nodes.append(node.name)
+
+      expected_num_transposes = 2
+      self.assertEqual(expected_num_transposes, num_transposes)
+      self.assertIn('LayoutOptimizerTransposeNHWCToNCHW-map/while/Conv2D-0',
+                    nodes)
+      self.assertIn('LayoutOptimizerTransposeNCHWToNHWC-map/while/Add-0-2',
+                    nodes)
       self.assertAllClose(output_val_ref, output_val, atol=1e-3)
 
   def testGradient(self):
-- 
GitLab


From 553e8f14c8c025a8c09e0a6cb824c786bc258f56 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 13 Dec 2017 13:39:02 -0800
Subject: [PATCH 0972/1225] Update Stream::BlockHostUntilDone examples and
 documentation.

The new Status return value must be explicitly handled or ignored.

PiperOrigin-RevId: 178950527
---
 tensorflow/stream_executor/blas.h | 4 ++--
 tensorflow/stream_executor/fft.h  | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/tensorflow/stream_executor/blas.h b/tensorflow/stream_executor/blas.h
index eb1b19c5d9..072f085546 100644
--- a/tensorflow/stream_executor/blas.h
+++ b/tensorflow/stream_executor/blas.h
@@ -30,8 +30,8 @@ limitations under the License.
 //  Stream stream{stream_exec};
 //  stream
 //    .Init()
-//    .ThenBlasAxpy(1024, 5.5, x, 1, &y, 1)
-//    .BlockHostUntilDone();
+//    .ThenBlasAxpy(1024, 5.5, x, 1, &y, 1);
+//  SE_CHECK_OK(stream.BlockHostUntilDone());
 //
 // By using stream operations in this manner the user can easily intermix custom
 // kernel launches (via StreamExecutor::ThenLaunch()) with these pre-canned BLAS
diff --git a/tensorflow/stream_executor/fft.h b/tensorflow/stream_executor/fft.h
index 98cd77e206..408516a416 100644
--- a/tensorflow/stream_executor/fft.h
+++ b/tensorflow/stream_executor/fft.h
@@ -34,8 +34,8 @@ limitations under the License.
 //     stream_exec.AsFft()->Create1dPlan(&stream, 1024, Type::kC2CForward);
 //  stream
 //    .Init()
-//    .ThenFft(plan.get(), x, &y)
-//    .BlockHostUntilDone();
+//    .ThenFft(plan.get(), x, &y);
+//  SE_CHECK_OK(stream.BlockHostUntilDone());
 //
 // By using stream operations in this manner the user can easily intermix custom
 // kernel launches (via StreamExecutor::ThenLaunch()) with these pre-canned FFT
-- 
GitLab


From 70062d11bf11d6579bfdbc87c3350a0074a12ae8 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 13 Dec 2017 13:45:05 -0800
Subject: [PATCH 0973/1225] Rename Stream::BlockHostUntilDoneWithStatus to
 BlockHostUntilDone.

PiperOrigin-RevId: 178951330
---
 tensorflow/compiler/jit/xla_device_context.cc             | 4 ++--
 tensorflow/compiler/xla/service/executable.cc             | 2 +-
 tensorflow/compiler/xla/service/executable.h              | 2 +-
 tensorflow/compiler/xla/service/execution_tracker.cc      | 2 +-
 tensorflow/compiler/xla/service/gpu/gpu_executable.cc     | 8 ++++----
 .../compiler/xla/service/gpu/gpu_transfer_manager.cc      | 2 +-
 tensorflow/compiler/xla/service/gpu/infeed_thunk.cc       | 2 +-
 tensorflow/compiler/xla/service/gpu/while_thunk.cc        | 2 +-
 tensorflow/compiler/xla/service/hlo_runner.cc             | 2 +-
 tensorflow/compiler/xla/service/service.cc                | 2 +-
 tensorflow/contrib/nccl/kernels/nccl_manager_test.cc      | 4 ++--
 tensorflow/core/common_runtime/gpu/gpu_util.cc            | 2 +-
 tensorflow/stream_executor/cuda/cuda_dnn.cc               | 2 +-
 13 files changed, 18 insertions(+), 18 deletions(-)

diff --git a/tensorflow/compiler/jit/xla_device_context.cc b/tensorflow/compiler/jit/xla_device_context.cc
index be5a288b37..c936222f32 100644
--- a/tensorflow/compiler/jit/xla_device_context.cc
+++ b/tensorflow/compiler/jit/xla_device_context.cc
@@ -74,7 +74,7 @@ void XlaTransferManager::CopyCPUTensorToDevice(const Tensor* cpu_tensor,
     Status status;
     stream_->ThenMemcpy(&dev_dst_ptr, src_ptr, total_bytes);
     // TODO(hpucha): Make this asynchronous.
-    Status block_status = stream_->BlockHostUntilDoneWithStatus();
+    Status block_status = stream_->BlockHostUntilDone();
     if (!block_status.ok()) {
       status = xla::InternalError(
           "Failed to complete data transfer on stream %p: %s", stream_,
@@ -110,7 +110,7 @@ void XlaTransferManager::CopyDeviceTensorToCPU(const Tensor* device_tensor,
     Status status;
     stream_->ThenMemcpy(dst_ptr, dev_src_ptr, total_bytes);
     // TODO(hpucha): Make this asynchronous.
-    Status block_status = stream_->BlockHostUntilDoneWithStatus();
+    Status block_status = stream_->BlockHostUntilDone();
     if (!block_status.ok()) {
       status = xla::InternalError(
           "Failed to complete data transfer on stream %p: %s", stream_,
diff --git a/tensorflow/compiler/xla/service/executable.cc b/tensorflow/compiler/xla/service/executable.cc
index 8b8991b540..ad5d5ead00 100644
--- a/tensorflow/compiler/xla/service/executable.cc
+++ b/tensorflow/compiler/xla/service/executable.cc
@@ -52,7 +52,7 @@ Executable::ExecuteOnStreams(
   }
   for (const auto& options : run_options) {
     TF_RET_CHECK(options.stream() != nullptr);
-    TF_RETURN_IF_ERROR(options.stream()->BlockHostUntilDoneWithStatus());
+    TF_RETURN_IF_ERROR(options.stream()->BlockHostUntilDone());
   }
   return return_values;
 }
diff --git a/tensorflow/compiler/xla/service/executable.h b/tensorflow/compiler/xla/service/executable.h
index 12faf28d02..cb9ee47dc6 100644
--- a/tensorflow/compiler/xla/service/executable.h
+++ b/tensorflow/compiler/xla/service/executable.h
@@ -224,7 +224,7 @@ StatusOr<ReturnT> Executable::ExecuteOnStreamWrapper(
   if (profile != nullptr) {
     VLOG(1) << "enqueueing 'stop timer' and blocking host until done...";
     stream->ThenStopTimer(timer.get());
-    SE_CHECK_OK(stream->BlockHostUntilDoneWithStatus());
+    SE_CHECK_OK(stream->BlockHostUntilDone());
     VLOG(1) << "done with block-host-until-done";
 
     // Merge in run-time profile information from execution_profile.
diff --git a/tensorflow/compiler/xla/service/execution_tracker.cc b/tensorflow/compiler/xla/service/execution_tracker.cc
index 6a34c2638b..2f0b9ed2bd 100644
--- a/tensorflow/compiler/xla/service/execution_tracker.cc
+++ b/tensorflow/compiler/xla/service/execution_tracker.cc
@@ -39,7 +39,7 @@ AsyncExecution::AsyncExecution(Backend* backend,
 
 tensorflow::Status AsyncExecution::BlockUntilDone() const {
   for (auto& stream : streams_) {
-    TF_RETURN_IF_ERROR(stream->BlockHostUntilDoneWithStatus());
+    TF_RETURN_IF_ERROR(stream->BlockHostUntilDone());
   }
   return tensorflow::Status::OK();
 }
diff --git a/tensorflow/compiler/xla/service/gpu/gpu_executable.cc b/tensorflow/compiler/xla/service/gpu/gpu_executable.cc
index 061707f32b..b802ae9c7a 100644
--- a/tensorflow/compiler/xla/service/gpu/gpu_executable.cc
+++ b/tensorflow/compiler/xla/service/gpu/gpu_executable.cc
@@ -69,7 +69,7 @@ class HloExecutionProfiler {
   ~HloExecutionProfiler() {
     if (do_profile_) {
       stream_->ThenStopTimer(execution_timer_.get());
-      stream_->BlockHostUntilDoneWithStatus().IgnoreError();
+      stream_->BlockHostUntilDone().IgnoreError();
       profile_->set_total_cycles_executed(
           *computation_, execution_timer_->Nanoseconds() * clock_rate_ghz_);
     }
@@ -87,7 +87,7 @@ class HloExecutionProfiler {
   void FinishOperation(const HloInstruction* hlo_instruction) {
     if (do_profile_) {
       stream_->ThenStopTimer(per_op_timer_.get());
-      stream_->BlockHostUntilDoneWithStatus().IgnoreError();
+      stream_->BlockHostUntilDone().IgnoreError();
       profile_->SetCyclesTakenBy(
           hlo_instruction, per_op_timer_->Nanoseconds() * clock_rate_ghz_);
     }
@@ -170,7 +170,7 @@ Status GpuExecutable::ExecuteThunks(
     // If this thunk requests it, wait for all currently-executing thunks to
     // finish.  This is useful e.g. if the thunk is about to perform autotuning.
     if (thunk->ShouldHaltAllActivityBeforeRunning(stream)) {
-      TF_RETURN_IF_ERROR(main_stream->BlockHostUntilDoneWithStatus());
+      TF_RETURN_IF_ERROR(main_stream->BlockHostUntilDone());
     }
 
     profiler.StartOperation();
@@ -192,7 +192,7 @@ Status GpuExecutable::ExecuteThunks(
   // TODO(b/30100571): we could potentially postpone deallocating the temp
   // buffers until a different computation is executed.
   if (block_host_until_done) {
-    Status block_status = main_stream->BlockHostUntilDoneWithStatus();
+    Status block_status = main_stream->BlockHostUntilDone();
     if (!block_status.ok()) {
       return InternalError(
           "Failed to complete all kernels launched on stream %p: %s",
diff --git a/tensorflow/compiler/xla/service/gpu/gpu_transfer_manager.cc b/tensorflow/compiler/xla/service/gpu/gpu_transfer_manager.cc
index 550df33576..ae92daef88 100644
--- a/tensorflow/compiler/xla/service/gpu/gpu_transfer_manager.cc
+++ b/tensorflow/compiler/xla/service/gpu/gpu_transfer_manager.cc
@@ -105,7 +105,7 @@ Status GpuTransferManager::EnqueueBuffersToInfeed(
   // infeed requests, blocking on the stream might be
   // heavy-handed. Figure out if finer-grained acknowledgement is
   // possible.
-  Status block_status = stream->BlockHostUntilDoneWithStatus();
+  Status block_status = stream->BlockHostUntilDone();
   if (!block_status.ok()) {
     for (gpu::InfeedBuffer* b : buffers) {
       b->Done();
diff --git a/tensorflow/compiler/xla/service/gpu/infeed_thunk.cc b/tensorflow/compiler/xla/service/gpu/infeed_thunk.cc
index db8659a8b9..2ac95ceb69 100644
--- a/tensorflow/compiler/xla/service/gpu/infeed_thunk.cc
+++ b/tensorflow/compiler/xla/service/gpu/infeed_thunk.cc
@@ -65,7 +65,7 @@ Status InfeedThunk::ExecuteOnStream(const BufferAllocations& buffer_allocations,
                        buffer->length());
   }
 
-  Status block_status = stream->BlockHostUntilDoneWithStatus();
+  Status block_status = stream->BlockHostUntilDone();
   if (!block_status.ok()) {
     return InternalError("Failed to complete data transfer on stream %p: %s",
                          stream, block_status.error_message().c_str());
diff --git a/tensorflow/compiler/xla/service/gpu/while_thunk.cc b/tensorflow/compiler/xla/service/gpu/while_thunk.cc
index e3ecb784dd..c21559af6d 100644
--- a/tensorflow/compiler/xla/service/gpu/while_thunk.cc
+++ b/tensorflow/compiler/xla/service/gpu/while_thunk.cc
@@ -53,7 +53,7 @@ Status WhileThunk::ExecuteOnStream(const BufferAllocations& buffer_allocations,
     // Copy the result of condition computation and break the loop if 'false'.
     bool condition_result;
     stream->ThenMemcpy(&condition_result, condition_result_data, sizeof(bool));
-    Status block_status = stream->BlockHostUntilDoneWithStatus();
+    Status block_status = stream->BlockHostUntilDone();
     if (!block_status.ok()) {
       return InternalError(
           "Failed to complete all kernels launched on stream %p: %s", stream,
diff --git a/tensorflow/compiler/xla/service/hlo_runner.cc b/tensorflow/compiler/xla/service/hlo_runner.cc
index b4ca8d12a1..a6101bbe60 100644
--- a/tensorflow/compiler/xla/service/hlo_runner.cc
+++ b/tensorflow/compiler/xla/service/hlo_runner.cc
@@ -150,7 +150,7 @@ StatusOr<se::DeviceMemoryBase> HloRunner::Execute(
       se::DeviceMemoryBase result,
       executable->ExecuteOnStream(&service_run_options, arguments,
                                   /*hlo_execution_profile=*/nullptr));
-  TF_RETURN_IF_ERROR(stream.BlockHostUntilDoneWithStatus());
+  TF_RETURN_IF_ERROR(stream.BlockHostUntilDone());
 
   allocations_.push_back(result);
 
diff --git a/tensorflow/compiler/xla/service/service.cc b/tensorflow/compiler/xla/service/service.cc
index 462b76e9b4..ecc3c0ff12 100644
--- a/tensorflow/compiler/xla/service/service.cc
+++ b/tensorflow/compiler/xla/service/service.cc
@@ -566,7 +566,7 @@ Service::ExecuteParallelAndRegisterResult(
 
   // Wait for all executions to complete.
   for (int64 i = 0; i < streams.size(); ++i) {
-    Status block_status = streams[i]->BlockHostUntilDoneWithStatus();
+    Status block_status = streams[i]->BlockHostUntilDone();
     if (!block_status.ok()) {
       return InternalError("failed to complete execution for stream %lld: %s",
                            i, block_status.error_message().c_str());
diff --git a/tensorflow/contrib/nccl/kernels/nccl_manager_test.cc b/tensorflow/contrib/nccl/kernels/nccl_manager_test.cc
index ef76df6b0d..658baf18d3 100644
--- a/tensorflow/contrib/nccl/kernels/nccl_manager_test.cc
+++ b/tensorflow/contrib/nccl/kernels/nccl_manager_test.cc
@@ -175,7 +175,7 @@ class NcclManagerTest : public ::testing::Test {
       auto out_gpu_mem = AsDeviceMemory(out_gpu.flat<float>().data());
       stream->ThenMemcpy(out_cpu.flat<float>().data(), out_gpu_mem,
                          out_cpu.TotalBytes());
-      SE_ASSERT_OK(stream->BlockHostUntilDoneWithStatus());
+      SE_ASSERT_OK(stream->BlockHostUntilDone());
       test::ExpectTensorEqual<float>(test_case->expected, out_cpu);
     }
   }
@@ -236,7 +236,7 @@ TEST_F(NcclManagerTest, MultipleCallers) {
     for (int i = 0; i < num_ranks; ++i) {
       auto* device = devices->at(i % devices->size());
       auto* stream = device->tensorflow_gpu_device_info()->stream;
-      SE_ASSERT_OK(stream->BlockHostUntilDoneWithStatus());
+      SE_ASSERT_OK(stream->BlockHostUntilDone());
     }
 
     std::random_shuffle(case_and_device_num.begin(), case_and_device_num.end());
diff --git a/tensorflow/core/common_runtime/gpu/gpu_util.cc b/tensorflow/core/common_runtime/gpu/gpu_util.cc
index 9d8210a877..a0f5877d62 100644
--- a/tensorflow/core/common_runtime/gpu/gpu_util.cc
+++ b/tensorflow/core/common_runtime/gpu/gpu_util.cc
@@ -352,7 +352,7 @@ Status GPUUtil::Sync(Device* gpu_device) {
   if (!dev_info) {
     return errors::Internal("Failed to find dest device GPUDeviceInfo");
   }
-  return dev_info->stream->BlockHostUntilDoneWithStatus();
+  return dev_info->stream->BlockHostUntilDone();
 }
 
 Status GPUUtil::SyncAll(Device* gpu_device) {
diff --git a/tensorflow/stream_executor/cuda/cuda_dnn.cc b/tensorflow/stream_executor/cuda/cuda_dnn.cc
index 60c889c053..daeb9a4b77 100644
--- a/tensorflow/stream_executor/cuda/cuda_dnn.cc
+++ b/tensorflow/stream_executor/cuda/cuda_dnn.cc
@@ -4252,7 +4252,7 @@ bool CudnnSupport::DoDepthConcatenate(
     const auto& dimensions = input_dimensions[i];
     tmp.resize(dimensions.ElementCount());
     stream->ThenMemcpyD2H<float>(*input_data[i], &tmp);
-    port::Status block_status = stream->BlockHostUntilDoneWithStatus();
+    port::Status block_status = stream->BlockHostUntilDone();
     if (!block_status.ok()) {
       LOG(ERROR) << "BlockHostUntilDone failed: " << block_status;
       return false;
-- 
GitLab


From 9da6e87662c54d03310fc72fe7efdd2f22b87f85 Mon Sep 17 00:00:00 2001
From: Akshay Agrawal <akshayka@google.com>
Date: Wed, 13 Dec 2017 14:22:17 -0800
Subject: [PATCH 0974/1225] Call convert_to_tensor on all inputs to the op for
 _backprop_call in graph mode

Fixes a bug in which EagerTensors were provided as input to an op.

PiperOrigin-RevId: 178957283
---
 tensorflow/python/eager/function.py      | 12 +++++-------
 tensorflow/python/eager/function_test.py | 13 +++++++++++++
 2 files changed, 18 insertions(+), 7 deletions(-)

diff --git a/tensorflow/python/eager/function.py b/tensorflow/python/eager/function.py
index a8f3516f79..6b2bc2f380 100644
--- a/tensorflow/python/eager/function.py
+++ b/tensorflow/python/eager/function.py
@@ -296,7 +296,7 @@ class GraphModeFunction(object):
                variables=None):
     defined_function = _EagerDefinedFunction(
         name, graph, operations, input_placeholders, outputs)
-    if  len(input_placeholders) != len(defined_function.signature.input_arg):
+    if len(input_placeholders) != len(defined_function.signature.input_arg):
       raise ValueError("Internal error: invalid lengths. %s %s" % (
           len(input_placeholders), len(defined_function.signature.input_arg)))
     self._input_placeholders = input_placeholders
@@ -360,12 +360,9 @@ class GraphModeFunction(object):
     if ctx.in_graph_mode():
       g = ops.get_default_graph()
       g._add_function(self._forward_fdef)  # pylint: disable=protected-access
-      def make_tensor(x):
-        if isinstance(x, ops.Tensor):
-          return x
-        return ops.internal_convert_to_tensor(x, ctx=ctx)
       op = g.create_op(
-          signature.name, [make_tensor(x) for x in all_args],
+          signature.name,
+          [ops.internal_convert_to_tensor(x, ctx=ctx) for x in all_args],
           tuple(dtypes_module.DType(x.type) for x in signature.output_arg),
           op_def=signature,
           name="FunctionCall",
@@ -421,7 +418,8 @@ class GraphModeFunction(object):
       signature = self._function_def.definition.signature
       args = list(tensor_inputs) + self._extra_inputs
       op = g.create_op(
-          signature.name, [ops.convert_to_tensor(x) for x in args],
+          signature.name,
+          [ops.internal_convert_to_tensor(x, ctx=ctx) for x in args],
           tuple(dtypes_module.DType(x.type) for x in signature.output_arg),
           op_def=signature,
           name="FunctionCall",
diff --git a/tensorflow/python/eager/function_test.py b/tensorflow/python/eager/function_test.py
index c55f2f1d59..aee2a91a0e 100644
--- a/tensorflow/python/eager/function_test.py
+++ b/tensorflow/python/eager/function_test.py
@@ -310,6 +310,19 @@ class FunctionTest(test.TestCase):
 
     self.assertAllEqual(3, add_one(constant_op.constant(2)))
 
+  def testVariableCaptureInNestedFunctions(self):
+    v = resource_variable_ops.ResourceVariable(1)
+
+    @function.defun
+    def read():
+      return v.read_value()
+
+    @function.defun
+    def outer():
+      return read()
+
+    self.assertEqual(1, int(outer()))
+
   def testSequenceInputs(self):
     clip_by_global_norm = function.defun(clip_ops.clip_by_global_norm)
     t_list = [constant_op.constant(1.0), constant_op.constant(2.0)]
-- 
GitLab


From 38b20f83dbaada96902bdd5b419feb5a8e47395c Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 13 Dec 2017 14:23:40 -0800
Subject: [PATCH 0975/1225] - adds support constant cast operators - adds
 support for legacy "BatchMatMul" operators - adds constant scalar values to
 graphviz output

PiperOrigin-RevId: 178957498
---
 tensorflow/contrib/lite/toco/dump_graphviz.cc |  65 ++++++++++-
 .../resolve_constant_unary.cc                 | 104 ++++++++++++------
 .../contrib/lite/toco/import_tensorflow.cc    |  24 +++-
 3 files changed, 149 insertions(+), 44 deletions(-)

diff --git a/tensorflow/contrib/lite/toco/dump_graphviz.cc b/tensorflow/contrib/lite/toco/dump_graphviz.cc
index d1a7b26d91..47d5e96825 100644
--- a/tensorflow/contrib/lite/toco/dump_graphviz.cc
+++ b/tensorflow/contrib/lite/toco/dump_graphviz.cc
@@ -106,6 +106,51 @@ Color GetColorForArray(const Model& model, const string& array_name) {
   return Color(0xF5, 0xF5, 0xF5);
 }
 
+bool ArrayIsScalarFloat(Model const* model, const std::string& name,
+                        float* val) {
+  const auto& op_array = model->GetArray(name);
+  if (!op_array.buffer || op_array.buffer->type != ArrayDataType::kFloat ||
+      RequiredBufferSizeForShape(op_array.shape()) != 1) {
+    return false;
+  }
+  const auto& data = op_array.GetBuffer<ArrayDataType::kFloat>().data;
+  if (data.empty()) {
+    return false;
+  }
+  *val = data[0];
+  return true;
+}
+
+bool ArrayIsScalarInt(Model const* model, const std::string& name, int* val) {
+  const auto& op_array = model->GetArray(name);
+  if (!op_array.buffer || RequiredBufferSizeForShape(op_array.shape()) != 1) {
+    return false;
+  }
+
+  if (op_array.buffer->type == ArrayDataType::kUint8) {
+    const auto& data = op_array.GetBuffer<ArrayDataType::kUint8>().data;
+    if (data.empty()) {
+      return false;
+    }
+    *val = data[0];
+  } else if (op_array.buffer->type == ArrayDataType::kInt32) {
+    const auto& data = op_array.GetBuffer<ArrayDataType::kInt32>().data;
+    if (data.empty()) {
+      return false;
+    }
+    *val = data[0];
+  } else if (op_array.buffer->type == ArrayDataType::kInt64) {
+    const auto& data = op_array.GetBuffer<ArrayDataType::kInt64>().data;
+    if (data.empty()) {
+      return false;
+    }
+    *val = data[0];
+  } else {
+    return false;
+  }
+  return true;
+}
+
 NodeProperties GetPropertiesForArray(const Model& model,
                                      const string& array_name) {
   NodeProperties node_properties;
@@ -130,12 +175,20 @@ NodeProperties GetPropertiesForArray(const Model& model,
       if (id == 0) {
         AppendF(&node_properties.label, "%d", array_shape.dims(id));
       } else {
-        AppendF(&node_properties.label, "x%d", array_shape.dims(id));
+        // 00D7 is multiplication symbol in unicode
+        AppendF(&node_properties.label, "\u00D7%d", array_shape.dims(id));
       }
     }
     node_properties.label += "]";
+    float flt_val;
+    if (ArrayIsScalarFloat(&model, array_name, &flt_val)) {
+      AppendF(&node_properties.label, " = %.3f", flt_val);
+    }
+    int int_val;
+    if (ArrayIsScalarInt(&model, array_name, &int_val)) {
+      AppendF(&node_properties.label, " = %d", int_val);
+    }
   }
-
   if (array.minmax) {
     AppendF(&node_properties.label, "\\nMinMax: [%.3g, %.3g]",
             array.minmax->min, array.minmax->max);
@@ -274,6 +327,10 @@ void DumpGraphviz(const Model& model, string* output_file_contents) {
             op_properties.color.TextColorString().c_str());
     // Add nodes and edges for all inputs of the operator.
     for (const auto& input : op.inputs) {
+      if (model.arrays.count(input) == 0) {
+        // Arrays should _always_ exist. Except, perhaps, during development.
+        continue;
+      }
       auto array_properties = GetPropertiesForArray(model, input);
       if (!already_added_arrays.count(input)) {
         AppendF(output_file_contents, kNodeFormat, input,
@@ -286,6 +343,10 @@ void DumpGraphviz(const Model& model, string* output_file_contents) {
     }
     // Add nodes and edges for all outputs of the operator.
     for (const auto& output : op.outputs) {
+      if (model.arrays.count(output) == 0) {
+        // Arrays should _always_ exist. Except, perhaps, during development.
+        continue;
+      }
       auto array_properties = GetPropertiesForArray(model, output);
       if (!already_added_arrays.count(output)) {
         AppendF(output_file_contents, kNodeFormat, output,
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_unary.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_unary.cc
index bb9bda3c82..ebc110483e 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_unary.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_unary.cc
@@ -32,7 +32,8 @@ bool ResolveConstantUnaryOperator::Run(Model* model, std::size_t op_index) {
   const auto unary_it = model->operators.begin() + op_index;
   const auto* unary_op = unary_it->get();
   // Test for unary ops of types that we know how to resolve
-  if (unary_op->type != OperatorType::kTensorFlowRsqrt &&
+  if (unary_op->type != OperatorType::kCast &&
+      unary_op->type != OperatorType::kTensorFlowRsqrt &&
       unary_op->type != OperatorType::kTensorFlowSqrt &&
       unary_op->type != OperatorType::kTensorFlowSquare &&
       unary_op->type != OperatorType::kTensorFlowSum &&
@@ -56,6 +57,12 @@ bool ResolveConstantUnaryOperator::Run(Model* model, std::size_t op_index) {
     }
   }
 
+  auto& output_array = model->GetArray(unary_op->outputs[0]);
+  if (!output_array.has_shape()) {
+    // Yield until the output array dims have been resolved.
+    return false;
+  }
+
   // At the moment we don't want to care about fused activation functions.
   // The idea is that we should do the present constants-propagation before
   // activation functions get fused.
@@ -67,48 +74,76 @@ bool ResolveConstantUnaryOperator::Run(Model* model, std::size_t op_index) {
         LogName(*unary_op));
     return false;
   }
+
   const auto& input_array = model->GetArray(unary_op->inputs[0]);
   // We have already tested above for existence of buffers (synonymous to being
   // a constant param).
   CHECK(input_array.buffer);
-  // At the moment we only support float buffers.
-  if (input_array.buffer->type != ArrayDataType::kFloat) {
-    return false;
-  }
-  const auto& input_float_data =
-      input_array.GetBuffer<ArrayDataType::kFloat>().data;
-  // Create the float buffer on the output array, effectively turning it into
-  // a constant parameter
-  const auto& output_name = unary_op->outputs[0];
-  auto& output_array = model->GetArray(output_name);
-  // Yield until the output array dims have been resolved.
-  if (!output_array.has_shape()) {
-    return false;
+  std::vector<DataType<ArrayDataType::kFloat>> const* input_float_data;
+  if (unary_op->type == OperatorType::kCast) {
+    CastOperator const* cast_op = static_cast<CastOperator const*>(unary_op);
+    if (cast_op->dst_data_type != ArrayDataType::kFloat) {
+      AddMessageF(
+          "Not resolving constant %s because we currently only support casting "
+          "to float",
+          LogName(*unary_op));
+      return false;
+    }
+    if (cast_op->src_data_type != input_array.buffer->type) {
+      AddMessageF(
+          "Not resolving constant %s because cast op source type does not "
+          "match input type",
+          LogName(*unary_op));
+    }
+  } else {
+    if (input_array.buffer->type != ArrayDataType::kFloat) {
+      return false;
+    }
+    input_float_data = &(input_array.GetBuffer<ArrayDataType::kFloat>().data);
   }
 
-  int input_buffer_size = RequiredBufferSizeForShape(input_array.shape());
-  int output_buffer_size = RequiredBufferSizeForShape(output_array.shape());
-  const Shape& input_shape = input_array.shape();
+  // Create a float buffer on the output array, which are always constant.
   const Shape& output_shape = output_array.shape();
-
+  const int output_dims_count = output_shape.dimensions_count();
+  const int output_buffer_size = RequiredBufferSizeForShape(output_shape);
   auto& output_float_data =
       output_array.GetMutableBuffer<ArrayDataType::kFloat>().data;
   output_float_data.resize(output_buffer_size);
 
-  const int output_dims_count = output_shape.dimensions_count();
-  if (unary_op->type == OperatorType::kTensorFlowReshape) {
+  const Shape& input_shape = input_array.shape();
+  const int input_buffer_size = RequiredBufferSizeForShape(input_shape);
+  if (unary_op->type == OperatorType::kCast) {
+    for (int i = 0; i < output_buffer_size; i++) {
+      float outval = 0.0f;
+      if (input_array.buffer->type == ArrayDataType::kFloat) {
+        outval = static_cast<float>(
+            input_array.GetBuffer<ArrayDataType::kFloat>().data[i]);
+      } else if (input_array.buffer->type == ArrayDataType::kUint8) {
+        outval = static_cast<float>(
+            input_array.GetBuffer<ArrayDataType::kUint8>().data[i]);
+      } else if (input_array.buffer->type == ArrayDataType::kInt32) {
+        outval = static_cast<float>(
+            input_array.GetBuffer<ArrayDataType::kInt32>().data[i]);
+      } else if (input_array.buffer->type == ArrayDataType::kInt64) {
+        outval = static_cast<float>(
+            input_array.GetBuffer<ArrayDataType::kInt64>().data[i]);
+      } else {
+        LOG(FATAL) << "Unsupported cast op input type";
+      }
+      output_float_data[i] = outval;
+    }
+  } else if (unary_op->type == OperatorType::kTensorFlowReshape) {
     CHECK(input_buffer_size == output_buffer_size);
-    memcpy(output_float_data.data(), input_float_data.data(),
-           input_buffer_size * sizeof(input_float_data[0]));
+    memcpy(output_float_data.data(), (*input_float_data).data(),
+           output_buffer_size * sizeof(output_float_data[0]));
   } else if (unary_op->type == OperatorType::kTensorFlowSum) {
     // At the moment only full reduction across all dimensions is supported.
     for (int i = 0; i < output_dims_count; i++) {
       CHECK_EQ(output_shape.dims(i), 1);
     }
     float sum = 0.f;
-    const int input_size = RequiredBufferSizeForShape(input_shape);
-    for (int i = 0; i < input_size; i++) {
-      sum += input_float_data[i];
+    for (int i = 0; i < input_buffer_size; i++) {
+      sum += (*input_float_data)[i];
     }
     output_float_data[0] = sum;
   } else if (unary_op->type == OperatorType::kTensorFlowMin) {
@@ -117,10 +152,9 @@ bool ResolveConstantUnaryOperator::Run(Model* model, std::size_t op_index) {
     for (int i = 0; i < output_dims_count; i++) {
       CHECK_EQ(output_shape.dims(i), 1);
     }
-    float min = input_float_data[0];
-    const int input_size = RequiredBufferSizeForShape(input_shape);
-    for (int i = 0; i < input_size; i++) {
-      min = std::min(min, input_float_data[i]);
+    float min = (*input_float_data)[0];
+    for (int i = 0; i < input_buffer_size; i++) {
+      min = std::min(min, (*input_float_data)[i]);
     }
     output_float_data[0] = min;
   } else if (unary_op->type == OperatorType::kTensorFlowMax) {
@@ -129,23 +163,21 @@ bool ResolveConstantUnaryOperator::Run(Model* model, std::size_t op_index) {
     for (int i = 0; i < output_dims_count; i++) {
       CHECK_EQ(output_shape.dims(i), 1);
     }
-    float max = input_float_data[0];
-    const int input_size = RequiredBufferSizeForShape(input_shape);
-    for (int i = 0; i < input_size; i++) {
-      max = std::max(max, input_float_data[i]);
+    float max = (*input_float_data)[0];
+    for (int i = 0; i < input_buffer_size; i++) {
+      max = std::max(max, (*input_float_data)[i]);
     }
     output_float_data[0] = max;
   } else if (unary_op->type == OperatorType::kTensorFlowRsqrt ||
              unary_op->type == OperatorType::kTensorFlowSqrt ||
              unary_op->type == OperatorType::kTensorFlowSquare) {
     // Element-wise ops. Should have perfectly matching sizes here.
-    const int input_size = RequiredBufferSizeForShape(input_shape);
     for (int i = 0; i < output_dims_count; i++) {
       CHECK_EQ(output_shape.dims(i), input_shape.dims(i));
     }
 
-    for (int i = 0; i < input_size; i++) {
-      const float val = input_float_data[i];
+    for (int i = 0; i < output_buffer_size; i++) {
+      const float val = (*input_float_data)[i];
       float outval = 0.f;
       if (unary_op->type == OperatorType::kTensorFlowRsqrt) {
         outval = 1.0f / std::sqrt(val);
diff --git a/tensorflow/contrib/lite/toco/import_tensorflow.cc b/tensorflow/contrib/lite/toco/import_tensorflow.cc
index 34d38f1fcb..99cf1a7ca8 100644
--- a/tensorflow/contrib/lite/toco/import_tensorflow.cc
+++ b/tensorflow/contrib/lite/toco/import_tensorflow.cc
@@ -854,12 +854,24 @@ void ConvertReshapeOperator(const NodeDef& node,
 void ConvertMatMulOperator(const NodeDef& node,
                            const TensorFlowImportFlags& tf_import_flags,
                            Model* model) {
-  CHECK_EQ(node.op(), "MatMul");
   CHECK_EQ(GetInputsCount(node, tf_import_flags), 2);
-  // Transpose flags should be easy to support, but we don't have a
-  // GraphDef with them to test on at the moment.
-  CHECK_EQ(GetBoolAttr(node, "transpose_a"), false);
-  CHECK_EQ(GetBoolAttr(node, "transpose_b"), false);
+  if (node.op() == "MatMul") {
+    // Transpose flags should be easy to support, but we don't have a
+    // GraphDef with them to test on at the moment.
+    CHECK_EQ(GetBoolAttr(node, "transpose_a"), false);
+    CHECK_EQ(GetBoolAttr(node, "transpose_b"), false);
+    CHECK(!HasAttr(node, "adjoint_a") ||
+          (GetBoolAttr(node, "adjoint_a") == false));
+    CHECK(!HasAttr(node, "adjoint_b") ||
+          (GetBoolAttr(node, "adjoint_b") == false));
+  } else if (node.op() == "BatchMatMul") {
+    // https://www.tensorflow.org/versions/r0.12/api_docs/python/math_ops/matrix_math_functions
+    CHECK(!HasAttr(node, "adj_a") || (GetBoolAttr(node, "adj_a") == false));
+    CHECK(!HasAttr(node, "adj_b") || (GetBoolAttr(node, "adj_b") == false));
+  } else {
+    LOG(FATAL) << "op must be 'MatMul' or 'BatchMatMul'";
+  }
+
   const auto& input_name = node.input(0);
   const auto& weights_name = node.input(1);
   const auto& reordered_weights_name = weights_name + "_reordered";
@@ -1715,7 +1727,7 @@ std::unique_ptr<Model> ImportTensorFlowGraphDef(
       ConvertAvgPoolOperator(node, tf_import_flags, model);
     } else if (node.op() == "Reshape") {
       ConvertReshapeOperator(node, tf_import_flags, model);
-    } else if (node.op() == "MatMul") {
+    } else if (node.op() == "MatMul" || node.op() == "BatchMatMul") {
       ConvertMatMulOperator(node, tf_import_flags, model);
     } else if (node.op() == "Div" || node.op() == "RealDiv") {
       ConvertDivOperator(node, tf_import_flags, model);
-- 
GitLab


From 82c1af4a693395489cb43c1bbb7c7be41af654b8 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 13 Dec 2017 14:43:00 -0800
Subject: [PATCH 0976/1225] Simplify and improve AdaptiveSharedBatchScheduler
 implementation. The new implementation will exist alongside the old one
 (selectable through the scheduler options) until its superiority is
 confirmed, at which point the old rate-based implementation will be removed.

The new implementation requires fewer options and no user feedback to achieve a low latency batching. Instead of processing batches at an adjustable rate, we limit the number of batches which can be concurrently processed. Below the limit, batches are immediately processed upon creation. At the limit, the oldest batch is processed once an in-processing batch finishes.  The scheduler continuously adjusts the limit in order to maintain the smallest overall latency.

PiperOrigin-RevId: 178960621
---
 .../adaptive_shared_batch_scheduler.h         | 218 ++++++++++++++++--
 .../adaptive_shared_batch_scheduler_test.cc   | 111 +++++++++
 2 files changed, 315 insertions(+), 14 deletions(-)

diff --git a/tensorflow/contrib/batching/adaptive_shared_batch_scheduler.h b/tensorflow/contrib/batching/adaptive_shared_batch_scheduler.h
index 9e32bee505..a2cb146b8d 100644
--- a/tensorflow/contrib/batching/adaptive_shared_batch_scheduler.h
+++ b/tensorflow/contrib/batching/adaptive_shared_batch_scheduler.h
@@ -16,9 +16,11 @@ limitations under the License.
 #ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_BATCHING_ADAPTIVE_SHARED_BATCH_SCHEDULER_H_
 #define THIRD_PARTY_TENSORFLOW_CONTRIB_BATCHING_ADAPTIVE_SHARED_BATCH_SCHEDULER_H_
 
+#include <algorithm>
 #include <functional>
 #include <memory>
 #include <queue>
+#include <random>
 #include <unordered_map>
 #include <vector>
 
@@ -42,19 +44,36 @@ template <typename TaskType>
 class ASBSQueue;
 }  // namespace internal
 
+// EXPERIMENTAL: API MAY BE SUBJECTED TO SUDDEN CHANGES.
+//
 // Shared batch scheduler designed to minimize latency. The scheduler keeps
 // track of a number of queues (one per model or model version) which are
 // continuously enqueuing requests. The scheduler groups the requests into
 // batches which it periodically sends off for processing (see
 // shared_batch_scheduler.h for more details). The AdaptiveSharedBatchScheduler
 // prioritizes batches by age (i.e. the batch's oldest request) irrespective of
-// queue. The scheduler will process the oldest batch at an adjustable rate,
-// regardless of batch size. The user can provide feedback to help set this rate
-// to achieve some goal (i.e. minimize overall latency, limit cpu usage, etc).
+// queue or batch size.
+//
+// The scheduling decision currently exists in two flavors, controlled by the
+// option use_in_flight_batches_implementation. It is expected that setting this
+// option to true will give universally better results; after a period of
+// testing to confirm, the old implementation will be removed.
 //
-// The rate (or rather, the corresponding period) is adjusted each time a batch
-// is processed, using an exponentially weighted moving average to smooth
-// potentially noisy feedback:
+// If use_in_flight_batches_implementation is set to true, the scheduler
+// limits the number of batches which can be processed concurrently.  If a new
+// batch is created, and the number of in flight batches is below the limit,
+// the next (i.e. oldest) batch is immediately scheduled.  Similarly, when a
+// batch finishes processing, the limit is rechecked, and another batch may be
+// scheduled.  To avoid the need to carefully tune the limit for workload,
+// model type, platform, etc, it is dynamically adjusted in order to provide the
+// lowest latency.
+//
+// If use_in_flight_batches_implementation is set to false, the scheduler will
+// process the oldest batch at an adjustable rate, regardless of batch size.
+// The user can provide feedback to help set this rate to achieve some goal
+// (i.e. minimize overall latency, limit cpu usage, etc). The rate (or rather,
+// the corresponding period) is adjusted each time a batch is processed, using
+// an exponentially weighted moving average to smooth noisy feedback:
 // ewma_feedback = ((N - 1) * ewma_feedback + feedback()) / N
 // period *= (1 + K * emwa_feedback)
 //
@@ -82,6 +101,20 @@ class AdaptiveSharedBatchScheduler
     int64 num_batch_threads = port::NumSchedulableCPUs();
     // The environment to use (typically only overridden by test code).
     Env* env = Env::Default();
+    // Which implementation to use (described in class comments above).
+    bool use_in_flight_batches_implementation = false;
+    // Initial limit for number of batches being concurrently processed.
+    // Non-integer values correspond to probabilistic limits - i.e. a value of
+    // 3.2 results in an actual cap of 3 80% of the time, and 4 20% of the time.
+    double initial_in_flight_batches_limit = 3;
+    // Number of batches between adjustments of in_flight_batches_limit.  Larger
+    // numbers will give less noisy latency measurements, but will be less
+    // responsive to changes in workload.
+    int64 batches_to_average_over = 1000;
+
+    // TODO(kte): remove the rate based implementation and corresponding options
+    // below once testing confirms the superiority of the in flight batches
+    // implementation.
     // Initial batch scheduling period in microseconds. Will be altered for
     // non-zero rate_feedback.
     double initial_scheduling_period_micros = 500;
@@ -122,6 +155,11 @@ class AdaptiveSharedBatchScheduler
                   BatchProcessor process_batch_callback,
                   std::unique_ptr<BatchScheduler<TaskType>>* queue);
 
+  double in_flight_batches_limit() {
+    mutex_lock l(mu_);
+    return in_flight_batches_limit_;
+  }
+
  private:
   // access to AddBatch, RemoveQueue, GetEnv.
   friend class internal::ASBSQueue<TaskType>;
@@ -129,10 +167,20 @@ class AdaptiveSharedBatchScheduler
   explicit AdaptiveSharedBatchScheduler(const Options& options);
 
   // Batch scheduling function which runs every scheduling_period_ microseconds.
+  // Only used when options_.use_in_flight_batches_implementation == false.
   void ProcessOneBatch();
 
+  // Tracks processing latency and adjusts in_flight_batches_limit to minimize.
+  // Only used when options_.use_in_flight_batches_implementation == true.
+  void CallbackWrapper(const internal::ASBSBatch<TaskType>* batch,
+                       BatchProcessor callback);
+
+  // Schedules batch if in_flight_batches_limit_ is not met.
+  // Only used when options_.use_in_flight_batches_implementation == true.
+  void MaybeScheduleNextBatch() EXCLUSIVE_LOCKS_REQUIRED(mu_);
+
   // Notifies scheduler of non-empty batch which is eligible for processing.
-  void AddBatch(internal::ASBSBatch<TaskType>*);
+  void AddBatch(const internal::ASBSBatch<TaskType>* batch);
 
   // Removes queue from scheduler.
   void RemoveQueue(const internal::ASBSQueue<TaskType>* queue);
@@ -149,7 +197,8 @@ class AdaptiveSharedBatchScheduler
   // Collection of batches added by AddBatch, ordered by age. Owned by scheduler
   // until they are released for processing.
   std::priority_queue<const internal::ASBSBatch<TaskType>*,
-                      std::vector<internal::ASBSBatch<TaskType>*>, BatchCompare>
+                      std::vector<const internal::ASBSBatch<TaskType>*>,
+                      BatchCompare>
       batches_ GUARDED_BY(mu_);
 
   // Unowned queues and callbacks added by AddQueue.
@@ -160,19 +209,56 @@ class AdaptiveSharedBatchScheduler
 
   // Responsible for running ProcessOneBatch. PeriodicFunction was used in order
   // to check for deletion so that the thread can be shut down.
+  // Only used when options_.use_in_flight_batches_implementation == false.
   std::unique_ptr<PeriodicFunction> scheduling_thread_;
 
   // Responsible for running the batch processing callbacks.
   std::unique_ptr<thread::ThreadPool> batch_thread_pool_;
 
   // Time interval in microseconds between successive ProcessOneBatch calls.
+  // Only used when options_.use_in_flight_batches_implementation == false.
   double scheduling_period_;
 
   // Exponentially weighted moving average of
   // options_.scheduling_period_feedback() evaluated in each ProcessOneBatch
   // call.
+  // Only used when options_.use_in_flight_batches_implementation == false.
   double ewma_feedback_ = 0;
 
+  // Limit on number of batches which can be concurrently processed.
+  // Non-integer values correspond to probabilistic limits - i.e. a value of 3.2
+  // results in an actual cap of 3 80% of the time, and 4 20% of the time.
+  // Only used when options_.use_in_flight_batches_implementation == true.
+  double in_flight_batches_limit_ GUARDED_BY(mu_);
+
+  // Number of batches currently being processed.
+  // Only used when options_.use_in_flight_batches_implementation == true.
+  int64 in_flight_batches_ GUARDED_BY(mu_) = 0;
+
+  // RNG engine and distribution.
+  // Only used when options_.use_in_flight_batches_implementation == true.
+  std::default_random_engine rand_engine_;
+  std::uniform_real_distribution<double> rand_double_;
+
+  // Fields controlling the dynamic adjustment of in_flight_batches_limit_.
+  // Only used when options_.use_in_flight_batches_implementation == true.
+  // Number of batches since the last in_flight_batches_limit_ adjustment.
+  int64 batch_count_ GUARDED_BY(mu_) = 0;
+  // Sum of processing latency for batches counted by batch_count_.
+  int64 batch_latency_sum_ GUARDED_BY(mu_) = 0;
+  // Average batch latency for previous value of in_flight_batches_limit_.
+  double last_avg_latency_ms_ GUARDED_BY(mu_) = 0;
+  // Did last_avg_latency_ms_ decrease from the previous last_avg_latency_ms_?
+  bool last_latency_decreased_ GUARDED_BY(mu_) = false;
+  // Current direction (+-) to adjust in_flight_batches_limit_
+  int step_direction_ GUARDED_BY(mu_) = 1;
+  // Max adjustment size (as a fraction of in_flight_batches_limit_).
+  constexpr static double kMaxStepSizeMultiplier = 0.125;  // 1/8;
+  // Min adjustment size (as a fraction of in_flight_batches_limit_).
+  constexpr static double kMinStepSizeMultiplier = 0.0078125;  // 1/128
+  // Current adjustment size (as a fraction of in_flight_batches_limit_).
+  double step_size_multiplier_ GUARDED_BY(mu_) = kMaxStepSizeMultiplier;
+
   TF_DISALLOW_COPY_AND_ASSIGN(AdaptiveSharedBatchScheduler);
 };
 
@@ -243,6 +329,12 @@ class ASBSBatch : public Batch<TaskType> {
 
 // ---------------- AdaptiveSharedBatchScheduler ----------------
 
+template <typename TaskType>
+constexpr double AdaptiveSharedBatchScheduler<TaskType>::kMaxStepSizeMultiplier;
+
+template <typename TaskType>
+constexpr double AdaptiveSharedBatchScheduler<TaskType>::kMinStepSizeMultiplier;
+
 template <typename TaskType>
 Status AdaptiveSharedBatchScheduler<TaskType>::Create(
     const Options& options,
@@ -277,6 +369,25 @@ Status AdaptiveSharedBatchScheduler<TaskType>::Create(
         "feedback_smoothing_batches must be positive; was ",
         options.feedback_smoothing_batches);
   }
+  if (options.initial_in_flight_batches_limit > options.num_batch_threads) {
+    return errors::InvalidArgument(
+        "initial_in_flight_batches_limit (",
+        options.initial_in_flight_batches_limit,
+        ") should not be larger than num_batch_threads (",
+        options.num_batch_threads, ")");
+  }
+  if (options.initial_in_flight_batches_limit < 1) {
+    return errors::InvalidArgument(
+        "initial_in_flight_batches_limit should be "
+        "greater than or equal to 1; was ",
+        options.initial_in_flight_batches_limit);
+  }
+  if (options.batches_to_average_over < 1) {
+    return errors::InvalidArgument(
+        "batches_to_average_over should be "
+        "greater than or equal to 1; was ",
+        options.batches_to_average_over);
+  }
   scheduler->reset(new AdaptiveSharedBatchScheduler<TaskType>(options));
   return Status::OK();
 }
@@ -285,14 +396,20 @@ template <typename TaskType>
 AdaptiveSharedBatchScheduler<TaskType>::AdaptiveSharedBatchScheduler(
     const Options& options)
     : options_(options),
-      scheduling_period_(options.initial_scheduling_period_micros) {
+      scheduling_period_(options.initial_scheduling_period_micros),
+      in_flight_batches_limit_(options.initial_in_flight_batches_limit),
+      rand_double_(0.0, 1.0) {
+  std::random_device device;
+  rand_engine_.seed(device());
   PeriodicFunction::Options opts;
   opts.thread_name_prefix = "scheduling_thread";
   opts.env = GetEnv();
-  scheduling_thread_.reset(
-      new PeriodicFunction([this] { ProcessOneBatch(); }, 0, opts));
   batch_thread_pool_.reset(new thread::ThreadPool(
       GetEnv(), options.thread_pool_name, options.num_batch_threads));
+  if (!options.use_in_flight_batches_implementation) {
+    scheduling_thread_.reset(
+        new PeriodicFunction([this] { ProcessOneBatch(); }, 0, opts));
+  }
 }
 
 template <typename TaskType>
@@ -318,9 +435,12 @@ Status AdaptiveSharedBatchScheduler<TaskType>::AddQueue(
 
 template <typename TaskType>
 void AdaptiveSharedBatchScheduler<TaskType>::AddBatch(
-    internal::ASBSBatch<TaskType>* batch) {
+    const internal::ASBSBatch<TaskType>* batch) {
   mutex_lock l(mu_);
   batches_.push(batch);
+  if (options_.use_in_flight_batches_implementation) {
+    MaybeScheduleNextBatch();
+  }
 }
 
 template <typename TaskType>
@@ -330,10 +450,78 @@ void AdaptiveSharedBatchScheduler<TaskType>::RemoveQueue(
   queues_and_callbacks_.erase(queue);
 }
 
+template <typename TaskType>
+void AdaptiveSharedBatchScheduler<TaskType>::MaybeScheduleNextBatch() {
+  if (batches_.empty() || in_flight_batches_ >= in_flight_batches_limit_)
+    return;
+  // Non-integer limit handled probabilistially.
+  if (in_flight_batches_limit_ - in_flight_batches_ < 1 &&
+      rand_double_(rand_engine_) >
+          (in_flight_batches_limit_ - in_flight_batches_))
+    return;
+  const internal::ASBSBatch<TaskType>* batch = batches_.top();
+  batches_.pop();
+  // Queue may destroy itself after ReleaseBatch is called.
+  batch->queue()->ReleaseBatch(batch);
+  batch_thread_pool_->Schedule(
+      std::bind(&AdaptiveSharedBatchScheduler<TaskType>::CallbackWrapper, this,
+                batch, queues_and_callbacks_[batch->queue()]));
+  in_flight_batches_++;
+}
+
+template <typename TaskType>
+void AdaptiveSharedBatchScheduler<TaskType>::CallbackWrapper(
+    const internal::ASBSBatch<TaskType>* batch,
+    AdaptiveSharedBatchScheduler<TaskType>::BatchProcessor callback) {
+  int64 start_time = batch->creation_time_micros();
+  callback(std::unique_ptr<Batch<TaskType>>(
+      const_cast<internal::ASBSBatch<TaskType>*>(batch)));
+  int64 end_time = GetEnv()->NowMicros();
+  mutex_lock l(mu_);
+  in_flight_batches_--;
+  batch_count_++;
+  batch_latency_sum_ += end_time - start_time;
+  // Occasionally adjust in_flight_batches_limit_ to minimize average latency.
+  // Although the optimal value may depend on the workload, the latency should
+  // be a simple convex function of in_flight_batches_limit_, allowing us to
+  // locate the global minimum relatively quickly.
+  if (batch_count_ == options_.batches_to_average_over) {
+    double current_avg_latency_ms = (batch_latency_sum_ / 1000.) / batch_count_;
+    bool current_latency_decreased =
+        current_avg_latency_ms < last_avg_latency_ms_;
+    if (current_latency_decreased) {
+      // If latency improvement was because we're moving in the correct
+      // direction, increase step_size so that we can get to the minimum faster.
+      // If latency improvement was due to backtracking from a previous failure,
+      // decrease step_size in order to refine our location.
+      step_size_multiplier_ *= (last_latency_decreased_ ? 2 : 0.5);
+      step_size_multiplier_ =
+          std::min(step_size_multiplier_, kMaxStepSizeMultiplier);
+      step_size_multiplier_ =
+          std::max(step_size_multiplier_, kMinStepSizeMultiplier);
+    } else {
+      // Return (nearly) to previous position and confirm that latency is better
+      // there before decreasing step size.
+      step_direction_ = -step_direction_;
+    }
+    in_flight_batches_limit_ +=
+        step_direction_ * in_flight_batches_limit_ * step_size_multiplier_;
+    in_flight_batches_limit_ =
+        std::min(in_flight_batches_limit_,
+                 static_cast<double>(options_.num_batch_threads));
+    in_flight_batches_limit_ = std::max(in_flight_batches_limit_, 1.0);
+    last_avg_latency_ms_ = current_avg_latency_ms;
+    last_latency_decreased_ = current_latency_decreased;
+    batch_count_ = 0;
+    batch_latency_sum_ = 0;
+  }
+  MaybeScheduleNextBatch();
+}
+
 template <typename TaskType>
 void AdaptiveSharedBatchScheduler<TaskType>::ProcessOneBatch() {
   static const double kFeedbackMultiplier = .001;
-  internal::ASBSBatch<TaskType>* batch = nullptr;
+  const internal::ASBSBatch<TaskType>* batch = nullptr;
   BatchProcessor callback;
   const int64 start_time_micros = GetEnv()->NowMicros();
   {
@@ -357,7 +545,8 @@ void AdaptiveSharedBatchScheduler<TaskType>::ProcessOneBatch() {
     // Queue may destroy itself after ReleaseBatch is called.
     batch->queue()->ReleaseBatch(batch);
     batch_thread_pool_->Schedule([callback, batch] {
-      callback(std::unique_ptr<Batch<TaskType>>(batch));
+      callback(std::unique_ptr<Batch<TaskType>>(
+          const_cast<internal::ASBSBatch<TaskType>*>(batch)));
     });
   }
   const int64 sleep_time =
@@ -427,6 +616,7 @@ Status ASBSQueue<TaskType>::Schedule(std::unique_ptr<TaskType>* task) {
     current_batch_->AddTask(std::move(*task));
     num_enqueued_tasks_++;
   }
+  // AddBatch must be called outside of lock, since it may call ReleaseBatch.
   if (new_batch != nullptr) scheduler_->AddBatch(new_batch);
   return Status::OK();
 }
diff --git a/tensorflow/contrib/batching/adaptive_shared_batch_scheduler_test.cc b/tensorflow/contrib/batching/adaptive_shared_batch_scheduler_test.cc
index e2aac54eeb..18f1e55452 100644
--- a/tensorflow/contrib/batching/adaptive_shared_batch_scheduler_test.cc
+++ b/tensorflow/contrib/batching/adaptive_shared_batch_scheduler_test.cc
@@ -141,6 +141,16 @@ TEST(AdaptiveSharedBatchSchedulerTest, BadOptions) {
   options = Scheduler::Options();
   options.feedback_smoothing_batches = 0;
   EXPECT_FALSE(Scheduler::Create(options, &scheduler).ok());
+  options = Scheduler::Options();
+  options.initial_in_flight_batches_limit = 0.5;
+  EXPECT_FALSE(Scheduler::Create(options, &scheduler).ok());
+  options = Scheduler::Options();
+  options.num_batch_threads = 5;
+  options.initial_in_flight_batches_limit = 8;
+  EXPECT_FALSE(Scheduler::Create(options, &scheduler).ok());
+  options = Scheduler::Options();
+  options.batches_to_average_over = -5;
+  EXPECT_FALSE(Scheduler::Create(options, &scheduler).ok());
 }
 
 TEST(AdaptiveSharedBatchSchedulerTest, ObeysQueueOptions) {
@@ -434,6 +444,107 @@ TEST(AdaptiveSharedBatchSchedulerTest, QueueCapacityInfo) {
   }
   stop_teardown.Notify();
 }
+
+TEST(AdaptiveSharedBatchSchedulerTest, InFlightBatchesImplementation) {
+  AdaptiveSharedBatchScheduler<FakeTask>::Options options;
+  options.use_in_flight_batches_implementation = true;
+  options.initial_in_flight_batches_limit = 2;
+  options.batches_to_average_over = 1000;
+  std::shared_ptr<AdaptiveSharedBatchScheduler<FakeTask>> scheduler;
+  TF_ASSERT_OK(
+      AdaptiveSharedBatchScheduler<FakeTask>::Create(options, &scheduler));
+  std::unique_ptr<BatchScheduler<FakeTask>> queue;
+  mutex mu;
+  int processed_batches = 0;
+  Notification finish_processing;
+  auto queue_callback = [&mu, &processed_batches, &finish_processing](
+                            std::unique_ptr<Batch<FakeTask>> batch) {
+    ASSERT_TRUE(batch->IsClosed());
+    EXPECT_GT(batch->num_tasks(), 0);
+    mu.lock();
+    int batch_num = ++processed_batches;
+    mu.unlock();
+    if (batch_num == 2) {
+      // Give third batch a chance to process if it's going to.
+      Env::Default()->SleepForMicroseconds(1000);
+      finish_processing.Notify();
+    }
+    if (batch_num == 3) {
+      ASSERT_TRUE(finish_processing.HasBeenNotified());
+    }
+    finish_processing.WaitForNotification();
+  };
+
+  TF_ASSERT_OK(scheduler->AddQueue({}, queue_callback, &queue));
+
+  // Enqueue 3 batches.
+  for (int i = 0; i < 3; i++) {
+    TF_ASSERT_OK(ScheduleTask(100, queue.get()));
+  }
+}
+
+TEST(AdaptiveSharedBatchSchedulerTest, InFlightBatchesLimitTuning) {
+  test_util::FakeClockEnv env(Env::Default());
+  Notification start_teardown, stop_teardown;
+  std::unique_ptr<Thread> teardown_thread =
+      CreateFakeClockAdvancerThread(&env, &start_teardown, &stop_teardown);
+  {
+    AdaptiveSharedBatchScheduler<FakeTask>::Options options;
+    options.env = &env;
+    options.use_in_flight_batches_implementation = true;
+    options.initial_in_flight_batches_limit = 2;
+    options.batches_to_average_over = 1;
+    std::shared_ptr<AdaptiveSharedBatchScheduler<FakeTask>> scheduler;
+    TF_ASSERT_OK(
+        AdaptiveSharedBatchScheduler<FakeTask>::Create(options, &scheduler));
+    std::unique_ptr<BatchScheduler<FakeTask>> queue;
+    auto queue_callback = [&env](std::unique_ptr<Batch<FakeTask>> batch) {
+      ASSERT_TRUE(batch->IsClosed());
+      switch (batch->size()) {
+        case 0:
+          env.AdvanceByMicroseconds(10);
+          break;
+        case 1:
+          env.AdvanceByMicroseconds(15);
+          break;
+        case 2:
+          env.AdvanceByMicroseconds(10);
+          break;
+        case 3:
+          env.AdvanceByMicroseconds(11);
+          break;
+      }
+    };
+
+    TF_ASSERT_OK(scheduler->AddQueue({}, queue_callback, &queue));
+    TF_ASSERT_OK(ScheduleTask(0, queue.get()));
+    double in_flight_batches_limit = 2;
+    while (scheduler->in_flight_batches_limit() == in_flight_batches_limit) {
+    }
+    // Initial direction will be negative.
+    EXPECT_LT(scheduler->in_flight_batches_limit(), in_flight_batches_limit);
+    in_flight_batches_limit = scheduler->in_flight_batches_limit();
+    TF_ASSERT_OK(ScheduleTask(1, queue.get()));
+    while (scheduler->in_flight_batches_limit() == in_flight_batches_limit) {
+    }
+    // Latency increased -> change direction.
+    EXPECT_GT(scheduler->in_flight_batches_limit(), in_flight_batches_limit);
+    in_flight_batches_limit = scheduler->in_flight_batches_limit();
+    TF_ASSERT_OK(ScheduleTask(2, queue.get()));
+    while (scheduler->in_flight_batches_limit() == in_flight_batches_limit) {
+    }
+    // Latency decreased -> keep going in same direction.
+    EXPECT_GT(scheduler->in_flight_batches_limit(), in_flight_batches_limit);
+    in_flight_batches_limit = scheduler->in_flight_batches_limit();
+    TF_ASSERT_OK(ScheduleTask(3, queue.get()));
+    while (scheduler->in_flight_batches_limit() == in_flight_batches_limit) {
+    }
+    // Latency increased -> change direction.
+    EXPECT_LT(scheduler->in_flight_batches_limit(), in_flight_batches_limit);
+    start_teardown.Notify();
+  }
+  stop_teardown.Notify();
+}
 }  // namespace anonymous
 }  // namespace serving
 }  // namespace tensorflow
-- 
GitLab


From 7fbcd3724379161e379f4693ba66666b6ac4b0aa Mon Sep 17 00:00:00 2001
From: Sergio Guadarrama <sguada@google.com>
Date: Wed, 13 Dec 2017 14:50:24 -0800
Subject: [PATCH 0977/1225] Avoid modifying items in dataset_data_provider.

PiperOrigin-RevId: 178961790
---
 .../slim/python/slim/data/dataset_data_provider.py        | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/tensorflow/contrib/slim/python/slim/data/dataset_data_provider.py b/tensorflow/contrib/slim/python/slim/data/dataset_data_provider.py
index 82c6b5a619..a781c647a1 100644
--- a/tensorflow/contrib/slim/python/slim/data/dataset_data_provider.py
+++ b/tensorflow/contrib/slim/python/slim/data/dataset_data_provider.py
@@ -96,12 +96,12 @@ class DatasetDataProvider(data_provider.DataProvider):
     items = dataset.decoder.list_items()
     tensors = dataset.decoder.decode(data, items)
 
-    if record_key in items:
+    items_to_tensors = dict(zip(items, tensors))
+    if record_key in items_to_tensors:
       raise ValueError('The item name used for `record_key` cannot also be '
                        'used for a dataset item: %s', record_key)
-    items.append(record_key)
-    tensors.append(key)
+    items_to_tensors[record_key] = key
 
     super(DatasetDataProvider, self).__init__(
-        items_to_tensors=dict(zip(items, tensors)),
+        items_to_tensors=items_to_tensors,
         num_samples=dataset.num_samples)
-- 
GitLab


From 57600a8d7739f6fbea445c6efa1f29f12f769748 Mon Sep 17 00:00:00 2001
From: Nupur Garg <nupurgarg@google.com>
Date: Wed, 13 Dec 2017 14:54:10 -0800
Subject: [PATCH 0978/1225] Add support for "Pad".

PiperOrigin-RevId: 178962340
---
 tensorflow/contrib/lite/builtin_op_data.h     |   8 +
 tensorflow/contrib/lite/kernels/BUILD         |  13 ++
 tensorflow/contrib/lite/kernels/pad.cc        | 139 ++++++++++++
 tensorflow/contrib/lite/kernels/pad_test.cc   |  99 +++++++++
 tensorflow/contrib/lite/kernels/register.cc   |   2 +
 tensorflow/contrib/lite/model.cc              |  62 ++++--
 tensorflow/contrib/lite/nnapi_delegate.cc     |   1 +
 tensorflow/contrib/lite/schema/schema.fbs     |   7 +
 .../contrib/lite/schema/schema_generated.h    | 201 +++++++++++++++++-
 tensorflow/contrib/lite/testing/BUILD         |   1 +
 .../contrib/lite/testing/generate_examples.py |  32 +++
 .../testing/generated_examples_zip_test.cc    |   6 +
 .../contrib/lite/toco/tflite/operator.cc      |  25 +++
 .../contrib/lite/toco/tflite/operator_test.cc |  10 +
 14 files changed, 587 insertions(+), 19 deletions(-)
 create mode 100644 tensorflow/contrib/lite/kernels/pad.cc
 create mode 100644 tensorflow/contrib/lite/kernels/pad_test.cc

diff --git a/tensorflow/contrib/lite/builtin_op_data.h b/tensorflow/contrib/lite/builtin_op_data.h
index 93072bf90b..7249d124e9 100644
--- a/tensorflow/contrib/lite/builtin_op_data.h
+++ b/tensorflow/contrib/lite/builtin_op_data.h
@@ -130,6 +130,14 @@ typedef struct {
   int new_width;
 } TfLiteResizeBilinearParams;
 
+typedef struct {
+  // TODO(ahentz): We can't have dynamic data in this struct, at least not yet.
+  // For now we will fix the maximum possible number of dimensions.
+  int before_padding[8];
+  int after_padding[8];
+  int num_dimensions;
+} TfLitePadParams;
+
 typedef struct {
   // TODO(ahentz): We can't have dynamic data in this struct, at least not yet.
   // For now we will fix the maximum possible number of dimensions.
diff --git a/tensorflow/contrib/lite/kernels/BUILD b/tensorflow/contrib/lite/kernels/BUILD
index ad76e90606..83eb7f2cb8 100644
--- a/tensorflow/contrib/lite/kernels/BUILD
+++ b/tensorflow/contrib/lite/kernels/BUILD
@@ -90,6 +90,7 @@ cc_library(
         "lsh_projection.cc",
         "lstm.cc",
         "mul.cc",
+        "pad.cc",
         "pooling.cc",
         "register.cc",
         "reshape.cc",
@@ -225,6 +226,18 @@ tf_cc_test(
     ],
 )
 
+tf_cc_test(
+    name = "pad_test",
+    size = "small",
+    srcs = ["pad_test.cc"],
+    deps = [
+        ":builtin_ops",
+        "//tensorflow/contrib/lite:framework",
+        "//tensorflow/contrib/lite/kernels:test_util",
+        "@com_google_googletest//:gtest",
+    ],
+)
+
 tf_cc_test(
     name = "reshape_test",
     size = "small",
diff --git a/tensorflow/contrib/lite/kernels/pad.cc b/tensorflow/contrib/lite/kernels/pad.cc
new file mode 100644
index 0000000000..5e90282a43
--- /dev/null
+++ b/tensorflow/contrib/lite/kernels/pad.cc
@@ -0,0 +1,139 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include <string.h>
+#include <vector>
+#include "tensorflow/contrib/lite/builtin_op_data.h"
+#include "tensorflow/contrib/lite/context.h"
+#include "tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h"
+#include "tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h"
+#include "tensorflow/contrib/lite/kernels/internal/tensor.h"
+#include "tensorflow/contrib/lite/kernels/kernel_util.h"
+#include "tensorflow/contrib/lite/kernels/op_macros.h"
+
+namespace tflite {
+namespace ops {
+namespace builtin {
+namespace pad {
+
+// This file has two implementations of Pad.
+enum KernelType {
+  kReference,
+  kGenericOptimized,
+};
+
+// TODO(nupurgarg): Padding represented as a tensor is ignored. Only use the
+// `left_padding` and `right_padding` specified in `params`.
+struct PadContext {
+  PadContext(TfLiteContext* context, TfLiteNode* node) {
+    params = reinterpret_cast<TfLitePadParams*>(node->builtin_data);
+    input = GetInput(context, node, 0);
+    output = GetOutput(context, node, 0);
+  }
+  TfLitePadParams* params;
+  TfLiteTensor* input;
+  TfLiteTensor* output;
+};
+
+TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
+  TF_LITE_ENSURE(context, NumInputs(node) == 1 || NumInputs(node) == 2);
+  TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
+
+  // Determines size of output tensor.
+  PadContext op_context(context, node);
+  int dims = NumDimensions(op_context.input);
+  TF_LITE_ENSURE_EQ(context, dims, op_context.params->num_dimensions);
+
+  // TODO(nupurgarg): Our current implementations rely on the inputs being 4D.
+  TF_LITE_ENSURE_EQ(context, dims, 4);
+
+  const TfLiteIntArray* input_size = op_context.input->dims;
+  TfLiteIntArray* output_size = TfLiteIntArrayCreate(dims);
+  for (int idx = 0; idx < dims; ++idx) {
+    TF_LITE_ENSURE_MSG(context,
+                       (op_context.params->before_padding[idx] >= 0 &&
+                        op_context.params->after_padding[idx] >= 0),
+                       "Pad value has to be greater than equal to 0.");
+    output_size->data[idx] =
+        (input_size->data[idx] + op_context.params->before_padding[idx] +
+         op_context.params->after_padding[idx]);
+  }
+
+  return context->ResizeTensor(context, op_context.output, output_size);
+}
+
+template <KernelType kernel_type>
+TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
+  PadContext op_context(context, node);
+
+  // TODO(nupurgarg): Support different data types.
+  if (op_context.output->type == kTfLiteFloat32) {
+    std::vector<int> before_padding(
+        op_context.params->before_padding,
+        op_context.params->before_padding + op_context.params->num_dimensions);
+    std::vector<int> after_padding(
+        op_context.params->after_padding,
+        op_context.params->after_padding + op_context.params->num_dimensions);
+
+    // TODO(nupurgarg): Change TOCO's implementation to use padding arrays
+    // in forward order (depth, width, height, batch).
+    // Converts from int[] = {depth, width, height, batch} to int[] = {batch,
+    // height, width, depth} to match TOCO's implementation of pad in
+    // referenced_ops.h and optimized_ops.h.
+    std::reverse(before_padding.begin(), before_padding.end());
+    std::reverse(after_padding.begin(), after_padding.end());
+
+#define TF_LITE_PAD(type)                                                   \
+  type::Pad(GetTensorData<float>(op_context.input),                         \
+            GetTensorDims(op_context.input), before_padding, after_padding, \
+            GetTensorData<float>(op_context.output),                        \
+            GetTensorDims(op_context.output))
+
+    if (kernel_type == kReference) {
+      TF_LITE_PAD(reference_ops);
+    }
+    if (kernel_type == kGenericOptimized) {
+      TF_LITE_PAD(optimized_ops);
+    }
+#undef TF_LITE_PAD
+  } else {
+    context->ReportError(context, "Inputs and outputs not all float types.");
+    return kTfLiteError;
+  }
+
+  return kTfLiteOk;
+}
+
+}  // namespace pad
+
+TfLiteRegistration* Register_PAD_REF() {
+  static TfLiteRegistration r = {nullptr, nullptr, pad::Prepare,
+                                 pad::Eval<pad::kReference>};
+  return &r;
+}
+
+TfLiteRegistration* Register_PAD_GENERIC_OPT() {
+  static TfLiteRegistration r = {nullptr, nullptr, pad::Prepare,
+                                 pad::Eval<pad::kGenericOptimized>};
+  return &r;
+}
+
+TfLiteRegistration* Register_PAD() {
+  return Register_PAD_GENERIC_OPT();
+  // return Register_PAD_REF();
+}
+
+}  // namespace builtin
+}  // namespace ops
+}  // namespace tflite
diff --git a/tensorflow/contrib/lite/kernels/pad_test.cc b/tensorflow/contrib/lite/kernels/pad_test.cc
new file mode 100644
index 0000000000..f3ea9417df
--- /dev/null
+++ b/tensorflow/contrib/lite/kernels/pad_test.cc
@@ -0,0 +1,99 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include <gtest/gtest.h>
+#include "tensorflow/contrib/lite/interpreter.h"
+#include "tensorflow/contrib/lite/kernels/register.h"
+#include "tensorflow/contrib/lite/kernels/test_util.h"
+#include "tensorflow/contrib/lite/model.h"
+
+namespace tflite {
+namespace {
+
+using ::testing::ElementsAreArray;
+
+class PadOpModel : public SingleOpModel {
+ public:
+  PadOpModel(std::initializer_list<int> input_shape,
+             std::initializer_list<int> before_padding,
+             std::initializer_list<int> after_padding) {
+    input_ = AddInput(TensorType_FLOAT32);
+    output_ = AddOutput(TensorType_FLOAT32);
+    SetBuiltinOp(
+        BuiltinOperator_PAD, BuiltinOptions_PadOptions,
+        CreatePadOptions(builder_, builder_.CreateVector<int>(before_padding),
+                         builder_.CreateVector<int>(after_padding))
+            .Union());
+    BuildInterpreter({input_shape});
+  }
+
+  void SetInput(std::initializer_list<float> data) {
+    PopulateTensor<float>(input_, data);
+  }
+
+  std::vector<float> GetOutput() { return ExtractVector<float>(output_); }
+  std::vector<int> GetOutputShape() { return GetTensorShape(output_); }
+
+ private:
+  int input_;
+  int output_;
+};
+
+TEST(PadOpTest, TooManyDimensions) {
+  EXPECT_DEATH(
+      PadOpModel({1, 2, 3, 4, 5, 6, 7, 8, 9}, {1, 2, 3, 4, 5, 6, 7, 8, 9},
+                 {1, 2, 3, 4, 5, 6, 7, 8, 9}),
+      "dims != 4");
+}
+
+// TODO(nupurgarg): Test case where before padding and after padding arrays
+// don't contain the same number of dimensions.
+TEST(PadOpTest, UnequalDimensions) {
+  EXPECT_DEATH(PadOpModel({1, 1, 2, 1}, {1, 2, 3}, {1, 2, 3}),
+               "dims != op_context.params->num_dimensions");
+}
+
+TEST(PadOpTest, InvalidPadValue) {
+  EXPECT_DEATH(PadOpModel({1, 1, 2, 1}, {0, 1, 2, 0}, {0, -1, -1, 0}),
+               "Pad value has to be greater than equal to 0.");
+}
+
+TEST(PadOpTest, SimpleTest) {
+  PadOpModel m({1, 2, 2, 1}, {0, 1, 1, 0}, {0, 1, 1, 0});
+  m.SetInput({1, 2, 3, 4});
+  m.Invoke();
+  EXPECT_THAT(m.GetOutput(), ElementsAreArray({0, 0, 0, 0, 0, 1, 2, 0, 0, 3, 4,
+                                               0, 0, 0, 0, 0}));
+  EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({1, 4, 4, 1}));
+}
+
+TEST(PadOpTest, AdvancedTest) {
+  // The padding is input in the order of batch, height, width, depth.
+  PadOpModel m({1, 2, 3, 1}, {0, 0, 1, 0}, {0, 2, 3, 0});
+  m.SetInput({1, 2, 3, 4, 5, 6});
+  m.Invoke();
+  EXPECT_THAT(m.GetOutput(),
+              ElementsAreArray({0, 1, 2, 3, 0, 0, 0, 0, 4, 5, 6, 0, 0, 0,
+                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}));
+  EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({1, 4, 7, 1}));
+}
+
+}  // namespace
+}  // namespace tflite
+
+int main(int argc, char** argv) {
+  ::tflite::LogToStderr();
+  ::testing::InitGoogleTest(&argc, argv);
+  return RUN_ALL_TESTS();
+}
diff --git a/tensorflow/contrib/lite/kernels/register.cc b/tensorflow/contrib/lite/kernels/register.cc
index ca7a0dd194..bef6967a90 100644
--- a/tensorflow/contrib/lite/kernels/register.cc
+++ b/tensorflow/contrib/lite/kernels/register.cc
@@ -43,6 +43,7 @@ TfLiteRegistration* Register_MUL();
 TfLiteRegistration* Register_L2_NORMALIZATION();
 TfLiteRegistration* Register_LOCAL_RESPONSE_NORMALIZATION();
 TfLiteRegistration* Register_LSTM();
+TfLiteRegistration* Register_PAD();
 TfLiteRegistration* Register_RESHAPE();
 TfLiteRegistration* Register_RESIZE_BILINEAR();
 TfLiteRegistration* Register_SKIP_GRAM();
@@ -75,6 +76,7 @@ BuiltinOpResolver::BuiltinOpResolver() {
   AddBuiltin(BuiltinOperator_LOCAL_RESPONSE_NORMALIZATION,
              Register_LOCAL_RESPONSE_NORMALIZATION());
   AddBuiltin(BuiltinOperator_LSTM, Register_LSTM());
+  AddBuiltin(BuiltinOperator_PAD, Register_PAD());
   AddBuiltin(BuiltinOperator_RESHAPE, Register_RESHAPE());
   AddBuiltin(BuiltinOperator_RESIZE_BILINEAR, Register_RESIZE_BILINEAR());
   AddBuiltin(BuiltinOperator_SKIP_GRAM, Register_SKIP_GRAM());
diff --git a/tensorflow/contrib/lite/model.cc b/tensorflow/contrib/lite/model.cc
index 54efad94af..de7a39b62c 100644
--- a/tensorflow/contrib/lite/model.cc
+++ b/tensorflow/contrib/lite/model.cc
@@ -175,6 +175,27 @@ std::vector<int> FlatBufferIntArrayToVector(T* flat_array) {
   return ret;
 }
 
+// Copies the contents from the flatbuffer int vector `flatbuffer` into the
+// int array `buffer`. `flat_vector` and `buffer` represent the same
+// configuration operation for a given operation.
+void FlatBufferIntVectorToArray(int max_size_of_buffer,
+                                const flatbuffers::Vector<int32_t>* flat_vector,
+                                int* buffer, ErrorReporter* error_reporter) {
+  if (!flat_vector) {
+    error_reporter->Report("Input array not provided for operation.\n");
+  } else {
+    int num_dimensions = flat_vector->Length();
+    if (num_dimensions > max_size_of_buffer / sizeof(int)) {
+      error_reporter->Report(
+          "Found too many dimensions in the operation's input array.\n");
+    } else {
+      for (int i = 0; i < num_dimensions; ++i) {
+        buffer[i] = flat_vector->Get(i);
+      }
+    }
+  }
+}
+
 // Allocate a structure using C malloc, but make sure the structure is a
 // POD structure that doesn't require constructors to run. The reason we do
 // this, is that Interpreter's C extension part will take ownership and wants
@@ -190,6 +211,9 @@ T* MallocPOD() {
 // This handles builtin data explicitly as there are flatbuffer schemas.
 //
 // Returns memory that must be feed.
+//
+// TODO(nupurgarg): Pass in void ** and return TfLiteStatus to ensure program
+// crashes if error reporter is called.
 void* ParseOpData(const Operator* op, BuiltinOperator op_type,
                   ErrorReporter* error_reporter) {
   auto parse_padding = [](Padding padding) {
@@ -432,23 +456,35 @@ void* ParseOpData(const Operator* op, BuiltinOperator op_type,
       builtin_data = reinterpret_cast<void*>(params);
       break;
     }
+    case BuiltinOperator_PAD: {
+      auto* params = MallocPOD<TfLitePadParams>();
+      if (auto* schema_params = op->builtin_options_as_PadOptions()) {
+        auto* before_padding = schema_params->before_padding();
+        FlatBufferIntVectorToArray(sizeof(params->before_padding),
+                                   before_padding, params->before_padding,
+                                   error_reporter);
+
+        auto* after_padding = schema_params->after_padding();
+        FlatBufferIntVectorToArray(sizeof(params->after_padding), after_padding,
+                                   params->after_padding, error_reporter);
+
+        if (before_padding->Length() != after_padding->Length()) {
+          error_reporter->Report(
+              "Before padding and after padding arrays need to contain the "
+              "same number of dimensions.\n");
+        }
+        params->num_dimensions = after_padding->Length();
+      }
+      builtin_data = reinterpret_cast<void*>(params);
+      break;
+    }
     case BuiltinOperator_RESHAPE: {
       auto* params = MallocPOD<TfLiteReshapeParams>();
       if (auto* schema_params = op->builtin_options_as_ReshapeOptions()) {
         auto* new_shape = schema_params->new_shape();
-        if (!new_shape) {
-          error_reporter->Report("No new_shape provided for Reshape\n");
-        } else {
-          params->num_dimensions = new_shape->Length();
-          if (params->num_dimensions > sizeof(params->shape) / sizeof(int)) {
-            error_reporter->Report(
-                "Found too many dimensions in Reshape's new_shape\n");
-          } else {
-            for (int i = 0; i < params->num_dimensions; ++i) {
-              params->shape[i] = new_shape->Get(i);
-            }
-          }
-        }
+        FlatBufferIntVectorToArray(sizeof(params->shape), new_shape,
+                                   params->shape, error_reporter);
+        params->num_dimensions = new_shape->Length();
       }
       builtin_data = reinterpret_cast<void*>(params);
       break;
diff --git a/tensorflow/contrib/lite/nnapi_delegate.cc b/tensorflow/contrib/lite/nnapi_delegate.cc
index 05853e853c..7fe968aa0a 100644
--- a/tensorflow/contrib/lite/nnapi_delegate.cc
+++ b/tensorflow/contrib/lite/nnapi_delegate.cc
@@ -300,6 +300,7 @@ void AddOpsAndParams(tflite::Interpreter* interpreter,
       case tflite::BuiltinOperator_L2_NORMALIZATION:
       case tflite::BuiltinOperator_LOCAL_RESPONSE_NORMALIZATION:
       case tflite::BuiltinOperator_MUL:
+      case tflite::BuiltinOperator_PAD:
       case tflite::BuiltinOperator_RESIZE_BILINEAR:
       case tflite::BuiltinOperator_CALL:
       case tflite::BuiltinOperator_SKIP_GRAM:
diff --git a/tensorflow/contrib/lite/schema/schema.fbs b/tensorflow/contrib/lite/schema/schema.fbs
index ddb2ab792c..7fa9b7a3a9 100644
--- a/tensorflow/contrib/lite/schema/schema.fbs
+++ b/tensorflow/contrib/lite/schema/schema.fbs
@@ -104,6 +104,7 @@ enum BuiltinOperator : byte {
   CALL = 31,
   CUSTOM = 32,
   EMBEDDING_LOOKUP_SPARSE = 33,
+  PAD = 34,
 }
 
 // Options for the builtin operators.
@@ -129,6 +130,7 @@ union BuiltinOptions {
   SpaceToDepthOptions,
   EmbeddingLookupSparseOptions,
   MulOptions,
+  PadOptions,
 }
 
 enum Padding : byte { SAME, VALID }
@@ -244,6 +246,11 @@ table CallOptions {
   subgraph:uint;
 }
 
+table PadOptions {
+  before_padding:[int];
+  after_padding:[int];
+}
+
 table ReshapeOptions {
   new_shape:[int];
 }
diff --git a/tensorflow/contrib/lite/schema/schema_generated.h b/tensorflow/contrib/lite/schema/schema_generated.h
index cbf10275f3..34cef71175 100755
--- a/tensorflow/contrib/lite/schema/schema_generated.h
+++ b/tensorflow/contrib/lite/schema/schema_generated.h
@@ -79,6 +79,9 @@ struct ResizeBilinearOptionsT;
 struct CallOptions;
 struct CallOptionsT;
 
+struct PadOptions;
+struct PadOptionsT;
+
 struct ReshapeOptions;
 struct ReshapeOptionsT;
 
@@ -167,11 +170,12 @@ enum BuiltinOperator {
   BuiltinOperator_CALL = 31,
   BuiltinOperator_CUSTOM = 32,
   BuiltinOperator_EMBEDDING_LOOKUP_SPARSE = 33,
+  BuiltinOperator_PAD = 34,
   BuiltinOperator_MIN = BuiltinOperator_ADD,
-  BuiltinOperator_MAX = BuiltinOperator_EMBEDDING_LOOKUP_SPARSE
+  BuiltinOperator_MAX = BuiltinOperator_PAD
 };
 
-inline BuiltinOperator (&EnumValuesBuiltinOperator())[31] {
+inline BuiltinOperator (&EnumValuesBuiltinOperator())[32] {
   static BuiltinOperator values[] = {
       BuiltinOperator_ADD,
       BuiltinOperator_AVERAGE_POOL_2D,
@@ -203,7 +207,8 @@ inline BuiltinOperator (&EnumValuesBuiltinOperator())[31] {
       BuiltinOperator_SKIP_GRAM,
       BuiltinOperator_CALL,
       BuiltinOperator_CUSTOM,
-      BuiltinOperator_EMBEDDING_LOOKUP_SPARSE};
+      BuiltinOperator_EMBEDDING_LOOKUP_SPARSE,
+      BuiltinOperator_PAD};
   return values;
 }
 
@@ -242,6 +247,7 @@ inline const char **EnumNamesBuiltinOperator() {
                                 "CALL",
                                 "CUSTOM",
                                 "EMBEDDING_LOOKUP_SPARSE",
+                                "PAD",
                                 nullptr};
   return names;
 }
@@ -274,11 +280,12 @@ enum BuiltinOptions {
   BuiltinOptions_SpaceToDepthOptions = 19,
   BuiltinOptions_EmbeddingLookupSparseOptions = 20,
   BuiltinOptions_MulOptions = 21,
+  BuiltinOptions_PadOptions = 22,
   BuiltinOptions_MIN = BuiltinOptions_NONE,
-  BuiltinOptions_MAX = BuiltinOptions_MulOptions
+  BuiltinOptions_MAX = BuiltinOptions_PadOptions
 };
 
-inline BuiltinOptions (&EnumValuesBuiltinOptions())[22] {
+inline BuiltinOptions (&EnumValuesBuiltinOptions())[23] {
   static BuiltinOptions values[] = {
       BuiltinOptions_NONE,
       BuiltinOptions_Conv2DOptions,
@@ -301,7 +308,8 @@ inline BuiltinOptions (&EnumValuesBuiltinOptions())[22] {
       BuiltinOptions_SkipGramOptions,
       BuiltinOptions_SpaceToDepthOptions,
       BuiltinOptions_EmbeddingLookupSparseOptions,
-      BuiltinOptions_MulOptions};
+      BuiltinOptions_MulOptions,
+      BuiltinOptions_PadOptions};
   return values;
 }
 
@@ -328,6 +336,7 @@ inline const char **EnumNamesBuiltinOptions() {
                                 "SpaceToDepthOptions",
                                 "EmbeddingLookupSparseOptions",
                                 "MulOptions",
+                                "PadOptions",
                                 nullptr};
   return names;
 }
@@ -451,6 +460,11 @@ struct BuiltinOptionsTraits<MulOptions> {
   static const BuiltinOptions enum_value = BuiltinOptions_MulOptions;
 };
 
+template <>
+struct BuiltinOptionsTraits<PadOptions> {
+  static const BuiltinOptions enum_value = BuiltinOptions_PadOptions;
+};
+
 struct BuiltinOptionsUnion {
   BuiltinOptions type;
   void *value;
@@ -708,6 +722,16 @@ struct BuiltinOptionsUnion {
                ? reinterpret_cast<const MulOptionsT *>(value)
                : nullptr;
   }
+  PadOptionsT *AsPadOptions() {
+    return type == BuiltinOptions_PadOptions
+               ? reinterpret_cast<PadOptionsT *>(value)
+               : nullptr;
+  }
+  const PadOptionsT *AsPadOptions() const {
+    return type == BuiltinOptions_PadOptions
+               ? reinterpret_cast<const PadOptionsT *>(value)
+               : nullptr;
+  }
 };
 
 bool VerifyBuiltinOptions(flatbuffers::Verifier &verifier, const void *obj,
@@ -2318,6 +2342,85 @@ flatbuffers::Offset<CallOptions> CreateCallOptions(
     flatbuffers::FlatBufferBuilder &_fbb, const CallOptionsT *_o,
     const flatbuffers::rehasher_function_t *_rehasher = nullptr);
 
+struct PadOptionsT : public flatbuffers::NativeTable {
+  typedef PadOptions TableType;
+  std::vector<int32_t> before_padding;
+  std::vector<int32_t> after_padding;
+  PadOptionsT() {}
+};
+
+struct PadOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
+  typedef PadOptionsT NativeTableType;
+  enum { VT_BEFORE_PADDING = 4, VT_AFTER_PADDING = 6 };
+  const flatbuffers::Vector<int32_t> *before_padding() const {
+    return GetPointer<const flatbuffers::Vector<int32_t> *>(VT_BEFORE_PADDING);
+  }
+  const flatbuffers::Vector<int32_t> *after_padding() const {
+    return GetPointer<const flatbuffers::Vector<int32_t> *>(VT_AFTER_PADDING);
+  }
+  bool Verify(flatbuffers::Verifier &verifier) const {
+    return VerifyTableStart(verifier) &&
+           VerifyOffset(verifier, VT_BEFORE_PADDING) &&
+           verifier.Verify(before_padding()) &&
+           VerifyOffset(verifier, VT_AFTER_PADDING) &&
+           verifier.Verify(after_padding()) && verifier.EndTable();
+  }
+  PadOptionsT *UnPack(
+      const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  void UnPackTo(
+      PadOptionsT *_o,
+      const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  static flatbuffers::Offset<PadOptions> Pack(
+      flatbuffers::FlatBufferBuilder &_fbb, const PadOptionsT *_o,
+      const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct PadOptionsBuilder {
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  void add_before_padding(
+      flatbuffers::Offset<flatbuffers::Vector<int32_t>> before_padding) {
+    fbb_.AddOffset(PadOptions::VT_BEFORE_PADDING, before_padding);
+  }
+  void add_after_padding(
+      flatbuffers::Offset<flatbuffers::Vector<int32_t>> after_padding) {
+    fbb_.AddOffset(PadOptions::VT_AFTER_PADDING, after_padding);
+  }
+  explicit PadOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
+      : fbb_(_fbb) {
+    start_ = fbb_.StartTable();
+  }
+  PadOptionsBuilder &operator=(const PadOptionsBuilder &);
+  flatbuffers::Offset<PadOptions> Finish() {
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<PadOptions>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<PadOptions> CreatePadOptions(
+    flatbuffers::FlatBufferBuilder &_fbb,
+    flatbuffers::Offset<flatbuffers::Vector<int32_t>> before_padding = 0,
+    flatbuffers::Offset<flatbuffers::Vector<int32_t>> after_padding = 0) {
+  PadOptionsBuilder builder_(_fbb);
+  builder_.add_after_padding(after_padding);
+  builder_.add_before_padding(before_padding);
+  return builder_.Finish();
+}
+
+inline flatbuffers::Offset<PadOptions> CreatePadOptionsDirect(
+    flatbuffers::FlatBufferBuilder &_fbb,
+    const std::vector<int32_t> *before_padding = nullptr,
+    const std::vector<int32_t> *after_padding = nullptr) {
+  return tflite::CreatePadOptions(
+      _fbb, before_padding ? _fbb.CreateVector<int32_t>(*before_padding) : 0,
+      after_padding ? _fbb.CreateVector<int32_t>(*after_padding) : 0);
+}
+
+flatbuffers::Offset<PadOptions> CreatePadOptions(
+    flatbuffers::FlatBufferBuilder &_fbb, const PadOptionsT *_o,
+    const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
 struct ReshapeOptionsT : public flatbuffers::NativeTable {
   typedef ReshapeOptions TableType;
   std::vector<int32_t> new_shape;
@@ -2807,6 +2910,11 @@ struct Operator FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
                ? static_cast<const MulOptions *>(builtin_options())
                : nullptr;
   }
+  const PadOptions *builtin_options_as_PadOptions() const {
+    return builtin_options_type() == BuiltinOptions_PadOptions
+               ? static_cast<const PadOptions *>(builtin_options())
+               : nullptr;
+  }
   const flatbuffers::Vector<uint8_t> *custom_options() const {
     return GetPointer<const flatbuffers::Vector<uint8_t> *>(VT_CUSTOM_OPTIONS);
   }
@@ -2958,6 +3066,11 @@ inline const MulOptions *Operator::builtin_options_as<MulOptions>() const {
   return builtin_options_as_MulOptions();
 }
 
+template <>
+inline const PadOptions *Operator::builtin_options_as<PadOptions>() const {
+  return builtin_options_as_PadOptions();
+}
+
 struct OperatorBuilder {
   flatbuffers::FlatBufferBuilder &fbb_;
   flatbuffers::uoffset_t start_;
@@ -4311,6 +4424,61 @@ inline flatbuffers::Offset<CallOptions> CreateCallOptions(
   return tflite::CreateCallOptions(_fbb, _subgraph);
 }
 
+inline PadOptionsT *PadOptions::UnPack(
+    const flatbuffers::resolver_function_t *_resolver) const {
+  auto _o = new PadOptionsT();
+  UnPackTo(_o, _resolver);
+  return _o;
+}
+
+inline void PadOptions::UnPackTo(
+    PadOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
+  (void)_o;
+  (void)_resolver;
+  {
+    auto _e = before_padding();
+    if (_e) {
+      _o->before_padding.resize(_e->size());
+      for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) {
+        _o->before_padding[_i] = _e->Get(_i);
+      }
+    }
+  };
+  {
+    auto _e = after_padding();
+    if (_e) {
+      _o->after_padding.resize(_e->size());
+      for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) {
+        _o->after_padding[_i] = _e->Get(_i);
+      }
+    }
+  };
+}
+
+inline flatbuffers::Offset<PadOptions> PadOptions::Pack(
+    flatbuffers::FlatBufferBuilder &_fbb, const PadOptionsT *_o,
+    const flatbuffers::rehasher_function_t *_rehasher) {
+  return CreatePadOptions(_fbb, _o, _rehasher);
+}
+
+inline flatbuffers::Offset<PadOptions> CreatePadOptions(
+    flatbuffers::FlatBufferBuilder &_fbb, const PadOptionsT *_o,
+    const flatbuffers::rehasher_function_t *_rehasher) {
+  (void)_rehasher;
+  (void)_o;
+  struct _VectorArgs {
+    flatbuffers::FlatBufferBuilder *__fbb;
+    const PadOptionsT *__o;
+    const flatbuffers::rehasher_function_t *__rehasher;
+  } _va = {&_fbb, _o, _rehasher};
+  (void)_va;
+  auto _before_padding =
+      _o->before_padding.size() ? _fbb.CreateVector(_o->before_padding) : 0;
+  auto _after_padding =
+      _o->after_padding.size() ? _fbb.CreateVector(_o->after_padding) : 0;
+  return tflite::CreatePadOptions(_fbb, _before_padding, _after_padding);
+}
+
 inline ReshapeOptionsT *ReshapeOptions::UnPack(
     const flatbuffers::resolver_function_t *_resolver) const {
   auto _o = new ReshapeOptionsT();
@@ -4959,6 +5127,10 @@ inline bool VerifyBuiltinOptions(flatbuffers::Verifier &verifier,
       auto ptr = reinterpret_cast<const MulOptions *>(obj);
       return verifier.VerifyTable(ptr);
     }
+    case BuiltinOptions_PadOptions: {
+      auto ptr = reinterpret_cast<const PadOptions *>(obj);
+      return verifier.VerifyTable(ptr);
+    }
     default:
       return false;
   }
@@ -5067,6 +5239,10 @@ inline void *BuiltinOptionsUnion::UnPack(
       auto ptr = reinterpret_cast<const MulOptions *>(obj);
       return ptr->UnPack(resolver);
     }
+    case BuiltinOptions_PadOptions: {
+      auto ptr = reinterpret_cast<const PadOptions *>(obj);
+      return ptr->UnPack(resolver);
+    }
     default:
       return nullptr;
   }
@@ -5162,6 +5338,10 @@ inline flatbuffers::Offset<void> BuiltinOptionsUnion::Pack(
       auto ptr = reinterpret_cast<const MulOptionsT *>(value);
       return CreateMulOptions(_fbb, ptr, _rehasher).Union();
     }
+    case BuiltinOptions_PadOptions: {
+      auto ptr = reinterpret_cast<const PadOptionsT *>(value);
+      return CreatePadOptions(_fbb, ptr, _rehasher).Union();
+    }
     default:
       return 0;
   }
@@ -5267,6 +5447,10 @@ inline BuiltinOptionsUnion::BuiltinOptionsUnion(const BuiltinOptionsUnion &u)
       value = new MulOptionsT(*reinterpret_cast<MulOptionsT *>(u.value));
       break;
     }
+    case BuiltinOptions_PadOptions: {
+      value = new PadOptionsT(*reinterpret_cast<PadOptionsT *>(u.value));
+      break;
+    }
     default:
       break;
   }
@@ -5379,6 +5563,11 @@ inline void BuiltinOptionsUnion::Reset() {
       delete ptr;
       break;
     }
+    case BuiltinOptions_PadOptions: {
+      auto ptr = reinterpret_cast<PadOptionsT *>(value);
+      delete ptr;
+      break;
+    }
     default:
       break;
   }
diff --git a/tensorflow/contrib/lite/testing/BUILD b/tensorflow/contrib/lite/testing/BUILD
index 3ff65dd381..b9c5cbe715 100644
--- a/tensorflow/contrib/lite/testing/BUILD
+++ b/tensorflow/contrib/lite/testing/BUILD
@@ -31,6 +31,7 @@ gen_zipped_test_files(
         "local_response_norm.zip",
         "max_pool.zip",
         "mul.zip",
+        "pad.zip",
         "relu.zip",
         "relu1.zip",
         "relu6.zip",
diff --git a/tensorflow/contrib/lite/testing/generate_examples.py b/tensorflow/contrib/lite/testing/generate_examples.py
index 5bca82ded0..4848ca8062 100644
--- a/tensorflow/contrib/lite/testing/generate_examples.py
+++ b/tensorflow/contrib/lite/testing/generate_examples.py
@@ -999,6 +999,37 @@ def make_local_response_norm_tests(zip_path):
   make_zip_of_tests(zip_path, test_parameters, build_graph, build_inputs)
 
 
+def make_pad_tests(zip_path):
+  """Make a set of tests to do pad."""
+
+  test_parameters = [{
+      "dtype": [tf.int32, tf.float32],
+      "input_shape": [[1, 1, 2, 1], [2, 1, 1, 1]],
+      "paddings": [[[0, 0], [0, 1], [2, 3], [0, 0]], [[0, 1], [0, 0], [0, 0],
+                                                      [2, 3]]],
+  }, {
+      "dtype": [tf.int32, tf.float32],
+      "input_shape": [[1, 2], [0, 1, 2]],
+      "paddings": [[[0, 1], [2, 3]]],
+  }]
+
+  def build_graph(parameters):
+    input_tensor = tf.placeholder(
+        dtype=parameters["dtype"],
+        name="input",
+        shape=parameters["input_shape"])
+    out = tf.pad(input_tensor, paddings=parameters["paddings"])
+    return [input_tensor], [out]
+
+  def build_inputs(parameters, sess, inputs, outputs):
+    input_values = create_tensor_data(parameters["dtype"],
+                                      parameters["input_shape"])
+    return [input_values], sess.run(
+        outputs, feed_dict=dict(zip(inputs, [input_values])))
+
+  make_zip_of_tests(zip_path, test_parameters, build_graph, build_inputs)
+
+
 def make_reshape_tests(zip_path):
   """Make a set of tests to do reshape."""
 
@@ -1169,6 +1200,7 @@ def main(unused_args):
         "l2_pool.zip": make_pool_tests(make_l2_pool),
         "avg_pool.zip": make_pool_tests(tf.nn.avg_pool),
         "max_pool.zip": make_pool_tests(tf.nn.max_pool),
+        "pad.zip": make_pad_tests,
         "reshape.zip": make_reshape_tests,
         "resize_bilinear.zip": make_resize_bilinear_tests,
         "sigmoid.zip": make_sigmoid_tests,
diff --git a/tensorflow/contrib/lite/testing/generated_examples_zip_test.cc b/tensorflow/contrib/lite/testing/generated_examples_zip_test.cc
index 3b3266738c..76e8767617 100644
--- a/tensorflow/contrib/lite/testing/generated_examples_zip_test.cc
+++ b/tensorflow/contrib/lite/testing/generated_examples_zip_test.cc
@@ -59,6 +59,11 @@ std::map<string, string> kBrokenTests = {
     // more than 1 element.
     {R"(constant.*input_shape=\[(2|2,2,2,2)\])", "68721522"},
 
+    // Pad only supports 4D float32 tensors.
+    {R"(paddtype=.*,input_shape=\[.,.\],paddings=\[\[.,.\],\[.,.\]\])",
+     "70527055"},
+    {R"(padd.*int32)", "70527055"},
+
     // L2Norm only supports 4D tensors.
     {R"(l2normdim=.*,epsilon=.*,input_shape=\[.,.\])", "67963684"},
     {R"(l2normdim=.*,epsilon=.*,input_shape=\[.,.,.,.,.*\])", "67963684"},
@@ -249,6 +254,7 @@ INSTANTIATE_TESTS(l2_pool)
 INSTANTIATE_TESTS(local_response_norm)
 INSTANTIATE_TESTS(max_pool)
 INSTANTIATE_TESTS(mul)
+INSTANTIATE_TESTS(pad)
 INSTANTIATE_TESTS(relu)
 INSTANTIATE_TESTS(relu1)
 INSTANTIATE_TESTS(relu6)
diff --git a/tensorflow/contrib/lite/toco/tflite/operator.cc b/tensorflow/contrib/lite/toco/tflite/operator.cc
index 37f0378231..8d25336bb7 100644
--- a/tensorflow/contrib/lite/toco/tflite/operator.cc
+++ b/tensorflow/contrib/lite/toco/tflite/operator.cc
@@ -348,6 +348,30 @@ class Mul : public BuiltinOperator<MulOperator, ::tflite::MulOptions,
   }
 };
 
+class Pad : public BuiltinOperator<PadOperator, ::tflite::PadOptions,
+                                   ::tflite::BuiltinOptions_PadOptions> {
+ public:
+  using BuiltinOperator::BuiltinOperator;
+
+  flatbuffers::Offset<TfLiteOptions> WriteOptions(
+      const TocoOperator& op,
+      flatbuffers::FlatBufferBuilder* builder) const override {
+    auto before_padding = builder->CreateVector(op.left_padding);
+    auto after_padding = builder->CreateVector(op.right_padding);
+    return ::tflite::CreatePadOptions(*builder, before_padding, after_padding);
+  }
+
+  void ReadOptions(const TfLiteOptions& options,
+                   TocoOperator* op) const override {
+    op->left_padding.insert(op->left_padding.end(),
+                            options.before_padding()->begin(),
+                            options.before_padding()->end());
+    op->right_padding.insert(op->right_padding.end(),
+                             options.after_padding()->begin(),
+                             options.after_padding()->end());
+  }
+};
+
 class Reshape
     : public BuiltinOperator<TensorFlowReshapeOperator,
                              ::tflite::ReshapeOptions,
@@ -551,6 +575,7 @@ std::vector<std::unique_ptr<BaseOperator>> BuildOperatorList() {
   ops.emplace_back(new MaxPool(::tflite::BuiltinOperator_MAX_POOL_2D,
                                OperatorType::kMaxPool));
   ops.emplace_back(new Mul(::tflite::BuiltinOperator_MUL, OperatorType::kMul));
+  ops.emplace_back(new Pad(::tflite::BuiltinOperator_PAD, OperatorType::kPad));
   ops.emplace_back(new Reshape(::tflite::BuiltinOperator_RESHAPE,
                                OperatorType::kTensorFlowReshape));
   ops.emplace_back(
diff --git a/tensorflow/contrib/lite/toco/tflite/operator_test.cc b/tensorflow/contrib/lite/toco/tflite/operator_test.cc
index 7301c6fb42..fe079e833d 100644
--- a/tensorflow/contrib/lite/toco/tflite/operator_test.cc
+++ b/tensorflow/contrib/lite/toco/tflite/operator_test.cc
@@ -215,6 +215,16 @@ TEST_F(OperatorTest, BuiltinMaxPool) {
   EXPECT_EQ(op.kheight, output_toco_op->kheight);
 }
 
+TEST_F(OperatorTest, BuiltinPad) {
+  PadOperator op;
+  op.left_padding = {1, 2, 3};
+  op.right_padding = {1, 2, 3};
+  auto output_toco_op =
+      SerializeAndDeserialize(GetOperator("PAD", OperatorType::kPad), op);
+  EXPECT_EQ(op.left_padding, output_toco_op->left_padding);
+  EXPECT_EQ(op.right_padding, output_toco_op->right_padding);
+}
+
 TEST_F(OperatorTest, BuiltinReshape) {
   TensorFlowReshapeOperator op;
   op.shape = {1, 2, 4, 5, 8};
-- 
GitLab


From 828dfee507623c9b8496904e7b94beb9c6ca8306 Mon Sep 17 00:00:00 2001
From: Igor Saprykin <isaprykin@google.com>
Date: Wed, 13 Dec 2017 15:01:14 -0800
Subject: [PATCH 0979/1225] Average the loss across `replicate_model_fn`'s
 towers.

This avoids the need for users to add `loss = loss / num_of_towers` code and is in more in line with the current best practices.

I verified this by running cnn_mnist.

PiperOrigin-RevId: 178963334
---
 tensorflow/contrib/estimator/BUILD            |   5 +-
 .../python/estimator/replicate_model_fn.py    |  53 ++++++---
 .../estimator/replicate_model_fn_test.py      | 101 +++++++++++++++++-
 3 files changed, 143 insertions(+), 16 deletions(-)

diff --git a/tensorflow/contrib/estimator/BUILD b/tensorflow/contrib/estimator/BUILD
index ba272d7e88..bd65ece85d 100644
--- a/tensorflow/contrib/estimator/BUILD
+++ b/tensorflow/contrib/estimator/BUILD
@@ -331,16 +331,17 @@ py_library(
         "//tensorflow/python:device",
         "//tensorflow/python:device_lib",
         "//tensorflow/python:framework_ops",
-        "//tensorflow/python:gradients",
         "//tensorflow/python:math_ops",
         "//tensorflow/python:platform",
+        "//tensorflow/python:sparse_ops",
+        "//tensorflow/python:sparse_tensor",
         "//tensorflow/python:state_ops",
         "//tensorflow/python:training",
         "//tensorflow/python:variable_scope",
-        "//tensorflow/python:variables",
         "//tensorflow/python/estimator:export_output",
         "//tensorflow/python/estimator:model_fn",
         "//tensorflow/python/estimator:util",
+        "//tensorflow/python/ops/losses",
         "@six_archive//:six",
     ],
 )
diff --git a/tensorflow/contrib/estimator/python/estimator/replicate_model_fn.py b/tensorflow/contrib/estimator/python/estimator/replicate_model_fn.py
index ca3a2394ee..2177ae2366 100644
--- a/tensorflow/contrib/estimator/python/estimator/replicate_model_fn.py
+++ b/tensorflow/contrib/estimator/python/estimator/replicate_model_fn.py
@@ -41,21 +41,25 @@ from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import sparse_ops
 from tensorflow.python.ops import state_ops
 from tensorflow.python.ops import variable_scope
+from tensorflow.python.ops.losses import losses
 from tensorflow.python.platform import tf_logging
 from tensorflow.python.training import device_setter as device_setter_lib
 from tensorflow.python.training import training_util
 
 
-def replicate_model_fn(model_fn, optimizer_fn, devices=None):
+def replicate_model_fn(model_fn,
+                       optimizer_fn,
+                       loss_reduction=losses.Reduction.SUM,
+                       devices=None):
   """Replicate `Estimator.model_fn` over GPUs within a single host.
 
   The given `model_fn` specifies a single forward pass of a model.  To replicate
   such a model over GPUs, each GPU gets its own instance of the forward pass
   (a.k.a. a tower).  The input features and labels get sharded into the chunks
-  that correspond to the number of GPUs.  Each tower computes its own loss based
+  that correspond to the number of GPUs.  Each tower computes a loss based
   on its input.  For each such loss, gradients are computed.  After that, the
-  available losses are summed to form aggregated loss.  The available
-  gradients are summed too.  Then, they update weights using the specified
+  available losses are aggregated to form aggregated loss.  Available
+  gradients are summed.  Then, they update weights using the specified
   optimizer.
 
   If `devices` are `None`, then all available GPUs are going to be used for
@@ -102,7 +106,7 @@ def replicate_model_fn(model_fn, optimizer_fn, devices=None):
   On reduction algorithms:
   Certain algorithms were chosen for aggregating results of computations on
   multiple towers:
-    - Losses from all towers are reduced using sum.
+    - Losses from all towers are reduced according to `loss_reduction`.
     - Gradients are reduced using sum for each trainable variable.
     - `eval_metrics_ops` are reduced per metric using `reduce_mean`.
     - `EstimatorSpec.predictions` and `EstimatorSpec.export_outputs` are
@@ -124,6 +128,7 @@ def replicate_model_fn(model_fn, optimizer_fn, devices=None):
     optimizer_fn: a function that returns an optimizer instance.  The function
       may accept one `params` argument.  This is the `params` argument as
       defined by `Estimator`.  See  the `Estimator` documentation for details.
+    loss_reduction: controls whether losses are summed or averaged.
     devices: Optional list of devices to replicate the model across.  This
       argument can be used to replice only on the subset of available GPUs.
       If `None`, then all available GPUs are going to be used for replication.
@@ -137,9 +142,11 @@ def replicate_model_fn(model_fn, optimizer_fn, devices=None):
   return _replicate_model_fn_with_mode(
       model_fn,
       optimizer_fn,
+      loss_reduction,
       devices,
-      # TODO(isaprykin): Query system configuration to choose modes other than
-      # `SHARED_LOCAL_PARAMETER_SERVER`, even though it is often appropriate.
+      # TODO(isaprykin): Query the system configuration to choose modes other
+      # than `SHARED_LOCAL_PARAMETER_SERVER`, even though it is often
+      # appropriate.
       mode=_VariableDistributionMode.SHARED_LOCAL_PARAMETER_SERVER)
 
 
@@ -171,9 +178,13 @@ class _VariableDistributionMode(object):
 def _replicate_model_fn_with_mode(
     model_fn,
     optimizer_fn,
+    loss_reduction=losses.Reduction.SUM,
     devices=None,
     mode=_VariableDistributionMode.SHARED_LOCAL_PARAMETER_SERVER):
   """A version of `replicate_model_fn` that allows to specify a `mode`."""
+  if loss_reduction == losses.Reduction.NONE:
+    raise ValueError('Tower losses need to be reduced in some way, yet {} '
+                     'reduction is specified.'.format(loss_reduction))
   if not devices:
     devices = _get_local_devices('GPU') or _get_local_devices('CPU')
 
@@ -199,6 +210,7 @@ def _replicate_model_fn_with_mode(
         features=feature_shards,
         labels=label_shards,
         params=params,
+        loss_reduction=loss_reduction,
         config=config,
         devices=devices,
         local_ps_devices=ps_devices)
@@ -269,6 +281,7 @@ def _get_loss_towers(model_fn,
                      config,
                      devices,
                      local_ps_devices,
+                     loss_reduction=losses.Reduction.SUM,
                      name_scope_pattern=_DEFAULT_NAME_SCOPE_PATTERN):
   """Replicate the loss computation across devices."""
   tower_specs = []
@@ -307,12 +320,15 @@ def _get_loss_towers(model_fn,
           if labels:
             labels_shard = labels[i]
 
-          tower_specs.append(
-              model_fn(
-                  mode=mode,
-                  features=features[i],
-                  labels=labels_shard,
-                  **optional_params))
+          tower_spec = model_fn(
+              mode=mode,
+              features=features[i],
+              labels=labels_shard,
+              **optional_params)
+          if loss_reduction != losses.Reduction.SUM:
+            tower_spec = _scale_tower_loss(
+                tower_spec, number_of_towers=len(devices))
+          tower_specs.append(tower_spec)
   return tower_specs
 
 
@@ -339,6 +355,17 @@ def _local_device_setter(worker_device, ps_devices, ps_strategy):
   return local_device_chooser
 
 
+def _scale_tower_loss(tower_spec, number_of_towers):
+  """Scale down the loss for arriving at the average loss by summing."""
+  if tower_spec.loss is None:
+    return tower_spec
+
+  estimator_spec = tower_spec._asdict()
+  estimator_spec['loss'] = math_ops.div(
+      estimator_spec['loss'], 1.0 * number_of_towers, name='averaged_loss')
+  return model_fn_lib.EstimatorSpec(**estimator_spec)
+
+
 def _minimize_towers(tower_specs, optimizer):
   """Aggregate and apply gradients for computed losses."""
   grad_lists = {}
diff --git a/tensorflow/contrib/estimator/python/estimator/replicate_model_fn_test.py b/tensorflow/contrib/estimator/python/estimator/replicate_model_fn_test.py
index a83a1b8407..c1b4e7b1a7 100644
--- a/tensorflow/contrib/estimator/python/estimator/replicate_model_fn_test.py
+++ b/tensorflow/contrib/estimator/python/estimator/replicate_model_fn_test.py
@@ -40,6 +40,7 @@ from tensorflow.python.framework import ops as ops_lib
 from tensorflow.python.framework import test_util
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import control_flow_ops
+from tensorflow.python.ops import losses
 from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import metrics as metrics_lib
 from tensorflow.python.ops import variable_scope
@@ -221,13 +222,40 @@ class ReplicateModelTest(test_util.TensorFlowTestCase):
       total_loss = (1.0 * 10 - 1.0) + (2.0 * 10 - 2.0)
       self.assertEqual(total_loss, session.run(estimator_spec.loss))
 
-      # loss' of c is 3.
+      # derivative of loss = (1*c - 1) + (2*c - 2) is 3.
       # new value of c = 10 - learning rate * 3 = 7.0.
       session.run(estimator_spec.train_op)
       with variable_scope.variable_scope('', reuse=True):
         c = variable_scope.get_variable('c', dtype=dtypes.float64)
         self.assertEqual(7.0, session.run(c))
 
+  def test_train_with_mean_reduction(self):
+    features = np.array([[1.0], [2.0]])
+    labels = np.array([[1.0], [2.0]])
+
+    with self.test_session() as session:
+      replicated_model_fn = replicate_model_fn.replicate_model_fn(
+          self.model_fn,
+          self.optimizer_fn,
+          losses.Reduction.MEAN,
+          devices=['/gpu:0', '/gpu:1'])
+      estimator_spec = replicated_model_fn(
+          features, labels, model_fn_lib.ModeKeys.TRAIN, self.params)
+      session.run(variables.global_variables_initializer())
+
+      # loss = feature * c - label
+      total_loss = ((1.0 * 10 - 1.0) + (2.0 * 10 - 2.0)) / 2.0
+      self.assertEqual(total_loss, session.run(estimator_spec.loss))
+
+      # derivative of loss = (1*c - 1)/2 + (2*c - 2)/2 is 1.5.
+      # It's the same computation as without mean reduction, but the
+      # loss from every tower is scaled by 1/<number of towers>.
+      # new value of c = 10 - learning rate * 1.5 = 8.5
+      session.run(estimator_spec.train_op)
+      with variable_scope.variable_scope('', reuse=True):
+        c = variable_scope.get_variable('c', dtype=dtypes.float64)
+        self.assertEqual(8.5, session.run(c))
+
   def test_train_spec_with_optimizer_without_params(self):
 
     def optimizer_fn_without_params():
@@ -276,6 +304,38 @@ class ReplicateModelTest(test_util.TensorFlowTestCase):
       self.assertEqual(0, auc)
       self.assertNear(total_loss, session.run(estimator_spec.loss), 0.01)
 
+  def test_eval_with_mean_reduction(self):
+    features = np.array([[0.01], [0.002]])
+    labels = np.array([[0.01], [0.02]])
+
+    with self.test_session() as session:
+      replicated_model_fn = replicate_model_fn.replicate_model_fn(
+          self.model_fn,
+          self.optimizer_fn,
+          losses.Reduction.MEAN,
+          devices=['/gpu:0', '/gpu:1'])
+      estimator_spec = replicated_model_fn(
+          features, labels, model_fn_lib.ModeKeys.EVAL, self.params)
+      session.run(variables.local_variables_initializer())
+      session.run(variables.global_variables_initializer())
+
+      accuracy, a = estimator_spec.eval_metric_ops['accuracy']
+      auc, b = estimator_spec.eval_metric_ops['auc']
+
+      session.run([a, b])
+      accuracy = session.run(accuracy)
+      auc = session.run(auc)
+
+      # loss[i] = features[i] * 10 - labels[i].
+      # Accuracy is 0.0 (no match) in the first tower.
+      # Accuracy is 1.0 (match) in the second tower, since the feature
+      # times weight "c" happened to be equal to the label.
+      total_loss = ((0.01 * 10 - 0.01) + (0.002 * 10 - 0.02)) / 2.0
+
+      self.assertNear((0.0 + 1.0) / 2.0, accuracy, 0.01)
+      self.assertEqual(0, auc)
+      self.assertNear(total_loss, session.run(estimator_spec.loss), 0.01)
+
   def test_predict(self):
     features = np.array([[0.01], [0.002]])
     labels = np.array([[0.01], [0.02]])
@@ -356,6 +416,11 @@ class ReplicateModelTest(test_util.TensorFlowTestCase):
           'probabilities': np.array([[0.1], [0.02]])
       }, session.run(estimator_spec.predictions))
 
+  def test_unsupported_loss_reduction(self):
+    with self.assertRaisesRegexp(ValueError, ''):
+      _ = replicate_model_fn.replicate_model_fn(
+          self.model_fn, self.optimizer_fn, losses.Reduction.NONE)
+
 
 class GetLossTowersTest(test_util.TensorFlowTestCase):
 
@@ -406,6 +471,40 @@ class GetLossTowersTest(test_util.TensorFlowTestCase):
         c = variable_scope.get_variable('c', dtype=dtypes.float64)
         self.assertEqual(0.25, session.run(c))
 
+  def test_gradients_are_computed_with_mean_reduction(self):
+    with self.test_session() as session:
+      tower_specs = replicate_model_fn._get_loss_towers(
+          self.model_fn,
+          mode=None,
+          features=[[0.6], [1.6]],
+          labels=[[0.6], [0.6]],
+          params=None,
+          loss_reduction=losses.Reduction.MEAN,
+          config=None,
+          devices=['/gpu:0', '/gpu:1'],
+          local_ps_devices=['/gpu:0'],
+          name_scope_pattern='test_tower_{}')
+      session.run(variables.global_variables_initializer())
+
+      self.assertEqual(len(tower_specs), 2)
+
+      self.assertEqual('/device:GPU:0', tower_specs[0].loss.device)
+      self.assertEqual('averaged_loss:0', tower_specs[0].loss.name)
+      self.assertEqual(0.5, session.run(tower_specs[0].loss))
+
+      self.assertEqual('/device:GPU:1', tower_specs[1].loss.device)
+      self.assertEqual('test_tower_1/averaged_loss:0', tower_specs[1].loss.name)
+      # The input batch for the second tower had a loss that is 1.0
+      # bigger: 0.6 vs 1.6.
+      self.assertEqual(1.0, session.run(tower_specs[1].loss))
+
+      self.assertEqual(1, len(variables.global_variables()))
+      self.assertEqual(1, len(variables.trainable_variables()))
+
+      with variable_scope.variable_scope('', reuse=True):
+        c = variable_scope.get_variable('c', dtype=dtypes.float64)
+        self.assertEqual(0.25, session.run(c))
+
   def test_variables_are_round_robined_correctly(self):
     """Test that creates multiple variables and tests round-robin placement."""
 
-- 
GitLab


From b09be8eff9505486b0f838e2cb281c3ebe8ecfc6 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 13 Dec 2017 15:13:16 -0800
Subject: [PATCH 0980/1225] Enable Div -> Mul by reciprocal strength reduction.

PiperOrigin-RevId: 178965261
---
 tensorflow/core/grappler/optimizers/constant_folding.cc | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/tensorflow/core/grappler/optimizers/constant_folding.cc b/tensorflow/core/grappler/optimizers/constant_folding.cc
index fa3039d3f6..007e3161f1 100644
--- a/tensorflow/core/grappler/optimizers/constant_folding.cc
+++ b/tensorflow/core/grappler/optimizers/constant_folding.cc
@@ -1433,8 +1433,7 @@ Status ConstantFolding::SimplifyGraph(GraphDef* output,
     // Strength reduce floating point division by a constant Div(x, const) to
     // multiplication by the reciprocal Mul(x, Reciprocal(const)). This in turn
     // will be constant folded to Mul(x, 1.0/const).
-    if (is_aggressive && node->input_size() >= 2 &&
-        (IsRealDiv(*node) || IsDiv(*node))) {
+    if (node->input_size() >= 2 && (IsRealDiv(*node) || IsDiv(*node))) {
       const string& const_input = node->input(1);
       const NodeDef* denom = node_map_->GetNode(const_input);
       CHECK(denom != nullptr);
-- 
GitLab


From d80d6de2890112f0013ffede31767907ec3291ca Mon Sep 17 00:00:00 2001
From: Peter Hawkins <phawkins@google.com>
Date: Wed, 13 Dec 2017 15:21:11 -0800
Subject: [PATCH 0981/1225] Fix bfloat16 serialization of Tensors.

Previously, Python serialization and deserialization used the half_val field of TensorProto, whereas C++ serialization used the int_val field. However, C++ bfloat16 deserialization was always broken, so it was never possible to correctly deserialize a bfloat16 Tensor.

The only reason serialization worked at all was because of the generic tensor_contents bytes serialization.

PiperOrigin-RevId: 178966536
---
 tensorflow/core/framework/tensor.cc           | 42 +++++++++++++------
 tensorflow/core/framework/tensor.proto        |  4 +-
 tensorflow/core/framework/tensor_test.cc      | 22 ++++++++++
 .../python/kernel_tests/constant_op_test.py   | 19 ++++++++-
 4 files changed, 70 insertions(+), 17 deletions(-)

diff --git a/tensorflow/core/framework/tensor.cc b/tensorflow/core/framework/tensor.cc
index 24b7b08ebc..4f08cdc1d7 100644
--- a/tensorflow/core/framework/tensor.cc
+++ b/tensorflow/core/framework/tensor.cc
@@ -415,18 +415,10 @@ struct ProtoHelper<qint32> {
 
 template <>
 struct ProtoHelper<bfloat16> {
-  typedef Helper<float>::RepeatedFieldType FieldType;
-  static const bfloat16* Begin(const TensorProto& proto) {
-    // TODO: Isn't this wrong, given that int_val is 32 bits long?
-    return reinterpret_cast<const bfloat16*>(proto.int_val().data());
-  }
-  static size_t NumElements(const TensorProto& proto) {
-    return proto.int_val().size();
-  }
   static void Fill(const bfloat16* data, size_t n, TensorProto* proto) {
-    proto->mutable_int_val()->Reserve(n);
+    proto->mutable_half_val()->Reserve(n);
     for (size_t i = 0; i < n; ++i) {
-      proto->mutable_int_val()->AddAlreadyReserved(data[i].value);
+      proto->mutable_half_val()->AddAlreadyReserved(data[i].value);
     }
   }
 };
@@ -529,9 +521,9 @@ TensorBuffer* FromProtoField<Variant>(Allocator* a, const TensorProto& in,
   return buf;
 }
 
-// fp16 is opaque to the protobuf, so we deserialize these identical to uint16
-// but with data stored in half_val instead of int_val (ie., we don't use
-// ProtoHelper<uint16>).
+// fp16 and bfloat16 are opaque to the protobuf, so we deserialize these
+// identical to uint16 but with data stored in half_val instead of int_val (ie.,
+// we don't use ProtoHelper<uint16>).
 template <>
 TensorBuffer* FromProtoField<Eigen::half>(Allocator* a, const TensorProto& in,
                                           int64 n) {
@@ -556,6 +548,30 @@ TensorBuffer* FromProtoField<Eigen::half>(Allocator* a, const TensorProto& in,
   return buf;
 }
 
+template <>
+TensorBuffer* FromProtoField<bfloat16>(Allocator* a, const TensorProto& in,
+                                       int64 n) {
+  CHECK_GT(n, 0);
+  Buffer<bfloat16>* buf = new Buffer<bfloat16>(a, n);
+  uint16* data = buf->template base<uint16>();
+  if (data == nullptr) {
+    buf->Unref();
+    return nullptr;
+  }
+  const int64 in_n = in.half_val().size();
+  auto begin = in.half_val().begin();
+  if (n <= in_n) {
+    std::copy_n(begin, n, data);
+  } else if (in_n > 0) {
+    std::copy_n(begin, in_n, data);
+    const uint16 last = *(data + in_n - 1);
+    std::fill_n(data + in_n, n - in_n, last);
+  } else {
+    std::fill_n(data, n, 0);
+  }
+  return buf;
+}
+
 // Copies T[n] stored in the buffer "in" into the repeated field in
 // "out" corresponding to type T.
 template <typename T>
diff --git a/tensorflow/core/framework/tensor.proto b/tensorflow/core/framework/tensor.proto
index 6dab325969..abbf16e810 100644
--- a/tensorflow/core/framework/tensor.proto
+++ b/tensorflow/core/framework/tensor.proto
@@ -40,8 +40,8 @@ message TensorProto {
   // be set.  The values hold the flattened representation of the tensor in
   // row major order.
 
-  // DT_HALF. Note that since protobuf has no int16 type, we'll have some
-  // pointless zero padding for each value here.
+  // DT_HALF, DT_BFLOAT16. Note that since protobuf has no int16 type, we'll
+  // have some pointless zero padding for each value here.
   repeated int32 half_val = 13 [packed = true];
 
   // DT_FLOAT.
diff --git a/tensorflow/core/framework/tensor_test.cc b/tensorflow/core/framework/tensor_test.cc
index cbc921ccd0..1482880428 100644
--- a/tensorflow/core/framework/tensor_test.cc
+++ b/tensorflow/core/framework/tensor_test.cc
@@ -175,6 +175,28 @@ void TestCopies(const Tensor& t) {
   }
 }
 
+TEST(Tensor_Half, Simple) {
+  Tensor t(DT_HALF, TensorShape({5, 7}));
+  EXPECT_TRUE(t.shape().IsSameSize(TensorShape({5, 7})));
+  for (int64 a = 0; a < t.shape().dim_size(0); a++) {
+    for (int64 b = 0; b < t.shape().dim_size(1); b++) {
+      t.matrix<Eigen::half>()(a, b) = static_cast<Eigen::half>(a * b);
+    }
+  }
+  TestCopies<Eigen::half>(t);
+}
+
+TEST(Tensor_Bfloat16, Simple) {
+  Tensor t(DT_BFLOAT16, TensorShape({5, 7}));
+  EXPECT_TRUE(t.shape().IsSameSize(TensorShape({5, 7})));
+  for (int64 a = 0; a < t.shape().dim_size(0); a++) {
+    for (int64 b = 0; b < t.shape().dim_size(1); b++) {
+      t.matrix<bfloat16>()(a, b) = static_cast<bfloat16>(a * b);
+    }
+  }
+  TestCopies<bfloat16>(t);
+}
+
 TEST(Tensor_Float, Simple) {
   Tensor t(DT_FLOAT, TensorShape({10, 20}));
   EXPECT_TRUE(t.shape().IsSameSize(TensorShape({10, 20})));
diff --git a/tensorflow/python/kernel_tests/constant_op_test.py b/tensorflow/python/kernel_tests/constant_op_test.py
index 68817cc256..030c690167 100644
--- a/tensorflow/python/kernel_tests/constant_op_test.py
+++ b/tensorflow/python/kernel_tests/constant_op_test.py
@@ -44,7 +44,8 @@ class ConstantTest(test.TestCase):
     np_ans = np.array(x)
     with self.test_session(use_gpu=False):
       tf_ans = ops.convert_to_tensor(x).eval()
-    if np_ans.dtype in [np.float32, np.float64, np.complex64, np.complex128]:
+    dtype = dtypes_lib.as_dtype(np_ans.dtype)
+    if dtype.is_floating or dtype.is_complex:
       self.assertAllClose(np_ans, tf_ans)
     else:
       self.assertAllEqual(np_ans, tf_ans)
@@ -53,7 +54,8 @@ class ConstantTest(test.TestCase):
     np_ans = np.array(x)
     with self.test_session(use_gpu=True):
       tf_ans = ops.convert_to_tensor(x).eval()
-    if np_ans.dtype in [np.float32, np.float64, np.complex64, np.complex128]:
+    dtype = dtypes_lib.as_dtype(np_ans.dtype)
+    if dtype.is_floating or dtype.is_complex:
       self.assertAllClose(np_ans, tf_ans)
     else:
       self.assertAllEqual(np_ans, tf_ans)
@@ -62,6 +64,19 @@ class ConstantTest(test.TestCase):
     self._testCpu(x)
     self._testGpu(x)
 
+  def testBFloat16(self):
+    bfloat16 = dtypes_lib.bfloat16.as_numpy_dtype
+    self._testAll(np.arange(-15, 15).reshape([2, 3, 5]).astype(bfloat16))
+    self._testAll(
+        np.random.normal(size=30).reshape([2, 3, 5]).astype(bfloat16))
+    self._testAll(np.empty((2, 0, 5)).astype(bfloat16))
+
+  def testHalf(self):
+    self._testAll(np.arange(-15, 15).reshape([2, 3, 5]).astype(np.float16))
+    self._testAll(
+        np.random.normal(size=30).reshape([2, 3, 5]).astype(np.float16))
+    self._testAll(np.empty((2, 0, 5)).astype(np.float16))
+
   def testFloat(self):
     self._testAll(np.arange(-15, 15).reshape([2, 3, 5]).astype(np.float32))
     self._testAll(
-- 
GitLab


From e2e15df4175a3c13aa550b17bad4bb5d92185e7e Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 13 Dec 2017 15:22:39 -0800
Subject: [PATCH 0982/1225] [XLA] Update parser to handle conditional. Also fix
 the stringification of conditional HloInstruction.

PiperOrigin-RevId: 178966782
---
 .../compiler/xla/service/hlo_instruction.cc   |  3 +++
 .../xla/service/hlo_instruction_test.cc       | 12 +++++++++-
 .../compiler/xla/tools/parser/hlo_parser.cc   | 18 +++++++++++++-
 .../xla/tools/parser/hlo_parser_test.cc       | 24 +++++++++++++++++++
 4 files changed, 55 insertions(+), 2 deletions(-)

diff --git a/tensorflow/compiler/xla/service/hlo_instruction.cc b/tensorflow/compiler/xla/service/hlo_instruction.cc
index 10ac665083..1dab6076a5 100644
--- a/tensorflow/compiler/xla/service/hlo_instruction.cc
+++ b/tensorflow/compiler/xla/service/hlo_instruction.cc
@@ -2033,6 +2033,9 @@ std::vector<string> HloInstruction::ExtraAttributesToString() const {
   } else if (opcode() == HloOpcode::kSelectAndScatter) {
     extra.push_back(StrCat("select=%", select()->name()));
     extra.push_back(StrCat("scatter=%", scatter()->name()));
+  } else if (opcode() == HloOpcode::kConditional) {
+    extra.push_back(StrCat("true_computation=%", true_computation()->name()));
+    extra.push_back(StrCat("false_computation=%", false_computation()->name()));
   } else if (opcode() == HloOpcode::kCall || opcode() == HloOpcode::kMap ||
              opcode() == HloOpcode::kReduceWindow ||
              opcode() == HloOpcode::kReduce) {
diff --git a/tensorflow/compiler/xla/service/hlo_instruction_test.cc b/tensorflow/compiler/xla/service/hlo_instruction_test.cc
index aa3fd0cf4f..54788fa2da 100644
--- a/tensorflow/compiler/xla/service/hlo_instruction_test.cc
+++ b/tensorflow/compiler/xla/service/hlo_instruction_test.cc
@@ -1130,7 +1130,7 @@ TEST_F(HloInstructionTest, CloneSuffixNames) {
 }
 
 TEST_F(HloInstructionTest, Stringification) {
-  // Tests stringification of a simple op, fusion, and while.
+  // Tests stringification of a simple op, fusion, while, and conditional.
   const Shape s1 = ShapeUtil::MakeShape(F32, {5, 10});
   const Shape s2 = ShapeUtil::MakeShape(F32, {20, 10});
   const Shape s2t = ShapeUtil::MakeShape(F32, {10, 20});
@@ -1168,6 +1168,16 @@ TEST_F(HloInstructionTest, Stringification) {
   EXPECT_EQ(loop->ToString(false, false),
             "%while = f32[5,20]{1,0} while(f32[5,10]{1,0} %x), "
             "condition=%TransposeDot, body=%TransposeDot");
+
+  auto pred = builder.AddInstruction(
+      HloInstruction::CreateConstant(Literal::CreateR0<bool>(true)));
+  HloInstruction* conditional =
+      builder.AddInstruction(HloInstruction::CreateConditional(
+          sout, pred, x, computation, x, computation));
+  EXPECT_EQ(conditional->ToString(false, false),
+            "%conditional = f32[5,20]{1,0} conditional(pred[] %constant, "
+            "f32[5,10]{1,0} %x, f32[5,10]{1,0} %x), "
+            "true_computation=%TransposeDot, false_computation=%TransposeDot");
 }
 
 }  // namespace
diff --git a/tensorflow/compiler/xla/tools/parser/hlo_parser.cc b/tensorflow/compiler/xla/tools/parser/hlo_parser.cc
index 78372dedd8..192f134cb9 100644
--- a/tensorflow/compiler/xla/tools/parser/hlo_parser.cc
+++ b/tensorflow/compiler/xla/tools/parser/hlo_parser.cc
@@ -884,7 +884,23 @@ bool HloParser::ParseInstruction(HloComputation::Builder* builder,
               static_cast<int>(*mantissa_bits)));
       break;
     }
-    case HloOpcode::kConditional:
+    case HloOpcode::kConditional: {
+      optional<HloComputation*> true_computation;
+      optional<HloComputation*> false_computation;
+      attrs["true_computation"] = {/*required=*/true, AttrTy::kHloComputation,
+                                   &true_computation};
+      attrs["false_computation"] = {/*required=*/true, AttrTy::kHloComputation,
+                                    &false_computation};
+      if (!ParseOperands(&operands, /*expected_size=*/3) ||
+          !ParseAttributes(attrs)) {
+        return false;
+      }
+      instruction = builder->AddInstruction(HloInstruction::CreateConditional(
+          shape, /*pred=*/operands[0],
+          /*true_computation_arg=*/operands[1], *true_computation,
+          /*false_computation_arg=*/operands[2], *false_computation));
+      break;
+    }
     case HloOpcode::kCustomCall:
     case HloOpcode::kTrace:
       return TokenError(StrCat("parsing not yet implemented for op: ",
diff --git a/tensorflow/compiler/xla/tools/parser/hlo_parser_test.cc b/tensorflow/compiler/xla/tools/parser/hlo_parser_test.cc
index 7eebc5dc93..3b1f81134b 100644
--- a/tensorflow/compiler/xla/tools/parser/hlo_parser_test.cc
+++ b/tensorflow/compiler/xla/tools/parser/hlo_parser_test.cc
@@ -703,6 +703,30 @@ ENTRY %ReducePrecision () -> f32[1] {
   ROOT %reduce-precision = f32[1]{0} reduce-precision(f32[1]{0} %constant), exponent_bits=8, mantissa_bits=10
 }
 
+)"
+},
+// Conditional
+{
+"Conditional",
+R"(HloModule conditional:
+
+%Negate (x: f32[]) -> f32[] {
+  %x = f32[] parameter(0)
+  ROOT %negate = f32[] negate(f32[] %x)
+}
+
+%Identity (y: f32[]) -> f32[] {
+  %y = f32[] parameter(0)
+  ROOT %copy = f32[] copy(f32[] %y)
+}
+
+ENTRY %Parameters1.v4 () -> f32[] {
+  %constant = pred[] constant(true)
+  %constant.1 = f32[] constant(56)
+  %constant.2 = f32[] constant(12)
+  ROOT %conditional = f32[] conditional(pred[] %constant, f32[] %constant.1, f32[] %constant.2), true_computation=%Negate, false_computation=%Identity
+}
+
 )"
 }
   });
-- 
GitLab


From 8a9aaa3bf4e81fef532ea3dad860797ecf85ef93 Mon Sep 17 00:00:00 2001
From: Shivani Agrawal <shivaniagrawal@google.com>
Date: Wed, 13 Dec 2017 15:23:20 -0800
Subject: [PATCH 0983/1225] Adds build rule for scan_dataset_op_test.py

PiperOrigin-RevId: 178966883
---
 .../contrib/data/python/kernel_tests/BUILD      | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/tensorflow/contrib/data/python/kernel_tests/BUILD b/tensorflow/contrib/data/python/kernel_tests/BUILD
index 9b6ad93294..375e3ad612 100644
--- a/tensorflow/contrib/data/python/kernel_tests/BUILD
+++ b/tensorflow/contrib/data/python/kernel_tests/BUILD
@@ -391,6 +391,23 @@ py_test(
     ],
 )
 
+py_test(
+    name = "scan_dataset_op_test",
+    size = "small",
+    srcs = ["scan_dataset_op_test.py"],
+    srcs_version = "PY2AND3",
+    deps = [
+        "//tensorflow/contrib/data/python/ops:transformation_ops",
+        "//tensorflow/python:array_ops",
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:constant_op",
+        "//tensorflow/python:dtypes",
+        "//tensorflow/python:errors",
+        "//tensorflow/python/data/ops:dataset_ops",
+        "//third_party/py/numpy",
+    ],
+)
+
 py_test(
     name = "sequence_dataset_op_test",
     size = "medium",
-- 
GitLab


From b3e97d56bd10bdf1976c61aab1f50a8902068c5c Mon Sep 17 00:00:00 2001
From: Sanjoy Das <sanjoy@google.com>
Date: Wed, 13 Dec 2017 15:54:11 -0800
Subject: [PATCH 0984/1225] [XLA:CPU] Implement Ax+b dot output fusion for
 Matrix-vector products

I had to roll in the change to generalize CPU layout assignment as without it we
lose the make-rhs-column-major optimization and that causes a performance
regression.

PiperOrigin-RevId: 178970986
---
 .../xla/service/cpu/cpu_instruction_fusion.cc |  44 ++++-
 .../xla/service/cpu/cpu_instruction_fusion.h  |   2 +
 .../cpu/cpu_instruction_fusion_test.cc        |  88 ++++++++-
 .../xla/service/cpu/cpu_layout_assignment.cc  |  63 ++++---
 .../service/cpu/cpu_layout_assignment_test.cc | 169 ++++++++++++++++++
 .../xla/service/cpu/dot_op_emitter.cc         | 129 +++++++++----
 .../compiler/xla/service/cpu/dot_op_emitter.h |  17 +-
 .../compiler/xla/service/cpu/ir_emitter.cc    |  37 +++-
 .../service/llvm_ir/kernel_support_library.cc |  20 ++-
 .../service/llvm_ir/kernel_support_library.h  |  21 ++-
 .../service/llvm_ir/vector_support_library.cc |  18 +-
 .../service/llvm_ir/vector_support_library.h  |   7 +-
 .../compiler/xla/tests/dot_operation_test.cc  | 140 +++++++++++++++
 tensorflow/core/lib/gtl/iterator_range.h      |   4 +
 14 files changed, 676 insertions(+), 83 deletions(-)

diff --git a/tensorflow/compiler/xla/service/cpu/cpu_instruction_fusion.cc b/tensorflow/compiler/xla/service/cpu/cpu_instruction_fusion.cc
index f87ee3cecd..482e04052d 100644
--- a/tensorflow/compiler/xla/service/cpu/cpu_instruction_fusion.cc
+++ b/tensorflow/compiler/xla/service/cpu/cpu_instruction_fusion.cc
@@ -26,7 +26,7 @@ int64 BytesInDimension(const Shape& shape, int64 dimension) {
          shape.dimensions(dimension);
 }
 
-bool IsFusile(const HloInstruction& hlo) {
+bool CanBeLoopFused(const HloInstruction& hlo) {
   // These are the only ones we fuse since we rely on effective elemental IR
   // generation.
   return hlo.IsElementwise() ||  //
@@ -42,6 +42,23 @@ bool IsFusile(const HloInstruction& hlo) {
          hlo.opcode() == HloOpcode::kTranspose;
 }
 
+bool IsMatrixVectorDot(const HloInstruction* hlo) {
+  const Shape& hlo_shape = hlo->shape();
+  return hlo->opcode() == HloOpcode::kDot && hlo_shape.dimensions_size() == 2 &&
+         (hlo_shape.dimensions(0) == 1 || hlo_shape.dimensions(1) == 1);
+}
+
+bool CanBeOutputFused(const HloInstruction* producer,
+                      const HloInstruction* consumer) {
+  return consumer->opcode() == HloOpcode::kAdd && IsMatrixVectorDot(producer) &&
+         producer->user_count() == 1;
+}
+
+bool CanBeOutputFusedIntoSomeOperand(const HloInstruction* consumer) {
+  return consumer->opcode() == HloOpcode::kAdd &&
+         (CanBeOutputFused(consumer->operand(0), consumer) ||
+          CanBeOutputFused(consumer->operand(1), consumer));
+}
 }  // namespace
 
 bool CpuInstructionFusion::ShouldFuse(HloInstruction* consumer,
@@ -52,7 +69,15 @@ bool CpuInstructionFusion::ShouldFuse(HloInstruction* consumer,
 
   constexpr int kFusionThresholdBytes = 16 * 1024;
 
-  if (!IsFusile(*producer)) {
+  if (CanBeOutputFused(producer, consumer)) {
+    return true;
+  }
+
+  if (CanBeOutputFusedIntoSomeOperand(producer)) {
+    return false;
+  }
+
+  if (!CanBeLoopFused(*producer)) {
     VLOG(2) << "Producer is not fusile.";
     return false;
   }
@@ -108,16 +133,13 @@ bool CpuInstructionFusion::ShouldFuse(HloInstruction* consumer,
     }
   }
 
-  if (consumer->opcode() == HloOpcode::kFusion) {
-    // InstructionFusion::ShouldFuse above only allows kLoop and kInput fusions.
-    // The CPU backend does not create kInput fusions, so we only expect to see
-    // kLoop here.
-    CHECK(consumer->fusion_kind() == HloInstruction::FusionKind::kLoop);
+  if (consumer->opcode() == HloOpcode::kFusion &&
+      consumer->fusion_kind() == HloInstruction::FusionKind::kLoop) {
     VLOG(2) << "Fusing: consumer is a fusion node.";
     return true;
   }
 
-  if (IsFusile(*consumer)) {
+  if (CanBeLoopFused(*consumer)) {
     VLOG(2) << "Fusing: consumer is elementwise or fusile.";
     return true;
   }
@@ -126,5 +148,11 @@ bool CpuInstructionFusion::ShouldFuse(HloInstruction* consumer,
   return false;
 }
 
+HloInstruction::FusionKind CpuInstructionFusion::ChooseKind(
+    const HloInstruction* producer, const HloInstruction* consumer) {
+  return CanBeOutputFused(producer, consumer)
+             ? HloInstruction::FusionKind::kOutput
+             : HloInstruction::FusionKind::kLoop;
+}
 }  // namespace cpu
 }  // namespace xla
diff --git a/tensorflow/compiler/xla/service/cpu/cpu_instruction_fusion.h b/tensorflow/compiler/xla/service/cpu/cpu_instruction_fusion.h
index 0eca4c3473..07aff34974 100644
--- a/tensorflow/compiler/xla/service/cpu/cpu_instruction_fusion.h
+++ b/tensorflow/compiler/xla/service/cpu/cpu_instruction_fusion.h
@@ -30,6 +30,8 @@ class CpuInstructionFusion : public InstructionFusion {
 
  protected:
   bool ShouldFuse(HloInstruction* consumer, int64 operand_index) override;
+  HloInstruction::FusionKind ChooseKind(
+      const HloInstruction* producer, const HloInstruction* consumer) override;
 };
 
 }  // namespace cpu
diff --git a/tensorflow/compiler/xla/service/cpu/cpu_instruction_fusion_test.cc b/tensorflow/compiler/xla/service/cpu/cpu_instruction_fusion_test.cc
index 1c04c9835e..595c3f55b3 100644
--- a/tensorflow/compiler/xla/service/cpu/cpu_instruction_fusion_test.cc
+++ b/tensorflow/compiler/xla/service/cpu/cpu_instruction_fusion_test.cc
@@ -196,7 +196,9 @@ class OpcodeFusionTest : public InstructionFusionTest {
   // Runs CPU instruction fusion on the given module, and tests that the result
   // contains a fused op at the root with exactly the given multiset of opcodes.
   void RunFusionAndCheckOpcodesWereFused(
-      HloModule* module, const std::multiset<HloOpcode>& expected_opcodes) {
+      HloModule* module, const std::multiset<HloOpcode>& expected_opcodes,
+      HloInstruction::FusionKind fusion_kind =
+          HloInstruction::FusionKind::kLoop) {
     auto computation = module->entry_computation();
     auto did_fusion = CpuInstructionFusion().Run(module);
     ASSERT_TRUE(did_fusion.ok());
@@ -204,7 +206,7 @@ class OpcodeFusionTest : public InstructionFusionTest {
 
     HloInstruction* root = computation->root_instruction();
     ASSERT_THAT(root, op::Fusion());
-    EXPECT_EQ(root->fusion_kind(), HloInstruction::FusionKind::kLoop);
+    EXPECT_EQ(root->fusion_kind(), fusion_kind);
 
     std::vector<HloOpcode> fused_opcodes(root->fused_instruction_count());
     std::transform(root->fused_instructions().begin(),
@@ -616,6 +618,88 @@ TEST_F(OpcodeFusionTest, ReuseViaImplicitBroadcastBinary) {
               Not(op::Fusion()));
 }
 
+void CreateComputationForDotAddOutputFusionTest(const string& test_name,
+                                                HloModule* module, int m, int k,
+                                                int n,
+                                                bool add_extra_use_for_dot) {
+  HloComputation::Builder builder(test_name);
+
+  Shape dot_lhs_shape = ShapeUtil::MakeShape(F32, {m, k});
+  Shape dot_rhs_shape = ShapeUtil::MakeShape(F32, {k, n});
+  Shape dot_shape = ShapeUtil::MakeShape(F32, {m, n});
+
+  auto* dot_lhs = builder.AddInstruction(
+      HloInstruction::CreateParameter(0, dot_lhs_shape, "param0"));
+  auto* dot_rhs = builder.AddInstruction(
+      HloInstruction::CreateParameter(1, dot_rhs_shape, "param1"));
+  auto* addend = builder.AddInstruction(
+      HloInstruction::CreateParameter(2, dot_shape, "param2"));
+
+  auto* dot = builder.AddInstruction(
+      HloInstruction::CreateCanonicalDot(dot_shape, dot_lhs, dot_rhs));
+  builder.AddInstruction(
+      HloInstruction::CreateBinary(dot_shape, HloOpcode::kAdd, dot, addend));
+
+  if (add_extra_use_for_dot) {
+    builder.AddInstruction(
+        HloInstruction::CreateOutfeed(dot_shape, dot, "no_config"));
+  }
+
+  module->AddEntryComputation(builder.Build());
+}
+
+TEST_F(OpcodeFusionTest, DotAddOutputFusion_1x50x19) {
+  auto module = CreateNewModule();
+  CreateComputationForDotAddOutputFusionTest(TestName(), module.get(), /*m=*/1,
+                                             /*k=*/50, /*n=*/19,
+                                             /*add_extra_use_for_dot=*/false);
+
+  RunFusionAndCheckOpcodesWereFused(
+      module.get(),
+      {HloOpcode::kDot, HloOpcode::kAdd, HloOpcode::kParameter,
+       HloOpcode::kParameter, HloOpcode::kParameter},
+      HloInstruction::FusionKind::kOutput);
+}
+
+TEST_F(OpcodeFusionTest, DotAddOutputFusion_19x50x1) {
+  auto module = CreateNewModule();
+  CreateComputationForDotAddOutputFusionTest(TestName(), module.get(), /*m=*/19,
+                                             /*k=*/50, /*n=*/1,
+                                             /*add_extra_use_for_dot=*/false);
+
+  RunFusionAndCheckOpcodesWereFused(
+      module.get(),
+      {HloOpcode::kDot, HloOpcode::kAdd, HloOpcode::kParameter,
+       HloOpcode::kParameter, HloOpcode::kParameter},
+      HloInstruction::FusionKind::kOutput);
+}
+
+TEST_F(OpcodeFusionTest, DotAddOutputFusion_19x50x19) {
+  auto module = CreateNewModule();
+  CreateComputationForDotAddOutputFusionTest(TestName(), module.get(), /*m=*/19,
+                                             /*k=*/50, /*n=*/19,
+                                             /*add_extra_use_for_dot=*/false);
+
+  TF_ASSERT_OK_AND_ASSIGN(bool fused_something,
+                          CpuInstructionFusion().Run(module.get()));
+  EXPECT_FALSE(fused_something);
+  EXPECT_THAT(module->entry_computation()->root_instruction(),
+              Not(op::Fusion()));
+}
+
+TEST_F(OpcodeFusionTest, DotAddOutputFusion_19x50x1_multi_use) {
+  auto module = CreateNewModule();
+  CreateComputationForDotAddOutputFusionTest(TestName(), module.get(), /*m=*/19,
+                                             /*k=*/50, /*n=*/1,
+                                             /*add_extra_use_for_dot=*/true);
+
+  TF_ASSERT_OK_AND_ASSIGN(bool fused_something,
+                          CpuInstructionFusion().Run(module.get()));
+  EXPECT_FALSE(fused_something);
+  EXPECT_THAT(module->entry_computation()->root_instruction(),
+              Not(op::Fusion()));
+}
+
 }  // namespace
 }  // namespace cpu
 }  // namespace xla
diff --git a/tensorflow/compiler/xla/service/cpu/cpu_layout_assignment.cc b/tensorflow/compiler/xla/service/cpu/cpu_layout_assignment.cc
index 0df10f4af3..e8117377e6 100644
--- a/tensorflow/compiler/xla/service/cpu/cpu_layout_assignment.cc
+++ b/tensorflow/compiler/xla/service/cpu/cpu_layout_assignment.cc
@@ -34,34 +34,47 @@ namespace cpu {
 // instruction stream.
 
 namespace {
-using ShouldMakeRhsColMajorCache =
+using ::tensorflow::gtl::nullopt;
+using ::tensorflow::gtl::optional;
+
+using ShouldMakeOperandColMajorCache =
     tensorflow::gtl::FlatMap<const HloInstruction*, bool>;
+}  // namespace
+
+static bool ShouldMakeAllUsersColMajor(const HloInstruction* instruction) {
+  for (auto* user : instruction->users()) {
+    optional<int64> operand_idx = ProfitableToMakeDotOperandColumnMajor(*user);
+    if (!operand_idx || user->operand(*operand_idx) != instruction ||
+        std::count(user->operands().begin(), user->operands().end(),
+                   instruction) != 1) {
+      return false;
+    }
+  }
+  return true;
 }
 
-static bool ShouldMakeRhsColMajor(ShouldMakeRhsColMajorCache* cache,
-                                  const HloInstruction& instruction) {
-  if (!ProfitableToMakeDotRhsColumnMajor(instruction)) {
-    return false;
+static optional<int64> ShouldMakeOperandColumnMajor(
+    ShouldMakeOperandColMajorCache* cache, const HloInstruction& instruction) {
+  optional<int64> operand_idx =
+      ProfitableToMakeDotOperandColumnMajor(instruction);
+  if (!operand_idx) {
+    return nullopt;
   }
 
-  const auto* rhs = instruction.operand(1);
-  if (rhs->opcode() != HloOpcode::kConstant) {
-    return false;
+  const HloInstruction* operand = instruction.operand(*operand_idx);
+  if (operand->opcode() != HloOpcode::kConstant) {
+    return nullopt;
   }
 
-  auto it = cache->find(rhs);
-  if (it != cache->end()) {
-    return it->second;
+  auto it = cache->find(operand);
+  if (it == cache->end()) {
+    auto insert_result =
+        cache->insert({operand, ShouldMakeAllUsersColMajor(operand)});
+    CHECK(insert_result.second);
+    it = insert_result.first;
   }
 
-  bool result = std::all_of(rhs->users().begin(), rhs->users().end(),
-                            [&](HloInstruction* user) {
-                              return ProfitableToMakeDotRhsColumnMajor(*user) &&
-                                     user->operand(0) != rhs;
-                            });
-
-  InsertOrDie(cache, rhs, result);
-  return result;
+  return it->second ? operand_idx : nullopt;
 }
 
 static Shape RowMajorShape(const Shape& old_shape) {
@@ -82,7 +95,7 @@ static Shape ColMajorShape(const Shape& old_shape) {
 
 Status CpuLayoutAssignment::AddBackendConstraints(
     LayoutConstraints* constraints) {
-  ShouldMakeRhsColMajorCache cache;
+  ShouldMakeOperandColMajorCache cache;
 
   const HloComputation* computation = constraints->computation();
   for (auto* instruction : computation->instructions()) {
@@ -108,11 +121,11 @@ Status CpuLayoutAssignment::AddBackendConstraints(
           constraints->SetOperandLayout(filter_shape, convolution, 1));
       TF_RETURN_IF_ERROR(
           constraints->SetInstructionLayout(output_shape, convolution));
-    } else if (ShouldMakeRhsColMajor(&cache, *instruction)) {
-      auto* dot = instruction;
-      const auto& rhs_shape = dot->operand(1)->shape();
-      TF_RETURN_IF_ERROR(
-          constraints->SetOperandLayout(ColMajorShape(rhs_shape), dot, 1));
+    } else if (optional<int64> op_idx =
+                   ShouldMakeOperandColumnMajor(&cache, *instruction)) {
+      const HloInstruction* op = instruction->operand(*op_idx);
+      TF_RETURN_IF_ERROR(constraints->SetOperandLayout(
+          ColMajorShape(op->shape()), instruction, *op_idx));
     } else if (PotentiallyImplementedAsEigenDot(*instruction)) {
       const HloInstruction* dot = instruction;
       // In order to implement `dot` with Eigen dot, the layouts of the lhs,
diff --git a/tensorflow/compiler/xla/service/cpu/cpu_layout_assignment_test.cc b/tensorflow/compiler/xla/service/cpu/cpu_layout_assignment_test.cc
index 5d37a41571..6ba030fff3 100644
--- a/tensorflow/compiler/xla/service/cpu/cpu_layout_assignment_test.cc
+++ b/tensorflow/compiler/xla/service/cpu/cpu_layout_assignment_test.cc
@@ -40,6 +40,8 @@ limitations under the License.
 #include "tensorflow/core/lib/core/status.h"
 #include "tensorflow/core/lib/gtl/array_slice.h"
 
+namespace op = xla::testing::opcode_matchers;
+
 namespace xla {
 namespace {
 
@@ -241,5 +243,172 @@ TEST_F(CpuLayoutAssignmentTest, DotWithConstantRhsTensorThroughGTE) {
     EXPECT_NE(instruction->opcode(), HloOpcode::kCopy);
   }
 }
+
+struct DotOutputFusionLayoutAssignmentResult {
+  bool layout_assignment_changed_something;
+  const HloInstruction* dot_lhs_fusion_param;
+  const HloInstruction* dot_rhs_fusion_param;
+  const HloInstruction* addend_fusion_param;
+};
+
+static StatusOr<DotOutputFusionLayoutAssignmentResult> RunDotOutputFusion(
+    HloModule* module, const string& test_name, int m, int k, int n,
+    const int64 dot_operand_idx_in_add) {
+  DotOutputFusionLayoutAssignmentResult result;
+
+  CHECK(dot_operand_idx_in_add == 0 || dot_operand_idx_in_add == 1);
+
+  auto builder = HloComputation::Builder(test_name);
+
+  Shape dot_lhs_shape = ShapeUtil::MakeShape(F32, {m, k});
+  Shape dot_rhs_shape = ShapeUtil::MakeShape(F32, {k, n});
+  Shape dot_shape = ShapeUtil::MakeShape(F32, {m, n});
+
+  HloInstruction* dot_lhs = builder.AddInstruction(
+      HloInstruction::CreateParameter(0, dot_lhs_shape, "param0"));
+  HloInstruction* addend = builder.AddInstruction(
+      HloInstruction::CreateParameter(1, dot_shape, "param1"));
+  HloInstruction* dot_rhs = builder.AddInstruction(
+      HloInstruction::CreateConstant(Literal::CreateFromShape(dot_rhs_shape)));
+  HloInstruction* dot_result = builder.AddInstruction(
+      HloInstruction::CreateCanonicalDot(dot_shape, dot_lhs, dot_rhs));
+  HloInstruction* add_result;
+  if (dot_operand_idx_in_add == 0) {
+    add_result = builder.AddInstruction(HloInstruction::CreateBinary(
+        dot_shape, HloOpcode::kAdd, dot_result, addend));
+  } else {
+    add_result = builder.AddInstruction(HloInstruction::CreateBinary(
+        dot_shape, HloOpcode::kAdd, addend, dot_result));
+  }
+
+  HloComputation* computation = module->AddEntryComputation(builder.Build());
+
+  HloInstruction* fusion_instruction =
+      module->entry_computation()->AddInstruction(HloInstruction::CreateFusion(
+          dot_shape, HloInstruction::FusionKind::kOutput, add_result));
+  TF_RETURN_IF_ERROR(
+      computation->ReplaceInstruction(add_result, fusion_instruction));
+
+  HloInstruction* fused_add =
+      fusion_instruction->fused_instructions_computation()->root_instruction();
+  HloInstruction* fused_dot = fusion_instruction->FuseInstruction(dot_result);
+
+  TF_RETURN_IF_ERROR(
+      computation->RemoveInstructionAndUnusedOperands(dot_result));
+
+  ComputationLayout computation_layout(computation->ComputeProgramShape());
+  *computation_layout.mutable_parameter_layout(0) =
+      ShapeLayout(LayoutUtil::GetWithDefaultLayout(dot_lhs_shape));
+  *computation_layout.mutable_parameter_layout(1) =
+      ShapeLayout(LayoutUtil::GetWithDefaultLayout(dot_shape));
+  *computation_layout.mutable_result_layout() =
+      ShapeLayout(LayoutUtil::GetWithDefaultLayout(dot_shape));
+
+  result.dot_lhs_fusion_param =
+      fusion_instruction->operand(fused_dot->operand(0)->parameter_number());
+  result.dot_rhs_fusion_param =
+      fusion_instruction->operand(fused_dot->operand(1)->parameter_number());
+  result.addend_fusion_param = fusion_instruction->operand(
+      fused_add->operand(1 - dot_operand_idx_in_add)->parameter_number());
+
+  cpu::CpuLayoutAssignment layout_assignment(&computation_layout);
+  TF_ASSIGN_OR_RETURN(result.layout_assignment_changed_something,
+                      layout_assignment.Run(module));
+
+  return result;
+}
+
+static void AssertCorrectLayoutForDotOutputFusion(
+    const HloComputation* computation,
+    const DotOutputFusionLayoutAssignmentResult& layout_assignment_result,
+    bool expect_col_major_dot_rhs) {
+  Layout expected_dot_rhs_layout = expect_col_major_dot_rhs
+                                       ? LayoutUtil::MakeLayout({0, 1})
+                                       : LayoutUtil::MakeLayout({1, 0});
+  EXPECT_TRUE(LayoutUtil::Equal(
+      expected_dot_rhs_layout,
+      layout_assignment_result.dot_rhs_fusion_param->shape().layout()));
+
+  EXPECT_TRUE(LayoutUtil::Equal(
+      LayoutUtil::MakeLayout({1, 0}),
+      layout_assignment_result.dot_lhs_fusion_param->shape().layout()));
+
+  EXPECT_TRUE(LayoutUtil::Equal(
+      LayoutUtil::MakeLayout({1, 0}),
+      layout_assignment_result.addend_fusion_param->shape().layout()));
+  EXPECT_THAT(computation->instructions(), Each(Not(op::Copy())));
+}
+
+TEST_F(CpuLayoutAssignmentTest, DotOutputFusion_1x50x19_dot_idx_0) {
+  std::unique_ptr<HloModule> module = CreateNewModule();
+  TF_ASSERT_OK_AND_ASSIGN(
+      DotOutputFusionLayoutAssignmentResult layout_assignment_result,
+      RunDotOutputFusion(module.get(), TestName(), /*m=*/1, /*k=*/50, /*n=*/19,
+                         /*dot_operand_idx_in_add=*/0));
+  ASSERT_TRUE(layout_assignment_result.layout_assignment_changed_something);
+  AssertCorrectLayoutForDotOutputFusion(module->entry_computation(),
+                                        layout_assignment_result,
+                                        /*expect_col_major_dot_rhs=*/true);
+}
+
+TEST_F(CpuLayoutAssignmentTest, DotOutputFusion_1x50x19_dot_idx_1) {
+  std::unique_ptr<HloModule> module = CreateNewModule();
+  TF_ASSERT_OK_AND_ASSIGN(
+      DotOutputFusionLayoutAssignmentResult layout_assignment_result,
+      RunDotOutputFusion(module.get(), TestName(), /*m=*/1, /*k=*/50, /*n=*/19,
+                         /*dot_operand_idx_in_add=*/1));
+  ASSERT_TRUE(layout_assignment_result.layout_assignment_changed_something);
+  AssertCorrectLayoutForDotOutputFusion(module->entry_computation(),
+                                        layout_assignment_result,
+                                        /*expect_col_major_dot_rhs=*/true);
+}
+
+TEST_F(CpuLayoutAssignmentTest, DotOutputFusion_19x50x1_dot_idx_0) {
+  std::unique_ptr<HloModule> module = CreateNewModule();
+  TF_ASSERT_OK_AND_ASSIGN(
+      DotOutputFusionLayoutAssignmentResult layout_assignment_result,
+      RunDotOutputFusion(module.get(), TestName(), /*m=*/19, /*k=*/50, /*n=*/1,
+                         /*dot_operand_idx_in_add=*/0));
+  ASSERT_TRUE(layout_assignment_result.layout_assignment_changed_something);
+  AssertCorrectLayoutForDotOutputFusion(module->entry_computation(),
+                                        layout_assignment_result,
+                                        /*expect_col_major_dot_rhs=*/false);
+}
+
+TEST_F(CpuLayoutAssignmentTest, DotOutputFusion_19x50x1_dot_idx_1) {
+  std::unique_ptr<HloModule> module = CreateNewModule();
+  TF_ASSERT_OK_AND_ASSIGN(
+      DotOutputFusionLayoutAssignmentResult layout_assignment_result,
+      RunDotOutputFusion(module.get(), TestName(), /*m=*/19, /*k=*/50, /*n=*/1,
+                         /*dot_operand_idx_in_add=*/1));
+  ASSERT_TRUE(layout_assignment_result.layout_assignment_changed_something);
+  AssertCorrectLayoutForDotOutputFusion(module->entry_computation(),
+                                        layout_assignment_result,
+                                        /*expect_col_major_dot_rhs=*/false);
+}
+
+TEST_F(CpuLayoutAssignmentTest, DotOutputFusion_19x50x19_dot_idx_0) {
+  std::unique_ptr<HloModule> module = CreateNewModule();
+  TF_ASSERT_OK_AND_ASSIGN(
+      DotOutputFusionLayoutAssignmentResult layout_assignment_result,
+      RunDotOutputFusion(module.get(), TestName(), /*m=*/19, /*k=*/50, /*n=*/19,
+                         /*dot_operand_idx_in_add=*/0));
+  ASSERT_TRUE(layout_assignment_result.layout_assignment_changed_something);
+  AssertCorrectLayoutForDotOutputFusion(module->entry_computation(),
+                                        layout_assignment_result,
+                                        /*expect_col_major_dot_rhs=*/false);
+}
+
+TEST_F(CpuLayoutAssignmentTest, DotOutputFusion_19x50x19_dot_idx_1) {
+  std::unique_ptr<HloModule> module = CreateNewModule();
+  TF_ASSERT_OK_AND_ASSIGN(
+      DotOutputFusionLayoutAssignmentResult layout_assignment_result,
+      RunDotOutputFusion(module.get(), TestName(), /*m=*/19, /*k=*/50, /*n=*/19,
+                         /*dot_operand_idx_in_add=*/1));
+  ASSERT_TRUE(layout_assignment_result.layout_assignment_changed_something);
+  AssertCorrectLayoutForDotOutputFusion(module->entry_computation(),
+                                        layout_assignment_result,
+                                        /*expect_col_major_dot_rhs=*/false);
+}
 }  // namespace
 }  // namespace xla
diff --git a/tensorflow/compiler/xla/service/cpu/dot_op_emitter.cc b/tensorflow/compiler/xla/service/cpu/dot_op_emitter.cc
index 296e018c6f..0631454d5c 100644
--- a/tensorflow/compiler/xla/service/cpu/dot_op_emitter.cc
+++ b/tensorflow/compiler/xla/service/cpu/dot_op_emitter.cc
@@ -143,7 +143,8 @@ class ColumnMajorMatrixVectorProductEmitter {
   ColumnMajorMatrixVectorProductEmitter(PrimitiveType scalar_type,
                                         int64 tile_rows, int64 tile_cols,
                                         int64 m, int64 k, llvm::Value* lhs,
-                                        llvm::Value* rhs, llvm::Value* result,
+                                        llvm::Value* rhs, llvm::Value* addend,
+                                        llvm::Value* result,
                                         llvm::IRBuilder<>* ir_builder)
       : scalar_type_(scalar_type),
         tile_rows_(tile_rows),
@@ -152,6 +153,7 @@ class ColumnMajorMatrixVectorProductEmitter {
         k_(k),
         lhs_(lhs),
         rhs_(rhs),
+        addend_(addend),
         result_(result),
         ir_builder_(ir_builder),
         ksl_(ir_builder_),
@@ -198,6 +200,7 @@ class ColumnMajorMatrixVectorProductEmitter {
   int64 k_;
   llvm::Value* lhs_;
   llvm::Value* rhs_;
+  llvm::Value* addend_;
   llvm::Value* result_;
   llvm::IRBuilder<>* ir_builder_;
   KernelSupportLibrary ksl_;
@@ -242,9 +245,10 @@ void ColumnMajorMatrixVectorProductEmitter::EmitInnerLoopTiled(
            /*step=*/tile_rows_, [&](llvm::Value* row) {
              std::vector<llvm::Value*> lhs_tile =
                  lhs_tile_loader->LoadTile(/*minor_dim_offset=*/row);
-             llvm::Value* accumulator = is_first_column
-                                            ? vsl_.GetZeroVector()
-                                            : vsl_.LoadVector(result_, row);
+             llvm::Value* accumulator =
+                 is_first_column ? (addend_ ? vsl_.LoadVector(addend_, row)
+                                            : vsl_.GetZeroVector())
+                                 : vsl_.LoadVector(result_, row);
              for (int i = 0; i < columns; i++) {
                accumulator = vsl_.MulAdd(lhs_tile[i], rhs_tile[i], accumulator);
              }
@@ -288,7 +292,18 @@ void ColumnMajorMatrixVectorProductEmitter::EmitInnerLoopEpilogue(
                   ir_builder_->getInt1(is_first_tiled_column));
               ksl_.If(
                   setting_result_first_time,
-                  [&]() { vsl_.StoreScalar(product, result_, scalar_row); },
+                  /*true_block_generator=*/
+                  [&]() {
+                    if (addend_) {
+                      vsl_.StoreScalar(
+                          vsl_.Add(vsl_.LoadScalar(addend_, scalar_row),
+                                   product),
+                          result_, scalar_row);
+                    } else {
+                      vsl_.StoreScalar(product, result_, scalar_row);
+                    }
+                  },
+                  /*false_block_generator=*/
                   [&]() {
                     vsl_.StoreScalar(
                         vsl_.Add(vsl_.LoadScalar(result_, scalar_row), product),
@@ -353,7 +368,7 @@ class RowMajorMatrixVectorProductEmitter {
   RowMajorMatrixVectorProductEmitter(PrimitiveType scalar_type, int64 tile_rows,
                                      int64 tile_cols, int64 m, int64 k,
                                      llvm::Value* lhs, llvm::Value* rhs,
-                                     llvm::Value* result,
+                                     llvm::Value* addend, llvm::Value* result,
                                      llvm::IRBuilder<>* ir_builder)
       : scalar_type_(scalar_type),
         tile_rows_(tile_rows),
@@ -362,6 +377,7 @@ class RowMajorMatrixVectorProductEmitter {
         k_(k),
         lhs_(lhs),
         rhs_(rhs),
+        addend_(addend),
         result_(result),
         ir_builder_(ir_builder),
         ksl_(ir_builder_),
@@ -394,6 +410,7 @@ class RowMajorMatrixVectorProductEmitter {
   int64 k_;
   llvm::Value* lhs_;
   llvm::Value* rhs_;
+  llvm::Value* addend_;
   llvm::Value* result_;
   llvm::IRBuilder<>* ir_builder_;
   KernelSupportLibrary ksl_;
@@ -420,13 +437,27 @@ void RowMajorMatrixVectorProductEmitter::EmitOuterLoopBody(llvm::Value* row,
       vector_accumulators.begin(), vector_accumulators.end(),
       std::back_inserter(accumulator_values),
       [](const VectorVariable& vector_var) { return vector_var.Get(); });
-  std::vector<llvm::Value*> horizontal_sums =
-      vsl_.ComputeHorizontalSums(std::move(accumulator_values));
+
+  std::vector<llvm::Value*> horizontal_sums;
+  if (row_count == vsl_.vector_size()) {
+    if (addend_) {
+      horizontal_sums = vsl_.ComputeHorizontalSums(
+          std::move(accumulator_values), vsl_.LoadVector(addend_, row));
+    } else {
+      horizontal_sums =
+          vsl_.ComputeHorizontalSums(std::move(accumulator_values));
+    }
+  } else {
+    horizontal_sums = vsl_.ComputeHorizontalSums(std::move(accumulator_values));
+  }
 
   for (int i = 0; i < row_count; i++) {
     llvm::Value* result_value =
         vsl_.Add(horizontal_sums[i], scalar_accumulators[i].Get());
     llvm::Value* offset = ir_builder_->CreateAdd(ir_builder_->getInt64(i), row);
+    if (addend_ && row_count != vsl_.vector_size()) {
+      result_value = vsl_.Add(vsl_.LoadScalar(addend_, offset), result_value);
+    }
     vsl_.StoreScalar(result_value, result_, offset);
   }
 }
@@ -490,20 +521,19 @@ void RowMajorMatrixVectorProductEmitter::EmitInnerLoopEpilogue(
 
 }  // namespace
 
-DotOpEmitter::DotOpEmitter(const HloInstruction& dot, bool transpose_lhs,
-                           bool transpose_rhs,
-                           const llvm_ir::IrArray& target_array,
-                           const llvm_ir::IrArray& lhs_array,
-                           const llvm_ir::IrArray& rhs_array,
-                           llvm::Value* executable_run_options_value,
-                           llvm::IRBuilder<>* ir_builder,
-                           const HloModuleConfig& hlo_module_config)
+DotOpEmitter::DotOpEmitter(
+    const HloInstruction& dot, bool transpose_lhs, bool transpose_rhs,
+    const llvm_ir::IrArray& target_array, const llvm_ir::IrArray& lhs_array,
+    const llvm_ir::IrArray& rhs_array, const llvm_ir::IrArray* addend_array,
+    llvm::Value* executable_run_options_value, llvm::IRBuilder<>* ir_builder,
+    const HloModuleConfig& hlo_module_config)
     : dot_(dot),
       transpose_lhs_(transpose_lhs),
       transpose_rhs_(transpose_rhs),
       target_array_(target_array),
       lhs_array_(lhs_array),
       rhs_array_(rhs_array),
+      addend_array_(addend_array),
       executable_run_options_value_(executable_run_options_value),
       ir_builder_(ir_builder),
       hlo_module_config_(hlo_module_config) {}
@@ -511,14 +541,15 @@ DotOpEmitter::DotOpEmitter(const HloInstruction& dot, bool transpose_lhs,
 /* static */ tensorflow::Status DotOpEmitter::EmitDotOperation(
     const HloInstruction& dot, bool transpose_lhs, bool transpose_rhs,
     const llvm_ir::IrArray& target_array, const llvm_ir::IrArray& lhs_array,
-    const llvm_ir::IrArray& rhs_array,
+    const llvm_ir::IrArray& rhs_array, const llvm_ir::IrArray* addend_array,
     llvm::Value* executable_run_options_value, llvm::IRBuilder<>* ir_builder,
     const HloModuleConfig& hlo_module_config) {
   PrimitiveType type = target_array.GetShape().element_type();
   TF_RET_CHECK(F32 == type || F64 == type || C64 == type);
   DotOpEmitter dot_emitter(dot, transpose_lhs, transpose_rhs, target_array,
-                           lhs_array, rhs_array, executable_run_options_value,
-                           ir_builder, hlo_module_config);
+                           lhs_array, rhs_array, addend_array,
+                           executable_run_options_value, ir_builder,
+                           hlo_module_config);
   return dot_emitter.Emit();
 }
 
@@ -601,17 +632,19 @@ bool DotOpEmitter::EmitLlvmIrDotIfProfitable() {
 
     string kernel_name = tensorflow::strings::StrCat(
         "col_major_gemv_", PrimitiveType_Name(primitive_type), "_", tile_rows,
-        "_", tile_cols, "_", m, "_", k);
+        "_", tile_cols, "_", m, "_", k, addend_array_ ? "_with_addend" : "");
 
     KernelSupportLibrary::EmitAndCallOutlinedKernel(
         /*enable_fast_math=*/enable_fast_math,
         /*optimize_for_size=*/optimize_for_size, ir_builder_, kernel_name,
-        lhs_op, rhs_op, result_op,
+        lhs_op, rhs_op,
+        addend_array_ ? addend_array_->GetBasePointer() : nullptr, result_op,
         [this, tile_rows, tile_cols, m, k, primitive_type](
-            llvm::Value* lhs_op, llvm::Value* rhs_op, llvm::Value* result_op) {
+            llvm::Value* lhs_op, llvm::Value* rhs_op, llvm::Value* addend_op,
+            llvm::Value* result_op) {
           ColumnMajorMatrixVectorProductEmitter emitter(
               primitive_type, tile_rows, tile_cols, m, k, lhs_op, rhs_op,
-              result_op, ir_builder_);
+              addend_op, result_op, ir_builder_);
           emitter.Emit();
         });
   } else {
@@ -622,17 +655,19 @@ bool DotOpEmitter::EmitLlvmIrDotIfProfitable() {
 
     string kernel_name = tensorflow::strings::StrCat(
         "row_major_gemv_", PrimitiveType_Name(primitive_type), "_", tile_rows,
-        "_", tile_cols, "_", m, "_", k);
+        "_", tile_cols, "_", m, "_", k, addend_array_ ? "_with_addend" : "");
 
     KernelSupportLibrary::EmitAndCallOutlinedKernel(
         /*enable_fast_math=*/enable_fast_math,
         /*optimize_for_size=*/optimize_for_size, ir_builder_, kernel_name,
-        lhs_op, rhs_op, result_op,
+        lhs_op, rhs_op,
+        addend_array_ ? addend_array_->GetBasePointer() : nullptr, result_op,
         [this, tile_rows, tile_cols, m, k, primitive_type](
-            llvm::Value* lhs_op, llvm::Value* rhs_op, llvm::Value* result_op) {
+            llvm::Value* lhs_op, llvm::Value* rhs_op, llvm::Value* addend_op,
+            llvm::Value* result_op) {
           RowMajorMatrixVectorProductEmitter emitter(
               primitive_type, tile_rows, tile_cols, m, k, lhs_op, rhs_op,
-              result_op, ir_builder_);
+              addend_op, result_op, ir_builder_);
           emitter.Emit();
         });
   }
@@ -677,6 +712,8 @@ tensorflow::Status DotOpEmitter::Emit() {
     return Status::OK();
   }
 
+  CHECK_EQ(addend_array_, nullptr);
+
   if (PotentiallyImplementedAsEigenDot(dot_)) {
     return EmitCallToRuntime();
   }
@@ -1046,10 +1083,40 @@ bool PotentiallyImplementedAsEigenDot(const HloInstruction& hlo) {
 
 // For vector-matrix dot products, it is always profitable to make the Rhs
 // column major.
-bool ProfitableToMakeDotRhsColumnMajor(const HloInstruction& hlo) {
-  return hlo.opcode() == HloOpcode::kDot &&
-         hlo.shape().dimensions_size() == 2 && hlo.shape().dimensions(0) == 1 &&
-         hlo.dot_dimension_numbers().rhs_contracting_dimensions(0) == 0;
+tensorflow::gtl::optional<int64> ProfitableToMakeDotOperandColumnMajor(
+    const HloInstruction& hlo) {
+  if (hlo.opcode() == HloOpcode::kDot && hlo.shape().dimensions_size() == 2 &&
+      hlo.shape().dimensions(0) == 1) {
+    if (hlo.dot_dimension_numbers().rhs_contracting_dimensions(0) == 0) {
+      return 1;
+    }
+    return {};
+  }
+
+  if (hlo.opcode() == HloOpcode::kFusion &&
+      hlo.fusion_kind() == HloInstruction::FusionKind::kOutput) {
+    auto* fusion_root =
+        hlo.fused_instructions_computation()->root_instruction();
+    if (fusion_root->opcode() != HloOpcode::kAdd) {
+      return {};
+    }
+
+    for (auto* fusion_root_op : fusion_root->operands()) {
+      if (fusion_root_op->opcode() != HloOpcode::kDot) {
+        continue;
+      }
+      if (auto operand_num =
+              ProfitableToMakeDotOperandColumnMajor(*fusion_root_op)) {
+        auto* operand = fusion_root_op->operand(*operand_num);
+        if (operand->opcode() == HloOpcode::kParameter &&
+            operand->user_count() == 1) {
+          return operand->parameter_number();
+        }
+      }
+    }
+  }
+
+  return {};
 }
 
 bool ProfitableToImplementDotInTiledLlvmIr(const HloInstruction& dot) {
diff --git a/tensorflow/compiler/xla/service/cpu/dot_op_emitter.h b/tensorflow/compiler/xla/service/cpu/dot_op_emitter.h
index 2badb26f90..2118965a70 100644
--- a/tensorflow/compiler/xla/service/cpu/dot_op_emitter.h
+++ b/tensorflow/compiler/xla/service/cpu/dot_op_emitter.h
@@ -32,9 +32,11 @@ namespace cpu {
 
 bool PotentiallyImplementedAsEigenDot(const HloInstruction& hlo);
 
-// Returns true to indicate that |hlo| is a dot, and that it is profitable to
-// switch the layout of the |hlo|'s RHS operand to column major.
-bool ProfitableToMakeDotRhsColumnMajor(const HloInstruction& hlo);
+// Returns the index for an operand to `hlo` that should ideally be column
+// major.  Returns nullopt if there is no such operand or if `hlo` is not a dot
+// or a fusion containing a dot.
+tensorflow::gtl::optional<int64> ProfitableToMakeDotOperandColumnMajor(
+    const HloInstruction& hlo);
 
 // Returns true to indicate that we can generate a tiled LLVM IR implementation
 // for |dot|.
@@ -47,10 +49,15 @@ class DotOpEmitter {
   // place the result in target_array. IR is emitted at current insert point of
   // the builder. Upon completion of the method, the insert point is set to the
   // end of all instructions emitted for this operation.
+  //
+  // If `addend_array` is not nullptr then it must be an array of the same
+  // dimensions as the result, and the result is computed as `addend_array` +
+  // dot(`lhs_array`, `rhs_array`).  A non-null `addend_array` is only supported
+  // for Matrix-vector products.
   static tensorflow::Status EmitDotOperation(
       const HloInstruction& dot, bool transpose_lhs, bool transpose_rhs,
       const llvm_ir::IrArray& target_array, const llvm_ir::IrArray& lhs_array,
-      const llvm_ir::IrArray& rhs_array,
+      const llvm_ir::IrArray& rhs_array, const llvm_ir::IrArray* addend_array,
       llvm::Value* executable_run_options_value, llvm::IRBuilder<>* ir_builder,
       const HloModuleConfig& hlo_module_config);
 
@@ -59,6 +66,7 @@ class DotOpEmitter {
                bool transpose_rhs, const llvm_ir::IrArray& target_array,
                const llvm_ir::IrArray& lhs_array,
                const llvm_ir::IrArray& rhs_array,
+               const llvm_ir::IrArray* addend_array,
                llvm::Value* executable_run_options_value,
                llvm::IRBuilder<>* ir_builder,
                const HloModuleConfig& hlo_module_config);
@@ -130,6 +138,7 @@ class DotOpEmitter {
   const llvm_ir::IrArray& target_array_;
   const llvm_ir::IrArray& lhs_array_;
   const llvm_ir::IrArray& rhs_array_;
+  const llvm_ir::IrArray* addend_array_;
   llvm::Value* executable_run_options_value_;
   llvm::IRBuilder<>* ir_builder_;
   const HloModuleConfig& hlo_module_config_;
diff --git a/tensorflow/compiler/xla/service/cpu/ir_emitter.cc b/tensorflow/compiler/xla/service/cpu/ir_emitter.cc
index c82a0c7ef4..a15baf7a4b 100644
--- a/tensorflow/compiler/xla/service/cpu/ir_emitter.cc
+++ b/tensorflow/compiler/xla/service/cpu/ir_emitter.cc
@@ -838,8 +838,8 @@ Status IrEmitter::HandleDot(HloInstruction* dot) {
   // Dot operation is complicated so we delegate to a helper class.
   return DotOpEmitter::EmitDotOperation(
       *dot, /*transpose_lhs=*/false, /*transpose_rhs=*/false, target_array,
-      lhs_array, rhs_array, GetExecutableRunOptionsArgument(), &ir_builder_,
-      hlo_module_config_);
+      lhs_array, rhs_array, /*addend_array=*/nullptr,
+      GetExecutableRunOptionsArgument(), &ir_builder_, hlo_module_config_);
 }
 
 Status IrEmitter::HandleConvolution(HloInstruction* convolution) {
@@ -2172,8 +2172,8 @@ Status IrEmitter::HandleFusion(HloInstruction* fusion) {
     TF_RETURN_IF_ERROR(DotOpEmitter::EmitDotOperation(
         *root, root->operand(0)->IsRank2Transpose(),
         root->operand(1)->IsRank2Transpose(), target_array, lhs_array,
-        rhs_array, GetExecutableRunOptionsArgument(), &ir_builder_,
-        hlo_module_config_));
+        rhs_array, /*addend_array=*/nullptr, GetExecutableRunOptionsArgument(),
+        &ir_builder_, hlo_module_config_));
     return Status::OK();
   } else if (llvm_ir::CanEmitFusedDynamicUpdateSliceInPlace(fusion,
                                                             assignment_)) {
@@ -2194,6 +2194,35 @@ Status IrEmitter::HandleFusion(HloInstruction* fusion) {
     TF_RETURN_IF_ERROR(fusion->fused_expression_root()->Accept(&fused_emitter));
 
     return EmitTargetElementLoop(fusion, fused_emitter.GetRootGenerator());
+  } else if (fusion->fusion_kind() == HloInstruction::FusionKind::kOutput) {
+    VLOG(3) << "HandleFusion kOutput";
+    int64 dot_op_index = root->operand(0)->opcode() == HloOpcode::kDot ? 0 : 1;
+    const HloInstruction* dot = root->operand(dot_op_index);
+    CHECK_EQ(dot->opcode(), HloOpcode::kDot)
+        << dot->ToString() << "  "
+        << fusion->fused_instructions_computation()->ToString();
+
+    int64 dot_lhs_param_number = dot->operand(0)->parameter_number();
+    int64 dot_rhs_param_number = dot->operand(1)->parameter_number();
+    int64 addend_param_number =
+        root->operand(1 - dot_op_index)->parameter_number();
+
+    Shape target_shape = fusion->shape();
+    TF_RETURN_IF_ERROR(EmitTargetAddressForOp(fusion));
+    llvm_ir::IrArray target_array = GetIrArrayFor(fusion);
+
+    llvm_ir::IrArray lhs_array(
+        GetIrArrayFor(fusion->operand(dot_lhs_param_number)));
+    llvm_ir::IrArray rhs_array(
+        GetIrArrayFor(fusion->operand(dot_rhs_param_number)));
+    llvm_ir::IrArray addend_array(
+        GetIrArrayFor(fusion->operand(addend_param_number)));
+
+    TF_RETURN_IF_ERROR(DotOpEmitter::EmitDotOperation(
+        *dot, /*transpose_lhs=*/false, /*transpose_rhs=*/false, target_array,
+        lhs_array, rhs_array, &addend_array, GetExecutableRunOptionsArgument(),
+        &ir_builder_, hlo_module_config_));
+    return Status::OK();
   } else {
     return Unimplemented("Fusion kind not implemented on CPU");
   }
diff --git a/tensorflow/compiler/xla/service/llvm_ir/kernel_support_library.cc b/tensorflow/compiler/xla/service/llvm_ir/kernel_support_library.cc
index d68d699d7e..5f6f9810c3 100644
--- a/tensorflow/compiler/xla/service/llvm_ir/kernel_support_library.cc
+++ b/tensorflow/compiler/xla/service/llvm_ir/kernel_support_library.cc
@@ -73,10 +73,23 @@ void KernelSupportLibrary::EmitAndCallOutlinedKernel(
   llvm::Module* module = ir_builder->GetInsertBlock()->getModule();
   llvm::Function* function =
       module->getFunction(llvm_ir::AsStringRef(kernel_name));
+
+  int64 null_arg_idx = -1;
+  std::vector<llvm::Value*> sanitized_args;
+  sanitized_args.reserve(arguments.size());
+  for (int64 i = 0, e = arguments.size(); i < e; i++) {
+    if (arguments[i]) {
+      sanitized_args.push_back(arguments[i]);
+    } else {
+      CHECK_EQ(null_arg_idx, -1);
+      null_arg_idx = i;
+    }
+  }
+
   if (!function) {
     VLOG(2) << "Generating kernel for " << kernel_name;
     std::vector<llvm::Type*> arg_types;
-    std::transform(arguments.begin(), arguments.end(),
+    std::transform(sanitized_args.begin(), sanitized_args.end(),
                    std::back_inserter(arg_types),
                    [](llvm::Value* arg) { return arg->getType(); });
 
@@ -100,12 +113,15 @@ void KernelSupportLibrary::EmitAndCallOutlinedKernel(
     std::vector<llvm::Value*> arg_values;
     std::transform(function->arg_begin(), function->arg_end(),
                    std::back_inserter(arg_values), std::addressof<llvm::Value>);
+    if (null_arg_idx != -1) {
+      arg_values.insert(arg_values.begin() + null_arg_idx, nullptr);
+    }
     kernel_body_generator(arg_values);
   } else {
     VLOG(3) << "Re-using kernel for " << kernel_name;
   }
 
-  ir_builder->CreateCall(function, llvm_ir::AsArrayRef(arguments));
+  ir_builder->CreateCall(function, llvm_ir::AsArrayRef(sanitized_args));
 }
 
 }  // namespace xla
diff --git a/tensorflow/compiler/xla/service/llvm_ir/kernel_support_library.h b/tensorflow/compiler/xla/service/llvm_ir/kernel_support_library.h
index 150a464c66..827e092a3f 100644
--- a/tensorflow/compiler/xla/service/llvm_ir/kernel_support_library.h
+++ b/tensorflow/compiler/xla/service/llvm_ir/kernel_support_library.h
@@ -133,13 +133,18 @@ class KernelSupportLibrary {
   // If a function called `kernel_name` is already present in the module then
   // that function is re-used.  In that sense we're using the llvm::Module as a
   // cache of outlined kernels, keyed by function name.
+  //
+  // If any of the values in `arguments` is nullptr (i.e. a nullptr
+  // llvm::Value*) then we ignore it when generating LLVM IR, and instead pass
+  // in a nullptr llvm::Value* in its position to `kernel_body_generator`.
+  // Currently we only support at most one nullptr value in `arguments`.
   static void EmitAndCallOutlinedKernel(
       bool enable_fast_math, bool optimize_for_size,
       llvm::IRBuilder<>* ir_builder, tensorflow::StringPiece kernel_name,
       ArgumentVector arguments,
       const std::function<void(ArgumentVector)>& kernel_body_generator);
 
-  // Thin wrapper around the more general EmitAndCallOutlinedKernel above.
+  // Thin wrappers around the more general EmitAndCallOutlinedKernel above.
   static void EmitAndCallOutlinedKernel(
       bool enable_fast_math, bool optimize_for_size,
       llvm::IRBuilder<>* ir_builder, tensorflow::StringPiece kernel_name,
@@ -153,6 +158,20 @@ class KernelSupportLibrary {
         });
   }
 
+  static void EmitAndCallOutlinedKernel(
+      bool enable_fast_math, bool optimize_for_size,
+      llvm::IRBuilder<>* ir_builder, tensorflow::StringPiece kernel_name,
+      llvm::Value* arg0, llvm::Value* arg1, llvm::Value* arg2,
+      llvm::Value* arg3,
+      const std::function<void(llvm::Value*, llvm::Value*, llvm::Value*,
+                               llvm::Value*)>& kernel_body_generator) {
+    EmitAndCallOutlinedKernel(
+        enable_fast_math, optimize_for_size, ir_builder, kernel_name,
+        {arg0, arg1, arg2, arg3}, [&](ArgumentVector args) {
+          kernel_body_generator(args[0], args[1], args[2], args[3]);
+        });
+  }
+
  private:
   llvm::IRBuilder<>* ir_builder_;
   bool prevent_unrolling_;
diff --git a/tensorflow/compiler/xla/service/llvm_ir/vector_support_library.cc b/tensorflow/compiler/xla/service/llvm_ir/vector_support_library.cc
index 59e8296078..0f6d8483da 100644
--- a/tensorflow/compiler/xla/service/llvm_ir/vector_support_library.cc
+++ b/tensorflow/compiler/xla/service/llvm_ir/vector_support_library.cc
@@ -205,22 +205,28 @@ llvm::Value* VectorSupportLibrary::ExtractHighHalf(llvm::Value* vector) {
 }
 
 std::vector<llvm::Value*> VectorSupportLibrary::ComputeHorizontalSums(
-    std::vector<llvm::Value*> vectors) {
+    std::vector<llvm::Value*> vectors, llvm::Value* init_values) {
   // TODO(sanjoy): Move this magic constant to TargetMachineFeatures.
   const int kAvxVectorWidth = 8;
   if (vector_size() == kAvxVectorWidth && vectors.size() == kAvxVectorWidth) {
-    return ComputeAvxOptimizedHorizontalSums(std::move(vectors));
+    return ComputeAvxOptimizedHorizontalSums(std::move(vectors), init_values);
   }
 
   std::vector<llvm::Value*> result;
   std::transform(vectors.begin(), vectors.end(), std::back_inserter(result),
                  [this](llvm::Value* vector) { return AddReduce(vector); });
+  if (init_values) {
+    for (int64 i = 0, e = result.size(); i < e; i++) {
+      result[i] = Add(result[i], ir_builder()->CreateExtractElement(
+                                     init_values, ir_builder()->getInt32(i)));
+    }
+  }
   return result;
 }
 
 std::vector<llvm::Value*>
 VectorSupportLibrary::ComputeAvxOptimizedHorizontalSums(
-    std::vector<llvm::Value*> vectors) {
+    std::vector<llvm::Value*> vectors, llvm::Value* init_values) {
   while (vectors.size() != 2) {
     std::vector<llvm::Value*> new_vectors;
     for (int i = 0; i < vectors.size(); i += 2) {
@@ -232,8 +238,14 @@ VectorSupportLibrary::ComputeAvxOptimizedHorizontalSums(
 
   llvm::Value* low =
       AddInternal(ExtractLowHalf(vectors[0]), ExtractHighHalf(vectors[0]));
+  if (init_values) {
+    low = AddInternal(ExtractLowHalf(init_values), low);
+  }
   llvm::Value* high =
       AddInternal(ExtractLowHalf(vectors[1]), ExtractHighHalf(vectors[1]));
+  if (init_values) {
+    high = AddInternal(ExtractHighHalf(init_values), high);
+  }
 
   std::vector<llvm::Value*> results;
   for (int i = 0; i < 8; i++) {
diff --git a/tensorflow/compiler/xla/service/llvm_ir/vector_support_library.h b/tensorflow/compiler/xla/service/llvm_ir/vector_support_library.h
index f4c7a6a420..f404687ab6 100644
--- a/tensorflow/compiler/xla/service/llvm_ir/vector_support_library.h
+++ b/tensorflow/compiler/xla/service/llvm_ir/vector_support_library.h
@@ -113,9 +113,10 @@ class VectorSupportLibrary {
 
   // Compute the horizontal sum of each vector in `vectors`.  The i'th element
   // in the result vector is the (scalar) horizontal sum of the i'th vector in
-  // `vectors`.
+  // `vectors`.  If `init_values` is not nullptr then the value in the i'th lane
+  // in `init_values` is added to the i'th horizontal sum.
   std::vector<llvm::Value*> ComputeHorizontalSums(
-      std::vector<llvm::Value*> vectors);
+      std::vector<llvm::Value*> vectors, llvm::Value* init_values = nullptr);
 
   llvm::Value* GetZeroVector();
   llvm::Value* GetZeroScalar();
@@ -155,7 +156,7 @@ class VectorSupportLibrary {
   llvm::Value* AvxStyleHorizontalAdd(llvm::Value* lhs, llvm::Value* rhs);
 
   std::vector<llvm::Value*> ComputeAvxOptimizedHorizontalSums(
-      std::vector<llvm::Value*> vectors);
+      std::vector<llvm::Value*> vectors, llvm::Value* init_values);
 
   int64 vector_size_;
   PrimitiveType primitive_type_;
diff --git a/tensorflow/compiler/xla/tests/dot_operation_test.cc b/tensorflow/compiler/xla/tests/dot_operation_test.cc
index 8f11029c10..bb7af4c4b8 100644
--- a/tensorflow/compiler/xla/tests/dot_operation_test.cc
+++ b/tensorflow/compiler/xla/tests/dot_operation_test.cc
@@ -53,6 +53,8 @@ class DotOperationTest : public ClientLibraryTestBase {
                               bool rhs_row_major = false);
   void TestMatrixDot(int M, int K, int N, bool lhs_row_major = false,
                      bool rhs_row_major = false);
+  void TestMatrixDotWithAdd(int M, int K, int N, bool dot_lhs_row_major,
+                            bool dot_rhs_row_major, bool addend_row_major);
 };
 
 XLA_TEST_F(DotOperationTest, ZeroElementVectorDotF32) {
@@ -229,6 +231,54 @@ void DotOperationTest::TestMatrixDot(int M, int K, int N, bool lhs_row_major,
                              ErrorSpec(0.3, 3e-3));
 }
 
+void DotOperationTest::TestMatrixDotWithAdd(int M, int K, int N,
+                                            bool dot_lhs_row_major,
+                                            bool dot_rhs_row_major,
+                                            bool addend_row_major) {
+  std::unique_ptr<Array2D<float>> dot_lhs_data =
+      MakeLinspaceArray2D(0.0, 1.0, M, K);
+  std::unique_ptr<Literal> dot_lhs_lit = Literal::CreateR2FromArray2DWithLayout(
+      *dot_lhs_data,
+      LayoutUtil::MakeLayout(MinorToMajorForIsRowMajor(dot_lhs_row_major)));
+  auto dot_lhs_handle =
+      client_->TransferToServer(*dot_lhs_lit).ConsumeValueOrDie();
+
+  std::unique_ptr<Array2D<float>> dot_rhs_data =
+      MakeLinspaceArray2D(0.0, 1.0, K, N);
+  std::unique_ptr<Literal> dot_rhs_lit = Literal::CreateR2FromArray2DWithLayout(
+      *dot_rhs_data,
+      LayoutUtil::MakeLayout(MinorToMajorForIsRowMajor(dot_rhs_row_major)));
+  auto dot_rhs_handle =
+      client_->TransferToServer(*dot_rhs_lit).ConsumeValueOrDie();
+
+  std::unique_ptr<Array2D<float>> addend_data =
+      MakeLinspaceArray2D(0.0, 1.0, M, N);
+  std::unique_ptr<Literal> addend_lit = Literal::CreateR2FromArray2DWithLayout(
+      *addend_data,
+      LayoutUtil::MakeLayout(MinorToMajorForIsRowMajor(addend_row_major)));
+  auto addend_handle =
+      client_->TransferToServer(*addend_lit).ConsumeValueOrDie();
+
+  ComputationBuilder builder(client_, TestName());
+  auto prim_type = primitive_util::NativeToPrimitiveType<float>();
+  auto result = builder.Add(
+      builder.Dot(builder.Parameter(0, ShapeUtil::MakeShape(prim_type, {M, K}),
+                                    "dot_lhs"),
+                  builder.Parameter(1, ShapeUtil::MakeShape(prim_type, {K, N}),
+                                    "dot_rhs")),
+      builder.Parameter(2, ShapeUtil::MakeShape(prim_type, {M, N}), "addend"));
+
+  std::unique_ptr<Array2D<float>> expected = ReferenceUtil::ApplyElementwise2D(
+      std::plus<float>(),
+      *ReferenceUtil::MatmulArray2D(*dot_lhs_data, *dot_rhs_data),
+      *addend_data);
+
+  ComputeAndCompareR2<float>(
+      &builder, *expected,
+      {dot_lhs_handle.get(), dot_rhs_handle.get(), addend_handle.get()},
+      ErrorSpec(0.3, 3e-3));
+}
+
 XLA_TEST_F(DotOperationTest, MatrixDotF32_12_117_7_MinorToMajorTF) {
   TestMatrixDot(12, 117, 7, true, false);
 }
@@ -333,6 +383,96 @@ XLA_TEST_F(DotOperationTest, MatrixVectorDotF32_259x258x1_FT) {
   TestMatrixDot(259, 258, 1, false, true);
 }
 
+XLA_TEST_F(DotOperationTest, MatrixVectorDotWithAddF32_1x8x8) {
+  TestMatrixDotWithAdd(1, 8, 8, /*dot_lhs_row_major=*/true,
+                       /*dot_rhs_row_major=*/true, /*addend_row_major=*/true);
+}
+
+XLA_TEST_F(DotOperationTest, MatrixVectorDotWithAddF32_1x130x8) {
+  TestMatrixDotWithAdd(1, 130, 8, /*dot_lhs_row_major=*/true,
+                       /*dot_rhs_row_major=*/true, /*addend_row_major=*/true);
+}
+
+XLA_TEST_F(DotOperationTest, MatrixVectorDotWithAddF32_1x8x130) {
+  TestMatrixDotWithAdd(1, 8, 130, /*dot_lhs_row_major=*/true,
+                       /*dot_rhs_row_major=*/true, /*addend_row_major=*/true);
+}
+
+XLA_TEST_F(DotOperationTest, MatrixVectorDotWithAddF32_1x290x130) {
+  TestMatrixDotWithAdd(1, 290, 130, /*dot_lhs_row_major=*/true,
+                       /*dot_rhs_row_major=*/true, /*addend_row_major=*/true);
+}
+
+XLA_TEST_F(DotOperationTest, MatrixVectorDotWithAddF32_2x1x1) {
+  TestMatrixDotWithAdd(2, 1, 1, /*dot_lhs_row_major=*/true,
+                       /*dot_rhs_row_major=*/true, /*addend_row_major=*/true);
+}
+
+XLA_TEST_F(DotOperationTest, MatrixVectorDotWithAddF32_8x8x1) {
+  TestMatrixDotWithAdd(8, 8, 1, /*dot_lhs_row_major=*/true,
+                       /*dot_rhs_row_major=*/true, /*addend_row_major=*/true);
+}
+
+XLA_TEST_F(DotOperationTest, MatrixVectorDotWithAddF32_16x1x1) {
+  TestMatrixDotWithAdd(16, 1, 1, /*dot_lhs_row_major=*/true,
+                       /*dot_rhs_row_major=*/true, /*addend_row_major=*/true);
+}
+
+XLA_TEST_F(DotOperationTest, MatrixVectorDotWithAddF32_16x3x1) {
+  TestMatrixDotWithAdd(16, 3, 1, /*dot_lhs_row_major=*/true,
+                       /*dot_rhs_row_major=*/true, /*addend_row_major=*/true);
+}
+
+XLA_TEST_F(DotOperationTest, MatrixVectorDotWithAddF32_3x3x1) {
+  TestMatrixDotWithAdd(3, 3, 1, /*dot_lhs_row_major=*/true,
+                       /*dot_rhs_row_major=*/true, /*addend_row_major=*/true);
+}
+
+XLA_TEST_F(DotOperationTest, MatrixVectorDotWithAddF32_29x29x1) {
+  TestMatrixDotWithAdd(29, 29, 1, /*dot_lhs_row_major=*/true,
+                       /*dot_rhs_row_major=*/true, /*addend_row_major=*/true);
+}
+
+XLA_TEST_F(DotOperationTest, MatrixVectorDotWithAddF32_1x8x2) {
+  TestMatrixDotWithAdd(1, 8, 2, /*dot_lhs_row_major=*/true,
+                       /*dot_rhs_row_major=*/true, /*addend_row_major=*/true);
+}
+
+XLA_TEST_F(DotOperationTest, MatrixVectorDotWithAddF32_1x2x8) {
+  TestMatrixDotWithAdd(1, 2, 8, /*dot_lhs_row_major=*/true,
+                       /*dot_rhs_row_major=*/true, /*addend_row_major=*/true);
+}
+
+XLA_TEST_F(DotOperationTest, MatrixVectorDotWithAddF32_259x258x1) {
+  TestMatrixDotWithAdd(259, 258, 1, /*dot_lhs_row_major=*/true,
+                       /*dot_rhs_row_major=*/true, /*addend_row_major=*/true);
+}
+
+XLA_TEST_F(DotOperationTest, MatrixVectorDotWithAddF32_259x258x1_FTT) {
+  TestMatrixDotWithAdd(259, 258, 1, /*dot_lhs_row_major=*/false,
+                       /*dot_rhs_row_major=*/true, /*addend_row_major=*/true);
+}
+
+XLA_TEST_F(DotOperationTest, MatrixVectorDotWithAddF32_259x258x1_FFT) {
+  TestMatrixDotWithAdd(259, 258, 1, /*dot_lhs_row_major=*/false,
+                       /*dot_rhs_row_major=*/false, /*addend_row_major=*/true);
+}
+
+XLA_TEST_F(DotOperationTest, MatrixVectorDotWithAddF32_259x258x1_FFF) {
+  TestMatrixDotWithAdd(259, 258, 1, /*dot_lhs_row_major=*/false,
+                       /*dot_rhs_row_major=*/false, /*addend_row_major=*/false);
+}
+
+XLA_TEST_F(DotOperationTest, MatrixVectorDotWithAddF32_259x258x1_TFF) {
+  TestMatrixDotWithAdd(259, 258, 1, /*dot_lhs_row_major=*/true,
+                       /*dot_rhs_row_major=*/false, /*addend_row_major=*/false);
+}
+
+XLA_TEST_F(DotOperationTest, MatrixVectorDotWithAddF32_259x258x1_TTF) {
+  TestMatrixDotWithAdd(259, 258, 1, /*dot_lhs_row_major=*/true,
+                       /*dot_rhs_row_major=*/true, /*addend_row_major=*/false);
+}
+
 XLA_TEST_F(DotOperationTest, SquareMatrixDotF32MinorToMajorFF) {
   TestSquareMatrixDot<float>(false, false);
 }
diff --git a/tensorflow/core/lib/gtl/iterator_range.h b/tensorflow/core/lib/gtl/iterator_range.h
index e7fea7579d..0ba4587fde 100644
--- a/tensorflow/core/lib/gtl/iterator_range.h
+++ b/tensorflow/core/lib/gtl/iterator_range.h
@@ -37,6 +37,10 @@ namespace gtl {
 template <typename IteratorT>
 class iterator_range {
  public:
+  using value_type = decltype(*std::declval<IteratorT>());
+  using iterator = IteratorT;
+  using const_iterator = IteratorT;
+
   iterator_range() : begin_iterator_(), end_iterator_() {}
   iterator_range(IteratorT begin_iterator, IteratorT end_iterator)
       : begin_iterator_(std::move(begin_iterator)),
-- 
GitLab


From 937604abad720291e62635b6623b26ab5fb51a7e Mon Sep 17 00:00:00 2001
From: Yao Zhang <yaozhang@google.com>
Date: Wed, 13 Dec 2017 16:19:56 -0800
Subject: [PATCH 0985/1225] Avoid unnecessary layout transpose to input of
 ShapeN.

PiperOrigin-RevId: 178974641
---
 .../grappler/optimizers/layout_optimizer.cc   | 13 ++++++++-----
 .../optimizers/layout_optimizer_test.cc       | 19 +++++++++++++++++++
 2 files changed, 27 insertions(+), 5 deletions(-)

diff --git a/tensorflow/core/grappler/optimizers/layout_optimizer.cc b/tensorflow/core/grappler/optimizers/layout_optimizer.cc
index 40c5cbe3b8..a6172c5fa0 100644
--- a/tensorflow/core/grappler/optimizers/layout_optimizer.cc
+++ b/tensorflow/core/grappler/optimizers/layout_optimizer.cc
@@ -906,12 +906,12 @@ class AgnosticNodeProcessor : public NodeProcessor {
            IsNodeAfterNCHWToNHWC() && IsOnGPU();
   }
 
-  bool IsNodeAfterNCHWToNHWC() const {
+  bool IsNodeAfterNCHWToNHWC(const NodeDef& node) const {
     std::set<string> ops_format_agnostic = GetOpsFormatAgnostic();
     std::deque<NodeDef*> queue;
-    auto first_node_pos = DataInputPos(*node_);
-    for (const auto& pos : first_node_pos) {
-      auto input_node = node_map_->GetNode(node_->input(pos));
+    auto data_node_pos = DataInputPos(node);
+    for (const auto& pos : data_node_pos) {
+      auto input_node = node_map_->GetNode(node.input(pos));
       queue.push_back(input_node);
     }
     // The code will exit this while loop in one iteration in most cases, as the
@@ -936,6 +936,8 @@ class AgnosticNodeProcessor : public NodeProcessor {
     return false;
   }
 
+  bool IsNodeAfterNCHWToNHWC() const { return IsNodeAfterNCHWToNHWC(*node_); }
+
  private:
   std::vector<int> DataInputPos(const NodeDef& node) const {
     if (IsSplit(node)) {
@@ -1214,7 +1216,8 @@ class ShapeProcessor : public AgnosticNodeProcessor {
     std::vector<int> input_pos;
     for (int i = 0; i < node_->input_size(); i++) {
       auto input = node_map_->GetNode(node_->input(i));
-      if (IsDimsFour(*input)) {
+      if (IsDimsFour(*input) &&
+          (IsNodeAfterNCHWToNHWC(*input) || IsNodeNCHWToNHWC(input->name()))) {
         input_pos.push_back(i);
       }
     }
diff --git a/tensorflow/core/grappler/optimizers/layout_optimizer_test.cc b/tensorflow/core/grappler/optimizers/layout_optimizer_test.cc
index b1d841a99b..9a49319821 100644
--- a/tensorflow/core/grappler/optimizers/layout_optimizer_test.cc
+++ b/tensorflow/core/grappler/optimizers/layout_optimizer_test.cc
@@ -983,6 +983,25 @@ TEST_F(LayoutOptimizerTest, ShapeNWithInputsVectorAnd4D) {
   EXPECT_EQ(vec_permute->op(), "DataFormatVecPermute");
 }
 
+TEST_F(LayoutOptimizerTest, ShapeNWithInputs4DAndNoNeedToTransform4D) {
+  tensorflow::Scope s = tensorflow::Scope::NewRootScope();
+  auto conv = SimpleConv2D(&s, 3, 2, "VALID");
+  auto tensor_4d = ops::Const(s.WithOpName("tensor_4d"), 3.0f, {1, 1, 1, 3});
+  auto i1 = ops::Identity(s.WithOpName("i1"), tensor_4d);
+  Output i2 = ops::Identity(s.WithOpName("i2"), i1);
+  auto shapen = ops::ShapeN(s.WithOpName("shapen"), {conv, i2});
+  auto add = ops::Add(s.WithOpName("add"), shapen[0], shapen[1]);
+  GrapplerItem item;
+  TF_CHECK_OK(s.ToGraphDef(&item.graph));
+  LayoutOptimizer optimizer;
+  GraphDef output;
+  Status status = optimizer.Optimize(virtual_cluster_.get(), item, &output);
+  NodeMap node_map(&output);
+  auto shapen_node = node_map.GetNode("shapen");
+  EXPECT_EQ(shapen_node->input(0), "Conv2D");
+  EXPECT_EQ(shapen_node->input(1), "i2");
+}
+
 }  // namespace
 }  // namespace grappler
 }  // namespace tensorflow
-- 
GitLab


From f675c122de5f336b3eb49bb0a74e0a0e7eb890fc Mon Sep 17 00:00:00 2001
From: Anjali Sridhar <anjalisridhar@google.com>
Date: Wed, 13 Dec 2017 16:41:58 -0800
Subject: [PATCH 0986/1225] Update tf.keras to the 2.1.2 API.

PiperOrigin-RevId: 178977412
---
 .../python/keras/_impl/keras/__init__.py      |   2 +-
 .../keras/applications/imagenet_utils.py      |  97 +++++++-
 .../keras/applications/imagenet_utils_test.py |  65 +++++-
 .../keras/applications/inception_resnet_v2.py |  16 +-
 .../_impl/keras/applications/inception_v3.py  |  16 +-
 .../_impl/keras/applications/mobilenet.py     |  41 ++--
 .../_impl/keras/applications/resnet50.py      |  16 +-
 .../keras/_impl/keras/applications/vgg16.py   |  16 +-
 .../keras/_impl/keras/applications/vgg19.py   |  18 +-
 .../_impl/keras/applications/xception.py      |  19 +-
 .../python/keras/_impl/keras/backend.py       |   4 +-
 .../python/keras/_impl/keras/callbacks.py     |   1 +
 .../python/keras/_impl/keras/datasets/imdb.py |   2 +-
 .../keras/_impl/keras/engine/topology.py      |   3 +-
 .../keras/_impl/keras/engine/training.py      | 208 +++++++++++-------
 .../keras/_impl/keras/engine/training_test.py |  14 ++
 .../python/keras/_impl/keras/layers/core.py   |  79 ++++++-
 .../keras/_impl/keras/layers/core_test.py     |  28 +++
 tensorflow/python/keras/_impl/keras/losses.py |   6 +-
 tensorflow/python/keras/_impl/keras/models.py |  18 +-
 .../keras/_impl/keras/preprocessing/image.py  |  28 ++-
 .../keras/_impl/keras/utils/data_utils.py     |  45 +++-
 .../_impl/keras/utils/data_utils_test.py      |   4 +-
 .../keras/_impl/keras/utils/generic_utils.py  |  33 ++-
 .../keras/_impl/keras/utils/np_utils.py       |   2 +-
 .../keras/_impl/keras/utils/np_utils_test.py  |   5 +-
 .../keras/_impl/keras/utils/training_utils.py |  40 ++--
 .../_impl/keras/utils/training_utils_test.py  |  23 ++
 .../_impl/keras/wrappers/scikit_learn.py      |  12 +-
 .../api/golden/tensorflow.keras.-model.pbtxt  |   6 +-
 .../golden/tensorflow.keras.-sequential.pbtxt |   6 +-
 .../tensorflow.keras.layers.-lambda.pbtxt     |   2 +-
 .../tensorflow.keras.models.-model.pbtxt      |   6 +-
 .../tensorflow.keras.models.-sequential.pbtxt |   6 +-
 ...processing.image.-directory-iterator.pbtxt |   2 +-
 ...ocessing.image.-image-data-generator.pbtxt |   2 +-
 36 files changed, 652 insertions(+), 239 deletions(-)

diff --git a/tensorflow/python/keras/_impl/keras/__init__.py b/tensorflow/python/keras/_impl/keras/__init__.py
index 74cc9d0488..a70250d796 100644
--- a/tensorflow/python/keras/_impl/keras/__init__.py
+++ b/tensorflow/python/keras/_impl/keras/__init__.py
@@ -40,4 +40,4 @@ from tensorflow.python.keras._impl.keras.layers import Input
 from tensorflow.python.keras._impl.keras.models import Model
 from tensorflow.python.keras._impl.keras.models import Sequential
 
-__version__ = '2.1.1-tf'
+__version__ = '2.1.2-tf'
diff --git a/tensorflow/python/keras/_impl/keras/applications/imagenet_utils.py b/tensorflow/python/keras/_impl/keras/applications/imagenet_utils.py
index 58841e5db0..63ee83cb51 100644
--- a/tensorflow/python/keras/_impl/keras/applications/imagenet_utils.py
+++ b/tensorflow/python/keras/_impl/keras/applications/imagenet_utils.py
@@ -20,6 +20,8 @@ from __future__ import print_function
 
 import json
 
+import numpy as np
+
 from tensorflow.python.keras._impl.keras import backend as K
 from tensorflow.python.keras._impl.keras.utils.data_utils import get_file
 from tensorflow.python.platform import tf_logging as logging
@@ -28,12 +30,15 @@ from tensorflow.python.platform import tf_logging as logging
 CLASS_INDEX = None
 CLASS_INDEX_PATH = 'https://s3.amazonaws.com/deep-learning-models/image-models/imagenet_class_index.json'
 
+# Global tensor of imagenet mean for preprocessing symbolic inputs
+_IMAGENET_MEAN = None
 
-def preprocess_input(x, data_format=None, mode='caffe'):
-  """Preprocesses a tensor encoding a batch of images.
+
+def _preprocess_numpy_input(x, data_format, mode):
+  """Preprocesses a image tensor as a Numpy array.
 
   Arguments:
-      x: input Numpy tensor, 4D.
+      x: input Numpy, 3D or 4D.
       data_format: data format of the image tensor.
       mode: One of "caffe", "tf".
           - caffe: will convert the images from RGB to BGR,
@@ -44,16 +49,11 @@ def preprocess_input(x, data_format=None, mode='caffe'):
               sample-wise.
 
   Returns:
-      Preprocessed tensor.
+      Preprocessed array.
   """
-  if data_format is None:
-    data_format = K.image_data_format()
-  assert data_format in {'channels_last', 'channels_first'}
-
   if mode == 'tf':
-    x /= 255.
-    x -= 0.5
-    x *= 2.
+    x /= 127.5
+    x -= 1.
     return x
 
   if data_format == 'channels_first':
@@ -79,6 +79,81 @@ def preprocess_input(x, data_format=None, mode='caffe'):
   return x
 
 
+def _preprocess_symbolic_input(x, data_format, mode):
+  """Preprocesses a symbolic image tensor.
+
+  Arguments:
+      x: symoblic tensor, 3D or 4D.
+      data_format: data format of the image tensor.
+      mode: One of "caffe", "tf".
+          - caffe: will convert the images from RGB to BGR,
+              then will zero-center each color channel with
+              respect to the ImageNet dataset,
+              without scaling.
+          - tf: will scale pixels between -1 and 1,
+              sample-wise.
+
+  Returns:
+      Preprocessed tensor.
+  """
+  global _IMAGENET_MEAN
+
+  if mode == 'tf':
+    x /= 127.5
+    x -= 1.
+    return x
+
+  if data_format == 'channels_first':
+    # 'RGB'->'BGR'
+    if K.ndim(x) == 3:
+      x = x[::-1, ...]
+    else:
+      x = x[:, ::-1, ...]
+  else:
+    # 'RGB'->'BGR'
+    x = x[..., ::-1]
+
+  if _IMAGENET_MEAN is None:
+    _IMAGENET_MEAN = K.constant(-np.array([103.939, 116.779, 123.68]))
+  # Zero-center by mean pixel
+  if K.dtype(x) != K.dtype(_IMAGENET_MEAN):
+    x = K.bias_add(x, K.cast(_IMAGENET_MEAN, K.dtype(x)), data_format)
+  else:
+    x = K.bias_add(x, _IMAGENET_MEAN, data_format)
+  return x
+
+
+def preprocess_input(x, data_format=None, mode='caffe'):
+  """Preprocesses a tensor encoding a batch of images.
+
+  Arguments:
+      x: input Numpy or symoblic tensor, 3D or 4D.
+      data_format: data format of the image tensor.
+      mode: One of "caffe", "tf".
+          - caffe: will convert the images from RGB to BGR,
+              then will zero-center each color channel with
+              respect to the ImageNet dataset,
+              without scaling.
+          - tf: will scale pixels between -1 and 1,
+              sample-wise.
+
+  Returns:
+      Preprocessed tensor.
+
+  Raises:
+      ValueError: in case of incorrect data_format.
+  """
+  if data_format is None:
+    data_format = K.image_data_format()
+  if data_format not in {'channels_first', 'channels_last'}:
+    raise ValueError('Unknown data_format ' + str(data_format))
+
+  if isinstance(x, np.ndarray):
+    return _preprocess_numpy_input(x, data_format=data_format, mode=mode)
+  else:
+    return _preprocess_symbolic_input(x, data_format=data_format, mode=mode)
+
+
 def decode_predictions(preds, top=5):
   """Decodes the prediction of an ImageNet model.
 
diff --git a/tensorflow/python/keras/_impl/keras/applications/imagenet_utils_test.py b/tensorflow/python/keras/_impl/keras/applications/imagenet_utils_test.py
index 517ba91219..d843dace59 100644
--- a/tensorflow/python/keras/_impl/keras/applications/imagenet_utils_test.py
+++ b/tensorflow/python/keras/_impl/keras/applications/imagenet_utils_test.py
@@ -21,6 +21,7 @@ from __future__ import print_function
 import numpy as np
 
 from tensorflow.python.keras._impl import keras
+from tensorflow.python.keras._impl.keras.applications.imagenet_utils import preprocess_input
 from tensorflow.python.platform import test
 
 
@@ -29,22 +30,62 @@ class ImageNetUtilsTest(test.TestCase):
   def test_preprocess_input(self):
     # Test batch of images
     x = np.random.uniform(0, 255, (2, 10, 10, 3))
-    self.assertEqual(
-        keras.applications.imagenet_utils.preprocess_input(x).shape, x.shape)
-    out1 = keras.applications.imagenet_utils.preprocess_input(
-        x, 'channels_last')
-    out2 = keras.applications.imagenet_utils.preprocess_input(
-        np.transpose(x, (0, 3, 1, 2)), 'channels_first')
+    self.assertEqual(preprocess_input(x).shape, x.shape)
+    out1 = preprocess_input(x, 'channels_last')
+    out2 = preprocess_input(np.transpose(x, (0, 3, 1, 2)), 'channels_first')
     self.assertAllClose(out1, out2.transpose(0, 2, 3, 1))
 
     # Test single image
     x = np.random.uniform(0, 255, (10, 10, 3))
-    self.assertEqual(
-        keras.applications.imagenet_utils.preprocess_input(x).shape, x.shape)
-    out1 = keras.applications.imagenet_utils.preprocess_input(
-        x, 'channels_last')
-    out2 = keras.applications.imagenet_utils.preprocess_input(
-        np.transpose(x, (2, 0, 1)), 'channels_first')
+    self.assertEqual(preprocess_input(x).shape, x.shape)
+    out1 = preprocess_input(x, 'channels_last')
+    out2 = preprocess_input(np.transpose(x, (2, 0, 1)), 'channels_first')
+    self.assertAllClose(out1, out2.transpose(1, 2, 0))
+
+  def test_preprocess_input_symbolic(self):
+    # Test image batch
+    x = np.random.uniform(0, 255, (2, 10, 10, 3))
+    inputs = keras.layers.Input(shape=x.shape[1:])
+    outputs = keras.layers.Lambda(
+        preprocess_input, output_shape=x.shape[1:])(inputs)
+    model = keras.models.Model(inputs, outputs)
+    assert model.predict(x).shape == x.shape
+    # pylint: disable=g-long-lambda
+    outputs1 = keras.layers.Lambda(lambda x:
+                                   preprocess_input(x, 'channels_last'),
+                                   output_shape=x.shape[1:])(inputs)
+    model1 = keras.models.Model(inputs, outputs1)
+    out1 = model1.predict(x)
+    x2 = np.transpose(x, (0, 3, 1, 2))
+    inputs2 = keras.layers.Input(shape=x2.shape[1:])
+    # pylint: disable=g-long-lambda
+    outputs2 = keras.layers.Lambda(lambda x:
+                                   preprocess_input(x, 'channels_first'),
+                                   output_shape=x2.shape[1:])(inputs2)
+    model2 = keras.models.Model(inputs2, outputs2)
+    out2 = model2.predict(x2)
+    self.assertAllClose(out1, out2.transpose(0, 2, 3, 1))
+
+    # Test single image
+    x = np.random.uniform(0, 255, (10, 10, 3))
+    inputs = keras.layers.Input(shape=x.shape)
+    outputs = keras.layers.Lambda(preprocess_input,
+                                  output_shape=x.shape)(inputs)
+    model = keras.models.Model(inputs, outputs)
+    assert model.predict(x[np.newaxis])[0].shape == x.shape
+    # pylint: disable=g-long-lambda
+    outputs1 = keras.layers.Lambda(lambda x:
+                                   preprocess_input(x, 'channels_last'),
+                                   output_shape=x.shape)(inputs)
+    model1 = keras.models.Model(inputs, outputs1)
+    out1 = model1.predict(x[np.newaxis])[0]
+    x2 = np.transpose(x, (2, 0, 1))
+    inputs2 = keras.layers.Input(shape=x2.shape)
+    outputs2 = keras.layers.Lambda(lambda x:
+                                   preprocess_input(x, 'channels_first'),
+                                   output_shape=x2.shape)(inputs2)  # pylint: disable=g-long-lambda
+    model2 = keras.models.Model(inputs2, outputs2)
+    out2 = model2.predict(x2[np.newaxis])[0]
     self.assertAllClose(out1, out2.transpose(1, 2, 0))
 
   def test_obtain_input_shape(self):
diff --git a/tensorflow/python/keras/_impl/keras/applications/inception_resnet_v2.py b/tensorflow/python/keras/_impl/keras/applications/inception_resnet_v2.py
index de29b92575..c66b4b395e 100644
--- a/tensorflow/python/keras/_impl/keras/applications/inception_resnet_v2.py
+++ b/tensorflow/python/keras/_impl/keras/applications/inception_resnet_v2.py
@@ -23,6 +23,8 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
+import os
+
 from tensorflow.python.keras._impl.keras import backend as K
 from tensorflow.python.keras._impl.keras.applications import imagenet_utils
 from tensorflow.python.keras._impl.keras.applications.imagenet_utils import _obtain_input_shape
@@ -208,8 +210,9 @@ def InceptionResNetV2(include_top=True,  # pylint: disable=invalid-name
   Arguments:
       include_top: whether to include the fully-connected
           layer at the top of the network.
-      weights: one of `None` (random initialization)
-          or `'imagenet'` (pre-training on ImageNet).
+      weights: one of `None` (random initialization),
+          "imagenet" (pre-training on ImageNet),
+          or the path to the weights file to be loaded.
       input_tensor: optional Keras tensor (i.e. output of `layers.Input()`)
           to use as image input for the model.
       input_shape: optional shape tuple, only to be specified
@@ -239,10 +242,11 @@ def InceptionResNetV2(include_top=True,  # pylint: disable=invalid-name
       ValueError: in case of invalid argument for `weights`,
           or invalid input shape.
   """
-  if weights not in {'imagenet', None}:
+  if not (weights in {'imagenet', None} or os.path.exists(weights)):
     raise ValueError('The `weights` argument should be either '
-                     '`None` (random initialization) or `imagenet` '
-                     '(pre-training on ImageNet).')
+                     '`None` (random initialization), `imagenet` '
+                     '(pre-training on ImageNet), '
+                     'or the path to the weights file to be loaded.')
 
   if weights == 'imagenet' and include_top and classes != 1000:
     raise ValueError('If using `weights` as imagenet with `include_top`'
@@ -365,5 +369,7 @@ def InceptionResNetV2(include_top=True,  # pylint: disable=invalid-name
           cache_subdir='models',
           file_hash='d19885ff4a710c122648d3b5c3b684e4')
     model.load_weights(weights_path)
+  elif weights is not None:
+    model.load_weights(weights)
 
   return model
diff --git a/tensorflow/python/keras/_impl/keras/applications/inception_v3.py b/tensorflow/python/keras/_impl/keras/applications/inception_v3.py
index d4fea4fbb0..4424b92804 100644
--- a/tensorflow/python/keras/_impl/keras/applications/inception_v3.py
+++ b/tensorflow/python/keras/_impl/keras/applications/inception_v3.py
@@ -29,6 +29,8 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
+import os
+
 from tensorflow.python.keras._impl.keras import backend as K
 from tensorflow.python.keras._impl.keras import layers
 from tensorflow.python.keras._impl.keras.applications import imagenet_utils
@@ -118,8 +120,9 @@ def InceptionV3(include_top=True,
   Arguments:
       include_top: whether to include the fully-connected
           layer at the top of the network.
-      weights: one of `None` (random initialization)
-          or "imagenet" (pre-training on ImageNet).
+      weights: one of `None` (random initialization),
+          "imagenet" (pre-training on ImageNet),
+          or the path to the weights file to be loaded.
       input_tensor: optional Keras tensor (i.e. output of `layers.Input()`)
           to use as image input for the model.
       input_shape: optional shape tuple, only to be specified
@@ -151,10 +154,11 @@ def InceptionV3(include_top=True,
       ValueError: in case of invalid argument for `weights`,
           or invalid input shape.
   """
-  if weights not in {'imagenet', None}:
+  if not (weights in {'imagenet', None} or os.path.exists(weights)):
     raise ValueError('The `weights` argument should be either '
-                     '`None` (random initialization) or `imagenet` '
-                     '(pre-training on ImageNet).')
+                     '`None` (random initialization), `imagenet` '
+                     '(pre-training on ImageNet), '
+                     'or the path to the weights file to be loaded.')
 
   if weights == 'imagenet' and include_top and classes != 1000:
     raise ValueError('If using `weights` as imagenet with `include_top`'
@@ -383,6 +387,8 @@ def InceptionV3(include_top=True,
           cache_subdir='models',
           file_hash='bcbd6486424b2319ff4ef7d526e38f63')
     model.load_weights(weights_path)
+  elif weights is not None:
+    model.load_weights(weights)
   return model
 
 
diff --git a/tensorflow/python/keras/_impl/keras/applications/mobilenet.py b/tensorflow/python/keras/_impl/keras/applications/mobilenet.py
index 653bd8c09f..4d5ac72604 100644
--- a/tensorflow/python/keras/_impl/keras/applications/mobilenet.py
+++ b/tensorflow/python/keras/_impl/keras/applications/mobilenet.py
@@ -67,7 +67,7 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-import warnings
+import os
 
 from tensorflow.python.keras._impl.keras import backend as K
 from tensorflow.python.keras._impl.keras import constraints
@@ -89,6 +89,7 @@ from tensorflow.python.keras._impl.keras.layers import Reshape
 from tensorflow.python.keras._impl.keras.models import Model
 from tensorflow.python.keras._impl.keras.utils import conv_utils
 from tensorflow.python.keras._impl.keras.utils.data_utils import get_file
+from tensorflow.python.platform import tf_logging as logging
 
 BASE_WEIGHT_PATH = 'https://github.com/fchollet/deep-learning-models/releases/download/v0.6/'
 
@@ -348,8 +349,9 @@ def MobileNet(input_shape=None,  # pylint: disable=invalid-name
       dropout: dropout rate
       include_top: whether to include the fully-connected
           layer at the top of the network.
-      weights: `None` (random initialization) or
-          `imagenet` (ImageNet weights)
+      weights: one of `None` (random initialization),
+          "imagenet" (pre-training on ImageNet),
+          or the path to the weights file to be loaded.
       input_tensor: optional Keras tensor (i.e. output of
           `layers.Input()`)
           to use as image input for the model.
@@ -378,16 +380,11 @@ def MobileNet(input_shape=None,  # pylint: disable=invalid-name
       RuntimeError: If attempting to run this model with a
           backend that does not support separable convolutions.
   """
-
-  if K.backend() != 'tensorflow':
-    raise RuntimeError('Only TensorFlow backend is currently supported, '
-                       'as other backends do not support '
-                       'depthwise convolution.')
-
-  if weights not in {'imagenet', None}:
+  if not (weights in {'imagenet', None} or os.path.exists(weights)):
     raise ValueError('The `weights` argument should be either '
-                     '`None` (random initialization) or `imagenet` '
-                     '(pre-training on ImageNet).')
+                     '`None` (random initialization), `imagenet` '
+                     '(pre-training on ImageNet), '
+                     'or the path to the weights file to be loaded.')
 
   if weights == 'imagenet' and include_top and classes != 1000:
     raise ValueError('If using `weights` as ImageNet with `include_top` '
@@ -438,15 +435,15 @@ def MobileNet(input_shape=None,  # pylint: disable=invalid-name
                        ' Input shape provided = %s' % (input_shape,))
 
   if K.image_data_format() != 'channels_last':
-    warnings.warn('The MobileNet family of models is only available '
-                  'for the input data format "channels_last" '
-                  '(width, height, channels). '
-                  'However your settings specify the default '
-                  'data format "channels_first" (channels, width, height).'
-                  ' You should set `image_data_format="channels_last"` '
-                  'in your Keras config located at ~/.keras/keras.json. '
-                  'The model being returned right now will expect inputs '
-                  'to follow the "channels_last" data format.')
+    logging.warning('The MobileNet family of models is only available '
+                    'for the input data format "channels_last" '
+                    '(width, height, channels). '
+                    'However your settings specify the default '
+                    'data format "channels_first" (channels, width, height).'
+                    ' You should set `image_data_format="channels_last"` '
+                    'in your Keras config located at ~/.keras/keras.json. '
+                    'The model being returned right now will expect inputs '
+                    'to follow the "channels_last" data format.')
     K.set_image_data_format('channels_last')
     old_data_format = 'channels_first'
   else:
@@ -534,6 +531,8 @@ def MobileNet(input_shape=None,  # pylint: disable=invalid-name
       weigh_path = BASE_WEIGHT_PATH + model_name
       weights_path = get_file(model_name, weigh_path, cache_subdir='models')
     model.load_weights(weights_path)
+  elif weights is not None:
+    model.load_weights(weights)
 
   if old_data_format:
     K.set_image_data_format(old_data_format)
diff --git a/tensorflow/python/keras/_impl/keras/applications/resnet50.py b/tensorflow/python/keras/_impl/keras/applications/resnet50.py
index 717b626fdc..f7cdf2be99 100644
--- a/tensorflow/python/keras/_impl/keras/applications/resnet50.py
+++ b/tensorflow/python/keras/_impl/keras/applications/resnet50.py
@@ -26,6 +26,8 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
+import os
+
 from tensorflow.python.keras._impl.keras import backend as K
 from tensorflow.python.keras._impl.keras import layers
 from tensorflow.python.keras._impl.keras.applications.imagenet_utils import _obtain_input_shape
@@ -161,8 +163,9 @@ def ResNet50(include_top=True,
   Arguments:
       include_top: whether to include the fully-connected
           layer at the top of the network.
-      weights: one of `None` (random initialization)
-          or "imagenet" (pre-training on ImageNet).
+      weights: one of `None` (random initialization),
+          "imagenet" (pre-training on ImageNet),
+          or the path to the weights file to be loaded.
       input_tensor: optional Keras tensor (i.e. output of `layers.Input()`)
           to use as image input for the model.
       input_shape: optional shape tuple, only to be specified
@@ -194,10 +197,11 @@ def ResNet50(include_top=True,
       ValueError: in case of invalid argument for `weights`,
           or invalid input shape.
   """
-  if weights not in {'imagenet', None}:
+  if not (weights in {'imagenet', None} or os.path.exists(weights)):
     raise ValueError('The `weights` argument should be either '
-                     '`None` (random initialization) or `imagenet` '
-                     '(pre-training on ImageNet).')
+                     '`None` (random initialization), `imagenet` '
+                     '(pre-training on ImageNet), '
+                     'or the path to the weights file to be loaded.')
 
   if weights == 'imagenet' and include_top and classes != 1000:
     raise ValueError('If using `weights` as imagenet with `include_top`'
@@ -283,4 +287,6 @@ def ResNet50(include_top=True,
           cache_subdir='models',
           md5_hash='a268eb855778b3df3c7506639542a6af')
     model.load_weights(weights_path)
+  elif weights is not None:
+    model.load_weights(weights)
   return model
diff --git a/tensorflow/python/keras/_impl/keras/applications/vgg16.py b/tensorflow/python/keras/_impl/keras/applications/vgg16.py
index a0862e6407..ab205aa689 100644
--- a/tensorflow/python/keras/_impl/keras/applications/vgg16.py
+++ b/tensorflow/python/keras/_impl/keras/applications/vgg16.py
@@ -25,6 +25,8 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
+import os
+
 from tensorflow.python.keras._impl.keras import backend as K
 from tensorflow.python.keras._impl.keras.applications.imagenet_utils import _obtain_input_shape
 from tensorflow.python.keras._impl.keras.applications.imagenet_utils import decode_predictions  # pylint: disable=unused-import
@@ -68,8 +70,9 @@ def VGG16(include_top=True,
   Arguments:
       include_top: whether to include the 3 fully-connected
           layers at the top of the network.
-      weights: one of `None` (random initialization)
-          or "imagenet" (pre-training on ImageNet).
+     weights: one of `None` (random initialization),
+          "imagenet" (pre-training on ImageNet),
+          or the path to the weights file to be loaded.
       input_tensor: optional Keras tensor (i.e. output of `layers.Input()`)
           to use as image input for the model.
       input_shape: optional shape tuple, only to be specified
@@ -101,10 +104,11 @@ def VGG16(include_top=True,
       ValueError: in case of invalid argument for `weights`,
           or invalid input shape.
   """
-  if weights not in {'imagenet', None}:
+  if not (weights in {'imagenet', None} or os.path.exists(weights)):
     raise ValueError('The `weights` argument should be either '
-                     '`None` (random initialization) or `imagenet` '
-                     '(pre-training on ImageNet).')
+                     '`None` (random initialization), `imagenet` '
+                     '(pre-training on ImageNet), '
+                     'or the path to the weights file to be loaded.')
 
   if weights == 'imagenet' and include_top and classes != 1000:
     raise ValueError('If using `weights` as imagenet with `include_top`'
@@ -211,4 +215,6 @@ def VGG16(include_top=True,
         dense = model.get_layer(name='fc1')
         layer_utils.convert_dense_weights_data_format(dense, shape,
                                                       'channels_first')
+  elif weights is not None:
+    model.load_weights(weights)
   return model
diff --git a/tensorflow/python/keras/_impl/keras/applications/vgg19.py b/tensorflow/python/keras/_impl/keras/applications/vgg19.py
index cfa1c95336..5e5179f332 100644
--- a/tensorflow/python/keras/_impl/keras/applications/vgg19.py
+++ b/tensorflow/python/keras/_impl/keras/applications/vgg19.py
@@ -25,6 +25,8 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
+import os
+
 from tensorflow.python.keras._impl.keras import backend as K
 from tensorflow.python.keras._impl.keras.applications.imagenet_utils import _obtain_input_shape
 from tensorflow.python.keras._impl.keras.applications.imagenet_utils import decode_predictions  # pylint: disable=unused-import
@@ -68,8 +70,9 @@ def VGG19(include_top=True,
   Arguments:
       include_top: whether to include the 3 fully-connected
           layers at the top of the network.
-      weights: one of `None` (random initialization)
-          or "imagenet" (pre-training on ImageNet).
+      weights: one of `None` (random initialization),
+          "imagenet" (pre-training on ImageNet),
+          or the path to the weights file to be loaded.
       input_tensor: optional Keras tensor (i.e. output of `layers.Input()`)
           to use as image input for the model.
       input_shape: optional shape tuple, only to be specified
@@ -101,10 +104,11 @@ def VGG19(include_top=True,
       ValueError: in case of invalid argument for `weights`,
           or invalid input shape.
   """
-  if weights not in {'imagenet', None}:
+  if not (weights in {'imagenet', None} or os.path.exists(weights)):
     raise ValueError('The `weights` argument should be either '
-                     '`None` (random initialization) or `imagenet` '
-                     '(pre-training on ImageNet).')
+                     '`None` (random initialization), `imagenet` '
+                     '(pre-training on ImageNet), '
+                     'or the path to the weights file to be loaded.')
 
   if weights == 'imagenet' and include_top and classes != 1000:
     raise ValueError('If using `weights` as imagenet with `include_top`'
@@ -207,8 +211,6 @@ def VGG19(include_top=True,
           cache_subdir='models',
           file_hash='253f8cb515780f3b799900260a226db6')
     model.load_weights(weights_path)
-    if K.backend() == 'theano':
-      layer_utils.convert_all_kernels_in_model(model)
 
     if K.image_data_format() == 'channels_first':
       if include_top:
@@ -217,4 +219,6 @@ def VGG19(include_top=True,
         dense = model.get_layer(name='fc1')
         layer_utils.convert_dense_weights_data_format(dense, shape,
                                                       'channels_first')
+  elif weights is not None:
+    model.load_weights(weights)
   return model
diff --git a/tensorflow/python/keras/_impl/keras/applications/xception.py b/tensorflow/python/keras/_impl/keras/applications/xception.py
index 14f6ad8090..a9efd5d64c 100644
--- a/tensorflow/python/keras/_impl/keras/applications/xception.py
+++ b/tensorflow/python/keras/_impl/keras/applications/xception.py
@@ -36,6 +36,8 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
+import os
+
 from tensorflow.python.keras._impl.keras import backend as K
 from tensorflow.python.keras._impl.keras import layers
 from tensorflow.python.keras._impl.keras.applications import imagenet_utils
@@ -80,8 +82,9 @@ def Xception(include_top=True,
   Arguments:
       include_top: whether to include the fully-connected
           layer at the top of the network.
-      weights: one of `None` (random initialization)
-          or "imagenet" (pre-training on ImageNet).
+      weights: one of `None` (random initialization),
+          "imagenet" (pre-training on ImageNet),
+          or the path to the weights file to be loaded.
       input_tensor: optional Keras tensor (i.e. output of `layers.Input()`)
           to use as image input for the model.
       input_shape: optional shape tuple, only to be specified
@@ -114,18 +117,16 @@ def Xception(include_top=True,
       RuntimeError: If attempting to run this model with a
           backend that does not support separable convolutions.
   """
-  if weights not in {'imagenet', None}:
+  if not (weights in {'imagenet', None} or os.path.exists(weights)):
     raise ValueError('The `weights` argument should be either '
-                     '`None` (random initialization) or `imagenet` '
-                     '(pre-training on ImageNet).')
+                     '`None` (random initialization), `imagenet` '
+                     '(pre-training on ImageNet), '
+                     'or the path to the weights file to be loaded.')
 
   if weights == 'imagenet' and include_top and classes != 1000:
     raise ValueError('If using `weights` as imagenet with `include_top`'
                      ' as true, `classes` should be 1000')
 
-  if K.backend() != 'tensorflow':
-    raise RuntimeError('The Xception model is only available with '
-                       'the TensorFlow backend.')
   if K.image_data_format() != 'channels_last':
     logging.warning(
         'The Xception model is only available for the '
@@ -297,6 +298,8 @@ def Xception(include_top=True,
           cache_subdir='models',
           file_hash='b0042744bf5b25fce3cb969f33bebb97')
     model.load_weights(weights_path)
+  elif weights is not None:
+    model.load_weights(weights)
 
   if old_data_format:
     K.set_image_data_format(old_data_format)
diff --git a/tensorflow/python/keras/_impl/keras/backend.py b/tensorflow/python/keras/_impl/keras/backend.py
index ec7a5dcffd..9476085bd8 100644
--- a/tensorflow/python/keras/_impl/keras/backend.py
+++ b/tensorflow/python/keras/_impl/keras/backend.py
@@ -3120,8 +3120,8 @@ def sparse_categorical_crossentropy(target, output, from_logits=False):
   logits = array_ops.reshape(output, [-1, int(output_shape[-1])])
   res = nn.sparse_softmax_cross_entropy_with_logits(
       labels=targets, logits=logits)
-  if len(output_shape) == 3:
-    # if our output includes timesteps we need to reshape
+  if len(output_shape) >= 3:
+    # If our output includes timesteps or spatial dimensions we need to reshape
     return array_ops.reshape(res, array_ops.shape(output)[:-1])
   else:
     return res
diff --git a/tensorflow/python/keras/_impl/keras/callbacks.py b/tensorflow/python/keras/_impl/keras/callbacks.py
index 16109b52b3..8da3b85718 100644
--- a/tensorflow/python/keras/_impl/keras/callbacks.py
+++ b/tensorflow/python/keras/_impl/keras/callbacks.py
@@ -189,6 +189,7 @@ class Callback(object):
 
   def __init__(self):
     self.validation_data = None
+    self.model = None
 
   def set_params(self, params):
     self.params = params
diff --git a/tensorflow/python/keras/_impl/keras/datasets/imdb.py b/tensorflow/python/keras/_impl/keras/datasets/imdb.py
index 0e83473899..c5b3f0476b 100644
--- a/tensorflow/python/keras/_impl/keras/datasets/imdb.py
+++ b/tensorflow/python/keras/_impl/keras/datasets/imdb.py
@@ -43,7 +43,7 @@ def load_data(path='imdb.npz',
           the most frequent words are kept
       skip_top: skip the top N most frequently occurring words
           (which may not be informative).
-      maxlen: truncate sequences after this length.
+      maxlen: sequences longer than this will be filtered out.
       seed: random seed for sample shuffling.
       start_char: The start of a sequence will be marked with this character.
           Set to 1 because 0 is usually the padding character.
diff --git a/tensorflow/python/keras/_impl/keras/engine/topology.py b/tensorflow/python/keras/_impl/keras/engine/topology.py
index 4a7bb2e838..0ccb172269 100644
--- a/tensorflow/python/keras/_impl/keras/engine/topology.py
+++ b/tensorflow/python/keras/_impl/keras/engine/topology.py
@@ -1426,10 +1426,11 @@ def preprocess_weights_for_loading(layer,
         weights[1] = np.transpose(weights[1], (3, 2, 0, 1))
 
   # convert the weights of CuDNNLSTM so that they could be loaded into LSTM
-  if layer.__class__.__name__ == 'LSTM':
+  if layer.__class__.__name__ == 'LSTM' and len(weights) == 3:
     # determine if we're loading a CuDNNLSTM layer from the number of bias
     # weights:
     # CuDNNLSTM has (units * 8) weights; while LSTM has (units * 4)
+    # if there's no bias weight in the file, skip this conversion
     units = weights[1].shape[0]
     bias = weights[2]
     if len(bias) == units * 8:
diff --git a/tensorflow/python/keras/_impl/keras/engine/training.py b/tensorflow/python/keras/_impl/keras/engine/training.py
index b4205bf4a3..debea2503e 100644
--- a/tensorflow/python/keras/_impl/keras/engine/training.py
+++ b/tensorflow/python/keras/_impl/keras/engine/training.py
@@ -194,7 +194,7 @@ def _standardize_sample_or_class_weights(x_weight, output_names, weight_type):
     return x_weights
   else:
     raise TypeError('The model has multiple outputs, so `' + weight_type + '` '
-                    'should be either a list of a dict. '
+                    'should be either a list or a dict. '
                     'Provided `' + weight_type + '` type not understood: ' +
                     str(x_weight))
 
@@ -375,7 +375,7 @@ def _make_batches(size, batch_size):
   Returns:
       A list of tuples of array indices.
   """
-  num_batches = int(np.ceil(size / float(batch_size)))
+  num_batches = (size + batch_size - 1) // batch_size  # round up
   return [(i * batch_size, min(size, (i + 1) * batch_size))
           for i in range(num_batches)]
 
@@ -657,7 +657,7 @@ class Model(Network):
     elif isinstance(loss, list):
       if len(loss) != len(self.outputs):
         raise ValueError('When passing a list as loss, '
-                         'it should have one entry per model outputs. '
+                         'it should have one entry per model output. '
                          'The model has ' + str(len(self.outputs)) +
                          ' outputs, but you passed loss=' + str(loss))
       loss_functions = [losses.get(l) for l in loss]
@@ -700,7 +700,7 @@ class Model(Network):
     elif isinstance(loss_weights, list):
       if len(loss_weights) != len(self.outputs):
         raise ValueError('When passing a list as loss_weights, '
-                         'it should have one entry per model outputs. '
+                         'it should have one entry per model output. '
                          'The model has ' + str(len(self.outputs)) +
                          ' outputs, but you passed loss_weights=' +
                          str(loss_weights))
@@ -716,7 +716,7 @@ class Model(Network):
       if isinstance(target_tensors, list):
         if len(target_tensors) != len(self.outputs):
           raise ValueError('When passing a list as `target_tensors`, '
-                           'it should have one entry per model outputs. '
+                           'it should have one entry per model output. '
                            'The model has ' + str(len(self.outputs)) +
                            ' outputs, but you passed target_tensors=' +
                            str(target_tensors))
@@ -789,7 +789,7 @@ class Model(Network):
     elif isinstance(sample_weight_mode, list):
       if len(sample_weight_mode) != len(self.outputs):
         raise ValueError('When passing a list as sample_weight_mode, '
-                         'it should have one entry per model outputs. '
+                         'it should have one entry per model output. '
                          'The model has ' + str(len(self.outputs)) +
                          ' outputs, but you passed '
                          'sample_weight_mode=' + str(sample_weight_mode))
@@ -1414,6 +1414,13 @@ class Model(Network):
                                      self._feed_loss_fns):
       if loss_fn is losses.sparse_categorical_crossentropy:
         output_shapes.append(output_shape[:-1] + (1,))
+      elif (not hasattr(loss_fn, '__name__') or
+            getattr(losses, loss_fn.__name__, None) is None):
+        # If `loss_fn` is not a function (e.g. callable class)
+        # or if it not in the `losses` module, then
+        # it is a user-defined loss and we make no assumptions
+        # about it.
+        output_shapes.append(None)
       else:
         output_shapes.append(output_shape)
     x = _standardize_input_data(
@@ -1919,7 +1926,7 @@ class Model(Network):
 
   def fit_generator(self,
                     generator,
-                    steps_per_epoch,
+                    steps_per_epoch=None,
                     epochs=1,
                     verbose=1,
                     callbacks=None,
@@ -1956,7 +1963,9 @@ class Model(Network):
             to yield from `generator` before declaring one epoch
             finished and starting the next epoch. It should typically
             be equal to the number of unique samples of your dataset
-            divided by the batch size. Not used if using `Sequence`.
+            divided by the batch size.
+            Optional for `Sequence`: if unspecified, will use
+            `len(generator)` as a number of steps.
         epochs: Integer, total number of iterations on the data.
         verbose: Verbosity mode, 0, 1, or 2.
         callbacks: List of callbacks to be called during training.
@@ -1967,11 +1976,15 @@ class Model(Network):
         validation_steps: Only relevant if `validation_data`
             is a generator. Total number of steps (batches of samples)
             to yield from `generator` before stopping.
+            Optional for `Sequence`: if unspecified, will use
+            `len(generator)` as a number of steps.
         class_weight: Dictionary mapping class indices to a weight
             for the class.
-        max_queue_size: Maximum size for the generator queue
-        workers: Maximum number of processes to spin up
-            when using process-based threading.
+        max_queue_size: Maximum size for the generator queue.
+        workers: Integer. Maximum number of processes to spin up
+            when using process based threading.
+            If unspecified, `workers` will default to 1. If 0, will
+            execute the generator on the main thread.
         use_multiprocessing: If True, use process based threading.
             Note that because
             this implementation relies on multiprocessing,
@@ -2031,15 +2044,33 @@ class Model(Network):
     if do_validation:
       self._make_test_function()
 
+    is_sequence = isinstance(generator, Sequence)
+    if not is_sequence and use_multiprocessing and workers > 1:
+      logging.warning('Using a generator with `use_multiprocessing=True`'
+                      ' and multiple workers may duplicate your data.'
+                      ' Please consider using the`keras.utils.Sequence'
+                      ' class.')
+    if steps_per_epoch is None:
+      if is_sequence:
+        steps_per_epoch = len(generator)
+      else:
+        raise ValueError('`steps_per_epoch=None` is only valid for a'
+                         ' generator based on the `keras.utils.Sequence`'
+                         ' class. Please specify `steps_per_epoch` or use'
+                         ' the `keras.utils.Sequence` class.')
+
     # python 2 has 'next', 3 has '__next__'
     # avoid any explicit version checks
-    val_gen = (hasattr(validation_data, 'next') or
-               hasattr(validation_data, '__next__') or
-               isinstance(validation_data, Sequence))
-    if val_gen and not validation_steps:
-      raise ValueError('When using a generator for validation data, '
-                       'you must specify a value for '
-                       '`validation_steps`.')
+    val_gen = (
+        hasattr(validation_data, 'next') or
+        hasattr(validation_data, '__next__') or
+        isinstance(validation_data, Sequence))
+    if (val_gen and not isinstance(validation_data, Sequence) and
+        not validation_steps):
+      raise ValueError('`validation_steps=None` is only valid for a'
+                       ' generator based on the `keras.utils.Sequence`'
+                       ' class. Please specify `validation_steps` or use'
+                       ' the `keras.utils.Sequence` class.')
 
     # Prepare display labels.
     out_labels = self._get_deduped_metrics_names()
@@ -2084,28 +2115,24 @@ class Model(Network):
         val_data += [0.]
       for cbk in callbacks:
         cbk.validation_data = val_data
-    is_sequence = isinstance(generator, Sequence)
-    if not is_sequence and use_multiprocessing and workers > 1:
-      logging.warning(
-          logging.warning('Using a generator with `use_multiprocessing=True`'
-                          ' and multiple workers may duplicate your data.'
-                          ' Please consider using the`keras.utils.Sequence'
-                          ' class.'))
-    if is_sequence:
-      steps_per_epoch = len(generator)
     enqueuer = None
 
     try:
-      if is_sequence:
-        enqueuer = OrderedEnqueuer(
-            generator, use_multiprocessing=use_multiprocessing, shuffle=shuffle)
+      if workers > 0:
+        if is_sequence:
+          enqueuer = OrderedEnqueuer(
+              generator,
+              use_multiprocessing=use_multiprocessing,
+              shuffle=shuffle)
+        else:
+          enqueuer = GeneratorEnqueuer(
+              generator,
+              use_multiprocessing=use_multiprocessing,
+              wait_time=wait_time)
+        enqueuer.start(workers=workers, max_queue_size=max_queue_size)
+        output_generator = enqueuer.get()
       else:
-        enqueuer = GeneratorEnqueuer(
-            generator,
-            use_multiprocessing=use_multiprocessing,
-            wait_time=wait_time)
-      enqueuer.start(workers=workers, max_queue_size=max_queue_size)
-      output_generator = enqueuer.get()
+        output_generator = generator
 
       callback_model.stop_training = False
       while epoch < epochs:
@@ -2119,6 +2146,7 @@ class Model(Network):
             raise ValueError('Output of generator should be '
                              'a tuple `(x, y, sample_weight)` '
                              'or `(x, y)`. Found: ' + str(generator_output))
+
           if len(generator_output) == 2:
             x, y = generator_output
             sample_weight = None
@@ -2196,7 +2224,7 @@ class Model(Network):
 
   def evaluate_generator(self,
                          generator,
-                         steps,
+                         steps=None,
                          max_queue_size=10,
                          workers=1,
                          use_multiprocessing=False,
@@ -2214,10 +2242,13 @@ class Model(Network):
             when using multiprocessing.
         steps: Total number of steps (batches of samples)
             to yield from `generator` before stopping.
-            Not used if using `Sequence`.
+            Optional for `Sequence`: if unspecified, will use
+            the `len(generator)` as a number of steps.
         max_queue_size: maximum size for the generator queue
-        workers: maximum number of processes to spin up
-            when using process-based threading.
+        workers: Integer. Maximum number of processes to spin up
+            when using process based threading.
+            If unspecified, `workers` will default to 1. If 0, will
+            execute the generator on the main thread.
         use_multiprocessing: if True, use process based threading.
             Note that because
             this implementation relies on multiprocessing,
@@ -2258,26 +2289,34 @@ class Model(Network):
     batch_sizes = []
     is_sequence = isinstance(generator, Sequence)
     if not is_sequence and use_multiprocessing and workers > 1:
-      logging.warning(
-          logging.warning('Using a generator with `use_multiprocessing=True`'
-                          ' and multiple workers may duplicate your data.'
-                          ' Please consider using the`keras.utils.Sequence'
-                          ' class.'))
-    if is_sequence:
-      steps = len(generator)
+      logging.warning('Using a generator with `use_multiprocessing=True`'
+                      ' and multiple workers may duplicate your data.'
+                      ' Please consider using the`keras.utils.Sequence'
+                      ' class.')
+    if steps is None:
+      if is_sequence:
+        steps = len(generator)
+      else:
+        raise ValueError('`steps=None` is only valid for a generator'
+                         ' based on the `keras.utils.Sequence` class.'
+                         ' Please specify `steps` or use the'
+                         ' `keras.utils.Sequence` class.')
     enqueuer = None
 
     try:
-      if is_sequence:
-        enqueuer = OrderedEnqueuer(
-            generator, use_multiprocessing=use_multiprocessing)
+      if workers > 0:
+        if is_sequence:
+          enqueuer = OrderedEnqueuer(
+              generator, use_multiprocessing=use_multiprocessing)
+        else:
+          enqueuer = GeneratorEnqueuer(
+              generator,
+              use_multiprocessing=use_multiprocessing,
+              wait_time=wait_time)
+        enqueuer.start(workers=workers, max_queue_size=max_queue_size)
+        output_generator = enqueuer.get()
       else:
-        enqueuer = GeneratorEnqueuer(
-            generator,
-            use_multiprocessing=use_multiprocessing,
-            wait_time=wait_time)
-      enqueuer.start(workers=workers, max_queue_size=max_queue_size)
-      output_generator = enqueuer.get()
+        output_generator = generator
 
       while steps_done < steps:
         generator_output = next(output_generator)
@@ -2297,11 +2336,11 @@ class Model(Network):
         outs = self.test_on_batch(x, y, sample_weight=sample_weight)
 
         if isinstance(x, list):
-          batch_size = len(x[0])
+          batch_size = x[0].shape[0]
         elif isinstance(x, dict):
-          batch_size = len(list(x.values())[0])
+          batch_size = list(x.values())[0].shape[0]
         else:
-          batch_size = len(x)
+          batch_size = x.shape[0]
         if batch_size == 0:
           raise ValueError('Received an empty batch. '
                            'Batches should at least contain one item.')
@@ -2325,7 +2364,7 @@ class Model(Network):
 
   def predict_generator(self,
                         generator,
-                        steps,
+                        steps=None,
                         max_queue_size=10,
                         workers=1,
                         use_multiprocessing=False,
@@ -2343,10 +2382,13 @@ class Model(Network):
                 when using multiprocessing.
         steps: Total number of steps (batches of samples)
             to yield from `generator` before stopping.
+            Optional for `Sequence`: if unspecified, will use
+            the `len(generator)` as a number of steps.
         max_queue_size: Maximum size for the generator queue.
-          Not used if using `Sequence`.
-        workers: Maximum number of processes to spin up
-            when using process-based threading.
+        workers: Integer. Maximum number of processes to spin up
+            when using process based threading.
+            If unspecified, `workers` will default to 1. If 0, will
+            execute the generator on the main thread.
         use_multiprocessing: If `True`, use process based threading.
             Note that because
             this implementation relies on multiprocessing,
@@ -2382,26 +2424,34 @@ class Model(Network):
     all_outs = []
     is_sequence = isinstance(generator, Sequence)
     if not is_sequence and use_multiprocessing and workers > 1:
-      logging.warning(
-          logging.warning('Using a generator with `use_multiprocessing=True`'
-                          ' and multiple workers may duplicate your data.'
-                          ' Please consider using the`keras.utils.Sequence'
-                          ' class.'))
-    if is_sequence:
-      steps = len(generator)
+      logging.warn('Using a generator with `use_multiprocessing=True`'
+                   ' and multiple workers may duplicate your data.'
+                   ' Please consider using the`keras.utils.Sequence'
+                   ' class.')
+    if steps is None:
+      if is_sequence:
+        steps = len(generator)
+      else:
+        raise ValueError('`steps=None` is only valid for a generator'
+                         ' based on the `keras.utils.Sequence` class.'
+                         ' Please specify `steps` or use the'
+                         ' `keras.utils.Sequence` class.')
     enqueuer = None
 
     try:
-      if is_sequence:
-        enqueuer = OrderedEnqueuer(
-            generator, use_multiprocessing=use_multiprocessing)
+      if workers > 0:
+        if is_sequence:
+          enqueuer = OrderedEnqueuer(
+              generator, use_multiprocessing=use_multiprocessing)
+        else:
+          enqueuer = GeneratorEnqueuer(
+              generator,
+              use_multiprocessing=use_multiprocessing,
+              wait_time=wait_time)
+        enqueuer.start(workers=workers, max_queue_size=max_queue_size)
+        output_generator = enqueuer.get()
       else:
-        enqueuer = GeneratorEnqueuer(
-            generator,
-            use_multiprocessing=use_multiprocessing,
-            wait_time=wait_time)
-      enqueuer.start(workers=workers, max_queue_size=max_queue_size)
-      output_generator = enqueuer.get()
+        output_generator = generator
 
       if verbose == 1:
         progbar = Progbar(target=steps)
diff --git a/tensorflow/python/keras/_impl/keras/engine/training_test.py b/tensorflow/python/keras/_impl/keras/engine/training_test.py
index 17a26f978e..78224814d3 100644
--- a/tensorflow/python/keras/_impl/keras/engine/training_test.py
+++ b/tensorflow/python/keras/_impl/keras/engine/training_test.py
@@ -836,6 +836,11 @@ class TestGeneratorMethods(test.TestCase):
                             use_multiprocessing=False,
                             validation_data=custom_generator(),
                             validation_steps=10)
+        model.fit_generator(custom_generator(),
+                            steps_per_epoch=5,
+                            validation_data=custom_generator(),
+                            validation_steps=1,
+                            workers=0)
         model.predict_generator(custom_generator(),
                                 steps=5,
                                 max_queue_size=10,
@@ -845,6 +850,10 @@ class TestGeneratorMethods(test.TestCase):
                                 steps=5,
                                 max_queue_size=10,
                                 use_multiprocessing=False)
+        model.predict_generator(custom_generator(),
+                                steps=5,
+                                max_queue_size=10,
+                                workers=0)
         model.evaluate_generator(custom_generator(),
                                  steps=5,
                                  max_queue_size=10,
@@ -854,6 +863,11 @@ class TestGeneratorMethods(test.TestCase):
                                  steps=5,
                                  max_queue_size=10,
                                  use_multiprocessing=False)
+        model.evaluate_generator(custom_generator(),
+                                 steps=5,
+                                 max_queue_size=10,
+                                 use_multiprocessing=False,
+                                 workers=0)
 
         # Test legacy API
         model.fit_generator(custom_generator(),
diff --git a/tensorflow/python/keras/_impl/keras/layers/core.py b/tensorflow/python/keras/_impl/keras/layers/core.py
index 517129fab0..712db33c69 100644
--- a/tensorflow/python/keras/_impl/keras/layers/core.py
+++ b/tensorflow/python/keras/_impl/keras/layers/core.py
@@ -547,8 +547,19 @@ class Lambda(Layer):
   Arguments:
       function: The function to be evaluated.
           Takes input tensor as first argument.
+      output_shape: Expected output shape from function.
+            This argument can be inferred if not explicitly provided.
+            Can be a tuple or function.
+            If a tuple, it only specifies the first dimension onward;
+                 sample dimension is assumed either the same as the input:
+                 `output_shape = (input_shape[0], ) + output_shape`
+                 or, the input is `None` and
+                 the sample dimension is also `None`:
+                 `output_shape = (None, ) + output_shape`
+            If a function, it specifies the entire shape as a function of the
+            input shape: `output_shape = f(input_shape)`
       arguments: optional dictionary of keyword arguments to be passed
-          to the function.
+            to the function.
 
   Input shape:
       Arbitrary. Use the keyword argument input_shape
@@ -557,16 +568,52 @@ class Lambda(Layer):
 
   Output shape:
       Specified by `output_shape` argument
-      (or auto-inferred when using TensorFlow).
   """
 
-  def __init__(self, function, mask=None, arguments=None, **kwargs):
+  def __init__(self, function, output_shape=None, mask=None, arguments=None,
+               **kwargs):
     super(Lambda, self).__init__(**kwargs)
     self.function = function
     self.arguments = arguments if arguments else {}
     if mask is not None:
       self.supports_masking = True
     self.mask = mask
+    if output_shape is None:
+      self._output_shape = None
+    elif isinstance(output_shape, (tuple, list)):
+      self._output_shape = tuple(output_shape)
+    else:
+      if not callable(output_shape):
+        raise TypeError('In Lambda, `output_shape` '
+                        'must be a list, a tuple, or a function.')
+      self._output_shape = output_shape
+
+  def _compute_output_shape(self, input_shape):
+    input_shape = tuple(tensor_shape.TensorShape(input_shape).as_list())
+
+    if self._output_shape is None:
+      x = K.placeholder(shape=input_shape)
+      x = self.call(x)
+      if isinstance(x, list):
+        return [tensor_shape.TensorShape(K.int_shape(x_elem)) for x_elem in x]
+      else:
+        return tensor_shape.TensorShape(K.int_shape(x))
+    elif isinstance(self._output_shape, (tuple, list)):
+      if isinstance(input_shape, list):
+        num_samples = input_shape[0][0]
+      else:
+        num_samples = input_shape[0] if input_shape else None
+      return tensor_shape.TensorShape((num_samples,) +
+                                      tuple(self._output_shape))
+    else:
+      shape = self._output_shape(input_shape)
+      if not isinstance(shape, (list, tuple)):
+        raise ValueError(
+            '`output_shape` function must return a tuple or a list of tuples.')
+      if isinstance(shape, list):
+        if isinstance(shape[0], int) or shape[0] is None:
+          shape = tuple(shape)
+      return tensor_shape.TensorShape(shape)
 
   def call(self, inputs, mask=None):
     arguments = self.arguments
@@ -587,9 +634,21 @@ class Lambda(Layer):
       function = self.function.__name__
       function_type = 'function'
 
+    if isinstance(self._output_shape, python_types.LambdaType):
+      output_shape = func_dump(self._output_shape)
+      output_shape_type = 'lambda'
+    elif callable(self._output_shape):
+      output_shape = self._output_shape.__name__
+      output_shape_type = 'function'
+    else:
+      output_shape = self._output_shape
+      output_shape_type = 'raw'
+
     config = {
         'function': function,
         'function_type': function_type,
+        'output_shape': output_shape,
+        'output_shape_type': output_shape_type,
         'arguments': self.arguments
     }
     base_config = super(Lambda, self).get_config()
@@ -614,6 +673,19 @@ class Lambda(Layer):
     else:
       raise TypeError('Unknown function type:', function_type)
 
+    output_shape_type = config.pop('output_shape_type')
+    if output_shape_type == 'function':
+      # Simple lookup in custom objects
+      output_shape = deserialize_keras_object(
+          config['output_shape'],
+          custom_objects=custom_objects,
+          printable_module_name='output_shape function in Lambda layer')
+    elif output_shape_type == 'lambda':
+      # Unsafe deserialization from bytecode
+      output_shape = func_load(config['output_shape'], globs=globs)
+    else:
+      output_shape = config['output_shape']
+
     # If arguments were numpy array, they have been saved as
     # list. We need to recover the ndarray
     if 'arguments' in config:
@@ -625,6 +697,7 @@ class Lambda(Layer):
             config['arguments'][key] = np.array(arg_dict['value'])
 
     config['function'] = function
+    config['output_shape'] = output_shape
     return cls(**config)
 
 
diff --git a/tensorflow/python/keras/_impl/keras/layers/core_test.py b/tensorflow/python/keras/_impl/keras/layers/core_test.py
index dd768dc268..1fe043561d 100644
--- a/tensorflow/python/keras/_impl/keras/layers/core_test.py
+++ b/tensorflow/python/keras/_impl/keras/layers/core_test.py
@@ -220,6 +220,34 @@ class CoreLayersTest(test.TestCase):
       self.assertEqual(1, len(layer.losses))
       _ = layer.get_config()
 
+  def test_lambda_output_shape(self):
+    with self.test_session():
+      l = keras.layers.Lambda(lambda x: x + 1, output_shape=(1, 1))
+      l(keras.backend.variable(np.ones((1, 1))))
+      self.assertEqual((1, 1), l.get_config()['output_shape'])
+
+  def test_lambda_output_shape_function(self):
+    def get_output_shape(input_shape):
+      return 1 * input_shape
+
+    with self.test_session():
+      l = keras.layers.Lambda(lambda x: x + 1, output_shape=get_output_shape)
+      l(keras.backend.variable(np.ones((1, 1))))
+      self.assertEqual('lambda', l.get_config()['output_shape_type'])
+
+  def test_lambda_config_serialization(self):
+    with self.test_session():
+      # test serialization with output_shape and output_shape_type
+      layer = keras.layers.Lambda(lambda x: x + 1, output_shape=(1, 1))
+      layer(keras.backend.variable(np.ones((1, 1))))
+      config = layer.get_config()
+      layer = keras.layers.deserialize({
+          'class_name': 'Lambda',
+          'config': config
+      })
+
+      layer = keras.layers.Lambda.from_config(config)
 
 if __name__ == '__main__':
   test.main()
+
diff --git a/tensorflow/python/keras/_impl/keras/losses.py b/tensorflow/python/keras/_impl/keras/losses.py
index 19212aeee8..1d6319abb1 100644
--- a/tensorflow/python/keras/_impl/keras/losses.py
+++ b/tensorflow/python/keras/_impl/keras/losses.py
@@ -61,10 +61,10 @@ def categorical_hinge(y_true, y_pred):
 
 def logcosh(y_true, y_pred):
 
-  def cosh(x):
-    return (K.exp(x) + K.exp(-x)) / 2
+  def _logcosh(x):
+    return x + K.softplus(-2. * x) - K.log(2.)
 
-  return K.mean(K.log(cosh(y_pred - y_true)), axis=-1)
+  return K.mean(_logcosh(y_pred - y_true), axis=-1)
 
 
 def categorical_crossentropy(y_true, y_pred):
diff --git a/tensorflow/python/keras/_impl/keras/models.py b/tensorflow/python/keras/_impl/keras/models.py
index ba202827ce..e262cc8c8e 100644
--- a/tensorflow/python/keras/_impl/keras/models.py
+++ b/tensorflow/python/keras/_impl/keras/models.py
@@ -1070,7 +1070,7 @@ class Sequential(Model):
 
   def fit_generator(self,
                     generator,
-                    steps_per_epoch,
+                    steps_per_epoch=None,
                     epochs=1,
                     verbose=1,
                     callbacks=None,
@@ -1101,8 +1101,10 @@ class Sequential(Model):
         steps_per_epoch: Total number of steps (batches of samples)
             to yield from `generator` before declaring one epoch
             finished and starting the next epoch. It should typically
-            be equal to the number of unique samples of your dataset
+            be equal to the number of samples of your dataset
             divided by the batch size.
+            Optional for `Sequence`: if unspecified, will use
+            the `len(generator)` as a number of steps.
         epochs: Integer, total number of iterations on the data.
             Note that in conjunction with initial_epoch, the parameter
             epochs is to be understood as "final epoch". The model is
@@ -1118,8 +1120,10 @@ class Sequential(Model):
             is a generator.
             Number of steps to yield from validation generator
             at the end of every epoch. It should typically
-            be equal to the number of unique samples of your
+            be equal to the number of samples of your
             validation dataset divided by the batch size.
+            Optional for `Sequence`: if unspecified, will use
+            the `len(validation_data)` as a number of steps.
         class_weight: Dictionary mapping class indices to a weight
             for the class.
         max_queue_size: Maximum size for the generator queue
@@ -1195,7 +1199,7 @@ class Sequential(Model):
 
   def evaluate_generator(self,
                          generator,
-                         steps,
+                         steps=None,
                          max_queue_size=10,
                          workers=1,
                          use_multiprocessing=False,
@@ -1210,6 +1214,8 @@ class Sequential(Model):
             or (inputs, targets, sample_weights)
         steps: Total number of steps (batches of samples)
             to yield from `generator` before stopping.
+            Optional for `Sequence`: if unspecified, will use
+            the `len(generator)` as a number of steps.
         max_queue_size: maximum size for the generator queue
         workers: maximum number of processes to spin up
         use_multiprocessing: if True, use process based threading.
@@ -1254,7 +1260,7 @@ class Sequential(Model):
 
   def predict_generator(self,
                         generator,
-                        steps,
+                        steps=None,
                         max_queue_size=10,
                         workers=1,
                         use_multiprocessing=False,
@@ -1269,6 +1275,8 @@ class Sequential(Model):
         generator: generator yielding batches of input samples.
         steps: Total number of steps (batches of samples)
             to yield from `generator` before stopping.
+            Optional for `Sequence`: if unspecified, will use
+            the `len(generator)` as a number of steps.
         max_queue_size: maximum size for the generator queue
         workers: maximum number of processes to spin up
         use_multiprocessing: if True, use process based threading.
diff --git a/tensorflow/python/keras/_impl/keras/preprocessing/image.py b/tensorflow/python/keras/_impl/keras/preprocessing/image.py
index 12dc718cd7..82441de592 100644
--- a/tensorflow/python/keras/_impl/keras/preprocessing/image.py
+++ b/tensorflow/python/keras/_impl/keras/preprocessing/image.py
@@ -556,7 +556,8 @@ class ImageDataGenerator(object):
                           save_to_dir=None,
                           save_prefix='',
                           save_format='png',
-                          follow_links=False):
+                          follow_links=False,
+                          interpolation='nearest'):
     return DirectoryIterator(
         directory,
         self,
@@ -571,7 +572,8 @@ class ImageDataGenerator(object):
         save_to_dir=save_to_dir,
         save_prefix=save_prefix,
         save_format=save_format,
-        follow_links=follow_links)
+        follow_links=follow_links,
+        interpolation=interpolation)
 
   def standardize(self, x):
     """Apply the normalization configuration to a batch of inputs.
@@ -596,7 +598,7 @@ class ImageDataGenerator(object):
         x -= self.mean
       else:
         logging.warning('This ImageDataGenerator specifies '
-                        '`featurewise_center`, but it hasn\'t'
+                        '`featurewise_center`, but it hasn\'t '
                         'been fit on any training data. Fit it '
                         'first by calling `.fit(numpy_data)`.')
     if self.featurewise_std_normalization:
@@ -604,7 +606,7 @@ class ImageDataGenerator(object):
         x /= (self.std + 1e-7)
       else:
         logging.warning('This ImageDataGenerator specifies '
-                        '`featurewise_std_normalization`, but it hasn\'t'
+                        '`featurewise_std_normalization`, but it hasn\'t '
                         'been fit on any training data. Fit it '
                         'first by calling `.fit(numpy_data)`.')
     if self.zca_whitening:
@@ -614,7 +616,7 @@ class ImageDataGenerator(object):
         x = np.reshape(whitex, x.shape)
       else:
         logging.warning('This ImageDataGenerator specifies '
-                        '`zca_whitening`, but it hasn\'t'
+                        '`zca_whitening`, but it hasn\'t '
                         'been fit on any training data. Fit it '
                         'first by calling `.fit(numpy_data)`.')
     return x
@@ -833,8 +835,7 @@ class Iterator(Sequence):
     return self._get_batches_of_transformed_samples(index_array)
 
   def __len__(self):
-    length = int(np.ceil(self.n / float(self.batch_size)))
-    return np.maximum(length, 0)
+    return (self.n + self.batch_size - 1) // self.batch_size  # round up
 
   def on_epoch_end(self):
     self._set_index_array()
@@ -1091,6 +1092,12 @@ class DirectoryIterator(Iterator):
           images (if `save_to_dir` is set).
       save_format: Format to use for saving sample images
           (if `save_to_dir` is set).
+      interpolation: Interpolation method used to resample the image if the
+          target size is different from that of the loaded image.
+          Supported methods are "nearest", "bilinear", and "bicubic".
+          If PIL version 1.1.3 or newer is installed, "lanczos" is also
+          supported. If PIL version 3.4.0 or newer is installed, "box" and
+          "hamming" are also supported. By default, "nearest" is used.
   """
 
   def __init__(self,
@@ -1107,7 +1114,8 @@ class DirectoryIterator(Iterator):
                save_to_dir=None,
                save_prefix='',
                save_format='png',
-               follow_links=False):
+               follow_links=False,
+               interpolation='nearest'):
     if data_format is None:
       data_format = K.image_data_format()
     self.directory = directory
@@ -1138,6 +1146,7 @@ class DirectoryIterator(Iterator):
     self.save_to_dir = save_to_dir
     self.save_prefix = save_prefix
     self.save_format = save_format
+    self.interpolation = interpolation
 
     white_list_formats = {'png', 'jpg', 'jpeg', 'bmp', 'ppm'}
 
@@ -1192,7 +1201,8 @@ class DirectoryIterator(Iterator):
       fname = self.filenames[j]
       img = load_img(os.path.join(self.directory, fname),
                      grayscale=grayscale,
-                     target_size=self.target_size)
+                     target_size=self.target_size,
+                     interpolation=self.interpolation)
       x = img_to_array(img, data_format=self.data_format)
       x = self.image_data_generator.random_transform(x)
       x = self.image_data_generator.standardize(x)
diff --git a/tensorflow/python/keras/_impl/keras/utils/data_utils.py b/tensorflow/python/keras/_impl/keras/utils/data_utils.py
index 1f2e9ac440..df76e6712a 100644
--- a/tensorflow/python/keras/_impl/keras/utils/data_utils.py
+++ b/tensorflow/python/keras/_impl/keras/utils/data_utils.py
@@ -28,6 +28,7 @@ import sys
 import tarfile
 import threading
 import time
+import traceback
 import zipfile
 
 import numpy as np
@@ -560,9 +561,9 @@ class OrderedEnqueuer(SequenceEnqueuer):
         self.queue.task_done()
         if inputs is not None:
           yield inputs
-    except Exception as e:
+    except Exception as e:  # pylint: disable=broad-except
       self.stop()
-      raise StopIteration(e)
+      six.raise_from(StopIteration(e), e)
 
   def _send_sequence(self):
     """Send current Sequence to all workers."""
@@ -623,6 +624,7 @@ class GeneratorEnqueuer(SequenceEnqueuer):
     self._use_multiprocessing = use_multiprocessing
     self._threads = []
     self._stop_event = None
+    self._manager = None
     self.queue = None
     self.seed = seed
 
@@ -640,18 +642,27 @@ class GeneratorEnqueuer(SequenceEnqueuer):
         try:
           if self._use_multiprocessing or self.queue.qsize() < max_queue_size:
             generator_output = next(self._generator)
-            self.queue.put(generator_output)
+            self.queue.put((True, generator_output))
           else:
             time.sleep(self.wait_time)
         except StopIteration:
           break
-        except Exception:
+        except Exception as e:  # pylint: disable=broad-except
+          # Can't pick tracebacks.
+          # As a compromise, print the traceback and pickle None instead.
+          if self._use_multiprocessing:
+            traceback.print_exc()
+            setattr(e, '__traceback__', None)
+          elif not hasattr(e, '__traceback__'):
+            setattr(e, '__traceback__', sys.exc_info()[2])
+          self.queue.put((False, e))
           self._stop_event.set()
-          raise
+          break
 
     try:
       if self._use_multiprocessing:
-        self.queue = multiprocessing.Queue(maxsize=max_queue_size)
+        self._manager = multiprocessing.Manager()
+        self.queue = self._manager.Queue(maxsize=max_queue_size)
         self._stop_event = multiprocessing.Event()
       else:
         self.queue = queue.Queue()
@@ -695,9 +706,8 @@ class GeneratorEnqueuer(SequenceEnqueuer):
         else:
           thread.join(timeout)
 
-    if self._use_multiprocessing:
-      if self.queue is not None:
-        self.queue.close()
+    if self._manager:
+      self._manager.shutdown()
 
     self._threads = []
     self._stop_event = None
@@ -713,12 +723,23 @@ class GeneratorEnqueuer(SequenceEnqueuer):
     """
     while self.is_running():
       if not self.queue.empty():
-        inputs = self.queue.get()
-        if inputs is not None:
-          yield inputs
+        success, value = self.queue.get()
+        # Rethrow any exceptions found in the queue
+        if not success:
+          six.reraise(value.__class__, value, value.__traceback__)
+        # Yield regular values
+        if value is not None:
+          yield value
       else:
         all_finished = all([not thread.is_alive() for thread in self._threads])
         if all_finished and self.queue.empty():
           raise StopIteration()
         else:
           time.sleep(self.wait_time)
+
+      # Make sure to rethrow the first exception in the queue, if any
+    while not self.queue.empty():
+      success, value = self.queue.get()
+      if not success:
+        six.reraise(value.__class__, value, value.__traceback__)
+
diff --git a/tensorflow/python/keras/_impl/keras/utils/data_utils_test.py b/tensorflow/python/keras/_impl/keras/utils/data_utils_test.py
index 47c5b4cff0..d541cccbe5 100644
--- a/tensorflow/python/keras/_impl/keras/utils/data_utils_test.py
+++ b/tensorflow/python/keras/_impl/keras/utils/data_utils_test.py
@@ -186,7 +186,7 @@ class TestEnqueuers(test.TestCase):
         use_multiprocessing=False)
     enqueuer.start(3, 10)
     gen_output = enqueuer.get()
-    with self.assertRaises(StopIteration):
+    with self.assertRaises(IndexError):
       next(gen_output)
 
   @unittest.skipIf(
@@ -198,7 +198,7 @@ class TestEnqueuers(test.TestCase):
         use_multiprocessing=True)
     enqueuer.start(3, 10)
     gen_output = enqueuer.get()
-    with self.assertRaises(StopIteration):
+    with self.assertRaises(IndexError):
       next(gen_output)
 
   def test_ordered_enqueuer_threads(self):
diff --git a/tensorflow/python/keras/_impl/keras/utils/generic_utils.py b/tensorflow/python/keras/_impl/keras/utils/generic_utils.py
index 025e5d30a5..e9e54c2a2a 100644
--- a/tensorflow/python/keras/_impl/keras/utils/generic_utils.py
+++ b/tensorflow/python/keras/_impl/keras/utils/generic_utils.py
@@ -17,6 +17,7 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
+import codecs
 import marshal
 import os
 import sys
@@ -197,10 +198,11 @@ def func_dump(func):
       A tuple `(code, defaults, closure)`.
   """
   if os.name == 'nt':
-    code = marshal.dumps(
-        func.__code__).replace(b'\\', b'/').decode('raw_unicode_escape')
+    raw_code = marshal.dumps(func.__code__).replace(b'\\', b'/')
+    code = codecs.encode(raw_code, 'base64').decode('ascii')
   else:
-    code = marshal.dumps(func.__code__).decode('raw_unicode_escape')
+    raw_code = marshal.dumps(func.__code__)
+    code = codecs.encode(raw_code, 'base64').decode('ascii')
   defaults = func.__defaults__
   if func.__closure__:
     closure = tuple(c.cell_contents for c in func.__closure__)
@@ -225,7 +227,30 @@ def func_load(code, defaults=None, closure=None, globs=None):
     code, defaults, closure = code
     if isinstance(defaults, list):
       defaults = tuple(defaults)
-  code = marshal.loads(code.encode('raw_unicode_escape'))
+
+  def ensure_value_to_cell(value):
+    """Ensures that a value is converted to a python cell object.
+
+    Arguments:
+        value: Any value that needs to be casted to the cell type
+
+    Returns:
+        A value wrapped as a cell object (see function "func_load")
+    """
+    def dummy_fn():
+      # pylint: disable=pointless-statement
+      value  # just access it so it gets captured in .__closure__
+
+    cell_value = dummy_fn.__closure__[0]
+    if not isinstance(value, type(cell_value)):
+      return cell_value
+    else:
+      return value
+
+  if closure is not None:
+    closure = tuple(ensure_value_to_cell(_) for _ in closure)
+  raw_code = codecs.decode(code.encode('ascii'), 'base64')
+  code = marshal.loads(raw_code)
   if globs is None:
     globs = globals()
   return python_types.FunctionType(
diff --git a/tensorflow/python/keras/_impl/keras/utils/np_utils.py b/tensorflow/python/keras/_impl/keras/utils/np_utils.py
index 896016d4d8..67d83bf42c 100644
--- a/tensorflow/python/keras/_impl/keras/utils/np_utils.py
+++ b/tensorflow/python/keras/_impl/keras/utils/np_utils.py
@@ -35,7 +35,7 @@ def to_categorical(y, num_classes=None):
   """
   y = np.array(y, dtype='int')
   input_shape = y.shape
-  if input_shape and input_shape[-1] == 1:
+  if input_shape and input_shape[-1] == 1 and len(input_shape) > 1:
     input_shape = tuple(input_shape[:-1])
   y = y.ravel()
   if not num_classes:
diff --git a/tensorflow/python/keras/_impl/keras/utils/np_utils_test.py b/tensorflow/python/keras/_impl/keras/utils/np_utils_test.py
index 9680c295cd..1e974c2ef2 100644
--- a/tensorflow/python/keras/_impl/keras/utils/np_utils_test.py
+++ b/tensorflow/python/keras/_impl/keras/utils/np_utils_test.py
@@ -28,8 +28,9 @@ class TestNPUtils(test.TestCase):
 
   def test_to_categorical(self):
     num_classes = 5
-    shapes = [(3,), (4, 3), (5, 4, 3), (3, 1), (3, 2, 1)]
-    expected_shapes = [(3, num_classes),
+    shapes = [(1,), (3,), (4, 3), (5, 4, 3), (3, 1), (3, 2, 1)]
+    expected_shapes = [(1, num_classes),
+                       (3, num_classes),
                        (4, 3, num_classes),
                        (5, 4, 3, num_classes),
                        (3, num_classes)]
diff --git a/tensorflow/python/keras/_impl/keras/utils/training_utils.py b/tensorflow/python/keras/_impl/keras/utils/training_utils.py
index 8939c814cf..0bf4ac8a24 100644
--- a/tensorflow/python/keras/_impl/keras/utils/training_utils.py
+++ b/tensorflow/python/keras/_impl/keras/utils/training_utils.py
@@ -112,12 +112,22 @@ def multi_gpu_model(model, gpus):
   from tensorflow.python.keras._impl.keras.layers.core import Lambda
   from tensorflow.python.keras._impl.keras.layers.merge import concatenate
 
-  if gpus <= 1:
-    raise ValueError('For multi-gpu usage to be effective, '
-                     'call `multi_gpu_model` with `gpus >= 2`. '
-                     'Received: `gpus=%d`' % gpus)
-
-  target_devices = ['/cpu:0'] + ['/gpu:%d' % i for i in range(gpus)]
+  if isinstance(gpus, (list, tuple)):
+    if len(gpus) <= 1:
+      raise ValueError('For multi-gpu usage to be effective, '
+                       'call `multi_gpu_model` with `len(gpus) >= 2`. '
+                       'Received: `gpus=%s`' % gpus)
+    num_gpus = len(gpus)
+    target_gpu_ids = gpus
+  else:
+    if gpus <= 1:
+      raise ValueError('For multi-gpu usage to be effective, '
+                       'call `multi_gpu_model` with `gpus >= 2`. '
+                       'Received: `gpus=%d`' % gpus)
+    num_gpus = gpus
+    target_gpu_ids = range(num_gpus)
+
+  target_devices = ['/cpu:0'] + ['/gpu:%d' % i for i in target_gpu_ids]
   available_devices = _get_available_devices()
   available_devices = [
       _normalize_device_name(name) for name in available_devices
@@ -145,7 +155,7 @@ def multi_gpu_model(model, gpus):
     batch_size = shape[:1]
     input_shape = shape[1:]
     step = batch_size // parts
-    if i == gpus - 1:
+    if i == num_gpus - 1:
       size = batch_size - step * i
     else:
       size = step
@@ -160,9 +170,9 @@ def multi_gpu_model(model, gpus):
 
   # Place a copy of the model on each GPU,
   # each getting a slice of the inputs.
-  for i in range(gpus):
-    with ops.device('/gpu:%d' % i):
-      with ops.name_scope('replica_%d' % i):
+  for i, gpu_id in enumerate(target_gpu_ids):
+    with ops.device('/gpu:%d' % gpu_id):
+      with ops.name_scope('replica_%d' % gpu_id):
         inputs = []
         # Retrieve a slice of the input.
         for x in model.inputs:
@@ -172,8 +182,9 @@ def multi_gpu_model(model, gpus):
               output_shape=input_shape,
               arguments={
                   'i': i,
-                  'parts': gpus
-              })(x)
+                  'parts': num_gpus
+              })(
+                  x)
           inputs.append(slice_i)
 
         # Apply model on slice
@@ -189,6 +200,7 @@ def multi_gpu_model(model, gpus):
   # Merge outputs on CPU.
   with ops.device('/cpu:0'):
     merged = []
-    for outputs in all_outputs:
-      merged.append(concatenate(outputs, axis=0))
+    for name, outputs in zip(model.output_names, all_outputs):
+      merged.append(concatenate(outputs, axis=0, name=name))
     return Model(model.inputs, merged)
+
diff --git a/tensorflow/python/keras/_impl/keras/utils/training_utils_test.py b/tensorflow/python/keras/_impl/keras/utils/training_utils_test.py
index 51fbd041a4..12354c49ca 100644
--- a/tensorflow/python/keras/_impl/keras/utils/training_utils_test.py
+++ b/tensorflow/python/keras/_impl/keras/utils/training_utils_test.py
@@ -33,6 +33,7 @@ class TestMultiGPUModel(test.TestCase):
     output_dim = 1
     hidden_dim = 10
     epochs = 2
+    target_gpu_id = [0, 2, 4]
 
     with self.test_session():
       model = keras.models.Sequential()
@@ -42,8 +43,12 @@ class TestMultiGPUModel(test.TestCase):
 
       x = np.random.random((num_samples, input_dim))
       y = np.random.random((num_samples, output_dim))
+
       parallel_model = keras.utils.multi_gpu_model(model, gpus=gpus)
+      parallel_model.compile(loss='mse', optimizer='rmsprop')
+      parallel_model.fit(x, y, epochs=epochs)
 
+      parallel_model = keras.utils.multi_gpu_model(model, gpus=target_gpu_id)
       parallel_model.compile(loss='mse', optimizer='rmsprop')
       parallel_model.fit(x, y, epochs=epochs)
 
@@ -56,6 +61,7 @@ class TestMultiGPUModel(test.TestCase):
     output_dim_b = 2
     hidden_dim = 10
     epochs = 2
+    target_gpu_id = [0, 2, 4]
 
     with self.test_session():
       input_a = keras.Input((input_dim_a,))
@@ -76,6 +82,10 @@ class TestMultiGPUModel(test.TestCase):
       parallel_model.compile(loss='mse', optimizer='rmsprop')
       parallel_model.fit([a_x, b_x], [a_y, b_y], epochs=epochs)
 
+      parallel_model = keras.utils.multi_gpu_model(model, gpus=target_gpu_id)
+      parallel_model.compile(loss='mse', optimizer='rmsprop')
+      parallel_model.fit([a_x, b_x], [a_y, b_y], epochs=epochs)
+
   def multi_gpu_test_invalid_devices(self):
     with self.test_session():
       input_shape = (1000, 10)
@@ -92,3 +102,16 @@ class TestMultiGPUModel(test.TestCase):
         parallel_model = keras.utils.multi_gpu_model(
             model, gpus=len(keras.backend._get_available_gpus()) + 1)
         parallel_model.fit(x, y, epochs=2)
+
+      with self.assertRaises(ValueError):
+        parallel_model = keras.utils.multi_gpu_model(
+            model, gpus=[0, 2, 4, 6, 8])
+        parallel_model.fit(x, y, epochs=2)
+
+      with self.assertRaises(ValueError):
+        parallel_model = keras.utils.multi_gpu_model(model, gpus=1)
+        parallel_model.fit(x, y, epochs=2)
+
+      with self.assertRaises(ValueError):
+        parallel_model = keras.utils.multi_gpu_model(model, gpus=[0])
+        parallel_model.fit(x, y, epochs=2)
diff --git a/tensorflow/python/keras/_impl/keras/wrappers/scikit_learn.py b/tensorflow/python/keras/_impl/keras/wrappers/scikit_learn.py
index 31ef4773ad..bc788d874f 100644
--- a/tensorflow/python/keras/_impl/keras/wrappers/scikit_learn.py
+++ b/tensorflow/python/keras/_impl/keras/wrappers/scikit_learn.py
@@ -38,18 +38,18 @@ class BaseWrapper(object):
       build_fn: callable function or class instance
       **sk_params: model parameters & fitting parameters
 
-  The build_fn should construct, compile and return a Keras model, which
+  The `build_fn` should construct, compile and return a Keras model, which
   will then be used to fit/predict. One of the following
-  three values could be passed to build_fn:
+  three values could be passed to `build_fn`:
   1. A function
-  2. An instance of a class that implements the __call__ method
+  2. An instance of a class that implements the `__call__` method
   3. None. This means you implement a class that inherits from either
-  `KerasClassifier` or `KerasRegressor`. The __call__ method of the
-  present class will then be treated as the default build_fn.
+  `KerasClassifier` or `KerasRegressor`. The `__call__` method of the
+  present class will then be treated as the default `build_fn`.
 
   `sk_params` takes both model parameters and fitting parameters. Legal model
   parameters are the arguments of `build_fn`. Note that like all other
-  estimators in scikit-learn, 'build_fn' should provide default values for
+  estimators in scikit-learn, `build_fn` should provide default values for
   its arguments, so that you could create the estimator without passing any
   values to `sk_params`.
 
diff --git a/tensorflow/tools/api/golden/tensorflow.keras.-model.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.-model.pbtxt
index 07b8d900da..af8278be93 100644
--- a/tensorflow/tools/api/golden/tensorflow.keras.-model.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.keras.-model.pbtxt
@@ -156,7 +156,7 @@ tf_class {
   }
   member_method {
     name: "evaluate_generator"
-    argspec: "args=[\'self\', \'generator\', \'steps\', \'max_queue_size\', \'workers\', \'use_multiprocessing\'], varargs=None, keywords=kwargs, defaults=[\'10\', \'1\', \'False\'], "
+    argspec: "args=[\'self\', \'generator\', \'steps\', \'max_queue_size\', \'workers\', \'use_multiprocessing\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'10\', \'1\', \'False\'], "
   }
   member_method {
     name: "fit"
@@ -164,7 +164,7 @@ tf_class {
   }
   member_method {
     name: "fit_generator"
-    argspec: "args=[\'self\', \'generator\', \'steps_per_epoch\', \'epochs\', \'verbose\', \'callbacks\', \'validation_data\', \'validation_steps\', \'class_weight\', \'max_queue_size\', \'workers\', \'use_multiprocessing\', \'shuffle\', \'initial_epoch\'], varargs=None, keywords=kwargs, defaults=[\'1\', \'1\', \'None\', \'None\', \'None\', \'None\', \'10\', \'1\', \'False\', \'True\', \'0\'], "
+    argspec: "args=[\'self\', \'generator\', \'steps_per_epoch\', \'epochs\', \'verbose\', \'callbacks\', \'validation_data\', \'validation_steps\', \'class_weight\', \'max_queue_size\', \'workers\', \'use_multiprocessing\', \'shuffle\', \'initial_epoch\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'1\', \'1\', \'None\', \'None\', \'None\', \'None\', \'10\', \'1\', \'False\', \'True\', \'0\'], "
   }
   member_method {
     name: "from_config"
@@ -224,7 +224,7 @@ tf_class {
   }
   member_method {
     name: "predict_generator"
-    argspec: "args=[\'self\', \'generator\', \'steps\', \'max_queue_size\', \'workers\', \'use_multiprocessing\', \'verbose\'], varargs=None, keywords=kwargs, defaults=[\'10\', \'1\', \'False\', \'0\'], "
+    argspec: "args=[\'self\', \'generator\', \'steps\', \'max_queue_size\', \'workers\', \'use_multiprocessing\', \'verbose\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'10\', \'1\', \'False\', \'0\'], "
   }
   member_method {
     name: "predict_on_batch"
diff --git a/tensorflow/tools/api/golden/tensorflow.keras.-sequential.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.-sequential.pbtxt
index 546bac44e4..c17fbc45bd 100644
--- a/tensorflow/tools/api/golden/tensorflow.keras.-sequential.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.keras.-sequential.pbtxt
@@ -169,7 +169,7 @@ tf_class {
   }
   member_method {
     name: "evaluate_generator"
-    argspec: "args=[\'self\', \'generator\', \'steps\', \'max_queue_size\', \'workers\', \'use_multiprocessing\'], varargs=None, keywords=kwargs, defaults=[\'10\', \'1\', \'False\'], "
+    argspec: "args=[\'self\', \'generator\', \'steps\', \'max_queue_size\', \'workers\', \'use_multiprocessing\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'10\', \'1\', \'False\'], "
   }
   member_method {
     name: "fit"
@@ -177,7 +177,7 @@ tf_class {
   }
   member_method {
     name: "fit_generator"
-    argspec: "args=[\'self\', \'generator\', \'steps_per_epoch\', \'epochs\', \'verbose\', \'callbacks\', \'validation_data\', \'validation_steps\', \'class_weight\', \'max_queue_size\', \'workers\', \'use_multiprocessing\', \'shuffle\', \'initial_epoch\'], varargs=None, keywords=kwargs, defaults=[\'1\', \'1\', \'None\', \'None\', \'None\', \'None\', \'10\', \'1\', \'False\', \'True\', \'0\'], "
+    argspec: "args=[\'self\', \'generator\', \'steps_per_epoch\', \'epochs\', \'verbose\', \'callbacks\', \'validation_data\', \'validation_steps\', \'class_weight\', \'max_queue_size\', \'workers\', \'use_multiprocessing\', \'shuffle\', \'initial_epoch\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'1\', \'1\', \'None\', \'None\', \'None\', \'None\', \'10\', \'1\', \'False\', \'True\', \'0\'], "
   }
   member_method {
     name: "from_config"
@@ -245,7 +245,7 @@ tf_class {
   }
   member_method {
     name: "predict_generator"
-    argspec: "args=[\'self\', \'generator\', \'steps\', \'max_queue_size\', \'workers\', \'use_multiprocessing\', \'verbose\'], varargs=None, keywords=kwargs, defaults=[\'10\', \'1\', \'False\', \'0\'], "
+    argspec: "args=[\'self\', \'generator\', \'steps\', \'max_queue_size\', \'workers\', \'use_multiprocessing\', \'verbose\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'10\', \'1\', \'False\', \'0\'], "
   }
   member_method {
     name: "predict_on_batch"
diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-lambda.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-lambda.pbtxt
index 59508c2f11..508cea005a 100644
--- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-lambda.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-lambda.pbtxt
@@ -90,7 +90,7 @@ tf_class {
   }
   member_method {
     name: "__init__"
-    argspec: "args=[\'self\', \'function\', \'mask\', \'arguments\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\'], "
+    argspec: "args=[\'self\', \'function\', \'output_shape\', \'mask\', \'arguments\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\'], "
   }
   member_method {
     name: "add_loss"
diff --git a/tensorflow/tools/api/golden/tensorflow.keras.models.-model.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.models.-model.pbtxt
index 4e522813a5..af287497dd 100644
--- a/tensorflow/tools/api/golden/tensorflow.keras.models.-model.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.keras.models.-model.pbtxt
@@ -156,7 +156,7 @@ tf_class {
   }
   member_method {
     name: "evaluate_generator"
-    argspec: "args=[\'self\', \'generator\', \'steps\', \'max_queue_size\', \'workers\', \'use_multiprocessing\'], varargs=None, keywords=kwargs, defaults=[\'10\', \'1\', \'False\'], "
+    argspec: "args=[\'self\', \'generator\', \'steps\', \'max_queue_size\', \'workers\', \'use_multiprocessing\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'10\', \'1\', \'False\'], "
   }
   member_method {
     name: "fit"
@@ -164,7 +164,7 @@ tf_class {
   }
   member_method {
     name: "fit_generator"
-    argspec: "args=[\'self\', \'generator\', \'steps_per_epoch\', \'epochs\', \'verbose\', \'callbacks\', \'validation_data\', \'validation_steps\', \'class_weight\', \'max_queue_size\', \'workers\', \'use_multiprocessing\', \'shuffle\', \'initial_epoch\'], varargs=None, keywords=kwargs, defaults=[\'1\', \'1\', \'None\', \'None\', \'None\', \'None\', \'10\', \'1\', \'False\', \'True\', \'0\'], "
+    argspec: "args=[\'self\', \'generator\', \'steps_per_epoch\', \'epochs\', \'verbose\', \'callbacks\', \'validation_data\', \'validation_steps\', \'class_weight\', \'max_queue_size\', \'workers\', \'use_multiprocessing\', \'shuffle\', \'initial_epoch\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'1\', \'1\', \'None\', \'None\', \'None\', \'None\', \'10\', \'1\', \'False\', \'True\', \'0\'], "
   }
   member_method {
     name: "from_config"
@@ -224,7 +224,7 @@ tf_class {
   }
   member_method {
     name: "predict_generator"
-    argspec: "args=[\'self\', \'generator\', \'steps\', \'max_queue_size\', \'workers\', \'use_multiprocessing\', \'verbose\'], varargs=None, keywords=kwargs, defaults=[\'10\', \'1\', \'False\', \'0\'], "
+    argspec: "args=[\'self\', \'generator\', \'steps\', \'max_queue_size\', \'workers\', \'use_multiprocessing\', \'verbose\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'10\', \'1\', \'False\', \'0\'], "
   }
   member_method {
     name: "predict_on_batch"
diff --git a/tensorflow/tools/api/golden/tensorflow.keras.models.-sequential.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.models.-sequential.pbtxt
index ddbb358c84..0fd7dd9e29 100644
--- a/tensorflow/tools/api/golden/tensorflow.keras.models.-sequential.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.keras.models.-sequential.pbtxt
@@ -169,7 +169,7 @@ tf_class {
   }
   member_method {
     name: "evaluate_generator"
-    argspec: "args=[\'self\', \'generator\', \'steps\', \'max_queue_size\', \'workers\', \'use_multiprocessing\'], varargs=None, keywords=kwargs, defaults=[\'10\', \'1\', \'False\'], "
+    argspec: "args=[\'self\', \'generator\', \'steps\', \'max_queue_size\', \'workers\', \'use_multiprocessing\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'10\', \'1\', \'False\'], "
   }
   member_method {
     name: "fit"
@@ -177,7 +177,7 @@ tf_class {
   }
   member_method {
     name: "fit_generator"
-    argspec: "args=[\'self\', \'generator\', \'steps_per_epoch\', \'epochs\', \'verbose\', \'callbacks\', \'validation_data\', \'validation_steps\', \'class_weight\', \'max_queue_size\', \'workers\', \'use_multiprocessing\', \'shuffle\', \'initial_epoch\'], varargs=None, keywords=kwargs, defaults=[\'1\', \'1\', \'None\', \'None\', \'None\', \'None\', \'10\', \'1\', \'False\', \'True\', \'0\'], "
+    argspec: "args=[\'self\', \'generator\', \'steps_per_epoch\', \'epochs\', \'verbose\', \'callbacks\', \'validation_data\', \'validation_steps\', \'class_weight\', \'max_queue_size\', \'workers\', \'use_multiprocessing\', \'shuffle\', \'initial_epoch\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'1\', \'1\', \'None\', \'None\', \'None\', \'None\', \'10\', \'1\', \'False\', \'True\', \'0\'], "
   }
   member_method {
     name: "from_config"
@@ -245,7 +245,7 @@ tf_class {
   }
   member_method {
     name: "predict_generator"
-    argspec: "args=[\'self\', \'generator\', \'steps\', \'max_queue_size\', \'workers\', \'use_multiprocessing\', \'verbose\'], varargs=None, keywords=kwargs, defaults=[\'10\', \'1\', \'False\', \'0\'], "
+    argspec: "args=[\'self\', \'generator\', \'steps\', \'max_queue_size\', \'workers\', \'use_multiprocessing\', \'verbose\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'10\', \'1\', \'False\', \'0\'], "
   }
   member_method {
     name: "predict_on_batch"
diff --git a/tensorflow/tools/api/golden/tensorflow.keras.preprocessing.image.-directory-iterator.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.preprocessing.image.-directory-iterator.pbtxt
index 66cd37bb3a..04174bff5f 100644
--- a/tensorflow/tools/api/golden/tensorflow.keras.preprocessing.image.-directory-iterator.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.keras.preprocessing.image.-directory-iterator.pbtxt
@@ -6,7 +6,7 @@ tf_class {
   is_instance: "<type \'object\'>"
   member_method {
     name: "__init__"
-    argspec: "args=[\'self\', \'directory\', \'image_data_generator\', \'target_size\', \'color_mode\', \'classes\', \'class_mode\', \'batch_size\', \'shuffle\', \'seed\', \'data_format\', \'save_to_dir\', \'save_prefix\', \'save_format\', \'follow_links\'], varargs=None, keywords=None, defaults=[\'(256, 256)\', \'rgb\', \'None\', \'categorical\', \'32\', \'True\', \'None\', \'None\', \'None\', \'\', \'png\', \'False\'], "
+    argspec: "args=[\'self\', \'directory\', \'image_data_generator\', \'target_size\', \'color_mode\', \'classes\', \'class_mode\', \'batch_size\', \'shuffle\', \'seed\', \'data_format\', \'save_to_dir\', \'save_prefix\', \'save_format\', \'follow_links\', \'interpolation\'], varargs=None, keywords=None, defaults=[\'(256, 256)\', \'rgb\', \'None\', \'categorical\', \'32\', \'True\', \'None\', \'None\', \'None\', \'\', \'png\', \'False\', \'nearest\'], "
   }
   member_method {
     name: "next"
diff --git a/tensorflow/tools/api/golden/tensorflow.keras.preprocessing.image.-image-data-generator.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.preprocessing.image.-image-data-generator.pbtxt
index 7e33285e7a..41f27d1f74 100644
--- a/tensorflow/tools/api/golden/tensorflow.keras.preprocessing.image.-image-data-generator.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.keras.preprocessing.image.-image-data-generator.pbtxt
@@ -16,7 +16,7 @@ tf_class {
   }
   member_method {
     name: "flow_from_directory"
-    argspec: "args=[\'self\', \'directory\', \'target_size\', \'color_mode\', \'classes\', \'class_mode\', \'batch_size\', \'shuffle\', \'seed\', \'save_to_dir\', \'save_prefix\', \'save_format\', \'follow_links\'], varargs=None, keywords=None, defaults=[\'(256, 256)\', \'rgb\', \'None\', \'categorical\', \'32\', \'True\', \'None\', \'None\', \'\', \'png\', \'False\'], "
+    argspec: "args=[\'self\', \'directory\', \'target_size\', \'color_mode\', \'classes\', \'class_mode\', \'batch_size\', \'shuffle\', \'seed\', \'save_to_dir\', \'save_prefix\', \'save_format\', \'follow_links\', \'interpolation\'], varargs=None, keywords=None, defaults=[\'(256, 256)\', \'rgb\', \'None\', \'categorical\', \'32\', \'True\', \'None\', \'None\', \'\', \'png\', \'False\', \'nearest\'], "
   }
   member_method {
     name: "random_transform"
-- 
GitLab


From f22809c16ef54be6c515c9889c88ade746146e2e Mon Sep 17 00:00:00 2001
From: Derek Murray <mrry@google.com>
Date: Wed, 13 Dec 2017 17:09:12 -0800
Subject: [PATCH 0987/1225] Enable TF_GPU_THREAD_MODE and TF_GPU_THREAD_COUNT
 support in distributed TensorFlow.

PiperOrigin-RevId: 178980799
---
 tensorflow/core/common_runtime/direct_session.cc |  1 -
 tensorflow/core/distributed_runtime/graph_mgr.cc | 12 +++++++++++-
 2 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/tensorflow/core/common_runtime/direct_session.cc b/tensorflow/core/common_runtime/direct_session.cc
index 875a3ac841..6e243c4b7c 100644
--- a/tensorflow/core/common_runtime/direct_session.cc
+++ b/tensorflow/core/common_runtime/direct_session.cc
@@ -588,7 +588,6 @@ Status DirectSession::Run(const RunOptions& run_options,
   };
   for (const auto& item : executors_and_keys->items) {
     // TODO(zhengxq): support partial run.
-    // TODO(zhengxq): support other session types.
     // TODO(zhengxq): if the device picks its own threadpool, we need to assign
     //     less threads to the main compute pool by default.
     thread::ThreadPool* device_thread_pool =
diff --git a/tensorflow/core/distributed_runtime/graph_mgr.cc b/tensorflow/core/distributed_runtime/graph_mgr.cc
index 45dfb7b2eb..0120f612ac 100644
--- a/tensorflow/core/distributed_runtime/graph_mgr.cc
+++ b/tensorflow/core/distributed_runtime/graph_mgr.cc
@@ -481,8 +481,18 @@ void GraphMgr::StartParallelExecutors(const string& handle, int64 step_id,
   using std::placeholders::_1;
   // Line below is equivalent to this code, but does one less indirect call:
   //  args.runner = [pool](std::function<void()> fn) { pool->Schedule(fn); };
-  args.runner = std::bind(&thread::ThreadPool::Schedule, pool, _1);
+  auto default_runner = std::bind(&thread::ThreadPool::Schedule, pool, _1);
   for (const auto& unit : item->units) {
+    // TODO(zhengxq): if the device picks its own threadpool, we need to assign
+    //     less threads to the main compute pool by default.
+    thread::ThreadPool* device_thread_pool =
+        unit.device->tensorflow_device_thread_pool();
+    if (!device_thread_pool) {
+      args.runner = default_runner;
+    } else {
+      args.runner =
+          std::bind(&thread::ThreadPool::Schedule, device_thread_pool, _1);
+    }
     unit.root->RunAsync(args, barrier->Get());
   }
 }
-- 
GitLab


From 69e5969d159fa8560eb61d82ec55b04d19bb0560 Mon Sep 17 00:00:00 2001
From: Jacques Pienaar <jpienaar@google.com>
Date: Wed, 13 Dec 2017 17:09:29 -0800
Subject: [PATCH 0988/1225] [TFXLA] Simplify identification of cond branches.

* Remove the clustered graph part as it was difficult to keep it updated with the rest of the graph and instead operate on the graph directly;

PiperOrigin-RevId: 178980836
---
 .../tf2xla/functionalize_control_flow.cc      | 1006 +++++------------
 .../tf2xla/functionalize_control_flow_test.cc |    2 +-
 2 files changed, 262 insertions(+), 746 deletions(-)

diff --git a/tensorflow/compiler/tf2xla/functionalize_control_flow.cc b/tensorflow/compiler/tf2xla/functionalize_control_flow.cc
index 267268298c..dd67a1dea9 100644
--- a/tensorflow/compiler/tf2xla/functionalize_control_flow.cc
+++ b/tensorflow/compiler/tf2xla/functionalize_control_flow.cc
@@ -29,6 +29,7 @@ limitations under the License.
 #include "tensorflow/compiler/xla/status_macros.h"
 #include "tensorflow/core/common_runtime/function.h"
 #include "tensorflow/core/framework/node_def_builder.h"
+#include "tensorflow/core/graph/algorithm.h"
 #include "tensorflow/core/graph/control_flow.h"
 #include "tensorflow/core/lib/gtl/optional.h"
 
@@ -528,259 +529,101 @@ Status FunctionalizeLoop(Graph* graph, Frame* frame,
 
 class FunctionalizeCond {
  public:
-  // Identifies the connected parts of the tf.Cond.
-  struct ClusterHandle {
-    explicit ClusterHandle(int representative = -1)
-        : representative(representative) {}
-
-    bool operator==(const ClusterHandle& other) const {
-      return representative == other.representative;
-    }
-
-    bool operator!=(const ClusterHandle& other) const {
-      return !(*this == other);
-    }
+  // All nodes are assumed to be either in no branch, then branch, else branch,
+  // or both branches (such as merge nodes).
+  enum Branch {
+    kElseBranch = 0,
+    kThenBranch = 1,
+    kBoth = 2,
+    kNeither = 3,
+    kNumBranchTypes = 4
+  };
 
-    bool operator<(const ClusterHandle& other) const {
-      return representative < other.representative;
+  // Returns a textual representation of the Branch b.
+  static string Branch_Name(FunctionalizeCond::Branch b);
+
+  // Comparison function used for sorting nodes consistently.
+  struct CondCmp {
+    bool operator()(const Node* lhs, const Node* rhs) const {
+      bool lhs_is_resource =
+          lhs->num_inputs() > 0 ? (lhs->input_type(0) == DT_RESOURCE) : false;
+      bool rhs_is_resource =
+          rhs->num_inputs() > 0 ? (rhs->input_type(0) == DT_RESOURCE) : false;
+      return std::tie(lhs_is_resource, lhs->name()) <
+             std::tie(rhs_is_resource, rhs->name());
     }
+  };
 
-    bool operator>(const ClusterHandle& other) const {
-      return representative > other.representative;
-    }
+  // Functionalize all the switch-merge nodes of a loop-free graph into XlaIf
+  // nodes. That is, attempt to transform every remaining switch and merge nodes
+  // in the graph into XlaIf nodes.
+  // Precondition: All while loops have been removed from graph.
+  static Status Functionalize(Graph* graph, FunctionLibraryDefinition* library);
 
+ private:
+  struct ForwardFlowNode {
+    explicit ForwardFlowNode(Branch branch = Branch::kNeither)
+        : branch(branch), count(0) {}
     string ToString() const {
-      return strings::StrCat("Cluster_", representative);
+      return strings::StrCat("branch=", Branch_Name(branch), " count=", count);
     }
-
-    // Vector of UnionFind<ClusterHandle> indexable by ClusterHandle and Node*.
-    struct Vector {
-      explicit Vector(size_t size) : clusters(size) {}
-
-      UnionFind<ClusterHandle>& at(const ClusterHandle& cluster) {
-        return clusters.at(cluster.representative);
-      }
-
-      UnionFind<ClusterHandle>& at(const Node* node) {
-        return clusters.at(node->id());
-      }
-
-      UnionFind<ClusterHandle>& operator[](const Node* node) {
-        return clusters.at(node->id());
-      }
-
-      size_t size() const { return clusters.size(); }
-
-      void resize(size_t count) { return clusters.resize(count); }
-
-     private:
-      std::vector<UnionFind<ClusterHandle>> clusters;
-    };
-
-   private:
-    int representative;
-  };
-
-  // Represents a node in the clustered graph consisting of switch_nodes,
-  // merge_nodes as well as the edges into and out of this node to other
-  // Clusters. Each Cluster corresponds to a ClusterHandle and has a
-  // corresponding representative.
-  struct Cluster {
-    std::unordered_set<Node*> switch_nodes;
-    std::unordered_set<Node*> merge_nodes;
-    std::unordered_set<Cluster*> in_nodes;
-    std::unordered_set<Cluster*> out_nodes;
-
-    // A member of the ClusterHandle corresponding to this Cluster.
-    ClusterHandle representative;
-    bool visited = false;
+    Branch branch;
+    int count;
   };
 
-  // Represent the clustered graph as map from cluster representative to
-  // Cluster.
-  using ClusteredGraph = std::map<ClusterHandle, Cluster>;
-
-  // The arguments and condition of a XlaIf. The arguments are ordered by node
-  // id in the original graph.
-  struct CondArgs {
-    struct CondCmp {
-      bool operator()(const Node* lhs, const Node* rhs) const {
-        bool lhs_is_resource =
-            lhs->num_inputs() > 0 ? (lhs->input_type(0) == DT_RESOURCE) : false;
-        bool rhs_is_resource =
-            rhs->num_inputs() > 0 ? (rhs->input_type(0) == DT_RESOURCE) : false;
-        return std::tie(lhs_is_resource, lhs->name()) <
-               std::tie(rhs_is_resource, rhs->name());
-      }
-    };
-    Node* conditional = nullptr;
-    std::set<Node*, CondCmp> args;
-  };
-
-  static Status Functionalize(Graph* graph, FunctionLibraryDefinition* library);
-
- private:
   FunctionalizeCond(Graph* graph, FunctionLibraryDefinition* library)
-      : clusters_(graph->num_node_ids()), library_(library), graph_(graph) {}
-
-  // Returns a vector of Switch nodes from the clustered graph where the nodes
-  // are sorted by the number of switch nodes minus number of merge nodes
-  // from a root of the clustered graph to the given Merge node, with ties
-  // broken by the representative of the Cluster. This corresponds to sorting by
-  // nesting depth, from deepest nested to outermost.
-  std::vector<std::pair<int, Cluster*>> SortedSwitchNodes();
-
-  // Returns whether the graph has no conditionals.
-  bool NoConditionals() const { return merge_nodes_.empty(); }
-
-  // Construct the clustered graph by creating nodes for each cluster and the
-  // connections between the clusters. Switch and Merge nodes partition
-  // clusters, so iterate over those. Note: a Cluster may have neither a
-  // Merge or Switch but will have an in/out edge from a Cluster that has.
-  void CreateClusters();
-
-  // Creates the clustered graph by identifying all the edges between different
-  // clusters and collecting all switch and merge nodes that correspond to a
-  // cluster.
-  void CreateClusteredGraph();
-
-  // If `from` and `to` correspond to different clusters, then merge the nodes
-  // in the clustered graph corresponding to `from` and `to`.
-  //
-  // If `remove_from_graph` is specified then the `from` node is also removed
-  // from the clustered graph post contracting the edge.
-  void ContractEdge(Cluster* from, Cluster* to, bool remove_from_graph = false);
+      : library_(library), graph_(graph) {}
+
+  // Perform the actual cond functionalization. Iterate over groups of switch
+  // nodes (linked by common predicate), from innermost to outermost, and
+  // extract into XlaIf nodes.
+  Status FunctionalizeInternal();
 
   // Converts a Merge node to a XlaIf. This encapsulates the process of
   // extracting the bodies needed for the then and else branch, creates a XlaIf
   // node, removing the nodes of the branches from the graph and replacing the
   // merge node with a XlaIf.
-  Status ConvertCorrespondingMergeToXlaIf(Cluster* switch_cluster);
-
-  // Removes a Switch cluster feeding directly into a Merge cluster by removing
-  // the Switch and Merge nodes and collapsing into a single cluster.
-  Status RemoveTrivialSwitch(Cluster* switch_cluster);
-
-  // Returns the merge cluster corresponding to the switch node. This function
-  // only returns the merge cluster in the case where we have a switch node that
-  // is the single entry point for all paths to a common merge cluster, this
-  // merge cluster may be created by combining multiple merge clusters, that
-  // share the switch cluster as common ancestor, together.
-  //
-  //           Switch
-  //          /      \
-  //     Branch      Branch
-  //          \      /
-  //        merge_cluster
-  //
-  // Note: either of the branches may be empty. The case where both branches are
-  // empty is handled by RemoveTrivialSwitch.
-  gtl::optional<Cluster*> CreateCorrespondingMergeCluster(
-      Cluster* switch_cluster);
-
-  // Determines the arguments needed as input to the Merge cluster originating
-  // from the Switch cluster.
-  xla::StatusOr<CondArgs> DetermineCondArgs(const Cluster& merge_cluster,
-                                            const Cluster& switch_cluster);
-
-  // Builds a XlaIfOp to replace the Merge node with.
-  xla::StatusOr<Node*> BuildAndAddXlaIfOp(const CondArgs& cond_args,
-                                          const Cluster& merge_cluster,
-                                          const std::vector<Node*>& outputs);
+  Status ConvertCorrespondingMergeToXlaIf(
+      const std::vector<Node*>& switch_nodes,
+      const std::vector<Node*>& merge_nodes, Node* predicate);
+
+  // Builds a XlaIfOp to replace the Switch-Graph-Merge cluster with.
+  xla::StatusOr<Node*> BuildAndAddXlaIfOp(
+      const std::vector<Node*>& switch_nodes,
+      const std::vector<Node*>& merge_nodes, Node* predicate);
 
   // Extracts a function body corresponding to the given input edge of the merge
   // node.
-  Status ExtractBody(const CondArgs& cond_args, const Cluster& merge_cluster,
-                     const std::vector<Node*>& outputs, int input_edge,
+  Status ExtractBody(const std::vector<Node*>& switch_nodes,
+                     const std::vector<Node*>& merge_nodes, int input_edge,
                      Graph* body);
 
   // Adds all the input edges to `if_node` corresponding to the arguments.
-  Status AddInputEdges(const CondArgs& cond_args, Node* if_node);
+  Status AddInputEdges(const std::vector<Node*>& cond_args, Node* predicate,
+                       Node* if_node);
 
   // Adds all output edges from the `if_node`.
   Status AddOutputEdges(const std::vector<Node*>& outputs, Node* if_node);
 
-  // Removes all nodes from the graph that are part of cluster.
-  void RemoveClusterNodes(Cluster* cluster);
+  // Returns the switches of graph_ in postorder. Dead switch nodes are skipped
+  // and removed from the graph.
+  std::vector<Node*> DetermineSwitchOrder();
 
-  // Removes all argument nodes that are unused.
-  template <class T>
-  void RemoveUnusedArgs(const T& args);
+  // Update the state for destination based on the state of source and the node
+  // being updated.
+  Status Join(const ForwardFlowNode& src_state, const Node* dst,
+              ForwardFlowNode* dst_state);
 
-  // Removes all Merge nodes in merge_cluster.
-  void RemoveMergeNodes(Cluster* merge_cluster);
+  // Validates that the branch_map and frontier of nodes for the conditional
+  // section are as expected.
+  Status ValidBranchMapAndFrontier(
+      const std::unordered_map<Node*, ForwardFlowNode>& branch_map,
+      const std::unordered_set<Node*>& frontier);
 
-  // Returns the representative member of the corresponding cluster.
-  ClusterHandle Representative(const Node* node) {
-    return clusters_.at(node).Get();
-  }
-
-  ClusteredGraph clustered_graph_;
-  ClusterHandle::Vector clusters_;
-  std::unordered_set<Node*> merge_nodes_;
-  std::unordered_set<Node*> switch_nodes_;
   FunctionLibraryDefinition* library_;
   Graph* graph_;
 };
 
-std::ostream& operator<<(std::ostream& os,
-                         const FunctionalizeCond::ClusterHandle& c) {
-  os << c.ToString();
-  return os;
-}
-
-// Returns a dot representation of the clustered graph showing the connections
-// between the nodes and the nodes in each cluster.
-string DebugString(const Graph& graph,
-                   FunctionalizeCond::ClusterHandle::Vector* clusters) {
-  string ret = "digraph {\ncompound=true;labeljust=\"r\";ranksep=0.24\n";
-  std::map<FunctionalizeCond::ClusterHandle, string> subgraphs;
-  auto name = [](const Node* n) {
-    return strings::StrCat(n->type_string(), "_", n->id());
-  };
-  for (Node* n : graph.nodes()) {
-    strings::StrAppend(&subgraphs[clusters->at(n).Get()], n->id(), " [label=\"",
-                       name(n), "\"];\n");
-  }
-  for (auto kv : subgraphs) {
-    strings::StrAppend(&ret, "subgraph cluster_", kv.first.ToString(), " {\n",
-                       "style=filled; color=lightgrey;", "label = \"",
-                       kv.first.ToString(), "\";\n", kv.second, "}\n");
-  }
-  for (Node* n : graph.nodes()) {
-    for (Node* in : n->in_nodes()) {
-      strings::StrAppend(&ret, in->id(), " -> ", n->id(), ";\n");
-    }
-  }
-  return strings::StrCat(ret, "} // end");
-}
-
-string DebugString(const FunctionalizeCond::ClusteredGraph& clustered_graph) {
-  string ret = "digraph {\ncompound=true;labeljust=\"r\";\n";
-  auto name = [](const FunctionalizeCond::Cluster& cluster) {
-    return cluster.representative.ToString();
-  };
-  for (auto kv : clustered_graph) {
-    if (!kv.second.switch_nodes.empty() || !kv.second.merge_nodes.empty()) {
-      strings::StrAppend(
-          &ret, kv.first.ToString(), " [label=\"", name(kv.second),
-          kv.second.switch_nodes.empty()
-              ? ""
-              : strings::StrCat(" switches=", kv.second.switch_nodes.size()),
-          kv.second.merge_nodes.empty()
-              ? ""
-              : strings::StrCat(" merges=", kv.second.merge_nodes.size()),
-          "\"];\n");
-    }
-  }
-  for (auto kv : clustered_graph) {
-    for (auto in : kv.second.in_nodes) {
-      strings::StrAppend(&ret, name(*in), " -> ", name(kv.second), ";\n");
-    }
-  }
-  return strings::StrCat(ret, "} // end");
-}
-
 bool IsDeadSwitch(const Node* node) {
   for (const Edge* e : node->out_edges()) {
     const Node* dst = e->dst();
@@ -796,341 +639,212 @@ bool IsDeadSwitch(const Node* node) {
   return true;
 }
 
-void FunctionalizeCond::CreateClusters() {
-  ClusterHandle source_cluster = ClusterHandle(Graph::kSourceId);
-  auto& source = clusters_.at(source_cluster);
-  std::deque<std::pair<ClusterHandle, std::deque<Node*>>> workqueue;
-  workqueue.push_back({source_cluster, {}});
-  for (Node* node : graph_->nodes()) {
-    if (IsSwitch(node)) {
-      switch_nodes_.insert(node);
-    } else if (IsMerge(node)) {
-      merge_nodes_.insert(node);
+string FunctionalizeCond::Branch_Name(FunctionalizeCond::Branch b) {
+  const string branch_name[FunctionalizeCond::kNumBranchTypes + 1] = {
+      "else", "then", "both", "neither", "count"};
+  return branch_name[b];
+}
+
+Status FunctionalizeCond::ValidBranchMapAndFrontier(
+    const std::unordered_map<Node*, FunctionalizeCond::ForwardFlowNode>&
+        branch_map,
+    const std::unordered_set<Node*>& frontier) {
+  std::unordered_set<const Node*> pending[kNumBranchTypes];
+  for (const auto& kv : branch_map) {
+    if (kv.second.count != kv.first->in_edges().size()) {
+      return errors::FailedPrecondition("Value ", kv.first->DebugString(),
+                                        " not dominated by switch nodes.");
     }
-    ClusterHandle& cluster = clusters_.at(node).Get();
-    cluster = ClusterHandle(node->id());
-    // Group all source clusters together.
-    if (node->IsSource() || node->in_edges().empty()) {
-      clusters_.at(node).Merge(&source);
-      source.Merge(&clusters_.at(node));
-      workqueue.front().second.push_back(node);
+    if (VLOG_IS_ON(1)) {
+      // Append attribute to the graph if running with logging to make the
+      // changes clearer in the visualization.
+      kv.first->AddAttr("_XlaFunctionalizeBranch",
+                        Branch_Name(kv.second.branch));
     }
   }
-
-  // If there are no Merge nodes, then terminate.
-  if (merge_nodes_.empty()) {
-    return;
+  for (Node* n : frontier) {
+    pending[branch_map.at(n).branch].insert(n);
   }
-
-  // Remove all dead Switch nodes.
-  RemoveUnusedArgs(switch_nodes_);
-
-  // All parent_'s are still nullptr so clusters_ may still be resized. Resize
-  // conservatively assuming all merge nodes become XlaIf nodes.
-  clusters_.resize(clusters_.size() + merge_nodes_.size());
-
-  std::unordered_set<Node*> marked;
-  while (!workqueue.empty()) {
-    auto cluster_queue = workqueue.front();
-    VLOG(4) << "Cluster: " << cluster_queue.first << " Queue: {"
-            << str_util::Join(cluster_queue.second, ",",
-                              [](string* output, const Node* node) {
-                                strings::StrAppend(output, node->id());
-                              })
-            << "}";
-
-    UnionFind<ClusterHandle>& repr = clusters_.at(cluster_queue.first);
-    workqueue.pop_front();
-    std::deque<Node*> switch_nodes;
-    std::deque<Node*> merge_nodes;
-    std::unordered_set<Node*> cluster_member;
-    while (!cluster_queue.second.empty()) {
-      // Iterate node workqueue and flow forward merging all nodes reachable
-      // that are neither a Switch or a Merge and whose inputs are all part of
-      // the same cluster.
-      Node* cur = cluster_queue.second.front();
-      cluster_queue.second.pop_front();
-      if (marked.find(cur) != marked.end()) {
-        continue;
-      }
-      if (IsMerge(cur)) {
-        merge_nodes.push_back(cur);
-        marked.insert(cur);
-        continue;
-      }
-      if (IsSwitch(cur)) {
-        switch_nodes.push_back(cur);
-        marked.insert(cur);
-        continue;
-      }
-      clusters_.at(cur).Merge(&repr);
-      cluster_member.insert(cur);
-      for (Node* out : cur->out_nodes()) {
-        bool all_ancestors_in_cluster = true;
-        for (Node* in : out->in_nodes()) {
-          if (IsMerge(out)) {
-            merge_nodes.push_back(out);
-          }
-          if (IsSwitch(out)) {
-            switch_nodes.push_back(out);
-          }
-          if (cluster_member.find(in) == cluster_member.end()) {
-            all_ancestors_in_cluster = false;
-            break;
-          }
-        }
-        if (all_ancestors_in_cluster && out->IsOp()) {
-          cluster_queue.second.push_back(out);
-          marked.insert(cur);
-        }
-      }
-    }
-
-    VLOG(4) << "Switches: {"
-            << str_util::Join(switch_nodes, ",",
-                              [](string* output, const Node* node) {
-                                strings::StrAppend(output, node->id());
-                              })
-            << "}";
-
-    // Merge Switch nodes with common predicate.
-    std::unordered_map<Node*, std::vector<Node*>> predicate_to_switch;
-    for (Node* node : switch_nodes) {
-      Node* tmp;
-      TF_CHECK_OK(node->input_node(1, &tmp));
-      predicate_to_switch[tmp].push_back(node);
-    }
-    for (auto kv : predicate_to_switch) {
-      Node* first = kv.second.front();
-      for (Node* switch_node : kv.second) {
-        clusters_.at(first).Merge(&clusters_.at(switch_node));
-      }
-    }
-
-    // Enqueue each edge of the switch node separately. That is, group all the
-    // nodes that are due to the true/false edge of the switch together and
-    // consider all nodes that only have a control dependency on the switch node
-    // separately. We want to group together all nodes that are part of the same
-    // branch, as these will be extracted into the `then` and `else` functions
-    // of the functional if. The ops due to control edges are different as they
-    // could be involved with either branch and merging them here could result
-    // in invalid graphs.
-    for (auto kv : predicate_to_switch) {
-      ClusterHandle none = ClusterHandle(-1);
-      ClusterHandle first[2] = {none, none};
-      std::deque<Node*>* queue[2];
-      for (auto switch_node : kv.second) {
-        for (const auto e : switch_node->out_edges()) {
-          if (IsSwitch(e->dst()) || IsMerge(e->dst())) {
-            continue;
-          }
-          // Control edges are enqueued on their own.
-          if (e->IsControlEdge()) {
-            workqueue.push_back({Representative(e->dst()), {e->dst()}});
-            continue;
-          }
-          // Combine all outputs of the same output port of a switch cluster
-          // into the same workqueue entry.
-          if (first[e->src_output()] == none) {
-            ClusterHandle repr = Representative(e->dst());
-            first[e->src_output()] = repr;
-            workqueue.push_back({repr, {}});
-            queue[e->src_output()] = &workqueue.back().second;
-          }
-          clusters_.at(first[e->src_output()]).Merge(&clusters_.at(e->dst()));
-          queue[e->src_output()]->push_back(e->dst());
-        }
-      }
+  TF_RET_CHECK(pending[kNeither].empty()) << NodesToString(pending[kNeither]);
+  for (const Node* n : pending[kBoth]) {
+    TF_RET_CHECK(IsMerge(n)) << n->DebugString();
+    // Merge nodes may be in then or else branch too
+  }
+  int index = (pending[kThenBranch].size() <= pending[kElseBranch].size())
+                  ? kThenBranch
+                  : kElseBranch;
+  int other = 1 - index;
+  for (const Node* n : pending[index]) {
+    if (pending[other].find(n) != pending[other].end()) {
+      return errors::Internal(
+          "Node (", n->DebugString().c_str(),
+          ") in both Else and Then branch should be in Both.");
     }
   }
+  return Status::OK();
 }
 
-void FunctionalizeCond::ContractEdge(Cluster* from, Cluster* to,
-                                     bool remove_from_graph) {
-  VLOG(3) << "ContractEdge from = " << from->representative
-          << " to = " << to->representative;
-  if (from->representative == to->representative) {
-    return;
-  }
-  to->merge_nodes.insert(from->merge_nodes.begin(), from->merge_nodes.end());
-  from->merge_nodes.clear();
-  to->switch_nodes.insert(from->switch_nodes.begin(), from->switch_nodes.end());
-  from->switch_nodes.clear();
-
-  for (Cluster* from_out : from->out_nodes) {
-    from_out->in_nodes.erase(from);
-    if (from_out->representative != to->representative) {
-      from_out->in_nodes.insert(to);
-      to->out_nodes.insert(from_out);
+Status FunctionalizeCond::Join(const ForwardFlowNode& src_state,
+                               const Node* dst, ForwardFlowNode* dst_state) {
+  TF_RET_CHECK(dst_state->branch != Branch::kBoth &&
+               dst_state->branch != Branch::kNumBranchTypes)
+      << "Unexpected/Invalid branch type: Merging "
+      << Branch_Name(src_state.branch) << " with "
+      << Branch_Name(dst_state->branch);
+  if (dst_state->branch == Branch::kNeither) {
+    dst_state->branch = src_state.branch;
+  } else if (src_state.branch != dst_state->branch &&
+             src_state.branch != Branch::kNeither) {
+    if (IsMerge(dst)) {
+      dst_state->branch = Branch::kBoth;
+    } else {
+      return errors::Internal("Illegal merge: ", src_state.ToString(), " with ",
+                              dst_state->ToString(), " for ",
+                              dst->DebugString());
     }
   }
-  from->out_nodes.clear();
+  ++dst_state->count;
+  return Status::OK();
+}
 
-  for (Cluster* from_in : from->in_nodes) {
-    from_in->out_nodes.erase(from);
-    if (from_in->representative != to->representative) {
-      from_in->out_nodes.insert(to);
-      to->in_nodes.insert(from_in);
+std::vector<Node*> FunctionalizeCond::DetermineSwitchOrder() {
+  std::vector<Node*> dead_switches;
+  std::vector<Node*> switch_order;
+  DFS(*graph_, nullptr, [this, &dead_switches, &switch_order](Node* n) {
+    if (IsSwitch(n)) {
+      if (IsDeadSwitch(n)) {
+        dead_switches.push_back(n);
+      } else {
+        switch_order.push_back(n);
+      }
     }
-  }
-  from->in_nodes.clear();
-
-  to->in_nodes.erase(from);
-  to->out_nodes.erase(from);
-  clusters_.at(to->representative).Merge(&clusters_.at(from->representative));
-  from->visited = true;
+  });
 
-  if (remove_from_graph) {
-    clustered_graph_.erase(from->representative);
+  // Remove all dead switch nodes.
+  for (Node* n : dead_switches) {
+    graph_->RemoveNode(n);
   }
+
+  return switch_order;
 }
 
-void FunctionalizeCond::CreateClusteredGraph() {
-  auto update_cluster_for_node = [this](Node* node) -> Cluster& {
-    ClusterHandle repr = Representative(node);
-    Cluster& cluster_node = clustered_graph_[repr];
-    cluster_node.representative = repr;
-    for (const Node* in : node->in_nodes()) {
-      ClusterHandle other_repr = Representative(in);
-      // Skip source, sink and internal edges.
-      if (other_repr == repr) {
-        continue;
-      }
-      Cluster& cluster_node_in = clustered_graph_[other_repr];
-      cluster_node.in_nodes.insert(&cluster_node_in);
-      cluster_node_in.out_nodes.insert(&cluster_node);
-      cluster_node_in.representative = other_repr;
-    }
-    for (const Node* out : node->out_nodes()) {
-      ClusterHandle other_repr = Representative(out);
-      // Skip source, sink and internal edges.
-      if (other_repr == repr) {
-        continue;
-      }
-      Cluster& cluster_node_out = clustered_graph_[other_repr];
-      cluster_node.out_nodes.insert(&cluster_node_out);
-      cluster_node_out.in_nodes.insert(&cluster_node);
-      cluster_node_out.representative = other_repr;
-    }
-    return cluster_node;
-  };
-  update_cluster_for_node(graph_->source_node());
-  for (Node* node : switch_nodes_) {
-    update_cluster_for_node(node).switch_nodes.insert(node);
-  }
-  for (Node* node : merge_nodes_) {
-    update_cluster_for_node(node).merge_nodes.insert(node);
+Status FunctionalizeCond::FunctionalizeInternal() {
+  std::vector<Node*> switch_order = DetermineSwitchOrder();
+  // If there are no switch nodes, then terminate.
+  if (switch_order.empty()) {
+    return Status::OK();
   }
 
-  VLOG(3) << "Graph with clusters: " << DebugString(*graph_, &clusters_);
-  VLOG(3) << "ClusteredGraph: " << DebugString(clustered_graph_);
-}
+  struct PredicateSwitches {
+    explicit PredicateSwitches(Node* predicate) : predicate(predicate) {}
 
-gtl::optional<FunctionalizeCond::Cluster*>
-FunctionalizeCond::CreateCorrespondingMergeCluster(Cluster* switch_cluster) {
-  VLOG(3) << "CreateCorrespondingMergeCluster for "
-          << switch_cluster->representative;
-  std::unordered_set<Cluster*> merges;
-  std::unordered_set<Cluster*> dominated;
-  dominated.insert(switch_cluster);
-  std::deque<Cluster*> queue;
-  auto enqueue_or_update_merge = [this, &queue, &merges](Cluster* c) {
-    if (c->merge_nodes.empty()) {
-      queue.push_back(c);
-    } else {
-      merges.insert(c);
-    }
+    Node* predicate;
+    std::vector<Node*> switches;
   };
-  // Enqueue all the outputs of the switch cluster in the workqueue.
-  for (auto* out : switch_cluster->out_nodes) {
-    enqueue_or_update_merge(out);
-  }
-  std::unordered_set<Cluster*> visited;
-  while (!queue.empty()) {
-    Cluster* cur = queue.front();
-    queue.pop_front();
-    if (visited.find(cur) != visited.end()) {
-      continue;
-    }
-    visited.insert(cur);
-    // Ensure all inputs to the current node are in the dominated set.
-    for (Cluster* in : cur->in_nodes) {
-      if (dominated.find(in) == dominated.end()) {
-        return gtl::nullopt;
-      }
-    }
-    for (Cluster* out : cur->out_nodes) {
-      // No switch nodes beyond the entry one is expected.
-      if (!out->switch_nodes.empty()) {
-        return gtl::nullopt;
-      }
-      enqueue_or_update_merge(out);
+
+  // Merge Switch nodes with common predicate.
+  std::vector<PredicateSwitches> predicate_switch_order;
+  std::unordered_map<Node*, int> predicate_index;
+  // The nodes in switch_order are in reverse topological order, but the
+  // clustered switches need not be (i.e., when considered as a cluster one
+  // element of a cluster may be later in the topological order than another
+  // node whose cluster is later in the topological order of clustered
+  // switches).
+  for (auto it = switch_order.rbegin(); it != switch_order.rend(); ++it) {
+    Node* pred;
+    TF_CHECK_OK((*it)->input_node(1, &pred));
+    if (predicate_index.find(pred) == predicate_index.end()) {
+      predicate_index[pred] = predicate_switch_order.size();
+      predicate_switch_order.emplace_back(pred);
     }
-  }
-  // Return if there are no merge nodes.
-  if (merges.empty()) {
-    return gtl::nullopt;
-  }
-  auto it = merges.begin();
-  Cluster* merge_cluster = *it;
-  for (++it; it != merges.end(); ++it) {
-    ContractEdge(*it, merge_cluster);
+    predicate_switch_order[predicate_index[pred]].switches.push_back(*it);
   }
 
-  // TODO(jpienaar): Clean up graph, merging nodes.
+  // Iterate from innermost set of clustered switches to outermost, replacing
+  // matching switch->merge subgraphs with single XlaIf nodes.
+  for (auto it = predicate_switch_order.rbegin();
+       it != predicate_switch_order.rend(); ++it) {
+    auto& ps = *it;
+    VLOG(3) << "Flow down from: " << ps.predicate->name() << " -> "
+            << NodesToString(ps.switches);
 
-  return merge_cluster;
-}
+    std::unordered_map<Node*, ForwardFlowNode> branch_map;
+    std::unordered_set<Node*> frontier;
 
-xla::StatusOr<FunctionalizeCond::CondArgs> FunctionalizeCond::DetermineCondArgs(
-    const Cluster& merge_cluster, const Cluster& switch_cluster) {
-  VLOG(2) << "DetermineCondArgs for " << merge_cluster.representative
-          << " with switch cluster " << switch_cluster.representative;
-  CondArgs ret;
-  auto feeds_into_branch_cluster = [&](Node* switch_cluster) {
-    for (Node* out : switch_cluster->out_nodes()) {
-      ClusterHandle repr = Representative(out);
-      if (repr == merge_cluster.representative) {
-        return true;
+    std::vector<Node*> stack = ps.switches;
+    std::vector<bool> visited(graph_->num_node_ids(), false);
+    while (!stack.empty()) {
+      Node* n = stack.back();
+      stack.pop_back();
+
+      if (visited[n->id()]) {
+        continue;
       }
-      for (Cluster* in : merge_cluster.in_nodes) {
-        if (repr == in->representative) {
-          return true;
+      visited[n->id()] = true;
+
+      // Propagate branch state along each edge of a switch node.
+      bool sink_only = true;
+      for (const Edge* e : n->out_edges()) {
+        Node* out = e->dst();
+        if (!out->IsOp()) {
+          continue;
+        }
+        sink_only = false;
+        // Propagate branch information.
+        ForwardFlowNode& ffn = branch_map[out];
+        if (IsSwitch(n)) {
+          int index = e->IsControlEdge() ? Branch::kNeither : e->src_output();
+          TF_RETURN_IF_ERROR(Join(ForwardFlowNode(Branch(index)), out, &ffn));
+        } else {
+          TF_RETURN_IF_ERROR(Join(branch_map[n], out, &ffn));
+        }
+        if (IsMerge(out)) {
+          if (out->in_edges().size() == ffn.count) {
+            frontier.insert(out);
+          }
+        } else if (!visited[out->id()] && ffn.count == out->in_edges().size()) {
+          // If all predecessors are dominated by the switch nodes, then add
+          // the output to the stack.
+          stack.push_back(out);
+        }
+      }
+      if (sink_only) {
+        if (!IsIdentity(n)) {
+          VLOG(1) << "Feeding into sink: " << n->DebugString();
         }
       }
-    }
-    return false;
-  };
-  for (Node* switch_cluster_node : switch_cluster.switch_nodes) {
-    if (!feeds_into_branch_cluster(switch_cluster_node)) {
-      continue;
     }
 
-    Node* tmp;
-    TF_RETURN_IF_ERROR(switch_cluster_node->input_node(1, &tmp));
-    if (ret.conditional == nullptr) {
-      ret.conditional = tmp;
-    } else if (ret.conditional != tmp) {
-      return errors::Unimplemented(
-          "Switch statements with different conditionals cannot be "
-          "converted into functional conditional.");
+    TF_RETURN_IF_ERROR(ValidBranchMapAndFrontier(branch_map, frontier));
+    VLOG(2) << "FunctionalizeControlFlow (before XlaIf conversion): "
+            << dump_graph::DumpGraphToFile("functionalize_bc", *graph_);
+    std::vector<Node*> switch_nodes(ps.switches);
+    std::sort(switch_nodes.begin(), switch_nodes.end(), CondCmp());
+    std::vector<Node*> merge_nodes(frontier.begin(), frontier.end());
+    std::sort(merge_nodes.begin(), merge_nodes.end(), CondCmp());
+    TF_RETURN_IF_ERROR(ConvertCorrespondingMergeToXlaIf(
+        switch_nodes, merge_nodes, ps.predicate));
+    for (auto& del_kv : branch_map) {
+      graph_->RemoveNode(del_kv.first);
+    }
+    for (Node* node : switch_nodes) {
+      graph_->RemoveNode(node);
     }
-    ret.args.insert(switch_cluster_node);
+    VLOG(2) << "FunctionalizeControlFlow (after XlaIf conversion): "
+            << dump_graph::DumpGraphToFile("functionalize_ac", *graph_);
   }
-  return ret;
+  return Status::OK();
 }
 
 xla::StatusOr<Node*> FunctionalizeCond::BuildAndAddXlaIfOp(
-    const CondArgs& cond_args, const Cluster& merge_cluster,
-    const std::vector<Node*>& outputs) {
-  VLOG(2) << "Build if op for " << NodesToString(merge_cluster.merge_nodes)
-          << " with input " << NodesToString(cond_args.args);
+    const std::vector<Node*>& switch_nodes,
+    const std::vector<Node*>& merge_nodes, Node* predicate) {
+  VLOG(2) << "Build if op for " << NodesToString(merge_nodes) << " with input "
+          << NodesToString(switch_nodes);
 
   NodeDef if_def;
   // Create a new If node using the name of the merge node.
-  NodeDefBuilder builder(
-      strings::StrCat((*merge_cluster.merge_nodes.begin())->name(), "_If"),
-      "XlaIf");
+  NodeDefBuilder builder(strings::StrCat(predicate->name(), "_If"), "XlaIf");
   string branch[] = {"else_branch", "then_branch"};
   for (int i = 0; i < 2; ++i) {
     static std::atomic<int64> sequence_num(0LL);
@@ -1140,8 +854,7 @@ xla::StatusOr<Node*> FunctionalizeCond::BuildAndAddXlaIfOp(
     body_name.set_name(
         strings::StrCat("_functionalize_if_", branch[i], "_", id));
     auto body = xla::MakeUnique<Graph>(graph_->op_registry());
-    TF_RETURN_IF_ERROR(
-        ExtractBody(cond_args, merge_cluster, outputs, i, body.get()));
+    TF_RETURN_IF_ERROR(ExtractBody(switch_nodes, merge_nodes, i, body.get()));
     VLOG(3) << "Body " << branch[i] << ": " << DebugString(body.get());
     FunctionDef body_fdef;
     TF_RETURN_IF_ERROR(GraphToFunctionDef(*body, body_name.name(), &body_fdef));
@@ -1152,7 +865,7 @@ xla::StatusOr<Node*> FunctionalizeCond::BuildAndAddXlaIfOp(
   // Build input type.
   std::vector<NodeDefBuilder::NodeOut> inputs;
   DataTypeVector in_arg_types;
-  for (const Node* arg : cond_args.args) {
+  for (const Node* arg : switch_nodes) {
     const Edge* in_edge;
     TF_RETURN_IF_ERROR(arg->input_edge(0, &in_edge));
     if (in_edge->IsControlEdge()) {
@@ -1168,17 +881,17 @@ xla::StatusOr<Node*> FunctionalizeCond::BuildAndAddXlaIfOp(
 
   // Build output type.
   DataTypeVector out_type;
-  for (const Node* merge : merge_cluster.merge_nodes) {
+  for (const Node* merge : merge_nodes) {
     DataType dtype = merge->output_type(0);
     out_type.push_back(dtype);
   }
   builder.Attr("Tout", out_type);
 
   builder.Attr("Tcond", DT_BOOL);
-  builder.Device(cond_args.conditional->assigned_device_name());
+  builder.Device(predicate->assigned_device_name());
   // Conditional should be the first input ...
-  builder.Input(NodeDefBuilder::NodeOut(cond_args.conditional->name(), 0,
-                                        cond_args.conditional->output_type(0)));
+  builder.Input(
+      NodeDefBuilder::NodeOut(predicate->name(), 0, predicate->output_type(0)));
   // ... followed by the other inputs.
   builder.Input(inputs);
 
@@ -1187,53 +900,15 @@ xla::StatusOr<Node*> FunctionalizeCond::BuildAndAddXlaIfOp(
   return if_node;
 }
 
-void FunctionalizeCond::RemoveClusterNodes(Cluster* cluster) {
-  VLOG(3) << "RemoveClusterNodes for " << cluster->representative;
-  ClusterHandle repr = cluster->representative;
-  std::deque<Node*> to_delete;
-  for (Node* node : graph_->nodes()) {
-    if (Representative(node) == repr) {
-      to_delete.push_back(node);
-    }
-  }
-  for (Node* n : to_delete) {
-    graph_->RemoveNode(n);
-  }
-}
-
-template <class T>
-void FunctionalizeCond::RemoveUnusedArgs(const T& args) {
-  VLOG(2) << "RemoveUnusedArgs among: " << NodesToString(args);
-
-  std::deque<Node*> to_delete;
-  for (Node* arg : args) {
-    if (IsDeadSwitch(arg)) {
-      to_delete.push_back(arg);
-      for (Node* n : arg->out_nodes()) {
-        to_delete.push_back(n);
-      }
-    }
-  }
-  for (Node* n : to_delete) {
-    switch_nodes_.erase(n);
-    auto it = clustered_graph_.find(Representative(n));
-    if (it != clustered_graph_.end()) {
-      it->second.switch_nodes.erase(n);
-    }
-    graph_->RemoveNode(n);
-  }
-}
-
-Status FunctionalizeCond::ExtractBody(const CondArgs& cond_args,
-                                      const Cluster& merge_cluster,
-                                      const std::vector<Node*>& outputs,
+Status FunctionalizeCond::ExtractBody(const std::vector<Node*>& switch_nodes,
+                                      const std::vector<Node*>& merge_nodes,
                                       int input_edge, Graph* body) {
-  VLOG(2) << "ExtractBody for " << merge_cluster.representative
-          << " along edge " << input_edge;
+  VLOG(2) << "ExtractBody for " << NodesToString(merge_nodes) << " along edge "
+          << input_edge;
   std::vector<bool> squash_src_outputs(graph_->num_node_ids(), false);
   std::vector<Node*> node_map(graph_->num_node_ids(), nullptr);
   int arg_count = 0;
-  for (const auto* arg : cond_args.args) {
+  for (const auto* arg : switch_nodes) {
     DataType dtype = arg->input_type(0);
     TF_ASSIGN_OR_RETURN(Node * arg_node,
                         BuildArgNode(body, dtype, arg_count++));
@@ -1242,9 +917,9 @@ Status FunctionalizeCond::ExtractBody(const CondArgs& cond_args,
   }
 
   std::vector<Node*> stack;
-  stack.reserve(outputs.size());
-  for (int j = 0; j < outputs.size(); ++j) {
-    Node* node = outputs[j];
+  stack.reserve(switch_nodes.size());
+  for (int j = 0; j < merge_nodes.size(); ++j) {
+    Node* node = merge_nodes[j];
     TF_ASSIGN_OR_RETURN(node_map.at(node->id()),
                         BuildRetvalNode(body, node->output_type(0),
                                         /*index=*/j));
@@ -1255,7 +930,8 @@ Status FunctionalizeCond::ExtractBody(const CondArgs& cond_args,
       node_map.at(in->id()) = body->CopyNode(in);
     }
 
-    if (cond_args.args.find(in) == cond_args.args.end()) {
+    if (std::find(switch_nodes.begin(), switch_nodes.end(), in) ==
+        switch_nodes.end()) {
       body->AddEdge(node_map.at(in->id()), in_edge->src_output(),
                     node_map.at(node->id()), 0);
     } else {
@@ -1270,12 +946,12 @@ Status FunctionalizeCond::ExtractBody(const CondArgs& cond_args,
                       body);
 }
 
-Status FunctionalizeCond::AddInputEdges(const CondArgs& cond_args,
-                                        Node* if_node) {
+Status FunctionalizeCond::AddInputEdges(const std::vector<Node*>& cond_args,
+                                        Node* predicate, Node* if_node) {
   VLOG(3) << "AddInputEdges for " << if_node->name();
   int i = 0;
-  graph_->AddEdge(cond_args.conditional, 0, if_node, i++);
-  for (const Node* arg : cond_args.args) {
+  graph_->AddEdge(predicate, 0, if_node, i++);
+  for (const Node* arg : cond_args) {
     const Edge* in_edge;
     TF_RETURN_IF_ERROR(arg->input_edge(0, &in_edge));
     if (in_edge->IsControlEdge()) {
@@ -1312,186 +988,26 @@ Status FunctionalizeCond::AddOutputEdges(const std::vector<Node*>& outputs,
   return Status::OK();
 }
 
-void FunctionalizeCond::RemoveMergeNodes(Cluster* merge_cluster) {
-  VLOG(3) << "RemoveMergeNodes for " << merge_cluster->representative;
-  // Remove all merge nodes now dead post extraction of If.
-  for (auto it = merge_cluster->merge_nodes.begin();
-       it != merge_cluster->merge_nodes.end();) {
-    Node* node = *it;
-    graph_->RemoveNode(node);
-    merge_cluster->merge_nodes.erase(*it++);
-  }
-}
-
-Status FunctionalizeCond::RemoveTrivialSwitch(Cluster* switch_cluster) {
-  Cluster* merge_cluster = *switch_cluster->out_nodes.begin();
-  if (merge_cluster->merge_nodes.empty()) {
-    return errors::FailedPrecondition(
-        "Not a trivial switch: no Merge node feeding into Switch node");
-  }
-
-  for (auto it = merge_cluster->merge_nodes.begin();
-       it != merge_cluster->merge_nodes.end();) {
-    // We have the following structure:
-    //   Op -> Switch -> Merge -> Consumer
-    // and we want to transform it to:
-    //   Op -> Consumer
-    Node* merge_node = *it;
-    Node* switch_node;
-    const Edge* in = nullptr;
-    TF_RETURN_IF_ERROR(merge_node->input_node(0, &switch_node));
-    TF_RETURN_IF_ERROR(switch_node->input_edge(0, &in));
-    for (auto out : merge_node->out_edges()) {
-      int src_output = out->dst_input() == Graph::kControlSlot
-                           ? Graph::kControlSlot
-                           : in->src_output();
-      graph_->AddEdge(in->src(), src_output, out->dst(), out->dst_input());
-    }
-    graph_->RemoveNode(*it++);
-  }
-  RemoveUnusedArgs(switch_cluster->switch_nodes);
-
-  return Status::OK();
-}
-
 Status FunctionalizeCond::ConvertCorrespondingMergeToXlaIf(
-    Cluster* switch_cluster) {
-  VLOG(1) << "ConvertMergeToXlaIf for " << switch_cluster->representative;
-  gtl::optional<Cluster*> maybe_merge =
-      CreateCorrespondingMergeCluster(switch_cluster);
-  if (!maybe_merge.has_value()) {
-    return errors::FailedPrecondition(
-        "Switch cluster was not part of a simple conditional in the clustered "
-        "graph. Graph nodes in switch cluster ",
-        NodesToString(switch_cluster->switch_nodes));
-  }
-  Cluster* merge_cluster = *maybe_merge;
-  if (merge_cluster->merge_nodes.empty()) {
-    return errors::Internal(
-        "Merge node in clustered graph contains no merge nodes: ",
-        merge_cluster->representative.ToString());
-  }
-  TF_ASSIGN_OR_RETURN(auto cond_args,
-                      DetermineCondArgs(*merge_cluster, *switch_cluster));
-
-  // Sort the outputs by ID to produce more stable output.
-  std::vector<Node*> outputs(merge_cluster->merge_nodes.begin(),
-                             merge_cluster->merge_nodes.end());
-  std::sort(outputs.begin(), outputs.end(), CondArgs::CondCmp());
+    const std::vector<Node*>& switch_nodes,
+    const std::vector<Node*>& merge_nodes, Node* predicate) {
+  VLOG(1) << "ConvertMergeToXlaIf for " << NodesToString(switch_nodes) << " -> "
+          << NodesToString(merge_nodes);
 
   // Extract bodies and builds a If operator.
   TF_ASSIGN_OR_RETURN(Node * if_node,
-                      BuildAndAddXlaIfOp(cond_args, *merge_cluster, outputs));
-  TF_RETURN_IF_ERROR(AddInputEdges(cond_args, if_node));
-  TF_RETURN_IF_ERROR(AddOutputEdges(outputs, if_node));
-
-  // Remove the old nodes from the graph_ and contract the edges of the
-  // clustered graph.
-  for (auto in : merge_cluster->in_nodes) {
-    if (in != switch_cluster) {
-      RemoveClusterNodes(in);
-    }
-  }
-  RemoveMergeNodes(merge_cluster);
-  RemoveUnusedArgs(cond_args.args);
-  auto in_nodes = merge_cluster->in_nodes;
-  for (auto it = in_nodes.begin(); it != in_nodes.end();) {
-    ContractEdge(*it++, switch_cluster);
-  }
-  ContractEdge(merge_cluster, switch_cluster);
-  clusters_[if_node].Get() = ClusterHandle(switch_cluster->representative);
+                      BuildAndAddXlaIfOp(switch_nodes, merge_nodes, predicate));
+  TF_RETURN_IF_ERROR(AddInputEdges(switch_nodes, predicate, if_node));
+  TF_RETURN_IF_ERROR(AddOutputEdges(merge_nodes, if_node));
 
   return Status::OK();
 }
 
-std::vector<std::pair<int, FunctionalizeCond::Cluster*>>
-FunctionalizeCond::SortedSwitchNodes() {
-  VLOG(2) << "ProcessClusteredGraph";
-  std::stack<std::pair<int, Cluster*>> stack;
-  // Initialize with the source node.
-  stack.push({0, &clustered_graph_[Representative(graph_->source_node())]});
-
-  // Perform a depth-first traversal of the clustered graph computing the
-  // switch-merge depth.
-  std::vector<std::pair<int, Cluster*>> queue;
-  std::unordered_set<Cluster*> visited;
-  while (!stack.empty()) {
-    Cluster* n = stack.top().second;
-    size_t depth = stack.top().first;
-    stack.pop();
-
-    auto inserted = visited.insert(n);
-    if (!inserted.second) {
-      continue;
-    }
-
-    size_t new_depth = depth;
-    if (!n->merge_nodes.empty()) {
-      --new_depth;
-    }
-    if (!n->switch_nodes.empty()) {
-      queue.emplace_back(depth, n);
-      ++new_depth;
-    }
-    for (Cluster* e : n->out_nodes) {
-      stack.emplace(new_depth, e);
-    }
-  }
-
-  // Sort in reverse order of switch-merge depth with ties broken by the
-  // ClusterHandle.
-  std::sort(queue.begin(), queue.end(),
-            [](const std::pair<int, Cluster*>& lhs,
-               const std::pair<int, Cluster*>& rhs) {
-              return std::tie(lhs.first, lhs.second->representative) >
-                     std::tie(rhs.first, rhs.second->representative);
-            });
-
-  return queue;
-}
-
 Status FunctionalizeCond::Functionalize(Graph* graph,
                                         FunctionLibraryDefinition* library) {
   VLOG(1) << "FunctionalizeCond::Functionalize";
   FunctionalizeCond fc(graph, library);
-  fc.CreateClusters();
-  if (fc.NoConditionals()) {
-    return Status::OK();
-  }
-  fc.CreateClusteredGraph();
-
-  auto queue = fc.SortedSwitchNodes();
-  for (auto it = queue.begin(); it != queue.end();) {
-    Cluster* switch_cluster = (*it).second;
-    ++it;
-    if (switch_cluster->out_nodes.size() == 1) {
-      TF_RETURN_IF_ERROR(fc.RemoveTrivialSwitch(switch_cluster));
-    } else {
-      TF_RETURN_IF_ERROR(fc.ConvertCorrespondingMergeToXlaIf(switch_cluster));
-    }
-
-    // Contract newly Switch free switch_cluster with outgoing nodes without
-    // Switch or Merge nodes.
-    for (auto& nodes : {switch_cluster->out_nodes, switch_cluster->in_nodes}) {
-      std::vector<Cluster*> copy_nodes(nodes.begin(), nodes.end());
-      for (auto* node : copy_nodes) {
-        if (node->merge_nodes.empty() && node->switch_nodes.empty()) {
-          fc.ContractEdge(node, switch_cluster);
-        }
-      }
-    }
-
-    VLOG(3) << "Graph with clusters: "
-            << DebugString(*fc.graph_, &fc.clusters_);
-    VLOG(3) << "ClusteredGraph: " << DebugString(fc.clustered_graph_);
-  }
-
-  if (!fc.switch_nodes_.empty()) {
-    return errors::Internal(
-        "Failed to functionalize control flow with Switch nodes remaining: ",
-        NodesToString(fc.switch_nodes_));
-  }
-  return Status::OK();
+  return fc.FunctionalizeInternal();
 }
 
 }  // namespace
diff --git a/tensorflow/compiler/tf2xla/functionalize_control_flow_test.cc b/tensorflow/compiler/tf2xla/functionalize_control_flow_test.cc
index 01d2b28275..71f12a1333 100644
--- a/tensorflow/compiler/tf2xla/functionalize_control_flow_test.cc
+++ b/tensorflow/compiler/tf2xla/functionalize_control_flow_test.cc
@@ -109,7 +109,7 @@ TEST(FunctionalizeControlFlow, Conditional) {
     auto y = ops::Placeholder(scope.WithOpName("y"), DT_INT32);
     auto x = ops::Placeholder(scope.WithOpName("x"), DT_INT32);
     auto less = ops::Less(scope.WithOpName("cond/Less"), y, x);
-    auto if_op = ops::XlaIf(scope.WithOpName("cond/Merge_If"), less,
+    auto if_op = ops::XlaIf(scope.WithOpName("cond/Less_If"), less,
                             std::initializer_list<Input>{less, y, x}, then_fn,
                             else_fn, {DT_INT32});
     GraphDef expected;
-- 
GitLab


From eb1bbef6c291829f2c1856a35b2556fa0f477bdb Mon Sep 17 00:00:00 2001
From: Justin Lebar <jlebar@google.com>
Date: Wed, 13 Dec 2017 17:45:55 -0800
Subject: [PATCH 0989/1225] [XLA] Add support for CustomCall in HLO parser.

PiperOrigin-RevId: 178984357
---
 tensorflow/compiler/xla/service/hlo_instruction.cc  |  4 ++++
 tensorflow/compiler/xla/tools/parser/hlo_parser.cc  | 12 +++++++++++-
 .../compiler/xla/tools/parser/hlo_parser_test.cc    | 13 +++++++++++++
 3 files changed, 28 insertions(+), 1 deletion(-)

diff --git a/tensorflow/compiler/xla/service/hlo_instruction.cc b/tensorflow/compiler/xla/service/hlo_instruction.cc
index 1dab6076a5..220d5044a2 100644
--- a/tensorflow/compiler/xla/service/hlo_instruction.cc
+++ b/tensorflow/compiler/xla/service/hlo_instruction.cc
@@ -2082,6 +2082,10 @@ std::vector<string> HloInstruction::ExtraAttributesToString() const {
     extra.push_back(StrCat("exponent_bits=", exponent_bits_));
     extra.push_back(StrCat("mantissa_bits=", mantissa_bits_));
   }
+  if (opcode() == HloOpcode::kCustomCall) {
+    extra.push_back(
+        StrCat("custom_call_target=\"", CEscape(custom_call_target_), "\""));
+  }
   return extra;
 }
 
diff --git a/tensorflow/compiler/xla/tools/parser/hlo_parser.cc b/tensorflow/compiler/xla/tools/parser/hlo_parser.cc
index 192f134cb9..4f67ed2380 100644
--- a/tensorflow/compiler/xla/tools/parser/hlo_parser.cc
+++ b/tensorflow/compiler/xla/tools/parser/hlo_parser.cc
@@ -901,7 +901,17 @@ bool HloParser::ParseInstruction(HloComputation::Builder* builder,
           /*false_computation_arg=*/operands[2], *false_computation));
       break;
     }
-    case HloOpcode::kCustomCall:
+    case HloOpcode::kCustomCall: {
+      optional<string> custom_call_target;
+      attrs["custom_call_target"] = {/*required=*/true, AttrTy::kString,
+                                     &custom_call_target};
+      if (!ParseOperands(&operands) || !ParseAttributes(attrs)) {
+        return false;
+      }
+      instruction = builder->AddInstruction(HloInstruction::CreateCustomCall(
+          shape, operands, *custom_call_target));
+      break;
+    }
     case HloOpcode::kTrace:
       return TokenError(StrCat("parsing not yet implemented for op: ",
                                HloOpcodeString(opcode)));
diff --git a/tensorflow/compiler/xla/tools/parser/hlo_parser_test.cc b/tensorflow/compiler/xla/tools/parser/hlo_parser_test.cc
index 3b1f81134b..61d8902855 100644
--- a/tensorflow/compiler/xla/tools/parser/hlo_parser_test.cc
+++ b/tensorflow/compiler/xla/tools/parser/hlo_parser_test.cc
@@ -728,7 +728,20 @@ ENTRY %Parameters1.v4 () -> f32[] {
 }
 
 )"
+},
+
+// CustomCall
+{
+"CustomCall",
+R"(HloModule custom_call:
+
+ENTRY %CustomCall () -> f32[1,2,3] {
+  %constant = f32[1]{0} constant({12345})
+  ROOT %custom-call = f32[1,2,3]{0,2,1} custom-call(f32[1]{0} %constant), custom_call_target="foo\"bar"
 }
+
+)"
+},
   });
   // clang-format on
 }
-- 
GitLab


From baef8c35900a01d7b9444e001482a8fbd99f1fbe Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 13 Dec 2017 18:08:58 -0800
Subject: [PATCH 0990/1225] Add checkpoint_path to evaluation predicate_fn

PiperOrigin-RevId: 178986670
---
 .../contrib/learn/python/learn/experiment.py  | 102 +++++++++++++-----
 .../learn/python/learn/experiment_test.py     |  27 +++++
 2 files changed, 100 insertions(+), 29 deletions(-)

diff --git a/tensorflow/contrib/learn/python/learn/experiment.py b/tensorflow/contrib/learn/python/learn/experiment.py
index fc4bd1f461..9576ff21c2 100644
--- a/tensorflow/contrib/learn/python/learn/experiment.py
+++ b/tensorflow/contrib/learn/python/learn/experiment.py
@@ -35,6 +35,7 @@ from tensorflow.contrib.learn.python.learn import trainable
 from tensorflow.contrib.learn.python.learn.estimators import run_config
 from tensorflow.contrib.tpu.python.tpu import tpu_estimator
 from tensorflow.python.estimator import estimator as core_estimator
+from tensorflow.python.estimator import util as estimator_util
 from tensorflow.python.framework import ops
 from tensorflow.python.platform import tf_logging as logging
 from tensorflow.python.training import basic_session_run_hooks
@@ -46,6 +47,18 @@ from tensorflow.python.util import compat
 __all__ = ["Experiment"]
 
 
+def _get_standardized_predicate_fn(predicate_fn):
+  pred_fn_args = estimator_util.fn_args(predicate_fn)
+  if "checkpoint_path" not in pred_fn_args:
+    # pylint: disable=unused-argument
+    def _pred_fn_wrapper(eval_results, checkpoint_path):
+      return predicate_fn(eval_results)
+
+    return _pred_fn_wrapper
+  else:
+    return predicate_fn
+
+
 class _EvalAndExportListener(basic_session_run_hooks.CheckpointSaverListener):
   """Listener that evaluates and exports a model after creating a checkpoint.
 
@@ -446,22 +459,33 @@ class Experiment(object):
       evaluate_checkpoint_only_once: Whether to skip evaluation of checkpoints
         that have already been evaluated. Default is `True`.
       continuous_eval_predicate_fn: A predicate function determining whether to
-        continue eval after each iteration. `predicate_fn` takes the evaluation
-        results as arguments. At the beginning of evaluation, the passed eval
-        results will be None so it's expected that the predicate function
-        handles that gracefully. When `predicate_fn` is not specified,
-        continuous eval will run in an infinite loop (if `train_steps` is None)
-        or exit once global step reaches `train_steps`.
+        continue eval after each iteration. A `predicate_fn` has one of the
+        following signatures:
+          * (eval_results) -> boolean
+          * (eval_results, checkpoint_path) -> boolean
+        Where `eval_results` is the dictionary of metric evaluations and
+        checkpoint_path is the path to the checkpoint containing the parameters
+        on which that evaluation was based.
+        At the beginning of evaluation, the passed `eval_results` will be None
+        so it's expected that the predicate function handles that gracefully.
+        When `predicate_fn` is not specified, continuous eval will run in an
+        infinite loop (if `train_steps` is None). or exit once global step
+        reaches `train_steps`.
+
       export: Whether to export from this step. Default is 'True'.
 
     Raises:
       ValueError: if `continuous_eval_predicate_fn` is neither None nor
         callable.
     """
-    if (continuous_eval_predicate_fn is not None and
-        not callable(continuous_eval_predicate_fn)):
-      raise ValueError(
-          "`continuous_eval_predicate_fn` must be a callable, or None.")
+    if continuous_eval_predicate_fn is not None:
+      if not callable(continuous_eval_predicate_fn):
+        raise ValueError(
+            "`continuous_eval_predicate_fn` must be a callable, or None.")
+      predicate_fn = _get_standardized_predicate_fn(
+          continuous_eval_predicate_fn)
+    else:
+      predicate_fn = None
 
     if delay_secs is None:
       delay_secs = self._eval_delay_secs
@@ -475,8 +499,10 @@ class Experiment(object):
     previous_path = None
     eval_result = None
     last_warning_time = 0
-    while (not continuous_eval_predicate_fn or
-           continuous_eval_predicate_fn(eval_result)):
+    while (not predicate_fn or
+           predicate_fn(
+               eval_result,
+               checkpoint_path=previous_path if eval_result else None)):
       # Exit if we have already reached number of steps to train.
       if self._has_training_stopped(eval_result):
         logging.info("Exiting continuous eval, global_step=%s >= "
@@ -682,11 +708,19 @@ class Experiment(object):
 
     Args:
       continuous_eval_predicate_fn: A predicate function determining whether to
-        continue after each iteration. `predicate_fn` takes the evaluation
-        results as its arguments. At the beginning of evaluation, the passed
-        eval results will be None so it's expected that the predicate function
-        handles that gracefully. When `predicate_fn` is not specified, this will
-        run in an infinite loop or exit when global_step reaches `train_steps`.
+        continue eval after each iteration. A `predicate_fn` has one of the
+        following signatures:
+          * (eval_results) -> boolean
+          * (eval_results, checkpoint_path) -> boolean
+        Where `eval_results` is the dictionary of metric evaluations and
+        checkpoint_path is the path to the checkpoint containing the parameters
+        on which that evaluation was based.
+        At the beginning of evaluation, the passed `eval_results` and
+        `checkpoint_path` will be None so it's expected that the predicate
+        function handles that gracefully.
+        When `predicate_fn` is not specified, continuous eval will run in an
+        infinite loop (if `train_steps` is None). or exit once global step
+        reaches `train_steps`.
 
     Returns:
       A tuple of the result of the `evaluate` call to the `Estimator` and the
@@ -697,13 +731,18 @@ class Experiment(object):
         callable.
     """
 
-    if (continuous_eval_predicate_fn is not None and
-        not callable(continuous_eval_predicate_fn)):
-      raise ValueError(
-          "`continuous_eval_predicate_fn` must be a callable, or None.")
+    if continuous_eval_predicate_fn is not None:
+      if not callable(continuous_eval_predicate_fn):
+        raise ValueError(
+            "`continuous_eval_predicate_fn` must be a callable, or None.")
+      predicate_fn = _get_standardized_predicate_fn(
+          continuous_eval_predicate_fn)
+    else:
+      predicate_fn = None
 
-    eval_result = None
     export_results = None
+    latest_checkpoint = None
+    eval_result = None
 
     # Set the default value for train_steps_per_iteration, which will be
     # overridden by other settings.
@@ -713,8 +752,10 @@ class Experiment(object):
     elif self._train_steps is not None:
       train_steps_per_iteration = int(self._train_steps / 10)
 
-    while (not continuous_eval_predicate_fn or
-           continuous_eval_predicate_fn(eval_result)):
+    while (not predicate_fn or
+           predicate_fn(
+               eval_result,
+               checkpoint_path=latest_checkpoint if eval_result else None)):
 
       if self._has_training_stopped(eval_result):
         # Exits once max steps of training is satisfied.
@@ -729,11 +770,14 @@ class Experiment(object):
           saving_listeners=self._saving_listeners)
 
       logging.info("Evaluating model now.")
-      eval_result = self._call_evaluate(input_fn=self._eval_input_fn,
-                                        steps=self._eval_steps,
-                                        metrics=self._eval_metrics,
-                                        name="one_pass",
-                                        hooks=self._eval_hooks)
+      latest_checkpoint = saver.latest_checkpoint(self._estimator.model_dir)
+      eval_result = self._call_evaluate(
+          input_fn=self._eval_input_fn,
+          steps=self._eval_steps,
+          metrics=self._eval_metrics,
+          name="one_pass",
+          checkpoint_path=latest_checkpoint,
+          hooks=self._eval_hooks)
       export_results = self._maybe_export(eval_result)
 
     return eval_result, export_results
diff --git a/tensorflow/contrib/learn/python/learn/experiment_test.py b/tensorflow/contrib/learn/python/learn/experiment_test.py
index c29c198d09..545d7d8924 100644
--- a/tensorflow/contrib/learn/python/learn/experiment_test.py
+++ b/tensorflow/contrib/learn/python/learn/experiment_test.py
@@ -492,6 +492,33 @@ class ExperimentTest(test.TestCase):
       self.assertEqual(3, est.eval_count)
       self.assertEqual([noop_hook], est.eval_hooks)
 
+  def test_continuous_eval_predicate_fn_with_checkpoint(self):
+    for est in self._estimators_for_tests():
+      eval_metrics = 'eval_metrics' if not isinstance(
+          est, core_estimator.Estimator) else None
+      est.fake_checkpoint()
+      noop_hook = _NoopHook()
+
+      def _predicate_fn(eval_result, checkpoint_path):
+        self.assertEqual(not eval_result,
+                         checkpoint_path is None)
+        return est.eval_count < 3  # pylint: disable=cell-var-from-loop
+
+      ex = experiment.Experiment(
+          est,
+          train_input_fn='train_input',
+          eval_input_fn='eval_input',
+          eval_metrics=eval_metrics,
+          eval_hooks=[noop_hook],
+          eval_delay_secs=0,
+          continuous_eval_throttle_secs=0)
+      ex.continuous_eval(
+          evaluate_checkpoint_only_once=False,
+          continuous_eval_predicate_fn=_predicate_fn)
+      self.assertEqual(0, est.fit_count)
+      self.assertEqual(3, est.eval_count)
+      self.assertEqual([noop_hook], est.eval_hooks)
+
   def test_run_local(self):
     for est in self._estimators_for_tests():
       eval_metrics = 'eval_metrics' if not isinstance(
-- 
GitLab


From 466926a9900804cd4d5d7a719ee6f8c1b3ff9a75 Mon Sep 17 00:00:00 2001
From: Igor Saprykin <isaprykin@google.com>
Date: Wed, 13 Dec 2017 18:30:33 -0800
Subject: [PATCH 0991/1225] Automated g4 rollback of changelist 178963334

PiperOrigin-RevId: 178988579
---
 tensorflow/contrib/estimator/BUILD            |   5 +-
 .../python/estimator/replicate_model_fn.py    |  53 +++------
 .../estimator/replicate_model_fn_test.py      | 101 +-----------------
 3 files changed, 16 insertions(+), 143 deletions(-)

diff --git a/tensorflow/contrib/estimator/BUILD b/tensorflow/contrib/estimator/BUILD
index bd65ece85d..ba272d7e88 100644
--- a/tensorflow/contrib/estimator/BUILD
+++ b/tensorflow/contrib/estimator/BUILD
@@ -331,17 +331,16 @@ py_library(
         "//tensorflow/python:device",
         "//tensorflow/python:device_lib",
         "//tensorflow/python:framework_ops",
+        "//tensorflow/python:gradients",
         "//tensorflow/python:math_ops",
         "//tensorflow/python:platform",
-        "//tensorflow/python:sparse_ops",
-        "//tensorflow/python:sparse_tensor",
         "//tensorflow/python:state_ops",
         "//tensorflow/python:training",
         "//tensorflow/python:variable_scope",
+        "//tensorflow/python:variables",
         "//tensorflow/python/estimator:export_output",
         "//tensorflow/python/estimator:model_fn",
         "//tensorflow/python/estimator:util",
-        "//tensorflow/python/ops/losses",
         "@six_archive//:six",
     ],
 )
diff --git a/tensorflow/contrib/estimator/python/estimator/replicate_model_fn.py b/tensorflow/contrib/estimator/python/estimator/replicate_model_fn.py
index 2177ae2366..ca3a2394ee 100644
--- a/tensorflow/contrib/estimator/python/estimator/replicate_model_fn.py
+++ b/tensorflow/contrib/estimator/python/estimator/replicate_model_fn.py
@@ -41,25 +41,21 @@ from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import sparse_ops
 from tensorflow.python.ops import state_ops
 from tensorflow.python.ops import variable_scope
-from tensorflow.python.ops.losses import losses
 from tensorflow.python.platform import tf_logging
 from tensorflow.python.training import device_setter as device_setter_lib
 from tensorflow.python.training import training_util
 
 
-def replicate_model_fn(model_fn,
-                       optimizer_fn,
-                       loss_reduction=losses.Reduction.SUM,
-                       devices=None):
+def replicate_model_fn(model_fn, optimizer_fn, devices=None):
   """Replicate `Estimator.model_fn` over GPUs within a single host.
 
   The given `model_fn` specifies a single forward pass of a model.  To replicate
   such a model over GPUs, each GPU gets its own instance of the forward pass
   (a.k.a. a tower).  The input features and labels get sharded into the chunks
-  that correspond to the number of GPUs.  Each tower computes a loss based
+  that correspond to the number of GPUs.  Each tower computes its own loss based
   on its input.  For each such loss, gradients are computed.  After that, the
-  available losses are aggregated to form aggregated loss.  Available
-  gradients are summed.  Then, they update weights using the specified
+  available losses are summed to form aggregated loss.  The available
+  gradients are summed too.  Then, they update weights using the specified
   optimizer.
 
   If `devices` are `None`, then all available GPUs are going to be used for
@@ -106,7 +102,7 @@ def replicate_model_fn(model_fn,
   On reduction algorithms:
   Certain algorithms were chosen for aggregating results of computations on
   multiple towers:
-    - Losses from all towers are reduced according to `loss_reduction`.
+    - Losses from all towers are reduced using sum.
     - Gradients are reduced using sum for each trainable variable.
     - `eval_metrics_ops` are reduced per metric using `reduce_mean`.
     - `EstimatorSpec.predictions` and `EstimatorSpec.export_outputs` are
@@ -128,7 +124,6 @@ def replicate_model_fn(model_fn,
     optimizer_fn: a function that returns an optimizer instance.  The function
       may accept one `params` argument.  This is the `params` argument as
       defined by `Estimator`.  See  the `Estimator` documentation for details.
-    loss_reduction: controls whether losses are summed or averaged.
     devices: Optional list of devices to replicate the model across.  This
       argument can be used to replice only on the subset of available GPUs.
       If `None`, then all available GPUs are going to be used for replication.
@@ -142,11 +137,9 @@ def replicate_model_fn(model_fn,
   return _replicate_model_fn_with_mode(
       model_fn,
       optimizer_fn,
-      loss_reduction,
       devices,
-      # TODO(isaprykin): Query the system configuration to choose modes other
-      # than `SHARED_LOCAL_PARAMETER_SERVER`, even though it is often
-      # appropriate.
+      # TODO(isaprykin): Query system configuration to choose modes other than
+      # `SHARED_LOCAL_PARAMETER_SERVER`, even though it is often appropriate.
       mode=_VariableDistributionMode.SHARED_LOCAL_PARAMETER_SERVER)
 
 
@@ -178,13 +171,9 @@ class _VariableDistributionMode(object):
 def _replicate_model_fn_with_mode(
     model_fn,
     optimizer_fn,
-    loss_reduction=losses.Reduction.SUM,
     devices=None,
     mode=_VariableDistributionMode.SHARED_LOCAL_PARAMETER_SERVER):
   """A version of `replicate_model_fn` that allows to specify a `mode`."""
-  if loss_reduction == losses.Reduction.NONE:
-    raise ValueError('Tower losses need to be reduced in some way, yet {} '
-                     'reduction is specified.'.format(loss_reduction))
   if not devices:
     devices = _get_local_devices('GPU') or _get_local_devices('CPU')
 
@@ -210,7 +199,6 @@ def _replicate_model_fn_with_mode(
         features=feature_shards,
         labels=label_shards,
         params=params,
-        loss_reduction=loss_reduction,
         config=config,
         devices=devices,
         local_ps_devices=ps_devices)
@@ -281,7 +269,6 @@ def _get_loss_towers(model_fn,
                      config,
                      devices,
                      local_ps_devices,
-                     loss_reduction=losses.Reduction.SUM,
                      name_scope_pattern=_DEFAULT_NAME_SCOPE_PATTERN):
   """Replicate the loss computation across devices."""
   tower_specs = []
@@ -320,15 +307,12 @@ def _get_loss_towers(model_fn,
           if labels:
             labels_shard = labels[i]
 
-          tower_spec = model_fn(
-              mode=mode,
-              features=features[i],
-              labels=labels_shard,
-              **optional_params)
-          if loss_reduction != losses.Reduction.SUM:
-            tower_spec = _scale_tower_loss(
-                tower_spec, number_of_towers=len(devices))
-          tower_specs.append(tower_spec)
+          tower_specs.append(
+              model_fn(
+                  mode=mode,
+                  features=features[i],
+                  labels=labels_shard,
+                  **optional_params))
   return tower_specs
 
 
@@ -355,17 +339,6 @@ def _local_device_setter(worker_device, ps_devices, ps_strategy):
   return local_device_chooser
 
 
-def _scale_tower_loss(tower_spec, number_of_towers):
-  """Scale down the loss for arriving at the average loss by summing."""
-  if tower_spec.loss is None:
-    return tower_spec
-
-  estimator_spec = tower_spec._asdict()
-  estimator_spec['loss'] = math_ops.div(
-      estimator_spec['loss'], 1.0 * number_of_towers, name='averaged_loss')
-  return model_fn_lib.EstimatorSpec(**estimator_spec)
-
-
 def _minimize_towers(tower_specs, optimizer):
   """Aggregate and apply gradients for computed losses."""
   grad_lists = {}
diff --git a/tensorflow/contrib/estimator/python/estimator/replicate_model_fn_test.py b/tensorflow/contrib/estimator/python/estimator/replicate_model_fn_test.py
index c1b4e7b1a7..a83a1b8407 100644
--- a/tensorflow/contrib/estimator/python/estimator/replicate_model_fn_test.py
+++ b/tensorflow/contrib/estimator/python/estimator/replicate_model_fn_test.py
@@ -40,7 +40,6 @@ from tensorflow.python.framework import ops as ops_lib
 from tensorflow.python.framework import test_util
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import control_flow_ops
-from tensorflow.python.ops import losses
 from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import metrics as metrics_lib
 from tensorflow.python.ops import variable_scope
@@ -222,40 +221,13 @@ class ReplicateModelTest(test_util.TensorFlowTestCase):
       total_loss = (1.0 * 10 - 1.0) + (2.0 * 10 - 2.0)
       self.assertEqual(total_loss, session.run(estimator_spec.loss))
 
-      # derivative of loss = (1*c - 1) + (2*c - 2) is 3.
+      # loss' of c is 3.
       # new value of c = 10 - learning rate * 3 = 7.0.
       session.run(estimator_spec.train_op)
       with variable_scope.variable_scope('', reuse=True):
         c = variable_scope.get_variable('c', dtype=dtypes.float64)
         self.assertEqual(7.0, session.run(c))
 
-  def test_train_with_mean_reduction(self):
-    features = np.array([[1.0], [2.0]])
-    labels = np.array([[1.0], [2.0]])
-
-    with self.test_session() as session:
-      replicated_model_fn = replicate_model_fn.replicate_model_fn(
-          self.model_fn,
-          self.optimizer_fn,
-          losses.Reduction.MEAN,
-          devices=['/gpu:0', '/gpu:1'])
-      estimator_spec = replicated_model_fn(
-          features, labels, model_fn_lib.ModeKeys.TRAIN, self.params)
-      session.run(variables.global_variables_initializer())
-
-      # loss = feature * c - label
-      total_loss = ((1.0 * 10 - 1.0) + (2.0 * 10 - 2.0)) / 2.0
-      self.assertEqual(total_loss, session.run(estimator_spec.loss))
-
-      # derivative of loss = (1*c - 1)/2 + (2*c - 2)/2 is 1.5.
-      # It's the same computation as without mean reduction, but the
-      # loss from every tower is scaled by 1/<number of towers>.
-      # new value of c = 10 - learning rate * 1.5 = 8.5
-      session.run(estimator_spec.train_op)
-      with variable_scope.variable_scope('', reuse=True):
-        c = variable_scope.get_variable('c', dtype=dtypes.float64)
-        self.assertEqual(8.5, session.run(c))
-
   def test_train_spec_with_optimizer_without_params(self):
 
     def optimizer_fn_without_params():
@@ -304,38 +276,6 @@ class ReplicateModelTest(test_util.TensorFlowTestCase):
       self.assertEqual(0, auc)
       self.assertNear(total_loss, session.run(estimator_spec.loss), 0.01)
 
-  def test_eval_with_mean_reduction(self):
-    features = np.array([[0.01], [0.002]])
-    labels = np.array([[0.01], [0.02]])
-
-    with self.test_session() as session:
-      replicated_model_fn = replicate_model_fn.replicate_model_fn(
-          self.model_fn,
-          self.optimizer_fn,
-          losses.Reduction.MEAN,
-          devices=['/gpu:0', '/gpu:1'])
-      estimator_spec = replicated_model_fn(
-          features, labels, model_fn_lib.ModeKeys.EVAL, self.params)
-      session.run(variables.local_variables_initializer())
-      session.run(variables.global_variables_initializer())
-
-      accuracy, a = estimator_spec.eval_metric_ops['accuracy']
-      auc, b = estimator_spec.eval_metric_ops['auc']
-
-      session.run([a, b])
-      accuracy = session.run(accuracy)
-      auc = session.run(auc)
-
-      # loss[i] = features[i] * 10 - labels[i].
-      # Accuracy is 0.0 (no match) in the first tower.
-      # Accuracy is 1.0 (match) in the second tower, since the feature
-      # times weight "c" happened to be equal to the label.
-      total_loss = ((0.01 * 10 - 0.01) + (0.002 * 10 - 0.02)) / 2.0
-
-      self.assertNear((0.0 + 1.0) / 2.0, accuracy, 0.01)
-      self.assertEqual(0, auc)
-      self.assertNear(total_loss, session.run(estimator_spec.loss), 0.01)
-
   def test_predict(self):
     features = np.array([[0.01], [0.002]])
     labels = np.array([[0.01], [0.02]])
@@ -416,11 +356,6 @@ class ReplicateModelTest(test_util.TensorFlowTestCase):
           'probabilities': np.array([[0.1], [0.02]])
       }, session.run(estimator_spec.predictions))
 
-  def test_unsupported_loss_reduction(self):
-    with self.assertRaisesRegexp(ValueError, ''):
-      _ = replicate_model_fn.replicate_model_fn(
-          self.model_fn, self.optimizer_fn, losses.Reduction.NONE)
-
 
 class GetLossTowersTest(test_util.TensorFlowTestCase):
 
@@ -471,40 +406,6 @@ class GetLossTowersTest(test_util.TensorFlowTestCase):
         c = variable_scope.get_variable('c', dtype=dtypes.float64)
         self.assertEqual(0.25, session.run(c))
 
-  def test_gradients_are_computed_with_mean_reduction(self):
-    with self.test_session() as session:
-      tower_specs = replicate_model_fn._get_loss_towers(
-          self.model_fn,
-          mode=None,
-          features=[[0.6], [1.6]],
-          labels=[[0.6], [0.6]],
-          params=None,
-          loss_reduction=losses.Reduction.MEAN,
-          config=None,
-          devices=['/gpu:0', '/gpu:1'],
-          local_ps_devices=['/gpu:0'],
-          name_scope_pattern='test_tower_{}')
-      session.run(variables.global_variables_initializer())
-
-      self.assertEqual(len(tower_specs), 2)
-
-      self.assertEqual('/device:GPU:0', tower_specs[0].loss.device)
-      self.assertEqual('averaged_loss:0', tower_specs[0].loss.name)
-      self.assertEqual(0.5, session.run(tower_specs[0].loss))
-
-      self.assertEqual('/device:GPU:1', tower_specs[1].loss.device)
-      self.assertEqual('test_tower_1/averaged_loss:0', tower_specs[1].loss.name)
-      # The input batch for the second tower had a loss that is 1.0
-      # bigger: 0.6 vs 1.6.
-      self.assertEqual(1.0, session.run(tower_specs[1].loss))
-
-      self.assertEqual(1, len(variables.global_variables()))
-      self.assertEqual(1, len(variables.trainable_variables()))
-
-      with variable_scope.variable_scope('', reuse=True):
-        c = variable_scope.get_variable('c', dtype=dtypes.float64)
-        self.assertEqual(0.25, session.run(c))
-
   def test_variables_are_round_robined_correctly(self):
     """Test that creates multiple variables and tests round-robin placement."""
 
-- 
GitLab


From b8831d34a24b10a84c711362ae810e5faf3f27ae Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 13 Dec 2017 18:46:18 -0800
Subject: [PATCH 0992/1225] Minor fix on degenerative case.

PiperOrigin-RevId: 178989673
---
 .../learner/batch/ordinal_split_handler.py    |  4 +-
 .../batch/ordinal_split_handler_test.py       | 81 +++++++++++++++++++
 2 files changed, 83 insertions(+), 2 deletions(-)

diff --git a/tensorflow/contrib/boosted_trees/lib/learner/batch/ordinal_split_handler.py b/tensorflow/contrib/boosted_trees/lib/learner/batch/ordinal_split_handler.py
index 72e20aaa12..7df514cd20 100644
--- a/tensorflow/contrib/boosted_trees/lib/learner/batch/ordinal_split_handler.py
+++ b/tensorflow/contrib/boosted_trees/lib/learner/batch/ordinal_split_handler.py
@@ -436,7 +436,7 @@ def dense_make_stats_update(is_active, are_buckets_ready, float_column,
     quantized_feature = quantile_ops.quantiles([float_column], [],
                                                [quantile_buckets], [], [])
     quantized_feature = math_ops.cast(quantized_feature[0], dtypes.int64)
-    quantized_feature = array_ops.squeeze(quantized_feature)
+    quantized_feature = array_ops.squeeze(quantized_feature, axis=0)
     return (example_partition_ids, quantized_feature, gradients, hessians)
 
   def not_ready_inputs_fn():
@@ -468,7 +468,7 @@ def sparse_make_stats_update(
                                                [sparse_column_indices])
 
     quantized_feature = math_ops.cast(quantized_feature[1], dtypes.int64)
-    quantized_feature = array_ops.squeeze(quantized_feature)
+    quantized_feature = array_ops.squeeze(quantized_feature, axis=0)
 
     example_indices, _ = array_ops.split(
         sparse_column_indices, num_or_size_splits=2, axis=1)
diff --git a/tensorflow/contrib/boosted_trees/lib/learner/batch/ordinal_split_handler_test.py b/tensorflow/contrib/boosted_trees/lib/learner/batch/ordinal_split_handler_test.py
index ee16a5f838..54d03018d9 100644
--- a/tensorflow/contrib/boosted_trees/lib/learner/batch/ordinal_split_handler_test.py
+++ b/tensorflow/contrib/boosted_trees/lib/learner/batch/ordinal_split_handler_test.py
@@ -1121,6 +1121,87 @@ class SparseSplitHandlerTest(test_util.TensorFlowTestCase):
     self.assertEqual(len(gains), 0)
     self.assertEqual(len(splits), 0)
 
+  def testDegenerativeCase(self):
+    with self.test_session() as sess:
+      # One data example only, one leaf and thus one quantile bucket.The same
+      # situation is when all examples have the same values. This case was
+      # causing before a failure.
+      gradients = array_ops.constant([0.2])
+      hessians = array_ops.constant([0.12])
+      example_partitions = array_ops.constant([1], dtype=dtypes.int32)
+      indices = array_ops.constant([[0, 0]], dtype=dtypes.int64)
+      values = array_ops.constant([0.58])
+      sparse_column = sparse_tensor.SparseTensor(indices, values, [1, 1])
+
+      gradient_shape = tensor_shape.scalar()
+      hessian_shape = tensor_shape.scalar()
+      class_id = -1
+
+      split_handler = ordinal_split_handler.SparseSplitHandler(
+          l1_regularization=0,
+          l2_regularization=2,
+          tree_complexity_regularization=0,
+          min_node_weight=0,
+          epsilon=0.01,
+          num_quantiles=2,
+          feature_column_group_id=0,
+          sparse_float_column=sparse_column,
+          init_stamp_token=0,
+          gradient_shape=gradient_shape,
+          hessian_shape=hessian_shape,
+          multiclass_strategy=learner_pb2.LearnerConfig.TREE_PER_CLASS)
+      resources.initialize_resources(resources.shared_resources()).run()
+
+      empty_gradients, empty_hessians = get_empty_tensors(
+          gradient_shape, hessian_shape)
+      example_weights = array_ops.ones([1, 1], dtypes.float32)
+
+      update_1 = split_handler.update_stats_sync(
+          0,
+          example_partitions,
+          gradients,
+          hessians,
+          empty_gradients,
+          empty_hessians,
+          example_weights,
+          is_active=array_ops.constant([True, True]))
+      with ops.control_dependencies([update_1]):
+        are_splits_ready = split_handler.make_splits(0, 1, class_id)[0]
+
+      with ops.control_dependencies([are_splits_ready]):
+        update_2 = split_handler.update_stats_sync(
+            1,
+            example_partitions,
+            gradients,
+            hessians,
+            empty_gradients,
+            empty_hessians,
+            example_weights,
+            is_active=array_ops.constant([True, True]))
+      with ops.control_dependencies([update_2]):
+        are_splits_ready2, partitions, gains, splits = (
+            split_handler.make_splits(1, 2, class_id))
+        are_splits_ready, are_splits_ready2, partitions, gains, splits = (
+            sess.run([
+                are_splits_ready, are_splits_ready2, partitions, gains, splits
+            ]))
+
+    # During the first iteration, inequality split handlers are not going to
+    # have any splits. Make sure that we return not_ready in that case.
+    self.assertFalse(are_splits_ready)
+    self.assertTrue(are_splits_ready2)
+
+    self.assertAllEqual([1], partitions)
+    self.assertAllEqual([0.0], gains)
+
+    split_info = split_info_pb2.SplitInfo()
+    split_info.ParseFromString(splits[0])
+    split_node = split_info.split_node.sparse_float_binary_split_default_left
+
+    self.assertEqual(0, split_node.split.feature_column)
+
+    self.assertAllClose(0.58, split_node.split.threshold)
+
 
 if __name__ == "__main__":
   googletest.main()
-- 
GitLab


From ed0e2508183ce1bdc1b06cbb752f6a8ed9af99e4 Mon Sep 17 00:00:00 2001
From: Eugene Brevdo <ebrevdo@google.com>
Date: Wed, 13 Dec 2017 20:27:14 -0800
Subject: [PATCH 0993/1225] Bugfix in variable naming of GRUBlockCell.

PiperOrigin-RevId: 178995589
---
 tensorflow/contrib/rnn/python/ops/gru_ops.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tensorflow/contrib/rnn/python/ops/gru_ops.py b/tensorflow/contrib/rnn/python/ops/gru_ops.py
index cbe53bb4ce..8e4b60451e 100644
--- a/tensorflow/contrib/rnn/python/ops/gru_ops.py
+++ b/tensorflow/contrib/rnn/python/ops/gru_ops.py
@@ -182,12 +182,12 @@ class GRUBlockCell(LayerRNNCell):
       raise ValueError("Expecting input_size to be set.")
 
     self._gate_kernel = vs.get_variable(
-        "w_c", [input_size + self._cell_size, self._cell_size * 2])
+        "w_ru", [input_size + self._cell_size, self._cell_size * 2])
     self._gate_bias = vs.get_variable(
         "b_ru", [self._cell_size * 2],
         initializer=init_ops.constant_initializer(1.0))
     self._candidate_kernel = vs.get_variable(
-        "w_ru", [input_size + self._cell_size, self._cell_size])
+        "w_c", [input_size + self._cell_size, self._cell_size])
     self._candidate_bias = vs.get_variable(
         "b_c", [self._cell_size],
         initializer=init_ops.constant_initializer(0.0))
-- 
GitLab


From eea28cb2d458d41dfa8129afb34e4420fefc6997 Mon Sep 17 00:00:00 2001
From: Eugene Brevdo <ebrevdo@google.com>
Date: Wed, 13 Dec 2017 20:50:37 -0800
Subject: [PATCH 0994/1225] [TF Variants] The ResourceAssignOp now supports
 Variants on GPU.

This allows Variants to sit on resource variables; before, though the
ReadValue op was enabled for Variants on GPU, because assignment happened
on CPU, Variant-based Resource Variables always had to reside on CPU
due to the associated colocation constraints.

PiperOrigin-RevId: 178996911
---
 .../core/framework/variant_encode_decode.h    |  1 +
 .../kernels/dense_update_functor_gpu.cu.cc    |  2 +
 .../core/kernels/resource_variable_ops.cc     | 96 +++++++++++++++----
 3 files changed, 78 insertions(+), 21 deletions(-)

diff --git a/tensorflow/core/framework/variant_encode_decode.h b/tensorflow/core/framework/variant_encode_decode.h
index 09ebf6257b..5a84f9d943 100644
--- a/tensorflow/core/framework/variant_encode_decode.h
+++ b/tensorflow/core/framework/variant_encode_decode.h
@@ -233,6 +233,7 @@ void EncodeVariant(const T& value, string* buf) {
   VariantTensorData data;
   EncodeVariantImpl(value, TypeResolver<T>(), &data);
   data.set_type_name(TypeNameVariant(value));
+  DCHECK(buf != nullptr);
   data.SerializeToString(buf);
 }
 
diff --git a/tensorflow/core/kernels/dense_update_functor_gpu.cu.cc b/tensorflow/core/kernels/dense_update_functor_gpu.cu.cc
index 208401cb24..c9c97dc072 100644
--- a/tensorflow/core/kernels/dense_update_functor_gpu.cu.cc
+++ b/tensorflow/core/kernels/dense_update_functor_gpu.cu.cc
@@ -62,6 +62,8 @@ TF_CALL_GPU_NUMBER_TYPES(DEFINE_GPU_KERNELS);
 #define DEFINE_GPU_KERNELS(T) \
   template struct functor::DenseUpdate<GPUDevice, T, ASSIGN>;
 TF_CALL_GPU_ALL_TYPES(DEFINE_GPU_KERNELS);
+TF_CALL_int32(DEFINE_GPU_KERNELS);
+TF_CALL_int64(DEFINE_GPU_KERNELS);
 #undef DEFINE_GPU_KERNELS
 
 }  // end namespace tensorflow
diff --git a/tensorflow/core/kernels/resource_variable_ops.cc b/tensorflow/core/kernels/resource_variable_ops.cc
index 0ae8a8fdbc..e632baa2b4 100644
--- a/tensorflow/core/kernels/resource_variable_ops.cc
+++ b/tensorflow/core/kernels/resource_variable_ops.cc
@@ -55,6 +55,7 @@ limitations under the License.
 #include "tensorflow/core/framework/register_types.h"
 #include "tensorflow/core/framework/resource_mgr.h"
 #include "tensorflow/core/framework/tensor_types.h"
+#include "tensorflow/core/framework/variant_op_registry.h"
 #include "tensorflow/core/kernels/bounds_check.h"
 #include "tensorflow/core/kernels/dense_update_functor.h"
 #include "tensorflow/core/kernels/gather_functor.h"
@@ -110,7 +111,6 @@ REGISTER_KERNEL_BUILDER(Name("ReadVariableOp").Device(DEVICE_CPU),
                         ReadVariableOp);
 
 #if GOOGLE_CUDA
-
 REGISTER_KERNEL_BUILDER(
     Name("ReadVariableOp").Device(DEVICE_GPU).HostMemory("resource"),
     ReadVariableOp);
@@ -130,6 +130,7 @@ REGISTER_KERNEL_BUILDER(
                           ResourceHandleOp<Var>)
 
 TF_CALL_GPU_ALL_TYPES(REGISTER_GPU_KERNELS);
+TF_CALL_variant(REGISTER_GPU_KERNELS);
 #undef REGISTER_GPU_KERNELS
 #endif  // GOOGLE_CUDA
 
@@ -275,6 +276,64 @@ class AssignVariableOp : public OpKernel {
   DataType dtype_;
 };
 
+template <typename Device>
+Status VariantCopyFn(OpKernelContext* context, const Tensor& from, Tensor* to);
+
+#define CPU_DENSE_COPY(T)                                                \
+  case DataTypeToEnum<T>::value: {                                       \
+    functor::DenseUpdate<CPUDevice, T, ASSIGN> copy_functor_;            \
+    copy_functor_(context->eigen_device<CPUDevice>(), tensor->flat<T>(), \
+                  from.flat<T>());                                       \
+    break;                                                               \
+  }
+
+#define INSTANTIATE_GET_VARIANT_COPY_FN(Device, TYPE_CALLER, TYPE_DENSE_COPY) \
+  template <>                                                                 \
+  Status VariantCopyFn<Device>(OpKernelContext * context, const Tensor& from, \
+                               Tensor* to) {                                  \
+    PersistentTensor tmp;                                                     \
+    Tensor* tensor;                                                           \
+    AllocatorAttributes attr;                                                 \
+    attr.set_gpu_compatible(true);                                            \
+    attr.set_nic_compatible(true);                                            \
+    TF_RETURN_IF_ERROR(context->allocate_persistent(                          \
+        from.dtype(), from.shape(), &tmp, &tensor, attr));                    \
+    switch (from.dtype()) {                                                   \
+      TYPE_CALLER(TYPE_DENSE_COPY);                                           \
+      default:                                                                \
+        return errors::InvalidArgument(                                       \
+            "VariantCopyFn: Could not perform a deep copy of variant "        \
+            "element of type: ",                                              \
+            DataTypeString(from.dtype()),                                     \
+            " using device: ", context->device()->name());                    \
+    }                                                                         \
+    *to = *tensor;                                                            \
+    return Status::OK();                                                      \
+  }
+
+INSTANTIATE_GET_VARIANT_COPY_FN(CPUDevice, TF_CALL_ALL_TYPES, CPU_DENSE_COPY);
+
+#if GOOGLE_CUDA
+#define GPU_DENSE_COPY(T)                                                \
+  case DataTypeToEnum<T>::value: {                                       \
+    functor::DenseUpdate<GPUDevice, T, ASSIGN> copy_functor_;            \
+    copy_functor_(context->eigen_device<GPUDevice>(), tensor->flat<T>(), \
+                  from.flat<T>());                                       \
+    break;                                                               \
+  }
+#define TF_CALL_GPU_AND_ADDITIONAL_TYPES(T) \
+  TF_CALL_GPU_ALL_TYPES(T);                 \
+  TF_CALL_int32(T);                         \
+  TF_CALL_int64(T);
+INSTANTIATE_GET_VARIANT_COPY_FN(GPUDevice, TF_CALL_GPU_AND_ADDITIONAL_TYPES,
+                                GPU_DENSE_COPY);
+#undef TF_CALL_GPU_AND_ADDITIONAL_TYPES
+#undef GPU_DENSE_COPY
+#endif  // GOOGLE_CUDA
+
+#undef CPU_DENSE_COPY
+#undef INSTANTIATE_GET_VARIANT_COPY_FN
+
 template <typename Device>
 class AssignVariableOp<Device, Variant> : public OpKernel {
  public:
@@ -287,21 +346,15 @@ class AssignVariableOp<Device, Variant> : public OpKernel {
 
   void Compute(OpKernelContext* context) override {
     const Tensor& value = context->input(1);
-    OP_REQUIRES(context, dtype_ == value.dtype(),
-                errors::InvalidArgument(
-                    "Variable and value dtypes don't match; respectively, ",
-                    dtype_, " and ", context->input(1).dtype()));
-
     Var* variable = nullptr;
     OP_REQUIRES_OK(context, LookupOrCreateResource<Var>(
                                 context, HandleFromInput(context, 0), &variable,
                                 [this, context](Var** ptr) {
-                                  *ptr = new Var(dtype_);
-                                  // Create an empty new Variant tensor.
+                                  // Created on host.
+                                  *ptr = new Var(DT_VARIANT);
                                   return Status::OK();
                                 }));
     core::ScopedUnref s(variable);
-
     OP_REQUIRES(context, variable->tensor()->dtype() == DT_VARIANT,
                 errors::InvalidArgument(
                     "Trying to assign variable with wrong dtype. Expected ",
@@ -309,16 +362,17 @@ class AssignVariableOp<Device, Variant> : public OpKernel {
                     DataTypeString(DT_VARIANT)));
 
     mutex_lock ml(*variable->mu());
-    // TODO(ebrevdo): Add a proper Variant deep copy / assign registry
-    // entry and use that here.  For now, use a serialization
-    // roundtrip to perform the copy on CPU.  This is OK because this
-    // op is not registered for GPU.
-    *variable->tensor() = Tensor();
-    TensorProto tmp;
-    value.AsProtoTensorContent(&tmp);
-    OP_REQUIRES(context, variable->tensor()->FromProto(tmp),
-                errors::Internal("Could not properly reserialize values "
-                                 "Variant.  Check logs for more details."));
+
+    *variable->tensor() = Tensor(DT_VARIANT, value.shape());
+    const auto elements_in = value.flat<Variant>();
+    auto elements_out = variable->tensor()->flat<Variant>();
+    auto copy_fn = std::bind(&VariantCopyFn<Device>, context,
+                             std::placeholders::_1, std::placeholders::_2);
+    for (int64 i = 0; i < elements_in.size(); ++i) {
+      OP_REQUIRES_OK(context, VariantDeviceCopy(
+                                  VariantDeviceCopyDirection::DEVICE_TO_DEVICE,
+                                  elements_in(i), &elements_out(i), copy_fn));
+    };
   }
 
  private:
@@ -345,6 +399,7 @@ TF_CALL_variant(REGISTER_KERNELS);
                           AssignVariableOp<GPUDevice, type>);
 
 TF_CALL_GPU_ALL_TYPES(REGISTER_GPU_KERNELS);
+TF_CALL_variant(REGISTER_GPU_KERNELS);
 #undef REGISTER_GPU_KERNELS
 #endif  // GOOGLE_CUDA
 
@@ -464,8 +519,7 @@ class ResourceGatherOp : public OpKernel {
       auto out_flat = out->shaped<T, 3>({1, N, out->NumElements() / N});
 
       functor::GatherFunctor<Device, T, Index> functor;
-      int64 bad_i = functor(c, params_flat,
-                            indices_flat, out_flat);
+      int64 bad_i = functor(c, params_flat, indices_flat, out_flat);
 
       OP_REQUIRES(
           c, bad_i < 0,
-- 
GitLab


From d9769ce6410c2d95f371941855c46a0b2c461c0e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Dandelion=20Man=C3=A9?= <dandelion@google.com>
Date: Wed, 13 Dec 2017 22:21:25 -0800
Subject: [PATCH 0995/1225] Fix syntax error I introduced when re-applying my
 merge fix.

---
 tensorflow/core/platform/cloud/gcs_dns_cache.cc | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tensorflow/core/platform/cloud/gcs_dns_cache.cc b/tensorflow/core/platform/cloud/gcs_dns_cache.cc
index 1ec5893dee..87b0dde136 100644
--- a/tensorflow/core/platform/cloud/gcs_dns_cache.cc
+++ b/tensorflow/core/platform/cloud/gcs_dns_cache.cc
@@ -44,6 +44,8 @@ inline void print_getaddrinfo_error(const string& name, int error_code) {
   // TODO:WSAGetLastError is better than gai_strerror
   LOG(ERROR) << "Error resolving " << name << ": " << gai_strerror(error_code);
 #endif
+}
+
 // Selects one item at random from a vector of items, using a uniform
 // distribution.
 template <typename T>
-- 
GitLab


From 58fe964e7f60f287999804c8b70d24f28ed5dab1 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 14 Dec 2017 00:59:26 -0800
Subject: [PATCH 0996/1225] Adds missing new line characters at the end of
 error messsages.

PiperOrigin-RevId: 179013079
---
 tensorflow/contrib/lite/error_reporter.cc | 4 +++-
 tensorflow/contrib/lite/error_reporter.h  | 4 ++--
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/tensorflow/contrib/lite/error_reporter.cc b/tensorflow/contrib/lite/error_reporter.cc
index 6ba5384a94..03fcd5409c 100644
--- a/tensorflow/contrib/lite/error_reporter.cc
+++ b/tensorflow/contrib/lite/error_reporter.cc
@@ -39,7 +39,9 @@ int ErrorReporter::ReportError(void*, const char* format, ...) {
 }
 
 int StderrReporter::Report(const char* format, va_list args) {
-  return vfprintf(stderr, format, args);
+  const int result = vfprintf(stderr, format, args);
+  fputc('\n', stderr);
+  return result;
 }
 
 ErrorReporter* DefaultErrorReporter() {
diff --git a/tensorflow/contrib/lite/error_reporter.h b/tensorflow/contrib/lite/error_reporter.h
index 637d456ce7..d5715e4f90 100644
--- a/tensorflow/contrib/lite/error_reporter.h
+++ b/tensorflow/contrib/lite/error_reporter.h
@@ -25,10 +25,10 @@ namespace tflite {
 //
 // Usage:
 //  ErrorReporter foo;
-//  foo.Report("test %d\n", 5);
+//  foo.Report("test %d", 5);
 // or
 //  va_list args;
-//  foo.Report("test %d\n", args); // where args is va_list
+//  foo.Report("test %d", args); // where args is va_list
 //
 // Sublclass ErrorReporter to provide another reporting destination.
 // For example, if you have a GUI program, you might redirect to a buffer
-- 
GitLab


From bfaa55b4bf8a1734d76b85243211f32302ca9957 Mon Sep 17 00:00:00 2001
From: Akimasa KIMURA <kimura.akim.asa@gmail.com>
Date: Thu, 14 Dec 2017 19:50:50 +0900
Subject: [PATCH 0997/1225] Fix broken image link in TensorFlow Lite's Docs

I fixed the link of image in the same way as other documents in [tensorflow/tensorflow/docs_src/](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/docs_src).
---
 tensorflow/docs_src/mobile/tflite/index.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/docs_src/mobile/tflite/index.md b/tensorflow/docs_src/mobile/tflite/index.md
index 49d93669a2..6c4589d693 100644
--- a/tensorflow/docs_src/mobile/tflite/index.md
+++ b/tensorflow/docs_src/mobile/tflite/index.md
@@ -155,7 +155,7 @@ retraining for both floating point and quantized inference.
 
 The following diagram shows the architectural design of TensorFlow Lite:
 
-<img src = "/images/tflite-architecture.jpg">
+![tensorflow lite architecture](https://www.tensorflow.org/images/tflite-architecture.jpg)
 
 Starting with a trained TensorFlow model on disk, you'll convert that model to
 the TensorFlow Lite file format (`.tflite`) using the TensorFlow Lite
-- 
GitLab


From 934066da90cce263fba7f2049a455a070f0595e6 Mon Sep 17 00:00:00 2001
From: Skye Wanderman-Milne <skyewm@google.com>
Date: Thu, 14 Dec 2017 08:53:04 -0800
Subject: [PATCH 0998/1225] Make nn_batchnorm_test.py work with C API enabled.

PiperOrigin-RevId: 179050051
---
 tensorflow/python/ops/nn_batchnorm_test.py | 23 ++++++++++++++++++----
 1 file changed, 19 insertions(+), 4 deletions(-)

diff --git a/tensorflow/python/ops/nn_batchnorm_test.py b/tensorflow/python/ops/nn_batchnorm_test.py
index 8aed2e293f..fc013b565b 100644
--- a/tensorflow/python/ops/nn_batchnorm_test.py
+++ b/tensorflow/python/ops/nn_batchnorm_test.py
@@ -21,9 +21,12 @@ from __future__ import print_function
 import numpy as np
 from six.moves import xrange  # pylint: disable=redefined-builtin
 
+from tensorflow.core.framework import graph_pb2
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import importer
 from tensorflow.python.framework import ops
+from tensorflow.python.framework import test_util
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import gen_nn_ops
 from tensorflow.python.ops import gradient_checker
@@ -34,8 +37,18 @@ import tensorflow.python.ops.nn_grad  # pylint: disable=unused-import
 from tensorflow.python.platform import test
 
 
+@test_util.with_c_api
 class BatchNormalizationTest(test.TestCase):
 
+  def SetProducerVersion(self, graph, producer_version):
+    # The C API doesn't expose altering GraphDefVersions. We can indirectly set
+    # it via import_graph_def though.
+    graph_def = graph_pb2.GraphDef()
+    graph_def.versions.producer = producer_version
+    with graph.as_default():
+      importer.import_graph_def(graph_def)
+    assert graph.graph_def_versions.producer, producer_version
+
   def _npBatchNorm(self, x, m, v, beta, gamma, epsilon,
                    scale_after_normalization, shift_after_normalization):
     y = (x - m) / np.sqrt(v + epsilon)
@@ -52,9 +65,7 @@ class BatchNormalizationTest(test.TestCase):
   def _tfBatchNormV1(self, x, m, v, beta, gamma, epsilon,
                      scale_after_normalization):
     """Original implementation."""
-    # _batch_norm_with_global_normalization is deprecated in v9
-    ops.get_default_graph().graph_def_versions.producer = 8
-    # pylint: disable=protected-access
+    self.SetProducerVersion(ops.get_default_graph(), 8)
     return gen_nn_ops._batch_norm_with_global_normalization(
         x, m, v, beta, gamma, epsilon, scale_after_normalization)
     # pylint: enable=protected-access
@@ -222,7 +233,7 @@ class BatchNormalizationTest(test.TestCase):
         epsilon = 0.001
         for scale_after_normalization in [True, False]:
           # _batch_norm_with_global_normalization_grad is deprecated in v9
-          ops.get_default_graph().graph_def_versions.producer = 8
+          self.SetProducerVersion(ops.get_default_graph(), 8)
           grad = gen_nn_ops._batch_norm_with_global_normalization_grad(
               x, m, v, gamma, backprop, epsilon, scale_after_normalization)
           dx, dm, dv, db, dg = grad
@@ -334,6 +345,7 @@ class BatchNormalizationTest(test.TestCase):
         (2, 3, 2, 4, 5), (1, 1, 1, 4, 5), atol=0.005)
 
 
+@test_util.with_c_api
 class SufficientStatisticsTest(test.TestCase):
 
   def _npSuffStats(self, x, axes, shift, keep_dims):
@@ -393,6 +405,7 @@ class SufficientStatisticsTest(test.TestCase):
           self._testSuffStats([1, 2, 3], [0, 2], shift, keep_dims, has_shape)
 
 
+@test_util.with_c_api
 class NormalizeMomentsTest(test.TestCase):
 
   def _npNormalizeMoments(self, counts, mean_ss, variance_ss, shift):
@@ -436,6 +449,7 @@ class NormalizeMomentsTest(test.TestCase):
       self._testNormalizeMoments([2, 3], shift)
 
 
+@test_util.with_c_api
 class MomentsTest(test.TestCase):
 
   def _unweighted_moments(self, x, axes, keep_dims=False, extra_out_grads=None):
@@ -573,6 +587,7 @@ class MomentsTest(test.TestCase):
     self._testGlobalGradient(from_y="var")
 
 
+@test_util.with_c_api
 class WeightedMomentsTest(MomentsTest):
   """Tests for nn.weighted_moments.
 
-- 
GitLab


From 2cdd76e65c37eda1cac7547ab3d32d8f19f5fef4 Mon Sep 17 00:00:00 2001
From: Skye Wanderman-Milne <skyewm@google.com>
Date: Thu, 14 Dec 2017 09:20:51 -0800
Subject: [PATCH 0999/1225] Refactor Graph._create_op_from_tf_operation to not
 depend on the op's inputs

Previously, importer._ProcessNewOps may have call
_create_op_from_tf_operation on a newly-imported op before
_create_op_from_tf_operation has been called on all its inputs. This
would fail since _create_op_from_tf_operation contained calls to
Operation.inputs (some indirectly, e.g. through Operation.__init__).

This change factors out the _create_op_from_tf_operation and
Operation.__init__ logic requiring the inputs, and creates a new
Graph._add_new_tf_operations method that creates all the Operation and
then applies the factored-out logic.

This also removes ImportGraphDefTest.TestCyclic and replaces it with a
new test, testWhileLoop. The current Python implementation of
import_graph_def allows any cycle to be imported. However, with the C
API enabled, while loops are the only legal cycles. This test exposes
this case since not all inputs can be available when creating the
Operations forming the while loop cycle (it wasn't exposed before
since the C++ ImportGraphDef function create nodes in topological
order, although this isn't part of the API contract).

PiperOrigin-RevId: 179052930
---
 tensorflow/python/framework/importer.py      |  8 +--
 tensorflow/python/framework/importer_test.py | 29 ++++----
 tensorflow/python/framework/ops.py           | 69 +++++++++++++++-----
 tensorflow/python/framework/ops_test.py      | 36 +++++-----
 4 files changed, 81 insertions(+), 61 deletions(-)

diff --git a/tensorflow/python/framework/importer.py b/tensorflow/python/framework/importer.py
index d74fb25bb3..33c966ad88 100644
--- a/tensorflow/python/framework/importer.py
+++ b/tensorflow/python/framework/importer.py
@@ -278,8 +278,6 @@ def _PopulateTFImportGraphDefOptions(options, prefix, input_map,
       c_api.TF_ImportGraphDefOptionsAddReturnOperation(options,
                                                        compat.as_str(name))
 
-  # TODO(skyewm): control dependencies
-
 
 def _ProcessNewOps(graph):
   """Processes the newly-added TF_Operations in `graph`."""
@@ -287,11 +285,7 @@ def _ProcessNewOps(graph):
   # is specified in the attributes.
   colocation_pairs = {}
 
-  for c_op in c_api_util.new_tf_operations(graph):
-    # pylint: disable=protected-access
-    new_op = graph._create_op_from_tf_operation(c_op, compute_device=False)
-    # pylint: enable=protected-access
-
+  for new_op in graph._add_new_tf_operations(compute_devices=False):  # pylint: disable=protected-access
     colocation_names = _GetColocationNames(new_op)
     if colocation_names:
       colocation_pairs[new_op] = colocation_names
diff --git a/tensorflow/python/framework/importer_test.py b/tensorflow/python/framework/importer_test.py
index 0da651c607..c57b7d47b8 100644
--- a/tensorflow/python/framework/importer_test.py
+++ b/tensorflow/python/framework/importer_test.py
@@ -34,6 +34,7 @@ from tensorflow.python.framework import test_ops  # pylint: disable=unused-impor
 from tensorflow.python.framework import test_util
 from tensorflow.python.framework import versions
 from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import control_flow_ops
 from tensorflow.python.ops import gradients_impl
 from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import nn_ops
@@ -336,24 +337,20 @@ class ImportGraphDefTest(test.TestCase):
       self.assertEqual(d._input_dtypes, [dtypes.int32_ref, dtypes.int32])
       self.assertEqual(d.outputs, [])
 
-  def testCyclic(self):
-    # Importing cycles not supported with C API enabled (this test will
-    # eventually be deleted).
-    # TODO(skyewm): write while loop test
-    if ops._USE_C_API: return
+  def testWhileLoop(self):
+    # Produce GraphDef containing while loop.
+    graph = ops.Graph()
+    with graph.as_default():
+      r = control_flow_ops.while_loop(lambda i: i < 10, lambda i: i + 1, [0])
+    graph_def = graph.as_graph_def()
 
+    # Import the GraphDef and make sure it runs.
     with ops.Graph().as_default():
-      a, b = importer.import_graph_def(
-          self._MakeGraphDef("""
-          node { name: 'A' op: 'Unary'
-                 attr { key: 'T' value { type: DT_INT32 } } input: 'B:0' }
-          node { name: 'B' op: 'Unary'
-                 attr { key: 'T' value { type: DT_INT32 } } input: 'A:0' }
-          """),
-          return_elements=["A", "B"])
-
-      self.assertEqual(a.inputs[0], b.outputs[0])
-      self.assertEqual(b.inputs[0], a.outputs[0])
+      imported_r, = importer.import_graph_def(graph_def,
+                                              return_elements=[r.name])
+      self.assertEqual(imported_r.name, "import/" + r.name)
+      with self.test_session() as sess:
+        self.assertEqual(sess.run(imported_r), 10)
 
   def testTypeMismatchInGraphDef(self):
     if ops._USE_C_API:
diff --git a/tensorflow/python/framework/ops.py b/tensorflow/python/framework/ops.py
index 95b1cefcbe..836f09fba8 100644
--- a/tensorflow/python/framework/ops.py
+++ b/tensorflow/python/framework/ops.py
@@ -1561,7 +1561,7 @@ class Operation(object):
     # an Operation for that op. This is useful for creating Operations for ops
     # indirectly created by C API methods, e.g. the ops created by
     # TF_ImportGraphDef. When `node_def` is a TF_Operation, all optional fields
-    # except `control_inputs` should be None.
+    # should be None.
 
     if isinstance(node_def, node_def_pb2.NodeDef):
       if node_def.ByteSize() >= (1 << 31) or node_def.ByteSize() < 0:
@@ -1574,6 +1574,7 @@ class Operation(object):
     elif type(node_def).__name__ == "SwigPyObject":
       assert inputs is None
       assert output_types is None
+      assert control_inputs is None
       assert input_types is None
       assert original_op is None
       assert op_def is None
@@ -1623,13 +1624,13 @@ class Operation(object):
     self._original_op = original_op
     self._op_def = op_def
     self._traceback = self._graph._extract_stack()  # pylint: disable=protected-access
+    self._control_flow_context = self.graph._get_control_flow_context()  # pylint: disable=protected-access
 
     # Initialize self._c_op.
     if c_op:
       # TODO(skyewm): remove this assert when we remove USE_C_API
       assert self._graph._c_graph  # pylint: disable=protected-access
       self._c_op = c_op
-      self._add_control_inputs(self._control_inputs)
     elif self._graph._c_graph:  # pylint: disable=protected-access
       if self._op_def:
         # TODO(skyewm): op_def_library.apply_op() flattens the incoming
@@ -1648,7 +1649,7 @@ class Operation(object):
     # Mark that we consume the inputs. This is unnecessary and unsupported with
     # the C API enabled, since the C API tracks the tensor consumers instead.
     if not self._c_op:
-      for input_tensor in self.inputs:
+      for input_tensor in self._inputs:
         input_tensor._add_consumer(self)  # pylint: disable=protected-access
 
     # Initialize self._outputs.
@@ -1666,8 +1667,15 @@ class Operation(object):
         for i, output_type in enumerate(output_types)
     ]
 
-    # Add this op to the current control flow context.
-    self._control_flow_context = g._get_control_flow_context()  # pylint: disable=protected-access
+    if not c_op:
+      self._control_flow_post_processing()
+
+  def _control_flow_post_processing(self):
+    """Add this op to its control flow context.
+
+    This may add new ops and change this op's inputs. self.inputs must be
+    available before calling this method.
+    """
     for input_tensor in self.inputs:
       control_flow_util.CheckInputFromValidContext(self, input_tensor.op)
     if self._control_flow_context is not None:
@@ -3170,6 +3178,10 @@ class Graph(object):
     field. This is used to create Operation objects around TF_Operations created
     indirectly by the C API (e.g. by TF_ImportGraphDef, TF_FinishWhile).
 
+    This function does not call Operation._control_flow_post_processing or
+    Graph._control_dependencies_for_inputs (since the inputs may not be
+    available yet). The caller is responsible for calling these methods.
+
     Args:
       c_op: a wrapped TF_Operation
       compute_device: (Optional.) If True, device functions will be executed
@@ -3179,19 +3191,9 @@ class Graph(object):
       An `Operation` object.
     """
     self._check_not_finalized()
-    tf_outputs = c_api.GetOperationInputs(c_op)
-    input_ops = set(self._get_operation_by_tf_operation(output.oper)
-                    for output in tf_outputs)
-    control_inputs = self._control_dependencies_for_inputs(input_ops)
-
-    # Update _names_in_use before calling the Operation constructor since the
-    # control flow code may create more Operations, and we don't want the names
-    # to conflict.
-    op_name = c_api.TF_OperationName(c_op)
-    assert op_name not in self._names_in_use
-    self._names_in_use[op_name] = 1
-
-    ret = Operation(c_op, self, control_inputs=control_inputs)
+    ret = Operation(c_op, self)
+    assert ret.name not in self._names_in_use
+    self._names_in_use[ret.name] = 1
     self._create_op_helper(ret, compute_device=compute_device)
     return ret
 
@@ -3287,6 +3289,37 @@ class Graph(object):
           op._set_attr("container", attr_value_pb2.AttrValue(  # pylint: disable=protected-access
               s=compat.as_bytes(self._container)))
 
+  def _add_new_tf_operations(self, compute_devices=True):
+    """Creates `Operations` in this graph for any new TF_Operations.
+
+    This is useful for when TF_Operations are indirectly created by the C API
+    outside of the Operation constructor (e.g. by TF_ImportGraphDef,
+    TF_FinishWhile). This ensures there are corresponding Operations for all
+    TF_Operations in the underlying TF_Graph.
+
+    Args:
+      compute_devices: (Optional.) If True, device functions will be executed
+        to compute the device properties of each new Operation.
+
+    Returns:
+      A list of the new `Operation` objects.
+    """
+    # Create all Operation objects before accessing their inputs since an op may
+    # be created before its inputs.
+    new_ops = [
+        self._create_op_from_tf_operation(c_op, compute_device=compute_devices)
+        for c_op in c_api_util.new_tf_operations(self)
+    ]
+
+    for op in new_ops:
+      new_control_inputs = self._control_dependencies_for_inputs(op.inputs)
+      # pylint: disable=protected-access
+      op._add_control_inputs(new_control_inputs)
+      op._control_flow_post_processing()
+      # pylint: enable=protected-access
+
+    return new_ops
+
   def as_graph_element(self, obj, allow_tensor=True, allow_operation=True):
     """Returns the object referred to by `obj`, as an `Operation` or `Tensor`.
 
diff --git a/tensorflow/python/framework/ops_test.py b/tensorflow/python/framework/ops_test.py
index 7d279760c8..f04f0cc56d 100644
--- a/tensorflow/python/framework/ops_test.py
+++ b/tensorflow/python/framework/ops_test.py
@@ -797,10 +797,10 @@ class CreateOpFromTFOperationTest(test_util.TensorFlowTestCase):
 
       def true_fn():
         if ops._USE_C_API:
-          c_op = ops._create_c_op(ops.get_default_graph(),
-                                  ops._NodeDef("IntInput", "cond/myop"), [x],
-                                  [])
-          ops.get_default_graph()._create_op_from_tf_operation(c_op)
+          ops._create_c_op(ops.get_default_graph(),
+                           ops._NodeDef("IntInput", "cond/myop"), [x], [])
+          new_ops = g._add_new_tf_operations()
+          self.assertEqual(len(new_ops), 1)
         else:
           # Test pure-Python version to make sure C API has same behavior.
           test_ops.int_input(x, name="myop")
@@ -830,10 +830,10 @@ class CreateOpFromTFOperationTest(test_util.TensorFlowTestCase):
 
       def body(i):
         if ops._USE_C_API:
-          c_op = ops._create_c_op(ops.get_default_graph(),
-                                  ops._NodeDef("IntInput", "myloop/myop"), [x],
-                                  [])
-          ops.get_default_graph()._create_op_from_tf_operation(c_op)
+          ops._create_c_op(ops.get_default_graph(),
+                           ops._NodeDef("IntInput", "myloop/myop"), [x], [])
+          new_ops = g._add_new_tf_operations()
+          self.assertEqual(len(new_ops), 1)
         else:
           # Test pure-Python version to make sure C API has same behavior.
           test_ops.int_input(x, name="myop")
@@ -864,11 +864,11 @@ class CreateOpFromTFOperationTest(test_util.TensorFlowTestCase):
       def body(i):
         c = constant_op.constant(1.0, name="c")
         if ops._USE_C_API:
-          c_op = ops._create_c_op(ops.get_default_graph(),
-                                  ops._NodeDef("IntInput", "myloop/myop"), [x],
-                                  [])
+          ops._create_c_op(ops.get_default_graph(),
+                           ops._NodeDef("IntInput", "myloop/myop"), [x], [])
           with ops.control_dependencies([c]):
-            ops.get_default_graph()._create_op_from_tf_operation(c_op)
+            new_ops = g._add_new_tf_operations()
+            self.assertEqual(len(new_ops), 1)
         else:
           with ops.control_dependencies([c]):
             test_ops.int_input(x, name="myop")
@@ -884,10 +884,6 @@ class CreateOpFromTFOperationTest(test_util.TensorFlowTestCase):
     self.assertEqual(op.control_inputs, [c])
 
   def testWhileLoopWithExternalControlDep(self):
-    # TODO(skyewm): enable once ControlFlowContext._RemoveExternalControlEdges
-    # works with C API enabled
-    if ops._USE_C_API: self.skipTest("Not yet implemented with C API enabled")
-
     g = ops.Graph()
     with g.as_default():
       x = test_ops.int_output()
@@ -895,11 +891,11 @@ class CreateOpFromTFOperationTest(test_util.TensorFlowTestCase):
 
       def body(i):
         if ops._USE_C_API:
-          c_op = ops._create_c_op(ops.get_default_graph(),
-                                  ops._NodeDef("IntInput", "myloop/myop"), [x],
-                                  [])
+          ops._create_c_op(ops.get_default_graph(),
+                           ops._NodeDef("IntInput", "myloop/myop"), [x], [])
           with ops.control_dependencies([c]):
-            ops.get_default_graph()._create_op_from_tf_operation(c_op)
+            new_ops = g._add_new_tf_operations()
+            self.assertEqual(len(new_ops), 1)
         else:
           with ops.control_dependencies([c]):
             test_ops.int_input(x, name="myop")
-- 
GitLab


From 2bb302ee26c9389df138b82f0bebac5ee01f3c30 Mon Sep 17 00:00:00 2001
From: David Norman <DavidNorman@users.noreply.github.com>
Date: Thu, 14 Dec 2017 18:08:05 +0000
Subject: [PATCH 1000/1225] [XLA] Add fast path cases for common scatter and
 gather operations (#15185)

* Add fast path cases for common scatter and gather operations

* Updates following code review
---
 .../tf2xla/kernels/tensor_array_ops.cc        | 87 ++++++++++++++-----
 1 file changed, 66 insertions(+), 21 deletions(-)

diff --git a/tensorflow/compiler/tf2xla/kernels/tensor_array_ops.cc b/tensorflow/compiler/tf2xla/kernels/tensor_array_ops.cc
index 351fda2517..03c22354a9 100644
--- a/tensorflow/compiler/tf2xla/kernels/tensor_array_ops.cc
+++ b/tensorflow/compiler/tf2xla/kernels/tensor_array_ops.cc
@@ -311,6 +311,32 @@ class TensorArrayGatherOp : public XlaOpKernel {
 
     xla::ComputationDataHandle ta = resource->value;
 
+    // Look for the case where the gather takes a simple slice from the
+    // tensor array (0, 1, 2, 3, 4, ..., N)
+    std::vector<int64> const_indices;
+    Status status = ctx->ConstantInputAsIntVector(1, &const_indices);
+    if (status.ok()) {
+      bool gather_is_dense_slice = true;
+      for (auto i = 0; i < const_indices.size(); i++) {
+        if (const_indices[i] != i) {
+          gather_is_dense_slice = false;
+          break;
+        }
+      }
+
+      if (gather_is_dense_slice) {
+        std::vector<int64> begin(ta_shape.dims(), 0);
+        std::vector<int64> strides(ta_shape.dims(), 1);
+        std::vector<int64> end(ta_shape.dims(), 1);
+        end[0] = const_indices.size();
+        for (auto i = 1; i < ta_shape.dims(); i++) {
+          end[i] = ta_shape.dim_size(i);
+        }
+        ctx->SetOutput(0, b->Slice(ta, begin, end, strides));
+        return;
+      }
+    }
+
     xla::ComputationDataHandle gather = XlaComputeGatherDynamicSlice(
         ctx, ta, ta_shape, indices, indices_shape, 0, dtype_, index_type, b);
     ctx->SetOutput(0, gather);
@@ -352,28 +378,47 @@ class TensorArrayScatterOp : public XlaOpKernel {
     const xla::ComputationDataHandle value = ctx->Input(2);
     const xla::ComputationDataHandle flow = ctx->Input(3);
 
-    auto slice_dims = value_shape.dim_sizes();
-    slice_dims[0] = 1LL;
-
-    std::vector<int64> value_starts(value_shape.dims(), 0);
-    auto value_ends = value_shape.dim_sizes();
-
-    std::vector<int64> value_strides(value_shape.dims(), 1);
-
-    // For every (index, value) pair, update the corresponding TensorArray
-    // storage.
-    for (int i = 0; i < num_indices; ++i) {
-      // Slice out part of the value.
-      value_starts[0] = i;
-      value_ends[0] = i + 1;
-      auto slice = b->Slice(value, value_starts, value_ends, value_strides);
+    // Look for the case where the scatter is for each sub-tensor in order. The
+    // tensor array implementation allows for this to be a straight addition.
+    bool scatter_all_elements_in_order = false;
+    std::vector<int64> const_indices;
+    Status status = ctx->ConstantInputAsIntVector(1, &const_indices);
+    if (status.ok() && num_indices == value_shape.dim_size(0)) {
+      scatter_all_elements_in_order = true;
+      for (auto i = 0; i < num_indices; i++) {
+        if (const_indices[i] != i) {
+          scatter_all_elements_in_order = false;
+          break;
+        }
+      }
+    }
 
-      // start_indices of the DynamicUpdateSlice are [index, 0, 0, ..., 0].
-      auto index = b->Slice(indices, {i}, {i + 1}, {1});
-      auto start_indices =
-          b->Pad(b->Reshape(index, {1}), b->ConstantR0<int32>(0),
-                 xla::MakeEdgePaddingConfig({{0, elem_shape.dims()}}));
-      ta = DynamicAddSlice(b, ta, slice, slice_dims, start_indices);
+    if (scatter_all_elements_in_order) {
+      ta = b->Add(ta, value);
+    } else {
+      auto slice_dims = value_shape.dim_sizes();
+      slice_dims[0] = 1LL;
+
+      std::vector<int64> value_starts(value_shape.dims(), 0);
+      auto value_ends = value_shape.dim_sizes();
+
+      std::vector<int64> value_strides(value_shape.dims(), 1);
+
+      // For every (index, value) pair, update the corresponding TensorArray
+      // storage.
+      for (int i = 0; i < num_indices; ++i) {
+        // Slice out part of the value.
+        value_starts[0] = i;
+        value_ends[0] = i + 1;
+        auto slice = b->Slice(value, value_starts, value_ends, value_strides);
+
+        // start_indices of the DynamicUpdateSlice are [index, 0, 0, ..., 0].
+        auto index = b->Slice(indices, {i}, {i + 1}, {1});
+        auto start_indices =
+                b->Pad(b->Reshape(index, {1}), b->ConstantR0<int32>(0),
+                       xla::MakeEdgePaddingConfig({{0, elem_shape.dims()}}));
+        ta = DynamicAddSlice(b, ta, slice, slice_dims, start_indices);
+      }
     }
 
     resource->value = ta;
-- 
GitLab


From ae0b6e6ed7179412434c4f484a13b1ecb42af610 Mon Sep 17 00:00:00 2001
From: Alexandre Passos <apassos@google.com>
Date: Thu, 14 Dec 2017 10:09:01 -0800
Subject: [PATCH 1001/1225] .base_dtype when inferring dtypes of layers

Fixes #15262

PiperOrigin-RevId: 179059001
---
 tensorflow/python/layers/base.py      | 2 +-
 tensorflow/python/layers/core_test.py | 8 ++++++++
 2 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/tensorflow/python/layers/base.py b/tensorflow/python/layers/base.py
index c083f8a5d2..4c92aac915 100644
--- a/tensorflow/python/layers/base.py
+++ b/tensorflow/python/layers/base.py
@@ -629,7 +629,7 @@ class Layer(object):
           self._assert_input_compatibility(inputs)
           if input_list and self._dtype is None:
             try:
-              self._dtype = input_list[0].dtype.name
+              self._dtype = input_list[0].dtype.base_dtype.name
             except AttributeError:
               pass
           input_shapes = nest.map_structure(lambda x: x.get_shape(), inputs)
diff --git a/tensorflow/python/layers/core_test.py b/tensorflow/python/layers/core_test.py
index 2d47cc6979..2e99f783e0 100644
--- a/tensorflow/python/layers/core_test.py
+++ b/tensorflow/python/layers/core_test.py
@@ -59,6 +59,14 @@ class DenseTest(test.TestCase):
     dense.apply(random_ops.random_uniform((5, 2)))
     self.assertEqual(dense.name, 'dense_2')
 
+  def testVariableInput(self):
+    with self.test_session():
+      v = variable_scope.get_variable(
+          'X', initializer=init_ops.zeros_initializer(), shape=(1, 1))
+      x = core_layers.Dense(1)(v)
+      variables.global_variables_initializer().run()
+      self.assertAllEqual(x.eval(), [[0.0]])
+
   @test_util.run_in_graph_and_eager_modes()
   def testCall(self):
     dense = core_layers.Dense(2, activation=nn_ops.relu, name='my_dense')
-- 
GitLab


From ebca7dc035a2ba2a78914851d381d065b8b8f449 Mon Sep 17 00:00:00 2001
From: Mark Daoust <markdaoust@google.com>
Date: Thu, 14 Dec 2017 10:24:57 -0800
Subject: [PATCH 1002/1225] Minor fixes for "Linear" tutorial

PiperOrigin-RevId: 179061248
---
 tensorflow/docs_src/tutorials/index.md  |  2 +-
 tensorflow/docs_src/tutorials/linear.md | 44 ++++++++++++++-----------
 2 files changed, 25 insertions(+), 21 deletions(-)

diff --git a/tensorflow/docs_src/tutorials/index.md b/tensorflow/docs_src/tutorials/index.md
index 6e24f47882..15f8b54a29 100644
--- a/tensorflow/docs_src/tutorials/index.md
+++ b/tensorflow/docs_src/tutorials/index.md
@@ -48,7 +48,7 @@ The following tutorials focus on linear models:
 
 The following tutorial covers building a classification model for sequences:
 
-  * ${$recurrent_quickdraw$Classifying Drawings using Recurrent Neural Networks}
+  * @{$recurrent_quickdraw$Classifying Drawings using Recurrent Neural Networks}
 
 Although TensorFlow specializes in machine learning, you may also use
 TensorFlow to solve other kinds of math problems.  For example:
diff --git a/tensorflow/docs_src/tutorials/linear.md b/tensorflow/docs_src/tutorials/linear.md
index d333d01279..dddb034107 100644
--- a/tensorflow/docs_src/tutorials/linear.md
+++ b/tensorflow/docs_src/tutorials/linear.md
@@ -1,36 +1,40 @@
 # Large-scale Linear Models with TensorFlow
 
-The tf.estimator API provides (among other things) a rich set of tools for
+@{tf.estimator$Estimators} provides (among other things) a rich set of tools for
 working with linear models in TensorFlow. This document provides an overview of
 those tools. It explains:
 
-   * what a linear model is.
-   * why you might want to use a linear model.
-   * how tf.estimator makes it easy to build linear models in TensorFlow.
-   * how you can use tf.estimator to combine linear models with
-   deep learning to get the advantages of both.
+   * What a linear model is.
+   * Why you might want to use a linear model.
+   * How Estimators make it easy to build linear models in TensorFlow.
+   * How you can use Estimators to combine linear models with.
+     deep learning to get the advantages of both.
 
-Read this overview to decide whether the tf.estimator linear model tools might
+Read this overview to decide whether the Estimator's linear model tools  might
 be useful to you. Then do the @{$wide$Linear Models tutorial} to
 give it a try. This overview uses code samples from the tutorial, but the
 tutorial walks through the code in greater detail.
 
 To understand this overview it will help to have some familiarity
-with basic machine learning concepts, and also with @{$get_started/estimator$`tf.estimator`}.
+with basic machine learning concepts, and also with
+@{$get_started/estimator$Estimators}.
 
 [TOC]
 
 ## What is a linear model?
 
-A *linear model* uses a single weighted sum of features to make a prediction.
-For example, if you have [data](https://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.names)
+A **linear model** uses a single weighted sum of features to make a prediction.
+For example, if you have
+[data](https://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.names)
 on age, years of education, and weekly hours of
-work for a population, you can learn weights for each of those numbers so that
+work for a population, a model can learn weights for each of those numbers so that
 their weighted sum estimates a person's salary. You can also use linear models
 for classification.
 
 Some linear models transform the weighted sum into a more convenient form. For
-example, *logistic regression* plugs the weighted sum into the logistic
+example, 
+[**logistic regression**](https://developers.google.com/machine-learning/glossary/#logistic_regression)
+plugs the weighted sum into the logistic
 function to turn the output into a value between 0 and 1. But you still just
 have one weight for each input feature.
 
@@ -51,10 +55,10 @@ Linear models:
    * provide an excellent starting point for learning about machine learning.
    * are widely used in industry.
 
-## How does tf.estimator help you build linear models?
+## How do Estimators help you build linear models?
 
 You can build a linear model from scratch in TensorFlow without the help of a
-special API. But tf.estimator provides some tools that make it easier to build
+special API. But Estimators provides some tools that make it easier to build
 effective large-scale linear models.
 
 ### Feature columns and transformations
@@ -86,10 +90,10 @@ become [0, 1, 0] and 'green' would become [0, 0, 1]. These vectors are called
 "sparse" because they may be very long, with many zeros, when the set of
 possible values is very large (such as all English words).
 
-While you don't need to use categorical columns to use tf.estimator linear
-models, one of the strengths of linear models is their ability to deal with
-large sparse vectors. Sparse features are a primary use case for the
-tf.estimator linear model tools.
+While you don't need to use categorical columns to use the linear model tools
+provided by Estimators, one of the strengths of linear models is their ability
+to deal with large sparse vectors. Sparse features are a primary use case for
+the linear model tools provided by Estimators.
 
 ##### Encoding sparse columns
 
@@ -173,7 +177,7 @@ the data itself. You provide the data through an input function.
 The input function must return a dictionary of tensors. Each key corresponds to
 the name of a `FeatureColumn`. Each key's value is a tensor containing the
 values of that feature for all data instances. See
-@{$input_fn$Building Input Functions with tf.estimator} for a
+@{$input_fn$Building Input Functions} for a
 more comprehensive look at input functions, and `input_fn` in the
 [linear models tutorial code](https://github.com/tensorflow/models/tree/master/official/wide_deep/wide_deep.py)
 for an example implementation of an input function.
@@ -220,7 +224,7 @@ for key in sorted(results):
 
 ### Wide and deep learning
 
-The tf.estimator API also provides an estimator class that lets you jointly
+The `tf.estimator` module also provides an estimator class that lets you jointly
 train a linear model and a deep neural network. This novel approach combines the
 ability of linear models to "memorize" key features with the generalization
 ability of neural nets. Use `tf.estimator.DNNLinearCombinedClassifier` to
-- 
GitLab


From 3b52e0323d1a6328f27614e7b9ba401cfc29f8ef Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 14 Dec 2017 10:32:33 -0800
Subject: [PATCH 1003/1225] Enable neutral element optimization.

PiperOrigin-RevId: 179062410
---
 tensorflow/core/grappler/optimizers/constant_folding.cc | 7 +++----
 tensorflow/python/grappler/cluster_test.py              | 4 ++--
 2 files changed, 5 insertions(+), 6 deletions(-)

diff --git a/tensorflow/core/grappler/optimizers/constant_folding.cc b/tensorflow/core/grappler/optimizers/constant_folding.cc
index 007e3161f1..a43c9bf270 100644
--- a/tensorflow/core/grappler/optimizers/constant_folding.cc
+++ b/tensorflow/core/grappler/optimizers/constant_folding.cc
@@ -1355,9 +1355,8 @@ Status ConstantFolding::SimplifyGraph(GraphDef* output,
     const bool is_add = IsAdd(*node) || IsBiasAdd(*node);
     const bool is_sub = IsSub(*node);
     const bool is_any_div = IsAnyDiv(*node);
-    // Simplify multiplication by ones or zeros, and addition/subtraction of
-    // zeros.
-    if (is_aggressive && use_shape_info &&
+    // Simplify arithmetic operations with ones or zeros.
+    if (safe_to_use_shapes &&
         (is_mul || is_matmul || is_add || is_sub || is_any_div) &&
         properties.HasInputProperties(node->name()) &&
         properties.HasOutputProperties(node->name())) {
@@ -1370,7 +1369,7 @@ Status ConstantFolding::SimplifyGraph(GraphDef* output,
       const TensorShapeProto& output_shape =
           properties.GetOutputProperties(node->name())[0].shape();
 
-      // Simplify element-wise  multiplication by ones or addition/subtraction
+      // Simplify element-wise multiplication by ones or addition/subtraction
       // of zeros.
       const TensorShapeProto& y_shape =
           properties.GetInputProperties(node->name())[1].shape();
diff --git a/tensorflow/python/grappler/cluster_test.py b/tensorflow/python/grappler/cluster_test.py
index 77dd55981b..3ddcb741b5 100644
--- a/tensorflow/python/grappler/cluster_test.py
+++ b/tensorflow/python/grappler/cluster_test.py
@@ -43,7 +43,7 @@ class ClusterTest(test.TestCase):
       op_perfs, run_time, step_stats = grappler_cluster.MeasureCosts(
           grappler_item)
       self.assertTrue(run_time > 0)
-      self.assertEqual(len(op_perfs), 10)
+      self.assertEqual(len(op_perfs), 9)
       self.assertTrue(step_stats.dev_stats)
 
   def testNoDetailedStats(self):
@@ -120,7 +120,7 @@ class ClusterTest(test.TestCase):
         disable_detailed_stats=False, disable_timeline=False) as gcluster:
       op_perfs, run_time, step_stats = gcluster.MeasureCosts(grappler_item)
       self.assertTrue(run_time > 0)
-      self.assertEqual(len(op_perfs), 10)
+      self.assertEqual(len(op_perfs), 9)
       self.assertTrue(step_stats.dev_stats)
 
 
-- 
GitLab


From 0761849e0a3327f1e3499910ad8af8e6ed712f26 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 14 Dec 2017 10:49:17 -0800
Subject: [PATCH 1004/1225] Makes feature_column.InputLayer an exported symbol.

PiperOrigin-RevId: 179064942
---
 tensorflow/contrib/layers/python/layers/feature_column.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/tensorflow/contrib/layers/python/layers/feature_column.py b/tensorflow/contrib/layers/python/layers/feature_column.py
index 092d418c3f..8d2931b486 100644
--- a/tensorflow/contrib/layers/python/layers/feature_column.py
+++ b/tensorflow/contrib/layers/python/layers/feature_column.py
@@ -156,6 +156,10 @@ from tensorflow.python.platform import tf_logging as logging
 from tensorflow.python.util import deprecation
 
 
+# Imports the core `InputLayer` symbol in contrib during development.
+InputLayer = fc_core.InputLayer  # pylint: disable=invalid-name
+
+
 class _LinearEmbeddingLookupArguments(
     collections.namedtuple("_LinearEmbeddingLookupArguments",
                            ["input_tensor",
-- 
GitLab


From 3d854a744d1236944eb0ecdc172b1825ace565e1 Mon Sep 17 00:00:00 2001
From: Derek Murray <mrry@google.com>
Date: Thu, 14 Dec 2017 11:08:00 -0800
Subject: [PATCH 1005/1225] [tf.data] Reimplement
 `tf.contrib.data.get_single_element()` as an async op.

The current `ToSingleElementOp` kernel has a synchronous
implementation, and yet it can block an inter-op threadpool thread (in
`iterator->GetNext()`) while depending on another (e.g. if the
iterator calls a TensorFlow function to produce an element). This can
lead to deadlock if the number of inter-op threadpool threads is less
than or equal to the number of concurrent activations of that
kernel. This change fixes that deadlock by moving the blocking
computation onto a background thread.

PiperOrigin-RevId: 179067816
---
 tensorflow/core/kernels/iterator_ops.cc | 72 ++++++++++++++++---------
 1 file changed, 46 insertions(+), 26 deletions(-)

diff --git a/tensorflow/core/kernels/iterator_ops.cc b/tensorflow/core/kernels/iterator_ops.cc
index 439775157b..4e81d40a82 100644
--- a/tensorflow/core/kernels/iterator_ops.cc
+++ b/tensorflow/core/kernels/iterator_ops.cc
@@ -448,40 +448,60 @@ class MakeIteratorOp : public OpKernel {
   }
 };
 
-class ToSingleElementOp : public OpKernel {
+class ToSingleElementOp : public AsyncOpKernel {
  public:
-  explicit ToSingleElementOp(OpKernelConstruction* ctx) : OpKernel(ctx) {}
+  explicit ToSingleElementOp(OpKernelConstruction* ctx)
+      : AsyncOpKernel(ctx),
+        thread_pool_(new thread::ThreadPool(
+            ctx->env(), ThreadOptions(),
+            strings::StrCat("to_single_element_op_thread_",
+                            SanitizeThreadSuffix(name())),
+            1 /* num_threads */, false /* low_latency_hint */)) {}
 
-  void Compute(OpKernelContext* ctx) override {
-    DatasetBase* dataset;
-    OP_REQUIRES_OK(ctx, GetDatasetFromVariantTensor(ctx->input(0), &dataset));
-    auto iterator = dataset->MakeIterator("SingleElementIterator");
+  void ComputeAsync(OpKernelContext* ctx, DoneCallback done) override {
+    // The call to `iterator->GetNext()` may block and depend on an
+    // inter-op thread pool thread, so we issue the call from the
+    // owned thread pool.
+    thread_pool_->Schedule([ctx, done]() {
+      DatasetBase* dataset;
+      OP_REQUIRES_OK_ASYNC(
+          ctx, GetDatasetFromVariantTensor(ctx->input(0), &dataset), done);
+      auto iterator = dataset->MakeIterator("SingleElementIterator");
 
-    IteratorContext::Params params;
-    params.env = ctx->env();
-    params.runner = *(ctx->runner());
-    IteratorContext iter_ctx(std::move(params));
+      IteratorContext::Params params;
+      params.env = ctx->env();
+      params.runner = *(ctx->runner());
+      IteratorContext iter_ctx(std::move(params));
 
-    std::vector<Tensor> components;
-    components.reserve(dataset->output_dtypes().size());
-    bool end_of_sequence;
+      std::vector<Tensor> components;
+      components.reserve(dataset->output_dtypes().size());
+      bool end_of_sequence;
 
-    OP_REQUIRES_OK(ctx,
-                   iterator->GetNext(&iter_ctx, &components, &end_of_sequence));
-    OP_REQUIRES(ctx, !end_of_sequence,
-                errors::InvalidArgument("Dataset was empty."));
+      OP_REQUIRES_OK_ASYNC(
+          ctx, iterator->GetNext(&iter_ctx, &components, &end_of_sequence),
+          done);
+      OP_REQUIRES_ASYNC(ctx, !end_of_sequence,
+                        errors::InvalidArgument("Dataset was empty."), done);
 
-    for (int i = 0; i < components.size(); ++i) {
-      // TODO(mrry): Check that the shapes match the shape attrs.
-      ctx->set_output(i, components[i]);
-    }
+      for (int i = 0; i < components.size(); ++i) {
+        // TODO(mrry): Check that the shapes match the shape attrs.
+        ctx->set_output(i, components[i]);
+      }
 
-    components.clear();
-    OP_REQUIRES_OK(ctx,
-                   iterator->GetNext(&iter_ctx, &components, &end_of_sequence));
-    OP_REQUIRES(ctx, end_of_sequence,
-                errors::InvalidArgument("Dataset had more than one element."));
+      components.clear();
+      OP_REQUIRES_OK_ASYNC(
+          ctx, iterator->GetNext(&iter_ctx, &components, &end_of_sequence),
+          done);
+      OP_REQUIRES_ASYNC(
+          ctx, end_of_sequence,
+          errors::InvalidArgument("Dataset had more than one element."), done);
+
+      done();
+    });
   }
+
+ private:
+  std::unique_ptr<thread::ThreadPool> thread_pool_;
 };
 
 class OneShotIteratorOp : public AsyncOpKernel {
-- 
GitLab


From 9277bb73a926684d4346a56fec6c117873a9a84a Mon Sep 17 00:00:00 2001
From: Yilei Yang <yileiyang@google.com>
Date: Thu, 14 Dec 2017 11:12:52 -0800
Subject: [PATCH 1006/1225] Continue to allow flag access before explicit
 parse.

Made tf.flags.FLAGS a wrapper of absl.flags.FLAGS, when the flag is access, parse flags implicitly with sys.argv if not yet.

PiperOrigin-RevId: 179068530
---
 tensorflow/python/BUILD                  |  5 ++-
 tensorflow/python/platform/flags.py      | 55 +++++++++++++++++++++++
 tensorflow/python/platform/flags_test.py | 57 +++++++++++++++++++++++-
 3 files changed, 114 insertions(+), 3 deletions(-)

diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD
index 4012197bce..e77fba4a4c 100644
--- a/tensorflow/python/BUILD
+++ b/tensorflow/python/BUILD
@@ -171,7 +171,10 @@ tf_py_test(
     name = "flags_test",
     size = "small",
     srcs = ["platform/flags_test.py"],
-    additional_deps = [":platform"],
+    additional_deps = [
+        ":client_testlib",
+        ":platform",
+    ],
 )
 
 tf_py_test(
diff --git a/tensorflow/python/platform/flags.py b/tensorflow/python/platform/flags.py
index abd6f3d855..6225db7744 100644
--- a/tensorflow/python/platform/flags.py
+++ b/tensorflow/python/platform/flags.py
@@ -19,6 +19,7 @@ from __future__ import division
 from __future__ import print_function
 
 import logging as _logging
+import sys as _sys
 
 # go/tf-wildcard-import
 from absl.flags import *  # pylint: disable=wildcard-import
@@ -59,6 +60,58 @@ def _wrap_define_function(original_function):
   return tf_decorator.make_decorator(original_function, wrapper)
 
 
+class _FlagValuesWrapper(object):
+  """Wrapper class for absl.flags.FLAGS.
+
+  The difference is that tf.flags.FLAGS implicitly parses flags with sys.argv
+  when accessing the FLAGS values before it's explicitly parsed,
+  while absl.flags.FLAGS raises an exception.
+  """
+
+  def __init__(self, flags_object):
+    self.__dict__['__wrapped'] = flags_object
+
+  def __getattribute__(self, name):
+    if name == '__dict__':
+      return super(_FlagValuesWrapper, self).__getattribute__(name)
+    return self.__dict__['__wrapped'].__getattribute__(name)
+
+  def __getattr__(self, name):
+    wrapped = self.__dict__['__wrapped']
+    # To maintain backwards compatibility, implicitly parse flags when reading
+    # a flag.
+    if not wrapped.is_parsed():
+      wrapped(_sys.argv)
+    return wrapped.__getattr__(name)
+
+  def __setattr__(self, name, value):
+    return self.__dict__['__wrapped'].__setattr__(name, value)
+
+  def __delattr__(self, name):
+    return self.__dict__['__wrapped'].__delattr__(name)
+
+  def __dir__(self):
+    return self.__dict__['__wrapped'].__dir__()
+
+  def __getitem__(self, name):
+    return self.__dict__['__wrapped'].__getitem__(name)
+
+  def __setitem__(self, name, flag):
+    return self.__dict__['__wrapped'].__setitem__(name, flag)
+
+  def __len__(self):
+    return self.__dict__['__wrapped'].__len__()
+
+  def __iter__(self):
+    return self.__dict__['__wrapped'].__iter__()
+
+  def __str__(self):
+    return self.__dict__['__wrapped'].__str__()
+
+  def __call__(self, *args, **kwargs):
+    return self.__dict__['__wrapped'].__call__(*args, **kwargs)
+
+
 # pylint: disable=invalid-name,used-before-assignment
 # absl.flags APIs use `default` as the name of the default value argument.
 # Allow the following functions continue to accept `default_value`.
@@ -68,3 +121,5 @@ DEFINE_bool = DEFINE_boolean
 DEFINE_float = _wrap_define_function(DEFINE_float)
 DEFINE_integer = _wrap_define_function(DEFINE_integer)
 # pylint: enable=invalid-name,used-before-assignment
+
+FLAGS = _FlagValuesWrapper(FLAGS)  # pylint: disable=used-before-assignment
diff --git a/tensorflow/python/platform/flags_test.py b/tensorflow/python/platform/flags_test.py
index e8200142dd..bd3c8e3995 100644
--- a/tensorflow/python/platform/flags_test.py
+++ b/tensorflow/python/platform/flags_test.py
@@ -17,11 +17,13 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
+import sys
 import unittest
 
 from absl import flags as absl_flags
 
 from tensorflow.python.platform import flags
+from tensorflow.python.platform import test
 
 
 flags.DEFINE_string(
@@ -48,8 +50,59 @@ flags.DEFINE_boolean(
 
 class FlagsTest(unittest.TestCase):
 
-  def test_global_flags_object(self):
-    self.assertIs(flags.FLAGS, absl_flags.FLAGS)
+  def setUp(self):
+    self.original_flags = flags.FlagValues()
+    self.wrapped_flags = flags._FlagValuesWrapper(self.original_flags)
+    flags.DEFINE_string(
+        'test', 'default', 'test flag', flag_values=self.wrapped_flags)
+
+  def test_attribute_overrides(self):
+    # Test that methods defined in absl.flags.FlagValues are the same as the
+    # wrapped ones.
+    self.assertEqual(flags.FLAGS.is_parsed, absl_flags.FLAGS.is_parsed)
+
+  def test_getattr(self):
+    self.assertFalse(self.wrapped_flags.is_parsed())
+    with test.mock.patch.object(sys, 'argv', new=['program', '--test=new']):
+      self.assertEqual('new', self.wrapped_flags.test)
+    self.assertTrue(self.wrapped_flags.is_parsed())
+
+  def test_setattr(self):
+    self.assertEqual('default', self.wrapped_flags.test)
+    self.wrapped_flags.test = 'new'
+    self.assertEqual('new', self.wrapped_flags.test)
+
+  def test_delattr(self):
+    del self.wrapped_flags.test
+    self.assertNotIn('test', self.wrapped_flags)
+    with self.assertRaises(AttributeError):
+      _ = self.wrapped_flags.test
+
+  def test_dir(self):
+    self.assertEqual(['test'], dir(self.wrapped_flags))
+
+  def test_getitem(self):
+    self.assertIs(self.original_flags['test'], self.wrapped_flags['test'])
+
+  def test_setitem(self):
+    flag = flags.Flag(flags.ArgumentParser(), flags.ArgumentSerializer(),
+                      'fruit', 'apple', 'the fruit type')
+    self.wrapped_flags['fruit'] = flag
+    self.assertIs(self.original_flags['fruit'], self.wrapped_flags['fruit'])
+    self.assertEqual('apple', self.wrapped_flags.fruit)
+
+  def test_len(self):
+    self.assertEqual(1, len(self.wrapped_flags))
+
+  def test_iter(self):
+    self.assertEqual(['test'], list(self.wrapped_flags))
+
+  def test_str(self):
+    self.assertEqual(str(self.wrapped_flags), str(self.original_flags))
+
+  def test_call(self):
+    self.wrapped_flags(['program', '--test=new'])
+    self.assertEqual('new', self.wrapped_flags.test)
 
   def test_keyword_arguments(self):
     test_cases = (
-- 
GitLab


From 9bbd25da077881696875447c3081f96c20e8728c Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 14 Dec 2017 11:18:05 -0800
Subject: [PATCH 1007/1225] Enable bfloat16 tests and add a filter for
 currently failed tests.

PiperOrigin-RevId: 179069257
---
 tensorflow/compiler/tests/binary_ops_test.py  |   2 +-
 .../compiler/tests/tensor_array_ops_test.py   |   3 +-
 tensorflow/compiler/tests/unary_ops_test.py   |   3 +-
 tensorflow/compiler/tests/xla_test.py         | 105 ++++++++++++++----
 tensorflow/compiler/tf2xla/lib/util.cc        |   2 +-
 tensorflow/compiler/tf2xla/lib/util.h         |   2 +-
 tensorflow/compiler/tf2xla/xla_op_registry.h  |   8 +-
 tensorflow/core/kernels/split_op.cc           |   2 +
 tensorflow/python/framework/test_util.py      |   6 +
 9 files changed, 100 insertions(+), 33 deletions(-)

diff --git a/tensorflow/compiler/tests/binary_ops_test.py b/tensorflow/compiler/tests/binary_ops_test.py
index 654dc15e86..905dd9fc7b 100644
--- a/tensorflow/compiler/tests/binary_ops_test.py
+++ b/tensorflow/compiler/tests/binary_ops_test.py
@@ -547,7 +547,7 @@ class BinaryOpsTest(XLATestCase):
       self._testDivision(dtype)
 
   def testFloatDivision(self):
-    for dtype in self.float_types + self.complex_types:
+    for dtype in self.float_types | self.complex_types:
       self._testDivision(dtype)
 
   def _testRemainder(self, dtype):
diff --git a/tensorflow/compiler/tests/tensor_array_ops_test.py b/tensorflow/compiler/tests/tensor_array_ops_test.py
index ac039e0162..a62925a181 100644
--- a/tensorflow/compiler/tests/tensor_array_ops_test.py
+++ b/tensorflow/compiler/tests/tensor_array_ops_test.py
@@ -330,8 +330,7 @@ class TensorArrayTest(xla_test.XLATestCase):
     # Find two different floating point types, create an array of
     # the first type, but try to read the other type.
     if len(self.float_types) > 1:
-      dtype1 = self.float_types[0]
-      dtype2 = self.float_types[1]
+      dtype1, dtype2 = list(self.float_types)[:2]
       with self.test_session(), self.test_scope():
         ta = tensor_array_ops.TensorArray(
             dtype=dtype1, tensor_array_name="foo", size=3)
diff --git a/tensorflow/compiler/tests/unary_ops_test.py b/tensorflow/compiler/tests/unary_ops_test.py
index 0da7442a24..b0623c0fbc 100644
--- a/tensorflow/compiler/tests/unary_ops_test.py
+++ b/tensorflow/compiler/tests/unary_ops_test.py
@@ -573,7 +573,8 @@ class UnaryOpsTest(XLATestCase):
 
   def testCast(self):
     shapes = [[], [4], [2, 3], [2, 0, 4]]
-    types = [dtypes.bool, dtypes.int32, dtypes.float32] + self.complex_tf_types
+    types = (set([dtypes.bool, dtypes.int32, dtypes.float32]) |
+             self.complex_tf_types)
     for shape in shapes:
       for src_type in types:
         for dst_type in types:
diff --git a/tensorflow/compiler/tests/xla_test.py b/tensorflow/compiler/tests/xla_test.py
index 0be127997e..7e1f5c76ed 100644
--- a/tensorflow/compiler/tests/xla_test.py
+++ b/tensorflow/compiler/tests/xla_test.py
@@ -53,41 +53,100 @@ class XLATestCase(test.TestCase):
     super(XLATestCase, self).__init__(method_name)
     self.device = FLAGS.test_device
     self.has_custom_call = (self.device == 'XLA_CPU')
-    self.all_tf_types = [
+    self._all_tf_types = set([
         dtypes.as_dtype(types_pb2.DataType.Value(name))
         for name in FLAGS.types.split(',')
-    ]
-    self.int_tf_types = [
-        dtype for dtype in self.all_tf_types if dtype.is_integer
-    ]
-    self.float_tf_types = [
-        dtype for dtype in self.all_tf_types if dtype.is_floating
-    ]
-    self.complex_tf_types = [
-        dtype for dtype in self.all_tf_types if dtype.is_complex
-    ]
-    self.numeric_tf_types = (
-        self.int_tf_types + self.float_tf_types + self.complex_tf_types)
-
-    self.all_types = [dtype.as_numpy_dtype for dtype in self.all_tf_types]
-    self.int_types = [dtype.as_numpy_dtype for dtype in self.int_tf_types]
-    self.float_types = [dtype.as_numpy_dtype for dtype in self.float_tf_types]
-    self.complex_types = [
+    ])
+    self.int_tf_types = set([
+        dtype for dtype in self._all_tf_types if dtype.is_integer
+    ])
+    self._float_tf_types = set([
+        dtype for dtype in self._all_tf_types if dtype.is_floating
+    ])
+    self.complex_tf_types = set([
+        dtype for dtype in self._all_tf_types if dtype.is_complex
+    ])
+    self._numeric_tf_types = set(
+        self.int_tf_types | self._float_tf_types | self.complex_tf_types)
+
+    self._all_types = set(
+        [dtype.as_numpy_dtype for dtype in self._all_tf_types])
+    self.int_types = set([dtype.as_numpy_dtype for dtype in self.int_tf_types])
+    self._float_types = set(
+        [dtype.as_numpy_dtype for dtype in self._float_tf_types])
+    self.complex_types = set([
         dtype.as_numpy_dtype for dtype in self.complex_tf_types
-    ]
-    self.numeric_types = self.int_types + self.float_types + self.complex_types
+    ])
+    self._numeric_types = set(
+        self.int_types | self._float_types | self.complex_types)
 
     # Parse the manifest file, if any, into a regex identifying tests to
     # disable
     self.disabled_regex = None
+    self._method_types_filter = dict()
+    # TODO(xpan): Make it text proto if it doesn't scale.
+    # Each line of the manifest file specifies an entry. The entry can be
+    # 1) TestNameRegex  // E.g. CumprodTest.* Or
+    # 2) TestName TypeName  // E.g. AdamOptimizerTest.testSharing DT_BFLOAT16
+    # The 1) disables the entire test. While 2) only filter some numeric types
+    # so that they are not used in those tests.
+
     if FLAGS.disabled_manifest is not None:
       comments_re = re.compile('#.*$')
       manifest_file = open(FLAGS.disabled_manifest, 'r')
-      lines = manifest_file.read().splitlines()
-      lines = [comments_re.sub('', l).strip() for l in lines]
-      self.disabled_regex = re.compile('|'.join(lines))
+      disabled_tests = []
+      disabled_method_types = []
+      for l in manifest_file.read().splitlines():
+        entry = comments_re.sub('', l).strip().split(' ')
+        if len(entry) == 1:
+          disabled_tests.append(entry[0])
+        elif len(entry) == 2:
+          disabled_method_types.append(
+              (entry[0], entry[1].strip().split(',')))
+        else:
+          raise ValueError('Bad entry in manifest file.')
+
+      self.disabled_regex = re.compile('|'.join(disabled_tests))
+      for method, types in disabled_method_types:
+        self._method_types_filter[method] = set([
+            dtypes.as_dtype(types_pb2.DataType.Value(name)).as_numpy_dtype
+            for name in types])
       manifest_file.close()
 
+  @property
+  def all_tf_types(self):
+    name = '{}.{}'.format(type(self).__name__, self._testMethodName)
+    tf_types = set([dtypes.as_dtype(t)
+                    for t in self._method_types_filter.get(name, set())])
+    return self._all_tf_types - tf_types
+
+  @property
+  def float_types(self):
+    name = '{}.{}'.format(type(self).__name__, self._testMethodName)
+    return self._float_types - self._method_types_filter.get(name, set())
+
+  @property
+  def float_tf_types(self):
+    name = '{}.{}'.format(type(self).__name__, self._testMethodName)
+    return self._float_tf_types - self._method_types_filter.get(name, set())
+
+  @property
+  def numeric_tf_types(self):
+    name = '{}.{}'.format(type(self).__name__, self._testMethodName)
+    tf_types = set([dtypes.as_dtype(t)
+                    for t in self._method_types_filter.get(name, set())])
+    return self._numeric_tf_types - tf_types
+
+  @property
+  def numeric_types(self):
+    name = '{}.{}'.format(type(self).__name__, self._testMethodName)
+    return self._numeric_types - self._method_types_filter.get(name, set())
+
+  @property
+  def all_types(self):
+    name = '{}.{}'.format(type(self).__name__, self._testMethodName)
+    return self._all_types - self._method_types_filter.get(name, set())
+
   def setUp(self):
     super(XLATestCase, self).setUp()
     name = '{}.{}'.format(type(self).__name__, self._testMethodName)
diff --git a/tensorflow/compiler/tf2xla/lib/util.cc b/tensorflow/compiler/tf2xla/lib/util.cc
index 943248aedb..ce24b61b5d 100644
--- a/tensorflow/compiler/tf2xla/lib/util.cc
+++ b/tensorflow/compiler/tf2xla/lib/util.cc
@@ -28,7 +28,7 @@ limitations under the License.
 namespace tensorflow {
 
 xla::ComputationDataHandle Zeros(xla::ComputationBuilder* builder,
-                                 xla::Shape& shape) {
+                                 const xla::Shape& shape) {
   return builder->Broadcast(
       builder->ConstantLiteral(xla::Literal::Zero(shape.element_type())),
       xla::AsInt64Slice(shape.dimensions()));
diff --git a/tensorflow/compiler/tf2xla/lib/util.h b/tensorflow/compiler/tf2xla/lib/util.h
index 8fba6b5cf2..fb138b4f73 100644
--- a/tensorflow/compiler/tf2xla/lib/util.h
+++ b/tensorflow/compiler/tf2xla/lib/util.h
@@ -25,7 +25,7 @@ namespace tensorflow {
 
 // Returns a zero-filled tensor with shape `shape`.
 xla::ComputationDataHandle Zeros(xla::ComputationBuilder* builder,
-                                 xla::Shape& shape);
+                                 const xla::Shape& shape);
 
 // Returns a floating point scalar constant of 'type' with 'value'.
 // If 'type' is complex, returns a real value with zero imaginary component.
diff --git a/tensorflow/compiler/tf2xla/xla_op_registry.h b/tensorflow/compiler/tf2xla/xla_op_registry.h
index 2959d2ab69..8bfd9758f7 100644
--- a/tensorflow/compiler/tf2xla/xla_op_registry.h
+++ b/tensorflow/compiler/tf2xla/xla_op_registry.h
@@ -45,11 +45,11 @@ extern const char* const DEVICE_GPU_XLA_JIT;  // "GPU_XLA_JIT"
 extern const char* const DEVICE_XLA_CPU;
 extern const char* const DEVICE_XLA_GPU;
 
-constexpr std::array<DataType, 3> kFloatTypes = {
-    {DT_HALF, DT_FLOAT, DT_DOUBLE}};
-constexpr std::array<DataType, 8> kNumericTypes = {
+constexpr std::array<DataType, 4> kFloatTypes = {
+    {DT_HALF, DT_FLOAT, DT_DOUBLE, DT_BFLOAT16}};
+constexpr std::array<DataType, 9> kNumericTypes = {
     {DT_UINT32, DT_UINT64, DT_INT32, DT_INT64, DT_HALF, DT_FLOAT, DT_DOUBLE,
-     DT_COMPLEX64}};
+     DT_COMPLEX64, DT_BFLOAT16}};
 
 constexpr std::array<DataType, 8> kCpuAllTypes = {
     {DT_UINT32, DT_UINT64, DT_INT32, DT_INT64, DT_FLOAT, DT_DOUBLE,
diff --git a/tensorflow/core/kernels/split_op.cc b/tensorflow/core/kernels/split_op.cc
index 58e1a73be6..094ba8bb86 100644
--- a/tensorflow/core/kernels/split_op.cc
+++ b/tensorflow/core/kernels/split_op.cc
@@ -360,6 +360,8 @@ class SplitOpSYCL : public SplitOpBase<SYCLDevice, T> {
 
 TF_CALL_ALL_TYPES(REGISTER_SPLIT);
 REGISTER_SPLIT(quint8);
+// TODO(xpan): Merge bfloat16 into TF_CALL_ALL_TYPES
+REGISTER_SPLIT(bfloat16);
 
 #undef REGISTER_SPLIT
 
diff --git a/tensorflow/python/framework/test_util.py b/tensorflow/python/framework/test_util.py
index ae3b6c584a..509c5ec8d6 100644
--- a/tensorflow/python/framework/test_util.py
+++ b/tensorflow/python/framework/test_util.py
@@ -50,6 +50,7 @@ from tensorflow.python.client import session
 from tensorflow.python.eager import context
 from tensorflow.python.eager import tape
 from tensorflow.python.framework import device as pydev
+from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import errors
 from tensorflow.python.framework import ops
 from tensorflow.python.framework import random_seed
@@ -1108,6 +1109,7 @@ class TensorFlowTestCase(googletest.TestCase):
     """
     a = self._GetNdArray(a)
     b = self._GetNdArray(b)
+    # types with lower tol are put later to overwrite previous ones.
     if (a.dtype == np.float32 or b.dtype == np.float32 or
         a.dtype == np.complex64 or b.dtype == np.complex64):
       rtol = max(rtol, float_rtol)
@@ -1115,6 +1117,10 @@ class TensorFlowTestCase(googletest.TestCase):
     if a.dtype == np.float16 or b.dtype == np.float16:
       rtol = max(rtol, half_rtol)
       atol = max(atol, half_atol)
+    if (a.dtype == dtypes.bfloat16.as_numpy_dtype or
+        b.dtype == dtypes.bfloat16.as_numpy_dtype):
+      rtol = max(rtol, half_rtol)
+      atol = max(atol, half_atol)
 
     self.assertAllClose(a, b, rtol=rtol, atol=atol)
 
-- 
GitLab


From b6ece81d39ac852e8e310d9b738d560ca7d21b2b Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 14 Dec 2017 11:23:45 -0800
Subject: [PATCH 1008/1225] Splits _SharedEmbeddingColumn into a separate class
 and some bug fixes.

PiperOrigin-RevId: 179070001
---
 .../python/feature_column/feature_column.py   | 210 ++++++++++--------
 .../feature_column/feature_column_test.py     |   7 +-
 2 files changed, 118 insertions(+), 99 deletions(-)

diff --git a/tensorflow/python/feature_column/feature_column.py b/tensorflow/python/feature_column/feature_column.py
index 060fa640d5..a7fe528ee1 100644
--- a/tensorflow/python/feature_column/feature_column.py
+++ b/tensorflow/python/feature_column/feature_column.py
@@ -191,23 +191,14 @@ def _internal_input_layer(features,
       ordered_columns.append(column)
       with variable_scope.variable_scope(
           None, default_name=column._var_scope_name):  # pylint: disable=protected-access
-        if column._var_scope_name == column.name:  # pylint: disable=protected-access
-          tensor = _get_dense_tensor(
-              column=column,
-              builder=builder,
-              weight_collections=weight_collections,
-              trainable=trainable)
-        else:
-          # This is typically the case for shared_embedding_columns. The
-          # embedding weights variable will be under the common variable_scope,
-          # but the ops for each column will be under a separate name_scope.
-          with ops.name_scope(column.name):
-            tensor = _get_dense_tensor(
-                column=column,
-                builder=builder,
-                weight_collections=weight_collections,
-                trainable=trainable)
-        output_tensors.append(tensor)
+        tensor = column._get_dense_tensor(  # pylint: disable=protected-access
+            builder,
+            weight_collections=weight_collections,
+            trainable=trainable)
+        num_elements = column._variable_shape.num_elements()  # pylint: disable=protected-access
+        batch_size = array_ops.shape(tensor)[0]
+        output_tensors.append(
+            array_ops.reshape(tensor, shape=(batch_size, num_elements)))
         if cols_to_vars is not None:
           # Retrieve any variables created (some _DenseColumn's don't create
           # variables, in which case an empty list is returned).
@@ -429,26 +420,13 @@ def linear_model(features,
       with variable_scope.variable_scope(
           None, default_name=column._var_scope_name):  # pylint: disable=protected-access
         ordered_columns.append(column)
-        if column._var_scope_name == column.name:  # pylint: disable=protected-access
-          weighted_sum = _create_weighted_sum(
-              column=column,
-              builder=builder,
-              units=units,
-              sparse_combiner=sparse_combiner,
-              weight_collections=weight_collections,
-              trainable=trainable)
-        else:
-          # This is typically the case for shared_embedding_columns. The
-          # embedding weights variable will be under the common variable_scope,
-          # but the ops for each column will be under a separate name_scope.
-          with ops.name_scope(column.name):
-            weighted_sum = _create_weighted_sum(
-                column=column,
-                builder=builder,
-                units=units,
-                sparse_combiner=sparse_combiner,
-                weight_collections=weight_collections,
-                trainable=trainable)
+        weighted_sum = _create_weighted_sum(
+            column=column,
+            builder=builder,
+            units=units,
+            sparse_combiner=sparse_combiner,
+            weight_collections=weight_collections,
+            trainable=trainable)
         weighted_sums.append(weighted_sum)
         if cols_to_vars is not None:
           # Retrieve the variables created.
@@ -673,7 +651,6 @@ def embedding_column(
       dimension=dimension,
       combiner=combiner,
       initializer=initializer,
-      shared_embedding_collection_name=None,
       ckpt_to_load_from=ckpt_to_load_from,
       tensor_name_in_ckpt=tensor_name_in_ckpt,
       max_norm=max_norm,
@@ -817,7 +794,7 @@ def _shared_embedding_columns(
 
   result = []
   for column in categorical_columns:
-    result.append(_EmbeddingColumn(
+    result.append(_SharedEmbeddingColumn(
         categorical_column=column,
         dimension=dimension,
         combiner=combiner,
@@ -1691,21 +1668,6 @@ class _DenseColumn(_FeatureColumn):
     pass
 
 
-def _get_dense_tensor(
-    column,
-    builder,
-    weight_collections,
-    trainable):
-  """Creates a dense Tensor for a _DenseColumn for input_layer."""
-  tensor = column._get_dense_tensor(  # pylint: disable=protected-access
-      builder,
-      weight_collections=weight_collections,
-      trainable=trainable)
-  num_elements = column._variable_shape.num_elements()  # pylint: disable=protected-access
-  batch_size = array_ops.shape(tensor)[0]
-  return array_ops.reshape(tensor, shape=(batch_size, num_elements))
-
-
 def _create_weighted_sum(
     column,
     builder,
@@ -1716,11 +1678,19 @@ def _create_weighted_sum(
   """Creates a weighted sum for a dense or sparse column for linear_model."""
   if isinstance(column, _CategoricalColumn):
     return _create_categorical_column_weighted_sum(
-        column, builder, units, sparse_combiner, weight_collections,
-        trainable)
+        column=column,
+        builder=builder,
+        units=units,
+        sparse_combiner=sparse_combiner,
+        weight_collections=weight_collections,
+        trainable=trainable)
   else:
     return _create_dense_column_weighted_sum(
-        column, builder, units, weight_collections, trainable)
+        column=column,
+        builder=builder,
+        units=units,
+        weight_collections=weight_collections,
+        trainable=trainable)
 
 
 def _create_dense_column_weighted_sum(
@@ -2168,24 +2138,16 @@ class _EmbeddingColumn(
     _DenseColumn,
     collections.namedtuple('_EmbeddingColumn', (
         'categorical_column', 'dimension', 'combiner', 'initializer',
-        'shared_embedding_collection_name', 'ckpt_to_load_from',
-        'tensor_name_in_ckpt', 'max_norm', 'trainable'
+        'ckpt_to_load_from', 'tensor_name_in_ckpt', 'max_norm', 'trainable'
     ))):
   """See `embedding_column`."""
 
   @property
   def name(self):
     if not hasattr(self, '_name'):
-      if self.shared_embedding_collection_name:
-        self._name = '{}_shared_embedding'.format(self.categorical_column.name)
-      else:
-        self._name = '{}_embedding'.format(self.categorical_column.name)
+      self._name = '{}_embedding'.format(self.categorical_column.name)
     return self._name
 
-  @property
-  def _var_scope_name(self):
-    return self.shared_embedding_collection_name or self.name
-
   @property
   def _parse_example_spec(self):
     return self.categorical_column._parse_example_spec  # pylint: disable=protected-access
@@ -2207,7 +2169,75 @@ class _EmbeddingColumn(
     sparse_weights = sparse_tensors.weight_tensor
 
     embedding_shape = (self.categorical_column._num_buckets, self.dimension)  # pylint: disable=protected-access
-    if self.shared_embedding_collection_name:
+    embedding_weights = variable_scope.get_variable(
+        name='embedding_weights',
+        shape=embedding_shape,
+        dtype=dtypes.float32,
+        initializer=self.initializer,
+        trainable=self.trainable and trainable,
+        collections=weight_collections)
+    if self.ckpt_to_load_from is not None:
+      to_restore = embedding_weights
+      if isinstance(to_restore, variables.PartitionedVariable):
+        to_restore = to_restore._get_variable_list()  # pylint: disable=protected-access
+      checkpoint_utils.init_from_checkpoint(self.ckpt_to_load_from, {
+          self.tensor_name_in_ckpt: to_restore
+      })
+
+    # Return embedding lookup result.
+    return _safe_embedding_lookup_sparse(
+        embedding_weights=embedding_weights,
+        sparse_ids=sparse_ids,
+        sparse_weights=sparse_weights,
+        combiner=self.combiner,
+        name='%s_weights' % self.name,
+        max_norm=self.max_norm)
+
+
+class _SharedEmbeddingColumn(
+    _DenseColumn,
+    collections.namedtuple('_SharedEmbeddingColumn', (
+        'categorical_column', 'dimension', 'combiner', 'initializer',
+        'shared_embedding_collection_name', 'ckpt_to_load_from',
+        'tensor_name_in_ckpt', 'max_norm', 'trainable'
+    ))):
+  """See `embedding_column`."""
+
+  @property
+  def name(self):
+    if not hasattr(self, '_name'):
+      self._name = '{}_shared_embedding'.format(self.categorical_column.name)
+    return self._name
+
+  @property
+  def _var_scope_name(self):
+    return self.shared_embedding_collection_name
+
+  @property
+  def _parse_example_spec(self):
+    return self.categorical_column._parse_example_spec  # pylint: disable=protected-access
+
+  def _transform_feature(self, inputs):
+    return inputs.get(self.categorical_column)
+
+  @property
+  def _variable_shape(self):
+    if not hasattr(self, '_shape'):
+      self._shape = tensor_shape.vector(self.dimension)
+    return self._shape
+
+  def _get_dense_tensor(self, inputs, weight_collections=None, trainable=None):
+    # This method is called from a variable_scope with name _var_scope_name,
+    # which is shared among all shared embeddings. Open a name_scope here, so
+    # that the ops for different columns have distinct names.
+    with ops.name_scope(None, default_name=self.name):
+      # Get sparse IDs and weights.
+      sparse_tensors = self.categorical_column._get_sparse_tensors(  # pylint: disable=protected-access
+          inputs, weight_collections=weight_collections, trainable=trainable)
+      sparse_ids = sparse_tensors.id_tensor
+      sparse_weights = sparse_tensors.weight_tensor
+
+      embedding_shape = (self.categorical_column._num_buckets, self.dimension)  # pylint: disable=protected-access
       shared_embedding_collection = ops.get_collection(
           self.shared_embedding_collection_name)
       if shared_embedding_collection:
@@ -2219,7 +2249,7 @@ class _EmbeddingColumn(
               'The feature_column library already adds a variable under the '
               'hood.'.format(shared_embedding_collection))
         embedding_weights = shared_embedding_collection[0]
-        if embedding_weights.shape != embedding_shape:
+        if embedding_weights.get_shape() != embedding_shape:
           raise ValueError(
               'Shared embedding collection {} contains variable {} of '
               'unexpected shape {}. Expected shape is {}. '
@@ -2228,7 +2258,7 @@ class _EmbeddingColumn(
               'The feature_column library already adds a variable under the '
               'hood.'.format(
                   self.shared_embedding_collection_name, embedding_weights.name,
-                  embedding_weights.shape, embedding_shape))
+                  embedding_weights.get_shape(), embedding_shape))
       else:
         embedding_weights = variable_scope.get_variable(
             name='embedding_weights',
@@ -2239,30 +2269,22 @@ class _EmbeddingColumn(
             collections=weight_collections)
         ops.add_to_collection(
             self.shared_embedding_collection_name, embedding_weights)
-    else:
-      embedding_weights = variable_scope.get_variable(
-          name='embedding_weights',
-          shape=embedding_shape,
-          dtype=dtypes.float32,
-          initializer=self.initializer,
-          trainable=self.trainable and trainable,
-          collections=weight_collections)
-    if self.ckpt_to_load_from is not None:
-      to_restore = embedding_weights
-      if isinstance(to_restore, variables.PartitionedVariable):
-        to_restore = to_restore._get_variable_list()  # pylint: disable=protected-access
-      checkpoint_utils.init_from_checkpoint(self.ckpt_to_load_from, {
-          self.tensor_name_in_ckpt: to_restore
-      })
-
-    # Return embedding lookup result.
-    return _safe_embedding_lookup_sparse(
-        embedding_weights=embedding_weights,
-        sparse_ids=sparse_ids,
-        sparse_weights=sparse_weights,
-        combiner=self.combiner,
-        name='%s_weights' % self.name,
-        max_norm=self.max_norm)
+      if self.ckpt_to_load_from is not None:
+        to_restore = embedding_weights
+        if isinstance(to_restore, variables.PartitionedVariable):
+          to_restore = to_restore._get_variable_list()  # pylint: disable=protected-access
+        checkpoint_utils.init_from_checkpoint(self.ckpt_to_load_from, {
+            self.tensor_name_in_ckpt: to_restore
+        })
+
+      # Return embedding lookup result.
+      return _safe_embedding_lookup_sparse(
+          embedding_weights=embedding_weights,
+          sparse_ids=sparse_ids,
+          sparse_weights=sparse_weights,
+          combiner=self.combiner,
+          name='%s_weights' % self.name,
+          max_norm=self.max_norm)
 
 
 def _create_tuple(shape, value):
diff --git a/tensorflow/python/feature_column/feature_column_test.py b/tensorflow/python/feature_column/feature_column_test.py
index 019415857e..2374680b96 100644
--- a/tensorflow/python/feature_column/feature_column_test.py
+++ b/tensorflow/python/feature_column/feature_column_test.py
@@ -3551,7 +3551,6 @@ class EmbeddingColumnTest(test.TestCase):
     self.assertEqual('mean', embedding_column.combiner)
     self.assertIsNotNone(embedding_column.initializer)
     self.assertIsNone(embedding_column.ckpt_to_load_from)
-    self.assertIsNone(embedding_column.shared_embedding_collection_name)
     self.assertIsNone(embedding_column.tensor_name_in_ckpt)
     self.assertIsNone(embedding_column.max_norm)
     self.assertTrue(embedding_column.trainable)
@@ -3576,7 +3575,6 @@ class EmbeddingColumnTest(test.TestCase):
     self.assertEqual(embedding_dimension, embedding_column.dimension)
     self.assertEqual('my_combiner', embedding_column.combiner)
     self.assertEqual('my_initializer', embedding_column.initializer())
-    self.assertIsNone(embedding_column.shared_embedding_collection_name)
     self.assertEqual('my_ckpt', embedding_column.ckpt_to_load_from)
     self.assertEqual('my_ckpt_tensor', embedding_column.tensor_name_in_ckpt)
     self.assertEqual(42., embedding_column.max_norm)
@@ -3608,7 +3606,6 @@ class EmbeddingColumnTest(test.TestCase):
       self.assertEqual(embedding_dimension, embedding_column.dimension)
       self.assertEqual('my_combiner', embedding_column.combiner)
       self.assertEqual('my_initializer', embedding_column.initializer())
-      self.assertIsNone(embedding_column.shared_embedding_collection_name)
       self.assertEqual('my_ckpt', embedding_column.ckpt_to_load_from)
       self.assertEqual('my_ckpt_tensor', embedding_column.tensor_name_in_ckpt)
       self.assertEqual(42., embedding_column.max_norm)
@@ -4534,8 +4531,8 @@ class SharedEmbeddingColumnTest(test.TestCase):
           categorical_column_a.name: input_a,
           categorical_column_b.name: input_b,
       }, (embedding_column_a, embedding_column_b))
-      # Linear weights name should follow the column name.
-      # TODO(roumposg): Fix that.
+      # Linear weights do not follow the column name. But this is a rare use
+      # case, and fixing it would add too much complexity to the code.
       expected_var_names = (
           'linear_model/bias_weights:0',
           'linear_model/aaa_bbb_shared_embedding/weights:0',
-- 
GitLab


From 0b3d9859a0ad81e5dfdf6474b7cf76bba32b88ac Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 14 Dec 2017 11:28:20 -0800
Subject: [PATCH 1009/1225] Add an optional sleep op to the beginning of the
 TPU infeed thread.

This allows the compilation thread to finish compiling a large model and
prevents a JF request timeout, currently set to a 60 second default.

PiperOrigin-RevId: 179070643
---
 .../contrib/tpu/python/tpu/tpu_config.py      | 15 +++++++++++++--
 .../contrib/tpu/python/tpu/tpu_estimator.py   | 19 ++++++++++++++-----
 2 files changed, 27 insertions(+), 7 deletions(-)

diff --git a/tensorflow/contrib/tpu/python/tpu/tpu_config.py b/tensorflow/contrib/tpu/python/tpu/tpu_config.py
index 916b9b3082..77ce38991b 100644
--- a/tensorflow/contrib/tpu/python/tpu/tpu_config.py
+++ b/tensorflow/contrib/tpu/python/tpu/tpu_config.py
@@ -31,6 +31,7 @@ class TPUConfig(
         'num_shards',
         'per_host_input_for_training',
         'tpu_job_name',
+        'initial_infeed_sleep_secs',
     ])):
   """TPU related configuration required by `TPUEstimator`.
 
@@ -50,13 +51,17 @@ class TPUConfig(
       within TPUEstimator, however when using ClusterSpec propagation in more
       esoteric cluster configurations, you may need to specify the job name as a
       string.
+    initial_infeed_sleep_secs: The number of seconds the infeed thread should
+      wait before enqueueing the first batch. This helps avoid timeouts for
+      models that require a long compilation time.
   """
 
   def __new__(cls,
               iterations_per_loop=2,
               num_shards=2,
               per_host_input_for_training=True,
-              tpu_job_name=None):
+              tpu_job_name=None,
+              initial_infeed_sleep_secs=None):
 
     # Check iterations_per_loop.
     util_lib.check_positive_integer(iterations_per_loop,
@@ -64,12 +69,18 @@ class TPUConfig(
 
     # Check num_shards.
     util_lib.check_positive_integer(num_shards, 'TPUConfig num_shards')
+
+    # Check initial_infeed_sleep_secs.
+    if initial_infeed_sleep_secs:
+      util_lib.check_positive_integer(initial_infeed_sleep_secs,
+                                      'TPUConfig initial_infeed_sleep_secs')
     return super(TPUConfig, cls).__new__(
         cls,
         iterations_per_loop=iterations_per_loop,
         num_shards=num_shards,
         per_host_input_for_training=per_host_input_for_training,
-        tpu_job_name=tpu_job_name)
+        tpu_job_name=tpu_job_name,
+        initial_infeed_sleep_secs=initial_infeed_sleep_secs)
 
 
 class RunConfig(run_config_lib.RunConfig):
diff --git a/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py b/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py
index 84a4208be3..e324948be5 100644
--- a/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py
+++ b/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py
@@ -467,13 +467,20 @@ class _OutfeedThreadController(_InfeedOutfeedThreadBaseController):
 class _InfeedThreadController(_InfeedOutfeedThreadBaseController):
   """This wraps the infeed thread and stops when Estimator finishes."""
 
-  def __init__(self, session, enqueue_ops):
+  def __init__(self, session, enqueue_ops, initial_infeed_sleep_secs):
     super(_InfeedThreadController, self).__init__(
-        threading.Thread(target=self._input_thread_fn_for_loading,
-                         args=(session, enqueue_ops)))
+        threading.Thread(
+            target=self._input_thread_fn_for_loading,
+            args=(session, enqueue_ops, initial_infeed_sleep_secs)))
 
-  def _input_thread_fn_for_loading(self, session, enqueue_ops):
+  def _input_thread_fn_for_loading(self, session, enqueue_ops,
+                                   initial_infeed_sleep_secs):
     count = 0
+    if initial_infeed_sleep_secs:
+      logging.info('Infeed thread sleeping for %d seconds.',
+                   initial_infeed_sleep_secs)
+      time.sleep(initial_infeed_sleep_secs)
+      logging.info('Infeed thread starting after sleep')
     try:
       while True:
         signal = self._signal_queue.get()
@@ -535,6 +542,8 @@ class TPUInfeedOutfeedSessionHook(session_run_hook.SessionRunHook):
     self._master_job = ctx.master_job
     self._enqueue_ops = enqueue_ops
     self._dequeue_ops = dequeue_ops
+    self._initial_infeed_sleep_secs = (
+        ctx.config.tpu_config.initial_infeed_sleep_secs)
 
   def begin(self):
     logging.info('TPU job name %s', self._master_job)
@@ -549,7 +558,7 @@ class TPUInfeedOutfeedSessionHook(session_run_hook.SessionRunHook):
 
     logging.info('Start infeed thread controller')
     self._infeed_thd_controller = _InfeedThreadController(
-        session, self._enqueue_ops)
+        session, self._enqueue_ops, self._initial_infeed_sleep_secs)
 
     if self._dequeue_ops is not None:
       logging.info('Start outfeed thread controller')
-- 
GitLab


From 2fb40006976490d7323eb6be8e2bae2275948551 Mon Sep 17 00:00:00 2001
From: Jonathan Hseu <vomjom@vomjom.net>
Date: Thu, 14 Dec 2017 11:33:21 -0800
Subject: [PATCH 1010/1225] Revert "Add batch support for various image_ops
 (#14854)" (#15349)

This reverts commit 20aa9e0a9f129ed929cea1fb45ec12b7be3ac68e.
---
 tensorflow/python/ops/image_ops_impl.py | 244 ++++++++++--------------
 tensorflow/python/ops/image_ops_test.py | 194 ++-----------------
 2 files changed, 116 insertions(+), 322 deletions(-)

diff --git a/tensorflow/python/ops/image_ops_impl.py b/tensorflow/python/ops/image_ops_impl.py
index 46022e2e7f..21561f3689 100644
--- a/tensorflow/python/ops/image_ops_impl.py
+++ b/tensorflow/python/ops/image_ops_impl.py
@@ -182,81 +182,8 @@ def _CheckAtLeast3DImage(image, require_static=True):
     return []
 
 
-def _EnsureTensorIs4D(image):
-  """Converts `image` to a 4-D Tensor if it is not already one.
-
-  Args:
-    image: 4-D Tensor of shape `[batch, height, width, channels]` or
-           3-D Tensor of shape `[height, width, channels]`.
-  Raises:
-    ValueError: if image is not a 3-D or 4-D Tensor.
-
-  Returns:
-    If `image` was 4-D, a 4-D float Tensor of shape
-    `[batch, width, height, channels]`
-    If `image` was 3-D, a 4-D float Tensor of shape
-    `[1, width, height, channels]`
-  """
-  original_shape = image.get_shape()
-  is_batch = True
-  if original_shape.ndims == 3:
-    is_batch = False
-    image = array_ops.expand_dims(image, 0)
-  elif original_shape.ndims is None:
-    is_batch = False
-    image = array_ops.expand_dims(image, 0)
-    image.set_shape([None] * 4)
-  elif original_shape.ndims != 4:
-    raise ValueError('\'image\' must have either 3 or 4 dimensions.')
-
-  return (image, is_batch)
-
-def _flip_image(image, axis, random=False, seed=None):
-  """
-  Flips image(s) around a given axis.
-
-  Args:
-    image:  4-D Tensor of shape `[batch, height, width, channels]` or
-            3-D Tensor of shape `[height, width, channels]`.
-    axis:   A Python integer representing the axis on which the image(s)
-            will be flipped. Note: The provided axis must be specified relative
-            to the shape `[batch, height, width, channels]` as 3-D images will
-            be expanded to fit this shape before being flipped.
-    random: A boolean representing whether or not we should flip the
-            image(s) at random.
-    seed:   Python integer. Used to create a random seed. See
-            tf.set_random_seed for behavior.
-
-  Raises:
-    ValueError: if image is not a 3-D or 4-D Tensor.
-
-  Returns:
-    A tensor of the same type and shape as `image`
-  """
-  image = ops.convert_to_tensor(image, name='image')
-  original_image = image
-  image, is_batch = _EnsureTensorIs4D(image)
-
-  image = control_flow_ops.with_dependencies(
-    _CheckAtLeast3DImage(image, require_static=False), image)
-
-  batch, _, _, _ = _ImageDimensions(image, rank=4)
-  flipped = array_ops.reverse(image, [axis])
-
-  if random == True:
-    uniform_random = random_ops.random_uniform([batch], 0, 1.0, seed=seed)
-    mirror_cond = math_ops.less(uniform_random, 0.5)
-    flipped = array_ops.where(mirror_cond, x=image, y=flipped)
-
-  if is_batch:
-    return fix_image_flip_shape(original_image, flipped, rank=4)
-
-  flipped = array_ops.squeeze(flipped, squeeze_dims=[0])
-  return fix_image_flip_shape(original_image, flipped, rank=3)
-
-
-def fix_image_flip_shape(image, result, rank=3):
-  """Set the shape to original dimensional if we don't know anything else.
+def fix_image_flip_shape(image, result):
+  """Set the shape to 3 dimensional if we don't know anything else.
 
   Args:
     image: original image size
@@ -268,174 +195,171 @@ def fix_image_flip_shape(image, result, rank=3):
 
   image_shape = image.get_shape()
   if image_shape == tensor_shape.unknown_shape():
-    result.set_shape([None] * rank)
+    result.set_shape([None, None, None])
   else:
     result.set_shape(image_shape)
   return result
 
 
 def random_flip_up_down(image, seed=None):
-  """Randomly flips image(s) vertically (upside down).
+  """Randomly flips an image vertically (upside down).
 
-  With a 1 in 2 chance, outputs the contents of `image` flipped along the height
-  dimension. Otherwise output the image as-is.
+  With a 1 in 2 chance, outputs the contents of `image` flipped along the first
+  dimension, which is `height`.  Otherwise output the image as-is.
 
   Args:
-    image: 4-D Tensor of shape `[batch, height, width, channels]` or
-           3-D Tensor of shape `[height, width, channels]`.
+    image: A 3-D tensor of shape `[height, width, channels].`
     seed: A Python integer. Used to create a random seed. See
       @{tf.set_random_seed}
       for behavior.
 
   Returns:
-    A tensor of the same type and shape as `image`.
+    A 3-D tensor of the same type and shape as `image`.
 
   Raises:
     ValueError: if the shape of `image` not supported.
   """
-  return _flip_image(image, axis=1, random=True, seed=seed)
+  image = ops.convert_to_tensor(image, name='image')
+  image = control_flow_ops.with_dependencies(
+      _Check3DImage(image, require_static=False), image)
+  uniform_random = random_ops.random_uniform([], 0, 1.0, seed=seed)
+  mirror_cond = math_ops.less(uniform_random, .5)
+  result = control_flow_ops.cond(mirror_cond,
+                                 lambda: array_ops.reverse(image, [0]),
+                                 lambda: image)
+  return fix_image_flip_shape(image, result)
 
 
 def random_flip_left_right(image, seed=None):
-  """Randomly flip image(s) horizontally (left to right).
+  """Randomly flip an image horizontally (left to right).
 
   With a 1 in 2 chance, outputs the contents of `image` flipped along the
-  width dimension. Otherwise output the image as-is.
+  second dimension, which is `width`.  Otherwise output the image as-is.
 
   Args:
-    image: 4-D Tensor of shape `[batch, height, width, channels]` or
-           3-D Tensor of shape `[height, width, channels]`.
+    image: A 3-D tensor of shape `[height, width, channels].`
     seed: A Python integer. Used to create a random seed. See
       @{tf.set_random_seed}
       for behavior.
 
   Returns:
-    A tensor of the same type and shape as `image`.
+    A 3-D tensor of the same type and shape as `image`.
 
   Raises:
     ValueError: if the shape of `image` not supported.
   """
-  return _flip_image(image, axis=2, random=True, seed=seed)
+  image = ops.convert_to_tensor(image, name='image')
+  image = control_flow_ops.with_dependencies(
+      _Check3DImage(image, require_static=False), image)
+  uniform_random = random_ops.random_uniform([], 0, 1.0, seed=seed)
+  mirror_cond = math_ops.less(uniform_random, .5)
+  result = control_flow_ops.cond(mirror_cond,
+                                 lambda: array_ops.reverse(image, [1]),
+                                 lambda: image)
+  return fix_image_flip_shape(image, result)
 
 
 def flip_left_right(image):
   """Flip an image horizontally (left to right).
 
-  Outputs the contents of `image` flipped along the width dimension.
+  Outputs the contents of `image` flipped along the second dimension, which is
+  `width`.
 
   See also `reverse()`.
 
   Args:
-    image: 4-D Tensor of shape `[batch, height, width, channels]` or
-           3-D Tensor of shape `[height, width, channels]`.
+    image: A 3-D tensor of shape `[height, width, channels].`
 
   Returns:
-    A tensor of the same type and shape as `image`.
+    A 3-D tensor of the same type and shape as `image`.
 
   Raises:
     ValueError: if the shape of `image` not supported.
   """
-  return _flip_image(image, axis=2, random=False)
+  image = ops.convert_to_tensor(image, name='image')
+  image = control_flow_ops.with_dependencies(
+      _Check3DImage(image, require_static=False), image)
+  return fix_image_flip_shape(image, array_ops.reverse(image, [1]))
+
 
 def flip_up_down(image):
   """Flip an image vertically (upside down).
 
-  Outputs the contents of `image` flipped along the height dimension.
+  Outputs the contents of `image` flipped along the first dimension, which is
+  `height`.
 
   See also `reverse()`.
 
   Args:
-    image: 4-D Tensor of shape `[batch, height, width, channels]` or
-           3-D Tensor of shape `[height, width, channels]`.
+    image: A 3-D tensor of shape `[height, width, channels].`
 
   Returns:
-    A tensor of the same type and shape as `image`.
+    A 3-D tensor of the same type and shape as `image`.
 
   Raises:
     ValueError: if the shape of `image` not supported.
   """
-  return _flip_image(image, axis=1, random=False)
+  image = ops.convert_to_tensor(image, name='image')
+  image = control_flow_ops.with_dependencies(
+      _Check3DImage(image, require_static=False), image)
+  return fix_image_flip_shape(image, array_ops.reverse(image, [0]))
 
 
 def rot90(image, k=1, name=None):
-  """Rotate image(s) counter-clockwise by 90 degrees.
+  """Rotate an image counter-clockwise by 90 degrees.
 
   Args:
-    image: 4-D Tensor of shape `[batch, height, width, channels]` or
-           3-D Tensor of shape `[height, width, channels]`.
+    image: A 3-D tensor of shape `[height, width, channels]`.
     k: A scalar integer. The number of times the image is rotated by 90 degrees.
     name: A name for this operation (optional).
 
   Returns:
-    A rotated of the same type and shape as `image`.
-
-  Raises:
-    ValueError: if the shape of `image` not supported.
+    A rotated 3-D tensor of the same type and shape as `image`.
   """
   with ops.name_scope(name, 'rot90', [image, k]) as scope:
     image = ops.convert_to_tensor(image, name='image')
-    image, is_batch = _EnsureTensorIs4D(image)
     image = control_flow_ops.with_dependencies(
-        _CheckAtLeast3DImage(image, require_static=False), image)
+        _Check3DImage(image, require_static=False), image)
     k = ops.convert_to_tensor(k, dtype=dtypes.int32, name='k')
     k.get_shape().assert_has_rank(0)
     k = math_ops.mod(k, 4)
 
     def _rot90():
-      return array_ops.transpose(array_ops.reverse_v2(image, [2]),
-                                 [0, 2, 1, 3])
+      return array_ops.transpose(array_ops.reverse_v2(image, [1]),
+                                 [1, 0, 2])
     def _rot180():
-      return array_ops.reverse_v2(image, [1, 2])
+      return array_ops.reverse_v2(image, [0, 1])
     def _rot270():
-      return array_ops.reverse_v2(array_ops.transpose(image, [0, 2, 1, 3]),
-                                  [2])
+      return array_ops.reverse_v2(array_ops.transpose(image, [1, 0, 2]),
+                                  [1])
     cases = [(math_ops.equal(k, 1), _rot90),
              (math_ops.equal(k, 2), _rot180),
              (math_ops.equal(k, 3), _rot270)]
 
-    result = control_flow_ops.case(cases, default=lambda: image, exclusive=True,
+    ret = control_flow_ops.case(cases, default=lambda: image, exclusive=True,
                                 name=scope)
-
-    shape = image.get_shape()
-    result.set_shape([shape[0], None, None, shape[3]])
-
-    if is_batch == True:
-      return result
-
-    result = array_ops.squeeze(result, squeeze_dims=[0])
-    return result
+    ret.set_shape([None, None, image.get_shape()[2]])
+    return ret
 
 
 def transpose_image(image):
-  """Transpose an image by swapping the height and width dimension.
+  """Transpose an image by swapping the first and second dimension.
 
   See also `transpose()`.
 
   Args:
-    image: 4-D Tensor of shape `[batch, height, width, channels]` or
-           3-D Tensor of shape `[height, width, channels]`.
+    image: 3-D tensor of shape `[height, width, channels]`
 
   Returns:
-    If `image` was 4-D, a 4-D float Tensor of shape
-    `[batch, width, height, channels]`
-    If `image` was 3-D, a 3-D float Tensor of shape
-    `[width, height, channels]`
+    A 3-D tensor of shape `[width, height, channels]`
 
   Raises:
     ValueError: if the shape of `image` not supported.
   """
   image = ops.convert_to_tensor(image, name='image')
-  image, is_batch = _EnsureTensorIs4D(image)
   image = control_flow_ops.with_dependencies(
-      _CheckAtLeast3DImage(image, require_static=False), image)
-
-  result = array_ops.transpose(image, [0, 2, 1, 3], name='transpose_image')
-
-  if is_batch:
-    return result
-
-  result = array_ops.squeeze(result, squeeze_dims=[0])
-  return result
+      _Check3DImage(image, require_static=False), image)
+  return array_ops.transpose(image, [1, 0, 2], name='transpose_image')
 
 
 def central_crop(image, central_fraction):
@@ -521,9 +445,21 @@ def pad_to_bounding_box(image, offset_height, offset_width, target_height,
       negative.
   """
   image = ops.convert_to_tensor(image, name='image')
-  image, is_batch = _EnsureTensorIs4D(image)
+
+  is_batch = True
+  image_shape = image.get_shape()
+  if image_shape.ndims == 3:
+    is_batch = False
+    image = array_ops.expand_dims(image, 0)
+  elif image_shape.ndims is None:
+    is_batch = False
+    image = array_ops.expand_dims(image, 0)
+    image.set_shape([None] * 4)
+  elif image_shape.ndims != 4:
+    raise ValueError('\'image\' must have either 3 or 4 dimensions.')
 
   assert_ops = _CheckAtLeast3DImage(image, require_static=False)
+
   batch, height, width, depth = _ImageDimensions(image, rank=4)
 
   after_padding_width = target_width - offset_width - width
@@ -588,9 +524,21 @@ def crop_to_bounding_box(image, offset_height, offset_width, target_height,
       negative, or either `target_height` or `target_width` is not positive.
   """
   image = ops.convert_to_tensor(image, name='image')
-  image, is_batch = _EnsureTensorIs4D(image)
+
+  is_batch = True
+  image_shape = image.get_shape()
+  if image_shape.ndims == 3:
+    is_batch = False
+    image = array_ops.expand_dims(image, 0)
+  elif image_shape.ndims is None:
+    is_batch = False
+    image = array_ops.expand_dims(image, 0)
+    image.set_shape([None] * 4)
+  elif image_shape.ndims != 4:
+    raise ValueError('\'image\' must have either 3 or 4 dimensions.')
 
   assert_ops = _CheckAtLeast3DImage(image, require_static=False)
+
   batch, height, width, depth = _ImageDimensions(image, rank=4)
 
   assert_ops += _assert(offset_width >= 0, ValueError,
@@ -651,7 +599,17 @@ def resize_image_with_crop_or_pad(image, target_height, target_width):
     `[new_height, new_width, channels]`.
   """
   image = ops.convert_to_tensor(image, name='image')
-  image, is_batch = _EnsureTensorIs4D(image)
+  image_shape = image.get_shape()
+  is_batch = True
+  if image_shape.ndims == 3:
+    is_batch = False
+    image = array_ops.expand_dims(image, 0)
+  elif image_shape.ndims is None:
+    is_batch = False
+    image = array_ops.expand_dims(image, 0)
+    image.set_shape([None] * 4)
+  elif image_shape.ndims != 4:
+    raise ValueError('\'image\' must have either 3 or 4 dimensions.')
 
   assert_ops = _CheckAtLeast3DImage(image, require_static=False)
   assert_ops += _assert(target_width > 0, ValueError,
diff --git a/tensorflow/python/ops/image_ops_test.py b/tensorflow/python/ops/image_ops_test.py
index f320b52b09..4fc18ed7ec 100644
--- a/tensorflow/python/ops/image_ops_test.py
+++ b/tensorflow/python/ops/image_ops_test.py
@@ -759,7 +759,7 @@ class AdjustSaturationTest(test_util.TensorFlowTestCase):
 
 class FlipTransposeRotateTest(test_util.TensorFlowTestCase):
 
-  def testInvolutionLeftRight(self):
+  def testIdempotentLeftRight(self):
     x_np = np.array([[1, 2, 3], [1, 2, 3]], dtype=np.uint8).reshape([2, 3, 1])
     with self.test_session(use_gpu=True):
       x_tf = constant_op.constant(x_np, shape=x_np.shape)
@@ -767,15 +767,6 @@ class FlipTransposeRotateTest(test_util.TensorFlowTestCase):
       y_tf = y.eval()
       self.assertAllEqual(y_tf, x_np)
 
-  def testInvolutionLeftRightWithBatch(self):
-    x_np = np.array([[[1, 2, 3], [1, 2, 3]], [[1, 2, 3], [1, 2, 3]]],
-                    dtype=np.uint8).reshape([2, 2, 3, 1])
-    with self.test_session(use_gpu=True):
-      x_tf = constant_op.constant(x_np, shape=x_np.shape)
-      y = image_ops.flip_left_right(image_ops.flip_left_right(x_tf))
-      y_tf = y.eval()
-      self.assertAllEqual(y_tf, x_np)
-
   def testLeftRight(self):
     x_np = np.array([[1, 2, 3], [1, 2, 3]], dtype=np.uint8).reshape([2, 3, 1])
     y_np = np.array([[3, 2, 1], [3, 2, 1]], dtype=np.uint8).reshape([2, 3, 1])
@@ -786,30 +777,17 @@ class FlipTransposeRotateTest(test_util.TensorFlowTestCase):
       y_tf = y.eval()
       self.assertAllEqual(y_tf, y_np)
 
-  def testLeftRightWithBatch(self):
-    x_np = np.array([[[1, 2, 3], [1, 2, 3]], [[1, 2, 3], [1, 2, 3]]],
-                    dtype=np.uint8).reshape([2, 2, 3, 1])
-    y_np = np.array([[[3, 2, 1], [3, 2, 1]], [[3, 2, 1], [3, 2, 1]]],
-                    dtype=np.uint8).reshape([2, 2, 3, 1])
-
-    with self.test_session(use_gpu=True):
-      x_tf = constant_op.constant(x_np, shape=x_np.shape)
-      y = image_ops.flip_left_right(x_tf)
-      y_tf = y.eval()
-      self.assertAllEqual(y_tf, y_np)
-
   def testRandomFlipLeftRight(self):
     x_np = np.array([[1, 2, 3], [1, 2, 3]], dtype=np.uint8).reshape([2, 3, 1])
     y_np = np.array([[3, 2, 1], [3, 2, 1]], dtype=np.uint8).reshape([2, 3, 1])
-    seed = 42
 
     with self.test_session(use_gpu=True):
       x_tf = constant_op.constant(x_np, shape=x_np.shape)
-      y = image_ops.random_flip_left_right(x_tf, seed=seed)
+      y = image_ops.random_flip_left_right(x_tf)
 
       count_flipped = 0
       count_unflipped = 0
-      for _ in range(100):
+      for _ in range(50):
         y_tf = y.eval()
         if y_tf[0][0] == 1:
           self.assertAllEqual(y_tf, x_np)
@@ -817,46 +795,10 @@ class FlipTransposeRotateTest(test_util.TensorFlowTestCase):
         else:
           self.assertAllEqual(y_tf, y_np)
           count_flipped += 1
-      # 100 trials
-      # Mean: 50
-      # Std Dev: ~5
-      # Six Sigma: 50 - (5 * 6) = 20
-      self.assertGreaterEqual(count_flipped, 20)
-      self.assertGreaterEqual(count_unflipped, 20)
-
-  def testRandomFlipLeftRightWithBatch(self):
-    x_np = np.array([[[1, 2, 3], [1, 2, 3]], [[1, 2, 3], [1, 2, 3]]],
-                    dtype=np.uint8).reshape([2, 2, 3, 1])
-    y_np = np.array([[[3, 2, 1], [3, 2, 1]], [[3, 2, 1], [3, 2, 1]]],
-                    dtype=np.uint8).reshape([2, 2, 3, 1])
-    seed = 42
+      self.assertGreaterEqual(count_flipped, 1)
+      self.assertGreaterEqual(count_unflipped, 1)
 
-    with self.test_session(use_gpu=True):
-      x_tf = constant_op.constant(x_np, shape=x_np.shape).eval()
-      y = image_ops.random_flip_left_right(x_tf, seed=seed)
-      count_flipped = 0
-      count_unflipped = 0
-      for _ in range(50):
-        y_tf = y.eval()
-        for index in range(0, x_tf.shape[0]):
-          current_x_tf = x_tf[index]
-          current_y_tf = y_tf[index]
-          current_y_np = y_np[index]
-
-          if current_y_tf[0][0] == 1:
-            self.assertAllEqual(current_y_tf, current_x_tf)
-            count_unflipped += 1
-          else:
-            self.assertAllEqual(current_y_tf, current_y_np)
-            count_flipped += 1
-      # Batch size 2 * 50 trials = 100
-      # Mean: 50
-      # Std Dev: ~5
-      # Six Sigma: 50 - (5 * 6) = 20
-      self.assertGreaterEqual(count_flipped, 20)
-      self.assertGreaterEqual(count_unflipped, 20)
-
-  def testInvolutionUpDown(self):
+  def testIdempotentUpDown(self):
     x_np = np.array([[1, 2, 3], [4, 5, 6]], dtype=np.uint8).reshape([2, 3, 1])
 
     with self.test_session(use_gpu=True):
@@ -865,16 +807,6 @@ class FlipTransposeRotateTest(test_util.TensorFlowTestCase):
       y_tf = y.eval()
       self.assertAllEqual(y_tf, x_np)
 
-  def testInvolutionUpDownWithBatch(self):
-    x_np = np.array([[[1, 2, 3], [4, 5, 6]], [[7, 8, 9], [10, 11, 12]]],
-                    dtype=np.uint8).reshape([2, 2, 3, 1])
-
-    with self.test_session(use_gpu=True):
-      x_tf = constant_op.constant(x_np, shape=x_np.shape)
-      y = image_ops.flip_up_down(image_ops.flip_up_down(x_tf))
-      y_tf = y.eval()
-      self.assertAllEqual(y_tf, x_np)
-
   def testUpDown(self):
     x_np = np.array([[1, 2, 3], [4, 5, 6]], dtype=np.uint8).reshape([2, 3, 1])
     y_np = np.array([[4, 5, 6], [1, 2, 3]], dtype=np.uint8).reshape([2, 3, 1])
@@ -885,29 +817,16 @@ class FlipTransposeRotateTest(test_util.TensorFlowTestCase):
       y_tf = y.eval()
       self.assertAllEqual(y_tf, y_np)
 
-  def testUpDownWithBatch(self):
-    x_np = np.array([[[1, 2, 3], [4, 5, 6]], [[7, 8, 9], [10, 11, 12]]],
-                    dtype=np.uint8).reshape([2, 2, 3, 1])
-    y_np = np.array([[[4, 5, 6], [1, 2, 3]], [[10, 11, 12], [7, 8, 9]]],
-                    dtype=np.uint8).reshape([2, 2, 3, 1])
-
-    with self.test_session(use_gpu=True):
-      x_tf = constant_op.constant(x_np, shape=x_np.shape)
-      y = image_ops.flip_up_down(x_tf)
-      y_tf = y.eval()
-      self.assertAllEqual(y_tf, y_np)
-
   def testRandomFlipUpDown(self):
     x_np = np.array([[1, 2, 3], [4, 5, 6]], dtype=np.uint8).reshape([2, 3, 1])
     y_np = np.array([[4, 5, 6], [1, 2, 3]], dtype=np.uint8).reshape([2, 3, 1])
-    seed = 42
 
     with self.test_session(use_gpu=True):
       x_tf = constant_op.constant(x_np, shape=x_np.shape)
-      y = image_ops.random_flip_up_down(x_tf, seed=42)
+      y = image_ops.random_flip_up_down(x_tf)
       count_flipped = 0
       count_unflipped = 0
-      for _ in range(100):
+      for _ in range(50):
         y_tf = y.eval()
         if y_tf[0][0] == 1:
           self.assertAllEqual(y_tf, x_np)
@@ -915,45 +834,10 @@ class FlipTransposeRotateTest(test_util.TensorFlowTestCase):
         else:
           self.assertAllEqual(y_tf, y_np)
           count_flipped += 1
-      # 100 trials
-      # Mean: 50
-      # Std Dev: ~5
-      # Six Sigma: 50 - (5 * 6) = 20
-      self.assertGreaterEqual(count_flipped, 20)
-      self.assertGreaterEqual(count_unflipped, 20)
-
-  def testRandomFlipUpDownWithBatch(self):
-    x_np = np.array([[[1, 2, 3], [4, 5, 6]], [[1, 2, 3], [4, 5, 6]]],
-                    dtype=np.uint8).reshape([2, 2, 3, 1])
-    y_np = np.array([[[4, 5, 6], [1, 2, 3]], [[4, 5, 6], [1, 2, 3]]],
-                    dtype=np.uint8).reshape([2, 2, 3, 1])
-    seed = 42
+      self.assertGreaterEqual(count_flipped, 1)
+      self.assertGreaterEqual(count_unflipped, 1)
 
-    with self.test_session(use_gpu=True):
-      x_tf = constant_op.constant(x_np, shape=x_np.shape).eval()
-      y = image_ops.random_flip_up_down(x_tf, seed=42)
-      count_flipped = 0
-      count_unflipped = 0
-      for _ in range(50):
-        y_tf = y.eval()
-        for index in range(0, x_tf.shape[0]):
-          current_x_tf = x_tf[index]
-          current_y_tf = y_tf[index]
-          current_y_np = y_np[index]
-        if current_y_tf[0][0] == 1:
-          self.assertAllEqual(current_y_tf, current_x_tf)
-          count_unflipped += 1
-        else:
-          self.assertAllEqual(current_y_tf, current_y_np)
-          count_flipped += 1
-      # Batch size 2 * 50 trials = 100
-      # Mean: 50
-      # Std Dev: ~5
-      # Six Sigma: 50 - (5 * 6) = 20
-      self.assertGreaterEqual(count_flipped, 20)
-      self.assertGreaterEqual(count_unflipped, 20)
-
-  def testInvolutionTranspose(self):
+  def testIdempotentTranspose(self):
     x_np = np.array([[1, 2, 3], [4, 5, 6]], dtype=np.uint8).reshape([2, 3, 1])
 
     with self.test_session(use_gpu=True):
@@ -962,16 +846,6 @@ class FlipTransposeRotateTest(test_util.TensorFlowTestCase):
       y_tf = y.eval()
       self.assertAllEqual(y_tf, x_np)
 
-  def testInvolutionTransposeWithBatch(self):
-    x_np = np.array([[[1, 2, 3], [4, 5, 6]], [[7, 8, 9], [10, 11, 12]]],
-                    dtype=np.uint8).reshape([2, 2, 3, 1])
-
-    with self.test_session(use_gpu=True):
-      x_tf = constant_op.constant(x_np, shape=x_np.shape)
-      y = image_ops.transpose_image(image_ops.transpose_image(x_tf))
-      y_tf = y.eval()
-      self.assertAllEqual(y_tf, x_np)
-
   def testTranspose(self):
     x_np = np.array([[1, 2, 3], [4, 5, 6]], dtype=np.uint8).reshape([2, 3, 1])
     y_np = np.array([[1, 4], [2, 5], [3, 6]], dtype=np.uint8).reshape([3, 2, 1])
@@ -982,28 +856,11 @@ class FlipTransposeRotateTest(test_util.TensorFlowTestCase):
       y_tf = y.eval()
       self.assertAllEqual(y_tf, y_np)
 
-  def testTransposeWithBatch(self):
-    x_np = np.array([[[1, 2, 3], [4, 5, 6]], [[7, 8, 9], [10, 11, 12]]],
-                    dtype=np.uint8).reshape([2, 2, 3, 1])
-
-    y_np = np.array([[[1, 4], [2, 5], [3, 6]], [[7, 10], [8, 11], [9, 12]]],
-                    dtype=np.uint8).reshape([2, 3, 2, 1])
-
-    with self.test_session(use_gpu=True):
-      x_tf = constant_op.constant(x_np, shape=x_np.shape)
-      y = image_ops.transpose_image(x_tf)
-      y_tf = y.eval()
-      self.assertAllEqual(y_tf, y_np)
-
   def testPartialShapes(self):
     p_unknown_rank = array_ops.placeholder(dtypes.uint8)
-    p_unknown_dims_3 = array_ops.placeholder(
+    p_unknown_dims = array_ops.placeholder(
         dtypes.uint8, shape=[None, None, None])
-    p_unknown_dims_4 = array_ops.placeholder(
-        dtypes.uint8, shape=[None, None, None, None])
     p_unknown_width = array_ops.placeholder(dtypes.uint8, shape=[64, None, 3])
-    p_unknown_batch = array_ops.placeholder(dtypes.uint8,
-                                            shape=[None, 64, 64, 3])
 
     p_wrong_rank = array_ops.placeholder(dtypes.uint8, shape=[None, None])
     p_zero_dim = array_ops.placeholder(dtypes.uint8, shape=[64, 0, 3])
@@ -1015,17 +872,12 @@ class FlipTransposeRotateTest(test_util.TensorFlowTestCase):
     ]:
       transformed_unknown_rank = op(p_unknown_rank)
       self.assertEqual(3, transformed_unknown_rank.get_shape().ndims)
-      transformed_unknown_dims_3 = op(p_unknown_dims_3)
-      self.assertEqual(3, transformed_unknown_dims_3.get_shape().ndims)
-      transformed_unknown_dims_4 = op(p_unknown_dims_4)
-      self.assertEqual(4, transformed_unknown_dims_4.get_shape().ndims)
+      transformed_unknown_dims = op(p_unknown_dims)
+      self.assertEqual(3, transformed_unknown_dims.get_shape().ndims)
       transformed_unknown_width = op(p_unknown_width)
       self.assertEqual(3, transformed_unknown_width.get_shape().ndims)
-      transformed_unknown_batch = op(p_unknown_batch)
-      self.assertEqual(4, transformed_unknown_batch.get_shape().ndims)
 
-      with self.assertRaisesRegexp(ValueError,
-                                   "must have either 3 or 4 dimensions."):
+      with self.assertRaisesRegexp(ValueError, "must be three-dimensional"):
         op(p_wrong_rank)
       with self.assertRaisesRegexp(ValueError, "must be > 0"):
         op(p_zero_dim)
@@ -1038,14 +890,6 @@ class FlipTransposeRotateTest(test_util.TensorFlowTestCase):
         rotated = image_ops.rot90(rotated)
       self.assertAllEqual(image, rotated.eval())
 
-  def testRot90GroupOrderWithBatch(self):
-    image = np.arange(48, dtype=np.uint8).reshape([2, 2, 4, 3])
-    with self.test_session(use_gpu=True):
-      rotated = image
-      for _ in xrange(4):
-        rotated = image_ops.rot90(rotated)
-      self.assertAllEqual(image, rotated.eval())
-
   def testRot90NumpyEquivalence(self):
     image = np.arange(24, dtype=np.uint8).reshape([2, 4, 3])
     with self.test_session(use_gpu=True):
@@ -1055,14 +899,6 @@ class FlipTransposeRotateTest(test_util.TensorFlowTestCase):
         y_np = np.rot90(image, k=k)
         self.assertAllEqual(y_np, y_tf.eval({k_placeholder: k}))
 
-  def testRot90NumpyEquivalenceWithBatch(self):
-    image = np.arange(48, dtype=np.uint8).reshape([2, 2, 4, 3])
-    with self.test_session(use_gpu=True):
-      k_placeholder = array_ops.placeholder(dtypes.int32, shape=[])
-      y_tf = image_ops.rot90(image, k_placeholder)
-      for k in xrange(4):
-        y_np = np.rot90(image, k=k, axes=(1, 2))
-        self.assertAllEqual(y_np, y_tf.eval({k_placeholder: k}))
 
 class RandomFlipTest(test_util.TensorFlowTestCase):
 
-- 
GitLab


From 73658420db2498ad7f07363bfa72cba6e2d9fdd2 Mon Sep 17 00:00:00 2001
From: Jonathan Hseu <vomjom@vomjom.net>
Date: Thu, 14 Dec 2017 11:33:34 -0800
Subject: [PATCH 1011/1225] Benchmarks for flipping and random flipping
 (#15348)

* Random flip benchmark

* More benchmarks
---
 tensorflow/python/ops/image_ops_test.py | 83 +++++++++++++++++++++++++
 1 file changed, 83 insertions(+)

diff --git a/tensorflow/python/ops/image_ops_test.py b/tensorflow/python/ops/image_ops_test.py
index 4fc18ed7ec..4af9bd2a00 100644
--- a/tensorflow/python/ops/image_ops_test.py
+++ b/tensorflow/python/ops/image_ops_test.py
@@ -374,6 +374,89 @@ class AdjustHueTest(test_util.TensorFlowTestCase):
       self._adjustHueTf(x_np, delta_h)
 
 
+class FlipImageBenchmark(test.Benchmark):
+
+  def _benchmarkFlipLeftRight(self, device, cpu_count):
+    image_shape = [299, 299, 3]
+    warmup_rounds = 100
+    benchmark_rounds = 1000
+    config = config_pb2.ConfigProto()
+    if cpu_count is not None:
+      config.inter_op_parallelism_threads = 1
+      config.intra_op_parallelism_threads = cpu_count
+    with session.Session("", graph=ops.Graph(), config=config) as sess:
+      with ops.device(device):
+        inputs = variables.Variable(
+            random_ops.random_uniform(
+                image_shape, dtype=dtypes.float32) * 255,
+            trainable=False,
+            dtype=dtypes.float32)
+        run_op = image_ops.flip_left_right(inputs)
+        sess.run(variables.global_variables_initializer())
+        for i in xrange(warmup_rounds + benchmark_rounds):
+          if i == warmup_rounds:
+            start = time.time()
+          sess.run(run_op)
+    end = time.time()
+    step_time = (end - start) / benchmark_rounds
+    tag = device + "_%s" % (cpu_count if cpu_count is not None else "_all")
+    print("benchmarkFlipLeftRight_299_299_3_%s step_time: %.2f us" %
+          (tag, step_time * 1e6))
+    self.report_benchmark(
+        name="benchmarkFlipLeftRight_299_299_3_%s" % (tag),
+        iters=benchmark_rounds,
+        wall_time=step_time)
+
+  def _benchmarkRandomFlipLeftRight(self, device, cpu_count):
+    image_shape = [299, 299, 3]
+    warmup_rounds = 100
+    benchmark_rounds = 1000
+    config = config_pb2.ConfigProto()
+    if cpu_count is not None:
+      config.inter_op_parallelism_threads = 1
+      config.intra_op_parallelism_threads = cpu_count
+    with session.Session("", graph=ops.Graph(), config=config) as sess:
+      with ops.device(device):
+        inputs = variables.Variable(
+            random_ops.random_uniform(
+                image_shape, dtype=dtypes.float32) * 255,
+            trainable=False,
+            dtype=dtypes.float32)
+        run_op = image_ops.random_flip_left_right(inputs)
+        sess.run(variables.global_variables_initializer())
+        for i in xrange(warmup_rounds + benchmark_rounds):
+          if i == warmup_rounds:
+            start = time.time()
+          sess.run(run_op)
+    end = time.time()
+    step_time = (end - start) / benchmark_rounds
+    tag = device + "_%s" % (cpu_count if cpu_count is not None else "_all")
+    print("benchmarkRandomFlipLeftRight_299_299_3_%s step_time: %.2f us" %
+          (tag, step_time * 1e6))
+    self.report_benchmark(
+        name="benchmarkRandomFlipLeftRight_299_299_3_%s" % (tag),
+        iters=benchmark_rounds,
+        wall_time=step_time)
+
+  def benchmarkFlipLeftRightCpu1(self):
+    self._benchmarkFlipLeftRight("/cpu:0", 1)
+
+  def benchmarkFlipLeftRightCpuAll(self):
+    self._benchmarkFlipLeftRight("/cpu:0", None)
+
+  def benchmarkFlipLeftRightGpu(self):
+    self._benchmarkFlipLeftRight(test.gpu_device_name(), None)
+
+  def benchmarkRandomFlipLeftRightCpu1(self):
+    self._benchmarkRandomFlipLeftRight("/cpu:0", 1)
+
+  def benchmarkRandomFlipLeftRightCpuAll(self):
+    self._benchmarkRandomFlipLeftRight("/cpu:0", None)
+
+  def benchmarkRandomFlipLeftRightGpu(self):
+    self._benchmarkRandomFlipLeftRight(test.gpu_device_name(), None)
+
+
 class AdjustHueBenchmark(test.Benchmark):
 
   def _benchmarkAdjustHue(self, device, cpu_count):
-- 
GitLab


From 4c86ece040cb96ea689f5c0d084b6959274eab91 Mon Sep 17 00:00:00 2001
From: Yifei Feng <yifeif@google.com>
Date: Thu, 14 Dec 2017 11:59:23 -0800
Subject: [PATCH 1012/1225] Add auto-conversion to float32 for keras estimator.
 Keras always expects float32 I/O.

PiperOrigin-RevId: 179074913
---
 .../python/keras/_impl/keras/estimator.py     | 14 +++-
 .../keras/_impl/keras/estimator_test.py       | 73 +++----------------
 2 files changed, 24 insertions(+), 63 deletions(-)

diff --git a/tensorflow/python/keras/_impl/keras/estimator.py b/tensorflow/python/keras/_impl/keras/estimator.py
index 4370341ad1..624e92a04b 100644
--- a/tensorflow/python/keras/_impl/keras/estimator.py
+++ b/tensorflow/python/keras/_impl/keras/estimator.py
@@ -31,6 +31,7 @@ from tensorflow.python.framework import sparse_tensor as sparse_tensor_lib
 from tensorflow.python.keras._impl.keras import backend as K
 from tensorflow.python.keras._impl.keras import models
 from tensorflow.python.keras._impl.keras.utils.generic_utils import CustomObjectScope
+from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import metrics as metrics_module
 from tensorflow.python.platform import tf_logging as logging
 from tensorflow.python.saved_model import signature_constants
@@ -40,6 +41,14 @@ from tensorflow.python.training import training_util
 _DEFAULT_SERVING_KEY = signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY
 
 
+def _cast_tensor_to_floatx(x):
+  """Cast tensor to keras's floatx dtype if it is not already the same dtype."""
+  if x.dtype == K.floatx():
+    return x
+  else:
+    return math_ops.cast(x, K.floatx())
+
+
 def _create_ordered_io(keras_model, estimator_io_dict, is_input=True):
   """Create a list of tensors from IO dictionary based on Keras IO order.
 
@@ -68,7 +77,7 @@ def _create_ordered_io(keras_model, estimator_io_dict, is_input=True):
                                         ', '.join(keras_io_names)))
   tensors = []
   for io_name in keras_io_names:
-    tensors.append(estimator_io_dict[io_name])
+    tensors.append(_cast_tensor_to_floatx(estimator_io_dict[io_name]))
   return tensors
 
 
@@ -116,7 +125,8 @@ def _clone_and_build_model(mode,
       target_tensors = _create_ordered_io(keras_model, labels, is_input=False)
     else:
       target_tensors = [
-          sparse_tensor_lib.convert_to_tensor_or_sparse_tensor(labels)
+          _cast_tensor_to_floatx(
+              sparse_tensor_lib.convert_to_tensor_or_sparse_tensor(labels))
       ]
 
     model.compile(
diff --git a/tensorflow/python/keras/_impl/keras/estimator_test.py b/tensorflow/python/keras/_impl/keras/estimator_test.py
index a7ea3b48a3..9fc48b4117 100644
--- a/tensorflow/python/keras/_impl/keras/estimator_test.py
+++ b/tensorflow/python/keras/_impl/keras/estimator_test.py
@@ -25,8 +25,6 @@ import numpy as np
 
 from tensorflow.python.estimator import run_config as run_config_lib
 from tensorflow.python.estimator.inputs import numpy_io
-from tensorflow.python.framework import dtypes
-from tensorflow.python.framework import ops
 from tensorflow.python.framework import test_util
 from tensorflow.python.keras._impl import keras
 from tensorflow.python.keras._impl.keras import testing_utils
@@ -80,22 +78,17 @@ def get_resource_for_simple_model(is_sequential, is_evaluate):
   y_test = keras.utils.to_categorical(y_test)
 
   train_input_fn = numpy_io.numpy_input_fn(
-      x={input_name: np.array(x_train, dtype=np.float32)},
-      y=np.array(y_train, dtype=np.float32),
+      x={input_name: x_train},
+      y=y_train,
       shuffle=False,
       num_epochs=None,
       batch_size=16)
 
   evaluate_input_fn = numpy_io.numpy_input_fn(
-      x={input_name: np.array(x_test, dtype=np.float32)},
-      y=np.array(y_test, dtype=np.float32),
-      num_epochs=1,
-      shuffle=False)
+      x={input_name: x_test}, y=y_test, num_epochs=1, shuffle=False)
 
   predict_input_fn = numpy_io.numpy_input_fn(
-      x={input_name: np.array(x_test, dtype=np.float32)},
-      num_epochs=1,
-      shuffle=False)
+      x={input_name: x_test}, num_epochs=1, shuffle=False)
 
   inference_input_fn = evaluate_input_fn if is_evaluate else predict_input_fn
 
@@ -243,41 +236,13 @@ class TestKerasEstimator(test_util.TensorFlowTestCase):
     d_test = keras.utils.to_categorical(d_test)
 
     def train_input_fn():
-      input_dict = {
-          'input_a':
-              ops.convert_to_tensor(
-                  np.array(a_train, dtype=np.float32), dtype=dtypes.float32),
-          'input_b':
-              ops.convert_to_tensor(
-                  np.array(b_train, dtype=np.float32), dtype=dtypes.float32)
-      }
-      output_dict = {
-          'dense_2':
-              ops.convert_to_tensor(
-                  np.array(c_train, dtype=np.float32), dtype=dtypes.float32),
-          'dense_3':
-              ops.convert_to_tensor(
-                  np.array(d_train, dtype=np.float32), dtype=dtypes.float32)
-      }
+      input_dict = {'input_a': a_train, 'input_b': b_train}
+      output_dict = {'dense_2': c_train, 'dense_3': d_train}
       return input_dict, output_dict
 
     def eval_input_fn():
-      input_dict = {
-          'input_a':
-              ops.convert_to_tensor(
-                  np.array(a_test, dtype=np.float32), dtype=dtypes.float32),
-          'input_b':
-              ops.convert_to_tensor(
-                  np.array(b_test, dtype=np.float32), dtype=dtypes.float32)
-      }
-      output_dict = {
-          'dense_2':
-              ops.convert_to_tensor(
-                  np.array(c_test, dtype=np.float32), dtype=dtypes.float32),
-          'dense_3':
-              ops.convert_to_tensor(
-                  np.array(d_test, dtype=np.float32), dtype=dtypes.float32)
-      }
+      input_dict = {'input_a': a_test, 'input_b': b_test}
+      output_dict = {'dense_2': c_test, 'dense_3': d_test}
       return input_dict, output_dict
 
     with self.test_session():
@@ -347,26 +312,12 @@ class TestKerasEstimator(test_util.TensorFlowTestCase):
     y_train = keras.utils.to_categorical(y_train)
 
     def invald_input_name_input_fn():
-      input_dict = {
-          'invalid_input_name':
-              ops.convert_to_tensor(
-                  np.array(x_train, dtype=np.float32), dtype=dtypes.float32),
-      }
-      output = ops.convert_to_tensor(
-          np.array(y_train, dtype=np.float32), dtype=dtypes.float32)
-      return input_dict, output
+      input_dict = {'invalid_input_name': x_train}
+      return input_dict, y_train
 
     def invald_output_name_input_fn():
-      input_dict = {
-          'input_1':
-              ops.convert_to_tensor(
-                  np.array(x_train, dtype=np.float32), dtype=dtypes.float32),
-      }
-      output_dict = {
-          'invalid_output_name':
-              ops.convert_to_tensor(
-                  np.array(y_train, dtype=np.float32), dtype=dtypes.float32),
-      }
+      input_dict = {'input_1': x_train}
+      output_dict = {'invalid_output_name': y_train}
       return input_dict, output_dict
 
     model = simple_functional_model()
-- 
GitLab


From a99b32fb149d028cd31fe638f81c6ca56c6e3b57 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 14 Dec 2017 12:31:29 -0800
Subject: [PATCH 1013/1225] [XLA] Gather the bool parameters into one thing to
 control the text format.

PiperOrigin-RevId: 179079727
---
 .../compiler/xla/service/hlo_computation.cc   | 15 ++---
 .../compiler/xla/service/hlo_computation.h    |  3 +-
 .../xla/service/hlo_execution_profile.cc      |  4 +-
 .../compiler/xla/service/hlo_graph_dumper.cc  |  3 +-
 .../compiler/xla/service/hlo_instruction.cc   | 18 +++---
 .../compiler/xla/service/hlo_instruction.h    | 55 +++++++++++++++++--
 .../xla/service/hlo_instruction_test.cc       | 10 ++--
 tensorflow/compiler/xla/service/hlo_module.cc |  7 +--
 tensorflow/compiler/xla/service/hlo_module.h  |  2 +-
 .../compiler/xla/service/hlo_module_test.cc   |  5 +-
 .../compiler/xla/service/reshape_mover.cc     | 32 ++++++-----
 .../xla/service/while_loop_simplifier.cc      | 24 ++++----
 .../xla/tools/parser/hlo_parser_test.cc       |  4 +-
 13 files changed, 114 insertions(+), 68 deletions(-)

diff --git a/tensorflow/compiler/xla/service/hlo_computation.cc b/tensorflow/compiler/xla/service/hlo_computation.cc
index 014a851c96..2fac52d853 100644
--- a/tensorflow/compiler/xla/service/hlo_computation.cc
+++ b/tensorflow/compiler/xla/service/hlo_computation.cc
@@ -364,26 +364,21 @@ std::list<HloComputation*> HloComputation::MakeEmbeddedComputationsList()
   return post_order;
 }
 
-string HloComputation::ToString(int nested_level,
-                                bool include_large_constants) const {
+string HloComputation::ToString(const HloPrintOptions& options) const {
   std::ostringstream s;
-  for (int i = 0; i < nested_level; i++) {
+  for (int i = 0; i < options.indent_amount(); i++) {
     s << "    ";
   }
   s << "%" << name() << " " << ShapeUtil::HumanString(ComputeProgramShape())
     << " {\n";
   for (const HloInstruction* instruction : MakeInstructionPostOrder()) {
-    for (int i = 0; i < nested_level; i++) {
+    for (int i = 0; i < options.indent_amount(); i++) {
       s << "    ";
     }
     s << "  " << (instruction == root_instruction_ ? "ROOT " : "")
-      << instruction->ToString(
-             /*compact_operands=*/false,
-             /*include_metadata=*/true,
-             /*include_large_constants=*/include_large_constants)
-      << "\n";
+      << instruction->ToString(options) << "\n";
   }
-  for (int i = 0; i < nested_level; i++) {
+  for (int i = 0; i < options.indent_amount(); i++) {
     s << "    ";
   }
   s << "}";
diff --git a/tensorflow/compiler/xla/service/hlo_computation.h b/tensorflow/compiler/xla/service/hlo_computation.h
index ccedda2a03..e87f240540 100644
--- a/tensorflow/compiler/xla/service/hlo_computation.h
+++ b/tensorflow/compiler/xla/service/hlo_computation.h
@@ -138,8 +138,7 @@ class HloComputation {
   void UniquifyName(NameUniquer* name_uniquer);
 
   // Return a string representation of the computation.
-  string ToString(int nested_level = 0,
-                  bool include_large_constants = false) const;
+  string ToString(const HloPrintOptions& options = HloPrintOptions()) const;
 
   // Returns a serialized representation of this computation.
   HloComputationProto ToProto() const;
diff --git a/tensorflow/compiler/xla/service/hlo_execution_profile.cc b/tensorflow/compiler/xla/service/hlo_execution_profile.cc
index 0809fe780d..0111cfd5a3 100644
--- a/tensorflow/compiler/xla/service/hlo_execution_profile.cc
+++ b/tensorflow/compiler/xla/service/hlo_execution_profile.cc
@@ -76,8 +76,8 @@ std::unique_ptr<HloProfilePrinter> CreateHloProfilePrinter(
       HloProfilePrinter::HloInstructionInfo* instruction_info =
           &computation_info->instructions[instruction_index_in_static_data++];
       instruction_info->long_name = strdup(hlo->ToString().c_str());
-      instruction_info->short_name =
-          strdup(hlo->ToString(/*compact_operands=*/true).c_str());
+      instruction_info->short_name = strdup(
+          hlo->ToString(HloPrintOptions().set_compact_operands(true)).c_str());
       instruction_info->category = strdup(hlo->ToCategory().c_str());
       instruction_info->flop_count = cost_analysis.flop_count(*hlo);
       instruction_info->transcendental_count =
diff --git a/tensorflow/compiler/xla/service/hlo_graph_dumper.cc b/tensorflow/compiler/xla/service/hlo_graph_dumper.cc
index 84187d5783..142e2066c8 100644
--- a/tensorflow/compiler/xla/service/hlo_graph_dumper.cc
+++ b/tensorflow/compiler/xla/service/hlo_graph_dumper.cc
@@ -1438,7 +1438,8 @@ void DumpText(const HloModule& module, const string& label,
       do_prefix ? StrCat(prefix, "-", label, ".txt") : StrCat(label, ".txt");
   string path = JoinPath(directory_path, filename);
   TF_CHECK_OK(WriteStringToFile(
-      env, path, module.ToString(/*include_large_constants=*/true)));
+      env, path,
+      module.ToString(HloPrintOptions().set_print_large_constants(true))));
   LOG(INFO) << "dumping module '" << module.name() << "' to " << path;
 }
 
diff --git a/tensorflow/compiler/xla/service/hlo_instruction.cc b/tensorflow/compiler/xla/service/hlo_instruction.cc
index 220d5044a2..9d377198d5 100644
--- a/tensorflow/compiler/xla/service/hlo_instruction.cc
+++ b/tensorflow/compiler/xla/service/hlo_instruction.cc
@@ -1915,16 +1915,14 @@ string HloInstruction::SignatureString() const {
   return StrCat("(", operands, ") -> ", ShapeUtil::HumanString(shape()));
 }
 
-string HloInstruction::ToString(bool compact_operands, bool include_metadata,
-                                bool include_large_constants) const {
+string HloInstruction::ToString(const HloPrintOptions& options) const {
   string result =
       StrCat("%", name(), " = ", ShapeUtil::HumanStringWithLayout(shape()), " ",
-             HloOpcodeString(opcode()), "(",
-             OperandsToString(compact_operands, include_large_constants), ")");
+             HloOpcodeString(opcode()), "(", OperandsToString(options), ")");
   for (const string& extra : ExtraAttributesToString()) {
     StrAppend(&result, ", ", extra);
   }
-  if (include_metadata &&
+  if (options.print_metadata() &&
       (!metadata_.op_type().empty() || !metadata_.op_name().empty() ||
        !metadata_.source_file().empty())) {
     StrAppend(&result, ", metadata={", xla::OpMetadataToString(metadata_), "}");
@@ -1932,14 +1930,13 @@ string HloInstruction::ToString(bool compact_operands, bool include_metadata,
   return result;
 }
 
-string HloInstruction::OperandsToString(bool compact,
-                                        bool include_large_constants) const {
+string HloInstruction::OperandsToString(const HloPrintOptions& options) const {
   string operands;
   if (opcode() == HloOpcode::kConstant) {
     // For constants, show the actual value in place of an empty operand list.
     if ((!ShapeUtil::IsTuple(shape()) &&
          ShapeUtil::ElementsIn(shape()) <= 10) ||
-        include_large_constants) {
+        options.print_large_constants()) {
       // Literal::ToString emits multidimensional arrays over multiple
       // lines. Compact this into one line by stripping out white space.
       string tmp = literal().ToString();
@@ -1964,12 +1961,13 @@ string HloInstruction::OperandsToString(bool compact,
   } else {
     tensorflow::gtl::ArraySlice<HloInstruction*> slice(operands_);
     const int64 kMaxOperandsToShowIfCompact = 4;
-    if (compact && slice.size() > kMaxOperandsToShowIfCompact) {
+    if (options.compact_operands() &&
+        slice.size() > kMaxOperandsToShowIfCompact) {
       slice.remove_suffix(slice.size() - kMaxOperandsToShowIfCompact);
     }
     operands = Join(slice, ", ", [&](string* out, HloInstruction* operand) {
       *out += ShapeUtil::HumanStringWithLayout(operand->shape());
-      if (!compact) {
+      if (!options.compact_operands()) {
         StrAppend(out, " %", operand->name());
       }
     });
diff --git a/tensorflow/compiler/xla/service/hlo_instruction.h b/tensorflow/compiler/xla/service/hlo_instruction.h
index 092105582e..094dbc5b2d 100644
--- a/tensorflow/compiler/xla/service/hlo_instruction.h
+++ b/tensorflow/compiler/xla/service/hlo_instruction.h
@@ -56,6 +56,54 @@ namespace xla {
 class HloComputation;
 class HloModule;
 
+// A bunch of switches that control how the hlo text should be printed.
+class HloPrintOptions {
+ public:
+  // Constructs the default print options: don't print large constants, print
+  // metadata, don't compact operands, and no indentation.
+  HloPrintOptions()
+      : print_large_constants_(false),
+        print_metadata_(true),
+        compact_operands_(false),
+        indent_amount_(0) {}
+
+  // If true, large constants will be printed out.
+  HloPrintOptions& set_print_large_constants(bool value) {
+    print_large_constants_ = value;
+    return *this;
+  }
+
+  // If true, metatdata will be printed.
+  HloPrintOptions& set_print_metadata(bool value) {
+    print_metadata_ = value;
+    return *this;
+  }
+
+  // If true, only a part of operands will be printed out, and their names will
+  // be omitted (note that in this case the text will not be parsable).
+  HloPrintOptions& set_compact_operands(bool value) {
+    compact_operands_ = value;
+    return *this;
+  }
+
+  // The indent of the hlo text block.
+  HloPrintOptions& set_indent_amount(int value) {
+    indent_amount_ = value;
+    return *this;
+  }
+
+  bool print_large_constants() const { return print_large_constants_; }
+  bool print_metadata() const { return print_metadata_; }
+  bool compact_operands() const { return compact_operands_; }
+  int indent_amount() const { return indent_amount_; }
+
+ private:
+  bool print_large_constants_;
+  bool print_metadata_;
+  bool compact_operands_;
+  int indent_amount_;
+};
+
 // HLO instructions are the IR used by the high-level compiler.
 class HloInstruction {
  public:
@@ -650,19 +698,16 @@ class HloInstruction {
   string SignatureString() const;
 
   // Returns a debugging string that represents this instruction.
-  string ToString(bool compact_operands = false, bool include_metadata = true,
-                  bool include_large_constants = false) const;
+  string ToString(const HloPrintOptions& options = HloPrintOptions()) const;
 
   // Components of the ToString() representation:
 
   // Returns a string representation of the operand list.
-  string OperandsToString(bool compact, bool include_large_constants) const;
+  string OperandsToString(const HloPrintOptions& options) const;
 
   // Returns string representation of op-specific attributes.
   std::vector<string> ExtraAttributesToString() const;
 
-  string ToStringNoMetadata() const { return ToString(false, false); }
-
   // As ToString, but returns a shorter string.
   string ToShortString() const;
 
diff --git a/tensorflow/compiler/xla/service/hlo_instruction_test.cc b/tensorflow/compiler/xla/service/hlo_instruction_test.cc
index 54788fa2da..043c751a5e 100644
--- a/tensorflow/compiler/xla/service/hlo_instruction_test.cc
+++ b/tensorflow/compiler/xla/service/hlo_instruction_test.cc
@@ -1149,7 +1149,9 @@ TEST_F(HloInstructionTest, Stringification) {
   HloInstruction* dot = builder.AddInstruction(
       HloInstruction::CreateDot(sout, x, reshape, dot_dnums));
 
-  EXPECT_EQ(dot->ToString(false, false),
+  auto options = HloPrintOptions().set_print_metadata(false);
+
+  EXPECT_EQ(dot->ToString(options),
             "%dot = f32[5,20]{1,0} dot(f32[5,10]{1,0} %x, f32[10,20]{1,0} "
             "%transpose), lhs_contracting_dims=1,rhs_contracting_dims=0");
 
@@ -1159,13 +1161,13 @@ TEST_F(HloInstructionTest, Stringification) {
       {dot, reshape}, HloInstruction::FusionKind::kTransposeDot);
 
   EXPECT_EQ(
-      fusion->ToString(false, false),
+      fusion->ToString(options),
       "%fusion = f32[5,20]{1,0} fusion(f32[5,10]{1,0} %x, "
       "f32[20,10]{1,0} %y), kind=kTransposeDot, calls=%fused_computation");
 
   HloInstruction* loop = builder.AddInstruction(
       HloInstruction::CreateWhile(sout, computation, computation, x));
-  EXPECT_EQ(loop->ToString(false, false),
+  EXPECT_EQ(loop->ToString(options),
             "%while = f32[5,20]{1,0} while(f32[5,10]{1,0} %x), "
             "condition=%TransposeDot, body=%TransposeDot");
 
@@ -1174,7 +1176,7 @@ TEST_F(HloInstructionTest, Stringification) {
   HloInstruction* conditional =
       builder.AddInstruction(HloInstruction::CreateConditional(
           sout, pred, x, computation, x, computation));
-  EXPECT_EQ(conditional->ToString(false, false),
+  EXPECT_EQ(conditional->ToString(options),
             "%conditional = f32[5,20]{1,0} conditional(pred[] %constant, "
             "f32[5,10]{1,0} %x, f32[5,10]{1,0} %x), "
             "true_computation=%TransposeDot, false_computation=%TransposeDot");
diff --git a/tensorflow/compiler/xla/service/hlo_module.cc b/tensorflow/compiler/xla/service/hlo_module.cc
index 6fe2134466..1f7d8ed991 100644
--- a/tensorflow/compiler/xla/service/hlo_module.cc
+++ b/tensorflow/compiler/xla/service/hlo_module.cc
@@ -171,17 +171,14 @@ void HloModule::ReplaceComputations(
   computations_ = std::move(new_computations);
 }
 
-string HloModule::ToString(bool include_large_constants) const {
+string HloModule::ToString(const HloPrintOptions& options) const {
   std::ostringstream s;
   s << "HloModule " << name() << ":\n\n";
   for (const HloComputation* computation : MakeComputationPostOrder()) {
     if (computation == entry_computation()) {
       s << "ENTRY ";
     }
-    s << computation->ToString(
-             /*nested_level=*/0,
-             /*include_large_constants=*/include_large_constants)
-      << "\n\n";
+    s << computation->ToString(options) << "\n\n";
   }
   return s.str();
 }
diff --git a/tensorflow/compiler/xla/service/hlo_module.h b/tensorflow/compiler/xla/service/hlo_module.h
index 5141e7bc8d..ea2d3771c6 100644
--- a/tensorflow/compiler/xla/service/hlo_module.h
+++ b/tensorflow/compiler/xla/service/hlo_module.h
@@ -143,7 +143,7 @@ class HloModule {
 
   const HloModuleConfig& config() const { return config_; }
 
-  string ToString(bool include_large_constants = false) const;
+  string ToString(const HloPrintOptions& options = HloPrintOptions()) const;
 
   // Convert an HloModule to or from a proto.
   HloModuleProto ToProto() const;
diff --git a/tensorflow/compiler/xla/service/hlo_module_test.cc b/tensorflow/compiler/xla/service/hlo_module_test.cc
index bf6440d66c..de1eb1f094 100644
--- a/tensorflow/compiler/xla/service/hlo_module_test.cc
+++ b/tensorflow/compiler/xla/service/hlo_module_test.cc
@@ -137,12 +137,13 @@ TEST_F(HloModuleTest, LargeConstantToString) {
   EXPECT_EQ(
       "HloModule LargeConstantToString:\n\nENTRY %Constant () -> f32[16] {\n  "
       "ROOT %constant = f32[16]{0} constant({...})\n}\n\n",
-      module->ToString(/*include_large_constants=*/false));
+      module->ToString(HloPrintOptions().set_print_large_constants(false)));
+
   EXPECT_EQ(
       "HloModule LargeConstantToString:\n\nENTRY %Constant () -> f32[16] {\n  "
       "ROOT %constant = f32[16]{0} constant({42, 42, 42, 42, 42, 42, 42, 42, "
       "42, 42, 42, 42, 42, 42, 42, 42})\n}\n\n",
-      module->ToString(/*include_large_constants=*/true));
+      module->ToString(HloPrintOptions().set_print_large_constants(true)));
 }
 
 }  // namespace
diff --git a/tensorflow/compiler/xla/service/reshape_mover.cc b/tensorflow/compiler/xla/service/reshape_mover.cc
index 0fb90230f2..e62bafc50b 100644
--- a/tensorflow/compiler/xla/service/reshape_mover.cc
+++ b/tensorflow/compiler/xla/service/reshape_mover.cc
@@ -101,8 +101,9 @@ HloInstruction* FirstNonScalarAndNonTrivialReshapeOperand(
         IsReshapeOrTranspose(operand) &&
         !CanTriviallyChangeShape(operand->operand(0))) {
       VLOG(5) << "Found first non-scalar and non-trivial reshape operand of "
-              << hlo->ToStringNoMetadata() << ":\n\t"
-              << operand->ToStringNoMetadata();
+              << hlo->ToString(HloPrintOptions().set_print_metadata(false))
+              << ":\n\t"
+              << operand->ToString(HloPrintOptions().set_print_metadata(false));
       return operand;
     }
   }
@@ -133,8 +134,9 @@ bool AreEquivalentReshapes(const HloInstruction* a, const HloInstruction* b) {
 bool AllOperandsHaveEasyShapeChanges(
     const HloInstruction* instruction,
     const HloInstruction* first_reshape_operand) {
+  auto print_no_metadata = HloPrintOptions().set_print_metadata(false);
   VLOG(3) << "** Checking whether all operands have easy shape changes: "
-          << instruction->ToStringNoMetadata();
+          << instruction->ToString(print_no_metadata);
   // Check whether all operands:
   //    0. Have the same dimensions as the output -- if not, it may be
   //       implicitly broadcast, which can confound the movement's
@@ -151,21 +153,21 @@ bool AllOperandsHaveEasyShapeChanges(
       VLOG(5) << "Operand shape differs from output shape; may be "
                  "implicitly broadcast, so preventing "
                  "movement\n\toperand: "
-              << operand->ToStringNoMetadata()
-              << "\n\tinstruction: " << instruction->ToStringNoMetadata();
+              << operand->ToString(print_no_metadata) << "\n\tinstruction: "
+              << instruction->ToString(print_no_metadata);
       return false;
     }
 
     if (AreEquivalentReshapes(first_reshape_operand, operand)) {
       VLOG(5) << "Are equivalent reshapes:\n\tfirst_reshape_operand: "
-              << first_reshape_operand->ToStringNoMetadata()
-              << "\n\toperand: " << operand->ToStringNoMetadata();
+              << first_reshape_operand->ToString(print_no_metadata)
+              << "\n\toperand: " << operand->ToString(print_no_metadata);
       continue;
     }
 
     if (CanTriviallyChangeShape(operand)) {
       VLOG(5) << "Operand can trivially change shape: "
-              << operand->ToStringNoMetadata();
+              << operand->ToString(print_no_metadata);
       continue;
     }
 
@@ -173,12 +175,12 @@ bool AllOperandsHaveEasyShapeChanges(
     // well.
     VLOG(5) << "Operand is neither equalivant to the first Reshape operand"
                "nor can trivially change shape: "
-            << operand->ToStringNoMetadata();
+            << operand->ToString(print_no_metadata);
     return false;
   }
 
   VLOG(3) << "All operands have easy shape changes: "
-          << instruction->ToStringNoMetadata();
+          << instruction->ToString(print_no_metadata);
   return true;
 }
 
@@ -250,11 +252,13 @@ StatusOr<bool> TrySinkReshapeOrTranspose(HloComputation* computation,
     return false;
   }
 
+  auto print_no_metadata = HloPrintOptions().set_print_metadata(false);
   // At this point we've decided to sink reshape/transpose operands.
   const Shape& new_operand_shape = first_reshape_operand->operand(0)->shape();
   VLOG(3) << "** Sinking reshape or transpose: "
-          << instruction->ToStringNoMetadata() << "\n\tfirst reshape operand: "
-          << first_reshape_operand->ToStringNoMetadata()
+          << instruction->ToString(print_no_metadata)
+          << "\n\tfirst reshape operand: "
+          << first_reshape_operand->ToString(print_no_metadata)
           << "\n\tnew operand shape: "
           << ShapeUtil::HumanString(new_operand_shape);
 
@@ -267,7 +271,7 @@ StatusOr<bool> TrySinkReshapeOrTranspose(HloComputation* computation,
       continue;
     }
     VLOG(3) << "Updating operand #" << i << ": "
-            << operands[i]->ToStringNoMetadata();
+            << operands[i]->ToString(print_no_metadata);
     operands[i] = UpdateOperand(computation, first_reshape_operand,
                                 new_operand_shape, operands[i]);
   }
@@ -298,7 +302,7 @@ StatusOr<bool> TrySinkReshapeOrTranspose(HloComputation* computation,
   switch (first_reshape_operand->opcode()) {
     case HloOpcode::kReshape:
       VLOG(3) << "Creating new reshape for new elementwise op: "
-              << new_elementwise->ToStringNoMetadata();
+              << new_elementwise->ToString(print_no_metadata);
       new_reshape =
           HloInstruction::CreateReshape(instruction->shape(), new_elementwise);
       break;
diff --git a/tensorflow/compiler/xla/service/while_loop_simplifier.cc b/tensorflow/compiler/xla/service/while_loop_simplifier.cc
index b2fd64a4d9..fb0e6f7ce0 100644
--- a/tensorflow/compiler/xla/service/while_loop_simplifier.cc
+++ b/tensorflow/compiler/xla/service/while_loop_simplifier.cc
@@ -306,6 +306,8 @@ static StatusOr<bool> TryRemoveDeadWhileParams(HloInstruction* while_op) {
     return false;
   }
 
+  auto print_no_metadata = HloPrintOptions().set_print_metadata(false);
+
   // Bail if param0 of while_cond or while_body has users which aren't of type
   // get-tuple-element.
   for (const HloInstruction* instr : {while_body->parameter_instruction(0),
@@ -313,9 +315,10 @@ static StatusOr<bool> TryRemoveDeadWhileParams(HloInstruction* while_op) {
     for (const HloInstruction* user : instr->users()) {
       if (user->opcode() != HloOpcode::kGetTupleElement) {
         VLOG(2) << "Cowardly refusing to analyze while loop with "
-                << instr->ToStringNoMetadata()
-                << " used by non-GTE instruction " << user->ToStringNoMetadata()
-                << " in computation " << instr->parent()->name();
+                << instr->ToString(print_no_metadata)
+                << " used by non-GTE instruction "
+                << user->ToString(print_no_metadata) << " in computation "
+                << instr->parent()->name();
         return false;
       }
     }
@@ -351,7 +354,7 @@ static StatusOr<bool> TryRemoveDeadWhileParams(HloInstruction* while_op) {
 
       used_tuple_indices.insert(user->tuple_index());
       if (used_tuple_indices.size() == tuple_size) {
-        VLOG(2) << "Loop " << while_op->ToStringNoMetadata()
+        VLOG(2) << "Loop " << while_op->ToString(print_no_metadata)
                 << " uses all of its inputs; no simplification possible.";
         return false;
       }
@@ -375,7 +378,7 @@ static StatusOr<bool> TryRemoveDeadWhileParams(HloInstruction* while_op) {
       used_tuple_indices.insert(i);
 
       if (used_tuple_indices.size() == tuple_size) {
-        VLOG(2) << "Loop " << while_op->ToStringNoMetadata()
+        VLOG(2) << "Loop " << while_op->ToString(print_no_metadata)
                 << " uses all of its inputs; no simplification possible.";
         return false;
       }
@@ -387,7 +390,8 @@ static StatusOr<bool> TryRemoveDeadWhileParams(HloInstruction* while_op) {
   CHECK_LT(used_tuple_indices.size(), tuple_size);
 
   VLOG(1) << "Eliminating " << tuple_size - used_tuple_indices.size()
-          << " elements from tuple of " << while_op->ToStringNoMetadata();
+          << " elements from tuple of "
+          << while_op->ToString(print_no_metadata);
 
   // Build up maps from the old/new to the new/old tuple indices.
   std::vector<int64> new_to_old_tuple_idx(used_tuple_indices.begin(),
@@ -431,7 +435,7 @@ static StatusOr<bool> TryRemoveDeadWhileParams(HloInstruction* while_op) {
         continue;
       }
       CHECK_EQ(user->opcode(), HloOpcode::kGetTupleElement)
-          << user->ToStringNoMetadata();
+          << user->ToString(print_no_metadata);
 
       int64 old_idx = user->tuple_index();
       auto new_idx_iter = old_to_new_tuple_idx.find(old_idx);
@@ -446,14 +450,14 @@ static StatusOr<bool> TryRemoveDeadWhileParams(HloInstruction* while_op) {
         CHECK(user->user_count() == 0 ||
               user->user_count() == 1 &&
                   user->users().front() == while_body_root)
-            << "Instruction " << user->ToStringNoMetadata()
+            << "Instruction " << user->ToString(print_no_metadata)
             << " should be unused (except by root of while body), but has "
                "users: {"
             << tensorflow::str_util::Join(
                    user->users(), ", ",
-                   [](string* out, const HloInstruction* instr) {
+                   [&](string* out, const HloInstruction* instr) {
                      tensorflow::strings::StrAppend(
-                         out, instr->ToStringNoMetadata());
+                         out, instr->ToString(print_no_metadata));
                    })
             << "}";
 
diff --git a/tensorflow/compiler/xla/tools/parser/hlo_parser_test.cc b/tensorflow/compiler/xla/tools/parser/hlo_parser_test.cc
index 61d8902855..189de59b6f 100644
--- a/tensorflow/compiler/xla/tools/parser/hlo_parser_test.cc
+++ b/tensorflow/compiler/xla/tools/parser/hlo_parser_test.cc
@@ -761,8 +761,8 @@ class HloParserTest : public ::testing::Test,
     const string& original = GetParam().module_string;
     auto result = Parse(original);
     TF_ASSERT_OK(result.status());
-    EXPECT_EQ(original,
-              result.ValueOrDie()->ToString(/*include_large_constants=*/true));
+    EXPECT_EQ(original, result.ValueOrDie()->ToString(
+                            HloPrintOptions().set_print_large_constants(true)));
   }
 };
 
-- 
GitLab


From a44c2f3281453c5210cd6d8336744b7264aa3e96 Mon Sep 17 00:00:00 2001
From: Felix Abecassis <felix.abecassis@gmail.com>
Date: Thu, 14 Dec 2017 12:38:56 -0800
Subject: [PATCH 1014/1225] Dockerfile.devel-gpu: optimize the size of the
 generated image (#15355)

- Use `nvidia/cuda:9.0-base-ubuntu16.04` as the base image to select
  just the CUDA libraries we need.
- Remove the installed static libraries.
- Remove the dependency on openjdk-8 since Bazel ships with a local copy.
- Perform a shallow clone of the repository.

The image is 2.94GB, down from 4.87GB.

Signed-off-by: Felix Abecassis <fabecassis@nvidia.com>
---
 tensorflow/tools/docker/Dockerfile.devel      |  5 +---
 tensorflow/tools/docker/Dockerfile.devel-gpu  | 23 +++++++++++--------
 .../docker/parameterized_docker_build.sh      |  2 +-
 3 files changed, 16 insertions(+), 14 deletions(-)

diff --git a/tensorflow/tools/docker/Dockerfile.devel b/tensorflow/tools/docker/Dockerfile.devel
index 3525c7524f..0a6860e791 100644
--- a/tensorflow/tools/docker/Dockerfile.devel
+++ b/tensorflow/tools/docker/Dockerfile.devel
@@ -69,11 +69,8 @@ RUN mkdir /bazel && \
     rm -f /bazel/bazel-$BAZEL_VERSION-installer-linux-x86_64.sh
 
 # Download and build TensorFlow.
-
-RUN git clone https://github.com/tensorflow/tensorflow.git && \
-    cd tensorflow && \
-    git checkout r1.4
 WORKDIR /tensorflow
+RUN git clone --branch=r1.4 --depth=1 https://github.com/tensorflow/tensorflow.git .
 
 # TODO(craigcitro): Don't install the pip package, since it makes it
 # more difficult to experiment with local changes. Instead, just add
diff --git a/tensorflow/tools/docker/Dockerfile.devel-gpu b/tensorflow/tools/docker/Dockerfile.devel-gpu
index 9f4cc74a66..4164cc3f88 100644
--- a/tensorflow/tools/docker/Dockerfile.devel-gpu
+++ b/tensorflow/tools/docker/Dockerfile.devel-gpu
@@ -1,11 +1,20 @@
-FROM nvidia/cuda:9.0-cudnn7-devel-ubuntu16.04
+FROM nvidia/cuda:9.0-base-ubuntu16.04
 
 LABEL maintainer="Craig Citro <craigcitro@google.com>"
 
 RUN apt-get update && apt-get install -y --no-install-recommends \
         build-essential \
+        cuda-command-line-tools-9-0 \
+        cuda-cublas-dev-9-0 \
+        cuda-cudart-dev-9-0 \
+        cuda-cufft-dev-9-0 \
+        cuda-curand-dev-9-0 \
+        cuda-cusolver-dev-9-0 \
+        cuda-cusparse-dev-9-0 \
         curl \
         git \
+        libcudnn7=7.0.5.15-1+cuda9.0 \
+        libcudnn7-dev=7.0.5.15-1+cuda9.0 \
         libcurl3-dev \
         libfreetype6-dev \
         libpng12-dev \
@@ -17,12 +26,11 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
         unzip \
         zip \
         zlib1g-dev \
-        openjdk-8-jdk \
-        openjdk-8-jre-headless \
         wget \
         && \
-    apt-get clean && \
-    rm -rf /var/lib/apt/lists/*
+    rm -rf /var/lib/apt/lists/* && \
+    find /usr/local/cuda-9.0/lib64/ -type f -name 'lib*_static.a' -not -name 'libcudart_static.a' -delete && \
+    rm /usr/lib/x86_64-linux-gnu/libcudnn_static_v7.a
 
 RUN curl -fSsL -O https://bootstrap.pypa.io/get-pip.py && \
     python get-pip.py && \
@@ -70,11 +78,8 @@ RUN mkdir /bazel && \
     rm -f /bazel/bazel-$BAZEL_VERSION-installer-linux-x86_64.sh
 
 # Download and build TensorFlow.
-
-RUN git clone https://github.com/tensorflow/tensorflow.git && \
-    cd tensorflow && \
-    git checkout r1.4
 WORKDIR /tensorflow
+RUN git clone --branch=r1.4 --depth=1 https://github.com/tensorflow/tensorflow.git .
 
 # Configure the build for our CUDA configuration.
 ENV CI_BUILD_PYTHON python
diff --git a/tensorflow/tools/docker/parameterized_docker_build.sh b/tensorflow/tools/docker/parameterized_docker_build.sh
index 80a07b9b3b..e7de7df856 100755
--- a/tensorflow/tools/docker/parameterized_docker_build.sh
+++ b/tensorflow/tools/docker/parameterized_docker_build.sh
@@ -265,7 +265,7 @@ else
   DOCKERFILE="${TMP_DIR}/Dockerfile"
 
   # Modify the devel Dockerfile to specify the git branch
-  sed -r "s/([\s]*git checkout )(.*)/\1${TF_DOCKER_BUILD_DEVEL_BRANCH}/g" \
+  sed "s/^RUN git clone --branch=.* --depth=1/RUN git clone --branch=${TF_DOCKER_BUILD_DEVEL_BRANCH} --depth=1/" \
       "${ORIG_DOCKERFILE}" > "${DOCKERFILE}"
 
   # Modify python/pip version if necessary.
-- 
GitLab


From ccef6a711dcadfc57b80783216ee025bfcae4b47 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 14 Dec 2017 13:05:24 -0800
Subject: [PATCH 1015/1225] Add RNN performance information.

Update cudnn_rnn_ops_benchmark as it had API rotted.

PiperOrigin-RevId: 179084042
---
 tensorflow/contrib/cudnn_rnn/BUILD            |  5 +-
 .../kernel_tests/cudnn_rnn_ops_benchmark.py   | 43 +++++++--------
 .../docs_src/performance/performance_guide.md | 52 +++++++++++++++++++
 3 files changed, 72 insertions(+), 28 deletions(-)

diff --git a/tensorflow/contrib/cudnn_rnn/BUILD b/tensorflow/contrib/cudnn_rnn/BUILD
index fce2c03e69..0751624bc4 100644
--- a/tensorflow/contrib/cudnn_rnn/BUILD
+++ b/tensorflow/contrib/cudnn_rnn/BUILD
@@ -146,10 +146,10 @@ cuda_py_test(
 
 cuda_py_test(
     name = "cudnn_rnn_ops_benchmark",
-    size = "large",
+    size = "small",
     srcs = ["python/kernel_tests/cudnn_rnn_ops_benchmark.py"],
     additional_deps = [
-        ":cudnn_rnn_ops_py",
+        ":cudnn_rnn_py",
         "//tensorflow/contrib/rnn:rnn_py",
         "//tensorflow/python:array_ops",
         "//tensorflow/python:client",
@@ -164,7 +164,6 @@ cuda_py_test(
         "//tensorflow/python:variables",
     ],
     tags = [
-        "manual",
         "noasan",  # http://b/62067814
         "nomsan",
         "notsan",
diff --git a/tensorflow/contrib/cudnn_rnn/python/kernel_tests/cudnn_rnn_ops_benchmark.py b/tensorflow/contrib/cudnn_rnn/python/kernel_tests/cudnn_rnn_ops_benchmark.py
index ff409ac718..4fc5ff1bd1 100644
--- a/tensorflow/contrib/cudnn_rnn/python/kernel_tests/cudnn_rnn_ops_benchmark.py
+++ b/tensorflow/contrib/cudnn_rnn/python/kernel_tests/cudnn_rnn_ops_benchmark.py
@@ -20,8 +20,8 @@ from __future__ import print_function
 
 import time
 
+from tensorflow.contrib import rnn as contrib_rnn
 from tensorflow.contrib.cudnn_rnn.python.ops import cudnn_rnn_ops
-from tensorflow.contrib.rnn.python.ops import core_rnn
 from tensorflow.contrib.rnn.python.ops import lstm_ops
 from tensorflow.python.client import session
 from tensorflow.python.framework import dtypes
@@ -29,8 +29,7 @@ from tensorflow.python.framework import ops
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import control_flow_ops
 from tensorflow.python.ops import gradients_impl
-from tensorflow.python.ops import init_ops
-from tensorflow.python.ops import rnn_cell
+from tensorflow.python.ops import rnn
 from tensorflow.python.ops import variables
 from tensorflow.python.platform import test
 
@@ -44,19 +43,19 @@ class CudnnRNNBenchmark(test.Benchmark):
         "large": {
             "num_layers": 4,
             "num_units": 1024,
-            "seq_length": 40,
+            "seq_length": 50,
             "batch_size": 64,
         },
         "medium": {
             "num_layers": 4,
             "num_units": 512,
-            "seq_length": 30,
+            "seq_length": 50,
             "batch_size": 64,
         },
         "small": {
             "num_layers": 4,
             "num_units": 128,
-            "seq_length": 20,
+            "seq_length": 50,
             "batch_size": 64,
         },
     }
@@ -71,7 +70,7 @@ class CudnnRNNBenchmark(test.Benchmark):
 
   def _BenchmarkOp(self, op, desc):
     burn_in_steps = 10
-    benchmark_steps = 40
+    benchmark_steps = 20
     with session.Session() as sess:
       sess.run(variables.global_variables_initializer())
       for i in xrange(burn_in_steps + benchmark_steps):
@@ -126,16 +125,12 @@ class CudnnRNNBenchmark(test.Benchmark):
       seq_length = config["seq_length"]
 
       with ops.Graph().as_default(), ops.device("/device:GPU:0"):
-        inputs = seq_length * [
-            array_ops.zeros([batch_size, num_units], dtypes.float32)
-        ]
-        initializer = init_ops.random_uniform_initializer(-0.01, 0.01, seed=127)
-
-        cell = rnn_cell.LSTMCell(
-            num_units=num_units, initializer=initializer, state_is_tuple=True)
-        multi_cell = rnn_cell.MultiRNNCell(
-            [cell() for _ in range(num_layers)])
-        outputs, final_state = core_rnn.static_rnn(
+        inputs = array_ops.zeros([batch_size, seq_length, num_units],
+                                 dtypes.float32)
+
+        multi_cell = contrib_rnn.MultiRNNCell(
+            [contrib_rnn.BasicLSTMCell(num_units) for _ in range(num_layers)])
+        outputs, final_state = rnn.dynamic_rnn(
             multi_cell, inputs, dtype=dtypes.float32)
         trainable_variables = ops.get_collection(
             ops.GraphKeys.TRAINABLE_VARIABLES)
@@ -154,14 +149,12 @@ class CudnnRNNBenchmark(test.Benchmark):
       seq_length = config["seq_length"]
 
       with ops.Graph().as_default(), ops.device("/device:GPU:0"):
-        inputs = seq_length * [
-            array_ops.zeros([batch_size, num_units], dtypes.float32)
-        ]
-        cell = lambda: lstm_ops.LSTMBlockCell(num_units=num_units)  # pylint: disable=cell-var-from-loop
-
-        multi_cell = rnn_cell.MultiRNNCell(
-            [cell() for _ in range(num_layers)])
-        outputs, final_state = core_rnn.static_rnn(
+        inputs = array_ops.zeros([batch_size, seq_length, num_units],
+                                 dtypes.float32)
+
+        multi_cell = contrib_rnn.MultiRNNCell(
+            [lstm_ops.LSTMBlockCell(num_units) for _ in range(num_layers)])
+        outputs, final_state = rnn.dynamic_rnn(
             multi_cell, inputs, dtype=dtypes.float32)
         trainable_variables = ops.get_collection(
             ops.GraphKeys.TRAINABLE_VARIABLES)
diff --git a/tensorflow/docs_src/performance/performance_guide.md b/tensorflow/docs_src/performance/performance_guide.md
index 17f71a6d77..3ebafb9074 100644
--- a/tensorflow/docs_src/performance/performance_guide.md
+++ b/tensorflow/docs_src/performance/performance_guide.md
@@ -18,6 +18,7 @@ following sections:
 *   [Input pipeline optimizations](#input-pipeline-optimization)
 *   [Data formats](#data-formats)
 *   [Common fused Ops](#common-fused-ops)
+*   [RNN Performance](#rnn-performance)
 *   [Building and installing from source](#building-and-installing-from-source)
 
 ### Input pipeline optimization
@@ -197,6 +198,57 @@ since before TensorFlow 1.0.
 bn = tf.contrib.layers.batch_norm(input_layer, fused=True, data_format='NCHW')
 ```
 
+### RNN Performance
+
+There are many ways to specify an RNN computation in Tensorflow and they have
+have trade-offs with respect to model flexibility and performance. The
+@{tf.nn.rnn_cell.BasicLSTMCell} should be considered a reference implementation
+and used only as a last resort when no other options will work.
+
+When using one of the cells, rather than the fully fused RNN layers, you have a
+choice of whether to use @{tf.nn.static_rnn} or @{tf.nn.dynamic_rnn}.  There
+shouldn't generally be a performance difference at runtime, but large unroll
+amounts can increase the graph size of the @{tf.nn.static_rnn} and cause long
+compile times.  An additional advantage of @{tf.nn.dynamic_rnn} is that it can
+optionally swap memory from the GPU to the CPU to enable training of very long
+sequences.  Depending on the model and hardware configuration, this can come at
+a performance cost.  It is also possible to run multiple iterations of
+@{tf.nn.dynamic_rnn} and the underlying @{tf.while_loop} construct in parallel,
+although this is rarely useful with RNN models as they are inherently
+sequential.
+
+On NVIDIA GPUs, the use of @{tf.contrib.cudnn_rnn} should always be preferred
+unless you want layer normalization, which it doesn't support.  It is often at
+least an order of magnitude faster than @{tf.contrib.rnn.BasicLSTMCell} and
+@{tf.contrib.rnn.LSTMBlockCell} and uses 3-4x less memory than
+@{tf.contrib.rnn.BasicLSTMCell}.  Unfortunately, @{tf.contrib.cudnn_rnn} is not
+compatible with @{tf.train.SyncReplicasOptimizer} so you should either use a
+different synchronization mechanism (consider an all-reduce based strategy) or
+use the @{tf.contrib.rnn.LSTMBlockFusedCell} (at a significant performance
+penalty).
+
+If you need to run one step of the RNN at a time, as might be the case in
+reinforcement learning with a recurrent policy, then you should use the
+@{tf.contrib.rnn.LSTMBlockCell} with your own environment interaction loop
+inside a @{tf.while_loop} construct. Running one step of the RNN at a time and
+returning to python is possible but it will be slower.
+
+On CPUs, mobile devices, and if @{tf.contrib.cudnn_rnn} is not available on
+your GPU, the fastest and most memory efficient option is
+@{tf.contrib.rnn.LSTMBlockFusedCell}.
+
+For all of the less common cell types like @{tf.contrib.rnn.NASCell},
+@{tf.contrib.rnn.PhasedLSTMCell}, @{tf.contrib.rnn.UGRNNCell},
+@{tf.contrib.rnn.GLSTMCell}, @{tf.contrib.rnn.Conv1DLSTMCell},
+@{tf.contrib.rnn.Conv2DLSTMCell}, @{tf.contrib.rnn.LayerNormBasicLSTMCell},
+etc., one should be aware that they are implemented in the graph like
+@{tf.contrib.rnn.BasicLSTMCell} and as such will suffer from the same poor
+performance and high memory usage.  One should consider whether or not those
+trade-offs are worth it before using these cells. For example, while layer
+normalization can speed up convergence, because cuDNN is 20x faster the fastest
+wall clock time to convergence is usually obtained without it.
+
+
 ### Building and installing from source
 
 The default TensorFlow binaries target the broadest range of hardware to make
-- 
GitLab


From 1ba4c37cee105d1fdfa6bda88079904beddef831 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 14 Dec 2017 13:26:44 -0800
Subject: [PATCH 1016/1225] Don't generate Reciprocal nodes for integer types.
 Enable Reciprocal optimizations for complex types.

PiperOrigin-RevId: 179086680
---
 .../core/grappler/optimizers/constant_folding.cc       | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/tensorflow/core/grappler/optimizers/constant_folding.cc b/tensorflow/core/grappler/optimizers/constant_folding.cc
index a43c9bf270..360ada4b1c 100644
--- a/tensorflow/core/grappler/optimizers/constant_folding.cc
+++ b/tensorflow/core/grappler/optimizers/constant_folding.cc
@@ -1386,8 +1386,11 @@ Status ConstantFolding::SimplifyGraph(GraphDef* output,
 
       // Replace 1 / y with Reciprocal op.
       if (y_matches_output_shape && is_any_div && x_is_one) {
-        ReplaceDivisionOfOnesByReciprocal(node);
-        continue;
+        DataType type = node->attr().at("T").type();
+        if (DataTypeIsFloating(type) || DataTypeIsComplex(type)) {
+          ReplaceDivisionOfOnesByReciprocal(node);
+          continue;
+        }
       }
 
       const TensorShapeProto& x_shape =
@@ -1443,7 +1446,8 @@ Status ConstantFolding::SimplifyGraph(GraphDef* output,
         continue;
       }
       DataType type = node->attr().at("T").type();
-      if (IsDiv(*node) && !DataTypeIsFloating(type)) {
+      if (IsDiv(*node) &&
+          !(DataTypeIsFloating(type) || DataTypeIsComplex(type))) {
         continue;
       }
       // Insert new reciprocal op and change node from Div to Mul.
-- 
GitLab


From 03b30967f2f600d67d0c33a52321bac7004c5ba1 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 14 Dec 2017 13:27:20 -0800
Subject: [PATCH 1017/1225] Add unidirectional sequence RNN to TFLite Ops.

PiperOrigin-RevId: 179086788
---
 tensorflow/contrib/lite/kernels/BUILD         |  13 +
 tensorflow/contrib/lite/kernels/register.cc   |   3 +
 .../kernels/unidirectional_sequence_rnn.cc    | 169 +++++++++++
 .../unidirectional_sequence_rnn_test.cc       | 270 ++++++++++++++++++
 tensorflow/contrib/lite/model.cc              |   1 +
 tensorflow/contrib/lite/nnapi_delegate.cc     |   1 +
 tensorflow/contrib/lite/schema/schema.fbs     |   1 +
 .../contrib/lite/schema/schema_generated.h    |   9 +-
 8 files changed, 464 insertions(+), 3 deletions(-)
 create mode 100644 tensorflow/contrib/lite/kernels/unidirectional_sequence_rnn.cc
 create mode 100644 tensorflow/contrib/lite/kernels/unidirectional_sequence_rnn_test.cc

diff --git a/tensorflow/contrib/lite/kernels/BUILD b/tensorflow/contrib/lite/kernels/BUILD
index 83eb7f2cb8..32bbe2670e 100644
--- a/tensorflow/contrib/lite/kernels/BUILD
+++ b/tensorflow/contrib/lite/kernels/BUILD
@@ -98,6 +98,7 @@ cc_library(
         "skip_gram.cc",
         "space_to_depth.cc",
         "svdf.cc",
+        "unidirectional_sequence_rnn.cc",
     ],
     hdrs = [
         "kernel_util.h",
@@ -202,6 +203,18 @@ tf_cc_test(
     ],
 )
 
+tf_cc_test(
+    name = "unidirectional_sequence_rnn_test",
+    size = "small",
+    srcs = ["unidirectional_sequence_rnn_test.cc"],
+    deps = [
+        ":builtin_ops",
+        "//tensorflow/contrib/lite:framework",
+        "//tensorflow/contrib/lite/kernels:test_util",
+        "@com_google_googletest//:gtest",
+    ],
+)
+
 tf_cc_test(
     name = "l2norm_test",
     size = "small",
diff --git a/tensorflow/contrib/lite/kernels/register.cc b/tensorflow/contrib/lite/kernels/register.cc
index bef6967a90..12d360f15c 100644
--- a/tensorflow/contrib/lite/kernels/register.cc
+++ b/tensorflow/contrib/lite/kernels/register.cc
@@ -31,6 +31,7 @@ TfLiteRegistration* Register_CONV_2D();
 TfLiteRegistration* Register_DEPTHWISE_CONV_2D();
 TfLiteRegistration* Register_SVDF();
 TfLiteRegistration* Register_RNN();
+TfLiteRegistration* Register_UNIDIRECTIONAL_SEQUENCE_RNN();
 TfLiteRegistration* Register_EMBEDDING_LOOKUP();
 TfLiteRegistration* Register_EMBEDDING_LOOKUP_SPARSE();
 TfLiteRegistration* Register_FULLY_CONNECTED();
@@ -62,6 +63,8 @@ BuiltinOpResolver::BuiltinOpResolver() {
   AddBuiltin(BuiltinOperator_DEPTHWISE_CONV_2D, Register_DEPTHWISE_CONV_2D());
   AddBuiltin(BuiltinOperator_SVDF, Register_SVDF());
   AddBuiltin(BuiltinOperator_RNN, Register_RNN());
+  AddBuiltin(BuiltinOperator_UNIDIRECTIONAL_SEQUENCE_RNN,
+             Register_UNIDIRECTIONAL_SEQUENCE_RNN());
   AddBuiltin(BuiltinOperator_EMBEDDING_LOOKUP, Register_EMBEDDING_LOOKUP());
   AddBuiltin(BuiltinOperator_EMBEDDING_LOOKUP_SPARSE,
              Register_EMBEDDING_LOOKUP_SPARSE());
diff --git a/tensorflow/contrib/lite/kernels/unidirectional_sequence_rnn.cc b/tensorflow/contrib/lite/kernels/unidirectional_sequence_rnn.cc
new file mode 100644
index 0000000000..85e09049ee
--- /dev/null
+++ b/tensorflow/contrib/lite/kernels/unidirectional_sequence_rnn.cc
@@ -0,0 +1,169 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include <unistd.h>
+#include <cassert>
+#include <cmath>
+#include <cstdlib>
+#include <cstdio>
+#include <iostream>
+#include <limits>
+
+#include "tensorflow/contrib/lite/builtin_op_data.h"
+#include "tensorflow/contrib/lite/context.h"
+#include "tensorflow/contrib/lite/kernels/activation_functor.h"
+#include "tensorflow/contrib/lite/kernels/op_macros.h"
+
+namespace tflite {
+namespace ops {
+namespace builtin {
+namespace unidirectional_sequence_rnn {
+
+constexpr int kInputTensor = 0;
+constexpr int kWeightsTensor = 1;
+constexpr int kRecurrentWeightsTensor = 2;
+constexpr int kBiasTensor = 3;
+constexpr int KHiddenStateTensor = 0;
+constexpr int kOutputTensor = 1;
+
+TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
+  // Check we have all the inputs and outputs we need.
+  TF_LITE_ENSURE_EQ(context, node->inputs->size, 4);
+  TF_LITE_ENSURE_EQ(context, node->outputs->size, 2);
+
+  TfLiteTensor* input = &context->tensors[node->inputs->data[kInputTensor]];
+  TfLiteTensor* input_weights =
+      &context->tensors[node->inputs->data[kWeightsTensor]];
+  TfLiteTensor* recurrent_weights =
+      &context->tensors[node->inputs->data[kRecurrentWeightsTensor]];
+  TfLiteTensor* bias = &context->tensors[node->inputs->data[kBiasTensor]];
+
+  // Check all the parameters of tensor match within themselves and match the
+  // input configuration.
+  const int batch_size = input->dims->data[0];
+  const int max_time = input->dims->data[1];
+  const int num_units = input_weights->dims->data[0];
+  TF_LITE_ASSERT_EQ(input->dims->data[2], input_weights->dims->data[1]);
+  TF_LITE_ASSERT_EQ(input_weights->dims->data[0], bias->dims->data[0]);
+  TF_LITE_ASSERT_EQ(recurrent_weights->dims->data[0], bias->dims->data[0]);
+  TF_LITE_ASSERT_EQ(recurrent_weights->dims->data[1], bias->dims->data[0]);
+
+  TfLiteTensor* hidden_state =
+      &context->tensors[node->outputs->data[KHiddenStateTensor]];
+  TfLiteTensor* output = &context->tensors[node->outputs->data[kOutputTensor]];
+
+  // Resize state.
+  TfLiteIntArray* hidden_state_size_array = TfLiteIntArrayCreate(2);
+  hidden_state_size_array->data[0] = batch_size;
+  hidden_state_size_array->data[1] = num_units;
+  TF_LITE_ENSURE_OK(context, context->ResizeTensor(context, hidden_state,
+                                                   hidden_state_size_array));
+
+  // Mark hidden state as a persistent tensor.
+  hidden_state->allocation_type = kTfLiteArenaRwPersistent;
+
+  // Resize output.
+  TfLiteIntArray* output_size_array = TfLiteIntArrayCreate(3);
+  output_size_array->data[0] = batch_size;
+  output_size_array->data[1] = max_time;
+  output_size_array->data[2] = num_units;
+  TF_LITE_ENSURE_OK(context, context->ResizeTensor(context, output,
+                                                   output_size_array));
+
+  return kTfLiteOk;
+}
+
+TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
+  auto* params = reinterpret_cast<TfLiteRNNParams*>(node->builtin_data);
+
+  TfLiteTensor* input = &context->tensors[node->inputs->data[kInputTensor]];
+  TfLiteTensor* input_weights =
+      &context->tensors[node->inputs->data[kWeightsTensor]];
+  TfLiteTensor* recurrent_weights =
+      &context->tensors[node->inputs->data[kRecurrentWeightsTensor]];
+  TfLiteTensor* bias = &context->tensors[node->inputs->data[kBiasTensor]];
+  TfLiteTensor* hidden_state =
+      &context->tensors[node->outputs->data[KHiddenStateTensor]];
+  TfLiteTensor* output = &context->tensors[node->outputs->data[kOutputTensor]];
+
+  // Initialize the pointer bias.
+  const float* bias_ptr = bias->data.f;
+
+  const int batch_size = input->dims->data[0];
+  const int max_time = input->dims->data[1];
+  const int num_units = input_weights->dims->data[0];
+  const int input_size = input->dims->data[2];
+  const int input_weights_stride = input_weights->dims->data[1];
+  const int recurrent_weights_stride = recurrent_weights->dims->data[1];
+
+  // For each batch
+  for (int b = 0; b < batch_size; b++) {
+    // Initialize the pointer to hidden state.
+    float* hidden_state_ptr_batch = hidden_state->data.f + b * num_units;
+    for (int s = 0; s < max_time; s++) {
+      // Initialize the pointer to input and output.
+      const float* input_ptr_batch =
+          input->data.f + b * input_size * max_time + s * input_size;
+     float* output_ptr_batch =
+         output->data.f + b * num_units * max_time + s * num_units;
+
+      // Initialize input_weights and recurrent_weights.
+      const float* input_weights_ptr = input_weights->data.f;
+      const float* recurrent_weights_ptr = recurrent_weights->data.f;
+
+      // Output = bias
+      for (int o = 0; o < num_units; o++) {
+        output_ptr_batch[o] = bias_ptr[o];
+      }
+
+      // Output += input * input_weights
+      for (int o = 0; o < num_units; o++) {
+        for (int i = 0; i < input_size; i++) {
+          output_ptr_batch[o] += input_ptr_batch[i] * input_weights_ptr[i];
+        }
+        input_weights_ptr += input_weights_stride;
+      }
+
+      // Output += recurrent_weights * hidden_state
+      for (int o = 0; o < num_units; o++) {
+        for (int h = 0; h < num_units; h++) {
+          output_ptr_batch[o] +=
+              hidden_state_ptr_batch[h] * recurrent_weights_ptr[h];
+        }
+        recurrent_weights_ptr += recurrent_weights_stride;
+      }
+
+      // Output = activation(Output) and update hidden_state
+      for (int o = 0; o < num_units; o++) {
+        output_ptr_batch[o] =
+            (ActivationFunctor(params->activation))(output_ptr_batch[o]);
+        hidden_state_ptr_batch[o] = output_ptr_batch[o];
+      }
+    }
+  }
+  return kTfLiteOk;
+}
+
+}  // namespace unidirectional_sequence_rnn
+
+TfLiteRegistration* Register_UNIDIRECTIONAL_SEQUENCE_RNN() {
+  static TfLiteRegistration r = {/*init=*/nullptr, /*free=*/nullptr,
+                                 unidirectional_sequence_rnn::Prepare,
+                                 unidirectional_sequence_rnn::Eval};
+  return &r;
+}
+
+}  // namespace builtin
+}  // namespace ops
+}  // namespace tflite
diff --git a/tensorflow/contrib/lite/kernels/unidirectional_sequence_rnn_test.cc b/tensorflow/contrib/lite/kernels/unidirectional_sequence_rnn_test.cc
new file mode 100644
index 0000000000..a1c1eda160
--- /dev/null
+++ b/tensorflow/contrib/lite/kernels/unidirectional_sequence_rnn_test.cc
@@ -0,0 +1,270 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+// Unit test for TFLite RNN op.
+
+#include <vector>
+#include <iomanip>
+
+#include <gmock/gmock.h>
+#include <gtest/gtest.h>
+#include "tensorflow/contrib/lite/interpreter.h"
+#include "tensorflow/contrib/lite/kernels/register.h"
+#include "tensorflow/contrib/lite/kernels/test_util.h"
+#include "tensorflow/contrib/lite/model.h"
+
+namespace tflite {
+namespace {
+
+using ::testing::ElementsAreArray;
+
+static float rnn_input[] = {
+    0.23689353,   0.285385,     0.037029743, -0.19858193,  -0.27569133,
+    0.43773448,   0.60379338,   0.35562468,  -0.69424844,  -0.93421471,
+    -0.87287879,  0.37144363,   -0.62476718, 0.23791671,   0.40060222,
+    0.1356622,    -0.99774903,  -0.98858172, -0.38952237,  -0.47685933,
+    0.31073618,   0.71511042,   -0.63767755, -0.31729108,  0.33468103,
+    0.75801885,   0.30660987,   -0.37354088, 0.77002847,   -0.62747043,
+    -0.68572164,  0.0069220066, 0.65791464,  0.35130811,   0.80834007,
+    -0.61777675,  -0.21095741,  0.41213346,  0.73784804,   0.094794154,
+    0.47791874,   0.86496925,   -0.53376222, 0.85315156,   0.10288584,
+    0.86684,      -0.011186242, 0.10513687,  0.87825835,   0.59929144,
+    0.62827742,   0.18899453,   0.31440187,  0.99059987,   0.87170351,
+    -0.35091716,  0.74861872,   0.17831337,  0.2755419,    0.51864719,
+    0.55084288,   0.58982027,   -0.47443086, 0.20875752,   -0.058871567,
+    -0.66609079,  0.59098077,   0.73017097,  0.74604273,   0.32882881,
+    -0.17503482,  0.22396147,   0.19379807,  0.29120302,   0.077113032,
+    -0.70331609,  0.15804303,   -0.93407321, 0.40182066,   0.036301374,
+    0.66521823,   0.0300982,    -0.7747041,  -0.02038002,  0.020698071,
+    -0.90300065,  0.62870288,   -0.23068321, 0.27531278,   -0.095755219,
+    -0.712036,    -0.17384434,  -0.50593495, -0.18646687,  -0.96508682,
+    0.43519354,   0.14744234,   0.62589407,  0.1653645,    -0.10651493,
+    -0.045277178, 0.99032974,   -0.88255352, -0.85147917,  0.28153265,
+    0.19455957,   -0.55479527,  -0.56042433, 0.26048636,   0.84702539,
+    0.47587705,   -0.074295521, -0.12287641, 0.70117295,   0.90532446,
+    0.89782166,   0.79817224,   0.53402734,  -0.33286154,  0.073485017,
+    -0.56172788,  -0.044897556, 0.89964068,  -0.067662835, 0.76863563,
+    0.93455386,   -0.6324693,   -0.083922029};
+
+static float rnn_golden_output[] = {
+    0.496726,   0,          0.965996,  0,         0.0584254, 0,
+    0,          0.12315,    0,         0,         0.612266,  0.456601,
+    0,          0.52286,    1.16099,   0.0291232,
+
+    0,          0,          0.524901,  0,         0,         0,
+    0,          1.02116,    0,         1.35762,   0,         0.356909,
+    0.436415,   0.0355727,  0,         0,
+
+    0,          0,          0,         0.262335,  0,         0,
+    0,          1.33992,    0,         2.9739,    0,         0,
+    1.31914,    2.66147,    0,         0,
+
+    0.942568,   0,          0,         0,         0.025507,  0,
+    0,          0,          0.321429,  0.569141,  1.25274,   1.57719,
+    0.8158,     1.21805,    0.586239,  0.25427,
+
+    1.04436,    0,          0.630725,  0,         0.133801,  0.210693,
+    0.363026,   0,          0.533426,  0,         1.25926,   0.722707,
+    0,          1.22031,    1.30117,   0.495867,
+
+    0.222187,   0,          0.72725,   0,         0.767003,  0,
+    0,          0.147835,   0,         0,         0,         0.608758,
+    0.469394,   0.00720298, 0.927537,  0,
+
+    0.856974,   0.424257,   0,         0,         0.937329,  0,
+    0,          0,          0.476425,  0,         0.566017,  0.418462,
+    0.141911,   0.996214,   1.13063,   0,
+
+    0.967899,   0,          0,         0,         0.0831304, 0,
+    0,          1.00378,    0,         0,         0,         1.44818,
+    1.01768,    0.943891,   0.502745,  0,
+
+    0.940135,   0,          0,         0,         0,         0,
+    0,          2.13243,    0,         0.71208,   0.123918,  1.53907,
+    1.30225,    1.59644,    0.70222,   0,
+
+    0.804329,   0,          0.430576,  0,         0.505872,  0.509603,
+    0.343448,   0,          0.107756,  0.614544,  1.44549,   1.52311,
+    0.0454298,  0.300267,   0.562784,  0.395095,
+
+    0.228154,   0,          0.675323,  0,         1.70536,   0.766217,
+    0,          0,          0,         0.735363,  0.0759267, 1.91017,
+    0.941888,   0,          0,         0,
+
+    0,          0,          1.5909,    0,         0,         0,
+    0,          0.5755,     0,         0.184687,  0,         1.56296,
+    0.625285,   0,          0,         0,
+
+    0,          0,          0.0857888, 0,         0,         0,
+    0,          0.488383,   0.252786,  0,         0,         0,
+    1.02817,    1.85665,    0,         0,
+
+    0.00981836, 0,          1.06371,   0,         0,         0,
+    0,          0,          0,         0.290445,  0.316406,  0,
+    0.304161,   1.25079,    0.0707152, 0,
+
+    0.986264,   0.309201,   0,         0,         0,         0,
+    0,          1.64896,    0.346248,  0,         0.918175,  0.78884,
+    0.524981,   1.92076,    2.07013,   0.333244,
+
+    0.415153,   0.210318,   0,         0,         0,         0,
+    0,          2.02616,    0,         0.728256,  0.84183,   0.0907453,
+    0.628881,   3.58099,    1.49974,   0
+};
+
+class UnidirectionalRNNOpModel : public SingleOpModel {
+ public:
+  UnidirectionalRNNOpModel(int batches, int sequence_len, int units, int size)
+      : batches_(batches),
+        sequence_len_(sequence_len),
+        units_(units),
+        input_size_(size) {
+    input_ = AddInput(TensorType_FLOAT32);
+    weights_ = AddInput(TensorType_FLOAT32);
+    recurrent_weights_ = AddInput(TensorType_FLOAT32);
+    bias_ = AddInput(TensorType_FLOAT32);
+    hidden_state_ = AddOutput(TensorType_FLOAT32);
+    output_ = AddOutput(TensorType_FLOAT32);
+    SetBuiltinOp(
+        BuiltinOperator_UNIDIRECTIONAL_SEQUENCE_RNN, BuiltinOptions_RNNOptions,
+        CreateRNNOptions(builder_, ActivationFunctionType_RELU).Union());
+    BuildInterpreter({{batches_, sequence_len_, input_size_},
+                      {units_, input_size_},
+                      {units_, units_},
+                      {units_}});
+  }
+
+  void SetBias(std::initializer_list<float> f) { PopulateTensor(bias_, f); }
+
+  void SetWeights(std::initializer_list<float> f) {
+    PopulateTensor(weights_, f);
+  }
+
+  void SetRecurrentWeights(std::initializer_list<float> f) {
+    PopulateTensor(recurrent_weights_, f);
+  }
+
+  void SetInput(std::initializer_list<float> data) {
+    PopulateTensor(input_, data);
+  }
+
+  void SetInput(int offset, float* begin, float* end) {
+    PopulateTensor(input_, offset, begin, end);
+  }
+
+  void ResetHiddenState() {
+    const int zero_buffer_size = units_ * batches_;
+    std::unique_ptr<float[]> zero_buffer(new float[zero_buffer_size]);
+    memset(zero_buffer.get(), 0, zero_buffer_size * sizeof(float));
+    PopulateTensor(hidden_state_, 0, zero_buffer.get(),
+                   zero_buffer.get() + zero_buffer_size);
+  }
+
+  std::vector<float> GetOutput() { return ExtractVector<float>(output_); }
+
+  int input_size() { return input_size_; }
+  int num_units() { return units_; }
+  int num_batches() { return batches_; }
+  int sequence_len() { return sequence_len_; }
+
+ private:
+  int input_;
+  int weights_;
+  int recurrent_weights_;
+  int bias_;
+  int hidden_state_;
+  int output_;
+
+  int batches_;
+  int sequence_len_;
+  int units_;
+  int input_size_;
+};
+
+// TODO(mirkov): add another test which directly compares to TF once TOCO
+// supports the conversion from dynamic_rnn with BasicRNNCell.
+TEST(FullyConnectedOpTest, BlackBoxTest) {
+  UnidirectionalRNNOpModel rnn(2, 16, 16, 8);
+  rnn.SetWeights(
+      {0.461459,    0.153381,   0.529743,    -0.00371218, 0.676267,   -0.211346,
+       0.317493,    0.969689,   -0.343251,   0.186423,    0.398151,   0.152399,
+       0.448504,    0.317662,   0.523556,    -0.323514,   0.480877,   0.333113,
+       -0.757714,   -0.674487,  -0.643585,   0.217766,    -0.0251462, 0.79512,
+       -0.595574,   -0.422444,  0.371572,    -0.452178,   -0.556069,  -0.482188,
+       -0.685456,   -0.727851,  0.841829,    0.551535,    -0.232336,  0.729158,
+       -0.00294906, -0.69754,   0.766073,    -0.178424,   0.369513,   -0.423241,
+       0.548547,    -0.0152023, -0.757482,   -0.85491,    0.251331,   -0.989183,
+       0.306261,    -0.340716,  0.886103,    -0.0726757,  -0.723523,  -0.784303,
+       0.0354295,   0.566564,   -0.485469,   -0.620498,   0.832546,   0.697884,
+       -0.279115,   0.294415,   -0.584313,   0.548772,    0.0648819,  0.968726,
+       0.723834,    -0.0080452, -0.350386,   -0.272803,   0.115121,   -0.412644,
+       -0.824713,   -0.992843,  -0.592904,   -0.417893,   0.863791,   -0.423461,
+       -0.147601,   -0.770664,  -0.479006,   0.654782,    0.587314,   -0.639158,
+       0.816969,    -0.337228,  0.659878,    0.73107,     0.754768,   -0.337042,
+       0.0960841,   0.368357,   0.244191,    -0.817703,   -0.211223,  0.442012,
+       0.37225,     -0.623598,  -0.405423,   0.455101,    0.673656,   -0.145345,
+       -0.511346,   -0.901675,  -0.81252,    -0.127006,   0.809865,   -0.721884,
+       0.636255,    0.868989,   -0.347973,   -0.10179,    -0.777449,  0.917274,
+       0.819286,    0.206218,   -0.00785118, 0.167141,    0.45872,    0.972934,
+       -0.276798,   0.837861,   0.747958,    -0.0151566,  -0.330057,  -0.469077,
+       0.277308,    0.415818});
+
+  rnn.SetBias({0.065691948, -0.69055247, 0.1107955, -0.97084129, -0.23957068,
+               -0.23566568, -0.389184, 0.47481549, -0.4791103, 0.29931796,
+               0.10463274, 0.83918178, 0.37197268, 0.61957061, 0.3956964,
+               -0.37609905});
+
+  rnn.SetRecurrentWeights({0.1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+                           0.1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+                           0.1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+                           0.1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+                           0.1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+                           0.1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+                           0.1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+                           0.1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+                           0.1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+                           0.1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+                           0.1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+                           0.1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+                           0.1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+                           0.1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+                           0.1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+                           0.1});
+
+  rnn.ResetHiddenState();
+  const int input_sequence_size = rnn.input_size() * rnn.sequence_len();
+  float* batch_start = rnn_input;
+  float* batch_end = batch_start + input_sequence_size;
+  rnn.SetInput(0, batch_start, batch_end);
+  rnn.SetInput(input_sequence_size, batch_start, batch_end);
+
+  rnn.Invoke();
+
+  float* golden_start = rnn_golden_output;
+  float* golden_end = golden_start + rnn.num_units() * rnn.sequence_len();
+  std::vector<float> expected;
+  expected.insert(expected.end(), golden_start, golden_end);
+  expected.insert(expected.end(), golden_start, golden_end);
+
+  EXPECT_THAT(rnn.GetOutput(), ElementsAreArray(ArrayFloatNear(expected)));
+}
+
+}  // namespace
+}  // namespace tflite
+
+int main(int argc, char** argv) {
+  // On Linux, add: tflite::LogToStderr();
+  ::testing::InitGoogleTest(&argc, argv);
+  return RUN_ALL_TESTS();
+}
diff --git a/tensorflow/contrib/lite/model.cc b/tensorflow/contrib/lite/model.cc
index de7a39b62c..2f508d9d60 100644
--- a/tensorflow/contrib/lite/model.cc
+++ b/tensorflow/contrib/lite/model.cc
@@ -340,6 +340,7 @@ void* ParseOpData(const Operator* op, BuiltinOperator op_type,
       builtin_data = reinterpret_cast<void*>(params);
       break;
     }
+    case BuiltinOperator_UNIDIRECTIONAL_SEQUENCE_RNN:
     case BuiltinOperator_RNN: {
       TfLiteRNNParams* params = MallocPOD<TfLiteRNNParams>();
       if (auto* rnn_params = op->builtin_options_as_RNNOptions()) {
diff --git a/tensorflow/contrib/lite/nnapi_delegate.cc b/tensorflow/contrib/lite/nnapi_delegate.cc
index 7fe968aa0a..86f2afbaf2 100644
--- a/tensorflow/contrib/lite/nnapi_delegate.cc
+++ b/tensorflow/contrib/lite/nnapi_delegate.cc
@@ -294,6 +294,7 @@ void AddOpsAndParams(tflite::Interpreter* interpreter,
       case tflite::BuiltinOperator_SVDF:
       case tflite::BuiltinOperator_HASHTABLE_LOOKUP:
       case tflite::BuiltinOperator_RNN:
+      case tflite::BuiltinOperator_UNIDIRECTIONAL_SEQUENCE_RNN:
       case tflite::BuiltinOperator_EMBEDDING_LOOKUP:
       case tflite::BuiltinOperator_EMBEDDING_LOOKUP_SPARSE:
       case tflite::BuiltinOperator_LSTM:
diff --git a/tensorflow/contrib/lite/schema/schema.fbs b/tensorflow/contrib/lite/schema/schema.fbs
index 7fa9b7a3a9..d1302bdc63 100644
--- a/tensorflow/contrib/lite/schema/schema.fbs
+++ b/tensorflow/contrib/lite/schema/schema.fbs
@@ -105,6 +105,7 @@ enum BuiltinOperator : byte {
   CUSTOM = 32,
   EMBEDDING_LOOKUP_SPARSE = 33,
   PAD = 34,
+  UNIDIRECTIONAL_SEQUENCE_RNN = 35,
 }
 
 // Options for the builtin operators.
diff --git a/tensorflow/contrib/lite/schema/schema_generated.h b/tensorflow/contrib/lite/schema/schema_generated.h
index 34cef71175..ba645c2764 100755
--- a/tensorflow/contrib/lite/schema/schema_generated.h
+++ b/tensorflow/contrib/lite/schema/schema_generated.h
@@ -171,11 +171,12 @@ enum BuiltinOperator {
   BuiltinOperator_CUSTOM = 32,
   BuiltinOperator_EMBEDDING_LOOKUP_SPARSE = 33,
   BuiltinOperator_PAD = 34,
+  BuiltinOperator_UNIDIRECTIONAL_SEQUENCE_RNN = 35,
   BuiltinOperator_MIN = BuiltinOperator_ADD,
-  BuiltinOperator_MAX = BuiltinOperator_PAD
+  BuiltinOperator_MAX = BuiltinOperator_UNIDIRECTIONAL_SEQUENCE_RNN
 };
 
-inline BuiltinOperator (&EnumValuesBuiltinOperator())[32] {
+inline BuiltinOperator (&EnumValuesBuiltinOperator())[33] {
   static BuiltinOperator values[] = {
       BuiltinOperator_ADD,
       BuiltinOperator_AVERAGE_POOL_2D,
@@ -208,7 +209,8 @@ inline BuiltinOperator (&EnumValuesBuiltinOperator())[32] {
       BuiltinOperator_CALL,
       BuiltinOperator_CUSTOM,
       BuiltinOperator_EMBEDDING_LOOKUP_SPARSE,
-      BuiltinOperator_PAD};
+      BuiltinOperator_PAD,
+      BuiltinOperator_UNIDIRECTIONAL_SEQUENCE_RNN};
   return values;
 }
 
@@ -248,6 +250,7 @@ inline const char **EnumNamesBuiltinOperator() {
                                 "CUSTOM",
                                 "EMBEDDING_LOOKUP_SPARSE",
                                 "PAD",
+                                "UNIDIRECTIONAL_SEQUENCE_RNN",
                                 nullptr};
   return names;
 }
-- 
GitLab


From 60bb8e301e175f79c2d6541c5733af40321bc27a Mon Sep 17 00:00:00 2001
From: Francois Chollet <fchollet@google.com>
Date: Thu, 14 Dec 2017 13:27:32 -0800
Subject: [PATCH 1018/1225] Fix typo in boston housing dataset loader.

PiperOrigin-RevId: 179086824
---
 tensorflow/python/keras/_impl/keras/datasets/boston_housing.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/python/keras/_impl/keras/datasets/boston_housing.py b/tensorflow/python/keras/_impl/keras/datasets/boston_housing.py
index 4359be8928..5d5d2c4f75 100644
--- a/tensorflow/python/keras/_impl/keras/datasets/boston_housing.py
+++ b/tensorflow/python/keras/_impl/keras/datasets/boston_housing.py
@@ -48,7 +48,7 @@ def load_data(path='boston_housing.npz', seed=113, test_split=0.2):
   f.close()
 
   np.random.seed(seed)
-  indices = np.arrange(len(x))
+  indices = np.arange(len(x))
   np.random.shuffle(indices)
   x = x[indices]
   y = y[indices]
-- 
GitLab


From 3b195ebccb54c85c7fc2121b9f50eb9ab0a46989 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 14 Dec 2017 13:28:10 -0800
Subject: [PATCH 1019/1225] Allow an unshaped input array flag if the model has
 the shape already.

PiperOrigin-RevId: 179086934
---
 tensorflow/contrib/lite/toco/tooling_util.cc | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/tensorflow/contrib/lite/toco/tooling_util.cc b/tensorflow/contrib/lite/toco/tooling_util.cc
index f3daac175e..39b030c338 100644
--- a/tensorflow/contrib/lite/toco/tooling_util.cc
+++ b/tensorflow/contrib/lite/toco/tooling_util.cc
@@ -1138,11 +1138,16 @@ void ResolveModelFlags(const ModelFlags& model_flags, Model* model) {
         }
       }
     } else {
-      const auto& input_array_dims =
-          *input_array.mutable_shape()->mutable_dims();
-      CHECK_EQ(input_array_dims.size(), input_array_proto.shape().dims_size());
-      for (int i = 0; i < input_array_dims.size(); i++) {
-        CHECK_EQ(input_array_dims[i], input_array_proto.shape().dims(i));
+      if (input_array_proto.has_shape()) {
+        // If an input shape was specified on the flags ensure that it matches
+        // the actual shape in the model.
+        const auto& input_array_dims =
+            *input_array.mutable_shape()->mutable_dims();
+        CHECK_EQ(input_array_dims.size(),
+                 input_array_proto.shape().dims_size());
+        for (int i = 0; i < input_array_dims.size(); i++) {
+          CHECK_EQ(input_array_dims[i], input_array_proto.shape().dims(i));
+        }
       }
     }
 
-- 
GitLab


From 2d95be8795e4d8ac3d1547cebffcd12f6bb73563 Mon Sep 17 00:00:00 2001
From: Max Galkin <maxgalkin@google.com>
Date: Thu, 14 Dec 2017 14:20:28 -0800
Subject: [PATCH 1020/1225] Add a utility struct for MilliSeconds time unit.

PiperOrigin-RevId: 179096409
---
 .../core/grappler/costs/cost_estimator.h      | 24 ++++++++++++++++---
 1 file changed, 21 insertions(+), 3 deletions(-)

diff --git a/tensorflow/core/grappler/costs/cost_estimator.h b/tensorflow/core/grappler/costs/cost_estimator.h
index cf9fa4fdaf..852e69737b 100644
--- a/tensorflow/core/grappler/costs/cost_estimator.h
+++ b/tensorflow/core/grappler/costs/cost_estimator.h
@@ -40,6 +40,16 @@ struct Costs {
   // Builds a Costs structure with all zero values, rather than unknowns.
   static inline Costs ZeroCosts();
 
+  struct MilliSeconds : std::chrono::milliseconds {
+    MilliSeconds() : std::chrono::milliseconds(0) {}
+    MilliSeconds(double d) : std::chrono::milliseconds(static_cast<int64>(d)) {}
+    MilliSeconds(const std::chrono::milliseconds& d)
+        : std::chrono::milliseconds(d) {}
+    MilliSeconds& operator=(const std::chrono::milliseconds& d) {
+      std::chrono::milliseconds::operator=(d);
+      return *this;
+    }
+  };
   struct MicroSeconds : std::chrono::microseconds {
     MicroSeconds() : std::chrono::microseconds(0) {}
     MicroSeconds(double d) : std::chrono::microseconds(static_cast<int64>(d)) {}
@@ -49,6 +59,9 @@ struct Costs {
       std::chrono::microseconds::operator=(d);
       return *this;
     }
+    MilliSeconds asMilliSeconds() const {
+      return std::chrono::duration_cast<std::chrono::milliseconds>(*this);
+    }
   };
   struct NanoSeconds : std::chrono::nanoseconds {
     NanoSeconds() : std::chrono::nanoseconds(0) {}
@@ -60,9 +73,10 @@ struct Costs {
       return *this;
     }
     MicroSeconds asMicroSeconds() const {
-      std::chrono::microseconds us =
-          std::chrono::duration_cast<std::chrono::microseconds>(*this);
-      return MicroSeconds(us);
+      return std::chrono::duration_cast<std::chrono::microseconds>(*this);
+    }
+    MilliSeconds asMilliSeconds() const {
+      return std::chrono::duration_cast<std::chrono::milliseconds>(*this);
     }
   };
 
@@ -100,6 +114,10 @@ struct Costs {
   std::unordered_map<string, uint64> estimated_max_memory_per_device;
 };
 
+inline std::ostream& operator<<(std::ostream& os, const Costs::MilliSeconds d) {
+  os << d.count() << "ms";
+  return os;
+}
 inline std::ostream& operator<<(std::ostream& os, const Costs::MicroSeconds d) {
   os << d.count() << "us";
   return os;
-- 
GitLab


From 16beb00ce75c840b0859bdcf732b32c965c6a4ba Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 14 Dec 2017 14:22:36 -0800
Subject: [PATCH 1021/1225] Don't remove noops if rerouting their control
 inputs increases the number of edges crossing device boundaries.

PiperOrigin-RevId: 179096786
---
 .../optimizers/dependency_optimizer.cc        | 47 +++++++++++++++++--
 .../optimizers/dependency_optimizer_test.cc   | 34 ++++++++++++++
 2 files changed, 78 insertions(+), 3 deletions(-)

diff --git a/tensorflow/core/grappler/optimizers/dependency_optimizer.cc b/tensorflow/core/grappler/optimizers/dependency_optimizer.cc
index 77525cc788..6cc50845b3 100644
--- a/tensorflow/core/grappler/optimizers/dependency_optimizer.cc
+++ b/tensorflow/core/grappler/optimizers/dependency_optimizer.cc
@@ -187,8 +187,9 @@ void DependencyOptimizer::OptimizeNode(int node_idx,
     node->clear_attr();
   }
 
-  // Remove NoOp nodes if their fan-in or fan-out is less than 2.
-  // The non-trivial rewrites take the following form:
+  // Remove NoOp nodes if the product of their fan-in and fan-out is less than
+  // or equal to the sum of the fan-in and fan-out. The non-trivial rewrites
+  // take the following form:
   //
   // Case a)
   //    x --^> +------+                x --^> +---+
@@ -201,8 +202,22 @@ void DependencyOptimizer::OptimizeNode(int node_idx,
   //    x --^> | NoOp | --^> b  ==>    | x | --^> b
   //           |      | ...            |   | ...
   //           +------+ --^> c         +---+ --^> c
+  // Case c)
+  //           +------+                x ---^> a
+  //    x --^> | NoOp | --^> a  ==>      \/
+  //    y --^> |      | --^> b           /\
+  //           +------+                y ---^> b
+  //
+  // We only apply this optimization if we don't increase the number of control
+  // edges across device boundaries, e.g. in cases a) and b) if NoOp and
+  // a and x, respectively, are on the same device. Control edges across device
+  // boundaries require inter-device communication (Send/Recv pairs to be
+  // inserted in the graph), which is very costly.
+
   if (node->op() == "NoOp") {
-    const auto output_nodes = node_map_->GetOutputs(node->name());
+    const auto& output_node_set = node_map_->GetOutputs(node->name());
+    const std::vector<NodeDef*> output_nodes(output_node_set.begin(),
+                                             output_node_set.end());
     const int num_outputs = output_nodes.size();
     const int num_inputs = node->input_size();
 
@@ -217,6 +232,32 @@ void DependencyOptimizer::OptimizeNode(int node_idx,
       input_nodes.push_back(tmp);
     }
 
+    // Make sure that we don't increase the number of control edges that cross
+    // device boundaries.
+    if ((num_inputs == 1 && num_outputs > 1 &&
+         input_nodes[0]->device() != node->device()) ||
+        (num_inputs > 1 && num_outputs == 1 &&
+         output_nodes[0]->device() != node->device())) {
+      return;
+    }
+    if (num_inputs == 2 && num_outputs == 2) {
+      const string& noop_dev = node->device();
+      const string& in0_dev = input_nodes[0]->device();
+      const string& in1_dev = input_nodes[1]->device();
+      const string& out0_dev = output_nodes[0]->device();
+      const string& out1_dev = output_nodes[1]->device();
+      const int num_cross_before = static_cast<int>(in0_dev != noop_dev) +
+                                   static_cast<int>(in1_dev != noop_dev) +
+                                   static_cast<int>(out0_dev != noop_dev) +
+                                   static_cast<int>(out1_dev != noop_dev);
+      const int num_cross_after = static_cast<int>(in0_dev != out0_dev) +
+                                  static_cast<int>(in0_dev != out1_dev) +
+                                  static_cast<int>(in1_dev != out0_dev) +
+                                  static_cast<int>(in1_dev != out1_dev);
+      if (num_cross_after > num_cross_before) {
+        return;
+      }
+    }
     for (auto consumer : output_nodes) {
       bool updated_consumer = false;
       VLOG(1) << "***** Considering consumer  " << consumer->name() << "\n"
diff --git a/tensorflow/core/grappler/optimizers/dependency_optimizer_test.cc b/tensorflow/core/grappler/optimizers/dependency_optimizer_test.cc
index e17a8eb1cf..837fbba2fc 100644
--- a/tensorflow/core/grappler/optimizers/dependency_optimizer_test.cc
+++ b/tensorflow/core/grappler/optimizers/dependency_optimizer_test.cc
@@ -197,6 +197,40 @@ TEST_F(DependencyOptimizerTest, RemoveNoOps_EmptyInputOrOutput) {
   }
 }
 
+TEST_F(DependencyOptimizerTest, RemoveNoOps_DeviceBoundaries) {
+  tensorflow::Scope s = tensorflow::Scope::NewRootScope();
+  Output x = ops::RandomUniform(s.WithOpName("x").WithDevice("/CPU:0"), {1, 2},
+                                DT_FLOAT);
+  Output y = ops::RandomUniform(s.WithOpName("y").WithDevice("/CPU:0"), {1, 2},
+                                DT_FLOAT);
+  // NoOp with a single input- and two output dependencies.
+  auto noop = ops::NoOp(s.WithControlDependencies(x).WithDevice("/CPU:1"));
+  // NoOp with a two input- and a single output dependency.
+  auto noop_1 = ops::NoOp(
+      s.WithControlDependencies(x).WithControlDependencies(y).WithDevice(
+          "/CPU:0"));
+  Output id = ops::Identity(
+      s.WithControlDependencies({noop.operation}).WithDevice("/CPU:1"), x);
+  Output id_1 = ops::Identity(
+      s.WithControlDependencies({noop.operation, noop_1.operation})
+          .WithDevice("/CPU:1"),
+      y);
+
+  GrapplerItem item;
+  TF_CHECK_OK(s.ToGraphDef(&item.graph));
+  item.fetch.push_back("Identity");
+  item.fetch.push_back("Identity_1");
+
+  DependencyOptimizer optimizer;
+  GraphDef output;
+  Status status = optimizer.Optimize(nullptr, item, &output);
+  TF_EXPECT_OK(status);
+
+  // The optimization should be disabled to prevent increasing the number of
+  // nodes crossing device boundaries.
+  VerifyGraphsEqual(item.graph, output, __FUNCTION__);
+}
+
 TEST_F(DependencyOptimizerTest, RemoveNoOps_SingleInputOrOutput) {
   tensorflow::Scope s = tensorflow::Scope::NewRootScope();
   Output x = ops::RandomUniform(s.WithOpName("x"), {1, 2}, DT_FLOAT);
-- 
GitLab


From cfd1deab8f7c875a313536cdc6fdadd3cfb0eb22 Mon Sep 17 00:00:00 2001
From: Justin Lebar <jlebar@google.com>
Date: Thu, 14 Dec 2017 14:25:43 -0800
Subject: [PATCH 1022/1225] [XLA] Emit an error in the HLO parser if the root
 instruction's shape doesn't match the computation's shape.

PiperOrigin-RevId: 179097287
---
 .../compiler/xla/tools/parser/hlo_parser.cc   | 23 +++++++++++++++++--
 .../xla/tools/parser/hlo_parser_test.cc       | 12 ++++++++++
 2 files changed, 33 insertions(+), 2 deletions(-)

diff --git a/tensorflow/compiler/xla/tools/parser/hlo_parser.cc b/tensorflow/compiler/xla/tools/parser/hlo_parser.cc
index 4f67ed2380..3bb86cf7b8 100644
--- a/tensorflow/compiler/xla/tools/parser/hlo_parser.cc
+++ b/tensorflow/compiler/xla/tools/parser/hlo_parser.cc
@@ -279,8 +279,12 @@ bool HloParser::ParseComputation() {
 
   Shape shape;
   string root_name;
-  if (!ParseParamList() || !ParseToken(TokKind::kArrow, "expects '->'") ||
-      !ParseShape(&shape) || !ParseInstructionList(builder.get(), &root_name)) {
+  if (!ParseParamList() || !ParseToken(TokKind::kArrow, "expects '->'")) {
+    return false;
+  }
+
+  LocTy shape_ty = lexer_.GetLoc();
+  if (!ParseShape(&shape) || !ParseInstructionList(builder.get(), &root_name)) {
     return false;
   }
 
@@ -301,6 +305,21 @@ bool HloParser::ParseComputation() {
           ? module_->AddEntryComputation(builder->Build(root))
           : module_->AddEmbeddedComputation(builder->Build(root));
 
+  if (!root) {
+    root = computation->root_instruction();
+  } else {
+    CHECK_EQ(root, computation->root_instruction());
+  }
+
+  if (!ShapeUtil::Compatible(root->shape(), shape)) {
+    return Error(
+        shape_ty,
+        StrCat("Shape of computation ", name, ", ",
+               ShapeUtil::HumanString(shape),
+               ", is not compatible with that of its root instruction ",
+               root_name, ", ", ShapeUtil::HumanString(root->shape())));
+  }
+
   // The parameters and result layouts were set to default layout. Here we set
   // the layouts to what the hlo text says.
   if (is_entry_computation) {
diff --git a/tensorflow/compiler/xla/tools/parser/hlo_parser_test.cc b/tensorflow/compiler/xla/tools/parser/hlo_parser_test.cc
index 189de59b6f..98f9f4d333 100644
--- a/tensorflow/compiler/xla/tools/parser/hlo_parser_test.cc
+++ b/tensorflow/compiler/xla/tools/parser/hlo_parser_test.cc
@@ -1047,6 +1047,18 @@ ENTRY %test_comma.v4 () -> f32[] {
   TF_EXPECT_OK(Parse(original).status());
 }
 
+TEST_F(HloParserTest, ComputationShapeDoesNotMatchRootShape) {
+  const string original = R"(HloModule custom_call:
+
+ENTRY %CustomCall () -> f32[1] {
+  %constant = f32[1]{0} constant({12345})
+  ROOT %foo = f32[1,2,3]{0,2,1} custom-call(f32[1]{0} %constant), custom_call_target="foo\"bar"
+})";
+  ExpectHasSubstr(Parse(original).status().error_message(),
+                  "Shape of computation CustomCall, f32[1], is not compatible "
+                  "with that of its root instruction foo, f32[1,2,3]");
+}
+
 }  // namespace
 }  // namespace tools
 }  // namespace xla
-- 
GitLab


From c0ae51c498789c4d876d2a00ef7c41eb46ed55be Mon Sep 17 00:00:00 2001
From: Tayo Oguntebi <tayo@google.com>
Date: Thu, 14 Dec 2017 14:44:06 -0800
Subject: [PATCH 1023/1225] [XLA] Adds ReduceWindow test for arbitrary sized
 padding.

PiperOrigin-RevId: 179100614
---
 .../compiler/xla/tests/reduce_window_test.cc  | 39 ++++---------------
 1 file changed, 8 insertions(+), 31 deletions(-)

diff --git a/tensorflow/compiler/xla/tests/reduce_window_test.cc b/tensorflow/compiler/xla/tests/reduce_window_test.cc
index b32df74312..bf81514bc9 100644
--- a/tensorflow/compiler/xla/tests/reduce_window_test.cc
+++ b/tensorflow/compiler/xla/tests/reduce_window_test.cc
@@ -242,37 +242,6 @@ TEST_P(ReduceWindowTest, AmongMajor2DimsMediumSize) {
                            DefaultErrorSpec());
 }
 
-// TODO(b/32173947): Test support for arbitrary-sized padding.
-TEST_P(ReduceWindowTest, DISABLED_AmongMajor2DimsMediumSizeLargePadding) {
-  Array4D<float> input_array(9, 12, 4, 89);  // simulate Dim0IsMinor layout
-  input_array.FillRandom(2.f, 2.f);
-
-  int64 rank = 4;
-  int win_len = 3;
-  int win_stride = 2;
-
-  const auto input_data_handle =
-      CreateConstantFromArray(input_array, &builder_);
-
-  Padding padding = Padding::kSame;
-  // Reduce only along the x and y dimensions, according to the win_len.
-  // Create padding vector with large padding values in the reduction dims.
-  std::vector<std::pair<int64, int64>> low_high_padding;
-  low_high_padding.resize(rank, {4, 4});
-
-  builder_.ReduceWindowWithGeneralPadding(
-      input_data_handle, builder_.ConstantR0<float>(0.0f),
-      CreateScalarAddComputation(F32, &builder_), {win_len, win_len, 1, 1},
-      {win_stride, win_stride, 1, 1}, low_high_padding);
-
-  auto result = ReferenceUtil::ReduceWindow4DAdd(
-      input_array, 0.0f, {win_len, win_len, 1, 1},
-      {win_stride, win_stride, 1, 1}, padding);
-
-  ComputeAndCompareLiteral(&builder_, *Literal::CreateFromArray(*result), {},
-                           DefaultErrorSpec());
-}
-
 XLA_TEST_P(ReduceWindowTest, Add1x1x2In2x1x2) {
   Array3D<float> input_array(2, 1, 2);
   input_array(0, 0, 0) = 1000;
@@ -691,6 +660,14 @@ const R4ReduceWindowTestData kR4ReduceWindowTestValues[] = {
                            /*pad_high=*/{0, 0, 0, 0},
                            /*reducer=*/kAdd},
 
+    // Arbitrary padding (not kSame or kValid).
+    R4ReduceWindowTestData{/*base_bounds=*/{9, 12, 4, 89},
+                           /*window_bounds=*/{3, 3, 1, 1},
+                           /*strides=*/{2, 2, 1, 1},
+                           /*pad_low=*/{4, 4, 0, 0},
+                           /*pad_high=*/{4, 4, 0, 0},
+                           /*reducer=*/kAdd},
+
     // Zero base bound edge case.
     R4ReduceWindowTestData{/*base_bounds=*/{1, 0, 1, 1},
                            /*window_bounds=*/{1, 1, 1, 1},
-- 
GitLab


From ab54c4885366e87bd2805036c9c8ab3ffccf823e Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 14 Dec 2017 14:47:40 -0800
Subject: [PATCH 1024/1225] With fused batch norm, collect the tangle of nodes
 used for folding BN into BatchNorm_Fold scope.

PiperOrigin-RevId: 179101289
---
 .../quantize/python/fold_batch_norms.py       | 37 ++++++++++---------
 .../quantize/python/fold_batch_norms_test.py  | 14 ++++---
 2 files changed, 29 insertions(+), 22 deletions(-)

diff --git a/tensorflow/contrib/quantize/python/fold_batch_norms.py b/tensorflow/contrib/quantize/python/fold_batch_norms.py
index 647d404400..7df5fa8372 100644
--- a/tensorflow/contrib/quantize/python/fold_batch_norms.py
+++ b/tensorflow/contrib/quantize/python/fold_batch_norms.py
@@ -66,23 +66,26 @@ def _FoldFusedBatchNorms(graph):
     # `scope`.
     with graph.as_default(), graph.name_scope(scope + sep), ops.device(
         match.bn_op.device):
-      # new weights = old weights * gamma / sqrt(variance + epsilon)
-      # new biases = -mean * gamma / sqrt(variance + epsilon) + beta
-      multiplier_tensor = match.gamma_tensor * math_ops.rsqrt(
-          match.variance_tensor + match.bn_op.get_attr('epsilon'))
-      bias_tensor = math_ops.subtract(
-          match.beta_tensor, match.mean_tensor * multiplier_tensor, name='bias')
-
-      # The shape of depthwise weights is different, so we need to reshape the
-      # multiplier_tensor to ensure that the scaled_weight_tensor has the
-      # expected shape.
-      if match.layer_op.type == 'DepthwiseConv2dNative':
-        new_shape = [
-            match.weight_tensor.get_shape().as_list()[2],
-            match.weight_tensor.get_shape().as_list()[3]
-        ]
-        multiplier_tensor = array_ops.reshape(
-            multiplier_tensor, new_shape, name='scale_reshape')
+      with graph.name_scope(scope + sep + 'BatchNorm_Fold' + sep):
+        # new weights = old weights * gamma / sqrt(variance + epsilon)
+        # new biases = -mean * gamma / sqrt(variance + epsilon) + beta
+        multiplier_tensor = match.gamma_tensor * math_ops.rsqrt(
+            match.variance_tensor + match.bn_op.get_attr('epsilon'))
+        bias_tensor = math_ops.subtract(
+            match.beta_tensor,
+            match.mean_tensor * multiplier_tensor,
+            name='bias')
+
+        # The shape of depthwise weights is different, so we need to reshape the
+        # multiplier_tensor to ensure that the scaled_weight_tensor has the
+        # expected shape.
+        if match.layer_op.type == 'DepthwiseConv2dNative':
+          new_shape = [
+              match.weight_tensor.get_shape().as_list()[2],
+              match.weight_tensor.get_shape().as_list()[3]
+          ]
+          multiplier_tensor = array_ops.reshape(
+              multiplier_tensor, new_shape, name='scale_reshape')
 
       # TODO(suharshs): This naming of the following ops needs to carefully
       # follow the naming expected by quantize.py. Generalize the quantize code
diff --git a/tensorflow/contrib/quantize/python/fold_batch_norms_test.py b/tensorflow/contrib/quantize/python/fold_batch_norms_test.py
index 2cecf68514..4dc5994885 100644
--- a/tensorflow/contrib/quantize/python/fold_batch_norms_test.py
+++ b/tensorflow/contrib/quantize/python/fold_batch_norms_test.py
@@ -284,16 +284,20 @@ class FoldBatchNormsTest(test_util.TensorFlowTestCase):
 
     folded_mul = g.get_operation_by_name(scope + '/mul_fold')
     self.assertEqual(folded_mul.type, 'Mul')
+    if fused_batch_norm:
+      scale_reshape_op_name = scope + '/BatchNorm_Fold/scale_reshape'
+    else:
+      scale_reshape_op_name = scope + '/scale_reshape'
     self._AssertInputOpsAre(folded_mul,
                             [scope + '/depthwise_weights/read',
-                             scope + '/scale_reshape'])
+                             scale_reshape_op_name])
     self._AssertOutputGoesToOps(folded_mul, g, [scope + '/depthwise_Fold'])
 
-    scale_reshape = g.get_operation_by_name(scope + '/scale_reshape')
+    scale_reshape = g.get_operation_by_name(scale_reshape_op_name)
     self.assertEqual(scale_reshape.type, 'Reshape')
     self._AssertInputOpsAre(scale_reshape, [
         self._BatchNormMultiplierName(scope, has_scaling, fused_batch_norm),
-        scope + '/scale_reshape/shape'
+        scale_reshape_op_name + '/shape'
     ])
     self._AssertOutputGoesToOps(scale_reshape, g, [scope + '/mul_fold'])
 
@@ -326,13 +330,13 @@ class FoldBatchNormsTest(test_util.TensorFlowTestCase):
   def _BatchNormMultiplierName(self, scope, has_scaling, fused):
     if has_scaling:
       if fused:
-        return scope + '/mul'
+        return scope + '/BatchNorm_Fold/mul'
       return scope + '/BatchNorm/batchnorm/mul'
     return scope + '/BatchNorm/batchnorm/Rsqrt'
 
   def _BathNormBiasName(self, scope, fused):
     if fused:
-      return scope + '/bias'
+      return scope + '/BatchNorm_Fold/bias'
     return scope + '/BatchNorm/batchnorm/sub'
 
   def _WeightInit(self, stddev):
-- 
GitLab


From ea703f4e0e72d1e016f8157e206dcc9e80602862 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 14 Dec 2017 14:48:02 -0800
Subject: [PATCH 1025/1225] Implementation of Gather in TfLite

PiperOrigin-RevId: 179101363
---
 tensorflow/contrib/lite/builtin_op_data.h     |   4 +
 tensorflow/contrib/lite/kernels/BUILD         |  14 ++
 tensorflow/contrib/lite/kernels/gather.cc     | 130 ++++++++++++++
 .../contrib/lite/kernels/gather_test.cc       | 121 +++++++++++++
 tensorflow/contrib/lite/kernels/register.cc   |   2 +
 tensorflow/contrib/lite/kernels/test_util.cc  |  13 ++
 tensorflow/contrib/lite/kernels/test_util.h   |   3 +
 tensorflow/contrib/lite/model.cc              |  10 ++
 tensorflow/contrib/lite/nnapi_delegate.cc     |   1 +
 tensorflow/contrib/lite/schema/schema.fbs     |   7 +-
 .../contrib/lite/schema/schema_generated.h    | 162 +++++++++++++++++-
 tensorflow/contrib/lite/testing/BUILD         |   1 +
 .../contrib/lite/testing/generate_examples.py |  45 ++++-
 .../testing/generated_examples_zip_test.cc    |   1 +
 .../propagate_fixed_sizes.cc                  |   1 +
 tensorflow/contrib/lite/toco/model.h          |   3 +-
 .../contrib/lite/toco/tflite/operator.cc      |  20 ++-
 .../contrib/lite/toco/tflite/operator_test.cc |   8 +-
 18 files changed, 533 insertions(+), 13 deletions(-)
 create mode 100644 tensorflow/contrib/lite/kernels/gather.cc
 create mode 100644 tensorflow/contrib/lite/kernels/gather_test.cc
 mode change 100755 => 100644 tensorflow/contrib/lite/schema/schema_generated.h

diff --git a/tensorflow/contrib/lite/builtin_op_data.h b/tensorflow/contrib/lite/builtin_op_data.h
index 7249d124e9..548864a1e9 100644
--- a/tensorflow/contrib/lite/builtin_op_data.h
+++ b/tensorflow/contrib/lite/builtin_op_data.h
@@ -165,6 +165,10 @@ typedef struct {
   TfLiteCombinerType combiner;
 } TfLiteEmbeddingLookupSparseParams;
 
+typedef struct {
+  int axis;
+} TfLiteGatherParams;
+
 #ifdef __cplusplus
 }  // extern "C"
 #endif  // __cplusplus
diff --git a/tensorflow/contrib/lite/kernels/BUILD b/tensorflow/contrib/lite/kernels/BUILD
index 32bbe2670e..3908960c33 100644
--- a/tensorflow/contrib/lite/kernels/BUILD
+++ b/tensorflow/contrib/lite/kernels/BUILD
@@ -83,6 +83,7 @@ cc_library(
         "embedding_lookup.cc",
         "embedding_lookup_sparse.cc",
         "fully_connected.cc",
+        "gather.cc",
         "hashtable_lookup.cc",
         "kernel_util.cc",
         "l2norm.cc",
@@ -263,6 +264,19 @@ tf_cc_test(
     ],
 )
 
+tf_cc_test(
+    name = "gather_test",
+    size = "small",
+    srcs = ["gather_test.cc"],
+    deps = [
+        ":builtin_ops",
+        "//tensorflow/contrib/lite:builtin_op_data",
+        "//tensorflow/contrib/lite:framework",
+        "//tensorflow/contrib/lite/kernels:test_util",
+        "@com_google_googletest//:gtest",
+    ],
+)
+
 tf_cc_test(
     name = "resize_bilinear_test",
     size = "small",
diff --git a/tensorflow/contrib/lite/kernels/gather.cc b/tensorflow/contrib/lite/kernels/gather.cc
new file mode 100644
index 0000000000..f8df797daf
--- /dev/null
+++ b/tensorflow/contrib/lite/kernels/gather.cc
@@ -0,0 +1,130 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include <string.h>
+#include "tensorflow/contrib/lite/builtin_op_data.h"
+#include "tensorflow/contrib/lite/context.h"
+#include "tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h"
+#include "tensorflow/contrib/lite/kernels/internal/tensor.h"
+#include "tensorflow/contrib/lite/kernels/kernel_util.h"
+#include "tensorflow/contrib/lite/kernels/op_macros.h"
+#include "tensorflow/contrib/lite/string_util.h"
+
+namespace tflite {
+namespace ops {
+namespace builtin {
+namespace gather {
+constexpr int kInputTensor = 0;
+constexpr int kInputPositions = 1;
+constexpr int kOutputTensor = 0;
+
+TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
+  TF_LITE_ENSURE_EQ(context, NumInputs(node), 2);
+  TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
+
+  const auto* params =
+      reinterpret_cast<const TfLiteGatherParams*>(node->builtin_data);
+  TfLiteTensor* input = GetInput(context, node, kInputTensor);
+  TfLiteTensor* positions = GetInput(context, node, kInputPositions);
+  TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
+  // Only INT32 positions are supported.
+  TF_LITE_ENSURE_EQ(context, positions->type, kTfLiteInt32);
+  // Check that input and output types match.
+  TF_LITE_ENSURE_EQ(context, input->type, output->type);
+  // TODO(mgubin): only 1D positions are currently supported.
+  TF_LITE_ENSURE_EQ(context, NumDimensions(positions), 1);
+  // TODO(mgubin): Only default axis == 0 is supported.
+  // Check conditions for different types.
+  switch (input->type) {
+    case kTfLiteFloat32:
+    case kTfLiteUInt8:
+    case kTfLiteInt32: {
+      // Fully supported by reference_ops::Gather.
+    } break;
+
+    case kTfLiteString: {
+      // Only 1D input is supported.
+      TF_LITE_ENSURE_EQ(context, NumDimensions(input), 1);
+    } break;
+    default:
+      context->ReportError(context,
+                           "Only float32 and string types are supported");
+      return kTfLiteError;
+  }
+  const int num_dimensions =
+      NumDimensions(input) + NumDimensions(positions) - 1;
+  TF_LITE_ENSURE(context, params->axis < num_dimensions);
+  TfLiteIntArray* output_shape = TfLiteIntArrayCreate(num_dimensions);
+  int output_index = 0;
+  for (int i = 0; i < params->axis; ++i) {
+    output_shape->data[output_index++] = input->dims->data[i];
+  }
+  for (int i = 0; i < positions->dims->size; ++i) {
+    output_shape->data[output_index++] = positions->dims->data[i];
+  }
+  for (int i = params->axis + 1; i < input->dims->size; ++i) {
+    output_shape->data[output_index++] = input->dims->data[i];
+  }
+  return context->ResizeTensor(context, output, output_shape);
+}
+
+TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
+  TfLiteTensor* input = GetInput(context, node, kInputTensor);
+  TfLiteTensor* positions = GetInput(context, node, kInputPositions);
+  TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
+  const int input_rank = NumDimensions(input);
+#define TF_LITE_GATHER(data_type, index_type)                            \
+  optimized_ops::Gather(                                                 \
+      GetTensorData<data_type>(input), GetTensorDims(input), input_rank, \
+      GetTensorData<index_type>(positions), GetTensorDims(positions),    \
+      GetTensorData<data_type>(output), GetTensorDims(output));
+  switch (input->type) {
+    case kTfLiteFloat32:
+      TF_LITE_GATHER(float, int32_t);
+      break;
+    case kTfLiteUInt8:
+      TF_LITE_GATHER(uint8_t, int32_t);
+      break;
+    case kTfLiteInt32:
+      TF_LITE_GATHER(int32_t, int32_t);
+      break;
+    case kTfLiteString: {
+      DynamicBuffer buffer;
+      const int32* indexes = positions->data.i32;
+      const int num_strings = GetStringCount(input);
+      for (int i = 0; i < positions->dims->data[0]; ++i) {
+        const int pos = indexes[i];
+        TF_LITE_ENSURE(context, pos < num_strings);
+        const auto string_ref = GetString(input, pos);
+        buffer.AddString(string_ref.str, string_ref.len);
+      }
+      buffer.WriteToTensor(output);
+    } break;
+    default:
+      return kTfLiteError;
+  }
+#undef TF_LITE_GATHER
+  return kTfLiteOk;
+}
+}  // namespace gather
+
+TfLiteRegistration* Register_GATHER() {
+  static TfLiteRegistration r = {nullptr, nullptr, gather::Prepare,
+                                 gather::Eval};
+  return &r;
+}
+
+}  // namespace builtin
+}  // namespace ops
+}  // namespace tflite
diff --git a/tensorflow/contrib/lite/kernels/gather_test.cc b/tensorflow/contrib/lite/kernels/gather_test.cc
new file mode 100644
index 0000000000..6343d3b4ef
--- /dev/null
+++ b/tensorflow/contrib/lite/kernels/gather_test.cc
@@ -0,0 +1,121 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include <gtest/gtest.h>
+#include "tensorflow/contrib/lite/builtin_op_data.h"
+#include "tensorflow/contrib/lite/interpreter.h"
+#include "tensorflow/contrib/lite/kernels/register.h"
+#include "tensorflow/contrib/lite/kernels/test_util.h"
+#include "tensorflow/contrib/lite/model.h"
+
+namespace tflite {
+namespace {
+
+using ::testing::ElementsAreArray;
+
+class GatherOpModel : public SingleOpModel {
+ public:
+  GatherOpModel(std::initializer_list<int> input_shape, TensorType input_type,
+                std::initializer_list<int> positions_shape) {
+    input_ = AddInput(input_type);
+    positions_ = AddInput(TensorType_INT32);
+    output_ = AddOutput(input_type);
+    SetBuiltinOp(BuiltinOperator_GATHER, BuiltinOptions_GatherOptions,
+                 CreateGatherOptions(builder_, 0).Union());
+    BuildInterpreter({input_shape, positions_shape});
+  }
+
+  void SetInputFloat(std::initializer_list<float> data) {
+    PopulateTensor<float>(input_, data);
+  }
+
+  void SetInputUint8(std::initializer_list<uint8_t> data) {
+    PopulateTensor<uint8_t>(input_, data);
+  }
+
+  void SetInput(std::initializer_list<string> data) {
+    PopulateStringTensor(input_, data);
+  }
+
+  void SetPositions(std::initializer_list<int32> data) {
+    PopulateTensor<int32>(positions_, data);
+  }
+
+  std::vector<float> GetOutputFloat() { return ExtractVector<float>(output_); }
+  std::vector<uint8_t> GetOutputUint8() {
+    return ExtractVector<uint8_t>(output_);
+  }
+  std::vector<string> GetOutputString() {
+    return ExtractVector<string>(output_);
+  }
+  std::vector<int> GetOutputShape() { return GetTensorShape(output_); }
+
+ protected:
+  int input_;
+  int positions_;
+  int output_;
+};
+
+TEST(GatherOpTest, Shuffle) {
+  GatherOpModel m({2, 2}, TensorType_FLOAT32, {2});
+  m.SetInputFloat({-2.0, 0.2, 0.7, 0.8});
+  m.SetPositions({1, 0});
+  m.Invoke();
+  EXPECT_THAT(m.GetOutputFloat(),
+              ElementsAreArray(ArrayFloatNear({0.7, 0.8, -2, 0.2})));
+}
+
+TEST(FloatGatherOpTest, Duplicate) {
+  GatherOpModel m({1, 2, 2}, TensorType_FLOAT32, {2});
+  m.SetInputFloat({-2.0, 0.2, 0.7, 0.8});
+  m.SetPositions({0, 0});
+  m.Invoke();
+  EXPECT_THAT(
+      m.GetOutputFloat(),
+      ElementsAreArray(ArrayFloatNear({-2, 0.2, 0.7, 0.8, -2, 0.2, 0.7, 0.8})));
+}
+
+TEST(FloatGatherOpTest, Slice) {
+  GatherOpModel m({4, 1}, TensorType_FLOAT32, {2});
+  m.SetInputFloat({-2.0, 0.2, 0.7, 0.8});
+  m.SetPositions({1, 3});
+  m.Invoke();
+  EXPECT_THAT(m.GetOutputFloat(), ElementsAreArray(ArrayFloatNear({0.2, 0.8})));
+}
+
+TEST(Uint8tGatherOpTest, Shuffle) {
+  GatherOpModel m({2, 2}, TensorType_UINT8, {2});
+  m.SetInputUint8({133, 134, 14, 15});
+  m.SetPositions({1, 0});
+  m.Invoke();
+
+  EXPECT_THAT(m.GetOutputUint8(), ElementsAreArray({14, 15, 133, 134}));
+}
+
+TEST(GatherOpTest, SimpleString) {
+  GatherOpModel m({3}, TensorType_STRING, {2});
+  m.SetInput({"A", "B", "C"});
+  m.SetPositions({0, 2});
+  m.Invoke();
+  ASSERT_THAT(m.GetOutputShape(), ElementsAreArray({2}));
+  EXPECT_THAT(m.GetOutputString(), ElementsAreArray({"A", "C"}));
+}
+}  // namespace
+}  // namespace tflite
+
+int main(int argc, char** argv) {
+  ::tflite::LogToStderr();
+  ::testing::InitGoogleTest(&argc, argv);
+  return RUN_ALL_TESTS();
+}
diff --git a/tensorflow/contrib/lite/kernels/register.cc b/tensorflow/contrib/lite/kernels/register.cc
index 12d360f15c..3d1edeef01 100644
--- a/tensorflow/contrib/lite/kernels/register.cc
+++ b/tensorflow/contrib/lite/kernels/register.cc
@@ -49,6 +49,7 @@ TfLiteRegistration* Register_RESHAPE();
 TfLiteRegistration* Register_RESIZE_BILINEAR();
 TfLiteRegistration* Register_SKIP_GRAM();
 TfLiteRegistration* Register_SPACE_TO_DEPTH();
+TfLiteRegistration* Register_GATHER();
 
 BuiltinOpResolver::BuiltinOpResolver() {
   AddBuiltin(BuiltinOperator_RELU, Register_RELU());
@@ -84,6 +85,7 @@ BuiltinOpResolver::BuiltinOpResolver() {
   AddBuiltin(BuiltinOperator_RESIZE_BILINEAR, Register_RESIZE_BILINEAR());
   AddBuiltin(BuiltinOperator_SKIP_GRAM, Register_SKIP_GRAM());
   AddBuiltin(BuiltinOperator_SPACE_TO_DEPTH, Register_SPACE_TO_DEPTH());
+  AddBuiltin(BuiltinOperator_GATHER, Register_GATHER());
 }
 
 TfLiteRegistration* BuiltinOpResolver::FindOp(
diff --git a/tensorflow/contrib/lite/kernels/test_util.cc b/tensorflow/contrib/lite/kernels/test_util.cc
index f716ba8741..b69f2b3e4b 100644
--- a/tensorflow/contrib/lite/kernels/test_util.cc
+++ b/tensorflow/contrib/lite/kernels/test_util.cc
@@ -180,4 +180,17 @@ int32_t SingleOpModel::GetTensorSize(int index) const {
   return total_size;
 }
 
+template <>
+std::vector<string> SingleOpModel::ExtractVector(int index) {
+  TfLiteTensor* tensor_ptr = interpreter_->tensor(index);
+  CHECK(tensor_ptr != nullptr);
+  const int num_strings = GetStringCount(tensor_ptr);
+  std::vector<string> result;
+  result.reserve(num_strings);
+  for (int i = 0; i < num_strings; ++i) {
+    const auto str = GetString(tensor_ptr, i);
+    result.emplace_back(str.str, str.len);
+  }
+  return result;
+}
 }  // namespace tflite
diff --git a/tensorflow/contrib/lite/kernels/test_util.h b/tensorflow/contrib/lite/kernels/test_util.h
index adcdeddbfc..531c1366a8 100644
--- a/tensorflow/contrib/lite/kernels/test_util.h
+++ b/tensorflow/contrib/lite/kernels/test_util.h
@@ -192,6 +192,9 @@ class SingleOpModel {
   std::map<string, std::function<TfLiteRegistration*()>> custom_registrations_;
 };
 
+// Strings have a special implementation that is in test_util.cc
+template <>
+std::vector<string> SingleOpModel::ExtractVector(int index);
 }  // namespace tflite
 
 #endif  // THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_KERNELS_TEST_UTIL_H_
diff --git a/tensorflow/contrib/lite/model.cc b/tensorflow/contrib/lite/model.cc
index 2f508d9d60..4ef2c942c1 100644
--- a/tensorflow/contrib/lite/model.cc
+++ b/tensorflow/contrib/lite/model.cc
@@ -508,6 +508,16 @@ void* ParseOpData(const Operator* op, BuiltinOperator op_type,
       builtin_data = reinterpret_cast<void*>(params);
       break;
     }
+    case BuiltinOperator_GATHER: {
+      TfLiteGatherParams* params = MallocPOD<TfLiteGatherParams>();
+      params->axis = 0;
+      if (auto* gather_params = op->builtin_options_as_GatherOptions()) {
+        params->axis = gather_params->axis();
+      }
+
+      builtin_data = reinterpret_cast<void*>(params);
+      break;
+    }
   }
   return builtin_data;
 }
diff --git a/tensorflow/contrib/lite/nnapi_delegate.cc b/tensorflow/contrib/lite/nnapi_delegate.cc
index 86f2afbaf2..6b93a70bff 100644
--- a/tensorflow/contrib/lite/nnapi_delegate.cc
+++ b/tensorflow/contrib/lite/nnapi_delegate.cc
@@ -306,6 +306,7 @@ void AddOpsAndParams(tflite::Interpreter* interpreter,
       case tflite::BuiltinOperator_CALL:
       case tflite::BuiltinOperator_SKIP_GRAM:
       case tflite::BuiltinOperator_RELU1:
+      case tflite::BuiltinOperator_GATHER:
         FATAL("Op code %d is currently not delegated to NNAPI", builtin);
         nn_op_type = -1;  // set to invalid
         break;
diff --git a/tensorflow/contrib/lite/schema/schema.fbs b/tensorflow/contrib/lite/schema/schema.fbs
index d1302bdc63..8b48543fc8 100644
--- a/tensorflow/contrib/lite/schema/schema.fbs
+++ b/tensorflow/contrib/lite/schema/schema.fbs
@@ -106,6 +106,7 @@ enum BuiltinOperator : byte {
   EMBEDDING_LOOKUP_SPARSE = 33,
   PAD = 34,
   UNIDIRECTIONAL_SEQUENCE_RNN = 35,
+  GATHER = 36,
 }
 
 // Options for the builtin operators.
@@ -132,6 +133,7 @@ union BuiltinOptions {
   EmbeddingLookupSparseOptions,
   MulOptions,
   PadOptions,
+  GatherOptions,
 }
 
 enum Padding : byte { SAME, VALID }
@@ -276,6 +278,10 @@ table EmbeddingLookupSparseOptions {
   combiner:CombinerType;
 }
 
+table GatherOptions {
+  axis: int;
+}
+
 // An OperatorCode can be an enum value (BuiltinOperator) if the operator is a
 // builtin, or a string if the operator is custom.
 table OperatorCode {
@@ -351,4 +357,3 @@ table Model {
 }
 
 root_type Model;
-
diff --git a/tensorflow/contrib/lite/schema/schema_generated.h b/tensorflow/contrib/lite/schema/schema_generated.h
old mode 100755
new mode 100644
index ba645c2764..7de205e1e4
--- a/tensorflow/contrib/lite/schema/schema_generated.h
+++ b/tensorflow/contrib/lite/schema/schema_generated.h
@@ -1,4 +1,4 @@
-/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -94,6 +94,9 @@ struct SpaceToDepthOptionsT;
 struct EmbeddingLookupSparseOptions;
 struct EmbeddingLookupSparseOptionsT;
 
+struct GatherOptions;
+struct GatherOptionsT;
+
 struct OperatorCode;
 struct OperatorCodeT;
 
@@ -172,11 +175,12 @@ enum BuiltinOperator {
   BuiltinOperator_EMBEDDING_LOOKUP_SPARSE = 33,
   BuiltinOperator_PAD = 34,
   BuiltinOperator_UNIDIRECTIONAL_SEQUENCE_RNN = 35,
+  BuiltinOperator_GATHER = 36,
   BuiltinOperator_MIN = BuiltinOperator_ADD,
-  BuiltinOperator_MAX = BuiltinOperator_UNIDIRECTIONAL_SEQUENCE_RNN
+  BuiltinOperator_MAX = BuiltinOperator_GATHER
 };
 
-inline BuiltinOperator (&EnumValuesBuiltinOperator())[33] {
+inline BuiltinOperator (&EnumValuesBuiltinOperator())[34] {
   static BuiltinOperator values[] = {
       BuiltinOperator_ADD,
       BuiltinOperator_AVERAGE_POOL_2D,
@@ -210,7 +214,8 @@ inline BuiltinOperator (&EnumValuesBuiltinOperator())[33] {
       BuiltinOperator_CUSTOM,
       BuiltinOperator_EMBEDDING_LOOKUP_SPARSE,
       BuiltinOperator_PAD,
-      BuiltinOperator_UNIDIRECTIONAL_SEQUENCE_RNN};
+      BuiltinOperator_UNIDIRECTIONAL_SEQUENCE_RNN,
+      BuiltinOperator_GATHER};
   return values;
 }
 
@@ -251,6 +256,7 @@ inline const char **EnumNamesBuiltinOperator() {
                                 "EMBEDDING_LOOKUP_SPARSE",
                                 "PAD",
                                 "UNIDIRECTIONAL_SEQUENCE_RNN",
+                                "GATHER",
                                 nullptr};
   return names;
 }
@@ -284,11 +290,12 @@ enum BuiltinOptions {
   BuiltinOptions_EmbeddingLookupSparseOptions = 20,
   BuiltinOptions_MulOptions = 21,
   BuiltinOptions_PadOptions = 22,
+  BuiltinOptions_GatherOptions = 23,
   BuiltinOptions_MIN = BuiltinOptions_NONE,
-  BuiltinOptions_MAX = BuiltinOptions_PadOptions
+  BuiltinOptions_MAX = BuiltinOptions_GatherOptions
 };
 
-inline BuiltinOptions (&EnumValuesBuiltinOptions())[23] {
+inline BuiltinOptions (&EnumValuesBuiltinOptions())[24] {
   static BuiltinOptions values[] = {
       BuiltinOptions_NONE,
       BuiltinOptions_Conv2DOptions,
@@ -312,7 +319,8 @@ inline BuiltinOptions (&EnumValuesBuiltinOptions())[23] {
       BuiltinOptions_SpaceToDepthOptions,
       BuiltinOptions_EmbeddingLookupSparseOptions,
       BuiltinOptions_MulOptions,
-      BuiltinOptions_PadOptions};
+      BuiltinOptions_PadOptions,
+      BuiltinOptions_GatherOptions};
   return values;
 }
 
@@ -340,6 +348,7 @@ inline const char **EnumNamesBuiltinOptions() {
                                 "EmbeddingLookupSparseOptions",
                                 "MulOptions",
                                 "PadOptions",
+                                "GatherOptions",
                                 nullptr};
   return names;
 }
@@ -468,6 +477,11 @@ struct BuiltinOptionsTraits<PadOptions> {
   static const BuiltinOptions enum_value = BuiltinOptions_PadOptions;
 };
 
+template <>
+struct BuiltinOptionsTraits<GatherOptions> {
+  static const BuiltinOptions enum_value = BuiltinOptions_GatherOptions;
+};
+
 struct BuiltinOptionsUnion {
   BuiltinOptions type;
   void *value;
@@ -735,6 +749,16 @@ struct BuiltinOptionsUnion {
                ? reinterpret_cast<const PadOptionsT *>(value)
                : nullptr;
   }
+  GatherOptionsT *AsGatherOptions() {
+    return type == BuiltinOptions_GatherOptions
+               ? reinterpret_cast<GatherOptionsT *>(value)
+               : nullptr;
+  }
+  const GatherOptionsT *AsGatherOptions() const {
+    return type == BuiltinOptions_GatherOptions
+               ? reinterpret_cast<const GatherOptionsT *>(value)
+               : nullptr;
+  }
 };
 
 bool VerifyBuiltinOptions(flatbuffers::Verifier &verifier, const void *obj,
@@ -2681,6 +2705,59 @@ CreateEmbeddingLookupSparseOptions(
     const EmbeddingLookupSparseOptionsT *_o,
     const flatbuffers::rehasher_function_t *_rehasher = nullptr);
 
+struct GatherOptionsT : public flatbuffers::NativeTable {
+  typedef GatherOptions TableType;
+  int32_t axis;
+  GatherOptionsT() : axis(0) {}
+};
+
+struct GatherOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
+  typedef GatherOptionsT NativeTableType;
+  enum { VT_AXIS = 4 };
+  int32_t axis() const { return GetField<int32_t>(VT_AXIS, 0); }
+  bool Verify(flatbuffers::Verifier &verifier) const {
+    return VerifyTableStart(verifier) &&
+           VerifyField<int32_t>(verifier, VT_AXIS) && verifier.EndTable();
+  }
+  GatherOptionsT *UnPack(
+      const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  void UnPackTo(
+      GatherOptionsT *_o,
+      const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  static flatbuffers::Offset<GatherOptions> Pack(
+      flatbuffers::FlatBufferBuilder &_fbb, const GatherOptionsT *_o,
+      const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct GatherOptionsBuilder {
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  void add_axis(int32_t axis) {
+    fbb_.AddElement<int32_t>(GatherOptions::VT_AXIS, axis, 0);
+  }
+  explicit GatherOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
+      : fbb_(_fbb) {
+    start_ = fbb_.StartTable();
+  }
+  GatherOptionsBuilder &operator=(const GatherOptionsBuilder &);
+  flatbuffers::Offset<GatherOptions> Finish() {
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<GatherOptions>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<GatherOptions> CreateGatherOptions(
+    flatbuffers::FlatBufferBuilder &_fbb, int32_t axis = 0) {
+  GatherOptionsBuilder builder_(_fbb);
+  builder_.add_axis(axis);
+  return builder_.Finish();
+}
+
+flatbuffers::Offset<GatherOptions> CreateGatherOptions(
+    flatbuffers::FlatBufferBuilder &_fbb, const GatherOptionsT *_o,
+    const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
 struct OperatorCodeT : public flatbuffers::NativeTable {
   typedef OperatorCode TableType;
   BuiltinOperator builtin_code;
@@ -2918,6 +2995,11 @@ struct Operator FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
                ? static_cast<const PadOptions *>(builtin_options())
                : nullptr;
   }
+  const GatherOptions *builtin_options_as_GatherOptions() const {
+    return builtin_options_type() == BuiltinOptions_GatherOptions
+               ? static_cast<const GatherOptions *>(builtin_options())
+               : nullptr;
+  }
   const flatbuffers::Vector<uint8_t> *custom_options() const {
     return GetPointer<const flatbuffers::Vector<uint8_t> *>(VT_CUSTOM_OPTIONS);
   }
@@ -3074,6 +3156,12 @@ inline const PadOptions *Operator::builtin_options_as<PadOptions>() const {
   return builtin_options_as_PadOptions();
 }
 
+template <>
+inline const GatherOptions *Operator::builtin_options_as<GatherOptions>()
+    const {
+  return builtin_options_as_GatherOptions();
+}
+
 struct OperatorBuilder {
   flatbuffers::FlatBufferBuilder &fbb_;
   flatbuffers::uoffset_t start_;
@@ -4658,6 +4746,45 @@ CreateEmbeddingLookupSparseOptions(
   return tflite::CreateEmbeddingLookupSparseOptions(_fbb, _combiner);
 }
 
+inline GatherOptionsT *GatherOptions::UnPack(
+    const flatbuffers::resolver_function_t *_resolver) const {
+  auto _o = new GatherOptionsT();
+  UnPackTo(_o, _resolver);
+  return _o;
+}
+
+inline void GatherOptions::UnPackTo(
+    GatherOptionsT *_o,
+    const flatbuffers::resolver_function_t *_resolver) const {
+  (void)_o;
+  (void)_resolver;
+  {
+    auto _e = axis();
+    _o->axis = _e;
+  };
+}
+
+inline flatbuffers::Offset<GatherOptions> GatherOptions::Pack(
+    flatbuffers::FlatBufferBuilder &_fbb, const GatherOptionsT *_o,
+    const flatbuffers::rehasher_function_t *_rehasher) {
+  return CreateGatherOptions(_fbb, _o, _rehasher);
+}
+
+inline flatbuffers::Offset<GatherOptions> CreateGatherOptions(
+    flatbuffers::FlatBufferBuilder &_fbb, const GatherOptionsT *_o,
+    const flatbuffers::rehasher_function_t *_rehasher) {
+  (void)_rehasher;
+  (void)_o;
+  struct _VectorArgs {
+    flatbuffers::FlatBufferBuilder *__fbb;
+    const GatherOptionsT *__o;
+    const flatbuffers::rehasher_function_t *__rehasher;
+  } _va = {&_fbb, _o, _rehasher};
+  (void)_va;
+  auto _axis = _o->axis;
+  return tflite::CreateGatherOptions(_fbb, _axis);
+}
+
 inline OperatorCodeT *OperatorCode::UnPack(
     const flatbuffers::resolver_function_t *_resolver) const {
   auto _o = new OperatorCodeT();
@@ -5134,6 +5261,10 @@ inline bool VerifyBuiltinOptions(flatbuffers::Verifier &verifier,
       auto ptr = reinterpret_cast<const PadOptions *>(obj);
       return verifier.VerifyTable(ptr);
     }
+    case BuiltinOptions_GatherOptions: {
+      auto ptr = reinterpret_cast<const GatherOptions *>(obj);
+      return verifier.VerifyTable(ptr);
+    }
     default:
       return false;
   }
@@ -5246,6 +5377,10 @@ inline void *BuiltinOptionsUnion::UnPack(
       auto ptr = reinterpret_cast<const PadOptions *>(obj);
       return ptr->UnPack(resolver);
     }
+    case BuiltinOptions_GatherOptions: {
+      auto ptr = reinterpret_cast<const GatherOptions *>(obj);
+      return ptr->UnPack(resolver);
+    }
     default:
       return nullptr;
   }
@@ -5345,6 +5480,10 @@ inline flatbuffers::Offset<void> BuiltinOptionsUnion::Pack(
       auto ptr = reinterpret_cast<const PadOptionsT *>(value);
       return CreatePadOptions(_fbb, ptr, _rehasher).Union();
     }
+    case BuiltinOptions_GatherOptions: {
+      auto ptr = reinterpret_cast<const GatherOptionsT *>(value);
+      return CreateGatherOptions(_fbb, ptr, _rehasher).Union();
+    }
     default:
       return 0;
   }
@@ -5454,6 +5593,10 @@ inline BuiltinOptionsUnion::BuiltinOptionsUnion(const BuiltinOptionsUnion &u)
       value = new PadOptionsT(*reinterpret_cast<PadOptionsT *>(u.value));
       break;
     }
+    case BuiltinOptions_GatherOptions: {
+      value = new GatherOptionsT(*reinterpret_cast<GatherOptionsT *>(u.value));
+      break;
+    }
     default:
       break;
   }
@@ -5571,6 +5714,11 @@ inline void BuiltinOptionsUnion::Reset() {
       delete ptr;
       break;
     }
+    case BuiltinOptions_GatherOptions: {
+      auto ptr = reinterpret_cast<GatherOptionsT *>(value);
+      delete ptr;
+      break;
+    }
     default:
       break;
   }
diff --git a/tensorflow/contrib/lite/testing/BUILD b/tensorflow/contrib/lite/testing/BUILD
index b9c5cbe715..b63c0c058c 100644
--- a/tensorflow/contrib/lite/testing/BUILD
+++ b/tensorflow/contrib/lite/testing/BUILD
@@ -25,6 +25,7 @@ gen_zipped_test_files(
         "depthwiseconv.zip",
         "fully_connected.zip",
         "fused_batch_norm.zip",
+        "gather.zip",
         "global_batch_norm.zip",
         "l2_pool.zip",
         "l2norm.zip",
diff --git a/tensorflow/contrib/lite/testing/generate_examples.py b/tensorflow/contrib/lite/testing/generate_examples.py
index 4848ca8062..4c01fedb1e 100644
--- a/tensorflow/contrib/lite/testing/generate_examples.py
+++ b/tensorflow/contrib/lite/testing/generate_examples.py
@@ -94,6 +94,8 @@ KNOWN_BUGS = {
     r"softmax.*input_shape=\[1,3,4,3\]": "67749831",
     # SpaceToDepth only supports float32.
     r"space_to_depth.*(float16|int32|uint8|int64)": "68018134",
+    # Gather doesn't support int64 indices.
+    r"gather.*indices_dtype=int64": "XXXX",
 }
 
 
@@ -120,7 +122,7 @@ def toco_options(data_types,
   # to change
   if data_types[0] == "QUANTIZED_UINT8":
     inference_type = "QUANTIZED_UINT8"
-  s = (" --input_types=%s" % ",".join(data_types) +
+  s = (" --input_data_types=%s" % ",".join(data_types) +
        " --inference_type=%s" % inference_type +
        " --input_format=TENSORFLOW_GRAPHDEF" + " --output_format=TFLITE" +
        " --input_arrays=%s" % ",".join(input_arrays) +
@@ -704,6 +706,46 @@ def make_mul_tests(zip_path):
   make_zip_of_tests(zip_path, test_parameters, build_graph, build_inputs)
 
 
+def make_gather_tests(zip_path):
+  """Make a set of tests to do gather."""
+
+  test_parameters = [{
+      # TODO(mgubin): add string tests when they are supported by Toco.
+      # TODO(mgubin): add tests for Nd indices when they are supported by
+      # TfLite.
+      # TODO(mgubin): add tests for axis != 0 when it is supported by TfLite.
+      "params_dtype": [tf.float32, tf.int32],
+      "params_shape": [[10], [1, 2, 20]],
+      "indices_dtype": [tf.int32],
+      "indices_shape": [[3], [5]],
+      "axis": [0],  # axis!=0 is GatherV2
+  }]
+
+  def build_graph(parameters):
+    """Build the gather op testing graph."""
+    params = tf.placeholder(
+        dtype=parameters["params_dtype"],
+        name="params",
+        shape=parameters["params_shape"])
+    indices = tf.placeholder(
+        dtype=parameters["indices_dtype"],
+        name="indices",
+        shape=parameters["indices_shape"])
+    out = tf.gather(params, indices, axis=parameters["axis"])
+    return [params, indices], [out]
+
+  def build_inputs(parameters, sess, inputs, outputs):
+    params = create_tensor_data(parameters["params_dtype"],
+                                parameters["params_shape"])
+    indices = create_tensor_data(parameters["indices_dtype"],
+                                 parameters["indices_shape"], 0,
+                                 parameters["params_shape"][0] - 1)
+    return [params, indices], sess.run(
+        outputs, feed_dict=dict(zip(inputs, [params, indices])))
+
+  make_zip_of_tests(zip_path, test_parameters, build_graph, build_inputs)
+
+
 def make_global_batch_norm_tests(zip_path):
   """Make a set of tests to do batch_norm_with_global_normalization."""
 
@@ -1190,6 +1232,7 @@ def main(unused_args):
         "concat.zip": make_concatenation_tests,
         "fully_connected.zip": make_fully_connected_tests,
         "global_batch_norm.zip": make_global_batch_norm_tests,
+        "gather.zip": make_gather_tests,
         "fused_batch_norm.zip": make_fused_batch_norm_tests,
         "l2norm.zip": make_l2norm_tests,
         "local_response_norm.zip": make_local_response_norm_tests,
diff --git a/tensorflow/contrib/lite/testing/generated_examples_zip_test.cc b/tensorflow/contrib/lite/testing/generated_examples_zip_test.cc
index 76e8767617..29f0c68ba4 100644
--- a/tensorflow/contrib/lite/testing/generated_examples_zip_test.cc
+++ b/tensorflow/contrib/lite/testing/generated_examples_zip_test.cc
@@ -248,6 +248,7 @@ INSTANTIATE_TESTS(conv)
 INSTANTIATE_TESTS(depthwiseconv)
 INSTANTIATE_TESTS(fully_connected)
 INSTANTIATE_TESTS(fused_batch_norm)
+INSTANTIATE_TESTS(gather)
 INSTANTIATE_TESTS(global_batch_norm)
 INSTANTIATE_TESTS(l2norm)
 INSTANTIATE_TESTS(l2_pool)
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/propagate_fixed_sizes.cc b/tensorflow/contrib/lite/toco/graph_transformations/propagate_fixed_sizes.cc
index 308dadfdeb..786d3da7cf 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/propagate_fixed_sizes.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/propagate_fixed_sizes.cc
@@ -821,6 +821,7 @@ void ProcessGatherOperator(Model* model, GatherOperator* op) {
 
   // Copy the input dimensions to the output except for dimension 0,
   // where the dimension of indices_shape is used.
+  // TODO(mgubin): if axis != 0 this is not true, change when it's supported.
   auto output_dims = output_array.mutable_shape()->mutable_dims();
   output_dims->push_back(indices_shape.dims(0));
   for (int dim = 1; dim < input_shape.dimensions_count(); dim++) {
diff --git a/tensorflow/contrib/lite/toco/model.h b/tensorflow/contrib/lite/toco/model.h
index a481d8801c..a53c751d3c 100644
--- a/tensorflow/contrib/lite/toco/model.h
+++ b/tensorflow/contrib/lite/toco/model.h
@@ -1208,7 +1208,8 @@ struct FloorOperator : Operator {
 // TensorFlow equivalent: Gather
 struct GatherOperator : Operator {
   GatherOperator() : Operator(OperatorType::kGather) {}
-  int input_rank;
+  int axis = 0;
+  int input_rank = 0;
 };
 
 // ResizeBilinear operator. It resizes input images with bilinear interpolation.
diff --git a/tensorflow/contrib/lite/toco/tflite/operator.cc b/tensorflow/contrib/lite/toco/tflite/operator.cc
index 8d25336bb7..7fee47a90b 100644
--- a/tensorflow/contrib/lite/toco/tflite/operator.cc
+++ b/tensorflow/contrib/lite/toco/tflite/operator.cc
@@ -211,6 +211,22 @@ class FullyConnected
   }
 };
 
+class Gather : public BuiltinOperator<GatherOperator, ::tflite::GatherOptions,
+                                      ::tflite::BuiltinOptions_GatherOptions> {
+ public:
+  using BuiltinOperator::BuiltinOperator;
+  flatbuffers::Offset<TfLiteOptions> WriteOptions(
+      const TocoOperator& op,
+      flatbuffers::FlatBufferBuilder* builder) const override {
+    return ::tflite::CreateGatherOptions(*builder, op.axis);
+  }
+
+  void ReadOptions(const TfLiteOptions& options,
+                   TocoOperator* op) const override {
+    op->axis = options.axis();
+  }
+};
+
 class Svdf : public BuiltinOperator<SvdfOperator, ::tflite::SVDFOptions,
                                     ::tflite::BuiltinOptions_SVDFOptions> {
  public:
@@ -564,6 +580,8 @@ std::vector<std::unique_ptr<BaseOperator>> BuildOperatorList() {
                                OperatorType::kDepthwiseConv));
   ops.emplace_back(new FullyConnected(::tflite::BuiltinOperator_FULLY_CONNECTED,
                                       OperatorType::kFullyConnected));
+  ops.emplace_back(
+      new Gather(::tflite::BuiltinOperator_GATHER, OperatorType::kGather));
   ops.emplace_back(
       new L2Normalization(::tflite::BuiltinOperator_L2_NORMALIZATION,
                           OperatorType::kL2Normalization));
@@ -606,8 +624,6 @@ std::vector<std::unique_ptr<BaseOperator>> BuildOperatorList() {
       "DEQUANTIZE", OperatorType::kDequantize));
   ops.emplace_back(
       new SimpleOperator<FloorOperator>("FLOOR", OperatorType::kFloor));
-  ops.emplace_back(
-      new SimpleOperator<GatherOperator>("GATHER", OperatorType::kGather));
   ops.emplace_back(
       new SimpleOperator<ReluOperator>("RELU", OperatorType::kRelu));
   ops.emplace_back(
diff --git a/tensorflow/contrib/lite/toco/tflite/operator_test.cc b/tensorflow/contrib/lite/toco/tflite/operator_test.cc
index fe079e833d..caecbd0325 100644
--- a/tensorflow/contrib/lite/toco/tflite/operator_test.cc
+++ b/tensorflow/contrib/lite/toco/tflite/operator_test.cc
@@ -101,7 +101,6 @@ TEST_F(OperatorTest, SimpleOperators) {
   CheckSimpleOperator<DequantizeOperator>("DEQUANTIZE",
                                           OperatorType::kDequantize);
   CheckSimpleOperator<FloorOperator>("FLOOR", OperatorType::kFloor);
-  CheckSimpleOperator<GatherOperator>("GATHER", OperatorType::kGather);
   CheckSimpleOperator<ReluOperator>("RELU", OperatorType::kRelu);
   CheckSimpleOperator<Relu1Operator>("RELU1", OperatorType::kRelu1);
   CheckSimpleOperator<Relu6Operator>("RELU6", OperatorType::kRelu6);
@@ -167,6 +166,13 @@ TEST_F(OperatorTest, CustomFullyConnected) {
             output_toco_op->fused_activation_function);
 }
 
+TEST_F(OperatorTest, BuiltinGather) {
+  GatherOperator op;
+  auto output_toco_op =
+      SerializeAndDeserialize(GetOperator("GATHER", OperatorType::kGather), op);
+  ASSERT_NE(nullptr, output_toco_op.get());
+}
+
 TEST_F(OperatorTest, BuiltinL2Pool) {
   L2PoolOperator op;
   op.stride_width = 123;
-- 
GitLab


From e92f85e78573fbf88accdf4b76535b0c70e7f674 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 14 Dec 2017 14:54:39 -0800
Subject: [PATCH 1026/1225] Refactor encapsulate_subgraphs_pass in preparation
 for adding new functionality.

PiperOrigin-RevId: 179102453
---
 .../jit/encapsulate_subgraphs_pass.cc         | 894 ++++++++++++------
 1 file changed, 586 insertions(+), 308 deletions(-)

diff --git a/tensorflow/compiler/jit/encapsulate_subgraphs_pass.cc b/tensorflow/compiler/jit/encapsulate_subgraphs_pass.cc
index dc06b7a402..407b7dcbfb 100644
--- a/tensorflow/compiler/jit/encapsulate_subgraphs_pass.cc
+++ b/tensorflow/compiler/jit/encapsulate_subgraphs_pass.cc
@@ -16,7 +16,11 @@ limitations under the License.
 #include "tensorflow/compiler/jit/encapsulate_subgraphs_pass.h"
 
 #include <functional>
+#include <memory>
 #include <numeric>
+#include <string>
+#include <unordered_map>
+#include <vector>
 
 #include "tensorflow/compiler/jit/graph_to_functiondef.h"
 #include "tensorflow/compiler/jit/legacy_flags/encapsulate_subgraphs_pass_flags.h"
@@ -72,15 +76,15 @@ bool AreAllParentsConst(const Node& n,
 
 void MarkGuaranteedConstants(
     const Graph& graph,
-    const std::vector<std::pair<Node*, Node*>>& src_arg_pairs) {
+    const std::vector<std::pair<const Node*, Node*>>& src_arg_pairs) {
   gtl::FlatSet<const Node*> guaranteed_const_nodes;
-  std::vector<Node*> srcs;
+  std::vector<const Node*> srcs;
   srcs.reserve(src_arg_pairs.size());
   for (const auto& src_arg : src_arg_pairs) {
     srcs.push_back(src_arg.first);
   }
   ReverseDFSFrom(graph, srcs, /*enter=*/nullptr,
-                 /*leave=*/[&guaranteed_const_nodes](Node* n) {
+                 /*leave=*/[&guaranteed_const_nodes](const Node* n) {
                    // TODO(vinuraja): Doesn't work in the presence of loops.
                    if (AreAllParentsConst(*n, guaranteed_const_nodes)) {
                      guaranteed_const_nodes.insert(n);
@@ -122,6 +126,11 @@ struct NodeSlot {
   };
 };
 
+// TODO(phawkins) add a canonical copy of these operator names and refactor
+// everything to use it.
+static const char* const kArgOp = "_Arg";
+static const char* const kRetValOp = "_Retval";
+
 class Encapsulator {
  public:
   Encapsulator(string group_attribute, Graph const* graph_in)
@@ -146,54 +155,167 @@ class Encapsulator {
   Status BuildOutputGraph(bool parallel_checking, Graph* graph_out);
 
  private:
-  // Returns the key attribute associated with a node. Returns the empty string
-  // if no key attribute is found.
-  string GetFunctionNameAttr(const Node* node) const;
-
   // A subgraph of the input, all marked with a common 'group_attribute'
   // value.
-  struct Subgraph {
+  class Subgraph {
+   public:
+    // Creates a graph to build the subgraph in, if it doesn't already exist,
+    // using the same op registry and versions as graph_in.
+    Node* MakeNodeImage(const Graph* graph_in, Node* node);
+
+    // Returns the graph the subgraph is being built in.
+    Graph* GetGraph() const;
+
+    // Builds a FunctionDef, and adds it to 'library'. The value of the
+    // 'group_attribute' annotations becomes the function name.  If
+    // 'reuse_existing_functions' is set, use an existing function with the same
+    // name, if any.  If 'rewrite_subgraph_fn' is set, it is applied to the
+    // subgraph before function conversion.
+    Status BuildFunctionDef(const string& name_in,
+                            const RewriteSubgraphFn& rewrite_subgraph_fn,
+                            bool reuse_existing_functions,
+                            FunctionLibraryDefinition* library);
+
+    // Adds the function call node to graph_out.
+    Status AddFunctionCallNode(
+        const std::unordered_map<const Node*, Node*>& node_images,
+        bool parallel_checking, Graph* graph_out);
+
+    // Returns the Node that inputs to the function should be wired up to.
+    Node* GetCallNodeForInputs() const;
+
+    // Returns the Node that outputs to the function should be wired up to.
+    Node* GetCallNodeForOutputs() const;
+
+    // Returns the index of the arg that the dst of edge should connect to.
+    int GetArgIndexForEdge(const Edge* edge) const;
+
+    // Returns the index of the result that the src of edge should connect to.
+    int GetResultIndexForEdge(const Edge* edge) const;
+
+    // Creates an _Arg node for the src node of edge, and add its index to
+    // args_by_src_, if none exists yet. Also adds its index to args_by_dst_,
+    // and adds the edge within the subgraph from the _Arg node to the image of
+    // the dst node.
+    Status RecordArg(const Edge* edge,
+                     const std::unordered_map<const Node*, Node*>& node_images,
+                     std::vector<std::pair<const Node*, Node*>>* src_arg_pairs);
+
+    // Creates a _Retval node for the src node of edge, and add it to results_,
+    // if none exists yet. If a new _Retval node is created, also adds the edge
+    // within the subgraph from the src to the _Retval node.
+    Status RecordResult(
+        const Edge* edge,
+        const std::unordered_map<const Node*, Node*>& node_images);
+
+   private:
+    // Builds a ParallelCheck op that compares the output of the original
+    // subgraph with the encapsulated subgraph.
+    Status BuildParallelCheckOp(
+        const std::unordered_map<const Node*, Node*>& node_images,
+        Graph* graph_out);
+
     // The subgraph extracted from the input graph, suitable for being turned
     // into a FunctionDef. Inputs are fed by _Arg nodes, and outputs are
     // returned by _Retval nodes.
-    std::unique_ptr<Graph> graph;
+    std::unique_ptr<Graph> graph_;
 
     // Which device are these nodes on? Used to assign a device to the call
     // node.
-    string device;
+    string device_;
 
     // NodeDef for the function call node.
-    NodeDef call_node_def;
+    NodeDef call_node_def_;
 
     // Function call node(s) in the output graph. Not owned.
     // If parallel_checking is enabled, 'call_node_inputs' is the function call
     // node to which inputs should be fed, and 'call_node_outputs' is the
     // parallel check op from which outputs should be read. If parallel checking
     // is disabled, both point to the function call node.
-    Node* call_node_inputs;
-    Node* call_node_outputs;
+    Node* call_node_inputs_;
+    Node* call_node_outputs_;
 
     // Maps from source (producer node/slot) and destination
     // (consumer node/slot) tensors in the input graph to _Arg numbers in
     // the subgraph. The source map is one-to-one, whereas the dest map may be
     // many-to-one.
-    std::unordered_map<NodeSlot, int, NodeSlot::Hasher> args_by_src;
-    std::unordered_map<NodeSlot, int, NodeSlot::Hasher> args_by_dst;
+    std::unordered_map<NodeSlot, int, NodeSlot::Hasher> args_by_src_;
+    std::unordered_map<NodeSlot, int, NodeSlot::Hasher> args_by_dst_;
 
     // The _Arg nodes in the subgraph, in order by argument number.
-    std::vector<Node*> args;
+    std::vector<Node*> args_;
 
     // Map from source tensor in the input graph to result #.
-    std::unordered_map<NodeSlot, int, NodeSlot::Hasher> results;
+    std::unordered_map<NodeSlot, int, NodeSlot::Hasher> results_;
   };
 
-  // Builds a ParallelCheck op that compares the output of the original subgraph
-  // with the encapsulated subgraph.
-  Status BuildParallelCheckOp(
+  // Returns the key attribute associated with a node in attr. Sets attr to the
+  // empty string if the attribute is not found.
+  Status GetFunctionNameAttr(const Node* node, string* attr) const;
+
+  // Copies edges local to a subgraph. Adds _Arg and _Retval nodes to subgraphs
+  // for data edges that cross subgraph boundaries.
+  Status CopySubgraphEdges(
+      const std::unordered_map<const Node*, Node*>& node_images,
+      std::vector<std::pair<const Node*, Node*>>* src_arg_pairs);
+
+  // Copies all marked nodes to a subgraph. Does nothing for unmarked nodes.
+  Status CopySubgraphNodes(std::unordered_map<const Node*, Node*>* node_images);
+
+  // Copies all nodes that aren't in a compiled subgraph to the output graph.
+  Status CopyNodesToOutputGraph(
+      bool parallel_checking, Graph* graph_out,
+      std::unordered_map<const Node*, Node*>* node_images);
+
+  // Adds function call nodes for each compiled subgraph.
+  Status AddFunctionCallNodes(
+      const std::unordered_map<const Node*, Node*>& node_images,
+      bool parallel_checking, Graph* graph_out);
+
+  // Finds the image of an edge source in the output graph. If the edge crosses
+  // a subgraph boundary it is the output of a call node, otherwise it is a node
+  // in the output graph.
+  Status FindOutputImageOfEdgeSrc(
+      const string& src_func_id, const string& dst_func_id,
+      const std::unordered_map<const Node*, Node*>& node_images,
+      const Node* original_src_node, Node** src_image);
+
+  // Finds an edge source slot in the output graph. If the edge crosses a
+  // subgraph boundary it is a slot on the output of a call node, otherwise it
+  // is a slot on a node in the output graph.
+  int FindOutputSlotOfEdgeSrc(const string& src_func_id,
+                              const string& dst_func_id, const Edge* edge);
+
+  // Finds the image of an edge destination in the output graph. If the edge
+  // crosses a subgraph boundary it is the input of a call node, otherwise it is
+  // a node in the output graph.
+  Status FindOutputImageOfEdgeDst(
+      const string& src_func_id, const string& dst_func_id,
+      const std::unordered_map<const Node*, Node*>& node_images,
+      const Node* original_dst_node, Node** dst_image);
+
+  // Finds an edge destination slot in the output graph. If the edge crosses a
+  // subgraph boundary it is a slot on the input of a call node, otherwise it is
+  // a slot on a node in the output graph.
+  int FindOutputSlotOfEdgeDst(const string& src_func_id,
+                              const string& dst_func_id, const Edge* edge);
+
+  // Copies a single edge to the output graph. The edge is either entirely
+  // within the output graph, or crosses into or out of a compiled subgraph.
+  Status CopyEdgeToOutputGraph(
+      const Edge* edge, const string& src_func_id, const string& dst_func_id,
+      const std::unordered_map<const Node*, Node*>& node_images,
+      bool parallel_checking, Graph* graph_out,
+      std::unordered_set<std::pair<NodeSlot, NodeSlot>, NodeSlot::PairHasher>*
+          edges_added);
+
+  // Adds all edges to the output graph.
+  Status AddEdgesToOutputGraph(
       const std::unordered_map<const Node*, Node*>& node_images,
-      const Subgraph& subgraph, Graph* graph_out, Node** parallel_check_op);
+      bool parallel_checking, Graph* graph_out);
 
   const string group_attribute_;
+  const string outside_compilation_attribute_;
   const Graph* graph_in_;
 
   std::unordered_map<string, Subgraph> subgraphs_;
@@ -201,230 +323,184 @@ class Encapsulator {
   TF_DISALLOW_COPY_AND_ASSIGN(Encapsulator);
 };
 
-// TODO(phawkins) add a canonical copy of these operator names and refactor
-// everything to use it.
-static const char* const kArgOp = "_Arg";
-static const char* const kRetValOp = "_Retval";
-
-// Returns the function name attached to 'node', or the empty string if there is
-// none.
-string Encapsulator::GetFunctionNameAttr(Node const* node) const {
-  string attr;
-  if (!GetNodeAttr(node->attrs(), group_attribute_, &attr).ok()) {
-    attr.clear();
-  }
-  return attr;
+Node* Encapsulator::Subgraph::GetCallNodeForInputs() const {
+  return call_node_inputs_;
 }
 
-Status Encapsulator::SplitIntoSubgraphs() {
-  Status s;
-
-  // Map from input graph nodes to subgraph nodes.
-  std::unordered_map<Node*, Node*> node_images;
-
-  std::vector<std::pair<Node*, Node*>> src_arg_pairs;
-  // Copy all marked nodes to a subgraph. Do nothing for unmarked nodes.
-  for (Node* node : graph_in_->op_nodes()) {
-    string func_id = GetFunctionNameAttr(node);
-
-    if (func_id.empty()) continue;
+Node* Encapsulator::Subgraph::GetCallNodeForOutputs() const {
+  return call_node_outputs_;
+}
 
-    Subgraph& subgraph = subgraphs_[func_id];
-    if (!subgraph.graph) {
-      subgraph.graph.reset(new Graph(graph_in_->op_registry()));
-      subgraph.graph->set_versions(graph_in_->versions());
-    }
+int Encapsulator::Subgraph::GetArgIndexForEdge(const Edge* edge) const {
+  return args_by_dst_.at(NodeSlot(edge->dst(), edge->dst_input()));
+}
 
-    Node* image = subgraph.graph->CopyNode(node);
-    image->ClearAttr(group_attribute_);
-    node_images[node] = image;
+int Encapsulator::Subgraph::GetResultIndexForEdge(const Edge* edge) const {
+  return results_.at(NodeSlot(edge->src(), edge->src_output()));
+}
 
-    if (subgraph.device.empty()) {
-      subgraph.device = node->assigned_device_name().empty()
-                            ? node->requested_device()
-                            : node->assigned_device_name();
-    }
+Node* Encapsulator::Subgraph::MakeNodeImage(const Graph* graph_in, Node* node) {
+  if (!graph_) {
+    graph_.reset(new Graph(graph_in->op_registry()));
+    graph_->set_versions(graph_in->versions());
   }
 
-  // Copy edges local to a subgraph. Add _Arg and _Retval nodes to subgraphs for
-  // data edges that cross subgraph boundaries.
-  for (const Edge* edge : graph_in_->edges()) {
-    string src_func_id = GetFunctionNameAttr(edge->src());
-    string dst_func_id = GetFunctionNameAttr(edge->dst());
-    Node* src_image = gtl::FindWithDefault(node_images, edge->src(), nullptr);
-    Node* dst_image = gtl::FindWithDefault(node_images, edge->dst(), nullptr);
-
-    // Copy edges that are local to a subgraph.
-    if (!src_func_id.empty() && src_func_id == dst_func_id) {
-      Graph* g = subgraphs_[src_func_id].graph.get();
-      if (edge->IsControlEdge()) {
-        g->AddControlEdge(src_image, dst_image);
-      } else {
-        g->AddEdge(src_image, edge->src_output(), dst_image, edge->dst_input());
-      }
-      continue;
-    }
-
-    // Ignore cross-boundary control edges for right now. We will lift them
-    // onto the enclosing call operators in BuildOutputGraph().
-    if (edge->IsControlEdge()) continue;
+  if (device_.empty()) {
+    device_ = node->assigned_device_name().empty()
+                  ? node->requested_device()
+                  : node->assigned_device_name();
+  }
 
-    // Add 'src' as an output of its subgraph, if applicable.
-    if (!src_func_id.empty()) {
-      Subgraph& src_subgraph = subgraphs_[src_func_id];
-      int ret_index = src_subgraph.results.size();
-      if (src_subgraph.results
-              .emplace(NodeSlot(edge->src(), edge->src_output()), ret_index)
-              .second) {
-        // Create a new _Retval node
-        DataType dtype = edge->src()->output_type(edge->src_output());
-
-        if (IsRefType(dtype)) {
-          return errors::InvalidArgument(
-              "Ref Tensors (e.g., Variables) are not supported: tensor ",
-              edge->src()->name(), ":", edge->src_output());
-        }
+  return graph_->CopyNode(node);
+}
 
-        NodeDef ret_def;
-        ret_def.set_op(kRetValOp);
-        ret_def.set_name(strings::StrCat(edge->src()->name(), "_",
-                                         edge->src_output(), "_retval"));
-        AddNodeAttr("T", dtype, &ret_def);
-        AddNodeAttr("index", ret_index, &ret_def);
-        Node* ret = src_subgraph.graph->AddNode(ret_def, &s);
-        if (!s.ok()) return s;
-
-        // Add an edge from 'src' to _Retval.
-        src_subgraph.graph->AddEdge(src_image, edge->src_output(), ret, 0);
-      }
+Graph* Encapsulator::Subgraph::GetGraph() const { return graph_.get(); }
+
+Status Encapsulator::Subgraph::RecordArg(
+    const Edge* edge, const std::unordered_map<const Node*, Node*>& node_images,
+    std::vector<std::pair<const Node*, Node*>>* src_arg_pairs) {
+  Node* src_node = edge->src();
+  int src_slot = edge->src_output();
+  std::unordered_map<NodeSlot, int, NodeSlot::Hasher>::iterator iter;
+  bool inserted;
+  std::tie(iter, inserted) =
+      args_by_src_.emplace(NodeSlot(src_node, src_slot), args_by_src_.size());
+  int arg_index = iter->second;
+  if (inserted) {
+    // Look at the type of the destination not the source, since Ref output
+    // Tensors can be automatically cast to non-Ref Tensors at the destination.
+    DataType dtype = edge->dst()->input_type(edge->dst_input());
+
+    if (IsRefType(dtype)) {
+      return errors::InvalidArgument(
+          "Ref Tensors (e.g., Variables) are not supported as args: tensor ",
+          src_node->name(), ":", src_slot);
     }
 
-    // Add 'dst' as an input of its subgraph, if applicable.
-    if (!dst_func_id.empty()) {
-      Subgraph& dst_subgraph = subgraphs_[dst_func_id];
-
-      // Create an _Arg node for this tensor, if none exists yet.
-      std::unordered_map<NodeSlot, int, NodeSlot::Hasher>::iterator iter;
-      bool inserted;
-      std::tie(iter, inserted) = dst_subgraph.args_by_src.emplace(
-          NodeSlot(edge->src(), edge->src_output()), dst_subgraph.args.size());
-      int arg_index = iter->second;
-      if (inserted) {
-        // This is the first time we have seen this tensor. Create an _Arg node.
-        DataType dtype = edge->dst()->input_type(edge->dst_input());
-
-        if (IsRefType(dtype)) {
-          return errors::InvalidArgument(
-              "Ref Tensors (e.g., Variables) are not supported: tensor ",
-              edge->src()->name(), ":", edge->src_output());
-        }
-
-        NodeDef arg_def;
-        NodeDefBuilder builder(strings::StrCat(edge->src()->name(), "_",
-                                               edge->src_output(), "_arg"),
-                               kArgOp);
-        builder.Attr("T", dtype);
-        builder.Attr("index", arg_index);
+    NodeDef arg_def;
+    NodeDefBuilder builder(
+        strings::StrCat(src_node->name(), "_", src_slot, "_arg"), kArgOp);
+    builder.Attr("T", dtype);
+    builder.Attr("index", arg_index);
+    Status s = builder.Finalize(&arg_def);
+    if (!s.ok()) return s;
 
-        s = builder.Finalize(&arg_def);
-        if (!s.ok()) return s;
+    Node* arg = graph_->AddNode(arg_def, &s);
+    if (!s.ok()) return s;
 
-        Node* arg = dst_subgraph.graph->AddNode(arg_def, &s);
-        if (!s.ok()) return s;
-        src_arg_pairs.push_back({edge->src(), arg});
+    src_arg_pairs->push_back({src_node, arg});
+    args_.push_back(arg);
+  }
+  Node* dst_node = edge->dst();
+  Node* dst_image = node_images.at(dst_node);
+  int dst_slot = edge->dst_input();
+  args_by_dst_[NodeSlot(dst_node, dst_slot)] = arg_index;
+  graph_->AddEdge(args_[arg_index], 0, dst_image, dst_slot);
+  return Status::OK();
+}
 
-        dst_subgraph.args.push_back(arg);
-      }
-      // Add an edge from the _Arg node to 'dst' in the subgraph.
-      dst_subgraph.args_by_dst[NodeSlot(edge->dst(), edge->dst_input())] =
-          arg_index;
-      dst_subgraph.graph->AddEdge(dst_subgraph.args[arg_index], 0, dst_image,
-                                  edge->dst_input());
+Status Encapsulator::Subgraph::RecordResult(
+    const Edge* edge,
+    const std::unordered_map<const Node*, Node*>& node_images) {
+  Node* src_node = edge->src();
+  Node* src_image = node_images.at(src_node);
+  int src_slot = edge->src_output();
+  std::unordered_map<NodeSlot, int, NodeSlot::Hasher>::iterator iter;
+  bool inserted;
+  std::tie(iter, inserted) =
+      results_.emplace(NodeSlot(src_node, src_slot), results_.size());
+  int ret_index = iter->second;
+  if (inserted) {
+    DataType dtype = src_node->output_type(src_slot);
+
+    if (IsRefType(dtype)) {
+      return errors::InvalidArgument(
+          "Ref Tensors (e.g., Variables) are not supported as results: tensor ",
+          src_node->name(), ":", src_slot);
     }
-  }
 
-  MarkGuaranteedConstants(*graph_in_, src_arg_pairs);
+    NodeDef ret_def;
+    NodeDefBuilder builder(
+        strings::StrCat(src_node->name(), "_", src_slot, "_retval"), kRetValOp);
+    builder.Attr("T", dtype);
+    builder.Attr("index", ret_index);
+    builder.Input(src_image->name(), src_slot, dtype);
+    Status s = builder.Finalize(&ret_def);
+    if (!s.ok()) return s;
+    Node* ret = graph_->AddNode(ret_def, &s);
+    if (!s.ok()) return s;
 
-  for (auto& entry : subgraphs_) {
-    FixupSourceAndSinkEdges(entry.second.graph.get());
+    graph_->AddEdge(src_image, src_slot, ret, 0);
   }
-
-  return s;
+  return Status::OK();
 }
 
-Status Encapsulator::BuildFunctionDefs(
-    const RewriteSubgraphFn& rewrite_subgraph_fn, bool reuse_existing_functions,
-    FunctionLibraryDefinition* library) {
-  // For each subgraph, build a FunctionDef.
-  for (auto& subgraph_entry : subgraphs_) {
-    string name = subgraph_entry.first;
-    Subgraph& subgraph = subgraph_entry.second;
-
-    subgraph.call_node_def.set_op(name);
-    subgraph.call_node_def.set_name(name);
-    subgraph.call_node_def.set_device(subgraph.device);
-
-    if (rewrite_subgraph_fn) {
-      // Initialize the input and output permutations to the identity.
-      std::vector<int> input_permutation(subgraph.args_by_src.size());
-      std::iota(input_permutation.begin(), input_permutation.end(), 0);
-      std::vector<int> output_permutation(subgraph.results.size());
-      std::iota(output_permutation.begin(), output_permutation.end(), 0);
-
-      TF_RETURN_IF_ERROR(
-          rewrite_subgraph_fn(&subgraph.graph, &input_permutation,
-                              &output_permutation, &subgraph.call_node_def));
-
-      // Apply the input/output permutations to the 'args_by_...' and 'results'
-      // mappings in 'subgraph', so when we build edges in BuildOutputGraph() we
-      // connect them to the right input/output positions.
-      if (input_permutation.size() != subgraph.args_by_src.size()) {
-        return errors::InvalidArgument("Input permutation has incorrect size.");
-      }
-      if (output_permutation.size() != subgraph.results.size()) {
-        return errors::InvalidArgument(
-            "Output permutation has incorrect size.");
-      }
-      for (auto& arg : subgraph.args_by_src) {
-        arg.second = input_permutation[arg.second];
-      }
-      for (auto& arg : subgraph.args_by_dst) {
-        arg.second = input_permutation[arg.second];
-      }
-      for (auto& result : subgraph.results) {
-        result.second = output_permutation[result.second];
-      }
-
-      name = subgraph.call_node_def.op();
+Status Encapsulator::Subgraph::BuildFunctionDef(
+    const string& name_in, const RewriteSubgraphFn& rewrite_subgraph_fn,
+    bool reuse_existing_functions, FunctionLibraryDefinition* library) {
+  // name_in is copied here because name may be modified below if
+  // rewrite_subgraph_fn is true.
+  string name = name_in;
+  call_node_def_.set_op(name);
+  call_node_def_.set_name(name);
+  call_node_def_.set_device(device_);
+
+  if (rewrite_subgraph_fn) {
+    // Initialize the input and output permutations to the identity.
+    std::vector<int> input_permutation(args_by_src_.size());
+    std::iota(input_permutation.begin(), input_permutation.end(), 0);
+    std::vector<int> output_permutation(results_.size());
+    std::iota(output_permutation.begin(), output_permutation.end(), 0);
+
+    TF_RETURN_IF_ERROR(rewrite_subgraph_fn(
+        &graph_, &input_permutation, &output_permutation, &call_node_def_));
+
+    // Apply the input/output permutations to the 'args_by_...' and 'results_'
+    // mappings, so when we build edges in BuildOutputGraph() we
+    // connect them to the right input/output positions.
+    if (input_permutation.size() != args_by_src_.size()) {
+      return errors::InvalidArgument("Input permutation has incorrect size.");
+    }
+    if (output_permutation.size() != results_.size()) {
+      return errors::InvalidArgument("Output permutation has incorrect size.");
+    }
+    for (auto& arg : args_by_src_) {
+      arg.second = input_permutation[arg.second];
+    }
+    for (auto& arg : args_by_dst_) {
+      arg.second = input_permutation[arg.second];
+    }
+    for (auto& result : results_) {
+      result.second = output_permutation[result.second];
     }
 
-    FunctionDef fdef;
-    TF_RETURN_IF_ERROR(GraphToFunctionDef(*subgraph.graph, name, &fdef));
+    name = call_node_def_.op();
+  }
 
-    if (VLOG_IS_ON(1)) {
-      VLOG(2) << "Build function def " << name;
-      dump_graph::DumpGraphToFile(
-          strings::StrCat("encapsulate_fdef_graph_", name), *subgraph.graph,
-          library);
-      dump_graph::DumpFunctionDefToFile(
-          strings::StrCat("encapsulate_fdef_", name), fdef);
-    }
+  FunctionDef fdef;
+  TF_RETURN_IF_ERROR(GraphToFunctionDef(*graph_, name, &fdef));
 
-    if (!reuse_existing_functions || library->Find(name) == nullptr) {
-      TF_RETURN_IF_ERROR(library->AddFunctionDef(fdef));
-    }
+  if (VLOG_IS_ON(1)) {
+    VLOG(2) << "Build function def " << name;
+    dump_graph::DumpGraphToFile(
+        strings::StrCat("encapsulate_fdef_graph_", name), *graph_, library);
+    dump_graph::DumpFunctionDefToFile(
+        strings::StrCat("encapsulate_fdef_", name), fdef);
+  }
+
+  if (!reuse_existing_functions || library->Find(name) == nullptr) {
+    TF_RETURN_IF_ERROR(library->AddFunctionDef(fdef));
   }
   return Status::OK();
 }
 
-Status Encapsulator::BuildParallelCheckOp(
+Status Encapsulator::Subgraph::BuildParallelCheckOp(
     const std::unordered_map<const Node*, Node*>& node_images,
-    const Encapsulator::Subgraph& subgraph, Graph* graph_out,
-    Node** parallel_check_op) {
+    Graph* graph_out) {
   // Build an index mapping output positions to node/slot pairs in the
   // original graph.
-  std::vector<NodeSlot> results_by_num(subgraph.results.size());
-  for (const auto& entry : subgraph.results) {
+  std::vector<NodeSlot> results_by_num(results_.size());
+  for (const auto& entry : results_) {
     results_by_num[entry.second] = entry.first;
   }
 
@@ -439,22 +515,22 @@ Status Encapsulator::BuildParallelCheckOp(
     expected_outputs[i] =
         NodeDefBuilder::NodeOut(node_images.at(node_slot.node)->name(),
                                 node_slot.slot, result_dtypes[i]);
-    actual_outputs[i] = NodeDefBuilder::NodeOut(subgraph.call_node_def.name(),
-                                                i, result_dtypes[i]);
+    actual_outputs[i] =
+        NodeDefBuilder::NodeOut(call_node_def_.name(), i, result_dtypes[i]);
   }
   // Assign the parallel check op to a CPU on the same task as the cluster it is
   // checking.
   string device, dummy;
   if (!DeviceNameUtils::SplitDeviceName(
-          subgraph.call_node_inputs->assigned_device_name(), &device, &dummy)) {
+          call_node_inputs_->assigned_device_name(), &device, &dummy)) {
     return errors::InvalidArgument("Could not parse device name");
   }
   strings::StrAppend(&device, "/cpu:0");
 
   NodeDef check_def;
   TF_RETURN_IF_ERROR(
-      NodeDefBuilder(graph_out->NewName(strings::StrCat(
-                         subgraph.call_node_def.name(), "_parallel_check")),
+      NodeDefBuilder(graph_out->NewName(strings::StrCat(call_node_def_.name(),
+                                                        "_parallel_check")),
                      "ParallelCheck")
           .Device(device)
           .Attr("T", result_dtypes)
@@ -474,65 +550,303 @@ Status Encapsulator::BuildParallelCheckOp(
     const NodeSlot& node_slot = results_by_num[i];
     graph_out->AddEdge(node_images.at(node_slot.node), node_slot.slot, check_op,
                        i);
-    graph_out->AddEdge(subgraph.call_node_inputs, i, check_op, num_results + i);
+    graph_out->AddEdge(call_node_inputs_, i, check_op, num_results + i);
   }
 
-  *parallel_check_op = check_op;
+  call_node_outputs_ = check_op;
   return Status::OK();
 }
 
-Status Encapsulator::BuildOutputGraph(bool parallel_checking,
-                                      Graph* graph_out) {
+Status Encapsulator::Subgraph::AddFunctionCallNode(
+    const std::unordered_map<const Node*, Node*>& node_images,
+    bool parallel_checking, Graph* graph_out) {
   Status s;
+  call_node_inputs_ = graph_out->AddNode(call_node_def_, &s);
+  if (!s.ok()) return s;
 
-  // Map from nodes in the input graph to nodes in the output graph.
+  // Copy the assigned device and the key_annotation over.
+  call_node_inputs_->set_assigned_device_name(device_);
+  call_node_outputs_ = call_node_inputs_;
+
+  if (parallel_checking) {
+    TF_RETURN_IF_ERROR(BuildParallelCheckOp(node_images, graph_out));
+  }
+  return Status::OK();
+}
+
+Status Encapsulator::GetFunctionNameAttr(Node const* node, string* attr) const {
+  Status s = GetNodeAttr(node->attrs(), group_attribute_, attr);
+  if (s.code() == error::Code::NOT_FOUND) {
+    // Return empty attr if there's no group_attribute.
+    attr->clear();
+    return Status::OK();
+  }
+  return s;
+}
+
+bool IsInSubgraph(const string& func_id) { return !func_id.empty(); }
+
+Status Encapsulator::CopySubgraphNodes(
+    std::unordered_map<const Node*, Node*>* node_images) {
+  for (Node* node : graph_in_->op_nodes()) {
+    string func_id;
+    TF_RETURN_IF_ERROR(GetFunctionNameAttr(node, &func_id));
+    if (!IsInSubgraph(func_id)) continue;
+
+    Subgraph& subgraph = subgraphs_[func_id];
+    Node* image = subgraph.MakeNodeImage(graph_in_, node);
+    image->ClearAttr(group_attribute_);
+    (*node_images)[node] = image;
+  }
+  return Status::OK();
+}
+
+Status Encapsulator::CopySubgraphEdges(
+    const std::unordered_map<const Node*, Node*>& node_images,
+    std::vector<std::pair<const Node*, Node*>>* src_arg_pairs) {
+  for (const Edge* edge : graph_in_->edges()) {
+    string src_func_id;
+    TF_RETURN_IF_ERROR(GetFunctionNameAttr(edge->src(), &src_func_id));
+    string dst_func_id;
+    TF_RETURN_IF_ERROR(GetFunctionNameAttr(edge->dst(), &dst_func_id));
+    Node* src_image = gtl::FindWithDefault(node_images, edge->src(), nullptr);
+    Node* dst_image = gtl::FindWithDefault(node_images, edge->dst(), nullptr);
+
+    // Copy edges that are local to a subgraph.
+    if (IsInSubgraph(src_func_id) && IsInSubgraph(dst_func_id) &&
+        src_func_id == dst_func_id) {
+      Graph* g = subgraphs_[src_func_id].GetGraph();
+      if (edge->IsControlEdge()) {
+        g->AddControlEdge(src_image, dst_image);
+      } else {
+        g->AddEdge(src_image, edge->src_output(), dst_image, edge->dst_input());
+      }
+      continue;
+    }
+
+    // Record 'src' as an output of its subgraph, if applicable.
+    if (IsInSubgraph(src_func_id)) {
+      Subgraph& src_subgraph = subgraphs_[src_func_id];
+      // Ignore control edges leaving the subgraph. We will lift them onto the
+      // enclosing call operators in BuildOutputGraph().
+      if (!edge->IsControlEdge()) {
+        TF_RETURN_IF_ERROR(src_subgraph.RecordResult(edge, node_images));
+      }
+    }
+
+    // Record 'dst' as an input of its subgraph, if applicable.
+    if (IsInSubgraph(dst_func_id)) {
+      Subgraph& dst_subgraph = subgraphs_[dst_func_id];
+      // Ignore control edges entering the subgraph. We will lift them onto
+      // the enclosing call operators in BuildOutputGraph().
+      if (!edge->IsControlEdge()) {
+        TF_RETURN_IF_ERROR(
+            dst_subgraph.RecordArg(edge, node_images, src_arg_pairs));
+      }
+    }
+  }
+  return Status::OK();
+}
+
+Status Encapsulator::SplitIntoSubgraphs() {
+  Status s;
+
+  // Map from input graph nodes to subgraph nodes.
   std::unordered_map<const Node*, Node*> node_images;
 
-  // Copy all unmarked nodes to the output graph.
+  // Each entry of src_arg_pairs is a pair whose first element is a node in the
+  // original graph that has an output edge in the subgraph, and whose second
+  // element is the arg node in the subgraph that it sends to. The vector will
+  // be filled in below in AddArgs.
+  std::vector<std::pair<const Node*, Node*>> src_arg_pairs;
+
+  TF_RETURN_IF_ERROR(CopySubgraphNodes(&node_images));
+  TF_RETURN_IF_ERROR(CopySubgraphEdges(node_images, &src_arg_pairs));
+
+  MarkGuaranteedConstants(*graph_in_, src_arg_pairs);
+
+  for (auto& entry : subgraphs_) {
+    Subgraph& subgraph = entry.second;
+    FixupSourceAndSinkEdges(subgraph.GetGraph());
+  }
+
+  return s;
+}
+
+Status Encapsulator::BuildFunctionDefs(
+    const RewriteSubgraphFn& rewrite_subgraph_fn, bool reuse_existing_functions,
+    FunctionLibraryDefinition* library) {
+  for (auto& subgraph_entry : subgraphs_) {
+    string name = subgraph_entry.first;
+    Subgraph& subgraph = subgraph_entry.second;
+    TF_RETURN_IF_ERROR(subgraph.BuildFunctionDef(
+        name, rewrite_subgraph_fn, reuse_existing_functions, library));
+  }
+  return Status::OK();
+}
+
+Status Encapsulator::CopyNodesToOutputGraph(
+    bool parallel_checking, Graph* graph_out,
+    std::unordered_map<const Node*, Node*>* node_images) {
   for (Node* node : graph_in_->op_nodes()) {
-    string func_id = GetFunctionNameAttr(node);
+    string func_id;
+    TF_RETURN_IF_ERROR(GetFunctionNameAttr(node, &func_id));
 
-    // Don't copy nodes that going to be encapsulated, unless parallel checking
-    // is enabled.
-    if (!func_id.empty() && !parallel_checking) continue;
+    // Don't copy nodes that are going to be encapsulated, unless parallel
+    // checking is enabled.
+    if (IsInSubgraph(func_id) && !parallel_checking) continue;
 
     Node* image = graph_out->CopyNode(node);
-    node_images[node] = image;
+    (*node_images)[node] = image;
   }
-  node_images[graph_in_->source_node()] = graph_out->source_node();
-  node_images[graph_in_->sink_node()] = graph_out->sink_node();
+  (*node_images)[graph_in_->source_node()] = graph_out->source_node();
+  (*node_images)[graph_in_->sink_node()] = graph_out->sink_node();
+  return Status::OK();
+}
 
-  // Add function call nodes for each subgraph.
+Status Encapsulator::AddFunctionCallNodes(
+    const std::unordered_map<const Node*, Node*>& node_images,
+    bool parallel_checking, Graph* graph_out) {
   for (auto& subgraph_entry : subgraphs_) {
-    Subgraph& subgraph = subgraph_entry.second;
+    TF_RETURN_IF_ERROR(subgraph_entry.second.AddFunctionCallNode(
+        node_images, parallel_checking, graph_out));
+  }
+  return Status::OK();
+}
 
-    subgraph.call_node_inputs = graph_out->AddNode(subgraph.call_node_def, &s);
-    if (!s.ok()) return s;
+Status Encapsulator::FindOutputImageOfEdgeSrc(
+    const string& src_func_id, const string& dst_func_id,
+    const std::unordered_map<const Node*, Node*>& node_images,
+    const Node* original_src_node, Node** src_image) {
+  if (IsInSubgraph(src_func_id)) {
+    // The edge is from a subgraph to a regular node in the output graph so
+    // use the subgraph's call node output.
+    *src_image = subgraphs_.at(src_func_id).GetCallNodeForOutputs();
+  } else {
+    // The source of the edge is in the output graph so use the node image in
+    // the output graph.
+    *src_image = node_images.at(original_src_node);
+  }
+  return Status::OK();
+}
+
+int Encapsulator::FindOutputSlotOfEdgeSrc(const string& src_func_id,
+                                          const string& dst_func_id,
+                                          const Edge* edge) {
+  if (IsInSubgraph(src_func_id)) {
+    const Subgraph& src_subgraph = subgraphs_.at(src_func_id);
+    // 'src' is in a subgraph and 'dst' is a regular node in the output
+    // graph. Use the corresponding call output instead.
+    return src_subgraph.GetResultIndexForEdge(edge);
+  } else {
+    // The source of the edge is in the output graph so use the regular edge
+    // slot.
+    return edge->src_output();
+  }
+}
 
-    // Copy the assigned device and the key_annotation over.
-    subgraph.call_node_inputs->set_assigned_device_name(subgraph.device);
-    subgraph.call_node_outputs = subgraph.call_node_inputs;
+Status Encapsulator::FindOutputImageOfEdgeDst(
+    const string& src_func_id, const string& dst_func_id,
+    const std::unordered_map<const Node*, Node*>& node_images,
+    const Node* original_dst_node, Node** dst_image) {
+  if (IsInSubgraph(dst_func_id)) {
+    // The edge is to a subgraph from a regular node in the output graph so
+    // use the subgraph's call node input.
+    *dst_image = subgraphs_.at(dst_func_id).GetCallNodeForInputs();
+  } else {
+    // The destination of the edge is in the output graph so use the node image
+    // in the output graph.
+    *dst_image = node_images.at(original_dst_node);
+  }
+  return Status::OK();
+}
+
+int Encapsulator::FindOutputSlotOfEdgeDst(const string& src_func_id,
+                                          const string& dst_func_id,
+                                          const Edge* edge) {
+  if (IsInSubgraph(dst_func_id)) {
+    const Subgraph& dst_subgraph = subgraphs_.at(dst_func_id);
+    // 'dst' is in a subgraph and 'src' is a regular node in the output
+    // graph. Use the corresponding call input instead.
+    return dst_subgraph.GetArgIndexForEdge(edge);
+  } else {
+    // The destination of the edge is in the output graph so use the regular
+    // edge slot.
+    return edge->dst_input();
+  }
+}
+
+Status Encapsulator::CopyEdgeToOutputGraph(
+    const Edge* edge, const string& src_func_id, const string& dst_func_id,
+    const std::unordered_map<const Node*, Node*>& node_images,
+    bool parallel_checking, Graph* graph_out,
+    std::unordered_set<std::pair<NodeSlot, NodeSlot>, NodeSlot::PairHasher>*
+        edges_added) {
+  Node* src_image;
+  TF_RETURN_IF_ERROR(FindOutputImageOfEdgeSrc(
+      src_func_id, dst_func_id, node_images, edge->src(), &src_image));
+  Node* dst_image;
+  TF_RETURN_IF_ERROR(FindOutputImageOfEdgeDst(
+      src_func_id, dst_func_id, node_images, edge->dst(), &dst_image));
+
+  // If this is a control edge then copy it and return. Lift control edges onto
+  // the enclosing call operator.
+  if (edge->IsControlEdge()) {
+    // Add the control edge, if we have not already added it, using the images
+    // determined above (potentially call operators or RecvAtHost/SendFromHost).
+    if (edges_added->emplace(NodeSlot(src_image, -1), NodeSlot(dst_image, -1))
+            .second) {
+      graph_out->AddControlEdge(src_image, dst_image);
+    }
 
+    // If parallel checking is enabled, also add a control edge to the
+    // corresponding parallel check op.
     if (parallel_checking) {
-      TF_RETURN_IF_ERROR(BuildParallelCheckOp(node_images, subgraph, graph_out,
-                                              &subgraph.call_node_outputs));
+      graph_out->AddControlEdge(src_image, node_images.at(edge->dst()));
     }
+    return Status::OK();
+  }
+
+  int src_output = FindOutputSlotOfEdgeSrc(src_func_id, dst_func_id, edge);
+
+  int dst_input = FindOutputSlotOfEdgeDst(src_func_id, dst_func_id, edge);
+
+  if (IsInSubgraph(dst_func_id) && parallel_checking) {
+    // If we are parallel checking, also feed the tensor as an input to the
+    // corresponding parallel check subgraph.
+    graph_out->AddEdge(src_image, src_output, node_images.at(edge->dst()),
+                       edge->dst_input());
   }
 
+  // Add the edge, if we have not already added it.
+  if (edges_added
+          ->emplace(NodeSlot(src_image, src_output),
+                    NodeSlot(dst_image, dst_input))
+          .second) {
+    graph_out->AddEdge(src_image, src_output, dst_image, dst_input);
+  }
+  return Status::OK();
+}
+
+Status Encapsulator::AddEdgesToOutputGraph(
+    const std::unordered_map<const Node*, Node*>& node_images,
+    bool parallel_checking, Graph* graph_out) {
   // Set of edges already added to the output graph, represented as (src, dst)
   // pairs. We use the set to deduplicate edges; multiple edges in the input
   // graph may map to one edge in the output graph.
   std::unordered_set<std::pair<NodeSlot, NodeSlot>, NodeSlot::PairHasher>
       edges_added;
 
-  // Add edges to the graph_out graph.
   for (const Edge* edge : graph_in_->edges()) {
-    string src_func_id = GetFunctionNameAttr(edge->src());
-    string dst_func_id = GetFunctionNameAttr(edge->dst());
+    string src_func_id;
+    TF_RETURN_IF_ERROR(GetFunctionNameAttr(edge->src(), &src_func_id));
+    string dst_func_id;
+    TF_RETURN_IF_ERROR(GetFunctionNameAttr(edge->dst(), &dst_func_id));
 
     // Ignore edges that are strictly contained within one subgraph, unless
     // we are constructing parallel check graphs.
-    if (!src_func_id.empty() && src_func_id == dst_func_id) {
+    if (IsInSubgraph(src_func_id) && IsInSubgraph(dst_func_id) &&
+        src_func_id == dst_func_id) {
       if (parallel_checking) {
         Node* src_image = node_images.at(edge->src());
         Node* dst_image = node_images.at(edge->dst());
@@ -546,63 +860,29 @@ Status Encapsulator::BuildOutputGraph(bool parallel_checking,
       continue;
     }
 
-    // We have an edge that crosses a cluster boundary.
-    Node* src_image = src_func_id.empty()
-                          ? node_images.at(edge->src())
-                          : subgraphs_.at(src_func_id).call_node_outputs;
-    Node* dst_image = dst_func_id.empty()
-                          ? node_images.at(edge->dst())
-                          : subgraphs_.at(dst_func_id).call_node_inputs;
-
-    // Copy control edges. Lift control edges onto the enclosing call operator.
-    if (edge->IsControlEdge()) {
-      // Add the control edge, if we have not already added it.
-      if (edges_added.emplace(NodeSlot(src_image, -1), NodeSlot(dst_image, -1))
-              .second) {
-        graph_out->AddControlEdge(src_image, dst_image);
-      }
-
-      // If parallel checking is enabled, also add a control edge to the
-      // corresponding parallel check op.
-      if (parallel_checking) {
-        graph_out->AddControlEdge(src_image, node_images.at(edge->dst()));
-      }
-      continue;
-    }
-
-    int src_output = edge->src_output();
-    if (!src_func_id.empty()) {
-      // 'src' is in a subgraph. Use the corresponding call output instead.
-      const Subgraph& src_subgraph = subgraphs_.at(src_func_id);
-      src_output =
-          src_subgraph.results.at(NodeSlot(edge->src(), edge->src_output()));
-    }
+    // We have an edge that crosses a cluster boundary or is entirely within the
+    // unclustered graph.
+    TF_RETURN_IF_ERROR(CopyEdgeToOutputGraph(edge, src_func_id, dst_func_id,
+                                             node_images, parallel_checking,
+                                             graph_out, &edges_added));
+  }
 
-    int dst_input = edge->dst_input();
+  return Status::OK();
+}
 
-    if (!dst_func_id.empty()) {
-      // 'dst' is in a subgraph. Use the corresponding call input instead.
-      const Subgraph& dst_subgraph = subgraphs_.at(dst_func_id);
-      dst_input =
-          dst_subgraph.args_by_dst.at(NodeSlot(edge->dst(), edge->dst_input()));
+Status Encapsulator::BuildOutputGraph(bool parallel_checking,
+                                      Graph* graph_out) {
+  // Map from nodes in the input graph to nodes in the output graph.
+  std::unordered_map<const Node*, Node*> node_images;
 
-      // If we are parallel checking, also feed the tensor as an input to the
-      // corresponding parallel check subgraph.
-      if (parallel_checking) {
-        graph_out->AddEdge(src_image, src_output, node_images.at(edge->dst()),
-                           edge->dst_input());
-      }
-    }
-    // Add the edge, if we have not already added it.
-    if (edges_added
-            .emplace(NodeSlot(src_image, src_output),
-                     NodeSlot(dst_image, dst_input))
-            .second) {
-      graph_out->AddEdge(src_image, src_output, dst_image, dst_input);
-    }
-  }
+  TF_RETURN_IF_ERROR(
+      CopyNodesToOutputGraph(parallel_checking, graph_out, &node_images));
+  TF_RETURN_IF_ERROR(
+      AddFunctionCallNodes(node_images, parallel_checking, graph_out));
+  TF_RETURN_IF_ERROR(
+      AddEdgesToOutputGraph(node_images, parallel_checking, graph_out));
 
-  return s;
+  return Status::OK();
 }
 
 }  // anonymous namespace
@@ -615,20 +895,18 @@ Status EncapsulateSubgraphsInFunctions(
   Status s;
 
   Encapsulator encapsulator(std::move(group_attribute), &graph_in);
-  s = encapsulator.SplitIntoSubgraphs();
-  if (!s.ok()) return s;
+  TF_RETURN_IF_ERROR(encapsulator.SplitIntoSubgraphs());
 
-  s = encapsulator.BuildFunctionDefs(rewrite_subgraph_fn,
-                                     reuse_existing_functions, library);
-  if (!s.ok()) return s;
+  TF_RETURN_IF_ERROR(encapsulator.BuildFunctionDefs(
+      rewrite_subgraph_fn, reuse_existing_functions, library));
 
   std::unique_ptr<Graph> out(new Graph(library));
   out->set_versions(graph_in.versions());
-  s = encapsulator.BuildOutputGraph(parallel_checking, out.get());
-  if (!s.ok()) return s;
+  TF_RETURN_IF_ERROR(
+      encapsulator.BuildOutputGraph(parallel_checking, out.get()));
 
   *graph_out = std::move(out);
-  return s;
+  return Status::OK();
 }
 
 // Finds the types of the _Arg nodes, indexed by position.
@@ -744,8 +1022,8 @@ Status EncapsulateSubgraphsPass::Run(
 
   TF_RETURN_IF_ERROR(EncapsulateSubgraphsInFunctions(
       kXlaClusterAttr, **options.graph, rewrite_subgraph,
-      flags->tf_xla_parallel_checking, /*reuse_existing_functions=*/false,
-      &graph_out, library));
+      flags->tf_xla_parallel_checking,
+      /*reuse_existing_functions=*/false, &graph_out, library));
 
   if (VLOG_IS_ON(1)) {
     dump_graph::DumpGraphToFile("after_encapsulate_subgraphs", *graph_out,
-- 
GitLab


From dea51b668ba9858c914a7fcb0fb6fdc3df132d72 Mon Sep 17 00:00:00 2001
From: Akshay Agrawal <akshayka@google.com>
Date: Thu, 14 Dec 2017 15:05:12 -0800
Subject: [PATCH 1027/1225] Add `init_scope`, a scope for wrapping variable
 creation and initialization.

There is often a need to lift variable initialization ops out of control
flow contexts, graphs that are building functions, and gradient tapes.
Entering an `init_scope` is a mechanism for satisfying these desiderata. In
particular, entering an `init_scope` has three effects:

  (1) All control dependencies are cleared the moment the scope is entered;
      this is equivalent to entering the manager returned from
      `control_dependencies(None)`, which is how we exit control-flow contexts like
      `tf.while_loop` and `tf.cond`.
  (2) All operations that are created while the scope is active are lifted
      into the lowest context on the `context_stack` that is not building a
      graph function.
  (3) The gradient tape is paused while the scope is active.

In (2), a context is defined as either a graph or an eager context. Every
context switch, i.e., every installation of a graph as the default graph and
every switch into eager mode, is logged in a thread-local stack called
the `context_stack`; the log entry for a context switch is popped from the
stack when the context is exited. Entering an `init_scope`, with respect to
(2), is equivalent to crawling up the `context_stack`, finding the first
context that is not building a graph function, and entering it.

PiperOrigin-RevId: 179104270
---
 tensorflow/python/BUILD                 |   1 +
 tensorflow/python/eager/context.py      |  40 +++++
 tensorflow/python/framework/ops.py      |  61 ++++++++
 tensorflow/python/framework/ops_test.py | 191 ++++++++++++++++++++++++
 4 files changed, 293 insertions(+)

diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD
index e77fba4a4c..45383eda99 100644
--- a/tensorflow/python/BUILD
+++ b/tensorflow/python/BUILD
@@ -1108,6 +1108,7 @@ py_test(
         ":variables",
         "//tensorflow/core:protos_all_py",
         "//tensorflow/python/eager:context",
+        "//tensorflow/python/eager:function",
     ],
 )
 
diff --git a/tensorflow/python/eager/context.py b/tensorflow/python/eager/context.py
index 415416cfae..8aec242f1d 100644
--- a/tensorflow/python/eager/context.py
+++ b/tensorflow/python/eager/context.py
@@ -18,6 +18,7 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
+import collections
 import contextlib
 import copy
 import random
@@ -62,6 +63,41 @@ class _EagerContext(threading.local):
     self.scalar_cache = {}
 
 
+ContextStackEntry = collections.namedtuple(
+    "ContextStackEntry", ["is_building_function", "enter_context_fn"])
+
+
+class ContextStack(threading.local):
+  """A thread-local stack of context switches."""
+
+  def __init__(self):
+    super(ContextStack, self).__init__()
+    self.stack = []
+
+  def push(self, is_building_function, enter_context_fn):
+    """Push metadata about a context switch onto the stack.
+
+    A context switch can take one of two forms: installing a graph as the
+    default graph, or entering the eager context.
+
+    Args:
+      is_building_function: (bool.) Whether the context is building a function.
+      enter_context_fn: (function.) A callable that executes the context switch.
+        For example, `graph.as_default` or `eager_mode`.
+    """
+
+    self.stack.append(
+        ContextStackEntry(is_building_function, enter_context_fn))
+
+  def pop(self):
+    """Pop the stack."""
+
+    self.stack.pop()
+
+
+context_stack = ContextStack()
+
+
 # TODO(agarwal): rename to EagerContext / EagerRuntime ?
 # TODO(agarwal): consider keeping the corresponding Graph here.
 class Context(object):
@@ -183,10 +219,14 @@ class Context(object):
     ctx = self._eager_context
     old_mode = ctx.mode
     ctx.mode = mode
+    if mode == EAGER_MODE:
+      context_stack.push(False, eager_mode)
     try:
       yield
     finally:
       ctx.mode = old_mode
+      if mode == EAGER_MODE:
+        context_stack.pop()
 
   def in_graph_mode(self):
     """Returns True if current thread is in GRAPH mode."""
diff --git a/tensorflow/python/framework/ops.py b/tensorflow/python/framework/ops.py
index 836f09fba8..947a9e49cc 100644
--- a/tensorflow/python/framework/ops.py
+++ b/tensorflow/python/framework/ops.py
@@ -4850,10 +4850,71 @@ class _DefaultGraphStack(_DefaultStack):  # pylint: disable=protected-access
     super(_DefaultGraphStack, self).reset()
     self._global_default_graph = None
 
+  @tf_contextlib.contextmanager
+  def get_controller(self, default):
+    try:
+      context.context_stack.push(default.building_function, default.as_default)
+      with super(_DefaultGraphStack, self).get_controller(default) as g:
+        yield g
+    finally:
+      context.context_stack.pop()
+
 
 _default_graph_stack = _DefaultGraphStack()
 
 
+# pylint: disable=g-doc-return-or-yield,line-too-long
+@tf_contextlib.contextmanager
+def init_scope():
+  """A context manager that lifts ops out of control-flow scopes and function-building graphs.
+
+  There is often a need to lift variable initialization ops out of control-flow
+  scopes, function-building graphs, and gradient tapes. Entering an
+  `init_scope` is a mechanism for satisfying these desiderata. In particular,
+  entering an `init_scope` has three effects:
+
+    (1) All control dependencies are cleared the moment the scope is entered;
+        this is equivalent to entering the context manager returned from
+        `control_dependencies(None)`, which has the side-effect of exiting
+        control-flow scopes like `tf.cond` and `tf.while_loop`.
+
+    (2) All operations that are created while the scope is active are lifted
+        into the lowest context on the `context_stack` that is not building a
+        graph function. Here, a context is defined as either a graph or an eager
+        context. Every context switch, i.e., every installation of a graph as
+        the default graph and every switch into eager mode, is logged in a
+        thread-local stack called the `context_stack`; the log entry for a
+        context switch is popped from the stack when the context is exited.
+        Entering an `init_scope` is equivalent to crawling up the
+        `context_stack`, finding the first context that is not building a graph
+        function, and entering it.
+
+    (3) The gradient tape is paused while the scope is active.
+  """
+# pylint: enable=g-doc-return-or-yield,line-too-long
+
+  outer_context = None
+  if not context.context_stack.stack:
+    # This is correct because of an invariant: the stack is
+    # empty if and only if eager execution has not been enabled.
+    outer_context = get_default_graph().as_default
+  else:
+    for stack_entry in reversed(context.context_stack.stack):
+      if not stack_entry.is_building_function:
+        outer_context = stack_entry.enter_context_fn
+        break
+
+  if outer_context is None:
+    raise AssertionError("All graphs are building functions, and no "
+                         "eager context was previously active.")
+
+  try:
+    with outer_context(), control_dependencies(None), tape.stop_recording():
+      yield
+  finally:
+    pass
+
+
 def enable_eager_execution(config=None, device_policy=None):
   """Enables, for the rest of the lifetime of this program, eager execution.
 
diff --git a/tensorflow/python/framework/ops_test.py b/tensorflow/python/framework/ops_test.py
index f04f0cc56d..92d42c1807 100644
--- a/tensorflow/python/framework/ops_test.py
+++ b/tensorflow/python/framework/ops_test.py
@@ -26,6 +26,7 @@ from tensorflow.core.framework import types_pb2
 from tensorflow.core.protobuf import config_pb2
 from tensorflow.python.client import session
 from tensorflow.python.eager import context
+from tensorflow.python.eager import function as eager_function
 from tensorflow.python.framework import common_shapes
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import device as pydev
@@ -43,6 +44,7 @@ from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import control_flow_ops
 from tensorflow.python.ops import gen_array_ops
 from tensorflow.python.ops import math_ops
+from tensorflow.python.ops import resource_variable_ops
 from tensorflow.python.ops import resources
 from tensorflow.python.ops import variable_scope
 from tensorflow.python.ops import variables
@@ -1868,6 +1870,195 @@ class OpScopeTest(test_util.TensorFlowTestCase):
     self._testGraphElements([a, variable, b])
 
 
+class InitScopeTest(test_util.TensorFlowTestCase):
+
+  def testClearsControlDependencies(self):
+    g = ops.Graph()
+    a_1 = _apply_op(g, "FloatOutput", [], [dtypes.float32])
+    a_2 = _apply_op(g, "FloatOutput", [], [dtypes.float32])
+    a_3 = _apply_op(g, "FloatOutput", [], [dtypes.float32])
+    a_4 = _apply_op(g, "FloatOutput", [], [dtypes.float32])
+
+    with g.as_default():
+      with g.control_dependencies([a_1]):
+        with g.control_dependencies([a_2]):
+          with ops.init_scope():
+            with g.control_dependencies([a_3]):
+              with g.control_dependencies([a_4]):
+                # deps [a_3, a_4]
+                b_3_4 = _apply_op(g, "FloatOutput", [], [dtypes.float32])
+              # deps = [a_3]
+              b_3 = _apply_op(g, "FloatOutput", [], [dtypes.float32])
+            # deps back to None
+            b_none = _apply_op(g, "FloatOutput", [], [dtypes.float32])
+          # deps back to [a_1, a_2]
+          b_1_2 = _apply_op(g, "FloatOutput", [], [dtypes.float32])
+        # deps back to [a_1]
+        b_1 = _apply_op(g, "FloatOutput", [], [dtypes.float32])
+        with ops.init_scope():
+          # deps are None again
+          b_none2 = _apply_op(g, "FloatOutput", [], [dtypes.float32])
+
+    self.assertItemsEqual([a_3.op, a_4.op], b_3_4.op.control_inputs)
+    self.assertItemsEqual([a_3.op], b_3.op.control_inputs)
+    self.assertItemsEqual([], b_none.op.control_inputs)
+    self.assertItemsEqual([a_1.op, a_2.op], b_1_2.op.control_inputs)
+    self.assertItemsEqual([a_1.op], b_1.op.control_inputs)
+    self.assertItemsEqual([], b_none2.op.control_inputs)
+
+  def testLiftsOpsFromFunctions(self):
+    g0 = ops.Graph()
+    g1 = ops.Graph()
+    g1._building_function = True  # pylint: disable=protected-access
+    g2 = ops.Graph()
+    g2._building_function = True  # pylint: disable=protected-access
+
+    with g0.as_default():
+      with g1.as_default():
+        with g2.as_default():
+          with ops.init_scope():
+            _ = constant_op.constant(1.0)
+
+    self.assertEqual(len(g2.get_operations()), 0)
+    self.assertEqual(len(g1.get_operations()), 0)
+    self.assertEqual(len(g0.get_operations()), 1)
+
+  def testComposes(self):
+    g0 = ops.Graph()
+    g1 = ops.Graph()
+    g1._building_function = True  # pylint: disable=protected-access
+    g2 = ops.Graph()
+    g2._building_function = True  # pylint: disable=protected-access
+    g3 = ops.Graph()
+    g3._building_function = False  # pylint: disable=protected-access
+
+    with g0.as_default():
+      with g1.as_default():
+        with ops.init_scope():
+          # This op should be lifted into g0.
+          _ = constant_op.constant(1.0)
+          self.assertIs(g0, ops.get_default_graph())
+          self.assertEqual(len(g2.get_operations()), 0)
+          self.assertEqual(len(g1.get_operations()), 0)
+          self.assertEqual(len(g0.get_operations()), 1)
+        with g2.as_default():
+          with ops.init_scope():
+            # This op should be lifted into g0.
+            _ = constant_op.constant(1.0)
+            self.assertIs(g0, ops.get_default_graph())
+            with g3.as_default():
+              with ops.init_scope():
+                # This op should be lifted into g3, because g3 is not building a
+                # function.
+                _ = constant_op.constant(1.0)
+                self.assertIs(g3, ops.get_default_graph())
+
+    self.assertEqual(len(g3.get_operations()), 1)
+    self.assertEqual(len(g2.get_operations()), 0)
+    self.assertEqual(len(g1.get_operations()), 0)
+    self.assertEqual(len(g0.get_operations()), 2)
+
+  def testEscapesToEagerContext(self):
+    g = ops.Graph()
+    g._building_function = True  # pylint: disable=protected-access
+    with context.eager_mode():
+      with context.graph_mode():
+        with g.as_default():
+          with ops.init_scope():
+            # Because g is building a function, init_scope should
+            # escape out to the eager context.
+            self.assertTrue(context.in_eager_mode())
+          # g should be reinstated as the default graph, and the
+          # graph context should be re-entered.
+          self.assertIs(g, ops.get_default_graph())
+          self.assertTrue(context.in_graph_mode())
+
+  def testAllGraphsBuildingFunctionsRaisesError(self):
+    g = ops.Graph()
+    g._building_function = True  # pylint: disable=protected-access
+    with g.as_default():
+      with self.assertRaises(AssertionError):
+        with ops.init_scope():
+          pass
+
+  def testStaysInEagerWhenOnlyEagerContextActive(self):
+    with context.eager_mode():
+      with ops.init_scope():
+        self.assertTrue(context.eager_mode())
+      self.assertTrue(context.eager_mode())
+
+  def testEscapesDefunWhenInEagerMode(self):
+
+    def function_with_variables():
+      with ops.init_scope():
+        v = resource_variable_ops.ResourceVariable(3)
+      return v.assign_add(1)
+
+    with context.eager_mode():
+      # Each invocation of function_with_variables recreates a variable.
+      self.assertEqual(4, int(function_with_variables()))
+      self.assertEqual(4, int(function_with_variables()))
+
+      compiled = eager_function.defun(function_with_variables)
+      # The init_scope in function_with_variables lifts the variable out
+      # of the graph function constructed by defun; hence,
+      # compiled now appears to be stateful.
+      self.assertEqual(4, int(compiled()))
+      self.assertEqual(5, int(compiled()))
+
+  def testEscapesDefunWhenInGraphMode(self):
+    def function_with_variables(name):
+      with ops.init_scope():
+        _ = variable_scope.get_variable(name, shape=(1,))
+
+    g = ops.Graph()
+    with g.as_default():
+      with self.test_session():
+        # First ensure that graphs that are not building functions are
+        # not escaped.
+        function_with_variables("foo")
+        with self.assertRaisesRegexp(ValueError,
+                                     r"Variable foo already exists.*"):
+          # This will fail because reuse is not set to True.
+          function_with_variables("foo")
+
+        compiled = eager_function.defun(function_with_variables)
+        compiled("bar")
+        self.assertEqual(
+            len(ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)), 2)
+
+        # The second call to `compiled` should not create variables: the
+        # init_scope has lifted the variable creation code out of the defun.
+        compiled("bar")
+        self.assertEqual(
+            len(ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)), 2)
+
+  def testEscapesNestedDefun(self):
+
+    def inner_function():
+      with ops.init_scope():
+        v = resource_variable_ops.ResourceVariable(1)
+      return v.assign_add(2)
+
+    def outer_function(inner=None):
+      with ops.init_scope():
+        v0 = resource_variable_ops.ResourceVariable(0)
+      return v0.assign_add(1) + inner()
+
+    with context.eager_mode():
+      # Each invocation of outer_function recreates variables.
+      self.assertEqual(4, int(outer_function(inner=inner_function)))
+      self.assertEqual(4, int(outer_function(inner=inner_function)))
+
+      compiled_inner = eager_function.defun(inner_function)
+      compiled_outer = eager_function.defun(outer_function)
+      # The init_scope lifts variables out of the graph functions
+      # constructed by defun; hence, compiled_outer should now appear to be
+      # stateful.
+      self.assertEqual(4, int(compiled_outer(inner=compiled_inner)))
+      self.assertEqual(7, int(compiled_outer(inner=compiled_inner)))
+
+
 @test_util.with_c_api
 class GraphTest(test_util.TensorFlowTestCase):
 
-- 
GitLab


From 888c1dc18948802e20f42a710f1853a784883f72 Mon Sep 17 00:00:00 2001
From: Chris Donahue <chrisdonahue@users.noreply.github.com>
Date: Thu, 14 Dec 2017 15:10:02 -0800
Subject: [PATCH 1028/1225] Changed ffmpeg verbosity semantics (#14582)

---
 tensorflow/contrib/ffmpeg/default/ffmpeg_lib.cc | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/tensorflow/contrib/ffmpeg/default/ffmpeg_lib.cc b/tensorflow/contrib/ffmpeg/default/ffmpeg_lib.cc
index 1245f515fe..1e8af1458c 100644
--- a/tensorflow/contrib/ffmpeg/default/ffmpeg_lib.cc
+++ b/tensorflow/contrib/ffmpeg/default/ffmpeg_lib.cc
@@ -49,7 +49,8 @@ std::vector<string> FfmpegAudioCommandLine(const string& input_filename,
           "-nostdin",             // No interactive commands accepted.
           "-f", input_format_id,  // eg: "mp3"
           "-probesize", StrCat(kDefaultProbeSize), "-i", input_filename,
-          "-loglevel", "info",  // Enable verbose logging to support debugging.
+          "-loglevel", "error",   // Print errors only.
+          "-hide_banner",         // Skip printing build options, version, etc.
           "-map_metadata", "-1",  // Copy global metadata from input to output.
           "-vn",                  // No video recording.
           "-ac:a:0", StrCat(channel_count), "-ar:a:0",
@@ -72,7 +73,8 @@ std::vector<string> FfmpegVideoCommandLine(const string& input_filename,
           "-probesize",
           StrCat(kDefaultProbeSize),
           "-loglevel",
-          "info",  // Enable verbose logging to support debugging.
+          "error",  // Print errors only.
+          "-hide_banner",  // Skip printing build options, version, etc.
           "-vcodec",
           "rawvideo",
           "-pix_fmt",
-- 
GitLab


From 264e7e8b4b28a84a94310e20fa26d8e8e2a9cd60 Mon Sep 17 00:00:00 2001
From: Benoit Steiner <bsteiner@google.com>
Date: Thu, 14 Dec 2017 15:13:45 -0800
Subject: [PATCH 1029/1225] Added the ability to sort important ops
 topologically

PiperOrigin-RevId: 179105527
---
 tensorflow/python/grappler/item.i  | 26 ++++++++++++++++++++++----
 tensorflow/python/grappler/item.py |  6 ++++--
 2 files changed, 26 insertions(+), 6 deletions(-)

diff --git a/tensorflow/python/grappler/item.i b/tensorflow/python/grappler/item.i
index 8f75b827b6..eb396ef1ad 100644
--- a/tensorflow/python/grappler/item.i
+++ b/tensorflow/python/grappler/item.i
@@ -45,6 +45,7 @@ struct GItem {
 #include "tensorflow/core/grappler/costs/op_performance_data.pb.h"
 #include "tensorflow/core/grappler/grappler_item_builder.h"
 #include "tensorflow/core/grappler/costs/graph_properties.h"
+#include "tensorflow/core/grappler/utils/topological_sort.h"
 #include "tensorflow/core/lib/core/error_codes.pb.h"
 #include "tensorflow/core/lib/core/status.h"
 #include "tensorflow/core/protobuf/meta_graph.pb.h"
@@ -93,7 +94,8 @@ static GItem TF_NewItem(
   return GItem(item.release());
 }
 
-static std::vector<string> TF_IdentifyImportantOps(GItem item) {
+static std::vector<string> TF_IdentifyImportantOps(GItem item, bool sort_topologically,
+                                                   TF_Status* status) {
   if (item.is_none()) {
     return {};
   }
@@ -109,8 +111,23 @@ static std::vector<string> TF_IdentifyImportantOps(GItem item) {
   }
 
   std::vector<string> ops;
-  for (const auto& op_name : op_names) {
-    ops.push_back(op_name);
+  if (sort_topologically) {
+    tensorflow::GraphDef subgraph;
+    for (const tensorflow::NodeDef& node : item->graph.node()) {
+      if (op_names.find(node.name()) != op_names.end()) {
+        *subgraph.add_node() = node;
+      }
+    }
+    tensorflow::Status s = tensorflow::grappler::TopologicalSort(&subgraph);
+    tensorflow::Set_TF_Status_from_Status(status, s);
+    for (const tensorflow::NodeDef& node : subgraph.node()) {
+      ops.push_back(node.name());
+    }
+  }
+  else {
+    for (const auto& op_name : op_names) {
+      ops.push_back(op_name);
+    }
   }
 
   return ops;
@@ -153,5 +170,6 @@ static PyObject* TF_GetOpProperties(GItem item) {
 static GItem TF_NewItem(
     const tensorflow::MetaGraphDef& meta_graph, bool ignore_colocation,
     bool ignore_user_placement, TF_Status* out_status);
-static std::vector<string> TF_IdentifyImportantOps(GItem item);
+static std::vector<string> TF_IdentifyImportantOps(GItem item, bool sort_topologically,
+                                                   TF_Status* status);
 static PyObject* TF_GetOpProperties(GItem item);
diff --git a/tensorflow/python/grappler/item.py b/tensorflow/python/grappler/item.py
index 4fc94ec968..c6e66d3c27 100644
--- a/tensorflow/python/grappler/item.py
+++ b/tensorflow/python/grappler/item.py
@@ -50,8 +50,10 @@ class Item(object):
     self._tf_item = None
     self._BuildTFItem()
 
-  def IdentifyImportantOps(self):
-    return tf_item.TF_IdentifyImportantOps(self.tf_item)
+  def IdentifyImportantOps(self, sort_topologically=False):
+    with errors.raise_exception_on_not_ok_status() as status:
+      return tf_item.TF_IdentifyImportantOps(self.tf_item, sort_topologically,
+                                             status)
 
   def GetOpProperties(self):
     ret_from_swig = tf_item.TF_GetOpProperties(self.tf_item)
-- 
GitLab


From 481b5f4410b34b65570f9dce62b34e9199769a38 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 14 Dec 2017 15:58:25 -0800
Subject: [PATCH 1030/1225] Enable associative & commutative operator
 optimization.

PiperOrigin-RevId: 179111549
---
 tensorflow/core/grappler/op_types.cc                      | 6 +++++-
 tensorflow/core/grappler/optimizers/constant_folding.cc   | 8 ++++----
 .../core/grappler/optimizers/constant_folding_test.cc     | 3 +--
 3 files changed, 10 insertions(+), 7 deletions(-)

diff --git a/tensorflow/core/grappler/op_types.cc b/tensorflow/core/grappler/op_types.cc
index 75a11a4d36..24c372a7cf 100644
--- a/tensorflow/core/grappler/op_types.cc
+++ b/tensorflow/core/grappler/op_types.cc
@@ -26,7 +26,11 @@ namespace tensorflow {
 namespace grappler {
 
 bool IsAdd(const NodeDef& node) {
-  return node.op() == "Add" || node.op() == "AddV2";
+  if (node.op() == "AddV2" || node.op() == "Add") {
+    DataType type = node.attr().at("T").type();
+    return type != DT_STRING;
+  }
+  return false;
 }
 
 bool IsAddN(const NodeDef& node) { return node.op() == "AddN"; }
diff --git a/tensorflow/core/grappler/optimizers/constant_folding.cc b/tensorflow/core/grappler/optimizers/constant_folding.cc
index 360ada4b1c..59df49c245 100644
--- a/tensorflow/core/grappler/optimizers/constant_folding.cc
+++ b/tensorflow/core/grappler/optimizers/constant_folding.cc
@@ -1486,8 +1486,7 @@ Status ConstantFolding::SimplifyGraph(GraphDef* output,
     // TODO(rmlarsen): Handle non-associative/non-commutative operators like
     // subtraction and division, as well as mixed subtraction/addition,
     // division/multiplication.
-    if (is_aggressive && (is_add || is_mul) &&
-        NumNonControlInputs(*node) == 2) {
+    if ((is_add || is_mul) && NumNonControlInputs(*node) == 2) {
       NodeDef* left_child = node_map_->GetNode(node->input(0));
       NodeDef* right_child = node_map_->GetNode(node->input(1));
       // One child must be constant, and the other the same op as the parent.
@@ -1512,7 +1511,7 @@ Status ConstantFolding::SimplifyGraph(GraphDef* output,
         continue;
       }
 
-      const int parent_const_input = left_child_is_constant ? 0 : 1;
+      // Identify the nodes to swap.
       const NodeDef* left_leaf = node_map_->GetNode(child_node->input(0));
       const NodeDef* right_leaf = node_map_->GetNode(child_node->input(1));
       const bool left_leaf_is_constant = IsReallyConstant(*left_leaf);
@@ -1521,7 +1520,8 @@ Status ConstantFolding::SimplifyGraph(GraphDef* output,
         // Child is already foldable, leave it alone.
         continue;
       }
-      int non_const_leaf_input = left_leaf_is_constant ? 1 : 0;
+      const int non_const_leaf_input = left_leaf_is_constant ? 1 : 0;
+      const int parent_const_input = left_child_is_constant ? 0 : 1;
 
       // Swap the constant child with a non-constant leaf node.
       node_map_->UpdateInput(node->name(), node->input(parent_const_input),
diff --git a/tensorflow/core/grappler/optimizers/constant_folding_test.cc b/tensorflow/core/grappler/optimizers/constant_folding_test.cc
index 31e52c7a4e..a3b3e522eb 100644
--- a/tensorflow/core/grappler/optimizers/constant_folding_test.cc
+++ b/tensorflow/core/grappler/optimizers/constant_folding_test.cc
@@ -97,11 +97,10 @@ TEST_F(ConstantFoldingTest, AddTree) {
   item.fetch = {"add_parent", "mul_parent", "addmul_parent"};
   TF_CHECK_OK(s.ToGraphDef(&item.graph));
 
-  ConstantFolding fold(RewriterConfig::AGGRESSIVE, nullptr /* cpu_device */);
+  ConstantFolding fold(nullptr /* cpu_device */);
   GraphDef output;
   Status status = fold.Optimize(nullptr, item, &output);
   TF_EXPECT_OK(status);
-  LOG(INFO) << "Final results =\n" << output.DebugString();
 
   EXPECT_EQ(9, output.node_size());
 
-- 
GitLab


From a5b2a0c9a3335d10c4dd3dfdff96149f74a4d120 Mon Sep 17 00:00:00 2001
From: Jiri Simsa <jsimsa@google.com>
Date: Thu, 14 Dec 2017 16:05:52 -0800
Subject: [PATCH 1031/1225] Moving tf.data kernels to their own package.

PiperOrigin-RevId: 179112798
---
 tensorflow/BUILD                              |   2 +
 tensorflow/core/kernels/BUILD                 | 502 +--------------
 tensorflow/core/kernels/captured_function.h   | 115 +---
 tensorflow/core/kernels/data/BUILD            | 531 ++++++++++++++++
 .../kernels/{ => data}/batch_dataset_op.cc    |   3 +-
 .../kernels/{ => data}/cache_dataset_ops.cc   |   3 +-
 .../kernels/{ => data}/captured_function.cc   |   4 +-
 .../core/kernels/data/captured_function.h     | 127 ++++
 .../{ => data}/concatenate_dataset_op.cc      |   3 +-
 tensorflow/core/kernels/{ => data}/dataset.cc |   4 +-
 tensorflow/core/kernels/data/dataset.h        | 578 ++++++++++++++++++
 .../core/kernels/{ => data}/dataset_utils.cc  |   2 +-
 .../core/kernels/{ => data}/dataset_utils.h   |  10 +-
 .../dense_to_sparse_batch_dataset_op.cc       |   3 +-
 .../kernels/{ => data}/filter_dataset_op.cc   |   6 +-
 .../kernels/{ => data}/flat_map_dataset_op.cc |   8 +-
 .../{ => data}/group_by_window_dataset_op.cc  |   7 +-
 .../{ => data}/ignore_errors_dataset_op.cc    |   3 +-
 .../{ => data}/interleave_dataset_op.cc       |   8 +-
 .../core/kernels/{ => data}/iterator_ops.cc   |   4 +-
 .../{ => data}/map_and_batch_dataset_op.cc    |   4 +-
 .../core/kernels/{ => data}/map_dataset_op.cc |   6 +-
 .../{ => data}/padded_batch_dataset_op.cc     |   3 +-
 .../parallel_interleave_dataset_op.cc         |   7 +-
 .../{ => data}/parallel_map_dataset_op.cc     |   6 +-
 .../kernels/{ => data}/prefetch_dataset_op.cc |   2 +-
 .../kernels/{ => data}/random_dataset_op.cc   |   2 +-
 .../kernels/{ => data}/range_dataset_op.cc    |   3 +-
 .../kernels/{ => data}/reader_dataset_ops.cc  |   3 +-
 .../kernels/{ => data}/repeat_dataset_op.cc   |   3 +-
 .../kernels/{ => data}/scan_dataset_op.cc     |   4 +-
 .../kernels/{ => data}/shuffle_dataset_op.cc  |   3 +-
 .../kernels/{ => data}/skip_dataset_op.cc     |   3 +-
 .../sparse_tensor_slice_dataset_op.cc         |   3 +-
 tensorflow/core/kernels/data/sql/BUILD        |  39 ++
 .../kernels/{ => data}/sql/driver_manager.cc  |   4 +-
 .../kernels/{ => data}/sql/driver_manager.h   |   8 +-
 .../kernels/{ => data}/sql/query_connection.h |   6 +-
 .../{ => data}/sql/sqlite_query_connection.cc |   2 +-
 .../{ => data}/sql/sqlite_query_connection.h  |   8 +-
 .../kernels/{ => data}/sql_dataset_ops.cc     |   6 +-
 .../kernels/{ => data}/stats_aggregator.h     |   6 +-
 .../{ => data}/stats_aggregator_ops.cc        |   2 +-
 .../kernels/{ => data}/stats_dataset_ops.cc   |   4 +-
 .../kernels/{ => data}/take_dataset_op.cc     |   3 +-
 .../kernels/{ => data}/tensor_dataset_op.cc   |   3 +-
 .../{ => data}/tensor_slice_dataset_op.cc     |   3 +-
 .../core/kernels/{ => data}/window_dataset.cc |   2 +-
 .../core/kernels/{ => data}/window_dataset.h  |   8 +-
 .../core/kernels/{ => data}/zip_dataset_op.cc |   3 +-
 tensorflow/core/kernels/dataset.h             | 560 +----------------
 51 files changed, 1366 insertions(+), 1276 deletions(-)
 create mode 100644 tensorflow/core/kernels/data/BUILD
 rename tensorflow/core/kernels/{ => data}/batch_dataset_op.cc (99%)
 rename tensorflow/core/kernels/{ => data}/cache_dataset_ops.cc (99%)
 rename tensorflow/core/kernels/{ => data}/captured_function.cc (99%)
 create mode 100644 tensorflow/core/kernels/data/captured_function.h
 rename tensorflow/core/kernels/{ => data}/concatenate_dataset_op.cc (99%)
 rename tensorflow/core/kernels/{ => data}/dataset.cc (99%)
 create mode 100644 tensorflow/core/kernels/data/dataset.h
 rename tensorflow/core/kernels/{ => data}/dataset_utils.cc (97%)
 rename tensorflow/core/kernels/{ => data}/dataset_utils.h (77%)
 rename tensorflow/core/kernels/{ => data}/dense_to_sparse_batch_dataset_op.cc (99%)
 rename tensorflow/core/kernels/{ => data}/filter_dataset_op.cc (98%)
 rename tensorflow/core/kernels/{ => data}/flat_map_dataset_op.cc (98%)
 rename tensorflow/core/kernels/{ => data}/group_by_window_dataset_op.cc (98%)
 rename tensorflow/core/kernels/{ => data}/ignore_errors_dataset_op.cc (98%)
 rename tensorflow/core/kernels/{ => data}/interleave_dataset_op.cc (98%)
 rename tensorflow/core/kernels/{ => data}/iterator_ops.cc (99%)
 rename tensorflow/core/kernels/{ => data}/map_and_batch_dataset_op.cc (99%)
 rename tensorflow/core/kernels/{ => data}/map_dataset_op.cc (98%)
 rename tensorflow/core/kernels/{ => data}/padded_batch_dataset_op.cc (99%)
 rename tensorflow/core/kernels/{ => data}/parallel_interleave_dataset_op.cc (99%)
 rename tensorflow/core/kernels/{ => data}/parallel_map_dataset_op.cc (98%)
 rename tensorflow/core/kernels/{ => data}/prefetch_dataset_op.cc (99%)
 rename tensorflow/core/kernels/{ => data}/random_dataset_op.cc (99%)
 rename tensorflow/core/kernels/{ => data}/range_dataset_op.cc (98%)
 rename tensorflow/core/kernels/{ => data}/reader_dataset_ops.cc (99%)
 rename tensorflow/core/kernels/{ => data}/repeat_dataset_op.cc (99%)
 rename tensorflow/core/kernels/{ => data}/scan_dataset_op.cc (98%)
 rename tensorflow/core/kernels/{ => data}/shuffle_dataset_op.cc (99%)
 rename tensorflow/core/kernels/{ => data}/skip_dataset_op.cc (99%)
 rename tensorflow/core/kernels/{ => data}/sparse_tensor_slice_dataset_op.cc (99%)
 create mode 100644 tensorflow/core/kernels/data/sql/BUILD
 rename tensorflow/core/kernels/{ => data}/sql/driver_manager.cc (89%)
 rename tensorflow/core/kernels/{ => data}/sql/driver_manager.h (82%)
 rename tensorflow/core/kernels/{ => data}/sql/query_connection.h (92%)
 rename tensorflow/core/kernels/{ => data}/sql/sqlite_query_connection.cc (98%)
 rename tensorflow/core/kernels/{ => data}/sql/sqlite_query_connection.h (84%)
 rename tensorflow/core/kernels/{ => data}/sql_dataset_ops.cc (97%)
 rename tensorflow/core/kernels/{ => data}/stats_aggregator.h (93%)
 rename tensorflow/core/kernels/{ => data}/stats_aggregator_ops.cc (98%)
 rename tensorflow/core/kernels/{ => data}/stats_dataset_ops.cc (98%)
 rename tensorflow/core/kernels/{ => data}/take_dataset_op.cc (99%)
 rename tensorflow/core/kernels/{ => data}/tensor_dataset_op.cc (98%)
 rename tensorflow/core/kernels/{ => data}/tensor_slice_dataset_op.cc (99%)
 rename tensorflow/core/kernels/{ => data}/window_dataset.cc (98%)
 rename tensorflow/core/kernels/{ => data}/window_dataset.h (87%)
 rename tensorflow/core/kernels/{ => data}/zip_dataset_op.cc (99%)

diff --git a/tensorflow/BUILD b/tensorflow/BUILD
index 5167ebe473..d80fe5c829 100644
--- a/tensorflow/BUILD
+++ b/tensorflow/BUILD
@@ -562,6 +562,8 @@ filegroup(
         "//tensorflow/core/grappler/optimizers:all_files",
         "//tensorflow/core/grappler/utils:all_files",
         "//tensorflow/core/kernels:all_files",
+        "//tensorflow/core/kernels/data:all_files",
+        "//tensorflow/core/kernels/data/sql:all_files",
         "//tensorflow/core/kernels/fuzzing:all_files",
         "//tensorflow/core/kernels/hexagon:all_files",
         "//tensorflow/core/kernels/neon:all_files",
diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD
index a3262bf06a..a24335e1ea 100644
--- a/tensorflow/core/kernels/BUILD
+++ b/tensorflow/core/kernels/BUILD
@@ -4922,7 +4922,6 @@ filegroup(
             "summary_interface.*",
             "summary_kernels.*",
             "spectrogram_convert_test_data.cc",
-            "sql_dataset_ops.cc",
             # Excluded due to experimental status:
             "debug_ops.*",
             "scatter_nd_op*",
@@ -5846,26 +5845,6 @@ tf_mkl_kernel_library(
     ],
 )
 
-cc_library(
-    name = "stats_aggregator",
-    hdrs = ["stats_aggregator.h"],
-    deps = [
-        "//tensorflow/core:framework",
-        "//tensorflow/core:lib",
-    ],
-)
-
-tf_kernel_library(
-    name = "stats_aggregator_ops",
-    srcs = ["stats_aggregator_ops.cc"],
-    deps = [
-        ":stats_aggregator",
-        "//tensorflow/core:framework",
-        "//tensorflow/core:lib",
-        "//tensorflow/core:protos_all_cc",
-    ],
-)
-
 cc_library(
     name = "batch_util",
     srcs = ["batch_util.cc"],
@@ -5876,497 +5855,26 @@ cc_library(
     ],
 )
 
-cc_library(
-    name = "dataset",
-    srcs = ["dataset.cc"],
-    hdrs = ["dataset.h"],
-    deps = [
-        "//tensorflow/core:framework",
-        "//tensorflow/core:graph",
-        "//tensorflow/core:lib",
-        "//tensorflow/core:lib_internal",
-        "//tensorflow/core:protos_all_cc",
-    ],
-)
-
-cc_library(
-    name = "dataset_utils",
-    srcs = ["dataset_utils.cc"],
-    hdrs = ["dataset_utils.h"],
-    deps = [
-        ":captured_function",
-        ":dataset",
-        "//tensorflow/core:framework",
-        "//tensorflow/core:lib",
-        "//tensorflow/core:lib_internal",
-    ],
-)
-
 cc_library(
     name = "captured_function",
-    srcs = ["captured_function.cc"],
     hdrs = ["captured_function.h"],
     deps = [
-        ":dataset",
-        "//tensorflow/core:core_cpu_internal",
-        "//tensorflow/core:framework",
-        "//tensorflow/core:lib",
-        "//tensorflow/core:lib_internal",
-        "//tensorflow/core:proto_text",
-        "//tensorflow/core:protos_all_cc",
-        "//tensorflow/core:session_options",
-        "//tensorflow/core/kernels:variable_ops",
+        "//tensorflow/core/kernels/data:captured_function",
     ],
 )
 
 cc_library(
-    name = "window_dataset",
-    srcs = ["window_dataset.cc"],
-    hdrs = ["window_dataset.h"],
-    deps = [
-        ":dataset",
-        "//tensorflow/core:framework",
-        "//tensorflow/core:lib",
-        "//tensorflow/core:lib_internal",
-    ],
-)
-
-tf_kernel_library(
-    name = "batch_dataset_op",
-    srcs = ["batch_dataset_op.cc"],
-    deps = [
-        ":batch_util",
-        ":dataset",
-        "//tensorflow/core:dataset_ops_op_lib",
-        "//tensorflow/core:framework",
-        "//tensorflow/core:lib",
-        "//tensorflow/core:lib_internal",
-    ],
-)
-
-tf_kernel_library(
-    name = "padded_batch_dataset_op",
-    srcs = ["padded_batch_dataset_op.cc"],
-    deps = [
-        ":dataset",
-        "//tensorflow/core:dataset_ops_op_lib",
-        "//tensorflow/core:framework",
-        "//tensorflow/core:lib",
-        "//tensorflow/core:lib_internal",
-    ],
-)
-
-tf_kernel_library(
-    name = "dense_to_sparse_batch_dataset_op",
-    srcs = ["dense_to_sparse_batch_dataset_op.cc"],
-    deps = [
-        ":dataset",
-        "//tensorflow/core:dataset_ops_op_lib",
-        "//tensorflow/core:framework",
-        "//tensorflow/core:lib",
-        "//tensorflow/core:lib_internal",
-    ],
-)
-
-tf_kernel_library(
-    name = "group_by_window_dataset_op",
-    srcs = ["group_by_window_dataset_op.cc"],
-    deps = [
-        ":captured_function",
-        ":dataset",
-        ":window_dataset",
-        "//tensorflow/core:core_cpu_internal",
-        "//tensorflow/core:dataset_ops_op_lib",
-        "//tensorflow/core:framework",
-        "//tensorflow/core:lib",
-        "//tensorflow/core:lib_internal",
-    ],
-)
-
-tf_kernel_library(
-    name = "filter_dataset_op",
-    srcs = ["filter_dataset_op.cc"],
-    deps = [
-        ":captured_function",
-        ":dataset",
-        "//tensorflow/core:core_cpu_internal",
-        "//tensorflow/core:dataset_ops_op_lib",
-        "//tensorflow/core:framework",
-        "//tensorflow/core:lib_internal",
-    ],
-)
-
-tf_kernel_library(
-    name = "map_dataset_op",
-    srcs = ["map_dataset_op.cc"],
-    deps = [
-        ":captured_function",
-        ":dataset",
-        "//tensorflow/core:core_cpu_internal",
-        "//tensorflow/core:dataset_ops_op_lib",
-        "//tensorflow/core:framework",
-        "//tensorflow/core:lib_internal",
-    ],
-)
-
-tf_kernel_library(
-    name = "map_and_batch_dataset_op",
-    srcs = ["map_and_batch_dataset_op.cc"],
-    deps = [
-        ":captured_function",
-        ":dataset",
-        ":inplace_ops",
-        "//tensorflow/core:core_cpu_internal",
-        "//tensorflow/core:dataset_ops_op_lib",
-        "//tensorflow/core:framework",
-        "//tensorflow/core:lib",
-        "//tensorflow/core:lib_internal",
-    ],
-)
-
-tf_kernel_library(
-    name = "parallel_map_dataset_op",
-    srcs = ["parallel_map_dataset_op.cc"],
-    deps = [
-        ":captured_function",
-        ":dataset",
-        "//tensorflow/core:core_cpu_internal",
-        "//tensorflow/core:dataset_ops_op_lib",
-        "//tensorflow/core:framework",
-        "//tensorflow/core:lib",
-        "//tensorflow/core:lib_internal",
-    ],
-)
-
-tf_kernel_library(
-    name = "scan_dataset_op",
-    srcs = ["scan_dataset_op.cc"],
-    deps = [
-        ":captured_function",
-        ":dataset",
-        "//tensorflow/core:core_cpu_internal",
-        "//tensorflow/core:dataset_ops_op_lib",
-        "//tensorflow/core:framework",
-        "//tensorflow/core:lib",
-        "//tensorflow/core:lib_internal",
-    ],
-)
-
-tf_kernel_library(
-    name = "flat_map_dataset_op",
-    srcs = ["flat_map_dataset_op.cc"],
-    deps = [
-        ":captured_function",
-        ":dataset",
-        ":dataset_utils",
-        "//tensorflow/core:core_cpu_internal",
-        "//tensorflow/core:dataset_ops_op_lib",
-        "//tensorflow/core:framework",
-        "//tensorflow/core:lib",
-        "//tensorflow/core:lib_internal",
-    ],
-)
-
-tf_kernel_library(
-    name = "interleave_dataset_op",
-    srcs = ["interleave_dataset_op.cc"],
-    deps = [
-        ":captured_function",
-        ":dataset",
-        ":dataset_utils",
-        "//tensorflow/core:core_cpu_internal",
-        "//tensorflow/core:dataset_ops_op_lib",
-        "//tensorflow/core:framework",
-        "//tensorflow/core:lib",
-        "//tensorflow/core:lib_internal",
-    ],
-)
-
-tf_kernel_library(
-    name = "parallel_interleave_dataset_op",
-    srcs = ["parallel_interleave_dataset_op.cc"],
-    deps = [
-        ":captured_function",
-        ":dataset",
-        ":dataset_utils",
-        "//tensorflow/core:core_cpu_internal",
-        "//tensorflow/core:dataset_ops_op_lib",
-        "//tensorflow/core:framework",
-        "//tensorflow/core:lib",
-        "//tensorflow/core:lib_internal",
-    ],
-)
-
-tf_kernel_library(
-    name = "prefetch_dataset_op",
-    srcs = ["prefetch_dataset_op.cc"],
-    deps = [
-        ":dataset",
-        "//tensorflow/core:core_cpu_internal",
-        "//tensorflow/core:dataset_ops_op_lib",
-        "//tensorflow/core:framework",
-        "//tensorflow/core:lib",
-        "//tensorflow/core:lib_internal",
-        "//tensorflow/core:protos_all_cc",
-    ],
-)
-
-tf_kernel_library(
-    name = "repeat_dataset_op",
-    srcs = ["repeat_dataset_op.cc"],
-    deps = [
-        ":dataset",
-        "//tensorflow/core:dataset_ops_op_lib",
-        "//tensorflow/core:framework",
-        "//tensorflow/core:lib",
-        "//tensorflow/core:lib_internal",
-    ],
-)
-
-tf_kernel_library(
-    name = "take_dataset_op",
-    srcs = ["take_dataset_op.cc"],
-    deps = [
-        ":dataset",
-        "//tensorflow/core:dataset_ops_op_lib",
-        "//tensorflow/core:framework",
-        "//tensorflow/core:lib",
-        "//tensorflow/core:lib_internal",
-    ],
-)
-
-tf_kernel_library(
-    name = "skip_dataset_op",
-    srcs = ["skip_dataset_op.cc"],
-    deps = [
-        ":dataset",
-        "//tensorflow/core:dataset_ops_op_lib",
-        "//tensorflow/core:framework",
-        "//tensorflow/core:lib",
-        "//tensorflow/core:lib_internal",
-    ],
-)
-
-tf_kernel_library(
-    name = "ignore_errors_dataset_op",
-    srcs = ["ignore_errors_dataset_op.cc"],
-    deps = [
-        ":dataset",
-        "//tensorflow/core:dataset_ops_op_lib",
-        "//tensorflow/core:framework",
-        "//tensorflow/core:lib",
-        "//tensorflow/core:lib_internal",
-    ],
-)
-
-tf_kernel_library(
-    name = "stats_dataset_ops",
-    srcs = ["stats_dataset_ops.cc"],
-    deps = [
-        ":dataset",
-        ":stats_aggregator",
-        "//tensorflow/core:dataset_ops_op_lib",
-        "//tensorflow/core:framework",
-        "//tensorflow/core:lib",
-        "//tensorflow/core:lib_internal",
-    ],
-)
-
-tf_kernel_library(
-    name = "random_dataset_op",
-    srcs = ["random_dataset_op.cc"],
-    deps = [
-        ":dataset",
-        "//tensorflow/core:dataset_ops_op_lib",
-        "//tensorflow/core:framework",
-        "//tensorflow/core:lib",
-        "//tensorflow/core:lib_internal",
-    ],
-)
-
-tf_kernel_library(
-    name = "range_dataset_op",
-    srcs = ["range_dataset_op.cc"],
-    deps = [
-        ":dataset",
-        "//tensorflow/core:dataset_ops_op_lib",
-        "//tensorflow/core:framework",
-        "//tensorflow/core:lib",
-        "//tensorflow/core:lib_internal",
-    ],
-)
-
-tf_kernel_library(
-    name = "shuffle_dataset_op",
-    srcs = ["shuffle_dataset_op.cc"],
-    deps = [
-        ":dataset",
-        "//tensorflow/core:dataset_ops_op_lib",
-        "//tensorflow/core:framework",
-        "//tensorflow/core:lib",
-        "//tensorflow/core:lib_internal",
-    ],
-)
-
-tf_kernel_library(
-    name = "sparse_tensor_slice_dataset_op",
-    srcs = ["sparse_tensor_slice_dataset_op.cc"],
-    deps = [
-        ":dataset",
-        "//tensorflow/core:dataset_ops_op_lib",
-        "//tensorflow/core:framework",
-        "//tensorflow/core:lib",
-        "//tensorflow/core:lib_internal",
-    ],
-)
-
-tf_kernel_library(
-    name = "tensor_dataset_op",
-    srcs = ["tensor_dataset_op.cc"],
-    deps = [
-        ":dataset",
-        "//tensorflow/core:dataset_ops_op_lib",
-        "//tensorflow/core:framework",
-        "//tensorflow/core:lib",
-        "//tensorflow/core:lib_internal",
-    ],
-)
-
-tf_kernel_library(
-    name = "tensor_slice_dataset_op",
-    srcs = ["tensor_slice_dataset_op.cc"],
-    deps = [
-        ":batch_util",
-        ":dataset",
-        "//tensorflow/core:dataset_ops_op_lib",
-        "//tensorflow/core:framework",
-        "//tensorflow/core:lib",
-        "//tensorflow/core:lib_internal",
-    ],
-)
-
-tf_kernel_library(
-    name = "zip_dataset_op",
-    srcs = ["zip_dataset_op.cc"],
-    deps = [
-        ":dataset",
-        "//tensorflow/core:dataset_ops_op_lib",
-        "//tensorflow/core:framework",
-        "//tensorflow/core:lib",
-        "//tensorflow/core:lib_internal",
-    ],
-)
-
-tf_kernel_library(
-    name = "concatenate_dataset_op",
-    srcs = ["concatenate_dataset_op.cc"],
-    deps = [
-        ":dataset",
-        "//tensorflow/core:dataset_ops_op_lib",
-        "//tensorflow/core:framework",
-        "//tensorflow/core:lib",
-        "//tensorflow/core:lib_internal",
-    ],
-)
-
-tf_kernel_library(
-    name = "reader_dataset_ops",
-    srcs = ["reader_dataset_ops.cc"],
-    deps = [
-        ":dataset",
-        "//tensorflow/core:dataset_ops_op_lib",
-        "//tensorflow/core:framework",
-        "//tensorflow/core:lib",
-        "//tensorflow/core:lib_internal",
-    ],
-)
-
-tf_kernel_library(
-    name = "sql_dataset_ops",
-    srcs = [
-        "sql/driver_manager.cc",
-        "sql/sqlite_query_connection.cc",
-        "sql_dataset_ops.cc",
-    ],
-    hdrs = [
-        "sql/driver_manager.h",
-        "sql/query_connection.h",
-        "sql/sqlite_query_connection.h",
-    ],
-    deps = [
-        ":dataset",
-        "//tensorflow/core:dataset_ops_op_lib",
-        "//tensorflow/core:framework",
-        "//tensorflow/core:lib",
-        "//tensorflow/core:lib_internal",
-        "//tensorflow/core/lib/db:sqlite",
-        "@sqlite_archive//:sqlite",
-    ],
-)
-
-tf_kernel_library(
-    name = "iterator_ops",
-    srcs = ["iterator_ops.cc"],
-    deps = [
-        ":dataset",
-        ":ops_util",
-        ":stats_aggregator",
-        "//tensorflow/core:core_cpu_internal",
-        "//tensorflow/core:dataset_ops_op_lib",
-        "//tensorflow/core:framework",
-        "//tensorflow/core:lib",
-        "//tensorflow/core:lib_internal",
-        "//tensorflow/core:protos_all_cc",
-    ],
-)
-
-tf_kernel_library(
-    name = "cache_dataset_ops",
-    srcs = ["cache_dataset_ops.cc"],
+    name = "dataset",
+    hdrs = ["dataset.h"],
     deps = [
-        ":dataset",
-        "//tensorflow/core:dataset_ops_op_lib",
-        "//tensorflow/core:framework",
-        "//tensorflow/core:lib",
-        "//tensorflow/core:lib_internal",
-        "//tensorflow/core/util/tensor_bundle",
+        "//tensorflow/core/kernels/data:dataset",
     ],
 )
 
 tf_kernel_library(
     name = "dataset_ops",
     deps = [
-        ":batch_dataset_op",
-        ":cache_dataset_ops",
-        ":concatenate_dataset_op",
-        ":dense_to_sparse_batch_dataset_op",
-        ":filter_dataset_op",
-        ":flat_map_dataset_op",
-        ":group_by_window_dataset_op",
-        ":ignore_errors_dataset_op",
-        ":interleave_dataset_op",
-        ":iterator_ops",
-        ":map_and_batch_dataset_op",
-        ":map_dataset_op",
-        ":padded_batch_dataset_op",
-        ":parallel_interleave_dataset_op",
-        ":parallel_map_dataset_op",
-        ":prefetch_dataset_op",
-        ":random_dataset_op",
-        ":range_dataset_op",
-        ":reader_dataset_ops",
-        ":repeat_dataset_op",
-        ":scan_dataset_op",
-        ":shuffle_dataset_op",
-        ":skip_dataset_op",
-        ":sparse_tensor_slice_dataset_op",
-        ":sql_dataset_ops",
-        ":stats_aggregator_ops",
-        ":stats_dataset_ops",
-        ":take_dataset_op",
-        ":tensor_dataset_op",
-        ":tensor_slice_dataset_op",
-        ":zip_dataset_op",
+        "//tensorflow/core/kernels/data:dataset_ops",
     ],
 )
 
diff --git a/tensorflow/core/kernels/captured_function.h b/tensorflow/core/kernels/captured_function.h
index c10472dde0..cdf191f4c7 100644
--- a/tensorflow/core/kernels/captured_function.h
+++ b/tensorflow/core/kernels/captured_function.h
@@ -12,116 +12,9 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
-#ifndef THIRD_PARTY_TENSORFLOW_CORE_KERNELS_KERNELS_CAPTURED_FUNCTION_H_
-#define THIRD_PARTY_TENSORFLOW_CORE_KERNELS_KERNELS_CAPTURED_FUNCTION_H_
+#ifndef THIRD_PARTY_TENSORFLOW_CORE_KERNELS_CAPTURED_FUNCTION_H_
+#define THIRD_PARTY_TENSORFLOW_CORE_KERNELS_CAPTURED_FUNCTION_H_
 
-#include <memory>
-#include <vector>
+#include "tensorflow/core/kernels/data/captured_function.h"
 
-#include "tensorflow/core/common_runtime/function.h"
-#include "tensorflow/core/framework/tensor.h"
-#include "tensorflow/core/lib/core/status.h"
-#include "tensorflow/core/lib/gtl/array_slice.h"
-#include "tensorflow/core/lib/random/random.h"
-#include "tensorflow/core/platform/macros.h"
-
-namespace tensorflow {
-
-class Device;
-class OpKernelContext;
-class ResourceMgr;
-
-// A `CapturedFunction` encapsulates a TensorFlow function and all of
-// the runtime support required to execute it.
-//
-// The `Dataset`-related classes use `CapturedFunction` to execute
-// TensorFlow functions outside a the normal `OpKernel::Compute()`
-// context.
-//
-// NOTE(mrry): Here we are taking a conservative approach to dealing with
-// ownership of the various framework and runtime objects that are needed
-// to execute functions. We copy the function library *definition* (i.e.
-// a set of FunctionDefs) out of this kernel's context's function library
-// *runtime*, then we use that together with a specially-created
-// ThreadPoolDevice to build a new FunctionLibraryRuntime for the Dataset.
-//
-// We need to do this (or refactor the ownership of framework components
-// in each of the session implementations) to make it possible to close
-// down a ParallelMapDataset::Iterator when its session is closed.
-//
-// TODO(mrry): Clean this up. Investigate whether it would be possible to
-// reuse the session's FunctionLibraryRuntime(s) or Device(s).
-class CapturedFunction {
- public:
-  // NOTE(mrry): The `captured_inputs` are passed by value. For
-  // efficiency, you are recommended to move this argument into the call.
-  static Status Create(OpKernelContext* ctx, const NameAttrList& func,
-                       int graph_def_version,
-                       std::vector<Tensor> captured_inputs,
-                       std::unique_ptr<CapturedFunction>* out_function);
-
-  // Synchronously runs the captured function on the given `args`, and stores
-  // the results in `*rets`. This method takes ownership of the tensors in
-  // `args`, in order to be able to deallocate them as early as possible.
-  // Use `RunWithBorrowedArgs()` if the caller needs to retain ownership of
-  // the `args`.
-  Status Run(FunctionLibraryRuntime::Options f_opts, std::vector<Tensor>&& args,
-             std::vector<Tensor>* rets);
-
-  // Synchronously runs the captured function on the given `args`, and stores
-  // the results in `*rets`. Prefer to use `Run()` or `RunAsync()` when
-  // possible.
-  Status RunWithBorrowedArgs(FunctionLibraryRuntime::Options f_opts,
-                             const std::vector<Tensor>& args,
-                             std::vector<Tensor>* rets);
-
-  // Asynchronously runs the captured function on the given `args`, stores
-  // the results in `*rets`, and calls the given `done` callback when the
-  // function returns. This method takes ownership of the tensors in `args`,
-  // in order to be able to deallocate them as early as possible.
-  void RunAsync(FunctionLibraryRuntime::Options f_opts,
-                std::vector<Tensor>&& args, std::vector<Tensor>* rets,
-                FunctionLibraryRuntime::DoneCallback done);
-
-  // Returns a borrowed pointer to the `ResourceManager` used when this
-  // function is run.
-  ResourceMgr* resource_manager() const { return device_->resource_manager(); }
-
-  // Returns that additional captured inputs that will be passed to the function
-  // when `Run*()` is called.
-  const std::vector<Tensor>& captured_inputs() { return captured_inputs_; }
-
-  // Returns a step ID for use when running a `CapturedFunction`.
-  static int64 generate_step_id() {
-    // Choose a step ID that is guaranteed not to clash with any
-    // Session-generated step ID. DirectSession only generates
-    // non-negative step IDs (contiguous, starting from 0), and
-    // MasterSession generates 56-bit random step IDs whose MSB is
-    // always 0, so a negative random step ID should suffice.
-    return -std::abs(static_cast<int64>(random::New64()));
-  }
-
- private:
-  CapturedFunction(Device* device, std::unique_ptr<DeviceMgr> device_mgr,
-                   std::unique_ptr<FunctionLibraryDefinition> flib_def,
-                   std::unique_ptr<ProcessFunctionLibraryRuntime> pflr,
-                   FunctionLibraryRuntime* lib,
-                   FunctionLibraryRuntime::Handle f_handle,
-                   std::vector<Tensor> captured_inputs,
-                   DataTypeSlice ret_types);
-
-  Device* const device_;  // owned by device_mgr_.
-  const std::unique_ptr<DeviceMgr> device_mgr_;
-  const std::unique_ptr<FunctionLibraryDefinition> flib_def_;
-  const std::unique_ptr<ProcessFunctionLibraryRuntime> pflr_;
-  FunctionLibraryRuntime* const lib_;  // owned by pflr_.
-  const FunctionLibraryRuntime::Handle f_handle_;
-  const std::vector<Tensor> captured_inputs_;
-  DataTypeSlice ret_types_;  // owned by pflr_.
-
-  TF_DISALLOW_COPY_AND_ASSIGN(CapturedFunction);
-};
-
-}  // namespace tensorflow
-
-#endif  // THIRD_PARTY_TENSORFLOW_CORE_KERNELS_KERNELS_CAPTURED_FUNCTION_H_
+#endif  // THIRD_PARTY_TENSORFLOW_CORE_KERNELS_CAPTURED_FUNCTION_H_
diff --git a/tensorflow/core/kernels/data/BUILD b/tensorflow/core/kernels/data/BUILD
new file mode 100644
index 0000000000..58cf36f454
--- /dev/null
+++ b/tensorflow/core/kernels/data/BUILD
@@ -0,0 +1,531 @@
+# Description:
+#   OpKernels for tf.data
+
+package(
+    default_visibility = ["//visibility:public"],
+)
+
+licenses(["notice"])  # Apache 2.0
+
+load(
+    "//tensorflow:tensorflow.bzl",
+    "tf_kernel_library",
+)
+
+filegroup(
+    name = "all_files",
+    srcs = glob(
+        ["**/*"],
+        exclude = [
+            "**/METADATA",
+            "**/OWNERS",
+        ],
+    ),
+    visibility = ["//tensorflow:__subpackages__"],
+)
+
+cc_library(
+    name = "stats_aggregator",
+    hdrs = ["stats_aggregator.h"],
+    deps = [
+        "//tensorflow/core:framework",
+        "//tensorflow/core:lib",
+    ],
+)
+
+tf_kernel_library(
+    name = "stats_aggregator_ops",
+    srcs = ["stats_aggregator_ops.cc"],
+    deps = [
+        ":stats_aggregator",
+        "//tensorflow/core:framework",
+        "//tensorflow/core:lib",
+        "//tensorflow/core:protos_all_cc",
+    ],
+)
+
+cc_library(
+    name = "dataset",
+    srcs = ["dataset.cc"],
+    hdrs = ["dataset.h"],
+    deps = [
+        "//tensorflow/core:framework",
+        "//tensorflow/core:graph",
+        "//tensorflow/core:lib",
+        "//tensorflow/core:lib_internal",
+        "//tensorflow/core:protos_all_cc",
+    ],
+)
+
+cc_library(
+    name = "dataset_utils",
+    srcs = ["dataset_utils.cc"],
+    hdrs = ["dataset_utils.h"],
+    deps = [
+        ":captured_function",
+        ":dataset",
+        "//tensorflow/core:framework",
+        "//tensorflow/core:lib",
+        "//tensorflow/core:lib_internal",
+    ],
+)
+
+cc_library(
+    name = "captured_function",
+    srcs = ["captured_function.cc"],
+    hdrs = ["captured_function.h"],
+    deps = [
+        ":dataset",
+        "//tensorflow/core:core_cpu_internal",
+        "//tensorflow/core:framework",
+        "//tensorflow/core:lib",
+        "//tensorflow/core:lib_internal",
+        "//tensorflow/core:proto_text",
+        "//tensorflow/core:protos_all_cc",
+        "//tensorflow/core:session_options",
+        "//tensorflow/core/kernels:variable_ops",
+    ],
+)
+
+cc_library(
+    name = "window_dataset",
+    srcs = ["window_dataset.cc"],
+    hdrs = ["window_dataset.h"],
+    deps = [
+        ":dataset",
+        "//tensorflow/core:framework",
+        "//tensorflow/core:lib",
+        "//tensorflow/core:lib_internal",
+    ],
+)
+
+tf_kernel_library(
+    name = "batch_dataset_op",
+    srcs = ["batch_dataset_op.cc"],
+    deps = [
+        ":dataset",
+        "//tensorflow/core:dataset_ops_op_lib",
+        "//tensorflow/core:framework",
+        "//tensorflow/core:lib",
+        "//tensorflow/core:lib_internal",
+        "//tensorflow/core/kernels:batch_util",
+    ],
+)
+
+tf_kernel_library(
+    name = "padded_batch_dataset_op",
+    srcs = ["padded_batch_dataset_op.cc"],
+    deps = [
+        ":dataset",
+        "//tensorflow/core:dataset_ops_op_lib",
+        "//tensorflow/core:framework",
+        "//tensorflow/core:lib",
+        "//tensorflow/core:lib_internal",
+    ],
+)
+
+tf_kernel_library(
+    name = "dense_to_sparse_batch_dataset_op",
+    srcs = ["dense_to_sparse_batch_dataset_op.cc"],
+    deps = [
+        ":dataset",
+        "//tensorflow/core:dataset_ops_op_lib",
+        "//tensorflow/core:framework",
+        "//tensorflow/core:lib",
+        "//tensorflow/core:lib_internal",
+    ],
+)
+
+tf_kernel_library(
+    name = "group_by_window_dataset_op",
+    srcs = ["group_by_window_dataset_op.cc"],
+    deps = [
+        ":captured_function",
+        ":dataset",
+        ":window_dataset",
+        "//tensorflow/core:core_cpu_internal",
+        "//tensorflow/core:dataset_ops_op_lib",
+        "//tensorflow/core:framework",
+        "//tensorflow/core:lib",
+        "//tensorflow/core:lib_internal",
+    ],
+)
+
+tf_kernel_library(
+    name = "filter_dataset_op",
+    srcs = ["filter_dataset_op.cc"],
+    deps = [
+        ":captured_function",
+        ":dataset",
+        "//tensorflow/core:core_cpu_internal",
+        "//tensorflow/core:dataset_ops_op_lib",
+        "//tensorflow/core:framework",
+        "//tensorflow/core:lib_internal",
+    ],
+)
+
+tf_kernel_library(
+    name = "map_dataset_op",
+    srcs = ["map_dataset_op.cc"],
+    deps = [
+        ":captured_function",
+        ":dataset",
+        "//tensorflow/core:core_cpu_internal",
+        "//tensorflow/core:dataset_ops_op_lib",
+        "//tensorflow/core:framework",
+        "//tensorflow/core:lib_internal",
+    ],
+)
+
+tf_kernel_library(
+    name = "map_and_batch_dataset_op",
+    srcs = ["map_and_batch_dataset_op.cc"],
+    deps = [
+        ":captured_function",
+        ":dataset",
+        "//tensorflow/core:core_cpu_internal",
+        "//tensorflow/core:dataset_ops_op_lib",
+        "//tensorflow/core:framework",
+        "//tensorflow/core:lib",
+        "//tensorflow/core:lib_internal",
+        "//tensorflow/core/kernels:inplace_ops",
+    ],
+)
+
+tf_kernel_library(
+    name = "parallel_map_dataset_op",
+    srcs = ["parallel_map_dataset_op.cc"],
+    deps = [
+        ":captured_function",
+        ":dataset",
+        "//tensorflow/core:core_cpu_internal",
+        "//tensorflow/core:dataset_ops_op_lib",
+        "//tensorflow/core:framework",
+        "//tensorflow/core:lib",
+        "//tensorflow/core:lib_internal",
+    ],
+)
+
+tf_kernel_library(
+    name = "scan_dataset_op",
+    srcs = ["scan_dataset_op.cc"],
+    deps = [
+        ":captured_function",
+        ":dataset",
+        "//tensorflow/core:core_cpu_internal",
+        "//tensorflow/core:dataset_ops_op_lib",
+        "//tensorflow/core:framework",
+        "//tensorflow/core:lib",
+        "//tensorflow/core:lib_internal",
+    ],
+)
+
+tf_kernel_library(
+    name = "flat_map_dataset_op",
+    srcs = ["flat_map_dataset_op.cc"],
+    deps = [
+        ":captured_function",
+        ":dataset",
+        ":dataset_utils",
+        "//tensorflow/core:core_cpu_internal",
+        "//tensorflow/core:dataset_ops_op_lib",
+        "//tensorflow/core:framework",
+        "//tensorflow/core:lib",
+        "//tensorflow/core:lib_internal",
+    ],
+)
+
+tf_kernel_library(
+    name = "interleave_dataset_op",
+    srcs = ["interleave_dataset_op.cc"],
+    deps = [
+        ":captured_function",
+        ":dataset",
+        ":dataset_utils",
+        "//tensorflow/core:core_cpu_internal",
+        "//tensorflow/core:dataset_ops_op_lib",
+        "//tensorflow/core:framework",
+        "//tensorflow/core:lib",
+        "//tensorflow/core:lib_internal",
+    ],
+)
+
+tf_kernel_library(
+    name = "parallel_interleave_dataset_op",
+    srcs = ["parallel_interleave_dataset_op.cc"],
+    deps = [
+        ":captured_function",
+        ":dataset",
+        ":dataset_utils",
+        "//tensorflow/core:core_cpu_internal",
+        "//tensorflow/core:dataset_ops_op_lib",
+        "//tensorflow/core:framework",
+        "//tensorflow/core:lib",
+        "//tensorflow/core:lib_internal",
+    ],
+)
+
+tf_kernel_library(
+    name = "prefetch_dataset_op",
+    srcs = ["prefetch_dataset_op.cc"],
+    deps = [
+        ":dataset",
+        "//tensorflow/core:core_cpu_internal",
+        "//tensorflow/core:dataset_ops_op_lib",
+        "//tensorflow/core:framework",
+        "//tensorflow/core:lib",
+        "//tensorflow/core:lib_internal",
+        "//tensorflow/core:protos_all_cc",
+    ],
+)
+
+tf_kernel_library(
+    name = "repeat_dataset_op",
+    srcs = ["repeat_dataset_op.cc"],
+    deps = [
+        ":dataset",
+        "//tensorflow/core:dataset_ops_op_lib",
+        "//tensorflow/core:framework",
+        "//tensorflow/core:lib",
+        "//tensorflow/core:lib_internal",
+    ],
+)
+
+tf_kernel_library(
+    name = "take_dataset_op",
+    srcs = ["take_dataset_op.cc"],
+    deps = [
+        ":dataset",
+        "//tensorflow/core:dataset_ops_op_lib",
+        "//tensorflow/core:framework",
+        "//tensorflow/core:lib",
+        "//tensorflow/core:lib_internal",
+    ],
+)
+
+tf_kernel_library(
+    name = "skip_dataset_op",
+    srcs = ["skip_dataset_op.cc"],
+    deps = [
+        ":dataset",
+        "//tensorflow/core:dataset_ops_op_lib",
+        "//tensorflow/core:framework",
+        "//tensorflow/core:lib",
+        "//tensorflow/core:lib_internal",
+    ],
+)
+
+tf_kernel_library(
+    name = "ignore_errors_dataset_op",
+    srcs = ["ignore_errors_dataset_op.cc"],
+    deps = [
+        ":dataset",
+        "//tensorflow/core:dataset_ops_op_lib",
+        "//tensorflow/core:framework",
+        "//tensorflow/core:lib",
+        "//tensorflow/core:lib_internal",
+    ],
+)
+
+tf_kernel_library(
+    name = "stats_dataset_ops",
+    srcs = ["stats_dataset_ops.cc"],
+    deps = [
+        ":dataset",
+        ":stats_aggregator",
+        "//tensorflow/core:dataset_ops_op_lib",
+        "//tensorflow/core:framework",
+        "//tensorflow/core:lib",
+        "//tensorflow/core:lib_internal",
+    ],
+)
+
+tf_kernel_library(
+    name = "random_dataset_op",
+    srcs = ["random_dataset_op.cc"],
+    deps = [
+        ":dataset",
+        "//tensorflow/core:dataset_ops_op_lib",
+        "//tensorflow/core:framework",
+        "//tensorflow/core:lib",
+        "//tensorflow/core:lib_internal",
+    ],
+)
+
+tf_kernel_library(
+    name = "range_dataset_op",
+    srcs = ["range_dataset_op.cc"],
+    deps = [
+        ":dataset",
+        "//tensorflow/core:dataset_ops_op_lib",
+        "//tensorflow/core:framework",
+        "//tensorflow/core:lib",
+        "//tensorflow/core:lib_internal",
+    ],
+)
+
+tf_kernel_library(
+    name = "shuffle_dataset_op",
+    srcs = ["shuffle_dataset_op.cc"],
+    deps = [
+        ":dataset",
+        "//tensorflow/core:dataset_ops_op_lib",
+        "//tensorflow/core:framework",
+        "//tensorflow/core:lib",
+        "//tensorflow/core:lib_internal",
+    ],
+)
+
+tf_kernel_library(
+    name = "sparse_tensor_slice_dataset_op",
+    srcs = ["sparse_tensor_slice_dataset_op.cc"],
+    deps = [
+        ":dataset",
+        "//tensorflow/core:dataset_ops_op_lib",
+        "//tensorflow/core:framework",
+        "//tensorflow/core:lib",
+        "//tensorflow/core:lib_internal",
+    ],
+)
+
+tf_kernel_library(
+    name = "tensor_dataset_op",
+    srcs = ["tensor_dataset_op.cc"],
+    deps = [
+        ":dataset",
+        "//tensorflow/core:dataset_ops_op_lib",
+        "//tensorflow/core:framework",
+        "//tensorflow/core:lib",
+        "//tensorflow/core:lib_internal",
+    ],
+)
+
+tf_kernel_library(
+    name = "tensor_slice_dataset_op",
+    srcs = ["tensor_slice_dataset_op.cc"],
+    deps = [
+        ":dataset",
+        "//tensorflow/core:dataset_ops_op_lib",
+        "//tensorflow/core:framework",
+        "//tensorflow/core:lib",
+        "//tensorflow/core:lib_internal",
+        "//tensorflow/core/kernels:batch_util",
+    ],
+)
+
+tf_kernel_library(
+    name = "zip_dataset_op",
+    srcs = ["zip_dataset_op.cc"],
+    deps = [
+        ":dataset",
+        "//tensorflow/core:dataset_ops_op_lib",
+        "//tensorflow/core:framework",
+        "//tensorflow/core:lib",
+        "//tensorflow/core:lib_internal",
+    ],
+)
+
+tf_kernel_library(
+    name = "concatenate_dataset_op",
+    srcs = ["concatenate_dataset_op.cc"],
+    deps = [
+        ":dataset",
+        "//tensorflow/core:dataset_ops_op_lib",
+        "//tensorflow/core:framework",
+        "//tensorflow/core:lib",
+        "//tensorflow/core:lib_internal",
+    ],
+)
+
+tf_kernel_library(
+    name = "reader_dataset_ops",
+    srcs = ["reader_dataset_ops.cc"],
+    deps = [
+        ":dataset",
+        "//tensorflow/core:dataset_ops_op_lib",
+        "//tensorflow/core:framework",
+        "//tensorflow/core:lib",
+        "//tensorflow/core:lib_internal",
+    ],
+)
+
+tf_kernel_library(
+    name = "sql_dataset_ops",
+    srcs = [
+        "sql_dataset_ops.cc",
+    ],
+    deps = [
+        ":dataset",
+        "//tensorflow/core:dataset_ops_op_lib",
+        "//tensorflow/core:framework",
+        "//tensorflow/core:lib",
+        "//tensorflow/core:lib_internal",
+        "//tensorflow/core/kernels/data/sql",
+    ],
+)
+
+tf_kernel_library(
+    name = "iterator_ops",
+    srcs = ["iterator_ops.cc"],
+    deps = [
+        ":dataset",
+        ":stats_aggregator",
+        "//tensorflow/core:core_cpu_internal",
+        "//tensorflow/core:dataset_ops_op_lib",
+        "//tensorflow/core:framework",
+        "//tensorflow/core:lib",
+        "//tensorflow/core:lib_internal",
+        "//tensorflow/core:protos_all_cc",
+        "//tensorflow/core/kernels:ops_util",
+    ],
+)
+
+tf_kernel_library(
+    name = "cache_dataset_ops",
+    srcs = ["cache_dataset_ops.cc"],
+    deps = [
+        ":dataset",
+        "//tensorflow/core:dataset_ops_op_lib",
+        "//tensorflow/core:framework",
+        "//tensorflow/core:lib",
+        "//tensorflow/core:lib_internal",
+        "//tensorflow/core/util/tensor_bundle",
+    ],
+)
+
+tf_kernel_library(
+    name = "dataset_ops",
+    deps = [
+        ":batch_dataset_op",
+        ":cache_dataset_ops",
+        ":concatenate_dataset_op",
+        ":dense_to_sparse_batch_dataset_op",
+        ":filter_dataset_op",
+        ":flat_map_dataset_op",
+        ":group_by_window_dataset_op",
+        ":ignore_errors_dataset_op",
+        ":interleave_dataset_op",
+        ":iterator_ops",
+        ":map_and_batch_dataset_op",
+        ":map_dataset_op",
+        ":padded_batch_dataset_op",
+        ":parallel_interleave_dataset_op",
+        ":parallel_map_dataset_op",
+        ":prefetch_dataset_op",
+        ":random_dataset_op",
+        ":range_dataset_op",
+        ":reader_dataset_ops",
+        ":repeat_dataset_op",
+        ":scan_dataset_op",
+        ":shuffle_dataset_op",
+        ":skip_dataset_op",
+        ":sparse_tensor_slice_dataset_op",
+        ":sql_dataset_ops",
+        ":stats_aggregator_ops",
+        ":stats_dataset_ops",
+        ":take_dataset_op",
+        ":tensor_dataset_op",
+        ":tensor_slice_dataset_op",
+        ":zip_dataset_op",
+    ],
+)
diff --git a/tensorflow/core/kernels/batch_dataset_op.cc b/tensorflow/core/kernels/data/batch_dataset_op.cc
similarity index 99%
rename from tensorflow/core/kernels/batch_dataset_op.cc
rename to tensorflow/core/kernels/data/batch_dataset_op.cc
index d5f2fd4c19..876f76fb43 100644
--- a/tensorflow/core/kernels/batch_dataset_op.cc
+++ b/tensorflow/core/kernels/data/batch_dataset_op.cc
@@ -12,11 +12,10 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
-#include "tensorflow/core/kernels/dataset.h"
-
 #include "tensorflow/core/framework/partial_tensor_shape.h"
 #include "tensorflow/core/framework/tensor.h"
 #include "tensorflow/core/kernels/batch_util.h"
+#include "tensorflow/core/kernels/data/dataset.h"
 
 namespace tensorflow {
 
diff --git a/tensorflow/core/kernels/cache_dataset_ops.cc b/tensorflow/core/kernels/data/cache_dataset_ops.cc
similarity index 99%
rename from tensorflow/core/kernels/cache_dataset_ops.cc
rename to tensorflow/core/kernels/data/cache_dataset_ops.cc
index 137002b9d7..f0a2192826 100644
--- a/tensorflow/core/kernels/cache_dataset_ops.cc
+++ b/tensorflow/core/kernels/data/cache_dataset_ops.cc
@@ -12,10 +12,9 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
-#include "tensorflow/core/kernels/dataset.h"
-
 #include "tensorflow/core/framework/partial_tensor_shape.h"
 #include "tensorflow/core/framework/tensor.h"
+#include "tensorflow/core/kernels/data/dataset.h"
 #include "tensorflow/core/lib/strings/stringprintf.h"
 #include "tensorflow/core/platform/env.h"
 #include "tensorflow/core/util/tensor_bundle/tensor_bundle.h"
diff --git a/tensorflow/core/kernels/captured_function.cc b/tensorflow/core/kernels/data/captured_function.cc
similarity index 99%
rename from tensorflow/core/kernels/captured_function.cc
rename to tensorflow/core/kernels/data/captured_function.cc
index 5ef331e592..17ee1db407 100644
--- a/tensorflow/core/kernels/captured_function.cc
+++ b/tensorflow/core/kernels/data/captured_function.cc
@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
-#include "tensorflow/core/kernels/captured_function.h"
+#include "tensorflow/core/kernels/data/captured_function.h"
 
 #include <utility>
 
@@ -24,7 +24,7 @@ limitations under the License.
 #include "tensorflow/core/framework/queue_interface.h"
 #include "tensorflow/core/framework/reader_interface.h"
 #include "tensorflow/core/framework/resource_handle.pb_text.h"
-#include "tensorflow/core/kernels/dataset.h"
+#include "tensorflow/core/kernels/data/dataset.h"
 #include "tensorflow/core/kernels/variable_ops.h"
 #include "tensorflow/core/lib/gtl/optional.h"
 #include "tensorflow/core/platform/notification.h"
diff --git a/tensorflow/core/kernels/data/captured_function.h b/tensorflow/core/kernels/data/captured_function.h
new file mode 100644
index 0000000000..0f62b74470
--- /dev/null
+++ b/tensorflow/core/kernels/data/captured_function.h
@@ -0,0 +1,127 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef THIRD_PARTY_TENSORFLOW_CORE_KERNELS_DATA_CAPTURED_FUNCTION_H_
+#define THIRD_PARTY_TENSORFLOW_CORE_KERNELS_DATA_CAPTURED_FUNCTION_H_
+
+#include <memory>
+#include <vector>
+
+#include "tensorflow/core/common_runtime/function.h"
+#include "tensorflow/core/framework/tensor.h"
+#include "tensorflow/core/lib/core/status.h"
+#include "tensorflow/core/lib/gtl/array_slice.h"
+#include "tensorflow/core/lib/random/random.h"
+#include "tensorflow/core/platform/macros.h"
+
+namespace tensorflow {
+
+class Device;
+class OpKernelContext;
+class ResourceMgr;
+
+// A `CapturedFunction` encapsulates a TensorFlow function and all of
+// the runtime support required to execute it.
+//
+// The `Dataset`-related classes use `CapturedFunction` to execute
+// TensorFlow functions outside a the normal `OpKernel::Compute()`
+// context.
+//
+// NOTE(mrry): Here we are taking a conservative approach to dealing with
+// ownership of the various framework and runtime objects that are needed
+// to execute functions. We copy the function library *definition* (i.e.
+// a set of FunctionDefs) out of this kernel's context's function library
+// *runtime*, then we use that together with a specially-created
+// ThreadPoolDevice to build a new FunctionLibraryRuntime for the Dataset.
+//
+// We need to do this (or refactor the ownership of framework components
+// in each of the session implementations) to make it possible to close
+// down a ParallelMapDataset::Iterator when its session is closed.
+//
+// TODO(mrry): Clean this up. Investigate whether it would be possible to
+// reuse the session's FunctionLibraryRuntime(s) or Device(s).
+class CapturedFunction {
+ public:
+  // NOTE(mrry): The `captured_inputs` are passed by value. For
+  // efficiency, you are recommended to move this argument into the call.
+  static Status Create(OpKernelContext* ctx, const NameAttrList& func,
+                       int graph_def_version,
+                       std::vector<Tensor> captured_inputs,
+                       std::unique_ptr<CapturedFunction>* out_function);
+
+  // Synchronously runs the captured function on the given `args`, and stores
+  // the results in `*rets`. This method takes ownership of the tensors in
+  // `args`, in order to be able to deallocate them as early as possible.
+  // Use `RunWithBorrowedArgs()` if the caller needs to retain ownership of
+  // the `args`.
+  Status Run(FunctionLibraryRuntime::Options f_opts, std::vector<Tensor>&& args,
+             std::vector<Tensor>* rets);
+
+  // Synchronously runs the captured function on the given `args`, and stores
+  // the results in `*rets`. Prefer to use `Run()` or `RunAsync()` when
+  // possible.
+  Status RunWithBorrowedArgs(FunctionLibraryRuntime::Options f_opts,
+                             const std::vector<Tensor>& args,
+                             std::vector<Tensor>* rets);
+
+  // Asynchronously runs the captured function on the given `args`, stores
+  // the results in `*rets`, and calls the given `done` callback when the
+  // function returns. This method takes ownership of the tensors in `args`,
+  // in order to be able to deallocate them as early as possible.
+  void RunAsync(FunctionLibraryRuntime::Options f_opts,
+                std::vector<Tensor>&& args, std::vector<Tensor>* rets,
+                FunctionLibraryRuntime::DoneCallback done);
+
+  // Returns a borrowed pointer to the `ResourceManager` used when this
+  // function is run.
+  ResourceMgr* resource_manager() const { return device_->resource_manager(); }
+
+  // Returns that additional captured inputs that will be passed to the function
+  // when `Run*()` is called.
+  const std::vector<Tensor>& captured_inputs() { return captured_inputs_; }
+
+  // Returns a step ID for use when running a `CapturedFunction`.
+  static int64 generate_step_id() {
+    // Choose a step ID that is guaranteed not to clash with any
+    // Session-generated step ID. DirectSession only generates
+    // non-negative step IDs (contiguous, starting from 0), and
+    // MasterSession generates 56-bit random step IDs whose MSB is
+    // always 0, so a negative random step ID should suffice.
+    return -std::abs(static_cast<int64>(random::New64()));
+  }
+
+ private:
+  CapturedFunction(Device* device, std::unique_ptr<DeviceMgr> device_mgr,
+                   std::unique_ptr<FunctionLibraryDefinition> flib_def,
+                   std::unique_ptr<ProcessFunctionLibraryRuntime> pflr,
+                   FunctionLibraryRuntime* lib,
+                   FunctionLibraryRuntime::Handle f_handle,
+                   std::vector<Tensor> captured_inputs,
+                   DataTypeSlice ret_types);
+
+  Device* const device_;  // owned by device_mgr_.
+  const std::unique_ptr<DeviceMgr> device_mgr_;
+  const std::unique_ptr<FunctionLibraryDefinition> flib_def_;
+  const std::unique_ptr<ProcessFunctionLibraryRuntime> pflr_;
+  FunctionLibraryRuntime* const lib_;  // owned by pflr_.
+  const FunctionLibraryRuntime::Handle f_handle_;
+  const std::vector<Tensor> captured_inputs_;
+  DataTypeSlice ret_types_;  // owned by pflr_.
+
+  TF_DISALLOW_COPY_AND_ASSIGN(CapturedFunction);
+};
+
+}  // namespace tensorflow
+
+#endif  // THIRD_PARTY_TENSORFLOW_CORE_KERNELS_DATA_CAPTURED_FUNCTION_H_
diff --git a/tensorflow/core/kernels/concatenate_dataset_op.cc b/tensorflow/core/kernels/data/concatenate_dataset_op.cc
similarity index 99%
rename from tensorflow/core/kernels/concatenate_dataset_op.cc
rename to tensorflow/core/kernels/data/concatenate_dataset_op.cc
index ad78ba0186..24efadfd47 100644
--- a/tensorflow/core/kernels/concatenate_dataset_op.cc
+++ b/tensorflow/core/kernels/data/concatenate_dataset_op.cc
@@ -12,10 +12,9 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
-#include "tensorflow/core/kernels/dataset.h"
-
 #include "tensorflow/core/framework/partial_tensor_shape.h"
 #include "tensorflow/core/framework/tensor.h"
+#include "tensorflow/core/kernels/data/dataset.h"
 
 namespace tensorflow {
 
diff --git a/tensorflow/core/kernels/dataset.cc b/tensorflow/core/kernels/data/dataset.cc
similarity index 99%
rename from tensorflow/core/kernels/dataset.cc
rename to tensorflow/core/kernels/data/dataset.cc
index 0972129787..2ea6875567 100644
--- a/tensorflow/core/kernels/dataset.cc
+++ b/tensorflow/core/kernels/data/dataset.cc
@@ -12,9 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
-
-#include "tensorflow/core/kernels/dataset.h"
-
+#include "tensorflow/core/kernels/data/dataset.h"
 #include "tensorflow/core/graph/graph_def_builder.h"
 #include "tensorflow/core/graph/node_builder.h"
 
diff --git a/tensorflow/core/kernels/data/dataset.h b/tensorflow/core/kernels/data/dataset.h
new file mode 100644
index 0000000000..7e01535bd8
--- /dev/null
+++ b/tensorflow/core/kernels/data/dataset.h
@@ -0,0 +1,578 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef THIRD_PARTY_TENSORFLOW_CORE_KERNELS_DATA_DATASET_H_
+#define THIRD_PARTY_TENSORFLOW_CORE_KERNELS_DATA_DATASET_H_
+
+#include <memory>
+
+#include "tensorflow/core/framework/attr_value.pb.h"
+#include "tensorflow/core/framework/attr_value_util.h"
+#include "tensorflow/core/framework/function.h"
+#include "tensorflow/core/framework/graph.pb.h"
+#include "tensorflow/core/framework/node_def.pb.h"
+#include "tensorflow/core/framework/op_kernel.h"
+#include "tensorflow/core/framework/register_types.h"
+#include "tensorflow/core/framework/variant_encode_decode.h"
+#include "tensorflow/core/framework/variant_tensor_data.h"
+#include "tensorflow/core/lib/strings/str_util.h"
+#include "tensorflow/core/lib/strings/strcat.h"
+#include "tensorflow/core/platform/tracing.h"
+
+// Polymorphic datasets should support all primitive TensorFlow
+// types. Use this macro to expand `m(T)` once for each primitive type
+// `T`, e.g. to build a `switch` statement.
+#define TF_CALL_DATASET_TYPES(m) TF_CALL_ALL_TYPES(m) TF_CALL_QUANTIZED_TYPES(m)
+
+namespace tensorflow {
+
+// Interface for reading values from a key-value store.
+// Used for restoring iterator state.
+class IteratorStateReader {
+ public:
+  virtual Status ReadScalar(StringPiece key, int64* val) = 0;
+  virtual Status ReadScalar(StringPiece key, string* val) = 0;
+  virtual Status ReadTensor(StringPiece key, Tensor* val) = 0;
+  virtual bool Contains(StringPiece key) = 0;
+
+  virtual ~IteratorStateReader() {}
+};
+
+// Interface for writing values to a key-value store.
+// Used for saving iterator state.
+class IteratorStateWriter {
+ public:
+  virtual Status WriteScalar(StringPiece key, const int64 val) = 0;
+  virtual Status WriteScalar(StringPiece key, const string& val) = 0;
+  virtual Status WriteTensor(StringPiece key, const Tensor& val) = 0;
+
+  virtual ~IteratorStateWriter() {}
+};
+
+// Forward declarations to avoid introducing a dependency on headers in
+// "tensorflow/core/graph/...".
+class GraphDefBuilder;
+class GraphDatasetBase;
+class Node;
+
+// Wrapper around GraphDefBuilder. Used to serialize Dataset graph.
+class GraphDefBuilderWrapper {
+ public:
+  explicit GraphDefBuilderWrapper(GraphDefBuilder* b) : b_(b) {}
+
+  // Adds a Const node with scalar value to the Graph.
+  // `*output` contains a pointer to the output `Node`. It is guaranteed to be
+  // non-null if the method returns with an OK status.
+  // The returned Node pointer is owned by the backing Graph of GraphDefBuilder.
+  template <typename T>
+  Status AddScalar(const T& val, Node** output) {
+    Tensor val_t = Tensor(DataTypeToEnum<T>::v(), TensorShape({}));
+    val_t.scalar<T>()() = val;
+    AddTensorInternal(val_t, output);
+    if (*output == nullptr) {
+      return errors::Internal("AddScalar: Failed to build Const op.");
+    }
+    return Status::OK();
+  }
+
+  // Adds a Const node with vector value to the Graph.
+  // `*output` contains a pointer to the output `Node`. It is guaranteed to be
+  // non-null if the method returns with an OK status.
+  // The returned Node pointer is owned by the backing Graph of GraphDefBuilder.
+  // TODO(shivaniagrawal): Consider changing to gtl::ArraySlice?
+  template <typename T>
+  Status AddVector(const std::vector<T>& val, Node** output) {
+    Tensor val_t = Tensor(DataTypeToEnum<T>::v(),
+                          TensorShape({static_cast<int64>(val.size())}));
+    for (int i = 0; i < val.size(); i++) {
+      val_t.flat<T>()(i) = val[i];
+    }
+    AddTensorInternal(val_t, output);
+    if (*output == nullptr) {
+      return errors::Internal("AddVector: Failed to build Const op.");
+    }
+    return Status::OK();
+  }
+
+  // Adds a Const node with Tensor value to the Graph.
+  // `*output` contains a pointer to the output `Node`. It is guaranteed to be
+  // non-null if the method returns with an OK status.
+  // The returned Node pointer is owned by the backing Graph of GraphDefBuilder.
+  Status AddTensor(const Tensor& val, Node** output) {
+    AddTensorInternal(val, output);
+    if (*output == nullptr) {
+      return errors::Internal("AddTesor: Failed to build Const op.");
+    }
+    return Status::OK();
+  }
+
+  Status AddDataset(const GraphDatasetBase* dataset,
+                    const std::vector<Node*>& inputs, Node** output) {
+    return AddDataset(dataset, inputs, {}, output);
+  }
+
+  // Adds a node corresponding to the `DatasetType` to the Graph.
+  // Return value of `DatasetType::op_name()` is used as the op type for the
+  // node.
+  // Values for the output_types and output_shapes node attributes are also
+  // written if those attributes are defined in the OpDef.
+  // `*output` contains a pointer to the output `Node`. It is guaranteed to be
+  // non-null if the method returns with an OK status.
+  // The returned Node pointer is owned by the backing Graph of GraphDefBuilder.
+  Status AddDataset(const GraphDatasetBase* dataset,
+                    const std::vector<Node*>& inputs,
+                    const std::vector<std::pair<StringPiece, AttrValue>>& attrs,
+                    Node** output) {
+    std::vector<std::pair<size_t, Node*>> enumerated_inputs(inputs.size());
+    for (int i = 0; i < inputs.size(); i++) {
+      enumerated_inputs[i] = std::make_pair(i, inputs[i]);
+    }
+    return AddDataset(dataset, enumerated_inputs, {}, attrs, output);
+  }
+
+  Status AddDataset(
+      const GraphDatasetBase* dataset,
+      const std::vector<std::pair<size_t, Node*>>& inputs,
+      const std::vector<std::pair<size_t, gtl::ArraySlice<Node*>>>& list_inputs,
+      const std::vector<std::pair<StringPiece, AttrValue>>& attrs,
+      Node** output);
+
+  // Adds a user-defined function with name `function_name` to the graph and
+  // recursively adds all functions it references. If a function with a matching
+  // name has already been added, returns with OK status. If a user-defined with
+  // name `function_name` is not found in the FunctionLibraryDefinition, returns
+  // an InvalidArgumentError. If the function with name `function_name` or any
+  // of its dependent functions are stateful, returns an InvalidArgument error.
+  Status AddFunction(OpKernelContext* ctx, const string& function_name);
+
+  template <typename T>
+  void BuildAttrValue(const T& value, AttrValue* attr) {
+    SetAttrValue(value, attr);
+  }
+
+ private:
+  void AddTensorInternal(const Tensor& val, Node** output);
+
+  Status EnsureFunctionIsStateless(OpKernelContext* ctx,
+                                   const string& function_name) const {
+    const FunctionLibraryDefinition* lib_def =
+        ctx->function_library()->GetFunctionLibraryDefinition();
+    const FunctionDef* function_def = lib_def->Find(function_name);
+    if (!function_def) {
+      return errors::InvalidArgument("Unable to find FunctionDef for ",
+                                     function_name, " in registry.");
+    }
+    for (const NodeDef& node_def : function_def->node_def()) {
+      const OpDef* op_def;
+      TF_RETURN_IF_ERROR(lib_def->LookUpOpDef(node_def.op(), &op_def));
+      // TODO(b/65524810): Hack to allow functions to capture Dataset op
+      // nodes needed for FlatMap. Currently, source datasets nodes have been
+      // marked stateful to avoid constant folding since we do not have a
+      // good way of serializing them.
+      if (IsOpWhitelisted(op_def)) {
+        continue;
+      }
+      if (op_def->is_stateful()) {
+        return errors::InvalidArgument(
+            "Op[name: ", node_def.name(), ", type: ", node_def.op(), "] ",
+            "in function ", function_name, " is stateful. ",
+            "Saving stateful functions is not supported yet.");
+      }
+    }
+    return Status::OK();
+  }
+
+  bool IsOpWhitelisted(const OpDef* op_def) const {
+    return StringPiece(op_def->name()).ends_with("Dataset") &&
+           HasAttr(op_def, "output_shapes");
+  }
+
+  bool HasAttr(const string& op_type_name, const string& attr_name) const;
+
+  bool HasAttr(const OpDef* op_def, const string& attr_name) const {
+    for (auto attr : op_def->attr()) {
+      if (attr.name() == attr_name) {
+        return true;
+      }
+    }
+    return false;
+  }
+
+  Status AddAttrFunctions(const AttrValue& attr_value, OpKernelContext* ctx) {
+    if (attr_value.has_func()) {
+      TF_RETURN_IF_ERROR(AddFunction(ctx, attr_value.func().name()));
+    } else if (attr_value.has_list()) {
+      for (const NameAttrList& name_attr_list : attr_value.list().func()) {
+        TF_RETURN_IF_ERROR(AddFunction(ctx, name_attr_list.name()));
+      }
+    }
+    return Status::OK();
+  }
+
+  GraphDefBuilder* b_;
+};
+
+class StatsAggregator;
+
+// A cut-down version of OpKernelContext for running computations in
+// iterators. Note that we cannot simply use OpKernelContext here
+// because we might run computation in an iterator whose lifetime is
+// not nested within the lifetime of a single OpKernelContext
+// (e.g. asynchronous prefetching).
+//
+// TODO(mrry): We will probably need to support more of
+// OpKernelContext here. For example, should allocation be handled by
+// the IteratorContext?
+// TODO(mrry): We're making some daring assumptions about the lifetime
+// of the runner passed in here. A runner will be deleted when the original
+// step ends, but all existing runners only close over session-lifetime (or
+// longer-lived) state, so we can make a copy of the function. There's nothing
+// in the definition of the API from which we took the runner to guarantee that
+// what we are doing is safe. We should formalize the properties here.
+class IteratorContext {
+ public:
+  struct Params {
+    // Interface to operating system functionality.
+    Env* env;
+
+    // Function call support.
+    std::function<void(std::function<void()>)> runner = nullptr;
+
+    // A function that returns the current `StatsAggregator` instance to be
+    // used when recording statistics about the iterator.
+    //
+    // NOTE(mrry): This is somewhat awkward, because (i) the `StatsAggregator`
+    // is a property of the `IteratorResource` (which this class does not know
+    // about), and (ii) it can change after the `IteratorContext` has been
+    // created. Better suggestions are welcome!
+    std::function<std::shared_ptr<StatsAggregator>()> stats_aggregator_getter =
+        nullptr;
+  };
+
+  explicit IteratorContext(Params params) : params_(std::move(params)) {}
+
+  Env* env() const { return params_.env; }
+
+  std::function<void(std::function<void()>)>* runner() {
+    return &params_.runner;
+  }
+
+  std::shared_ptr<StatsAggregator> stats_aggregator() {
+    if (params_.stats_aggregator_getter) {
+      return params_.stats_aggregator_getter();
+    } else {
+      return nullptr;
+    }
+  }
+
+ private:
+  Params params_;
+};
+
+// Represents the current position in a range of outputs, where the
+// range of outputs is typically represented by an `DatasetBase`,
+// defined below.
+class IteratorBase {
+ public:
+  virtual ~IteratorBase() {}
+
+  // Gets the next output from the range that this iterator is traversing.
+  //
+  // If at least one output remains in this iterator's range, that
+  // output will be stored in `*out_tensors` and `false` will be
+  // stored in `*end_of_sequence`.
+  //
+  // If no more outputs remain in this iterator's range, `true` will
+  // be stored in `*end_of_sequence`, and the content of
+  // `*out_tensors` will be undefined.
+  //
+  // This method is thread-safe.
+  //
+  // TODO(mrry): Define `GetNextAsync()` or `GetNextManyAsync()`, and
+  // potentially remove this method.
+  virtual Status GetNext(IteratorContext* ctx, std::vector<Tensor>* out_tensors,
+                         bool* end_of_sequence) = 0;
+
+  // Returns a vector of DataType values, representing the respective
+  // element types of each tuple component in the outputs of this
+  // iterator.
+  virtual const DataTypeVector& output_dtypes() const = 0;
+
+  // Returns a vector of tensor shapes, representing the respective
+  // (and possibly partially defined) shapes of each tuple component
+  // in the outputs of this iterator.
+  virtual const std::vector<PartialTensorShape>& output_shapes() const = 0;
+
+  // Saves the state of this iterator.
+  virtual Status Save(OpKernelContext* ctx, IteratorStateWriter* writer) {
+    return SaveInternal(writer);
+  }
+
+  // Restores the state of this iterator.
+  virtual Status Restore(OpKernelContext* ctx, IteratorStateReader* reader) {
+    return RestoreInternal(ctx, reader);
+  }
+
+ protected:
+  // This is needed so that sub-classes of IteratorBase can call
+  // `SaveInternal` on their parent iterators, e.g., in
+  // `RepeatDataasetOp::Dataset`.
+  Status SaveParent(IteratorStateWriter* writer,
+                    const std::unique_ptr<IteratorBase>& parent) {
+    return parent->SaveInternal(writer);
+  }
+
+  // This is needed so that sub-classes of IteratorBase can call
+  // `RestoreInternal` on their parent iterators, e.g., in
+  // `RepeatDataasetOp::Dataset`.
+  Status RestoreParent(OpKernelContext* ctx, IteratorStateReader* reader,
+                       const std::unique_ptr<IteratorBase>& parent) {
+    return parent->RestoreInternal(ctx, reader);
+  }
+
+  // Saves the state of this iterator recursively.
+  virtual Status SaveInternal(IteratorStateWriter* writer) {
+    return errors::Unimplemented("SaveInternal");
+  }
+
+  // Restores the state of this iterator recursively.
+  virtual Status RestoreInternal(OpKernelContext* ctx,
+                                 IteratorStateReader* reader) {
+    return errors::Unimplemented("RestoreInternal");
+  }
+};
+
+// Represents a (potentially infinite) range of outputs, where each
+// output is a tuple of tensors.
+class DatasetBase : public core::RefCounted {
+ public:
+  // Returns a new iterator for iterating over the range of elements in
+  // this dataset.
+  //
+  // This method may be called multiple times on the same instance,
+  // and the resulting iterators will have distinct state. Each
+  // iterator will traverse all elements in this dataset from the
+  // start.
+  //
+  // Ownership of the created iterator will be transferred to the caller.
+  //
+  // The prefix identifies the sequence of iterators leading up to the newly
+  // created iterator.
+  virtual std::unique_ptr<IteratorBase> MakeIterator(
+      const string& prefix) const = 0;
+
+  // Returns a vector of DataType values, representing the respective
+  // element types of each tuple component in the outputs of this
+  // dataset.
+  virtual const DataTypeVector& output_dtypes() const = 0;
+
+  // Returns a vector of tensor shapes, representing the respective
+  // (and possibly partially defined) shapes of each tuple component
+  // in the outputs of this dataset.
+  virtual const std::vector<PartialTensorShape>& output_shapes() const = 0;
+
+  // A human-readable debug string for this dataset.
+  virtual string DebugString() = 0;
+
+  // Serializes the dataset and writes it to the `writer`.
+  virtual Status Save(OpKernelContext* ctx, IteratorStateWriter* writer) const {
+    return errors::Unimplemented("DatasetBase::Save");
+  }
+
+ protected:
+  // TODO(srbs): Ideally all graph related logic should reside in
+  // GraphDatasetBase. However, that would require Datasets defined in all ops
+  // to derive from GraphDatasetBase. Once that is done we can move
+  // DatasetGraphDefBuilder and AsGraphDefInternal to GraphDatasetBase.
+  class DatasetGraphDefBuilder : public GraphDefBuilderWrapper {
+   public:
+    DatasetGraphDefBuilder(GraphDefBuilder* b) : GraphDefBuilderWrapper(b) {}
+    Status AddParentDataset(OpKernelContext* ctx, const DatasetBase* dataset,
+                            Node** output) {
+      return dataset->AsGraphDefInternal(ctx, this, output);
+    }
+  };
+
+  virtual Status AsGraphDefInternal(OpKernelContext* ctx,
+                                    DatasetGraphDefBuilder* b,
+                                    Node** node) const {
+    return AsGraphDefInternal(b, node);
+  }
+
+  virtual Status AsGraphDefInternal(DatasetGraphDefBuilder* b,
+                                    Node** node) const {
+    return errors::Unimplemented("AsGraphDefInternal");
+  }
+};
+
+// Base-class for datasets that are built by ops.
+class GraphDatasetBase : public DatasetBase {
+ public:
+  GraphDatasetBase(OpKernelContext* ctx)
+      : op_name_(ctx->op_kernel().type_string()) {}
+
+  const string op_name() const { return op_name_; }
+
+  Status Save(OpKernelContext* ctx,
+              IteratorStateWriter* writer) const override {
+    string serialized_graph_def;
+    string output_node;
+    TF_RETURN_IF_ERROR(Serialize(ctx, &serialized_graph_def, &output_node));
+    TF_RETURN_IF_ERROR(
+        writer->WriteScalar(kDatasetGraphKey, serialized_graph_def));
+    TF_RETURN_IF_ERROR(
+        writer->WriteScalar(kDatasetGraphOutputNodeKey, output_node));
+    return Status::OK();
+  }
+
+  // Key for storing the Dataset graph in the serialized format.
+  static const char kDatasetGraphKey[];
+
+  // Key for storing the output node of the Dataset graph in the serialized
+  // format.
+  static const char kDatasetGraphOutputNodeKey[];
+
+ private:
+  Status Serialize(OpKernelContext* ctx, string* serialized_graph_def,
+                   string* output_node) const;
+
+  const string op_name_;
+};
+
+// Represents an iterator that is associated with a particular parent dataset.
+template <class DatasetType>
+class DatasetIterator : public IteratorBase {
+ public:
+  struct Params {
+    // Owns one reference on the shared dataset resource.
+    const DatasetType* dataset;
+
+    // Identifies the sequence of iterators leading up to this iterator.
+    const string prefix;
+  };
+
+  explicit DatasetIterator(const Params& params) : params_(params) {
+    params_.dataset->Ref();
+  }
+
+  ~DatasetIterator() override { params_.dataset->Unref(); }
+
+  // The dataset from which this iterator was created.
+  const DatasetType* dataset() const { return params_.dataset; }
+
+  // The sequence of iterators leading up to this iterator.
+  const string prefix() const { return params_.prefix; }
+
+  const DataTypeVector& output_dtypes() const override {
+    return params_.dataset->output_dtypes();
+  }
+
+  const std::vector<PartialTensorShape>& output_shapes() const override {
+    return params_.dataset->output_shapes();
+  }
+
+  Status GetNext(IteratorContext* ctx, std::vector<Tensor>* out_tensors,
+                 bool* end_of_sequence) final {
+    port::Tracing::TraceMe activity(params_.prefix);
+    return GetNextInternal(ctx, out_tensors, end_of_sequence);
+  }
+
+  Status Save(OpKernelContext* ctx, IteratorStateWriter* writer) final {
+    TF_RETURN_IF_ERROR(dataset()->Save(ctx, writer));
+    return IteratorBase::Save(ctx, writer);
+  }
+
+ protected:
+  // Internal implementation of GetNext that is wrapped in tracing logic.
+  virtual Status GetNextInternal(IteratorContext* ctx,
+                                 std::vector<Tensor>* out_tensors,
+                                 bool* end_of_sequence) = 0;
+
+  string full_name(const string& name) const {
+    return strings::StrCat(prefix(), ":", name);
+  }
+
+ private:
+  Params params_;
+};
+
+// Encapsulates the work required to plug a DatasetBase into the core TensorFlow
+// graph execution engine.
+class DatasetOpKernel : public OpKernel {
+ public:
+  DatasetOpKernel(OpKernelConstruction* ctx) : OpKernel(ctx) {}
+  void Compute(OpKernelContext* ctx) final;
+
+ protected:
+  // Subclasses should implement this method. It will be called during Compute
+  // execution.
+  virtual void MakeDataset(OpKernelContext* ctx, DatasetBase** output) = 0;
+
+  template <typename T>
+  Status ParseScalarArgument(OpKernelContext* ctx,
+                             const StringPiece& argument_name, T* output) {
+    const Tensor* argument_t;
+    TF_RETURN_IF_ERROR(ctx->input(argument_name, &argument_t));
+    if (!TensorShapeUtils::IsScalar(argument_t->shape())) {
+      return errors::InvalidArgument(argument_name, " must be a scalar");
+    }
+    *output = argument_t->scalar<T>()();
+    return Status::OK();
+  }
+};
+
+// Encapsulates the work required to plug unary Datasets into the core
+// TensorFlow graph execution engine.
+class UnaryDatasetOpKernel : public DatasetOpKernel {
+ public:
+  UnaryDatasetOpKernel(OpKernelConstruction* ctx) : DatasetOpKernel(ctx) {}
+
+ protected:
+  void MakeDataset(OpKernelContext* ctx, DatasetBase** output) final;
+  virtual void MakeDataset(OpKernelContext* ctx, DatasetBase* input,
+                           DatasetBase** output) = 0;
+};
+
+// Encapsulates the work required to plug binary Datasets into the core
+// TensorFlow graph execution engine.
+class BinaryDatasetOpKernel : public DatasetOpKernel {
+ public:
+  BinaryDatasetOpKernel(OpKernelConstruction* ctx) : DatasetOpKernel(ctx) {}
+
+ protected:
+  void MakeDataset(OpKernelContext* ctx, DatasetBase** output) final;
+  virtual void MakeDataset(OpKernelContext* ctx, DatasetBase* input,
+                           DatasetBase* another_input,
+                           DatasetBase** output) = 0;
+};
+
+// Validates and extracts a `DatasetBase` object from `tensor`.
+//
+// `tensor` must have been written by a call to SetVariantTensorToDataset().
+//
+// The retrieved pointer is a borrowed reference to the dataset, which is owned
+// by the tensor. The consumer must either acquire its own reference to the
+// dataset by calling `(*out_dataset)->Ref()`, or ensure that `tensor` is not
+// destroyed or mutated while the retrieved pointer is in use.
+Status GetDatasetFromVariantTensor(const Tensor& tensor,
+                                   DatasetBase** out_dataset);
+
+// Stores a `DatasetBase` object in `tensor`.
+//
+// The ownership of `dataset` is transferred to `tensor`.
+Status StoreDatasetInVariantTensor(DatasetBase* dataset, Tensor* tensor);
+
+}  // namespace tensorflow
+
+#endif  // THIRD_PARTY_TENSORFLOW_CORE_KERNELS_DATA_DATASET_H_
diff --git a/tensorflow/core/kernels/dataset_utils.cc b/tensorflow/core/kernels/data/dataset_utils.cc
similarity index 97%
rename from tensorflow/core/kernels/dataset_utils.cc
rename to tensorflow/core/kernels/data/dataset_utils.cc
index 3ce64504d0..1afc823e05 100644
--- a/tensorflow/core/kernels/dataset_utils.cc
+++ b/tensorflow/core/kernels/data/dataset_utils.cc
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#include "tensorflow/core/kernels/dataset_utils.h"
+#include "tensorflow/core/kernels/data/dataset_utils.h"
 
 namespace tensorflow {
 
diff --git a/tensorflow/core/kernels/dataset_utils.h b/tensorflow/core/kernels/data/dataset_utils.h
similarity index 77%
rename from tensorflow/core/kernels/dataset_utils.h
rename to tensorflow/core/kernels/data/dataset_utils.h
index eea2b8802b..40bc873584 100644
--- a/tensorflow/core/kernels/dataset_utils.h
+++ b/tensorflow/core/kernels/data/dataset_utils.h
@@ -12,12 +12,12 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
-#ifndef THIRD_PARTY_TENSORFLOW_CORE_KERNELS_DATASET_UTILS_H_
-#define THIRD_PARTY_TENSORFLOW_CORE_KERNELS_DATASET_UTILS_H_
+#ifndef THIRD_PARTY_TENSORFLOW_CORE_KERNELS_DATA_DATASET_UTILS_H_
+#define THIRD_PARTY_TENSORFLOW_CORE_KERNELS_DATA_DATASET_UTILS_H_
 
 #include "tensorflow/core/framework/tensor.h"
-#include "tensorflow/core/kernels/captured_function.h"
-#include "tensorflow/core/kernels/dataset.h"
+#include "tensorflow/core/kernels/data/captured_function.h"
+#include "tensorflow/core/kernels/data/dataset.h"
 
 namespace tensorflow {
 
@@ -32,4 +32,4 @@ Status MakeIteratorFromInputElement(
 
 }  // namespace tensorflow
 
-#endif  // THIRD_PARTY_TENSORFLOW_CORE_KERNELS_DATASET_UTILS_H_
+#endif  // THIRD_PARTY_TENSORFLOW_CORE_KERNELS_DATA_DATASET_UTILS_H_
diff --git a/tensorflow/core/kernels/dense_to_sparse_batch_dataset_op.cc b/tensorflow/core/kernels/data/dense_to_sparse_batch_dataset_op.cc
similarity index 99%
rename from tensorflow/core/kernels/dense_to_sparse_batch_dataset_op.cc
rename to tensorflow/core/kernels/data/dense_to_sparse_batch_dataset_op.cc
index e80d11eaea..fe0e498a3b 100644
--- a/tensorflow/core/kernels/dense_to_sparse_batch_dataset_op.cc
+++ b/tensorflow/core/kernels/data/dense_to_sparse_batch_dataset_op.cc
@@ -12,10 +12,9 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
-#include "tensorflow/core/kernels/dataset.h"
-
 #include "tensorflow/core/framework/partial_tensor_shape.h"
 #include "tensorflow/core/framework/tensor.h"
+#include "tensorflow/core/kernels/data/dataset.h"
 
 namespace tensorflow {
 
diff --git a/tensorflow/core/kernels/filter_dataset_op.cc b/tensorflow/core/kernels/data/filter_dataset_op.cc
similarity index 98%
rename from tensorflow/core/kernels/filter_dataset_op.cc
rename to tensorflow/core/kernels/data/filter_dataset_op.cc
index 04427d296c..9372228465 100644
--- a/tensorflow/core/kernels/filter_dataset_op.cc
+++ b/tensorflow/core/kernels/data/filter_dataset_op.cc
@@ -12,15 +12,13 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
-#include "tensorflow/core/kernels/dataset.h"
-
 #include "tensorflow/core/common_runtime/function.h"
 #include "tensorflow/core/framework/partial_tensor_shape.h"
 #include "tensorflow/core/framework/tensor.h"
+#include "tensorflow/core/kernels/data/captured_function.h"
+#include "tensorflow/core/kernels/data/dataset.h"
 #include "tensorflow/core/lib/random/random.h"
 
-#include "tensorflow/core/kernels/captured_function.h"
-
 namespace tensorflow {
 
 namespace {
diff --git a/tensorflow/core/kernels/flat_map_dataset_op.cc b/tensorflow/core/kernels/data/flat_map_dataset_op.cc
similarity index 98%
rename from tensorflow/core/kernels/flat_map_dataset_op.cc
rename to tensorflow/core/kernels/data/flat_map_dataset_op.cc
index 8fe8489371..a3c03c9916 100644
--- a/tensorflow/core/kernels/flat_map_dataset_op.cc
+++ b/tensorflow/core/kernels/data/flat_map_dataset_op.cc
@@ -12,16 +12,14 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
-#include "tensorflow/core/kernels/dataset.h"
-
 #include "tensorflow/core/common_runtime/function.h"
 #include "tensorflow/core/framework/partial_tensor_shape.h"
 #include "tensorflow/core/framework/tensor.h"
+#include "tensorflow/core/kernels/data/captured_function.h"
+#include "tensorflow/core/kernels/data/dataset.h"
+#include "tensorflow/core/kernels/data/dataset_utils.h"
 #include "tensorflow/core/lib/random/random.h"
 
-#include "tensorflow/core/kernels/captured_function.h"
-#include "tensorflow/core/kernels/dataset_utils.h"
-
 namespace tensorflow {
 
 namespace {
diff --git a/tensorflow/core/kernels/group_by_window_dataset_op.cc b/tensorflow/core/kernels/data/group_by_window_dataset_op.cc
similarity index 98%
rename from tensorflow/core/kernels/group_by_window_dataset_op.cc
rename to tensorflow/core/kernels/data/group_by_window_dataset_op.cc
index c70a92d0d6..35ac67fce5 100644
--- a/tensorflow/core/kernels/group_by_window_dataset_op.cc
+++ b/tensorflow/core/kernels/data/group_by_window_dataset_op.cc
@@ -17,12 +17,11 @@ limitations under the License.
 #include "tensorflow/core/common_runtime/function.h"
 #include "tensorflow/core/framework/partial_tensor_shape.h"
 #include "tensorflow/core/framework/tensor.h"
+#include "tensorflow/core/kernels/data/captured_function.h"
+#include "tensorflow/core/kernels/data/dataset.h"
+#include "tensorflow/core/kernels/data/window_dataset.h"
 #include "tensorflow/core/lib/random/random.h"
 
-#include "tensorflow/core/kernels/captured_function.h"
-#include "tensorflow/core/kernels/dataset.h"
-#include "tensorflow/core/kernels/window_dataset.h"
-
 namespace tensorflow {
 
 namespace {
diff --git a/tensorflow/core/kernels/ignore_errors_dataset_op.cc b/tensorflow/core/kernels/data/ignore_errors_dataset_op.cc
similarity index 98%
rename from tensorflow/core/kernels/ignore_errors_dataset_op.cc
rename to tensorflow/core/kernels/data/ignore_errors_dataset_op.cc
index 8cf263d87f..beedc7c677 100644
--- a/tensorflow/core/kernels/ignore_errors_dataset_op.cc
+++ b/tensorflow/core/kernels/data/ignore_errors_dataset_op.cc
@@ -12,10 +12,9 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
-#include "tensorflow/core/kernels/dataset.h"
-
 #include "tensorflow/core/framework/partial_tensor_shape.h"
 #include "tensorflow/core/framework/tensor.h"
+#include "tensorflow/core/kernels/data/dataset.h"
 #include "tensorflow/core/lib/random/random.h"
 
 namespace tensorflow {
diff --git a/tensorflow/core/kernels/interleave_dataset_op.cc b/tensorflow/core/kernels/data/interleave_dataset_op.cc
similarity index 98%
rename from tensorflow/core/kernels/interleave_dataset_op.cc
rename to tensorflow/core/kernels/data/interleave_dataset_op.cc
index 833e8cb9c5..81d7b75498 100644
--- a/tensorflow/core/kernels/interleave_dataset_op.cc
+++ b/tensorflow/core/kernels/data/interleave_dataset_op.cc
@@ -13,16 +13,14 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
-#include "tensorflow/core/kernels/dataset.h"
-
 #include "tensorflow/core/common_runtime/function.h"
 #include "tensorflow/core/framework/partial_tensor_shape.h"
 #include "tensorflow/core/framework/tensor.h"
+#include "tensorflow/core/kernels/data/captured_function.h"
+#include "tensorflow/core/kernels/data/dataset.h"
+#include "tensorflow/core/kernels/data/dataset_utils.h"
 #include "tensorflow/core/lib/random/random.h"
 
-#include "tensorflow/core/kernels/captured_function.h"
-#include "tensorflow/core/kernels/dataset_utils.h"
-
 namespace tensorflow {
 
 namespace {
diff --git a/tensorflow/core/kernels/iterator_ops.cc b/tensorflow/core/kernels/data/iterator_ops.cc
similarity index 99%
rename from tensorflow/core/kernels/iterator_ops.cc
rename to tensorflow/core/kernels/data/iterator_ops.cc
index 4e81d40a82..b7fdfab5fa 100644
--- a/tensorflow/core/kernels/iterator_ops.cc
+++ b/tensorflow/core/kernels/data/iterator_ops.cc
@@ -20,9 +20,9 @@ limitations under the License.
 #include "tensorflow/core/framework/tensor.h"
 #include "tensorflow/core/framework/variant_op_registry.h"
 #include "tensorflow/core/graph/graph_constructor.h"
-#include "tensorflow/core/kernels/dataset.h"
 #include "tensorflow/core/kernels/ops_util.h"
-#include "tensorflow/core/kernels/stats_aggregator.h"
+#include "tensorflow/core/kernels/data/dataset.h"
+#include "tensorflow/core/kernels/data/stats_aggregator.h"
 #include "tensorflow/core/lib/core/threadpool.h"
 #include "tensorflow/core/lib/gtl/cleanup.h"
 #include "tensorflow/core/lib/random/random.h"
diff --git a/tensorflow/core/kernels/map_and_batch_dataset_op.cc b/tensorflow/core/kernels/data/map_and_batch_dataset_op.cc
similarity index 99%
rename from tensorflow/core/kernels/map_and_batch_dataset_op.cc
rename to tensorflow/core/kernels/data/map_and_batch_dataset_op.cc
index 7cd9ab60ab..2f3959772c 100644
--- a/tensorflow/core/kernels/map_and_batch_dataset_op.cc
+++ b/tensorflow/core/kernels/data/map_and_batch_dataset_op.cc
@@ -17,8 +17,8 @@ limitations under the License.
 #include "tensorflow/core/common_runtime/function.h"
 #include "tensorflow/core/framework/partial_tensor_shape.h"
 #include "tensorflow/core/framework/tensor.h"
-#include "tensorflow/core/kernels/captured_function.h"
-#include "tensorflow/core/kernels/dataset.h"
+#include "tensorflow/core/kernels/data/captured_function.h"
+#include "tensorflow/core/kernels/data/dataset.h"
 #include "tensorflow/core/kernels/inplace_ops_functor.h"
 #include "tensorflow/core/lib/core/blocking_counter.h"
 #include "tensorflow/core/lib/random/random.h"
diff --git a/tensorflow/core/kernels/map_dataset_op.cc b/tensorflow/core/kernels/data/map_dataset_op.cc
similarity index 98%
rename from tensorflow/core/kernels/map_dataset_op.cc
rename to tensorflow/core/kernels/data/map_dataset_op.cc
index f1b44beb7d..8fb1472e52 100644
--- a/tensorflow/core/kernels/map_dataset_op.cc
+++ b/tensorflow/core/kernels/data/map_dataset_op.cc
@@ -12,15 +12,13 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
-#include "tensorflow/core/kernels/dataset.h"
-
 #include "tensorflow/core/common_runtime/function.h"
 #include "tensorflow/core/framework/partial_tensor_shape.h"
 #include "tensorflow/core/framework/tensor.h"
+#include "tensorflow/core/kernels/data/captured_function.h"
+#include "tensorflow/core/kernels/data/dataset.h"
 #include "tensorflow/core/lib/random/random.h"
 
-#include "tensorflow/core/kernels/captured_function.h"
-
 namespace tensorflow {
 
 namespace {
diff --git a/tensorflow/core/kernels/padded_batch_dataset_op.cc b/tensorflow/core/kernels/data/padded_batch_dataset_op.cc
similarity index 99%
rename from tensorflow/core/kernels/padded_batch_dataset_op.cc
rename to tensorflow/core/kernels/data/padded_batch_dataset_op.cc
index cef5bde156..00743324a8 100644
--- a/tensorflow/core/kernels/padded_batch_dataset_op.cc
+++ b/tensorflow/core/kernels/data/padded_batch_dataset_op.cc
@@ -12,11 +12,10 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
-#include "tensorflow/core/kernels/dataset.h"
-
 #include "tensorflow/core/framework/partial_tensor_shape.h"
 #include "tensorflow/core/framework/tensor.h"
 #include "tensorflow/core/framework/tensor_util.h"
+#include "tensorflow/core/kernels/data/dataset.h"
 
 namespace tensorflow {
 
diff --git a/tensorflow/core/kernels/parallel_interleave_dataset_op.cc b/tensorflow/core/kernels/data/parallel_interleave_dataset_op.cc
similarity index 99%
rename from tensorflow/core/kernels/parallel_interleave_dataset_op.cc
rename to tensorflow/core/kernels/data/parallel_interleave_dataset_op.cc
index 56942a5c01..cb6a83606e 100644
--- a/tensorflow/core/kernels/parallel_interleave_dataset_op.cc
+++ b/tensorflow/core/kernels/data/parallel_interleave_dataset_op.cc
@@ -12,13 +12,12 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
-#include "tensorflow/core/kernels/dataset.h"
-
 #include "tensorflow/core/common_runtime/function.h"
 #include "tensorflow/core/framework/partial_tensor_shape.h"
 #include "tensorflow/core/framework/tensor.h"
-#include "tensorflow/core/kernels/captured_function.h"
-#include "tensorflow/core/kernels/dataset_utils.h"
+#include "tensorflow/core/kernels/data/captured_function.h"
+#include "tensorflow/core/kernels/data/dataset.h"
+#include "tensorflow/core/kernels/data/dataset_utils.h"
 #include "tensorflow/core/lib/gtl/cleanup.h"
 #include "tensorflow/core/lib/random/random.h"
 
diff --git a/tensorflow/core/kernels/parallel_map_dataset_op.cc b/tensorflow/core/kernels/data/parallel_map_dataset_op.cc
similarity index 98%
rename from tensorflow/core/kernels/parallel_map_dataset_op.cc
rename to tensorflow/core/kernels/data/parallel_map_dataset_op.cc
index 5ba1ad222d..930ea35859 100644
--- a/tensorflow/core/kernels/parallel_map_dataset_op.cc
+++ b/tensorflow/core/kernels/data/parallel_map_dataset_op.cc
@@ -14,15 +14,13 @@ limitations under the License.
 ==============================================================================*/
 #include <deque>
 
-#include "tensorflow/core/kernels/dataset.h"
-
 #include "tensorflow/core/common_runtime/function.h"
 #include "tensorflow/core/framework/partial_tensor_shape.h"
 #include "tensorflow/core/framework/tensor.h"
+#include "tensorflow/core/kernels/data/captured_function.h"
+#include "tensorflow/core/kernels/data/dataset.h"
 #include "tensorflow/core/lib/random/random.h"
 
-#include "tensorflow/core/kernels/captured_function.h"
-
 namespace tensorflow {
 
 namespace {
diff --git a/tensorflow/core/kernels/prefetch_dataset_op.cc b/tensorflow/core/kernels/data/prefetch_dataset_op.cc
similarity index 99%
rename from tensorflow/core/kernels/prefetch_dataset_op.cc
rename to tensorflow/core/kernels/data/prefetch_dataset_op.cc
index b02269f525..6899767ce5 100644
--- a/tensorflow/core/kernels/prefetch_dataset_op.cc
+++ b/tensorflow/core/kernels/data/prefetch_dataset_op.cc
@@ -16,7 +16,7 @@ limitations under the License.
 
 #include "tensorflow/core/framework/partial_tensor_shape.h"
 #include "tensorflow/core/framework/tensor.h"
-#include "tensorflow/core/kernels/dataset.h"
+#include "tensorflow/core/kernels/data/dataset.h"
 #include "tensorflow/core/lib/core/error_codes.pb.h"
 
 namespace tensorflow {
diff --git a/tensorflow/core/kernels/random_dataset_op.cc b/tensorflow/core/kernels/data/random_dataset_op.cc
similarity index 99%
rename from tensorflow/core/kernels/random_dataset_op.cc
rename to tensorflow/core/kernels/data/random_dataset_op.cc
index 03d481a593..569df12df7 100644
--- a/tensorflow/core/kernels/random_dataset_op.cc
+++ b/tensorflow/core/kernels/data/random_dataset_op.cc
@@ -15,7 +15,7 @@ limitations under the License.
 #include "tensorflow/core/framework/op_kernel.h"
 #include "tensorflow/core/framework/partial_tensor_shape.h"
 #include "tensorflow/core/framework/tensor.h"
-#include "tensorflow/core/kernels/dataset.h"
+#include "tensorflow/core/kernels/data/dataset.h"
 #include "tensorflow/core/lib/random/philox_random.h"
 #include "tensorflow/core/lib/random/random.h"
 #include "tensorflow/core/lib/random/random_distributions.h"
diff --git a/tensorflow/core/kernels/range_dataset_op.cc b/tensorflow/core/kernels/data/range_dataset_op.cc
similarity index 98%
rename from tensorflow/core/kernels/range_dataset_op.cc
rename to tensorflow/core/kernels/data/range_dataset_op.cc
index e7ae840fc7..e75a3f8d4d 100644
--- a/tensorflow/core/kernels/range_dataset_op.cc
+++ b/tensorflow/core/kernels/data/range_dataset_op.cc
@@ -12,10 +12,9 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
-#include "tensorflow/core/kernels/dataset.h"
-
 #include "tensorflow/core/framework/partial_tensor_shape.h"
 #include "tensorflow/core/framework/tensor.h"
+#include "tensorflow/core/kernels/data/dataset.h"
 
 namespace tensorflow {
 
diff --git a/tensorflow/core/kernels/reader_dataset_ops.cc b/tensorflow/core/kernels/data/reader_dataset_ops.cc
similarity index 99%
rename from tensorflow/core/kernels/reader_dataset_ops.cc
rename to tensorflow/core/kernels/data/reader_dataset_ops.cc
index d942ddc4a7..557e98c1e6 100644
--- a/tensorflow/core/kernels/reader_dataset_ops.cc
+++ b/tensorflow/core/kernels/data/reader_dataset_ops.cc
@@ -12,10 +12,9 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
-#include "tensorflow/core/kernels/dataset.h"
-
 #include "tensorflow/core/framework/partial_tensor_shape.h"
 #include "tensorflow/core/framework/tensor.h"
+#include "tensorflow/core/kernels/data/dataset.h"
 #include "tensorflow/core/lib/io/buffered_inputstream.h"
 #include "tensorflow/core/lib/io/inputbuffer.h"
 #include "tensorflow/core/lib/io/random_inputstream.h"
diff --git a/tensorflow/core/kernels/repeat_dataset_op.cc b/tensorflow/core/kernels/data/repeat_dataset_op.cc
similarity index 99%
rename from tensorflow/core/kernels/repeat_dataset_op.cc
rename to tensorflow/core/kernels/data/repeat_dataset_op.cc
index 3d977a0fa3..f5c686dfc8 100644
--- a/tensorflow/core/kernels/repeat_dataset_op.cc
+++ b/tensorflow/core/kernels/data/repeat_dataset_op.cc
@@ -12,10 +12,9 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
-#include "tensorflow/core/kernels/dataset.h"
-
 #include "tensorflow/core/framework/partial_tensor_shape.h"
 #include "tensorflow/core/framework/tensor.h"
+#include "tensorflow/core/kernels/data/dataset.h"
 
 namespace tensorflow {
 
diff --git a/tensorflow/core/kernels/scan_dataset_op.cc b/tensorflow/core/kernels/data/scan_dataset_op.cc
similarity index 98%
rename from tensorflow/core/kernels/scan_dataset_op.cc
rename to tensorflow/core/kernels/data/scan_dataset_op.cc
index d0ba210a0c..84ba051468 100644
--- a/tensorflow/core/kernels/scan_dataset_op.cc
+++ b/tensorflow/core/kernels/data/scan_dataset_op.cc
@@ -18,8 +18,8 @@ limitations under the License.
 #include "tensorflow/core/common_runtime/function.h"
 #include "tensorflow/core/framework/partial_tensor_shape.h"
 #include "tensorflow/core/framework/tensor.h"
-#include "tensorflow/core/kernels/captured_function.h"
-#include "tensorflow/core/kernels/dataset.h"
+#include "tensorflow/core/kernels/data/captured_function.h"
+#include "tensorflow/core/kernels/data/dataset.h"
 #include "tensorflow/core/lib/random/random.h"
 
 namespace tensorflow {
diff --git a/tensorflow/core/kernels/shuffle_dataset_op.cc b/tensorflow/core/kernels/data/shuffle_dataset_op.cc
similarity index 99%
rename from tensorflow/core/kernels/shuffle_dataset_op.cc
rename to tensorflow/core/kernels/data/shuffle_dataset_op.cc
index 72facb3a0d..4ac4ab0f5a 100644
--- a/tensorflow/core/kernels/shuffle_dataset_op.cc
+++ b/tensorflow/core/kernels/data/shuffle_dataset_op.cc
@@ -12,10 +12,9 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
-#include "tensorflow/core/kernels/dataset.h"
-
 #include "tensorflow/core/framework/partial_tensor_shape.h"
 #include "tensorflow/core/framework/tensor.h"
+#include "tensorflow/core/kernels/data/dataset.h"
 #include "tensorflow/core/lib/random/philox_random.h"
 #include "tensorflow/core/lib/random/random.h"
 #include "tensorflow/core/lib/random/random_distributions.h"
diff --git a/tensorflow/core/kernels/skip_dataset_op.cc b/tensorflow/core/kernels/data/skip_dataset_op.cc
similarity index 99%
rename from tensorflow/core/kernels/skip_dataset_op.cc
rename to tensorflow/core/kernels/data/skip_dataset_op.cc
index 1fe49271e2..58a149c7cf 100644
--- a/tensorflow/core/kernels/skip_dataset_op.cc
+++ b/tensorflow/core/kernels/data/skip_dataset_op.cc
@@ -12,10 +12,9 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
-#include "tensorflow/core/kernels/dataset.h"
-
 #include "tensorflow/core/framework/partial_tensor_shape.h"
 #include "tensorflow/core/framework/tensor.h"
+#include "tensorflow/core/kernels/data/dataset.h"
 
 namespace tensorflow {
 
diff --git a/tensorflow/core/kernels/sparse_tensor_slice_dataset_op.cc b/tensorflow/core/kernels/data/sparse_tensor_slice_dataset_op.cc
similarity index 99%
rename from tensorflow/core/kernels/sparse_tensor_slice_dataset_op.cc
rename to tensorflow/core/kernels/data/sparse_tensor_slice_dataset_op.cc
index de5ab1a367..fdfb2b70e0 100644
--- a/tensorflow/core/kernels/sparse_tensor_slice_dataset_op.cc
+++ b/tensorflow/core/kernels/data/sparse_tensor_slice_dataset_op.cc
@@ -14,11 +14,10 @@ limitations under the License.
 ==============================================================================*/
 #include <numeric>
 
-#include "tensorflow/core/kernels/dataset.h"
-
 #include "tensorflow/core/framework/partial_tensor_shape.h"
 #include "tensorflow/core/framework/register_types.h"
 #include "tensorflow/core/framework/tensor.h"
+#include "tensorflow/core/kernels/data/dataset.h"
 #include "tensorflow/core/util/sparse/sparse_tensor.h"
 
 namespace tensorflow {
diff --git a/tensorflow/core/kernels/data/sql/BUILD b/tensorflow/core/kernels/data/sql/BUILD
new file mode 100644
index 0000000000..bec5b2e7d6
--- /dev/null
+++ b/tensorflow/core/kernels/data/sql/BUILD
@@ -0,0 +1,39 @@
+# Description:
+# SQL library.
+#
+package(
+    default_visibility = ["//visibility:public"],
+)
+
+licenses(["notice"])  # Apache 2.0
+
+filegroup(
+    name = "all_files",
+    srcs = glob(
+        ["**/*"],
+        exclude = [
+            "**/METADATA",
+            "**/OWNERS",
+        ],
+    ),
+    visibility = ["//tensorflow:__subpackages__"],
+)
+
+cc_library(
+    name = "sql",
+    srcs = [
+        "driver_manager.cc",
+        "sqlite_query_connection.cc",
+    ],
+    hdrs = [
+        "driver_manager.h",
+        "query_connection.h",
+        "sqlite_query_connection.h",
+    ],
+    deps = [
+        "//tensorflow/core:framework",
+        "//tensorflow/core:lib",
+        "//tensorflow/core/lib/db:sqlite",
+        "@sqlite_archive//:sqlite",
+    ],
+)
diff --git a/tensorflow/core/kernels/sql/driver_manager.cc b/tensorflow/core/kernels/data/sql/driver_manager.cc
similarity index 89%
rename from tensorflow/core/kernels/sql/driver_manager.cc
rename to tensorflow/core/kernels/data/sql/driver_manager.cc
index 9a5d5aa853..ffabda1a8a 100644
--- a/tensorflow/core/kernels/sql/driver_manager.cc
+++ b/tensorflow/core/kernels/data/sql/driver_manager.cc
@@ -12,8 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
-#include "tensorflow/core/kernels/sql/driver_manager.h"
-#include "tensorflow/core/kernels/sql/sqlite_query_connection.h"
+#include "tensorflow/core/kernels/data/sql/driver_manager.h"
+#include "tensorflow/core/kernels/data/sql/sqlite_query_connection.h"
 
 namespace tensorflow {
 
diff --git a/tensorflow/core/kernels/sql/driver_manager.h b/tensorflow/core/kernels/data/sql/driver_manager.h
similarity index 82%
rename from tensorflow/core/kernels/sql/driver_manager.h
rename to tensorflow/core/kernels/data/sql/driver_manager.h
index 53350268d3..0d0c38eb58 100644
--- a/tensorflow/core/kernels/sql/driver_manager.h
+++ b/tensorflow/core/kernels/data/sql/driver_manager.h
@@ -12,10 +12,10 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
-#ifndef THIRD_PARTY_TENSORFLOW_CORE_KERNELS_SQL_DRIVER_MANAGER_H_
-#define THIRD_PARTY_TENSORFLOW_CORE_KERNELS_SQL_DRIVER_MANAGER_H_
+#ifndef THIRD_PARTY_TENSORFLOW_CORE_KERNELS_DATA_SQL_DRIVER_MANAGER_H_
+#define THIRD_PARTY_TENSORFLOW_CORE_KERNELS_DATA_SQL_DRIVER_MANAGER_H_
 
-#include "tensorflow/core/kernels/sql/query_connection.h"
+#include "tensorflow/core/kernels/data/sql/query_connection.h"
 
 namespace tensorflow {
 
@@ -38,4 +38,4 @@ class DriverManager {
 
 }  // namespace tensorflow
 
-#endif  // THIRD_PARTY_TENSORFLOW_CORE_KERNELS_SQL_DRIVER_MANAGER_H_
+#endif  // THIRD_PARTY_TENSORFLOW_CORE_KERNELS_DATA_SQL_DRIVER_MANAGER_H_
diff --git a/tensorflow/core/kernels/sql/query_connection.h b/tensorflow/core/kernels/data/sql/query_connection.h
similarity index 92%
rename from tensorflow/core/kernels/sql/query_connection.h
rename to tensorflow/core/kernels/data/sql/query_connection.h
index f9945aee7d..1947148972 100644
--- a/tensorflow/core/kernels/sql/query_connection.h
+++ b/tensorflow/core/kernels/data/sql/query_connection.h
@@ -12,8 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
-#ifndef THIRD_PARTY_TENSORFLOW_CORE_KERNELS_SQL_QUERY_CONNECTION_H_
-#define THIRD_PARTY_TENSORFLOW_CORE_KERNELS_SQL_QUERY_CONNECTION_H_
+#ifndef THIRD_PARTY_TENSORFLOW_CORE_KERNELS_DATA_SQL_QUERY_CONNECTION_H_
+#define THIRD_PARTY_TENSORFLOW_CORE_KERNELS_DATA_SQL_QUERY_CONNECTION_H_
 
 #include "tensorflow/core/framework/tensor.h"
 
@@ -64,4 +64,4 @@ class QueryConnection {
 
 }  // namespace tensorflow
 
-#endif  // THIRD_PARTY_TENSORFLOW_CORE_KERNELS_SQL_QUERY_CONNECTION_H_
+#endif  // THIRD_PARTY_TENSORFLOW_CORE_KERNELS_DATA_SQL_QUERY_CONNECTION_H_
diff --git a/tensorflow/core/kernels/sql/sqlite_query_connection.cc b/tensorflow/core/kernels/data/sql/sqlite_query_connection.cc
similarity index 98%
rename from tensorflow/core/kernels/sql/sqlite_query_connection.cc
rename to tensorflow/core/kernels/data/sql/sqlite_query_connection.cc
index 1330506d28..abe31261a3 100644
--- a/tensorflow/core/kernels/sql/sqlite_query_connection.cc
+++ b/tensorflow/core/kernels/data/sql/sqlite_query_connection.cc
@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
-#include "tensorflow/core/kernels/sql/sqlite_query_connection.h"
+#include "tensorflow/core/kernels/data/sql/sqlite_query_connection.h"
 
 #include "tensorflow/core/lib/strings/stringprintf.h"
 
diff --git a/tensorflow/core/kernels/sql/sqlite_query_connection.h b/tensorflow/core/kernels/data/sql/sqlite_query_connection.h
similarity index 84%
rename from tensorflow/core/kernels/sql/sqlite_query_connection.h
rename to tensorflow/core/kernels/data/sql/sqlite_query_connection.h
index 435dd8e234..00b7cb3213 100644
--- a/tensorflow/core/kernels/sql/sqlite_query_connection.h
+++ b/tensorflow/core/kernels/data/sql/sqlite_query_connection.h
@@ -12,12 +12,12 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
-#ifndef THIRD_PARTY_TENSORFLOW_CORE_KERNELS_SQL_SQLITE_QUERY_CONNECTION_H_
-#define THIRD_PARTY_TENSORFLOW_CORE_KERNELS_SQL_SQLITE_QUERY_CONNECTION_H_
+#ifndef THIRD_PARTY_TENSORFLOW_CORE_KERNELS_DATA_SQL_SQLITE_QUERY_CONNECTION_H_
+#define THIRD_PARTY_TENSORFLOW_CORE_KERNELS_DATA_SQL_SQLITE_QUERY_CONNECTION_H_
 
 #include <memory>
 
-#include "tensorflow/core/kernels/sql/query_connection.h"
+#include "tensorflow/core/kernels/data/sql/query_connection.h"
 #include "tensorflow/core/lib/db/sqlite.h"
 #include "tensorflow/core/platform/types.h"
 
@@ -53,4 +53,4 @@ class SqliteQueryConnection : public QueryConnection {
 
 }  // namespace tensorflow
 
-#endif  // THIRD_PARTY_TENSORFLOW_CORE_KERNELS_SQL_SQLITE_QUERY_CONNECTION_H_
+#endif  // THIRD_PARTY_TENSORFLOW_CORE_KERNELS_DATA_SQL_SQLITE_QUERY_CONNECTION_H_
diff --git a/tensorflow/core/kernels/sql_dataset_ops.cc b/tensorflow/core/kernels/data/sql_dataset_ops.cc
similarity index 97%
rename from tensorflow/core/kernels/sql_dataset_ops.cc
rename to tensorflow/core/kernels/data/sql_dataset_ops.cc
index 23846d65bb..7230219080 100644
--- a/tensorflow/core/kernels/sql_dataset_ops.cc
+++ b/tensorflow/core/kernels/data/sql_dataset_ops.cc
@@ -16,9 +16,9 @@ limitations under the License.
 
 #include "tensorflow/core/framework/partial_tensor_shape.h"
 #include "tensorflow/core/framework/tensor.h"
-#include "tensorflow/core/kernels/dataset.h"
-#include "tensorflow/core/kernels/sql/driver_manager.h"
-#include "tensorflow/core/kernels/sql/query_connection.h"
+#include "tensorflow/core/kernels/data/dataset.h"
+#include "tensorflow/core/kernels/data/sql/driver_manager.h"
+#include "tensorflow/core/kernels/data/sql/query_connection.h"
 #include "tensorflow/core/lib/io/inputbuffer.h"
 #include "tensorflow/core/lib/io/record_reader.h"
 #include "tensorflow/core/lib/strings/stringprintf.h"
diff --git a/tensorflow/core/kernels/stats_aggregator.h b/tensorflow/core/kernels/data/stats_aggregator.h
similarity index 93%
rename from tensorflow/core/kernels/stats_aggregator.h
rename to tensorflow/core/kernels/data/stats_aggregator.h
index 5f602c5f3b..4cb8dba5cb 100644
--- a/tensorflow/core/kernels/stats_aggregator.h
+++ b/tensorflow/core/kernels/data/stats_aggregator.h
@@ -12,8 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
-#ifndef THIRD_PARTY_TENSORFLOW_CORE_KERNELS_STATS_AGGREGATOR_H_
-#define THIRD_PARTY_TENSORFLOW_CORE_KERNELS_STATS_AGGREGATOR_H_
+#ifndef THIRD_PARTY_TENSORFLOW_CORE_KERNELS_DATA_STATS_AGGREGATOR_H_
+#define THIRD_PARTY_TENSORFLOW_CORE_KERNELS_DATA_STATS_AGGREGATOR_H_
 
 #include <memory>
 #include <string>
@@ -81,4 +81,4 @@ class StatsAggregatorResource : public ResourceBase {
 
 }  // namespace tensorflow
 
-#endif  // THIRD_PARTY_TENSORFLOW_CORE_KERNELS_STATS_AGGREGATOR_H_
+#endif  // THIRD_PARTY_TENSORFLOW_CORE_KERNELS_DATA_STATS_AGGREGATOR_H_
diff --git a/tensorflow/core/kernels/stats_aggregator_ops.cc b/tensorflow/core/kernels/data/stats_aggregator_ops.cc
similarity index 98%
rename from tensorflow/core/kernels/stats_aggregator_ops.cc
rename to tensorflow/core/kernels/data/stats_aggregator_ops.cc
index 037ec64a83..5a2dd9c43d 100644
--- a/tensorflow/core/kernels/stats_aggregator_ops.cc
+++ b/tensorflow/core/kernels/data/stats_aggregator_ops.cc
@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
-#include "tensorflow/core/kernels/stats_aggregator.h"
+#include "tensorflow/core/kernels/data/stats_aggregator.h"
 
 #include <memory>
 
diff --git a/tensorflow/core/kernels/stats_dataset_ops.cc b/tensorflow/core/kernels/data/stats_dataset_ops.cc
similarity index 98%
rename from tensorflow/core/kernels/stats_dataset_ops.cc
rename to tensorflow/core/kernels/data/stats_dataset_ops.cc
index 7b1853aba6..7a8b8b17f0 100644
--- a/tensorflow/core/kernels/stats_dataset_ops.cc
+++ b/tensorflow/core/kernels/data/stats_dataset_ops.cc
@@ -15,8 +15,8 @@ limitations under the License.
 
 #include "tensorflow/core/framework/partial_tensor_shape.h"
 #include "tensorflow/core/framework/tensor.h"
-#include "tensorflow/core/kernels/dataset.h"
-#include "tensorflow/core/kernels/stats_aggregator.h"
+#include "tensorflow/core/kernels/data/dataset.h"
+#include "tensorflow/core/kernels/data/stats_aggregator.h"
 #include "tensorflow/core/lib/random/random.h"
 
 namespace tensorflow {
diff --git a/tensorflow/core/kernels/take_dataset_op.cc b/tensorflow/core/kernels/data/take_dataset_op.cc
similarity index 99%
rename from tensorflow/core/kernels/take_dataset_op.cc
rename to tensorflow/core/kernels/data/take_dataset_op.cc
index 7a6d20d6c7..22824a957e 100644
--- a/tensorflow/core/kernels/take_dataset_op.cc
+++ b/tensorflow/core/kernels/data/take_dataset_op.cc
@@ -12,10 +12,9 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
-#include "tensorflow/core/kernels/dataset.h"
-
 #include "tensorflow/core/framework/partial_tensor_shape.h"
 #include "tensorflow/core/framework/tensor.h"
+#include "tensorflow/core/kernels/data/dataset.h"
 
 namespace tensorflow {
 
diff --git a/tensorflow/core/kernels/tensor_dataset_op.cc b/tensorflow/core/kernels/data/tensor_dataset_op.cc
similarity index 98%
rename from tensorflow/core/kernels/tensor_dataset_op.cc
rename to tensorflow/core/kernels/data/tensor_dataset_op.cc
index 5cf9931188..5f53fe026e 100644
--- a/tensorflow/core/kernels/tensor_dataset_op.cc
+++ b/tensorflow/core/kernels/data/tensor_dataset_op.cc
@@ -12,10 +12,9 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
-#include "tensorflow/core/kernels/dataset.h"
-
 #include "tensorflow/core/framework/partial_tensor_shape.h"
 #include "tensorflow/core/framework/tensor.h"
+#include "tensorflow/core/kernels/data/dataset.h"
 
 namespace tensorflow {
 
diff --git a/tensorflow/core/kernels/tensor_slice_dataset_op.cc b/tensorflow/core/kernels/data/tensor_slice_dataset_op.cc
similarity index 99%
rename from tensorflow/core/kernels/tensor_slice_dataset_op.cc
rename to tensorflow/core/kernels/data/tensor_slice_dataset_op.cc
index 86f8f436d4..c7f9efeea1 100644
--- a/tensorflow/core/kernels/tensor_slice_dataset_op.cc
+++ b/tensorflow/core/kernels/data/tensor_slice_dataset_op.cc
@@ -12,11 +12,10 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
-#include "tensorflow/core/kernels/dataset.h"
-
 #include "tensorflow/core/framework/partial_tensor_shape.h"
 #include "tensorflow/core/framework/tensor.h"
 #include "tensorflow/core/kernels/batch_util.h"
+#include "tensorflow/core/kernels/data/dataset.h"
 
 namespace tensorflow {
 
diff --git a/tensorflow/core/kernels/window_dataset.cc b/tensorflow/core/kernels/data/window_dataset.cc
similarity index 98%
rename from tensorflow/core/kernels/window_dataset.cc
rename to tensorflow/core/kernels/data/window_dataset.cc
index 77345fd3df..815d420c68 100644
--- a/tensorflow/core/kernels/window_dataset.cc
+++ b/tensorflow/core/kernels/data/window_dataset.cc
@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
-#include "tensorflow/core/kernels/window_dataset.h"
+#include "tensorflow/core/kernels/data/window_dataset.h"
 
 namespace tensorflow {
 namespace {
diff --git a/tensorflow/core/kernels/window_dataset.h b/tensorflow/core/kernels/data/window_dataset.h
similarity index 87%
rename from tensorflow/core/kernels/window_dataset.h
rename to tensorflow/core/kernels/data/window_dataset.h
index a4fccf17b4..25396bd3e7 100644
--- a/tensorflow/core/kernels/window_dataset.h
+++ b/tensorflow/core/kernels/data/window_dataset.h
@@ -12,15 +12,15 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
-#ifndef THIRD_PARTY_TENSORFLOW_CORE_KERNELS_WINDOW_DATASET_H_
-#define THIRD_PARTY_TENSORFLOW_CORE_KERNELS_WINDOW_DATASET_H_
+#ifndef THIRD_PARTY_TENSORFLOW_CORE_KERNELS_DATA_WINDOW_DATASET_H_
+#define THIRD_PARTY_TENSORFLOW_CORE_KERNELS_DATA_WINDOW_DATASET_H_
 
 #include <vector>
 
 #include "tensorflow/core/framework/partial_tensor_shape.h"
 #include "tensorflow/core/framework/tensor.h"
 #include "tensorflow/core/framework/types.h"
-#include "tensorflow/core/kernels/dataset.h"
+#include "tensorflow/core/kernels/data/dataset.h"
 
 namespace tensorflow {
 
@@ -45,4 +45,4 @@ Status NewWindowDataset(std::vector<std::vector<Tensor>> elements,
 
 }  // namespace tensorflow
 
-#endif  // THIRD_PARTY_TENSORFLOW_CORE_KERNELS_WINDOW_DATASET_H_
+#endif  // THIRD_PARTY_TENSORFLOW_CORE_KERNELS_DATA_WINDOW_DATASET_H_
diff --git a/tensorflow/core/kernels/zip_dataset_op.cc b/tensorflow/core/kernels/data/zip_dataset_op.cc
similarity index 99%
rename from tensorflow/core/kernels/zip_dataset_op.cc
rename to tensorflow/core/kernels/data/zip_dataset_op.cc
index 31e5737f62..dbc4331c9e 100644
--- a/tensorflow/core/kernels/zip_dataset_op.cc
+++ b/tensorflow/core/kernels/data/zip_dataset_op.cc
@@ -12,10 +12,9 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
-#include "tensorflow/core/kernels/dataset.h"
-
 #include "tensorflow/core/framework/partial_tensor_shape.h"
 #include "tensorflow/core/framework/tensor.h"
+#include "tensorflow/core/kernels/data/dataset.h"
 
 namespace tensorflow {
 
diff --git a/tensorflow/core/kernels/dataset.h b/tensorflow/core/kernels/dataset.h
index 504a88a309..2aa6dbe6f3 100644
--- a/tensorflow/core/kernels/dataset.h
+++ b/tensorflow/core/kernels/dataset.h
@@ -15,564 +15,6 @@ limitations under the License.
 #ifndef THIRD_PARTY_TENSORFLOW_CORE_KERNELS_DATASET_H_
 #define THIRD_PARTY_TENSORFLOW_CORE_KERNELS_DATASET_H_
 
-#include <memory>
-
-#include "tensorflow/core/framework/attr_value.pb.h"
-#include "tensorflow/core/framework/attr_value_util.h"
-#include "tensorflow/core/framework/function.h"
-#include "tensorflow/core/framework/graph.pb.h"
-#include "tensorflow/core/framework/node_def.pb.h"
-#include "tensorflow/core/framework/op_kernel.h"
-#include "tensorflow/core/framework/register_types.h"
-#include "tensorflow/core/framework/variant_encode_decode.h"
-#include "tensorflow/core/framework/variant_tensor_data.h"
-#include "tensorflow/core/lib/strings/str_util.h"
-#include "tensorflow/core/lib/strings/strcat.h"
-#include "tensorflow/core/platform/tracing.h"
-
-// Polymorphic datasets should support all primitive TensorFlow
-// types. Use this macro to expand `m(T)` once for each primitive type
-// `T`, e.g. to build a `switch` statement.
-#define TF_CALL_DATASET_TYPES(m) TF_CALL_ALL_TYPES(m) TF_CALL_QUANTIZED_TYPES(m)
-
-namespace tensorflow {
-
-// Interface for reading values from a key-value store.
-// Used for restoring iterator state.
-class IteratorStateReader {
- public:
-  virtual Status ReadScalar(StringPiece key, int64* val) = 0;
-  virtual Status ReadScalar(StringPiece key, string* val) = 0;
-  virtual Status ReadTensor(StringPiece key, Tensor* val) = 0;
-  virtual bool Contains(StringPiece key) = 0;
-
-  virtual ~IteratorStateReader() {}
-};
-
-// Interface for writing values to a key-value store.
-// Used for saving iterator state.
-class IteratorStateWriter {
- public:
-  virtual Status WriteScalar(StringPiece key, const int64 val) = 0;
-  virtual Status WriteScalar(StringPiece key, const string& val) = 0;
-  virtual Status WriteTensor(StringPiece key, const Tensor& val) = 0;
-
-  virtual ~IteratorStateWriter() {}
-};
-
-// Forward declarations to avoid introducing a dependency on headers in
-// "tensorflow/core/graph/...".
-class GraphDefBuilder;
-class GraphDatasetBase;
-class Node;
-
-// Wrapper around GraphDefBuilder. Used to serialize Dataset graph.
-class GraphDefBuilderWrapper {
- public:
-  explicit GraphDefBuilderWrapper(GraphDefBuilder* b) : b_(b) {}
-
-  // Adds a Const node with scalar value to the Graph.
-  // `*output` contains a pointer to the output `Node`. It is guaranteed to be
-  // non-null if the method returns with an OK status.
-  // The returned Node pointer is owned by the backing Graph of GraphDefBuilder.
-  template <typename T>
-  Status AddScalar(const T& val, Node** output) {
-    Tensor val_t = Tensor(DataTypeToEnum<T>::v(), TensorShape({}));
-    val_t.scalar<T>()() = val;
-    AddTensorInternal(val_t, output);
-    if (*output == nullptr) {
-      return errors::Internal("AddScalar: Failed to build Const op.");
-    }
-    return Status::OK();
-  }
-
-  // Adds a Const node with vector value to the Graph.
-  // `*output` contains a pointer to the output `Node`. It is guaranteed to be
-  // non-null if the method returns with an OK status.
-  // The returned Node pointer is owned by the backing Graph of GraphDefBuilder.
-  // TODO(shivaniagrawal): Consider changing to gtl::ArraySlice?
-  template <typename T>
-  Status AddVector(const std::vector<T>& val, Node** output) {
-    Tensor val_t = Tensor(DataTypeToEnum<T>::v(),
-                          TensorShape({static_cast<int64>(val.size())}));
-    for (int i = 0; i < val.size(); i++) {
-      val_t.flat<T>()(i) = val[i];
-    }
-    AddTensorInternal(val_t, output);
-    if (*output == nullptr) {
-      return errors::Internal("AddVector: Failed to build Const op.");
-    }
-    return Status::OK();
-  }
-
-  // Adds a Const node with Tensor value to the Graph.
-  // `*output` contains a pointer to the output `Node`. It is guaranteed to be
-  // non-null if the method returns with an OK status.
-  // The returned Node pointer is owned by the backing Graph of GraphDefBuilder.
-  Status AddTensor(const Tensor& val, Node** output) {
-    AddTensorInternal(val, output);
-    if (*output == nullptr) {
-      return errors::Internal("AddTesor: Failed to build Const op.");
-    }
-    return Status::OK();
-  }
-
-  Status AddDataset(const GraphDatasetBase* dataset,
-                    const std::vector<Node*>& inputs, Node** output) {
-    return AddDataset(dataset, inputs, {}, output);
-  }
-
-  // Adds a node corresponding to the `DatasetType` to the Graph.
-  // Return value of `DatasetType::op_name()` is used as the op type for the
-  // node.
-  // Values for the output_types and output_shapes node attributes are also
-  // written if those attributes are defined in the OpDef.
-  // `*output` contains a pointer to the output `Node`. It is guaranteed to be
-  // non-null if the method returns with an OK status.
-  // The returned Node pointer is owned by the backing Graph of GraphDefBuilder.
-  Status AddDataset(const GraphDatasetBase* dataset,
-                    const std::vector<Node*>& inputs,
-                    const std::vector<std::pair<StringPiece, AttrValue>>& attrs,
-                    Node** output) {
-    std::vector<std::pair<size_t, Node*>> enumerated_inputs(inputs.size());
-    for (int i = 0; i < inputs.size(); i++) {
-      enumerated_inputs[i] = std::make_pair(i, inputs[i]);
-    }
-    return AddDataset(dataset, enumerated_inputs, {}, attrs, output);
-  }
-
-  Status AddDataset(
-      const GraphDatasetBase* dataset,
-      const std::vector<std::pair<size_t, Node*>>& inputs,
-      const std::vector<std::pair<size_t, gtl::ArraySlice<Node*>>>& list_inputs,
-      const std::vector<std::pair<StringPiece, AttrValue>>& attrs,
-      Node** output);
-
-  // Adds a user-defined function with name `function_name` to the graph and
-  // recursively adds all functions it references. If a function with a matching
-  // name has already been added, returns with OK status. If a user-defined with
-  // name `function_name` is not found in the FunctionLibraryDefinition, returns
-  // an InvalidArgumentError. If the function with name `function_name` or any
-  // of its dependent functions are stateful, returns an InvalidArgument error.
-  Status AddFunction(OpKernelContext* ctx, const string& function_name);
-
-  template <typename T>
-  void BuildAttrValue(const T& value, AttrValue* attr) {
-    SetAttrValue(value, attr);
-  }
-
- private:
-  void AddTensorInternal(const Tensor& val, Node** output);
-
-  Status EnsureFunctionIsStateless(OpKernelContext* ctx,
-                                   const string& function_name) const {
-    const FunctionLibraryDefinition* lib_def =
-        ctx->function_library()->GetFunctionLibraryDefinition();
-    const FunctionDef* function_def = lib_def->Find(function_name);
-    if (!function_def) {
-      return errors::InvalidArgument("Unable to find FunctionDef for ",
-                                     function_name, " in registry.");
-    }
-    for (const NodeDef& node_def : function_def->node_def()) {
-      const OpDef* op_def;
-      TF_RETURN_IF_ERROR(lib_def->LookUpOpDef(node_def.op(), &op_def));
-      // TODO(b/65524810): Hack to allow functions to capture Dataset op
-      // nodes needed for FlatMap. Currently, source datasets nodes have been
-      // marked stateful to avoid constant folding since we do not have a
-      // good way of serializing them.
-      if (IsOpWhitelisted(op_def)) {
-        continue;
-      }
-      if (op_def->is_stateful()) {
-        return errors::InvalidArgument(
-            "Op[name: ", node_def.name(), ", type: ", node_def.op(), "] ",
-            "in function ", function_name, " is stateful. ",
-            "Saving stateful functions is not supported yet.");
-      }
-    }
-    return Status::OK();
-  }
-
-  bool IsOpWhitelisted(const OpDef* op_def) const {
-    return StringPiece(op_def->name()).ends_with("Dataset") &&
-           HasAttr(op_def, "output_shapes");
-  }
-
-  bool HasAttr(const string& op_type_name, const string& attr_name) const;
-
-  bool HasAttr(const OpDef* op_def, const string& attr_name) const {
-    for (auto attr : op_def->attr()) {
-      if (attr.name() == attr_name) {
-        return true;
-      }
-    }
-    return false;
-  }
-
-  Status AddAttrFunctions(const AttrValue& attr_value, OpKernelContext* ctx) {
-    if (attr_value.has_func()) {
-      TF_RETURN_IF_ERROR(AddFunction(ctx, attr_value.func().name()));
-    } else if (attr_value.has_list()) {
-      for (const NameAttrList& name_attr_list : attr_value.list().func()) {
-        TF_RETURN_IF_ERROR(AddFunction(ctx, name_attr_list.name()));
-      }
-    }
-    return Status::OK();
-  }
-
-  GraphDefBuilder* b_;
-};
-
-class StatsAggregator;
-
-// A cut-down version of OpKernelContext for running computations in
-// iterators. Note that we cannot simply use OpKernelContext here
-// because we might run computation in an iterator whose lifetime is
-// not nested within the lifetime of a single OpKernelContext
-// (e.g. asynchronous prefetching).
-//
-// TODO(mrry): We will probably need to support more of
-// OpKernelContext here. For example, should allocation be handled by
-// the IteratorContext?
-// TODO(mrry): We're making some daring assumptions about the lifetime
-// of the runner passed in here. A runner will be deleted when the original
-// step ends, but all existing runners only close over session-lifetime (or
-// longer-lived) state, so we can make a copy of the function. There's nothing
-// in the definition of the API from which we took the runner to guarantee that
-// what we are doing is safe. We should formalize the properties here.
-class IteratorContext {
- public:
-  struct Params {
-    // Interface to operating system functionality.
-    Env* env;
-
-    // Function call support.
-    std::function<void(std::function<void()>)> runner = nullptr;
-
-    // A function that returns the current `StatsAggregator` instance to be
-    // used when recording statistics about the iterator.
-    //
-    // NOTE(mrry): This is somewhat awkward, because (i) the `StatsAggregator`
-    // is a property of the `IteratorResource` (which this class does not know
-    // about), and (ii) it can change after the `IteratorContext` has been
-    // created. Better suggestions are welcome!
-    std::function<std::shared_ptr<StatsAggregator>()> stats_aggregator_getter =
-        nullptr;
-  };
-
-  explicit IteratorContext(Params params) : params_(std::move(params)) {}
-
-  Env* env() const { return params_.env; }
-
-  std::function<void(std::function<void()>)>* runner() {
-    return &params_.runner;
-  }
-
-  std::shared_ptr<StatsAggregator> stats_aggregator() {
-    if (params_.stats_aggregator_getter) {
-      return params_.stats_aggregator_getter();
-    } else {
-      return nullptr;
-    }
-  }
-
- private:
-  Params params_;
-};
-
-// Represents the current position in a range of outputs, where the
-// range of outputs is typically represented by an `DatasetBase`,
-// defined below.
-class IteratorBase {
- public:
-  virtual ~IteratorBase() {}
-
-  // Gets the next output from the range that this iterator is traversing.
-  //
-  // If at least one output remains in this iterator's range, that
-  // output will be stored in `*out_tensors` and `false` will be
-  // stored in `*end_of_sequence`.
-  //
-  // If no more outputs remain in this iterator's range, `true` will
-  // be stored in `*end_of_sequence`, and the content of
-  // `*out_tensors` will be undefined.
-  //
-  // This method is thread-safe.
-  //
-  // TODO(mrry): Define `GetNextAsync()` or `GetNextManyAsync()`, and
-  // potentially remove this method.
-  virtual Status GetNext(IteratorContext* ctx, std::vector<Tensor>* out_tensors,
-                         bool* end_of_sequence) = 0;
-
-  // Returns a vector of DataType values, representing the respective
-  // element types of each tuple component in the outputs of this
-  // iterator.
-  virtual const DataTypeVector& output_dtypes() const = 0;
-
-  // Returns a vector of tensor shapes, representing the respective
-  // (and possibly partially defined) shapes of each tuple component
-  // in the outputs of this iterator.
-  virtual const std::vector<PartialTensorShape>& output_shapes() const = 0;
-
-  // Saves the state of this iterator.
-  virtual Status Save(OpKernelContext* ctx, IteratorStateWriter* writer) {
-    return SaveInternal(writer);
-  }
-
-  // Restores the state of this iterator.
-  virtual Status Restore(OpKernelContext* ctx, IteratorStateReader* reader) {
-    return RestoreInternal(ctx, reader);
-  }
-
- protected:
-  // This is needed so that sub-classes of IteratorBase can call
-  // `SaveInternal` on their parent iterators, e.g., in
-  // `RepeatDataasetOp::Dataset`.
-  Status SaveParent(IteratorStateWriter* writer,
-                    const std::unique_ptr<IteratorBase>& parent) {
-    return parent->SaveInternal(writer);
-  }
-
-  // This is needed so that sub-classes of IteratorBase can call
-  // `RestoreInternal` on their parent iterators, e.g., in
-  // `RepeatDataasetOp::Dataset`.
-  Status RestoreParent(OpKernelContext* ctx, IteratorStateReader* reader,
-                       const std::unique_ptr<IteratorBase>& parent) {
-    return parent->RestoreInternal(ctx, reader);
-  }
-
-  // Saves the state of this iterator recursively.
-  virtual Status SaveInternal(IteratorStateWriter* writer) {
-    return errors::Unimplemented("SaveInternal");
-  }
-
-  // Restores the state of this iterator recursively.
-  virtual Status RestoreInternal(OpKernelContext* ctx,
-                                 IteratorStateReader* reader) {
-    return errors::Unimplemented("RestoreInternal");
-  }
-};
-
-// Represents a (potentially infinite) range of outputs, where each
-// output is a tuple of tensors.
-class DatasetBase : public core::RefCounted {
- public:
-  // Returns a new iterator for iterating over the range of elements in
-  // this dataset.
-  //
-  // This method may be called multiple times on the same instance,
-  // and the resulting iterators will have distinct state. Each
-  // iterator will traverse all elements in this dataset from the
-  // start.
-  //
-  // Ownership of the created iterator will be transferred to the caller.
-  //
-  // The prefix identifies the sequence of iterators leading up to the newly
-  // created iterator.
-  virtual std::unique_ptr<IteratorBase> MakeIterator(
-      const string& prefix) const = 0;
-
-  // Returns a vector of DataType values, representing the respective
-  // element types of each tuple component in the outputs of this
-  // dataset.
-  virtual const DataTypeVector& output_dtypes() const = 0;
-
-  // Returns a vector of tensor shapes, representing the respective
-  // (and possibly partially defined) shapes of each tuple component
-  // in the outputs of this dataset.
-  virtual const std::vector<PartialTensorShape>& output_shapes() const = 0;
-
-  // A human-readable debug string for this dataset.
-  virtual string DebugString() = 0;
-
-  // Serializes the dataset and writes it to the `writer`.
-  virtual Status Save(OpKernelContext* ctx, IteratorStateWriter* writer) const {
-    return errors::Unimplemented("DatasetBase::Save");
-  }
-
- protected:
-  // TODO(srbs): Ideally all graph related logic should reside in
-  // GraphDatasetBase. However, that would require Datasets defined in all ops
-  // to derive from GraphDatasetBase. Once that is done we can move
-  // DatasetGraphDefBuilder and AsGraphDefInternal to GraphDatasetBase.
-  class DatasetGraphDefBuilder : public GraphDefBuilderWrapper {
-   public:
-    DatasetGraphDefBuilder(GraphDefBuilder* b) : GraphDefBuilderWrapper(b) {}
-    Status AddParentDataset(OpKernelContext* ctx, const DatasetBase* dataset,
-                            Node** output) {
-      return dataset->AsGraphDefInternal(ctx, this, output);
-    }
-  };
-
-  virtual Status AsGraphDefInternal(OpKernelContext* ctx,
-                                    DatasetGraphDefBuilder* b,
-                                    Node** node) const {
-    return AsGraphDefInternal(b, node);
-  }
-
-  virtual Status AsGraphDefInternal(DatasetGraphDefBuilder* b,
-                                    Node** node) const {
-    return errors::Unimplemented("AsGraphDefInternal");
-  }
-};
-
-// Base-class for datasets that are built by ops.
-class GraphDatasetBase : public DatasetBase {
- public:
-  GraphDatasetBase(OpKernelContext* ctx)
-      : op_name_(ctx->op_kernel().type_string()) {}
-
-  const string op_name() const { return op_name_; }
-
-  Status Save(OpKernelContext* ctx,
-              IteratorStateWriter* writer) const override {
-    string serialized_graph_def;
-    string output_node;
-    TF_RETURN_IF_ERROR(Serialize(ctx, &serialized_graph_def, &output_node));
-    TF_RETURN_IF_ERROR(
-        writer->WriteScalar(kDatasetGraphKey, serialized_graph_def));
-    TF_RETURN_IF_ERROR(
-        writer->WriteScalar(kDatasetGraphOutputNodeKey, output_node));
-    return Status::OK();
-  }
-
-  // Key for storing the Dataset graph in the serialized format.
-  static const char kDatasetGraphKey[];
-
-  // Key for storing the output node of the Dataset graph in the serialized
-  // format.
-  static const char kDatasetGraphOutputNodeKey[];
-
- private:
-  Status Serialize(OpKernelContext* ctx, string* serialized_graph_def,
-                   string* output_node) const;
-
-  const string op_name_;
-};
-
-// Represents an iterator that is associated with a particular parent dataset.
-template <class DatasetType>
-class DatasetIterator : public IteratorBase {
- public:
-  struct Params {
-    // Owns one reference on the shared dataset resource.
-    const DatasetType* dataset;
-
-    // Identifies the sequence of iterators leading up to this iterator.
-    const string prefix;
-  };
-
-  explicit DatasetIterator(const Params& params) : params_(params) {
-    params_.dataset->Ref();
-  }
-
-  ~DatasetIterator() override { params_.dataset->Unref(); }
-
-  // The dataset from which this iterator was created.
-  const DatasetType* dataset() const { return params_.dataset; }
-
-  // The sequence of iterators leading up to this iterator.
-  const string prefix() const { return params_.prefix; }
-
-  const DataTypeVector& output_dtypes() const override {
-    return params_.dataset->output_dtypes();
-  }
-
-  const std::vector<PartialTensorShape>& output_shapes() const override {
-    return params_.dataset->output_shapes();
-  }
-
-  Status GetNext(IteratorContext* ctx, std::vector<Tensor>* out_tensors,
-                 bool* end_of_sequence) final {
-    port::Tracing::TraceMe activity(params_.prefix);
-    return GetNextInternal(ctx, out_tensors, end_of_sequence);
-  }
-
-  Status Save(OpKernelContext* ctx, IteratorStateWriter* writer) final {
-    TF_RETURN_IF_ERROR(dataset()->Save(ctx, writer));
-    return IteratorBase::Save(ctx, writer);
-  }
-
- protected:
-  // Internal implementation of GetNext that is wrapped in tracing logic.
-  virtual Status GetNextInternal(IteratorContext* ctx,
-                                 std::vector<Tensor>* out_tensors,
-                                 bool* end_of_sequence) = 0;
-
-  string full_name(const string& name) const {
-    return strings::StrCat(prefix(), ":", name);
-  }
-
- private:
-  Params params_;
-};
-
-// Encapsulates the work required to plug a DatasetBase into the core TensorFlow
-// graph execution engine.
-class DatasetOpKernel : public OpKernel {
- public:
-  DatasetOpKernel(OpKernelConstruction* ctx) : OpKernel(ctx) {}
-  void Compute(OpKernelContext* ctx) final;
-
- protected:
-  // Subclasses should implement this method. It will be called during Compute
-  // execution.
-  virtual void MakeDataset(OpKernelContext* ctx, DatasetBase** output) = 0;
-
-  template <typename T>
-  Status ParseScalarArgument(OpKernelContext* ctx,
-                             const StringPiece& argument_name, T* output) {
-    const Tensor* argument_t;
-    TF_RETURN_IF_ERROR(ctx->input(argument_name, &argument_t));
-    if (!TensorShapeUtils::IsScalar(argument_t->shape())) {
-      return errors::InvalidArgument(argument_name, " must be a scalar");
-    }
-    *output = argument_t->scalar<T>()();
-    return Status::OK();
-  }
-};
-
-// Encapsulates the work required to plug unary Datasets into the core
-// TensorFlow graph execution engine.
-class UnaryDatasetOpKernel : public DatasetOpKernel {
- public:
-  UnaryDatasetOpKernel(OpKernelConstruction* ctx) : DatasetOpKernel(ctx) {}
-
- protected:
-  void MakeDataset(OpKernelContext* ctx, DatasetBase** output) final;
-  virtual void MakeDataset(OpKernelContext* ctx, DatasetBase* input,
-                           DatasetBase** output) = 0;
-};
-
-// Encapsulates the work required to plug binary Datasets into the core
-// TensorFlow graph execution engine.
-class BinaryDatasetOpKernel : public DatasetOpKernel {
- public:
-  BinaryDatasetOpKernel(OpKernelConstruction* ctx) : DatasetOpKernel(ctx) {}
-
- protected:
-  void MakeDataset(OpKernelContext* ctx, DatasetBase** output) final;
-  virtual void MakeDataset(OpKernelContext* ctx, DatasetBase* input,
-                           DatasetBase* another_input,
-                           DatasetBase** output) = 0;
-};
-
-// Validates and extracts a `DatasetBase` object from `tensor`.
-//
-// `tensor` must have been written by a call to SetVariantTensorToDataset().
-//
-// The retrieved pointer is a borrowed reference to the dataset, which is owned
-// by the tensor. The consumer must either acquire its own reference to the
-// dataset by calling `(*out_dataset)->Ref()`, or ensure that `tensor` is not
-// destroyed or mutated while the retrieved pointer is in use.
-Status GetDatasetFromVariantTensor(const Tensor& tensor,
-                                   DatasetBase** out_dataset);
-
-// Stores a `DatasetBase` object in `tensor`.
-//
-// The ownership of `dataset` is transferred to `tensor`.
-Status StoreDatasetInVariantTensor(DatasetBase* dataset, Tensor* tensor);
-
-}  // namespace tensorflow
+#include "tensorflow/core/kernels/data/dataset.h"
 
 #endif  // THIRD_PARTY_TENSORFLOW_CORE_KERNELS_DATASET_H_
-- 
GitLab


From b28214f3bfed52d2667641c1775a90aa8bebb986 Mon Sep 17 00:00:00 2001
From: Mark Heffernan <meheff@google.com>
Date: Thu, 14 Dec 2017 16:11:14 -0800
Subject: [PATCH 1032/1225] Add HLO structure verifier to HloVerifier. The
 structure verification includes checks on parent() pointers and that an
 instruction's operands are in the same computation as the instruction.

PiperOrigin-RevId: 179113538
---
 tensorflow/compiler/xla/service/BUILD         |  16 +++
 .../compiler/xla/service/hlo_verifier.cc      |  59 ++++++++++
 .../compiler/xla/service/hlo_verifier_test.cc | 101 ++++++++++++++++++
 tensorflow/compiler/xla/tests/BUILD           |   1 +
 .../compiler/xla/tests/hlo_test_base.cc       |   6 +-
 tensorflow/compiler/xla/tests/hlo_test_base.h |   6 ++
 6 files changed, 188 insertions(+), 1 deletion(-)
 create mode 100644 tensorflow/compiler/xla/service/hlo_verifier_test.cc

diff --git a/tensorflow/compiler/xla/service/BUILD b/tensorflow/compiler/xla/service/BUILD
index 179ab47ae7..318263ec5f 100644
--- a/tensorflow/compiler/xla/service/BUILD
+++ b/tensorflow/compiler/xla/service/BUILD
@@ -1721,6 +1721,22 @@ cc_library(
     ],
 )
 
+tf_cc_test(
+    name = "hlo_verifier_test",
+    srcs = ["hlo_verifier_test.cc"],
+    deps = [
+        ":hlo",
+        ":hlo_verifier",
+        "//tensorflow/compiler/xla:shape_util",
+        "//tensorflow/compiler/xla:test",
+        "//tensorflow/compiler/xla:types",
+        "//tensorflow/compiler/xla:xla_data_proto",
+        "//tensorflow/compiler/xla/tests:hlo_test_base",
+        "//tensorflow/compiler/xla/tests:xla_internal_test_main",
+        "//tensorflow/core:test",
+    ],
+)
+
 cc_library(
     name = "hlo_rematerialization",
     srcs = ["hlo_rematerialization.cc"],
diff --git a/tensorflow/compiler/xla/service/hlo_verifier.cc b/tensorflow/compiler/xla/service/hlo_verifier.cc
index b8fd7a89ef..d963a8a2f4 100644
--- a/tensorflow/compiler/xla/service/hlo_verifier.cc
+++ b/tensorflow/compiler/xla/service/hlo_verifier.cc
@@ -434,6 +434,63 @@ string ComputationsToString(
       });
 }
 
+// Verifies various invariants about the structure of the HLO:
+//
+// (1) each instruction has a non-null parent() set to the HloComputation which
+//     contains it.
+//
+// (2) each computation has a non-null parent() set to the HloModule which
+//     contains it.
+//
+// (3) the operands of each instruction are in the same computation as the
+//     instruction.
+Status VerifyHloStructure(HloModule* module) {
+  for (const HloComputation* computation : module->computations()) {
+    if (computation->parent() == nullptr) {
+      return FailedPrecondition("Computation %s has a null parent pointer",
+                                computation->name().c_str());
+    }
+    if (computation->parent() != module) {
+      return FailedPrecondition(
+          "Computation %s parent() does not point to parent module",
+          computation->name().c_str());
+    }
+
+    for (const HloInstruction* instruction : computation->instructions()) {
+      if (instruction->parent() == nullptr) {
+        return FailedPrecondition("Instruction %s has a null parent pointer",
+                                  instruction->name().c_str());
+      }
+      if (instruction->parent() != computation) {
+        return FailedPrecondition(
+            "Instruction %s parent() does not point to parent computation",
+            instruction->name().c_str());
+      }
+    }
+  }
+
+  // Check that operands are in the same computation separately from verifying
+  // parent() correctness so conditions like a null HloInstruction::parent() are
+  // identified and reported explicitly above rather than reporting a mismatched
+  // operand.
+  for (const HloComputation* computation : module->computations()) {
+    for (const HloInstruction* instruction : computation->instructions()) {
+      for (int i = 0; i < instruction->operand_count(); ++i) {
+        const HloInstruction* operand = instruction->operand(i);
+        if (operand->parent() != instruction->parent()) {
+          return FailedPrecondition(
+              "Operand %d (%s) of instruction %s is in a different "
+              "computation: %s vs %s",
+              i, operand->name().c_str(), instruction->name().c_str(),
+              operand->parent()->name().c_str(),
+              instruction->parent()->name().c_str());
+        }
+      }
+    }
+  }
+  return tensorflow::Status::OK();
+}
+
 }  // namespace
 
 Status HloVerifier::CheckFusionInstruction(HloInstruction* fusion) const {
@@ -554,6 +611,8 @@ Status HloVerifier::CheckFusionInstruction(HloInstruction* fusion) const {
 }
 
 StatusOr<bool> HloVerifier::Run(HloModule* module) {
+  TF_RETURN_IF_ERROR(VerifyHloStructure(module));
+
   tensorflow::gtl::FlatMap<string, const HloInstruction*> instructions;
   ShapeVerifier shape_verifier(shape_size_fn_);
 
diff --git a/tensorflow/compiler/xla/service/hlo_verifier_test.cc b/tensorflow/compiler/xla/service/hlo_verifier_test.cc
new file mode 100644
index 0000000000..2a3b55decc
--- /dev/null
+++ b/tensorflow/compiler/xla/service/hlo_verifier_test.cc
@@ -0,0 +1,101 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/compiler/xla/service/hlo_verifier.h"
+
+#include <memory>
+#include <utility>
+
+#include "tensorflow/compiler/xla/service/hlo_computation.h"
+#include "tensorflow/compiler/xla/service/hlo_instruction.h"
+#include "tensorflow/compiler/xla/service/hlo_opcode.h"
+#include "tensorflow/compiler/xla/shape_util.h"
+#include "tensorflow/compiler/xla/test.h"
+#include "tensorflow/compiler/xla/tests/hlo_test_base.h"
+#include "tensorflow/compiler/xla/types.h"
+#include "tensorflow/compiler/xla/xla_data.pb.h"
+#include "tensorflow/core/lib/core/status_test_util.h"
+
+namespace xla {
+namespace {
+
+using ::testing::HasSubstr;
+
+using HloVerifierTest = HloTestBase;
+
+TEST_F(HloVerifierTest, NullInstructionParent) {
+  HloComputation::Builder builder(TestName());
+  const Shape scalar_shape = ShapeUtil::MakeShape(F32, {});
+  HloInstruction* param = builder.AddInstruction(
+      HloInstruction::CreateParameter(0, scalar_shape, "param"));
+  HloInstruction* negate = builder.AddInstruction(
+      HloInstruction::CreateUnary(scalar_shape, HloOpcode::kNegate, param));
+  auto module = CreateNewModule();
+  module->AddEntryComputation(builder.Build());
+
+  TF_ASSERT_OK(verifier().Run(module.get()).status());
+
+  negate->set_parent(nullptr);
+
+  auto status = verifier().Run(module.get()).status();
+  ASSERT_FALSE(status.ok());
+  EXPECT_THAT(status.error_message(), HasSubstr("has a null parent pointer"));
+}
+
+TEST_F(HloVerifierTest, NullComputationParent) {
+  HloComputation::Builder builder(TestName());
+  const Shape scalar_shape = ShapeUtil::MakeShape(F32, {});
+  HloInstruction* param = builder.AddInstruction(
+      HloInstruction::CreateParameter(0, scalar_shape, "param"));
+  builder.AddInstruction(
+      HloInstruction::CreateUnary(scalar_shape, HloOpcode::kNegate, param));
+  auto module = CreateNewModule();
+  HloComputation* computation = module->AddEntryComputation(builder.Build());
+
+  TF_ASSERT_OK(verifier().Run(module.get()).status());
+
+  computation->set_parent(nullptr);
+
+  auto status = verifier().Run(module.get()).status();
+  ASSERT_FALSE(status.ok());
+  EXPECT_THAT(status.error_message(), HasSubstr("has a null parent pointer"));
+}
+
+TEST_F(HloVerifierTest, DifferentOperandParents) {
+  HloComputation::Builder builder(TestName());
+  const Shape scalar_shape = ShapeUtil::MakeShape(F32, {});
+  HloInstruction* param = builder.AddInstruction(
+      HloInstruction::CreateParameter(0, scalar_shape, "param"));
+  HloInstruction* negate = builder.AddInstruction(
+      HloInstruction::CreateUnary(scalar_shape, HloOpcode::kNegate, param));
+  auto module = CreateNewModule();
+  module->AddEntryComputation(builder.Build());
+
+  HloComputation::Builder emb_builder(TestName());
+  HloInstruction* emb_param = emb_builder.AddInstruction(
+      HloInstruction::CreateParameter(0, scalar_shape, "param"));
+  module->AddEmbeddedComputation(emb_builder.Build());
+
+  TF_ASSERT_OK(verifier().Run(module.get()).status());
+  TF_ASSERT_OK(negate->ReplaceOperandWith(0, emb_param));
+
+  auto status = verifier().Run(module.get()).status();
+  ASSERT_FALSE(status.ok());
+  EXPECT_THAT(status.error_message(),
+              HasSubstr("is in a different computation"));
+}
+
+}  // namespace
+}  // namespace xla
diff --git a/tensorflow/compiler/xla/tests/BUILD b/tensorflow/compiler/xla/tests/BUILD
index 6af01ae80d..7d7b13da84 100644
--- a/tensorflow/compiler/xla/tests/BUILD
+++ b/tensorflow/compiler/xla/tests/BUILD
@@ -117,6 +117,7 @@ cc_library(
         "//tensorflow/compiler/xla/service:computation_layout",
         "//tensorflow/compiler/xla/service:hlo",
         "//tensorflow/compiler/xla/service:hlo_runner",
+        "//tensorflow/compiler/xla/service:hlo_verifier",
         "//tensorflow/compiler/xla/service:interpreter_plugin",  # reference backend
         "//tensorflow/compiler/xla/service:platform_util",
         "//tensorflow/compiler/xla/tools/parser:hlo_parser",
diff --git a/tensorflow/compiler/xla/tests/hlo_test_base.cc b/tensorflow/compiler/xla/tests/hlo_test_base.cc
index 2b38f9c719..f9458f5b74 100644
--- a/tensorflow/compiler/xla/tests/hlo_test_base.cc
+++ b/tensorflow/compiler/xla/tests/hlo_test_base.cc
@@ -90,7 +90,11 @@ HloTestBase::HloTestBase()
 
 HloTestBase::HloTestBase(se::Platform* test_platform,
                          se::Platform* reference_platform)
-    : test_runner_(test_platform), reference_runner_(reference_platform) {}
+    : test_runner_(test_platform), reference_runner_(reference_platform) {
+  hlo_verifier_ = MakeUnique<HloVerifier>([this](const Shape& shape) {
+    return backend().transfer_manager()->GetByteSizeRequirement(shape);
+  });
+}
 
 /* static */
 std::unique_ptr<HloModule> HloTestBase::CreateNewModule() {
diff --git a/tensorflow/compiler/xla/tests/hlo_test_base.h b/tensorflow/compiler/xla/tests/hlo_test_base.h
index 3cbbb7aa24..2c5ce04402 100644
--- a/tensorflow/compiler/xla/tests/hlo_test_base.h
+++ b/tensorflow/compiler/xla/tests/hlo_test_base.h
@@ -24,6 +24,7 @@ limitations under the License.
 #include "tensorflow/compiler/xla/service/computation_layout.h"
 #include "tensorflow/compiler/xla/service/hlo_module.h"
 #include "tensorflow/compiler/xla/service/hlo_runner.h"
+#include "tensorflow/compiler/xla/service/hlo_verifier.h"
 #include "tensorflow/compiler/xla/service/platform_util.h"
 #include "tensorflow/compiler/xla/shape_layout.h"
 #include "tensorflow/compiler/xla/statusor.h"
@@ -209,6 +210,9 @@ class HloTestBase : public ::testing::Test {
         ->Clear();
   }
 
+  // Return an HLO verifier constructed for the test backend.
+  HloVerifier& verifier() const { return *hlo_verifier_; }
+
   static string TestName();
 
   // Returns the backend owned by the test runner.
@@ -217,6 +221,8 @@ class HloTestBase : public ::testing::Test {
   HloRunner test_runner_;
   HloRunner reference_runner_;
 
+  std::unique_ptr<HloVerifier> hlo_verifier_;
+
   ErrorSpec error_spec_{0.0001};
 
  private:
-- 
GitLab


From 9a6cd47ced30c4c45085131e1369832fdbf2369c Mon Sep 17 00:00:00 2001
From: Yao Zhang <yaozhang@google.com>
Date: Thu, 14 Dec 2017 16:35:59 -0800
Subject: [PATCH 1033/1225] Fix missing port number in a few places for shape
 checking.

PiperOrigin-RevId: 179116834
---
 .../grappler/optimizers/layout_optimizer.cc   | 83 +++++++++++++------
 .../python/grappler/layout_optimizer_test.py  | 36 ++++++++
 2 files changed, 93 insertions(+), 26 deletions(-)

diff --git a/tensorflow/core/grappler/optimizers/layout_optimizer.cc b/tensorflow/core/grappler/optimizers/layout_optimizer.cc
index a6172c5fa0..a61080253d 100644
--- a/tensorflow/core/grappler/optimizers/layout_optimizer.cc
+++ b/tensorflow/core/grappler/optimizers/layout_optimizer.cc
@@ -286,23 +286,31 @@ class NodeProcessor : public GraphProcessor {
  protected:
   bool IsPortDimsN(const NodeDef& node, int port, int n) const {
     if (node.attr().find("_output_shapes") != node.attr().end()) {
-      auto shape = node.attr().at("_output_shapes").list().shape(port);
-      if (shape.unknown_rank()) {
-        return false;
-      }
-      if (shape.dim_size() == n) {
-        return true;
+      if (node.attr().at("_output_shapes").list().shape_size() > port) {
+        auto shape = node.attr().at("_output_shapes").list().shape(port);
+        if (shape.unknown_rank()) {
+          return false;
+        }
+        if (shape.dim_size() == n) {
+          return true;
+        }
       }
     }
     return false;
   }
 
-  bool IsDimsN(const NodeDef& node, int n) const {
+  bool IsPortZeroDimsN(const NodeDef& node, int n) const {
     return IsPortDimsN(node, 0, n);
   }
 
-  bool IsDimsFour(const NodeDef& node) const {
-    return NodeProcessor::IsDimsN(node, 4) || IsNodeNCHWToNHWC(node.name());
+  bool IsPortZeroDimsFour(const NodeDef& node) const {
+    return NodeProcessor::IsPortZeroDimsN(node, 4) ||
+           IsNodeNCHWToNHWC(node.name());
+  }
+
+  bool IsPortDimsFour(const NodeDef& node, int port) const {
+    return NodeProcessor::IsPortDimsN(node, port, 4) ||
+           IsNodeNCHWToNHWC(node.name());
   }
 
   bool IsNHWC() const {
@@ -332,8 +340,8 @@ class NodeProcessor : public GraphProcessor {
   }
 
   virtual bool ShouldProcess() const {
-    return !MustPreserve() && IsNHWC() && IsDimsFour(*node_) && HasOutputs() &&
-           IsOnGPU();
+    return !MustPreserve() && IsNHWC() && IsPortZeroDimsFour(*node_) &&
+           HasOutputs() && IsOnGPU();
   }
 
   virtual bool IsOnGPU() const {
@@ -726,7 +734,9 @@ class BiasAddGradProcessor : public NodeProcessor {
     }
     auto input = node_map_->GetNode(node_->input(0));
     if (input) {
-      if (IsNHWC() && IsDimsFour(*input)) {
+      int port;
+      ParseNodeName(node_->input(0), &port);
+      if (IsNHWC() && IsPortDimsFour(*input, port)) {
         return true;
       }
     }
@@ -743,8 +753,8 @@ class Conv2DProcessor : public NodeProcessor {
 
  protected:
   bool ShouldProcess() const override {
-    return !MustPreserve() && IsNHWC() && IsDimsFour(*node_) && HasOutputs() &&
-           (!IsGemmUsed() || no_gemm_) && IsOnGPU();
+    return !MustPreserve() && IsNHWC() && IsPortZeroDimsFour(*node_) &&
+           HasOutputs() && (!IsGemmUsed() || no_gemm_) && IsOnGPU();
   }
 
   TensorShapeProto GetShape(const string& input_name) const {
@@ -902,7 +912,7 @@ class AgnosticNodeProcessor : public NodeProcessor {
 
  protected:
   bool ShouldProcess() const override {
-    return !MustPreserve() && IsDimsFour(*node_) && HasOutputs() &&
+    return !MustPreserve() && IsPortZeroDimsFour(*node_) && HasOutputs() &&
            IsNodeAfterNCHWToNHWC() && IsOnGPU();
   }
 
@@ -987,7 +997,7 @@ class BinaryOpProcessor : public AgnosticNodeProcessor {
 
  protected:
   bool ShouldProcess() const override {
-    return !MustPreserve() && IsDimsFour(*node_) && HasOutputs() &&
+    return !MustPreserve() && IsPortZeroDimsFour(*node_) && HasOutputs() &&
            IsNodeAfterNCHWToNHWC() &&
            (IsNDOperateWithMD(4, 0) || IsNDOperateWithMD(4, 1) ||
             IsNDOperateWithMD(4, 4) || IsNDOperateWithMD(0, 4) ||
@@ -999,10 +1009,14 @@ class BinaryOpProcessor : public AgnosticNodeProcessor {
     std::vector<int> input_pos;
     auto input0 = node_map_->GetNode(node_->input(0));
     auto input1 = node_map_->GetNode(node_->input(1));
-    if (IsDimsFour(*input0)) {
+    int input0_port;
+    ParseNodeName(node_->input(0), &input0_port);
+    int input1_port;
+    ParseNodeName(node_->input(1), &input1_port);
+    if (IsPortDimsFour(*input0, input0_port)) {
       input_pos.push_back(0);
     }
-    if (IsDimsFour(*input1)) {
+    if (IsPortDimsFour(*input1, input1_port)) {
       input_pos.push_back(1);
     }
     return input_pos;
@@ -1011,9 +1025,16 @@ class BinaryOpProcessor : public AgnosticNodeProcessor {
   bool IsNDOperateWithMD(int n, int m) const {
     auto input0 = node_map_->GetNode(node_->input(0));
     auto input1 = node_map_->GetNode(node_->input(1));
+    int input0_port;
+    ParseNodeName(node_->input(0), &input0_port);
+    int input1_port;
+    ParseNodeName(node_->input(1), &input1_port);
+
     if (input0 && input1) {
-      bool input0_is_n = (n == 4) ? IsDimsFour(*input0) : IsDimsN(*input0, n);
-      bool input1_is_m = (m == 4) ? IsDimsFour(*input1) : IsDimsN(*input1, m);
+      bool input0_is_n = (n == 4) ? IsPortDimsFour(*input0, input0_port)
+                                  : IsPortDimsN(*input0, input0_port, n);
+      bool input1_is_m = (m == 4) ? IsPortDimsFour(*input1, input1_port)
+                                  : IsPortDimsN(*input1, input1_port, m);
       return input0_is_n && input1_is_m;
     }
     return false;
@@ -1082,8 +1103,14 @@ class BinaryOpProcessor : public AgnosticNodeProcessor {
           AddPrefixToNodeName(base_name, kReshapeConst, "-");
       auto input_node = node_map_->GetNode(node_->input(vector_index));
       TF_RETURN_IF_ERROR(HasAttribute(*input_node, "_output_shapes"));
-      int vector_size =
-          input_node->attr().at("_output_shapes").list().shape(0).dim(0).size();
+      int port;
+      ParseNodeName(node_->input(vector_index), &port);
+      int vector_size = input_node->attr()
+                            .at("_output_shapes")
+                            .list()
+                            .shape(port)
+                            .dim(0)
+                            .size();
       AddNodeShapeConst(shape_const_node_name, vector_size,
                         NodeName(node_->input(vector_index)));
       TF_RETURN_IF_ERROR(HasAttribute(*node_, "T"));
@@ -1141,7 +1168,7 @@ class PadProcessor : public AgnosticNodeProcessor {
 
  protected:
   bool ShouldProcess() const override {
-    return !MustPreserve() && IsDimsFour(*node_) && HasOutputs() &&
+    return !MustPreserve() && IsPortZeroDimsFour(*node_) && HasOutputs() &&
            IsNodeAfterNCHWToNHWC() && PaddingSupported() && IsOnGPU();
   }
   Status CustomizedProcessing() override { return UpdateAttrValueOfInput(1); }
@@ -1216,7 +1243,9 @@ class ShapeProcessor : public AgnosticNodeProcessor {
     std::vector<int> input_pos;
     for (int i = 0; i < node_->input_size(); i++) {
       auto input = node_map_->GetNode(node_->input(i));
-      if (IsDimsFour(*input) &&
+      int port;
+      ParseNodeName(node_->input(i), &port);
+      if (IsPortDimsFour(*input, port) &&
           (IsNodeAfterNCHWToNHWC(*input) || IsNodeNCHWToNHWC(input->name()))) {
         input_pos.push_back(i);
       }
@@ -1267,7 +1296,7 @@ class SqueezeProcessor : public AgnosticNodeProcessor {
 
  protected:
   bool ShouldProcess() const override {
-    return !MustPreserve() && IsDimsN(*node_, 2) && HasOutputs() &&
+    return !MustPreserve() && IsPortZeroDimsN(*node_, 2) && HasOutputs() &&
            IsNodeAfterNCHWToNHWC() && IsInputConvertible() && IsAlongDimHW() &&
            IsOnGPU();
   }
@@ -1318,8 +1347,10 @@ class SumProcessor : public AgnosticNodeProcessor {
  protected:
   bool ShouldProcess() const override {
     auto input0 = node_map_->GetNode(node_->input(0));
+    int port;
+    ParseNodeName(node_->input(0), &port);
     return !MustPreserve() && HasOutputs() && IsNodeAfterNCHWToNHWC() &&
-           IsDimsFour(*input0) && IsAlongDimNHW() && IsOnGPU();
+           IsPortDimsFour(*input0, port) && IsAlongDimNHW() && IsOnGPU();
   }
 
   Status AddLayoutTransposeToOutputs() override { return Status::OK(); }
diff --git a/tensorflow/python/grappler/layout_optimizer_test.py b/tensorflow/python/grappler/layout_optimizer_test.py
index 3b7941f7c3..d9b979e623 100644
--- a/tensorflow/python/grappler/layout_optimizer_test.py
+++ b/tensorflow/python/grappler/layout_optimizer_test.py
@@ -78,6 +78,17 @@ def _two_layer_model(x):
   return h_pool2
 
 
+def _model_with_second_port():
+  random_seed.set_random_seed(0)
+  x = random_ops.truncated_normal([2, 5, 5, 4], seed=0)
+  scale = constant_op.constant(0.1, shape=[4])
+  offset = constant_op.constant(0.3, shape=[4])
+  y, mean, _ = nn.fused_batch_norm(x, scale, offset)
+  mul = math_ops.add(y, mean)
+  output = array_ops.identity(mul)
+  return output
+
+
 def _model_with_branch(x):
   x_image = array_ops.reshape(x, [-1, 28, 28, 1])
   w_conv1 = _weight([5, 5, 1, 32])
@@ -416,6 +427,31 @@ class LayoutOptimizerTest(test.TestCase):
                     nodes)
       self.assertAllClose(output_val_ref, output_val, atol=1e-3)
 
+  def testBinaryOpSecondPort(self):
+    if test.is_gpu_available(cuda_only=True):
+      output = _model_with_second_port()
+
+      with session.Session() as sess:
+        output_val_ref = sess.run(output)
+
+      with session.Session(config=_get_config()) as sess:
+        metadata = config_pb2.RunMetadata()
+        output_val = sess.run(output, run_metadata=metadata)
+
+      nodes = []
+      num_transposes = 0
+      for node in metadata.cost_graph.node:
+        if node.name.startswith('LayoutOptimizerTranspose'):
+          num_transposes += 1
+        nodes.append(node.name)
+
+      expected_num_transposes = 2
+      self.assertEqual(expected_num_transposes, num_transposes)
+      self.assertIn('LayoutOptimizerTransposeNHWCToNCHW-FusedBatchNorm-0',
+                    nodes)
+      self.assertIn('LayoutOptimizerTransposeNCHWToNHWC-Add-0-0', nodes)
+      self.assertAllClose(output_val_ref, output_val, atol=1e-3)
+
   def testGradient(self):
     meta_graph = _simple_metagraph()
     rewrite_options = rewriter_config_pb2.RewriterConfig(
-- 
GitLab


From 0ea2d74f883914109eb154bcf2a7d61ae0557f2d Mon Sep 17 00:00:00 2001
From: Justin Lebar <jlebar@google.com>
Date: Thu, 14 Dec 2017 16:52:40 -0800
Subject: [PATCH 1034/1225] [XLA] Remove the notion of a "parameter name"
 separate from the instruction's name.

Also set the instruction's name in the HLO parser, so that after
parsing, the instructions have the names they're given in the input
string.

PiperOrigin-RevId: 179119003
---
 tensorflow/compiler/xla/service/hlo.proto        |  6 ++++--
 .../compiler/xla/service/hlo_computation.cc      |  6 +++---
 .../compiler/xla/service/hlo_instruction.cc      |  5 +----
 .../compiler/xla/service/hlo_instruction.h       | 16 ++--------------
 tensorflow/compiler/xla/service/hlo_module.cc    |  4 ++--
 .../compiler/xla/service/user_computation.cc     |  4 ++--
 .../compiler/xla/tools/parser/hlo_parser.cc      |  2 ++
 .../compiler/xla/tools/parser/hlo_parser_test.cc | 12 ++++++++++++
 8 files changed, 28 insertions(+), 27 deletions(-)

diff --git a/tensorflow/compiler/xla/service/hlo.proto b/tensorflow/compiler/xla/service/hlo.proto
index 5d0cfba1fc..e4aed7593c 100644
--- a/tensorflow/compiler/xla/service/hlo.proto
+++ b/tensorflow/compiler/xla/service/hlo.proto
@@ -36,6 +36,9 @@ option cc_enable_arenas = true;
 
 // Serialization of HloInstruction.
 message HloInstructionProto {
+  reserved 10;
+  reserved "parameter_name";
+
   string name = 1;
   string opcode = 2;
   xla.Shape shape = 3;
@@ -50,9 +53,8 @@ message HloInstructionProto {
   // Literal, only present for kConstant.
   xla.LiteralProto literal = 8;
 
-  // Parameter info, only present for kParameter.
+  // Parameter number is only present for kParameter.
   int64 parameter_number = 9;
-  string parameter_name = 10;
 
   // Fusion state, only present for kFusion.
   string fusion_kind = 11;
diff --git a/tensorflow/compiler/xla/service/hlo_computation.cc b/tensorflow/compiler/xla/service/hlo_computation.cc
index 2fac52d853..4f6feefb43 100644
--- a/tensorflow/compiler/xla/service/hlo_computation.cc
+++ b/tensorflow/compiler/xla/service/hlo_computation.cc
@@ -131,9 +131,9 @@ Status HloComputation::RemoveParameter(int64 param_no) {
 
   while (param_no < param_instructions_.size()) {
     param_instruction = param_instructions_[param_no];
-    string param_name = param_instruction->parameter_name();
+    string param_name = param_instruction->name();
     // Fusion parameters are named foo.param_1, bar.param_2, etc. We are
-    // renumbering the parameters so replace the final number in the name with
+    // renumbering the parameters, so replace the final number in the name with
     // the updated value.
     const string param_underscore = ".param_";
     size_t index = param_name.rfind(param_underscore);
@@ -535,7 +535,7 @@ ProgramShape HloComputation::ComputeProgramShape() const {
 
   for (auto* param_instruction : param_instructions_) {
     *program_shape.add_parameters() = param_instruction->shape();
-    *program_shape.add_parameter_names() = param_instruction->parameter_name();
+    *program_shape.add_parameter_names() = param_instruction->name();
   }
   *program_shape.mutable_result() = root_instruction_->shape();
 
diff --git a/tensorflow/compiler/xla/service/hlo_instruction.cc b/tensorflow/compiler/xla/service/hlo_instruction.cc
index 9d377198d5..9e37ab64a0 100644
--- a/tensorflow/compiler/xla/service/hlo_instruction.cc
+++ b/tensorflow/compiler/xla/service/hlo_instruction.cc
@@ -104,7 +104,6 @@ StatusOr<std::unique_ptr<HloInstruction>> HloInstruction::CreateFromProto(
     instruction->literal_ = MakeUnique<Literal>(proto.literal());
   }
   instruction->parameter_number_ = proto.parameter_number();
-  instruction->parameter_name_ = proto.parameter_name();
 
   instruction->tuple_index_ = proto.tuple_index();
   for (int64 dimension : proto.dimensions()) {
@@ -154,7 +153,6 @@ StatusOr<std::unique_ptr<HloInstruction>> HloInstruction::CreateFromProto(
   auto instruction =
       WrapUnique(new HloInstruction(HloOpcode::kParameter, shape));
   instruction->parameter_number_ = parameter_number;
-  instruction->parameter_name_ = name;
   instruction->name_ = name;
   return instruction;
 }
@@ -1245,7 +1243,7 @@ std::unique_ptr<HloInstruction> HloInstruction::CloneWithNewOperands(
       clone = CloneFusionWithNewOperands(shape, new_operands, module);
       break;
     case HloOpcode::kParameter:
-      clone = CreateParameter(parameter_number_, shape, parameter_name_);
+      clone = CreateParameter(parameter_number_, shape, name_);
       break;
     case HloOpcode::kBatchNormTraining:
       CHECK_EQ(new_operands.size(), 3);
@@ -2113,7 +2111,6 @@ HloInstructionProto HloInstruction::ToProto() const {
     *proto.mutable_literal() = literal_->ToProto();
   }
   proto.set_parameter_number(parameter_number_);
-  proto.set_parameter_name(parameter_name_);
   if (opcode() == HloOpcode::kFusion) {
     proto.set_fusion_kind(xla::ToString(fusion_kind()));
     *proto.mutable_fused_instructions_computation() =
diff --git a/tensorflow/compiler/xla/service/hlo_instruction.h b/tensorflow/compiler/xla/service/hlo_instruction.h
index 094dbc5b2d..47c3fb684e 100644
--- a/tensorflow/compiler/xla/service/hlo_instruction.h
+++ b/tensorflow/compiler/xla/service/hlo_instruction.h
@@ -601,16 +601,6 @@ class HloInstruction {
     return parameter_number_;
   }
 
-  const string& parameter_name() const {
-    CHECK_EQ(HloOpcode::kParameter, opcode_);
-    return parameter_name_;
-  }
-
-  void set_parameter_name(const string& str) {
-    CHECK_EQ(HloOpcode::kParameter, opcode_);
-    parameter_name_ = str;
-  }
-
   // Returns the dimension sizes or numbers associated with this instruction.
   //
   // Precondition: opcode() is one of: concatenate, reduce, broadcast, reshape,
@@ -1073,10 +1063,9 @@ class HloInstruction {
   std::tuple<bool, std::vector<int64>, std::vector<int64>>
   ReshapeMerelyInsertsOrDeletes1SizedDimensions() const;
 
-  // Returns a string identifier for this instruction. If no string identifier
-  // has been explicitly set, then the identifier is the serialized pointer to
-  // this instruction.
+  // Gets/sets the string identifier for this instruction.
   const string& name() const { return name_; }
+  void set_name(tensorflow::StringPiece name) { name_ = name.ToString(); }
 
   // Use the given NameUniquer to select a unique name for the instruction based
   // on the instruction's existing name.
@@ -1268,7 +1257,6 @@ class HloInstruction {
 
   // For parameter instructions this field holds the parameter number.
   int64 parameter_number_ = 0;
-  string parameter_name_;
 
   // Name of a global symbol to call, only present for kCustomCall.
   string custom_call_target_;
diff --git a/tensorflow/compiler/xla/service/hlo_module.cc b/tensorflow/compiler/xla/service/hlo_module.cc
index 1f7d8ed991..7d3ea8def7 100644
--- a/tensorflow/compiler/xla/service/hlo_module.cc
+++ b/tensorflow/compiler/xla/service/hlo_module.cc
@@ -230,8 +230,8 @@ StatusOr<ProgramShape> ProgramShapeFromProto(const HloModuleProto& module) {
           << "Entry computation has more than one parameter instruction "
              "with parameter number "
           << instruction.parameter_number();
-      parameters[instruction.parameter_number()] = {
-          instruction.parameter_name(), &instruction.shape()};
+      parameters[instruction.parameter_number()] = {instruction.name(),
+                                                    &instruction.shape()};
     }
   }
   TF_RET_CHECK(root != nullptr)
diff --git a/tensorflow/compiler/xla/service/user_computation.cc b/tensorflow/compiler/xla/service/user_computation.cc
index e6893c8133..4a4f00f4ea 100644
--- a/tensorflow/compiler/xla/service/user_computation.cc
+++ b/tensorflow/compiler/xla/service/user_computation.cc
@@ -1552,8 +1552,8 @@ UserComputation::ComputeProgramShape(
             request.request().parameter_request();
         int64 param_no = parameter_request.parameter();
         // Parameters may be out of order so expand ProgramShape parameters
-        // until
-        // it is at least large enough to hold the current parameter number.
+        // until it is at least large enough to hold the current parameter
+        // number.
         while (program_shape->parameters_size() <= param_no) {
           program_shape->add_parameters();
           program_shape->add_parameter_names();
diff --git a/tensorflow/compiler/xla/tools/parser/hlo_parser.cc b/tensorflow/compiler/xla/tools/parser/hlo_parser.cc
index 3bb86cf7b8..710e76f53d 100644
--- a/tensorflow/compiler/xla/tools/parser/hlo_parser.cc
+++ b/tensorflow/compiler/xla/tools/parser/hlo_parser.cc
@@ -936,6 +936,8 @@ bool HloParser::ParseInstruction(HloComputation::Builder* builder,
                                HloOpcodeString(opcode)));
   }
 
+  instruction->set_name(name);
+
   // Add common attrs (sharding, control predecessors) to the instruction, if
   // they were seen.
   if (sharding) {
diff --git a/tensorflow/compiler/xla/tools/parser/hlo_parser_test.cc b/tensorflow/compiler/xla/tools/parser/hlo_parser_test.cc
index 98f9f4d333..5c12a991cc 100644
--- a/tensorflow/compiler/xla/tools/parser/hlo_parser_test.cc
+++ b/tensorflow/compiler/xla/tools/parser/hlo_parser_test.cc
@@ -740,6 +740,18 @@ ENTRY %CustomCall () -> f32[1,2,3] {
   ROOT %custom-call = f32[1,2,3]{0,2,1} custom-call(f32[1]{0} %constant), custom_call_target="foo\"bar"
 }
 
+)"
+},
+// Variables with non-default names
+{
+"NonDefaultNames",
+R"(HloModule add_constants_module:
+
+ENTRY %add_constants () -> f32[] {
+  %foo = f32[] constant(3.14)
+  ROOT %bar = f32[] add(f32[] %foo, f32[] %foo)
+}
+
 )"
 },
   });
-- 
GitLab


From c02147704b1244010a93557de33ad4c64a20a5a5 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 14 Dec 2017 16:57:09 -0800
Subject: [PATCH 1035/1225] Adding basic support for constant Tensorflow 'Neg'
 (tf.negative) ops. This only handles constant operations a la
 kTensorFlowRsqrt.

PiperOrigin-RevId: 179119551
---
 .../graph_transformations/propagate_fixed_sizes.cc  |  1 +
 .../graph_transformations/resolve_constant_unary.cc |  8 ++++++--
 tensorflow/contrib/lite/toco/import_tensorflow.cc   | 13 +++++++++++++
 tensorflow/contrib/lite/toco/model.h                | 11 +++++++++++
 tensorflow/contrib/lite/toco/tflite/operator.cc     |  1 +
 tensorflow/contrib/lite/toco/tooling_util.cc        |  1 +
 6 files changed, 33 insertions(+), 2 deletions(-)

diff --git a/tensorflow/contrib/lite/toco/graph_transformations/propagate_fixed_sizes.cc b/tensorflow/contrib/lite/toco/graph_transformations/propagate_fixed_sizes.cc
index 786d3da7cf..5f2fa7e439 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/propagate_fixed_sizes.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/propagate_fixed_sizes.cc
@@ -965,6 +965,7 @@ bool PropagateFixedSizes::Run(Model* model, std::size_t op_index) {
     case OperatorType::kLocalResponseNormalization:
     case OperatorType::kTensorFlowIdentity:
     case OperatorType::kFakeQuant:
+    case OperatorType::kNeg:
     case OperatorType::kTensorFlowRsqrt:
     case OperatorType::kTensorFlowSqrt:
     case OperatorType::kTensorFlowSquare:
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_unary.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_unary.cc
index ebc110483e..26ff9d887b 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_unary.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_unary.cc
@@ -33,6 +33,7 @@ bool ResolveConstantUnaryOperator::Run(Model* model, std::size_t op_index) {
   const auto* unary_op = unary_it->get();
   // Test for unary ops of types that we know how to resolve
   if (unary_op->type != OperatorType::kCast &&
+      unary_op->type != OperatorType::kNeg &&
       unary_op->type != OperatorType::kTensorFlowRsqrt &&
       unary_op->type != OperatorType::kTensorFlowSqrt &&
       unary_op->type != OperatorType::kTensorFlowSquare &&
@@ -168,7 +169,8 @@ bool ResolveConstantUnaryOperator::Run(Model* model, std::size_t op_index) {
       max = std::max(max, (*input_float_data)[i]);
     }
     output_float_data[0] = max;
-  } else if (unary_op->type == OperatorType::kTensorFlowRsqrt ||
+  } else if (unary_op->type == OperatorType::kNeg ||
+             unary_op->type == OperatorType::kTensorFlowRsqrt ||
              unary_op->type == OperatorType::kTensorFlowSqrt ||
              unary_op->type == OperatorType::kTensorFlowSquare) {
     // Element-wise ops. Should have perfectly matching sizes here.
@@ -179,7 +181,9 @@ bool ResolveConstantUnaryOperator::Run(Model* model, std::size_t op_index) {
     for (int i = 0; i < output_buffer_size; i++) {
       const float val = (*input_float_data)[i];
       float outval = 0.f;
-      if (unary_op->type == OperatorType::kTensorFlowRsqrt) {
+      if (unary_op->type == OperatorType::kNeg) {
+        outval = -val;
+      } else if (unary_op->type == OperatorType::kTensorFlowRsqrt) {
         outval = 1.0f / std::sqrt(val);
       } else if (unary_op->type == OperatorType::kTensorFlowSqrt) {
         outval = std::sqrt(val);
diff --git a/tensorflow/contrib/lite/toco/import_tensorflow.cc b/tensorflow/contrib/lite/toco/import_tensorflow.cc
index 99cf1a7ca8..156b5e1266 100644
--- a/tensorflow/contrib/lite/toco/import_tensorflow.cc
+++ b/tensorflow/contrib/lite/toco/import_tensorflow.cc
@@ -537,6 +537,17 @@ void ConvertFakeQuantWithMinMaxVars(
   model->operators.emplace_back(op);
 }
 
+void ConvertNegOperator(const NodeDef& node,
+                        const TensorFlowImportFlags& tf_import_flags,
+                        Model* model) {
+  CHECK_EQ(node.op(), "Neg");
+  CHECK_EQ(GetInputsCount(node, tf_import_flags), 1);
+  auto* op = new NegOperator;
+  op->inputs.push_back(node.input(0));
+  op->outputs.push_back(node.name());
+  model->operators.emplace_back(op);
+}
+
 void ConvertRsqrtOperator(const NodeDef& node,
                           const TensorFlowImportFlags& tf_import_flags,
                           Model* model) {
@@ -1738,6 +1749,8 @@ std::unique_ptr<Model> ImportTensorFlowGraphDef(
       ConvertFakeQuantWithMinMaxVars(node, tf_import_flags, model);
     } else if (node.op() == "FakeQuantWithMinMaxArgs") {
       ConvertFakeQuantWithMinMaxArgs(node, tf_import_flags, model);
+    } else if (node.op() == "Neg") {
+      ConvertNegOperator(node, tf_import_flags, model);
     } else if (node.op() == "Rsqrt") {
       ConvertRsqrtOperator(node, tf_import_flags, model);
     } else if (node.op() == "Squeeze") {
diff --git a/tensorflow/contrib/lite/toco/model.h b/tensorflow/contrib/lite/toco/model.h
index a53c751d3c..d155d2bb5c 100644
--- a/tensorflow/contrib/lite/toco/model.h
+++ b/tensorflow/contrib/lite/toco/model.h
@@ -97,6 +97,7 @@ enum class OperatorType {
   kTensorFlowMinimum,
   kTensorFlowMatMul,
   kTensorFlowMerge,
+  kNeg,
   kTensorFlowReshape,
   kTensorFlowRsqrt,
   kTensorFlowShape,
@@ -863,6 +864,16 @@ struct RankOperator : Operator {
   RankOperator() : Operator(OperatorType::kRank) {}
 };
 
+// Element-wise negation (-x) operator.
+//
+// Inputs:
+//   inputs[0]: required: the input array
+//
+// TensorFlow equivalent: Neg
+struct NegOperator : Operator {
+  NegOperator() : Operator(OperatorType::kNeg) {}
+};
+
 // Element-wise reciprocal-square-root (x^-0.5) operator.
 //
 // Inputs:
diff --git a/tensorflow/contrib/lite/toco/tflite/operator.cc b/tensorflow/contrib/lite/toco/tflite/operator.cc
index 7fee47a90b..7a68c6dbc9 100644
--- a/tensorflow/contrib/lite/toco/tflite/operator.cc
+++ b/tensorflow/contrib/lite/toco/tflite/operator.cc
@@ -614,6 +614,7 @@ std::vector<std::unique_ptr<BaseOperator>> BuildOperatorList() {
 
   // There operators are supported by Toco, but not by TF Lite, and has no
   // attributes.
+  ops.emplace_back(new SimpleOperator<NegOperator>("NEG", OperatorType::kNeg));
   ops.emplace_back(new SimpleOperator<TensorFlowRsqrtOperator>(
       "RSQRT", OperatorType::kTensorFlowRsqrt));
   ops.emplace_back(
diff --git a/tensorflow/contrib/lite/toco/tooling_util.cc b/tensorflow/contrib/lite/toco/tooling_util.cc
index 39b030c338..d8824015ee 100644
--- a/tensorflow/contrib/lite/toco/tooling_util.cc
+++ b/tensorflow/contrib/lite/toco/tooling_util.cc
@@ -238,6 +238,7 @@ const char* OperatorTypeName(OperatorType type) {
     HANDLE_OPERATORTYPENAME_CASE(TensorFlowMerge)
     HANDLE_OPERATORTYPENAME_CASE(TensorFlowMin)
     HANDLE_OPERATORTYPENAME_CASE(TensorFlowMinimum)
+    HANDLE_OPERATORTYPENAME_CASE(Neg)
     HANDLE_OPERATORTYPENAME_CASE(Pad)
     HANDLE_OPERATORTYPENAME_CASE(StridedSlice)
     HANDLE_OPERATORTYPENAME_CASE(Stack)
-- 
GitLab


From 1d976cbd2ab76c778de994e8b23534f684f65521 Mon Sep 17 00:00:00 2001
From: Yifei Feng <fengyifei2026@gmail.com>
Date: Thu, 14 Dec 2017 17:16:46 -0800
Subject: [PATCH 1036/1225] Add description for new PR workflow.

---
 CONTRIBUTING.md | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 1b537ca73c..b50b89c351 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -20,6 +20,9 @@ Follow either of the two links above to access the appropriate CLA and instructi
 If you have improvements to TensorFlow, send us your pull requests! For those
 just getting started, Github has a [howto](https://help.github.com/articles/using-pull-requests/).
 
+TensorFlow team members will be assigned to review your pull requests. Once the pull requests are approved and pass continuous integration checks, we will merge the pull request.
+For some pull requests, we will apply the patch for each pull request to our internal version control system first, and export the change out as a new commit later, at which point the original pull request will be closed. The commits in the pull request will be squashed into a single commit with the pull request creator as the author. These pull requests will be labeled as pending merge internally. This process might take up to a few business days. 
+
 If you want to contribute but you're not sure where to start, take a look at the
 [issues with the "contributions welcome" label](https://github.com/tensorflow/tensorflow/labels/stat%3Acontributions%20welcome).
 These are issues that we believe are particularly well suited for outside
-- 
GitLab


From 5eccb29930deeccd7199a79539f3c60e3769db30 Mon Sep 17 00:00:00 2001
From: Sanjoy Das <sanjoy@google.com>
Date: Thu, 14 Dec 2017 17:23:18 -0800
Subject: [PATCH 1037/1225] [XLA] Change some dot test cases to be parametric

 - Use a generator instead of manually writing out all the test configurations

 - Call the tests MatrixDotF32_12_117_7_MajorToMinorFF etc. instead of
   MatrixDotF32_12_117_7_MinorToMajorFF.  I think this is the correct naming
   scheme since the T/F denotes whether an operand's dimensions are laid out as
   major to minor or not.  If this is correct, names like
   SquareMatrixDotF32MinorToMajorTF are also incorrect and I'll fix those in a
   separate CL.

 - Remove a couple of unnecessary vector-matrix product tests -- we don't need
   to test with different layouts since the layouts are only used for the
   GlobalData literals that are passed in as arguments to the computation, and
   not for the operations in the computation itself.  I suspect this is
   accidental though -- we probably should be changing the layouts of the
   computations as well?

PiperOrigin-RevId: 179122732
---
 .../compiler/xla/tests/dot_operation_test.cc  | 378 ++++++------------
 1 file changed, 124 insertions(+), 254 deletions(-)

diff --git a/tensorflow/compiler/xla/tests/dot_operation_test.cc b/tensorflow/compiler/xla/tests/dot_operation_test.cc
index bb7af4c4b8..cc683701e6 100644
--- a/tensorflow/compiler/xla/tests/dot_operation_test.cc
+++ b/tensorflow/compiler/xla/tests/dot_operation_test.cc
@@ -51,10 +51,6 @@ class DotOperationTest : public ClientLibraryTestBase {
   template <typename Element>
   void TestNonsquareMatrixDot(bool lhs_row_major = false,
                               bool rhs_row_major = false);
-  void TestMatrixDot(int M, int K, int N, bool lhs_row_major = false,
-                     bool rhs_row_major = false);
-  void TestMatrixDotWithAdd(int M, int K, int N, bool dot_lhs_row_major,
-                            bool dot_rhs_row_major, bool addend_row_major);
 };
 
 XLA_TEST_F(DotOperationTest, ZeroElementVectorDotF32) {
@@ -201,277 +197,151 @@ void DotOperationTest::TestSquareMatrixDot(bool lhs_row_major,
       &builder, expected, {lhs_handle.get(), rhs_handle.get()}, error_spec_);
 }
 
-void DotOperationTest::TestMatrixDot(int M, int K, int N, bool lhs_row_major,
-                                     bool rhs_row_major) {
-  std::unique_ptr<Array2D<float>> lhs_data =
-      MakeLinspaceArray2D(0.0, 1.0, M, K);
-  std::unique_ptr<Literal> lhs_lit = Literal::CreateR2FromArray2DWithLayout(
-      *lhs_data,
-      LayoutUtil::MakeLayout(MinorToMajorForIsRowMajor(lhs_row_major)));
-  auto lhs_handle = client_->TransferToServer(*lhs_lit).ConsumeValueOrDie();
+struct DotTestParam {
+  int m;
+  int k;
+  int n;
+  bool dot_lhs_row_major;
+  bool dot_rhs_row_major;
+  bool has_addend;
+  bool addend_row_major;
+};
 
-  std::unique_ptr<Array2D<float>> rhs_data =
-      MakeLinspaceArray2D(0.0, 1.0, K, N);
-  std::unique_ptr<Literal> rhs_lit = Literal::CreateR2FromArray2DWithLayout(
-      *rhs_data,
-      LayoutUtil::MakeLayout(MinorToMajorForIsRowMajor(rhs_row_major)));
-  auto rhs_handle = client_->TransferToServer(*rhs_lit).ConsumeValueOrDie();
+string PrintDotTestParam(
+    const ::testing::TestParamInfo<DotTestParam>& test_param) {
+  const DotTestParam& param = test_param.param;
+  if (param.has_addend) {
+    return tensorflow::strings::StrCat(param.m, "x", param.k, "x", param.n,
+                                       "_MajorToMinor",
+                                       param.dot_lhs_row_major ? "T" : "F",
+                                       param.dot_rhs_row_major ? "T" : "F",
+                                       param.addend_row_major ? "T" : "F");
+  } else {
+    return tensorflow::strings::StrCat(param.m, "x", param.k, "x", param.n,
+                                       "_MajorToMinor",
+                                       param.dot_lhs_row_major ? "T" : "F",
+                                       param.dot_rhs_row_major ? "T" : "F");
+  }
+}
 
-  ComputationBuilder builder(client_, TestName());
-  auto prim_type = primitive_util::NativeToPrimitiveType<float>();
-  auto result = builder.Dot(
-      builder.Parameter(0, ShapeUtil::MakeShape(prim_type, {M, K}), "lhs"),
-      builder.Parameter(1, ShapeUtil::MakeShape(prim_type, {K, N}), "rhs"));
+class ParametricDotTest : public DotOperationTest,
+                          public ::testing::WithParamInterface<DotTestParam> {};
 
-  std::unique_ptr<Array2D<float>> expected =
-      ReferenceUtil::MatmulArray2D(*lhs_data, *rhs_data);
+XLA_TEST_P(ParametricDotTest, TestF32) {
+  DotTestParam param = GetParam();
 
-  ComputeAndCompareR2<float>(&builder, *expected,
-                             {lhs_handle.get(), rhs_handle.get()},
-                             ErrorSpec(0.3, 3e-3));
-}
-
-void DotOperationTest::TestMatrixDotWithAdd(int M, int K, int N,
-                                            bool dot_lhs_row_major,
-                                            bool dot_rhs_row_major,
-                                            bool addend_row_major) {
   std::unique_ptr<Array2D<float>> dot_lhs_data =
-      MakeLinspaceArray2D(0.0, 1.0, M, K);
+      MakeLinspaceArray2D(0.0, 1.0, param.m, param.k);
   std::unique_ptr<Literal> dot_lhs_lit = Literal::CreateR2FromArray2DWithLayout(
-      *dot_lhs_data,
-      LayoutUtil::MakeLayout(MinorToMajorForIsRowMajor(dot_lhs_row_major)));
-  auto dot_lhs_handle =
+      *dot_lhs_data, LayoutUtil::MakeLayout(
+                         MinorToMajorForIsRowMajor(param.dot_lhs_row_major)));
+  std::unique_ptr<GlobalData> dot_lhs_handle =
       client_->TransferToServer(*dot_lhs_lit).ConsumeValueOrDie();
 
   std::unique_ptr<Array2D<float>> dot_rhs_data =
-      MakeLinspaceArray2D(0.0, 1.0, K, N);
+      MakeLinspaceArray2D(0.0, 1.0, param.k, param.n);
   std::unique_ptr<Literal> dot_rhs_lit = Literal::CreateR2FromArray2DWithLayout(
-      *dot_rhs_data,
-      LayoutUtil::MakeLayout(MinorToMajorForIsRowMajor(dot_rhs_row_major)));
-  auto dot_rhs_handle =
+      *dot_rhs_data, LayoutUtil::MakeLayout(
+                         MinorToMajorForIsRowMajor(param.dot_rhs_row_major)));
+  std::unique_ptr<GlobalData> dot_rhs_handle =
       client_->TransferToServer(*dot_rhs_lit).ConsumeValueOrDie();
 
-  std::unique_ptr<Array2D<float>> addend_data =
-      MakeLinspaceArray2D(0.0, 1.0, M, N);
-  std::unique_ptr<Literal> addend_lit = Literal::CreateR2FromArray2DWithLayout(
-      *addend_data,
-      LayoutUtil::MakeLayout(MinorToMajorForIsRowMajor(addend_row_major)));
-  auto addend_handle =
-      client_->TransferToServer(*addend_lit).ConsumeValueOrDie();
+  std::unique_ptr<Array2D<float>> addend_data;
+  std::unique_ptr<Literal> addend_lit;
+  std::unique_ptr<GlobalData> addend_handle;
+
+  if (param.has_addend) {
+    addend_data = MakeLinspaceArray2D(0.0, 1.0, param.m, param.n);
+    addend_lit = Literal::CreateR2FromArray2DWithLayout(
+        *addend_data, LayoutUtil::MakeLayout(
+                          MinorToMajorForIsRowMajor(param.addend_row_major)));
+    addend_handle = client_->TransferToServer(*addend_lit).ConsumeValueOrDie();
+  }
 
   ComputationBuilder builder(client_, TestName());
   auto prim_type = primitive_util::NativeToPrimitiveType<float>();
-  auto result = builder.Add(
-      builder.Dot(builder.Parameter(0, ShapeUtil::MakeShape(prim_type, {M, K}),
-                                    "dot_lhs"),
-                  builder.Parameter(1, ShapeUtil::MakeShape(prim_type, {K, N}),
-                                    "dot_rhs")),
-      builder.Parameter(2, ShapeUtil::MakeShape(prim_type, {M, N}), "addend"));
-
-  std::unique_ptr<Array2D<float>> expected = ReferenceUtil::ApplyElementwise2D(
-      std::plus<float>(),
-      *ReferenceUtil::MatmulArray2D(*dot_lhs_data, *dot_rhs_data),
-      *addend_data);
-
-  ComputeAndCompareR2<float>(
-      &builder, *expected,
-      {dot_lhs_handle.get(), dot_rhs_handle.get(), addend_handle.get()},
-      ErrorSpec(0.3, 3e-3));
-}
-
-XLA_TEST_F(DotOperationTest, MatrixDotF32_12_117_7_MinorToMajorTF) {
-  TestMatrixDot(12, 117, 7, true, false);
-}
-
-XLA_TEST_F(DotOperationTest, MatrixDotF32_12_117_7_MinorToMajorFT) {
-  TestMatrixDot(12, 117, 7, false, true);
-}
-
-XLA_TEST_F(DotOperationTest, MatrixDotF32_12_117_7_MinorToMajorTT) {
-  TestMatrixDot(12, 117, 7, true, true);
-}
-
-XLA_TEST_F(DotOperationTest, MatrixDotF32_12_117_7_MinorToMajorFF) {
-  TestMatrixDot(12, 117, 7, false, false);
-}
-
-XLA_TEST_F(DotOperationTest, MatrixDotF32_270_270_520_MinorToMajorTT) {
-  TestMatrixDot(270, 270, 520, true, true);
-}
-
-XLA_TEST_F(DotOperationTest, MatrixDotF32_270_270_520_MinorToMajorTF) {
-  TestMatrixDot(270, 270, 520, true, false);
-}
-
-XLA_TEST_F(DotOperationTest, MatrixDotF32_270_270_520_MinorToMajorFT) {
-  TestMatrixDot(270, 270, 520, false, true);
-}
-
-XLA_TEST_F(DotOperationTest, MatrixDotF32_270_270_520_MinorToMajorFF) {
-  TestMatrixDot(270, 270, 520, false, false);
-}
-
-XLA_TEST_F(DotOperationTest, MatrixDotF32_260_3_520_MinorToMajorTT) {
-  TestMatrixDot(269, 3, 520, true, true);
-}
-
-XLA_TEST_F(DotOperationTest, MatrixDotF32_260_3_520_MinorToMajorTF) {
-  TestMatrixDot(260, 3, 520, true, false);
-}
-
-XLA_TEST_F(DotOperationTest, MatrixDotF32_260_3_520_MinorToMajorFT) {
-  TestMatrixDot(260, 3, 520, false, true);
-}
-
-XLA_TEST_F(DotOperationTest, MatrixDotF32_260_3_520_MinorToMajorFF) {
-  TestMatrixDot(260, 3, 520, false, false);
-}
-
-XLA_TEST_F(DotOperationTest, MatrixVectorDotF32_1x8x8) {
-  TestMatrixDot(1, 8, 8, true, true);
-}
-
-XLA_TEST_F(DotOperationTest, MatrixVectorDotF32_1x130x8) {
-  TestMatrixDot(1, 130, 8, true, true);
-}
-
-XLA_TEST_F(DotOperationTest, MatrixVectorDotF32_1x8x130) {
-  TestMatrixDot(1, 8, 130, true, true);
-}
-
-XLA_TEST_F(DotOperationTest, MatrixVectorDotF32_1x290x130) {
-  TestMatrixDot(1, 290, 130, true, true);
-}
-
-XLA_TEST_F(DotOperationTest, MatrixVectorDotF32_2x1x1) {
-  TestMatrixDot(2, 1, 1, true, true);
-}
-
-XLA_TEST_F(DotOperationTest, MatrixVectorDotF32_8x8x1) {
-  TestMatrixDot(8, 8, 1, true, true);
-}
-
-XLA_TEST_F(DotOperationTest, MatrixVectorDotF32_16x1x1) {
-  TestMatrixDot(16, 1, 1, true, true);
-}
-
-XLA_TEST_F(DotOperationTest, MatrixVectorDotF32_16x3x1) {
-  TestMatrixDot(16, 3, 1, true, true);
-}
-
-XLA_TEST_F(DotOperationTest, MatrixVectorDotF32_3x3x1) {
-  TestMatrixDot(3, 3, 1, true, true);
-}
-
-XLA_TEST_F(DotOperationTest, MatrixVectorDotF32_29x29x1) {
-  TestMatrixDot(29, 29, 1, true, true);
-}
-
-XLA_TEST_F(DotOperationTest, MatrixVectorDotF32_1x8x2) {
-  TestMatrixDot(1, 8, 2, true, true);
-}
-
-XLA_TEST_F(DotOperationTest, MatrixVectorDotF32_1x2x8) {
-  TestMatrixDot(1, 2, 8, true, true);
-}
-
-XLA_TEST_F(DotOperationTest, MatrixVectorDotF32_259x258x1) {
-  TestMatrixDot(259, 258, 1, true, true);
-}
-
-XLA_TEST_F(DotOperationTest, MatrixVectorDotF32_259x258x1_FT) {
-  TestMatrixDot(259, 258, 1, false, true);
-}
-
-XLA_TEST_F(DotOperationTest, MatrixVectorDotWithAddF32_1x8x8) {
-  TestMatrixDotWithAdd(1, 8, 8, /*dot_lhs_row_major=*/true,
-                       /*dot_rhs_row_major=*/true, /*addend_row_major=*/true);
-}
-
-XLA_TEST_F(DotOperationTest, MatrixVectorDotWithAddF32_1x130x8) {
-  TestMatrixDotWithAdd(1, 130, 8, /*dot_lhs_row_major=*/true,
-                       /*dot_rhs_row_major=*/true, /*addend_row_major=*/true);
-}
-
-XLA_TEST_F(DotOperationTest, MatrixVectorDotWithAddF32_1x8x130) {
-  TestMatrixDotWithAdd(1, 8, 130, /*dot_lhs_row_major=*/true,
-                       /*dot_rhs_row_major=*/true, /*addend_row_major=*/true);
-}
-
-XLA_TEST_F(DotOperationTest, MatrixVectorDotWithAddF32_1x290x130) {
-  TestMatrixDotWithAdd(1, 290, 130, /*dot_lhs_row_major=*/true,
-                       /*dot_rhs_row_major=*/true, /*addend_row_major=*/true);
-}
-
-XLA_TEST_F(DotOperationTest, MatrixVectorDotWithAddF32_2x1x1) {
-  TestMatrixDotWithAdd(2, 1, 1, /*dot_lhs_row_major=*/true,
-                       /*dot_rhs_row_major=*/true, /*addend_row_major=*/true);
-}
-
-XLA_TEST_F(DotOperationTest, MatrixVectorDotWithAddF32_8x8x1) {
-  TestMatrixDotWithAdd(8, 8, 1, /*dot_lhs_row_major=*/true,
-                       /*dot_rhs_row_major=*/true, /*addend_row_major=*/true);
-}
-
-XLA_TEST_F(DotOperationTest, MatrixVectorDotWithAddF32_16x1x1) {
-  TestMatrixDotWithAdd(16, 1, 1, /*dot_lhs_row_major=*/true,
-                       /*dot_rhs_row_major=*/true, /*addend_row_major=*/true);
-}
-
-XLA_TEST_F(DotOperationTest, MatrixVectorDotWithAddF32_16x3x1) {
-  TestMatrixDotWithAdd(16, 3, 1, /*dot_lhs_row_major=*/true,
-                       /*dot_rhs_row_major=*/true, /*addend_row_major=*/true);
-}
-
-XLA_TEST_F(DotOperationTest, MatrixVectorDotWithAddF32_3x3x1) {
-  TestMatrixDotWithAdd(3, 3, 1, /*dot_lhs_row_major=*/true,
-                       /*dot_rhs_row_major=*/true, /*addend_row_major=*/true);
-}
-
-XLA_TEST_F(DotOperationTest, MatrixVectorDotWithAddF32_29x29x1) {
-  TestMatrixDotWithAdd(29, 29, 1, /*dot_lhs_row_major=*/true,
-                       /*dot_rhs_row_major=*/true, /*addend_row_major=*/true);
-}
-
-XLA_TEST_F(DotOperationTest, MatrixVectorDotWithAddF32_1x8x2) {
-  TestMatrixDotWithAdd(1, 8, 2, /*dot_lhs_row_major=*/true,
-                       /*dot_rhs_row_major=*/true, /*addend_row_major=*/true);
-}
-
-XLA_TEST_F(DotOperationTest, MatrixVectorDotWithAddF32_1x2x8) {
-  TestMatrixDotWithAdd(1, 2, 8, /*dot_lhs_row_major=*/true,
-                       /*dot_rhs_row_major=*/true, /*addend_row_major=*/true);
-}
-
-XLA_TEST_F(DotOperationTest, MatrixVectorDotWithAddF32_259x258x1) {
-  TestMatrixDotWithAdd(259, 258, 1, /*dot_lhs_row_major=*/true,
-                       /*dot_rhs_row_major=*/true, /*addend_row_major=*/true);
-}
+  auto result = builder.Dot(
+      builder.Parameter(0, ShapeUtil::MakeShape(prim_type, {param.m, param.k}),
+                        "dot_lhs"),
+      builder.Parameter(1, ShapeUtil::MakeShape(prim_type, {param.k, param.n}),
+                        "dot_rhs"));
+
+  if (param.has_addend) {
+    result = builder.Add(
+        result,
+        builder.Parameter(
+            2, ShapeUtil::MakeShape(prim_type, {param.m, param.n}), "addend"));
+  }
 
-XLA_TEST_F(DotOperationTest, MatrixVectorDotWithAddF32_259x258x1_FTT) {
-  TestMatrixDotWithAdd(259, 258, 1, /*dot_lhs_row_major=*/false,
-                       /*dot_rhs_row_major=*/true, /*addend_row_major=*/true);
-}
+  std::unique_ptr<Array2D<float>> expected;
+  if (param.has_addend) {
+    expected = ReferenceUtil::ApplyElementwise2D(
+        std::plus<float>(),
+        *ReferenceUtil::MatmulArray2D(*dot_lhs_data, *dot_rhs_data),
+        *addend_data);
+  } else {
+    expected = ReferenceUtil::MatmulArray2D(*dot_lhs_data, *dot_rhs_data);
+  }
 
-XLA_TEST_F(DotOperationTest, MatrixVectorDotWithAddF32_259x258x1_FFT) {
-  TestMatrixDotWithAdd(259, 258, 1, /*dot_lhs_row_major=*/false,
-                       /*dot_rhs_row_major=*/false, /*addend_row_major=*/true);
-}
+  std::vector<GlobalData*> args = {dot_lhs_handle.get(), dot_rhs_handle.get()};
+  if (param.has_addend) {
+    args.push_back(addend_handle.get());
+  }
 
-XLA_TEST_F(DotOperationTest, MatrixVectorDotWithAddF32_259x258x1_FFF) {
-  TestMatrixDotWithAdd(259, 258, 1, /*dot_lhs_row_major=*/false,
-                       /*dot_rhs_row_major=*/false, /*addend_row_major=*/false);
+  ComputeAndCompareR2<float>(&builder, *expected, args, ErrorSpec(0.3, 3e-3));
 }
 
-XLA_TEST_F(DotOperationTest, MatrixVectorDotWithAddF32_259x258x1_TFF) {
-  TestMatrixDotWithAdd(259, 258, 1, /*dot_lhs_row_major=*/true,
-                       /*dot_rhs_row_major=*/false, /*addend_row_major=*/false);
-}
+std::vector<DotTestParam> CreateDotTestParameters() {
+  std::vector<DotTestParam> params;
 
-XLA_TEST_F(DotOperationTest, MatrixVectorDotWithAddF32_259x258x1_TTF) {
-  TestMatrixDotWithAdd(259, 258, 1, /*dot_lhs_row_major=*/true,
-                       /*dot_rhs_row_major=*/true, /*addend_row_major=*/false);
-}
+  auto add_matrix_matrix_dot_test = [&](int m, int k, int n) {
+    for (bool lhs_row_major : {true, false}) {
+      for (bool rhs_row_major : {true, false}) {
+        params.push_back({/*m=*/m, /*k=*/k, /*n=*/n,
+                          /*dot_lhs_row_major=*/lhs_row_major,
+                          /*dot_rhs_row_major=*/rhs_row_major,
+                          /*has_addend=*/false, /*addend_row_major=*/true});
+      }
+    }
+  };
+
+  auto add_matrix_vector_dot_test = [&](int k, int n) {
+    for (bool has_addend : {false, true}) {
+      params.push_back({/*m=*/1, /*k=*/k, /*n=*/n,
+                        /*dot_lhs_row_major=*/true, /*dot_rhs_row_major=*/true,
+                        /*has_addend=*/has_addend, /*addend_row_major=*/true});
+      if (n != 1) {
+        params.push_back(
+            {/*m=*/n, /*k=*/k, /*n=*/1,
+             /*dot_lhs_row_major=*/true, /*dot_rhs_row_major=*/true,
+             /*has_addend=*/has_addend, /*addend_row_major=*/true});
+      }
+    }
+  };
+
+  add_matrix_matrix_dot_test(/*m=*/12, /*k=*/117, /*n=*/7);
+  add_matrix_matrix_dot_test(/*m=*/270, /*k=*/270, /*n=*/520);
+  add_matrix_matrix_dot_test(/*m=*/260, /*k=*/3, /*n=*/520);
+
+  add_matrix_vector_dot_test(/*k=*/8, /*n=*/8);
+  add_matrix_vector_dot_test(/*k=*/130, /*n=*/8);
+  add_matrix_vector_dot_test(/*k=*/8, /*n=*/130);
+  add_matrix_vector_dot_test(/*k=*/290, /*n=*/130);
+  add_matrix_vector_dot_test(/*k=*/1, /*n=*/1);
+  add_matrix_vector_dot_test(/*k=*/1, /*n=*/16);
+  add_matrix_vector_dot_test(/*k=*/3, /*n=*/16);
+  add_matrix_vector_dot_test(/*k=*/3, /*n=*/3);
+  add_matrix_vector_dot_test(/*k=*/29, /*n=*/29);
+  add_matrix_vector_dot_test(/*k=*/8, /*n=*/2);
+  add_matrix_vector_dot_test(/*k=*/2, /*n=*/8);
+  add_matrix_vector_dot_test(/*k=*/259, /*n=*/258);
+
+  return params;
+}
+
+INSTANTIATE_TEST_CASE_P(DotTests, ParametricDotTest,
+                        ::testing::ValuesIn(CreateDotTestParameters()),
+                        PrintDotTestParam);
 
 XLA_TEST_F(DotOperationTest, SquareMatrixDotF32MinorToMajorFF) {
   TestSquareMatrixDot<float>(false, false);
-- 
GitLab


From 14db6c339cc4aa0a1640dd7b86029f3a1ebad395 Mon Sep 17 00:00:00 2001
From: Yunxing Dai <yunxing@google.com>
Date: Thu, 14 Dec 2017 17:34:18 -0800
Subject: [PATCH 1038/1225] BF16 support for HloEvaluator.

- Add a second type parameter <ElementWiseT> to HloEvaluator, indicating which type the elementwise operation needs to be done in.
- Add BF16 support for HloEvaluator.
- Use a type converter to implement test cases.

PiperOrigin-RevId: 179123689
---
 tensorflow/compiler/xla/service/BUILD         |   2 +
 .../compiler/xla/service/hlo_evaluator.cc     | 261 +++++++------
 .../compiler/xla/service/hlo_evaluator.h      |   6 +-
 .../xla/service/hlo_evaluator_test.cc         | 345 +++++++++---------
 tensorflow/core/framework/numeric_types.h     |  17 +-
 5 files changed, 349 insertions(+), 282 deletions(-)

diff --git a/tensorflow/compiler/xla/service/BUILD b/tensorflow/compiler/xla/service/BUILD
index 318263ec5f..07ef98076e 100644
--- a/tensorflow/compiler/xla/service/BUILD
+++ b/tensorflow/compiler/xla/service/BUILD
@@ -110,6 +110,7 @@ tf_cc_test(
         "//tensorflow/compiler/xla:literal_util",
         "//tensorflow/compiler/xla:reference_util",
         "//tensorflow/compiler/xla:shape_util",
+        "//tensorflow/compiler/xla:status",
         "//tensorflow/compiler/xla:status_macros",
         "//tensorflow/compiler/xla:statusor",
         "//tensorflow/compiler/xla:test",
@@ -117,6 +118,7 @@ tf_cc_test(
         "//tensorflow/compiler/xla:util",
         "//tensorflow/compiler/xla:xla_data_proto",
         "//tensorflow/compiler/xla/client:computation_builder",
+        "//tensorflow/compiler/xla/service:hlo_element_type_converter",
         "//tensorflow/compiler/xla/tests:hlo_verified_test_base",
         "//tensorflow/compiler/xla/tests:literal_test_util",
         "//tensorflow/compiler/xla/tests:xla_internal_test_main",  # fixdeps: keep
diff --git a/tensorflow/compiler/xla/service/hlo_evaluator.cc b/tensorflow/compiler/xla/service/hlo_evaluator.cc
index e693d167a1..150f9f2d6e 100644
--- a/tensorflow/compiler/xla/service/hlo_evaluator.cc
+++ b/tensorflow/compiler/xla/service/hlo_evaluator.cc
@@ -167,11 +167,37 @@ StatusOr<std::unique_ptr<Literal>> ElementWiseUnaryOpImpl(
 
 }  // namespace
 
-template <typename ReturnT>
+template <typename ReturnT, typename ElementwiseT>
 class HloEvaluator::TypedVisitor : public DfsHloVisitorWithDefault {
  public:
   explicit TypedVisitor(HloEvaluator* p) : parent_(p) {}
 
+  // The following higher-order functions convert a function with ElementwiseT
+  // to a function with ReturnT.
+  std::function<ReturnT(ReturnT)> ConvertUnaryFunction(
+      const std::function<ElementwiseT(ElementwiseT)>& unary_op) {
+    return [&unary_op](ReturnT arg) {
+      return static_cast<ReturnT>(unary_op(static_cast<ElementwiseT>(arg)));
+    };
+  }
+  std::function<ReturnT(ReturnT, ReturnT)> ConvertBinaryFunction(
+      const std::function<ElementwiseT(ElementwiseT, ElementwiseT)>&
+          binary_op) {
+    return [&binary_op](ReturnT arg1, ReturnT arg2) {
+      return static_cast<ReturnT>(binary_op(static_cast<ElementwiseT>(arg1),
+                                            static_cast<ElementwiseT>(arg2)));
+    };
+  }
+  std::function<ReturnT(ReturnT, ReturnT, ReturnT)> ConvertTernaryFunction(
+      const std::function<ElementwiseT(ElementwiseT, ElementwiseT,
+                                       ElementwiseT)>& ternary_op) {
+    return [&ternary_op](ReturnT arg1, ReturnT arg2, ReturnT arg3) {
+      return static_cast<ReturnT>(ternary_op(static_cast<ElementwiseT>(arg1),
+                                             static_cast<ElementwiseT>(arg2),
+                                             static_cast<ElementwiseT>(arg3)));
+    };
+  }
+
   Status DefaultAction(HloInstruction* hlo_instruction) override {
     return Unimplemented("unhandled HLO ops for HloEvaluator: %s.",
                          HloOpcodeString(hlo_instruction->opcode()).c_str());
@@ -197,24 +223,25 @@ class HloEvaluator::TypedVisitor : public DfsHloVisitorWithDefault {
                               is_complex_t<NativeT>::value>::type* = nullptr>
   Status HandleAbs(HloInstruction* abs) {
     TF_ASSIGN_OR_RETURN(parent_->evaluated_[abs],
-                        ElementWiseUnaryOp(abs, [](NativeT elem_operand) {
+                        ElementWiseUnaryOp(abs, [](ElementwiseT elem_operand) {
                           return std::abs(elem_operand);
                         }));
     return Status::OK();
   }
 
   Status HandleAbs(HloInstruction* abs) override {
-    return HandleAbs<ReturnT>(abs);
+    return HandleAbs<ElementwiseT>(abs);
   }
 
   template <
       typename NativeT,
       typename std::enable_if<!is_complex_t<NativeT>::value>::type* = nullptr>
   Status HandleRound(HloInstruction* round) {
-    TF_ASSIGN_OR_RETURN(parent_->evaluated_[round],
-                        ElementWiseUnaryOp(round, [](ReturnT elem_operand) {
-                          return std::round(elem_operand);
-                        }));
+    TF_ASSIGN_OR_RETURN(
+        parent_->evaluated_[round],
+        ElementWiseUnaryOp(round, [](ElementwiseT elem_operand) {
+          return std::round(elem_operand);
+        }));
     return Status::OK();
   }
 
@@ -264,7 +291,7 @@ class HloEvaluator::TypedVisitor : public DfsHloVisitorWithDefault {
       typename std::enable_if<!is_complex_t<NativeT>::value>::type* = nullptr>
   Status HandleCeil(HloInstruction* ceil) {
     TF_ASSIGN_OR_RETURN(parent_->evaluated_[ceil],
-                        ElementWiseUnaryOp(ceil, [](ReturnT elem_operand) {
+                        ElementWiseUnaryOp(ceil, [](ElementwiseT elem_operand) {
                           return std::ceil(elem_operand);
                         }));
     return Status::OK();
@@ -299,7 +326,7 @@ class HloEvaluator::TypedVisitor : public DfsHloVisitorWithDefault {
 
   Status HandleExp(HloInstruction* exp) override {
     TF_ASSIGN_OR_RETURN(parent_->evaluated_[exp],
-                        ElementWiseUnaryOp(exp, [](ReturnT elem_operand) {
+                        ElementWiseUnaryOp(exp, [](ElementwiseT elem_operand) {
                           return std::exp(elem_operand);
                         }));
     return Status::OK();
@@ -309,10 +336,11 @@ class HloEvaluator::TypedVisitor : public DfsHloVisitorWithDefault {
       typename NativeT,
       typename std::enable_if<!is_complex_t<NativeT>::value>::type* = nullptr>
   Status HandleFloor(HloInstruction* floor) {
-    TF_ASSIGN_OR_RETURN(parent_->evaluated_[floor],
-                        ElementWiseUnaryOp(floor, [](ReturnT elem_operand) {
-                          return std::floor(elem_operand);
-                        }));
+    TF_ASSIGN_OR_RETURN(
+        parent_->evaluated_[floor],
+        ElementWiseUnaryOp(floor, [](ElementwiseT elem_operand) {
+          return std::floor(elem_operand);
+        }));
     return Status::OK();
   }
 
@@ -329,7 +357,7 @@ class HloEvaluator::TypedVisitor : public DfsHloVisitorWithDefault {
 
   Status HandleLog(HloInstruction* log) override {
     TF_ASSIGN_OR_RETURN(parent_->evaluated_[log],
-                        ElementWiseUnaryOp(log, [](ReturnT elem_operand) {
+                        ElementWiseUnaryOp(log, [](ElementwiseT elem_operand) {
                           return std::log(elem_operand);
                         }));
     return Status::OK();
@@ -341,7 +369,7 @@ class HloEvaluator::TypedVisitor : public DfsHloVisitorWithDefault {
                 !std::is_same<NativeT, bool>::value>::type* = nullptr>
   Status HandleNot(HloInstruction* not_) {
     TF_ASSIGN_OR_RETURN(parent_->evaluated_[not_],
-                        ElementWiseUnaryOp(not_, [](ReturnT elem_operand) {
+                        ElementWiseUnaryOp(not_, [](ElementwiseT elem_operand) {
                           return ~elem_operand;
                         }));
     return Status::OK();
@@ -351,7 +379,7 @@ class HloEvaluator::TypedVisitor : public DfsHloVisitorWithDefault {
                                   NativeT>::value>::type* = nullptr>
   Status HandleNot(HloInstruction* not_) {
     TF_ASSIGN_OR_RETURN(parent_->evaluated_[not_],
-                        ElementWiseUnaryOp(not_, [](ReturnT elem_operand) {
+                        ElementWiseUnaryOp(not_, [](ElementwiseT elem_operand) {
                           return !elem_operand;
                         }));
     return Status::OK();
@@ -362,7 +390,7 @@ class HloEvaluator::TypedVisitor : public DfsHloVisitorWithDefault {
                 nullptr>
   Status HandleNot(HloInstruction* not_) {
     TF_ASSIGN_OR_RETURN(parent_->evaluated_[not_],
-                        ElementWiseUnaryOp(not_, [](ReturnT elem_operand) {
+                        ElementWiseUnaryOp(not_, [](ElementwiseT elem_operand) {
                           return !elem_operand;
                         }));
     return Status::OK();
@@ -376,7 +404,7 @@ class HloEvaluator::TypedVisitor : public DfsHloVisitorWithDefault {
   }
 
   Status HandleNot(HloInstruction* not_) override {
-    return HandleNot<ReturnT>(not_);
+    return HandleNot<ElementwiseT>(not_);
   }
 
   template <typename NativeT,
@@ -385,10 +413,11 @@ class HloEvaluator::TypedVisitor : public DfsHloVisitorWithDefault {
                 !std::is_floating_point<NativeT>::value>::type* = nullptr>
   Status HandleNegate(HloInstruction* negate) {
     using type = typename std::make_unsigned<NativeT>::type;
-    TF_ASSIGN_OR_RETURN(parent_->evaluated_[negate],
-                        ElementWiseUnaryOp(negate, [](ReturnT elem_operand) {
-                          return NativeT(-type(elem_operand));
-                        }));
+    TF_ASSIGN_OR_RETURN(
+        parent_->evaluated_[negate],
+        ElementWiseUnaryOp(negate, [](ElementwiseT elem_operand) {
+          return NativeT(-type(elem_operand));
+        }));
     return Status::OK();
   }
 
@@ -397,10 +426,10 @@ class HloEvaluator::TypedVisitor : public DfsHloVisitorWithDefault {
                 !std::is_signed<NativeT>::value ||
                 std::is_floating_point<NativeT>::value>::type* = nullptr>
   Status HandleNegate(HloInstruction* negate) {
-    TF_ASSIGN_OR_RETURN(parent_->evaluated_[negate],
-                        ElementWiseUnaryOp(negate, [](ReturnT elem_operand) {
-                          return -elem_operand;
-                        }));
+    TF_ASSIGN_OR_RETURN(
+        parent_->evaluated_[negate],
+        ElementWiseUnaryOp(
+            negate, [](ElementwiseT elem_operand) { return -elem_operand; }));
     return Status::OK();
   }
 
@@ -413,9 +442,9 @@ class HloEvaluator::TypedVisitor : public DfsHloVisitorWithDefault {
       typename std::enable_if<!is_complex_t<NativeT>::value>::type* = nullptr>
   Status HandleSign(HloInstruction* sign) {
     TF_ASSIGN_OR_RETURN(parent_->evaluated_[sign],
-                        ElementWiseUnaryOp(sign, [](ReturnT elem_operand) {
-                          return (ReturnT(0) < elem_operand) -
-                                 (elem_operand < ReturnT(0));
+                        ElementWiseUnaryOp(sign, [](ElementwiseT elem_operand) {
+                          return (ElementwiseT(0) < elem_operand) -
+                                 (elem_operand < ElementwiseT(0));
                         }));
     return Status::OK();
   }
@@ -425,9 +454,9 @@ class HloEvaluator::TypedVisitor : public DfsHloVisitorWithDefault {
       typename std::enable_if<is_complex_t<NativeT>::value>::type* = nullptr>
   Status HandleSign(HloInstruction* sign) {
     TF_ASSIGN_OR_RETURN(parent_->evaluated_[sign],
-                        ElementWiseUnaryOp(sign, [](ReturnT elem_operand) {
+                        ElementWiseUnaryOp(sign, [](ElementwiseT elem_operand) {
                           auto abs_val = std::abs(elem_operand);
-                          return 0 == abs_val ? ReturnT(0)
+                          return 0 == abs_val ? ElementwiseT(0)
                                               : elem_operand / abs_val;
                         }));
     return Status::OK();
@@ -439,7 +468,7 @@ class HloEvaluator::TypedVisitor : public DfsHloVisitorWithDefault {
 
   Status HandleTanh(HloInstruction* tanh) override {
     TF_ASSIGN_OR_RETURN(parent_->evaluated_[tanh],
-                        ElementWiseUnaryOp(tanh, [](ReturnT elem_operand) {
+                        ElementWiseUnaryOp(tanh, [](ElementwiseT elem_operand) {
                           return std::tanh(elem_operand);
                         }));
     return Status::OK();
@@ -453,9 +482,10 @@ class HloEvaluator::TypedVisitor : public DfsHloVisitorWithDefault {
     using type = typename std::make_unsigned<NativeT>::type;
     TF_ASSIGN_OR_RETURN(
         parent_->evaluated_[multiply],
-        ElementWiseBinaryOp(multiply, [](ReturnT lhs_elem, ReturnT rhs_elem) {
-          return NativeT(type(lhs_elem) * type(rhs_elem));
-        }));
+        ElementWiseBinaryOp(multiply,
+                            [](ElementwiseT lhs_elem, ElementwiseT rhs_elem) {
+                              return NativeT(type(lhs_elem) * type(rhs_elem));
+                            }));
     return Status::OK();
   }
 
@@ -467,40 +497,42 @@ class HloEvaluator::TypedVisitor : public DfsHloVisitorWithDefault {
   Status HandleMultiply(HloInstruction* multiply) {
     TF_ASSIGN_OR_RETURN(
         parent_->evaluated_[multiply],
-        ElementWiseBinaryOp(multiply, [](ReturnT lhs_elem, ReturnT rhs_elem) {
-          return lhs_elem * rhs_elem;
-        }));
+        ElementWiseBinaryOp(multiply,
+                            [](ElementwiseT lhs_elem, ElementwiseT rhs_elem) {
+                              return lhs_elem * rhs_elem;
+                            }));
     return Status::OK();
   }
 
   Status HandleMultiply(HloInstruction* multiply) override {
-    return HandleMultiply<ReturnT>(multiply);
+    return HandleMultiply<ElementwiseT>(multiply);
   }
 
   Status HandleSubtract(HloInstruction* subtract) override {
     TF_ASSIGN_OR_RETURN(
         parent_->evaluated_[subtract],
-        ElementWiseBinaryOp(subtract, [](ReturnT lhs_elem, ReturnT rhs_elem) {
-          return lhs_elem - rhs_elem;
-        }));
+        ElementWiseBinaryOp(subtract,
+                            [](ElementwiseT lhs_elem, ElementwiseT rhs_elem) {
+                              return lhs_elem - rhs_elem;
+                            }));
     return Status::OK();
   }
 
   Status HandleAdd(HloInstruction* add) override {
-    TF_ASSIGN_OR_RETURN(
-        parent_->evaluated_[add],
-        ElementWiseBinaryOp(add, [](ReturnT lhs_elem, ReturnT rhs_elem) {
-          return lhs_elem + rhs_elem;
-        }));
+    TF_ASSIGN_OR_RETURN(parent_->evaluated_[add],
+                        ElementWiseBinaryOp(add, [](ElementwiseT lhs_elem,
+                                                    ElementwiseT rhs_elem) {
+                          return lhs_elem + rhs_elem;
+                        }));
     return Status::OK();
   }
 
   Status HandleDivide(HloInstruction* divide) override {
-    TF_ASSIGN_OR_RETURN(
-        parent_->evaluated_[divide],
-        ElementWiseBinaryOp(divide, [](ReturnT lhs_elem, ReturnT rhs_elem) {
-          return lhs_elem / rhs_elem;
-        }));
+    TF_ASSIGN_OR_RETURN(parent_->evaluated_[divide],
+                        ElementWiseBinaryOp(divide, [](ElementwiseT lhs_elem,
+                                                       ElementwiseT rhs_elem) {
+                          return lhs_elem / rhs_elem;
+                        }));
     return Status::OK();
   }
 
@@ -510,7 +542,7 @@ class HloEvaluator::TypedVisitor : public DfsHloVisitorWithDefault {
   Status HandleMaximum(HloInstruction* maximum) {
     TF_ASSIGN_OR_RETURN(
         parent_->evaluated_[maximum],
-        ElementWiseBinaryOp(maximum, [](ReturnT lhs, ReturnT rhs) {
+        ElementWiseBinaryOp(maximum, [](ElementwiseT lhs, ElementwiseT rhs) {
           return std::fmax(lhs, rhs);
         }));
     return Status::OK();
@@ -524,18 +556,18 @@ class HloEvaluator::TypedVisitor : public DfsHloVisitorWithDefault {
   }
 
   Status HandleMaximum(HloInstruction* maximum) override {
-    return HandleMaximum<ReturnT>(maximum);
+    return HandleMaximum<ElementwiseT>(maximum);
   }
 
   template <
       typename NativeT,
       typename std::enable_if<!is_complex_t<NativeT>::value>::type* = nullptr>
   Status HandleMinimum(HloInstruction* minimum) {
-    TF_ASSIGN_OR_RETURN(
-        parent_->evaluated_[minimum],
-        ElementWiseBinaryOp(minimum, [](ReturnT lhs_el, ReturnT rhs_el) {
-          return std::fmin(lhs_el, rhs_el);
-        }));
+    TF_ASSIGN_OR_RETURN(parent_->evaluated_[minimum],
+                        ElementWiseBinaryOp(minimum, [](ElementwiseT lhs_el,
+                                                        ElementwiseT rhs_el) {
+                          return std::fmin(lhs_el, rhs_el);
+                        }));
     return Status::OK();
   }
 
@@ -547,15 +579,15 @@ class HloEvaluator::TypedVisitor : public DfsHloVisitorWithDefault {
   }
 
   Status HandleMinimum(HloInstruction* minimum) override {
-    return HandleMinimum<ReturnT>(minimum);
+    return HandleMinimum<ElementwiseT>(minimum);
   }
 
   Status HandlePower(HloInstruction* power) override {
-    TF_ASSIGN_OR_RETURN(
-        parent_->evaluated_[power],
-        ElementWiseBinaryOp(power, [](ReturnT lhs_el, ReturnT rhs_el) {
-          return std::pow(lhs_el, rhs_el);
-        }));
+    TF_ASSIGN_OR_RETURN(parent_->evaluated_[power],
+                        ElementWiseBinaryOp(power, [](ElementwiseT lhs_el,
+                                                      ElementwiseT rhs_el) {
+                          return std::pow(lhs_el, rhs_el);
+                        }));
     return Status::OK();
   }
 
@@ -563,11 +595,11 @@ class HloEvaluator::TypedVisitor : public DfsHloVisitorWithDefault {
       typename NativeT,
       typename std::enable_if<!is_complex_t<NativeT>::value>::type* = nullptr>
   Status HandleRemainder(HloInstruction* remainder) {
-    TF_ASSIGN_OR_RETURN(
-        parent_->evaluated_[remainder],
-        ElementWiseBinaryOp(remainder, [](ReturnT lhs_el, ReturnT rhs_el) {
-          return std::fmod(lhs_el, rhs_el);
-        }));
+    TF_ASSIGN_OR_RETURN(parent_->evaluated_[remainder],
+                        ElementWiseBinaryOp(remainder, [](ElementwiseT lhs_el,
+                                                          ElementwiseT rhs_el) {
+                          return std::fmod(lhs_el, rhs_el);
+                        }));
     return Status::OK();
   }
 
@@ -579,7 +611,7 @@ class HloEvaluator::TypedVisitor : public DfsHloVisitorWithDefault {
   }
 
   Status HandleRemainder(HloInstruction* remainder) override {
-    return HandleRemainder<ReturnT>(remainder);
+    return HandleRemainder<ElementwiseT>(remainder);
   }
 
   template <typename NativeT,
@@ -588,7 +620,7 @@ class HloEvaluator::TypedVisitor : public DfsHloVisitorWithDefault {
   Status HandleAnd(HloInstruction* and_) {
     TF_ASSIGN_OR_RETURN(
         parent_->evaluated_[and_],
-        ElementWiseBinaryOp(and_, [](ReturnT lhs_el, ReturnT rhs_el) {
+        ElementWiseBinaryOp(and_, [](ElementwiseT lhs_el, ElementwiseT rhs_el) {
           return lhs_el & rhs_el;
         }));
     return Status::OK();
@@ -599,7 +631,7 @@ class HloEvaluator::TypedVisitor : public DfsHloVisitorWithDefault {
   Status HandleAnd(HloInstruction* and_) {
     TF_ASSIGN_OR_RETURN(
         parent_->evaluated_[and_],
-        ElementWiseBinaryOp(and_, [](ReturnT lhs_el, ReturnT rhs_el) {
+        ElementWiseBinaryOp(and_, [](ElementwiseT lhs_el, ElementwiseT rhs_el) {
           return lhs_el && rhs_el;
         }));
     return Status::OK();
@@ -613,7 +645,7 @@ class HloEvaluator::TypedVisitor : public DfsHloVisitorWithDefault {
   }
 
   Status HandleAnd(HloInstruction* and_) override {
-    return HandleAnd<ReturnT>(and_);
+    return HandleAnd<ElementwiseT>(and_);
   }
 
   template <typename NativeT,
@@ -622,7 +654,7 @@ class HloEvaluator::TypedVisitor : public DfsHloVisitorWithDefault {
   Status HandleOr(HloInstruction* or_) {
     TF_ASSIGN_OR_RETURN(
         parent_->evaluated_[or_],
-        ElementWiseBinaryOp(or_, [](ReturnT lhs_el, ReturnT rhs_el) {
+        ElementWiseBinaryOp(or_, [](ElementwiseT lhs_el, ElementwiseT rhs_el) {
           return lhs_el | rhs_el;
         }));
     return Status::OK();
@@ -633,7 +665,7 @@ class HloEvaluator::TypedVisitor : public DfsHloVisitorWithDefault {
   Status HandleOr(HloInstruction* or_) {
     TF_ASSIGN_OR_RETURN(
         parent_->evaluated_[or_],
-        ElementWiseBinaryOp(or_, [](ReturnT lhs_el, ReturnT rhs_el) {
+        ElementWiseBinaryOp(or_, [](ElementwiseT lhs_el, ElementwiseT rhs_el) {
           return lhs_el || rhs_el;
         }));
     return Status::OK();
@@ -647,7 +679,7 @@ class HloEvaluator::TypedVisitor : public DfsHloVisitorWithDefault {
   }
 
   Status HandleOr(HloInstruction* or_) override {
-    return HandleOr<ReturnT>(or_);
+    return HandleOr<ElementwiseT>(or_);
   }
 
   template <typename NativeT,
@@ -672,7 +704,7 @@ class HloEvaluator::TypedVisitor : public DfsHloVisitorWithDefault {
   }
 
   Status HandleShiftLeft(HloInstruction* shl) override {
-    return HandleShiftLeft<ReturnT>(shl);
+    return HandleShiftLeft<ElementwiseT>(shl);
   }
   template <typename NativeT,
             typename std::enable_if<
@@ -698,7 +730,7 @@ class HloEvaluator::TypedVisitor : public DfsHloVisitorWithDefault {
   }
 
   Status HandleShiftRightArithmetic(HloInstruction* shra) override {
-    return HandleShiftRightArithmetic<ReturnT>(shra);
+    return HandleShiftRightArithmetic<ElementwiseT>(shra);
   }
 
   template <typename NativeT,
@@ -725,19 +757,21 @@ class HloEvaluator::TypedVisitor : public DfsHloVisitorWithDefault {
   }
 
   Status HandleShiftRightLogical(HloInstruction* shrl) override {
-    return HandleShiftRightLogical<ReturnT>(shrl);
+    return HandleShiftRightLogical<ElementwiseT>(shrl);
   }
 
   template <
       typename NativeT,
       typename std::enable_if<!is_complex_t<NativeT>::value>::type* = nullptr>
   Status HandleClamp(HloInstruction* clamp) {
-    std::function<ReturnT(ReturnT, ReturnT, ReturnT)> clamp_op =
-        [](ReturnT low, ReturnT value, ReturnT high) {
+    std::function<ElementwiseT(ElementwiseT, ElementwiseT, ElementwiseT)>
+        clamp_op = [](ElementwiseT low, ElementwiseT value, ElementwiseT high) {
           return std::fmax(low, std::fmin(value, high));
         };
-    TF_ASSIGN_OR_RETURN(parent_->evaluated_[clamp],
-                        ElementWiseTernaryOp(clamp, std::move(clamp_op)));
+    TF_ASSIGN_OR_RETURN(
+        parent_->evaluated_[clamp],
+        ElementwiseTernaryOp(clamp,
+                             std::move(ConvertTernaryFunction(clamp_op))));
     return Status::OK();
   }
 
@@ -749,7 +783,7 @@ class HloEvaluator::TypedVisitor : public DfsHloVisitorWithDefault {
   }
 
   Status HandleClamp(HloInstruction* clamp) override {
-    return HandleClamp<ReturnT>(clamp);
+    return HandleClamp<ElementwiseT>(clamp);
   }
 
   Status HandleSelect(HloInstruction* select) override {
@@ -762,7 +796,7 @@ class HloEvaluator::TypedVisitor : public DfsHloVisitorWithDefault {
           return on_false;
         };
     TF_ASSIGN_OR_RETURN(parent_->evaluated_[select],
-                        ElementWiseTernaryOp(select, std::move(select_op)));
+                        ElementwiseTernaryOp(select, std::move(select_op)));
     return Status::OK();
   }
 
@@ -860,7 +894,7 @@ class HloEvaluator::TypedVisitor : public DfsHloVisitorWithDefault {
     DimensionVector rhs_spatial_index(dnums.kernel_spatial_dimensions_size());
 
     auto func = [&](tensorflow::gtl::ArraySlice<int64> out_index) {
-      ReturnT result_val = static_cast<ReturnT>(0);
+      ElementwiseT result_val = static_cast<ElementwiseT>(0);
 
       std::fill(lhs_index.begin(), lhs_index.end(), 0);
       std::fill(rhs_index.begin(), rhs_index.end(), 0);
@@ -911,13 +945,14 @@ class HloEvaluator::TypedVisitor : public DfsHloVisitorWithDefault {
                     : rhs_spatial_index[ki];
           }
 
-          result_val += lhs_literal.Get<ReturnT>(lhs_index) *
-                        rhs_literal.Get<ReturnT>(rhs_index);
+          result_val +=
+              static_cast<ElementwiseT>(lhs_literal.Get<ReturnT>(lhs_index)) *
+              static_cast<ElementwiseT>(rhs_literal.Get<ReturnT>(rhs_index));
         }
       cnt : {}
       } while (IndexUtil::BumpIndices(window_shape, &rhs_spatial_index));
 
-      return result_val;
+      return static_cast<ReturnT>(result_val);
     };
 
     auto result = Literal::CreateFromShape(result_shape);
@@ -967,7 +1002,7 @@ class HloEvaluator::TypedVisitor : public DfsHloVisitorWithDefault {
     auto result = Literal::CreateFromShape(dot->shape());
     TF_RETURN_IF_ERROR(result->Populate<ReturnT>(
         [&](tensorflow::gtl::ArraySlice<int64> multi_index) {
-          ReturnT result_val = static_cast<ReturnT>(0);
+          ElementwiseT result_val = static_cast<ElementwiseT>(0);
 
           std::vector<int64> lhs_index(lhs_rank, 0);
           std::vector<int64> rhs_index(rhs_rank, 0);
@@ -984,11 +1019,12 @@ class HloEvaluator::TypedVisitor : public DfsHloVisitorWithDefault {
             lhs_index[lhs_contracted_dimension] = i;
             rhs_index[rhs_contracted_dimension] = i;
 
-            result_val += lhs_literal.Get<ReturnT>(lhs_index) *
-                          rhs_literal.Get<ReturnT>(rhs_index);
+            result_val +=
+                static_cast<ElementwiseT>(lhs_literal.Get<ReturnT>(lhs_index)) *
+                static_cast<ElementwiseT>(rhs_literal.Get<ReturnT>(rhs_index));
           }
 
-          return result_val;
+          return static_cast<ReturnT>(result_val);
         }));
 
     parent_->evaluated_[dot] = std::move(result);
@@ -1385,7 +1421,7 @@ class HloEvaluator::TypedVisitor : public DfsHloVisitorWithDefault {
                                   NativeT>::value>::type* = nullptr>
   Status HandleSin(HloInstruction* sin) {
     TF_ASSIGN_OR_RETURN(parent_->evaluated_[sin],
-                        ElementWiseUnaryOp(sin, [](ReturnT elem_operand) {
+                        ElementWiseUnaryOp(sin, [](ElementwiseT elem_operand) {
                           return std::sin(elem_operand);
                         }));
     return Status::OK();
@@ -1400,14 +1436,14 @@ class HloEvaluator::TypedVisitor : public DfsHloVisitorWithDefault {
   }
 
   Status HandleSin(HloInstruction* sin) override {
-    return HandleSin<ReturnT>(sin);
+    return HandleSin<ElementwiseT>(sin);
   }
 
   template <typename NativeT, typename std::enable_if<std::is_floating_point<
                                   NativeT>::value>::type* = nullptr>
   Status HandleCos(HloInstruction* cos) {
     TF_ASSIGN_OR_RETURN(parent_->evaluated_[cos],
-                        ElementWiseUnaryOp(cos, [](ReturnT elem_operand) {
+                        ElementWiseUnaryOp(cos, [](ElementwiseT elem_operand) {
                           return std::cos(elem_operand);
                         }));
     return Status::OK();
@@ -1422,7 +1458,7 @@ class HloEvaluator::TypedVisitor : public DfsHloVisitorWithDefault {
   }
 
   Status HandleCos(HloInstruction* cos) override {
-    return HandleCos<ReturnT>(cos);
+    return HandleCos<ElementwiseT>(cos);
   }
 
  private:
@@ -1487,16 +1523,21 @@ class HloEvaluator::TypedVisitor : public DfsHloVisitorWithDefault {
 
   StatusOr<std::unique_ptr<Literal>> ElementWiseUnaryOp(
       HloInstruction* instruction,
-      const std::function<ReturnT(ReturnT)>& unary_op) {
+      const std::function<ElementwiseT(ElementwiseT)>& unary_op) {
     const Literal& operand_literal =
         parent_->GetEvaluatedLiteralFor(instruction->operand(0));
-    return ElementWiseUnaryOpImpl<ReturnT, ReturnT>(instruction, unary_op,
-                                                    operand_literal);
+    TF_ASSIGN_OR_RETURN(
+        auto result_literal,
+        (ElementWiseUnaryOpImpl<ReturnT, ReturnT>(
+            instruction, ConvertUnaryFunction(unary_op), operand_literal)));
+
+    return std::move(result_literal);
   }
 
   StatusOr<std::unique_ptr<Literal>> ElementWiseBinaryOp(
       HloInstruction* instruction,
-      const std::function<ReturnT(ReturnT, ReturnT)>& binary_op) {
+      const std::function<ElementwiseT(ElementwiseT, ElementwiseT)>&
+          binary_op) {
     const auto shape = instruction->shape();
     const auto* lhs = instruction->operand(0);
     const auto* rhs = instruction->operand(1);
@@ -1520,14 +1561,15 @@ class HloEvaluator::TypedVisitor : public DfsHloVisitorWithDefault {
 
     TF_RETURN_IF_ERROR(result->Populate<ReturnT>(
         [&](tensorflow::gtl::ArraySlice<int64> multi_index) {
-          return binary_op(lhs_literal.Get<ReturnT>(multi_index),
-                           rhs_literal.Get<ReturnT>(multi_index));
+          return ConvertBinaryFunction(binary_op)(
+              lhs_literal.Get<ReturnT>(multi_index),
+              rhs_literal.Get<ReturnT>(multi_index));
         }));
     return std::move(result);
   }
 
   template <typename LhsType, typename RhsType, typename EhsType>
-  StatusOr<std::unique_ptr<Literal>> ElementWiseTernaryOp(
+  StatusOr<std::unique_ptr<Literal>> ElementwiseTernaryOp(
       HloInstruction* instruction,
       const std::function<ReturnT(LhsType, RhsType, EhsType)>& ternary_op) {
     const auto shape = instruction->shape();
@@ -1589,9 +1631,11 @@ HloEvaluator::HloEvaluator() {
   typed_visitors_[F64] = MakeUnique<TypedVisitor<double>>(this);
   typed_visitors_[C64] = MakeUnique<TypedVisitor<complex64>>(this);
 
-  typed_visitors_[BF16] = MakeUnique<FunctionVisitor>([](HloInstruction*) {
-    return Unimplemented("HloEvaluator: unhandled primitive type: BF16.");
-  });
+  // Most of the evaluator computations we use don't support BF16 (e.g.,
+  // std::ceil, std::tanh). To make evaluator work with BF16, we set all
+  // elementwise computations to be done in F32 and do BF16<->F32 conversion
+  // around the input and the output of the computations.
+  typed_visitors_[BF16] = MakeUnique<TypedVisitor<bfloat16, float>>(this);
   typed_visitors_[TUPLE] = MakeUnique<FunctionVisitor>([](HloInstruction*) {
     return Unimplemented("HloEvaluator: unhandled primitive type: TUPLE.");
   });
@@ -1722,6 +1766,7 @@ StatusOr<std::unique_ptr<Literal>> HloEvaluator::EvaluateWithSubstitutions(
 }
 
 Status HloEvaluator::HandleParameter(HloInstruction* parameter) {
+  CHECK_LT(parameter->parameter_number(), arg_literals_.size());
   const Literal* input_literal = arg_literals_[parameter->parameter_number()];
   VLOG(2) << "Parameter evaluated to: " << input_literal->ToString();
   DCHECK(ShapeUtil::Equal(parameter->shape(), input_literal->shape()));
diff --git a/tensorflow/compiler/xla/service/hlo_evaluator.h b/tensorflow/compiler/xla/service/hlo_evaluator.h
index 7557aaa248..e7f6254a0c 100644
--- a/tensorflow/compiler/xla/service/hlo_evaluator.h
+++ b/tensorflow/compiler/xla/service/hlo_evaluator.h
@@ -105,7 +105,11 @@ class HloEvaluator : public DfsHloVisitorWithDefault {
   // always boolean.
   // These operations are handled outside of the parent HloEvaluator handlers
   // instead of from within TypedVisitor.
-  template <typename ReturnT>
+  //
+  // Type params:
+  //   - ReturnT: The type of input and output of each operation.
+  //   - ElementwiseT: The type in which internal computation are done.
+  template <typename ReturnT, typename ElementwiseT = ReturnT>
   class TypedVisitor;
 
   // Wraps around instruction handling to infer types before dispatching to
diff --git a/tensorflow/compiler/xla/service/hlo_evaluator_test.cc b/tensorflow/compiler/xla/service/hlo_evaluator_test.cc
index a5d39fe086..97697d06b7 100644
--- a/tensorflow/compiler/xla/service/hlo_evaluator_test.cc
+++ b/tensorflow/compiler/xla/service/hlo_evaluator_test.cc
@@ -25,8 +25,10 @@ limitations under the License.
 #include "tensorflow/compiler/xla/literal_util.h"
 #include "tensorflow/compiler/xla/reference_util.h"
 #include "tensorflow/compiler/xla/service/hlo_computation.h"
+#include "tensorflow/compiler/xla/service/hlo_element_type_converter.h"
 #include "tensorflow/compiler/xla/service/hlo_instruction.h"
 #include "tensorflow/compiler/xla/shape_util.h"
+#include "tensorflow/compiler/xla/status.h"
 #include "tensorflow/compiler/xla/status_macros.h"
 #include "tensorflow/compiler/xla/statusor.h"
 #include "tensorflow/compiler/xla/test.h"
@@ -35,15 +37,33 @@ limitations under the License.
 #include "tensorflow/compiler/xla/types.h"
 #include "tensorflow/compiler/xla/util.h"
 #include "tensorflow/compiler/xla/xla_data.pb.h"
+#include "tensorflow/core/lib/core/status.h"
 #include "tensorflow/core/lib/core/status_test_util.h"
+#include "tensorflow/core/platform/test.h"
 #include "tensorflow/core/platform/types.h"
 
 namespace xla {
 namespace {
 
-class HloEvaluatorTest : public HloVerifiedTestBase {
+static std::array<bool, 2> use_bf16_params{true, false};
+
+class HloEvaluatorTest : public ::testing::WithParamInterface<bool>,
+                         public HloVerifiedTestBase {
  protected:
-  HloEvaluatorTest() { evaluator_ = MakeUnique<HloEvaluator>(); }
+  HloEvaluatorTest() : use_bfloat16_(GetParam()) {
+    evaluator_ = MakeUnique<HloEvaluator>();
+  }
+
+  std::unique_ptr<Literal> Evaluate(
+      tensorflow::gtl::ArraySlice<const Literal*> arg_literals = {}) {
+    if (use_bfloat16_) {
+      // In BF16 mode, we convert all F32 type to BF16 and evaluate the module.
+      auto type_converter = HloElementTypeConverter(F32, BF16);
+      type_converter.Run(&module()).ValueOrDie();
+    }
+    return evaluator_->Evaluate(*module().entry_computation(), arg_literals)
+        .ConsumeValueOrDie();
+  }
 
   std::unique_ptr<HloEvaluator> evaluator_;
 
@@ -52,12 +72,11 @@ class HloEvaluatorTest : public HloVerifiedTestBase {
     HloComputation::Builder b(TestName());
     auto c1 =
         b.AddInstruction(HloInstruction::CreateConstant(std::move(input)));
-    auto instruction = b.AddInstruction(
+    b.AddInstruction(
         HloInstruction::CreateUnary(expected->shape(), opcode, c1));
     module().AddEntryComputation(b.Build());
 
-    std::unique_ptr<Literal> result =
-        evaluator_->Evaluate(instruction, {}).ConsumeValueOrDie();
+    std::unique_ptr<Literal> result = Evaluate();
 
     auto element_type = expected->shape().element_type();
     if (element_type == F32 || element_type == F64) {
@@ -74,20 +93,24 @@ class HloEvaluatorTest : public HloVerifiedTestBase {
     HloComputation::Builder b(TestName());
     auto c1 = b.AddInstruction(HloInstruction::CreateConstant(std::move(lhs)));
     auto c2 = b.AddInstruction(HloInstruction::CreateConstant(std::move(rhs)));
-    auto instruction = b.AddInstruction(
+    b.AddInstruction(
         HloInstruction::CreateBinary(expected->shape(), opcode, c1, c2));
     module().AddEntryComputation(b.Build());
 
-    std::unique_ptr<Literal> result =
-        evaluator_->Evaluate(instruction, {}).ConsumeValueOrDie();
+    std::unique_ptr<Literal> result = Evaluate();
 
     LiteralTestUtil::ExpectEqual(*expected, *result);
   }
+
+  bool use_bfloat16_;
 };
 
+#define XLA_TYPED_TEST_P(test_case_name, test_name, test_type1) \
+  TEST_P(test_case_name, test_name)
+
 // Verifies that HloEvaluator evaluates a HLO instruction that performs clamp
 // with 3 operands.
-TEST_F(HloEvaluatorTest, DoesClamp) {
+TEST_P(HloEvaluatorTest, DoesClamp) {
   auto low = Literal::CreateR2<float>({{0.f, 2.f}, {2.f, 4.f}});
   auto value = Literal::CreateR2<float>({{0.f, 5.f}, {0.f, 4.f}});
   auto high = Literal::CreateR2<float>({{2.f, 4.f}, {4.f, 4.f}});
@@ -97,19 +120,18 @@ TEST_F(HloEvaluatorTest, DoesClamp) {
   auto c1 = b.AddInstruction(HloInstruction::CreateConstant(std::move(low)));
   auto c2 = b.AddInstruction(HloInstruction::CreateConstant(std::move(value)));
   auto c3 = b.AddInstruction(HloInstruction::CreateConstant(std::move(high)));
-  auto instruction = b.AddInstruction(
+  b.AddInstruction(
       HloInstruction::CreateTernary(shape, HloOpcode::kClamp, c1, c2, c3));
   module().AddEntryComputation(b.Build());
 
-  std::unique_ptr<Literal> result =
-      evaluator_->Evaluate(instruction, {}).ConsumeValueOrDie();
+  std::unique_ptr<Literal> result = Evaluate();
 
   auto expected = Literal::CreateR2<float>({{0, 4}, {2, 4}});
 
   LiteralTestUtil::ExpectEqual(*expected, *result);
 }
 
-TEST_F(HloEvaluatorTest, DISABLED_DoesClampSpecialBroadcast) {
+TEST_P(HloEvaluatorTest, DISABLED_DoesClampSpecialBroadcast) {
   auto low = Literal::CreateR0<float>(0.f);
   auto value = Literal::CreateR2<float>({{-1.f, 0.f}, {1.f, 2.f}});
   auto high = Literal::CreateR0<float>(1.f);
@@ -119,12 +141,11 @@ TEST_F(HloEvaluatorTest, DISABLED_DoesClampSpecialBroadcast) {
   auto c1 = b.AddInstruction(HloInstruction::CreateConstant(std::move(low)));
   auto c2 = b.AddInstruction(HloInstruction::CreateConstant(std::move(value)));
   auto c3 = b.AddInstruction(HloInstruction::CreateConstant(std::move(high)));
-  auto instruction = b.AddInstruction(
+  b.AddInstruction(
       HloInstruction::CreateTernary(shape, HloOpcode::kClamp, c1, c2, c3));
   module().AddEntryComputation(b.Build());
 
-  std::unique_ptr<Literal> result =
-      evaluator_->Evaluate(instruction, {}).ConsumeValueOrDie();
+  std::unique_ptr<Literal> result = Evaluate();
 
   auto expected = Literal::CreateR2<float>({{0, 0}, {1, 1}});
 
@@ -133,7 +154,7 @@ TEST_F(HloEvaluatorTest, DISABLED_DoesClampSpecialBroadcast) {
 
 // Verifies that HloEvaluator evaluates a HLO instruction that performs select
 // with 3 operands.
-TEST_F(HloEvaluatorTest, DoesSelect) {
+TEST_P(HloEvaluatorTest, DoesSelect) {
   auto pred = Literal::CreateR2<bool>({{true, false}, {false, true}});
   auto on_true = Literal::CreateR2<float>({{2.f, 4.f}, {4.f, 4.f}});
   auto on_false = Literal::CreateR2<float>({{0.f, 5.f}, {0.f, 4.f}});
@@ -145,12 +166,11 @@ TEST_F(HloEvaluatorTest, DoesSelect) {
       b.AddInstruction(HloInstruction::CreateConstant(std::move(on_true)));
   auto c3 =
       b.AddInstruction(HloInstruction::CreateConstant(std::move(on_false)));
-  auto instruction = b.AddInstruction(
+  b.AddInstruction(
       HloInstruction::CreateTernary(shape, HloOpcode::kSelect, c1, c2, c3));
   module().AddEntryComputation(b.Build());
 
-  std::unique_ptr<Literal> result =
-      evaluator_->Evaluate(instruction, {}).ConsumeValueOrDie();
+  std::unique_ptr<Literal> result = Evaluate({});
 
   auto expected = Literal::CreateR2<float>({{2, 5}, {0, 4}});
 
@@ -159,7 +179,7 @@ TEST_F(HloEvaluatorTest, DoesSelect) {
 
 // Verifies that HloEvaluator evaluates a HLO instruction that performs
 // element-wise addition with 2 operands.
-TEST_F(HloEvaluatorTest, DoesAdd) {
+TEST_P(HloEvaluatorTest, DoesAdd) {
   auto lhs = Literal::CreateR2<int64>({{1, 0}, {-100, 4}});
   auto rhs = Literal::CreateR2<int64>({{2, 4}, {4, 4}});
   auto expected = Literal::CreateR2<int64>({{3, 4}, {-96, 8}});
@@ -168,7 +188,7 @@ TEST_F(HloEvaluatorTest, DoesAdd) {
 }
 // Verifies that HloEvaluator evaluates a HLO instruction that performs
 // element-wise and with 2 operands.
-TEST_F(HloEvaluatorTest, DoesAnd) {
+TEST_P(HloEvaluatorTest, DoesAnd) {
   auto lhs = Literal::CreateR2<int64>({{1, 0}, {-100, 4}});
   auto rhs = Literal::CreateR2<int64>({{2, 4}, {4, 4}});
   auto expected = Literal::CreateR2<int64>({{0, 0}, {4, 4}});
@@ -177,7 +197,7 @@ TEST_F(HloEvaluatorTest, DoesAnd) {
 }
 // Verifies that HloEvaluator evaluates a HLO instruction that performs
 // element-wise or with 2 operands.
-TEST_F(HloEvaluatorTest, DoesOr) {
+TEST_P(HloEvaluatorTest, DoesOr) {
   auto lhs = Literal::CreateR2<int64>({{1, 0}, {-100, 4}});
   auto rhs = Literal::CreateR2<int64>({{2, 4}, {4, 4}});
   auto expected = Literal::CreateR2<int64>({{3, 4}, {-100, 4}});
@@ -186,7 +206,7 @@ TEST_F(HloEvaluatorTest, DoesOr) {
 }
 // Verifies that HloEvaluator evaluates a HLO instruction that performs
 // element-wise multiply with 2 operands.
-TEST_F(HloEvaluatorTest, DoesMultiply) {
+TEST_P(HloEvaluatorTest, DoesMultiply) {
   auto lhs = Literal::CreateR2<int32>({{-1, 0}, {-100, 4}});
   auto rhs = Literal::CreateR2<int32>(
       {{std::numeric_limits<int32>::min(), 4}, {4, 4}});
@@ -197,14 +217,14 @@ TEST_F(HloEvaluatorTest, DoesMultiply) {
 }
 // Verifies that HloEvaluator evaluates a HLO instruction that performs
 // element-wise divide with 2 operands.
-TEST_F(HloEvaluatorTest, DoesDivideInt64) {
+TEST_P(HloEvaluatorTest, DoesDivideInt64) {
   auto lhs = Literal::CreateR2<int64>({{1, 0}, {-100, 4}});
   auto rhs = Literal::CreateR2<int64>({{2, 4}, {4, 4}});
   auto expected = Literal::CreateR2<int64>({{0, 0}, {-25, 1}});
   TestBinaryOp(HloOpcode::kDivide, std::move(expected), std::move(lhs),
                std::move(rhs));
 }
-TEST_F(HloEvaluatorTest, DoesDivideDouble) {
+TEST_P(HloEvaluatorTest, DoesDivideDouble) {
   auto lhs = Literal::CreateR2<double>({{1.0, 0.0}, {-100.0, 4.0}});
   auto rhs = Literal::CreateR2<double>({{2.2, 4.0}, {4.0, 4.0}});
   auto expected =
@@ -215,40 +235,41 @@ TEST_F(HloEvaluatorTest, DoesDivideDouble) {
 
 // Verifies that HloEvaluator evaluates a HLO instruction that performs
 // element-wise abs op with 1 operand.
-TEST_F(HloEvaluatorTest, DoesAbsR2) {
+TEST_P(HloEvaluatorTest, DoesAbsR2) {
   auto operand = Literal::CreateR2<int64>({{1, -20}, {-100, 4}});
   auto expected = Literal::CreateR2<int64>({{1, 20}, {100, 4}});
   TestUnaryOp(HloOpcode::kAbs, std::move(expected), std::move(operand));
 }
-TEST_F(HloEvaluatorTest, DoesAbsR0) {
+TEST_P(HloEvaluatorTest, DoesAbsR0) {
   auto operand = Literal::CreateR0<float>(-1.0f);
   auto expected = Literal::CreateR0<float>(1.0f);
   TestUnaryOp(HloOpcode::kAbs, std::move(expected), std::move(operand));
 }
-TEST_F(HloEvaluatorTest, DoesAbsR1WithZeroSize) {
+TEST_P(HloEvaluatorTest, DoesAbsR1WithZeroSize) {
   auto operand = Literal::CreateR1<float>({});
   auto expected = Literal::CreateR1<float>({});
   TestUnaryOp(HloOpcode::kAbs, std::move(expected), std::move(operand));
 }
-TEST_F(HloEvaluatorTest, DoesNegateR2) {
+TEST_P(HloEvaluatorTest, DoesNegateR2) {
   auto operand = Literal::CreateR2<int32>(
       {{0, std::numeric_limits<int32>::min()}, {-1, 4}});
   auto expected =
       Literal::CreateR2<int32>({{0, std::numeric_limits<int>::min()}, {1, -4}});
   TestUnaryOp(HloOpcode::kNegate, std::move(expected), std::move(operand));
 }
-TEST_F(HloEvaluatorTest, DoesCosR2) {
+TEST_P(HloEvaluatorTest, DoesCosR2) {
   auto operand = Literal::CreateR2<float>({{0, M_PI}, {-M_PI, 2 * M_PI}});
   auto expected = Literal::CreateR2<float>({{1, -1}, {-1, 1}});
-  TestUnaryOp(HloOpcode::kCos, std::move(expected), std::move(operand));
+  TestUnaryOp(HloOpcode::kCos, std::move(expected), std::move(operand),
+              use_bfloat16_ ? 0x1.0P-5 : 0x1.0P-20);
 }
-TEST_F(HloEvaluatorTest, DoesSinR2) {
+TEST_P(HloEvaluatorTest, DoesSinR2) {
   auto operand = Literal::CreateR2<float>({{0, M_PI}, {-M_PI, 2 * M_PI}});
   auto expected = Literal::CreateR2<float>({{0, 0}, {0, 0}});
   TestUnaryOp(HloOpcode::kSin, std::move(expected), std::move(operand),
-              0x1.0P-20);
+              use_bfloat16_ ? 0x1.0P-5 : 0x1.0P-20);
 }
-TEST_F(HloEvaluatorTest, DoesNotR2) {
+TEST_P(HloEvaluatorTest, DoesNotR2) {
   auto operand =
       Literal::CreateR2<int32>({{0, std::numeric_limits<int>::min()},
                                 {-1, std::numeric_limits<int>::max()}});
@@ -259,7 +280,7 @@ TEST_F(HloEvaluatorTest, DoesNotR2) {
 }
 // Verifies that HloEvaluator evaluates a HLO Computation with non-parameter nor
 // constant operands.
-TEST_F(HloEvaluatorTest, DoesTraverseInstructions) {
+TEST_P(HloEvaluatorTest, DoesTraverseInstructions) {
   auto lhs = Literal::CreateR2<int64>({{1, 0}, {-100, 4}});
   auto rhs = Literal::CreateR2<int64>({{2, 4}, {4, 4}});
   auto rhs2 = Literal::CreateR2<int64>({{1, -20}, {-100, 4}});
@@ -279,10 +300,9 @@ TEST_F(HloEvaluatorTest, DoesTraverseInstructions) {
       b.AddInstruction(HloInstruction::CreateParameter(2, shape, "rhs2"));
   b.AddInstruction(HloInstruction::CreateBinary(shape, HloOpcode::kAdd,
                                                 lhs_instruction, param_rhs2));
-  auto computation = module().AddEntryComputation(b.Build());
+  module().AddEntryComputation(b.Build());
 
-  std::unique_ptr<Literal> result =
-      evaluator_->Evaluate(*computation, args).ConsumeValueOrDie();
+  std::unique_ptr<Literal> result = Evaluate(args);
 
   auto expected = Literal::CreateR2<int64>({{4, -16}, {-196, 12}});
 
@@ -290,7 +310,7 @@ TEST_F(HloEvaluatorTest, DoesTraverseInstructions) {
 }
 
 // Verifies Reshape operation is correctly evaluated.
-TEST_F(HloEvaluatorTest, DoesReshape) {
+TEST_P(HloEvaluatorTest, DoesReshape) {
   HloComputation::Builder b(TestName());
   const int64 dimensions[] = {11, 8, 7, 5, 9};
   TF_ASSERT_OK_AND_ASSIGN(auto literal,
@@ -304,21 +324,20 @@ TEST_F(HloEvaluatorTest, DoesReshape) {
   const int64 permutation[] = {1, 2, 0, 4, 3};
   b.AddInstruction(
       HloInstruction::CreateTranspose(shape, literal_instruction, permutation));
-  auto computation = module().AddEntryComputation(b.Build());
+  module().AddEntryComputation(b.Build());
 
-  std::unique_ptr<Literal> result =
-      evaluator_->Evaluate(*computation, {}).ConsumeValueOrDie();
+  std::unique_ptr<Literal> result = Evaluate({});
 
   using NativeT = typename primitive_util::PrimitiveTypeToNative<F32>::type;
   result->EachCell<NativeT>(
       [&](tensorflow::gtl::ArraySlice<int64> indices, NativeT value) {
         std::vector<int64> rindexes = Permute(permutation, indices);
-        EXPECT_TRUE(value == literal_clone->Get<NativeT>(rindexes));
+        EXPECT_NEAR(value, literal_clone->Get<NativeT>(rindexes), 0x1.0P-5);
       });
 }
 
 // Verifies Broadcast operation is correctly evaluated.
-TEST_F(HloEvaluatorTest, DoesBroadcast) {
+TEST_P(HloEvaluatorTest, DoesBroadcast) {
   HloComputation::Builder b(TestName());
   auto input_literal = Literal::CreateR2<int32>({{1, 2}, {3, 4}, {5, 6}});
   auto output_literal = Literal::CreateR3<int32>(
@@ -327,15 +346,14 @@ TEST_F(HloEvaluatorTest, DoesBroadcast) {
       HloInstruction::CreateConstant(std::move(input_literal)));
   b.AddInstruction(HloInstruction::CreateBroadcast(
       output_literal->shape(), literal_instruction, {1, 2}));
-  auto computation = module().AddEntryComputation(b.Build());
+  module().AddEntryComputation(b.Build());
 
-  std::unique_ptr<Literal> result =
-      evaluator_->Evaluate(*computation, {}).ConsumeValueOrDie();
+  std::unique_ptr<Literal> result = Evaluate({});
 
   LiteralTestUtil::ExpectEqual(*result, *output_literal);
 }
 
-TEST_F(HloEvaluatorTest, DoesBroadcastScalar) {
+TEST_P(HloEvaluatorTest, DoesBroadcastScalar) {
   HloComputation::Builder b(TestName());
   auto input_literal = Literal::CreateR0<int32>(111);
   auto output_literal = Literal::CreateR2<int32>(
@@ -347,15 +365,14 @@ TEST_F(HloEvaluatorTest, DoesBroadcastScalar) {
   b.AddInstruction(HloInstruction::CreateBroadcast(
       output_literal->shape(), literal_instruction,
       /*broadcast_dimensions=*/{}));
-  auto computation = module().AddEntryComputation(b.Build());
+  module().AddEntryComputation(b.Build());
 
-  std::unique_ptr<Literal> result =
-      evaluator_->Evaluate(*computation, {}).ConsumeValueOrDie();
+  std::unique_ptr<Literal> result = Evaluate({});
 
   LiteralTestUtil::ExpectEqual(*result, *output_literal);
 }
 
-TEST_F(HloEvaluatorTest, DoesConcatenateSimple) {
+TEST_P(HloEvaluatorTest, DoesConcatenateSimple) {
   HloComputation::Builder b(TestName());
 
   HloInstruction* operand1 = b.AddInstruction(HloInstruction::CreateConstant(
@@ -368,17 +385,16 @@ TEST_F(HloEvaluatorTest, DoesConcatenateSimple) {
   Shape shape = ShapeUtil::MakeShape(S64, {4, 2});
   b.AddInstruction(HloInstruction::CreateConcatenate(shape, operands, 0));
 
-  auto computation = module().AddEntryComputation(b.Build());
+  module().AddEntryComputation(b.Build());
 
-  std::unique_ptr<Literal> result =
-      evaluator_->Evaluate(*computation, {}).ConsumeValueOrDie();
+  std::unique_ptr<Literal> result = Evaluate();
 
   auto expected =
       Literal::CreateR2<int64>({{-1, -2}, {100, 200}, {-2, -3}, {-100, -200}});
   LiteralTestUtil::ExpectEqual(*expected, *result);
 }
 
-TEST_F(HloEvaluatorTest, ConcatenateHandlesShapeWithZeroElement) {
+TEST_P(HloEvaluatorTest, ConcatenateHandlesShapeWithZeroElement) {
   HloComputation::Builder b(TestName());
 
   HloInstruction* operand1 = b.AddInstruction(
@@ -391,16 +407,15 @@ TEST_F(HloEvaluatorTest, ConcatenateHandlesShapeWithZeroElement) {
   Shape shape = ShapeUtil::MakeShape(S64, {2});
   b.AddInstruction(HloInstruction::CreateConcatenate(shape, operands, 0));
 
-  auto computation = module().AddEntryComputation(b.Build());
+  module().AddEntryComputation(b.Build());
 
-  std::unique_ptr<Literal> result =
-      evaluator_->Evaluate(*computation, {}).ConsumeValueOrDie();
+  std::unique_ptr<Literal> result = Evaluate();
 
   auto expected = Literal::CreateR1<int64>({100, 200});
   LiteralTestUtil::ExpectEqual(*expected, *result);
 }
 
-TEST_F(HloEvaluatorTest, ConvertWithSameLayout) {
+TEST_P(HloEvaluatorTest, ConvertWithSameLayout) {
   HloComputation::Builder b(TestName());
 
   auto input_literal = Literal::CreateR2<int32>({{1, 2}, {3, 4}, {5, 6}});
@@ -412,15 +427,14 @@ TEST_F(HloEvaluatorTest, ConvertWithSameLayout) {
   HloInstruction* constant = b.AddInstruction(
       HloInstruction::CreateConstant(std::move(input_literal)));
   b.AddInstruction(HloInstruction::CreateConvert(expected->shape(), constant));
-  auto computation = module().AddEntryComputation(b.Build());
+  module().AddEntryComputation(b.Build());
 
-  std::unique_ptr<Literal> result =
-      evaluator_->Evaluate(*computation, {}).ConsumeValueOrDie();
+  std::unique_ptr<Literal> result = Evaluate();
 
   LiteralTestUtil::ExpectEqual(*result, *expected);
 }
 
-TEST_F(HloEvaluatorTest, ConvertWithDifferentLayout) {
+TEST_P(HloEvaluatorTest, ConvertWithDifferentLayout) {
   HloComputation::Builder b(TestName());
 
   auto input_literal = Literal::CreateR2WithLayout<int32>(
@@ -433,10 +447,9 @@ TEST_F(HloEvaluatorTest, ConvertWithDifferentLayout) {
   HloInstruction* constant = b.AddInstruction(
       HloInstruction::CreateConstant(std::move(input_literal)));
   b.AddInstruction(HloInstruction::CreateConvert(expected->shape(), constant));
-  auto computation = module().AddEntryComputation(b.Build());
+  module().AddEntryComputation(b.Build());
 
-  std::unique_ptr<Literal> result =
-      evaluator_->Evaluate(*computation, {}).ConsumeValueOrDie();
+  std::unique_ptr<Literal> result = Evaluate();
 
   LiteralTestUtil::ExpectEqual(*result, *expected);
 }
@@ -454,7 +467,7 @@ PaddingConfig CreatePaddingConfig(
   return padding_config;
 }
 
-TEST_F(HloEvaluatorTest, Pad2DIntegerArrayWithZeroDimension) {
+TEST_P(HloEvaluatorTest, Pad2DIntegerArrayWithZeroDimension) {
   auto operand = Literal::CreateR2<int32>({{}, {}});
   HloComputation::Builder b(TestName());
   auto operand_instruction =
@@ -467,11 +480,11 @@ TEST_F(HloEvaluatorTest, Pad2DIntegerArrayWithZeroDimension) {
 
   auto padding_config = CreatePaddingConfig({{{1, 0, 2}}, {{0, 2, 1}}});
   Shape shape = ShapeUtil::MakeShape(S32, {5, 2});
-  auto pad_instruction = b.AddInstruction(HloInstruction::CreatePad(
+  b.AddInstruction(HloInstruction::CreatePad(
       shape, operand_instruction, padding_value_instruction, padding_config));
   module().AddEntryComputation(b.Build());
 
-  auto result = evaluator_->Evaluate(pad_instruction).ConsumeValueOrDie();
+  std::unique_ptr<Literal> result = Evaluate();
 
   auto expected = Literal::CreateR2<int32>(
       {{10, 10}, {10, 10}, {10, 10}, {10, 10}, {10, 10}});
@@ -479,7 +492,7 @@ TEST_F(HloEvaluatorTest, Pad2DIntegerArrayWithZeroDimension) {
   LiteralTestUtil::ExpectEqual(*expected, *result);
 }
 
-TEST_F(HloEvaluatorTest, Pad4DFloatArrayWithInteriorPadding) {
+TEST_P(HloEvaluatorTest, Pad4DFloatArrayWithInteriorPadding) {
   HloComputation::Builder b(TestName());
 
   Array4D<float> input_array(3, 2, 1, 1, {1, 2, 3, 4, 5, 6});
@@ -496,10 +509,9 @@ TEST_F(HloEvaluatorTest, Pad4DFloatArrayWithInteriorPadding) {
       CreatePaddingConfig({{{1, 0, 2}}, {{0, 2, 1}}, {{0, 0, 0}}, {{0, 0, 0}}});
   b.AddInstruction(HloInstruction::CreatePad(
       shape, input_instruction, pad_instruction, r4_padding_on_dim0_dim1));
-  auto computation = module().AddEntryComputation(b.Build());
+  module().AddEntryComputation(b.Build());
 
-  std::unique_ptr<Literal> result =
-      evaluator_->Evaluate(*computation, {}).ConsumeValueOrDie();
+  std::unique_ptr<Literal> result = Evaluate();
 
   auto expected_array = MakeUnique<Array4D<float>>(8, 5, 1, 1);
   expected_array->Fill(kPadValue);
@@ -515,7 +527,7 @@ TEST_F(HloEvaluatorTest, Pad4DFloatArrayWithInteriorPadding) {
   LiteralTestUtil::ExpectEqual(*expected, *result);
 }
 
-TEST_F(HloEvaluatorTest, NegativePadding2D) {
+TEST_P(HloEvaluatorTest, NegativePadding2D) {
   HloComputation::Builder b(TestName());
 
   // input_array:
@@ -541,10 +553,9 @@ TEST_F(HloEvaluatorTest, NegativePadding2D) {
                                              pad_value_instruction,
                                              r2_padding_on_dim0_dim1));
 
-  auto computation = module().AddEntryComputation(b.Build());
+  module().AddEntryComputation(b.Build());
 
-  std::unique_ptr<Literal> result =
-      evaluator_->Evaluate(*computation, {}).ConsumeValueOrDie();
+  std::unique_ptr<Literal> result = Evaluate();
 
   // f32[1,5] { 7.0, 2.718, 2.718, 2.718, 2.718 }
   auto expected_array = MakeUnique<Array2D<float>>(1, 5);
@@ -555,10 +566,10 @@ TEST_F(HloEvaluatorTest, NegativePadding2D) {
   (*expected_array)(0, 4) = 2.718f;
   auto expected = Literal::CreateR2FromArray2D<float>(*expected_array);
 
-  LiteralTestUtil::ExpectEqual(*expected, *result);
+  LiteralTestUtil::ExpectNear(*expected, *result, ErrorSpec(0x1.0P-5));
 }
 
-TEST_F(HloEvaluatorTest, NegativeAndInteriorPadding2D) {
+TEST_P(HloEvaluatorTest, NegativeAndInteriorPadding2D) {
   HloComputation::Builder b(TestName());
 
   // f32[4,3] {
@@ -587,10 +598,9 @@ TEST_F(HloEvaluatorTest, NegativeAndInteriorPadding2D) {
                                              pad_value_instruction,
                                              r2_padding_on_dim0_dim1));
 
-  auto computation = module().AddEntryComputation(b.Build());
+  module().AddEntryComputation(b.Build());
 
-  std::unique_ptr<Literal> result =
-      evaluator_->Evaluate(*computation, {}).ConsumeValueOrDie();
+  std::unique_ptr<Literal> result = Evaluate();
 
   auto expected_array = MakeUnique<Array2D<float>>(0, 9);
   auto expected = Literal::CreateR2FromArray2D<float>(*expected_array);
@@ -598,7 +608,7 @@ TEST_F(HloEvaluatorTest, NegativeAndInteriorPadding2D) {
   LiteralTestUtil::ExpectEqual(*expected, *result);
 }
 
-TEST_F(HloEvaluatorTest, DotRank2AndRank1) {
+TEST_P(HloEvaluatorTest, DotRank2AndRank1) {
   HloComputation::Builder b(TestName());
 
   // lhs:
@@ -626,10 +636,9 @@ TEST_F(HloEvaluatorTest, DotRank2AndRank1) {
   dot_dnums.add_rhs_contracting_dimensions(0);
   b.AddInstruction(HloInstruction::CreateDot(shape, lhs_instruction,
                                              rhs_instruction, dot_dnums));
-  auto computation = module().AddEntryComputation(b.Build());
+  module().AddEntryComputation(b.Build());
 
-  std::unique_ptr<Literal> result =
-      evaluator_->Evaluate(*computation, {}).ConsumeValueOrDie();
+  std::unique_ptr<Literal> result = Evaluate();
 
   // clang-format off
   auto expected_array = Array2D<float>({
@@ -644,7 +653,7 @@ TEST_F(HloEvaluatorTest, DotRank2AndRank1) {
   LiteralTestUtil::ExpectEqual(*expected, *result);
 }
 
-TEST_F(HloEvaluatorTest, DotRank1AndRank2) {
+TEST_P(HloEvaluatorTest, DotRank1AndRank2) {
   HloComputation::Builder b(TestName());
 
   // lhs:
@@ -672,17 +681,16 @@ TEST_F(HloEvaluatorTest, DotRank1AndRank2) {
   dot_dnums.add_rhs_contracting_dimensions(0);
   b.AddInstruction(HloInstruction::CreateDot(shape, lhs_instruction,
                                              rhs_instruction, dot_dnums));
-  auto computation = module().AddEntryComputation(b.Build());
+  module().AddEntryComputation(b.Build());
 
-  std::unique_ptr<Literal> result =
-      evaluator_->Evaluate(*computation, {}).ConsumeValueOrDie();
+  std::unique_ptr<Literal> result = Evaluate();
 
   auto expected = Literal::CreateR1<float>({22.f, 28.f});
 
   LiteralTestUtil::ExpectEqual(*expected, *result);
 }
 
-TEST_F(HloEvaluatorTest, DotRank2AndRank2) {
+TEST_P(HloEvaluatorTest, DotRank2AndRank2) {
   HloComputation::Builder b(TestName());
 
   // lhs:
@@ -716,10 +724,9 @@ TEST_F(HloEvaluatorTest, DotRank2AndRank2) {
   dot_dnums.add_rhs_contracting_dimensions(0);
   b.AddInstruction(HloInstruction::CreateDot(shape, lhs_instruction,
                                              rhs_instruction, dot_dnums));
-  auto computation = module().AddEntryComputation(b.Build());
+  module().AddEntryComputation(b.Build());
 
-  std::unique_ptr<Literal> result =
-      evaluator_->Evaluate(*computation, {}).ConsumeValueOrDie();
+  std::unique_ptr<Literal> result = Evaluate();
 
   auto expected_array = Array2D<float>({
       {22.f, 28.f},
@@ -732,7 +739,7 @@ TEST_F(HloEvaluatorTest, DotRank2AndRank2) {
   LiteralTestUtil::ExpectEqual(*expected, *result);
 }
 
-TEST_F(HloEvaluatorTest, SimpleConv1D) {
+TEST_P(HloEvaluatorTest, SimpleConv1D) {
   HloComputation::Builder b(TestName());
 
   Array3D<float> lhs_array = {{{1, 2, 3}}};
@@ -770,10 +777,9 @@ TEST_F(HloEvaluatorTest, SimpleConv1D) {
   const Shape& shape = ShapeUtil::MakeShape(F32, {1, 1, 3});
   b.AddInstruction(HloInstruction::CreateConvolve(
       shape, lhs_instruction, rhs_instruction, window, dnums));
-  auto computation = module().AddEntryComputation(b.Build());
+  module().AddEntryComputation(b.Build());
 
-  std::unique_ptr<Literal> result =
-      evaluator_->Evaluate(*computation, {}).ConsumeValueOrDie();
+  std::unique_ptr<Literal> result = Evaluate();
 
   Array3D<float> expected_array = {{{11.f, 18.f, 9.f}}};
   auto expected = Literal::CreateR3FromArray3D<float>(expected_array);
@@ -781,7 +787,7 @@ TEST_F(HloEvaluatorTest, SimpleConv1D) {
   LiteralTestUtil::ExpectEqual(*expected, *result);
 }
 
-TEST_F(HloEvaluatorTest, Simple4x4Conv2DWith2x2Kernel) {
+TEST_P(HloEvaluatorTest, Simple4x4Conv2DWith2x2Kernel) {
   HloComputation::Builder b(TestName());
 
   Array4D<float> lhs_array(1, 1, 4, 4);
@@ -825,10 +831,9 @@ TEST_F(HloEvaluatorTest, Simple4x4Conv2DWith2x2Kernel) {
   const Shape& shape = ShapeUtil::MakeShape(F32, {1, 1, 4, 4});
   b.AddInstruction(HloInstruction::CreateConvolve(
       shape, lhs_instruction, rhs_instruction, window, dnums));
-  auto computation = module().AddEntryComputation(b.Build());
+  module().AddEntryComputation(b.Build());
 
-  std::unique_ptr<Literal> result =
-      evaluator_->Evaluate(*computation, {}).ConsumeValueOrDie();
+  std::unique_ptr<Literal> result = Evaluate();
 
   Array4D<float> expected_array(1, 1, 4, 4);
   // clang-format off
@@ -844,7 +849,7 @@ TEST_F(HloEvaluatorTest, Simple4x4Conv2DWith2x2Kernel) {
   LiteralTestUtil::ExpectEqual(*expected, *result);
 }
 
-TEST_F(HloEvaluatorTest, Conv2DGeneralDimensionsReversed) {
+TEST_P(HloEvaluatorTest, Conv2DGeneralDimensionsReversed) {
   HloComputation::Builder b(TestName());
 
   // clang-format off
@@ -909,21 +914,22 @@ TEST_F(HloEvaluatorTest, Conv2DGeneralDimensionsReversed) {
   const Shape& shape = ShapeUtil::MakeShape(F32, {1, 1, 1, 2});
   b.AddInstruction(HloInstruction::CreateConvolve(
       shape, lhs_instruction, rhs_instruction, window, dnums));
-  auto computation = module().AddEntryComputation(b.Build());
+  module().AddEntryComputation(b.Build());
 
-  std::unique_ptr<Literal> result =
-      evaluator_->Evaluate(*computation, {}).ConsumeValueOrDie();
+  std::unique_ptr<Literal> result = Evaluate();
 
   // clang-format off
   // Result dimensions: [feature=1, height=1, batch=1, width=2]
   Array4D<float> expected_array({{{{2514, 2685}}}});
+  Array4D<float> expected_array_bf16({{{{2512, 2672}}}});
   // clang-format on
-  auto expected = Literal::CreateR4FromArray4D<float>(expected_array);
+  auto expected = Literal::CreateR4FromArray4D<float>(
+      use_bfloat16_ ? expected_array_bf16 : expected_array);
 
   LiteralTestUtil::ExpectEqual(*expected, *result);
 }
 
-TEST_F(HloEvaluatorTest, Conv2DGeneralDimensions) {
+TEST_P(HloEvaluatorTest, Conv2DGeneralDimensions) {
   HloComputation::Builder b(TestName());
 
   // clang-format off
@@ -985,21 +991,22 @@ TEST_F(HloEvaluatorTest, Conv2DGeneralDimensions) {
   const Shape& shape = ShapeUtil::MakeShape(F32, {1, 1, 1, 2});
   b.AddInstruction(HloInstruction::CreateConvolve(
       shape, lhs_instruction, rhs_instruction, window, dnums));
-  auto computation = module().AddEntryComputation(b.Build());
+  module().AddEntryComputation(b.Build());
 
-  std::unique_ptr<Literal> result =
-      evaluator_->Evaluate(*computation, {}).ConsumeValueOrDie();
+  std::unique_ptr<Literal> result = Evaluate();
 
   // clang-format off
   // Result dimensions: [feature=1, height=1, batch=1, width=2]
   Array4D<float> expected_array({{{{2514, 2685}}}});
+  Array4D<float> expected_array_bf16({{{{2512, 2672}}}});
   // clang-format on
-  auto expected = Literal::CreateR4FromArray4D<float>(expected_array);
+  auto expected = Literal::CreateR4FromArray4D<float>(
+      use_bfloat16_ ? expected_array_bf16 : expected_array);
 
   LiteralTestUtil::ExpectEqual(*expected, *result);
 }
 
-TEST_F(HloEvaluatorTest, DilatedBaseConv2DWithHighPadding) {
+TEST_P(HloEvaluatorTest, DilatedBaseConv2DWithHighPadding) {
   HloComputation::Builder b(TestName());
 
   Array4D<float> lhs_array(1, 1, 4, 4);
@@ -1043,10 +1050,9 @@ TEST_F(HloEvaluatorTest, DilatedBaseConv2DWithHighPadding) {
   const Shape& shape = ShapeUtil::MakeShape(F32, {1, 1, 7, 7});
   b.AddInstruction(HloInstruction::CreateConvolve(
       shape, lhs_instruction, rhs_instruction, window, dnums));
-  auto computation = module().AddEntryComputation(b.Build());
+  module().AddEntryComputation(b.Build());
 
-  std::unique_ptr<Literal> result =
-      evaluator_->Evaluate(*computation, {}).ConsumeValueOrDie();
+  std::unique_ptr<Literal> result = Evaluate();
 
   Array4D<float> expected_array(1, 1, 7, 7);
   expected_array.FillWithYX(Array2D<float>({
@@ -1063,7 +1069,7 @@ TEST_F(HloEvaluatorTest, DilatedBaseConv2DWithHighPadding) {
   LiteralTestUtil::ExpectEqual(*expected, *result);
 }
 
-TEST_F(HloEvaluatorTest, DilatedBaseConv2DWithLowAndHighPadding) {
+TEST_P(HloEvaluatorTest, DilatedBaseConv2DWithLowAndHighPadding) {
   HloComputation::Builder b(TestName());
 
   Array4D<float> lhs_array(1, 1, 4, 4);
@@ -1107,10 +1113,9 @@ TEST_F(HloEvaluatorTest, DilatedBaseConv2DWithLowAndHighPadding) {
   const Shape& shape = ShapeUtil::MakeShape(F32, {1, 1, 8, 8});
   b.AddInstruction(HloInstruction::CreateConvolve(
       shape, lhs_instruction, rhs_instruction, window, dnums));
-  auto computation = module().AddEntryComputation(b.Build());
+  module().AddEntryComputation(b.Build());
 
-  std::unique_ptr<Literal> result =
-      evaluator_->Evaluate(*computation, {}).ConsumeValueOrDie();
+  std::unique_ptr<Literal> result = Evaluate();
 
   Array4D<float> expected_array(1, 1, 8, 8);
   expected_array.FillWithYX(Array2D<float>({
@@ -1128,7 +1133,7 @@ TEST_F(HloEvaluatorTest, DilatedBaseConv2DWithLowAndHighPadding) {
   LiteralTestUtil::ExpectEqual(*expected, *result);
 }
 
-TEST_F(HloEvaluatorTest,
+TEST_P(HloEvaluatorTest,
        DilatedWindowAndBaseConv2DWithDifferentLowAndHighPaddingAndStrides) {
   HloComputation::Builder b(TestName());
 
@@ -1179,10 +1184,9 @@ TEST_F(HloEvaluatorTest,
   const Shape& shape = ShapeUtil::MakeShape(F32, {1, 1, 9, 3});
   b.AddInstruction(HloInstruction::CreateConvolve(
       shape, lhs_instruction, rhs_instruction, window, dnums));
-  auto computation = module().AddEntryComputation(b.Build());
+  module().AddEntryComputation(b.Build());
 
-  std::unique_ptr<Literal> result =
-      evaluator_->Evaluate(*computation, {}).ConsumeValueOrDie();
+  std::unique_ptr<Literal> result = Evaluate();
 
   Array4D<float> expected_array(1, 1, 9, 3);
   expected_array.FillWithYX(Array2D<float>({
@@ -1201,7 +1205,7 @@ TEST_F(HloEvaluatorTest,
   LiteralTestUtil::ExpectEqual(*expected, *result);
 }
 
-TEST_F(HloEvaluatorTest, ReduceAdd) {
+TEST_P(HloEvaluatorTest, ReduceAdd) {
   HloComputation::Builder b(TestName());
 
   // arg:
@@ -1234,17 +1238,16 @@ TEST_F(HloEvaluatorTest, ReduceAdd) {
       HloInstruction::CreateReduce(shape, arg_instruction, init_value,
                                    /*dimensions_to_reduce=*/{1}, add_func));
 
-  auto computation = module().AddEntryComputation(b.Build());
+  module().AddEntryComputation(b.Build());
 
-  std::unique_ptr<Literal> result =
-      evaluator_->Evaluate(*computation, {}).ConsumeValueOrDie();
+  std::unique_ptr<Literal> result = Evaluate();
 
   auto expected = Literal::CreateR1<float>({6, 18});
 
   LiteralTestUtil::ExpectEqual(*expected, *result);
 }
 
-TEST_F(HloEvaluatorTest, ReduceWindowMax) {
+TEST_P(HloEvaluatorTest, ReduceWindowMax) {
   HloComputation::Builder b(TestName());
 
   // arg:
@@ -1287,15 +1290,15 @@ TEST_F(HloEvaluatorTest, ReduceWindowMax) {
   b.AddInstruction(HloInstruction::CreateReduceWindow(
       shape, arg_instruction, init_value, window, max_func));
 
-  auto computation = module().AddEntryComputation(b.Build());
-  std::unique_ptr<Literal> result =
-      evaluator_->Evaluate(*computation, {}).ConsumeValueOrDie();
+  module().AddEntryComputation(b.Build());
+
+  std::unique_ptr<Literal> result = Evaluate();
 
   auto expected = Literal::CreateR2<float>({{6, 7}});
   LiteralTestUtil::ExpectEqual(*expected, *result);
 }
 
-TEST_F(HloEvaluatorTest, ReduceWindowAdd) {
+TEST_P(HloEvaluatorTest, ReduceWindowAdd) {
   HloComputation::Builder b(TestName());
 
   // arg:
@@ -1344,15 +1347,15 @@ TEST_F(HloEvaluatorTest, ReduceWindowAdd) {
   b.AddInstruction(HloInstruction::CreateReduceWindow(
       shape, arg_instruction, init_value, window, add_func));
 
-  auto computation = module().AddEntryComputation(b.Build());
-  std::unique_ptr<Literal> result =
-      evaluator_->Evaluate(*computation, {}).ConsumeValueOrDie();
+  module().AddEntryComputation(b.Build());
+
+  std::unique_ptr<Literal> result = Evaluate();
 
   auto expected = Literal::CreateR2<float>({{1, 3, 5}, {5, 11, 13}});
   LiteralTestUtil::ExpectEqual(*expected, *result);
 }
 
-TEST_F(HloEvaluatorTest, ReduceWindowAdd6D) {
+TEST_P(HloEvaluatorTest, ReduceWindowAdd6D) {
   HloComputation::Builder b(TestName());
 
   // arg: f32[4,4,4,4,4,4] full of ones. Using small dims to limit run-time.
@@ -1405,9 +1408,9 @@ TEST_F(HloEvaluatorTest, ReduceWindowAdd6D) {
   b.AddInstruction(HloInstruction::CreateReduceWindow(
       shape, arg_instruction, init_value, window, add_func));
 
-  auto computation = module().AddEntryComputation(b.Build());
-  std::unique_ptr<Literal> result =
-      evaluator_->Evaluate(*computation, {}).ConsumeValueOrDie();
+  module().AddEntryComputation(b.Build());
+
+  std::unique_ptr<Literal> result = Evaluate();
 
   std::vector<int64> output_dims = {4, 3, 3, 3, 4, 4};
   std::unique_ptr<Literal> result_literal =
@@ -1415,7 +1418,7 @@ TEST_F(HloEvaluatorTest, ReduceWindowAdd6D) {
   LiteralTestUtil::ExpectEqual(*result_literal, *result);
 }
 
-TEST_F(HloEvaluatorTest, StridedSlice) {
+TEST_P(HloEvaluatorTest, StridedSlice) {
   HloComputation::Builder b(TestName());
 
   // arg:
@@ -1436,10 +1439,9 @@ TEST_F(HloEvaluatorTest, StridedSlice) {
                                                /*start_indices=*/{0, 2},
                                                /*limit_indices=*/{3, 5},
                                                /*strides=*/{2, 3}));
-  auto computation = module().AddEntryComputation(b.Build());
+  module().AddEntryComputation(b.Build());
 
-  std::unique_ptr<Literal> result =
-      evaluator_->Evaluate(*computation, {}).ConsumeValueOrDie();
+  std::unique_ptr<Literal> result = Evaluate();
 
   auto expected = Literal::CreateR2<float>({
       {3},
@@ -1449,7 +1451,7 @@ TEST_F(HloEvaluatorTest, StridedSlice) {
   LiteralTestUtil::ExpectEqual(*expected, *result);
 }
 
-TEST_F(HloEvaluatorTest, DynamicSlice) {
+TEST_P(HloEvaluatorTest, DynamicSlice) {
   HloComputation::Builder b(TestName());
 
   // arg:
@@ -1470,10 +1472,9 @@ TEST_F(HloEvaluatorTest, DynamicSlice) {
   Shape shape = ShapeUtil::MakeShape(F32, {2, 3});
   b.AddInstruction(HloInstruction::CreateDynamicSlice(shape, operand,
                                                       start_indices, {2, 3}));
-  auto computation = module().AddEntryComputation(b.Build());
+  module().AddEntryComputation(b.Build());
 
-  std::unique_ptr<Literal> result =
-      evaluator_->Evaluate(*computation, {}).ConsumeValueOrDie();
+  std::unique_ptr<Literal> result = Evaluate();
 
   auto expected = Literal::CreateR2<float>({
       {2, 3, 4},
@@ -1485,7 +1486,7 @@ TEST_F(HloEvaluatorTest, DynamicSlice) {
 
 // Verifies that the HloEvaluator's implementation goes along with existing
 // backends' behavior, although this is not required by the spec.
-TEST_F(HloEvaluatorTest, DynamicSliceModSlice) {
+TEST_P(HloEvaluatorTest, DynamicSliceModSlice) {
   HloComputation::Builder b(TestName());
 
   // arg:
@@ -1506,10 +1507,9 @@ TEST_F(HloEvaluatorTest, DynamicSliceModSlice) {
   Shape shape = ShapeUtil::MakeShape(F32, {2, 3});
   b.AddInstruction(HloInstruction::CreateDynamicSlice(shape, operand,
                                                       start_indices, {2, 3}));
-  auto computation = module().AddEntryComputation(b.Build());
+  module().AddEntryComputation(b.Build());
 
-  std::unique_ptr<Literal> result =
-      evaluator_->Evaluate(*computation, {}).ConsumeValueOrDie();
+  std::unique_ptr<Literal> result = Evaluate();
 
   auto expected = Literal::CreateR2<float>({
       {2, 3, 4},
@@ -1519,7 +1519,7 @@ TEST_F(HloEvaluatorTest, DynamicSliceModSlice) {
   LiteralTestUtil::ExpectEqual(*expected, *result);
 }
 
-TEST_F(HloEvaluatorTest, DynamicSliceUpdate) {
+TEST_P(HloEvaluatorTest, DynamicSliceUpdate) {
   HloComputation::Builder b(TestName());
 
   // arg:
@@ -1543,10 +1543,9 @@ TEST_F(HloEvaluatorTest, DynamicSliceUpdate) {
   Shape shape = ShapeUtil::MakeShape(F64, {2, 3});
   b.AddInstruction(HloInstruction::CreateDynamicUpdateSlice(
       shape, operand, update, start_indices));
-  auto computation = module().AddEntryComputation(b.Build());
+  module().AddEntryComputation(b.Build());
 
-  std::unique_ptr<Literal> result =
-      evaluator_->Evaluate(*computation, {}).ConsumeValueOrDie();
+  std::unique_ptr<Literal> result = Evaluate();
 
   auto expected = Literal::CreateR2<double>({
       {1, -2, -3},
@@ -1556,7 +1555,7 @@ TEST_F(HloEvaluatorTest, DynamicSliceUpdate) {
   LiteralTestUtil::ExpectEqual(*expected, *result);
 }
 
-TEST_F(HloEvaluatorTest, SetAndGetTuples) {
+TEST_P(HloEvaluatorTest, SetAndGetTuples) {
   HloComputation::Builder b(TestName());
 
   // arg:
@@ -1579,9 +1578,9 @@ TEST_F(HloEvaluatorTest, SetAndGetTuples) {
   Shape shape = ShapeUtil::MakeShape(F64, {2, 3});
   b.AddInstruction(HloInstruction::CreateGetTupleElement(shape, tuple, 1));
 
-  auto computation = module().AddEntryComputation(b.Build());
-  std::unique_ptr<Literal> result =
-      evaluator_->Evaluate(*computation, {}).ConsumeValueOrDie();
+  module().AddEntryComputation(b.Build());
+
+  std::unique_ptr<Literal> result = Evaluate();
 
   auto expected = Literal::CreateR2<double>({
       {1, 2, 3},
@@ -1591,7 +1590,7 @@ TEST_F(HloEvaluatorTest, SetAndGetTuples) {
   LiteralTestUtil::ExpectEqual(*expected, *result);
 }
 
-TEST_F(HloEvaluatorTest, SetAndGetNestedTuples) {
+TEST_P(HloEvaluatorTest, SetAndGetNestedTuples) {
   HloComputation::Builder b(TestName());
 
   // arg:
@@ -1618,9 +1617,9 @@ TEST_F(HloEvaluatorTest, SetAndGetNestedTuples) {
   b.AddInstruction(
       HloInstruction::CreateGetTupleElement(tuple2->shape(), outer_tuple, 1));
 
-  auto computation = module().AddEntryComputation(b.Build());
-  std::unique_ptr<Literal> result =
-      evaluator_->Evaluate(*computation, {}).ConsumeValueOrDie();
+  module().AddEntryComputation(b.Build());
+
+  std::unique_ptr<Literal> result = Evaluate();
 
   auto result_inner_literal =
       Literal::CreateR2FromArray2D<double>(*operand_array);
@@ -1632,7 +1631,7 @@ TEST_F(HloEvaluatorTest, SetAndGetNestedTuples) {
   LiteralTestUtil::ExpectEqual(*expected, *result);
 }
 
-TEST_F(HloEvaluatorTest, Reverse) {
+TEST_P(HloEvaluatorTest, Reverse) {
   HloComputation::Builder b(TestName());
 
   // Input shape is float[4x3x2x1].
@@ -1658,10 +1657,9 @@ TEST_F(HloEvaluatorTest, Reverse) {
 
   const Shape shape = ShapeUtil::MakeShape(F32, {4, 3, 2, 1});
   b.AddInstruction(HloInstruction::CreateReverse(shape, operand, {0, 1}));
-  auto computation = module().AddEntryComputation(b.Build());
+  module().AddEntryComputation(b.Build());
 
-  std::unique_ptr<Literal> result =
-      evaluator_->Evaluate(*computation, {}).ConsumeValueOrDie();
+  std::unique_ptr<Literal> result = Evaluate();
 
   // clang-format off
   auto expected = Literal::CreateR4FromArray4D<float>({
@@ -1686,7 +1684,7 @@ TEST_F(HloEvaluatorTest, Reverse) {
   LiteralTestUtil::ExpectEqual(*expected, *result);
 }
 
-TEST_F(HloEvaluatorTest, EvaluateWithSubstitutions) {
+TEST_P(HloEvaluatorTest, EvaluateWithSubstitutions) {
   HloComputation::Builder b(TestName());
   Shape shape = ShapeUtil::MakeShape(F32, {4});
 
@@ -1709,7 +1707,7 @@ TEST_F(HloEvaluatorTest, EvaluateWithSubstitutions) {
 
 // Check that EvaluateWithSubstitutions works if one of the operands to the op
 // we're evaluating is a constant.
-TEST_F(HloEvaluatorTest, EvaluateWithSubstitutionsWithConstantOperand) {
+TEST_P(HloEvaluatorTest, EvaluateWithSubstitutionsWithConstantOperand) {
   HloComputation::Builder b(TestName());
   Shape shape = ShapeUtil::MakeShape(F32, {4});
 
@@ -1731,5 +1729,8 @@ TEST_F(HloEvaluatorTest, EvaluateWithSubstitutionsWithConstantOperand) {
                                *result.ValueOrDie());
 }
 
+INSTANTIATE_TEST_CASE_P(HloEvaluatorTest_Instantiation, HloEvaluatorTest,
+                        ::testing::ValuesIn(use_bf16_params));
+
 }  // namespace
 }  // namespace xla
diff --git a/tensorflow/core/framework/numeric_types.h b/tensorflow/core/framework/numeric_types.h
index 70563d53ef..edd952b824 100644
--- a/tensorflow/core/framework/numeric_types.h
+++ b/tensorflow/core/framework/numeric_types.h
@@ -191,7 +191,22 @@ inline bool operator>(bfloat16 a, bfloat16 b) {
 inline bool operator>=(bfloat16 a, bfloat16 b) {
   return static_cast<float>(a) >= static_cast<float>(b);
 }
-
+inline bfloat16& operator+=(bfloat16& a, bfloat16 b) {
+  a = a + b;
+  return a;
+}
+inline bfloat16& operator-=(bfloat16& a, bfloat16 b) {
+  a = a - b;
+  return a;
+}
+inline bfloat16& operator*=(bfloat16& a, bfloat16 b) {
+  a = a * b;
+  return a;
+}
+inline bfloat16& operator/=(bfloat16& a, bfloat16 b) {
+  a = a / b;
+  return a;
+}
 }  // end namespace tensorflow
 
 namespace Eigen {
-- 
GitLab


From ca431de46797155f296639cd978f1d2c370c89d5 Mon Sep 17 00:00:00 2001
From: Skye Wanderman-Milne <skyewm@google.com>
Date: Thu, 14 Dec 2017 17:50:27 -0800
Subject: [PATCH 1039/1225] Raise error if maximum_iterations argument to
 while_loop is defined in control flow context.

This also modifies dynamic_rnn to not provide a maximum_iterations
argument if it's called within control flow. This is a hopefully
temporary solution until we better support this usage.

PiperOrigin-RevId: 179125216
---
 .../kernel_tests/control_flow_ops_py_test.py  | 13 +++++++++++
 tensorflow/python/ops/control_flow_ops.py     | 23 +++++++++++++++++++
 tensorflow/python/ops/rnn.py                  | 12 +++++++++-
 3 files changed, 47 insertions(+), 1 deletion(-)

diff --git a/tensorflow/python/kernel_tests/control_flow_ops_py_test.py b/tensorflow/python/kernel_tests/control_flow_ops_py_test.py
index 5b0abaa2eb..7f2c2545dc 100644
--- a/tensorflow/python/kernel_tests/control_flow_ops_py_test.py
+++ b/tensorflow/python/kernel_tests/control_flow_ops_py_test.py
@@ -747,6 +747,19 @@ class ControlFlowTest(test.TestCase):
           maximum_iterations=1)
       self.assertEqual(1, r.eval())
 
+  def testInvalidMaximumIterationsContext(self):
+    def outer_body(i, r):
+      r = control_flow_ops.while_loop(lambda i: i < 3, lambda i: i + 1, [0],
+                                      maximum_iterations=r.shape[0])
+      return i, r
+
+    with self.assertRaisesRegexp(
+        ValueError,
+        "maximum_iterations tensor cannot be declared in tf.cond or "
+        "tf.while_loop"):
+      control_flow_ops.while_loop(lambda i, r: i < 3, outer_body,
+                                  [0, constant_op.constant([1])])
+
   # Have more than 10 parallel iterations and hence exercise k-bound
   # most of the time.
   def testWhile_3(self):
diff --git a/tensorflow/python/ops/control_flow_ops.py b/tensorflow/python/ops/control_flow_ops.py
index 3418f33717..cb5f4a66fd 100644
--- a/tensorflow/python/ops/control_flow_ops.py
+++ b/tensorflow/python/ops/control_flow_ops.py
@@ -2897,6 +2897,29 @@ def while_loop(cond, body, loop_vars, shape_invariants=None,
       if maximum_iterations.shape.ndims != 0:
         raise ValueError("maximum_iterations must be a scalar, saw shape: %s" %
                          maximum_iterations.shape)
+
+      # If/when we generated the gradient for this while loop, the
+      # maximum_iterations tensor will be used as the input to any generated
+      # stack ops. It's likely the stacks will be outside any control flow
+      # context (i.e. if gradients() is called outside any control flow
+      # context), which will result in the maximum_iterations tensor being an
+      # illegal input (see control_flow_util.CheckInputFromValidContext).
+      #
+      # NOTE(skyewm): we could technically allow tensors from CondContexts, but
+      # that will be error-prone and hard to reason about for users.
+      #
+      # TODO(skyewm): make this work (it's tricky).
+      # pylint: disable=protected-access
+      if (context.in_graph_mode() and
+          maximum_iterations.op._get_control_flow_context() is not None):
+        raise ValueError(
+            "maximum_iterations tensor cannot be declared in tf.cond or "
+            "tf.while_loop. Please file an issue at "
+            "https://github.com/tensorflow/tensorflow/issues if you require "
+            "this functionality. (Control flow context: %s)" %
+            maximum_iterations.op._get_control_flow_context().name)
+      # pylint: enable=protected-access
+
       counter = constant_op.constant(
           0, dtype=maximum_iterations.dtype, name="iteration_counter")
       orig_cond = cond
diff --git a/tensorflow/python/ops/rnn.py b/tensorflow/python/ops/rnn.py
index fa48297672..ececc53719 100644
--- a/tensorflow/python/ops/rnn.py
+++ b/tensorflow/python/ops/rnn.py
@@ -811,12 +811,22 @@ def _dynamic_rnn_loop(cell,
   # variable, this will reduce the performance overheads of padding to a fixed
   # maximum length.
   loop_bound = time_steps
+
+  # This is a workaround since we cannot currently use maximum_iterations if
+  # time_steps is defined inside control flow, see the comment in
+  # control_flow_ops.py.
+  if (context.in_eager_mode() or
+      time_steps.op._get_control_flow_context() is None):  # pylint: disable=protected-access
+    maximum_iterations = time_steps
+  else:
+    maximum_iterations = None
+
   _, output_final_ta, final_state = control_flow_ops.while_loop(
       cond=lambda time, *_: time < loop_bound,
       body=_time_step,
       loop_vars=(time, output_ta, state),
       parallel_iterations=parallel_iterations,
-      maximum_iterations=time_steps,
+      maximum_iterations=maximum_iterations,
       swap_memory=swap_memory)
 
   # Unpack final output if not using output tuples.
-- 
GitLab


From b03f0e408710c5a92b87d748360b03c6cb60760d Mon Sep 17 00:00:00 2001
From: Justin Lebar <jlebar@google.com>
Date: Thu, 14 Dec 2017 17:55:08 -0800
Subject: [PATCH 1040/1225] [XLA] Express the default options to ToString using
 an overload, rather than a default param.

No functional change.

The motivation for this is that GDB ignores default params, but resolves
overloads just fine.

PiperOrigin-RevId: 179125588
---
 tensorflow/compiler/xla/service/hlo_computation.h | 6 +++++-
 tensorflow/compiler/xla/service/hlo_instruction.h | 6 +++++-
 tensorflow/compiler/xla/service/hlo_module.h      | 7 ++++++-
 3 files changed, 16 insertions(+), 3 deletions(-)

diff --git a/tensorflow/compiler/xla/service/hlo_computation.h b/tensorflow/compiler/xla/service/hlo_computation.h
index e87f240540..6436815f91 100644
--- a/tensorflow/compiler/xla/service/hlo_computation.h
+++ b/tensorflow/compiler/xla/service/hlo_computation.h
@@ -138,7 +138,11 @@ class HloComputation {
   void UniquifyName(NameUniquer* name_uniquer);
 
   // Return a string representation of the computation.
-  string ToString(const HloPrintOptions& options = HloPrintOptions()) const;
+  //
+  // (We express the default options using an overload rather than a default
+  // param because gdb ignores default params, but does resolve overloads.)
+  string ToString() const { return ToString(HloPrintOptions()); }
+  string ToString(const HloPrintOptions& options) const;
 
   // Returns a serialized representation of this computation.
   HloComputationProto ToProto() const;
diff --git a/tensorflow/compiler/xla/service/hlo_instruction.h b/tensorflow/compiler/xla/service/hlo_instruction.h
index 47c3fb684e..753b7dc0bf 100644
--- a/tensorflow/compiler/xla/service/hlo_instruction.h
+++ b/tensorflow/compiler/xla/service/hlo_instruction.h
@@ -688,7 +688,11 @@ class HloInstruction {
   string SignatureString() const;
 
   // Returns a debugging string that represents this instruction.
-  string ToString(const HloPrintOptions& options = HloPrintOptions()) const;
+  //
+  // (We express the default options using an overload rather than a default
+  // param because gdb ignores default params, but does resolve overloads.)
+  string ToString() const { return ToString(HloPrintOptions()); }
+  string ToString(const HloPrintOptions& options) const;
 
   // Components of the ToString() representation:
 
diff --git a/tensorflow/compiler/xla/service/hlo_module.h b/tensorflow/compiler/xla/service/hlo_module.h
index ea2d3771c6..f37885d043 100644
--- a/tensorflow/compiler/xla/service/hlo_module.h
+++ b/tensorflow/compiler/xla/service/hlo_module.h
@@ -143,7 +143,12 @@ class HloModule {
 
   const HloModuleConfig& config() const { return config_; }
 
-  string ToString(const HloPrintOptions& options = HloPrintOptions()) const;
+  // Return a string representation of the module.
+  //
+  // (We express the default options using an overload rather than a default
+  // param because gdb ignores default params, but does resolve overloads.)
+  string ToString() const { return ToString(HloPrintOptions()); }
+  string ToString(const HloPrintOptions& options) const;
 
   // Convert an HloModule to or from a proto.
   HloModuleProto ToProto() const;
-- 
GitLab


From 81fe1b8700d839c00914fb3ade0cb626b97f6a08 Mon Sep 17 00:00:00 2001
From: Benoit Steiner <bsteiner@google.com>
Date: Thu, 14 Dec 2017 18:16:53 -0800
Subject: [PATCH 1041/1225] Made the list of op kernels available.

PiperOrigin-RevId: 179127885
---
 tensorflow/python/grappler/cluster.i       | 14 +++++++++++++-
 tensorflow/python/grappler/cluster.py      |  4 ++++
 tensorflow/python/grappler/cluster_test.py |  7 +++++++
 3 files changed, 24 insertions(+), 1 deletion(-)

diff --git a/tensorflow/python/grappler/cluster.i b/tensorflow/python/grappler/cluster.i
index c9bcfeb6e8..2f99a910f1 100644
--- a/tensorflow/python/grappler/cluster.i
+++ b/tensorflow/python/grappler/cluster.i
@@ -185,6 +185,18 @@ static PyObject* TF_ListDevices(GCluster cluster) {
   return result;
 }
 
+static std::vector<string> TF_ListAvailableOps() {
+  tensorflow::OpRegistry* registry = tensorflow::OpRegistry::Global();
+  std::vector<tensorflow::OpDef> ops;
+  registry->GetRegisteredOps(&ops);
+  std::vector<string> op_names;
+  for (const tensorflow::OpDef& op : ops) {
+    op_names.push_back(op.name());
+  }
+  std::sort(op_names.begin(), op_names.end());
+  return op_names;
+}
+
 static PyObject* TF_MeasureCosts(
     GItem item,
     GCluster cluster,
@@ -311,10 +323,10 @@ static GCluster TF_NewVirtualCluster(
     TF_Status* out_status);
 static void TF_ShutdownCluster(GCluster cluster);
 static PyObject* TF_ListDevices(GCluster cluster);
+static std::vector<string> TF_ListAvailableOps();
 static PyObject* TF_MeasureCosts(
     GItem item, GCluster cluster,
     bool generate_timeline, TF_Status* out_status);
 static PyObject* TF_DeterminePeakMemoryUsage(
     GItem item, GCluster cluster,
     TF_Status* out_status);
-
diff --git a/tensorflow/python/grappler/cluster.py b/tensorflow/python/grappler/cluster.py
index 60e1322050..1072e66c73 100644
--- a/tensorflow/python/grappler/cluster.py
+++ b/tensorflow/python/grappler/cluster.py
@@ -80,6 +80,10 @@ class Cluster(object):
         devices.append(device_properties_pb2.NamedDevice.FromString(raw_dev))
     return devices
 
+  def ListAvailableOps(self):
+    """Returns a list of all the available operations (sorted alphatically)."""
+    return tf_cluster.TF_ListAvailableOps()
+
   def MeasureCosts(self, item):
     """Returns the cost of running the specified item.
 
diff --git a/tensorflow/python/grappler/cluster_test.py b/tensorflow/python/grappler/cluster_test.py
index 3ddcb741b5..0e78c33a85 100644
--- a/tensorflow/python/grappler/cluster_test.py
+++ b/tensorflow/python/grappler/cluster_test.py
@@ -123,6 +123,13 @@ class ClusterTest(test.TestCase):
       self.assertEqual(len(op_perfs), 9)
       self.assertTrue(step_stats.dev_stats)
 
+  def testAvailableOps(self):
+    with cluster.Provision() as gcluster:
+      op_names = gcluster.ListAvailableOps()
+      self.assertTrue(b'Add' in op_names)
+      self.assertTrue(b'MatMul' in op_names)
+      self.assertEqual(op_names, sorted(op_names))
+
 
 if __name__ == '__main__':
   test.main()
-- 
GitLab


From 147f4acd4b4f7b1c81d780adce698b2056837796 Mon Sep 17 00:00:00 2001
From: Mark Daoust <markdaoust@google.com>
Date: Thu, 14 Dec 2017 18:42:19 -0800
Subject: [PATCH 1042/1225] internal change

PiperOrigin-RevId: 179130198
---
 .../docs_src/api_guides/python/contrib.bayesflow.entropy.md      | 1 +
 .../api_guides/python/contrib.bayesflow.stochastic_graph.md      | 1 +
 2 files changed, 2 insertions(+)

diff --git a/tensorflow/docs_src/api_guides/python/contrib.bayesflow.entropy.md b/tensorflow/docs_src/api_guides/python/contrib.bayesflow.entropy.md
index fc5d5d70d7..b59be5471f 100644
--- a/tensorflow/docs_src/api_guides/python/contrib.bayesflow.entropy.md
+++ b/tensorflow/docs_src/api_guides/python/contrib.bayesflow.entropy.md
@@ -1 +1,2 @@
 # BayesFlow Entropy (contrib)
+
diff --git a/tensorflow/docs_src/api_guides/python/contrib.bayesflow.stochastic_graph.md b/tensorflow/docs_src/api_guides/python/contrib.bayesflow.stochastic_graph.md
index d855787ae6..b6e5502ec4 100644
--- a/tensorflow/docs_src/api_guides/python/contrib.bayesflow.stochastic_graph.md
+++ b/tensorflow/docs_src/api_guides/python/contrib.bayesflow.stochastic_graph.md
@@ -1 +1,2 @@
 # BayesFlow Stochastic Graph (contrib)
+
-- 
GitLab


From aadc84cce45cccce0c6967cbb50793276bcf4874 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 14 Dec 2017 18:42:59 -0800
Subject: [PATCH 1043/1225] Add block sparsity support for 2D weight tensors
 only.

PiperOrigin-RevId: 179130257
---
 tensorflow/contrib/model_pruning/README.md    |  14 ++-
 .../contrib/model_pruning/python/pruning.py   | 100 +++++++++++++++++-
 .../model_pruning/python/pruning_test.py      |  34 ++++++
 3 files changed, 143 insertions(+), 5 deletions(-)

diff --git a/tensorflow/contrib/model_pruning/README.md b/tensorflow/contrib/model_pruning/README.md
index 764e126e0d..d286750c25 100644
--- a/tensorflow/contrib/model_pruning/README.md
+++ b/tensorflow/contrib/model_pruning/README.md
@@ -42,10 +42,13 @@ The pruning library allows for specification of the following hyper parameters:
 | name | string | model_pruning | Name of the pruning specification. Used for adding summaries and ops under a common tensorflow name_scope |
 | begin_pruning_step | integer | 0 | The global step at which to begin pruning |
 | end_pruning_step   | integer | -1 | The global step at which to terminate pruning. Defaults to -1 implying that pruning continues till  the training stops |
-| do_not_prune | list of strings | [""] | list of layers strings that are not pruned |
+| do_not_prune | list of strings | [""] | list of layers names that are not pruned |
 | threshold_decay | float | 0.9 | The decay factor to use for exponential decay of the thresholds |
 | pruning_frequency | integer | 10 | How often should the masks be updated? (in # of global_steps) |
 | nbins | integer | 255 | Number of bins to use for histogram computation |
+| block_height|integer | 1 | Number of rows in a block for block sparse matrices|
+| block_width |integer | 1 | Number of cols in a block for block sparse matrices|
+| block_pooling_function| string | AVG | The function to use to pool weight values in a block: average (AVG) or max (MAX)|
 | initial_sparsity | float | 0.0 | Initial sparsity value |
 | target_sparsity | float | 0.5 | Target sparsity value |
 | sparsity_function_begin_step | integer | 0 | The global step at this which the gradual sparsity function begins to take effect |
@@ -128,3 +131,12 @@ Eval:
 ```shell
 $ bazel-bin/$examples_dir/cifar10/cifar10_eval --run_once
 ```
+
+### Block Sparsity
+
+For some hardware architectures, it may be beneficial to induce spatially correlated sparsity. To train models in which the weight tensors have block sparse structure, set *block_height* and *block_width* hyperparameters to the desired block configuration (2x2, 4x4, 4x1, 1x8, etc). Currently, block sparsity is supported for weight tensors with rank 2 only. The matrix is partitioned into non-overlapping blocks of size *[block_height, block_dim]* and the either the average or max absolute value in this block is taken as a proxy for the entire block (set by *block_pooling_function* hyperparameter).
+The convolution layer tensors are always pruned used block dimensions of [1,1].
+
+## References
+
+Michael Zhu and Suyog Gupta, “To prune, or not to prune: exploring the efficacy of pruning for model compression”, *2017 NIPS Workshop on Machine Learning of Phones and other Consumer Devices* (https://arxiv.org/pdf/1710.01878.pdf)
diff --git a/tensorflow/contrib/model_pruning/python/pruning.py b/tensorflow/contrib/model_pruning/python/pruning.py
index 39eb79daf0..d16af9da19 100644
--- a/tensorflow/contrib/model_pruning/python/pruning.py
+++ b/tensorflow/contrib/model_pruning/python/pruning.py
@@ -72,6 +72,7 @@ from tensorflow.python.ops import gen_array_ops
 from tensorflow.python.ops import init_ops
 from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import nn_impl
+from tensorflow.python.ops import nn_ops
 from tensorflow.python.ops import state_ops
 from tensorflow.python.ops import variable_scope
 from tensorflow.python.ops import variables
@@ -130,6 +131,23 @@ def _weight_threshold_variable(var, scope):
     return threshold
 
 
+def _kronecker_product(mat1, mat2):
+  """Computes the Kronecker product of two matrices mat1 and mat2.
+
+  Args:
+    mat1: A matrix of size m x n
+    mat2: A matrix of size p x q
+  Returns:
+    Kronecker product of matrices mat1 and mat2 of size mp x nq
+  """
+
+  m1, n1 = mat1.get_shape().as_list()
+  mat1_rsh = array_ops.reshape(mat1, [m1, 1, n1, 1])
+  m2, n2 = mat2.get_shape().as_list()
+  mat2_rsh = array_ops.reshape(mat2, [1, m2, 1, n2])
+  return array_ops.reshape(mat1_rsh * mat2_rsh, [m1 * m2, n1 * n2])
+
+
 def _histogram(values, value_range, nbins=100, dtype=np.int32, name=None):
   """Return histogram of values.
 
@@ -298,6 +316,13 @@ def get_pruning_hparams():
       How often should the masks be updated? (in # of global_steps)
     nbins: integer
       number of bins to use for histogram computation
+    block_height: integer
+      number of rows in a block (defaults to 1)
+    block_width: integer
+      number of cols in a block (defaults to 1)
+    block_pooling_function: string
+      Whether to perform average (AVG) or max (MAX) pooling in the block
+      (default: AVG)
     initial_sparsity: float
       initial sparsity value
     target_sparsity: float
@@ -333,6 +358,9 @@ def get_pruning_hparams():
       threshold_decay=0.9,
       pruning_frequency=10,
       nbins=255,
+      block_height=1,
+      block_width=1,
+      block_pooling_function='AVG',
       initial_sparsity=0,
       target_sparsity=0.5,
       sparsity_function_begin_step=0,
@@ -375,6 +403,12 @@ class Pruning(object):
     # were updated
     self._last_update_step = self._setup_last_update_step()
 
+    # Block dimensions
+    self._block_dim = [self._spec.block_height, self._spec.block_width]
+
+    # Block pooling function
+    self._block_pooling_function = self._spec.block_pooling_function
+
   def _setup_global_step(self, global_step):
     graph_global_step = global_step
     if graph_global_step is None:
@@ -449,9 +483,10 @@ class Pruning(object):
 
     Returns:
       new_threshold: The new value of the threshold based on weights, and
-        desired_sparsity
-      new_mask: A n-D numpy array containing 0 or 1 to indicate which of the
-        values in weights falls below the threshold
+        sparsity at the current global_step
+      new_mask: A numpy array of the same size and shape as weights containing
+        0 or 1 to indicate which of the values in weights falls below
+        the threshold
 
     Raises:
       ValueError: if sparsity is not defined
@@ -484,6 +519,63 @@ class Pruning(object):
           math_ops.greater(abs_weights, smoothed_threshold), np.float32)
     return smoothed_threshold, new_mask
 
+  def _maybe_update_block_mask(self, weights, threshold):
+    """Performs block-granular masking of the weights.
+
+    Block pruning occurs only if the block_height or block_width is > 1 and
+    if the weight tensor has ndims = 2. Otherwise, elementwise pruning occurs.
+    Args:
+      weights: The weight tensor that needs to be masked.
+      threshold: The current threshold value. The function will compute a new
+        threshold and return the exponential moving average using the current
+        value of threshold
+
+    Returns:
+      new_threshold: The new value of the threshold based on weights, and
+        sparsity at the current global_step
+      new_mask: A numpy array of the same size and shape as weights containing
+        0 or 1 to indicate which of the values in weights falls below
+        the threshold
+
+    Raises:
+      ValueError: if block pooling function is not AVG or MAX
+    """
+    if weights.get_shape().ndims != 2 or self._block_dim == [1, 1]:
+      return self._update_mask(weights, threshold)
+
+    if self._block_pooling_function not in ['AVG', 'MAX']:
+      raise ValueError('Unknown pooling function for block sparsity: %s' %
+                       self._block_pooling_function)
+
+    with ops.name_scope(weights.op.name + '_pruning_ops'):
+      abs_weights = math_ops.abs(
+          array_ops.reshape(
+              weights, [1, weights.get_shape()[0],
+                        weights.get_shape()[1], 1]))
+      pool_window = [self._block_dim[0], self._block_dim[1]]
+      pooled_weights = nn_ops.pool(
+          abs_weights,
+          window_shape=pool_window,
+          pooling_type=self._block_pooling_function,
+          strides=pool_window,
+          padding='SAME',
+          name=weights.op.name + '_pooled')
+
+      smoothed_threshold, new_mask = self._update_mask(pooled_weights,
+                                                       threshold)
+
+      reshaped_mask = array_ops.reshape(
+          new_mask,
+          [pooled_weights.get_shape()[1],
+           pooled_weights.get_shape()[2]])
+      updated_mask = _kronecker_product(reshaped_mask,
+                                        array_ops.ones(self._block_dim))
+      sliced_mask = array_ops.slice(
+          updated_mask, [0, 0],
+          [weights.get_shape()[0],
+           weights.get_shape()[1]])
+    return smoothed_threshold, sliced_mask
+
   def _get_mask_assign_ops(self):
     # Make sure the assignment ops have not already been added to the list
     if self._assign_ops:
@@ -510,7 +602,7 @@ class Pruning(object):
         if self._exists_in_do_not_prune_list(mask.name):
           continue
 
-      new_threshold, new_mask = self._update_mask(weight, threshold)
+      new_threshold, new_mask = self._maybe_update_block_mask(weight, threshold)
       self._assign_ops.append(_variable_assign(threshold, new_threshold))
 
       self._assign_ops.append(
diff --git a/tensorflow/contrib/model_pruning/python/pruning_test.py b/tensorflow/contrib/model_pruning/python/pruning_test.py
index 34b4584f49..1767b4bb94 100644
--- a/tensorflow/contrib/model_pruning/python/pruning_test.py
+++ b/tensorflow/contrib/model_pruning/python/pruning_test.py
@@ -21,6 +21,7 @@ from __future__ import print_function
 import numpy as np
 
 from tensorflow.contrib.model_pruning.python import pruning
+from tensorflow.python.framework import constant_op
 from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import partitioned_variables
 from tensorflow.python.ops import random_ops
@@ -111,6 +112,39 @@ class PruningTest(test.TestCase):
       masked_weights_val = masked_weights.eval()
       self.assertAllEqual(np.count_nonzero(masked_weights_val), 51)
 
+  def _blockMasking(self, hparams, weights, expected_mask):
+
+    threshold = variables.Variable(0.0, name="threshold")
+    sparsity = variables.Variable(0.51, name="sparsity")
+    test_spec = ",".join(hparams)
+    pruning_hparams = pruning.get_pruning_hparams().parse(test_spec)
+
+    # Set up pruning
+    p = pruning.Pruning(pruning_hparams, sparsity=sparsity)
+    with self.test_session():
+      variables.global_variables_initializer().run()
+      _, new_mask = p._maybe_update_block_mask(weights, threshold)
+      # Check if the mask is the same size as the weights
+      self.assertAllEqual(new_mask.get_shape(), weights.get_shape())
+      mask_val = new_mask.eval()
+      self.assertAllEqual(mask_val, expected_mask)
+
+  def testBlockMasking(self):
+    param_list = ["block_height=2", "block_width=2", "threshold_decay=0"]
+
+    weights_avg = constant_op.constant(
+        [[0.1, 0.1, 0.2, 0.2], [0.1, 0.1, 0.2, 0.2], [0.3, 0.3, 0.4, 0.4],
+         [0.3, 0.3, 0.4, 0.4]])
+    weights_max = constant_op.constant(
+        [[0.1, 0.0, 0.2, 0.0], [0.0, -0.1, 0.0, -0.2], [0.3, 0.0, 0.4, 0.0],
+         [0.0, -0.3, 0.0, -0.4]])
+    expected_mask = [[0, 0, 0, 0], [0, 0, 0, 0], [1, 1, 1, 1], [1, 1, 1, 1]]
+
+    self._blockMasking(param_list + ["block_pooling_function=MAX"], weights_max,
+                       expected_mask)
+    self._blockMasking(param_list + ["block_pooling_function=AVG"],
+                       weights_avg, expected_mask)
+
   def testPartitionedVariableMasking(self):
     partitioner = partitioned_variables.variable_axis_size_partitioner(40)
     with self.test_session() as session:
-- 
GitLab


From d57ab2c4a7cd13e47f942aaff495912fdc96f84a Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 14 Dec 2017 19:07:06 -0800
Subject: [PATCH 1044/1225] [XLA] Allow omitting operands shapes and program
 shapes.

PiperOrigin-RevId: 179132435
---
 .../compiler/xla/service/hlo_computation.cc   |   7 +-
 .../compiler/xla/service/hlo_instruction.cc   |   8 +-
 .../compiler/xla/service/hlo_instruction.h    |  26 ++++
 .../compiler/xla/tools/parser/README.md       |  11 +-
 .../compiler/xla/tools/parser/hlo_parser.cc   |  53 +++++--
 .../xla/tools/parser/hlo_parser_test.cc       | 135 ++++++++++--------
 6 files changed, 168 insertions(+), 72 deletions(-)

diff --git a/tensorflow/compiler/xla/service/hlo_computation.cc b/tensorflow/compiler/xla/service/hlo_computation.cc
index 4f6feefb43..4202c08336 100644
--- a/tensorflow/compiler/xla/service/hlo_computation.cc
+++ b/tensorflow/compiler/xla/service/hlo_computation.cc
@@ -369,8 +369,11 @@ string HloComputation::ToString(const HloPrintOptions& options) const {
   for (int i = 0; i < options.indent_amount(); i++) {
     s << "    ";
   }
-  s << "%" << name() << " " << ShapeUtil::HumanString(ComputeProgramShape())
-    << " {\n";
+  s << "%" << name();
+  if (options.print_program_shape()) {
+    s << " " << ShapeUtil::HumanString(ComputeProgramShape());
+  }
+  s << " {\n";
   for (const HloInstruction* instruction : MakeInstructionPostOrder()) {
     for (int i = 0; i < options.indent_amount(); i++) {
       s << "    ";
diff --git a/tensorflow/compiler/xla/service/hlo_instruction.cc b/tensorflow/compiler/xla/service/hlo_instruction.cc
index 9e37ab64a0..58883101a5 100644
--- a/tensorflow/compiler/xla/service/hlo_instruction.cc
+++ b/tensorflow/compiler/xla/service/hlo_instruction.cc
@@ -1964,10 +1964,14 @@ string HloInstruction::OperandsToString(const HloPrintOptions& options) const {
       slice.remove_suffix(slice.size() - kMaxOperandsToShowIfCompact);
     }
     operands = Join(slice, ", ", [&](string* out, HloInstruction* operand) {
-      *out += ShapeUtil::HumanStringWithLayout(operand->shape());
+      std::vector<string> str;
+      if (options.print_operand_shape()) {
+        str.push_back(ShapeUtil::HumanStringWithLayout(operand->shape()));
+      }
       if (!options.compact_operands()) {
-        StrAppend(out, " %", operand->name());
+        str.push_back(StrCat("%", operand->name()));
       }
+      StrAppend(out, Join(str, " "));
     });
     const int64 remaining = operands_.size() - slice.size();
     if (slice.size() != operands_.size()) {
diff --git a/tensorflow/compiler/xla/service/hlo_instruction.h b/tensorflow/compiler/xla/service/hlo_instruction.h
index 753b7dc0bf..6d6068c66a 100644
--- a/tensorflow/compiler/xla/service/hlo_instruction.h
+++ b/tensorflow/compiler/xla/service/hlo_instruction.h
@@ -65,8 +65,18 @@ class HloPrintOptions {
       : print_large_constants_(false),
         print_metadata_(true),
         compact_operands_(false),
+        print_operand_shape_(true),
+        print_program_shape_(true),
         indent_amount_(0) {}
 
+  static HloPrintOptions ShortParsable() {
+    return HloPrintOptions()
+        .set_print_large_constants(true)
+        .set_print_metadata(false)
+        .set_print_operand_shape(false)
+        .set_print_program_shape(false);
+  }
+
   // If true, large constants will be printed out.
   HloPrintOptions& set_print_large_constants(bool value) {
     print_large_constants_ = value;
@@ -79,6 +89,18 @@ class HloPrintOptions {
     return *this;
   }
 
+  // If true, operands' shapes will be printed.
+  HloPrintOptions& set_print_operand_shape(bool value) {
+    print_operand_shape_ = value;
+    return *this;
+  }
+
+  // If true, program shape of hlo computations will be printed.
+  HloPrintOptions& set_print_program_shape(bool value) {
+    print_program_shape_ = value;
+    return *this;
+  }
+
   // If true, only a part of operands will be printed out, and their names will
   // be omitted (note that in this case the text will not be parsable).
   HloPrintOptions& set_compact_operands(bool value) {
@@ -95,12 +117,16 @@ class HloPrintOptions {
   bool print_large_constants() const { return print_large_constants_; }
   bool print_metadata() const { return print_metadata_; }
   bool compact_operands() const { return compact_operands_; }
+  bool print_operand_shape() const { return print_operand_shape_; }
+  bool print_program_shape() const { return print_program_shape_; }
   int indent_amount() const { return indent_amount_; }
 
  private:
   bool print_large_constants_;
   bool print_metadata_;
   bool compact_operands_;
+  bool print_operand_shape_;
+  bool print_program_shape_;
   int indent_amount_;
 };
 
diff --git a/tensorflow/compiler/xla/tools/parser/README.md b/tensorflow/compiler/xla/tools/parser/README.md
index 6232967f5f..45e005581e 100644
--- a/tensorflow/compiler/xla/tools/parser/README.md
+++ b/tensorflow/compiler/xla/tools/parser/README.md
@@ -15,8 +15,10 @@ computations
   ;
 
 computation
-  : 'ENTRY' name param_list '->' shape instruction_list
-  | name param_list '->' shape instruction_list
+  : 'ENTRY' name param_list_to_shape instruction_list
+  | name param_list_to_shape instruction_list
+  | 'ENTRY' name instruction_list
+  | name instruction_list
   ;
 
 instruction_list
@@ -41,6 +43,7 @@ operands1
   ;
 operand
   : shape name
+  | name
   ;
 
 attributes
@@ -60,6 +63,10 @@ attribute_value
   | '{' sub_attributes '}'
   ;
 
+param_list_to_shape
+  : param_list '->' shape
+  ;
+
 param_list
   : '(' param_list1 ')'
   ;
diff --git a/tensorflow/compiler/xla/tools/parser/hlo_parser.cc b/tensorflow/compiler/xla/tools/parser/hlo_parser.cc
index 710e76f53d..e47c3b03ed 100644
--- a/tensorflow/compiler/xla/tools/parser/hlo_parser.cc
+++ b/tensorflow/compiler/xla/tools/parser/hlo_parser.cc
@@ -171,6 +171,7 @@ class HloParser {
   bool ParseInt64List(const TokKind start, const TokKind end,
                       const TokKind delim, std::vector<int64>* result);
 
+  bool ParseParamListToShape(Shape* shape, LocTy* shape_loc);
   bool ParseParamList();
   bool ParseName(string* result);
   bool ParseAttributeName(string* result);
@@ -184,6 +185,12 @@ class HloParser {
   bool ParseBool(bool* result);
   bool ParseToken(TokKind kind, const string& msg);
 
+  // Returns true if the current token is the beginning of a shape.
+  bool CanBeShape();
+  // Returns true if the current token is the beginning of a
+  // param_list_to_shape.
+  bool CanBeParamListToShape();
+
   // Logs the current parsing line and the given message. Always returns false.
   bool TokenError(StringPiece msg);
   bool Error(LocTy loc, StringPiece msg);
@@ -267,7 +274,7 @@ bool HloParser::ParseComputations() {
   return true;
 }
 
-// computation ::= ('ENTRY')? name param_list '->' shape instruction_list
+// computation ::= ('ENTRY')? name (param_list_to_shape)? instruction_list
 bool HloParser::ParseComputation() {
   const bool is_entry_computation = EatIfPresent(TokKind::kw_ENTRY);
   string name;
@@ -277,14 +284,14 @@ bool HloParser::ParseComputation() {
   }
   auto builder = MakeUnique<HloComputation::Builder>(name);
 
+  LocTy shape_loc = nullptr;
   Shape shape;
-  string root_name;
-  if (!ParseParamList() || !ParseToken(TokKind::kArrow, "expects '->'")) {
+  if (CanBeParamListToShape() && !ParseParamListToShape(&shape, &shape_loc)) {
     return false;
   }
 
-  LocTy shape_ty = lexer_.GetLoc();
-  if (!ParseShape(&shape) || !ParseInstructionList(builder.get(), &root_name)) {
+  string root_name;
+  if (!ParseInstructionList(builder.get(), &root_name)) {
     return false;
   }
 
@@ -311,9 +318,10 @@ bool HloParser::ParseComputation() {
     CHECK_EQ(root, computation->root_instruction());
   }
 
-  if (!ShapeUtil::Compatible(root->shape(), shape)) {
+  // If param_list_to_shape was present, check compatibility.
+  if (shape_loc != nullptr && !ShapeUtil::Compatible(root->shape(), shape)) {
     return Error(
-        shape_ty,
+        shape_loc,
         StrCat("Shape of computation ", name, ", ",
                ShapeUtil::HumanString(shape),
                ", is not compatible with that of its root instruction ",
@@ -1438,7 +1446,7 @@ bool HloParser::ParseNonTupleLiteral(std::unique_ptr<Literal>* literal,
 // operands1
 //   ::= /*empty*/
 //   ::= operand (, operand)*
-// operand ::= shape name
+// operand ::= (shape)? name
 bool HloParser::ParseOperands(std::vector<HloInstruction*>* operands) {
   if (!ParseToken(TokKind::kLparen,
                   "expects '(' at the beginning of operands")) {
@@ -1449,9 +1457,14 @@ bool HloParser::ParseOperands(std::vector<HloInstruction*>* operands) {
   } else {
     do {
       LocTy loc = lexer_.GetLoc();
-      Shape shape;
       string name;
-      if (!ParseShape(&shape) || !ParseName(&name)) {
+      if (CanBeShape()) {
+        Shape shape;
+        if (!ParseShape(&shape)) {
+          return false;
+        }
+      }
+      if (!ParseName(&name)) {
         return false;
       }
       HloInstruction* instruction =
@@ -1976,6 +1989,19 @@ bool HloParser::ParseInt64List(const TokKind start, const TokKind end,
       end, StrCat("expects an int64 list to end with ", TokKindToString(end)));
 }
 
+// param_list_to_shape ::= param_list '->' shape
+bool HloParser::ParseParamListToShape(Shape* shape, LocTy* shape_loc) {
+  if (!ParseParamList() || !ParseToken(TokKind::kArrow, "expects '->'")) {
+    return false;
+  }
+  *shape_loc = lexer_.GetLoc();
+  return ParseShape(shape);
+}
+
+bool HloParser::CanBeParamListToShape() {
+  return lexer_.GetKind() == TokKind::kLparen;
+}
+
 // param_list ::= '(' param_list1 ')'
 // param_list1
 //   ::= /*empty*/
@@ -2032,6 +2058,13 @@ bool HloParser::ParseShape(Shape* result) {
   return true;
 }
 
+bool HloParser::CanBeShape() {
+  // A non-tuple shape starts with a kShape token; a tuple shape starts with
+  // '('.
+  return lexer_.GetKind() == TokKind::kShape ||
+         lexer_.GetKind() == TokKind::kLparen;
+}
+
 bool HloParser::ParseName(string* result) {
   VLOG(1) << "ParseName";
   if (lexer_.GetKind() != TokKind::kName) {
diff --git a/tensorflow/compiler/xla/tools/parser/hlo_parser_test.cc b/tensorflow/compiler/xla/tools/parser/hlo_parser_test.cc
index 5c12a991cc..29b3cc83e7 100644
--- a/tensorflow/compiler/xla/tools/parser/hlo_parser_test.cc
+++ b/tensorflow/compiler/xla/tools/parser/hlo_parser_test.cc
@@ -405,44 +405,6 @@ ENTRY %Concat2x3With2x5.v3 () -> f32[2,8] {
   ROOT %concatenate = f32[2,8]{1,0} concatenate(f32[2,3]{1,0} %constant, f32[2,5]{1,0} %constant.1), dimensions={1}
 }
 
-)"
-},
-// map
-{
-"Map",
-R"(HloModule MapBinaryAdder_module:
-
-%add_F32.v3 (lhs: f32[], rhs: f32[]) -> f32[] {
-  %lhs = f32[] parameter(0)
-  %rhs = f32[] parameter(1)
-  ROOT %add = f32[] add(f32[] %lhs, f32[] %rhs)
-}
-
-ENTRY %MapBinaryAdder.v3 (param0: f32[4], param1: f32[4]) -> f32[4] {
-  %param0 = f32[4]{0} parameter(0)
-  %param1 = f32[4]{0} parameter(1)
-  ROOT %map = f32[4]{0} map(f32[4]{0} %param0, f32[4]{0} %param1), to_apply=%add_F32.v3
-}
-
-)"
-},
-// reduce
-{
-"Reduce",
-R"(HloModule ReduceR3ToR2_module:
-
-%add_F32.v3 (lhs: f32[], rhs: f32[]) -> f32[] {
-  %lhs = f32[] parameter(0)
-  %rhs = f32[] parameter(1)
-  ROOT %add = f32[] add(f32[] %lhs, f32[] %rhs)
-}
-
-ENTRY %ReduceR3ToR2.v3 (input: f32[8,16,256]) -> f32[8,16] {
-  %input = f32[8,16,256]{2,1,0} parameter(0)
-  %constant = f32[] constant(0)
-  ROOT %reduce = f32[8,16]{1,0} reduce(f32[8,16,256]{2,1,0} %input, f32[] %constant), dimensions={2}, to_apply=%add_F32.v3
-}
-
 )"
 },
 // select and scatter
@@ -664,6 +626,51 @@ ENTRY %fusion.v3 () -> f32[3,2,1,1] {
   ROOT %fusion = f32[3,2,1,1]{3,2,1,0} fusion(f32[3,2,1,1]{3,2,1,0} %constant, f32[2]{0} %constant.1), kind=kLoop, calls=%fused_computation
 }
 
+)"
+}
+  });
+  // clang-format on
+}
+
+std::vector<TestData> CreateShortTestCases() {
+  // clang-format off
+  return std::vector<TestData>({
+// map
+{
+"Map",
+R"(HloModule MapBinaryAdder_module:
+
+%add_F32.v3 {
+  %lhs = f32[] parameter(0)
+  %rhs = f32[] parameter(1)
+  ROOT %add = f32[] add(%lhs, %rhs)
+}
+
+ENTRY %MapBinaryAdder.v3 {
+  %param0 = f32[4]{0} parameter(0)
+  %param1 = f32[4]{0} parameter(1)
+  ROOT %map = f32[4]{0} map(%param0, %param1), to_apply=%add_F32.v3
+}
+
+)"
+},
+// reduce
+{
+"Reduce",
+R"(HloModule ReduceR3ToR2_module:
+
+%add_F32.v3 {
+  %lhs = f32[] parameter(0)
+  %rhs = f32[] parameter(1)
+  ROOT %add = f32[] add(%lhs, %rhs)
+}
+
+ENTRY %ReduceR3ToR2.v3 {
+  %input = f32[8,16,256]{2,1,0} parameter(0)
+  %constant = f32[] constant(0)
+  ROOT %reduce = f32[8,16]{1,0} reduce(%input, %constant), dimensions={2}, to_apply=%add_F32.v3
+}
+
 )"
 },
 // infeed/outfeed
@@ -671,11 +678,11 @@ ENTRY %fusion.v3 () -> f32[3,2,1,1] {
 "InfeedOutfeed",
 R"(HloModule outfeed_module:
 
-ENTRY %InfeedToOutfeed () -> (u32[3], pred[]) {
+ENTRY %InfeedToOutfeed {
   %infeed = (u32[3]{0}, pred[]) infeed()
-  %outfeed = () outfeed((u32[3]{0}, pred[]) %infeed)
+  %outfeed = () outfeed(%infeed)
   ROOT %infeed.1 = (u32[3]{0}, pred[]) infeed()
-  %outfeed.1 = () outfeed((u32[3]{0}, pred[]) %infeed.1)
+  %outfeed.1 = () outfeed(%infeed.1)
 }
 
 )"
@@ -685,10 +692,10 @@ ENTRY %InfeedToOutfeed () -> (u32[3], pred[]) {
 "Rng",
 R"(HloModule rng_module:
 
-ENTRY %Rng () -> f32[8] {
+ENTRY %Rng {
   %constant = f32[] constant(0)
   %constant.1 = f32[] constant(1)
-  ROOT %rng = f32[8]{0} rng(f32[] %constant, f32[] %constant.1), distribution=rng_uniform
+  ROOT %rng = f32[8]{0} rng(%constant, %constant.1), distribution=rng_uniform
 }
 
 )"
@@ -698,9 +705,9 @@ ENTRY %Rng () -> f32[8] {
 "ReducePrevison",
 R"(HloModule reduce_precision:
 
-ENTRY %ReducePrecision () -> f32[1] {
+ENTRY %ReducePrecision {
   %constant = f32[1]{0} constant({3.14159})
-  ROOT %reduce-precision = f32[1]{0} reduce-precision(f32[1]{0} %constant), exponent_bits=8, mantissa_bits=10
+  ROOT %reduce-precision = f32[1]{0} reduce-precision(%constant), exponent_bits=8, mantissa_bits=10
 }
 
 )"
@@ -710,34 +717,33 @@ ENTRY %ReducePrecision () -> f32[1] {
 "Conditional",
 R"(HloModule conditional:
 
-%Negate (x: f32[]) -> f32[] {
+%Negate {
   %x = f32[] parameter(0)
-  ROOT %negate = f32[] negate(f32[] %x)
+  ROOT %negate = f32[] negate(%x)
 }
 
-%Identity (y: f32[]) -> f32[] {
+%Identity {
   %y = f32[] parameter(0)
-  ROOT %copy = f32[] copy(f32[] %y)
+  ROOT %copy = f32[] copy(%y)
 }
 
-ENTRY %Parameters1.v4 () -> f32[] {
+ENTRY %Parameters1.v4 {
   %constant = pred[] constant(true)
   %constant.1 = f32[] constant(56)
   %constant.2 = f32[] constant(12)
-  ROOT %conditional = f32[] conditional(pred[] %constant, f32[] %constant.1, f32[] %constant.2), true_computation=%Negate, false_computation=%Identity
+  ROOT %conditional = f32[] conditional(%constant, %constant.1, %constant.2), true_computation=%Negate, false_computation=%Identity
 }
 
 )"
 },
-
 // CustomCall
 {
 "CustomCall",
 R"(HloModule custom_call:
 
-ENTRY %CustomCall () -> f32[1,2,3] {
+ENTRY %CustomCall {
   %constant = f32[1]{0} constant({12345})
-  ROOT %custom-call = f32[1,2,3]{0,2,1} custom-call(f32[1]{0} %constant), custom_call_target="foo\"bar"
+  ROOT %custom-call = f32[1,2,3]{0,2,1} custom-call(%constant), custom_call_target="foo\"bar"
 }
 
 )"
@@ -747,9 +753,9 @@ ENTRY %CustomCall () -> f32[1,2,3] {
 "NonDefaultNames",
 R"(HloModule add_constants_module:
 
-ENTRY %add_constants () -> f32[] {
+ENTRY %add_constants {
   %foo = f32[] constant(3.14)
-  ROOT %bar = f32[] add(f32[] %foo, f32[] %foo)
+  ROOT %bar = f32[] add(%foo, %foo)
 }
 
 )"
@@ -778,12 +784,29 @@ class HloParserTest : public ::testing::Test,
   }
 };
 
+class HloParserShortTest : public HloParserTest {
+ protected:
+  void ExpectEqualShort() {
+    const string& original = GetParam().module_string;
+    auto result = Parse(original);
+    TF_ASSERT_OK(result.status());
+    EXPECT_EQ(original,
+              result.ValueOrDie()->ToString(HloPrintOptions::ShortParsable()));
+  }
+};
+
 TEST_P(HloParserTest, Run) { ExpectEqual(); }
 
+TEST_P(HloParserShortTest, Run) { ExpectEqualShort(); }
+
 INSTANTIATE_TEST_CASE_P(HloParserTestSuccessInstantiation, HloParserTest,
                         ::testing::ValuesIn(CreateTestCases()),
                         TestDataToString);
 
+INSTANTIATE_TEST_CASE_P(HloParserTestSuccessInstantiation, HloParserShortTest,
+                        ::testing::ValuesIn(CreateShortTestCases()),
+                        TestDataToString);
+
 TEST_F(HloParserTest, Empty) {
   const string original = "";
   auto result = Parse(original);
-- 
GitLab


From f806269602219d5095265d036f294cc9a6260971 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 14 Dec 2017 20:19:59 -0800
Subject: [PATCH 1045/1225] [XLA] Remove '%' when printing the hlo text in
 short parsable mode.

PiperOrigin-RevId: 179138523
---
 .../compiler/xla/service/hlo_computation.cc   |   5 +-
 .../compiler/xla/service/hlo_instruction.cc   |  46 ++--
 .../compiler/xla/service/hlo_instruction.h    |  19 +-
 tensorflow/compiler/xla/service/hlo_module.cc |   2 +-
 .../compiler/xla/service/hlo_module_test.cc   |   4 +-
 .../compiler/xla/tools/parser/README.md       |   1 +
 .../compiler/xla/tools/parser/hlo_parser.cc   |   7 +-
 .../xla/tools/parser/hlo_parser_test.cc       | 216 +++++++++---------
 8 files changed, 166 insertions(+), 134 deletions(-)

diff --git a/tensorflow/compiler/xla/service/hlo_computation.cc b/tensorflow/compiler/xla/service/hlo_computation.cc
index 4202c08336..a63affa06c 100644
--- a/tensorflow/compiler/xla/service/hlo_computation.cc
+++ b/tensorflow/compiler/xla/service/hlo_computation.cc
@@ -369,7 +369,10 @@ string HloComputation::ToString(const HloPrintOptions& options) const {
   for (int i = 0; i < options.indent_amount(); i++) {
     s << "    ";
   }
-  s << "%" << name();
+  if (options.print_percent()) {
+    s << "%";
+  }
+  s << name();
   if (options.print_program_shape()) {
     s << " " << ShapeUtil::HumanString(ComputeProgramShape());
   }
diff --git a/tensorflow/compiler/xla/service/hlo_instruction.cc b/tensorflow/compiler/xla/service/hlo_instruction.cc
index 58883101a5..79855a1393 100644
--- a/tensorflow/compiler/xla/service/hlo_instruction.cc
+++ b/tensorflow/compiler/xla/service/hlo_instruction.cc
@@ -1913,11 +1913,20 @@ string HloInstruction::SignatureString() const {
   return StrCat("(", operands, ") -> ", ShapeUtil::HumanString(shape()));
 }
 
+namespace {
+
+string PrintName(const string& name, const HloPrintOptions& options) {
+  return StrCat(options.print_percent() ? "%" : "", name);
+}
+
+}  // namespace
+
 string HloInstruction::ToString(const HloPrintOptions& options) const {
   string result =
-      StrCat("%", name(), " = ", ShapeUtil::HumanStringWithLayout(shape()), " ",
+      StrCat(PrintName(name(), options), " = ",
+             ShapeUtil::HumanStringWithLayout(shape()), " ",
              HloOpcodeString(opcode()), "(", OperandsToString(options), ")");
-  for (const string& extra : ExtraAttributesToString()) {
+  for (const string& extra : ExtraAttributesToString(options)) {
     StrAppend(&result, ", ", extra);
   }
   if (options.print_metadata() &&
@@ -1969,7 +1978,7 @@ string HloInstruction::OperandsToString(const HloPrintOptions& options) const {
         str.push_back(ShapeUtil::HumanStringWithLayout(operand->shape()));
       }
       if (!options.compact_operands()) {
-        str.push_back(StrCat("%", operand->name()));
+        str.push_back(PrintName(operand->name(), options));
       }
       StrAppend(out, Join(str, " "));
     });
@@ -1981,7 +1990,8 @@ string HloInstruction::OperandsToString(const HloPrintOptions& options) const {
   return operands;
 }
 
-std::vector<string> HloInstruction::ExtraAttributesToString() const {
+std::vector<string> HloInstruction::ExtraAttributesToString(
+    const HloPrintOptions& options) const {
   std::vector<string> extra;
   if (opcode() == HloOpcode::kFusion) {
     extra.push_back(StrCat("kind=", xla::ToString(fusion_kind())));
@@ -2028,23 +2038,28 @@ std::vector<string> HloInstruction::ExtraAttributesToString() const {
   }
 
   if (opcode() == HloOpcode::kWhile) {
-    extra.push_back(StrCat("condition=%", while_condition()->name()));
-    extra.push_back(StrCat("body=%", while_body()->name()));
+    extra.push_back(
+        StrCat("condition=", PrintName(while_condition()->name(), options)));
+    extra.push_back(StrCat("body=", PrintName(while_body()->name(), options)));
   } else if (opcode() == HloOpcode::kSelectAndScatter) {
-    extra.push_back(StrCat("select=%", select()->name()));
-    extra.push_back(StrCat("scatter=%", scatter()->name()));
+    extra.push_back(StrCat("select=", PrintName(select()->name(), options)));
+    extra.push_back(StrCat("scatter=", PrintName(scatter()->name(), options)));
   } else if (opcode() == HloOpcode::kConditional) {
-    extra.push_back(StrCat("true_computation=%", true_computation()->name()));
-    extra.push_back(StrCat("false_computation=%", false_computation()->name()));
+    extra.push_back(StrCat("true_computation=",
+                           PrintName(true_computation()->name(), options)));
+    extra.push_back(StrCat("false_computation=",
+                           PrintName(false_computation()->name(), options)));
   } else if (opcode() == HloOpcode::kCall || opcode() == HloOpcode::kMap ||
              opcode() == HloOpcode::kReduceWindow ||
              opcode() == HloOpcode::kReduce) {
-    extra.push_back(StrCat("to_apply=%", to_apply()->name()));
+    extra.push_back(
+        StrCat("to_apply=", PrintName(to_apply()->name(), options)));
   } else if (!called_computations().empty()) {
     extra.push_back(StrCat(
         "calls=", Join(called_computations(), ", ",
-                       [](string* out, const HloComputation* computation) {
-                         StrAppend(out, "%", computation->name());
+                       [&](string* out, const HloComputation* computation) {
+                         StrAppend(out,
+                                   PrintName(computation->name(), options));
                        })));
   }
 
@@ -2062,8 +2077,9 @@ std::vector<string> HloInstruction::ExtraAttributesToString() const {
   if (!control_predecessors_.empty()) {
     extra.push_back(StrCat("control-predecessors={",
                            Join(control_predecessors_, ", ",
-                                [](string* out, HloInstruction* pre) {
-                                  StrAppend(out, "%", pre->name());
+                                [&](string* out, HloInstruction* pre) {
+                                  StrAppend(out,
+                                            PrintName(pre->name(), options));
                                 }),
                            "}"));
   }
diff --git a/tensorflow/compiler/xla/service/hlo_instruction.h b/tensorflow/compiler/xla/service/hlo_instruction.h
index 6d6068c66a..7139db1f81 100644
--- a/tensorflow/compiler/xla/service/hlo_instruction.h
+++ b/tensorflow/compiler/xla/service/hlo_instruction.h
@@ -59,14 +59,15 @@ class HloModule;
 // A bunch of switches that control how the hlo text should be printed.
 class HloPrintOptions {
  public:
-  // Constructs the default print options: don't print large constants, print
-  // metadata, don't compact operands, and no indentation.
+  // Constructs the default print options: don't print large constants, don't
+  // compact operands, no indentation.
   HloPrintOptions()
       : print_large_constants_(false),
         print_metadata_(true),
         compact_operands_(false),
         print_operand_shape_(true),
         print_program_shape_(true),
+        print_percent_(true),
         indent_amount_(0) {}
 
   static HloPrintOptions ShortParsable() {
@@ -74,7 +75,8 @@ class HloPrintOptions {
         .set_print_large_constants(true)
         .set_print_metadata(false)
         .set_print_operand_shape(false)
-        .set_print_program_shape(false);
+        .set_print_program_shape(false)
+        .set_print_percent(false);
   }
 
   // If true, large constants will be printed out.
@@ -101,6 +103,12 @@ class HloPrintOptions {
     return *this;
   }
 
+  // If true, names will be printed with prefix '%'.
+  HloPrintOptions& set_print_percent(bool value) {
+    print_percent_ = value;
+    return *this;
+  }
+
   // If true, only a part of operands will be printed out, and their names will
   // be omitted (note that in this case the text will not be parsable).
   HloPrintOptions& set_compact_operands(bool value) {
@@ -119,6 +127,7 @@ class HloPrintOptions {
   bool compact_operands() const { return compact_operands_; }
   bool print_operand_shape() const { return print_operand_shape_; }
   bool print_program_shape() const { return print_program_shape_; }
+  bool print_percent() const { return print_percent_; }
   int indent_amount() const { return indent_amount_; }
 
  private:
@@ -127,6 +136,7 @@ class HloPrintOptions {
   bool compact_operands_;
   bool print_operand_shape_;
   bool print_program_shape_;
+  bool print_percent_;
   int indent_amount_;
 };
 
@@ -726,7 +736,8 @@ class HloInstruction {
   string OperandsToString(const HloPrintOptions& options) const;
 
   // Returns string representation of op-specific attributes.
-  std::vector<string> ExtraAttributesToString() const;
+  std::vector<string> ExtraAttributesToString(
+      const HloPrintOptions& options) const;
 
   // As ToString, but returns a shorter string.
   string ToShortString() const;
diff --git a/tensorflow/compiler/xla/service/hlo_module.cc b/tensorflow/compiler/xla/service/hlo_module.cc
index 7d3ea8def7..6103cab3e7 100644
--- a/tensorflow/compiler/xla/service/hlo_module.cc
+++ b/tensorflow/compiler/xla/service/hlo_module.cc
@@ -173,7 +173,7 @@ void HloModule::ReplaceComputations(
 
 string HloModule::ToString(const HloPrintOptions& options) const {
   std::ostringstream s;
-  s << "HloModule " << name() << ":\n\n";
+  s << "HloModule " << name() << "\n\n";
   for (const HloComputation* computation : MakeComputationPostOrder()) {
     if (computation == entry_computation()) {
       s << "ENTRY ";
diff --git a/tensorflow/compiler/xla/service/hlo_module_test.cc b/tensorflow/compiler/xla/service/hlo_module_test.cc
index de1eb1f094..0f5d3dccb7 100644
--- a/tensorflow/compiler/xla/service/hlo_module_test.cc
+++ b/tensorflow/compiler/xla/service/hlo_module_test.cc
@@ -135,12 +135,12 @@ TEST_F(HloModuleTest, LargeConstantToString) {
   module->AddEntryComputation(builder.Build());
 
   EXPECT_EQ(
-      "HloModule LargeConstantToString:\n\nENTRY %Constant () -> f32[16] {\n  "
+      "HloModule LargeConstantToString\n\nENTRY %Constant () -> f32[16] {\n  "
       "ROOT %constant = f32[16]{0} constant({...})\n}\n\n",
       module->ToString(HloPrintOptions().set_print_large_constants(false)));
 
   EXPECT_EQ(
-      "HloModule LargeConstantToString:\n\nENTRY %Constant () -> f32[16] {\n  "
+      "HloModule LargeConstantToString\n\nENTRY %Constant () -> f32[16] {\n  "
       "ROOT %constant = f32[16]{0} constant({42, 42, 42, 42, 42, 42, 42, 42, "
       "42, 42, 42, 42, 42, 42, 42, 42})\n}\n\n",
       module->ToString(HloPrintOptions().set_print_large_constants(true)));
diff --git a/tensorflow/compiler/xla/tools/parser/README.md b/tensorflow/compiler/xla/tools/parser/README.md
index 45e005581e..9acdfd4b02 100644
--- a/tensorflow/compiler/xla/tools/parser/README.md
+++ b/tensorflow/compiler/xla/tools/parser/README.md
@@ -91,6 +91,7 @@ tuple_elements
 name
   : identifier ':'
   | '%' identifier
+  | identifier
   ;
 
 identifier
diff --git a/tensorflow/compiler/xla/tools/parser/hlo_parser.cc b/tensorflow/compiler/xla/tools/parser/hlo_parser.cc
index e47c3b03ed..06812f677d 100644
--- a/tensorflow/compiler/xla/tools/parser/hlo_parser.cc
+++ b/tensorflow/compiler/xla/tools/parser/hlo_parser.cc
@@ -2018,8 +2018,8 @@ bool HloParser::ParseParamList() {
   } else {
     do {
       Shape shape;
-      if (!ParseToken(TokKind::kName, "expects name in parameter") ||
-          !ParseShape(&shape)) {
+      string name;
+      if (!ParseName(&name) || !ParseShape(&shape)) {
         return false;
       }
     } while (EatIfPresent(TokKind::kComma));
@@ -2067,7 +2067,8 @@ bool HloParser::CanBeShape() {
 
 bool HloParser::ParseName(string* result) {
   VLOG(1) << "ParseName";
-  if (lexer_.GetKind() != TokKind::kName) {
+  if (lexer_.GetKind() != TokKind::kIdent &&
+      lexer_.GetKind() != TokKind::kName) {
     return TokenError("expects name");
   }
   *result = lexer_.GetStrVal();
diff --git a/tensorflow/compiler/xla/tools/parser/hlo_parser_test.cc b/tensorflow/compiler/xla/tools/parser/hlo_parser_test.cc
index 29b3cc83e7..8b6b855218 100644
--- a/tensorflow/compiler/xla/tools/parser/hlo_parser_test.cc
+++ b/tensorflow/compiler/xla/tools/parser/hlo_parser_test.cc
@@ -46,7 +46,7 @@ std::vector<TestData> CreateTestCases() {
 // ax + y
 {
 "AxpyParam",
-R"(HloModule axpy_module:
+R"(HloModule axpy_module
 
 ENTRY %axpy.v5 (alpha: f32[], x: f32[2,4], y: f32[2,4]) -> f32[2,4] {
   %alpha = f32[] parameter(0)
@@ -62,7 +62,7 @@ ENTRY %axpy.v5 (alpha: f32[], x: f32[2,4], y: f32[2,4]) -> f32[2,4] {
 // pred constant
 {
 "ConstantPred",
-R"(HloModule constant_pred_module:
+R"(HloModule constant_pred_module
 
 ENTRY %constant_pred () -> pred[] {
   ROOT %constant = pred[] constant(true), metadata={op_type="const" op_name="\"it\'s not a problem\n" source_file="path/to/test.cc" source_line=68}
@@ -73,7 +73,7 @@ ENTRY %constant_pred () -> pred[] {
 // s32 constant
 {
 "ConstantS32",
-R"(HloModule constant_s32_module:
+R"(HloModule constant_s32_module
 
 ENTRY %constant_s32 () -> s32[] {
   ROOT %constant = s32[] constant(-42)
@@ -84,7 +84,7 @@ ENTRY %constant_s32 () -> s32[] {
 // f32 constant, but the value is not a decimal
 {
 "ConstantF32",
-R"(HloModule ConstantF32_module:
+R"(HloModule ConstantF32_module
 
 ENTRY %ConstantF32.v4 () -> f32[] {
   ROOT %constant = f32[] constant(42)
@@ -95,7 +95,7 @@ ENTRY %ConstantF32.v4 () -> f32[] {
 // f32 constant, rank 1 empty array.
 {
 "ConstantF32R1Empty",
-R"(HloModule ConstantF32Empty_module:
+R"(HloModule ConstantF32Empty_module
 
 ENTRY %ConstantF32Empty.v4 () -> f32[0] {
   ROOT %constant = f32[0]{0} constant({})
@@ -106,7 +106,7 @@ ENTRY %ConstantF32Empty.v4 () -> f32[0] {
 // f32 constant, rank 4 empty array.
 {
 "ConstantF32R4Empty",
-R"(HloModule ConstantF32R4Empty_module:
+R"(HloModule ConstantF32R4Empty_module
 
 ENTRY %ConstantF32R4Empty.v4 () -> f32[2,0,4,3] {
   ROOT %constant = f32[2,0,4,3]{3,2,1,0} constant(f32[2,0,4,3] { { /*i0=0*/ }, { /*i0=1*/ } })
@@ -117,7 +117,7 @@ ENTRY %ConstantF32R4Empty.v4 () -> f32[2,0,4,3] {
 // constant 4D
 {
 "Constant4D",
-R"(HloModule Small_3x2x1x1_module:
+R"(HloModule Small_3x2x1x1_module
 
 ENTRY %Small_3x2x1x1.v1 () -> f32[3,2,1,1] {
   ROOT %constant = f32[3,2,1,1]{3,2,1,0} constant(f32[3,2,1,1] { { /*i0=0*/ { /*i1=0*/ {-1} }, { /*i1=1*/ {4.1} } }, { /*i0=1*/ { /*i1=0*/ {2} }, { /*i1=1*/ {4.1} } }, { /*i0=2*/ { /*i1=0*/ {5} }, { /*i1=1*/ {4.4} } } })
@@ -128,7 +128,7 @@ ENTRY %Small_3x2x1x1.v1 () -> f32[3,2,1,1] {
 // non-finite constants: nan, inf, -inf
 {
 "ConstantNonFinite",
-R"(HloModule IsFiniteR1F32s_module:
+R"(HloModule IsFiniteR1F32s_module
 
 ENTRY %IsFiniteR1F32s.v2 () -> pred[6] {
   %constant = f32[6]{0} constant({nan, 7, nan, -1, inf, -inf})
@@ -140,7 +140,7 @@ ENTRY %IsFiniteR1F32s.v2 () -> pred[6] {
 // constant f16
 {
 "ConstantF16",
-R"(HloModule ConstantF16_module:
+R"(HloModule ConstantF16_module
 
 ENTRY %ConstantF16.v4 () -> f16[] {
   ROOT %constant = f16[] constant(500)
@@ -151,7 +151,7 @@ ENTRY %ConstantF16.v4 () -> f16[] {
 // bf16
 {
 "BF16",
-R"(HloModule BF16:
+R"(HloModule BF16
 
 ENTRY %BF16.v4 () -> bf16[] {
   ROOT %constant = bf16[] constant(500)
@@ -162,7 +162,7 @@ ENTRY %BF16.v4 () -> bf16[] {
 // constant + constant
 {
 "AddConstants",
-R"(HloModule add_constants_module:
+R"(HloModule add_constants_module
 
 ENTRY %add_constants () -> f32[] {
   %constant = f32[] constant(3.14)
@@ -174,7 +174,7 @@ ENTRY %add_constants () -> f32[] {
 // tuple constant
 {
 "TupleConstant",
-R"(HloModule TupleConstant_module:
+R"(HloModule TupleConstant_module
 
 ENTRY %TupleConstant.v1 () -> (f32[2,1], f32[2]) {
   ROOT %constant = (f32[2,1]{1,0}, f32[2]{0}) constant((f32[2,1], f32[2]) ( f32[2,1] { { 1 }, { 2 } }, {2, 42} ))
@@ -185,7 +185,7 @@ ENTRY %TupleConstant.v1 () -> (f32[2,1], f32[2]) {
 // v1 > v2 ? v1 : v2
 {
 "SelectR1F32",
-R"(HloModule SelectR1F32WithCmpR1F32sFromParamsSmall_module:
+R"(HloModule SelectR1F32WithCmpR1F32sFromParamsSmall_module
 
 ENTRY %SelectR1F32WithCmpR1F32sFromParamsSmall.v4 (v1: f32[4], v2: f32[4]) -> f32[4] {
   %v1 = f32[4]{0} parameter(0), sharding={maximal device=1}
@@ -199,7 +199,7 @@ ENTRY %SelectR1F32WithCmpR1F32sFromParamsSmall.v4 (v1: f32[4], v2: f32[4]) -> f3
 // empty tuple
 {
 "EmptyTupleCreate",
-R"(HloModule EmptyTupleCreate_module:
+R"(HloModule EmptyTupleCreate_module
 
 ENTRY %EmptyTupleCreate.v1 () -> () {
   ROOT %tuple = () tuple()
@@ -210,7 +210,7 @@ ENTRY %EmptyTupleCreate.v1 () -> () {
 // tuple
 {
 "TupleCreate",
-R"(HloModule TupleCreate_module:
+R"(HloModule TupleCreate_module
 
 ENTRY %TupleCreate.v4 (v1: f32[], v2: f32[3], v3: f32[2,3]) -> (f32[], f32[3], f32[2,3]) {
   %v1 = f32[] parameter(0)
@@ -223,7 +223,7 @@ ENTRY %TupleCreate.v4 (v1: f32[], v2: f32[3], v3: f32[2,3]) -> (f32[], f32[3], f
 },
 {
 "ShardedTupleCreate",
-R"(HloModule ShardedTupleCreate_module:
+R"(HloModule ShardedTupleCreate_module
 
 ENTRY %ShardedTupleCreate.v4 (v1: f32[], v2: f32[3], v3: f32[2,3]) -> (f32[], f32[3], f32[2,3]) {
   %v1 = f32[] parameter(0)
@@ -238,7 +238,7 @@ ENTRY %ShardedTupleCreate.v4 (v1: f32[], v2: f32[3], v3: f32[2,3]) -> (f32[], f3
 // while (result < 5) { result = result + 1; }
 {
 "WhileWithScalarS32Result",
-R"(HloModule WhileWithScalarS32Result_module:
+R"(HloModule WhileWithScalarS32Result_module
 
 %body.v3 (prev.1: s32[]) -> s32[] {
   %constant = s32[] constant(1)
@@ -262,7 +262,7 @@ ENTRY %WhileWithScalarS32Result.v2 () -> s32[] {
 // send and recv
 {
 "SendRecv",
-R"(HloModule TwoSendRecvBothWayRecvFist_module:
+R"(HloModule TwoSendRecvBothWayRecvFist_module
 
 ENTRY %TwoSendRecvBothWayRecvFist.v3 () -> f32[] {
   %recv = (f32[], u32[]) recv(), channel_id=15, sharding={maximal device=1}
@@ -277,7 +277,7 @@ ENTRY %TwoSendRecvBothWayRecvFist.v3 () -> f32[] {
 // get-tuple-element
 {
 "GetTupleElement",
-R"(HloModule GetTupleElement_module:
+R"(HloModule GetTupleElement_module
 
 ENTRY %GetTupleElement.v4 () -> s32[2,3] {
   %constant = f32[3]{0} constant({1, 2, 3})
@@ -291,7 +291,7 @@ ENTRY %GetTupleElement.v4 () -> s32[2,3] {
 // call
 {
 "Call",
-R"(HloModule CallR0F32IdentityScalar_module:
+R"(HloModule CallR0F32IdentityScalar_module
 
 %Identity.v1 (x: f32[]) -> f32[] {
   ROOT %x = f32[] parameter(0)
@@ -307,7 +307,7 @@ ENTRY %CallR0F32IdentityScalar.v2 () -> f32[] {
 // reduce window
 {
 "ReduceWindow",
-R"(HloModule R4UnitWindow_module:
+R"(HloModule R4UnitWindow_module
 
 %add_F32.v3 (lhs: f32[], rhs: f32[]) -> f32[] {
   %lhs = f32[] parameter(0)
@@ -326,7 +326,7 @@ ENTRY %R4UnitWindow.v3 (operand: f32[13,12,8,15]) -> f32[13,3,8,15] {
 // reduce window on scalar
 {
 "ReduceWindowScalar",
-R"(HloModule reduce_window_scalar:
+R"(HloModule reduce_window_scalar
 
 %add_F32.v3 (lhs: f32[], rhs: f32[]) -> f32[] {
   %lhs = f32[] parameter(0)
@@ -345,7 +345,7 @@ ENTRY %R4UnitWindowScalar () -> f32[] {
 // convolution
 {
 "Convolution",
-R"(HloModule Convolve1D1Window_0_module:
+R"(HloModule Convolve1D1Window_0_module
 
 ENTRY %Convolve1D1Window_0.v3 (input: f32[1,2,1], filter: f32[1,1,1]) -> f32[1,2,1] {
   %input = f32[1,2,1]{2,1,0} parameter(0)
@@ -359,7 +359,7 @@ ENTRY %Convolve1D1Window_0.v3 (input: f32[1,2,1], filter: f32[1,1,1]) -> f32[1,2
 // convolution rank 2
 {
 "ConvolutionR2",
-R"(HloModule ConvolveR2_module:
+R"(HloModule ConvolveR2_module
 
 ENTRY %ConvolveR2.v3 (input: f32[1,2], filter: f32[1,1]) -> f32[1,2] {
   %input = f32[1,2]{1,0} parameter(0)
@@ -372,7 +372,7 @@ ENTRY %ConvolveR2.v3 (input: f32[1,2], filter: f32[1,1]) -> f32[1,2] {
 // convolution backward
 {
 "ConvolutionBackward",
-R"(HloModule ConvolveBackward_module:
+R"(HloModule ConvolveBackward_module
 
 ENTRY %ConvolveBackward (input: f32[128,7,7,512], filter: f32[3,3,512,512]) -> f32[128,14,14,512] {
   %input = f32[128,7,7,512]{0,3,2,1} parameter(0)
@@ -385,7 +385,7 @@ ENTRY %ConvolveBackward (input: f32[128,7,7,512], filter: f32[3,3,512,512]) -> f
 // reverse(constant)
 {
 "Reverse4D",
-R"(HloModule Reverse4DFloatArrayOnDim01_module:
+R"(HloModule Reverse4DFloatArrayOnDim01_module
 
 ENTRY %Reverse4DFloatArrayOnDim01.v2 () -> f32[4,3,2,1] {
   %constant = f32[4,3,2,1]{0,1,2,3} constant(f32[4,3,2,1] { { /*i0=0*/ { /*i1=0*/ {1}, {2} }, { /*i1=1*/ {3}, {4} }, { /*i1=2*/ {5}, {6} } }, { /*i0=1*/ { /*i1=0*/ {7}, {8} }, { /*i1=1*/ {9}, {10} }, { /*i1=2*/ {11}, {12} } }, { /*i0=2*/ { /*i1=0*/ {13}, {14} }, { /*i1=1*/ {15}, {16} }, { /*i1=2*/ {17}, {18} } }, { /*i0=3*/ { /*i1=0*/ {19}, {20} }, { /*i1=1*/ {21}, {22} }, { /*i1=2*/ {23}, {24} } } })
@@ -397,7 +397,7 @@ ENTRY %Reverse4DFloatArrayOnDim01.v2 () -> f32[4,3,2,1] {
 // concat
 {
 "Concat",
-R"(HloModule Concat2x3With2x5_module:
+R"(HloModule Concat2x3With2x5_module
 
 ENTRY %Concat2x3With2x5.v3 () -> f32[2,8] {
   %constant = f32[2,3]{1,0} constant(f32[2,3] { { 0, 1, 2 }, { 1000, 1001, 1002 } })
@@ -410,7 +410,7 @@ ENTRY %Concat2x3With2x5.v3 () -> f32[2,8] {
 // select and scatter
 {
 "SelectAndScatter",
-R"(HloModule R4F32OverlapSmall_module:
+R"(HloModule R4F32OverlapSmall_module
 
 %ge_F32.v3 (lhs: f32[], rhs: f32[]) -> pred[] {
   %lhs = f32[] parameter(0)
@@ -436,7 +436,7 @@ ENTRY %R4F32OverlapSmall.v4 () -> f32[4,5,1,1] {
 // select and scatter on scalar
 {
 "SelectAndScatterScalar",
-R"(HloModule select_and_scatter_scalar:
+R"(HloModule select_and_scatter_scalar
 
 %ge_F32.v3 (lhs: f32[], rhs: f32[]) -> pred[] {
   %lhs = f32[] parameter(0)
@@ -462,7 +462,7 @@ ENTRY %SelectAndScatterScalar () -> f32[] {
 // slice
 {
 "Slice",
-R"(HloModule slice_module:
+R"(HloModule slice_module
 
 ENTRY %slice.v2 (p0: f32[3,3,4,4]) -> f32[3,3,2,4] {
   %p0 = f32[3,3,4,4]{3,2,1,0} parameter(0)
@@ -474,7 +474,7 @@ ENTRY %slice.v2 (p0: f32[3,3,4,4]) -> f32[3,3,2,4] {
 // slice, no stride
 {
 "SliceNoStride",
-R"(HloModule Slice3x3x3_To_1x3x3_F32_module:
+R"(HloModule Slice3x3x3_To_1x3x3_F32_module
 
 ENTRY %Slice3x3x3_To_1x3x3_F32.v2 () -> f32[1,3,3] {
   %constant = f32[3,3,3]{2,1,0} constant(f32[3,3,3] { { { 0, 1, 2 }, { 3, 4, 5 }, { 6, 7, 8 } }, { { 9, 10, 11 }, { 12, 13, 14 }, { 15, 16, 17 } }, { { 18, 19, 20 }, { 21, 22, 23 }, { 24, 25, 26 } } })
@@ -486,7 +486,7 @@ ENTRY %Slice3x3x3_To_1x3x3_F32.v2 () -> f32[1,3,3] {
 // slice R0
 {
 "SliceR0",
-R"(HloModule SliceR0_module:
+R"(HloModule SliceR0_module
 
 ENTRY %SliceR0.v2 () -> s32[] {
   %constant = s32[] constant(1)
@@ -498,7 +498,7 @@ ENTRY %SliceR0.v2 () -> s32[] {
 // transpose
 {
 "Transpose",
-R"(HloModule Transpose_module:
+R"(HloModule Transpose_module
 
 ENTRY %Transpose.v2 () -> s32[1,2,3] {
   %constant = s32[1,2,3]{2,1,0} constant(s32[1,2,3] { { { 1, 2, 3 }, { 4, 5, 6 } } })
@@ -510,7 +510,7 @@ ENTRY %Transpose.v2 () -> s32[1,2,3] {
 // Dynamic slice
 {
 "DynamicSlice",
-R"(HloModule DynamicSlice_module:
+R"(HloModule DynamicSlice_module
 
 ENTRY %DynamicSlice.v5 (original_parameter: s32[2,2,258], start_index: s32[1]) -> s32[2,2,258] {
   %original_parameter = s32[2,2,258]{2,1,0} parameter(0)
@@ -525,7 +525,7 @@ ENTRY %DynamicSlice.v5 (original_parameter: s32[2,2,258], start_index: s32[1]) -
 // Dynamic update slice
 {
 "DynamicUpdateSlice",
-R"(HloModule DynamicUpdateSlice_module:
+R"(HloModule DynamicUpdateSlice_module
 
 ENTRY %DynamicUpdateSlice.v4 (input: s32[1,1,25,1], update: s32[1,1,2,1], start_indices: s32[4]) -> s32[1,1,25,1] {
   %input = s32[1,1,25,1]{3,2,1,0} parameter(0)
@@ -539,7 +539,7 @@ ENTRY %DynamicUpdateSlice.v4 (input: s32[1,1,25,1], update: s32[1,1,2,1], start_
 // batch norm training
 {
 "BatchNormTraining",
-R"(HloModule BasicTraining_module:
+R"(HloModule BasicTraining_module
 
 ENTRY %BasicTraining.v4 () -> (f32[2,2,1,2], f32[2], f32[2]) {
   %constant = f32[2,2,1,2]{3,2,1,0} constant(f32[2,2,1,2] { { /*i0=0*/ { /*i1=0*/ {1, 2} }, { /*i1=1*/ {3, 4} } }, { /*i0=1*/ { /*i1=0*/ {5, 6} }, { /*i1=1*/ {7, 8} } } })
@@ -553,7 +553,7 @@ ENTRY %BasicTraining.v4 () -> (f32[2,2,1,2], f32[2], f32[2]) {
 // batch norm inference
 {
 "BatchNormInference",
-R"(HloModule BatchNormInference_module:
+R"(HloModule BatchNormInference_module
 
 ENTRY %BatchNormInference.v6 (input: f32[2,2,2,2], offset: f32[2], scale: f32[2], mean: f32[2], variance: f32[2]) -> f32[2,2,2,2] {
   %input = f32[2,2,2,2]{3,2,1,0} parameter(0)
@@ -569,7 +569,7 @@ ENTRY %BatchNormInference.v6 (input: f32[2,2,2,2], offset: f32[2], scale: f32[2]
 // batch norm grad
 {
 "BatchNormGrad",
-R"(HloModule BatchNormGrad_module:
+R"(HloModule BatchNormGrad_module
 
 ENTRY %BatchNormGrad.v4 (input: f32[2,2,2,2], scale: f32[2], mean: f32[2], variance: f32[2], grad_output: f32[2,2,2,2]) -> (f32[2,2,2,2], f32[2], f32[2]) {
   %input = f32[2,2,2,2]{3,2,1,0} parameter(0)
@@ -585,7 +585,7 @@ ENTRY %BatchNormGrad.v4 (input: f32[2,2,2,2], scale: f32[2], mean: f32[2], varia
 // pad
 {
 "Pad",
-R"(HloModule Pad1DS3Array_module:
+R"(HloModule Pad1DS3Array_module
 
 ENTRY %Pad1DS3Array.v3 () -> f32[8] {
   %constant = f32[3]{0} constant({1, 2, 3})
@@ -598,7 +598,7 @@ ENTRY %Pad1DS3Array.v3 () -> f32[8] {
 // pad has interior
 {
 "PadHasInterior",
-R"(HloModule PadHasInterior_module:
+R"(HloModule PadHasInterior_module
 
 ENTRY %PadHasInterior.v3 (input: f32[1,25,7,7]) -> f32[1,25,17,11] {
   %input = f32[1,25,7,7]{3,2,1,0} parameter(0)
@@ -611,7 +611,7 @@ ENTRY %PadHasInterior.v3 (input: f32[1,25,7,7]) -> f32[1,25,17,11] {
 // fusion
 {
 "Fusion",
-R"(HloModule fusion_module:
+R"(HloModule fusion_module
 
 %fused_computation (constant.param_0: f32[3,2,1,1], constant.1.param_1: f32[2]) -> f32[3,2,1,1] {
   %constant.param_0 = f32[3,2,1,1]{3,2,1,0} parameter(0)
@@ -638,18 +638,18 @@ std::vector<TestData> CreateShortTestCases() {
 // map
 {
 "Map",
-R"(HloModule MapBinaryAdder_module:
+R"(HloModule MapBinaryAdder_module
 
-%add_F32.v3 {
-  %lhs = f32[] parameter(0)
-  %rhs = f32[] parameter(1)
-  ROOT %add = f32[] add(%lhs, %rhs)
+add_F32.v3 {
+  lhs = f32[] parameter(0)
+  rhs = f32[] parameter(1)
+  ROOT add = f32[] add(lhs, rhs)
 }
 
-ENTRY %MapBinaryAdder.v3 {
-  %param0 = f32[4]{0} parameter(0)
-  %param1 = f32[4]{0} parameter(1)
-  ROOT %map = f32[4]{0} map(%param0, %param1), to_apply=%add_F32.v3
+ENTRY MapBinaryAdder.v3 {
+  param0 = f32[4]{0} parameter(0)
+  param1 = f32[4]{0} parameter(1)
+  ROOT map = f32[4]{0} map(param0, param1), to_apply=add_F32.v3
 }
 
 )"
@@ -657,18 +657,18 @@ ENTRY %MapBinaryAdder.v3 {
 // reduce
 {
 "Reduce",
-R"(HloModule ReduceR3ToR2_module:
+R"(HloModule ReduceR3ToR2_module
 
-%add_F32.v3 {
-  %lhs = f32[] parameter(0)
-  %rhs = f32[] parameter(1)
-  ROOT %add = f32[] add(%lhs, %rhs)
+add_F32.v3 {
+  lhs = f32[] parameter(0)
+  rhs = f32[] parameter(1)
+  ROOT add = f32[] add(lhs, rhs)
 }
 
-ENTRY %ReduceR3ToR2.v3 {
-  %input = f32[8,16,256]{2,1,0} parameter(0)
-  %constant = f32[] constant(0)
-  ROOT %reduce = f32[8,16]{1,0} reduce(%input, %constant), dimensions={2}, to_apply=%add_F32.v3
+ENTRY ReduceR3ToR2.v3 {
+  input = f32[8,16,256]{2,1,0} parameter(0)
+  constant = f32[] constant(0)
+  ROOT reduce = f32[8,16]{1,0} reduce(input, constant), dimensions={2}, to_apply=add_F32.v3
 }
 
 )"
@@ -676,13 +676,13 @@ ENTRY %ReduceR3ToR2.v3 {
 // infeed/outfeed
 {
 "InfeedOutfeed",
-R"(HloModule outfeed_module:
+R"(HloModule outfeed_module
 
-ENTRY %InfeedToOutfeed {
-  %infeed = (u32[3]{0}, pred[]) infeed()
-  %outfeed = () outfeed(%infeed)
-  ROOT %infeed.1 = (u32[3]{0}, pred[]) infeed()
-  %outfeed.1 = () outfeed(%infeed.1)
+ENTRY InfeedToOutfeed {
+  infeed = (u32[3]{0}, pred[]) infeed()
+  outfeed = () outfeed(infeed)
+  ROOT infeed.1 = (u32[3]{0}, pred[]) infeed()
+  outfeed.1 = () outfeed(infeed.1)
 }
 
 )"
@@ -690,12 +690,12 @@ ENTRY %InfeedToOutfeed {
 // Rng
 {
 "Rng",
-R"(HloModule rng_module:
+R"(HloModule rng_module
 
-ENTRY %Rng {
-  %constant = f32[] constant(0)
-  %constant.1 = f32[] constant(1)
-  ROOT %rng = f32[8]{0} rng(%constant, %constant.1), distribution=rng_uniform
+ENTRY Rng {
+  constant = f32[] constant(0)
+  constant.1 = f32[] constant(1)
+  ROOT rng = f32[8]{0} rng(constant, constant.1), distribution=rng_uniform
 }
 
 )"
@@ -703,11 +703,11 @@ ENTRY %Rng {
 // Reduce precision
 {
 "ReducePrevison",
-R"(HloModule reduce_precision:
+R"(HloModule reduce_precision
 
-ENTRY %ReducePrecision {
-  %constant = f32[1]{0} constant({3.14159})
-  ROOT %reduce-precision = f32[1]{0} reduce-precision(%constant), exponent_bits=8, mantissa_bits=10
+ENTRY ReducePrecision {
+  constant = f32[1]{0} constant({3.14159})
+  ROOT reduce-precision = f32[1]{0} reduce-precision(constant), exponent_bits=8, mantissa_bits=10
 }
 
 )"
@@ -715,23 +715,23 @@ ENTRY %ReducePrecision {
 // Conditional
 {
 "Conditional",
-R"(HloModule conditional:
+R"(HloModule conditional
 
-%Negate {
-  %x = f32[] parameter(0)
-  ROOT %negate = f32[] negate(%x)
+Negate {
+  x = f32[] parameter(0)
+  ROOT negate = f32[] negate(x)
 }
 
-%Identity {
-  %y = f32[] parameter(0)
-  ROOT %copy = f32[] copy(%y)
+Identity {
+  y = f32[] parameter(0)
+  ROOT copy = f32[] copy(y)
 }
 
-ENTRY %Parameters1.v4 {
-  %constant = pred[] constant(true)
-  %constant.1 = f32[] constant(56)
-  %constant.2 = f32[] constant(12)
-  ROOT %conditional = f32[] conditional(%constant, %constant.1, %constant.2), true_computation=%Negate, false_computation=%Identity
+ENTRY Parameters1.v4 {
+  constant = pred[] constant(true)
+  constant.1 = f32[] constant(56)
+  constant.2 = f32[] constant(12)
+  ROOT conditional = f32[] conditional(constant, constant.1, constant.2), true_computation=Negate, false_computation=Identity
 }
 
 )"
@@ -739,11 +739,11 @@ ENTRY %Parameters1.v4 {
 // CustomCall
 {
 "CustomCall",
-R"(HloModule custom_call:
+R"(HloModule custom_call
 
-ENTRY %CustomCall {
-  %constant = f32[1]{0} constant({12345})
-  ROOT %custom-call = f32[1,2,3]{0,2,1} custom-call(%constant), custom_call_target="foo\"bar"
+ENTRY CustomCall {
+  constant = f32[1]{0} constant({12345})
+  ROOT custom-call = f32[1,2,3]{0,2,1} custom-call(constant), custom_call_target="foo\"bar"
 }
 
 )"
@@ -751,11 +751,11 @@ ENTRY %CustomCall {
 // Variables with non-default names
 {
 "NonDefaultNames",
-R"(HloModule add_constants_module:
+R"(HloModule add_constants_module
 
-ENTRY %add_constants {
-  %foo = f32[] constant(3.14)
-  ROOT %bar = f32[] add(%foo, %foo)
+ENTRY add_constants {
+  foo = f32[] constant(3.14)
+  ROOT bar = f32[] add(foo, foo)
 }
 
 )"
@@ -870,7 +870,7 @@ ENTRY %blabla (x: f32[]) -> pred[] {
 }
 
 TEST_F(HloParserTest, MoreConstants) {
-  const string original = R"(HloModule SelectScalarS32True_module:
+  const string original = R"(HloModule SelectScalarS32True_module
 
 ENTRY %SelectScalarS32True.v4 () -> s32[] {
   %constant.2 = pred[] constant(true)
@@ -887,7 +887,7 @@ ENTRY %SelectScalarS32True.v4 () -> s32[] {
 }
 
 TEST_F(HloParserTest, LiteralDimensionsMismatch_1) {
-  const string original = R"(HloModule some_2_module:
+  const string original = R"(HloModule some_2_module
 
 ENTRY %some_2 () -> f32[2] {
   ROOT %constant = f32[2]{0} constant({1,{2}})
@@ -901,7 +901,7 @@ ENTRY %some_2 () -> f32[2] {
 }
 
 TEST_F(HloParserTest, LiteralDimensionsMismatch_2) {
-  const string original = R"(HloModule some_2x3_module:
+  const string original = R"(HloModule some_2x3_module
 
 ENTRY %some_2x3 () -> f32[2,3] {
   ROOT %constant = f32[2,3]{1,0} constant(f32[2,3] {1, 2, 3, 4, 5, 6})
@@ -915,7 +915,7 @@ ENTRY %some_2x3 () -> f32[2,3] {
 }
 
 TEST_F(HloParserTest, LiteralDimensionsMismatch_3) {
-  const string original = R"(HloModule some_2x3x2_module:
+  const string original = R"(HloModule some_2x3x2_module
 
 ENTRY %some_2x3x2 () -> f32[2,3,2] {
   ROOT %constant = f32[2,3,2]{2,1,0} constant(f32[2,3,2] {{{1, 2}, {3, 4}, {5, 6}, {7, 8}, {9, 10}, {11, 12}}})
@@ -930,7 +930,7 @@ ENTRY %some_2x3x2 () -> f32[2,3,2] {
 
 TEST_F(HloParserTest, ConstantF16Overflow) {
   const string original =
-      R"(HloModule ConstantF16Overflow_module:
+      R"(HloModule ConstantF16Overflow_module
 
 ENTRY %ConstantF16Overflow.v4 () -> f16[] {
   ROOT %constant = f16[] constant(-65505)
@@ -944,7 +944,7 @@ ENTRY %ConstantF16Overflow.v4 () -> f16[] {
 }
 
 TEST_F(HloParserTest, ConstantWithExp) {
-  const string original = R"(HloModule ConstantWithExp_module:
+  const string original = R"(HloModule ConstantWithExp_module
 
 ENTRY %ConstantWithExp.v4 () -> f32[] {
   %constant.1 = f32[] constant(3e+2)
@@ -959,7 +959,7 @@ ENTRY %ConstantWithExp.v4 () -> f32[] {
 }
 
 TEST_F(HloParserTest, AttibutesAnyOrder) {
-  const string original = R"(HloModule any_order_module:
+  const string original = R"(HloModule any_order_module
 
 ENTRY %Convolve1D1Window_0.v3 (input: f32[1,2,1], filter: f32[1,1,1]) -> f32[1,2,1] {
   %input = f32[1,2,1]{2,1,0} parameter(0)
@@ -973,7 +973,7 @@ ENTRY %Convolve1D1Window_0.v3 (input: f32[1,2,1], filter: f32[1,1,1]) -> f32[1,2
 }
 
 TEST_F(HloParserTest, InvalidDimLabels) {
-  string prefix = R"(HloModule invalid_dim_labels_module:
+  string prefix = R"(HloModule invalid_dim_labels_module
 
 ENTRY %Convolve1D1Window_0.v3 (input: f32[1,2,1], filter: f32[1,1,1]) -> f32[1,2,1] {
   %input = f32[1,2,1]{2,1,0} parameter(0)
@@ -997,7 +997,7 @@ ENTRY %Convolve1D1Window_0.v3 (input: f32[1,2,1], filter: f32[1,1,1]) -> f32[1,2
 }
 
 TEST_F(HloParserTest, UnexpectedAttribute) {
-  const string original = R"(HloModule unexpected_attr_module:
+  const string original = R"(HloModule unexpected_attr_module
 
 ENTRY %TwoSendRecvBothWayRecvFist.v3 () -> f32[] {
   %recv = (f32[], u32[]) recv(), channel_id=15
@@ -1013,7 +1013,7 @@ ENTRY %TwoSendRecvBothWayRecvFist.v3 () -> f32[] {
 }
 
 TEST_F(HloParserTest, MissingAttribute) {
-  const string original = R"(HloModule missing_attr_module:
+  const string original = R"(HloModule missing_attr_module
 
 ENTRY %TwoSendRecvBothWayRecvFist.v3 () -> f32[] {
   %recv = (f32[], u32[]) recv(), channel_id=15
@@ -1029,7 +1029,7 @@ ENTRY %TwoSendRecvBothWayRecvFist.v3 () -> f32[] {
 }
 
 TEST_F(HloParserTest, PredecessorUndefined) {
-  const string original = R"(HloModule pre_not_found_module:
+  const string original = R"(HloModule pre_not_found_module
 
 ENTRY %TwoSendRecvBothWayRecvFist.v3 () -> f32[] {
   %recv = (f32[], u32[]) recv(), channel_id=15
@@ -1045,7 +1045,7 @@ ENTRY %TwoSendRecvBothWayRecvFist.v3 () -> f32[] {
 }
 
 TEST_F(HloParserTest, SliceAllowOmitStride1) {
-  const string original = R"(HloModule slice_module:
+  const string original = R"(HloModule slice_module
 
 ENTRY %slice.v2 (p0: f32[3,3,4,4]) -> f32[3,3,2,4] {
   %p0 = f32[3,3,4,4]{3,2,1,0} parameter(0)
@@ -1057,7 +1057,7 @@ ENTRY %slice.v2 (p0: f32[3,3,4,4]) -> f32[3,3,2,4] {
 }
 
 TEST_F(HloParserTest, PaddingConfigIsNotWindowPad) {
-  const string original = R"(HloModule window_pad_module:
+  const string original = R"(HloModule window_pad_module
 
 ENTRY %Convolve1D1Window_0.v3 (input: f32[1,2,1], filter: f32[1,1,1]) -> f32[1,2,1] {
   %input = f32[1,2,1]{2,1,0} parameter(0)
@@ -1072,7 +1072,7 @@ ENTRY %Convolve1D1Window_0.v3 (input: f32[1,2,1], filter: f32[1,1,1]) -> f32[1,2
 }
 
 TEST_F(HloParserTest, CommaBetweenSubAttributes) {
-  const string original = R"(HloModule test_comma_module:
+  const string original = R"(HloModule test_comma_module
 
 ENTRY %test_comma.v4 () -> f32[] {
   ROOT %constant = f32[] constant(-4.2), metadata={source_line=5, op_type="::const"}
-- 
GitLab


From dbcb1ffcca6a3c52e3c109a1739018350bc41925 Mon Sep 17 00:00:00 2001
From: Yu-Cheng Ling <ycling@google.com>
Date: Thu, 14 Dec 2017 21:26:55 -0800
Subject: [PATCH 1046/1225] Support BatchToSpaceND in TFLite

The internal implementation only support 4D tensors for now.
The dimension has to be 1 batch + 2 spatial + 1 other.
The most common format within this restriction is NHWC.

Cropping is not supported by the internal implementation.

PiperOrigin-RevId: 179143332
---
 tensorflow/contrib/lite/builtin_op_data.h     |  11 +
 tensorflow/contrib/lite/kernels/BUILD         |  13 +
 .../contrib/lite/kernels/batch_to_space_nd.cc | 161 ++++++++++++
 .../lite/kernels/batch_to_space_nd_test.cc    |  78 ++++++
 tensorflow/contrib/lite/kernels/register.cc   |   2 +
 tensorflow/contrib/lite/model.cc              |  18 ++
 tensorflow/contrib/lite/nnapi_delegate.cc     |   1 +
 tensorflow/contrib/lite/schema/schema.fbs     |   8 +
 .../contrib/lite/schema/schema_generated.h    | 234 +++++++++++++++++-
 tensorflow/contrib/lite/testing/BUILD         |   1 +
 .../contrib/lite/testing/generate_examples.py |  42 ++++
 .../testing/generated_examples_zip_test.cc    |   1 +
 tensorflow/contrib/lite/toco/BUILD            |   1 +
 .../graph_transformations.h                   |   1 +
 .../resolve_batch_to_space_nd_attributes.cc   |  70 ++++++
 tensorflow/contrib/lite/toco/model.h          |   4 +
 .../contrib/lite/toco/tflite/operator.cc      |  34 +++
 .../contrib/lite/toco/tflite/operator_test.cc |  13 +
 tensorflow/contrib/lite/toco/toco_tooling.cc  |   1 +
 19 files changed, 688 insertions(+), 6 deletions(-)
 create mode 100644 tensorflow/contrib/lite/kernels/batch_to_space_nd.cc
 create mode 100644 tensorflow/contrib/lite/kernels/batch_to_space_nd_test.cc
 mode change 100644 => 100755 tensorflow/contrib/lite/schema/schema_generated.h
 create mode 100644 tensorflow/contrib/lite/toco/graph_transformations/resolve_batch_to_space_nd_attributes.cc

diff --git a/tensorflow/contrib/lite/builtin_op_data.h b/tensorflow/contrib/lite/builtin_op_data.h
index 548864a1e9..5c6f3016b1 100644
--- a/tensorflow/contrib/lite/builtin_op_data.h
+++ b/tensorflow/contrib/lite/builtin_op_data.h
@@ -104,6 +104,17 @@ typedef struct {
   TfLiteFusedActivation activation;
 } TfLiteAddParams;
 
+typedef struct {
+  // Number of spatial dimensions.
+  // For now only NHWC is supported, and the value should always be 2.
+  int num_spatial_dimensions;
+  // TODO(ahentz): We can't have dynamic data in this struct, at least not yet.
+  // For now we will fix the maximum possible number of dimensions.
+  int block_shape[2];
+  int before_crops[2];
+  int after_crops[2];
+} TfLiteBatchToSpaceNDParams;
+
 typedef struct {
   TfLiteFusedActivation activation;
 } TfLiteMulParams;
diff --git a/tensorflow/contrib/lite/kernels/BUILD b/tensorflow/contrib/lite/kernels/BUILD
index 3908960c33..cc02cddb3d 100644
--- a/tensorflow/contrib/lite/kernels/BUILD
+++ b/tensorflow/contrib/lite/kernels/BUILD
@@ -77,6 +77,7 @@ cc_library(
         "activations.cc",
         "add.cc",
         "basic_rnn.cc",
+        "batch_to_space_nd.cc",
         "concatenation.cc",
         "conv.cc",
         "depthwise_conv.cc",
@@ -156,6 +157,18 @@ tf_cc_test(
     ],
 )
 
+tf_cc_test(
+    name = "batch_to_space_nd_test",
+    size = "small",
+    srcs = ["batch_to_space_nd_test.cc"],
+    deps = [
+        ":builtin_ops",
+        "//tensorflow/contrib/lite:framework",
+        "//tensorflow/contrib/lite/kernels:test_util",
+        "@com_google_googletest//:gtest",
+    ],
+)
+
 tf_cc_test(
     name = "concatenation_test",
     size = "small",
diff --git a/tensorflow/contrib/lite/kernels/batch_to_space_nd.cc b/tensorflow/contrib/lite/kernels/batch_to_space_nd.cc
new file mode 100644
index 0000000000..0eed680fdc
--- /dev/null
+++ b/tensorflow/contrib/lite/kernels/batch_to_space_nd.cc
@@ -0,0 +1,161 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include <string.h>
+#include <vector>
+#include "tensorflow/contrib/lite/builtin_op_data.h"
+#include "tensorflow/contrib/lite/context.h"
+#include "tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h"
+#include "tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h"
+#include "tensorflow/contrib/lite/kernels/internal/tensor.h"
+#include "tensorflow/contrib/lite/kernels/kernel_util.h"
+#include "tensorflow/contrib/lite/kernels/op_macros.h"
+
+namespace tflite {
+namespace ops {
+namespace builtin {
+namespace batch_to_space_nd {
+
+// This file has two implementations of BatchToSpaceND.
+enum KernelType {
+  kReference,
+  kGenericOptimized,
+};
+
+struct BatchToSpaceNDContext {
+  BatchToSpaceNDContext(TfLiteContext* context, TfLiteNode* node) {
+    params = reinterpret_cast<TfLiteBatchToSpaceNDParams*>(node->builtin_data);
+    input = GetInput(context, node, 0);
+    output = GetOutput(context, node, 0);
+  }
+  TfLiteBatchToSpaceNDParams* params;
+  TfLiteTensor* input;
+  TfLiteTensor* output;
+};
+
+// Currently, only 4D NHWC input/output op_context are supported.
+// The 4D array need to have exactly 2 spatial dimensions.
+// TODO(ycling): Support arbitrary dimension in BatchToSpaceND.
+const int kInputDimensionNum = 4;
+const int kOutputDimensionNum = 4;
+const int kSpatialDimensionNum = 2;
+
+TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
+  // The 2nd tensor (block_shape) and the 3rd tensor (crops) are ignored now.
+  TF_LITE_ENSURE(context, NumInputs(node) >= 1 && NumInputs(node) <= 3);
+  TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
+
+  BatchToSpaceNDContext op_context(context, node);
+  TF_LITE_ENSURE_EQ(context, NumDimensions(op_context.input),
+                    kInputDimensionNum);
+  TF_LITE_ENSURE_EQ(context, op_context.params->num_spatial_dimensions,
+                    kSpatialDimensionNum);
+  TF_LITE_ENSURE_EQ(context, op_context.input->type, op_context.output->type);
+
+  const TfLiteIntArray* input_size = op_context.input->dims;
+  const int* block_shape = op_context.params->block_shape;
+
+  // Number of batch must be multiple of (block_shape[0] * block_shape[1]).
+  TF_LITE_ENSURE_EQ(context,
+                    input_size->data[0] % (block_shape[0] * block_shape[1]), 0);
+
+  const int output_batch_size =
+      input_size->data[0] / (block_shape[0] * block_shape[1]);
+  const int output_height = input_size->data[1] * block_shape[0];
+  const int output_width = input_size->data[2] * block_shape[1];
+  const int output_channel_size = input_size->data[3];
+
+  TfLiteIntArray* output_size = TfLiteIntArrayCreate(kOutputDimensionNum);
+  output_size->data[0] = output_batch_size;
+  output_size->data[1] = output_height;
+  output_size->data[2] = output_width;
+  output_size->data[3] = output_channel_size;
+
+  return context->ResizeTensor(context, op_context.output, output_size);
+}
+
+template <KernelType kernel_type>
+TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
+  BatchToSpaceNDContext op_context(context, node);
+
+  int block_shape_dims_array[1] = {kSpatialDimensionNum};
+  Dims<4> block_shape_dims = GetTensorDims(block_shape_dims_array, 1);
+
+#define TF_LITE_BATCH_TO_SPACE_ND(type, scalar)                          \
+  type::BatchToSpaceND(GetTensorData<scalar>(op_context.input),          \
+                       GetTensorDims(op_context.input),                  \
+                       op_context.params->block_shape, block_shape_dims, \
+                       GetTensorData<scalar>(op_context.output),         \
+                       GetTensorDims(op_context.output))
+  switch (op_context.input->type) {  // Already know in/out types are same.
+    case kTfLiteFloat32:
+      if (kernel_type == kReference) {
+        TF_LITE_BATCH_TO_SPACE_ND(reference_ops, float);
+      } else {
+        TF_LITE_BATCH_TO_SPACE_ND(optimized_ops, float);
+      }
+      break;
+    case kTfLiteUInt8:
+      if (kernel_type == kReference) {
+        TF_LITE_BATCH_TO_SPACE_ND(reference_ops, uint8_t);
+      } else {
+        TF_LITE_BATCH_TO_SPACE_ND(optimized_ops, uint8_t);
+      }
+      break;
+    case kTfLiteInt32:
+      if (kernel_type == kReference) {
+        TF_LITE_BATCH_TO_SPACE_ND(reference_ops, int32_t);
+      } else {
+        TF_LITE_BATCH_TO_SPACE_ND(optimized_ops, int32_t);
+      }
+      break;
+    case kTfLiteInt64:
+      if (kernel_type == kReference) {
+        TF_LITE_BATCH_TO_SPACE_ND(reference_ops, int64_t);
+      } else {
+        TF_LITE_BATCH_TO_SPACE_ND(optimized_ops, int64_t);
+      }
+      break;
+    default:
+      context->ReportError(context,
+                           "Type is currently not supported by BatchToSpace.");
+      return kTfLiteError;
+  }
+#undef TF_LITE_BATCH_TO_SPACE_ND
+  return kTfLiteOk;
+}
+
+}  // namespace batch_to_space_nd
+
+TfLiteRegistration* Register_BATCH_TO_SPACE_ND_REF() {
+  static TfLiteRegistration r = {
+      nullptr, nullptr, batch_to_space_nd::Prepare,
+      batch_to_space_nd::Eval<batch_to_space_nd::kReference>};
+  return &r;
+}
+
+TfLiteRegistration* Register_BATCH_TO_SPACE_ND_GENERIC_OPT() {
+  static TfLiteRegistration r = {
+      nullptr, nullptr, batch_to_space_nd::Prepare,
+      batch_to_space_nd::Eval<batch_to_space_nd::kGenericOptimized>};
+  return &r;
+}
+
+TfLiteRegistration* Register_BATCH_TO_SPACE_ND() {
+  return Register_BATCH_TO_SPACE_ND_GENERIC_OPT();
+}
+
+}  // namespace builtin
+}  // namespace ops
+}  // namespace tflite
diff --git a/tensorflow/contrib/lite/kernels/batch_to_space_nd_test.cc b/tensorflow/contrib/lite/kernels/batch_to_space_nd_test.cc
new file mode 100644
index 0000000000..3ec4efbebc
--- /dev/null
+++ b/tensorflow/contrib/lite/kernels/batch_to_space_nd_test.cc
@@ -0,0 +1,78 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include <gtest/gtest.h>
+#include "tensorflow/contrib/lite/interpreter.h"
+#include "tensorflow/contrib/lite/kernels/register.h"
+#include "tensorflow/contrib/lite/kernels/test_util.h"
+#include "tensorflow/contrib/lite/model.h"
+
+namespace tflite {
+namespace {
+
+using ::testing::ElementsAreArray;
+
+class BatchToSpaceNDOpModel : public SingleOpModel {
+ public:
+  BatchToSpaceNDOpModel(std::initializer_list<int> input_shape,
+                        std::initializer_list<int> block_shape,
+                        std::initializer_list<int> before_crops,
+                        std::initializer_list<int> after_crops) {
+    input_ = AddInput(TensorType_FLOAT32);
+    output_ = AddOutput(TensorType_FLOAT32);
+    SetBuiltinOp(BuiltinOperator_BATCH_TO_SPACE_ND,
+                 BuiltinOptions_BatchToSpaceNDOptions,
+                 CreateBatchToSpaceNDOptions(
+                     builder_, builder_.CreateVector<int>(block_shape),
+                     builder_.CreateVector<int>(before_crops),
+                     builder_.CreateVector<int>(after_crops))
+                     .Union());
+    BuildInterpreter({input_shape});
+  }
+
+  void SetInput(std::initializer_list<float> data) {
+    PopulateTensor<float>(input_, data);
+  }
+
+  std::vector<float> GetOutput() { return ExtractVector<float>(output_); }
+  std::vector<int> GetOutputShape() { return GetTensorShape(output_); }
+
+ private:
+  int input_;
+  int output_;
+};
+
+TEST(BatchToSpaceNDOpTest, SimpleTest) {
+  BatchToSpaceNDOpModel m({4, 2, 2, 1}, {2, 2}, {0, 0}, {0, 0});
+  m.SetInput({1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16});
+  m.Invoke();
+  EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({1, 4, 4, 1}));
+  EXPECT_THAT(m.GetOutput(), ElementsAreArray({1, 5, 2, 6, 9, 13, 10, 14, 3, 7,
+                                               4, 8, 11, 15, 12, 16}));
+}
+
+TEST(BatchToSpaceNDOpTest, InvalidShapeTest) {
+  EXPECT_DEATH(BatchToSpaceNDOpModel({3, 2, 2, 1}, {2, 2}, {0, 0}, {0, 0}),
+               "Cannot allocate tensors");
+}
+
+}  // namespace
+}  // namespace tflite
+
+int main(int argc, char** argv) {
+  ::tflite::LogToStderr();
+  ::testing::InitGoogleTest(&argc, argv);
+  return RUN_ALL_TESTS();
+}
diff --git a/tensorflow/contrib/lite/kernels/register.cc b/tensorflow/contrib/lite/kernels/register.cc
index 3d1edeef01..d4e7503f48 100644
--- a/tensorflow/contrib/lite/kernels/register.cc
+++ b/tensorflow/contrib/lite/kernels/register.cc
@@ -40,6 +40,7 @@ TfLiteRegistration* Register_HASHTABLE_LOOKUP();
 TfLiteRegistration* Register_SOFTMAX();
 TfLiteRegistration* Register_CONCATENATION();
 TfLiteRegistration* Register_ADD();
+TfLiteRegistration* Register_BATCH_TO_SPACE_ND();
 TfLiteRegistration* Register_MUL();
 TfLiteRegistration* Register_L2_NORMALIZATION();
 TfLiteRegistration* Register_LOCAL_RESPONSE_NORMALIZATION();
@@ -75,6 +76,7 @@ BuiltinOpResolver::BuiltinOpResolver() {
   AddBuiltin(BuiltinOperator_SOFTMAX, Register_SOFTMAX());
   AddBuiltin(BuiltinOperator_CONCATENATION, Register_CONCATENATION());
   AddBuiltin(BuiltinOperator_ADD, Register_ADD());
+  AddBuiltin(BuiltinOperator_BATCH_TO_SPACE_ND, Register_BATCH_TO_SPACE_ND());
   AddBuiltin(BuiltinOperator_MUL, Register_MUL());
   AddBuiltin(BuiltinOperator_L2_NORMALIZATION, Register_L2_NORMALIZATION());
   AddBuiltin(BuiltinOperator_LOCAL_RESPONSE_NORMALIZATION,
diff --git a/tensorflow/contrib/lite/model.cc b/tensorflow/contrib/lite/model.cc
index 4ef2c942c1..94e22b2659 100644
--- a/tensorflow/contrib/lite/model.cc
+++ b/tensorflow/contrib/lite/model.cc
@@ -518,6 +518,24 @@ void* ParseOpData(const Operator* op, BuiltinOperator op_type,
       builtin_data = reinterpret_cast<void*>(params);
       break;
     }
+    case BuiltinOperator_BATCH_TO_SPACE_ND: {
+      auto* params = MallocPOD<TfLiteBatchToSpaceNDParams>();
+      if (auto* schema_params =
+              op->builtin_options_as_BatchToSpaceNDOptions()) {
+        const auto& block_shape = schema_params->block_shape();
+        FlatBufferIntVectorToArray(sizeof(params->block_shape), block_shape,
+                                   params->block_shape, error_reporter);
+        const auto& before_crops = schema_params->before_crops();
+        FlatBufferIntVectorToArray(sizeof(params->before_crops), before_crops,
+                                   params->before_crops, error_reporter);
+        const auto& after_crops = schema_params->after_crops();
+        FlatBufferIntVectorToArray(sizeof(params->after_crops), after_crops,
+                                   params->after_crops, error_reporter);
+        params->num_spatial_dimensions = block_shape->Length();
+      }
+      builtin_data = reinterpret_cast<void*>(params);
+      break;
+    }
   }
   return builtin_data;
 }
diff --git a/tensorflow/contrib/lite/nnapi_delegate.cc b/tensorflow/contrib/lite/nnapi_delegate.cc
index 6b93a70bff..5cb0afcea0 100644
--- a/tensorflow/contrib/lite/nnapi_delegate.cc
+++ b/tensorflow/contrib/lite/nnapi_delegate.cc
@@ -307,6 +307,7 @@ void AddOpsAndParams(tflite::Interpreter* interpreter,
       case tflite::BuiltinOperator_SKIP_GRAM:
       case tflite::BuiltinOperator_RELU1:
       case tflite::BuiltinOperator_GATHER:
+      case tflite::BuiltinOperator_BATCH_TO_SPACE_ND:
         FATAL("Op code %d is currently not delegated to NNAPI", builtin);
         nn_op_type = -1;  // set to invalid
         break;
diff --git a/tensorflow/contrib/lite/schema/schema.fbs b/tensorflow/contrib/lite/schema/schema.fbs
index 8b48543fc8..cc31e03dfc 100644
--- a/tensorflow/contrib/lite/schema/schema.fbs
+++ b/tensorflow/contrib/lite/schema/schema.fbs
@@ -107,6 +107,7 @@ enum BuiltinOperator : byte {
   PAD = 34,
   UNIDIRECTIONAL_SEQUENCE_RNN = 35,
   GATHER = 36,
+  BATCH_TO_SPACE_ND = 37,
 }
 
 // Options for the builtin operators.
@@ -134,6 +135,7 @@ union BuiltinOptions {
   MulOptions,
   PadOptions,
   GatherOptions,
+  BatchToSpaceNDOptions,
 }
 
 enum Padding : byte { SAME, VALID }
@@ -258,6 +260,12 @@ table ReshapeOptions {
   new_shape:[int];
 }
 
+table BatchToSpaceNDOptions {
+  block_shape:[int];
+  before_crops:[int];
+  after_crops:[int];
+}
+
 table SkipGramOptions {
   ngram_size: int;
   max_skip_size: int;
diff --git a/tensorflow/contrib/lite/schema/schema_generated.h b/tensorflow/contrib/lite/schema/schema_generated.h
old mode 100644
new mode 100755
index 7de205e1e4..aa169198fe
--- a/tensorflow/contrib/lite/schema/schema_generated.h
+++ b/tensorflow/contrib/lite/schema/schema_generated.h
@@ -1,3 +1,4 @@
+
 /* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
 
 Licensed under the Apache License, Version 2.0 (the "License");
@@ -85,6 +86,9 @@ struct PadOptionsT;
 struct ReshapeOptions;
 struct ReshapeOptionsT;
 
+struct BatchToSpaceNDOptions;
+struct BatchToSpaceNDOptionsT;
+
 struct SkipGramOptions;
 struct SkipGramOptionsT;
 
@@ -176,11 +180,12 @@ enum BuiltinOperator {
   BuiltinOperator_PAD = 34,
   BuiltinOperator_UNIDIRECTIONAL_SEQUENCE_RNN = 35,
   BuiltinOperator_GATHER = 36,
+  BuiltinOperator_BATCH_TO_SPACE_ND = 37,
   BuiltinOperator_MIN = BuiltinOperator_ADD,
-  BuiltinOperator_MAX = BuiltinOperator_GATHER
+  BuiltinOperator_MAX = BuiltinOperator_BATCH_TO_SPACE_ND
 };
 
-inline BuiltinOperator (&EnumValuesBuiltinOperator())[34] {
+inline BuiltinOperator (&EnumValuesBuiltinOperator())[35] {
   static BuiltinOperator values[] = {
       BuiltinOperator_ADD,
       BuiltinOperator_AVERAGE_POOL_2D,
@@ -215,7 +220,8 @@ inline BuiltinOperator (&EnumValuesBuiltinOperator())[34] {
       BuiltinOperator_EMBEDDING_LOOKUP_SPARSE,
       BuiltinOperator_PAD,
       BuiltinOperator_UNIDIRECTIONAL_SEQUENCE_RNN,
-      BuiltinOperator_GATHER};
+      BuiltinOperator_GATHER,
+      BuiltinOperator_BATCH_TO_SPACE_ND};
   return values;
 }
 
@@ -257,6 +263,7 @@ inline const char **EnumNamesBuiltinOperator() {
                                 "PAD",
                                 "UNIDIRECTIONAL_SEQUENCE_RNN",
                                 "GATHER",
+                                "BATCH_TO_SPACE_ND",
                                 nullptr};
   return names;
 }
@@ -291,11 +298,12 @@ enum BuiltinOptions {
   BuiltinOptions_MulOptions = 21,
   BuiltinOptions_PadOptions = 22,
   BuiltinOptions_GatherOptions = 23,
+  BuiltinOptions_BatchToSpaceNDOptions = 24,
   BuiltinOptions_MIN = BuiltinOptions_NONE,
-  BuiltinOptions_MAX = BuiltinOptions_GatherOptions
+  BuiltinOptions_MAX = BuiltinOptions_BatchToSpaceNDOptions
 };
 
-inline BuiltinOptions (&EnumValuesBuiltinOptions())[24] {
+inline BuiltinOptions (&EnumValuesBuiltinOptions())[25] {
   static BuiltinOptions values[] = {
       BuiltinOptions_NONE,
       BuiltinOptions_Conv2DOptions,
@@ -320,7 +328,8 @@ inline BuiltinOptions (&EnumValuesBuiltinOptions())[24] {
       BuiltinOptions_EmbeddingLookupSparseOptions,
       BuiltinOptions_MulOptions,
       BuiltinOptions_PadOptions,
-      BuiltinOptions_GatherOptions};
+      BuiltinOptions_GatherOptions,
+      BuiltinOptions_BatchToSpaceNDOptions};
   return values;
 }
 
@@ -349,6 +358,7 @@ inline const char **EnumNamesBuiltinOptions() {
                                 "MulOptions",
                                 "PadOptions",
                                 "GatherOptions",
+                                "BatchToSpaceNDOptions",
                                 nullptr};
   return names;
 }
@@ -482,6 +492,11 @@ struct BuiltinOptionsTraits<GatherOptions> {
   static const BuiltinOptions enum_value = BuiltinOptions_GatherOptions;
 };
 
+template <>
+struct BuiltinOptionsTraits<BatchToSpaceNDOptions> {
+  static const BuiltinOptions enum_value = BuiltinOptions_BatchToSpaceNDOptions;
+};
+
 struct BuiltinOptionsUnion {
   BuiltinOptions type;
   void *value;
@@ -759,6 +774,16 @@ struct BuiltinOptionsUnion {
                ? reinterpret_cast<const GatherOptionsT *>(value)
                : nullptr;
   }
+  BatchToSpaceNDOptionsT *AsBatchToSpaceNDOptions() {
+    return type == BuiltinOptions_BatchToSpaceNDOptions
+               ? reinterpret_cast<BatchToSpaceNDOptionsT *>(value)
+               : nullptr;
+  }
+  const BatchToSpaceNDOptionsT *AsBatchToSpaceNDOptions() const {
+    return type == BuiltinOptions_BatchToSpaceNDOptions
+               ? reinterpret_cast<const BatchToSpaceNDOptionsT *>(value)
+               : nullptr;
+  }
 };
 
 bool VerifyBuiltinOptions(flatbuffers::Verifier &verifier, const void *obj,
@@ -2512,6 +2537,101 @@ flatbuffers::Offset<ReshapeOptions> CreateReshapeOptions(
     flatbuffers::FlatBufferBuilder &_fbb, const ReshapeOptionsT *_o,
     const flatbuffers::rehasher_function_t *_rehasher = nullptr);
 
+struct BatchToSpaceNDOptionsT : public flatbuffers::NativeTable {
+  typedef BatchToSpaceNDOptions TableType;
+  std::vector<int32_t> block_shape;
+  std::vector<int32_t> before_crops;
+  std::vector<int32_t> after_crops;
+  BatchToSpaceNDOptionsT() {}
+};
+
+struct BatchToSpaceNDOptions FLATBUFFERS_FINAL_CLASS
+    : private flatbuffers::Table {
+  typedef BatchToSpaceNDOptionsT NativeTableType;
+  enum { VT_BLOCK_SHAPE = 4, VT_BEFORE_CROPS = 6, VT_AFTER_CROPS = 8 };
+  const flatbuffers::Vector<int32_t> *block_shape() const {
+    return GetPointer<const flatbuffers::Vector<int32_t> *>(VT_BLOCK_SHAPE);
+  }
+  const flatbuffers::Vector<int32_t> *before_crops() const {
+    return GetPointer<const flatbuffers::Vector<int32_t> *>(VT_BEFORE_CROPS);
+  }
+  const flatbuffers::Vector<int32_t> *after_crops() const {
+    return GetPointer<const flatbuffers::Vector<int32_t> *>(VT_AFTER_CROPS);
+  }
+  bool Verify(flatbuffers::Verifier &verifier) const {
+    return VerifyTableStart(verifier) &&
+           VerifyOffset(verifier, VT_BLOCK_SHAPE) &&
+           verifier.Verify(block_shape()) &&
+           VerifyOffset(verifier, VT_BEFORE_CROPS) &&
+           verifier.Verify(before_crops()) &&
+           VerifyOffset(verifier, VT_AFTER_CROPS) &&
+           verifier.Verify(after_crops()) && verifier.EndTable();
+  }
+  BatchToSpaceNDOptionsT *UnPack(
+      const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  void UnPackTo(
+      BatchToSpaceNDOptionsT *_o,
+      const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  static flatbuffers::Offset<BatchToSpaceNDOptions> Pack(
+      flatbuffers::FlatBufferBuilder &_fbb, const BatchToSpaceNDOptionsT *_o,
+      const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct BatchToSpaceNDOptionsBuilder {
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  void add_block_shape(
+      flatbuffers::Offset<flatbuffers::Vector<int32_t>> block_shape) {
+    fbb_.AddOffset(BatchToSpaceNDOptions::VT_BLOCK_SHAPE, block_shape);
+  }
+  void add_before_crops(
+      flatbuffers::Offset<flatbuffers::Vector<int32_t>> before_crops) {
+    fbb_.AddOffset(BatchToSpaceNDOptions::VT_BEFORE_CROPS, before_crops);
+  }
+  void add_after_crops(
+      flatbuffers::Offset<flatbuffers::Vector<int32_t>> after_crops) {
+    fbb_.AddOffset(BatchToSpaceNDOptions::VT_AFTER_CROPS, after_crops);
+  }
+  explicit BatchToSpaceNDOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
+      : fbb_(_fbb) {
+    start_ = fbb_.StartTable();
+  }
+  BatchToSpaceNDOptionsBuilder &operator=(const BatchToSpaceNDOptionsBuilder &);
+  flatbuffers::Offset<BatchToSpaceNDOptions> Finish() {
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<BatchToSpaceNDOptions>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<BatchToSpaceNDOptions> CreateBatchToSpaceNDOptions(
+    flatbuffers::FlatBufferBuilder &_fbb,
+    flatbuffers::Offset<flatbuffers::Vector<int32_t>> block_shape = 0,
+    flatbuffers::Offset<flatbuffers::Vector<int32_t>> before_crops = 0,
+    flatbuffers::Offset<flatbuffers::Vector<int32_t>> after_crops = 0) {
+  BatchToSpaceNDOptionsBuilder builder_(_fbb);
+  builder_.add_after_crops(after_crops);
+  builder_.add_before_crops(before_crops);
+  builder_.add_block_shape(block_shape);
+  return builder_.Finish();
+}
+
+inline flatbuffers::Offset<BatchToSpaceNDOptions>
+CreateBatchToSpaceNDOptionsDirect(
+    flatbuffers::FlatBufferBuilder &_fbb,
+    const std::vector<int32_t> *block_shape = nullptr,
+    const std::vector<int32_t> *before_crops = nullptr,
+    const std::vector<int32_t> *after_crops = nullptr) {
+  return tflite::CreateBatchToSpaceNDOptions(
+      _fbb, block_shape ? _fbb.CreateVector<int32_t>(*block_shape) : 0,
+      before_crops ? _fbb.CreateVector<int32_t>(*before_crops) : 0,
+      after_crops ? _fbb.CreateVector<int32_t>(*after_crops) : 0);
+}
+
+flatbuffers::Offset<BatchToSpaceNDOptions> CreateBatchToSpaceNDOptions(
+    flatbuffers::FlatBufferBuilder &_fbb, const BatchToSpaceNDOptionsT *_o,
+    const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
 struct SkipGramOptionsT : public flatbuffers::NativeTable {
   typedef SkipGramOptions TableType;
   int32_t ngram_size;
@@ -3000,6 +3120,12 @@ struct Operator FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
                ? static_cast<const GatherOptions *>(builtin_options())
                : nullptr;
   }
+  const BatchToSpaceNDOptions *builtin_options_as_BatchToSpaceNDOptions()
+      const {
+    return builtin_options_type() == BuiltinOptions_BatchToSpaceNDOptions
+               ? static_cast<const BatchToSpaceNDOptions *>(builtin_options())
+               : nullptr;
+  }
   const flatbuffers::Vector<uint8_t> *custom_options() const {
     return GetPointer<const flatbuffers::Vector<uint8_t> *>(VT_CUSTOM_OPTIONS);
   }
@@ -3162,6 +3288,12 @@ inline const GatherOptions *Operator::builtin_options_as<GatherOptions>()
   return builtin_options_as_GatherOptions();
 }
 
+template <>
+inline const BatchToSpaceNDOptions *
+Operator::builtin_options_as<BatchToSpaceNDOptions>() const {
+  return builtin_options_as_BatchToSpaceNDOptions();
+}
+
 struct OperatorBuilder {
   flatbuffers::FlatBufferBuilder &fbb_;
   flatbuffers::uoffset_t start_;
@@ -4614,6 +4746,74 @@ inline flatbuffers::Offset<ReshapeOptions> CreateReshapeOptions(
   return tflite::CreateReshapeOptions(_fbb, _new_shape);
 }
 
+inline BatchToSpaceNDOptionsT *BatchToSpaceNDOptions::UnPack(
+    const flatbuffers::resolver_function_t *_resolver) const {
+  auto _o = new BatchToSpaceNDOptionsT();
+  UnPackTo(_o, _resolver);
+  return _o;
+}
+
+inline void BatchToSpaceNDOptions::UnPackTo(
+    BatchToSpaceNDOptionsT *_o,
+    const flatbuffers::resolver_function_t *_resolver) const {
+  (void)_o;
+  (void)_resolver;
+  {
+    auto _e = block_shape();
+    if (_e) {
+      _o->block_shape.resize(_e->size());
+      for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) {
+        _o->block_shape[_i] = _e->Get(_i);
+      }
+    }
+  };
+  {
+    auto _e = before_crops();
+    if (_e) {
+      _o->before_crops.resize(_e->size());
+      for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) {
+        _o->before_crops[_i] = _e->Get(_i);
+      }
+    }
+  };
+  {
+    auto _e = after_crops();
+    if (_e) {
+      _o->after_crops.resize(_e->size());
+      for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) {
+        _o->after_crops[_i] = _e->Get(_i);
+      }
+    }
+  };
+}
+
+inline flatbuffers::Offset<BatchToSpaceNDOptions> BatchToSpaceNDOptions::Pack(
+    flatbuffers::FlatBufferBuilder &_fbb, const BatchToSpaceNDOptionsT *_o,
+    const flatbuffers::rehasher_function_t *_rehasher) {
+  return CreateBatchToSpaceNDOptions(_fbb, _o, _rehasher);
+}
+
+inline flatbuffers::Offset<BatchToSpaceNDOptions> CreateBatchToSpaceNDOptions(
+    flatbuffers::FlatBufferBuilder &_fbb, const BatchToSpaceNDOptionsT *_o,
+    const flatbuffers::rehasher_function_t *_rehasher) {
+  (void)_rehasher;
+  (void)_o;
+  struct _VectorArgs {
+    flatbuffers::FlatBufferBuilder *__fbb;
+    const BatchToSpaceNDOptionsT *__o;
+    const flatbuffers::rehasher_function_t *__rehasher;
+  } _va = {&_fbb, _o, _rehasher};
+  (void)_va;
+  auto _block_shape =
+      _o->block_shape.size() ? _fbb.CreateVector(_o->block_shape) : 0;
+  auto _before_crops =
+      _o->before_crops.size() ? _fbb.CreateVector(_o->before_crops) : 0;
+  auto _after_crops =
+      _o->after_crops.size() ? _fbb.CreateVector(_o->after_crops) : 0;
+  return tflite::CreateBatchToSpaceNDOptions(_fbb, _block_shape, _before_crops,
+                                             _after_crops);
+}
+
 inline SkipGramOptionsT *SkipGramOptions::UnPack(
     const flatbuffers::resolver_function_t *_resolver) const {
   auto _o = new SkipGramOptionsT();
@@ -5265,6 +5465,10 @@ inline bool VerifyBuiltinOptions(flatbuffers::Verifier &verifier,
       auto ptr = reinterpret_cast<const GatherOptions *>(obj);
       return verifier.VerifyTable(ptr);
     }
+    case BuiltinOptions_BatchToSpaceNDOptions: {
+      auto ptr = reinterpret_cast<const BatchToSpaceNDOptions *>(obj);
+      return verifier.VerifyTable(ptr);
+    }
     default:
       return false;
   }
@@ -5381,6 +5585,10 @@ inline void *BuiltinOptionsUnion::UnPack(
       auto ptr = reinterpret_cast<const GatherOptions *>(obj);
       return ptr->UnPack(resolver);
     }
+    case BuiltinOptions_BatchToSpaceNDOptions: {
+      auto ptr = reinterpret_cast<const BatchToSpaceNDOptions *>(obj);
+      return ptr->UnPack(resolver);
+    }
     default:
       return nullptr;
   }
@@ -5484,6 +5692,10 @@ inline flatbuffers::Offset<void> BuiltinOptionsUnion::Pack(
       auto ptr = reinterpret_cast<const GatherOptionsT *>(value);
       return CreateGatherOptions(_fbb, ptr, _rehasher).Union();
     }
+    case BuiltinOptions_BatchToSpaceNDOptions: {
+      auto ptr = reinterpret_cast<const BatchToSpaceNDOptionsT *>(value);
+      return CreateBatchToSpaceNDOptions(_fbb, ptr, _rehasher).Union();
+    }
     default:
       return 0;
   }
@@ -5597,6 +5809,11 @@ inline BuiltinOptionsUnion::BuiltinOptionsUnion(const BuiltinOptionsUnion &u)
       value = new GatherOptionsT(*reinterpret_cast<GatherOptionsT *>(u.value));
       break;
     }
+    case BuiltinOptions_BatchToSpaceNDOptions: {
+      value = new BatchToSpaceNDOptionsT(
+          *reinterpret_cast<BatchToSpaceNDOptionsT *>(u.value));
+      break;
+    }
     default:
       break;
   }
@@ -5719,6 +5936,11 @@ inline void BuiltinOptionsUnion::Reset() {
       delete ptr;
       break;
     }
+    case BuiltinOptions_BatchToSpaceNDOptions: {
+      auto ptr = reinterpret_cast<BatchToSpaceNDOptionsT *>(value);
+      delete ptr;
+      break;
+    }
     default:
       break;
   }
diff --git a/tensorflow/contrib/lite/testing/BUILD b/tensorflow/contrib/lite/testing/BUILD
index b63c0c058c..96800304e5 100644
--- a/tensorflow/contrib/lite/testing/BUILD
+++ b/tensorflow/contrib/lite/testing/BUILD
@@ -18,6 +18,7 @@ gen_zipped_test_files(
     files = [
         "add.zip",
         "avg_pool.zip",
+        "batch_to_space_nd.zip",
         "concat.zip",
         "constant.zip",
         "control_dep.zip",
diff --git a/tensorflow/contrib/lite/testing/generate_examples.py b/tensorflow/contrib/lite/testing/generate_examples.py
index 4c01fedb1e..02f59438cd 100644
--- a/tensorflow/contrib/lite/testing/generate_examples.py
+++ b/tensorflow/contrib/lite/testing/generate_examples.py
@@ -96,6 +96,10 @@ KNOWN_BUGS = {
     r"space_to_depth.*(float16|int32|uint8|int64)": "68018134",
     # Gather doesn't support int64 indices.
     r"gather.*indices_dtype=int64": "XXXX",
+    # BatchToSpaceND doesn't support cropping.
+    r"batch_to_space_nd.*crops=\[\[1,1\],\[1,1\]\]": "70594634",
+    # BatchToSpaceND only supports 4D tensors.
+    r"batch_to_space_nd.*input_shape=\[8,2,2,2,1,1\]": "70594733",
 }
 
 
@@ -1198,6 +1202,43 @@ def make_space_to_depth_tests(zip_path):
   make_zip_of_tests(zip_path, test_parameters, build_graph, build_inputs)
 
 
+def make_batch_to_space_nd_tests(zip_path):
+  """Make a set of tests to do batch_to_space_nd."""
+
+  test_parameters = [
+      {
+          "dtype": [tf.float32, tf.int64, tf.int32],
+          "input_shape": [[12, 2, 2, 1]],
+          "block_shape": [[1, 4], [2, 2], [3, 4]],
+          "crops": [[[0, 0], [0, 0]], [[1, 1], [1, 1]]],
+      },
+      # Non-4D use case: 1 bath dimension, 3 spatial dimensions, 2 others.
+      {
+          "dtype": [tf.float32],
+          "input_shape": [[8, 2, 2, 2, 1, 1]],
+          "block_shape": [[2, 2, 2]],
+          "crops": [[[0, 0], [0, 0], [0, 0]]],
+      },
+  ]
+
+  def build_graph(parameters):
+    input_tensor = tf.placeholder(
+        dtype=parameters["dtype"],
+        name="input",
+        shape=parameters["input_shape"])
+    out = tf.batch_to_space_nd(input_tensor, parameters["block_shape"],
+                               parameters["crops"])
+    return [input_tensor], [out]
+
+  def build_inputs(parameters, sess, inputs, outputs):
+    input_values = create_tensor_data(parameters["dtype"],
+                                      parameters["input_shape"])
+    return [input_values], sess.run(
+        outputs, feed_dict=dict(zip(inputs, [input_values])))
+
+  make_zip_of_tests(zip_path, test_parameters, build_graph, build_inputs)
+
+
 def make_l2_pool(input_tensor, ksize, strides, padding, data_format):
   """Given an input perform a sequence of TensorFlow ops to produce l2pool."""
   return tf.sqrt(tf.nn.avg_pool(
@@ -1226,6 +1267,7 @@ def main(unused_args):
     dispatch = {
         "control_dep.zip": make_control_dep_tests,
         "add.zip": make_add_tests,
+        "batch_to_space_nd.zip": make_batch_to_space_nd_tests,
         "conv.zip": make_conv_tests,
         "constant.zip": make_constant_tests,
         "depthwiseconv.zip": make_depthwiseconv_tests,
diff --git a/tensorflow/contrib/lite/testing/generated_examples_zip_test.cc b/tensorflow/contrib/lite/testing/generated_examples_zip_test.cc
index 29f0c68ba4..4c05979e24 100644
--- a/tensorflow/contrib/lite/testing/generated_examples_zip_test.cc
+++ b/tensorflow/contrib/lite/testing/generated_examples_zip_test.cc
@@ -241,6 +241,7 @@ TEST_P(OpsTest, RunStuff) {
 
 INSTANTIATE_TESTS(add)
 INSTANTIATE_TESTS(avg_pool)
+INSTANTIATE_TESTS(batch_to_space_nd)
 INSTANTIATE_TESTS(concat)
 INSTANTIATE_TESTS(constant)
 INSTANTIATE_TESTS(control_dep)
diff --git a/tensorflow/contrib/lite/toco/BUILD b/tensorflow/contrib/lite/toco/BUILD
index 78c036fa77..7556a402f9 100644
--- a/tensorflow/contrib/lite/toco/BUILD
+++ b/tensorflow/contrib/lite/toco/BUILD
@@ -202,6 +202,7 @@ cc_library(
         "graph_transformations/remove_trivial_reshape.cc",
         "graph_transformations/remove_unused_op.cc",
         "graph_transformations/resolve_batch_normalization.cc",
+        "graph_transformations/resolve_batch_to_space_nd_attributes.cc",
         "graph_transformations/resolve_constant_binary.cc",
         "graph_transformations/resolve_constant_concatenation.cc",
         "graph_transformations/resolve_constant_fake_quant.cc",
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.h b/tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.h
index c1dc41170c..2eb244ee08 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.h
+++ b/tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.h
@@ -152,6 +152,7 @@ DECLARE_GRAPH_TRANSFORMATION(ResolveConstantFakeQuant)
 DECLARE_GRAPH_TRANSFORMATION(ResolveConstantConcatenation)
 DECLARE_GRAPH_TRANSFORMATION(DropFakeQuant)
 DECLARE_GRAPH_TRANSFORMATION(UnfuseActivationFunctions)
+DECLARE_GRAPH_TRANSFORMATION(ResolveBatchToSpaceNDAttributes)
 DECLARE_GRAPH_TRANSFORMATION(ResolvePadAttributes)
 DECLARE_GRAPH_TRANSFORMATION(ResolveStridedSliceAttributes)
 DECLARE_GRAPH_TRANSFORMATION(ResolveSliceAttributes)
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_batch_to_space_nd_attributes.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_batch_to_space_nd_attributes.cc
new file mode 100644
index 0000000000..a4f198e92f
--- /dev/null
+++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_batch_to_space_nd_attributes.cc
@@ -0,0 +1,70 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include <memory>
+#include <string>
+#include <unordered_map>
+#include <vector>
+
+#include "tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.h"
+#include "tensorflow/contrib/lite/toco/model.h"
+#include "tensorflow/contrib/lite/toco/tooling_util.h"
+#include "tensorflow/core/platform/logging.h"
+
+namespace toco {
+
+bool ResolveBatchToSpaceNDAttributes::Run(Model* model, std::size_t op_index) {
+  const auto op_it = model->operators.begin() + op_index;
+  if (op_it->get()->type != OperatorType::kBatchToSpaceND) return false;
+
+  auto* op = static_cast<BatchToSpaceNDOperator*>(op_it->get());
+
+  // The attributes are resolved only when the 3 attributes (block_shape,
+  // before_crops, after_crops) are all constant.
+  if (!op->block_shape.empty()) {
+    return false;
+  }
+
+  CHECK_EQ(op->inputs.size(), 3);
+  if (!IsConstantParameterArray(*model, op->inputs[1]) or
+      !IsConstantParameterArray(*model, op->inputs[2]))
+    return false;
+
+  // Handling block_shape.
+  const auto& block_shape_array = *model->arrays[op->inputs[1]];
+  if (!block_shape_array.has_shape()) return false;
+  const std::vector<int>& block_shape_dims = block_shape_array.shape().dims();
+  CHECK_EQ(block_shape_dims.size(), 1);
+  std::vector<int> block_shape_buffer =
+      block_shape_array.GetBuffer<ArrayDataType::kInt32>().data;
+  for (int i = 0; i < block_shape_dims[0]; ++i) {
+    op->block_shape.push_back(block_shape_buffer[i]);
+  }
+
+  // Handling crops.
+  const auto& crops_array = *model->arrays[op->inputs[2]];
+  if (!crops_array.has_shape()) return false;
+  const std::vector<int>& crops_dims = crops_array.shape().dims();
+  CHECK_EQ(crops_dims.size(), 2);
+  std::vector<int> crops_buffer =
+      crops_array.GetBuffer<ArrayDataType::kInt32>().data;
+  for (int i = 0; i < crops_dims[0]; ++i) {
+    op->before_crops.push_back(crops_buffer[i * 2]);
+    op->after_crops.push_back(crops_buffer[i * 2 + 1]);
+  }
+
+  return true;
+}
+
+}  // namespace toco
diff --git a/tensorflow/contrib/lite/toco/model.h b/tensorflow/contrib/lite/toco/model.h
index d155d2bb5c..7305f858da 100644
--- a/tensorflow/contrib/lite/toco/model.h
+++ b/tensorflow/contrib/lite/toco/model.h
@@ -1261,6 +1261,10 @@ struct SpaceToBatchNDOperator : Operator {
 // TensorFlow equivalent: BatchToSpaceND
 struct BatchToSpaceNDOperator : Operator {
   BatchToSpaceNDOperator() : Operator(OperatorType::kBatchToSpaceND) {}
+
+  std::vector<int> block_shape;
+  std::vector<int> before_crops;
+  std::vector<int> after_crops;
 };
 
 // Mean operator.
diff --git a/tensorflow/contrib/lite/toco/tflite/operator.cc b/tensorflow/contrib/lite/toco/tflite/operator.cc
index 7a68c6dbc9..ede6df88ab 100644
--- a/tensorflow/contrib/lite/toco/tflite/operator.cc
+++ b/tensorflow/contrib/lite/toco/tflite/operator.cc
@@ -130,6 +130,37 @@ class Add : public BuiltinOperator<AddOperator, ::tflite::AddOptions,
   }
 };
 
+class BatchToSpaceND
+    : public BuiltinOperator<BatchToSpaceNDOperator,
+                             ::tflite::BatchToSpaceNDOptions,
+                             ::tflite::BuiltinOptions_BatchToSpaceNDOptions> {
+ public:
+  using BuiltinOperator::BuiltinOperator;
+
+  flatbuffers::Offset<TfLiteOptions> WriteOptions(
+      const TocoOperator& op,
+      flatbuffers::FlatBufferBuilder* builder) const override {
+    auto block_shape = builder->CreateVector(op.block_shape);
+    auto before_crops = builder->CreateVector(op.before_crops);
+    auto after_crops = builder->CreateVector(op.after_crops);
+    return ::tflite::CreateBatchToSpaceNDOptions(*builder, block_shape,
+                                                 before_crops, after_crops);
+  }
+
+  void ReadOptions(const TfLiteOptions& options,
+                   TocoOperator* op) const override {
+    op->block_shape.insert(op->block_shape.end(),
+                           options.block_shape()->begin(),
+                           options.block_shape()->end());
+    op->before_crops.insert(op->before_crops.end(),
+                            options.before_crops()->begin(),
+                            options.before_crops()->end());
+    op->after_crops.insert(op->after_crops.end(),
+                           options.after_crops()->begin(),
+                           options.after_crops()->end());
+  }
+};
+
 class Cast : public CustomOperator<CastOperator> {
  public:
   using CustomOperator::CustomOperator;
@@ -571,6 +602,9 @@ std::vector<std::unique_ptr<BaseOperator>> BuildOperatorList() {
   ops.emplace_back(new Add(::tflite::BuiltinOperator_ADD, OperatorType::kAdd));
   ops.emplace_back(new AveragePool(::tflite::BuiltinOperator_AVERAGE_POOL_2D,
                                    OperatorType::kAveragePool));
+  ops.emplace_back(
+      new BatchToSpaceND(::tflite::BuiltinOperator_BATCH_TO_SPACE_ND,
+                         OperatorType::kBatchToSpaceND));
   ops.emplace_back(new Concatenation(::tflite::BuiltinOperator_CONCATENATION,
                                      OperatorType::kConcatenation));
   ops.emplace_back(
diff --git a/tensorflow/contrib/lite/toco/tflite/operator_test.cc b/tensorflow/contrib/lite/toco/tflite/operator_test.cc
index caecbd0325..735eea4ddc 100644
--- a/tensorflow/contrib/lite/toco/tflite/operator_test.cc
+++ b/tensorflow/contrib/lite/toco/tflite/operator_test.cc
@@ -119,6 +119,19 @@ TEST_F(OperatorTest, BuiltinAdd) {
             output_toco_op->fused_activation_function);
 }
 
+TEST_F(OperatorTest, BuiltinBatchToSpaceND) {
+  BatchToSpaceNDOperator op;
+  op.block_shape = {2, 2};
+  op.before_crops = {1, 2};
+  op.after_crops = {3, 4};
+
+  auto output_toco_op = SerializeAndDeserialize(
+      GetOperator("BATCH_TO_SPACE_ND", OperatorType::kBatchToSpaceND), op);
+  EXPECT_EQ(op.block_shape, output_toco_op->block_shape);
+  EXPECT_EQ(op.before_crops, output_toco_op->before_crops);
+  EXPECT_EQ(op.after_crops, output_toco_op->after_crops);
+}
+
 TEST_F(OperatorTest, CustomCast) {
   CastOperator op;
   op.src_data_type = ArrayDataType::kFloat;
diff --git a/tensorflow/contrib/lite/toco/toco_tooling.cc b/tensorflow/contrib/lite/toco/toco_tooling.cc
index 7e50c2207f..d6652b7a41 100644
--- a/tensorflow/contrib/lite/toco/toco_tooling.cc
+++ b/tensorflow/contrib/lite/toco/toco_tooling.cc
@@ -78,6 +78,7 @@ void MakeGeneralGraphTransformationsSet(
   transformations->Add(new IdentifyRelu1);
   transformations->Add(new RemoveTrivialBinaryOperator);
   transformations->Add(new ReadFakeQuantMinMax);
+  transformations->Add(new ResolveBatchToSpaceNDAttributes);
   transformations->Add(new ResolvePadAttributes);
   transformations->Add(new ResolveStridedSliceAttributes);
   transformations->Add(new ResolveSliceAttributes);
-- 
GitLab


From 798fa36d11119e6fdc13b90a14abfe1805e7de90 Mon Sep 17 00:00:00 2001
From: Gunhan Gulsoy <gunan@google.com>
Date: Thu, 14 Dec 2017 22:02:41 -0800
Subject: [PATCH 1047/1225] Instead of "option" use "set" to define non-bool
 cmake build args. (#15372)

---
 tensorflow/contrib/cmake/CMakeLists.txt | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tensorflow/contrib/cmake/CMakeLists.txt b/tensorflow/contrib/cmake/CMakeLists.txt
index 89c1c86d68..0257d66f71 100644
--- a/tensorflow/contrib/cmake/CMakeLists.txt
+++ b/tensorflow/contrib/cmake/CMakeLists.txt
@@ -36,8 +36,8 @@ option(tensorflow_ENABLE_SNAPPY_SUPPORT "Enable SNAPPY compression support" ON)
 
 # GPU, CUDA and cuDNN options
 option(tensorflow_ENABLE_GPU "Enable GPU support" OFF)
-option(tensorflow_CUDA_VERSION "CUDA version to build against" 9.0)
-option(tensorflow_CUDNN_VERSION "cuDNN version to build against" 7)
+set(tensorflow_CUDA_VERSION "9.0" CACHE STRING "CUDA version to build against")
+set(tensorflow_CUDNN_VERSION "7" CACHE STRING "cuDNN version to build against")
 
 if(HAIKU)
 	option(tensorflow_ENABLE_POSITION_INDEPENDENT_CODE "Enable PIE support" OFF)
-- 
GitLab


From 7d2a601fb5c5cb06173ec4fa3737a363fce58f5b Mon Sep 17 00:00:00 2001
From: CQY <qychen@pku.edu.cn>
Date: Fri, 15 Dec 2017 15:43:55 +0800
Subject: [PATCH 1048/1225] Fix api usage in examples of gan

---
 tensorflow/contrib/gan/README.md | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/tensorflow/contrib/gan/README.md b/tensorflow/contrib/gan/README.md
index 4bca0a1d62..4ead66ca13 100644
--- a/tensorflow/contrib/gan/README.md
+++ b/tensorflow/contrib/gan/README.md
@@ -99,8 +99,8 @@ gan_model = tfgan.gan_model(
 # Build the GAN loss.
 gan_loss = tfgan.gan_loss(
     gan_model,
-    generator_loss_fn=tfgan_losses.wasserstein_generator_loss,
-    discriminator_loss_fn=tfgan_losses.wasserstein_discriminator_loss)
+    generator_loss_fn=tfgan.losses.wasserstein_generator_loss,
+    discriminator_loss_fn=tfgan.losses.wasserstein_discriminator_loss)
 
 # Create the train ops, which calculate gradients and apply updates to weights.
 train_ops = tfgan.gan_train_ops(
@@ -161,8 +161,8 @@ gan_model = tfgan.gan_model(
 # Build the GAN loss and standard pixel loss.
 gan_loss = tfgan.gan_loss(
     gan_model,
-    generator_loss_fn=tfgan_losses.wasserstein_generator_loss,
-    discriminator_loss_fn=tfgan_losses.wasserstein_discriminator_loss,
+    generator_loss_fn=tfgan.losses.wasserstein_generator_loss,
+    discriminator_loss_fn=tfgan.losses.wasserstein_discriminator_loss,
     gradient_penalty=1.0)
 l1_pixel_loss = tf.norm(gan_model.real_data - gan_model.generated_data, ord=1)
 
@@ -193,8 +193,8 @@ gan_model = tfgan.gan_model(
 # Build the GAN loss and standard pixel loss.
 gan_loss = tfgan.gan_loss(
     gan_model,
-    generator_loss_fn=tfgan_losses.least_squares_generator_loss,
-    discriminator_loss_fn=tfgan_losses.least_squares_discriminator_loss)
+    generator_loss_fn=tfgan.losses.least_squares_generator_loss,
+    discriminator_loss_fn=tfgan.losses.least_squares_discriminator_loss)
 l1_pixel_loss = tf.norm(gan_model.real_data - gan_model.generated_data, ord=1)
 
 # Modify the loss tuple to include the pixel loss.
@@ -223,8 +223,8 @@ gan_model = tfgan.infogan_model(
 # Build the GAN loss with mutual information penalty.
 gan_loss = tfgan.gan_loss(
     gan_model,
-    generator_loss_fn=tfgan_losses.wasserstein_generator_loss,
-    discriminator_loss_fn=tfgan_losses.wasserstein_discriminator_loss,
+    generator_loss_fn=tfgan.losses.wasserstein_generator_loss,
+    discriminator_loss_fn=tfgan.losses.wasserstein_discriminator_loss,
     gradient_penalty=1.0,
     mutual_information_penalty_weight=1.0)
 
-- 
GitLab


From 88bec2d66a5a2a9ebca8db9cfa4c7e95e48d2d4f Mon Sep 17 00:00:00 2001
From: Jiongyan Zhang <qmick@live.cn>
Date: Fri, 15 Dec 2017 20:12:19 +0800
Subject: [PATCH 1049/1225] Fix typo of tf.abs docstring

Change "'", which causes incorrect highlight, to "`"
---
 tensorflow/python/ops/math_ops.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/python/ops/math_ops.py b/tensorflow/python/ops/math_ops.py
index 6af36343d5..6c5bdc661f 100644
--- a/tensorflow/python/ops/math_ops.py
+++ b/tensorflow/python/ops/math_ops.py
@@ -252,7 +252,7 @@ def abs(x, name=None):
   Returns:
     A `Tensor` or `SparseTensor` the same size and type as `x` with absolute
       values.
-    Note, for `complex64` or `complex128' input, the returned `Tensor` will be
+    Note, for `complex64` or `complex128` input, the returned `Tensor` will be
       of type `float32` or `float64`, respectively.
   """
   with ops.name_scope(name, "Abs", [x]) as name:
-- 
GitLab


From 518acb6eb6ca3729406e6f62afdd4de75a0dda44 Mon Sep 17 00:00:00 2001
From: Daniel Ylitalo <daniel.ylitalo@mytaste.com>
Date: Fri, 15 Dec 2017 13:41:31 +0100
Subject: [PATCH 1050/1225] - Bumping abseil-cpp to latest for FreeBSD compat -
 Adding comment about error if not casted to long first - Return error message
 if resolving path failed on FreeBSD

---
 tensorflow/core/platform/env.cc | 5 ++++-
 tensorflow/workspace.bzl        | 8 ++++----
 2 files changed, 8 insertions(+), 5 deletions(-)

diff --git a/tensorflow/core/platform/env.cc b/tensorflow/core/platform/env.cc
index 5118c4cb59..4b5bfeab0f 100644
--- a/tensorflow/core/platform/env.cc
+++ b/tensorflow/core/platform/env.cc
@@ -275,7 +275,7 @@ string Env::GetExecutablePath() {
   size_t exe_path_size = PATH_MAX;
 
   if (sysctl(mib, 4, exe_path, &exe_path_size, NULL, 0) != 0) {
-    // Not sure what to do if it fails?
+    return "Resolving ", exe_path, " failed";
   }
 #elif defined(PLATFORM_WINDOWS)
   HMODULE hModule = GetModuleHandleW(NULL);
@@ -305,6 +305,9 @@ bool Env::LocalTempFilename(string* filename) {
     int32 tid = static_cast<int32>(tid64);
     int32 pid = static_cast<int32>(getpid());
 #elif defined(__FreeBSD__)
+    // Has to be casted to long first, else this error appears:
+    // static_cast from 'pthread_t' (aka 'pthread *') to 'int32' (aka 'int')
+    // is not allowed
     int32 tid = static_cast<int32>((long) pthread_self());
     int32 pid = static_cast<int32>(getpid());
 #elif defined(PLATFORM_WINDOWS)
diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl
index 7d07769a45..a0ae527933 100644
--- a/tensorflow/workspace.bzl
+++ b/tensorflow/workspace.bzl
@@ -178,11 +178,11 @@ def tf_workspace(path_prefix="", tf_repo_name=""):
   native.http_archive(
       name = "com_google_absl",
       urls = [
-          "https://mirror.bazel.build/github.com/abseil/abseil-cpp/archive/cc4bed2d74f7c8717e31f9579214ab52a9c9c610.tar.gz",
-          "https://github.com/abseil/abseil-cpp/archive/cc4bed2d74f7c8717e31f9579214ab52a9c9c610.tar.gz",
+          "https://mirror.bazel.build/github.com/abseil/abseil-cpp/archive/720c017e30339fd1786ce4aac68bc8559736e53f.tar.gz",
+          "https://github.com/abseil/abseil-cpp/archive/720c017e30339fd1786ce4aac68bc8559736e53f.tar.gz",
       ],
-     sha256 = "f1a7349f88d2846210c42e2f7271dabeee404c2a3b4198e34a797993e3569b03",
-     strip_prefix = "abseil-cpp-cc4bed2d74f7c8717e31f9579214ab52a9c9c610",
+     sha256 = "5996380e3e8b981f55d1c8d58e709c00dbb4806ba367be75d0925a68cc2f6478",
+     strip_prefix = "abseil-cpp-720c017e30339fd1786ce4aac68bc8559736e53f",
   )
 
   native.new_http_archive(
-- 
GitLab


From 77a701d393853eb6923f56d23dd58902ad1b2dd2 Mon Sep 17 00:00:00 2001
From: Mark Daoust <markdaoust@google.com>
Date: Fri, 15 Dec 2017 07:42:51 -0800
Subject: [PATCH 1051/1225] Add a note explaining that executing things
 directly with bazel may require additional pyhton packages.

PiperOrigin-RevId: 179191795
---
 tensorflow/docs_src/install/install_sources.md | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/tensorflow/docs_src/install/install_sources.md b/tensorflow/docs_src/install/install_sources.md
index e187b0e51c..e453bd6ca1 100644
--- a/tensorflow/docs_src/install/install_sources.md
+++ b/tensorflow/docs_src/install/install_sources.md
@@ -180,7 +180,7 @@ If bazel is not installed on your system, install it now by following
 
 ### Install python dependencies
 
-To install TensorFlow, you must install the following packages:
+To build TensorFlow, you must install the following packages:
 
   * six
   * numpy, which is a numerical processing package that TensorFlow requires.
@@ -196,7 +196,11 @@ After installing pip, invoke the following commands:
 
 <pre> $ <b>sudo pip install six numpy wheel</b> </pre>
 
-
+Note: These are just the minimum requirements to _build_ tensorflow. Installing
+the pip package will download additional packages required to _run_ it. If you
+plan on executing tasks directly with `bazel` , without the pip installation,
+you may need to install additional python packages. For example, you should
+`pip install mock enum34` before running TensorFlow's tests with bazel.
 
 ### Optional: install TensorFlow for GPU prerequisites
 
-- 
GitLab


From ff87b4f9d8fc376bd18a3b31c1519a6b8421deea Mon Sep 17 00:00:00 2001
From: Jianwei Xie <xiejw@google.com>
Date: Fri, 15 Dec 2017 08:11:45 -0800
Subject: [PATCH 1052/1225] Scaffold support in TPUEstimator.

PiperOrigin-RevId: 179194460
---
 tensorflow/contrib/tpu/python/tpu/tpu.py      |   6 +-
 .../contrib/tpu/python/tpu/tpu_estimator.py   | 160 +++++++++++++++---
 2 files changed, 136 insertions(+), 30 deletions(-)

diff --git a/tensorflow/contrib/tpu/python/tpu/tpu.py b/tensorflow/contrib/tpu/python/tpu/tpu.py
index 7fb8a33698..24596bdb0a 100644
--- a/tensorflow/contrib/tpu/python/tpu/tpu.py
+++ b/tensorflow/contrib/tpu/python/tpu/tpu.py
@@ -52,6 +52,8 @@ _NOT_IMPLEMENTED_OPS = set([
     "TensorSummaryV2",
     ])
 
+_TPU_REPLICATE_ATTR = "_tpu_replicate"
+
 
 def _tpu_system_device_name(job):
   """Returns the device name for the TPU_SYSTEM device of `job`."""
@@ -138,9 +140,9 @@ class TPUReplicateContext(control_flow_ops.ControlFlowContext):
           "Non-resource Variables are not supported inside TPU computations "
           "(operator name: %s)" % op.name)
     # pylint: enable=protected-access
-    if "_tpu_replicate" in op.node_def.attr:
+    if _TPU_REPLICATE_ATTR in op.node_def.attr:
       raise ValueError("TPU computations cannot be nested")
-    op.node_def.attr["_tpu_replicate"].s = self._name
+    op.node_def.attr[_TPU_REPLICATE_ATTR].s = self._name
     op.graph.prevent_feeding(op)
     op.graph.prevent_fetching(op)
 
diff --git a/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py b/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py
index e324948be5..d66abd7b66 100644
--- a/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py
+++ b/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py
@@ -365,13 +365,17 @@ class TPUEstimatorSpec(collections.namedtuple('TPUEstimatorSpec', [
     'loss',
     'train_op',
     'eval_metrics',
-    'export_outputs'])):
+    'export_outputs',
+    'scaffold_fn'])):
   """Ops and objects returned from a `model_fn` and passed to `TPUEstimator`.
 
   See `EstimatorSpec` for `mode`, 'predictions, 'loss', 'train_op', and
   'export_outputs`.
 
-  TPU evaluation expects a slightly different signature from the
+  For evaluation, `eval_metrics `is a tuple of `metric_fn` and `tensors`, where
+  `metric_fn` runs on CPU to generate metrics and `tensors` represents the
+  `Tensor`s transferred from TPU system to CPU host and passed to `metric_fn`.
+  To be precise, TPU evaluation expects a slightly different signature from the
   ${tf.estimator.Estimator}. While `EstimatorSpec.eval_metric_ops` expects a
   dict, `TPUEstimatorSpec.eval_metrics` is a tuple of `metric_fn` and `tensors`.
   The `tensors` could be a list of `Tensor`s or dict of names to `Tensor`s. The
@@ -382,9 +386,11 @@ class TPUEstimatorSpec(collections.namedtuple('TPUEstimatorSpec', [
   to the `metric_fn` if `tensors` is list or keyword arguments if `tensors` is
   dict. `metric_fn` takes the `tensors` and returns a dict from metric string
   name to the result of calling a metric function, namely a `(metric_tensor,
-  update_op)` tuple.
+  update_op)` tuple. See `TPUEstimator` for MNIST example how to specify the
+  `eval_metrics`.
 
-  See `TPUEstimator` for MNIST example how to specify the `eval_metrics`.
+  `scaffold_fn` is a function running on CPU to generate the `Scaffold`. This
+  function should not capture any Tensors in `model_fn`.
   """
 
   def __new__(cls,
@@ -393,7 +399,8 @@ class TPUEstimatorSpec(collections.namedtuple('TPUEstimatorSpec', [
               loss=None,
               train_op=None,
               eval_metrics=None,
-              export_outputs=None):
+              export_outputs=None,
+              scaffold_fn=None):
     """Creates a validated `TPUEstimatorSpec` instance."""
     if eval_metrics is not None:
       _EvalMetrics.validate(eval_metrics)
@@ -403,18 +410,21 @@ class TPUEstimatorSpec(collections.namedtuple('TPUEstimatorSpec', [
                                                 loss=loss,
                                                 train_op=train_op,
                                                 eval_metrics=eval_metrics,
-                                                export_outputs=export_outputs)
+                                                export_outputs=export_outputs,
+                                                scaffold_fn=scaffold_fn)
 
   def as_estimator_spec(self):
     """Creates an equivalent `EstimatorSpec` used by CPU train/eval."""
     eval_metric_ops = _EvalMetrics.to_metric_metric_ops_for_cpu(
         self.eval_metrics)
+    scaffold = self.scaffold_fn() if self.scaffold_fn else None
     return model_fn_lib.EstimatorSpec(mode=self.mode,
                                       predictions=self.predictions,
                                       loss=self.loss,
                                       train_op=self.train_op,
                                       eval_metric_ops=eval_metric_ops,
-                                      export_outputs=self.export_outputs)
+                                      export_outputs=self.export_outputs,
+                                      scaffold=scaffold)
 
 
 class _InfeedOutfeedThreadBaseController(object):
@@ -679,7 +689,7 @@ class _SetEvalIterationsHook(session_run_hook.SessionRunHook):
 def generate_per_core_enqueue_ops_fn_for_host(
     ctx, input_fn, inputs_structure_recorder):
   """Generates infeed enqueue ops for per-core input_fn on a single host."""
-  infeed_queue_holder = {'instance': None}
+  captured_infeed_queue = _CapturedObject()
 
   def enqueue_ops_fn():
     """A fn returns enqueue_ops."""
@@ -702,7 +712,7 @@ def generate_per_core_enqueue_ops_fn_for_host(
 
     infeed_queue = tpu_feed.InfeedQueue(
         number_of_tuple_elements=len(per_host_sharded_inputs[0]))
-    infeed_queue_holder['instance'] = infeed_queue
+    captured_infeed_queue.capture(infeed_queue)
     infeed_queue.set_configuration_from_sharded_input_tensors(
         per_host_sharded_inputs)
 
@@ -710,13 +720,13 @@ def generate_per_core_enqueue_ops_fn_for_host(
         per_host_sharded_inputs,
         tpu_ordinal_function=ctx.tpu_ordinal_function)
     return per_host_enqueue_ops
-  return enqueue_ops_fn, (lambda: infeed_queue_holder['instance'])
+  return enqueue_ops_fn, captured_infeed_queue
 
 
 def generate_per_host_enqueue_ops_fn_for_host(
     ctx, input_fn, inputs_structure_recorder, batch_axis, device):
   """Generates infeed enqueue ops for per-host input_fn on a single host."""
-  infeed_queue_holder = {'instance': None}
+  captured_infeed_queue = _CapturedObject()
 
   def enqueue_ops_fn():
     with ops.device(device):
@@ -736,7 +746,7 @@ def generate_per_host_enqueue_ops_fn_for_host(
           tuple_types=[t.dtype for t in unsharded_tensor_list],
           tuple_shapes=[t.shape for t in unsharded_tensor_list],
           shard_dimensions=batch_axis)
-      infeed_queue_holder['instance'] = infeed_queue
+      captured_infeed_queue.capture(infeed_queue)
       infeed_queue.set_number_of_shards(num_cores_per_host)
 
       per_host_enqueue_ops = (
@@ -744,7 +754,7 @@ def generate_per_host_enqueue_ops_fn_for_host(
               unsharded_tensor_list,
               placement_function=lambda x: device))
       return per_host_enqueue_ops
-  return enqueue_ops_fn, (lambda: infeed_queue_holder['instance'])
+  return enqueue_ops_fn, captured_infeed_queue
 
 
 class _InputPipeline(object):
@@ -934,7 +944,7 @@ class _InputPipeline(object):
         host_device = tpu_host_placement_fn(host_id=host_id)
         with ops.device(host_device):
           with ops.name_scope('input_pipeline_task%d' % (host_id)):
-            enqueue_ops_fn, infeed_queue_getter = (
+            enqueue_ops_fn, captured_infeed_queue = (
                 generate_per_core_enqueue_ops_fn_for_host(
                     self._ctx, self._input_fn, self._inputs_structure_recorder))
 
@@ -944,14 +954,14 @@ class _InputPipeline(object):
             else:
               enqueue_ops.append(enqueue_ops_fn())
             # Infeed_queue_getter must be called after enqueue_ops_fn is called.
-            infeed_queues.append(infeed_queue_getter())
+            infeed_queues.append(captured_infeed_queue.get())
 
     else:
       for host_id in range(num_hosts):
         host_device = tpu_host_placement_fn(host_id=host_id)
         with ops.device(host_device):
           with ops.name_scope('input_pipeline_task%d' % (host_id)):
-            enqueue_ops_fn, infeed_queue_getter = (
+            enqueue_ops_fn, captured_infeed_queue = (
                 generate_per_host_enqueue_ops_fn_for_host(
                     self._ctx, self._input_fn, self._inputs_structure_recorder,
                     self._batch_axis, host_device))
@@ -961,7 +971,7 @@ class _InputPipeline(object):
                   device=host_device, op_fn=enqueue_ops_fn))
             else:
               enqueue_ops.append(enqueue_ops_fn())
-            infeed_queues.append(infeed_queue_getter())
+            infeed_queues.append(captured_infeed_queue.get())
     # infeed_queue is used to generate dequeue ops. The only thing it uses for
     # dequeue is dtypes and types. So, any one can be used. Here, grab the
     # first one.
@@ -1029,6 +1039,8 @@ class _ModelFnWrapper(object):
       A Fn representing the train step for TPU.
     """
 
+    captured_scaffold_fn = _CapturedObject()
+
     def train_step(loss):
       """Training step function for use inside a while loop."""
       del loss  # unused; required in function signature.
@@ -1037,9 +1049,15 @@ class _ModelFnWrapper(object):
       estimator_spec = self._verify_estimator_spec(
           self._call_model_fn(features, labels))
       loss, train_op = estimator_spec.loss, estimator_spec.train_op
+
+      if isinstance(estimator_spec, TPUEstimatorSpec):
+        captured_scaffold_fn.capture(estimator_spec.scaffold_fn)
+      else:
+        captured_scaffold_fn.capture(None)
+
       with ops.control_dependencies([train_op]):
         return array_ops.identity(loss)
-    return train_step
+    return train_step, captured_scaffold_fn
 
   def convert_to_single_tpu_eval_step(self, dequeue_fn):
     """Converts user provided model_fn` as a single eval step on TPU.
@@ -1068,6 +1086,7 @@ class _ModelFnWrapper(object):
       step for TPU. and eval_metrics is an `_EvalMetrics` instance.
     """
     eval_metrics = _EvalMetrics(self._ctx)
+    captured_scaffold_fn = _CapturedObject()
 
     def eval_step(total_loss):
       """Evaluation step function for use inside a while loop."""
@@ -1080,12 +1099,13 @@ class _ModelFnWrapper(object):
             '`TPUEstimatorSpec`. Got {}'.format(type(tpu_estimator_spec)))
 
       loss = tpu_estimator_spec.loss
+      captured_scaffold_fn.capture(tpu_estimator_spec.scaffold_fn)
       eval_metrics.record(tpu_estimator_spec)
       outfeed_ops = tpu_ops.outfeed_enqueue_tuple(eval_metrics.outfeed_tensors)
 
       with ops.control_dependencies([outfeed_ops]):
         return math_ops.add(total_loss, loss)
-    return eval_step, eval_metrics
+    return eval_step, eval_metrics, captured_scaffold_fn
 
   def _call_model_fn(self, features, labels):
     """Calls the model_fn with required parameters."""
@@ -1139,6 +1159,10 @@ class _ModelFnWrapper(object):
       raise ValueError(err_msg.format('training_hooks'))
     if estimator_spec.evaluation_hooks:
       raise ValueError(err_msg.format('evaluation_hooks'))
+
+    if estimator_spec.scaffold:
+      logging.warning('EstimatorSpec.Scaffold is ignored by TPU train/eval. '
+                      'Please use TPUEstimatorSpec.')
     return estimator_spec
 
 
@@ -1607,7 +1631,8 @@ class TPUEstimator(estimator_lib.Estimator):
             input_holders.generate_infeed_enqueue_ops_and_dequeue_fn())
 
         if mode == model_fn_lib.ModeKeys.TRAIN:
-          loss = _train_on_tpu_system(ctx, model_fn_wrapper, dequeue_fn)
+          loss, scaffold = (
+              _train_on_tpu_system(ctx, model_fn_wrapper, dequeue_fn))
           hooks = [
               TPUInfeedOutfeedSessionHook(ctx, enqueue_ops),
               training.LoggingTensorHook(
@@ -1626,10 +1651,11 @@ class TPUEstimator(estimator_lib.Estimator):
               mode,
               loss=loss,
               training_hooks=hooks,
-              train_op=control_flow_ops.group(*update_ops))
+              train_op=control_flow_ops.group(*update_ops),
+              scaffold=scaffold)
 
         # Now eval.
-        total_loss, eval_metric_ops = _eval_on_tpu_system(
+        total_loss, eval_metric_ops, scaffold = _eval_on_tpu_system(
             ctx, model_fn_wrapper, dequeue_fn)
         iterations_per_loop_var = _create_or_get_iterations_per_loop()
         mean_loss = math_ops.div(
@@ -1660,7 +1686,8 @@ class TPUEstimator(estimator_lib.Estimator):
             mode,
             loss=mean_loss,
             evaluation_hooks=hooks,
-            eval_metric_ops=eval_metric_ops)
+            eval_metric_ops=eval_metric_ops,
+            scaffold=scaffold)
     return _model_fn
 
 
@@ -1669,7 +1696,7 @@ def _eval_on_tpu_system(ctx, model_fn_wrapper, dequeue_fn):
   num_cores = ctx.num_cores
   iterations_per_loop_var = _create_or_get_iterations_per_loop()
 
-  single_tpu_eval_step, eval_metric_ops = (
+  single_tpu_eval_step, eval_metric_ops, captured_scaffold_fn = (
       model_fn_wrapper.convert_to_single_tpu_eval_step(dequeue_fn))
 
   def multi_tpu_eval_steps_on_single_shard():
@@ -1682,7 +1709,9 @@ def _eval_on_tpu_system(ctx, model_fn_wrapper, dequeue_fn):
                       inputs=[],
                       num_shards=num_cores,
                       outputs_from_all_shards=False)
-  return loss, eval_metric_ops
+
+  scaffold = _get_scaffold(captured_scaffold_fn)
+  return loss, eval_metric_ops, scaffold
 
 
 def _train_on_tpu_system(ctx, model_fn_wrapper, dequeue_fn):
@@ -1690,8 +1719,8 @@ def _train_on_tpu_system(ctx, model_fn_wrapper, dequeue_fn):
   num_cores = ctx.num_cores
   iterations_per_loop_var = _create_or_get_iterations_per_loop()
 
-  single_tpu_train_step = model_fn_wrapper.convert_to_single_tpu_train_step(
-      dequeue_fn)
+  single_tpu_train_step, captured_scaffold_fn = (
+      model_fn_wrapper.convert_to_single_tpu_train_step(dequeue_fn))
 
   def multi_tpu_train_steps_on_single_shard():
     return training_loop.repeat(
@@ -1704,7 +1733,9 @@ def _train_on_tpu_system(ctx, model_fn_wrapper, dequeue_fn):
                       inputs=[],
                       num_shards=num_cores,
                       outputs_from_all_shards=False)
-  return loss
+
+  scaffold = _get_scaffold(captured_scaffold_fn)
+  return loss, scaffold
 
 
 def _wrap_computation_in_while_loop(device, op_fn):
@@ -1740,3 +1771,76 @@ def _validate_tpu_training_graph():
         'CrossShardOptimizer must be used for model training on TPUs.')
 
 
+class _CapturedObject(object):
+  """A placeholder to capture an object.
+
+  This is useful when we need to capture a Python object in the Tensorflow
+  control flow body function and use it outside the control flow.
+  """
+
+  def __init__(self):
+    self._object = None
+    self._captured = False
+
+  def capture(self, o):
+    if self._captured:
+      raise RuntimeError(
+          'InternalError: Object can be captured only. Please file bug .')
+
+    self._captured = True
+    self._object = o
+
+  def get(self):
+    if not self._captured:
+      raise RuntimeError(
+          'InternalError: Object is not captured properly before `get`. '
+          'Please file bug .')
+    return self._object
+
+
+def _get_scaffold(captured_scaffold_fn):
+  """Retrieves the Scaffold from `captured_scaffold_fn`."""
+  with _CapturingContext(message='Inside scaffold_fn'):
+    scaffold_fn = captured_scaffold_fn.get()
+    if scaffold_fn:
+      scaffold = scaffold_fn()
+      if scaffold is None:
+        raise ValueError(
+            'TPUEstimatorSpec.scaffold_fn returns None, which is not allowed')
+    else:
+      scaffold = None
+
+  if scaffold:
+    wrapped_finalize = scaffold.finalize
+    def _finalize():
+      with _CapturingContext('Inside Scaffold.finalize'):
+        wrapped_finalize()
+    scaffold.finalize = _finalize
+  return scaffold
+
+
+class _CapturingContext(control_flow_ops.ControlFlowContext):
+  """Tracks references to Tensors defined in TPU replication."""
+
+  def __init__(self, message):
+    control_flow_ops.ControlFlowContext.__init__(self)
+    self._message = message
+
+  def AddOp(self, op):  # pylint: disable=invalid-name
+    for c in op.inputs:
+      if tpu._TPU_REPLICATE_ATTR in c.op.node_def.attr:  # pylint: disable=protected-access
+        raise ValueError(
+            '{}: Op {} depends on TPU computation {}, '
+            'which is not allowed.'.format(self._message, op, c))
+
+  def __enter__(self):
+    # pylint: disable=protected-access
+    self._g = ops.get_default_graph()
+    self._old = self._g._get_control_flow_context()
+    self._g._set_control_flow_context(self)
+    # pylint: enable=protected-access
+
+  def __exit__(self, _, __, ___):  # pylint: disable=invalid-name
+    self._g._set_control_flow_context(self._old)  # pylint: disable=protected-access
+
+
-- 
GitLab


From 483f51a259cd31294f80229dee901919fa9ab698 Mon Sep 17 00:00:00 2001
From: Alexandre Passos <apassos@google.com>
Date: Fri, 15 Dec 2017 08:30:32 -0800
Subject: [PATCH 1053/1225] Do not create context when defining functions in
 graph mode.

PiperOrigin-RevId: 179196137
---
 tensorflow/python/eager/function.py | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/tensorflow/python/eager/function.py b/tensorflow/python/eager/function.py
index 6b2bc2f380..239216243a 100644
--- a/tensorflow/python/eager/function.py
+++ b/tensorflow/python/eager/function.py
@@ -246,7 +246,8 @@ class _EagerDefinedFunction(object):
       proto_data = pywrap_tensorflow.TF_GetBuffer(buffer_)
     function_def = function_pb2.FunctionDef()
     function_def.ParseFromString(compat.as_bytes(proto_data))
-    _register(fn)
+    if context.in_eager_mode():
+      _register(fn)
     self.definition = function_def
     self.name = function_def.signature.name
     self.signature = function_def.signature
@@ -517,9 +518,10 @@ def _defun_internal(name, func, args, kwds):
                      if x not in all_ignored_ops)
   # Register any other functions defined in the graph
   # TODO(ashankar): Oh lord, forgive me for this lint travesty.
-  for f in tmp_graph._functions.values():  # pylint: disable=protected-access
-    # TODO(ashankar): What about the gradient registry?
-    _register(f._c_func)  # pylint: disable=protected-access
+  if context.in_eager_mode():
+    for f in tmp_graph._functions.values():  # pylint: disable=protected-access
+      # TODO(ashankar): What about the gradient registry?
+      _register(f._c_func)  # pylint: disable=protected-access
   return GraphModeFunction(
       fname, all_inputs, extra_inputs, tmp_graph, operations, func_def_outputs,
       func_outputs, output_shapes, variables)
-- 
GitLab


From 2df1a5cffe483c4f43828eb5a1c499560056661e Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 15 Dec 2017 08:53:08 -0800
Subject: [PATCH 1054/1225] Rename
 StreamExecutorInterface::BlockHostUntilDoneWithStatus to BlockHostUntilDone.

PiperOrigin-RevId: 179198370
---
 tensorflow/compiler/xla/service/interpreter/executor.cc | 4 ++--
 tensorflow/compiler/xla/service/interpreter/executor.h  | 2 +-
 tensorflow/stream_executor/cuda/cuda_gpu_executor.cc    | 2 +-
 tensorflow/stream_executor/cuda/cuda_gpu_executor.h     | 2 +-
 tensorflow/stream_executor/host/host_gpu_executor.cc    | 4 ++--
 tensorflow/stream_executor/host/host_gpu_executor.h     | 2 +-
 tensorflow/stream_executor/stream_executor_internal.h   | 2 +-
 tensorflow/stream_executor/stream_executor_pimpl.cc     | 2 +-
 8 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/tensorflow/compiler/xla/service/interpreter/executor.cc b/tensorflow/compiler/xla/service/interpreter/executor.cc
index f16651c969..68371910d7 100644
--- a/tensorflow/compiler/xla/service/interpreter/executor.cc
+++ b/tensorflow/compiler/xla/service/interpreter/executor.cc
@@ -85,7 +85,7 @@ bool InterpreterExecutor::HostCallback(Stream *stream,
 bool InterpreterExecutor::CreateStreamDependency(Stream *dependent,
                                                  Stream *other) {
   AsExecutorStream(dependent)->EnqueueTask(
-      [other]() { SE_CHECK_OK(other->BlockHostUntilDoneWithStatus()); });
+      [other]() { SE_CHECK_OK(other->BlockHostUntilDone()); });
   AsExecutorStream(dependent)->BlockUntilDone();
   return true;
 }
@@ -100,7 +100,7 @@ bool InterpreterExecutor::StopTimer(Stream *stream, Timer *timer) {
   return true;
 }
 
-port::Status InterpreterExecutor::BlockHostUntilDoneWithStatus(Stream *stream) {
+port::Status InterpreterExecutor::BlockHostUntilDone(Stream *stream) {
   AsExecutorStream(stream)->BlockUntilDone();
   return port::Status::OK();
 }
diff --git a/tensorflow/compiler/xla/service/interpreter/executor.h b/tensorflow/compiler/xla/service/interpreter/executor.h
index d3753a6a65..c5d07e906d 100644
--- a/tensorflow/compiler/xla/service/interpreter/executor.h
+++ b/tensorflow/compiler/xla/service/interpreter/executor.h
@@ -157,7 +157,7 @@ class InterpreterExecutor : public internal::StreamExecutorInterface {
   bool StartTimer(Stream *stream, Timer *timer) override;
   bool StopTimer(Stream *stream, Timer *timer) override;
 
-  port::Status BlockHostUntilDoneWithStatus(Stream *stream) override;
+  port::Status BlockHostUntilDone(Stream *stream) override;
 
   int PlatformDeviceCount() override { return 1; }
 
diff --git a/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc b/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc
index 7f8a7ca7c7..878fa8d9ad 100644
--- a/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc
+++ b/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc
@@ -664,7 +664,7 @@ bool CUDAExecutor::StopTimer(Stream *stream, Timer *timer) {
   return AsCUDATimer(timer)->Stop(AsCUDAStream(stream));
 }
 
-port::Status CUDAExecutor::BlockHostUntilDoneWithStatus(Stream *stream) {
+port::Status CUDAExecutor::BlockHostUntilDone(Stream *stream) {
   return CUDADriver::SynchronizeStream(context_, AsCUDAStreamValue(stream));
 }
 
diff --git a/tensorflow/stream_executor/cuda/cuda_gpu_executor.h b/tensorflow/stream_executor/cuda/cuda_gpu_executor.h
index 5adbb59856..dbbbcd476f 100644
--- a/tensorflow/stream_executor/cuda/cuda_gpu_executor.h
+++ b/tensorflow/stream_executor/cuda/cuda_gpu_executor.h
@@ -152,7 +152,7 @@ class CUDAExecutor : public internal::StreamExecutorInterface {
 
   Event::Status PollForEventStatus(Event *event) override;
 
-  port::Status BlockHostUntilDoneWithStatus(Stream *stream) override;
+  port::Status BlockHostUntilDone(Stream *stream) override;
 
   int PlatformDeviceCount() override { return CUDADriver::GetDeviceCount(); }
 
diff --git a/tensorflow/stream_executor/host/host_gpu_executor.cc b/tensorflow/stream_executor/host/host_gpu_executor.cc
index d103dcd033..542f521ef7 100644
--- a/tensorflow/stream_executor/host/host_gpu_executor.cc
+++ b/tensorflow/stream_executor/host/host_gpu_executor.cc
@@ -162,7 +162,7 @@ void HostExecutor::DeallocateStream(Stream *stream) {}
 
 bool HostExecutor::CreateStreamDependency(Stream *dependent, Stream *other) {
   AsHostStream(dependent)->EnqueueTask(
-      [other]() { SE_CHECK_OK(other->BlockHostUntilDoneWithStatus()); });
+      [other]() { SE_CHECK_OK(other->BlockHostUntilDone()); });
   AsHostStream(dependent)->BlockUntilDone();
   return true;
 }
@@ -177,7 +177,7 @@ bool HostExecutor::StopTimer(Stream *stream, Timer *timer) {
   return true;
 }
 
-port::Status HostExecutor::BlockHostUntilDoneWithStatus(Stream *stream) {
+port::Status HostExecutor::BlockHostUntilDone(Stream *stream) {
   AsHostStream(stream)->BlockUntilDone();
   return port::Status::OK();
 }
diff --git a/tensorflow/stream_executor/host/host_gpu_executor.h b/tensorflow/stream_executor/host/host_gpu_executor.h
index e884554a15..e2c0e6d6b7 100644
--- a/tensorflow/stream_executor/host/host_gpu_executor.h
+++ b/tensorflow/stream_executor/host/host_gpu_executor.h
@@ -139,7 +139,7 @@ class HostExecutor : public internal::StreamExecutorInterface {
 
   bool StopTimer(Stream *stream, Timer *timer) override;
 
-  port::Status BlockHostUntilDoneWithStatus(Stream *stream) override;
+  port::Status BlockHostUntilDone(Stream *stream) override;
 
   int PlatformDeviceCount() override { return 1; }
 
diff --git a/tensorflow/stream_executor/stream_executor_internal.h b/tensorflow/stream_executor/stream_executor_internal.h
index 0a9bef71d0..37ef182e14 100644
--- a/tensorflow/stream_executor/stream_executor_internal.h
+++ b/tensorflow/stream_executor/stream_executor_internal.h
@@ -219,7 +219,7 @@ class StreamExecutorInterface {
   virtual void DeallocateTimer(Timer *timer) = 0;
   virtual bool StartTimer(Stream *stream, Timer *timer) = 0;
   virtual bool StopTimer(Stream *stream, Timer *timer) = 0;
-  virtual port::Status BlockHostUntilDoneWithStatus(Stream *stream) = 0;
+  virtual port::Status BlockHostUntilDone(Stream *stream) = 0;
   virtual int PlatformDeviceCount() = 0;
   virtual port::Status EnablePeerAccessTo(StreamExecutorInterface *other) = 0;
   virtual bool CanEnablePeerAccessTo(StreamExecutorInterface *other) = 0;
diff --git a/tensorflow/stream_executor/stream_executor_pimpl.cc b/tensorflow/stream_executor/stream_executor_pimpl.cc
index c4b248657e..afca1c2e59 100644
--- a/tensorflow/stream_executor/stream_executor_pimpl.cc
+++ b/tensorflow/stream_executor/stream_executor_pimpl.cc
@@ -436,7 +436,7 @@ port::Status StreamExecutor::BlockHostUntilDone(Stream *stream) {
   port::Status result;
   SCOPED_TRACE(TraceListener::BlockHostUntilDone, &result, stream);
 
-  result = implementation_->BlockHostUntilDoneWithStatus(stream);
+  result = implementation_->BlockHostUntilDone(stream);
   return result;
 }
 
-- 
GitLab


From 8d3bd70a547fa843ba3e3da6f1379c4fdd836f2b Mon Sep 17 00:00:00 2001
From: Alex Sergeev <alexander.sergeev@live.com>
Date: Fri, 15 Dec 2017 08:59:53 -0800
Subject: [PATCH 1055/1225] Support --config=monolithic in
 tf.sysconfig.get_link_flags() (#15139)

* Don't add -ltensorflow_framework to sysconfig LFLAGS if the build is monolithic
---
 tensorflow/core/public/version.h             | 2 ++
 tensorflow/python/__init__.py                | 2 ++
 tensorflow/python/client/tf_session.i        | 3 +++
 tensorflow/python/framework/versions.py      | 4 ++++
 tensorflow/python/platform/sysconfig.py      | 6 ++++--
 tensorflow/python/pywrap_tensorflow.py       | 1 +
 tensorflow/tensorflow.bzl                    | 4 ++++
 tensorflow/tools/api/golden/tensorflow.pbtxt | 4 ++++
 tensorflow/tools/git/gen_git_source.py       | 7 +++++++
 tensorflow/tools/git/gen_git_source.sh       | 7 +++++++
 10 files changed, 38 insertions(+), 2 deletions(-)

diff --git a/tensorflow/core/public/version.h b/tensorflow/core/public/version.h
index ec077c4283..d8e7df48c2 100644
--- a/tensorflow/core/public/version.h
+++ b/tensorflow/core/public/version.h
@@ -119,5 +119,7 @@ extern const char* tf_compiler_version();
 extern const char* tf_git_version();
 // Value of the _GLIBCXX_USE_CXX11_ABI flag, or 0 if it's not set.
 extern const int tf_cxx11_abi_flag();
+// Returns 1 if build is monolithic, or 0 otherwise.
+extern const int tf_monolithic_build();
 
 #endif  // TENSORFLOW_CORE_PUBLIC_VERSION_H_
diff --git a/tensorflow/python/__init__.py b/tensorflow/python/__init__.py
index af34aca3e3..bc9ddec2a5 100644
--- a/tensorflow/python/__init__.py
+++ b/tensorflow/python/__init__.py
@@ -263,6 +263,7 @@ _allowed_symbols.extend([
     'GIT_VERSION',
     'COMPILER_VERSION',
     'CXX11_ABI_FLAG',
+    'MONOLITHIC_BUILD',
 ])
 
 # Remove all extra symbols that don't have a docstring or are not explicitly
@@ -282,6 +283,7 @@ _exported_dunders = set([
     '__git_version__',
     '__compiler_version__',
     '__cxx11_abi_flag__',
+    '__monolithic_build__',
 ])
 
 # Expose symbols minus dunders, unless they are whitelisted above.
diff --git a/tensorflow/python/client/tf_session.i b/tensorflow/python/client/tf_session.i
index e424e19c77..a94910042f 100644
--- a/tensorflow/python/client/tf_session.i
+++ b/tensorflow/python/client/tf_session.i
@@ -91,6 +91,9 @@ tensorflow::ImportNumpy();
 // _GLIBCXX_USE_CXX11_ABI flag value
 %constant const int __cxx11_abi_flag__ = tf_cxx11_abi_flag();
 
+// Flag indicating whether the build is monolithic
+%constant const int __monolithic_build__ = tf_monolithic_build();
+
 // Release the Python GIL for the duration of most methods.
 %exception {
   Py_BEGIN_ALLOW_THREADS;
diff --git a/tensorflow/python/framework/versions.py b/tensorflow/python/framework/versions.py
index 81529e2b1e..f03b81eb28 100644
--- a/tensorflow/python/framework/versions.py
+++ b/tensorflow/python/framework/versions.py
@@ -25,11 +25,13 @@ __version__ = pywrap_tensorflow.__version__
 __git_version__ = pywrap_tensorflow.__git_version__
 __compiler_version__ = pywrap_tensorflow.__compiler_version__
 __cxx11_abi_flag__ = pywrap_tensorflow.__cxx11_abi_flag__
+__monolithic_build__ = pywrap_tensorflow.__monolithic_build__
 
 VERSION = __version__
 GIT_VERSION = __git_version__
 COMPILER_VERSION = __compiler_version__
 CXX11_ABI_FLAG = __cxx11_abi_flag__
+MONOLITHIC_BUILD = __monolithic_build__
 
 GRAPH_DEF_VERSION = pywrap_tensorflow.GRAPH_DEF_VERSION
 GRAPH_DEF_VERSION_MIN_CONSUMER = (
@@ -42,6 +44,7 @@ __all__ = [
     "__git_version__",
     "__compiler_version__",
     "__cxx11_abi_flag__",
+    "__monolithic_build__",
     "COMPILER_VERSION",
     "CXX11_ABI_FLAG",
     "GIT_VERSION",
@@ -49,4 +52,5 @@ __all__ = [
     "GRAPH_DEF_VERSION_MIN_CONSUMER",
     "GRAPH_DEF_VERSION_MIN_PRODUCER",
     "VERSION",
+    "MONOLITHIC_BUILD",
 ]
diff --git a/tensorflow/python/platform/sysconfig.py b/tensorflow/python/platform/sysconfig.py
index 57635fb4d9..f6c4f2227f 100644
--- a/tensorflow/python/platform/sysconfig.py
+++ b/tensorflow/python/platform/sysconfig.py
@@ -27,6 +27,7 @@ from __future__ import print_function
 import os.path as _os_path
 
 from tensorflow.python.framework.versions import CXX11_ABI_FLAG as _CXX11_ABI_FLAG
+from tensorflow.python.framework.versions import MONOLITHIC_BUILD as _MONOLITHIC_BUILD
 from tensorflow.python.util.all_util import remove_undocumented
 
 
@@ -75,8 +76,9 @@ def get_link_flags():
     The link flags.
   """
   flags = []
-  flags.append('-L%s' % get_lib())
-  flags.append('-ltensorflow_framework')
+  if not _MONOLITHIC_BUILD:
+    flags.append('-L%s' % get_lib())
+    flags.append('-ltensorflow_framework')
   return flags
 
 _allowed_symbols = []
diff --git a/tensorflow/python/pywrap_tensorflow.py b/tensorflow/python/pywrap_tensorflow.py
index 91373fa544..5c0c5783dc 100644
--- a/tensorflow/python/pywrap_tensorflow.py
+++ b/tensorflow/python/pywrap_tensorflow.py
@@ -60,6 +60,7 @@ try:
   from tensorflow.python.pywrap_tensorflow_internal import __git_version__
   from tensorflow.python.pywrap_tensorflow_internal import __compiler_version__
   from tensorflow.python.pywrap_tensorflow_internal import __cxx11_abi_flag__
+  from tensorflow.python.pywrap_tensorflow_internal import __monolithic_build__
 
   if _use_dlopen_global_flags:
     pywrap_dlopen_global_flags.reset_dlopen_flags()
diff --git a/tensorflow/tensorflow.bzl b/tensorflow/tensorflow.bzl
index 044c9a96a3..fcefe23d76 100644
--- a/tensorflow/tensorflow.bzl
+++ b/tensorflow/tensorflow.bzl
@@ -184,6 +184,10 @@ def tf_copts(android_optimization_level_override="-O2"):
       + if_mkl(["-DINTEL_MKL=1", "-DEIGEN_USE_VML", "-fopenmp",])
       + if_android_arm(["-mfpu=neon"])
       + if_linux_x86_64(["-msse3"])
+      + select({
+            "//tensorflow:framework_shared_object": [],
+            "//conditions:default": ["-DTENSORFLOW_MONOLITHIC_BUILD"],
+      })
       + select({
             clean_dep("//tensorflow:android"): android_copts,
             clean_dep("//tensorflow:darwin"): [],
diff --git a/tensorflow/tools/api/golden/tensorflow.pbtxt b/tensorflow/tools/api/golden/tensorflow.pbtxt
index d6a7a2d19f..4c66cb68c2 100644
--- a/tensorflow/tools/api/golden/tensorflow.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.pbtxt
@@ -124,6 +124,10 @@ tf_module {
     name: "LogMessage"
     mtype: "<class \'google.protobuf.pyext.cpp_message.GeneratedProtocolMessageType\'>"
   }
+  member {
+    name: "MONOLITHIC_BUILD"
+    mtype: "<type \'int\'>"
+  }
   member {
     name: "MetaGraphDef"
     mtype: "<class \'google.protobuf.pyext.cpp_message.GeneratedProtocolMessageType\'>"
diff --git a/tensorflow/tools/git/gen_git_source.py b/tensorflow/tools/git/gen_git_source.py
index 0307d2a0eb..2e27487d2f 100755
--- a/tensorflow/tools/git/gen_git_source.py
+++ b/tensorflow/tools/git/gen_git_source.py
@@ -180,6 +180,13 @@ const int tf_cxx11_abi_flag() {
   return 0;
 #endif
 }
+const int tf_monolithic_build() {
+#ifdef TENSORFLOW_MONOLITHIC_BUILD
+  return 1;
+#else
+  return 0;
+#endif
+}
 """ % git_version
   open(filename, "w").write(contents)
 
diff --git a/tensorflow/tools/git/gen_git_source.sh b/tensorflow/tools/git/gen_git_source.sh
index 788f9e6e57..db20bb00e8 100755
--- a/tensorflow/tools/git/gen_git_source.sh
+++ b/tensorflow/tools/git/gen_git_source.sh
@@ -36,5 +36,12 @@ const int tf_cxx11_abi_flag() {
   return 0;
 #endif
 }
+const int tf_monolithic_build() {
+#ifdef TENSORFLOW_MONOLITHIC_BUILD
+  return 1;
+#else
+  return 0;
+#endif
+}
 EOF
 
-- 
GitLab


From 897076aea855c8cfcb8b365689d0a34259c67f6a Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 15 Dec 2017 09:48:35 -0800
Subject: [PATCH 1056/1225] Add quantized uint8 pad support.

PiperOrigin-RevId: 179204392
---
 .../lite/toco/graph_transformations/hardcode_min_max.cc      | 5 +++--
 .../contrib/lite/toco/graph_transformations/quantize.cc      | 2 +-
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/tensorflow/contrib/lite/toco/graph_transformations/hardcode_min_max.cc b/tensorflow/contrib/lite/toco/graph_transformations/hardcode_min_max.cc
index 9cb26c8752..9689b205cd 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/hardcode_min_max.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/hardcode_min_max.cc
@@ -143,7 +143,7 @@ bool HardcodeMinMaxForAverageOrMaxPool(Model* model, Operator* op) {
   return true;
 }
 
-bool HardcodeMinMaxForReshapeOrSqueeze(Model* model, Operator* op) {
+bool HardcodeMinMaxFromFirstInput(Model* model, Operator* op) {
   auto& output_array = model->GetArray(op->outputs[0]);
   if (output_array.minmax) {
     return false;
@@ -203,7 +203,8 @@ bool HardcodeMinMax::Run(Model* model, std::size_t op_index) {
 
     case OperatorType::kSqueeze:
     case OperatorType::kTensorFlowReshape:
-      changed = HardcodeMinMaxForReshapeOrSqueeze(model, op);
+    case OperatorType::kPad:
+      changed = HardcodeMinMaxFromFirstInput(model, op);
       break;
 
     case OperatorType::kLogistic:
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/quantize.cc b/tensorflow/contrib/lite/toco/graph_transformations/quantize.cc
index d33597d381..56082b965a 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/quantize.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/quantize.cc
@@ -42,7 +42,7 @@ bool SupportsQuantization(const Operator& op) {
          type == OperatorType::kL2Normalization || type == OperatorType::kAdd ||
          type == OperatorType::kAveragePool || type == OperatorType::kMaxPool ||
          type == OperatorType::kLogistic || type == OperatorType::kSoftmax ||
-         type == OperatorType::kSqueeze ||
+         type == OperatorType::kSqueeze || type == OperatorType::kPad ||
          type == OperatorType::kTensorFlowReshape ||
          type == OperatorType::kMul || type == OperatorType::kSpaceToDepth ||
          type == OperatorType::kDepthToSpace;
-- 
GitLab


From 65c108990382ecfdb9a4a8a58f2303096300e5e0 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 15 Dec 2017 10:09:07 -0800
Subject: [PATCH 1057/1225] Fix bfloat16 numerics issues in the tests.

PiperOrigin-RevId: 179207115
---
 tensorflow/compiler/tests/ftrl_test.py      | 40 ++++++++++-----------
 tensorflow/compiler/tests/momentum_test.py  | 23 ++++++------
 tensorflow/compiler/tests/unary_ops_test.py |  2 +-
 tensorflow/python/framework/test_util.py    | 10 ++++--
 4 files changed, 39 insertions(+), 36 deletions(-)

diff --git a/tensorflow/compiler/tests/ftrl_test.py b/tensorflow/compiler/tests/ftrl_test.py
index 7e3871312c..f9db4cf201 100644
--- a/tensorflow/compiler/tests/ftrl_test.py
+++ b/tensorflow/compiler/tests/ftrl_test.py
@@ -161,9 +161,9 @@ class FtrlOptimizerTest(XLATestCase):
           ftrl_update.run()
 
         # Validate updated params
-        self.assertAllClose(
+        self.assertAllCloseAccordingToType(
             np.array([-2.55607247, -3.98729396]), var0.eval(), 1e-5, 1e-5)
-        self.assertAllClose(
+        self.assertAllCloseAccordingToType(
             np.array([-0.28232238, -0.56096673]), var1.eval(), 1e-5, 1e-5)
 
   def testFtrlWithL1(self):
@@ -189,10 +189,10 @@ class FtrlOptimizerTest(XLATestCase):
           ftrl_update.run()
 
         # Validate updated params
-        self.assertAllClose(np.array([-7.66718769, -10.91273689]), var0.eval(),
-                            rtol=1e-4)
-        self.assertAllClose(np.array([-0.93460727, -1.86147261]), var1.eval(),
-                            rtol=1e-4)
+        self.assertAllCloseAccordingToType(
+            np.array([-7.66718769, -10.91273689]), var0.eval(), rtol=1e-4)
+        self.assertAllCloseAccordingToType(
+            np.array([-0.93460727, -1.86147261]), var1.eval(), rtol=1e-4)
 
   def testFtrlWithL1_L2(self):
     for dtype in self.float_types:
@@ -217,10 +217,10 @@ class FtrlOptimizerTest(XLATestCase):
           ftrl_update.run()
 
         # Validate updated params
-        self.assertAllClose(np.array([-0.24059935, -0.46829352]), var0.eval(),
-                            rtol=1e-5)
-        self.assertAllClose(np.array([-0.02406147, -0.04830509]), var1.eval(),
-                            rtol=1e-5)
+        self.assertAllCloseAccordingToType(
+            np.array([-0.24059935, -0.46829352]), var0.eval(), rtol=1e-5)
+        self.assertAllCloseAccordingToType(
+            np.array([-0.02406147, -0.04830509]), var1.eval(), rtol=1e-5)
 
   def testFtrlWithL1_L2_L2Shrinkage(self):
     """Test the new FTRL op with support for l2 shrinkage.
@@ -244,18 +244,18 @@ class FtrlOptimizerTest(XLATestCase):
         ftrl_update = opt.apply_gradients(zip([grads0, grads1], [var0, var1]))
         variables.global_variables_initializer().run()
         # Fetch params to validate initial values
-        self.assertAllClose([1.0, 2.0], var0.eval())
-        self.assertAllClose([4.0, 3.0], var1.eval())
+        self.assertAllCloseAccordingToType([1.0, 2.0], var0.eval())
+        self.assertAllCloseAccordingToType([4.0, 3.0], var1.eval())
 
         # Run 10 steps FTRL
         for _ in range(10):
           ftrl_update.run()
 
         # Validate updated params
-        self.assertAllClose(np.array([-0.21931979, -0.40642974]), var0.eval(),
-                            rtol=1e-4)
-        self.assertAllClose(np.array([-0.0282721, -0.07188385]), var1.eval(),
-                            rtol=1e-4)
+        self.assertAllCloseAccordingToType(
+            np.array([-0.21931979, -0.40642974]), var0.eval(), rtol=1e-4)
+        self.assertAllCloseAccordingToType(
+            np.array([-0.0282721, -0.07188385]), var1.eval(), rtol=1e-4)
 
   # When variables are initialized with Zero, FTRL-Proximal has two properties:
   # 1. Without L1&L2 but with fixed learning rate, FTRL-Proximal is identical
@@ -272,8 +272,8 @@ class FtrlOptimizerTest(XLATestCase):
       with self.test_session(), self.test_scope():
         val2, val3 = self.equivAdagradTest_AdagradPart(steps, dtype)
 
-    self.assertAllClose(val0, val2, rtol=1e-4)
-    self.assertAllClose(val1, val3, rtol=1e-4)
+    self.assertAllCloseAccordingToType(val0, val2, rtol=1e-4)
+    self.assertAllCloseAccordingToType(val1, val3, rtol=1e-4)
 
   def testEquivGradientDescentwithoutRegularization(self):
     steps = 5
@@ -284,8 +284,8 @@ class FtrlOptimizerTest(XLATestCase):
         val2, val3 = self.equivGradientDescentTest_GradientDescentPart(
             steps, dtype)
 
-    self.assertAllClose(val0, val2, rtol=1e-5)
-    self.assertAllClose(val1, val3, rtol=1e-5)
+    self.assertAllCloseAccordingToType(val0, val2, rtol=1e-5)
+    self.assertAllCloseAccordingToType(val1, val3, rtol=1e-5)
 
 
 if __name__ == "__main__":
diff --git a/tensorflow/compiler/tests/momentum_test.py b/tensorflow/compiler/tests/momentum_test.py
index c00e3035a0..af9394e7d7 100644
--- a/tensorflow/compiler/tests/momentum_test.py
+++ b/tensorflow/compiler/tests/momentum_test.py
@@ -96,28 +96,27 @@ class MomentumOptimizerTest(XLATestCase):
   def testNesterovMomentum(self):
     for dtype in self.float_types:
       with self.test_session(), self.test_scope():
-        var0 = resource_variable_ops.ResourceVariable([1.0, 2.0], dtype=dtype)
-        var1 = resource_variable_ops.ResourceVariable([3.0, 4.0], dtype=dtype)
-        var0_np = np.array([1.0, 2.0], dtype=dtype)
-        var1_np = np.array([3.0, 4.0], dtype=dtype)
+        var0 = resource_variable_ops.ResourceVariable([0.1, 0.2], dtype=dtype)
+        var1 = resource_variable_ops.ResourceVariable([0.3, 0.4], dtype=dtype)
+        var0_np = np.array([0.1, 0.2], dtype=dtype)
+        var1_np = np.array([0.3, 0.4], dtype=dtype)
         accum0_np = np.array([0.0, 0.0], dtype=dtype)
         accum1_np = np.array([0.0, 0.0], dtype=dtype)
-        cost = 5 * var0 * var0 + 3 * var1
+        cost = 0.4 * var0 * var0 + 0.9 * var1
         global_step = resource_variable_ops.ResourceVariable(
             array_ops.zeros([], dtypes.int32), name="global_step")
         mom_op = momentum_lib.MomentumOptimizer(
-            learning_rate=2.0, momentum=0.9, use_nesterov=True)
+            learning_rate=0.1, momentum=0.9, use_nesterov=True)
         opt_op = mom_op.minimize(cost, global_step, [var0, var1])
         variables.global_variables_initializer().run()
         for _ in range(1, 5):
           opt_op.run()
           var0_np, accum0_np = self._update_nesterov_momentum_numpy(
-              var0_np, accum0_np, var0_np * 10, 2.0, 0.9)
-          var1_np, accum1_np = self._update_nesterov_momentum_numpy(var1_np,
-                                                                    accum1_np,
-                                                                    3, 2.0, 0.9)
-          self.assertAllClose(var0_np, var0.eval())
-          self.assertAllClose(var1_np, var1.eval())
+              var0_np, accum0_np, var0_np * 0.8, 0.1, 0.9)
+          var1_np, accum1_np = self._update_nesterov_momentum_numpy(
+              var1_np, accum1_np, 0.9, 0.1, 0.9)
+          self.assertAllCloseAccordingToType(var0_np, var0.eval())
+          self.assertAllCloseAccordingToType(var1_np, var1.eval())
 
   def testTensorLearningRateAndMomentum(self):
     for dtype in self.float_types:
diff --git a/tensorflow/compiler/tests/unary_ops_test.py b/tensorflow/compiler/tests/unary_ops_test.py
index b0623c0fbc..ecba5a4fb0 100644
--- a/tensorflow/compiler/tests/unary_ops_test.py
+++ b/tensorflow/compiler/tests/unary_ops_test.py
@@ -67,7 +67,7 @@ class UnaryOpsTest(XLATestCase):
         output = op(pinp)
       result = session.run(output, {pinp: inp})
       if equality_test is None:
-        equality_test = self.assertAllClose
+        equality_test = self.assertAllCloseAccordingToType
       equality_test(result, expected, rtol=rtol, atol=atol)
 
   def ListsAreClose(self, result, expected, rtol, atol):
diff --git a/tensorflow/python/framework/test_util.py b/tensorflow/python/framework/test_util.py
index 509c5ec8d6..8875d45a07 100644
--- a/tensorflow/python/framework/test_util.py
+++ b/tensorflow/python/framework/test_util.py
@@ -1091,7 +1091,9 @@ class TensorFlowTestCase(googletest.TestCase):
                                     float_rtol=1e-6,
                                     float_atol=1e-6,
                                     half_rtol=1e-3,
-                                    half_atol=1e-3):
+                                    half_atol=1e-3,
+                                    bfloat16_rtol=1e-2,
+                                    bfloat16_atol=1e-2):
     """Like assertAllClose, but also suitable for comparing fp16 arrays.
 
     In particular, the tolerance is reduced to 1e-3 if at least
@@ -1106,6 +1108,8 @@ class TensorFlowTestCase(googletest.TestCase):
       float_atol: absolute tolerance for float32.
       half_rtol: relative tolerance for float16.
       half_atol: absolute tolerance for float16.
+      bfloat16_rtol: relative tolerance for bfloat16.
+      bfloat16_atol: absolute tolerance for bfloat16.
     """
     a = self._GetNdArray(a)
     b = self._GetNdArray(b)
@@ -1119,8 +1123,8 @@ class TensorFlowTestCase(googletest.TestCase):
       atol = max(atol, half_atol)
     if (a.dtype == dtypes.bfloat16.as_numpy_dtype or
         b.dtype == dtypes.bfloat16.as_numpy_dtype):
-      rtol = max(rtol, half_rtol)
-      atol = max(atol, half_atol)
+      rtol = max(rtol, bfloat16_rtol)
+      atol = max(atol, bfloat16_atol)
 
     self.assertAllClose(a, b, rtol=rtol, atol=atol)
 
-- 
GitLab


From 6c01a35b39d030e3e96b1a961163a0b29e377639 Mon Sep 17 00:00:00 2001
From: Akshay Agrawal <akshayka@google.com>
Date: Fri, 15 Dec 2017 10:11:17 -0800
Subject: [PATCH 1058/1225] Push eager context onto context_stack when eager
 execution is enabled

PiperOrigin-RevId: 179207409
---
 tensorflow/python/eager/core_test.py | 14 ++++++++++++++
 tensorflow/python/framework/ops.py   |  7 +++++++
 2 files changed, 21 insertions(+)

diff --git a/tensorflow/python/eager/core_test.py b/tensorflow/python/eager/core_test.py
index 2449162dca..02694b34fe 100644
--- a/tensorflow/python/eager/core_test.py
+++ b/tensorflow/python/eager/core_test.py
@@ -84,6 +84,20 @@ class TFETest(test_util.TensorFlowTestCase):
     self.assertTrue(has_cpu_device)
     del ctx
 
+  def testContextStackContainsEagerMode(self):
+    # Eager execution has been enabled, and no other context
+    # switch has occurred, so `context_stack` should contain
+    # exactly one entry.
+    self.assertEqual(len(context.context_stack.stack), 1)
+    stack_entry = context.context_stack.stack[0]
+
+    # The entry should log that eager mode was entered.
+    self.assertIs(stack_entry.enter_context_fn, context.eager_mode)
+
+    # It is not possible to build a graph function when eager execution
+    # is enabled; the stack entry should reflect this fact.
+    self.assertFalse(stack_entry.is_building_function)
+
   def _runInThread(self, target, args):
     t = threading.Thread(target=target, args=args)
     try:
diff --git a/tensorflow/python/framework/ops.py b/tensorflow/python/framework/ops.py
index 947a9e49cc..169fa36161 100644
--- a/tensorflow/python/framework/ops.py
+++ b/tensorflow/python/framework/ops.py
@@ -4971,6 +4971,13 @@ def enable_eager_execution(config=None, device_policy=None):
   if context._context is None:
     context._context = context.Context(config=config,
                                        device_policy=device_policy)
+    if context.context_stack.stack:
+      raise AssertionError("Invariant violated: The context stack must "
+                           "be empty when eager execution is enabled.")
+    # Log that eager execution has been enabled by pushing an entry onto the
+    # context stack; this entry won't ever be popped, as it's impossible to
+    # disable eager execution
+    context.context_stack.push(False, context.eager_mode)
   elif ((config is not None and config is not context._context._config)
         or (device_policy is not None
             and device_policy is not context._context._device_policy)):
-- 
GitLab


From 979436cd29cbe908614f1475cca344e89fcea1bc Mon Sep 17 00:00:00 2001
From: Yifei Feng <fengyifei2026@gmail.com>
Date: Fri, 15 Dec 2017 10:27:54 -0800
Subject: [PATCH 1059/1225] Update CONTRIBUTING.md

---
 CONTRIBUTING.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index b50b89c351..dc96bc2e3d 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -20,8 +20,8 @@ Follow either of the two links above to access the appropriate CLA and instructi
 If you have improvements to TensorFlow, send us your pull requests! For those
 just getting started, Github has a [howto](https://help.github.com/articles/using-pull-requests/).
 
-TensorFlow team members will be assigned to review your pull requests. Once the pull requests are approved and pass continuous integration checks, we will merge the pull request.
-For some pull requests, we will apply the patch for each pull request to our internal version control system first, and export the change out as a new commit later, at which point the original pull request will be closed. The commits in the pull request will be squashed into a single commit with the pull request creator as the author. These pull requests will be labeled as pending merge internally. This process might take up to a few business days. 
+TensorFlow team members will be assigned to review your pull requests. Once the pull requests are approved and pass continuous integration checks, we will merge the pull requests.
+For some pull requests, we will apply the patch for each pull request to our internal version control system first, and export the change out as a new commit later, at which point the original pull request will be closed. The commits in the pull request will be squashed into a single commit with the pull request creator as the author. These pull requests will be labeled as pending merge internally.
 
 If you want to contribute but you're not sure where to start, take a look at the
 [issues with the "contributions welcome" label](https://github.com/tensorflow/tensorflow/labels/stat%3Acontributions%20welcome).
-- 
GitLab


From 5582df1a70c00e34c1254f798585cfd3ca05d90c Mon Sep 17 00:00:00 2001
From: Benoit Steiner <bsteiner@google.com>
Date: Fri, 15 Dec 2017 10:31:27 -0800
Subject: [PATCH 1060/1225] Added a python API to estimate the performance of
 HW devices

PiperOrigin-RevId: 179210330
---
 .../grappler/costs/op_level_cost_estimator.h  |  2 +-
 tensorflow/python/grappler/cluster.i          | 25 +++++++++++++++++++
 tensorflow/python/grappler/cluster.py         |  5 ++++
 tensorflow/python/grappler/cluster_test.py    |  3 +++
 4 files changed, 34 insertions(+), 1 deletion(-)

diff --git a/tensorflow/core/grappler/costs/op_level_cost_estimator.h b/tensorflow/core/grappler/costs/op_level_cost_estimator.h
index c6f23ee0aa..5f541ccf04 100644
--- a/tensorflow/core/grappler/costs/op_level_cost_estimator.h
+++ b/tensorflow/core/grappler/costs/op_level_cost_estimator.h
@@ -35,7 +35,6 @@ class OpLevelCostEstimator {
 
   virtual Costs PredictCosts(const OpContext& op_context) const;
 
- protected:
   // Basic device performance info, sufficient for roofline estimate.
   struct DeviceInfo {
     double gigaops;     // Billions of operations executed per second.
@@ -45,6 +44,7 @@ class OpLevelCostEstimator {
   // Returns basic device performance info.
   virtual DeviceInfo GetDeviceInfo(const DeviceProperties& device) const;
 
+ protected:
   // Predict cost of an op for which no accurate estimator is defined.
   Costs PredictCostOfAnUnknownOp(const OpContext& op_context) const;
 
diff --git a/tensorflow/python/grappler/cluster.i b/tensorflow/python/grappler/cluster.i
index 2f99a910f1..9981c1d22d 100644
--- a/tensorflow/python/grappler/cluster.i
+++ b/tensorflow/python/grappler/cluster.i
@@ -54,6 +54,23 @@ bool _PyObjAs(PyObject *input, tensorflow::NamedDevice *out) {
   $1 = &temp;
 }
 
+%typemap(in) const tensorflow::NamedDevice& (tensorflow::NamedDevice temp) {
+  char* c_string;
+  Py_ssize_t py_size;
+  if (PyBytes_AsStringAndSize($input, &c_string, &py_size) == -1) {
+    // Python has raised an error (likely TypeError or UnicodeEncodeError).
+    SWIG_fail;
+  }
+
+  if (!temp.ParseFromString(string(c_string, py_size))) {
+    PyErr_SetString(
+        PyExc_TypeError,
+        "The NamedDevice could not be parsed as a valid protocol buffer");
+    SWIG_fail;
+  }
+  $1 = &temp;
+}
+
 %typemap(in) const tensorflow::RunMetadata& (tensorflow::RunMetadata temp) {
   char* c_string;
   Py_ssize_t py_size;
@@ -197,6 +214,13 @@ static std::vector<string> TF_ListAvailableOps() {
   return op_names;
 }
 
+static double TF_EstimatePerformance(const tensorflow::NamedDevice& device) {
+  tensorflow::grappler::OpLevelCostEstimator estimator;
+  tensorflow::grappler::OpLevelCostEstimator::DeviceInfo info =
+      estimator.GetDeviceInfo(device.properties());
+  return info.gigaops;
+}
+
 static PyObject* TF_MeasureCosts(
     GItem item,
     GCluster cluster,
@@ -324,6 +348,7 @@ static GCluster TF_NewVirtualCluster(
 static void TF_ShutdownCluster(GCluster cluster);
 static PyObject* TF_ListDevices(GCluster cluster);
 static std::vector<string> TF_ListAvailableOps();
+static float TF_EstimatePerformance(const tensorflow::NamedDevice& device);
 static PyObject* TF_MeasureCosts(
     GItem item, GCluster cluster,
     bool generate_timeline, TF_Status* out_status);
diff --git a/tensorflow/python/grappler/cluster.py b/tensorflow/python/grappler/cluster.py
index 1072e66c73..ba1a734ee0 100644
--- a/tensorflow/python/grappler/cluster.py
+++ b/tensorflow/python/grappler/cluster.py
@@ -84,6 +84,11 @@ class Cluster(object):
     """Returns a list of all the available operations (sorted alphatically)."""
     return tf_cluster.TF_ListAvailableOps()
 
+  def EstimatePerformance(self, device):
+    """Estimate the performance of the specified device."""
+    serialized = device.SerializeToString()
+    return tf_cluster.TF_EstimatePerformance(serialized)
+
   def MeasureCosts(self, item):
     """Returns the cost of running the specified item.
 
diff --git a/tensorflow/python/grappler/cluster_test.py b/tensorflow/python/grappler/cluster_test.py
index 0e78c33a85..26feac0a23 100644
--- a/tensorflow/python/grappler/cluster_test.py
+++ b/tensorflow/python/grappler/cluster_test.py
@@ -106,6 +106,9 @@ class ClusterTest(test.TestCase):
       self.assertGreater(run_time, 0)
       self.assertEqual(len(op_perfs), 15)
 
+      estimated_perf = grappler_cluster.EstimatePerformance(named_device)
+      self.assertEqual(7680.0, estimated_perf)
+
   def testContext(self):
     with ops.Graph().as_default() as g:
       a = random_ops.random_uniform(shape=())
-- 
GitLab


From 22fe6558a958c6cc81d16d371031c06e262b1c83 Mon Sep 17 00:00:00 2001
From: Shivani Agrawal <shivaniagrawal@google.com>
Date: Fri, 15 Dec 2017 10:33:47 -0800
Subject: [PATCH 1061/1225] [tf.data] Saveable iterator for
 BytesProducedStatsDataset.

PiperOrigin-RevId: 179210688
---
 .../contrib/data/python/kernel_tests/BUILD    |  1 +
 .../kernel_tests/stats_dataset_ops_test.py    | 16 ++++++++
 .../core/kernels/data/stats_dataset_ops.cc    | 37 ++++++++++++++++---
 3 files changed, 49 insertions(+), 5 deletions(-)

diff --git a/tensorflow/contrib/data/python/kernel_tests/BUILD b/tensorflow/contrib/data/python/kernel_tests/BUILD
index 375e3ad612..e0d0759567 100644
--- a/tensorflow/contrib/data/python/kernel_tests/BUILD
+++ b/tensorflow/contrib/data/python/kernel_tests/BUILD
@@ -482,6 +482,7 @@ py_test(
     srcs = ["stats_dataset_ops_test.py"],
     srcs_version = "PY2AND3",
     deps = [
+        ":dataset_serialization_test",
         "//tensorflow/contrib/data/python/ops:dataset_ops",
         "//tensorflow/contrib/data/python/ops:transformation_ops",
         "//tensorflow/python:client_testlib",
diff --git a/tensorflow/contrib/data/python/kernel_tests/stats_dataset_ops_test.py b/tensorflow/contrib/data/python/kernel_tests/stats_dataset_ops_test.py
index 8f24d6b2f6..2b04b278ba 100644
--- a/tensorflow/contrib/data/python/kernel_tests/stats_dataset_ops_test.py
+++ b/tensorflow/contrib/data/python/kernel_tests/stats_dataset_ops_test.py
@@ -19,6 +19,7 @@ from __future__ import print_function
 
 import numpy as np
 
+from tensorflow.contrib.data.python.kernel_tests import dataset_serialization_test_base
 from tensorflow.contrib.data.python.ops import stats_ops
 from tensorflow.core.framework import summary_pb2
 from tensorflow.python.data.ops import dataset_ops
@@ -209,5 +210,20 @@ class StatsDatasetTest(test.TestCase):
         sess.run(stats_aggregator_1.subscribe(iterator))
 
 
+class StatsDatasetSerializationTest(
+    dataset_serialization_test_base.DatasetSerializationTestBase):
+
+  def _build_dataset_bytes_stats(self, num_elements):
+    return dataset_ops.Dataset.range(num_elements).map(
+        lambda x: array_ops.tile([x], ops.convert_to_tensor([x]))).apply(
+            stats_ops.bytes_produced_stats("bytes_produced"))
+
+  def testBytesStatsDatasetSaveableCore(self):
+    num_outputs = 100
+    self.run_core_tests(
+        lambda: self._build_dataset_bytes_stats(num_outputs),
+        lambda: self._build_dataset_bytes_stats(num_outputs // 10), num_outputs)
+
+
 if __name__ == "__main__":
   test.main()
diff --git a/tensorflow/core/kernels/data/stats_dataset_ops.cc b/tensorflow/core/kernels/data/stats_dataset_ops.cc
index 7a8b8b17f0..09704d4b25 100644
--- a/tensorflow/core/kernels/data/stats_dataset_ops.cc
+++ b/tensorflow/core/kernels/data/stats_dataset_ops.cc
@@ -110,14 +110,14 @@ class BytesProducedStatsDatasetOp : public UnaryDatasetOpKernel {
                    DatasetBase** output) override {
     string tag;
     OP_REQUIRES_OK(ctx, ParseScalarArgument(ctx, "tag", &tag));
-    *output = new Dataset(input, std::move(tag));
+    *output = new Dataset(ctx, input, std::move(tag));
   }
 
  private:
-  class Dataset : public DatasetBase {
+  class Dataset : public GraphDatasetBase {
    public:
-    explicit Dataset(const DatasetBase* input, string tag)
-        : input_(input), tag_(std::move(tag)) {
+    explicit Dataset(OpKernelContext* ctx, const DatasetBase* input, string tag)
+        : GraphDatasetBase(ctx), input_(input), tag_(std::move(tag)) {
       input_->Ref();
     }
 
@@ -140,6 +140,17 @@ class BytesProducedStatsDatasetOp : public UnaryDatasetOpKernel {
       return "BytesProducedStatsDatasetOp::Dataset";
     }
 
+   protected:
+    Status AsGraphDefInternal(OpKernelContext* ctx, DatasetGraphDefBuilder* b,
+                              Node** output) const override {
+      Node* input_node;
+      TF_RETURN_IF_ERROR(b->AddParentDataset(ctx, input_, &input_node));
+      Node* tag_node;
+      TF_RETURN_IF_ERROR(b->AddScalar(tag_, &tag_node));
+      TF_RETURN_IF_ERROR(b->AddDataset(this, {input_node, tag_node}, output));
+      return Status::OK();
+    }
+
    private:
     class Iterator : public DatasetIterator<Dataset> {
      public:
@@ -150,6 +161,7 @@ class BytesProducedStatsDatasetOp : public UnaryDatasetOpKernel {
       Status GetNextInternal(IteratorContext* ctx,
                              std::vector<Tensor>* out_tensors,
                              bool* end_of_sequence) override {
+        tf_shared_lock l(mu_);
         Status s = input_impl_->GetNext(ctx, out_tensors, end_of_sequence);
         auto stats_aggregator = ctx->stats_aggregator();
         if (stats_aggregator && s.ok() && !*end_of_sequence) {
@@ -163,8 +175,23 @@ class BytesProducedStatsDatasetOp : public UnaryDatasetOpKernel {
         return s;
       }
 
+     protected:
+      Status SaveInternal(IteratorStateWriter* writer) override {
+        mutex_lock l(mu_);
+        TF_RETURN_IF_ERROR(SaveParent(writer, input_impl_));
+        return Status::OK();
+      }
+
+      Status RestoreInternal(OpKernelContext* ctx,
+                             IteratorStateReader* reader) override {
+        mutex_lock l(mu_);
+        TF_RETURN_IF_ERROR(RestoreParent(ctx, reader, input_impl_));
+        return Status::OK();
+      }
+
      private:
-      const std::unique_ptr<IteratorBase> input_impl_;
+      mutex mu_;
+      std::unique_ptr<IteratorBase> input_impl_ GUARDED_BY(mu_);
     };
 
     const DatasetBase* const input_;
-- 
GitLab


From 75a91cf3be635af4f6004f20f3c3cc50c37d3145 Mon Sep 17 00:00:00 2001
From: Roy Frostig <frostig@google.com>
Date: Fri, 15 Dec 2017 10:38:16 -0800
Subject: [PATCH 1062/1225] Python library and C++ bindings for creating and
 compiling local XLA computations.

PiperOrigin-RevId: 179211353
---
 tensorflow/BUILD                              |   1 +
 tensorflow/compiler/xla/BUILD                 |  10 +
 tensorflow/compiler/xla/python/BUILD          |  82 ++
 tensorflow/compiler/xla/python/__init__.py    |   0
 .../xla/python/local_computation_builder.cc   | 265 ++++++
 .../xla/python/local_computation_builder.h    | 210 ++++
 .../xla/python/local_computation_builder.i    | 348 +++++++
 .../compiler/xla/python/numpy_bridge.cc       | 389 ++++++++
 tensorflow/compiler/xla/python/numpy_bridge.h | 123 +++
 tensorflow/compiler/xla/python/xla.i          |  18 +
 tensorflow/compiler/xla/python/xla_client.py  | 605 ++++++++++++
 .../compiler/xla/python/xla_client_test.py    | 898 ++++++++++++++++++
 tensorflow/tf_exported_symbols.lds            |   1 +
 tensorflow/tf_version_script.lds              |   1 +
 14 files changed, 2951 insertions(+)
 create mode 100644 tensorflow/compiler/xla/python/BUILD
 create mode 100644 tensorflow/compiler/xla/python/__init__.py
 create mode 100644 tensorflow/compiler/xla/python/local_computation_builder.cc
 create mode 100644 tensorflow/compiler/xla/python/local_computation_builder.h
 create mode 100644 tensorflow/compiler/xla/python/local_computation_builder.i
 create mode 100644 tensorflow/compiler/xla/python/numpy_bridge.cc
 create mode 100644 tensorflow/compiler/xla/python/numpy_bridge.h
 create mode 100644 tensorflow/compiler/xla/python/xla.i
 create mode 100644 tensorflow/compiler/xla/python/xla_client.py
 create mode 100644 tensorflow/compiler/xla/python/xla_client_test.py

diff --git a/tensorflow/BUILD b/tensorflow/BUILD
index d80fe5c829..9437bef99f 100644
--- a/tensorflow/BUILD
+++ b/tensorflow/BUILD
@@ -411,6 +411,7 @@ filegroup(
         "//tensorflow/compiler/xla/client:all_files",
         "//tensorflow/compiler/xla/client/lib:all_files",
         "//tensorflow/compiler/xla/legacy_flags:all_files",
+        "//tensorflow/compiler/xla/python:all_files",
         "//tensorflow/compiler/xla/service:all_files",
         "//tensorflow/compiler/xla/service/cpu:all_files",
         "//tensorflow/compiler/xla/service/gpu:all_files",
diff --git a/tensorflow/compiler/xla/BUILD b/tensorflow/compiler/xla/BUILD
index d3f292207f..cd69c69889 100644
--- a/tensorflow/compiler/xla/BUILD
+++ b/tensorflow/compiler/xla/BUILD
@@ -20,6 +20,10 @@ package_group(
 load("//tensorflow:tensorflow.bzl", "cc_header_only_library")
 load("//tensorflow:tensorflow.bzl", "tf_cc_test")
 load("//tensorflow/compiler/xla:xla.bzl", "xla_proto_library")
+load(
+    "//tensorflow/core:platform/default/build_config.bzl",
+    "tf_proto_library_py",
+)
 
 # Filegroup used to collect source files for dependency checking.
 filegroup(
@@ -36,6 +40,12 @@ xla_proto_library(
     visibility = ["//visibility:public"],
 )
 
+tf_proto_library_py(
+    name = "xla_data_proto",  # bzl adds a _py suffix
+    srcs = ["xla_data.proto"],
+    visibility = ["//visibility:public"],
+)
+
 xla_proto_library(
     name = "xla_proto",
     srcs = ["xla.proto"],
diff --git a/tensorflow/compiler/xla/python/BUILD b/tensorflow/compiler/xla/python/BUILD
new file mode 100644
index 0000000000..a6b8158671
--- /dev/null
+++ b/tensorflow/compiler/xla/python/BUILD
@@ -0,0 +1,82 @@
+licenses(["notice"])  # Apache 2.0
+
+package(default_visibility = ["//tensorflow:internal"])
+
+load("//tensorflow:tensorflow.bzl", "tf_py_wrap_cc")
+
+py_library(
+    name = "xla_client",
+    srcs = ["xla_client.py"],
+    srcs_version = "PY2AND3",
+    visibility = ["//visibility:public"],
+    deps = [
+        ":pywrap_xla",
+        "//tensorflow/compiler/xla:xla_data_proto_py",
+    ],
+)
+
+py_test(
+    name = "xla_client_test",
+    srcs = ["xla_client_test.py"],
+    main = "xla_client_test.py",
+    srcs_version = "PY2AND3",
+    deps = [
+        ":xla_client",
+        "//tensorflow/python:platform_test",
+    ],
+)
+
+cc_library(
+    name = "numpy_bridge",
+    srcs = ["numpy_bridge.cc"],
+    hdrs = ["numpy_bridge.h"],
+    deps = [
+        "//tensorflow/compiler/xla:literal_util",
+        "//tensorflow/compiler/xla:shape_util",
+        "//tensorflow/compiler/xla:xla_data_proto",
+        "//tensorflow/core:lib",
+        "//tensorflow/python:numpy_lib",
+    ],
+)
+
+cc_library(
+    name = "local_computation_builder",
+    srcs = ["local_computation_builder.cc"],
+    hdrs = ["local_computation_builder.h"],
+    deps = [
+        "//tensorflow/compiler/xla:executable_run_options",
+        "//tensorflow/compiler/xla:util",
+        "//tensorflow/compiler/xla/client:client_library",
+        "//tensorflow/compiler/xla/client:computation_builder",
+        "//tensorflow/compiler/xla/client:local_client",
+        "//tensorflow/compiler/xla/service:cpu_plugin",
+        "//tensorflow/core:lib",
+    ],
+)
+
+tf_py_wrap_cc(
+    name = "pywrap_xla",
+    srcs = ["xla.i"],
+    swig_includes = [
+        "local_computation_builder.i",
+    ],
+    deps = [
+        ":local_computation_builder",
+        ":numpy_bridge",
+        "//tensorflow/compiler/xla:literal_util",
+        "//tensorflow/compiler/xla:shape_util",
+        "//tensorflow/compiler/xla:xla_data_proto",
+    ],
+)
+
+filegroup(
+    name = "all_files",
+    srcs = glob(
+        ["**/*"],
+        exclude = [
+            "**/METADATA",
+            "**/OWNERS",
+        ],
+    ),
+    visibility = ["//tensorflow:__subpackages__"],
+)
diff --git a/tensorflow/compiler/xla/python/__init__.py b/tensorflow/compiler/xla/python/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/tensorflow/compiler/xla/python/local_computation_builder.cc b/tensorflow/compiler/xla/python/local_computation_builder.cc
new file mode 100644
index 0000000000..0b0a53fac7
--- /dev/null
+++ b/tensorflow/compiler/xla/python/local_computation_builder.cc
@@ -0,0 +1,265 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/compiler/xla/python/local_computation_builder.h"
+#include "tensorflow/compiler/xla/executable_run_options.h"
+#include "tensorflow/compiler/xla/util.h"
+
+namespace xla {
+
+namespace swig {
+
+CompiledLocalComputation::CompiledLocalComputation(
+    std::unique_ptr<LocalExecutable> executable)
+    : executable_(std::move(executable)) {}
+
+std::unique_ptr<Literal> CompiledLocalComputation::Execute(
+    const std::vector<Literal>& arguments) {
+  LocalClient* client = ClientLibrary::LocalClientOrDie();
+
+  // Transfer arguments in
+  std::vector<std::unique_ptr<ScopedShapedBuffer>> scoped_buffers;
+  scoped_buffers.reserve(arguments.size());
+  for (const Literal& argument : arguments) {
+    scoped_buffers.push_back(
+        client
+            ->LiteralToShapedBuffer(argument,
+                                    /*device_ordinal=*/0,
+                                    client->backend().memory_allocator())
+            .ConsumeValueOrDie());
+  }
+
+  // Execute
+  std::vector<const ShapedBuffer*> argument_buffers;
+  argument_buffers.reserve(scoped_buffers.size());
+  for (auto& buffer : scoped_buffers) {
+    argument_buffers.push_back(buffer.get());
+  }
+  ExecutableRunOptions options;
+  options.set_allocator(client->backend().memory_allocator());
+  options.set_inter_op_thread_pool(client->backend().inter_op_thread_pool());
+  options.set_intra_op_thread_pool(
+      client->backend().eigen_intra_op_thread_pool_device());
+  std::unique_ptr<ScopedShapedBuffer> result_buffer =
+      executable_->Run(argument_buffers, options).ConsumeValueOrDie();
+
+  // Transfer result out
+  return client->ShapedBufferToLiteral(*result_buffer).ConsumeValueOrDie();
+}
+
+LocalComputation::LocalComputation(std::unique_ptr<Computation> computation)
+    : computation_(std::move(computation)) {}
+
+CompiledLocalComputation* LocalComputation::Compile(
+    const std::vector<Shape>& argument_shapes) {
+  std::vector<const Shape*> argument_shape_pointers;
+  argument_shape_pointers.reserve(argument_shapes.size());
+  for (auto& argument_shape : argument_shapes) {
+    argument_shape_pointers.push_back(&argument_shape);
+  }
+
+  LocalClient* client = ClientLibrary::LocalClientOrDie();
+  ExecutableBuildOptions options;
+  return new CompiledLocalComputation(
+      client->Compile(*computation_, argument_shape_pointers, options)
+          .ValueOrDie());
+}
+
+const Computation& LocalComputation::computation() const {
+  return *computation_;
+}
+
+LocalComputationBuilder::LocalComputationBuilder(const string& computation_name)
+    : builder_(ClientLibrary::LocalClientOrDie(), computation_name) {}
+
+LocalComputation* LocalComputationBuilder::Build() {
+  return new LocalComputation(std::unique_ptr<Computation>(
+      new Computation(builder_.Build().ConsumeValueOrDie())));
+}
+
+ComputationDataHandle LocalComputationBuilder::Parameter(int64 parameter_number,
+                                                         const Shape& shape,
+                                                         const string& name) {
+  return builder_.Parameter(parameter_number, shape, name);
+}
+
+std::unique_ptr<Shape> LocalComputationBuilder::GetShape(
+    const ComputationDataHandle& operand) {
+  return builder_.GetShape(operand).ConsumeValueOrDie();
+}
+
+ComputationDataHandle LocalComputationBuilder::ConstantLiteral(
+    const Literal& literal) {
+  return builder_.ConstantLiteral(literal);
+}
+
+ComputationDataHandle LocalComputationBuilder::Broadcast(
+    const ComputationDataHandle& operand,
+    tensorflow::gtl::ArraySlice<int64> broadcast_sizes) {
+  return builder_.Broadcast(operand, broadcast_sizes);
+}
+
+ComputationDataHandle LocalComputationBuilder::Reshape(
+    const ComputationDataHandle& operand,
+    tensorflow::gtl::ArraySlice<int64> dimensions,
+    tensorflow::gtl::ArraySlice<int64> new_sizes) {
+  return builder_.Reshape(operand, dimensions, new_sizes);
+}
+
+ComputationDataHandle LocalComputationBuilder::Slice(
+    const ComputationDataHandle& operand,
+    tensorflow::gtl::ArraySlice<int64> start_indices,
+    tensorflow::gtl::ArraySlice<int64> limit_indices,
+    tensorflow::gtl::ArraySlice<int64> strides) {
+  return builder_.Slice(operand, start_indices, limit_indices, strides);
+}
+
+ComputationDataHandle LocalComputationBuilder::DynamicSlice(
+    const ComputationDataHandle& operand,
+    const ComputationDataHandle& start_indices,
+    tensorflow::gtl::ArraySlice<int64> slice_sizes) {
+  return builder_.DynamicSlice(operand, start_indices, slice_sizes);
+}
+
+ComputationDataHandle LocalComputationBuilder::DynamicUpdateSlice(
+    const ComputationDataHandle& operand, const ComputationDataHandle& update,
+    const ComputationDataHandle& start_indices) {
+  return builder_.DynamicUpdateSlice(operand, update, start_indices);
+}
+
+ComputationDataHandle LocalComputationBuilder::ConcatInDim(
+    tensorflow::gtl::ArraySlice<ComputationDataHandle> operands,
+    int64 dimension) {
+  return builder_.ConcatInDim(operands, dimension);
+}
+
+ComputationDataHandle LocalComputationBuilder::Select(
+    const ComputationDataHandle& pred, const ComputationDataHandle& on_true,
+    const ComputationDataHandle& on_false) {
+  return builder_.Select(pred, on_true, on_false);
+}
+
+ComputationDataHandle LocalComputationBuilder::Tuple(
+    tensorflow::gtl::ArraySlice<ComputationDataHandle> elements) {
+  return builder_.Tuple(elements);
+}
+
+ComputationDataHandle LocalComputationBuilder::GetTupleElement(
+    const ComputationDataHandle& tuple_data, int64 index) {
+  return builder_.GetTupleElement(tuple_data, index);
+}
+
+ComputationDataHandle LocalComputationBuilder::Dot(
+    const ComputationDataHandle& lhs, const ComputationDataHandle& rhs) {
+  return builder_.Dot(lhs, rhs);
+}
+
+ComputationDataHandle LocalComputationBuilder::ConvertElementType(
+    const ComputationDataHandle& operand, PrimitiveType new_element_type) {
+  return builder_.ConvertElementType(operand, new_element_type);
+}
+
+ComputationDataHandle LocalComputationBuilder::Call(
+    const LocalComputation& local_computation,
+    tensorflow::gtl::ArraySlice<ComputationDataHandle> operands) {
+  return builder_.Call(local_computation.computation(), operands);
+}
+
+ComputationDataHandle LocalComputationBuilder::Transpose(
+    const ComputationDataHandle& operand,
+    tensorflow::gtl::ArraySlice<int64> permutation) {
+  return builder_.Transpose(operand, permutation);
+}
+
+ComputationDataHandle LocalComputationBuilder::Map(
+    tensorflow::gtl::ArraySlice<ComputationDataHandle> operands,
+    const LocalComputation& local_computation,
+    tensorflow::gtl::ArraySlice<int64> dimensions,
+    tensorflow::gtl::ArraySlice<ComputationDataHandle> static_operands) {
+  return builder_.Map(operands, local_computation.computation(), dimensions,
+                      static_operands);
+}
+
+ComputationDataHandle LocalComputationBuilder::Reduce(
+    const ComputationDataHandle& operand,
+    const ComputationDataHandle& init_value,
+    const LocalComputation& local_computation,
+    tensorflow::gtl::ArraySlice<int64> dimensions_to_reduce) {
+  return builder_.Reduce(operand, init_value, local_computation.computation(),
+                         dimensions_to_reduce);
+}
+
+ComputationDataHandle LocalComputationBuilder::While(
+    const LocalComputation& condition, const LocalComputation& body,
+    const ComputationDataHandle& init) {
+  return builder_.While(condition.computation(), body.computation(), init);
+}
+
+#define _FORWARD(method_name, return_sig, args_sig, args)    \
+  return_sig LocalComputationBuilder::method_name args_sig { \
+    return builder_.method_name args;                        \
+  }
+
+#define _FORWARD_UNOP(method_name)             \
+  _FORWARD(method_name, ComputationDataHandle, \
+           (const ComputationDataHandle& operand), (operand))
+
+#define _FORWARD_BINOP(method_name)                                        \
+  _FORWARD(                                                                \
+      method_name, ComputationDataHandle,                                  \
+      (const ComputationDataHandle& lhs, const ComputationDataHandle& rhs, \
+       tensorflow::gtl::ArraySlice<int64> broadcast_dimensions),           \
+      (lhs, rhs, broadcast_dimensions))
+
+_FORWARD_BINOP(Eq)
+_FORWARD_BINOP(Ne)
+_FORWARD_BINOP(Ge)
+_FORWARD_BINOP(Gt)
+_FORWARD_BINOP(Lt)
+_FORWARD_BINOP(Le)
+_FORWARD_BINOP(Add)
+_FORWARD_BINOP(Sub)
+_FORWARD_BINOP(Mul)
+_FORWARD_BINOP(Div)
+_FORWARD_BINOP(Rem)
+_FORWARD_BINOP(Max)
+_FORWARD_BINOP(Min)
+_FORWARD_BINOP(And)
+_FORWARD_BINOP(Or)
+_FORWARD_UNOP(Not)
+_FORWARD_UNOP(Abs)
+_FORWARD_UNOP(Exp)
+_FORWARD_UNOP(Floor)
+_FORWARD_UNOP(Ceil)
+_FORWARD_UNOP(Log)
+_FORWARD_UNOP(Sign)
+_FORWARD_UNOP(Cos)
+_FORWARD_UNOP(Sin)
+_FORWARD_UNOP(Tanh)
+_FORWARD_UNOP(SqrtF32)
+_FORWARD_UNOP(SquareF32)
+_FORWARD_BINOP(Pow)
+_FORWARD_UNOP(IsFinite)
+_FORWARD_UNOP(ReciprocalF32)
+_FORWARD_UNOP(Neg)
+_FORWARD_UNOP(Sort)
+
+#undef _FORWARD
+#undef _FORWARD_UNOP
+#undef _FORWARD_BINOP
+
+}  // namespace swig
+
+}  // namespace xla
diff --git a/tensorflow/compiler/xla/python/local_computation_builder.h b/tensorflow/compiler/xla/python/local_computation_builder.h
new file mode 100644
index 0000000000..cbab45a5f0
--- /dev/null
+++ b/tensorflow/compiler/xla/python/local_computation_builder.h
@@ -0,0 +1,210 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_COMPILER_XLA_PYTHON_LOCAL_COMPUTATION_BUILDER_H_
+#define TENSORFLOW_COMPILER_XLA_PYTHON_LOCAL_COMPUTATION_BUILDER_H_
+
+#include "tensorflow/compiler/xla/client/client_library.h"
+#include "tensorflow/compiler/xla/client/computation_builder.h"
+#include "tensorflow/compiler/xla/client/local_client.h"
+#include "tensorflow/core/lib/gtl/array_slice.h"
+
+namespace xla {
+
+namespace swig {
+
+// Wraps a LocalExecutable produced by compiling a
+// LocalComputation. The Execute method forwards to that of the
+// underlying LocalExecutable, and additionally handles tranferring
+// arguments and return values in and back out of the client library's
+// local client. This class is intended to be made available to Python
+// via SWIG.
+class CompiledLocalComputation {
+ public:
+  CompiledLocalComputation(std::unique_ptr<LocalExecutable> executable);
+  std::unique_ptr<Literal> Execute(const std::vector<Literal>& arguments);
+
+ private:
+  std::unique_ptr<LocalExecutable> executable_;
+};
+
+// Wraps a Computation produced by a LocalComputationBuilder. The
+// Compile method compiles the computation to a (local) executable via
+// the client library's local client. This class is intended to be
+// made available to Python via SWIG.
+class LocalComputation {
+ public:
+  LocalComputation(std::unique_ptr<Computation> computation);
+  CompiledLocalComputation* Compile(const std::vector<Shape>& argument_shapes);
+  const Computation& computation() const;
+
+ private:
+  std::unique_ptr<Computation> computation_;
+};
+
+// Wraps the ComputationBuilder API in order to:
+// - Support consumption by SWIG in order to be made available to
+//   Python.
+// - Set up the underlying builder to use the client library's
+//   LocalClient.
+// - Wrap Computations in LocalComputations for Python access.
+// - Correspondingly unwrap incoming LocalComputations.
+class LocalComputationBuilder {
+ public:
+  LocalComputationBuilder(const string& computation_name);
+
+  LocalComputation* Build();
+
+  ComputationDataHandle Parameter(int64 parameter_number, const Shape& shape,
+                                  const string& name);
+
+  std::unique_ptr<Shape> GetShape(const ComputationDataHandle& operand);
+
+  ComputationDataHandle ConstantLiteral(const Literal& literal);
+
+  ComputationDataHandle Broadcast(
+      const ComputationDataHandle& operand,
+      tensorflow::gtl::ArraySlice<int64> broadcast_sizes);
+
+  ComputationDataHandle Reshape(const ComputationDataHandle& operand,
+                                tensorflow::gtl::ArraySlice<int64> dimensions,
+                                tensorflow::gtl::ArraySlice<int64> new_sizes);
+
+  ComputationDataHandle Slice(const ComputationDataHandle& operand,
+                              tensorflow::gtl::ArraySlice<int64> start_indices,
+                              tensorflow::gtl::ArraySlice<int64> limit_indices,
+                              tensorflow::gtl::ArraySlice<int64> strides);
+
+  ComputationDataHandle DynamicSlice(
+      const ComputationDataHandle& operand,
+      const ComputationDataHandle& start_indices,
+      tensorflow::gtl::ArraySlice<int64> slice_sizes);
+
+  ComputationDataHandle DynamicUpdateSlice(
+      const ComputationDataHandle& operand, const ComputationDataHandle& update,
+      const ComputationDataHandle& start_indices);
+
+  ComputationDataHandle ConcatInDim(
+      tensorflow::gtl::ArraySlice<ComputationDataHandle> operands,
+      int64 dimension);
+
+  ComputationDataHandle Select(const ComputationDataHandle& pred,
+                               const ComputationDataHandle& on_true,
+                               const ComputationDataHandle& on_false);
+
+  ComputationDataHandle Tuple(
+      tensorflow::gtl::ArraySlice<ComputationDataHandle> elements);
+
+  ComputationDataHandle GetTupleElement(const ComputationDataHandle& tuple_data,
+                                        int64 index);
+
+  ComputationDataHandle Dot(const ComputationDataHandle& lhs,
+                            const ComputationDataHandle& rhs);
+
+  ComputationDataHandle ConvertElementType(const ComputationDataHandle& operand,
+                                           PrimitiveType new_element_type);
+
+  ComputationDataHandle Call(
+      const LocalComputation& local_computation,
+      tensorflow::gtl::ArraySlice<ComputationDataHandle> operands);
+
+  ComputationDataHandle Transpose(
+      const ComputationDataHandle& operand,
+      tensorflow::gtl::ArraySlice<int64> permutation);
+
+  ComputationDataHandle Map(
+      tensorflow::gtl::ArraySlice<ComputationDataHandle> operands,
+      const LocalComputation& local_computation,
+      tensorflow::gtl::ArraySlice<int64> dimensions,
+      tensorflow::gtl::ArraySlice<ComputationDataHandle> static_operands);
+
+  ComputationDataHandle Reduce(
+      const ComputationDataHandle& operand,
+      const ComputationDataHandle& init_value,
+      const LocalComputation& local_computation,
+      tensorflow::gtl::ArraySlice<int64> dimensions_to_reduce);
+
+  ComputationDataHandle While(const LocalComputation& condition,
+                              const LocalComputation& body,
+                              const ComputationDataHandle& init);
+
+#define _FORWARD(method_name, return_sig, args_sig) \
+  return_sig method_name args_sig;
+
+#define _FORWARD_UNOP(method_name)             \
+  _FORWARD(method_name, ComputationDataHandle, \
+           (const ComputationDataHandle& operand))
+
+#define _FORWARD_BINOP(method_name)                                        \
+  _FORWARD(                                                                \
+      method_name, ComputationDataHandle,                                  \
+      (const ComputationDataHandle& lhs, const ComputationDataHandle& rhs, \
+       tensorflow::gtl::ArraySlice<int64> broadcast_dimensions))
+
+  _FORWARD_BINOP(Eq)
+  _FORWARD_BINOP(Ne)
+  _FORWARD_BINOP(Ge)
+  _FORWARD_BINOP(Gt)
+  _FORWARD_BINOP(Lt)
+  _FORWARD_BINOP(Le)
+  _FORWARD_BINOP(Add)
+  _FORWARD_BINOP(Sub)
+  _FORWARD_BINOP(Mul)
+  _FORWARD_BINOP(Div)
+  _FORWARD_BINOP(Rem)
+  _FORWARD_BINOP(Max)
+  _FORWARD_BINOP(Min)
+  _FORWARD_BINOP(And)
+  _FORWARD_BINOP(Or)
+  _FORWARD_UNOP(Not)
+  _FORWARD_UNOP(Abs)
+  _FORWARD_UNOP(Exp)
+  _FORWARD_UNOP(Floor)
+  _FORWARD_UNOP(Ceil)
+  _FORWARD_UNOP(Log)
+  _FORWARD_UNOP(Sign)
+  _FORWARD_UNOP(Cos)
+  _FORWARD_UNOP(Sin)
+  _FORWARD_UNOP(Tanh)
+  _FORWARD_UNOP(SqrtF32)
+  _FORWARD_UNOP(SquareF32)
+  _FORWARD_BINOP(Pow)
+  _FORWARD_UNOP(IsFinite)
+  _FORWARD_UNOP(ReciprocalF32)
+  _FORWARD_UNOP(Neg)
+  _FORWARD_UNOP(Sort)
+
+#undef _FORWARD
+#undef _FORWARD_UNOP
+#undef _FORWARD_BINOP
+
+ private:
+  ComputationBuilder builder_;
+};
+
+static void DeleteLocalComputation(LocalComputation* computation) {
+  delete computation;
+}
+
+static void DeleteCompiledLocalComputation(
+    CompiledLocalComputation* computation) {
+  delete computation;
+}
+
+}  // namespace swig
+
+}  // namespace xla
+
+#endif  // TENSORFLOW_COMPILER_XLA_PYTHON_LOCAL_COMPUTATION_BUILDER_H_
diff --git a/tensorflow/compiler/xla/python/local_computation_builder.i b/tensorflow/compiler/xla/python/local_computation_builder.i
new file mode 100644
index 0000000000..ac8f3e4277
--- /dev/null
+++ b/tensorflow/compiler/xla/python/local_computation_builder.i
@@ -0,0 +1,348 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+// SWIG typemaps and declarations for building, compiling, and
+// executing XLA computations, wrapping most of what is declared in
+// local_computation_builder.h.
+//
+// The typemaps below implement/assert the following correspondences
+// (with elaborations below):
+//
+//    C++                                  Python
+// -------------------------------------+---------------------------------------
+//  ComputationDataHandle              <-> long
+//  ArraySlice<int64>                  <-  sequence of long
+//  ArraySlice<ComputationDataHandle>  <-  sequence of long
+//  Literal                            <-> (nested tuple of) numpy ndarray
+//  std::vector<Literal>               <-  sequence of (nested tuple of) ndarray
+//  Shape                              <-> pair holding (dtype, dimensions)
+//  std::vector<Shape>                 <-  sequence of shape information pairs
+//  PrimitiveType                      <-  int
+//
+// Arrows indicate whether a conversion only ever occurs in one
+// direction, or whether it is maintained bidirectionally. Also,
+// "long" and "int" denote the Python types so named, not C.
+//
+// The Python objects corresponding to C++ Literals have the type:
+//
+//   T = ndarray | (T, ...)
+//
+// where a terminal numpy ndarray translates to a Literal with a
+// non-tuple Shape, an XLA primitive element type corresponding to the
+// ndarray's dtype. Meanwhile, a non-terminal "tuple of T" translates
+// to a tuple-shaped Literal whose tuple components are translated
+// recursively. For example, if x is a numpy ndarray in Python, with
+// shape (2, 3) and dtype of dtype('float32'), then x translates to a
+// Literal with rank 2, dimension 2 and 3, and XLA primitive type
+// F32. Meanwhile,
+//
+//   (x, (x, x), (x,)),
+//
+// translates to a tuple-shaped XLA Literal, whose component subshapes
+// are a 2x3 F32-shaped literal followed by two tuple-shaped literals.
+//
+// The Python objects corresponding to C++ Shapes have the type:
+//
+//   T            = (dtype, S)
+//   S            = DIMENSIONS | TUPLE_SHAPES
+//   DIMENSIONS   = (int, ...)
+//   TUPLE_SHAPES = (T, ...)
+//
+// In the pair described by the T rule, the terminal dtype determines
+// whether S expands as DIMENSIONS or TUPLE_SHAPES. Namely if it is
+// dtype('O'), numpy's object dtype, the structure represents a tuple
+// shape and the expansion of the non-terminal S is
+// TUPLE_SHAPES. Otherwise, dtype describes a primitive element type
+// and S expands into DIMENSIONS giving dimension sizes. For example:
+//
+//   (dtype('float32'), (3, 5, 7))
+//
+// describes a 3x5x7 array of F32s, and
+//
+//   (dtype('O'), ((dtype('float32'), (2, 3)),
+//                 (dtype('float64'), (4, 5))))
+//
+// describes a tuple shape with two subshapes: the first a 2x3 F32,
+// and the other a 4x5 F64.
+//
+// The Python int corresponding to a PrimitiveType enum must be valid
+// per xla_data.proto (e.g. xla_data.PRED, xla_data.F32).
+//
+// The SWIG object wrappers generated by this file are not intended
+// for end use, but rather for internal use in the Python XLA client,
+// xla_client.py.
+//
+// One central reason for the Python-side indirection is that the
+// Python-side objects produced by the typemaps in this file are
+// further packaged up by xla_client before being passed on. For
+// instance, xla_client wraps the long produced for a C++
+// ComputationDataHandle in a Python ComputationDataHandle proto,
+// rather than exposing a raw long outside of the client. Similarly,
+// the Python pair produced for a C++ Shape is further wrapped in a
+// Python class (xla_client.Shape) so as not to expose the raw pair
+// externally.
+//
+// Other SWIG object wrappers (e.g. of LocalComputation) are further
+// wrapped by xla_client in order to set up a custom destructor that
+// triggers memory deallocation on the C++ side.
+
+%include "tensorflow/python/platform/base.i"
+
+%{
+// Must be included first
+#include "tensorflow/python/lib/core/numpy.h"
+
+#include "tensorflow/compiler/xla/literal_util.h"
+#include "tensorflow/compiler/xla/shape_util.h"
+#include "tensorflow/compiler/xla/xla_data.pb.h"
+#include "tensorflow/core/lib/gtl/array_slice.h"
+#include "tensorflow/compiler/xla/python/numpy_bridge.h"
+#include "tensorflow/compiler/xla/python/local_computation_builder.h"
+
+using namespace xla;
+using namespace xla::swig;
+%}
+
+// Required to use PyArray_* functions.
+%init %{
+tensorflow::ImportNumpy();
+%}
+
+// ComputationDataHandle
+
+%typemap(in) const ComputationDataHandle& (ComputationDataHandle temp) {
+  const int64 handle = numpy::PyIntOrPyLongToLong($input);
+  if (handle == -1 && PyErr_Occurred()) {
+    return NULL;
+  }
+  temp.set_handle(handle);
+  $1 = &temp;
+}
+
+%typemap(out) ComputationDataHandle {
+  $result = numpy::LongToPyIntOrPyLong($1.handle());
+}
+
+// ArraySlice<int64>
+
+%typemap(in) tensorflow::gtl::ArraySlice<int64>
+    (std::vector<int64> temps) {
+  if (!PySequence_Check($input)) {
+    PyErr_SetString(PyExc_TypeError, "Argument is not a sequence");
+    return NULL;
+  }
+  const int size = PySequence_Size($input);
+  temps.resize(size);
+  for (int i = 0; i < size; ++i) {
+    PyObject* o = PySequence_GetItem($input, i);
+    PyObject* py_int = numpy::PyNumberToPyInt(o);
+    if (!py_int) {
+      PyErr_SetString(
+          PyExc_TypeError,
+          "Argument sequence element cannot be converted to int");
+      Py_DECREF(o);
+      return NULL;
+    }
+    temps[i] = numpy::PyIntOrPyLongToLong(py_int);
+    if (temps[i] == -1 && PyErr_Occurred()) {
+      Py_DECREF(py_int);
+      Py_DECREF(o);
+      return NULL;
+    }
+    Py_DECREF(py_int);
+    Py_DECREF(o);
+  }
+  $1 = temps;
+}
+
+// ComputationDataHandle
+
+%typemap(in) tensorflow::gtl::ArraySlice<ComputationDataHandle>
+    (std::vector<ComputationDataHandle> temps) {
+  if (!PySequence_Check($input)) {
+    PyErr_SetString(PyExc_TypeError, "Argument is not a sequence");
+    return NULL;
+  }
+  const int size = PySequence_Size($input);
+  temps.resize(size);
+  for (int i = 0; i < size; ++i) {
+    PyObject* o = PySequence_GetItem($input, i);
+    PyObject* py_int = numpy::PyNumberToPyInt(o);
+    if (!py_int) {
+      PyErr_SetString(
+          PyExc_TypeError,
+          "Argument sequence element cannot be converted to int");
+      return NULL;
+    }
+    const int64 handle = numpy::PyIntOrPyLongToLong(py_int);
+    if (handle == -1 && PyErr_Occurred()) {
+      Py_DECREF(py_int);
+      Py_DECREF(o);
+      return NULL;
+    }
+    temps[i].set_handle(handle);
+    Py_DECREF(py_int);
+    Py_DECREF(o);
+  }
+  $1 = temps;
+}
+
+// Literal
+
+%typemap(in) const Literal& (std::unique_ptr<Literal> temp) {
+  temp = numpy::XlaLiteralFromPyObject($input);
+  $1 = &*temp;
+}
+
+%typemap(out) std::unique_ptr<Literal> {
+  $result = numpy::PyObjectFromXlaLiteral(*$1);
+}
+
+%typemap(in) const std::vector<Literal>& (std::vector<Literal> temps) {
+  if (!PySequence_Check($input)) {
+    PyErr_SetString(PyExc_TypeError, "Argument is not a sequence");
+    return NULL;
+  }
+  const int size = PySequence_Size($input);
+  for (int i = 0; i < size; ++i) {
+    PyObject* o = PySequence_GetItem($input, i);
+    temps.push_back(*numpy::XlaLiteralFromPyObject(o));
+    Py_DECREF(o);
+  }
+  $1 = &temps;
+}
+
+// Shape
+
+%typemap(in) const Shape& (Shape temp) {
+  if (!numpy::CheckPyShapeInfo($input)) {
+    return NULL;
+  }
+  temp = numpy::XlaShapeFromPyShapeInfo($input);
+  $1 = &temp;
+}
+
+%typemap(out) std::unique_ptr<Shape> {
+  $result = numpy::PyShapeInfoFromXlaShape(*$1);
+}
+
+%typemap(in) const std::vector<Shape>& (std::vector<Shape> temps) {
+  if (!PySequence_Check($input)) {
+    PyErr_SetString(PyExc_TypeError, "Argument is not a sequence");
+    return NULL;
+  }
+  const int size = PySequence_Size($input);
+  for (int i = 0; i < size; ++i) {
+    PyObject* o = PySequence_GetItem($input, i);
+    if (!numpy::CheckPyShapeInfo(o)) {
+      Py_DECREF(o);
+      return NULL;
+    }
+    temps.push_back(numpy::XlaShapeFromPyShapeInfo(o));
+    Py_DECREF(o);
+  }
+  $1 = &temps;
+}
+
+// PrimitiveType
+
+%typemap(in) PrimitiveType {
+  PyObject* py_int = numpy::PyNumberToPyInt($input);
+  if (!py_int) {
+    PyErr_SetString(PyExc_TypeError, "Argument cannot be converted to int");
+    return NULL;
+  }
+  const long value = numpy::PyIntOrPyLongToLong(py_int);
+  if (value == -1 && PyErr_Occurred()) {
+    Py_DECREF(py_int);
+    return NULL;
+  }
+  if (!PrimitiveType_IsValid(value)) {
+    PyErr_SetString(
+        PyExc_TypeError, "Argument not valid for PrimitiveType enum");
+    Py_DECREF(py_int);
+    return NULL;
+  }
+  $1 = static_cast<PrimitiveType>(value);
+}
+
+%ignoreall
+%unignore xla;
+%unignore xla::swig;
+%unignore xla::swig::CompiledLocalComputation;
+%unignore xla::swig::CompiledLocalComputation::Execute;
+%unignore xla::swig::LocalComputation;
+%unignore xla::swig::LocalComputation::Compile;
+%unignore xla::swig::LocalComputationBuilder;
+%unignore xla::swig::LocalComputationBuilder::LocalComputationBuilder;
+%unignore xla::swig::LocalComputationBuilder::Build;
+%unignore xla::swig::LocalComputationBuilder::Parameter;
+%unignore xla::swig::LocalComputationBuilder::GetShape;
+%unignore xla::swig::LocalComputationBuilder::ConstantLiteral;
+%unignore xla::swig::LocalComputationBuilder::ConstantR0;
+%unignore xla::swig::LocalComputationBuilder::Broadcast;
+%unignore xla::swig::LocalComputationBuilder::Reshape;
+%unignore xla::swig::LocalComputationBuilder::Slice;
+%unignore xla::swig::LocalComputationBuilder::DynamicSlice;
+%unignore xla::swig::LocalComputationBuilder::DynamicUpdateSlice;
+%unignore xla::swig::LocalComputationBuilder::ConcatInDim;
+%unignore xla::swig::LocalComputationBuilder::Select;
+%unignore xla::swig::LocalComputationBuilder::Tuple;
+%unignore xla::swig::LocalComputationBuilder::GetTupleElement;
+%unignore xla::swig::LocalComputationBuilder::ConvertElementType;
+%unignore xla::swig::LocalComputationBuilder::Call;
+%unignore xla::swig::LocalComputationBuilder::Transpose;
+%unignore xla::swig::LocalComputationBuilder::Map;
+%unignore xla::swig::LocalComputationBuilder::Reduce;
+%unignore xla::swig::LocalComputationBuilder::While;
+%unignore xla::swig::LocalComputationBuilder::Eq;
+%unignore xla::swig::LocalComputationBuilder::Ne;
+%unignore xla::swig::LocalComputationBuilder::Ge;
+%unignore xla::swig::LocalComputationBuilder::Gt;
+%unignore xla::swig::LocalComputationBuilder::Lt;
+%unignore xla::swig::LocalComputationBuilder::Le;
+%unignore xla::swig::LocalComputationBuilder::Dot;
+%unignore xla::swig::LocalComputationBuilder::Add;
+%unignore xla::swig::LocalComputationBuilder::Sub;
+%unignore xla::swig::LocalComputationBuilder::Mul;
+%unignore xla::swig::LocalComputationBuilder::Div;
+%unignore xla::swig::LocalComputationBuilder::Rem;
+%unignore xla::swig::LocalComputationBuilder::Max;
+%unignore xla::swig::LocalComputationBuilder::Min;
+%unignore xla::swig::LocalComputationBuilder::And;
+%unignore xla::swig::LocalComputationBuilder::Or;
+%unignore xla::swig::LocalComputationBuilder::Not;
+%unignore xla::swig::LocalComputationBuilder::Abs;
+%unignore xla::swig::LocalComputationBuilder::Exp;
+%unignore xla::swig::LocalComputationBuilder::Floor;
+%unignore xla::swig::LocalComputationBuilder::Ceil;
+%unignore xla::swig::LocalComputationBuilder::Log;
+%unignore xla::swig::LocalComputationBuilder::Sign;
+%unignore xla::swig::LocalComputationBuilder::Cos;
+%unignore xla::swig::LocalComputationBuilder::Sin;
+%unignore xla::swig::LocalComputationBuilder::Tanh;
+%unignore xla::swig::LocalComputationBuilder::SqrtF32;
+%unignore xla::swig::LocalComputationBuilder::SquareF32;
+%unignore xla::swig::LocalComputationBuilder::Pow;
+%unignore xla::swig::LocalComputationBuilder::IsFinite;
+%unignore xla::swig::LocalComputationBuilder::ReciprocalF32;
+%unignore xla::swig::LocalComputationBuilder::Neg;
+%unignore xla::swig::LocalComputationBuilder::Sort;
+%unignore xla::swig::DeleteLocalComputation;
+%unignore xla::swig::DeleteCompiledLocalComputation;
+
+%include "tensorflow/compiler/xla/python/local_computation_builder.h"
+
+%unignoreall
diff --git a/tensorflow/compiler/xla/python/numpy_bridge.cc b/tensorflow/compiler/xla/python/numpy_bridge.cc
new file mode 100644
index 0000000000..b30bdc3669
--- /dev/null
+++ b/tensorflow/compiler/xla/python/numpy_bridge.cc
@@ -0,0 +1,389 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/compiler/xla/python/numpy_bridge.h"
+#include "tensorflow/compiler/xla/shape_util.h"
+#include "tensorflow/core/platform/logging.h"
+
+namespace xla {
+
+namespace swig {
+
+namespace numpy {
+
+int PrimitiveTypeToNumpyType(PrimitiveType primitive_type) {
+  switch (primitive_type) {
+    case PRED:
+      return NPY_BOOL;
+    case S8:
+      return NPY_INT8;
+    case S16:
+      return NPY_INT16;
+    case S32:
+      return NPY_INT32;
+    case S64:
+      return NPY_INT64;
+    case U8:
+      return NPY_UINT8;
+    case U16:
+      return NPY_UINT16;
+    case U32:
+      return NPY_UINT32;
+    case U64:
+      return NPY_UINT64;
+    case F16:
+      return NPY_FLOAT16;
+    case F32:
+      return NPY_FLOAT32;
+    case F64:
+      return NPY_FLOAT64;
+    case TUPLE:
+      return NPY_OBJECT;
+    default:
+      LOG(FATAL) << "No Numpy type for XLA primitive type " << primitive_type;
+  }
+}
+
+PrimitiveType NumpyTypeToPrimitiveType(int np_type) {
+  switch (np_type) {
+    case NPY_BOOL:
+      return PRED;
+    case NPY_INT8:
+      return S8;
+    case NPY_INT16:
+      return S16;
+    case NPY_INT32:
+      return S32;
+    case NPY_INT64:
+      return S64;
+    case NPY_UINT8:
+      return U8;
+    case NPY_UINT16:
+      return U16;
+    case NPY_UINT32:
+      return U32;
+    case NPY_UINT64:
+      return U64;
+    case NPY_FLOAT16:
+      return F16;
+    case NPY_FLOAT32:
+      return F32;
+    case NPY_FLOAT64:
+      return F64;
+    case NPY_OBJECT:
+      return TUPLE;
+    default:
+      LOG(FATAL) << "No XLA primitive type for Numpy type " << np_type;
+  }
+}
+
+bool NumpyTypeIsValid(int np_type) {
+  switch (np_type) {
+    case NPY_BOOL:
+    case NPY_INT8:
+    case NPY_INT16:
+    case NPY_INT32:
+    case NPY_INT64:
+    case NPY_UINT8:
+    case NPY_UINT16:
+    case NPY_UINT32:
+    case NPY_UINT64:
+    case NPY_FLOAT16:
+    case NPY_FLOAT32:
+    case NPY_FLOAT64:
+    case NPY_OBJECT:
+      return true;
+    default:
+      return false;
+  }
+}
+
+PyObject* PyShapeInfoFromXlaShape(const Shape& shape) {
+  int np_typenum = PrimitiveTypeToNumpyType(shape.element_type());
+  PyArray_Descr* np_dtype = PyArray_DescrFromType(np_typenum);
+
+  PyObject* dimensions;
+  if (ShapeUtil::IsTuple(shape)) {
+    int num_elements = ShapeUtil::TupleElementCount(shape);
+    dimensions = PyTuple_New(ShapeUtil::TupleElementCount(shape));
+    for (int i = 0; i < num_elements; ++i) {
+      PyTuple_SET_ITEM(
+          dimensions, i,
+          PyShapeInfoFromXlaShape(ShapeUtil::GetTupleElementShape(shape, i)));
+    }
+  } else {
+    int rank = ShapeUtil::Rank(shape);
+    dimensions = PyTuple_New(rank);
+    for (int i = 0; i < rank; ++i) {
+      PyTuple_SET_ITEM(dimensions, i,
+                       LongToPyIntOrPyLong(ShapeUtil::GetDimension(shape, i)));
+    }
+  }
+  return PyTuple_Pack(2, np_dtype, dimensions);
+}
+
+// Precondition: o->ob_type == &PyArrayDescr_Type
+static int NumpyTypenum(PyObject* o) {
+  return reinterpret_cast<PyArray_Descr*>(o)->type_num;
+}
+
+bool CheckPyShapeInfo(PyObject* o) {
+  // The object is a tuple (a pair)
+  if (!PyTuple_Check(o)) {
+    PyErr_SetString(PyExc_TypeError, "Shape record must be a tuple");
+    return false;
+  }
+  if (PyTuple_Size(o) != 2) {
+    PyErr_SetString(PyExc_ValueError, "Shape record tuple must be of length 2");
+    return false;
+  }
+
+  // It has a first element, which is a numpy dtype object
+  PyObject* first = PyTuple_GetItem(o, 0);
+  if (!first) {
+    return false;
+  }
+  if (first->ob_type != &PyArrayDescr_Type) {
+    PyErr_SetString(
+        PyExc_TypeError,
+        "Shape record does not have a numpy dtype as its first element");
+    return false;
+  }
+  const int np_type = NumpyTypenum(first);
+  if (!NumpyTypeIsValid(np_type)) {
+    PyErr_SetString(PyExc_ValueError,
+                    "Shape record has an invalid integer dtype");
+    return false;
+  }
+
+  // It has a second element, which is a tuple, either of shape
+  // records or of Python ints
+  PyObject* second = PyTuple_GetItem(o, 1);
+  if (!second) {
+    return false;
+  }
+  if (!PyTuple_Check(second)) {
+    PyErr_SetString(PyExc_TypeError,
+                    "Shape record does not have a tuple as its second element");
+    return false;
+  }
+  const int length = PyTuple_Size(second);
+  const PrimitiveType element_type = NumpyTypeToPrimitiveType(np_type);
+  for (int i = 0; i < length; i++) {
+    PyObject* dimension = PyTuple_GetItem(second, i);
+    if (element_type == TUPLE) {
+      if (!CheckPyShapeInfo(dimension)) {
+        return false;
+      }
+    } else if (!CheckPyIntOrLong(dimension)) {
+      PyErr_SetString(PyExc_TypeError,
+                      "Non-tuple shape record has a non-integer dimension");
+      return false;
+    }
+  }
+
+  return true;
+}
+
+// Precondition: CheckPyShapeInfo(o)
+Shape XlaShapeFromPyShapeInfo(PyObject* o) {
+  const int np_type = NumpyTypenum(PyTuple_GetItem(o, 0));
+  const PrimitiveType element_type = NumpyTypeToPrimitiveType(np_type);
+  PyObject* py_dimensions = PyTuple_GetItem(o, 1);
+  const int length = PyTuple_Size(py_dimensions);
+  if (element_type == TUPLE) {
+    std::vector<Shape> subshapes;
+    subshapes.reserve(length);
+    for (int i = 0; i < length; i++) {
+      subshapes.push_back(
+          XlaShapeFromPyShapeInfo(PyTuple_GetItem(py_dimensions, i)));
+    }
+    return ShapeUtil::MakeTupleShape(subshapes);
+  } else {
+    std::vector<int64> dimensions(length);
+    for (int i = 0; i < length; i++) {
+      dimensions[i] = PyIntOrPyLongToLong(PyTuple_GetItem(py_dimensions, i));
+      if (dimensions[i] == -1) {
+        CHECK(!PyErr_Occurred());
+      }
+    }
+    return ShapeUtil::MakeShape(element_type, dimensions);
+  }
+}
+
+PyObject* PyObjectFromXlaLiteral(const Literal& literal) {
+  if (ShapeUtil::IsTuple(literal.shape())) {
+    const std::vector<Literal>& tuple_literals = literal.tuple_literals();
+    int num_elements = ShapeUtil::TupleElementCount(literal.shape());
+    PyObject* tuple = PyTuple_New(num_elements);
+    for (int i = 0; i < num_elements; i++) {
+      PyTuple_SET_ITEM(tuple, i, PyObjectFromXlaLiteral(tuple_literals[i]));
+    }
+    return tuple;
+  } else {
+    int rank = ShapeUtil::Rank(literal.shape());
+    std::vector<long> dimensions(rank);  // NOLINT - PyArray requires a long*
+    for (int i = 0; i < rank; i++) {
+      dimensions[i] = ShapeUtil::GetDimension(literal.shape(), i);
+    }
+    int np_type = PrimitiveTypeToNumpyType(literal.shape().element_type());
+    PyObject* array =
+        PyArray_EMPTY(rank, dimensions.data(), np_type, /*fortran=*/0);
+    CopyLiteralToNumpyArray(np_type, literal,
+                            reinterpret_cast<PyArrayObject*>(array));
+    return array;
+  }
+}
+
+std::unique_ptr<Literal> XlaLiteralFromPyObject(PyObject* o) {
+  if (PyTuple_Check(o)) {
+    int num_elements = PyTuple_Size(o);
+    std::vector<std::unique_ptr<Literal>> elements;
+    elements.reserve(num_elements);
+    for (int i = 0; i < num_elements; i++) {
+      PyObject* element = PyTuple_GetItem(o, i);
+      elements.push_back(XlaLiteralFromPyObject(element));
+    }
+    return Literal::MakeTupleOwned(std::move(elements));
+  } else if (PyArray_Check(o)) {
+    PyArrayObject* py_array = reinterpret_cast<PyArrayObject*>(o);
+    int rank = PyArray_NDIM(py_array);
+    std::vector<int64> dimensions(rank);
+    for (int i = 0; i < rank; i++) {
+      dimensions[i] = PyArray_DIM(py_array, i);
+    }
+    int np_type = PyArray_TYPE(py_array);
+    auto literal = Literal::CreateFromDimensions(
+        NumpyTypeToPrimitiveType(np_type), dimensions);
+    CopyNumpyArrayToLiteral(np_type, py_array, literal.get());
+    return literal;
+  } else {
+    LOG(FATAL)
+        << "Non-tuple or Numpy array encountered in conversion to XLA literal";
+  }
+}
+
+void CopyNumpyArrayToLiteral(int np_type, PyArrayObject* py_array,
+                             Literal* literal) {
+  switch (np_type) {
+    case NPY_BOOL:
+      CopyNumpyArrayToLiteral<bool>(py_array, literal);
+      break;
+    case NPY_INT32:
+      CopyNumpyArrayToLiteral<int32>(py_array, literal);
+      break;
+    case NPY_INT64:
+      CopyNumpyArrayToLiteral<int64>(py_array, literal);
+      break;
+    case NPY_UINT8:
+      CopyNumpyArrayToLiteral<uint8>(py_array, literal);
+      break;
+    case NPY_UINT32:
+      CopyNumpyArrayToLiteral<uint32>(py_array, literal);
+      break;
+    case NPY_UINT64:
+      CopyNumpyArrayToLiteral<uint64>(py_array, literal);
+      break;
+    case NPY_FLOAT16:
+      CopyNumpyArrayToLiteral<half>(py_array, literal);
+      break;
+    case NPY_FLOAT32:
+      CopyNumpyArrayToLiteral<float>(py_array, literal);
+      break;
+    case NPY_FLOAT64:
+      CopyNumpyArrayToLiteral<double>(py_array, literal);
+      break;
+    default:
+      LOG(FATAL) << "No XLA literal container for Numpy type" << np_type;
+  }
+}
+
+void CopyLiteralToNumpyArray(int np_type, const Literal& literal,
+                             PyArrayObject* py_array) {
+  switch (np_type) {
+    case NPY_BOOL:
+      CopyLiteralToNumpyArray<bool>(literal, py_array);
+      break;
+    case NPY_INT32:
+      CopyLiteralToNumpyArray<int32>(literal, py_array);
+      break;
+    case NPY_INT64:
+      CopyLiteralToNumpyArray<int64>(literal, py_array);
+      break;
+    case NPY_UINT8:
+      CopyLiteralToNumpyArray<uint8>(literal, py_array);
+      break;
+    case NPY_UINT32:
+      CopyLiteralToNumpyArray<uint32>(literal, py_array);
+      break;
+    case NPY_UINT64:
+      CopyLiteralToNumpyArray<uint64>(literal, py_array);
+      break;
+    case NPY_FLOAT16:
+      CopyLiteralToNumpyArray<half>(literal, py_array);
+      break;
+    case NPY_FLOAT32:
+      CopyLiteralToNumpyArray<float>(literal, py_array);
+      break;
+    case NPY_FLOAT64:
+      CopyLiteralToNumpyArray<double>(literal, py_array);
+      break;
+    default:
+      LOG(FATAL) << "No XLA literal container for Numpy type" << np_type;
+  }
+}
+
+PyObject* LongToPyIntOrPyLong(long x) {  // NOLINT
+#if PY_MAJOR_VERSION < 3
+  return PyInt_FromLong(x);
+#else
+  return PyLong_FromLong(x);
+#endif
+}
+
+long PyIntOrPyLongToLong(PyObject* o) {  // NOLINT
+#if PY_MAJOR_VERSION < 3
+  return PyInt_AsLong(o);
+#else
+  return PyLong_AsLong(o);
+#endif
+}
+
+bool CheckPyIntOrLong(PyObject* o) {
+#if PY_MAJOR_VERSION < 3
+  return PyInt_Check(o);
+#else
+  if (!PyLong_Check(o)) {
+    return false;
+  }
+  int overflow = 0;
+  PyLong_AsLongAndOverflow(o, &overflow);
+  return (overflow == 0);
+#endif
+}
+
+PyObject* PyNumberToPyInt(PyObject* o) {
+#if PY_MAJOR_VERSION < 3
+  return PyNumber_Int(o);
+#else
+  return PyNumber_Long(o);
+#endif
+}
+
+}  // namespace numpy
+
+}  // namespace swig
+
+}  // namespace xla
diff --git a/tensorflow/compiler/xla/python/numpy_bridge.h b/tensorflow/compiler/xla/python/numpy_bridge.h
new file mode 100644
index 0000000000..4e6ecbb0e8
--- /dev/null
+++ b/tensorflow/compiler/xla/python/numpy_bridge.h
@@ -0,0 +1,123 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+// These functions transform Python/Numpy data structures to XLA data
+// structures and vice versa, performing copies where
+// appropriate. Python tuples and Numpy ndarrays translate to XLA
+// tuples and XLA literals, respectively, and Numpy shape/dtype
+// information is translated to XLA shape information.
+
+#ifndef TENSORFLOW_COMPILER_XLA_PYTHON_NUMPY_BRIDGE_H_
+#define TENSORFLOW_COMPILER_XLA_PYTHON_NUMPY_BRIDGE_H_
+
+#include <algorithm>
+#include <memory>
+
+#include "tensorflow/compiler/xla/literal_util.h"
+#include "tensorflow/compiler/xla/xla_data.pb.h"
+#include "tensorflow/core/lib/gtl/array_slice.h"
+#include "tensorflow/python/lib/core/numpy.h"
+
+namespace xla {
+
+namespace swig {
+
+namespace numpy {
+
+// Maps XLA primitive types (PRED, S8, F32, ..., and TUPLE) to numpy
+// dtypes (NPY_BOOL, NPY_INT8, NPY_FLOAT32, ..., and NPY_OBJECT), and
+// vice versa.
+int PrimitiveTypeToNumpyType(PrimitiveType primitive_type);
+PrimitiveType NumpyTypeToPrimitiveType(int np_type);
+
+// Determines whether an integer-encoded Numpy dtype is valid,
+// i.e. has a supported conversion to an XLA PrimitiveType.
+bool NumpyTypeIsValid(int np_type);
+
+// Converts XLA shape information into a Python pair of the form
+// (numpy dtype, dimensions). If the XLA shape represents a tuple,
+// then the numpy dtype is NPY_OBJECT ('O') and `dimensions` is a
+// Python tuple of shape-description pairs, created
+// recursively. Otherwise, `dimensions` is a Python tuple-of-integers
+// providing the array dimensions.
+//
+// The return value is a new reference.
+PyObject* PyShapeInfoFromXlaShape(const Shape& shape);
+
+// Returns the outcome of a best-effort check that the Python object
+// is a pair of the form (numpy dtype, dimensions), as produced by
+// PyShapeInfoFromXlaShape.
+bool CheckPyShapeInfo(PyObject* o);
+
+// Performs the inverse conversion to that of PyShapeInfoFromXlaShape.
+//
+// The return value is a new reference.
+Shape XlaShapeFromPyShapeInfo(PyObject* o);
+
+// Converts an XLA literal to a Python object, either a Numpy ndarray
+// or a nested Python tuple thereof.
+//
+// To avoid transferring ownership of the data buffers that underlie
+// PyArrays and XLA literals, this function makes deep copies of all
+// array data.
+//
+// The return value is a new reference.
+PyObject* PyObjectFromXlaLiteral(const Literal& literal);
+
+// Converts a Numpy ndarray or a nested Python tuple thereof to a
+// corresponding XLA literal.
+//
+// To avoid transferring ownership of the data buffers that underlie
+// PyArrays and XLA literals, this function makes deep copies of all
+// array data.
+std::unique_ptr<Literal> XlaLiteralFromPyObject(PyObject* o);
+
+// The following functions copy array data from the buffers underlying Numpy
+// ndarrays into those underlying XLA literals, and vice versa.
+
+void CopyNumpyArrayToLiteral(int np_type, PyArrayObject* py_array,
+                             Literal* literal);
+
+void CopyLiteralToNumpyArray(int np_type, const Literal& literal,
+                             PyArrayObject* py_array);
+
+template <typename NativeT>
+void CopyNumpyArrayToLiteral(PyArrayObject* py_array, Literal* literal) {
+  NativeT* source = static_cast<NativeT*>(PyArray_DATA(py_array));
+  auto dest = literal->GetMutableArraySlice<NativeT>();
+  std::copy(source, source + PyArray_SIZE(py_array), dest.data());
+}
+
+template <typename NativeT>
+void CopyLiteralToNumpyArray(const Literal& literal, PyArrayObject* py_array) {
+  NativeT* dest = static_cast<NativeT*>(PyArray_DATA(py_array));
+  auto source = literal.GetArraySlice<NativeT>();
+  std::copy(source.begin(), source.end(), dest);
+}
+
+// Workarounds for Python 2 and 3 interop
+
+PyObject* LongToPyIntOrPyLong(long x);  // NOLINT
+long PyIntOrPyLongToLong(PyObject* o);  // NOLINT
+bool CheckPyIntOrLong(PyObject* o);
+PyObject* PyNumberToPyInt(PyObject* o);
+
+}  // namespace numpy
+
+}  // namespace swig
+
+}  // namespace xla
+
+#endif  // TENSORFLOW_COMPILER_XLA_PYTHON_NUMPY_BRIDGE_H_
diff --git a/tensorflow/compiler/xla/python/xla.i b/tensorflow/compiler/xla/python/xla.i
new file mode 100644
index 0000000000..1c4021a558
--- /dev/null
+++ b/tensorflow/compiler/xla/python/xla.i
@@ -0,0 +1,18 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+/* XLA-wide SWIG wrapper */
+
+%include "tensorflow/compiler/xla/python/local_computation_builder.i"
diff --git a/tensorflow/compiler/xla/python/xla_client.py b/tensorflow/compiler/xla/python/xla_client.py
new file mode 100644
index 0000000000..c75d54856d
--- /dev/null
+++ b/tensorflow/compiler/xla/python/xla_client.py
@@ -0,0 +1,605 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""An in-process, local XLA client in Python, supporting AOT compilation."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import itertools
+
+import numpy as np
+
+from tensorflow.compiler.xla import xla_data_pb2
+from tensorflow.compiler.xla.python import pywrap_xla as c_api
+
+_UNARY_OPS = [
+    'Not',
+    'Abs',
+    'Exp',
+    'Floor',
+    'Ceil',
+    'Log',
+    'Sign',
+    'Cos',
+    'Sin',
+    'Tanh',
+    'SqrtF32',
+    'SquareF32',
+    'IsFinite',
+    'ReciprocalF32',
+    'Neg',
+    'Sort',
+]
+
+_BINARY_OPS = [
+    'Eq',
+    'Ne',
+    'Ge',
+    'Gt',
+    'Lt',
+    'Le',
+    'Add',
+    'Sub',
+    'Mul',
+    'Div',
+    'Rem',
+    'Max',
+    'Min',
+    'And',
+    'Or',
+    'Pow',
+]
+
+# Most functions are snake_case for consistency with other modules,
+# whereas method names of ComputationBuilder and LocalComputation are
+# CamelCase for consistency with XLA.
+# pylint: disable=invalid-name
+
+XLA_ELEMENT_TYPE_TO_DTYPE = {
+    xla_data_pb2.F32: np.dtype(np.float32),
+    xla_data_pb2.F64: np.dtype(np.float64),
+    xla_data_pb2.S32: np.dtype(np.int32),
+    xla_data_pb2.S64: np.dtype(np.int64),
+    xla_data_pb2.PRED: np.dtype(np.bool),
+    xla_data_pb2.TUPLE: np.dtype(np.object),
+}
+
+DTYPE_TO_XLA_ELEMENT_TYPE = {
+    str(v): k
+    for k, v in XLA_ELEMENT_TYPE_TO_DTYPE.items()
+}
+
+
+class Shape(object):
+  """XLA shape.
+
+  Represents an XLA shape by a corresponding Python/Numpy type and a
+  list of dimensions, which are themselves Shapes in case this one
+  represents an XLA tuple.
+  """
+
+  def __init__(self, np_dtype, dimensions):
+    self.np_dtype = np_dtype
+    self._dimensions = dimensions
+
+  def element_type(self):
+    return DTYPE_TO_XLA_ELEMENT_TYPE[str(self.np_dtype)]
+
+  def is_tuple(self):
+    return self.element_type() == xla_data_pb2.TUPLE
+
+  def dimensions(self):
+    if self.is_tuple():
+      raise ValueError('Tuple shape has no dimensions')
+    return self._dimensions
+
+  def tuple_shapes(self):
+    if not self.is_tuple():
+      raise ValueError('Shape is not a tuple shape')
+    return self._dimensions
+
+  @staticmethod
+  def from_numpy(npval):
+
+    def convert(npval):
+      if isinstance(npval, tuple):
+        return Shape(np.dtype('O'), tuple(convert(elt) for elt in npval))
+      else:
+        return Shape(npval.dtype, np.shape(npval))
+
+    return convert(require_numpy_array_layout(npval))
+
+
+def _wrap_shape(shape_info):
+  dtype, dims = shape_info
+  element_type = DTYPE_TO_XLA_ELEMENT_TYPE[str(dtype)]
+  if element_type == xla_data_pb2.TUPLE:
+    dims = [_wrap_shape(subshape_info) for subshape_info in dims]
+  return Shape(dtype, dims)
+
+
+def _unwrap_shape(shape):
+  if shape.is_tuple():
+    components = tuple(
+        _unwrap_shape(subshape) for subshape in shape.tuple_shapes())
+  else:
+    components = shape.dimensions()
+  return (shape.np_dtype, components)
+
+
+def _unwrap_shapes(shapes):
+  return [_unwrap_shape(shape) for shape in shapes]
+
+
+def _wrap_data_handle(handle):
+  cdh = xla_data_pb2.ComputationDataHandle()
+  cdh.handle = handle
+  return cdh
+
+
+def _unwrap_data_handle(handle_proto):
+  return handle_proto.handle
+
+
+def _unwrap_data_handles(handle_protos):
+  return [_unwrap_data_handle(cdh) for cdh in handle_protos]
+
+
+def require_numpy_array_layout(value):
+  if isinstance(value, tuple):
+    return tuple(require_numpy_array_layout(x) for x in value)
+  else:
+    return np.require(value, requirements=['C', 'A'])
+
+
+class LocalComputation(object):
+  """Python wrapper for a local XLA Computation.
+
+  A LocalComputation can be executed if it is compiled. Otherwise, it
+  can still be used as a Computation where required by the
+  ComputationBuilder methods.
+  """
+
+  def __init__(self, c_local_computation, is_compiled):
+    self.c_local_computation = c_local_computation
+    self.is_compiled = is_compiled
+
+    # Ensure a reference to C-based destructor for use in __del__.
+    if is_compiled:
+      self._delete = c_api.DeleteCompiledLocalComputation
+    else:
+      self._delete = c_api.DeleteLocalComputation
+
+  def Compile(self, argument_shapes=()):
+    if self.is_compiled:
+      raise ValueError('Attempt to compile a compiled local XLA computation.')
+    return LocalComputation(
+        self.c_local_computation.Compile(_unwrap_shapes(argument_shapes)),
+        is_compiled=True)
+
+  def CompileWithExampleArguments(self, arguments=()):
+    return self.Compile(
+        argument_shapes=[Shape.from_numpy(arg) for arg in arguments])
+
+  def Execute(self, arguments=()):
+    if not self.is_compiled:
+      raise ValueError('Cannot execute an uncompiled local XLA computation.')
+    arguments = tuple(map(require_numpy_array_layout, arguments))
+    return self.c_local_computation.Execute(arguments)
+
+  def __del__(self):
+    self._delete(self.c_local_computation)
+
+
+class ComputationBuilder(object):
+  """XLA computation builder.
+
+  Enqueues XLA ops in sequence and in order to build a
+  LocalComputation, which in turn can be compiled into a
+  CompiledLocalComputation, which in turn can be locally executed.
+  """
+
+  # The methods of this class map 1-to-1 onto the XLA C++
+  # computation builder API. Therefore, there's no need to laboriously list
+  # arguments and return values for every method, especially where it's obvious.
+  #
+  # pylint: disable=g-doc-return-or-yield
+  # pylint: disable=g-doc-args
+
+  def __init__(self, name):
+    self._client = c_api.LocalComputationBuilder(name.encode('utf8'))
+    self._parameter_numbering = itertools.count()
+
+  def Build(self):
+    return LocalComputation(self._client.Build(), is_compiled=False)
+
+  def Constant(self, value):
+    """Enqueues a constant op onto the computation.
+
+    Args:
+      value: value for the constant, as a np.array with an explicit dtype set
+             to one of the supported types.
+
+    Returns:
+      A ComputationDataHandle message.
+    """
+    value = require_numpy_array_layout(value)
+    return _wrap_data_handle(self._client.ConstantLiteral(value))
+
+  def ConstantF32Scalar(self, value):
+    """Convenience method to enqueue a scalar F32 constant op.
+
+    Args:
+      value: a floating-point number.
+
+    Returns:
+      A ComputationDataHandle message.
+    """
+    return self.Constant(np.array(value, dtype=np.float32))
+
+  def ConstantF64Scalar(self, value):
+    """Convenience method to enqueue a scalar F32 constant op.
+
+    Args:
+      value: a floating-point number.
+
+    Returns:
+      A ComputationDataHandle message.
+    """
+    return self.Constant(np.array(value, dtype=np.float64))
+
+  def ConstantS32Scalar(self, value):
+    """Convenience method to enqueue a scalar S32 constant op.
+
+    Args:
+      value: a floating-point number.
+
+    Returns:
+      A ComputationDataHandle message.
+    """
+    return self.Constant(np.array(value, dtype=np.int32))
+
+  def ConstantS64Scalar(self, value):
+    """Convenience method to enqueue a scalar S64 constant op.
+
+    Args:
+      value: a floating-point number.
+
+    Returns:
+      A ComputationDataHandle message.
+    """
+    return self.Constant(np.array(value, dtype=np.int64))
+
+  def ConstantPredScalar(self, value):
+    """Convenience method to enqueue a scalar PRED constant op.
+
+    Args:
+      value: a boolean value.
+
+    Returns:
+      A ComputationDataHandle message.
+    """
+    return self.Constant(np.array(value, dtype=np.bool))
+
+  def ParameterWithShape(self, shape, name=None, parameter_num=None):
+    """Enqueues a Parameter op onto the computation, given a shape.
+
+    Args:
+      shape: the parameter's shape as a Shape object.
+      name: optional string name for the parameter.
+      parameter_num: parameter number in the computation function. If None,
+        the next linear parameter number is used. The default value capability
+        can be used for auto-numbering. If you're using auto-numbering for some
+        parameters, use it for *all* parameters to avoid clashes.
+
+    Returns:
+      A ComputationDataHandle message.
+    """
+    if name is None:
+      name = ''
+    if parameter_num is None:
+      parameter_num = next(self._parameter_numbering)
+
+    return _wrap_data_handle(
+        self._client.Parameter(
+            parameter_num, _unwrap_shape(shape), name.encode('utf8')))
+
+  def ParameterFromNumpy(self, value, name=None, parameter_num=None):
+    """Enqueues a Parameter op onto the computation.
+
+    Args:
+      value: a Numpy array, or a nested tuple thereof, from which the
+        shape is inferred.
+      name: as in ParameterWithShape.
+      parameter_num: as in ParameterWithShape.
+
+    Returns:
+      A ComputationDataHandle message.
+    """
+    return self.ParameterWithShape(
+        Shape.from_numpy(value), name=name, parameter_num=parameter_num)
+
+  def Broadcast(self, operand, sizes):
+    """Enqueues a broadcast operation onto the computation.
+
+    Args:
+      operand: the operand ComputationDataHandle to broadcast.
+      sizes: an iterable of broadcast sizes.
+
+    Returns:
+      A ComputationDataHandle representing the added broadcast op.
+    """
+    return _wrap_data_handle(
+        self._client.Broadcast(_unwrap_data_handle(operand), sizes))
+
+  def Concatenate(self, operands, dimension):
+    """Enqueues a concatenate operation onto the computation.
+
+    Args:
+      operands: the operands to concatenate.
+      dimension: the dimension in which to perform the concatenation.
+
+    Returns:
+      A ComputationDataHandle representing the added concatenate op.
+    """
+    return _wrap_data_handle(
+        self._client.ConcatInDim(_unwrap_data_handles(operands), dimension))
+
+  def ConvertElementType(self, operand, new_element_type):
+    """Enqueues an element type conversion operation onto the computation.
+
+    Args:
+      operand: the operand to convert.
+      new_element_type: the target primitive type.
+
+    Returns:
+      A ComputationDataHandle representing the added conversion op.
+    """
+    return _wrap_data_handle(
+        self._client.ConvertElementType(
+            _unwrap_data_handle(operand), new_element_type))
+
+  def GetShape(self, operand):
+    return _wrap_shape(self._client.GetShape(_unwrap_data_handle(operand)))
+
+  def GetComputationStats(self):
+    raise NotImplementedError()
+
+  def Reshape(self, operand, dimensions, new_sizes):
+    """Reshape op."""
+    return _wrap_data_handle(
+        self._client.Reshape(
+            _unwrap_data_handle(operand), dimensions, new_sizes))
+
+  def Trans(self, operand):
+    """Specialized matrix transpose op."""
+    return _wrap_data_handle(
+        self._client.Transpose(_unwrap_data_handle(operand), [1, 0]))
+
+  def Transpose(self, operand, permutation):
+    """Transpose op."""
+    return _wrap_data_handle(
+        self._client.Transpose(_unwrap_data_handle(operand), permutation))
+
+  def Select(self, pred, on_true, on_false):
+    """Element-wise selection op.
+
+    Constructs an output array from elements of two input arrays, based on the
+    values of a predicate array.
+    """
+    return _wrap_data_handle(
+        self._client.Select(
+            _unwrap_data_handle(pred),
+            _unwrap_data_handle(on_true),
+            _unwrap_data_handle(on_false)))
+
+  def Slice(self, operand, start_indices, limit_indices, strides=None):
+    """Enqueues a slice operation onto the computation.
+
+    Args:
+      operand: ComputationDataHandle for the N dimensional array to be sliced.
+      start_indices: iterable of N integers containing the starting indices of
+        the slice for each dimension.
+      limit_indices: iterable of N integers containing the ending indices
+        (exclusive) of the slice for each dimension.
+      strides: optional iterable of N integers containing the stride sizes for
+        each dimension.
+
+    Returns:
+      A ComputationDataHandle representing the added Slice op.
+    """
+    if strides is None:
+      start_indices = list(start_indices)
+      strides = [1] * len(start_indices)
+    return _wrap_data_handle(
+        self._client.Slice(
+            _unwrap_data_handle(operand),
+            start_indices,
+            limit_indices,
+            strides))
+
+  def DynamicSlice(self, operand, start_indices, slice_sizes):
+    """Enqueues a slice op with dynamic start indices onto the computation.
+
+    Args:
+      operand: ComputationDataHandle for the N dimensional array to be sliced.
+      start_indices: ComputationDataHandle for the 1D array of N integers
+        containing the starting indices of the slice.
+      slice_sizes: iterable of N integers containing the slice sizes in each
+        dimension.
+
+    Returns:
+      A ComputationDataHandle representing the added DynamicSlice op.
+    """
+    return _wrap_data_handle(
+        self._client.DynamicSlice(
+            _unwrap_data_handle(operand),
+            _unwrap_data_handle(start_indices),
+            slice_sizes))
+
+  def DynamicUpdateSlice(self, operand, update, start_indices):
+    """Enqueues a dynamic update slice operation onto the computation.
+
+    Args:
+      operand: ComputationDataHandle for the N dimensional array to be updated.
+      update: N dimensional array comprising the slice update.
+      start_indices: Rank-1 array of N integers comprising the starting indices
+        of the slice along each dimension.
+    Returns:
+      A ComputationDataHandle representing the added DynamicUpdateSlice op.
+    """
+    return _wrap_data_handle(
+        self._client.DynamicUpdateSlice(
+            _unwrap_data_handle(operand),
+            _unwrap_data_handle(update),
+            _unwrap_data_handle(start_indices)))
+
+  def Tuple(self, *ops):
+    """Enqueues a tuple operation onto the computation.
+
+    Args:
+      ops: a sequence of tuple operands (each a ComputationDataHandle).
+
+    Returns:
+      A ComputationDataHandle representing the added Tuple op.
+    """
+    return _wrap_data_handle(self._client.Tuple(_unwrap_data_handles(ops)))
+
+  def GetTupleElement(self, tup, index):
+    """Enqueues a 'get tuple element' operation onto the computation.
+
+    Args:
+      tup: the tuple operand (a ComputationDataHandle).
+      index: numeric index to select from the tuple.
+
+    Returns:
+      A ComputationDataHandle representing the added GetTupleElement op.
+    """
+    return _wrap_data_handle(
+        self._client.GetTupleElement(_unwrap_data_handle(tup), index))
+
+  def Call(self, computation_to_apply, operands):
+    """Enqueues a call operation onto the computation.
+
+    Args:
+      computation_to_apply: a Computation object.
+      operands: an iterable of ComputationDataHandle. The number and types of
+        operands must match the arity of computation_to_apply.
+
+    Returns:
+      A ComputationDataHandle representing the added call op.
+    """
+    return _wrap_data_handle(
+        self._client.Call(computation_to_apply.c_local_computation,
+                          _unwrap_data_handles(operands)))
+
+  def Map(self, operands, computation_to_apply, dimensions, static_operands=()):
+    """Enqueues a map operation onto the computation.
+
+    Args:
+      operands: an iterable of ComputationDataHandle.
+      computation_to_apply: a Computation object.
+      dimensions: dimensions over which to apply map the function.
+      static_operands: auxiliary arguments passed to the applied computation.
+
+    Returns:
+      A ComputationDataHandle representing the added Map op.
+    """
+    return _wrap_data_handle(
+        self._client.Map(
+            _unwrap_data_handles(operands),
+            computation_to_apply.c_local_computation,
+            dimensions,
+            _unwrap_data_handles(static_operands)))
+
+  def Reduce(self, operand, init_value, computation_to_apply, dimensions):
+    """Enqueues a reduction operation onto the computation.
+
+    Args:
+      operand: reduction operand (ComputationDataHandle).
+      init_value: reduction initial value (ComputationDataHandle).
+      computation_to_apply: a Computation object - binary reduction function.
+      dimensions: sequence of dimensions (integers) to reduce on.
+
+    Returns:
+      A ComputationDataHandle representing the added Reduce op.
+    """
+    return _wrap_data_handle(
+        self._client.Reduce(
+            _unwrap_data_handle(operand),
+            _unwrap_data_handle(init_value),
+            computation_to_apply.c_local_computation,
+            dimensions))
+
+  def While(self, cond, body, init):
+    """Enqueues a While operation onto the computation.
+
+    Args:
+      cond: a Computation for the loop condition, which has type T -> PRED
+      body: a Computation for the loop body, which has type T -> T
+      init: an ComputationDataHandle for the initial parameter, which has type T
+
+    Returns: a ComputationDataHandle representing the While operation.
+    """
+    return _wrap_data_handle(
+        self._client.While(cond.c_local_computation,
+                           body.c_local_computation,
+                           _unwrap_data_handle(init)))
+
+  def Dot(self, lhs, rhs):
+    """Matrix multiplication between lhs and rhs."""
+    return _wrap_data_handle(
+        self._client.Dot(_unwrap_data_handle(lhs), _unwrap_data_handle(rhs)))
+
+
+def _forward_methods_to_local_builder():
+  """Forward remaining ComputationBuilder methods to the C API.
+
+  Set up methods, corresponding to unary and binary XLA operations,
+  whose calls are forwarded in a boilerplate manner to the underlying
+  LocalComputationBuilder C-extension API.
+  """
+
+  def forward_to_local_builder_with_handles(target_method, is_binop=False):
+    """Generate a forwarding method that wraps/unwraps data handles."""
+
+    def forward(self, *args, **kwargs):
+      unwrapped_args = [_unwrap_data_handle(arg) for arg in args]
+
+      if is_binop and len(unwrapped_args) < 3:
+        unwrapped_args.append(kwargs.get('broadcast_dimensions', ()))
+
+      return _wrap_data_handle(
+          target_method(
+              self._client,  # pylint: disable=protected-access
+              *unwrapped_args))
+
+    return forward
+
+  for method_name in _UNARY_OPS:
+    forward = forward_to_local_builder_with_handles(
+        getattr(c_api.LocalComputationBuilder, method_name))
+    forward.__name__ = method_name
+    setattr(ComputationBuilder, method_name, forward)
+
+  for method_name in _BINARY_OPS:
+    forward = forward_to_local_builder_with_handles(
+        getattr(c_api.LocalComputationBuilder, method_name), is_binop=True)
+    forward.__name__ = method_name
+    setattr(ComputationBuilder, method_name, forward)
+
+
+_forward_methods_to_local_builder()
diff --git a/tensorflow/compiler/xla/python/xla_client_test.py b/tensorflow/compiler/xla/python/xla_client_test.py
new file mode 100644
index 0000000000..878cd83edc
--- /dev/null
+++ b/tensorflow/compiler/xla/python/xla_client_test.py
@@ -0,0 +1,898 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for the Python extension-based XLA client."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import itertools
+
+import numpy as np
+
+from tensorflow.compiler.xla.python import xla_client
+import unittest
+
+
+class LocalComputationTest(unittest.TestCase):
+  """Base class for running an XLA Computation through the local client."""
+
+  def _NewComputation(self, name=None):
+    if name is None:
+      name = self.id()
+    return xla_client.ComputationBuilder(name)
+
+  def _ExecuteAndAssertWith(self, assert_func, c, arguments, expected):
+    assert expected is not None
+    compiled_c = c.Build().CompileWithExampleArguments(arguments)
+    result = compiled_c.Execute(arguments)
+    # Numpy's comparison methods are a bit too lenient by treating inputs as
+    # "array-like", meaning that scalar 4 will be happily compared equal to
+    # [[4]]. We'd like to be more strict so assert shapes as well.
+    self.assertEqual(np.asanyarray(result).shape, np.asanyarray(expected).shape)
+    assert_func(result, expected)
+
+  def _ExecuteAndCompareExact(self, c, arguments=(), expected=None):
+    self._ExecuteAndAssertWith(np.testing.assert_equal, c, arguments, expected)
+
+  def _ExecuteAndCompareClose(self, c, arguments=(), expected=None):
+    self._ExecuteAndAssertWith(np.testing.assert_allclose, c, arguments,
+                               expected)
+
+
+def NumpyArrayF32(*args, **kwargs):
+  """Convenience wrapper to create Numpy arrays with a np.float32 dtype."""
+  return np.array(*args, dtype=np.float32, **kwargs)
+
+
+def NumpyArrayF64(*args, **kwargs):
+  """Convenience wrapper to create Numpy arrays with a np.float64 dtype."""
+  return np.array(*args, dtype=np.float64, **kwargs)
+
+
+def NumpyArrayS32(*args, **kwargs):
+  """Convenience wrapper to create Numpy arrays with a np.int32 dtype."""
+  return np.array(*args, dtype=np.int32, **kwargs)
+
+
+def NumpyArrayS64(*args, **kwargs):
+  """Convenience wrapper to create Numpy arrays with a np.int64 dtype."""
+  return np.array(*args, dtype=np.int64, **kwargs)
+
+
+def NumpyArrayBool(*args, **kwargs):
+  """Convenience wrapper to create Numpy arrays with a np.bool dtype."""
+  return np.array(*args, dtype=np.bool, **kwargs)
+
+
+class ComputationsWithConstantsTest(LocalComputationTest):
+  """Tests focusing on Constant ops."""
+
+  def testConstantScalarSumF32(self):
+    c = self._NewComputation()
+    c.Add(c.ConstantF32Scalar(1.11), c.ConstantF32Scalar(3.14))
+    self._ExecuteAndCompareClose(c, expected=4.25)
+
+  def testConstantScalarSumF64(self):
+    c = self._NewComputation()
+    c.Add(c.ConstantF64Scalar(1.11), c.ConstantF64Scalar(3.14))
+    self._ExecuteAndCompareClose(c, expected=4.25)
+
+  def testConstantScalarSumS32(self):
+    c = self._NewComputation()
+    c.Add(c.ConstantS32Scalar(1), c.ConstantS32Scalar(2))
+    self._ExecuteAndCompareClose(c, expected=3)
+
+  def testConstantScalarSumS64(self):
+    c = self._NewComputation()
+    c.Add(c.ConstantS64Scalar(1), c.ConstantS64Scalar(2))
+    self._ExecuteAndCompareClose(c, expected=3)
+
+  def testConstantVectorMulF32(self):
+    c = self._NewComputation()
+    c.Mul(
+        c.Constant(NumpyArrayF32([2.5, 3.3, -1.2, 0.7])),
+        c.Constant(NumpyArrayF32([-1.2, 2, -2, -3])))
+    self._ExecuteAndCompareClose(c, expected=[-3, 6.6, 2.4, -2.1])
+
+  def testConstantVectorMulF64(self):
+    c = self._NewComputation()
+    c.Mul(
+        c.Constant(NumpyArrayF64([2.5, 3.3, -1.2, 0.7])),
+        c.Constant(NumpyArrayF64([-1.2, 2, -2, -3])))
+    self._ExecuteAndCompareClose(c, expected=[-3, 6.6, 2.4, -2.1])
+
+  def testConstantVectorScalarDivF32(self):
+    c = self._NewComputation()
+    c.Div(
+        c.Constant(NumpyArrayF32([1.5, 2.5, 3.0, -10.8])),
+        c.ConstantF32Scalar(2.0))
+    self._ExecuteAndCompareClose(c, expected=[0.75, 1.25, 1.5, -5.4])
+
+  def testConstantVectorScalarDivF64(self):
+    c = self._NewComputation()
+    c.Div(
+        c.Constant(NumpyArrayF64([1.5, 2.5, 3.0, -10.8])),
+        c.ConstantF64Scalar(2.0))
+    self._ExecuteAndCompareClose(c, expected=[0.75, 1.25, 1.5, -5.4])
+
+  def testConstantVectorScalarPowF32(self):
+    c = self._NewComputation()
+    c.Pow(c.Constant(NumpyArrayF32([1.5, 2.5, 3.0])), c.ConstantF32Scalar(2.))
+    self._ExecuteAndCompareClose(c, expected=[2.25, 6.25, 9.])
+
+  def testConstantVectorScalarPowF64(self):
+    c = self._NewComputation()
+    c.Pow(c.Constant(NumpyArrayF64([1.5, 2.5, 3.0])), c.ConstantF64Scalar(2.))
+    self._ExecuteAndCompareClose(c, expected=[2.25, 6.25, 9.])
+
+  def testBooleanAnd(self):
+    c = self._NewComputation()
+    c.And(
+        c.Constant(NumpyArrayBool([True, False, True, False])),
+        c.Constant(NumpyArrayBool([True, True, False, False])))
+    self._ExecuteAndCompareExact(c, expected=[True, False, False, False])
+
+  def testBooleanOr(self):
+    c = self._NewComputation()
+    c.Or(
+        c.Constant(NumpyArrayBool([True, False, True, False])),
+        c.Constant(NumpyArrayBool([True, True, False, False])))
+    self._ExecuteAndCompareExact(c, expected=[True, True, True, False])
+
+  def testSum2DF32(self):
+    c = self._NewComputation()
+    c.Add(
+        c.Constant(NumpyArrayF32([[1, 2, 3], [4, 5, 6]])),
+        c.Constant(NumpyArrayF32([[1, -1, 1], [-1, 1, -1]])))
+    self._ExecuteAndCompareClose(c, expected=[[2, 1, 4], [3, 6, 5]])
+
+  def testSum2DF64(self):
+    c = self._NewComputation()
+    c.Add(
+        c.Constant(NumpyArrayF64([[1, 2, 3], [4, 5, 6]])),
+        c.Constant(NumpyArrayF64([[1, -1, 1], [-1, 1, -1]])))
+    self._ExecuteAndCompareClose(c, expected=[[2, 1, 4], [3, 6, 5]])
+
+  def testSum2DWith1DBroadcastDim0F32(self):
+    # sum of a 2D array with a 1D array where the latter is replicated across
+    # dimension 0 to match the former's shape.
+    c = self._NewComputation()
+    c.Add(
+        c.Constant(NumpyArrayF32([[1, 2, 3], [4, 5, 6], [7, 8, 9]])),
+        c.Constant(NumpyArrayF32([10, 20, 30])),
+        broadcast_dimensions=(0,))
+    self._ExecuteAndCompareClose(
+        c, expected=[[11, 12, 13], [24, 25, 26], [37, 38, 39]])
+
+  def testSum2DWith1DBroadcastDim0F64(self):
+    # sum of a 2D array with a 1D array where the latter is replicated across
+    # dimension 0 to match the former's shape.
+    c = self._NewComputation()
+    c.Add(
+        c.Constant(NumpyArrayF64([[1, 2, 3], [4, 5, 6], [7, 8, 9]])),
+        c.Constant(NumpyArrayF64([10, 20, 30])),
+        broadcast_dimensions=(0,))
+    self._ExecuteAndCompareClose(
+        c, expected=[[11, 12, 13], [24, 25, 26], [37, 38, 39]])
+
+  def testSum2DWith1DBroadcastDim1F32(self):
+    # sum of a 2D array with a 1D array where the latter is replicated across
+    # dimension 1 to match the former's shape.
+    c = self._NewComputation()
+    c.Add(
+        c.Constant(NumpyArrayF32([[1, 2, 3], [4, 5, 6], [7, 8, 9]])),
+        c.Constant(NumpyArrayF32([10, 20, 30])),
+        broadcast_dimensions=(1,))
+    self._ExecuteAndCompareClose(
+        c, expected=[[11, 22, 33], [14, 25, 36], [17, 28, 39]])
+
+  def testSum2DWith1DBroadcastDim1F64(self):
+    # sum of a 2D array with a 1D array where the latter is replicated across
+    # dimension 1 to match the former's shape.
+    c = self._NewComputation()
+    c.Add(
+        c.Constant(NumpyArrayF64([[1, 2, 3], [4, 5, 6], [7, 8, 9]])),
+        c.Constant(NumpyArrayF64([10, 20, 30])),
+        broadcast_dimensions=(1,))
+    self._ExecuteAndCompareClose(
+        c, expected=[[11, 22, 33], [14, 25, 36], [17, 28, 39]])
+
+  def testConstantAxpyF32(self):
+    c = self._NewComputation()
+    c.Add(
+        c.Mul(
+            c.ConstantF32Scalar(2),
+            c.Constant(NumpyArrayF32([2.2, 3.3, 4.4, 5.5]))),
+        c.Constant(NumpyArrayF32([100, -100, 200, -200])))
+    self._ExecuteAndCompareClose(c, expected=[104.4, -93.4, 208.8, -189])
+
+  def testConstantAxpyF64(self):
+    c = self._NewComputation()
+    c.Add(
+        c.Mul(
+            c.ConstantF64Scalar(2),
+            c.Constant(NumpyArrayF64([2.2, 3.3, 4.4, 5.5]))),
+        c.Constant(NumpyArrayF64([100, -100, 200, -200])))
+    self._ExecuteAndCompareClose(c, expected=[104.4, -93.4, 208.8, -189])
+
+
+class ParametersTest(LocalComputationTest):
+  """Tests focusing on Parameter ops and argument-passing."""
+
+  def setUp(self):
+    self.f32_scalar_2 = NumpyArrayF32(2.0)
+    self.f32_4vector = NumpyArrayF32([-2.3, 3.3, -4.3, 5.3])
+    self.f64_scalar_2 = NumpyArrayF64(2.0)
+    self.f64_4vector = NumpyArrayF64([-2.3, 3.3, -4.3, 5.3])
+    self.s32_scalar_3 = NumpyArrayS32(3)
+    self.s32_4vector = NumpyArrayS32([10, 15, -2, 7])
+    self.s64_scalar_3 = NumpyArrayS64(3)
+    self.s64_4vector = NumpyArrayS64([10, 15, -2, 7])
+
+  def testScalarTimesVectorAutonumberF32(self):
+    c = self._NewComputation()
+    p0 = c.ParameterFromNumpy(self.f32_scalar_2)
+    p1 = c.ParameterFromNumpy(self.f32_4vector)
+    c.Mul(p0, p1)
+    self._ExecuteAndCompareClose(
+        c,
+        arguments=[self.f32_scalar_2, self.f32_4vector],
+        expected=[-4.6, 6.6, -8.6, 10.6])
+
+  def testScalarTimesVectorAutonumberF64(self):
+    c = self._NewComputation()
+    p0 = c.ParameterFromNumpy(self.f64_scalar_2)
+    p1 = c.ParameterFromNumpy(self.f64_4vector)
+    c.Mul(p0, p1)
+    self._ExecuteAndCompareClose(
+        c,
+        arguments=[self.f64_scalar_2, self.f64_4vector],
+        expected=[-4.6, 6.6, -8.6, 10.6])
+
+  def testScalarTimesVectorS32(self):
+    c = self._NewComputation()
+    p0 = c.ParameterFromNumpy(self.s32_scalar_3)
+    p1 = c.ParameterFromNumpy(self.s32_4vector)
+    c.Mul(p0, p1)
+    self._ExecuteAndCompareExact(
+        c,
+        arguments=[self.s32_scalar_3, self.s32_4vector],
+        expected=[30, 45, -6, 21])
+
+  def testScalarTimesVectorS64(self):
+    c = self._NewComputation()
+    p0 = c.ParameterFromNumpy(self.s64_scalar_3)
+    p1 = c.ParameterFromNumpy(self.s64_4vector)
+    c.Mul(p0, p1)
+    self._ExecuteAndCompareExact(
+        c,
+        arguments=[self.s64_scalar_3, self.s64_4vector],
+        expected=[30, 45, -6, 21])
+
+  def testScalarMinusVectorExplicitNumberingF32(self):
+    # Use explicit numbering and pass parameter_num first. Sub is used since
+    # it's not commutative and can help catch parameter reversal within the
+    # computation.
+    c = self._NewComputation()
+    p1 = c.ParameterFromNumpy(self.f32_4vector, parameter_num=1)
+    p0 = c.ParameterFromNumpy(self.f32_scalar_2, parameter_num=0)
+    c.Sub(p1, p0)
+    self._ExecuteAndCompareClose(
+        c,
+        arguments=[self.f32_scalar_2, self.f32_4vector],
+        expected=[-4.3, 1.3, -6.3, 3.3])
+
+  def testScalarMinusVectorExplicitNumberingF64(self):
+    # Use explicit numbering and pass parameter_num first. Sub is used since
+    # it's not commutative and can help catch parameter reversal within the
+    # computation.
+    c = self._NewComputation()
+    p1 = c.ParameterFromNumpy(self.f64_4vector, parameter_num=1)
+    p0 = c.ParameterFromNumpy(self.f64_scalar_2, parameter_num=0)
+    c.Sub(p1, p0)
+    self._ExecuteAndCompareClose(
+        c,
+        arguments=[self.f64_scalar_2, self.f64_4vector],
+        expected=[-4.3, 1.3, -6.3, 3.3])
+
+
+class SingleOpTest(LocalComputationTest):
+  """Tests for single ops.
+
+  The goal here is smoke testing - to exercise the most basic functionality of
+  single XLA ops. As minimal as possible number of additional ops are added
+  around the op being tested.
+  """
+
+  def testConcatenateF32(self):
+    c = self._NewComputation()
+    c.Concatenate(
+        (c.Constant(NumpyArrayF32([1.0, 2.0, 3.0])),
+         c.Constant(NumpyArrayF32([4.0, 5.0, 6.0]))),
+        dimension=0)
+    self._ExecuteAndCompareClose(c, expected=[1.0, 2.0, 3.0, 4.0, 5.0, 6.0])
+
+  def testConcatenateF64(self):
+    c = self._NewComputation()
+    c.Concatenate(
+        (c.Constant(NumpyArrayF64([1.0, 2.0, 3.0])),
+         c.Constant(NumpyArrayF64([4.0, 5.0, 6.0]))),
+        dimension=0)
+    self._ExecuteAndCompareClose(c, expected=[1.0, 2.0, 3.0, 4.0, 5.0, 6.0])
+
+  def testConvertElementType(self):
+    xla_types = {
+        np.bool: xla_client.xla_data_pb2.PRED,
+        np.int32: xla_client.xla_data_pb2.S32,
+        np.int64: xla_client.xla_data_pb2.S64,
+        np.float32: xla_client.xla_data_pb2.F32,
+        np.float64: xla_client.xla_data_pb2.F64,
+    }
+
+    def _ConvertAndTest(template, src_dtype, dst_dtype):
+      c = self._NewComputation()
+      x = c.Constant(np.array(template, dtype=src_dtype))
+      c.ConvertElementType(x, xla_types[dst_dtype])
+
+      result = c.Build().Compile().Execute()
+      expected = np.array(template, dtype=dst_dtype)
+
+      self.assertEqual(result.shape, expected.shape)
+      self.assertEqual(result.dtype, expected.dtype)
+      np.testing.assert_equal(result, expected)
+
+    x = [0, 1, 0, 0, 1]
+    for src_dtype, dst_dtype in itertools.product(xla_types, xla_types):
+      _ConvertAndTest(x, src_dtype, dst_dtype)
+
+  def testDotMatrixVectorF32(self):
+    c = self._NewComputation()
+    lhs = NumpyArrayF32([[2.0, 3.0], [4.0, 5.0]])
+    rhs = NumpyArrayF32([[10.0], [20.0]])
+    c.Dot(c.Constant(lhs), c.Constant(rhs))
+    self._ExecuteAndCompareClose(c, expected=np.dot(lhs, rhs))
+
+  def testDotMatrixVectorF64(self):
+    c = self._NewComputation()
+    lhs = NumpyArrayF64([[2.0, 3.0], [4.0, 5.0]])
+    rhs = NumpyArrayF64([[10.0], [20.0]])
+    c.Dot(c.Constant(lhs), c.Constant(rhs))
+    self._ExecuteAndCompareClose(c, expected=np.dot(lhs, rhs))
+
+  def testDotMatrixMatrixF32(self):
+    c = self._NewComputation()
+    lhs = NumpyArrayF32([[2.0, 3.0], [4.0, 5.0]])
+    rhs = NumpyArrayF32([[10.0, 20.0], [100.0, 200.0]])
+    c.Dot(c.Constant(lhs), c.Constant(rhs))
+    self._ExecuteAndCompareClose(c, expected=np.dot(lhs, rhs))
+
+  def testDotMatrixMatrixF64(self):
+    c = self._NewComputation()
+    lhs = NumpyArrayF64([[2.0, 3.0], [4.0, 5.0]])
+    rhs = NumpyArrayF64([[10.0, 20.0], [100.0, 200.0]])
+    c.Dot(c.Constant(lhs), c.Constant(rhs))
+    self._ExecuteAndCompareClose(c, expected=np.dot(lhs, rhs))
+
+  def testBooleanNot(self):
+    c = self._NewComputation()
+    arr = NumpyArrayBool([True, False, True])
+    c.Not(c.Constant(arr))
+    self._ExecuteAndCompareClose(c, expected=~arr)
+
+  def testExp(self):
+    c = self._NewComputation()
+    arr = NumpyArrayF32([3.3, 12.1])
+    c.Exp(c.Constant(arr))
+    self._ExecuteAndCompareClose(c, expected=np.exp(arr))
+
+  def testLog(self):
+    c = self._NewComputation()
+    arr = NumpyArrayF32([3.3, 12.1])
+    c.Log(c.Constant(arr))
+    self._ExecuteAndCompareClose(c, expected=np.log(arr))
+
+  def testNeg(self):
+    c = self._NewComputation()
+    arr = NumpyArrayF32([3.3, 12.1])
+    c.Neg(c.Constant(arr))
+    self._ExecuteAndCompareClose(c, expected=-arr)
+
+  def testFloor(self):
+    c = self._NewComputation()
+    arr = NumpyArrayF32([3.3, 12.1])
+    c.Floor(c.Constant(arr))
+    self._ExecuteAndCompareClose(c, expected=np.floor(arr))
+
+  def testCeil(self):
+    c = self._NewComputation()
+    arr = NumpyArrayF32([3.3, 12.1])
+    c.Ceil(c.Constant(arr))
+    self._ExecuteAndCompareClose(c, expected=np.ceil(arr))
+
+  def testAbs(self):
+    c = self._NewComputation()
+    arr = NumpyArrayF32([3.3, -12.1, 2.4, -1.])
+    c.Abs(c.Constant(arr))
+    self._ExecuteAndCompareClose(c, expected=np.abs(arr))
+
+  def testTanh(self):
+    c = self._NewComputation()
+    arr = NumpyArrayF32([3.3, 12.1])
+    c.Tanh(c.Constant(arr))
+    self._ExecuteAndCompareClose(c, expected=np.tanh(arr))
+
+  def testTrans(self):
+
+    def _TransposeAndTest(array):
+      c = self._NewComputation()
+      c.Trans(c.Constant(array))
+      self._ExecuteAndCompareClose(c, expected=array.T)
+
+    # Test square and non-square matrices in both default (C) and F orders.
+    for array_fun in [NumpyArrayF32, NumpyArrayF64]:
+      _TransposeAndTest(array_fun([[1, 2, 3], [4, 5, 6]]))
+      _TransposeAndTest(array_fun([[1, 2, 3], [4, 5, 6]], order="F"))
+      _TransposeAndTest(array_fun([[1, 2], [4, 5]]))
+      _TransposeAndTest(array_fun([[1, 2], [4, 5]], order="F"))
+
+  def testTranspose(self):
+
+    def _TransposeAndTest(array, permutation):
+      c = self._NewComputation()
+      c.Transpose(c.Constant(array), permutation)
+      expected = np.transpose(array, permutation)
+      self._ExecuteAndCompareClose(c, expected=expected)
+
+    _TransposeAndTest(NumpyArrayF32([[1, 2, 3], [4, 5, 6]]), [0, 1])
+    _TransposeAndTest(NumpyArrayF32([[1, 2, 3], [4, 5, 6]]), [1, 0])
+    _TransposeAndTest(NumpyArrayF32([[1, 2], [4, 5]]), [0, 1])
+    _TransposeAndTest(NumpyArrayF32([[1, 2], [4, 5]]), [1, 0])
+
+    arr = np.random.RandomState(0).randn(2, 3, 4).astype(np.float32)
+    for permutation in itertools.permutations(range(arr.ndim)):
+      _TransposeAndTest(arr, permutation)
+      _TransposeAndTest(np.asfortranarray(arr), permutation)
+
+  def testEq(self):
+    c = self._NewComputation()
+    c.Eq(
+        c.Constant(NumpyArrayS32([1, 2, 3, 4])),
+        c.Constant(NumpyArrayS32([4, 2, 3, 1])))
+    self._ExecuteAndCompareExact(c, expected=[False, True, True, False])
+
+  def testNe(self):
+    c = self._NewComputation()
+    c.Ne(
+        c.Constant(NumpyArrayS32([1, 2, 3, 4])),
+        c.Constant(NumpyArrayS32([4, 2, 3, 1])))
+    self._ExecuteAndCompareExact(c, expected=[True, False, False, True])
+
+    c.Ne(
+        c.Constant(NumpyArrayF32([-2.0, 0.0,
+                                  float("nan"),
+                                  float("nan")])),
+        c.Constant(NumpyArrayF32([2.0, -0.0, 1.0, float("nan")])))
+    self._ExecuteAndAssertWith(
+        np.testing.assert_allclose, c, (), expected=[True, False, True, True])
+
+  def testGt(self):
+    c = self._NewComputation()
+    c.Gt(
+        c.Constant(NumpyArrayS32([1, 2, 3, 4, 9])),
+        c.Constant(NumpyArrayS32([1, 0, 2, 7, 12])))
+    self._ExecuteAndCompareExact(c, expected=[False, True, True, False, False])
+
+  def testGe(self):
+    c = self._NewComputation()
+    c.Ge(
+        c.Constant(NumpyArrayS32([1, 2, 3, 4, 9])),
+        c.Constant(NumpyArrayS32([1, 0, 2, 7, 12])))
+    self._ExecuteAndCompareExact(c, expected=[True, True, True, False, False])
+
+  def testLt(self):
+    c = self._NewComputation()
+    c.Lt(
+        c.Constant(NumpyArrayS32([1, 2, 3, 4, 9])),
+        c.Constant(NumpyArrayS32([1, 0, 2, 7, 12])))
+    self._ExecuteAndCompareExact(c, expected=[False, False, False, True, True])
+
+  def testLe(self):
+    c = self._NewComputation()
+    c.Le(
+        c.Constant(NumpyArrayS32([1, 2, 3, 4, 9])),
+        c.Constant(NumpyArrayS32([1, 0, 2, 7, 12])))
+    self._ExecuteAndCompareExact(c, expected=[True, False, False, True, True])
+
+  def testMax(self):
+    c = self._NewComputation()
+    c.Max(
+        c.Constant(NumpyArrayF32([1.0, 2.0, 3.0, 4.0, 9.0])),
+        c.Constant(NumpyArrayF32([1.0, 0.0, 2.0, 7.0, 12.0])))
+    self._ExecuteAndCompareExact(c, expected=[1.0, 2.0, 3.0, 7.0, 12.0])
+
+  def testMaxExplicitBroadcastDim0(self):
+    c = self._NewComputation()
+    c.Max(
+        c.Constant(NumpyArrayF32([[1, 2, 3], [4, 5, 6], [7, 8, 9]])),
+        c.Constant(NumpyArrayF32([3, 4, 5])),
+        broadcast_dimensions=(0,))
+    self._ExecuteAndCompareExact(c, expected=[[3, 3, 3], [4, 5, 6], [7, 8, 9]])
+
+  def testMaxExplicitBroadcastDim1(self):
+    c = self._NewComputation()
+    c.Max(
+        c.Constant(NumpyArrayF32([[1, 2, 3], [4, 5, 6], [7, 8, 9]])),
+        c.Constant(NumpyArrayF32([3, 4, 5])),
+        broadcast_dimensions=(1,))
+    self._ExecuteAndCompareExact(c, expected=[[3, 4, 5], [4, 5, 6], [7, 8, 9]])
+
+  def testMin(self):
+    c = self._NewComputation()
+    c.Min(
+        c.Constant(NumpyArrayF32([1.0, 2.0, 3.0, 4.0, 9.0])),
+        c.Constant(NumpyArrayF32([1.0, 0.0, 2.0, 7.0, 12.0])))
+    self._ExecuteAndCompareExact(c, expected=[1.0, 0.0, 2.0, 4.0, 9.0])
+
+  def testReshape(self):
+    c = self._NewComputation()
+    c.Reshape(
+        c.Constant(NumpyArrayS32([[1, 2], [3, 4], [5, 6]])),
+        dimensions=[0, 1],
+        new_sizes=[2, 3])
+    self._ExecuteAndCompareExact(c, expected=[[1, 2, 3], [4, 5, 6]])
+
+  def testSelect(self):
+    c = self._NewComputation()
+    c.Select(
+        c.Constant(NumpyArrayBool([True, False, False, True, False])),
+        c.Constant(NumpyArrayS32([1, 2, 3, 4, 5])),
+        c.Constant(NumpyArrayS32([-1, -2, -3, -4, -5])))
+    self._ExecuteAndCompareExact(c, expected=[1, -2, -3, 4, -5])
+
+  def testSlice(self):
+    c = self._NewComputation()
+    c.Slice(
+        c.Constant(NumpyArrayS32([[1, 2, 3], [4, 5, 6], [7, 8, 9]])), [1, 0],
+        [3, 2])
+    self._ExecuteAndCompareExact(c, expected=[[4, 5], [7, 8]])
+
+  def testDynamicSlice(self):
+    c = self._NewComputation()
+    c.DynamicSlice(
+        c.Constant(NumpyArrayS32([[1, 2, 3], [4, 5, 6], [7, 8, 9]])),
+        c.Constant(NumpyArrayS32([1, 0])), [2, 2])
+    self._ExecuteAndCompareExact(c, expected=[[4, 5], [7, 8]])
+
+  def testDynamicUpdateSlice(self):
+    c = self._NewComputation()
+    c.DynamicUpdateSlice(
+        c.Constant(NumpyArrayS32([[1, 2, 3], [4, 5, 6], [7, 8, 9]])),
+        c.Constant(NumpyArrayS32([[1, 2], [3, 4]])),
+        c.Constant(NumpyArrayS32([1, 1])))
+    self._ExecuteAndCompareExact(c, expected=[[1, 2, 3], [4, 1, 2], [7, 3, 4]])
+
+  def testTuple(self):
+    c = self._NewComputation()
+    c.Tuple(
+        c.ConstantS32Scalar(42), c.Constant(NumpyArrayF32([1.0, 2.0])),
+        c.Constant(NumpyArrayBool([True, False, False, True])))
+    result = c.Build().Compile().Execute()
+    self.assertIsInstance(result, tuple)
+    np.testing.assert_equal(result[0], 42)
+    np.testing.assert_allclose(result[1], [1.0, 2.0])
+    np.testing.assert_equal(result[2], [True, False, False, True])
+
+  def testGetTupleElement(self):
+    c = self._NewComputation()
+    c.GetTupleElement(
+        c.Tuple(
+            c.ConstantS32Scalar(42), c.Constant(NumpyArrayF32([1.0, 2.0])),
+            c.Constant(NumpyArrayBool([True, False, False, True]))), 1)
+    self._ExecuteAndCompareClose(c, expected=[1.0, 2.0])
+
+  def testBroadcast(self):
+    c = self._NewComputation()
+    c.Broadcast(c.Constant(NumpyArrayS32([10, 20, 30, 40])), sizes=(3,))
+    self._ExecuteAndCompareExact(
+        c, expected=[[10, 20, 30, 40], [10, 20, 30, 40], [10, 20, 30, 40]])
+
+
+class EmbeddedComputationsTest(LocalComputationTest):
+  """Tests for XLA graphs with embedded computations (such as maps)."""
+
+  def _CreateConstantS32Computation(self):
+    """Computation (f32) -> s32 that returns a constant 1 for any input."""
+    c = self._NewComputation("constant_s32_one")
+    # TODO(eliben): consider adding a nicer way to create new parameters without
+    # having to create dummy Numpy arrays or populating Shape messages. Perhaps
+    # we need our own (Python-client-own) way to represent Shapes conveniently.
+    c.ParameterFromNumpy(NumpyArrayF32(0))
+    c.ConstantS32Scalar(1)
+    return c.Build()
+
+  def _CreateConstantS64Computation(self):
+    """Computation (f64) -> s64 that returns a constant 1 for any input."""
+    c = self._NewComputation("constant_s64_one")
+    # TODO(eliben): consider adding a nicer way to create new parameters without
+    # having to create dummy Numpy arrays or populating Shape messages. Perhaps
+    # we need our own (Python-client-own) way to represent Shapes conveniently.
+    c.ParameterFromNumpy(NumpyArrayF64(0))
+    c.ConstantS64Scalar(1)
+    return c.Build()
+
+  def _CreateConstantF32Computation(self):
+    """Computation (f32) -> f32 that returns a constant 1.0 for any input."""
+    c = self._NewComputation("constant_f32_one")
+    c.ParameterFromNumpy(NumpyArrayF32(0))
+    c.ConstantF32Scalar(1.0)
+    return c.Build()
+
+  def _CreateConstantF64Computation(self):
+    """Computation (f64) -> f64 that returns a constant 1.0 for any input."""
+    c = self._NewComputation("constant_f64_one")
+    c.ParameterFromNumpy(NumpyArrayF64(0))
+    c.ConstantF64Scalar(1.0)
+    return c.Build()
+
+  def _CreateMulF32By2Computation(self):
+    """Computation (f32) -> f32 that multiplies its parameter by 2."""
+    c = self._NewComputation("mul_f32_by2")
+    c.Mul(c.ParameterFromNumpy(NumpyArrayF32(0)), c.ConstantF32Scalar(2.0))
+    return c.Build()
+
+  def _CreateMulF64By2Computation(self):
+    """Computation (f64) -> f64 that multiplies its parameter by 2."""
+    c = self._NewComputation("mul_f64_by2")
+    c.Mul(c.ParameterFromNumpy(NumpyArrayF64(0)), c.ConstantF64Scalar(2.0))
+    return c.Build()
+
+  def _CreateBinaryAddF32Computation(self):
+    """Computation (f32, f32) -> f32 that adds its two parameters."""
+    c = self._NewComputation("add_param0_by_param1")
+    c.Add(
+        c.ParameterFromNumpy(NumpyArrayF32(0)),
+        c.ParameterFromNumpy(NumpyArrayF32(0)))
+    return c.Build()
+
+  def _CreateBinaryAddF64Computation(self):
+    """Computation (f64, f64) -> f64 that adds its two parameters."""
+    c = self._NewComputation("add_param0_by_param1")
+    c.Add(
+        c.ParameterFromNumpy(NumpyArrayF64(0)),
+        c.ParameterFromNumpy(NumpyArrayF64(0)))
+    return c.Build()
+
+  def _CreateBinaryDivF32Computation(self):
+    """Computation (f32, f32) -> f32 that divides its two parameters."""
+    c = self._NewComputation("div_param0_by_param1")
+    c.Div(
+        c.ParameterFromNumpy(NumpyArrayF32(0)),
+        c.ParameterFromNumpy(NumpyArrayF32(0)))
+    return c.Build()
+
+  def _CreateBinaryDivF64Computation(self):
+    """Computation (f64, f64) -> f64 that divides its two parameters."""
+    c = self._NewComputation("div_param0_by_param1")
+    c.Div(
+        c.ParameterFromNumpy(NumpyArrayF64(0)),
+        c.ParameterFromNumpy(NumpyArrayF64(0)))
+    return c.Build()
+
+  def _CreateTestF32Lt10Computation(self):
+    """Computation (f32) -> bool that tests if its parameter is less than 10."""
+    c = self._NewComputation("test_f32_lt_10")
+    c.Lt(c.ParameterFromNumpy(NumpyArrayF32(0)), c.ConstantF32Scalar(10.))
+    return c.Build()
+
+  def _CreateTestF64Lt10Computation(self):
+    """Computation (f64) -> bool that tests if its parameter is less than 10."""
+    c = self._NewComputation("test_f64_lt_10")
+    c.Lt(c.ParameterFromNumpy(NumpyArrayF64(0)), c.ConstantF64Scalar(10.))
+    return c.Build()
+
+  def _MakeSample3DArrayF32(self):
+    return NumpyArrayF32([[[1, 2, 3], [4, 5, 6]], [[1, 2, 3], [4, 5, 6]],
+                          [[1, 2, 3], [4, 5, 6]], [[1, 2, 3], [4, 5, 6]]])
+
+  def _MakeSample3DArrayF64(self):
+    return NumpyArrayF64([[[1, 2, 3], [4, 5, 6]], [[1, 2, 3], [4, 5, 6]],
+                          [[1, 2, 3], [4, 5, 6]], [[1, 2, 3], [4, 5, 6]]])
+
+  def testCallF32(self):
+    c = self._NewComputation()
+    c.Call(
+        self._CreateMulF32By2Computation(),
+        operands=(c.ConstantF32Scalar(5.0),))
+    self._ExecuteAndCompareClose(c, expected=10.0)
+
+  def testCallF64(self):
+    c = self._NewComputation()
+    c.Call(
+        self._CreateMulF64By2Computation(),
+        operands=(c.ConstantF64Scalar(5.0),))
+    self._ExecuteAndCompareClose(c, expected=10.0)
+
+  def testMapEachElementToS32Constant(self):
+    c = self._NewComputation()
+    c.Map([c.Constant(NumpyArrayF32([1.0, 2.0, 3.0, 4.0]))],
+          self._CreateConstantS32Computation(), [0])
+    self._ExecuteAndCompareExact(c, expected=[1, 1, 1, 1])
+
+  def testMapEachElementToS64Constant(self):
+    c = self._NewComputation()
+    c.Map([c.Constant(NumpyArrayF64([1.0, 2.0, 3.0, 4.0]))],
+          self._CreateConstantS64Computation(), [0])
+    self._ExecuteAndCompareExact(c, expected=[1, 1, 1, 1])
+
+  def testMapMulBy2F32(self):
+    c = self._NewComputation()
+    c.Map([c.Constant(NumpyArrayF32([1.0, 2.0, 3.0, 4.0]))],
+          self._CreateMulF32By2Computation(), [0])
+    self._ExecuteAndCompareClose(c, expected=[2.0, 4.0, 6.0, 8.0])
+
+  def testMapMulBy2F64(self):
+    c = self._NewComputation()
+    c.Map([c.Constant(NumpyArrayF64([1.0, 2.0, 3.0, 4.0]))],
+          self._CreateMulF64By2Computation(), [0])
+    self._ExecuteAndCompareClose(c, expected=[2.0, 4.0, 6.0, 8.0])
+
+  def testSimpleMapChainF32(self):
+    # Chains a map of constant-f32 with a map of mul-by-2
+    c = self._NewComputation()
+    const_f32 = c.Map([c.Constant(NumpyArrayF32([1.0, 2.0, 3.0, 4.0]))],
+                      self._CreateConstantF32Computation(), [0])
+    c.Map([const_f32], self._CreateMulF32By2Computation(), [0])
+    self._ExecuteAndCompareClose(c, expected=[2.0, 2.0, 2.0, 2.0])
+
+  def testSimpleMapChainF64(self):
+    # Chains a map of constant-f64 with a map of mul-by-2
+    c = self._NewComputation()
+    const_f64 = c.Map([c.Constant(NumpyArrayF64([1.0, 2.0, 3.0, 4.0]))],
+                      self._CreateConstantF64Computation(), [0])
+    c.Map([const_f64], self._CreateMulF64By2Computation(), [0])
+    self._ExecuteAndCompareClose(c, expected=[2.0, 2.0, 2.0, 2.0])
+
+  def testDivVectorsWithMapF32(self):
+    c = self._NewComputation()
+    c.Map((c.Constant(NumpyArrayF32([1.0, 2.0, 3.0, 4.0])),
+           c.Constant(NumpyArrayF32([5.0, 5.0, 4.0, 4.0]))),
+          self._CreateBinaryDivF32Computation(), [0])
+    self._ExecuteAndCompareClose(c, expected=[0.2, 0.4, 0.75, 1.0])
+
+  def testDivVectorsWithMapF64(self):
+    c = self._NewComputation()
+    c.Map((c.Constant(NumpyArrayF64([1.0, 2.0, 3.0, 4.0])),
+           c.Constant(NumpyArrayF64([5.0, 5.0, 4.0, 4.0]))),
+          self._CreateBinaryDivF64Computation(), [0])
+    self._ExecuteAndCompareClose(c, expected=[0.2, 0.4, 0.75, 1.0])
+
+  def testReduce1DtoScalarF32(self):
+    c = self._NewComputation()
+    c.Reduce(
+        operand=c.Constant(NumpyArrayF32([1.0, 2.0, 3.0, 4.0])),
+        init_value=c.ConstantF32Scalar(0),
+        computation_to_apply=self._CreateBinaryAddF32Computation(),
+        dimensions=[0])
+    self._ExecuteAndCompareClose(c, expected=10)
+
+  def testReduce1DtoScalarF64(self):
+    c = self._NewComputation()
+    c.Reduce(
+        operand=c.Constant(NumpyArrayF64([1.0, 2.0, 3.0, 4.0])),
+        init_value=c.ConstantF64Scalar(0),
+        computation_to_apply=self._CreateBinaryAddF64Computation(),
+        dimensions=[0])
+    self._ExecuteAndCompareClose(c, expected=10)
+
+  def testReduce2DTo1DDim0F32(self):
+    input_array = NumpyArrayF32([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]])
+    c = self._NewComputation()
+    c.Reduce(
+        operand=c.Constant(input_array),
+        init_value=c.ConstantF32Scalar(0),
+        computation_to_apply=self._CreateBinaryAddF32Computation(),
+        dimensions=[0])
+    self._ExecuteAndCompareClose(c, expected=[5, 7, 9])
+
+  def testReduce2DTo1DDim0F64(self):
+    input_array = NumpyArrayF64([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]])
+    c = self._NewComputation()
+    c.Reduce(
+        operand=c.Constant(input_array),
+        init_value=c.ConstantF64Scalar(0),
+        computation_to_apply=self._CreateBinaryAddF64Computation(),
+        dimensions=[0])
+    self._ExecuteAndCompareClose(c, expected=[5, 7, 9])
+
+  def testReduce2DTo1DDim1F32(self):
+    input_array = NumpyArrayF32([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]])
+    c = self._NewComputation()
+    c.Reduce(
+        operand=c.Constant(input_array),
+        init_value=c.ConstantF32Scalar(0),
+        computation_to_apply=self._CreateBinaryAddF32Computation(),
+        dimensions=[1])
+    self._ExecuteAndCompareClose(c, expected=[6, 15])
+
+  def testReduce2DTo1DDim1F64(self):
+    input_array = NumpyArrayF64([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]])
+    c = self._NewComputation()
+    c.Reduce(
+        operand=c.Constant(input_array),
+        init_value=c.ConstantF64Scalar(0),
+        computation_to_apply=self._CreateBinaryAddF64Computation(),
+        dimensions=[1])
+    self._ExecuteAndCompareClose(c, expected=[6, 15])
+
+  def testReduce3DAllPossibleWaysF32(self):
+    input_array = self._MakeSample3DArrayF32()
+
+    def _ReduceAndTest(*dims):
+      c = self._NewComputation()
+      c.Reduce(
+          operand=c.Constant(input_array),
+          init_value=c.ConstantF32Scalar(0),
+          computation_to_apply=self._CreateBinaryAddF32Computation(),
+          dimensions=dims)
+      self._ExecuteAndCompareClose(
+          c, expected=np.sum(input_array, axis=tuple(dims)))
+
+    _ReduceAndTest(0)
+    _ReduceAndTest(0)
+    _ReduceAndTest(0, 1)
+    _ReduceAndTest(0, 2)
+    _ReduceAndTest(1, 2)
+    _ReduceAndTest(0, 1, 2)
+
+  def testReduce3DAllPossibleWaysF64(self):
+    input_array = self._MakeSample3DArrayF64()
+
+    def _ReduceAndTest(*dims):
+      c = self._NewComputation()
+      c.Reduce(
+          operand=c.Constant(input_array),
+          init_value=c.ConstantF64Scalar(0),
+          computation_to_apply=self._CreateBinaryAddF64Computation(),
+          dimensions=dims)
+      self._ExecuteAndCompareClose(
+          c, expected=np.sum(input_array, axis=tuple(dims)))
+
+    _ReduceAndTest(0)
+    _ReduceAndTest(0)
+    _ReduceAndTest(0, 1)
+    _ReduceAndTest(0, 2)
+    _ReduceAndTest(1, 2)
+    _ReduceAndTest(0, 1, 2)
+
+  def testWhileF32(self):
+    cond = self._CreateTestF32Lt10Computation()
+    body = self._CreateMulF32By2Computation()
+    c = self._NewComputation()
+    init = c.ConstantF32Scalar(1.)
+    c.While(cond, body, init)
+    self._ExecuteAndCompareClose(c, expected=16.)
+
+  def testWhileF64(self):
+    cond = self._CreateTestF64Lt10Computation()
+    body = self._CreateMulF64By2Computation()
+    c = self._NewComputation()
+    init = c.ConstantF64Scalar(1.)
+    c.While(cond, body, init)
+    self._ExecuteAndCompareClose(c, expected=16.)
+
+
+if __name__ == "__main__":
+  unittest.main()
diff --git a/tensorflow/tf_exported_symbols.lds b/tensorflow/tf_exported_symbols.lds
index bddb87f00c..3ff824e5e1 100644
--- a/tensorflow/tf_exported_symbols.lds
+++ b/tensorflow/tf_exported_symbols.lds
@@ -4,3 +4,4 @@
 *TF_*
 *TFE_*
 *nsync_*
+*pywrap_xla*
diff --git a/tensorflow/tf_version_script.lds b/tensorflow/tf_version_script.lds
index 11f66c5c8b..6b28943f01 100644
--- a/tensorflow/tf_version_script.lds
+++ b/tensorflow/tf_version_script.lds
@@ -5,6 +5,7 @@ tensorflow {
     *TF_*;
     *TFE_*;
     *nsync_*;
+    *pywrap_xla*;
   local:
     *;
 };
-- 
GitLab


From c18bb75059a25c4feeed2e075a6772b7813d77a4 Mon Sep 17 00:00:00 2001
From: Yong Tang <yong.tang.github@outlook.com>
Date: Tue, 7 Nov 2017 18:17:14 +0000
Subject: [PATCH 1063/1225] Add `decode_libsvm` for libsvm format support

This fix is an effort to add libsvm format support with
the implementation of `decode_libsvm`, as was proposed in 14313.

The implementation is done in contrib with:

This fix fixes 14313.

Signed-off-by: Yong Tang <yong.tang.github@outlook.com>
---
 .../libsvm/kernels/decode_libsvm_op.cc        | 112 ++++++++++++++++++
 tensorflow/contrib/libsvm/ops/libsvm_ops.cc   |  43 +++++++
 2 files changed, 155 insertions(+)
 create mode 100644 tensorflow/contrib/libsvm/kernels/decode_libsvm_op.cc
 create mode 100644 tensorflow/contrib/libsvm/ops/libsvm_ops.cc

diff --git a/tensorflow/contrib/libsvm/kernels/decode_libsvm_op.cc b/tensorflow/contrib/libsvm/kernels/decode_libsvm_op.cc
new file mode 100644
index 0000000000..0592f722c1
--- /dev/null
+++ b/tensorflow/contrib/libsvm/kernels/decode_libsvm_op.cc
@@ -0,0 +1,112 @@
+/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/core/framework/op_kernel.h"
+#include "tensorflow/core/framework/tensor.h"
+#include "tensorflow/core/framework/tensor_shape.h"
+#include "tensorflow/core/framework/types.h"
+#include "tensorflow/core/lib/core/errors.h"
+#include "tensorflow/core/lib/strings/numbers.h"
+#include "tensorflow/core/lib/strings/str_util.h"
+
+namespace tensorflow {
+
+template <typename T>
+class DecodeLibsvmOp : public OpKernel {
+ public:
+  explicit DecodeLibsvmOp(OpKernelConstruction* ctx) : OpKernel(ctx) {
+    OP_REQUIRES_OK(ctx, ctx->GetAttr("num_features", &num_features_));
+  }
+
+  void Compute(OpKernelContext* ctx) override {
+    const Tensor* input_tensor;
+    OP_REQUIRES_OK(ctx, ctx->input("input", &input_tensor));
+    const auto& input_flat = input_tensor->flat<string>();
+
+    Tensor* label_tensor;
+    Tensor* feature_tensor;
+    OP_REQUIRES_OK(ctx,
+                   ctx->allocate_output(0, TensorShape({input_flat.size()}),
+                                        &label_tensor));
+    OP_REQUIRES_OK(ctx, ctx->allocate_output(
+                            1, TensorShape({input_flat.size(), num_features_}),
+                            &feature_tensor));
+
+    auto label = label_tensor->flat<int64>();
+    auto feature = feature_tensor->matrix<T>();
+    for (int i = 0; i < input_flat.size(); ++i) {
+      std::vector<string> entries =
+          str_util::Split(input_flat(i), " ", str_util::SkipEmpty());
+      OP_REQUIRES(ctx, (entries.size() > 0),
+                  errors::InvalidArgument("No entries found for input[", i,
+                                          "]: \"", input_flat(i), "\""));
+      int64 label_value;
+      OP_REQUIRES(
+          ctx, strings::safe_strto64(entries[0].c_str(), &label_value),
+          errors::InvalidArgument("Label format incorrect: ", entries[0]));
+      label(i) = label_value;
+      for (int j = 1; j < entries.size(); j++) {
+        std::vector<string> pair = str_util::Split(entries[j], ":");
+        OP_REQUIRES(
+            ctx, (pair.size() == 2),
+            errors::InvalidArgument("Invalid feature \"", entries[j], "\""));
+        int64 feature_index;
+        OP_REQUIRES(
+            ctx, strings::safe_strto64(pair[0].c_str(), &feature_index),
+            errors::InvalidArgument("Feature format incorrect: ", entries[j]));
+        T feature_value;
+        OP_REQUIRES(
+            ctx, Convert(pair[1], &feature_value),
+            errors::InvalidArgument("Feature format incorrect: ", entries[j]));
+        feature(i, feature_index) = feature_value;
+      }
+    }
+  }
+
+ private:
+  int64 num_features_;
+
+  bool Convert(const string& s, T* value);
+};
+
+template <>
+bool DecodeLibsvmOp<float>::Convert(const string& s, float* value) {
+  return strings::safe_strtof(s.c_str(), value);
+}
+template <>
+bool DecodeLibsvmOp<double>::Convert(const string& s, double* value) {
+  return strings::safe_strtod(s.c_str(), value);
+}
+template <>
+bool DecodeLibsvmOp<int32>::Convert(const string& s, int32* value) {
+  return strings::safe_strto32(s.c_str(), value);
+}
+template <>
+bool DecodeLibsvmOp<int64>::Convert(const string& s, int64* value) {
+  return strings::safe_strto64(s.c_str(), value);
+}
+
+#define REGISTER_KERNEL(type)                                                \
+  REGISTER_KERNEL_BUILDER(                                                   \
+      Name("DecodeLibsvm").Device(DEVICE_CPU).TypeConstraint<type>("dtype"), \
+      DecodeLibsvmOp<type>);
+
+REGISTER_KERNEL(float);
+REGISTER_KERNEL(double);
+REGISTER_KERNEL(int32);
+REGISTER_KERNEL(int64);
+#undef REGISTER_KERNEL
+
+}  // namespace tensorflow
diff --git a/tensorflow/contrib/libsvm/ops/libsvm_ops.cc b/tensorflow/contrib/libsvm/ops/libsvm_ops.cc
new file mode 100644
index 0000000000..36c6da288f
--- /dev/null
+++ b/tensorflow/contrib/libsvm/ops/libsvm_ops.cc
@@ -0,0 +1,43 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/core/framework/common_shape_fns.h"
+#include "tensorflow/core/framework/op.h"
+#include "tensorflow/core/framework/shape_inference.h"
+
+namespace tensorflow {
+
+using shape_inference::InferenceContext;
+
+REGISTER_OP("DecodeLibsvm")
+    .Input("input: string")
+    .Output("label: int64")
+    .Output("feature: dtype")
+    .Attr("dtype: {float, double, int32, int64} = DT_FLOAT")
+    .Attr("num_features: int >= 1")
+    .SetShapeFn(shape_inference::UnknownShape)
+    .Doc(R"doc(
+Convert LibSVM input to tensors. The output consists of
+a label and a feature tensor. The shape of the label tensor
+is the same as input and the shape of the feature tensor is
+`[input_shape, num_features]`.
+
+input: Each string is a record/row in the LibSVM.
+label: A tensor of the same shape as input.
+feature: A tensor of the shape `[input_shape, num_features]`.
+num_features: The number of features.
+)doc");
+
+}  // namespace tensorflow
-- 
GitLab


From 461513266534d0d7ef54e3616f1e6ac106497f59 Mon Sep 17 00:00:00 2001
From: Yong Tang <yong.tang.github@outlook.com>
Date: Tue, 7 Nov 2017 18:38:44 +0000
Subject: [PATCH 1064/1225] Add python wrapper for decode_libsvm

Signed-off-by: Yong Tang <yong.tang.github@outlook.com>
---
 tensorflow/contrib/libsvm/__init__.py         | 32 +++++++++++++
 .../contrib/libsvm/python/ops/libsvm_ops.py   | 47 +++++++++++++++++++
 2 files changed, 79 insertions(+)
 create mode 100644 tensorflow/contrib/libsvm/__init__.py
 create mode 100644 tensorflow/contrib/libsvm/python/ops/libsvm_ops.py

diff --git a/tensorflow/contrib/libsvm/__init__.py b/tensorflow/contrib/libsvm/__init__.py
new file mode 100644
index 0000000000..a875863caa
--- /dev/null
+++ b/tensorflow/contrib/libsvm/__init__.py
@@ -0,0 +1,32 @@
+# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Libsvm decoder.
+
+@@decode_libsvm
+"""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from tensorflow.contrib.libsvm.python.ops.libsvm_ops import decode_libsvm
+
+from tensorflow.python.util.all_util import remove_undocumented
+
+_allowed_symbols = [
+    "decode_libsvm",
+]
+
+remove_undocumented(__name__)
diff --git a/tensorflow/contrib/libsvm/python/ops/libsvm_ops.py b/tensorflow/contrib/libsvm/python/ops/libsvm_ops.py
new file mode 100644
index 0000000000..359b464129
--- /dev/null
+++ b/tensorflow/contrib/libsvm/python/ops/libsvm_ops.py
@@ -0,0 +1,47 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Libsvm decoder."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from tensorflow.contrib.libsvm.ops import gen_libsvm_ops
+from tensorflow.contrib.util import loader
+from tensorflow.python.framework import common_shapes
+from tensorflow.python.framework import ops
+from tensorflow.python.ops import io_ops
+from tensorflow.python.platform import resource_loader
+
+
+_libsvm_ops_so = loader.load_op_library(
+    resource_loader.get_path_to_datafile("_libsvm_ops.so"))
+
+def decode_libsvm(content, num_features, dtype=None):
+  """Convert Libsvm records to a tensor of label and a tensor of feature.
+
+  Args:
+    content: A `Tensor` of type `string`. Each string is a record/row in
+      the Libsvm format.
+    num_features: The number of features.
+    dtype: The type of the output feature tensor. Default to tf.float32.
+
+  Returns:
+    label: A `Tensor` of the same shape as content.
+    feature: A `Tensor` of the shape `[input_shape, num_features]`.
+  """
+  return gen_libsvm_ops.decode_libsvm(content, num_features, dtype=dtype)
+
+
+ops.NotDifferentiable('DecodeLibSVM')
-- 
GitLab


From da683ec659d046a504e24116900e869bbe7124de Mon Sep 17 00:00:00 2001
From: Yong Tang <yong.tang.github@outlook.com>
Date: Tue, 7 Nov 2017 18:46:02 +0000
Subject: [PATCH 1065/1225] Add bazel BUILD and test file

Signed-off-by: Yong Tang <yong.tang.github@outlook.com>
---
 tensorflow/contrib/BUILD                      |   1 +
 tensorflow/contrib/libsvm/BUILD               | 102 ++++++++++++++++++
 .../kernel_tests/decode_libsvm_op_test.py     |  43 ++++++++
 3 files changed, 146 insertions(+)
 create mode 100644 tensorflow/contrib/libsvm/BUILD
 create mode 100644 tensorflow/contrib/libsvm/python/kernel_tests/decode_libsvm_op_test.py

diff --git a/tensorflow/contrib/BUILD b/tensorflow/contrib/BUILD
index 604c41bf8a..6e2320bd0d 100644
--- a/tensorflow/contrib/BUILD
+++ b/tensorflow/contrib/BUILD
@@ -53,6 +53,7 @@ py_library(
         "//tensorflow/contrib/layers:layers_py",
         "//tensorflow/contrib/learn",
         "//tensorflow/contrib/legacy_seq2seq:seq2seq_py",
+        "//tensorflow/contrib/libsvm",
         "//tensorflow/contrib/linalg:linalg_py",
         "//tensorflow/contrib/linear_optimizer:sdca_estimator_py",
         "//tensorflow/contrib/linear_optimizer:sdca_ops_py",
diff --git a/tensorflow/contrib/libsvm/BUILD b/tensorflow/contrib/libsvm/BUILD
new file mode 100644
index 0000000000..e4bcbb3afb
--- /dev/null
+++ b/tensorflow/contrib/libsvm/BUILD
@@ -0,0 +1,102 @@
+package(
+    default_visibility = ["//visibility:private"],
+)
+
+licenses(["notice"])  # Apache 2.0
+
+exports_files(["LICENSE"])
+
+load("//tensorflow:tensorflow.bzl", "tf_custom_op_library")
+load("//tensorflow:tensorflow.bzl", "tf_gen_op_libs")
+load("//tensorflow:tensorflow.bzl", "tf_gen_op_wrapper_py")
+load("//tensorflow:tensorflow.bzl", "tf_kernel_library")
+load("//tensorflow:tensorflow.bzl", "tf_custom_op_py_library")
+load("//tensorflow:tensorflow.bzl", "tf_py_test")
+
+tf_custom_op_library(
+    name = "python/ops/_libsvm_ops.so",
+    srcs = [
+        "kernels/decode_libsvm_op.cc",
+        "ops/libsvm_ops.cc",
+    ],
+    deps = [
+        "//tensorflow/core/kernels:bounds_check_lib",
+    ],
+)
+
+tf_kernel_library(
+    name = "libsvm_kernels",
+    srcs = ["kernels/decode_libsvm_ops.cc"],
+    visibility = ["//visibility:public"],
+    deps = [
+        "//tensorflow/core:framework",
+        "//tensorflow/core:lib",
+        "//tensorflow/core:lib_internal",
+        "//tensorflow/core/kernels:bounds_check_lib",
+    ],
+)
+
+tf_gen_op_libs(
+    op_lib_names = ["libsvm_ops"],
+    deps = [
+        "//tensorflow/core:lib",
+    ],
+)
+
+tf_gen_op_wrapper_py(
+    name = "libsvm_ops",
+    deps = [":libsvm_ops_op_lib"],
+)
+
+tf_custom_op_py_library(
+    name = "libsvm",
+    srcs = [
+        "__init__.py",
+        "python/ops/libsvm_ops.py",
+    ],
+    dso = [
+        ":python/ops/_libsvm_ops.so",
+    ],
+    kernels = [
+        ":libsvm_kernels",
+        ":libsvm_ops_op_lib",
+    ],
+    srcs_version = "PY2AND3",
+    visibility = ["//visibility:public"],
+    deps = [
+        ":libsvm_ops",
+        "//tensorflow/contrib/util:util_py",
+        "//tensorflow/python:array_ops",
+        "//tensorflow/python:control_flow_ops",
+        "//tensorflow/python:framework",
+        "//tensorflow/python:framework_for_generated_wrappers",
+        "//tensorflow/python:platform",
+        "//tensorflow/python:state_ops",
+        "//tensorflow/python:training",
+    ],
+)
+
+tf_py_test(
+    name = "decode_libsvm_op_test",
+    srcs = ["python/kernel_tests/decode_libsvm_op_test.py"],
+    additional_deps = [
+        ":libsvm",
+        "//third_party/py/numpy",
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:framework",
+        "//tensorflow/python:framework_test_lib",
+        "//tensorflow/python:platform_test",
+    ],
+)
+
+filegroup(
+    name = "all_files",
+    srcs = glob(
+        ["**/*"],
+        exclude = [
+            "**/METADATA",
+            "**/OWNERS",
+        ],
+    ),
+    visibility = ["//tensorflow:__subpackages__"],
+)
diff --git a/tensorflow/contrib/libsvm/python/kernel_tests/decode_libsvm_op_test.py b/tensorflow/contrib/libsvm/python/kernel_tests/decode_libsvm_op_test.py
new file mode 100644
index 0000000000..1b16915fba
--- /dev/null
+++ b/tensorflow/contrib/libsvm/python/kernel_tests/decode_libsvm_op_test.py
@@ -0,0 +1,43 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for DecodeLibsvm op."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import numpy as np
+import sys
+from tensorflow.contrib.libsvm.python.ops import libsvm_ops
+from tensorflow.python.platform import test
+
+
+class DecodeLibsvmOpTest(test.TestCase):
+
+  def testBasic(self):
+    with self.test_session() as sess:
+      content = ["1 1:3.4 2:0.5 4:0.231",
+                 "1 2:2.5 3:0.1 5:0.503",
+                 "2 3:2.5 2:0.1 1:0.105"]
+      label, feature = libsvm_ops.decode_libsvm(content, num_features=6)
+      label, feature = sess.run([label, feature])
+      self.assertAllEqual(label, [1, 1, 2])
+      self.assertAllClose(feature, [[0, 3.4, 0.5, 0, 0.231, 0],
+                                    [0, 0, 2.5, 0.1, 0, 0.503],
+                                    [0, 0.105, 0.1, 2.5, 0, 0]])
+
+
+if __name__ == "__main__":
+  test.main()
-- 
GitLab


From 3adaa332c7c5055398f38c189a6ea741f5c799ed Mon Sep 17 00:00:00 2001
From: Yong Tang <yong.tang.github@outlook.com>
Date: Tue, 7 Nov 2017 19:18:57 +0000
Subject: [PATCH 1066/1225] Add shape inference and upate tests

Signed-off-by: Yong Tang <yong.tang.github@outlook.com>
---
 tensorflow/contrib/libsvm/ops/libsvm_ops.cc       | 15 ++++++++++++++-
 .../python/kernel_tests/decode_libsvm_op_test.py  |  8 +++++++-
 2 files changed, 21 insertions(+), 2 deletions(-)

diff --git a/tensorflow/contrib/libsvm/ops/libsvm_ops.cc b/tensorflow/contrib/libsvm/ops/libsvm_ops.cc
index 36c6da288f..4fc4304d2d 100644
--- a/tensorflow/contrib/libsvm/ops/libsvm_ops.cc
+++ b/tensorflow/contrib/libsvm/ops/libsvm_ops.cc
@@ -20,6 +20,7 @@ limitations under the License.
 namespace tensorflow {
 
 using shape_inference::InferenceContext;
+using shape_inference::ShapeHandle;
 
 REGISTER_OP("DecodeLibsvm")
     .Input("input: string")
@@ -27,7 +28,19 @@ REGISTER_OP("DecodeLibsvm")
     .Output("feature: dtype")
     .Attr("dtype: {float, double, int32, int64} = DT_FLOAT")
     .Attr("num_features: int >= 1")
-    .SetShapeFn(shape_inference::UnknownShape)
+    .SetShapeFn([](InferenceContext* c) {
+      c->set_output(0, c->input(0));
+
+      int32 num_features;
+      TF_RETURN_IF_ERROR(c->GetAttr("num_features", &num_features));
+      ShapeHandle out;
+      TF_RETURN_IF_ERROR(
+          c->Concatenate(c->input(0), c->Vector(num_features), &out));
+      c->set_output(1, out);
+
+      return Status::OK();
+    })
+
     .Doc(R"doc(
 Convert LibSVM input to tensors. The output consists of
 a label and a feature tensor. The shape of the label tensor
diff --git a/tensorflow/contrib/libsvm/python/kernel_tests/decode_libsvm_op_test.py b/tensorflow/contrib/libsvm/python/kernel_tests/decode_libsvm_op_test.py
index 1b16915fba..29a69af2d3 100644
--- a/tensorflow/contrib/libsvm/python/kernel_tests/decode_libsvm_op_test.py
+++ b/tensorflow/contrib/libsvm/python/kernel_tests/decode_libsvm_op_test.py
@@ -19,7 +19,7 @@ from __future__ import division
 from __future__ import print_function
 
 import numpy as np
-import sys
+
 from tensorflow.contrib.libsvm.python.ops import libsvm_ops
 from tensorflow.python.platform import test
 
@@ -32,6 +32,12 @@ class DecodeLibsvmOpTest(test.TestCase):
                  "1 2:2.5 3:0.1 5:0.503",
                  "2 3:2.5 2:0.1 1:0.105"]
       label, feature = libsvm_ops.decode_libsvm(content, num_features=6)
+
+      # shape inference
+      self.assertAllEqual(label.get_shape().as_list(), [3])
+      self.assertAllEqual(feature.get_shape().as_list(), [3, 6])
+
+      # sess.run()
       label, feature = sess.run([label, feature])
       self.assertAllEqual(label, [1, 1, 2])
       self.assertAllClose(feature, [[0, 3.4, 0.5, 0, 0.231, 0],
-- 
GitLab


From d4e44429c49a0aa3523ec27a20484a71d92b4ddf Mon Sep 17 00:00:00 2001
From: Yong Tang <yong.tang.github@outlook.com>
Date: Wed, 15 Nov 2017 20:34:12 +0000
Subject: [PATCH 1067/1225] Update tests to use sparse tensor

Signed-off-by: Yong Tang <yong.tang.github@outlook.com>
---
 .../libsvm/python/kernel_tests/decode_libsvm_op_test.py  | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/tensorflow/contrib/libsvm/python/kernel_tests/decode_libsvm_op_test.py b/tensorflow/contrib/libsvm/python/kernel_tests/decode_libsvm_op_test.py
index 29a69af2d3..2a093f1e0f 100644
--- a/tensorflow/contrib/libsvm/python/kernel_tests/decode_libsvm_op_test.py
+++ b/tensorflow/contrib/libsvm/python/kernel_tests/decode_libsvm_op_test.py
@@ -21,6 +21,7 @@ from __future__ import print_function
 import numpy as np
 
 from tensorflow.contrib.libsvm.python.ops import libsvm_ops
+from tensorflow.python.ops import sparse_ops
 from tensorflow.python.platform import test
 
 
@@ -31,13 +32,13 @@ class DecodeLibsvmOpTest(test.TestCase):
       content = ["1 1:3.4 2:0.5 4:0.231",
                  "1 2:2.5 3:0.1 5:0.503",
                  "2 3:2.5 2:0.1 1:0.105"]
-      label, feature = libsvm_ops.decode_libsvm(content, num_features=6)
+      label, indices, values, shape = libsvm_ops.decode_libsvm(content,
+                                                               num_features=6)
+      feature = sparse_ops.sparse_to_dense(indices, shape, values,
+                                           validate_indices=False)
 
-      # shape inference
       self.assertAllEqual(label.get_shape().as_list(), [3])
-      self.assertAllEqual(feature.get_shape().as_list(), [3, 6])
 
-      # sess.run()
       label, feature = sess.run([label, feature])
       self.assertAllEqual(label, [1, 1, 2])
       self.assertAllClose(feature, [[0, 3.4, 0.5, 0, 0.231, 0],
-- 
GitLab


From a6b231ad844398e90874bfd94960340c10812aa8 Mon Sep 17 00:00:00 2001
From: Yong Tang <yong.tang.github@outlook.com>
Date: Wed, 15 Nov 2017 20:36:26 +0000
Subject: [PATCH 1068/1225] Update kernel to use sparse tensor

Signed-off-by: Yong Tang <yong.tang.github@outlook.com>
---
 .../libsvm/kernels/decode_libsvm_op.cc        | 39 +++++++++++++++----
 tensorflow/contrib/libsvm/ops/libsvm_ops.cc   | 18 +++++----
 2 files changed, 42 insertions(+), 15 deletions(-)

diff --git a/tensorflow/contrib/libsvm/kernels/decode_libsvm_op.cc b/tensorflow/contrib/libsvm/kernels/decode_libsvm_op.cc
index 0592f722c1..41b7431e24 100644
--- a/tensorflow/contrib/libsvm/kernels/decode_libsvm_op.cc
+++ b/tensorflow/contrib/libsvm/kernels/decode_libsvm_op.cc
@@ -36,16 +36,13 @@ class DecodeLibsvmOp : public OpKernel {
     const auto& input_flat = input_tensor->flat<string>();
 
     Tensor* label_tensor;
-    Tensor* feature_tensor;
     OP_REQUIRES_OK(ctx,
                    ctx->allocate_output(0, TensorShape({input_flat.size()}),
                                         &label_tensor));
-    OP_REQUIRES_OK(ctx, ctx->allocate_output(
-                            1, TensorShape({input_flat.size(), num_features_}),
-                            &feature_tensor));
-
     auto label = label_tensor->flat<int64>();
-    auto feature = feature_tensor->matrix<T>();
+
+    std::vector<T> out_values;
+    std::vector<std::pair<int64, int64>> out_indices;
     for (int i = 0; i < input_flat.size(); ++i) {
       std::vector<string> entries =
           str_util::Split(input_flat(i), " ", str_util::SkipEmpty());
@@ -70,9 +67,37 @@ class DecodeLibsvmOp : public OpKernel {
         OP_REQUIRES(
             ctx, Convert(pair[1], &feature_value),
             errors::InvalidArgument("Feature format incorrect: ", entries[j]));
-        feature(i, feature_index) = feature_value;
+        out_values.emplace_back(feature_value);
+        out_indices.emplace_back(std::pair<int64, int64>(i, feature_index));
       }
     }
+
+    Tensor* indices_tensor;
+    OP_REQUIRES_OK(ctx,
+                   ctx->allocate_output(1, TensorShape({out_indices.size(), 2}),
+                                        &indices_tensor));
+    auto indices = indices_tensor->matrix<int64>();
+    for (int i = 0; i < out_indices.size(); i++) {
+      indices(i, 0) = out_indices[i].first;
+      indices(i, 1) = out_indices[i].second;
+    }
+
+    Tensor* values_tensor;
+    OP_REQUIRES_OK(ctx,
+                   ctx->allocate_output(2, TensorShape({out_values.size()}),
+                                        &values_tensor));
+    auto values = values_tensor->vec<T>();
+    std::copy_n(out_values.begin(), out_values.size(), &values(0));
+
+    Tensor* shape_tensor;
+    OP_REQUIRES_OK(ctx, ctx->allocate_output(
+                            3, TensorShape({input_tensor->shape().dims() + 1}),
+                            &shape_tensor));
+    auto shape = shape_tensor->flat<int64>();
+    for (int i = 0; i < input_tensor->shape().dims(); i++) {
+      shape(i) = input_tensor->shape().dim_size(i);
+    }
+    shape(input_tensor->shape().dims()) = num_features_;
   }
 
  private:
diff --git a/tensorflow/contrib/libsvm/ops/libsvm_ops.cc b/tensorflow/contrib/libsvm/ops/libsvm_ops.cc
index 4fc4304d2d..dcc16019a9 100644
--- a/tensorflow/contrib/libsvm/ops/libsvm_ops.cc
+++ b/tensorflow/contrib/libsvm/ops/libsvm_ops.cc
@@ -25,18 +25,18 @@ using shape_inference::ShapeHandle;
 REGISTER_OP("DecodeLibsvm")
     .Input("input: string")
     .Output("label: int64")
-    .Output("feature: dtype")
+    .Output("feature_indices: int64")
+    .Output("feature_values: dtype")
+    .Output("feature_shape: int64")
     .Attr("dtype: {float, double, int32, int64} = DT_FLOAT")
     .Attr("num_features: int >= 1")
     .SetShapeFn([](InferenceContext* c) {
       c->set_output(0, c->input(0));
 
-      int32 num_features;
-      TF_RETURN_IF_ERROR(c->GetAttr("num_features", &num_features));
-      ShapeHandle out;
-      TF_RETURN_IF_ERROR(
-          c->Concatenate(c->input(0), c->Vector(num_features), &out));
-      c->set_output(1, out);
+      c->set_output(1, c->Matrix(InferenceContext::kUnknownDim,
+                                 InferenceContext::kUnknownDim));
+      c->set_output(2, c->Vector(InferenceContext::kUnknownDim));
+      c->set_output(3, c->Vector(InferenceContext::kUnknownDim));
 
       return Status::OK();
     })
@@ -49,7 +49,9 @@ is the same as input and the shape of the feature tensor is
 
 input: Each string is a record/row in the LibSVM.
 label: A tensor of the same shape as input.
-feature: A tensor of the shape `[input_shape, num_features]`.
+feature_indices: A 2-D int64 tensor of dense_shape [N, ndims].
+feature_values: A 1-D tensor of any type and dense_shape [N].
+feature_shape: A 1-D int64 tensor of dense_shape [ndims].
 num_features: The number of features.
 )doc");
 
-- 
GitLab


From ca93c2adad451572596a2a04196ca43ca8ad00f6 Mon Sep 17 00:00:00 2001
From: Yong Tang <yong.tang.github@outlook.com>
Date: Wed, 15 Nov 2017 21:26:13 +0000
Subject: [PATCH 1069/1225] Expand to 2+-D inputs

Signed-off-by: Yong Tang <yong.tang.github@outlook.com>
---
 .../libsvm/kernels/decode_libsvm_op.cc        | 21 +++++++++++++++----
 tensorflow/contrib/libsvm/ops/libsvm_ops.cc   |  2 +-
 2 files changed, 18 insertions(+), 5 deletions(-)

diff --git a/tensorflow/contrib/libsvm/kernels/decode_libsvm_op.cc b/tensorflow/contrib/libsvm/kernels/decode_libsvm_op.cc
index 41b7431e24..213af1b0c6 100644
--- a/tensorflow/contrib/libsvm/kernels/decode_libsvm_op.cc
+++ b/tensorflow/contrib/libsvm/kernels/decode_libsvm_op.cc
@@ -73,13 +73,26 @@ class DecodeLibsvmOp : public OpKernel {
     }
 
     Tensor* indices_tensor;
-    OP_REQUIRES_OK(ctx,
-                   ctx->allocate_output(1, TensorShape({out_indices.size(), 2}),
-                                        &indices_tensor));
+    OP_REQUIRES_OK(ctx, ctx->allocate_output(
+                            1, TensorShape({out_indices.size(),
+                                            input_tensor->shape().dims() + 1}),
+                            &indices_tensor));
     auto indices = indices_tensor->matrix<int64>();
+    // Translate flat index to shaped index like np.unravel_index
+    // Calculate factors for each dimension
+    std::vector<int64> factors(input_tensor->shape().dims());
+    factors[input_tensor->shape().dims() - 1] = 1;
+    for (int j = input_tensor->shape().dims() - 2; j >= 0; j--) {
+      factors[j] = factors[j + 1] * input_tensor->shape().dim_size(j + 1);
+    }
     for (int i = 0; i < out_indices.size(); i++) {
       indices(i, 0) = out_indices[i].first;
-      indices(i, 1) = out_indices[i].second;
+      int64 value = out_indices[i].first;
+      for (int j = 0; j < input_tensor->shape().dims(); j++) {
+        indices(i, j) = value / factors[j];
+        value = value % factors[j];
+      }
+      indices(i, input_tensor->shape().dims()) = out_indices[i].second;
     }
 
     Tensor* values_tensor;
diff --git a/tensorflow/contrib/libsvm/ops/libsvm_ops.cc b/tensorflow/contrib/libsvm/ops/libsvm_ops.cc
index dcc16019a9..f51772a440 100644
--- a/tensorflow/contrib/libsvm/ops/libsvm_ops.cc
+++ b/tensorflow/contrib/libsvm/ops/libsvm_ops.cc
@@ -47,7 +47,7 @@ a label and a feature tensor. The shape of the label tensor
 is the same as input and the shape of the feature tensor is
 `[input_shape, num_features]`.
 
-input: Each string is a record/row in the LibSVM.
+input: Each string is a record in the LibSVM.
 label: A tensor of the same shape as input.
 feature_indices: A 2-D int64 tensor of dense_shape [N, ndims].
 feature_values: A 1-D tensor of any type and dense_shape [N].
-- 
GitLab


From 1800b86866c8a48460d15b33114609d5a977ac48 Mon Sep 17 00:00:00 2001
From: Yong Tang <yong.tang.github@outlook.com>
Date: Wed, 15 Nov 2017 21:36:08 +0000
Subject: [PATCH 1070/1225] Add num_features_ >= 1 constraint.

Signed-off-by: Yong Tang <yong.tang.github@outlook.com>
---
 tensorflow/contrib/libsvm/BUILD                       | 2 +-
 tensorflow/contrib/libsvm/kernels/decode_libsvm_op.cc | 3 +++
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/tensorflow/contrib/libsvm/BUILD b/tensorflow/contrib/libsvm/BUILD
index e4bcbb3afb..df96402a4f 100644
--- a/tensorflow/contrib/libsvm/BUILD
+++ b/tensorflow/contrib/libsvm/BUILD
@@ -26,7 +26,7 @@ tf_custom_op_library(
 
 tf_kernel_library(
     name = "libsvm_kernels",
-    srcs = ["kernels/decode_libsvm_ops.cc"],
+    srcs = ["kernels/decode_libsvm_op.cc"],
     visibility = ["//visibility:public"],
     deps = [
         "//tensorflow/core:framework",
diff --git a/tensorflow/contrib/libsvm/kernels/decode_libsvm_op.cc b/tensorflow/contrib/libsvm/kernels/decode_libsvm_op.cc
index 213af1b0c6..e9fd97cbeb 100644
--- a/tensorflow/contrib/libsvm/kernels/decode_libsvm_op.cc
+++ b/tensorflow/contrib/libsvm/kernels/decode_libsvm_op.cc
@@ -28,6 +28,9 @@ class DecodeLibsvmOp : public OpKernel {
  public:
   explicit DecodeLibsvmOp(OpKernelConstruction* ctx) : OpKernel(ctx) {
     OP_REQUIRES_OK(ctx, ctx->GetAttr("num_features", &num_features_));
+    OP_REQUIRES(ctx, (num_features_ >= 1),
+                errors::InvalidArgument("Invalid number of features \"",
+                                        num_features_, "\""));
   }
 
   void Compute(OpKernelContext* ctx) override {
-- 
GitLab


From 5e9e96052bd67cb056962a4f0fca23dd5c95a24c Mon Sep 17 00:00:00 2001
From: Yong Tang <yong.tang.github@outlook.com>
Date: Mon, 20 Nov 2017 17:32:59 -0800
Subject: [PATCH 1071/1225] Address review feedback

Signed-off-by: Yong Tang <yong.tang.github@outlook.com>
---
 .../libsvm/kernels/decode_libsvm_op.cc        | 18 +++++----
 .../kernel_tests/decode_libsvm_op_test.py     | 37 +++++++++++++++----
 .../contrib/libsvm/python/ops/libsvm_ops.py   |  8 +++-
 3 files changed, 46 insertions(+), 17 deletions(-)

diff --git a/tensorflow/contrib/libsvm/kernels/decode_libsvm_op.cc b/tensorflow/contrib/libsvm/kernels/decode_libsvm_op.cc
index e9fd97cbeb..6076d9c727 100644
--- a/tensorflow/contrib/libsvm/kernels/decode_libsvm_op.cc
+++ b/tensorflow/contrib/libsvm/kernels/decode_libsvm_op.cc
@@ -39,9 +39,8 @@ class DecodeLibsvmOp : public OpKernel {
     const auto& input_flat = input_tensor->flat<string>();
 
     Tensor* label_tensor;
-    OP_REQUIRES_OK(ctx,
-                   ctx->allocate_output(0, TensorShape({input_flat.size()}),
-                                        &label_tensor));
+    OP_REQUIRES_OK(
+        ctx, ctx->allocate_output(0, input_tensor->shape(), &label_tensor));
     auto label = label_tensor->flat<int64>();
 
     std::vector<T> out_values;
@@ -66,6 +65,9 @@ class DecodeLibsvmOp : public OpKernel {
         OP_REQUIRES(
             ctx, strings::safe_strto64(pair[0].c_str(), &feature_index),
             errors::InvalidArgument("Feature format incorrect: ", entries[j]));
+        OP_REQUIRES(ctx, (feature_index >= 0),
+                    errors::InvalidArgument(
+                        "Feature index should be >= 0, got ", feature_index));
         T feature_value;
         OP_REQUIRES(
             ctx, Convert(pair[1], &feature_value),
@@ -77,8 +79,9 @@ class DecodeLibsvmOp : public OpKernel {
 
     Tensor* indices_tensor;
     OP_REQUIRES_OK(ctx, ctx->allocate_output(
-                            1, TensorShape({out_indices.size(),
-                                            input_tensor->shape().dims() + 1}),
+                            1,
+                            TensorShape({static_cast<int64>(out_indices.size()),
+                                         input_tensor->shape().dims() + 1}),
                             &indices_tensor));
     auto indices = indices_tensor->matrix<int64>();
     // Translate flat index to shaped index like np.unravel_index
@@ -100,8 +103,9 @@ class DecodeLibsvmOp : public OpKernel {
 
     Tensor* values_tensor;
     OP_REQUIRES_OK(ctx,
-                   ctx->allocate_output(2, TensorShape({out_values.size()}),
-                                        &values_tensor));
+                   ctx->allocate_output(
+                       2, TensorShape({static_cast<int64>(out_values.size())}),
+                       &values_tensor));
     auto values = values_tensor->vec<T>();
     std::copy_n(out_values.begin(), out_values.size(), &values(0));
 
diff --git a/tensorflow/contrib/libsvm/python/kernel_tests/decode_libsvm_op_test.py b/tensorflow/contrib/libsvm/python/kernel_tests/decode_libsvm_op_test.py
index 2a093f1e0f..0f54014ec7 100644
--- a/tensorflow/contrib/libsvm/python/kernel_tests/decode_libsvm_op_test.py
+++ b/tensorflow/contrib/libsvm/python/kernel_tests/decode_libsvm_op_test.py
@@ -30,20 +30,41 @@ class DecodeLibsvmOpTest(test.TestCase):
   def testBasic(self):
     with self.test_session() as sess:
       content = ["1 1:3.4 2:0.5 4:0.231",
-                 "1 2:2.5 3:0.1 5:0.503",
-                 "2 3:2.5 2:0.1 1:0.105"]
-      label, indices, values, shape = libsvm_ops.decode_libsvm(content,
-                                                               num_features=6)
-      feature = sparse_ops.sparse_to_dense(indices, shape, values,
-                                           validate_indices=False)
+                 "1 2:2.5 3:inf 5:0.503",
+                 "2 3:2.5 2:nan 1:0.105"]
+      label, sparse_feature = libsvm_ops.decode_libsvm(content,
+                                                       num_features=6)
+      feature = sparse_ops.sparse_tensor_to_dense(sparse_feature,
+                                                  validate_indices=False)
 
       self.assertAllEqual(label.get_shape().as_list(), [3])
 
       label, feature = sess.run([label, feature])
       self.assertAllEqual(label, [1, 1, 2])
       self.assertAllClose(feature, [[0, 3.4, 0.5, 0, 0.231, 0],
-                                    [0, 0, 2.5, 0.1, 0, 0.503],
-                                    [0, 0.105, 0.1, 2.5, 0, 0]])
+                                    [0, 0, 2.5, np.inf, 0, 0.503],
+                                    [0, 0.105, np.nan, 2.5, 0, 0]])
+
+  def testNDimension(self):
+    with self.test_session() as sess:
+      content = [["1 1:3.4 2:0.5 4:0.231", "1 1:3.4 2:0.5 4:0.231"],
+                 ["1 2:2.5 3:inf 5:0.503", "1 2:2.5 3:inf 5:0.503"],
+                 ["2 3:2.5 2:nan 1:0.105", "2 3:2.5 2:nan 1:0.105"]]
+      label, sparse_feature = libsvm_ops.decode_libsvm(content,
+                                                       num_features=6)
+      feature = sparse_ops.sparse_tensor_to_dense(sparse_feature,
+                                                  validate_indices=False)
+
+      self.assertAllEqual(label.get_shape().as_list(), [3, 2])
+
+      label, feature = sess.run([label, feature])
+      self.assertAllEqual(label, [[1, 1], [1, 1], [2, 2]])
+      self.assertAllClose(feature, [[[0, 3.4, 0.5, 0, 0.231, 0],
+                                     [0, 3.4, 0.5, 0, 0.231, 0]],
+                                    [[0, 0, 2.5, np.inf, 0, 0.503],
+                                     [0, 0, 2.5, np.inf, 0, 0.503]],
+                                    [[0, 0.105, np.nan, 2.5, 0, 0],
+                                     [0, 0.105, np.nan, 2.5, 0, 0]]])
 
 
 if __name__ == "__main__":
diff --git a/tensorflow/contrib/libsvm/python/ops/libsvm_ops.py b/tensorflow/contrib/libsvm/python/ops/libsvm_ops.py
index 359b464129..a6c5eba563 100644
--- a/tensorflow/contrib/libsvm/python/ops/libsvm_ops.py
+++ b/tensorflow/contrib/libsvm/python/ops/libsvm_ops.py
@@ -21,6 +21,7 @@ from tensorflow.contrib.libsvm.ops import gen_libsvm_ops
 from tensorflow.contrib.util import loader
 from tensorflow.python.framework import common_shapes
 from tensorflow.python.framework import ops
+from tensorflow.python.framework import sparse_tensor
 from tensorflow.python.ops import io_ops
 from tensorflow.python.platform import resource_loader
 
@@ -39,9 +40,12 @@ def decode_libsvm(content, num_features, dtype=None):
 
   Returns:
     label: A `Tensor` of the same shape as content.
-    feature: A `Tensor` of the shape `[input_shape, num_features]`.
+    feature: A `SparseTensor` of the shape `[input_shape, num_features]`.
   """
-  return gen_libsvm_ops.decode_libsvm(content, num_features, dtype=dtype)
+  label, indices, values, shape = gen_libsvm_ops.decode_libsvm(content,
+                                                               num_features,
+                                                               dtype=dtype)
+  return label, sparse_tensor.SparseTensor(indices, values, shape)
 
 
 ops.NotDifferentiable('DecodeLibSVM')
-- 
GitLab


From 81e444d245395184ce9b7c14c96f6d4776fe1875 Mon Sep 17 00:00:00 2001
From: Yong Tang <yong.tang.github@outlook.com>
Date: Tue, 21 Nov 2017 15:07:38 -0800
Subject: [PATCH 1072/1225] Add different dtype support for label

So that int32, int64, float, double could be rendered for label

Signed-off-by: Yong Tang <yong.tang.github@outlook.com>
---
 .../libsvm/kernels/decode_libsvm_op.cc        | 53 +++++++++++++------
 tensorflow/contrib/libsvm/ops/libsvm_ops.cc   |  3 +-
 .../kernel_tests/decode_libsvm_op_test.py     |  5 +-
 .../contrib/libsvm/python/ops/libsvm_ops.py   |  8 +--
 4 files changed, 46 insertions(+), 23 deletions(-)

diff --git a/tensorflow/contrib/libsvm/kernels/decode_libsvm_op.cc b/tensorflow/contrib/libsvm/kernels/decode_libsvm_op.cc
index 6076d9c727..fc7889b27c 100644
--- a/tensorflow/contrib/libsvm/kernels/decode_libsvm_op.cc
+++ b/tensorflow/contrib/libsvm/kernels/decode_libsvm_op.cc
@@ -22,8 +22,12 @@ limitations under the License.
 #include "tensorflow/core/lib/strings/str_util.h"
 
 namespace tensorflow {
-
+namespace {
 template <typename T>
+bool ConvertHelper(const string& s, T* value);
+}
+
+template <typename T, typename Tlabel>
 class DecodeLibsvmOp : public OpKernel {
  public:
   explicit DecodeLibsvmOp(OpKernelConstruction* ctx) : OpKernel(ctx) {
@@ -41,7 +45,7 @@ class DecodeLibsvmOp : public OpKernel {
     Tensor* label_tensor;
     OP_REQUIRES_OK(
         ctx, ctx->allocate_output(0, input_tensor->shape(), &label_tensor));
-    auto label = label_tensor->flat<int64>();
+    auto label = label_tensor->flat<Tlabel>();
 
     std::vector<T> out_values;
     std::vector<std::pair<int64, int64>> out_indices;
@@ -51,9 +55,9 @@ class DecodeLibsvmOp : public OpKernel {
       OP_REQUIRES(ctx, (entries.size() > 0),
                   errors::InvalidArgument("No entries found for input[", i,
                                           "]: \"", input_flat(i), "\""));
-      int64 label_value;
+      Tlabel label_value;
       OP_REQUIRES(
-          ctx, strings::safe_strto64(entries[0].c_str(), &label_value),
+          ctx, ConvertHelper<Tlabel>(entries[0].c_str(), &label_value),
           errors::InvalidArgument("Label format incorrect: ", entries[0]));
       label(i) = label_value;
       for (int j = 1; j < entries.size(); j++) {
@@ -70,7 +74,7 @@ class DecodeLibsvmOp : public OpKernel {
                         "Feature index should be >= 0, got ", feature_index));
         T feature_value;
         OP_REQUIRES(
-            ctx, Convert(pair[1], &feature_value),
+            ctx, ConvertHelper<T>(pair[1], &feature_value),
             errors::InvalidArgument("Feature format incorrect: ", entries[j]));
         out_values.emplace_back(feature_value);
         out_indices.emplace_back(std::pair<int64, int64>(i, feature_index));
@@ -122,31 +126,48 @@ class DecodeLibsvmOp : public OpKernel {
 
  private:
   int64 num_features_;
-
-  bool Convert(const string& s, T* value);
 };
 
+namespace {
 template <>
-bool DecodeLibsvmOp<float>::Convert(const string& s, float* value) {
+bool ConvertHelper<float>(const string& s, float* value) {
   return strings::safe_strtof(s.c_str(), value);
 }
 template <>
-bool DecodeLibsvmOp<double>::Convert(const string& s, double* value) {
+bool ConvertHelper<double>(const string& s, double* value) {
   return strings::safe_strtod(s.c_str(), value);
 }
 template <>
-bool DecodeLibsvmOp<int32>::Convert(const string& s, int32* value) {
+bool ConvertHelper<int32>(const string& s, int32* value) {
   return strings::safe_strto32(s.c_str(), value);
 }
 template <>
-bool DecodeLibsvmOp<int64>::Convert(const string& s, int64* value) {
+bool ConvertHelper<int64>(const string& s, int64* value) {
   return strings::safe_strto64(s.c_str(), value);
 }
-
-#define REGISTER_KERNEL(type)                                                \
-  REGISTER_KERNEL_BUILDER(                                                   \
-      Name("DecodeLibsvm").Device(DEVICE_CPU).TypeConstraint<type>("dtype"), \
-      DecodeLibsvmOp<type>);
+}  // namespace
+
+#define REGISTER_KERNEL(type)                                         \
+  REGISTER_KERNEL_BUILDER(Name("DecodeLibsvm")                        \
+                              .Device(DEVICE_CPU)                     \
+                              .TypeConstraint<type>("dtype")          \
+                              .TypeConstraint<int32>("label_dtype"),  \
+                          DecodeLibsvmOp<type, int32>);               \
+  REGISTER_KERNEL_BUILDER(Name("DecodeLibsvm")                        \
+                              .Device(DEVICE_CPU)                     \
+                              .TypeConstraint<type>("dtype")          \
+                              .TypeConstraint<int64>("label_dtype"),  \
+                          DecodeLibsvmOp<type, int64>);               \
+  REGISTER_KERNEL_BUILDER(Name("DecodeLibsvm")                        \
+                              .Device(DEVICE_CPU)                     \
+                              .TypeConstraint<type>("dtype")          \
+                              .TypeConstraint<float>("label_dtype"),  \
+                          DecodeLibsvmOp<type, float>);               \
+  REGISTER_KERNEL_BUILDER(Name("DecodeLibsvm")                        \
+                              .Device(DEVICE_CPU)                     \
+                              .TypeConstraint<type>("dtype")          \
+                              .TypeConstraint<double>("label_dtype"), \
+                          DecodeLibsvmOp<type, double>);
 
 REGISTER_KERNEL(float);
 REGISTER_KERNEL(double);
diff --git a/tensorflow/contrib/libsvm/ops/libsvm_ops.cc b/tensorflow/contrib/libsvm/ops/libsvm_ops.cc
index f51772a440..4c65e67629 100644
--- a/tensorflow/contrib/libsvm/ops/libsvm_ops.cc
+++ b/tensorflow/contrib/libsvm/ops/libsvm_ops.cc
@@ -24,11 +24,12 @@ using shape_inference::ShapeHandle;
 
 REGISTER_OP("DecodeLibsvm")
     .Input("input: string")
-    .Output("label: int64")
+    .Output("label: label_dtype")
     .Output("feature_indices: int64")
     .Output("feature_values: dtype")
     .Output("feature_shape: int64")
     .Attr("dtype: {float, double, int32, int64} = DT_FLOAT")
+    .Attr("label_dtype: {float, double, int32, int64} = DT_INT64")
     .Attr("num_features: int >= 1")
     .SetShapeFn([](InferenceContext* c) {
       c->set_output(0, c->input(0));
diff --git a/tensorflow/contrib/libsvm/python/kernel_tests/decode_libsvm_op_test.py b/tensorflow/contrib/libsvm/python/kernel_tests/decode_libsvm_op_test.py
index 0f54014ec7..ca12db28fc 100644
--- a/tensorflow/contrib/libsvm/python/kernel_tests/decode_libsvm_op_test.py
+++ b/tensorflow/contrib/libsvm/python/kernel_tests/decode_libsvm_op_test.py
@@ -21,6 +21,7 @@ from __future__ import print_function
 import numpy as np
 
 from tensorflow.contrib.libsvm.python.ops import libsvm_ops
+from tensorflow.python.framework import dtypes
 from tensorflow.python.ops import sparse_ops
 from tensorflow.python.platform import test
 
@@ -50,8 +51,8 @@ class DecodeLibsvmOpTest(test.TestCase):
       content = [["1 1:3.4 2:0.5 4:0.231", "1 1:3.4 2:0.5 4:0.231"],
                  ["1 2:2.5 3:inf 5:0.503", "1 2:2.5 3:inf 5:0.503"],
                  ["2 3:2.5 2:nan 1:0.105", "2 3:2.5 2:nan 1:0.105"]]
-      label, sparse_feature = libsvm_ops.decode_libsvm(content,
-                                                       num_features=6)
+      label, sparse_feature = libsvm_ops.decode_libsvm(
+          content, num_features=6, label_dtype=dtypes.float64)
       feature = sparse_ops.sparse_tensor_to_dense(sparse_feature,
                                                   validate_indices=False)
 
diff --git a/tensorflow/contrib/libsvm/python/ops/libsvm_ops.py b/tensorflow/contrib/libsvm/python/ops/libsvm_ops.py
index a6c5eba563..8d0e5ab423 100644
--- a/tensorflow/contrib/libsvm/python/ops/libsvm_ops.py
+++ b/tensorflow/contrib/libsvm/python/ops/libsvm_ops.py
@@ -29,7 +29,7 @@ from tensorflow.python.platform import resource_loader
 _libsvm_ops_so = loader.load_op_library(
     resource_loader.get_path_to_datafile("_libsvm_ops.so"))
 
-def decode_libsvm(content, num_features, dtype=None):
+def decode_libsvm(content, num_features, dtype=None, label_dtype=None):
   """Convert Libsvm records to a tensor of label and a tensor of feature.
 
   Args:
@@ -37,14 +37,14 @@ def decode_libsvm(content, num_features, dtype=None):
       the Libsvm format.
     num_features: The number of features.
     dtype: The type of the output feature tensor. Default to tf.float32.
+    label_dtype: The type of the output label tensor. Default to tf.int64.
 
   Returns:
     label: A `Tensor` of the same shape as content.
     feature: A `SparseTensor` of the shape `[input_shape, num_features]`.
   """
-  label, indices, values, shape = gen_libsvm_ops.decode_libsvm(content,
-                                                               num_features,
-                                                               dtype=dtype)
+  label, indices, values, shape = gen_libsvm_ops.decode_libsvm(
+      content, num_features, dtype=dtype, label_dtype=label_dtype)
   return label, sparse_tensor.SparseTensor(indices, values, shape)
 
 
-- 
GitLab


From 50ebf8fcfa0a2bfa37111e9b7a7a3ba3c99f1f18 Mon Sep 17 00:00:00 2001
From: Yong Tang <yong.tang.github@outlook.com>
Date: Tue, 21 Nov 2017 15:14:12 -0800
Subject: [PATCH 1073/1225] Update
 tensorflow/contrib/cmake/tf_core_kernels.cmake and
 tensorflow/contrib/cmake/tf_python.cmake.

Signed-off-by: Yong Tang <yong.tang.github@outlook.com>
---
 tensorflow/contrib/cmake/python_modules.txt    | 4 ++++
 tensorflow/contrib/cmake/tf_core_kernels.cmake | 2 ++
 2 files changed, 6 insertions(+)

diff --git a/tensorflow/contrib/cmake/python_modules.txt b/tensorflow/contrib/cmake/python_modules.txt
index a0fca690ef..92edce77df 100644
--- a/tensorflow/contrib/cmake/python_modules.txt
+++ b/tensorflow/contrib/cmake/python_modules.txt
@@ -289,6 +289,10 @@ tensorflow/contrib/learn/python/learn/utils
 tensorflow/contrib/legacy_seq2seq
 tensorflow/contrib/legacy_seq2seq/python
 tensorflow/contrib/legacy_seq2seq/python/ops
+tensorflow/contrib/libsvm
+tensorflow/contrib/libsvm/python
+tensorflow/contrib/libsvm/python/kernel_tests
+tensorflow/contrib/libsvm/python/ops
 tensorflow/contrib/linalg
 tensorflow/contrib/linalg/python
 tensorflow/contrib/linalg/python/ops
diff --git a/tensorflow/contrib/cmake/tf_core_kernels.cmake b/tensorflow/contrib/cmake/tf_core_kernels.cmake
index eb6bf567aa..d3b6c0bdd3 100644
--- a/tensorflow/contrib/cmake/tf_core_kernels.cmake
+++ b/tensorflow/contrib/cmake/tf_core_kernels.cmake
@@ -85,6 +85,8 @@ if(tensorflow_BUILD_CONTRIB_KERNELS)
       "${tensorflow_source_dir}/tensorflow/contrib/image/ops/single_image_random_dot_stereograms_ops.cc"
       "${tensorflow_source_dir}/tensorflow/contrib/layers/kernels/sparse_feature_cross_kernel.cc"
       "${tensorflow_source_dir}/tensorflow/contrib/layers/ops/sparse_feature_cross_op.cc"
+      "${tensorflow_source_dir}/tensorflow/contrib/libsvm/kernels/decode_libsvm_op.cc"
+      "${tensorflow_source_dir}/tensorflow/contrib/libsvm/ops/libsvm_ops.cc"
       "${tensorflow_source_dir}/tensorflow/contrib/nccl/kernels/nccl_manager.cc"
       "${tensorflow_source_dir}/tensorflow/contrib/nccl/kernels/nccl_ops.cc"
       "${tensorflow_source_dir}/tensorflow/contrib/nccl/ops/nccl_ops.cc"
-- 
GitLab


From 2de6cfd86fe521fd63d48eea7f25ed32f1f55309 Mon Sep 17 00:00:00 2001
From: Yong Tang <yong.tang.github@outlook.com>
Date: Tue, 28 Nov 2017 22:52:18 -0800
Subject: [PATCH 1074/1225] Update return values to `features, labels` to be
 consistent with other APIs.

Signed-off-by: Yong Tang <yong.tang.github@outlook.com>
---
 .../kernel_tests/decode_libsvm_op_test.py     | 34 +++++++++----------
 .../contrib/libsvm/python/ops/libsvm_ops.py   |  8 ++---
 2 files changed, 21 insertions(+), 21 deletions(-)

diff --git a/tensorflow/contrib/libsvm/python/kernel_tests/decode_libsvm_op_test.py b/tensorflow/contrib/libsvm/python/kernel_tests/decode_libsvm_op_test.py
index ca12db28fc..7d9d5ceed3 100644
--- a/tensorflow/contrib/libsvm/python/kernel_tests/decode_libsvm_op_test.py
+++ b/tensorflow/contrib/libsvm/python/kernel_tests/decode_libsvm_op_test.py
@@ -33,34 +33,34 @@ class DecodeLibsvmOpTest(test.TestCase):
       content = ["1 1:3.4 2:0.5 4:0.231",
                  "1 2:2.5 3:inf 5:0.503",
                  "2 3:2.5 2:nan 1:0.105"]
-      label, sparse_feature = libsvm_ops.decode_libsvm(content,
-                                                       num_features=6)
-      feature = sparse_ops.sparse_tensor_to_dense(sparse_feature,
-                                                  validate_indices=False)
+      sparse_features, labels = libsvm_ops.decode_libsvm(content,
+                                                         num_features=6)
+      features = sparse_ops.sparse_tensor_to_dense(sparse_features,
+                                                   validate_indices=False)
 
-      self.assertAllEqual(label.get_shape().as_list(), [3])
+      self.assertAllEqual(labels.get_shape().as_list(), [3])
 
-      label, feature = sess.run([label, feature])
-      self.assertAllEqual(label, [1, 1, 2])
-      self.assertAllClose(feature, [[0, 3.4, 0.5, 0, 0.231, 0],
-                                    [0, 0, 2.5, np.inf, 0, 0.503],
-                                    [0, 0.105, np.nan, 2.5, 0, 0]])
+      features, labels = sess.run([features, labels])
+      self.assertAllEqual(labels, [1, 1, 2])
+      self.assertAllClose(features, [[0, 3.4, 0.5, 0, 0.231, 0],
+                                     [0, 0, 2.5, np.inf, 0, 0.503],
+                                     [0, 0.105, np.nan, 2.5, 0, 0]])
 
   def testNDimension(self):
     with self.test_session() as sess:
       content = [["1 1:3.4 2:0.5 4:0.231", "1 1:3.4 2:0.5 4:0.231"],
                  ["1 2:2.5 3:inf 5:0.503", "1 2:2.5 3:inf 5:0.503"],
                  ["2 3:2.5 2:nan 1:0.105", "2 3:2.5 2:nan 1:0.105"]]
-      label, sparse_feature = libsvm_ops.decode_libsvm(
+      sparse_features, labels = libsvm_ops.decode_libsvm(
           content, num_features=6, label_dtype=dtypes.float64)
-      feature = sparse_ops.sparse_tensor_to_dense(sparse_feature,
-                                                  validate_indices=False)
+      features = sparse_ops.sparse_tensor_to_dense(sparse_features,
+                                                   validate_indices=False)
 
-      self.assertAllEqual(label.get_shape().as_list(), [3, 2])
+      self.assertAllEqual(labels.get_shape().as_list(), [3, 2])
 
-      label, feature = sess.run([label, feature])
-      self.assertAllEqual(label, [[1, 1], [1, 1], [2, 2]])
-      self.assertAllClose(feature, [[[0, 3.4, 0.5, 0, 0.231, 0],
+      features, labels = sess.run([features, labels])
+      self.assertAllEqual(labels, [[1, 1], [1, 1], [2, 2]])
+      self.assertAllClose(features, [[[0, 3.4, 0.5, 0, 0.231, 0],
                                      [0, 3.4, 0.5, 0, 0.231, 0]],
                                     [[0, 0, 2.5, np.inf, 0, 0.503],
                                      [0, 0, 2.5, np.inf, 0, 0.503]],
diff --git a/tensorflow/contrib/libsvm/python/ops/libsvm_ops.py b/tensorflow/contrib/libsvm/python/ops/libsvm_ops.py
index 8d0e5ab423..9c133e7e7f 100644
--- a/tensorflow/contrib/libsvm/python/ops/libsvm_ops.py
+++ b/tensorflow/contrib/libsvm/python/ops/libsvm_ops.py
@@ -40,12 +40,12 @@ def decode_libsvm(content, num_features, dtype=None, label_dtype=None):
     label_dtype: The type of the output label tensor. Default to tf.int64.
 
   Returns:
-    label: A `Tensor` of the same shape as content.
-    feature: A `SparseTensor` of the shape `[input_shape, num_features]`.
+    features: A `SparseTensor` of the shape `[input_shape, num_features]`.
+    labels: A `Tensor` of the same shape as content.
   """
-  label, indices, values, shape = gen_libsvm_ops.decode_libsvm(
+  labels, indices, values, shape = gen_libsvm_ops.decode_libsvm(
       content, num_features, dtype=dtype, label_dtype=label_dtype)
-  return label, sparse_tensor.SparseTensor(indices, values, shape)
+  return sparse_tensor.SparseTensor(indices, values, shape), labels
 
 
 ops.NotDifferentiable('DecodeLibSVM')
-- 
GitLab


From 224cf1c94edbf484aaaf3986312331d3ce6787b8 Mon Sep 17 00:00:00 2001
From: Sanjoy Das <sanjoy@google.com>
Date: Fri, 15 Dec 2017 10:39:03 -0800
Subject: [PATCH 1075/1225] [XLA:CPU] Use LLVM's TargetTransformInfo in
 TargetMachineFeatures

I'll add more uses of TargetMachineFeatures is subsequence CLs.

PiperOrigin-RevId: 179211454
---
 tensorflow/compiler/xla/service/cpu/BUILD     |  1 +
 .../compiler/xla/service/cpu/ir_emitter.cc    | 52 +++++++------------
 .../compiler/xla/service/cpu/ir_emitter.h     | 27 ++++------
 3 files changed, 30 insertions(+), 50 deletions(-)

diff --git a/tensorflow/compiler/xla/service/cpu/BUILD b/tensorflow/compiler/xla/service/cpu/BUILD
index b43597dca9..b0c959f40b 100644
--- a/tensorflow/compiler/xla/service/cpu/BUILD
+++ b/tensorflow/compiler/xla/service/cpu/BUILD
@@ -277,6 +277,7 @@ cc_library(
         "//tensorflow/compiler/xla/service/llvm_ir:ops",
         "//tensorflow/compiler/xla/service/llvm_ir:tuple_ops",
         "//tensorflow/core:lib",
+        "@llvm//:analysis",
         "@llvm//:code_gen",
         "@llvm//:core",
         "@llvm//:support",
diff --git a/tensorflow/compiler/xla/service/cpu/ir_emitter.cc b/tensorflow/compiler/xla/service/cpu/ir_emitter.cc
index a15baf7a4b..e23eb88e97 100644
--- a/tensorflow/compiler/xla/service/cpu/ir_emitter.cc
+++ b/tensorflow/compiler/xla/service/cpu/ir_emitter.cc
@@ -1509,13 +1509,9 @@ IrEmitter::ReductionGenerator IrEmitter::MatchReductionGenerator(
 
 IrEmitter::ShardedVectorType IrEmitter::CreateShardedVectorType(
     PrimitiveType element_type, unsigned element_count) {
-  // Here we assume that the largest register is a vector register.
-  int max_vector_register_size_in_bytes =
-      target_machine_features_.largest_register_size_in_bytes(
-          compute_function_->function());
-
   int vector_register_size_in_elements =
-      max_vector_register_size_in_bytes /
+      target_machine_features_.vector_register_byte_size(
+          *compute_function_->function()) /
       ShapeUtil::ByteSizeOfPrimitiveType(element_type);
 
   ShardedVectorType sharded_vector_type;
@@ -3042,36 +3038,26 @@ StatusOr<llvm::Value*> IrEmitter::EmitScalarCall(
                                  argument_addrs, name);
 }
 
-unsigned TargetMachineFeatures::largest_register_size_in_bytes(
-    llvm::Function* function) {
-  auto itr = largest_register_size_in_bytes_.find(function);
-  if (itr != largest_register_size_in_bytes_.end()) {
-    return itr->second;
+llvm::TargetTransformInfo* TargetMachineFeatures::GetTargetTransformInfoFor(
+    const llvm::Function& function) {
+  auto it = target_transform_infos_.find(&function);
+  if (it == target_transform_infos_.end()) {
+    // Using a dummy function analysis manager is kind of hacky, but LLVM's
+    // TargetTransformInfoWrapperPass::getTTI does the same thing.
+    //
+    // TODO(sanjoy): Fix this within LLVM by directly exposing
+    // TargetTransformInfo factories from TargetMachine.
+    llvm::FunctionAnalysisManager DummyFAM;
+    llvm::TargetTransformInfo target_transform_info =
+        target_machine_->getTargetIRAnalysis().run(function, DummyFAM);
+    auto emplace_result = target_transform_infos_.emplace(
+        &function, std::move(target_transform_info));
+    CHECK(emplace_result.second);
+    it = emplace_result.first;
   }
 
-  int result = largest_register_size_in_bytes_impl(function);
-
-  InsertOrDie(&largest_register_size_in_bytes_, function, result);
-  DCHECK_EQ(result, largest_register_size_in_bytes_.begin()->second);
-  return result;
+  return &it->second;
 }
 
-unsigned TargetMachineFeatures::largest_register_size_in_bytes_impl(
-    llvm::Function* function) const {
-  auto register_info =
-      target_machine_->getSubtargetImpl(*function)->getRegisterInfo();
-
-  unsigned largest_register_size = 0;
-  for (const llvm::TargetRegisterClass* register_class :
-       register_info->regclasses()) {
-    if (register_class->isAllocatable()) {
-      largest_register_size =
-          std::max(largest_register_size,
-                   register_info->getRegSizeInBits(*register_class));
-    }
-  }
-
-  return largest_register_size / 8;
-}
 }  // namespace cpu
 }  // namespace xla
diff --git a/tensorflow/compiler/xla/service/cpu/ir_emitter.h b/tensorflow/compiler/xla/service/cpu/ir_emitter.h
index 9bc2d97397..2341e3ea72 100644
--- a/tensorflow/compiler/xla/service/cpu/ir_emitter.h
+++ b/tensorflow/compiler/xla/service/cpu/ir_emitter.h
@@ -24,6 +24,7 @@ limitations under the License.
 #include <vector>
 
 #include "llvm/ADT/Triple.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/Module.h"
@@ -54,15 +55,6 @@ namespace cpu {
 
 // Wraps an llvm::TargetMachine and parses out some information that feeds into
 // code LLVM IR generation decisions.
-//
-// Ideally we'd be able to use llvm::TargetTransformInfo here (since its
-// interface is pretty much a perfect fit for our use case), but obtaining an
-// instance of llvm::TargetTransformInfo outside an LLVM pass pipeline without
-// super-ugly hacks is difficult.
-//
-// TODO(b/66049221): See if the LLVM community will be receptive to exposing an
-// API that lets us directly create and use llvm::TargetTransformInfo instances
-// outside of a pass manager.
 class TargetMachineFeatures {
  public:
   TargetMachineFeatures(llvm::TargetMachine* target_machine)
@@ -77,20 +69,21 @@ class TargetMachineFeatures {
     return 128;
   }
 
-  // Return the size of the largest register size in bytes.  We need to pass in
+  // Return the size of the largest vector size in bytes.  We need to pass in
   // "function" since llvm functions can contain annotations for specializing
   // them to specific micro-architectures (though currently XLA does not use
   // this functionality).
-  //
-  // Ideally we should have been able to use
-  // llvm::TargetTransformInfo::getRegisterBitWidth(true) here.
-  unsigned largest_register_size_in_bytes(llvm::Function* function);
+  int vector_register_byte_size(const llvm::Function& function) {
+    llvm::TargetTransformInfo* tti = GetTargetTransformInfoFor(function);
+    return tti->getRegisterBitWidth(/*Vector=*/true) / 8;
+  }
 
  private:
-  unsigned largest_register_size_in_bytes_impl(llvm::Function* function) const;
+  llvm::TargetTransformInfo* GetTargetTransformInfoFor(
+      const llvm::Function& function);
 
-  tensorflow::gtl::FlatMap<llvm::Function*, int>
-      largest_register_size_in_bytes_;
+  tensorflow::gtl::FlatMap<const llvm::Function*, llvm::TargetTransformInfo>
+      target_transform_infos_;
   llvm::TargetMachine* target_machine_;
 };
 
-- 
GitLab


From e355290854e0c9dedd878741cfb2740a72059a27 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 15 Dec 2017 10:52:15 -0800
Subject: [PATCH 1076/1225] Remove Stream::BlockHostUntilDoneWithStatus; all
 callers use BlockHostUntilDone.

PiperOrigin-RevId: 179213341
---
 tensorflow/stream_executor/stream.cc | 4 ----
 tensorflow/stream_executor/stream.h  | 5 -----
 2 files changed, 9 deletions(-)

diff --git a/tensorflow/stream_executor/stream.cc b/tensorflow/stream_executor/stream.cc
index 0512f4c79a..e92ed14779 100644
--- a/tensorflow/stream_executor/stream.cc
+++ b/tensorflow/stream_executor/stream.cc
@@ -5086,9 +5086,5 @@ port::Status Stream::BlockHostUntilDone() {
   return first_error;
 }
 
-port::Status Stream::BlockHostUntilDoneWithStatus() {
-  return BlockHostUntilDone();
-}
-
 }  // namespace gputools
 }  // namespace perftools
diff --git a/tensorflow/stream_executor/stream.h b/tensorflow/stream_executor/stream.h
index 4c34452048..37828d9882 100644
--- a/tensorflow/stream_executor/stream.h
+++ b/tensorflow/stream_executor/stream.h
@@ -1907,11 +1907,6 @@ class Stream {
   // Otherwise returns an error describing why the blocking failed.
   port::Status BlockHostUntilDone() LOCKS_EXCLUDED(mu_);
 
-  // DEPRECATED(b/70298427) - new code should use BlockHostUntilDone()
-  //
-  // Equivalent to BlockHostUntilDone()
-  port::Status BlockHostUntilDoneWithStatus() LOCKS_EXCLUDED(mu_);
-
   // Warning! This method interacts with internal threads in
   // sometimes-unpredictable ways and is intended for GPU-Executor-internal
   // use
-- 
GitLab


From 4952f981be07b8bf508f8226f83c10cdafa3f0c4 Mon Sep 17 00:00:00 2001
From: Yong Tang <yong.tang.github@outlook.com>
Date: Fri, 15 Dec 2017 13:02:54 -0600
Subject: [PATCH 1077/1225] Replace loop iteration with `chip` (#15289)

In unique_op.cc, the ouput tensor was generated
through loop iteration. It seems that this could be
improved through Eigen's `chip`.

The fix addresses this improvement.

Signed-off-by: Yong Tang <yong.tang.github@outlook.com>
---
 tensorflow/core/kernels/unique_op.cc | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/tensorflow/core/kernels/unique_op.cc b/tensorflow/core/kernels/unique_op.cc
index d087784c8a..782470210f 100644
--- a/tensorflow/core/kernels/unique_op.cc
+++ b/tensorflow/core/kernels/unique_op.cc
@@ -133,11 +133,7 @@ class UniqueOp : public OpKernel {
     auto Tout = output->shaped<T, 3>(new_sizes);
 
     for (auto it : uniq) {
-      for (int64 i = 0; i < Tin.dimension(0); i++) {
-        for (int64 j = 0; j < Tin.dimension(2); j++) {
-          Tout(i, it.second, j) = Tin(i, it.first, j);
-        }
-      }
+      Tout.chip(it.second, 1) = Tin.chip(it.first, 1);
     }
 
     if (num_outputs() > 2) {
-- 
GitLab


From 04b5890cbdf6161c6d02db95d3365fac9cbfea05 Mon Sep 17 00:00:00 2001
From: Mark Daoust <markdaoust@google.com>
Date: Fri, 15 Dec 2017 11:07:48 -0800
Subject: [PATCH 1078/1225] Delete empty api_guides.

PiperOrigin-RevId: 179215745
---
 tensorflow/contrib/copy_graph/__init__.py           |  2 --
 .../api_guides/python/contrib.bayesflow.entropy.md  |  2 --
 .../python/contrib.bayesflow.stochastic_graph.md    |  2 --
 .../python/contrib.bayesflow.stochastic_tensor.md   |  3 ---
 .../contrib.bayesflow.variational_inference.md      |  4 ----
 .../api_guides/python/contrib.copy_graph.md         |  4 ----
 .../docs_src/api_guides/python/contrib.opt.md       |  4 ----
 .../docs_src/api_guides/python/histogram_ops.md     |  6 ------
 tensorflow/docs_src/api_guides/python/script_ops.md | 13 -------------
 9 files changed, 40 deletions(-)
 delete mode 100644 tensorflow/docs_src/api_guides/python/contrib.bayesflow.entropy.md
 delete mode 100644 tensorflow/docs_src/api_guides/python/contrib.bayesflow.stochastic_graph.md
 delete mode 100644 tensorflow/docs_src/api_guides/python/contrib.bayesflow.stochastic_tensor.md
 delete mode 100644 tensorflow/docs_src/api_guides/python/contrib.bayesflow.variational_inference.md
 delete mode 100644 tensorflow/docs_src/api_guides/python/contrib.copy_graph.md
 delete mode 100644 tensorflow/docs_src/api_guides/python/contrib.opt.md
 delete mode 100644 tensorflow/docs_src/api_guides/python/histogram_ops.md
 delete mode 100644 tensorflow/docs_src/api_guides/python/script_ops.md

diff --git a/tensorflow/contrib/copy_graph/__init__.py b/tensorflow/contrib/copy_graph/__init__.py
index 30a0aac140..61ee39e4be 100644
--- a/tensorflow/contrib/copy_graph/__init__.py
+++ b/tensorflow/contrib/copy_graph/__init__.py
@@ -13,8 +13,6 @@
 # limitations under the License.
 # ==============================================================================
 """Functions to copy elements between graphs.
-
-See the @{$python/contrib.copy_graph} guide.
 """
 
 from __future__ import absolute_import
diff --git a/tensorflow/docs_src/api_guides/python/contrib.bayesflow.entropy.md b/tensorflow/docs_src/api_guides/python/contrib.bayesflow.entropy.md
deleted file mode 100644
index b59be5471f..0000000000
--- a/tensorflow/docs_src/api_guides/python/contrib.bayesflow.entropy.md
+++ /dev/null
@@ -1,2 +0,0 @@
-# BayesFlow Entropy (contrib)
-
diff --git a/tensorflow/docs_src/api_guides/python/contrib.bayesflow.stochastic_graph.md b/tensorflow/docs_src/api_guides/python/contrib.bayesflow.stochastic_graph.md
deleted file mode 100644
index b6e5502ec4..0000000000
--- a/tensorflow/docs_src/api_guides/python/contrib.bayesflow.stochastic_graph.md
+++ /dev/null
@@ -1,2 +0,0 @@
-# BayesFlow Stochastic Graph (contrib)
-
diff --git a/tensorflow/docs_src/api_guides/python/contrib.bayesflow.stochastic_tensor.md b/tensorflow/docs_src/api_guides/python/contrib.bayesflow.stochastic_tensor.md
deleted file mode 100644
index 1cc1ac5d7e..0000000000
--- a/tensorflow/docs_src/api_guides/python/contrib.bayesflow.stochastic_tensor.md
+++ /dev/null
@@ -1,3 +0,0 @@
-# BayesFlow Stochastic Tensors (contrib)
-[TOC]
-
diff --git a/tensorflow/docs_src/api_guides/python/contrib.bayesflow.variational_inference.md b/tensorflow/docs_src/api_guides/python/contrib.bayesflow.variational_inference.md
deleted file mode 100644
index 8f08c09c8f..0000000000
--- a/tensorflow/docs_src/api_guides/python/contrib.bayesflow.variational_inference.md
+++ /dev/null
@@ -1,4 +0,0 @@
-# BayesFlow Variational Inference (contrib)
-[TOC]
-
-Variational inference.
diff --git a/tensorflow/docs_src/api_guides/python/contrib.copy_graph.md b/tensorflow/docs_src/api_guides/python/contrib.copy_graph.md
deleted file mode 100644
index f61f4c764d..0000000000
--- a/tensorflow/docs_src/api_guides/python/contrib.copy_graph.md
+++ /dev/null
@@ -1,4 +0,0 @@
-# Copying Graph Elements (contrib)
-[TOC]
-
-Functions for copying elements from one graph to another.
diff --git a/tensorflow/docs_src/api_guides/python/contrib.opt.md b/tensorflow/docs_src/api_guides/python/contrib.opt.md
deleted file mode 100644
index 944a80a5cc..0000000000
--- a/tensorflow/docs_src/api_guides/python/contrib.opt.md
+++ /dev/null
@@ -1,4 +0,0 @@
-# Optimization (contrib)
-[TOC]
-
-opt: A module containing optimization routines.
diff --git a/tensorflow/docs_src/api_guides/python/histogram_ops.md b/tensorflow/docs_src/api_guides/python/histogram_ops.md
deleted file mode 100644
index dbd4555429..0000000000
--- a/tensorflow/docs_src/api_guides/python/histogram_ops.md
+++ /dev/null
@@ -1,6 +0,0 @@
-# Histograms
-[TOC]
-
-## Histograms
-
-*   @{tf.histogram_fixed_width}
diff --git a/tensorflow/docs_src/api_guides/python/script_ops.md b/tensorflow/docs_src/api_guides/python/script_ops.md
deleted file mode 100644
index ab49a570c1..0000000000
--- a/tensorflow/docs_src/api_guides/python/script_ops.md
+++ /dev/null
@@ -1,13 +0,0 @@
-# Wraps python functions
-
-Note: Functions taking `Tensor` arguments can also take anything accepted by
-@{tf.convert_to_tensor}.
-
-[TOC]
-
-## Script Language Operators
-
-TensorFlow provides allows you to wrap python/numpy functions as
-TensorFlow operators.
-
-*   @{tf.py_func}
-- 
GitLab


From 9393d604ebd63664a27d28617f95fa5f60495270 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 15 Dec 2017 11:21:13 -0800
Subject: [PATCH 1079/1225] internal change

PiperOrigin-RevId: 179217499
---
 .../internal/optimized/optimized_ops.h        | 37 +++++++++++++++++++
 .../internal/reference/reference_ops.h        | 34 +++++++++++++++++
 .../contrib/lite/toco/export_tensorflow.cc    | 23 +++++++++++-
 .../propagate_array_data_types.cc             |  5 +++
 .../propagate_fixed_sizes.cc                  | 28 ++++++++++++++
 .../contrib/lite/toco/import_tensorflow.cc    | 19 ++++++++++
 tensorflow/contrib/lite/toco/model.h          | 12 ++++++
 tensorflow/contrib/lite/toco/tooling_util.cc  |  3 ++
 8 files changed, 160 insertions(+), 1 deletion(-)

diff --git a/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h b/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h
index cd565c16a1..2df919e579 100644
--- a/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h
+++ b/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h
@@ -3704,6 +3704,43 @@ void TensorFlowMaximum(const T* input1_data, const Dims<4>& input1_dims,
   auto max_value = input2_data[0];
   output_map.array() = input1_map.array().max(max_value);
 }
+
+template <typename T1, typename T2, typename T3>
+void ArgMax(const T3* axis, const T1* input_data, const Dims<4>& input_dims,
+            T2* output_data, const Dims<4>& output_dims) {
+  gemmlowp::ScopedProfilingLabel label("ArgMax");
+
+  // The current ArgMax implemention can only determine the index of the maximum
+  // value in the last dimension. So the axis argument is ignored.
+  TFLITE_DCHECK_EQ(axis[0], 3);
+
+  // For ArgMax, the number of output dimensions = (number of input dimensions -
+  // 1). For the sake of simplicity, the output dimensions are equal to the
+  // input dimensions here. We enforce the constraint that the last dimension
+  // must always be 1.
+  TFLITE_DCHECK_EQ(ArraySize(output_dims, 0), 1);
+  const int batches = MatchingArraySize(input_dims, 3, output_dims, 3);
+  const int height = MatchingArraySize(input_dims, 2, output_dims, 2);
+  const int width = MatchingArraySize(input_dims, 1, output_dims, 1);
+  const int depth = ArraySize(input_dims, 0);
+  for (int b = 0; b < batches; ++b) {
+    for (int y = 0; y < height; ++y) {
+      for (int x = 0; x < width; ++x) {
+        auto max_value = input_data[Offset(input_dims, 0, x, y, b)];
+        int max_index = 0;
+        for (int d = 1; d < depth; ++d) {
+          const auto& curr_value = input_data[Offset(input_dims, d, x, y, b)];
+          if (curr_value > max_value) {
+            max_value = curr_value;
+            max_index = d;
+          }
+        }
+        output_data[Offset(output_dims, 0, x, y, b)] = max_index;
+      }
+    }
+  }
+}
+
 }  // namespace optimized_ops
 }  // namespace tflite
 
diff --git a/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h b/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h
index b9ca3d5c62..14c4302587 100644
--- a/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h
+++ b/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h
@@ -2449,6 +2449,40 @@ void TensorFlowMaximum(const T* input1_data, const Dims<4>& input1_dims,
   }
 }
 
+template <typename T1, typename T2, typename T3>
+void ArgMax(const T3* axis, const T1* input_data, const Dims<4>& input_dims,
+            T2* output_data, const Dims<4>& output_dims) {
+  // The current ArgMax implemention can only determine the index of the maximum
+  // value in the last dimension. So the axis argument is ignored.
+  TFLITE_DCHECK_EQ(axis[0], 3);
+
+  // For ArgMax, the number of output dimensions = (number of input dimensions -
+  // 1). For the sake of simplicity, the output dimensions are equal to the
+  // input dimensions here. We enforce the constraint that the last dimension
+  // must always be 1.
+  TFLITE_DCHECK_EQ(ArraySize(output_dims, 0), 1);
+  const int batches = MatchingArraySize(input_dims, 3, output_dims, 3);
+  const int height = MatchingArraySize(input_dims, 2, output_dims, 2);
+  const int width = MatchingArraySize(input_dims, 1, output_dims, 1);
+  const int depth = ArraySize(input_dims, 0);
+  for (int b = 0; b < batches; ++b) {
+    for (int y = 0; y < height; ++y) {
+      for (int x = 0; x < width; ++x) {
+        auto max_value = input_data[Offset(input_dims, 0, x, y, b)];
+        int max_index = 0;
+        for (int d = 1; d < depth; ++d) {
+          const auto& curr_value = input_data[Offset(input_dims, d, x, y, b)];
+          if (curr_value > max_value) {
+            max_value = curr_value;
+            max_index = d;
+          }
+        }
+        output_data[Offset(output_dims, 0, x, y, b)] = max_index;
+      }
+    }
+  }
+}
+
 }  // namespace reference_ops
 }  // namespace tflite
 
diff --git a/tensorflow/contrib/lite/toco/export_tensorflow.cc b/tensorflow/contrib/lite/toco/export_tensorflow.cc
index bddb83206b..51d76e44a0 100644
--- a/tensorflow/contrib/lite/toco/export_tensorflow.cc
+++ b/tensorflow/contrib/lite/toco/export_tensorflow.cc
@@ -899,13 +899,15 @@ tensorflow::DataType GetTensorFlowDataType(const Model& model,
                                            const string& array_name) {
   auto& dtype = model.GetArray(array_name).data_type;
   CHECK(dtype == ArrayDataType::kFloat || dtype == ArrayDataType::kInt32 ||
-        dtype == ArrayDataType::kUint8);
+        dtype == ArrayDataType::kUint8 || dtype == ArrayDataType::kInt64);
   if (dtype == ArrayDataType::kFloat) {
     return tensorflow::DT_FLOAT;
   } else if (dtype == ArrayDataType::kInt32) {
     return tensorflow::DT_INT32;
   } else if (dtype == ArrayDataType::kUint8) {
     return tensorflow::DT_UINT8;
+  } else if (dtype == ArrayDataType::kInt64) {
+    return tensorflow::DT_INT64;
   } else {
     LOG(FATAL) << "Wrong data type";
   }
@@ -949,6 +951,22 @@ void ConvertGatherOperator(const Model& model, const GatherOperator& src_op,
   (*gather_op->mutable_attr())["Tparams"].set_type(params_type);
 }
 
+void ConvertArgMaxOperator(const Model& model, const ArgMaxOperator& src_op,
+                           GraphDef* tensorflow_graph) {
+  auto* argmax_op = tensorflow_graph->add_node();
+  argmax_op->set_op("ArgMax");
+  argmax_op->set_name(src_op.outputs[0]);
+  CHECK_EQ(src_op.inputs.size(), 2);
+  *argmax_op->add_input() = src_op.inputs[0];
+  *argmax_op->add_input() = src_op.inputs[1];
+  (*argmax_op->mutable_attr())["T"].set_type(
+      GetTensorFlowDataType(model, src_op.inputs[0]));
+  (*argmax_op->mutable_attr())["Tidx"].set_type(
+      GetTensorFlowDataType(model, src_op.inputs[1]));
+  (*argmax_op->mutable_attr())["output_type"].set_type(
+      GetTensorFlowDataType(model, src_op.outputs[0]));
+}
+
 void ConvertResizeBilinearOperator(const Model& model,
                                    const ResizeBilinearOperator& src_op,
                                    GraphDef* tensorflow_graph) {
@@ -1495,6 +1513,9 @@ void ConvertOperator(const Model& model, const Operator& src_op,
   } else if (src_op.type == OperatorType::kSlice) {
     ConvertSliceOperator(model, static_cast<const SliceOperator&>(src_op),
                          tensorflow_graph);
+  } else if (src_op.type == OperatorType::kArgMax) {
+    ConvertArgMaxOperator(model, static_cast<const ArgMaxOperator&>(src_op),
+                          tensorflow_graph);
   } else {
     LOG(FATAL) << "Unhandled operator type " << OperatorTypeName(src_op.type);
   }
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/propagate_array_data_types.cc b/tensorflow/contrib/lite/toco/graph_transformations/propagate_array_data_types.cc
index 1d92bcbccd..4fe127544b 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/propagate_array_data_types.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/propagate_array_data_types.cc
@@ -75,6 +75,11 @@ bool PropagateArrayDataTypes::Run(Model* model, std::size_t op_index) {
     CHECK_EQ(op->outputs.size(), 1);
     auto* cast_op = static_cast<CastOperator*>(op);
     model->arrays[op->outputs[0]]->data_type = cast_op->dst_data_type;
+  } else if (op->type == OperatorType::kArgMax) {
+    // Data type of the ArgMax op is specified.
+    CHECK_EQ(op->outputs.size(), 1);
+    auto* argmax_op = static_cast<ArgMaxOperator*>(op);
+    model->arrays[op->outputs[0]]->data_type = argmax_op->output_data_type;
   } else if (op->type == OperatorType::kTensorFlowUnsupported) {
     auto* unsupported_op = static_cast<TensorFlowUnsupportedOperator*>(op);
     if (unsupported_op->output_data_types.size() != op->outputs.size()) {
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/propagate_fixed_sizes.cc b/tensorflow/contrib/lite/toco/graph_transformations/propagate_fixed_sizes.cc
index 5f2fa7e439..0760182a2e 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/propagate_fixed_sizes.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/propagate_fixed_sizes.cc
@@ -940,6 +940,31 @@ void ProcessSvdfOperator(Model* model, SvdfOperator* op) {
   auto& output_array = model->GetArray(op->outputs[1]);
   output_array.mutable_shape()->ReplaceDims({batch_size, num_units});
 }
+
+void ProcessArgMaxOperator(Model* model, ArgMaxOperator* op) {
+  CHECK_EQ(op->inputs.size(), 2);
+  const auto& input_array = *model->arrays[op->inputs[0]];
+  // Yield until input dims have been resolved.
+  if (!input_array.has_shape()) {
+    return;
+  }
+
+  const std::vector<int>& input_dims = input_array.shape().dims();
+  std::vector<int> output_dims;
+
+  output_dims.reserve(input_dims.size() - 1);
+  for (int i = 0; i < input_dims.size() - 1; ++i) {
+    output_dims.push_back(input_dims[i]);
+  }
+  output_dims.push_back(1);
+  const string& output_name = op->outputs[0];
+  auto& output_array = *model->arrays[output_name];
+  if (output_array.has_shape()) {
+    return;
+  }
+  *output_array.mutable_shape()->mutable_dims() = output_dims;
+}
+
 }  // namespace
 
 bool PropagateFixedSizes::Run(Model* model, std::size_t op_index) {
@@ -1117,6 +1142,9 @@ bool PropagateFixedSizes::Run(Model* model, std::size_t op_index) {
       ProcessStridedSliceOperator(model,
                                   static_cast<StridedSliceOperator*>(op));
       break;
+    case OperatorType::kArgMax:
+      ProcessArgMaxOperator(model, static_cast<ArgMaxOperator*>(op));
+      break;
     case OperatorType::kTensorFlowUnsupported:
       break;
     case OperatorType::kSvdf:
diff --git a/tensorflow/contrib/lite/toco/import_tensorflow.cc b/tensorflow/contrib/lite/toco/import_tensorflow.cc
index 156b5e1266..59b199bb6b 100644
--- a/tensorflow/contrib/lite/toco/import_tensorflow.cc
+++ b/tensorflow/contrib/lite/toco/import_tensorflow.cc
@@ -1199,6 +1199,23 @@ void ConvertGatherOperator(const NodeDef& node,
   model->operators.emplace_back(op);
 }
 
+void ConvertArgMaxOperator(const NodeDef& node,
+                           const TensorFlowImportFlags& tf_import_flags,
+                           Model* model) {
+  CHECK_EQ(node.op(), "ArgMax");
+  CHECK_EQ(GetInputsCount(node, tf_import_flags), 2);
+  const auto axis_data_type = GetDataTypeAttr(node, "Tidx");
+  const auto output_type = GetDataTypeAttr(node, "output_type");
+  CHECK(axis_data_type == DT_INT64 || axis_data_type == DT_INT32);
+  CHECK(output_type == DT_INT64 || output_type == DT_INT32);
+  auto* op = new ArgMaxOperator;
+  op->output_data_type = ConvertDataType(output_type);
+  op->inputs.push_back(node.input(0));
+  op->inputs.push_back(node.input(1));
+  op->outputs.push_back(node.name());
+  model->operators.emplace_back(op);
+}
+
 void ConvertResizeBilinearOperator(const NodeDef& node,
                                    const TensorFlowImportFlags& tf_import_flags,
                                    Model* model) {
@@ -1856,6 +1873,8 @@ std::unique_ptr<Model> ImportTensorFlowGraphDef(
       ConvertStackOperator(node, tf_import_flags, model);
     } else if (node.op() == "Transpose") {
       ConvertTransposeOperator(node, tf_import_flags, model);
+    } else if (node.op() == "ArgMax") {
+      ConvertArgMaxOperator(node, tf_import_flags, model);
     } else {
       ConvertUnsupportedOperator(node, tf_import_flags, model);
     }
diff --git a/tensorflow/contrib/lite/toco/model.h b/tensorflow/contrib/lite/toco/model.h
index 7305f858da..253b163649 100644
--- a/tensorflow/contrib/lite/toco/model.h
+++ b/tensorflow/contrib/lite/toco/model.h
@@ -75,6 +75,7 @@ enum class OperatorType {
   kSlice,
   kSqueeze,
   kMean,
+  kArgMax,
   // The SVDF Op is a decomposition of a densely connected Op into
   // low rank filters. For details:
   // https://research.google.com/pubs/pub43813.html
@@ -1223,6 +1224,17 @@ struct GatherOperator : Operator {
   int input_rank = 0;
 };
 
+// ArgMax operator. It returns the index of the maximum value along axis.
+//
+// Inputs:
+//   inputs[0]: required: the input tensor
+//
+// TensorFlow equivalent: ArgMax
+struct ArgMaxOperator : Operator {
+  ArgMaxOperator() : Operator(OperatorType::kArgMax) {}
+  ArrayDataType output_data_type = ArrayDataType::kInt64;
+};
+
 // ResizeBilinear operator. It resizes input images with bilinear interpolation.
 // It does not support align_corners at the moment.
 //
diff --git a/tensorflow/contrib/lite/toco/tooling_util.cc b/tensorflow/contrib/lite/toco/tooling_util.cc
index d8824015ee..381168d15a 100644
--- a/tensorflow/contrib/lite/toco/tooling_util.cc
+++ b/tensorflow/contrib/lite/toco/tooling_util.cc
@@ -268,6 +268,7 @@ const char* OperatorTypeName(OperatorType type) {
     HANDLE_OPERATORTYPENAME_CASE(BatchToSpaceND)
     HANDLE_OPERATORTYPENAME_CASE(Mean)
     HANDLE_OPERATORTYPENAME_CASE(Svdf)
+    HANDLE_OPERATORTYPENAME_CASE(ArgMax)
     HANDLE_OPERATORTYPENAME_CASE(TensorFlowUnsupported)
     default:
       LOG(FATAL) << "Unhandled op type";
@@ -1250,6 +1251,8 @@ int ElementSize(ArrayDataType data_type) {
       return 4;
     case ArrayDataType::kUint8:
       return 1;
+    case ArrayDataType::kInt64:
+      return 8;
     default:
       LOG(FATAL) << "Should not get here.";
       return 0;
-- 
GitLab


From af5a45260eb9393195dd8c02de7a258300e3ea90 Mon Sep 17 00:00:00 2001
From: Skye Wanderman-Milne <skyewm@google.com>
Date: Fri, 15 Dec 2017 11:32:44 -0800
Subject: [PATCH 1080/1225] Enable C API in Python TF_C_API_GRAPH_CONSTRUCTION
 environment variable is set.

The variable can be set to 0 to disable the C API (or not set at all).
I manually verified that _USE_C_API is set as expected.

PiperOrigin-RevId: 179219013
---
 tensorflow/python/framework/ops.py | 19 +++++--------------
 1 file changed, 5 insertions(+), 14 deletions(-)

diff --git a/tensorflow/python/framework/ops.py b/tensorflow/python/framework/ops.py
index 169fa36161..29825bdda6 100644
--- a/tensorflow/python/framework/ops.py
+++ b/tensorflow/python/framework/ops.py
@@ -21,6 +21,7 @@ from __future__ import print_function
 import collections
 import copy
 import linecache
+import os
 import re
 import sys
 import threading
@@ -55,22 +56,12 @@ from tensorflow.python.util import compat
 from tensorflow.python.util import decorator_utils
 from tensorflow.python.util import tf_contextlib
 
+
 # Temporary global switch determining if we should enable the work-in-progress
 # calls to the C API. Currently disabled by default but can be manually enabled
-# e.g. in tests. This will be removed once all functionality is supported and
-# there's no performance penalty with it enabled.
-#
-# TODO(skyewm) before we can remove this:
-# - functions
-# - import_graph_def() incrementally adds inputs to ops (i.e. creates an
-#   Operation and then calls _add_input()). The current code requires that all
-#   inputs be specified when creating the Operation (since we call
-#   TF_FinishOperation()).
-# - ops_test.py (and others?) create unregistered op types
-# - while loop
-# - performance (e.g. delete/refactor redundant Python functionality, switch to
-#   new session API)
-_USE_C_API = False
+# in code or via the environment variable. This will be removed once all
+# functionality is supported and there's no performance penalty with it enabled.
+_USE_C_API = os.getenv("TF_C_API_GRAPH_CONSTRUCTION", "0") is not "0"
 
 
 def tensor_id(tensor):
-- 
GitLab


From 8d3690c5649fb6dac481e15eda365e73aeaab84a Mon Sep 17 00:00:00 2001
From: Allen Lavoie <allenl@google.com>
Date: Fri, 15 Dec 2017 11:39:54 -0800
Subject: [PATCH 1081/1225] Plug an eager memory leak, add tests for reference
 counts.

There are still some slightly less serious leaks. Will follow up with a fix once I track those down.

PiperOrigin-RevId: 179220052
---
 tensorflow/c/eager/tape.h                     |  5 ++
 tensorflow/python/BUILD                       | 15 ++++-
 tensorflow/python/eager/backprop_test.py      | 30 +++++++++
 tensorflow/python/framework/test_util.py      | 64 ++++++++++++++++++-
 tensorflow/python/framework/test_util_test.py | 20 ++++++
 5 files changed, 131 insertions(+), 3 deletions(-)

diff --git a/tensorflow/c/eager/tape.h b/tensorflow/c/eager/tape.h
index 20ed037c52..17c9c8cc9a 100644
--- a/tensorflow/c/eager/tape.h
+++ b/tensorflow/c/eager/tape.h
@@ -530,6 +530,11 @@ Status GradientTape<Gradient, BackwardFunction>::ComputeGradient(
       if (!persistent_) {
         vspace.ReleaseBackwardFunction(trace.backward_function);
       }
+      for (Gradient* grad : out_gradients) {
+        if (grad != nullptr) {
+          vspace.DeleteGradient(grad);
+        }
+      }
     }
     VLOG(1) << "Got " << in_gradients.size() << " in_gradients for "
             << trace.input_tensor_id.size() << " sources";
diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD
index 45383eda99..80f3ec6681 100644
--- a/tensorflow/python/BUILD
+++ b/tensorflow/python/BUILD
@@ -800,15 +800,23 @@ py_library(
     srcs = ["framework/test_util.py"],
     srcs_version = "PY2AND3",
     deps = [
+        ":array_ops",
         ":client",
         ":errors",
-        ":framework",
         ":framework_for_generated_wrappers",
         ":platform",
         ":platform_test",
         ":pywrap_tensorflow",
+        ":random_seed",
+        ":resource_variable_ops",
+        ":session",
         ":training",
         ":util",
+        ":variables",
+        "//tensorflow/core:protos_all_py",
+        "//tensorflow/python/eager:backprop",
+        "//tensorflow/python/eager:context",
+        "//tensorflow/python/eager:tape",
         "//third_party/py/numpy",
         "@six_archive//:six",
     ],
@@ -1215,6 +1223,11 @@ py_test(
         ":framework_test_lib",
         ":platform_test",
         ":random_ops",
+        ":resource_variable_ops",
+        ":session",
+        ":variables",
+        "//tensorflow/core:protos_all_py",
+        "//tensorflow/python/eager:context",
         "//third_party/py/numpy",
     ],
 )
diff --git a/tensorflow/python/eager/backprop_test.py b/tensorflow/python/eager/backprop_test.py
index 90c0e47ff9..7c44d55467 100644
--- a/tensorflow/python/eager/backprop_test.py
+++ b/tensorflow/python/eager/backprop_test.py
@@ -30,6 +30,7 @@ from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
 from tensorflow.python.framework import tensor_shape
+from tensorflow.python.framework import test_util
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import embedding_ops
 from tensorflow.python.ops import gradients
@@ -151,6 +152,7 @@ class BackpropTest(test.TestCase):
     opt.apply_gradients([(grad, embedding)])
     self.assertAllClose(expected, embedding.read_value())
 
+  @test_util.assert_no_new_tensors
   def testGradientNone(self):
 
     def loss(x, l):
@@ -165,6 +167,7 @@ class BackpropTest(test.TestCase):
     g, = backprop.gradients_function(loss, [0])(logits, labels)
     self.assertAllEqual(g.numpy(), [[-0.5, 0.5]])
 
+  @test_util.assert_no_new_tensors
   def testSecondGrad(self):
 
     def first(x):
@@ -181,6 +184,7 @@ class BackpropTest(test.TestCase):
     grad = backprop.gradients_function(second, [0])(f)[0]
     self.assertAllEqual([[0.0]], grad)
 
+  @test_util.assert_no_new_tensors
   def testMakeVJP(self):
 
     def f(x):
@@ -191,6 +195,7 @@ class BackpropTest(test.TestCase):
     self.assertAllEqual(result, 9.0)
     self.assertAllEqual(vjp(2.0)[0], 12.0)
 
+  @test_util.assert_no_new_tensors
   def testGradGrad(self):
 
     def sq(x):
@@ -204,6 +209,7 @@ class BackpropTest(test.TestCase):
 
     self.assertAllEqual(gradgrad(constant_op.constant(3.0))[0], 2.0)
 
+  @test_util.assert_no_new_tensors
   def testGradGradExp(self):
 
     def grad(x):
@@ -214,11 +220,13 @@ class BackpropTest(test.TestCase):
 
     self.assertAllEqual(gradgrad(constant_op.constant(0.0))[0], 1.0)
 
+  @test_util.assert_no_new_tensors
   def testStopGradient(self):
     grad = backprop.gradients_function(
         lambda x: array_ops.stop_gradient(math_ops.argmax(x)))
     self.assertAllEqual(grad([0.0])[0], None)
 
+  @test_util.assert_no_new_tensors
   def testArgmax(self):
     def argmax(x):
       i = math_ops.argmax(x)
@@ -227,6 +235,7 @@ class BackpropTest(test.TestCase):
     grad = backprop.gradients_function(argmax)
     self.assertAllEqual(grad([0.0])[0], None)
 
+  @test_util.assert_no_new_tensors
   def testGPU(self):
     if not context.context().num_gpus():
       self.skipTest('No GPUs found')
@@ -242,6 +251,8 @@ class BackpropTest(test.TestCase):
     grad = backprop.gradients_function(fn, [0])(constant_op.constant(1.0))[0]
     self.assertAllEqual(grad, 1.0)
 
+  # TODO(b/70675592): Fix leaked Tensors in this test.
+  # @test_util.assert_no_new_tensors
   def testGPUImplicitGrad(self):
     if not context.context().num_gpus():
       self.skipTest('No GPU found')
@@ -257,6 +268,7 @@ class BackpropTest(test.TestCase):
     self.assertEqual(
         backprop.implicit_grad(f)()[0][0].cpu().numpy(), 1.0)
 
+  @test_util.assert_no_new_tensors
   def testCPU(self):
 
     def fn(x):
@@ -267,6 +279,7 @@ class BackpropTest(test.TestCase):
     grad = backprop.gradients_function(fn, [0])(constant_op.constant(1.0))[0]
     self.assertAllEqual(grad, 1.0)
 
+  @test_util.assert_no_new_tensors
   def testTensorCopyGPU2CPU2GPU(self):
     if not context.context().num_gpus():
       self.skipTest('No GPUs found')
@@ -281,6 +294,7 @@ class BackpropTest(test.TestCase):
     grad = backprop.gradients_function(f, [0])(a, b)[0]
     self.assertAllEqual(grad, 1.0)
 
+  @test_util.assert_no_new_tensors
   def testEmptyParams(self):
 
     def fn(a, b):
@@ -292,6 +306,7 @@ class BackpropTest(test.TestCase):
     self.assertAllEqual(dx, y.numpy())
     self.assertAllEqual(dy, x.numpy())
 
+  @test_util.assert_no_new_tensors
   def testUnconnectedNone(self):
     v = resource_variable_ops.ResourceVariable(
         1.0, name='testUnconnectedNone')
@@ -302,6 +317,7 @@ class BackpropTest(test.TestCase):
 
     self.assertEqual(backprop.implicit_grad(f)()[0][0], None)
 
+  @test_util.assert_no_new_tensors
   def testGradientTape(self):
     with backprop.GradientTape() as g:
       x = constant_op.constant(3.0)
@@ -316,6 +332,7 @@ class BackpropTest(test.TestCase):
     grad = g.gradient(y, [x])[0]
     self.assertEqual(grad.numpy(), 6.0)
 
+  @test_util.assert_no_new_tensors
   def testGradientTapeGradientCalledMultipleTimes(self):
     with backprop.GradientTape() as g:
       x = constant_op.constant(3.0)
@@ -327,6 +344,7 @@ class BackpropTest(test.TestCase):
         RuntimeError, 'GradientTape.gradient can only be called once'):
       g.gradient(y, [x])
 
+  @test_util.assert_no_new_tensors
   def testPersistentTape(self):
     with backprop.GradientTape(persistent=True) as g:
       x = constant_op.constant(3.0)
@@ -339,6 +357,7 @@ class BackpropTest(test.TestCase):
     self.assertEqual(dy_dx.numpy(), 2*3)
     del g
 
+  @test_util.assert_no_new_tensors
   def testPersistentNestedTape(self):
     with backprop.GradientTape(persistent=True) as g:
       x = constant_op.constant(3.0)
@@ -358,6 +377,8 @@ class BackpropTest(test.TestCase):
     self.assertEqual(grad.numpy(), 12.0)
     del g
 
+  # TODO(b/70675592): Fix leaked Tensors in this test.
+  # @test_util.assert_no_new_tensors
   def testGradientTapeVariable(self):
     v = resource_variable_ops.ResourceVariable(1.0, name='v')
     with backprop.GradientTape() as g:
@@ -365,6 +386,7 @@ class BackpropTest(test.TestCase):
     grad = g.gradient(y, [v])[0]
     self.assertAllEqual(grad, 2.0)
 
+  @test_util.assert_no_new_tensors
   def testEmptyParamsForValueAndGradFunction(self):
     def fn(a, b):
       return a * b
@@ -377,6 +399,7 @@ class BackpropTest(test.TestCase):
     self.assertAllEqual(dx, y)
     self.assertAllEqual(dy, x)
 
+  @test_util.assert_no_new_tensors
   def testNonEmptyParamsForValueAndGradFunction(self):
     def fn(a, b):
       return a * b
@@ -389,6 +412,7 @@ class BackpropTest(test.TestCase):
     self.assertEqual(1, len(grads))
     self.assertAllEqual(grads[0], x)
 
+  @test_util.assert_no_new_tensors
   def testTensorCopyCPU2GPU2CPU(self):
     if not context.context().num_gpus():
       self.skipTest('No GPUs found')
@@ -473,6 +497,7 @@ class BackpropTest(test.TestCase):
 
     self.assertAllEqual(backprop.gradients_function(f)(1.0)[0], 3.0)
 
+  @test_util.assert_no_new_tensors
   def testExceptionSafety(self):
 
     def f(unused_x):
@@ -488,6 +513,8 @@ class BackpropTest(test.TestCase):
 
     self.assertAllEqual(backprop.gradients_function(real_f)(1.0)[0], 2.0)
 
+  # TODO(b/70675592): Fix leaked Tensors in this test.
+  # @test_util.assert_no_new_tensors
   def testMultiValueConvertToTensor(self):
     x = resource_variable_ops.ResourceVariable(
         initial_value=array_ops.constant([1.0]), name='x')
@@ -548,6 +575,7 @@ class BackpropTest(test.TestCase):
         initial_value=1., name='testSameObjectForMultipleArguments.Variable')
     self.assertAllEqual([1., 1.], np_g(v, v))
 
+  @test_util.assert_no_new_tensors
   def testImplicitGradientsCustomGradientAndCachedVariableValue(self):
 
     @custom_gradient.custom_gradient
@@ -573,6 +601,7 @@ class BackpropTest(test.TestCase):
     self.assertAllEqual(7, grad)
     self.assertAllEqual(x, var)
 
+  @test_util.assert_no_new_tensors
   def testCustomGradient(self):
 
     @custom_gradient.custom_gradient
@@ -599,6 +628,7 @@ class BackpropTest(test.TestCase):
         var.assign_sub(lr*grad)
     self.assertAllEqual(losses, [4.0, 3., 2., 1., 0.])
 
+  @test_util.assert_no_new_tensors
   def testCustomGradientIdentity(self):
 
     @custom_gradient.custom_gradient
diff --git a/tensorflow/python/framework/test_util.py b/tensorflow/python/framework/test_util.py
index 8875d45a07..7627fb3e69 100644
--- a/tensorflow/python/framework/test_util.py
+++ b/tensorflow/python/framework/test_util.py
@@ -47,6 +47,7 @@ from tensorflow.core.protobuf import rewriter_config_pb2
 from tensorflow.python import pywrap_tensorflow
 from tensorflow.python.client import device_lib
 from tensorflow.python.client import session
+from tensorflow.python.eager import backprop
 from tensorflow.python.eager import context
 from tensorflow.python.eager import tape
 from tensorflow.python.framework import device as pydev
@@ -57,6 +58,7 @@ from tensorflow.python.framework import random_seed
 from tensorflow.python.framework import versions
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import resource_variable_ops
+from tensorflow.python.ops import variables
 from tensorflow.python.platform import googletest
 from tensorflow.python.platform import tf_logging as logging
 from tensorflow.python.training import server_lib
@@ -455,6 +457,62 @@ class IsolateTest(object):
         type_arg, value_arg, traceback_arg)
 
 
+def assert_no_new_tensors(f):
+  """Decorator for asserting that no new Tensors persist after a test.
+
+  Mainly useful for checking that code using the Python C API has correctly
+  manipulated reference counts.
+
+  Clears the caches that it knows about, runs the garbage collector, then checks
+  that there are no Tensor or Tensor-like objects still around. This includes
+  Tensors to which something still has a reference (e.g. from missing
+  Py_DECREFs) and uncollectable cycles (i.e. Python reference cycles where one
+  of the objects has __del__ defined).
+
+  Args:
+    f: The test case to run.
+  Returns:
+    The decorated test case.
+  """
+
+  def decorator(self, **kwargs):
+    """Finds existing Tensors, runs the test, checks for new Tensors."""
+
+    def _is_tensor(obj):
+      try:
+        return (isinstance(obj, ops.Tensor) or
+                isinstance(obj, variables.Variable))
+      except ReferenceError:
+        # If the object no longer exists, we don't care about it.
+        return False
+
+    tensors_before = set(id(obj) for obj in gc.get_objects() if _is_tensor(obj))
+    outside_container_prefix = ops.get_default_graph()._container_prefix
+    with IsolateTest():
+      # Run the test in a new graph so that collections get cleared when it's
+      # done, but inherit the container prefix so that we can print the values
+      # of variables which get leaked when executing eagerly.
+      ops.get_default_graph()._container_prefix = outside_container_prefix
+      f(self, **kwargs)
+    # Make an effort to clear caches, which would otherwise look like leaked
+    # Tensors.
+    backprop._last_zero = [None]
+    backprop._shape_dtype = [None, None]
+    context.get_default_context().scalar_cache().clear()
+    gc.collect()
+    tensors_after = [
+        obj for obj in gc.get_objects()
+        if _is_tensor(obj) and id(obj) not in tensors_before
+    ]
+    if tensors_after:
+      raise AssertionError(("%d Tensors not deallocated after test: %s" % (
+          len(tensors_after),
+          str(tensors_after),
+      )))
+
+  return decorator
+
+
 def assert_no_garbage_created(f):
   """Test method decorator to assert that no garbage has been created.
 
@@ -509,7 +567,8 @@ def run_in_graph_and_eager_modes(
       garbage for legitimate reasons (e.g. they define a class which inherits
       from `object`), and because DEBUG_SAVEALL is sticky in some Python
       interpreters (meaning that tests which rely on objects being collected
-      elsewhere in the unit test file will not work).
+      elsewhere in the unit test file will not work). Additionally, checks that
+      nothing still has a reference to Tensors that the test allocated.
   Returns:
     Returns a decorator that will run the decorated test function
         using both a graph and using eager execution.
@@ -546,7 +605,8 @@ def run_in_graph_and_eager_modes(
             f(self, **kwargs)
 
       if assert_no_eager_garbage:
-        run_eager_mode = assert_no_garbage_created(run_eager_mode)
+        run_eager_mode = assert_no_new_tensors(
+            assert_no_garbage_created(run_eager_mode))
 
       with context.eager_mode():
         with IsolateTest():
diff --git a/tensorflow/python/framework/test_util_test.py b/tensorflow/python/framework/test_util_test.py
index 90b5290626..f6aed118ca 100644
--- a/tensorflow/python/framework/test_util_test.py
+++ b/tensorflow/python/framework/test_util_test.py
@@ -373,6 +373,26 @@ class GarbageCollectionTest(test_util.TensorFlowTestCase):
 
     ReferenceCycleTest().test_has_no_cycle()
 
+  def test_no_leaked_tensor_decorator(self):
+
+    class LeakedTensorTest(object):
+
+      def __init__(inner_self):  # pylint: disable=no-self-argument
+        inner_self.assertEqual = self.assertEqual  # pylint: disable=invalid-name
+
+      @test_util.assert_no_new_tensors
+      def test_has_leak(self):
+        self.a = constant_op.constant([3.])
+
+      @test_util.assert_no_new_tensors
+      def test_has_no_leak(self):
+        constant_op.constant([3.])
+
+    with self.assertRaisesRegexp(AssertionError, "Tensors not deallocated"):
+      LeakedTensorTest().test_has_leak()
+
+    LeakedTensorTest().test_has_no_leak()
+
 
 @test_util.with_c_api
 class IsolationTest(test_util.TensorFlowTestCase):
-- 
GitLab


From faf7f05f5ed3d92405656a318fb2d571a7d31532 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Yan=20Facai=20=28=E9=A2=9C=E5=8F=91=E6=89=8D=29?=
 <facai.yan@gmail.com>
Date: Sat, 16 Dec 2017 03:54:37 +0800
Subject: [PATCH 1082/1225] GPU: Add Complex kernel for tf.exp() (#15182)

* TST: complex case
---
 tensorflow/core/kernels/cwise_op_exp.cc       |  3 ++-
 .../core/kernels/cwise_op_gpu_exp.cu.cc       |  2 +-
 tensorflow/core/kernels/cwise_ops.h           | 19 +++++++++++++++++++
 .../python/kernel_tests/cwise_ops_test.py     |  4 ++--
 4 files changed, 24 insertions(+), 4 deletions(-)

diff --git a/tensorflow/core/kernels/cwise_op_exp.cc b/tensorflow/core/kernels/cwise_op_exp.cc
index 9d4d654427..66d7b7d22e 100644
--- a/tensorflow/core/kernels/cwise_op_exp.cc
+++ b/tensorflow/core/kernels/cwise_op_exp.cc
@@ -20,7 +20,8 @@ REGISTER5(UnaryOp, CPU, "Exp", functor::exp, float, Eigen::half, double,
           complex64, complex128);
 
 #if GOOGLE_CUDA
-REGISTER3(UnaryOp, GPU, "Exp", functor::exp, float, Eigen::half, double);
+REGISTER5(UnaryOp, GPU, "Exp", functor::exp, float, Eigen::half, double,
+          complex64, complex128);
 #endif
 
 #if TENSORFLOW_USE_SYCL
diff --git a/tensorflow/core/kernels/cwise_op_gpu_exp.cu.cc b/tensorflow/core/kernels/cwise_op_gpu_exp.cu.cc
index 0f492917bd..417e5da758 100644
--- a/tensorflow/core/kernels/cwise_op_gpu_exp.cu.cc
+++ b/tensorflow/core/kernels/cwise_op_gpu_exp.cu.cc
@@ -19,7 +19,7 @@ limitations under the License.
 
 namespace tensorflow {
 namespace functor {
-DEFINE_UNARY3(exp, Eigen::half, float, double);
+DEFINE_UNARY5(exp, Eigen::half, float, double, complex64, complex128);
 }  // namespace functor
 }  // namespace tensorflow
 
diff --git a/tensorflow/core/kernels/cwise_ops.h b/tensorflow/core/kernels/cwise_ops.h
index d32185b6bf..062487b8c3 100644
--- a/tensorflow/core/kernels/cwise_ops.h
+++ b/tensorflow/core/kernels/cwise_ops.h
@@ -26,6 +26,25 @@ limitations under the License.
 #include "tensorflow/core/kernels/bounds_check.h"
 
 namespace Eigen {
+namespace numext {
+#if GOOGLE_CUDA
+template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+std::complex<float> exp(const std::complex<float> &x) {
+  auto com = ::expf(x.real());
+  auto res_real = com * ::cosf(x.imag());
+  auto res_imag = com * ::sinf(x.imag());
+  return std::complex<float>(res_real, res_imag);
+}
+template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+std::complex<double> exp(const std::complex<double> &x) {
+  auto com = ::exp(x.real());
+  auto res_real = com * ::cos(x.imag());
+  auto res_imag = com * ::sin(x.imag());
+  return std::complex<double>(res_real, res_imag);
+}
+#endif
+}
+
 namespace internal {
 
 // TODO(rmlarsen): Get rid of fmod2 once fmod is upstreamed to Eigen.
diff --git a/tensorflow/python/kernel_tests/cwise_ops_test.py b/tensorflow/python/kernel_tests/cwise_ops_test.py
index e0c53950e6..cea12ea8ec 100644
--- a/tensorflow/python/kernel_tests/cwise_ops_test.py
+++ b/tensorflow/python/kernel_tests/cwise_ops_test.py
@@ -416,7 +416,7 @@ class UnaryOpTest(test.TestCase):
     self._compareCpu(x, np.square, math_ops.square)
     self._compareCpu(y, np.sqrt, math_ops.sqrt)
     self._compareCpu(y, self._rsqrt, math_ops.rsqrt)
-    self._compareCpu(x, np.exp, math_ops.exp)
+    self._compareBoth(x, np.exp, math_ops.exp)
     self._compareCpu(x, np.expm1, math_ops.expm1)
     self._compareCpu(y, np.log, math_ops.log)
     self._compareCpu(y, np.log1p, math_ops.log1p)
@@ -460,7 +460,7 @@ class UnaryOpTest(test.TestCase):
     self._compareCpu(x, np.square, math_ops.square)
     self._compareCpu(y, np.sqrt, math_ops.sqrt)
     self._compareCpu(y, self._rsqrt, math_ops.rsqrt)
-    self._compareCpu(x, np.exp, math_ops.exp)
+    self._compareBoth(x, np.exp, math_ops.exp)
     self._compareCpu(x, np.expm1, math_ops.expm1)
     self._compareCpu(y, np.log, math_ops.log)
     self._compareCpu(y, np.log1p, math_ops.log1p)
-- 
GitLab


From fbb5392a65ebaeca19f95cb13fca9166bb5ba3ce Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 15 Dec 2017 11:52:01 -0800
Subject: [PATCH 1083/1225] Merged commit includes the following changes:
 179221620  by akshayka:

    Internal cleanup: Delete extraneous print statement in test case.

--
179220917  by A. Unique TensorFlower:

    [XLA:JF] Make HLO parser recognize negative padding.

--

PiperOrigin-RevId: 179221620
---
 tensorflow/compiler/xla/BUILD                       |  6 ++++++
 tensorflow/compiler/xla/python/BUILD                |  4 +++-
 .../compiler/xla/python/local_computation_builder.i |  2 +-
 tensorflow/compiler/xla/python/xla_client_test.py   |  6 +++---
 tensorflow/compiler/xla/tools/parser/hlo_lexer.cc   |  5 +++--
 .../compiler/xla/tools/parser/hlo_parser_test.cc    | 13 +++++++++++++
 tensorflow/python/framework/ops_test.py             |  1 -
 7 files changed, 29 insertions(+), 8 deletions(-)

diff --git a/tensorflow/compiler/xla/BUILD b/tensorflow/compiler/xla/BUILD
index cd69c69889..be0dd0bc82 100644
--- a/tensorflow/compiler/xla/BUILD
+++ b/tensorflow/compiler/xla/BUILD
@@ -641,6 +641,12 @@ filegroup(
     visibility = ["//tensorflow:__subpackages__"],
 )
 
+py_proto_library(
+    name = "xla_data_proto_py_pb2",
+    api_version = 2,
+    deps = [":xla_data_proto"],
+)
+
 # This is a headers target that extra XLA devices can use to prevent circular dependencies.  Devices that are compiled as separate shared objects can also use it to prevent linking of library code.
 cc_header_only_library(
     name = "xla_headers_lib",
diff --git a/tensorflow/compiler/xla/python/BUILD b/tensorflow/compiler/xla/python/BUILD
index a6b8158671..7734e55967 100644
--- a/tensorflow/compiler/xla/python/BUILD
+++ b/tensorflow/compiler/xla/python/BUILD
@@ -11,7 +11,7 @@ py_library(
     visibility = ["//visibility:public"],
     deps = [
         ":pywrap_xla",
-        "//tensorflow/compiler/xla:xla_data_proto_py",
+        "//tensorflow/compiler/xla:xla_data_proto_py_pb2",
     ],
 )
 
@@ -23,6 +23,7 @@ py_test(
     deps = [
         ":xla_client",
         "//tensorflow/python:platform_test",
+        "//third_party/py/numpy",
     ],
 )
 
@@ -51,6 +52,7 @@ cc_library(
         "//tensorflow/compiler/xla/client:local_client",
         "//tensorflow/compiler/xla/service:cpu_plugin",
         "//tensorflow/core:lib",
+        "//tensorflow/stream_executor/host:host_platform",
     ],
 )
 
diff --git a/tensorflow/compiler/xla/python/local_computation_builder.i b/tensorflow/compiler/xla/python/local_computation_builder.i
index ac8f3e4277..678de3e762 100644
--- a/tensorflow/compiler/xla/python/local_computation_builder.i
+++ b/tensorflow/compiler/xla/python/local_computation_builder.i
@@ -106,7 +106,7 @@ limitations under the License.
 
 #include "tensorflow/compiler/xla/literal_util.h"
 #include "tensorflow/compiler/xla/shape_util.h"
-#include "tensorflow/compiler/xla/xla_data.pb.h"
+#include "tensorflow/compiler/xla/xla_data.proto.h"
 #include "tensorflow/core/lib/gtl/array_slice.h"
 #include "tensorflow/compiler/xla/python/numpy_bridge.h"
 #include "tensorflow/compiler/xla/python/local_computation_builder.h"
diff --git a/tensorflow/compiler/xla/python/xla_client_test.py b/tensorflow/compiler/xla/python/xla_client_test.py
index 878cd83edc..cf71212fdb 100644
--- a/tensorflow/compiler/xla/python/xla_client_test.py
+++ b/tensorflow/compiler/xla/python/xla_client_test.py
@@ -23,10 +23,10 @@ import itertools
 import numpy as np
 
 from tensorflow.compiler.xla.python import xla_client
-import unittest
+from tensorflow.python.platform import googletest
 
 
-class LocalComputationTest(unittest.TestCase):
+class LocalComputationTest(googletest.TestCase):
   """Base class for running an XLA Computation through the local client."""
 
   def _NewComputation(self, name=None):
@@ -895,4 +895,4 @@ class EmbeddedComputationsTest(LocalComputationTest):
 
 
 if __name__ == "__main__":
-  unittest.main()
+  googletest.main()
diff --git a/tensorflow/compiler/xla/tools/parser/hlo_lexer.cc b/tensorflow/compiler/xla/tools/parser/hlo_lexer.cc
index 459d511e90..6d1e4173d2 100644
--- a/tensorflow/compiler/xla/tools/parser/hlo_lexer.cc
+++ b/tensorflow/compiler/xla/tools/parser/hlo_lexer.cc
@@ -257,7 +257,8 @@ TokKind HloLexer::LexPercent() {
 // fp without exp ::= [-]?([0-9]+[.][0-9]*|[0-9]*[.][0-9]+)
 // dim_labels_pattern ::= [0-9bf]{2,}_[0-9io]{2,}->[0-9bf]{2,}
 // dxd_pattern ::= [0-9]+(x[0-9]+)+
-// pad_pattern ::= [0-9]+_[0-9]+(_[0-9]+)?(x[0-9]+_[0-9]+(_[0-9]+)?)*
+// pad_pattern ::=
+//   [-]?[0-9]+_[-]?[0-9]+(_[0-9]+)?(x[-]?[0-9]+_[-]?[0-9]+(_[0-9]+)?)*
 // int ::=  [-]?[0-9]+
 // negative inf ::= '-inf'
 TokKind HloLexer::LexNumberOrPattern() {
@@ -275,7 +276,7 @@ TokKind HloLexer::LexNumberOrPattern() {
       R"([0-9bf]{2,}_[0-9io]{2,}->[0-9bf]{2,})"};
   static LazyRE2 dxd_pattern = {R"([0-9]+(x[0-9]+)+)"};
   static LazyRE2 pad_pattern = {
-      R"([0-9]+_[0-9]+(_[0-9]+)?(x[0-9]+_[0-9]+(_[0-9]+)?)*)"};
+      R"([-]?[0-9]+_[-]?[0-9]+(_[0-9]+)?(x[-]?[0-9]+_[-]?[0-9]+(_[0-9]+)?)*)"};
 
   if (RE2::Consume(&consumable, *dim_labels_pattern)) {
     current_ptr_ = consumable.begin();
diff --git a/tensorflow/compiler/xla/tools/parser/hlo_parser_test.cc b/tensorflow/compiler/xla/tools/parser/hlo_parser_test.cc
index 8b6b855218..74a0e35839 100644
--- a/tensorflow/compiler/xla/tools/parser/hlo_parser_test.cc
+++ b/tensorflow/compiler/xla/tools/parser/hlo_parser_test.cc
@@ -606,6 +606,19 @@ ENTRY %PadHasInterior.v3 (input: f32[1,25,7,7]) -> f32[1,25,17,11] {
   ROOT %pad = f32[1,25,17,11]{3,2,1,0} pad(f32[1,25,7,7]{3,2,1,0} %input, f32[] %constant), padding=0_0_0x0_0_0x2_2_1x2_2_0
 }
 
+)"
+},
+// Negative padding
+{
+"PadHasNegativePadding",
+R"(HloModule PadHasNegativePadding_module
+
+ENTRY %PadHasNegativePadding (input: f32[1,25,7,7,10]) -> f32[1,15,6,3,29] {
+  %input = f32[1,25,7,7,10]{4,3,2,1,0} parameter(0)
+  %constant = f32[] constant(-5.123)
+  ROOT %pad = f32[1,15,6,3,29]{4,3,2,1,0} pad(f32[1,25,7,7,10]{4,3,2,1,0} %input, f32[] %constant), padding=0_0_0x0_-10_0x0_-1_0x-2_-2_0x-1_-1_3
+}
+
 )"
 },
 // fusion
diff --git a/tensorflow/python/framework/ops_test.py b/tensorflow/python/framework/ops_test.py
index 92d42c1807..e327e22f30 100644
--- a/tensorflow/python/framework/ops_test.py
+++ b/tensorflow/python/framework/ops_test.py
@@ -2182,7 +2182,6 @@ class AttrScopeTest(test_util.TensorFlowTestCase):
       b = compat.as_text(x.get_attr("_B"))
     except ValueError:
       b = None
-    print(a, b)
     return (a, b)
 
   def testNoLabel(self):
-- 
GitLab


From f962b77042b6fb207d18d00fd9ef9aa838e14a3d Mon Sep 17 00:00:00 2001
From: Akshay Agrawal <akshayka@google.com>
Date: Fri, 15 Dec 2017 12:10:18 -0800
Subject: [PATCH 1084/1225] Capture tensors that do not trigger
 convert_to_tensor in defun

Returning a closed-over Tensor does not trigger a call to convert_to_tensor,
so we need to manually coerce such Tensors to graph tensors and capture them.

PiperOrigin-RevId: 179224063
---
 tensorflow/compiler/xla/BUILD                 |  6 -----
 tensorflow/compiler/xla/python/BUILD          |  4 +---
 .../xla/python/local_computation_builder.i    |  2 +-
 .../compiler/xla/python/xla_client_test.py    |  6 ++---
 tensorflow/python/eager/function.py           | 23 ++++++++++++++++---
 tensorflow/python/eager/function_test.py      | 19 +++++++++++++++
 6 files changed, 44 insertions(+), 16 deletions(-)

diff --git a/tensorflow/compiler/xla/BUILD b/tensorflow/compiler/xla/BUILD
index be0dd0bc82..cd69c69889 100644
--- a/tensorflow/compiler/xla/BUILD
+++ b/tensorflow/compiler/xla/BUILD
@@ -641,12 +641,6 @@ filegroup(
     visibility = ["//tensorflow:__subpackages__"],
 )
 
-py_proto_library(
-    name = "xla_data_proto_py_pb2",
-    api_version = 2,
-    deps = [":xla_data_proto"],
-)
-
 # This is a headers target that extra XLA devices can use to prevent circular dependencies.  Devices that are compiled as separate shared objects can also use it to prevent linking of library code.
 cc_header_only_library(
     name = "xla_headers_lib",
diff --git a/tensorflow/compiler/xla/python/BUILD b/tensorflow/compiler/xla/python/BUILD
index 7734e55967..a6b8158671 100644
--- a/tensorflow/compiler/xla/python/BUILD
+++ b/tensorflow/compiler/xla/python/BUILD
@@ -11,7 +11,7 @@ py_library(
     visibility = ["//visibility:public"],
     deps = [
         ":pywrap_xla",
-        "//tensorflow/compiler/xla:xla_data_proto_py_pb2",
+        "//tensorflow/compiler/xla:xla_data_proto_py",
     ],
 )
 
@@ -23,7 +23,6 @@ py_test(
     deps = [
         ":xla_client",
         "//tensorflow/python:platform_test",
-        "//third_party/py/numpy",
     ],
 )
 
@@ -52,7 +51,6 @@ cc_library(
         "//tensorflow/compiler/xla/client:local_client",
         "//tensorflow/compiler/xla/service:cpu_plugin",
         "//tensorflow/core:lib",
-        "//tensorflow/stream_executor/host:host_platform",
     ],
 )
 
diff --git a/tensorflow/compiler/xla/python/local_computation_builder.i b/tensorflow/compiler/xla/python/local_computation_builder.i
index 678de3e762..ac8f3e4277 100644
--- a/tensorflow/compiler/xla/python/local_computation_builder.i
+++ b/tensorflow/compiler/xla/python/local_computation_builder.i
@@ -106,7 +106,7 @@ limitations under the License.
 
 #include "tensorflow/compiler/xla/literal_util.h"
 #include "tensorflow/compiler/xla/shape_util.h"
-#include "tensorflow/compiler/xla/xla_data.proto.h"
+#include "tensorflow/compiler/xla/xla_data.pb.h"
 #include "tensorflow/core/lib/gtl/array_slice.h"
 #include "tensorflow/compiler/xla/python/numpy_bridge.h"
 #include "tensorflow/compiler/xla/python/local_computation_builder.h"
diff --git a/tensorflow/compiler/xla/python/xla_client_test.py b/tensorflow/compiler/xla/python/xla_client_test.py
index cf71212fdb..878cd83edc 100644
--- a/tensorflow/compiler/xla/python/xla_client_test.py
+++ b/tensorflow/compiler/xla/python/xla_client_test.py
@@ -23,10 +23,10 @@ import itertools
 import numpy as np
 
 from tensorflow.compiler.xla.python import xla_client
-from tensorflow.python.platform import googletest
+import unittest
 
 
-class LocalComputationTest(googletest.TestCase):
+class LocalComputationTest(unittest.TestCase):
   """Base class for running an XLA Computation through the local client."""
 
   def _NewComputation(self, name=None):
@@ -895,4 +895,4 @@ class EmbeddedComputationsTest(LocalComputationTest):
 
 
 if __name__ == "__main__":
-  googletest.main()
+  unittest.main()
diff --git a/tensorflow/python/eager/function.py b/tensorflow/python/eager/function.py
index 239216243a..b068d5e584 100644
--- a/tensorflow/python/eager/function.py
+++ b/tensorflow/python/eager/function.py
@@ -91,13 +91,24 @@ def _convert_to_graph_tensor(value, dtype=None, name=None, as_ref=False):
     is not enabled. A placeholder which will have the value of the
     tensor at runtime otherwise.
   """
+  del as_ref  # Unused.
+
   if context.in_eager_mode():
     return value
-  _ = as_ref
+
+  default_graph = ops.get_default_graph()
+  if not default_graph.building_function:
+    return value
+
   tensor_map = _scoped_captures.tensors
   if tensor_map is None:
     # Capturing is not enabled.
     return constant_op.constant(value.numpy())
+  if type(value) == ops.Tensor and value.graph is default_graph:
+    # The tensor has already been converted and captured. The type check
+    # is intentional: we are checking that value is a Tensor and not an
+    # EagerTensor.
+    return value
   return capture_value(tensor_map, value, dtype, name)
 
 
@@ -499,20 +510,26 @@ def _defun_internal(name, func, args, kwds):
           func_outputs = func(*func_inputs, **kwds)
         finally:
           variables = tape.pop_tape().watched_variables()
+
+        # Returning a closed-over tensor as an output does not trigger a
+        # call to convert_to_tensor, so we manually capture all such tensors.
+        outputs_list = nest.flatten(func_outputs)
+        func_def_outputs = [
+            _convert_to_graph_tensor(x) for x in outputs_list if x is not None
+        ]
+
       ids = list(sorted(captures.keys()))
       if ids:
         extra_inputs, extra_placeholders = zip(* [captures[x] for x in ids])
       else:
         extra_inputs = []
         extra_placeholders = []
-      outputs_list = nest.flatten(func_outputs)
       output_shapes = tuple(x.shape for x in outputs_list if x is not None)
 
   flat_inputs = [x for x in nest.flatten(func_inputs)
                  if isinstance(x, ops.Tensor)]
   all_inputs = flat_inputs + list(extra_placeholders)
   all_ignored_ops = frozenset(x.op for x in all_inputs)
-  func_def_outputs = [x for x in outputs_list if x is not None]
   fname = _inference_name(name)
   operations = tuple(x for x in tmp_graph.get_operations()
                      if x not in all_ignored_ops)
diff --git a/tensorflow/python/eager/function_test.py b/tensorflow/python/eager/function_test.py
index aee2a91a0e..7018027386 100644
--- a/tensorflow/python/eager/function_test.py
+++ b/tensorflow/python/eager/function_test.py
@@ -323,6 +323,25 @@ class FunctionTest(test.TestCase):
 
     self.assertEqual(1, int(outer()))
 
+  def testReturnCapturedEagerTensor(self):
+    t = constant_op.constant(1)
+
+    @function.defun
+    def read():
+      return t
+
+    self.assertEqual(1, int(read()))
+
+  def testReturnCapturedGraphTensor(self):
+    with context.graph_mode(), self.test_session():
+      t = constant_op.constant(1)
+
+      @function.defun
+      def read():
+        return t
+
+      self.assertEqual(1, int(self.evaluate(read())))
+
   def testSequenceInputs(self):
     clip_by_global_norm = function.defun(clip_ops.clip_by_global_norm)
     t_list = [constant_op.constant(1.0), constant_op.constant(2.0)]
-- 
GitLab


From ed24130f90c2c45db0473df3e9158d4895ce326b Mon Sep 17 00:00:00 2001
From: Shivani Agrawal <shivaniagrawal@google.com>
Date: Fri, 15 Dec 2017 12:23:55 -0800
Subject: [PATCH 1085/1225] [tf.data] Saveable iterator for
 LatencyStatsDataset.

PiperOrigin-RevId: 179225632
---
 .../kernel_tests/stats_dataset_ops_test.py    | 28 ++++++++++++++
 .../core/kernels/data/stats_dataset_ops.cc    | 37 ++++++++++++++++---
 2 files changed, 60 insertions(+), 5 deletions(-)

diff --git a/tensorflow/contrib/data/python/kernel_tests/stats_dataset_ops_test.py b/tensorflow/contrib/data/python/kernel_tests/stats_dataset_ops_test.py
index 2b04b278ba..07bdf92044 100644
--- a/tensorflow/contrib/data/python/kernel_tests/stats_dataset_ops_test.py
+++ b/tensorflow/contrib/data/python/kernel_tests/stats_dataset_ops_test.py
@@ -224,6 +224,34 @@ class StatsDatasetSerializationTest(
         lambda: self._build_dataset_bytes_stats(num_outputs),
         lambda: self._build_dataset_bytes_stats(num_outputs // 10), num_outputs)
 
+  def _build_dataset_latency_stats(self, num_elements, tag="record_latency"):
+    return dataset_ops.Dataset.range(num_elements).apply(
+        stats_ops.latency_stats(tag))
+
+  def _build_dataset_multiple_tags(self,
+                                   num_elements,
+                                   tag1="record_latency",
+                                   tag2="record_latency_2"):
+    return dataset_ops.Dataset.range(num_elements).apply(
+        stats_ops.latency_stats(tag1)).apply(stats_ops.latency_stats(tag2))
+
+  def testLatencyStatsDatasetSaveableCore(self):
+    num_outputs = 100
+
+    self.run_core_tests(
+        lambda: self._build_dataset_latency_stats(num_outputs),
+        lambda: self._build_dataset_latency_stats(num_outputs // 10),
+        num_outputs)
+
+    self.run_core_tests(lambda: self._build_dataset_multiple_tags(num_outputs),
+                        None, num_outputs)
+
+    tag1 = "record_latency"
+    tag2 = "record_latency"
+    self.run_core_tests(
+        lambda: self._build_dataset_multiple_tags(num_outputs, tag1, tag2),
+        None, num_outputs)
+
 
 if __name__ == "__main__":
   test.main()
diff --git a/tensorflow/core/kernels/data/stats_dataset_ops.cc b/tensorflow/core/kernels/data/stats_dataset_ops.cc
index 09704d4b25..8742e6c55f 100644
--- a/tensorflow/core/kernels/data/stats_dataset_ops.cc
+++ b/tensorflow/core/kernels/data/stats_dataset_ops.cc
@@ -43,14 +43,14 @@ class LatencyStatsDatasetOp : public UnaryDatasetOpKernel {
                    DatasetBase** output) override {
     string tag;
     OP_REQUIRES_OK(ctx, ParseScalarArgument(ctx, "tag", &tag));
-    *output = new Dataset(input, std::move(tag));
+    *output = new Dataset(ctx, input, std::move(tag));
   }
 
  private:
-  class Dataset : public DatasetBase {
+  class Dataset : public GraphDatasetBase {
    public:
-    explicit Dataset(const DatasetBase* input, string tag)
-        : input_(input), tag_(std::move(tag)) {
+    explicit Dataset(OpKernelContext* ctx, const DatasetBase* input, string tag)
+        : GraphDatasetBase(ctx), input_(input), tag_(std::move(tag)) {
       input_->Ref();
     }
 
@@ -71,6 +71,17 @@ class LatencyStatsDatasetOp : public UnaryDatasetOpKernel {
 
     string DebugString() override { return "LatencyStatsDatasetOp::Dataset"; }
 
+   protected:
+    Status AsGraphDefInternal(OpKernelContext* ctx, DatasetGraphDefBuilder* b,
+                              Node** output) const override {
+      Node* input_node;
+      TF_RETURN_IF_ERROR(b->AddParentDataset(ctx, input_, &input_node));
+      Node* tag_node;
+      TF_RETURN_IF_ERROR(b->AddScalar(tag_, &tag_node));
+      TF_RETURN_IF_ERROR(b->AddDataset(this, {input_node, tag_node}, output));
+      return Status::OK();
+    }
+
    private:
     class Iterator : public DatasetIterator<Dataset> {
      public:
@@ -81,6 +92,7 @@ class LatencyStatsDatasetOp : public UnaryDatasetOpKernel {
       Status GetNextInternal(IteratorContext* ctx,
                              std::vector<Tensor>* out_tensors,
                              bool* end_of_sequence) override {
+        tf_shared_lock l(mu_);
         uint64 start = ctx->env()->NowMicros();
         Status s = input_impl_->GetNext(ctx, out_tensors, end_of_sequence);
         uint64 end = ctx->env()->NowMicros();
@@ -92,8 +104,23 @@ class LatencyStatsDatasetOp : public UnaryDatasetOpKernel {
         return s;
       }
 
+     protected:
+      Status SaveInternal(IteratorStateWriter* writer) override {
+        mutex_lock l(mu_);
+        TF_RETURN_IF_ERROR(SaveParent(writer, input_impl_));
+        return Status::OK();
+      }
+
+      Status RestoreInternal(OpKernelContext* ctx,
+                             IteratorStateReader* reader) override {
+        mutex_lock l(mu_);
+        TF_RETURN_IF_ERROR(RestoreParent(ctx, reader, input_impl_));
+        return Status::OK();
+      }
+
      private:
-      const std::unique_ptr<IteratorBase> input_impl_;
+      mutex mu_;
+      std::unique_ptr<IteratorBase> input_impl_ GUARDED_BY(mu_);
     };
 
     const DatasetBase* const input_;
-- 
GitLab


From 22828e18071a682fcccf12f79d75eb6d57144dba Mon Sep 17 00:00:00 2001
From: Bjarke Hammersholt Roune <broune@google.com>
Date: Fri, 15 Dec 2017 12:29:06 -0800
Subject: [PATCH 1086/1225] Resolves an issue that came up with ResNet50 when
 using the CPU backend as a reference.

 * The CPU now handles window reversal on convolutions instead of ignoring it.
 * The GPU backend now reports an error on window reversal instead of ignoring it.

PiperOrigin-RevId: 179226118
---
 tensorflow/compiler/xla/service/cpu/ir_emission_utils.cc  | 3 +++
 tensorflow/compiler/xla/service/cpu/ir_emitter.cc         | 8 +++++++-
 .../compiler/xla/service/gpu/convolution_folding.cc       | 8 ++++++++
 tensorflow/compiler/xla/service/gpu/ir_emission_utils.cc  | 4 ++++
 tensorflow/compiler/xla/tests/test_utils.cc               | 1 +
 5 files changed, 23 insertions(+), 1 deletion(-)

diff --git a/tensorflow/compiler/xla/service/cpu/ir_emission_utils.cc b/tensorflow/compiler/xla/service/cpu/ir_emission_utils.cc
index 3993779da6..788217aab6 100644
--- a/tensorflow/compiler/xla/service/cpu/ir_emission_utils.cc
+++ b/tensorflow/compiler/xla/service/cpu/ir_emission_utils.cc
@@ -44,6 +44,9 @@ bool PotentiallyImplementedAsEigenConvolution(
       ShapeUtil::ElementIsComplex(kernel_shape)) {
     return false;
   }
+  if (window_util::HasWindowReversal(convolution.window())) {
+    return false;
+  }
 
   const ConvolutionDimensionNumbers& dnums =
       convolution.convolution_dimension_numbers();
diff --git a/tensorflow/compiler/xla/service/cpu/ir_emitter.cc b/tensorflow/compiler/xla/service/cpu/ir_emitter.cc
index e23eb88e97..4bf3e22751 100644
--- a/tensorflow/compiler/xla/service/cpu/ir_emitter.cc
+++ b/tensorflow/compiler/xla/service/cpu/ir_emitter.cc
@@ -1111,8 +1111,14 @@ Status IrEmitter::HandleConvolution(HloInstruction* convolution) {
         llvm_ir::IrArray kernel_array(GetIrArrayFor(rhs));
         llvm_ir::IrArray::Index kernel_index(num_dims);
         for (int i = 0; i < num_spatial_dims; ++i) {
-          kernel_index[dnums.kernel_spatial_dimensions(i)] = kernel_spatial[i];
+          kernel_index[dnums.kernel_spatial_dimensions(i)] =
+              window.dimensions(i).window_reversal()
+                  ? ir_builder_.CreateNSWSub(
+                        ir_builder_.getInt64(window.dimensions(i).size() - 1),
+                        kernel_spatial[i])
+                  : kernel_spatial[i];
         }
+
         kernel_index[dnums.kernel_input_feature_dimension()] = input_feature;
         kernel_index[dnums.kernel_output_feature_dimension()] = output_feature;
 
diff --git a/tensorflow/compiler/xla/service/gpu/convolution_folding.cc b/tensorflow/compiler/xla/service/gpu/convolution_folding.cc
index f198c4c08e..b0626ca3bc 100644
--- a/tensorflow/compiler/xla/service/gpu/convolution_folding.cc
+++ b/tensorflow/compiler/xla/service/gpu/convolution_folding.cc
@@ -87,6 +87,10 @@ MatchBackwardFilter(HloInstruction* conv) {
       VLOG(1) << "Padding low should be non-negative.";
       return no_match_result;
     }
+    if (window_dim.window_reversal()) {
+      VLOG(1) << "Window reversal field not supported";
+      return no_match_result;
+    }
     // Padding high will be checked in Step 3.
   }
   if (input_batch_dim == output_batch_dim &&
@@ -246,6 +250,10 @@ MatchBackwardInput(HloInstruction* conv) {
               << " should have no window dilation.";
       return no_match_result;
     }
+    if (window_dim.window_reversal()) {
+      VLOG(1) << "Window reversal field not supported";
+      return no_match_result;
+    }
   }
 
   const auto& input_spatial_dims = dnums.input_spatial_dimensions();
diff --git a/tensorflow/compiler/xla/service/gpu/ir_emission_utils.cc b/tensorflow/compiler/xla/service/gpu/ir_emission_utils.cc
index 658fd05cd4..c04a7e0bf8 100644
--- a/tensorflow/compiler/xla/service/gpu/ir_emission_utils.cc
+++ b/tensorflow/compiler/xla/service/gpu/ir_emission_utils.cc
@@ -110,6 +110,10 @@ bool ImplementedAsDnnConvolution(const HloInstruction& hlo) {
       return false;
     }
 
+    if (window_util::HasWindowReversal(hlo.window())) {
+      return false;
+    }
+
     return true;
   }
 
diff --git a/tensorflow/compiler/xla/tests/test_utils.cc b/tensorflow/compiler/xla/tests/test_utils.cc
index 56859542a9..f9c62ec217 100644
--- a/tensorflow/compiler/xla/tests/test_utils.cc
+++ b/tensorflow/compiler/xla/tests/test_utils.cc
@@ -25,6 +25,7 @@ namespace {
 
 template <typename FloatT>
 void PopulateWithRandomFloatingPointData(Literal* literal) {
+  // TODO(b/69179121): Generate data that is less self-similar.
   CHECK_EQ(literal->shape().element_type(),
            primitive_util::NativeToPrimitiveType<FloatT>());
   std::minstd_rand0 engine;
-- 
GitLab


From d52113a3b0bb3cc5e52e8fdbc0b2c98675b37d8c Mon Sep 17 00:00:00 2001
From: Yao Zhang <yaozhang@google.com>
Date: Fri, 15 Dec 2017 12:59:10 -0800
Subject: [PATCH 1087/1225] Support permute 2d tensors.

PiperOrigin-RevId: 179229453
---
 .../api_def_DataFormatVecPermute.pbtxt        |  6 +--
 tensorflow/core/kernels/data_format_ops.cc    | 46 ++++++++++++-------
 tensorflow/core/kernels/data_format_ops.h     | 24 +++++++---
 tensorflow/core/ops/nn_ops.cc                 |  6 +--
 tensorflow/python/ops/nn_test.py              | 17 +++++++
 5 files changed, 71 insertions(+), 28 deletions(-)

diff --git a/tensorflow/core/api_def/base_api/api_def_DataFormatVecPermute.pbtxt b/tensorflow/core/api_def/base_api/api_def_DataFormatVecPermute.pbtxt
index c2fa61aaed..d41f4df304 100644
--- a/tensorflow/core/api_def/base_api/api_def_DataFormatVecPermute.pbtxt
+++ b/tensorflow/core/api_def/base_api/api_def_DataFormatVecPermute.pbtxt
@@ -3,13 +3,13 @@ op {
   in_arg {
     name: "x"
     description: <<END
-Vector in source data format. Must be of size 4.
+Vector of size 4 or Tensor of shape (2, 4) in source data format.
 END
   }
   out_arg {
     name: "y"
     description: <<END
-Vector in destination data format. Must be of size 4.
+Vector of size 4 or Tensor of shape (2, 4) in destination data format.
 END
   }
   attr {
@@ -24,7 +24,7 @@ END
 destination data format.
 END
   }
-  summary: "Returns the permuted vector in the destination data format given the one in"
+  summary: "Returns the permuted vector/tensor in the destination data format given the one in"
   description: <<END
 the source data format.
 END
diff --git a/tensorflow/core/kernels/data_format_ops.cc b/tensorflow/core/kernels/data_format_ops.cc
index a6ac119002..7a0b44dfe7 100644
--- a/tensorflow/core/kernels/data_format_ops.cc
+++ b/tensorflow/core/kernels/data_format_ops.cc
@@ -84,20 +84,34 @@ class DataFormatVecPermuteOp : public OpKernel {
 
   void Compute(OpKernelContext* context) override {
     const Tensor& input = context->input(0);
-    OP_REQUIRES(
-        context, input.dims() == 1,
-        errors::InvalidArgument("input must be a vector, but got shape ",
-                                input.shape().DebugString()));
-    OP_REQUIRES(
-        context, input.NumElements() == 4,
-        errors::InvalidArgument("input must be of size 4, but got shape ",
-                                input.shape().DebugString()));
+    OP_REQUIRES(context, input.dims() == 1 || input.dims() == 2,
+                errors::InvalidArgument(
+                    "input must be a vector or 2D tensor, but got shape ",
+                    input.shape().DebugString()));
+    if (input.dims() == 1) {
+      OP_REQUIRES(
+          context, input.NumElements() == 4,
+          errors::InvalidArgument("1D input must be of size 4, but got shape ",
+                                  input.shape().DebugString()));
+    } else if (input.dims() == 2) {
+      OP_REQUIRES(
+          context, input.dim_size(0) == 2,
+          errors::InvalidArgument(
+              "First dimension of 2D input must be of size 2, but got shape ",
+              input.shape().DebugString()));
+      OP_REQUIRES(
+          context, input.dim_size(1) == 4,
+          errors::InvalidArgument(
+              "Second dimension of 2D input must be of size 4, but got shape ",
+              input.shape().DebugString()));
+    }
+
     Tensor* output = nullptr;
     OP_REQUIRES_OK(context,
                    context->allocate_output(0, input.shape(), &output));
-    functor::DataFormatVecPermute<Device, T>()(context->eigen_device<Device>(),
-                                               input.vec<T>(), output->vec<T>(),
-                                               nhwc_to_nchw_);
+    functor::DataFormatVecPermute<Device, T>()(
+        context->eigen_device<Device>(), input.flat<T>(), output->flat<T>(),
+        nhwc_to_nchw_);
   }
 
  private:
@@ -134,11 +148,11 @@ TF_CALL_int32(DECLARE_GPU_SPECS);
 TF_CALL_int64(DECLARE_GPU_SPECS);
 #undef DECLARE_GPU_SPEC
 
-#define DECLARE_GPU_SPEC(T)                               \
-  template <>                                             \
-  void DataFormatVecPermute<GPUDevice, T>::operator()(    \
-      const GPUDevice& d, typename TTypes<T>::ConstVec x, \
-      typename TTypes<T>::Vec y, bool nhwc_to_nchw);      \
+#define DECLARE_GPU_SPEC(T)                                \
+  template <>                                              \
+  void DataFormatVecPermute<GPUDevice, T>::operator()(     \
+      const GPUDevice& d, typename TTypes<T>::ConstFlat x, \
+      typename TTypes<T>::Vec y, bool nhwc_to_nchw);       \
   extern template struct DataFormatVecPermute<GPUDevice, T>;
 #define DECLARE_GPU_SPECS(T) DECLARE_GPU_SPEC(T);
 TF_CALL_int32(DECLARE_GPU_SPECS);
diff --git a/tensorflow/core/kernels/data_format_ops.h b/tensorflow/core/kernels/data_format_ops.h
index 6a25823c73..d69f0326a4 100644
--- a/tensorflow/core/kernels/data_format_ops.h
+++ b/tensorflow/core/kernels/data_format_ops.h
@@ -42,44 +42,56 @@ struct DataFormatDimMap {
 template <typename T>
 struct VecPermuteNHWCToNCHW {
   Eigen::DSizes<Eigen::DenseIndex, 1> dimensions(
-      typename TTypes<T>::ConstVec input) const {
+      typename TTypes<T>::ConstFlat input) const {
     Eigen::DSizes<Eigen::DenseIndex, 1> result;
     result[0] = input.dimension(0);
     return result;
   }
   template <typename Output, typename Device>
-  void eval(typename TTypes<T>::ConstVec input, Output& output,
+  void eval(typename TTypes<T>::ConstFlat input, Output& output,
             const Device& d) const {
     output.template chip<0>(0).device(d) = input.template chip<0>(0);
     output.template chip<0>(1).device(d) = input.template chip<0>(3);
     output.template chip<0>(2).device(d) = input.template chip<0>(1);
     output.template chip<0>(3).device(d) = input.template chip<0>(2);
+    if (input.size() == 8) {
+      output.template chip<0>(4).device(d) = input.template chip<0>(4);
+      output.template chip<0>(5).device(d) = input.template chip<0>(7);
+      output.template chip<0>(6).device(d) = input.template chip<0>(5);
+      output.template chip<0>(7).device(d) = input.template chip<0>(6);
+    }
   }
 };
 
 template <typename T>
 struct VecPermuteNCHWToNHWC {
   Eigen::DSizes<Eigen::DenseIndex, 1> dimensions(
-      typename TTypes<T>::ConstVec input) const {
+      typename TTypes<T>::ConstFlat input) const {
     Eigen::DSizes<Eigen::DenseIndex, 1> result;
     result[0] = input.dimension(0);
     return result;
   }
   template <typename Output, typename Device>
-  void eval(typename TTypes<T>::ConstVec input, Output& output,
+  void eval(typename TTypes<T>::ConstFlat input, Output& output,
             const Device& d) const {
     output.template chip<0>(0).device(d) = input.template chip<0>(0);
     output.template chip<0>(1).device(d) = input.template chip<0>(2);
     output.template chip<0>(2).device(d) = input.template chip<0>(3);
     output.template chip<0>(3).device(d) = input.template chip<0>(1);
+    if (input.size() == 8) {
+      output.template chip<0>(4).device(d) = input.template chip<0>(4);
+      output.template chip<0>(5).device(d) = input.template chip<0>(6);
+      output.template chip<0>(6).device(d) = input.template chip<0>(7);
+      output.template chip<0>(7).device(d) = input.template chip<0>(5);
+    }
   }
 };
 
 // Functor used by DataFormatVecPermuteOp to do the computations.
 template <typename Device, typename T>
 struct DataFormatVecPermute {
-  void operator()(const Device& d, typename TTypes<T>::ConstVec x,
-                  typename TTypes<T>::Vec y, bool nhwc_to_nchw) {
+  void operator()(const Device& d, typename TTypes<T>::ConstFlat x,
+                  typename TTypes<T>::Flat y, bool nhwc_to_nchw) {
     if (nhwc_to_nchw) {
       y.device(d) = x.customOp(VecPermuteNHWCToNCHW<T>());
     } else {
diff --git a/tensorflow/core/ops/nn_ops.cc b/tensorflow/core/ops/nn_ops.cc
index 8c31be0c0d..980d0c31a3 100644
--- a/tensorflow/core/ops/nn_ops.cc
+++ b/tensorflow/core/ops/nn_ops.cc
@@ -776,11 +776,11 @@ REGISTER_OP("DataFormatVecPermute")
     .Attr("dst_format: string = 'NCHW'")
     .SetShapeFn(shape_inference::UnchangedShape)
     .Doc(R"doc(
-Returns the permuted vector in the destination data format given the one in
+Returns the permuted vector/tensor in the destination data format given the one in
 the source data format.
 
-x: Vector in source data format. Must be of size 4.
-y: Vector in destination data format. Must be of size 4.
+x: Vector of size 4 or Tensor of shape (2, 4) in source data format.
+y: Vector of size 4 or Tensor of shape (2, 4) in destination data format.
 src_format: source data format.
 dst_format: destination data format.
 )doc");
diff --git a/tensorflow/python/ops/nn_test.py b/tensorflow/python/ops/nn_test.py
index 38b8430996..b87aef6585 100644
--- a/tensorflow/python/ops/nn_test.py
+++ b/tensorflow/python/ops/nn_test.py
@@ -991,6 +991,23 @@ class DataFormatVectorPermuteTest(test_lib.TestCase):
       y_val = sess.run(y)
       self.assertAllEqual(y_val, [7, 9, 3, 4])
 
+  def testNCHWToNHWC2D(self):
+    x_val = [[7, 4, 9, 3], [4, 3, 5, 1]]
+    x = constant_op.constant(x_val)
+    y = nn_ops.data_format_vec_permute(x)
+    with self.test_session(use_gpu=test_lib.is_gpu_available()) as sess:
+      y_val = sess.run(y)
+      print(y_val)
+      self.assertAllEqual(y_val, [[7, 3, 4, 9], [4, 1, 3, 5]])
+
+  def testNHWCToNCHW2D(self):
+    x_val = [[7, 4, 9, 3], [4, 3, 5, 1]]
+    x = constant_op.constant(x_val)
+    y = nn_ops.data_format_vec_permute(x, src_format="NCHW", dst_format="NHWC")
+    with self.test_session(use_gpu=test_lib.is_gpu_available()) as sess:
+      y_val = sess.run(y)
+      self.assertAllEqual(y_val, [[7, 9, 3, 4], [4, 5, 1, 3]])
+
 
 if __name__ == "__main__":
   test_lib.main()
-- 
GitLab


From 83b3350baf9c9fc7e733c40df44f2e3f11e23d93 Mon Sep 17 00:00:00 2001
From: Eugene Brevdo <ebrevdo@google.com>
Date: Fri, 15 Dec 2017 13:10:11 -0800
Subject: [PATCH 1088/1225] [TF Eager] Non-public API for registering TF
 functions with the graph.

We have tfe.defun, which registers and calls the function (possibly
with caching).  But for ops that accept a function as an attribute,
we need to register/attach the function based on an input signature,
and query the output signature, but not actually create an op containing
the function call.

PiperOrigin-RevId: 179230747
---
 tensorflow/python/eager/function.py      | 90 ++++++++++++++++++++++--
 tensorflow/python/eager/function_test.py | 60 ++++++++++++++++
 2 files changed, 143 insertions(+), 7 deletions(-)

diff --git a/tensorflow/python/eager/function.py b/tensorflow/python/eager/function.py
index b068d5e584..d94a7acd09 100644
--- a/tensorflow/python/eager/function.py
+++ b/tensorflow/python/eager/function.py
@@ -405,6 +405,32 @@ class GraphModeFunction(object):
 
     return self._build_call_outputs(real_outputs)
 
+  @property
+  def output_shapes(self):
+    # TODO(ebrevdo): Should we only keep the output shapes associated
+    # with len(self._returns) outputs?
+    return nest.pack_sequence_as(self._func_outputs, self._output_shapes)
+
+  @property
+  def output_dtypes(self):
+    return nest.map_structure(
+        lambda x: x.dtype if x is not None else None, self._func_outputs)
+
+  @property
+  def captured_inputs(self):
+    return self._extra_inputs
+
+  @property
+  def name(self):
+    return self._function_def.name
+
+  def add_to_graph(self, g):
+    if self._function_def.name not in g._functions:  # pylint: disable=protected-access
+      g._add_function(self._function_def)  # pylint: disable=protected-access
+    for f in self._graph._functions.values():  # pylint: disable=protected-access
+      if f.name not in g._functions:  # pylint: disable=protected-access
+        g._add_function(f)  # pylint: disable=protected-access
+
   def __call__(self, *args):
     """Executes the passed function in eager mode."""
     for v in self._variables:
@@ -422,11 +448,7 @@ class GraphModeFunction(object):
     ctx = context.context()
     if ctx.in_graph_mode():
       g = ops.get_default_graph()
-      if self._function_def.name not in g._functions:  # pylint: disable=protected-access
-        g._add_function(self._function_def)  # pylint: disable=protected-access
-      for f in self._graph._functions.values():  # pylint: disable=protected-access
-        if f.name not in g._functions:  # pylint: disable=protected-access
-          g._add_function(f)  # pylint: disable=protected-access
+      self.add_to_graph(g)
       signature = self._function_def.definition.signature
       args = list(tensor_inputs) + self._extra_inputs
       op = g.create_op(
@@ -524,7 +546,9 @@ def _defun_internal(name, func, args, kwds):
       else:
         extra_inputs = []
         extra_placeholders = []
-      output_shapes = tuple(x.shape for x in outputs_list if x is not None)
+      output_shapes = tuple(
+          x.shape if isinstance(x, ops.Tensor) else None
+          for x in outputs_list)
 
   flat_inputs = [x for x in nest.flatten(func_inputs)
                  if isinstance(x, ops.Tensor)]
@@ -588,7 +612,7 @@ def named_defun(func, name):
     """Decorated version of func."""
     # Macroexpand on non-Tensor arguments
     cache_key = tuple(_cache_key(x) for x in args)
-    if not all(not isinstance(x, ops.EagerTensor) for x in kwds.values()):
+    if any(isinstance(x, ops.EagerTensor) for x in kwds.values()):
       raise ValueError("Tensor keyword arguments are not supported.")
     cache_key = (cache_key, tuple(kwds.items()))
 
@@ -652,3 +676,55 @@ def defun(func):
   """
   # TODO(apassos): deal with captured global state. Deal with control flow.
   return tf_decorator.make_decorator(func, named_defun(func, func.__name__))
+
+
+def make_defun_op(func, *args, **kwds):
+  """Compile func into graph_mode, assuming func arguments are *args, **kwargs.
+
+  `make_defun_op` converts a function that constructs a TensorFlow graph into
+  a function object and attaches it to the graph.  The resulting function
+  object can be queried for its properties, and called directly with different
+  inputs to execute.
+
+  More details on use cases and limitations are available in the
+  documentation for `defun`.
+
+  Example:
+  ```python
+  def f(x, y):
+    return tf.reduce_mean(tf.multiply(x ** 2, 3) + y)
+
+  def g(x, y):
+    return tf.reduce_mean(tf.multiply(x ** 2, 3) + y)
+
+  z = tf.constant([[0.0, 0.0]])
+  g_op = make_defun_op(g, z, z)
+
+  assert g_op.output_shapes == tf.TensorShape([])
+  assert g_op.output_types == tf.float32
+
+  x = tf.constant([[2.0, 3.0]])
+  y = tf.constant([[3.0, -2.0]])
+
+  # The plain function and defun-compiled function should return the same value.
+  assert f(x, y).numpy() == g_op(x, y).numpy()
+  ```
+
+  Args:
+    func: function to be compiled.
+    *args: List arguments to pass to `func` when attaching to the graph.
+    **kwds: Keyword arguments to pass to `func` when attaching to the graph.
+
+  Returns:
+     A wrapper object which can be queried for its output properties,
+     and which can be called directly the way a `@defun` wrapped function
+     can.
+
+  Raises:
+    ValueError: if any of the keyword arguments to `func` are `EagerTensor`
+      objects (not yet supported).
+  """
+  name = func.__name__
+  if any(isinstance(x, ops.EagerTensor) for x in kwds.values()):
+    raise ValueError("Tensor keyword arguments are not supported.")
+  return _defun_internal(name, func, args, kwds)
diff --git a/tensorflow/python/eager/function_test.py b/tensorflow/python/eager/function_test.py
index 7018027386..e3ea35a640 100644
--- a/tensorflow/python/eager/function_test.py
+++ b/tensorflow/python/eager/function_test.py
@@ -27,6 +27,7 @@ from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import errors
 from tensorflow.python.framework import function as tf_function
 from tensorflow.python.framework import ops
+from tensorflow.python.framework import tensor_shape
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import clip_ops
 from tensorflow.python.ops import math_ops
@@ -68,6 +69,65 @@ class FunctionTest(test.TestCase):
 
     self.assertAllEqual(step(), 2.0)
 
+  def testBasicDefunOpGraphMode(self):
+    matmul = function.defun(math_ops.matmul)
+
+    def sq(a):
+      return matmul(a, a)
+
+    t = constant_op.constant([[1.0, 2.0], [3.0, 4.0]])
+
+    sq_op = function.make_defun_op(sq, t)
+
+    self.assertEqual(sq_op.output_shapes, tensor_shape.TensorShape([2, 2]))
+    out = sq_op(t)
+    self.assertAllEqual(out, math_ops.matmul(t, t).numpy())
+
+  def testNestedOutputDefunOpGraphMode(self):
+    matmul = function.defun(math_ops.matmul)
+
+    def sq(a):
+      return (matmul(a, a), {'b': constant_op.constant(1.0)})
+
+    t = constant_op.constant([[1.0, 2.0], [3.0, 4.0]])
+
+    sq_op = function.make_defun_op(sq, t)
+
+    self.assertEqual(sq_op.output_shapes,
+                     (tensor_shape.TensorShape([2, 2]),
+                      {'b': tensor_shape.TensorShape([])}))
+    self.assertEqual(sq_op.output_dtypes,
+                     (dtypes.float32, {'b': dtypes.float32}))
+    (a, b) = sq_op(t)
+    self.assertAllEqual(a, math_ops.matmul(t, t).numpy())
+    self.assertAllEqual(b['b'].numpy(), 1.0)
+
+  def testDefunOpGraphModeWithGradients(self):
+    v = resource_variable_ops.ResourceVariable(1.0, name='v')
+
+    def step():
+      def inner():
+        return v * v
+
+      return backprop.implicit_grad(inner)()[0][0]
+
+    step_op = function.make_defun_op(step)
+
+    self.assertEqual(step_op.output_dtypes, dtypes.float32)
+    self.assertEqual(step_op.output_shapes, tensor_shape.TensorShape(None))
+    self.assertAllEqual(step_op(), 2.0)
+
+  def testDefunOpGraphModeNoneOutput(self):
+    def fn(unused_a, unused_b):
+      return None
+
+    x = constant_op.constant(1)
+    fn_op = function.make_defun_op(fn, x, x)
+
+    self.assertEqual(fn_op.output_dtypes, None)
+    self.assertEqual(fn_op.output_shapes, None)
+    self.assertAllEqual(fn_op(x, x), None)
+
   def testDefunReadVariable(self):
     v = resource_variable_ops.ResourceVariable(1.0)
 
-- 
GitLab


From 6373db931a8444083d12e7ac08d4d62c859a6c64 Mon Sep 17 00:00:00 2001
From: Allen Lavoie <allenl@google.com>
Date: Fri, 15 Dec 2017 13:18:52 -0800
Subject: [PATCH 1089/1225] Fix reference counts when watching variables (eager
 tape)

Adds unit test assertions that variables are properly dealloacted once the tape is deleted and there are no remaining Python references.

PiperOrigin-RevId: 179231752
---
 tensorflow/python/eager/backprop_test.py  |  9 +++------
 tensorflow/python/eager/pywrap_tfe_src.cc | 15 ++++++++++++---
 2 files changed, 15 insertions(+), 9 deletions(-)

diff --git a/tensorflow/python/eager/backprop_test.py b/tensorflow/python/eager/backprop_test.py
index 7c44d55467..3da22d4c34 100644
--- a/tensorflow/python/eager/backprop_test.py
+++ b/tensorflow/python/eager/backprop_test.py
@@ -251,8 +251,7 @@ class BackpropTest(test.TestCase):
     grad = backprop.gradients_function(fn, [0])(constant_op.constant(1.0))[0]
     self.assertAllEqual(grad, 1.0)
 
-  # TODO(b/70675592): Fix leaked Tensors in this test.
-  # @test_util.assert_no_new_tensors
+  @test_util.assert_no_new_tensors
   def testGPUImplicitGrad(self):
     if not context.context().num_gpus():
       self.skipTest('No GPU found')
@@ -377,8 +376,7 @@ class BackpropTest(test.TestCase):
     self.assertEqual(grad.numpy(), 12.0)
     del g
 
-  # TODO(b/70675592): Fix leaked Tensors in this test.
-  # @test_util.assert_no_new_tensors
+  @test_util.assert_no_new_tensors
   def testGradientTapeVariable(self):
     v = resource_variable_ops.ResourceVariable(1.0, name='v')
     with backprop.GradientTape() as g:
@@ -513,8 +511,7 @@ class BackpropTest(test.TestCase):
 
     self.assertAllEqual(backprop.gradients_function(real_f)(1.0)[0], 2.0)
 
-  # TODO(b/70675592): Fix leaked Tensors in this test.
-  # @test_util.assert_no_new_tensors
+  @test_util.assert_no_new_tensors
   def testMultiValueConvertToTensor(self):
     x = resource_variable_ops.ResourceVariable(
         initial_value=array_ops.constant([1.0]), name='x')
diff --git a/tensorflow/python/eager/pywrap_tfe_src.cc b/tensorflow/python/eager/pywrap_tfe_src.cc
index b52d71dc6c..3ba81fb3d0 100644
--- a/tensorflow/python/eager/pywrap_tfe_src.cc
+++ b/tensorflow/python/eager/pywrap_tfe_src.cc
@@ -472,9 +472,19 @@ class GradientTape
   explicit GradientTape(bool persistent)
       : tensorflow::eager::GradientTape<PyObject, PyObject>(persistent) {}
 
+  virtual ~GradientTape() {
+    for (PyObject* v : watched_variables_) {
+      Py_DECREF(v);
+    }
+  }
+
   void WatchVariable(PyObject* v) {
-    watched_variables_.insert(v);
-    Py_INCREF(v);
+    auto insert_result = watched_variables_.insert(v);
+    if (insert_result.second) {
+      // Only increment the reference count if we aren't already watching this
+      // variable.
+      Py_INCREF(v);
+    }
     PyObject* handle = PyObject_GetAttrString(v, "handle");
     if (handle == nullptr) {
       return;
@@ -722,7 +732,6 @@ PyObject* TFE_Py_TapeWatchedVariables(PyObject* tape) {
   PyObject* result = PySet_New(nullptr);
   for (PyObject* variable : watched_variables) {
     PySet_Add(result, variable);
-    Py_DECREF(variable);
   }
   return result;
 }
-- 
GitLab


From a71e79184b1d55c11ed309ce062a02e0ed5678fd Mon Sep 17 00:00:00 2001
From: Olivia Nordquist <nolivia@google.com>
Date: Fri, 15 Dec 2017 13:20:45 -0800
Subject: [PATCH 1090/1225] When the CAPI is true, the return type of
 Operation.inputs is now _InputList which is the same return type as when CAPI
 is not enabled. Previously Operation.inputs was returning a list of inputs.

PiperOrigin-RevId: 179231950
---
 tensorflow/python/framework/ops.py      | 25 +++++++++++++------------
 tensorflow/python/framework/ops_test.py |  9 +++++++++
 2 files changed, 22 insertions(+), 12 deletions(-)

diff --git a/tensorflow/python/framework/ops.py b/tensorflow/python/framework/ops.py
index 29825bdda6..721836f025 100644
--- a/tensorflow/python/framework/ops.py
+++ b/tensorflow/python/framework/ops.py
@@ -1975,23 +1975,23 @@ class Operation(object):
   class _InputList(object):
     """Immutable input list wrapper."""
 
-    def __init__(self, op):
-      self._op = op
+    def __init__(self, inputs):
+      self._inputs = inputs
 
     def __iter__(self):
-      return iter(self._op._inputs)
+      return iter(self._inputs)
 
     def __len__(self):
-      return len(self._op._inputs)
+      return len(self._inputs)
 
     def __bool__(self):
-      return bool(self._op._inputs)
+      return bool(self._inputs)
 
     # Python 3 wants __bool__, Python 2.7 wants __nonzero__
     __nonzero__ = __bool__
 
     def __getitem__(self, i):
-      return self._op._inputs[i]
+      return self._inputs[i]
 
 # pylint: enable=protected-access
 
@@ -2000,13 +2000,14 @@ class Operation(object):
     """The list of `Tensor` objects representing the data inputs of this op."""
     if self._c_op:
       tf_outputs = c_api.GetOperationInputs(self._c_op)
-      # TODO(skyewm): return Operation._InputList
       # pylint: disable=protected-access
-      return [self.graph._get_tensor_by_tf_output(tf_output)
-              for tf_output in tf_outputs]
+      retval = [
+          self.graph._get_tensor_by_tf_output(tf_output)
+          for tf_output in tf_outputs
+      ]
       # pylint: enable=protected-access
-    else:
-      return Operation._InputList(self)
+      return Operation._InputList(retval)
+    return Operation._InputList(self._inputs)
 
   @property
   def _input_dtypes(self):
@@ -4882,7 +4883,7 @@ def init_scope():
 
     (3) The gradient tape is paused while the scope is active.
   """
-# pylint: enable=g-doc-return-or-yield,line-too-long
+  # pylint: enable=g-doc-return-or-yield,line-too-long
 
   outer_context = None
   if not context.context_stack.stack:
diff --git a/tensorflow/python/framework/ops_test.py b/tensorflow/python/framework/ops_test.py
index e327e22f30..bfaddefc46 100644
--- a/tensorflow/python/framework/ops_test.py
+++ b/tensorflow/python/framework/ops_test.py
@@ -660,6 +660,15 @@ class OperationTest(test_util.TensorFlowTestCase):
       with self.assertRaisesRegexp(ValueError, "must be from the same graph"):
         y * x  # pylint: disable=pointless-statement
 
+  def testInputsAreImmutable(self):
+    g = ops.Graph()
+    with g.as_default():
+      x = test_ops.int_output()
+      op = test_ops.int_input_int_output(x, name="myop").op
+    with self.assertRaisesRegexp(
+        AttributeError, "'_InputList' object has no attribute 'append'"):
+      op.inputs.append(None)
+
 
 @test_util.with_c_api
 class CreateOpTest(test_util.TensorFlowTestCase):
-- 
GitLab


From 7a029658b8e749233d5e0e98997ce1b3b8135520 Mon Sep 17 00:00:00 2001
From: "Joshua V. Dillon" <jvdillon@google.com>
Date: Fri, 15 Dec 2017 13:28:20 -0800
Subject: [PATCH 1091/1225] Make `tf.contrib.distributions` QuadratureCompound
 classes support batch semantics when specifying grid and probs. Change
 default grid to one based on quantiles. Change interface to allow users to
 specify their own quadrature grid and probs.

PiperOrigin-RevId: 179232675
---
 tensorflow/contrib/distributions/__init__.py  |   4 +
 .../kernel_tests/distribution_util_test.py    | 105 ++++
 .../kernel_tests/poisson_lognormal_test.py    | 107 ++--
 .../kernel_tests/vector_diffeomixture_test.py |  54 +-
 .../python/ops/bijectors/softmax_centered.py  |  17 +-
 .../python/ops/poisson_lognormal.py           | 282 +++++++----
 .../distributions/python/ops/test_util.py     |   8 +-
 .../python/ops/vector_diffeomixture.py        | 468 ++++++++++++------
 tensorflow/python/ops/distributions/util.py   |  92 +++-
 9 files changed, 787 insertions(+), 350 deletions(-)

diff --git a/tensorflow/contrib/distributions/__init__.py b/tensorflow/contrib/distributions/__init__.py
index 66827179e9..7b401e178f 100644
--- a/tensorflow/contrib/distributions/__init__.py
+++ b/tensorflow/contrib/distributions/__init__.py
@@ -159,6 +159,10 @@ _allowed_symbols = [
     'assign_log_moving_mean_exp',
     'moving_mean_variance',
     'estimator_head_distribution_regression',
+    'quadrature_scheme_softmaxnormal_gauss_hermite',
+    'quadrature_scheme_softmaxnormal_quantiles',
+    'quadrature_scheme_lognormal_gauss_hermite',
+    'quadrature_scheme_lognormal_quantiles',
 ]
 
 remove_undocumented(__name__, _allowed_symbols)
diff --git a/tensorflow/contrib/distributions/python/kernel_tests/distribution_util_test.py b/tensorflow/contrib/distributions/python/kernel_tests/distribution_util_test.py
index 2d74aa1f32..a255d4fc89 100644
--- a/tensorflow/contrib/distributions/python/kernel_tests/distribution_util_test.py
+++ b/tensorflow/contrib/distributions/python/kernel_tests/distribution_util_test.py
@@ -395,5 +395,110 @@ class MixtureStddevTest(test.TestCase):
     self.assertAllClose(actual_devs, expected_devs)
 
 
+class _PadTest(object):
+
+  def testNegAxisCorrectness(self):
+    x_ = np.float32([[1., 2, 3],
+                     [4, 5, 6]])
+    value_ = np.float32(0.25)
+    count_ = np.int32(2)
+    with self.test_session() as sess:
+      x = array_ops.placeholder_with_default(
+          x_, shape=x_.shape if self.is_static_shape else None)
+      value = (constant_op.constant(value_) if self.is_static_shape
+               else array_ops.placeholder_with_default(value_, shape=None))
+      count = (constant_op.constant(count_) if self.is_static_shape
+               else array_ops.placeholder_with_default(count_, shape=None))
+
+      x0_front = distribution_util.pad(
+          x, axis=-2, value=value, count=count, front=True)
+      x0_back = distribution_util.pad(
+          x, axis=-2, count=count, back=True)
+      x0_both = distribution_util.pad(
+          x, axis=-2, value=value, front=True, back=True)
+
+      if self.is_static_shape:
+        self.assertAllEqual([4, 3], x0_front.shape)
+        self.assertAllEqual([4, 3], x0_back.shape)
+        self.assertAllEqual([4, 3], x0_both.shape)
+
+      [x0_front_, x0_back_, x0_both_] = sess.run([
+          x0_front, x0_back, x0_both])
+
+      self.assertAllClose(
+          np.float32([[value_]*3,
+                      [value_]*3,
+                      [1, 2, 3],
+                      [4, 5, 6]]),
+          x0_front_, atol=0., rtol=1e-6)
+      self.assertAllClose(
+          np.float32([[1, 2, 3],
+                      [4, 5, 6],
+                      [0.]*3,
+                      [0.]*3]),
+          x0_back_, atol=0., rtol=1e-6)
+      self.assertAllClose(
+          np.float32([[value_]*3,
+                      [1, 2, 3],
+                      [4, 5, 6],
+                      [value_]*3]),
+          x0_both_, atol=0., rtol=1e-6)
+
+  def testPosAxisCorrectness(self):
+    x_ = np.float32([[1., 2, 3],
+                     [4, 5, 6]])
+    value_ = np.float32(0.25)
+    count_ = np.int32(2)
+    with self.test_session() as sess:
+      x = array_ops.placeholder_with_default(
+          x_, shape=x_.shape if self.is_static_shape else None)
+      value = (constant_op.constant(value_) if self.is_static_shape
+               else array_ops.placeholder_with_default(value_, shape=None))
+      count = (constant_op.constant(count_) if self.is_static_shape
+               else array_ops.placeholder_with_default(count_, shape=None))
+
+      x1_front = distribution_util.pad(
+          x, axis=1, value=value, count=count, front=True)
+      x1_back = distribution_util.pad(
+          x, axis=1, count=count, back=True)
+      x1_both = distribution_util.pad(
+          x, axis=1, value=value, front=True, back=True)
+
+      if self.is_static_shape:
+        self.assertAllEqual([2, 5], x1_front.shape)
+        self.assertAllEqual([2, 5], x1_back.shape)
+        self.assertAllEqual([2, 5], x1_both.shape)
+
+      [x1_front_, x1_back_, x1_both_] = sess.run([
+          x1_front, x1_back, x1_both])
+
+      self.assertAllClose(
+          np.float32([[value_]*2 + [1, 2, 3],
+                      [value_]*2 + [4, 5, 6]]),
+          x1_front_, atol=0., rtol=1e-6)
+      self.assertAllClose(
+          np.float32([[1, 2, 3] + [0.]*2,
+                      [4, 5, 6] + [0.]*2]),
+          x1_back_, atol=0., rtol=1e-6)
+      self.assertAllClose(
+          np.float32([[value_, 1, 2, 3, value_],
+                      [value_, 4, 5, 6, value_]]),
+          x1_both_, atol=0., rtol=1e-6)
+
+
+class PadStaticTest(_PadTest, test.TestCase):
+
+  @property
+  def is_static_shape(self):
+    return True
+
+
+class PadDynamicTest(_PadTest, test.TestCase):
+
+  @property
+  def is_static_shape(self):
+    return False
+
+
 if __name__ == "__main__":
   test.main()
diff --git a/tensorflow/contrib/distributions/python/kernel_tests/poisson_lognormal_test.py b/tensorflow/contrib/distributions/python/kernel_tests/poisson_lognormal_test.py
index 3c0147b8cf..1035cb00f7 100644
--- a/tensorflow/contrib/distributions/python/kernel_tests/poisson_lognormal_test.py
+++ b/tensorflow/contrib/distributions/python/kernel_tests/poisson_lognormal_test.py
@@ -18,37 +18,40 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-import numpy as np
-
 from tensorflow.contrib.distributions.python.ops import poisson_lognormal
 from tensorflow.contrib.distributions.python.ops import test_util
-from tensorflow.python.framework import dtypes
 from tensorflow.python.ops import array_ops
 from tensorflow.python.platform import test
 
 
-class PoissonLogNormalQuadratureCompoundTest(
-    test_util.DiscreteScalarDistributionTestHelpers, test.TestCase):
+class _PoissonLogNormalQuadratureCompoundTest(
+    test_util.DiscreteScalarDistributionTestHelpers):
   """Tests the PoissonLogNormalQuadratureCompoundTest distribution."""
 
   def testSampleProbConsistent(self):
     with self.test_session() as sess:
       pln = poisson_lognormal.PoissonLogNormalQuadratureCompound(
-          loc=-2.,
-          scale=1.1,
-          quadrature_grid_and_probs=(
-              np.polynomial.hermite.hermgauss(deg=10)),
+          loc=array_ops.placeholder_with_default(
+              -2.,
+              shape=[] if self.static_shape else None),
+          scale=array_ops.placeholder_with_default(
+              1.1,
+              shape=[] if self.static_shape else None),
+          quadrature_size=10,
           validate_args=True)
       self.run_test_sample_consistent_log_prob(
-          sess.run, pln, rtol=0.1)
+          sess.run, pln, batch_size=1, rtol=0.1)
 
   def testMeanVariance(self):
     with self.test_session() as sess:
       pln = poisson_lognormal.PoissonLogNormalQuadratureCompound(
-          loc=0.,
-          scale=1.,
-          quadrature_grid_and_probs=(
-              np.polynomial.hermite.hermgauss(deg=10)),
+          loc=array_ops.placeholder_with_default(
+              0.,
+              shape=[] if self.static_shape else None),
+          scale=array_ops.placeholder_with_default(
+              1.,
+              shape=[] if self.static_shape else None),
+          quadrature_size=10,
           validate_args=True)
       self.run_test_sample_consistent_mean_variance(
           sess.run, pln, rtol=0.02)
@@ -56,21 +59,27 @@ class PoissonLogNormalQuadratureCompoundTest(
   def testSampleProbConsistentBroadcastScalar(self):
     with self.test_session() as sess:
       pln = poisson_lognormal.PoissonLogNormalQuadratureCompound(
-          loc=[0., -0.5],
-          scale=1.,
-          quadrature_grid_and_probs=(
-              np.polynomial.hermite.hermgauss(deg=10)),
+          loc=array_ops.placeholder_with_default(
+              [0., -0.5],
+              shape=[2] if self.static_shape else None),
+          scale=array_ops.placeholder_with_default(
+              1.,
+              shape=[] if self.static_shape else None),
+          quadrature_size=10,
           validate_args=True)
       self.run_test_sample_consistent_log_prob(
-          sess.run, pln, rtol=0.1, atol=0.01)
+          sess.run, pln, batch_size=2, rtol=0.1, atol=0.01)
 
   def testMeanVarianceBroadcastScalar(self):
     with self.test_session() as sess:
       pln = poisson_lognormal.PoissonLogNormalQuadratureCompound(
-          loc=[0., -0.5],
-          scale=1.,
-          quadrature_grid_and_probs=(
-              np.polynomial.hermite.hermgauss(deg=10)),
+          loc=array_ops.placeholder_with_default(
+              [0., -0.5],
+              shape=[2] if self.static_shape else None),
+          scale=array_ops.placeholder_with_default(
+              1.,
+              shape=[] if self.static_shape else None),
+          quadrature_size=10,
           validate_args=True)
       self.run_test_sample_consistent_mean_variance(
           sess.run, pln, rtol=0.1, atol=0.01)
@@ -78,38 +87,46 @@ class PoissonLogNormalQuadratureCompoundTest(
   def testSampleProbConsistentBroadcastBoth(self):
     with self.test_session() as sess:
       pln = poisson_lognormal.PoissonLogNormalQuadratureCompound(
-          loc=[[0.], [-0.5]],
-          scale=[[1., 0.9]],
-          quadrature_grid_and_probs=(
-              np.polynomial.hermite.hermgauss(deg=10)),
+          loc=array_ops.placeholder_with_default(
+              [[0.], [-0.5]],
+              shape=[2, 1] if self.static_shape else None),
+          scale=array_ops.placeholder_with_default(
+              [[1., 0.9]],
+              shape=[1, 2] if self.static_shape else None),
+          quadrature_size=10,
           validate_args=True)
       self.run_test_sample_consistent_log_prob(
-          sess.run, pln, rtol=0.1, atol=0.08)
+          sess.run, pln, batch_size=4, rtol=0.1, atol=0.08)
 
   def testMeanVarianceBroadcastBoth(self):
     with self.test_session() as sess:
       pln = poisson_lognormal.PoissonLogNormalQuadratureCompound(
-          loc=[[0.], [-0.5]],
-          scale=[[1., 0.9]],
-          quadrature_grid_and_probs=(
-              np.polynomial.hermite.hermgauss(deg=10)),
+          loc=array_ops.placeholder_with_default(
+              [[0.], [-0.5]],
+              shape=[2, 1] if self.static_shape else None),
+          scale=array_ops.placeholder_with_default(
+              [[1., 0.9]],
+              shape=[1, 2] if self.static_shape else None),
+          quadrature_size=10,
           validate_args=True)
       self.run_test_sample_consistent_mean_variance(
           sess.run, pln, rtol=0.1, atol=0.01)
 
-  def testSampleProbConsistentDynamicQuadrature(self):
-    with self.test_session() as sess:
-      qgrid = array_ops.placeholder(dtype=dtypes.float32)
-      qprobs = array_ops.placeholder(dtype=dtypes.float32)
-      g, p = np.polynomial.hermite.hermgauss(deg=10)
-      pln = poisson_lognormal.PoissonLogNormalQuadratureCompound(
-          loc=-2.,
-          scale=1.1,
-          quadrature_grid_and_probs=(g, p),
-          validate_args=True)
-      self.run_test_sample_consistent_log_prob(
-          lambda x: sess.run(x, feed_dict={qgrid: g, qprobs: p}),
-          pln, rtol=0.1)
+
+class PoissonLogNormalQuadratureCompoundStaticShapeTest(
+    _PoissonLogNormalQuadratureCompoundTest, test.TestCase):
+
+  @property
+  def static_shape(self):
+    return True
+
+
+class PoissonLogNormalQuadratureCompoundDynamicShapeTest(
+    _PoissonLogNormalQuadratureCompoundTest, test.TestCase):
+
+  @property
+  def static_shape(self):
+    return False
 
 
 if __name__ == "__main__":
diff --git a/tensorflow/contrib/distributions/python/kernel_tests/vector_diffeomixture_test.py b/tensorflow/contrib/distributions/python/kernel_tests/vector_diffeomixture_test.py
index de4a221f7b..d292b04665 100644
--- a/tensorflow/contrib/distributions/python/kernel_tests/vector_diffeomixture_test.py
+++ b/tensorflow/contrib/distributions/python/kernel_tests/vector_diffeomixture_test.py
@@ -21,9 +21,7 @@ from __future__ import print_function
 import numpy as np
 
 from tensorflow.contrib.distributions.python.ops import test_util
-from tensorflow.contrib.distributions.python.ops import vector_diffeomixture as vector_diffeomixture_lib
-from tensorflow.python.framework import dtypes
-from tensorflow.python.ops import array_ops
+from tensorflow.contrib.distributions.python.ops import vector_diffeomixture as vdm_lib
 from tensorflow.python.ops.distributions import normal as normal_lib
 from tensorflow.python.ops.linalg import linear_operator_diag as linop_diag_lib
 from tensorflow.python.ops.linalg import linear_operator_identity as linop_identity_lib
@@ -37,7 +35,7 @@ class VectorDiffeomixtureTest(
   def testSampleProbConsistentBroadcastMixNoBatch(self):
     with self.test_session() as sess:
       dims = 4
-      vdm = vector_diffeomixture_lib.VectorDiffeomixture(
+      vdm = vdm_lib.VectorDiffeomixture(
           mix_loc=[[0.], [1.]],
           mix_scale=[1.],
           distribution=normal_lib.Normal(0., 1.),
@@ -54,18 +52,19 @@ class VectorDiffeomixtureTest(
                   diag=np.linspace(2.5, 3.5, dims, dtype=np.float32),
                   is_positive_definite=True),
           ],
+          quadrature_size=8,
           validate_args=True)
       # Ball centered at component0's mean.
       self.run_test_sample_consistent_log_prob(
-          sess.run, vdm, radius=2., center=0., rtol=0.005)
+          sess.run, vdm, radius=2., center=0., rtol=0.015)
       # Larger ball centered at component1's mean.
       self.run_test_sample_consistent_log_prob(
-          sess.run, vdm, radius=4., center=2., rtol=0.005)
+          sess.run, vdm, radius=4., center=2., rtol=0.015)
 
   def testSampleProbConsistentBroadcastMixNonStandardBase(self):
     with self.test_session() as sess:
       dims = 4
-      vdm = vector_diffeomixture_lib.VectorDiffeomixture(
+      vdm = vdm_lib.VectorDiffeomixture(
           mix_loc=[[0.], [1.]],
           mix_scale=[1.],
           distribution=normal_lib.Normal(1., 1.5),
@@ -82,18 +81,19 @@ class VectorDiffeomixtureTest(
                   diag=np.linspace(2.5, 3.5, dims, dtype=np.float32),
                   is_positive_definite=True),
           ],
+          quadrature_size=8,
           validate_args=True)
       # Ball centered at component0's mean.
       self.run_test_sample_consistent_log_prob(
-          sess.run, vdm, radius=2., center=1., rtol=0.006)
+          sess.run, vdm, radius=2., center=1., rtol=0.015)
       # Larger ball centered at component1's mean.
       self.run_test_sample_consistent_log_prob(
-          sess.run, vdm, radius=4., center=3., rtol=0.009)
+          sess.run, vdm, radius=4., center=3., rtol=0.01)
 
   def testSampleProbConsistentBroadcastMixBatch(self):
     with self.test_session() as sess:
       dims = 4
-      vdm = vector_diffeomixture_lib.VectorDiffeomixture(
+      vdm = vdm_lib.VectorDiffeomixture(
           mix_loc=[[0.], [1.]],
           mix_scale=[1.],
           distribution=normal_lib.Normal(0., 1.),
@@ -113,18 +113,19 @@ class VectorDiffeomixtureTest(
                   ]),
                   is_positive_definite=True),
           ],
+          quadrature_size=8,
           validate_args=True)
       # Ball centered at component0's mean.
       self.run_test_sample_consistent_log_prob(
-          sess.run, vdm, radius=2., center=0., rtol=0.005)
+          sess.run, vdm, radius=2., center=0., rtol=0.01)
       # Larger ball centered at component1's mean.
       self.run_test_sample_consistent_log_prob(
-          sess.run, vdm, radius=4., center=2., rtol=0.005)
+          sess.run, vdm, radius=4., center=2., rtol=0.01)
 
   def testMeanCovarianceNoBatch(self):
     with self.test_session() as sess:
       dims = 3
-      vdm = vector_diffeomixture_lib.VectorDiffeomixture(
+      vdm = vdm_lib.VectorDiffeomixture(
           mix_loc=[[0.], [4.]],
           mix_scale=[10.],
           distribution=normal_lib.Normal(0., 1.),
@@ -141,14 +142,15 @@ class VectorDiffeomixtureTest(
                   diag=np.linspace(2.5, 3.5, dims, dtype=np.float32),
                   is_positive_definite=True),
           ],
+          quadrature_size=8,
           validate_args=True)
       self.run_test_sample_consistent_mean_covariance(
-          sess.run, vdm, rtol=0.02, cov_rtol=0.06)
+          sess.run, vdm, rtol=0.02, cov_rtol=0.08)
 
   def testMeanCovarianceNoBatchUncenteredNonStandardBase(self):
     with self.test_session() as sess:
       dims = 3
-      vdm = vector_diffeomixture_lib.VectorDiffeomixture(
+      vdm = vdm_lib.VectorDiffeomixture(
           mix_loc=[[0.], [4.]],
           mix_scale=[10.],
           distribution=normal_lib.Normal(-1., 1.5),
@@ -165,6 +167,7 @@ class VectorDiffeomixtureTest(
                   diag=np.linspace(2.5, 3.5, dims, dtype=np.float32),
                   is_positive_definite=True),
           ],
+          quadrature_size=8,
           validate_args=True)
       self.run_test_sample_consistent_mean_covariance(
           sess.run, vdm, num_samples=int(1e6), rtol=0.01, cov_atol=0.025)
@@ -172,7 +175,7 @@ class VectorDiffeomixtureTest(
   def testMeanCovarianceBatch(self):
     with self.test_session() as sess:
       dims = 3
-      vdm = vector_diffeomixture_lib.VectorDiffeomixture(
+      vdm = vdm_lib.VectorDiffeomixture(
           mix_loc=[[0.], [4.]],
           mix_scale=[10.],
           distribution=normal_lib.Normal(0., 1.),
@@ -192,18 +195,16 @@ class VectorDiffeomixtureTest(
                   ]),
                   is_positive_definite=True),
           ],
+          quadrature_size=8,
           validate_args=True)
       self.run_test_sample_consistent_mean_covariance(
-          sess.run, vdm, rtol=0.02, cov_rtol=0.06)
+          sess.run, vdm, rtol=0.02, cov_rtol=0.07)
 
-  def testSampleProbConsistentDynamicQuadrature(self):
+  def testSampleProbConsistentQuadrature(self):
     with self.test_session() as sess:
-      qgrid = array_ops.placeholder(dtype=dtypes.float32)
-      qprobs = array_ops.placeholder(dtype=dtypes.float32)
-      g, p = np.polynomial.hermite.hermgauss(deg=8)
       dims = 4
-      vdm = vector_diffeomixture_lib.VectorDiffeomixture(
-          mix_loc=[[0.], [1.]],
+      vdm = vdm_lib.VectorDiffeomixture(
+          mix_loc=[0.],
           mix_scale=[1.],
           distribution=normal_lib.Normal(0., 1.),
           loc=[
@@ -219,15 +220,14 @@ class VectorDiffeomixtureTest(
                   diag=np.linspace(2.5, 3.5, dims, dtype=np.float32),
                   is_positive_definite=True),
           ],
-          quadrature_grid_and_probs=(g, p),
+          quadrature_size=3,
           validate_args=True)
       # Ball centered at component0's mean.
-      sess_run_fn = lambda x: sess.run(x, feed_dict={qgrid: g, qprobs: p})
       self.run_test_sample_consistent_log_prob(
-          sess_run_fn, vdm, radius=2., center=0., rtol=0.005)
+          sess.run, vdm, radius=2., center=0., rtol=0.015)
       # Larger ball centered at component1's mean.
       self.run_test_sample_consistent_log_prob(
-          sess_run_fn, vdm, radius=4., center=2., rtol=0.005)
+          sess.run, vdm, radius=4., center=2., rtol=0.005)
 
   # TODO(jvdillon): We've tested that (i) .sample and .log_prob are consistent,
   # (ii) .mean, .stddev etc... and .sample are consistent. However, we haven't
diff --git a/tensorflow/contrib/distributions/python/ops/bijectors/softmax_centered.py b/tensorflow/contrib/distributions/python/ops/bijectors/softmax_centered.py
index e4a1d3dde2..a9dcce6c52 100644
--- a/tensorflow/contrib/distributions/python/ops/bijectors/softmax_centered.py
+++ b/tensorflow/contrib/distributions/python/ops/bijectors/softmax_centered.py
@@ -20,6 +20,7 @@ from __future__ import print_function
 
 import numpy as np
 
+from tensorflow.contrib.distributions.python.ops import distribution_util
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
 from tensorflow.python.framework import tensor_shape
@@ -134,11 +135,8 @@ class SoftmaxCentered(bijector.Bijector):
     # Pad the last dim with a zeros vector. We need this because it lets us
     # infer the scale in the inverse function.
     y = array_ops.expand_dims(x, dim=-1) if self._static_event_ndims == 0 else x
-    ndims = _get_ndims(y)
-    y = array_ops.pad(y, paddings=array_ops.one_hot(indices=[-1, ndims - 1],
-                                                    depth=ndims,
-                                                    axis=0,
-                                                    dtype=dtypes.int32))
+    y = distribution_util.pad(y, axis=-1, back=True)
+
     # Set shape hints.
     if x.shape.ndims is not None:
       shape = x.shape.as_list()
@@ -166,7 +164,7 @@ class SoftmaxCentered(bijector.Bijector):
     shape = (np.asarray(y.shape.as_list(), dtype=np.int32)
              if y.shape.is_fully_defined()
              else array_ops.shape(y, name="shape"))
-    ndims = _get_ndims(y)
+    ndims = distribution_util.prefer_static_rank(y)
 
     # Do this first to make sure CSE catches that it'll happen again in
     # _inverse_log_det_jacobian.
@@ -240,10 +238,3 @@ class SoftmaxCentered(bijector.Bijector):
                                   axis=-1,
                                   keep_dims=True))
       return array_ops.squeeze(fldj, squeeze_dims=-1)
-
-
-def _get_ndims(x):
-  """Returns `ndims`, statically if possible."""
-  if x.shape.ndims is not None:
-    return x.shape.ndims
-  return array_ops.rank(x, name="ndims")
diff --git a/tensorflow/contrib/distributions/python/ops/poisson_lognormal.py b/tensorflow/contrib/distributions/python/ops/poisson_lognormal.py
index 2701c36fb5..92f2bba182 100644
--- a/tensorflow/contrib/distributions/python/ops/poisson_lognormal.py
+++ b/tensorflow/contrib/distributions/python/ops/poisson_lognormal.py
@@ -22,21 +22,135 @@ import numpy as np
 
 from tensorflow.contrib.distributions.python.ops import distribution_util
 from tensorflow.contrib.distributions.python.ops import poisson as poisson_lib
+from tensorflow.contrib.distributions.python.ops.bijectors.exp import Exp
+from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
 from tensorflow.python.framework import tensor_shape
-from tensorflow.python.framework import tensor_util
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import random_ops
 from tensorflow.python.ops.distributions import categorical as categorical_lib
 from tensorflow.python.ops.distributions import distribution as distribution_lib
+from tensorflow.python.ops.distributions import normal as normal_lib
+from tensorflow.python.ops.distributions import transformed_distribution as transformed_lib
 
 
 __all__ = [
     "PoissonLogNormalQuadratureCompound",
+    "quadrature_scheme_lognormal_gauss_hermite",
+    "quadrature_scheme_lognormal_quantiles",
 ]
 
 
+def quadrature_scheme_lognormal_gauss_hermite(
+    loc, scale, quadrature_size,
+    validate_args=False, name=None):  # pylint: disable=unused-argument
+  """Use Gauss-Hermite quadrature to form quadrature on positive-reals.
+
+  Note: for a given `quadrature_size`, this method is generally less accurate
+  than `quadrature_scheme_lognormal_quantiles`.
+
+  Args:
+    loc: `float`-like (batch of) scalar `Tensor`; the location parameter of
+      the LogNormal prior.
+    scale: `float`-like (batch of) scalar `Tensor`; the scale parameter of
+      the LogNormal prior.
+    quadrature_size: Python `int` scalar representing the number of quadrature
+      points.
+    validate_args: Python `bool`, default `False`. When `True` distribution
+      parameters are checked for validity despite possibly degrading runtime
+      performance. When `False` invalid inputs may silently render incorrect
+      outputs.
+    name: Python `str` name prefixed to Ops created by this class.
+
+  Returns:
+    grid: (Batch of) length-`quadrature_size` vectors representing the
+      `log_rate` parameters of a `Poisson`.
+    probs: (Batch of) length-`quadrature_size` vectors representing the
+      weight associate with each `grid` value.
+  """
+  with ops.name_scope(name, "vector_diffeomixture_quadrature_gauss_hermite",
+                      [loc, scale]):
+    grid, probs = np.polynomial.hermite.hermgauss(deg=quadrature_size)
+    grid = grid.astype(loc.dtype.as_numpy_dtype)
+    probs = probs.astype(loc.dtype.as_numpy_dtype)
+    probs /= np.linalg.norm(probs, ord=1, keepdims=True)
+    probs = ops.convert_to_tensor(probs, name="probs", dtype=loc.dtype)
+    # The following maps the broadcast of `loc` and `scale` to each grid
+    # point, i.e., we are creating several log-rates that correspond to the
+    # different Gauss-Hermite quadrature points and (possible) batches of
+    # `loc` and `scale`.
+    grid = (loc[..., array_ops.newaxis]
+            + np.sqrt(2.) * scale[..., array_ops.newaxis] * grid)
+    return grid, probs
+
+
+def quadrature_scheme_lognormal_quantiles(
+    loc, scale, quadrature_size,
+    validate_args=False, name=None):
+  """Use LogNormal quantiles to form quadrature on positive-reals.
+
+  Args:
+    loc: `float`-like (batch of) scalar `Tensor`; the location parameter of
+      the LogNormal prior.
+    scale: `float`-like (batch of) scalar `Tensor`; the scale parameter of
+      the LogNormal prior.
+    quadrature_size: Python `int` scalar representing the number of quadrature
+      points.
+    validate_args: Python `bool`, default `False`. When `True` distribution
+      parameters are checked for validity despite possibly degrading runtime
+      performance. When `False` invalid inputs may silently render incorrect
+      outputs.
+    name: Python `str` name prefixed to Ops created by this class.
+
+  Returns:
+    grid: (Batch of) length-`quadrature_size` vectors representing the
+      `log_rate` parameters of a `Poisson`.
+    probs: (Batch of) length-`quadrature_size` vectors representing the
+      weight associate with each `grid` value.
+  """
+  with ops.name_scope(name, "quadrature_scheme_lognormal_quantiles",
+                      [loc, scale]):
+    # Create a LogNormal distribution.
+    dist = transformed_lib.TransformedDistribution(
+        distribution=normal_lib.Normal(loc=loc, scale=scale),
+        bijector=Exp(event_ndims=0),
+        validate_args=validate_args)
+    batch_ndims = dist.batch_shape.ndims
+    if batch_ndims is None:
+      batch_ndims = array_ops.shape(dist.batch_shape_tensor())[0]
+
+    def _compute_quantiles():
+      """Helper to build quantiles."""
+      # Omit {0, 1} since they might lead to Inf/NaN.
+      zero = array_ops.zeros([], dtype=dist.dtype)
+      edges = math_ops.linspace(zero, 1., quadrature_size + 3)[1:-1]
+      # Expand edges so its broadcast across batch dims.
+      edges = array_ops.reshape(edges, shape=array_ops.concat([
+          [-1], array_ops.ones([batch_ndims], dtype=dtypes.int32)], axis=0))
+      quantiles = dist.quantile(edges)
+      # Cyclically permute left by one.
+      perm = array_ops.concat([
+          math_ops.range(1, 1 + batch_ndims), [0]], axis=0)
+      quantiles = array_ops.transpose(quantiles, perm)
+      return quantiles
+    quantiles = _compute_quantiles()
+
+    # Compute grid as quantile midpoints.
+    grid = (quantiles[..., :-1] + quantiles[..., 1:]) / 2.
+    # Set shape hints.
+    grid.set_shape(dist.batch_shape.concatenate([quadrature_size]))
+
+    # By construction probs is constant, i.e., `1 / quadrature_size`. This is
+    # important, because non-constant probs leads to non-reparameterizable
+    # samples.
+    probs = array_ops.fill(
+        dims=[quadrature_size],
+        value=1. / math_ops.cast(quadrature_size, dist.dtype))
+
+    return grid, probs
+
+
 class PoissonLogNormalQuadratureCompound(distribution_lib.Distribution):
   """`PoissonLogNormalQuadratureCompound` distribution.
 
@@ -47,30 +161,18 @@ class PoissonLogNormalQuadratureCompound(distribution_lib.Distribution):
   ```none
   p(k|loc, scale)
   = int_{R_+} dl LogNormal(l | loc, scale) Poisson(k | l)
-  = int_{R} dz ((lambda(z) sqrt(2) scale)
-                * exp(-z**2) / (lambda(z) sqrt(2 pi) sigma)
-                * Poisson(k | lambda(z)))
-  = int_{R} dz exp(-z**2) / sqrt(pi) Poisson(k | lambda(z))
   approx= sum{ prob[d] Poisson(k | lambda(grid[d])) : d=0, ..., deg-1 }
   ```
 
-  where `lambda(z) = exp(sqrt(2) scale z + loc)` and the `prob,grid` terms
-  are from [numerical quadrature](
-  https://en.wikipedia.org/wiki/Numerical_integration) (default:
-  [Gauss--Hermite quadrature](
-  https://en.wikipedia.org/wiki/Gauss%E2%80%93Hermite_quadrature)). Note that
-  the second line made the substitution:
-  `z(l) = (log(l) - loc) / (sqrt(2) scale)` which implies `lambda(z)` [above]
-  and `dl = sqrt(2) scale lambda(z) dz`
+  By default, the `grid` is chosen as quantiles of the `LogNormal` distribution
+  parameterized by `loc`, `scale` and the `prob` vector is
+  `[1. / quadrature_size]*quadrature_size`.
 
   In the non-approximation case, a draw from the LogNormal prior represents the
   Poisson rate parameter. Unfortunately, the non-approximate distribution lacks
   an analytical probability density function (pdf). Therefore the
   `PoissonLogNormalQuadratureCompound` class implements an approximation based
-  on [numerical quadrature](
-  https://en.wikipedia.org/wiki/Numerical_integration) (default:
-  [Gauss--Hermite quadrature](
-  https://en.wikipedia.org/wiki/Gauss%E2%80%93Hermite_quadrature)).
+  on [quadrature](https://en.wikipedia.org/wiki/Numerical_integration).
 
   Note: although the `PoissonLogNormalQuadratureCompound` is approximately the
   Poisson-LogNormal compound distribution, it is itself a valid distribution.
@@ -84,10 +186,8 @@ class PoissonLogNormalQuadratureCompound(distribution_lib.Distribution):
   https://en.wikipedia.org/wiki/Compound_probability_distribution). Using
   variable-substitution and [numerical quadrature](
   https://en.wikipedia.org/wiki/Numerical_integration) (default:
-  [Gauss--Hermite quadrature](
-  https://en.wikipedia.org/wiki/Gauss%E2%80%93Hermite_quadrature)) we can
-  redefine the distribution to be a parameter-less convex combination of `deg`
-  different Poisson samples.
+  based on `LogNormal` quantiles) we can redefine the distribution to be a
+  parameter-less convex combination of `deg` different Poisson samples.
 
   That is, defined over positive integers, this distribution is parameterized
   by a (batch of) `loc` and `scale` scalars.
@@ -96,14 +196,10 @@ class PoissonLogNormalQuadratureCompound(distribution_lib.Distribution):
 
   ```none
   pdf(k | loc, scale, deg)
-    = sum{ prob[d] Poisson(k | lambda=exp(sqrt(2) scale grid[d] + loc))
+    = sum{ prob[d] Poisson(k | lambda=exp(grid[d]))
           : d=0, ..., deg-1 }
   ```
 
-  where, [e.g., `grid, w = numpy.polynomial.hermite.hermgauss(deg)`](
-  https://docs.scipy.org/doc/numpy-1.10.0/reference/generated/numpy.polynomial.hermite.hermgauss.html)
-  and `prob = w / sqrt(pi)`.
-
   #### Examples
 
   ```python
@@ -114,29 +210,37 @@ class PoissonLogNormalQuadratureCompound(distribution_lib.Distribution):
   pln = tfd.PoissonLogNormalQuadratureCompound(
       loc=[0., -0.5],
       scale=1.,
-      quadrature_grid_and_probs=(
-        np.polynomial.hermite.hermgauss(deg=10)),
+      quadrature_size=10,
       validate_args=True)
   """
 
   def __init__(self,
                loc,
                scale,
-               quadrature_grid_and_probs=None,
+               quadrature_size=8,
+               quadrature_fn=quadrature_scheme_lognormal_quantiles,
                validate_args=False,
                allow_nan_stats=True,
                name="PoissonLogNormalQuadratureCompound"):
-    """Constructs the PoissonLogNormalQuadratureCompound on `R**k`.
+    """Constructs the PoissonLogNormalQuadratureCompound`.
+
+    Note: `probs` returned by (optional) `quadrature_fn` are presumed to be
+    either a length-`quadrature_size` vector or a batch of vectors in 1-to-1
+    correspondence with the returned `grid`. (I.e., broadcasting is only
+    partially supported.)
 
     Args:
       loc: `float`-like (batch of) scalar `Tensor`; the location parameter of
         the LogNormal prior.
       scale: `float`-like (batch of) scalar `Tensor`; the scale parameter of
         the LogNormal prior.
-      quadrature_grid_and_probs: Python pair of `float`-like `Tensor`s
-        representing the sample points and the corresponding (possibly
-        normalized) weight.  When `None`, defaults to:
-        `np.polynomial.hermite.hermgauss(deg=8)`.
+      quadrature_size: Python `int` scalar representing the number of quadrature
+        points.
+      quadrature_fn: Python callable taking `loc`, `scale`,
+        `quadrature_size`, `validate_args` and returning `tuple(grid, probs)`
+        representing the LogNormal grid and corresponding normalized weight.
+        normalized) weight.
+        Default value: `quadrature_scheme_lognormal_quantiles`.
       validate_args: Python `bool`, default `False`. When `True` distribution
         parameters are checked for validity despite possibly degrading runtime
         performance. When `False` invalid inputs may silently render incorrect
@@ -148,47 +252,41 @@ class PoissonLogNormalQuadratureCompound(distribution_lib.Distribution):
       name: Python `str` name prefixed to Ops created by this class.
 
     Raises:
-      TypeError: if `loc.dtype != scale[0].dtype`.
+      TypeError: if `quadrature_grid` and `quadrature_probs` have different base
+        `dtype`.
     """
     parameters = locals()
     with ops.name_scope(name, values=[loc, scale]):
-      loc = ops.convert_to_tensor(loc, name="loc")
-      self._loc = loc
-
-      scale = ops.convert_to_tensor(scale, name="scale")
-      self._scale = scale
-
-      dtype = loc.dtype.base_dtype
-      if dtype != scale.dtype.base_dtype:
-        raise TypeError(
-            "loc.dtype(\"{}\") does not match scale.dtype(\"{}\")".format(
-                loc.dtype.name, scale.dtype.name))
+      if loc is not None:
+        loc = ops.convert_to_tensor(loc, name="loc")
+      if scale is not None:
+        scale = ops.convert_to_tensor(
+            scale, dtype=None if loc is None else loc.dtype, name="scale")
+      self._quadrature_grid, self._quadrature_probs = tuple(quadrature_fn(
+          loc, scale, quadrature_size, validate_args))
+
+      dt = self._quadrature_grid.dtype
+      if dt.base_dtype != self._quadrature_probs.dtype.base_dtype:
+        raise TypeError("Quadrature grid dtype ({}) does not match quadrature "
+                        "probs dtype ({}).".format(
+                            dt.name, self._quadrature_probs.dtype.name))
 
-      grid, probs = distribution_util.process_quadrature_grid_and_probs(
-          quadrature_grid_and_probs, dtype, validate_args)
-      self._quadrature_grid = grid
-      self._quadrature_probs = probs
-      self._quadrature_size = distribution_util.dimension_size(probs, axis=0)
+      self._distribution = poisson_lib.Poisson(
+          log_rate=self._quadrature_grid,
+          validate_args=validate_args,
+          allow_nan_stats=allow_nan_stats)
 
       self._mixture_distribution = categorical_lib.Categorical(
           logits=math_ops.log(self._quadrature_probs),
           validate_args=validate_args,
           allow_nan_stats=allow_nan_stats)
 
-      # The following maps the broadcast of `loc` and `scale` to each grid
-      # point, i.e., we are creating several log-rates that correspond to the
-      # different Gauss-Hermite quadrature points and (possible) batches of
-      # `loc` and `scale`.
-      self._log_rate = (loc[..., array_ops.newaxis]
-                        + np.sqrt(2.) * scale[..., array_ops.newaxis] * grid)
-
-      self._distribution = poisson_lib.Poisson(
-          log_rate=self._log_rate,
-          validate_args=validate_args,
-          allow_nan_stats=allow_nan_stats)
+      self._loc = loc
+      self._scale = scale
+      self._quadrature_size = quadrature_size
 
       super(PoissonLogNormalQuadratureCompound, self).__init__(
-          dtype=dtype,
+          dtype=dt,
           reparameterization_type=distribution_lib.NOT_REPARAMETERIZED,
           validate_args=validate_args,
           allow_nan_stats=allow_nan_stats,
@@ -198,12 +296,12 @@ class PoissonLogNormalQuadratureCompound(distribution_lib.Distribution):
 
   @property
   def mixture_distribution(self):
-    """Distribution which randomly selects a Poisson with Gauss-Hermite rate."""
+    """Distribution which randomly selects a Poisson with quadrature param."""
     return self._mixture_distribution
 
   @property
   def distribution(self):
-    """Base Poisson parameterized by a Gauss-Hermite grid of rates."""
+    """Base Poisson parameterized by a quadrature grid."""
     return self._distribution
 
   @property
@@ -217,24 +315,18 @@ class PoissonLogNormalQuadratureCompound(distribution_lib.Distribution):
     return self._scale
 
   @property
-  def quadrature_grid(self):
-    """Quadrature grid points."""
-    return self._quadrature_grid
-
-  @property
-  def quadrature_probs(self):
-    """Quadrature normalized weights."""
-    return self._quadrature_probs
+  def quadrature_size(self):
+    return self._quadrature_size
 
   def _batch_shape_tensor(self):
     return array_ops.broadcast_dynamic_shape(
-        array_ops.shape(self.loc),
-        array_ops.shape(self.scale))
+        self.distribution.batch_shape_tensor(),
+        array_ops.shape(self.mixture_distribution.logits))[:-1]
 
   def _batch_shape(self):
     return array_ops.broadcast_static_shape(
-        self.loc.shape,
-        self.scale.shape)
+        self.distribution.batch_shape,
+        self.mixture_distribution.logits.shape)[:-1]
 
   def _event_shape(self):
     return tensor_shape.scalar()
@@ -242,18 +334,31 @@ class PoissonLogNormalQuadratureCompound(distribution_lib.Distribution):
   def _sample_n(self, n, seed=None):
     # Get ids as a [n, batch_size]-shaped matrix, unless batch_shape=[] then get
     # ids as a [n]-shaped vector.
-    batch_size = (np.prod(self.batch_shape.as_list(), dtype=np.int32)
-                  if self.batch_shape.is_fully_defined()
-                  else math_ops.reduce_prod(self.batch_shape_tensor()))
+    batch_size = self.batch_shape.num_elements()
+    if batch_size is None:
+      batch_size = math_ops.reduce_prod(self.batch_shape_tensor())
+    # We need to "sample extra" from the mixture distribution if it doesn't
+    # already specify a probs vector for each batch coordinate.
+    # We only support this kind of reduced broadcasting, i.e., there is exactly
+    # one probs vector for all batch dims or one for each.
     ids = self._mixture_distribution.sample(
         sample_shape=concat_vectors(
             [n],
             distribution_util.pick_vector(
-                self.is_scalar_batch(),
-                np.int32([]),
-                [batch_size])),
+                self.mixture_distribution.is_scalar_batch(),
+                [batch_size],
+                np.int32([]))),
         seed=distribution_util.gen_new_seed(
             seed, "poisson_lognormal_quadrature_compound"))
+    # We need to flatten batch dims in case mixture_distribution has its own
+    # batch dims.
+    ids = array_ops.reshape(ids, shape=concat_vectors(
+        [n],
+        distribution_util.pick_vector(
+            self.is_scalar_batch(),
+            np.int32([]),
+            np.int32([-1]))))
+
     # Stride `quadrature_size` for `batch_size` number of times.
     offset = math_ops.range(start=0,
                             limit=batch_size * self._quadrature_size,
@@ -276,7 +381,7 @@ class PoissonLogNormalQuadratureCompound(distribution_lib.Distribution):
   def _mean(self):
     return math_ops.exp(
         math_ops.reduce_logsumexp(
-            self.mixture_distribution.logits + self._log_rate,
+            self.mixture_distribution.logits + self.distribution.log_rate,
             axis=-1))
 
   def _variance(self):
@@ -301,7 +406,7 @@ class PoissonLogNormalQuadratureCompound(distribution_lib.Distribution):
     # Var[E[Z | V]] = sum{ prob[d] (Mean[d] - Mean)**2 : d=0, ..., deg-1 }
     v = array_ops.stack([
         # log(self.distribution.variance()) = log(Var[d]) = log(rate[d])
-        self._log_rate,
+        self.distribution.log_rate,
         # log((Mean[d] - Mean)**2)
         2. * math_ops.log(
             math_ops.abs(self.distribution.mean()
@@ -312,14 +417,9 @@ class PoissonLogNormalQuadratureCompound(distribution_lib.Distribution):
         axis=[-2, -1])
 
 
-def static_value(x):
-  """Returns the static value of a `Tensor` or `None`."""
-  return tensor_util.constant_value(ops.convert_to_tensor(x))
-
-
 def concat_vectors(*args):
   """Concatenates input vectors, statically if possible."""
-  args_ = [static_value(x) for x in args]
+  args_ = [distribution_util.static_value(x) for x in args]
   if any(vec is None for vec in args_):
     return array_ops.concat(args, axis=0)
   return [val for vec in args_ for val in vec]
diff --git a/tensorflow/contrib/distributions/python/ops/test_util.py b/tensorflow/contrib/distributions/python/ops/test_util.py
index 77f2a39273..bfc727450f 100644
--- a/tensorflow/contrib/distributions/python/ops/test_util.py
+++ b/tensorflow/contrib/distributions/python/ops/test_util.py
@@ -40,6 +40,7 @@ class DiscreteScalarDistributionTestHelpers(object):
   def run_test_sample_consistent_log_prob(
       self, sess_run_fn, dist,
       num_samples=int(1e5), num_threshold=int(1e3), seed=42,
+      batch_size=None,
       rtol=1e-2, atol=0.):
     """Tests that sample/log_prob are consistent with each other.
 
@@ -66,6 +67,8 @@ class DiscreteScalarDistributionTestHelpers(object):
       seed: Python `int` indicating the seed to use when sampling from `dist`.
         In general it is not recommended to use `None` during a test as this
         increases the likelihood of spurious test failure.
+      batch_size: Hint for unpacking result of samples. Default: `None` means
+        batch_size is inferred.
       rtol: Python `float`-type indicating the admissible relative error between
         analytical and sample statistics.
       atol: Python `float`-type indicating the admissible absolute error between
@@ -80,10 +83,11 @@ class DiscreteScalarDistributionTestHelpers(object):
     # Histogram only supports vectors so we call it once per batch coordinate.
     y = dist.sample(num_samples, seed=seed)
     y = array_ops.reshape(y, shape=[num_samples, -1])
-    batch_size = math_ops.reduce_prod(dist.batch_shape_tensor())
+    if batch_size is None:
+      batch_size = math_ops.reduce_prod(dist.batch_shape_tensor())
     batch_dims = array_ops.shape(dist.batch_shape_tensor())[0]
     edges_expanded_shape = 1 + array_ops.pad([-2], paddings=[[0, batch_dims]])
-    for b, x in enumerate(array_ops.unstack(y, axis=1)):
+    for b, x in enumerate(array_ops.unstack(y, num=batch_size, axis=1)):
       counts, edges = self.histogram(x)
       edges = array_ops.reshape(edges, edges_expanded_shape)
       probs = math_ops.exp(dist.log_prob(edges))
diff --git a/tensorflow/contrib/distributions/python/ops/vector_diffeomixture.py b/tensorflow/contrib/distributions/python/ops/vector_diffeomixture.py
index 904724af42..7ce8a83fd9 100644
--- a/tensorflow/contrib/distributions/python/ops/vector_diffeomixture.py
+++ b/tensorflow/contrib/distributions/python/ops/vector_diffeomixture.py
@@ -22,30 +22,176 @@ import numpy as np
 
 from tensorflow.contrib.distributions.python.ops import distribution_util
 from tensorflow.contrib.distributions.python.ops.bijectors.affine_linear_operator import AffineLinearOperator
+from tensorflow.contrib.distributions.python.ops.bijectors.softmax_centered import SoftmaxCentered
 from tensorflow.contrib.linalg.python.ops import linear_operator_addition as linop_add_lib
-from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
 from tensorflow.python.framework import tensor_shape
+from tensorflow.python.framework import tensor_util
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import check_ops
 from tensorflow.python.ops import control_flow_ops
 from tensorflow.python.ops import math_ops
+from tensorflow.python.ops import nn_ops
 from tensorflow.python.ops.distributions import categorical as categorical_lib
 from tensorflow.python.ops.distributions import distribution as distribution_lib
+from tensorflow.python.ops.distributions import normal as normal_lib
 from tensorflow.python.ops.linalg import linear_operator_diag as linop_diag_lib
 from tensorflow.python.ops.linalg import linear_operator_full_matrix as linop_full_lib
 from tensorflow.python.ops.linalg import linear_operator_identity as linop_identity_lib
 from tensorflow.python.ops.linalg import linear_operator_lower_triangular as linop_tril_lib
 
-static_value = distribution_util.static_value
-
 
 __all__ = [
     "VectorDiffeomixture",
+    "quadrature_scheme_softmaxnormal_gauss_hermite",
+    "quadrature_scheme_softmaxnormal_quantiles",
 ]
 
 
+def quadrature_scheme_softmaxnormal_gauss_hermite(
+    loc, scale, quadrature_size,
+    validate_args=False, name=None):
+  """Use Gauss-Hermite quadrature to form quadrature on `K - 1` simplex.
+
+  Note: for a given `quadrature_size`, this method is generally less accurate
+  than `quadrature_scheme_softmaxnormal_quantiles`.
+
+  Args:
+    loc: `float`-like `Tensor` with shape `[b1, ..., bB, K-1]`, B>=0.
+      Represents the `location` parameter of the SoftmaxNormal used for
+      selecting one of the `K` affine transformations.
+    scale: `float`-like `Tensor` with shape `[b1, ..., bB, K-1]`, B>=0.
+      Represents the `scale` parameter of the SoftmaxNormal used for
+      selecting one of the `K` affine transformations.
+    quadrature_size: Python `int` scalar representing the number of quadrature
+      points.
+    validate_args: Python `bool`, default `False`. When `True` distribution
+      parameters are checked for validity despite possibly degrading runtime
+      performance. When `False` invalid inputs may silently render incorrect
+      outputs.
+    name: Python `str` name prefixed to Ops created by this class.
+
+  Returns:
+    grid: Shape `[b1, ..., bB, K, quadrature_size]` `Tensor` representing the
+      convex combination of affine parameters for `K` components.
+      `grid[..., :, n]` is the `n`-th grid point, living in the `K - 1` simplex.
+    probs:  Shape `[b1, ..., bB, K, quadrature_size]` `Tensor` representing the
+      associated with each grid point.
+  """
+  with ops.name_scope(name, "quadrature_scheme_softmaxnormal_gauss_hermite",
+                      [loc, scale]):
+    loc = ops.convert_to_tensor(loc, name="loc")
+    dt = loc.dtype.base_dtype
+    scale = ops.convert_to_tensor(scale, dtype=dt, name="scale")
+
+    loc = maybe_check_quadrature_param(loc, "loc", validate_args)
+    scale = maybe_check_quadrature_param(scale, "scale", validate_args)
+
+    grid, probs = np.polynomial.hermite.hermgauss(deg=quadrature_size)
+    grid = grid.astype(loc.dtype.as_numpy_dtype)
+    probs = probs.astype(loc.dtype.as_numpy_dtype)
+    probs /= np.linalg.norm(probs, ord=1, keepdims=True)
+    probs = ops.convert_to_tensor(probs, name="probs", dtype=loc.dtype)
+
+    grid = softmax(
+        -distribution_util.pad(
+            (loc[..., array_ops.newaxis] +
+             np.sqrt(2.) * scale[..., array_ops.newaxis] * grid),
+            axis=-2,
+            front=True),
+        axis=-2)  # shape: [B, components, deg]
+
+    return grid, probs
+
+
+def quadrature_scheme_softmaxnormal_quantiles(
+    loc, scale, quadrature_size,
+    validate_args=False, name=None):
+  """Use SoftmaxNormal quantiles to form quadrature on `K - 1` simplex.
+
+  Args:
+    loc: `float`-like `Tensor` with shape `[b1, ..., bB, K-1]`, B>=0.
+      Represents the `location` parameter of the SoftmaxNormal used for
+      selecting one of the `K` affine transformations.
+    scale: `float`-like `Tensor` with shape `[b1, ..., bB, K-1]`, B>=0.
+      Represents the `scale` parameter of the SoftmaxNormal used for
+      selecting one of the `K` affine transformations.
+    quadrature_size: Python scalar `int` representing the number of quadrature
+      points.
+    validate_args: Python `bool`, default `False`. When `True` distribution
+      parameters are checked for validity despite possibly degrading runtime
+      performance. When `False` invalid inputs may silently render incorrect
+      outputs.
+    name: Python `str` name prefixed to Ops created by this class.
+
+  Returns:
+    grid: Shape `[b1, ..., bB, K, quadrature_size]` `Tensor` representing the
+      convex combination of affine parameters for `K` components.
+      `grid[..., :, n]` is the `n`-th grid point, living in the `K - 1` simplex.
+    probs:  Shape `[b1, ..., bB, K, quadrature_size]` `Tensor` representing the
+      associated with each grid point.
+  """
+  with ops.name_scope(name, "softmax_normal_grid_and_probs", [loc, scale]):
+    loc = ops.convert_to_tensor(loc, name="loc")
+    dt = loc.dtype.base_dtype
+    scale = ops.convert_to_tensor(scale, dtype=dt, name="scale")
+
+    loc = maybe_check_quadrature_param(loc, "loc", validate_args)
+    scale = maybe_check_quadrature_param(scale, "scale", validate_args)
+
+    dist = normal_lib.Normal(loc=loc, scale=scale)
+
+    def _get_batch_ndims():
+      """Helper to get dist.batch_shape.ndims, statically if possible."""
+      ndims = dist.batch_shape.ndims
+      if ndims is None:
+        ndims = array_ops.shape(dist.batch_shape_tensor())[0]
+      return ndims
+    batch_ndims = _get_batch_ndims()
+
+    def _get_final_shape(qs):
+      """Helper to build `TensorShape`."""
+      bs = dist.batch_shape.with_rank_at_least(1)
+      num_components = bs[-1].value
+      if num_components is not None:
+        num_components += 1
+      tail = tensor_shape.TensorShape([num_components, qs])
+      return bs[:-1].concatenate(tail)
+
+    def _compute_quantiles():
+      """Helper to build quantiles."""
+      # Omit {0, 1} since they might lead to Inf/NaN.
+      zero = array_ops.zeros([], dtype=dist.dtype)
+      edges = math_ops.linspace(zero, 1., quadrature_size + 3)[1:-1]
+      # Expand edges so its broadcast across batch dims.
+      edges = array_ops.reshape(edges, shape=array_ops.concat([
+          [-1], array_ops.ones([batch_ndims], dtype=dtypes.int32)], axis=0))
+      quantiles = dist.quantile(edges)
+      quantiles = SoftmaxCentered(event_ndims=1).forward(quantiles)
+      # Cyclically permute left by one.
+      perm = array_ops.concat([
+          math_ops.range(1, 1 + batch_ndims), [0]], axis=0)
+      quantiles = array_ops.transpose(quantiles, perm)
+      quantiles.set_shape(_get_final_shape(quadrature_size + 1))
+      return quantiles
+    quantiles = _compute_quantiles()
+
+    # Compute grid as quantile midpoints.
+    grid = (quantiles[..., :-1] + quantiles[..., 1:]) / 2.
+    # Set shape hints.
+    grid.set_shape(_get_final_shape(quadrature_size))
+
+    # By construction probs is constant, i.e., `1 / quadrature_size`. This is
+    # important, because non-constant probs leads to non-reparameterizable
+    # samples.
+    probs = array_ops.fill(
+        dims=[quadrature_size],
+        value=1. / math_ops.cast(quadrature_size, dist.dtype))
+
+    return grid, probs
+
+
 class VectorDiffeomixture(distribution_lib.Distribution):
   """VectorDiffeomixture distribution.
 
@@ -222,17 +368,20 @@ class VectorDiffeomixture(distribution_lib.Distribution):
                distribution,
                loc=None,
                scale=None,
-               quadrature_grid_and_probs=None,
+               quadrature_size=8,
+               quadrature_fn=quadrature_scheme_softmaxnormal_quantiles,
                validate_args=False,
                allow_nan_stats=True,
                name="VectorDiffeomixture"):
-    """Constructs the VectorDiffeomixture on `R**k`.
+    """Constructs the VectorDiffeomixture on `R**d`.
 
     Args:
-      mix_loc: `float`-like `Tensor`. Represents the `location` parameter of the
-        SoftmaxNormal used for selecting one of the `K` affine transformations.
-      mix_scale: `float`-like `Tensor`. Represents the `scale` parameter of the
-        SoftmaxNormal used for selecting one of the `K` affine transformations.
+      mix_loc: `float`-like `Tensor` with shape `[b1, ..., bB, K-1]`. Represents
+        the `location` parameter of the SoftmaxNormal used for selecting one of
+        the `K` affine transformations.
+      mix_scale: `float`-like `Tensor` with shape `[b1, ..., bB, K-1]`.
+        Represents the `scale` parameter of the SoftmaxNormal used for selecting
+        one of the `K` affine transformations.
       distribution: `tf.Distribution`-like instance. Distribution from which `d`
         iid samples are used as input to the selected affine transformation.
         Must be a scalar-batch, scalar-event distribution.  Typically
@@ -251,10 +400,13 @@ class VectorDiffeomixture(distribution_lib.Distribution):
         `k`-th element represents the `scale` used for the `k`-th affine
         transformation. `LinearOperator`s must have shape `[B1, ..., Bb, d, d]`,
         `b >= 0`, i.e., characterizes `b`-batches of `d x d` matrices
-      quadrature_grid_and_probs: Python pair of `float`-like `Tensor`s
-        representing the sample points and the corresponding (possibly
-        normalized) weight.  When `None`, defaults to:
-        `np.polynomial.hermite.hermgauss(deg=8)`.
+      quadrature_size: Python `int` scalar representing number of
+        quadrature points.
+      quadrature_fn: Python callable taking `mix_loc`, `mix_scale`,
+        `quadrature_size`, `validate_args` and returning `tuple(grid, probs)`
+        representing the SoftmaxNormal grid and corresponding normalized weight.
+        normalized) weight.
+        Default value: `quadrature_scheme_softmaxnormal_quantiles`.
       validate_args: Python `bool`, default `False`. When `True` distribution
         parameters are checked for validity despite possibly degrading runtime
         performance. When `False` invalid inputs may silently render incorrect
@@ -321,11 +473,8 @@ class VectorDiffeomixture(distribution_lib.Distribution):
         raise NotImplementedError("Currently only bimixtures are supported; "
                                   "len(scale)={} is not 2.".format(len(scale)))
 
-      grid, probs = distribution_util.process_quadrature_grid_and_probs(
-          quadrature_grid_and_probs, dtype, validate_args)
-      self._quadrature_grid = grid
-      self._quadrature_probs = probs
-      self._quadrature_size = distribution_util.dimension_size(probs, axis=0)
+      self._grid, probs = tuple(quadrature_fn(
+          mix_loc, mix_scale, quadrature_size, validate_args))
 
       # Note: by creating the logits as `log(prob)` we ensure that
       # `self.mixture_distribution.logits` is equivalent to
@@ -335,22 +484,13 @@ class VectorDiffeomixture(distribution_lib.Distribution):
           validate_args=validate_args,
           allow_nan_stats=allow_nan_stats)
 
-      mix_loc = maybe_check_mix_param(
-          mix_loc, "mix_loc", dtype, validate_args)
-      mix_scale = maybe_check_mix_param(
-          mix_scale, "mix_scale", dtype, validate_args)
-
       asserts = distribution_util.maybe_check_scalar_distribution(
           distribution, dtype, validate_args)
       if asserts:
-        mix_loc = control_flow_ops.with_dependencies(asserts, mix_loc)
+        self._grid = control_flow_ops.with_dependencies(
+            asserts, self._grid)
       self._distribution = distribution
 
-      # shape: [B, deg]
-      self._interpolate_weight = math_ops.sigmoid(
-          mix_loc
-          + np.sqrt(2.) * mix_scale * grid)
-
       self._interpolated_affine = [
           AffineLinearOperator(shift=loc_,
                                scale=scale_,
@@ -358,15 +498,16 @@ class VectorDiffeomixture(distribution_lib.Distribution):
                                validate_args=validate_args,
                                name="interpolated_affine_{}".format(k))
           for k, (loc_, scale_) in enumerate(zip(
-              interpolate_loc(self._quadrature_size,
-                              self._interpolate_weight,
-                              loc),
-              interpolate_scale(self._quadrature_size,
-                                self._interpolate_weight,
-                                scale)))]
+              interpolate_loc(self._grid, loc),
+              interpolate_scale(self._grid, scale)))]
 
-      self._batch_shape_, self._event_shape_ = determine_batch_event_shapes(
-          mix_loc, mix_scale, self._endpoint_affine)
+      [
+          self._batch_shape_,
+          self._batch_shape_tensor_,
+          self._event_shape_,
+          self._event_shape_tensor_,
+      ] = determine_batch_event_shapes(self._grid,
+                                       self._endpoint_affine)
 
       super(VectorDiffeomixture, self).__init__(
           dtype=dtype,
@@ -385,8 +526,7 @@ class VectorDiffeomixture(distribution_lib.Distribution):
           allow_nan_stats=allow_nan_stats,
           parameters=parameters,
           graph_parents=(
-              [mix_loc, mix_scale]
-              + distribution._graph_parents  # pylint: disable=protected-access
+              distribution._graph_parents  # pylint: disable=protected-access
               + [loc_ for loc_ in loc if loc_ is not None]
               + [p for scale_ in scale for p in scale_.graph_parents]),
           name=name)
@@ -402,9 +542,9 @@ class VectorDiffeomixture(distribution_lib.Distribution):
     return self._distribution
 
   @property
-  def interpolate_weight(self):
+  def grid(self):
     """Grid of mixing probabilities, one for each grid point."""
-    return self._interpolate_weight
+    return self._grid
 
   @property
   def endpoint_affine(self):
@@ -416,27 +556,17 @@ class VectorDiffeomixture(distribution_lib.Distribution):
     """Affine transformation for each convex combination of `K` components."""
     return self._interpolated_affine
 
-  @property
-  def quadrature_grid(self):
-    """Quadrature grid points."""
-    return self._quadrature_grid
-
-  @property
-  def quadrature_probs(self):
-    """Quadrature normalized weights."""
-    return self._quadrature_probs
-
   def _batch_shape_tensor(self):
-    return self._batch_shape_
+    return self._batch_shape_tensor_
 
   def _batch_shape(self):
-    return tensor_shape.TensorShape(static_value(self._batch_shape_))
+    return self._batch_shape_
 
   def _event_shape_tensor(self):
-    return self._event_shape_
+    return self._event_shape_tensor_
 
   def _event_shape(self):
-    return tensor_shape.TensorShape(static_value(self._event_shape_))
+    return self._event_shape_
 
   def _sample_n(self, n, seed=None):
     x = self.distribution.sample(
@@ -449,25 +579,44 @@ class VectorDiffeomixture(distribution_lib.Distribution):
 
     # Get ids as a [n, batch_size]-shaped matrix, unless batch_shape=[] then get
     # ids as a [n]-shaped vector.
-    batch_size = reduce_prod(self.batch_shape_tensor())
-    ids = self._mixture_distribution.sample(
+    batch_size = self.batch_shape.num_elements()
+    if batch_size is None:
+      batch_size = array_ops.reduce_prod(self.batch_shape_tensor())
+    mix_batch_size = self.mixture_distribution.batch_shape.num_elements()
+    if mix_batch_size is None:
+      mix_batch_size = math_ops.reduce_prod(
+          self.mixture_distribution.batch_shape_tensor())
+    ids = self.mixture_distribution.sample(
         sample_shape=concat_vectors(
             [n],
             distribution_util.pick_vector(
                 self.is_scalar_batch(),
                 np.int32([]),
-                [batch_size])),
+                [batch_size // mix_batch_size])),
         seed=distribution_util.gen_new_seed(
             seed, "vector_diffeomixture"))
-
-    # Stride `quadrature_size` for `batch_size` number of times.
+    # We need to flatten batch dims in case mixture_distribution has its own
+    # batch dims.
+    ids = array_ops.reshape(ids, shape=concat_vectors(
+        [n],
+        distribution_util.pick_vector(
+            self.is_scalar_batch(),
+            np.int32([]),
+            np.int32([-1]))))
+
+    # Stride `components * quadrature_size` for `batch_size` number of times.
+    stride = self.grid.shape.with_rank_at_least(
+        2)[-2:].num_elements()
+    if stride is None:
+      stride = array_ops.reduce_prod(
+          array_ops.shape(self.grid)[-2:])
     offset = math_ops.range(start=0,
-                            limit=batch_size * self._quadrature_size,
-                            delta=self._quadrature_size,
+                            limit=batch_size * stride,
+                            delta=stride,
                             dtype=ids.dtype)
 
     weight = array_ops.gather(
-        array_ops.reshape(self.interpolate_weight, shape=[-1]),
+        array_ops.reshape(self.grid, shape=[-1]),
         ids + offset)
     weight = weight[..., array_ops.newaxis]
 
@@ -499,10 +648,7 @@ class VectorDiffeomixture(distribution_lib.Distribution):
         self.mixture_distribution.logits - fldj + log_prob, axis=-1)
 
   def _mean(self):
-    # Since we created logits to already be scaled, we can use exp which is
-    # slightly cheaper than `self.mixture_distribution.probs`.
-    p = math_ops.exp(self.mixture_distribution.logits)
-
+    p = self._expand_mix_distribution_probs()
     m = self._expand_base_distribution_mean()
     mean = None
     for k, aff in enumerate(self.interpolated_affine):
@@ -536,9 +682,7 @@ class VectorDiffeomixture(distribution_lib.Distribution):
         self._covariance_of_mean_given_quadrature_component(diag_only=True))
 
   def _mean_of_covariance_given_quadrature_component(self, diag_only):
-    # Since we created logits to already be scaled, we can use exp which is
-    # slightly cheaper than `self.mixture_distribution.probs`.
-    p = math_ops.exp(self.mixture_distribution.logits)
+    p = self.mixture_distribution.probs
 
     # To compute E[Cov(Z|V)], we'll add matrices within three categories:
     # scaled-identity, diagonal, and full. Then we'll combine these at the end.
@@ -610,10 +754,9 @@ class VectorDiffeomixture(distribution_lib.Distribution):
   def _covariance_of_mean_given_quadrature_component(self, diag_only):
     square = math_ops.square if diag_only else vec_osquare
 
-    # Since we created logits to already be scaled, we can use exp which is
-    # slightly cheaper than `self.mixture_distribution.probs`.
-    p = math_ops.exp(self.mixture_distribution.logits)
-
+    p = self._expand_mix_distribution_probs()
+    if not diag_only:
+      p = p[..., array_ops.newaxis, :]  # Assuming event.ndims=1.
     m = self._expand_base_distribution_mean()
 
     cov_e_z_given_v = None
@@ -637,17 +780,25 @@ class VectorDiffeomixture(distribution_lib.Distribution):
     m.set_shape(self.batch_shape.concatenate(self.event_shape))
     return m
 
-
-def maybe_check_mix_param(param, name, expected_base_dtype, validate_args):
-  """Helper which checks validity of `mix_loc` and `mix_scale` init args."""
+  def _expand_mix_distribution_probs(self):
+    p = self.mixture_distribution.probs  # [B, deg]
+    deg = p.shape.with_rank_at_least(1)[-1].value
+    if deg is None:
+      deg = array_ops.shape(p)[-1]
+    event_ndims = self.event_shape.ndims
+    if event_ndims is None:
+      event_ndims = array_ops.shape(self.event_shape_tensor())[0]
+    expand_shape = array_ops.concat([
+        self.mixture_distribution.batch_shape_tensor(),
+        array_ops.ones([event_ndims], dtype=dtypes.int32),
+        [deg],
+    ], axis=0)
+    return array_ops.reshape(p, shape=expand_shape)
+
+
+def maybe_check_quadrature_param(param, name, validate_args):
+  """Helper which checks validity of `loc` and `scale` init args."""
   with ops.name_scope(name="check_" + name, values=[param]):
-    param = ops.convert_to_tensor(param, dtype=expected_base_dtype, name=name)
-
-    if param.dtype.base_dtype != expected_base_dtype:
-      raise TypeError(
-          "dtype mismatch; {}.base_dtype=\"{}\" is not \"{}\".".format(
-              name, param.dtype.base_dtype.name, expected_base_dtype.name))
-
     assertions = []
     if param.shape.ndims is not None:
       if param.shape.ndims == 0:
@@ -678,79 +829,84 @@ def maybe_check_mix_param(param, name, expected_base_dtype, validate_args):
     return param
 
 
-def determine_batch_event_shapes(mix_loc, mix_scale, endpoint_affine):
+def determine_batch_event_shapes(grid, endpoint_affine):
   """Helper to infer batch_shape and event_shape."""
   with ops.name_scope(name="determine_batch_event_shapes"):
-    mix_batch_shape = distribution_util.prefer_static_broadcast_shape(
-        array_ops.shape(mix_loc, name="mix_loc_shape"),
-        array_ops.shape(mix_scale, name="mix_scale_shape"))
-    if isinstance(mix_batch_shape, tensor_shape.TensorShape):
-      mix_batch_shape = mix_batch_shape.with_rank_at_least(1)[:-1]
-    else:
-      s = static_value(mix_batch_shape)
-      if s is not None:
-        mix_batch_shape = ops.convert_to_tensor(
-            s[:-1], dtype=dtypes.int32, name="mix_batch_shape")
-      else:
-        mix_batch_shape = mix_batch_shape[:-1]
-
-    # We broadcast with a 1D constant to automatically make the result a
-    # TensorShape if possible.
-    batch_shape = distribution_util.prefer_static_broadcast_shape(
-        mix_batch_shape,
-        constant_op.constant([], dtype=dtypes.int32, name="batch_shape"))
-    event_shape = constant_op.constant(
-        [], dtype=dtypes.int32, name="event_shape")
+    # grid  # shape: [B, k, q]
+    # endpoint_affine     # len=k, shape: [B, d, d]
+    batch_shape = grid.shape[:-2]
+    batch_shape_tensor = array_ops.shape(grid)[:-2]
+    event_shape = None
+    event_shape_tensor = None
+
+    def _set_event_shape(shape, shape_tensor):
+      if event_shape is None:
+        return shape, shape_tensor
+      return (array_ops.broadcast_static_shape(event_shape, shape),
+              array_ops.broadcast_dynamic_shape(
+                  event_shape_tensor, shape_tensor))
+
     for aff in endpoint_affine:
-      b, e = distribution_util.shapes_from_loc_and_scale(aff.shift, aff.scale)
-      if batch_shape is None:
-        batch_shape = distribution_util.prefer_static_broadcast_shape(
-            mix_batch_shape, b)
-      else:
-        batch_shape = distribution_util.prefer_static_broadcast_shape(
-            batch_shape, b)
-      event_shape = distribution_util.prefer_static_broadcast_shape(
-          event_shape, e)
-    if isinstance(batch_shape, tensor_shape.TensorShape):
-      batch_shape = ops.convert_to_tensor(
-          batch_shape.as_list(), dtype=dtypes.int32, name="batch_shape")
-    if isinstance(event_shape, tensor_shape.TensorShape):
-      event_shape = ops.convert_to_tensor(
-          event_shape.as_list(), dtype=dtypes.int32, name="event_shape")
-    return batch_shape, event_shape
-
-
-def interpolate_loc(deg, interpolate_weight, loc):
+      if aff.shift is not None:
+        batch_shape = array_ops.broadcast_static_shape(
+            batch_shape, aff.shift.shape[:-1])
+        batch_shape_tensor = array_ops.broadcast_dynamic_shape(
+            batch_shape_tensor, array_ops.shape(aff.shift)[:-1])
+        event_shape, event_shape_tensor = _set_event_shape(
+            aff.shift.shape[-1:], array_ops.shape(aff.shift)[-1:])
+
+      if aff.scale is not None:
+        batch_shape = array_ops.broadcast_static_shape(
+            batch_shape, aff.scale.batch_shape)
+        batch_shape_tensor = array_ops.broadcast_dynamic_shape(
+            batch_shape_tensor, aff.scale.batch_shape_tensor())
+        event_shape, event_shape_tensor = _set_event_shape(
+            tensor_shape.TensorShape([aff.scale.range_dimension]),
+            aff.scale.range_dimension_tensor()[array_ops.newaxis])
+
+    return batch_shape, batch_shape_tensor, event_shape, event_shape_tensor
+
+
+def interpolate_loc(grid, loc):
   """Helper which interpolates between two locs."""
   if len(loc) != 2:
     raise NotImplementedError("Currently only bimixtures are supported; "
                               "len(scale)={} is not 2.".format(len(loc)))
-  with ops.name_scope("interpolate_loc", values=[interpolate_weight, loc]):
+  deg = grid.shape.with_rank_at_least(1)[-1].value
+  if deg is None:
+    raise ValueError("Num quadrature grid points must be known prior "
+                     "to graph execution.")
+  with ops.name_scope("interpolate_loc", values=[grid, loc]):
     if loc is None or loc[0] is None and loc[1] is None:
       return [None]*deg
-    w = interpolate_weight[..., array_ops.newaxis, :]  # shape: [B, 1, deg]
+    # shape: [B, 1, k, deg]
+    w = grid[..., array_ops.newaxis, :, :]
     loc = [x[..., array_ops.newaxis]                   # shape: [B, e, 1]
            if x is not None else None for x in loc]
     if loc[0] is None:
-      x = (1. - w) * loc[1]                            # shape: [B, e, deg]
+      x = w[..., 1, :] * loc[1]                        # shape: [B, e, deg]
     elif loc[1] is None:
-      x = w * loc[0]                                   # shape: [B, e, deg]
+      x = w[..., 0, :] * loc[0]                        # shape: [B, e, deg]
     else:
       delta = loc[0] - loc[1]
-      x = w * delta + loc[1]                           # shape: [B, e, deg]
+      x = w[..., 0, :] * delta + loc[1]                # shape: [B, e, deg]
     return [x[..., k] for k in range(deg)]             # list(shape:[B, e])
 
 
-def interpolate_scale(deg, interpolate_weight, scale):
+def interpolate_scale(grid, scale):
   """Helper which interpolates between two scales."""
   if len(scale) != 2:
     raise NotImplementedError("Currently only bimixtures are supported; "
                               "len(scale)={} is not 2.".format(len(scale)))
-  with ops.name_scope("interpolate_scale", values=[interpolate_weight]):
+  deg = grid.shape.with_rank_at_least(1)[-1].value
+  if deg is None:
+    raise ValueError("Num quadrature grid points must be known prior "
+                     "to graph execution.")
+  with ops.name_scope("interpolate_scale", values=[grid]):
     return [linop_add_lib.add_operators([
-        linop_scale(interpolate_weight[..., k], scale[0]),
-        linop_scale(1. - interpolate_weight[..., k], scale[1]),
-    ])[0] for k in range(deg)]
+        linop_scale(grid[..., k, q], s)
+        for k, s in enumerate(scale)
+    ])[0] for q in range(deg)]
 
 
 def linop_scale(w, op):
@@ -790,39 +946,12 @@ def linop_scale(w, op):
 
 def concat_vectors(*args):
   """Concatenates input vectors, statically if possible."""
-  args_ = [static_value(x) for x in args]
+  args_ = [distribution_util.static_value(x) for x in args]
   if any(vec is None for vec in args_):
     return array_ops.concat(args, axis=0)
   return [val for vec in args_ for val in vec]
 
 
-def reduce_prod(x):
-  """Same as `math_ops.reduce_prod` but statically if possible."""
-  x_ = static_value(x)
-  if x_ is not None:
-    return np.prod(x_, dtype=x.dtype.as_numpy_dtype)
-  return array_ops.reduce_prod(x)
-
-
-def ndims_from_shape(shape):
-  """Returns `Tensor`'s `rank` implied by a `Tensor` shape."""
-  if shape.shape.ndims not in (None, 1):
-    raise ValueError("input is not a valid shape: not 1D")
-  if not shape.dtype.is_integer:
-    raise TypeError("input is not a valid shape: wrong dtype")
-  if shape.shape.is_fully_defined():
-    return shape.shape.as_list()[0]
-  return array_ops.shape(shape)[0]
-
-
-def ndims(x):
-  """Returns rank, statically if possible."""
-  x = ops.convert_to_tensor(x)
-  if x.shape.ndims is not None:
-    return x.shape.ndims
-  return array_ops.rank(x)
-
-
 def add(x, y):
   """Adds inputs; interprets `None` as zero."""
   if x is None:
@@ -835,3 +964,18 @@ def add(x, y):
 def vec_osquare(x):
   """Computes the outer-product of a (batch of) vector, i.e., x.T x."""
   return x[..., :, array_ops.newaxis] * x[..., array_ops.newaxis, :]
+
+
+def softmax(x, axis, name=None):
+  """Equivalent to tf.nn.softmax but works around b/70297725."""
+  with ops.name_scope(name, "softmax", [x, axis]):
+    x = ops.convert_to_tensor(x, name="x")
+    ndims = (x.shape.ndims if x.shape.ndims is not None
+             else array_ops.rank(x, name="ndims"))
+    axis = ops.convert_to_tensor(axis, dtype=dtypes.int32, name="axis")
+    axis_ = tensor_util.constant_value(axis)
+    if axis_ is not None:
+      axis = np.int(ndims + axis_ if axis_ < 0 else axis_)
+    else:
+      axis = array_ops.where(axis < 0, ndims + axis, axis)
+  return nn_ops.softmax(x, axis=axis)
diff --git a/tensorflow/python/ops/distributions/util.py b/tensorflow/python/ops/distributions/util.py
index baca477eb7..5bc25128a8 100644
--- a/tensorflow/python/ops/distributions/util.py
+++ b/tensorflow/python/ops/distributions/util.py
@@ -1134,8 +1134,8 @@ def dimension_size(x, axis):
   """Returns the size of a specific dimension."""
   # Since tf.gather isn't "constant-in, constant-out", we must first check the
   # static shape or fallback to dynamic shape.
-  s = x.shape.with_rank_at_least(axis + 1)[axis].value
-  if axis > -1 and s is not None:
+  s = x.shape.with_rank_at_least(np.abs(axis))[axis].value
+  if s is not None:
     return s
   return array_ops.shape(x)[axis]
 
@@ -1183,28 +1183,100 @@ def process_quadrature_grid_and_probs(
     probs /= linalg_ops.norm(probs, ord=1, axis=-1, keep_dims=True,
                              name="probs")
 
-    def _static_dim_size(x, axis):
+    def _static_event_size(x):
       """Returns the static size of a specific dimension or `None`."""
-      return x.shape.with_rank_at_least(axis + 1)[axis].value
+      return x.shape.with_rank_at_least(1)[-1].value
 
-    m, n = _static_dim_size(probs, axis=0), _static_dim_size(grid, axis=0)
+    m, n = _static_event_size(probs), _static_event_size(grid)
     if m is not None and n is not None:
       if m != n:
         raise ValueError("`quadrature_grid_and_probs` must be a `tuple` of "
                          "same-length zero-th-dimension `Tensor`s "
                          "(saw lengths {}, {})".format(m, n))
     elif validate_args:
-      grid = control_flow_ops.with_dependencies([
+      assertions = [
           check_ops.assert_equal(
-              dimension_size(probs, axis=0),
-              dimension_size(grid, axis=0),
+              dimension_size(probs, axis=-1),
+              dimension_size(grid, axis=-1),
               message=("`quadrature_grid_and_probs` must be a `tuple` of "
                        "same-length zero-th-dimension `Tensor`s")),
-      ], grid)
-
+      ]
+      with ops.control_dependencies(assertions):
+        grid = array_ops.identity(grid)
+        probs = array_ops.identity(probs)
     return grid, probs
 
 
+def pad(x, axis, front=False, back=False, value=0, count=1, name=None):
+  """Pads `value` to the front and/or back of a `Tensor` dim, `count` times.
+
+  Args:
+    x: `Tensor` input.
+    axis: Scalar `int`-like `Tensor` representing the single dimension to pad.
+      (Negative indexing is supported.)
+    front: Python `bool`; if `True` the beginning of the `axis` dimension is
+      padded with `value`, `count` times. If `False` no front padding is made.
+    back: Python `bool`; if `True` the end of the `axis` dimension is
+      padded with `value`, `count` times. If `False` no end padding is made.
+    value: Scalar `int`-like `Tensor` representing the actual value added to the
+      front and/or back of the `axis` dimension of `x`.
+    count: Scalar `int`-like `Tensor` representing number of elements added to
+      the front and/or back of the `axis` dimension of `x`. E.g., if
+      `front = back = True` then `2 * count` elements are added.
+    name: Python `str` name prefixed to Ops created by this function.
+
+  Returns:
+    pad: The padded version of input `x`.
+
+  Raises:
+    ValueError: if both `front` and `back` are `False`.
+    TypeError: if `count` is not `int`-like.
+  """
+  with ops.name_scope(name, "pad", [x, value, count]):
+    x = ops.convert_to_tensor(x, name="x")
+    value = ops.convert_to_tensor(value, dtype=x.dtype, name="value")
+    count = ops.convert_to_tensor(count, name="count")
+    if not count.dtype.is_integer:
+      raise TypeError("`count.dtype` (`{}`) must be `int`-like.".format(
+          count.dtype.name))
+    if not front and not back:
+      raise ValueError("At least one of `front`, `back` must be `True`.")
+    ndims = (x.shape.ndims if x.shape.ndims is not None
+             else array_ops.rank(x, name="ndims"))
+    axis = ops.convert_to_tensor(axis, name="axis")
+    axis_ = tensor_util.constant_value(axis)
+    if axis_ is not None:
+      axis = axis_
+      if axis < 0:
+        axis = ndims + axis
+      count_ = tensor_util.constant_value(count)
+      if axis_ >= 0 or x.shape.ndims is not None:
+        head = x.shape[:axis]
+        middle = tensor_shape.TensorShape(
+            None if count_ is None
+            else (x.shape[axis] + count_ * (front + back)))
+        tail = x.shape[axis+1:]
+        final_shape = head.concatenate(middle.concatenate(tail))
+      else:
+        final_shape = None
+    else:
+      axis = array_ops.where(axis < 0, ndims + axis, axis)
+      final_shape = None
+    x = array_ops.pad(
+        x,
+        paddings=array_ops.one_hot(
+            indices=array_ops.stack([axis if front else -1,
+                                     axis if back else -1]),
+            depth=ndims,
+            axis=0,
+            on_value=count,
+            dtype=dtypes.int32),
+        constant_values=value)
+    if final_shape is not None:
+      x.set_shape(final_shape)
+    return x
+
+
 class AppendDocstring(object):
   """Helper class to promote private subclass docstring to public counterpart.
 
-- 
GitLab


From d06ebf2ae4e37ae74317bc364d5605994169fc50 Mon Sep 17 00:00:00 2001
From: Asim Shankar <asimshankar@gmail.com>
Date: Fri, 15 Dec 2017 13:51:03 -0800
Subject: [PATCH 1092/1225] [Go]: Make op wrapper generation more robust.
 (#15353)

- Since Go 1.8, GOPATH has a default value, so handle
  that (https://golang.org/doc/go1.8#gopath)
- generate.sh expected bash (for the string substitution syntax)
  while 'sh' may point to another shell. So explicitly require bash.
---
 tensorflow/go/genop/generate.sh | 5 +++++
 tensorflow/go/genop/main.go     | 2 +-
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/tensorflow/go/genop/generate.sh b/tensorflow/go/genop/generate.sh
index 01fcfb9058..a894c87c27 100644
--- a/tensorflow/go/genop/generate.sh
+++ b/tensorflow/go/genop/generate.sh
@@ -19,6 +19,11 @@ set -e
 go get github.com/golang/protobuf/proto
 go get github.com/golang/protobuf/protoc-gen-go
 
+if [ -z "${GOPATH}" ]
+then
+  GOPATH=$(go env GOPATH)
+fi
+
 cd $(dirname $0)
 for g in $(echo "${GOPATH//:/ }"); do
     TF_DIR="${g}/src/github.com/tensorflow/tensorflow"
diff --git a/tensorflow/go/genop/main.go b/tensorflow/go/genop/main.go
index b6f8e2d5a8..0c7d9be5c1 100644
--- a/tensorflow/go/genop/main.go
+++ b/tensorflow/go/genop/main.go
@@ -14,7 +14,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 */
 
-//go:generate sh generate.sh
+//go:generate bash generate.sh
 
 // Command genop generates a Go source file with functions for TensorFlow ops.
 package main
-- 
GitLab


From c35a5425b9657c94c0229c128e2306fdec4c013f Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 15 Dec 2017 13:54:09 -0800
Subject: [PATCH 1093/1225] internal change

PiperOrigin-RevId: 179235588
---
 .../lite/toco/graph_transformations/propagate_fixed_sizes.cc   | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/tensorflow/contrib/lite/toco/graph_transformations/propagate_fixed_sizes.cc b/tensorflow/contrib/lite/toco/graph_transformations/propagate_fixed_sizes.cc
index 0760182a2e..5a95b9961f 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/propagate_fixed_sizes.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/propagate_fixed_sizes.cc
@@ -949,7 +949,10 @@ void ProcessArgMaxOperator(Model* model, ArgMaxOperator* op) {
     return;
   }
 
+  // The current ArgMax implementation only supports 4-dimensional inputs with
+  // the last dimension as the axis to perform ArgMax for.
   const std::vector<int>& input_dims = input_array.shape().dims();
+  CHECK_EQ(input_dims.size(), 4);
   std::vector<int> output_dims;
 
   output_dims.reserve(input_dims.size() - 1);
-- 
GitLab


From 7f8e7437693d1051fa378047ae9ee75f91201cb5 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 15 Dec 2017 14:36:32 -0800
Subject: [PATCH 1094/1225] Adds DNN + GBDT combined estimators to
 tensorflow/contrib/boosted_trees/estimator_batch/combined/.

PiperOrigin-RevId: 179241340
---
 tensorflow/contrib/boosted_trees/BUILD        |   1 +
 .../boosted_trees/estimator_batch/BUILD       |  31 ++
 .../dnn_tree_combined_estimator.py            | 515 ++++++++++++++++++
 .../dnn_tree_combined_estimator_test.py       | 105 ++++
 .../estimator_batch/trainer_hooks.py          |  38 ++
 .../boosted_trees/examples/boston_combined.py | 165 ++++++
 6 files changed, 855 insertions(+)
 create mode 100644 tensorflow/contrib/boosted_trees/estimator_batch/dnn_tree_combined_estimator.py
 create mode 100644 tensorflow/contrib/boosted_trees/estimator_batch/dnn_tree_combined_estimator_test.py
 create mode 100644 tensorflow/contrib/boosted_trees/examples/boston_combined.py

diff --git a/tensorflow/contrib/boosted_trees/BUILD b/tensorflow/contrib/boosted_trees/BUILD
index 7072f56420..392ac7fa1c 100644
--- a/tensorflow/contrib/boosted_trees/BUILD
+++ b/tensorflow/contrib/boosted_trees/BUILD
@@ -601,6 +601,7 @@ py_library(
         ":init_py",
         "//tensorflow/contrib/boosted_trees:gbdt_batch",
         "//tensorflow/contrib/boosted_trees/estimator_batch:custom_export_strategy",
+        "//tensorflow/contrib/boosted_trees/estimator_batch:dnn_tree_combined_estimator",
         "//tensorflow/contrib/boosted_trees/estimator_batch:init_py",
         "//tensorflow/contrib/boosted_trees/estimator_batch:trainer_hooks",
         "//tensorflow/contrib/boosted_trees/lib:categorical_split_handler",
diff --git a/tensorflow/contrib/boosted_trees/estimator_batch/BUILD b/tensorflow/contrib/boosted_trees/estimator_batch/BUILD
index 7792c7127c..48084d8016 100644
--- a/tensorflow/contrib/boosted_trees/estimator_batch/BUILD
+++ b/tensorflow/contrib/boosted_trees/estimator_batch/BUILD
@@ -50,6 +50,7 @@ py_library(
     deps = [
         "//tensorflow/contrib/learn",
         "//tensorflow/core:protos_all_py",
+        "//tensorflow/python:control_flow_ops",
         "//tensorflow/python:framework_ops",
         "//tensorflow/python:platform",
         "//tensorflow/python:training",
@@ -129,3 +130,33 @@ py_library(
         "//tensorflow/python:math_ops",
     ],
 )
+
+py_library(
+    name = "dnn_tree_combined_estimator",
+    srcs = ["dnn_tree_combined_estimator.py"],
+    srcs_version = "PY2AND3",
+    deps = [
+        ":trainer_hooks",
+        "//tensorflow/contrib/boosted_trees:gbdt_batch",
+        "//tensorflow/contrib/boosted_trees:model_ops_py",
+        "//tensorflow/contrib/learn",
+        "//tensorflow/python:control_flow_ops",
+        "//tensorflow/python:framework_ops",
+        "//tensorflow/python:state_ops",
+        "//tensorflow/python:training",
+    ],
+)
+
+py_test(
+    name = "dnn_tree_combined_estimator_test",
+    size = "small",
+    srcs = ["dnn_tree_combined_estimator_test.py"],
+    srcs_version = "PY2AND3",
+    deps = [
+        ":dnn_tree_combined_estimator",
+        "//tensorflow/contrib/boosted_trees:gbdt_batch",
+        "//tensorflow/contrib/layers:layers_py",
+        "//tensorflow/python:array_ops",
+        "//tensorflow/python:framework_for_generated_wrappers",
+    ],
+)
diff --git a/tensorflow/contrib/boosted_trees/estimator_batch/dnn_tree_combined_estimator.py b/tensorflow/contrib/boosted_trees/estimator_batch/dnn_tree_combined_estimator.py
new file mode 100644
index 0000000000..cec3892b57
--- /dev/null
+++ b/tensorflow/contrib/boosted_trees/estimator_batch/dnn_tree_combined_estimator.py
@@ -0,0 +1,515 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""TensorFlow estimators for combined DNN + GBDT training model.
+
+The combined model trains a DNN first, then trains boosted trees to boost the
+logits of the DNN. The input layer of the DNN (including the embeddings learned
+over sparse features) can optionally be provided to the boosted trees as
+an additional input feature.
+"""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import six
+
+from tensorflow.contrib import layers
+from tensorflow.contrib.boosted_trees.estimator_batch import trainer_hooks
+from tensorflow.contrib.boosted_trees.python.ops import model_ops
+from tensorflow.contrib.boosted_trees.python.training.functions import gbdt_batch
+from tensorflow.contrib.layers.python.layers import optimizers
+from tensorflow.contrib.learn.python.learn.estimators import estimator
+from tensorflow.contrib.learn.python.learn.estimators import head as head_lib
+from tensorflow.contrib.learn.python.learn.estimators import model_fn
+from tensorflow.python.framework import ops
+from tensorflow.python.ops import control_flow_ops
+from tensorflow.python.ops import nn
+from tensorflow.python.ops import partitioned_variables
+from tensorflow.python.ops import state_ops
+from tensorflow.python.ops import variable_scope
+from tensorflow.python.summary import summary
+from tensorflow.python.training import training_util
+
+
+_DNN_LEARNING_RATE = 0.001
+
+
+def _get_optimizer(optimizer):
+  if callable(optimizer):
+    return optimizer()
+  else:
+    return optimizer
+
+
+def _add_hidden_layer_summary(value, tag):
+  summary.scalar("%s_fraction_of_zero_values" % tag, nn.zero_fraction(value))
+  summary.histogram("%s_activation" % tag, value)
+
+
+def _dnn_tree_combined_model_fn(
+    features, labels, mode, head, dnn_hidden_units,
+    dnn_feature_columns, tree_learner_config, num_trees,
+    tree_examples_per_layer,
+    config=None, dnn_optimizer="Adagrad",
+    dnn_activation_fn=nn.relu, dnn_dropout=None,
+    dnn_input_layer_partitioner=None,
+    dnn_input_layer_to_tree=True, dnn_steps_to_train=10000,
+    tree_feature_columns=None,
+    tree_center_bias=True):
+  """DNN and GBDT combined model_fn.
+
+  Args:
+    features: `dict` of `Tensor` objects.
+    labels: Labels used to train on.
+    mode: Mode we are in. (TRAIN/EVAL/INFER)
+    head: A `Head` instance.
+    dnn_hidden_units: List of hidden units per layer.
+    dnn_feature_columns: An iterable containing all the feature columns
+      used by the model's DNN.
+    tree_learner_config: A config for the tree learner.
+    num_trees: Number of trees to grow model to after training DNN.
+    tree_examples_per_layer: Number of examples to accumulate before
+      growing the tree a layer. This value has a big impact on model
+      quality and should be set equal to the number of examples in
+      training dataset if possible. It can also be a function that computes
+      the number of examples based on the depth of the layer that's
+      being built.
+    config: `RunConfig` of the estimator.
+    dnn_optimizer: string, `Optimizer` object, or callable that defines the
+      optimizer to use for training the DNN. If `None`, will use the Adagrad
+      optimizer with default learning rate of 0.001.
+    dnn_activation_fn: Activation function applied to each layer of the DNN.
+      If `None`, will use `tf.nn.relu`.
+    dnn_dropout: When not `None`, the probability to drop out a given
+      unit in the DNN.
+    dnn_input_layer_partitioner: Partitioner for input layer of the DNN.
+      Defaults to `min_max_variable_partitioner` with `min_slice_size` 64 << 20.
+    dnn_input_layer_to_tree: Whether to provide the DNN's input layer
+    as a feature to the tree.
+    dnn_steps_to_train: Number of steps to train dnn for before switching
+      to gbdt.
+    tree_feature_columns: An iterable containing all the feature columns
+      used by the model's boosted trees. If dnn_input_layer_to_tree is
+      set to True, these features are in addition to dnn_feature_columns.
+    tree_center_bias: Whether a separate tree should be created for
+      first fitting the bias.
+
+  Returns:
+    A `ModelFnOps` object.
+  Raises:
+    ValueError: if inputs are not valid.
+  """
+  if not isinstance(features, dict):
+    raise ValueError("features should be a dictionary of `Tensor`s. "
+                     "Given type: {}".format(type(features)))
+
+  if not dnn_feature_columns:
+    raise ValueError("dnn_feature_columns must be specified")
+
+  # Build DNN Logits.
+  dnn_parent_scope = "dnn"
+  dnn_partitioner = dnn_input_layer_partitioner or (
+      partitioned_variables.min_max_variable_partitioner(
+          max_partitions=config.num_ps_replicas,
+          min_slice_size=64 << 20))
+
+  with variable_scope.variable_scope(
+      dnn_parent_scope,
+      values=tuple(six.itervalues(features)),
+      partitioner=dnn_partitioner):
+
+    with variable_scope.variable_scope(
+        "input_from_feature_columns",
+        values=tuple(six.itervalues(features)),
+        partitioner=dnn_partitioner) as input_layer_scope:
+      input_layer = layers.input_from_feature_columns(
+          columns_to_tensors=features,
+          feature_columns=dnn_feature_columns,
+          weight_collections=[dnn_parent_scope],
+          scope=input_layer_scope)
+    previous_layer = input_layer
+    for layer_id, num_hidden_units in enumerate(dnn_hidden_units):
+      with variable_scope.variable_scope(
+          "hiddenlayer_%d" % layer_id,
+          values=(previous_layer,)) as hidden_layer_scope:
+        net = layers.fully_connected(
+            previous_layer,
+            num_hidden_units,
+            activation_fn=dnn_activation_fn,
+            variables_collections=[dnn_parent_scope],
+            scope=hidden_layer_scope)
+        if dnn_dropout is not None and mode == model_fn.ModeKeys.TRAIN:
+          net = layers.dropout(net, keep_prob=(1.0 - dnn_dropout))
+      _add_hidden_layer_summary(net, hidden_layer_scope.name)
+      previous_layer = net
+    with variable_scope.variable_scope(
+        "logits",
+        values=(previous_layer,)) as logits_scope:
+      dnn_logits = layers.fully_connected(
+          previous_layer,
+          head.logits_dimension,
+          activation_fn=None,
+          variables_collections=[dnn_parent_scope],
+          scope=logits_scope)
+    _add_hidden_layer_summary(dnn_logits, logits_scope.name)
+
+    def _dnn_train_op_fn(loss):
+      """Returns the op to optimize the loss."""
+      return optimizers.optimize_loss(
+          loss=loss,
+          global_step=training_util.get_global_step(),
+          learning_rate=_DNN_LEARNING_RATE,
+          optimizer=_get_optimizer(dnn_optimizer),
+          name=dnn_parent_scope,
+          variables=ops.get_collection(
+              ops.GraphKeys.TRAINABLE_VARIABLES,
+              scope=dnn_parent_scope),
+          # Empty summaries to prevent optimizers from logging training_loss.
+          summaries=[])
+
+  # Build Tree Logits.
+  global_step = training_util.get_global_step()
+  with ops.device(global_step.device):
+    ensemble_handle = model_ops.tree_ensemble_variable(
+        stamp_token=0,
+        tree_ensemble_config="",  # Initialize an empty ensemble.
+        name="ensemble_model")
+
+  tree_features = features.copy()
+  if dnn_input_layer_to_tree:
+    tree_features["dnn_input_layer"] = input_layer
+    tree_feature_columns.append(layers.real_valued_column("dnn_input_layer"))
+  gbdt_model = gbdt_batch.GradientBoostedDecisionTreeModel(
+      is_chief=config.is_chief,
+      num_ps_replicas=config.num_ps_replicas,
+      ensemble_handle=ensemble_handle,
+      center_bias=tree_center_bias,
+      examples_per_layer=tree_examples_per_layer,
+      learner_config=tree_learner_config,
+      feature_columns=tree_feature_columns,
+      logits_dimension=head.logits_dimension,
+      features=tree_features)
+
+  with ops.name_scope("gbdt"):
+    predictions_dict = gbdt_model.predict(mode)
+    tree_logits = predictions_dict["predictions"]
+
+    def _tree_train_op_fn(loss):
+      """Returns the op to optimize the loss."""
+      update_op = gbdt_model.train(loss, predictions_dict, labels)
+      with ops.control_dependencies(
+          [update_op]), (ops.colocate_with(global_step)):
+        update_op = state_ops.assign_add(global_step, 1).op
+        return update_op
+
+  tree_train_logits = dnn_logits + tree_logits
+
+  def _no_train_op_fn(loss):
+    """Returns a no-op."""
+    del loss
+    return control_flow_ops.no_op()
+
+  model_fn_ops = head.create_model_fn_ops(
+      features=features,
+      mode=mode,
+      labels=labels,
+      train_op_fn=_no_train_op_fn,
+      logits=tree_train_logits)
+  dnn_train_op = head.create_model_fn_ops(
+      features=features,
+      mode=mode,
+      labels=labels,
+      train_op_fn=_dnn_train_op_fn,
+      logits=dnn_logits).train_op
+  tree_train_op = head.create_model_fn_ops(
+      features=tree_features,
+      mode=mode,
+      labels=labels,
+      train_op_fn=_tree_train_op_fn,
+      logits=tree_train_logits).train_op
+
+  if tree_center_bias:
+    num_trees += 1
+  finalized_trees, attempted_trees = gbdt_model.get_number_of_trees_tensor()
+
+  model_fn_ops.training_hooks.extend([
+      trainer_hooks.SwitchTrainOp(
+          dnn_train_op, dnn_steps_to_train, tree_train_op),
+      trainer_hooks.StopAfterNTrees(
+          num_trees, attempted_trees, finalized_trees)])
+
+  return model_fn_ops
+
+
+class DNNBoostedTreeCombinedClassifier(estimator.Estimator):
+  """A classifier that uses a combined DNN/GBDT model."""
+
+  def __init__(self,
+               dnn_hidden_units,
+               dnn_feature_columns,
+               tree_learner_config,
+               num_trees,
+               tree_examples_per_layer,
+               n_classes=2,
+               weight_column_name=None,
+               model_dir=None,
+               config=None,
+               label_name=None,
+               label_keys=None,
+               feature_engineering_fn=None,
+               dnn_optimizer="Adagrad",
+               dnn_activation_fn=nn.relu,
+               dnn_dropout=None,
+               dnn_input_layer_partitioner=None,
+               dnn_input_layer_to_tree=True,
+               dnn_steps_to_train=10000,
+               tree_feature_columns=None,
+               tree_center_bias=True):
+    """Initializes a DNNBoostedTreeCombinedClassifier instance.
+
+    Args:
+      dnn_hidden_units: List of hidden units per layer for DNN.
+      dnn_feature_columns: An iterable containing all the feature columns
+        used by the model's DNN.
+      tree_learner_config: A config for the tree learner.
+      num_trees: Number of trees to grow model to after training DNN.
+      tree_examples_per_layer: Number of examples to accumulate before
+        growing the tree a layer. This value has a big impact on model
+        quality and should be set equal to the number of examples in
+        training dataset if possible. It can also be a function that computes
+        the number of examples based on the depth of the layer that's
+        being built.
+      n_classes: The number of label classes.
+      weight_column_name: The name of weight column.
+      model_dir: Directory for model exports.
+      config: `RunConfig` of the estimator.
+      label_name: String, name of the key in label dict. Can be null if label
+        is a tensor (single headed models).
+      label_keys: Optional list of strings with size `[n_classes]` defining the
+        label vocabulary. Only supported for `n_classes` > 2.
+      feature_engineering_fn: Feature engineering function. Takes features and
+        labels which are the output of `input_fn` and returns features and
+        labels which will be fed into the model.
+      dnn_optimizer: string, `Optimizer` object, or callable that defines the
+        optimizer to use for training the DNN. If `None`, will use the Adagrad
+        optimizer with default learning rate.
+      dnn_activation_fn: Activation function applied to each layer of the DNN.
+        If `None`, will use `tf.nn.relu`.
+      dnn_dropout: When not `None`, the probability to drop out a given
+        unit in the DNN.
+      dnn_input_layer_partitioner: Partitioner for input layer of the DNN.
+        Defaults to `min_max_variable_partitioner` with `min_slice_size`
+        64 << 20.
+      dnn_input_layer_to_tree: Whether to provide the DNN's input layer
+      as a feature to the tree.
+      dnn_steps_to_train: Number of steps to train dnn for before switching
+        to gbdt.
+      tree_feature_columns: An iterable containing all the feature columns
+        used by the model's boosted trees. If dnn_input_layer_to_tree is
+        set to True, these features are in addition to dnn_feature_columns.
+      tree_center_bias: Whether a separate tree should be created for
+        first fitting the bias.
+    """
+    head = head_lib.multi_class_head(
+        n_classes=n_classes,
+        label_name=label_name,
+        label_keys=label_keys,
+        weight_column_name=weight_column_name,
+        enable_centered_bias=False)
+
+    def _model_fn(features, labels, mode, config):
+      return _dnn_tree_combined_model_fn(
+          features, labels, mode, head, dnn_hidden_units, dnn_feature_columns,
+          tree_learner_config, num_trees, tree_examples_per_layer, config,
+          dnn_optimizer, dnn_activation_fn, dnn_dropout,
+          dnn_input_layer_partitioner, dnn_input_layer_to_tree,
+          dnn_steps_to_train,
+          tree_feature_columns, tree_center_bias)
+
+    super(DNNBoostedTreeCombinedClassifier, self).__init__(
+        model_fn=_model_fn, model_dir=model_dir,
+        config=config, feature_engineering_fn=feature_engineering_fn)
+
+
+class DNNBoostedTreeCombinedRegressor(estimator.Estimator):
+  """A regressor that uses a combined DNN/GBDT model."""
+
+  def __init__(self,
+               dnn_hidden_units,
+               dnn_feature_columns,
+               tree_learner_config,
+               num_trees,
+               tree_examples_per_layer,
+               weight_column_name=None,
+               model_dir=None,
+               config=None,
+               label_name=None,
+               label_dimension=1,
+               feature_engineering_fn=None,
+               dnn_optimizer="Adagrad",
+               dnn_activation_fn=nn.relu,
+               dnn_dropout=None,
+               dnn_input_layer_partitioner=None,
+               dnn_input_layer_to_tree=True,
+               dnn_steps_to_train=10000,
+               tree_feature_columns=None,
+               tree_center_bias=True):
+    """Initializes a DNNBoostedTreeCombinedRegressor instance.
+
+    Args:
+      dnn_hidden_units: List of hidden units per layer for DNN.
+      dnn_feature_columns: An iterable containing all the feature columns
+        used by the model's DNN.
+      tree_learner_config: A config for the tree learner.
+      num_trees: Number of trees to grow model to after training DNN.
+      tree_examples_per_layer: Number of examples to accumulate before
+        growing the tree a layer. This value has a big impact on model
+        quality and should be set equal to the number of examples in
+        training dataset if possible. It can also be a function that computes
+        the number of examples based on the depth of the layer that's
+        being built.
+      weight_column_name: The name of weight column.
+      model_dir: Directory for model exports.
+      config: `RunConfig` of the estimator.
+      label_name: String, name of the key in label dict. Can be null if label
+        is a tensor (single headed models).
+      label_dimension: Number of regression labels per example. This is the size
+        of the last dimension of the labels `Tensor` (typically, this has shape
+        `[batch_size, label_dimension]`).
+      feature_engineering_fn: Feature engineering function. Takes features and
+        labels which are the output of `input_fn` and returns features and
+        labels which will be fed into the model.
+      dnn_optimizer: string, `Optimizer` object, or callable that defines the
+        optimizer to use for training the DNN. If `None`, will use the Adagrad
+        optimizer with default learning rate.
+      dnn_activation_fn: Activation function applied to each layer of the DNN.
+        If `None`, will use `tf.nn.relu`.
+      dnn_dropout: When not `None`, the probability to drop out a given
+        unit in the DNN.
+      dnn_input_layer_partitioner: Partitioner for input layer of the DNN.
+        Defaults to `min_max_variable_partitioner` with `min_slice_size`
+        64 << 20.
+      dnn_input_layer_to_tree: Whether to provide the DNN's input layer
+      as a feature to the tree.
+      dnn_steps_to_train: Number of steps to train dnn for before switching
+        to gbdt.
+      tree_feature_columns: An iterable containing all the feature columns
+        used by the model's boosted trees. If dnn_input_layer_to_tree is
+        set to True, these features are in addition to dnn_feature_columns.
+      tree_center_bias: Whether a separate tree should be created for
+        first fitting the bias.
+    """
+    head = head_lib.regression_head(
+        label_name=label_name,
+        label_dimension=label_dimension,
+        weight_column_name=weight_column_name,
+        enable_centered_bias=False)
+
+    # num_classes needed for GradientBoostedDecisionTreeModel
+    if label_dimension == 1:
+      tree_learner_config.num_classes = 2
+    else:
+      tree_learner_config.num_classes = label_dimension
+
+    def _model_fn(features, labels, mode, config):
+      return _dnn_tree_combined_model_fn(
+          features, labels, mode, head, dnn_hidden_units, dnn_feature_columns,
+          tree_learner_config, num_trees, tree_examples_per_layer, config,
+          dnn_optimizer, dnn_activation_fn, dnn_dropout,
+          dnn_input_layer_partitioner, dnn_input_layer_to_tree,
+          dnn_steps_to_train, tree_feature_columns, tree_center_bias)
+
+    super(DNNBoostedTreeCombinedRegressor, self).__init__(
+        model_fn=_model_fn, model_dir=model_dir,
+        config=config, feature_engineering_fn=feature_engineering_fn)
+
+
+class DNNBoostedTreeCombinedEstimator(estimator.Estimator):
+  """An estimator that uses a combined DNN/GBDT model.
+
+  Useful for training with user specified `Head`.
+  """
+
+  def __init__(self,
+               dnn_hidden_units,
+               dnn_feature_columns,
+               tree_learner_config,
+               num_trees,
+               tree_examples_per_layer,
+               head,
+               model_dir=None,
+               config=None,
+               feature_engineering_fn=None,
+               dnn_optimizer="Adagrad",
+               dnn_activation_fn=nn.relu,
+               dnn_dropout=None,
+               dnn_input_layer_partitioner=None,
+               dnn_input_layer_to_tree=True,
+               dnn_steps_to_train=10000,
+               tree_feature_columns=None,
+               tree_center_bias=True):
+    """Initializes a DNNBoostedTreeCombinedEstimator instance.
+
+    Args:
+      dnn_hidden_units: List of hidden units per layer for DNN.
+      dnn_feature_columns: An iterable containing all the feature columns
+        used by the model's DNN.
+      tree_learner_config: A config for the tree learner.
+      num_trees: Number of trees to grow model to after training DNN.
+      tree_examples_per_layer: Number of examples to accumulate before
+        growing the tree a layer. This value has a big impact on model
+        quality and should be set equal to the number of examples in
+        training dataset if possible. It can also be a function that computes
+        the number of examples based on the depth of the layer that's
+        being built.
+      head: `Head` instance.
+      model_dir: Directory for model exports.
+      config: `RunConfig` of the estimator.
+      feature_engineering_fn: Feature engineering function. Takes features and
+        labels which are the output of `input_fn` and returns features and
+        labels which will be fed into the model.
+      dnn_optimizer: string, `Optimizer` object, or callable that defines the
+        optimizer to use for training the DNN. If `None`, will use the Adagrad
+        optimizer with default learning rate.
+      dnn_activation_fn: Activation function applied to each layer of the DNN.
+        If `None`, will use `tf.nn.relu`.
+      dnn_dropout: When not `None`, the probability to drop out a given
+        unit in the DNN.
+      dnn_input_layer_partitioner: Partitioner for input layer of the DNN.
+        Defaults to `min_max_variable_partitioner` with `min_slice_size`
+        64 << 20.
+      dnn_input_layer_to_tree: Whether to provide the DNN's input layer
+      as a feature to the tree.
+      dnn_steps_to_train: Number of steps to train dnn for before switching
+        to gbdt.
+      tree_feature_columns: An iterable containing all the feature columns
+        used by the model's boosted trees. If dnn_input_layer_to_tree is
+        set to True, these features are in addition to dnn_feature_columns.
+      tree_center_bias: Whether a separate tree should be created for
+        first fitting the bias.
+    """
+    def _model_fn(features, labels, mode, config):
+      return _dnn_tree_combined_model_fn(
+          features, labels, mode, head, dnn_hidden_units, dnn_feature_columns,
+          tree_learner_config, num_trees, tree_examples_per_layer, config,
+          dnn_optimizer, dnn_activation_fn, dnn_dropout,
+          dnn_input_layer_partitioner, dnn_input_layer_to_tree,
+          dnn_steps_to_train,
+          tree_feature_columns, tree_center_bias)
+
+    super(DNNBoostedTreeCombinedEstimator, self).__init__(
+        model_fn=_model_fn, model_dir=model_dir,
+        config=config, feature_engineering_fn=feature_engineering_fn)
diff --git a/tensorflow/contrib/boosted_trees/estimator_batch/dnn_tree_combined_estimator_test.py b/tensorflow/contrib/boosted_trees/estimator_batch/dnn_tree_combined_estimator_test.py
new file mode 100644
index 0000000000..83d58c5610
--- /dev/null
+++ b/tensorflow/contrib/boosted_trees/estimator_batch/dnn_tree_combined_estimator_test.py
@@ -0,0 +1,105 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for combined DNN + GBDT estimators."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import tempfile
+
+from tensorflow.contrib.boosted_trees.estimator_batch import dnn_tree_combined_estimator as estimator
+from tensorflow.contrib.boosted_trees.proto import learner_pb2
+from tensorflow.contrib.layers.python.layers import feature_column
+from tensorflow.contrib.learn.python.learn.estimators import estimator_test_utils
+from tensorflow.contrib.learn.python.learn.estimators import run_config
+from tensorflow.python.framework import constant_op
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import test_util
+from tensorflow.python.platform import googletest
+
+
+def _train_input_fn():
+  features = {
+      "x": constant_op.constant([[2.], [1.], [1.]])
+  }
+  label = constant_op.constant([[1], [0], [0]], dtype=dtypes.int32)
+  return features, label
+
+
+def _eval_input_fn():
+  features = {
+      "x": constant_op.constant([[1.], [2.], [2.]])
+  }
+  label = constant_op.constant([[0], [1], [1]], dtype=dtypes.int32)
+  return features, label
+
+
+class DNNBoostedTreeCombinedTest(test_util.TensorFlowTestCase):
+
+  def testClassifierContract(self):
+    estimator_test_utils.assert_estimator_contract(
+        self, estimator.DNNBoostedTreeCombinedClassifier)
+
+  def testRegressorContract(self):
+    estimator_test_utils.assert_estimator_contract(
+        self, estimator.DNNBoostedTreeCombinedRegressor)
+
+  def testEstimatorContract(self):
+    estimator_test_utils.assert_estimator_contract(
+        self, estimator.DNNBoostedTreeCombinedEstimator)
+
+  def testNoDNNFeatureColumns(self):
+    learner_config = learner_pb2.LearnerConfig()
+    learner_config.num_classes = 2
+
+    with self.assertRaisesRegexp(
+        ValueError,
+        "dnn_feature_columns must be specified"):
+      classifier = estimator.DNNBoostedTreeCombinedClassifier(
+          dnn_hidden_units=[1],
+          dnn_feature_columns=[],
+          tree_learner_config=learner_config,
+          num_trees=1,
+          tree_examples_per_layer=3,
+          n_classes=2)
+      classifier.fit(input_fn=_train_input_fn, steps=5)
+
+  def testFitAndEvaluateDontThrowException(self):
+    learner_config = learner_pb2.LearnerConfig()
+    learner_config.num_classes = 2
+    learner_config.constraints.max_tree_depth = 1
+    model_dir = tempfile.mkdtemp()
+    config = run_config.RunConfig()
+
+    classifier = estimator.DNNBoostedTreeCombinedClassifier(
+        dnn_hidden_units=[1],
+        dnn_feature_columns=[feature_column.real_valued_column("x")],
+        tree_learner_config=learner_config,
+        num_trees=1,
+        tree_examples_per_layer=3,
+        n_classes=2,
+        model_dir=model_dir,
+        config=config,
+        dnn_steps_to_train=10,
+        dnn_input_layer_to_tree=False,
+        tree_feature_columns=[feature_column.real_valued_column("x")])
+
+    classifier.fit(input_fn=_train_input_fn, steps=15)
+    classifier.evaluate(input_fn=_eval_input_fn, steps=1)
+
+
+if __name__ == "__main__":
+  googletest.main()
diff --git a/tensorflow/contrib/boosted_trees/estimator_batch/trainer_hooks.py b/tensorflow/contrib/boosted_trees/estimator_batch/trainer_hooks.py
index 79193fffc3..2e4151cac4 100644
--- a/tensorflow/contrib/boosted_trees/estimator_batch/trainer_hooks.py
+++ b/tensorflow/contrib/boosted_trees/estimator_batch/trainer_hooks.py
@@ -24,6 +24,7 @@ from tensorflow.contrib.learn.python.learn import session_run_hook
 from tensorflow.contrib.learn.python.learn.session_run_hook import SessionRunArgs
 from tensorflow.core.framework.summary_pb2 import Summary
 from tensorflow.python.framework import ops
+from tensorflow.python.ops import control_flow_ops
 from tensorflow.python.platform import tf_logging as logging
 from tensorflow.python.training import training_util
 from tensorflow.python.training.summary_io import SummaryWriterCache
@@ -175,3 +176,40 @@ class StopAfterNTrees(session_run_hook.SessionRunHook):
       logging.info("Requesting stop since we have reached %d trees.",
                    num_finalized_trees)
       run_context.request_stop()
+
+
+class SwitchTrainOp(session_run_hook.SessionRunHook):
+  """Hook that switches the train op after specified number of steps.
+
+  Hook that replaces the train op depending on the number of steps of training
+  that have taken place. The first_train_op is used till train_steps steps
+  are reached. Thereafter the second_train_op is used.
+  """
+
+  def __init__(self, first_train_op, train_steps, second_train_op):
+    """Initializes a `SwitchTrainOp`."""
+    self._first_train_op = first_train_op
+    self._second_train_op = second_train_op
+    self._train_steps = train_steps
+
+  def _get_train_op_for_global_step(self, current_step):
+    """Gets train_op for current global step."""
+    if current_step < self._train_steps:
+      return self._first_train_op
+    return self._second_train_op
+
+  def begin(self):
+    self._global_step_tensor = training_util.get_global_step()
+    self._current_train_op = control_flow_ops.no_op()
+    if self._global_step_tensor is None:
+      raise RuntimeError(
+          "Global step should be created to use SwitchTrainOp.")
+
+  def before_run(self, run_context):  # pylint: disable=unused-argument
+    return session_run_hook.SessionRunArgs(
+        {"global_step": self._global_step_tensor,
+         "train_op": self._current_train_op})
+
+  def after_run(self, run_context, run_values):
+    self._current_train_op = self._get_train_op_for_global_step(
+        run_values.results["global_step"])
diff --git a/tensorflow/contrib/boosted_trees/examples/boston_combined.py b/tensorflow/contrib/boosted_trees/examples/boston_combined.py
new file mode 100644
index 0000000000..e04b56afbf
--- /dev/null
+++ b/tensorflow/contrib/boosted_trees/examples/boston_combined.py
@@ -0,0 +1,165 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+r"""Regression on Boston housing data using DNNBoostedTreeCombinedRegressor.
+
+  Example Usage:
+
+  python tensorflow/contrib/boosted_trees/examples/boston_combined.py \
+  --batch_size=404 --output_dir="/tmp/boston" \
+  --dnn_hidden_units="8,4" --dnn_steps_to_train=1000 \
+  --tree_depth=4 --tree_learning_rate=0.1 \
+  --num_trees=100 --tree_l2=0.001 --num_eval_steps=1 \
+  --vmodule=training_ops=1
+
+  When training is done, mean squared error on eval data is reported.
+  Point tensorboard to the directory for the run to see how the training
+  progresses:
+
+  tensorboard --logdir=/tmp/boston
+
+"""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import argparse
+import sys
+import tensorflow as tf
+
+from tensorflow.contrib.boosted_trees.estimator_batch.dnn_tree_combined_estimator import DNNBoostedTreeCombinedRegressor
+from tensorflow.contrib.boosted_trees.proto import learner_pb2
+from tensorflow.contrib.layers.python.layers import feature_column
+from tensorflow.contrib.learn.python.learn import learn_runner
+from tensorflow.contrib.learn.python.learn.utils import input_fn_utils
+from tensorflow.contrib.learn.python.learn.utils import saved_model_export_utils
+
+_BOSTON_NUM_FEATURES = 13
+
+
+def _get_estimator(output_dir, feature_cols):
+  """Configures DNNBoostedTreeCombinedRegressor based on flags."""
+  learner_config = learner_pb2.LearnerConfig()
+  learner_config.learning_rate_tuner.fixed.learning_rate = (
+      FLAGS.tree_learning_rate)
+  learner_config.regularization.l1 = 0.0
+  learner_config.regularization.l2 = FLAGS.tree_l2
+  learner_config.constraints.max_tree_depth = FLAGS.tree_depth
+
+  run_config = tf.contrib.learn.RunConfig(save_summary_steps=1)
+
+  # Create a DNNBoostedTreeCombinedRegressor estimator.
+  estimator = DNNBoostedTreeCombinedRegressor(
+      dnn_hidden_units=[int(x) for x in FLAGS.dnn_hidden_units.split(",")],
+      dnn_feature_columns=feature_cols,
+      tree_learner_config=learner_config,
+      num_trees=FLAGS.num_trees,
+      # This should be the number of examples. For large datasets it can be
+      # larger than the batch_size.
+      tree_examples_per_layer=FLAGS.batch_size,
+      model_dir=output_dir,
+      config=run_config,
+      dnn_input_layer_to_tree=True,
+      dnn_steps_to_train=FLAGS.dnn_steps_to_train)
+  return estimator
+
+
+def _make_experiment_fn(output_dir):
+  """Creates experiment for DNNBoostedTreeCombinedRegressor."""
+  (x_train, y_train), (x_test,
+                       y_test) = tf.keras.datasets.boston_housing.load_data()
+
+  train_input_fn = tf.estimator.inputs.numpy_input_fn(
+      x={"x": x_train},
+      y=y_train,
+      batch_size=FLAGS.batch_size,
+      num_epochs=None,
+      shuffle=True)
+  eval_input_fn = tf.estimator.inputs.numpy_input_fn(
+      x={"x": x_test}, y=y_test, num_epochs=1, shuffle=False)
+
+  feature_columns = [
+      feature_column.real_valued_column("x", dimension=_BOSTON_NUM_FEATURES)
+  ]
+  feature_spec = tf.contrib.layers.create_feature_spec_for_parsing(
+      feature_columns)
+  serving_input_fn = input_fn_utils.build_parsing_serving_input_fn(feature_spec)
+  export_strategies = [
+      saved_model_export_utils.make_export_strategy(serving_input_fn)]
+  return tf.contrib.learn.Experiment(
+      estimator=_get_estimator(output_dir, feature_columns),
+      train_input_fn=train_input_fn,
+      eval_input_fn=eval_input_fn,
+      train_steps=None,
+      eval_steps=FLAGS.num_eval_steps,
+      eval_metrics=None,
+      export_strategies=export_strategies)
+
+
+def main(unused_argv):
+  learn_runner.run(
+      experiment_fn=_make_experiment_fn,
+      output_dir=FLAGS.output_dir,
+      schedule="train_and_evaluate")
+
+
+if __name__ == "__main__":
+  tf.logging.set_verbosity(tf.logging.INFO)
+  parser = argparse.ArgumentParser()
+  # Define the list of flags that users can change.
+  parser.add_argument(
+      "--batch_size",
+      type=int,
+      default=1000,
+      help="The batch size for reading data.")
+  parser.add_argument(
+      "--output_dir",
+      type=str,
+      required=True,
+      help="Choose the dir for the output.")
+  parser.add_argument(
+      "--num_eval_steps",
+      type=int,
+      default=1,
+      help="The number of steps to run evaluation for.")
+  # Flags for configuring DNNBoostedTreeCombinedRegressor.
+  parser.add_argument(
+      "--dnn_hidden_units",
+      type=str,
+      default="8,4",
+      help="Hidden layers for DNN.")
+  parser.add_argument(
+      "--dnn_steps_to_train",
+      type=int,
+      default=1000,
+      help="Number of steps to train DNN.")
+  parser.add_argument(
+      "--tree_depth", type=int, default=4, help="Maximum depth of trees.")
+  parser.add_argument(
+      "--tree_l2", type=float, default=1.0, help="l2 regularization per batch.")
+  parser.add_argument(
+      "--tree_learning_rate",
+      type=float,
+      default=0.1,
+      help=("Learning rate (shrinkage weight) with which each "
+            "new tree is added."))
+  parser.add_argument(
+      "--num_trees",
+      type=int,
+      default=None,
+      required=True,
+      help="Number of trees to grow before stopping.")
+
+  FLAGS, unparsed = parser.parse_known_args()
+  tf.app.run(main=main, argv=[sys.argv[0]] + unparsed)
-- 
GitLab


From 908343b5c0f957224ee44512d7ccfc81a8349e2f Mon Sep 17 00:00:00 2001
From: Gunhan Gulsoy <gunan@google.com>
Date: Fri, 15 Dec 2017 15:02:02 -0800
Subject: [PATCH 1095/1225] Bump the eigen dependency version. (#15405)

Fixes #12052
---
 tensorflow/workspace.bzl | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl
index 046c2b2391..846b9bc645 100644
--- a/tensorflow/workspace.bzl
+++ b/tensorflow/workspace.bzl
@@ -95,11 +95,11 @@ def tf_workspace(path_prefix="", tf_repo_name=""):
   tf_http_archive(
       name = "eigen_archive",
       urls = [
-          "https://mirror.bazel.build/bitbucket.org/eigen/eigen/get/b6e6d0cf6a77.tar.gz",
-          "https://bitbucket.org/eigen/eigen/get/b6e6d0cf6a77.tar.gz",
+          "https://mirror.bazel.build/bitbucket.org/eigen/eigen/get/c2947c341c68.tar.gz",
+          "https://bitbucket.org/eigen/eigen/get/c2947c341c68.tar.gz",
       ],
-      sha256 = "0840c497f2749b5e90bda666aab96be6da90dc75b4e21ca9843cae69b7fed52a",
-      strip_prefix = "eigen-eigen-b6e6d0cf6a77",
+      sha256 = "f21f8ab8a8dbcb91cd0deeade19a043f47708d0da7a4000164cdf203b4a71e34",
+      strip_prefix = "eigen-eigen-c2947c341c68",
       build_file = str(Label("//third_party:eigen.BUILD")),
   )
 
-- 
GitLab


From a81e83eea823ff1f3e6871eb24f85e7ca09dcf72 Mon Sep 17 00:00:00 2001
From: Chris Leary <leary@google.com>
Date: Fri, 15 Dec 2017 15:13:31 -0800
Subject: [PATCH 1096/1225] [XLA] Add a flag to control the HLO scheduling
 algorithm choice.

List scheduling is more easily rematerialized sometimes, this
gives the ability to force list scheduling via the API.

PiperOrigin-RevId: 179246142
---
 .../xla/service/hlo_rematerialization.cc      | 14 ++--
 .../xla/service/hlo_rematerialization.h       | 13 +++-
 .../xla/service/hlo_rematerialization_test.cc | 72 +++++++++----------
 .../compiler/xla/service/hlo_scheduling.cc    | 30 +++++---
 .../compiler/xla/service/hlo_scheduling.h     | 15 +++-
 5 files changed, 88 insertions(+), 56 deletions(-)

diff --git a/tensorflow/compiler/xla/service/hlo_rematerialization.cc b/tensorflow/compiler/xla/service/hlo_rematerialization.cc
index 1747790e63..c6b4dc0368 100644
--- a/tensorflow/compiler/xla/service/hlo_rematerialization.cc
+++ b/tensorflow/compiler/xla/service/hlo_rematerialization.cc
@@ -1213,11 +1213,12 @@ StatusOr<bool> HloRematerialization::Run(
 
   XLA_VLOG_LINES(3, "Before HloRematerialization:\n" + module->ToString());
   // Create initial sequence of HLO instructions.
-  TF_ASSIGN_OR_RETURN(*sequence,
-                      CreateMemoryMinimizingSequence(
-                          *module, [this](const LogicalBuffer& buffer) {
-                            return size_function_(buffer.shape());
-                          }));
+  TF_ASSIGN_OR_RETURN(*sequence, CreateMemoryMinimizingSequence(
+                                     *module,
+                                     [this](const LogicalBuffer& buffer) {
+                                       return size_function_(buffer.shape());
+                                     },
+                                     scheduler_algorithm_));
   // Compute peak memory usage of all computations in the module called in a
   // sequential context.
   call_graph_ = CallGraph::Build(module);
@@ -1318,9 +1319,10 @@ StatusOr<bool> HloRematerialization::Run(
 /* static */ StatusOr<bool> HloRematerialization::RematerializeAndSchedule(
     const HloRematerialization::ShapeSizeFunction& size_function,
     int64 memory_limit_bytes, HloModule* hlo_module,
+    SchedulerAlgorithm scheduler_algorithm,
     SequentialHloOrdering::HloModuleSequence* sequence,
     RematerializationSizes* sizes) {
-  HloRematerialization remat(size_function);
+  HloRematerialization remat(scheduler_algorithm, size_function);
   return remat.Run(hlo_module, sequence, memory_limit_bytes, sizes);
 }
 
diff --git a/tensorflow/compiler/xla/service/hlo_rematerialization.h b/tensorflow/compiler/xla/service/hlo_rematerialization.h
index 11f79a6d41..5255343903 100644
--- a/tensorflow/compiler/xla/service/hlo_rematerialization.h
+++ b/tensorflow/compiler/xla/service/hlo_rematerialization.h
@@ -20,6 +20,7 @@
 #include "tensorflow/compiler/xla/service/hlo_computation.h"
 #include "tensorflow/compiler/xla/service/hlo_instruction.h"
 #include "tensorflow/compiler/xla/service/hlo_module.h"
+#include "tensorflow/compiler/xla/service/hlo_scheduling.h"
 #include "tensorflow/compiler/xla/service/tuple_points_to_analysis.h"
 
 namespace xla {
@@ -65,12 +66,15 @@ class HloRematerialization {
   // code generation.
   static StatusOr<bool> RematerializeAndSchedule(
       const ShapeSizeFunction& size_function, int64 memory_limit_bytes,
-      HloModule* hlo_module, SequentialHloOrdering::HloModuleSequence* sequence,
+      HloModule* hlo_module, SchedulerAlgorithm scheduler_algorithm,
+      SequentialHloOrdering::HloModuleSequence* sequence,
       RematerializationSizes* sizes = nullptr);
 
  protected:
-  HloRematerialization(const ShapeSizeFunction& size_function)
-      : size_function_(size_function) {}
+  HloRematerialization(SchedulerAlgorithm scheduler_algorithm,
+                       const ShapeSizeFunction& size_function)
+      : scheduler_algorithm_(scheduler_algorithm),
+        size_function_(size_function) {}
   ~HloRematerialization() {}
 
   // Runs rematerialization on the given module. Returns whether the module was
@@ -103,6 +107,9 @@ class HloRematerialization {
   StatusOr<int64> CalledComputationsMemoryUsage(
       const HloInstruction* instruction) const;
 
+  // Selects an algorithm to use for HLO scheduling.
+  SchedulerAlgorithm scheduler_algorithm_;
+
   // Function which computes the size of the top-level buffer of a shape.
   const ShapeSizeFunction size_function_;
 
diff --git a/tensorflow/compiler/xla/service/hlo_rematerialization_test.cc b/tensorflow/compiler/xla/service/hlo_rematerialization_test.cc
index c9b57166af..216825959a 100644
--- a/tensorflow/compiler/xla/service/hlo_rematerialization_test.cc
+++ b/tensorflow/compiler/xla/service/hlo_rematerialization_test.cc
@@ -158,11 +158,11 @@ TEST_F(HloRematerializationTest, SingleComputation) {
   SequentialHloOrdering::HloModuleSequence sequence;
   // Computation requires 16KB without rematerialization, but uses only 12KB
   // with rematerialization so pick a memory limit between these values (14KB).
-  TF_ASSERT_OK_AND_ASSIGN(
-      bool changed,
-      HloRematerialization::RematerializeAndSchedule(
-          ByteSizeOf,
-          /*memory_limit_bytes=*/14 * 1024, module.get(), &sequence));
+  TF_ASSERT_OK_AND_ASSIGN(bool changed,
+                          HloRematerialization::RematerializeAndSchedule(
+                              ByteSizeOf,
+                              /*memory_limit_bytes=*/14 * 1024, module.get(),
+                              SchedulerAlgorithm::kAuto, &sequence));
   EXPECT_TRUE(changed);
 
   // Root should not have changed.
@@ -191,11 +191,11 @@ TEST_F(HloRematerializationTest, SingleComputationNoRematerialization) {
   EXPECT_EQ(computation->instruction_count(), 7);
 
   SequentialHloOrdering::HloModuleSequence sequence;
-  TF_ASSERT_OK_AND_ASSIGN(
-      bool changed,
-      HloRematerialization::RematerializeAndSchedule(
-          ByteSizeOf,
-          /*memory_limit_bytes=*/20 * 1024, module.get(), &sequence));
+  TF_ASSERT_OK_AND_ASSIGN(bool changed,
+                          HloRematerialization::RematerializeAndSchedule(
+                              ByteSizeOf,
+                              /*memory_limit_bytes=*/20 * 1024, module.get(),
+                              SchedulerAlgorithm::kAuto, &sequence));
 
   // No instructions should have been materialized.
   EXPECT_FALSE(changed);
@@ -232,11 +232,11 @@ TEST_F(HloRematerializationTest, RematerializeAroundWhile) {
   // while so the peak memory use of the module is 18KB. Set the memory limit a
   // bit lower (17KB) to force rematerialization of the entry computation.
   SequentialHloOrdering::HloModuleSequence sequence;
-  TF_ASSERT_OK_AND_ASSIGN(
-      bool changed,
-      HloRematerialization::RematerializeAndSchedule(
-          ByteSizeOf,
-          /*memory_limit_bytes=*/17 * 1024, module.get(), &sequence));
+  TF_ASSERT_OK_AND_ASSIGN(bool changed,
+                          HloRematerialization::RematerializeAndSchedule(
+                              ByteSizeOf,
+                              /*memory_limit_bytes=*/17 * 1024, module.get(),
+                              SchedulerAlgorithm::kAuto, &sequence));
   EXPECT_TRUE(changed);
 
   // Only the entry computation should have a rematerialized instruction added.
@@ -268,11 +268,11 @@ TEST_F(HloRematerializationTest, RematerializeEntryAndWhileBody) {
   EXPECT_EQ(body_computation->instruction_count(), 7);
 
   SequentialHloOrdering::HloModuleSequence sequence;
-  TF_ASSERT_OK_AND_ASSIGN(
-      bool changed,
-      HloRematerialization::RematerializeAndSchedule(
-          ByteSizeOf,
-          /*memory_limit_bytes=*/15 * 1024, module.get(), &sequence));
+  TF_ASSERT_OK_AND_ASSIGN(bool changed,
+                          HloRematerialization::RematerializeAndSchedule(
+                              ByteSizeOf,
+                              /*memory_limit_bytes=*/15 * 1024, module.get(),
+                              SchedulerAlgorithm::kAuto, &sequence));
   EXPECT_TRUE(changed);
 
   // Both computations should have a rematerialized instruction added.
@@ -310,11 +310,11 @@ TEST_F(HloRematerializationTest, RematerializeNestedComputations) {
   // If all computations are maximally rematerialized then peak memory usage is
   // ~12K so pick something slightly larger.
   SequentialHloOrdering::HloModuleSequence sequence;
-  TF_ASSERT_OK_AND_ASSIGN(
-      bool changed,
-      HloRematerialization::RematerializeAndSchedule(
-          ByteSizeOf,
-          /*memory_limit_bytes=*/13 * 1024, module.get(), &sequence));
+  TF_ASSERT_OK_AND_ASSIGN(bool changed,
+                          HloRematerialization::RematerializeAndSchedule(
+                              ByteSizeOf,
+                              /*memory_limit_bytes=*/13 * 1024, module.get(),
+                              SchedulerAlgorithm::kAuto, &sequence));
   EXPECT_TRUE(changed);
 
   // All computations should have a rematerialized instruction added.
@@ -385,7 +385,7 @@ TEST_F(HloRematerializationTest, RngNotRematerialized) {
       bool changed, HloRematerialization::RematerializeAndSchedule(
                         ByteSizeOf,
                         /*memory_limit_bytes=*/4 * ByteSizeOf(vec1024_shape_),
-                        module.get(), &sequence));
+                        module.get(), SchedulerAlgorithm::kAuto, &sequence));
   EXPECT_TRUE(changed);
   // The rng should not have been rematerialized.
   EXPECT_EQ(count_rngs(entry_computation), 1);
@@ -476,11 +476,11 @@ TEST_F(HloRematerializationTest, InstructionRematerializedMultipleTimes) {
   // Pick a memory limit some where between 24KB (initial peak memory including
   // parameter and output) and 20KB (peak memory possible with
   // rematerialization).
-  TF_ASSERT_OK_AND_ASSIGN(
-      bool changed,
-      HloRematerialization::RematerializeAndSchedule(
-          ByteSizeOf,
-          /*memory_limit_bytes=*/22 * 1024, module.get(), &sequence));
+  TF_ASSERT_OK_AND_ASSIGN(bool changed,
+                          HloRematerialization::RematerializeAndSchedule(
+                              ByteSizeOf,
+                              /*memory_limit_bytes=*/22 * 1024, module.get(),
+                              SchedulerAlgorithm::kAuto, &sequence));
   EXPECT_TRUE(changed);
 
   // The broadcast should have been rematerialized 3 times.
@@ -573,11 +573,11 @@ TEST_P(IndirectUseTest, IndirectUseNotRematerialized) {
   // Pick a memory limit some where between 24KB (initial peak memory including
   // parameter and output) and 20KB (peak memory possible with
   // rematerialization).
-  TF_ASSERT_OK_AND_ASSIGN(
-      bool changed,
-      HloRematerialization::RematerializeAndSchedule(
-          ByteSizeOf,
-          /*memory_limit_bytes=*/22 * 1024, module.get(), &sequence));
+  TF_ASSERT_OK_AND_ASSIGN(bool changed,
+                          HloRematerialization::RematerializeAndSchedule(
+                              ByteSizeOf,
+                              /*memory_limit_bytes=*/22 * 1024, module.get(),
+                              SchedulerAlgorithm::kAuto, &sequence));
   // Rematerialization should only occur if the rematerializable instruction has
   // no indirect uses.
   if (indirectly_used) {
diff --git a/tensorflow/compiler/xla/service/hlo_scheduling.cc b/tensorflow/compiler/xla/service/hlo_scheduling.cc
index 0dc17392f1..2594c29efd 100644
--- a/tensorflow/compiler/xla/service/hlo_scheduling.cc
+++ b/tensorflow/compiler/xla/service/hlo_scheduling.cc
@@ -369,7 +369,17 @@ StatusOr<int64> MinimumMemoryForComputation(
 StatusOr<std::vector<const HloInstruction*>> CreateMemoryMinimizingSequence(
     const HloComputation& computation,
     const TuplePointsToAnalysis& points_to_analysis,
-    const LogicalBuffer::SizeFunction& size_function) {
+    const LogicalBuffer::SizeFunction& size_function,
+    SchedulerAlgorithm algorithm) {
+  VLOG(2) << "Computation: " << computation.name();
+  if (algorithm == SchedulerAlgorithm::kListSchedule) {
+    return ListScheduler::Run(computation, points_to_analysis, size_function);
+  }
+  if (algorithm == SchedulerAlgorithm::kDfsSchedule) {
+    return RunDFSMemoryScheduler(computation, points_to_analysis,
+                                 size_function);
+  }
+
   // We try both a list-scheduler based ordering and a DFS based ordering, and
   // choose whichever returns a lower min-memory, not accounting for
   // fragmentation.
@@ -377,7 +387,6 @@ StatusOr<std::vector<const HloInstruction*>> CreateMemoryMinimizingSequence(
   // Note that this is just a heuristic. One obvious inaccuracy is that the
   // memory required for sub-computations might be different when considered
   // within the caller's context. But it's good enough for now.
-  VLOG(2) << "Computation: " << computation.name();
   TF_ASSIGN_OR_RETURN(
       std::vector<const HloInstruction*> list_sequence,
       ListScheduler::Run(computation, points_to_analysis, size_function));
@@ -410,27 +419,30 @@ StatusOr<std::vector<const HloInstruction*>> CreateMemoryMinimizingSequence(
 }  // namespace
 
 StatusOr<SequentialHloOrdering::HloModuleSequence>
-CreateMemoryMinimizingSequence(
-    const HloModule& module, const LogicalBuffer::SizeFunction& size_function) {
+CreateMemoryMinimizingSequence(const HloModule& module,
+                               const LogicalBuffer::SizeFunction& size_function,
+                               SchedulerAlgorithm algorithm) {
   SequentialHloOrdering::HloModuleSequence sequence;
   TF_ASSIGN_OR_RETURN(std::unique_ptr<TuplePointsToAnalysis> points_to_analysis,
                       TuplePointsToAnalysis::Run(&module));
   for (const auto* computation : module.MakeNonfusionComputations()) {
-    TF_ASSIGN_OR_RETURN(sequence[computation],
-                        CreateMemoryMinimizingSequence(
-                            *computation, *points_to_analysis, size_function));
+    TF_ASSIGN_OR_RETURN(
+        sequence[computation],
+        CreateMemoryMinimizingSequence(*computation, *points_to_analysis,
+                                       size_function, algorithm));
   }
   return sequence;
 }
 
 StatusOr<std::vector<const HloInstruction*>> CreateMemoryMinimizingSequence(
     const HloComputation& computation,
-    const LogicalBuffer::SizeFunction& size_function) {
+    const LogicalBuffer::SizeFunction& size_function,
+    SchedulerAlgorithm algorithm) {
   CHECK(!computation.IsFusionComputation());
   TF_ASSIGN_OR_RETURN(std::unique_ptr<TuplePointsToAnalysis> points_to_analysis,
                       TuplePointsToAnalysis::Run(computation.parent()));
   return CreateMemoryMinimizingSequence(computation, *points_to_analysis,
-                                        size_function);
+                                        size_function, algorithm);
 }
 
 }  // namespace xla
diff --git a/tensorflow/compiler/xla/service/hlo_scheduling.h b/tensorflow/compiler/xla/service/hlo_scheduling.h
index ec92a56b96..1d1eb1e064 100644
--- a/tensorflow/compiler/xla/service/hlo_scheduling.h
+++ b/tensorflow/compiler/xla/service/hlo_scheduling.h
@@ -33,17 +33,28 @@ StatusOr<int64> MinimumMemoryForSequence(
     const SequentialHloOrdering::HloModuleSequence& module_sequence,
     const LogicalBuffer::SizeFunction& size_function);
 
+enum class SchedulerAlgorithm {
+  kListSchedule,
+  kDfsSchedule,
+
+  // Selects the available scheduler algorithm that had the minimum memory in
+  // the resulting sequence (a la MinimumMemoryForSequence).
+  kAuto,
+};
+
 // Returns an HloModuleSequence which seeks to minimize the memory required for
 // the computation. size_function is the function returning the number of bytes
 // required for a LogicalBuffer.
 StatusOr<SequentialHloOrdering::HloModuleSequence>
 CreateMemoryMinimizingSequence(
-    const HloModule& module, const LogicalBuffer::SizeFunction& size_function);
+    const HloModule& module, const LogicalBuffer::SizeFunction& size_function,
+    SchedulerAlgorithm algorithm = SchedulerAlgorithm::kAuto);
 
 // Overload of above that computes the sequence for a single computation.
 StatusOr<std::vector<const HloInstruction*>> CreateMemoryMinimizingSequence(
     const HloComputation& computation,
-    const LogicalBuffer::SizeFunction& size_function);
+    const LogicalBuffer::SizeFunction& size_function,
+    SchedulerAlgorithm algorithm = SchedulerAlgorithm::kAuto);
 
 }  // namespace xla
 
-- 
GitLab


From 117f458ac520fd76efe57b2bc4078ed8ca6b75fc Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 15 Dec 2017 15:22:11 -0800
Subject: [PATCH 1097/1225] Makes inception_v3 in tensorflow/tensorflow using
 the same initializer as tensorflow/models.

Experiments show that variance_scaling_initializer, which is used by https://github.com/tensorflow/models/blob/master/research/slim/nets/inception_v3.py#L579 converges faster than truncated_normal_initializer, which is used by https://github.com/tensorflow/tensorflow/blob/master/tensorflow/contrib/slim/python/slim/nets/inception_v3.py

PiperOrigin-RevId: 179247166
---
 tensorflow/contrib/slim/python/slim/nets/inception_v3.py | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/tensorflow/contrib/slim/python/slim/nets/inception_v3.py b/tensorflow/contrib/slim/python/slim/nets/inception_v3.py
index e3c0c036d9..432e1f79f1 100644
--- a/tensorflow/contrib/slim/python/slim/nets/inception_v3.py
+++ b/tensorflow/contrib/slim/python/slim/nets/inception_v3.py
@@ -20,6 +20,7 @@ from __future__ import print_function
 
 from tensorflow.contrib import layers
 from tensorflow.contrib.framework.python.ops import arg_scope
+from tensorflow.contrib.layers.python.layers import initializers
 from tensorflow.contrib.layers.python.layers import layers as layers_lib
 from tensorflow.contrib.layers.python.layers import regularizers
 from tensorflow.python.framework import ops
@@ -675,14 +676,12 @@ def _reduced_kernel_size_for_small_input(input_tensor, kernel_size):
 
 
 def inception_v3_arg_scope(weight_decay=0.00004,
-                           stddev=0.1,
                            batch_norm_var_collection='moving_vars',
                            use_fused_batchnorm=True):
   """Defines the default InceptionV3 arg scope.
 
   Args:
     weight_decay: The weight decay to use for regularizing the model.
-    stddev: The standard deviation of the trunctated normal weight initializer.
     batch_norm_var_collection: The name of the collection for the batch norm
       variables.
     use_fused_batchnorm: Enable fused batchnorm.
@@ -714,8 +713,7 @@ def inception_v3_arg_scope(weight_decay=0.00004,
       weights_regularizer=regularizers.l2_regularizer(weight_decay)):
     with arg_scope(
         [layers.conv2d],
-        weights_initializer=init_ops.truncated_normal_initializer(
-            stddev=stddev),
+        weights_initializer=initializers.variance_scaling_initializer(),
         activation_fn=nn_ops.relu,
         normalizer_fn=layers_lib.batch_norm,
         normalizer_params=batch_norm_params) as sc:
-- 
GitLab


From b831830334bc7a57cd3052f5e1ce39cb9e16f363 Mon Sep 17 00:00:00 2001
From: Yong Tang <yong.tang.github@outlook.com>
Date: Thu, 17 Aug 2017 19:30:49 +0000
Subject: [PATCH 1098/1225] Add compression support to RecordInput

This fix tries to fix the request raised in 12344 so that
it is possible to process RecordInput with compressions.

An attr of `compression_type` has been added.

Additional tests have been created to cover the changes.

This fix fixes 12344.

Signed-off-by: Yong Tang <yong.tang.github@outlook.com>
---
 tensorflow/core/kernels/record_input_op.cc |  3 +++
 tensorflow/core/kernels/record_yielder.cc  |  5 ++++-
 tensorflow/core/kernels/record_yielder.h   |  2 ++
 tensorflow/core/ops/data_flow_ops.cc       |  3 +++
 tensorflow/python/ops/data_flow_ops.py     | 12 +++++++++++-
 5 files changed, 23 insertions(+), 2 deletions(-)

diff --git a/tensorflow/core/kernels/record_input_op.cc b/tensorflow/core/kernels/record_input_op.cc
index 878996c9d6..8a4bf4745c 100644
--- a/tensorflow/core/kernels/record_input_op.cc
+++ b/tensorflow/core/kernels/record_input_op.cc
@@ -37,6 +37,8 @@ class RecordInputOp : public OpKernel {
     GETATTR(int64, file_parallelism);
     GETATTR(int64, batch_size);
 #undef GETATTR
+    string compression_type;
+    ctx->GetAttr("compression_type", &compression_type);
 
     RecordYielder::Options yopts;
     yopts.file_pattern = file_pattern;
@@ -44,6 +46,7 @@ class RecordInputOp : public OpKernel {
     yopts.bufsize = file_buffer_size;
     yopts.file_shuffle_shift_ratio = file_shuffle_shift_ratio;
     yopts.parallelism = file_parallelism;
+    yopts.compression_type = compression_type;
     yielder_ = std::unique_ptr<RecordYielder>(new RecordYielder(ctx, yopts));
 
     batch_size_ = batch_size;
diff --git a/tensorflow/core/kernels/record_yielder.cc b/tensorflow/core/kernels/record_yielder.cc
index e4fa0ed322..3fd9bf9def 100644
--- a/tensorflow/core/kernels/record_yielder.cc
+++ b/tensorflow/core/kernels/record_yielder.cc
@@ -206,7 +206,10 @@ void RecordYielder::ShardLoop(Shard* shard) {
       shard->status = errors::InvalidArgument("Can't open ", filename);
       break;
     }
-    io::RecordReader rdr(file.get());
+    io::RecordReaderOptions options =
+        io::RecordReaderOptions::CreateRecordReaderOptions(
+            opts_.compression_type);
+    io::RecordReader rdr(file.get(), options);
     uint64 offset = 0;
     string record;
     while (true) {
diff --git a/tensorflow/core/kernels/record_yielder.h b/tensorflow/core/kernels/record_yielder.h
index c630181221..34817ad51b 100644
--- a/tensorflow/core/kernels/record_yielder.h
+++ b/tensorflow/core/kernels/record_yielder.h
@@ -78,6 +78,8 @@ class RecordYielder {
     // Uses these many concurrent tfrecord iterators to iterate through
     // tfrecords.
     int32 parallelism = 1;
+
+    string compression_type;
   };
 
   explicit RecordYielder(OpKernelConstruction* context,
diff --git a/tensorflow/core/ops/data_flow_ops.cc b/tensorflow/core/ops/data_flow_ops.cc
index ac2dc601f1..b3d7653359 100644
--- a/tensorflow/core/ops/data_flow_ops.cc
+++ b/tensorflow/core/ops/data_flow_ops.cc
@@ -2497,6 +2497,7 @@ REGISTER_OP("RecordInput")
     .Attr("file_buffer_size: int = 10000")
     .Attr("file_parallelism: int = 16")
     .Attr("batch_size: int = 32")
+    .Attr("compression_type: string = ''")
     .SetIsStateful()
     .SetShapeFn(shape_inference::UnknownShape)
     .Doc(R"doc(
@@ -2510,6 +2511,8 @@ file_shuffle_shift_ratio: Shifts the list of files after the list is randomly
 file_buffer_size: The randomization shuffling buffer.
 file_parallelism: How many sstables are opened and concurrently iterated over.
 batch_size: The batch size.
+compression_type: The type of compression for the file. Currently ZLIB and
+    GZIP are supported. Defaults to none.
 )doc");
 
 }  // namespace tensorflow
diff --git a/tensorflow/python/ops/data_flow_ops.py b/tensorflow/python/ops/data_flow_ops.py
index c186eb5b7e..f441f6d4bf 100644
--- a/tensorflow/python/ops/data_flow_ops.py
+++ b/tensorflow/python/ops/data_flow_ops.py
@@ -31,6 +31,7 @@ from tensorflow.python.framework import ops
 from tensorflow.python.framework import random_seed
 from tensorflow.python.framework import tensor_shape
 from tensorflow.python.framework import tensor_util
+from tensorflow.python.lib.io import python_io
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import control_flow_ops
 from tensorflow.python.ops import gen_data_flow_ops
@@ -2225,7 +2226,8 @@ class RecordInput(object):
                shift_ratio=0,
                seed=0,
                name=None,
-               batches=None):
+               batches=None,
+               compression_type=None):
     """Constructs a RecordInput Op.
 
     Args:
@@ -2243,6 +2245,8 @@ class RecordInput(object):
         how many batches to create, which are returned as a list when
         `get_yield_op()` is called. An example use case is to split processing
         between devices on one computer.
+      compression_type: The type of compression for the file. Currently ZLIB and
+        GZIP are supported. Defaults to none.
 
     Raises:
       ValueError: If one of the arguments is invalid.
@@ -2257,12 +2261,17 @@ class RecordInput(object):
     self._shift_ratio = shift_ratio
     self._seed = seed
     self._name = name
+    self._compression_type = python_io.TFRecordCompressionType.NONE
+    if compression_type is not None:
+      self._compression_type = compression_type
 
   def get_yield_op(self):
     """Adds a node that yields a group of records every time it is executed.
     If RecordInput `batches` parameter is not None, it yields a list of
     record batches with the specified `batch_size`.
     """
+    compression_type = python_io.TFRecordOptions.get_compression_type_string(
+        python_io.TFRecordOptions(self._compression_type))
     records = gen_data_flow_ops.record_input(
         file_pattern=self._file_pattern,
         file_buffer_size=self._buffer_size,
@@ -2270,6 +2279,7 @@ class RecordInput(object):
         file_shuffle_shift_ratio=self._shift_ratio,
         batch_size=self._batch_size,
         file_random_seed=self._seed,
+        compression_type=compression_type,
         name=self._name)
     if self._batches is None:
       return records
-- 
GitLab


From fe54ab55eee43814b033d0efa6334e81cced59e6 Mon Sep 17 00:00:00 2001
From: Yong Tang <yong.tang.github@outlook.com>
Date: Thu, 17 Aug 2017 19:31:11 +0000
Subject: [PATCH 1099/1225] Add test cases of compression_type for RecordInput.

Signed-off-by: Yong Tang <yong.tang.github@outlook.com>
---
 .../python/kernel_tests/record_input_test.py  | 39 +++++++++++++++++--
 1 file changed, 36 insertions(+), 3 deletions(-)

diff --git a/tensorflow/python/kernel_tests/record_input_test.py b/tensorflow/python/kernel_tests/record_input_test.py
index 1ec48ac361..0945ed24bf 100644
--- a/tensorflow/python/kernel_tests/record_input_test.py
+++ b/tensorflow/python/kernel_tests/record_input_test.py
@@ -26,13 +26,14 @@ from tensorflow.python.ops import data_flow_ops
 from tensorflow.python.ops import variables
 from tensorflow.python.platform import test
 
-
 class RecordInputOpTest(test.TestCase):
 
-  def generateTestData(self, prefix, n, m):
+  def generateTestData(self, prefix, n, m,
+      compression_type=tf_record.TFRecordCompressionType.NONE):
+    options = tf_record.TFRecordOptions(compression_type)
     for i in range(n):
       f = os.path.join(self.get_temp_dir(), prefix + "." + str(i))
-      w = tf_record.TFRecordWriter(f)
+      w = tf_record.TFRecordWriter(f, options=options)
 
       for j in range(m):
         w.write("{0:0{width}}".format(i * m + j, width=10).encode("utf-8"))
@@ -52,6 +53,38 @@ class RecordInputOpTest(test.TestCase):
 
       self.assertEqual(sess.run(yield_op), b"0000000000")
 
+  def testRecordInputSimpleGzip(self):
+    with self.test_session() as sess:
+      self.generateTestData("basic", 1, 1,
+          compression_type=tf_record.TFRecordCompressionType.GZIP)
+
+      yield_op = data_flow_ops.RecordInput(
+          file_pattern=os.path.join(self.get_temp_dir(), "basic.*"),
+          parallelism=1,
+          buffer_size=1,
+          batch_size=1,
+          name="record_input",
+          compression_type=
+              tf_record.TFRecordCompressionType.GZIP).get_yield_op()
+
+      self.assertEqual(sess.run(yield_op), b"0000000000")
+
+  def testRecordInputSimpleZlib(self):
+    with self.test_session() as sess:
+      self.generateTestData("basic", 1, 1,
+          compression_type=tf_record.TFRecordCompressionType.ZLIB)
+
+      yield_op = data_flow_ops.RecordInput(
+          file_pattern=os.path.join(self.get_temp_dir(), "basic.*"),
+          parallelism=1,
+          buffer_size=1,
+          batch_size=1,
+          name="record_input",
+          compression_type=
+              tf_record.TFRecordCompressionType.ZLIB).get_yield_op()
+
+      self.assertEqual(sess.run(yield_op), b"0000000000")
+
   def testRecordInputEpochs(self):
     files = 100
     records_per_file = 100
-- 
GitLab


From b87336318894f3dee1f83f17dcbcc33ed56e1d81 Mon Sep 17 00:00:00 2001
From: Austin Anderson <angerson@google.com>
Date: Fri, 15 Dec 2017 15:44:04 -0800
Subject: [PATCH 1100/1225] Fix duplicated trisycl_include_dir

A recent botched three-way-merge sync seems to have duplicated (more than once on Github) the set_trisycl_include_dir function. This change removes the duplicates in favor of a cleaned-up variant that I added in another recent change.

PiperOrigin-RevId: 179249451
---
 configure.py | 22 ----------------------
 1 file changed, 22 deletions(-)

diff --git a/configure.py b/configure.py
index 589f6c9501..3d553e1c14 100644
--- a/configure.py
+++ b/configure.py
@@ -1097,28 +1097,6 @@ def set_computecpp_toolkit_path(environ_cp):
                               computecpp_toolkit_path)
 
 
-def set_trisycl_include_dir(environ_cp):
-  """Set TRISYCL_INCLUDE_DIR."""
-  ask_trisycl_include_dir = ('Please specify the location of the triSYCL '
-                             'include directory. (Use --config=sycl_trisycl '
-                             'when building with Bazel) '
-                             '[Default is %s]: ') % _DEFAULT_TRISYCL_INCLUDE_DIR
-  while True:
-    trisycl_include_dir = get_from_env_or_user_or_default(
-        environ_cp, 'TRISYCL_INCLUDE_DIR', ask_trisycl_include_dir,
-        _DEFAULT_TRISYCL_INCLUDE_DIR)
-    if os.path.exists(trisycl_include_dir):
-      break
-
-    print('Invalid triSYCL include directory, %s cannot be found'
-          % (trisycl_include_dir))
-
-  # Set TRISYCL_INCLUDE_DIR
-  environ_cp['TRISYCL_INCLUDE_DIR'] = trisycl_include_dir
-  write_action_env_to_bazelrc('TRISYCL_INCLUDE_DIR',
-                              trisycl_include_dir)
-
-
 def set_trisycl_include_dir(environ_cp):
   """Set TRISYCL_INCLUDE_DIR."""
 
-- 
GitLab


From f122ae338f958fa643ba05e3ff9660959bdde3a0 Mon Sep 17 00:00:00 2001
From: Yao Zhang <yaozhang@google.com>
Date: Fri, 15 Dec 2017 16:01:42 -0800
Subject: [PATCH 1101/1225] Support merge and switch nodes.

PiperOrigin-RevId: 179251262
---
 .../grappler/optimizers/layout_optimizer.cc   |  51 +++++++
 .../optimizers/layout_optimizer_test.cc       | 124 +++++++++++++-----
 2 files changed, 145 insertions(+), 30 deletions(-)

diff --git a/tensorflow/core/grappler/optimizers/layout_optimizer.cc b/tensorflow/core/grappler/optimizers/layout_optimizer.cc
index a61080253d..cb848dfce5 100644
--- a/tensorflow/core/grappler/optimizers/layout_optimizer.cc
+++ b/tensorflow/core/grappler/optimizers/layout_optimizer.cc
@@ -100,6 +100,7 @@ std::set<string> GetOpsFormatAgnostic() {
                                           "Lgamma",
                                           "Log",
                                           "Log1p",
+                                          "Merge",
                                           "Mul",
                                           "Neg",
                                           "Pad",
@@ -119,6 +120,9 @@ std::set<string> GetOpsFormatAgnostic() {
                                           "Sinh",
                                           "Slice",
                                           "Split",
+                                          "Switch",
+                                          "RefMerge",
+                                          "RefSwitch",
                                           "Round",
                                           "Rsqrt",
                                           "RsqrtGrad",
@@ -1161,6 +1165,40 @@ class ConcatProcessor : public AgnosticNodeProcessor {
   int axis_node_pos_;
 };
 
+class MergeProcessor : public AgnosticNodeProcessor {
+ public:
+  explicit MergeProcessor(const OptimizeContext& opt_cxt)
+      : AgnosticNodeProcessor(opt_cxt) {}
+
+ protected:
+  bool ShouldProcess() const override {
+    return !MustPreserve() && IsPortZeroDimsFour(*node_) && HasOutputs() &&
+           IsEveryInputAfterNCHWToNHWC() && IsOnGPU();
+  }
+
+  std::vector<int> GetInputPos() const override {
+    std::vector<int> input_pos;
+    input_pos.reserve(node_->input_size());
+    for (int i = 0; i < node_->input_size(); i++) {
+      input_pos.push_back(i);
+    }
+    return input_pos;
+  }
+
+ private:
+  bool IsEveryInputAfterNCHWToNHWC() const {
+    for (const auto& input : node_->input()) {
+      auto input_node = node_map_->GetNode(input);
+      if (IsNodeAfterNCHWToNHWC(*input_node) ||
+          IsNodeNCHWToNHWC(input_node->name())) {
+        continue;
+      }
+      return false;
+    }
+    return true;
+  }
+};
+
 class PadProcessor : public AgnosticNodeProcessor {
  public:
   explicit PadProcessor(const OptimizeContext& opt_cxt)
@@ -1385,6 +1423,15 @@ class SumProcessor : public AgnosticNodeProcessor {
   }
 };
 
+class SwitchProcessor : public AgnosticNodeProcessor {
+ public:
+  explicit SwitchProcessor(const OptimizeContext& opt_cxt)
+      : AgnosticNodeProcessor(opt_cxt) {}
+
+ protected:
+  std::set<int> GetOutputPos() const override { return {0, 1}; }
+};
+
 class DataLayoutOptimizer : GraphProcessor {
  public:
   explicit DataLayoutOptimizer(
@@ -1486,6 +1533,8 @@ class DataLayoutOptimizer : GraphProcessor {
             node_processor.reset(new BinaryOpProcessor(opt_cxt));
           } else if (IsConcat(*node)) {
             node_processor.reset(new ConcatProcessor(opt_cxt));
+          } else if (IsMerge(*node)) {
+            node_processor.reset(new MergeProcessor(opt_cxt));
           } else if (IsPad(*node)) {
             node_processor.reset(new PadProcessor(opt_cxt));
           } else if (IsReluGrad(*node)) {
@@ -1500,6 +1549,8 @@ class DataLayoutOptimizer : GraphProcessor {
             node_processor.reset(new SqueezeProcessor(opt_cxt));
           } else if (IsSum(*node)) {
             node_processor.reset(new SumProcessor(opt_cxt));
+          } else if (IsSwitch(*node)) {
+            node_processor.reset(new SwitchProcessor(opt_cxt));
           } else {
             node_processor.reset(new AgnosticNodeProcessor(opt_cxt));
           }
diff --git a/tensorflow/core/grappler/optimizers/layout_optimizer_test.cc b/tensorflow/core/grappler/optimizers/layout_optimizer_test.cc
index 9a49319821..98109f724e 100644
--- a/tensorflow/core/grappler/optimizers/layout_optimizer_test.cc
+++ b/tensorflow/core/grappler/optimizers/layout_optimizer_test.cc
@@ -263,7 +263,7 @@ TEST_F(LayoutOptimizerTest, EqualSizeWithSamePadding) {
 
 TEST_F(LayoutOptimizerTest, NotEqualSizeWithValidPadding) {
   tensorflow::Scope s = tensorflow::Scope::NewRootScope();
-  auto conv = SimpleConv2D(&s, 3, 2, "VALID");
+  auto conv = SimpleConv2D(&s, 4, 2, "VALID");
   Output fetch = ops::Identity(s.WithOpName("Fetch"), {conv});
   GrapplerItem item;
   TF_CHECK_OK(s.ToGraphDef(&item.graph));
@@ -276,7 +276,7 @@ TEST_F(LayoutOptimizerTest, NotEqualSizeWithValidPadding) {
 
 TEST_F(LayoutOptimizerTest, Pad) {
   tensorflow::Scope s = tensorflow::Scope::NewRootScope();
-  auto conv = SimpleConv2D(&s, 3, 2, "VALID");
+  auto conv = SimpleConv2D(&s, 4, 2, "VALID");
   auto c = ops::Const(s.WithOpName("c"), {1, 2, 3, 4, 5, 6, 7, 8}, {4, 2});
   auto p = ops::Pad(s.WithOpName("p"), conv, c);
   auto o = ops::Identity(s.WithOpName("o"), p);
@@ -303,7 +303,7 @@ TEST_F(LayoutOptimizerTest, Pad) {
 
 TEST_F(LayoutOptimizerTest, Connectivity) {
   tensorflow::Scope s = tensorflow::Scope::NewRootScope();
-  auto conv = SimpleConv2D(&s, 3, 2, "VALID");
+  auto conv = SimpleConv2D(&s, 4, 2, "VALID");
   auto i1 = ops::Identity(s.WithOpName("i1"), conv);
   auto i2 = ops::Identity(s.WithOpName("i2"), i1);
   auto i3 = ops::Identity(s.WithOpName("i3"), i2);
@@ -333,7 +333,7 @@ TEST_F(LayoutOptimizerTest, Connectivity) {
 
 TEST_F(LayoutOptimizerTest, ConnectivityBinaryOpWithInputScalarAnd4D) {
   tensorflow::Scope s = tensorflow::Scope::NewRootScope();
-  auto conv = SimpleConv2D(&s, 3, 2, "VALID");
+  auto conv = SimpleConv2D(&s, 4, 2, "VALID");
   auto i1 = ops::Identity(s.WithOpName("i1"), conv);
   auto i2 = ops::Identity(s.WithOpName("i2"), i1);
   auto scalar_sub = ops::Const(s.WithOpName("scalar_sub"), 3.0f, {});
@@ -366,7 +366,7 @@ TEST_F(LayoutOptimizerTest, ConnectivityBinaryOpWithInputScalarAnd4D) {
 
 TEST_F(LayoutOptimizerTest, PreserveFetch) {
   tensorflow::Scope s = tensorflow::Scope::NewRootScope();
-  auto conv = SimpleConv2D(&s, 3, 2, "VALID");
+  auto conv = SimpleConv2D(&s, 4, 2, "VALID");
   auto i = ops::Identity(s.WithOpName("i"), conv);
   GrapplerItem item;
   item.fetch.push_back("Conv2D");
@@ -381,7 +381,7 @@ TEST_F(LayoutOptimizerTest, PreserveFetch) {
 
 TEST_F(LayoutOptimizerTest, EmptyDevice) {
   tensorflow::Scope s = tensorflow::Scope::NewRootScope();
-  auto conv = SimpleConv2D(&s, 3, 2, "VALID");
+  auto conv = SimpleConv2D(&s, 4, 2, "VALID");
   Output fetch = ops::Identity(s.WithOpName("Fetch"), {conv});
   GrapplerItem item;
   TF_CHECK_OK(s.ToGraphDef(&item.graph));
@@ -396,7 +396,7 @@ TEST_F(LayoutOptimizerTest, EmptyDevice) {
 TEST_F(LayoutOptimizerTest, GPUDevice) {
   tensorflow::Scope s = tensorflow::Scope::NewRootScope();
   auto conv =
-      SimpleConv2D(&s, 3, 2, "VALID", "/job:w/replica:0/task:0/device:gpu:0");
+      SimpleConv2D(&s, 4, 2, "VALID", "/job:w/replica:0/task:0/device:gpu:0");
   Output fetch = ops::Identity(s.WithOpName("Fetch"), {conv});
   GrapplerItem item;
   TF_CHECK_OK(s.ToGraphDef(&item.graph));
@@ -411,7 +411,7 @@ TEST_F(LayoutOptimizerTest, GPUDevice) {
 TEST_F(LayoutOptimizerTest, CPUDeviceLowercase) {
   tensorflow::Scope s = tensorflow::Scope::NewRootScope();
   auto conv =
-      SimpleConv2D(&s, 3, 2, "VALID", "/job:w/replica:0/task:0/device:cpu:0");
+      SimpleConv2D(&s, 4, 2, "VALID", "/job:w/replica:0/task:0/device:cpu:0");
   Output fetch = ops::Identity(s.WithOpName("Fetch"), {conv});
   GrapplerItem item;
   TF_CHECK_OK(s.ToGraphDef(&item.graph));
@@ -425,7 +425,7 @@ TEST_F(LayoutOptimizerTest, CPUDeviceLowercase) {
 
 TEST_F(LayoutOptimizerTest, CPUDeviceUppercase) {
   tensorflow::Scope s = tensorflow::Scope::NewRootScope();
-  auto conv = SimpleConv2D(&s, 3, 2, "VALID", "/CPU:0");
+  auto conv = SimpleConv2D(&s, 4, 2, "VALID", "/CPU:0");
   Output fetch = ops::Identity(s.WithOpName("Fetch"), {conv});
   GrapplerItem item;
   TF_CHECK_OK(s.ToGraphDef(&item.graph));
@@ -467,7 +467,7 @@ TEST_F(LayoutOptimizerTest, FusedBatchNormGradTrainingFalse) {
 
 TEST_F(LayoutOptimizerTest, SplitDimC) {
   tensorflow::Scope s = tensorflow::Scope::NewRootScope();
-  auto conv = SimpleConv2D(&s, 3, 2, "VALID");
+  auto conv = SimpleConv2D(&s, 5, 2, "VALID");
   auto c = ops::Const(s.WithOpName("c"), 3, {});
   auto split = ops::Split(s.WithOpName("split"), c, conv, 2);
   auto i = ops::Identity(s.WithOpName("i"), split[0]);
@@ -487,7 +487,7 @@ TEST_F(LayoutOptimizerTest, SplitDimC) {
 
 TEST_F(LayoutOptimizerTest, SplitDimH) {
   tensorflow::Scope s = tensorflow::Scope::NewRootScope();
-  auto conv = SimpleConv2D(&s, 3, 2, "VALID");
+  auto conv = SimpleConv2D(&s, 6, 2, "SAME");
   auto c = ops::Const(s.WithOpName("c"), 1, {});
   auto split = ops::Split(s.WithOpName("split"), c, conv, 2);
   auto i = ops::Identity(s.WithOpName("i"), split[0]);
@@ -507,7 +507,7 @@ TEST_F(LayoutOptimizerTest, SplitDimH) {
 
 TEST_F(LayoutOptimizerTest, SplitDimW) {
   tensorflow::Scope s = tensorflow::Scope::NewRootScope();
-  auto conv = SimpleConv2D(&s, 3, 2, "VALID");
+  auto conv = SimpleConv2D(&s, 5, 2, "VALID");
   auto c = ops::Const(s.WithOpName("c"), 2, {});
   auto split = ops::Split(s.WithOpName("split"), c, conv, 2);
   auto i = ops::Identity(s.WithOpName("i"), split[0]);
@@ -527,7 +527,7 @@ TEST_F(LayoutOptimizerTest, SplitDimW) {
 
 TEST_F(LayoutOptimizerTest, SplitDimN) {
   tensorflow::Scope s = tensorflow::Scope::NewRootScope();
-  auto conv = SimpleConv2D(&s, 3, 2, "VALID");
+  auto conv = SimpleConv2D(&s, 5, 2, "VALID");
   auto c = ops::Const(s.WithOpName("c"), 0, {});
   auto split = ops::Split(s.WithOpName("split"), c, conv, 2);
   auto i = ops::Identity(s.WithOpName("i"), split[0]);
@@ -547,7 +547,7 @@ TEST_F(LayoutOptimizerTest, SplitDimN) {
 
 TEST_F(LayoutOptimizerTest, SplitNonConstDim) {
   tensorflow::Scope s = tensorflow::Scope::NewRootScope();
-  auto conv = SimpleConv2D(&s, 3, 2, "VALID");
+  auto conv = SimpleConv2D(&s, 5, 2, "VALID");
   auto c = ops::Const(s.WithOpName("c"), 0, {});
   auto i1 = ops::Identity(s.WithOpName("i1"), c);
   auto split = ops::Split(s.WithOpName("split"), i1, conv, 2);
@@ -570,7 +570,7 @@ TEST_F(LayoutOptimizerTest, SplitNonConstDim) {
 
 TEST_F(LayoutOptimizerTest, SplitSamePortToMultipleInputsOfSameNode) {
   tensorflow::Scope s = tensorflow::Scope::NewRootScope();
-  auto conv = SimpleConv2D(&s, 3, 2, "VALID");
+  auto conv = SimpleConv2D(&s, 5, 2, "VALID");
   auto axis = ops::Const(s.WithOpName("axis"), 3);
   auto split = ops::Split(s.WithOpName("split"), axis, conv, 2);
   auto concat =
@@ -593,7 +593,7 @@ TEST_F(LayoutOptimizerTest, SplitSamePortToMultipleInputsOfSameNode) {
 
 TEST_F(LayoutOptimizerTest, ConcatDimH) {
   tensorflow::Scope s = tensorflow::Scope::NewRootScope();
-  auto conv = SimpleConv2D(&s, 3, 2, "VALID");
+  auto conv = SimpleConv2D(&s, 4, 2, "SAME");
   auto axis = ops::Const(s.WithOpName("axis"), 1);
   auto split = ops::Split(s.WithOpName("split"), axis, conv, 2);
   auto concat = ops::Concat(s.WithOpName("concat"), {split[0], split[1]}, axis);
@@ -614,7 +614,7 @@ TEST_F(LayoutOptimizerTest, ConcatDimH) {
 
 TEST_F(LayoutOptimizerTest, ConcatNonConst) {
   tensorflow::Scope s = tensorflow::Scope::NewRootScope();
-  auto conv = SimpleConv2D(&s, 3, 2, "VALID");
+  auto conv = SimpleConv2D(&s, 4, 2, "SAME");
   auto axis = ops::Const(s.WithOpName("axis"), 1);
   auto i = ops::Identity(s.WithOpName("i"), axis);
   auto split = ops::Split(s.WithOpName("split"), axis, conv, 2);
@@ -639,7 +639,7 @@ TEST_F(LayoutOptimizerTest, ConcatNonConst) {
 
 TEST_F(LayoutOptimizerTest, ConcatDimW) {
   tensorflow::Scope s = tensorflow::Scope::NewRootScope();
-  auto conv = SimpleConv2D(&s, 3, 2, "VALID");
+  auto conv = SimpleConv2D(&s, 4, 2, "SAME");
   auto axis = ops::Const(s.WithOpName("axis"), 2);
   auto split = ops::Split(s.WithOpName("split"), axis, conv, 2);
   auto concat = ops::Concat(s.WithOpName("concat"), {split[0], split[1]}, axis);
@@ -660,7 +660,7 @@ TEST_F(LayoutOptimizerTest, ConcatDimW) {
 
 TEST_F(LayoutOptimizerTest, ConcatDimN) {
   tensorflow::Scope s = tensorflow::Scope::NewRootScope();
-  auto conv = SimpleConv2D(&s, 3, 2, "VALID");
+  auto conv = SimpleConv2D(&s, 4, 2, "VALID");
   auto axis = ops::Const(s.WithOpName("axis"), 0);
   auto split = ops::Split(s.WithOpName("split"), axis, conv, 2);
   auto concat = ops::Concat(s.WithOpName("concat"), {split[0], split[1]}, axis);
@@ -681,7 +681,7 @@ TEST_F(LayoutOptimizerTest, ConcatDimN) {
 
 TEST_F(LayoutOptimizerTest, ConcatDimC) {
   tensorflow::Scope s = tensorflow::Scope::NewRootScope();
-  auto conv = SimpleConv2D(&s, 3, 2, "VALID");
+  auto conv = SimpleConv2D(&s, 4, 2, "VALID");
   auto axis = ops::Const(s.WithOpName("axis"), 3);
   auto split = ops::Split(s.WithOpName("split"), axis, conv, 2);
   auto concat = ops::Concat(s.WithOpName("concat"), {split[0], split[1]}, axis);
@@ -702,7 +702,7 @@ TEST_F(LayoutOptimizerTest, ConcatDimC) {
 
 TEST_F(LayoutOptimizerTest, Sum) {
   tensorflow::Scope s = tensorflow::Scope::NewRootScope();
-  auto conv = SimpleConv2D(&s, 3, 2, "VALID");
+  auto conv = SimpleConv2D(&s, 4, 2, "VALID");
   auto reduction_indices =
       ops::Const(s.WithOpName("reduction_indices"), {0, 1, 2}, {3});
   auto sum = ops::Sum(s.WithOpName("sum"), conv, reduction_indices);
@@ -731,7 +731,7 @@ TEST_F(LayoutOptimizerTest, Sum) {
 
 TEST_F(LayoutOptimizerTest, MulScalarAnd4D) {
   tensorflow::Scope s = tensorflow::Scope::NewRootScope();
-  auto conv = SimpleConv2D(&s, 3, 2, "VALID");
+  auto conv = SimpleConv2D(&s, 4, 2, "VALID");
   auto scalar = ops::Const(s.WithOpName("scalar"), 3.0f, {});
   auto mul = ops::Mul(s.WithOpName("mul"), scalar, conv);
   auto o = ops::Identity(s.WithOpName("o"), mul);
@@ -748,7 +748,7 @@ TEST_F(LayoutOptimizerTest, MulScalarAnd4D) {
 
 TEST_F(LayoutOptimizerTest, Mul4DAndScalar) {
   tensorflow::Scope s = tensorflow::Scope::NewRootScope();
-  auto conv = SimpleConv2D(&s, 3, 2, "VALID");
+  auto conv = SimpleConv2D(&s, 4, 2, "VALID");
   auto scalar = ops::Const(s.WithOpName("scalar"), 3.0f, {});
   auto mul = ops::Mul(s.WithOpName("mul"), conv, scalar);
   auto o = ops::Identity(s.WithOpName("o"), mul);
@@ -765,7 +765,7 @@ TEST_F(LayoutOptimizerTest, Mul4DAndScalar) {
 
 TEST_F(LayoutOptimizerTest, Mul4DAndUnknownRank) {
   tensorflow::Scope s = tensorflow::Scope::NewRootScope();
-  auto conv = SimpleConv2D(&s, 3, 2, "VALID");
+  auto conv = SimpleConv2D(&s, 4, 2, "VALID");
   auto unknown_rank =
       ops::Placeholder(s.WithOpName("unknown"), DT_FLOAT,
                        ops::Placeholder::Shape(PartialTensorShape()));
@@ -788,7 +788,7 @@ TEST_F(LayoutOptimizerTest, Mul4DAndUnknownRank) {
 
 TEST_F(LayoutOptimizerTest, Mul4DAnd4D) {
   tensorflow::Scope s = tensorflow::Scope::NewRootScope();
-  auto conv = SimpleConv2D(&s, 3, 2, "VALID");
+  auto conv = SimpleConv2D(&s, 4, 2, "VALID");
   auto i = ops::Identity(s.WithOpName("i"), conv);
   auto mul = ops::Mul(s.WithOpName("mul"), conv, i);
   auto o = ops::Identity(s.WithOpName("o"), mul);
@@ -805,7 +805,7 @@ TEST_F(LayoutOptimizerTest, Mul4DAnd4D) {
 
 TEST_F(LayoutOptimizerTest, Mul4DAndVector) {
   tensorflow::Scope s = tensorflow::Scope::NewRootScope();
-  auto conv = SimpleConv2D(&s, 3, 2, "VALID");
+  auto conv = SimpleConv2D(&s, 4, 2, "VALID");
   auto vector = ops::Const(s.WithOpName("vector"), {3.0f, 7.0f}, {2});
   auto mul = ops::Mul(s.WithOpName("mul"), conv, vector);
   auto o = ops::Identity(s.WithOpName("o"), mul);
@@ -829,7 +829,7 @@ TEST_F(LayoutOptimizerTest, Mul4DAndVector) {
 
 TEST_F(LayoutOptimizerTest, MulVectorAnd4D) {
   tensorflow::Scope s = tensorflow::Scope::NewRootScope();
-  auto conv = SimpleConv2D(&s, 3, 2, "VALID");
+  auto conv = SimpleConv2D(&s, 4, 2, "VALID");
   auto vector = ops::Const(s.WithOpName("vector"), {3.0f, 7.0f}, {2});
   auto mul = ops::Mul(s.WithOpName("mul"), vector, conv);
   auto o = ops::Identity(s.WithOpName("o"), mul);
@@ -931,7 +931,7 @@ TEST_F(LayoutOptimizerTest, DoNotApplyOptimizerTwice) {
 
 TEST_F(LayoutOptimizerTest, ShapeNWithInputs4DAnd4D) {
   tensorflow::Scope s = tensorflow::Scope::NewRootScope();
-  auto conv = SimpleConv2D(&s, 3, 2, "VALID");
+  auto conv = SimpleConv2D(&s, 4, 2, "VALID");
   auto shapen = ops::ShapeN(s.WithOpName("shapen"), {conv, conv});
   auto add = ops::Add(s.WithOpName("add"), shapen[0], shapen[1]);
   GrapplerItem item;
@@ -960,7 +960,7 @@ TEST_F(LayoutOptimizerTest, ShapeNWithInputs4DAnd4D) {
 
 TEST_F(LayoutOptimizerTest, ShapeNWithInputsVectorAnd4D) {
   tensorflow::Scope s = tensorflow::Scope::NewRootScope();
-  auto conv = SimpleConv2D(&s, 3, 2, "VALID");
+  auto conv = SimpleConv2D(&s, 4, 2, "VALID");
   auto vector = ops::Const(s.WithOpName("vector"), 3.0f, {7});
   auto shapen = ops::ShapeN(s.WithOpName("shapen"), {vector, conv});
   auto add = ops::Add(s.WithOpName("add"), shapen[0], shapen[1]);
@@ -985,7 +985,7 @@ TEST_F(LayoutOptimizerTest, ShapeNWithInputsVectorAnd4D) {
 
 TEST_F(LayoutOptimizerTest, ShapeNWithInputs4DAndNoNeedToTransform4D) {
   tensorflow::Scope s = tensorflow::Scope::NewRootScope();
-  auto conv = SimpleConv2D(&s, 3, 2, "VALID");
+  auto conv = SimpleConv2D(&s, 4, 2, "VALID");
   auto tensor_4d = ops::Const(s.WithOpName("tensor_4d"), 3.0f, {1, 1, 1, 3});
   auto i1 = ops::Identity(s.WithOpName("i1"), tensor_4d);
   Output i2 = ops::Identity(s.WithOpName("i2"), i1);
@@ -1002,6 +1002,70 @@ TEST_F(LayoutOptimizerTest, ShapeNWithInputs4DAndNoNeedToTransform4D) {
   EXPECT_EQ(shapen_node->input(1), "i2");
 }
 
+TEST_F(LayoutOptimizerTest, Switch) {
+  tensorflow::Scope s = tensorflow::Scope::NewRootScope();
+  auto conv = SimpleConv2D(&s, 4, 2, "VALID");
+  ops::Variable ctrl(s.WithOpName("ctrl"), {}, DT_BOOL);
+  auto sw = ops::Switch(s.WithOpName("switch"), conv, ctrl);
+  auto i1 = ops::Identity(s.WithOpName("i1"), sw.output_true);
+  auto i2 = ops::Identity(s.WithOpName("i2"), sw.output_false);
+  GrapplerItem item;
+  TF_CHECK_OK(s.ToGraphDef(&item.graph));
+  LayoutOptimizer optimizer;
+  GraphDef output;
+  Status status = optimizer.Optimize(virtual_cluster_.get(), item, &output);
+  NodeMap node_map(&output);
+  auto switch_node = node_map.GetNode("switch");
+  EXPECT_EQ(switch_node->input(0), "Conv2D");
+  EXPECT_EQ(switch_node->input(1), "ctrl");
+  auto i1_node = node_map.GetNode("i1");
+  auto i2_node = node_map.GetNode("i2");
+  auto trans1 = node_map.GetNode(i1_node->input(0));
+  EXPECT_EQ(trans1->input(0), "switch:1");
+  auto trans2 = node_map.GetNode(i2_node->input(0));
+  EXPECT_EQ(trans2->input(0), "switch");
+}
+
+TEST_F(LayoutOptimizerTest, MergeBothInputsConvertible) {
+  tensorflow::Scope s = tensorflow::Scope::NewRootScope();
+  auto conv = SimpleConv2D(&s, 4, 2, "VALID");
+  Output i1 = ops::Identity(s.WithOpName("i1"), conv);
+  auto merge = ops::Merge(s.WithOpName("merge"), {conv, i1});
+  auto i2 = ops::Identity(s.WithOpName("i2"), merge.output);
+  GrapplerItem item;
+  TF_CHECK_OK(s.ToGraphDef(&item.graph));
+  LayoutOptimizer optimizer;
+  GraphDef output;
+  Status status = optimizer.Optimize(virtual_cluster_.get(), item, &output);
+  NodeMap node_map(&output);
+  auto merge_node = node_map.GetNode("merge");
+  EXPECT_EQ(merge_node->input(0), "Conv2D");
+  EXPECT_EQ(merge_node->input(1), "i1");
+  auto i2_node = node_map.GetNode("i2");
+  EXPECT_EQ(i2_node->input(0), "LayoutOptimizerTransposeNCHWToNHWC-merge-0-0");
+  auto transpose =
+      node_map.GetNode("LayoutOptimizerTransposeNCHWToNHWC-merge-0-0");
+  EXPECT_EQ(transpose->input(0), "merge");
+}
+
+TEST_F(LayoutOptimizerTest, MergeOneInputNotConvertible) {
+  tensorflow::Scope s = tensorflow::Scope::NewRootScope();
+  auto conv = SimpleConv2D(&s, 4, 2, "VALID");
+  auto tensor_4d = ops::Const(s.WithOpName("tensor_4d"), 3.0f, {1, 1, 1, 3});
+  auto merge = ops::Merge(s.WithOpName("merge"), {tensor_4d, conv});
+  auto i2 = ops::Identity(s.WithOpName("i2"), merge.output);
+  GrapplerItem item;
+  TF_CHECK_OK(s.ToGraphDef(&item.graph));
+  LayoutOptimizer optimizer;
+  GraphDef output;
+  Status status = optimizer.Optimize(virtual_cluster_.get(), item, &output);
+  NodeMap node_map(&output);
+  auto merge_node = node_map.GetNode("merge");
+  EXPECT_EQ(merge_node->input(0), "tensor_4d");
+  EXPECT_EQ(merge_node->input(1),
+            "LayoutOptimizerTransposeNCHWToNHWC-Conv2D-0-1");
+}
+
 }  // namespace
 }  // namespace grappler
 }  // namespace tensorflow
-- 
GitLab


From 0b80606c4d635250810d5b2c950986f91fe1cfae Mon Sep 17 00:00:00 2001
From: Yong Tang <yong.tang.github@outlook.com>
Date: Fri, 15 Dec 2017 18:06:13 -0600
Subject: [PATCH 1102/1225] Load boundaries array into shared memory before
 hand for `bucketize` (#14774)

This fix is a follow up to 13922. This fix loads boundaries
array into shared memory before each thread, in order to improve
performance for `bucketize` op.

The fix is based on feedback 13922.

Signed-off-by: Yong Tang <yong.tang.github@outlook.com>
---
 .../core/kernels/bucketize_op_gpu.cu.cc       | 39 ++++++++++++++++---
 1 file changed, 33 insertions(+), 6 deletions(-)

diff --git a/tensorflow/core/kernels/bucketize_op_gpu.cu.cc b/tensorflow/core/kernels/bucketize_op_gpu.cu.cc
index 325dee793b..b08ccdbdc0 100644
--- a/tensorflow/core/kernels/bucketize_op_gpu.cu.cc
+++ b/tensorflow/core/kernels/bucketize_op_gpu.cu.cc
@@ -33,11 +33,28 @@ namespace tensorflow {
 
 typedef Eigen::GpuDevice GPUDevice;
 
-template <typename T>
+template <typename T, bool useSharedMem>
 __global__ void BucketizeCustomKernel(
     const int32 size_in, const T* in, const int32 size_boundaries,
     CudaDeviceArrayStruct<float> boundaries_array, int32* out) {
   const float* boundaries = GetCudaDeviceArrayOnDevice(&boundaries_array);
+
+  extern __shared__ __align__(sizeof(float)) unsigned char shared_mem[];
+  float* shared_mem_boundaries = reinterpret_cast<float*>(shared_mem);
+
+  if (useSharedMem) {
+    int32 lidx = threadIdx.y * blockDim.x + threadIdx.x;
+    int32 blockSize = blockDim.x * blockDim.y;
+
+    for (int32 i = lidx; i < size_boundaries; i += blockSize) {
+      shared_mem_boundaries[i] = boundaries[i];
+    }
+
+    __syncthreads();
+
+    boundaries = shared_mem_boundaries;
+  }
+
   CUDA_1D_KERNEL_LOOP(i, size_in) {
     T value = in[i];
     int32 bucket = 0;
@@ -77,11 +94,21 @@ struct BucketizeFunctor<GPUDevice, T> {
     TF_RETURN_IF_ERROR(boundaries_array.Finalize());
 
     CudaLaunchConfig config = GetCudaLaunchConfig(input.size(), d);
-    BucketizeCustomKernel<T>
-        <<<config.block_count, config.thread_per_block, 0, d.stream()>>>(
-            input.size(), input.data(), boundaries_vector.size(),
-            boundaries_array.data(), output.data());
-
+    int32 shared_mem_size = sizeof(float) * boundaries_vector.size();
+    const int32 kMaxSharedMemBytes = 16384;
+    if (shared_mem_size < d.sharedMemPerBlock() &&
+        shared_mem_size < kMaxSharedMemBytes) {
+      BucketizeCustomKernel<T,
+                            true><<<config.block_count, config.thread_per_block,
+                                    shared_mem_size, d.stream()>>>(
+          input.size(), input.data(), boundaries_vector.size(),
+          boundaries_array.data(), output.data());
+    } else {
+      BucketizeCustomKernel<T, false><<<
+          config.block_count, config.thread_per_block, 0, d.stream()>>>(
+          input.size(), input.data(), boundaries_vector.size(),
+          boundaries_array.data(), output.data());
+    }
     return Status::OK();
   }
 };
-- 
GitLab


From deb50f80e5a87325adec5db826673e05acb4f5ab Mon Sep 17 00:00:00 2001
From: Justin Lebar <jlebar@google.com>
Date: Fri, 15 Dec 2017 16:05:58 -0800
Subject: [PATCH 1103/1225] [XLA] Update docs to indicate that BatchNormGrad
 returns {operand, scale, offset}, not {operand, offset, scale}.

PiperOrigin-RevId: 179251837
---
 tensorflow/docs_src/performance/xla/operation_semantics.md | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/tensorflow/docs_src/performance/xla/operation_semantics.md b/tensorflow/docs_src/performance/xla/operation_semantics.md
index d6f05f81bf..71e5db5d9f 100644
--- a/tensorflow/docs_src/performance/xla/operation_semantics.md
+++ b/tensorflow/docs_src/performance/xla/operation_semantics.md
@@ -62,10 +62,11 @@ The output type is a tuple of three handles:
 |------------- | ----------------------- | ------------------------------------|
 |`grad_operand`| `ComputationDataHandle` | gradient with respect to input      |
 :              :                         : `operand`                           :
-|`grad_offset` | `ComputationDataHandle` | gradient with respect to input      |
-:              :                         : `offset`                            :
 |`grad_scale`  | `ComputationDataHandle` | gradient with respect to input      |
 :              :                         : `scale`                             :
+|`grad_offset` | `ComputationDataHandle` | gradient with respect to input      |
+:              :                         : `offset`                            :
+
 
 ## BatchNormInference
 
-- 
GitLab


From f3df9fcaefeb3ab0fd83f255bec93e1a3c013a5e Mon Sep 17 00:00:00 2001
From: Eugene Brevdo <ebrevdo@google.com>
Date: Fri, 15 Dec 2017 16:06:31 -0800
Subject: [PATCH 1104/1225] [tf.contrib.seq2seq] Modify AttentionMechanisms to
 propagate state.

By default, the state is just the previous alignment.

This allows for more complex attention mechanisms (upcoming).

PiperOrigin-RevId: 179251889
---
 .../kernel_tests/attention_wrapper_test.py    |  32 +++++-
 .../seq2seq/python/ops/attention_wrapper.py   | 103 +++++++++++++-----
 2 files changed, 103 insertions(+), 32 deletions(-)

diff --git a/tensorflow/contrib/seq2seq/python/kernel_tests/attention_wrapper_test.py b/tensorflow/contrib/seq2seq/python/kernel_tests/attention_wrapper_test.py
index 01a5540121..e5d591788f 100644
--- a/tensorflow/contrib/seq2seq/python/kernel_tests/attention_wrapper_test.py
+++ b/tensorflow/contrib/seq2seq/python/kernel_tests/attention_wrapper_test.py
@@ -254,6 +254,8 @@ class AttentionWrapperTest(test.TestCase):
         time=3,
         alignments=ResultSummary(
             shape=(5, 8), dtype=dtype('float32'), mean=0.125),
+        attention_state=ResultSummary(
+            shape=(5, 8), dtype=dtype('float32'), mean=0.125),
         alignment_history=())
     expected_final_alignment_history = ResultSummary(
         shape=(3, 5, 8), dtype=dtype('float32'), mean=0.12500001)
@@ -286,6 +288,8 @@ class AttentionWrapperTest(test.TestCase):
         time=3,
         alignments=ResultSummary(
             shape=(5, 8), dtype=dtype('float32'), mean=0.125),
+        attention_state=ResultSummary(
+            shape=(5, 8), dtype=dtype('float32'), mean=0.125),
         alignment_history=())
 
     self._testWithAttention(
@@ -313,6 +317,8 @@ class AttentionWrapperTest(test.TestCase):
         time=3,
         alignments=ResultSummary(
             shape=(5, 8), dtype=dtype('float32'), mean=0.125),
+        attention_state=ResultSummary(
+            shape=(5, 8), dtype=dtype('float32'), mean=0.125),
         alignment_history=())
 
     self._testWithAttention(
@@ -342,6 +348,8 @@ class AttentionWrapperTest(test.TestCase):
         time=3,
         alignments=ResultSummary(
             shape=(5, 8), dtype=dtype('float32'), mean=0.125),
+        attention_state=ResultSummary(
+            shape=(5, 8), dtype=dtype('float32'), mean=0.125),
         alignment_history=())
 
     self._testWithAttention(
@@ -370,6 +378,8 @@ class AttentionWrapperTest(test.TestCase):
         time=3,
         alignments=ResultSummary(
             shape=(5, 8), dtype=dtype('float32'), mean=0.125),
+        attention_state=ResultSummary(
+            shape=(5, 8), dtype=dtype('float32'), mean=0.125),
         alignment_history=())
 
     self._testWithAttention(
@@ -545,6 +555,8 @@ class AttentionWrapperTest(test.TestCase):
         time=3,
         alignments=ResultSummary(
             shape=(5, 8), dtype=dtype('float32'), mean=0.032228071),
+        attention_state=ResultSummary(
+            shape=(5, 8), dtype=dtype('float32'), mean=0.032228071),
         alignment_history=())
     expected_final_alignment_history = ResultSummary(
         shape=(3, 5, 8), dtype=dtype('float32'), mean=0.050430927)
@@ -578,6 +590,8 @@ class AttentionWrapperTest(test.TestCase):
         time=3,
         alignments=ResultSummary(
             shape=(5, 8), dtype=dtype('float32'), mean=0.028698336),
+        attention_state=ResultSummary(
+            shape=(5, 8), dtype=dtype('float32'), mean=0.028698336),
         alignment_history=())
     expected_final_alignment_history = ResultSummary(
         shape=(3, 5, 8), dtype=dtype('float32'), mean=0.046009291)
@@ -599,7 +613,8 @@ class AttentionWrapperTest(test.TestCase):
           random_ops.random_normal((b, t, u)),
           mode='hard')
       # Just feed previous attention as [1, 0, 0, ...]
-      attn = a(random_ops.random_normal((b, d)), array_ops.one_hot([0]*b, t))
+      attn, unused_state = a(
+          random_ops.random_normal((b, d)), array_ops.one_hot([0]*b, t))
       sess.run(variables.global_variables_initializer())
       attn_out = attn.eval()
       # All values should be 0 or 1
@@ -629,6 +644,8 @@ class AttentionWrapperTest(test.TestCase):
         time=3,
         alignments=ResultSummary(
             shape=(5, 8), dtype=dtype('float32'), mean=0.032198936),
+        attention_state=ResultSummary(
+            shape=(5, 8), dtype=dtype('float32'), mean=0.032198936),
         alignment_history=())
     expected_final_alignment_history = ResultSummary(
         shape=(3, 5, 8), dtype=dtype('float32'), mean=0.050387777)
@@ -663,6 +680,8 @@ class AttentionWrapperTest(test.TestCase):
         time=3,
         alignments=ResultSummary(
             shape=(5, 8), dtype=dtype('float32'), mean=0.032198936),
+        attention_state=ResultSummary(
+            shape=(5, 8), dtype=dtype('float32'), mean=0.032198936),
         alignment_history=())
     expected_final_alignment_history = ResultSummary(
         shape=(3, 5, 8), dtype=dtype('float32'), mean=0.050387777)
@@ -697,6 +716,9 @@ class AttentionWrapperTest(test.TestCase):
         alignments=(
             ResultSummary(shape=(5, 8), dtype=dtype('float32'), mean=0.125),
             ResultSummary(shape=(5, 8), dtype=dtype('float32'), mean=0.125)),
+        attention_state=(
+            ResultSummary(shape=(5, 8), dtype=dtype('float32'), mean=0.125),
+            ResultSummary(shape=(5, 8), dtype=dtype('float32'), mean=0.125)),
         alignment_history=())
 
     expected_final_alignment_history = (
@@ -723,7 +745,8 @@ class AttentionWrapperTest(test.TestCase):
           random_ops.random_normal((b, t, u)),
           mode='hard')
       # Just feed previous attention as [1, 0, 0, ...]
-      attn = a(random_ops.random_normal((b, d)), array_ops.one_hot([0]*b, t))
+      attn, unused_state = a(
+          random_ops.random_normal((b, d)), array_ops.one_hot([0]*b, t))
       sess.run(variables.global_variables_initializer())
       attn_out = attn.eval()
       # All values should be 0 or 1
@@ -753,6 +776,9 @@ class AttentionWrapperTest(test.TestCase):
         alignments=(
             ResultSummary(shape=(5, 8), dtype=dtype('float32'), mean=0.125),
             ResultSummary(shape=(5, 8), dtype=dtype('float32'), mean=0.125)),
+        attention_state=(
+            ResultSummary(shape=(5, 8), dtype=dtype('float32'), mean=0.125),
+            ResultSummary(shape=(5, 8), dtype=dtype('float32'), mean=0.125)),
         alignment_history=())
     expected_final_alignment_history = (
         ResultSummary(shape=(3, 5, 8), dtype=dtype('float32'), mean=0.125),
@@ -787,6 +813,8 @@ class AttentionWrapperTest(test.TestCase):
         time=3,
         alignments=(
             ResultSummary(shape=(5, 8), dtype=dtype('float32'), mean=0.125),),
+        attention_state=(
+            ResultSummary(shape=(5, 8), dtype=dtype('float32'), mean=0.125),),
         alignment_history=())
 
     expected_final_alignment_history = (
diff --git a/tensorflow/contrib/seq2seq/python/ops/attention_wrapper.py b/tensorflow/contrib/seq2seq/python/ops/attention_wrapper.py
index e87ef41388..36bfc5685d 100644
--- a/tensorflow/contrib/seq2seq/python/ops/attention_wrapper.py
+++ b/tensorflow/contrib/seq2seq/python/ops/attention_wrapper.py
@@ -61,7 +61,14 @@ _zero_state_tensors = rnn_cell_impl._zero_state_tensors  # pylint: disable=prote
 
 
 class AttentionMechanism(object):
-  pass
+
+  @property
+  def alignments_size(self):
+    raise NotImplementedError
+
+  @property
+  def state_size(self):
+    raise NotImplementedError
 
 
 def _prepare_memory(memory, memory_sequence_length, check_inner_dims_defined):
@@ -161,7 +168,7 @@ class _BaseAttentionMechanism(AttentionMechanism):
         tensor should be shaped `[batch_size, max_time, ...]`.
       probability_fn: A `callable`.  Converts the score and previous alignments
         to probabilities. Its signature should be:
-        `probabilities = probability_fn(score, previous_alignments)`.
+        `probabilities = probability_fn(score, state)`.
       memory_sequence_length (optional): Sequence lengths for the batch entries
         in memory.  If provided, the memory tensor rows are masked with zeros
         for values past the respective sequence lengths.
@@ -235,6 +242,10 @@ class _BaseAttentionMechanism(AttentionMechanism):
   def alignments_size(self):
     return self._alignments_size
 
+  @property
+  def state_size(self):
+    return self._alignments_size
+
   def initial_alignments(self, batch_size, dtype):
     """Creates the initial alignment values for the `AttentionWrapper` class.
 
@@ -254,6 +265,23 @@ class _BaseAttentionMechanism(AttentionMechanism):
     max_time = self._alignments_size
     return _zero_state_tensors(max_time, batch_size, dtype)
 
+  def initial_state(self, batch_size, dtype):
+    """Creates the initial state values for the `AttentionWrapper` class.
+
+    This is important for AttentionMechanisms that use the previous alignment
+    to calculate the alignment at the next time step (e.g. monotonic attention).
+
+    The default behavior is to return the same output as initial_alignments.
+
+    Args:
+      batch_size: `int32` scalar, the batch_size.
+      dtype: The `dtype`.
+
+    Returns:
+      A structure of all-zero tensors with shapes as described by `state_size`.
+    """
+    return self.initial_alignments(batch_size, dtype)
+
 
 def _luong_score(query, keys, scale):
   """Implements Luong-style (multiplicative) scoring function.
@@ -381,13 +409,13 @@ class LuongAttention(_BaseAttentionMechanism):
     self._scale = scale
     self._name = name
 
-  def __call__(self, query, previous_alignments):
+  def __call__(self, query, state):
     """Score the query based on the keys and values.
 
     Args:
       query: Tensor of dtype matching `self.values` and shape
         `[batch_size, query_depth]`.
-      previous_alignments: Tensor of dtype matching `self.values` and shape
+      state: Tensor of dtype matching `self.values` and shape
         `[batch_size, alignments_size]`
         (`alignments_size` is memory's `max_time`).
 
@@ -398,8 +426,9 @@ class LuongAttention(_BaseAttentionMechanism):
     """
     with variable_scope.variable_scope(None, "luong_attention", [query]):
       score = _luong_score(query, self._keys, self._scale)
-    alignments = self._probability_fn(score, previous_alignments)
-    return alignments
+    alignments = self._probability_fn(score, state)
+    next_state = alignments
+    return alignments, next_state
 
 
 def _bahdanau_score(processed_query, keys, normalize):
@@ -526,13 +555,13 @@ class BahdanauAttention(_BaseAttentionMechanism):
     self._normalize = normalize
     self._name = name
 
-  def __call__(self, query, previous_alignments):
+  def __call__(self, query, state):
     """Score the query based on the keys and values.
 
     Args:
       query: Tensor of dtype matching `self.values` and shape
         `[batch_size, query_depth]`.
-      previous_alignments: Tensor of dtype matching `self.values` and shape
+      state: Tensor of dtype matching `self.values` and shape
         `[batch_size, alignments_size]`
         (`alignments_size` is memory's `max_time`).
 
@@ -544,8 +573,9 @@ class BahdanauAttention(_BaseAttentionMechanism):
     with variable_scope.variable_scope(None, "bahdanau_attention", [query]):
       processed_query = self.query_layer(query) if self.query_layer else query
       score = _bahdanau_score(processed_query, self._keys, self._normalize)
-    alignments = self._probability_fn(score, previous_alignments)
-    return alignments
+    alignments = self._probability_fn(score, state)
+    next_state = alignments
+    return alignments, next_state
 
 
 def safe_cumprod(x, *args, **kwargs):
@@ -805,13 +835,13 @@ class BahdanauMonotonicAttention(_BaseMonotonicAttentionMechanism):
     self._name = name
     self._score_bias_init = score_bias_init
 
-  def __call__(self, query, previous_alignments):
+  def __call__(self, query, state):
     """Score the query based on the keys and values.
 
     Args:
       query: Tensor of dtype matching `self.values` and shape
         `[batch_size, query_depth]`.
-      previous_alignments: Tensor of dtype matching `self.values` and shape
+      state: Tensor of dtype matching `self.values` and shape
         `[batch_size, alignments_size]`
         (`alignments_size` is memory's `max_time`).
 
@@ -828,8 +858,9 @@ class BahdanauMonotonicAttention(_BaseMonotonicAttentionMechanism):
           "attention_score_bias", dtype=processed_query.dtype,
           initializer=self._score_bias_init)
       score += score_bias
-    alignments = self._probability_fn(score, previous_alignments)
-    return alignments
+    alignments = self._probability_fn(score, state)
+    next_state = alignments
+    return alignments, next_state
 
 
 class LuongMonotonicAttention(_BaseMonotonicAttentionMechanism):
@@ -906,13 +937,13 @@ class LuongMonotonicAttention(_BaseMonotonicAttentionMechanism):
     self._score_bias_init = score_bias_init
     self._name = name
 
-  def __call__(self, query, previous_alignments):
+  def __call__(self, query, state):
     """Score the query based on the keys and values.
 
     Args:
       query: Tensor of dtype matching `self.values` and shape
         `[batch_size, query_depth]`.
-      previous_alignments: Tensor of dtype matching `self.values` and shape
+      state: Tensor of dtype matching `self.values` and shape
         `[batch_size, alignments_size]`
         (`alignments_size` is memory's `max_time`).
 
@@ -928,14 +959,15 @@ class LuongMonotonicAttention(_BaseMonotonicAttentionMechanism):
           "attention_score_bias", dtype=query.dtype,
           initializer=self._score_bias_init)
       score += score_bias
-    alignments = self._probability_fn(score, previous_alignments)
-    return alignments
+    alignments = self._probability_fn(score, state)
+    next_state = alignments
+    return alignments, next_state
 
 
 class AttentionWrapperState(
     collections.namedtuple("AttentionWrapperState",
                            ("cell_state", "attention", "time", "alignments",
-                            "alignment_history"))):
+                            "alignment_history", "attention_state"))):
   """`namedtuple` storing the state of a `AttentionWrapper`.
 
   Contains:
@@ -949,6 +981,9 @@ class AttentionWrapperState(
     - `alignment_history`: (if enabled) a single or tuple of `TensorArray`(s)
        containing alignment matrices from all time steps for each attention
        mechanism. Call `stack()` on each to convert to a `Tensor`.
+    - `attention_state`: A single or tuple of nested objects
+       containing attention mechanism state for each attention mechanism.
+       The objects may contain Tensors or TensorArrays.
   """
 
   def clone(self, **kwargs):
@@ -993,11 +1028,11 @@ def hardmax(logits, name=None):
         math_ops.argmax(logits, -1), depth, dtype=logits.dtype)
 
 
-def _compute_attention(attention_mechanism, cell_output, previous_alignments,
+def _compute_attention(attention_mechanism, cell_output, attention_state,
                        attention_layer):
   """Computes the attention and alignments for a given attention_mechanism."""
-  alignments = attention_mechanism(
-      cell_output, previous_alignments=previous_alignments)
+  alignments, next_attention_state = attention_mechanism(
+      cell_output, state=attention_state)
 
   # Reshape from [batch_size, memory_time] to [batch_size, 1, memory_time]
   expanded_alignments = array_ops.expand_dims(alignments, 1)
@@ -1018,7 +1053,7 @@ def _compute_attention(attention_mechanism, cell_output, previous_alignments,
   else:
     attention = context
 
-  return attention, alignments
+  return attention, alignments, next_attention_state
 
 
 class AttentionWrapper(rnn_cell_impl.RNNCell):
@@ -1229,6 +1264,8 @@ class AttentionWrapper(rnn_cell_impl.RNNCell):
         attention=self._attention_layer_size,
         alignments=self._item_or_tuple(
             a.alignments_size for a in self._attention_mechanisms),
+        attention_state=self._item_or_tuple(
+            a.state_size for a in self._attention_mechanisms),
         alignment_history=self._item_or_tuple(
             () for _ in self._attention_mechanisms))  # sometimes a TensorArray
 
@@ -1278,6 +1315,9 @@ class AttentionWrapper(rnn_cell_impl.RNNCell):
           alignments=self._item_or_tuple(
               attention_mechanism.initial_alignments(batch_size, dtype)
               for attention_mechanism in self._attention_mechanisms),
+          attention_state=self._item_or_tuple(
+              attention_mechanism.initial_state(batch_size, dtype)
+              for attention_mechanism in self._attention_mechanisms),
           alignment_history=self._item_or_tuple(
               tensor_array_ops.TensorArray(dtype=dtype, size=0,
                                            dynamic_size=True)
@@ -1339,33 +1379,36 @@ class AttentionWrapper(rnn_cell_impl.RNNCell):
           cell_output, name="checked_cell_output")
 
     if self._is_multi:
-      previous_alignments = state.alignments
+      previous_attention_state = state.attention_state
       previous_alignment_history = state.alignment_history
     else:
-      previous_alignments = [state.alignments]
+      previous_attention_state = [state.attention_state]
       previous_alignment_history = [state.alignment_history]
 
     all_alignments = []
     all_attentions = []
-    all_histories = []
+    all_attention_states = []
+    maybe_all_histories = []
     for i, attention_mechanism in enumerate(self._attention_mechanisms):
-      attention, alignments = _compute_attention(
-          attention_mechanism, cell_output, previous_alignments[i],
+      attention, alignments, next_attention_state = _compute_attention(
+          attention_mechanism, cell_output, previous_attention_state[i],
           self._attention_layers[i] if self._attention_layers else None)
       alignment_history = previous_alignment_history[i].write(
           state.time, alignments) if self._alignment_history else ()
 
+      all_attention_states.append(next_attention_state)
       all_alignments.append(alignments)
-      all_histories.append(alignment_history)
       all_attentions.append(attention)
+      maybe_all_histories.append(alignment_history)
 
     attention = array_ops.concat(all_attentions, 1)
     next_state = AttentionWrapperState(
         time=state.time + 1,
         cell_state=next_cell_state,
         attention=attention,
+        attention_state=self._item_or_tuple(all_attention_states),
         alignments=self._item_or_tuple(all_alignments),
-        alignment_history=self._item_or_tuple(all_histories))
+        alignment_history=self._item_or_tuple(maybe_all_histories))
 
     if self._output_attention:
       return attention, next_state
-- 
GitLab


From 5287eb13f352680449d2f759c379f459c434fdad Mon Sep 17 00:00:00 2001
From: Yong Tang <yong.tang.github@outlook.com>
Date: Sat, 16 Dec 2017 00:23:49 +0000
Subject: [PATCH 1105/1225] Update API with
 `tensorflow/core/api_def/update_api_def.sh`

Signed-off-by: Yong Tang <yong.tang.github@outlook.com>
---
 tensorflow/core/api_def/base_api/api_def_RecordInput.pbtxt | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/tensorflow/core/api_def/base_api/api_def_RecordInput.pbtxt b/tensorflow/core/api_def/base_api/api_def_RecordInput.pbtxt
index 7efc8cd833..333144d76e 100644
--- a/tensorflow/core/api_def/base_api/api_def_RecordInput.pbtxt
+++ b/tensorflow/core/api_def/base_api/api_def_RecordInput.pbtxt
@@ -41,6 +41,13 @@ END
     name: "batch_size"
     description: <<END
 The batch size.
+END
+  }
+  attr {
+    name: "compression_type"
+    description: <<END
+The type of compression for the file. Currently ZLIB and
+GZIP are supported. Defaults to none.
 END
   }
   summary: "Emits randomized records."
-- 
GitLab


From 4f4abcacedcba5430e03320f39205d2f327df2ac Mon Sep 17 00:00:00 2001
From: Dustin Tran <trandustin@google.com>
Date: Fri, 15 Dec 2017 16:38:28 -0800
Subject: [PATCH 1106/1225] Restandardize `DenseVariational` as simpler
 template for other probabilistic layers.

PiperOrigin-RevId: 179255435
---
 .../layers_dense_variational_test.py          |  55 +-
 .../ops/layers_dense_variational_impl.py      | 501 ++++++------------
 2 files changed, 194 insertions(+), 362 deletions(-)

diff --git a/tensorflow/contrib/bayesflow/python/kernel_tests/layers_dense_variational_test.py b/tensorflow/contrib/bayesflow/python/kernel_tests/layers_dense_variational_test.py
index 50358fd1c2..7b5b2fec1e 100644
--- a/tensorflow/contrib/bayesflow/python/kernel_tests/layers_dense_variational_test.py
+++ b/tensorflow/contrib/bayesflow/python/kernel_tests/layers_dense_variational_test.py
@@ -19,6 +19,7 @@ from __future__ import division
 from __future__ import print_function
 
 from tensorflow.contrib.bayesflow.python.ops import layers_dense_variational_impl as prob_layers_lib
+from tensorflow.contrib.distributions.python.ops import independent as independent_lib
 from tensorflow.python.framework import ops
 from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import random_ops
@@ -41,7 +42,7 @@ class Counter(object):
     return self._value
 
 
-class MockDistribution(normal_lib.Normal):
+class MockDistribution(independent_lib.Independent):
   """Monitors DenseVariational calls to the underlying distribution."""
 
   def __init__(self, result_sample, result_log_prob, loc=None, scale=None):
@@ -49,6 +50,10 @@ class MockDistribution(normal_lib.Normal):
     self.result_log_prob = result_log_prob
     self.result_loc = loc
     self.result_scale = scale
+    self.result_distribution = normal_lib.Normal(loc=0.0, scale=1.0)
+    if loc is not None and scale is not None:
+      self.result_distribution = normal_lib.Normal(loc=self.result_loc,
+                                                   scale=self.result_scale)
     self.called_log_prob = Counter()
     self.called_sample = Counter()
     self.called_loc = Counter()
@@ -62,6 +67,10 @@ class MockDistribution(normal_lib.Normal):
     self.called_sample()
     return self.result_sample
 
+  @property
+  def distribution(self):  # for dummy check on Independent(Normal)
+    return self.result_distribution
+
   @property
   def loc(self):
     self.called_loc()
@@ -95,16 +104,16 @@ class DenseVariationalLocalReparametrization(test.TestCase):
       inputs = random_ops.random_uniform([2, 3], seed=1)
 
       # No keys.
-      loss_keys = ops.get_collection(ops.GraphKeys.REGULARIZATION_LOSSES)
-      self.assertEqual(len(loss_keys), 0)
-      self.assertListEqual(dense_vi.losses, loss_keys)
+      losses = ops.get_collection(ops.GraphKeys.REGULARIZATION_LOSSES)
+      self.assertEqual(len(losses), 0)
+      self.assertListEqual(dense_vi.losses, losses)
 
       _ = dense_vi(inputs)
 
       # Yes keys.
-      loss_keys = ops.get_collection(ops.GraphKeys.REGULARIZATION_LOSSES)
-      self.assertEqual(len(loss_keys), 1)
-      self.assertListEqual(dense_vi.losses, loss_keys)
+      losses = ops.get_collection(ops.GraphKeys.REGULARIZATION_LOSSES)
+      self.assertEqual(len(losses), 1)
+      self.assertListEqual(dense_vi.losses, losses)
 
   def testKLPenaltyBoth(self):
     def _make_normal(dtype, *args):  # pylint: disable=unused-argument
@@ -118,16 +127,16 @@ class DenseVariationalLocalReparametrization(test.TestCase):
       inputs = random_ops.random_uniform([2, 3], seed=1)
 
       # No keys.
-      loss_keys = ops.get_collection(ops.GraphKeys.REGULARIZATION_LOSSES)
-      self.assertEqual(len(loss_keys), 0)
-      self.assertListEqual(dense_vi.losses, loss_keys)
+      losses = ops.get_collection(ops.GraphKeys.REGULARIZATION_LOSSES)
+      self.assertEqual(len(losses), 0)
+      self.assertListEqual(dense_vi.losses, losses)
 
       _ = dense_vi(inputs)
 
       # Yes keys.
-      loss_keys = ops.get_collection(ops.GraphKeys.REGULARIZATION_LOSSES)
-      self.assertEqual(len(loss_keys), 2)
-      self.assertListEqual(dense_vi.losses, loss_keys)
+      losses = ops.get_collection(ops.GraphKeys.REGULARIZATION_LOSSES)
+      self.assertEqual(len(losses), 2)
+      self.assertListEqual(dense_vi.losses, losses)
 
   def testVariationalNonLocal(self):
     batch_size, in_size, out_size = 2, 3, 4
@@ -183,9 +192,9 @@ class DenseVariationalLocalReparametrization(test.TestCase):
           expected_bias_divergence_, actual_bias_divergence_,
       ] = sess.run([
           expected_outputs, outputs,
-          kernel_posterior.result_sample, dense_vi.kernel.posterior_tensor,
+          kernel_posterior.result_sample, dense_vi.kernel_posterior_tensor,
           kernel_divergence.result, kl_penalty[0],
-          bias_posterior.result_sample, dense_vi.bias.posterior_tensor,
+          bias_posterior.result_sample, dense_vi.bias_posterior_tensor,
           bias_divergence.result, kl_penalty[1],
       ])
 
@@ -206,11 +215,15 @@ class DenseVariationalLocalReparametrization(test.TestCase):
           rtol=1e-6, atol=0.)
 
       self.assertAllEqual(
-          [[kernel_posterior, kernel_prior, kernel_posterior.result_sample]],
+          [[kernel_posterior.distribution,
+            kernel_prior.distribution,
+            kernel_posterior.result_sample]],
           kernel_divergence.args)
 
       self.assertAllEqual(
-          [[bias_posterior, bias_prior, bias_posterior.result_sample]],
+          [[bias_posterior.distribution,
+            bias_prior.distribution,
+            bias_posterior.result_sample]],
           bias_divergence.args)
 
   def testVariationalLocal(self):
@@ -274,7 +287,7 @@ class DenseVariationalLocalReparametrization(test.TestCase):
       ] = sess.run([
           expected_outputs, outputs,
           kernel_divergence.result, kl_penalty[0],
-          bias_posterior.result_sample, dense_vi.bias.posterior_tensor,
+          bias_posterior.result_sample, dense_vi.bias_posterior_tensor,
           bias_divergence.result, kl_penalty[1],
       ])
 
@@ -292,11 +305,13 @@ class DenseVariationalLocalReparametrization(test.TestCase):
           rtol=1e-6, atol=0.)
 
       self.assertAllEqual(
-          [[kernel_posterior, kernel_prior, None]],
+          [[kernel_posterior.distribution, kernel_prior.distribution, None]],
           kernel_divergence.args)
 
       self.assertAllEqual(
-          [[bias_posterior, bias_prior, bias_posterior.result_sample]],
+          [[bias_posterior.distribution,
+            bias_prior.distribution,
+            bias_posterior.result_sample]],
           bias_divergence.args)
 
 
diff --git a/tensorflow/contrib/bayesflow/python/ops/layers_dense_variational_impl.py b/tensorflow/contrib/bayesflow/python/ops/layers_dense_variational_impl.py
index b05ce0ffc1..a3b22f334a 100644
--- a/tensorflow/contrib/bayesflow/python/ops/layers_dense_variational_impl.py
+++ b/tensorflow/contrib/bayesflow/python/ops/layers_dense_variational_impl.py
@@ -28,10 +28,12 @@ from __future__ import print_function
 import numpy as np
 
 from tensorflow.contrib.distributions.python.ops import deterministic as deterministic_lib
+from tensorflow.contrib.distributions.python.ops import independent as independent_lib
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
 from tensorflow.python.framework import tensor_shape
 from tensorflow.python.layers import base as layers_lib
+from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import init_ops
 from tensorflow.python.ops import nn
 from tensorflow.python.ops import nn_ops
@@ -187,48 +189,34 @@ def default_mean_field_normal_fn(
       loc_constraint,
       untransformed_scale_constraint)
   def _fn(dtype, shape, name, trainable, add_variable_fn):
-    """Creates a batch of `Deterministic` or `Normal` distributions."""
+    """Creates multivariate `Deterministic` or `Normal` distribution."""
     loc, scale = loc_scale_fn_(dtype, shape, name, trainable, add_variable_fn)
     if scale is None:
-      return deterministic_lib.Deterministic(loc=loc)
-    return normal_lib.Normal(loc=loc, scale=scale)
+      dist = deterministic_lib.Deterministic(loc=loc)
+    else:
+      dist = normal_lib.Normal(loc=loc, scale=scale)
+    reinterpreted_batch_ndims = array_ops.shape(dist.batch_shape_tensor())[0]
+    return independent_lib.Independent(
+        dist, reinterpreted_batch_ndims=reinterpreted_batch_ndims)
   return _fn
 
 
 class DenseVariational(layers_lib.Layer):
   """Densely-connected variational class.
 
-  This layer implements the Bayesian variational inference analogue to:
-  `outputs = activation(matmul(inputs, kernel) + bias)`
-  by assuming the `kernel` and/or the `bias` are random variables.
-
-  The layer implements a stochastic dense calculation by making a Monte Carlo
-  approximation of a [variational Bayesian method based on KL divergence](
-  https://en.wikipedia.org/wiki/Variational_Bayesian_methods), i.e.,
+  This layer implements the Bayesian variational inference analogue to
+  a dense layer by assuming the `kernel` and/or the `bias` are drawn
+  from distributions. By default, the layer implements a stochastic
+  forward pass via sampling from the kernel and bias posteriors,
 
   ```none
-  -log p(y|x) = -log int_{R**d} p(y|x,w) p(w) dw
-              = -log int_{R**d} p(y,w|x) q(w|x) / q(w|x) dw
-             <= E_q(W|x)[-log p(y,W|x) + log q(W|x)]       # Jensen's
-              = E_q(W|x)[-log p(y|x,W)] + KL[q(W|x), p(W)]
-             ~= m**-1 sum{ -log(y|x,w[j]) : w[j] ~ q(W|x), j=1..m }
-                 + KL[q(W|x), p(W)]
+  kernel, bias ~ posterior
+  outputs = activation(matmul(inputs, kernel) + bias)
   ```
 
-  where `W` denotes the (independent) `kernel` and `bias` random variables, `w`
-  is a random variate or outcome of `W`, `y` is the label, `x` is the evidence`,
-  and `~=` denotes an approximation which becomes exact as `m->inf`. The above
-  bound is sometimes referred to as the negative Evidence Lower BOund or
-  negative [ELBO](https://arxiv.org/abs/1601.00670). In context of a DNN, this
-  layer is appropriate to use when the final loss is a negative log-likelihood.
-
-  The Monte-Carlo sum portion is used for the feed-forward calculation of the
-  DNN. The KL divergence portion can be added to the final loss via:
-  `loss += sum(tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))`.
-
   The arguments permit separate specification of the surrogate posterior
   (`q(W|x)`), prior (`p(W)`), and divergence for both the `kernel` and `bias`
-  random variables (which together comprise `W`).
+  distributions.
 
   Args:
     units: Integer or Long, dimensionality of the output space.
@@ -285,10 +273,39 @@ class DenseVariational(layers_lib.Layer):
     activity_regularizer: Regularizer function for the output.
     kernel_use_local_reparameterization: Python `bool` indicating whether
       `kernel` calculation should employ the Local Reparameterization Trick.
-    kernel: `VariationalKernelParamater` instance containing all `kernel`
-      related properties and `callable`s.
-    bias: `VariationalParameter` instance containing all `kernel`
-      related properties and `callable`s.
+    kernel_posterior_fn: `callable` returning posterior.
+    kernel_posterior_tensor_fn: `callable` operating on posterior.
+    kernel_prior_fn: `callable` returning prior.
+    kernel_divergence_fn: `callable` returning divergence.
+    bias_posterior_fn: `callable` returning posterior.
+    bias_posterior_tensor_fn: `callable` operating on posterior.
+    bias_prior_fn: `callable` returning prior.
+    bias_divergence_fn: `callable` returning divergence.
+
+  #### Examples
+
+  We illustrate a Bayesian neural network with [variational inference](
+  https://en.wikipedia.org/wiki/Variational_Bayesian_methods),
+  assuming a dataset of `features` and `labels`.
+
+  ```python
+  tfp = tf.contrib.bayesflow
+
+  net = tfp.layers.DenseVariational(512, activation=tf.nn.relu)(features)
+  logits = tfp.layers.DenseVariational(10)(net)
+  neg_log_likelihood = tf.nn.softmax_cross_entropy_with_logits(
+      labels=labels, logits=logits)
+  kl = sum(tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))
+  loss = neg_log_likelihood + kl
+  train_op = tf.train.AdamOptimizer().minimize(loss)
+  ```
+
+  It uses reparameterization gradients to minimize the
+  Kullback-Leibler divergence up to a constant, also known as the
+  negative Evidence Lower Bound. It consists of the sum of two terms:
+  the expected negative log-likelihood, which we approximate via
+  Monte Carlo; and the KL divergence, which is added via regularizer
+  terms which are arguments to the layer.
   """
 
   def __init__(
@@ -314,49 +331,19 @@ class DenseVariational(layers_lib.Layer):
         name=name,
         activity_regularizer=activity_regularizer,
         **kwargs)
-    self._units = units
-    self._activation = activation
-    self._input_spec = layers_lib.InputSpec(min_ndim=2)
-    self._kernel_use_local_reparameterization = (
+    self.units = units
+    self.activation = activation
+    self.input_spec = layers_lib.InputSpec(min_ndim=2)
+    self.kernel_use_local_reparameterization = (
         kernel_use_local_reparameterization)
-    self._kernel = VariationalKernelParameter(
-        kernel_posterior_fn,
-        kernel_posterior_tensor_fn,
-        kernel_prior_fn,
-        kernel_divergence_fn)
-    self._bias = VariationalParameter(
-        bias_posterior_fn,
-        bias_posterior_tensor_fn,
-        bias_prior_fn,
-        bias_divergence_fn)
-
-  @property
-  def units(self):
-    return self._units
-
-  @property
-  def activation(self):
-    return self._activation
-
-  @property
-  def input_spec(self):
-    return self._input_spec
-
-  @input_spec.setter
-  def input_spec(self, value):
-    self._input_spec = value
-
-  @property
-  def kernel_use_local_reparameterization(self):
-    return self._kernel_use_local_reparameterization
-
-  @property
-  def kernel(self):
-    return self._kernel
-
-  @property
-  def bias(self):
-    return self._bias
+    self.kernel_posterior_fn = kernel_posterior_fn
+    self.kernel_posterior_tensor_fn = kernel_posterior_tensor_fn
+    self.kernel_prior_fn = kernel_prior_fn
+    self.kernel_divergence_fn = kernel_divergence_fn
+    self.bias_posterior_fn = bias_posterior_fn
+    self.bias_posterior_tensor_fn = bias_posterior_tensor_fn
+    self.bias_prior_fn = bias_prior_fn
+    self.bias_divergence_fn = bias_divergence_fn
 
   def build(self, input_shape):
     input_shape = tensor_shape.TensorShape(input_shape)
@@ -368,29 +355,29 @@ class DenseVariational(layers_lib.Layer):
     dtype = dtypes.as_dtype(self.dtype)
 
     # Must have a posterior kernel.
-    self.kernel.posterior = self.kernel.posterior_fn(
+    self.kernel_posterior = self.kernel_posterior_fn(
         dtype, [in_size, self.units], "kernel_posterior",
         self.trainable, self.add_variable)
 
-    if self.kernel.prior_fn is None:
+    if self.kernel_prior_fn is None:
       self.kernel_prior = None
     else:
-      self.kernel.prior = self.kernel.prior_fn(
+      self.kernel_prior = self.kernel_prior_fn(
           dtype, [in_size, self.units], "kernel_prior",
           self.trainable, self.add_variable)
     self._built_kernel_divergence = False
 
-    if self.bias.posterior_fn is None:
-      self.bias.posterior = None
+    if self.bias_posterior_fn is None:
+      self.bias_posterior = None
     else:
-      self.bias.posterior = self.bias.posterior_fn(
+      self.bias_posterior = self.bias_posterior_fn(
           dtype, [self.units], "bias_posterior",
           self.trainable, self.add_variable)
 
-    if self.bias.prior_fn is None:
-      self.bias.prior = None
+    if self.bias_prior_fn is None:
+      self.bias_prior = None
     else:
-      self.bias.prior = self.bias.prior_fn(
+      self.bias_prior = self.bias_prior_fn(
           dtype, [self.units], "bias_prior",
           self.trainable, self.add_variable)
     self._built_bias_divergence = False
@@ -405,54 +392,77 @@ class DenseVariational(layers_lib.Layer):
     if self.activation is not None:
       outputs = self.activation(outputs)  # pylint: disable=not-callable
     if not self._built_kernel_divergence:
-      self._apply_divergence(self.kernel, name="divergence_kernel")
+      kernel_posterior = self.kernel_posterior
+      kernel_prior = self.kernel_prior
+      if isinstance(self.kernel_posterior, independent_lib.Independent):
+        kernel_posterior = kernel_posterior.distribution
+      if isinstance(self.kernel_prior, independent_lib.Independent):
+        kernel_prior = kernel_prior.distribution
+      self._apply_divergence(self.kernel_divergence_fn,
+                             kernel_posterior,
+                             kernel_prior,
+                             self.kernel_posterior_tensor,
+                             name="divergence_kernel")
       self._built_kernel_divergence = True
     if not self._built_bias_divergence:
-      self._apply_divergence(self.bias, name="divergence_bias")
+      bias_posterior = self.bias_posterior
+      bias_prior = self.bias_prior
+      if isinstance(self.bias_posterior, independent_lib.Independent):
+        bias_posterior = bias_posterior.distribution
+      if isinstance(self.bias_prior, independent_lib.Independent):
+        bias_prior = bias_prior.distribution
+      self._apply_divergence(self.bias_divergence_fn,
+                             bias_posterior,
+                             bias_prior,
+                             self.bias_posterior_tensor,
+                             name="divergence_bias")
       self._built_bias_divergence = True
     return outputs
 
   def _apply_variational_kernel(self, inputs):
     if not self.kernel_use_local_reparameterization:
-      self.kernel.posterior_tensor = self.kernel.posterior_tensor_fn(
-          self.kernel.posterior)
-      self.kernel.posterior_affine = None
-      self.kernel.posterior_affine_tensor = None
-      return self._matmul(inputs, self.kernel.posterior_tensor)
-    if not isinstance(self.kernel.posterior, normal_lib.Normal):
-      raise TypeError("`kernel_use_local_reparameterization=True` requires "
-                      "`kernel_posterior_fn` produce an instance of "
-                      "`tf.distributions.Normal` (saw: \"{}\").".format(
-                          type(self.kernel.posterior).__name__))
-    self.kernel.posterior_affine = normal_lib.Normal(
-        loc=self._matmul(inputs, self.kernel.posterior.loc),
+      self.kernel_posterior_tensor = self.kernel_posterior_tensor_fn(
+          self.kernel_posterior)
+      self.kernel_posterior_affine = None
+      self.kernel_posterior_affine_tensor = None
+      return self._matmul(inputs, self.kernel_posterior_tensor)
+    if (not isinstance(self.kernel_posterior, independent_lib.Independent) or
+        not isinstance(self.kernel_posterior.distribution, normal_lib.Normal)):
+      raise TypeError(
+          "`kernel_use_local_reparameterization=True` requires "
+          "`kernel_posterior_fn` produce an instance of "
+          "`tf.distributions.Independent(tf.distributions.Normal)` "
+          "(saw: \"{}\").".format(type(self.kernel_posterior).__name__))
+    self.kernel_posterior_affine = normal_lib.Normal(
+        loc=self._matmul(inputs, self.kernel_posterior.distribution.loc),
         scale=standard_ops.sqrt(self._matmul(
             standard_ops.square(inputs),
-            standard_ops.square(self.kernel.posterior.scale))))
-    self.kernel.posterior_affine_tensor = (
-        self.kernel.posterior_tensor_fn(self.kernel.posterior_affine))
-    self.kernel.posterior_tensor = None
-    return self.kernel.posterior_affine_tensor
+            standard_ops.square(self.kernel_posterior.distribution.scale))))
+    self.kernel_posterior_affine_tensor = (
+        self.kernel_posterior_tensor_fn(self.kernel_posterior_affine))
+    self.kernel_posterior_tensor = None
+    return self.kernel_posterior_affine_tensor
 
   def _apply_variational_bias(self, inputs):
-    if self.bias.posterior is None:
-      self.bias.posterior_tensor = None
+    if self.bias_posterior is None:
+      self.bias_posterior_tensor = None
       return inputs
-    self.bias.posterior_tensor = self.bias.posterior_tensor_fn(
-        self.bias.posterior)
-    return nn.bias_add(inputs, self.bias.posterior_tensor)
-
-  def _apply_divergence(self, param, name):
-    if (param.divergence_fn is None or
-        param.posterior is None or
-        param.prior is None):
-      param.divergence = None
+    self.bias_posterior_tensor = self.bias_posterior_tensor_fn(
+        self.bias_posterior)
+    return nn.bias_add(inputs, self.bias_posterior_tensor)
+
+  def _apply_divergence(self, divergence_fn, posterior, prior,
+                        posterior_tensor, name):
+    if (divergence_fn is None or
+        posterior is None or
+        prior is None):
+      divergence = None
       return
-    param.divergence = standard_ops.identity(
-        param.divergence_fn(
-            param.posterior, param.prior, param.posterior_tensor),
+    divergence = standard_ops.identity(
+        divergence_fn(
+            posterior, prior, posterior_tensor),
         name=name)
-    self.add_loss(param.divergence)
+    self.add_loss(divergence)
 
   def _matmul(self, inputs, kernel):
     if inputs.shape.ndims <= 2:
@@ -489,37 +499,19 @@ def dense_variational(
     reuse=None):
   """Densely-connected variational layer.
 
-  This layer implements the Bayesian variational inference analogue to:
-  `outputs = activation(matmul(inputs, kernel) + bias)`
-  by assuming the `kernel` and/or the `bias` are random variables.
-
-  The layer implements a stochastic dense calculation by making a Monte Carlo
-  approximation of a [variational Bayesian method based on KL divergence](
-  https://en.wikipedia.org/wiki/Variational_Bayesian_methods), i.e.,
+  This layer implements the Bayesian variational inference analogue to
+  a dense layer by assuming the `kernel` and/or the `bias` are drawn
+  from distributions. By default, the layer implements a stochastic
+  forward pass via sampling from the kernel and bias posteriors,
 
   ```none
-  -log p(y|x) = -log int_{R**d} p(y|x,w) p(w) dw
-              = -log int_{R**d} p(y,w|x) q(w|x) / q(w|x) dw
-             <= E_q(W|x)[-log p(y,W|x) + log q(W|x)]       # Jensen's
-              = E_q(W|x)[-log p(y|x,W)] + KL[q(W|x), p(W)]
-             ~= m**-1 sum{ -log(y|x,w[j]) : w[j] ~ q(W|x), j=1..m }
-                 + KL[q(W|x), p(W)]
+  kernel, bias ~ posterior
+  outputs = activation(matmul(inputs, kernel) + bias)
   ```
 
-  where `W` denotes the (independent) `kernel` and `bias` random variables, `w`
-  is a random variate or outcome of `W`, `y` is the label, `x` is the evidence`,
-  and `~=` denotes an approximation which becomes exact as `m->inf`. The above
-  bound is sometimes referred to as the negative Evidence Lower BOund or
-  negative [ELBO](https://arxiv.org/abs/1601.00670). In context of a DNN, this
-  layer is appropriate to use when the final loss is a negative log-likelihood.
-
-  The Monte-Carlo sum portion is used for the feed-forward calculation of the
-  DNN. The KL divergence portion can be added to the final loss via:
-  `loss += sum(tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))`.
-
   The arguments permit separate specification of the surrogate posterior
   (`q(W|x)`), prior (`p(W)`), and divergence for both the `kernel` and `bias`
-  random variables (which together comprise `W`).
+  distributions.
 
   Args:
     inputs: Tensor input.
@@ -574,6 +566,31 @@ def dense_variational(
   Returns:
     output: `Tensor` representing a the affine transformed input under a random
       draw from the surrogate posterior distribution.
+
+  #### Examples
+
+  We illustrate a Bayesian neural network with [variational inference](
+  https://en.wikipedia.org/wiki/Variational_Bayesian_methods),
+  assuming a dataset of `features` and `labels`.
+
+  ```python
+  tfp = tf.contrib.bayesflow
+
+  net = tfp.layers.dense_variational(features, 512, activation=tf.nn.relu)
+  logits = tfp.layers.dense_variational(net, 10)
+  neg_log_likelihood = tf.nn.softmax_cross_entropy_with_logits(
+      labels=labels, logits=logits)
+  kl = sum(tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))
+  loss = neg_log_likelihood + kl
+  train_op = tf.train.AdamOptimizer().minimize(loss)
+  ```
+
+  It uses reparameterization gradients to minimize the
+  Kullback-Leibler divergence up to a constant, also known as the
+  negative Evidence Lower Bound. It consists of the sum of two terms:
+  the expected negative log-likelihood, which we approximate via
+  Monte Carlo; and the KL divergence, which is added via regularizer
+  terms which are arguments to the layer.
   """
   layer = DenseVariational(
       units,
@@ -595,203 +612,3 @@ def dense_variational(
       _scope=name,
       _reuse=reuse)
   return layer.apply(inputs)
-
-
-class NotSet(object):
-  """Helper to track whether a `VariationalParameter` value has been set."""
-  pass
-
-
-class VariationalParameter(object):
-  """Struct-like container of variational parameter properties.
-
-  A `VariationalParameter` is intitialized with Python `callable`s which set the
-  value of correspondingly named members. Corresponding values have "set once"
-  semantics, i.e., once set to any value they are immutable.
-  """
-
-  def __init__(
-      self,
-      posterior_fn,
-      posterior_tensor_fn,
-      prior_fn,
-      divergence_fn):
-    """Creates the `VariationalParameter` struct-like object.
-
-    Args:
-      posterior_fn: Python `callable` which creates a
-        `tf.distribution.Distribution` like object representing the posterior
-        distribution. See `VariationalParameter.posterior_fn` for `callable`'s
-        required parameters.
-      posterior_tensor_fn: Python `callable` which computes a `Tensor`
-        which represents the `posterior`.
-      prior_fn: Python `callable` which creates a
-        `tf.distribution.Distribution` like object representing the prior
-        distribution. See `VariationalParameter.prior_fn` for `callable`'s
-        required parameters.
-      divergence_fn: Python `callable` which computes the KL divergence from
-        `posterior` to `prior`. See `VariationalParameter.divergence_fn` for
-        required `callable`'s parameters.
-    """
-    self._posterior_fn = posterior_fn
-    self._posterior = NotSet()
-    self._posterior_tensor_fn = posterior_tensor_fn
-    self._posterior_tensor = NotSet()
-    self._prior_fn = prior_fn
-    self._prior = NotSet()
-    self._divergence_fn = divergence_fn
-    self._divergence = NotSet()
-    self._init_helper()
-
-  @property
-  def posterior_fn(self):
-    """`callable` which creates `tf.distributions.Distribution`-like posterior.
-
-    The `callable` must accept the following parameters:
-      name: Python `str` name prepended to any created (or existing)
-        `tf.Variable`s.
-      shape: Python `list`-like representing the parameter's event shape.
-      dtype: Type of parameter's event.
-      trainable: Python `bool` indicating all created `tf.Variable`s should be
-        added to the graph collection `GraphKeys.TRAINABLE_VARIABLES`.
-      add_variable_fn: `tf.get_variable`-like `callable` used to create (or
-        access existing) `tf.Variable`s.
-
-    Returns:
-      posterior_fn: The Python `callable` specified in `__init__`.
-    """
-    return self._posterior_fn
-
-  @property
-  def posterior(self):
-    """`tf.distributions.Distribution`-like instance representing posterior."""
-    return self._posterior
-
-  @posterior.setter
-  def posterior(self, value):
-    """One-time setter of the `posterior` distribution."""
-    if not isinstance(self._posterior, NotSet):
-      raise ValueError("Cannot override already set attribute.")
-    self._posterior = value
-
-  @property
-  def posterior_tensor_fn(self):
-    """Creates `Tensor` representing the `posterior` distribution.
-
-    The `callable` must accept the following parameters:
-      posterior: `tf.distributions.Distribution`-like instance.
-
-    Returns:
-      posterior_tensor_fn: The Python `callable` specified in
-        `__init__`.
-    """
-    return self._posterior_tensor_fn
-
-  @property
-  def posterior_tensor(self):
-    """`Tensor` representing the `posterior` distribution."""
-    return self._posterior_tensor
-
-  @posterior_tensor.setter
-  def posterior_tensor(self, value):
-    """One-time setter of the `posterior_tensor`."""
-    if not isinstance(self._posterior_tensor, NotSet):
-      raise ValueError("Cannot override already set attribute.")
-    self._posterior_tensor = value
-
-  @property
-  def prior_fn(self):
-    """`callable` which creates `tf.distributions.Distribution`-like prior.
-
-    The `callable` must accept the following parameters:
-      name: Python `str` name prepended to any created (or existing)
-        `tf.Variable`s.
-      shape: Python `list`-like representing the parameter's event shape.
-      dtype: Type of parameter's event.
-      trainable: Python `bool` indicating all created `tf.Variable`s should be
-        added to the graph collection `GraphKeys.TRAINABLE_VARIABLES`.
-      add_variable_fn: `tf.get_variable`-like `callable` used to create (or
-        access existing) `tf.Variable`s.
-
-    Returns:
-      prior_fn: The Python `callable` specified in `__init__`.
-    """
-    return self._prior_fn
-
-  @property
-  def prior(self):
-    """`tf.distributions.Distribution`-like instance representing posterior."""
-    return self._prior
-
-  @prior.setter
-  def prior(self, value):
-    """One-time setter of the `prior` distribution."""
-    if not isinstance(self._prior, NotSet):
-      raise ValueError("Cannot override already set attribute.")
-    self._prior = value
-
-  @property
-  def divergence_fn(self):
-    """`callable` which computes KL-divergence `Tensor` from posterior to prior.
-
-    The `callable` must accept the following parameters:
-      posterior: `tf.distributions.Distribution`-like instance.
-      prior: `tf.distributions.Distribution`-like instance.
-      posterior_tensor: `Tensor` representing value of posterior.
-
-    Returns:
-      divergence_fn: The Python `callable` specified in `__init__`.
-    """
-    return self._divergence_fn
-
-  @property
-  def divergence(self):
-    """`Tensor` representing KL-divergence from posterior to prior."""
-    return self._divergence
-
-  @divergence.setter
-  def divergence(self, value):
-    """One-time setter of the `divergence`."""
-    if not isinstance(self._divergence, NotSet):
-      raise ValueError("Cannot override already set attribute.")
-    self._divergence = value
-
-  def _init_helper(self):
-    pass
-
-
-class VariationalKernelParameter(VariationalParameter):
-  """Struct-like container of variational kernel properties.
-
-  A `VariationalKernelParameter` is intitialized with Python `callable`s which
-  set the value of correspondingly named members. Corresponding values have "set
-  once" semantics, i.e., once set to any value they are immutable.
-  """
-
-  @property
-  def posterior_affine(self):
-    """`tf.distributions.Distribution` affine transformed posterior."""
-    return self._posterior_affine
-
-  @posterior_affine.setter
-  def posterior_affine(self, value):
-    """One-time setter of `posterior_affine`."""
-    if not isinstance(self._posterior_affine, NotSet):
-      raise ValueError("Cannot override already set attribute.")
-    self._posterior_affine = value
-
-  @property
-  def posterior_affine_tensor(self):
-    """`Tensor` representing the `posterior_affine` distribution."""
-    return self._posterior_affine_tensor
-
-  @posterior_affine_tensor.setter
-  def posterior_affine_tensor(self, value):
-    """One-time setter of the `posterior_affine_tensor`."""
-    if not isinstance(self._posterior_affine_tensor, NotSet):
-      raise ValueError("Cannot override already set attribute.")
-    self._posterior_affine_tensor = value
-
-  def _init_helper(self):
-    self._posterior_affine = NotSet()
-    self._posterior_affine_tensor = NotSet()
-- 
GitLab


From dcb0666a2be2c78d1f36984ef45910998f19e50b Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 15 Dec 2017 16:41:44 -0800
Subject: [PATCH 1107/1225] add bfloat16 support to some GPU ops: concat,
 constant, fill, pack, reshape, slice, split, unpack

PiperOrigin-RevId: 179255814
---
 tensorflow/core/kernels/concat_lib_gpu.cc     |  4 +--
 tensorflow/core/kernels/constant_op.cc        |  3 ++
 tensorflow/core/kernels/constant_op_gpu.cu.cc | 29 +++++++------------
 tensorflow/core/kernels/pack_op.cc            |  1 +
 tensorflow/core/kernels/reshape_op.cc         |  3 +-
 tensorflow/core/kernels/slice_op.cc           |  8 +++--
 tensorflow/core/kernels/slice_op_gpu.cu.cc    |  1 +
 tensorflow/core/kernels/split_lib_gpu.cu.cc   |  7 +++--
 tensorflow/core/kernels/split_op.cc           |  1 +
 tensorflow/core/kernels/unpack_op.cc          |  1 +
 tensorflow/core/util/cuda_kernel_helper.h     | 14 +++++++++
 11 files changed, 44 insertions(+), 28 deletions(-)

diff --git a/tensorflow/core/kernels/concat_lib_gpu.cc b/tensorflow/core/kernels/concat_lib_gpu.cc
index 319ead49ef..d8643c0b2f 100644
--- a/tensorflow/core/kernels/concat_lib_gpu.cc
+++ b/tensorflow/core/kernels/concat_lib_gpu.cc
@@ -116,8 +116,8 @@ TF_CALL_GPU_NUMBER_TYPES(REGISTER);
 TF_CALL_complex64(REGISTER);
 TF_CALL_complex128(REGISTER);
 TF_CALL_int64(REGISTER);
-REGISTER(bfloat16);
-REGISTER(bool);
+TF_CALL_bfloat16(REGISTER);
+TF_CALL_bool(REGISTER);
 
 #undef REGISTER
 
diff --git a/tensorflow/core/kernels/constant_op.cc b/tensorflow/core/kernels/constant_op.cc
index 72132574a4..103a0e225e 100644
--- a/tensorflow/core/kernels/constant_op.cc
+++ b/tensorflow/core/kernels/constant_op.cc
@@ -250,6 +250,7 @@ REGISTER_KERNEL_BUILDER(Name("Fill")
 
 #if GOOGLE_CUDA
 REGISTER_KERNEL(GPU, Eigen::half);
+REGISTER_KERNEL(GPU, bfloat16);
 REGISTER_KERNEL(GPU, float);
 REGISTER_KERNEL(GPU, double);
 REGISTER_KERNEL(GPU, uint8);
@@ -328,6 +329,7 @@ REGISTER_KERNEL_BUILDER(Name("ZerosLike")
 #if GOOGLE_CUDA
 REGISTER_KERNEL(bool, GPU);
 REGISTER_KERNEL(Eigen::half, GPU);
+REGISTER_KERNEL(bfloat16, GPU);
 REGISTER_KERNEL(float, GPU);
 REGISTER_KERNEL(double, GPU);
 REGISTER_KERNEL(complex64, GPU);
@@ -380,6 +382,7 @@ REGISTER_KERNEL_BUILDER(Name("OnesLike")
 #if GOOGLE_CUDA
 REGISTER_KERNEL(bool, GPU);
 REGISTER_KERNEL(Eigen::half, GPU);
+REGISTER_KERNEL(bfloat16, GPU);
 REGISTER_KERNEL(float, GPU);
 REGISTER_KERNEL(double, GPU);
 REGISTER_KERNEL(complex64, GPU);
diff --git a/tensorflow/core/kernels/constant_op_gpu.cu.cc b/tensorflow/core/kernels/constant_op_gpu.cu.cc
index d1a1e34ec3..49beb499af 100644
--- a/tensorflow/core/kernels/constant_op_gpu.cu.cc
+++ b/tensorflow/core/kernels/constant_op_gpu.cu.cc
@@ -77,7 +77,8 @@ struct FillFunctor<GPUDevice, T> {
 
 #define DEFINE_FILL_GPU(T) template struct FillFunctor<GPUDevice, T>;
 TF_CALL_REAL_NUMBER_TYPES(DEFINE_FILL_GPU);
-DEFINE_FILL_GPU(bool);
+TF_CALL_bfloat16(DEFINE_FILL_GPU);
+TF_CALL_bool(DEFINE_FILL_GPU);
 #undef DEFINE_FILL_GPU
 
 // Partial specialization of FillFunctor<Device=GPUDevice, T>.
@@ -88,15 +89,10 @@ struct SetZeroFunctor<GPUDevice, T> {
   }
 };
 
-#define DEFINE_SETZERO_GPU(T) template struct SetZeroFunctor<GPUDevice, T>
-DEFINE_SETZERO_GPU(bool);
-DEFINE_SETZERO_GPU(Eigen::half);
-DEFINE_SETZERO_GPU(float);
-DEFINE_SETZERO_GPU(double);
-DEFINE_SETZERO_GPU(complex64);
-DEFINE_SETZERO_GPU(complex128);
-DEFINE_SETZERO_GPU(int32);
-DEFINE_SETZERO_GPU(int64);
+#define DEFINE_SETZERO_GPU(T) template struct SetZeroFunctor<GPUDevice, T>;
+TF_CALL_NUMBER_TYPES(DEFINE_SETZERO_GPU);
+TF_CALL_bfloat16(DEFINE_SETZERO_GPU);
+TF_CALL_bool(DEFINE_SETZERO_GPU);
 #undef DEFINE_SETZERO_GPU
 
 // Partial specialization of FillFunctor<Device=GPUDevice, T>.
@@ -107,15 +103,10 @@ struct SetOneFunctor<GPUDevice, T> {
   }
 };
 
-#define DEFINE_SETONE_GPU(T) template struct SetOneFunctor<GPUDevice, T>
-DEFINE_SETONE_GPU(bool);
-DEFINE_SETONE_GPU(Eigen::half);
-DEFINE_SETONE_GPU(float);
-DEFINE_SETONE_GPU(double);
-DEFINE_SETONE_GPU(complex64);
-DEFINE_SETONE_GPU(complex128);
-DEFINE_SETONE_GPU(int32);
-DEFINE_SETONE_GPU(int64);
+#define DEFINE_SETONE_GPU(T) template struct SetOneFunctor<GPUDevice, T>;
+TF_CALL_NUMBER_TYPES(DEFINE_SETONE_GPU);
+TF_CALL_bfloat16(DEFINE_SETONE_GPU);
+TF_CALL_bool(DEFINE_SETONE_GPU);
 #undef DEFINE_SETONE_GPU
 
 }  // end namespace functor
diff --git a/tensorflow/core/kernels/pack_op.cc b/tensorflow/core/kernels/pack_op.cc
index 6167593013..2923c38662 100644
--- a/tensorflow/core/kernels/pack_op.cc
+++ b/tensorflow/core/kernels/pack_op.cc
@@ -158,6 +158,7 @@ REGISTER_PACK(string);
       PackOp<GPUDevice, type>)
 
 TF_CALL_GPU_NUMBER_TYPES(REGISTER_GPU);
+TF_CALL_bfloat16(REGISTER_GPU);
 TF_CALL_int64(REGISTER_GPU);
 REGISTER_GPU(bool);
 #undef REGISTER_GPU
diff --git a/tensorflow/core/kernels/reshape_op.cc b/tensorflow/core/kernels/reshape_op.cc
index 18ebf70c17..8b86596721 100644
--- a/tensorflow/core/kernels/reshape_op.cc
+++ b/tensorflow/core/kernels/reshape_op.cc
@@ -43,7 +43,8 @@ REGISTER_KERNEL_BUILDER(Name("Reshape")
                               .TypeConstraint<int64>("Tshape"), \
                           ReshapeOp);
 TF_CALL_NUMBER_TYPES_NO_INT32(REGISTER_GPU_KERNEL);
-REGISTER_GPU_KERNEL(bool);
+TF_CALL_bfloat16(REGISTER_GPU_KERNEL);
+TF_CALL_bool(REGISTER_GPU_KERNEL);
 #undef REGISTER_GPU_KERNEL
 
 #ifdef TENSORFLOW_USE_SYCL
diff --git a/tensorflow/core/kernels/slice_op.cc b/tensorflow/core/kernels/slice_op.cc
index d46701749b..a9e31cc336 100644
--- a/tensorflow/core/kernels/slice_op.cc
+++ b/tensorflow/core/kernels/slice_op.cc
@@ -439,7 +439,7 @@ namespace functor {
   DECLARE_CPU_SPEC(T, 7);
 
 TF_CALL_ALL_TYPES(DECLARE_FOR_N);
-DECLARE_FOR_N(bfloat16);
+TF_CALL_bfloat16(DECLARE_FOR_N);
 
 #undef DECLARE_FOR_N
 #undef DECLARE_CPU_SPEC
@@ -456,7 +456,7 @@ DECLARE_FOR_N(bfloat16);
 
 TF_CALL_POD_STRING_TYPES(REGISTER_SLICE);
 TF_CALL_QUANTIZED_TYPES(REGISTER_SLICE);
-REGISTER_SLICE(bfloat16);
+TF_CALL_bfloat16(REGISTER_SLICE);
 #undef REGISTER_SLICE
 #else
 #define REGISTER_SLICE(type)                             \
@@ -469,7 +469,7 @@ REGISTER_SLICE(bfloat16);
 
 TF_CALL_POD_STRING_TYPES(REGISTER_SLICE);
 TF_CALL_QUANTIZED_TYPES(REGISTER_SLICE);
-REGISTER_SLICE(bfloat16);
+TF_CALL_bfloat16(REGISTER_SLICE);
 #undef REGISTER_SLICE
 #endif  // INTEL_MKL
 
@@ -497,6 +497,7 @@ namespace functor {
 TF_CALL_GPU_NUMBER_TYPES(DECLARE_FOR_N);
 TF_CALL_complex64(DECLARE_FOR_N);
 TF_CALL_complex128(DECLARE_FOR_N);
+TF_CALL_bfloat16(DECLARE_FOR_N);
 DECLARE_FOR_N(int32);
 
 #undef DECLARE_FOR_N
@@ -515,6 +516,7 @@ DECLARE_FOR_N(int32);
 TF_CALL_GPU_NUMBER_TYPES(REGISTER_GPU);
 TF_CALL_complex64(REGISTER_GPU);
 TF_CALL_complex128(REGISTER_GPU);
+TF_CALL_bfloat16(REGISTER_GPU);
 
 // A special GPU kernel for int32.
 // TODO(b/25387198): Also enable int32 in device memory. This kernel
diff --git a/tensorflow/core/kernels/slice_op_gpu.cu.cc b/tensorflow/core/kernels/slice_op_gpu.cu.cc
index a301986f2f..9d51f8978c 100644
--- a/tensorflow/core/kernels/slice_op_gpu.cu.cc
+++ b/tensorflow/core/kernels/slice_op_gpu.cu.cc
@@ -39,6 +39,7 @@ typedef Eigen::GpuDevice GPUDevice;
 TF_CALL_GPU_NUMBER_TYPES(DEFINE_GPU_KERNELS);
 TF_CALL_complex64(DEFINE_GPU_KERNELS);
 TF_CALL_complex128(DEFINE_GPU_KERNELS);
+TF_CALL_bfloat16(DEFINE_GPU_KERNELS);
 DEFINE_GPU_KERNELS(int32);
 
 #undef DEFINE_GPU_KERNELS
diff --git a/tensorflow/core/kernels/split_lib_gpu.cu.cc b/tensorflow/core/kernels/split_lib_gpu.cu.cc
index dd6fc6115f..9f234fc093 100644
--- a/tensorflow/core/kernels/split_lib_gpu.cu.cc
+++ b/tensorflow/core/kernels/split_lib_gpu.cu.cc
@@ -52,7 +52,7 @@ void SplitCustom<Device, T>::operator()(
 TF_CALL_GPU_NUMBER_TYPES(DEFINE_GPU_KERNELS);
 TF_CALL_complex64(DEFINE_GPU_KERNELS);
 TF_CALL_complex128(DEFINE_GPU_KERNELS);
-DEFINE_GPU_KERNELS(bfloat16);
+TF_CALL_bfloat16(DEFINE_GPU_KERNELS);
 
 #undef DEFINE_GPU_KERNELS
 #define DEFINE_GPU_KERNELS(T) template struct SplitCustom<Eigen::GpuDevice, T>;
@@ -60,7 +60,7 @@ DEFINE_GPU_KERNELS(bfloat16);
 TF_CALL_GPU_NUMBER_TYPES(DEFINE_GPU_KERNELS);
 TF_CALL_complex64(DEFINE_GPU_KERNELS);
 TF_CALL_complex128(DEFINE_GPU_KERNELS);
-DEFINE_GPU_KERNELS(bfloat16);
+TF_CALL_bfloat16(DEFINE_GPU_KERNELS);
 
 #undef DEFINE_GPU_KERNELS
 
@@ -243,6 +243,7 @@ struct SplitVOpGPULaunch {
 TF_CALL_GPU_NUMBER_TYPES(REGISTER_GPU_KERNEL);
 TF_CALL_complex64(REGISTER_GPU_KERNEL);
 TF_CALL_complex128(REGISTER_GPU_KERNEL);
+TF_CALL_bfloat16(REGISTER_GPU_KERNEL);
 #undef REGISTER_GPU_KERNEL
 #define REGISTER_GPU_KERNEL(T)                 \
   template struct SplitVOpGPULaunch<T, int32>; \
@@ -251,7 +252,7 @@ TF_CALL_complex128(REGISTER_GPU_KERNEL);
 TF_CALL_GPU_NUMBER_TYPES(REGISTER_GPU_KERNEL);
 TF_CALL_complex64(REGISTER_GPU_KERNEL);
 TF_CALL_complex128(REGISTER_GPU_KERNEL);
-REGISTER_GPU_KERNEL(bfloat16);
+TF_CALL_bfloat16(REGISTER_GPU_KERNEL);
 #undef REGISTER_GPU_KERNEL
 
 }  // namespace tensorflow
diff --git a/tensorflow/core/kernels/split_op.cc b/tensorflow/core/kernels/split_op.cc
index 094ba8bb86..90d7e225ed 100644
--- a/tensorflow/core/kernels/split_op.cc
+++ b/tensorflow/core/kernels/split_op.cc
@@ -377,6 +377,7 @@ REGISTER_SPLIT(bfloat16);
 TF_CALL_GPU_NUMBER_TYPES(REGISTER_GPU);
 TF_CALL_complex64(REGISTER_GPU);
 TF_CALL_complex128(REGISTER_GPU);
+REGISTER_GPU(bfloat16);
 #undef REGISTER_GPU
 
 #endif  // GOOGLE_CUDA
diff --git a/tensorflow/core/kernels/unpack_op.cc b/tensorflow/core/kernels/unpack_op.cc
index 7fd1def1fe..71bbb50fb1 100644
--- a/tensorflow/core/kernels/unpack_op.cc
+++ b/tensorflow/core/kernels/unpack_op.cc
@@ -142,6 +142,7 @@ TF_CALL_ALL_TYPES(REGISTER_UNPACK);
       UnpackOp<GPUDevice, type>)
 
 TF_CALL_GPU_NUMBER_TYPES(REGISTER_GPU);
+TF_CALL_bfloat16(REGISTER_GPU);
 #undef REGISTER_GPU
 
 // A special GPU kernel for int32.
diff --git a/tensorflow/core/util/cuda_kernel_helper.h b/tensorflow/core/util/cuda_kernel_helper.h
index cf11f419a4..3e32ec7973 100644
--- a/tensorflow/core/util/cuda_kernel_helper.h
+++ b/tensorflow/core/util/cuda_kernel_helper.h
@@ -374,6 +374,20 @@ __device__ __host__ inline Eigen::half ldg(const Eigen::half* address) {
 #endif
 }
 
+template <>
+__device__ __host__ inline tensorflow::bfloat16 ldg(
+    const tensorflow::bfloat16* address) {
+#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 350
+  tensorflow::bfloat16 return_value;
+  asm volatile("ld.global.nc.u16 %0, [%1];"
+               : "=h"(return_value.value)
+               : "l"(address));
+  return return_value;
+#else
+  return *address;
+#endif
+}
+
 template <>
 __device__ __host__ inline bool ldg(const bool* address) {
 #if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 350
-- 
GitLab


From 4bdea2e4023f28ec50035af07c27b14a11c405bc Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 15 Dec 2017 16:49:30 -0800
Subject: [PATCH 1108/1225] iOS build cleanup.

PiperOrigin-RevId: 179256571
---
 tensorflow/tensorflow.bzl | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/tensorflow/tensorflow.bzl b/tensorflow/tensorflow.bzl
index 425997e41f..611d50bc52 100644
--- a/tensorflow/tensorflow.bzl
+++ b/tensorflow/tensorflow.bzl
@@ -103,6 +103,12 @@ def if_ios(a):
       "//conditions:default": [],
   })
 
+def if_ios_x86_64(a):
+  return select({
+      clean_dep("//tensorflow:ios_x86_64"): a,
+      "//conditions:default": [],
+  })
+
 def if_mobile(a):
   return select({
       clean_dep("//tensorflow:android"): a,
@@ -177,6 +183,7 @@ def tf_copts(android_optimization_level_override="-O2"):
       + if_mkl(["-DINTEL_MKL=1", "-DEIGEN_USE_VML", "-fopenmp",])
       + if_android_arm(["-mfpu=neon"])
       + if_linux_x86_64(["-msse3"])
+      + if_ios_x86_64(["-msse4.1"])
       + select({
             clean_dep("//tensorflow:android"): android_copts,
             clean_dep("//tensorflow:darwin"): [],
-- 
GitLab


From 40a53f912e1dd6840cd0cb651980bbce7f30e3f1 Mon Sep 17 00:00:00 2001
From: Skye Wanderman-Milne <skyewm@google.com>
Date: Fri, 15 Dec 2017 16:56:16 -0800
Subject: [PATCH 1109/1225] Make maximum_iterations checks use public APIs.

This introduces some new (non-public) control_flow_util methods:
InWhileLoop, InCond, and GetContainingCondContext.

PiperOrigin-RevId: 179257208
---
 tensorflow/python/BUILD                    |  1 +
 tensorflow/python/ops/control_flow_ops.py  | 25 +++++++++----------
 tensorflow/python/ops/control_flow_util.py | 28 ++++++++++++++++++++++
 tensorflow/python/ops/rnn.py               |  4 +++-
 4 files changed, 45 insertions(+), 13 deletions(-)

diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD
index 80f3ec6681..bb47acabf9 100644
--- a/tensorflow/python/BUILD
+++ b/tensorflow/python/BUILD
@@ -2045,6 +2045,7 @@ py_library(
     deps = [
         ":array_ops",
         ":control_flow_ops",
+        ":control_flow_util",
         ":framework_for_generated_wrappers",
         ":math_ops",
         ":rnn_cell",
diff --git a/tensorflow/python/ops/control_flow_ops.py b/tensorflow/python/ops/control_flow_ops.py
index cb5f4a66fd..4d108155e4 100644
--- a/tensorflow/python/ops/control_flow_ops.py
+++ b/tensorflow/python/ops/control_flow_ops.py
@@ -1423,6 +1423,10 @@ class ControlFlowContext(object):
       g.as_graph_element(op)._set_control_flow_context(self)
       # pylint: enable=protected-access
 
+  @property
+  def name(self):
+    return self._name
+
   @property
   def outer_context(self):
     """Return the context containing this context."""
@@ -1528,6 +1532,9 @@ class ControlFlowContext(object):
   def IsWhileContext(self):
     return False
 
+  def IsCondContext(self):
+    return False
+
   def __str__(self):
     return self.name
 
@@ -1584,10 +1591,6 @@ class CondContext(ControlFlowContext):
     super(CondContext, self).__init__(values_def=context_def.values_def,
                                       import_scope=import_scope)
 
-  @property
-  def name(self):
-    return self._name
-
   @property
   def pred(self):
     return self._pred
@@ -1769,6 +1772,9 @@ class CondContext(ControlFlowContext):
       result = [result]
     return original_result, result
 
+  def IsCondContext(self):
+    return True
+
 
 def _UnpackIfSingleton(res):
   if isinstance(res, (list, _basetuple)) and len(res) == 1:
@@ -2079,10 +2085,6 @@ class WhileContext(ControlFlowContext):
     super(WhileContext, self).__init__(values_def=context_def.values_def,
                                        import_scope=import_scope)
 
-  @property
-  def name(self):
-    return self._name
-
   @property
   def maximum_iterations(self):
     """The maximum number of iterations that will be executed."""
@@ -2909,16 +2911,15 @@ def while_loop(cond, body, loop_vars, shape_invariants=None,
       # that will be error-prone and hard to reason about for users.
       #
       # TODO(skyewm): make this work (it's tricky).
-      # pylint: disable=protected-access
       if (context.in_graph_mode() and
-          maximum_iterations.op._get_control_flow_context() is not None):
+          (util.IsInWhileLoop(maximum_iterations.op) or
+           util.IsInCond(maximum_iterations.op))):
         raise ValueError(
             "maximum_iterations tensor cannot be declared in tf.cond or "
             "tf.while_loop. Please file an issue at "
             "https://github.com/tensorflow/tensorflow/issues if you require "
             "this functionality. (Control flow context: %s)" %
-            maximum_iterations.op._get_control_flow_context().name)
-      # pylint: enable=protected-access
+            maximum_iterations.op._get_control_flow_context().name)  # pylint: disable=protected-access
 
       counter = constant_op.constant(
           0, dtype=maximum_iterations.dtype, name="iteration_counter")
diff --git a/tensorflow/python/ops/control_flow_util.py b/tensorflow/python/ops/control_flow_util.py
index 941a1a743e..91cd90f189 100644
--- a/tensorflow/python/ops/control_flow_util.py
+++ b/tensorflow/python/ops/control_flow_util.py
@@ -28,6 +28,16 @@ import traceback
 from tensorflow.python.platform import tf_logging as logging
 
 
+def IsInWhileLoop(op):
+  ctxt = op._get_control_flow_context()  # pylint: disable=protected-access
+  return GetContainingWhileContext(ctxt) is not None
+
+
+def IsInCond(op):
+  ctxt = op._get_control_flow_context()  # pylint: disable=protected-access
+  return GetContainingCondContext(ctxt) is not None
+
+
 def IsSwitch(op):
   """Return true if `op` is a Switch."""
   return op.type == "Switch" or op.type == "RefSwitch"
@@ -92,6 +102,24 @@ def GetContainingWhileContext(ctxt):
   return None
 
 
+def GetContainingCondContext(ctxt):
+  """Returns the first ancestor CondContext of `ctxt`.
+
+  Returns `ctxt` if `ctxt` is a CondContext, or None if `ctxt` is not in a cond.
+
+  Args:
+    ctxt: ControlFlowContext
+
+  Returns:
+    `ctxt` if `ctxt` is a CondContext, the most nested CondContext containing
+    `ctxt`, or None if `ctxt` is not in a cond.
+  """
+  while ctxt:
+    if ctxt.IsCondContext(): return ctxt
+    ctxt = ctxt.outer_context
+  return None
+
+
 def IsContainingContext(ctxt, maybe_containing_ctxt):
   """Returns true if `maybe_containing_ctxt` is or contains `ctxt`."""
   while ctxt is not maybe_containing_ctxt:
diff --git a/tensorflow/python/ops/rnn.py b/tensorflow/python/ops/rnn.py
index ececc53719..fd14740a00 100644
--- a/tensorflow/python/ops/rnn.py
+++ b/tensorflow/python/ops/rnn.py
@@ -35,6 +35,7 @@ from tensorflow.python.framework import tensor_shape
 from tensorflow.python.framework import tensor_util
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import control_flow_ops
+from tensorflow.python.ops import control_flow_util
 from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import rnn_cell_impl
 from tensorflow.python.ops import tensor_array_ops
@@ -816,7 +817,8 @@ def _dynamic_rnn_loop(cell,
   # time_steps is defined inside control flow, see the comment in
   # control_flow_ops.py.
   if (context.in_eager_mode() or
-      time_steps.op._get_control_flow_context() is None):  # pylint: disable=protected-access
+      not (control_flow_util.IsInWhileLoop(time_steps.op) or
+           control_flow_util.IsInCond(time_steps.op))):
     maximum_iterations = time_steps
   else:
     maximum_iterations = None
-- 
GitLab


From 32d5048ae96116202f2aa0fa739ef37514ee8a54 Mon Sep 17 00:00:00 2001
From: Igor Saprykin <isaprykin@google.com>
Date: Fri, 15 Dec 2017 16:57:16 -0800
Subject: [PATCH 1110/1225] Average the loss across `replicate_model_fn`'s
 towers.

This avoids the need for users to add `loss = loss / num_of_towers` code and is in more in line with the current best practices.

I verified this by running cnn_mnist.

This has been rolled-back earlier but fixed (Python bug!) now.

PiperOrigin-RevId: 179257316
---
 tensorflow/contrib/estimator/BUILD            |   5 +-
 .../python/estimator/replicate_model_fn.py    |  75 ++++++++++---
 .../estimator/replicate_model_fn_test.py      | 101 +++++++++++++++++-
 3 files changed, 162 insertions(+), 19 deletions(-)

diff --git a/tensorflow/contrib/estimator/BUILD b/tensorflow/contrib/estimator/BUILD
index ba272d7e88..bd65ece85d 100644
--- a/tensorflow/contrib/estimator/BUILD
+++ b/tensorflow/contrib/estimator/BUILD
@@ -331,16 +331,17 @@ py_library(
         "//tensorflow/python:device",
         "//tensorflow/python:device_lib",
         "//tensorflow/python:framework_ops",
-        "//tensorflow/python:gradients",
         "//tensorflow/python:math_ops",
         "//tensorflow/python:platform",
+        "//tensorflow/python:sparse_ops",
+        "//tensorflow/python:sparse_tensor",
         "//tensorflow/python:state_ops",
         "//tensorflow/python:training",
         "//tensorflow/python:variable_scope",
-        "//tensorflow/python:variables",
         "//tensorflow/python/estimator:export_output",
         "//tensorflow/python/estimator:model_fn",
         "//tensorflow/python/estimator:util",
+        "//tensorflow/python/ops/losses",
         "@six_archive//:six",
     ],
 )
diff --git a/tensorflow/contrib/estimator/python/estimator/replicate_model_fn.py b/tensorflow/contrib/estimator/python/estimator/replicate_model_fn.py
index ca3a2394ee..598bd549c5 100644
--- a/tensorflow/contrib/estimator/python/estimator/replicate_model_fn.py
+++ b/tensorflow/contrib/estimator/python/estimator/replicate_model_fn.py
@@ -41,21 +41,25 @@ from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import sparse_ops
 from tensorflow.python.ops import state_ops
 from tensorflow.python.ops import variable_scope
+from tensorflow.python.ops.losses import losses
 from tensorflow.python.platform import tf_logging
 from tensorflow.python.training import device_setter as device_setter_lib
 from tensorflow.python.training import training_util
 
 
-def replicate_model_fn(model_fn, optimizer_fn, devices=None):
+def replicate_model_fn(model_fn,
+                       optimizer_fn,
+                       loss_reduction=losses.Reduction.SUM,
+                       devices=None):
   """Replicate `Estimator.model_fn` over GPUs within a single host.
 
   The given `model_fn` specifies a single forward pass of a model.  To replicate
   such a model over GPUs, each GPU gets its own instance of the forward pass
   (a.k.a. a tower).  The input features and labels get sharded into the chunks
-  that correspond to the number of GPUs.  Each tower computes its own loss based
+  that correspond to the number of GPUs.  Each tower computes a loss based
   on its input.  For each such loss, gradients are computed.  After that, the
-  available losses are summed to form aggregated loss.  The available
-  gradients are summed too.  Then, they update weights using the specified
+  available losses are aggregated to form aggregated loss.  Available
+  gradients are summed.  Then, they update weights using the specified
   optimizer.
 
   If `devices` are `None`, then all available GPUs are going to be used for
@@ -102,7 +106,7 @@ def replicate_model_fn(model_fn, optimizer_fn, devices=None):
   On reduction algorithms:
   Certain algorithms were chosen for aggregating results of computations on
   multiple towers:
-    - Losses from all towers are reduced using sum.
+    - Losses from all towers are reduced according to `loss_reduction`.
     - Gradients are reduced using sum for each trainable variable.
     - `eval_metrics_ops` are reduced per metric using `reduce_mean`.
     - `EstimatorSpec.predictions` and `EstimatorSpec.export_outputs` are
@@ -124,6 +128,7 @@ def replicate_model_fn(model_fn, optimizer_fn, devices=None):
     optimizer_fn: a function that returns an optimizer instance.  The function
       may accept one `params` argument.  This is the `params` argument as
       defined by `Estimator`.  See  the `Estimator` documentation for details.
+    loss_reduction: controls whether losses are summed or averaged.
     devices: Optional list of devices to replicate the model across.  This
       argument can be used to replice only on the subset of available GPUs.
       If `None`, then all available GPUs are going to be used for replication.
@@ -137,9 +142,11 @@ def replicate_model_fn(model_fn, optimizer_fn, devices=None):
   return _replicate_model_fn_with_mode(
       model_fn,
       optimizer_fn,
+      loss_reduction,
       devices,
-      # TODO(isaprykin): Query system configuration to choose modes other than
-      # `SHARED_LOCAL_PARAMETER_SERVER`, even though it is often appropriate.
+      # TODO(isaprykin): Query the system configuration to choose modes other
+      # than `SHARED_LOCAL_PARAMETER_SERVER`, even though it is often
+      # appropriate.
       mode=_VariableDistributionMode.SHARED_LOCAL_PARAMETER_SERVER)
 
 
@@ -171,9 +178,13 @@ class _VariableDistributionMode(object):
 def _replicate_model_fn_with_mode(
     model_fn,
     optimizer_fn,
+    loss_reduction=losses.Reduction.SUM,
     devices=None,
     mode=_VariableDistributionMode.SHARED_LOCAL_PARAMETER_SERVER):
   """A version of `replicate_model_fn` that allows to specify a `mode`."""
+  if loss_reduction == losses.Reduction.NONE:
+    raise ValueError('Tower losses need to be reduced in some way, yet {} '
+                     'reduction is specified.'.format(loss_reduction))
   if not devices:
     devices = _get_local_devices('GPU') or _get_local_devices('CPU')
 
@@ -199,6 +210,7 @@ def _replicate_model_fn_with_mode(
         features=feature_shards,
         labels=label_shards,
         params=params,
+        loss_reduction=loss_reduction,
         config=config,
         devices=devices,
         local_ps_devices=ps_devices)
@@ -269,6 +281,7 @@ def _get_loss_towers(model_fn,
                      config,
                      devices,
                      local_ps_devices,
+                     loss_reduction=losses.Reduction.SUM,
                      name_scope_pattern=_DEFAULT_NAME_SCOPE_PATTERN):
   """Replicate the loss computation across devices."""
   tower_specs = []
@@ -307,12 +320,15 @@ def _get_loss_towers(model_fn,
           if labels:
             labels_shard = labels[i]
 
-          tower_specs.append(
-              model_fn(
-                  mode=mode,
-                  features=features[i],
-                  labels=labels_shard,
-                  **optional_params))
+          tower_spec = model_fn(
+              mode=mode,
+              features=features[i],
+              labels=labels_shard,
+              **optional_params)
+          if loss_reduction != losses.Reduction.SUM:
+            tower_spec = _scale_tower_loss(
+                tower_spec, number_of_towers=len(devices))
+          tower_specs.append(tower_spec)
   return tower_specs
 
 
@@ -339,6 +355,17 @@ def _local_device_setter(worker_device, ps_devices, ps_strategy):
   return local_device_chooser
 
 
+def _scale_tower_loss(tower_spec, number_of_towers):
+  """Scale down the loss for arriving at the average loss by summing."""
+  if tower_spec.loss is None:
+    return tower_spec
+
+  estimator_spec = _asdict(tower_spec)
+  estimator_spec['loss'] = math_ops.div(
+      tower_spec.loss, 1.0 * number_of_towers, name='averaged_loss')
+  return model_fn_lib.EstimatorSpec(**estimator_spec)
+
+
 def _minimize_towers(tower_specs, optimizer):
   """Aggregate and apply gradients for computed losses."""
   grad_lists = {}
@@ -388,7 +415,7 @@ def _train_spec(tower_specs,
                 aggregation_device,
                 aggregated_loss_name='loss'):
   """Populate replicated EstimatorSpec for `GraphKeys.TRAIN`."""
-  estimator_spec = tower_specs[0]._asdict()
+  estimator_spec = _asdict(tower_specs[0])
   estimator_spec['mode'] = model_fn_lib.ModeKeys.TRAIN
   estimator_spec['train_op'] = train_op
   estimator_spec['loss'] = _compute_sum_on_device(
@@ -399,7 +426,7 @@ def _train_spec(tower_specs,
 
 def _eval_spec(tower_specs, aggregation_device, aggregated_loss_name='loss'):
   """Populate replicated EstimatorSpec for `GraphKeys.EVAL`."""
-  estimator_spec = tower_specs[0]._asdict()
+  estimator_spec = _asdict(tower_specs[0])
   estimator_spec['mode'] = model_fn_lib.ModeKeys.EVAL
   estimator_spec['loss'] = _compute_sum_on_device(
       [spec.loss for spec in tower_specs], aggregation_device,
@@ -467,7 +494,7 @@ def _reduce_metric_variables(number_of_towers):
 
 def _predict_spec(tower_specs, aggregation_device):
   """Populate replicated EstimatorSpec for `GraphKeys.PREDICT`."""
-  estimator_spec = tower_specs[0]._asdict()
+  estimator_spec = _asdict(tower_specs[0])
   estimator_spec['mode'] = model_fn_lib.ModeKeys.PREDICT
 
   with ops_lib.device(aggregation_device):
@@ -527,3 +554,19 @@ def _dict_concat(*dicts):
     for k, v in six.iteritems(d):
       list_dict.setdefault(k, []).append(v)
   return list_dict
+
+
+def _asdict(namedtuple):
+  """Returns a namedtuple as a dictionary.
+
+  This is required because `_asdict()` in Python 3.x.x is broken in classes
+  that inherit from `collections.namedtuple`. See
+  https://bugs.python.org/issue24931 for more details.
+
+  Args:
+    namedtuple: An object that inherits from `collections.namedtuple`.
+
+  Returns:
+    A dictionary version of the tuple.
+  """
+  return {k: getattr(namedtuple, k) for k in namedtuple._fields}
diff --git a/tensorflow/contrib/estimator/python/estimator/replicate_model_fn_test.py b/tensorflow/contrib/estimator/python/estimator/replicate_model_fn_test.py
index a83a1b8407..b452e5c735 100644
--- a/tensorflow/contrib/estimator/python/estimator/replicate_model_fn_test.py
+++ b/tensorflow/contrib/estimator/python/estimator/replicate_model_fn_test.py
@@ -40,6 +40,7 @@ from tensorflow.python.framework import ops as ops_lib
 from tensorflow.python.framework import test_util
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import control_flow_ops
+from tensorflow.python.ops import losses
 from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import metrics as metrics_lib
 from tensorflow.python.ops import variable_scope
@@ -221,13 +222,40 @@ class ReplicateModelTest(test_util.TensorFlowTestCase):
       total_loss = (1.0 * 10 - 1.0) + (2.0 * 10 - 2.0)
       self.assertEqual(total_loss, session.run(estimator_spec.loss))
 
-      # loss' of c is 3.
+      # derivative of loss = (1*c - 1) + (2*c - 2) is 3.
       # new value of c = 10 - learning rate * 3 = 7.0.
       session.run(estimator_spec.train_op)
       with variable_scope.variable_scope('', reuse=True):
         c = variable_scope.get_variable('c', dtype=dtypes.float64)
         self.assertEqual(7.0, session.run(c))
 
+  def test_train_with_mean_reduction(self):
+    features = np.array([[1.0], [2.0]])
+    labels = np.array([[1.0], [2.0]])
+
+    with self.test_session() as session:
+      replicated_model_fn = replicate_model_fn.replicate_model_fn(
+          self.model_fn,
+          self.optimizer_fn,
+          losses.Reduction.MEAN,
+          devices=['/gpu:0', '/gpu:1'])
+      estimator_spec = replicated_model_fn(
+          features, labels, model_fn_lib.ModeKeys.TRAIN, self.params)
+      session.run(variables.global_variables_initializer())
+
+      # loss = feature * c - label
+      total_loss = ((1.0 * 10 - 1.0) + (2.0 * 10 - 2.0)) / 2.0
+      self.assertEqual(total_loss, session.run(estimator_spec.loss))
+
+      # derivative of loss = (1*c - 1)/2 + (2*c - 2)/2 is 1.5.
+      # It's the same computation as without mean reduction, but the
+      # loss from every tower is scaled by 1/<number of towers>.
+      # new value of c = 10 - learning rate * 1.5 = 8.5
+      session.run(estimator_spec.train_op)
+      with variable_scope.variable_scope('', reuse=True):
+        c = variable_scope.get_variable('c', dtype=dtypes.float64)
+        self.assertEqual(8.5, session.run(c))
+
   def test_train_spec_with_optimizer_without_params(self):
 
     def optimizer_fn_without_params():
@@ -276,6 +304,38 @@ class ReplicateModelTest(test_util.TensorFlowTestCase):
       self.assertEqual(0, auc)
       self.assertNear(total_loss, session.run(estimator_spec.loss), 0.01)
 
+  def test_eval_with_mean_reduction(self):
+    features = np.array([[0.01], [0.002]])
+    labels = np.array([[0.01], [0.02]])
+
+    with self.test_session() as session:
+      replicated_model_fn = replicate_model_fn.replicate_model_fn(
+          self.model_fn,
+          self.optimizer_fn,
+          losses.Reduction.MEAN,
+          devices=['/gpu:0', '/gpu:1'])
+      estimator_spec = replicated_model_fn(
+          features, labels, model_fn_lib.ModeKeys.EVAL, self.params)
+      session.run(variables.local_variables_initializer())
+      session.run(variables.global_variables_initializer())
+
+      accuracy, a = estimator_spec.eval_metric_ops['accuracy']
+      auc, b = estimator_spec.eval_metric_ops['auc']
+
+      session.run([a, b])
+      accuracy = session.run(accuracy)
+      auc = session.run(auc)
+
+      # loss[i] = features[i] * 10 - labels[i].
+      # Accuracy is 0.0 (no match) in the first tower.
+      # Accuracy is 1.0 (match) in the second tower, since the feature
+      # times weight "c" happened to be equal to the label.
+      total_loss = ((0.01 * 10 - 0.01) + (0.002 * 10 - 0.02)) / 2.0
+
+      self.assertNear((0.0 + 1.0) / 2.0, accuracy, 0.01)
+      self.assertEqual(0, auc)
+      self.assertNear(total_loss, session.run(estimator_spec.loss), 0.01)
+
   def test_predict(self):
     features = np.array([[0.01], [0.002]])
     labels = np.array([[0.01], [0.02]])
@@ -356,6 +416,11 @@ class ReplicateModelTest(test_util.TensorFlowTestCase):
           'probabilities': np.array([[0.1], [0.02]])
       }, session.run(estimator_spec.predictions))
 
+  def test_unsupported_loss_reduction(self):
+    with self.assertRaisesRegexp(ValueError, ''):
+      _ = replicate_model_fn.replicate_model_fn(
+          self.model_fn, self.optimizer_fn, losses.Reduction.NONE)
+
 
 class GetLossTowersTest(test_util.TensorFlowTestCase):
 
@@ -406,6 +471,40 @@ class GetLossTowersTest(test_util.TensorFlowTestCase):
         c = variable_scope.get_variable('c', dtype=dtypes.float64)
         self.assertEqual(0.25, session.run(c))
 
+  def test_gradients_are_computed_with_mean_reduction(self):
+    with self.test_session() as session:
+      tower_specs = replicate_model_fn._get_loss_towers(
+          self.model_fn,
+          mode=model_fn_lib.ModeKeys.EVAL,
+          features=[[0.6], [1.6]],
+          labels=[[0.6], [0.6]],
+          params=None,
+          loss_reduction=losses.Reduction.MEAN,
+          config=None,
+          devices=['/gpu:0', '/gpu:1'],
+          local_ps_devices=['/gpu:0'],
+          name_scope_pattern='test_tower_{}')
+      session.run(variables.global_variables_initializer())
+
+      self.assertEqual(len(tower_specs), 2)
+
+      self.assertEqual('/device:GPU:0', tower_specs[0].loss.device)
+      self.assertEqual('averaged_loss:0', tower_specs[0].loss.name)
+      self.assertEqual(0.5, session.run(tower_specs[0].loss))
+
+      self.assertEqual('/device:GPU:1', tower_specs[1].loss.device)
+      self.assertEqual('test_tower_1/averaged_loss:0', tower_specs[1].loss.name)
+      # The input batch for the second tower had a loss that is 1.0
+      # bigger: 0.6 vs 1.6.
+      self.assertEqual(1.0, session.run(tower_specs[1].loss))
+
+      self.assertEqual(1, len(variables.global_variables()))
+      self.assertEqual(1, len(variables.trainable_variables()))
+
+      with variable_scope.variable_scope('', reuse=True):
+        c = variable_scope.get_variable('c', dtype=dtypes.float64)
+        self.assertEqual(0.25, session.run(c))
+
   def test_variables_are_round_robined_correctly(self):
     """Test that creates multiple variables and tests round-robin placement."""
 
-- 
GitLab


From d55f532867a3670d66460c5ee3b774519542adc1 Mon Sep 17 00:00:00 2001
From: Dandelion Man? <dandelion@google.com>
Date: Fri, 15 Dec 2017 17:12:41 -0800
Subject: [PATCH 1111/1225] Merge changes from github.

PiperOrigin-RevId: 179258973
---
 .gitignore                                    |   11 +-
 configure.py                                  |   35 +-
 tensorflow/c/c_api.cc                         |    2 +
 tensorflow/c/c_api_function.cc                |    4 +-
 .../tf2xla/kernels/tensor_array_ops.cc        |   87 +-
 tensorflow/compiler/xla/service/BUILD         |    2 -
 .../xla/service/copy_insertion_test.cc        |    2 +-
 .../compiler/xla/service/layout_assignment.cc |    2 +-
 tensorflow/compiler/xla/shape_tree.h          |    2 +-
 tensorflow/contrib/BUILD                      |    7 +-
 tensorflow/contrib/cmake/CMakeLists.txt       |   34 +-
 tensorflow/contrib/cmake/README.md            |   17 -
 .../contrib/cmake/external/gemmlowp.cmake     |    4 +-
 .../contrib/cmake/tf_core_kernels.cmake       |    3 -
 tensorflow/contrib/cmake/tf_tests.cmake       |    2 +
 .../contrib/factorization/python/ops/gmm.py   |    4 +-
 tensorflow/contrib/ffmpeg/__init__.py         |    1 +
 .../contrib/ffmpeg/default/ffmpeg_lib.cc      |    6 +-
 .../ffmpeg/default/ffmpeg_lib_utility_test.cc |    2 +
 tensorflow/contrib/ffmpeg/ffmpeg_ops.py       |    1 +
 .../estimator/python/gan_estimator_impl.py    |    7 +-
 .../gan/python/estimator/python/head_impl.py  |    6 +-
 .../layers/python/layers/initializers.py      |    3 +-
 .../learn/estimators/composable_model_test.py |    4 +-
 .../learn/python/learn/estimators/dnn.py      |    4 +-
 .../learn/estimators/estimator_input_test.py  |   10 +-
 .../python/learn/estimators/estimator_test.py |   26 +-
 .../learn/estimators/estimators_test.py       |    8 +-
 .../learn/python/learn/estimators/kmeans.py   |    4 +-
 .../learn/python/learn/estimators/linear.py   |    6 +-
 .../estimators/logistic_regressor_test.py     |    4 +-
 .../learn/python/learn/utils/export.py        |    6 +-
 .../linear_optimizer/python/sdca_estimator.py |    4 +-
 tensorflow/contrib/lite/README.md             |    5 +
 tensorflow/contrib/lite/ios_makefile.inc      |   78 +-
 .../contrib/lite/nnapi/NeuralNetworksShim.h   |    4 +-
 tensorflow/contrib/makefile/Makefile          |    2 +-
 tensorflow/contrib/nn/__init__.py             |    1 +
 .../contrib/nn/python/ops/sampling_ops.py     |  100 +
 tensorflow/contrib/opt/BUILD                  |   19 +
 tensorflow/contrib/opt/__init__.py            |    5 +-
 .../training/elastic_average_optimizer.py     |  345 +++
 .../elastic_average_optimizer_test.py         |  225 ++
 .../python/kernel_tests/core_rnn_cell_test.py |    3 +
 tensorflow/contrib/rnn/python/ops/rnn_cell.py |    1 +
 .../contrib/seq2seq/python/ops/helper.py      |    6 +-
 .../python/slim/data/dataset_data_provider.py |    4 +-
 .../timeseries/python/timeseries/head.py      |    4 +-
 tensorflow/core/common_runtime/function.cc    |    2 +-
 tensorflow/core/framework/numeric_types.h     |    2 +-
 .../core/framework/tensor_shape_test.cc       |    3 +-
 tensorflow/core/graph/mkl_graph_util.h        |   10 +-
 tensorflow/core/graph/mkl_layout_pass.cc      | 2083 +++++++++++++++++
 tensorflow/core/graph/mkl_layout_pass_test.cc | 1624 +++++++++++++
 tensorflow/core/kernels/logging_ops.cc        |    3 +-
 tensorflow/core/kernels/mkl_aggregate_ops.cc  |  204 +-
 tensorflow/core/kernels/mkl_avgpooling_op.cc  |  306 ++-
 tensorflow/core/kernels/mkl_concat_op.cc      |  374 ++-
 .../core/kernels/mkl_conv_grad_filter_ops.cc  |  317 +--
 .../core/kernels/mkl_conv_grad_input_ops.cc   |  244 +-
 tensorflow/core/kernels/mkl_conv_ops.cc       |  149 +-
 tensorflow/core/kernels/mkl_conv_ops.h        |  269 ++-
 .../core/kernels/mkl_fused_batch_norm_op.cc   |  652 +++++-
 tensorflow/core/kernels/mkl_identity_op.cc    |   33 +
 .../core/kernels/mkl_input_conversion_op.cc   |  217 +-
 tensorflow/core/kernels/mkl_lrn_op.cc         |    2 +-
 tensorflow/core/kernels/mkl_maxpooling_op.cc  |  357 ++-
 .../core/kernels/mkl_pooling_ops_common.cc    |   38 +-
 .../core/kernels/mkl_pooling_ops_common.h     |  342 +++
 tensorflow/core/kernels/mkl_relu_op.cc        |  505 +++-
 tensorflow/core/kernels/mkl_reshape_op.cc     |  182 ++
 tensorflow/core/kernels/quantized_conv_ops.cc |    7 +
 .../core/lib/io/snappy/snappy_outputbuffer.cc |    3 +-
 tensorflow/core/ops/nn_ops.cc                 |  173 ++
 tensorflow/core/platform/cloud/BUILD          |   14 +
 .../core/platform/cloud/gcs_dns_cache.cc      |   32 +-
 .../core/platform/cloud/gcs_file_system.cc    |   21 +-
 .../platform/cloud/google_auth_provider.cc    |    5 +-
 .../core/platform/cloud/oauth_client.cc       |    4 +
 tensorflow/core/platform/cloud/time_util.cc   |    3 +
 .../core/platform/default/build_config.bzl    |    1 -
 .../android_armv7a_cpu_utils_helper.cc        |    3 +-
 tensorflow/core/platform/s3/s3_file_system.cc |   10 +-
 tensorflow/core/util/mkl_util.h               |  313 ++-
 .../docs_src/api_guides/python/image.md       |    1 +
 .../api_guides/python/reading_data.md         |   27 +-
 .../docs_src/get_started/mnist/mechanics.md   |    8 +-
 .../docs_src/programmers_guide/estimators.md  |    2 +-
 .../docs_src/programmers_guide/variables.md   |    2 +-
 tensorflow/examples/android/build.gradle      |    6 +-
 .../android/gradle/wrapper/gradle-wrapper.jar |  Bin 0 -> 53636 bytes
 .../gradle/wrapper/gradle-wrapper.properties  |    6 +
 tensorflow/examples/android/gradlew           |  160 ++
 tensorflow/examples/android/gradlew.bat       |   90 +
 .../org/tensorflow/demo/CameraActivity.java   |    8 +-
 .../reading_data/fully_connected_reader.py    |  125 +-
 .../wav_to_spectrogram/wav_to_spectrogram.cc  |    7 +-
 tensorflow/go/graph.go                        |   17 +-
 tensorflow/go/graph_test.go                   |   22 +-
 tensorflow/python/client/session.py           |   18 +-
 tensorflow/python/client/session_test.py      |    6 +
 tensorflow/python/debug/BUILD                 |    1 +
 tensorflow/python/estimator/training_test.py  |    4 +-
 tensorflow/python/estimator/util.py           |    2 +-
 tensorflow/python/framework/function.py       |    4 +-
 .../keras/applications/inception_resnet_v2.py |    2 +-
 .../_impl/keras/applications/mobilenet.py     |    4 +-
 .../_impl/keras/applications/resnet50.py      |    2 +-
 .../keras/_impl/keras/applications/vgg16.py   |    4 +-
 .../keras/_impl/keras/applications/vgg19.py   |    4 +-
 .../_impl/keras/applications/xception.py      |    4 +-
 .../python/keras/_impl/keras/layers/core.py   |    2 +-
 .../keras/_impl/keras/layers/core_test.py     |    5 +
 tensorflow/python/kernel_tests/BUILD          |    4 +
 .../kernel_tests/summary_image_op_test.py     |    1 -
 tensorflow/python/ops/image_ops_impl.py       |   29 +-
 tensorflow/python/ops/image_ops_test.py       |  113 +
 tensorflow/python/ops/logging_ops.py          |    6 +-
 tensorflow/python/ops/nn_impl.py              |   13 +-
 .../python/ops/quantized_conv_ops_test.py     |    2 +-
 tensorflow/python/ops/quantized_ops_test.py   |   57 +
 .../python/training/learning_rate_decay.py    |   13 +-
 tensorflow/python/util/tf_inspect.py          |   20 +
 tensorflow/stream_executor/dnn.h              |    2 +-
 tensorflow/tensorflow.bzl                     |   43 +-
 tensorflow/tools/benchmark/benchmark_model.cc |    2 +-
 tensorflow/tools/ci_build/Dockerfile.gpu      |    4 +-
 .../tools/ci_build/Dockerfile.gpu_clang       |    4 +-
 tensorflow/tools/ci_build/builds/pip.sh       |   21 +-
 .../tools/ci_build/builds/print_build_info.sh |    2 +-
 .../tools/ci_build/builds/test_user_ops.sh    |    6 +-
 .../gpu_build/parallel_gpu_execute.sh         |    2 +-
 .../ci_build/install/install_deb_packages.sh  |    1 +
 .../ci_build/install/install_pip_packages.sh  |    3 +
 .../install/install_python3.5_pip_packages.sh |    2 +
 .../install/install_python3.6_pip_packages.sh |    2 +
 .../tools/ci_build/linux/gpu/run_cc_core.sh   |    2 +
 .../tools/ci_build/linux/gpu/run_py3_core.sh  |    2 +
 .../tools/ci_build/pi/build_raspberry_pi.sh   |    1 +
 .../ci_build/remote/remote_docker_build.sh    |    2 +-
 .../ci_build/windows/bazel/bazel_test_lib.sh  |    6 +-
 .../ci_build/windows/bazel/common_env.sh      |   16 -
 .../windows/cpu/bazel/run_cc_test_windows.sh  |    6 +-
 .../windows/cpu/pip/build_tf_windows.sh       |    9 +-
 .../windows/gpu/bazel/run_cc_test_windows.sh  |    4 +-
 .../windows/gpu/pip/build_tf_windows.sh       |    9 +-
 .../ci_build/windows/libtensorflow_cpu.sh     |    3 +-
 .../tools/ci_build/xla/linux/gpu/run_py3.sh   |    2 +
 tensorflow/tools/docker/Dockerfile.devel      |    5 +-
 tensorflow/tools/docker/Dockerfile.devel-gpu  |   26 +-
 tensorflow/tools/docker/Dockerfile.gpu        |    2 +-
 .../docker/parameterized_docker_build.sh      |    2 +-
 tensorflow/tools/docs/generate_lib.py         |    4 +-
 tensorflow/tools/pip_package/BUILD            |   21 +
 .../tools/pip_package/build_pip_package.sh    |    4 +-
 .../tools/pip_package/pip_smoke_test.py       |    1 +
 .../proto_text/gen_proto_text_functions.cc    |    1 +
 tensorflow/workspace.bzl                      |   16 +-
 third_party/curl.BUILD                        |   26 +-
 third_party/pcre.BUILD                        |    2 +-
 160 files changed, 9758 insertions(+), 938 deletions(-)
 create mode 100644 tensorflow/contrib/opt/python/training/elastic_average_optimizer.py
 create mode 100644 tensorflow/contrib/opt/python/training/elastic_average_optimizer_test.py
 create mode 100644 tensorflow/examples/android/gradle/wrapper/gradle-wrapper.jar
 create mode 100644 tensorflow/examples/android/gradle/wrapper/gradle-wrapper.properties
 create mode 100644 tensorflow/examples/android/gradlew
 create mode 100644 tensorflow/examples/android/gradlew.bat
 create mode 100644 tensorflow/python/ops/quantized_ops_test.py

diff --git a/.gitignore b/.gitignore
index d11a504bdc..be75938ec4 100644
--- a/.gitignore
+++ b/.gitignore
@@ -6,7 +6,7 @@ node_modules
 /bazel-*
 /bazel_pip
 /tools/python_bin_path.sh
-/tools/git/gen
+/tensorflow/tools/git/gen
 /pip_test
 /_python_build
 *.pyc
@@ -26,4 +26,11 @@ Podfile.lock
 /tensorflow/contrib/lite/gen/**
 /tensorflow/contrib/lite/examples/ios/simple/data/*.txt
 /tensorflow/contrib/lite/examples/ios/simple/data/*.tflite
-xcuserdata/**
\ No newline at end of file
+xcuserdata/**
+
+# Android
+.gradle
+.idea
+*.iml
+local.properties
+gradleBuild
diff --git a/configure.py b/configure.py
index 3d553e1c14..336559532d 100644
--- a/configure.py
+++ b/configure.py
@@ -36,8 +36,8 @@ _TF_BAZELRC = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                            '.tf_configure.bazelrc')
 _TF_WORKSPACE = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                              'WORKSPACE')
-_DEFAULT_CUDA_VERSION = '8.0'
-_DEFAULT_CUDNN_VERSION = '6'
+_DEFAULT_CUDA_VERSION = '9.0'
+_DEFAULT_CUDNN_VERSION = '7'
 _DEFAULT_CUDA_COMPUTE_CAPABILITIES = '3.5,5.2'
 _DEFAULT_CUDA_PATH = '/usr/local/cuda'
 _DEFAULT_CUDA_PATH_LINUX = '/opt/cuda'
@@ -1096,6 +1096,27 @@ def set_computecpp_toolkit_path(environ_cp):
   write_action_env_to_bazelrc('COMPUTECPP_TOOLKIT_PATH',
                               computecpp_toolkit_path)
 
+def set_trisycl_include_dir(environ_cp):
+  """Set TRISYCL_INCLUDE_DIR"""
+  ask_trisycl_include_dir = ('Please specify the location of the triSYCL '
+                             'include directory. (Use --config=sycl_trisycl '
+                             'when building with Bazel) '
+                             '[Default is %s]: '
+                             ) % (_DEFAULT_TRISYCL_INCLUDE_DIR)
+  while True:
+    trisycl_include_dir = get_from_env_or_user_or_default(
+      environ_cp, 'TRISYCL_INCLUDE_DIR', ask_trisycl_include_dir,
+      _DEFAULT_TRISYCL_INCLUDE_DIR)
+    if os.path.exists(trisycl_include_dir):
+      break
+
+    print('Invalid triSYCL include directory, %s cannot be found'
+          % (trisycl_include_dir))
+
+  # Set TRISYCL_INCLUDE_DIR
+  environ_cp['TRISYCL_INCLUDE_DIR'] = trisycl_include_dir
+  write_action_env_to_bazelrc('TRISYCL_INCLUDE_DIR',
+                              trisycl_include_dir)
 
 def set_trisycl_include_dir(environ_cp):
   """Set TRISYCL_INCLUDE_DIR."""
@@ -1211,6 +1232,15 @@ def create_android_bazelrc_configs():
 def set_grpc_build_flags():
   write_to_bazelrc('build --define grpc_no_ares=true')
 
+def set_windows_build_flags():
+  if is_windows():
+    # The non-monolithic build is not supported yet
+    write_to_bazelrc('build --config monolithic')
+    # Suppress warning messages
+    write_to_bazelrc('build --copt=-w --host_copt=-w')
+    # Output more verbose information when something goes wrong
+    write_to_bazelrc('build --verbose_failures')
+
 
 def main():
   # Make a copy of os.environ to be clear when functions and getting and setting
@@ -1289,6 +1319,7 @@ def main():
   set_cc_opt_flags(environ_cp)
   set_mkl()
   set_monolithic()
+  set_windows_build_flags()
   create_android_bazelrc_configs()
 
   if workspace_has_any_android_rule():
diff --git a/tensorflow/c/c_api.cc b/tensorflow/c/c_api.cc
index 6f5abd074c..9b57047028 100644
--- a/tensorflow/c/c_api.cc
+++ b/tensorflow/c/c_api.cc
@@ -579,6 +579,7 @@ TF_Tensor* TF_TensorFromTensor(const tensorflow::Tensor& src,
       status->status = InvalidArgument(
           "invalid string tensor encoding (string #", i, " of ",
           srcarray.size(), "): ", status->status.error_message());
+      delete[] base;
       return nullptr;
     }
     dst += consumed;
@@ -588,6 +589,7 @@ TF_Tensor* TF_TensorFromTensor(const tensorflow::Tensor& src,
     status->status = InvalidArgument(
         "invalid string tensor encoding (decoded ", (dst - base),
         " bytes, but the tensor is encoded in ", size, " bytes");
+    delete[] base;
     return nullptr;
   }
 
diff --git a/tensorflow/c/c_api_function.cc b/tensorflow/c/c_api_function.cc
index b9312c2974..d60d1de315 100644
--- a/tensorflow/c/c_api_function.cc
+++ b/tensorflow/c/c_api_function.cc
@@ -68,7 +68,7 @@ class NodeNameMapping {
   // This is a superset of values in name_mapping_.
   std::unordered_set<string> used_names_;
   // Mapping from original node name from the graph to the normalized
-  // and uniqified version of it.
+  // and uniquified version of it.
   std::unordered_map<string, string> name_mapping_;
 };
 
@@ -236,7 +236,7 @@ Status FillFunctionBody(
 }
 
 // Graph to FunctionDef conversion. This code is closely modeled on the Python
-// code in third_party/tensorflow/python/framework/function.py.
+// code in tensorflow/python/framework/function.py.
 Status GraphToFunctionDef(const Graph& fn_body, const string& fn_name,
                           bool append_hash_to_fn_name,
                           const std::vector<const Node*>& body_nodes,
diff --git a/tensorflow/compiler/tf2xla/kernels/tensor_array_ops.cc b/tensorflow/compiler/tf2xla/kernels/tensor_array_ops.cc
index 351fda2517..03c22354a9 100644
--- a/tensorflow/compiler/tf2xla/kernels/tensor_array_ops.cc
+++ b/tensorflow/compiler/tf2xla/kernels/tensor_array_ops.cc
@@ -311,6 +311,32 @@ class TensorArrayGatherOp : public XlaOpKernel {
 
     xla::ComputationDataHandle ta = resource->value;
 
+    // Look for the case where the gather takes a simple slice from the
+    // tensor array (0, 1, 2, 3, 4, ..., N)
+    std::vector<int64> const_indices;
+    Status status = ctx->ConstantInputAsIntVector(1, &const_indices);
+    if (status.ok()) {
+      bool gather_is_dense_slice = true;
+      for (auto i = 0; i < const_indices.size(); i++) {
+        if (const_indices[i] != i) {
+          gather_is_dense_slice = false;
+          break;
+        }
+      }
+
+      if (gather_is_dense_slice) {
+        std::vector<int64> begin(ta_shape.dims(), 0);
+        std::vector<int64> strides(ta_shape.dims(), 1);
+        std::vector<int64> end(ta_shape.dims(), 1);
+        end[0] = const_indices.size();
+        for (auto i = 1; i < ta_shape.dims(); i++) {
+          end[i] = ta_shape.dim_size(i);
+        }
+        ctx->SetOutput(0, b->Slice(ta, begin, end, strides));
+        return;
+      }
+    }
+
     xla::ComputationDataHandle gather = XlaComputeGatherDynamicSlice(
         ctx, ta, ta_shape, indices, indices_shape, 0, dtype_, index_type, b);
     ctx->SetOutput(0, gather);
@@ -352,28 +378,47 @@ class TensorArrayScatterOp : public XlaOpKernel {
     const xla::ComputationDataHandle value = ctx->Input(2);
     const xla::ComputationDataHandle flow = ctx->Input(3);
 
-    auto slice_dims = value_shape.dim_sizes();
-    slice_dims[0] = 1LL;
-
-    std::vector<int64> value_starts(value_shape.dims(), 0);
-    auto value_ends = value_shape.dim_sizes();
-
-    std::vector<int64> value_strides(value_shape.dims(), 1);
-
-    // For every (index, value) pair, update the corresponding TensorArray
-    // storage.
-    for (int i = 0; i < num_indices; ++i) {
-      // Slice out part of the value.
-      value_starts[0] = i;
-      value_ends[0] = i + 1;
-      auto slice = b->Slice(value, value_starts, value_ends, value_strides);
+    // Look for the case where the scatter is for each sub-tensor in order. The
+    // tensor array implementation allows for this to be a straight addition.
+    bool scatter_all_elements_in_order = false;
+    std::vector<int64> const_indices;
+    Status status = ctx->ConstantInputAsIntVector(1, &const_indices);
+    if (status.ok() && num_indices == value_shape.dim_size(0)) {
+      scatter_all_elements_in_order = true;
+      for (auto i = 0; i < num_indices; i++) {
+        if (const_indices[i] != i) {
+          scatter_all_elements_in_order = false;
+          break;
+        }
+      }
+    }
 
-      // start_indices of the DynamicUpdateSlice are [index, 0, 0, ..., 0].
-      auto index = b->Slice(indices, {i}, {i + 1}, {1});
-      auto start_indices =
-          b->Pad(b->Reshape(index, {1}), b->ConstantR0<int32>(0),
-                 xla::MakeEdgePaddingConfig({{0, elem_shape.dims()}}));
-      ta = DynamicAddSlice(b, ta, slice, slice_dims, start_indices);
+    if (scatter_all_elements_in_order) {
+      ta = b->Add(ta, value);
+    } else {
+      auto slice_dims = value_shape.dim_sizes();
+      slice_dims[0] = 1LL;
+
+      std::vector<int64> value_starts(value_shape.dims(), 0);
+      auto value_ends = value_shape.dim_sizes();
+
+      std::vector<int64> value_strides(value_shape.dims(), 1);
+
+      // For every (index, value) pair, update the corresponding TensorArray
+      // storage.
+      for (int i = 0; i < num_indices; ++i) {
+        // Slice out part of the value.
+        value_starts[0] = i;
+        value_ends[0] = i + 1;
+        auto slice = b->Slice(value, value_starts, value_ends, value_strides);
+
+        // start_indices of the DynamicUpdateSlice are [index, 0, 0, ..., 0].
+        auto index = b->Slice(indices, {i}, {i + 1}, {1});
+        auto start_indices =
+                b->Pad(b->Reshape(index, {1}), b->ConstantR0<int32>(0),
+                       xla::MakeEdgePaddingConfig({{0, elem_shape.dims()}}));
+        ta = DynamicAddSlice(b, ta, slice, slice_dims, start_indices);
+      }
     }
 
     resource->value = ta;
diff --git a/tensorflow/compiler/xla/service/BUILD b/tensorflow/compiler/xla/service/BUILD
index 07ef98076e..3655a08cf3 100644
--- a/tensorflow/compiler/xla/service/BUILD
+++ b/tensorflow/compiler/xla/service/BUILD
@@ -90,8 +90,6 @@ cc_library(
         ":shape_inference",
         "//tensorflow/compiler/xla:literal_util",
         "//tensorflow/compiler/xla:shape_util",
-        "//tensorflow/compiler/xla:status",
-        "//tensorflow/compiler/xla:status_macros",
         "//tensorflow/compiler/xla:statusor",
         "//tensorflow/compiler/xla:types",
         "//tensorflow/compiler/xla:util",
diff --git a/tensorflow/compiler/xla/service/copy_insertion_test.cc b/tensorflow/compiler/xla/service/copy_insertion_test.cc
index 3278fd5f06..8388574716 100644
--- a/tensorflow/compiler/xla/service/copy_insertion_test.cc
+++ b/tensorflow/compiler/xla/service/copy_insertion_test.cc
@@ -339,7 +339,7 @@ TEST_F(CopyInsertionTest, ElementOfNestedTupleParameter) {
            ShapeUtil::MakeShape(F32, {42})}),
       "param0"));
 
-  // The return value of the computation is the zero-th elemnt of the nested
+  // The return value of the computation is the zero-th element of the nested
   // tuple. This element is itself a tuple.
   auto gte = builder.AddInstruction(HloInstruction::CreateGetTupleElement(
       ShapeUtil::GetSubshape(param->shape(), {0}), param, 0));
diff --git a/tensorflow/compiler/xla/service/layout_assignment.cc b/tensorflow/compiler/xla/service/layout_assignment.cc
index af726271ae..328afe42ba 100644
--- a/tensorflow/compiler/xla/service/layout_assignment.cc
+++ b/tensorflow/compiler/xla/service/layout_assignment.cc
@@ -1303,7 +1303,7 @@ Status LayoutAssignment::AssignLayouts(const LayoutConstraints& constraints,
     TF_RET_CHECK(LayoutUtil::HasLayout(instruction->shape()));
   }
 
-  // Copy the root instrucion's result if the it does not match the result
+  // Copy the root instruction's result if the it does not match the result
   // layout constraint
   if (constraints.ResultLayout() != nullptr &&
       !constraints.ResultLayout()->MatchesLayoutInShape(
diff --git a/tensorflow/compiler/xla/shape_tree.h b/tensorflow/compiler/xla/shape_tree.h
index bf8d190150..d752619bd6 100644
--- a/tensorflow/compiler/xla/shape_tree.h
+++ b/tensorflow/compiler/xla/shape_tree.h
@@ -238,7 +238,7 @@ class ShapeTree {
   //           (or compatible).
   //   index : the index of the element in the shape. See ShapeUtil::GetSubshape
   //           for definition of index.
-  //   data : The data value at this elemnt.
+  //   data : The data value at this element.
   template <typename Fn>
   void ForEachElement(const Fn& func) const;
 
diff --git a/tensorflow/contrib/BUILD b/tensorflow/contrib/BUILD
index 61f7821519..604c41bf8a 100644
--- a/tensorflow/contrib/BUILD
+++ b/tensorflow/contrib/BUILD
@@ -9,7 +9,12 @@ load("//third_party/mpi:mpi.bzl", "if_mpi")
 
 py_library(
     name = "contrib_py",
-    srcs = glob(["**/*.py"]),
+    srcs = glob(
+        ["**/*.py"],
+        exclude = [
+            "**/*_test.py",
+        ],
+    ),
     srcs_version = "PY2AND3",
     visibility = ["//visibility:public"],
     deps = [
diff --git a/tensorflow/contrib/cmake/CMakeLists.txt b/tensorflow/contrib/cmake/CMakeLists.txt
index 77a3fc0c83..481caf6bb0 100644
--- a/tensorflow/contrib/cmake/CMakeLists.txt
+++ b/tensorflow/contrib/cmake/CMakeLists.txt
@@ -18,7 +18,6 @@ cmake_policy(SET CMP0022 NEW)
 
 # Options
 option(tensorflow_VERBOSE "Enable for verbose output" OFF)
-option(tensorflow_ENABLE_GPU "Enable GPU support" OFF)
 option(tensorflow_ENABLE_SSL_SUPPORT "Enable boringssl support" OFF)
 option(tensorflow_ENABLE_GRPC_SUPPORT "Enable gRPC support" ON)
 option(tensorflow_ENABLE_HDFS_SUPPORT "Enable HDFS support" OFF)
@@ -34,6 +33,12 @@ option(tensorflow_BUILD_SHARED_LIB "Build TensorFlow as a shared library" OFF)
 option(tensorflow_OPTIMIZE_FOR_NATIVE_ARCH "Enable compiler optimizations for the native processor architecture (if available)" ON)
 option(tensorflow_WIN_CPU_SIMD_OPTIONS "Enables CPU SIMD instructions")
 option(tensorflow_ENABLE_SNAPPY_SUPPORT "Enable SNAPPY compression support" ON)
+
+# GPU, CUDA and cuDNN options
+option(tensorflow_ENABLE_GPU "Enable GPU support" OFF)
+option(tensorflow_CUDA_VERSION "CUDA version to build against" 9.0)
+option(tensorflow_CUDNN_VERSION "cuDNN version to build against" 7)
+
 if(HAIKU)
 	option(tensorflow_ENABLE_POSITION_INDEPENDENT_CODE "Enable PIE support" OFF)
 else()
@@ -262,7 +267,7 @@ if (tensorflow_ENABLE_GPU)
     list(APPEND CMAKE_LIBRARY_PATH "${tensorflow_CUDA_LIBRARY_PATH}/stubs")
   endif (NOT WIN32)
 
-  find_package(CUDA 8.0 REQUIRED)
+  find_package(CUDA ${tensorflow_CUDA_VERSION} REQUIRED)
 
   # by default we assume compute cabability 3.5 and 5.2. If you change this change it in
   # CUDA_NVCC_FLAGS and cuda_config.h below
@@ -316,13 +321,16 @@ if (tensorflow_ENABLE_GPU)
       ${CUDA_curand_LIBRARY} ${CUDA_cupti_LIBRARY} ${CUDA_cusolver_LIBRARY} ${cudnn_STATIC_LIBRARY} ${culibos_STATIC_LIBRARY} ${nccl_STATIC_LIBRARY})
   endif (WIN32)
 
+  # Remove "." from CUDA version variable.
+  string(REPLACE "." "" short_CUDA_VER ${tensorflow_CUDA_VERSION})
+
   # create cuda_config.h
   FILE(WRITE ${tensorflow_source_dir}/third_party/gpus/cuda/cuda_config.h
     "#ifndef CUDA_CUDA_CONFIG_H_\n"
     "#define CUDA_CUDA_CONFIG_H_\n"
     "#define TF_CUDA_CAPABILITIES CudaVersion(\"3.0\"),CudaVersion(\"3.5\"),CudaVersion(\"5.2\")\n"
-    "#define TF_CUDA_VERSION \"64_80\"\n"
-    "#define TF_CUDNN_VERSION \"64_6\"\n"
+    "#define TF_CUDA_VERSION \"64_${short_CUDA_VER}\"\n"
+    "#define TF_CUDNN_VERSION \"64_${tensorflow_CUDNN_VERSION}\"\n"
     "#define TF_CUDA_TOOLKIT_PATH \"${CUDA_TOOLKIT_ROOT_DIR}\"\n"
     "#endif  // CUDA_CUDA_CONFIG_H_\n"
   )
@@ -360,15 +368,15 @@ if (tensorflow_ENABLE_GPU)
   if(WIN32)
     set(tensorflow_BUILD_INFO_FLAGS --build_config cuda --key_value
       msvcp_dll_name=msvcp140.dll
-      cudart_dll_name=cudart64_80.dll
-      cuda_version_number=8.0
+      cudart_dll_name=cudart64_${short_CUDA_VER}.dll
+      cuda_version_number=${tensorflow_CUDA_VERSION}
       nvcuda_dll_name=nvcuda.dll
-      cudnn_dll_name=cudnn64_6.dll
-      cudnn_version_number=6)
+      cudnn_dll_name=cudnn64_${tensorflow_CUDNN_VERSION}.dll
+      cudnn_version_number=${tensorflow_CUDNN_VERSION})
   else(WIN32)
     set(tensorflow_BUILD_INFO_FLAGS --build_config cuda --key_value
-      cuda_version_number=8.0
-      cudnn_version_number=6)
+	    cuda_version_number=${tensorflow_CUDA_VERSION}
+	    cudnn_version_number=${tensorflow_CUDNN_VERSION})
   endif(WIN32)
 else(tensorflow_ENABLE_GPU)
   set(tensorflow_BUILD_INFO_FLAGS --build_config cpu --key_value
@@ -383,11 +391,7 @@ endif()
 
 # Let's get to work!
 include(tf_core_framework.cmake)
-# NOTE: Disabled until issue #3996 is fixed.
-# include(tf_stream_executor.cmake)
-if (tensorflow_ENABLE_GPU)
-    include(tf_stream_executor.cmake)
-endif()
+include(tf_stream_executor.cmake)
 
 include(tf_core_cpu.cmake)
 include(tf_core_ops.cmake)
diff --git a/tensorflow/contrib/cmake/README.md b/tensorflow/contrib/cmake/README.md
index 4ddfec5960..4be733a280 100644
--- a/tensorflow/contrib/cmake/README.md
+++ b/tensorflow/contrib/cmake/README.md
@@ -19,23 +19,6 @@ for instructions on how to install a pre-built TensorFlow package on Windows.
 ### Current known limitations
 * It is not possible to load a custom Op library.
 * GCS file system is not supported.
-* The following Ops are not currently implemented:
- - Dequantize
- - QuantizeAndDequantize
- - QuantizedAvgPool
- - QuantizedBatchNomWithGlobalNormalization
- - QuantizedBiasAdd
- - QuantizedConcat
- - QuantizedConv2D
- - QuantizedMatmul
- - QuantizedMaxPoo
- - QuantizeDownAndShrinkRange
- - QuantizedRelu
- - QuantizedRelu6
- - QuantizedReshape
- - QuantizeV2
- - RequantizationRange
- - Requantize
 
 ## Building with CMake
 
diff --git a/tensorflow/contrib/cmake/external/gemmlowp.cmake b/tensorflow/contrib/cmake/external/gemmlowp.cmake
index 3b146657bf..a235442dc5 100644
--- a/tensorflow/contrib/cmake/external/gemmlowp.cmake
+++ b/tensorflow/contrib/cmake/external/gemmlowp.cmake
@@ -14,8 +14,8 @@
 # ==============================================================================
 include (ExternalProject)
 
-set(gemmlowp_URL https://mirror.bazel.build/github.com/google/gemmlowp/archive/010bb3e71a26ca1d0884a167081d092b43563996.zip)
-set(gemmlowp_HASH SHA256=dd2557072bde12141419cb8320a9c25e6ec41a8ae53c2ac78c076a347bb46d9d)
+set(gemmlowp_URL https://github.com/google/gemmlowp/archive/6a2a90822e8546fc2bfa7044de0faf1c1cb4862f.zip)
+set(gemmlowp_HASH SHA256=3447948d219f3270383766bbe08942888c0eb4e0ca6663c0e0548502ec5bb77d)
 set(gemmlowp_BUILD ${CMAKE_CURRENT_BINARY_DIR}/gemmlowp/src/gemmlowp)
 set(gemmlowp_INCLUDE_DIR ${CMAKE_CURRENT_BINARY_DIR}/gemmlowp/src/gemmlowp)
 
diff --git a/tensorflow/contrib/cmake/tf_core_kernels.cmake b/tensorflow/contrib/cmake/tf_core_kernels.cmake
index 2d015908a8..eb6bf567aa 100644
--- a/tensorflow/contrib/cmake/tf_core_kernels.cmake
+++ b/tensorflow/contrib/cmake/tf_core_kernels.cmake
@@ -150,9 +150,6 @@ list(REMOVE_ITEM tf_core_kernels_srcs ${tf_core_kernels_exclude_srcs})
 if(WIN32)
   file(GLOB_RECURSE tf_core_kernels_windows_exclude_srcs
       # not working on windows yet
-      "${tensorflow_source_dir}/tensorflow/core/kernels/meta_support.*"
-      "${tensorflow_source_dir}/tensorflow/core/kernels/*quantiz*.h"
-      "${tensorflow_source_dir}/tensorflow/core/kernels/*quantiz*.cc"
       "${tensorflow_source_dir}/tensorflow/core/kernels/neon/*"
       # not in core - those are loaded dynamically as dll
       "${tensorflow_source_dir}/tensorflow/contrib/nearest_neighbor/kernels/hyperplane_lsh_probes.cc"
diff --git a/tensorflow/contrib/cmake/tf_tests.cmake b/tensorflow/contrib/cmake/tf_tests.cmake
index 9ed5b4b9de..94ca4b0017 100644
--- a/tensorflow/contrib/cmake/tf_tests.cmake
+++ b/tensorflow/contrib/cmake/tf_tests.cmake
@@ -146,6 +146,8 @@ if (tensorflow_BUILD_PYTHON_TESTS)
     "${tensorflow_source_dir}/tensorflow/contrib/estimator/python/estimator/*_test.py"
     "${tensorflow_source_dir}/tensorflow/python/kernel_tests/*.py"
     "${tensorflow_source_dir}/tensorflow/python/meta_graph_transform/*_test.py"
+    "${tensorflow_source_dir}/tensorflow/python/ops/quantized_conv_ops_test.py"
+    "${tensorflow_source_dir}/tensorflow/python/ops/quantized_ops_test.py"
     "${tensorflow_source_dir}/tensorflow/python/platform/build_info_test.py"
     "${tensorflow_source_dir}/tensorflow/python/profiler/*_test.py"
     "${tensorflow_source_dir}/tensorflow/python/profiler/internal/*_test.py"
diff --git a/tensorflow/contrib/factorization/python/ops/gmm.py b/tensorflow/contrib/factorization/python/ops/gmm.py
index 0d67e09f81..f72280c4ec 100644
--- a/tensorflow/contrib/factorization/python/ops/gmm.py
+++ b/tensorflow/contrib/factorization/python/ops/gmm.py
@@ -24,7 +24,7 @@ import numpy as np
 from tensorflow.contrib import framework
 from tensorflow.contrib.factorization.python.ops import gmm_ops
 from tensorflow.contrib.framework.python.framework import checkpoint_utils
-from tensorflow.contrib.framework.python.ops import variables
+from tensorflow.python.training import training_util
 from tensorflow.contrib.learn.python.learn.estimators import estimator
 from tensorflow.contrib.learn.python.learn.estimators import model_fn as model_fn_lib
 from tensorflow.python.framework import constant_op
@@ -167,7 +167,7 @@ class GMM(estimator.Estimator):
                                      self._num_clusters, self._random_seed,
                                      self._covariance_type,
                                      self._params)
-      incr_step = state_ops.assign_add(variables.get_global_step(), 1)
+      incr_step = state_ops.assign_add(training_util.get_global_step(), 1)
       loss = math_ops.reduce_sum(losses)
       training_op = with_dependencies([training_op, incr_step], loss)
       training_hooks = [_InitializeClustersHook(
diff --git a/tensorflow/contrib/ffmpeg/__init__.py b/tensorflow/contrib/ffmpeg/__init__.py
index 484ffee3e7..daba965a98 100644
--- a/tensorflow/contrib/ffmpeg/__init__.py
+++ b/tensorflow/contrib/ffmpeg/__init__.py
@@ -28,6 +28,7 @@ from __future__ import print_function
 from tensorflow.contrib.ffmpeg.ffmpeg_ops import decode_audio
 from tensorflow.contrib.ffmpeg.ffmpeg_ops import decode_video
 from tensorflow.contrib.ffmpeg.ffmpeg_ops import encode_audio
+from tensorflow.contrib.ffmpeg.ffmpeg_ops import decode_video
 
 from tensorflow.python.util.all_util import remove_undocumented
 
diff --git a/tensorflow/contrib/ffmpeg/default/ffmpeg_lib.cc b/tensorflow/contrib/ffmpeg/default/ffmpeg_lib.cc
index 1245f515fe..1e8af1458c 100644
--- a/tensorflow/contrib/ffmpeg/default/ffmpeg_lib.cc
+++ b/tensorflow/contrib/ffmpeg/default/ffmpeg_lib.cc
@@ -49,7 +49,8 @@ std::vector<string> FfmpegAudioCommandLine(const string& input_filename,
           "-nostdin",             // No interactive commands accepted.
           "-f", input_format_id,  // eg: "mp3"
           "-probesize", StrCat(kDefaultProbeSize), "-i", input_filename,
-          "-loglevel", "info",  // Enable verbose logging to support debugging.
+          "-loglevel", "error",   // Print errors only.
+          "-hide_banner",         // Skip printing build options, version, etc.
           "-map_metadata", "-1",  // Copy global metadata from input to output.
           "-vn",                  // No video recording.
           "-ac:a:0", StrCat(channel_count), "-ar:a:0",
@@ -72,7 +73,8 @@ std::vector<string> FfmpegVideoCommandLine(const string& input_filename,
           "-probesize",
           StrCat(kDefaultProbeSize),
           "-loglevel",
-          "info",  // Enable verbose logging to support debugging.
+          "error",  // Print errors only.
+          "-hide_banner",  // Skip printing build options, version, etc.
           "-vcodec",
           "rawvideo",
           "-pix_fmt",
diff --git a/tensorflow/contrib/ffmpeg/default/ffmpeg_lib_utility_test.cc b/tensorflow/contrib/ffmpeg/default/ffmpeg_lib_utility_test.cc
index d6c885a324..36fc71794b 100644
--- a/tensorflow/contrib/ffmpeg/default/ffmpeg_lib_utility_test.cc
+++ b/tensorflow/contrib/ffmpeg/default/ffmpeg_lib_utility_test.cc
@@ -20,6 +20,8 @@
 #include <string>
 #include <vector>
 
+
+#include "tensorflow/core/lib/io/path.h"
 #include "tensorflow/core/lib/core/threadpool.h"
 #include "tensorflow/core/lib/io/path.h"
 #include "tensorflow/core/platform/env.h"
diff --git a/tensorflow/contrib/ffmpeg/ffmpeg_ops.py b/tensorflow/contrib/ffmpeg/ffmpeg_ops.py
index 5bb011f41c..08b5a6ea48 100644
--- a/tensorflow/contrib/ffmpeg/ffmpeg_ops.py
+++ b/tensorflow/contrib/ffmpeg/ffmpeg_ops.py
@@ -21,6 +21,7 @@ from __future__ import print_function
 from tensorflow.contrib.ffmpeg.ops import gen_decode_audio_op_py
 from tensorflow.contrib.ffmpeg.ops import gen_decode_video_op_py
 from tensorflow.contrib.ffmpeg.ops import gen_encode_audio_op_py
+from tensorflow.contrib.ffmpeg.ops import gen_decode_video_op_py
 from tensorflow.contrib.util import loader
 from tensorflow.python.framework import ops
 from tensorflow.python.platform import resource_loader
diff --git a/tensorflow/contrib/gan/python/estimator/python/gan_estimator_impl.py b/tensorflow/contrib/gan/python/estimator/python/gan_estimator_impl.py
index eef66af7f9..d3dca3d9e7 100644
--- a/tensorflow/contrib/gan/python/estimator/python/gan_estimator_impl.py
+++ b/tensorflow/contrib/gan/python/estimator/python/gan_estimator_impl.py
@@ -107,6 +107,7 @@ class GANEstimator(estimator.Estimator):
                discriminator_loss_fn=None,
                generator_optimizer=None,
                discriminator_optimizer=None,
+               get_hooks_fn=None,
                add_summaries=None,
                use_loss_summaries=True,
                config=None):
@@ -137,6 +138,10 @@ class GANEstimator(estimator.Estimator):
         work.
       discriminator_optimizer: Same as `generator_optimizer`, but for the
         discriminator updates.
+      get_hooks_fn: A function that takes a `GANTrainOps` tuple and returns a
+        list of hooks. These hooks are run on the generator and discriminator
+        train ops, and can be used to implement the GAN training scheme.
+        Defaults to `train.get_sequential_train_hooks()`.
       add_summaries: `None`, a single `SummaryType`, or a list of `SummaryType`.
       use_loss_summaries: If `True`, add loss summaries. If `False`, does not.
         If `None`, uses defaults.
@@ -151,7 +156,7 @@ class GANEstimator(estimator.Estimator):
               else discriminator_optimizer)
       gan_head = head_lib.gan_head(
           generator_loss_fn, discriminator_loss_fn, gopt, dopt,
-          use_loss_summaries)
+          use_loss_summaries, get_hooks_fn=get_hooks_fn)
       return _gan_model_fn(
           features, labels, mode, generator_fn, discriminator_fn, gan_head,
           add_summaries)
diff --git a/tensorflow/contrib/gan/python/estimator/python/head_impl.py b/tensorflow/contrib/gan/python/estimator/python/head_impl.py
index 204c646e19..a21358c50b 100644
--- a/tensorflow/contrib/gan/python/estimator/python/head_impl.py
+++ b/tensorflow/contrib/gan/python/estimator/python/head_impl.py
@@ -71,7 +71,7 @@ class GANHead(head._Head):  # pylint: disable=protected-access
   def __init__(self, generator_loss_fn, discriminator_loss_fn,
                generator_optimizer, discriminator_optimizer,
                use_loss_summaries=True,
-               get_hooks_fn=tfgan_train.get_sequential_train_hooks(),
+               get_hooks_fn=None,
                name=None):
     """`Head` for GAN training.
 
@@ -86,10 +86,12 @@ class GANHead(head._Head):  # pylint: disable=protected-access
       use_loss_summaries: If `True`, add loss summaries. If `False`, does not.
         If `None`, uses defaults.
       get_hooks_fn: A function that takes a GANTrainOps tuple and returns a list
-        of hooks.
+        of hooks. Defaults to `train.get_sequential_train_hooks()`
       name: name of the head. If provided, summary and metrics keys will be
         suffixed by `"/" + name`.
     """
+    if get_hooks_fn is None:
+      get_hooks_fn = tfgan_train.get_sequential_train_hooks()
     # TODO(joelshor): Validate inputs.
 
     if use_loss_summaries in [True, False]:
diff --git a/tensorflow/contrib/layers/python/layers/initializers.py b/tensorflow/contrib/layers/python/layers/initializers.py
index b12a882d9a..51610f21b2 100644
--- a/tensorflow/contrib/layers/python/layers/initializers.py
+++ b/tensorflow/contrib/layers/python/layers/initializers.py
@@ -79,7 +79,8 @@ def variance_scaling_initializer(factor=2.0, mode='FAN_IN', uniform=False,
   ```
 
   * To get [Delving Deep into Rectifiers](
-     http://arxiv.org/pdf/1502.01852v1.pdf), use (Default):<br/>
+     http://arxiv.org/pdf/1502.01852v1.pdf) (also know as the "MSRA 
+     initialization"), use (Default):<br/>
     `factor=2.0 mode='FAN_IN' uniform=False`
   * To get [Convolutional Architecture for Fast Feature Embedding](
      http://arxiv.org/abs/1408.5093), use:<br/>
diff --git a/tensorflow/contrib/learn/python/learn/estimators/composable_model_test.py b/tensorflow/contrib/learn/python/learn/estimators/composable_model_test.py
index 14750961ef..ef5e620e8f 100644
--- a/tensorflow/contrib/learn/python/learn/estimators/composable_model_test.py
+++ b/tensorflow/contrib/learn/python/learn/estimators/composable_model_test.py
@@ -18,7 +18,7 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-from tensorflow.contrib.framework.python.ops import variables as contrib_variables
+from tensorflow.python.training import training_util
 from tensorflow.contrib.layers.python.layers import feature_column
 from tensorflow.contrib.learn.python.learn.datasets import base
 from tensorflow.contrib.learn.python.learn.estimators import composable_model
@@ -55,7 +55,7 @@ def _base_model_fn(features, labels, mode, params):
     raise NotImplementedError
 
   def _train_op_fn(loss):
-    global_step = contrib_variables.get_global_step()
+    global_step = training_util.get_global_step()
     assert global_step
     train_step = model.get_train_step(loss)
 
diff --git a/tensorflow/contrib/learn/python/learn/estimators/dnn.py b/tensorflow/contrib/learn/python/learn/estimators/dnn.py
index cb15ef23e9..c17b41c0f7 100644
--- a/tensorflow/contrib/learn/python/learn/estimators/dnn.py
+++ b/tensorflow/contrib/learn/python/learn/estimators/dnn.py
@@ -23,7 +23,7 @@ import six
 from tensorflow.contrib import layers
 from tensorflow.contrib.framework import deprecated
 from tensorflow.contrib.framework import deprecated_arg_values
-from tensorflow.contrib.framework.python.ops import variables as contrib_variables
+from tensorflow.python.training import training_util
 from tensorflow.contrib.layers.python.layers import feature_column
 from tensorflow.contrib.layers.python.layers import optimizers
 from tensorflow.contrib.learn.python.learn import metric_spec
@@ -189,7 +189,7 @@ def _dnn_model_fn(features, labels, mode, params, config=None):
       """Returns the op to optimize the loss."""
       return optimizers.optimize_loss(
           loss=loss,
-          global_step=contrib_variables.get_global_step(),
+          global_step=training_util.get_global_step(),
           learning_rate=_LEARNING_RATE,
           optimizer=_get_optimizer(optimizer),
           gradient_multipliers=(
diff --git a/tensorflow/contrib/learn/python/learn/estimators/estimator_input_test.py b/tensorflow/contrib/learn/python/learn/estimators/estimator_input_test.py
index 248c6c733f..9d7c1a099a 100644
--- a/tensorflow/contrib/learn/python/learn/estimators/estimator_input_test.py
+++ b/tensorflow/contrib/learn/python/learn/estimators/estimator_input_test.py
@@ -23,7 +23,7 @@ import tempfile
 
 import numpy as np
 
-from tensorflow.contrib.framework.python.ops import variables
+from tensorflow.python.training import training_util
 from tensorflow.contrib.layers.python.layers import optimizers
 from tensorflow.contrib.learn.python.learn import metric_spec
 from tensorflow.contrib.learn.python.learn import models
@@ -114,7 +114,7 @@ def linear_model_params_fn(features, labels, mode, params):
   prediction, loss = (models.linear_regression_zero_init(features, labels))
   train_op = optimizers.optimize_loss(
       loss,
-      variables.get_global_step(),
+      training_util.get_global_step(),
       optimizer='Adagrad',
       learning_rate=params['learning_rate'])
   return prediction, loss, train_op
@@ -129,7 +129,7 @@ def linear_model_fn(features, labels, mode):
     (_, features), = features.items()
   prediction, loss = (models.linear_regression_zero_init(features, labels))
   train_op = optimizers.optimize_loss(
-      loss, variables.get_global_step(), optimizer='Adagrad', learning_rate=0.1)
+      loss, training_util.get_global_step(), optimizer='Adagrad', learning_rate=0.1)
   return prediction, loss, train_op
 
 
@@ -139,7 +139,7 @@ def linear_model_fn_with_model_fn_ops(features, labels, mode):
                   model_fn.ModeKeys.INFER)
   prediction, loss = (models.linear_regression_zero_init(features, labels))
   train_op = optimizers.optimize_loss(
-      loss, variables.get_global_step(), optimizer='Adagrad', learning_rate=0.1)
+      loss, training_util.get_global_step(), optimizer='Adagrad', learning_rate=0.1)
   return model_fn.ModelFnOps(
       mode=mode, predictions=prediction, loss=loss, train_op=train_op)
 
@@ -150,7 +150,7 @@ def logistic_model_no_mode_fn(features, labels):
   labels = array_ops.one_hot(labels, 3, 1, 0)
   prediction, loss = (models.logistic_regression_zero_init(features, labels))
   train_op = optimizers.optimize_loss(
-      loss, variables.get_global_step(), optimizer='Adagrad', learning_rate=0.1)
+      loss, training_util.get_global_step(), optimizer='Adagrad', learning_rate=0.1)
   return {
       'class': math_ops.argmax(prediction, 1),
       'prob': prediction
diff --git a/tensorflow/contrib/learn/python/learn/estimators/estimator_test.py b/tensorflow/contrib/learn/python/learn/estimators/estimator_test.py
index be2b0cb3ca..2a13a84627 100644
--- a/tensorflow/contrib/learn/python/learn/estimators/estimator_test.py
+++ b/tensorflow/contrib/learn/python/learn/estimators/estimator_test.py
@@ -32,7 +32,7 @@ from google.protobuf import text_format
 
 from tensorflow.contrib import learn
 from tensorflow.contrib import lookup
-from tensorflow.contrib.framework.python.ops import variables
+from tensorflow.python.training import training_util
 from tensorflow.contrib.layers.python.layers import feature_column as feature_column_lib
 from tensorflow.contrib.layers.python.layers import optimizers
 from tensorflow.contrib.learn.python.learn import experiment
@@ -132,7 +132,7 @@ def linear_model_params_fn(features, labels, mode, params):
   prediction, loss = (models.linear_regression_zero_init(features, labels))
   train_op = optimizers.optimize_loss(
       loss,
-      variables.get_global_step(),
+      training_util.get_global_step(),
       optimizer='Adagrad',
       learning_rate=params['learning_rate'])
   return prediction, loss, train_op
@@ -147,7 +147,7 @@ def linear_model_fn(features, labels, mode):
     (_, features), = features.items()
   prediction, loss = (models.linear_regression_zero_init(features, labels))
   train_op = optimizers.optimize_loss(
-      loss, variables.get_global_step(), optimizer='Adagrad', learning_rate=0.1)
+      loss, training_util.get_global_step(), optimizer='Adagrad', learning_rate=0.1)
   return prediction, loss, train_op
 
 
@@ -157,7 +157,7 @@ def linear_model_fn_with_model_fn_ops(features, labels, mode):
                   model_fn.ModeKeys.INFER)
   prediction, loss = (models.linear_regression_zero_init(features, labels))
   train_op = optimizers.optimize_loss(
-      loss, variables.get_global_step(), optimizer='Adagrad', learning_rate=0.1)
+      loss, training_util.get_global_step(), optimizer='Adagrad', learning_rate=0.1)
   return model_fn.ModelFnOps(
       mode=mode, predictions=prediction, loss=loss, train_op=train_op)
 
@@ -168,7 +168,7 @@ def logistic_model_no_mode_fn(features, labels):
   labels = array_ops.one_hot(labels, 3, 1, 0)
   prediction, loss = (models.logistic_regression_zero_init(features, labels))
   train_op = optimizers.optimize_loss(
-      loss, variables.get_global_step(), optimizer='Adagrad', learning_rate=0.1)
+      loss, training_util.get_global_step(), optimizer='Adagrad', learning_rate=0.1)
   return {
       'class': math_ops.argmax(prediction, 1),
       'prob': prediction
@@ -241,7 +241,7 @@ def _build_estimator_for_resource_export_test():
     const = constant_op.constant(-1, dtype=dtypes.int64)
     table = lookup.MutableHashTable(
         dtypes.string, dtypes.int64, const, name='LookupTableModel')
-    update_global_step = variables.get_global_step().assign_add(1)
+    update_global_step = training_util.get_global_step().assign_add(1)
     if mode in (model_fn.ModeKeys.TRAIN, model_fn.ModeKeys.EVAL):
       key = constant_op.constant(['key'])
       value = constant_op.constant([42], dtype=dtypes.int64)
@@ -306,7 +306,7 @@ def _model_fn_ops(
         mode=mode,
         predictions=constant_op.constant(0.),
         loss=constant_op.constant(0.),
-        train_op=variables.get_global_step().assign_add(1))
+        train_op=training_util.get_global_step().assign_add(1))
 
 
 def _make_input_fn(features, labels):
@@ -389,7 +389,7 @@ class EstimatorModelFnTest(test.TestCase):
       self.assertEqual(expected_param, params)
       self.assertEqual(model_dir, expected_model_dir)
       return (constant_op.constant(0.), constant_op.constant(0.),
-              variables.get_global_step().assign_add(1))
+              training_util.get_global_step().assign_add(1))
     est = estimator.Estimator(model_fn=_argument_checker,
                               params=expected_param,
                               model_dir=expected_model_dir)
@@ -400,7 +400,7 @@ class EstimatorModelFnTest(test.TestCase):
     def _invalid_model_fn(features, labels):
       # pylint: disable=unused-argument
       w = variables_lib.Variable(42.0, 'weight')
-      update_global_step = variables.get_global_step().assign_add(1)
+      update_global_step = training_util.get_global_step().assign_add(1)
       with ops.control_dependencies([update_global_step]):
         loss = 100.0 - w
       return None, loss, None
@@ -415,7 +415,7 @@ class EstimatorModelFnTest(test.TestCase):
       # pylint: disable=unused-argument
       w = variables_lib.Variable(42.0, 'weight')
       loss = 100.0 - w
-      update_global_step = variables.get_global_step().assign_add(1)
+      update_global_step = training_util.get_global_step().assign_add(1)
       with ops.control_dependencies([update_global_step]):
         train_op = w.assign_add(loss / 100.0)
       predictions = loss
@@ -434,7 +434,7 @@ class EstimatorModelFnTest(test.TestCase):
       # pylint: disable=unused-argument
       w = variables_lib.Variable(42.0, 'weight')
       loss = 100.0 - w
-      update_global_step = variables.get_global_step().assign_add(1)
+      update_global_step = training_util.get_global_step().assign_add(1)
       with ops.control_dependencies([update_global_step]):
         train_op = w.assign_add(loss / 100.0)
       return None, loss, train_op
@@ -464,7 +464,7 @@ class EstimatorModelFnTest(test.TestCase):
           mode=mode,
           predictions=constant_op.constant(0.),
           loss=constant_op.constant(0.),
-          train_op=variables.get_global_step().assign_add(1),
+          train_op=training_util.get_global_step().assign_add(1),
           scaffold=monitored_session.Scaffold(init_fn=_init_fn))
 
     est = estimator.Estimator(model_fn=_model_fn_scaffold)
@@ -483,7 +483,7 @@ class EstimatorModelFnTest(test.TestCase):
           mode=mode,
           predictions=constant_op.constant([[1.]]),
           loss=constant_op.constant(0.),
-          train_op=variables.get_global_step().assign_add(1),
+          train_op=training_util.get_global_step().assign_add(1),
           scaffold=monitored_session.Scaffold(saver=self.mock_saver))
 
     def input_fn():
diff --git a/tensorflow/contrib/learn/python/learn/estimators/estimators_test.py b/tensorflow/contrib/learn/python/learn/estimators/estimators_test.py
index 1d89dfb55b..8131e0fde6 100644
--- a/tensorflow/contrib/learn/python/learn/estimators/estimators_test.py
+++ b/tensorflow/contrib/learn/python/learn/estimators/estimators_test.py
@@ -22,7 +22,7 @@ import random
 
 import numpy as np
 
-from tensorflow.contrib.framework.python.ops import variables
+from tensorflow.python.training import training_util
 from tensorflow.contrib.learn.python import learn
 from tensorflow.contrib.learn.python.learn import datasets
 from tensorflow.contrib.learn.python.learn import metric_spec
@@ -62,7 +62,7 @@ class FeatureEngineeringFunctionTest(test.TestCase):
       _ = labels
       predictions = features["transformed_x"]
       loss = constant_op.constant([2.])
-      update_global_step = variables.get_global_step().assign_add(1)
+      update_global_step = training_util.get_global_step().assign_add(1)
       return predictions, loss, update_global_step
 
     estimator = estimator_lib.Estimator(
@@ -100,7 +100,7 @@ class FeatureEngineeringFunctionTest(test.TestCase):
       _ = labels
       predictions = features["x"]
       loss = constant_op.constant([2.])
-      update_global_step = variables.get_global_step().assign_add(1)
+      update_global_step = training_util.get_global_step().assign_add(1)
       return predictions, loss, update_global_step
 
     estimator = estimator_lib.Estimator(
@@ -139,7 +139,7 @@ class FeatureEngineeringFunctionTest(test.TestCase):
       _ = labels
       predictions = features["x"]
       loss = constant_op.constant([2.])
-      update_global_step = variables.get_global_step().assign_add(1)
+      update_global_step = training_util.get_global_step().assign_add(1)
       return predictions, loss, update_global_step
 
     estimator_with_fe_fn = estimator_lib.Estimator(
diff --git a/tensorflow/contrib/learn/python/learn/estimators/kmeans.py b/tensorflow/contrib/learn/python/learn/estimators/kmeans.py
index 992b804f59..8f9d6fc318 100644
--- a/tensorflow/contrib/learn/python/learn/estimators/kmeans.py
+++ b/tensorflow/contrib/learn/python/learn/estimators/kmeans.py
@@ -28,7 +28,7 @@ import time
 import numpy as np
 
 from tensorflow.contrib.factorization.python.ops import clustering_ops
-from tensorflow.contrib.framework.python.ops import variables
+from tensorflow.python.training import training_util
 from tensorflow.contrib.learn.python.learn.estimators import estimator
 from tensorflow.contrib.learn.python.learn.estimators.model_fn import ModelFnOps
 from tensorflow.python.framework import ops
@@ -128,7 +128,7 @@ def _kmeans_clustering_model_fn(features, labels, mode, params, config):
        random_seed=params.get('random_seed'),
        kmeans_plus_plus_num_retries=params.get(
            'kmeans_plus_plus_num_retries')).training_graph()
-  incr_step = state_ops.assign_add(variables.get_global_step(), 1)
+  incr_step = state_ops.assign_add(training_util.get_global_step(), 1)
   loss = math_ops.reduce_sum(losses, name=KMeansClustering.LOSS_OP_NAME)
   summary.scalar('loss/raw', loss)
   training_op = with_dependencies([training_op, incr_step], loss)
diff --git a/tensorflow/contrib/learn/python/learn/estimators/linear.py b/tensorflow/contrib/learn/python/learn/estimators/linear.py
index f5445ad4e7..37aa8b3396 100644
--- a/tensorflow/contrib/learn/python/learn/estimators/linear.py
+++ b/tensorflow/contrib/learn/python/learn/estimators/linear.py
@@ -26,7 +26,7 @@ import six
 from tensorflow.contrib import layers
 from tensorflow.contrib.framework import deprecated
 from tensorflow.contrib.framework import deprecated_arg_values
-from tensorflow.contrib.framework.python.ops import variables as contrib_variables
+from tensorflow.python.training import training_util
 from tensorflow.contrib.layers.python.layers import feature_column
 from tensorflow.contrib.learn.python.learn.estimators import estimator
 from tensorflow.contrib.learn.python.learn.estimators import head as head_lib
@@ -170,7 +170,7 @@ def _linear_model_fn(features, labels, mode, params, config=None):
           weight_collections=[parent_scope])
 
     def _train_op_fn(loss):
-      global_step = contrib_variables.get_global_step()
+      global_step = training_util.get_global_step()
       my_vars = ops.get_collection(parent_scope)
       grads = gradients.gradients(loss, my_vars)
       if gradient_clip_norm:
@@ -252,7 +252,7 @@ def sdca_model_fn(features, labels, mode, params):
     _add_bias_column(feature_columns, features, bias, columns_to_variables)
 
   def _train_op_fn(unused_loss):
-    global_step = contrib_variables.get_global_step()
+    global_step = training_util.get_global_step()
     sdca_model, train_op = optimizer.get_train_step(columns_to_variables,
                                                     weight_column_name,
                                                     loss_type, features,
diff --git a/tensorflow/contrib/learn/python/learn/estimators/logistic_regressor_test.py b/tensorflow/contrib/learn/python/learn/estimators/logistic_regressor_test.py
index 93c62f87e8..656d68b768 100644
--- a/tensorflow/contrib/learn/python/learn/estimators/logistic_regressor_test.py
+++ b/tensorflow/contrib/learn/python/learn/estimators/logistic_regressor_test.py
@@ -21,7 +21,7 @@ from __future__ import print_function
 import numpy as np
 
 from tensorflow.contrib import layers
-from tensorflow.contrib.framework.python.ops import variables
+from tensorflow.python.training import training_util
 from tensorflow.contrib.layers.python.layers import optimizers
 from tensorflow.contrib.learn.python.learn.datasets import base
 from tensorflow.contrib.learn.python.learn.estimators import logistic_regressor
@@ -57,7 +57,7 @@ def _logistic_regression_model_fn(features, labels, mode):
   predictions = math_ops.sigmoid(logits)
   loss = losses.sigmoid_cross_entropy(labels, logits)
   train_op = optimizers.optimize_loss(
-      loss, variables.get_global_step(), optimizer='Adagrad', learning_rate=0.1)
+      loss, training_util.get_global_step(), optimizer='Adagrad', learning_rate=0.1)
   return predictions, loss, train_op
 
 
diff --git a/tensorflow/contrib/learn/python/learn/utils/export.py b/tensorflow/contrib/learn/python/learn/utils/export.py
index 6af2287761..cb34cb1d26 100644
--- a/tensorflow/contrib/learn/python/learn/utils/export.py
+++ b/tensorflow/contrib/learn/python/learn/utils/export.py
@@ -20,7 +20,7 @@ from __future__ import division
 from __future__ import print_function
 
 from tensorflow.contrib.framework import deprecated
-from tensorflow.contrib.framework.python.ops import variables as contrib_variables
+from tensorflow.python.training import training_util
 from tensorflow.contrib.session_bundle import exporter
 from tensorflow.contrib.session_bundle import gc
 from tensorflow.python.client import session as tf_session
@@ -78,7 +78,7 @@ def _export_graph(graph, saver, checkpoint_path, export_dir,
           default_graph_signature=default_graph_signature,
           named_graph_signatures=named_graph_signatures,
           assets_collection=ops.get_collection(ops.GraphKeys.ASSET_FILEPATHS))
-      return export.export(export_dir, contrib_variables.get_global_step(),
+      return export.export(export_dir, training_util.get_global_step(),
                            session, exports_to_keep=exports_to_keep)
 
 
@@ -295,7 +295,7 @@ def _export_estimator(estimator,
   checkpoint_path = (checkpoint_path or
                      tf_saver.latest_checkpoint(estimator._model_dir))
   with ops.Graph().as_default() as g:
-    contrib_variables.create_global_step(g)
+    training_util.create_global_step(g)
 
     if use_deprecated_input_fn:
       examples = array_ops.placeholder(dtype=dtypes.string,
diff --git a/tensorflow/contrib/linear_optimizer/python/sdca_estimator.py b/tensorflow/contrib/linear_optimizer/python/sdca_estimator.py
index 701fc1c059..05794a42c5 100644
--- a/tensorflow/contrib/linear_optimizer/python/sdca_estimator.py
+++ b/tensorflow/contrib/linear_optimizer/python/sdca_estimator.py
@@ -19,7 +19,7 @@ from __future__ import division
 from __future__ import print_function
 
 from tensorflow.contrib import layers
-from tensorflow.contrib.framework.python.ops import variables as contrib_variables
+from tensorflow.python.training import training_util
 from tensorflow.contrib.learn.python.learn.estimators import estimator
 from tensorflow.contrib.learn.python.learn.estimators import head as head_lib
 from tensorflow.contrib.learn.python.learn.estimators import prediction_key
@@ -154,7 +154,7 @@ def sdca_model_fn(features, labels, mode, params, config=None):
     _add_bias_column(feature_columns, features, bias, columns_to_variables)
 
   def _train_op_fn(unused_loss):
-    global_step = contrib_variables.get_global_step()
+    global_step = training_util.get_global_step()
     sdca_model, train_op = optimizer.get_train_step(
         columns_to_variables, weight_column_name, loss_type, features, labels,
         global_step)
diff --git a/tensorflow/contrib/lite/README.md b/tensorflow/contrib/lite/README.md
index fc9144d5fc..2fb40070cb 100644
--- a/tensorflow/contrib/lite/README.md
+++ b/tensorflow/contrib/lite/README.md
@@ -167,6 +167,7 @@ graphviz, or [in tensorboard](https://codelabs.developers.google.com/codelabs/te
 This frozen Graphdef is now ready to be converted to flatbuffer format (.lite) for use on Android or iOS.  On Android users have the flexibility to use either the float or quantized versions of the frozen graphdef, if available, using the Tensorflow Optimizing Converter tool.
 
 Here is a sample command line to convert the frozen Graphdef to '.lite' format for  The Tensorflow Optimizing Converter supports both float and quantized models, however, different configuration parameters are needed depending on whether a FLOAT or QUANTIZED mode is being used.
+(Here is a link to the pb [file](https://storage.googleapis.com/download.tensorflow.org/models/mobilenet_v1_1.0_224_frozen.tgz)).
 
 ```
 bazel build tensorflow/contrib/lite/toco:toco
@@ -215,3 +216,7 @@ Note that you'd need to follow instructions for installing TensorFlow on Android
 
 ### For iOS
 Follow the documentation [here](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/lite/g3doc/ios.md) to get integrate a TFLite model into your app.
+
+## Core ML support
+
+Core ML is a machine learning framework used across Apple products. In addition to using Tensorflow Lite models directly in their applications, developers have the option to convert their trained Tensorflow models to the [CoreML](https://developer.apple.com/machine-learning/) format for use on Apple devices. For information on how to use the converter please refer to the [Tensorflow-CoreML converter documentation](https://github.com/tf-coreml/tf-coreml).
diff --git a/tensorflow/contrib/lite/ios_makefile.inc b/tensorflow/contrib/lite/ios_makefile.inc
index 345ed26212..bcff7ed988 100644
--- a/tensorflow/contrib/lite/ios_makefile.inc
+++ b/tensorflow/contrib/lite/ios_makefile.inc
@@ -1,31 +1,47 @@
-#Settings for iOS.
-ifeq($(TARGET), IOS) BUILD_FOR_IOS_SIMULATOR
-    : = false ifeq($(IOS_ARCH), x86_64) BUILD_FOR_IOS_SIMULATOR
-    : = true endif ifeq($(IOS_ARCH), i386) BUILD_FOR_IOS_SIMULATOR
-    : = true endif ifeq($(BUILD_FOR_IOS_SIMULATOR), true) IPHONEOS_PLATFORM
-    : = $(shell xcrun-- sdk iphonesimulator-- show - sdk - platform -
-          path) IPHONEOS_SYSROOT
-    : = $(shell xcrun-- sdk iphonesimulator-- show - sdk -
-          path) else IPHONEOS_PLATFORM
-    : = $(shell xcrun-- sdk iphoneos-- show - sdk - platform -
-          path) IPHONEOS_SYSROOT
-    : = $(shell xcrun-- sdk iphoneos-- show - sdk - path) endif IOS_SDK_VERSION
-    : = $(shell xcrun-- sdk iphoneos-- show - sdk - version) MIN_SDK_VERSION
-    : = 9.0
-#Override IOS_ARCH with armv7, armv7s, arm64, i386, or x86_64.
-      IOS_ARCH
-    : = x86_64 CXXFLAGS
-      += -miphoneos - version
-         - min = $(MIN_SDK_VERSION) - DGEMMLOWP_ALLOW_SLOW_SCALAR_FALLBACK
-                 - fembed - bitcode - Wno - c++ 11 - narrowing - mno - thumb
-                 - fno - exceptions
-                 - isysroot ${IPHONEOS_SYSROOT} - arch $(IOS_ARCH) - O3 CCFLAGS
-      += -miphoneos - version
-         - min = $(MIN_SDK_VERSION) - fembed - bitcode - mno - thumb
-                 - isysroot ${IPHONEOS_SYSROOT} - arch $(IOS_ARCH) -
-                 O3 LDFLAGS
-    : = -fembed - bitcode - miphoneos - version
-        - min = ${MIN_SDK_VERSION} - arch $(IOS_ARCH) OBJDIR
-    : = $(OBJDIR) ios_$(IOS_ARCH) / LIBDIR
-    : = $(LIBDIR) ios_$(IOS_ARCH) / BINDIR
-    : = $(BINDIR) ios_$(IOS_ARCH) / DEPDIR : = $(DEPDIR) ios_$(IOS_ARCH) / endif
+# Settings for iOS.
+ifeq ($(TARGET), IOS)
+        BUILD_FOR_IOS_SIMULATOR := false
+	ifeq ($(IOS_ARCH), x86_64)
+	     	BUILD_FOR_IOS_SIMULATOR := true
+	endif
+	ifeq ($(IOS_ARCH), i386)
+	     	BUILD_FOR_IOS_SIMULATOR := true
+	endif
+	ifeq ($(BUILD_FOR_IOS_SIMULATOR), true)
+		IPHONEOS_PLATFORM := $(shell xcrun --sdk iphonesimulator \
+			--show-sdk-platform-path)
+		IPHONEOS_SYSROOT := $(shell xcrun --sdk iphonesimulator \
+			--show-sdk-path)
+	else
+		IPHONEOS_PLATFORM := $(shell xcrun --sdk iphoneos --show-sdk-platform-path)
+		IPHONEOS_SYSROOT := $(shell xcrun --sdk iphoneos --show-sdk-path)
+	endif
+	IOS_SDK_VERSION := $(shell xcrun --sdk iphoneos --show-sdk-version)
+	MIN_SDK_VERSION := 9.0
+	# Override IOS_ARCH with armv7, armv7s, arm64, i386, or x86_64.
+	IOS_ARCH := x86_64
+	CXXFLAGS += -miphoneos-version-min=$(MIN_SDK_VERSION) \
+		-DGEMMLOWP_ALLOW_SLOW_SCALAR_FALLBACK \
+		-fembed-bitcode \
+		-Wno-c++11-narrowing \
+		-mno-thumb \
+		-fno-exceptions \
+		-isysroot \
+		${IPHONEOS_SYSROOT} \
+		-arch $(IOS_ARCH) \
+		-O3
+	CCFLAGS += -miphoneos-version-min=$(MIN_SDK_VERSION) \
+		-fembed-bitcode \
+		-mno-thumb \
+		-isysroot \
+		${IPHONEOS_SYSROOT} \
+		-arch $(IOS_ARCH) \
+		-O3
+	LDFLAGS := -fembed-bitcode \
+		-miphoneos-version-min=${MIN_SDK_VERSION} \
+		-arch $(IOS_ARCH)
+	OBJDIR := $(OBJDIR)ios_$(IOS_ARCH)/
+	LIBDIR := $(LIBDIR)ios_$(IOS_ARCH)/
+	BINDIR := $(BINDIR)ios_$(IOS_ARCH)/
+	DEPDIR := $(DEPDIR)ios_$(IOS_ARCH)/
+endif
diff --git a/tensorflow/contrib/lite/nnapi/NeuralNetworksShim.h b/tensorflow/contrib/lite/nnapi/NeuralNetworksShim.h
index bdb5e01538..8066889078 100644
--- a/tensorflow/contrib/lite/nnapi/NeuralNetworksShim.h
+++ b/tensorflow/contrib/lite/nnapi/NeuralNetworksShim.h
@@ -108,7 +108,7 @@ enum {
  * The type of operations that can be added to a model.
  */
 enum {
-  /** Adds two tensors, elment-wise.
+  /** Adds two tensors, element-wise.
    *
    * Takes two input tensors of identical type and compatible dimensions. The
    * output is the sum of both input tensors, optionally modified by an
@@ -743,7 +743,7 @@ enum {
    */
   ANEURALNETWORKS_MAX_POOL_2D = 17,
 
-  /** Multiplies two tensors, elment-wise.
+  /** Multiplies two tensors, element-wise.
    *
    * Takes two input tensors of identical type and compatible dimensions. The
    * output is the product of both input tensors, optionally modified by an
diff --git a/tensorflow/contrib/makefile/Makefile b/tensorflow/contrib/makefile/Makefile
index e2e6c05591..ee84b5b4c8 100644
--- a/tensorflow/contrib/makefile/Makefile
+++ b/tensorflow/contrib/makefile/Makefile
@@ -300,7 +300,7 @@ ifeq ($(TARGET),ANDROID)
 	ifeq ($(ANDROID_ARCH),x86_64)
 		TOOLCHAIN := x86_64-4.9
 		SYSROOT_ARCH := x86_64
-		BIN_PREFIX := x86-64-linux-android
+		BIN_PREFIX := x86_64-linux-android
 		MARCH_OPTION :=
 	endif
     
diff --git a/tensorflow/contrib/nn/__init__.py b/tensorflow/contrib/nn/__init__.py
index 0bc133a00e..96d60e1498 100644
--- a/tensorflow/contrib/nn/__init__.py
+++ b/tensorflow/contrib/nn/__init__.py
@@ -21,6 +21,7 @@
 @@deprecated_flipped_sigmoid_cross_entropy_with_logits
 @@nth_element
 @@rank_sampled_softmax_loss
+@@sampled_sparse_softmax_loss
 @@scaled_softplus
 """
 
diff --git a/tensorflow/contrib/nn/python/ops/sampling_ops.py b/tensorflow/contrib/nn/python/ops/sampling_ops.py
index 98749cff7e..63fc487dca 100644
--- a/tensorflow/contrib/nn/python/ops/sampling_ops.py
+++ b/tensorflow/contrib/nn/python/ops/sampling_ops.py
@@ -24,6 +24,8 @@ from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import embedding_ops
 from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import nn
+from tensorflow.python.ops import nn_impl
+from tensorflow.python.ops import nn_ops
 
 
 def _rank_resample(weights, biases, inputs, sampled_values, num_resampled,
@@ -240,3 +242,101 @@ def rank_sampled_softmax_loss(weights,
         remove_accidental_hits=remove_accidental_hits,
         partition_strategy=partition_strategy,
         name=name)
+
+
+def sampled_sparse_softmax_loss(weights,
+                                biases,
+                                labels,
+                                inputs,
+                                num_sampled,
+                                num_classes,
+                                sampled_values=None,
+                                remove_accidental_hits=True,
+                                partition_strategy="mod",
+                                name="sampled_sparse_softmax_loss"):
+  """Computes and returns the sampled sparse softmax training loss.
+
+  This is a faster way to train a softmax classifier over a huge number of
+  classes.
+
+  This operation is for training only.  It is generally an underestimate of
+  the full softmax loss.
+
+  A common use case is to use this method for training, and calculate the full
+  softmax loss for evaluation or inference. In this case, you must set
+  `partition_strategy="div"` for the two losses to be consistent, as in the
+  following example:
+
+  ```python
+  if mode == "train":
+    loss = tf.nn.sampled_sparse_softmax_loss(
+        weights=weights,
+        biases=biases,
+        labels=labels,
+        inputs=inputs,
+        ...,
+        partition_strategy="div")
+  elif mode == "eval":
+    logits = tf.matmul(inputs, tf.transpose(weights))
+    logits = tf.nn.bias_add(logits, biases)
+    loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
+        labels=tf.squeeze(labels),
+        logits=logits)
+  ```
+
+  See our [Candidate Sampling Algorithms Reference]
+  (https://www.tensorflow.org/extras/candidate_sampling.pdf)
+
+  Also see Section 3 of [Jean et al., 2014](http://arxiv.org/abs/1412.2007)
+  ([pdf](http://arxiv.org/pdf/1412.2007.pdf)) for the math.
+
+  Args:
+    weights: A `Tensor` of shape `[num_classes, dim]`, or a list of `Tensor`
+        objects whose concatenation along dimension 0 has shape
+        [num_classes, dim].  The (possibly-sharded) class embeddings.
+    biases: A `Tensor` of shape `[num_classes]`.  The class biases.
+    labels: A `Tensor` of type `int64` and shape `[batch_size, 1]`.
+        The index of the single target class for each row of logits.  Note that
+        this format differs from the `labels` argument of
+        `nn.sparse_softmax_cross_entropy_with_logits`.
+    inputs: A `Tensor` of shape `[batch_size, dim]`.  The forward
+        activations of the input network.
+    num_sampled: An `int`.  The number of classes to randomly sample per batch.
+    num_classes: An `int`. The number of possible classes.
+    sampled_values: a tuple of (`sampled_candidates`, `true_expected_count`,
+        `sampled_expected_count`) returned by a `*_candidate_sampler` function.
+        (if None, we default to `log_uniform_candidate_sampler`)
+    remove_accidental_hits:  A `bool`.  whether to remove "accidental hits"
+        where a sampled class equals one of the target classes.  Default is
+        True.
+    partition_strategy: A string specifying the partitioning strategy, relevant
+        if `len(weights) > 1`. Currently `"div"` and `"mod"` are supported.
+        Default is `"mod"`. See `tf.nn.embedding_lookup` for more details.
+    name: A name for the operation (optional).
+
+  Returns:
+    A `batch_size` 1-D tensor of per-example sampled softmax losses.
+
+  """
+  logits, _ = nn_impl._compute_sampled_logits(
+      weights=weights,
+      biases=biases,
+      labels=labels,
+      inputs=inputs,
+      num_sampled=num_sampled,
+      num_classes=num_classes,
+      num_true=1,
+      sampled_values=sampled_values,
+      subtract_log_q=True,
+      remove_accidental_hits=remove_accidental_hits,
+      partition_strategy=partition_strategy,
+      name=name)
+
+  # There is only one true label. _compute_sampled_logits puts the true logit
+  # at index 0.
+  labels = array_ops.zeros([array_ops.shape(logits)[0], 1], dtype=dtypes.int64)
+
+  sampled_losses = nn_ops.sparse_softmax_cross_entropy_with_logits(
+      labels=array_ops.squeeze(labels), logits=logits)
+  # sampled_losses is a [batch_size] tensor.
+  return sampled_losses
diff --git a/tensorflow/contrib/opt/BUILD b/tensorflow/contrib/opt/BUILD
index d2811f21af..9c961f2b9c 100644
--- a/tensorflow/contrib/opt/BUILD
+++ b/tensorflow/contrib/opt/BUILD
@@ -16,6 +16,7 @@ py_library(
         "__init__.py",
         "python/training/addsign.py",
         "python/training/drop_stale_gradient_optimizer.py",
+        "python/training/elastic_average_optimizer.py",
         "python/training/external_optimizer.py",
         "python/training/lazy_adam_optimizer.py",
         "python/training/moving_average_optimizer.py",
@@ -174,6 +175,24 @@ tf_py_test(
     ],
 )
 
+tf_py_test(
+    name = "elastic_average_optimizer_test",
+    srcs = ["python/training/elastic_average_optimizer_test.py"],
+    additional_deps = [
+        ":opt_py",
+        "//tensorflow/python:client",
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:array_ops",
+        "//tensorflow/python:variables",
+        "//tensorflow/python:framework",
+        "//tensorflow/python:platform",
+        "//tensorflow/python:training",
+        "//tensorflow/python:ops",
+        "//tensorflow/python:framework_for_generated_wrappers",
+        "//third_party/py/numpy",
+    ],
+)
+
 py_test(
     name = "sign_decay_test",
     srcs = ["python/training/sign_decay_test.py"],
diff --git a/tensorflow/contrib/opt/__init__.py b/tensorflow/contrib/opt/__init__.py
index 04643a6058..2025e8b4fc 100644
--- a/tensorflow/contrib/opt/__init__.py
+++ b/tensorflow/contrib/opt/__init__.py
@@ -1,4 +1,4 @@
-# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+    # Copyright 2016 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -28,6 +28,7 @@ from tensorflow.contrib.opt.python.training.multitask_optimizer_wrapper import *
 from tensorflow.contrib.opt.python.training.nadam_optimizer import *
 from tensorflow.contrib.opt.python.training.powersign import *
 from tensorflow.contrib.opt.python.training.variable_clipping_optimizer import *
+from tensorflow.contrib.opt.python.training.elastic_average_optimizer import *
 # pylint: enable=wildcard-import
 
 from tensorflow.python.util.all_util import remove_undocumented
@@ -46,6 +47,8 @@ _allowed_symbols = [
     'VariableClippingOptimizer',
     'MultitaskOptimizerWrapper',
     'clip_gradients_by_global_norm',
+    'ElasticAverageOptimizer', 
+    'ElasticAverageCustomGetter'
 ]
 
 remove_undocumented(__name__, _allowed_symbols)
diff --git a/tensorflow/contrib/opt/python/training/elastic_average_optimizer.py b/tensorflow/contrib/opt/python/training/elastic_average_optimizer.py
new file mode 100644
index 0000000000..9941f22b1f
--- /dev/null
+++ b/tensorflow/contrib/opt/python/training/elastic_average_optimizer.py
@@ -0,0 +1,345 @@
+# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Wrapper optimizer for Elastic Average SGD """
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from tensorflow.python.framework import ops
+from tensorflow.python.ops import math_ops
+
+from tensorflow.python.ops import gen_nn_ops
+from tensorflow.python.ops import control_flow_ops
+from tensorflow.python.ops import variable_scope
+from tensorflow.python.ops import variables
+from tensorflow.python.training import optimizer
+from tensorflow.python.training import session_run_hook
+from tensorflow.python.ops import state_ops
+from tensorflow.python.ops import data_flow_ops
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import constant_op
+
+LOCAL_VARIABLE_NAME = 'local_center_variable'
+GLOBAL_VARIABLE_NAME = 'global_center_variable'
+
+
+class ElasticAverageCustomGetter(object):
+  """Custom_getter class is used to do:
+  1. Change trainable variables to local collection and place them at worker
+    device
+  2. Generate global variables(global center variables)
+  3. Generate local variables(local center variables) which record the global
+    variables and place them at worker device
+    Notice that the class should be used with tf.replica_device_setter,
+    so that the global center variables and global step variable can be placed
+    at ps device. Besides, use 'tf.get_variable' instead of 'tf.Variable' to
+    use this custom getter.
+
+  For example,
+  ea_custom_getter = ElasticAverageCustomGetter(worker_device)
+  with tf.device(
+    tf.train.replica_device_setter(
+      worker_device=worker_device,
+      ps_device="/job:ps/cpu:0",
+      cluster=cluster)),
+    tf.variable_scope('',custom_getter=ea_custom_getter):
+    hid_w = tf.get_variable(
+      initializer=tf.truncated_normal(
+          [IMAGE_PIXELS * IMAGE_PIXELS, FLAGS.hidden_units],
+          stddev=1.0 / IMAGE_PIXELS),
+      name="hid_w")
+    hid_b = tf.get_variable(initializer=tf.zeros([FLAGS.hidden_units]),
+                            name="hid_b")
+  """
+
+  def __init__(self, worker_device):
+    """Create a new `ElasticAverageCustomGetter`.
+
+    Args:
+      worker_device: String.  Name of the `worker` job.
+    """
+    self._worker_device = worker_device
+    self._local_map = {}
+    self._global_map = {}
+
+  def __call__(self, getter, name, trainable, collections, *args, **kwargs):
+    if trainable:
+      with ops.device(self._worker_device):
+        local_var = getter(name, trainable=True,
+                           collections=[ops.GraphKeys.LOCAL_VARIABLES], 
+                           *args, **kwargs)
+        
+      global_center_variable = variable_scope.variable(
+        name='%s/%s' %
+             (GLOBAL_VARIABLE_NAME,
+              name),
+        initial_value=local_var.initialized_value(),
+        trainable=False,
+        collections=[ops.GraphKeys.GLOBAL_VARIABLES])
+
+      with ops.device(self._worker_device):
+        local_center_variable = variable_scope.variable(
+          name='%s/%s' % (LOCAL_VARIABLE_NAME, name),
+          initial_value=local_var.initialized_value(),
+          trainable=False,
+          collections=[ops.GraphKeys.LOCAL_VARIABLES])
+        
+      self._local_map[local_var] = local_center_variable
+      self._global_map[local_var] = global_center_variable
+      return local_var
+    else:
+      return getter(name, trainable, collections, *args, **kwargs)
+
+
+class ElasticAverageOptimizer(optimizer.Optimizer):
+  """Wrapper optimizer that implements the Elastic Average SGD algorithm.
+  This is an async optimizer. During the training, Each worker will update
+  the local variables and maintains its own local_step, which starts from 0
+  and is incremented by 1 after each update of local variables. Whenever
+  the communication period divides the local step, the worker requests
+  the current global center variables and then computed the elastic difference
+  between global center variables and local variables. The elastic difference
+  then be used to update both local variables and global variables.
+  """
+
+  # Default value as paper described
+  BETA = 0.9
+
+  def __init__(
+      self,
+      opt,
+      num_worker,
+      ea_custom_getter,
+      communication_period=10,
+      moving_rate=None,
+      rho=None,
+      use_locking=True,
+      name="ElasticAverageOptimizer"):
+    """Construct a new gradient descent optimizer.
+
+    Args:
+      opt: The actual optimizer that will be used to update local variables.
+        Must be one of the Optimizer classes.
+      num_worker: The number of workers
+      ea_custom_getter: The ElasticAverageCustomGetter
+      communication_period: An int point value to controls the frequency
+        of the communication between every worker and the ps.
+      moving_rate: A floating point value to control the elastic difference.
+      rho: the amount of exploration we allow ine the model. The default
+        value is moving_rate/learning_rate
+      use_locking: If True use locks for update operations.
+      name: Optional name prefix for the operations created when applying
+        gradients. Defaults to "ElasticAverageOptimizer".
+    """
+    super(ElasticAverageOptimizer, self).__init__(use_locking, name)
+    self._opt = opt
+    self._num_worker = num_worker
+    self._period = communication_period
+    self._local_map = ea_custom_getter._local_map
+    self._global_map = ea_custom_getter._global_map
+
+    if moving_rate is None:
+      self._moving_rate = BETA / communication_period / num_worker
+    else:
+      self._moving_rate = moving_rate
+    if rho is None:
+      self._rho = self._moving_rate / self._opt._learning_rate
+    else:
+      self._rho = rho
+
+    self._local_step = variable_scope.get_variable(
+      initializer=0,
+      trainable=False,
+      collections=[ops.GraphKeys.LOCAL_VARIABLES],
+      name="local_step")
+    self._opt._prepare()
+
+  def compute_gradients(self, loss, var_list=None,
+                        gate_gradients=optimizer.Optimizer.GATE_OP,
+                        aggregation_method=None,
+                        colocate_gradients_with_ops=False,
+                        grad_loss=None):
+    """Compute gradients of `loss` for the variables in `var_list`.
+    
+    Add rho*elastic_difference to loss to control the exploration
+    This is the first part of `minimize()`.  It returns a list
+    of (gradient, variable) pairs where "gradient" is the gradient
+    for "variable".  Note that "gradient" can be a `Tensor`, an
+    `IndexedSlices`, or `None` if there is no gradient for the
+    given variable.
+
+    Args:
+      loss: A Tensor containing the value to minimize.
+      var_list: Optional list or tuple of `tf.Variable` to update to minimize
+        `loss`.  Defaults to the list of variables collected in the graph
+        under the key `GraphKey.TRAINABLE_VARIABLES`.
+      gate_gradients: How to gate the computation of gradients.  Can be
+        `GATE_NONE`, `GATE_OP`, or `GATE_GRAPH`.
+      aggregation_method: Specifies the method used to combine gradient terms.
+        Valid values are defined in the class `AggregationMethod`.
+      colocate_gradients_with_ops: If True, try colocating gradients with
+        the corresponding op.
+      grad_loss: Optional. A `Tensor` holding the gradient computed for `loss`.
+
+    Returns:
+      A list of (gradient, variable) pairs. Variable is always present, but
+      gradient can be `None`.
+
+    Raises:
+      TypeError: If `var_list` contains anything else than `Variable` objects.
+      ValueError: If some arguments are invalid.
+    """
+    if not var_list:
+      var_list = variables.trainable_variables()
+      
+    elastic_difference = [math_ops.subtract(v, lv) for v, lv in zip(
+      variables.trainable_variables(),
+      [self._local_map[var] for var in var_list])]
+
+    distance_loss = self._rho * math_ops.add_n(
+                      [gen_nn_ops.l2_loss(ed) for ed in elastic_difference])
+
+    total_loss = loss + distance_loss
+    return self._opt.compute_gradients(total_loss, var_list,
+                                       gate_gradients, aggregation_method,
+                                       colocate_gradients_with_ops, grad_loss)
+
+  def apply_gradients(self, grads_and_vars, global_step=None, name=None):
+    """Apply gradients to global variables.
+
+    This is the second part of `minimize()`. It returns an `Operation` that
+    applies gradients.
+
+    Args:
+      grads_and_vars: List of (gradient, variable) pairs as returned by
+        `compute_gradients()`.
+      global_step: Optional `Variable` to increment by one after the
+        variables have been updated.
+      name: Optional name for the returned operation.  Default to the
+        name passed to the `Optimizer` constructor.
+
+    Returns:
+      An `Operation` that applies the specified gradients. If `global_step`
+      was not None, that operation also increments `global_step`.
+
+    Raises:
+      TypeError: If `grads_and_vars` is malformed.
+      ValueError: If none of the variables have gradients.
+    """
+    apply_updates = self._opt.apply_gradients(grads_and_vars)
+    with ops.control_dependencies([apply_updates]):
+      local_update = state_ops.assign_add(
+        self._local_step, 1, name='local_step_update').op
+
+    # update global variables.
+    def _Update_global_variables():
+      local_vars = [v for g, v in grads_and_vars if g is not None]
+      global_center_vars = [self._global_map[var] for var in local_vars]
+      local_center_vars = [self._local_map[var] for var in local_vars]
+      local_center_vars_update = []
+      for lvar, var in zip(local_center_vars, global_center_vars):
+        local_center_vars_update.append(lvar.assign(var))
+      update_ops = []
+      differences = []
+      with ops.control_dependencies(local_center_vars_update):
+        for v, lv in zip(local_vars, local_center_vars):
+          with ops.device(v.device):
+            differences.append(math_ops.subtract(v, lv))
+        for lvar, diff in zip(local_vars, differences):
+          with ops.device(lvar.device):
+            update_ops.append(state_ops.assign_sub(lvar, math_ops.multiply(
+              self._moving_rate, diff)))
+        for var, diff in zip(global_center_vars, differences):
+          with ops.device(var.device):
+            update_ops.append(state_ops.assign_add(var, math_ops.multiply(
+              self._moving_rate, diff)))
+        if global_step:
+          with ops.colocate_with(global_step):
+            update_ops.append(state_ops.assign_add(global_step, 1))
+      variable_update = control_flow_ops.group(*(update_ops))
+      return variable_update
+
+    with ops.control_dependencies([local_update]):
+      condition = math_ops.equal(math_ops.mod(
+        self._local_step, self._period), 0)
+      conditional_update = control_flow_ops.cond(
+        condition, _Update_global_variables, control_flow_ops.no_op)
+    return conditional_update
+
+  def get_init_op(self, task_index):
+    """Returns the op to let all the local variables and local center
+    variables equal to the global center variables before the training begins"""
+
+    def _Add_sync_queues_and_barrier(enqueue_after_list):
+      """Adds ops to enqueu on all worker queues"""
+      sync_queues = [
+        data_flow_ops.FIFOQueue(self._num_worker, [dtypes.bool], shapes=[[]],
+                                shared_name='%s%s' % (
+                                  'variable_init_sync_queue', i)) for i in
+        range(self._num_worker)]
+      queue_ops = []
+      # For each other worker, add an entry in a queue
+      token = constant_op.constant(False)
+      with ops.control_dependencies(enqueue_after_list):
+        for i, q in enumerate(sync_queues):
+          if i == task_index:
+            queue_ops.append(control_flow_ops.no_op())
+          else:
+            queue_ops.append(q.enqueue(token))
+      queue_ops.append(
+        sync_queues[task_index].dequeue_many(len(sync_queues) - 1))
+      return control_flow_ops.group(*queue_ops)
+
+    init_ops = []
+    local_vars = variables.trainable_variables()
+    global_center_vars = [self._global_map[var] for var in local_vars]
+    local_center_vars = [self._local_map[var] for var in local_vars]
+    if not (local_vars and global_center_vars and local_center_vars):
+      raise ValueError(
+        'The lists of local_variables, global_center_variables, '
+        'local_center_variables should not be empty  ')
+    for lvar, gc_var, lc_var in zip(
+        local_vars, global_center_vars, local_center_vars):
+      init_ops.append(state_ops.assign(lvar, gc_var))
+      init_ops.append(state_ops.assign(lc_var, gc_var))
+
+    init_op = control_flow_ops.group(*(init_ops))
+    sync_queue_op = _Add_sync_queues_and_barrier([init_op])
+    return sync_queue_op
+
+  def make_session_run_hook(self, is_chief, task_index):
+    """Creates a hook to handle ElasticAverageOptimizerHook ops such as initialization."""
+    return _ElasticAverageOptimizerHook(self, is_chief, task_index)
+
+
+class _ElasticAverageOptimizerHook(session_run_hook.SessionRunHook):
+  def __init__(self, ea_optimizer, is_chief, task_index):
+    """Creates hook to handle ElasticAverageOptimizer initialization ops.
+
+    Args:
+      ea_optimizer: `ElasticAverageOptimizer` which this hook will initialize.
+      is_chief: `Bool`, whether is this a chief replica or not.
+    """
+    self._ea_optimizer = ea_optimizer
+    self._is_chief = is_chief
+    self._task_index = task_index
+
+  def begin(self):
+    self._local_init_op = variables.local_variables_initializer()
+    self._global_init_op = None
+    if self._is_chief:
+      self._global_init_op = variables.global_variables_initializer()
+    self._variable_init_op = self._ea_optimizer.get_init_op(self._task_index)
diff --git a/tensorflow/contrib/opt/python/training/elastic_average_optimizer_test.py b/tensorflow/contrib/opt/python/training/elastic_average_optimizer_test.py
new file mode 100644
index 0000000000..59e55fceee
--- /dev/null
+++ b/tensorflow/contrib/opt/python/training/elastic_average_optimizer_test.py
@@ -0,0 +1,225 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for ElasticAverageOptimizer."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import portpicker
+from tensorflow.python.framework import constant_op
+from tensorflow.python.framework import ops
+from tensorflow.python.ops import variables
+from tensorflow.python.platform import test
+from tensorflow.python.training import gradient_descent
+from tensorflow.python.training import server_lib
+from tensorflow.python.training import training
+from tensorflow.python.training import training_util
+from tensorflow.python.ops import variable_scope
+from tensorflow.python.training import device_setter
+
+from tensorflow.contrib.opt.python.training.elastic_average_optimizer import \
+  ElasticAverageOptimizer, ElasticAverageCustomGetter, GLOBAL_VARIABLE_NAME
+
+
+def create_local_cluster(num_workers, num_ps, protocol="grpc"):
+  """Create local GRPC servers and return them."""
+  worker_ports = [portpicker.pick_unused_port() for _ in range(num_workers)]
+  ps_ports = [portpicker.pick_unused_port() for _ in range(num_ps)]
+  cluster_dict = {
+    "worker": ["localhost:%s" % port for port in worker_ports],
+    "ps": ["localhost:%s" % port for port in ps_ports]
+  }
+  cs = server_lib.ClusterSpec(cluster_dict)
+
+  workers = [
+    server_lib.Server(
+      cs, job_name="worker", protocol=protocol, task_index=ix, start=True)
+    for ix in range(num_workers)
+  ]
+  ps_servers = [
+    server_lib.Server(
+      cs, job_name="ps", protocol=protocol, task_index=ix, start=True)
+    for ix in range(num_ps)
+  ]
+
+  return cluster_dict, workers, ps_servers
+
+
+# Creates the workers and return their sessions, graphs, train_ops.
+# Cheif worker will update at last
+def _get_workers(num_workers, period, workers, moving_rate):
+  sessions = []
+  graphs = []
+  train_ops = []
+  for worker_id in range(num_workers):
+    graph = ops.Graph()
+    is_chief = (worker_id == 0)
+    with graph.as_default():
+      worker_device = "/job:worker/task:%d/cpu:0" % (worker_id)
+      ea_coustom = ElasticAverageCustomGetter(
+        worker_device=worker_device)
+      with variable_scope.variable_scope('',
+                                         custom_getter=ea_coustom), ops.device(
+        device_setter.replica_device_setter(worker_device=worker_device,
+                                            ps_device="/job:ps/task:0/cpu:0",
+                                            ps_tasks=1)):
+        global_step = variables.Variable(0, name='global_step',
+                                         trainable=False)
+        var_0 = variable_scope.get_variable(initializer=0.0, name="v0")
+        var_1 = variable_scope.get_variable(initializer=1.0, name="v1")
+
+      with ops.device("/job:worker/task:" + str(worker_id)):
+        grads_0 = constant_op.constant(-1.0)
+        grads_1 = constant_op.constant(-1.0)
+
+        sgd_opt = gradient_descent.GradientDescentOptimizer(1.0)
+        opt = ElasticAverageOptimizer(
+          opt=sgd_opt,
+          num_worker=num_workers,
+          moving_rate=moving_rate,
+          communication_period=period,
+          ea_custom_getter=ea_coustom
+        )
+        train_op = [
+          opt.apply_gradients(
+            ([grads_0, var_0],
+             [grads_1, var_1]), global_step)
+        ]
+        easgd_hook = opt.make_session_run_hook(is_chief, worker_id)
+      # Creates MonitoredSession
+      sess = training.MonitoredTrainingSession(workers[worker_id].target,
+                                               hooks=[easgd_hook])
+
+    sessions.append(sess)
+    graphs.append(graph)
+    train_ops.append(train_op)
+
+  return sessions, graphs, train_ops
+
+
+class ElasticAverageOptimizerTest(test.TestCase):
+  def _run(self, train_op, sess):
+    sess.run(train_op)
+
+  def test1Workers2Period(self):
+    num_workers = 1
+    communication_period = 2
+    num_ps = 1
+    cluster, workers, _ = create_local_cluster(num_workers=num_workers,
+                                               num_ps=num_ps)
+
+    sessions, graphs, train_ops = _get_workers(num_workers,
+                                               communication_period,
+                                               workers, 1.0)
+
+    var_0 = graphs[0].get_tensor_by_name('v0:0')
+    var_1 = graphs[0].get_tensor_by_name('v1:0')
+    global_step = training_util.get_global_step(graphs[0])
+    var_0_g = graphs[0].get_tensor_by_name(GLOBAL_VARIABLE_NAME + "/v0:0")
+    var_1_g = graphs[0].get_tensor_by_name(GLOBAL_VARIABLE_NAME + "/v1:0")
+    # Verify the initialized value.
+    self.assertAllEqual(0.0, sessions[0].run(var_0))
+    self.assertAllEqual(1.0, sessions[0].run(var_1))
+    self.assertAllEqual(0.0, sessions[0].run(var_0_g))
+    self.assertAllEqual(1.0, sessions[0].run(var_1_g))
+    self.assertAllEqual(0, sessions[0].run(global_step))
+
+    sessions[0].run(train_ops[0])
+
+    self.assertAllEqual(1.0, sessions[0].run(var_0))
+    self.assertAllEqual(2.0, sessions[0].run(var_1))
+    self.assertAllEqual(0.0, sessions[0].run(var_0_g))
+    self.assertAllEqual(1.0, sessions[0].run(var_1_g))
+    self.assertAllEqual(0, sessions[0].run(global_step))
+
+    # iteration 2, global varibale update
+    sessions[0].run(train_ops[0])
+
+    self.assertAllEqual(0.0, sessions[0].run(var_0))
+    self.assertAllEqual(1.0, sessions[0].run(var_1))
+    self.assertAllEqual(2.0, sessions[0].run(var_0_g))
+    self.assertAllEqual(3.0, sessions[0].run(var_1_g))
+    self.assertAllEqual(1, sessions[0].run(global_step))
+
+    # iteration 3
+    sessions[0].run(train_ops[0])
+
+    self.assertAllEqual(1.0, sessions[0].run(var_0))
+    self.assertAllEqual(2.0, sessions[0].run(var_1))
+    self.assertAllEqual(2.0, sessions[0].run(var_0_g))
+    self.assertAllEqual(3.0, sessions[0].run(var_1_g))
+    self.assertAllEqual(1, sessions[0].run(global_step))
+
+  def test2Worker1Period(self):
+    num_workers = 2
+    communication_period = 1
+    num_ps = 2
+    cluster, workers, _ = create_local_cluster(num_workers=num_workers,
+                                               num_ps=num_ps)
+
+    sessions, graphs, train_ops = _get_workers(num_workers,
+                                               communication_period,
+                                               workers, 0.5)
+
+    var_0 = graphs[0].get_tensor_by_name('v0:0')
+    var_1 = graphs[0].get_tensor_by_name('v1:0')
+
+    var_0_1 = graphs[1].get_tensor_by_name('v0:0')
+    var_1_1 = graphs[1].get_tensor_by_name('v1:0')
+
+    var_0_g = graphs[0].get_tensor_by_name(GLOBAL_VARIABLE_NAME + "/v0:0")
+    var_1_g = graphs[0].get_tensor_by_name(GLOBAL_VARIABLE_NAME + "/v1:0")
+    # Verify the initialized value.
+    self.assertAllEqual(0.0, sessions[0].run(var_0))
+    self.assertAllEqual(1.0, sessions[0].run(var_1))
+    self.assertAllEqual(0.0, sessions[1].run(var_0_1))
+    self.assertAllEqual(1.0, sessions[1].run(var_1_1))
+    self.assertAllEqual(0.0, sessions[0].run(var_0_g))
+    self.assertAllEqual(1.0, sessions[0].run(var_1_g))
+
+    sessions[0].run(train_ops[0])
+    sessions[1].run(train_ops[1])
+
+    self.assertAllEqual(0.5, sessions[0].run(var_0))
+    self.assertAllEqual(1.5, sessions[0].run(var_1))
+    self.assertAllEqual(0.75, sessions[0].run(var_0_g))
+    self.assertAllEqual(1.75, sessions[0].run(var_1_g))
+    self.assertAllEqual(0.75, sessions[1].run(var_0_1))
+    self.assertAllEqual(1.75, sessions[1].run(var_1_1))
+
+  def testPS2TasksWithClusterSpecClass(self):
+    cluster_spec = server_lib.ClusterSpec({
+      "ps": ["ps0:2222", "ps1:2222"],
+      "worker": ["worker0:2222", "worker1:2222", "worker2:2222"]
+    })
+    ea_coustom = ElasticAverageCustomGetter(
+      worker_device="/job:worker/task:0")
+    from tensorflow.python.training import device_setter
+    with ops.device(
+        device_setter.replica_device_setter(cluster=cluster_spec,
+                                            worker_device="/job:worker/task:0",
+                                            ps_device="/job:ps")), \
+         variable_scope.variable_scope('', custom_getter=ea_coustom):
+      v = variable_scope.get_variable(initializer=[1, 2], name="v")
+      w = variable_scope.get_variable(initializer=[2, 1], name='w')
+      v_g, w_g = ea_coustom._global_map[v],ea_coustom._global_map[w]
+      self.assertDeviceEqual("/job:worker/task:0", v.device)
+      self.assertDeviceEqual("job:ps/task:0", v_g.device)
+      self.assertDeviceEqual("/job:worker/task:0", w.device)
+      self.assertDeviceEqual("job:ps/task:1", w_g.device)
+
+
+if __name__ == '__main__':
+  test.main()
diff --git a/tensorflow/contrib/rnn/python/kernel_tests/core_rnn_cell_test.py b/tensorflow/contrib/rnn/python/kernel_tests/core_rnn_cell_test.py
index 84fcf733c1..63155faf1e 100644
--- a/tensorflow/contrib/rnn/python/kernel_tests/core_rnn_cell_test.py
+++ b/tensorflow/contrib/rnn/python/kernel_tests/core_rnn_cell_test.py
@@ -39,6 +39,9 @@ from tensorflow.python.ops import rnn_cell_impl
 from tensorflow.python.ops import variable_scope
 from tensorflow.python.ops import variables as variables_lib
 from tensorflow.python.platform import test
+from tensorflow.python.framework import test_util
+from tensorflow.contrib.rnn.python.ops import rnn_cell as contrib_rnn_cell
+
 
 # pylint: enable=protected-access
 Linear = core_rnn_cell._Linear  # pylint: disable=invalid-name
diff --git a/tensorflow/contrib/rnn/python/ops/rnn_cell.py b/tensorflow/contrib/rnn/python/ops/rnn_cell.py
index 5a6d287c68..c6b1316043 100644
--- a/tensorflow/contrib/rnn/python/ops/rnn_cell.py
+++ b/tensorflow/contrib/rnn/python/ops/rnn_cell.py
@@ -36,6 +36,7 @@ from tensorflow.python.ops import nn_ops
 from tensorflow.python.ops import random_ops
 from tensorflow.python.ops import rnn_cell_impl
 from tensorflow.python.ops import variable_scope as vs
+from tensorflow.python.ops import partitioned_variables
 from tensorflow.python.platform import tf_logging as logging
 from tensorflow.python.util import nest
 
diff --git a/tensorflow/contrib/seq2seq/python/ops/helper.py b/tensorflow/contrib/seq2seq/python/ops/helper.py
index b55d90cbab..dec03ce43f 100644
--- a/tensorflow/contrib/seq2seq/python/ops/helper.py
+++ b/tensorflow/contrib/seq2seq/python/ops/helper.py
@@ -223,8 +223,7 @@ class TrainingHelper(Helper):
 
   def sample(self, time, outputs, name=None, **unused_kwargs):
     with ops.name_scope(name, "TrainingHelperSample", [time, outputs]):
-      sample_ids = math_ops.cast(
-          math_ops.argmax(outputs, axis=-1), dtypes.int32)
+      sample_ids = math_ops.argmax(outputs, axis=-1, output_type=dtypes.int32)
       return sample_ids
 
   def next_inputs(self, time, outputs, state, name=None, **unused_kwargs):
@@ -540,8 +539,7 @@ class GreedyEmbeddingHelper(Helper):
     if not isinstance(outputs, ops.Tensor):
       raise TypeError("Expected outputs to be a single Tensor, got: %s" %
                       type(outputs))
-    sample_ids = math_ops.cast(
-        math_ops.argmax(outputs, axis=-1), dtypes.int32)
+    sample_ids = math_ops.argmax(outputs, axis=-1, output_type=dtypes.int32)
     return sample_ids
 
   def next_inputs(self, time, outputs, state, sample_ids, name=None):
diff --git a/tensorflow/contrib/slim/python/slim/data/dataset_data_provider.py b/tensorflow/contrib/slim/python/slim/data/dataset_data_provider.py
index a781c647a1..c42c7b3391 100644
--- a/tensorflow/contrib/slim/python/slim/data/dataset_data_provider.py
+++ b/tensorflow/contrib/slim/python/slim/data/dataset_data_provider.py
@@ -62,7 +62,9 @@ class DatasetDataProvider(data_provider.DataProvider):
                seed=None,
                scope=None):
     """Creates a DatasetDataProvider.
-
+    Note: if `num_epochs` is not `None`,  local counter `epochs` will be created
+    by relevant function. Use `local_variables_initializer()` to initialize
+    local variables.
     Args:
       dataset: An instance of the Dataset class.
       num_readers: The number of parallel readers to use.
diff --git a/tensorflow/contrib/timeseries/python/timeseries/head.py b/tensorflow/contrib/timeseries/python/timeseries/head.py
index 5896fc2a20..f0330bfbbd 100644
--- a/tensorflow/contrib/timeseries/python/timeseries/head.py
+++ b/tensorflow/contrib/timeseries/python/timeseries/head.py
@@ -19,7 +19,7 @@ from __future__ import print_function
 
 import re
 
-from tensorflow.contrib.framework.python.ops import variables
+from tensorflow.python.training import training_util
 from tensorflow.contrib.layers.python.layers import optimizers
 
 from tensorflow.contrib.timeseries.python.timeseries import feature_keys
@@ -79,7 +79,7 @@ class _TimeSeriesRegressionHead(head_lib._Head):  # pylint:disable=protected-acc
 
     train_op = optimizers.optimize_loss(
         model_outputs.loss,
-        global_step=variables.get_global_step(),
+        global_step=training_util.get_global_step(),
         optimizer=self.optimizer,
         # Learning rate is set in the Optimizer object
         learning_rate=None)
diff --git a/tensorflow/core/common_runtime/function.cc b/tensorflow/core/common_runtime/function.cc
index c51b172066..ee9988f0b7 100644
--- a/tensorflow/core/common_runtime/function.cc
+++ b/tensorflow/core/common_runtime/function.cc
@@ -348,7 +348,7 @@ Status FunctionLibraryRuntimeImpl::CreateKernel(const NodeDef& ndef,
                                  kernel);
   }
 
-  // Try to instantiate this function for the func/attr. Maybe its
+  // Try to instantiate this function for the func/attr. Maybe it's
   // cached already.
   Handle handle;
   TF_RETURN_IF_ERROR(Instantiate(ndef.op(), AttrSlice(&ndef.attr()), &handle));
diff --git a/tensorflow/core/framework/numeric_types.h b/tensorflow/core/framework/numeric_types.h
index edd952b824..8514d7c474 100644
--- a/tensorflow/core/framework/numeric_types.h
+++ b/tensorflow/core/framework/numeric_types.h
@@ -46,7 +46,7 @@ struct bfloat16 {
   EIGEN_DEVICE_FUNC bfloat16() {}
 
   EIGEN_DEVICE_FUNC explicit bfloat16(const float v) {
-    if (isnan(v)) {
+    if (Eigen::numext::isnan(v)) {
       value = NAN_VALUE;
       return;
     }
diff --git a/tensorflow/core/framework/tensor_shape_test.cc b/tensorflow/core/framework/tensor_shape_test.cc
index 06c576c7d4..d8a9c0bac5 100644
--- a/tensorflow/core/framework/tensor_shape_test.cc
+++ b/tensorflow/core/framework/tensor_shape_test.cc
@@ -359,7 +359,8 @@ Status TensorShapeOld::IsValidShape(const TensorShapeProto& proto) {
   for (const auto& d : proto.dim()) {
     if (d.size() < 0) {
       return errors::InvalidArgument("Shape ", DebugString(proto),
-                                     " has negative dimensions");
+                                     " has negative dimensions; ",
+                                     "perhaps an un-fed placeholder?");
     }
     num_elements *= d.size();
     if (num_elements > kMaxElements) {
diff --git a/tensorflow/core/graph/mkl_graph_util.h b/tensorflow/core/graph/mkl_graph_util.h
index 880e4e712e..3df981437a 100644
--- a/tensorflow/core/graph/mkl_graph_util.h
+++ b/tensorflow/core/graph/mkl_graph_util.h
@@ -76,12 +76,12 @@ namespace tensorflow {
 namespace mkl_op_registry {
   static const char* kMklOpLabel = "MklOp";
   static const char* kMklOpLabelPattern = "label='MklOp'";
+  // Prefix that we add to Tensorflow op name to construct Mkl op name.
+  static const char* const kMklOpPrefix = "_Mkl";
 
   // Get the name of Mkl op from original TensorFlow op
   // We prefix 'Mkl' to the original op to get Mkl op.
   inline string GetMklOpName(const string& name) {
-    // Prefix that we add to Tensorflow op name to construct Mkl op name.
-    const char* const kMklOpPrefix = "_Mkl";
     return string(kMklOpPrefix) + name;
   }
 
@@ -94,9 +94,6 @@ namespace mkl_op_registry {
     string kernel = KernelsRegisteredForOp(op_name);
     bool result =
         kernel.find(kMklOpLabelPattern) != string::npos && (T == DT_FLOAT);
-    if (result) {
-      VLOG(1) << "mkl_op_registry::" << op_name << " is " << kMklOpLabel;
-    }
     return result;
   }
 
@@ -112,15 +109,12 @@ namespace mkl_op_registry {
     if (!IsMklOp(op_name, T)) {
       return false;
     }
-
     bool result = (0 == op_name.compare(GetMklOpName("Add")) ||
                     0 == op_name.compare(GetMklOpName("Sub")) ||
                     0 == op_name.compare(GetMklOpName("Mul")) ||
                     0 == op_name.compare(GetMklOpName("Maximum")) ||
                     0 == op_name.compare(GetMklOpName("SquaredDifference")));
 
-    VLOG(1) << "mkl_op_registry::" << op_name
-            << " is elementwise MKL op: " << result;
     return result;
   }
 }  // namespace mkl_op_registry
diff --git a/tensorflow/core/graph/mkl_layout_pass.cc b/tensorflow/core/graph/mkl_layout_pass.cc
index 912075aa28..3beca1e5d2 100644
--- a/tensorflow/core/graph/mkl_layout_pass.cc
+++ b/tensorflow/core/graph/mkl_layout_pass.cc
@@ -42,6 +42,8 @@ limitations under the License.
 
 namespace tensorflow {
 
+#ifndef INTEL_MKL_DNN
+
 // This pass implements rewriting of graph to support following scenarios:
 // (A) Merging nodes in the graph
 // (B) Rewriting a node in the graph to a new node
@@ -2213,6 +2215,2087 @@ Status MklLayoutRewritePass::Run(
   return Status::OK();
 }
 
+#else  // INTEL_MKL_DNN
+
+// This pass implements rewriting of graph to support following scenarios:
+// (A) Merging nodes in the graph
+// (B) Rewriting a node in the graph to a new node
+//     Rewrite happens under following scenario:
+//     - Propagating Mkl layout as an additional output tensor
+//        (we will loosely call a tensor that carries Mkl layout as Mkl tensor
+//         henceforth.) from every Mkl supported NN layer.
+//
+// Example of A : Merging nodes in the graph
+// -----------------------------------------
+// Currently, we merge Conv2D+AddBias together. Consider Conv2D and BiasAdd as:
+//
+//           O = Conv2D(A, B)
+//           P = BiasAdd(O, C)
+//
+// We merge them into Conv2DWithBias as:
+//           P = _MklConv2DWithBias(A, A_m, B, B_m, C, C_m)
+//
+// The meaning of A_m, B_m and C_m is explained in B.1.
+//
+// Merge rules:
+//  - The merge for Conv2D and BiasAdd happens when the output of Conv2D _only_
+//    goes to BiasAdd.
+//  - Also, the intersection of attributes of both the nodes must have same
+//    values.
+//  - Both the nodes must have been assigned to same device (if any).
+//
+// Example of B.1 : Rewriting nodes to Mkl nodes
+// ---------------------------------------------
+// Consider a Relu node. Current definition of Relu node looks like:
+//
+//           O = Relu(A)
+//
+// Relu has 1 input (A), and 1 output (O).
+//
+// This rewrite pass will generate a new graph node for Relu (new node is
+// called MklRelu) as:
+//
+//          O, O_m = MklRelu(A, A_m)
+//
+// MklRelu has 2 inputs (A and A_m) and 2 outputs (O and O_m). Here input A is
+// same as input A of Relu; output O is same as output O of Relu. O_m is the
+// additional output tensor that will be set by MklRelu, and it represents
+// Mkl tensor corresponding to O -- in other words, O_m is some kind of
+// metadata for O. A_m is additional input of Relu, and it represents metadata
+// for A - as O_m is metadata for O, A_m is metadata for A. MklRelu receives
+// this metadata from previous node in the graph.
+//
+// When a previous node in the graph is an Mkl node, A_m will represent a valid
+// Mkl tensor. But when a previous node is not an Mkl node, A_m will represent
+// a dummy Mkl tensor.
+//
+// Rewriting rules:
+//  - Selection of a node for rewriting happens by registering the op type of
+//    the node with the rewriting pass. If the op type is not registered, then
+//    all nodes of this op type will not be rewritten.
+//  - Number of inputs after rewriting:
+//      Since for every input Tensorflow tensor, the rewritten node gets Mkl
+//      tensor(s), rewritten node gets 2*N inputs, where N is the number of
+//      inputs for the original node.
+//  - Number of outputs after rewriting:
+//      Since for every output Tensorflow tensor, the rewritten node generates
+//      Mkl tensor(s), the rewritten node generates 2*N outputs, where N is the
+//      number of outputs of the original node.
+//  - Ordering of Tensorflow tensors and Mkl tensors:
+//      Since every rewritten node generates twice the number of inputs and
+//      outputs, one could imagine various orderings among Tensorflow tensors
+//      and Mkl tensors. E.g., assume an op 'Conv2D' that takes (A, B) as
+//      inputs, then the new op '_MklConv2D' can take inputs A, B, A_m and B_m
+//      in A, A_m, B, B_m order or it can also take them in A, B, A_m, B_m
+//      order. Among N inputs one can get N! permutations.
+//
+//      So the question is: which order do we follow? We support 2 types of
+//      orderings: (1) interleaved, and (2) contiguous. Interleaved ordering
+//      follows an intuitive order where an Mkl tensor follows the
+//      corresponding Tensorflow tensor immediately. In the context of the
+//      above example, it will be: A, A_m, B, B_m. Note that the ordering rule
+//      applies to both the inputs and outputs. Contiguous ordering means
+//      all the Tensorflow tensors are contiguous followed by all the Mkl
+//      tensors. We use contiguous ordering as default.
+//
+// Graph rewrite algorithm:
+//      Algorithm: Graph Rewrite
+//      Input: Graph G, Names of the nodes to rewrite and their new names
+//      Output: Modified Graph G' if the nodes are modified, G otherwise.
+//      Start:
+//        N = Topological_Sort(G) // N is a set of nodes in toposort order.
+//        foreach node n in N
+//        do
+//          if (Is_MKL_Op(n))  // Can this node accept an Mkl layout as input.
+//          then
+//            E = set of <incoming edge and its src_output slot> of n
+//            E' = {}   // a new set of edges for rewritten node
+//            foreach <e,s> in E
+//            do
+//              E' U {<e,s>}  // First copy edge which generates Tensorflow
+//                            // tensor as it is
+//              m = Source node of edge e
+//              if Is_Rewritten(m)  // Did we rewrite this node in this pass?
+//              then
+//                E' U {<m,s+1>}    // If yes, then m will generate an Mkl
+//                                  // tensor as an additional output.
+//              else
+//                d = Generate_Dummy_Mkl_Tensor()  // If not, generate a dummy
+//                                                 // Mkl tensor.
+//                E' U {<d,0>}  // The dummy Mkl tensor has only 1 output slot.
+//              fi
+//            done
+//            n' = Build_New_Node(G,new_name,E')
+//            Mark_Rewritten(n')  // Mark the new node as being rewritten.
+//          fi
+//        done
+//
+//      Explanation:
+//        For graph rewrite, we visit nodes of the input graph in the
+//        topological sort order. With this ordering, we visit nodes in the
+//        top-to-bottom fashion. We need this order because while visiting a
+//        node we want that all of its input nodes are visited and rewritten if
+//        applicable. This is because if we need to rewrite a given node
+//        then all of its input nodes need to be fixed (in other words they
+//        cannot be deleted later.)
+//
+//        While visiting a node, we first check if the op type of the node is
+//        an Mkl op. If it is, then we rewrite that node after constructing
+//        new inputs to the node. If the op type of the node is not Mkl op,
+//        then we do not rewrite that node.
+//
+// Handling workspace propagation for certain ops:
+//
+//        Certain backward ops in MKL (MaxPool, LRN and BatchNorm) require
+//        passing of a workspace from their respective forward ops. Workspace
+//        tensors provide memory for storing results of intermediate operations
+//        which are helpful in backward propagation. TensorFlow does not have
+//        a notion of a workspace and as a result does not allow producing
+//        additional outputs from these forward ops. For these ops, we need
+//        to add 2 extra edges between forward ops and their corresponding
+//        backward ops - the first extra edge carries a workspace tensor and
+//        the second one carries an Mkl tensor for the workspace tensor.
+//
+//        Example:
+//
+//        Typical graph for MaxPool and its gradient looks like:
+//
+//        A = MaxPool(T)
+//        B = MaxPoolGrad(X, A, Y)
+//
+//        We will transform this graph to propagate the workspace as:
+//        (with the contiguous ordering)
+//
+//        A, W, A_m, W_m = MklMaxPool(T, T_m)
+//        B, B_m = MklMaxPoolGrad(X, A, Y, W, X_m, A_m, Y_m, W_m)
+//
+//        Here W is the workspace tensor. Transformed tensor names with the
+//        suffix _m are Mkl tensors, and this transformation has been done
+//        using the algorithm discussed earlier. The transformation for
+//        workspace propagation only adds extra outputs (W, W_m) for a forward
+//        op and connects them to the corresponding backward ops.
+//
+//        Terms:
+//
+//        Forward op name = name of the op in the forward pass
+//          where a workspace tensor originates (MaxPool in this example)
+//        Backward op name = name of the op in the backward pass that receives
+//          a workspace tensor from the forward op (MaxPoolGrad in the example)
+//        Slot = Position of the output or input slot that will be
+//               used by the workspace tensor (1 for MklMaxPool as W is the 2nd
+//               output of MaxPool (0 is 1st); 3 for MklMaxPoolGrad)
+//
+//        Question:
+//
+//        How do we associate a backward op to a forward op? There can be more
+//        than one op with the exact same name.
+//
+//        In this example, we associate MaxPoolGrad with MaxPool. But there
+//        could be more than one MaxPool ops. To solve this problem, we look
+//        for _direct_ edge between a forward op and a backward op (tensor A is
+//        flowing along this edge in the example).
+//
+//        How do we transform forward and backward ops when there is no direct
+//        edge between them? In such a case, we generate dummy tensors for
+//        workspace tensors. For the example, transformation of MaxPool will
+//        be exactly same as it would be when there is a direct edge between
+//        the forward and the backward op --- it is just that MaxPool won't
+//        generate any workspace tensor. For MaxPoolGrad, the transformation
+//        will also be same, but instead of connecting W and W_m with the
+//        outputs of MaxPool, we will produce dummy tensors for them, and we
+//        will set workspace_enabled attribute to false.
+//
+class MklLayoutRewritePass : public GraphOptimizationPass {
+ public:
+  MklLayoutRewritePass() {
+    // NOTE: names are alphabetically sorted.
+    csinfo_.addn = "AddN";
+    csinfo_.avg_pool = "AvgPool";
+    csinfo_.avg_pool_grad = "AvgPoolGrad";
+    csinfo_.bias_add = "BiasAdd";
+    csinfo_.bias_add_grad = "BiasAddGrad";
+    csinfo_.concat = "Concat";
+    csinfo_.concatv2 = "ConcatV2";
+    csinfo_.conv2d = "Conv2D";
+    csinfo_.conv2d_with_bias = "__MklDummyConv2DWithBias";
+    csinfo_.conv2d_grad_input = "Conv2DBackpropInput";
+    csinfo_.conv2d_grad_filter = "Conv2DBackpropFilter";
+    csinfo_.conv2d_grad_filter_with_bias =
+                              "__MklDummyConv2DBackpropFilterWithBias";
+    csinfo_.fused_batch_norm = "FusedBatchNorm";
+    csinfo_.fused_batch_norm_grad = "FusedBatchNormGrad";
+    csinfo_.identity = "Identity";
+    csinfo_.lrn = "LRN";
+    csinfo_.lrn_grad = "LRNGrad";
+    csinfo_.matmul = "MatMul";
+    csinfo_.max_pool = "MaxPool";
+    csinfo_.max_pool_grad = "MaxPoolGrad";
+    csinfo_.mkl_conv2d = "_MklConv2D";
+    csinfo_.mkl_conv2d_grad_input = "_MklConv2DBackpropInput";
+    csinfo_.mkl_conv2d_grad_filter = "_MklConv2DBackpropFilter";
+    csinfo_.mkl_conv2d_with_bias = "_MklConv2DWithBias";
+    csinfo_.mkl_conv2d_grad_filter_with_bias =
+                                   "_MklConv2DBackpropFilterWithBias";
+    csinfo_.relu = "Relu";
+    csinfo_.relu_grad = "ReluGrad";
+    csinfo_.tanh       = "Tanh";
+    csinfo_.tanh_grad  = "TanhGrad";
+    csinfo_.reshape = "Reshape";
+    csinfo_.softmax = "Softmax";
+    csinfo_.split = "Split";
+    // Element-wise ops. Ensure you also add any new ops to IsOpElementWise
+    // in the MklUtil.h (IsMklElementWiseOp method) to ensure that the
+    // MklInputConversion op is added before it.
+    csinfo_.add = "Add";
+    csinfo_.maximum = "Maximum";
+    csinfo_.mul = "Mul";
+    csinfo_.squared_difference = "SquaredDifference";
+    csinfo_.sub = "Sub";
+    // End - element-wise ops. See note above.
+
+    // NOTE: names are alphabetically sorted.
+    rinfo_.push_back({csinfo_.addn, mkl_op_registry::GetMklOpName(csinfo_.addn),
+                      CopyAttrsAddN, AddNRewrite});
+    rinfo_.push_back({csinfo_.add,
+                      mkl_op_registry::GetMklOpName(csinfo_.add),
+                      CopyAttrsDataType, AlwaysRewrite});
+    rinfo_.push_back({csinfo_.avg_pool,
+                      mkl_op_registry::GetMklOpName(csinfo_.avg_pool),
+                      CopyAttrsPooling, AlwaysRewrite});
+    rinfo_.push_back({csinfo_.avg_pool_grad,
+                      mkl_op_registry::GetMklOpName(csinfo_.avg_pool_grad),
+                      CopyAttrsPooling, AlwaysRewrite});
+    rinfo_.push_back({csinfo_.concat,
+                      mkl_op_registry::GetMklOpName(csinfo_.concat),
+                      CopyAttrsConcat, AlwaysRewrite});
+    rinfo_.push_back({csinfo_.concatv2,
+                      mkl_op_registry::GetMklOpName(csinfo_.concatv2),
+                      CopyAttrsConcatV2, AlwaysRewrite});
+    rinfo_.push_back({csinfo_.conv2d,
+                      mkl_op_registry::GetMklOpName(csinfo_.conv2d),
+                      CopyAttrsConv2D, AlwaysRewrite});
+    rinfo_.push_back({csinfo_.conv2d_with_bias,
+                      csinfo_.mkl_conv2d_with_bias,
+                      CopyAttrsConv2D, AlwaysRewrite});
+    rinfo_.push_back({csinfo_.conv2d_grad_filter,
+                      mkl_op_registry::GetMklOpName(csinfo_.conv2d_grad_filter),
+                      CopyAttrsConv2D, AlwaysRewrite});
+    rinfo_.push_back({csinfo_.conv2d_grad_filter_with_bias,
+                      csinfo_.mkl_conv2d_grad_filter_with_bias,
+                      CopyAttrsConv2D, AlwaysRewrite});
+    rinfo_.push_back({csinfo_.conv2d_grad_input,
+                      mkl_op_registry::GetMklOpName(csinfo_.conv2d_grad_input),
+                      CopyAttrsConv2D, AlwaysRewrite});
+    rinfo_.push_back({csinfo_.fused_batch_norm,
+                      mkl_op_registry::GetMklOpName(csinfo_.fused_batch_norm),
+                      CopyAttrsFusedBatchNorm, AlwaysRewrite});
+    rinfo_.push_back({csinfo_.fused_batch_norm_grad,
+                      mkl_op_registry::GetMklOpName(csinfo_.fused_batch_norm_grad),
+                      CopyAttrsFusedBatchNorm, AlwaysRewrite});
+    rinfo_.push_back({csinfo_.identity,
+                      mkl_op_registry::GetMklOpName(csinfo_.identity),
+                      CopyAttrsDataType, AlwaysRewrite});
+    /*
+    rinfo_.push_back({csinfo_.lrn,
+                      mkl_op_registry::GetMklOpName(csinfo_.lrn),
+                      CopyAttrsLRN, AlwaysRewrite});
+    rinfo_.push_back({csinfo_.lrn_grad,
+                      mkl_op_registry::GetMklOpName(csinfo_.lrn_grad),
+                      CopyAttrsLRN, AlwaysRewrite});
+    */
+    rinfo_.push_back({csinfo_.max_pool,
+                      mkl_op_registry::GetMklOpName(csinfo_.max_pool),
+                      CopyAttrsPooling, NonDepthBatchWisePoolRewrite});
+    rinfo_.push_back({csinfo_.max_pool_grad,
+                      mkl_op_registry::GetMklOpName(csinfo_.max_pool_grad),
+                      CopyAttrsPooling, AlwaysRewrite});
+    rinfo_.push_back({csinfo_.maximum,
+                      mkl_op_registry::GetMklOpName(csinfo_.maximum),
+                      CopyAttrsDataType, AlwaysRewrite});
+    rinfo_.push_back({csinfo_.mul,
+                      mkl_op_registry::GetMklOpName(csinfo_.mul),
+                      CopyAttrsDataType, AlwaysRewrite});
+    rinfo_.push_back({csinfo_.relu,
+                      mkl_op_registry::GetMklOpName(csinfo_.relu),
+                      CopyAttrsDataType, AlwaysRewrite});
+    rinfo_.push_back({csinfo_.relu_grad,
+                      mkl_op_registry::GetMklOpName(csinfo_.relu_grad),
+                      CopyAttrsDataType, AlwaysRewrite});
+    rinfo_.push_back({csinfo_.tanh,
+                      mkl_op_registry::GetMklOpName(csinfo_.tanh),
+                      CopyAttrsDataType, AlwaysRewrite});
+    rinfo_.push_back({csinfo_.tanh_grad,
+                      mkl_op_registry::GetMklOpName(csinfo_.tanh_grad),
+                      CopyAttrsDataType, AlwaysRewrite});
+    rinfo_.push_back({csinfo_.reshape,
+                      mkl_op_registry::GetMklOpName(csinfo_.reshape),
+                      CopyAttrsReshape, AlwaysRewrite});
+    rinfo_.push_back({csinfo_.softmax,
+                      mkl_op_registry::GetMklOpName(csinfo_.softmax),
+                      CopyAttrsDataType, AlwaysRewrite});
+    rinfo_.push_back({csinfo_.squared_difference,
+                      mkl_op_registry::GetMklOpName(csinfo_.squared_difference),
+                      CopyAttrsDataType, AlwaysRewrite});
+    rinfo_.push_back({csinfo_.sub,
+                      mkl_op_registry::GetMklOpName(csinfo_.sub),
+                      CopyAttrsDataType, AlwaysRewrite});
+
+    // Add info about which ops to add workspace edge to and the slots.
+    wsinfo_.push_back({csinfo_.lrn, csinfo_.lrn_grad, 0, 2, 1, 3});
+    wsinfo_.push_back({csinfo_.max_pool, csinfo_.max_pool_grad, 0, 1, 1, 3});
+
+    // Add a rule for merging nodes
+    minfo_.push_back({csinfo_.conv2d, csinfo_.bias_add,
+                      csinfo_.conv2d_with_bias,
+                      GetConv2DOrBiasAdd});
+
+    minfo_.push_back({csinfo_.conv2d_grad_filter, csinfo_.bias_add_grad,
+                      csinfo_.conv2d_grad_filter_with_bias,
+                      GetConv2DBackpropFilterOrBiasAddGrad});
+  }
+
+  // Standard interface to run pass
+  Status Run(const GraphOptimizationPassOptions& options);
+
+  // Helper function which does most of heavy lifting for rewriting
+  // Mkl nodes to propagate Mkl tensor as additional output
+  //
+  // Extracts common functionality between Run public interface and
+  // test interface.
+  //
+  // @return true, if and only if graph is mutated; false otherwise.
+  bool RunPass(std::unique_ptr<Graph>* g);
+
+  /// Structure to specify the name of an original node, its new name after
+  /// rewrite, the number of inputs to the original node, the function to
+  /// be used to copy attributes for the op, and the rule (if any) which
+  /// must hold for rewriting the node
+  typedef struct {
+    string name;      // Original name of op of the node in the graph
+    string new_name;  // New name of the op of the node in the graph
+    // A function handler to copy attributes from an old node to a new node.
+    std::function<void(const Node*, NodeBuilder*)> copy_attrs;
+    // A rule under which to rewrite this node
+    std::function<bool(const Node*)> rewrite_rule;
+  } RewriteInfo;
+
+  /// Structure to specify a forward op, a backward op, and the slot numbers
+  /// in the forward and backward ops where we will add a workspace edge.
+  typedef struct {
+    string fwd_op;    // Name of a forward op in the graph
+    string bwd_op;    // Name of a backward op in the graph
+    int fwd_slot;     // Output slot in the forward op node where actual
+                      // output tensor resides
+    int bwd_slot;     // Input slot in the backward op node where actual
+                      // input tensor resides
+    int ws_fwd_slot;  // Output slot in the forward op node where workspace
+                      // edge is added
+    int ws_bwd_slot;  // Input slot in the backward op node where workspace
+                      // edge is added
+  } WorkSpaceInfo;
+
+  /// Structure to specify information used in node merge of 2 operators
+  typedef struct {
+    string op1;       // Node string for one operator.
+    string op2;       // Node string for second operator.
+    string new_node;  // Name of the node after merge
+    // Function that enables user of the node merger to specify how to find
+    // second operator given the first operator.
+    std::function<Node*(const Node*)> get_node_to_be_merged;
+  } MergeInfo;
+
+  /// Structure to store all constant strings
+  /// NOTE: names are alphabetically sorted.
+  typedef struct {
+    string addn;
+    string add;
+    string avg_pool;
+    string avg_pool_grad;
+    string bias_add;
+    string bias_add_grad;
+    string concat;
+    string concatv2;
+    string conv2d;
+    string conv2d_with_bias;
+    string conv2d_grad_input;
+    string conv2d_grad_filter;
+    string conv2d_grad_filter_with_bias;
+    string fused_batch_norm;
+    string fused_batch_norm_grad;
+    string identity;
+    string lrn;
+    string lrn_grad;
+    string matmul;
+    string max_pool;
+    string max_pool_grad;
+    string maximum;
+    string mkl_conv2d;
+    string mkl_conv2d_grad_input;
+    string mkl_conv2d_grad_filter;
+    string mkl_conv2d_grad_filter_with_bias;
+    string mkl_conv2d_with_bias;
+    string mul;
+    string relu;
+    string relu_grad;
+    string tanh;
+    string tanh_grad;
+    string reshape;
+    string softmax;
+    string split;
+    string squared_difference;
+    string sub;
+  } ConstStringsInfo;
+
+ private:
+  /// Maintain info about nodes to rewrite
+  std::vector<RewriteInfo> rinfo_;
+
+  /// Maintain info about nodes to add workspace edge
+  std::vector<WorkSpaceInfo> wsinfo_;
+
+  /// Maintain info about nodes to be merged
+  std::vector<MergeInfo> minfo_;
+
+  /// Maintain structure of constant strings
+  static ConstStringsInfo csinfo_;
+
+ private:
+  // Is OpDef::ArgDef a list type? It could be N * T or list(type).
+  // Refer to opdef.proto for details of list type.
+  inline bool ArgIsList(const OpDef::ArgDef& arg) const {
+    return !arg.type_list_attr().empty() || !arg.number_attr().empty();
+  }
+
+  // Get length of a list in 'n' if 'arg' is of list type. Refer to
+  // description of ArgIsList for definition of list type.
+  inline int GetTensorListLength(const OpDef::ArgDef& arg, Node* n) {
+    CHECK_EQ(ArgIsList(arg), true);
+    int N = 0;
+    const string attr_name = !arg.type_list_attr().empty()
+                                 ? arg.type_list_attr()
+                                 : arg.number_attr();
+    if (!arg.type_list_attr().empty()) {
+      std::vector<DataType> value;
+      TF_CHECK_OK(GetNodeAttr(n->def(), attr_name, &value));
+      N = value.size();
+    } else {
+      TF_CHECK_OK(GetNodeAttr(n->def(), attr_name, &N));
+    }
+    return N;
+  }
+
+  // Can op represented by node 'n' run on DEVICE_CPU?
+  // Op can run on CPU with MKL if the runtime assigned device or the
+  // user requested device contains device CPU, or both are empty.
+  bool CanOpRunOnCPUDevice(const Node* n) {
+    bool result = true;
+    string reason;
+
+    // Substring that should be checked for in device name for CPU device.
+    const char* const kCPUDeviceSubStr = "CPU";
+
+    // If Op has been specifically assigned to a non-CPU device, then No.
+    if (!n->assigned_device_name().empty() &&
+        !StringPiece(n->assigned_device_name()).contains(kCPUDeviceSubStr)) {
+      result = false;
+      reason = "Op has been assigned a runtime device that is not CPU.";
+    }
+
+    // If user has specifically assigned this op to a non-CPU device, then No.
+    if (!n->def().device().empty() &&
+        !StringPiece(n->def().device()).contains(kCPUDeviceSubStr)) {
+      result = false;
+      reason = "User has assigned a device that is not CPU.";
+    }
+
+    if (result == false) {
+      VLOG(1) << "MklLayoutRewritePass: Skipping rewriting of the node "
+              << n->type_string() << ", reason: " << reason;
+    }
+
+    // Otherwise Yes.
+    return result;
+  }
+
+  // Return a node that can be merged with input node 'n'
+  //
+  // @return pointer to the node if we can find such a
+  // node. Otherwise, it returns nullptr.
+  Node* CheckForNodeMerge(const Node* n) const;
+
+  // Merge node 'm' with node 'n'.
+  // Currently, we merge (1) Conv2D with BiasAdd, and (2) BiasAddGrad with
+  // Conv2DBackpropFilter.
+  //
+  // Input nodes m and n may be deleted if the call to
+  // this function is successful. Attempt to use the pointers
+  // after the call to function may result in undefined behaviors.
+  //
+  // @input g - input graph, m - graph node, n - graph node to be merged with m
+  // @return Status::OK(), if merging is successful and supported.
+  //         Returns appropriate Status error code otherwise.
+  //         Graph is updated in case nodes are merged. Otherwise, it is
+  //         not updated.
+  Status MergeNode(std::unique_ptr<Graph>* g, Node* m, Node* n);
+
+  // Helper function to merge different nodes
+  Status MergeConv2DWithBiasAdd(std::unique_ptr<Graph>* g, Node* m, Node* n);
+  Status MergeConv2DBackpropFilterWithBiasAddGrad(std::unique_ptr<Graph>* g,
+                                                  Node* m, Node* n);
+
+  // Find BiasAdd or Conv2D node that can be merged with input node 'm'.
+  // If input 'm' is BiasAdd, then check if there exists Conv2D node that can be
+  // merged with 'm'. If input 'm' is Conv2D, then check if there exists BiasAdd
+  // node that can be merged with 'm'.
+  static Node* GetConv2DOrBiasAdd(const Node* m) {
+    CHECK_NOTNULL(m);
+    Node* n = nullptr;
+
+    if (m->type_string() == csinfo_.bias_add) {
+      // If a is BiasAdd, then Conv2D is 0th input of BiasAdd.
+      TF_CHECK_OK(m->input_node(0, &n));
+    } else {
+      CHECK_EQ(m->type_string(), csinfo_.conv2d);
+      // Go over all output edges and search for BiasAdd Node.
+      // 0th input of BiasAdd is Conv2D.
+      for (const Edge* e : m->out_edges()) {
+        if (!e->IsControlEdge() &&
+            e->dst()->type_string() == csinfo_.bias_add &&
+            e->dst_input() == 0) {
+          n = e->dst();
+          break;
+        }
+      }
+    }
+
+    if (n == nullptr) {
+      VLOG(1) << "MklLayoutRewritePass: Could not find matching "
+              << "Conv2D and BiasAdd node for merging. Input node: "
+              << m->DebugString();
+    }
+
+    return n;
+  }
+
+  // Find Conv2DBackpropFilter or BiasAddGrad node that can be merged with input
+  // node 'm'. If input 'm' is Conv2DBackpropFilter, then check if there exists
+  // BiasAddGrad node that can be merged with 'm'. If input 'm' is BiasAddGrad,
+  // then check if there exists Conv2DBackpropFilter node that can be merged
+  // with 'm'.
+  //
+  // Graph that will allow us to connect Conv2DBackpropFilter with BiasAddGrad
+  // would look like:
+  //
+  // _ = Conv2DBackpropFilter(F, _, G)
+  // _ = BiasAddGrad(G)
+  //
+  // So 1st input of BiasAddGrad connects with 3rd input of
+  // Conv2DBackpropFilter and vice versa.
+  static Node* GetConv2DBackpropFilterOrBiasAddGrad(const Node* m) {
+    CHECK_NOTNULL(m);
+    Node* n = nullptr;
+
+    if (m->type_string() == csinfo_.bias_add_grad) {
+      // Get 1st input 'g' of BiasAddGrad.
+      Node* g = nullptr;
+      TF_CHECK_OK(m->input_node(0, &g));
+      // Now traverse all outgoing edges from g that have destination node as
+      // Conv2DBackpropFilter.
+      for (const Edge* e : g->out_edges()) {
+        if (!e->IsControlEdge() &&
+            e->dst()->type_string() == csinfo_.conv2d_grad_filter &&
+            e->dst_input() == 2 /* 3rd input of BackpropFilter */) {
+          n = e->dst();
+          break;
+        }
+      }
+    } else {
+      CHECK_EQ(m->type_string(), csinfo_.conv2d_grad_filter);
+      // Get 3rd input 'g' of Conv2DBackpropFilter.
+      Node* g = nullptr;
+      TF_CHECK_OK(m->input_node(2, &g));
+      // Now traverse all outgoing edges from g that have destination node as
+      // BiasAddGrad.
+      for (const Edge* e : g->out_edges()) {
+        if (!e->IsControlEdge() &&
+            e->dst()->type_string() == csinfo_.bias_add_grad &&
+            e->dst_input() == 0 /* 1st input of BiasAddGrad */) {
+          n = e->dst();
+          break;
+        }
+      }
+    }
+
+    if (n == nullptr) {
+      VLOG(1) << "MklLayoutRewritePass: Could not find matching "
+              << "Conv2DBackpropFilter and BiasAddGrad node for merging. "
+              << "Input node: " << m->DebugString();
+    }
+    return n;
+  }
+
+  // Check if the node 'n' has any applicable rewrite rule
+  // We check for 2 scenarios for rewrite.
+  //
+  // @return RewriteInfo* for the applicable rewrite rule
+  const RewriteInfo* CheckForNodeRewrite(const Node* n) const;
+
+  // Default rewrite rule to be used in scenario 1 for rewrite.
+  // @return - true (since we want to always rewrite)
+  static bool AlwaysRewrite(const Node* n) {
+    return true;
+  }
+
+  // Check if we are performing pooling on depth or batch. If it is, then we
+  // do not rewrite MaxPool node to Mkl version.
+  // @return - true (if it is not a depth/batch wise pooling case);
+  //           false otherwise.
+  static bool NonDepthBatchWisePoolRewrite(const Node* n) {
+    CHECK_NOTNULL(n);
+
+    string data_format_str;
+    TensorFormat data_format;
+    std::vector<int32> ksize, strides;
+    CHECK_EQ(GetNodeAttr(n->def(), "ksize", &ksize).ok(), true);
+    CHECK_EQ(GetNodeAttr(n->def(), "strides", &strides).ok(), true);
+    CHECK_EQ(GetNodeAttr(n->def(), "data_format", &data_format_str).ok(),
+             true);
+    CHECK_EQ(FormatFromString(data_format_str, &data_format), true);
+
+    // Condition that specifies non-batch-wise and non-depth-wise pooling.
+    if (GetTensorDim(ksize,   data_format, 'N') == 1 &&
+        GetTensorDim(strides, data_format, 'N') == 1 &&
+        GetTensorDim(ksize,   data_format, 'C') == 1 &&
+        GetTensorDim(strides, data_format, 'C') == 1) {
+      return true;
+    }
+
+    return false;
+  }
+
+  static bool AddNRewrite(const Node* n) {
+    CHECK_NOTNULL(n);
+
+    int num;
+    CHECK_EQ(GetNodeAttr(n->def(), "N", &num).ok(), true);
+
+    // Condition that specifies non-batch-wise and non-depth-wise pooling.
+    if (num == 2) {
+      return true;
+    }
+
+    return false;
+  }
+
+  // Rewrites input node to a new node specified by its matching rewrite info.
+  //
+  // Method first searches matching rewrite info for input node and then
+  // uses that info to rewrite.
+  //
+  // Input node may be deleted in case of rewrite. Attempt to use the node
+  // after the call can result in undefined behaviors.
+  //
+  // @input  g - input graph, n - Node to be rewritten,
+  //         ri - matching rewriteinfo
+  // @return Status::OK(), if the input node is rewritten;
+  //         Returns appropriate Status error code otherwise.
+  //         Graph is updated in case the input node is rewritten.
+  //         Otherwise, it is not updated.
+  Status RewriteNode(std::unique_ptr<Graph>* g, Node* n, const RewriteInfo* ri);
+
+  // Get nodes that will feed a list of TF tensors to the new
+  // node that we are constructing.
+  //
+  // @input g - input graph,
+  // @input inputs - inputs to old node that we are using for constructing
+  //                 new inputs,
+  // @input input_idx - the index in the 'inputs' vector pointing to the
+  //                    current input that we have processed so far
+  // @output input_idx - index will be incremented by the number of nodes
+  //                     from 'inputs' that are processed
+  // @input list_length - The expected length of list of TF tensors
+  // @output output_nodes - the list of new nodes creating TF tensors
+  //
+  // @return None
+  void GetNodesProducingTFTensorList(
+      const gtl::InlinedVector<std::pair<Node*, int>, 4>& inputs,
+      int* input_idx, int list_length,
+      std::vector<NodeBuilder::NodeOut>* output_nodes);
+
+  // Get nodes that will feed a list of Mkl tensors to the new
+  // node that we are constructing.
+  //
+  // @input g - input graph,
+  // @input orig_node - Original node that we are rewriting
+  // @input inputs - inputs to old node that we are using for constructing
+  //                 new inputs,
+  // @input input_idx - the index in the 'inputs' vector pointing to the
+  //                    current input that we have processed so far
+  // @output input_idx - index will be incremented by the number of nodes
+  //                     from 'inputs' that are processed
+  // @input list_length - The expected length of list of Mkl tensors
+  // @output output_nodes - the list of new nodes creating Mkl tensors
+  //
+  // @return None
+  void GetNodesProducingMklTensorList(std::unique_ptr<Graph>* g,
+    Node* orig_node, const gtl::InlinedVector<std::pair<Node*, int>, 4>& inputs,
+    int* input_idx, int list_length,
+    std::vector<NodeBuilder::NodeOut>* output_nodes);
+
+  // Get a node that will feed an Mkl tensor to the new
+  // node that we are constructing. The output node could be (1) 'n'
+  // if it is Mkl layer, or (2) a dummy node producing dummy Mkl tensor
+  // if 'n' is not an Mkl layer.
+  //
+  // @input g - input graph,
+  // @input orig_node - Original node that we are rewriting,
+  // @input n - Node based on which we are creating Mkl node,
+  // @input n_output_slot - the output slot of node 'n'
+  //            which is feeding to the node that we are constructing
+  // @output mkl_node - the new node that will feed Mkl tensor
+  // @output mkl_node_output_slot - the slot number of mkl_node that
+  //                                will feed the tensor
+  // @return None
+  void GetNodeProducingMklTensor(std::unique_ptr<Graph>* g, Node* orig_node,
+    Node* n, int n_output_slot, Node** mkl_node, int* mkl_node_output_slot);
+
+  // Setup new inputs using old inputs 'inputs' for the rewritten node in 'nb'
+  // in graph 'g'. Original node is input in 'old_node'. Inputs to 'nb' are
+  // set up in contiguous fashion. 'workspace_tensors' carry graph nodes
+  // producing workspace edges if 'are_workspace_tensors_available' is true.
+  // Otherwise, 'workspace_tensors' is empty vector.
+  //
+  // For details, refer to 'Ordering of inputs after rewriting' section in the
+  // documentation above.
+  //
+  // Returns Status::OK() if setting up inputs is successful, otherwise
+  // returns appropriate status code.
+  int SetUpContiguousInputs(
+      std::unique_ptr<Graph>* g,
+      const gtl::InlinedVector<std::pair<Node*, int>, 4>& old_node_inputs,
+      NodeBuilder* nb, Node* old_node,
+      std::vector<NodeBuilder::NodeOut>* workspace_tensors,
+      bool are_workspace_tensors_available);
+
+  // Setup new inputs using old inputs 'inputs' for the rewritten node in 'nb'
+  // in graph 'g'. Original node is input in 'orig_node'.
+  //
+  // For details, refer to 'Ordering of Tensorflow tensors and Mkl tensors'
+  // section in the documentation above.
+  //
+  // Returns Status::OK() if setting up inputs is successful, otherwise
+  // returns appropriate status code.
+  Status SetUpInputs(std::unique_ptr<Graph>* g,
+                     const gtl::InlinedVector<std::pair<Node*, int>, 4>& inputs,
+                     NodeBuilder* nb, Node* orig_node);
+
+  // Add workspace edge on the input or output side of Node 'orig_node' by using
+  // NodeBuilder 'nb' for the new node provided. If 'orig_node' does not dictate
+  // adding workspace edge then do not add it. Workspace Tensorflow and Mkl
+  // tensors, if they need to be added, will be set into these tensors.
+  // If we set workspace tensors, then are_ws_tensors_added should be true.
+  void AddWorkSpaceEdgeIfNeeded(std::unique_ptr<Graph>* g, Node* orig_node,
+                                NodeBuilder* nb,
+                                std::vector<NodeBuilder::NodeOut>* ws_tensors,
+                                bool* are_ws_tensors_added);
+
+  // Functions specific to operators to copy attributes
+  // We need operator-specific function to copy attributes because the framework
+  // does not provide any generic function for it.
+  // NOTE: names are alphabetically sorted.
+  static void CopyAttrsAddN(const Node* orig_node, NodeBuilder* nb);
+  static void CopyAttrsBiasAddGrad(const Node* orig_node, NodeBuilder* nb);
+  static void CopyAttrsConcat(const Node* orig_node, NodeBuilder* nb);
+  static void CopyAttrsConcatV2(const Node* orig_node, NodeBuilder* nb);
+  static void CopyAttrsConv2D(const Node* orig_node, NodeBuilder* nb);
+  static void CopyAttrsDataType(const Node* orig_node, NodeBuilder* nb);
+  static void CopyAttrsFusedBatchNorm(const Node* orig_node, NodeBuilder* nb);
+  static void CopyAttrsLRN(const Node* orig_node, NodeBuilder* nb);
+  static void CopyAttrsPooling(const Node* orig_node, NodeBuilder* nb);
+  static void CopyAttrsReshape(const Node* orig_node, NodeBuilder* nb);
+  static void CopyAttrsSplit(const Node* orig_node, NodeBuilder* nb);
+
+  // Generate a graph node in graph 'g' representing a dummy Mkl tensor node,
+  // using node for original node 'orig_node' and return it in '*out'.
+  // TODO(nhasabni) We should move this to mkl_util.h
+  void GetDummyMklTensorNode(std::unique_ptr<Graph>* g, Node** out,
+                             Node* orig_node);
+  void GetDummyWorkspaceTensorNode(std::unique_ptr<Graph>* g, Node** out,
+                                   Node* orig_node);
+};
+
+MklLayoutRewritePass::ConstStringsInfo MklLayoutRewritePass::csinfo_;
+
+// We register Mkl rewrite pass for phase 1 in post partitioning group.
+// We register it here so that we get a complete picture of all users of Mkl
+// nodes. Do not change the ordering of the Mkl passes.
+const OptimizationPassRegistry::Grouping kMklLayoutRewritePassGroup =
+    OptimizationPassRegistry::POST_PARTITIONING;
+REGISTER_OPTIMIZATION(kMklLayoutRewritePassGroup, 1, MklLayoutRewritePass);
+
+//////////////////////////////////////////////////////////////////////////
+//           Helper functions for creating new node
+//////////////////////////////////////////////////////////////////////////
+
+static void FillInputs(const Node* n,
+                       gtl::InlinedVector<Node*, 4>* control_edges,
+                       gtl::InlinedVector<std::pair<Node*, int>, 4>* in) {
+  control_edges->clear();
+  for (const Edge* e : n->in_edges()) {
+    if (e->IsControlEdge()) {
+      control_edges->push_back(e->src());
+    } else {
+      (*in)[e->dst_input()] = std::make_pair(e->src(), e->src_output());
+    }
+  }
+  std::sort(control_edges->begin(), control_edges->end());
+  if (n->op_def().is_commutative()) {
+    // For commutative inputs, we sort the input by the input Node*
+    // to get a canonical ordering (so that add(a,b) and add(b, a) will
+    // hash to the same value if is_commutative is true for 'add').
+    std::sort(in->begin(), in->end());
+  }
+}
+
+void MklLayoutRewritePass::GetNodesProducingTFTensorList(
+    const gtl::InlinedVector<std::pair<Node*, int>, 4>& inputs, int* input_idx,
+    int list_length, std::vector<NodeBuilder::NodeOut>* output_nodes) {
+  CHECK_LT(*input_idx, inputs.size());
+  CHECK_GT(list_length, 0);
+  CHECK_NOTNULL(output_nodes);
+  output_nodes->reserve(list_length);
+
+  while (list_length != 0) {
+    CHECK_GT(list_length, 0);
+    CHECK_LT(*input_idx, inputs.size());
+    Node* n = inputs[*input_idx].first;
+    int slot = inputs[*input_idx].second;
+    // If input node 'n' is just producing a single tensor at
+    // output slot 'slot' then we just add that single node.
+    output_nodes->push_back(NodeBuilder::NodeOut(n, slot));
+    (*input_idx)++;
+    list_length--;
+  }
+}
+
+// TODO(nhasabni) We should move this to mkl_util.h.
+void MklLayoutRewritePass::GetDummyMklTensorNode(std::unique_ptr<Graph>* g,
+                                                 Node** out, Node* orig_node) {
+  // We use a tensor of shape {8} and value 0,0,0,0,0,0,0,0 to represent
+  // dummy Mkl tensor. 8 = 2*size_t.
+  const DataType dt = DataTypeToEnum<uint8>::v();
+  TensorProto proto;
+  proto.set_dtype(dt);
+  uint8 zero[8] = {0, 0, 0, 0, 0, 0, 0, 0};
+  proto.set_tensor_content(const_cast<const void*>(static_cast<void*>(&zero)),
+                           8);
+  TensorShape dummy_shape({8});
+  dummy_shape.AsProto(proto.mutable_tensor_shape());
+  TF_CHECK_OK(NodeBuilder((*g)->NewName("DMT"), "Const")
+               .Attr("value", proto)
+               .Attr("dtype", dt)
+               .Device(orig_node->def().device())  // We place this node on
+                                                   // the same device as the
+                                                   // device of the original
+                                                   // node.
+               .Finalize(&**g, out));
+
+  // If number of inputs to the original node is > 0, then we add
+  // control dependency between 1st input (index 0) of the original node and
+  // the dummy Mkl node. This is needed because control-flow ops such as Enter,
+  // Merge, etc, require frame_name of the dummy Mkl node to be same as the
+  // rewritten node. Adding control edge between 1st input of the original node
+  // and the dummy Mkl node ensures that the dummy node is in the same frame
+  // as the original node. Choosing 1st input is not necessary - any input of
+  // the original node is fine because all the inputs of a node are always in
+  // the same frame.
+  if (orig_node->num_inputs() > 0) {
+    Node* orig_input0 = nullptr;
+    TF_CHECK_OK(orig_node->input_node(0,
+                                      const_cast<const Node**>(&orig_input0)));
+    CHECK_NOTNULL((*g)->AddControlEdge(orig_input0, *out));
+  }
+
+  (*out)->set_assigned_device_name(orig_node->assigned_device_name());
+}
+
+void MklLayoutRewritePass::GetNodesProducingMklTensorList(
+    std::unique_ptr<Graph>* g,
+    Node* orig_node,
+    const gtl::InlinedVector<std::pair<Node*, int>, 4>& inputs,
+    int* input_idx, int list_length,
+    std::vector<NodeBuilder::NodeOut>* output_nodes) {
+  CHECK_LT(*input_idx, inputs.size());
+  CHECK_GT(list_length, 0);
+  CHECK_NOTNULL(output_nodes);
+  output_nodes->reserve(list_length);
+
+  while (list_length != 0) {
+    CHECK_GT(list_length, 0);
+    CHECK_LT(*input_idx, inputs.size());
+    Node* n = inputs[*input_idx].first;
+    int slot = inputs[*input_idx].second;
+    // If 'n' is producing a single tensor, then create a single Mkl tensor
+    // node.
+    Node* mkl_node = nullptr;
+    int mkl_node_output_slot = 0;
+    GetNodeProducingMklTensor(g, orig_node, n, slot, &mkl_node,
+                              &mkl_node_output_slot);
+    output_nodes->push_back(NodeBuilder::NodeOut(mkl_node,
+                                                mkl_node_output_slot));
+    (*input_idx)++;
+    list_length--;
+  }
+}
+
+// Get an input node that will feed Mkl tensor to the new
+// node that we are constructing. An input node could be (1) 'n'
+// if it is Mkl layer, or (2) a dummy node producing dummy Mkl tensor
+// if 'n' is not an Mkl layer.
+void MklLayoutRewritePass::GetNodeProducingMklTensor(std::unique_ptr<Graph>* g,
+    Node* orig_node, Node* n,
+    int n_output_slot, Node** mkl_node, int* mkl_node_output_slot) {
+  CHECK_NOTNULL(n);
+  CHECK_NOTNULL(mkl_node);
+  CHECK_NOTNULL(mkl_node_output_slot);
+
+  // If this is an MKL op, then it will create extra output for MKL layout.
+  DataType T;
+  if (GetNodeAttr(n->def(), "T", &T).ok() &&
+      mkl_op_registry::IsMklOp(n->type_string(), T)) {
+    // If this is an MKL op, then it will generate an edge that will receive
+    // Mkl tensor from a node.
+    // output slot number for Mkl tensor would be N+slot number of TensorFlow
+    // tensor, where N is total number of TensorFlow tensors.
+    *mkl_node = n;
+    *mkl_node_output_slot =
+        GetTensorMetaDataIndex(n_output_slot, n->num_outputs());
+  } else {
+    // If we have not visited the node and rewritten it, then we need
+    // to create a dummy node that will feed a dummy Mkl tensor to this node.
+    // DummyMklTensor node has no input and generates only 1 output
+    // (dummy Mkl tensor) as output slot number 0.
+    GetDummyMklTensorNode(g, mkl_node, orig_node);
+    CHECK_NOTNULL(*mkl_node);
+    *mkl_node_output_slot = 0;
+  }
+}
+
+int MklLayoutRewritePass::SetUpContiguousInputs(
+    std::unique_ptr<Graph>* g,
+    const gtl::InlinedVector<std::pair<Node*, int>, 4>& old_node_inputs,
+    NodeBuilder* nb, Node* old_node,
+    std::vector<NodeBuilder::NodeOut>* workspace_tensors,
+    bool are_workspace_tensors_available) {
+  CHECK_NOTNULL(workspace_tensors);
+  CHECK_EQ(kTensorOrdering, MklTfTensorOrdering::TENSORS_CONTIGUOUS);
+
+  // TODO(nhasabni): Temporary solution to connect filter input of
+  // BackpropInput with the converted filter from Conv2D.
+  bool do_connect_conv2d_backprop_input_filter = false;
+  Node* conv2d_node = nullptr;
+  // Filter node is 2nd input (slot index 1) of Conv2D.
+  int kConv2DFilterInputSlotIdx = 1;
+  int kConv2DBackpropInputFilterInputSlotIdx = 1;
+  int kConv2DFilterOutputSlotIdx = 1;
+  if (old_node->type_string() == csinfo_.conv2d_grad_input) {
+    // We need to find Conv2D node from Conv2DBackpropInput.
+    // For that let's first find filter node that is 2nd input (slot 1)
+    // of BackpropInput.
+    Node* filter_node = nullptr;
+    old_node->input_node(kConv2DBackpropInputFilterInputSlotIdx, &filter_node);
+    CHECK_NOTNULL(filter_node);
+
+    // Now check which nodes receive from filter_node. Filter feeds as
+    // 2nd input (slot 1) of _MklConv2D and _MklConv2DWithBias.
+    for (const Edge* e : filter_node->out_edges()) {
+      if ((e->dst()->type_string() == csinfo_.mkl_conv2d ||
+           e->dst()->type_string() == csinfo_.mkl_conv2d_with_bias) &&
+          e->dst_input() == kConv2DFilterInputSlotIdx
+          /* filter is 2nd input of Conv2D and _MklConv2D. */) {
+        if (conv2d_node != nullptr) {
+          VLOG(1) << "MklLayoutRewritePass: unusual case of same filter"
+                  << " feeding multiple Conv2D nodes: "
+                  << filter_node->DebugString();
+          // We will not connect filter input of Conv2DBackpropInput
+          // to be safe here.
+          do_connect_conv2d_backprop_input_filter = false;
+          break;
+        } else {
+          conv2d_node = e->dst();
+          do_connect_conv2d_backprop_input_filter = true;
+        }
+      }
+    }
+  }
+
+  // Number of input slots to original op
+  // Input slots are represented by .Input() calls in REGISTER_OP.
+  int old_node_input_slots = old_node->op_def().input_arg_size();
+  // Actual number of inputs can be greater than or equal to number
+  // of Input slots because inputs of type list could be unfolded.
+  CHECK_GE(old_node_inputs.size(), old_node_input_slots);
+  int nn_slot_idx = 0;  // slot index for inputs of new node
+
+  // Let's copy all inputs (TF tensors) of original node to new node.
+  int iidx = 0;
+  for (int on_slot_idx = 0; on_slot_idx < old_node_input_slots; on_slot_idx++) {
+    // An input slot could be a single tensor or a list. We need
+    // to handle this case accordingly.
+    CHECK_LT(iidx, old_node_inputs.size());
+    const OpDef::ArgDef& arg = old_node->op_def().input_arg(on_slot_idx);
+    if (ArgIsList(arg)) {
+      std::vector<NodeBuilder::NodeOut> new_node_inputs;
+      int N = GetTensorListLength(arg, old_node);
+      GetNodesProducingTFTensorList(old_node_inputs, &iidx, N,
+                                    &new_node_inputs);
+      nb->Input(new_node_inputs);
+      nn_slot_idx++;
+    } else {
+      // Special case for connecting filter input of Conv2DBackpropInput
+      if (do_connect_conv2d_backprop_input_filter &&
+          iidx == kConv2DBackpropInputFilterInputSlotIdx) {
+        nb->Input(conv2d_node, kConv2DFilterOutputSlotIdx);
+      } else {
+        nb->Input(old_node_inputs[iidx].first, old_node_inputs[iidx].second);
+      }
+      iidx++;
+      nn_slot_idx++;
+    }
+  }
+
+  // If workspace tensors are available for this op and we are using
+  // contiguous ordering then we need to add Tensorflow tensor for
+  // workspace here because Tensorflow tensor for workspace is the
+  // last tensor in the list of Tensorflow tensors.
+  if (are_workspace_tensors_available) {
+    CHECK_EQ(workspace_tensors->size(), 2);
+    // Tensorflow tensor
+    nb->Input((*workspace_tensors)[0].node, (*workspace_tensors)[0].index);
+    nn_slot_idx++;
+  }
+
+  // Let's now setup all Mkl inputs to a new node.
+  // Number of Mkl inputs must be same as number of TF inputs.
+  iidx = 0;
+  for (int on_slot_idx = 0; on_slot_idx < old_node_input_slots; on_slot_idx++) {
+    // An input slot could be a single tensor or a list. We need
+    // to handle this case accordingly.
+    CHECK_LT(iidx, old_node_inputs.size());
+    const OpDef::ArgDef& arg = old_node->op_def().input_arg(on_slot_idx);
+    if (ArgIsList(arg)) {
+      std::vector<NodeBuilder::NodeOut> new_node_inputs;
+      int N = GetTensorListLength(arg, old_node);
+      GetNodesProducingMklTensorList(g, old_node, old_node_inputs, &iidx,
+                                     N, &new_node_inputs);
+      nb->Input(new_node_inputs);
+      nn_slot_idx++;
+    } else {
+      Node* mkl_node = nullptr;
+      int mkl_node_output_slot = 0;
+      // Special case for connecting filter input of Conv2DBackpropInput
+      if (do_connect_conv2d_backprop_input_filter &&
+          iidx == kConv2DBackpropInputFilterInputSlotIdx) {
+        GetNodeProducingMklTensor(g, old_node, conv2d_node,
+                                  kConv2DFilterOutputSlotIdx, &mkl_node,
+                                  &mkl_node_output_slot);
+      } else {
+        GetNodeProducingMklTensor(g, old_node, old_node_inputs[iidx].first,
+                                  old_node_inputs[iidx].second, &mkl_node,
+                                  &mkl_node_output_slot);
+      }
+      nb->Input(mkl_node, mkl_node_output_slot);
+      iidx++;
+      nn_slot_idx++;
+    }
+  }
+
+  // If workspace tensors are available for this op and we are using
+  // contiguous ordering then we need to add Mkl tensor for
+  // workspace here because Mkl tensor for workspace is the
+  // last tensor in the list of Mkl tensors.
+  if (are_workspace_tensors_available) {
+    CHECK_EQ(workspace_tensors->size(), 2);
+    // Mkl tensor
+    nb->Input((*workspace_tensors)[1].node, (*workspace_tensors)[1].index);
+    nn_slot_idx++;
+  }
+
+  return nn_slot_idx;
+}
+
+Status MklLayoutRewritePass::SetUpInputs(
+    std::unique_ptr<Graph>* g,
+    const gtl::InlinedVector<std::pair<Node*, int>, 4>& old_node_inputs,
+    NodeBuilder* nb, Node* old_node) {
+  // Let's check if we need to add workspace tensors for this node.
+  // We add workspace edge only for MaxPool, LRN and BatchNorm.
+  std::vector<NodeBuilder::NodeOut> workspace_tensors;
+  bool are_workspace_tensors_available = false;
+  AddWorkSpaceEdgeIfNeeded(g, old_node, nb, &workspace_tensors,
+                           &are_workspace_tensors_available);
+
+  int new_node_input_slots = 0;
+  if (kTensorOrdering == MklTfTensorOrdering::TENSORS_INTERLEAVED) {
+    // TODO(nhasabni): implement this function just for same of completion.
+    // We do not use interleaved ordering right now.
+    return Status(
+        error::Code::UNIMPLEMENTED,
+        "Interleaved ordering of tensors is currently not supported.");
+  } else {
+    CHECK_EQ(kTensorOrdering, MklTfTensorOrdering::TENSORS_CONTIGUOUS);
+    new_node_input_slots = SetUpContiguousInputs(
+        g, old_node_inputs, nb, old_node, &workspace_tensors,
+        are_workspace_tensors_available);
+  }
+
+  // Sanity check
+  int old_node_input_slots = old_node->op_def().input_arg_size();
+  if (!are_workspace_tensors_available) {
+    // If we are not adding workspace tensors for this op, then the total
+    // number of input slots to the new node _must_ be 2 times the number
+    // of input slots to the original node: N original Tensorflow tensors and
+    // N for Mkl tensors corresponding to each Tensorflow tensors.
+    CHECK_EQ(new_node_input_slots, old_node_input_slots * 2);
+  } else {
+    // If we are adding workspace tensors for this op, then the total
+    // The total number of input slots to new node _must_ be 2 times the number
+    // of input slots to the original node: N original Tensorflow tensors and
+    // N for Mkl tensors corresponding to each Tensorflow tensors plus 2
+    // (for workspace Tensorflow tensor and workspace Mkl tensor).
+    CHECK_EQ(new_node_input_slots, old_node_input_slots * 2 + 2);
+  }
+
+  return Status::OK();
+}
+
+//////////////////////////////////////////////////////////////////////////
+//           Helper functions related to workspace pass
+//////////////////////////////////////////////////////////////////////////
+
+// TODO(nhasabni) We should move this to mkl_util.h.
+void MklLayoutRewritePass::GetDummyWorkspaceTensorNode(
+    std::unique_ptr<Graph>* g, Node** out, Node* orig_node) {
+  // We use a tensor of shape {1} and value 0 to represent
+  // dummy float tensor. We need this as a dummy workspace tensor.
+  // Workspace tensor has type float.
+  const DataType dt = DataTypeToEnum<float>::v();
+  TensorProto proto;
+  proto.set_dtype(dt);
+  float zero[1] = {0};
+  proto.set_tensor_content(const_cast<const void*>(static_cast<void*>(&zero)),
+                           4);
+  TensorShape dummy_shape({1});
+  dummy_shape.AsProto(proto.mutable_tensor_shape());
+  TF_CHECK_OK(NodeBuilder((*g)->NewName("DMT"), "Const")
+                .Attr("value", proto)
+                .Attr("dtype", dt)
+                .Device(orig_node->def().device())  // We place this node on
+                                                    // same the device as the
+                                                    // device of the original
+                                                    // node.
+                .Finalize(&**g, out));
+
+  // If number of inputs to the original node is > 0, then we add
+  // control dependency between 1st input (index 0) of the original node and
+  // the dummy Mkl node. This is needed because control-flow ops such as Enter,
+  // Merge, etc, require frame_name of the dummy Mkl node to be same as the
+  // rewritten node. Adding control edge between 1st input of the original node
+  // and the dummy Mkl node ensures that the dummy node is in the same frame
+  // as the original node. Choosing 1st input is not necessary - any input of
+  // the original node is fine because all the inputs of a node are always in
+  // the same frame.
+  if (orig_node->num_inputs() > 0) {
+    Node* orig_input0 = nullptr;
+    TF_CHECK_OK(orig_node->input_node(0,
+                                      const_cast<const Node**>(&orig_input0)));
+    CHECK_NOTNULL((*g)->AddControlEdge(orig_input0, *out));
+  }
+
+  (*out)->set_assigned_device_name(orig_node->assigned_device_name());
+}
+
+void MklLayoutRewritePass::AddWorkSpaceEdgeIfNeeded(
+    std::unique_ptr<Graph>* g, Node* orig_node, NodeBuilder* nb,
+    std::vector<NodeBuilder::NodeOut>* ws_tensors, bool* are_ws_tensors_added) {
+  bool workspace_edge_added = false;  // Default initializer
+  CHECK_NOTNULL(are_ws_tensors_added);
+  *are_ws_tensors_added = false;  // Default initializer
+
+  DataType T;
+  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "T", &T));
+  for (auto ws : wsinfo_) {
+    if (orig_node->type_string() == ws.fwd_op &&
+        mkl_op_registry::IsMklOp(mkl_op_registry::GetMklOpName(
+          orig_node->type_string()), T)) {
+      // If this op is a fwd op, then we need to check if there is an
+      // edge from this node's fwd_slot to bwdop's bwd_slot. If there is
+      // an edge, then we just add an attribute on this node for setting
+      // workspace_passed to true. We don't add actual workspace edge
+      // in this node. Actual workspace edge gets added in the backward
+      // op for this node.
+      for (const Edge* e : orig_node->out_edges()) {
+        if (e->src_output() == ws.fwd_slot &&
+            e->dst()->type_string() == ws.bwd_op &&
+            e->dst_input() == ws.bwd_slot) {
+          nb->Attr("workspace_enabled", true);
+          VLOG(1) << "MklLayoutRewritePass: workspace_enabled for "
+                  << orig_node->type_string();
+          workspace_edge_added = true;
+          // We found the edge that we were looking for, so break.
+          break;
+        }
+      }
+
+      if (!workspace_edge_added) {
+        // If we are here, then we did not find backward operator for this
+        // node.
+        nb->Attr("workspace_enabled", false);
+      }
+    } else if (orig_node->type_string() == ws.bwd_op &&
+               mkl_op_registry::IsMklOp(mkl_op_registry::GetMklOpName(
+                                          orig_node->type_string()), T)) {
+      // If this op is a bwd op, then we need to add workspace edge and
+      // it's Mkl tensor edge between its corresponding fwd op and this
+      // op. Corresponding fwd op is specified in 'fwd_op' field of
+      // workspace info. fwd_slot and bwd_slot in workspace info specify
+      // an edge between which slots connect forward and backward op.
+      // Once all these criteria match, we add a workspace edge between
+      // ws_fwd_slot and ws_bwd_slot. Its corresponding Mkl tensor is
+      // determined by interleaved/contiguous ordering. Function
+      // DataIndexToMetaDataIndex tells us the location of Mkl tensor
+      // from the location of the Tensorflow tensor.
+      for (const Edge* e : orig_node->in_edges()) {
+        if (e->src_output() == ws.fwd_slot &&
+            // We would have rewritten the forward op, so we need to use
+            // GetMklOpName call to get its Mkl name.
+            e->src()->type_string() == mkl_op_registry::GetMklOpName(
+                                                          ws.fwd_op) &&
+            e->dst_input() == ws.bwd_slot) {
+          nb->Attr("workspace_enabled", true);
+          CHECK_NOTNULL(ws_tensors);
+          // Add workspace edge between fwd op and bwd op.
+          ws_tensors->push_back(NodeBuilder::NodeOut(e->src(), ws.ws_fwd_slot));
+          // Add Mkl tensor edge for workspace edge between fwd op and bwd op.
+          ws_tensors->push_back(NodeBuilder::NodeOut(
+              e->src(), DataIndexToMetaDataIndex(ws.ws_fwd_slot,
+                                                 e->src()->num_outputs())));
+          *are_ws_tensors_added = true;
+          // In terms of input ordering, we add these calls to add Input
+          // here because workspace edge (and its Mkl tensor) is the last
+          // edge in the fwdop and bwdop. So all inputs before workspace
+          // tensor have been added by SetUpInputs function.
+          VLOG(1) << "MklLayoutRewritePass: workspace_enabled for "
+                  << orig_node->type_string();
+          workspace_edge_added = true;
+          // We found the edge that we were looking for, so break.
+          break;
+        }
+      }
+
+      // If we are here means we did not find fwd op that feeds to this
+      // bwd op. So in this case, we need to generate dummy tensors for
+      // workspace input and Mkl tensor for workspace, and set
+      // workspace_enabled to false.
+      if (!workspace_edge_added) {
+        nb->Attr("workspace_enabled", false);
+        Node* dmt_ws = nullptr;      // Dummy tensor for workspace
+        Node* dmt_mkl_ws = nullptr;  // Dummy Mkl tensor for workspace
+        GetDummyWorkspaceTensorNode(g, &dmt_ws, orig_node);
+        GetDummyMklTensorNode(g, &dmt_mkl_ws, orig_node);
+        CHECK_NOTNULL(dmt_ws);
+        CHECK_NOTNULL(dmt_mkl_ws);
+        CHECK_NOTNULL(ws_tensors);
+        // We add dummy tensor as workspace tensor.
+        ws_tensors->push_back(NodeBuilder::NodeOut(dmt_ws, 0));
+        // We add dummy tensor as Mkl tensor for workspace tensor.
+        ws_tensors->push_back(NodeBuilder::NodeOut(dmt_mkl_ws, 0));
+        *are_ws_tensors_added = true;
+        VLOG(1) << "MklLayoutRewritePass: dummy workspace_enabled for "
+                << orig_node->type_string();
+      }
+    } else {
+      // If this node does not match any workspace info, then we do not
+      // do anything special for workspace propagation for it.
+    }
+  }
+}
+
+//////////////////////////////////////////////////////////////////////////
+// Op-specific functions to copy attributes from old node to new node
+//////////////////////////////////////////////////////////////////////////
+
+void MklLayoutRewritePass::CopyAttrsConv2D(const Node* orig_node,
+                                           NodeBuilder* nb) {
+  DataType T;
+  string data_format;
+  string padding;
+  std::vector<int32> strides;
+  bool use_cudnn_on_gpu;
+
+  // Get all attributes from old node.
+  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "T", &T));
+  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "strides", &strides));
+  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "padding", &padding));
+  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "data_format", &data_format));
+  TF_CHECK_OK(
+      GetNodeAttr(orig_node->def(), "use_cudnn_on_gpu", &use_cudnn_on_gpu));
+
+  // Add attributes to new node.
+  nb->Attr("T", T);
+  nb->Attr("strides", strides);
+  nb->Attr("padding", padding);
+  nb->Attr("data_format", data_format);
+  nb->Attr("use_cudnn_on_gpu", use_cudnn_on_gpu);
+}
+
+void MklLayoutRewritePass::CopyAttrsAddN(const Node* orig_node,
+                                         NodeBuilder* nb) {
+  DataType T;
+  int N;
+
+  // Get all attributes from old node.
+  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "T", &T));
+  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "N", &N));
+
+  // Add attributes to new node.
+  nb->Attr("T", T);
+  nb->Attr("N", N);
+}
+
+void MklLayoutRewritePass::CopyAttrsBiasAddGrad(const Node* orig_node,
+                                                NodeBuilder* nb) {
+  DataType T;
+  string data_format;
+  std::vector<int32> strides;
+
+  // Get all attributes from old node.
+  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "T", &T));
+  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "strides", &strides));
+  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "data_format", &data_format));
+
+  // Add attributes to new node.
+  nb->Attr("T", T);
+  nb->Attr("strides", strides);
+  nb->Attr("data_format", data_format);
+}
+
+void MklLayoutRewritePass::CopyAttrsLRN(const Node* orig_node,
+                                        NodeBuilder* nb) {
+  DataType T;
+  int depth_radius;
+  float bias;
+  float alpha;
+  float beta;
+
+  // Get all attributes from old node.
+  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "T", &T));
+  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "depth_radius", &depth_radius));
+  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "bias", &bias));
+  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "alpha", &alpha));
+  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "beta", &beta));
+
+  // Add attributes to new node.
+  nb->Attr("T", T);
+  nb->Attr("depth_radius", depth_radius);
+  nb->Attr("bias", bias);
+  nb->Attr("alpha", alpha);
+  nb->Attr("beta", beta);
+}
+
+void MklLayoutRewritePass::CopyAttrsPooling(const Node* orig_node,
+                                            NodeBuilder* nb) {
+  DataType T;
+  string data_format;
+  string padding;
+  std::vector<int32> ksize, strides;
+
+  // Get all attributes from old node.
+  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "T", &T));
+  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "ksize", &ksize));
+  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "strides", &strides));
+  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "padding", &padding));
+  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "data_format", &data_format));
+
+  // Add attributes to new node.
+  nb->Attr("T", T);
+  nb->Attr("ksize", ksize);
+  nb->Attr("strides", strides);
+  nb->Attr("padding", padding);
+  nb->Attr("data_format", data_format);
+}
+
+void MklLayoutRewritePass::CopyAttrsDataType(const Node* orig_node,
+                                             NodeBuilder* nb) {
+  DataType T;
+
+  // Get all attributes from old node.
+  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "T", &T));
+
+  // Add attributes to new node.
+  nb->Attr("T", T);
+}
+
+void MklLayoutRewritePass::CopyAttrsReshape(const Node* orig_node,
+                                           NodeBuilder* nb) {
+  DataType T;
+  DataType Tshape;
+
+  // Get all attributes from old node.
+  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "T", &T));
+  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "Tshape", &Tshape));
+  // Add attributes to new node.
+  nb->Attr("T", T);
+  nb->Attr("Tshape", Tshape);
+}
+
+void MklLayoutRewritePass::CopyAttrsSplit(const Node* orig_node,
+                                          NodeBuilder* nb) {
+  DataType T;
+  string data_format;
+  int num_split;
+
+  // Get all attributes from old node.
+  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "T", &T));
+  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "num_split", &num_split));
+  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "data_format", &data_format));
+
+  // Add attributes to new node.
+  nb->Attr("T", T);
+  nb->Attr("num_split", num_split);
+  nb->Attr("data_format", data_format);
+}
+
+void MklLayoutRewritePass::CopyAttrsConcat(const Node* orig_node,
+                                           NodeBuilder* nb) {
+  DataType T;
+  int N;
+
+  // Get all attributes from old node.
+  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "T", &T));
+  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "N", &N));
+
+  // Add attributes to new node.
+  nb->Attr("T", T);
+  nb->Attr("N", N);
+}
+
+void MklLayoutRewritePass::CopyAttrsConcatV2(const Node* orig_node,
+                                             NodeBuilder* nb) {
+  DataType T;
+  int N;
+  DataType tidx;
+
+  // Get all attributes from old node.
+  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "T", &T));
+  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "N", &N));
+  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "Tidx", &tidx));
+
+  // Add attributes to new node.
+  nb->Attr("T", T);
+  nb->Attr("N", N);
+  nb->Attr("Tidx", tidx);
+}
+
+void MklLayoutRewritePass::CopyAttrsFusedBatchNorm(const Node* orig_node,
+                                                   NodeBuilder* nb) {
+  DataType T;
+  float epsilon;
+  string data_format;
+  bool is_training;
+
+  // Get all attributes from old node.
+  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "T", &T));
+  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "epsilon", &epsilon));
+  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "data_format", &data_format));
+  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "is_training", &is_training));
+
+  // Add attributes to new node.
+  nb->Attr("T", T);
+  nb->Attr("epsilon", epsilon);
+  nb->Attr("data_format", data_format);
+  nb->Attr("is_training", is_training);
+}
+
+//////////////////////////////////////////////////////////////////////////
+//           Helper functions related to node merge pass
+//////////////////////////////////////////////////////////////////////////
+
+Node* MklLayoutRewritePass::CheckForNodeMerge(const Node* a) const {
+  // TODO(nhasabni) Add check for type of node similar to CheckForNodeRewrite
+  // once we support BiasAddGrad as Mkl layer.
+
+  // Search for all matching mergeinfo.
+  // We allow more than one match for extensibility.
+  std::vector<const MergeInfo*> matching_mi;
+  for (auto mi = minfo_.cbegin(); mi != minfo_.cend(); ++mi) {
+    if (a->type_string() == mi->op1 || a->type_string() == mi->op2) {
+      matching_mi.push_back(&*mi);
+    }
+  }
+
+  for (const MergeInfo* mi : matching_mi) {
+    // Get the operand with which 'a' can be merged.
+    Node* b = nullptr;
+    if ((b = mi->get_node_to_be_merged(a)) == nullptr) {
+      continue;
+    }
+
+    // Get the control edges and input of node
+    const int N_in = a->num_inputs();
+    gtl::InlinedVector<Node*, 4> a_control_edges;
+    gtl::InlinedVector<std::pair<Node*, int>, 4> a_in(N_in);
+    FillInputs(a, &a_control_edges, &a_in);
+
+    const int B_in = b->num_inputs();
+    gtl::InlinedVector<Node*, 4> b_control_edges;
+    gtl::InlinedVector<std::pair<Node*, int>, 4> b_in(B_in);
+    FillInputs(b, &b_control_edges, &b_in);
+
+    // Shouldn't merge if a and b have different control edges.
+    if (a_control_edges != b_control_edges) {
+      continue;
+    } else {
+      // We found a match.
+      return b;
+    }
+  }
+
+  return nullptr;
+}
+
+Status MklLayoutRewritePass::MergeConv2DWithBiasAdd(std::unique_ptr<Graph>* g,
+                                                    Node* m, Node* n) {
+  CHECK_EQ(((m->type_string() == csinfo_.bias_add &&
+             n->type_string() == csinfo_.conv2d)) ||
+           ((n->type_string() == csinfo_.bias_add &&
+             m->type_string() == csinfo_.conv2d)), true);
+
+  // If 'm' is BiasAdd, then 'n' is Conv2D. Since Conv2D feeds BiasAdd,
+  // BiasAdd is successor node, and Conv2D predecessor node.
+  Node* pred = m->type_string() == csinfo_.bias_add ? n : m;
+  Node* succ = m->type_string() == csinfo_.bias_add ? m : n;
+
+  // 1. Get all attributes from input nodes.
+  DataType T_pred, T_succ;
+  string padding;
+  std::vector<int32> strides;
+  string data_format_pred, data_format_succ;
+  bool use_cudnn_on_gnu;
+  TF_CHECK_OK(GetNodeAttr(pred->def(), "T", &T_pred));
+  TF_CHECK_OK(GetNodeAttr(succ->def(), "T", &T_succ));
+  TF_CHECK_OK(GetNodeAttr(pred->def(), "padding", &padding));
+  TF_CHECK_OK(GetNodeAttr(pred->def(), "strides", &strides));
+  TF_CHECK_OK(GetNodeAttr(pred->def(), "data_format", &data_format_pred));
+  TF_CHECK_OK(GetNodeAttr(succ->def(), "data_format", &data_format_succ));
+  TF_CHECK_OK(
+      GetNodeAttr(pred->def(), "use_cudnn_on_gpu", &use_cudnn_on_gnu));
+  // We check to ensure that data formats of both succ and pred are same.
+  // We expect them to be same, so we can enforce this as assert.
+  // But assert can be too strict, so we enforce this as a check.
+  // If the check fails, then we do not merge two nodes.
+  // We also do same check for devices.
+  if (data_format_pred != data_format_succ || T_pred != T_succ ||
+      pred->assigned_device_name() != succ->assigned_device_name() ||
+      pred->def().device() != succ->def().device()) {
+    return Status(error::Code::INVALID_ARGUMENT,
+                  "data_format or T attribute or devices of Conv2D and "
+                  "BiasAdd do not match. Will skip node merge optimization");
+  }
+
+  const int succ_num = succ->num_inputs();
+  gtl::InlinedVector<Node*, 4> succ_control_edges;
+  gtl::InlinedVector<std::pair<Node*, int>, 4> succ_in(succ_num);
+  FillInputs(succ, &succ_control_edges, &succ_in);
+
+  const int pred_num = pred->num_inputs();
+  gtl::InlinedVector<Node*, 4> pred_control_edges;
+  gtl::InlinedVector<std::pair<Node*, int>, 4> pred_in(pred_num);
+  FillInputs(pred, &pred_control_edges, &pred_in);
+
+  // We need to ensure that Conv2D only feeds to BiasAdd (some other operator is
+  // not expecting output of Conv2D). If this is not the case, then we cannot
+  // merge Conv2D with BiasAdd.
+  const int kFirstOutputSlot = 0;
+  for (const Edge* e : pred->out_edges()) {
+    if (e->src_output() == kFirstOutputSlot && e->dst() != succ) {
+      return Status(error::Code::INVALID_ARGUMENT,
+                    "Conv2D does not feed to BiasAdd, or "
+                    "it feeds BiasAdd but has multiple outputs. "
+                    "Will skip node merge optimization");
+    }
+  }
+
+  // 2. Get inputs from both the nodes.
+  // Find the 2 inputs from the conv and the bias from the add Bias.
+  // Get operand 0, 1 of conv2D.
+  CHECK_EQ(pred->in_edges().size(), 2);  // Conv2D must have 2 inputs.
+  // Get operand 1 of add_bias
+  // BiasAdd must have 2 inputs: Conv, bias
+  CHECK_EQ(succ->in_edges().size(), 2);
+
+  // We will use the node name of BiasAdd as the name of new node
+  // Build new node. We use same name as original node, but change the op
+  // name.
+  NodeBuilder nb(succ->name(), csinfo_.conv2d_with_bias);
+  nb.Input(pred_in[0].first, pred_in[0].second);  // In1 of Conv2D
+  // pred_in[1] will be 2nd Tensorflow tensor for Conv2D.
+  nb.Input(pred_in[1].first, pred_in[1].second);  // In2 of Conv2D
+  // In1 of BiasAdd is same as output of Conv2D.
+  nb.Input(succ_in[1].first, succ_in[1].second);  // In2 of BiasAdd
+
+  // Copy attributes from Conv2D to Conv2DWithBias.
+  CopyAttrsConv2D(const_cast<const Node*>(pred), &nb);
+
+  // Copy the device assigned to old node to new node.
+  nb.Device(succ->def().device());
+
+  // Create node.
+  Node* new_node;
+  nb.Finalize(&**g, &new_node);
+  CHECK_NOTNULL(new_node);
+
+  // Incoming data edges from 'pred' node and 'succ' node to new 'new_node'
+  // node are already copied in BuildNode. We handle control edges now.
+  for (const Edge* e : pred->in_edges()) {
+    if (e->IsControlEdge()) {
+      CHECK_NOTNULL((*g)->AddControlEdge(e->src(), new_node));
+    }
+  }
+  for (const Edge* e : succ->in_edges()) {
+    if (e->IsControlEdge()) {
+      CHECK_NOTNULL((*g)->AddControlEdge(e->src(), new_node));
+    }
+  }
+
+  // Incoming edges are fixed, we will fix the outgoing edges now.
+  // First, we will fix outgoing control edges from 'pred' node.
+  for (const Edge* e : pred->out_edges()) {
+    if (e->IsControlEdge()) {
+      CHECK_NOTNULL((*g)->AddControlEdge(new_node, e->dst()));
+    }
+  }
+
+  // Second, we will fix outgoing control and data edges from 'succ' node.
+  for (const Edge* e : succ->out_edges()) {
+    if (e->IsControlEdge()) {
+      CHECK_NOTNULL((*g)->AddControlEdge(new_node, e->dst()));
+    } else {
+      // BiasAdd has only 1 output (at slot 0) and merged node also has only 1
+      // output (at slot 0).
+      const int kConv2DWithBiasOutputSlot = 0;
+      CHECK_NOTNULL((*g)->AddEdge(new_node, kConv2DWithBiasOutputSlot,
+                                    e->dst(), e->dst_input()));
+    }
+  }
+
+  // Copy device assigned to old node to new node.
+  // It's ok to use pred or succ as we have enforced a check that
+  // both have same device assigned.
+  new_node->set_assigned_device_name(pred->assigned_device_name());
+
+  VLOG(1) << "MklLayoutRewritePass: Merged old node:" << pred->DebugString()
+          << ", and node: " << succ->DebugString()
+          << ", into node:" << new_node->DebugString();
+
+  (*g)->RemoveNode(succ);
+  (*g)->RemoveNode(pred);
+
+  return Status::OK();
+}
+
+Status MklLayoutRewritePass::MergeConv2DBackpropFilterWithBiasAddGrad(
+    std::unique_ptr<Graph>* g, Node* m, Node* n) {
+  CHECK_EQ(((m->type_string() == csinfo_.bias_add_grad &&
+             n->type_string() == csinfo_.conv2d_grad_filter)) ||
+           ((n->type_string() == csinfo_.bias_add_grad &&
+             m->type_string() == csinfo_.conv2d_grad_filter)), true);
+
+  // If 'm' is BiasAddGrad, then 'n' is BackpropFilter.
+  Node* badd = m->type_string() == csinfo_.bias_add_grad ? m : n;
+  Node* fltr = m->type_string() == csinfo_.bias_add_grad ? n : m;
+
+  // Sanity check for attributes from input nodes.
+  DataType T_b, T_f;
+  string data_format_b, data_format_f;
+  TF_CHECK_OK(GetNodeAttr(badd->def(), "T", &T_b));
+  TF_CHECK_OK(GetNodeAttr(fltr->def(), "T", &T_f));
+  TF_CHECK_OK(GetNodeAttr(badd->def(), "data_format", &data_format_b));
+  TF_CHECK_OK(GetNodeAttr(fltr->def(), "data_format", &data_format_f));
+  if (data_format_b != data_format_f || T_b != T_f ||
+      badd->assigned_device_name() != fltr->assigned_device_name() ||
+      badd->def().device() != fltr->def().device()) {
+    return Status(error::Code::INVALID_ARGUMENT,
+                  "data_format or T attribute or devices of "
+                  "Conv2DBackpropFilter and BiasAddGrad do not match. "
+                  "Will skip node merge optimization");
+  }
+
+  // We will use the node name of Conv2DBackpropFilter as the name of new node.
+  // This is because BackpropFilterWithBias is going to emit bias output also.
+  NodeBuilder nb(fltr->name(), csinfo_.conv2d_grad_filter_with_bias);
+  // Since Conv2DBackpropFilterWithBias has same number of inputs as
+  // Conv2DBackpropFilter, we can just copy input edges directly. We dont need
+  // to copy any data input of BiasAddGrad because that input also goes to
+  // Conv2DBackpropFilter.
+  const int fltr_ins = fltr->num_inputs();
+  gtl::InlinedVector<Node*, 4> fltr_control_edges;
+  gtl::InlinedVector<std::pair<Node*, int>, 4> fltr_in_edges(fltr_ins);
+  FillInputs(fltr, &fltr_control_edges, &fltr_in_edges);
+  for (int idx = 0; idx < fltr_ins; idx++) {
+    nb.Input(fltr_in_edges[idx].first, fltr_in_edges[idx].second);
+  }
+
+  // Copy attributes from Conv2DBackpropFilter.
+  CopyAttrsConv2D(const_cast<const Node*>(fltr), &nb);
+
+  // Copy the device assigned to old node to new node.
+  nb.Device(fltr->def().device());
+
+  // Create node.
+  Node* new_node;
+  nb.Finalize(&**g, &new_node);
+  CHECK_NOTNULL(new_node);
+
+  // Incoming data edges from BiasAddGrad node and Conv2DBackpropFilter node to
+  // new 'new_node' node are already copied in BuildNode. We handle control
+  // edges now.
+  for (const Edge* e : badd->in_edges()) {
+    if (e->IsControlEdge()) {
+      CHECK_NOTNULL((*g)->AddControlEdge(e->src(), new_node));
+    }
+  }
+  for (const Edge* e : fltr->in_edges()) {
+    if (e->IsControlEdge()) {
+      CHECK_NOTNULL((*g)->AddControlEdge(e->src(), new_node));
+    }
+  }
+
+  // Incoming edges are fixed, we will fix the outgoing edges now.
+  // First, we will fix outgoing control edges from 'badd' node.
+  // Conv2DBackpropFilter has 1 output -- filter_grad.
+  // Conv2DBackpropFilterWithBias has 2 outputs -- filter_grad and
+  // bias_grad. But filter_grad is at same slot number (0) in both the
+  // nodes. bias_grad is at slot number 1 in Conv2DBackpropFilterWithBias, while
+  // it is at slot number 0 in BiasAddGrad.
+  const int kMergedNodeFilterGradOutputIdx = 0;
+  const int kMergedNodeBiasGradOutputIdx = 1;
+
+  for (const Edge* e : badd->out_edges()) {
+    if (e->IsControlEdge()) {
+      CHECK_NOTNULL((*g)->AddControlEdge(new_node, e->dst()));
+    } else {
+      CHECK_NOTNULL((*g)->AddEdge(new_node, kMergedNodeBiasGradOutputIdx,
+                                  e->dst(), e->dst_input()));
+    }
+  }
+
+  // Second, we will fix outgoing control and data edges from 'fltr' node.
+  for (const Edge* e : fltr->out_edges()) {
+    if (e->IsControlEdge()) {
+      CHECK_NOTNULL((*g)->AddControlEdge(new_node, e->dst()));
+    } else {
+      CHECK_NOTNULL((*g)->AddEdge(new_node, kMergedNodeFilterGradOutputIdx,
+                                  e->dst(), e->dst_input()));
+    }
+  }
+
+  // Copy device assigned to old node to new node.
+  // It's ok to use badd or fltr as we have enforced a check that
+  // both have same device assigned.
+  new_node->set_assigned_device_name(badd->assigned_device_name());
+
+  VLOG(1) << "MklLayoutRewritePass: Merged old node:" << badd->DebugString()
+          << ", and node: " << fltr->DebugString()
+          << ", into node:" << new_node->DebugString();
+
+  (*g)->RemoveNode(badd);
+  (*g)->RemoveNode(fltr);
+
+  return Status::OK();
+}
+
+Status MklLayoutRewritePass::MergeNode(std::unique_ptr<Graph>* g, Node* m,
+                                       Node* n) {
+  CHECK_NOTNULL(m);
+  CHECK_NOTNULL(n);
+
+  if (((m->type_string() == csinfo_.bias_add &&
+        n->type_string() == csinfo_.conv2d)) ||
+      ((n->type_string() == csinfo_.bias_add &&
+        m->type_string() == csinfo_.conv2d))) {
+    return this->MergeConv2DWithBiasAdd(g, m, n);
+  }
+
+  if (((m->type_string() == csinfo_.bias_add_grad &&
+        n->type_string() == csinfo_.conv2d_grad_filter)) ||
+      ((n->type_string() == csinfo_.bias_add_grad &&
+        m->type_string() == csinfo_.conv2d_grad_filter))) {
+    return this->MergeConv2DBackpropFilterWithBiasAddGrad(g, m, n);
+  }
+
+  return Status(error::Code::UNIMPLEMENTED,
+                "Unimplemented case for node merge optimization.");
+}
+
+//////////////////////////////////////////////////////////////////////////
+//           Helper functions for node rewrite
+//////////////////////////////////////////////////////////////////////////
+
+Status MklLayoutRewritePass::RewriteNode(std::unique_ptr<Graph>* g,
+                                         Node* orig_node,
+                                         const RewriteInfo* ri) {
+  CHECK_NOTNULL(ri);
+  CHECK_NOTNULL(orig_node);
+
+  VLOG(1) << "MklLayoutRewritePass: Original node:" << orig_node->DebugString();
+
+  // Get all inputs.
+  int num_inputs = orig_node->in_edges().size();
+
+  // Drop count for control edges from inputs
+  for (const Edge* e : orig_node->in_edges()) {
+    if (e->IsControlEdge()) {
+      num_inputs--;
+    }
+  }
+
+  gtl::InlinedVector<Node*, 4> control_edges;
+  gtl::InlinedVector<std::pair<Node*, int>, 4> inputs(num_inputs);
+  FillInputs(orig_node, &control_edges, &inputs);
+
+  // Build new node. We use same name as original node, but change the op name.
+  NodeBuilder nb(orig_node->name().c_str(), ri->new_name.c_str());
+  // Copy user-specified device assigned to original node to new node.
+  nb.Device(orig_node->def().device());
+  // Set up new inputs to the rewritten node.
+  Status s = SetUpInputs(g, inputs, &nb, orig_node);
+  if (s != Status::OK()) {
+    return s;
+  }
+
+  ri->copy_attrs(const_cast<const Node*>(orig_node), &nb);
+  // Set the Mkl layer label for this op.
+  nb.Attr("_kernel", mkl_op_registry::kMklOpLabel);
+
+  // Finalize graph and get new node.
+  Node* new_node = nullptr;
+  TF_CHECK_OK(nb.Finalize(&**g, &new_node));
+  CHECK_NOTNULL(new_node);
+
+  // Incoming data edges from 'orig_node' node to new 'new_node' node are
+  // already copied in BuildNode. We need to handle control edges now.
+  for (const Edge* e : orig_node->in_edges()) {
+    if (e->IsControlEdge()) {
+      CHECK_NOTNULL((*g)->AddControlEdge(e->src(), new_node));
+    }
+  }
+
+  // Copy outgoing edges from 'orig_node' node to new
+  // 'new_node' node, since the output also follows same ordering among
+  // Tensorflow tensors and Mkl tensors. We need to connect Tensorflow
+  // tensors appropriately. Specifically, nth output of the original node
+  // will become 2*nth output of the Mkl node for the interleaved ordering
+  // of the tensors. For the contiguous ordering of the tensors, it will be n.
+  // GetTensorDataIndex provides this mapping function.
+  for (const Edge* e : orig_node->out_edges()) {
+    if (e->IsControlEdge()) {
+      CHECK_NOTNULL((*g)->AddControlEdge(new_node, e->dst()));
+    } else {
+      CHECK_NOTNULL((*g)->AddEdge(new_node, GetTensorDataIndex(e->src_output(),
+                            e->src()->num_outputs()),
+                    e->dst(), e->dst_input()));
+    }
+  }
+
+  // Copy the runtime device assigned from original code to new node.
+  new_node->set_assigned_device_name(orig_node->assigned_device_name());
+
+  // Delete original node and mark new node as rewritten.
+  (*g)->RemoveNode(orig_node);
+
+  VLOG(1) << "MklLayoutRewritePass: New node:" << new_node->DebugString();
+  return Status::OK();
+}
+
+const MklLayoutRewritePass::RewriteInfo*
+MklLayoutRewritePass::CheckForNodeRewrite(const Node* n) const {
+  CHECK_NOTNULL(n);
+
+  // First check if node along with its type is supported by MKL layer.
+  // We do not want to rewrite an op into Mkl op if types are not supported.
+  // E.g., MklRelu does not support INT32. So we cannot rewrite Relu to
+  // MklRelu if type is INT32.
+  DataType T;
+  if (!GetNodeAttr(n->def(), "T", &T).ok()) {
+    return nullptr;
+  }
+
+  // We make an exception for __MklDummyConv2DWithBias and
+  // __MklConv2DBackpropFilterWithBias since their names do not match Mkl node
+  // names.
+  if (n->type_string() != csinfo_.conv2d_with_bias &&
+      n->type_string() != csinfo_.conv2d_grad_filter_with_bias &&
+      !mkl_op_registry::IsMklOp(mkl_op_registry::GetMklOpName(
+                                        n->type_string()), T)) {
+      return nullptr;
+  }
+
+  // For elementwise node, we reuse the Eigen implementation and pass the MKL
+  // metadata tensor through so we can avoid conversions. However, if all
+  // incoming edges are in TF format, we don't need all this overhead, so
+  // replace the elementwise node only if at least one of its parents is a MKL
+  // node.
+  //
+  // Identity nodes can also skip replacement if they are not being served by
+  // any MKL nodes.
+  //
+  // TODO(vrane): Add implementation for element-wise ops that doesn't reuse
+  // eigen code to reduce cross-library dependency.
+  VLOG(1) << "ELEMENTWISE: checking op: " << n->type_string();
+  if (mkl_op_registry::IsMklElementWiseOp(
+        mkl_op_registry::GetMklOpName(n->type_string()), T) ||
+      n->type_string().find("Identity") != string::npos) {
+    VLOG(1) << "ELEMENTWISE: op is elementwise: " << n->type_string();
+    bool incoming_mkl_edge = false;
+    int num_parent = 0;
+    for (auto parent : n->in_edges()) {
+      if (mkl_op_registry::IsMklOp(parent->src()->type_string(), T)) {
+        VLOG(1) << "ELEMENTWISE: parent " << num_parent++ << " is MKL op: "
+                << parent->src()->type_string();
+        incoming_mkl_edge = true;
+        break;
+      } else {
+        VLOG(1) << "ELEMENTWISE: parent " << num_parent++ << " is NON-MKL op: "
+                << parent->src()->type_string();
+      }
+    }
+    if (incoming_mkl_edge == false) {
+      VLOG(1) << "ELEMENTWISE: Skipping replacement of elementwise node which has no MKL "
+                 "parents.";
+      return nullptr;
+    } else {
+      VLOG(1) << "ELEMENTWISE: Replacing elementwise node " << n->type_string() <<
+        " which has MKL parents";
+    }
+  }
+
+  // We now check if rewrite rule applies for this op. If rewrite rule passes
+  // for this op, then we rewrite it to Mkl op.
+  // Find matching RewriteInfo and then check that rewrite rule applies.
+  for (auto ri = rinfo_.cbegin(); ri != rinfo_.cend(); ++ri) {
+    if (n->type_string().compare(ri->name) == 0 &&
+        ri->rewrite_rule(n)) {
+      return &*ri;
+    }
+  }
+
+  // Else return not found.
+  return nullptr;
+}
+
+///////////////////////////////////////////////////////////////////////////////
+//              Run function for the pass
+///////////////////////////////////////////////////////////////////////////////
+
+bool MklLayoutRewritePass::RunPass(std::unique_ptr<Graph>* g) {
+  bool result = false;
+  CHECK_NOTNULL(g);
+
+  DumpGraph("Before running MklLayoutRewritePass", &**g);
+
+  std::vector<Node*> order;
+  GetReversePostOrder(**g, &order);  // This will give us topological sort.
+  for (Node* n : order) {
+    // If node is not an op or it cannot run on CPU device, then skip.
+    if (!n->IsOp() || !CanOpRunOnCPUDevice(n)) {
+      continue;
+    }
+
+    Node* m = nullptr;
+    if ((m = CheckForNodeMerge(n)) != nullptr && CanOpRunOnCPUDevice(m)) {
+      // Check if the node 'n' can be merged with any other node. If it can
+      // be 'm' contains the node with which it can be merged.
+      string n1_name = n->name();
+      string n2_name = m->name();
+
+      VLOG(1) << "MklLayoutRewritePass: Scheduled nodes " << n1_name << " and "
+              << n2_name << " for merging";
+
+      if (MergeNode(g, n, m) == Status::OK()) {
+        VLOG(1) << "MklLayoutRewritePass: Merged nodes " << n1_name << " and "
+                << n2_name;
+        result = true;
+      }
+    }
+  }
+
+  DumpGraph("After running MklLayoutRewritePass(NodeMerge)", &**g);
+
+  order.clear();
+  GetReversePostOrder(**g, &order);  // This will give us topological sort.
+  for (Node* n : order) {
+    // If node is not an op or it cannot run on CPU device, then skip.
+    if (!n->IsOp() || !CanOpRunOnCPUDevice(n)) {
+      continue;
+    }
+
+    const RewriteInfo* ri = nullptr;
+    // We will first search if node is to be rewritten.
+    if ((ri = CheckForNodeRewrite(n)) != nullptr) {
+      string node_name = n->name();
+      string op_name = n->type_string();
+
+      VLOG(1) << "MklLayoutRewritePass: Scheduled node " << node_name
+              << " with op " << op_name << " for rewrite using"
+              << " layout optimization.";
+
+      if (RewriteNode(g, n, ri) == Status::OK()) {
+        VLOG(1) << "MklLayoutRewritePass: rewrote node " << node_name
+                << " with op " << op_name << " for Mkl layout optimization.";
+        result = true;
+      }
+    }
+  }
+
+  DumpGraph("After running MklLayoutRewritePass(NodeMerge+Rewrite)", &**g);
+
+  return result;
+}
+
+bool RunMklLayoutRewritePass(std::unique_ptr<Graph>* g) {
+  return MklLayoutRewritePass().RunPass(g);
+}
+
+Status MklLayoutRewritePass::Run(
+  const GraphOptimizationPassOptions& options) {
+  if (options.graph == nullptr && options.partition_graphs == nullptr) {
+    return Status::OK();
+  }
+
+  auto process_graph = [&](std::unique_ptr<Graph>* g) {
+    // Get the ownership of a graph
+    std::unique_ptr<Graph>* ng = std::move(g);
+    RunPass(ng);
+    // Return the ownership of a graph back
+    g->reset(ng->release());
+  };
+
+  if (kMklLayoutRewritePassGroup !=
+      OptimizationPassRegistry::POST_PARTITIONING) {
+    // For any pre-partitioning phase, a graph is stored in options.graph.
+    process_graph(options.graph);
+  } else {
+    // For post partitioning phase, graphs are stored in
+    // options.partition_graphs.
+    for (auto& pg : *options.partition_graphs) {
+      process_graph(&pg.second);
+    }
+  }
+
+  return Status::OK();
+}
+#endif  // INTEL_MKL_DNN
 }  // namespace tensorflow
 
 #endif
diff --git a/tensorflow/core/graph/mkl_layout_pass_test.cc b/tensorflow/core/graph/mkl_layout_pass_test.cc
index abc63e4f35..75f7ca2d4d 100644
--- a/tensorflow/core/graph/mkl_layout_pass_test.cc
+++ b/tensorflow/core/graph/mkl_layout_pass_test.cc
@@ -37,6 +37,9 @@ limitations under the License.
 #include "tensorflow/core/platform/test_benchmark.h"
 
 namespace tensorflow {
+
+#ifndef INTEL_MKL_DNN
+
 namespace {
 
 const char kCPUDevice[] = "/job:a/replica:0/task:0/device:CPU:0";
@@ -1881,6 +1884,1627 @@ static void BM_MklLayoutRewritePass(int iters, int op_nodes) {
 BENCHMARK(BM_MklLayoutRewritePass)->Arg(1000)->Arg(10000);
 
 }  // namespace
+
+#else  // INTEL_MKL_DNN
+
+namespace {
+
+const char kCPUDevice[] = "/job:a/replica:0/task:0/device:CPU:0";
+const char kGPUDevice[] = "/job:a/replica:0/task:0/device:GPU:0";
+
+static void InitGraph(const string& s, Graph* graph,
+                      const string& device = kCPUDevice) {
+  GraphDef graph_def;
+
+  auto parser = protobuf::TextFormat::Parser();
+  //  parser.AllowRelaxedWhitespace(true);
+  CHECK(parser.MergeFromString(s, &graph_def)) << s;
+  GraphConstructorOptions opts;
+  TF_CHECK_OK(ConvertGraphDefToGraph(opts, graph_def, graph));
+
+  for (Node* node : graph->nodes()) {
+    node->set_assigned_device_name(device);
+  }
+}
+
+class MklLayoutPassTest : public ::testing::Test {
+ public:
+  MklLayoutPassTest() : graph_(OpRegistry::Global()) {}
+
+  void InitGraph(const string& s, const string& device = kCPUDevice) {
+    ::tensorflow::InitGraph(s, &graph_, device);
+    original_ = CanonicalGraphString(&graph_);
+  }
+
+  static bool IncludeNode(const Node* n) { return n->IsOp(); }
+
+  static string EdgeId(const Node* n, int index) {
+    if (index == 0) {
+      return n->name();
+    } else if (index == Graph::kControlSlot) {
+      return strings::StrCat(n->name(), ":control");
+    } else {
+      return strings::StrCat(n->name(), ":", index);
+    }
+  }
+
+  string CanonicalGraphString(Graph* g) {
+    std::vector<string> nodes;
+    std::vector<string> edges;
+    for (const Node* n : g->nodes()) {
+      if (IncludeNode(n)) {
+        nodes.push_back(strings::StrCat(n->name(), "(", n->type_string(), ")"));
+      }
+    }
+    for (const Edge* e : g->edges()) {
+      if (IncludeNode(e->src()) && IncludeNode(e->dst())) {
+        edges.push_back(strings::StrCat(EdgeId(e->src(), e->src_output()), "->",
+                                        EdgeId(e->dst(), e->dst_input())));
+      }
+    }
+    // Canonicalize
+    std::sort(nodes.begin(), nodes.end());
+    std::sort(edges.begin(), edges.end());
+    return strings::StrCat(str_util::Join(nodes, ";"), "|",
+                           str_util::Join(edges, ";"));
+  }
+
+  string DoMklLayoutOptimizationPass() {
+    string before = CanonicalGraphString(&graph_);
+    LOG(ERROR) << "Before MKL layout rewrite pass: " << before;
+
+    std::unique_ptr<Graph>* ug = new std::unique_ptr<Graph>(&graph_);
+    RunMklLayoutRewritePass(ug);
+
+    string result = CanonicalGraphString(&graph_);
+    LOG(ERROR) << "After MKL layout rewrite pass:  " << result;
+    return result;
+  }
+
+  const string& OriginalGraph() const { return original_; }
+
+  Graph graph_;
+  string original_;
+};
+
+REGISTER_OP("Input").Output("o: float").SetIsStateful();
+REGISTER_OP("InputList").Output("o: N * float").Attr("N: int").SetIsStateful();
+REGISTER_OP("HalfInput").Output("o: half").SetIsStateful();
+REGISTER_OP("Int32Input").Output("o: int32").SetIsStateful();
+REGISTER_OP("_MklInput").Output("o: uint8").SetIsStateful();
+REGISTER_OP("_MklInput2").Output("o: uint8")
+                        .Output("o1: uint8").SetIsStateful();
+
+/////////////////////////////////////////////////////////////////////
+//  Unit tests related to node merge optiimization
+/////////////////////////////////////////////////////////////////////
+
+TEST_F(MklLayoutPassTest, Basic) {
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'B' op: 'Input'}"
+      "node { name: 'C' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
+      " input: ['A', 'B'] }"
+      "node { name: 'D' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
+      " input: ['A', 'B'] }");
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Input);B(Input);C(Zeta);D(Zeta)|"
+            "A->C;A->D;B->C:1;B->D:1");
+}
+
+// Test set 1: Conv2D + AddBias
+
+// C=Conv2D(A,B); E=BiasAdd(C,D); Z=Zeta(E,Y)
+TEST_F(MklLayoutPassTest, NodeMerge_Conv2DWithBias_Positive) {
+  CHECK_EQ(kTensorOrdering, MklTfTensorOrdering::TENSORS_CONTIGUOUS);
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'B' op: 'Input'}"
+      "node { name: 'C' op: 'Conv2D'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'      value { s: 'NCHW' } }"
+      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
+      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
+      " attr { key: 'padding'          value { s: 'SAME' } }"
+      " input: ['A', 'B']}"
+      "node { name: 'D' op: 'Input'}"
+      "node { name: 'E' op: 'BiasAdd'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'      value { s: 'NCHW' } }"
+      " input: ['C', 'D'] }"
+      "node { name: 'Y' op: 'Input'}"
+      "node { name: 'Z' op: 'Zeta'"
+      " attr {key: 'T'                 value { type: DT_FLOAT } }"
+      " input: ['E', 'Y']}");
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Input);B(Input);D(Input);DMT/_0(Const);DMT/_1(Const);"
+            "DMT/_2(Const);E(_MklConv2DWithBias);Y(Input);Z(Zeta)|A->E;"
+            "A:control->DMT/_0:control;A:control->DMT/_1:control;"
+            "A:control->DMT/_2:control;B->E:1;D->E:2;DMT/_0->E:3;DMT/_1->E:4;"
+            "DMT/_2->E:5;E->Z;Y->Z:1");
+}
+
+// Graph contains only Conv2D, no AddBias.
+TEST_F(MklLayoutPassTest, NodeMerge_Conv2DWithBias_Negative_NoAddBias) {
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'B' op: 'Input'}"
+      "node { name: 'C' op: 'Conv2D'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'      value { s: 'NCHW' } }"
+      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
+      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
+      " attr { key: 'padding'          value { s: 'SAME' } }"
+      " input: ['A', 'B']}");
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Input);B(Input);C(_MklConv2D);DMT/_0(Const);DMT/_1(Const)|"
+            "A->C;A:control->DMT/_0:control;A:control->DMT/_1:control;B->C:1;"
+            "DMT/_0->C:2;DMT/_1->C:3");
+}
+
+// Conv2D output does not go to BiasAdd.
+TEST_F(MklLayoutPassTest, NodeMerge_Conv2DWithBias_Negative_Dataflow1) {
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'B' op: 'Input'}"
+      "node { name: 'C' op: 'Conv2D'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'      value { s: 'NCHW' } }"
+      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
+      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
+      " attr { key: 'padding'          value { s: 'SAME' } }"
+      " input: ['A', 'B']}"
+      "node { name: 'D' op: 'Input'}"
+      "node { name: 'E' op: 'Input'}"
+      "node { name: 'F' op: 'BiasAdd'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'      value { s: 'NCHW' } }"
+      " input: ['D', 'E'] }");  // Output of _MklConv2D does not go to BiasAdd.
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Input);B(Input);C(_MklConv2D);D(Input);DMT/_0(Const);"
+            "DMT/_1(Const);E(Input);F(BiasAdd)|A->C;A:control->DMT/_0:control;"
+            "A:control->DMT/_1:control;B->C:1;D->F;DMT/_0->C:2;DMT/_1->C:3;"
+            "E->F:1");
+}
+
+// Conv2D has two outgoing edges: BiasAdd and some other dummy node (Zeta).
+// Merge should not be done in such case.
+TEST_F(MklLayoutPassTest, NodeMerge_Conv2DWithBias_Negative_Dataflow2) {
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'B' op: 'Input'}"
+      "node { name: 'C' op: 'Conv2D'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'      value { s: 'NCHW' } }"
+      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
+      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
+      " attr { key: 'padding'          value { s: 'SAME' } }"
+      " input: ['A', 'B']}"
+      "node { name: 'D' op: 'Input'}"
+      "node { name: 'E' op: 'Input'}"
+      "node { name: 'F' op: 'BiasAdd'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'      value { s: 'NCHW' } }"
+      " input: ['D', 'E'] }"  // Conv2D has two outputs.
+                              // No merge should happen.
+      "node { name: 'G' op: 'Zeta'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " input: ['C', 'E'] }");
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Input);B(Input);C(_MklConv2D);D(Input);DMT/_0(Const);"
+            "DMT/_1(Const);E(Input);F(BiasAdd);G(Zeta)|A->C;"
+            "A:control->DMT/_0:control;A:control->DMT/_1:control;B->C:1;C->G;"
+            "D->F;DMT/_0->C:2;DMT/_1->C:3;E->F:1;E->G:1");
+}
+
+// data_format attribute value mismatch. Merge should not be done
+// in such case.
+TEST_F(MklLayoutPassTest, NodeMerge_Conv2DWithBias_Negative_AttrMismatch) {
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'B' op: 'Input'}"
+      "node { name: 'C' op: 'Conv2D'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'      value { s: 'NCHW' } }"
+      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
+      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
+      " attr { key: 'padding'          value { s: 'SAME' } }"
+      " input: ['A', 'B']}"
+      "node { name: 'D' op: 'Input'}"
+      "node { name: 'E' op: 'BiasAdd'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'      value { s: 'NHCW' } }"
+      " input: ['C', 'D'] }");
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Input);B(Input);C(_MklConv2D);D(Input);DMT/_0(Const);"
+            "DMT/_1(Const);E(BiasAdd)|A->C;A:control->DMT/_0:control;"
+            "A:control->DMT/_1:control;B->C:1;C->E;D->E:1;DMT/_0->C:2;"
+            "DMT/_1->C:3");
+}
+
+// Test set 2: BiasAddGrad + Conv2DBackpropFilter fusion tests
+
+TEST_F(MklLayoutPassTest, NodeMerge_Conv2DBackpropFilterFusion_Positive) {
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'B' op: 'Int32Input'}"
+      "node { name: 'C' op: 'Input'}"
+      "node { name: 'D' op: 'Conv2DBackpropFilter'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'      value { s: 'NCHW' } }"
+      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
+      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
+      " attr { key: 'padding'          value { s: 'SAME' } }"
+      " input: ['A', 'B', 'C'] }"
+      "node { name: 'E' op: 'BiasAddGrad'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'      value { s: 'NCHW' } }"
+      " input: ['C'] }");
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Input);B(Int32Input);C(Input);"
+            "D(_MklConv2DBackpropFilterWithBias);DMT/_0(Const);DMT/_1(Const);"
+            "DMT/_2(Const)|A->D;A:control->DMT/_0:control;"
+            "A:control->DMT/_1:control;A:control->DMT/_2:control;B->D:1;C->D:2;"
+            "DMT/_0->D:3;DMT/_1->D:4;DMT/_2->D:5");
+}
+
+// BiasAddGrad fusion in the presence of BackpropFilter. But nodes do not match
+// criteria for rewrite. So rewrite should not happen. 3rd input of
+// Conv2DBackpropFilter is different than input to BiasAddGrad.
+TEST_F(MklLayoutPassTest, NodeMerge_Conv2DBackpropFilterFusion_Negative1) {
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'B' op: 'Int32Input'}"
+      "node { name: 'C' op: 'Input'}"
+      "node { name: 'D' op: 'Conv2DBackpropFilter'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'      value { s: 'NCHW' } }"
+      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
+      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
+      " attr { key: 'padding'          value { s: 'SAME' } }"
+      " input: ['A', 'B', 'C'] }"
+      "node { name: 'E' op: 'BiasAddGrad'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'      value { s: 'NCHW' } }"
+      " input: ['A'] }");
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Input);B(Int32Input);C(Input);"
+            "D(_MklConv2DBackpropFilter);DMT/_0(Const);DMT/_1(Const);"
+            "DMT/_2(Const);E(BiasAddGrad)|A->D;A->E;A:control->DMT/_0:control;"
+            "A:control->DMT/_1:control;A:control->DMT/_2:control;B->D:1;C->D:2;"
+            "DMT/_0->D:3;DMT/_1->D:4;DMT/_2->D:5");
+}
+
+// BiasAddGrad fusion, but nodes do not match criteria for fusion.
+// Different input formats.
+TEST_F(MklLayoutPassTest, NodeMerge_Conv2DBackpropFilterFusion_Negative2) {
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'B' op: 'Int32Input'}"
+      "node { name: 'C' op: 'Input'}"
+      "node { name: 'D' op: 'Conv2DBackpropFilter'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'      value { s: 'NCHW' } }"
+      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
+      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
+      " attr { key: 'padding'          value { s: 'SAME' } }"
+      " input: ['A', 'B', 'C'] }"
+      "node { name: 'E' op: 'BiasAddGrad'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'      value { s: 'NHWC' } }"
+      " input: ['A'] }");
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Input);B(Int32Input);C(Input);"
+            "D(_MklConv2DBackpropFilter);DMT/_0(Const);DMT/_1(Const);"
+            "DMT/_2(Const);E(BiasAddGrad)|A->D;A->E;A:control->DMT/_0:control;"
+            "A:control->DMT/_1:control;A:control->DMT/_2:control;B->D:1;C->D:2;"
+            "DMT/_0->D:3;DMT/_1->D:4;DMT/_2->D:5");
+}
+
+// BiasAddGrad fusion in the presence of BackpropFilter only. Fusion is done
+// before node rewrite. Check this ordering.
+TEST_F(MklLayoutPassTest, NodeMerge_Conv2DBackpropFilterFusion_Negative3) {
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'B' op: 'Input'}"
+      "node { name: 'C' op: 'Input'}"
+      "node { name: 'M' op: '_MklInput'}"
+      "node { name: 'N' op: '_MklInput'}"
+      "node { name: 'O' op: '_MklInput'}"
+      "node { name: 'D' op: '_MklConv2DWithBias'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'      value { s: 'NCHW' } }"
+      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
+      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
+      " attr { key: 'padding'          value { s: 'SAME' } }"
+      " input: ['A', 'B', 'C', 'M', 'N', 'O']}"
+      "node { name: 'E' op: 'Zeta'"
+      " attr {key: 'T'                 value { type: DT_FLOAT } }"
+      " input: ['D', 'A']}"
+      "node { name: 'F' op: 'Int32Input'}"
+      "node { name: 'G' op: '_MklConv2DBackpropFilter'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'      value { s: 'NCHW' } }"
+      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
+      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
+      " attr { key: 'padding'          value { s: 'SAME' } }"
+      " input: ['E', 'F', 'A', 'M', 'N', 'O'] }"
+      "node { name: 'H' op: 'BiasAddGrad'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'      value { s: 'NCHW' } }"
+      " input: ['E'] }");
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Input);B(Input);C(Input);D(_MklConv2DWithBias);"
+            "E(Zeta);F(Int32Input);G(_MklConv2DBackpropFilter);H(BiasAddGrad);"
+            "M(_MklInput);N(_MklInput);O(_MklInput)|A->D;A->E:1;A->G:2;B->D:1;"
+            "C->D:2;D->E;E->G;E->H;F->G:1;M->D:3;M->G:3;N->D:4;N->G:4;O->D:5;"
+            "O->G:5");
+}
+
+// C=Conv2D(A,B); E=BiasAdd(C,D); Y=Zeta(E,X);
+// G=Conv2DBackpropInput(F,B,E)
+// This is a case of node rewrite followed by node merge followed by connecting
+// filter output of Conv2DWithBias to filter input of Conv2DBackpropInput.
+TEST_F(MklLayoutPassTest, NodeMerge_Conv2DWithBias_ConvBpropInput_FilterFwd) {
+  CHECK_EQ(kTensorOrdering, MklTfTensorOrdering::TENSORS_CONTIGUOUS);
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'B' op: 'Input'}"
+      "node { name: 'C' op: 'Conv2D'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'      value { s: 'NCHW' } }"
+      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
+      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
+      " attr { key: 'padding'          value { s: 'SAME' } }"
+      " input: ['A', 'B']}"
+      "node { name: 'D' op: 'Input'}"
+      "node { name: 'E' op: 'BiasAdd'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'      value { s: 'NCHW' } }"
+      " input: ['C', 'D'] }"
+      "node { name: 'X' op: 'Input'}"
+      "node { name: 'Y' op: 'Zeta'"
+      " attr {key: 'T'                 value { type: DT_FLOAT } }"
+      " input: ['E', 'X']}"
+      "node { name: 'F' op: 'Int32Input'}"
+      "node { name: 'G' op: 'Conv2DBackpropInput'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'      value { s: 'NCHW' } }"
+      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
+      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
+      " attr { key: 'padding'          value { s: 'SAME' } }"
+      " input: ['F', 'B', 'E']}"
+      "node { name: 'Z' op: 'Zeta'"
+      " attr {key: 'T'                 value { type: DT_FLOAT } }"
+      " input: ['G', 'X']}");
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Input);B(Input);D(Input);DMT/_0(Const);DMT/_1(Const);"
+            "DMT/_2(Const);DMT/_3(Const);E(_MklConv2DWithBias);F(Int32Input);"
+            "G(_MklConv2DBackpropInput);X(Input);Y(Zeta);Z(Zeta)|"
+            "A->E;A:control->DMT/_0:control;A:control->DMT/_1:control;"
+            "A:control->DMT/_2:control;B->E:1;D->E:2;DMT/_0->E:3;"
+            "DMT/_1->E:4;DMT/_2->E:5;DMT/_3->G:3;E->G:2;E->Y;E:1->G:1;E:2->G:5;"
+            "E:3->G:4;F->G;F:control->DMT/_3:control;G->Z;X->Y:1;X->Z:1");
+}
+
+/////////////////////////////////////////////////////////////////////
+//  Unit tests related to rewriting node to Mkl node
+/////////////////////////////////////////////////////////////////////
+
+// Single Conv2D Op; No Mkl layer on the input and on the output.
+// We will generate dummy Mkl tensor as 2nd input of Conv2D.
+TEST_F(MklLayoutPassTest, NodeRewrite_Conv2D_Basic) {
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'B' op: 'Input'}"
+      "node { name: 'C' op: 'Conv2D'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'      value { s: 'NCHW' } }"
+      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
+      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
+      " attr { key: 'padding'          value { s: 'SAME' } }"
+      " input: ['A', 'B']}"
+      "node { name: 'D' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
+      " input: ['B', 'C'] }");
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Input);B(Input);C(_MklConv2D);D(Zeta);DMT/_0(Const);"
+            "DMT/_1(Const)|A->C;A:control->DMT/_0:control;"
+            "A:control->DMT/_1:control;B->C:1;B->D;C->D:1;DMT/_0->C:2;"
+            "DMT/_1->C:3");
+}
+
+// 2 Conv2D Ops in sequence. Both should get transformed and 1st Conv2D will
+// have 2 outputs, both of which will be inputs to next Conv2D.
+TEST_F(MklLayoutPassTest, NodeRewrite_Conv2D_Positive1) {
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'B' op: 'Input'}"
+      "node { name: 'C' op: 'Conv2D'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'      value { s: 'NCHW' } }"
+      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
+      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
+      " attr { key: 'padding'          value { s: 'SAME' } }"
+      " input: ['A', 'B']}"
+      "node { name: 'D' op: 'Conv2D'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'      value { s: 'NCHW' } }"
+      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
+      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
+      " attr { key: 'padding'          value { s: 'SAME' } }"
+      " input: ['A', 'C']}"
+      "node { name: 'E' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
+      " input: ['C', 'D'] }");
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Input);B(Input);C(_MklConv2D);D(_MklConv2D);DMT/_0(Const);"
+            "DMT/_1(Const);DMT/_2(Const);E(Zeta)|A->C;A->D;"
+            "A:control->DMT/_0:control;A:control->DMT/_1:control;"
+            "A:control->DMT/_2:control;B->C:1;C->D:1;C->E;"
+            "C:2->D:3;D->E:1;DMT/_0->C:2;DMT/_1->C:3;DMT/_2->D:2");
+}
+
+// Conv2D with INT32 which is not supported by Mkl
+TEST_F(MklLayoutPassTest, NodeRewrite_Conv2D_Negative_UnsupportedType) {
+  InitGraph(
+      "node { name: 'A' op: 'HalfInput'}"
+      "node { name: 'B' op: 'HalfInput'}"
+      "node { name: 'C' op: 'Conv2D'"
+      " attr { key: 'T'                value { type: DT_HALF } }"
+      " attr { key: 'data_format'      value { s: 'NCHW' } }"
+      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
+      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
+      " attr { key: 'padding'          value { s: 'SAME' } }"
+      " input: ['A', 'B']}"
+      "node { name: 'D' op: 'Zeta' attr { key: 'T' value { type: DT_HALF } }"
+      " input: ['B', 'C'] }");
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(HalfInput);B(HalfInput);C(Conv2D);D(Zeta)|"
+            "A->C;B->C:1;B->D;C->D:1");
+}
+
+TEST_F(MklLayoutPassTest, NodeRewrite_Conv2DGradFilter_Positive) {
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'B' op: 'Int32Input'}"
+      "node { name: 'C' op: 'Input'}"
+      "node { name: 'D' op: 'Conv2DBackpropFilter'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'      value { s: 'NCHW' } }"
+      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
+      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
+      " attr { key: 'padding'          value { s: 'SAME' } }"
+      " input: ['A', 'B', 'C']}"
+      "node { name: 'E' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
+      " input: ['A', 'D'] }");
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Input);B(Int32Input);C(Input);D(_MklConv2DBackpropFilter);"
+            "DMT/_0(Const);DMT/_1(Const);DMT/_2(Const);E(Zeta)|"
+            "A->D;A->E;A:control->DMT/_0:control;A:control->DMT/_1:control;"
+            "A:control->DMT/_2:control;B->D:1;C->D:2;D->E:1;DMT/_0->D:3;"
+            "DMT/_1->D:4;DMT/_2->D:5");
+}
+
+TEST_F(MklLayoutPassTest, NodeRewrite_Conv2DGradInput_Positive) {
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'B' op: 'Int32Input'}"
+      "node { name: 'C' op: 'Input'}"
+      "node { name: 'D' op: 'Conv2DBackpropInput'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'      value { s: 'NCHW' } }"
+      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
+      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
+      " attr { key: 'padding'          value { s: 'SAME' } }"
+      " input: ['B', 'A', 'C']}"
+      "node { name: 'E' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
+      " input: ['A', 'D'] }");
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Input);B(Int32Input);C(Input);D(_MklConv2DBackpropInput);"
+            "DMT/_0(Const);DMT/_1(Const);DMT/_2(Const);E(Zeta)|"
+            "A->D:1;A->E;B->D;B:control->DMT/_0:control;"
+            "B:control->DMT/_1:control;B:control->DMT/_2:control;C->D:2;"
+            "D->E:1;DMT/_0->D:3;DMT/_1->D:4;DMT/_2->D:5");
+}
+
+// Check that we never rewrite BiasAddGrad.
+TEST_F(MklLayoutPassTest, NodeRewrite_BiasAddGrad_Positive) {
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'B' op: 'Input'}"
+      "node { name: 'C' op: 'Polygamma'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " input: ['A', 'B']}"
+      "node { name: 'D' op: 'Zeta'"
+      " attr {key: 'T'                 value { type: DT_FLOAT } }"
+      " input: ['C', 'A']}"
+      "node { name: 'E' op: 'BiasAddGrad'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'      value { s: 'NCHW' } }"
+      " input: ['D'] }");
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Input);B(Input);C(Polygamma);D(Zeta);E(BiasAddGrad)|"
+            "A->C;A->D:1;B->C:1;C->D;D->E");
+}
+
+// Check that we never rewrite BiasAddGrad.
+TEST_F(MklLayoutPassTest, NodeRewrite_BiasAddGrad_Positive1) {
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'B' op: 'Input'}"
+      "node { name: 'C' op: 'MatMul'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " attr { key: 'transpose_a'      value { b: false } }"
+      " attr { key: 'transpose_b'      value { b: false } }"
+      " input: ['A', 'B']}"
+      "node { name: 'D' op: 'Zeta'"
+      " attr {key: 'T'                 value { type: DT_FLOAT } }"
+      " input: ['C', 'A']}"
+      "node { name: 'E' op: 'BiasAddGrad'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'      value { s: 'NCHW' } }"
+      " input: ['D'] }");
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Input);B(Input);C(MatMul);D(Zeta);E(BiasAddGrad)|"
+            "A->C;A->D:1;B->C:1;C->D;D->E");
+}
+
+// Check that we never rewrite BiasAddGrad.
+TEST_F(MklLayoutPassTest, NodeRewrite_BiasAddGrad_Positive2) {
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'B' op: 'Input'}"
+      "node { name: 'M' op: '_MklInput'}"
+      "node { name: 'N' op: '_MklInput'}"
+      "node { name: 'C' op: '_MklConv2D'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'      value { s: 'NCHW' } }"
+      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
+      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
+      " attr { key: 'padding'          value { s: 'SAME' } }"
+      " input: ['A', 'B', 'M', 'N']}"
+      "node { name: 'D' op: 'Zeta'"
+      " attr {key: 'T'                 value { type: DT_FLOAT } }"
+      " input: ['C', 'A']}"
+      "node { name: 'E' op: 'BiasAddGrad'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'      value { s: 'NCHW' } }"
+      " input: ['D'] }");
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Input);B(Input);C(_MklConv2D);D(Zeta);E(BiasAddGrad);"
+            "M(_MklInput);N(_MklInput)|A->C;A->D:1;B->C:1;C->D;D->E;"
+            "M->C:2;N->C:3");
+}
+
+// Concat Op test: Concat with no Mkl layer feeding it
+TEST_F(MklLayoutPassTest, NodeRewrite_Concat_Basic) {
+  InitGraph(
+      "node { name: 'A' op: 'Const' "
+      " attr { key: 'dtype' value { type: DT_INT32 } }"
+      " attr { key: 'value' value { "
+      "    tensor { dtype: DT_INT32 tensor_shape { dim { size: 1 } } "
+      "    int_val: 0 } } } }"
+      "node { name: 'B' op: 'InputList'"
+      " attr { key: 'N'                value { i: 2 } }}"
+      "node { name: 'C' op: 'Input'}"
+      "node { name: 'D' op: 'Concat'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " attr { key: 'N'                value { i: 2 } }"
+      " input: ['A', 'B:0', 'B:1']}"
+      "node { name: 'E' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
+      " input: ['C', 'D'] }");
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Const);B(InputList);C(Input);D(_MklConcat);DMT/_0(Const);"
+            "DMT/_1(Const);DMT/_2(Const);E(Zeta)|A->D;A:control->DMT/_0:control;"
+            "A:control->DMT/_1:control;A:control->DMT/_2:control;B->D:1;"
+            "B:1->D:2;C->E;D->E:1;DMT/_0->D:3;DMT/_1->D:4;DMT/_2->D:5");
+}
+
+// Concat with 2 Mkl layers feeding it
+TEST_F(MklLayoutPassTest, NodeRewrite_Concat_Input_Mkl) {
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'B' op: 'Input'}"
+      "node { name: 'C' op: 'Input'}"
+      "node { name: 'D' op: 'Input'}"
+      "node { name: 'E' op: 'Conv2D'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'      value { s: 'NCHW' } }"
+      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
+      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
+      " attr { key: 'padding'          value { s: 'SAME' } }"
+      " input: ['A', 'B']}"
+      "node { name: 'F' op: 'Conv2D'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'      value { s: 'NCHW' } }"
+      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
+      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
+      " attr { key: 'padding'          value { s: 'SAME' } }"
+      " input: ['C', 'D']}"
+      "node { name: 'G' op: 'Const' "
+      " attr { key: 'dtype' value { type: DT_INT32 } }"
+      " attr { key: 'value' value { "
+      "    tensor { dtype: DT_INT32 tensor_shape { dim { size: 1 } } "
+      "    int_val: 0 } } } }"
+      "node { name: 'H' op: 'Concat'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " attr { key: 'N'                value { i: 2 } }"
+      " input: ['G', 'E', 'F']}"
+      "node { name: 'I' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
+      " input: ['A', 'H'] }");
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Input);B(Input);C(Input);D(Input);DMT/_0(Const);DMT/_1(Const);"
+            "DMT/_2(Const);DMT/_3(Const);DMT/_4(Const);E(_MklConv2D);"
+            "F(_MklConv2D);G(Const);H(_MklConcat);I(Zeta)|A->E;A->I;"
+            "A:control->DMT/_2:control;A:control->DMT/_3:control;"
+            "B->E:1;C->F;C:control->DMT/_0:control;C:control->DMT/_1:control;"
+            "D->F:1;DMT/_0->F:2;DMT/_1->F:3;DMT/_2->E:2;DMT/_3->E:3;"
+            "DMT/_4->H:3;E->H:1;E:2->H:4;F->H:2;F:2->H:5;G->H;"
+            "G:control->DMT/_4:control;H->I:1");
+}
+
+// Concat with 1 Mkl and 1 non-Mkl layer feeding it
+TEST_F(MklLayoutPassTest, NodeRewrite_Concat_Input_MixedMkl) {
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'B' op: 'Input'}"
+      "node { name: 'C' op: 'Input'}"
+      "node { name: 'D' op: 'Input'}"
+      "node { name: 'E' op: 'Conv2D'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'      value { s: 'NCHW' } }"
+      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
+      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
+      " attr { key: 'padding'          value { s: 'SAME' } }"
+      " input: ['A', 'B']}"
+      "node { name: 'F' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
+      " input: ['C', 'D']}"
+      "node { name: 'G' op: 'Const' "
+      " attr { key: 'dtype' value { type: DT_INT32 } }"
+      " attr { key: 'value' value { "
+      "    tensor { dtype: DT_INT32 tensor_shape { dim { size: 1 } } "
+      "    int_val: 0 } } } }"
+      "node { name: 'H' op: 'Concat'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " attr { key: 'N'                value { i: 2 } }"
+      " input: ['G', 'E', 'F']}"
+      "node { name: 'I' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
+      " input: ['A', 'H'] }");
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Input);B(Input);C(Input);D(Input);DMT/_0(Const);DMT/_1(Const);"
+            "DMT/_2(Const);DMT/_3(Const);E(_MklConv2D);F(Zeta);G(Const);"
+            "H(_MklConcat);I(Zeta)|A->E;A->I;A:control->DMT/_0:control;"
+            "A:control->DMT/_1:control;B->E:1;C->F;D->F:1;DMT/_0->E:2;"
+            "DMT/_1->E:3;DMT/_2->H:3;DMT/_3->H:5;E->H:1;E:2->H:4;F->H:2;"
+            "G->H;G:control->DMT/_2:control;G:control->DMT/_3:control;H->I:1");
+}
+
+// ConcatV2 Op test: ConcatV2 with no Mkl layer feeding it
+TEST_F(MklLayoutPassTest, NodeRewrite_ConcatV2_Basic) {
+  InitGraph(
+      "node { name: 'A' op: 'Const' "
+      " attr { key: 'dtype' value { type: DT_INT32 } }"
+      " attr { key: 'value' value { "
+      "    tensor { dtype: DT_INT32 tensor_shape { dim { size: 1 } } "
+      "    int_val: 0 } } } }"
+      "node { name: 'B' op: 'InputList'"
+      " attr { key: 'N'                value { i: 2 } }}"
+      "node { name: 'C' op: 'Input'}"
+      "node { name: 'D' op: 'ConcatV2'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " attr { key: 'Tidx'             value { type: DT_INT32 } }"
+      " attr { key: 'N'                value { i: 2 } }"
+      " input: ['B:0', 'B:1', 'A']}"
+      "node { name: 'E' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
+      " input: ['C', 'D'] }");
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Const);B(InputList);C(Input);D(_MklConcatV2);DMT/_0(Const);"
+            "DMT/_1(Const);DMT/_2(Const);E(Zeta)|A->D:2;B->D;B:1->D:1;"
+            "B:control->DMT/_0:control;B:control->DMT/_1:control;"
+            "B:control->DMT/_2:control;C->E;D->E:1;DMT/_0->D:3;"
+            "DMT/_1->D:4;DMT/_2->D:5");
+}
+
+// ConcatV2 with 2 Mkl layers feeding it
+TEST_F(MklLayoutPassTest, NodeRewrite_ConcatV2_Input_Mkl) {
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'B' op: 'Input'}"
+      "node { name: 'C' op: 'Input'}"
+      "node { name: 'D' op: 'Input'}"
+      "node { name: 'E' op: 'Conv2D'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'      value { s: 'NCHW' } }"
+      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
+      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
+      " attr { key: 'padding'          value { s: 'SAME' } }"
+      " input: ['A', 'B']}"
+      "node { name: 'F' op: 'Conv2D'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'      value { s: 'NCHW' } }"
+      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
+      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
+      " attr { key: 'padding'          value { s: 'SAME' } }"
+      " input: ['C', 'D']}"
+      "node { name: 'G' op: 'Const' "
+      " attr { key: 'dtype' value { type: DT_INT32 } }"
+      " attr { key: 'value' value { "
+      "    tensor { dtype: DT_INT32 tensor_shape { dim { size: 1 } } "
+      "    int_val: 0 } } } }"
+      "node { name: 'H' op: 'ConcatV2'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " attr { key: 'Tidx'             value { type: DT_INT32 } }"
+      " attr { key: 'N'                value { i: 2 } }"
+      " input: ['E', 'F', 'G']}"
+      "node { name: 'I' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
+      " input: ['A', 'H'] }");
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Input);B(Input);C(Input);D(Input);DMT/_0(Const);DMT/_1(Const);"
+            "DMT/_2(Const);DMT/_3(Const);DMT/_4(Const);E(_MklConv2D);"
+            "F(_MklConv2D);G(Const);H(_MklConcatV2);I(Zeta)|A->E;A->I;"
+            "A:control->DMT/_2:control;A:control->DMT/_3:control;B->E:1;C->F;"
+            "C:control->DMT/_0:control;C:control->DMT/_1:control;"
+            "D->F:1;DMT/_0->F:2;DMT/_1->F:3;DMT/_2->E:2;DMT/_3->E:3;"
+            "DMT/_4->H:5;E->H;E:2->H:3;E:control->DMT/_4:control;F->H:1;"
+            "F:2->H:4;G->H:2;H->I:1");
+}
+
+// ConcatV2 with 1 Mkl and 1 non-Mkl layer feeding it
+TEST_F(MklLayoutPassTest, NodeRewrite_ConcatV2_Input_MixedMkl) {
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'B' op: 'Input'}"
+      "node { name: 'C' op: 'Input'}"
+      "node { name: 'D' op: 'Input'}"
+      "node { name: 'E' op: 'Conv2D'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'      value { s: 'NCHW' } }"
+      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
+      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
+      " attr { key: 'padding'          value { s: 'SAME' } }"
+      " input: ['A', 'B']}"
+      "node { name: 'F' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
+      " input: ['C', 'D']}"
+      "node { name: 'G' op: 'Const' "
+      " attr { key: 'dtype' value { type: DT_INT32 } }"
+      " attr { key: 'value' value { "
+      "    tensor { dtype: DT_INT32 tensor_shape { dim { size: 1 } } "
+      "    int_val: 0 } } } }"
+      "node { name: 'H' op: 'ConcatV2'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " attr { key: 'Tidx'             value { type: DT_INT32 } }"
+      " attr { key: 'N'                value { i: 2 } }"
+      " input: ['E', 'F', 'G']}"
+      "node { name: 'I' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
+      " input: ['A', 'H'] }");
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Input);B(Input);C(Input);D(Input);DMT/_0(Const);DMT/_1(Const);"
+            "DMT/_2(Const);DMT/_3(Const);E(_MklConv2D);F(Zeta);G(Const);"
+            "H(_MklConcatV2);I(Zeta)|A->E;A->I;A:control->DMT/_0:control;"
+            "A:control->DMT/_1:control;B->E:1;C->F;D->F:1;DMT/_0->E:2;"
+            "DMT/_1->E:3;DMT/_2->H:4;DMT/_3->H:5;E->H;E:2->H:3;"
+            "E:control->DMT/_2:control;E:control->DMT/_3:control;F->H:1;"
+            "G->H:2;H->I:1");
+}
+
+TEST_F(MklLayoutPassTest, NodeRewrite_Relu_Positive) {
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'B' op: 'Relu'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " input: ['A'] }"
+      "node { name: 'C' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
+      " input: ['A', 'B'] }");
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Input);B(_MklRelu);C(Zeta);DMT/_0(Const)|A->B;A->C;"
+            "A:control->DMT/_0:control;B->C:1;DMT/_0->B:1");
+}
+
+TEST_F(MklLayoutPassTest, NodeRewrite_ReluGrad_Positive) {
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'B' op: 'Input'}"
+      "node { name: 'C' op: 'ReluGrad'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " input: ['A', 'B'] }"
+      "node { name: 'D' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
+      " input: ['A', 'C'] }");
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Input);B(Input);C(_MklReluGrad);D(Zeta);DMT/_0(Const);"
+            "DMT/_1(Const)|A->C;A->D;A:control->DMT/_0:control;"
+            "A:control->DMT/_1:control;B->C:1;C->D:1;DMT/_0->C:2;DMT/_1->C:3");
+}
+
+TEST_F(MklLayoutPassTest, NodeRewrite_ReluReluGrad_Positive) {
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'B' op: 'Relu'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " input: ['A'] }"
+      "node { name: 'C' op: 'ReluGrad'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " input: ['A', 'B'] }"
+      "node { name: 'D' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
+      " input: ['A', 'C'] }");
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Input);B(_MklRelu);C(_MklReluGrad);D(Zeta);DMT/_0(Const);"
+            "DMT/_1(Const)|A->B;A->C;A->D;A:control->DMT/_0:control;"
+            "A:control->DMT/_1:control;B->C:1;B:1->C:3;C->D:1;DMT/_0->B:1;"
+            "DMT/_1->C:2");
+}
+
+TEST_F(MklLayoutPassTest, NodeRewrite_AvgPool_Positive) {
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'B' op: 'AvgPool'"
+      " attr { key: 'T'            value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'  value { s: 'NCHW' } }"
+      " attr { key: 'ksize'        value { list: {i: 1, i:1, i:3, i:3} } }"
+      " attr { key: 'padding'      value { s: 'VALID' } }"
+      " attr { key: 'strides'      value { list: {i: 1, i:1, i:2, i:2} } }"
+      " input: ['A'] }"
+      "node { name: 'C' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
+      " input: ['A', 'B'] }");
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Input);B(_MklAvgPool);C(Zeta);DMT/_0(Const)|A->B;A->C;"
+            "A:control->DMT/_0:control;B->C:1;DMT/_0->B:1");
+}
+
+TEST_F(MklLayoutPassTest, NodeRewrite_AvgPoolGrad_Positive) {
+  InitGraph(
+      "node { name: 'A' op: 'Int32Input'}"
+      "node { name: 'B' op: 'Input'}"
+      "node { name: 'C' op: 'AvgPoolGrad' "
+      " attr { key: 'T'            value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'  value { s: 'NCHW' } }"
+      " attr { key: 'ksize'        value { list: {i: 1, i:1, i:3, i:3} } }"
+      " attr { key: 'padding'      value { s: 'VALID' } }"
+      " attr { key: 'strides'      value { list: {i: 1, i:1, i:2, i:2} } }"
+      " input: ['A', 'B'] }"
+      "node { name: 'D' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
+      " input: ['B', 'C'] }");
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Int32Input);B(Input);C(_MklAvgPoolGrad);D(Zeta);DMT/_0(Const);"
+            "DMT/_1(Const)|A->C;A:control->DMT/_0:control;"
+            "A:control->DMT/_1:control;B->C:1;B->D;C->D:1;DMT/_0->C:2;"
+            "DMT/_1->C:3");
+}
+
+TEST_F(MklLayoutPassTest, NodeRewrite_AvgPoolAvgPoolGrad_Positive) {
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'I' op: 'Int32Input'}"
+      "node { name: 'B' op: 'AvgPool'"
+      " attr { key: 'T'            value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'  value { s: 'NCHW' } }"
+      " attr { key: 'ksize'        value { list: {i: 1, i:1, i:3, i:3} } }"
+      " attr { key: 'padding'      value { s: 'VALID' } }"
+      " attr { key: 'strides'      value { list: {i: 1, i:1, i:2, i:2} } }"
+      " input: ['A'] }"
+      "node { name: 'C' op: 'AvgPoolGrad' "
+      " attr { key: 'T'            value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'  value { s: 'NCHW' } }"
+      " attr { key: 'ksize'        value { list: {i: 1, i:1, i:3, i:3} } }"
+      " attr { key: 'padding'      value { s: 'VALID' } }"
+      " attr { key: 'strides'      value { list: {i: 1, i:1, i:2, i:2} } }"
+      " input: ['I', 'B'] }"
+      "node { name: 'D' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
+      " input: ['A', 'C'] }");
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Input);B(_MklAvgPool);C(_MklAvgPoolGrad);D(Zeta);DMT/_0(Const);"
+            "DMT/_1(Const);I(Int32Input)|A->B;A->D;A:control->DMT/_0:control;"
+            "B->C:1;B:1->C:3;C->D:1;DMT/_0->B:1;DMT/_1->C:2;I->C;"
+            "I:control->DMT/_1:control");
+}
+
+TEST_F(MklLayoutPassTest, NodeRewrite_FusedBatchNormGrad_Positive) {
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'B' op: 'Input'}"
+      "node { name: 'C' op: 'Input'}"
+      "node { name: 'D' op: 'Input'}"
+      "node { name: 'E' op: 'Input'}"
+      "node { name: 'F' op: 'FusedBatchNormGrad'"
+      " attr { key: 'T'            value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'  value { s: 'NCHW' } }"
+      " attr { key: 'epsilon'      value { f: 0.0001 } }"
+      " attr { key: 'is_training'  value { b: true } }"
+      " input: ['A', 'B', 'C', 'D', 'E'] }"
+      "node { name: 'G' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
+      " input: ['A', 'F'] }");
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Input);B(Input);C(Input);D(Input);DMT/_0(Const);DMT/_1(Const);"
+            "DMT/_2(Const);DMT/_3(Const);DMT/_4(Const);E(Input);"
+            "F(_MklFusedBatchNormGrad);G(Zeta)|A->F;A->G;"
+            "A:control->DMT/_0:control;A:control->DMT/_1:control;"
+            "A:control->DMT/_2:control;A:control->DMT/_3:control;"
+            "A:control->DMT/_4:control;B->F:1;C->F:2;D->F:3;"
+            "DMT/_0->F:5;DMT/_1->F:6;DMT/_2->F:7;DMT/_3->F:8;DMT/_4->F:9;"
+            "E->F:4;F->G:1");
+}
+
+TEST_F(MklLayoutPassTest, NodeRewrite_FusedBatchNorm_Positive) {
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'B' op: 'Input'}"
+      "node { name: 'C' op: 'Input'}"
+      "node { name: 'D' op: 'Input'}"
+      "node { name: 'E' op: 'Input'}"
+      "node { name: 'F' op: 'FusedBatchNorm'"
+      " attr { key: 'T'            value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'  value { s: 'NCHW' } }"
+      " attr { key: 'epsilon'      value { f: 0.0001 } }"
+      " attr { key: 'is_training'  value { b: true } }"
+      " input: ['A', 'B', 'C', 'D', 'E'] }"
+      "node { name: 'G' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
+      " input: ['A', 'F'] }");
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Input);B(Input);C(Input);D(Input);DMT/_0(Const);DMT/_1(Const);"
+            "DMT/_2(Const);DMT/_3(Const);DMT/_4(Const);E(Input);"
+            "F(_MklFusedBatchNorm);G(Zeta)|A->F;A->G;"
+            "A:control->DMT/_0:control;A:control->DMT/_1:control;"
+            "A:control->DMT/_2:control;A:control->DMT/_3:control;"
+            "A:control->DMT/_4:control;B->F:1;C->F:2;D->F:3;"
+            "DMT/_0->F:5;DMT/_1->F:6;DMT/_2->F:7;DMT/_3->F:8;DMT/_4->F:9;"
+            "E->F:4;F->G:1");
+}
+
+/////////////////////////////////////////////////////////////////////
+//  Unit tests related to rewriting node for workspace edges
+/////////////////////////////////////////////////////////////////////
+
+/* Test LRN->MaxPool->MaxPoolGrad->LRNGrad replacement by workspace nodes. */
+TEST_F(MklLayoutPassTest, MaxPoolLRN_Positive) {
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'B' op: 'LRN'"
+      " attr { key: 'T'            value { type: DT_FLOAT } }"
+      " attr { key: 'alpha'        value { f: 0.001 } }"
+      " attr { key: 'beta'         value { f: 0.75 } }"
+      " attr { key: 'bias'         value { f: 1.0 } }"
+      " attr { key: 'data_format'  value { s: 'NCHW' } }"
+      " attr { key: 'depth_radius' value { i: 2 } }"
+      " input: ['A'] }"
+      "node { name: 'C' op: 'MaxPool'"
+      " attr { key: 'T'            value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'  value { s: 'NCHW' } }"
+      " attr { key: 'ksize'        value { list: {i: 1, i:1, i:3, i:3} } }"
+      " attr { key: 'padding'      value { s: 'VALID' } }"
+      " attr { key: 'strides'      value { list: {i: 1, i:1, i:2, i:2} } }"
+      " input: ['B'] }"
+      "node { name: 'D' op: 'Input'}"
+      "node { name: 'E' op: 'MaxPoolGrad'"
+      " attr { key: 'T'            value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'  value { s: 'NCHW' } }"
+      " attr { key: 'ksize'        value { list: {i: 1, i:1, i:3, i:3} } }"
+      " attr { key: 'padding'      value { s: 'VALID' } }"
+      " attr { key: 'strides'      value { list: {i: 1, i:1, i:2, i:2} } }"
+      " input: ['B', 'C', 'D'] }"
+      "node { name: 'F' op: 'Input'}"
+      "node { name: 'G' op: 'LRNGrad'"
+      " attr { key: 'T'            value { type: DT_FLOAT } }"
+      " attr { key: 'alpha'        value { f: 0.001 } }"
+      " attr { key: 'beta'         value { f: 0.75 } }"
+      " attr { key: 'bias'         value { f: 1.0 } }"
+      " attr { key: 'data_format'  value { s: 'NCHW' } }"
+      " attr { key: 'depth_radius' value { i: 2 } }"
+      " input: ['E', 'F', 'B'] }"
+      "node { name: 'H' op: 'Input'}"
+      "node { name: 'I' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
+      " input: ['H', 'G'] }");
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+      "A(Input);B(_MklLRN);C(_MklMaxPool);D(Input);DMT/_0(Const);DMT/_1(Const);"
+      "DMT/_2(Const);E(_MklMaxPoolGrad);F(Input);G(_MklLRNGrad);H(Input);"
+      "I(Zeta)|A->B;A:control->DMT/_0:control;B->C;B->E;B->G:2;B:1->G:3;"
+      "B:2->C:1;B:2->E:4;B:2->G:6;B:3->G:7;B:control->DMT/_1:control;C->E:1;"
+      "C:1->E:3;C:2->E:5;C:3->E:7;D->E:2;DMT/_0->B:1;DMT/_1->E:6;DMT/_2->G:5;"
+      "E->G;E:1->G:4;E:control->DMT/_2:control;F->G:1;G->I:1;H->I");
+}
+
+/* Test LRN->LRNGrad replacement by workspace nodes. */
+TEST_F(MklLayoutPassTest, LRN_Positive) {
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'B' op: 'LRN'"
+      " attr { key: 'T'            value { type: DT_FLOAT } }"
+      " attr { key: 'alpha'        value { f: 0.001 } }"
+      " attr { key: 'beta'         value { f: 0.75 } }"
+      " attr { key: 'bias'         value { f: 1.0 } }"
+      " attr { key: 'data_format'  value { s: 'NCHW' } }"
+      " attr { key: 'depth_radius' value { i: 2 } }"
+      " input: ['A'] }"
+      "node { name: 'C' op: 'Input'}"
+      "node { name: 'D' op: 'Input'}"
+      "node { name: 'E' op: 'LRNGrad'"
+      " attr { key: 'T'            value { type: DT_FLOAT } }"
+      " attr { key: 'alpha'        value { f: 0.001 } }"
+      " attr { key: 'beta'         value { f: 0.75 } }"
+      " attr { key: 'bias'         value { f: 1.0 } }"
+      " attr { key: 'data_format'  value { s: 'NCHW' } }"
+      " attr { key: 'depth_radius' value { i: 2 } }"
+      " input: ['C', 'D', 'B'] }"
+      "node { name: 'F' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
+      " input: ['C', 'E'] }");
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Input);B(_MklLRN);C(Input);D(Input);DMT/_0(Const);DMT/_1(Const);"
+            "DMT/_2(Const);E(_MklLRNGrad);F(Zeta)|"
+            "A->B;A:control->DMT/_0:control;B->E:2;B:1->E:3;B:2->E:6;B:3->E:7;"
+            "C->E;C->F;C:control->DMT/_1:control;C:control->DMT/_2:control;"
+            "D->E:1;DMT/_0->B:1;DMT/_1->E:4;DMT/_2->E:5;E->F:1");
+}
+
+/* Test LRN->LRNGrad replacement when only one of them is present. */
+TEST_F(MklLayoutPassTest, LRN_Negative1) {
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'B' op: 'LRN'"
+      " attr { key: 'T'            value { type: DT_FLOAT } }"
+      " attr { key: 'alpha'        value { f: 0.001 } }"
+      " attr { key: 'beta'         value { f: 0.75 } }"
+      " attr { key: 'bias'         value { f: 1.0 } }"
+      " attr { key: 'data_format'  value { s: 'NCHW' } }"
+      " attr { key: 'depth_radius' value { i: 2 } }"
+      " input: ['A'] }"
+      "node { name: 'C' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
+      " input: ['A', 'B'] }");
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Input);B(_MklLRN);C(Zeta);DMT/_0(Const)|"
+            "A->B;A->C;A:control->DMT/_0:control;B->C:1;DMT/_0->B:1");
+}
+
+/* Test LRN->LRNGrad replacement when only one of them is present. */
+TEST_F(MklLayoutPassTest, LRN_Negative2) {
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'B' op: 'Input'}"
+      "node { name: 'C' op: 'Input'}"
+      "node { name: 'D' op: 'LRNGrad'"
+      " attr { key: 'T'            value { type: DT_FLOAT } }"
+      " attr { key: 'alpha'        value { f: 0.001 } }"
+      " attr { key: 'beta'         value { f: 0.75 } }"
+      " attr { key: 'bias'         value { f: 1.0 } }"
+      " attr { key: 'data_format'  value { s: 'NCHW' } }"
+      " attr { key: 'depth_radius' value { i: 2 } }"
+      " input: ['A', 'B', 'C'] }"
+      "node { name: 'E' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
+      " input: ['A', 'D'] }");
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Input);B(Input);C(Input);D(_MklLRNGrad);DMT/_0(Const);"
+            "DMT/_1(Const);DMT/_2(Const);DMT/_3(Const);DMT/_4(Const);E(Zeta)|"
+            "A->D;A->E;A:control->DMT/_0:control;A:control->DMT/_1:control;"
+            "A:control->DMT/_2:control;A:control->DMT/_3:control;"
+            "A:control->DMT/_4:control;B->D:1;C->D:2;D->E:1;DMT/_0->D:3;"
+            "DMT/_1->D:7;DMT/_2->D:4;DMT/_3->D:5;DMT/_4->D:6");
+}
+
+/* Test LRN->LRNGrad negative case, where single LRN feeds
+   2 LRNGrad nodes at different slots. */
+TEST_F(MklLayoutPassTest, LRN_Negative3) {
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'B' op: 'LRN'"
+      " attr { key: 'T'            value { type: DT_FLOAT } }"
+      " attr { key: 'alpha'        value { f: 0.001 } }"
+      " attr { key: 'beta'         value { f: 0.75 } }"
+      " attr { key: 'bias'         value { f: 1.0 } }"
+      " attr { key: 'data_format'  value { s: 'NCHW' } }"
+      " attr { key: 'depth_radius' value { i: 2 } }"
+      " input: ['A'] }"
+      "node { name: 'C' op: 'Input'}"
+      "node { name: 'D' op: 'Input'}"
+      "node { name: 'E' op: 'LRNGrad'"
+      " attr { key: 'T'            value { type: DT_FLOAT } }"
+      " attr { key: 'alpha'        value { f: 0.001 } }"
+      " attr { key: 'beta'         value { f: 0.75 } }"
+      " attr { key: 'bias'         value { f: 1.0 } }"
+      " attr { key: 'data_format'  value { s: 'NCHW' } }"
+      " attr { key: 'depth_radius' value { i: 2 } }"
+      " input: ['C', 'D', 'B'] }"
+      "node { name: 'F' op: 'LRNGrad'"
+      " attr { key: 'T'            value { type: DT_FLOAT } }"
+      " attr { key: 'alpha'        value { f: 0.001 } }"
+      " attr { key: 'beta'         value { f: 0.75 } }"
+      " attr { key: 'bias'         value { f: 1.0 } }"
+      " attr { key: 'data_format'  value { s: 'NCHW' } }"
+      " attr { key: 'depth_radius' value { i: 2 } }"
+      " input: ['C', 'B', 'D'] }"
+      "node { name: 'G' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
+      " input: ['E', 'F'] }");
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Input);B(_MklLRN);C(Input);D(Input);DMT/_0(Const);DMT/_1(Const);"
+            "DMT/_2(Const);DMT/_3(Const);DMT/_4(Const);DMT/_5(Const);"
+            "DMT/_6(Const);E(_MklLRNGrad);F(_MklLRNGrad);G(Zeta)|A->B;"
+            "A:control->DMT/_0:control;B->E:2;"
+            "B->F:1;B:1->E:3;B:2->E:6;B:2->F:5;B:3->E:7;C->E;C->F;"
+            "C:control->DMT/_1:control;C:control->DMT/_2:control;"
+            "C:control->DMT/_3:control;C:control->DMT/_4:control;"
+            "C:control->DMT/_5:control;C:control->DMT/_6:control;"
+            "D->E:1;D->F:2;DMT/_0->B:1;DMT/_1->F:3;DMT/_2->F:7;DMT/_3->F:4;"
+            "DMT/_4->F:6;DMT/_5->E:4;DMT/_6->E:5;E->G;F->G:1");
+}
+
+/* Test MaxPool->MaxPoolGrad replacement by workspace+rewrite nodes. */
+TEST_F(MklLayoutPassTest, NodeWorkspace_MaxPool_Positive) {
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'B' op: 'MaxPool'"
+      " attr { key: 'T'            value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'  value { s: 'NCHW' } }"
+      " attr { key: 'ksize'        value { list: {i: 1, i:1, i:3, i:3} } }"
+      " attr { key: 'padding'      value { s: 'VALID' } }"
+      " attr { key: 'strides'      value { list: {i: 1, i:1, i:2, i:2} } }"
+      " input: ['A'] }"
+      "node { name: 'C' op: 'Input'}"
+      "node { name: 'D' op: 'Input'}"
+      "node { name: 'E' op: 'MaxPoolGrad'"
+      " attr { key: 'T'            value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'  value { s: 'NCHW' } }"
+      " attr { key: 'ksize'        value { list: {i: 1, i:1, i:3, i:3} } }"
+      " attr { key: 'padding'      value { s: 'VALID' } }"
+      " attr { key: 'strides'      value { list: {i: 1, i:1, i:2, i:2} } }"
+      " input: ['C', 'B', 'D'] }"
+      "node { name: 'F' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
+      " input: ['C', 'E'] }");
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Input);B(_MklMaxPool);C(Input);D(Input);DMT/_0(Const);"
+            "DMT/_1(Const);DMT/_2(Const);E(_MklMaxPoolGrad);F(Zeta)|"
+            "A->B;A:control->DMT/_0:control;B->E:1;B:1->E:3;B:2->E:5;B:3->E:7;"
+            "C->E;C->F;C:control->DMT/_1:control;C:control->DMT/_2:control;"
+            "D->E:2;DMT/_0->B:1;DMT/_1->E:4;DMT/_2->E:6;E->F:1");
+}
+
+// Test MaxPool>MaxPoolGrad replacement when only one of them is present.
+// In this case, we will rewrite MaxPool node but workspace edges will not
+// be present.
+TEST_F(MklLayoutPassTest, NodeWorkspace_MaxPool_Negative1) {
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'B' op: 'MaxPool'"
+      " attr { key: 'T'            value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'  value { s: 'NCHW' } }"
+      " attr { key: 'ksize'        value { list: {i: 1, i:1, i:3, i:3} } }"
+      " attr { key: 'padding'      value { s: 'VALID' } }"
+      " attr { key: 'strides'      value { list: {i: 1, i:1, i:2, i:2} } }"
+      " input: ['A'] }"
+      "node { name: 'C' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
+      " input: ['A', 'B'] }");
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Input);B(_MklMaxPool);C(Zeta);DMT/_0(Const)|"
+            "A->B;A->C;A:control->DMT/_0:control;B->C:1;DMT/_0->B:1");
+}
+
+// Test MaxPoolGrad replacement when only one of them is present.
+// In this case, we will rewrite MaxPoolGrad and for workspace tensor and
+// its Mkl part, we will generate dummy tensor.
+TEST_F(MklLayoutPassTest, NodeWorkspace_MaxPool_Negative2) {
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'B' op: 'Input'}"
+      "node { name: 'C' op: 'Input'}"
+      "node { name: 'D' op: 'MaxPoolGrad'"
+      " attr { key: 'T'            value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'  value { s: 'NCHW' } }"
+      " attr { key: 'ksize'        value { list: {i: 1, i:1, i:3, i:3} } }"
+      " attr { key: 'padding'      value { s: 'VALID' } }"
+      " attr { key: 'strides'      value { list: {i: 1, i:1, i:2, i:2} } }"
+      " input: ['A', 'B', 'C'] }"
+      "node { name: 'E' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
+      " input: ['A', 'D'] }");
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Input);B(Input);C(Input);D(_MklMaxPoolGrad);DMT/_0(Const);"
+            "DMT/_1(Const);DMT/_2(Const);DMT/_3(Const);DMT/_4(Const);E(Zeta)|"
+            "A->D;A->E;A:control->DMT/_0:control;A:control->DMT/_1:control;"
+            "A:control->DMT/_2:control;A:control->DMT/_3:control;"
+            "A:control->DMT/_4:control;B->D:1;C->D:2;D->E:1;DMT/_0->D:3;"
+            "DMT/_1->D:7;DMT/_2->D:4;DMT/_3->D:5;DMT/_4->D:6");
+}
+
+// Test MaxPool handling for batch-wise pooling (NCHW)
+// No rewrite should take place in such case
+TEST_F(MklLayoutPassTest, NodeWorkspace_MaxPool_Negative3) {
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'B' op: 'MaxPool'"
+      " attr { key: 'T'            value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'  value { s: 'NCHW' } }"
+      " attr { key: 'ksize'        value { list: {i: 2, i:1, i:1, i:1} } }"
+      " attr { key: 'padding'      value { s: 'VALID' } }"
+      " attr { key: 'strides'      value { list: {i: 1, i:1, i:1, i:1} } }"
+      " input: ['A'] }"
+      "node { name: 'C' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
+      " input: ['A', 'B'] }");
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Input);B(MaxPool);C(Zeta)|A->B;A->C;B->C:1");
+}
+
+// Test MaxPool handling for batch-wise pooling (NCHW)
+// No rewrite should take place in such case
+TEST_F(MklLayoutPassTest, NodeWorkspace_MaxPool_Negative4) {
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'B' op: 'MaxPool'"
+      " attr { key: 'T'            value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'  value { s: 'NCHW' } }"
+      " attr { key: 'ksize'        value { list: {i: 1, i:1, i:1, i:1} } }"
+      " attr { key: 'padding'      value { s: 'VALID' } }"
+      " attr { key: 'strides'      value { list: {i: 2, i:1, i:1, i:1} } }"
+      " input: ['A'] }"
+      "node { name: 'C' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
+      " input: ['A', 'B'] }");
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Input);B(MaxPool);C(Zeta)|A->B;A->C;B->C:1");
+}
+
+// Test MaxPool handling for depth-wise pooling (NHWC)
+// No rewrite should take place in such case
+TEST_F(MklLayoutPassTest, NodeWorkspace_MaxPool_Negative5) {
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'B' op: 'MaxPool'"
+      " attr { key: 'T'            value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'  value { s: 'NCHW' } }"
+      " attr { key: 'ksize'        value { list: {i: 1, i:2, i:1, i:1} } }"
+      " attr { key: 'padding'      value { s: 'VALID' } }"
+      " attr { key: 'strides'      value { list: {i: 1, i:1, i:1, i:1} } }"
+      " input: ['A'] }"
+      "node { name: 'C' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
+      " input: ['A', 'B'] }");
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Input);B(MaxPool);C(Zeta)|A->B;A->C;B->C:1");
+}
+
+// Test MaxPool handling for depth-wise pooling (NCHW)
+// No rewrite should take place in such case
+TEST_F(MklLayoutPassTest, NodeWorkspace_MaxPool_Negative6) {
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'B' op: 'MaxPool'"
+      " attr { key: 'T'            value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'  value { s: 'NCHW' } }"
+      " attr { key: 'ksize'        value { list: {i: 1, i:1, i:1, i:1} } }"
+      " attr { key: 'padding'      value { s: 'VALID' } }"
+      " attr { key: 'strides'      value { list: {i: 1, i:2, i:1, i:1} } }"
+      " input: ['A'] }"
+      "node { name: 'C' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
+      " input: ['A', 'B'] }");
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Input);B(MaxPool);C(Zeta)|A->B;A->C;B->C:1");
+}
+
+// Test MaxPool handling for batch-wise pooling (NHWC)
+// No rewrite should take place in such case
+TEST_F(MklLayoutPassTest, NodeWorkspace_MaxPool_Negative7) {
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'B' op: 'MaxPool'"
+      " attr { key: 'T'            value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'  value { s: 'NHWC' } }"
+      " attr { key: 'ksize'        value { list: {i: 2, i:1, i:1, i:1} } }"
+      " attr { key: 'padding'      value { s: 'VALID' } }"
+      " attr { key: 'strides'      value { list: {i: 1, i:1, i:1, i:1} } }"
+      " input: ['A'] }"
+      "node { name: 'C' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
+      " input: ['A', 'B'] }");
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Input);B(MaxPool);C(Zeta)|A->B;A->C;B->C:1");
+}
+
+// Test MaxPool handling for batch-wise pooling (NHWC)
+// No rewrite should take place in such case
+TEST_F(MklLayoutPassTest, NodeWorkspace_MaxPool_Negative8) {
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'B' op: 'MaxPool'"
+      " attr { key: 'T'            value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'  value { s: 'NHWC' } }"
+      " attr { key: 'ksize'        value { list: {i: 1, i:1, i:1, i:1} } }"
+      " attr { key: 'padding'      value { s: 'VALID' } }"
+      " attr { key: 'strides'      value { list: {i: 2, i:1, i:1, i:1} } }"
+      " input: ['A'] }"
+      "node { name: 'C' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
+      " input: ['A', 'B'] }");
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Input);B(MaxPool);C(Zeta)|A->B;A->C;B->C:1");
+}
+
+// Test MaxPool handling for depth-wise pooling (NHWC)
+// No rewrite should take place in such case
+TEST_F(MklLayoutPassTest, NodeWorkspace_MaxPool_Negative9) {
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'B' op: 'MaxPool'"
+      " attr { key: 'T'            value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'  value { s: 'NHWC' } }"
+      " attr { key: 'ksize'        value { list: {i: 1, i:1, i:1, i:2} } }"
+      " attr { key: 'padding'      value { s: 'VALID' } }"
+      " attr { key: 'strides'      value { list: {i: 1, i:1, i:1, i:1} } }"
+      " input: ['A'] }"
+      "node { name: 'C' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
+      " input: ['A', 'B'] }");
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Input);B(MaxPool);C(Zeta)|A->B;A->C;B->C:1");
+}
+
+// Test MaxPool handling for depth-wise pooling (NHWC)
+// No rewrite should take place in such case
+TEST_F(MklLayoutPassTest, NodeWorkspace_MaxPool_Negative10) {
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'B' op: 'MaxPool'"
+      " attr { key: 'T'            value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'  value { s: 'NHWC' } }"
+      " attr { key: 'ksize'        value { list: {i: 1, i:1, i:1, i:1} } }"
+      " attr { key: 'padding'      value { s: 'VALID' } }"
+      " attr { key: 'strides'      value { list: {i: 1, i:1, i:1, i:2} } }"
+      " input: ['A'] }"
+      "node { name: 'C' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
+      " input: ['A', 'B'] }");
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Input);B(MaxPool);C(Zeta)|A->B;A->C;B->C:1");
+}
+
+/////////////////////////////////////////////////////////////////////
+
+// Single Conv2D Op on GPU device
+// No rewrite should happen
+TEST_F(MklLayoutPassTest, NodeRewrite_Conv2D_DeviceTest) {
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'B' op: 'Input'}"
+      "node { name: 'C' op: 'Conv2D'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'      value { s: 'NCHW' } }"
+      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
+      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
+      " attr { key: 'padding'          value { s: 'SAME' } }"
+      " input: ['A', 'B']}"
+      "node { name: 'D' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
+      " input: ['B', 'C'] }", kGPUDevice);
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Input);B(Input);C(Conv2D);D(Zeta)|A->C;B->C:1;B->D;C->D:1");
+}
+
+TEST_F(MklLayoutPassTest, NodeMerge_Conv2DBackprop_DeviceTest) {
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'B' op: 'Input'}"
+      "node { name: 'C' op: 'Input'}"
+      "node { name: 'M' op: '_MklInput'}"
+      "node { name: 'N' op: '_MklInput'}"
+      "node { name: 'O' op: '_MklInput'}"
+      "node { name: 'D' op: '_MklConv2DWithBias'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'      value { s: 'NCHW' } }"
+      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
+      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
+      " attr { key: 'padding'          value { s: 'SAME' } }"
+      " input: ['A', 'B', 'C', 'M', 'N', 'O']}"
+      "node { name: 'E' op: 'Zeta'"
+      " attr {key: 'T'                 value { type: DT_FLOAT } }"
+      " input: ['D', 'A']}"
+      "node { name: 'F' op: 'BiasAddGrad'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'      value { s: 'NCHW' } }"
+      " input: ['E'] }", kGPUDevice);
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Input);B(Input);C(Input);D(_MklConv2DWithBias);"
+            "E(Zeta);F(BiasAddGrad);M(_MklInput);N(_MklInput);"
+            "O(_MklInput)|A->D;A->E:1;B->D:1;C->D:2;D->E;E->F;"
+            "M->D:3;N->D:4;O->D:5");
+}
+
+TEST_F(MklLayoutPassTest, NodeRewrite_Conv2DGradFilter_DeviceTest) {
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'B' op: 'Int32Input'}"
+      "node { name: 'C' op: 'Input'}"
+      "node { name: 'D' op: 'Conv2DBackpropFilter'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'      value { s: 'NCHW' } }"
+      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
+      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
+      " attr { key: 'padding'          value { s: 'SAME' } }"
+      " input: ['A', 'B', 'C']}"
+      "node { name: 'E' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
+      " input: ['A', 'D'] }", kGPUDevice);
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Input);B(Int32Input);C(Input);D(Conv2DBackpropFilter);E(Zeta)|"
+            "A->D;A->E;B->D:1;C->D:2;D->E:1");
+}
+
+TEST_F(MklLayoutPassTest, NodeRewrite_Relu_DeviceTest) {
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'B' op: 'Relu'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " input: ['A'] }"
+      "node { name: 'C' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
+      " input: ['A', 'B'] }", kGPUDevice);
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Input);B(Relu);C(Zeta)|A->B;A->C;B->C:1");
+}
+
+TEST_F(MklLayoutPassTest, NodeRewrite_ReluGrad_DeviceTest) {
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'B' op: 'Input'}"
+      "node { name: 'C' op: 'ReluGrad'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " input: ['A', 'B'] }"
+      "node { name: 'D' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
+      " input: ['A', 'C'] }", kGPUDevice);
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Input);B(Input);C(ReluGrad);D(Zeta)|A->C;A->D;B->C:1;C->D:1");
+}
+
+TEST_F(MklLayoutPassTest, NodeRewrite_MaxPool_DeviceTest) {
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'B' op: 'MaxPool'"
+      " attr { key: 'T'            value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'  value { s: 'NHWC' } }"
+      " attr { key: 'ksize'        value { list: {i: 1, i:1, i:1, i:1} } }"
+      " attr { key: 'padding'      value { s: 'VALID' } }"
+      " attr { key: 'strides'      value { list: {i: 1, i:1, i:1, i:1} } }"
+      " input: ['A'] }"
+      "node { name: 'C' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
+      " input: ['A', 'B'] }", kGPUDevice);
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Input);B(MaxPool);C(Zeta)|A->B;A->C;B->C:1");
+}
+
+TEST_F(MklLayoutPassTest, NodeRewrite_AvgPool_DeviceTest) {
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'B' op: 'AvgPool'"
+      " attr { key: 'T'            value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'  value { s: 'NHWC' } }"
+      " attr { key: 'ksize'        value { list: {i: 1, i:1, i:1, i:1} } }"
+      " attr { key: 'padding'      value { s: 'VALID' } }"
+      " attr { key: 'strides'      value { list: {i: 1, i:1, i:1, i:1} } }"
+      " input: ['A'] }"
+      "node { name: 'C' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
+      " input: ['A', 'B'] }", kGPUDevice);
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Input);B(AvgPool);C(Zeta)|A->B;A->C;B->C:1");
+}
+
+// Concat Op test: Concat with no Mkl layer feeding it
+TEST_F(MklLayoutPassTest, NodeRewrite_Concat_DeviceTest) {
+  InitGraph(
+      "node { name: 'A' op: 'Const' "
+      " attr { key: 'dtype' value { type: DT_INT32 } }"
+      " attr { key: 'value' value { "
+      "    tensor { dtype: DT_INT32 tensor_shape { dim { size: 1 } } "
+      "    int_val: 0 } } } }"
+      "node { name: 'B' op: 'InputList'"
+      " attr { key: 'N'                value { i: 2 } }}"
+      "node { name: 'C' op: 'Input'}"
+      "node { name: 'D' op: 'Concat'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " attr { key: 'N'                value { i: 2 } }"
+      " input: ['A', 'B:0', 'B:1']}"
+      "node { name: 'E' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
+      " input: ['C', 'D'] }", kGPUDevice);
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Const);B(InputList);C(Input);D(Concat);E(Zeta)|A->D;"
+            "B->D:1;B:1->D:2;C->E;D->E:1");
+}
+
+TEST_F(MklLayoutPassTest, NodeRewrite_ConcatV2_DeviceTest) {
+  InitGraph(
+      "node { name: 'A' op: 'Const' "
+      " attr { key: 'dtype' value { type: DT_INT32 } }"
+      " attr { key: 'value' value { "
+      "    tensor { dtype: DT_INT32 tensor_shape { dim { size: 1 } } "
+      "    int_val: 0 } } } }"
+      "node { name: 'B' op: 'InputList'"
+      " attr { key: 'N'                value { i: 2 } }}"
+      "node { name: 'C' op: 'Input'}"
+      "node { name: 'D' op: 'ConcatV2'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " attr { key: 'Tidx'             value { type: DT_INT32 } }"
+      " attr { key: 'N'                value { i: 2 } }"
+      " input: ['B:0', 'B:1', 'A']}"
+      "node { name: 'E' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
+      " input: ['C', 'D'] }", kGPUDevice);
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Const);B(InputList);C(Input);D(ConcatV2);E(Zeta)|"
+            "A->D:2;B->D;B:1->D:1;C->E;D->E:1");
+}
+
+TEST_F(MklLayoutPassTest, NodeRewrite_FusedBatchNorm_DeviceTest) {
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'B' op: 'Input'}"
+      "node { name: 'C' op: 'Input'}"
+      "node { name: 'D' op: 'Input'}"
+      "node { name: 'E' op: 'Input'}"
+      "node { name: 'F' op: 'FusedBatchNorm'"
+      " attr { key: 'T'            value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'  value { s: 'NCHW' } }"
+      " attr { key: 'epsilon'      value { f: 0.0001 } }"
+      " attr { key: 'is_training'  value { b: true } }"
+      " input: ['A', 'B', 'C', 'D', 'E'] }"
+      "node { name: 'G' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
+      " input: ['A', 'F'] }", kGPUDevice);
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Input);B(Input);C(Input);D(Input);E(Input);"
+            "F(FusedBatchNorm);G(Zeta)|A->F;A->G;B->F:1;C->F:2;D->F:3;"
+            "E->F:4;F->G:1");
+}
+
+TEST_F(MklLayoutPassTest, NodeMerge_Conv2DWithBias_DeviceTest) {
+  CHECK_EQ(kTensorOrdering, MklTfTensorOrdering::TENSORS_CONTIGUOUS);
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'B' op: 'Input'}"
+      "node { name: 'M' op: '_MklInput'}"
+      "node { name: 'N' op: '_MklInput'}"
+      "node { name: 'C' op: '_MklConv2D'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'      value { s: 'NCHW' } }"
+      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
+      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
+      " attr { key: 'padding'          value { s: 'SAME' } }"
+      " input: ['A', 'B', 'M', 'N']}"
+      "node { name: 'D' op: 'Input'}"
+      "node { name: 'E' op: 'BiasAdd'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'      value { s: 'NCHW' } }"
+      " input: ['C', 'D'] }"
+      "node { name: 'Y' op: 'Input'}"
+      "node { name: 'Z' op: 'Zeta'"
+      " attr {key: 'T'                 value { type: DT_FLOAT } }"
+      " input: ['E', 'Y']}", kGPUDevice);
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Input);B(Input);C(_MklConv2D);D(Input);E(BiasAdd);"
+            "M(_MklInput);N(_MklInput);Y(Input);Z(Zeta)|A->C;"
+            "B->C:1;C->E;D->E:1;E->Z;M->C:2;N->C:3;Y->Z:1");
+}
+
+/////////////////////////////////////////////////////////////////////
+
+static void BM_MklLayoutRewritePass(int iters, int op_nodes) {
+  testing::StopTiming();
+  string s;
+  for (int in = 0; in < 10; in++) {
+    s += strings::Printf("node { name: 'in%04d' op: 'Input'}", in);
+  }
+  random::PhiloxRandom philox(301, 17);
+  random::SimplePhilox rnd(&philox);
+  for (int op = 0; op < op_nodes; op++) {
+    s += strings::Printf(
+        "node { name: 'op%04d' op: 'Zeta' attr { key: 'T' value { "
+        "type: DT_FLOAT } } input: ['in%04d', 'in%04d' ] }",
+        op, rnd.Uniform(10), rnd.Uniform(10));
+  }
+
+  bool first = true;
+  while (iters > 0) {
+    Graph* graph = new Graph(OpRegistry::Global());
+    InitGraph(s, graph);
+    int N = graph->num_node_ids();
+    if (first) {
+      testing::SetLabel(strings::StrCat("Per graph node.  Nodes: ", N));
+      first = false;
+    }
+    {
+      testing::StartTiming();
+      std::unique_ptr<Graph> ug(graph);
+      RunMklLayoutRewritePass(&ug);
+      testing::StopTiming();
+    }
+    iters -= N;  // Our benchmark units are individual graph nodes,
+                 // not whole graphs
+    // delete graph;
+  }
+}
+BENCHMARK(BM_MklLayoutRewritePass)->Arg(1000)->Arg(10000);
+
+}  // namespace
+
+#endif  // INTEL_MKL_DNN
+
 }  // namespace tensorflow
 
 #endif /* INTEL_MKL */
diff --git a/tensorflow/core/kernels/logging_ops.cc b/tensorflow/core/kernels/logging_ops.cc
index 67d603dd0a..bacf3e7740 100644
--- a/tensorflow/core/kernels/logging_ops.cc
+++ b/tensorflow/core/kernels/logging_ops.cc
@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
+#include <iostream>
 #include "tensorflow/core/framework/op_kernel.h"
 #include "tensorflow/core/lib/core/status.h"
 #include "tensorflow/core/lib/strings/str_util.h"
@@ -76,7 +77,7 @@ class PrintOp : public OpKernel {
       strings::StrAppend(&msg, "[", ctx->input(i).SummarizeValue(summarize_),
                          "]");
     }
-    LOG(INFO) << msg;
+    std::cerr << msg << std::endl;
   }
 
  private:
diff --git a/tensorflow/core/kernels/mkl_aggregate_ops.cc b/tensorflow/core/kernels/mkl_aggregate_ops.cc
index 935eb81dd0..9aabbbdb6b 100644
--- a/tensorflow/core/kernels/mkl_aggregate_ops.cc
+++ b/tensorflow/core/kernels/mkl_aggregate_ops.cc
@@ -19,7 +19,6 @@ limitations under the License.
 #define EIGEN_USE_THREADS
 
 #include <numeric>
-
 #include "tensorflow/core/framework/numeric_op.h"
 #include "tensorflow/core/framework/register_types.h"
 #include "tensorflow/core/lib/gtl/inlined_vector.h"
@@ -29,10 +28,17 @@ limitations under the License.
 #include "mkl_dnn_types.h"
 #include "tensorflow/core/util/mkl_util.h"
 
-namespace tensorflow {
+#ifdef INTEL_MKL_DNN
+#include "mkldnn.hpp"
+using mkldnn::stream;
+using mkldnn::sum;
+#endif
 
+namespace tensorflow {
 typedef Eigen::ThreadPoolDevice CPUDevice;
 
+#ifndef INTEL_MKL_DNN
+
 template <typename Device, typename T>
 class MklAddNOp : public OpKernel {
  public:
@@ -41,17 +47,18 @@ class MklAddNOp : public OpKernel {
   void Compute(OpKernelContext* ctx) override {
     const int num = ctx->num_inputs();
     OP_REQUIRES(ctx, num / 2 == 2,
-                errors::InvalidArgument("Only additions of two arguments "
+                errors::InvalidArgument("Only additions of two tensors "
                                         "supported by MKL. Num inputs: ",
                                         num));
 
     MklAddNOpContext mkl_context;
-    const Tensor& input0 = MklGetInput(ctx, 0);
-    GetMklShape(ctx, 0, &(mkl_context.input1_shape));
+    size_t src1_idx = 0, src2_idx = 1;
+    const Tensor& input0 = MklGetInput(ctx, src1_idx);
+    GetMklShape(ctx, src1_idx, &(mkl_context.input1_shape));
     bool input1_in_mkl_format = mkl_context.input1_shape.IsMklTensor();
 
-    const Tensor& input1 = MklGetInput(ctx, 1);
-    GetMklShape(ctx, 1, &(mkl_context.input2_shape));
+    const Tensor& input1 = MklGetInput(ctx, src2_idx);
+    GetMklShape(ctx, src2_idx, &(mkl_context.input2_shape));
     bool input2_in_mkl_format = mkl_context.input2_shape.IsMklTensor();
 
     // handle the case of a scalar
@@ -59,13 +66,12 @@ class MklAddNOp : public OpKernel {
       const TensorShape& o_shape = input0.shape();
       Tensor* out_tensor = nullptr;
       mkl_context.output_shape.SetMklTensor(false);
-      AllocateOutputSetMklShape(ctx, 0, &out_tensor, o_shape,
+      AllocateOutputSetMklShape(ctx, src1_idx, &out_tensor, o_shape,
                                 mkl_context.output_shape);
       float user_i1 = (input0.scalar<T>()());
-      ;
       float user_i2 = (input1.scalar<T>()());
-      ;
-      out_tensor->scalar<T>()() = std::plus<float>{}(user_i1, user_i2);
+      out_tensor->scalar<T>()() =
+          std::plus<float>{}(user_i1, user_i2);
       return;
     }
 
@@ -82,8 +88,8 @@ class MklAddNOp : public OpKernel {
       if (o_shape.num_elements() == 0) {
         Tensor* out_tensor = nullptr;
         mkl_context.output_shape.SetMklTensor(false);
-        AllocateOutputSetMklShape(ctx, 0, &out_tensor, o_shape,
-                                  mkl_context.output_shape);
+        AllocateOutputSetMklShape(ctx, src1_idx, &out_tensor, o_shape,
+                                 mkl_context.output_shape);
         return;
       }
     }
@@ -92,9 +98,9 @@ class MklAddNOp : public OpKernel {
     mkl_context.in_strides = new size_t[mkl_context.in_dims];
     // Generate size, stride for input if input is in MKL format.
     if (input1_in_mkl_format || input2_in_mkl_format) {
-      const MklShape* tmp_mkl_shape = (input1_in_mkl_format)
-                                          ? &mkl_context.input1_shape
-                                          : &mkl_context.input2_shape;
+      const MklShape* tmp_mkl_shape =
+        (input1_in_mkl_format) ? &mkl_context.input1_shape :
+        &mkl_context.input2_shape;
       for (int i = 0; i < mkl_context.in_dims; i++) {
         mkl_context.in_sizes[i] = tmp_mkl_shape->GetSizes()[i];
         mkl_context.in_strides[i] = tmp_mkl_shape->GetStrides()[i];
@@ -110,7 +116,6 @@ class MklAddNOp : public OpKernel {
             mkl_context.in_strides[i - 1] * mkl_context.in_sizes[i - 1];
       }
     }
-
     std::vector<float> coeff(2, 1.0);
     mkl_context.MklCreateInputLayouts(ctx);
     CHECK_EQ(dnnSumCreate_F32(&mkl_context.Eltwise, mkl_context.attributes, 2,
@@ -127,7 +132,7 @@ class MklAddNOp : public OpKernel {
      mkl_context.output_shape.SetMklLayout(mkl_context.Eltwise, dnnResourceDst);
 
      mkl_context.output_shape.SetTfLayout(
-         mkl_context.in_dims, mkl_context.in_sizes, mkl_context.in_strides);
+        mkl_context.in_dims, mkl_context.in_sizes, mkl_context.in_strides);
      if (input1_in_mkl_format == true) {
       mkl_context.output_shape.SetTfDimOrder(mkl_context.in_dims,
       mkl_context.input1_shape.GetTfToMklDimMap());
@@ -139,12 +144,12 @@ class MklAddNOp : public OpKernel {
                         mkl_context.output_shape.GetMklLayout())) /
                     sizeof(T));
 
-     AllocateOutputSetMklShape(ctx, 0, &output, tf_shape,
+     AllocateOutputSetMklShape(ctx, src1_idx, &output, tf_shape,
                               mkl_context.output_shape);
     } else {
      const TensorShape& o_shape = input1.shape();
      mkl_context.output_shape.SetMklTensor(false);
-     AllocateOutputSetMklShape(ctx, 0, &output, o_shape,
+     AllocateOutputSetMklShape(ctx, src1_idx, &output, o_shape,
                                 mkl_context.output_shape);
     }
 
@@ -172,16 +177,18 @@ class MklAddNOp : public OpKernel {
     void MklCreateInputLayouts(OpKernelContext* context) {
       bool input1_in_mkl_format = input1_shape.IsMklTensor();
       if (!input1_in_mkl_format) {
-        CHECK_EQ(dnnLayoutCreate_F32(&lt_input1, in_dims, in_sizes, in_strides),
-                 E_SUCCESS);
+        CHECK_EQ(
+            dnnLayoutCreate_F32(&lt_input1, in_dims, in_sizes, in_strides),
+            E_SUCCESS);
       } else {
         lt_input1 = static_cast<dnnLayout_t>(input1_shape.GetCurLayout());
       }
 
       bool input2_in_mkl_format = input2_shape.IsMklTensor();
       if (!input2_in_mkl_format) {
-        CHECK_EQ(dnnLayoutCreate_F32(&lt_input2, in_dims, in_sizes, in_strides),
-                 E_SUCCESS);
+        CHECK_EQ(
+            dnnLayoutCreate_F32(&lt_input2, in_dims, in_sizes, in_strides),
+            E_SUCCESS);
       } else {
         lt_input2 = static_cast<dnnLayout_t>(input2_shape.GetCurLayout());
       }
@@ -257,8 +264,8 @@ class MklAddNOp : public OpKernel {
       bool input2_in_mkl_format = input2_shape.IsMklTensor();
       dnnDelete_F32(Eltwise);
       if (!input1_in_mkl_format || !input2_in_mkl_format) {
-        delete[] in_sizes;
-        delete[] in_strides;
+         delete [] in_sizes;
+         delete [] in_strides;
       }
       if (!input1_in_mkl_format) {
          dnnLayoutDelete_F32(lt_input1);
@@ -270,6 +277,151 @@ class MklAddNOp : public OpKernel {
   } MklAddNOpContext;
 };
 
+#else  // INTEL_MKL_DNN
+template <typename Device, typename T>
+class MklAddNOp : public OpKernel {
+ public:
+  ~MklAddNOp() {}
+  explicit MklAddNOp(OpKernelConstruction* context) : OpKernel(context) {}
+
+  void Compute(OpKernelContext* ctx) override {
+    const int num = ctx->num_inputs();
+    // Only additions of 2 input tensors is supported now
+    OP_REQUIRES(ctx, num / 2 == 2,
+                errors::InvalidArgument("Only additions of two tensors "
+                                        "supported by MKL. Num inputs: ",
+                                        num));
+
+    try {
+      auto cpu_engine = engine(engine::cpu, 0);
+      size_t src1_idx = 0, src2_idx = 1;
+      const Tensor& src1_tensor = MklGetInput(ctx, src1_idx);
+      const Tensor& src2_tensor = MklGetInput(ctx, src2_idx);
+
+      MklDnnShape src1_mkl_shape, src2_mkl_shape;
+      GetMklShape(ctx, src1_idx, &src1_mkl_shape);
+      GetMklShape(ctx, src2_idx, &src2_mkl_shape);
+      bool input1_in_mkl_format = src1_mkl_shape.IsMklTensor();
+      bool input2_in_mkl_format = src2_mkl_shape.IsMklTensor();
+      int src1_dims_size = input1_in_mkl_format?
+       src1_mkl_shape.GetDimension(): src1_tensor.dims();
+      int src2_dims_size = input2_in_mkl_format?
+       src2_mkl_shape.GetDimension(): src2_tensor.dims();
+
+      if (!input1_in_mkl_format && src1_dims_size == 0) {
+         Tensor* dst_tensor = nullptr;
+         MklShape mkl_shape_dst;
+         mkl_shape_dst.SetMklTensor(false);
+         AllocateOutputSetMklShape(ctx, src1_idx, &dst_tensor,
+         src1_tensor.shape(), mkl_shape_dst);
+         float user_i1 = (src1_tensor.scalar<T>()());
+         float user_i2 = (src2_tensor.scalar<T>()());
+         dst_tensor->scalar<T>()() =
+           std::plus<float>{}(user_i1, user_i2);
+         return;
+       }
+
+      // If there is nothing to compute, return.
+      if (!input1_in_mkl_format && !input2_in_mkl_format) {
+        if (src1_tensor.shape().num_elements() == 0) {
+           Tensor* dst_tensor = nullptr;
+           MklShape mkl_shape_dst;
+           mkl_shape_dst.SetMklTensor(false);
+           AllocateOutputSetMklShape(ctx, src1_idx, &dst_tensor,
+           src1_tensor.shape(), mkl_shape_dst);
+           return;
+        }
+      }
+
+      // element-wise add operator for tensor input1 and tensor input2
+      std::vector<double> coeff(2, 1.0);
+      MklDnnData<T> src1(&cpu_engine);
+      MklDnnData<T> src2(&cpu_engine);
+      MklDnnData<T> dst(&cpu_engine);
+
+      int tmp_size = input1_in_mkl_format ? src2_dims_size: src1_dims_size;
+      memory::dims dims(tmp_size);
+      memory::dims strides(tmp_size);
+      memory::desc md1({}, memory::data_undef, memory::format_undef);
+      memory::desc md2({}, memory::data_undef, memory::format_undef);
+
+      if ( input1_in_mkl_format || input2_in_mkl_format ) {
+        if ( input1_in_mkl_format ) {
+          md1 = src1_mkl_shape.GetMklLayout();
+          md2 = md1;
+          dst.SetUsrMem(md1);
+        } else {
+          md2 = src2_mkl_shape.GetMklLayout();
+          md1 = md2;
+          dst.SetUsrMem(md2);
+        }
+      } else {
+         dims = TFShapeToMklDnnDims(src1_tensor.shape());
+         strides = CalculateTFStrides(dims);
+         md1 = MklDnnData<T>::CreateBlockedMemDesc(dims, strides);
+         md2 = md1;
+         dst.SetUsrMem(dims, strides);
+      }
+
+      std::vector<memory::primitive_desc> srcs_pd;
+
+      src1.SetUsrMem(md1, &src1_tensor);
+      auto mpd1 = src1.GetUsrMemPrimDesc();
+      srcs_pd.push_back(mpd1);
+
+      src2.SetUsrMem(md2, &src2_tensor);
+      auto mpd2 = src2.GetUsrMemPrimDesc();
+      srcs_pd.push_back(mpd2);
+
+      std::vector<primitive::at> inputs;
+      inputs.push_back(src1.GetOpMem());
+      inputs.push_back(src2.GetOpMem());
+      auto output_pd = dst.GetUsrMemPrimDesc();
+      Tensor* dst_tensor = nullptr;
+      auto sum_pd = sum::primitive_desc(dst.GetUsrMemDesc(), coeff, srcs_pd);
+      auto sum_op = sum(sum_pd, inputs, dst.GetOpMem());
+      if ( input2_in_mkl_format || input1_in_mkl_format ) {
+         MklDnnShape output_mkl_shape;
+         output_mkl_shape.SetMklTensor(true);
+         output_mkl_shape.SetMklLayout(&output_pd);
+         output_mkl_shape.SetElemType(MklDnnType<T>());
+         if ( input1_in_mkl_format ) {
+          output_mkl_shape.SetTfLayout(src1_dims_size,
+          src1_mkl_shape.GetSizesAsMklDnnDims(),
+          src1_mkl_shape.GetTfDataFormat());
+         } else {
+          output_mkl_shape.SetTfLayout(src2_dims_size,
+          src2_mkl_shape.GetSizesAsMklDnnDims(),
+          src2_mkl_shape.GetTfDataFormat());
+         }
+         TensorShape output_tf_shape;
+         output_tf_shape.AddDim((output_pd.get_size() / sizeof(T))
+         + (output_pd.get_size()%sizeof(T) == 0 ? 0 : 1));
+         AllocateOutputSetMklShape(ctx, src1_idx, &dst_tensor, output_tf_shape,
+                                output_mkl_shape);
+      } else {
+         MklShape mkl_shape_dst;
+         mkl_shape_dst.SetMklTensor(false);
+         AllocateOutputSetMklShape(ctx, src1_idx,
+         &dst_tensor, src1_tensor.shape(), mkl_shape_dst);
+      }
+
+      dst.SetUsrMemDataHandle(dst_tensor);
+      std::vector<primitive> net;
+      net.push_back(sum_op);
+      stream(stream::kind::eager).submit(net).wait();
+    } catch (mkldnn::error &e) {
+      string error_msg = "Status: " + std::to_string(e.status) +
+                       ", message: " + string(e.message) +
+                       ", in file " + string(__FILE__) + ":" +
+                       std::to_string(__LINE__);
+      OP_REQUIRES_OK(ctx, errors::Aborted("Operation received an exception:",
+                                            error_msg));
+    }
+  }
+};
+
+#endif
 #define REGISTER_MKL_CPU(T)                                         \
   REGISTER_KERNEL_BUILDER(Name("_MklAddN")                          \
                               .Device(DEVICE_CPU)                   \
diff --git a/tensorflow/core/kernels/mkl_avgpooling_op.cc b/tensorflow/core/kernels/mkl_avgpooling_op.cc
index d90baee069..d751a70fc8 100644
--- a/tensorflow/core/kernels/mkl_avgpooling_op.cc
+++ b/tensorflow/core/kernels/mkl_avgpooling_op.cc
@@ -24,10 +24,25 @@
 
 #include "tensorflow/core/kernels/mkl_pooling_ops_common.h"
 
+#ifdef INTEL_MKL_DNN
+#include "mkldnn.hpp"
+using mkldnn::memory;
+using mkldnn::error;
+using mkldnn::pooling_forward;
+using mkldnn::pooling_backward;
+using mkldnn::padding_kind;
+using mkldnn::engine;
+using mkldnn::prop_kind;
+using mkldnn::algorithm;
+#endif
+
 namespace tensorflow {
 
 typedef Eigen::ThreadPoolDevice CPUDevice;
 
+// For now, MKL-ML is default. So making MKL-DNN not a default choice.
+#ifndef INTEL_MKL_DNN
+
 template <typename Device, typename T>
 class MklAvgPoolingOp : public OpKernel {
  public:
@@ -132,7 +147,7 @@ class MklAvgPoolingOp : public OpKernel {
         E_SUCCESS);
 
     mkl_context.MklCleanup();
-  }
+  }  // Compute
 
  private:
   typedef struct {
@@ -411,7 +426,293 @@ class MklAvgPoolingGradOp : public OpKernel {
   std::vector<int32> stride_;
   Padding padding_;
   TensorFormat data_format_;
-};
+};  // MklAvgPoolingGradOp
+
+
+#else  // INTEL_MKL_DNN is defined
+
+template <typename Device, typename T>
+class MklAvgPoolingOp : public MklPoolingForwardOpBase<T> {
+ public:
+  explicit MklAvgPoolingOp(OpKernelConstruction* context)
+  : MklPoolingForwardOpBase<T>(context) {
+    // Workspace is an MKLDNN construct that is only used in Max Pooling.
+    // So set workspace_enabled_ to false.
+    this->workspace_enabled_ = false;
+  }
+
+  void Compute(OpKernelContext* context) override {
+    try {
+      auto cpu_engine = engine(engine::cpu, 0);
+      const Tensor& input_tensor = MklGetInput(context,
+              this->kInputTensorIndexInput);
+      MklDnnShape dnn_shape_input;
+      GetMklShape(context, this->kInputTensorIndexInput, &dnn_shape_input);
+      this->SanityCheckInput(context, input_tensor, dnn_shape_input);
+      if (!context->status().ok()) return;
+
+      MklDnnData<T> dnn_data_input(&cpu_engine);
+      MklDnnData<T> dnn_data_output(&cpu_engine);
+
+      // initialize variables for the pooling op
+      MklPoolParameters pool_params;
+      // Get the input tensor and initialize the pooling parameters
+      this->ConfigureInput(context, dnn_shape_input,
+                          input_tensor, &pool_params,
+                          &dnn_data_input);
+      OP_REQUIRES_OK(context, context->status());
+
+      // Declare output tensor
+      Tensor* output_tensor = nullptr;
+      memory::dims output_dims_mkl_order;
+      this->GetOutputDims(pool_params, &output_dims_mkl_order);
+
+      // If input is in Mkl layout, then just get the memory format from it
+      // directly, instead of using input data_format to AvgPool.
+      if (dnn_shape_input.IsMklTensor()) {
+        dnn_data_output.SetUsrMem(output_dims_mkl_order,
+                static_cast<memory::format>(dnn_data_input.GetUsrMemDesc()
+                    .data.format));
+
+      } else {
+          dnn_data_output.SetUsrMem(output_dims_mkl_order,
+              this->data_format_mkldnn_);
+      }
+
+        // describe the memory layout
+      dnn_data_output.SetOpMemDesc(output_dims_mkl_order, memory::format::any);
+
+      // 3. create a pooling primitive descriptor
+      auto pool_desc = pooling_forward::desc(prop_kind::forward,
+              algorithm::pooling_avg_exclude_padding,
+              dnn_data_input.GetUsrMemDesc(),
+              dnn_data_output.GetUsrMemDesc(),
+              memory::dims({  pool_params.row_stride,
+                              pool_params.col_stride}),
+              memory::dims({  pool_params.window_rows,
+                              pool_params.window_cols}),
+              memory::dims({  static_cast<int>(pool_params.pad_top),
+                              static_cast<int>(pool_params.pad_left)}),
+              memory::dims({  static_cast<int>(pool_params.pad_bottom),
+                              static_cast<int>(pool_params.pad_right)}),
+              TFPaddingToMklDnnPadding(this->padding_));
+      auto pool_prim_desc = pooling_forward::primitive_desc(pool_desc,
+                                                 cpu_engine);
+
+      this->AllocateOutputTensor(context, pool_prim_desc, output_dims_mkl_order,
+                            this->data_format_mkldnn_, &output_tensor);
+      CHECK_NOTNULL(output_tensor);
+
+      OP_REQUIRES_OK(context, context->status());
+      dnn_data_output.SetUsrMemDataHandle(output_tensor);
+
+      this->PrepareAndExecuteNet(pool_prim_desc,
+                                &dnn_data_input,
+                                &dnn_data_output);
+    } catch (mkldnn::error &e) {
+        string error_msg = "Status: " + std::to_string(e.status) +
+                        ", message: " + string(e.message) +
+                        ", in file " + string(__FILE__) + ":" +
+                        std::to_string(__LINE__);
+        OP_REQUIRES_OK(context,
+                        errors::Aborted("Operation received an exception:",
+                                         error_msg));
+    }
+  }  // Compute
+};  // MklAvgPoolingOp
+
+//-----------------------------------------------------------------------------
+
+template <class Device, class T>
+class MklAvgPoolingGradOp : public MklPoolingBackwardOpBase<T> {
+ public:
+  explicit MklAvgPoolingGradOp(OpKernelConstruction* context)
+      : MklPoolingBackwardOpBase<T>(context) {
+  }
+
+  void Compute(OpKernelContext* context) override {
+    try {
+      auto cpu_engine = engine(engine::cpu, 0);
+      MklDnnShape original_input_mkl_shape, input_gradient_mkl_shape;
+      const Tensor& tensor_in_shape = MklGetInput(context,
+          kInputTensorIndexInputShape);
+      const Tensor& input_gradient_tensor = MklGetInput(context,
+          kInputTensorIndexInputGradient);
+      GetMklShape(context, kInputTensorIndexInputShape,
+            &original_input_mkl_shape);
+      GetMklShape(context, kInputTensorIndexInputGradient,
+            &input_gradient_mkl_shape);
+
+
+      SanityCheckInputs(context, tensor_in_shape,
+                        input_gradient_tensor,
+                        original_input_mkl_shape,
+                        input_gradient_mkl_shape);
+      if (!context->status().ok()) return;
+
+      // Used to allocate output_diff_src/diff_src
+      // and create pool_fwd mdm desc
+      // 0. Input("orig_input_shape: int32") //NOT a T Tensor!
+      // 1. Input("grad: T")
+
+      MklDnnData<T> input_gradient_diff_dst(&cpu_engine);
+      MklDnnData<T> output_diff_src(&cpu_engine);
+      Tensor* output_tensor_diff_src = nullptr;
+      TensorShape original_input_shape;
+      MklPoolParameters pool_params;
+      memory::dims output_dims_mkl_order, original_input_dims_nchw;
+      // Configure the original input memory descriptor
+      memory::desc original_input_md = ConfigureOriginalInput(context,
+                                      tensor_in_shape,
+                                      original_input_mkl_shape,
+                                      &original_input_dims_nchw,
+                                      &pool_params,
+                                      &original_input_shape);
+
+      // configure the original output memory descriptor
+      // by definition, the shape of the original output is the same
+      // as the shape of the gradient diff_dst
+      memory::desc original_output_md = this->ConfigureOriginalOutput(
+                pool_params, input_gradient_mkl_shape, output_dims_mkl_order);
+
+      memory::desc target_diff_dst_md = this->ConfigureInputGradient(
+                                    input_gradient_mkl_shape,
+                                    input_gradient_tensor,
+                                    &input_gradient_diff_dst,
+                                    original_output_md);
+      // The shape of the output diff src needs to be the same shape as the
+      // original input. But we will set its format to be same as the format of
+      // input gradient. We won't use format of original input since it will
+      // always be in Tensorflow layout (given that AvgPoolGrad gets shape of
+      // the input rather than actual input).
+      output_diff_src.SetUsrMem(original_input_dims_nchw,
+                                static_cast<memory::format>(
+                                  target_diff_dst_md.data.format));
+
+      // Create the forward pooling primitive descriptor so we can reference it
+      // in the backward pooling primitive descriptor
+      auto pool_fwd_desc = pooling_forward::desc(prop_kind::forward,
+              algorithm::pooling_avg_exclude_padding,
+              original_input_md,
+              original_output_md,
+              memory::dims({  pool_params.row_stride,
+                              pool_params.col_stride}),
+              memory::dims({  pool_params.window_rows,
+                              pool_params.window_cols}),
+              memory::dims({  static_cast<int>(pool_params.pad_top),
+                              static_cast<int>(pool_params.pad_left)}),
+              memory::dims({  static_cast<int>(pool_params.pad_bottom),
+                              static_cast<int>(pool_params.pad_right)}),
+              TFPaddingToMklDnnPadding(this->padding_));
+      auto pool_fwd_prim_desc
+              = pooling_forward::primitive_desc(pool_fwd_desc,
+                                                  cpu_engine);
+
+      auto pool_bkwd_desc = pooling_backward::desc(
+              algorithm::pooling_avg_exclude_padding,
+              output_diff_src.GetUsrMemDesc(),
+              target_diff_dst_md,
+              memory::dims({  pool_params.row_stride,
+                              pool_params.col_stride}),
+              memory::dims({  pool_params.window_rows,
+                              pool_params.window_cols}),
+              memory::dims({  static_cast<int>(pool_params.pad_top),
+                              static_cast<int>(pool_params.pad_left)}),
+              memory::dims({  static_cast<int>(pool_params.pad_bottom),
+                              static_cast<int>(pool_params.pad_right)}),
+              TFPaddingToMklDnnPadding(this->padding_));
+      auto pool_bkwd_prim_desc
+                = pooling_backward::primitive_desc(pool_bkwd_desc,
+                                              cpu_engine,
+                                              pool_fwd_prim_desc);
+      this->AllocateOutputTensor(context, pool_bkwd_prim_desc,
+                      original_input_dims_nchw,
+                      this->data_format_mkldnn_,
+                      &output_tensor_diff_src);
+
+      output_diff_src.SetUsrMemDataHandle(output_tensor_diff_src);
+
+      this->PrepareAndExecuteNet(pool_bkwd_prim_desc,
+                          &input_gradient_diff_dst,
+                          &output_diff_src,
+                          memory::primitive_desc(
+                              target_diff_dst_md,
+                              cpu_engine));
+    } catch (mkldnn::error &e) {
+      string error_msg = "Status: " + std::to_string(e.status) +
+                      ", message: " + string(e.message) +
+                      ", in file " + string(__FILE__) + ":" +
+                      std::to_string(__LINE__);
+      OP_REQUIRES_OK(context,
+                      errors::Aborted("Compute received an exception:",
+                                      error_msg));
+    }
+  }  // Compute
+
+ private:
+  // 0. Input("orig_input_shape: int32")
+  // 1. Input("grad: T")
+  const int kInputTensorIndexInputShape = 0;
+  const int kInputTensorIndexInputGradient = 1;
+
+  memory::desc ConfigureOriginalInput(OpKernelContext* context,
+        const Tensor& tensor_original_input_shape,
+        const MklDnnShape& original_input_mkl_shape,
+        memory::dims* original_input_dims_mkl_order,
+        MklPoolParameters* pool_params,
+        TensorShape* input_tensor_shape) {
+    CHECK_NOTNULL(original_input_dims_mkl_order);
+    CHECK_NOTNULL(pool_params);
+    CHECK_NOTNULL(input_tensor_shape);
+    // For AvgPoolGrad, we only get the size of the original input because
+    // The original data is irrelvant.
+    auto shape_vec = tensor_original_input_shape.vec<int32>();
+    for (int64 i = 0; i < tensor_original_input_shape.NumElements(); ++i) {
+      input_tensor_shape->AddDim(shape_vec(i));
+    }
+
+    return MklPoolingBackwardOpBase<T>::ConfigureOriginalInput(
+                                              context,
+                                              tensor_original_input_shape,
+                                              original_input_mkl_shape,
+                                              original_input_dims_mkl_order,
+                                              pool_params,
+                                              *input_tensor_shape);
+}
+
+  void SanityCheckInputs(OpKernelContext* context,
+                        const Tensor& tensor_in_shape,
+                        const Tensor& input_gradient_tensor,
+                        const MklDnnShape& original_input_mkl_shape,
+                        const MklDnnShape& input_gradient_mkl_shape) {
+    if (!original_input_mkl_shape.IsMklTensor()) {
+      OP_REQUIRES(context, tensor_in_shape.dims() == 1 &&
+          tensor_in_shape.NumElements() == 4,
+          errors::InvalidArgument("original input shape must be "
+                "1-dimensional and 4 elements"));
+    } else {
+      OP_REQUIRES(context, original_input_mkl_shape.GetDimension() == 1 &&
+          original_input_mkl_shape.DimSize(0) == 4,
+          errors::InvalidArgument("original input shape must be "
+                "1-dimensional and 4 elements"));
+    }
+
+    if (!input_gradient_mkl_shape.IsMklTensor()) {
+      // For avgpooling, input_gradient_diff_dst should have 4 dimensions.
+      OP_REQUIRES(context, input_gradient_tensor.dims() == 4,
+          errors::InvalidArgument("Gradient shape must be "
+                              "4-dimensional"));
+    } else {
+      OP_REQUIRES(context, input_gradient_mkl_shape.GetDimension() == 4,
+          errors::InvalidArgument("Gradient shape must be "
+                              "4-dimensional"));
+    }
+  }
+};  // MklAvgPoolingGradOp
+
+
+
+#endif  // INTEL_MKL_DNN
 
 REGISTER_KERNEL_BUILDER(Name("_MklAvgPool")
                             .Device(DEVICE_CPU)
@@ -427,3 +728,4 @@ REGISTER_KERNEL_BUILDER(Name("_MklAvgPoolGrad")
 
 }  // namespace tensorflow
 #endif  // INTEL_MKL
+
diff --git a/tensorflow/core/kernels/mkl_concat_op.cc b/tensorflow/core/kernels/mkl_concat_op.cc
index e6673b2ffb..d0175dfd71 100644
--- a/tensorflow/core/kernels/mkl_concat_op.cc
+++ b/tensorflow/core/kernels/mkl_concat_op.cc
@@ -1,11 +1,8 @@
 /* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
-
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
 You may obtain a copy of the License at
-
     http://www.apache.org/licenses/LICENSE-2.0
-
 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@@ -33,11 +30,22 @@ limitations under the License.
 #include "mkl_dnn_types.h"
 #include "tensorflow/core/util/mkl_util.h"
 
+#ifdef INTEL_MKL_DNN
+#include "mkldnn.hpp"
+
+using mkldnn::stream;
+using mkldnn::concat;
+#endif
+
 namespace tensorflow {
 typedef Eigen::ThreadPoolDevice CPUDevice;
 
+// List of TensorShape objects. Used in Concat/Split layers.
+typedef std::vector<TensorShape> TensorShapeList;
+
 enum AxisArgumentName { NAME_IS_AXIS, NAME_IS_CONCAT_DIM };
 
+
 // TODO(intelft) Check if we can reuse existing EigenConcatOp using Mutable
 // reference inputs.
 // --------------------------------------------------------------------------
@@ -55,6 +63,8 @@ class EigenConcatBaseOp : public OpKernel {
   // we need to have empty Compute because Compute is pure virtual function.
   void Compute(OpKernelContext* c) {}
 
+#ifndef INTEL_MKL_DNN
+
   void Compute(OpKernelContext* c, const std::vector<Tensor>& values) {
     const Tensor* concat_dim_tensor;
     const char* axis_attribute_name =
@@ -139,8 +149,89 @@ class EigenConcatBaseOp : public OpKernel {
       ConcatCPU<T>(c->device(), inputs_flat, &output_flat);
     }
   }
+
+#else  // MKL_DNN
+
+void Compute(OpKernelContext* c, const std::vector<Tensor>& values,
+                        const TensorShapeList& input_shapes) {
+    const Tensor* concat_dim_tensor;
+    const char* axis_attribute_name =
+        AxisArgName == NAME_IS_AXIS
+            ? "axis"
+            : AxisArgName == NAME_IS_CONCAT_DIM ? "concat_dim" : "<invalid>";
+    OP_REQUIRES_OK(c, c->input(axis_attribute_name, &concat_dim_tensor));
+    OP_REQUIRES(c, IsLegacyScalar(concat_dim_tensor->shape()),
+                errors::InvalidArgument(
+                    axis_attribute_name,
+                    " tensor should be a scalar integer, but got shape ",
+                    concat_dim_tensor->shape().DebugString()));
+    const int32 concat_dim =
+        internal::SubtleMustCopy(concat_dim_tensor->scalar<int32>()());
+    // Instead of accessing values from context, we use input to Compute.
+    const int N = values.size();
+    const int input_dims = input_shapes[0].dims();
+    const TensorShape& input_shape = input_shapes[0];
+
+    int32 axis = concat_dim < 0 ? concat_dim + input_dims : concat_dim;
+    OP_REQUIRES(c,
+                (0 <= axis && axis < input_dims) ||
+                    (allow_legacy_scalars() && concat_dim == 0),
+                errors::InvalidArgument(
+                    "ConcatOp : Expected concatenating dimensions in the range "
+                    "[",
+                    -input_dims, ", ", input_dims, "), but got ", concat_dim));
+    // Note that we reduce the concat of n-dimensional tensors into a two
+    // dimensional concat. Assuming the dimensions of any input/output
+    // tensor are {x0, x1,...,xn-1, y0, y1,...,ym-1}, where the concat is along
+    // the dimension indicated with size y0, we flatten it to {x, y}, where y =
+    // Prod_i(yi) and x = ((n > 0) ? Prod_i(xi) : 1).
+    ConstMatrixVector inputs_flat;
+    inputs_flat.reserve(N);
+    int64 inputs_flat_dim0 = 1;
+    for (int d = 0; d < axis; ++d) {
+      inputs_flat_dim0 *= input_shape.dim_size(d);
+    }
+    int64 output_concat_dim = 0;
+    const bool input_is_scalar = IsLegacyScalar(input_shape);
+    for (int i = 0; i < N; ++i) {
+      const auto in = values[i];
+      const bool in_is_scalar = IsLegacyScalar(input_shapes[i]);
+      OP_REQUIRES(
+          c, (input_shapes[i].dims() == input_dims) ||
+              (input_is_scalar && in_is_scalar),
+          errors::InvalidArgument(
+              "ConcatOp : Ranks of all input tensors should match: shape[0] = ",
+              input_shape.DebugString(), " vs. shape[", i,
+              "] = ", input_shapes[i].DebugString()));
+      if (in.NumElements() > 0) {
+        int64 inputs_flat_dim1 = in.NumElements() / inputs_flat_dim0;
+        inputs_flat.emplace_back(new typename TTypes<T, 2>::ConstMatrix(
+            in.shaped<T, 2>({inputs_flat_dim0, inputs_flat_dim1})));
+      }
+      output_concat_dim += input_shapes[i].dims() > 0 ?
+                           input_shapes[i].dim_size(axis) : 1;
+    }
+
+    TensorShape output_shape(input_shape);
+    if (output_shape.dims() == 0) {
+      output_shape.AddDim(output_concat_dim);
+    } else {
+      output_shape.set_dim(axis, output_concat_dim);
+    }
+    Tensor* output = nullptr;
+    OP_REQUIRES_OK(c, c->allocate_output(0, output_shape, &output));
+    if (output->NumElements() > 0) {
+      int64 output_dim1 = output->NumElements() / inputs_flat_dim0;
+      auto output_flat = output->shaped<T, 2>({inputs_flat_dim0, output_dim1});
+      ConcatCPU<T>(c->device(), inputs_flat, &output_flat);
+    }
+  }
+
+#endif
 };
 
+#ifndef INTEL_MKL_DNN
+
 // --------------------------------------------------------------------------
 //                      Mkl Concat Op
 // --------------------------------------------------------------------------
@@ -327,6 +418,7 @@ class MklConcatOp : public OpKernel {
     OP_REQUIRES_OK(context, context->status());
   }
 
+
  private:
   typedef struct {
     TensorFormat data_format;
@@ -435,8 +527,284 @@ class MklConcatOp : public OpKernel {
         mkl_tensor->flat<uint8>().data(),
         mkl_tensor->flat<uint8>().size() * sizeof(uint8));
   }
+
+  // overloading methods with input shapes as a list of TensorShape's
+  void CallEigenVersion(OpKernelContext* context, const OpInputList& values,
+                        const TensorShapeList& input_shapes) {
+    CHECK_EQ(values.size(), input_shapes.size());
+
+    std::vector<Tensor> converted_values;
+    for (int i = 0; i < input_shapes.size(); i++) {
+      converted_values.push_back(values[i]);
+    }
+
+    // Call Eigen concat.
+    eigen_concat_op_.Compute(context, converted_values);
+
+    // Set dummy Mkl tensor as output Mkl tensor for this op.
+    MklShape mkl_tensor_mkl_shape;
+    mkl_tensor_mkl_shape.SetMklTensor(false);
+    mkl_tensor_mkl_shape.SetDimensions(4);
+    Tensor* mkl_tensor = nullptr;
+    TensorShape mkl_tensor_tf_shape;
+    mkl_tensor_tf_shape.AddDim(
+        SIZE_OF_MKL_SERIAL_DATA(mkl_tensor_mkl_shape.GetDimension()));
+    int tf_output_index = 0;
+    context->allocate_output(
+        GetTensorMetaDataIndex(tf_output_index, context->num_outputs()),
+        mkl_tensor_tf_shape, &mkl_tensor);
+    mkl_tensor_mkl_shape.SerializeMklShape(
+        mkl_tensor->flat<uint8>().data(),
+        mkl_tensor->flat<uint8>().size() * sizeof(uint8));
+  }
 };
 
+#else
+
+// --------------------------------------------------------------------------
+//                      Mkl Concat Op
+// --------------------------------------------------------------------------
+
+template <typename Device, typename T, AxisArgumentName AxisArgName>
+class MklConcatOp : public OpKernel {
+ private:
+  TensorFormat data_format_;
+  EigenConcatBaseOp<Device, T, AxisArgName> eigen_concat_op_;
+
+ public:
+  typedef std::vector<std::unique_ptr<typename TTypes<T, 2>::ConstMatrix>>
+      ConstMatrixVector;
+
+  explicit MklConcatOp(OpKernelConstruction* c)
+      : OpKernel(c), eigen_concat_op_(c) {}
+
+  void Compute(OpKernelContext* context) override {
+    try {
+      auto cpu_engine = engine(engine::cpu, 0);
+      OpInputList input_tensors;
+      GetMklInputList(context, "values", &input_tensors);
+      const int N = input_tensors.size();
+
+      // Get Tensor shapes.
+      std::vector<MklDnnShape> input_shapes(N);
+      GetMklShapeList(context, "values", &input_shapes);
+
+      const Tensor& concat_dim_tensor = (AxisArgName == NAME_IS_CONCAT_DIM)
+                    ? MklGetInput(context, 0) : MklGetInput(context, N);
+      // Sanity checks
+      OP_REQUIRES(context, IsLegacyScalar(concat_dim_tensor.shape()),
+        errors::InvalidArgument(
+            "Concat dim tensor should be a scalar integer, but got shape ",
+            concat_dim_tensor.shape().DebugString()));
+      int32 concat_dim = internal::SubtleMustCopy(
+                           concat_dim_tensor.scalar<int32>()());
+      if (concat_dim < 0) concat_dim = N + concat_dim;
+
+      // check that ranks of all tensors match
+      // and that their shapes match except for concat_dim.
+      int i = 0;
+      bool invoke_eigen = false;
+      bool are_all_mkl_inputs = true, are_all_tf_inputs = true;
+      const TensorShape expected_shape = input_shapes[0].IsMklTensor() ?
+                                         input_shapes[0].GetTfShape() :
+                                         input_tensors[0].shape();
+      size_t expected_dims = expected_shape.dims();
+      for (auto& s : input_shapes) {
+        if (s == expected_shape) {++i; continue;}
+
+        TensorShape s_shape = s.IsMklTensor() ? s.GetTfShape() :
+                      input_tensors[i].shape();
+        size_t s_dims = s_shape.dims();
+
+        OP_REQUIRES(context, s_dims == expected_dims,
+                  errors::InvalidArgument(
+                      "_MklConcatOp : Ranks of all input tensors should match:"
+                      " input dimensions = ",
+                      s_dims, " vs. expected rank = ", expected_dims));
+
+        for (int d = 0; d < expected_dims; ++d) {
+          if (d == concat_dim) continue;
+
+          size_t expected_size = expected_shape.dim_size(d);
+          size_t s_size = s_shape.dim_size(d);
+          OP_REQUIRES(
+            context, expected_size == s_size,
+            errors::InvalidArgument("_MklConcatOp : Dimensions of inputs "
+                    "should match: shape[0][", d, "]= ", expected_size,
+                    " vs. shape[", i, "][", d, "] = ", s_size));
+        }
+
+        if (s.IsMklTensor())
+          are_all_tf_inputs = false;
+        else
+          are_all_mkl_inputs = false;
+
+        if (s_dims != 4) invoke_eigen = true;
+        ++i;
+      }
+
+      // All inputs are not in one format (TF or MKL). This is mixed input case.
+      // We can potentially optimize this case by converting all TF inputs
+      // to Mkl format. But currently, we fall to Eigen for this case.
+      // It may be possible to convert inputs that in TF format to Mkl
+      // format and avoid calling eigen version.
+      if (!are_all_tf_inputs && !are_all_mkl_inputs) invoke_eigen = true;
+
+      // Temporary fallback to Eigen until MKLDNN Concat performance
+      // is improved. To be removed.
+      invoke_eigen = true;
+
+      // Call Eigen library
+      if (invoke_eigen) {
+        TensorShapeList tf_input_shapes;
+        i = 0;
+        for (auto& s : input_shapes) {
+          TensorShape s_shape = s.IsMklTensor() ? s.GetTfShape() :
+                                input_tensors[i].shape();
+          tf_input_shapes.push_back(s_shape);
+          ++i;
+        }
+        CallEigenVersion(context, input_tensors, tf_input_shapes);
+        return;
+      }
+
+      memory::dims dst_dims;
+      if (are_all_mkl_inputs)
+        dst_dims = TFShapeToMklDnnDims(input_shapes[0].GetTfShape());
+      else
+        // When all the inputs are in Tensorflow format, we don't know
+        // what is the input data format. In that case, we just use
+        // output format that is same as input formats.
+        dst_dims = TFShapeToMklDnnDims(input_tensors[0].shape());
+
+      std::vector<memory::primitive_desc> srcs_pd;
+      std::vector<MklDnnData<T>> srcs(N, MklDnnData<T>(&cpu_engine));
+      int64 dst_concat_dim_size = 0;
+      for (int k =0; k < N; k++) {
+        bool is_mkl_tensor = input_shapes[k].IsMklTensor();
+        memory::dims src_dims;
+
+        // Same comment as dst_dims for src_dims.
+        src_dims = (is_mkl_tensor) ?
+                   TFShapeToMklDnnDims(input_shapes[k].GetTfShape()) :
+                   TFShapeToMklDnnDims(input_tensors[k].shape());
+
+        dst_concat_dim_size += src_dims[concat_dim];
+        auto src_md = is_mkl_tensor ? input_shapes[k].GetMklLayout() :
+          // It does not matter what data format we use here (NHWC or NCHW).
+          // We just need to ensure that output of Concat uses same data format
+          // as input.
+                  memory::desc(src_dims, MklDnnType<T>(), memory::format::nhwc);
+
+        srcs[k].SetUsrMem(src_md, &input_tensors[k]);
+        auto src_mpd = srcs[k].GetUsrMemPrimDesc();
+        srcs_pd.push_back(src_mpd);
+      }
+      dst_dims[concat_dim] = dst_concat_dim_size;
+
+      MklDnnData<T> dst(&cpu_engine);
+      memory::desc dst_md({}, memory::data_undef, memory::format_undef);
+      memory::dims dst_dims_in_nchw;
+      if (are_all_mkl_inputs) {
+        // Since we are passing a specific format for destination,
+        // we need to have dst_dims in MklDnn order (NCHW).
+        auto orig_tf_format = input_shapes[0].GetTfDataFormat();
+        dst_dims_in_nchw = MklDnnDimsInNCHW(dst_dims,
+                               MklDnnDataFormatToTFDataFormat(orig_tf_format));
+        // We will set the output in the same format as input to avoid layout
+        // conversions.
+        // Currently we are setting dst format same as input format.
+        // See if we can make this choice in a better way.
+        dst_md = memory::desc(dst_dims_in_nchw, MklDnnType<T>(),
+                 (memory::format) input_shapes[0].GetMklLayout().data.format);
+      } else {
+        // Again, format does not matter here. We just need to make it same as
+        // input format.
+        dst_md = memory::desc(dst_dims, MklDnnType<T>(), memory::format::nhwc);
+      }
+
+      std::vector<primitive::at> inputs;
+      for (int k=0; k < input_tensors.size(); k++)
+        inputs.push_back(srcs[k].GetOpMem());
+
+      // If all inputs are in MKL format, then meaning of concat_dim needs to
+      // change. Value of concat_dim is tied to input Tensorflow data format
+      // (NHWC or NCHW). MklDnn dimensions are in NCHW order. So if Tensorflow
+      // tensors are in NCHW order, then concat_dim semantics is preserved.
+      // But ifinput tensors are in NHWC order, then semantics need to change.
+      // E.g., if we are concatinating over Channel (dimension 3 for NHWC),
+      // then since MklDnn order is NCHW, concat_dim needs to be 1.
+      if (are_all_mkl_inputs)
+        concat_dim = input_shapes[0].TfDimIdx(concat_dim);
+
+      auto concat_pd = concat::primitive_desc(dst_md, concat_dim, srcs_pd);
+
+      MklDnnShape dnn_shape_dst;
+      TensorShape tf_shape_dst;
+      Tensor* dst_tensor = nullptr;
+      if (are_all_mkl_inputs) {
+        dnn_shape_dst.SetMklTensor(true);
+        auto dst_pd = concat_pd.dst_primitive_desc();
+        dnn_shape_dst.SetMklLayout(&dst_pd);
+        dnn_shape_dst.SetElemType(MklDnnType<T>());
+        dnn_shape_dst.SetTfLayout(dst_dims.size(), dst_dims_in_nchw,
+                                  input_shapes[0].GetTfDataFormat());
+        tf_shape_dst.AddDim((dst_pd.get_size() / sizeof(T)));
+      } else {
+        dnn_shape_dst.SetMklTensor(false);
+        tf_shape_dst = MklDnnDimsToTFShape(dst_dims);
+      }
+      AllocateOutputSetMklShape(context, 0, &dst_tensor,
+                                tf_shape_dst, dnn_shape_dst);
+      CHECK_NOTNULL(dst_tensor);
+
+      dst_md = dnn_shape_dst.IsMklTensor() ?
+               dnn_shape_dst.GetMklLayout() : dst_md;
+      dst.SetUsrMem(dst_md, dst_tensor);
+
+      auto concat_op = concat(concat_pd, inputs, dst.GetOpMem());
+      std::vector<primitive> net;
+      net.push_back(concat_op);
+      stream(stream::kind::eager).submit(net).wait();
+    } catch (mkldnn::error &e) {
+        string error_msg = "Status: " + std::to_string(e.status) +
+               ", message: " + string(e.message) + ", in file " +
+               string(__FILE__) + ":" + std::to_string(__LINE__);
+        OP_REQUIRES_OK(context, errors::Aborted(
+                "Operation received an exception:", error_msg));
+    }
+  }
+
+  void CallEigenVersion(OpKernelContext* context, const OpInputList& values,
+                        const TensorShapeList& input_shapes) {
+    CHECK_EQ(values.size(), input_shapes.size());
+
+    std::vector<Tensor> converted_values;
+    for (int i = 0; i < input_shapes.size(); i++)
+      converted_values.push_back(values[i]);
+
+    // Call Eigen concat.
+    eigen_concat_op_.Compute(context, converted_values, input_shapes);
+
+    // Set output Mkl tensor for this op.
+    MklDnnShape dnn_shape_output;
+    dnn_shape_output.SetMklTensor(false);
+    dnn_shape_output.SetDimensions(4);
+    Tensor* output_tensor = nullptr;
+    TensorShape tf_shape_output;
+    tf_shape_output.AddDim(
+        dnn_shape_output.GetSerializeBufferSize());
+    context->allocate_output(
+        GetTensorMetaDataIndex(0, context->num_outputs()),
+        tf_shape_output, &output_tensor);
+    dnn_shape_output.SerializeMklDnnShape(
+        output_tensor->flat<uint8>().data(),
+        output_tensor->flat<uint8>().size() * sizeof(uint8));
+  }
+};
+
+#endif
+
 /* Use optimized concat for float type only */
 #define REGISTER_MKL_CPU(type)                                              \
   REGISTER_KERNEL_BUILDER(Name("_MklConcat")                                \
diff --git a/tensorflow/core/kernels/mkl_conv_grad_filter_ops.cc b/tensorflow/core/kernels/mkl_conv_grad_filter_ops.cc
index f291281108..793fa24d99 100644
--- a/tensorflow/core/kernels/mkl_conv_grad_filter_ops.cc
+++ b/tensorflow/core/kernels/mkl_conv_grad_filter_ops.cc
@@ -47,11 +47,8 @@ limitations under the License.
 
 using mkldnn::stream;
 using mkldnn::prop_kind;
-
-using mkldnn::convolution_forward;
 using mkldnn::convolution_backward_weights;
-using mkldnn::convolution_direct;
-
+using mkldnn::memory;
 #endif
 
 namespace tensorflow {
@@ -426,183 +423,229 @@ class MklConv2DCustomBackpropFilterOp : public OpKernel {
   TensorFormat data_format_;
 };
 
+#define REGISTER_MKL_FILTER_KERNELS(T)                              \
+  REGISTER_KERNEL_BUILDER(Name("_MklConv2DBackpropFilter")          \
+                              .Device(DEVICE_CPU)                   \
+                              .TypeConstraint<T>("T")               \
+                              .Label(mkl_op_registry::kMklOpLabel), \
+              MklConv2DCustomBackpropFilterOp<CPUDevice, T>);
+TF_CALL_float(REGISTER_MKL_FILTER_KERNELS);
+#undef REGISTER_MKL_FILTER_KERNELS
+
 #else
 
-template <typename Device, class T>
-class MklConv2DCustomBackpropFilterOp : public OpKernel {
+template <typename Device, class T, bool biasEnabled>
+class MklConv2DCustomBackpropFilterOp :
+  public MklConv2DBackpropCommonOp<Device, T> {
  public:
   explicit MklConv2DCustomBackpropFilterOp(OpKernelConstruction* context)
-      : OpKernel(context) {
-    string data_format;
-    OP_REQUIRES_OK(context, context->GetAttr("data_format", &data_format));
-    OP_REQUIRES(context, FormatFromString(data_format, &data_format_),
-                errors::InvalidArgument("Invalid data format"));
+      : MklConv2DBackpropCommonOp<Device, T>(context) { }
+  ~MklConv2DCustomBackpropFilterOp() {}
 
-    OP_REQUIRES_OK(context, context->GetAttr("strides", &strides_));
-    int stride_n = GetTensorDim(strides_, data_format_, 'N');
-    int stride_c = GetTensorDim(strides_, data_format_, 'C');
-    OP_REQUIRES(
-        context, (stride_n == 1 && stride_c == 1),
-        errors::InvalidArgument("Current implementation does not yet support "
-                                "strides in the batch and depth dimensions."));
-    OP_REQUIRES_OK(context, context->GetAttr("padding", &padding_));
+ private:
+  void ValidateMklShapes(const MklDnnShape& input_mkl_shape,
+                         const MklDnnShape& filter_mkl_shape,
+                         const MklDnnShape& obp_mkl_shape) {
+    CHECK(!filter_mkl_shape.IsMklTensor())
+      << "Conv2DBackpropFilter: filter should not be in MKL Layout";
   }
 
-  void Compute(OpKernelContext* context) override {
-    try {
-      auto cpu_engine = engine(engine::cpu, 0);
+  size_t GetInputTensorIndexWithSizes() { return 1; /* filter index */ }
 
-      MklDnnData<T> input(&cpu_engine);
-      MklDnnData<T> outbackprop(&cpu_engine);
-      MklDnnData<T> output(&cpu_engine);
+  TensorShape MakeInputTfShape(OpKernelContext* context,
+                               const Tensor& input_tensor) {
+    size_t input_idx = 0;
+    return GetTfShape(context, input_idx);
+  }
 
-      // Input tensors
-      const Tensor& input_tensor = MklGetInput(context, 0);
-      const Tensor& filter_tensor = MklGetInput(context, 1);
-      const Tensor& obp_tensor = MklGetInput(context, 2);  // Outbackprop
+  TensorShape MakeFilterTfShape(OpKernelContext* context,
+                                const Tensor& filter_tensor) {
+    TensorShape filter_tf_shape;
+    CHECK_EQ(TensorShapeUtils::IsVector(filter_tensor.shape()), true);
+    CHECK_EQ(TensorShapeUtils::MakeShape(
+             filter_tensor.vec<int32>(), &filter_tf_shape).ok(), true);
+    return filter_tf_shape;
+  }
 
-      // Generate input shapes.
-      TensorShape filter_shape;
-      OP_REQUIRES(context, TensorShapeUtils::IsVector(filter_tensor.shape()),
-        errors::InvalidArgument(
-              "Conv2DBackpropFilter: filter_sizes input must be 1-dim, not ",
-              filter_tensor.dims()));
-      OP_REQUIRES_OK(context, TensorShapeUtils::MakeShape(
-                        filter_tensor.vec<int32>(), &filter_shape));
-      TensorShape input_shape = input_tensor.shape();
-      TensorShape obp_shape = obp_tensor.shape();
-
-      // By default, all dims are in MKL order. Only dims in TF order
-      // are those with prefix tf_order.
-      memory::dims obp_dims, fwd_input_dims, fwd_filter_dims;
-      memory::dims padding_l, padding_r, strides, fwd_output_dims;
-      memory::dims fwd_output_dims_tf_order;
-
-      // Get forward convolution parameters.
-      MklDnnConvUtil conv_utl(context, strides_, padding_, data_format_);
-      conv_utl.GetConvFwdSizesInMklOrder(input_shape, filter_shape,
-                                         &fwd_input_dims, &fwd_filter_dims,
-                                         &strides,
-                                         &fwd_output_dims_tf_order,
-                                         &fwd_output_dims,
-                                         &padding_l, &padding_r);
-      if (!context->status().ok()) return;
-
-      // Create Convolution forward descriptor since Convolution backward
-      // API needs it. For that, we first need to create input, filter
-      // and output memory descriptors.
-      auto mkl_data_format = TFDataFormatToMklDnnDataFormat(data_format_);
-      auto fwd_src_md = memory::desc(fwd_input_dims, MklDnnType<T>(),
-                                     mkl_data_format);
-      auto fwd_filter_md = memory::desc(fwd_filter_dims, MklDnnType<T>(),
-                                        memory::format::hwio);
-      auto fwd_out_md = memory::desc(fwd_output_dims, MklDnnType<T>(),
-                                     mkl_data_format);
-      auto fwd_desc = convolution_forward::desc(prop_kind::forward,
-            convolution_direct, fwd_src_md, fwd_filter_md, fwd_out_md,
-            strides, padding_l, padding_r, TFPaddingToMklDnnPadding(padding_));
-      auto fwd_pd = convolution_forward::primitive_desc(fwd_desc, cpu_engine);
-
-      // Allocate output tensor and shape
-      // TODO(nhasabni): Update this when support for MKL layout is added.
-      // Shape of output of Conv2DBackpropInput is same as 'input' of Conv2D.
-      TensorShape tf_output_shape(filter_shape);
-      MklShape mkl_output_mkl_shape;
-      mkl_output_mkl_shape.SetMklTensor(false);
-      Tensor* output_tensor = nullptr;
-      AllocateOutputSetMklShape(context, 0, &output_tensor, tf_output_shape,
-                                mkl_output_mkl_shape);
-
-      // Create memory for user data.
-      // Describe how the inputs and outputs of Convolution look like. Also
-      // specify buffers containing actual input and output data.
-      // Although input shape required is in MKL-DNN order, the layout is
-      // Tensorflow's layout (NHWC or NCHW depending on data format).
-      input.SetUsrMem(fwd_input_dims, mkl_data_format, &input_tensor);
-      // Outbackprop shape is NHWC or NCHW depending on data format. Since
-      // GetInputSizeInMklOrder function returns size in that order we just use
-      // use that function directly.
-      conv_utl.GetInputSizeInMklOrder(obp_shape, &obp_dims);
-      if (!context->status().ok()) return;
-      outbackprop.SetUsrMem(obp_dims, mkl_data_format, &obp_tensor);
-      // Although output shape required is in MKL-DNN order,
-      // layout is Tensorflow's filter layout (HWIO)
-      // Shape of output of Conv2DBackpropInput is same as shape of filter.
-      memory::dims bwd_output_dims = fwd_filter_dims;
-      output.SetUsrMem(bwd_output_dims, memory::format::hwio, output_tensor);
-
-      // Create memory descriptors for convolution data w/ no specified format.
-      input.SetOpMemDesc(fwd_input_dims, memory::format::any);
-      outbackprop.SetOpMemDesc(obp_dims, memory::format::any);
-      output.SetOpMemDesc(bwd_output_dims, memory::format::any);
-
-      // Create convolution backward weights primitive.
-      auto bwd_desc = convolution_backward_weights::desc(convolution_direct,
-                          input.GetOpMemDesc(), output.GetOpMemDesc(),
-                          outbackprop.GetOpMemDesc(), strides, padding_l,
-                          padding_r, TFPaddingToMklDnnPadding(padding_));
-
-      auto bwd_pd = convolution_backward_weights::primitive_desc(bwd_desc,
-                                                              cpu_engine,
-                                                              fwd_pd);
-
-      PrepareAndExecutePrimitive(bwd_pd, &input, &outbackprop, &output);
-    } catch (mkldnn::error &e) {
-     string error_msg = "Status: " + std::to_string(e.status) +
-                       ", message: " + string(e.message) +
-                       ", in file " + string(__FILE__) + ":" +
-                       std::to_string(__LINE__);
-     OP_REQUIRES_OK(context, errors::Aborted("Operation received an exception:",
-                                            error_msg));
+  const memory::dims& GetOutputDims(const memory::dims& fwd_input_dims,
+                                    const memory::dims& fwd_filter_dims) {
+    // Shape of output of Conv2DBackpropFilter is same as shape of filter.
+    return fwd_filter_dims;
+  }
+
+  memory::format GetOutputFormat(const memory::format data_format) {
+    // Output layout is Tensorflow's filter layout (HWIO).
+    return memory::format::hwio;
+  }
+
+  void CreatePrimitive(OpKernelContext* context,
+                       const engine& cpu_engine,
+                       const convolution_forward::primitive_desc& conv_fwd_pd,
+                       MklDnnData<T>* input, MklDnnData<T>* filter,
+                       MklDnnData<T>* outbackprop, MklDnnData<T>* output,
+                       Tensor** output_tensor,
+                       const memory::dims& strides,
+                       const memory::dims& padding_l,
+                       const memory::dims& padding_r,
+                       padding_kind padding,
+                       const memory::dims& bwd_output_dims,
+                       memory::format bwd_output_format) {
+    CHECK_NOTNULL(context);
+    CHECK_NOTNULL(input);
+    CHECK_NOTNULL(filter);
+    CHECK_NOTNULL(outbackprop);
+    CHECK_NOTNULL(output);
+    CHECK_NOTNULL(output_tensor);
+
+    MklDnnData<T>* bias_grad = nullptr;
+    int depth = 0;
+    if (biasEnabled) {
+      // Data structure for bias_grad
+      bias_grad = new MklDnnData<T> (&cpu_engine);
+      TensorShape obp_tf_shape = GetTfShape(context, 2);
+      depth = (MklConv2DBackpropCommonOp<Device, T>::GetTFDataFormat()
+                == FORMAT_NCHW) ?
+          obp_tf_shape.dim_size(1) : obp_tf_shape.dim_size(3);
+      memory::dims bias_grad_dims = {depth};
+      bias_grad->SetOpMemDesc(bias_grad_dims, memory::format::x);
+    }
+
+    // Create convolution backward weights primitive.
+    auto bwd_desc = (biasEnabled && (bias_grad != nullptr))?
+        convolution_backward_weights::desc(convolution_direct,
+                                input->GetOpMemDesc(), output->GetOpMemDesc(),
+                                bias_grad->GetOpMemDesc(),
+                                outbackprop->GetOpMemDesc(), strides, padding_l,
+                                padding_r, padding) :
+        convolution_backward_weights::desc(convolution_direct,
+                          input->GetOpMemDesc(), output->GetOpMemDesc(),
+                          outbackprop->GetOpMemDesc(), strides, padding_l,
+                          padding_r, padding);
+
+    auto bwd_pd = convolution_backward_weights::primitive_desc(bwd_desc,
+                                                            cpu_engine,
+                                                            conv_fwd_pd);
+
+    // Allocate output tensor.
+    AllocateOutputTensor(context, bwd_pd, bwd_output_dims,
+                         bwd_output_format, output_tensor);
+
+    CHECK_NOTNULL(*output_tensor);
+    // Set buffer handle using allocated output tensor.
+    output->SetUsrMemDataHandle(*output_tensor);
+
+    if (biasEnabled && (bias_grad != nullptr)) {
+      // Allocate bias_grad tensor
+      TensorShape bias_grad_shape({depth});
+      Tensor* bias_grad_tensor = nullptr;
+      AllocateBiasGradTensor(context, bias_grad_shape, &bias_grad_tensor);
+      memory::dims bias_grad_dims = {depth};
+      // Since Bias is 1D, we use format::x from MKLDNN to represent it.
+      auto bias_grad_md = memory::desc({bias_grad_dims}, MklDnnType<T>(),
+                                       memory::format::x);
+      bias_grad->SetUsrMem(bias_grad_md, bias_grad_tensor);
+      bias_grad->SetUsrMemDataHandle(bias_grad_tensor);
+    }
+
+    if (biasEnabled && (bias_grad != nullptr)) {
+      PrepareAndExecutePrimitive(bwd_pd, input, outbackprop, output, bias_grad);
+    } else {
+      PrepareAndExecutePrimitive(bwd_pd, input, outbackprop, output);
     }
   }
 
- private:
-  std::vector<int32> strides_;
-  Padding padding_;
-  TensorFormat data_format_;
+  // Allocate output tensor.
+  void AllocateOutputTensor(OpKernelContext* context,
+                  const convolution_backward_weights::primitive_desc& conv_pd,
+                  const memory::dims& output_dims_mkl_order,
+                  memory::format output_tf_format, Tensor** output_tensor) {
+      CHECK_NOTNULL(output_tensor);
+
+      // For BackpropFilter, we convert the output tensor back in Tensorflow
+      // layout. Because typically, BackpropFilter is the last operator in the
+      // graph that emit filter gradient that is provided to ApplyGradient
+      // method to update the filter. But it may be possible to eliminate this
+      // by forwarding filter in MKL layout if we support ApplyGradient method
+      // for MKL layout propagation.
+      MklDnnShape output_mkl_shape;
+      output_mkl_shape.SetMklTensor(false);
+      // output_dims_mkl_order is in OIHW format.
+      // Allocate shape of TF tensor in HWIO format.
+      TensorShape output_tf_shape({output_dims_mkl_order[MklDnnDims::Dim_H],
+                                   output_dims_mkl_order[MklDnnDims::Dim_W],
+                                   output_dims_mkl_order[MklDnnDims::Dim_I],
+                                   output_dims_mkl_order[MklDnnDims::Dim_O]});
+      AllocateOutputSetMklShape(context, 0, output_tensor, output_tf_shape,
+                                output_mkl_shape);
+  }
+
+  // Allocate tensor for bias grad
+  void AllocateBiasGradTensor(OpKernelContext* context,
+                              const TensorShape& bias_grad_shape,
+                              Tensor** bias_grad_tensor) {
+    CHECK_NOTNULL(bias_grad_tensor);
+
+    MklDnnShape bias_grad_mkl_shape;
+    bias_grad_mkl_shape.SetMklTensor(false);
+    AllocateOutputSetMklShape(context, 1, bias_grad_tensor, bias_grad_shape,
+                              bias_grad_mkl_shape);
+  }
 
   // Prepare and execute net - checks for input and output reorders.
   void PrepareAndExecutePrimitive(
                   const convolution_backward_weights::primitive_desc& conv_pd,
                   MklDnnData<T>* input, MklDnnData<T>* obp,
-                  MklDnnData<T>* output) {
+                  MklDnnData<T>* output, MklDnnData<T>* bias_grad = nullptr) {
     // Create reorders between user layout and MKL layout if it is needed and
     // add it to the net before convolution.
     std::vector<primitive> net;
     input->CheckReorderToOpMem(conv_pd.src_primitive_desc(), &net);
     obp->CheckReorderToOpMem(conv_pd.diff_dst_primitive_desc(), &net);
 
-    // Memory for output of convolution. Since we may need reorder on the
-    // output side, we will prepare reorder primitive in case output
-    // reorder to user memory is required.
+    // For BackpropFilter, we convert the output tensor back in Tensorflow
+    // layout.
     bool output_reorder_required = output->PrepareReorderToUserMemIfReq(
                                       conv_pd.diff_weights_primitive_desc());
 
-    net.push_back(convolution_backward_weights(conv_pd, input->GetOpMem(),
-                                    obp->GetOpMem(), output->GetOpMem()));
+    if (biasEnabled && (bias_grad != nullptr)) {
+      net.push_back(convolution_backward_weights(conv_pd, input->GetOpMem(),
+                                      obp->GetOpMem(), output->GetOpMem(),
+                                      bias_grad->GetOpMem()));
+    } else {
+      net.push_back(convolution_backward_weights(conv_pd, input->GetOpMem(),
+                                      obp->GetOpMem(), output->GetOpMem()));
+    }
 
-    // Insert reorder primitive in the net for output reorder if reorder is
-    // required.
     if (output_reorder_required) {
       output->InsertReorderToUserMem(&net);
     }
 
-    // Handle output reorder
     stream(stream::kind::eager).submit(net).wait();
   }
 };
-#endif
 
 #define REGISTER_MKL_FILTER_KERNELS(T)                              \
   REGISTER_KERNEL_BUILDER(Name("_MklConv2DBackpropFilter")          \
                               .Device(DEVICE_CPU)                   \
                               .TypeConstraint<T>("T")               \
                               .Label(mkl_op_registry::kMklOpLabel), \
-                          MklConv2DCustomBackpropFilterOp<CPUDevice, T>);
+              MklConv2DCustomBackpropFilterOp<CPUDevice, T, false>);\
+  REGISTER_KERNEL_BUILDER(Name("_MklConv2DBackpropFilterWithBias")  \
+                              .Device(DEVICE_CPU)                   \
+                              .TypeConstraint<T>("T")               \
+                              .Label(mkl_op_registry::kMklOpLabel), \
+              MklConv2DCustomBackpropFilterOp<CPUDevice, T, true>); \
+  REGISTER_KERNEL_BUILDER(Name("__MklDummyConv2DBackpropFilterWithBias")  \
+                              .Device(DEVICE_CPU)                   \
+                              .TypeConstraint<T>("T")               \
+                              .Label(mkl_op_registry::kMklOpLabel), \
+              MklDummyOp<CPUDevice, T>);
 
 TF_CALL_float(REGISTER_MKL_FILTER_KERNELS);
 #undef REGISTER_MKL_FILTER_KERNELS
+
+#endif  // INTEL_MKL_DNN
+
 }  // namespace tensorflow
 
 #endif  // INTEL_MKL
diff --git a/tensorflow/core/kernels/mkl_conv_grad_input_ops.cc b/tensorflow/core/kernels/mkl_conv_grad_input_ops.cc
index 4a47d0463e..df51df9638 100644
--- a/tensorflow/core/kernels/mkl_conv_grad_input_ops.cc
+++ b/tensorflow/core/kernels/mkl_conv_grad_input_ops.cc
@@ -49,9 +49,6 @@ limitations under the License.
 
 using mkldnn::stream;
 using mkldnn::prop_kind;
-
-using mkldnn::convolution_forward;
-using mkldnn::convolution_direct;
 using mkldnn::convolution_backward_data;
 #endif
 
@@ -362,143 +359,117 @@ class MklConv2DCustomBackpropInputOp : public OpKernel {
 #else
 
 template <typename Device, class T>
-class MklConv2DCustomBackpropInputOp : public OpKernel {
+class MklConv2DCustomBackpropInputOp :
+  public MklConv2DBackpropCommonOp<Device, T> {
  public:
-  ~MklConv2DCustomBackpropInputOp() {}
   explicit MklConv2DCustomBackpropInputOp(OpKernelConstruction* context)
-      : OpKernel(context) {
-    string data_format_str;
-    OP_REQUIRES_OK(context, context->GetAttr("data_format", &data_format_str));
-    OP_REQUIRES(context, FormatFromString(data_format_str, &data_format_),
-                errors::InvalidArgument("Invalid data format"));
-    OP_REQUIRES_OK(context, context->GetAttr("strides", &strides_));
-    int stride_n = GetTensorDim(strides_, data_format_, 'N');
-    int stride_c = GetTensorDim(strides_, data_format_, 'C');
-    OP_REQUIRES(
-        context, (stride_n == 1 && stride_c == 1),
-        errors::InvalidArgument("Current implementation does not yet support "
-                                "strides in the batch and depth dimensions."));
+      : MklConv2DBackpropCommonOp<Device, T>(context) { }
+  ~MklConv2DCustomBackpropInputOp() {}
 
-    OP_REQUIRES_OK(context, context->GetAttr("padding", &padding_));
+ private:
+  void ValidateMklShapes(const MklDnnShape& input_mkl_shape,
+                         const MklDnnShape& filter_mkl_shape,
+                         const MklDnnShape& obp_mkl_shape) {
+    // Tensor that feeds to 'Input' slot of BackpropInput is always just a shape
+    // of the Tensor and never an actual tensor. So it will never be in MKL
+    // layout.
+    CHECK(!input_mkl_shape.IsMklTensor())
+      << "Conv2DBackpropInput: input should not be in MKL Layout";
   }
 
-  void Compute(OpKernelContext* context) override {
-    try {
-      auto cpu_engine = engine(engine::cpu, 0);
+  size_t GetInputTensorIndexWithSizes() { return 0; /* input index */ }
 
-      MklDnnData<T> filter(&cpu_engine);
-      MklDnnData<T> outbackprop(&cpu_engine);
-      MklDnnData<T> output(&cpu_engine);
+  TensorShape MakeInputTfShape(OpKernelContext* context,
+                               const Tensor& input_tensor) {
+    TensorShape input_tf_shape;
+    CHECK_EQ(TensorShapeUtils::IsVector(input_tensor.shape()), true);
+    CHECK_EQ(TensorShapeUtils::MakeShape(input_tensor.vec<int32>(),
+                                         &input_tf_shape).ok(), true);
+    return input_tf_shape;
+  }
 
-      // Input tensors
-      const Tensor& input_tensor = MklGetInput(context, 0);
-      const Tensor& filter_tensor = MklGetInput(context, 1);
-      const Tensor& obp_tensor = MklGetInput(context, 2);  // Outbackprop
+  TensorShape MakeFilterTfShape(OpKernelContext* context,
+                                const Tensor& filter_tensor) {
+    size_t filter_idx = 1;
+    return GetTfShape(context, filter_idx);
+  }
 
-      // Generate input shape.
-      TensorShape input_shape;
-      OP_REQUIRES(context, TensorShapeUtils::IsVector(input_tensor.shape()),
-        errors::InvalidArgument(
-              "Conv2DBackpropInput: input_sizes input must be 1-dim, not ",
-              input_tensor.dims()));
-      OP_REQUIRES_OK(context, TensorShapeUtils::MakeShape(
-                        input_tensor.vec<int32>(), &input_shape));
-      TensorShape filter_shape = filter_tensor.shape();
-      TensorShape obp_shape = obp_tensor.shape();
-
-      // By default, all dims are in MKL order. Only dims in TF order
-      // are those with prefix tf_order.
-      memory::dims obp_dims, fwd_input_dims, fwd_filter_dims;
-      memory::dims padding_l, padding_r, strides, fwd_output_dims;
-      memory::dims fwd_output_dims_tf_order;
-
-      // Get forward convolution parameters.
-      MklDnnConvUtil conv_utl(context, strides_, padding_, data_format_);
-      conv_utl.GetConvFwdSizesInMklOrder(input_shape, filter_shape,
-                                         &fwd_input_dims, &fwd_filter_dims,
-                                         &strides,
-                                         &fwd_output_dims_tf_order,
-                                         &fwd_output_dims,
-                                         &padding_l, &padding_r);
-      if (!context->status().ok()) return;
-
-      // Create Convolution forward descriptor since Convolution backward
-      // API needs it. For that, we first need to create input, filter
-      // and output memory descriptors.
-      auto mkl_data_format = TFDataFormatToMklDnnDataFormat(data_format_);
-      auto fwd_src_md = memory::desc(fwd_input_dims, MklDnnType<T>(),
-                                     mkl_data_format);
-      auto fwd_filter_md = memory::desc(fwd_filter_dims, MklDnnType<T>(),
-                                        memory::format::hwio);
-      auto fwd_out_md = memory::desc(fwd_output_dims, MklDnnType<T>(),
-                                     mkl_data_format);
-      auto fwd_desc = convolution_forward::desc(prop_kind::forward,
-            convolution_direct, fwd_src_md, fwd_filter_md, fwd_out_md,
-            strides, padding_l, padding_r, TFPaddingToMklDnnPadding(padding_));
-      auto fwd_pd = convolution_forward::primitive_desc(fwd_desc, cpu_engine);
-
-      // Allocate output tensor and shape
-      // TODO(nhasabni): Update this when support for MKL layout is added.
-      // Shape of output of Conv2DBackpropInput is same as 'input' of Conv2D.
-      TensorShape tf_output_shape(input_shape);
-      MklShape mkl_output_mkl_shape;
-      mkl_output_mkl_shape.SetMklTensor(false);
-      Tensor* output_tensor = nullptr;
-      AllocateOutputSetMklShape(context, 0, &output_tensor, tf_output_shape,
-                                mkl_output_mkl_shape);
-
-      // Create memory for user data.
-      // Describe how the inputs and outputs of Convolution look like. Also
-      // specify buffers containing actual input and output data.
-      // Although input shape required is in MKL-DNN order, the layout is
-      // Tensorflow's layout (NHWC or NCHW depending on data format).
-      // Although filter shape (filter_dims) required is in MKL-DNN order,
-      // the layout is Tensorflow's layout (HWIO).
-      // Shape of Conv2DBackpropInput's filter is same as that of Conv2D filter.
-      filter.SetUsrMem(fwd_filter_dims, memory::format::hwio, &filter_tensor);
-      // Outbackprop shape is NHWC or NCHW depending on data format. Since
-      // GetInputSizeInMklOrder function returns size in that order we just use
-      // use that function directly.
-      conv_utl.GetInputSizeInMklOrder(obp_shape, &obp_dims);
-      if (!context->status().ok()) return;
-      outbackprop.SetUsrMem(obp_dims, mkl_data_format, &obp_tensor);
-      // Although output shape required is in MKL-DNN order,
-      // layout is Tensorflow's layout (NHWC or NCHW depending on data format).
-      // Shape of output of Conv2DBackpropInput is same as shape of 'input'
-      // of Conv2D.
-      memory::dims bwd_output_dims = fwd_input_dims;
-      output.SetUsrMem(bwd_output_dims, mkl_data_format, output_tensor);
-
-      // Create memory descriptors for convolution data w/ no specified format.
-      filter.SetOpMemDesc(fwd_filter_dims, memory::format::any);
-      outbackprop.SetOpMemDesc(obp_dims, memory::format::any);
-      output.SetOpMemDesc(bwd_output_dims, memory::format::any);
-
-      // Create convolution backward data primitive.
-      auto bwd_desc = convolution_backward_data::desc(convolution_direct,
-                          output.GetOpMemDesc(), filter.GetOpMemDesc(),
-                          outbackprop.GetOpMemDesc(), strides, padding_l,
-                          padding_r, TFPaddingToMklDnnPadding(padding_));
-
-      auto bwd_pd = convolution_backward_data::primitive_desc(bwd_desc,
-                                                              cpu_engine,
-                                                              fwd_pd);
-
-      PrepareAndExecutePrimitive(bwd_pd, &filter, &outbackprop, &output);
-    } catch (mkldnn::error &e) {
-     string error_msg = "Status: " + std::to_string(e.status) +
-                       ", message: " + string(e.message) +
-                       ", in file " + string(__FILE__) + ":" +
-                       std::to_string(__LINE__);
-     OP_REQUIRES_OK(context, errors::Aborted("Operation received an exception:",
-                                            error_msg));
-    }
+  const memory::dims& GetOutputDims(const memory::dims& fwd_input_dims,
+                                    const memory::dims& fwd_filter_dims) {
+    // Output Shape of Conv2DBackpropInput is same as shape of Conv2D 'input'.
+    return fwd_input_dims;
   }
 
- private:
-  std::vector<int32> strides_;
-  Padding padding_;
-  TensorFormat data_format_;
+  memory::format GetOutputFormat(const memory::format data_format) {
+    // Output layout is Tensorflow's layout in data format order.
+    return data_format;
+  }
+
+  void CreatePrimitive(OpKernelContext* context,
+                       const engine& cpu_engine,
+                       const convolution_forward::primitive_desc& conv_fwd_pd,
+                       MklDnnData<T>* input, MklDnnData<T>* filter,
+                       MklDnnData<T>* outbackprop, MklDnnData<T>* output,
+                       Tensor** output_tensor,
+                       const memory::dims& strides,
+                       const memory::dims& padding_l,
+                       const memory::dims& padding_r,
+                       padding_kind padding,
+                       const memory::dims& bwd_output_dims,
+                       memory::format bwd_output_format) {
+    CHECK_NOTNULL(context);
+    CHECK_NOTNULL(input);
+    CHECK_NOTNULL(filter);
+    CHECK_NOTNULL(outbackprop);
+    CHECK_NOTNULL(output);
+    CHECK_NOTNULL(output_tensor);
+
+    // Create convolution backward data primitive.
+    auto bwd_desc = convolution_backward_data::desc(convolution_direct,
+                      output->GetOpMemDesc(), filter->GetOpMemDesc(),
+                      outbackprop->GetOpMemDesc(), strides, padding_l,
+                      padding_r, padding);
+
+    auto bwd_pd = convolution_backward_data::primitive_desc(bwd_desc,
+                                                          cpu_engine,
+                                                          conv_fwd_pd);
+
+
+    // Allocate output tensor in TensorFlow and MKL layout.
+    AllocateOutputTensor(context, bwd_pd, bwd_output_dims,
+                         bwd_output_format, output_tensor);
+    CHECK_NOTNULL(*output_tensor);
+    // Set buffer handle using allocated output tensor.
+    output->SetUsrMemDataHandle(*output_tensor);
+
+    PrepareAndExecutePrimitive(bwd_pd, filter, outbackprop, output);
+  }
+
+  // Allocate output tensor.
+  void AllocateOutputTensor(OpKernelContext* context,
+                  const convolution_backward_data::primitive_desc& conv_pd,
+                  const memory::dims& output_dims_mkl_order,
+                  memory::format output_tf_format, Tensor** output_tensor) {
+      CHECK_NOTNULL(output_tensor);
+
+      // Output primitive descriptor for backward data is diff_src.
+      auto dst_pd = conv_pd.diff_src_primitive_desc();
+
+      // Allocate shape of Mkl tensor.
+      MklDnnShape output_mkl_shape;
+      output_mkl_shape.SetMklTensor(true);
+      output_mkl_shape.SetMklLayout(&dst_pd);
+      output_mkl_shape.SetElemType(MklDnnType<T>());
+      output_mkl_shape.SetTfLayout(output_dims_mkl_order.size(),
+                                   output_dims_mkl_order, output_tf_format);
+
+      // Allocate shape of TF tensor.
+      TensorShape output_tf_shape;
+      output_tf_shape.AddDim(dst_pd.get_size() / sizeof(T));
+
+      AllocateOutputSetMklShape(context, 0, output_tensor, output_tf_shape,
+                                output_mkl_shape);
+  }
 
   // Prepare and execute net - checks for input and output reorders.
   void PrepareAndExecutePrimitive(
@@ -511,22 +482,9 @@ class MklConv2DCustomBackpropInputOp : public OpKernel {
     filter->CheckReorderToOpMem(conv_pd.weights_primitive_desc(), &net);
     obp->CheckReorderToOpMem(conv_pd.diff_dst_primitive_desc(), &net);
 
-    // Memory for output of convolution. Since we may need reorder on the
-    // output side, we will prepare reorder primitive in case output
-    // reorder to user memory is required.
-    bool output_reorder_required = output->PrepareReorderToUserMemIfReq(
-                                      conv_pd.diff_src_primitive_desc());
-
     net.push_back(convolution_backward_data(conv_pd, obp->GetOpMem(),
                                     filter->GetOpMem(), output->GetOpMem()));
 
-    // Insert reorder primitive in the net for output reorder if reorder is
-    // required.
-    if (output_reorder_required) {
-      output->InsertReorderToUserMem(&net);
-    }
-
-    // Handle output reorder
     stream(stream::kind::eager).submit(net).wait();
   }
 };
diff --git a/tensorflow/core/kernels/mkl_conv_ops.cc b/tensorflow/core/kernels/mkl_conv_ops.cc
index a9872b8d6d..04268f23bb 100644
--- a/tensorflow/core/kernels/mkl_conv_ops.cc
+++ b/tensorflow/core/kernels/mkl_conv_ops.cc
@@ -40,8 +40,7 @@ limitations under the License.
 #include "tensorflow/core/util/tensor_format.h"
 
 #include "tensorflow/core/util/mkl_util.h"
-#include "mkl_dnn.h"
-#include "mkl_dnn_types.h"
+
 
 #ifdef INTEL_MKL_DNN
 #include "mkldnn.hpp"
@@ -51,6 +50,9 @@ using mkldnn::prop_kind;
 
 using mkldnn::convolution_forward;
 using mkldnn::convolution_direct;
+#else
+#include "mkl_dnn.h"
+#include "mkl_dnn_types.h"
 #endif
 
 namespace tensorflow {
@@ -288,10 +290,8 @@ class MklConv2DOp : public OpKernel {
     mkl_filter_output_mkl_shape.SetMklLayout(mkl_context.prim_fwd,
                                              dnnResourceFilter);
 
-    size_t filter_sizes[4] = {static_cast<size_t>(filter.dim_size(0)),
-                              static_cast<size_t>(filter.dim_size(1)),
-                              static_cast<size_t>(filter.dim_size(2)),
-                              static_cast<size_t>(filter.dim_size(3))};
+    size_t filter_sizes[4] = {filter.dim_size(0), filter.dim_size(1),
+                              filter.dim_size(2), filter.dim_size(3)};
     mkl_filter_output_mkl_shape.SetTfLayout(filter.dims(), filter_sizes,
                                             mkl_context.filter_strides);
 
@@ -514,6 +514,12 @@ class MklConv2DOp : public OpKernel {
       const Tensor& src_tensor = MklGetInput(context, src_idx);
       const Tensor& filter_tensor = MklGetInput(context, filter_idx);
 
+      MklDnnShape src_mkl_shape, filter_mkl_shape;
+      GetMklShape(context, src_idx, &src_mkl_shape);
+      GetMklShape(context, filter_idx, &filter_mkl_shape);
+      CHECK(!filter_mkl_shape.IsMklTensor())
+        << "Conv2D filter should not be in MKL Layout";
+
       MklDnnData<T> src(&cpu_engine);
       MklDnnData<T> filter(&cpu_engine);
       MklDnnData<T> output(&cpu_engine);
@@ -523,8 +529,9 @@ class MklConv2DOp : public OpKernel {
 
       // Get shapes of input tensors in MKL-DNN order
       MklDnnConvUtil conv_utl(context, strides_, padding_, data_format_);
-      conv_utl.GetConvFwdSizesInMklOrder(src_tensor.shape(),
-                                         filter_tensor.shape(),
+      auto src_tf_shape = GetTfShape(context, src_idx);
+      auto filter_tf_shape = GetTfShape(context, filter_idx);
+      conv_utl.GetConvFwdSizesInMklOrder(src_tf_shape, filter_tf_shape,
                                          &src_dims, &filter_dims, &strides,
                                          &output_dims_tf_order,
                                          &output_dims_mkl_order, &padding_l,
@@ -532,58 +539,47 @@ class MklConv2DOp : public OpKernel {
       if (!context->status().ok()) return;
 
       // Check for corner case - if there is nothing to compute, return.
-      TensorShape tf_output_shape({output_dims_tf_order[0],
-                                output_dims_tf_order[1],
-                                output_dims_tf_order[2],
-                                output_dims_tf_order[3]});
-      Tensor* output_tensor = nullptr;
-      MklShape mkl_output_mkl_shape;
-      mkl_output_mkl_shape.SetMklTensor(false);
-      AllocateOutputSetMklShape(context, 0, &output_tensor, tf_output_shape,
-                                mkl_output_mkl_shape);
+      TensorShape output_tf_shape = MklDnnDimsToTFShape(output_dims_tf_order);
 
       // Forward filter in TF format from input at index 1 to output at index 1.
       ForwardTfTensorInToOut(context, 1, 1);
 
-      if (tf_output_shape.num_elements() == 0) {
+      // Corner cases: output with 0 elements and 0 batch size.
+      Tensor* output_tensor = nullptr;
+      if (output_tf_shape.num_elements() == 0 ||
+          output_dims_tf_order[0] == 0) {
         // TODO(jbobba): Verify correctness here
         //               Need semantics for Null MKL tensor
+        MklDnnShape output_mkl_shape;
+        output_mkl_shape.SetMklTensor(false);
+        AllocateOutputSetMklShape(context, 0, &output_tensor, src_tf_shape,
+                                output_mkl_shape);
         return;
       }
 
-      // Corner case to handle 0 batch size.
-      if (output_dims_tf_order[0] == 0) {
-        // Nothing to do, allocate output tensor and return
-        // TODO(nhasabni): remove this code later once serialization
-        // in MKL-DNN is supported.
-        AllocateOutputSetMklShape(context, 0, &output_tensor,
-                                  src_tensor.shape(), mkl_output_mkl_shape);
-        return;
-      } else {
-        // Otherwise regular output tensor allocation
-        // Allocate output tensor.
-      }
-      CHECK_NOTNULL(output_tensor);
-
       // Create memory for user data.
       // Describe how the inputs and outputs of Convolution look like. Also
       // specify buffers containing actual input and output data.
-      // Although input shape (src_dims) required is in MKL-DNN order,
-      // the layout is Tensorflow's layout (NHWC or NCHW depending on data
-      // format).
-      src.SetUsrMem(src_dims, TFDataFormatToMklDnnDataFormat(data_format_),
-                    const_cast<void*>(static_cast<const void*>(
-                    src_tensor.flat<T>().data())));
+      auto tf_fmt = TFDataFormatToMklDnnDataFormat(data_format_);
+      // If input is in MKL layout, then simply grab input layout; otherwise,
+      // construct input Tf layout. For TF layout, although input shape
+      // (src_dims) required is in MKL-DNN order, the layout is Tensorflow's
+      // layout (NHWC or NCHW depending on data format).
+      auto src_md = src_mkl_shape.IsMklTensor()
+                    ? src_mkl_shape.GetMklLayout()
+                    : memory::desc(src_dims, MklDnnType<T>(), tf_fmt);
+      src.SetUsrMem(src_md, &src_tensor);
       // Although filter shape (filter_dims) required is in MKL-DNN order,
       // the layout is Tensorflow's layout (HWIO).
-      filter.SetUsrMem(filter_dims, memory::format::hwio,
-                       const_cast<void*>(static_cast<const void*>(
-                       filter_tensor.flat<T>().data())));
-      // Although output shape (output_dims) required is in MKL-DNN order,
-      // layout is Tensorflow's layout (NHWC or NCHW depending on data format).
-      output.SetUsrMem(output_dims_mkl_order,
-                       TFDataFormatToMklDnnDataFormat(data_format_),
-                       output_tensor->flat<T>().data());
+      auto filter_md = filter_mkl_shape.IsMklTensor()
+                    ? filter_mkl_shape.GetMklLayout()
+          : memory::desc(filter_dims, MklDnnType<T>(), memory::format::hwio);
+      filter.SetUsrMem(filter_md, &filter_tensor);
+      // Set output shape (output_dims) required in MKL-DNN order.
+      // Currently, we set output layout as Tensorflow's layout (NHWC or NCHW
+      // depending on data format). But later we propagate Mkl layout of the
+      // output to the next op directly.
+      output.SetUsrMem(output_dims_mkl_order, tf_fmt);
 
       // Create memory descriptors for convolution data w/ no specified format.
       src.SetOpMemDesc(src_dims, memory::format::any);
@@ -596,9 +592,7 @@ class MklConv2DOp : public OpKernel {
         memory::dims bias_size;
         conv_utl.GetBiasSizeInMklOrder(2 /* bias idx */, &bias_size);
         const Tensor& bias_tensor = MklGetInput(context, 2);
-        bias.SetUsrMem(bias_size, memory::format::x,
-                       const_cast<void*>(static_cast<const void*>(
-                       bias_tensor.flat<T>().data())));
+        bias.SetUsrMem(bias_size, memory::format::x, &bias_tensor);
         bias.SetOpMemDesc(bias_size, memory::format::any);
 
         // Create convolution primitive with Bias.
@@ -609,6 +603,10 @@ class MklConv2DOp : public OpKernel {
 
         auto conv_prim_desc = convolution_forward::primitive_desc(conv_desc,
                                                                 cpu_engine);
+        AllocateOutputTensor(context, conv_prim_desc,
+                             output_dims_mkl_order, tf_fmt, &output_tensor);
+        // Set data handle for output.
+        output.SetUsrMemDataHandle(output_tensor);
         PrepareAndExecuteNet(conv_prim_desc, &src, &filter, &bias, &output);
       } else {
         // Create convolution primitive without Bias.
@@ -619,6 +617,10 @@ class MklConv2DOp : public OpKernel {
 
         auto conv_prim_desc = convolution_forward::primitive_desc(conv_desc,
                                                                 cpu_engine);
+        AllocateOutputTensor(context, conv_prim_desc, output_dims_mkl_order,
+                             tf_fmt, &output_tensor);
+        // Set data handle for output.
+        output.SetUsrMemDataHandle(output_tensor);
         PrepareAndExecuteNet(conv_prim_desc, &src, &filter, nullptr, &output);
       }
     } catch (mkldnn::error &e) {
@@ -636,23 +638,44 @@ class MklConv2DOp : public OpKernel {
   Padding padding_;
   TensorFormat data_format_;
 
+  // Allocate output tensor.
+  void AllocateOutputTensor(
+                  OpKernelContext* context,
+                  const convolution_forward::primitive_desc& conv_prim_desc,
+                  const memory::dims& output_dims_mkl_order,
+                  memory::format output_tf_format, Tensor** output_tensor) {
+      CHECK_NOTNULL(output_tensor);
+      auto dst_pd = conv_prim_desc.dst_primitive_desc();
+
+      // Allocate shape of Mkl tensor.
+      MklDnnShape output_mkl_shape;
+      output_mkl_shape.SetMklTensor(true);
+      output_mkl_shape.SetMklLayout(&dst_pd);
+      output_mkl_shape.SetElemType(MklDnnType<T>());
+      output_mkl_shape.SetTfLayout(output_dims_mkl_order.size(),
+                                   output_dims_mkl_order, output_tf_format);
+
+      // Allocate shape of TF tensor.
+      TensorShape output_tf_shape;
+      output_tf_shape.AddDim((dst_pd.get_size() / sizeof(T)));
+
+      const int kOutputSlotIdx = 0;
+      AllocateOutputSetMklShape(context, kOutputSlotIdx, output_tensor,
+                                output_tf_shape, output_mkl_shape);
+  }
+
   // Prepare and execute net - checks for input and output reorders.
   void PrepareAndExecuteNet(
                   const convolution_forward::primitive_desc& conv_prim_desc,
                   MklDnnData<T>* src, MklDnnData<T>* filter,
                   MklDnnData<T>* bias, MklDnnData<T>* output) {
     // Create reorders between user layout and MKL layout if it is needed and
-    // add it to the net before convolution.
+    // add it to the net before convolution. No need to check for output
+    // reorder as we propagate output layout to the next layer.
     std::vector<primitive> net;
     src->CheckReorderToOpMem(conv_prim_desc.src_primitive_desc(), &net);
     filter->CheckReorderToOpMem(conv_prim_desc.weights_primitive_desc(), &net);
 
-    // Memory for output of convolution. Since we may need reorder on the
-    // output side, we will prepare reorder primitive in case output
-    // reorder to user memory is required.
-    bool output_reorder_required = output->PrepareReorderToUserMemIfReq(
-                                      conv_prim_desc.dst_primitive_desc());
-
     // Create convolution primitive and add it to net.
     if (bias) {
       CHECK_EQ(biasEnabled, true);
@@ -665,13 +688,6 @@ class MklConv2DOp : public OpKernel {
                                     filter->GetOpMem(), output->GetOpMem()));
     }
 
-    // Insert reorder primitive in the net for output reorder if reorder is
-    // required.
-    if (output_reorder_required) {
-      output->InsertReorderToUserMem(&net);
-    }
-
-    // Handle output reorder
     stream(stream::kind::eager).submit(net).wait();
   }
 };
@@ -688,7 +704,12 @@ class MklConv2DOp : public OpKernel {
                               .Device(DEVICE_CPU)                   \
                               .TypeConstraint<T>("T")               \
                               .Label(mkl_op_registry::kMklOpLabel), \
-                          MklConv2DOp<CPUDevice, T, true>);
+                          MklConv2DOp<CPUDevice, T, true>);         \
+  REGISTER_KERNEL_BUILDER(Name("__MklDummyConv2DWithBias")          \
+                              .Device(DEVICE_CPU)                   \
+                              .TypeConstraint<T>("T")               \
+                              .Label(mkl_op_registry::kMklOpLabel), \
+                          MklDummyOp<CPUDevice, T>);
 
 TF_CALL_float(REGISTER_MKL_CPU);
 
diff --git a/tensorflow/core/kernels/mkl_conv_ops.h b/tensorflow/core/kernels/mkl_conv_ops.h
index f0cb37f8a4..47a9b4bfc7 100644
--- a/tensorflow/core/kernels/mkl_conv_ops.h
+++ b/tensorflow/core/kernels/mkl_conv_ops.h
@@ -41,6 +41,12 @@ limitations under the License.
 
 #ifdef INTEL_MKL_DNN
 #include "mkldnn.hpp"
+
+using mkldnn::stream;
+using mkldnn::prop_kind;
+
+using mkldnn::convolution_forward;
+using mkldnn::convolution_direct;
 #endif
 
 namespace tensorflow {
@@ -108,7 +114,13 @@ class MklDnnConvUtil {
   #undef CHECK_BOUNDS
 
     // MKL-DNN always requires input in NCHW format.
-    *input_dims = {input_batch, input_depth, input_rows, input_cols};
+    std::vector<int> mkldnn_sizes(4, -1);
+    mkldnn_sizes[MklDnnDims::Dim_N] = input_batch;
+    mkldnn_sizes[MklDnnDims::Dim_C] = input_depth;
+    mkldnn_sizes[MklDnnDims::Dim_H] = input_rows;
+    mkldnn_sizes[MklDnnDims::Dim_W] = input_cols;
+
+    *input_dims = mkldnn_sizes;
   }
 
   // Calculate Convolution filter size in MKL-DNN order. MKL-DNN
@@ -156,7 +168,13 @@ class MklDnnConvUtil {
 
     // MKL-DNN always needs filter in OIHW format.
     // OIHW = (out_depth, in_depth, rows, cols)
-    *filter_dims = {out_depth, in_depth, filter_rows, filter_cols};
+    std::vector<int> mkldnn_sizes(4, -1);
+    mkldnn_sizes[MklDnnDims::Dim_O] = out_depth;
+    mkldnn_sizes[MklDnnDims::Dim_I] = in_depth;
+    mkldnn_sizes[MklDnnDims::Dim_H] = filter_rows;
+    mkldnn_sizes[MklDnnDims::Dim_W] = filter_cols;
+
+    *filter_dims = mkldnn_sizes;
   }
 
   // Calculate Convolution filter size in MKL-DNN order. MKL-DNN
@@ -167,9 +185,9 @@ class MklDnnConvUtil {
   GetFilterSizeInMklOrder(size_t src_index, size_t filter_index,
                           memory::dims *filter_dims) {
     CHECK_NOTNULL(filter_dims);
-    const Tensor& input = MklGetInput(context_, src_index);
-    const Tensor& filter = MklGetInput(context_, filter_index);
-    GetFilterSizeInMklOrder(input.shape(), filter.shape(), filter_dims);
+    GetFilterSizeInMklOrder(GetTfShape(context_, src_index),
+                            GetTfShape(context_, filter_index),
+                            filter_dims);
   }
 
   // Calculate Bias size for 2D Convolution. Function does not return
@@ -238,8 +256,12 @@ class MklDnnConvUtil {
     *output_dims_tf_order = TFShapeToMklDnnDims(out_shape);
 
     // MKL-DNN always needs output in NCHW format.
-    *output_dims_mkl_order = {out_batch, out_depth, static_cast<int>(out_rows),
-                   static_cast<int>(out_cols)};
+    std::vector<int> mkldnn_sizes(4, -1);
+    mkldnn_sizes[MklDnnDims::Dim_N] = out_batch;
+    mkldnn_sizes[MklDnnDims::Dim_C] = out_depth;
+    mkldnn_sizes[MklDnnDims::Dim_H] = static_cast<int>(out_rows);
+    mkldnn_sizes[MklDnnDims::Dim_W] = static_cast<int>(out_cols);
+    *output_dims_mkl_order = mkldnn_sizes;
 
     // Now handle padding. MKL-DNN uses asymetric padding.
     *pad_l = {static_cast<int>(pad_top), static_cast<int>(pad_left)};
@@ -261,14 +283,14 @@ class MklDnnConvUtil {
     CHECK_NOTNULL(pad_l);
     CHECK_NOTNULL(pad_r);
 
-    const Tensor& input = MklGetInput(context_, src_index);
-    const Tensor& filter = MklGetInput(context_, filter_index);
+    auto input_tf_shape = GetTfShape(context_, src_index);
+    auto filter_tf_shape = GetTfShape(context_, filter_index);
 
-    OP_REQUIRES(context_, input.dims() == 4,
+    OP_REQUIRES(context_, input_tf_shape.dims() == 4,
                 errors::InvalidArgument("input must be 4-dimensional",
-                                          input.shape().DebugString()));
+                                        input_tf_shape.DebugString()));
 
-    GetOutputAndPadSizeInMklOrder(input.shape(), filter.shape(),
+    GetOutputAndPadSizeInMklOrder(input_tf_shape, filter_tf_shape,
                                   strides, output_dims_tf_order,
                                   output_dims_mkl_order, pad_l, pad_r);
   }
@@ -309,8 +331,231 @@ class MklDnnConvUtil {
   }
 };
 
+/////////////////////////////////////////////////////////////////////
+///  Common class that implements Conv2DBackpropFilter and Input
+/////////////////////////////////////////////////////////////////////
+
+template <typename Device, class T>
+class MklConv2DBackpropCommonOp :  public OpKernel {
+ public:
+  ~MklConv2DBackpropCommonOp() {}
+  explicit MklConv2DBackpropCommonOp(OpKernelConstruction* context)
+      : OpKernel(context) {
+    string data_format_str;
+    OP_REQUIRES_OK(context, context->GetAttr("data_format", &data_format_str));
+    OP_REQUIRES(context, FormatFromString(data_format_str, &data_format_),
+                errors::InvalidArgument("Invalid data format"));
+    OP_REQUIRES_OK(context, context->GetAttr("strides", &strides_));
+    int stride_n = GetTensorDim(strides_, data_format_, 'N');
+    int stride_c = GetTensorDim(strides_, data_format_, 'C');
+    OP_REQUIRES(
+        context, (stride_n == 1 && stride_c == 1),
+        errors::InvalidArgument("Current implementation does not yet support "
+                                "strides in the batch and depth dimensions."));
+
+    OP_REQUIRES_OK(context, context->GetAttr("padding", &padding_));
+  }
+
+  void Compute(OpKernelContext* context) override {
+    try {
+      auto cpu_engine = engine(engine::cpu, 0);
+
+      // Prepare common tensors for Conv2DBackpropInput and
+      // Conv2DBackpropFilter.
+      MklDnnData<T> input(&cpu_engine);
+      MklDnnData<T> filter(&cpu_engine);
+      MklDnnData<T> outbackprop(&cpu_engine);
+      MklDnnData<T> output(&cpu_engine);
+
+      // Input tensors
+      const int kInputIdx = 0, kFilterIdx = 1, kOutbpropIdx = 2;
+      const Tensor& input_tensor = MklGetInput(context, kInputIdx);
+      const Tensor& filter_tensor = MklGetInput(context, kFilterIdx);
+      const Tensor& outbprop_tensor = MklGetInput(context, kOutbpropIdx);
+
+      MklDnnShape input_mkl_shape, filter_mkl_shape, outbprop_mkl_shape;
+      GetMklShape(context, kInputIdx, &input_mkl_shape);
+      GetMklShape(context, kFilterIdx, &filter_mkl_shape);
+      GetMklShape(context, kOutbpropIdx, &outbprop_mkl_shape);
+      // Allow operator-specific sanity checking of shapes.
+      ValidateMklShapes(input_mkl_shape, filter_mkl_shape, outbprop_mkl_shape);
+
+      // Allow operator-specific generation of shapes.
+      // E.g., Conv2DBackpropFilter gets filter as filter_sizes. It is a
+      // tensor containing shape of filter. So filter.shape() is not
+      // a correct way to get filter shape. These operator-specific calls
+      // allow this class to handle this case.
+      TensorShape input_tf_shape = MakeInputTfShape(context, input_tensor);
+      TensorShape filter_tf_shape = MakeFilterTfShape(context, filter_tensor);
+      TensorShape outbprop_tf_shape = GetTfShape(context, kOutbpropIdx);
+
+      // By default, all dims are in MKL order. Only dims in TF order
+      // are those with prefix tf_order.
+      memory::dims outbprop_dims, fwd_input_dims, fwd_filter_dims;
+      memory::dims padding_l, padding_r, strides, fwd_output_dims;
+      memory::dims fwd_output_dims_tf_order;
+
+      // Get forward convolution parameters.
+      MklDnnConvUtil conv_utl(context, strides_, padding_, data_format_);
+      conv_utl.GetConvFwdSizesInMklOrder(input_tf_shape, filter_tf_shape,
+                                         &fwd_input_dims, &fwd_filter_dims,
+                                         &strides,
+                                         &fwd_output_dims_tf_order,
+                                         &fwd_output_dims,
+                                         &padding_l, &padding_r);
+      if (!context->status().ok()) return;
+
+      // Create Convolution forward descriptor since Convolution backward
+      // API needs it. For that, we first need to create input, filter
+      // and output memory descriptors.
+      auto tf_fmt = TFDataFormatToMklDnnDataFormat(data_format_);
+      // If input is in MKL layout, then simply grab input layout; otherwise,
+      // construct input TF layout. For TF layout, although input shape
+      // required is in MKL-DNN order, the layout is Tensorflow's layout
+      // (NHWC or NCHW depending on data format).
+      auto fwd_input_md = input_mkl_shape.IsMklTensor() ?
+                          input_mkl_shape.GetMklLayout() :
+                       memory::desc(fwd_input_dims, MklDnnType<T>(), tf_fmt);
+      // If filter is in MKL layout, then simply grab filter layout; otherwise
+      // construct filter in TF layout. For TF layout, filter is in HWIO format.
+      auto fwd_filter_md = filter_mkl_shape.IsMklTensor() ?
+                          filter_mkl_shape.GetMklLayout() :
+                          memory::desc(fwd_filter_dims, MklDnnType<T>(),
+                                       memory::format::hwio);
+      // Tensorflow Output of Conv2D is in data_format order.
+      auto fwd_out_md = memory::desc(fwd_output_dims, MklDnnType<T>(), tf_fmt);
+      auto fwd_desc = convolution_forward::desc(prop_kind::forward,
+            convolution_direct, fwd_input_md, fwd_filter_md, fwd_out_md,
+            strides, padding_l, padding_r, TFPaddingToMklDnnPadding(padding_));
+      auto fwd_pd = convolution_forward::primitive_desc(fwd_desc, cpu_engine);
+
+      // Create memory for user data. Describe how the inputs and outputs of
+      // Convolution look like. Also specify buffers containing actual input
+      // and output data.
+
+      // Since this is a common class for both Conv2DBackpropFilter and
+      // Conv2DBackpropInput, we skip SetUsrMem call for input tensor (for
+      // Conv2DBackpropInput) and for filter tensor (for
+      // conv2DBackpropFilter) depending on which tensor is int32 type.
+      size_t input_with_sizes = GetInputTensorIndexWithSizes();
+      if (input_with_sizes != kInputIdx) {
+        // Shape of Conv2DBackpropFilter's input is same as Conv2D input.
+        input.SetUsrMem(fwd_input_md, &input_tensor);
+      } else if (input_with_sizes != kFilterIdx) {
+        // Shape of Conv2DBackpropInput's filter is same as Conv2D filter.
+        filter.SetUsrMem(fwd_filter_md, &filter_tensor);
+      }
+
+      conv_utl.GetInputSizeInMklOrder(outbprop_tf_shape, &outbprop_dims);
+      if (!context->status().ok()) return;
+      if (outbprop_mkl_shape.IsMklTensor()) {
+        // If outbackprop is in Mkl layout, then simply grab it.
+        auto outbprop_md = outbprop_mkl_shape.GetMklLayout();
+        outbackprop.SetUsrMem(outbprop_md, &outbprop_tensor);
+      } else {
+        // If outbackprop is in TensorFlow layout, then we need to create memory
+        // descriptor for it. Outbackprop shape is data format order.
+        outbackprop.SetUsrMem(outbprop_dims, tf_fmt, &outbprop_tensor);
+      }
+
+      // Operator specific call to get output shape and data_format.
+      auto bwd_output_dims = GetOutputDims(fwd_input_dims, fwd_filter_dims);
+      auto bwd_output_format = GetOutputFormat(tf_fmt);
+      output.SetUsrMem(bwd_output_dims, bwd_output_format);
+
+      // Create memory descriptors for convolution data w/ no specified format.
+      input.SetOpMemDesc(fwd_input_dims, memory::format::any);
+      filter.SetOpMemDesc(fwd_filter_dims, memory::format::any);
+      outbackprop.SetOpMemDesc(outbprop_dims, memory::format::any);
+      output.SetOpMemDesc(bwd_output_dims, memory::format::any);
+
+      // Operator-specific call to create and execute primitive.
+      Tensor* output_tensor = nullptr;
+      CreatePrimitive(context, cpu_engine, fwd_pd, &input, &filter,
+                      &outbackprop, &output, &output_tensor,
+                      strides, padding_l, padding_r,
+                      TFPaddingToMklDnnPadding(padding_),
+                      bwd_output_dims, bwd_output_format);
+    } catch (mkldnn::error &e) {
+     string error_msg = "Status: " + std::to_string(e.status) +
+                       ", message: " + string(e.message) +
+                       ", in file " + string(__FILE__) + ":" +
+                       std::to_string(__LINE__);
+     OP_REQUIRES_OK(context, errors::Aborted("Operation received an exception:",
+                                            error_msg));
+    }
+  }
+
+  /// Pure virtual function to allow operator to check for validity of input
+  /// shapes. Function asserts that input shapes are valid.
+  virtual void ValidateMklShapes(const MklDnnShape& input_mkl_shape,
+                                 const MklDnnShape& filter_mkl_shape,
+                                 const MklDnnShape& outbprop_mkl_shape) = 0;
+
+  /// Operator-specific function that returns index of input that is
+  /// representing input sizes. For Conv2DBackpropFilter it returns 1 since
+  /// filter for this operator is filter shape. For Conv2DBackpropInput it
+  /// returns 0 (for input).
+  virtual size_t GetInputTensorIndexWithSizes() = 0;
+
+  /// Get TensorFlow shape of input tensor.
+  virtual TensorShape MakeInputTfShape(OpKernelContext* context,
+                                      const Tensor& input_tensor) = 0;
+
+  /// Get TensorFlow shape of filter tensor.
+  virtual TensorShape MakeFilterTfShape(OpKernelContext* context,
+                                       const Tensor& filter_tensor) = 0;
+
+  /// Get shape of output in MKL-DNN order. Computes shape of output from
+  /// input shape (fwd_input_dims) and filter shape (fwd_filter_dims).
+  virtual
+  const memory::dims& GetOutputDims(const memory::dims& fwd_input_dims,
+                                    const memory::dims& fwd_filter_dims) = 0;
+
+  /// Get data_format of output in MKL-DNN order. If output data format is
+  /// same as input data format, then it simply returns value of data_format
+  /// parameter as it is.
+  virtual memory::format GetOutputFormat(const memory::format data_format) = 0;
+
+  /// Create and execute the primitive storing output in the output_tensor.
+  virtual void CreatePrimitive(OpKernelContext* context,
+    const engine& cpu_engine,
+    const convolution_forward::primitive_desc& conv_fwd_pd,
+    MklDnnData<T>* input, MklDnnData<T>* filter, MklDnnData<T>* outbackprop,
+    MklDnnData<T>* output, Tensor** output_tensor, const memory::dims& strides,
+    const memory::dims& padding_l, const memory::dims& padding_r,
+    padding_kind padding, const memory::dims& bwd_output_dims,
+    memory::format bwd_output_format) = 0;
+
+  // Get the data_format {NCHW, NHWC}
+  TensorFormat GetTFDataFormat () { return data_format_; }
+
+ private:
+  std::vector<int32> strides_;
+  Padding padding_;
+  TensorFormat data_format_;
+};
 #endif  // INTEL_MKL_DNN
 
+/////////////////////////////////////////////////////////////////////
+///  Dummy Mkl op that is just used for operators that are intermediate
+///  output of node fusion in the graph
+/////////////////////////////////////////////////////////////////////
+
+template <typename Device, typename T>
+class MklDummyOp : public OpKernel {
+ public:
+  ~MklDummyOp() {}
+
+  explicit MklDummyOp(OpKernelConstruction* context) :
+    OpKernel(context) {}
+
+  void Compute(OpKernelContext* context) override {
+    TF_CHECK_OK(errors::Unimplemented("This is a dummy op."
+                                      "It should not have been invoked."));
+  }
+};
+
 }  // namespace tensorflow
 
 #endif  // TENSORFLOW_CORE_KERNELS_MKL_CONV_OPS_H_
diff --git a/tensorflow/core/kernels/mkl_fused_batch_norm_op.cc b/tensorflow/core/kernels/mkl_fused_batch_norm_op.cc
index bc9e906c39..a761562a4b 100644
--- a/tensorflow/core/kernels/mkl_fused_batch_norm_op.cc
+++ b/tensorflow/core/kernels/mkl_fused_batch_norm_op.cc
@@ -25,10 +25,24 @@ limitations under the License.
 #include "mkl_dnn_types.h"
 #include "tensorflow/core/util/mkl_util.h"
 
+#ifdef INTEL_MKL_DNN
+#include "mkldnn.hpp"
+
+using mkldnn::stream;
+using mkldnn::prop_kind;
+using mkldnn::use_scale_shift;
+using mkldnn::use_global_stats;
+using mkldnn::batch_normalization_forward;
+using mkldnn::batch_normalization_backward;
+#endif
+
 // TODO(inteltf) Address comments from PR 8968.
 
 namespace tensorflow {
 using CPUDevice = Eigen::ThreadPoolDevice;
+
+#ifndef INTEL_MKL_DNN
+
 template <typename Device, typename T>
 class MklFusedBatchNormOp : public OpKernel {
  public:
@@ -46,7 +60,6 @@ class MklFusedBatchNormOp : public OpKernel {
 
   void Compute(OpKernelContext* context) override {
     MklFusedBatchNormOpContext mkl_context;
-
     const Tensor& input = MklGetInput(context, 0);
     const Tensor& scale = MklGetInput(context, 1);
     const Tensor& shift = MklGetInput(context, 2);
@@ -55,6 +68,7 @@ class MklFusedBatchNormOp : public OpKernel {
 
     GetMklShape(context, 0, &(mkl_context.mkl_shape_input_shape));
     bool input_in_mkl_format = mkl_context.mkl_shape_input_shape.IsMklTensor();
+
     if (!input_in_mkl_format) {
       OP_REQUIRES(context, input.dims() == 4,
                   errors::InvalidArgument("input must be 4-dimensional",
@@ -69,10 +83,12 @@ class MklFusedBatchNormOp : public OpKernel {
     OP_REQUIRES(context, est_mean.dims() == 1,
                 errors::InvalidArgument("estimated_mean must be 1-dimensional",
                                         est_mean.shape().DebugString()));
+
     OP_REQUIRES(
         context, est_variance.dims() == 1,
         errors::InvalidArgument("estimated_variance must be 1-dimensional",
                                 est_variance.shape().DebugString()));
+
     if (is_training_) {
       OP_REQUIRES(context, est_mean.dim_size(0) == 0,
                   errors::InvalidArgument("estimated_mean empty for training",
@@ -258,7 +274,6 @@ class MklFusedBatchNormOp : public OpKernel {
             E_SUCCESS);
       }
     }
-
     void MklPrepareContextInputs(OpKernelContext* context,
                                  Tensor* mkl_tmp_input_buf_tensor,
                                  Tensor* mkl_tmp_scale_shift_buf_tensor) {
@@ -325,15 +340,6 @@ class MklFusedBatchNormOp : public OpKernel {
   } MklFusedBatchNormOpContext;
 };
 
-#define REGISTER_MKL_CPU(T)                                         \
-  REGISTER_KERNEL_BUILDER(Name("_MklFusedBatchNorm")                \
-                              .Device(DEVICE_CPU)                   \
-                              .TypeConstraint<T>("T")               \
-                              .Label(mkl_op_registry::kMklOpLabel), \
-                          MklFusedBatchNormOp<CPUDevice, T>);
-TF_CALL_float(REGISTER_MKL_CPU);
-#undef REGISTER_MKL_CPU
-
 template <typename Device, typename T>
 class MklFusedBatchNormGradOp : public OpKernel {
  public:
@@ -595,7 +601,7 @@ class MklFusedBatchNormGradOp : public OpKernel {
       mkl_res_batchnorm_bwd[dnnResourceSrc] =
           (mkl_convert_input) ? mkl_buf_converted_input : mkl_buf_input;
 
-      bool mkl_convert_out_backprop;
+     bool mkl_convert_out_backprop;
       dnnPrimitive_t mkl_prim_convert_out_backprop = nullptr;
       dnnLayout_t mkl_lt_internal_out_backprop = nullptr;
       void* mkl_buf_converted_out_backprop = nullptr;
@@ -675,6 +681,628 @@ class MklFusedBatchNormGradOp : public OpKernel {
     }
   } MklFusedBatchNormGradOpContext;
 };
+#endif
+
+#ifdef INTEL_MKL_DNN
+
+template <typename Device, typename T>
+class MklFusedBatchNormOp : public OpKernel {
+ public:
+  explicit MklFusedBatchNormOp(OpKernelConstruction* context)
+      : OpKernel(context) {
+    float epsilon;
+    OP_REQUIRES_OK(context, context->GetAttr("epsilon", &epsilon));
+    epsilon_ = T(epsilon);
+    string tensor_format;
+    OP_REQUIRES_OK(context, context->GetAttr("data_format", &tensor_format));
+    OP_REQUIRES(context, FormatFromString(tensor_format, &tensor_format_),
+                errors::InvalidArgument("Invalid data format"));
+    OP_REQUIRES_OK(context, context->GetAttr("is_training", &is_training_));
+  }
+
+  void Compute(OpKernelContext* context) override {
+    try {
+      auto cpu_engine = engine(engine::cpu, 0);
+      const size_t src_index = 0;    // index of src input tensor
+      const size_t scale_index = 1;  // index of scale tensor
+      const size_t shift_index = 2;  // index of shift tensor
+      const size_t mean_index = 3;   // index of est_mean tensor
+      const size_t var_index = 4;    // index of est_variance tensor
+
+      const Tensor& src_tensor          = MklGetInput(context, src_index);
+      const Tensor& scale_tensor        = MklGetInput(context, scale_index);
+      const Tensor& shift_tensor        = MklGetInput(context, shift_index);
+      const Tensor& est_mean_tensor     = MklGetInput(context, mean_index);
+      const Tensor& est_variance_tensor = MklGetInput(context, var_index);
+
+      MklDnnShape dnn_shape_src;
+      GetMklShape(context, src_index, &dnn_shape_src);
+
+      if (dnn_shape_src.IsMklTensor()) {
+        OP_REQUIRES(context, dnn_shape_src.GetDimension() == 4,
+                    errors::InvalidArgument(
+                        "input must be 4-dimensional",
+                        src_tensor.shape().DebugString()));
+      } else {
+        OP_REQUIRES(context, src_tensor.dims() == 4,
+                    errors::InvalidArgument(
+                        "input must be 4-dimensional",
+                        src_tensor.shape().DebugString()));
+      }
+      OP_REQUIRES(context, scale_tensor.dims() == 1,
+                  errors::InvalidArgument(
+                      "scale must be 1-dimensional",
+                      scale_tensor.shape().DebugString()));
+      OP_REQUIRES(context, shift_tensor.dims() == 1,
+                  errors::InvalidArgument("offset must be 1-dimensional",
+                                        shift_tensor.shape().DebugString()));
+      OP_REQUIRES(context, est_mean_tensor.dims() == 1,
+                  errors::InvalidArgument(
+                      "estimated_mean must be 1-dimensional",
+                      est_mean_tensor.shape().DebugString()));
+      OP_REQUIRES(context, est_variance_tensor.dims() == 1,
+                  errors::InvalidArgument(
+                      "estimated_variance must be 1-dimensional",
+                      est_variance_tensor.shape().DebugString()));
+
+      if (is_training_) {
+        OP_REQUIRES(context, est_mean_tensor.dim_size(0) == 0,
+                    errors::InvalidArgument(
+                        "estimated_mean must be empty for training",
+                        est_mean_tensor.shape().DebugString()));
+        OP_REQUIRES(context, est_variance_tensor.dim_size(0) == 0,
+                    errors::InvalidArgument(
+                        "estimated_variance must be empty for training",
+                        est_variance_tensor.shape().DebugString()));
+      }
+
+      if (dnn_shape_src.IsMklTensor())
+        depth_ = dnn_shape_src.DimSize(MklDnnDims::Dim_C);
+      else
+        ExtractParams(context);
+
+      // Indices of output tensors
+      const size_t dst_index = 0;
+      const size_t batch_mean_index = 1;
+      const size_t batch_variance_index = 2;
+      const size_t saved_mean_index = 3;
+      const size_t saved_variance_index = 4;
+
+      // allocate batch mean output tensor
+      Tensor* batch_mean_tensor = nullptr;
+      MklDnnShape mkl_shape_batch_mean;
+      mkl_shape_batch_mean.SetMklTensor(false);
+      AllocateOutputSetMklShape(context,
+                                batch_mean_index,
+                                &batch_mean_tensor,
+                                scale_tensor.shape(),
+                                mkl_shape_batch_mean);
+      CHECK_NOTNULL(batch_mean_tensor);
+
+      // Batch variance
+      Tensor* batch_variance_tensor = nullptr;
+      MklDnnShape mkl_shape_batch_variance;
+      mkl_shape_batch_variance.SetMklTensor(false);
+      AllocateOutputSetMklShape(context,
+                                batch_variance_index,
+                                &batch_variance_tensor,
+                                scale_tensor.shape(),
+                                mkl_shape_batch_variance);
+      CHECK_NOTNULL(batch_variance_tensor);
+
+      if (is_training_)
+        SetMeanVariance(*batch_mean_tensor, *batch_variance_tensor);
+      else
+        SetMeanVariance(est_mean_tensor, est_variance_tensor);
+
+      MklDnnData<T> src(&cpu_engine);
+      MklDnnData<T> dst(&cpu_engine);
+
+      memory::format format_m;
+      if (dnn_shape_src.IsMklTensor()) {
+        if (dnn_shape_src.IsTensorInNCHWFormat()) {
+          format_m = memory::format::nchw;
+        } else {
+          format_m = memory::format::nhwc;
+        }
+      } else {
+        format_m = TFDataFormatToMklDnnDataFormat(tensor_format_);
+      }
+
+      // set src primitive
+      memory::dims src_dims;
+      if (dnn_shape_src.IsMklTensor()) {
+        src_dims = TFShapeToMklDnnDimsInNCHW(dnn_shape_src.GetTfShape(),
+                                             tensor_format_);
+      } else {
+        src_dims = TFShapeToMklDnnDimsInNCHW(src_tensor.shape(),
+                                             tensor_format_);
+      }
+
+      auto src_md = dnn_shape_src.IsMklTensor()
+                    ? dnn_shape_src.GetMklLayout()
+                    : memory::desc(src_dims, MklDnnType<T>(), format_m);
+      src.SetUsrMem(src_md, &src_tensor);
+
+      // set weights primitive
+      // MKL-DNN packs scale & shift as "weights":
+      // <scale>...<scale><shift>...<shift>
+      auto weights_desc = memory::desc({2, depth_},
+                                       MklDnnType<T>(),
+                                       memory::format::nc);
+      auto weights_pd = memory::primitive_desc(weights_desc, cpu_engine);
+      auto weights_m = memory(weights_pd);
+      T* weights_data = reinterpret_cast<T*>(
+                        weights_m.get_data_handle());
+      T* scale_tf = reinterpret_cast<T*>(
+                    const_cast<T*>(scale_tensor.flat<T>().data()));
+      T* shift_tf = reinterpret_cast<T*>(
+                    const_cast<T*>(shift_tensor.flat<T>().data()));
+
+      for (int k=0; k < depth_; k++) {
+        weights_data[k] = scale_tf[k];
+        weights_data[k + depth_] = shift_tf[k];
+      }
+
+      // Mean and variance (without Bessel's correction) saved for backward
+      // computation to serve as pre-computed mean and variance.
+      Tensor* saved_mean_tensor = nullptr;
+      MklDnnShape mkl_shape_saved_mean;
+      mkl_shape_saved_mean.SetMklTensor(false);
+      AllocateOutputSetMklShape(context, saved_mean_index,
+                                &saved_mean_tensor,
+                                scale_tensor.shape(),
+                                mkl_shape_saved_mean);
+      CHECK_NOTNULL(saved_mean_tensor);
+
+      Tensor* saved_variance_tensor = nullptr;
+      MklDnnShape mkl_shape_saved_variance;
+      mkl_shape_saved_variance.SetMklTensor(false);
+      AllocateOutputSetMklShape(context, saved_variance_index,
+                                &saved_variance_tensor,
+                                scale_tensor.shape(),
+                                mkl_shape_saved_variance);
+      CHECK_NOTNULL(saved_variance_tensor);
+
+      // set mean primitive
+      auto mean_desc = memory::desc({1, depth_},
+                                    MklDnnType<T>(),
+                                    memory::format::nc);
+      auto mean_pd = memory::primitive_desc(mean_desc, cpu_engine);
+      char* saved_mean_data_tf = reinterpret_cast<char*>
+                                 (saved_mean_tensor->flat<T>().data());
+      std::memcpy(saved_mean_data_tf,
+                  reinterpret_cast<char*>(mean_values_),
+                  depth_*sizeof(T));
+      auto mean_m = memory(mean_pd,
+                           reinterpret_cast<void*>(saved_mean_data_tf));
+
+      // set variance primitive
+      auto variance_desc = memory::desc({1, depth_},
+                                    MklDnnType<T>(),
+                                    memory::format::nc);
+      auto variance_pd = memory::primitive_desc(variance_desc, cpu_engine);
+      char* saved_variance_data_tf = reinterpret_cast<char*>
+                  (saved_variance_tensor->flat<T>().data());
+      std::memcpy(saved_variance_data_tf,
+                  reinterpret_cast<char*>(variance_values_),
+                  depth_*sizeof(T));
+      auto variance_m = memory(variance_pd, saved_variance_data_tf);
+
+      prop_kind pk = (is_training_) ?
+                     prop_kind::forward_training :
+                     prop_kind::forward_scoring;
+      auto bnrm_fwd_desc = batch_normalization_forward::desc(
+                               pk, src.GetUsrMemDesc(), epsilon_,
+                               is_training_ ? use_scale_shift :
+                               (use_scale_shift | use_global_stats));
+      auto bnrm_fwd_pd = batch_normalization_forward::primitive_desc(
+                             bnrm_fwd_desc, cpu_engine);
+
+      // allocate dst tensor
+      MklDnnShape dnn_shape_dst;
+      TensorShape tf_shape_dst;
+      Tensor* dst_tensor = nullptr;
+      if (dnn_shape_src.IsMklTensor()) {
+        dnn_shape_dst.SetMklTensor(true);
+        auto dst_pd = bnrm_fwd_pd.dst_primitive_desc();
+        dnn_shape_dst.SetMklLayout(&dst_pd);
+        dnn_shape_dst.SetElemType(MklDnnType<T>());
+        dnn_shape_dst.SetTfLayout(dnn_shape_src.GetDimension(),
+                                  src_dims, format_m);
+        tf_shape_dst.AddDim(dst_pd.get_size()/sizeof(T));
+      } else {
+        dnn_shape_dst.SetMklTensor(false);
+        tf_shape_dst = src_tensor.shape();
+      }
+      AllocateOutputSetMklShape(context, dst_index, &dst_tensor,
+                                tf_shape_dst, dnn_shape_dst);
+
+      // Output of batchnorm has same shape as input.
+      dst.SetUsrMem(src_md, dst_tensor);
+
+      primitive bnrm_fwd_op;
+      if (is_training_) {
+        bnrm_fwd_op = batch_normalization_forward(
+                          bnrm_fwd_pd,
+                          src.GetOpMem(),
+                          weights_m,
+                          dst.GetOpMem(),
+                          mean_m,
+                          variance_m);
+      } else {
+        bnrm_fwd_op = batch_normalization_forward(
+                          bnrm_fwd_pd,
+                          src.GetOpMem(),
+                          mean_m,
+                          variance_m,
+                          (const primitive::at) weights_m,
+                          dst.GetOpMem());
+      }
+      std::vector<primitive> net;
+      net.push_back(bnrm_fwd_op);
+      stream(stream::kind::eager).submit(net).wait();
+
+      // copy batch_mean data
+      T* batch_mean_data_tf = reinterpret_cast<T*>(
+                                batch_mean_tensor->flat<T>().data());
+      std::memcpy(reinterpret_cast<char*>(batch_mean_data_tf),
+                  reinterpret_cast<char*>(mean_m.get_data_handle()),
+                  depth_*sizeof(T));
+
+      // copy batch_variance data with Bessel's correction
+      // if training mode is on
+      float adjust_factor = 1.0;
+      if (is_training_) {
+        size_t orig_size = src_dims[0] * src_dims[2] * src_dims[3];
+        size_t adjust_size = orig_size - 1;
+        adjust_factor = (static_cast<float>(orig_size)) / adjust_size;
+      }
+      T* batch_variance_data_tf = reinterpret_cast<T*>(
+                                  batch_variance_tensor->flat<T>().data());
+      for (int k=0; k < depth_; k++)
+        batch_variance_data_tf[k] =
+            (reinterpret_cast<T*>(variance_m.get_data_handle()))[k]
+            * adjust_factor;
+    } catch (mkldnn::error &e) {
+      string error_msg = "Status: " + std::to_string(e.status) +
+                         ", message: " + string(e.message) +
+                         ", in file " + string(__FILE__) + ":" +
+                         std::to_string(__LINE__);
+      OP_REQUIRES_OK(context,
+                     errors::Aborted("Operation received an exception:",
+                     error_msg));
+    }
+  }
+
+ private:
+  T epsilon_;
+  TensorFormat tensor_format_;
+  bool is_training_;
+  T* mean_values_;
+  T* variance_values_;
+  size_t depth_;          // batch normalization is done for per channel.
+
+  void ExtractParams(OpKernelContext* context) {
+    const Tensor& input = MklGetInput(context, 0);
+    depth_ = static_cast<int>(GetTensorDim(input, tensor_format_, 'C'));
+  }
+
+  void SetMeanVariance(const Tensor& mean, const Tensor& variance) {
+    mean_values_ = reinterpret_cast<T*>(
+                       const_cast<T*>(mean.flat<T>().data()));
+    variance_values_ = reinterpret_cast<T*>(
+                       const_cast<T*>(variance.flat<T>().data()));
+  }
+};
+
+
+template <typename Device, typename T>
+class MklFusedBatchNormGradOp : public OpKernel {
+ public:
+  explicit MklFusedBatchNormGradOp(OpKernelConstruction* context)
+      : OpKernel(context) {
+    float epsilon;
+    OP_REQUIRES_OK(context, context->GetAttr("epsilon", &epsilon));
+    epsilon_ = T(epsilon);
+    string tensor_format;
+    OP_REQUIRES_OK(context, context->GetAttr("data_format", &tensor_format));
+    OP_REQUIRES(context, FormatFromString(tensor_format, &tensor_format_),
+                errors::InvalidArgument("Invalid data format"));
+  }
+
+  void Compute(OpKernelContext* context) override {
+    try {
+      auto cpu_engine = engine(engine::cpu, 0);
+
+      const size_t diff_dst_index = 0;  // index of diff_dst tensor
+      const size_t src_index = 1;       // index of src input tensor
+      const size_t scale_index = 2;     // index of scale tensor
+      const size_t mean_index = 3;      // index of saved_mean tensor
+      const size_t variance_index = 4;  // index of saved_variance tensor
+      const Tensor& diff_dst_tensor = MklGetInput(context, diff_dst_index);
+      const Tensor& src_tensor = MklGetInput(context, src_index);
+      const Tensor& scale_tensor = MklGetInput(context, scale_index);
+      const Tensor& saved_mean_tensor = MklGetInput(context, mean_index);
+      const Tensor& saved_variance_tensor = MklGetInput(context,
+                                            variance_index);
+
+      MklDnnShape dnn_shape_src, dnn_shape_diff_dst;
+      GetMklShape(context, src_index, &dnn_shape_src);
+      GetMklShape(context, diff_dst_index, &dnn_shape_diff_dst);
+
+      if (dnn_shape_diff_dst.IsMklTensor()) {
+        OP_REQUIRES(context, dnn_shape_diff_dst.GetDimension() == 4,
+                    errors::InvalidArgument(
+                        "input must be 4-dimensional",
+                        diff_dst_tensor.shape().DebugString()));
+      } else {
+        OP_REQUIRES(context, diff_dst_tensor.dims() == 4,
+                    errors::InvalidArgument(
+                        "input must be 4-dimensional",
+                        diff_dst_tensor.shape().DebugString()));
+      }
+
+      if (dnn_shape_src.IsMklTensor()) {
+        OP_REQUIRES(context, dnn_shape_src.GetDimension() == 4,
+                    errors::InvalidArgument(
+                        "input must be 4-dimensional",
+                         src_tensor.shape().DebugString()));
+      } else {
+        OP_REQUIRES(context, src_tensor.dims() == 4,
+                    errors::InvalidArgument(
+                        "input must be 4-dimensional",
+                        src_tensor.shape().DebugString()));
+      }
+
+      OP_REQUIRES(context, scale_tensor.dims() == 1,
+                  errors::InvalidArgument(
+                      "scale must be 1-dimensional",
+                      scale_tensor.shape().DebugString()));
+      OP_REQUIRES(context, saved_mean_tensor.dims() == 1,
+                  errors::InvalidArgument(
+                      "saved mean must be 1-dimensional",
+                       saved_mean_tensor.shape().DebugString()));
+
+      OP_REQUIRES(context, saved_variance_tensor.dims() == 1,
+                  errors::InvalidArgument(
+                      "saved variance must be 1-dimensional",
+                      saved_variance_tensor.shape().DebugString()));
+
+      if (dnn_shape_src.IsMklTensor())
+        depth_ = dnn_shape_src.DimSize(MklDnnDims::Dim_C);
+      else
+        ExtractParams(context);
+
+      memory::format format_m;
+      if (dnn_shape_src.IsMklTensor()) {
+        if (dnn_shape_src.IsTensorInNCHWFormat())
+          format_m = memory::format::nchw;
+        else
+          format_m = memory::format::nhwc;
+      } else {
+        format_m = TFDataFormatToMklDnnDataFormat(tensor_format_);
+      }
+
+      MklDnnData<T> src(&cpu_engine);
+      MklDnnData<T> mean(&cpu_engine);
+      MklDnnData<T> variance(&cpu_engine);
+      MklDnnData<T> diff_dst(&cpu_engine);
+      MklDnnData<T> diff_src(&cpu_engine);
+
+      memory::dims src_dims, diff_dst_dims;
+      if (dnn_shape_src.IsMklTensor())
+        src_dims = TFShapeToMklDnnDimsInNCHW(
+                       dnn_shape_src.GetTfShape(), tensor_format_);
+      else
+        src_dims = TFShapeToMklDnnDimsInNCHW(
+                       src_tensor.shape(), tensor_format_);
+
+      if (dnn_shape_diff_dst.IsMklTensor())
+        diff_dst_dims = TFShapeToMklDnnDimsInNCHW(
+                            dnn_shape_diff_dst.GetTfShape(),
+                            tensor_format_);
+      else
+        diff_dst_dims = TFShapeToMklDnnDimsInNCHW(
+                            diff_dst_tensor.shape(),
+                            tensor_format_);
+
+      // set src and diff_dst primitives
+      memory::desc src_md({}, memory::data_undef, memory::format_undef);
+      memory::desc diff_dst_md({}, memory::data_undef, memory::format_undef);
+      if (dnn_shape_src.IsMklTensor() || dnn_shape_diff_dst.IsMklTensor()) {
+        if (dnn_shape_src.IsMklTensor()) {
+          src_md = dnn_shape_src.GetMklLayout();
+          diff_dst_md = src_md;
+        } else {
+          diff_dst_md = dnn_shape_diff_dst.GetMklLayout();
+          src_md = diff_dst_md;
+        }
+      } else {
+        src_md =  memory::desc(src_dims, MklDnnType<T>(), format_m);
+        diff_dst_md = src_md;
+      }
+      src.SetUsrMem(src_md, &src_tensor);
+      diff_dst.SetUsrMem(diff_dst_md, &diff_dst_tensor);
+
+      // weights -- DNN packs scales/shifts as weights in order of
+      // scale, ..., scale, shift, ..., shift
+      auto weights_desc = memory::desc({2, depth_},
+                                       MklDnnType<T>(),
+                                       memory::format::nc);
+      auto weights_pd = memory::primitive_desc(weights_desc, cpu_engine);
+      auto weights_m = memory(weights_pd);
+      T* weights_data = reinterpret_cast<T*>(weights_m.get_data_handle());
+      T* scale_tf = reinterpret_cast<T*>(const_cast<T*>
+                                        (scale_tensor.flat<T>().data()));
+      for (int k=0; k < depth_; k++) {
+        weights_data[k] = scale_tf[k];
+        weights_data[k + depth_] = 0;
+      }
+
+      // set mean primitive
+      memory::dims mv_dims = GetMeanVarianceDims();
+      mean.SetUsrMem(mv_dims,
+                     memory::format::nc,
+                     const_cast<void*>(static_cast<const void*>
+                     (saved_mean_tensor.flat<T>().data())));
+      mean.SetOpMemDesc(mv_dims, memory::format::nc);
+
+      // set variance primitive
+      variance.SetUsrMem(mv_dims,  memory::format::nc,
+                         const_cast<void*>(static_cast<const void*>
+                         (saved_variance_tensor.flat<T>().data())));
+      variance.SetOpMemDesc(mv_dims, memory::format::nc);
+
+      // set diff_weight primitive
+      auto diff_weights_desc = memory::desc(
+                                 {2, depth_},
+                                 MklDnnType<T>(),
+                                 memory::format::nc);
+      auto diff_weights_pd = memory::primitive_desc(
+                                diff_weights_desc,
+                                cpu_engine);
+      auto diff_weights_m = memory(diff_weights_pd);
+
+      auto bnrm_fwd_desc = batch_normalization_forward::desc(
+                                prop_kind::forward_training,
+                                src.GetUsrMemDesc(),
+                                epsilon_,
+                                use_scale_shift);
+      auto bnrm_fwd_pd = batch_normalization_forward::primitive_desc(
+                                bnrm_fwd_desc,
+                                cpu_engine);
+
+      // Indices of output tensors
+      const size_t diff_src_index = 0;    // index of diff_src tensor
+      const size_t diff_scale_index = 1;  // index of diff_scale tensor
+      const size_t diff_shift_index = 2;  // index of diff_shift tensor
+      const size_t p1_index = 3;  // index of 1st placeholder tensor
+      const size_t p2_index = 4;  // index of 2nd placeholder tensor
+
+      // allocate diff_src tensor
+      MklDnnShape dnn_shape_diff_src;
+      TensorShape tf_shape_diff_src;
+      Tensor* diff_src_tensor = nullptr;
+      if (dnn_shape_src.IsMklTensor()) {
+        dnn_shape_diff_src.SetMklTensor(true);
+        auto diff_src_pd = bnrm_fwd_pd.dst_primitive_desc();
+        dnn_shape_diff_src.SetMklLayout(&diff_src_pd);
+        dnn_shape_diff_src.SetElemType(MklDnnType<T>());
+        dnn_shape_diff_src.SetTfLayout(
+                              dnn_shape_src.GetDimension(),
+                              src_dims,
+                              format_m);
+        dnn_shape_diff_src.SetTfDimOrder(
+                              dnn_shape_src.GetDimension(),
+                              tensor_format_);
+        tf_shape_diff_src.AddDim(diff_src_pd.get_size()/sizeof(T));
+      } else {
+        dnn_shape_diff_src.SetMklTensor(false);
+        tf_shape_diff_src = src_tensor.shape();
+      }
+      AllocateOutputSetMklShape(context, diff_src_index, &diff_src_tensor,
+                                tf_shape_diff_src, dnn_shape_diff_src);
+
+      diff_src.SetUsrMem(src_md, diff_src_tensor);
+
+      prop_kind pk = prop_kind::backward;
+      auto bnrm_bwd_desc = batch_normalization_backward::desc(
+                               pk,
+                               diff_src.GetUsrMemDesc(),
+                               src.GetUsrMemDesc(),
+                               epsilon_,
+                               use_scale_shift);
+      auto bnrm_bwd_pd = batch_normalization_backward::primitive_desc(
+                               bnrm_bwd_desc,
+                               cpu_engine,
+                               bnrm_fwd_pd);
+
+      auto bnrm_bwd_op = batch_normalization_backward(
+                               bnrm_bwd_pd,
+                               src.GetOpMem(),
+                               mean.GetOpMem(),
+                               variance.GetOpMem(),
+                               diff_dst.GetOpMem(),
+                               weights_m,
+                               diff_src.GetOpMem(),
+                               diff_weights_m);
+
+      std::vector<primitive> net;
+      net.push_back(bnrm_bwd_op);
+      stream(stream::kind::eager).submit(net).wait();
+
+      // separate out scale and shift grad and copy to individual tensors
+      const TensorShape& tf_shape_scale_shift = scale_tensor.shape();
+      Tensor* diff_scale_tensor = nullptr;
+      MklDnnShape mkl_shape_diff_scale;
+      mkl_shape_diff_scale.SetMklTensor(false);
+      AllocateOutputSetMklShape(context, diff_scale_index, &diff_scale_tensor,
+                                tf_shape_scale_shift, mkl_shape_diff_scale);
+
+      Tensor* diff_shift_tensor = nullptr;
+      MklDnnShape mkl_shape_diff_shift;
+      mkl_shape_diff_shift.SetMklTensor(false);
+      AllocateOutputSetMklShape(context, diff_shift_index, &diff_shift_tensor,
+                                tf_shape_scale_shift, mkl_shape_diff_shift);
+
+      // copy data: diff_scale and diff_shift
+      T* diff_weights_data_dnn = reinterpret_cast<T*>
+                                 (diff_weights_m.get_data_handle());
+      float* diff_scale_data_tf = const_cast<float*>(
+             static_cast<const float*>(diff_scale_tensor->flat<T>().data()));
+      float* diff_shift_data_tf = const_cast<float*>(
+             static_cast<const float*>(diff_shift_tensor->flat<T>().data()));
+      for (int i = 0; i < depth_; i++) {
+        diff_scale_data_tf[i] = diff_weights_data_dnn[i];
+        diff_shift_data_tf[i] = diff_weights_data_dnn[i + depth_];
+      }
+
+      // Placeholders for estimated_mean and estimated_variance, which are
+      // used for inference and thus not needed here for gradient computation.
+      Tensor* p1_tensor = nullptr, *p2_tensor = nullptr;
+      MklDnnShape mkl_shape_p;
+      mkl_shape_p.SetMklTensor(false);
+      AllocateOutputSetMklShape(context, p1_index, &p1_tensor,
+                                TensorShape({}), mkl_shape_p);
+      AllocateOutputSetMklShape(context, p2_index, &p2_tensor,
+                                TensorShape({}), mkl_shape_p);
+    } catch (mkldnn::error &e) {
+      string error_msg = "Status: " + std::to_string(e.status) +
+                          ", message: " + string(e.message) +
+                          ", in file " + string(__FILE__) + ":" +
+                          std::to_string(__LINE__);
+      OP_REQUIRES_OK(context,
+                     errors::Aborted("Operation received an exception:",
+                     error_msg));
+    }
+  }
+
+ private:
+  T epsilon_;
+  TensorFormat tensor_format_;
+  int depth_;             // batch normalization is done for per channel.
+
+  void ExtractParams(OpKernelContext* context) {
+      const Tensor& input = MklGetInput(context, 0);
+      depth_ = static_cast<int>(GetTensorDim(input, tensor_format_, 'C'));
+  }
+
+  memory::dims GetMeanVarianceDims() {
+    return memory::dims({1, depth_});
+  }
+};
+
+#endif
+
+#define REGISTER_MKL_CPU(T)                                         \
+  REGISTER_KERNEL_BUILDER(Name("_MklFusedBatchNorm")                \
+                              .Device(DEVICE_CPU)                   \
+                              .TypeConstraint<T>("T")               \
+                              .Label(mkl_op_registry::kMklOpLabel), \
+                          MklFusedBatchNormOp<CPUDevice, T>);
+TF_CALL_float(REGISTER_MKL_CPU);
+#undef REGISTER_MKL_CPU
 
 #define REGISTER_MKL_CPU(T)                                         \
   REGISTER_KERNEL_BUILDER(Name("_MklFusedBatchNormGrad")            \
diff --git a/tensorflow/core/kernels/mkl_identity_op.cc b/tensorflow/core/kernels/mkl_identity_op.cc
index f31e7afd46..9ee27ee21c 100644
--- a/tensorflow/core/kernels/mkl_identity_op.cc
+++ b/tensorflow/core/kernels/mkl_identity_op.cc
@@ -28,8 +28,15 @@ limitations under the License.
 #include "mkl_dnn_types.h"
 #include "tensorflow/core/util/mkl_util.h"
 
+#ifdef INTEL_MKL_DNN
+#include "mkldnn.hpp"
+#endif
+
 namespace tensorflow {
 typedef Eigen::ThreadPoolDevice CPUDevice;
+
+#ifndef INTEL_MKL_DNN
+
 template <typename Device, typename T>
 class MklIdentityOp : public OpKernel {
  public:
@@ -50,6 +57,32 @@ class MklIdentityOp : public OpKernel {
   bool IsExpensive() override { return false; }
 };
 
+#else
+
+template <typename Device, typename T>
+class MklIdentityOp : public OpKernel {
+ public:
+  explicit MklIdentityOp(OpKernelConstruction* context) : OpKernel(context) {}
+
+  void Compute(OpKernelContext* context) override {
+    MklDnnShape dnn_shape_input;
+    const int kInputIdx = 0, kOutputIdx = 0;
+    GetMklShape(context, kInputIdx, &dnn_shape_input);
+
+    if (dnn_shape_input.IsMklTensor()) {
+      ForwardMklTensorInToOut(context, kInputIdx, kOutputIdx);
+    } else {
+      ForwardTfTensorInToOut(context, kInputIdx, kOutputIdx);
+    }
+  }
+
+  // TensorFlow's IdentityOp has the following member function, so kept it
+  // as it is.
+  bool IsExpensive() override { return false; }
+};
+
+#endif
+
 #define REGISTER_MKL_CPU(T)                                         \
   REGISTER_KERNEL_BUILDER(Name("_MklIdentity")                      \
                               .Device(DEVICE_CPU)                   \
diff --git a/tensorflow/core/kernels/mkl_input_conversion_op.cc b/tensorflow/core/kernels/mkl_input_conversion_op.cc
index b58e44e398..001834b13b 100644
--- a/tensorflow/core/kernels/mkl_input_conversion_op.cc
+++ b/tensorflow/core/kernels/mkl_input_conversion_op.cc
@@ -31,6 +31,12 @@ limitations under the License.
 #include "tensorflow/core/kernels/mkl_tfconv_op.h"
 #include "tensorflow/core/util/mkl_util.h"
 
+#ifdef INTEL_MKL_DNN
+#include "mkldnn.hpp"
+
+using mkldnn::stream;
+#endif
+
 namespace tensorflow {
 typedef Eigen::ThreadPoolDevice CPUDevice;
 
@@ -44,15 +50,16 @@ typedef Eigen::ThreadPoolDevice CPUDevice;
 // else if both inputs are in mkl format:
 //   if both have the same shape:
 //     pass the inputs through to the output
-// 	else:
-// 		convert both to TF
+//   else:
+//     convert both to TF
 // else if one is TF and one is MKL:
-// 	if broadcast is needed:
-// 		convert the MKL format input to TF format
-// 	else:
-// 		convert the TF format input to MKL format
+//   if broadcast is needed:
+//     convert the MKL format input to TF format
+//   else:
+//     convert the TF format input to MKL format
 ///////////////////////////////////////////////////////////
 
+#ifndef INTEL_MKL_DNN
 template <typename Device, typename T>
 class MklInputConversionOp : public OpKernel {
  public:
@@ -242,6 +249,199 @@ class MklInputConversionOp : public OpKernel {
   bool has_avx512f_ = false;
 };
 
+#else
+
+template <typename Device, typename T>
+class MklInputConversionOp : public OpKernel {
+ public:
+  explicit MklInputConversionOp(OpKernelConstruction* context)
+      : OpKernel(context) {
+    OP_REQUIRES_OK(context, context->GetAttr("data_format", &data_format_str));
+    OP_REQUIRES_OK(context, context->GetAttr("T", &op_data_type));
+    has_avx512f_ = port::TestCPUFeature(port::CPUFeature::AVX512F);
+  }
+
+ private:
+  void Compute(OpKernelContext* context) override {
+    const Tensor& input_tensor_0 = MklGetInput(context, 0);
+    MklDnnShape input_shape_0;
+    GetMklShape(context, 0, &input_shape_0);
+
+    const Tensor& input_tensor_1 = MklGetInput(context, 1);
+    MklDnnShape input_shape_1;
+    GetMklShape(context, 1, &input_shape_1);
+
+    bool tf_shapes_are_same = context->input(0).shape() ==
+                              context->input(1).shape();
+
+    VLOG(1) << "MklInputConversionOp: Input shapes are "
+            << (tf_shapes_are_same ? "*same*" : "*different*") << ": "
+            << context->input(0).shape().DebugString() << " and "
+            << context->input(1).shape().DebugString();
+
+    // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+    // if both inputs are in TF format, just copy input tensors to output.
+    if (!input_shape_0.IsMklTensor() && !input_shape_1.IsMklTensor()) {
+      VLOG(1) << "MklInputConversionOp: No conversion needed, "
+              << "copying TF inputs to output";
+
+      ForwardTfTensorInToOut(context, 0, 0);
+      ForwardTfTensorInToOut(context, 1, 1);
+      return;
+    }
+
+    // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+    // If both inputs are in MKL format
+    if (input_shape_0.IsMklTensor() && input_shape_1.IsMklTensor()) {
+      // If both have the same shape, pass them through
+      if (tf_shapes_are_same) {
+        VLOG(1) << "MklInputConversionOp: No conversion needed, "
+                << "copying MKL inputs with identical shapes to output";
+
+        ForwardMklTensorInToOut(context, 0, 0);
+        ForwardMklTensorInToOut(context, 1, 1);
+        return;
+      }
+
+      // Sanity check
+      bool mkl_shapes_are_same = input_shape_0 == input_shape_1;
+      if (mkl_shapes_are_same) {
+        CHECK(false) << "MklInputConversionOp: Unexpected: TF shapes are "
+                        "different but MKL shapes are same";
+      }
+
+      // Both have different shapes, so broadcast will be necessary.
+      // Convert to TF and pass both tensors through (we can't do broadcast
+      // with MKL tensors)
+      VLOG(1) << "MklInputConversionOp: Broadcast needed, "
+              << "converted MKL inputs to TF format";
+
+      MklToTfOp<Device, T>::ConvertMklToTf(this, context, data_format_str,
+                                           op_data_type, has_avx512f_, 0);
+      MklToTfOp<Device, T>::ConvertMklToTf(this, context, data_format_str,
+                                           op_data_type, has_avx512f_, 1);
+      SetDummyMklShapeOutput(context, 0);
+      SetDummyMklShapeOutput(context, 1);
+      return;
+    }
+
+    // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+    // One input is MKL and one is TF. If no broadcast is needed, convert
+    // the TF tensor to MKL, otherwise convert the MKL tensor to TF format
+    VLOG(1) << "MklInputConversionOp: Inputs in different formats (MKL/TF)";
+
+    const Tensor* mkl_tensor;
+    const MklDnnShape* mkl_shape;
+    const Tensor* tf_tensor;
+    MklDnnShape* tf_mkl_shape;
+    uint mkl_tensor_index;
+    uint tf_tensor_index;
+    if (input_shape_0.IsMklTensor() && !input_shape_1.IsMklTensor()) {
+      mkl_tensor = &input_tensor_0;
+      mkl_shape = &input_shape_0;
+      mkl_tensor_index = 0;
+      tf_tensor = &input_tensor_1;
+      tf_mkl_shape = &input_shape_1;
+      tf_tensor_index = 1;
+    } else if (!input_shape_0.IsMklTensor() && input_shape_1.IsMklTensor()) {
+      mkl_tensor = &input_tensor_1;
+      mkl_shape = &input_shape_1;
+      mkl_tensor_index = 1;
+      tf_tensor = &input_tensor_0;
+      tf_mkl_shape = &input_shape_0;
+      tf_tensor_index = 0;
+    } else {
+      CHECK(false) << "MklInputConversionOp: Unexpected combination of input "
+                      "shapes for MKL "
+                   << "element-wise op";
+    }
+
+    // Broadcast is needed if the shapes are not the same
+    bool broadcast_needed;
+
+    size_t in0_size = 1;
+    for (size_t i = 0; i < mkl_shape->GetDimension(); ++i)
+      in0_size *= mkl_shape->TfDimSize(i);
+
+    size_t in1_size = 1;
+    for (size_t i = 0; i < tf_tensor->shape().dims(); ++i)
+      in1_size *= tf_tensor->shape().dim_size(i);
+
+    broadcast_needed = (in0_size != in1_size);
+
+    if (!broadcast_needed) {
+      // Both shapes are same, convert the TF input to MKL
+      VLOG(1) << "MklInputConversionOp: No broadcast needed.";
+      VLOG(1) << "MklInputConversionOp: Converting input " << tf_tensor_index
+              << " to MKL format";
+
+      // Create MklDnnShape for output Mkl tensor.
+      Tensor* tensor_out;
+      MklDnnShape mkl_output_mkl_shape;
+      mkl_output_mkl_shape.SetMklTensor(true);
+      mkl_output_mkl_shape.SetElemType(MklDnnType<T>());
+      mkl_output_mkl_shape.SetTfLayout(mkl_shape->GetDimension(),
+                                       mkl_shape->GetSizesAsMklDnnDims(),
+                                       mkl_shape->GetTfDataFormat());
+      // ** Temporarily borrow the layout from the MKL input **
+      auto output_mkl_md = mkl_shape->GetMklLayout();
+      mkl_output_mkl_shape.SetMklLayout(&output_mkl_md);
+
+      // Create output Mkl tensor
+      AllocateOutputSetMklShape(context, tf_tensor_index, &tensor_out,
+                                mkl_tensor->shape(), mkl_output_mkl_shape);
+
+      // Create MklDnnData object for input tensor. Input tensor is in
+      // Tensorflow layout.
+      auto cpu_engine = engine(engine::cpu, 0);
+      MklDnnData<T> tf_input(&cpu_engine);
+      auto input_tf_md = mkl_output_mkl_shape.GetTfLayout();
+      tf_input.SetUsrMem(input_tf_md, &tf_tensor);
+
+      // Create reorder between tensorflow layout and Mkl layout.
+      std::vector<primitive> net;
+      CHECK_EQ(tf_input.CheckReorderToOpMem(memory::primitive_desc(
+                                            output_mkl_md, cpu_engine),
+                                            tensor_out, &net),
+               true);
+      stream(stream::kind::eager).submit(net).wait();
+
+      // -- The tensor in MKL format passes through --
+      ForwardMklTensorInToOut(context, mkl_tensor_index, mkl_tensor_index);
+    } else {
+      // Broadcast is needed, so convert the MKL input to TF
+      VLOG(1) << "MklInputConversionOp: Broadcast needed.";
+      VLOG(1) << "MklInputConversionOp: Converting input " << mkl_tensor_index
+              << " to TF format";
+      MklToTfOp<Device, T>::ConvertMklToTf(this, context, data_format_str,
+                                           op_data_type, has_avx512f_,
+                                           mkl_tensor_index);
+      SetDummyMklShapeOutput(context, mkl_tensor_index);
+
+      // The tensor in TF format passes through
+      ForwardTfTensorInToOut(context, tf_tensor_index, tf_tensor_index);
+    }
+
+    VLOG(1) << "MklInputConversionOp: Shapes (output): "
+            << context->mutable_output(0)->shape().DebugString() << " and "
+            << context->mutable_output(1)->shape().DebugString();
+
+    VLOG(1) << "MklInputConversion completed successfully.";
+  }
+
+ private:
+  /// Data format of the operation
+  string data_format_str;
+
+  /// Data type of the operation
+  DataType op_data_type;
+
+  /// CPUIDInfo
+  bool has_avx512f_ = false;
+};
+
+#endif
+
 ///////////////////////////////////////////////////////////
 //               Register kernel
 ///////////////////////////////////////////////////////////
@@ -253,7 +453,10 @@ class MklInputConversionOp : public OpKernel {
                               .Label(mkl_op_registry::kMklOpLabel), \
                           MklInputConversionOp<CPUDevice, T>);
 
-TF_CALL_NUMBER_TYPES(REGISTER_CPU);
+// TODO(nhasabni): We cannot support all number types since MklDnn does
+// not support types.
+// TF_CALL_NUMBER_TYPES(REGISTER_CPU);
+TF_CALL_float(REGISTER_CPU);
 #undef REGISTER_CPU
 }  // namespace tensorflow
 #endif  // INTEL_MKL
diff --git a/tensorflow/core/kernels/mkl_lrn_op.cc b/tensorflow/core/kernels/mkl_lrn_op.cc
index aa08e93924..227765e46d 100644
--- a/tensorflow/core/kernels/mkl_lrn_op.cc
+++ b/tensorflow/core/kernels/mkl_lrn_op.cc
@@ -17,7 +17,7 @@ limitations under the License.
 // See docs in ../ops/nn_ops.cc. This opkernel uses MKL library, create MKL
 // layout and primitives, use MKL dnn primitives to compute local
 // response normalization
-
+#undef INTEL_MKL
 #ifdef INTEL_MKL
 
 #define EIGEN_USE_THREADS
diff --git a/tensorflow/core/kernels/mkl_maxpooling_op.cc b/tensorflow/core/kernels/mkl_maxpooling_op.cc
index 846bb5710d..de4d7d2e72 100644
--- a/tensorflow/core/kernels/mkl_maxpooling_op.cc
+++ b/tensorflow/core/kernels/mkl_maxpooling_op.cc
@@ -16,17 +16,32 @@ limitations under the License.
 // See docs in ../ops/nn_ops.cc.
 #ifdef INTEL_MKL
 #define EIGEN_USE_THREADS
-
 #include "tensorflow/core/framework/op_kernel.h"
 #include "tensorflow/core/kernels/mkl_pooling_ops_common.h"
 #include "tensorflow/core/lib/core/errors.h"
 #include "tensorflow/core/util/mkl_util.h"
 #include "tensorflow/core/util/padding.h"
 
+#ifdef INTEL_MKL_DNN
+#include <algorithm>
+#include "mkldnn.hpp"
+using mkldnn::memory;
+using mkldnn::error;
+using mkldnn::pooling_forward;
+using mkldnn::pooling_backward;
+using mkldnn::padding_kind;
+using mkldnn::engine;
+using mkldnn::prop_kind;
+using mkldnn::algorithm;
+#endif
+
 namespace tensorflow {
 
 typedef Eigen::ThreadPoolDevice CPUDevice;
 
+// For now, MKL-ML is default. So making MKL-DNN not a default choice.
+#ifndef INTEL_MKL_DNN
+
 // An implementation of MaxPooling (forward).
 template <typename Device, typename T>
 class MklMaxPoolingOp : public OpKernel {
@@ -475,8 +490,348 @@ class MklMaxPoolingGradOp : public OpKernel {
   TensorFormat data_format_;
 
   bool workspace_enabled_;
+};  // MklMaxPoolingGradOp
+
+#else  // INTEL_MKL_DNN is defined
+
+// An implementation of MaxPooling (forward).
+template <typename Device, typename T>
+class MklMaxPoolingOp : public MklPoolingForwardOpBase<T> {
+ public:
+  explicit MklMaxPoolingOp(OpKernelConstruction* context)
+            : MklPoolingForwardOpBase<T>(context) {
+    // In Max Pooling, MKLDNN does not allow passing workspace as NULL.
+    // So we set workspace_enabled_ to true.
+    this->workspace_enabled_ = true;
+  }
+
+  void Compute(OpKernelContext* context) override {
+    try {
+      auto cpu_engine = engine(engine::cpu, 0);
+      const Tensor& input_tensor = MklGetInput(context,
+                this->kInputTensorIndexInput);
+      MklDnnShape dnn_shape_input;
+      GetMklShape(context, this->kInputTensorIndexInput, &dnn_shape_input);
+      this->SanityCheckInput(context, input_tensor, dnn_shape_input);
+      if (!context->status().ok()) return;
+
+      MklDnnData<T> dnn_data_input(&cpu_engine);
+      MklDnnData<T> dnn_data_output(&cpu_engine);
+      MklDnnData<T> dnn_data_wksp(&cpu_engine);
+
+      // initialize variables for the pooling op
+      MklPoolParameters pool_params;
+      // Get the input tensor and initialize the pooling parameters
+      this->ConfigureInput(context, dnn_shape_input,
+                        input_tensor, &pool_params,
+                        &dnn_data_input);
+      OP_REQUIRES_OK(context, context->status());
+
+      // Declare output tensor
+      Tensor* output_tensor = nullptr;
+      memory::dims output_dims_mkl_order;
+      this->GetOutputDims(pool_params, &output_dims_mkl_order);
+
+      // If input is in Mkl layout, then just get the memory format from it
+      // directly, instead of using input data_format to MaxPool.
+      if (dnn_shape_input.IsMklTensor()) {
+        dnn_data_output.SetUsrMem(output_dims_mkl_order,
+                                  static_cast<memory::format>(
+              dnn_data_input.GetUsrMemDesc().data.format));
+      } else {
+        dnn_data_output.SetUsrMem(output_dims_mkl_order,
+                                  this->data_format_mkldnn_);
+      }
+
+      // describe the memory layout; let mkl-dnn choose the best for the op
+      dnn_data_output.SetOpMemDesc(output_dims_mkl_order, memory::format::any);
+
+      auto pool_desc = pooling_forward::desc(prop_kind::forward,
+            algorithm::pooling_max,
+            dnn_data_input.GetUsrMemDesc(),
+            dnn_data_output.GetUsrMemDesc(),
+            memory::dims({  pool_params.row_stride,
+                            pool_params.col_stride}),
+            memory::dims({  pool_params.window_rows,
+                            pool_params.window_cols}),
+            memory::dims({  static_cast<int>(pool_params.pad_top),
+                            static_cast<int>(pool_params.pad_left)}),
+            memory::dims({  static_cast<int>(pool_params.pad_bottom),
+                            static_cast<int>(pool_params.pad_right)}),
+            TFPaddingToMklDnnPadding(this->padding_));
+        auto pool_fwd_desc = pooling_forward::primitive_desc(pool_desc,
+            cpu_engine);
+
+      this->AllocateOutputTensor(context, pool_fwd_desc, output_dims_mkl_order,
+                            this->data_format_mkldnn_, &output_tensor);
+      OP_REQUIRES_OK(context, context->status());
+      dnn_data_output.SetUsrMemDataHandle(output_tensor);
+
+      AllocateWorkspaceTensor(context, pool_fwd_desc, &dnn_data_wksp);
+      OP_REQUIRES_OK(context, context->status());
+
+      this->PrepareAndExecuteNet(pool_fwd_desc, &dnn_data_input,
+                        &dnn_data_output, &dnn_data_wksp);
+    } catch (mkldnn::error &e) {
+        string error_msg = "Status: " + std::to_string(e.status) +
+                        ", message: " + string(e.message) +
+                        ", in file " + string(__FILE__) + ":" +
+                        std::to_string(__LINE__);
+        OP_REQUIRES_OK(context,
+                        errors::Aborted("Compute received an exception:",
+                                         error_msg));
+    }
+  }  // Compute
+
+ private:
+    const int kOutputTensorIndexWorkspace = 1;
+
+    void AllocateWorkspaceTensor(OpKernelContext* context,
+                const pooling_forward::primitive_desc& pool_fwd_prim_desc,
+                MklDnnData<T>* dnn_data_wksp) {
+        CHECK_NOTNULL(dnn_data_wksp);
+        Tensor* workspace_tensor = nullptr;
+        memory::primitive_desc workspace_pd
+                    = pool_fwd_prim_desc.workspace_primitive_desc();
+        size_t workspace_t_elems = this->GetNumTElements(workspace_pd);
+        MklDnnShape workspace_mkl_shape;
+        workspace_mkl_shape.SetMklTensor(false);
+        TensorShape workspace_tf_shape;
+        workspace_tf_shape.AddDim(workspace_t_elems);
+        AllocateOutputSetMklShape(context, kOutputTensorIndexWorkspace,
+                                &workspace_tensor,
+                                workspace_tf_shape, workspace_mkl_shape);
+        CHECK_NOTNULL(workspace_tensor);
+        dnn_data_wksp->SetUsrMem(workspace_pd, workspace_tensor);
+    }
 };
 
+// The operation to compute MaxPool gradients.
+// It takes three inputs:
+//   - The original input tensor
+//   - The original output tensor
+//   - Backprop tensor for output
+// It produces one output: backprop tensor for input.
+template <class Device, class T>
+class MklMaxPoolingGradOp : public MklPoolingBackwardOpBase<T> {
+ public:
+  explicit MklMaxPoolingGradOp(OpKernelConstruction* context)
+      : MklPoolingBackwardOpBase<T>(context) {
+  }
+
+  void Compute(OpKernelContext* context) override {
+    try {
+        auto cpu_engine = engine(engine::cpu, 0);
+        const Tensor& orig_input_tensor = MklGetInput(context,
+            kInputTensorIndexOrigInput);
+        const Tensor& orig_output_tensor = MklGetInput(context,
+            kInputTensorIndexOrigOutput);
+        const Tensor& grad_tensor = MklGetInput(context,
+            kInputTensorIndexGradient);
+        const Tensor& workspace_tensor = MklGetInput(context,
+            kInputTensorIndexWorkspace);
+        MklDnnShape orig_input_mkl_shape,
+                    orig_output_mkl_shape,
+                    grad_mkl_shape,
+                    workspace_mkl_shape;
+        GetMklShape(context, kInputTensorIndexOrigInput,
+            &orig_input_mkl_shape);
+        GetMklShape(context, kInputTensorIndexOrigOutput,
+            &orig_output_mkl_shape);
+        GetMklShape(context, kInputTensorIndexGradient,
+            &grad_mkl_shape);
+        GetMklShape(context, kInputTensorIndexWorkspace,
+            &workspace_mkl_shape);
+
+        SanityCheckInputs(context,
+                            orig_input_tensor, orig_output_tensor,
+                            grad_tensor, workspace_tensor,
+                            orig_input_mkl_shape, orig_output_mkl_shape,
+                            grad_mkl_shape, workspace_mkl_shape);
+        if (!context->status().ok()) return;
+
+        MklDnnData<T> grad_dnn_data(&cpu_engine);
+        MklDnnData<T> workspace_dnn_data(&cpu_engine);
+        MklDnnData<T> output_dnn_data(&cpu_engine);
+        Tensor* output_tensor = nullptr;
+        MklPoolParameters pool_params;
+        TensorShape orig_input_shape;
+        memory::dims output_dims_mkl_order, orig_input_dims_mkl_order;
+        memory::desc original_input_md = ConfigureOriginalInput(context,
+                                orig_input_tensor,
+                                orig_input_mkl_shape,
+                                &orig_input_dims_mkl_order,
+                                &pool_params,
+                                &orig_input_shape);
+
+        memory::desc original_output_md = this->ConfigureOriginalOutput(
+                                pool_params,
+                                orig_output_mkl_shape,
+                                output_dims_mkl_order);
+
+        memory::desc target_diff_dst_md =  this->ConfigureInputGradient(
+                                        grad_mkl_shape,
+                                        grad_tensor,
+                                        &grad_dnn_data,
+                                        original_output_md);
+
+        output_dnn_data.SetUsrMem(original_input_md);
+
+        // Create the forward pooling primitive descriptor so we can
+        // pass it as a hint to the backward pooling primitive descriptor
+        auto pool_fwd_desc = pooling_forward::desc(prop_kind::forward,
+                algorithm::pooling_max,
+                original_input_md,
+                original_output_md,
+                memory::dims({  pool_params.row_stride,
+                                pool_params.col_stride}),
+                memory::dims({  pool_params.window_rows,
+                                pool_params.window_cols}),
+                memory::dims({  static_cast<int>(pool_params.pad_top),
+                                static_cast<int>(pool_params.pad_left)}),
+                memory::dims({  static_cast<int>(pool_params.pad_bottom),
+                                static_cast<int>(pool_params.pad_right)}),
+                TFPaddingToMklDnnPadding(this->padding_));
+        auto pool_fwd_prim_desc
+                = pooling_forward::primitive_desc(pool_fwd_desc,
+                                                    cpu_engine);
+
+        auto pool_bkwd_desc = pooling_backward::desc(
+                algorithm::pooling_max,
+                output_dnn_data.GetUsrMemDesc(),
+                target_diff_dst_md,
+                memory::dims({  pool_params.row_stride,
+                                pool_params.col_stride}),
+                memory::dims({  pool_params.window_rows,
+                                pool_params.window_cols}),
+                memory::dims({  static_cast<int>(pool_params.pad_top),
+                                static_cast<int>(pool_params.pad_left)}),
+                memory::dims({  static_cast<int>(pool_params.pad_bottom),
+                                static_cast<int>(pool_params.pad_right)}),
+                TFPaddingToMklDnnPadding(this->padding_));
+        auto pool_bkwd_prim_desc
+            = pooling_backward::primitive_desc(pool_bkwd_desc,
+                                                cpu_engine,
+                                                pool_fwd_prim_desc);
+
+        this->AllocateOutputTensor(context, pool_bkwd_prim_desc,
+            orig_input_dims_mkl_order,
+            this->data_format_mkldnn_,
+            &output_tensor);
+        output_dnn_data.SetUsrMemDataHandle(output_tensor);
+
+        ConfigureWorkspace(workspace_tensor,
+                pool_fwd_prim_desc.workspace_primitive_desc(),
+                &workspace_dnn_data);
+        this->PrepareAndExecuteNet(pool_bkwd_prim_desc,
+                            &grad_dnn_data,
+                            &output_dnn_data,
+                            memory::primitive_desc(
+                                target_diff_dst_md,
+                                cpu_engine),
+                            &workspace_dnn_data);
+    } catch (mkldnn::error &e) {
+        string error_msg = "Status: " + std::to_string(e.status) +
+                        ", message: " + string(e.message) +
+                        ", in file " + string(__FILE__) + ":" +
+                        std::to_string(__LINE__);
+        OP_REQUIRES_OK(context,
+                        errors::Aborted("Compute received an exception:",
+                                         error_msg));
+    }
+  }  // Compute
+
+ private:
+    // .Input("orig_input: T")
+    // .Input("orig_output: T")
+    // .Input("grad: T")
+    // .Input("workspace: T")
+    const int kInputTensorIndexOrigInput = 0;
+    const int kInputTensorIndexOrigOutput = 1;
+    const int kInputTensorIndexGradient = 2;
+    const int kInputTensorIndexWorkspace = 3;
+    //  Output("output: T") in Base Class
+
+    memory::desc ConfigureOriginalInput(OpKernelContext* context,
+                                const Tensor& tensor_original_input,
+                                const MklDnnShape& original_input_mkl_shape,
+                                memory::dims* original_input_dims_mkl_order,
+                                MklPoolParameters* pool_params,
+                                TensorShape* input_tensor_shape) {
+        *input_tensor_shape = tensor_original_input.shape();
+        return MklPoolingBackwardOpBase<T>::ConfigureOriginalInput(
+                                        context,
+                                        tensor_original_input,
+                                        original_input_mkl_shape,
+                                        original_input_dims_mkl_order,
+                                        pool_params,
+                                        *input_tensor_shape);
+    }
+
+    void ConfigureWorkspace(const Tensor& workspace_tensor,
+                        memory::primitive_desc workspace_pd,
+                        MklDnnData<T> *workspace_dnn_data) {
+        CHECK_NOTNULL(workspace_dnn_data);
+
+        workspace_dnn_data->SetUsrMem(workspace_pd, &workspace_tensor);
+    }
+
+    void SanityCheckInputs(OpKernelContext* context,
+                            const Tensor& orig_input_tensor,
+                            const Tensor& orig_output_tensor,
+                            const Tensor& grad_tensor,
+                            const Tensor& workspace_tensor,
+                            const MklDnnShape& orig_input_mkl_shape,
+                            const MklDnnShape& orig_output_mkl_shape,
+                            const MklDnnShape& grad_mkl_shape,
+                            const MklDnnShape& workspace_mkl_shape) {
+        if (!orig_input_mkl_shape.IsMklTensor()) {
+            OP_REQUIRES(context, orig_input_tensor.dims() == 4,
+                errors::InvalidArgument("Original input shape must be "
+                "4-dimensional"));
+        } else {
+            OP_REQUIRES(context, orig_input_mkl_shape.GetDimension() == 4,
+                    errors::InvalidArgument("Original input shape must be "
+                    "4-dimensional"));
+        }
+        if (!orig_output_mkl_shape.IsMklTensor()) {
+            OP_REQUIRES(context, orig_output_tensor.dims() == 4,
+                errors::InvalidArgument("Original output must be "
+                        "4-dimensional"));
+        } else {
+            OP_REQUIRES(context, orig_output_mkl_shape.GetDimension() == 4,
+                    errors::InvalidArgument("Original output must be "
+                    "4-dimensional"));
+        }
+        if (!grad_mkl_shape.IsMklTensor()) {
+            OP_REQUIRES(context, grad_tensor.dims() == 4,
+                errors::InvalidArgument("Gradient must be 4-dimensional"));
+        } else {
+            OP_REQUIRES(context, grad_mkl_shape.GetDimension() == 4,
+                    errors::InvalidArgument("Gradient must be "
+                    "4-dimensional"));
+        }
+        if (this->workspace_enabled_){
+            // The workspace should not be an MKL tensor
+            OP_REQUIRES(context, workspace_mkl_shape.IsMklTensor() == false,
+                    errors::InvalidArgument("Workspace tensor should not"
+                                            " be an MKL Tensor."));
+            // It should only have one dimension
+            OP_REQUIRES(context, workspace_tensor.dims() == 1,
+                    errors::InvalidArgument("Workspace tensor must be "
+                                "1-dimensional"));
+        } else {
+            OP_REQUIRES(context, this->workspace_enabled_,
+                    errors::Unimplemented("MKL-DNN Max Pooling does not "
+                                "yet support the use case "
+                                "where MaxPoolGrad is called without first"
+                                " calling MaxPool."));
+        }
+    }
+};  // MklMaxPoolingGradOp
+
+#endif  // INTEL_MKL_DNN
+
 REGISTER_KERNEL_BUILDER(Name("_MklMaxPool")
                             .Device(DEVICE_CPU)
                             .TypeConstraint<float>("T")
diff --git a/tensorflow/core/kernels/mkl_pooling_ops_common.cc b/tensorflow/core/kernels/mkl_pooling_ops_common.cc
index 65e8852cfb..f7cadffd39 100644
--- a/tensorflow/core/kernels/mkl_pooling_ops_common.cc
+++ b/tensorflow/core/kernels/mkl_pooling_ops_common.cc
@@ -14,10 +14,13 @@ limitations under the License.
 ==============================================================================*/
 
 #ifdef INTEL_MKL
+
 #include <vector>
+#include <limits>
 #include "tensorflow/core/kernels/mkl_pooling_ops_common.h"
 #include "tensorflow/core/common_runtime/device.h"
 #include "tensorflow/core/framework/common_shape_fns.h"
+#include "tensorflow/core/kernels/bounds_check.h"
 
 namespace tensorflow {
 
@@ -39,6 +42,7 @@ void MklPoolParameters::Init(OpKernelContext* context,
   Init(context, ksize, stride, padding, data_format);
 }
 
+#ifndef INTEL_MKL_DNN
 // Initialization for MKL format
 void MklPoolParameters::Init(OpKernelContext* context,
                              const std::vector<int32>& ksize,
@@ -53,7 +57,22 @@ void MklPoolParameters::Init(OpKernelContext* context,
 
   Init(context, ksize, stride, padding, data_format);
 }
+#else
+// Initialization for MKL format
+void MklPoolParameters::Init(OpKernelContext* context,
+                             const std::vector<int32>& ksize,
+                             const std::vector<int32>& stride, Padding padding,
+                             TensorFormat data_format,
+                             const MklDnnShape* mklInputShape) {
+  // Get the input sizes
+  depth = mklInputShape->GetDimension('C');
+  tensor_in_cols = mklInputShape->GetDimension('W');
+  tensor_in_rows = mklInputShape->GetDimension('H');
+  tensor_in_batch = mklInputShape->GetDimension('N');
 
+  Init(context, ksize, stride, padding, data_format);
+}
+#endif  // INTEL_MKL_DNN
 // Common Initialization for TensorFlow and MKL formats
 void MklPoolParameters::Init(OpKernelContext* context,
                              const std::vector<int32>& ksize,
@@ -80,7 +99,7 @@ void MklPoolParameters::Init(OpKernelContext* context,
                   "MaxPooling supports exactly one of pooling across depth "
                   "or pooling across width/height."));
 
-  if (depth_window == 1) {
+  if (depth_window == 1) {  // we are pooling in the H and W
     OP_REQUIRES_OK(context, GetWindowedOutputSizeVerbose(
                                 tensor_in_rows, window_rows, row_stride,
                                 padding, &out_height, &pad_top, &pad_bottom));
@@ -88,7 +107,21 @@ void MklPoolParameters::Init(OpKernelContext* context,
     OP_REQUIRES_OK(context, GetWindowedOutputSizeVerbose(
                                 tensor_in_cols, window_cols, col_stride,
                                 padding, &out_width, &pad_left, &pad_right));
-  } else {
+#ifdef INTEL_MKL_DNN
+    // TF can work with int64, but mkldnn only supports int32
+    // Fail if the height or width are greater than MAX_INT
+
+    OP_REQUIRES(context, FastBoundsCheck(out_height,
+                                         std::numeric_limits<int>::max()),
+                errors::InvalidArgument("output height is too large"));
+
+    OP_REQUIRES(context, FastBoundsCheck(out_width,
+                                         std::numeric_limits<int>::max()),
+                errors::InvalidArgument("output width is too large"));
+
+#endif
+    out_depth = depth;  // output will have the same depth as the input
+  } else {  // we are pooling in the depth dimension
     // Our current version of depthwise max pooling does not support
     // any padding, and expects the depth_window to equal the depth
     // stride (no overlapping).
@@ -109,7 +142,6 @@ void MklPoolParameters::Init(OpKernelContext* context,
                 errors::Unimplemented("Depthwise max pooling is currently "
                                       "only implemented for CPU devices."));
 
-    pad_depth = 0;
     out_depth = depth / depth_window;
   }
 }
diff --git a/tensorflow/core/kernels/mkl_pooling_ops_common.h b/tensorflow/core/kernels/mkl_pooling_ops_common.h
index 92ea2beb25..d33e91a15d 100644
--- a/tensorflow/core/kernels/mkl_pooling_ops_common.h
+++ b/tensorflow/core/kernels/mkl_pooling_ops_common.h
@@ -18,9 +18,18 @@ limitations under the License.
 
 #ifdef INTEL_MKL
 #include <vector>
+#include <string>
 #include "tensorflow/core/util/mkl_util.h"
 #include "tensorflow/core/util/padding.h"
 
+#ifdef INTEL_MKL_DNN
+#include "mkldnn.hpp"
+using mkldnn::memory;
+using mkldnn::pooling_forward;
+using mkldnn::pooling_backward;
+using mkldnn::stream;
+#endif
+
 namespace tensorflow {
 
 typedef Eigen::ThreadPoolDevice CPUDevice;
@@ -51,14 +60,28 @@ struct MklPoolParameters {
   int pad_depth;
 
   TensorFormat data_format;
+  MklPoolParameters()
+    : depth(0)
+    , tensor_in_cols(0), tensor_in_rows(0), tensor_in_batch(0)
+    , window_rows(0), window_cols(0), depth_window(0)
+    , row_stride(0), col_stride(0), depth_stride(0)
+    , out_height(0), out_width(0), out_depth(0)
+    , pad_left(0), pad_right(0), pad_top(0), pad_bottom(0), pad_depth(0)
+    , data_format(TensorFormat::FORMAT_NCHW) {}
 
   // Updates context->status if there is an invalid input.
   void Init(OpKernelContext* context, const std::vector<int32>& ksize,
             const std::vector<int32>& stride, Padding padding,
             TensorFormat data_format, const TensorShape& tensor_in_shape);
+#ifndef INTEL_MKL_DNN
   void Init(OpKernelContext* context, const std::vector<int32>& ksize,
             const std::vector<int32>& stride, Padding padding,
             TensorFormat data_format, const MklShape* mkl_in_shape);
+#else
+  void Init(OpKernelContext* context, const std::vector<int32>& ksize,
+            const std::vector<int32>& stride, Padding padding,
+            TensorFormat data_format, const MklDnnShape* mkl_in_shape);
+#endif
 
  private:
   // Common initialization for TensorFlow and MKL formats
@@ -67,6 +90,325 @@ struct MklPoolParameters {
             TensorFormat data_format);
 };
 
+#ifdef INTEL_MKL_DNN
+
+template <class T>
+class MklPoolingOpBase : public OpKernel {
+ public:
+  explicit MklPoolingOpBase(OpKernelConstruction* context)
+            : OpKernel(context)
+            , workspace_enabled_(false) {
+      string data_format;
+      OP_REQUIRES_OK(context, context->GetAttr("data_format", &data_format));
+      OP_REQUIRES(context,
+            FormatFromString(data_format, &this->data_format_tf_),
+            errors::InvalidArgument("Invalid data format"));
+      this->data_format_mkldnn_
+                = TFDataFormatToMklDnnDataFormat(this->data_format_tf_);
+      OP_REQUIRES_OK(context, context->GetAttr("ksize", &this->ksize_));
+      OP_REQUIRES(context, this->ksize_.size() == 4,
+                  errors::InvalidArgument("Sliding window ksize field must "
+                                          "specify 4 dimensions"));
+      OP_REQUIRES_OK(context, context->GetAttr("strides", &this->stride_));
+      OP_REQUIRES(context, this->stride_.size() == 4,
+                  errors::InvalidArgument("Sliding window strides field must "
+                                          "specify 4 dimensions"));
+      OP_REQUIRES_OK(context, context->GetAttr("padding", &this->padding_));
+      OP_REQUIRES(context, this->ksize_[0] == 1 && this->stride_[0] == 1,
+                  errors::Unimplemented("Pooling is not yet supported on the "
+                                        "batch dimension."));
+
+      // We may not get this attribute for this node if it does not go through
+      // graph rewrite pass. So we do not check for error while retrieving this
+      // attribute value.
+      context->GetAttr("workspace_enabled", &this->workspace_enabled_);
+    }
+  void Compute(OpKernelContext* context) override = 0;
+
+ protected:
+  // Calculate output shape of pooling op in MKL-DNN and TensorFlow order.
+  // MKL-DNN uses NCHW for output order. But TensorFlow output will be in
+  // NHWC or NCHW format depending on data format. Function expects
+  // output height and output width to have already been int32
+  // bounds-checked
+  void GetOutputDims(const MklPoolParameters& mkl_pool_params,
+                    memory::dims* output_dims_mkl_order) {
+    // MKL-DNN always needs output in NCHW format.
+    *output_dims_mkl_order = { mkl_pool_params.tensor_in_batch,
+                              mkl_pool_params.out_depth,
+                              static_cast<int>(mkl_pool_params.out_height),
+                              static_cast<int>(mkl_pool_params.out_width)};
+  }
+
+  void InitMklPoolParameters(OpKernelContext* context,
+                      MklPoolParameters* pool_params,
+                      const MklDnnShape& original_input_mkl_shape,
+                      const TensorShape& input_tensor_shape) {
+    if (!original_input_mkl_shape.IsMklTensor()) {
+      pool_params->Init(context, this->ksize_, this->stride_, this->padding_,
+          this->data_format_tf_, input_tensor_shape);
+    } else {
+      pool_params->Init(context, this->ksize_, this->stride_, this->padding_,
+          this->data_format_tf_, &original_input_mkl_shape);
+    }
+  }
+
+  // Checks to make sure that the memory we need to allocate
+  // is a multiple of sizeof(T)
+  // returns the number of elements
+  size_t GetNumTElements(const memory::primitive_desc& pd) {
+    size_t num_bytes = pd.get_size();
+    size_t ret_val = num_bytes / sizeof(T);
+    if ( num_bytes % sizeof(T) != 0 ) {
+        ret_val++;
+    }
+    return ret_val;
+  }
+
+
+  std::vector<int32> ksize_;
+  std::vector<int32> stride_;
+  Padding padding_;
+  TensorFormat data_format_tf_;
+  memory::format data_format_mkldnn_;
+  bool workspace_enabled_;
+};
+
+template <class T>
+class MklPoolingForwardOpBase : public MklPoolingOpBase<T> {
+ public:
+  explicit MklPoolingForwardOpBase<T>(OpKernelConstruction* context)
+      : MklPoolingOpBase<T>(context) {}
+  void Compute(OpKernelContext* context) override = 0;
+
+ protected:
+  void ConfigureInput(OpKernelContext* context,
+                    const MklDnnShape& input_mkl_shape,
+                    const Tensor& input_tensor,
+                    MklPoolParameters* pool_params,
+                    MklDnnData<T>* dnn_data_input) {
+    CHECK_NOTNULL(pool_params);
+    CHECK_NOTNULL(dnn_data_input);
+    TensorShape input_tensor_shape = input_tensor.shape();
+    memory::desc input_md = input_mkl_shape.IsMklTensor()
+                        ? input_mkl_shape.GetMklLayout()
+                        : memory::desc(
+                              TFShapeToMklDnnDimsInNCHW(
+                                  input_tensor_shape, this->data_format_tf_),
+                              MklDnnType<T>(),
+                              this->data_format_mkldnn_);
+    dnn_data_input->SetUsrMem(input_md, &input_tensor);
+    this->InitMklPoolParameters(context, pool_params,
+                      input_mkl_shape, input_tensor_shape);
+  }
+
+  void AllocateOutputTensor(OpKernelContext* context,
+            const pooling_forward::primitive_desc& pool_fwd_prim_desc,
+            const memory::dims output_dims_mkl_order,
+            const memory::format& output_tf_format,
+            Tensor** output_tensor) {
+    CHECK_NOTNULL(output_tensor);
+    memory::primitive_desc dst_pd = pool_fwd_prim_desc.dst_primitive_desc();
+
+    MklDnnShape output_mkl_shape;
+    output_mkl_shape.SetMklTensor(true);
+    output_mkl_shape.SetMklLayout(&dst_pd);
+    output_mkl_shape.SetElemType(MklDnnType<T>());
+    output_mkl_shape.SetTfLayout(output_dims_mkl_order.size(),
+                              output_dims_mkl_order,
+                              output_tf_format);
+    TensorShape output_tf_shape;
+
+    // only allocate enough space for the elements we need.
+    output_tf_shape.AddDim(this->GetNumTElements(dst_pd));
+    AllocateOutputSetMklShape(context, kOutputTensorIndexOutput,
+                            output_tensor,
+                            output_tf_shape, output_mkl_shape);
+    CHECK_NOTNULL(*output_tensor);
+  }
+
+  void PrepareAndExecuteNet(
+                  const pooling_forward::primitive_desc& pool_fwd_desc,
+                  const MklDnnData<T>* src,
+                  MklDnnData<T>* dst,
+                  MklDnnData<T>* wksp = nullptr) {
+    std::vector<primitive> net;
+
+    // Create pooling primitive and add it to net
+    if (wksp != nullptr) {
+        net.push_back(pooling_forward(pool_fwd_desc,
+                        src->GetOpMem(),
+                        dst->GetOpMem(),
+                        wksp->GetOpMem()));
+    } else {
+        net.push_back(pooling_forward(pool_fwd_desc,
+            src->GetOpMem(),
+            dst->GetOpMem()));
+    }
+    stream(stream::kind::eager).submit(net).wait();
+  }
+
+
+  void SanityCheckInput(OpKernelContext* context,
+                  const Tensor& input_tensor,
+                  const MklDnnShape& input_mkl_shape) {
+    if (!input_mkl_shape.IsMklTensor()) {
+      OP_REQUIRES(context, input_tensor.dims() == 4,
+          errors::InvalidArgument("Input must be 4-dimensional"));
+    } else {
+        OP_REQUIRES(context, input_mkl_shape.GetDimension() == 4,
+                errors::InvalidArgument("Input shape must be "
+                "4-dimensional"));
+    }
+  }
+  // .Input("value: T")
+  // .Output("output: T")
+  const int kInputTensorIndexInput = 0;
+  const int kOutputTensorIndexOutput = 0;
+};  // MklPoolingForwardBaseOp
+
+
+template <class T>
+class MklPoolingBackwardOpBase : public MklPoolingOpBase<T> {
+ public:
+  explicit MklPoolingBackwardOpBase<T>(OpKernelConstruction* context)
+          : MklPoolingOpBase<T>(context) { }
+  void Compute(OpKernelContext* context) override = 0;
+
+ protected:
+  const int kOutputTensorIndexOutput = 0;
+
+  void AllocateOutputTensor(OpKernelContext* context,
+            const pooling_backward::primitive_desc& pool_bkwd_prim_desc,
+            const memory::dims output_dims_mkl_order,
+            const memory::format& output_tf_format,
+            Tensor** output_tensor) {
+    CHECK_NOTNULL(output_tensor);
+    memory::primitive_desc dst_pd
+                = pool_bkwd_prim_desc.diff_src_primitive_desc();
+    MklDnnShape output_mkl_shape;
+    output_mkl_shape.SetMklTensor(true);
+    output_mkl_shape.SetMklLayout(&dst_pd);
+    output_mkl_shape.SetElemType(MklDnnType<T>());
+    output_mkl_shape.SetTfLayout(output_dims_mkl_order.size(),
+                              output_dims_mkl_order,
+                              output_tf_format);
+
+    TensorShape output_tf_shape;
+    output_tf_shape.AddDim(this->GetNumTElements(dst_pd));
+    AllocateOutputSetMklShape(context, kOutputTensorIndexOutput,
+                            output_tensor,
+                            output_tf_shape, output_mkl_shape);
+    CHECK_NOTNULL(*output_tensor);
+  }
+
+  void PrepareAndExecuteNet(
+    const pooling_backward::primitive_desc& pool_bkwd_desc,
+    MklDnnData<T>* input_gradient_diff_dst,
+    MklDnnData<T>* output_diff_src,
+    const memory::primitive_desc& target_diff_dst_pd,
+    const MklDnnData<T>* workspace = nullptr) {
+
+    std::vector<primitive> net;
+
+    // If the input gradient isn't in the same format as the output
+    // reorder it to the same format as the output
+    input_gradient_diff_dst->CheckReorderToOpMem(
+            target_diff_dst_pd,
+            &net);
+
+    // Create pooling primitive and add it to net
+    if (nullptr == workspace) {
+      net.push_back(pooling_backward(pool_bkwd_desc,
+                              input_gradient_diff_dst->GetOpMem(),
+                              output_diff_src->GetOpMem()));
+    } else {
+      net.push_back(pooling_backward(pool_bkwd_desc,
+                                  input_gradient_diff_dst->GetOpMem(),
+                                  workspace->GetOpMem(),
+                                  output_diff_src->GetOpMem()));
+    }
+    stream(stream::kind::eager).submit(net).wait();
+  }
+
+  // Max Pooling and Avg Pooling have slightly different implementations
+  // Takes the Tensor containing original input data and the original
+  // mkl Dnn Shape and populates other data
+  memory::desc ConfigureOriginalInput(OpKernelContext* context,
+                              const Tensor& tensor_original_input_shape,
+                              const MklDnnShape& original_input_mkl_shape,
+                              memory::dims* original_input_dims_nchw,
+                              MklPoolParameters* pool_params,
+                              const TensorShape& input_tensor_shape) {
+    CHECK_NOTNULL(original_input_dims_nchw);
+    CHECK_NOTNULL(pool_params);
+    this->InitMklPoolParameters(context, pool_params,
+                          original_input_mkl_shape,
+                          input_tensor_shape);
+
+    *original_input_dims_nchw
+          = original_input_mkl_shape.IsMklTensor()
+          ? original_input_mkl_shape.GetSizesAsMklDnnDims()
+          : TFShapeToMklDnnDimsInNCHW(input_tensor_shape,
+        this->data_format_tf_);
+
+    return  original_input_mkl_shape.IsMklTensor()
+      ? original_input_mkl_shape.GetMklLayout()
+      : memory::desc(*original_input_dims_nchw,
+                      MklDnnType<T>(),
+                      this->data_format_mkldnn_);
+  }
+
+  memory::desc ConfigureOriginalOutput(const MklPoolParameters& pool_params,
+                                const MklDnnShape& original_output_mkl_shape,
+                                      memory::dims output_dims_mkl_order) {
+    this->GetOutputDims(pool_params, &output_dims_mkl_order);
+
+    return original_output_mkl_shape.IsMklTensor()
+            ? original_output_mkl_shape.GetMklLayout()
+            : memory::desc(output_dims_mkl_order,
+                         MklDnnType<T>(),
+                         this->data_format_mkldnn_);
+  }
+
+  memory::desc ConfigureInputGradient(
+        const MklDnnShape& input_gradient_mkl_shape,
+        const Tensor& input_gradient_tensor,
+        MklDnnData<T>* input_gradient_dnn_data,
+        const memory::desc& original_output_md) {
+    // Configure the gradient as is
+    memory::desc original_input_grad_md
+          = input_gradient_mkl_shape.IsMklTensor()
+          ? input_gradient_mkl_shape.GetMklLayout()
+          : memory::desc(TFShapeToMklDnnDimsInNCHW(
+                    input_gradient_tensor.shape(),
+                    this->data_format_tf_),
+                    MklDnnType<T>(), this->data_format_mkldnn_);
+
+    input_gradient_dnn_data->SetUsrMem(original_input_grad_md,
+                &input_gradient_tensor);
+
+    // Check to see if input grad diff dst is in the right format
+    // Create a new memory descriptor with the same shape as the
+    // original, but the format of the other tensors.
+    memory::format original_output_format =
+            static_cast<memory::format>(original_output_md.data.format);
+    bool grad_reorder_needed = input_gradient_dnn_data->IsReorderNeeded(
+                                    original_output_format);
+    memory::dims diff_dst_dims = input_gradient_mkl_shape.IsMklTensor()
+        ? input_gradient_mkl_shape.GetSizesAsMklDnnDims()
+        : TFShapeToMklDnnDimsInNCHW(input_gradient_tensor.shape(),
+                    this->data_format_tf_);
+    memory::desc target_diff_dst_md = memory::desc(diff_dst_dims,
+        MklDnnType<T>(), original_output_format);
+
+    return grad_reorder_needed
+            ? target_diff_dst_md
+            : original_input_grad_md;
+  }
+};
+#endif  // INTEL_MKL_DNN
+
 //-------------------------------------------------------------------
 // Utility functions
 
diff --git a/tensorflow/core/kernels/mkl_relu_op.cc b/tensorflow/core/kernels/mkl_relu_op.cc
index 86a77d769a..45bdd0ad5c 100644
--- a/tensorflow/core/kernels/mkl_relu_op.cc
+++ b/tensorflow/core/kernels/mkl_relu_op.cc
@@ -28,6 +28,19 @@ limitations under the License.
 #include "mkl_dnn.h"
 #include "mkl_dnn_types.h"
 
+#ifdef INTEL_MKL_DNN
+#include "mkldnn.hpp"
+
+using mkldnn::stream;
+using mkldnn::prop_kind;
+using mkldnn::algorithm;
+using mkldnn::relu_forward;
+using mkldnn::relu_backward;
+using mkldnn::eltwise_relu;
+using mkldnn::eltwise_elu;
+using mkldnn::eltwise_tanh;
+#endif
+
 namespace tensorflow {
 
 typedef Eigen::ThreadPoolDevice CPUDevice;
@@ -45,6 +58,8 @@ struct MklReluHelpers {
   }
 };
 
+#ifndef INTEL_MKL_DNN
+
 template <typename Device, typename T>
 class MklReluOp : public OpKernel {
  public:
@@ -59,6 +74,7 @@ class MklReluOp : public OpKernel {
     GetMklShape(context, 0, &mkl_context.input_shape);
     void* user_i = static_cast<void*>(const_cast<T*>(input.flat<T>().data()));
     bool input_in_mkl_format = mkl_context.input_shape.IsMklTensor();
+
     if (!input_in_mkl_format && !input.dims()) {  // handle the case of a scalar
       const TensorShape& o_shape = input.shape();
       Tensor* out_tensor = nullptr;
@@ -164,6 +180,7 @@ class MklReluOp : public OpKernel {
   } MklReluOpContext;
 };
 
+
 template <typename Device, typename T>
 class MklReluGradOp : public OpKernel {
  public:
@@ -189,18 +206,18 @@ class MklReluGradOp : public OpKernel {
       const Tensor& a = MklGetInput(context, 1);
       void* buf_input = static_cast<void*>(const_cast<T*>(a.flat<T>().data()));
       void* mkl_buffer_convert = nullptr;
+
       dnnPrimitive_t cv_input_to_grad = nullptr;
 
-      // if input and grad are not in the same layout, do a conversion between
-      // them.
+      // if input and grad are not in the same layout,
+      // do a conversion between them.
       if (!dnnLayoutCompare_F32(lt_input, lt_grad)) {
         AllocTmpBuffer(context, mkl_tmp_input_buf_tensor, lt_grad,
                        &mkl_buffer_convert);
         CHECK_EQ(dnnConversionCreate_F32(&cv_input_to_grad, lt_input,
                    lt_grad), E_SUCCESS);
         CHECK_EQ(dnnConversionExecute_F32(cv_input_to_grad, buf_input,
-                                          mkl_buffer_convert),
-                 E_SUCCESS);
+                                          mkl_buffer_convert), E_SUCCESS);
         relu_res[dnnResourceSrc] = mkl_buffer_convert;
         dnnDelete_F32(cv_input_to_grad);
       } else {
@@ -246,7 +263,6 @@ class MklReluGradOp : public OpKernel {
 };
 
 template <typename Device, typename T>
-
 void MklReluGradOp<Device, T>::Compute(OpKernelContext* context) {
   MklReluGradOpContext mkl_context;
   const Tensor& g = MklGetInput(context, 0);
@@ -264,20 +280,21 @@ void MklReluGradOp<Device, T>::Compute(OpKernelContext* context) {
       !MklReluHelpers::ValidateSameSize(context, g, a))
     return;
   Tensor* output = nullptr;
-  if (!input_is_mkl && !grad_is_mkl &&
-      !a.dims()) {  // handle the case of a scalar
-    // Allocate space for g and
+
+  if (!input_is_mkl && !grad_is_mkl && !a.dims()) {
+    // handle the scalar case
     const TensorShape& g_shape = g.shape();
     mkl_context.output_shape.SetMklTensor(false);
     AllocateOutputSetMklShape(context, 0, &output, g_shape,
                               mkl_context.output_shape);
+
     void* out_o = static_cast<void*>(output->flat<T>().data());
     (static_cast<T*>(out_o))[0] =
         (static_cast<T*>(user_g))[0] * ((static_cast<T*>(user_i))[0] > 0);
     return;
   }
 
-  // Generate size, stride for input if input/grad is in MKL format.
+  // generate size, stride for input if input/grad is in mkl format.
   if (grad_is_mkl || input_is_mkl) {
     const MklShape* tmp_mkl_shape =
         (grad_is_mkl) ? &mkl_context.grad_shape : &mkl_context.input_shape;
@@ -308,21 +325,20 @@ void MklReluGradOp<Device, T>::Compute(OpKernelContext* context) {
   float negative_slope = 0.0;
   CHECK_EQ(dnnReLUCreateBackward_F32(&mkl_context.prim_relu_bwd, NULL,
                                      mkl_context.lt_grad, mkl_context.lt_grad,
-                                     negative_slope),
-           E_SUCCESS);
+                                     negative_slope), E_SUCCESS);
   Tensor mkl_tmp_input_buf_tensor;
   mkl_context.MklPrepareReluGradInputs(context, &mkl_tmp_input_buf_tensor);
 
   if (input_is_mkl ||
-      grad_is_mkl) { /*if  grad or input are MKL leave it in MKL*/
+      grad_is_mkl) { /*if  grad or input are mkl leave it in mkl*/
     TensorShape tf_shape;
     mkl_context.output_shape.SetMklTensor(true);
     mkl_context.output_shape.SetMklLayout(mkl_context.prim_relu_bwd,
                                           dnnResourceDiffSrc);
     mkl_context.output_shape.SetTfLayout(
         mkl_context.in_dims, mkl_context.in_sizes, mkl_context.in_strides);
-    // If input_is_mkl or grad_is_mkl, then we copy strides and sizes from Mkl
-    // shape of one that is in MKL layout.
+    // if input_is_mkl or grad_is_mkl, then we copy strides and sizes from mkl
+    // shape of one that is in mkl layout.
     if (grad_is_mkl == true) {
       mkl_context.output_shape.SetTfDimOrder(
           mkl_context.in_dims, mkl_context.grad_shape.GetTfToMklDimMap());
@@ -332,11 +348,9 @@ void MklReluGradOp<Device, T>::Compute(OpKernelContext* context) {
     }
 
     tf_shape.AddDim(dnnLayoutGetMemorySize_F32(static_cast<dnnLayout_t>(
-                        mkl_context.output_shape.GetMklLayout())) /
-                    sizeof(T));
+                    mkl_context.output_shape.GetMklLayout())) / sizeof(T));
     AllocateOutputSetMklShape(context, 0, &output, tf_shape,
                               mkl_context.output_shape);
-
   } else {
     const TensorShape& o_shape = g.shape();
     mkl_context.output_shape.SetMklTensor(false);
@@ -347,13 +361,430 @@ void MklReluGradOp<Device, T>::Compute(OpKernelContext* context) {
   mkl_context.relu_res[dnnResourceDiffSrc] =
       static_cast<void*>(output->flat<T>().data());
 
-  CHECK_EQ(dnnExecute_F32(mkl_context.prim_relu_bwd, mkl_context.relu_res),
-           E_SUCCESS);
+  CHECK_EQ(dnnExecute_F32(mkl_context.prim_relu_bwd,
+                          mkl_context.relu_res),
+                          E_SUCCESS);
   mkl_context.MklCleanup();
 }
 
-/* Register DNN kernels for supported operations and supported types - right now
- * it is only Relu and f32*/
+
+#else  // INTEL_MKL_DNN
+
+template <typename Device, typename T, algorithm alg_kind>
+class MklReluOpBase : public OpKernel {
+ public:
+  ~MklReluOpBase() {}
+
+  explicit MklReluOpBase(OpKernelConstruction* context) : OpKernel(context) {
+  }
+
+  virtual void Compute_Scalar(OpKernelContext* context) = 0;
+
+  void Compute(OpKernelContext* context) override {
+    try {
+      auto cpu_engine = engine(engine::cpu, 0);
+      const size_t src_index = 0;  // index of src input tensor
+      const size_t dst_index = 0;  // index of dst output tensor
+      const Tensor& src_tensor = MklGetInput(context, src_index);
+      MklDnnShape dnn_shape_src;
+      GetMklShape(context, src_index, &dnn_shape_src);
+
+      Tensor* dst_tensor = nullptr;
+      if (src_tensor.dims() == 0) {
+        Compute_Scalar(context);
+        return;
+      }
+
+      // Create relu primitive.
+      MklDnnData<T> src(&cpu_engine);
+      MklDnnData<T> dst(&cpu_engine);
+
+      // Set DNN primitive - src
+      memory::desc src_md({}, memory::data_undef, memory::format_undef);
+      if (dnn_shape_src.IsMklTensor()) {
+        src_md = dnn_shape_src.GetMklLayout();
+      } else {
+        auto src_dims = TFShapeToMklDnnDims(src_tensor.shape());
+        auto src_strides = CalculateTFStrides(src_dims);
+        // Create blocked memory descriptor
+        src_md = MklDnnData<T>::CreateBlockedMemDesc(src_dims, src_strides);
+      }
+      src.SetUsrMem(src_md, &src_tensor);
+
+      T alpha = 0, beta = 0;
+      std::shared_ptr<relu_forward::primitive_desc> relu_fwd_pd;
+      auto relu_fwd_desc = relu_forward::desc(prop_kind::forward_training,
+          // Operator memory descriptor is same as user memory descriptor.
+                                              alg_kind, src.GetUsrMemDesc(),
+                                              alpha, beta);
+      relu_fwd_pd.reset(new relu_forward::primitive_desc(relu_fwd_desc,
+                                                         cpu_engine));
+
+      // allocate dst tensor
+      MklDnnShape dnn_shape_dst;
+      TensorShape tf_shape_dst;
+      if (dnn_shape_src.IsMklTensor()) {
+        dnn_shape_dst.SetMklTensor(true);
+        auto dst_pd = relu_fwd_pd->dst_primitive_desc();
+        dnn_shape_dst.SetMklLayout(&dst_pd);
+        dnn_shape_dst.SetElemType(MklDnnType<T>());
+        dnn_shape_dst.SetTfLayout(dnn_shape_src.GetDimension(),
+                                  dnn_shape_src.GetSizesAsMklDnnDims(),
+                                  dnn_shape_src.GetTfDataFormat());
+        tf_shape_dst.AddDim(dst_pd.get_size()/sizeof(T));
+      } else {
+        dnn_shape_dst.SetMklTensor(false);
+        tf_shape_dst = src_tensor.shape();
+      }
+      AllocateOutputSetMklShape(context, dst_index, &dst_tensor, tf_shape_dst,
+                                dnn_shape_dst);
+
+      // Destination memory descriptor is same as source memory descriptor.
+      auto dst_md = src_md;
+      dst.SetUsrMem(dst_md, dst_tensor);
+
+      // execute net
+      std::vector<primitive> net;
+      auto relu_fwd = relu_forward(*relu_fwd_pd, src.GetOpMem(),
+                                   dst.GetOpMem());
+      net.push_back(relu_fwd);
+      stream(stream::kind::eager).submit(net).wait();
+    } catch (mkldnn::error &e) {
+      string error_msg = "Status: " + std::to_string(e.status) +
+                         ", message: " + string(e.message) +
+                         ", in file " + string(__FILE__) + ":" +
+                         std::to_string(__LINE__);
+      OP_REQUIRES_OK(context,
+                     errors::Aborted("Operation received an exception:",
+                        error_msg));
+    }
+  }
+};
+
+
+template <typename Device, typename T, algorithm alg_kind>
+class MklReluGradOpBase : public OpKernel {
+ public:
+  ~MklReluGradOpBase() {}
+
+  explicit MklReluGradOpBase(OpKernelConstruction* context) :
+    OpKernel(context) {}
+
+  virtual void Compute_Scalar(OpKernelContext* context) = 0;
+
+  void Compute(OpKernelContext* context)  {
+    try {
+      auto cpu_engine = engine(engine::cpu, 0);
+      MklDnnData<T> src(&cpu_engine);
+      MklDnnData<T> diff_dst(&cpu_engine);
+      MklDnnData<T> diff_src(&cpu_engine);
+
+      const size_t diff_dst_index = 0;  // index of diff_dst input tensor
+      const size_t src_index = 1;       // index of src input tensor
+      const size_t diff_src_index = 0;  // index of diff_src output tensor
+
+      const Tensor& src_tensor      = MklGetInput(context, src_index);
+      const Tensor& diff_dst_tensor = MklGetInput(context, diff_dst_index);
+      Tensor* diff_src_tensor       = nullptr;
+
+      MklDnnShape dnn_shape_src, dnn_shape_diff_dst;
+      GetMklShape(context, src_index, &dnn_shape_src);
+      GetMklShape(context, diff_dst_index, &dnn_shape_diff_dst);
+
+      int src_dims_size = src_tensor.dims();
+      if (src_dims_size == 0) {
+        Compute_Scalar(context);
+        return;
+      }
+
+      // Set DNN primitives for src & diff_dst
+      memory::desc src_md({}, memory::data_undef, memory::format_undef);
+      memory::desc diff_dst_md({}, memory::data_undef, memory::format_undef);
+      if (dnn_shape_src.IsMklTensor() || dnn_shape_diff_dst.IsMklTensor()) {
+        if (dnn_shape_diff_dst.IsMklTensor()) {
+          diff_dst_md = dnn_shape_diff_dst.GetMklLayout();
+          src_md = diff_dst_md;
+        } else {
+          src_md = dnn_shape_src.GetMklLayout();
+          diff_dst_md = src_md;
+        }
+      } else {
+        auto src_dims = TFShapeToMklDnnDims(src_tensor.shape());
+        auto src_strides = CalculateTFStrides(src_dims);
+        src_md = MklDnnData<T>::CreateBlockedMemDesc(src_dims, src_strides);
+        diff_dst_md = src_md;
+      }
+      src.SetUsrMem(src_md, &src_tensor);
+      diff_dst.SetUsrMem(diff_dst_md, &diff_dst_tensor);
+
+      T alpha = 0, beta = 0;
+      std::shared_ptr<relu_forward::primitive_desc> relu_fwd_pd;
+      auto relu_fwd_desc = relu_forward::desc(prop_kind::forward_training,
+                                              alg_kind, src_md, alpha, beta);
+      relu_fwd_pd.reset(new relu_forward::primitive_desc(relu_fwd_desc,
+                                                         cpu_engine));
+      auto relu_bwd_desc = relu_backward::desc(alg_kind, diff_dst_md, src_md,
+                                                alpha, beta);
+      auto relu_bwd_pd  = relu_backward::primitive_desc(relu_bwd_desc,
+                                                cpu_engine, *relu_fwd_pd);
+
+      // allocate diff_src tensor
+      MklDnnShape dnn_shape_diff_src;
+      TensorShape tf_shape_diff_src;
+      if (dnn_shape_src.IsMklTensor()) {
+        dnn_shape_diff_src.SetMklTensor(true);
+        auto diff_src_pd = relu_bwd_pd.diff_src_primitive_desc();
+        dnn_shape_diff_src.SetMklLayout(&diff_src_pd);
+        dnn_shape_diff_src.SetElemType(MklDnnType<T>());
+        dnn_shape_diff_src.SetTfLayout(dnn_shape_src.GetDimension(),
+                                       dnn_shape_src.GetSizesAsMklDnnDims(),
+                                       dnn_shape_src.GetTfDataFormat());
+        tf_shape_diff_src.AddDim(diff_src_pd.get_size()/sizeof(T));
+      } else {
+        dnn_shape_diff_src.SetMklTensor(false);
+        tf_shape_diff_src = src_tensor.shape();
+      }
+      AllocateOutputSetMklShape(context, diff_src_index, &diff_src_tensor,
+                                 tf_shape_diff_src, dnn_shape_diff_src);
+
+      // diff_src memory descriptor is same as diff_dst memory descriptor.
+      auto diff_src_md = diff_dst_md;
+      diff_src.SetUsrMem(diff_src_md, diff_src_tensor);
+
+      PrepareAndExecuteNet(relu_bwd_pd, &src, &diff_src, &diff_dst);
+     } catch (mkldnn::error &e) {
+       string error_msg = "Status: " + std::to_string(e.status) +
+                          ", message: " + string(e.message) +
+                          ", in file " + string(__FILE__) + ":" +
+                          std::to_string(__LINE__);
+       OP_REQUIRES_OK(context,
+                      errors::Aborted("Operation received an exception:",
+                                      error_msg));
+    }
+  }
+
+  void PrepareAndExecuteNet(const relu_backward::primitive_desc& relu_prim_desc,
+                  MklDnnData<T>* src, MklDnnData<T>* diff_src, MklDnnData<T>*
+                  diff_dst) {
+    std::vector<primitive> net;
+    net.push_back(relu_backward(relu_prim_desc, src->GetOpMem(),
+                                diff_dst->GetOpMem(), diff_src->GetOpMem()));
+    stream(stream::kind::eager).submit(net).wait();
+  }
+};
+
+
+template <typename Device, typename T>
+class MklReluOp : public MklReluOpBase<Device, T, eltwise_relu> {
+ public:
+  ~MklReluOp() {}
+
+  explicit MklReluOp(OpKernelConstruction* context) :
+  MklReluOpBase<Device, T, eltwise_relu>(context) {}
+
+  virtual void Compute_Scalar(OpKernelContext* context) {
+    const size_t src_index = 0;  // index of src input tensor
+    const size_t dst_index = 0;  // index of dst output tensor
+    const Tensor& src_tensor = MklGetInput(context, src_index);
+    MklDnnShape dnn_shape_src;
+    GetMklShape(context, src_index, &dnn_shape_src);
+
+    Tensor* dst_tensor = nullptr;
+    void* user_i = static_cast<void*>(const_cast<T*>(
+                         src_tensor.flat<T>().data()));
+    MklDnnShape dnn_shape_dst;
+    dnn_shape_dst.SetMklTensor(false);
+    AllocateOutputSetMklShape(context, dst_index, &dst_tensor,
+                              src_tensor.shape(), dnn_shape_dst);
+    void* out_o = static_cast<void*>(dst_tensor->flat<T>().data());
+    (static_cast<T*>(out_o))[0] =
+              std::max((static_cast<T*>(user_i))[0], static_cast<T>(0));
+    return;
+  }
+};
+
+template <typename Device, typename T>
+class MklReluGradOp : public MklReluGradOpBase<Device, T, eltwise_relu> {
+ public:
+  ~MklReluGradOp() {}
+
+  explicit MklReluGradOp(OpKernelConstruction* context) :
+  MklReluGradOpBase<Device, T, eltwise_relu>(context) {}
+
+  virtual void Compute_Scalar(OpKernelContext* context) {
+    const size_t diff_dst_index = 0;  // index of diff_dst input tensor
+    const size_t src_index = 1;       // index of src input tensor
+    const size_t diff_src_index = 0;  // index of diff_src output tensor
+    const Tensor& src_tensor    = MklGetInput(context, src_index);
+    const Tensor& diff_dst_tensor = MklGetInput(context, diff_dst_index);
+    Tensor* diff_src_tensor = nullptr;
+
+    MklDnnShape dnn_shape_diff_dst;
+    GetMklShape(context, diff_dst_index, &dnn_shape_diff_dst);
+
+    int src_dims_size = src_tensor.dims();
+    MklDnnShape dnn_shape_diff_src;
+    dnn_shape_diff_src.SetMklTensor(false);
+    AllocateOutputSetMklShape(context, diff_src_index, &diff_src_tensor,
+                              diff_dst_tensor.shape(), dnn_shape_diff_src);
+    void* out_o = static_cast<void*>(diff_src_tensor->flat<T>().data());
+    void* user_i =
+          static_cast<void*>(const_cast<T*>(src_tensor.flat<T>().data()));
+    void* user_g =
+          static_cast<void*>(const_cast<T*>(diff_dst_tensor.flat<T>().data()));
+    (static_cast<T*>(out_o))[0] = (static_cast<T*>(user_g))[0] *
+                                  ((static_cast<T*>(user_i))[0] > 0);
+    return;
+  }
+};
+
+template <typename Device, typename T>
+class MklEluOp : public MklReluOpBase<Device, T, eltwise_elu> {
+ public:
+  ~MklEluOp() {}
+
+  explicit MklEluOp(OpKernelConstruction* context) :
+  MklReluOpBase<Device, T, eltwise_elu>(context) {}
+
+  virtual void Compute_Scalar(OpKernelContext* context) {
+    const size_t src_index = 0;  // index of src input tensor
+    const size_t dst_index = 0;  // index of dst output tensor
+    const Tensor& src_tensor = MklGetInput(context, src_index);
+    MklDnnShape dnn_shape_src;
+    GetMklShape(context, src_index, &dnn_shape_src);
+
+    Tensor* dst_tensor = nullptr;
+    void* user_i = static_cast<void*>(const_cast<T*>(
+                         src_tensor.flat<T>().data()));
+    MklDnnShape dnn_shape_dst;
+    dnn_shape_dst.SetMklTensor(false);
+    AllocateOutputSetMklShape(context, dst_index, &dst_tensor,
+                              src_tensor.shape(), dnn_shape_dst);
+    void* out_o = static_cast<void*>(dst_tensor->flat<T>().data());
+    // return exp(feature) - 1 if feature > 0; feature otherwise
+    T feature = (static_cast<T*>(user_i))[0];
+    if (feature < 0)
+      (static_cast<T*>(out_o))[0] = std::exp(feature);
+    else
+      (static_cast<T*>(out_o))[0] = feature;
+    return;
+  }
+};
+
+template <typename Device, typename T>
+class MklEluGradOp : public MklReluGradOpBase<Device, T, eltwise_elu> {
+ public:
+  ~MklEluGradOp() {}
+
+  explicit MklEluGradOp(OpKernelConstruction* context) :
+  MklReluGradOpBase<Device, T, eltwise_elu>(context) {}
+
+  virtual void Compute_Scalar(OpKernelContext* context) {
+    const size_t diff_dst_index = 0;  // index of diff_dst input tensor
+    const size_t src_index = 1;       // index of src input tensor
+    const size_t diff_src_index = 0;  // index of diff_src output tensor
+    const Tensor& src_tensor    = MklGetInput(context, src_index);
+    const Tensor& diff_dst_tensor = MklGetInput(context, diff_dst_index);
+    Tensor* diff_src_tensor = nullptr;
+
+    MklDnnShape dnn_shape_diff_dst;
+    GetMklShape(context, diff_dst_index, &dnn_shape_diff_dst);
+
+    int src_dims_size = src_tensor.dims();
+    MklDnnShape dnn_shape_diff_src;
+    dnn_shape_diff_src.SetMklTensor(false);
+    AllocateOutputSetMklShape(context, diff_src_index, &diff_src_tensor,
+                              diff_dst_tensor.shape(), dnn_shape_diff_src);
+    void* out_o = static_cast<void*>(diff_src_tensor->flat<T>().data());
+    void* user_i =
+          static_cast<void*>(const_cast<T*>(src_tensor.flat<T>().data()));
+    void* user_g =
+          static_cast<void*>(const_cast<T*>(diff_dst_tensor.flat<T>().data()));
+    // gradient of elu(x) = 1 if x > 0; elu(x) + 1 otherwise
+    T feature = (static_cast<T*>(user_i))[0];
+    if (feature > 0) {
+      (static_cast<T*>(out_o))[0] = (static_cast<T*>(user_g))[0];
+    } else {
+      T elu = std::exp(feature) - 1;
+      (static_cast<T*>(out_o))[0] = (static_cast<T*>(user_g))[0] * (elu + 1);
+    }
+  }
+};
+
+template <typename Device, typename T>
+class MklTanhOp : public MklReluOpBase<Device, T, eltwise_tanh> {
+ public:
+  ~MklTanhOp() {}
+
+  explicit MklTanhOp(OpKernelConstruction* context) :
+  MklReluOpBase<Device, T, eltwise_tanh>(context) {}
+
+  virtual void Compute_Scalar(OpKernelContext* context) {
+    const size_t src_index = 0;  // index of src input tensor
+    const size_t dst_index = 0;  // index of dst output tensor
+    const Tensor& src_tensor = MklGetInput(context, src_index);
+    MklDnnShape dnn_shape_src;
+    GetMklShape(context, src_index, &dnn_shape_src);
+
+    Tensor* dst_tensor = nullptr;
+    void* user_i = static_cast<void*>(const_cast<T*>(
+                         src_tensor.flat<T>().data()));
+    MklDnnShape dnn_shape_dst;
+    dnn_shape_dst.SetMklTensor(false);
+    AllocateOutputSetMklShape(context, dst_index, &dst_tensor,
+                              src_tensor.shape(), dnn_shape_dst);
+    void* out_o = static_cast<void*>(dst_tensor->flat<T>().data());
+    // tanh(x) = (e^x - e^(-x))/ (e^x + e^(-x))
+    T feature = (static_cast<T*>(user_i))[0];
+    T e1 = std::exp(feature);
+    T e2 = std::exp(-feature);
+    (static_cast<T*>(out_o))[0] = (e1 - e2)/(e1 + e2);
+    return;
+  }
+};
+
+template <typename Device, typename T>
+class MklTanhGradOp : public MklReluGradOpBase<Device, T, eltwise_tanh> {
+ public:
+  ~MklTanhGradOp() {}
+
+  explicit MklTanhGradOp(OpKernelConstruction* context) :
+  MklReluGradOpBase<Device, T, eltwise_tanh>(context) {}
+
+  virtual void Compute_Scalar(OpKernelContext* context) {
+    const size_t diff_dst_index = 0;  // index of diff_dst input tensor
+    const size_t src_index = 1;       // index of src input tensor
+    const size_t diff_src_index = 0;  // index of diff_src output tensor
+    const Tensor& src_tensor    = MklGetInput(context, src_index);
+    const Tensor& diff_dst_tensor = MklGetInput(context, diff_dst_index);
+    Tensor* diff_src_tensor = nullptr;
+
+    MklDnnShape dnn_shape_diff_dst;
+    GetMklShape(context, diff_dst_index, &dnn_shape_diff_dst);
+
+    int src_dims_size = src_tensor.dims();
+    MklDnnShape dnn_shape_diff_src;
+    dnn_shape_diff_src.SetMklTensor(false);
+    AllocateOutputSetMklShape(context, diff_src_index, &diff_src_tensor,
+                              diff_dst_tensor.shape(), dnn_shape_diff_src);
+    void* out_o = static_cast<void*>(diff_src_tensor->flat<T>().data());
+    void* user_i =
+          static_cast<void*>(const_cast<T*>(src_tensor.flat<T>().data()));
+    // gradient of tanh(x) = 1 - tanh(x)^2
+    T feature = (static_cast<T*>(user_i))[0];
+    T e1 = std::exp(feature);
+    T e2 = std::exp(-feature);
+    T tanh = (e1 - e2)/(e1 + e2);
+    void* user_g =
+          static_cast<void*>(const_cast<T*>(diff_dst_tensor.flat<T>().data()));
+    (static_cast<T*>(out_o))[0] = (static_cast<T*>(user_g))[0] *
+                                  (1 - tanh * tanh);
+  }
+};
+
+#endif
+
+// register dnn kernels for supported operations and supported types
 #define REGISTER_RELU_MKL_SUPPORTED_KERNELS_TYPES(type)             \
   REGISTER_KERNEL_BUILDER(Name("_MklRelu")                          \
                               .Device(DEVICE_CPU)                   \
@@ -367,6 +798,38 @@ void MklReluGradOp<Device, T>::Compute(OpKernelContext* context) {
                           MklReluGradOp<CPUDevice, type>);
 TF_CALL_float(REGISTER_RELU_MKL_SUPPORTED_KERNELS_TYPES);
 
+#ifdef INTEL_MKL_DNN
+
+// register dnn kernels for supported operations and supported types
+#define REGISTER_ELU_MKL_SUPPORTED_KERNELS_TYPES(type)             \
+  REGISTER_KERNEL_BUILDER(Name("_MklElu")                          \
+                              .Device(DEVICE_CPU)                   \
+                              .TypeConstraint<type>("T")            \
+                              .Label(mkl_op_registry::kMklOpLabel), \
+                          MklEluOp<CPUDevice, type>);              \
+  REGISTER_KERNEL_BUILDER(Name("_MklEluGrad")                      \
+                              .Device(DEVICE_CPU)                   \
+                              .TypeConstraint<type>("T")            \
+                              .Label(mkl_op_registry::kMklOpLabel), \
+                          MklEluGradOp<CPUDevice, type>);
+TF_CALL_float(REGISTER_ELU_MKL_SUPPORTED_KERNELS_TYPES);
+
+#define REGISTER_TANH_MKL_SUPPORTED_KERNELS_TYPES(type)             \
+  REGISTER_KERNEL_BUILDER(Name("_MklTanh")                          \
+                              .Device(DEVICE_CPU)                   \
+                              .TypeConstraint<type>("T")            \
+                              .Label(mkl_op_registry::kMklOpLabel), \
+                          MklTanhOp<CPUDevice, type>);              \
+  REGISTER_KERNEL_BUILDER(Name("_MklTanhGrad")                      \
+                              .Device(DEVICE_CPU)                   \
+                              .TypeConstraint<type>("T")            \
+                              .Label(mkl_op_registry::kMklOpLabel), \
+                          MklTanhGradOp<CPUDevice, type>);
+TF_CALL_float(REGISTER_TANH_MKL_SUPPORTED_KERNELS_TYPES);
+
+#endif
+
 }  // namespace tensorflow
 
 #endif  // INTEL_MKL
+
diff --git a/tensorflow/core/kernels/mkl_reshape_op.cc b/tensorflow/core/kernels/mkl_reshape_op.cc
index 5e98582475..11c92ebdb4 100644
--- a/tensorflow/core/kernels/mkl_reshape_op.cc
+++ b/tensorflow/core/kernels/mkl_reshape_op.cc
@@ -28,6 +28,11 @@ limitations under the License.
 #include "mkl_dnn_types.h"
 #include "tensorflow/core/util/mkl_util.h"
 
+#ifdef INTEL_MKL_DNN
+#include "mkldnn.hpp"
+using mkldnn::stream;
+#endif
+
 namespace tensorflow {
 using CPUDevice = Eigen::ThreadPoolDevice;
 template <typename Device, typename T>
@@ -35,6 +40,7 @@ class MklReshapeOp : public OpKernel {
  public:
   explicit MklReshapeOp(OpKernelConstruction* context) : OpKernel(context) {}
 
+#ifndef INTEL_MKL_DNN
   void Compute(OpKernelContext* context) override {
     const Tensor& input = MklGetInput(context, 0);
     const Tensor& sizes = MklGetInput(context, 1);
@@ -129,7 +135,183 @@ class MklReshapeOp : public OpKernel {
     }
   }
 
+#else
+
  private:
+  // When the input tensor is in MKL layout and we are reshaping the tensor to a
+  // different shape than its actual shape, then we use MKLDNN reorder primitive
+  // to put tensor back in Tensorflow layout. But we can skip this reordering
+  // some times. This function checks for all such cases.
+  bool SkipReorder(const MklDnnShape& mkl_shape_input,
+                   const TensorShape& reshape_to) {
+    CHECK_EQ(mkl_shape_input.IsMklTensor(), true);
+    bool ret = false;
+
+    // If Tensorflow's data format and the underlying format maintained by
+    // MKLDNN are equivalent (both are NHWC or both are NCHW), then we can
+    // safely return true.
+    auto input_mkl_md = mkl_shape_input.GetMklLayout();
+    if (mkl_shape_input.GetTfDataFormat() == input_mkl_md.data.format) {
+      ret = true;
+    }
+
+    return ret;
+  }
+
+ public:
+  void Compute(OpKernelContext* context) override {
+    const Tensor& input_tensor = MklGetInput(context, 0);
+    const Tensor& sizes = MklGetInput(context, 1);
+
+    MklDnnShape mkl_shape_input;
+    GetMklShape(context, kInputSlotIdx, &mkl_shape_input);
+    bool input_in_mkl_format = mkl_shape_input.IsMklTensor();
+    const int64 nelems = input_in_mkl_format ?
+                         mkl_shape_input.GetTfShape().num_elements()
+                         : input_tensor.NumElements();
+
+    // Preliminary validation of sizes.
+    OP_REQUIRES(context, IsLegacyVector(sizes.shape()),
+                errors::InvalidArgument("sizes input must be 1-D, not shape ",
+                                        sizes.shape().DebugString()));
+
+    // Compute the output shape.  Determine product of specified
+    // dimensions, and find the index of the unspecified one.
+    TensorShape shape;
+    int64 product = 1;
+    int unknown_index = -1;
+    switch (sizes.dtype()) {
+      case DT_INT32:
+        OP_REQUIRES_OK(context, ValidateSizes<int32>(sizes, &product,
+                                                     &unknown_index, &shape));
+        break;
+      case DT_INT64:
+        OP_REQUIRES_OK(context, ValidateSizes<int64>(sizes, &product,
+                                                     &unknown_index, &shape));
+        break;
+      default:
+        context->CtxFailure(errors::InvalidArgument(
+            "desired shape must be a DT_INT32 or DT_INT64 vector, not a ",
+            DataTypeString(sizes.dtype())));
+        return;
+    }
+    if (unknown_index != -1) {
+      OP_REQUIRES(
+          context, product > 0,
+          errors::InvalidArgument("Reshape cannot infer the missing input size "
+                                  "for an empty tensor unless all specified "
+                                  "input sizes are non-zero"));
+      const int64 missing = nelems / product;
+      OP_REQUIRES(
+          context, product * missing == nelems,
+          errors::InvalidArgument(
+              "Input to reshape is a tensor with ", nelems,
+              " values, but the requested shape requires a multiple of ",
+              product));
+      shape.set_dim(unknown_index, missing);
+    }
+    OP_REQUIRES(context, shape.num_elements() == nelems,
+                errors::InvalidArgument("Input to reshape is a tensor with ",
+                                        nelems,
+                                        " values, but the requested shape has ",
+                                        shape.num_elements()));
+
+    if (input_in_mkl_format) {
+      TensorShape& shape_to = shape;
+      TensorShape shape_from = mkl_shape_input.GetTfShape();
+      if (shape_from == shape_to) {
+        CopyMklTensorInToOut(context, kInputSlotIdx, kOutputSlotIdx);
+        return;
+      } else {
+        try {
+          auto cpu_engine = engine(engine::cpu, 0);
+          MklDnnData<T> dnn_data_input(&cpu_engine);
+          // Reshape is just a logical view change operation for a tensor.
+          // It does not change underlying layout. But MKLDNN may maintain
+          // tensor data in different layout than that specified by Tensorflow.
+          // If MKLDNN maintains input tensor in different layout than that
+          // specified by Tensorflow, we will need to reorder tensor and then
+          // put it in the shape expected by Tensorflow. But if MKLDNN has
+          // maintained input tensor in the same layout as it is expected by
+          // Tensorflow, we don't need to reorder tensor contents, we just
+          // need to update MklDnnShape object associated with the input
+          // tensor to reflect the shape change expected by reshape.
+          if (!SkipReorder(mkl_shape_input, shape_to)) {
+              // If dimensions that are being expanded or collapsed are not
+              // maintained contiguously by MKLDNN, then we use reorder.
+
+              // Get Mkl layout of input tensor.
+              auto input_mkl_md = mkl_shape_input.GetMklLayout();
+              // Set input Mkl layout as the user layout.
+              dnn_data_input.SetUsrMem(input_mkl_md, &input_tensor);
+              // Get expected Tensorflow layout of input tensor.
+              auto output_tf_md = mkl_shape_input.GetTfLayout();
+              auto output_tf_pd = memory::primitive_desc(output_tf_md,
+                                                         cpu_engine);
+
+              Tensor* output_tensor = nullptr;
+              MklShape mkl_shape_output;
+              mkl_shape_output.SetMklTensor(false);
+              // We allocate output tensor in the shape expected by Reshape.
+              AllocateOutputSetMklShape(context, kOutputSlotIdx, &output_tensor,
+                                        shape_to, mkl_shape_output);
+
+              // Insert reorder between Mkl layout and TensorFlow layout.
+              std::vector<primitive> net;
+              CHECK_EQ(dnn_data_input.CheckReorderToOpMem(output_tf_pd,
+                       output_tensor, &net), true);
+              stream(stream::kind::eager).submit(net).wait();
+              return;
+          } else {
+            // If dimensions that are being expanded or collapsed are
+            // maintained contiguously by MKLDNN, then we skip reorder, just
+            // update MklDnnShape object for the tensorflow tensor, and forward
+            // Tensorflow tensor as it is to the output.
+            auto output_dims = TFShapeToMklDnnDims(shape_to);
+            auto output_strides = CalculateTFStrides(output_dims);
+            auto output_tf_md = MklDnnData<T>::CreateBlockedMemDesc(output_dims,
+                                                               output_strides);
+            auto output_tf_pd = memory::primitive_desc(output_tf_md,
+                                                       cpu_engine);
+
+            // Set MklDnnShape
+            MklDnnShape mkl_shape_output;
+            mkl_shape_output.SetMklTensor(true);
+            mkl_shape_output.SetMklLayout(&output_tf_pd);
+            mkl_shape_output.SetElemType(MklDnnType<T>());
+            mkl_shape_output.SetTfLayout(output_dims.size(), output_dims,
+                                         memory::format::blocked);
+
+            // We now simply forward input Mkl tensor to output and change its
+            // output MklDnnShape object.
+            ForwardMklTensorInToOutWithMklShape(context, kInputSlotIdx,
+                                              kOutputSlotIdx, mkl_shape_output);
+            return;
+          }
+        } catch (mkldnn::error &e) {
+          string error_msg = "Status: " + std::to_string(e.status) +
+                       ", message: " + string(e.message) +
+                       ", in file " + string(__FILE__) + ":" +
+                       std::to_string(__LINE__);
+          OP_REQUIRES_OK(context,
+                   errors::Aborted("Operation received an exception:",
+                      error_msg));
+        }
+      }
+    } else {
+      // If input tensor is not in Mkl format, then just copy Tensorflow tensor
+      // to output with specified shape.
+      CopyTfTensorInToOutWithShape(context, kInputSlotIdx, kOutputSlotIdx,
+                                   shape);
+    }
+  }
+
+#endif  // INTEL_MKL_DNN
+
+ private:
+  const int kInputSlotIdx = 0;
+  const int kOutputSlotIdx = 0;
+
   template <typename Tshape>
   Status ValidateSizes(const Tensor& sizes, int64* product, int* unknown_index,
                        TensorShape* shape) {
diff --git a/tensorflow/core/kernels/quantized_conv_ops.cc b/tensorflow/core/kernels/quantized_conv_ops.cc
index f83998e0c1..1921b83d12 100644
--- a/tensorflow/core/kernels/quantized_conv_ops.cc
+++ b/tensorflow/core/kernels/quantized_conv_ops.cc
@@ -268,6 +268,13 @@ class Im2ColConvFunctor {
     Im2ColBufferResource<T1, chunk_value_count>* im2col_buffer_resource;
     std::function<Status(Im2ColBufferResource<T1, chunk_value_count>**)>
         creator = [](Im2ColBufferResource<T1, chunk_value_count>** resource) {
+#ifdef _MSC_VER
+          // MSVC complains about the capture of chunk_value_count which oddly
+          // works fine in conv_ops_using_gemm.cc for example.
+          // Define chunk_value_count inside the lambda for now.
+          const int64 chunk_value_count =
+              (kMaxChunkSize + (sizeof(T1) - 1)) / sizeof(T1);
+#endif
           *resource = new Im2ColBufferResource<T1, chunk_value_count>();
           return Status::OK();
         };
diff --git a/tensorflow/core/lib/io/snappy/snappy_outputbuffer.cc b/tensorflow/core/lib/io/snappy/snappy_outputbuffer.cc
index be1fa22c69..3c31016732 100644
--- a/tensorflow/core/lib/io/snappy/snappy_outputbuffer.cc
+++ b/tensorflow/core/lib/io/snappy/snappy_outputbuffer.cc
@@ -161,7 +161,7 @@ Status SnappyOutputBuffer::Deflate() {
   }
 
   // Write length of compressed block to output buffer.
-  char* compressed_length_array = new char[4];
+  char compressed_length_array[4];
   std::fill(compressed_length_array, compressed_length_array + 4, 0);
   for (int i = 0; i < 4; i++) {
     // Little endian.
@@ -173,7 +173,6 @@ Status SnappyOutputBuffer::Deflate() {
   TF_RETURN_IF_ERROR(AddToOutputBuffer(output.data(), output.size()));
   next_in_ += avail_in_;
   avail_in_ = 0;
-  delete[] compressed_length_array;
 
   return Status::OK();
 }
diff --git a/tensorflow/core/ops/nn_ops.cc b/tensorflow/core/ops/nn_ops.cc
index 980d0c31a3..15122afd23 100644
--- a/tensorflow/core/ops/nn_ops.cc
+++ b/tensorflow/core/ops/nn_ops.cc
@@ -2958,6 +2958,25 @@ NOTE Do not invoke this operator directly in Python. Graph rewrite pass is
 expected to invoke these operators.
 )doc");
 
+REGISTER_OP("__MklDummyConv2DWithBias")
+    .Input("input: T")
+    .Input("filter: T")
+    .Input("bias: T")
+    .Output("output: T")
+    .Attr("T: {half, float, double}")
+    .Attr("strides: list(int)")
+    .Attr("use_cudnn_on_gpu: bool = true")
+    .Attr(GetPaddingAttrString())
+    .Attr(GetConvnetDataFormatAttrString())
+    .Doc(R"doc(
+Dummy node that enables fusing Conv2D and BiasAdd operator for MKL. This node
+does not perform anything. It is just created as an intermediate output of
+merging Conv2D and BiasAdd.
+
+NOTE Do not invoke this operator directly in Python. Graph rewrite pass is
+expected to invoke these operators.
+)doc");
+
 REGISTER_OP("_MklConv2DWithBias")
     .Input("input: T")
     .Input("filter: T")
@@ -3011,6 +3030,88 @@ NOTE Do not invoke this operator directly in Python. Graph rewrite pass is
 expected to invoke these operators.
 )doc");
 
+REGISTER_OP("__MklDummyConv2DBackpropFilterWithBias")
+    .Input("input: T")
+    .Input("filter_sizes: int32")
+    .Input("out_backprop: T")
+    .Output("output: T")
+    .Output("bias_grad: T")
+    .Attr("T: {half, float, double}")
+    .Attr("strides: list(int)")
+    .Attr("use_cudnn_on_gpu: bool = true")
+    .Attr(GetPaddingAttrString())
+    .Attr(GetConvnetDataFormatAttrString())
+    .SetShapeFn([](InferenceContext* c) {
+      ShapeHandle input_shape;
+      // Fetch the data_format attribute, which may not exist.
+      string data_format;
+      Status s = c->GetAttr("data_format", &data_format);
+
+      if (s.ok() && data_format == "NCHW") {
+        TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 4, &input_shape));
+        c->set_output(1, c->Vector(c->Dim(input_shape, -3)));
+      } else {
+        TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 4, &input_shape));
+        c->set_output(1, c->Vector(c->Dim(input_shape, -1)));
+      }
+      ShapeHandle sh;
+      TF_RETURN_IF_ERROR(c->MakeShapeFromShapeTensor(1, &sh));
+      TF_RETURN_IF_ERROR(c->WithRank(sh, 4, &sh));
+      c->set_output(0, sh);
+      return Status::OK();
+    })
+    .Doc(R"doc(
+Dummy node that enables fusing Conv2DBackpropFilter and BiasAddGrad operator
+for MKL. This node does not perform anything. It is just created as an
+intermediate output of merging Conv2DBackpropFilter and BiasAddGrad.
+
+NOTE Do not invoke this operator directly in Python. Graph rewrite pass is
+expected to invoke these operators.
+)doc");
+
+REGISTER_OP("_MklConv2DBackpropFilterWithBias")
+    .Input("input: T")
+    .Input("filter_sizes: int32")
+    .Input("out_backprop: T")
+    .Input("mkl_input: uint8")
+    .Input("mkl_filter_size: uint8")
+    .Input("mkl_out_backprop: uint8")
+    .Output("output: T")
+    .Output("bias_grad: T")
+    .Output("mkl_output: uint8")
+    .Output("mkl_bias_grad: uint8")
+    .Attr("T: {half, float, double}")
+    .Attr("strides: list(int)")
+    .Attr("use_cudnn_on_gpu: bool = true")
+    .Attr(GetPaddingAttrString())
+    .Attr(GetConvnetDataFormatAttrString())
+    .SetShapeFn([](InferenceContext* c) {
+      ShapeHandle input_shape;
+      // Fetch the data_format attribute, which may not exist.
+      string data_format;
+      Status s = c->GetAttr("data_format", &data_format);
+
+      if (s.ok() && data_format == "NCHW") {
+        TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 4, &input_shape));
+        c->set_output(1, c->Vector(c->Dim(input_shape, -3)));
+      } else {
+        TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 4, &input_shape));
+        c->set_output(1, c->Vector(c->Dim(input_shape, -1)));
+      }
+      ShapeHandle sh;
+      TF_RETURN_IF_ERROR(c->MakeShapeFromShapeTensor(1, &sh));
+      TF_RETURN_IF_ERROR(c->WithRank(sh, 4, &sh));
+      c->set_output(0, sh);
+      return Status::OK();
+    })
+    .Doc(R"doc(
+MKL version of Conv2DBackpropFilterWithBias. Uses MKL DNN APIs to compute the
+gradients of convolution with respect to the filter.
+
+NOTE Do not invoke this operator directly in Python. Graph rewrite pass is
+expected to invoke these operators.
+)doc");
+
 REGISTER_OP("_MklConv2DWithBiasBackpropBias")
     .Input("out_backprop: T")
     .Input("mkl_out_backprop: uint8")
@@ -3087,6 +3188,78 @@ NOTE Do not invoke this operator directly in Python. Graph rewrite pass is
 expected to invoke these operators.
 )doc");
 
+REGISTER_OP("_MklElu")
+    .Input("features: T")
+    .Input("mkl_features: uint8")
+    .Output("activations: T")
+    .Output("mkl_activations: uint8")
+    .Attr("T: realnumbertype")
+    .SetShapeFn(shape_inference::UnchangedShape)
+    .Doc(R"doc(
+MKL version of Elu operator. Uses MKL DNN APIs to implement Elu operator.
+NOTE Do not invoke this operator directly in Python. Graph rewrite pass is
+expected to invoke these operators.
+)doc");
+
+REGISTER_OP("_MklEluGrad")
+    .Input("gradients: T")
+    .Input("features: T")
+    .Input("mkl_gradients: uint8")
+    .Input("mkl_features: uint8")
+    .Output("backprops: T")
+    .Output("mkl_backprops: uint8")
+    .Attr("T: realnumbertype")
+    .SetShapeFn(shape_inference::MergeBothInputsShapeFn)
+    .Doc(R"doc(
+MKL version of EluGrad operator. Uses MKL DNN APIs to compute Elu
+gradients for Elu operation.
+NOTE Do not invoke this operator directly in Python. Graph rewrite pass is
+expected to invoke these operators.
+)doc");
+
+REGISTER_OP("_MklSoftmax")
+    .Input("logits: T")
+    .Input("mkl_logits: uint8")
+    .Output("softmax: T")
+    .Output("mkl_softmax: uint8")
+    .Attr("T: {half, float, double}")
+    .SetShapeFn([](InferenceContext* c) {
+      return shape_inference::UnchangedShapeWithRankAtLeast(c, 1);
+    })
+    .Doc(R"doc(
+MKL version of ReluGrad operator. Uses MKL DNN APIs to compute rectified
+linear gradients for Relu operation.
+)doc");
+
+REGISTER_OP("_MklTanh")
+    .Input("features: T")
+    .Input("mkl_features: uint8")
+    .Output("activations: T")
+    .Output("mkl_activations: uint8")
+    .Attr("T: realnumbertype")
+    .SetShapeFn(shape_inference::UnchangedShape)
+    .Doc(R"doc(
+MKL version of Tanh operator. Uses MKL DNN APIs to implement Tanh operator.
+NOTE Do not invoke this operator directly in Python. Graph rewrite pass is
+expected to invoke these operators.
+)doc");
+
+REGISTER_OP("_MklTanhGrad")
+    .Input("gradients: T")
+    .Input("features: T")
+    .Input("mkl_gradients: uint8")
+    .Input("mkl_features: uint8")
+    .Output("backprops: T")
+    .Output("mkl_backprops: uint8")
+    .Attr("T: realnumbertype")
+    .SetShapeFn(shape_inference::MergeBothInputsShapeFn)
+    .Doc(R"doc(
+MKL version of TanhGrad operator. Uses MKL DNN APIs to compute tanh
+gradients for Tanh operation.
+NOTE Do not invoke this operator directly in Python. Graph rewrite pass is
+expected to invoke these operators.
+)doc");
+
 REGISTER_OP("_MklMaxPool")
     .Attr("T: {float, half} = DT_FLOAT")
     .Attr("ksize: list(int) >= 4")
diff --git a/tensorflow/core/platform/cloud/BUILD b/tensorflow/core/platform/cloud/BUILD
index 624145da75..aaeccc8324 100644
--- a/tensorflow/core/platform/cloud/BUILD
+++ b/tensorflow/core/platform/cloud/BUILD
@@ -10,6 +10,7 @@ licenses(["notice"])  # Apache 2.0
 load(
     "//tensorflow:tensorflow.bzl",
     "tf_cc_test",
+    "tf_copts",
 )
 
 filegroup(
@@ -29,6 +30,7 @@ filegroup(
 cc_library(
     name = "expiring_lru_cache",
     hdrs = ["expiring_lru_cache.h"],
+    copts = tf_copts(),
     visibility = ["//tensorflow:__subpackages__"],
     deps = ["//tensorflow/core:lib"],
 )
@@ -37,6 +39,7 @@ cc_library(
     name = "file_block_cache",
     srcs = ["file_block_cache.cc"],
     hdrs = ["file_block_cache.h"],
+    copts = tf_copts(),
     visibility = ["//tensorflow:__subpackages__"],
     deps = ["//tensorflow/core:lib"],
 )
@@ -45,6 +48,7 @@ cc_library(
     name = "gcs_dns_cache",
     srcs = ["gcs_dns_cache.cc"],
     hdrs = ["gcs_dns_cache.h"],
+    copts = tf_copts(),
     visibility = ["//tensorflow:__subpackages__"],
     deps = [
         ":http_request",
@@ -56,6 +60,7 @@ cc_library(
     name = "gcs_file_system",
     srcs = ["gcs_file_system.cc"],
     hdrs = ["gcs_file_system.h"],
+    copts = tf_copts(),
     linkstatic = 1,  # Needed since alwayslink is broken in bazel b/27630669
     visibility = ["//visibility:public"],
     deps = [
@@ -78,6 +83,7 @@ cc_library(
 cc_library(
     name = "http_request",
     hdrs = ["http_request.h"],
+    copts = tf_copts(),
     visibility = ["//tensorflow:__subpackages__"],
     deps = [
         "//tensorflow/core:framework_headers_lib",
@@ -89,6 +95,7 @@ cc_library(
     name = "curl_http_request",
     srcs = ["curl_http_request.cc"],
     hdrs = ["curl_http_request.h"],
+    copts = tf_copts(),
     visibility = ["//tensorflow:__subpackages__"],
     deps = [
         ":http_request",
@@ -104,6 +111,7 @@ cc_library(
     hdrs = [
         "http_request_fake.h",
     ],
+    copts = tf_copts(),
     visibility = ["//tensorflow:__subpackages__"],
     deps = [
         ":curl_http_request",
@@ -121,6 +129,7 @@ cc_library(
         "auth_provider.h",
         "google_auth_provider.h",
     ],
+    copts = tf_copts(),
     visibility = ["//tensorflow:__subpackages__"],
     deps = [
         ":curl_http_request",
@@ -136,6 +145,7 @@ cc_library(
     name = "now_seconds_env",
     testonly = 1,
     hdrs = ["now_seconds_env.h"],
+    copts = tf_copts(),
     visibility = ["//tensorflow:__subpackages__"],
     deps = [
         "//tensorflow/core:lib",
@@ -151,6 +161,7 @@ cc_library(
     hdrs = [
         "oauth_client.h",
     ],
+    copts = tf_copts(),
     deps = [
         ":curl_http_request",
         ":http_request",
@@ -169,6 +180,7 @@ cc_library(
     hdrs = [
         "retrying_utils.h",
     ],
+    copts = tf_copts(),
     deps = [
         "//tensorflow/core:framework_headers_lib",
         "//tensorflow/core:lib_internal",
@@ -183,6 +195,7 @@ cc_library(
     hdrs = [
         "retrying_file_system.h",
     ],
+    copts = tf_copts(),
     deps = [
         ":retrying_utils",
         "//tensorflow/core:framework_headers_lib",
@@ -198,6 +211,7 @@ cc_library(
     hdrs = [
         "time_util.h",
     ],
+    copts = tf_copts(),
     deps = [
         "//tensorflow/core:framework_headers_lib",
         "//tensorflow/core:lib_internal",
diff --git a/tensorflow/core/platform/cloud/gcs_dns_cache.cc b/tensorflow/core/platform/cloud/gcs_dns_cache.cc
index 78bf680317..87b0dde136 100644
--- a/tensorflow/core/platform/cloud/gcs_dns_cache.cc
+++ b/tensorflow/core/platform/cloud/gcs_dns_cache.cc
@@ -14,9 +14,14 @@ limitations under the License.
 ==============================================================================*/
 
 #include "tensorflow/core/platform/cloud/gcs_dns_cache.h"
-
+#ifndef _WIN32
 #include <arpa/inet.h>
 #include <netdb.h>
+#else
+#include <winsock2.h>
+#include <ws2tcpip.h>
+#include <Windows.h>
+#endif
 #include <sys/types.h>
 
 namespace tensorflow {
@@ -26,6 +31,21 @@ namespace {
 const std::vector<string>& kCachedDomainNames =
     *new std::vector<string>{"www.googleapis.com", "storage.googleapis.com"};
 
+inline void print_getaddrinfo_error(const string& name, int error_code) {
+#ifndef _WIN32
+  if (error_code == EAI_SYSTEM) {
+    LOG(ERROR) << "Error resolving " << name
+               << " (EAI_SYSTEM): " << strerror(errno);
+  } else {
+    LOG(ERROR) << "Error resolving " << name << ": "
+               << gai_strerror(error_code);
+  }
+#else
+  // TODO:WSAGetLastError is better than gai_strerror
+  LOG(ERROR) << "Error resolving " << name << ": " << gai_strerror(error_code);
+#endif
+}
+
 // Selects one item at random from a vector of items, using a uniform
 // distribution.
 template <typename T>
@@ -86,7 +106,7 @@ Status GcsDnsCache::AnnotateRequest(HttpRequest* request) {
 
   std::vector<string> output;
   if (return_code == 0) {
-    for (addrinfo* i = result; i != nullptr; i = i->ai_next) {
+    for (const addrinfo* i = result; i != nullptr; i = i->ai_next) {
       if (i->ai_family != AF_INET || i->ai_addr->sa_family != AF_INET) {
         LOG(WARNING) << "Non-IPv4 address returned. ai_family: " << i->ai_family
                      << ". sa_family: " << i->ai_addr->sa_family << ".";
@@ -106,13 +126,7 @@ Status GcsDnsCache::AnnotateRequest(HttpRequest* request) {
       }
     }
   } else {
-    if (return_code == EAI_SYSTEM) {
-      LOG(ERROR) << "Error resolving " << name
-                 << " (EAI_SYSTEM): " << strerror(errno);
-    } else {
-      LOG(ERROR) << "Error resolving " << name << ": "
-                 << gai_strerror(return_code);
-    }
+    print_getaddrinfo_error(name, return_code);
   }
   if (result != nullptr) {
     freeaddrinfo(result);
diff --git a/tensorflow/core/platform/cloud/gcs_file_system.cc b/tensorflow/core/platform/cloud/gcs_file_system.cc
index f80cbf7626..a183fe6fa8 100644
--- a/tensorflow/core/platform/cloud/gcs_file_system.cc
+++ b/tensorflow/core/platform/cloud/gcs_file_system.cc
@@ -22,6 +22,9 @@ limitations under the License.
 #include <cstring>
 #include <fstream>
 #include <vector>
+#ifdef _WIN32
+#include <io.h>  //for _mktemp
+#endif
 #include "include/json/json.h"
 #include "tensorflow/core/lib/core/errors.h"
 #include "tensorflow/core/lib/gtl/map_util.h"
@@ -40,6 +43,12 @@ limitations under the License.
 #include "tensorflow/core/platform/protobuf.h"
 #include "tensorflow/core/platform/thread_annotations.h"
 
+#ifdef _WIN32
+#ifdef DeleteFile
+#undef DeleteFile
+#endif
+#endif
+
 namespace tensorflow {
 
 namespace {
@@ -109,16 +118,25 @@ constexpr char kReadRequestTimeout[] = "GCS_READ_REQUEST_TIMEOUT_SECS";
 // upload requests.
 constexpr char kWriteRequestTimeout[] = "GCS_WRITE_REQUEST_TIMEOUT_SECS";
 
+// TODO: DO NOT use a hardcoded path
 Status GetTmpFilename(string* filename) {
   if (!filename) {
     return errors::Internal("'filename' cannot be nullptr.");
   }
+#ifndef _WIN32
   char buffer[] = "/tmp/gcs_filesystem_XXXXXX";
   int fd = mkstemp(buffer);
   if (fd < 0) {
     return errors::Internal("Failed to create a temporary file.");
   }
   close(fd);
+#else
+  char buffer[] = "/tmp/gcs_filesystem_XXXXXX";
+  char* ret = _mktemp(buffer);
+  if (ret == nullptr) {
+    return errors::Internal("Failed to create a temporary file.");
+  }
+#endif
   *filename = buffer;
   return Status::OK();
 }
@@ -306,6 +324,7 @@ class GcsWritableFile : public WritableFile {
         file_cache_erase_(std::move(file_cache_erase)),
         sync_needed_(true),
         initial_retry_delay_usec_(initial_retry_delay_usec) {
+    // TODO: to make it safer, outfile_ should be constructed from an FD
     if (GetTmpFilename(&tmp_content_filename_).ok()) {
       outfile_.open(tmp_content_filename_,
                     std::ofstream::binary | std::ofstream::app);
@@ -429,7 +448,7 @@ class GcsWritableFile : public WritableFile {
       return errors::Internal("'size' cannot be nullptr");
     }
     const auto tellp = outfile_.tellp();
-    if (tellp == -1) {
+    if (tellp == static_cast<std::streampos>(-1)) {
       return errors::Internal(
           "Could not get the size of the internal temporary file.");
     }
diff --git a/tensorflow/core/platform/cloud/google_auth_provider.cc b/tensorflow/core/platform/cloud/google_auth_provider.cc
index f6fd8373cd..d77f439c5a 100644
--- a/tensorflow/core/platform/cloud/google_auth_provider.cc
+++ b/tensorflow/core/platform/cloud/google_auth_provider.cc
@@ -14,9 +14,12 @@ limitations under the License.
 ==============================================================================*/
 
 #include "tensorflow/core/platform/cloud/google_auth_provider.h"
+#ifndef _WIN32
 #include <pwd.h>
-#include <sys/types.h>
 #include <unistd.h>
+#else
+#include <sys/types.h>
+#endif
 #include <fstream>
 #include "include/json/json.h"
 #include "tensorflow/core/lib/core/errors.h"
diff --git a/tensorflow/core/platform/cloud/oauth_client.cc b/tensorflow/core/platform/cloud/oauth_client.cc
index c700b97dc9..3c2830ccd9 100644
--- a/tensorflow/core/platform/cloud/oauth_client.cc
+++ b/tensorflow/core/platform/cloud/oauth_client.cc
@@ -14,9 +14,13 @@ limitations under the License.
 ==============================================================================*/
 
 #include "tensorflow/core/platform/cloud/oauth_client.h"
+#ifndef _WIN32
 #include <pwd.h>
 #include <sys/types.h>
 #include <unistd.h>
+#else
+#include <sys/types.h>
+#endif
 #include <fstream>
 #include <openssl/bio.h>
 #include <openssl/evp.h>
diff --git a/tensorflow/core/platform/cloud/time_util.cc b/tensorflow/core/platform/cloud/time_util.cc
index 2f8643f3c7..0587a65c29 100644
--- a/tensorflow/core/platform/cloud/time_util.cc
+++ b/tensorflow/core/platform/cloud/time_util.cc
@@ -18,6 +18,9 @@ limitations under the License.
 #include <cmath>
 #include <cstdio>
 #include <ctime>
+#ifdef _WIN32
+#define timegm _mkgmtime
+#endif
 #include "tensorflow/core/lib/core/errors.h"
 
 namespace tensorflow {
diff --git a/tensorflow/core/platform/default/build_config.bzl b/tensorflow/core/platform/default/build_config.bzl
index 0f8cf8f122..948334d27b 100644
--- a/tensorflow/core/platform/default/build_config.bzl
+++ b/tensorflow/core/platform/default/build_config.bzl
@@ -458,7 +458,6 @@ def tf_additional_lib_deps():
 
 def tf_additional_core_deps():
   return select({
-      "//tensorflow:with_gcp_support_windows_override": [],
       "//tensorflow:with_gcp_support_android_override": [],
       "//tensorflow:with_gcp_support_ios_override": [],
       "//tensorflow:with_gcp_support": [
diff --git a/tensorflow/core/platform/profile_utils/android_armv7a_cpu_utils_helper.cc b/tensorflow/core/platform/profile_utils/android_armv7a_cpu_utils_helper.cc
index fb1955edde..12dc9c58b3 100644
--- a/tensorflow/core/platform/profile_utils/android_armv7a_cpu_utils_helper.cc
+++ b/tensorflow/core/platform/profile_utils/android_armv7a_cpu_utils_helper.cc
@@ -118,9 +118,10 @@ int64 AndroidArmV7ACpuUtilsHelper::ReadCpuFrequencyFile(
   const int retval = fscanf(fp, "%lld", &freq_in_khz);
   if (retval < 0) {
     LOG(WARNING) << "Failed to \"" << file_path << "\"";
+    fclose(fp);
     return INVALID_CPU_FREQUENCY;
   }
-  pclose(fp);
+  fclose(fp);
   return freq_in_khz * 1000;  // The file contains cpu frequency in khz
 }
 
diff --git a/tensorflow/core/platform/s3/s3_file_system.cc b/tensorflow/core/platform/s3/s3_file_system.cc
index 234f3c3aed..682ad97eec 100644
--- a/tensorflow/core/platform/s3/s3_file_system.cc
+++ b/tensorflow/core/platform/s3/s3_file_system.cc
@@ -12,9 +12,9 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
+#include "tensorflow/core/platform/s3/s3_file_system.h"
 #include "tensorflow/core/lib/io/path.h"
 #include "tensorflow/core/platform/mutex.h"
-#include "tensorflow/core/platform/s3/s3_file_system.h"
 #include "tensorflow/core/platform/s3/s3_crypto.h"
 
 #include <aws/core/Aws.h>
@@ -49,9 +49,15 @@ Aws::Client::ClientConfiguration& GetDefaultClientConfig() {
     if (endpoint) {
       cfg.endpointOverride = Aws::String(endpoint);
     }
-    const char* region = getenv("S3_REGION");
+    const char* region = getenv("AWS_REGION");
     if (region) {
       cfg.region = Aws::String(region);
+    } else {
+      // TODO (yongtang): `S3_REGION` should be deprecated after 2.0.
+      const char* region = getenv("S3_REGION");
+      if (region) {
+        cfg.region = Aws::String(region);
+      }
     }
     const char* use_https = getenv("S3_USE_HTTPS");
     if (use_https) {
diff --git a/tensorflow/core/util/mkl_util.h b/tensorflow/core/util/mkl_util.h
index 148c7851bd..2caf5fc56d 100644
--- a/tensorflow/core/util/mkl_util.h
+++ b/tensorflow/core/util/mkl_util.h
@@ -328,6 +328,10 @@ class MklShape {
 
 // Forward decl
 TensorFormat MklDnnDataFormatToTFDataFormat(memory::format format);
+memory::dims CalculateTFStrides(const memory::dims& dims_tf_order);
+memory::desc CreateBlockedMemDescHelper(const memory::dims& dim,
+                                        const memory::dims& strides,
+                                        memory::data_type dtype);
 
 class MklDnnShape {
  private:
@@ -364,6 +368,52 @@ class MklDnnShape {
   ~MklDnnShape() {}
   TF_DISALLOW_COPY_AND_ASSIGN(MklDnnShape);  // Cannot copy
 
+  /// Helper function to compare memory::desc objects for MklDnn.
+  /// May be this should go into MklDnn directly.
+  inline bool CompareMklDnnLayouts(const memory::desc& md1,
+                                   const memory::desc& md2) const {
+    mkldnn_memory_desc_t mdd1 = md1.data;
+    mkldnn_memory_desc_t mdd2 = md2.data;
+    const char* d1 = reinterpret_cast<const char*>(&mdd1);
+    const char* d2 = reinterpret_cast<const char*>(&mdd2);
+
+    size_t md_size = sizeof(mdd1);
+    for (size_t i = 0; i < md_size; i++) {
+      if (*d1++ != *d2++) {
+        return false;
+      }
+    }
+    return true;
+  }
+
+  /// Equality function for MklDnnShape objects
+  /// @return true if both are equal; false otherwise.
+  inline bool operator == (const MklDnnShape& input_shape) const {
+    if (this->IsMklTensor() != input_shape.IsMklTensor()) {
+      return false;
+    }
+
+    // If input tensors are in Mkl layout, then we check for dimensions and
+    // sizes.
+    if (this->IsMklTensor()) {
+      return this->GetTfShape() == input_shape.GetTfShape() &&
+             CompareMklDnnLayouts(this->GetMklLayout(),
+                                  input_shape.GetMklLayout());
+    }
+
+    return true;
+  }
+
+  /// Equality operator for MklDnnShape and TFShape.
+  /// Returns: true if TF shapes for both are the same, false otherwise
+  inline bool operator == (const TensorShape& input_shape) const {
+    if (!this->IsMklTensor()) {
+      return false;
+    }
+
+    return this->GetTfShape() == input_shape;
+  }
+
   inline const bool IsMklTensor() const { return data_.is_mkl_tensor_; }
   inline void SetMklTensor(bool is_mkl_tensor) {
     data_.is_mkl_tensor_ = is_mkl_tensor;
@@ -375,7 +425,7 @@ class MklDnnShape {
   inline size_t GetDimension(char dimension) const {
     int index = GetMklDnnTensorDimIndex(dimension);
     CHECK(index >= 0 && index < this->GetDimension())
-        << "Invalid index from the dimension: " << index << ", " << dimension;
+      << "Invalid index from the dimension: " << index << ", " << dimension;
     return this->DimSize(index);
   }
 
@@ -405,7 +455,7 @@ class MklDnnShape {
   inline memory::dims GetSizesAsMklDnnDims() const {
     memory::dims retVal;
     if (data_.is_mkl_tensor_) {
-      int dimensions = sizeof(data_.sizes_) / sizeof(data_.sizes_[0]);
+      size_t dimensions = sizeof(data_.sizes_) / sizeof(data_.sizes_[0]);
       for (size_t i = 0; i < dimensions; i++) {
         if (data_.sizes_[i] != INVALID_DIM_SIZE)
           retVal.push_back(data_.sizes_[i]);
@@ -423,12 +473,21 @@ class MklDnnShape {
 
   /// Return TensorShape that describes the Tensorflow shape of the tensor
   /// represented by this MklShape.
-  inline TensorShape GetTfShape() {
+  inline TensorShape GetTfShape() const {
     CHECK_EQ(data_.is_mkl_tensor_, true);
 
     std::vector<int32> shape(data_.dimension_, -1);
-    for (size_t idx = 0; idx < data_.dimension_; ++idx) {
-      shape[idx] = data_.sizes_[TfDimIdx(idx)];
+    if (data_.tf_data_format_ != memory::format::blocked) {
+      for (size_t idx = 0; idx < data_.dimension_; ++idx) {
+        shape[idx] = data_.sizes_[TfDimIdx(idx)];
+      }
+    } else {
+      // If Tensorflow shape is in Blocked format, then we don't have dimension
+      // map for it. So we just create Tensorflow shape from sizes in the
+      // specified order.
+      for (size_t idx = 0; idx < data_.dimension_; ++idx) {
+        shape[idx] = data_.sizes_[idx];
+      }
     }
 
     TensorShape ts;
@@ -444,6 +503,12 @@ class MklDnnShape {
     CHECK_NOTNULL(pd);
     data_.mkl_md_ = pd->desc().data;
   }
+
+  inline void SetMklLayout(memory::desc* md) {
+    CHECK_NOTNULL(md);
+    data_.mkl_md_ = md->data;
+  }
+
   inline const memory::desc GetMklLayout() const {
     return memory::desc(data_.mkl_md_);
   }
@@ -452,7 +517,8 @@ class MklDnnShape {
     return data_.tf_data_format_;
   }
   /// We don't create primitive_descriptor for TensorFlow layout now.
-  /// We use lazy evaluation and create it only when needed.
+  /// We use lazy evaluation and create it only when needed. Input format can
+  /// also be Blocked format.
   inline void SetTfLayout(size_t dims, const memory::dims& sizes,
                           memory::format format) {
     CHECK_EQ(dims, sizes.size());
@@ -461,15 +527,26 @@ class MklDnnShape {
       data_.sizes_[ii] = sizes[ii];
     }
     data_.tf_data_format_ = format;
-    SetTfDimOrder(dims, format);
+    if (format != memory::format::blocked) {
+      SetTfDimOrder(dims, format);
+    }
   }
+
   inline const memory::desc GetTfLayout() const {
     memory::dims dims;
     for (size_t ii = 0; ii < data_.dimension_; ii++) {
       dims.push_back(data_.sizes_[ii]);
     }
-    return memory::desc(dims, data_.T_, data_.tf_data_format_);
+
+    // Create Blocked memory desc if input TF format was set like that.
+    if (data_.tf_data_format_ == memory::format::blocked) {
+      auto strides = CalculateTFStrides(dims);
+      return CreateBlockedMemDescHelper(dims, strides, data_.T_);
+    } else {
+      return memory::desc(dims, data_.T_, data_.tf_data_format_);
+    }
   }
+
   inline const memory::desc GetCurLayout() const {
     return IsMklTensor() ? GetMklLayout() : GetTfLayout();
   }
@@ -579,8 +656,13 @@ class MklDnnShape {
 #endif
 
 // List of MklShape objects. Used in Concat/Split layers.
+
 typedef std::vector<MklShape> MklShapeList;
 
+#ifdef INTEL_MKL_DNN
+typedef std::vector<MklDnnShape> MklDnnShapeList;
+#endif
+
 // Check if all tensors specified by MklShapes are MKL tensors.
 inline bool AreAllMklTensors(const MklShapeList& shapes) {
   for (auto& s : shapes) {
@@ -591,6 +673,7 @@ inline bool AreAllMklTensors(const MklShapeList& shapes) {
   return true;
 }
 
+#ifndef INTEL_MKL_DNN
 template <typename T>
 inline Tensor ConvertMklToTF(OpKernelContext* context, const Tensor& mkl_tensor,
                              const MklShape& mkl_shape) {
@@ -615,32 +698,15 @@ inline Tensor ConvertMklToTF(OpKernelContext* context, const Tensor& mkl_tensor,
 
   return output_tensor;
 }
-
-#ifdef INTEL_MKL_DNN
+#else
 template <typename T>
 inline Tensor ConvertMklToTF(OpKernelContext* context, const Tensor& mkl_tensor,
                              const MklDnnShape& mkl_shape) {
   Tensor output_tensor;
   TensorShape output_shape;
 
-#if 0
-  // TODO(nhasabni): need to implement
-  for (size_t j = 0; j < mkl_shape.GetDimension(); j++) {
-    // Outermost to innermost dimension
-    output_shape.AddDim(mkl_shape.GetSizes()[mkl_shape.tf_dim_idx(j)]);
-  }
-
-  // Allocate output tensor.
-  context->allocate_temp(DataTypeToEnum<T>::v(), output_shape, &output_tensor);
-
-  dnnLayout_t output_layout = static_cast<dnnLayout_t>(mkl_shape.GetTfLayout());
-  void* input_buffer = const_cast<T*>(mkl_tensor.flat<T>().data());
-  void* output_buffer = const_cast<T*>(output_tensor.flat<T>().data());
-
-  if (mkl_tensor.NumElements() != 0) {
-    mkl_shape.GetConvertedFlatData(output_layout, input_buffer, output_buffer);
-  }
-#endif
+  TF_CHECK_OK(Status(error::Code::UNIMPLEMENTED,
+                     "Unimplemented conversion function"));
 
   return output_tensor;
 }
@@ -682,6 +748,9 @@ inline void GetMklInputList(OpKernelContext* ctext, StringPiece name,
   ctext->input_list(name, input_tensors);
 }
 
+
+#ifndef INTEL_MKL_DNN
+
 inline void GetMklShapeList(OpKernelContext* ctext, StringPiece name,
                             MklShapeList* mkl_shapes) {
   OpInputList input_mkl_tensors;
@@ -694,6 +763,22 @@ inline void GetMklShapeList(OpKernelContext* ctext, StringPiece name,
   }
 }
 
+#else
+
+inline void GetMklShapeList(OpKernelContext* ctext, StringPiece name,
+                            MklDnnShapeList* mkl_shapes) {
+  OpInputList input_mkl_tensors;
+  GetMklInputList(ctext, strings::StrCat("mkl_", name), &input_mkl_tensors);
+
+  for (int i = 0; i < input_mkl_tensors.size(); i++) {
+    (*mkl_shapes)[i].DeSerializeMklDnnShape(
+        input_mkl_tensors[i].flat<uint8>().data(),
+        input_mkl_tensors[i].flat<uint8>().size() * sizeof(uint8));
+  }
+}
+
+#endif
+
 #ifdef INTEL_MKL_DNN
 /// Get shape of input tensor pointed by 'input_idx' in TensorShape format.
 /// If the input tensor is in MKL layout, then obtains TensorShape from
@@ -909,6 +994,7 @@ inline void CopyMklTensorInToOut(OpKernelContext* context,
   context->set_output(idx_meta_out, meta_output);
 }
 
+#ifndef INTEL_MKL_DNN
 inline void CopyTfTensorInToOutWithShape(OpKernelContext* context,
                                          int idx_in, int idx_out,
                                          const TensorShape& shape) {
@@ -926,6 +1012,27 @@ inline void CopyTfTensorInToOutWithShape(OpKernelContext* context,
   CHECK(output.CopyFrom(data, shape));
   context->set_output(idx_data_out, output);
 }
+#else
+inline void CopyTfTensorInToOutWithShape(OpKernelContext* context,
+                                         int idx_in, int idx_out,
+                                         const TensorShape& shape) {
+  int num_inputs = context->num_inputs();
+  int num_outputs = context->num_outputs();
+  int idx_data_in = GetTensorDataIndex(idx_in, num_inputs);
+  int idx_data_out = GetTensorDataIndex(idx_out, num_outputs);
+
+  const Tensor& data = context->input(idx_data_in);
+  MklDnnShape mkl_shape_output;
+  mkl_shape_output.SetMklTensor(false);
+  AllocateOutputSetMklShape(context, idx_out, mkl_shape_output);
+  Tensor output(data.dtype());
+  // TODO(intel_tf): alternatively, call forward_input_to_output_with_shape(...)
+  CHECK(output.CopyFrom(data, shape));
+  context->set_output(idx_data_out, output);
+}
+#endif
+
+#ifndef INTEL_MKL_DNN
 
 inline void ForwardTfTensorInToOut(OpKernelContext* context,
                                   int idx_in, int idx_out) {
@@ -944,6 +1051,27 @@ inline void ForwardTfTensorInToOut(OpKernelContext* context,
   }
 }
 
+#else
+
+inline void ForwardTfTensorInToOut(OpKernelContext* context,
+                                  int idx_in, int idx_out) {
+  int num_inputs = context->num_inputs();
+  int num_outputs = context->num_outputs();
+  int idx_data_in = GetTensorDataIndex(idx_in, num_inputs);
+  int idx_data_out = GetTensorDataIndex(idx_out, num_outputs);
+
+  MklDnnShape dnn_shape_output;
+  dnn_shape_output.SetMklTensor(false);
+  AllocateOutputSetMklShape(context, idx_out, dnn_shape_output);
+  if (IsRefType(context->input_dtype(idx_data_in))) {
+    context->forward_ref_input_to_ref_output(idx_data_in, idx_data_out);
+  } else {
+    context->set_output(idx_data_out, context->input(idx_data_in));
+  }
+}
+
+#endif
+
 inline void ForwardMklTensorInToOut(OpKernelContext* context,
                                    int idx_in, int idx_out) {
   int num_inputs = context->num_inputs();
@@ -962,6 +1090,25 @@ inline void ForwardMklTensorInToOut(OpKernelContext* context,
   }
 }
 
+#ifdef INTEL_MKL_DNN
+inline void ForwardMklTensorInToOutWithMklShape(OpKernelContext* context,
+                                             int idx_in, int idx_out,
+                                             const MklDnnShape& mkl_shape) {
+  int num_inputs = context->num_inputs();
+  int num_outputs = context->num_outputs();
+  int idx_data_in = GetTensorDataIndex(idx_in, num_inputs);
+  int idx_data_out = GetTensorDataIndex(idx_out, num_outputs);
+
+  AllocateOutputSetMklShape(context, idx_out, mkl_shape);
+
+  if (IsRefType(context->input_dtype(idx_data_in))) {
+    context->forward_ref_input_to_ref_output(idx_data_in, idx_data_out);
+  } else {
+    context->set_output(idx_data_out, context->input(idx_data_in));
+  }
+}
+#endif
+
 // Forward the MKL shape ONLY (used in elementwise and other ops where
 // we call the eigen implementation and MKL shape is not used)
 inline void ForwardMklMetaDataInToOut(OpKernelContext* context,
@@ -985,6 +1132,10 @@ inline void SetDummyMklShapeOutput(OpKernelContext* context,
   AllocateOutputSetMklShape(context, idx_data_out, mkl_shape_output);
 }
 
+#ifndef INTEL_MKL_DNN
+// We don't need these functions in MKLDNN. We have defined equality operator
+// on MklDnnShape class directly.
+
 // Checks if the TF shape for both MKL tensors is the same or not
 // Returns: true if both TF shapes are the same, false otherwise
 inline bool MklCompareShapes(const MklShape* input_shape_0,
@@ -1051,6 +1202,7 @@ inline bool MklCompareShapes(const TensorShape* input_shape_0,
 
   return true;
 }
+#endif
 
 // These functions do not compile with MKL-DNN since mkl.h is missing.
 // We may need to remove them later.
@@ -1127,11 +1279,14 @@ inline memory::format TFDataFormatToMklDnnDataFormat(TensorFormat format) {
 /// @return: Tensorflow data format corresponding to memory::format
 ///          Fails with an error if invalid data format.
 inline TensorFormat MklDnnDataFormatToTFDataFormat(memory::format format) {
-  if (format == memory::format::nhwc)
-    return FORMAT_NHWC;
-  else if (format == memory::format::nchw)
-    return FORMAT_NCHW;
-  TF_CHECK_OK(Status(error::Code::INVALID_ARGUMENT, "Unsupported data format"));
+  if (format == memory::format::nhwc) return FORMAT_NHWC;
+  else if (format == memory::format::nchw) return FORMAT_NCHW;
+  TF_CHECK_OK(Status(error::Code::INVALID_ARGUMENT,
+                     "Unsupported data format"));
+
+  // Return to prevent compiler warnings, otherwise TF_CHECK_OK will ensure
+  // that we don't come here.
+  return FORMAT_NHWC;
 }
 
 /// Map TensorShape object into memory::dims required by MKL-DNN
@@ -1175,6 +1330,23 @@ inline memory::dims TFShapeToMklDnnDimsInNCHW(const TensorShape& shape,
   return memory::dims({n, c, h, w});
 }
 
+/// Overloaded version of function above. Input parameters are
+/// self-explanatory.
+inline memory::dims MklDnnDimsInNCHW(const memory::dims& in_dims,
+                                     TensorFormat format) {
+  // Check validity of format.
+  CHECK_NE(TFDataFormatToMklDnnDataFormat(format),
+           memory::format::format_undef);
+
+  int n = in_dims[GetTensorDimIndex(format, 'N')];
+  int c = in_dims[GetTensorDimIndex(format, 'C')];
+  int h = in_dims[GetTensorDimIndex(format, 'H')];
+  int w = in_dims[GetTensorDimIndex(format, 'W')];
+
+  // MKL-DNN requires dimensions in NCHW format.
+  return memory::dims({n, c, h, w});
+}
+
 /// Map MklDnn memory::dims object into TensorShape object.
 ///
 /// This function will simply map input shape in MKL-DNN memory::dims format
@@ -1217,6 +1389,43 @@ inline padding_kind TFPaddingToMklDnnPadding(Padding pad) {
   return padding_kind::zero;
 }
 
+/// Helper function to create memory descriptor in Blocked format
+///
+/// @input: Tensor dimensions
+/// @input: strides corresponding to dimensions. One can use utility
+///         function such as CalculateTFStrides to compute strides
+///         for given dimensions.
+/// @return: memory::desc object corresponding to blocked memory format
+///          for given dimensions and strides.
+inline memory::desc CreateBlockedMemDescHelper(const memory::dims& dim,
+                                               const memory::dims& strides,
+                                               memory::data_type dtype) {
+  CHECK_EQ(dim.size(), strides.size());
+
+  // We have to construct memory descriptor in a C style. This is not at all
+  // ideal but MKLDNN does not offer any API to construct descriptor in
+  // blocked format except a copy constructor that accepts
+  // mkldnn_memory_desc_t.
+  mkldnn_memory_desc_t md;
+  md.primitive_kind = mkldnn_memory;
+  md.ndims = dim.size();
+  md.format = mkldnn_blocked;
+  md.data_type = memory::convert_to_c(dtype);
+
+  for (size_t i = 0; i < dim.size(); i++) {
+    md.layout_desc.blocking.block_dims[i] = 1;
+    md.layout_desc.blocking.strides[1][i] = 1;
+    md.layout_desc.blocking.strides[0][i] = strides[i];
+    md.layout_desc.blocking.padding_dims[i] = dim[i];
+    md.layout_desc.blocking.offset_padding_to_data[i] = 0;
+    md.dims[i] = dim[i];
+  }
+  md.layout_desc.blocking.offset_padding = 0;
+
+  return memory::desc(md);
+}
+
+
 /*
  * Class to represent all the resources corresponding to a tensor in TensorFlow
  * that are required to execute an operation (such as Convolution).
@@ -1285,30 +1494,8 @@ class MklDnnData {
   /// @return: memory::desc object corresponding to blocked memory format
   ///          for given dimensions and strides.
   static inline memory::desc CreateBlockedMemDesc(const memory::dims& dim,
-                                                  const memory::dims& strides) {
-    CHECK_EQ(dim.size(), strides.size());
-
-    // We have to construct memory descriptor in a C style. This is not at all
-    // ideal but MKLDNN does not offer any API to construct descriptor in
-    // blocked format except a copy constructor that accepts
-    // mkldnn_memory_desc_t.
-    mkldnn_memory_desc_t md;
-    md.primitive_kind = mkldnn_memory;
-    md.ndims = dim.size();
-    md.format = mkldnn_blocked;
-    md.data_type = memory::convert_to_c(MklDnnType<T>());
-
-    for (size_t i = 0; i < dim.size(); i++) {
-      md.layout_desc.blocking.block_dims[i] = 1;
-      md.layout_desc.blocking.strides[1][i] = 1;
-      md.layout_desc.blocking.strides[0][i] = strides[i];
-      md.layout_desc.blocking.padding_dims[i] = dim[i];
-      md.layout_desc.blocking.offset_padding_to_data[i] = 0;
-      md.dims[i] = dim[i];
-    }
-    md.layout_desc.blocking.offset_padding = 0;
-
-    return memory::desc(md);
+                                                 const memory::dims& strides) {
+    return CreateBlockedMemDescHelper(dim, strides, MklDnnType<T>());
   }
 
   /// A version of SetUsrMem call that allows user to create memory in blocked
@@ -1376,6 +1563,7 @@ class MklDnnData {
     return user_memory_->get_primitive_desc();
   }
 
+
   /// Get function for descriptor of user memory.
   inline memory::desc GetUsrMemDesc() {
     // This is ugly. Why MKL-DNN does not provide desc() method of const type??
@@ -1438,6 +1626,17 @@ class MklDnnData {
     return op_pd != user_memory_->get_primitive_desc();
   }
 
+  /// Predicate that checks if we need to reorder user's memory into memory
+  /// based on the provided format.
+  ///
+  /// @input: target_format - memory format of the given input of an
+  ///               operation
+  /// @return: true in case reorder of input is needed; false, otherwise.
+  inline bool IsReorderNeeded(const memory::format& target_format) const {
+    CHECK_NOTNULL(user_memory_);
+    return target_format != user_memory_->get_primitive_desc().desc().data.format;
+  }
+
   /// Function to create a reorder from memory pointed by from to memory pointed
   /// by to. Returns created primitive.
   inline primitive CreateReorder(const memory* from, const memory* to) const {
diff --git a/tensorflow/docs_src/api_guides/python/image.md b/tensorflow/docs_src/api_guides/python/image.md
index a2c8c3c3c9..051e4547ee 100644
--- a/tensorflow/docs_src/api_guides/python/image.md
+++ b/tensorflow/docs_src/api_guides/python/image.md
@@ -19,6 +19,7 @@ Note: The PNG encode and decode Ops support RGBA, but the conversions Ops
 presently only support RGB, HSV, and GrayScale. Presently, the alpha channel has
 to be stripped from the image and re-attached using slicing ops.
 
+*   @{tf.image.decode_bmp}
 *   @{tf.image.decode_gif}
 *   @{tf.image.decode_jpeg}
 *   @{tf.image.encode_jpeg}
diff --git a/tensorflow/docs_src/api_guides/python/reading_data.md b/tensorflow/docs_src/api_guides/python/reading_data.md
index 4594887349..f316cce953 100644
--- a/tensorflow/docs_src/api_guides/python/reading_data.md
+++ b/tensorflow/docs_src/api_guides/python/reading_data.md
@@ -175,14 +175,25 @@ For example,
 [`tensorflow/examples/how_tos/reading_data/convert_to_records.py`](https://www.tensorflow.org/code/tensorflow/examples/how_tos/reading_data/convert_to_records.py)
 converts MNIST data to this format.
 
-To read a file of TFRecords, use
-@{tf.TFRecordReader} with
-the @{tf.parse_single_example}
-decoder. The `parse_single_example` op decodes the example protocol buffers into
-tensors. An MNIST example using the data produced by `convert_to_records` can be
-found in
-[`tensorflow/examples/how_tos/reading_data/fully_connected_reader.py`](https://www.tensorflow.org/code/tensorflow/examples/how_tos/reading_data/fully_connected_reader.py),
-which you can compare with the `fully_connected_feed` version.
+The recommended way to read a TFRecord file is with a @{tf.data.TFRecordDataset}, [as in this example](https://www.tensorflow.org/code/tensorflow/examples/how_tos/reading_data/fully_connected_reader.py):
+
+``` python
+    dataset = tf.data.TFRecordDataset(filename)
+    dataset = dataset.repeat(num_epochs)
+
+    # map takes a python function and applies it to every sample
+    dataset = dataset.map(decode)
+```
+
+To acomplish the same task with a queue based input pipeline requires the following code 
+(using the same `decode` function from the above example): 
+
+``` python
+  filename_queue = tf.train.string_input_producer([filename], num_epochs=num_epochs)
+  reader = tf.TFRecordReader()
+  _, serialized_example = reader.read(filename_queue)
+  image,label = decode(serialized_example)
+```
 
 ### Preprocessing
 
diff --git a/tensorflow/docs_src/get_started/mnist/mechanics.md b/tensorflow/docs_src/get_started/mnist/mechanics.md
index 71eee4291e..dac00498e1 100644
--- a/tensorflow/docs_src/get_started/mnist/mechanics.md
+++ b/tensorflow/docs_src/get_started/mnist/mechanics.md
@@ -47,7 +47,7 @@ training folder and then unpack that data to return a dictionary of `DataSet`
 instances.
 
 ```python
-data_sets = input_data.read_data_sets(FLAGS.train_dir, FLAGS.fake_data)
+data_sets = input_data.read_data_sets(FLAGS.input_data_dir, FLAGS.fake_data)
 ```
 
 **NOTE**: The `fake_data` flag is used for unit-testing purposes and may be
@@ -364,7 +364,7 @@ may be instantiated to write the events files, which
 contain both the graph itself and the values of the summaries.
 
 ```python
-summary_writer = tf.summary.FileWriter(FLAGS.train_dir, sess.graph)
+summary_writer = tf.summary.FileWriter(FLAGS.log_dir, sess.graph)
 ```
 
 Lastly, the events file will be updated with new summary values every time the
@@ -398,7 +398,7 @@ method will periodically be called to write a checkpoint file to the training
 directory with the current values of all the trainable variables.
 
 ```python
-saver.save(sess, FLAGS.train_dir, global_step=step)
+saver.save(sess, checkpoint_file, global_step=step)
 ```
 
 At some later point in the future, training might be resumed by using the
@@ -406,7 +406,7 @@ At some later point in the future, training might be resumed by using the
 method to reload the model parameters.
 
 ```python
-saver.restore(sess, FLAGS.train_dir)
+saver.restore(sess, checkpoint_file)
 ```
 
 ## Evaluate the Model
diff --git a/tensorflow/docs_src/programmers_guide/estimators.md b/tensorflow/docs_src/programmers_guide/estimators.md
index 6544a16f2b..8b6cbbcd17 100644
--- a/tensorflow/docs_src/programmers_guide/estimators.md
+++ b/tensorflow/docs_src/programmers_guide/estimators.md
@@ -187,7 +187,7 @@ est_inception_v3.train(input_fn=train_input_fn, steps=2000)
 Note that the names of feature columns and labels of a keras estimator come from
 the corresponding compiled keras model. For example, the input key names for
 @{$get_started/input_fn} in above `est_inception_v3` estimator can be obtained
-from `keras_inception_v3.input_names`, and similarily, the predicted output
+from `keras_inception_v3.input_names`, and similarly, the predicted output
 names can be obtained from `keras_inception_v3.output_names`.
 
 For more details, please refer to the documentation for
diff --git a/tensorflow/docs_src/programmers_guide/variables.md b/tensorflow/docs_src/programmers_guide/variables.md
index 16753c931f..bac385c02c 100644
--- a/tensorflow/docs_src/programmers_guide/variables.md
+++ b/tensorflow/docs_src/programmers_guide/variables.md
@@ -205,7 +205,7 @@ methods:
 v = tf.get_variable("v", shape=(), initializer=tf.zeros_initializer())
 assignment = v.assign_add(1)
 tf.global_variables_initializer().run()
-assignment.run()
+sess.run(assignment)  # or assignment.op.run()
 ```
 
 Most TensorFlow optimizers have specialized ops that efficiently update the
diff --git a/tensorflow/examples/android/build.gradle b/tensorflow/examples/android/build.gradle
index 48f566f825..f7bdf8b816 100644
--- a/tensorflow/examples/android/build.gradle
+++ b/tensorflow/examples/android/build.gradle
@@ -28,8 +28,8 @@ buildscript {
     }
 
     dependencies {
-        classpath 'com.android.tools.build:gradle:2.3.0'
-        classpath 'org.apache.httpcomponents:httpclient:4.5.2'
+        classpath 'com.android.tools.build:gradle:3.0.1'
+        classpath 'org.apache.httpcomponents:httpclient:4.5.4'
     }
 }
 
@@ -75,7 +75,7 @@ apply plugin: 'com.android.application'
 
 android {
     compileSdkVersion 23
-    buildToolsVersion "25.0.2"
+    buildToolsVersion '26.0.2'
 
     if (nativeBuildSystem == 'cmake') {
         defaultConfig {
diff --git a/tensorflow/examples/android/gradle/wrapper/gradle-wrapper.jar b/tensorflow/examples/android/gradle/wrapper/gradle-wrapper.jar
new file mode 100644
index 0000000000000000000000000000000000000000..13372aef5e24af05341d49695ee84e5f9b594659
GIT binary patch
literal 53636
zcmWIWW@h1HVBp|j(AqTBoq>UYfeAz~Ffed3FfjPKhB)ea`nl;dGoUKGK6OTrJp%(n
zC<6n72(m(7M?X(D*WeI6U$@V`XHNTg>*`(P_14uocjo-&AcHH$51xKHqkF>htnXQE
zPaQ_CS8XZNo-B#d+##;I?8%a(6Nk1+y_977`l*N!$wDzSm$5J~Fyt4dqc{p(4L4Lx
zdQoCZPAXod!l+8iixLY8Qj0LOWHhCuEoNX~xXQr5pp0FMOMZD?PJUvFilJU|PGWI!
zZI3V4Ap?Qd`x&ND+GYAp+}GRY9h5In)U$ESan9lN^jx)fHaGu+g-1jRU)wyhl{-_j
z{`+H21?NKtB$AwJwwX^qUAs~>ao5(h7sEted);A8+-AIU+dh+58najHN~pE8mUd~Y
zbLm#Tc8T>qUSGeuhry}Hz-?Er*gbE54{NFGhcxPTg&5^?e72uZA}L^7vs}LAf)bCD
zn*^JDd+%Z1QeD|vv`{{Jul1S;69Yp53j>1-0Y3&;7MG;v1{CENq!yKArWOYj<yTZX
zmX>6s=9Oe7Czj+FK>}fEaBjYkqd=XQM*H!Lk5(qEoqFq9Rmjt>{EG=voV}f#hQ6zO
zee81+nbX&mr{qukwEjzXuE|ICAB%f7J4{tz6n=mB+#8$EXKbu@e}DY^`g?{`6<JEX
zO`P1U%-NcE%r^G*ikJmQTze-g9K9o3iSPJ<ZH?7DC&l9)tc-2j5p0v<aplmmgostu
zjTM(|EyX_R+JDnbx_+4LJO9ovU+P4BR%{bDTqFNQS=H}b>J7o5N+qTDTjJlY+Qu>E
z`U9;gJl9?;2yA-xSwrYZWh8&<>0HlyUs`WZ-s2onuK$L$?!@fbpSi;%rbRow4c-~2
zF*|vNI2(_&-^PE9^-uB?rdZpqOnP9)?6J={*n8(9c46zgi5h1-Qw;bIt;^{MpC{-4
zj7cj~Q_Lu0iBxflq0Y9cuclc2I`Ljfk=JHJK$Ca*2lgEgvQmya+&tuLb4gZCy5i7=
z-}Q%6Py7wDWL$G@4uf-<!<WhnMeVf`mwwG_yZehL+OJl})l)wsw5z^dVxF((ANQ;>
zBR8dxi0tCXsgovmv;^C%GJLXpi^Zy!uj6JG`mDJapXOL!c;w64OUhA;=6X+y6A{~X
zUf{QnA5Ub|<YtBAsZnbanQ~&3XFKry6x`_**!y(9;=g--bCM7G-8yC&6kqU?=h^bk
zqNFu3brF3Bc=CEq*{VxRXC_|W`lO9hS@%}-!Ly9Hr4#D8!%yCNvNZ6B=!_kHVmGho
z2f#}mP`-WsCZc#10|Uct1_lNt?5WNZmbD;h%C+|(*C7L*w*O~dJ`)k#!xi^6>EoW&
zT+EIEU*fhKnR!{iDzTf*E`Ok1$a<<I$I0n`etxdGE6*VIqWVP@2S?o^ZB<i$j>#|8
zo`;{CqH!}(yfyxmNpyNp%VuT8h`ULFQ)Y&x^V^=Tt~K1a;(LqZmOqhaUppU)m_21_
zn|jcOQy&%yw3$jB`jD)@)9dE9Dcw6%dj2hb7ToSo5Wv2(MJ(oYu6OC%8((tXAE;+W
z^(o8Bs=F~v3=DxR3=F#1ed-P_utJMdi#+mkQ+@LDvr7vgp6(5f_Lp)L`R6t<_S)B=
zH5yyDWks_@-;8=2diludRh=Om3Y`8at(@!hcBe$<{0ZNyl9hGoxZM|bQ`^Hq+9#TK
zpWJ!2d|&aiGiTo0+t)Eg99q>Bc;;lqq6eDuJUcWw65`xqesl`BZBTf)vuOguUV*AW
zx1bE)L*AU;Jd>k;ef#C86@TmGy8NYIE;;(0pH_Bxeo0%ws~Z-kCZ{*7jyhNDyL;Cu
zgS)-jsheK*%sVUYU0pRXG2`+^>3NCOtcSCT9_??v{+-*-WJlVqW#1<*w4bM8z+yI+
zOSNZ-&+PZ++3k1EO1F!N*IJ|=){<DK`KQd_f$U7)HkVU{tD~-(YKdL9+wHdZTx{<0
zr*bAccE7$-ajMX7R%ur4j$4B1620nwbZ7N#%Z-|2@G0tMR{7E=bCyLvOKMcS)orXf
zDb&TlF~+WPlV`^hK}VH6K^uES*}kUnGS>Tkc(-vyMYE{rcY(ASFXsJrySzo~PAW(2
z<<%A4<yY2E`@+4tqMx7VbGZzA>4*Iuzdp>EzPB&n-I|9Vr4A=dt(Ls5DD}QaQEY#$
zs?grNlNI_3a|<WD*iq}%mT`8SfzG|oO|}e$4)Y$jOT0T?edU<W-Cu!KvVS|eGAD^1
zmB>6a`L27t$Yb+A3W_r=oF`2dx#(ehiJ4nGAwGJ7#>oV2i#~RhQ27Q;8x`3@CURRR
z`$wgGF*#g2{e}L!zfD!G4ii_f?Q3bCKEcZQ&=0oA8P{8tTzHhiI?8zZJ<ln$?R<Sm
zNK8ZRo9g9P``1m^jIId|=MdFj(89j*pXHSg0-eP?_3b*}4&}X3bt+mGTJ{YsL&`jg
zy?vCGf#DYy1A{U4Wa?UxoKsqyS(fUSnUm_6oSa%*?3<XEn4StP^{0k;7YL`y{Lh`G
zmboie(LpgFhoN;#K$C{p<)&#jk}{$e@GfN)-G0(6cGunB*}mSj3U;r4mwtFLcd?PO
zZUp-=Nq6_-%~p>aKOQ<NsXtFbU+&2L?X#!tljZv<_y76(|G&)a|NXwo%fSD5vck57
zo0$Zc9J%z=y<8wp=KEt80lSi})1HN|I2VM!$#k?-owiTGZ}I*9Muu}Nfyy1G7SczI
zE6hBC&$AZto&UX%#Xv*Em@%bwmXwQbo7Tj`1p-p%!jo8b$uF*xsGA{lZAqWH)%jAD
z;J44K{7Xwzeyg?wJ)df*x=h<m?^<^BtclqZ1Ew1EHm#K?o4-grlv8w@*{WHtyH_hX
zdU>rlH+$DSlbowk!6lAgwq~`Z$xjQA^Hr1nrBWfBHm%E4^K8KG?#0ckriQVVhW;=%
z?z*=;V%Co79dXqWi>F4M+ETP}&eD`pQ?oqHpL?>7-qKI`lH@2IY;6*odphNc(?V~<
zDRZ&}SNIol2;EwgCo@Y@xJdWi+KlWfon)z*i&9s%$$D$-(zDrSa;E#&j|Y{e$(Lu$
zSBx&3eO97haB^Ono641)lh}CE+e8lsd~JH%nAp?dv-0ypLu2m5jtf(m&R+S?ufWOp
z@Vx1!V?jM@EMKv`o@93Z=G(|4!Uw!hJiDXgIJ-c9%G%f&|2zNW@XE!=CT3gREt>Jm
z@tX75+fvdkwHH@E<vFSsb>)4Q@A2trUq94dJz9F{K<J@cfvHIWzAw@oZ6mLQM9)|F
zQGHFOE!unA?0yTM#-zhG9K{})Yn6<(^%L(Jxk*H<kKAIk)acs!JEhgFn|d@p8Oa^;
zlia-_xXH%eOH%*9qz#21Q)D%>s=~IWh$NiWNa}gJG5mbb2?_BQ4ZRaH4s_S;QF)xY
zM=9~Lm+SNg8;_;N99dl#GwGvL-1LbDg6sAuKKAw56#wq$J58yin&+Do7aG0oD?TFA
z#UYbl_(f^js+6{?IVmw0bCP;_W`?D#+o}9+j%$vwk<0hOReRq$)gA9IPu<i0{qGNl
zW9=t?f1KFUKQVpYbj{Y5Wxr<?@*RFOZ&HAmsOkDsE5C8Ne(}%DTEyGke%^m#Uh0%H
zUoW;jm9v`vAwQ*4YuA*$A&WyC4A*XZIZ5SB#I~d25<;?8R+Fx-X=46&rfEf$I@dO<
zQ*HXb%^H5wm%W~)eP+4F2?L?c%KLK7CWW@uyk5n0raRYn$(-kJ8%5VnmD^G2aQW~X
z!Dp-XELg4e`gO0I@WiXGlUJv%YI|^-jeA+#maD>Dwr3<`^{&@$UJ+g;{i2_L`o0}g
zir0UCHuZ4AfoZE&do{XU)0<h#_QZzoPlWpq=2HU4%HA;Cce!W7p`@ZV_xS{gGIyt>
z*X9MePjVZKQtxC-Emt(n+sWmhd^dDTY~hr0>#VqSw|C^Zv+{;t{va2mv`%W)2Jr&F
zL%mPtv?%kRH+-{L<Zi{Q9}A~5o-}5Df8d?wl=BD74S4PQ8#O~(nqrm(Sn}@N-FZhd
zWbU*_ahh>;yjRcKY07c1?{jC@-@5OZ{==Cec7J)!8n2Md;95KL*sUGrZ{|8_#K|pt
zW##_s-T6eV_59U41D57|k}`|gXRZ9k<m`g71uP0%CeCX;shPvoTB)|^?B;}RckUF+
zmI^%*O<Z*6m4u4gakYnciv`yGP?C#@Q>!qJv-IEaNzM4D<+8%=nY&u=&#jfOJ0$!3
z-}cSZEobiUs9Lvs#&?b48wY&MgEn!p&-mDEv-F4gTKV5U^Ir+_Otfk`KEd7i{*mS#
zEnz;71Z=ln3}=d8EPH<e^ZiE}bq6kgx!xI{@ldRC`^n$Ud+bmC?%rel<gfF*6-E3^
zTWXr;ecRl`Zs;7}Kk1m)xv+W5nZM{*E>AmE(ecAfPJL~U(SF4~v15Dq_X}NCobuRU
z>J!VK>Kgx5G~<F^b%uSpE4WQ}KgUur+tYU+Z+TI^B5O|{yGl%(d1-5kKI`fg&$eah
z_A;w`zE$1ZKgZ<u*87}GrWswmb6ax7?IrG#T80WQci4QBM{T^E;f-^f$i%?#lMQF%
zB^=Usa?a07%S<mVN-W9D&(i~Ug?cCF`U^V>9RGj&`L$-driMj}l-?@NS@6J^lRwlj
zlt<K~#(7=AH07s#cVbOH24^Z6er%OLz@Iv2Z}fsSw~v?`Z#Mos|L@zF-@nhFk7s!H
zL8IM!#>oV;ZkZmQcZK%H&)S%O-&uL!tdmrCrcKI)Z@Km#-h`QNh;2{YP?%9?#Cx2p
zXYJXq%XwPw{pxxd&9ijH@?@*r#h#|ib-(X5Zf0&<crWDhfjRLJIc5D4m*U?%&JgNf
zbnlAs?TyW<Z53O$ot+R-`hDfR{2P5=w^d)+wC8);YT<it-_4wxdwKKENEP9Ojc<*w
zNUghWc5B|w`x|@C&#SD-sXXTQBs}){;=Avv)Y292)ZFH}rnhHLz}l1f=YJOkZ>-wb
zzIocxz>Al)mOs{8xAM*ES?#O-e_Q3|zI#%-%T2+R+tVK^*WC~53z7G;cR%;u)@5qO
zjujowm)}-ZU*n$rzAj|zgUEkUyrMi8<Kpx5)|<ZC9ewuKvv;d|%}aTkPZ;yesZ;!?
z{n1op!u5bh%g*$+G4i_Jv-mnce(E}__(yK9FWllbVEot45Y+LpQ{?E6w+r<-;zjnU
z)L4FDPM9<QjltY9jq@k^U)vw}67+_paPJfgN28wL#5w=OPU%mUb(@sK%Gp&kMKfW_
zBEz15ux_@%v>uzoqA{Y=C7x93uB>-|_WI_ADFSo4C#OFuj1hjM`l|0^UhOZInZ;9L
zX3SMMd$!<E!4*}t4<6k2PZh|TizpOIrUY>sIX)IuD3n~{CbgHJ<G;b5N6}srmhc!Y
zX4$!x&GVE(@sgSEnS%rrCVNd-!?S4t%g)7YCr>H#dwyQR?-AJGqv;rAbfQ?*Bb0a2
zB*$r<n=i31QE|xfny`f@D7@)K)`TrQoBU>7uXQ-Zlwx(|&R=fSv>jM0oB5Q5fkBds
zfx!TKiwC{G>yen3k^^h*^oG4I77i8ppF2C{aM|g_0bX9M988(NIvPZCxU?>KE#+4U
z&}rJ`n?JcV#XQGsi>T=QA8vmS@$?-Ix3g?MEOFr6iSqv%U(3xGp6$MsR`qW0^V0J_
z=N6xTbN~OJpW+RFe?%VE>se9Z>@4hNGUv#OE_2f#s-LH@i6(zC&?^_#QVX3E_HkNU
zSPx5~+d59qb6!P$&3!8s&Sh6fU%vFliTmEcqhc|Kc2C-)u5Ycr^nJF0fwE`NVV#zc
zdD)I!nWD?OPIECiFH7C!E^6YwYq`jezT2Cf4z?Ly77}iK(^GIx%2=}K*0DEBCeAdH
z37&V_XZ~!n&s<f<bJRt>Esy269eF0JJYCpNXXA`=*Kb>rKR=kIRDWrMM%g1Sv*j((
z+MyeajmmEBNIT+VZI+bgvS)XR&W<!Sv%Du6;wEcbUUMj~RtQ;Yo%NI}&(p=UwQ;Nd
zX?OK@@kvV0XKizh{4Barqxj<z$-`bpGt#0oyRNKxsCM~KWZApBZ#t$#Jor1$<5+^l
zN~f@hysgtu&OEL3GU#5)z6A;1t<8bq7H!$yx4-UuT9SGtB_w^#f~9LVo@hxG%o4m%
zB-)U>R$x7YrtwKhz5K0GsYSa~y1YNHYSMc)PpovouUiYNHoUTQe_0TF$S1Nby5L1}
z#n+^DH<wS_!0i__<-w~eH}9|B!Lqv@Uzo4GQ+(;f^2+I5+Ezvhmvb^(rZ@GaZ<+KZ
z<K>2lmkUFG-Mh13xk$-6mbzsLJJZ5KIIOcaTZI`IZ{BpWjNiKNZOik^hq%&nlBaF@
z;8l9KxaVmHAA6-|j4W%+WX7}FH)h1QJxr=!Vt;FGlH+kDy9<S~@~eJ$96!Ee;Ui($
z`Av~EZRbm64v1^*yHXfjwC~V$t$i0i>h8Pz(Y7w2(EE7*4^PLExBN33uYB9jv{%TR
zPki&8moj?d5p!;>o51X@wm#u@*NRW3-n{ij$<LP>OZK_m-RDuex#arBQy#~&zns=D
zHT}CFdwN99Opm=vri*x$Ry>&FYyE9cjZfSk+4B|6H7h)=XMg?>sk*eS-~52(G=X(3
zTY`FZxT04}6y;4z7m%-=T6HuhYxQ%ZeOo5Z7vL^=xOvY~slKNR%QqPPdYpOKvtu=D
zqN;1NOqg4ytlndP_6_;xrj&kLF0=c9<qbK>+=$lRtu~J)35K2Wt>!iNnl<U?`@i#l
z=|2x-oMYZF`(_6hpSeN&WM`Z6_w`QfSJNo6oA~ab=CRO+ulns;KJ8Y&=PmVN#_f&$
zA^MsMO?m5H3%K%c%~)g8!0-6!Uxd@;FU}_YuH7I0GS#2|tNP<4M~}Ya+-1)nxa7wR
zzW%ZPZnNEs$+IWCKWW>dK5c=&{=%uZUf(to**H<pO!VTl?*g;uZ^^A}`PO+)X`7u}
zdy`}QiSH7(MeZEr+{r!h>3i;<-7j>KekUl1Xe@fo_pOC#pVB<(5a%pG>vxOxsxSHG
zwRUbuO;3UGx%<Dr2cEAr5uRB4BAw}P#l>i6ix$T%0ZNWDoUC8+E!!b1EpSZjReQ%B
zJ+JSAeR52f&VLoC$`o~-Gw1V@7^PL$Uw%}x-<DT+=k77bJ4d~D_@8CBc++Ub{xk84
z-)XyhV(#K^lkVJgQ`i0e`SNQuTe$_P4g1(KY!3)6Ddf_>-0k1w=fA#y_mTNMx%7!n
zKSZ4^4n2x_^!-}k9<TcKQ@JB9Rn6U<95=DQxcty<-v8_YD5Ew@D-Cvqfd-3N85rcT
z7nwf!>F^F=>*U}0mjXnN@1K5LYrWyI0-qVebGcl@+wu+7+EO?L<vG*Tey^P}d!2XY
zLXqD8UG=R}M~}$=Vg9qY=lM2Xp@_{B=FKa9Z#n<7R{g(!zjz-A_)qqDkj6B<K)dnw
z3bnvy9hcY+Yo|L?3ltnDUP$QZU(Qx8d4u!tE#t#Rcb@ZXeC7FeMqF<70m)6Vhaat9
ze%P-3-pbm^erCJ3Et_?GRWgUQMbFOV$66L%*(w`*QMNRp=;`9ceQtLGcWql&y!@zu
zY3|&G0dME6ln*%?q8s~iXJ6t=-H8EGpH8}5-OHoR7c##glDX|zQqAv>(yAJpr_+u_
zZ*A=spSNAZPvx3ga$8b_jI;5Sn=e~}9t($;9{FN5<)C)%@>_gg)1+*3+fvTYvy!qm
zI&vfEUk;y(kCdd$vWZSnPR6~}JM<qqO7RDJEbef3vJrd4`IK|6(cxqN|K$Hlk6FyL
z{hW4Z*1E2Z`aizg`Nr5S;jZ=yWZf@(IPBgfu9zs6@b)Vn|K=p$y5|wG?C64+E%QYm
zJv@3jzEks@``(gPuG+tx7Z$v{De`zpz1y5GJl8zaPFgv&)HwwG<5Zhor|@a&8V)m$
zb;@3!jFN7sc?nvc|Foxb8oP~eK<3;VPKpWpcxFWYv{srw@lRVuZQBi_9W0!wm)7ew
zY%VUC=C{qZK+u};M<~O&pAV++_wAp)UuU6q%RWEF-`p?m$jm(~>h*5%X~w9<ddXkT
z{eIEoFugoIBDyu4Gfvs#Zu9B78#43wA2{7!dQWNIHutD2-W9rgreET@lx5X=$y8&B
z<R$M`E784oK0NyNpAj`lo-2CYCCbjgkjKZspoTq31|*hbI2ISD<|e`G?x|rvlcmE2
z|6RK#S~=OGtN6y&<|OrAott8w!5W^zAFjHrUH?nV@79j&Y~_5d_q%qlKGIestns~;
zg=xxfHt{IUBU>2jp7b;AyBQn3P3!VE_PyrsGw+=-H@-i6`~QFE|C%$-Ikx|iMb{y@
zIY)zJ*z%5O?p1IOY<sHVb8}nDMxmqKg+~G>$VRKEvxPqkk_o&s<!!Xl8n=yN#~o`_
z`ub1Rr+*CMk?(%=xUhz4e$T_;!Wyah6OX35?vg$FJmp`F%<&JKc;Y)AbqCgHw&@?g
z*t-76htR-(vv}ggXWtiiIoq(t`&s_+4|65<Ngx0Iq4r>*dYk-_56+gA>Rr3J_<AC`
zqY6(H#N3Y1*x8n<6;Qlv<?UwIvxOz@2Pb+@{_CuK__J~F;mMz;A9rtmzI>T_aq@H#
z6}P+$^Q}37#@shGr3|>Ai@uTF;&c3}PkEU3v=Y|eYHG*l%gD{SXSH>GLiW!^c8_0t
zOU;fnsWEPwT6;+2m5E^G9RuF)i<*vwd|%UaVZqepxBdwpzH;2_H1{sswYnmuAN3!k
zo8F#Kx<RsNI?tkaG8^AGT`QluH#J*Zd)M5#7w#;rbKC0UJG+l>N!c#3{NLQWb{oro
zURkwm-p6#e>bn|Ka(qk9m+xPDT{z!!$CI4G-J-0eXRnm5P}sJ>^4AQt&Yq{4*TiS%
zoKBM1w!iC?d<^R)(PY<y6|dG_trjo5wz6hRinxCEIU&_&7pBXtd%X1B!@Z|>J<edQ
zJ)!ZUZ}o!1;YDALPRiYpee>em`e6G#8=i4lORwY82#|QZFgL6{T>R@)1$)8c#Rm`F
z`n9PvSU#?_^zz3`GDdsfXq;)Alzgt~)NjkzJQHT`nmu>snYTilmJ}b;%bQpcJo{L9
zL>X(Llx#xlo;T(1=Jjg*b`A=Vo3qL5+akT$DZlvmzMkY16)y-pT4^J9v_FLB@o}wr
zhfPI`E254DOFVYHmlt`||3g<7tIj8{>x(2bB#-XNTm56@i$6T}>28~}>J(RV|5(};
z-}xnb&!PuscBk*S@xxbSf2d*O^2lA2{l2MqUypG$+*jZnU$Xee+^+kN`S<)-?A-76
z(OtCu(EQLfk3&W455|jbsc@h6QqMX;q-fb{ah9*yiwYRmb3fYD)Lc4a!Gg8BtRszL
zlOykcOaHwfRO{K~Wlz<nd)!%)xNgdwCH`~mHpV8pY1}z{KjhBwwyyo&ZtuExKfA@Y
z@)7SI&mxw&{o<RCq+OqG{zgM?#gBH87fk;=^SF%9se0~R@=(S58TYc%?u1GC)00@2
z-+Sn*x+|ny*OdR-mK}`!1-dJDiL4Ku_aQ@8Fgz{J@xAq~2aNld#O}NnQlPjyYgfLk
z^X7l7NryifvT@5h*WcZ@XY=vvpEvBiz4~_W=H^MWHI%RG&Ta|*&}cO=PIl$fg1>ej
zFZ|C;t9yL1vj3S<vd!LVxAM1bhA)lkoOzb+TB_>irCoV;>CS>zuNqvWPO{%qS~+Kb
zvBuQZ+-9-wQ&+t6(Z6|3w=#P#i`hrx#s{B&z4}pV7|Cw^dDn4G-JEX^53h`=-Tx_z
z<;|sdolMu|$L^&yt$(xT@~p?NPS**k=X8q{TrXoZ>VEv^p4F-R-ELbR{jK0xqZQ73
zqM$bK_O9vnWnrfb-)+2D^hY7QzaY$_{j#XW`l?m(k0hS+lqei|zq!t9)%q4jo+2@3
z=f+0$6`H#W4JVx2)7Q3YYHbUvO6F6GZwoGOF1c}?Pp4C51IrPE+@EIuz5N5vZNEIz
zNJ^WdcUiR-NB`1lV^NoLt!4sqC+|-ZeRI>HW?hrox+_NC)V>vF@D^$$*R!5|erWbC
z_OcghWiP6N{J#dIFtb0uwD#dO4_4jR(GRyM9x^-{uxq1yrcB~A_KVG-Y2i-0-!$Lj
zdTXXv*dKGy`@}{uUYk;3|DdcVk(2#zCHtIJ`075R$>8mqqZ&&&On=^CJ-Opxz0;ij
zrJWl#_Be~m$a%|f?r&UN+&gpS&Vct<Y`3htKINT>ciA!Lso(tb7g{}246yP1U>acD
zSjZpRyeGZ!{%#v}_1{9d-#=W?n0(Fd+U01s<7|CrTRWcGimJ@^pHf&;Tf!V$+$p@~
z;z{;-Pao@)J?~rdwOGbM*wAFM)m)u2@zc6L&Kd3FxMkR>IWHqAZJD97PVAKQk3HhP
z_r-qM)!cBtuMp45cq#je*rKf|Lj2PUUpf7bZ83g5^Vm0|psRaS_GHXU>UGPgoU&8y
z`ychco1^8T3XXmjSb8kf=Jmzd^A#KQXWvR%-uv@~+*`qCb9UWuY`1-FE4%RdX^(JQ
zxwMpokVLm7M$6KUO;-4q6?$vio^6S8=P$cUv@fWP39PI=vOlEtzD48z>6Ly)MpHZm
z9M8)hxwL*^qV41tU&GEieRsKUo1$Iue%BP$2Q`mOUd|O<H~;37pUV7M6YRW~*}1R3
z9?@JmN1^XktL!Z&tCpGo#=F+_vhP1f9B-exwMOLBf3$gFi+kJdN((bEywPM}P{3ZX
zdgc|EB<AEmMtkN)a@UAF-D<DC`{qrZ!`cFePG!3&RkoW;3GjMOxFIm%gMq@Ul=4YE
zZ<ODio8+=Ks@v=8zJTb9VQc!6bVCC@FKFDzs@?W&;nuBNuZCskzka)S@Aq%_ZvWQ$
zUY}NGK08I=k^cU_Y0s+XJpVuEeNBAa-`|(rJ2MpP1s2$yn|@5aSE)CCRuhv6W9G4w
z%cT|=$*upu!vAshc~*@B!l^lvOeQnOoCsbrQ|f`zJh27VJ0fPi<YH$wnVNp$&rGiK
zpFY1~IG^bI@G^6W)Op?yk~aQTUP~;4AO2*Vb@(xJ%&AnyRga5)u9>hbXx6I2n6su_
z`X4?!wThqYIm$Tey!Vp&mGh^cEZ*5sJ*{W1?1H3qUq5qIuG4u?ToKxE{^@6?iq*jt
zr47@X#gDyOnmJ)vpvD?OuIIli*M2e4tZ6(n|5Nei`PV(&y&KLw{(32g<3sD7z=ziu
z);|4McTeT~*-Ig;72NruKa^T;r(D{`u=e>|#;k|kqW1(Bggv}{X!?(zK|go+%+hN8
z%CNO?_o0OhxzEe&+yCbL*y;S_t<D2IGuPFp|LzH%Cn%`4szAp<c7w{S9~SSPX)p2A
zn8S0yFK^1RNu8Rl<(hYd7v#;9ejXQjLA@(Z^1<pKSwHtg-LuoR+ZpvRdqxv)Z}uxQ
z^ZZ>~7OpJ4d~Dg(+2*Ee^LLgw-(9xVr}gag*So*W>X3WACUdJ$+L~h4%Rhcq=w!Y$
z%<h)+bo`V(vu0vN`tpehvo6Z`ms~4R`%-XZ#Y@i@ckj9_T6R<Jh*!PEJ>l*nUkxtj
zZ142teOdVM(%R_Q(vwRBbNe}-?#$yhdvfmJqRTz^0#v4&luKrx=?E0(l05mu%68f9
zO;>jWX4pJrn(%q8N%FF-T`wnH>c9B<v8m<G8>!JV_I&-g;h@RJn>~`cD>HUays;!X
zz(#=o<wV`8LsQ<DEWiHBXtIxStYvq;lAwn?hbqrqBdIe>j+Sg&%@#dZUFKF|*)pMB
zsmGGFPsuEw{ZVRT=MJSQ*5^XXPAypwDDg%(t=em!xw?{%u*@gJ!>exY*~l5(RkVCd
z*~{)}dfC3NGu7N(zP8PAY+dhM{`o|n+Ez_5=Jy?Ziw_xi`(=HpT-@Ox;L4I|ARH;#
z?z7<5rX1tlPo<Z!=Je!6P7C>A{6UD#c_qi1xyt|VTzVLC>Q2+i%c)<CHg?7f%iK<!
ze7;-BDb4coWH*he25a}tDs50JxS3EU<>?((#=+LL$!1p4T}|#1UfZs~w$PflEBp8y
z>+XHK%CYa8U4!HHl4VQ=r78)t?7UW8{ibzc#?8!#(v2U)loQLoJiV2bY{TcFE3xo?
z##cARl@`~p^12yKG1x83Uv)T*>6K4WSb|N^<(ykpe$sM`wRdM~TyQ%RYdPn(R^;x}
z6K!RCzQ(#`qzBb(-cqt<i~3cz!c{J^`Ul#oPI$c(ynUJVWQCTXQBFm_an<hL38^h_
z1?!JAiiBLg^KioIRpI_B&lJ<P^>3MNo^-@uLi!@j3{&gmT~oaz7RMS)TGs8kOr3Z2
zM)3ukj&w=)xLom3V?EKaX5y7?9H&#nlwXu@NNh2es=DDh(NkN-KQdC`)|%@_gahwx
zdc%2-BS`8+$#!Mi=MtsPN_r>DN>BAzOxdvT;*^WRDMuvQK7YOWAbKKe<K9J@Y|}bJ
z%zNfrt3FU^+WJxBMNhfitH&2t$T?4086oJu=5*CnpFGp#8_O1KUA`qpTBJukx+F4E
zz)Y!a$>o;rr3QhX%ig?Y{`Bay{HvxUPUZ{qHr+M-aM(%1UUc`RmPf@g$(q*%f4ZIV
z`jT3kcw5ptd%|R?RF_F2G84}-3Ry@hy)ORMy7lZOX|t5%fR=Tdk(({ri;{IjO*eVQ
z>@=GEqszV6-?=17<KErrlbdI(@{TQ@ygP*NT7qtWubRJ$j`)S#?sL%_5*ULwzvi2E
z^WCvIU7xDNJvIl-a&=xK#-;uJtkL09rN2*KvygP^Zn<!3qwuM<jYeC<=Fhq2S?t|*
zO=w}LsaVwQHZlFFN5$tIdbIJ`)jju)I>$`ooGzMNR-U&d#Y0zq^R`VpuKGRadvx{7
zo~)V6E`9#e<h6fcd8n1oU#-7$>K0y7uw8tq^o!)wbqljYt6JnjU%Ks+>p31$aB9){
zRa<7~1-z{KwdT<7(0z^hAwSjbE&lNQ$>X%C`46K*|JmH{di(Q;*IDK@o-b-0pXz^_
zzQA?eB#sx?ww8vj2p0Lb=~VSkGnTS<EprN31*;q%-VWW@ynoTB@SlEDzkiH9q4zGe
z`B3@lKc@e7o%;TvZtDA|`5wjdPkrwe;$QJc;V+N$!3&RiSI$Uv>&-cow(Ctzw7Ty0
zunW(ot31zsxMSj#q(X0J$E=rWK9`egXG}X#Cp}#zY^~`s^Z2t}*|Hu_W?k&AwYs!i
z_sqH1tKDWA9;vVW{q#lH&)GE&Jl`gqy8q~}kF}qM%dB!Cokvk$CVMXUbofv1)OU(A
zUMSSNKXp$KeYZ(l*EnT)tnR&qJ5OF%c-ZmbW$z@_jb;a~l=_9dzr5>9Y}>Uw3B$9O
z7T?{Jo^8AAYG|y`X74X9A)9xVaQ7}eTdsTHdG_5-3$dtLDV@9Odt^3*E<G4FSFZim
ztSf70&1@+YTBIx^JWX}hyoQXIEo?nE-dsApwa>+7)yr8^Jyegz`SFJ8c4h87x~y%}
zsxWQOX-AA^22Sn@Xq)qH%~9u<pVnO~6FHGlTGnfp<?z&<wYPs|O^1B=56-FcMUKlq
zt9x_JdFE!li5GsW)UL8|un+#pVZZd#<xgfD^DoYBdg8EK(wlwZf#;Gr7t~i3wjI<9
zu6y3TYf<49#d%M?r?2{>^IlNPJ=E^ldCfnuQ}?SU=*Y=tXLd!cPGn4+S>*RxZq@TG
zeHDwsCcmhTdng|A&(3~I=dHk2ncxeltILjs)J=>RS{XO(+0;a_Q({$G`%hH|1b5i0
zOp811BOSM4<sasmX1^b2n=SkFt3pd?o3m}D5bMV1{0@n#pn_#5cSa-$`MDpNDQm`7
z{bh#g#TkcW4#rz0nVBmJYJ5y7<#{^yl>L+IqW>nIvVS&z#h>6h=44wx)fJy+^UU9W
z)ZSh4Rn)%~Uljj4J=`ApZ|Q=^;eY&&U0Qs~WNJOH{O5cYYh!n-<i8t!<yPf+os$)^
z-=m#Z>A%$eqC40Bu&4Jwe%0E4zFcYhyozXtpR-?1oZx<Td&V2~r#<Pp-=F^AdAe!J
z%B8-(pXSteg|leL6rPgts}IV&dBVEsrLx*nna#_m?UGxwb#mshl9v(hT<k9PIz8mr
z`1Y`K*xrRnCJmK0Pq*HkzvGi+-qzr-b9+@jzkK=FXYT8pN8HT59SY!*v`@Qw#o40s
zLgICqbI-U~(`(MevYk5<aQ~XVSkpi5*mKWP)pzpDd^Bm_guV|P+-lp`>t#2&t$ep3
z&+}(9AN#U>1yL;dnKNT1`N++i`KJ1CVOY)E3$>nsnUl5e)mUsSiHa?g;n3f>S;ioJ
z%C-J^^Y<pLwAr{W@ABU;8|C9I4@K>!7)|cIn8)qz65l>|+S}qY8n14<?>-(nU&;LB
zlVwJkah$?c4>koJJF)9_dQ79p92rN|!+w5O&n4dK{I}~{ip``=Je|uQ&Q6inV*YWl
zSy$;hR|4}ANvm^yg)iGIcRkfs3iozfpILq5qs6{ASHcy-jhQljKU<u0W$tMU<C_yS
zZzwd$wHcqc_&HTP!l%jaYn6_iv!;0Alv!b~iZ~OVdoQp{j5fMH!MR+1vU};NV@qa>
zZHxPzQtb8N<eiNy{uM=2mCIuiQrnaKctoG4tbQ2YbNTN1?~ATZS-9|v@5C>^RApwr
zeZ636veUB_-l`|3-?WxKDJOktsn+G0%Tp$aO3txlR9P9Vx6*8T+W+ShjW--R$9VRm
zoQj`_l(S9I^Btc80vsjwzP9!g@bmrlHR4j?{@1#J8%(aV9L`VeieVQBimfV&wMjH_
z>(MYWW-!)E`x5o?{6y(Avoz5e7k9q=<XUdEed9qz^S)hcR-W8?a6|VB;fJZKq;*#|
zC>I)L)$tqcc3d7TKUFZ$j`gM2uC5vDj)lwB&f<Dn(fi%WGW)jwoJnuh`_5GO9lRs`
zQf*81hf-OMijQ-`?HpL2Hj89v%b3rMlT-L=(5t`R!pE!fY2v4;;j+s(+jIElJ6B8n
zym0QQSMRb*k`of;&Mp($GuP8RdC8rHp4%_1b@?{?#F1~aJ*KAb?~IZAQa5j3gWSYt
z9IPG(i$h)QePi0wg1moDHeuACoW0jE-(K|Ev(w35Yx(~^Zr<lO>sFZD7uL_(+N;=B
zteC&d*8OOI?}W`fR@0h`R8!V2k__QJDSo49UFnY0r!lVaOY|KV)mY8m=Tq&d>frst
z?8f!xtxrp=?{c0^b2wjHBA#}3>C6)Ido^E7*L+KQdVM3?^H6vBJg0aDWpfcfd#8h?
z%_ULMM;Xp$eAM~uH_zq#kL=Igdn$itriB{ppP#1pS*-HC<-h5t680_U@H%Hz@Td7y
z-K<z{-RE0)Cxor8nD(>#kK_G|zcUV|Sjl}6{V5(_+~T)hmqYB!<(T(-P4rgyL|yPt
z&*9a&WSajv&tBf*Usb@%<&zXQuUU99W4qIv?hK`^j8eBB9C~ZguD9&x7Us5HE$5VE
zpK)CLDYC3L<wEKeN43&Rq8Yz0l<T_Y2$wk=JiXypoW#loy#-6Z@b2pWv#8|S8uJT(
z4TO}~R?975S3K%+%k};eW`k@+)t8EEJ~X+t>~rNmyk_-*ueU2cKit{Hcd7irvKVH2
zr~YM(pLyI^vUHzr$gcCg8eF1w*!(hkW?gjR%I}d{*`GtU$tf<_{Yb)1d!pC3<?XB8
z_^RhEdS&Cqd}1>50__WG3dKSTd>QR_ub5X=+;DHw)h$Qa*ly?dA5^N0*g1{w;^PJ1
zSx%-3FRz~G;=j(XeWm@{%bPbe6y&a{n)vFpHv5WY*P>Nd#21L`vMGq=>BcO%u_%1Y
zw#thecWl+&ziR7Q@h@FkTYvUn7CLx2=)GW+naJki+~qO6ZPzbYSR`pF8Fob2a4(wQ
zE|J6VDSun?fp)v8YzlRDY4dtl>^T*-*>H85vTf>fnM&b5&u`}Y-!7cA`ig?g6OFIJ
zHmcWNv1M9rsF}v{{pRlsL%FTGq3+WwZBKvd7U2CpF)Vb-{uS56zkI#iRl6jNt=9kI
zDVtlntmQrGwilf+-<;dD-Er;hhqJ9$Oxf48;hz@EzUFhS5i=R|S#Q1H-dv&;dSI9P
z#yYElx-)Kfd47Dc-1D@%Ad>a<rMfT8vnurUU)wH!?)jzWy4l<BAFAiY6<pHX&#cG#
z;=e$LRl0m7KW|WBwClrFj8^5c+j%Zk=I%>hx$bQ64ykq8YuX<_*xz=IF{|X$ZK)^b
zJKp`-{enluB6Y&GLlM5M;$JQ=KXS3+OK`FMT5E@Fwv|hBCtTZp@((X_$wP+RDLxmw
zSZ8O68s2}lE$V-Fh)f9Ac8dW1U(vraUxz&r_2dxWDzhMy?{;p5?DIWwcbINWeQI>y
zaMDg5vACw`&KLG_eEcI&)L}E<fv@mD8K>ytyZo>3b9diu_kCVheJshPR`*ez>9sA&
zhpGjhUsv3h@96X1{fJG6Mg_;WI*BcEhkpxi_>%aCm#ObK!;x}^J@+4b*WBhU_@*GW
z-(UaGtLursD?8)Zj@z1a$1#fheK2i7_)F7@ozu!L@xN&QuqkEg^_T81taJANz0vlq
zJ@R}#$BM4!%P&ZO36{R)`l9l1TiLvrw58_fa(@Wjm>*X0QgGLs!b`tb?%lb_;EMml
zJ?t9yt1lEANLRI*^)Ehf-f$M%7GL%(m*z~qR+IdxY8Rr*BA%XXt(<kfc|y>JoqG=T
zwP)G%l+Fv?thJGIPF&vM)kW8o9%MJ{+0>X2{YmD|+ldheue3TlS$zDj#PdZcVsB4E
z1*=-MsOB3}l|R!n1+R!KoYoM_nYPz=f6EPF0l7&RY$J-*{&MjNOq`y6cI%wF+}Yax
zdA~33zUTTt#c@OV1gn(&uS+IeRegDL%9or+Pm9_V%d86e-z-kLxFpDK(S@b+lN?@3
zH;b?z*?qgMm-m7w^Arb0fiGU+MlYllydTW7a(FEJV0qK2jm*dM#h2|~ayfrL>)+#i
zj*DeWEE61mM1C`Fh^Y{sXMWJjy!VFE{X;Kqn{ZaKFZ^lqB7#%*{KD@O4DYiqz2AII
zUc>J{gIB#n-e2X6+7_$V@{TW+Lj>y2sV%I$k#&FZ{X^TVH=O;)w_5e+`?I$S^=l{G
zv-sAu>Oh{*tHu0|SD7WhH5`^#tXA8;E%A^4g9lHpMPE=C(46_k)%RjhU~AN(+?n6=
zW(7p1h6~xU_x+blc;PCt+)b>o-tl*Sz_;}pssDtZ-np;vsy?-mVNs-@_kwGT(MN<d
zzU|w){p`l_9R7qLpZ_V(FR$sC{O8O01@Ag;bXdySO#Q@_lksn1(2;cEIf=$s=4gJ=
z&uruRn|(`-%Vo-0lik}Je#F1Le_1i8dHI=B(~msyiz-#KShDel{essoj_wPN{$nn2
zA~I&SACukgKLzn`S$68Lo_hWwSINKG|Bm<eeSaYF@{j)C_*-YB6J$M2>t;xP-SB*Z
zw(9bXkLj8}9Z%jo`**|U&9i^Me1EvS_=DxSC$G-gl$*)FKexN+4(A$!;(4<TZ_NG1
z+2dHLWAph&x0S=EJh{6ET~}}ytl9MZ;(un;8LPVod^(S?FfdHz#JQ%*DL=oYxTGkt
zz&9~7FSsPJs3aA-#<n*sGWvFyfbF48tgD0$<QTFPFp11Q>KE)Ky}*RCb%VsDglSew
zx;N;%xlCK3Bm82TUgmG<U$>_3Gt+NdU*miKxBjmy^%u+5r8Jv=y2@WZbEb9ixifo{
z|NcCmug|!rQ9kuYVYr)qn#vyi#4tDQXDN3M@9AOH`IP9{#%?HiI^yBszg0F5CbfT!
zm?|*cKxMIDw$QcIoP^**>|Z?!%=>?AINE;v>yLnA?mr?9secXlSrqLiK1nEusU~>V
z!kd?wY!-fKV<<f4W60QRcs4<M=NYrj*RJHwRrxn#X2@6GtCxQ4QxJI@tE;UbW4yM+
zBzL9eT-W(gum0I>UR3UT`P{87kzuCFd8?<LUE|4kPbZ%11%K|*Z&#R2SFLullG?6&
z*2907(Al1InO3t)KCB3vrL^UWftbQ>#ph>v5_iVbML0CCjt$Xuns358TlSXmu32Y~
z`IsK{kSOq%IMcUo{V}hQKLu?8)=G?LRX55C@}K>B`pi93&GxyfY+|=BMqcXMWwj|s
z-~Z0VI>XAfQ}dR$E!+A0=3$lF+f6T@@OrTH#>JixpR<xDW78~Cs+>eCSEcB^%kV1=
zIsHnVJvy{%uH+H{F_tTtTP8eCJLk2@@_ERDu!l-_nwu2XX;!drXDNx9V8!UQ((NGM
zC809Wj2n|bxHR_lFg5c#&Jg-0TEpe-C|06g^+L+^&MMb0Opj04rk~%aapGaah0j-n
z*LJ<ptmtxP^nL#5_wDKsPEL0&yTi|2Q(8?fv|itu+xLi@HU5z{>-i6+ABzt6iyiKF
zYm*bM^gd+HweM(o$eYg$t1S;-GkUWofhjHh%$#{#Qp;qotbAj}eQUSQiy#iMS~l_b
zYuHQ|YxreO$<4JsZp*^XzNuM|_54TM_HWG%2mk-v@}hSRv$o@tt!KZ6YLx_Q#8llC
zS39k}HDE(eXymaIo7O5_KN_`t*-TmcQ@3=2|83}!)AF}iKj|iaNVjCj`+|+^BFeD^
zUFJ=bcCC9G<+C)sTyy5;9g83B+p4oh`{{$tXXdT^_{Z9LJ;(J0mv?>m$L@6X?DDRJ
z#;A>|ch(iAt$yY6BeeJNDOankTeAui>=s(rtTySXzJL0Vh|9yQi4FU@%z~PiOy<}%
zFXp^-qDIVBZxyAzi$1Z%%=@=l?QV$XVq?jx%i0~}Zf7n1CMbBvP}gXcLWFX^DaSn5
zbuVj}ufO<zjsM?ghX=a7$6jygzNB;EjjUDMhbFPo7mF`G=dixuzxa_<0>k>tKK;Uq
zn;y0QSF`SXc6Q;!<ST3E9GWap-#+6*&$EYT4~yG3PI3QhD5Z8p_ynV2|8=HC@h2Z&
zYB7KCHD2xYv6khmz8`I7sn;;hcMz&SWbE^M+F5bFD8A*#IfWM#%-!<-kiohnp1WVp
zUT~IY*vogOQrwGCExy(8iNk%B--1nV4m<Z;>iR9?+_IzF<oiRm+Nl*M!-ex6`EHWZ
zo={erKQV8PijwC&HT^IC0VnPnx_-I1r=;uJkq!IbO1!Ns(dpka=UY#AZqoPjFJv=?
z^NUth#(ZS|^x_ZazJ~ADpD+4wdVl@gMcNPdN2$~_y-WV`r!cIsjEkE~c*$K2!99+z
z3s2cDxAIMFW+`tvQBgSam0_o*)pegI_f^-n+cm6vY#Y@nb1%|s+8mb;P4~+l-TBFn
zS{!;@+pcn%i-AE|f`LH;`#eK9WQnP3MQU;>e2>}G@X7?4P|5$kn@;ALU2e&nv`te}
zM>Y4-S=ZBdrzv{|OP#oyb7|J1YbW2CMBaGQa`UDf2bYG1)*=}t5f|6B+Z|uBbu=sz
zJ1YJDx827>%=sU_pY<`@d^?x7{>|*~d%y2}|D*PJ_3tOe{PtW4dxbtd)~&lEcp$o2
z<KW!7JEAU<d~6FJ<VEg0-oKr-V7+`k|0CIobOF&F`W;>}UmnLghCEJSwY;x;xH`o`
z<#6@WFTYbN_HU?BXy8Bf-Fefe#F@1!4U#|a=^u7y{xPFQe!|<sMyJ^fwC}U0R%kS|
zKebjreEe^PN`w2+uH!9#g&pVg`FG9jKm8`y=ywUjd_noXA0Jyj$V{C3JLOxXy`A`x
zn-&rWCV%={!w}E$pqsHqvq7Kz!&HWSTBrSge%z|CtL5$MPk-BZ{6inj{xQwt=QjEK
zoQuToi>lt|e)RiC<BV^I73x~#**^;ZF=?2ux#IpMX<5m4Z>3~k%F5iZwYsytI-{as
zMS4YwkK>B>M!D;+O}r8$EE{;*XNj7bX2hl3C6}g6+f<fuvS(dF>b!<^=Q8@;o&G%w
zcz(j{{Ijk-XJ2ePcUStHO?G+7miU98V)r`lUaZ`F*tS*3)|&NE)8%D`YiEZ!`D;f!
zn=x;r(8<1qcdX8?k9u%xmdB-kTJ6S(B^<hZ?=GJasg*qwaamGZ{gm6n;;JbBqgxAS
zZ?s8U@=ZleWX)^iGo5{$!QJX#`)ZfDsclYP{`u{;N~2|Isy!n7SKk==NR`Z#+q_N1
zr?0Kj;o#A;Kf^@AD{X9Te}9c=JGA1^_1q&bBTD0ymhz`8J62lk(&uf{J7e0Ng)=!9
z-gRv}yR}VmVd=U93z_DeGFK1cd|ku(Ty*PJftSe@8=M=R5=-a3o%h$U_Ll4MbYC@z
zy~mDP?#j7!Ww~ka{Yf6i*2j&NQm*)DSS+lZyI5gqwCv);dxh3)nP`~I{W0za=QAFU
z-ye&Q-M_MRhwdc_)5&KZ+EjJ>Ej2uQ>9Msb+q$L(-^^)CXIn1MQQ3A*KiJW=#xWxF
z(uGIP^IUSHL}oQbD@;rhe03|tPx$<oWiu9^`=n-HcW{OOi)rHgO$Vp$(FwV5>5o&;
zwe`0r2=Dfut#7+aRb=uNZC$0AI=il?N{GF^yzIB>M-O9G)xrzQX9~M4S=PZPHL<`&
zE5l-*6YJK;u219_yLHbx)KhR{n`H2pos&N7oS3lZx|w|Lon1-aR^`pNG+59%rMPQO
z^qM6*_PL){dH(zD&hANDR<BsmurhvP;`PqWT;-Y?w_kUxNtTpcc6GkT@{`*i<QQaK
z?+O0DmG$d&y-%WR{{5foCB8><-GZGB_Z4r)Y^q)8yn4nF{gt~Sd{5k4pW?CNhiTnm
ztJs1Hi^-dWf~(eT5Z$uNtW~_F{riO(1(#=)1Y6`y@R1CQxPSgqX8Kv36H+qM-@Q1V
zA8ONkzw^-j%~|4+wpwM6Q?-t5@IJb<Vq@3z2TQg19(ajrN@)FSun&s76TCh)Dk}V=
zy3OK8r8_*2&UcNKJ3b-Jch!&Xf<K{VulLNp=-E3x!^C*$qx`;}*`YStIj^R6zSLN}
z&|2!B-tPY=ZWplp4>tR!DsAdoWis)NnbFA|z0<yI`mj&wz^r3acyo3i_?j;|?Z+>9
z!9Vdf%O6#q6FHEuz3chbcS|Jx{Q6l_7`>&dXM>*pndj?#lq9EL`j&p?iGg-xsL0Zm
znRf#jW&}5SN!xzXdw=2VmbVv;-e;8uC5yiC>_~{WUKltv$i|r8tu8<O!;iwtUGJH@
zdsSuop6m5)n72r)$aJTh`PR1CT)nz6#k#p%*()ploom|a`}W~PPusQa-&H4_{IEmL
z?5k$7zUVSu<*RaXUaxqnEhd-wPR=`(y|$J&_)F%|=i#SV_7;h5=lXYg@7Gs5lO+6(
zuGeB*dT2dM=+Ypr526Ntx>jj<Ptn>Rv{p-9!&|h3Pa<*m&I>ZxMm0O@4g}7gcP%+7
zZw{Nt**TSOBAV5!CM#4m1ibscB+>UBo6MIr^QzvIU#aPS>Kc4YCwXV=o6OmFriOpw
z)3%&*dT+(~ulIud`ZezDd);+cA|!sU-kufH&7Rx)wya+Ez-s!VROcMMRjbrudv*r2
zO!;|n&$(A!O!s1}ABO#!5$!nd*3=ngS6DB{9lBZAvfG+Z(DI+DnNg<H?;`(WTwR^V
z)Jj)ARxOhMd~y1wIj2>=UyH6dc>U>(#eVyaRx~8)Z031%YDd<$?hwb@i}H%MUp(<Q
zQ}Xl9jXznJ9tq4pbSvq6GVAinrDgtEX;Z|v&FgwunfCNAcVyIrPv5+~tc;zn#eA8Y
z@jP@QFO!J3l+d=R$LkUedkl@Dzb(GKuIH$oN<UZdM(-^*x_io&>RLDSy5679`tGsF
zzu#+F1YB&lCC><%62f_HvfRfD+8+H?FW4?w+k82>q`c}yu;>00&g)%0^2*LR7wx<q
zfA9Ub>DTS{hpuR7E1cIcQOd#6pHchyf}XPrQ*?y1=C<bZr6zCI$e-C-e{7oc^N4fC
z4#p*Yr{@IUNPazGW=3LYf|9y5<NmW(#ZD>}h=-k7n4}}}#kT3x#!DVw99L|P_%!21
zuSjTkYf#@>kxOn8RyJ9;W3Hz>ThY2_r_9z<W%D#1pUO!Q3l)j=y{Im8?2^ap#1<vp
z_CVoG>z-R;Ql;v7*_pKhr4!kA*mkEz>TF-WQ1sKuMRj7@n=LLzW*1xwyP2(SnXPV}
zotRYSCANC*(crm9-DVf|Zr*k+GJDhYS^NtV7oTraestiXQo)tqX%|1;_*;_9`|gl!
z#B-*NqTZHy4gIgLTF7p-Xb!X&R)46oZca1DIn}L#dvCZ{m>oV+xr6a}u$%hQn1<>?
z#Z51^G1@d=xYPY~2ha3iIY%d<V6!Qk%~o{TPUg|tn9h{1`MgH5Xv&IXm44r(I3u#<
z19PgGzLq_+X_%#RYv-kdJf|LQnK`XBp)DsjdgWiG6<bP7^**v}xwv&v&rjQ4?Tl8>
zQ^)gX{SNc{URKR3aKm}w*T@`;rlrLeb0c@xiKU-;bVf3&G%S&iH}GlMvKJj|gGya?
zXzlGXvUELfbo1#GUa$Ky=hB$w3Uki&=8eqZ$$ccinOnB<bIz+jHm{hb@0#E&Hdo}L
ztKi+gYZvOK`h62#^<swp7Q0!l=lz1Vu8qv8<q)5m@LVzE+wFHbw>9T&?FinozGYfk
zk><u}t!!d-oXl4b3w+(Y<FQWu<m9%ed#A_6_~swI&++$l`l2^S?LNIvE%o@o`b|29
z$LmAZo9(ktFiAQ7bbqkuqgRsR!)Vj?dxlIkh4tKT_qOcc=gh%(Vta38wA12M0q)1F
zj&0QwZ<)#xyvvdGYFJ=m$IX!A`OFW+m=Bv>SzYA1_tZK2m#SMUEZ1#{EkCnsmi@lB
zcm8c^jOR=}uyXCcLw-{~wpc&dwd%sm>^q-j-~BYQjIa2ib^hU|I}fu9=XQHea`T?f
zxz*|xyV<W>QLkoxYyK{ta^LjGr>JQkN_R3%DVm|bTOiK$i)eM3t#VM(`MBompK>Oo
z-S-Hnv1CyB_s9H^)rOn8M;(^9Ccd{=dZGQbt*Y3GOr8h4Wouk7`L0{Cc1p-Ak1Soa
zmu~(G`lhLDHZKZGI=$!S)JpD^M!N$yMBEfL%lUHkci)0N_slO>v81jwUj5F+wdLpD
zT(`&z+h=WZ*xfWG!Z%WJ&rG>Hhq^zSshxRyXaC{m<m7YHj?K~SU8ieQE$q+Tr>&WI
z!|nAonJVXZU#}#XKU!$CU9kV>n^z_4mov${`=)Sf!sWXUUWH{W{&4@FJ!-?aMQX|F
zO<W8NP7(|ZTG$)Lkd5Qe<}qlj0J;<>g1tiIxadE2zgw5HLM=F#9G=3=ePzPAhTe?_
z1R{kLCI~D@Ys}p<N9^T?mtl9_20ogWwERZrWsi-MEo}WBiOjK3zuD#cMnv7Oyz2eg
zmwT;$rk9sm-PLu9-^YLU?VifNdn@1b`rCc}u$bXmcdypAhkByYAB=YHiP2jh@WD%z
zRVbz*T+`z5@zsATdhhHFJNmj{yZrYbAFEjN0|M1Hp8DWcS+Ty${NY)ydy5ubV%#5k
zly8skr<&a{_Y|j>uiC-?KIlUp>-&$7>o}{{cRj6`-}HTTL@WP_h_=mKash$LJMXmf
zYyFrY&%bKL(e}_KhKl>uo_?ATBlj)&RDb`9503?R)`uLl6qP@ktR;8+cu0-R{D6<1
zHVYpFbJ?v;GM*RmQL^S*|9Q_@=YM}>WsRRQKYmiX|H;Gp-fsRsKZ>4wvAV9t_u%!V
zy5%RIO4r@dtKa|f`1X(*>HC2nV(s^-ckW;Ri2cXHn7Sp8j{oTFn*S)Bt8U#Q{`~?g
zZdESWx_9;Cg8Ka9jfZzv?Xu?k{^s@TH#fBfURW)gf8ns9t)0-jtwI-noDgfAWM({X
z%a<Ld3yuf1IcW6n*}HiC*3GLI+a6wi$o1y^`!_F6Z`7O3dRAHMj#RJv+FT~?*RR-F
zYI2_$ES*~F`uNzrD>np{FD{qwlnnATUUZ!8i%ZD6*ahdMdevC3mCfv&_`$<TzEASu
zWX{@}J030Nw{AP8UtjC~*yNP?vG_*S()Lfj+m^oW6#TUMgN0Z??77`vTO3w43z{Cb
zUS!z3eCcD($?aiRJiIy@CK>pyw)MJmsL*zz$&Zaz(dRvMr1Ff9AN%rSgGt%^US~H~
zLF2ln9+iWsJ6ZNFpFGWQo4&GEx8rRS$x4l9J493e*tpy(YTNvxBW~WAL~n7+Z%fx?
z=-4n%FSFY7Yvmig;+`_GV`nA`u6*Mf7x-cmzi0R}Co4b6iBnRA_8;S}FEE={Qhj`7
zhheT?tDVi9*UXJmOE)hztX5}UZIvX`8~$X$_2<WmCa(6~*7IqXjfrF2Y1xE38xH+t
zx2pC(uKPE{E%DKhWS{7tb3G-+Lw8*gHBL~OWajpCOV6Cu6^9%P-Z6)IN_ZMi?T}dO
z`AFi|j02B)^5%a0IOS=B=PFrG*W|<OJspCw+l?~Je&^_#Ux~Hw{dDHOo&KVoMNV(G
zo-KHHdeR>5+d8aSJJ_ygNgULb$~)#O)%Q>>Y%zDN3h(|SmrFv|SxcJ!n`*tpS2M}1
z*GSXk57+*%57NB*kG}6Hj#==aLM!V+y7d2k?G+Ed|7dQrf8@{gZ_PvfAKH_%o;kg}
zG<oseUpw0NKYTCsudzPM;}IY4@{c*ZV(&se==1(RR?jl6YRCHEYfmG3#Lqq6aMwn%
zf95ZP?1ti^pu?XFXMcHQy!oz~??(UaZqbuHlY2x>6+iwv-KyuXmi<%cnoQ4bY03F<
z-rIXcopcgbr7mYbTdFZT{nVnA-2sAjz9Jn?z8XC%J_MTvw%m?zTbH;)*p%t$O%*56
zqm%rSch2zd4R`5O^KzZ3bjoz22a8DH!cMP^5rW<(fvT%sZn1o){nvJ~K6_B1wpCgR
z-%=?@SDmXzT!Z582hLjAnVPioz&0~mIrFs_HaM2|7z#Q~oH5U*@ABq_i_?^ztddT8
z&LQhNBY7dm>^p+CnJrn@RkCNNFU>qZG1=|Nr6XJ$gS>b$lctJ@^V~MysrfMGX{240
zu3lQk_gRY4s!tZREL+VI>T$GPbLpkkOR^Wat+{w4Do}evr_G%x;jY_9)uzapD_vY1
zm~0`^JHNetv(hV*!2ayWdA6sxm9M8Q^Dw{deA%w>si}9ew8+I3ou!L9uLn-ZjS!yx
zz|3{c^zV<wtRC(;>JpSc;jN;WvH!Q@`5m97zWA8;&f4|sr^!91%U8b_o&T7?CuV!O
zw4`s-mB&W<a#ts>?VNaJhTwL+B~fkb=WzD9xd(B^tud`kHNP)$^Ap#bmC5_ARrIk&
zrj%Vw)|N6{^JZOy?cI%^*jbOxO=ej)%YE^QM~6=BSZbH9-@anXRjt)=3-rXNA6vKY
zgN9>jr8s-B_3q%cWz~KH+g`3c($9ao^78K4PyKZ^zdif1>gUD}a<1Y}`7~Q%&DE3F
zhZ)}Gv+++4t&46*x)F9kv3<{`W5F$#du`6|+10B1{L$y#)~8u{<#dX7zt>X>@Zob6
zJ#&WJzwKvbYWkLW4{}S47HVvsu6t#}w^v0eSA>pL_)K5yrF63SZ0TpN#n%nOt?h$m
z#hWqbPJVl2<HT!`IRb0A-!A(+E7G=>eR5B%Qr29)=Z^y3bnP^hu|KhWy78osH>Vvh
zeKzIKs`KT?xZM3>XRb6iHn-N@o_+euk_Yc1SLjS$Yr!AvcjD^JbFZX*XQ<DuwlJ9O
zxm<YLy%Q@E_j~j_*0go2*FC3JF7<lauV$A7MTyIj$F*&m3Qsg2)1J_6oAK5<MSX(l
ztCrUe_g2KccP*dy@$U1UqN}Czo(DyJ^l`L1+y3Ea<@vCEhnnS;1v_VMN;Nq%ckyQC
ziMKz0%Cc@gJkR{@f1A6<X5@Sg`RlaDShncxAA_F?ix1!Vf9Zo%LHW6=yJa(<@vba9
zw(woTUxu(m=53506gwDsid^>1+Rdu*s7@n$eNnwZcKo5g3)j?YYPQr|E7;pN!Qjab
zUJ(-?2b+Y&GS8win8P1bt((=%AnX1kx`m^y(>1BE?NJwJb&U}BE-Cem8)kRszZc<Z
zQwzB9i2K*20Ji3RP3{lmqa-w5$$nzJa?hme@`bbmszNbZ2b+U6lv69c9XB{!(&PFi
zb%|q(h~J_`uAEveVtxxJX>?zXnB*2ZLH(-kzXkg<g&cQA-U`~H7h%%b9d%{FErZrn
z=Yxwhb)HK_7<Nt-=I~9t8gRkB@v88S3NF(pTh)5E8c%!g*6>=&s!BI^?b5@q0$bU1
z+?Dl?TD@^H{&qPxJlS^dW<j=v%QtXtPcqJP=Z<SyYtgLzQ6eh$eXq5g(!D_In*L9g
zmR|j04;5X0sycsCke{|V!o%|WW40q5Tjh@%wpr}0v)!9F$y?Df#yf+FD|Kt3hHUHn
zOJ*N0PE*=$eC0KZpOMn?980FVf1a&A$6L~}N9@D)Y5V3ly$x_)kjM0O@`C2oF#*Y9
z8OIcthVW##vs%5FB)cupTPAGBM6Uz?tkR^Ho)X`r{lUn5bJxjr7k`$UsqeSi{Au-#
zZ`>yyJ>%&P6JLKS%%ZpSPd1kc`=QJjqsFg0-|X6aBQ*8K$FJ`;?Vi4Fa@G5F2ii=`
zmKA17O<%e(^L(ww%UOq>Ej-2<Q`lz|#T0$%Miu`mRfB@1?*((Dqi$TV?tCj;am_M(
z<~7ck(reQ~{qD!S%i7dr)DU-wjpMDe>aUqr%|44(SJv~V?Fm0Q;cnOS3H#=FevYx}
zFW6tByL*@2Spl&R<qf;oZe7Z_fB6H8S5B>!jB1%dX?dAx(B#<O#&afR<?Cl3yP<4&
z?d{XR@XI&FD%*E8?JKT+vw82H7q-t&)T(Z~aPZQ}+;93D=AOKN_Wj*&>yyv3{SyA8
zwdk3u4r8d#lBqsNrIr;d*Gey&zCPi~c7xn<<HYX<kxP4dr(E#x&R_m^lh@WqtQCq6
zwK;1ACr1cuo*`GUQFgJ)yMnj((wFP~y&<6Cm3;s1ijqlFXaCKVnP4~H<-O07xo@^;
zD7G&3`9G5<A~9b2<lRGa^}e6g`+iCD%fw7hRi6d+=KXnR9tFKB65Nv|^m-}!4B5?d
zf)xwc7#OzjG2lEL3bOPZcIw*P@a%HwtD<+WOGS3sc*-y`2DY;pC}`jC;^9>Ba}u_0
za5Pl(dvwe<`q(9fe&cz8?#k*5`4{Xtq<n#C>5OuDfedfys5KT#c>BxNuDe?I#=VZU
zenF~yZ_4RSkJP?K*?qrf{CwW#_m;oyzMTJGQzUo5?N6=buEK~T<t{bhN4~q*C>-io
z$et{ilH8#r-YDtJlWbHGQ1w&e*wU>o#)^+RIHHfF@GLG^RFE1l;o$ehACByjXzrhs
zvFuiX$nhf&cJRn&9p?XV*z6C7z1YVBk^YWf7bWVNjwe;bJnDJWsWGqTk><;J0w1@W
zsaJE`a^CGj42!yQMcl&LQyZ*v&9iST4twZ#+DtSoMXg6uId^BK<af<2oxbO^elbs5
zUJ#l4Br;j`e$ia!dA8;!4gY>=tZkmN#p}$bb)OeZ)vs*3p;?j|qd)&#wc9zf%kH*|
z0}S?l@wo8q`s|{*MTTcS?6|l)^K(IFkfI#RW?uFAa?2VgKW_=EzoS)_7w{rOEcJrs
zueRLoyJn|1{*mSCSD$VaGovMK@vSKRs=)5Ra@mjzzwXLx-e;6-`79u_=**XI(#b*X
z6>g`BE_~3D44*n{O%Nm3uLX|{nsP<D)l9GJY@BhfH)gwAx=u*-rqvtw8P;zy2>%=4
z*L5eUR4YbK!svHd)dlmNT-Sv^dmgq<s%rdMbljx)DC74P$-CYB9D9Xkz85okHYIbi
zzRR_^=W2(WxWvwv*0x^d4zJh~5dCylo91mXr+bPTW%8$Iv7XE}Y!c_#wr!nP-FxZT
z2AhR4Z*G`c&a?l?#n<PKvXvH|xSDhFv(It=@5aGQOCoP1Zhw+`HsNEH>3-X!V?yuB
zHf@VeN&G(J-;UG850+ciwTYMJ91N3{-Vo@%z9YI|b)WBzHIL2itXs^le=)pPLsma|
z`=P15EjL3qy_xg!<qylaiyyi7oT~b|!@b1k^bY>@FDqp8UsMDiiC$DK{a&M`nyc;k
z>mN&3)VIbPN3BgN+2v>RSFNeMbWeM*)xG2Ff`7t}zyDAzEB}b!>R-oY!I~wH{eKj*
z$iJQ;XrKD#d7?qWt!4Ut9@1)8zV=<dD|yDPAndrhh4{hvl0Tx)XYOP^=Nz_BVVlYU
ztNTkIF8|?k{P_>3<M)p|ZP3}3tSL9yx7RczVsnZhXHnfLnM{#&`wC>5&Q>jZIGg#&
zyhGELvAbT@-ItNyI%(7K>Cb)Z*DG#ddgNjuXQX!4?bbQ@v`4oe*1hsGvosZ3xoXdY
zBP-ssX|=wdeAaL6vB%5LYVlq13U55kdnZejd5(5x@$-m_TfO(~y#C?%ObO-LrrkE_
z<{y@vjrqiRD@F42&aAVXGtc}~kXH5h{N{R-$=)ENJiGXl$C}=RZvPW>(>vyZWN`9s
zpI4%1UlkU}6|Hx;SN3uOd$#nZ_e|Ca|2le~J=>K%nK%4mQxDfV4Nvz)Z_6u{%|AG5
z#WvTT{@-@jpz`CweMt-3Hm-fO^x$N>n^*rBb?RR$s_v0Ks`yzVHp6e}%7c4C6UySb
zeAazj)_rFW<6ZVw^JGf5KmBvqZN2<?=i4e5-tFhe_<CmN4Bja24UtS={@%}@Ru#TI
zwn*wJ4|m@Y#m_5NZ0O@ZDQD+=DI+(3`flEY+tIm|heM;{il$Ec9<%hR>xt(j9sg@@
z9J_h@=Iw4VmmBwHMA<V>-Brn^yYii6OAu!+Ta=dIJV%SE8FL-8U-2l(U);oMtr2Z0
z-&4cBH16g7j{HUPpDZ5vI%!wse-vD%dL{a~fbEoD61u56*6lYI8fS33RY{(HtgUe>
zqOz@DG|M@4&8%ayzNQ{ozHt_p>Zu2ZQ#bGUQ(|EJPfF>QX2!y|vX^H>T>39s{I7l2
z=?GgTyY#};s|IzV_Q6Z1PFxTw8P0xxzv8ZUn_qA&v@Y0|wxlgh>^jfK%TcXsqnxa{
z<5pe1p|>|%)Xupidxzq&(w>PN#f*$ovL?7xh+XfW^XvWc(ETPabRM&H9{YbSGAwoB
z?Ac5!bQ>2}7)i8A{SA{9NDGhsx8s2HjQwk)RJWLQixlsvQ&~87-3F&sR$W)8?Dd%B
z9DQOD&(7i*d)ZIS5l%Vl`RSQ+*f*QC{~PE2FfV(k{bjWUTL}Ao{_Mi(yJVZQn7*%g
zd}77q8;j!(PN<q|BfRX*<Mj)K-`w?F)@D&``z}epNB`u^-G&^#JA$4D2Khdes|&SQ
zQ}!?}DKl|TeIkFGz@q1&JNn~ItN0`&YIe`B`yRMB(D=^2$#vh^W1hqXiq2~bPQ6o8
z#i`W3n&H`<0J#-6<d1kP{VT>7y07=qZ6Ago#S^WirtV*lwr4p@A#1`GMh1p3W(Ece
z1_lPu!KTndOUofso%$$ymQ{T6lM{1XGK))!GLuS6;0NqX4T|=bb`<${F758slGTM<
zx7~`~QrVIH`qrapZ$}{(#VxI3@--ShlN9}S-Q=C@+x_3`V8Dlk_6OucZH(J4tq`cW
zF|+u$Uh$37`^D=S_8eVpc_v{k2fx4XmoqZwYxjH!RQr)@dZIsMzQO$$UGtT~BKNEp
z`dN2pmA3DPPsye$-UygU&3t!6R`*tCPkrXa_ujJgf<HgrOHimj>ymY>Qa)tWr~a0`
zufBZrym&I_^MnW2Hf>YUF$vMK=dJc#xu26w=#bkrp_2<QoO<$jQFf1zSnm3B-?nD)
z6;EB-@<73N={nV>rIjh=tG1rf6{?HAG4bbr)!p-gN<Tk)*JX1&+Eh|Ke)Gb-ldo0g
z2;4N=!Qg7cA&_8uKqig($BeHV9V^P~``;Xk@^rb?X>Y!J(^O%u(9NelnV7BlJpbWG
z`(TF*n*tFjXNQ$3I=A@ph4_EYdH%*)W=D;_d$e-E7iqsQRV_|m(pbLtyyg?rX`dN*
z=1S@nejP=}PVLmRWF4`e&AmO-l9d<dZIMe%T_gJ0x8}UdbB#Iwl)vW$Mjv`mmEhx1
zQKNoiFQ?s3<~I*IZNqQs{&;(2uJ|9JT}%Pqu)}vj$6!uM_L7$5U|{g(XJAmoo)TQ3
z2h2i-_FBU`>qXKf>(VywHn!%m6cdWjn6QjD=baMQq%BueCY0SaIjF-K8&J5(`lim^
zGdbyLoFTb;SAEUfDi^vWPBglI+O`~x3y(rWY@*hNMgI~1BYx%i?7MFS!_Mgc?EC)v
z-s9@|&#RxGo?1V@p7lW4pWxKw)(y;-7w_mxMW;U2C<uL0=yPll7whrRIR}`v>{|G{
zpD8DuZ`P7K<epmNbL@Di$zx%ySsy$_`5y--{k+U27x+=KCd_1h@W;v;$D`sORWI5|
zEqZ%gq$cuc_=g=m>$QH&>N@+Qis9aQm)%YID=L`ohkVqX6S5)LXP<7*9HB|UT=POc
z$g<uK{b0*_Kl}qTtN+O#3%f4=nAo#C)kgX8aUliA?wQLH`_oE~8_ICXeiZJ@)_Jpi
z?VQBjl7}C@_|f@rk9GcxJhhd(r|B*%{D0+%Z1C?#AC(uE`}yC|b?owtu+5s;w|e%w
zo_S}bd6m34SGz4ce(amCZ0B*lw~I5PH@75Q^IFaub<-m6=F7${YnN|2c3{!k=PwEq
z?nKK>x^Sd2%;HSMvzC1c9vuDhSNmTwN8a_}KN4%5ylkg#%+}mv8)VB$3bT)$*-@?1
z<Ffwo<EY~6Uc1s%ez>m{xoh5=?vt#v-yr)2`<I(*#3xL>ve|goT*1$iF1$aNKI6FW
zSDE))#m=r-x?JKrujlDGr<Z)*w$bR|Jx-}bhc-{r()73?q|25r_3przkToJ%$D|}q
zr||0gMEWnyIlS=wv0H0h6jLqoj_KUYTXS{lf_K*QOp1~&PW&j$dG>T(^0Mq3%jTup
zR9f#o^ufYZ`g30*|MFg;7ia$D)aCg#ytcMEG~vvjLsJwF<)6A47Ug1|^|UoG=$T&B
z)=%qNwiVe-t~8%sKC{X>enF{y@sWz_*DF*{`zUKRJ>`pUd%ZUG?~KT`a%vVQYr@KV
zUq01PUDI23b@4>6hErbxkG4r3_TgN&UdzwRTX*VGuG>1xJuXam8p7TF_K8jJ(Y1T#
znz*d0be|@9LH|w0jqXC;?T>Ax+6)&ZhM1Hm&*_|7emADxFTj~a|M7CEy0&!HH#_;(
z=KcF$9=@Z-@0cX-`Nyo1`dJp;O>L@kE*tc6O6q4?^m9LXUgYO)<Io#oYIbDFET^m9
zhYq@4+@teYly~}vVqVP;FIHK&&u?$)iO=|<TDSD!^@`AAsl59S=BNJgJ1qJq?y$Uz
zf&2manZJ7;bj>}#)6MJHe3LzG*HzbO9=45CTi0@;NbHt)>GHi^8{=J^O;^Srn{WDu
zHUGqq)ouG9`tsHve=YS-QRv;lJN0W=c7@HFdLT#3Ph3mrX3;NS{*tMSr8d4hxNqZS
z$rmNJ7SBC5;rs1S(<i*E0}6`1X}r{VtjnAKSW9$0vq9p@%~Gp%@^{HuZ_{6X?sN3i
zU8^#>c#^CxwM9R=$jKSKxKt~7Z}yeI?f1fzK5&Ib|Fl~FW6g@bhwn_EeW{ryUYZ<w
zaP`gSH!rOEdAwTn(AC^`n&)Ou&#zp1f>r#TcWTaY?X~UGc&B(RX=%vbxGh98EGx$Q
z%dIHs&TH!<r(W>-wdbVIqDajsA+t3Ffx#DUy;{FvhM{@zvA>mKpSbjHzj_e#U;5e!
zme5dl&eyAqLZZ#CUlH6ab~{d%--W-w<Y<;gal*bGTdZboXJtD5`IcAoluLhJ^$4$y
zIrr+--uV}=aW3}q`EO~sILy|KE8*Ys=ks_3f+y?cuMCr$d;RSWk2Nz*vOH|xycLr#
zb=$m6?ha4-#dABJ%s#W_NRm<m+rB&v|1{m>EA~dlP3TEFw@JK2F0p*BvW#4Oq{OSc
z#&=G9e_Xcj?!8V1vjlTCOSwn8FSJKn?q8d=k3I2Y?L!M4JHt5pS6><5-R!Q-Ic`zB
z^ZNO^+V4V3-p$+mq4fJaSLgEkkv}xK=6qap_h1=wQ;vDz_q3J`ti2h>rQEjg$?lk}
zeb?2x<DuUBwY%<p)~{3+Y&<XBDIsO??D#s7gA&h^uHP#7AoJvO#vTS$MP0=bzO19w
z%H@o6Yl`xhUSIJ%Q2W(X8?PPg!gq9Y{ED~VxPK-3z$&IK&3A40dPnTN6vKb^YtfzO
zGqd$qP2s=L;`!!#jLE~jGZv-INw!yIx0n}KruMg6GxqG%)t6dKBwlDPC^V>G^zG!%
z@Vk(GTIi7Q#017{HdnXne5VBKcbF=PUl98!z9G-x!lt!97a6T+DO)+w!9_EnHYxqY
zcNMksia`h3)Gz$9F>A}$NVv?J<JYoXXW`$V*|(&sBJMBUwlcf$vD1r>!5MGs1owUL
zWxZ2#L0D?{m3<wTE=XxD`23}Q-nX#rVjFaYcJQbQX(-$(m%LlcS~B;TZ*$b&(#SRL
zTfEaNt}yt0YGwEr`{h@XlDvZSk$N9HBjbkhMA5P&<+6#w@726^?=(7bQDA3*^u+sz
zTkiF-R4$w>{Cy%X-_9QjH-B&h|I|pd@JzFD&9iYfnXk&AAMn^Fb$w7->!#}()y<o>
zbG9+)y7#aCAdyxu;ZM(`^WQJ^`{=)4KHr(8V$N=U<1T@x?H}w;g))XoK3knSWBs&*
zZx40tockv5$yRX6r>v=)_ty7}(#d_ZSZW?jiO}&_u;KblkN*`rW{2!kzjL@u>CWl0
z3-a%TV-@b`?@Fj}C@K0eE$wM)j=xF3YQ=dUz8?{8^iY_`;nwuAMP}k-&ZXw_GWfo2
zRLp(!{6pXKQ!C$pkU4kZ<6a*}CS99*RdS`={5z)}fAd(;rE6JN<5WJG2d`v;5Bl4d
zUcPrUCz0vd-fqox`EAPfYkMAO{Ab!?>axOZ#yvZ+d26P}$&2l=@Nv@h>h8bro?({g
zuUn1>uKq`BR~(SEI~Ksez~BcxxE|8F)P=S#b-~9o79^Hr=oP@vat}Ut)?f3iXQ+p#
z@98tAbxxi+|2XK|+4I2#p1xrQMj-}9n~Y308yN-`l<@`{7#Wy6)zUb1-pf}{bDOWe
zm(NMhtG?$>KGD)Tt9|l}@7AYGNT&&KZJO$?a&ubhIR*v>8Ab*M2?hp+q|(fs6y2iK
z<kZZ95+qlc_V3L*Y{1h}zRye0aZ0^$#>ub+CoV0@cJ$t|_`oC{PqSY?w$5(3UHjzY
ziNw;)d)L)HSmMmXbC_}8oaZqo^rJj;0yIN9de_{45|^;(`?@{RYj>4xIJW7nmZh0!
zV7@^6F6;Tfr)4ZzmL6idwV-<^%cnKE`5Mby%F+Zcv*^{cMSap0v%NG&^4*q;=F8tC
zUC^*ntY6YJV}1M2!>wP-#n;!VZV>u(W!2>)jgyTxEU8Ej{-Vq{sj`kO``MRz2g5D=
z;w^7CK5?44RY)WL!Apw^NQY^H!T@y10_gB85Y3O;*h|jIM7ewbq<q4OFVk$87#M<C
z85j&Pl{+SZR){B;IOpf)Cg!F1Wagzh=jWBBBG!se4T;Scaul&;=I++s){(pR{ssQ0
zi%V|X&2;kMxR~LzO7PN=<EfI@a}%cXI4`b0bbpb)$;Bh`5BiPnMNSBqw&hFW^Lv)n
z?{^kIyLZ>VzWyIuL5DIk=bINdOa%`1G0L8w`rWD0Qnpf-Ro1nw;^^0y$kdK!pO*7P
z-F;uscD3#G$H|H7&$^z@D(CbxuetqdUeuw`-<&H>o`2V{@XCkQWUE83N_J+J$8NqR
z=aG1G)4T6g)2;`ml-{>~?o<?*c4NnxuUr4mfB!<wWop>!7*(^oa>_i%cOIKrnm04{
zbn%((B8wtU8{gRdO>%pxrF@%5x2nvgP1`z})_Gpa*1Kw}bE5aVuHl2G#O$~OXAa(H
zHx$|XV#-v>HJYujEgQFKWTqIz)o!ee-?h*D!p7qTt6y5pQ*Ccr_a-6JY|$z95_zq)
zk2iE>N$hg+S;6ggS)Ob5{<2W{&Ea2j<F9%iI*{9Tru6z$7tY_FGb}tF$?-{cna>bt
zyRt{E?X9Xy>*+HtS6htV6#DzkcH&(q_~(h0qjOpD_45tCE+rj2W3VU2V$09UNeY{d
zon6cSsfB3CY`yX&JMx#dii6XO`+|zzXBV1!?v{#lGYi|n@Y~T(q&%aSJwZx)i`sF<
z2-mJRcfCDk=`23CHgMje-)>>byMJ;|>=*BtC*q}?(Hd0Pa^oJy$M=`Hv{M`_IoDjS
zQ(h4w@!U^KGk@;A4F`7n`?o9#u4rhVvijUaP75>j=Mu;5XC1R&_Cx(ic8kr?%??#<
z^EOUD8kVpf`H(G8QZ$NB?O(>g!0>_rV~s8-<sv1)fW)HW)FRlhY-^7r=rRn~{~24O
zxXYgI>d1CJA0WcwdQkXC*$UlECeN=0dNvL1LcuY+3hX}H%ctFY{kMYYgg`rovrEg1
zoV;xha@Wq-aKiVtSM0m&v+9|$w@a1ZPcpdw!bvjx^{WGSuTD`+YgxK+>WVcwqTNnc
zA3d*}D(KKr9<}-G)RI0~#g`MEJYzc_FMYU3dC7kd&sUbrHR&%{?=$t(ut`+U|JD14
z_p9#A8nlus$ca;S1rr0qcQytF8_aBn=~v&(;^NG_bpHZS$K0_fJ-8&XBo)$In>#hu
z|8j!J@&D(|cCCG#vOr;<k@%Fo3dtP}4QfsLhJGdo6Pm*L*UU7%8+=LZzW$z%$E5!%
z#Ca@I{&<N0LA%ks<+{bv-p8h?y|wf||8ws3E#PzT7PN?p8a@d<64)`>MD^-wEuk6H
z(p@#rx)@3yHDx<`tCZK<#qhYnmm8Dk7u>%1mUsGYtLASG9Czc4cRqf>cR1$erqxGR
zoz-3zcgL{z(X7zbg@KyVPT&8k^*Kz_Jk_Ph^Ihz)(>8gByvd52(n2-nJ7(_K7k>NN
z+vPR4mrQ=RY3t#$zIoC?{CvvqxSKD@R;F0=DXy~nv@Mo*Rn(h#uI~O?s-chbx0hbZ
zxnA!cxasNiKcf1xVnVv5*ZJn1-l^MWuzj`Ber0{hNz*bMb4ndb8opis+oF0+w@^as
zqS5`Ueml$gFH0$;cB)O@y~t^6dhzVQm~@82pZB%hySw~}uG@>VJ$t4~sP#QQv>-2{
z`deS!`3c=dRY%gIzBwOO2>N8U-tj0WFSo?y4IR=_=N|59Q|)ngT=Vnaw~oYB)5VO>
ze7on+=JUPGz(+3GqHb|=Tk#I(RpuXJgQq8^ZL-zxd9^iZ{W9+9ig(XSX8vk8su-Rb
z;n%Ffs%WWbC8_k*E>p2e?b92Eg*6Y=5B*-!c0wjaPVIcTM8f-poA@m}(--fv^qa`y
zKPkfPdS(yng{2Iuo^Y4C>@%(4dJ<Hh^GJiYugTcrbzz1#(>doZe@TUz{~HTrnOj#{
zO?jWY!(UGCt+ZLeNd|S*sY?V-iW|r^<g;-eQ=L)Nv(4Gw{Ha8~!{c6=%R59Ch}qaW
z1ooZ(x7~m0yEoiPZ&tTou5p&1&g7e@)Nn`f^oIKzm^b|0mJ(+kdgIQ9({YcNmatBa
zbB#Q2>~6{|>5^fgnj!d8SUZvBlX%!6*GtA<T(rI+t;h%E_C0HSjX;M!H?uM@*b<Wi
zAW1*Cq$o2l-8nzEAh9SBzD$2^$a#NZN0EPNyHXQ#7k*ltu(c^_VL*zef`?bfl1|Qz
zg2FtV_m;g~#uHg~^4+u_>kpiq&0hVHsZ;IP=K3ig&z7xq%FePVxi|a!zRLHv^Y48*
zeqR4RgUH7-2Lzt+N@QK!xFGprL!0W>8CKImXXOMQ@9GO}Yt(&tB4M`TwE6Z47f$8f
zO_EsYalGoP&AO<2(t%qwjiL&(57`OK3;C_Fg0=lpy2tyuESE~ZG5cHfZmO<PvJu=N
z=bUuCP-lG(_x<f0+anA2w%z>Ta(COAS-QT5<hYlwo3&SLUVi1ydsDr5s&wxJUfx;r
z)MF0UWih|QIm+I5C;p7JdfaxktE*Og!Q?na!9UK|xjByC`+ILqO!k&=+f7-HE3Fqv
zef)kpyxPTbs^Ze?5(3j+|NeP&(yo>}{~09PzNH=(bPVp?^i_Y=S%+KC<W;xtYQ3>6
z=-9s7U$U-ougo*Odo|Bf)=Qx=g~@;8y5#1EmGOP452JVY<y-FKd9w1Vt=#2WfmM$e
zv#3o-^N4+Zc0=Le%w0iibj1!==x>#f`?jBRmc;EMse_3>+A^fle$72-Wb4%I_DIY&
zk~fCadw1fknJJl30`WUo{`>G9=UB_Ebt2ef;`|Ke0_n#A#jc-R+U*tH3KNzWG|u?O
zxa*r>R8IBB_jh+qvu!q9I9K^x^v&X@^Ay)@-RD$&T2D}4plZR@%$oWCoDaAgo;g0@
z>Ejtsg-?9;6!|F<R;i}4<yxMUV9-64Wloc<cR0(b7p*?=K=0ydrS|G4nJZ<L=Jj^$
zU*gk~Q?&4d>@TIidImRM{rqa2o3K1`l7?UV*-2AohZ&ff20rxNWHV{<*J;Lg*-E&M
ztxV|qWN5?paq|rBT)E^FV-a~L#>G!;6}qn|+)ZHgpYW3vF#rcj>o1gfKe;n9FnBXz
zZDb*p_KrpArManjC9W08so<g>Qp8W4c-D*AP^8uWm+|7Q+oE!{Umbe4C6xX8)TWLm
zgB=@|{w|SPHgTqlk@2hSomx9SFxF@odU}iMHbvffGVjjsyTYITef-7tfak0b*A1h@
z+pc$An-5q%Z(45K^lM?gzx+z^K*6fNA%%iv{cS9*-j-=Bq4T-RW*=2v##SPdx%AoT
z1~=zbJ5D}NQk?st<-w8ZT3?)8j|+Zo>ZqP>)VESX`a~<&<{6z|URfsaYffL}{ot66
zXGX@#{V@VtrdRx4#-rwCH}mJU0>cH7i#)$B6=hkv?@Y*nbvr-a{&o7sO2#RHEy+%;
z3qJRU3%;p4yZ^MyspM@tT(~R$a;H6YIKv_vJNIkJyvw^Uzw>d7d-J!ePj$6f=~KS2
zuPY5#U-~s`sf9*ztTE4Oqx8EkBrEssetYht$j<p2Z0~N#Ph{V`iS^9f!)I1@@n#oZ
zF1v1rG(ZlD6?^fF|Hs)G7}|x2EP=f9^2_re#So+gf>a6LjmWMLc`91BKV@>H8gpAK
zLu1(TSrbx<`5C(}Y*Adaq|HIvr)SHp#Bv`tbK~1HZ?e2O>J|O=Zrla4u<DH*x2hVw
zgq-$ViQBN~>$OGx|G!oKvZ?)bfBstA^t7_gv+r)`wEz8l_UF0p@9Zo7eeT`S|MTh@
z9jxz7-N;(LZh@-eV&P;VA4$cC9?wsoD&unEWa46$`Fl$6Cp#Syl)V061rKYv;g?o}
zeG!fN4^R7)Z1;Jz|IqDfm736FRW-If)u}m$YmN3b=6^c5J*7tae&V6`Z69rq-HC5}
z|CoKxpMXRCKYm!q+bcYlkel)$S+aD)p&y*}>XnrmJ@Y?(Pn7I`knZ#QGuI!c@7_OD
zS67A{TCXxkAYjds?1*O#+0xHCS|+#LoZh-HcYWroGJmIHnY)4+Z#Tc0cug?6Tm8$m
zh(j4~Hy%4?Vtg-s#;>}wWhJ}SX4khn>|F2o;MM63zieD$1!ud9C43AyvRR^Y>6t>)
z@aY9zJ8Fg5Bor?M-MOYyWmX#Fz2wKVskQA}3)h-HmFm1^cewCn&d~<Lq^!JCY6nfZ
z`^ta)Zb|Fg!uI`LQsKK553g*A{r7AitIyuc2E5z7A2`((eoA#)xZ`=ZbyelL90?JQ
zuMvfxu8I0wnlEL&<Tm>jXU;<|zt+r{CUGUF`8%W0k$XZB<&!ggXY32Lym%+>PQ#%!
zPoi%c?S4D)%8a~SOuJuwK74h<oe<7!$;yc}g-ok&e4q99+iUljAl(aL`*UPhyOosp
zKRXj07j?g2b-CmtAGMNiUNfg}Vp4VMJ#cHyo;RhX(Z#yw)^MGcz52G+QBuoOf6L3<
zxJ^%8?M?`A_H#Qtxc{I3W#pV)cfB7@QqJA8<@2S3r^Vu|DsP^uP2fz@NMH^-)%Vg|
zV$QSIpE9B>GPu4i2tUZXW8y+}mTPC)H{IU0b@83_A1|(Rl`V8X7?u-WAX}z!qqAT0
zh12#XM;=ZSy}V$n$z}4LFCBkuXE}bB(<=OL*xGAXtxB`*T#~)ICFbhYcUMBshFq4s
z@AS9O?(uh7t2M_@Y`Xuz)9k$_tN&xk#<_Qwt?F03yI9h!)T#K&lH6YDLfdus5^rYZ
z2wv3{^3%3xzOS`|)m*zm%$e(4lE<Mb2evKJyPs{rmpXO6Wy#Y8^&z51HlMw$vi1Hi
zg*L4j2Q4`_bDc}}J6K%dcIY!#+<|Vcy5k35+?wsRgY9aT+pfiaFShMV`KkZ#yU@P{
z5BlFU+dp*bd;j-S;0L4Kf6rIA9(3KKwV_V&=(;HrJ6@Z8y}`VA*Qy7me_E$sXEj@L
zRkBa&`s8hL%JYw0O;f63Uhh#6d@x$)Pwt=4L-$#(Bu!oQK<J*}j&IE0vKsqTxAn;^
zyUJQ{_Vg=9Cg0PU3SX9q>(@5!dbHOl!{o}XOx@)<5|>w8&e#9oCi^BjqV!9SXhqd3
zuD$s>HVbDfzgw|<_4K&ucO=^`MA__XuwL61H<!cK?QUm_>BP$Z7aOPXq??QXOcg%7
zfBAufsus%sKgO+M*4`4~Q$A<bw{vD$H*LMIO_gNeswjH6^i!Oy6!*!@m&?CwW9rlY
zx8-RsYq#xzkbQlH;xF&)F<~?6OEQz_*)s80+n<GH$!aTJ+D#R`vq7*&;7i6+gY;gp
zi8VT@M^`-iaL1W#_PzAnTIaf;!V`}^{(QKsD`%%&hGWX#32C$AVxJkMCr$4B9@h8$
zn<|fLqr~B?yZ%OZ6lcFb-n@O9&nva1b;Sn{7EDarmCX2ek*KA=eA(KP&5^+eM2cc2
zm2=8nIw5I(D@@BWrQqngb07aVoVyo2vspnbT7L5}<BjY0&T+rA(wt*z-K4Th{`ZPp
zDwZf{913pzzVzUs-<NV8)@Ht2@bi6DllO+*n<ll3_HS-U7q|MU@>b48GqGV=#$yq$
zMM-Q+FIu*=r3aX`e#*EkvLxfP$f1j`E{KG=tmf37|54?h%j>BVGLG0VM%r{~eUvck
zn!Lknj?>x-nchd7Uj%*CPes^$OOk)!m{Z=#zVYc&xg*=!Z93NKE5H8sI9O6zwSS+z
zRc}}OhD^Rw1u3a7D%VKq_0DTGDc}n~v*6RwW0`CG<lR5@bL!>KIXX*-XX1jb%Z<0+
zI3XwC?<nLy`RnqHk#iiUSBUL>w5Ghdbl<l#yuUtX@=VBOVlOC8cq|a<|JW?c)4M_{
zbn=R{+RYK_2AxxjBka$dyIWut{#9$=zJyhu1CAEYvpOm+vO?HKmCeh_X6b^AA71m$
zoX||)Q=!>yKj-0o)!Zc&4Kkj7VySOVcwR}3d{Mo2*MjR|AO6Xh{faXG`ZGs<(Zy0J
z`EL#9Tm6>g*QHu9g^Ttb(77vIJv;5AU|6*4fvZhv>$87J&t4u_{OWK0&!jB<1<ea4
z72mLFxAHpv%j2-gBAr~R-id1RX--Yc9QWw{+gB=<8_8w1M0Csd&%Pb&9$)$WUnO83
z|DjS5whrzH<+x>Q_Ihlr7v0`{-9Rz!c;QDjwWG5uBKEXSev}&{H2uT83ikhk?}K#y
zy|diYeW!j-QQ3@5i$uOB2<IJjob*?Y^`UWG+y7>*qsJ$HZaz?QQQ}AB9kxKuRqnbR
z8+|va@Fi{8xN@geNURxiVVdI%`Hx%PeSB#Yx$=R))$Jb^L<A&qJ#uW-ywJ=QT+kk#
z$kUr}<u!ZT@`na?uT&!~w>^|t*V@y6`Ma><Kj}9TskNMW%k{MmNBG}g-}pb%y6w2r
zC*;#fK`jKCY_W}JnHU&uvoSE36W>Qc@10By&Go+=AX1li{bHb%M1JrGRu>-W{L}^o
z9>F<DV&Vof6+b3Rp4E97b*b#g{D=GxyW<adB>i~PuQczrS@8z1htAdKZJ%47-}!yd
zmil#df7t>K^!1oLOZoCCod21s&Iy5y8p)ieh2p#-CQf|xTO`gi;_-)4PFGH8p*Z7`
zT#0E9bB$Dre%7*WOp$-ZQ!9G-Qu*v&=DR=F$j(?Ix#EWD**n)|n%_GweNk%~({|?G
z#`GIuC-Y>o`<~t8YYy6`b60cbtk<tvB39k-i(PAdYvIb(rk4XaZ(msBEca~kvmLvP
zD<>~Lp>Xw&EbFfcyIU1L=p0l2l=^5(?al)Rg6}Uh>0gSKX8SI+LMBsvR#sM6?Aofn
z#H-zny$ZoQ%^u%7I5F;X*IcPOu7|e|m9DyQOqZ22gzrbm#z<|E%ByPcnNnV?lPdN8
zJj-VB)c>+uUrH|fwVp@%W8^KBtesirb7$>K{;=&tS<C$ASGbH`gnwX*7TP8m_Wp*D
z<FwnewWV_({0SDir={^ww)E*)C&QehZ919j;<m-yE#f_IanU-t|EPjU&jZ`T6*1Q=
z@}?{-c)xQ;*~SG$JkEhj7mM}hb8E%Mes5ARS@CygO|*1K>AgwY@2DmuO62wD&nj8g
zpy$}^n0Y>Q(VL6B;b+`?k8Zul^?Y6MWc}pEeffJ$Y-+!hiYR`Vw)if?+mNWtbra^;
zwSQ=4VQl{L?vYnntKy`o3=N4kEb|?-0$==@dt%a-b!X-%7BYV1ws!xdzDrN>?YXiQ
z*N$>8`LVA$&&8&>=2q9WZWeJvjSF*G4)*VoF=!9m`GuqZ@8mTPI6d!tww#yLQ?vX<
z(qom*nYmVXH-9LRKk2l`!~cBDqV#F=j5U@|442_=SeuaPBVhS?spq5?o*xpeN3yqY
zC7z2YIk$;%f$I#FLozpoPyGIodFID!9k~luj+H*anwzHF6yKsWS*h2{epBEt$CVLB
zU&N#KY@Q@tRe#9Dz>vqzz+g{I4hhZ6#?y-48+tlO*ippR_M}RpkpqXo$JP#wlL8zM
z7b<lqEnONa64kmX;kruj;}<DyvwWt+e~6Fq5bf6zuWR`K;Mny;z0&5pHteyT$tCEd
zTKMqUy?3|2&)HY|?(CDl@3+5acv2Yc$n%)X%HX2o>Ny6J4?Q`0a7t6=8J9xSu56y7
zn~&$o9hGtZBUpZV*KRc?o>$DbuiZ`Uo_@M&W@6IO!rnO#KP_7NoNL<5=rS+WJn?X!
z==G1>UOo8CqxepW=j7D%Kdk2?@0b;3&&$5WCVP~-J)-U5;xl&s8~f%fOk}-mw%3)-
zwsy`&OULS?>DMF=o%Wm9^;%4G_0&tt-q}5yJxwp}iO7zv=VF83G$-7s&YI-G?EmKZ
zw$GR9c08Tg_jmdn-6;h^Gv?h-eQ~cgF0QE8%X7PJkonx33pOMNx4mBb>h`@!6RM5o
zW^VWIN|wKLFLOcO?H<Fp$I3;g?rkbDOfr`^@Zpuc$%e|Zd-<ET*lPO3&CpC-Cg~Q%
zc=`0TIbG2g-p^e0cV)7r@S|R>s*CDp_H^Z`gnUt0>Et-QE0M=-<AO(OVvBbiHTZu1
z@<D^EBDWc?TvnKt>z?xKp_zDBqRHJX<F;A6jLz!%V(;^+_2Qq0wSN2Cdu%J?)s5PQ
zF7k)?enqrR5aC@I8nora_baNO1U(kjnr!^LMf;+j&%XF%_6b`y+O}+YBX{W554k6@
zRud*D&2I8B{wHYko29kZ&HQ(h-8a$YFML)DB`~Bj)T`>H{PT*@{HY!2)3GSMP;ZBo
zcU+3+!|PkCm6Q(c`?)9UtI2~@i~fTQGr1>h+SY$!w(^7br3pLP_BosiIavH;>Er|U
zH3h3?1yoP$>HgO}L*jiy+QgzGqMW(YuiSFYkPj^Sq_|iq;)ef`o9lN?5(*7_<)N>~
z9wWU(;br#>vuU#S47V+VuCmM92cKOTDtdUT!*dBY=DEopzpc7MkG=0{F`Td4?f2=h
zNN2KGR>Ys=bB|0yD?4q3l5)}tZP)xk>wzj&os62q%D^y_7vow9v=MJeuM;vDFgGl^
z_;Q%QzbO)@H@XV(9C;|G(2y*AM~rF8k^qkF4jv6lJe?P~ZVYt~nHUrK?9%VOf4TR3
zS^IP;|CZoGOU{)1I^vxEOZ}JgtvdIxl$)Epmrd?GH}C4ayZPn&Z9mD^|NkZ3F#Th$
zjJ1JkvQ?j%Picbny%z^~(ptYPt2m=t!2Fr>&Jo>@72d~gbKYswN_KR!>pS;qWzz%`
z#l6Z)7FFmRTX4L<UE_Z5L4j4Oh1W#*z1=*IRSTu8e)3{k<g^Q>b?$ptKhx^Vd$P<V
zJ;LeulsJ>n<w@B>->&VKU75#wRp;3<nP~Iw!|Ihzfp3#iJ&otSWLK5DC9pxt^h(i8
zwb@do%94dfffr6yMwcJ-@Myam9C^WO+qCf3lWVeOU-9aAowje2!`vGl<{Y~+qNiS3
z^G?Uh;tt2HP2D>etrg?Zoo@c+&Wf#Dmdt7`yn99M?uk61i-LEr%$+P0^pc^+(0%h&
zR%dCs850F0w{p$Wko5NpnI>jZ9koaJz}h`oXI5;}`!>%gt1@%z%fEGuF#+p>k8hGR
zeWdg1&7pbX$!pdx?Dux^uPxYifcb(>znJWqbvMJVD8^lxb25}E*Mq-BW6S!$>q<FE
zfx7d*?NCnCkX1T&AnH|-O^M%xY+v`^m$Qw6nJi`&seiVYR(ZGVYR&7F(p$F{9Zg7!
zcFI2=QnGjfCtJy-^0}KAzud6ibfJ&XwamjC-mz}nww&EUF8XhHIERP6WYhtbeO8H4
zj)&6Bw|_XIc_n_u*&6{n4rkBWl(MT)r^(;_^8drry^m<FPI|ue^R5Ne{wH2d_^|4M
zLe8ZvVh<NZ#ifX_T5t4ri)?J(%k{0p-7Q0l>)oSyJ-@l+5_}H+{^8(Mp6s7ev*Lkr
z%^ALHG5xPow^?6Zberp2;{GQ84=bCPoqqAm?|v)(#V|hG>3)K|{?=%&OA-kWpJ$iL
zmc}QA91Q;vb<kX??0&aaXnRf3!@g<dF+pe6JSvMn6Jb>?KXcyhsy<t;cvhjLUFDe@
zFO=@(k4ZbZVcLADPt{Jl&#w@?9Q&$$DOYFv_3MER5?7lRBwOWZbI68ztDaqQFyV4$
zrfpuJvvGIR!8HeGw7h+J@&Bei%dk*YB{41;MZV$*YZp&T6qvkV3HRx1)7I>kD*OC-
zck}&U8k3q*6Bh5tUcAp&;OzbHpXbjhc*=b5)Vg9{F1=r6k#)yZg(@d}Qp~Dzw^oYL
zPo3v<%EtNjo!~9@jw+vxuh}|oSvGOAc8BMVq5@V~A-N>!)*VMacXpo9@8DVIka_sc
z;`)ETI`7?@__&xqspj31|026Cbjv;4G&`cL^Qp~J!>Bzj-8{N&6Cb>v`PXLVv)SPV
zGk$b%xf`(Txa23J`YCye=(^Ceh4$;iHRmpvo)u>KuH~#1<J*nd6N<Yvru@FMAXNSJ
z9)?hL<tuyJCeNFk9mC3DIazdS#hd4%<yV<)<`l+$4LMu0X4CbVQeg{Ue&0B+=<uF%
z%Q=dBq!KSryxDOiYr!^$1<p3}xD(!eVDhxMd*$S>G(W!GKW154HV2+RIX~{uE#s0+
zFJm9CiThrx5xG$>`LNFQC&f!PpT6-!_R-CE`%-geomDgME<dF4lIx`0d9$wqdwaL-
zU3b!G+ZBd+3aSUgj@>w-_pa(@d#uidy0WARl5&aj)(bw1{1<!7GDf}fS;55pPWr5;
z)>m>KE$A+L9>X-}III5F;Dd2x?W>Fr#`$e~v2=#k!#7Glqe{yTX_uK7_Z0c<YnQ2P
z@%Y`(>oS|`%I@3izb}1OcVoMJ)Q_3Dubvluells{lV>yUoE1AM`(cvzZy%{o4ku@x
z5MJVRF7NV8sRPQtD~{F5u`buYX;B+B`6cr@6WQ3EA2-}9Dp9{vopPyoQKhISr}~~9
z+dgFpC8e?6*0{EAh2OdZLi+NPuWn=t_4;`u=-$z2>GXvse?6|5ddW0=*FU67jzPVK
zIXN-smoqXjEW=!Qi&m;bMiEsYg*$|UG^98+C>FHv)^>B`<}zR3Ewdh#_h`Cl=843s
zC}lW^G;vL|`C;bS_Vz?tSem-(|Hb>4__x~qVE(nZtK5uHkUO(X?%dqn&*z=CE&uoD
z?_Zt=0`WB|8(tnhx~zRtdv4pCl_8hr7^bYM&Sl#!_?^XhpXSsZl3%Qy?tk`knRztr
z&eOoz*_rPjFUj6_vL*51x05F#?iSAFxNCIh!umT~gPhEj=BYGk96uY;?zo~-U&zsX
zlB4pw?>k@Jv|GIEl=Vcb!!>I(t~_(&IlGh3c8cV)dtX+DepmC}FZwmvHL7`EK-<Gb
zw`!i;uM{X0Wz$`}l<Re{_PO>$F(03VPCZlOAszTMSf%H~hQkbPcVDHvJ-q8~i}4!6
zkbmvIy-^<>%8xGH;;p&wvCIw5;u((DoLe7e{0cP?iHqI!)}~93_h+fpbcs`w-4iu@
zm3FFD9nyO<y)`*qDcp7!YfFk@+PC_HKRp|pZ%#?6{olwh^4yK(#}AGjTg-Hp?C9{<
z)JwEHy6CL9PI0PEozsh+qc`^-?N&bIVbdG7aoV)C%m46Z@t;b4k&}4t>qK=OfjTvp
z#BXRr{m~V9Gp4XJFdP@eTBU;{IR$CJA5x`H4Zm409WMIM?e>XFE*nFS_uW+sy=*Zp
z)v`_HLdDAN$7;SZw>1kr3!=D$vz7cOq))kcqBGjdd*OxFrAt|7O{+ETZd_u*sb)XT
z{Kv<0()oKngbVDmHkR_fw!={0`1w7{@7B+2(sy36|M&a2J@XHPd;SZA+jxu{1$xAu
z1Wqf|Y+SK>!;!^Ghy4~Dy}MFvW6NpJn1h=tY?L2IO3wFQAnL|+`(upI{Nt&T=hZ(l
z?GrfOZM1L3gN^#C7y72B)L1s=Nfg$&?uqYvEc&r7G-HpNU!7^^`iGk$>S7scoR7UO
z+%wUEf8Wd_vn#w`)Og$M)0N9V`J=DpZQ9LizKe@eDz9xTS$tyd%I;T_tRl94z4Ty9
zl-$G%Mythnuda)l)iSv^-u-T>r)gKS-hmTSGDX?Dr$uRGHh)&F{ZM#yRYogkabch-
zSMaWdbBlEPdIT*)w{7W4yYVh;>BS#w(t@8JJT{l1nQy{U-C5pI2id(c!*jf4w|8B2
zeN?=7D_^?TdX5)+rMH<K?CX8ime=q3;><P8IU7v9ZR0Pw{W|B-;WLBPx8nSVv{l{D
zO<!|<^mVE_H7DFVJ;Yl)JAd=hr*qU=oF@BnD%P%V`DXQACV5-t$Js$;KT4RVPPnUe
zx99Meb8l`|sCif?>A#rDa96G7)v2`JZ{HN!R7y-PZHQSHSZLu;|K;7g&MiGGsmb2y
zuE!p2i`4${Ehj!eXo`2}<!i@!4=q{Ay{LJze2G!m#|f$7D>&Y*F58*q{UXb%V9BcK
zT(dmZt_kmZ8P*zg$#UY&1s|BSOG-}fYW%lNXT{Vg+qWmovcFuMeU0_%>YbPKmQPfW
zlQy50t-gCxDXVqjG_PBG4sW;EyEjE_`4-i+Prj$SZ`jll^Pt_ANuFbyq|C3Kye?N4
z-L7KaDr)-sMcd9v>q7nirrf-~Wv3^@?JG_P9c>m*TXjqA%jTHH-tWGPy}Hg(Z{_g)
z-P8Z|Evt7pZ%tdjJo6gMg3H&Btcl$EY-{qaj1PTkKmH!dWNrUw#u=;raz|9;gVsd}
z48LdF6@1X%lh3bV(ZVeBF7m%sUgWDy3K0k0KLkpKi|%PIPpt_*`29mItNj6f)s%P7
z%#F91hKc;)u%Gr(*zAbgyVO4+2hV>f=49VnXffyI$|uIX;uCjYuZZs4%XT36x&EdJ
zqGujVX%hX@6n<VUxVhZbZt;WZKSbNz);DSTXwP8y`PVco^^e;@b;IQkw*QcIue{rn
zuw3M7(Am|C!=IV<x6fVYKl9!^ul!>MZ+#qhA9%t2+x3{<i5<%xScQr1X<sh#M{f1m
zCuWA~igQFuC+5uyoO5_r`PPjgQnQx5JLPTuz9^#T*O>{?>~q)38qRTjY*5~*;?gwt
z_4#?**qq;;@5$wr=FJXM?X|f4yI`B-o=Wu#%{N#tM}>46id@*{UuSUQ*V#qWPR|OA
zkgw@&&iTAr)h=IZ+p6vt<t#@OICA)BE#cXt@#2PfiH63RkjFP3SNiEZtXOCerxjS>
zEzkcUJWIXa#eIU9EI(&+OzQ{5bvIx9`=xNb-E0krp{VlHu1E3I;RcfigOqoBChca`
zESFxmGn(VRLrc<>q&I9&N)7!swkADdoAft3K*i_z^$os<Lgy@*FzG{h&itLdVXwCE
zy0hge?QN@8;c?%+XkMI~M_<zK0-f)6O0gT)oscSPDJ*+>ZhzRjJ>_%HwHO|H&ZtpS
zX8c6<&)WNrD<|8ZEjM~nZP@wSpgVTMMrr*=b$eQP;?z7B+Z4`Wyy@{Uu}86O`NeMc
z4~GAa&rp42(I=2U>4$K|lrp(P$LFqBbY$#%B=NwbU(ods=P9oH@6Ijpf8^;kN2X?y
zQ%zG?i(maN<!|aG=UR(<y!W-v-CQiFs3vQub!3OY|Auwhd_5eOY;QfbcsJ|S_d6V&
zh0oGB|39=o_WkAU7i!zT`?(*B`PZ)8WZQnz+3C}a-gi~(oi)Gq&Aw4;Hvit+J-Zf_
zuKsZ2)ZG2g^B?`ow!8a#?x`2fGL|YYT&IanPhTsWa^bK_?VQrGIU;^V>9b7LY{Xd7
zH?sE!cWt`lCS#<0D0u#v%S$3ANIpM#*~h8l#qp0UncUX}b+(-Q!7_j85yv-zHBHt^
zj=4Mk%|0<l^o`LkcBhE1nLZX=hvvG=9Jji|yiYjy%y)^0Dsz4_+^8^kqmt-<kM*0U
z&C{o^eD|?3*RtFWzOHz8D_`dPlN*iY_H<>Wls-MT|M2QME-Q~kE0*U4ZERP0vvmF5
zFA+1&32vFq$Jx9n)wu5Nx1aC6JPcm*{nsv0>-pa@|MIBDmBsxLUDVRrm*H%++uY@O
z)bh{uhtJ)9G1L2fx5d>v`dr_{s{T)kUw><haAKqHvcP=~>f$o$#(D{Jo%QTBx2?(8
zEAaI5jPEm~N@g%GTI6ZA&1K~})kSg(q+J{Rd)@A8$bH?kf<wS<V(QnJ`x6crdH)WI
zWfHyb$m)9B(SYHuv&kNocX<y|-KNCeo0(B-Vz}X5{wdBVpE+8~FHM@StMsF+%=)rN
z(Gt$%&qbnSy^h@M{;?+gH}Cmx0rk0MzrIdhyIOvi(T28|j@z1n+he}8A7T0Xyy``s
zq3vPCpE`Fd`?eqWJm<Imk9Th$Y1H1(T*gwpKg`E@-)ZN*#Y)bq?8P7JQ3txZ9Zhp;
z7#SG2nHd<YFbBE<it@8klS=~N3tf?_c(etRb0>NGA94_BJ<rPGQez)*c+r~!EK<Uy
zkCg*^v=^yJe0R2<W|-T~!_4{LD_l+Q592?L$0Dl?Z@4_xdG~UT<^3;Z%ktmruV?tS
zG1@^)x-#e4#;NX_?QdnuIBp6Ha@D$dglCtx_yq5ov*C`A;(j@{*mZf4*UisGd0U;?
zpX}xG`No+S^CSfNqJk>7EuZna`1Z%vyJ4Rit#5hE*Vg^9K(o1_HE&CHaGP>+QK?VF
z*B|K_HJ7hDwYBtSt^Iny?aa5x7dK_sy1bbA;#~g=)$2Y#?x;=?zQ)!k^s%qCPe!Xf
zzH`OXz4l+Pby#UAwc9k_YDud1y4w4VQ(Si4+8bRZpVD5(X?cI&aaES<{?=u@5ux`!
zNQLc{?OA*$(?HI!{zNwKp@a2&y#*G{bIaMIHr}@gF-U)BFC`i9C2`rg*)wye^qhNZ
z+dS`#GZ$;{X5nI4?qy=j85eg8*KnRTjOl;)CF)fnn{KW1<@gs~Ota>>p6+~+#WZWC
zt2XC~pXKT+zoIQcGRg_x=EcOoz{1MF;D#BeNKpuGwiT&3Cl;rA<`t*r6=#-YmZf4}
zi?lQtvLM#DM(07&#BDz!k9JMd-@Wu#l0vUnl9Ni7)8+4sY;)3X=9W!Aw*OK6gQL@D
z{Ya?!<}UeON7{#*P1t!x_49K(pP#cW|Mq-+{(VN14>ruq{ze`zRU2*hPT>tH*^#v%
z@4?E(+O`yLOD&6}j;Y$}JRi4NPvMb{eR`t&vS)nN@kRN;D)&p-57`OMyYyQ_g5OSS
z{zJ#;ADbTkxb!h}!MRlZ4^xcuc272b*m>Zq#F@Uw9_w?t?@yodVb|-b<m@+{3w75v
zuGKl@*J6An)Slz~^tepk)X4`WTC8^+i`!YPb*?p)J3Z&@#IsL5mO5L$i(C-1eBtj%
zCY?V1m75nz^-4rM=lcEo-%1&lvrjwHp3mpg6LwGkwIuOCl7q+gZ9m<D&oBC-@6cA_
z-zzKVvsGyA{@79}joZJJ<Fj}ER$g@K%bk_x6Y{6X<t47Xv(L|H;z3hk_KX=8ESfWS
zd!;4`N3+c{-1FzYVSLhj^CKI!u2OEh`!&lj$F2X_jki6r*C+3?JN&JSeF^)lxCN43
zuU~iyHQSk=TEuU_x2|;2Tu<w69bWm)SK%M@pA@8I`o2=|e`NLZnM?!klZEp+Jtb88
zIb_vmufM7<V14Un&yjOJzdzn=(p)cS)2iaHeAw7vr{t!v=Q9OYf7=$wq5ZUQvW;K4
zK_T~c!P9#e%_?6sD_?N2{DigF>J$Q(|7V@Fd8)YnxyEjx$U8^(Zg3H^e)3Vqqch3n
zPm@mnl!Vn&HO??{A7T--Tk=_=%=wS<6sO-3=7;PoPR2R+y{ntAQ?&9vY6;Z3bc=Yp
z5Cg+8Ev!8YjMNJ0S<H>(t_ZohHBS4p#pKD%Diax+ZIt*=CQo8;YWQH}5;BFwfTOLw
z!ozP;<4K<xGZ_L`EV`ogHE{K!gxc_xYr|YUI|A3NeV>%Oy7sPJ&il7t??vCP&ab{*
zl^y+Z|F@Yl&-n1EeS7@)jcxV%z1MEN{`h`t`Mp0sb_%<gbo)QM#P_TI=O0%)`IFDr
zOa0ZI{?AYT^^=9cR&Nt7t?Z0^EHk$xGuBG-&9$3{<7Qqn`Ib3n_HnD{FGCKr`TewU
zdOXoe^y3PfbI+S>^qv?$F^+nyG?(v#gna+iOq>48H`ZTPk$eBx`000+pFSd8o>sAk
zC)dX4D5sxH@9OdS`q3og<mK9$ZO4pDxD2|V2=#iGu1fk=ICIO>Pn;*ZxPFFkHETVz
z4z}Cu;=eVoOm6<+;wQliKE})yE8cqSb4k>x&!%w;3VZ!dS1k0~y2qnKEPId3^3O-P
zyi0clq=|jrDRuo-$wq^iW5qQw>S31tY14jZEcdXCw-bL+qqu&;Jy)stIqt73miE6k
z&71H}OK|6{<Jm6ny=(4h?7yB>v(hh|<8f(>!rrfsx*EPPNyR^VtakG{+uW$i#b3;R
zR&0)VUNm?62T`lpd#)ZoyNL78#YLNsuC>dXe6p;!+2=^g8jkk08L=mNj;@ioJR@}O
z#{`u-68&t4b4w@o`59*&Qx5FiZs@ztIQd#v!5OaOO&=flY0vJ^ydO7rU!L{l9o~m;
zAD$SSSY5PLvN!VLu^p1zP52xi9bG>2b%Iub1fQ|!RrX}Vj&14Li*vKMs#oUrMh8}#
zEGulu+#+RZFVTBcdiOS`^?b&6nAlFQx#n@@l+^Vd#;FarV$-gin$T@?;<Utpy55p(
z!?_!?zTOgDEw#<$$~FD_n|77Gz4dguiNVkCJmd0Np|KwmR%m5yjoiIDY<6LWXxs@C
ztt9Q;YphQuhe(y~y~#Q2&as5(8$IQ&8xKF7xw-qzd`06sVX4LuJB);KS7&w4(p6l&
z%2H&F*PGl_gPfbE^kSIiv8HK=+zge!le<;r%+Vv#8<<V!J=8xVvMpk6;Od(p`x-<v
z_@!@cYM!WNy2VcFh(W}BLtiJ&Rnz8)_sooT7SnkbE5gJ6*n38d>eAOU?vzdx^ETt&
zDz(dMThzs^P8@;S%4gFu=1TIOdVger^*y;3sbIUr{B7wjHJ)DUTfCI??W-r8ysYUx
z<Jr_4nVh4Sass>Ry=OTToXD2@!FiA`hhgsNa`Whu4M*gjO<xkYwn1&v;fI~ZQx0*~
z8=jTCBz2iBm48RY+5&DPws&!j7rIm)hwHpwu&<#dP5A7gTOLl^PK)qa9?U$kVM|BI
zZ^vu;KD_C9!iCpo?lolJZj~Ds&MjOSXZhutvAM09@v2?clP5lX!rQ)N(v(QcoLh;N
z#h)D5m>Pv9imc}4fAV<CB*&gPOmX2oVTyaB+ojL=<=ja#;VR$d946uGSZtP4Q}j!9
zfvTtulT>rW5u=N1FLos#w4ABj-PRsowLIb6Qp3%>eqWYXFWa_RSTHYLROo(3io%x{
z9$SufsZBm1C%Tm(f9)yJlN0}#Yz<>eUbHfM_f_5-H!|CHDO^czeH{36@14AEqluNX
zq^5;VT(^T|!Q!?P4s)DkUs;?;DXc4!Fi|$1(pEH2U`^w-tBLQ@X3ffVJpN(sw5<y-
zPtYy7)pptN@s6r@7miPUZ1sJchk)R=6)L&4u__{)W=%al*Xb7H8XF6>BPU;aY|DCW
zW*~Scc=E<6+M7}jh#G2#o~YPfwsl*V|1n;nEk0*Ec7$ol8=e2o*w%e2CE0s!xyt0j
zcdjfy_aa2=Ra)_8(Qe<_*2QX<zf4-_xIl!};oQ5K%-YXSpDJ$KToiC|Qp!e$h$pIb
zXM$GgNZ1{AJldKs_~NaTQ+`j0>+Hb8oPGV0M-~N1NR)`*&As5b>+%Bq3%P7}9m|b+
z{XfNB6@CAv>+rH^Hxs)rSma&c-*I}u;XT6gRRMP283fqPDi%s+A6F}UWc@;eyF7Ex
z?FV`LE-h$(X}Ijs!oFt5HxrLZ#ixFdyQj@#uRMSC%vJ3+-(+4#Mb`En`^~d&+WN~C
z`wz~4`kMEj%zcJAr~7o9^PezxG<R_|+n+Z#4CT=@+51W?()_W+iWN61em*+;l<oc@
zcAkAx_a}VF{o~r4|J<4(L)ANZDQDNwsi|!DPqMeZ&})AG%$XtARR6M(cl(jd%Fc~l
zXQ!rmoHnuW4V!&>@kNg-4-PN-a^qm6G|yHKC*yCW{PiavRR6SVelPB9bD&(}kAi*5
zhr=}|9_U}TWwSrjpH*|@!FrDEIqZC=%$nsNuUBJM65g*`w{Pyl2U`PAH#N^p<K5gK
zy~D4wbidmC<GgiE`{&(BXS#Cu_S$J71?QyIKELsvbI|`&uw?YgUbnM(mPRIZQjLNd
zW-q3H(QfYVVx0dlo!9P|f99R!3;sK=KX@E@I(T`<!CIlhcP}@-eAshfv*E1=&wsEr
z|NdFYF>~FXFHB6DVKF+~Ja&qi7RMxeP3r<*>~B<3UF*htYuijkk=?Pf7HfiAEwc2h
zUmrDIc05glJ^5%F2gij=v3Hu36mqtPbcyMA&fK5*VY`ZS)~hoG7DD=F(NSH>nc2Q;
zc-7r!<<5$@({rr5ob~bBqsCfkAyOsWcUOHp&9?v0`m8^*QjUD}k#0Y^(L#u?RNUPC
zP@%Cz+oHK|(<E+pvd!+Eef#0Yw3Y~-kD8l8HmABhRcBlCwx&1U{0DDfiqpwuORLRF
z<3gpG?(dZj-rJ=+`?MEt-DJT%!EHUNF?{D_CuN*?-puN}$>Of}p@K(uf6i^p{I-0*
zJ1={e|7SPD@`?hh7blZ;{G9W*;GoD4lj%zzKB#@R*!}0rm$E_nfA_rXJG!>dk)QKh
z;hus8YtCD(2#vI_x}aO%yz5!|La|e}>m`NXOybIT{9xCHl7gA`O3b?sIEemvV6tw>
zmy)H6|C_X~eqnO$=du2oZx3s<wC(fas}-NObcrb6HKwm228F5YThem$-)hBMREX@j
zlXpyN&E3MU3z~Oblnt|Yn?A)#^!`uTwC`a%_qsnmc{?Sr(>Zdt<307Vl9>x5#2hch
zDnuBb$ojv>*JRGcvu_{Wxf5~yT7=S~Zm%$lX^)guUR>U`;b0>7|1)oIf1I0^;xt1d
za_{vsGRKM?u5E4?<Jlgu`)TZfUfs0<^6Lxk&0446rK)||Nd0<E)XmCHW9_nycV_Kf
zw(AR1tW4$nWl@ux?BzV~-u0FVmA{#GH1I3WiuJcegHH&Vd~RD)5WOe1Rg-6eIH$SG
zAy4*a+NYkFP3x{(XSVXpxw0<X<p=zjxkE4QESRS}<JO{y>&|_3oESZWkN49P#X|Gs
z7iaukGQ>8iocA()-n!vdzzg9A>@OE3JFjeSZt9WQE}!6*rnDgU!sj&+#U?Xq-oBoq
z-t&{sOmf-OJ-659rOunkJo)Rgh~vtPocn)V$?jV8(eaPf@7|-`>tyY|X`ej6?O4UZ
z!}$88zNYXTqiTg4HftBIlh`8lHpN$F$>E;6M}8k@@?V;r7GS+dIcDAJS4L^=VNoCF
ze=KhmS?7B7ZEZuKtkyBN2dhQ%wi)H5=&#={u3PdTX8sd%t=`#x&T`DSlytNyaJ6bF
zzc9;-LoL^O3PQJk_V=i2-`{m1a!d29juYOGmsYsxyXU<>^2PP)e%b$p6T|;DuhN}T
zZ5Q~@VyE__?-~~#-RgTk{X@3l&WndLBJC?@^-bD7eM{UY+qDvRy4Uwj%h2pyQvA~H
z3(xF1@0PQ7h^%H8USl@d{i2`F>!mRpF3+ygd@r%{g6XVNclVk+lK9B**vUgJs_i|~
zhSiHXOXdj~97~%dps;uM3&oBb?);ah8>VN^UA&^`^r`;KC*-8QhIFkBf4N<&H#Dy+
zRL|C_Kj|OGuT5{)Klp$6i^R^Bm9G`PoM)CUw|{E>HP|uu=H@4_7CSAAvsyge?fUoN
zz~IP+>P#Q!O8MTSSC79f|9;V8nR880Ro$}u3qPd%Tkl_XPM3S6{%77HPFs_Y0#CnR
zUZNIhweumz{qBd4<g8u`^s+C#&-_T!)@19k*WbTgFcEmec(BAZ;N|xx`tKHQUcJ{W
zRZt_1W9_r+e||jO=d@P$;3Dn~D?)Uwyf5yu<qX>JwR(@{u9NO7-x!EC?vlCEuDO0b
z(}$3w-4RTSq}J!$e*bpyDH)eD5)YsHdW77%xSR8y>8wRpgr@w~`n8g)Cr#|!tA#h6
zY<9Pwn^>0Bd;b0Fg_i9rF1+{76JK?KQF7kx2P@C%o)VoLYVY!SqxPRkwP#mcG>&}5
zIjySe>7BN}J8Pa?IqjaIS#r`d+}yCfWf}j{=>jiLrGA#`tFhU<sQnqc%Dfn@d3`|)
z_j^tH%r1r;snCt}6Tc`H<sCkuv;KCbc4@G2tgmi~aHPk^FqcSaTkp4Dgjiqh-qKsg
zw(7fvi+m9K+(icc3po0h`pxsO-Z5iQj91}2_t-lg)|JX{ALU*-JGVsj{nFJx7~Vc~
z&(yc?yMH`G-roQE$JJZr-(M#EgL&7)dVllJ@-M^wY2Q62fBCKDq<=!cZ9O*Rr5p}_
znf>FQ=j4}#RZM%`C*&9{b2L6YuT{@%E?Z{si`QRNUdlM1vu_pMc_C}{tgnuFd5glm
z)?dE7;PQgzz1Q03{$;#YCA2^Lr)3qJE&tTd)*Fr(dVZBDH{!oo&GPHvwFCQI^JLwp
z`8|yfl(%J{{axMAEzoxU#I<He_BQ<EtTJr0DmGM-Tt9WE;<6kDj+_U(q`hRknDum7
zW7y|LKH*6!)Q`CH{6LcYq`#`OS$8ko*|yQQNqX;=1^f0aeB`qAcIaEpPa-?zoxH><
z)2>%ApS-i`<iRrWXBU3`l{@lYpQ(0B$I69=*o38`Cmxt|w8@}RE~@CsmEWn%hL)_?
z)D&j&Z=1=o`;1F8ul|}w`D=&IOFuBbST6j>l(R6Q+SOg9UU+rPflore#4_%LI9z@>
zUwy-2(*+eX;||_Pa#;7{%u)-Ljm5%yS8cf5exjcD-g(c;Q*xEgk{^A4ZOawBuJwU=
z)~D#dTehs+GyNQE%{8X5>86aVud80}Y3mZ%_w8cQj79U$wZGI`Q5dd~eTCt$&^HUs
zy)BM=%w}`9p5@vQ=+V)$U*qJHB!kG#GuLwTcK*A=GQISYx5E}ag`l@}4a<Ji`bk`N
z|C&(NqQ>vzFJV*rDscb$4G%5WHi@j*5OrgpS7q1t#7kV)6ArvR|MhikNvHRVbR~r+
z3o@qIc9+F*vMs%<@=|rn<u02h|EtF{bGNcTRP@%9-Lu4aYs%b+h5gr7KlS=Cv+>aB
zJNf~^lbJHE?CQ1MA}Adt;PEb9cv}$5h1)R|JNIh(vsF&EK6o*%;+0y<!)eQd6)RMk
zydBiUIp4T*O?TRT)8oNa4sPL0X}4MLoMw4VyUV}UPvOfyR>p6|Ef;@tOuw(#V&`<B
z#z#GJ`F4iR`wwfXW<S4B{xjy6-1Up!clO?U82d%<zL))z@E6DbSp9ABZ}Jwp$G0fS
zLNmejI>QXrlw!r-hE?2qOg;9`-RGRIY;%8kCc|!Z!{62|-@aezx_aTO#-Y33rYX)-
z*f;UA)=BvXWft;WxWTsLzun&MjiwJ)98*~_IkWK||BH5xpD}ea>s;*Jgns;t{KaWf
za4}~e`>lI!d2imZ{#xl}>^aLaBiFWm+NICW8A7A4=^Lbdu24Su*mlWMJ^j<2zn)Le
zSjZlG#XtPO%(T7Lo<Fmn=*Dc`w5EUCGI2E%+rSxDD%9kb-C;QQ=h;_A%QsO=o-=Lw
z!!d6w+kvG)m%_d@swow#ED^h~o>A^U--1echtKmhSpRcRy`7(7S-W7*MwVMF0eOG5
z|4M&*=5EjU%R;NZw{%I>`@`Fq#81DlKcLKhr}g~YmkZXvsBga3_KTlkll#l<oD*`}
zQ&X<1pL)%#K5fUUyRPp#`7f?>m{?ub>#n$3J+S-7@0D9uZqS{`Fz?voEBCw|Lo6me
zxjpIg?aj~ogFJLDUvz$Wm~)ng+1WaChTq5PqVA|HP1i5I#xC+>$>Z-!x<a&ePmO!$
zAjEi&n@2|fstMD5`->UBj1~Sm&+@Nt%(Yu|`bEBm(m%gygL2)TB5q^ba~6g-T+csV
z;py}{V~T|PRm0iMJOUZ#9qZROm5INpS=v-)&etNSweGG3@A9P!{<?78@>f~#SL{c4
zk&^oTcAuZ`kGza%4YN@eE3=;$njXG<ZnxYo`NWQ+|GoE@N4z<ucWJKp|AT===RT<X
zm;ZY)(U9NS=*`#VSJtkGIxEm?-(2=3KqTUwom1X*<B%{3Yrl`_8eg`oQe^9D$$E0a
z=F7h|DzzPJ#Cl|0V|T3YOcGyox7Rw_{Fe4bJ?CoCTecbBn~tYnh+F$-NAtIxrMtx5
zmP*WTDYL(Ie$igJ&#oEv?cdn8nC~in!Eoz*^FOsM=FNGs7rw~mzq3c%-5k$vcWE&b
z1A`_z1A{RG%F$!VIhpzxLscq{X(g#ekYy0Xka4TEQ^9MdkN@8;{rUx)v_{8y?j{HB
zDGL{M7zWmGoOBT`>D*&jo-KQIooTu2N97-}HHw{Hi#p{G@Tb~rU%hzK#3n=Es`HlL
z@7%Y1ey{lU{Qdj>G6Wp!ViUbAwQ0snb<c3a)r!59ylP@0Q$lr&p1i8teA0IHqYAUb
z6-UK+H!JRVdMoX2=^6EyEjihx_tRSyi%)-?^T6!zlafupeWohzeHG23?{WB7+2*wF
z<$In@O5O43`p)R5hbC5~_^?O&bt~3CPfh4Pp3ijr`;uMHDz`<HR<2H+oF&&I(=Dl{
zsP^c!=If-%``6!;W)ye4eZs&fw({uxqRUU?4&UKud2AY%W+oXFW0F!7JDG=hpSbgi
z9fu6AOuHSJX|zDfta?KA5ih+(-MZiJ_kTM7^WLuae^;t~4`(Z{zB)}tU(WMrD4W3=
z*@rVGpM1JObjn=TpIXK~GO3)++aBh3@A`XS;#$L7PmT4v&nUO=sfj(kSbkQQYTHwn
zU&imZKHvYjYR<hIiNQw4N=3~-MI7^O+nM}T;qaTnZSxa9{<!OCZc!O7xonr;mkpn_
zRDT-Qytw-M|H8yaYk#bNS$1@(^cmgP|NImxrtZG|O;Ns4%j?V0Gfvz7Z+N3Q>DPi2
zcFLz2GWk7&GtRus;qS_;am<<?!rJ^r&ZR5t+J)n<F3f0Uy6JZNoyH>;0WYD`FDyGJ
za_n+h{3R$z-g*ts6V*FihEw>Q6;H36VA!@<@Uh_91urye+<)y{SnuYQcH{d?h7FQT
z-nrNLHV7~-E$LHfee!vS#KQM(rrDFSYFyp!FV8A!<4lRvp0?_mVs>s(pqQQ9$9EI^
zd*)y4wcT)+S#n=T>Wr5KMmt3EW=~NwkW4GFjInf8XMIuPr^UlF@p`7z`6W;JFC1W%
z@-|sCV`=?@M$3%-Xlps$FQ&bj#Kgcbhn2`Q4@vHjIRr?WpBntu548IEGXJ(ITVqd6
z*O=85dP_G|^O1VTF)3kXMqwVd|GaM_rsb5+zI*h=;vX#krn)FxJi`8AVNdxoZKvBT
zKZ>8<vo3yT`~2LQ_xAR6j4K}9R9LrQidyoriH?zN#hM|PX0$I&ROIDN&0KX!XIVk`
zyx2<x@rRk)EWXd?*}2vB-^6IX^4ZtYUfj+ro|$+cyM5d9Nt=^)m+d%Z5dFsX`fSZy
zLzS#JXM>dMIpJ&GZ!c^4KkdDRQ`x6icWyq~r~6oACP(?(2FLpWyw<52HBWaPT5oB?
z!SXgLd*1ymv+VLM4z04@7Wz3#_nlcvxA`6ZmUxR^b;g;?IO1<O-hMd$dS&;o9x?B~
zEz6wNKhCZ=!O(VH{8FXe{8h7D-&}4o3C$Ff<Ptw<$n&u(mc?=5t}PB9qCQS~^_$B#
zzv_2g?4B)8FC4GdI^*5ZmndOXBr);X#6BIKq_~wC6>E|uwRwc^E}j4Wwx9D4t9c2b
z$9a7WH+1#Ao^k2Jl!Zl_uXv9A|0Qi*y-3dJ%p7~;s?)BPd5dSuzT|Uy*M892`cA6E
z>*AeDzkfMZxm))R+r_u#3)Dq3m#utu{&Abl-yJ-EuSxh$Ip!JX=Qs1(l2g4GW=k*4
zn7zniZRBT(!p$`uEtcgRWeXN0b4^?D-Li${vf+sp8mANHeEP}9BfNu6!+%@ismZ-b
zPcG-W<l3!}h%nr*JkeV&@d5X;I;Y!qZeDH8Q_{D{DE<|l{NJqYTuzPkLlcGUjfqE{
z-(TsiF!(95Xr9>GBK=G`$M<ag^O@AG6P&xwx<BBZ<bG)3%%?k(kMQk!Fso_yTxE6R
z3kI@h9S--*YW$M7uwlo<L%DZeCb~V^H8V4PhVSZ2hG(>*k1W|WN#gE1+Zb0V^UT(X
zzqnCnAS%=zg(R>rFi3GTFj!+2>#(z8h?xXk8x~zI6)N(NkH^Po=Ch`SrfO?Fc|@6b
z*KW(yHSxN9CE=v&%FayfvIfqXB54I_9WMWy->-XG9sim;H^w`B{$JUDe5Lan6;d`V
zjXd-y=G)Em_xbg|=hXgqygt63y@1u;!=nAR(lpgMM`iD=@z~MruewI*?DF{^FWPcS
zuJMdGeoVmZ<4=Ko4%5uPvwv91!oQ7egKbM;%~5shJ(UWN6z~7{IC5R7i09vuW!c|^
zn$2ZyCJ9XIU3AJ|(#4X}y<sO;KdV)ZSMyXz7Mb#CT2fZcJt3}d^U@4s&P$|e#u-gb
zitH6X6=gGZdrq+CjoHfrLw%!z*QPEG3jCsEzB}gyyGqDemctRAwqaIxR@|Dk(72~v
zd1mcXhtBDudqvLQTDEMuN9m!ZRqk6w*RBzhTvoLvk~KIxQbtGO*SfG(Gg&M}>^QeE
z-&&Rw)zU5eaJ^M>+s*5*CvUr0^i}Jvn$aaOZeK2myxkWp)VgJN3;7ylZ42vLuX)ww
z$&oom;iej#I=-G&i{$b&pU&D9w0_pARLcoWdvAQnC_YlszS2b1Wh1X#&kdequLOIg
zWt&5C(`U-hnf&5hnf#o?uRcE9d2Q{ilEWq5({}8iTIHClB=V6r)qM5TlA~ptem|D}
zd~^Bj6PgFted9fGbLYhS;oN0PkHi8`y_#NGywrN}+ozA}V-LlJ>sPKSkomUR?X1p<
za?^Q-GU~-^8*fkgXr?CN=6`GIN4;gKXA+{0Bul<`7MwUe&>`@cPJG}-?L|kzzqw=>
zef#L*66waei^JJX^!_P>gX<6IN}6TyOek!4pK_(}#O(Ky=Sq%hD>qeIH7?hR7WpP^
z@b=of3tlsqM!8;n`>iu=MqTXIg+<bJFV6WbnSbar*W=??!|w&JNse%N8M8zG@Zl}1
zS3l8f`>)=y?)9MwZ;o%vj=z2Q=s%N{8!oJV6cYSMQQ+Mg2FbIJ!t-`SA2L|5b(O}{
zz3X^oubMuX7Iaee-?gi|-r5}If01}~uIKmQ8~a`@S|)z=*D*ck4dSAi|5UDI{O;XQ
zT|WP4+J#jgoqfMmXuK>ruWl*vPE|bZ#s8z0ANo0t<=bw3e?iC3_O9=bXY-r?Ix+LB
zt^8_zNMYH5_iGn7eBQyp!KBaYXMg!sdZoYB`^0srTfA<?z0!B9-G6ZN<GJ|<X5STD
zA-swA$F(HhAJ>yiM5QHe{JWmbz_fKTOG($h=YmT8G6zd8M+r9?b6xD8c#k7=fx+9f
zJrxtgzW-CN{2n~NyJ$ZzkC}pOVn^cNOM)qF^EWs(8}sNt+^Olq(fE+fO{~`O{&|V+
zizQz;lH+phkE)#A;~3A}$f@{3SkC-ISoX{J;v3VS9DKcCpTm@<8yted+(!<zA93#b
z@K#`xdrP<b2A2*ofu;F97g`HMr)TIT@O;-`oj&(R(c2$IU-CaQw||w1-0k%-Hv4&_
zx0q+q++$3hc@LD^_s?Fe`Dy9`!RHB6b&6(RsxkQ{^q7}r{?{|PlOtEYjda@dH1d$H
z)7_WHFMs-CXjk~_3eqJ<pcT23=k5{R!^FU#z{<d&gIREaXZMj8WkRZENTu8x91T8N
zWZQP#w@OQ6L>x857wVqs65kWBn#E8t^T-mm=6q-C$Tm==tSea0bzW`Xj|H7x6I^P)
zxl5jZqqkJVvu5U8%inX~*W9->-v7VmKVyQRwq{B|K%kFelN0|hgZYb|wsp<+)LAm2
zVcs!a9ks&fy7wz~Xz&02WYdyI`?}b<gsRWv6oxx4+jRQdjC-G2R))8{*42KolIw6%
z-06p0X}jvCH*G$t<GjAAZ$r;hqopcdhlJuH9ZlkIga|}$eLdw)xS&}5#jKp-=c2dG
zG$*aSVVag_<bI-9qNmcjY04ArZOJd%4!=^np1tfsywUNeUqg3aObCk&$~Cb~j1JMu
zEXWU*3NV<VKWDm&oS%Tp9NWf{S#xZ~_Z*0tb@jD&-^Hwg)4>K)?E5G6O<u!$@}IYV
zQTxt?Nm&=ASy!&QTa@)@=LA`a-3MMZrCc&-Nn~YSo)s?B{?_}X)USzG3uHHQsrjv5
zH(SkoM&#ES$FJYJw&T2OM}tZD_MNjg=jd-g{H>xd^0(!GBb^gY!CG=z6)cl%qz-vj
zuDfVtd(ot_p@cK)Q1ULDyMCrqKDrzZ*39_!|5#;PYI61T>nqx)pX+OT*P(rnb#L_Y
zscqWs8}C{EQkYueo|zEa%=Q1|zSW0fvrYEa`dpfQ@<Q{X#*=!N${YLyd6fLBvn<z6
z*9>x-Qs%I%f@5h#&xvb_i{b@;zgfKgg4TUS4P`e=<t>+)Y}O@Rc`ko=>x9*kMY^K<
z73&)AC97=P^h$A(eRsTowG+Q&t$bUs`okB3mjwSbeOnn@IK!kVYh}pd`9F@YbdnR6
zy*mBY)#I9SA6LIFne_kBq_0zS9<ndDp7c1e=)we<qiTktQ59~2nc<UFKD)o@eNbAi
z{cM7orSiT2dFf9l@0z65NS35K9}CjZmj3c!QWNuvjr=KplBXa0$=7>x+d)hDssGWc
z?`;P*6whE{U|7Y*z(B%@JC5avMXBH{QiQZPeQoGlKjA<D+wHNt-j;0?U0C;kJxOqv
z2$$)J<y=CBD!j9<m>jI%{x(W{`|fA&uJyZr<oxrP-`AyA#jb%rbx!oOs7te4AI(2^
z?##^MW#!M_{rmIvFMC7f8ubZBczLZheaduNJb$*7n&DDKcITs8*hG!B(o9thmM6uz
zPE?F6p6&bYiubf+qn_z87d1>+*56KBm=@i7Jf<S8_A`IlRK;iZ+xNVQkuMQn7ptLC
zb+$<7zuw`)D+?-i)T}$SAZJ%;mYFKE_1d(nBK{Rumz@YWxwdfkp;pzu3M)>ONgDOX
z{hIbVXU{XG0PWv*mhaj3)?(R>EjypjY>DQ3Yjk^l=fCV-C+~;(erl=jNR*hV_4Z@<
zo`mA%ff~OL6-lk$xtu3ZPioV{c`Cg}O3PxNcPLb@x%vC=i$m6S_m+i~MK4Ori~Lk!
zWcwm=<pSl_Eq$9KE*N$gTjeE(99`6>s<S%c;>Ir1w$^~Bz3KZjx9L3m`83Sb!<gqg
zxA#_S9%J>#Ha7B7Gw!|h;=8r=Xz;lSivRV~S3Z(smQXt!Zf$)(?(UAKvb<$FIoBoG
zD);_-(5qk3dRzCi{^^jHOJ4;ppZPiJ?3X=n4#~vc=@gD}-MK{n-|4BgFK_02Uom%H
zwvEj4o{Q2!%gfVQ4i_D5EQ!*ybzKyY(chM_>G=t5K_xG<T%Y&Nl5ez?-0I{n`a9~b
znD$ABzxS-d(KJJ+jXY0Rw7qd%@K?oD=}0B#V#d#0KZQh%m+g1>dEsHV<GRp|-RjpK
zc0M#(7clRk)-k4ewme$`T^}1Saxrz7R_pvm)`&fze5tzemC8T1LF^uH*w%g9xOk-}
z+qMpi=vS9?%SyT%^H{FmHhsLq;hMrzmD>1o+JaW6W!7na(w^)*Pk?h1>!ZD{VlRK>
zqqbPpSv+_o85tPTL4y&P1wYb21ll6K-audP!wv$qm#-bWvVN8Nc_kArxAnf79)TiE
zT%7l|zucAMX};<1lz(RY{}}40?C@R4GTBLd+T6;`@AoD?d;a~k^a0y*vL&)}gyyIO
zbFbrFc%OTD&cQ!ZG#~DlSa;LnsUv5#tbb(^(|RsJtNzn1^-?oe?rb)0lJ;CU<D{wj
zovWPuUJ)r<LX;%f3S}z33C*icT-K^@GJDA#|Dc!q4Q-!K4m6u>Xv(?t*3QkZWtW)k
zTQ~PuyNK)UaM7gBpo|v>s@1=+74FZnSu;0i%2TU{{zhAWiL36sSj8E0e_ce&>9^-*
zuWxx*&wc1@*`sE$pFP@}Z-&fv$Ve!irRyZDJNxa;HK`xXWTZ_u#E0CO<#jxauOQ+`
z_7jfdN2@=vqlT~ZMX_hEm>C#qxEY8$#?C1<Ex#z$EwiY&B)BvQ8vc-q_GnmcuyD9Y
zUEZdG!p#{Tyi>Q`@Mw03<8WN<Aiym+L8vo#irJ0rz7dzNrd+?7v#Nff{lfo;&TVO)
zw(?ta`md$`7uzqoTwI>ap|;^*;j;L5r+1#WeZKSjyy|cNzP``bXIN4ge=PFM+(l_|
zZW6^Zl9EDVsX7m$7`%6!``9@{r+5v=4PHs3Fw1Lf8_pF5A7itdYS`i1H>XEno5d%Y
z?UKLVzW-WP(w3-`9yK-OXi)j;s8umngCjSG1^3>alb5kQboIJONhkeRZF%~JYy7JY
z$GclRyK5GcmNnb))U4f7hb0qlWL@!^RqC!AdQ#}r%+#&B)<*4`Vct7w!D)fGi|=Mn
z+qNvaboT`f(^?}9>9&QDH&;ci&t{qZSw-^gs}4UY4S9#%9hWP`+8i%y+*p<RT>j>+
z%z~P822c9#Mr_HLyngP<JIr6%O+!O_y;dbe?+QFw618m4%JLs-e05h>alEwpb8)+|
ztWD<=zB{b5p7Hwz?`6xgl*^i|b-$-D#%$*d9`{B@9{&4YT9upqtY1E>Hd3CoB_mC8
z(cU-76S&h3YX#5hkdb+Jz=wOM=$ykHO^JI>7q%-_DoGxFwwr6)N7m!hxw>v7c2sz-
zcp$egX~y@SlIs;2Gx~e=PFB2Ip~@<5pcYYkTEO1wz^2g5D<+5IYJ;xLn*Yz~a_W+q
zVkfm8D&?JXRXg@t_%hGI=09hq%Vng_er(OE<)xG~^Wm!J6V3W9E);xceK`Hq;i=}U
z+4l+*IqTVn>Ap2sxL7RBzwY46xXR_PH+DX5k}#{*a@rfG6{$OYYn82)x%2ebLHyq<
zPxW5i@HOkzv|k<~=bhQKQbf<D?YQB0`RtVN>f65TJkMC0KgLPCDVQ*?(fQTC8@^Mw
z2UOk4E?GZ|Ek4~zbWYIbWs`J1$5!04j$0YFm9PA!_M^X_*-~#jzW(4Z|BwAgeCHg|
zR{FAl>#}tZYxAN#mrwA|U-9kB_p1Ih^UQb4<2sMlx9W@ezd88d_-t*({9BikdSjjC
zqn#RJkF%#pFEc%qJf-HaWZ+g+>BH_jZhZM=&UV3l?>7FG68;-(zvLdRYkV*HOZfc3
z+8@l!+?P7Os9#$5^NK&mTW7svnNl5sah1FJ4<^0X^2Pd`$lsHPj9I=Ld?}J&xah`2
znTc_V_bYELs+ODcBCYckZ_lRCo{gp|H>`|un(uX6w*O0=9?`w8jnhXc*KpRNiTsRj
z6xvv1Yo|Q%Op*!`P>I}Cxx;3y$CYAt-LeH?`wI?>%{4F-zr=Oe(EF8fddbO)-aUGc
zlb=_|6xPHXZNIX9LRtDt%bIyQfv=9MevFB8m=kwY^Oc}XLxo<tqTFHb9j-eJzRvfw
ztMUD7KIsqIfwhwvtY?B&R{vpQV6en2qk}7pOHy-*8B>@$>9p5j2a&e(H&*9v-F7YO
zs_Pw7Ct;T(t>P2J#Dnw$Ue?W$yF5`vSnaaYJ+JU4ZutZ4Lh6|UD;<wS&N2Rc{{PuI
zJNMM@*~fT*F=x60f3H%G$im2$vrIXn3tScIPuZxQ=S;cvYl6eJ)5kp)+)9_b=W|!E
ztJ<n)O3?e_ZYyW42ot{}YvycdI1o6W`}P)AS?i1_K4bPog^PQI@3<>Uvc<mJbw%2}
zyKJX<uu_*Wlc{uS5?6lh$sO(sE?)cHU*hK5UbW3~LR00oP21`ssx>r=3w#pHQ}s5m
zc!%lm{s@VvIJv0n>;5TeF03I77+jf_xG#`75^y|Rr13GMSMIuux=m}3$(vX@zRjvw
zyY=D*-nEyD4`+Nk!<&9(=At|Si$BLrSKf|Ypy$>oa%Ix~;tlMUUmpaXX6ih*=+P#r
znOeeAi+}4o%nMED^wN5kypb<eRJ*wOl(ykMuB$BXR0@{u`?96MZ2!WU`X#!oXLS}?
zuguV8J*&4Uifff!_M52xEU49ju4qORI}-!LZWaaxBg`m;7W+P#dC+#H3P$sCYViI1
zO93MP{4_<lix1ohF%dSGF)({oV=UpTGHb?!T+TH9>fH{9cTI^dzODPq{^z4^HM<||
zKinnH=iW?dX34xXPyYLy_chPw+`eC5_m8Q7;kcXBZ8M`A=N4r>mMqh`qHDr2Gko5c
zMb}$G_7*xBSRcHgx~9zbrCeOk?{&X4e%V!f-Az#VBfQWi*MCl0$+Om$I~xNQuCAKc
zabxCxg{7;l_Hj5*=a0VY&^u?Mkn{agwtde_nm$IHSh#M&lhEatZnqqYsCl_NXXT3*
z!B1ExZHRk6uiN|K52wC4aZ7A0v&{WCBlY)id2*Uw(n){Bq1`ygY4Zke4Ygmq_m&rT
zozdLAW3FV*vkBZ!5*5~;*f;n3`AfxIPeM*uTW-0a(h#s(zs)M+b*}XM`NFII3;S-U
zKG-U?<Gt<PAH6eYOv}^Y=Z<U&trs@?7VXPjUHN8l#zNN{(aUV#y}RGg_v`sP1*JU^
zb)Q<+9zA<kV_HsgwM2JD!7}}VRKcdmhx@ge|3(;m;i%v&zflzN$!)^Dzl@Kiwb^q1
zrEQsf>cqx*{!9_;nAD6N4cW9d`Z(O0lbZ1&P4lblv+L0YvklL<2~9e1<d)RCMu`Gh
z{-g)(8qV>3?$_VU+IaSTZkl!Q8I4Arj1y-)A9+tZwq)(hC!a*-e=(hJGH-#<a@*d=
z+Y6?rN^07zOgy^9a&g;l9r?{JioeBCla<>xj_<b23=F3@v5r4tCM)o?5{|~k>!8a4
z0{{G?IhHVbu^zEbN?oA4g;j7vg=VN;kVb^-#rTCsq%=4uvwak;VYttnUJ@{~@Xzy}
z{zuLh?>xCgyk{RWcsze|b=mit^zzNSE%*Pc`OT-$<|=r$!{m~X=*6ZEk4=vHZ3W&3
zHZ<IhG1hWAFr&em>6mLpp?^!qZzeeo4}&YS^RGB5oVqagQg4h(`ia$*s*}#Hp15S+
zM#b#anxVmBDT^0K6mF6WH@&*&xWU!eIsewTs>bPV?EbE6STX6>szVnJv3`!0*6#Wk
z5_fX)k@=T47u9*1h3wPkvb_`Aw(UrEu%TM$;)JwqVO<k=F7+(abTiTy=i-X&wfH4r
z!PoxiOB%m!zo&1d3XkuuX@;qXee!3mTF2>OwB^>GT2W2kLsd6tM@_zeYT}+XPn9Gu
z2W*Ym6LabLn@UNupAV+>%J|1f%GFxl-K4(U%gI|j_3FO8xx(TfCNT26PXF|9?&0pp
zqYtaZpL+M`H}f<`oipr^tUY}~YqHk*8GS~x8RyAy*3Fsxz<G6pyO5fpYC_Z@9y#L@
z%hcw?GZ!bO-`ce-Kl}I;hji7cE#5oM$j)ATt8`DoHl`YPRoD7n)70xcrNJ+!?KV9>
z-Q@6xmA6(+oAE|k`1YoSGQBG&C4G2O>-6+pjB9B30kcI`lQ%~$4dCc;Tj|xFmC<9j
z+jzT=sr<~z=0C4kC@tVSr>W-b|8j!I`Zb!7#s70wdK_TtU8THmuYKO0IgZxT&drRP
zUA}EuV7T_J_rW~#UWvw?-DC0fU-+v->W;4Bf1gIbusu*?XLq5a`0&x^(q~_r-SWMB
z&o_jt&F-)$Piy(Ze;(E1w=#2|S#9gE`o<DB&5`}2m52K`i~jD0qZd<@BoB-3I_vhg
zfvuuLt!e&)Urhc#v|q*iX#Dl_-~12n9V7TAE>k>Fc5%mM_QILB-yYm8!!_CC)%|d@
zw=z2(J#Tnp8FiC;%jQ&;8``JmE~vfOXw4bpZoc#2PU#~%HC|{i?cm(rzxaz-tWR*p
zpCt)L&+sL$d)|K2iE~E*gWiJv2Q!Ycu3cd%?sVf+*8Wz*25uL14d%079=%dI8M^4Y
zjZ#&|q3aInNfM0pSw{ub_fCHi`@ubmM_5igt#g^pqj=O@nR%}kynDulfq_Aufq@~t
zC^01`RW~^&Q#Uy$vADQ^oErd)0x)j?c#3lFKO>U}GZzB`2M2@Jrm5}>3=9mgQ|~!J
z955DOc<Tsa__~HT>U#RQ=?8eDYP&vlMiA)OqEH3~1`&`-C|=U2$-uw>*XHZ!=jrAe
z9HQszhHL`pZY7Wgn8hq04j2nC{B@iKt@!Zi;0CJ(5g;A=pgJHSh~ijutAs&%z!;?Q
zC8|c~1q>JlWHhCuEoNX~xC*_m0j!^afniG{=)wdn_k+Q1g;FtubkWega`ay7H4!ET
zh5{A_22k`P3<O06J_FHPbpb{B6_t*qB^jxCC7H>HgPqZBeEue)cohQ!!)*r0rJV>H
z%UDp>8NkB86Xpl(K44i{bvK5IfgzBEfk78$7&u50Sh1St4o=;n#i>Oe`MIe+`T5zU
z1=x+2c@%s5C@TZQFD}T0FwFEVjqDs)jdra_&M7U<EK7CE%t>`jPEIW@_D#%7OiwMs
zZowJeI5*G<+CSOQ-7d?A)q-%ywQtV(d1;yHrA3J)@H^hngE_EPHuEV91A`<N1A_s~
z;oxBTDS*{z^!W5h%uC6^<?yAI2D`#QnT8coV!$le(x@Yb)pVcybey4ZuIP1_C_4i~
z9v=gP8q6fHQ};_?H4Np_JM1BAac|pQX<-J2H<}C#3MghuC}1_y6LL{3cGK=2@aa6l
z!oV<<6Fr^^jj@{Ml%HP$x^|<$H!(9WxFoTt1XoCUT-&a4nTvrzS%QH<1I3{Y7Fdmj
zCIQ!q)a25Vd>jd@MQX|FO<W8NP7(|ZS}3N@bjE5bBvU}m200pgK%n2a3p094<77Xq
z=0bxNSMh^>!y&@F;!v#Sp+vn3j{6SL^8)(qc?er-!?D_ezr;bmGzVea)p&U22`#;_
zR-nl415IwBUwVTuFfD-r=M^~Urhs&TvLuKGT^j?U85ls7OI9L^FF^~q&^1p$yF><}
z8C0>%#MBI1CV<pjM;m-aHy-_B5QOn6DHv`;G8WRS#FlzM;f8(%2f|c=R3c2pb0G)1
z`_ZpDK-eUdL4-|^*+|GjA@rj~(XB#1X&zzKzbt%K;W&35-FWmP(-Fo`$-!qlG$!yJ
zppI^p4BFYN2&=Z05MdQ0&<F(=`tg+rOKy}CVF~`jEYW?0e%c?xGO=oWmO;FNS)_sz
zG5T3k2s7U_;4>2%ArPY>9HhxTbeE$aYJ{-mWfO)iVAGIi)3Lj}8|^>^gqa7sFwG?X
zcm;H~8=-CTN7ys92g4rB!o!)EE&u3twxVs-M%d}xPuNcE;f21Y7GdVQg@nz-vceYK
z(dY{p5f(gJLWBj7qy#CYu=@ag`53~2qsuWYfYlPj4E&<|1bw*&!lJr07#3k9SBOWj
zdjWk49bt0IdJK~>6BEQ}>=Wwfo<N_vL|9e637=I2>QZO489Rir*Ei!c7931PDtPAa
z&^>@Yu8*)udIyG0==}$zup{6h^s#1yHScy}Sc7CN)I$&oPuYh)=!h`ibq^8d;~s)U
m53fwLaTJ6_xA(&vhPXym0=!w-KpG4g3>gFj85rUYfOr7Y2S`i+

literal 0
HcmV?d00001

diff --git a/tensorflow/examples/android/gradle/wrapper/gradle-wrapper.properties b/tensorflow/examples/android/gradle/wrapper/gradle-wrapper.properties
new file mode 100644
index 0000000000..bd9ee87db3
--- /dev/null
+++ b/tensorflow/examples/android/gradle/wrapper/gradle-wrapper.properties
@@ -0,0 +1,6 @@
+#Sat Nov 18 15:06:47 CET 2017
+distributionBase=GRADLE_USER_HOME
+distributionPath=wrapper/dists
+zipStoreBase=GRADLE_USER_HOME
+zipStorePath=wrapper/dists
+distributionUrl=https\://services.gradle.org/distributions/gradle-4.1-all.zip
diff --git a/tensorflow/examples/android/gradlew b/tensorflow/examples/android/gradlew
new file mode 100644
index 0000000000..9d82f78915
--- /dev/null
+++ b/tensorflow/examples/android/gradlew
@@ -0,0 +1,160 @@
+#!/usr/bin/env bash
+
+##############################################################################
+##
+##  Gradle start up script for UN*X
+##
+##############################################################################
+
+# Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
+DEFAULT_JVM_OPTS=""
+
+APP_NAME="Gradle"
+APP_BASE_NAME=`basename "$0"`
+
+# Use the maximum available, or set MAX_FD != -1 to use that value.
+MAX_FD="maximum"
+
+warn ( ) {
+    echo "$*"
+}
+
+die ( ) {
+    echo
+    echo "$*"
+    echo
+    exit 1
+}
+
+# OS specific support (must be 'true' or 'false').
+cygwin=false
+msys=false
+darwin=false
+case "`uname`" in
+  CYGWIN* )
+    cygwin=true
+    ;;
+  Darwin* )
+    darwin=true
+    ;;
+  MINGW* )
+    msys=true
+    ;;
+esac
+
+# Attempt to set APP_HOME
+# Resolve links: $0 may be a link
+PRG="$0"
+# Need this for relative symlinks.
+while [ -h "$PRG" ] ; do
+    ls=`ls -ld "$PRG"`
+    link=`expr "$ls" : '.*-> \(.*\)$'`
+    if expr "$link" : '/.*' > /dev/null; then
+        PRG="$link"
+    else
+        PRG=`dirname "$PRG"`"/$link"
+    fi
+done
+SAVED="`pwd`"
+cd "`dirname \"$PRG\"`/" >/dev/null
+APP_HOME="`pwd -P`"
+cd "$SAVED" >/dev/null
+
+CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar
+
+# Determine the Java command to use to start the JVM.
+if [ -n "$JAVA_HOME" ] ; then
+    if [ -x "$JAVA_HOME/jre/sh/java" ] ; then
+        # IBM's JDK on AIX uses strange locations for the executables
+        JAVACMD="$JAVA_HOME/jre/sh/java"
+    else
+        JAVACMD="$JAVA_HOME/bin/java"
+    fi
+    if [ ! -x "$JAVACMD" ] ; then
+        die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME
+
+Please set the JAVA_HOME variable in your environment to match the
+location of your Java installation."
+    fi
+else
+    JAVACMD="java"
+    which java >/dev/null 2>&1 || die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
+
+Please set the JAVA_HOME variable in your environment to match the
+location of your Java installation."
+fi
+
+# Increase the maximum file descriptors if we can.
+if [ "$cygwin" = "false" -a "$darwin" = "false" ] ; then
+    MAX_FD_LIMIT=`ulimit -H -n`
+    if [ $? -eq 0 ] ; then
+        if [ "$MAX_FD" = "maximum" -o "$MAX_FD" = "max" ] ; then
+            MAX_FD="$MAX_FD_LIMIT"
+        fi
+        ulimit -n $MAX_FD
+        if [ $? -ne 0 ] ; then
+            warn "Could not set maximum file descriptor limit: $MAX_FD"
+        fi
+    else
+        warn "Could not query maximum file descriptor limit: $MAX_FD_LIMIT"
+    fi
+fi
+
+# For Darwin, add options to specify how the application appears in the dock
+if $darwin; then
+    GRADLE_OPTS="$GRADLE_OPTS \"-Xdock:name=$APP_NAME\" \"-Xdock:icon=$APP_HOME/media/gradle.icns\""
+fi
+
+# For Cygwin, switch paths to Windows format before running java
+if $cygwin ; then
+    APP_HOME=`cygpath --path --mixed "$APP_HOME"`
+    CLASSPATH=`cygpath --path --mixed "$CLASSPATH"`
+    JAVACMD=`cygpath --unix "$JAVACMD"`
+
+    # We build the pattern for arguments to be converted via cygpath
+    ROOTDIRSRAW=`find -L / -maxdepth 1 -mindepth 1 -type d 2>/dev/null`
+    SEP=""
+    for dir in $ROOTDIRSRAW ; do
+        ROOTDIRS="$ROOTDIRS$SEP$dir"
+        SEP="|"
+    done
+    OURCYGPATTERN="(^($ROOTDIRS))"
+    # Add a user-defined pattern to the cygpath arguments
+    if [ "$GRADLE_CYGPATTERN" != "" ] ; then
+        OURCYGPATTERN="$OURCYGPATTERN|($GRADLE_CYGPATTERN)"
+    fi
+    # Now convert the arguments - kludge to limit ourselves to /bin/sh
+    i=0
+    for arg in "$@" ; do
+        CHECK=`echo "$arg"|egrep -c "$OURCYGPATTERN" -`
+        CHECK2=`echo "$arg"|egrep -c "^-"`                                 ### Determine if an option
+
+        if [ $CHECK -ne 0 ] && [ $CHECK2 -eq 0 ] ; then                    ### Added a condition
+            eval `echo args$i`=`cygpath --path --ignore --mixed "$arg"`
+        else
+            eval `echo args$i`="\"$arg\""
+        fi
+        i=$((i+1))
+    done
+    case $i in
+        (0) set -- ;;
+        (1) set -- "$args0" ;;
+        (2) set -- "$args0" "$args1" ;;
+        (3) set -- "$args0" "$args1" "$args2" ;;
+        (4) set -- "$args0" "$args1" "$args2" "$args3" ;;
+        (5) set -- "$args0" "$args1" "$args2" "$args3" "$args4" ;;
+        (6) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" ;;
+        (7) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" ;;
+        (8) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" ;;
+        (9) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" "$args8" ;;
+    esac
+fi
+
+# Split up the JVM_OPTS And GRADLE_OPTS values into an array, following the shell quoting and substitution rules
+function splitJvmOpts() {
+    JVM_OPTS=("$@")
+}
+eval splitJvmOpts $DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS
+JVM_OPTS[${#JVM_OPTS[*]}]="-Dorg.gradle.appname=$APP_BASE_NAME"
+
+exec "$JAVACMD" "${JVM_OPTS[@]}" -classpath "$CLASSPATH" org.gradle.wrapper.GradleWrapperMain "$@"
diff --git a/tensorflow/examples/android/gradlew.bat b/tensorflow/examples/android/gradlew.bat
new file mode 100644
index 0000000000..8a0b282aa6
--- /dev/null
+++ b/tensorflow/examples/android/gradlew.bat
@@ -0,0 +1,90 @@
+@if "%DEBUG%" == "" @echo off
+@rem ##########################################################################
+@rem
+@rem  Gradle startup script for Windows
+@rem
+@rem ##########################################################################
+
+@rem Set local scope for the variables with windows NT shell
+if "%OS%"=="Windows_NT" setlocal
+
+@rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
+set DEFAULT_JVM_OPTS=
+
+set DIRNAME=%~dp0
+if "%DIRNAME%" == "" set DIRNAME=.
+set APP_BASE_NAME=%~n0
+set APP_HOME=%DIRNAME%
+
+@rem Find java.exe
+if defined JAVA_HOME goto findJavaFromJavaHome
+
+set JAVA_EXE=java.exe
+%JAVA_EXE% -version >NUL 2>&1
+if "%ERRORLEVEL%" == "0" goto init
+
+echo.
+echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
+echo.
+echo Please set the JAVA_HOME variable in your environment to match the
+echo location of your Java installation.
+
+goto fail
+
+:findJavaFromJavaHome
+set JAVA_HOME=%JAVA_HOME:"=%
+set JAVA_EXE=%JAVA_HOME%/bin/java.exe
+
+if exist "%JAVA_EXE%" goto init
+
+echo.
+echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME%
+echo.
+echo Please set the JAVA_HOME variable in your environment to match the
+echo location of your Java installation.
+
+goto fail
+
+:init
+@rem Get command-line arguments, handling Windowz variants
+
+if not "%OS%" == "Windows_NT" goto win9xME_args
+if "%@eval[2+2]" == "4" goto 4NT_args
+
+:win9xME_args
+@rem Slurp the command line arguments.
+set CMD_LINE_ARGS=
+set _SKIP=2
+
+:win9xME_args_slurp
+if "x%~1" == "x" goto execute
+
+set CMD_LINE_ARGS=%*
+goto execute
+
+:4NT_args
+@rem Get arguments from the 4NT Shell from JP Software
+set CMD_LINE_ARGS=%$
+
+:execute
+@rem Setup the command line
+
+set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar
+
+@rem Execute Gradle
+"%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %CMD_LINE_ARGS%
+
+:end
+@rem End local scope for the variables with windows NT shell
+if "%ERRORLEVEL%"=="0" goto mainEnd
+
+:fail
+rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of
+rem the _cmd.exe /c_ return code!
+if  not "" == "%GRADLE_EXIT_CONSOLE%" exit 1
+exit /b 1
+
+:mainEnd
+if "%OS%"=="Windows_NT" endlocal
+
+:omega
diff --git a/tensorflow/examples/android/src/org/tensorflow/demo/CameraActivity.java b/tensorflow/examples/android/src/org/tensorflow/demo/CameraActivity.java
index 4e45f42d0c..8bd4abb154 100644
--- a/tensorflow/examples/android/src/org/tensorflow/demo/CameraActivity.java
+++ b/tensorflow/examples/android/src/org/tensorflow/demo/CameraActivity.java
@@ -333,8 +333,12 @@ public abstract class CameraActivity extends Activity
           continue;
         }
 
-        useCamera2API = isHardwareLevelSupported(characteristics,
-            CameraCharacteristics.INFO_SUPPORTED_HARDWARE_LEVEL_FULL);
+        // Fallback to camera1 API for internal cameras that don't have full support.
+        // This should help with legacy situations where using the camera2 API causes
+        // distorted or otherwise broken previews.
+        useCamera2API = (facing == CameraCharacteristics.LENS_FACING_EXTERNAL)
+            || isHardwareLevelSupported(characteristics, 
+                                        CameraCharacteristics.INFO_SUPPORTED_HARDWARE_LEVEL_FULL);
         LOGGER.i("Camera API lv2?: %s", useCamera2API);
         return cameraId;
       }
diff --git a/tensorflow/examples/how_tos/reading_data/fully_connected_reader.py b/tensorflow/examples/how_tos/reading_data/fully_connected_reader.py
index a9ed02dd1a..9db8835d92 100644
--- a/tensorflow/examples/how_tos/reading_data/fully_connected_reader.py
+++ b/tensorflow/examples/how_tos/reading_data/fully_connected_reader.py
@@ -45,9 +45,7 @@ TRAIN_FILE = 'train.tfrecords'
 VALIDATION_FILE = 'validation.tfrecords'
 
 
-def read_and_decode(filename_queue):
-  reader = tf.TFRecordReader()
-  _, serialized_example = reader.read(filename_queue)
+def decode(serialized_example):
   features = tf.parse_single_example(
       serialized_example,
       # Defaults are not specified since both keys are required.
@@ -60,22 +58,26 @@ def read_and_decode(filename_queue):
   # length mnist.IMAGE_PIXELS) to a uint8 tensor with shape
   # [mnist.IMAGE_PIXELS].
   image = tf.decode_raw(features['image_raw'], tf.uint8)
-  image.set_shape([mnist.IMAGE_PIXELS])
+  image.set_shape((mnist.IMAGE_PIXELS))
 
+  # Convert label from a scalar uint8 tensor to an int32 scalar.
+  label = tf.cast(features['label'], tf.int32)
+  
+  return image, label
+
+def augment(image, label):
   # OPTIONAL: Could reshape into a 28x28 image and apply distortions
   # here.  Since we are not applying any distortions in this
   # example, and the next step expects the image to be flattened
   # into a vector, we don't bother.
+  return image, label
 
+def normalize(image, label):
   # Convert from [0, 255] -> [-0.5, 0.5] floats.
   image = tf.cast(image, tf.float32) * (1. / 255) - 0.5
 
-  # Convert label from a scalar uint8 tensor to an int32 scalar.
-  label = tf.cast(features['label'], tf.int32)
-
   return image, label
 
-
 def inputs(train, batch_size, num_epochs):
   """Reads input data num_epochs times.
 
@@ -91,31 +93,32 @@ def inputs(train, batch_size, num_epochs):
       in the range [-0.5, 0.5].
     * labels is an int32 tensor with shape [batch_size] with the true label,
       a number in the range [0, mnist.NUM_CLASSES).
-    Note that an tf.train.QueueRunner is added to the graph, which
-    must be run using e.g. tf.train.start_queue_runners().
+
+    This function creates a one_shot_iterator, meaning that it will only iterate
+    over the dataset once. On the other hand there is no special initialization
+    required.
   """
   if not num_epochs: num_epochs = None
   filename = os.path.join(FLAGS.train_dir,
                           TRAIN_FILE if train else VALIDATION_FILE)
 
   with tf.name_scope('input'):
-    filename_queue = tf.train.string_input_producer(
-        [filename], num_epochs=num_epochs)
+    # TFRecordDataset opens a protobuf and reads entries line by line
+    # could also be [list, of, filenames]
+    dataset = tf.data.TFRecordDataset(filename)
+    dataset = dataset.repeat(num_epochs)
 
-    # Even when reading in multiple threads, share the filename
-    # queue.
-    image, label = read_and_decode(filename_queue)
+    # map takes a python function and applies it to every sample
+    dataset = dataset.map(decode)
+    dataset = dataset.map(augment)
+    dataset = dataset.map(normalize)
 
-    # Shuffle the examples and collect them into batch_size batches.
-    # (Internally uses a RandomShuffleQueue.)
-    # We run this in two threads to avoid being a bottleneck.
-    images, sparse_labels = tf.train.shuffle_batch(
-        [image, label], batch_size=batch_size, num_threads=2,
-        capacity=1000 + 3 * batch_size,
-        # Ensures a minimum amount of shuffling of examples.
-        min_after_dequeue=1000)
+    #the parameter is the queue size
+    dataset = dataset.shuffle(1000 + 3 * batch_size)
+    dataset = dataset.batch(batch_size)
 
-    return images, sparse_labels
+    iterator = dataset.make_one_shot_iterator()
+  return iterator.get_next()
 
 
 def run_training():
@@ -124,16 +127,16 @@ def run_training():
   # Tell TensorFlow that the model will be built into the default Graph.
   with tf.Graph().as_default():
     # Input images and labels.
-    images, labels = inputs(train=True, batch_size=FLAGS.batch_size,
-                            num_epochs=FLAGS.num_epochs)
+    image_batch, label_batch = inputs(train=True, batch_size=FLAGS.batch_size,
+                               num_epochs=FLAGS.num_epochs)
 
     # Build a Graph that computes predictions from the inference model.
-    logits = mnist.inference(images,
+    logits = mnist.inference(image_batch,
                              FLAGS.hidden1,
                              FLAGS.hidden2)
 
     # Add to the Graph the loss calculation.
-    loss = mnist.loss(logits, labels)
+    loss = mnist.loss(logits, label_batch)
 
     # Add to the Graph operations that train the model.
     train_op = mnist.training(loss, FLAGS.learning_rate)
@@ -143,47 +146,33 @@ def run_training():
                        tf.local_variables_initializer())
 
     # Create a session for running operations in the Graph.
-    sess = tf.Session()
-
-    # Initialize the variables (the trained variables and the
-    # epoch counter).
-    sess.run(init_op)
-
-    # Start input enqueue threads.
-    coord = tf.train.Coordinator()
-    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
-
-    try:
-      step = 0
-      while not coord.should_stop():
-        start_time = time.time()
-
-        # Run one step of the model.  The return values are
-        # the activations from the `train_op` (which is
-        # discarded) and the `loss` op.  To inspect the values
-        # of your ops or variables, you may include them in
-        # the list passed to sess.run() and the value tensors
-        # will be returned in the tuple from the call.
-        _, loss_value = sess.run([train_op, loss])
-
-        duration = time.time() - start_time
-
-        # Print an overview fairly often.
-        if step % 100 == 0:
-          print('Step %d: loss = %.2f (%.3f sec)' % (step, loss_value,
+    with tf.Session() as sess:
+      # Initialize the variables (the trained variables and the
+      # epoch counter).
+      sess.run(init_op)
+      try:
+        step = 0
+        while True: #train until OutOfRangeError
+          start_time = time.time()
+
+          # Run one step of the model.  The return values are
+          # the activations from the `train_op` (which is
+          # discarded) and the `loss` op.  To inspect the values
+          # of your ops or variables, you may include them in
+          # the list passed to sess.run() and the value tensors
+          # will be returned in the tuple from the call.
+          _, loss_value = sess.run([train_op, loss])
+
+          duration = time.time() - start_time
+
+          # Print an overview fairly often.
+          if step % 100 == 0:
+            print('Step %d: loss = %.2f (%.3f sec)' % (step, loss_value,
                                                      duration))
-        step += 1
-    except tf.errors.OutOfRangeError:
-      print('Done training for %d epochs, %d steps.' % (FLAGS.num_epochs, step))
-    finally:
-      # When done, ask the threads to stop.
-      coord.request_stop()
-
-    # Wait for threads to finish.
-    coord.join(threads)
-    sess.close()
-
-
+          step += 1
+      except tf.errors.OutOfRangeError:
+        print('Done training for %d epochs, %d steps.' % (FLAGS.num_epochs, step))
+      
 def main(_):
   run_training()
 
diff --git a/tensorflow/examples/wav_to_spectrogram/wav_to_spectrogram.cc b/tensorflow/examples/wav_to_spectrogram/wav_to_spectrogram.cc
index 1e375ed48e..4a429837b7 100644
--- a/tensorflow/examples/wav_to_spectrogram/wav_to_spectrogram.cc
+++ b/tensorflow/examples/wav_to_spectrogram/wav_to_spectrogram.cc
@@ -53,7 +53,8 @@ tensorflow::Status WavToSpectrogram(const tensorflow::string& input_wav,
   //  - Scales, clamps, and converts that spectrogram to 0 to 255 uint8's.
   //  - Reshapes the tensor so that it's [height, width, 1] for imaging.
   //  - Encodes it as a PNG stream and saves it out to a file.
-  Output file_reader = ReadFile(root.WithOpName("input_wav"), input_wav);
+  Output file_reader =
+      tensorflow::ops::ReadFile(root.WithOpName("input_wav"), input_wav);
   DecodeWav wav_decoder =
       DecodeWav(root.WithOpName("wav_decoder"), file_reader);
   Output spectrogram = AudioSpectrogram(root.WithOpName("spectrogram"),
@@ -71,8 +72,8 @@ tensorflow::Status WavToSpectrogram(const tensorflow::string& input_wav,
   Output squeeze = Squeeze(root.WithOpName("squeeze"), expand_dims,
                            Squeeze::Attrs().Axis({0}));
   Output png_encoder = EncodePng(root.WithOpName("png_encoder"), squeeze);
-  WriteFile file_writer =
-      WriteFile(root.WithOpName("output_image"), output_image, png_encoder);
+  tensorflow::ops::WriteFile file_writer = tensorflow::ops::WriteFile(
+      root.WithOpName("output_image"), output_image, png_encoder);
   tensorflow::GraphDef graph;
   TF_RETURN_IF_ERROR(root.ToGraphDef(&graph));
 
diff --git a/tensorflow/go/graph.go b/tensorflow/go/graph.go
index f200a8e00a..fc087d9d99 100644
--- a/tensorflow/go/graph.go
+++ b/tensorflow/go/graph.go
@@ -28,7 +28,8 @@ package tensorflow
 //                                 int num_shapes) {
 //  const int64_t** dims =
 //    (const int64_t**)malloc(sizeof(const int64_t*) * num_shapes);
-//  for (int i = 0; i < num_shapes; i++) {
+//  int i = 0;
+//  for (i = 0; i < num_shapes; i++) {
 //    dims[i] = flat_dims;
 //    if (num_dims[i] > 0) {
 //      // flat_dims will be NULL iff num_shapes is 0 or all elements in num_dims are <= 0.
@@ -132,6 +133,20 @@ func (g *Graph) Operation(name string) *Operation {
 	return &Operation{cop, g}
 }
 
+// Operations returns a list of all operations in the graph
+func (g *Graph) Operations() []Operation {
+	var pos C.size_t = 0
+	ops := []Operation{}
+	for {
+		cop := C.TF_GraphNextOperation(g.c, &pos)
+		if cop == nil {
+			break
+		}
+		ops = append(ops, Operation{cop, g})
+	}
+	return ops
+}
+
 // OpSpec is the specification of an Operation to be added to a Graph
 // (using Graph.AddOperation).
 type OpSpec struct {
diff --git a/tensorflow/go/graph_test.go b/tensorflow/go/graph_test.go
index c3120bc720..b8d65c54f6 100644
--- a/tensorflow/go/graph_test.go
+++ b/tensorflow/go/graph_test.go
@@ -29,10 +29,26 @@ func hasOperations(g *Graph, ops ...string) error {
 			missing = append(missing, op)
 		}
 	}
-	if len(missing) == 0 {
-		return nil
+	if len(missing) != 0 {
+		return fmt.Errorf("Graph does not have the operations %v", missing)
 	}
-	return fmt.Errorf("Graph does not have the operations %v", missing)
+
+	inList := map[string]bool{}
+	for _, op := range g.Operations() {
+		inList[op.Name()] = true
+	}
+
+	for _, op := range ops {
+		if !inList[op] {
+			missing = append(missing, op)
+		}
+	}
+
+	if len(missing) != 0 {
+		return fmt.Errorf("Operations %v are missing from graph.Operations()", missing)
+	}
+
+	return nil
 }
 
 func TestGraphWriteToAndImport(t *testing.T) {
diff --git a/tensorflow/python/client/session.py b/tensorflow/python/client/session.py
index 017bef99ce..1481a4d035 100644
--- a/tensorflow/python/client/session.py
+++ b/tensorflow/python/client/session.py
@@ -126,6 +126,12 @@ _REGISTERED_EXPANSIONS = [
      lambda feed: [feed])]
 # pylint: enable=g-long-lambda
 
+
+def _convert_to_numpy_obj(numpy_dtype, obj):
+  """Explicitly convert obj based on numpy type except for string type."""
+  return numpy_dtype(obj) if numpy_dtype is not object else str(obj)
+
+
 def register_session_run_conversion_functions(tensor_type, fetch_function,
     feed_function=None, feed_function_for_partial_run=None):
   """Register fetch and feed conversion functions for `tf.Session.run()`.
@@ -1072,12 +1078,14 @@ class BaseSession(SessionInterface):
                             'strings, lists, numpy ndarrays, or TensorHandles.')
 
           subfeed_dtype = subfeed_t.dtype.as_numpy_dtype
-          if isinstance(subfeed_val,
-                        int) and subfeed_dtype(subfeed_val) != subfeed_val:
+          if isinstance(subfeed_val, int) and _convert_to_numpy_obj(
+              subfeed_dtype, subfeed_val) != subfeed_val:
             raise TypeError(
-                'Type of feed value ' + str(subfeed_val) + ' is not'
-                ' compatible with Tensor type ' + str(subfeed_dtype) + '.'
-                ' Try explicitly setting the type of the feed tensor'
+                'Type of feed value ' + str(subfeed_val) + ' with type ' +
+                str(type(subfeed_val)) +
+                ' is not compatible with Tensor type ' +
+                str(subfeed_dtype) +
+                '. Try explicitly setting the type of the feed tensor'
                 ' to a larger type (e.g. int64).')
 
           is_tensor_handle_feed = isinstance(subfeed_val,
diff --git a/tensorflow/python/client/session_test.py b/tensorflow/python/client/session_test.py
index 3da03a7b0f..a563f5ef4a 100644
--- a/tensorflow/python/client/session_test.py
+++ b/tensorflow/python/client/session_test.py
@@ -1737,6 +1737,12 @@ class SessionTest(test_util.TensorFlowTestCase):
     server = server_lib.Server.create_local_server()
     self.runTestAddFunctionToSession(server.target)
 
+  def testAutoConvertAndCheckData(self):
+    with self.test_session() as sess:
+      a = array_ops.placeholder(dtype=dtypes.string)
+      with self.assertRaisesRegexp(
+          TypeError, 'Type of feed value 1 with type <(\w+) \'int\'> is not'):
+        sess.run(a, feed_dict={a: 1})
 
 class GraphMutationTest(test_util.TensorFlowTestCase):
 
diff --git a/tensorflow/python/debug/BUILD b/tensorflow/python/debug/BUILD
index 2315ad4653..789771508e 100644
--- a/tensorflow/python/debug/BUILD
+++ b/tensorflow/python/debug/BUILD
@@ -535,6 +535,7 @@ py_test(
     srcs_version = "PY2AND3",
     tags = [
         "no_windows",
+        "nomac",
         "oss_serial",
     ],
     deps = [
diff --git a/tensorflow/python/estimator/training_test.py b/tensorflow/python/estimator/training_test.py
index d72b95dbdd..285671f99f 100644
--- a/tensorflow/python/estimator/training_test.py
+++ b/tensorflow/python/estimator/training_test.py
@@ -626,7 +626,7 @@ class _TrainingExecutorTrainingTest(object):
 
     self._run_task(training._TrainingExecutor(mock_est, mock_train_spec,
                                               mock_eval_spec))
-    mock_est.train.assert_called()
+    self.assertTrue(mock_est.train.called)
     mock_server.assert_not_called()
 
   def test_fail_with_empty_task_type(self):
@@ -836,7 +836,7 @@ class TrainingExecutorRunMasterTest(test.TestCase):
     executor.run_master()
 
     mock_server.assert_not_called()
-    mock_est.train.assert_called()
+    self.assertTrue(mock_est.train.called)
 
   def test_fail_with_empty_task_type(self):
     mock_est = test.mock.Mock(spec=estimator_lib.Estimator)
diff --git a/tensorflow/python/estimator/util.py b/tensorflow/python/estimator/util.py
index 12f2592d84..b31486dfa1 100644
--- a/tensorflow/python/estimator/util.py
+++ b/tensorflow/python/estimator/util.py
@@ -52,7 +52,7 @@ def fn_args(fn):
   else:
     if _is_callable_object(fn):
       fn = fn.__call__
-    args = tf_inspect.getargspec(fn).args
+    args = tf_inspect.getfullargspec(fn).args
     if _is_bounded_method(fn):
       args.remove('self')
   return tuple(args)
diff --git a/tensorflow/python/framework/function.py b/tensorflow/python/framework/function.py
index 366025a0d8..e06899f81d 100644
--- a/tensorflow/python/framework/function.py
+++ b/tensorflow/python/framework/function.py
@@ -82,8 +82,8 @@ class Defun(object):
     return x + y, x - y
 
   # Building the graph.
-  a = tf.Constant([1.0])
-  b = tf.Constant([2.0])
+  a = tf.constant([1.0])
+  b = tf.constant([2.0])
   c, d = MyFunc(a, b, name='mycall')
   ```
   """
diff --git a/tensorflow/python/keras/_impl/keras/applications/inception_resnet_v2.py b/tensorflow/python/keras/_impl/keras/applications/inception_resnet_v2.py
index c66b4b395e..2e73cefb6c 100644
--- a/tensorflow/python/keras/_impl/keras/applications/inception_resnet_v2.py
+++ b/tensorflow/python/keras/_impl/keras/applications/inception_resnet_v2.py
@@ -211,7 +211,7 @@ def InceptionResNetV2(include_top=True,  # pylint: disable=invalid-name
       include_top: whether to include the fully-connected
           layer at the top of the network.
       weights: one of `None` (random initialization),
-          "imagenet" (pre-training on ImageNet),
+          'imagenet' (pre-training on ImageNet),
           or the path to the weights file to be loaded.
       input_tensor: optional Keras tensor (i.e. output of `layers.Input()`)
           to use as image input for the model.
diff --git a/tensorflow/python/keras/_impl/keras/applications/mobilenet.py b/tensorflow/python/keras/_impl/keras/applications/mobilenet.py
index 4d5ac72604..5f97c138fc 100644
--- a/tensorflow/python/keras/_impl/keras/applications/mobilenet.py
+++ b/tensorflow/python/keras/_impl/keras/applications/mobilenet.py
@@ -350,7 +350,7 @@ def MobileNet(input_shape=None,  # pylint: disable=invalid-name
       include_top: whether to include the fully-connected
           layer at the top of the network.
       weights: one of `None` (random initialization),
-          "imagenet" (pre-training on ImageNet),
+          'imagenet' (pre-training on ImageNet),
           or the path to the weights file to be loaded.
       input_tensor: optional Keras tensor (i.e. output of
           `layers.Input()`)
@@ -536,6 +536,8 @@ def MobileNet(input_shape=None,  # pylint: disable=invalid-name
 
   if old_data_format:
     K.set_image_data_format(old_data_format)
+  elif weights is not None:
+    model.load_weights(weights)
   return model
 
 
diff --git a/tensorflow/python/keras/_impl/keras/applications/resnet50.py b/tensorflow/python/keras/_impl/keras/applications/resnet50.py
index f7cdf2be99..8ab46693aa 100644
--- a/tensorflow/python/keras/_impl/keras/applications/resnet50.py
+++ b/tensorflow/python/keras/_impl/keras/applications/resnet50.py
@@ -164,7 +164,7 @@ def ResNet50(include_top=True,
       include_top: whether to include the fully-connected
           layer at the top of the network.
       weights: one of `None` (random initialization),
-          "imagenet" (pre-training on ImageNet),
+          'imagenet' (pre-training on ImageNet),
           or the path to the weights file to be loaded.
       input_tensor: optional Keras tensor (i.e. output of `layers.Input()`)
           to use as image input for the model.
diff --git a/tensorflow/python/keras/_impl/keras/applications/vgg16.py b/tensorflow/python/keras/_impl/keras/applications/vgg16.py
index ab205aa689..38dbbdc809 100644
--- a/tensorflow/python/keras/_impl/keras/applications/vgg16.py
+++ b/tensorflow/python/keras/_impl/keras/applications/vgg16.py
@@ -70,8 +70,8 @@ def VGG16(include_top=True,
   Arguments:
       include_top: whether to include the 3 fully-connected
           layers at the top of the network.
-     weights: one of `None` (random initialization),
-          "imagenet" (pre-training on ImageNet),
+      weights: one of `None` (random initialization),
+          'imagenet' (pre-training on ImageNet),
           or the path to the weights file to be loaded.
       input_tensor: optional Keras tensor (i.e. output of `layers.Input()`)
           to use as image input for the model.
diff --git a/tensorflow/python/keras/_impl/keras/applications/vgg19.py b/tensorflow/python/keras/_impl/keras/applications/vgg19.py
index 5e5179f332..126c64260b 100644
--- a/tensorflow/python/keras/_impl/keras/applications/vgg19.py
+++ b/tensorflow/python/keras/_impl/keras/applications/vgg19.py
@@ -71,8 +71,8 @@ def VGG19(include_top=True,
       include_top: whether to include the 3 fully-connected
           layers at the top of the network.
       weights: one of `None` (random initialization),
-          "imagenet" (pre-training on ImageNet),
-          or the path to the weights file to be loaded.
+         'imagenet' (pre-training on ImageNet),
+         or the path to the weights file to be loaded.
       input_tensor: optional Keras tensor (i.e. output of `layers.Input()`)
           to use as image input for the model.
       input_shape: optional shape tuple, only to be specified
diff --git a/tensorflow/python/keras/_impl/keras/applications/xception.py b/tensorflow/python/keras/_impl/keras/applications/xception.py
index a9efd5d64c..8219831408 100644
--- a/tensorflow/python/keras/_impl/keras/applications/xception.py
+++ b/tensorflow/python/keras/_impl/keras/applications/xception.py
@@ -83,7 +83,7 @@ def Xception(include_top=True,
       include_top: whether to include the fully-connected
           layer at the top of the network.
       weights: one of `None` (random initialization),
-          "imagenet" (pre-training on ImageNet),
+          'imagenet' (pre-training on ImageNet),
           or the path to the weights file to be loaded.
       input_tensor: optional Keras tensor (i.e. output of `layers.Input()`)
           to use as image input for the model.
@@ -303,6 +303,8 @@ def Xception(include_top=True,
 
   if old_data_format:
     K.set_image_data_format(old_data_format)
+  elif weights is not None:
+    model.load_weights(weights)
   return model
 
 
diff --git a/tensorflow/python/keras/_impl/keras/layers/core.py b/tensorflow/python/keras/_impl/keras/layers/core.py
index 712db33c69..6a745844b2 100644
--- a/tensorflow/python/keras/_impl/keras/layers/core.py
+++ b/tensorflow/python/keras/_impl/keras/layers/core.py
@@ -104,13 +104,13 @@ class Dropout(tf_core_layers.Dropout, Layer):
   """
 
   def __init__(self, rate, noise_shape=None, seed=None, **kwargs):
-    self.supports_masking = True
     # Inheritance call order:
     # 1) tf.layers.Dropout, 2) keras.layers.Layer, 3) tf.layers.Layer
     super(Dropout, self).__init__(rate=rate,
                                   noise_shape=noise_shape,
                                   seed=seed,
                                   **kwargs)
+    self.supports_masking = True
 
   def call(self, inputs, training=None):
     if training is None:
diff --git a/tensorflow/python/keras/_impl/keras/layers/core_test.py b/tensorflow/python/keras/_impl/keras/layers/core_test.py
index 1fe043561d..bdb99c91c2 100644
--- a/tensorflow/python/keras/_impl/keras/layers/core_test.py
+++ b/tensorflow/python/keras/_impl/keras/layers/core_test.py
@@ -47,6 +47,11 @@ class CoreLayersTest(test.TestCase):
                   'noise_shape': [3, 1]},
           input_shape=(3, 2))
 
+    # https://github.com/tensorflow/tensorflow/issues/14819
+    with self.test_session():
+      dropout = keras.layers.Dropout(0.5)
+      self.assertEqual(True, dropout.supports_masking)
+
     with self.test_session():
       testing_utils.layer_test(
           keras.layers.SpatialDropout1D,
diff --git a/tensorflow/python/kernel_tests/BUILD b/tensorflow/python/kernel_tests/BUILD
index 1d8ca99c07..31d3bd1b74 100644
--- a/tensorflow/python/kernel_tests/BUILD
+++ b/tensorflow/python/kernel_tests/BUILD
@@ -2084,6 +2084,10 @@ cuda_py_test(
         "//tensorflow/python:framework_for_generated_wrappers",
     ],
     shard_count = 2,
+    tags = [
+        "no_gpu",
+        "no_oss",
+    ],
 )
 
 cuda_py_test(
diff --git a/tensorflow/python/kernel_tests/summary_image_op_test.py b/tensorflow/python/kernel_tests/summary_image_op_test.py
index d2152ab560..4718827e88 100644
--- a/tensorflow/python/kernel_tests/summary_image_op_test.py
+++ b/tensorflow/python/kernel_tests/summary_image_op_test.py
@@ -50,7 +50,6 @@ class SummaryImageOpTest(test.TestCase):
     self.assertProtoEquals(expected, image_summ)
 
   def testImageSummary(self):
-    np.random.seed(7)
     for depth in (1, 3, 4):
       for positive in False, True:
         with self.test_session(graph=ops.Graph()) as sess:
diff --git a/tensorflow/python/ops/image_ops_impl.py b/tensorflow/python/ops/image_ops_impl.py
index b9c89d62d5..21561f3689 100644
--- a/tensorflow/python/ops/image_ops_impl.py
+++ b/tensorflow/python/ops/image_ops_impl.py
@@ -1168,7 +1168,7 @@ def random_hue(image, max_delta, seed=None):
       set_random_seed for its interaction with the graph-level random seed.
 
   Returns:
-    3-D float tensor of shape `[height, width, channels]`.
+    Adjusted image(s), same shape and DType as `image`.
 
   Raises:
     ValueError: if `max_delta` is invalid.
@@ -1275,30 +1275,9 @@ def adjust_saturation(image, saturation_factor, name=None):
     orig_dtype = image.dtype
     flt_image = convert_image_dtype(image, dtypes.float32)
 
-    # TODO(zhengxq): we will switch to the fused version after we add a GPU
-    # kernel for that.
-    fused = os.environ.get('TF_ADJUST_SATURATION_FUSED', '')
-    fused = fused.lower() in ('true', 't', '1')
-
-    if fused:
-      return convert_image_dtype(
-          gen_image_ops.adjust_saturation(flt_image, saturation_factor),
-          orig_dtype)
-
-    hsv = gen_image_ops.rgb_to_hsv(flt_image)
-
-    hue = array_ops.slice(hsv, [0, 0, 0], [-1, -1, 1])
-    saturation = array_ops.slice(hsv, [0, 0, 1], [-1, -1, 1])
-    value = array_ops.slice(hsv, [0, 0, 2], [-1, -1, 1])
-
-    saturation *= saturation_factor
-    saturation = clip_ops.clip_by_value(saturation, 0.0, 1.0)
-
-    hsv_altered = array_ops.concat([hue, saturation, value], 2)
-    rgb_altered = gen_image_ops.hsv_to_rgb(hsv_altered)
-
-    return convert_image_dtype(rgb_altered, orig_dtype)
-
+    return convert_image_dtype(
+        gen_image_ops.adjust_saturation(flt_image, saturation_factor),
+        orig_dtype)
 
 def decode_image(contents, channels=None, name=None):
   """Convenience function for `decode_bmp`, `decode_gif`, `decode_jpeg`,
diff --git a/tensorflow/python/ops/image_ops_test.py b/tensorflow/python/ops/image_ops_test.py
index d1554b399f..4af9bd2a00 100644
--- a/tensorflow/python/ops/image_ops_test.py
+++ b/tensorflow/python/ops/image_ops_test.py
@@ -281,6 +281,21 @@ class AdjustHueTest(test_util.TensorFlowTestCase):
       y_tf = y.eval()
       self.assertAllEqual(y_tf, y_np)
 
+  def testBatchAdjustHue(self):
+    x_shape = [2, 1, 2, 3]
+    x_data = [0, 5, 13, 54, 135, 226, 37, 8, 234, 90, 255, 1]
+    x_np = np.array(x_data, dtype=np.uint8).reshape(x_shape)
+
+    delta = 0.25
+    y_data = [13, 0, 11, 226, 54, 221, 234, 8, 92, 1, 217, 255]
+    y_np = np.array(y_data, dtype=np.uint8).reshape(x_shape)
+
+    with self.test_session(use_gpu=True):
+      x = constant_op.constant(x_np, shape=x_shape)
+      y = image_ops.adjust_hue(x, delta)
+      y_tf = y.eval()
+      self.assertAllEqual(y_tf, y_np)
+
   def _adjustHueNp(self, x_np, delta_h):
     self.assertEqual(x_np.shape[-1], 3)
     x_v = x_np.reshape([-1, 3])
@@ -359,6 +374,89 @@ class AdjustHueTest(test_util.TensorFlowTestCase):
       self._adjustHueTf(x_np, delta_h)
 
 
+class FlipImageBenchmark(test.Benchmark):
+
+  def _benchmarkFlipLeftRight(self, device, cpu_count):
+    image_shape = [299, 299, 3]
+    warmup_rounds = 100
+    benchmark_rounds = 1000
+    config = config_pb2.ConfigProto()
+    if cpu_count is not None:
+      config.inter_op_parallelism_threads = 1
+      config.intra_op_parallelism_threads = cpu_count
+    with session.Session("", graph=ops.Graph(), config=config) as sess:
+      with ops.device(device):
+        inputs = variables.Variable(
+            random_ops.random_uniform(
+                image_shape, dtype=dtypes.float32) * 255,
+            trainable=False,
+            dtype=dtypes.float32)
+        run_op = image_ops.flip_left_right(inputs)
+        sess.run(variables.global_variables_initializer())
+        for i in xrange(warmup_rounds + benchmark_rounds):
+          if i == warmup_rounds:
+            start = time.time()
+          sess.run(run_op)
+    end = time.time()
+    step_time = (end - start) / benchmark_rounds
+    tag = device + "_%s" % (cpu_count if cpu_count is not None else "_all")
+    print("benchmarkFlipLeftRight_299_299_3_%s step_time: %.2f us" %
+          (tag, step_time * 1e6))
+    self.report_benchmark(
+        name="benchmarkFlipLeftRight_299_299_3_%s" % (tag),
+        iters=benchmark_rounds,
+        wall_time=step_time)
+
+  def _benchmarkRandomFlipLeftRight(self, device, cpu_count):
+    image_shape = [299, 299, 3]
+    warmup_rounds = 100
+    benchmark_rounds = 1000
+    config = config_pb2.ConfigProto()
+    if cpu_count is not None:
+      config.inter_op_parallelism_threads = 1
+      config.intra_op_parallelism_threads = cpu_count
+    with session.Session("", graph=ops.Graph(), config=config) as sess:
+      with ops.device(device):
+        inputs = variables.Variable(
+            random_ops.random_uniform(
+                image_shape, dtype=dtypes.float32) * 255,
+            trainable=False,
+            dtype=dtypes.float32)
+        run_op = image_ops.random_flip_left_right(inputs)
+        sess.run(variables.global_variables_initializer())
+        for i in xrange(warmup_rounds + benchmark_rounds):
+          if i == warmup_rounds:
+            start = time.time()
+          sess.run(run_op)
+    end = time.time()
+    step_time = (end - start) / benchmark_rounds
+    tag = device + "_%s" % (cpu_count if cpu_count is not None else "_all")
+    print("benchmarkRandomFlipLeftRight_299_299_3_%s step_time: %.2f us" %
+          (tag, step_time * 1e6))
+    self.report_benchmark(
+        name="benchmarkRandomFlipLeftRight_299_299_3_%s" % (tag),
+        iters=benchmark_rounds,
+        wall_time=step_time)
+
+  def benchmarkFlipLeftRightCpu1(self):
+    self._benchmarkFlipLeftRight("/cpu:0", 1)
+
+  def benchmarkFlipLeftRightCpuAll(self):
+    self._benchmarkFlipLeftRight("/cpu:0", None)
+
+  def benchmarkFlipLeftRightGpu(self):
+    self._benchmarkFlipLeftRight(test.gpu_device_name(), None)
+
+  def benchmarkRandomFlipLeftRightCpu1(self):
+    self._benchmarkRandomFlipLeftRight("/cpu:0", 1)
+
+  def benchmarkRandomFlipLeftRightCpuAll(self):
+    self._benchmarkRandomFlipLeftRight("/cpu:0", None)
+
+  def benchmarkRandomFlipLeftRightGpu(self):
+    self._benchmarkRandomFlipLeftRight(test.gpu_device_name(), None)
+
+
 class AdjustHueBenchmark(test.Benchmark):
 
   def _benchmarkAdjustHue(self, device, cpu_count):
@@ -632,6 +730,21 @@ class AdjustSaturationTest(test_util.TensorFlowTestCase):
       y_tf = y.eval()
       self.assertAllEqual(y_tf, y_np)
 
+  def testBatchSaturation(self):
+    x_shape = [2, 1, 2, 3]
+    x_data = [0, 5, 13, 54, 135, 226, 37, 8, 234, 90, 255, 1]
+    x_np = np.array(x_data, dtype=np.uint8).reshape(x_shape)
+
+    saturation_factor = 0.5
+    y_data = [6, 9, 13, 140, 180, 226, 135, 121, 234, 172, 255, 128]
+    y_np = np.array(y_data, dtype=np.uint8).reshape(x_shape)
+
+    with self.test_session(use_gpu=True):
+      x = constant_op.constant(x_np, shape=x_shape)
+      y = image_ops.adjust_saturation(x, saturation_factor)
+      y_tf = y.eval()
+      self.assertAllEqual(y_tf, y_np)
+
   def _adjust_saturation(self, image, saturation_factor):
     image = ops.convert_to_tensor(image, name="image")
     orig_dtype = image.dtype
diff --git a/tensorflow/python/ops/logging_ops.py b/tensorflow/python/ops/logging_ops.py
index 08e3f83a0b..51ab2aec22 100644
--- a/tensorflow/python/ops/logging_ops.py
+++ b/tensorflow/python/ops/logging_ops.py
@@ -39,8 +39,8 @@ def Print(input_, data, message=None, first_n=None, summarize=None,
           name=None):
   """Prints a list of tensors.
 
-  This is an identity op with the side effect of printing `data` when
-  evaluating.
+  This is an identity op (behaves like `tf.identity`) with the side effect
+  of printing `data` when evaluating.
 
   Note: This op prints to the standard error. It is not currently compatible
     with jupyter notebook (printing to the notebook *server's* output, not into
@@ -57,7 +57,7 @@ def Print(input_, data, message=None, first_n=None, summarize=None,
     name: A name for the operation (optional).
 
   Returns:
-    Same tensor as `input_`.
+    A `Tensor`. Has the same type and contents as `input_`.
   """
   return gen_logging_ops._print(input_, data, message, first_n, summarize, name)
 
diff --git a/tensorflow/python/ops/nn_impl.py b/tensorflow/python/ops/nn_impl.py
index 19a86df6a9..fd96f7b8fc 100644
--- a/tensorflow/python/ops/nn_impl.py
+++ b/tensorflow/python/ops/nn_impl.py
@@ -27,6 +27,7 @@ from tensorflow.python.framework import ops
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import candidate_sampling_ops
 from tensorflow.python.ops import embedding_ops
+from tensorflow.python.ops import gen_array_ops
 from tensorflow.python.ops import gen_nn_ops
 from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import nn_ops
@@ -981,10 +982,11 @@ def _compute_sampled_logits(weights,
         Default is `"mod"`. See `tf.nn.embedding_lookup` for more details.
     name: A name for the operation (optional).
   Returns:
-    out_logits, out_labels: `Tensor` objects each with shape
+    out_logits: `Tensor` object with shape
         `[batch_size, num_true + num_sampled]`, for passing to either
         `nn.sigmoid_cross_entropy_with_logits` (NCE) or
         `nn.softmax_cross_entropy_with_logits` (sampled softmax).
+    out_labels: A Tensor object with the same shape as `out_logits`.
   """
 
   if isinstance(weights, variables.PartitionedVariable):
@@ -1095,15 +1097,16 @@ def _compute_sampled_logits(weights,
 
     # Construct output logits and labels. The true labels/logits start at col 0.
     out_logits = array_ops.concat([true_logits, sampled_logits], 1)
-    # true_logits is a float tensor, ones_like(true_logits) is a float tensor
-    # of ones. We then divide by num_true to ensure the per-example labels sum
-    # to 1.0, i.e. form a proper probability distribution.
+
+    # true_logits is a float tensor, ones_like(true_logits) is a float
+    # tensor of ones. We then divide by num_true to ensure the per-example
+    # labels sum to 1.0, i.e. form a proper probability distribution.
     out_labels = array_ops.concat([
         array_ops.ones_like(true_logits) / num_true,
         array_ops.zeros_like(sampled_logits)
     ], 1)
 
-  return out_logits, out_labels
+    return out_logits, out_labels
 
 
 def nce_loss(weights,
diff --git a/tensorflow/python/ops/quantized_conv_ops_test.py b/tensorflow/python/ops/quantized_conv_ops_test.py
index 5ea47ea40e..5e9e710027 100644
--- a/tensorflow/python/ops/quantized_conv_ops_test.py
+++ b/tensorflow/python/ops/quantized_conv_ops_test.py
@@ -93,7 +93,7 @@ class Conv2DTest(test.TestCase):
     quantized_range = ((quantized_max - quantized_min) * range_adjust)
     range_scale = (quantized_range / number_of_steps)
     lowest_quantized = -(1 << (number_of_bits - 1))
-    result = np.array([(quantized_min + ((x - lowest_quantized) * range_scale))
+    result = np.array([(quantized_min + ((float(x) - lowest_quantized) * range_scale))
                        for x in quantized.flatten()])
     return result
 
diff --git a/tensorflow/python/ops/quantized_ops_test.py b/tensorflow/python/ops/quantized_ops_test.py
new file mode 100644
index 0000000000..4bf3b35e13
--- /dev/null
+++ b/tensorflow/python/ops/quantized_ops_test.py
@@ -0,0 +1,57 @@
+# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Functional tests for quantized operations."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import numpy as np
+
+from tensorflow.python.framework import constant_op
+from tensorflow.python.framework import dtypes
+from tensorflow.python.ops import array_ops
+from tensorflow.python.platform import test
+
+
+class QuantizedOpsTest(test.TestCase):
+
+  def __init__(self, method_name="runTest"):
+    super(QuantizedOpsTest, self).__init__(method_name)
+
+  def testQuantizeOp(self):
+    expected_output = [1, 1, 2, 127, 255, 255]
+    with self.test_session(use_gpu=False) as sess:
+      x = constant_op.constant([1.0, 1.25, 1.75, 127.0, 255.0, 500.0], shape=[6], dtype=dtypes.float32)
+      x_min = 0.0
+      x_max = 255.0
+      op = array_ops.quantize(x, x_min, x_max, dtypes.quint8, mode="MIN_FIRST")
+      value = sess.run(op)
+      self.assertArrayNear(expected_output, value.output, 0.1)
+
+  def testDequantizeOp(self):
+    expected_output = [1.0, 2.0, 4.0, 8.0, 16.0, 255.0]
+    inp = np.array([1, 2, 4, 8, 16, 255]).astype(np.uint8)
+    with self.test_session(use_gpu=False) as sess:
+      x = constant_op.constant(inp, shape=[6], dtype=dtypes.quint8)
+      x_min = 0.0
+      x_max = 255.0
+      op = array_ops.dequantize(x, x_min, x_max, mode="MIN_FIRST")
+      value = sess.run(op)
+      self.assertArrayNear(expected_output, value, 0.1)
+
+
+if __name__ == "__main__":
+  test.main()
diff --git a/tensorflow/python/training/learning_rate_decay.py b/tensorflow/python/training/learning_rate_decay.py
index 802b930b0e..f0c28e7b89 100644
--- a/tensorflow/python/training/learning_rate_decay.py
+++ b/tensorflow/python/training/learning_rate_decay.py
@@ -362,7 +362,13 @@ def inverse_time_decay(learning_rate, global_step, decay_steps, decay_rate,
   The function returns the decayed learning rate.  It is computed as:
 
   ```python
-  decayed_learning_rate = learning_rate / (1 + decay_rate * t)
+  decayed_learning_rate = learning_rate / (1 + decay_rate * global_step / decay_step)
+  ```
+
+  or, if `staircase` is `True`, as:
+
+  ```python
+  decayed_learning_rate = learning_rate / (1 + decay_rate * floor(global_step / decay_step))
   ```
 
   Example: decay 1/t with a rate of 0.5:
@@ -371,8 +377,9 @@ def inverse_time_decay(learning_rate, global_step, decay_steps, decay_rate,
   ...
   global_step = tf.Variable(0, trainable=False)
   learning_rate = 0.1
-  k = 0.5
-  learning_rate = tf.train.inverse_time_decay(learning_rate, global_step, k)
+  decay_steps = 1.0
+  decay_rate = 0.5
+  learning_rate = tf.train.inverse_time_decay(learning_rate, global_step, decay_steps, decay_rate)
 
   # Passing global_step to minimize() will increment it at each step.
   learning_step = (
diff --git a/tensorflow/python/util/tf_inspect.py b/tensorflow/python/util/tf_inspect.py
index 9ed125704b..d14e710388 100644
--- a/tensorflow/python/util/tf_inspect.py
+++ b/tensorflow/python/util/tf_inspect.py
@@ -45,6 +45,26 @@ def getargspec(object):  # pylint: disable=redefined-builtin
                if d.decorator_argspec is not None), _inspect.getargspec(target))
 
 
+def getfullargspec(obj):  # pylint: disable=redefined-builtin
+  """TFDecorator-aware replacement for inspect.getfullargspec and fallback to
+  inspect.getargspec in Python 2.
+
+  Args:
+    obj: A callable, possibly decorated.
+
+  Returns:
+    The `FullArgSpec` (`ArgSpec` in Python 2) that describes the signature of
+    the outermost decorator that changes the callable's signature. If the
+    callable is not decorated, `inspect.getfullargspec()`
+    (`inspect.getargspec()` in Python 2) will be called directly on the
+    callable.
+  """
+  spec_fn = getattr(_inspect, 'getfullargspec', getattr(_inspect, 'getargspec'))
+  decorators, target = tf_decorator.unwrap(obj)
+  return next((d.decorator_argspec for d in decorators
+               if d.decorator_argspec is not None), spec_fn(target))
+
+
 def getcallargs(func, *positional, **named):
   """TFDecorator-aware replacement for inspect.getcallargs.
 
diff --git a/tensorflow/stream_executor/dnn.h b/tensorflow/stream_executor/dnn.h
index 0d2cd4a9f2..73b96de438 100644
--- a/tensorflow/stream_executor/dnn.h
+++ b/tensorflow/stream_executor/dnn.h
@@ -1132,7 +1132,7 @@ class DnnSupport {
   //    space in order to speed up the convolution operation.
   //  algorithm: an integer to specify which algorithm should be used for the
   //    operation. kDefaultAlgorithm means the system will pick an algorithm
-  //    by default. The coding of the algorithm is be interpretted by the
+  //    by default. The coding of the algorithm is be interpreted by the
   //    underlying implementation.
   //  output_profile_result: the output profile result for this call. The
   //    profiling is only enabled when this is not nullptr.
diff --git a/tensorflow/tensorflow.bzl b/tensorflow/tensorflow.bzl
index 611d50bc52..9b13a86ed3 100644
--- a/tensorflow/tensorflow.bzl
+++ b/tensorflow/tensorflow.bzl
@@ -130,6 +130,13 @@ def if_not_windows(a):
       "//conditions:default": a,
   })
 
+def if_windows(a):
+  return select({
+      clean_dep("//tensorflow:windows"): a,
+      clean_dep("//tensorflow:windows_msvc"): a,
+      "//conditions:default": [],
+  })
+
 def if_linux_x86_64(a):
   return select({
       clean_dep("//tensorflow:linux_x86_64"): a,
@@ -1325,11 +1332,32 @@ def tf_py_wrap_cc(name,
           "//conditions:default": [":" + cc_library_name],
       }))
 
-def py_test(deps=[], **kwargs):
+# This macro is for running python tests against system installed pip package
+# on Windows.
+#
+# py_test is built as an exectuable python zip file on Windows, which contains all
+# dependencies of the target. Because of the C++ extensions, it would be very
+# inefficient if the py_test zips all runfiles, plus we don't need them when running
+# tests against system installed pip package. So we'd like to get rid of the deps
+# of py_test in this case.
+#
+# In order to trigger the tests without bazel clean after getting rid of deps,
+# we introduce the following :
+# 1. When --define=no_tensorflow_py_deps=true, the py_test depends on a marker
+#    file of the pip package, the test gets to rerun when the pip package change.
+#    Note that this only works on Windows. See the definition of
+#    //tensorflow/tools/pip_package:win_pip_package_marker for specific reasons.
+# 2. When --define=no_tensorflow_py_deps=false (by default), it's a normal py_test.
+def py_test(deps=[], data=[], **kwargs):
   native.py_test(
       deps=select({
           "//conditions:default": deps,
-          clean_dep("//tensorflow:no_tensorflow_py_deps"): []
+          clean_dep("//tensorflow:no_tensorflow_py_deps"): [],
+      }),
+      data = data + select({
+          "//conditions:default": [],
+          clean_dep("//tensorflow:no_tensorflow_py_deps"):
+          ["//tensorflow/tools/pip_package:win_pip_package_marker"],
       }),
       **kwargs)
 
@@ -1354,7 +1382,7 @@ def tf_py_test(name,
     additional_deps = additional_deps + tf_additional_xla_deps_py()
   if grpc_enabled:
     additional_deps = additional_deps + tf_additional_grpc_deps_py()
-  native.py_test(
+  py_test(
       name=name,
       size=size,
       srcs=srcs,
@@ -1364,13 +1392,10 @@ def tf_py_test(name,
       visibility=[clean_dep("//tensorflow:internal")],
       shard_count=shard_count,
       data=data,
-      deps=select({
-          "//conditions:default": [
-              clean_dep("//tensorflow/python:extra_py_tests_deps"),
-              clean_dep("//tensorflow/python:gradient_checker"),
+      deps=[
+            clean_dep("//tensorflow/python:extra_py_tests_deps"),
+            clean_dep("//tensorflow/python:gradient_checker"),
           ] + additional_deps,
-          clean_dep("//tensorflow:no_tensorflow_py_deps"): []
-      }),
       flaky=flaky,
       srcs_version="PY2AND3")
 
diff --git a/tensorflow/tools/benchmark/benchmark_model.cc b/tensorflow/tools/benchmark/benchmark_model.cc
index 9809ad52de..ecab6f8769 100644
--- a/tensorflow/tools/benchmark/benchmark_model.cc
+++ b/tensorflow/tools/benchmark/benchmark_model.cc
@@ -530,7 +530,7 @@ int Main(int argc, char** argv) {
   }
 
   // Capture overall inference time without stat logging overhead. This is the
-  // timing data that can be compared to other libaries.
+  // timing data that can be compared to other libraries.
   SleepSeconds(inter_benchmark_sleep_seconds);
   int64 no_stat_time_us = 0;
   int64 no_stat_num_runs = 0;
diff --git a/tensorflow/tools/ci_build/Dockerfile.gpu b/tensorflow/tools/ci_build/Dockerfile.gpu
index 2d46ccb6b1..7591ecc04e 100644
--- a/tensorflow/tools/ci_build/Dockerfile.gpu
+++ b/tensorflow/tools/ci_build/Dockerfile.gpu
@@ -1,8 +1,8 @@
-FROM nvidia/cuda:8.0-cudnn6-devel-ubuntu14.04
+FROM nvidia/cuda:9.0-cudnn7-devel-ubuntu16.04
 
 LABEL maintainer="Jan Prach <jendap@google.com>"
 
-# In the Ubuntu 14.04 images, cudnn is placed in system paths. Move them to
+# In the Ubuntu 16.04 images, cudnn is placed in system paths. Move them to
 # /usr/local/cuda
 RUN cp -P /usr/include/cudnn.h /usr/local/cuda/include
 RUN cp -P /usr/lib/x86_64-linux-gnu/libcudnn* /usr/local/cuda/lib64
diff --git a/tensorflow/tools/ci_build/Dockerfile.gpu_clang b/tensorflow/tools/ci_build/Dockerfile.gpu_clang
index 0ecd8c75e0..438a7ec532 100644
--- a/tensorflow/tools/ci_build/Dockerfile.gpu_clang
+++ b/tensorflow/tools/ci_build/Dockerfile.gpu_clang
@@ -1,8 +1,8 @@
-FROM nvidia/cuda:8.0-cudnn6-devel-ubuntu14.04
+FROM nvidia/cuda:9.0-cudnn7-devel-ubuntu16.04
 
 LABEL maintainer="Ilya Biryukov <ibiryukov@google.com>"
 
-# In the Ubuntu 14.04 images, cudnn is placed in system paths. Move them to
+# In the Ubuntu 16.04 images, cudnn is placed in system paths. Move them to
 # /usr/local/cuda
 RUN cp /usr/include/cudnn.h /usr/local/cuda/include
 RUN cp /usr/lib/x86_64-linux-gnu/libcudnn* /usr/local/cuda/lib64
diff --git a/tensorflow/tools/ci_build/builds/pip.sh b/tensorflow/tools/ci_build/builds/pip.sh
index a37cf226f9..82042b93c0 100755
--- a/tensorflow/tools/ci_build/builds/pip.sh
+++ b/tensorflow/tools/ci_build/builds/pip.sh
@@ -296,19 +296,12 @@ create_activate_virtualenv_and_install_tensorflow() {
     die "FAILED to create virtualenv directory: ${VIRTUALENV_DIR}"
   fi
 
-  if [[ ${PYTHON_BIN_PATH} == *"python3.6"* ]]; then
-    "${PYTHON_BIN_PATH}" -m venv "${VIRTUALENV_FLAGS}" \
-      "${VIRTUALENV_DIR}" || \
-      die "FAILED: Unable to create virtualenv"
-  else
-    # Verify that virtualenv exists
-    if [[ -z $(which virtualenv) ]]; then
-      die "FAILED: virtualenv not available on path"
-    fi
-    virtualenv ${VIRTUALENV_FLAGS} \
-      -p "${PYTHON_BIN_PATH}" "${VIRTUALENV_DIR}" || \
-      die "FAILED: Unable to create virtualenv"
-  fi
+  # Use the virtualenv from the default python version (i.e., python-virtualenv)
+  # to create the virtualenv directory for testing. Use the -p flag to specify
+  # the python version inside the to-be-created virtualenv directory.
+  ${PYTHON_BIN_PATH} -m virtualenv -p "${PYTHON_BIN_PATH}" ${VIRTUALENV_FLAGS} \
+    "${VIRTUALENV_DIR}" || \
+    die "FAILED: Unable to create virtualenv"
 
   source "${VIRTUALENV_DIR}/bin/activate" || \
     die "FAILED: Unable to activate virtualenv in ${VIRTUALENV_DIR}"
@@ -350,7 +343,7 @@ do_clean_virtualenv_smoke_test() {
   then
     echo "Smoke test of tensorflow install in clean virtualenv PASSED."
   else
-    echo "Smoke test of tensroflow install in clean virtualenv FAILED."
+    echo "Smoke test of tensorflow install in clean virtualenv FAILED."
     return 1
   fi
 
diff --git a/tensorflow/tools/ci_build/builds/print_build_info.sh b/tensorflow/tools/ci_build/builds/print_build_info.sh
index 7c43419a76..e366abf8bb 100755
--- a/tensorflow/tools/ci_build/builds/print_build_info.sh
+++ b/tensorflow/tools/ci_build/builds/print_build_info.sh
@@ -88,7 +88,7 @@ fi
 # Print info
 echo "TF_BUILD_INFO = {"\
 "container_type: \"${CONTAINER_TYPE}\", "\
-"command: \"${COMMAND[@]}\", "\
+"command: \"${COMMAND[*]}\", "\
 "source_HEAD: \"${TF_HEAD}\", "\
 "source_remote_origin: \"${TF_FETCH_URL}\", "\
 "OS: \"${OS}\", "\
diff --git a/tensorflow/tools/ci_build/builds/test_user_ops.sh b/tensorflow/tools/ci_build/builds/test_user_ops.sh
index 358f82ac5d..caa3a40817 100755
--- a/tensorflow/tools/ci_build/builds/test_user_ops.sh
+++ b/tensorflow/tools/ci_build/builds/test_user_ops.sh
@@ -82,11 +82,11 @@ TF_CFLAGS=( $("${PYTHON_BIN_PATH}" \
 TF_LFLAGS=( $("${PYTHON_BIN_PATH}" \
 	      -c 'import tensorflow as tf; print(" ".join(tf.sysconfig.get_link_flags()))') )
 
-if [[ -z "${TF_CFLAGS}" || -z "${TF_LFLAGS}" ]]; then
+if [[ -z "${TF_CFLAGS[*]}" || -z "${TF_LFLAGS[*]}" ]]; then
   die "FAILED to determine TensorFlow compilation or linking flags"
 else
-  echo "TensorFlow compile flags: ${TF_CFLAGS[@]}"
-  echo "TensorFlow link flags: ${TF_LFLAGS[@]}"
+  echo "TensorFlow compile flags: ${TF_CFLAGS[*]}"
+  echo "TensorFlow link flags: ${TF_LFLAGS[*]}"
 fi
 
 # Check g++ availability
diff --git a/tensorflow/tools/ci_build/gpu_build/parallel_gpu_execute.sh b/tensorflow/tools/ci_build/gpu_build/parallel_gpu_execute.sh
index 6e7b752c06..cfeaebdbf5 100755
--- a/tensorflow/tools/ci_build/gpu_build/parallel_gpu_execute.sh
+++ b/tensorflow/tools/ci_build/gpu_build/parallel_gpu_execute.sh
@@ -45,7 +45,7 @@ for i in `seq 0 $((TF_GPU_COUNT-1))`; do
       # This export only works within the brackets, so it is isolated to one
       # single command.
       export CUDA_VISIBLE_DEVICES=$i
-      echo "Running test $@ on GPU $CUDA_VISIBLE_DEVICES"
+      echo "Running test $* on GPU $CUDA_VISIBLE_DEVICES"
       $@
     )
     return_code=$?
diff --git a/tensorflow/tools/ci_build/install/install_deb_packages.sh b/tensorflow/tools/ci_build/install/install_deb_packages.sh
index 4ab307c925..9640810533 100755
--- a/tensorflow/tools/ci_build/install/install_deb_packages.sh
+++ b/tensorflow/tools/ci_build/install/install_deb_packages.sh
@@ -48,6 +48,7 @@ apt-get install -y --no-install-recommends \
     git \
     libcurl4-openssl-dev \
     libtool \
+    libssl-dev \
     mlocate \
     openjdk-8-jdk \
     openjdk-8-jre-headless \
diff --git a/tensorflow/tools/ci_build/install/install_pip_packages.sh b/tensorflow/tools/ci_build/install/install_pip_packages.sh
index b8ed1ab767..da58ac2407 100755
--- a/tensorflow/tools/ci_build/install/install_pip_packages.sh
+++ b/tensorflow/tools/ci_build/install/install_pip_packages.sh
@@ -27,6 +27,9 @@ easy_install3 -U pip
 pip2 install wheel
 pip3 install wheel
 
+pip2 install virtualenv
+pip3 install virtualenv
+
 # Install six.
 pip2 install --upgrade six==1.10.0
 pip3 install --upgrade six==1.10.0
diff --git a/tensorflow/tools/ci_build/install/install_python3.5_pip_packages.sh b/tensorflow/tools/ci_build/install/install_python3.5_pip_packages.sh
index 479242aa43..9881bd99c3 100755
--- a/tensorflow/tools/ci_build/install/install_python3.5_pip_packages.sh
+++ b/tensorflow/tools/ci_build/install/install_python3.5_pip_packages.sh
@@ -39,6 +39,8 @@ if [[ -z $pip35_version ]]; then
 fi
 
 set -e
+pip3.5 install --upgrade virtualenv
+
 # Install six.
 pip3.5 install --upgrade absl-py
 pip3.5 install --upgrade six==1.10.0
diff --git a/tensorflow/tools/ci_build/install/install_python3.6_pip_packages.sh b/tensorflow/tools/ci_build/install/install_python3.6_pip_packages.sh
index ec7d9bf195..1ca12c6c60 100755
--- a/tensorflow/tools/ci_build/install/install_python3.6_pip_packages.sh
+++ b/tensorflow/tools/ci_build/install/install_python3.6_pip_packages.sh
@@ -36,6 +36,8 @@ pip3.6 -V
 which pip3.6
 ln -s /usr/local/bin/pip3.6 /usr/local/bin/pip3
 
+pip3 install --upgrade virtualenv
+
 set -e
 # Install six.
 pip3 install --upgrade absl-py
diff --git a/tensorflow/tools/ci_build/linux/gpu/run_cc_core.sh b/tensorflow/tools/ci_build/linux/gpu/run_cc_core.sh
index df196f829c..ac83e90f76 100755
--- a/tensorflow/tools/ci_build/linux/gpu/run_cc_core.sh
+++ b/tensorflow/tools/ci_build/linux/gpu/run_cc_core.sh
@@ -28,6 +28,8 @@ echo ""
 export PYTHON_BIN_PATH=`which python3`
 
 export TF_NEED_CUDA=1
+export TF_CUDA_VERSION=8.0
+export TF_CUDNN_VERSION=6
 export TF_CUDA_COMPUTE_CAPABILITIES=3.7
 
 yes "" | $PYTHON_BIN_PATH configure.py
diff --git a/tensorflow/tools/ci_build/linux/gpu/run_py3_core.sh b/tensorflow/tools/ci_build/linux/gpu/run_py3_core.sh
index abd256a895..6b80f44729 100755
--- a/tensorflow/tools/ci_build/linux/gpu/run_py3_core.sh
+++ b/tensorflow/tools/ci_build/linux/gpu/run_py3_core.sh
@@ -28,6 +28,8 @@ echo ""
 export PYTHON_BIN_PATH=`which python3`
 
 export TF_NEED_CUDA=1
+export TF_CUDA_VERSION=8.0
+export TF_CUDNN_VERSION=6
 export TF_CUDA_COMPUTE_CAPABILITIES=3.7
 
 yes "" | $PYTHON_BIN_PATH configure.py
diff --git a/tensorflow/tools/ci_build/pi/build_raspberry_pi.sh b/tensorflow/tools/ci_build/pi/build_raspberry_pi.sh
index 88116d9f24..1bd1852ffc 100755
--- a/tensorflow/tools/ci_build/pi/build_raspberry_pi.sh
+++ b/tensorflow/tools/ci_build/pi/build_raspberry_pi.sh
@@ -82,6 +82,7 @@ if [[ $1 == "PI_ONE" ]]; then
 else
   PI_COPTS='--copt=-march=armv7-a --copt=-mfpu=neon-vfpv4
   --copt=-std=gnu11 --copt=-DS_IREAD=S_IRUSR --copt=-DS_IWRITE=S_IWUSR
+  --copt=-O3
   --copt=-U__GCC_HAVE_SYNC_COMPARE_AND_SWAP_1
   --copt=-U__GCC_HAVE_SYNC_COMPARE_AND_SWAP_2
   --copt=-U__GCC_HAVE_SYNC_COMPARE_AND_SWAP_8'
diff --git a/tensorflow/tools/ci_build/remote/remote_docker_build.sh b/tensorflow/tools/ci_build/remote/remote_docker_build.sh
index 3ac6840f4e..e00a66aaba 100755
--- a/tensorflow/tools/ci_build/remote/remote_docker_build.sh
+++ b/tensorflow/tools/ci_build/remote/remote_docker_build.sh
@@ -124,7 +124,7 @@ function build_tf_image {
 
 
 function publish_tf_image {
-  $gcr_tf_image="gcr.io/tensorflow/${tf_image}"
+  gcr_tf_image="gcr.io/tensorflow/${tf_image}"
   docker tag $tf_image $gcr_tf_image
   gcloud docker -- push $gcr_tf_image
 }
diff --git a/tensorflow/tools/ci_build/windows/bazel/bazel_test_lib.sh b/tensorflow/tools/ci_build/windows/bazel/bazel_test_lib.sh
index 44b6d52952..8d50250c3a 100644
--- a/tensorflow/tools/ci_build/windows/bazel/bazel_test_lib.sh
+++ b/tensorflow/tools/ci_build/windows/bazel/bazel_test_lib.sh
@@ -96,10 +96,6 @@ exclude_cpu_cc_tests="${failing_cpu_cc_tests} + ${broken_cpu_cc_tests}"
 
 exclude_gpu_cc_tests="${extra_failing_gpu_cc_tests} + ${exclude_cpu_cc_tests}"
 
-function clean_output_base() {
-  bazel clean --expunge
-}
-
 function run_configure_for_cpu_build {
   # Due to a bug in Bazel: https://github.com/bazelbuild/bazel/issues/2182
   # yes "" | ./configure doesn't work on Windows, so we set all the
@@ -115,7 +111,7 @@ function run_configure_for_cpu_build {
     export TF_NEED_MKL=0
   fi
   export TF_NEED_VERBS=0
-  export TF_NEED_GCP=0
+  export TF_NEED_GCP=1
   export TF_NEED_HDFS=0
   export TF_NEED_OPENCL_SYCL=0
   echo "" | ./configure
diff --git a/tensorflow/tools/ci_build/windows/bazel/common_env.sh b/tensorflow/tools/ci_build/windows/bazel/common_env.sh
index 4a653698a2..f88e7176f0 100644
--- a/tensorflow/tools/ci_build/windows/bazel/common_env.sh
+++ b/tensorflow/tools/ci_build/windows/bazel/common_env.sh
@@ -36,12 +36,6 @@ export BAZEL_SH=${BAZEL_SH:-"C:/tools/msys64/usr/bin/bash"}
 export PYTHON_BIN_PATH="C:/Program Files/Anaconda3/python.exe"
 export PYTHON_LIB_PATH="C:/Program Files/Anaconda3/lib/site-packages"
 
-# Set Python path for cc_configure.bzl
-export BAZEL_PYTHON="C:/Program Files/Anaconda3/python.exe"
-
-# Set Visual Studio path
-export BAZEL_VS="C:/Program Files (x86)/Microsoft Visual Studio 14.0"
-
 # Add python into PATH, it's needed because gen_git_source.py uses
 # '/usr/bin/env python' as a shebang
 export PATH="/c/Program Files/Anaconda3:$PATH"
@@ -53,13 +47,3 @@ export PATH="/c/Program Files/Anaconda3/Scripts:$PATH"
 export PATH="/c/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v8.0/bin:$PATH"
 export PATH="/c/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v8.0/extras/CUPTI/libx64:$PATH"
 export PATH="/c/tools/cuda/bin:$PATH"
-
-# Set the common build options on Windows
-export BUILD_OPTS='--config=monolithic --copt=-w --host_copt=-w --verbose_failures --experimental_ui'
-
-# Build TF with wrapper-less CROSSTOOL
-# TODO(pcloudy): Remove this after wrapper-less CROSSTOOL becomes default
-export NO_MSVC_WRAPPER=1
-
-export USE_DYNAMIC_CRT=1
-
diff --git a/tensorflow/tools/ci_build/windows/cpu/bazel/run_cc_test_windows.sh b/tensorflow/tools/ci_build/windows/cpu/bazel/run_cc_test_windows.sh
index 8c419347d6..748a961e44 100644
--- a/tensorflow/tools/ci_build/windows/cpu/bazel/run_cc_test_windows.sh
+++ b/tensorflow/tools/ci_build/windows/cpu/bazel/run_cc_test_windows.sh
@@ -42,8 +42,6 @@ source "tensorflow/tools/ci_build/windows/bazel/common_env.sh" \
 source "tensorflow/tools/ci_build/windows/bazel/bazel_test_lib.sh" \
   || { echo "Failed to source bazel_test_lib.sh" >&2; exit 1; }
 
-clean_output_base
-
 run_configure_for_cpu_build
 
 # Compliling the following test is extremely slow with -c opt
@@ -54,5 +52,5 @@ passing_tests=$(bazel query "kind(cc_test, //tensorflow/cc/... + //tensorflow/co
   # We need to strip \r so that the result could be store into a variable under MSYS
   tr '\r' ' ')
 
-bazel test $BUILD_OPTS -k $slow_compiling_test --test_output=errors
-bazel test -c opt $BUILD_OPTS -k $passing_tests --test_output=errors
+bazel test -k $slow_compiling_test --test_output=errors
+bazel test -c opt -k $passing_tests --test_output=errors
diff --git a/tensorflow/tools/ci_build/windows/cpu/pip/build_tf_windows.sh b/tensorflow/tools/ci_build/windows/cpu/pip/build_tf_windows.sh
index 8520ca898f..31b4226a30 100644
--- a/tensorflow/tools/ci_build/windows/cpu/pip/build_tf_windows.sh
+++ b/tensorflow/tools/ci_build/windows/cpu/pip/build_tf_windows.sh
@@ -44,9 +44,7 @@ source "tensorflow/tools/ci_build/windows/bazel/bazel_test_lib.sh" \
 
 run_configure_for_cpu_build
 
-clean_output_base
-
-bazel build -c opt $BUILD_OPTS tensorflow/tools/pip_package:build_pip_package || exit $?
+bazel build -c opt tensorflow/tools/pip_package:build_pip_package || exit $?
 
 # Create a python test directory to avoid package name conflict
 PY_TEST_DIR="py_test_dir"
@@ -60,11 +58,8 @@ reinstall_tensorflow_pip ${PIP_NAME}
 
 # Define no_tensorflow_py_deps=true so that every py_test has no deps anymore,
 # which will result testing system installed tensorflow
-# TODO(pcloudy): Remove TF_SAVER_LENIENT_NAMES after
-# https://github.com/tensorflow/tensorflow/issues/12844 is fixed.
-bazel test -c opt $BUILD_OPTS -k --test_output=errors \
+bazel test -c opt -k --test_output=errors \
   --define=no_tensorflow_py_deps=true --test_lang_filters=py \
   --test_tag_filters=-no_pip,-no_windows,-no_oss \
   --build_tag_filters=-no_pip,-no_windows,-no_oss --build_tests_only \
-  --test_env=TF_SAVER_LENIENT_NAMES=True \
   //${PY_TEST_DIR}/tensorflow/python/...
diff --git a/tensorflow/tools/ci_build/windows/gpu/bazel/run_cc_test_windows.sh b/tensorflow/tools/ci_build/windows/gpu/bazel/run_cc_test_windows.sh
index 3fd960deab..f26f8727e5 100644
--- a/tensorflow/tools/ci_build/windows/gpu/bazel/run_cc_test_windows.sh
+++ b/tensorflow/tools/ci_build/windows/gpu/bazel/run_cc_test_windows.sh
@@ -56,5 +56,5 @@ passing_tests=$(bazel query "kind(cc_test, //tensorflow/cc/... + //tensorflow/co
 
 # TODO(pcloudy): There is a bug in Bazel preventing build with GPU support without -c opt
 # Re-enable this test after it is fixed.
-# bazel test --config=win-cuda $BUILD_OPTS -k $slow_compiling_test --test_output=errors
-bazel test -c opt --config=win-cuda $BUILD_OPTS -k $passing_tests --test_output=errors
+# bazel test --config=win-cuda -k $slow_compiling_test --test_output=errors
+bazel test -c opt --config=win-cuda -k $passing_tests --test_output=errors
diff --git a/tensorflow/tools/ci_build/windows/gpu/pip/build_tf_windows.sh b/tensorflow/tools/ci_build/windows/gpu/pip/build_tf_windows.sh
index 47ca42d642..922bb67bbf 100644
--- a/tensorflow/tools/ci_build/windows/gpu/pip/build_tf_windows.sh
+++ b/tensorflow/tools/ci_build/windows/gpu/pip/build_tf_windows.sh
@@ -44,9 +44,7 @@ source "tensorflow/tools/ci_build/windows/bazel/bazel_test_lib.sh" \
 
 run_configure_for_gpu_build
 
-clean_output_base
-
-bazel build -c opt $BUILD_OPTS tensorflow/tools/pip_package:build_pip_package || exit $?
+bazel build -c opt tensorflow/tools/pip_package:build_pip_package || exit $?
 
 # Create a python test directory to avoid package name conflict
 PY_TEST_DIR="py_test_dir"
@@ -61,11 +59,8 @@ reinstall_tensorflow_pip ${PIP_NAME}
 # Define no_tensorflow_py_deps=true so that every py_test has no deps anymore,
 # which will result testing system installed tensorflow
 # GPU tests are very flaky when running concurrently, so set local_test_jobs=1
-# TODO(pcloudy): Remove TF_SAVER_LENIENT_NAMES after
-# https://github.com/tensorflow/tensorflow/issues/12844 is fixed.
-bazel test -c opt $BUILD_OPTS -k --test_output=errors \
+bazel test -c opt -k --test_output=errors \
   --define=no_tensorflow_py_deps=true --test_lang_filters=py \
   --test_tag_filters=-no_pip,-no_windows,-no_windows_gpu,-no_gpu,-no_pip_gpu,no_oss \
   --build_tag_filters=-no_pip,-no_windows,-no_windows_gpu,-no_gpu,-no_pip_gpu,no_oss \
-  --test_env=TF_SAVER_LENIENT_NAMES=True \
   --local_test_jobs=1 --build_tests_only //${PY_TEST_DIR}/tensorflow/python/...
diff --git a/tensorflow/tools/ci_build/windows/libtensorflow_cpu.sh b/tensorflow/tools/ci_build/windows/libtensorflow_cpu.sh
index 9ac3613f27..80f2b590c9 100755
--- a/tensorflow/tools/ci_build/windows/libtensorflow_cpu.sh
+++ b/tensorflow/tools/ci_build/windows/libtensorflow_cpu.sh
@@ -44,13 +44,12 @@ export TF_BAZEL_TARGETS="${TF_BAZEL_TARGETS} //tensorflow/tools/lib_package:clic
 export TF_BAZEL_TARGETS="${TF_BAZEL_TARGETS} //tensorflow/java:libtensorflow_jni.so"
 export TF_BAZEL_TARGETS="${TF_BAZEL_TARGETS} //tensorflow/tools/lib_package:jnilicenses_generate"
 
-clean_output_base
 run_configure_for_cpu_build
 
 # build_libtensorflow_tarball in ../builds/libtensorflow.sh
 # cannot be used on Windows since it relies on pkg_tar rules.
 # So we do something special here
-bazel build -c opt ${BUILD_OPTS} \
+bazel build -c opt \
   tensorflow:libtensorflow.so \
   tensorflow/tools/lib_package:clicenses_generate \
   tensorflow/java:libtensorflow_jni.so \
diff --git a/tensorflow/tools/ci_build/xla/linux/gpu/run_py3.sh b/tensorflow/tools/ci_build/xla/linux/gpu/run_py3.sh
index a94a627dfb..88333de856 100755
--- a/tensorflow/tools/ci_build/xla/linux/gpu/run_py3.sh
+++ b/tensorflow/tools/ci_build/xla/linux/gpu/run_py3.sh
@@ -28,6 +28,8 @@ echo ""
 export PYTHON_BIN_PATH=`which python3`
 
 export TF_NEED_CUDA=1
+export TF_CUDA_VERSION=8.0
+export TF_CUDNN_VERSION=6
 export TF_CUDA_COMPUTE_CAPABILITIES=3.7
 
 yes "" | $PYTHON_BIN_PATH configure.py
diff --git a/tensorflow/tools/docker/Dockerfile.devel b/tensorflow/tools/docker/Dockerfile.devel
index 3525c7524f..0a6860e791 100644
--- a/tensorflow/tools/docker/Dockerfile.devel
+++ b/tensorflow/tools/docker/Dockerfile.devel
@@ -69,11 +69,8 @@ RUN mkdir /bazel && \
     rm -f /bazel/bazel-$BAZEL_VERSION-installer-linux-x86_64.sh
 
 # Download and build TensorFlow.
-
-RUN git clone https://github.com/tensorflow/tensorflow.git && \
-    cd tensorflow && \
-    git checkout r1.4
 WORKDIR /tensorflow
+RUN git clone --branch=r1.4 --depth=1 https://github.com/tensorflow/tensorflow.git .
 
 # TODO(craigcitro): Don't install the pip package, since it makes it
 # more difficult to experiment with local changes. Instead, just add
diff --git a/tensorflow/tools/docker/Dockerfile.devel-gpu b/tensorflow/tools/docker/Dockerfile.devel-gpu
index 041f45971b..4164cc3f88 100644
--- a/tensorflow/tools/docker/Dockerfile.devel-gpu
+++ b/tensorflow/tools/docker/Dockerfile.devel-gpu
@@ -1,11 +1,20 @@
-FROM nvidia/cuda:8.0-cudnn6-devel-ubuntu16.04
+FROM nvidia/cuda:9.0-base-ubuntu16.04
 
 LABEL maintainer="Craig Citro <craigcitro@google.com>"
 
 RUN apt-get update && apt-get install -y --no-install-recommends \
         build-essential \
+        cuda-command-line-tools-9-0 \
+        cuda-cublas-dev-9-0 \
+        cuda-cudart-dev-9-0 \
+        cuda-cufft-dev-9-0 \
+        cuda-curand-dev-9-0 \
+        cuda-cusolver-dev-9-0 \
+        cuda-cusparse-dev-9-0 \
         curl \
         git \
+        libcudnn7=7.0.5.15-1+cuda9.0 \
+        libcudnn7-dev=7.0.5.15-1+cuda9.0 \
         libcurl3-dev \
         libfreetype6-dev \
         libpng12-dev \
@@ -17,12 +26,11 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
         unzip \
         zip \
         zlib1g-dev \
-        openjdk-8-jdk \
-        openjdk-8-jre-headless \
         wget \
         && \
-    apt-get clean && \
-    rm -rf /var/lib/apt/lists/*
+    rm -rf /var/lib/apt/lists/* && \
+    find /usr/local/cuda-9.0/lib64/ -type f -name 'lib*_static.a' -not -name 'libcudart_static.a' -delete && \
+    rm /usr/lib/x86_64-linux-gnu/libcudnn_static_v7.a
 
 RUN curl -fSsL -O https://bootstrap.pypa.io/get-pip.py && \
     python get-pip.py && \
@@ -70,18 +78,16 @@ RUN mkdir /bazel && \
     rm -f /bazel/bazel-$BAZEL_VERSION-installer-linux-x86_64.sh
 
 # Download and build TensorFlow.
-
-RUN git clone https://github.com/tensorflow/tensorflow.git && \
-    cd tensorflow && \
-    git checkout r1.4
 WORKDIR /tensorflow
+RUN git clone --branch=r1.4 --depth=1 https://github.com/tensorflow/tensorflow.git .
 
 # Configure the build for our CUDA configuration.
 ENV CI_BUILD_PYTHON python
 ENV LD_LIBRARY_PATH /usr/local/cuda/extras/CUPTI/lib64:$LD_LIBRARY_PATH
 ENV TF_NEED_CUDA 1
 ENV TF_CUDA_COMPUTE_CAPABILITIES=3.0,3.5,5.2,6.0,6.1
-
+ENV TF_CUDA_VERSION=9.0
+ENV TF_CUDNN_VERSION=7
 
 RUN ln -s /usr/local/cuda/lib64/stubs/libcuda.so /usr/local/cuda/lib64/stubs/libcuda.so.1 && \
     LD_LIBRARY_PATH=/usr/local/cuda/lib64/stubs:${LD_LIBRARY_PATH} \
diff --git a/tensorflow/tools/docker/Dockerfile.gpu b/tensorflow/tools/docker/Dockerfile.gpu
index e212d10290..b6682cd681 100644
--- a/tensorflow/tools/docker/Dockerfile.gpu
+++ b/tensorflow/tools/docker/Dockerfile.gpu
@@ -1,4 +1,4 @@
-FROM nvidia/cuda:8.0-cudnn6-runtime-ubuntu16.04
+FROM nvidia/cuda:9.0-cudnn7-runtime-ubuntu16.04
 
 LABEL maintainer="Craig Citro <craigcitro@google.com>"
 
diff --git a/tensorflow/tools/docker/parameterized_docker_build.sh b/tensorflow/tools/docker/parameterized_docker_build.sh
index 80a07b9b3b..e7de7df856 100755
--- a/tensorflow/tools/docker/parameterized_docker_build.sh
+++ b/tensorflow/tools/docker/parameterized_docker_build.sh
@@ -265,7 +265,7 @@ else
   DOCKERFILE="${TMP_DIR}/Dockerfile"
 
   # Modify the devel Dockerfile to specify the git branch
-  sed -r "s/([\s]*git checkout )(.*)/\1${TF_DOCKER_BUILD_DEVEL_BRANCH}/g" \
+  sed "s/^RUN git clone --branch=.* --depth=1/RUN git clone --branch=${TF_DOCKER_BUILD_DEVEL_BRANCH} --depth=1/" \
       "${ORIG_DOCKERFILE}" > "${DOCKERFILE}"
 
   # Modify python/pip version if necessary.
diff --git a/tensorflow/tools/docs/generate_lib.py b/tensorflow/tools/docs/generate_lib.py
index f950f19a7c..003f972070 100644
--- a/tensorflow/tools/docs/generate_lib.py
+++ b/tensorflow/tools/docs/generate_lib.py
@@ -199,12 +199,12 @@ def add_dict_to_dict(add_from, add_to):
       add_to[key] = add_from[key]
 
 
-# Exclude some libaries in contrib from the documentation altogether.
+# Exclude some libraries in contrib from the documentation altogether.
 def _get_default_private_map():
   return {'tf.test': ['mock']}
 
 
-# Exclude members of some libaries.
+# Exclude members of some libraries.
 def _get_default_do_not_descend_map():
   # TODO(wicke): Shrink this list once the modules get sealed.
   return {
diff --git a/tensorflow/tools/pip_package/BUILD b/tensorflow/tools/pip_package/BUILD
index 33af4532c8..d80d5ecc6a 100644
--- a/tensorflow/tools/pip_package/BUILD
+++ b/tensorflow/tools/pip_package/BUILD
@@ -6,6 +6,7 @@ package(default_visibility = ["//visibility:private"])
 load(
     "//tensorflow:tensorflow.bzl",
     "if_not_windows",
+    "if_windows",
     "transitive_hdrs",
 )
 load("//third_party/mkl:build_defs.bzl", "if_mkl")
@@ -194,3 +195,23 @@ sh_binary(
         ],
     }) + if_mkl(["//third_party/mkl:intel_binary_blob"]),
 )
+
+# A genrule for generating a marker file for the pip package on Windows
+#
+# This only works on Windows, because :simple_console_for_windows is a
+# python zip file containing everything we need for building the pip package.
+# However, on other platforms, due to https://github.com/bazelbuild/bazel/issues/4223,
+# when C++ extensions change, this generule doesn't rebuild.
+genrule(
+    name = "win_pip_package_marker",
+    srcs = if_windows([
+        ":build_pip_package",
+        ":simple_console_for_windows",
+    ]),
+    outs = ["win_pip_package_marker_file"],
+    cmd = select({
+        "//conditions:default": "touch $@",
+        "//tensorflow:windows": "md5sum $(locations :build_pip_package) $(locations :simple_console_for_windows) > $@",
+    }),
+    visibility = ["//visibility:public"],
+)
diff --git a/tensorflow/tools/pip_package/build_pip_package.sh b/tensorflow/tools/pip_package/build_pip_package.sh
index 8249703ba7..f5203bc544 100755
--- a/tensorflow/tools/pip_package/build_pip_package.sh
+++ b/tensorflow/tools/pip_package/build_pip_package.sh
@@ -24,7 +24,7 @@ function real_path() {
 function cp_external() {
   local src_dir=$1
   local dest_dir=$2
-  for f in `find "$src_dir" -maxdepth 1 -mindepth 1 ! -name '*local_config_cuda*'`; do
+  for f in `find "$src_dir" -maxdepth 1 -mindepth 1 ! -name '*local_config_cuda*' ! -name '*org_tensorflow*'`; do
     cp -R "$f" "$dest_dir"
   done
 }
@@ -92,7 +92,6 @@ function main() {
       bazel-bin/tensorflow/tools/pip_package/simple_console_for_window_unzip/runfiles/org_tensorflow/tensorflow \
       "${TMPDIR}"
     mkdir "${TMPDIR}/external"
-    # Note: this makes an extra copy of org_tensorflow.
     cp_external \
       bazel-bin/tensorflow/tools/pip_package/simple_console_for_window_unzip/runfiles \
       "${TMPDIR}/external"
@@ -123,7 +122,6 @@ function main() {
         bazel-bin/tensorflow/tools/pip_package/build_pip_package.runfiles/org_tensorflow/tensorflow \
         "${TMPDIR}"
       mkdir "${TMPDIR}/external"
-      # Note: this makes an extra copy of org_tensorflow.
       cp_external \
         bazel-bin/tensorflow/tools/pip_package/build_pip_package.runfiles \
         "${TMPDIR}/external"
diff --git a/tensorflow/tools/pip_package/pip_smoke_test.py b/tensorflow/tools/pip_package/pip_smoke_test.py
index cc46dd5162..22e1584b78 100644
--- a/tensorflow/tools/pip_package/pip_smoke_test.py
+++ b/tensorflow/tools/pip_package/pip_smoke_test.py
@@ -42,6 +42,7 @@ BLACKLIST = [
     "//tensorflow/python:extra_py_tests_deps",
     "//tensorflow/cc/saved_model:saved_model_half_plus_two",
     "//tensorflow:no_tensorflow_py_deps",
+    "//tensorflow/tools/pip_package:win_pip_package_marker",
     "//tensorflow/python:test_ops_2",
     "//tensorflow/python:tf_optimizer",
     "//tensorflow/python:compare_test_proto_py",
diff --git a/tensorflow/tools/proto_text/gen_proto_text_functions.cc b/tensorflow/tools/proto_text/gen_proto_text_functions.cc
index ecb29a65a0..f0bb59acf8 100644
--- a/tensorflow/tools/proto_text/gen_proto_text_functions.cc
+++ b/tensorflow/tools/proto_text/gen_proto_text_functions.cc
@@ -132,6 +132,7 @@ int MainImpl(int argc, char** argv) {
       FILE* f = fopen(path.c_str(), "w");
       if (f == nullptr) return -1;
       if (fwrite(data.c_str(), 1, data.size(), f) != data.size()) {
+        fclose(f);
         return -1;
       }
       if (fclose(f) != 0) {
diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl
index b71f5dc4e5..046c2b2391 100644
--- a/tensorflow/workspace.bzl
+++ b/tensorflow/workspace.bzl
@@ -74,11 +74,11 @@ def tf_workspace(path_prefix="", tf_repo_name=""):
   tf_http_archive(
       name = "mkl_dnn",
       urls = [
-          "https://mirror.bazel.build/github.com/01org/mkl-dnn/archive/b01e3a55a07be62172e713bcd2644c5176360212.tar.gz",
-          "https://github.com/01org/mkl-dnn/archive/b01e3a55a07be62172e713bcd2644c5176360212.tar.gz",
+          "https://mirror.bazel.build/github.com/01org/mkl-dnn/archive/aab753280e83137ba955f8f19d72cb6aaba545ef.tar.gz",
+          "https://github.com/01org/mkl-dnn/archive/aab753280e83137ba955f8f19d72cb6aaba545ef.tar.gz",
       ],
-      sha256 = "0d529ad4c49dc799e6df07c2b88b115d0668735da15fb3b3862d28d33fa68165",
-      strip_prefix = "mkl-dnn-b01e3a55a07be62172e713bcd2644c5176360212",
+      sha256 = "fb67f255a96bd4ad39b8dd104eca5aa92200c95c1ed36e59641e6c0478eefd11",
+      strip_prefix = "mkl-dnn-aab753280e83137ba955f8f19d72cb6aaba545ef",
       build_file = str(Label("//third_party/mkl_dnn:mkldnn.BUILD")),
   )
 
@@ -95,11 +95,11 @@ def tf_workspace(path_prefix="", tf_repo_name=""):
   tf_http_archive(
       name = "eigen_archive",
       urls = [
-          "https://mirror.bazel.build/bitbucket.org/eigen/eigen/get/429aa5254200.tar.gz",
-          "https://bitbucket.org/eigen/eigen/get/429aa5254200.tar.gz",
+          "https://mirror.bazel.build/bitbucket.org/eigen/eigen/get/b6e6d0cf6a77.tar.gz",
+          "https://bitbucket.org/eigen/eigen/get/b6e6d0cf6a77.tar.gz",
       ],
-      sha256 = "61d8b6fc4279dd1dda986fb1677d15e3d641c07a3ea5abe255790b1f0c0c14e9",
-      strip_prefix = "eigen-eigen-429aa5254200",
+      sha256 = "0840c497f2749b5e90bda666aab96be6da90dc75b4e21ca9843cae69b7fed52a",
+      strip_prefix = "eigen-eigen-b6e6d0cf6a77",
       build_file = str(Label("//third_party:eigen.BUILD")),
   )
 
diff --git a/third_party/curl.BUILD b/third_party/curl.BUILD
index e311c7e758..4def6f9489 100644
--- a/third_party/curl.BUILD
+++ b/third_party/curl.BUILD
@@ -10,6 +10,7 @@ CURL_WIN_COPTS = [
     "/DHAVE_CONFIG_H",
     "/DCURL_DISABLE_FTP",
     "/DCURL_DISABLE_NTLM",
+    "/DCURL_DISABLE_PROXY",
     "/DHAVE_LIBZ",
     "/DHAVE_ZLIB_H",
     # Defining _USING_V110_SDK71_ is hackery to defeat curl's incorrect
@@ -23,6 +24,8 @@ CURL_WIN_SRCS = [
     "lib/asyn-thread.c",
     "lib/inet_ntop.c",
     "lib/system_win32.c",
+    "lib/vtls/schannel.c",
+    "lib/idn_win32.c",
 ]
 
 cc_library(
@@ -276,6 +279,7 @@ cc_library(
             "-DCURL_MAX_WRITE_SIZE=65536",
         ],
     }),
+    defines = ["CURL_STATICLIB"],
     includes = ["include"],
     linkopts = select({
         "@org_tensorflow//tensorflow:android": [
@@ -289,10 +293,16 @@ cc_library(
         ],
         "@org_tensorflow//tensorflow:ios": [],
         "@org_tensorflow//tensorflow:windows": [
-            "-Wl,ws2_32.lib",
+            "-DEFAULTLIB:ws2_32.lib",
+            "-DEFAULTLIB:advapi32.lib",
+            "-DEFAULTLIB:crypt32.lib",
+            "-DEFAULTLIB:Normaliz.lib",
         ],
         "@org_tensorflow//tensorflow:windows_msvc": [
-            "-Wl,ws2_32.lib",
+            "-DEFAULTLIB:ws2_32.lib",
+            "-DEFAULTLIB:advapi32.lib",
+            "-DEFAULTLIB:crypt32.lib",
+            "-DEFAULTLIB:Normaliz.lib",
         ],
         "//conditions:default": [
             "-lrt",
@@ -438,12 +448,22 @@ genrule(
         "#  include \"lib/config-win32.h\"",
         "#  define BUILDING_LIBCURL 1",
         "#  define CURL_DISABLE_CRYPTO_AUTH 1",
+        "#  define CURL_DISABLE_DICT 1",
+        "#  define CURL_DISABLE_FILE 1",
+        "#  define CURL_DISABLE_GOPHER 1",
         "#  define CURL_DISABLE_IMAP 1",
         "#  define CURL_DISABLE_LDAP 1",
         "#  define CURL_DISABLE_LDAPS 1",
         "#  define CURL_DISABLE_POP3 1",
         "#  define CURL_PULL_WS2TCPIP_H 1",
-        "#  define HTTP_ONLY 1",
+        "#  define CURL_DISABLE_SMTP 1",
+        "#  define CURL_DISABLE_TELNET 1",
+        "#  define CURL_DISABLE_TFTP 1",
+        "#  define CURL_PULL_WS2TCPIP_H 1",
+        "#  define USE_WINDOWS_SSPI 1",
+        "#  define USE_WIN32_IDN 1",
+        "#  define USE_SCHANNEL 1",
+        "#  define WANT_IDN_PROTOTYPES 1",
         "#elif defined(__APPLE__)",
         "#  define HAVE_FSETXATTR_6 1",
         "#  define HAVE_SETMODE 1",
diff --git a/third_party/pcre.BUILD b/third_party/pcre.BUILD
index 68aadd1d40..e2cdec4029 100644
--- a/third_party/pcre.BUILD
+++ b/third_party/pcre.BUILD
@@ -50,12 +50,12 @@ cc_library(
         "-DNEWLINE=10",
         "-DNO_RECURSE",
         "-DPARENS_NEST_LIMIT=50",
-        "-DPCRE_STATIC=1",
         "-DPOSIX_MALLOC_THRESHOLD=10",
         "-DSTDC_HEADERS=1",
         "-DSUPPORT_UCP",
         "-DSUPPORT_UTF",
     ],
+    defines = ["PCRE_STATIC=1"],
     includes = ["."],
     visibility = ["@swig//:__pkg__"],  # Please use RE2
     alwayslink = 1,
-- 
GitLab


From 5b5445b9a7aa2664a90c4fc946ecf268c971425b Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 15 Dec 2017 17:31:22 -0800
Subject: [PATCH 1112/1225] Print both number of nodes and edges after each
 Grappler optimizer.

PiperOrigin-RevId: 179260418
---
 .../grappler/optimizers/meta_optimizer.cc     | 28 +++++++++++++------
 1 file changed, 20 insertions(+), 8 deletions(-)

diff --git a/tensorflow/core/grappler/optimizers/meta_optimizer.cc b/tensorflow/core/grappler/optimizers/meta_optimizer.cc
index 0d0b947c8a..4228e7baba 100644
--- a/tensorflow/core/grappler/optimizers/meta_optimizer.cc
+++ b/tensorflow/core/grappler/optimizers/meta_optimizer.cc
@@ -30,6 +30,23 @@ limitations under the License.
 namespace tensorflow {
 namespace grappler {
 
+namespace {
+int64 NumEdges(const GraphDef& graph) {
+  int64 num_edges = 0;
+  for (const auto& node : graph.node()) {
+    num_edges += node.input_size();
+  }
+  return num_edges;
+}
+
+string PrintSizesBeforeAfter(const GraphDef& before, const GraphDef& after) {
+  return strings::StrCat("Graph size before: ", before.node_size(), " nodes, ",
+                         NumEdges(before),
+                         " edges. Graph size after: ", after.node_size(),
+                         " nodes, ", NumEdges(after), " edges.");
+}
+}  // namespace
+
 std::unique_ptr<GraphOptimizer> MetaOptimizer::NewOptimizer(
     const string& optimizer) {
   VLOG(1) << "Adding graph optimization pass: " << optimizer;
@@ -128,10 +145,7 @@ Status MetaOptimizer::Optimize(Cluster* cluster, const GrapplerItem& item,
       } else {
         already_optimized = true;
         result = strings::StrCat(
-            "OK. "
-            "Graph size before: ",
-            item.graph.node_size(),
-            ". Graph size after: ", optimized_graph->node_size());
+            "OK. ", PrintSizesBeforeAfter(item.graph, *optimized_graph));
       }
       result_.push_back(std::make_pair(optimizer->name(), result));
       VLOG(1) << "Optimizer " << optimizer->name()
@@ -148,10 +162,8 @@ Status MetaOptimizer::Optimize(Cluster* cluster, const GrapplerItem& item,
         result = status.ToString();
       } else {
         result = strings::StrCat(
-            "OK. "
-            "Graph size before: ",
-            optimized_item.graph.node_size(),
-            ". Graph size after: ", optimized_graph->node_size());
+            "OK. ",
+            PrintSizesBeforeAfter(optimized_item.graph, *optimized_graph));
       }
       result_.push_back(std::make_pair(optimizer->name(), result));
       VLOG(1) << "Optimizer " << optimizer->name()
-- 
GitLab


From 9648f8040a559f6cf9bbe0501ba96f2b2c2864b1 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 15 Dec 2017 17:32:50 -0800
Subject: [PATCH 1113/1225] Automated g4 rollback of changelist 179258973

PiperOrigin-RevId: 179260538
---
 .gitignore                                    |   11 +-
 configure.py                                  |   35 +-
 tensorflow/c/c_api.cc                         |    2 -
 tensorflow/c/c_api_function.cc                |    4 +-
 .../tf2xla/kernels/tensor_array_ops.cc        |   87 +-
 tensorflow/compiler/xla/service/BUILD         |    2 +
 .../xla/service/copy_insertion_test.cc        |    2 +-
 .../compiler/xla/service/layout_assignment.cc |    2 +-
 tensorflow/compiler/xla/shape_tree.h          |    2 +-
 tensorflow/contrib/BUILD                      |    7 +-
 tensorflow/contrib/cmake/CMakeLists.txt       |   34 +-
 tensorflow/contrib/cmake/README.md            |   17 +
 .../contrib/cmake/external/gemmlowp.cmake     |    4 +-
 .../contrib/cmake/tf_core_kernels.cmake       |    3 +
 tensorflow/contrib/cmake/tf_tests.cmake       |    2 -
 .../contrib/factorization/python/ops/gmm.py   |    4 +-
 tensorflow/contrib/ffmpeg/__init__.py         |    1 -
 .../contrib/ffmpeg/default/ffmpeg_lib.cc      |    6 +-
 .../ffmpeg/default/ffmpeg_lib_utility_test.cc |    2 -
 tensorflow/contrib/ffmpeg/ffmpeg_ops.py       |    1 -
 .../estimator/python/gan_estimator_impl.py    |    7 +-
 .../gan/python/estimator/python/head_impl.py  |    6 +-
 .../layers/python/layers/initializers.py      |    3 +-
 .../learn/estimators/composable_model_test.py |    4 +-
 .../learn/python/learn/estimators/dnn.py      |    4 +-
 .../learn/estimators/estimator_input_test.py  |   10 +-
 .../python/learn/estimators/estimator_test.py |   26 +-
 .../learn/estimators/estimators_test.py       |    8 +-
 .../learn/python/learn/estimators/kmeans.py   |    4 +-
 .../learn/python/learn/estimators/linear.py   |    6 +-
 .../estimators/logistic_regressor_test.py     |    4 +-
 .../learn/python/learn/utils/export.py        |    6 +-
 .../linear_optimizer/python/sdca_estimator.py |    4 +-
 tensorflow/contrib/lite/README.md             |    5 -
 tensorflow/contrib/lite/ios_makefile.inc      |   78 +-
 .../contrib/lite/nnapi/NeuralNetworksShim.h   |    4 +-
 tensorflow/contrib/makefile/Makefile          |    2 +-
 tensorflow/contrib/nn/__init__.py             |    1 -
 .../contrib/nn/python/ops/sampling_ops.py     |  100 -
 tensorflow/contrib/opt/BUILD                  |   19 -
 tensorflow/contrib/opt/__init__.py            |    5 +-
 .../training/elastic_average_optimizer.py     |  345 ---
 .../elastic_average_optimizer_test.py         |  225 --
 .../python/kernel_tests/core_rnn_cell_test.py |    3 -
 tensorflow/contrib/rnn/python/ops/rnn_cell.py |    1 -
 .../contrib/seq2seq/python/ops/helper.py      |    6 +-
 .../python/slim/data/dataset_data_provider.py |    4 +-
 .../timeseries/python/timeseries/head.py      |    4 +-
 tensorflow/core/common_runtime/function.cc    |    2 +-
 tensorflow/core/framework/numeric_types.h     |    2 +-
 .../core/framework/tensor_shape_test.cc       |    3 +-
 tensorflow/core/graph/mkl_graph_util.h        |   10 +-
 tensorflow/core/graph/mkl_layout_pass.cc      | 2083 -----------------
 tensorflow/core/graph/mkl_layout_pass_test.cc | 1624 -------------
 tensorflow/core/kernels/logging_ops.cc        |    3 +-
 tensorflow/core/kernels/mkl_aggregate_ops.cc  |  204 +-
 tensorflow/core/kernels/mkl_avgpooling_op.cc  |  306 +--
 tensorflow/core/kernels/mkl_concat_op.cc      |  374 +--
 .../core/kernels/mkl_conv_grad_filter_ops.cc  |  317 ++-
 .../core/kernels/mkl_conv_grad_input_ops.cc   |  244 +-
 tensorflow/core/kernels/mkl_conv_ops.cc       |  149 +-
 tensorflow/core/kernels/mkl_conv_ops.h        |  269 +--
 .../core/kernels/mkl_fused_batch_norm_op.cc   |  652 +-----
 tensorflow/core/kernels/mkl_identity_op.cc    |   33 -
 .../core/kernels/mkl_input_conversion_op.cc   |  217 +-
 tensorflow/core/kernels/mkl_lrn_op.cc         |    2 +-
 tensorflow/core/kernels/mkl_maxpooling_op.cc  |  357 +--
 .../core/kernels/mkl_pooling_ops_common.cc    |   38 +-
 .../core/kernels/mkl_pooling_ops_common.h     |  342 ---
 tensorflow/core/kernels/mkl_relu_op.cc        |  505 +---
 tensorflow/core/kernels/mkl_reshape_op.cc     |  182 --
 tensorflow/core/kernels/quantized_conv_ops.cc |    7 -
 .../core/lib/io/snappy/snappy_outputbuffer.cc |    3 +-
 tensorflow/core/ops/nn_ops.cc                 |  173 --
 tensorflow/core/platform/cloud/BUILD          |   14 -
 .../core/platform/cloud/gcs_dns_cache.cc      |   32 +-
 .../core/platform/cloud/gcs_file_system.cc    |   21 +-
 .../platform/cloud/google_auth_provider.cc    |    5 +-
 .../core/platform/cloud/oauth_client.cc       |    4 -
 tensorflow/core/platform/cloud/time_util.cc   |    3 -
 .../core/platform/default/build_config.bzl    |    1 +
 .../android_armv7a_cpu_utils_helper.cc        |    3 +-
 tensorflow/core/platform/s3/s3_file_system.cc |   10 +-
 tensorflow/core/util/mkl_util.h               |  313 +--
 .../docs_src/api_guides/python/image.md       |    1 -
 .../api_guides/python/reading_data.md         |   27 +-
 .../docs_src/get_started/mnist/mechanics.md   |    8 +-
 .../docs_src/programmers_guide/estimators.md  |    2 +-
 .../docs_src/programmers_guide/variables.md   |    2 +-
 tensorflow/examples/android/build.gradle      |    6 +-
 .../android/gradle/wrapper/gradle-wrapper.jar |  Bin 53636 -> 0 bytes
 .../gradle/wrapper/gradle-wrapper.properties  |    6 -
 tensorflow/examples/android/gradlew           |  160 --
 tensorflow/examples/android/gradlew.bat       |   90 -
 .../org/tensorflow/demo/CameraActivity.java   |    8 +-
 .../reading_data/fully_connected_reader.py    |  125 +-
 .../wav_to_spectrogram/wav_to_spectrogram.cc  |    7 +-
 tensorflow/go/graph.go                        |   17 +-
 tensorflow/go/graph_test.go                   |   22 +-
 tensorflow/python/client/session.py           |   18 +-
 tensorflow/python/client/session_test.py      |    6 -
 tensorflow/python/debug/BUILD                 |    1 -
 tensorflow/python/estimator/training_test.py  |    4 +-
 tensorflow/python/estimator/util.py           |    2 +-
 tensorflow/python/framework/function.py       |    4 +-
 .../keras/applications/inception_resnet_v2.py |    2 +-
 .../_impl/keras/applications/mobilenet.py     |    4 +-
 .../_impl/keras/applications/resnet50.py      |    2 +-
 .../keras/_impl/keras/applications/vgg16.py   |    4 +-
 .../keras/_impl/keras/applications/vgg19.py   |    4 +-
 .../_impl/keras/applications/xception.py      |    4 +-
 .../python/keras/_impl/keras/layers/core.py   |    2 +-
 .../keras/_impl/keras/layers/core_test.py     |    5 -
 tensorflow/python/kernel_tests/BUILD          |    4 -
 .../kernel_tests/summary_image_op_test.py     |    1 +
 tensorflow/python/ops/image_ops_impl.py       |   29 +-
 tensorflow/python/ops/image_ops_test.py       |  113 -
 tensorflow/python/ops/logging_ops.py          |    6 +-
 tensorflow/python/ops/nn_impl.py              |   13 +-
 .../python/ops/quantized_conv_ops_test.py     |    2 +-
 tensorflow/python/ops/quantized_ops_test.py   |   57 -
 .../python/training/learning_rate_decay.py    |   13 +-
 tensorflow/python/util/tf_inspect.py          |   20 -
 tensorflow/stream_executor/dnn.h              |    2 +-
 tensorflow/tensorflow.bzl                     |   43 +-
 tensorflow/tools/benchmark/benchmark_model.cc |    2 +-
 tensorflow/tools/ci_build/Dockerfile.gpu      |    4 +-
 .../tools/ci_build/Dockerfile.gpu_clang       |    4 +-
 tensorflow/tools/ci_build/builds/pip.sh       |   21 +-
 .../tools/ci_build/builds/print_build_info.sh |    2 +-
 .../tools/ci_build/builds/test_user_ops.sh    |    6 +-
 .../gpu_build/parallel_gpu_execute.sh         |    2 +-
 .../ci_build/install/install_deb_packages.sh  |    1 -
 .../ci_build/install/install_pip_packages.sh  |    3 -
 .../install/install_python3.5_pip_packages.sh |    2 -
 .../install/install_python3.6_pip_packages.sh |    2 -
 .../tools/ci_build/linux/gpu/run_cc_core.sh   |    2 -
 .../tools/ci_build/linux/gpu/run_py3_core.sh  |    2 -
 .../tools/ci_build/pi/build_raspberry_pi.sh   |    1 -
 .../ci_build/remote/remote_docker_build.sh    |    2 +-
 .../ci_build/windows/bazel/bazel_test_lib.sh  |    6 +-
 .../ci_build/windows/bazel/common_env.sh      |   16 +
 .../windows/cpu/bazel/run_cc_test_windows.sh  |    6 +-
 .../windows/cpu/pip/build_tf_windows.sh       |    9 +-
 .../windows/gpu/bazel/run_cc_test_windows.sh  |    4 +-
 .../windows/gpu/pip/build_tf_windows.sh       |    9 +-
 .../ci_build/windows/libtensorflow_cpu.sh     |    3 +-
 .../tools/ci_build/xla/linux/gpu/run_py3.sh   |    2 -
 tensorflow/tools/docker/Dockerfile.devel      |    5 +-
 tensorflow/tools/docker/Dockerfile.devel-gpu  |   26 +-
 tensorflow/tools/docker/Dockerfile.gpu        |    2 +-
 .../docker/parameterized_docker_build.sh      |    2 +-
 tensorflow/tools/docs/generate_lib.py         |    4 +-
 tensorflow/tools/pip_package/BUILD            |   21 -
 .../tools/pip_package/build_pip_package.sh    |    4 +-
 .../tools/pip_package/pip_smoke_test.py       |    1 -
 .../proto_text/gen_proto_text_functions.cc    |    1 -
 tensorflow/workspace.bzl                      |   16 +-
 third_party/curl.BUILD                        |   26 +-
 third_party/pcre.BUILD                        |    2 +-
 160 files changed, 938 insertions(+), 9758 deletions(-)
 delete mode 100644 tensorflow/contrib/opt/python/training/elastic_average_optimizer.py
 delete mode 100644 tensorflow/contrib/opt/python/training/elastic_average_optimizer_test.py
 delete mode 100644 tensorflow/examples/android/gradle/wrapper/gradle-wrapper.jar
 delete mode 100644 tensorflow/examples/android/gradle/wrapper/gradle-wrapper.properties
 delete mode 100644 tensorflow/examples/android/gradlew
 delete mode 100644 tensorflow/examples/android/gradlew.bat
 delete mode 100644 tensorflow/python/ops/quantized_ops_test.py

diff --git a/.gitignore b/.gitignore
index be75938ec4..d11a504bdc 100644
--- a/.gitignore
+++ b/.gitignore
@@ -6,7 +6,7 @@ node_modules
 /bazel-*
 /bazel_pip
 /tools/python_bin_path.sh
-/tensorflow/tools/git/gen
+/tools/git/gen
 /pip_test
 /_python_build
 *.pyc
@@ -26,11 +26,4 @@ Podfile.lock
 /tensorflow/contrib/lite/gen/**
 /tensorflow/contrib/lite/examples/ios/simple/data/*.txt
 /tensorflow/contrib/lite/examples/ios/simple/data/*.tflite
-xcuserdata/**
-
-# Android
-.gradle
-.idea
-*.iml
-local.properties
-gradleBuild
+xcuserdata/**
\ No newline at end of file
diff --git a/configure.py b/configure.py
index 336559532d..3d553e1c14 100644
--- a/configure.py
+++ b/configure.py
@@ -36,8 +36,8 @@ _TF_BAZELRC = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                            '.tf_configure.bazelrc')
 _TF_WORKSPACE = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                              'WORKSPACE')
-_DEFAULT_CUDA_VERSION = '9.0'
-_DEFAULT_CUDNN_VERSION = '7'
+_DEFAULT_CUDA_VERSION = '8.0'
+_DEFAULT_CUDNN_VERSION = '6'
 _DEFAULT_CUDA_COMPUTE_CAPABILITIES = '3.5,5.2'
 _DEFAULT_CUDA_PATH = '/usr/local/cuda'
 _DEFAULT_CUDA_PATH_LINUX = '/opt/cuda'
@@ -1096,27 +1096,6 @@ def set_computecpp_toolkit_path(environ_cp):
   write_action_env_to_bazelrc('COMPUTECPP_TOOLKIT_PATH',
                               computecpp_toolkit_path)
 
-def set_trisycl_include_dir(environ_cp):
-  """Set TRISYCL_INCLUDE_DIR"""
-  ask_trisycl_include_dir = ('Please specify the location of the triSYCL '
-                             'include directory. (Use --config=sycl_trisycl '
-                             'when building with Bazel) '
-                             '[Default is %s]: '
-                             ) % (_DEFAULT_TRISYCL_INCLUDE_DIR)
-  while True:
-    trisycl_include_dir = get_from_env_or_user_or_default(
-      environ_cp, 'TRISYCL_INCLUDE_DIR', ask_trisycl_include_dir,
-      _DEFAULT_TRISYCL_INCLUDE_DIR)
-    if os.path.exists(trisycl_include_dir):
-      break
-
-    print('Invalid triSYCL include directory, %s cannot be found'
-          % (trisycl_include_dir))
-
-  # Set TRISYCL_INCLUDE_DIR
-  environ_cp['TRISYCL_INCLUDE_DIR'] = trisycl_include_dir
-  write_action_env_to_bazelrc('TRISYCL_INCLUDE_DIR',
-                              trisycl_include_dir)
 
 def set_trisycl_include_dir(environ_cp):
   """Set TRISYCL_INCLUDE_DIR."""
@@ -1232,15 +1211,6 @@ def create_android_bazelrc_configs():
 def set_grpc_build_flags():
   write_to_bazelrc('build --define grpc_no_ares=true')
 
-def set_windows_build_flags():
-  if is_windows():
-    # The non-monolithic build is not supported yet
-    write_to_bazelrc('build --config monolithic')
-    # Suppress warning messages
-    write_to_bazelrc('build --copt=-w --host_copt=-w')
-    # Output more verbose information when something goes wrong
-    write_to_bazelrc('build --verbose_failures')
-
 
 def main():
   # Make a copy of os.environ to be clear when functions and getting and setting
@@ -1319,7 +1289,6 @@ def main():
   set_cc_opt_flags(environ_cp)
   set_mkl()
   set_monolithic()
-  set_windows_build_flags()
   create_android_bazelrc_configs()
 
   if workspace_has_any_android_rule():
diff --git a/tensorflow/c/c_api.cc b/tensorflow/c/c_api.cc
index 9b57047028..6f5abd074c 100644
--- a/tensorflow/c/c_api.cc
+++ b/tensorflow/c/c_api.cc
@@ -579,7 +579,6 @@ TF_Tensor* TF_TensorFromTensor(const tensorflow::Tensor& src,
       status->status = InvalidArgument(
           "invalid string tensor encoding (string #", i, " of ",
           srcarray.size(), "): ", status->status.error_message());
-      delete[] base;
       return nullptr;
     }
     dst += consumed;
@@ -589,7 +588,6 @@ TF_Tensor* TF_TensorFromTensor(const tensorflow::Tensor& src,
     status->status = InvalidArgument(
         "invalid string tensor encoding (decoded ", (dst - base),
         " bytes, but the tensor is encoded in ", size, " bytes");
-    delete[] base;
     return nullptr;
   }
 
diff --git a/tensorflow/c/c_api_function.cc b/tensorflow/c/c_api_function.cc
index d60d1de315..b9312c2974 100644
--- a/tensorflow/c/c_api_function.cc
+++ b/tensorflow/c/c_api_function.cc
@@ -68,7 +68,7 @@ class NodeNameMapping {
   // This is a superset of values in name_mapping_.
   std::unordered_set<string> used_names_;
   // Mapping from original node name from the graph to the normalized
-  // and uniquified version of it.
+  // and uniqified version of it.
   std::unordered_map<string, string> name_mapping_;
 };
 
@@ -236,7 +236,7 @@ Status FillFunctionBody(
 }
 
 // Graph to FunctionDef conversion. This code is closely modeled on the Python
-// code in tensorflow/python/framework/function.py.
+// code in third_party/tensorflow/python/framework/function.py.
 Status GraphToFunctionDef(const Graph& fn_body, const string& fn_name,
                           bool append_hash_to_fn_name,
                           const std::vector<const Node*>& body_nodes,
diff --git a/tensorflow/compiler/tf2xla/kernels/tensor_array_ops.cc b/tensorflow/compiler/tf2xla/kernels/tensor_array_ops.cc
index 03c22354a9..351fda2517 100644
--- a/tensorflow/compiler/tf2xla/kernels/tensor_array_ops.cc
+++ b/tensorflow/compiler/tf2xla/kernels/tensor_array_ops.cc
@@ -311,32 +311,6 @@ class TensorArrayGatherOp : public XlaOpKernel {
 
     xla::ComputationDataHandle ta = resource->value;
 
-    // Look for the case where the gather takes a simple slice from the
-    // tensor array (0, 1, 2, 3, 4, ..., N)
-    std::vector<int64> const_indices;
-    Status status = ctx->ConstantInputAsIntVector(1, &const_indices);
-    if (status.ok()) {
-      bool gather_is_dense_slice = true;
-      for (auto i = 0; i < const_indices.size(); i++) {
-        if (const_indices[i] != i) {
-          gather_is_dense_slice = false;
-          break;
-        }
-      }
-
-      if (gather_is_dense_slice) {
-        std::vector<int64> begin(ta_shape.dims(), 0);
-        std::vector<int64> strides(ta_shape.dims(), 1);
-        std::vector<int64> end(ta_shape.dims(), 1);
-        end[0] = const_indices.size();
-        for (auto i = 1; i < ta_shape.dims(); i++) {
-          end[i] = ta_shape.dim_size(i);
-        }
-        ctx->SetOutput(0, b->Slice(ta, begin, end, strides));
-        return;
-      }
-    }
-
     xla::ComputationDataHandle gather = XlaComputeGatherDynamicSlice(
         ctx, ta, ta_shape, indices, indices_shape, 0, dtype_, index_type, b);
     ctx->SetOutput(0, gather);
@@ -378,47 +352,28 @@ class TensorArrayScatterOp : public XlaOpKernel {
     const xla::ComputationDataHandle value = ctx->Input(2);
     const xla::ComputationDataHandle flow = ctx->Input(3);
 
-    // Look for the case where the scatter is for each sub-tensor in order. The
-    // tensor array implementation allows for this to be a straight addition.
-    bool scatter_all_elements_in_order = false;
-    std::vector<int64> const_indices;
-    Status status = ctx->ConstantInputAsIntVector(1, &const_indices);
-    if (status.ok() && num_indices == value_shape.dim_size(0)) {
-      scatter_all_elements_in_order = true;
-      for (auto i = 0; i < num_indices; i++) {
-        if (const_indices[i] != i) {
-          scatter_all_elements_in_order = false;
-          break;
-        }
-      }
-    }
+    auto slice_dims = value_shape.dim_sizes();
+    slice_dims[0] = 1LL;
 
-    if (scatter_all_elements_in_order) {
-      ta = b->Add(ta, value);
-    } else {
-      auto slice_dims = value_shape.dim_sizes();
-      slice_dims[0] = 1LL;
-
-      std::vector<int64> value_starts(value_shape.dims(), 0);
-      auto value_ends = value_shape.dim_sizes();
-
-      std::vector<int64> value_strides(value_shape.dims(), 1);
-
-      // For every (index, value) pair, update the corresponding TensorArray
-      // storage.
-      for (int i = 0; i < num_indices; ++i) {
-        // Slice out part of the value.
-        value_starts[0] = i;
-        value_ends[0] = i + 1;
-        auto slice = b->Slice(value, value_starts, value_ends, value_strides);
-
-        // start_indices of the DynamicUpdateSlice are [index, 0, 0, ..., 0].
-        auto index = b->Slice(indices, {i}, {i + 1}, {1});
-        auto start_indices =
-                b->Pad(b->Reshape(index, {1}), b->ConstantR0<int32>(0),
-                       xla::MakeEdgePaddingConfig({{0, elem_shape.dims()}}));
-        ta = DynamicAddSlice(b, ta, slice, slice_dims, start_indices);
-      }
+    std::vector<int64> value_starts(value_shape.dims(), 0);
+    auto value_ends = value_shape.dim_sizes();
+
+    std::vector<int64> value_strides(value_shape.dims(), 1);
+
+    // For every (index, value) pair, update the corresponding TensorArray
+    // storage.
+    for (int i = 0; i < num_indices; ++i) {
+      // Slice out part of the value.
+      value_starts[0] = i;
+      value_ends[0] = i + 1;
+      auto slice = b->Slice(value, value_starts, value_ends, value_strides);
+
+      // start_indices of the DynamicUpdateSlice are [index, 0, 0, ..., 0].
+      auto index = b->Slice(indices, {i}, {i + 1}, {1});
+      auto start_indices =
+          b->Pad(b->Reshape(index, {1}), b->ConstantR0<int32>(0),
+                 xla::MakeEdgePaddingConfig({{0, elem_shape.dims()}}));
+      ta = DynamicAddSlice(b, ta, slice, slice_dims, start_indices);
     }
 
     resource->value = ta;
diff --git a/tensorflow/compiler/xla/service/BUILD b/tensorflow/compiler/xla/service/BUILD
index 3655a08cf3..07ef98076e 100644
--- a/tensorflow/compiler/xla/service/BUILD
+++ b/tensorflow/compiler/xla/service/BUILD
@@ -90,6 +90,8 @@ cc_library(
         ":shape_inference",
         "//tensorflow/compiler/xla:literal_util",
         "//tensorflow/compiler/xla:shape_util",
+        "//tensorflow/compiler/xla:status",
+        "//tensorflow/compiler/xla:status_macros",
         "//tensorflow/compiler/xla:statusor",
         "//tensorflow/compiler/xla:types",
         "//tensorflow/compiler/xla:util",
diff --git a/tensorflow/compiler/xla/service/copy_insertion_test.cc b/tensorflow/compiler/xla/service/copy_insertion_test.cc
index 8388574716..3278fd5f06 100644
--- a/tensorflow/compiler/xla/service/copy_insertion_test.cc
+++ b/tensorflow/compiler/xla/service/copy_insertion_test.cc
@@ -339,7 +339,7 @@ TEST_F(CopyInsertionTest, ElementOfNestedTupleParameter) {
            ShapeUtil::MakeShape(F32, {42})}),
       "param0"));
 
-  // The return value of the computation is the zero-th element of the nested
+  // The return value of the computation is the zero-th elemnt of the nested
   // tuple. This element is itself a tuple.
   auto gte = builder.AddInstruction(HloInstruction::CreateGetTupleElement(
       ShapeUtil::GetSubshape(param->shape(), {0}), param, 0));
diff --git a/tensorflow/compiler/xla/service/layout_assignment.cc b/tensorflow/compiler/xla/service/layout_assignment.cc
index 328afe42ba..af726271ae 100644
--- a/tensorflow/compiler/xla/service/layout_assignment.cc
+++ b/tensorflow/compiler/xla/service/layout_assignment.cc
@@ -1303,7 +1303,7 @@ Status LayoutAssignment::AssignLayouts(const LayoutConstraints& constraints,
     TF_RET_CHECK(LayoutUtil::HasLayout(instruction->shape()));
   }
 
-  // Copy the root instruction's result if the it does not match the result
+  // Copy the root instrucion's result if the it does not match the result
   // layout constraint
   if (constraints.ResultLayout() != nullptr &&
       !constraints.ResultLayout()->MatchesLayoutInShape(
diff --git a/tensorflow/compiler/xla/shape_tree.h b/tensorflow/compiler/xla/shape_tree.h
index d752619bd6..bf8d190150 100644
--- a/tensorflow/compiler/xla/shape_tree.h
+++ b/tensorflow/compiler/xla/shape_tree.h
@@ -238,7 +238,7 @@ class ShapeTree {
   //           (or compatible).
   //   index : the index of the element in the shape. See ShapeUtil::GetSubshape
   //           for definition of index.
-  //   data : The data value at this element.
+  //   data : The data value at this elemnt.
   template <typename Fn>
   void ForEachElement(const Fn& func) const;
 
diff --git a/tensorflow/contrib/BUILD b/tensorflow/contrib/BUILD
index 604c41bf8a..61f7821519 100644
--- a/tensorflow/contrib/BUILD
+++ b/tensorflow/contrib/BUILD
@@ -9,12 +9,7 @@ load("//third_party/mpi:mpi.bzl", "if_mpi")
 
 py_library(
     name = "contrib_py",
-    srcs = glob(
-        ["**/*.py"],
-        exclude = [
-            "**/*_test.py",
-        ],
-    ),
+    srcs = glob(["**/*.py"]),
     srcs_version = "PY2AND3",
     visibility = ["//visibility:public"],
     deps = [
diff --git a/tensorflow/contrib/cmake/CMakeLists.txt b/tensorflow/contrib/cmake/CMakeLists.txt
index 481caf6bb0..77a3fc0c83 100644
--- a/tensorflow/contrib/cmake/CMakeLists.txt
+++ b/tensorflow/contrib/cmake/CMakeLists.txt
@@ -18,6 +18,7 @@ cmake_policy(SET CMP0022 NEW)
 
 # Options
 option(tensorflow_VERBOSE "Enable for verbose output" OFF)
+option(tensorflow_ENABLE_GPU "Enable GPU support" OFF)
 option(tensorflow_ENABLE_SSL_SUPPORT "Enable boringssl support" OFF)
 option(tensorflow_ENABLE_GRPC_SUPPORT "Enable gRPC support" ON)
 option(tensorflow_ENABLE_HDFS_SUPPORT "Enable HDFS support" OFF)
@@ -33,12 +34,6 @@ option(tensorflow_BUILD_SHARED_LIB "Build TensorFlow as a shared library" OFF)
 option(tensorflow_OPTIMIZE_FOR_NATIVE_ARCH "Enable compiler optimizations for the native processor architecture (if available)" ON)
 option(tensorflow_WIN_CPU_SIMD_OPTIONS "Enables CPU SIMD instructions")
 option(tensorflow_ENABLE_SNAPPY_SUPPORT "Enable SNAPPY compression support" ON)
-
-# GPU, CUDA and cuDNN options
-option(tensorflow_ENABLE_GPU "Enable GPU support" OFF)
-option(tensorflow_CUDA_VERSION "CUDA version to build against" 9.0)
-option(tensorflow_CUDNN_VERSION "cuDNN version to build against" 7)
-
 if(HAIKU)
 	option(tensorflow_ENABLE_POSITION_INDEPENDENT_CODE "Enable PIE support" OFF)
 else()
@@ -267,7 +262,7 @@ if (tensorflow_ENABLE_GPU)
     list(APPEND CMAKE_LIBRARY_PATH "${tensorflow_CUDA_LIBRARY_PATH}/stubs")
   endif (NOT WIN32)
 
-  find_package(CUDA ${tensorflow_CUDA_VERSION} REQUIRED)
+  find_package(CUDA 8.0 REQUIRED)
 
   # by default we assume compute cabability 3.5 and 5.2. If you change this change it in
   # CUDA_NVCC_FLAGS and cuda_config.h below
@@ -321,16 +316,13 @@ if (tensorflow_ENABLE_GPU)
       ${CUDA_curand_LIBRARY} ${CUDA_cupti_LIBRARY} ${CUDA_cusolver_LIBRARY} ${cudnn_STATIC_LIBRARY} ${culibos_STATIC_LIBRARY} ${nccl_STATIC_LIBRARY})
   endif (WIN32)
 
-  # Remove "." from CUDA version variable.
-  string(REPLACE "." "" short_CUDA_VER ${tensorflow_CUDA_VERSION})
-
   # create cuda_config.h
   FILE(WRITE ${tensorflow_source_dir}/third_party/gpus/cuda/cuda_config.h
     "#ifndef CUDA_CUDA_CONFIG_H_\n"
     "#define CUDA_CUDA_CONFIG_H_\n"
     "#define TF_CUDA_CAPABILITIES CudaVersion(\"3.0\"),CudaVersion(\"3.5\"),CudaVersion(\"5.2\")\n"
-    "#define TF_CUDA_VERSION \"64_${short_CUDA_VER}\"\n"
-    "#define TF_CUDNN_VERSION \"64_${tensorflow_CUDNN_VERSION}\"\n"
+    "#define TF_CUDA_VERSION \"64_80\"\n"
+    "#define TF_CUDNN_VERSION \"64_6\"\n"
     "#define TF_CUDA_TOOLKIT_PATH \"${CUDA_TOOLKIT_ROOT_DIR}\"\n"
     "#endif  // CUDA_CUDA_CONFIG_H_\n"
   )
@@ -368,15 +360,15 @@ if (tensorflow_ENABLE_GPU)
   if(WIN32)
     set(tensorflow_BUILD_INFO_FLAGS --build_config cuda --key_value
       msvcp_dll_name=msvcp140.dll
-      cudart_dll_name=cudart64_${short_CUDA_VER}.dll
-      cuda_version_number=${tensorflow_CUDA_VERSION}
+      cudart_dll_name=cudart64_80.dll
+      cuda_version_number=8.0
       nvcuda_dll_name=nvcuda.dll
-      cudnn_dll_name=cudnn64_${tensorflow_CUDNN_VERSION}.dll
-      cudnn_version_number=${tensorflow_CUDNN_VERSION})
+      cudnn_dll_name=cudnn64_6.dll
+      cudnn_version_number=6)
   else(WIN32)
     set(tensorflow_BUILD_INFO_FLAGS --build_config cuda --key_value
-	    cuda_version_number=${tensorflow_CUDA_VERSION}
-	    cudnn_version_number=${tensorflow_CUDNN_VERSION})
+      cuda_version_number=8.0
+      cudnn_version_number=6)
   endif(WIN32)
 else(tensorflow_ENABLE_GPU)
   set(tensorflow_BUILD_INFO_FLAGS --build_config cpu --key_value
@@ -391,7 +383,11 @@ endif()
 
 # Let's get to work!
 include(tf_core_framework.cmake)
-include(tf_stream_executor.cmake)
+# NOTE: Disabled until issue #3996 is fixed.
+# include(tf_stream_executor.cmake)
+if (tensorflow_ENABLE_GPU)
+    include(tf_stream_executor.cmake)
+endif()
 
 include(tf_core_cpu.cmake)
 include(tf_core_ops.cmake)
diff --git a/tensorflow/contrib/cmake/README.md b/tensorflow/contrib/cmake/README.md
index 4be733a280..4ddfec5960 100644
--- a/tensorflow/contrib/cmake/README.md
+++ b/tensorflow/contrib/cmake/README.md
@@ -19,6 +19,23 @@ for instructions on how to install a pre-built TensorFlow package on Windows.
 ### Current known limitations
 * It is not possible to load a custom Op library.
 * GCS file system is not supported.
+* The following Ops are not currently implemented:
+ - Dequantize
+ - QuantizeAndDequantize
+ - QuantizedAvgPool
+ - QuantizedBatchNomWithGlobalNormalization
+ - QuantizedBiasAdd
+ - QuantizedConcat
+ - QuantizedConv2D
+ - QuantizedMatmul
+ - QuantizedMaxPoo
+ - QuantizeDownAndShrinkRange
+ - QuantizedRelu
+ - QuantizedRelu6
+ - QuantizedReshape
+ - QuantizeV2
+ - RequantizationRange
+ - Requantize
 
 ## Building with CMake
 
diff --git a/tensorflow/contrib/cmake/external/gemmlowp.cmake b/tensorflow/contrib/cmake/external/gemmlowp.cmake
index a235442dc5..3b146657bf 100644
--- a/tensorflow/contrib/cmake/external/gemmlowp.cmake
+++ b/tensorflow/contrib/cmake/external/gemmlowp.cmake
@@ -14,8 +14,8 @@
 # ==============================================================================
 include (ExternalProject)
 
-set(gemmlowp_URL https://github.com/google/gemmlowp/archive/6a2a90822e8546fc2bfa7044de0faf1c1cb4862f.zip)
-set(gemmlowp_HASH SHA256=3447948d219f3270383766bbe08942888c0eb4e0ca6663c0e0548502ec5bb77d)
+set(gemmlowp_URL https://mirror.bazel.build/github.com/google/gemmlowp/archive/010bb3e71a26ca1d0884a167081d092b43563996.zip)
+set(gemmlowp_HASH SHA256=dd2557072bde12141419cb8320a9c25e6ec41a8ae53c2ac78c076a347bb46d9d)
 set(gemmlowp_BUILD ${CMAKE_CURRENT_BINARY_DIR}/gemmlowp/src/gemmlowp)
 set(gemmlowp_INCLUDE_DIR ${CMAKE_CURRENT_BINARY_DIR}/gemmlowp/src/gemmlowp)
 
diff --git a/tensorflow/contrib/cmake/tf_core_kernels.cmake b/tensorflow/contrib/cmake/tf_core_kernels.cmake
index eb6bf567aa..2d015908a8 100644
--- a/tensorflow/contrib/cmake/tf_core_kernels.cmake
+++ b/tensorflow/contrib/cmake/tf_core_kernels.cmake
@@ -150,6 +150,9 @@ list(REMOVE_ITEM tf_core_kernels_srcs ${tf_core_kernels_exclude_srcs})
 if(WIN32)
   file(GLOB_RECURSE tf_core_kernels_windows_exclude_srcs
       # not working on windows yet
+      "${tensorflow_source_dir}/tensorflow/core/kernels/meta_support.*"
+      "${tensorflow_source_dir}/tensorflow/core/kernels/*quantiz*.h"
+      "${tensorflow_source_dir}/tensorflow/core/kernels/*quantiz*.cc"
       "${tensorflow_source_dir}/tensorflow/core/kernels/neon/*"
       # not in core - those are loaded dynamically as dll
       "${tensorflow_source_dir}/tensorflow/contrib/nearest_neighbor/kernels/hyperplane_lsh_probes.cc"
diff --git a/tensorflow/contrib/cmake/tf_tests.cmake b/tensorflow/contrib/cmake/tf_tests.cmake
index 94ca4b0017..9ed5b4b9de 100644
--- a/tensorflow/contrib/cmake/tf_tests.cmake
+++ b/tensorflow/contrib/cmake/tf_tests.cmake
@@ -146,8 +146,6 @@ if (tensorflow_BUILD_PYTHON_TESTS)
     "${tensorflow_source_dir}/tensorflow/contrib/estimator/python/estimator/*_test.py"
     "${tensorflow_source_dir}/tensorflow/python/kernel_tests/*.py"
     "${tensorflow_source_dir}/tensorflow/python/meta_graph_transform/*_test.py"
-    "${tensorflow_source_dir}/tensorflow/python/ops/quantized_conv_ops_test.py"
-    "${tensorflow_source_dir}/tensorflow/python/ops/quantized_ops_test.py"
     "${tensorflow_source_dir}/tensorflow/python/platform/build_info_test.py"
     "${tensorflow_source_dir}/tensorflow/python/profiler/*_test.py"
     "${tensorflow_source_dir}/tensorflow/python/profiler/internal/*_test.py"
diff --git a/tensorflow/contrib/factorization/python/ops/gmm.py b/tensorflow/contrib/factorization/python/ops/gmm.py
index f72280c4ec..0d67e09f81 100644
--- a/tensorflow/contrib/factorization/python/ops/gmm.py
+++ b/tensorflow/contrib/factorization/python/ops/gmm.py
@@ -24,7 +24,7 @@ import numpy as np
 from tensorflow.contrib import framework
 from tensorflow.contrib.factorization.python.ops import gmm_ops
 from tensorflow.contrib.framework.python.framework import checkpoint_utils
-from tensorflow.python.training import training_util
+from tensorflow.contrib.framework.python.ops import variables
 from tensorflow.contrib.learn.python.learn.estimators import estimator
 from tensorflow.contrib.learn.python.learn.estimators import model_fn as model_fn_lib
 from tensorflow.python.framework import constant_op
@@ -167,7 +167,7 @@ class GMM(estimator.Estimator):
                                      self._num_clusters, self._random_seed,
                                      self._covariance_type,
                                      self._params)
-      incr_step = state_ops.assign_add(training_util.get_global_step(), 1)
+      incr_step = state_ops.assign_add(variables.get_global_step(), 1)
       loss = math_ops.reduce_sum(losses)
       training_op = with_dependencies([training_op, incr_step], loss)
       training_hooks = [_InitializeClustersHook(
diff --git a/tensorflow/contrib/ffmpeg/__init__.py b/tensorflow/contrib/ffmpeg/__init__.py
index daba965a98..484ffee3e7 100644
--- a/tensorflow/contrib/ffmpeg/__init__.py
+++ b/tensorflow/contrib/ffmpeg/__init__.py
@@ -28,7 +28,6 @@ from __future__ import print_function
 from tensorflow.contrib.ffmpeg.ffmpeg_ops import decode_audio
 from tensorflow.contrib.ffmpeg.ffmpeg_ops import decode_video
 from tensorflow.contrib.ffmpeg.ffmpeg_ops import encode_audio
-from tensorflow.contrib.ffmpeg.ffmpeg_ops import decode_video
 
 from tensorflow.python.util.all_util import remove_undocumented
 
diff --git a/tensorflow/contrib/ffmpeg/default/ffmpeg_lib.cc b/tensorflow/contrib/ffmpeg/default/ffmpeg_lib.cc
index 1e8af1458c..1245f515fe 100644
--- a/tensorflow/contrib/ffmpeg/default/ffmpeg_lib.cc
+++ b/tensorflow/contrib/ffmpeg/default/ffmpeg_lib.cc
@@ -49,8 +49,7 @@ std::vector<string> FfmpegAudioCommandLine(const string& input_filename,
           "-nostdin",             // No interactive commands accepted.
           "-f", input_format_id,  // eg: "mp3"
           "-probesize", StrCat(kDefaultProbeSize), "-i", input_filename,
-          "-loglevel", "error",   // Print errors only.
-          "-hide_banner",         // Skip printing build options, version, etc.
+          "-loglevel", "info",  // Enable verbose logging to support debugging.
           "-map_metadata", "-1",  // Copy global metadata from input to output.
           "-vn",                  // No video recording.
           "-ac:a:0", StrCat(channel_count), "-ar:a:0",
@@ -73,8 +72,7 @@ std::vector<string> FfmpegVideoCommandLine(const string& input_filename,
           "-probesize",
           StrCat(kDefaultProbeSize),
           "-loglevel",
-          "error",  // Print errors only.
-          "-hide_banner",  // Skip printing build options, version, etc.
+          "info",  // Enable verbose logging to support debugging.
           "-vcodec",
           "rawvideo",
           "-pix_fmt",
diff --git a/tensorflow/contrib/ffmpeg/default/ffmpeg_lib_utility_test.cc b/tensorflow/contrib/ffmpeg/default/ffmpeg_lib_utility_test.cc
index 36fc71794b..d6c885a324 100644
--- a/tensorflow/contrib/ffmpeg/default/ffmpeg_lib_utility_test.cc
+++ b/tensorflow/contrib/ffmpeg/default/ffmpeg_lib_utility_test.cc
@@ -20,8 +20,6 @@
 #include <string>
 #include <vector>
 
-
-#include "tensorflow/core/lib/io/path.h"
 #include "tensorflow/core/lib/core/threadpool.h"
 #include "tensorflow/core/lib/io/path.h"
 #include "tensorflow/core/platform/env.h"
diff --git a/tensorflow/contrib/ffmpeg/ffmpeg_ops.py b/tensorflow/contrib/ffmpeg/ffmpeg_ops.py
index 08b5a6ea48..5bb011f41c 100644
--- a/tensorflow/contrib/ffmpeg/ffmpeg_ops.py
+++ b/tensorflow/contrib/ffmpeg/ffmpeg_ops.py
@@ -21,7 +21,6 @@ from __future__ import print_function
 from tensorflow.contrib.ffmpeg.ops import gen_decode_audio_op_py
 from tensorflow.contrib.ffmpeg.ops import gen_decode_video_op_py
 from tensorflow.contrib.ffmpeg.ops import gen_encode_audio_op_py
-from tensorflow.contrib.ffmpeg.ops import gen_decode_video_op_py
 from tensorflow.contrib.util import loader
 from tensorflow.python.framework import ops
 from tensorflow.python.platform import resource_loader
diff --git a/tensorflow/contrib/gan/python/estimator/python/gan_estimator_impl.py b/tensorflow/contrib/gan/python/estimator/python/gan_estimator_impl.py
index d3dca3d9e7..eef66af7f9 100644
--- a/tensorflow/contrib/gan/python/estimator/python/gan_estimator_impl.py
+++ b/tensorflow/contrib/gan/python/estimator/python/gan_estimator_impl.py
@@ -107,7 +107,6 @@ class GANEstimator(estimator.Estimator):
                discriminator_loss_fn=None,
                generator_optimizer=None,
                discriminator_optimizer=None,
-               get_hooks_fn=None,
                add_summaries=None,
                use_loss_summaries=True,
                config=None):
@@ -138,10 +137,6 @@ class GANEstimator(estimator.Estimator):
         work.
       discriminator_optimizer: Same as `generator_optimizer`, but for the
         discriminator updates.
-      get_hooks_fn: A function that takes a `GANTrainOps` tuple and returns a
-        list of hooks. These hooks are run on the generator and discriminator
-        train ops, and can be used to implement the GAN training scheme.
-        Defaults to `train.get_sequential_train_hooks()`.
       add_summaries: `None`, a single `SummaryType`, or a list of `SummaryType`.
       use_loss_summaries: If `True`, add loss summaries. If `False`, does not.
         If `None`, uses defaults.
@@ -156,7 +151,7 @@ class GANEstimator(estimator.Estimator):
               else discriminator_optimizer)
       gan_head = head_lib.gan_head(
           generator_loss_fn, discriminator_loss_fn, gopt, dopt,
-          use_loss_summaries, get_hooks_fn=get_hooks_fn)
+          use_loss_summaries)
       return _gan_model_fn(
           features, labels, mode, generator_fn, discriminator_fn, gan_head,
           add_summaries)
diff --git a/tensorflow/contrib/gan/python/estimator/python/head_impl.py b/tensorflow/contrib/gan/python/estimator/python/head_impl.py
index a21358c50b..204c646e19 100644
--- a/tensorflow/contrib/gan/python/estimator/python/head_impl.py
+++ b/tensorflow/contrib/gan/python/estimator/python/head_impl.py
@@ -71,7 +71,7 @@ class GANHead(head._Head):  # pylint: disable=protected-access
   def __init__(self, generator_loss_fn, discriminator_loss_fn,
                generator_optimizer, discriminator_optimizer,
                use_loss_summaries=True,
-               get_hooks_fn=None,
+               get_hooks_fn=tfgan_train.get_sequential_train_hooks(),
                name=None):
     """`Head` for GAN training.
 
@@ -86,12 +86,10 @@ class GANHead(head._Head):  # pylint: disable=protected-access
       use_loss_summaries: If `True`, add loss summaries. If `False`, does not.
         If `None`, uses defaults.
       get_hooks_fn: A function that takes a GANTrainOps tuple and returns a list
-        of hooks. Defaults to `train.get_sequential_train_hooks()`
+        of hooks.
       name: name of the head. If provided, summary and metrics keys will be
         suffixed by `"/" + name`.
     """
-    if get_hooks_fn is None:
-      get_hooks_fn = tfgan_train.get_sequential_train_hooks()
     # TODO(joelshor): Validate inputs.
 
     if use_loss_summaries in [True, False]:
diff --git a/tensorflow/contrib/layers/python/layers/initializers.py b/tensorflow/contrib/layers/python/layers/initializers.py
index 51610f21b2..b12a882d9a 100644
--- a/tensorflow/contrib/layers/python/layers/initializers.py
+++ b/tensorflow/contrib/layers/python/layers/initializers.py
@@ -79,8 +79,7 @@ def variance_scaling_initializer(factor=2.0, mode='FAN_IN', uniform=False,
   ```
 
   * To get [Delving Deep into Rectifiers](
-     http://arxiv.org/pdf/1502.01852v1.pdf) (also know as the "MSRA 
-     initialization"), use (Default):<br/>
+     http://arxiv.org/pdf/1502.01852v1.pdf), use (Default):<br/>
     `factor=2.0 mode='FAN_IN' uniform=False`
   * To get [Convolutional Architecture for Fast Feature Embedding](
      http://arxiv.org/abs/1408.5093), use:<br/>
diff --git a/tensorflow/contrib/learn/python/learn/estimators/composable_model_test.py b/tensorflow/contrib/learn/python/learn/estimators/composable_model_test.py
index ef5e620e8f..14750961ef 100644
--- a/tensorflow/contrib/learn/python/learn/estimators/composable_model_test.py
+++ b/tensorflow/contrib/learn/python/learn/estimators/composable_model_test.py
@@ -18,7 +18,7 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-from tensorflow.python.training import training_util
+from tensorflow.contrib.framework.python.ops import variables as contrib_variables
 from tensorflow.contrib.layers.python.layers import feature_column
 from tensorflow.contrib.learn.python.learn.datasets import base
 from tensorflow.contrib.learn.python.learn.estimators import composable_model
@@ -55,7 +55,7 @@ def _base_model_fn(features, labels, mode, params):
     raise NotImplementedError
 
   def _train_op_fn(loss):
-    global_step = training_util.get_global_step()
+    global_step = contrib_variables.get_global_step()
     assert global_step
     train_step = model.get_train_step(loss)
 
diff --git a/tensorflow/contrib/learn/python/learn/estimators/dnn.py b/tensorflow/contrib/learn/python/learn/estimators/dnn.py
index c17b41c0f7..cb15ef23e9 100644
--- a/tensorflow/contrib/learn/python/learn/estimators/dnn.py
+++ b/tensorflow/contrib/learn/python/learn/estimators/dnn.py
@@ -23,7 +23,7 @@ import six
 from tensorflow.contrib import layers
 from tensorflow.contrib.framework import deprecated
 from tensorflow.contrib.framework import deprecated_arg_values
-from tensorflow.python.training import training_util
+from tensorflow.contrib.framework.python.ops import variables as contrib_variables
 from tensorflow.contrib.layers.python.layers import feature_column
 from tensorflow.contrib.layers.python.layers import optimizers
 from tensorflow.contrib.learn.python.learn import metric_spec
@@ -189,7 +189,7 @@ def _dnn_model_fn(features, labels, mode, params, config=None):
       """Returns the op to optimize the loss."""
       return optimizers.optimize_loss(
           loss=loss,
-          global_step=training_util.get_global_step(),
+          global_step=contrib_variables.get_global_step(),
           learning_rate=_LEARNING_RATE,
           optimizer=_get_optimizer(optimizer),
           gradient_multipliers=(
diff --git a/tensorflow/contrib/learn/python/learn/estimators/estimator_input_test.py b/tensorflow/contrib/learn/python/learn/estimators/estimator_input_test.py
index 9d7c1a099a..248c6c733f 100644
--- a/tensorflow/contrib/learn/python/learn/estimators/estimator_input_test.py
+++ b/tensorflow/contrib/learn/python/learn/estimators/estimator_input_test.py
@@ -23,7 +23,7 @@ import tempfile
 
 import numpy as np
 
-from tensorflow.python.training import training_util
+from tensorflow.contrib.framework.python.ops import variables
 from tensorflow.contrib.layers.python.layers import optimizers
 from tensorflow.contrib.learn.python.learn import metric_spec
 from tensorflow.contrib.learn.python.learn import models
@@ -114,7 +114,7 @@ def linear_model_params_fn(features, labels, mode, params):
   prediction, loss = (models.linear_regression_zero_init(features, labels))
   train_op = optimizers.optimize_loss(
       loss,
-      training_util.get_global_step(),
+      variables.get_global_step(),
       optimizer='Adagrad',
       learning_rate=params['learning_rate'])
   return prediction, loss, train_op
@@ -129,7 +129,7 @@ def linear_model_fn(features, labels, mode):
     (_, features), = features.items()
   prediction, loss = (models.linear_regression_zero_init(features, labels))
   train_op = optimizers.optimize_loss(
-      loss, training_util.get_global_step(), optimizer='Adagrad', learning_rate=0.1)
+      loss, variables.get_global_step(), optimizer='Adagrad', learning_rate=0.1)
   return prediction, loss, train_op
 
 
@@ -139,7 +139,7 @@ def linear_model_fn_with_model_fn_ops(features, labels, mode):
                   model_fn.ModeKeys.INFER)
   prediction, loss = (models.linear_regression_zero_init(features, labels))
   train_op = optimizers.optimize_loss(
-      loss, training_util.get_global_step(), optimizer='Adagrad', learning_rate=0.1)
+      loss, variables.get_global_step(), optimizer='Adagrad', learning_rate=0.1)
   return model_fn.ModelFnOps(
       mode=mode, predictions=prediction, loss=loss, train_op=train_op)
 
@@ -150,7 +150,7 @@ def logistic_model_no_mode_fn(features, labels):
   labels = array_ops.one_hot(labels, 3, 1, 0)
   prediction, loss = (models.logistic_regression_zero_init(features, labels))
   train_op = optimizers.optimize_loss(
-      loss, training_util.get_global_step(), optimizer='Adagrad', learning_rate=0.1)
+      loss, variables.get_global_step(), optimizer='Adagrad', learning_rate=0.1)
   return {
       'class': math_ops.argmax(prediction, 1),
       'prob': prediction
diff --git a/tensorflow/contrib/learn/python/learn/estimators/estimator_test.py b/tensorflow/contrib/learn/python/learn/estimators/estimator_test.py
index 2a13a84627..be2b0cb3ca 100644
--- a/tensorflow/contrib/learn/python/learn/estimators/estimator_test.py
+++ b/tensorflow/contrib/learn/python/learn/estimators/estimator_test.py
@@ -32,7 +32,7 @@ from google.protobuf import text_format
 
 from tensorflow.contrib import learn
 from tensorflow.contrib import lookup
-from tensorflow.python.training import training_util
+from tensorflow.contrib.framework.python.ops import variables
 from tensorflow.contrib.layers.python.layers import feature_column as feature_column_lib
 from tensorflow.contrib.layers.python.layers import optimizers
 from tensorflow.contrib.learn.python.learn import experiment
@@ -132,7 +132,7 @@ def linear_model_params_fn(features, labels, mode, params):
   prediction, loss = (models.linear_regression_zero_init(features, labels))
   train_op = optimizers.optimize_loss(
       loss,
-      training_util.get_global_step(),
+      variables.get_global_step(),
       optimizer='Adagrad',
       learning_rate=params['learning_rate'])
   return prediction, loss, train_op
@@ -147,7 +147,7 @@ def linear_model_fn(features, labels, mode):
     (_, features), = features.items()
   prediction, loss = (models.linear_regression_zero_init(features, labels))
   train_op = optimizers.optimize_loss(
-      loss, training_util.get_global_step(), optimizer='Adagrad', learning_rate=0.1)
+      loss, variables.get_global_step(), optimizer='Adagrad', learning_rate=0.1)
   return prediction, loss, train_op
 
 
@@ -157,7 +157,7 @@ def linear_model_fn_with_model_fn_ops(features, labels, mode):
                   model_fn.ModeKeys.INFER)
   prediction, loss = (models.linear_regression_zero_init(features, labels))
   train_op = optimizers.optimize_loss(
-      loss, training_util.get_global_step(), optimizer='Adagrad', learning_rate=0.1)
+      loss, variables.get_global_step(), optimizer='Adagrad', learning_rate=0.1)
   return model_fn.ModelFnOps(
       mode=mode, predictions=prediction, loss=loss, train_op=train_op)
 
@@ -168,7 +168,7 @@ def logistic_model_no_mode_fn(features, labels):
   labels = array_ops.one_hot(labels, 3, 1, 0)
   prediction, loss = (models.logistic_regression_zero_init(features, labels))
   train_op = optimizers.optimize_loss(
-      loss, training_util.get_global_step(), optimizer='Adagrad', learning_rate=0.1)
+      loss, variables.get_global_step(), optimizer='Adagrad', learning_rate=0.1)
   return {
       'class': math_ops.argmax(prediction, 1),
       'prob': prediction
@@ -241,7 +241,7 @@ def _build_estimator_for_resource_export_test():
     const = constant_op.constant(-1, dtype=dtypes.int64)
     table = lookup.MutableHashTable(
         dtypes.string, dtypes.int64, const, name='LookupTableModel')
-    update_global_step = training_util.get_global_step().assign_add(1)
+    update_global_step = variables.get_global_step().assign_add(1)
     if mode in (model_fn.ModeKeys.TRAIN, model_fn.ModeKeys.EVAL):
       key = constant_op.constant(['key'])
       value = constant_op.constant([42], dtype=dtypes.int64)
@@ -306,7 +306,7 @@ def _model_fn_ops(
         mode=mode,
         predictions=constant_op.constant(0.),
         loss=constant_op.constant(0.),
-        train_op=training_util.get_global_step().assign_add(1))
+        train_op=variables.get_global_step().assign_add(1))
 
 
 def _make_input_fn(features, labels):
@@ -389,7 +389,7 @@ class EstimatorModelFnTest(test.TestCase):
       self.assertEqual(expected_param, params)
       self.assertEqual(model_dir, expected_model_dir)
       return (constant_op.constant(0.), constant_op.constant(0.),
-              training_util.get_global_step().assign_add(1))
+              variables.get_global_step().assign_add(1))
     est = estimator.Estimator(model_fn=_argument_checker,
                               params=expected_param,
                               model_dir=expected_model_dir)
@@ -400,7 +400,7 @@ class EstimatorModelFnTest(test.TestCase):
     def _invalid_model_fn(features, labels):
       # pylint: disable=unused-argument
       w = variables_lib.Variable(42.0, 'weight')
-      update_global_step = training_util.get_global_step().assign_add(1)
+      update_global_step = variables.get_global_step().assign_add(1)
       with ops.control_dependencies([update_global_step]):
         loss = 100.0 - w
       return None, loss, None
@@ -415,7 +415,7 @@ class EstimatorModelFnTest(test.TestCase):
       # pylint: disable=unused-argument
       w = variables_lib.Variable(42.0, 'weight')
       loss = 100.0 - w
-      update_global_step = training_util.get_global_step().assign_add(1)
+      update_global_step = variables.get_global_step().assign_add(1)
       with ops.control_dependencies([update_global_step]):
         train_op = w.assign_add(loss / 100.0)
       predictions = loss
@@ -434,7 +434,7 @@ class EstimatorModelFnTest(test.TestCase):
       # pylint: disable=unused-argument
       w = variables_lib.Variable(42.0, 'weight')
       loss = 100.0 - w
-      update_global_step = training_util.get_global_step().assign_add(1)
+      update_global_step = variables.get_global_step().assign_add(1)
       with ops.control_dependencies([update_global_step]):
         train_op = w.assign_add(loss / 100.0)
       return None, loss, train_op
@@ -464,7 +464,7 @@ class EstimatorModelFnTest(test.TestCase):
           mode=mode,
           predictions=constant_op.constant(0.),
           loss=constant_op.constant(0.),
-          train_op=training_util.get_global_step().assign_add(1),
+          train_op=variables.get_global_step().assign_add(1),
           scaffold=monitored_session.Scaffold(init_fn=_init_fn))
 
     est = estimator.Estimator(model_fn=_model_fn_scaffold)
@@ -483,7 +483,7 @@ class EstimatorModelFnTest(test.TestCase):
           mode=mode,
           predictions=constant_op.constant([[1.]]),
           loss=constant_op.constant(0.),
-          train_op=training_util.get_global_step().assign_add(1),
+          train_op=variables.get_global_step().assign_add(1),
           scaffold=monitored_session.Scaffold(saver=self.mock_saver))
 
     def input_fn():
diff --git a/tensorflow/contrib/learn/python/learn/estimators/estimators_test.py b/tensorflow/contrib/learn/python/learn/estimators/estimators_test.py
index 8131e0fde6..1d89dfb55b 100644
--- a/tensorflow/contrib/learn/python/learn/estimators/estimators_test.py
+++ b/tensorflow/contrib/learn/python/learn/estimators/estimators_test.py
@@ -22,7 +22,7 @@ import random
 
 import numpy as np
 
-from tensorflow.python.training import training_util
+from tensorflow.contrib.framework.python.ops import variables
 from tensorflow.contrib.learn.python import learn
 from tensorflow.contrib.learn.python.learn import datasets
 from tensorflow.contrib.learn.python.learn import metric_spec
@@ -62,7 +62,7 @@ class FeatureEngineeringFunctionTest(test.TestCase):
       _ = labels
       predictions = features["transformed_x"]
       loss = constant_op.constant([2.])
-      update_global_step = training_util.get_global_step().assign_add(1)
+      update_global_step = variables.get_global_step().assign_add(1)
       return predictions, loss, update_global_step
 
     estimator = estimator_lib.Estimator(
@@ -100,7 +100,7 @@ class FeatureEngineeringFunctionTest(test.TestCase):
       _ = labels
       predictions = features["x"]
       loss = constant_op.constant([2.])
-      update_global_step = training_util.get_global_step().assign_add(1)
+      update_global_step = variables.get_global_step().assign_add(1)
       return predictions, loss, update_global_step
 
     estimator = estimator_lib.Estimator(
@@ -139,7 +139,7 @@ class FeatureEngineeringFunctionTest(test.TestCase):
       _ = labels
       predictions = features["x"]
       loss = constant_op.constant([2.])
-      update_global_step = training_util.get_global_step().assign_add(1)
+      update_global_step = variables.get_global_step().assign_add(1)
       return predictions, loss, update_global_step
 
     estimator_with_fe_fn = estimator_lib.Estimator(
diff --git a/tensorflow/contrib/learn/python/learn/estimators/kmeans.py b/tensorflow/contrib/learn/python/learn/estimators/kmeans.py
index 8f9d6fc318..992b804f59 100644
--- a/tensorflow/contrib/learn/python/learn/estimators/kmeans.py
+++ b/tensorflow/contrib/learn/python/learn/estimators/kmeans.py
@@ -28,7 +28,7 @@ import time
 import numpy as np
 
 from tensorflow.contrib.factorization.python.ops import clustering_ops
-from tensorflow.python.training import training_util
+from tensorflow.contrib.framework.python.ops import variables
 from tensorflow.contrib.learn.python.learn.estimators import estimator
 from tensorflow.contrib.learn.python.learn.estimators.model_fn import ModelFnOps
 from tensorflow.python.framework import ops
@@ -128,7 +128,7 @@ def _kmeans_clustering_model_fn(features, labels, mode, params, config):
        random_seed=params.get('random_seed'),
        kmeans_plus_plus_num_retries=params.get(
            'kmeans_plus_plus_num_retries')).training_graph()
-  incr_step = state_ops.assign_add(training_util.get_global_step(), 1)
+  incr_step = state_ops.assign_add(variables.get_global_step(), 1)
   loss = math_ops.reduce_sum(losses, name=KMeansClustering.LOSS_OP_NAME)
   summary.scalar('loss/raw', loss)
   training_op = with_dependencies([training_op, incr_step], loss)
diff --git a/tensorflow/contrib/learn/python/learn/estimators/linear.py b/tensorflow/contrib/learn/python/learn/estimators/linear.py
index 37aa8b3396..f5445ad4e7 100644
--- a/tensorflow/contrib/learn/python/learn/estimators/linear.py
+++ b/tensorflow/contrib/learn/python/learn/estimators/linear.py
@@ -26,7 +26,7 @@ import six
 from tensorflow.contrib import layers
 from tensorflow.contrib.framework import deprecated
 from tensorflow.contrib.framework import deprecated_arg_values
-from tensorflow.python.training import training_util
+from tensorflow.contrib.framework.python.ops import variables as contrib_variables
 from tensorflow.contrib.layers.python.layers import feature_column
 from tensorflow.contrib.learn.python.learn.estimators import estimator
 from tensorflow.contrib.learn.python.learn.estimators import head as head_lib
@@ -170,7 +170,7 @@ def _linear_model_fn(features, labels, mode, params, config=None):
           weight_collections=[parent_scope])
 
     def _train_op_fn(loss):
-      global_step = training_util.get_global_step()
+      global_step = contrib_variables.get_global_step()
       my_vars = ops.get_collection(parent_scope)
       grads = gradients.gradients(loss, my_vars)
       if gradient_clip_norm:
@@ -252,7 +252,7 @@ def sdca_model_fn(features, labels, mode, params):
     _add_bias_column(feature_columns, features, bias, columns_to_variables)
 
   def _train_op_fn(unused_loss):
-    global_step = training_util.get_global_step()
+    global_step = contrib_variables.get_global_step()
     sdca_model, train_op = optimizer.get_train_step(columns_to_variables,
                                                     weight_column_name,
                                                     loss_type, features,
diff --git a/tensorflow/contrib/learn/python/learn/estimators/logistic_regressor_test.py b/tensorflow/contrib/learn/python/learn/estimators/logistic_regressor_test.py
index 656d68b768..93c62f87e8 100644
--- a/tensorflow/contrib/learn/python/learn/estimators/logistic_regressor_test.py
+++ b/tensorflow/contrib/learn/python/learn/estimators/logistic_regressor_test.py
@@ -21,7 +21,7 @@ from __future__ import print_function
 import numpy as np
 
 from tensorflow.contrib import layers
-from tensorflow.python.training import training_util
+from tensorflow.contrib.framework.python.ops import variables
 from tensorflow.contrib.layers.python.layers import optimizers
 from tensorflow.contrib.learn.python.learn.datasets import base
 from tensorflow.contrib.learn.python.learn.estimators import logistic_regressor
@@ -57,7 +57,7 @@ def _logistic_regression_model_fn(features, labels, mode):
   predictions = math_ops.sigmoid(logits)
   loss = losses.sigmoid_cross_entropy(labels, logits)
   train_op = optimizers.optimize_loss(
-      loss, training_util.get_global_step(), optimizer='Adagrad', learning_rate=0.1)
+      loss, variables.get_global_step(), optimizer='Adagrad', learning_rate=0.1)
   return predictions, loss, train_op
 
 
diff --git a/tensorflow/contrib/learn/python/learn/utils/export.py b/tensorflow/contrib/learn/python/learn/utils/export.py
index cb34cb1d26..6af2287761 100644
--- a/tensorflow/contrib/learn/python/learn/utils/export.py
+++ b/tensorflow/contrib/learn/python/learn/utils/export.py
@@ -20,7 +20,7 @@ from __future__ import division
 from __future__ import print_function
 
 from tensorflow.contrib.framework import deprecated
-from tensorflow.python.training import training_util
+from tensorflow.contrib.framework.python.ops import variables as contrib_variables
 from tensorflow.contrib.session_bundle import exporter
 from tensorflow.contrib.session_bundle import gc
 from tensorflow.python.client import session as tf_session
@@ -78,7 +78,7 @@ def _export_graph(graph, saver, checkpoint_path, export_dir,
           default_graph_signature=default_graph_signature,
           named_graph_signatures=named_graph_signatures,
           assets_collection=ops.get_collection(ops.GraphKeys.ASSET_FILEPATHS))
-      return export.export(export_dir, training_util.get_global_step(),
+      return export.export(export_dir, contrib_variables.get_global_step(),
                            session, exports_to_keep=exports_to_keep)
 
 
@@ -295,7 +295,7 @@ def _export_estimator(estimator,
   checkpoint_path = (checkpoint_path or
                      tf_saver.latest_checkpoint(estimator._model_dir))
   with ops.Graph().as_default() as g:
-    training_util.create_global_step(g)
+    contrib_variables.create_global_step(g)
 
     if use_deprecated_input_fn:
       examples = array_ops.placeholder(dtype=dtypes.string,
diff --git a/tensorflow/contrib/linear_optimizer/python/sdca_estimator.py b/tensorflow/contrib/linear_optimizer/python/sdca_estimator.py
index 05794a42c5..701fc1c059 100644
--- a/tensorflow/contrib/linear_optimizer/python/sdca_estimator.py
+++ b/tensorflow/contrib/linear_optimizer/python/sdca_estimator.py
@@ -19,7 +19,7 @@ from __future__ import division
 from __future__ import print_function
 
 from tensorflow.contrib import layers
-from tensorflow.python.training import training_util
+from tensorflow.contrib.framework.python.ops import variables as contrib_variables
 from tensorflow.contrib.learn.python.learn.estimators import estimator
 from tensorflow.contrib.learn.python.learn.estimators import head as head_lib
 from tensorflow.contrib.learn.python.learn.estimators import prediction_key
@@ -154,7 +154,7 @@ def sdca_model_fn(features, labels, mode, params, config=None):
     _add_bias_column(feature_columns, features, bias, columns_to_variables)
 
   def _train_op_fn(unused_loss):
-    global_step = training_util.get_global_step()
+    global_step = contrib_variables.get_global_step()
     sdca_model, train_op = optimizer.get_train_step(
         columns_to_variables, weight_column_name, loss_type, features, labels,
         global_step)
diff --git a/tensorflow/contrib/lite/README.md b/tensorflow/contrib/lite/README.md
index 2fb40070cb..fc9144d5fc 100644
--- a/tensorflow/contrib/lite/README.md
+++ b/tensorflow/contrib/lite/README.md
@@ -167,7 +167,6 @@ graphviz, or [in tensorboard](https://codelabs.developers.google.com/codelabs/te
 This frozen Graphdef is now ready to be converted to flatbuffer format (.lite) for use on Android or iOS.  On Android users have the flexibility to use either the float or quantized versions of the frozen graphdef, if available, using the Tensorflow Optimizing Converter tool.
 
 Here is a sample command line to convert the frozen Graphdef to '.lite' format for  The Tensorflow Optimizing Converter supports both float and quantized models, however, different configuration parameters are needed depending on whether a FLOAT or QUANTIZED mode is being used.
-(Here is a link to the pb [file](https://storage.googleapis.com/download.tensorflow.org/models/mobilenet_v1_1.0_224_frozen.tgz)).
 
 ```
 bazel build tensorflow/contrib/lite/toco:toco
@@ -216,7 +215,3 @@ Note that you'd need to follow instructions for installing TensorFlow on Android
 
 ### For iOS
 Follow the documentation [here](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/lite/g3doc/ios.md) to get integrate a TFLite model into your app.
-
-## Core ML support
-
-Core ML is a machine learning framework used across Apple products. In addition to using Tensorflow Lite models directly in their applications, developers have the option to convert their trained Tensorflow models to the [CoreML](https://developer.apple.com/machine-learning/) format for use on Apple devices. For information on how to use the converter please refer to the [Tensorflow-CoreML converter documentation](https://github.com/tf-coreml/tf-coreml).
diff --git a/tensorflow/contrib/lite/ios_makefile.inc b/tensorflow/contrib/lite/ios_makefile.inc
index bcff7ed988..345ed26212 100644
--- a/tensorflow/contrib/lite/ios_makefile.inc
+++ b/tensorflow/contrib/lite/ios_makefile.inc
@@ -1,47 +1,31 @@
-# Settings for iOS.
-ifeq ($(TARGET), IOS)
-        BUILD_FOR_IOS_SIMULATOR := false
-	ifeq ($(IOS_ARCH), x86_64)
-	     	BUILD_FOR_IOS_SIMULATOR := true
-	endif
-	ifeq ($(IOS_ARCH), i386)
-	     	BUILD_FOR_IOS_SIMULATOR := true
-	endif
-	ifeq ($(BUILD_FOR_IOS_SIMULATOR), true)
-		IPHONEOS_PLATFORM := $(shell xcrun --sdk iphonesimulator \
-			--show-sdk-platform-path)
-		IPHONEOS_SYSROOT := $(shell xcrun --sdk iphonesimulator \
-			--show-sdk-path)
-	else
-		IPHONEOS_PLATFORM := $(shell xcrun --sdk iphoneos --show-sdk-platform-path)
-		IPHONEOS_SYSROOT := $(shell xcrun --sdk iphoneos --show-sdk-path)
-	endif
-	IOS_SDK_VERSION := $(shell xcrun --sdk iphoneos --show-sdk-version)
-	MIN_SDK_VERSION := 9.0
-	# Override IOS_ARCH with armv7, armv7s, arm64, i386, or x86_64.
-	IOS_ARCH := x86_64
-	CXXFLAGS += -miphoneos-version-min=$(MIN_SDK_VERSION) \
-		-DGEMMLOWP_ALLOW_SLOW_SCALAR_FALLBACK \
-		-fembed-bitcode \
-		-Wno-c++11-narrowing \
-		-mno-thumb \
-		-fno-exceptions \
-		-isysroot \
-		${IPHONEOS_SYSROOT} \
-		-arch $(IOS_ARCH) \
-		-O3
-	CCFLAGS += -miphoneos-version-min=$(MIN_SDK_VERSION) \
-		-fembed-bitcode \
-		-mno-thumb \
-		-isysroot \
-		${IPHONEOS_SYSROOT} \
-		-arch $(IOS_ARCH) \
-		-O3
-	LDFLAGS := -fembed-bitcode \
-		-miphoneos-version-min=${MIN_SDK_VERSION} \
-		-arch $(IOS_ARCH)
-	OBJDIR := $(OBJDIR)ios_$(IOS_ARCH)/
-	LIBDIR := $(LIBDIR)ios_$(IOS_ARCH)/
-	BINDIR := $(BINDIR)ios_$(IOS_ARCH)/
-	DEPDIR := $(DEPDIR)ios_$(IOS_ARCH)/
-endif
+#Settings for iOS.
+ifeq($(TARGET), IOS) BUILD_FOR_IOS_SIMULATOR
+    : = false ifeq($(IOS_ARCH), x86_64) BUILD_FOR_IOS_SIMULATOR
+    : = true endif ifeq($(IOS_ARCH), i386) BUILD_FOR_IOS_SIMULATOR
+    : = true endif ifeq($(BUILD_FOR_IOS_SIMULATOR), true) IPHONEOS_PLATFORM
+    : = $(shell xcrun-- sdk iphonesimulator-- show - sdk - platform -
+          path) IPHONEOS_SYSROOT
+    : = $(shell xcrun-- sdk iphonesimulator-- show - sdk -
+          path) else IPHONEOS_PLATFORM
+    : = $(shell xcrun-- sdk iphoneos-- show - sdk - platform -
+          path) IPHONEOS_SYSROOT
+    : = $(shell xcrun-- sdk iphoneos-- show - sdk - path) endif IOS_SDK_VERSION
+    : = $(shell xcrun-- sdk iphoneos-- show - sdk - version) MIN_SDK_VERSION
+    : = 9.0
+#Override IOS_ARCH with armv7, armv7s, arm64, i386, or x86_64.
+      IOS_ARCH
+    : = x86_64 CXXFLAGS
+      += -miphoneos - version
+         - min = $(MIN_SDK_VERSION) - DGEMMLOWP_ALLOW_SLOW_SCALAR_FALLBACK
+                 - fembed - bitcode - Wno - c++ 11 - narrowing - mno - thumb
+                 - fno - exceptions
+                 - isysroot ${IPHONEOS_SYSROOT} - arch $(IOS_ARCH) - O3 CCFLAGS
+      += -miphoneos - version
+         - min = $(MIN_SDK_VERSION) - fembed - bitcode - mno - thumb
+                 - isysroot ${IPHONEOS_SYSROOT} - arch $(IOS_ARCH) -
+                 O3 LDFLAGS
+    : = -fembed - bitcode - miphoneos - version
+        - min = ${MIN_SDK_VERSION} - arch $(IOS_ARCH) OBJDIR
+    : = $(OBJDIR) ios_$(IOS_ARCH) / LIBDIR
+    : = $(LIBDIR) ios_$(IOS_ARCH) / BINDIR
+    : = $(BINDIR) ios_$(IOS_ARCH) / DEPDIR : = $(DEPDIR) ios_$(IOS_ARCH) / endif
diff --git a/tensorflow/contrib/lite/nnapi/NeuralNetworksShim.h b/tensorflow/contrib/lite/nnapi/NeuralNetworksShim.h
index 8066889078..bdb5e01538 100644
--- a/tensorflow/contrib/lite/nnapi/NeuralNetworksShim.h
+++ b/tensorflow/contrib/lite/nnapi/NeuralNetworksShim.h
@@ -108,7 +108,7 @@ enum {
  * The type of operations that can be added to a model.
  */
 enum {
-  /** Adds two tensors, element-wise.
+  /** Adds two tensors, elment-wise.
    *
    * Takes two input tensors of identical type and compatible dimensions. The
    * output is the sum of both input tensors, optionally modified by an
@@ -743,7 +743,7 @@ enum {
    */
   ANEURALNETWORKS_MAX_POOL_2D = 17,
 
-  /** Multiplies two tensors, element-wise.
+  /** Multiplies two tensors, elment-wise.
    *
    * Takes two input tensors of identical type and compatible dimensions. The
    * output is the product of both input tensors, optionally modified by an
diff --git a/tensorflow/contrib/makefile/Makefile b/tensorflow/contrib/makefile/Makefile
index ee84b5b4c8..e2e6c05591 100644
--- a/tensorflow/contrib/makefile/Makefile
+++ b/tensorflow/contrib/makefile/Makefile
@@ -300,7 +300,7 @@ ifeq ($(TARGET),ANDROID)
 	ifeq ($(ANDROID_ARCH),x86_64)
 		TOOLCHAIN := x86_64-4.9
 		SYSROOT_ARCH := x86_64
-		BIN_PREFIX := x86_64-linux-android
+		BIN_PREFIX := x86-64-linux-android
 		MARCH_OPTION :=
 	endif
     
diff --git a/tensorflow/contrib/nn/__init__.py b/tensorflow/contrib/nn/__init__.py
index 96d60e1498..0bc133a00e 100644
--- a/tensorflow/contrib/nn/__init__.py
+++ b/tensorflow/contrib/nn/__init__.py
@@ -21,7 +21,6 @@
 @@deprecated_flipped_sigmoid_cross_entropy_with_logits
 @@nth_element
 @@rank_sampled_softmax_loss
-@@sampled_sparse_softmax_loss
 @@scaled_softplus
 """
 
diff --git a/tensorflow/contrib/nn/python/ops/sampling_ops.py b/tensorflow/contrib/nn/python/ops/sampling_ops.py
index 63fc487dca..98749cff7e 100644
--- a/tensorflow/contrib/nn/python/ops/sampling_ops.py
+++ b/tensorflow/contrib/nn/python/ops/sampling_ops.py
@@ -24,8 +24,6 @@ from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import embedding_ops
 from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import nn
-from tensorflow.python.ops import nn_impl
-from tensorflow.python.ops import nn_ops
 
 
 def _rank_resample(weights, biases, inputs, sampled_values, num_resampled,
@@ -242,101 +240,3 @@ def rank_sampled_softmax_loss(weights,
         remove_accidental_hits=remove_accidental_hits,
         partition_strategy=partition_strategy,
         name=name)
-
-
-def sampled_sparse_softmax_loss(weights,
-                                biases,
-                                labels,
-                                inputs,
-                                num_sampled,
-                                num_classes,
-                                sampled_values=None,
-                                remove_accidental_hits=True,
-                                partition_strategy="mod",
-                                name="sampled_sparse_softmax_loss"):
-  """Computes and returns the sampled sparse softmax training loss.
-
-  This is a faster way to train a softmax classifier over a huge number of
-  classes.
-
-  This operation is for training only.  It is generally an underestimate of
-  the full softmax loss.
-
-  A common use case is to use this method for training, and calculate the full
-  softmax loss for evaluation or inference. In this case, you must set
-  `partition_strategy="div"` for the two losses to be consistent, as in the
-  following example:
-
-  ```python
-  if mode == "train":
-    loss = tf.nn.sampled_sparse_softmax_loss(
-        weights=weights,
-        biases=biases,
-        labels=labels,
-        inputs=inputs,
-        ...,
-        partition_strategy="div")
-  elif mode == "eval":
-    logits = tf.matmul(inputs, tf.transpose(weights))
-    logits = tf.nn.bias_add(logits, biases)
-    loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
-        labels=tf.squeeze(labels),
-        logits=logits)
-  ```
-
-  See our [Candidate Sampling Algorithms Reference]
-  (https://www.tensorflow.org/extras/candidate_sampling.pdf)
-
-  Also see Section 3 of [Jean et al., 2014](http://arxiv.org/abs/1412.2007)
-  ([pdf](http://arxiv.org/pdf/1412.2007.pdf)) for the math.
-
-  Args:
-    weights: A `Tensor` of shape `[num_classes, dim]`, or a list of `Tensor`
-        objects whose concatenation along dimension 0 has shape
-        [num_classes, dim].  The (possibly-sharded) class embeddings.
-    biases: A `Tensor` of shape `[num_classes]`.  The class biases.
-    labels: A `Tensor` of type `int64` and shape `[batch_size, 1]`.
-        The index of the single target class for each row of logits.  Note that
-        this format differs from the `labels` argument of
-        `nn.sparse_softmax_cross_entropy_with_logits`.
-    inputs: A `Tensor` of shape `[batch_size, dim]`.  The forward
-        activations of the input network.
-    num_sampled: An `int`.  The number of classes to randomly sample per batch.
-    num_classes: An `int`. The number of possible classes.
-    sampled_values: a tuple of (`sampled_candidates`, `true_expected_count`,
-        `sampled_expected_count`) returned by a `*_candidate_sampler` function.
-        (if None, we default to `log_uniform_candidate_sampler`)
-    remove_accidental_hits:  A `bool`.  whether to remove "accidental hits"
-        where a sampled class equals one of the target classes.  Default is
-        True.
-    partition_strategy: A string specifying the partitioning strategy, relevant
-        if `len(weights) > 1`. Currently `"div"` and `"mod"` are supported.
-        Default is `"mod"`. See `tf.nn.embedding_lookup` for more details.
-    name: A name for the operation (optional).
-
-  Returns:
-    A `batch_size` 1-D tensor of per-example sampled softmax losses.
-
-  """
-  logits, _ = nn_impl._compute_sampled_logits(
-      weights=weights,
-      biases=biases,
-      labels=labels,
-      inputs=inputs,
-      num_sampled=num_sampled,
-      num_classes=num_classes,
-      num_true=1,
-      sampled_values=sampled_values,
-      subtract_log_q=True,
-      remove_accidental_hits=remove_accidental_hits,
-      partition_strategy=partition_strategy,
-      name=name)
-
-  # There is only one true label. _compute_sampled_logits puts the true logit
-  # at index 0.
-  labels = array_ops.zeros([array_ops.shape(logits)[0], 1], dtype=dtypes.int64)
-
-  sampled_losses = nn_ops.sparse_softmax_cross_entropy_with_logits(
-      labels=array_ops.squeeze(labels), logits=logits)
-  # sampled_losses is a [batch_size] tensor.
-  return sampled_losses
diff --git a/tensorflow/contrib/opt/BUILD b/tensorflow/contrib/opt/BUILD
index 9c961f2b9c..d2811f21af 100644
--- a/tensorflow/contrib/opt/BUILD
+++ b/tensorflow/contrib/opt/BUILD
@@ -16,7 +16,6 @@ py_library(
         "__init__.py",
         "python/training/addsign.py",
         "python/training/drop_stale_gradient_optimizer.py",
-        "python/training/elastic_average_optimizer.py",
         "python/training/external_optimizer.py",
         "python/training/lazy_adam_optimizer.py",
         "python/training/moving_average_optimizer.py",
@@ -175,24 +174,6 @@ tf_py_test(
     ],
 )
 
-tf_py_test(
-    name = "elastic_average_optimizer_test",
-    srcs = ["python/training/elastic_average_optimizer_test.py"],
-    additional_deps = [
-        ":opt_py",
-        "//tensorflow/python:client",
-        "//tensorflow/python:client_testlib",
-        "//tensorflow/python:array_ops",
-        "//tensorflow/python:variables",
-        "//tensorflow/python:framework",
-        "//tensorflow/python:platform",
-        "//tensorflow/python:training",
-        "//tensorflow/python:ops",
-        "//tensorflow/python:framework_for_generated_wrappers",
-        "//third_party/py/numpy",
-    ],
-)
-
 py_test(
     name = "sign_decay_test",
     srcs = ["python/training/sign_decay_test.py"],
diff --git a/tensorflow/contrib/opt/__init__.py b/tensorflow/contrib/opt/__init__.py
index 2025e8b4fc..04643a6058 100644
--- a/tensorflow/contrib/opt/__init__.py
+++ b/tensorflow/contrib/opt/__init__.py
@@ -1,4 +1,4 @@
-    # Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -28,7 +28,6 @@ from tensorflow.contrib.opt.python.training.multitask_optimizer_wrapper import *
 from tensorflow.contrib.opt.python.training.nadam_optimizer import *
 from tensorflow.contrib.opt.python.training.powersign import *
 from tensorflow.contrib.opt.python.training.variable_clipping_optimizer import *
-from tensorflow.contrib.opt.python.training.elastic_average_optimizer import *
 # pylint: enable=wildcard-import
 
 from tensorflow.python.util.all_util import remove_undocumented
@@ -47,8 +46,6 @@ _allowed_symbols = [
     'VariableClippingOptimizer',
     'MultitaskOptimizerWrapper',
     'clip_gradients_by_global_norm',
-    'ElasticAverageOptimizer', 
-    'ElasticAverageCustomGetter'
 ]
 
 remove_undocumented(__name__, _allowed_symbols)
diff --git a/tensorflow/contrib/opt/python/training/elastic_average_optimizer.py b/tensorflow/contrib/opt/python/training/elastic_average_optimizer.py
deleted file mode 100644
index 9941f22b1f..0000000000
--- a/tensorflow/contrib/opt/python/training/elastic_average_optimizer.py
+++ /dev/null
@@ -1,345 +0,0 @@
-# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Wrapper optimizer for Elastic Average SGD """
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-from tensorflow.python.framework import ops
-from tensorflow.python.ops import math_ops
-
-from tensorflow.python.ops import gen_nn_ops
-from tensorflow.python.ops import control_flow_ops
-from tensorflow.python.ops import variable_scope
-from tensorflow.python.ops import variables
-from tensorflow.python.training import optimizer
-from tensorflow.python.training import session_run_hook
-from tensorflow.python.ops import state_ops
-from tensorflow.python.ops import data_flow_ops
-from tensorflow.python.framework import dtypes
-from tensorflow.python.framework import constant_op
-
-LOCAL_VARIABLE_NAME = 'local_center_variable'
-GLOBAL_VARIABLE_NAME = 'global_center_variable'
-
-
-class ElasticAverageCustomGetter(object):
-  """Custom_getter class is used to do:
-  1. Change trainable variables to local collection and place them at worker
-    device
-  2. Generate global variables(global center variables)
-  3. Generate local variables(local center variables) which record the global
-    variables and place them at worker device
-    Notice that the class should be used with tf.replica_device_setter,
-    so that the global center variables and global step variable can be placed
-    at ps device. Besides, use 'tf.get_variable' instead of 'tf.Variable' to
-    use this custom getter.
-
-  For example,
-  ea_custom_getter = ElasticAverageCustomGetter(worker_device)
-  with tf.device(
-    tf.train.replica_device_setter(
-      worker_device=worker_device,
-      ps_device="/job:ps/cpu:0",
-      cluster=cluster)),
-    tf.variable_scope('',custom_getter=ea_custom_getter):
-    hid_w = tf.get_variable(
-      initializer=tf.truncated_normal(
-          [IMAGE_PIXELS * IMAGE_PIXELS, FLAGS.hidden_units],
-          stddev=1.0 / IMAGE_PIXELS),
-      name="hid_w")
-    hid_b = tf.get_variable(initializer=tf.zeros([FLAGS.hidden_units]),
-                            name="hid_b")
-  """
-
-  def __init__(self, worker_device):
-    """Create a new `ElasticAverageCustomGetter`.
-
-    Args:
-      worker_device: String.  Name of the `worker` job.
-    """
-    self._worker_device = worker_device
-    self._local_map = {}
-    self._global_map = {}
-
-  def __call__(self, getter, name, trainable, collections, *args, **kwargs):
-    if trainable:
-      with ops.device(self._worker_device):
-        local_var = getter(name, trainable=True,
-                           collections=[ops.GraphKeys.LOCAL_VARIABLES], 
-                           *args, **kwargs)
-        
-      global_center_variable = variable_scope.variable(
-        name='%s/%s' %
-             (GLOBAL_VARIABLE_NAME,
-              name),
-        initial_value=local_var.initialized_value(),
-        trainable=False,
-        collections=[ops.GraphKeys.GLOBAL_VARIABLES])
-
-      with ops.device(self._worker_device):
-        local_center_variable = variable_scope.variable(
-          name='%s/%s' % (LOCAL_VARIABLE_NAME, name),
-          initial_value=local_var.initialized_value(),
-          trainable=False,
-          collections=[ops.GraphKeys.LOCAL_VARIABLES])
-        
-      self._local_map[local_var] = local_center_variable
-      self._global_map[local_var] = global_center_variable
-      return local_var
-    else:
-      return getter(name, trainable, collections, *args, **kwargs)
-
-
-class ElasticAverageOptimizer(optimizer.Optimizer):
-  """Wrapper optimizer that implements the Elastic Average SGD algorithm.
-  This is an async optimizer. During the training, Each worker will update
-  the local variables and maintains its own local_step, which starts from 0
-  and is incremented by 1 after each update of local variables. Whenever
-  the communication period divides the local step, the worker requests
-  the current global center variables and then computed the elastic difference
-  between global center variables and local variables. The elastic difference
-  then be used to update both local variables and global variables.
-  """
-
-  # Default value as paper described
-  BETA = 0.9
-
-  def __init__(
-      self,
-      opt,
-      num_worker,
-      ea_custom_getter,
-      communication_period=10,
-      moving_rate=None,
-      rho=None,
-      use_locking=True,
-      name="ElasticAverageOptimizer"):
-    """Construct a new gradient descent optimizer.
-
-    Args:
-      opt: The actual optimizer that will be used to update local variables.
-        Must be one of the Optimizer classes.
-      num_worker: The number of workers
-      ea_custom_getter: The ElasticAverageCustomGetter
-      communication_period: An int point value to controls the frequency
-        of the communication between every worker and the ps.
-      moving_rate: A floating point value to control the elastic difference.
-      rho: the amount of exploration we allow ine the model. The default
-        value is moving_rate/learning_rate
-      use_locking: If True use locks for update operations.
-      name: Optional name prefix for the operations created when applying
-        gradients. Defaults to "ElasticAverageOptimizer".
-    """
-    super(ElasticAverageOptimizer, self).__init__(use_locking, name)
-    self._opt = opt
-    self._num_worker = num_worker
-    self._period = communication_period
-    self._local_map = ea_custom_getter._local_map
-    self._global_map = ea_custom_getter._global_map
-
-    if moving_rate is None:
-      self._moving_rate = BETA / communication_period / num_worker
-    else:
-      self._moving_rate = moving_rate
-    if rho is None:
-      self._rho = self._moving_rate / self._opt._learning_rate
-    else:
-      self._rho = rho
-
-    self._local_step = variable_scope.get_variable(
-      initializer=0,
-      trainable=False,
-      collections=[ops.GraphKeys.LOCAL_VARIABLES],
-      name="local_step")
-    self._opt._prepare()
-
-  def compute_gradients(self, loss, var_list=None,
-                        gate_gradients=optimizer.Optimizer.GATE_OP,
-                        aggregation_method=None,
-                        colocate_gradients_with_ops=False,
-                        grad_loss=None):
-    """Compute gradients of `loss` for the variables in `var_list`.
-    
-    Add rho*elastic_difference to loss to control the exploration
-    This is the first part of `minimize()`.  It returns a list
-    of (gradient, variable) pairs where "gradient" is the gradient
-    for "variable".  Note that "gradient" can be a `Tensor`, an
-    `IndexedSlices`, or `None` if there is no gradient for the
-    given variable.
-
-    Args:
-      loss: A Tensor containing the value to minimize.
-      var_list: Optional list or tuple of `tf.Variable` to update to minimize
-        `loss`.  Defaults to the list of variables collected in the graph
-        under the key `GraphKey.TRAINABLE_VARIABLES`.
-      gate_gradients: How to gate the computation of gradients.  Can be
-        `GATE_NONE`, `GATE_OP`, or `GATE_GRAPH`.
-      aggregation_method: Specifies the method used to combine gradient terms.
-        Valid values are defined in the class `AggregationMethod`.
-      colocate_gradients_with_ops: If True, try colocating gradients with
-        the corresponding op.
-      grad_loss: Optional. A `Tensor` holding the gradient computed for `loss`.
-
-    Returns:
-      A list of (gradient, variable) pairs. Variable is always present, but
-      gradient can be `None`.
-
-    Raises:
-      TypeError: If `var_list` contains anything else than `Variable` objects.
-      ValueError: If some arguments are invalid.
-    """
-    if not var_list:
-      var_list = variables.trainable_variables()
-      
-    elastic_difference = [math_ops.subtract(v, lv) for v, lv in zip(
-      variables.trainable_variables(),
-      [self._local_map[var] for var in var_list])]
-
-    distance_loss = self._rho * math_ops.add_n(
-                      [gen_nn_ops.l2_loss(ed) for ed in elastic_difference])
-
-    total_loss = loss + distance_loss
-    return self._opt.compute_gradients(total_loss, var_list,
-                                       gate_gradients, aggregation_method,
-                                       colocate_gradients_with_ops, grad_loss)
-
-  def apply_gradients(self, grads_and_vars, global_step=None, name=None):
-    """Apply gradients to global variables.
-
-    This is the second part of `minimize()`. It returns an `Operation` that
-    applies gradients.
-
-    Args:
-      grads_and_vars: List of (gradient, variable) pairs as returned by
-        `compute_gradients()`.
-      global_step: Optional `Variable` to increment by one after the
-        variables have been updated.
-      name: Optional name for the returned operation.  Default to the
-        name passed to the `Optimizer` constructor.
-
-    Returns:
-      An `Operation` that applies the specified gradients. If `global_step`
-      was not None, that operation also increments `global_step`.
-
-    Raises:
-      TypeError: If `grads_and_vars` is malformed.
-      ValueError: If none of the variables have gradients.
-    """
-    apply_updates = self._opt.apply_gradients(grads_and_vars)
-    with ops.control_dependencies([apply_updates]):
-      local_update = state_ops.assign_add(
-        self._local_step, 1, name='local_step_update').op
-
-    # update global variables.
-    def _Update_global_variables():
-      local_vars = [v for g, v in grads_and_vars if g is not None]
-      global_center_vars = [self._global_map[var] for var in local_vars]
-      local_center_vars = [self._local_map[var] for var in local_vars]
-      local_center_vars_update = []
-      for lvar, var in zip(local_center_vars, global_center_vars):
-        local_center_vars_update.append(lvar.assign(var))
-      update_ops = []
-      differences = []
-      with ops.control_dependencies(local_center_vars_update):
-        for v, lv in zip(local_vars, local_center_vars):
-          with ops.device(v.device):
-            differences.append(math_ops.subtract(v, lv))
-        for lvar, diff in zip(local_vars, differences):
-          with ops.device(lvar.device):
-            update_ops.append(state_ops.assign_sub(lvar, math_ops.multiply(
-              self._moving_rate, diff)))
-        for var, diff in zip(global_center_vars, differences):
-          with ops.device(var.device):
-            update_ops.append(state_ops.assign_add(var, math_ops.multiply(
-              self._moving_rate, diff)))
-        if global_step:
-          with ops.colocate_with(global_step):
-            update_ops.append(state_ops.assign_add(global_step, 1))
-      variable_update = control_flow_ops.group(*(update_ops))
-      return variable_update
-
-    with ops.control_dependencies([local_update]):
-      condition = math_ops.equal(math_ops.mod(
-        self._local_step, self._period), 0)
-      conditional_update = control_flow_ops.cond(
-        condition, _Update_global_variables, control_flow_ops.no_op)
-    return conditional_update
-
-  def get_init_op(self, task_index):
-    """Returns the op to let all the local variables and local center
-    variables equal to the global center variables before the training begins"""
-
-    def _Add_sync_queues_and_barrier(enqueue_after_list):
-      """Adds ops to enqueu on all worker queues"""
-      sync_queues = [
-        data_flow_ops.FIFOQueue(self._num_worker, [dtypes.bool], shapes=[[]],
-                                shared_name='%s%s' % (
-                                  'variable_init_sync_queue', i)) for i in
-        range(self._num_worker)]
-      queue_ops = []
-      # For each other worker, add an entry in a queue
-      token = constant_op.constant(False)
-      with ops.control_dependencies(enqueue_after_list):
-        for i, q in enumerate(sync_queues):
-          if i == task_index:
-            queue_ops.append(control_flow_ops.no_op())
-          else:
-            queue_ops.append(q.enqueue(token))
-      queue_ops.append(
-        sync_queues[task_index].dequeue_many(len(sync_queues) - 1))
-      return control_flow_ops.group(*queue_ops)
-
-    init_ops = []
-    local_vars = variables.trainable_variables()
-    global_center_vars = [self._global_map[var] for var in local_vars]
-    local_center_vars = [self._local_map[var] for var in local_vars]
-    if not (local_vars and global_center_vars and local_center_vars):
-      raise ValueError(
-        'The lists of local_variables, global_center_variables, '
-        'local_center_variables should not be empty  ')
-    for lvar, gc_var, lc_var in zip(
-        local_vars, global_center_vars, local_center_vars):
-      init_ops.append(state_ops.assign(lvar, gc_var))
-      init_ops.append(state_ops.assign(lc_var, gc_var))
-
-    init_op = control_flow_ops.group(*(init_ops))
-    sync_queue_op = _Add_sync_queues_and_barrier([init_op])
-    return sync_queue_op
-
-  def make_session_run_hook(self, is_chief, task_index):
-    """Creates a hook to handle ElasticAverageOptimizerHook ops such as initialization."""
-    return _ElasticAverageOptimizerHook(self, is_chief, task_index)
-
-
-class _ElasticAverageOptimizerHook(session_run_hook.SessionRunHook):
-  def __init__(self, ea_optimizer, is_chief, task_index):
-    """Creates hook to handle ElasticAverageOptimizer initialization ops.
-
-    Args:
-      ea_optimizer: `ElasticAverageOptimizer` which this hook will initialize.
-      is_chief: `Bool`, whether is this a chief replica or not.
-    """
-    self._ea_optimizer = ea_optimizer
-    self._is_chief = is_chief
-    self._task_index = task_index
-
-  def begin(self):
-    self._local_init_op = variables.local_variables_initializer()
-    self._global_init_op = None
-    if self._is_chief:
-      self._global_init_op = variables.global_variables_initializer()
-    self._variable_init_op = self._ea_optimizer.get_init_op(self._task_index)
diff --git a/tensorflow/contrib/opt/python/training/elastic_average_optimizer_test.py b/tensorflow/contrib/opt/python/training/elastic_average_optimizer_test.py
deleted file mode 100644
index 59e55fceee..0000000000
--- a/tensorflow/contrib/opt/python/training/elastic_average_optimizer_test.py
+++ /dev/null
@@ -1,225 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Tests for ElasticAverageOptimizer."""
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import portpicker
-from tensorflow.python.framework import constant_op
-from tensorflow.python.framework import ops
-from tensorflow.python.ops import variables
-from tensorflow.python.platform import test
-from tensorflow.python.training import gradient_descent
-from tensorflow.python.training import server_lib
-from tensorflow.python.training import training
-from tensorflow.python.training import training_util
-from tensorflow.python.ops import variable_scope
-from tensorflow.python.training import device_setter
-
-from tensorflow.contrib.opt.python.training.elastic_average_optimizer import \
-  ElasticAverageOptimizer, ElasticAverageCustomGetter, GLOBAL_VARIABLE_NAME
-
-
-def create_local_cluster(num_workers, num_ps, protocol="grpc"):
-  """Create local GRPC servers and return them."""
-  worker_ports = [portpicker.pick_unused_port() for _ in range(num_workers)]
-  ps_ports = [portpicker.pick_unused_port() for _ in range(num_ps)]
-  cluster_dict = {
-    "worker": ["localhost:%s" % port for port in worker_ports],
-    "ps": ["localhost:%s" % port for port in ps_ports]
-  }
-  cs = server_lib.ClusterSpec(cluster_dict)
-
-  workers = [
-    server_lib.Server(
-      cs, job_name="worker", protocol=protocol, task_index=ix, start=True)
-    for ix in range(num_workers)
-  ]
-  ps_servers = [
-    server_lib.Server(
-      cs, job_name="ps", protocol=protocol, task_index=ix, start=True)
-    for ix in range(num_ps)
-  ]
-
-  return cluster_dict, workers, ps_servers
-
-
-# Creates the workers and return their sessions, graphs, train_ops.
-# Cheif worker will update at last
-def _get_workers(num_workers, period, workers, moving_rate):
-  sessions = []
-  graphs = []
-  train_ops = []
-  for worker_id in range(num_workers):
-    graph = ops.Graph()
-    is_chief = (worker_id == 0)
-    with graph.as_default():
-      worker_device = "/job:worker/task:%d/cpu:0" % (worker_id)
-      ea_coustom = ElasticAverageCustomGetter(
-        worker_device=worker_device)
-      with variable_scope.variable_scope('',
-                                         custom_getter=ea_coustom), ops.device(
-        device_setter.replica_device_setter(worker_device=worker_device,
-                                            ps_device="/job:ps/task:0/cpu:0",
-                                            ps_tasks=1)):
-        global_step = variables.Variable(0, name='global_step',
-                                         trainable=False)
-        var_0 = variable_scope.get_variable(initializer=0.0, name="v0")
-        var_1 = variable_scope.get_variable(initializer=1.0, name="v1")
-
-      with ops.device("/job:worker/task:" + str(worker_id)):
-        grads_0 = constant_op.constant(-1.0)
-        grads_1 = constant_op.constant(-1.0)
-
-        sgd_opt = gradient_descent.GradientDescentOptimizer(1.0)
-        opt = ElasticAverageOptimizer(
-          opt=sgd_opt,
-          num_worker=num_workers,
-          moving_rate=moving_rate,
-          communication_period=period,
-          ea_custom_getter=ea_coustom
-        )
-        train_op = [
-          opt.apply_gradients(
-            ([grads_0, var_0],
-             [grads_1, var_1]), global_step)
-        ]
-        easgd_hook = opt.make_session_run_hook(is_chief, worker_id)
-      # Creates MonitoredSession
-      sess = training.MonitoredTrainingSession(workers[worker_id].target,
-                                               hooks=[easgd_hook])
-
-    sessions.append(sess)
-    graphs.append(graph)
-    train_ops.append(train_op)
-
-  return sessions, graphs, train_ops
-
-
-class ElasticAverageOptimizerTest(test.TestCase):
-  def _run(self, train_op, sess):
-    sess.run(train_op)
-
-  def test1Workers2Period(self):
-    num_workers = 1
-    communication_period = 2
-    num_ps = 1
-    cluster, workers, _ = create_local_cluster(num_workers=num_workers,
-                                               num_ps=num_ps)
-
-    sessions, graphs, train_ops = _get_workers(num_workers,
-                                               communication_period,
-                                               workers, 1.0)
-
-    var_0 = graphs[0].get_tensor_by_name('v0:0')
-    var_1 = graphs[0].get_tensor_by_name('v1:0')
-    global_step = training_util.get_global_step(graphs[0])
-    var_0_g = graphs[0].get_tensor_by_name(GLOBAL_VARIABLE_NAME + "/v0:0")
-    var_1_g = graphs[0].get_tensor_by_name(GLOBAL_VARIABLE_NAME + "/v1:0")
-    # Verify the initialized value.
-    self.assertAllEqual(0.0, sessions[0].run(var_0))
-    self.assertAllEqual(1.0, sessions[0].run(var_1))
-    self.assertAllEqual(0.0, sessions[0].run(var_0_g))
-    self.assertAllEqual(1.0, sessions[0].run(var_1_g))
-    self.assertAllEqual(0, sessions[0].run(global_step))
-
-    sessions[0].run(train_ops[0])
-
-    self.assertAllEqual(1.0, sessions[0].run(var_0))
-    self.assertAllEqual(2.0, sessions[0].run(var_1))
-    self.assertAllEqual(0.0, sessions[0].run(var_0_g))
-    self.assertAllEqual(1.0, sessions[0].run(var_1_g))
-    self.assertAllEqual(0, sessions[0].run(global_step))
-
-    # iteration 2, global varibale update
-    sessions[0].run(train_ops[0])
-
-    self.assertAllEqual(0.0, sessions[0].run(var_0))
-    self.assertAllEqual(1.0, sessions[0].run(var_1))
-    self.assertAllEqual(2.0, sessions[0].run(var_0_g))
-    self.assertAllEqual(3.0, sessions[0].run(var_1_g))
-    self.assertAllEqual(1, sessions[0].run(global_step))
-
-    # iteration 3
-    sessions[0].run(train_ops[0])
-
-    self.assertAllEqual(1.0, sessions[0].run(var_0))
-    self.assertAllEqual(2.0, sessions[0].run(var_1))
-    self.assertAllEqual(2.0, sessions[0].run(var_0_g))
-    self.assertAllEqual(3.0, sessions[0].run(var_1_g))
-    self.assertAllEqual(1, sessions[0].run(global_step))
-
-  def test2Worker1Period(self):
-    num_workers = 2
-    communication_period = 1
-    num_ps = 2
-    cluster, workers, _ = create_local_cluster(num_workers=num_workers,
-                                               num_ps=num_ps)
-
-    sessions, graphs, train_ops = _get_workers(num_workers,
-                                               communication_period,
-                                               workers, 0.5)
-
-    var_0 = graphs[0].get_tensor_by_name('v0:0')
-    var_1 = graphs[0].get_tensor_by_name('v1:0')
-
-    var_0_1 = graphs[1].get_tensor_by_name('v0:0')
-    var_1_1 = graphs[1].get_tensor_by_name('v1:0')
-
-    var_0_g = graphs[0].get_tensor_by_name(GLOBAL_VARIABLE_NAME + "/v0:0")
-    var_1_g = graphs[0].get_tensor_by_name(GLOBAL_VARIABLE_NAME + "/v1:0")
-    # Verify the initialized value.
-    self.assertAllEqual(0.0, sessions[0].run(var_0))
-    self.assertAllEqual(1.0, sessions[0].run(var_1))
-    self.assertAllEqual(0.0, sessions[1].run(var_0_1))
-    self.assertAllEqual(1.0, sessions[1].run(var_1_1))
-    self.assertAllEqual(0.0, sessions[0].run(var_0_g))
-    self.assertAllEqual(1.0, sessions[0].run(var_1_g))
-
-    sessions[0].run(train_ops[0])
-    sessions[1].run(train_ops[1])
-
-    self.assertAllEqual(0.5, sessions[0].run(var_0))
-    self.assertAllEqual(1.5, sessions[0].run(var_1))
-    self.assertAllEqual(0.75, sessions[0].run(var_0_g))
-    self.assertAllEqual(1.75, sessions[0].run(var_1_g))
-    self.assertAllEqual(0.75, sessions[1].run(var_0_1))
-    self.assertAllEqual(1.75, sessions[1].run(var_1_1))
-
-  def testPS2TasksWithClusterSpecClass(self):
-    cluster_spec = server_lib.ClusterSpec({
-      "ps": ["ps0:2222", "ps1:2222"],
-      "worker": ["worker0:2222", "worker1:2222", "worker2:2222"]
-    })
-    ea_coustom = ElasticAverageCustomGetter(
-      worker_device="/job:worker/task:0")
-    from tensorflow.python.training import device_setter
-    with ops.device(
-        device_setter.replica_device_setter(cluster=cluster_spec,
-                                            worker_device="/job:worker/task:0",
-                                            ps_device="/job:ps")), \
-         variable_scope.variable_scope('', custom_getter=ea_coustom):
-      v = variable_scope.get_variable(initializer=[1, 2], name="v")
-      w = variable_scope.get_variable(initializer=[2, 1], name='w')
-      v_g, w_g = ea_coustom._global_map[v],ea_coustom._global_map[w]
-      self.assertDeviceEqual("/job:worker/task:0", v.device)
-      self.assertDeviceEqual("job:ps/task:0", v_g.device)
-      self.assertDeviceEqual("/job:worker/task:0", w.device)
-      self.assertDeviceEqual("job:ps/task:1", w_g.device)
-
-
-if __name__ == '__main__':
-  test.main()
diff --git a/tensorflow/contrib/rnn/python/kernel_tests/core_rnn_cell_test.py b/tensorflow/contrib/rnn/python/kernel_tests/core_rnn_cell_test.py
index 63155faf1e..84fcf733c1 100644
--- a/tensorflow/contrib/rnn/python/kernel_tests/core_rnn_cell_test.py
+++ b/tensorflow/contrib/rnn/python/kernel_tests/core_rnn_cell_test.py
@@ -39,9 +39,6 @@ from tensorflow.python.ops import rnn_cell_impl
 from tensorflow.python.ops import variable_scope
 from tensorflow.python.ops import variables as variables_lib
 from tensorflow.python.platform import test
-from tensorflow.python.framework import test_util
-from tensorflow.contrib.rnn.python.ops import rnn_cell as contrib_rnn_cell
-
 
 # pylint: enable=protected-access
 Linear = core_rnn_cell._Linear  # pylint: disable=invalid-name
diff --git a/tensorflow/contrib/rnn/python/ops/rnn_cell.py b/tensorflow/contrib/rnn/python/ops/rnn_cell.py
index c6b1316043..5a6d287c68 100644
--- a/tensorflow/contrib/rnn/python/ops/rnn_cell.py
+++ b/tensorflow/contrib/rnn/python/ops/rnn_cell.py
@@ -36,7 +36,6 @@ from tensorflow.python.ops import nn_ops
 from tensorflow.python.ops import random_ops
 from tensorflow.python.ops import rnn_cell_impl
 from tensorflow.python.ops import variable_scope as vs
-from tensorflow.python.ops import partitioned_variables
 from tensorflow.python.platform import tf_logging as logging
 from tensorflow.python.util import nest
 
diff --git a/tensorflow/contrib/seq2seq/python/ops/helper.py b/tensorflow/contrib/seq2seq/python/ops/helper.py
index dec03ce43f..b55d90cbab 100644
--- a/tensorflow/contrib/seq2seq/python/ops/helper.py
+++ b/tensorflow/contrib/seq2seq/python/ops/helper.py
@@ -223,7 +223,8 @@ class TrainingHelper(Helper):
 
   def sample(self, time, outputs, name=None, **unused_kwargs):
     with ops.name_scope(name, "TrainingHelperSample", [time, outputs]):
-      sample_ids = math_ops.argmax(outputs, axis=-1, output_type=dtypes.int32)
+      sample_ids = math_ops.cast(
+          math_ops.argmax(outputs, axis=-1), dtypes.int32)
       return sample_ids
 
   def next_inputs(self, time, outputs, state, name=None, **unused_kwargs):
@@ -539,7 +540,8 @@ class GreedyEmbeddingHelper(Helper):
     if not isinstance(outputs, ops.Tensor):
       raise TypeError("Expected outputs to be a single Tensor, got: %s" %
                       type(outputs))
-    sample_ids = math_ops.argmax(outputs, axis=-1, output_type=dtypes.int32)
+    sample_ids = math_ops.cast(
+        math_ops.argmax(outputs, axis=-1), dtypes.int32)
     return sample_ids
 
   def next_inputs(self, time, outputs, state, sample_ids, name=None):
diff --git a/tensorflow/contrib/slim/python/slim/data/dataset_data_provider.py b/tensorflow/contrib/slim/python/slim/data/dataset_data_provider.py
index c42c7b3391..a781c647a1 100644
--- a/tensorflow/contrib/slim/python/slim/data/dataset_data_provider.py
+++ b/tensorflow/contrib/slim/python/slim/data/dataset_data_provider.py
@@ -62,9 +62,7 @@ class DatasetDataProvider(data_provider.DataProvider):
                seed=None,
                scope=None):
     """Creates a DatasetDataProvider.
-    Note: if `num_epochs` is not `None`,  local counter `epochs` will be created
-    by relevant function. Use `local_variables_initializer()` to initialize
-    local variables.
+
     Args:
       dataset: An instance of the Dataset class.
       num_readers: The number of parallel readers to use.
diff --git a/tensorflow/contrib/timeseries/python/timeseries/head.py b/tensorflow/contrib/timeseries/python/timeseries/head.py
index f0330bfbbd..5896fc2a20 100644
--- a/tensorflow/contrib/timeseries/python/timeseries/head.py
+++ b/tensorflow/contrib/timeseries/python/timeseries/head.py
@@ -19,7 +19,7 @@ from __future__ import print_function
 
 import re
 
-from tensorflow.python.training import training_util
+from tensorflow.contrib.framework.python.ops import variables
 from tensorflow.contrib.layers.python.layers import optimizers
 
 from tensorflow.contrib.timeseries.python.timeseries import feature_keys
@@ -79,7 +79,7 @@ class _TimeSeriesRegressionHead(head_lib._Head):  # pylint:disable=protected-acc
 
     train_op = optimizers.optimize_loss(
         model_outputs.loss,
-        global_step=training_util.get_global_step(),
+        global_step=variables.get_global_step(),
         optimizer=self.optimizer,
         # Learning rate is set in the Optimizer object
         learning_rate=None)
diff --git a/tensorflow/core/common_runtime/function.cc b/tensorflow/core/common_runtime/function.cc
index ee9988f0b7..c51b172066 100644
--- a/tensorflow/core/common_runtime/function.cc
+++ b/tensorflow/core/common_runtime/function.cc
@@ -348,7 +348,7 @@ Status FunctionLibraryRuntimeImpl::CreateKernel(const NodeDef& ndef,
                                  kernel);
   }
 
-  // Try to instantiate this function for the func/attr. Maybe it's
+  // Try to instantiate this function for the func/attr. Maybe its
   // cached already.
   Handle handle;
   TF_RETURN_IF_ERROR(Instantiate(ndef.op(), AttrSlice(&ndef.attr()), &handle));
diff --git a/tensorflow/core/framework/numeric_types.h b/tensorflow/core/framework/numeric_types.h
index 8514d7c474..edd952b824 100644
--- a/tensorflow/core/framework/numeric_types.h
+++ b/tensorflow/core/framework/numeric_types.h
@@ -46,7 +46,7 @@ struct bfloat16 {
   EIGEN_DEVICE_FUNC bfloat16() {}
 
   EIGEN_DEVICE_FUNC explicit bfloat16(const float v) {
-    if (Eigen::numext::isnan(v)) {
+    if (isnan(v)) {
       value = NAN_VALUE;
       return;
     }
diff --git a/tensorflow/core/framework/tensor_shape_test.cc b/tensorflow/core/framework/tensor_shape_test.cc
index d8a9c0bac5..06c576c7d4 100644
--- a/tensorflow/core/framework/tensor_shape_test.cc
+++ b/tensorflow/core/framework/tensor_shape_test.cc
@@ -359,8 +359,7 @@ Status TensorShapeOld::IsValidShape(const TensorShapeProto& proto) {
   for (const auto& d : proto.dim()) {
     if (d.size() < 0) {
       return errors::InvalidArgument("Shape ", DebugString(proto),
-                                     " has negative dimensions; ",
-                                     "perhaps an un-fed placeholder?");
+                                     " has negative dimensions");
     }
     num_elements *= d.size();
     if (num_elements > kMaxElements) {
diff --git a/tensorflow/core/graph/mkl_graph_util.h b/tensorflow/core/graph/mkl_graph_util.h
index 3df981437a..880e4e712e 100644
--- a/tensorflow/core/graph/mkl_graph_util.h
+++ b/tensorflow/core/graph/mkl_graph_util.h
@@ -76,12 +76,12 @@ namespace tensorflow {
 namespace mkl_op_registry {
   static const char* kMklOpLabel = "MklOp";
   static const char* kMklOpLabelPattern = "label='MklOp'";
-  // Prefix that we add to Tensorflow op name to construct Mkl op name.
-  static const char* const kMklOpPrefix = "_Mkl";
 
   // Get the name of Mkl op from original TensorFlow op
   // We prefix 'Mkl' to the original op to get Mkl op.
   inline string GetMklOpName(const string& name) {
+    // Prefix that we add to Tensorflow op name to construct Mkl op name.
+    const char* const kMklOpPrefix = "_Mkl";
     return string(kMklOpPrefix) + name;
   }
 
@@ -94,6 +94,9 @@ namespace mkl_op_registry {
     string kernel = KernelsRegisteredForOp(op_name);
     bool result =
         kernel.find(kMklOpLabelPattern) != string::npos && (T == DT_FLOAT);
+    if (result) {
+      VLOG(1) << "mkl_op_registry::" << op_name << " is " << kMklOpLabel;
+    }
     return result;
   }
 
@@ -109,12 +112,15 @@ namespace mkl_op_registry {
     if (!IsMklOp(op_name, T)) {
       return false;
     }
+
     bool result = (0 == op_name.compare(GetMklOpName("Add")) ||
                     0 == op_name.compare(GetMklOpName("Sub")) ||
                     0 == op_name.compare(GetMklOpName("Mul")) ||
                     0 == op_name.compare(GetMklOpName("Maximum")) ||
                     0 == op_name.compare(GetMklOpName("SquaredDifference")));
 
+    VLOG(1) << "mkl_op_registry::" << op_name
+            << " is elementwise MKL op: " << result;
     return result;
   }
 }  // namespace mkl_op_registry
diff --git a/tensorflow/core/graph/mkl_layout_pass.cc b/tensorflow/core/graph/mkl_layout_pass.cc
index 3beca1e5d2..912075aa28 100644
--- a/tensorflow/core/graph/mkl_layout_pass.cc
+++ b/tensorflow/core/graph/mkl_layout_pass.cc
@@ -42,8 +42,6 @@ limitations under the License.
 
 namespace tensorflow {
 
-#ifndef INTEL_MKL_DNN
-
 // This pass implements rewriting of graph to support following scenarios:
 // (A) Merging nodes in the graph
 // (B) Rewriting a node in the graph to a new node
@@ -2215,2087 +2213,6 @@ Status MklLayoutRewritePass::Run(
   return Status::OK();
 }
 
-#else  // INTEL_MKL_DNN
-
-// This pass implements rewriting of graph to support following scenarios:
-// (A) Merging nodes in the graph
-// (B) Rewriting a node in the graph to a new node
-//     Rewrite happens under following scenario:
-//     - Propagating Mkl layout as an additional output tensor
-//        (we will loosely call a tensor that carries Mkl layout as Mkl tensor
-//         henceforth.) from every Mkl supported NN layer.
-//
-// Example of A : Merging nodes in the graph
-// -----------------------------------------
-// Currently, we merge Conv2D+AddBias together. Consider Conv2D and BiasAdd as:
-//
-//           O = Conv2D(A, B)
-//           P = BiasAdd(O, C)
-//
-// We merge them into Conv2DWithBias as:
-//           P = _MklConv2DWithBias(A, A_m, B, B_m, C, C_m)
-//
-// The meaning of A_m, B_m and C_m is explained in B.1.
-//
-// Merge rules:
-//  - The merge for Conv2D and BiasAdd happens when the output of Conv2D _only_
-//    goes to BiasAdd.
-//  - Also, the intersection of attributes of both the nodes must have same
-//    values.
-//  - Both the nodes must have been assigned to same device (if any).
-//
-// Example of B.1 : Rewriting nodes to Mkl nodes
-// ---------------------------------------------
-// Consider a Relu node. Current definition of Relu node looks like:
-//
-//           O = Relu(A)
-//
-// Relu has 1 input (A), and 1 output (O).
-//
-// This rewrite pass will generate a new graph node for Relu (new node is
-// called MklRelu) as:
-//
-//          O, O_m = MklRelu(A, A_m)
-//
-// MklRelu has 2 inputs (A and A_m) and 2 outputs (O and O_m). Here input A is
-// same as input A of Relu; output O is same as output O of Relu. O_m is the
-// additional output tensor that will be set by MklRelu, and it represents
-// Mkl tensor corresponding to O -- in other words, O_m is some kind of
-// metadata for O. A_m is additional input of Relu, and it represents metadata
-// for A - as O_m is metadata for O, A_m is metadata for A. MklRelu receives
-// this metadata from previous node in the graph.
-//
-// When a previous node in the graph is an Mkl node, A_m will represent a valid
-// Mkl tensor. But when a previous node is not an Mkl node, A_m will represent
-// a dummy Mkl tensor.
-//
-// Rewriting rules:
-//  - Selection of a node for rewriting happens by registering the op type of
-//    the node with the rewriting pass. If the op type is not registered, then
-//    all nodes of this op type will not be rewritten.
-//  - Number of inputs after rewriting:
-//      Since for every input Tensorflow tensor, the rewritten node gets Mkl
-//      tensor(s), rewritten node gets 2*N inputs, where N is the number of
-//      inputs for the original node.
-//  - Number of outputs after rewriting:
-//      Since for every output Tensorflow tensor, the rewritten node generates
-//      Mkl tensor(s), the rewritten node generates 2*N outputs, where N is the
-//      number of outputs of the original node.
-//  - Ordering of Tensorflow tensors and Mkl tensors:
-//      Since every rewritten node generates twice the number of inputs and
-//      outputs, one could imagine various orderings among Tensorflow tensors
-//      and Mkl tensors. E.g., assume an op 'Conv2D' that takes (A, B) as
-//      inputs, then the new op '_MklConv2D' can take inputs A, B, A_m and B_m
-//      in A, A_m, B, B_m order or it can also take them in A, B, A_m, B_m
-//      order. Among N inputs one can get N! permutations.
-//
-//      So the question is: which order do we follow? We support 2 types of
-//      orderings: (1) interleaved, and (2) contiguous. Interleaved ordering
-//      follows an intuitive order where an Mkl tensor follows the
-//      corresponding Tensorflow tensor immediately. In the context of the
-//      above example, it will be: A, A_m, B, B_m. Note that the ordering rule
-//      applies to both the inputs and outputs. Contiguous ordering means
-//      all the Tensorflow tensors are contiguous followed by all the Mkl
-//      tensors. We use contiguous ordering as default.
-//
-// Graph rewrite algorithm:
-//      Algorithm: Graph Rewrite
-//      Input: Graph G, Names of the nodes to rewrite and their new names
-//      Output: Modified Graph G' if the nodes are modified, G otherwise.
-//      Start:
-//        N = Topological_Sort(G) // N is a set of nodes in toposort order.
-//        foreach node n in N
-//        do
-//          if (Is_MKL_Op(n))  // Can this node accept an Mkl layout as input.
-//          then
-//            E = set of <incoming edge and its src_output slot> of n
-//            E' = {}   // a new set of edges for rewritten node
-//            foreach <e,s> in E
-//            do
-//              E' U {<e,s>}  // First copy edge which generates Tensorflow
-//                            // tensor as it is
-//              m = Source node of edge e
-//              if Is_Rewritten(m)  // Did we rewrite this node in this pass?
-//              then
-//                E' U {<m,s+1>}    // If yes, then m will generate an Mkl
-//                                  // tensor as an additional output.
-//              else
-//                d = Generate_Dummy_Mkl_Tensor()  // If not, generate a dummy
-//                                                 // Mkl tensor.
-//                E' U {<d,0>}  // The dummy Mkl tensor has only 1 output slot.
-//              fi
-//            done
-//            n' = Build_New_Node(G,new_name,E')
-//            Mark_Rewritten(n')  // Mark the new node as being rewritten.
-//          fi
-//        done
-//
-//      Explanation:
-//        For graph rewrite, we visit nodes of the input graph in the
-//        topological sort order. With this ordering, we visit nodes in the
-//        top-to-bottom fashion. We need this order because while visiting a
-//        node we want that all of its input nodes are visited and rewritten if
-//        applicable. This is because if we need to rewrite a given node
-//        then all of its input nodes need to be fixed (in other words they
-//        cannot be deleted later.)
-//
-//        While visiting a node, we first check if the op type of the node is
-//        an Mkl op. If it is, then we rewrite that node after constructing
-//        new inputs to the node. If the op type of the node is not Mkl op,
-//        then we do not rewrite that node.
-//
-// Handling workspace propagation for certain ops:
-//
-//        Certain backward ops in MKL (MaxPool, LRN and BatchNorm) require
-//        passing of a workspace from their respective forward ops. Workspace
-//        tensors provide memory for storing results of intermediate operations
-//        which are helpful in backward propagation. TensorFlow does not have
-//        a notion of a workspace and as a result does not allow producing
-//        additional outputs from these forward ops. For these ops, we need
-//        to add 2 extra edges between forward ops and their corresponding
-//        backward ops - the first extra edge carries a workspace tensor and
-//        the second one carries an Mkl tensor for the workspace tensor.
-//
-//        Example:
-//
-//        Typical graph for MaxPool and its gradient looks like:
-//
-//        A = MaxPool(T)
-//        B = MaxPoolGrad(X, A, Y)
-//
-//        We will transform this graph to propagate the workspace as:
-//        (with the contiguous ordering)
-//
-//        A, W, A_m, W_m = MklMaxPool(T, T_m)
-//        B, B_m = MklMaxPoolGrad(X, A, Y, W, X_m, A_m, Y_m, W_m)
-//
-//        Here W is the workspace tensor. Transformed tensor names with the
-//        suffix _m are Mkl tensors, and this transformation has been done
-//        using the algorithm discussed earlier. The transformation for
-//        workspace propagation only adds extra outputs (W, W_m) for a forward
-//        op and connects them to the corresponding backward ops.
-//
-//        Terms:
-//
-//        Forward op name = name of the op in the forward pass
-//          where a workspace tensor originates (MaxPool in this example)
-//        Backward op name = name of the op in the backward pass that receives
-//          a workspace tensor from the forward op (MaxPoolGrad in the example)
-//        Slot = Position of the output or input slot that will be
-//               used by the workspace tensor (1 for MklMaxPool as W is the 2nd
-//               output of MaxPool (0 is 1st); 3 for MklMaxPoolGrad)
-//
-//        Question:
-//
-//        How do we associate a backward op to a forward op? There can be more
-//        than one op with the exact same name.
-//
-//        In this example, we associate MaxPoolGrad with MaxPool. But there
-//        could be more than one MaxPool ops. To solve this problem, we look
-//        for _direct_ edge between a forward op and a backward op (tensor A is
-//        flowing along this edge in the example).
-//
-//        How do we transform forward and backward ops when there is no direct
-//        edge between them? In such a case, we generate dummy tensors for
-//        workspace tensors. For the example, transformation of MaxPool will
-//        be exactly same as it would be when there is a direct edge between
-//        the forward and the backward op --- it is just that MaxPool won't
-//        generate any workspace tensor. For MaxPoolGrad, the transformation
-//        will also be same, but instead of connecting W and W_m with the
-//        outputs of MaxPool, we will produce dummy tensors for them, and we
-//        will set workspace_enabled attribute to false.
-//
-class MklLayoutRewritePass : public GraphOptimizationPass {
- public:
-  MklLayoutRewritePass() {
-    // NOTE: names are alphabetically sorted.
-    csinfo_.addn = "AddN";
-    csinfo_.avg_pool = "AvgPool";
-    csinfo_.avg_pool_grad = "AvgPoolGrad";
-    csinfo_.bias_add = "BiasAdd";
-    csinfo_.bias_add_grad = "BiasAddGrad";
-    csinfo_.concat = "Concat";
-    csinfo_.concatv2 = "ConcatV2";
-    csinfo_.conv2d = "Conv2D";
-    csinfo_.conv2d_with_bias = "__MklDummyConv2DWithBias";
-    csinfo_.conv2d_grad_input = "Conv2DBackpropInput";
-    csinfo_.conv2d_grad_filter = "Conv2DBackpropFilter";
-    csinfo_.conv2d_grad_filter_with_bias =
-                              "__MklDummyConv2DBackpropFilterWithBias";
-    csinfo_.fused_batch_norm = "FusedBatchNorm";
-    csinfo_.fused_batch_norm_grad = "FusedBatchNormGrad";
-    csinfo_.identity = "Identity";
-    csinfo_.lrn = "LRN";
-    csinfo_.lrn_grad = "LRNGrad";
-    csinfo_.matmul = "MatMul";
-    csinfo_.max_pool = "MaxPool";
-    csinfo_.max_pool_grad = "MaxPoolGrad";
-    csinfo_.mkl_conv2d = "_MklConv2D";
-    csinfo_.mkl_conv2d_grad_input = "_MklConv2DBackpropInput";
-    csinfo_.mkl_conv2d_grad_filter = "_MklConv2DBackpropFilter";
-    csinfo_.mkl_conv2d_with_bias = "_MklConv2DWithBias";
-    csinfo_.mkl_conv2d_grad_filter_with_bias =
-                                   "_MklConv2DBackpropFilterWithBias";
-    csinfo_.relu = "Relu";
-    csinfo_.relu_grad = "ReluGrad";
-    csinfo_.tanh       = "Tanh";
-    csinfo_.tanh_grad  = "TanhGrad";
-    csinfo_.reshape = "Reshape";
-    csinfo_.softmax = "Softmax";
-    csinfo_.split = "Split";
-    // Element-wise ops. Ensure you also add any new ops to IsOpElementWise
-    // in the MklUtil.h (IsMklElementWiseOp method) to ensure that the
-    // MklInputConversion op is added before it.
-    csinfo_.add = "Add";
-    csinfo_.maximum = "Maximum";
-    csinfo_.mul = "Mul";
-    csinfo_.squared_difference = "SquaredDifference";
-    csinfo_.sub = "Sub";
-    // End - element-wise ops. See note above.
-
-    // NOTE: names are alphabetically sorted.
-    rinfo_.push_back({csinfo_.addn, mkl_op_registry::GetMklOpName(csinfo_.addn),
-                      CopyAttrsAddN, AddNRewrite});
-    rinfo_.push_back({csinfo_.add,
-                      mkl_op_registry::GetMklOpName(csinfo_.add),
-                      CopyAttrsDataType, AlwaysRewrite});
-    rinfo_.push_back({csinfo_.avg_pool,
-                      mkl_op_registry::GetMklOpName(csinfo_.avg_pool),
-                      CopyAttrsPooling, AlwaysRewrite});
-    rinfo_.push_back({csinfo_.avg_pool_grad,
-                      mkl_op_registry::GetMklOpName(csinfo_.avg_pool_grad),
-                      CopyAttrsPooling, AlwaysRewrite});
-    rinfo_.push_back({csinfo_.concat,
-                      mkl_op_registry::GetMklOpName(csinfo_.concat),
-                      CopyAttrsConcat, AlwaysRewrite});
-    rinfo_.push_back({csinfo_.concatv2,
-                      mkl_op_registry::GetMklOpName(csinfo_.concatv2),
-                      CopyAttrsConcatV2, AlwaysRewrite});
-    rinfo_.push_back({csinfo_.conv2d,
-                      mkl_op_registry::GetMklOpName(csinfo_.conv2d),
-                      CopyAttrsConv2D, AlwaysRewrite});
-    rinfo_.push_back({csinfo_.conv2d_with_bias,
-                      csinfo_.mkl_conv2d_with_bias,
-                      CopyAttrsConv2D, AlwaysRewrite});
-    rinfo_.push_back({csinfo_.conv2d_grad_filter,
-                      mkl_op_registry::GetMklOpName(csinfo_.conv2d_grad_filter),
-                      CopyAttrsConv2D, AlwaysRewrite});
-    rinfo_.push_back({csinfo_.conv2d_grad_filter_with_bias,
-                      csinfo_.mkl_conv2d_grad_filter_with_bias,
-                      CopyAttrsConv2D, AlwaysRewrite});
-    rinfo_.push_back({csinfo_.conv2d_grad_input,
-                      mkl_op_registry::GetMklOpName(csinfo_.conv2d_grad_input),
-                      CopyAttrsConv2D, AlwaysRewrite});
-    rinfo_.push_back({csinfo_.fused_batch_norm,
-                      mkl_op_registry::GetMklOpName(csinfo_.fused_batch_norm),
-                      CopyAttrsFusedBatchNorm, AlwaysRewrite});
-    rinfo_.push_back({csinfo_.fused_batch_norm_grad,
-                      mkl_op_registry::GetMklOpName(csinfo_.fused_batch_norm_grad),
-                      CopyAttrsFusedBatchNorm, AlwaysRewrite});
-    rinfo_.push_back({csinfo_.identity,
-                      mkl_op_registry::GetMklOpName(csinfo_.identity),
-                      CopyAttrsDataType, AlwaysRewrite});
-    /*
-    rinfo_.push_back({csinfo_.lrn,
-                      mkl_op_registry::GetMklOpName(csinfo_.lrn),
-                      CopyAttrsLRN, AlwaysRewrite});
-    rinfo_.push_back({csinfo_.lrn_grad,
-                      mkl_op_registry::GetMklOpName(csinfo_.lrn_grad),
-                      CopyAttrsLRN, AlwaysRewrite});
-    */
-    rinfo_.push_back({csinfo_.max_pool,
-                      mkl_op_registry::GetMklOpName(csinfo_.max_pool),
-                      CopyAttrsPooling, NonDepthBatchWisePoolRewrite});
-    rinfo_.push_back({csinfo_.max_pool_grad,
-                      mkl_op_registry::GetMklOpName(csinfo_.max_pool_grad),
-                      CopyAttrsPooling, AlwaysRewrite});
-    rinfo_.push_back({csinfo_.maximum,
-                      mkl_op_registry::GetMklOpName(csinfo_.maximum),
-                      CopyAttrsDataType, AlwaysRewrite});
-    rinfo_.push_back({csinfo_.mul,
-                      mkl_op_registry::GetMklOpName(csinfo_.mul),
-                      CopyAttrsDataType, AlwaysRewrite});
-    rinfo_.push_back({csinfo_.relu,
-                      mkl_op_registry::GetMklOpName(csinfo_.relu),
-                      CopyAttrsDataType, AlwaysRewrite});
-    rinfo_.push_back({csinfo_.relu_grad,
-                      mkl_op_registry::GetMklOpName(csinfo_.relu_grad),
-                      CopyAttrsDataType, AlwaysRewrite});
-    rinfo_.push_back({csinfo_.tanh,
-                      mkl_op_registry::GetMklOpName(csinfo_.tanh),
-                      CopyAttrsDataType, AlwaysRewrite});
-    rinfo_.push_back({csinfo_.tanh_grad,
-                      mkl_op_registry::GetMklOpName(csinfo_.tanh_grad),
-                      CopyAttrsDataType, AlwaysRewrite});
-    rinfo_.push_back({csinfo_.reshape,
-                      mkl_op_registry::GetMklOpName(csinfo_.reshape),
-                      CopyAttrsReshape, AlwaysRewrite});
-    rinfo_.push_back({csinfo_.softmax,
-                      mkl_op_registry::GetMklOpName(csinfo_.softmax),
-                      CopyAttrsDataType, AlwaysRewrite});
-    rinfo_.push_back({csinfo_.squared_difference,
-                      mkl_op_registry::GetMklOpName(csinfo_.squared_difference),
-                      CopyAttrsDataType, AlwaysRewrite});
-    rinfo_.push_back({csinfo_.sub,
-                      mkl_op_registry::GetMklOpName(csinfo_.sub),
-                      CopyAttrsDataType, AlwaysRewrite});
-
-    // Add info about which ops to add workspace edge to and the slots.
-    wsinfo_.push_back({csinfo_.lrn, csinfo_.lrn_grad, 0, 2, 1, 3});
-    wsinfo_.push_back({csinfo_.max_pool, csinfo_.max_pool_grad, 0, 1, 1, 3});
-
-    // Add a rule for merging nodes
-    minfo_.push_back({csinfo_.conv2d, csinfo_.bias_add,
-                      csinfo_.conv2d_with_bias,
-                      GetConv2DOrBiasAdd});
-
-    minfo_.push_back({csinfo_.conv2d_grad_filter, csinfo_.bias_add_grad,
-                      csinfo_.conv2d_grad_filter_with_bias,
-                      GetConv2DBackpropFilterOrBiasAddGrad});
-  }
-
-  // Standard interface to run pass
-  Status Run(const GraphOptimizationPassOptions& options);
-
-  // Helper function which does most of heavy lifting for rewriting
-  // Mkl nodes to propagate Mkl tensor as additional output
-  //
-  // Extracts common functionality between Run public interface and
-  // test interface.
-  //
-  // @return true, if and only if graph is mutated; false otherwise.
-  bool RunPass(std::unique_ptr<Graph>* g);
-
-  /// Structure to specify the name of an original node, its new name after
-  /// rewrite, the number of inputs to the original node, the function to
-  /// be used to copy attributes for the op, and the rule (if any) which
-  /// must hold for rewriting the node
-  typedef struct {
-    string name;      // Original name of op of the node in the graph
-    string new_name;  // New name of the op of the node in the graph
-    // A function handler to copy attributes from an old node to a new node.
-    std::function<void(const Node*, NodeBuilder*)> copy_attrs;
-    // A rule under which to rewrite this node
-    std::function<bool(const Node*)> rewrite_rule;
-  } RewriteInfo;
-
-  /// Structure to specify a forward op, a backward op, and the slot numbers
-  /// in the forward and backward ops where we will add a workspace edge.
-  typedef struct {
-    string fwd_op;    // Name of a forward op in the graph
-    string bwd_op;    // Name of a backward op in the graph
-    int fwd_slot;     // Output slot in the forward op node where actual
-                      // output tensor resides
-    int bwd_slot;     // Input slot in the backward op node where actual
-                      // input tensor resides
-    int ws_fwd_slot;  // Output slot in the forward op node where workspace
-                      // edge is added
-    int ws_bwd_slot;  // Input slot in the backward op node where workspace
-                      // edge is added
-  } WorkSpaceInfo;
-
-  /// Structure to specify information used in node merge of 2 operators
-  typedef struct {
-    string op1;       // Node string for one operator.
-    string op2;       // Node string for second operator.
-    string new_node;  // Name of the node after merge
-    // Function that enables user of the node merger to specify how to find
-    // second operator given the first operator.
-    std::function<Node*(const Node*)> get_node_to_be_merged;
-  } MergeInfo;
-
-  /// Structure to store all constant strings
-  /// NOTE: names are alphabetically sorted.
-  typedef struct {
-    string addn;
-    string add;
-    string avg_pool;
-    string avg_pool_grad;
-    string bias_add;
-    string bias_add_grad;
-    string concat;
-    string concatv2;
-    string conv2d;
-    string conv2d_with_bias;
-    string conv2d_grad_input;
-    string conv2d_grad_filter;
-    string conv2d_grad_filter_with_bias;
-    string fused_batch_norm;
-    string fused_batch_norm_grad;
-    string identity;
-    string lrn;
-    string lrn_grad;
-    string matmul;
-    string max_pool;
-    string max_pool_grad;
-    string maximum;
-    string mkl_conv2d;
-    string mkl_conv2d_grad_input;
-    string mkl_conv2d_grad_filter;
-    string mkl_conv2d_grad_filter_with_bias;
-    string mkl_conv2d_with_bias;
-    string mul;
-    string relu;
-    string relu_grad;
-    string tanh;
-    string tanh_grad;
-    string reshape;
-    string softmax;
-    string split;
-    string squared_difference;
-    string sub;
-  } ConstStringsInfo;
-
- private:
-  /// Maintain info about nodes to rewrite
-  std::vector<RewriteInfo> rinfo_;
-
-  /// Maintain info about nodes to add workspace edge
-  std::vector<WorkSpaceInfo> wsinfo_;
-
-  /// Maintain info about nodes to be merged
-  std::vector<MergeInfo> minfo_;
-
-  /// Maintain structure of constant strings
-  static ConstStringsInfo csinfo_;
-
- private:
-  // Is OpDef::ArgDef a list type? It could be N * T or list(type).
-  // Refer to opdef.proto for details of list type.
-  inline bool ArgIsList(const OpDef::ArgDef& arg) const {
-    return !arg.type_list_attr().empty() || !arg.number_attr().empty();
-  }
-
-  // Get length of a list in 'n' if 'arg' is of list type. Refer to
-  // description of ArgIsList for definition of list type.
-  inline int GetTensorListLength(const OpDef::ArgDef& arg, Node* n) {
-    CHECK_EQ(ArgIsList(arg), true);
-    int N = 0;
-    const string attr_name = !arg.type_list_attr().empty()
-                                 ? arg.type_list_attr()
-                                 : arg.number_attr();
-    if (!arg.type_list_attr().empty()) {
-      std::vector<DataType> value;
-      TF_CHECK_OK(GetNodeAttr(n->def(), attr_name, &value));
-      N = value.size();
-    } else {
-      TF_CHECK_OK(GetNodeAttr(n->def(), attr_name, &N));
-    }
-    return N;
-  }
-
-  // Can op represented by node 'n' run on DEVICE_CPU?
-  // Op can run on CPU with MKL if the runtime assigned device or the
-  // user requested device contains device CPU, or both are empty.
-  bool CanOpRunOnCPUDevice(const Node* n) {
-    bool result = true;
-    string reason;
-
-    // Substring that should be checked for in device name for CPU device.
-    const char* const kCPUDeviceSubStr = "CPU";
-
-    // If Op has been specifically assigned to a non-CPU device, then No.
-    if (!n->assigned_device_name().empty() &&
-        !StringPiece(n->assigned_device_name()).contains(kCPUDeviceSubStr)) {
-      result = false;
-      reason = "Op has been assigned a runtime device that is not CPU.";
-    }
-
-    // If user has specifically assigned this op to a non-CPU device, then No.
-    if (!n->def().device().empty() &&
-        !StringPiece(n->def().device()).contains(kCPUDeviceSubStr)) {
-      result = false;
-      reason = "User has assigned a device that is not CPU.";
-    }
-
-    if (result == false) {
-      VLOG(1) << "MklLayoutRewritePass: Skipping rewriting of the node "
-              << n->type_string() << ", reason: " << reason;
-    }
-
-    // Otherwise Yes.
-    return result;
-  }
-
-  // Return a node that can be merged with input node 'n'
-  //
-  // @return pointer to the node if we can find such a
-  // node. Otherwise, it returns nullptr.
-  Node* CheckForNodeMerge(const Node* n) const;
-
-  // Merge node 'm' with node 'n'.
-  // Currently, we merge (1) Conv2D with BiasAdd, and (2) BiasAddGrad with
-  // Conv2DBackpropFilter.
-  //
-  // Input nodes m and n may be deleted if the call to
-  // this function is successful. Attempt to use the pointers
-  // after the call to function may result in undefined behaviors.
-  //
-  // @input g - input graph, m - graph node, n - graph node to be merged with m
-  // @return Status::OK(), if merging is successful and supported.
-  //         Returns appropriate Status error code otherwise.
-  //         Graph is updated in case nodes are merged. Otherwise, it is
-  //         not updated.
-  Status MergeNode(std::unique_ptr<Graph>* g, Node* m, Node* n);
-
-  // Helper function to merge different nodes
-  Status MergeConv2DWithBiasAdd(std::unique_ptr<Graph>* g, Node* m, Node* n);
-  Status MergeConv2DBackpropFilterWithBiasAddGrad(std::unique_ptr<Graph>* g,
-                                                  Node* m, Node* n);
-
-  // Find BiasAdd or Conv2D node that can be merged with input node 'm'.
-  // If input 'm' is BiasAdd, then check if there exists Conv2D node that can be
-  // merged with 'm'. If input 'm' is Conv2D, then check if there exists BiasAdd
-  // node that can be merged with 'm'.
-  static Node* GetConv2DOrBiasAdd(const Node* m) {
-    CHECK_NOTNULL(m);
-    Node* n = nullptr;
-
-    if (m->type_string() == csinfo_.bias_add) {
-      // If a is BiasAdd, then Conv2D is 0th input of BiasAdd.
-      TF_CHECK_OK(m->input_node(0, &n));
-    } else {
-      CHECK_EQ(m->type_string(), csinfo_.conv2d);
-      // Go over all output edges and search for BiasAdd Node.
-      // 0th input of BiasAdd is Conv2D.
-      for (const Edge* e : m->out_edges()) {
-        if (!e->IsControlEdge() &&
-            e->dst()->type_string() == csinfo_.bias_add &&
-            e->dst_input() == 0) {
-          n = e->dst();
-          break;
-        }
-      }
-    }
-
-    if (n == nullptr) {
-      VLOG(1) << "MklLayoutRewritePass: Could not find matching "
-              << "Conv2D and BiasAdd node for merging. Input node: "
-              << m->DebugString();
-    }
-
-    return n;
-  }
-
-  // Find Conv2DBackpropFilter or BiasAddGrad node that can be merged with input
-  // node 'm'. If input 'm' is Conv2DBackpropFilter, then check if there exists
-  // BiasAddGrad node that can be merged with 'm'. If input 'm' is BiasAddGrad,
-  // then check if there exists Conv2DBackpropFilter node that can be merged
-  // with 'm'.
-  //
-  // Graph that will allow us to connect Conv2DBackpropFilter with BiasAddGrad
-  // would look like:
-  //
-  // _ = Conv2DBackpropFilter(F, _, G)
-  // _ = BiasAddGrad(G)
-  //
-  // So 1st input of BiasAddGrad connects with 3rd input of
-  // Conv2DBackpropFilter and vice versa.
-  static Node* GetConv2DBackpropFilterOrBiasAddGrad(const Node* m) {
-    CHECK_NOTNULL(m);
-    Node* n = nullptr;
-
-    if (m->type_string() == csinfo_.bias_add_grad) {
-      // Get 1st input 'g' of BiasAddGrad.
-      Node* g = nullptr;
-      TF_CHECK_OK(m->input_node(0, &g));
-      // Now traverse all outgoing edges from g that have destination node as
-      // Conv2DBackpropFilter.
-      for (const Edge* e : g->out_edges()) {
-        if (!e->IsControlEdge() &&
-            e->dst()->type_string() == csinfo_.conv2d_grad_filter &&
-            e->dst_input() == 2 /* 3rd input of BackpropFilter */) {
-          n = e->dst();
-          break;
-        }
-      }
-    } else {
-      CHECK_EQ(m->type_string(), csinfo_.conv2d_grad_filter);
-      // Get 3rd input 'g' of Conv2DBackpropFilter.
-      Node* g = nullptr;
-      TF_CHECK_OK(m->input_node(2, &g));
-      // Now traverse all outgoing edges from g that have destination node as
-      // BiasAddGrad.
-      for (const Edge* e : g->out_edges()) {
-        if (!e->IsControlEdge() &&
-            e->dst()->type_string() == csinfo_.bias_add_grad &&
-            e->dst_input() == 0 /* 1st input of BiasAddGrad */) {
-          n = e->dst();
-          break;
-        }
-      }
-    }
-
-    if (n == nullptr) {
-      VLOG(1) << "MklLayoutRewritePass: Could not find matching "
-              << "Conv2DBackpropFilter and BiasAddGrad node for merging. "
-              << "Input node: " << m->DebugString();
-    }
-    return n;
-  }
-
-  // Check if the node 'n' has any applicable rewrite rule
-  // We check for 2 scenarios for rewrite.
-  //
-  // @return RewriteInfo* for the applicable rewrite rule
-  const RewriteInfo* CheckForNodeRewrite(const Node* n) const;
-
-  // Default rewrite rule to be used in scenario 1 for rewrite.
-  // @return - true (since we want to always rewrite)
-  static bool AlwaysRewrite(const Node* n) {
-    return true;
-  }
-
-  // Check if we are performing pooling on depth or batch. If it is, then we
-  // do not rewrite MaxPool node to Mkl version.
-  // @return - true (if it is not a depth/batch wise pooling case);
-  //           false otherwise.
-  static bool NonDepthBatchWisePoolRewrite(const Node* n) {
-    CHECK_NOTNULL(n);
-
-    string data_format_str;
-    TensorFormat data_format;
-    std::vector<int32> ksize, strides;
-    CHECK_EQ(GetNodeAttr(n->def(), "ksize", &ksize).ok(), true);
-    CHECK_EQ(GetNodeAttr(n->def(), "strides", &strides).ok(), true);
-    CHECK_EQ(GetNodeAttr(n->def(), "data_format", &data_format_str).ok(),
-             true);
-    CHECK_EQ(FormatFromString(data_format_str, &data_format), true);
-
-    // Condition that specifies non-batch-wise and non-depth-wise pooling.
-    if (GetTensorDim(ksize,   data_format, 'N') == 1 &&
-        GetTensorDim(strides, data_format, 'N') == 1 &&
-        GetTensorDim(ksize,   data_format, 'C') == 1 &&
-        GetTensorDim(strides, data_format, 'C') == 1) {
-      return true;
-    }
-
-    return false;
-  }
-
-  static bool AddNRewrite(const Node* n) {
-    CHECK_NOTNULL(n);
-
-    int num;
-    CHECK_EQ(GetNodeAttr(n->def(), "N", &num).ok(), true);
-
-    // Condition that specifies non-batch-wise and non-depth-wise pooling.
-    if (num == 2) {
-      return true;
-    }
-
-    return false;
-  }
-
-  // Rewrites input node to a new node specified by its matching rewrite info.
-  //
-  // Method first searches matching rewrite info for input node and then
-  // uses that info to rewrite.
-  //
-  // Input node may be deleted in case of rewrite. Attempt to use the node
-  // after the call can result in undefined behaviors.
-  //
-  // @input  g - input graph, n - Node to be rewritten,
-  //         ri - matching rewriteinfo
-  // @return Status::OK(), if the input node is rewritten;
-  //         Returns appropriate Status error code otherwise.
-  //         Graph is updated in case the input node is rewritten.
-  //         Otherwise, it is not updated.
-  Status RewriteNode(std::unique_ptr<Graph>* g, Node* n, const RewriteInfo* ri);
-
-  // Get nodes that will feed a list of TF tensors to the new
-  // node that we are constructing.
-  //
-  // @input g - input graph,
-  // @input inputs - inputs to old node that we are using for constructing
-  //                 new inputs,
-  // @input input_idx - the index in the 'inputs' vector pointing to the
-  //                    current input that we have processed so far
-  // @output input_idx - index will be incremented by the number of nodes
-  //                     from 'inputs' that are processed
-  // @input list_length - The expected length of list of TF tensors
-  // @output output_nodes - the list of new nodes creating TF tensors
-  //
-  // @return None
-  void GetNodesProducingTFTensorList(
-      const gtl::InlinedVector<std::pair<Node*, int>, 4>& inputs,
-      int* input_idx, int list_length,
-      std::vector<NodeBuilder::NodeOut>* output_nodes);
-
-  // Get nodes that will feed a list of Mkl tensors to the new
-  // node that we are constructing.
-  //
-  // @input g - input graph,
-  // @input orig_node - Original node that we are rewriting
-  // @input inputs - inputs to old node that we are using for constructing
-  //                 new inputs,
-  // @input input_idx - the index in the 'inputs' vector pointing to the
-  //                    current input that we have processed so far
-  // @output input_idx - index will be incremented by the number of nodes
-  //                     from 'inputs' that are processed
-  // @input list_length - The expected length of list of Mkl tensors
-  // @output output_nodes - the list of new nodes creating Mkl tensors
-  //
-  // @return None
-  void GetNodesProducingMklTensorList(std::unique_ptr<Graph>* g,
-    Node* orig_node, const gtl::InlinedVector<std::pair<Node*, int>, 4>& inputs,
-    int* input_idx, int list_length,
-    std::vector<NodeBuilder::NodeOut>* output_nodes);
-
-  // Get a node that will feed an Mkl tensor to the new
-  // node that we are constructing. The output node could be (1) 'n'
-  // if it is Mkl layer, or (2) a dummy node producing dummy Mkl tensor
-  // if 'n' is not an Mkl layer.
-  //
-  // @input g - input graph,
-  // @input orig_node - Original node that we are rewriting,
-  // @input n - Node based on which we are creating Mkl node,
-  // @input n_output_slot - the output slot of node 'n'
-  //            which is feeding to the node that we are constructing
-  // @output mkl_node - the new node that will feed Mkl tensor
-  // @output mkl_node_output_slot - the slot number of mkl_node that
-  //                                will feed the tensor
-  // @return None
-  void GetNodeProducingMklTensor(std::unique_ptr<Graph>* g, Node* orig_node,
-    Node* n, int n_output_slot, Node** mkl_node, int* mkl_node_output_slot);
-
-  // Setup new inputs using old inputs 'inputs' for the rewritten node in 'nb'
-  // in graph 'g'. Original node is input in 'old_node'. Inputs to 'nb' are
-  // set up in contiguous fashion. 'workspace_tensors' carry graph nodes
-  // producing workspace edges if 'are_workspace_tensors_available' is true.
-  // Otherwise, 'workspace_tensors' is empty vector.
-  //
-  // For details, refer to 'Ordering of inputs after rewriting' section in the
-  // documentation above.
-  //
-  // Returns Status::OK() if setting up inputs is successful, otherwise
-  // returns appropriate status code.
-  int SetUpContiguousInputs(
-      std::unique_ptr<Graph>* g,
-      const gtl::InlinedVector<std::pair<Node*, int>, 4>& old_node_inputs,
-      NodeBuilder* nb, Node* old_node,
-      std::vector<NodeBuilder::NodeOut>* workspace_tensors,
-      bool are_workspace_tensors_available);
-
-  // Setup new inputs using old inputs 'inputs' for the rewritten node in 'nb'
-  // in graph 'g'. Original node is input in 'orig_node'.
-  //
-  // For details, refer to 'Ordering of Tensorflow tensors and Mkl tensors'
-  // section in the documentation above.
-  //
-  // Returns Status::OK() if setting up inputs is successful, otherwise
-  // returns appropriate status code.
-  Status SetUpInputs(std::unique_ptr<Graph>* g,
-                     const gtl::InlinedVector<std::pair<Node*, int>, 4>& inputs,
-                     NodeBuilder* nb, Node* orig_node);
-
-  // Add workspace edge on the input or output side of Node 'orig_node' by using
-  // NodeBuilder 'nb' for the new node provided. If 'orig_node' does not dictate
-  // adding workspace edge then do not add it. Workspace Tensorflow and Mkl
-  // tensors, if they need to be added, will be set into these tensors.
-  // If we set workspace tensors, then are_ws_tensors_added should be true.
-  void AddWorkSpaceEdgeIfNeeded(std::unique_ptr<Graph>* g, Node* orig_node,
-                                NodeBuilder* nb,
-                                std::vector<NodeBuilder::NodeOut>* ws_tensors,
-                                bool* are_ws_tensors_added);
-
-  // Functions specific to operators to copy attributes
-  // We need operator-specific function to copy attributes because the framework
-  // does not provide any generic function for it.
-  // NOTE: names are alphabetically sorted.
-  static void CopyAttrsAddN(const Node* orig_node, NodeBuilder* nb);
-  static void CopyAttrsBiasAddGrad(const Node* orig_node, NodeBuilder* nb);
-  static void CopyAttrsConcat(const Node* orig_node, NodeBuilder* nb);
-  static void CopyAttrsConcatV2(const Node* orig_node, NodeBuilder* nb);
-  static void CopyAttrsConv2D(const Node* orig_node, NodeBuilder* nb);
-  static void CopyAttrsDataType(const Node* orig_node, NodeBuilder* nb);
-  static void CopyAttrsFusedBatchNorm(const Node* orig_node, NodeBuilder* nb);
-  static void CopyAttrsLRN(const Node* orig_node, NodeBuilder* nb);
-  static void CopyAttrsPooling(const Node* orig_node, NodeBuilder* nb);
-  static void CopyAttrsReshape(const Node* orig_node, NodeBuilder* nb);
-  static void CopyAttrsSplit(const Node* orig_node, NodeBuilder* nb);
-
-  // Generate a graph node in graph 'g' representing a dummy Mkl tensor node,
-  // using node for original node 'orig_node' and return it in '*out'.
-  // TODO(nhasabni) We should move this to mkl_util.h
-  void GetDummyMklTensorNode(std::unique_ptr<Graph>* g, Node** out,
-                             Node* orig_node);
-  void GetDummyWorkspaceTensorNode(std::unique_ptr<Graph>* g, Node** out,
-                                   Node* orig_node);
-};
-
-MklLayoutRewritePass::ConstStringsInfo MklLayoutRewritePass::csinfo_;
-
-// We register Mkl rewrite pass for phase 1 in post partitioning group.
-// We register it here so that we get a complete picture of all users of Mkl
-// nodes. Do not change the ordering of the Mkl passes.
-const OptimizationPassRegistry::Grouping kMklLayoutRewritePassGroup =
-    OptimizationPassRegistry::POST_PARTITIONING;
-REGISTER_OPTIMIZATION(kMklLayoutRewritePassGroup, 1, MklLayoutRewritePass);
-
-//////////////////////////////////////////////////////////////////////////
-//           Helper functions for creating new node
-//////////////////////////////////////////////////////////////////////////
-
-static void FillInputs(const Node* n,
-                       gtl::InlinedVector<Node*, 4>* control_edges,
-                       gtl::InlinedVector<std::pair<Node*, int>, 4>* in) {
-  control_edges->clear();
-  for (const Edge* e : n->in_edges()) {
-    if (e->IsControlEdge()) {
-      control_edges->push_back(e->src());
-    } else {
-      (*in)[e->dst_input()] = std::make_pair(e->src(), e->src_output());
-    }
-  }
-  std::sort(control_edges->begin(), control_edges->end());
-  if (n->op_def().is_commutative()) {
-    // For commutative inputs, we sort the input by the input Node*
-    // to get a canonical ordering (so that add(a,b) and add(b, a) will
-    // hash to the same value if is_commutative is true for 'add').
-    std::sort(in->begin(), in->end());
-  }
-}
-
-void MklLayoutRewritePass::GetNodesProducingTFTensorList(
-    const gtl::InlinedVector<std::pair<Node*, int>, 4>& inputs, int* input_idx,
-    int list_length, std::vector<NodeBuilder::NodeOut>* output_nodes) {
-  CHECK_LT(*input_idx, inputs.size());
-  CHECK_GT(list_length, 0);
-  CHECK_NOTNULL(output_nodes);
-  output_nodes->reserve(list_length);
-
-  while (list_length != 0) {
-    CHECK_GT(list_length, 0);
-    CHECK_LT(*input_idx, inputs.size());
-    Node* n = inputs[*input_idx].first;
-    int slot = inputs[*input_idx].second;
-    // If input node 'n' is just producing a single tensor at
-    // output slot 'slot' then we just add that single node.
-    output_nodes->push_back(NodeBuilder::NodeOut(n, slot));
-    (*input_idx)++;
-    list_length--;
-  }
-}
-
-// TODO(nhasabni) We should move this to mkl_util.h.
-void MklLayoutRewritePass::GetDummyMklTensorNode(std::unique_ptr<Graph>* g,
-                                                 Node** out, Node* orig_node) {
-  // We use a tensor of shape {8} and value 0,0,0,0,0,0,0,0 to represent
-  // dummy Mkl tensor. 8 = 2*size_t.
-  const DataType dt = DataTypeToEnum<uint8>::v();
-  TensorProto proto;
-  proto.set_dtype(dt);
-  uint8 zero[8] = {0, 0, 0, 0, 0, 0, 0, 0};
-  proto.set_tensor_content(const_cast<const void*>(static_cast<void*>(&zero)),
-                           8);
-  TensorShape dummy_shape({8});
-  dummy_shape.AsProto(proto.mutable_tensor_shape());
-  TF_CHECK_OK(NodeBuilder((*g)->NewName("DMT"), "Const")
-               .Attr("value", proto)
-               .Attr("dtype", dt)
-               .Device(orig_node->def().device())  // We place this node on
-                                                   // the same device as the
-                                                   // device of the original
-                                                   // node.
-               .Finalize(&**g, out));
-
-  // If number of inputs to the original node is > 0, then we add
-  // control dependency between 1st input (index 0) of the original node and
-  // the dummy Mkl node. This is needed because control-flow ops such as Enter,
-  // Merge, etc, require frame_name of the dummy Mkl node to be same as the
-  // rewritten node. Adding control edge between 1st input of the original node
-  // and the dummy Mkl node ensures that the dummy node is in the same frame
-  // as the original node. Choosing 1st input is not necessary - any input of
-  // the original node is fine because all the inputs of a node are always in
-  // the same frame.
-  if (orig_node->num_inputs() > 0) {
-    Node* orig_input0 = nullptr;
-    TF_CHECK_OK(orig_node->input_node(0,
-                                      const_cast<const Node**>(&orig_input0)));
-    CHECK_NOTNULL((*g)->AddControlEdge(orig_input0, *out));
-  }
-
-  (*out)->set_assigned_device_name(orig_node->assigned_device_name());
-}
-
-void MklLayoutRewritePass::GetNodesProducingMklTensorList(
-    std::unique_ptr<Graph>* g,
-    Node* orig_node,
-    const gtl::InlinedVector<std::pair<Node*, int>, 4>& inputs,
-    int* input_idx, int list_length,
-    std::vector<NodeBuilder::NodeOut>* output_nodes) {
-  CHECK_LT(*input_idx, inputs.size());
-  CHECK_GT(list_length, 0);
-  CHECK_NOTNULL(output_nodes);
-  output_nodes->reserve(list_length);
-
-  while (list_length != 0) {
-    CHECK_GT(list_length, 0);
-    CHECK_LT(*input_idx, inputs.size());
-    Node* n = inputs[*input_idx].first;
-    int slot = inputs[*input_idx].second;
-    // If 'n' is producing a single tensor, then create a single Mkl tensor
-    // node.
-    Node* mkl_node = nullptr;
-    int mkl_node_output_slot = 0;
-    GetNodeProducingMklTensor(g, orig_node, n, slot, &mkl_node,
-                              &mkl_node_output_slot);
-    output_nodes->push_back(NodeBuilder::NodeOut(mkl_node,
-                                                mkl_node_output_slot));
-    (*input_idx)++;
-    list_length--;
-  }
-}
-
-// Get an input node that will feed Mkl tensor to the new
-// node that we are constructing. An input node could be (1) 'n'
-// if it is Mkl layer, or (2) a dummy node producing dummy Mkl tensor
-// if 'n' is not an Mkl layer.
-void MklLayoutRewritePass::GetNodeProducingMklTensor(std::unique_ptr<Graph>* g,
-    Node* orig_node, Node* n,
-    int n_output_slot, Node** mkl_node, int* mkl_node_output_slot) {
-  CHECK_NOTNULL(n);
-  CHECK_NOTNULL(mkl_node);
-  CHECK_NOTNULL(mkl_node_output_slot);
-
-  // If this is an MKL op, then it will create extra output for MKL layout.
-  DataType T;
-  if (GetNodeAttr(n->def(), "T", &T).ok() &&
-      mkl_op_registry::IsMklOp(n->type_string(), T)) {
-    // If this is an MKL op, then it will generate an edge that will receive
-    // Mkl tensor from a node.
-    // output slot number for Mkl tensor would be N+slot number of TensorFlow
-    // tensor, where N is total number of TensorFlow tensors.
-    *mkl_node = n;
-    *mkl_node_output_slot =
-        GetTensorMetaDataIndex(n_output_slot, n->num_outputs());
-  } else {
-    // If we have not visited the node and rewritten it, then we need
-    // to create a dummy node that will feed a dummy Mkl tensor to this node.
-    // DummyMklTensor node has no input and generates only 1 output
-    // (dummy Mkl tensor) as output slot number 0.
-    GetDummyMklTensorNode(g, mkl_node, orig_node);
-    CHECK_NOTNULL(*mkl_node);
-    *mkl_node_output_slot = 0;
-  }
-}
-
-int MklLayoutRewritePass::SetUpContiguousInputs(
-    std::unique_ptr<Graph>* g,
-    const gtl::InlinedVector<std::pair<Node*, int>, 4>& old_node_inputs,
-    NodeBuilder* nb, Node* old_node,
-    std::vector<NodeBuilder::NodeOut>* workspace_tensors,
-    bool are_workspace_tensors_available) {
-  CHECK_NOTNULL(workspace_tensors);
-  CHECK_EQ(kTensorOrdering, MklTfTensorOrdering::TENSORS_CONTIGUOUS);
-
-  // TODO(nhasabni): Temporary solution to connect filter input of
-  // BackpropInput with the converted filter from Conv2D.
-  bool do_connect_conv2d_backprop_input_filter = false;
-  Node* conv2d_node = nullptr;
-  // Filter node is 2nd input (slot index 1) of Conv2D.
-  int kConv2DFilterInputSlotIdx = 1;
-  int kConv2DBackpropInputFilterInputSlotIdx = 1;
-  int kConv2DFilterOutputSlotIdx = 1;
-  if (old_node->type_string() == csinfo_.conv2d_grad_input) {
-    // We need to find Conv2D node from Conv2DBackpropInput.
-    // For that let's first find filter node that is 2nd input (slot 1)
-    // of BackpropInput.
-    Node* filter_node = nullptr;
-    old_node->input_node(kConv2DBackpropInputFilterInputSlotIdx, &filter_node);
-    CHECK_NOTNULL(filter_node);
-
-    // Now check which nodes receive from filter_node. Filter feeds as
-    // 2nd input (slot 1) of _MklConv2D and _MklConv2DWithBias.
-    for (const Edge* e : filter_node->out_edges()) {
-      if ((e->dst()->type_string() == csinfo_.mkl_conv2d ||
-           e->dst()->type_string() == csinfo_.mkl_conv2d_with_bias) &&
-          e->dst_input() == kConv2DFilterInputSlotIdx
-          /* filter is 2nd input of Conv2D and _MklConv2D. */) {
-        if (conv2d_node != nullptr) {
-          VLOG(1) << "MklLayoutRewritePass: unusual case of same filter"
-                  << " feeding multiple Conv2D nodes: "
-                  << filter_node->DebugString();
-          // We will not connect filter input of Conv2DBackpropInput
-          // to be safe here.
-          do_connect_conv2d_backprop_input_filter = false;
-          break;
-        } else {
-          conv2d_node = e->dst();
-          do_connect_conv2d_backprop_input_filter = true;
-        }
-      }
-    }
-  }
-
-  // Number of input slots to original op
-  // Input slots are represented by .Input() calls in REGISTER_OP.
-  int old_node_input_slots = old_node->op_def().input_arg_size();
-  // Actual number of inputs can be greater than or equal to number
-  // of Input slots because inputs of type list could be unfolded.
-  CHECK_GE(old_node_inputs.size(), old_node_input_slots);
-  int nn_slot_idx = 0;  // slot index for inputs of new node
-
-  // Let's copy all inputs (TF tensors) of original node to new node.
-  int iidx = 0;
-  for (int on_slot_idx = 0; on_slot_idx < old_node_input_slots; on_slot_idx++) {
-    // An input slot could be a single tensor or a list. We need
-    // to handle this case accordingly.
-    CHECK_LT(iidx, old_node_inputs.size());
-    const OpDef::ArgDef& arg = old_node->op_def().input_arg(on_slot_idx);
-    if (ArgIsList(arg)) {
-      std::vector<NodeBuilder::NodeOut> new_node_inputs;
-      int N = GetTensorListLength(arg, old_node);
-      GetNodesProducingTFTensorList(old_node_inputs, &iidx, N,
-                                    &new_node_inputs);
-      nb->Input(new_node_inputs);
-      nn_slot_idx++;
-    } else {
-      // Special case for connecting filter input of Conv2DBackpropInput
-      if (do_connect_conv2d_backprop_input_filter &&
-          iidx == kConv2DBackpropInputFilterInputSlotIdx) {
-        nb->Input(conv2d_node, kConv2DFilterOutputSlotIdx);
-      } else {
-        nb->Input(old_node_inputs[iidx].first, old_node_inputs[iidx].second);
-      }
-      iidx++;
-      nn_slot_idx++;
-    }
-  }
-
-  // If workspace tensors are available for this op and we are using
-  // contiguous ordering then we need to add Tensorflow tensor for
-  // workspace here because Tensorflow tensor for workspace is the
-  // last tensor in the list of Tensorflow tensors.
-  if (are_workspace_tensors_available) {
-    CHECK_EQ(workspace_tensors->size(), 2);
-    // Tensorflow tensor
-    nb->Input((*workspace_tensors)[0].node, (*workspace_tensors)[0].index);
-    nn_slot_idx++;
-  }
-
-  // Let's now setup all Mkl inputs to a new node.
-  // Number of Mkl inputs must be same as number of TF inputs.
-  iidx = 0;
-  for (int on_slot_idx = 0; on_slot_idx < old_node_input_slots; on_slot_idx++) {
-    // An input slot could be a single tensor or a list. We need
-    // to handle this case accordingly.
-    CHECK_LT(iidx, old_node_inputs.size());
-    const OpDef::ArgDef& arg = old_node->op_def().input_arg(on_slot_idx);
-    if (ArgIsList(arg)) {
-      std::vector<NodeBuilder::NodeOut> new_node_inputs;
-      int N = GetTensorListLength(arg, old_node);
-      GetNodesProducingMklTensorList(g, old_node, old_node_inputs, &iidx,
-                                     N, &new_node_inputs);
-      nb->Input(new_node_inputs);
-      nn_slot_idx++;
-    } else {
-      Node* mkl_node = nullptr;
-      int mkl_node_output_slot = 0;
-      // Special case for connecting filter input of Conv2DBackpropInput
-      if (do_connect_conv2d_backprop_input_filter &&
-          iidx == kConv2DBackpropInputFilterInputSlotIdx) {
-        GetNodeProducingMklTensor(g, old_node, conv2d_node,
-                                  kConv2DFilterOutputSlotIdx, &mkl_node,
-                                  &mkl_node_output_slot);
-      } else {
-        GetNodeProducingMklTensor(g, old_node, old_node_inputs[iidx].first,
-                                  old_node_inputs[iidx].second, &mkl_node,
-                                  &mkl_node_output_slot);
-      }
-      nb->Input(mkl_node, mkl_node_output_slot);
-      iidx++;
-      nn_slot_idx++;
-    }
-  }
-
-  // If workspace tensors are available for this op and we are using
-  // contiguous ordering then we need to add Mkl tensor for
-  // workspace here because Mkl tensor for workspace is the
-  // last tensor in the list of Mkl tensors.
-  if (are_workspace_tensors_available) {
-    CHECK_EQ(workspace_tensors->size(), 2);
-    // Mkl tensor
-    nb->Input((*workspace_tensors)[1].node, (*workspace_tensors)[1].index);
-    nn_slot_idx++;
-  }
-
-  return nn_slot_idx;
-}
-
-Status MklLayoutRewritePass::SetUpInputs(
-    std::unique_ptr<Graph>* g,
-    const gtl::InlinedVector<std::pair<Node*, int>, 4>& old_node_inputs,
-    NodeBuilder* nb, Node* old_node) {
-  // Let's check if we need to add workspace tensors for this node.
-  // We add workspace edge only for MaxPool, LRN and BatchNorm.
-  std::vector<NodeBuilder::NodeOut> workspace_tensors;
-  bool are_workspace_tensors_available = false;
-  AddWorkSpaceEdgeIfNeeded(g, old_node, nb, &workspace_tensors,
-                           &are_workspace_tensors_available);
-
-  int new_node_input_slots = 0;
-  if (kTensorOrdering == MklTfTensorOrdering::TENSORS_INTERLEAVED) {
-    // TODO(nhasabni): implement this function just for same of completion.
-    // We do not use interleaved ordering right now.
-    return Status(
-        error::Code::UNIMPLEMENTED,
-        "Interleaved ordering of tensors is currently not supported.");
-  } else {
-    CHECK_EQ(kTensorOrdering, MklTfTensorOrdering::TENSORS_CONTIGUOUS);
-    new_node_input_slots = SetUpContiguousInputs(
-        g, old_node_inputs, nb, old_node, &workspace_tensors,
-        are_workspace_tensors_available);
-  }
-
-  // Sanity check
-  int old_node_input_slots = old_node->op_def().input_arg_size();
-  if (!are_workspace_tensors_available) {
-    // If we are not adding workspace tensors for this op, then the total
-    // number of input slots to the new node _must_ be 2 times the number
-    // of input slots to the original node: N original Tensorflow tensors and
-    // N for Mkl tensors corresponding to each Tensorflow tensors.
-    CHECK_EQ(new_node_input_slots, old_node_input_slots * 2);
-  } else {
-    // If we are adding workspace tensors for this op, then the total
-    // The total number of input slots to new node _must_ be 2 times the number
-    // of input slots to the original node: N original Tensorflow tensors and
-    // N for Mkl tensors corresponding to each Tensorflow tensors plus 2
-    // (for workspace Tensorflow tensor and workspace Mkl tensor).
-    CHECK_EQ(new_node_input_slots, old_node_input_slots * 2 + 2);
-  }
-
-  return Status::OK();
-}
-
-//////////////////////////////////////////////////////////////////////////
-//           Helper functions related to workspace pass
-//////////////////////////////////////////////////////////////////////////
-
-// TODO(nhasabni) We should move this to mkl_util.h.
-void MklLayoutRewritePass::GetDummyWorkspaceTensorNode(
-    std::unique_ptr<Graph>* g, Node** out, Node* orig_node) {
-  // We use a tensor of shape {1} and value 0 to represent
-  // dummy float tensor. We need this as a dummy workspace tensor.
-  // Workspace tensor has type float.
-  const DataType dt = DataTypeToEnum<float>::v();
-  TensorProto proto;
-  proto.set_dtype(dt);
-  float zero[1] = {0};
-  proto.set_tensor_content(const_cast<const void*>(static_cast<void*>(&zero)),
-                           4);
-  TensorShape dummy_shape({1});
-  dummy_shape.AsProto(proto.mutable_tensor_shape());
-  TF_CHECK_OK(NodeBuilder((*g)->NewName("DMT"), "Const")
-                .Attr("value", proto)
-                .Attr("dtype", dt)
-                .Device(orig_node->def().device())  // We place this node on
-                                                    // same the device as the
-                                                    // device of the original
-                                                    // node.
-                .Finalize(&**g, out));
-
-  // If number of inputs to the original node is > 0, then we add
-  // control dependency between 1st input (index 0) of the original node and
-  // the dummy Mkl node. This is needed because control-flow ops such as Enter,
-  // Merge, etc, require frame_name of the dummy Mkl node to be same as the
-  // rewritten node. Adding control edge between 1st input of the original node
-  // and the dummy Mkl node ensures that the dummy node is in the same frame
-  // as the original node. Choosing 1st input is not necessary - any input of
-  // the original node is fine because all the inputs of a node are always in
-  // the same frame.
-  if (orig_node->num_inputs() > 0) {
-    Node* orig_input0 = nullptr;
-    TF_CHECK_OK(orig_node->input_node(0,
-                                      const_cast<const Node**>(&orig_input0)));
-    CHECK_NOTNULL((*g)->AddControlEdge(orig_input0, *out));
-  }
-
-  (*out)->set_assigned_device_name(orig_node->assigned_device_name());
-}
-
-void MklLayoutRewritePass::AddWorkSpaceEdgeIfNeeded(
-    std::unique_ptr<Graph>* g, Node* orig_node, NodeBuilder* nb,
-    std::vector<NodeBuilder::NodeOut>* ws_tensors, bool* are_ws_tensors_added) {
-  bool workspace_edge_added = false;  // Default initializer
-  CHECK_NOTNULL(are_ws_tensors_added);
-  *are_ws_tensors_added = false;  // Default initializer
-
-  DataType T;
-  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "T", &T));
-  for (auto ws : wsinfo_) {
-    if (orig_node->type_string() == ws.fwd_op &&
-        mkl_op_registry::IsMklOp(mkl_op_registry::GetMklOpName(
-          orig_node->type_string()), T)) {
-      // If this op is a fwd op, then we need to check if there is an
-      // edge from this node's fwd_slot to bwdop's bwd_slot. If there is
-      // an edge, then we just add an attribute on this node for setting
-      // workspace_passed to true. We don't add actual workspace edge
-      // in this node. Actual workspace edge gets added in the backward
-      // op for this node.
-      for (const Edge* e : orig_node->out_edges()) {
-        if (e->src_output() == ws.fwd_slot &&
-            e->dst()->type_string() == ws.bwd_op &&
-            e->dst_input() == ws.bwd_slot) {
-          nb->Attr("workspace_enabled", true);
-          VLOG(1) << "MklLayoutRewritePass: workspace_enabled for "
-                  << orig_node->type_string();
-          workspace_edge_added = true;
-          // We found the edge that we were looking for, so break.
-          break;
-        }
-      }
-
-      if (!workspace_edge_added) {
-        // If we are here, then we did not find backward operator for this
-        // node.
-        nb->Attr("workspace_enabled", false);
-      }
-    } else if (orig_node->type_string() == ws.bwd_op &&
-               mkl_op_registry::IsMklOp(mkl_op_registry::GetMklOpName(
-                                          orig_node->type_string()), T)) {
-      // If this op is a bwd op, then we need to add workspace edge and
-      // it's Mkl tensor edge between its corresponding fwd op and this
-      // op. Corresponding fwd op is specified in 'fwd_op' field of
-      // workspace info. fwd_slot and bwd_slot in workspace info specify
-      // an edge between which slots connect forward and backward op.
-      // Once all these criteria match, we add a workspace edge between
-      // ws_fwd_slot and ws_bwd_slot. Its corresponding Mkl tensor is
-      // determined by interleaved/contiguous ordering. Function
-      // DataIndexToMetaDataIndex tells us the location of Mkl tensor
-      // from the location of the Tensorflow tensor.
-      for (const Edge* e : orig_node->in_edges()) {
-        if (e->src_output() == ws.fwd_slot &&
-            // We would have rewritten the forward op, so we need to use
-            // GetMklOpName call to get its Mkl name.
-            e->src()->type_string() == mkl_op_registry::GetMklOpName(
-                                                          ws.fwd_op) &&
-            e->dst_input() == ws.bwd_slot) {
-          nb->Attr("workspace_enabled", true);
-          CHECK_NOTNULL(ws_tensors);
-          // Add workspace edge between fwd op and bwd op.
-          ws_tensors->push_back(NodeBuilder::NodeOut(e->src(), ws.ws_fwd_slot));
-          // Add Mkl tensor edge for workspace edge between fwd op and bwd op.
-          ws_tensors->push_back(NodeBuilder::NodeOut(
-              e->src(), DataIndexToMetaDataIndex(ws.ws_fwd_slot,
-                                                 e->src()->num_outputs())));
-          *are_ws_tensors_added = true;
-          // In terms of input ordering, we add these calls to add Input
-          // here because workspace edge (and its Mkl tensor) is the last
-          // edge in the fwdop and bwdop. So all inputs before workspace
-          // tensor have been added by SetUpInputs function.
-          VLOG(1) << "MklLayoutRewritePass: workspace_enabled for "
-                  << orig_node->type_string();
-          workspace_edge_added = true;
-          // We found the edge that we were looking for, so break.
-          break;
-        }
-      }
-
-      // If we are here means we did not find fwd op that feeds to this
-      // bwd op. So in this case, we need to generate dummy tensors for
-      // workspace input and Mkl tensor for workspace, and set
-      // workspace_enabled to false.
-      if (!workspace_edge_added) {
-        nb->Attr("workspace_enabled", false);
-        Node* dmt_ws = nullptr;      // Dummy tensor for workspace
-        Node* dmt_mkl_ws = nullptr;  // Dummy Mkl tensor for workspace
-        GetDummyWorkspaceTensorNode(g, &dmt_ws, orig_node);
-        GetDummyMklTensorNode(g, &dmt_mkl_ws, orig_node);
-        CHECK_NOTNULL(dmt_ws);
-        CHECK_NOTNULL(dmt_mkl_ws);
-        CHECK_NOTNULL(ws_tensors);
-        // We add dummy tensor as workspace tensor.
-        ws_tensors->push_back(NodeBuilder::NodeOut(dmt_ws, 0));
-        // We add dummy tensor as Mkl tensor for workspace tensor.
-        ws_tensors->push_back(NodeBuilder::NodeOut(dmt_mkl_ws, 0));
-        *are_ws_tensors_added = true;
-        VLOG(1) << "MklLayoutRewritePass: dummy workspace_enabled for "
-                << orig_node->type_string();
-      }
-    } else {
-      // If this node does not match any workspace info, then we do not
-      // do anything special for workspace propagation for it.
-    }
-  }
-}
-
-//////////////////////////////////////////////////////////////////////////
-// Op-specific functions to copy attributes from old node to new node
-//////////////////////////////////////////////////////////////////////////
-
-void MklLayoutRewritePass::CopyAttrsConv2D(const Node* orig_node,
-                                           NodeBuilder* nb) {
-  DataType T;
-  string data_format;
-  string padding;
-  std::vector<int32> strides;
-  bool use_cudnn_on_gpu;
-
-  // Get all attributes from old node.
-  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "T", &T));
-  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "strides", &strides));
-  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "padding", &padding));
-  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "data_format", &data_format));
-  TF_CHECK_OK(
-      GetNodeAttr(orig_node->def(), "use_cudnn_on_gpu", &use_cudnn_on_gpu));
-
-  // Add attributes to new node.
-  nb->Attr("T", T);
-  nb->Attr("strides", strides);
-  nb->Attr("padding", padding);
-  nb->Attr("data_format", data_format);
-  nb->Attr("use_cudnn_on_gpu", use_cudnn_on_gpu);
-}
-
-void MklLayoutRewritePass::CopyAttrsAddN(const Node* orig_node,
-                                         NodeBuilder* nb) {
-  DataType T;
-  int N;
-
-  // Get all attributes from old node.
-  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "T", &T));
-  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "N", &N));
-
-  // Add attributes to new node.
-  nb->Attr("T", T);
-  nb->Attr("N", N);
-}
-
-void MklLayoutRewritePass::CopyAttrsBiasAddGrad(const Node* orig_node,
-                                                NodeBuilder* nb) {
-  DataType T;
-  string data_format;
-  std::vector<int32> strides;
-
-  // Get all attributes from old node.
-  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "T", &T));
-  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "strides", &strides));
-  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "data_format", &data_format));
-
-  // Add attributes to new node.
-  nb->Attr("T", T);
-  nb->Attr("strides", strides);
-  nb->Attr("data_format", data_format);
-}
-
-void MklLayoutRewritePass::CopyAttrsLRN(const Node* orig_node,
-                                        NodeBuilder* nb) {
-  DataType T;
-  int depth_radius;
-  float bias;
-  float alpha;
-  float beta;
-
-  // Get all attributes from old node.
-  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "T", &T));
-  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "depth_radius", &depth_radius));
-  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "bias", &bias));
-  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "alpha", &alpha));
-  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "beta", &beta));
-
-  // Add attributes to new node.
-  nb->Attr("T", T);
-  nb->Attr("depth_radius", depth_radius);
-  nb->Attr("bias", bias);
-  nb->Attr("alpha", alpha);
-  nb->Attr("beta", beta);
-}
-
-void MklLayoutRewritePass::CopyAttrsPooling(const Node* orig_node,
-                                            NodeBuilder* nb) {
-  DataType T;
-  string data_format;
-  string padding;
-  std::vector<int32> ksize, strides;
-
-  // Get all attributes from old node.
-  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "T", &T));
-  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "ksize", &ksize));
-  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "strides", &strides));
-  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "padding", &padding));
-  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "data_format", &data_format));
-
-  // Add attributes to new node.
-  nb->Attr("T", T);
-  nb->Attr("ksize", ksize);
-  nb->Attr("strides", strides);
-  nb->Attr("padding", padding);
-  nb->Attr("data_format", data_format);
-}
-
-void MklLayoutRewritePass::CopyAttrsDataType(const Node* orig_node,
-                                             NodeBuilder* nb) {
-  DataType T;
-
-  // Get all attributes from old node.
-  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "T", &T));
-
-  // Add attributes to new node.
-  nb->Attr("T", T);
-}
-
-void MklLayoutRewritePass::CopyAttrsReshape(const Node* orig_node,
-                                           NodeBuilder* nb) {
-  DataType T;
-  DataType Tshape;
-
-  // Get all attributes from old node.
-  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "T", &T));
-  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "Tshape", &Tshape));
-  // Add attributes to new node.
-  nb->Attr("T", T);
-  nb->Attr("Tshape", Tshape);
-}
-
-void MklLayoutRewritePass::CopyAttrsSplit(const Node* orig_node,
-                                          NodeBuilder* nb) {
-  DataType T;
-  string data_format;
-  int num_split;
-
-  // Get all attributes from old node.
-  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "T", &T));
-  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "num_split", &num_split));
-  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "data_format", &data_format));
-
-  // Add attributes to new node.
-  nb->Attr("T", T);
-  nb->Attr("num_split", num_split);
-  nb->Attr("data_format", data_format);
-}
-
-void MklLayoutRewritePass::CopyAttrsConcat(const Node* orig_node,
-                                           NodeBuilder* nb) {
-  DataType T;
-  int N;
-
-  // Get all attributes from old node.
-  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "T", &T));
-  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "N", &N));
-
-  // Add attributes to new node.
-  nb->Attr("T", T);
-  nb->Attr("N", N);
-}
-
-void MklLayoutRewritePass::CopyAttrsConcatV2(const Node* orig_node,
-                                             NodeBuilder* nb) {
-  DataType T;
-  int N;
-  DataType tidx;
-
-  // Get all attributes from old node.
-  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "T", &T));
-  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "N", &N));
-  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "Tidx", &tidx));
-
-  // Add attributes to new node.
-  nb->Attr("T", T);
-  nb->Attr("N", N);
-  nb->Attr("Tidx", tidx);
-}
-
-void MklLayoutRewritePass::CopyAttrsFusedBatchNorm(const Node* orig_node,
-                                                   NodeBuilder* nb) {
-  DataType T;
-  float epsilon;
-  string data_format;
-  bool is_training;
-
-  // Get all attributes from old node.
-  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "T", &T));
-  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "epsilon", &epsilon));
-  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "data_format", &data_format));
-  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "is_training", &is_training));
-
-  // Add attributes to new node.
-  nb->Attr("T", T);
-  nb->Attr("epsilon", epsilon);
-  nb->Attr("data_format", data_format);
-  nb->Attr("is_training", is_training);
-}
-
-//////////////////////////////////////////////////////////////////////////
-//           Helper functions related to node merge pass
-//////////////////////////////////////////////////////////////////////////
-
-Node* MklLayoutRewritePass::CheckForNodeMerge(const Node* a) const {
-  // TODO(nhasabni) Add check for type of node similar to CheckForNodeRewrite
-  // once we support BiasAddGrad as Mkl layer.
-
-  // Search for all matching mergeinfo.
-  // We allow more than one match for extensibility.
-  std::vector<const MergeInfo*> matching_mi;
-  for (auto mi = minfo_.cbegin(); mi != minfo_.cend(); ++mi) {
-    if (a->type_string() == mi->op1 || a->type_string() == mi->op2) {
-      matching_mi.push_back(&*mi);
-    }
-  }
-
-  for (const MergeInfo* mi : matching_mi) {
-    // Get the operand with which 'a' can be merged.
-    Node* b = nullptr;
-    if ((b = mi->get_node_to_be_merged(a)) == nullptr) {
-      continue;
-    }
-
-    // Get the control edges and input of node
-    const int N_in = a->num_inputs();
-    gtl::InlinedVector<Node*, 4> a_control_edges;
-    gtl::InlinedVector<std::pair<Node*, int>, 4> a_in(N_in);
-    FillInputs(a, &a_control_edges, &a_in);
-
-    const int B_in = b->num_inputs();
-    gtl::InlinedVector<Node*, 4> b_control_edges;
-    gtl::InlinedVector<std::pair<Node*, int>, 4> b_in(B_in);
-    FillInputs(b, &b_control_edges, &b_in);
-
-    // Shouldn't merge if a and b have different control edges.
-    if (a_control_edges != b_control_edges) {
-      continue;
-    } else {
-      // We found a match.
-      return b;
-    }
-  }
-
-  return nullptr;
-}
-
-Status MklLayoutRewritePass::MergeConv2DWithBiasAdd(std::unique_ptr<Graph>* g,
-                                                    Node* m, Node* n) {
-  CHECK_EQ(((m->type_string() == csinfo_.bias_add &&
-             n->type_string() == csinfo_.conv2d)) ||
-           ((n->type_string() == csinfo_.bias_add &&
-             m->type_string() == csinfo_.conv2d)), true);
-
-  // If 'm' is BiasAdd, then 'n' is Conv2D. Since Conv2D feeds BiasAdd,
-  // BiasAdd is successor node, and Conv2D predecessor node.
-  Node* pred = m->type_string() == csinfo_.bias_add ? n : m;
-  Node* succ = m->type_string() == csinfo_.bias_add ? m : n;
-
-  // 1. Get all attributes from input nodes.
-  DataType T_pred, T_succ;
-  string padding;
-  std::vector<int32> strides;
-  string data_format_pred, data_format_succ;
-  bool use_cudnn_on_gnu;
-  TF_CHECK_OK(GetNodeAttr(pred->def(), "T", &T_pred));
-  TF_CHECK_OK(GetNodeAttr(succ->def(), "T", &T_succ));
-  TF_CHECK_OK(GetNodeAttr(pred->def(), "padding", &padding));
-  TF_CHECK_OK(GetNodeAttr(pred->def(), "strides", &strides));
-  TF_CHECK_OK(GetNodeAttr(pred->def(), "data_format", &data_format_pred));
-  TF_CHECK_OK(GetNodeAttr(succ->def(), "data_format", &data_format_succ));
-  TF_CHECK_OK(
-      GetNodeAttr(pred->def(), "use_cudnn_on_gpu", &use_cudnn_on_gnu));
-  // We check to ensure that data formats of both succ and pred are same.
-  // We expect them to be same, so we can enforce this as assert.
-  // But assert can be too strict, so we enforce this as a check.
-  // If the check fails, then we do not merge two nodes.
-  // We also do same check for devices.
-  if (data_format_pred != data_format_succ || T_pred != T_succ ||
-      pred->assigned_device_name() != succ->assigned_device_name() ||
-      pred->def().device() != succ->def().device()) {
-    return Status(error::Code::INVALID_ARGUMENT,
-                  "data_format or T attribute or devices of Conv2D and "
-                  "BiasAdd do not match. Will skip node merge optimization");
-  }
-
-  const int succ_num = succ->num_inputs();
-  gtl::InlinedVector<Node*, 4> succ_control_edges;
-  gtl::InlinedVector<std::pair<Node*, int>, 4> succ_in(succ_num);
-  FillInputs(succ, &succ_control_edges, &succ_in);
-
-  const int pred_num = pred->num_inputs();
-  gtl::InlinedVector<Node*, 4> pred_control_edges;
-  gtl::InlinedVector<std::pair<Node*, int>, 4> pred_in(pred_num);
-  FillInputs(pred, &pred_control_edges, &pred_in);
-
-  // We need to ensure that Conv2D only feeds to BiasAdd (some other operator is
-  // not expecting output of Conv2D). If this is not the case, then we cannot
-  // merge Conv2D with BiasAdd.
-  const int kFirstOutputSlot = 0;
-  for (const Edge* e : pred->out_edges()) {
-    if (e->src_output() == kFirstOutputSlot && e->dst() != succ) {
-      return Status(error::Code::INVALID_ARGUMENT,
-                    "Conv2D does not feed to BiasAdd, or "
-                    "it feeds BiasAdd but has multiple outputs. "
-                    "Will skip node merge optimization");
-    }
-  }
-
-  // 2. Get inputs from both the nodes.
-  // Find the 2 inputs from the conv and the bias from the add Bias.
-  // Get operand 0, 1 of conv2D.
-  CHECK_EQ(pred->in_edges().size(), 2);  // Conv2D must have 2 inputs.
-  // Get operand 1 of add_bias
-  // BiasAdd must have 2 inputs: Conv, bias
-  CHECK_EQ(succ->in_edges().size(), 2);
-
-  // We will use the node name of BiasAdd as the name of new node
-  // Build new node. We use same name as original node, but change the op
-  // name.
-  NodeBuilder nb(succ->name(), csinfo_.conv2d_with_bias);
-  nb.Input(pred_in[0].first, pred_in[0].second);  // In1 of Conv2D
-  // pred_in[1] will be 2nd Tensorflow tensor for Conv2D.
-  nb.Input(pred_in[1].first, pred_in[1].second);  // In2 of Conv2D
-  // In1 of BiasAdd is same as output of Conv2D.
-  nb.Input(succ_in[1].first, succ_in[1].second);  // In2 of BiasAdd
-
-  // Copy attributes from Conv2D to Conv2DWithBias.
-  CopyAttrsConv2D(const_cast<const Node*>(pred), &nb);
-
-  // Copy the device assigned to old node to new node.
-  nb.Device(succ->def().device());
-
-  // Create node.
-  Node* new_node;
-  nb.Finalize(&**g, &new_node);
-  CHECK_NOTNULL(new_node);
-
-  // Incoming data edges from 'pred' node and 'succ' node to new 'new_node'
-  // node are already copied in BuildNode. We handle control edges now.
-  for (const Edge* e : pred->in_edges()) {
-    if (e->IsControlEdge()) {
-      CHECK_NOTNULL((*g)->AddControlEdge(e->src(), new_node));
-    }
-  }
-  for (const Edge* e : succ->in_edges()) {
-    if (e->IsControlEdge()) {
-      CHECK_NOTNULL((*g)->AddControlEdge(e->src(), new_node));
-    }
-  }
-
-  // Incoming edges are fixed, we will fix the outgoing edges now.
-  // First, we will fix outgoing control edges from 'pred' node.
-  for (const Edge* e : pred->out_edges()) {
-    if (e->IsControlEdge()) {
-      CHECK_NOTNULL((*g)->AddControlEdge(new_node, e->dst()));
-    }
-  }
-
-  // Second, we will fix outgoing control and data edges from 'succ' node.
-  for (const Edge* e : succ->out_edges()) {
-    if (e->IsControlEdge()) {
-      CHECK_NOTNULL((*g)->AddControlEdge(new_node, e->dst()));
-    } else {
-      // BiasAdd has only 1 output (at slot 0) and merged node also has only 1
-      // output (at slot 0).
-      const int kConv2DWithBiasOutputSlot = 0;
-      CHECK_NOTNULL((*g)->AddEdge(new_node, kConv2DWithBiasOutputSlot,
-                                    e->dst(), e->dst_input()));
-    }
-  }
-
-  // Copy device assigned to old node to new node.
-  // It's ok to use pred or succ as we have enforced a check that
-  // both have same device assigned.
-  new_node->set_assigned_device_name(pred->assigned_device_name());
-
-  VLOG(1) << "MklLayoutRewritePass: Merged old node:" << pred->DebugString()
-          << ", and node: " << succ->DebugString()
-          << ", into node:" << new_node->DebugString();
-
-  (*g)->RemoveNode(succ);
-  (*g)->RemoveNode(pred);
-
-  return Status::OK();
-}
-
-Status MklLayoutRewritePass::MergeConv2DBackpropFilterWithBiasAddGrad(
-    std::unique_ptr<Graph>* g, Node* m, Node* n) {
-  CHECK_EQ(((m->type_string() == csinfo_.bias_add_grad &&
-             n->type_string() == csinfo_.conv2d_grad_filter)) ||
-           ((n->type_string() == csinfo_.bias_add_grad &&
-             m->type_string() == csinfo_.conv2d_grad_filter)), true);
-
-  // If 'm' is BiasAddGrad, then 'n' is BackpropFilter.
-  Node* badd = m->type_string() == csinfo_.bias_add_grad ? m : n;
-  Node* fltr = m->type_string() == csinfo_.bias_add_grad ? n : m;
-
-  // Sanity check for attributes from input nodes.
-  DataType T_b, T_f;
-  string data_format_b, data_format_f;
-  TF_CHECK_OK(GetNodeAttr(badd->def(), "T", &T_b));
-  TF_CHECK_OK(GetNodeAttr(fltr->def(), "T", &T_f));
-  TF_CHECK_OK(GetNodeAttr(badd->def(), "data_format", &data_format_b));
-  TF_CHECK_OK(GetNodeAttr(fltr->def(), "data_format", &data_format_f));
-  if (data_format_b != data_format_f || T_b != T_f ||
-      badd->assigned_device_name() != fltr->assigned_device_name() ||
-      badd->def().device() != fltr->def().device()) {
-    return Status(error::Code::INVALID_ARGUMENT,
-                  "data_format or T attribute or devices of "
-                  "Conv2DBackpropFilter and BiasAddGrad do not match. "
-                  "Will skip node merge optimization");
-  }
-
-  // We will use the node name of Conv2DBackpropFilter as the name of new node.
-  // This is because BackpropFilterWithBias is going to emit bias output also.
-  NodeBuilder nb(fltr->name(), csinfo_.conv2d_grad_filter_with_bias);
-  // Since Conv2DBackpropFilterWithBias has same number of inputs as
-  // Conv2DBackpropFilter, we can just copy input edges directly. We dont need
-  // to copy any data input of BiasAddGrad because that input also goes to
-  // Conv2DBackpropFilter.
-  const int fltr_ins = fltr->num_inputs();
-  gtl::InlinedVector<Node*, 4> fltr_control_edges;
-  gtl::InlinedVector<std::pair<Node*, int>, 4> fltr_in_edges(fltr_ins);
-  FillInputs(fltr, &fltr_control_edges, &fltr_in_edges);
-  for (int idx = 0; idx < fltr_ins; idx++) {
-    nb.Input(fltr_in_edges[idx].first, fltr_in_edges[idx].second);
-  }
-
-  // Copy attributes from Conv2DBackpropFilter.
-  CopyAttrsConv2D(const_cast<const Node*>(fltr), &nb);
-
-  // Copy the device assigned to old node to new node.
-  nb.Device(fltr->def().device());
-
-  // Create node.
-  Node* new_node;
-  nb.Finalize(&**g, &new_node);
-  CHECK_NOTNULL(new_node);
-
-  // Incoming data edges from BiasAddGrad node and Conv2DBackpropFilter node to
-  // new 'new_node' node are already copied in BuildNode. We handle control
-  // edges now.
-  for (const Edge* e : badd->in_edges()) {
-    if (e->IsControlEdge()) {
-      CHECK_NOTNULL((*g)->AddControlEdge(e->src(), new_node));
-    }
-  }
-  for (const Edge* e : fltr->in_edges()) {
-    if (e->IsControlEdge()) {
-      CHECK_NOTNULL((*g)->AddControlEdge(e->src(), new_node));
-    }
-  }
-
-  // Incoming edges are fixed, we will fix the outgoing edges now.
-  // First, we will fix outgoing control edges from 'badd' node.
-  // Conv2DBackpropFilter has 1 output -- filter_grad.
-  // Conv2DBackpropFilterWithBias has 2 outputs -- filter_grad and
-  // bias_grad. But filter_grad is at same slot number (0) in both the
-  // nodes. bias_grad is at slot number 1 in Conv2DBackpropFilterWithBias, while
-  // it is at slot number 0 in BiasAddGrad.
-  const int kMergedNodeFilterGradOutputIdx = 0;
-  const int kMergedNodeBiasGradOutputIdx = 1;
-
-  for (const Edge* e : badd->out_edges()) {
-    if (e->IsControlEdge()) {
-      CHECK_NOTNULL((*g)->AddControlEdge(new_node, e->dst()));
-    } else {
-      CHECK_NOTNULL((*g)->AddEdge(new_node, kMergedNodeBiasGradOutputIdx,
-                                  e->dst(), e->dst_input()));
-    }
-  }
-
-  // Second, we will fix outgoing control and data edges from 'fltr' node.
-  for (const Edge* e : fltr->out_edges()) {
-    if (e->IsControlEdge()) {
-      CHECK_NOTNULL((*g)->AddControlEdge(new_node, e->dst()));
-    } else {
-      CHECK_NOTNULL((*g)->AddEdge(new_node, kMergedNodeFilterGradOutputIdx,
-                                  e->dst(), e->dst_input()));
-    }
-  }
-
-  // Copy device assigned to old node to new node.
-  // It's ok to use badd or fltr as we have enforced a check that
-  // both have same device assigned.
-  new_node->set_assigned_device_name(badd->assigned_device_name());
-
-  VLOG(1) << "MklLayoutRewritePass: Merged old node:" << badd->DebugString()
-          << ", and node: " << fltr->DebugString()
-          << ", into node:" << new_node->DebugString();
-
-  (*g)->RemoveNode(badd);
-  (*g)->RemoveNode(fltr);
-
-  return Status::OK();
-}
-
-Status MklLayoutRewritePass::MergeNode(std::unique_ptr<Graph>* g, Node* m,
-                                       Node* n) {
-  CHECK_NOTNULL(m);
-  CHECK_NOTNULL(n);
-
-  if (((m->type_string() == csinfo_.bias_add &&
-        n->type_string() == csinfo_.conv2d)) ||
-      ((n->type_string() == csinfo_.bias_add &&
-        m->type_string() == csinfo_.conv2d))) {
-    return this->MergeConv2DWithBiasAdd(g, m, n);
-  }
-
-  if (((m->type_string() == csinfo_.bias_add_grad &&
-        n->type_string() == csinfo_.conv2d_grad_filter)) ||
-      ((n->type_string() == csinfo_.bias_add_grad &&
-        m->type_string() == csinfo_.conv2d_grad_filter))) {
-    return this->MergeConv2DBackpropFilterWithBiasAddGrad(g, m, n);
-  }
-
-  return Status(error::Code::UNIMPLEMENTED,
-                "Unimplemented case for node merge optimization.");
-}
-
-//////////////////////////////////////////////////////////////////////////
-//           Helper functions for node rewrite
-//////////////////////////////////////////////////////////////////////////
-
-Status MklLayoutRewritePass::RewriteNode(std::unique_ptr<Graph>* g,
-                                         Node* orig_node,
-                                         const RewriteInfo* ri) {
-  CHECK_NOTNULL(ri);
-  CHECK_NOTNULL(orig_node);
-
-  VLOG(1) << "MklLayoutRewritePass: Original node:" << orig_node->DebugString();
-
-  // Get all inputs.
-  int num_inputs = orig_node->in_edges().size();
-
-  // Drop count for control edges from inputs
-  for (const Edge* e : orig_node->in_edges()) {
-    if (e->IsControlEdge()) {
-      num_inputs--;
-    }
-  }
-
-  gtl::InlinedVector<Node*, 4> control_edges;
-  gtl::InlinedVector<std::pair<Node*, int>, 4> inputs(num_inputs);
-  FillInputs(orig_node, &control_edges, &inputs);
-
-  // Build new node. We use same name as original node, but change the op name.
-  NodeBuilder nb(orig_node->name().c_str(), ri->new_name.c_str());
-  // Copy user-specified device assigned to original node to new node.
-  nb.Device(orig_node->def().device());
-  // Set up new inputs to the rewritten node.
-  Status s = SetUpInputs(g, inputs, &nb, orig_node);
-  if (s != Status::OK()) {
-    return s;
-  }
-
-  ri->copy_attrs(const_cast<const Node*>(orig_node), &nb);
-  // Set the Mkl layer label for this op.
-  nb.Attr("_kernel", mkl_op_registry::kMklOpLabel);
-
-  // Finalize graph and get new node.
-  Node* new_node = nullptr;
-  TF_CHECK_OK(nb.Finalize(&**g, &new_node));
-  CHECK_NOTNULL(new_node);
-
-  // Incoming data edges from 'orig_node' node to new 'new_node' node are
-  // already copied in BuildNode. We need to handle control edges now.
-  for (const Edge* e : orig_node->in_edges()) {
-    if (e->IsControlEdge()) {
-      CHECK_NOTNULL((*g)->AddControlEdge(e->src(), new_node));
-    }
-  }
-
-  // Copy outgoing edges from 'orig_node' node to new
-  // 'new_node' node, since the output also follows same ordering among
-  // Tensorflow tensors and Mkl tensors. We need to connect Tensorflow
-  // tensors appropriately. Specifically, nth output of the original node
-  // will become 2*nth output of the Mkl node for the interleaved ordering
-  // of the tensors. For the contiguous ordering of the tensors, it will be n.
-  // GetTensorDataIndex provides this mapping function.
-  for (const Edge* e : orig_node->out_edges()) {
-    if (e->IsControlEdge()) {
-      CHECK_NOTNULL((*g)->AddControlEdge(new_node, e->dst()));
-    } else {
-      CHECK_NOTNULL((*g)->AddEdge(new_node, GetTensorDataIndex(e->src_output(),
-                            e->src()->num_outputs()),
-                    e->dst(), e->dst_input()));
-    }
-  }
-
-  // Copy the runtime device assigned from original code to new node.
-  new_node->set_assigned_device_name(orig_node->assigned_device_name());
-
-  // Delete original node and mark new node as rewritten.
-  (*g)->RemoveNode(orig_node);
-
-  VLOG(1) << "MklLayoutRewritePass: New node:" << new_node->DebugString();
-  return Status::OK();
-}
-
-const MklLayoutRewritePass::RewriteInfo*
-MklLayoutRewritePass::CheckForNodeRewrite(const Node* n) const {
-  CHECK_NOTNULL(n);
-
-  // First check if node along with its type is supported by MKL layer.
-  // We do not want to rewrite an op into Mkl op if types are not supported.
-  // E.g., MklRelu does not support INT32. So we cannot rewrite Relu to
-  // MklRelu if type is INT32.
-  DataType T;
-  if (!GetNodeAttr(n->def(), "T", &T).ok()) {
-    return nullptr;
-  }
-
-  // We make an exception for __MklDummyConv2DWithBias and
-  // __MklConv2DBackpropFilterWithBias since their names do not match Mkl node
-  // names.
-  if (n->type_string() != csinfo_.conv2d_with_bias &&
-      n->type_string() != csinfo_.conv2d_grad_filter_with_bias &&
-      !mkl_op_registry::IsMklOp(mkl_op_registry::GetMklOpName(
-                                        n->type_string()), T)) {
-      return nullptr;
-  }
-
-  // For elementwise node, we reuse the Eigen implementation and pass the MKL
-  // metadata tensor through so we can avoid conversions. However, if all
-  // incoming edges are in TF format, we don't need all this overhead, so
-  // replace the elementwise node only if at least one of its parents is a MKL
-  // node.
-  //
-  // Identity nodes can also skip replacement if they are not being served by
-  // any MKL nodes.
-  //
-  // TODO(vrane): Add implementation for element-wise ops that doesn't reuse
-  // eigen code to reduce cross-library dependency.
-  VLOG(1) << "ELEMENTWISE: checking op: " << n->type_string();
-  if (mkl_op_registry::IsMklElementWiseOp(
-        mkl_op_registry::GetMklOpName(n->type_string()), T) ||
-      n->type_string().find("Identity") != string::npos) {
-    VLOG(1) << "ELEMENTWISE: op is elementwise: " << n->type_string();
-    bool incoming_mkl_edge = false;
-    int num_parent = 0;
-    for (auto parent : n->in_edges()) {
-      if (mkl_op_registry::IsMklOp(parent->src()->type_string(), T)) {
-        VLOG(1) << "ELEMENTWISE: parent " << num_parent++ << " is MKL op: "
-                << parent->src()->type_string();
-        incoming_mkl_edge = true;
-        break;
-      } else {
-        VLOG(1) << "ELEMENTWISE: parent " << num_parent++ << " is NON-MKL op: "
-                << parent->src()->type_string();
-      }
-    }
-    if (incoming_mkl_edge == false) {
-      VLOG(1) << "ELEMENTWISE: Skipping replacement of elementwise node which has no MKL "
-                 "parents.";
-      return nullptr;
-    } else {
-      VLOG(1) << "ELEMENTWISE: Replacing elementwise node " << n->type_string() <<
-        " which has MKL parents";
-    }
-  }
-
-  // We now check if rewrite rule applies for this op. If rewrite rule passes
-  // for this op, then we rewrite it to Mkl op.
-  // Find matching RewriteInfo and then check that rewrite rule applies.
-  for (auto ri = rinfo_.cbegin(); ri != rinfo_.cend(); ++ri) {
-    if (n->type_string().compare(ri->name) == 0 &&
-        ri->rewrite_rule(n)) {
-      return &*ri;
-    }
-  }
-
-  // Else return not found.
-  return nullptr;
-}
-
-///////////////////////////////////////////////////////////////////////////////
-//              Run function for the pass
-///////////////////////////////////////////////////////////////////////////////
-
-bool MklLayoutRewritePass::RunPass(std::unique_ptr<Graph>* g) {
-  bool result = false;
-  CHECK_NOTNULL(g);
-
-  DumpGraph("Before running MklLayoutRewritePass", &**g);
-
-  std::vector<Node*> order;
-  GetReversePostOrder(**g, &order);  // This will give us topological sort.
-  for (Node* n : order) {
-    // If node is not an op or it cannot run on CPU device, then skip.
-    if (!n->IsOp() || !CanOpRunOnCPUDevice(n)) {
-      continue;
-    }
-
-    Node* m = nullptr;
-    if ((m = CheckForNodeMerge(n)) != nullptr && CanOpRunOnCPUDevice(m)) {
-      // Check if the node 'n' can be merged with any other node. If it can
-      // be 'm' contains the node with which it can be merged.
-      string n1_name = n->name();
-      string n2_name = m->name();
-
-      VLOG(1) << "MklLayoutRewritePass: Scheduled nodes " << n1_name << " and "
-              << n2_name << " for merging";
-
-      if (MergeNode(g, n, m) == Status::OK()) {
-        VLOG(1) << "MklLayoutRewritePass: Merged nodes " << n1_name << " and "
-                << n2_name;
-        result = true;
-      }
-    }
-  }
-
-  DumpGraph("After running MklLayoutRewritePass(NodeMerge)", &**g);
-
-  order.clear();
-  GetReversePostOrder(**g, &order);  // This will give us topological sort.
-  for (Node* n : order) {
-    // If node is not an op or it cannot run on CPU device, then skip.
-    if (!n->IsOp() || !CanOpRunOnCPUDevice(n)) {
-      continue;
-    }
-
-    const RewriteInfo* ri = nullptr;
-    // We will first search if node is to be rewritten.
-    if ((ri = CheckForNodeRewrite(n)) != nullptr) {
-      string node_name = n->name();
-      string op_name = n->type_string();
-
-      VLOG(1) << "MklLayoutRewritePass: Scheduled node " << node_name
-              << " with op " << op_name << " for rewrite using"
-              << " layout optimization.";
-
-      if (RewriteNode(g, n, ri) == Status::OK()) {
-        VLOG(1) << "MklLayoutRewritePass: rewrote node " << node_name
-                << " with op " << op_name << " for Mkl layout optimization.";
-        result = true;
-      }
-    }
-  }
-
-  DumpGraph("After running MklLayoutRewritePass(NodeMerge+Rewrite)", &**g);
-
-  return result;
-}
-
-bool RunMklLayoutRewritePass(std::unique_ptr<Graph>* g) {
-  return MklLayoutRewritePass().RunPass(g);
-}
-
-Status MklLayoutRewritePass::Run(
-  const GraphOptimizationPassOptions& options) {
-  if (options.graph == nullptr && options.partition_graphs == nullptr) {
-    return Status::OK();
-  }
-
-  auto process_graph = [&](std::unique_ptr<Graph>* g) {
-    // Get the ownership of a graph
-    std::unique_ptr<Graph>* ng = std::move(g);
-    RunPass(ng);
-    // Return the ownership of a graph back
-    g->reset(ng->release());
-  };
-
-  if (kMklLayoutRewritePassGroup !=
-      OptimizationPassRegistry::POST_PARTITIONING) {
-    // For any pre-partitioning phase, a graph is stored in options.graph.
-    process_graph(options.graph);
-  } else {
-    // For post partitioning phase, graphs are stored in
-    // options.partition_graphs.
-    for (auto& pg : *options.partition_graphs) {
-      process_graph(&pg.second);
-    }
-  }
-
-  return Status::OK();
-}
-#endif  // INTEL_MKL_DNN
 }  // namespace tensorflow
 
 #endif
diff --git a/tensorflow/core/graph/mkl_layout_pass_test.cc b/tensorflow/core/graph/mkl_layout_pass_test.cc
index 75f7ca2d4d..abc63e4f35 100644
--- a/tensorflow/core/graph/mkl_layout_pass_test.cc
+++ b/tensorflow/core/graph/mkl_layout_pass_test.cc
@@ -37,9 +37,6 @@ limitations under the License.
 #include "tensorflow/core/platform/test_benchmark.h"
 
 namespace tensorflow {
-
-#ifndef INTEL_MKL_DNN
-
 namespace {
 
 const char kCPUDevice[] = "/job:a/replica:0/task:0/device:CPU:0";
@@ -1884,1627 +1881,6 @@ static void BM_MklLayoutRewritePass(int iters, int op_nodes) {
 BENCHMARK(BM_MklLayoutRewritePass)->Arg(1000)->Arg(10000);
 
 }  // namespace
-
-#else  // INTEL_MKL_DNN
-
-namespace {
-
-const char kCPUDevice[] = "/job:a/replica:0/task:0/device:CPU:0";
-const char kGPUDevice[] = "/job:a/replica:0/task:0/device:GPU:0";
-
-static void InitGraph(const string& s, Graph* graph,
-                      const string& device = kCPUDevice) {
-  GraphDef graph_def;
-
-  auto parser = protobuf::TextFormat::Parser();
-  //  parser.AllowRelaxedWhitespace(true);
-  CHECK(parser.MergeFromString(s, &graph_def)) << s;
-  GraphConstructorOptions opts;
-  TF_CHECK_OK(ConvertGraphDefToGraph(opts, graph_def, graph));
-
-  for (Node* node : graph->nodes()) {
-    node->set_assigned_device_name(device);
-  }
-}
-
-class MklLayoutPassTest : public ::testing::Test {
- public:
-  MklLayoutPassTest() : graph_(OpRegistry::Global()) {}
-
-  void InitGraph(const string& s, const string& device = kCPUDevice) {
-    ::tensorflow::InitGraph(s, &graph_, device);
-    original_ = CanonicalGraphString(&graph_);
-  }
-
-  static bool IncludeNode(const Node* n) { return n->IsOp(); }
-
-  static string EdgeId(const Node* n, int index) {
-    if (index == 0) {
-      return n->name();
-    } else if (index == Graph::kControlSlot) {
-      return strings::StrCat(n->name(), ":control");
-    } else {
-      return strings::StrCat(n->name(), ":", index);
-    }
-  }
-
-  string CanonicalGraphString(Graph* g) {
-    std::vector<string> nodes;
-    std::vector<string> edges;
-    for (const Node* n : g->nodes()) {
-      if (IncludeNode(n)) {
-        nodes.push_back(strings::StrCat(n->name(), "(", n->type_string(), ")"));
-      }
-    }
-    for (const Edge* e : g->edges()) {
-      if (IncludeNode(e->src()) && IncludeNode(e->dst())) {
-        edges.push_back(strings::StrCat(EdgeId(e->src(), e->src_output()), "->",
-                                        EdgeId(e->dst(), e->dst_input())));
-      }
-    }
-    // Canonicalize
-    std::sort(nodes.begin(), nodes.end());
-    std::sort(edges.begin(), edges.end());
-    return strings::StrCat(str_util::Join(nodes, ";"), "|",
-                           str_util::Join(edges, ";"));
-  }
-
-  string DoMklLayoutOptimizationPass() {
-    string before = CanonicalGraphString(&graph_);
-    LOG(ERROR) << "Before MKL layout rewrite pass: " << before;
-
-    std::unique_ptr<Graph>* ug = new std::unique_ptr<Graph>(&graph_);
-    RunMklLayoutRewritePass(ug);
-
-    string result = CanonicalGraphString(&graph_);
-    LOG(ERROR) << "After MKL layout rewrite pass:  " << result;
-    return result;
-  }
-
-  const string& OriginalGraph() const { return original_; }
-
-  Graph graph_;
-  string original_;
-};
-
-REGISTER_OP("Input").Output("o: float").SetIsStateful();
-REGISTER_OP("InputList").Output("o: N * float").Attr("N: int").SetIsStateful();
-REGISTER_OP("HalfInput").Output("o: half").SetIsStateful();
-REGISTER_OP("Int32Input").Output("o: int32").SetIsStateful();
-REGISTER_OP("_MklInput").Output("o: uint8").SetIsStateful();
-REGISTER_OP("_MklInput2").Output("o: uint8")
-                        .Output("o1: uint8").SetIsStateful();
-
-/////////////////////////////////////////////////////////////////////
-//  Unit tests related to node merge optiimization
-/////////////////////////////////////////////////////////////////////
-
-TEST_F(MklLayoutPassTest, Basic) {
-  InitGraph(
-      "node { name: 'A' op: 'Input'}"
-      "node { name: 'B' op: 'Input'}"
-      "node { name: 'C' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
-      " input: ['A', 'B'] }"
-      "node { name: 'D' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
-      " input: ['A', 'B'] }");
-  EXPECT_EQ(DoMklLayoutOptimizationPass(),
-            "A(Input);B(Input);C(Zeta);D(Zeta)|"
-            "A->C;A->D;B->C:1;B->D:1");
-}
-
-// Test set 1: Conv2D + AddBias
-
-// C=Conv2D(A,B); E=BiasAdd(C,D); Z=Zeta(E,Y)
-TEST_F(MklLayoutPassTest, NodeMerge_Conv2DWithBias_Positive) {
-  CHECK_EQ(kTensorOrdering, MklTfTensorOrdering::TENSORS_CONTIGUOUS);
-  InitGraph(
-      "node { name: 'A' op: 'Input'}"
-      "node { name: 'B' op: 'Input'}"
-      "node { name: 'C' op: 'Conv2D'"
-      " attr { key: 'T'                value { type: DT_FLOAT } }"
-      " attr { key: 'data_format'      value { s: 'NCHW' } }"
-      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
-      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
-      " attr { key: 'padding'          value { s: 'SAME' } }"
-      " input: ['A', 'B']}"
-      "node { name: 'D' op: 'Input'}"
-      "node { name: 'E' op: 'BiasAdd'"
-      " attr { key: 'T'                value { type: DT_FLOAT } }"
-      " attr { key: 'data_format'      value { s: 'NCHW' } }"
-      " input: ['C', 'D'] }"
-      "node { name: 'Y' op: 'Input'}"
-      "node { name: 'Z' op: 'Zeta'"
-      " attr {key: 'T'                 value { type: DT_FLOAT } }"
-      " input: ['E', 'Y']}");
-  EXPECT_EQ(DoMklLayoutOptimizationPass(),
-            "A(Input);B(Input);D(Input);DMT/_0(Const);DMT/_1(Const);"
-            "DMT/_2(Const);E(_MklConv2DWithBias);Y(Input);Z(Zeta)|A->E;"
-            "A:control->DMT/_0:control;A:control->DMT/_1:control;"
-            "A:control->DMT/_2:control;B->E:1;D->E:2;DMT/_0->E:3;DMT/_1->E:4;"
-            "DMT/_2->E:5;E->Z;Y->Z:1");
-}
-
-// Graph contains only Conv2D, no AddBias.
-TEST_F(MklLayoutPassTest, NodeMerge_Conv2DWithBias_Negative_NoAddBias) {
-  InitGraph(
-      "node { name: 'A' op: 'Input'}"
-      "node { name: 'B' op: 'Input'}"
-      "node { name: 'C' op: 'Conv2D'"
-      " attr { key: 'T'                value { type: DT_FLOAT } }"
-      " attr { key: 'data_format'      value { s: 'NCHW' } }"
-      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
-      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
-      " attr { key: 'padding'          value { s: 'SAME' } }"
-      " input: ['A', 'B']}");
-  EXPECT_EQ(DoMklLayoutOptimizationPass(),
-            "A(Input);B(Input);C(_MklConv2D);DMT/_0(Const);DMT/_1(Const)|"
-            "A->C;A:control->DMT/_0:control;A:control->DMT/_1:control;B->C:1;"
-            "DMT/_0->C:2;DMT/_1->C:3");
-}
-
-// Conv2D output does not go to BiasAdd.
-TEST_F(MklLayoutPassTest, NodeMerge_Conv2DWithBias_Negative_Dataflow1) {
-  InitGraph(
-      "node { name: 'A' op: 'Input'}"
-      "node { name: 'B' op: 'Input'}"
-      "node { name: 'C' op: 'Conv2D'"
-      " attr { key: 'T'                value { type: DT_FLOAT } }"
-      " attr { key: 'data_format'      value { s: 'NCHW' } }"
-      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
-      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
-      " attr { key: 'padding'          value { s: 'SAME' } }"
-      " input: ['A', 'B']}"
-      "node { name: 'D' op: 'Input'}"
-      "node { name: 'E' op: 'Input'}"
-      "node { name: 'F' op: 'BiasAdd'"
-      " attr { key: 'T'                value { type: DT_FLOAT } }"
-      " attr { key: 'data_format'      value { s: 'NCHW' } }"
-      " input: ['D', 'E'] }");  // Output of _MklConv2D does not go to BiasAdd.
-  EXPECT_EQ(DoMklLayoutOptimizationPass(),
-            "A(Input);B(Input);C(_MklConv2D);D(Input);DMT/_0(Const);"
-            "DMT/_1(Const);E(Input);F(BiasAdd)|A->C;A:control->DMT/_0:control;"
-            "A:control->DMT/_1:control;B->C:1;D->F;DMT/_0->C:2;DMT/_1->C:3;"
-            "E->F:1");
-}
-
-// Conv2D has two outgoing edges: BiasAdd and some other dummy node (Zeta).
-// Merge should not be done in such case.
-TEST_F(MklLayoutPassTest, NodeMerge_Conv2DWithBias_Negative_Dataflow2) {
-  InitGraph(
-      "node { name: 'A' op: 'Input'}"
-      "node { name: 'B' op: 'Input'}"
-      "node { name: 'C' op: 'Conv2D'"
-      " attr { key: 'T'                value { type: DT_FLOAT } }"
-      " attr { key: 'data_format'      value { s: 'NCHW' } }"
-      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
-      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
-      " attr { key: 'padding'          value { s: 'SAME' } }"
-      " input: ['A', 'B']}"
-      "node { name: 'D' op: 'Input'}"
-      "node { name: 'E' op: 'Input'}"
-      "node { name: 'F' op: 'BiasAdd'"
-      " attr { key: 'T'                value { type: DT_FLOAT } }"
-      " attr { key: 'data_format'      value { s: 'NCHW' } }"
-      " input: ['D', 'E'] }"  // Conv2D has two outputs.
-                              // No merge should happen.
-      "node { name: 'G' op: 'Zeta'"
-      " attr { key: 'T'                value { type: DT_FLOAT } }"
-      " input: ['C', 'E'] }");
-  EXPECT_EQ(DoMklLayoutOptimizationPass(),
-            "A(Input);B(Input);C(_MklConv2D);D(Input);DMT/_0(Const);"
-            "DMT/_1(Const);E(Input);F(BiasAdd);G(Zeta)|A->C;"
-            "A:control->DMT/_0:control;A:control->DMT/_1:control;B->C:1;C->G;"
-            "D->F;DMT/_0->C:2;DMT/_1->C:3;E->F:1;E->G:1");
-}
-
-// data_format attribute value mismatch. Merge should not be done
-// in such case.
-TEST_F(MklLayoutPassTest, NodeMerge_Conv2DWithBias_Negative_AttrMismatch) {
-  InitGraph(
-      "node { name: 'A' op: 'Input'}"
-      "node { name: 'B' op: 'Input'}"
-      "node { name: 'C' op: 'Conv2D'"
-      " attr { key: 'T'                value { type: DT_FLOAT } }"
-      " attr { key: 'data_format'      value { s: 'NCHW' } }"
-      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
-      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
-      " attr { key: 'padding'          value { s: 'SAME' } }"
-      " input: ['A', 'B']}"
-      "node { name: 'D' op: 'Input'}"
-      "node { name: 'E' op: 'BiasAdd'"
-      " attr { key: 'T'                value { type: DT_FLOAT } }"
-      " attr { key: 'data_format'      value { s: 'NHCW' } }"
-      " input: ['C', 'D'] }");
-  EXPECT_EQ(DoMklLayoutOptimizationPass(),
-            "A(Input);B(Input);C(_MklConv2D);D(Input);DMT/_0(Const);"
-            "DMT/_1(Const);E(BiasAdd)|A->C;A:control->DMT/_0:control;"
-            "A:control->DMT/_1:control;B->C:1;C->E;D->E:1;DMT/_0->C:2;"
-            "DMT/_1->C:3");
-}
-
-// Test set 2: BiasAddGrad + Conv2DBackpropFilter fusion tests
-
-TEST_F(MklLayoutPassTest, NodeMerge_Conv2DBackpropFilterFusion_Positive) {
-  InitGraph(
-      "node { name: 'A' op: 'Input'}"
-      "node { name: 'B' op: 'Int32Input'}"
-      "node { name: 'C' op: 'Input'}"
-      "node { name: 'D' op: 'Conv2DBackpropFilter'"
-      " attr { key: 'T'                value { type: DT_FLOAT } }"
-      " attr { key: 'data_format'      value { s: 'NCHW' } }"
-      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
-      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
-      " attr { key: 'padding'          value { s: 'SAME' } }"
-      " input: ['A', 'B', 'C'] }"
-      "node { name: 'E' op: 'BiasAddGrad'"
-      " attr { key: 'T'                value { type: DT_FLOAT } }"
-      " attr { key: 'data_format'      value { s: 'NCHW' } }"
-      " input: ['C'] }");
-  EXPECT_EQ(DoMklLayoutOptimizationPass(),
-            "A(Input);B(Int32Input);C(Input);"
-            "D(_MklConv2DBackpropFilterWithBias);DMT/_0(Const);DMT/_1(Const);"
-            "DMT/_2(Const)|A->D;A:control->DMT/_0:control;"
-            "A:control->DMT/_1:control;A:control->DMT/_2:control;B->D:1;C->D:2;"
-            "DMT/_0->D:3;DMT/_1->D:4;DMT/_2->D:5");
-}
-
-// BiasAddGrad fusion in the presence of BackpropFilter. But nodes do not match
-// criteria for rewrite. So rewrite should not happen. 3rd input of
-// Conv2DBackpropFilter is different than input to BiasAddGrad.
-TEST_F(MklLayoutPassTest, NodeMerge_Conv2DBackpropFilterFusion_Negative1) {
-  InitGraph(
-      "node { name: 'A' op: 'Input'}"
-      "node { name: 'B' op: 'Int32Input'}"
-      "node { name: 'C' op: 'Input'}"
-      "node { name: 'D' op: 'Conv2DBackpropFilter'"
-      " attr { key: 'T'                value { type: DT_FLOAT } }"
-      " attr { key: 'data_format'      value { s: 'NCHW' } }"
-      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
-      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
-      " attr { key: 'padding'          value { s: 'SAME' } }"
-      " input: ['A', 'B', 'C'] }"
-      "node { name: 'E' op: 'BiasAddGrad'"
-      " attr { key: 'T'                value { type: DT_FLOAT } }"
-      " attr { key: 'data_format'      value { s: 'NCHW' } }"
-      " input: ['A'] }");
-  EXPECT_EQ(DoMklLayoutOptimizationPass(),
-            "A(Input);B(Int32Input);C(Input);"
-            "D(_MklConv2DBackpropFilter);DMT/_0(Const);DMT/_1(Const);"
-            "DMT/_2(Const);E(BiasAddGrad)|A->D;A->E;A:control->DMT/_0:control;"
-            "A:control->DMT/_1:control;A:control->DMT/_2:control;B->D:1;C->D:2;"
-            "DMT/_0->D:3;DMT/_1->D:4;DMT/_2->D:5");
-}
-
-// BiasAddGrad fusion, but nodes do not match criteria for fusion.
-// Different input formats.
-TEST_F(MklLayoutPassTest, NodeMerge_Conv2DBackpropFilterFusion_Negative2) {
-  InitGraph(
-      "node { name: 'A' op: 'Input'}"
-      "node { name: 'B' op: 'Int32Input'}"
-      "node { name: 'C' op: 'Input'}"
-      "node { name: 'D' op: 'Conv2DBackpropFilter'"
-      " attr { key: 'T'                value { type: DT_FLOAT } }"
-      " attr { key: 'data_format'      value { s: 'NCHW' } }"
-      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
-      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
-      " attr { key: 'padding'          value { s: 'SAME' } }"
-      " input: ['A', 'B', 'C'] }"
-      "node { name: 'E' op: 'BiasAddGrad'"
-      " attr { key: 'T'                value { type: DT_FLOAT } }"
-      " attr { key: 'data_format'      value { s: 'NHWC' } }"
-      " input: ['A'] }");
-  EXPECT_EQ(DoMklLayoutOptimizationPass(),
-            "A(Input);B(Int32Input);C(Input);"
-            "D(_MklConv2DBackpropFilter);DMT/_0(Const);DMT/_1(Const);"
-            "DMT/_2(Const);E(BiasAddGrad)|A->D;A->E;A:control->DMT/_0:control;"
-            "A:control->DMT/_1:control;A:control->DMT/_2:control;B->D:1;C->D:2;"
-            "DMT/_0->D:3;DMT/_1->D:4;DMT/_2->D:5");
-}
-
-// BiasAddGrad fusion in the presence of BackpropFilter only. Fusion is done
-// before node rewrite. Check this ordering.
-TEST_F(MklLayoutPassTest, NodeMerge_Conv2DBackpropFilterFusion_Negative3) {
-  InitGraph(
-      "node { name: 'A' op: 'Input'}"
-      "node { name: 'B' op: 'Input'}"
-      "node { name: 'C' op: 'Input'}"
-      "node { name: 'M' op: '_MklInput'}"
-      "node { name: 'N' op: '_MklInput'}"
-      "node { name: 'O' op: '_MklInput'}"
-      "node { name: 'D' op: '_MklConv2DWithBias'"
-      " attr { key: 'T'                value { type: DT_FLOAT } }"
-      " attr { key: 'data_format'      value { s: 'NCHW' } }"
-      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
-      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
-      " attr { key: 'padding'          value { s: 'SAME' } }"
-      " input: ['A', 'B', 'C', 'M', 'N', 'O']}"
-      "node { name: 'E' op: 'Zeta'"
-      " attr {key: 'T'                 value { type: DT_FLOAT } }"
-      " input: ['D', 'A']}"
-      "node { name: 'F' op: 'Int32Input'}"
-      "node { name: 'G' op: '_MklConv2DBackpropFilter'"
-      " attr { key: 'T'                value { type: DT_FLOAT } }"
-      " attr { key: 'data_format'      value { s: 'NCHW' } }"
-      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
-      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
-      " attr { key: 'padding'          value { s: 'SAME' } }"
-      " input: ['E', 'F', 'A', 'M', 'N', 'O'] }"
-      "node { name: 'H' op: 'BiasAddGrad'"
-      " attr { key: 'T'                value { type: DT_FLOAT } }"
-      " attr { key: 'data_format'      value { s: 'NCHW' } }"
-      " input: ['E'] }");
-  EXPECT_EQ(DoMklLayoutOptimizationPass(),
-            "A(Input);B(Input);C(Input);D(_MklConv2DWithBias);"
-            "E(Zeta);F(Int32Input);G(_MklConv2DBackpropFilter);H(BiasAddGrad);"
-            "M(_MklInput);N(_MklInput);O(_MklInput)|A->D;A->E:1;A->G:2;B->D:1;"
-            "C->D:2;D->E;E->G;E->H;F->G:1;M->D:3;M->G:3;N->D:4;N->G:4;O->D:5;"
-            "O->G:5");
-}
-
-// C=Conv2D(A,B); E=BiasAdd(C,D); Y=Zeta(E,X);
-// G=Conv2DBackpropInput(F,B,E)
-// This is a case of node rewrite followed by node merge followed by connecting
-// filter output of Conv2DWithBias to filter input of Conv2DBackpropInput.
-TEST_F(MklLayoutPassTest, NodeMerge_Conv2DWithBias_ConvBpropInput_FilterFwd) {
-  CHECK_EQ(kTensorOrdering, MklTfTensorOrdering::TENSORS_CONTIGUOUS);
-  InitGraph(
-      "node { name: 'A' op: 'Input'}"
-      "node { name: 'B' op: 'Input'}"
-      "node { name: 'C' op: 'Conv2D'"
-      " attr { key: 'T'                value { type: DT_FLOAT } }"
-      " attr { key: 'data_format'      value { s: 'NCHW' } }"
-      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
-      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
-      " attr { key: 'padding'          value { s: 'SAME' } }"
-      " input: ['A', 'B']}"
-      "node { name: 'D' op: 'Input'}"
-      "node { name: 'E' op: 'BiasAdd'"
-      " attr { key: 'T'                value { type: DT_FLOAT } }"
-      " attr { key: 'data_format'      value { s: 'NCHW' } }"
-      " input: ['C', 'D'] }"
-      "node { name: 'X' op: 'Input'}"
-      "node { name: 'Y' op: 'Zeta'"
-      " attr {key: 'T'                 value { type: DT_FLOAT } }"
-      " input: ['E', 'X']}"
-      "node { name: 'F' op: 'Int32Input'}"
-      "node { name: 'G' op: 'Conv2DBackpropInput'"
-      " attr { key: 'T'                value { type: DT_FLOAT } }"
-      " attr { key: 'data_format'      value { s: 'NCHW' } }"
-      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
-      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
-      " attr { key: 'padding'          value { s: 'SAME' } }"
-      " input: ['F', 'B', 'E']}"
-      "node { name: 'Z' op: 'Zeta'"
-      " attr {key: 'T'                 value { type: DT_FLOAT } }"
-      " input: ['G', 'X']}");
-  EXPECT_EQ(DoMklLayoutOptimizationPass(),
-            "A(Input);B(Input);D(Input);DMT/_0(Const);DMT/_1(Const);"
-            "DMT/_2(Const);DMT/_3(Const);E(_MklConv2DWithBias);F(Int32Input);"
-            "G(_MklConv2DBackpropInput);X(Input);Y(Zeta);Z(Zeta)|"
-            "A->E;A:control->DMT/_0:control;A:control->DMT/_1:control;"
-            "A:control->DMT/_2:control;B->E:1;D->E:2;DMT/_0->E:3;"
-            "DMT/_1->E:4;DMT/_2->E:5;DMT/_3->G:3;E->G:2;E->Y;E:1->G:1;E:2->G:5;"
-            "E:3->G:4;F->G;F:control->DMT/_3:control;G->Z;X->Y:1;X->Z:1");
-}
-
-/////////////////////////////////////////////////////////////////////
-//  Unit tests related to rewriting node to Mkl node
-/////////////////////////////////////////////////////////////////////
-
-// Single Conv2D Op; No Mkl layer on the input and on the output.
-// We will generate dummy Mkl tensor as 2nd input of Conv2D.
-TEST_F(MklLayoutPassTest, NodeRewrite_Conv2D_Basic) {
-  InitGraph(
-      "node { name: 'A' op: 'Input'}"
-      "node { name: 'B' op: 'Input'}"
-      "node { name: 'C' op: 'Conv2D'"
-      " attr { key: 'T'                value { type: DT_FLOAT } }"
-      " attr { key: 'data_format'      value { s: 'NCHW' } }"
-      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
-      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
-      " attr { key: 'padding'          value { s: 'SAME' } }"
-      " input: ['A', 'B']}"
-      "node { name: 'D' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
-      " input: ['B', 'C'] }");
-  EXPECT_EQ(DoMklLayoutOptimizationPass(),
-            "A(Input);B(Input);C(_MklConv2D);D(Zeta);DMT/_0(Const);"
-            "DMT/_1(Const)|A->C;A:control->DMT/_0:control;"
-            "A:control->DMT/_1:control;B->C:1;B->D;C->D:1;DMT/_0->C:2;"
-            "DMT/_1->C:3");
-}
-
-// 2 Conv2D Ops in sequence. Both should get transformed and 1st Conv2D will
-// have 2 outputs, both of which will be inputs to next Conv2D.
-TEST_F(MklLayoutPassTest, NodeRewrite_Conv2D_Positive1) {
-  InitGraph(
-      "node { name: 'A' op: 'Input'}"
-      "node { name: 'B' op: 'Input'}"
-      "node { name: 'C' op: 'Conv2D'"
-      " attr { key: 'T'                value { type: DT_FLOAT } }"
-      " attr { key: 'data_format'      value { s: 'NCHW' } }"
-      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
-      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
-      " attr { key: 'padding'          value { s: 'SAME' } }"
-      " input: ['A', 'B']}"
-      "node { name: 'D' op: 'Conv2D'"
-      " attr { key: 'T'                value { type: DT_FLOAT } }"
-      " attr { key: 'data_format'      value { s: 'NCHW' } }"
-      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
-      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
-      " attr { key: 'padding'          value { s: 'SAME' } }"
-      " input: ['A', 'C']}"
-      "node { name: 'E' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
-      " input: ['C', 'D'] }");
-  EXPECT_EQ(DoMklLayoutOptimizationPass(),
-            "A(Input);B(Input);C(_MklConv2D);D(_MklConv2D);DMT/_0(Const);"
-            "DMT/_1(Const);DMT/_2(Const);E(Zeta)|A->C;A->D;"
-            "A:control->DMT/_0:control;A:control->DMT/_1:control;"
-            "A:control->DMT/_2:control;B->C:1;C->D:1;C->E;"
-            "C:2->D:3;D->E:1;DMT/_0->C:2;DMT/_1->C:3;DMT/_2->D:2");
-}
-
-// Conv2D with INT32 which is not supported by Mkl
-TEST_F(MklLayoutPassTest, NodeRewrite_Conv2D_Negative_UnsupportedType) {
-  InitGraph(
-      "node { name: 'A' op: 'HalfInput'}"
-      "node { name: 'B' op: 'HalfInput'}"
-      "node { name: 'C' op: 'Conv2D'"
-      " attr { key: 'T'                value { type: DT_HALF } }"
-      " attr { key: 'data_format'      value { s: 'NCHW' } }"
-      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
-      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
-      " attr { key: 'padding'          value { s: 'SAME' } }"
-      " input: ['A', 'B']}"
-      "node { name: 'D' op: 'Zeta' attr { key: 'T' value { type: DT_HALF } }"
-      " input: ['B', 'C'] }");
-  EXPECT_EQ(DoMklLayoutOptimizationPass(),
-            "A(HalfInput);B(HalfInput);C(Conv2D);D(Zeta)|"
-            "A->C;B->C:1;B->D;C->D:1");
-}
-
-TEST_F(MklLayoutPassTest, NodeRewrite_Conv2DGradFilter_Positive) {
-  InitGraph(
-      "node { name: 'A' op: 'Input'}"
-      "node { name: 'B' op: 'Int32Input'}"
-      "node { name: 'C' op: 'Input'}"
-      "node { name: 'D' op: 'Conv2DBackpropFilter'"
-      " attr { key: 'T'                value { type: DT_FLOAT } }"
-      " attr { key: 'data_format'      value { s: 'NCHW' } }"
-      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
-      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
-      " attr { key: 'padding'          value { s: 'SAME' } }"
-      " input: ['A', 'B', 'C']}"
-      "node { name: 'E' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
-      " input: ['A', 'D'] }");
-  EXPECT_EQ(DoMklLayoutOptimizationPass(),
-            "A(Input);B(Int32Input);C(Input);D(_MklConv2DBackpropFilter);"
-            "DMT/_0(Const);DMT/_1(Const);DMT/_2(Const);E(Zeta)|"
-            "A->D;A->E;A:control->DMT/_0:control;A:control->DMT/_1:control;"
-            "A:control->DMT/_2:control;B->D:1;C->D:2;D->E:1;DMT/_0->D:3;"
-            "DMT/_1->D:4;DMT/_2->D:5");
-}
-
-TEST_F(MklLayoutPassTest, NodeRewrite_Conv2DGradInput_Positive) {
-  InitGraph(
-      "node { name: 'A' op: 'Input'}"
-      "node { name: 'B' op: 'Int32Input'}"
-      "node { name: 'C' op: 'Input'}"
-      "node { name: 'D' op: 'Conv2DBackpropInput'"
-      " attr { key: 'T'                value { type: DT_FLOAT } }"
-      " attr { key: 'data_format'      value { s: 'NCHW' } }"
-      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
-      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
-      " attr { key: 'padding'          value { s: 'SAME' } }"
-      " input: ['B', 'A', 'C']}"
-      "node { name: 'E' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
-      " input: ['A', 'D'] }");
-  EXPECT_EQ(DoMklLayoutOptimizationPass(),
-            "A(Input);B(Int32Input);C(Input);D(_MklConv2DBackpropInput);"
-            "DMT/_0(Const);DMT/_1(Const);DMT/_2(Const);E(Zeta)|"
-            "A->D:1;A->E;B->D;B:control->DMT/_0:control;"
-            "B:control->DMT/_1:control;B:control->DMT/_2:control;C->D:2;"
-            "D->E:1;DMT/_0->D:3;DMT/_1->D:4;DMT/_2->D:5");
-}
-
-// Check that we never rewrite BiasAddGrad.
-TEST_F(MklLayoutPassTest, NodeRewrite_BiasAddGrad_Positive) {
-  InitGraph(
-      "node { name: 'A' op: 'Input'}"
-      "node { name: 'B' op: 'Input'}"
-      "node { name: 'C' op: 'Polygamma'"
-      " attr { key: 'T'                value { type: DT_FLOAT } }"
-      " input: ['A', 'B']}"
-      "node { name: 'D' op: 'Zeta'"
-      " attr {key: 'T'                 value { type: DT_FLOAT } }"
-      " input: ['C', 'A']}"
-      "node { name: 'E' op: 'BiasAddGrad'"
-      " attr { key: 'T'                value { type: DT_FLOAT } }"
-      " attr { key: 'data_format'      value { s: 'NCHW' } }"
-      " input: ['D'] }");
-  EXPECT_EQ(DoMklLayoutOptimizationPass(),
-            "A(Input);B(Input);C(Polygamma);D(Zeta);E(BiasAddGrad)|"
-            "A->C;A->D:1;B->C:1;C->D;D->E");
-}
-
-// Check that we never rewrite BiasAddGrad.
-TEST_F(MklLayoutPassTest, NodeRewrite_BiasAddGrad_Positive1) {
-  InitGraph(
-      "node { name: 'A' op: 'Input'}"
-      "node { name: 'B' op: 'Input'}"
-      "node { name: 'C' op: 'MatMul'"
-      " attr { key: 'T'                value { type: DT_FLOAT } }"
-      " attr { key: 'transpose_a'      value { b: false } }"
-      " attr { key: 'transpose_b'      value { b: false } }"
-      " input: ['A', 'B']}"
-      "node { name: 'D' op: 'Zeta'"
-      " attr {key: 'T'                 value { type: DT_FLOAT } }"
-      " input: ['C', 'A']}"
-      "node { name: 'E' op: 'BiasAddGrad'"
-      " attr { key: 'T'                value { type: DT_FLOAT } }"
-      " attr { key: 'data_format'      value { s: 'NCHW' } }"
-      " input: ['D'] }");
-  EXPECT_EQ(DoMklLayoutOptimizationPass(),
-            "A(Input);B(Input);C(MatMul);D(Zeta);E(BiasAddGrad)|"
-            "A->C;A->D:1;B->C:1;C->D;D->E");
-}
-
-// Check that we never rewrite BiasAddGrad.
-TEST_F(MklLayoutPassTest, NodeRewrite_BiasAddGrad_Positive2) {
-  InitGraph(
-      "node { name: 'A' op: 'Input'}"
-      "node { name: 'B' op: 'Input'}"
-      "node { name: 'M' op: '_MklInput'}"
-      "node { name: 'N' op: '_MklInput'}"
-      "node { name: 'C' op: '_MklConv2D'"
-      " attr { key: 'T'                value { type: DT_FLOAT } }"
-      " attr { key: 'data_format'      value { s: 'NCHW' } }"
-      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
-      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
-      " attr { key: 'padding'          value { s: 'SAME' } }"
-      " input: ['A', 'B', 'M', 'N']}"
-      "node { name: 'D' op: 'Zeta'"
-      " attr {key: 'T'                 value { type: DT_FLOAT } }"
-      " input: ['C', 'A']}"
-      "node { name: 'E' op: 'BiasAddGrad'"
-      " attr { key: 'T'                value { type: DT_FLOAT } }"
-      " attr { key: 'data_format'      value { s: 'NCHW' } }"
-      " input: ['D'] }");
-  EXPECT_EQ(DoMklLayoutOptimizationPass(),
-            "A(Input);B(Input);C(_MklConv2D);D(Zeta);E(BiasAddGrad);"
-            "M(_MklInput);N(_MklInput)|A->C;A->D:1;B->C:1;C->D;D->E;"
-            "M->C:2;N->C:3");
-}
-
-// Concat Op test: Concat with no Mkl layer feeding it
-TEST_F(MklLayoutPassTest, NodeRewrite_Concat_Basic) {
-  InitGraph(
-      "node { name: 'A' op: 'Const' "
-      " attr { key: 'dtype' value { type: DT_INT32 } }"
-      " attr { key: 'value' value { "
-      "    tensor { dtype: DT_INT32 tensor_shape { dim { size: 1 } } "
-      "    int_val: 0 } } } }"
-      "node { name: 'B' op: 'InputList'"
-      " attr { key: 'N'                value { i: 2 } }}"
-      "node { name: 'C' op: 'Input'}"
-      "node { name: 'D' op: 'Concat'"
-      " attr { key: 'T'                value { type: DT_FLOAT } }"
-      " attr { key: 'N'                value { i: 2 } }"
-      " input: ['A', 'B:0', 'B:1']}"
-      "node { name: 'E' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
-      " input: ['C', 'D'] }");
-  EXPECT_EQ(DoMklLayoutOptimizationPass(),
-            "A(Const);B(InputList);C(Input);D(_MklConcat);DMT/_0(Const);"
-            "DMT/_1(Const);DMT/_2(Const);E(Zeta)|A->D;A:control->DMT/_0:control;"
-            "A:control->DMT/_1:control;A:control->DMT/_2:control;B->D:1;"
-            "B:1->D:2;C->E;D->E:1;DMT/_0->D:3;DMT/_1->D:4;DMT/_2->D:5");
-}
-
-// Concat with 2 Mkl layers feeding it
-TEST_F(MklLayoutPassTest, NodeRewrite_Concat_Input_Mkl) {
-  InitGraph(
-      "node { name: 'A' op: 'Input'}"
-      "node { name: 'B' op: 'Input'}"
-      "node { name: 'C' op: 'Input'}"
-      "node { name: 'D' op: 'Input'}"
-      "node { name: 'E' op: 'Conv2D'"
-      " attr { key: 'T'                value { type: DT_FLOAT } }"
-      " attr { key: 'data_format'      value { s: 'NCHW' } }"
-      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
-      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
-      " attr { key: 'padding'          value { s: 'SAME' } }"
-      " input: ['A', 'B']}"
-      "node { name: 'F' op: 'Conv2D'"
-      " attr { key: 'T'                value { type: DT_FLOAT } }"
-      " attr { key: 'data_format'      value { s: 'NCHW' } }"
-      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
-      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
-      " attr { key: 'padding'          value { s: 'SAME' } }"
-      " input: ['C', 'D']}"
-      "node { name: 'G' op: 'Const' "
-      " attr { key: 'dtype' value { type: DT_INT32 } }"
-      " attr { key: 'value' value { "
-      "    tensor { dtype: DT_INT32 tensor_shape { dim { size: 1 } } "
-      "    int_val: 0 } } } }"
-      "node { name: 'H' op: 'Concat'"
-      " attr { key: 'T'                value { type: DT_FLOAT } }"
-      " attr { key: 'N'                value { i: 2 } }"
-      " input: ['G', 'E', 'F']}"
-      "node { name: 'I' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
-      " input: ['A', 'H'] }");
-  EXPECT_EQ(DoMklLayoutOptimizationPass(),
-            "A(Input);B(Input);C(Input);D(Input);DMT/_0(Const);DMT/_1(Const);"
-            "DMT/_2(Const);DMT/_3(Const);DMT/_4(Const);E(_MklConv2D);"
-            "F(_MklConv2D);G(Const);H(_MklConcat);I(Zeta)|A->E;A->I;"
-            "A:control->DMT/_2:control;A:control->DMT/_3:control;"
-            "B->E:1;C->F;C:control->DMT/_0:control;C:control->DMT/_1:control;"
-            "D->F:1;DMT/_0->F:2;DMT/_1->F:3;DMT/_2->E:2;DMT/_3->E:3;"
-            "DMT/_4->H:3;E->H:1;E:2->H:4;F->H:2;F:2->H:5;G->H;"
-            "G:control->DMT/_4:control;H->I:1");
-}
-
-// Concat with 1 Mkl and 1 non-Mkl layer feeding it
-TEST_F(MklLayoutPassTest, NodeRewrite_Concat_Input_MixedMkl) {
-  InitGraph(
-      "node { name: 'A' op: 'Input'}"
-      "node { name: 'B' op: 'Input'}"
-      "node { name: 'C' op: 'Input'}"
-      "node { name: 'D' op: 'Input'}"
-      "node { name: 'E' op: 'Conv2D'"
-      " attr { key: 'T'                value { type: DT_FLOAT } }"
-      " attr { key: 'data_format'      value { s: 'NCHW' } }"
-      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
-      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
-      " attr { key: 'padding'          value { s: 'SAME' } }"
-      " input: ['A', 'B']}"
-      "node { name: 'F' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
-      " input: ['C', 'D']}"
-      "node { name: 'G' op: 'Const' "
-      " attr { key: 'dtype' value { type: DT_INT32 } }"
-      " attr { key: 'value' value { "
-      "    tensor { dtype: DT_INT32 tensor_shape { dim { size: 1 } } "
-      "    int_val: 0 } } } }"
-      "node { name: 'H' op: 'Concat'"
-      " attr { key: 'T'                value { type: DT_FLOAT } }"
-      " attr { key: 'N'                value { i: 2 } }"
-      " input: ['G', 'E', 'F']}"
-      "node { name: 'I' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
-      " input: ['A', 'H'] }");
-  EXPECT_EQ(DoMklLayoutOptimizationPass(),
-            "A(Input);B(Input);C(Input);D(Input);DMT/_0(Const);DMT/_1(Const);"
-            "DMT/_2(Const);DMT/_3(Const);E(_MklConv2D);F(Zeta);G(Const);"
-            "H(_MklConcat);I(Zeta)|A->E;A->I;A:control->DMT/_0:control;"
-            "A:control->DMT/_1:control;B->E:1;C->F;D->F:1;DMT/_0->E:2;"
-            "DMT/_1->E:3;DMT/_2->H:3;DMT/_3->H:5;E->H:1;E:2->H:4;F->H:2;"
-            "G->H;G:control->DMT/_2:control;G:control->DMT/_3:control;H->I:1");
-}
-
-// ConcatV2 Op test: ConcatV2 with no Mkl layer feeding it
-TEST_F(MklLayoutPassTest, NodeRewrite_ConcatV2_Basic) {
-  InitGraph(
-      "node { name: 'A' op: 'Const' "
-      " attr { key: 'dtype' value { type: DT_INT32 } }"
-      " attr { key: 'value' value { "
-      "    tensor { dtype: DT_INT32 tensor_shape { dim { size: 1 } } "
-      "    int_val: 0 } } } }"
-      "node { name: 'B' op: 'InputList'"
-      " attr { key: 'N'                value { i: 2 } }}"
-      "node { name: 'C' op: 'Input'}"
-      "node { name: 'D' op: 'ConcatV2'"
-      " attr { key: 'T'                value { type: DT_FLOAT } }"
-      " attr { key: 'Tidx'             value { type: DT_INT32 } }"
-      " attr { key: 'N'                value { i: 2 } }"
-      " input: ['B:0', 'B:1', 'A']}"
-      "node { name: 'E' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
-      " input: ['C', 'D'] }");
-  EXPECT_EQ(DoMklLayoutOptimizationPass(),
-            "A(Const);B(InputList);C(Input);D(_MklConcatV2);DMT/_0(Const);"
-            "DMT/_1(Const);DMT/_2(Const);E(Zeta)|A->D:2;B->D;B:1->D:1;"
-            "B:control->DMT/_0:control;B:control->DMT/_1:control;"
-            "B:control->DMT/_2:control;C->E;D->E:1;DMT/_0->D:3;"
-            "DMT/_1->D:4;DMT/_2->D:5");
-}
-
-// ConcatV2 with 2 Mkl layers feeding it
-TEST_F(MklLayoutPassTest, NodeRewrite_ConcatV2_Input_Mkl) {
-  InitGraph(
-      "node { name: 'A' op: 'Input'}"
-      "node { name: 'B' op: 'Input'}"
-      "node { name: 'C' op: 'Input'}"
-      "node { name: 'D' op: 'Input'}"
-      "node { name: 'E' op: 'Conv2D'"
-      " attr { key: 'T'                value { type: DT_FLOAT } }"
-      " attr { key: 'data_format'      value { s: 'NCHW' } }"
-      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
-      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
-      " attr { key: 'padding'          value { s: 'SAME' } }"
-      " input: ['A', 'B']}"
-      "node { name: 'F' op: 'Conv2D'"
-      " attr { key: 'T'                value { type: DT_FLOAT } }"
-      " attr { key: 'data_format'      value { s: 'NCHW' } }"
-      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
-      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
-      " attr { key: 'padding'          value { s: 'SAME' } }"
-      " input: ['C', 'D']}"
-      "node { name: 'G' op: 'Const' "
-      " attr { key: 'dtype' value { type: DT_INT32 } }"
-      " attr { key: 'value' value { "
-      "    tensor { dtype: DT_INT32 tensor_shape { dim { size: 1 } } "
-      "    int_val: 0 } } } }"
-      "node { name: 'H' op: 'ConcatV2'"
-      " attr { key: 'T'                value { type: DT_FLOAT } }"
-      " attr { key: 'Tidx'             value { type: DT_INT32 } }"
-      " attr { key: 'N'                value { i: 2 } }"
-      " input: ['E', 'F', 'G']}"
-      "node { name: 'I' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
-      " input: ['A', 'H'] }");
-  EXPECT_EQ(DoMklLayoutOptimizationPass(),
-            "A(Input);B(Input);C(Input);D(Input);DMT/_0(Const);DMT/_1(Const);"
-            "DMT/_2(Const);DMT/_3(Const);DMT/_4(Const);E(_MklConv2D);"
-            "F(_MklConv2D);G(Const);H(_MklConcatV2);I(Zeta)|A->E;A->I;"
-            "A:control->DMT/_2:control;A:control->DMT/_3:control;B->E:1;C->F;"
-            "C:control->DMT/_0:control;C:control->DMT/_1:control;"
-            "D->F:1;DMT/_0->F:2;DMT/_1->F:3;DMT/_2->E:2;DMT/_3->E:3;"
-            "DMT/_4->H:5;E->H;E:2->H:3;E:control->DMT/_4:control;F->H:1;"
-            "F:2->H:4;G->H:2;H->I:1");
-}
-
-// ConcatV2 with 1 Mkl and 1 non-Mkl layer feeding it
-TEST_F(MklLayoutPassTest, NodeRewrite_ConcatV2_Input_MixedMkl) {
-  InitGraph(
-      "node { name: 'A' op: 'Input'}"
-      "node { name: 'B' op: 'Input'}"
-      "node { name: 'C' op: 'Input'}"
-      "node { name: 'D' op: 'Input'}"
-      "node { name: 'E' op: 'Conv2D'"
-      " attr { key: 'T'                value { type: DT_FLOAT } }"
-      " attr { key: 'data_format'      value { s: 'NCHW' } }"
-      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
-      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
-      " attr { key: 'padding'          value { s: 'SAME' } }"
-      " input: ['A', 'B']}"
-      "node { name: 'F' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
-      " input: ['C', 'D']}"
-      "node { name: 'G' op: 'Const' "
-      " attr { key: 'dtype' value { type: DT_INT32 } }"
-      " attr { key: 'value' value { "
-      "    tensor { dtype: DT_INT32 tensor_shape { dim { size: 1 } } "
-      "    int_val: 0 } } } }"
-      "node { name: 'H' op: 'ConcatV2'"
-      " attr { key: 'T'                value { type: DT_FLOAT } }"
-      " attr { key: 'Tidx'             value { type: DT_INT32 } }"
-      " attr { key: 'N'                value { i: 2 } }"
-      " input: ['E', 'F', 'G']}"
-      "node { name: 'I' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
-      " input: ['A', 'H'] }");
-  EXPECT_EQ(DoMklLayoutOptimizationPass(),
-            "A(Input);B(Input);C(Input);D(Input);DMT/_0(Const);DMT/_1(Const);"
-            "DMT/_2(Const);DMT/_3(Const);E(_MklConv2D);F(Zeta);G(Const);"
-            "H(_MklConcatV2);I(Zeta)|A->E;A->I;A:control->DMT/_0:control;"
-            "A:control->DMT/_1:control;B->E:1;C->F;D->F:1;DMT/_0->E:2;"
-            "DMT/_1->E:3;DMT/_2->H:4;DMT/_3->H:5;E->H;E:2->H:3;"
-            "E:control->DMT/_2:control;E:control->DMT/_3:control;F->H:1;"
-            "G->H:2;H->I:1");
-}
-
-TEST_F(MklLayoutPassTest, NodeRewrite_Relu_Positive) {
-  InitGraph(
-      "node { name: 'A' op: 'Input'}"
-      "node { name: 'B' op: 'Relu'"
-      " attr { key: 'T'                value { type: DT_FLOAT } }"
-      " input: ['A'] }"
-      "node { name: 'C' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
-      " input: ['A', 'B'] }");
-  EXPECT_EQ(DoMklLayoutOptimizationPass(),
-            "A(Input);B(_MklRelu);C(Zeta);DMT/_0(Const)|A->B;A->C;"
-            "A:control->DMT/_0:control;B->C:1;DMT/_0->B:1");
-}
-
-TEST_F(MklLayoutPassTest, NodeRewrite_ReluGrad_Positive) {
-  InitGraph(
-      "node { name: 'A' op: 'Input'}"
-      "node { name: 'B' op: 'Input'}"
-      "node { name: 'C' op: 'ReluGrad'"
-      " attr { key: 'T'                value { type: DT_FLOAT } }"
-      " input: ['A', 'B'] }"
-      "node { name: 'D' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
-      " input: ['A', 'C'] }");
-  EXPECT_EQ(DoMklLayoutOptimizationPass(),
-            "A(Input);B(Input);C(_MklReluGrad);D(Zeta);DMT/_0(Const);"
-            "DMT/_1(Const)|A->C;A->D;A:control->DMT/_0:control;"
-            "A:control->DMT/_1:control;B->C:1;C->D:1;DMT/_0->C:2;DMT/_1->C:3");
-}
-
-TEST_F(MklLayoutPassTest, NodeRewrite_ReluReluGrad_Positive) {
-  InitGraph(
-      "node { name: 'A' op: 'Input'}"
-      "node { name: 'B' op: 'Relu'"
-      " attr { key: 'T'                value { type: DT_FLOAT } }"
-      " input: ['A'] }"
-      "node { name: 'C' op: 'ReluGrad'"
-      " attr { key: 'T'                value { type: DT_FLOAT } }"
-      " input: ['A', 'B'] }"
-      "node { name: 'D' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
-      " input: ['A', 'C'] }");
-  EXPECT_EQ(DoMklLayoutOptimizationPass(),
-            "A(Input);B(_MklRelu);C(_MklReluGrad);D(Zeta);DMT/_0(Const);"
-            "DMT/_1(Const)|A->B;A->C;A->D;A:control->DMT/_0:control;"
-            "A:control->DMT/_1:control;B->C:1;B:1->C:3;C->D:1;DMT/_0->B:1;"
-            "DMT/_1->C:2");
-}
-
-TEST_F(MklLayoutPassTest, NodeRewrite_AvgPool_Positive) {
-  InitGraph(
-      "node { name: 'A' op: 'Input'}"
-      "node { name: 'B' op: 'AvgPool'"
-      " attr { key: 'T'            value { type: DT_FLOAT } }"
-      " attr { key: 'data_format'  value { s: 'NCHW' } }"
-      " attr { key: 'ksize'        value { list: {i: 1, i:1, i:3, i:3} } }"
-      " attr { key: 'padding'      value { s: 'VALID' } }"
-      " attr { key: 'strides'      value { list: {i: 1, i:1, i:2, i:2} } }"
-      " input: ['A'] }"
-      "node { name: 'C' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
-      " input: ['A', 'B'] }");
-  EXPECT_EQ(DoMklLayoutOptimizationPass(),
-            "A(Input);B(_MklAvgPool);C(Zeta);DMT/_0(Const)|A->B;A->C;"
-            "A:control->DMT/_0:control;B->C:1;DMT/_0->B:1");
-}
-
-TEST_F(MklLayoutPassTest, NodeRewrite_AvgPoolGrad_Positive) {
-  InitGraph(
-      "node { name: 'A' op: 'Int32Input'}"
-      "node { name: 'B' op: 'Input'}"
-      "node { name: 'C' op: 'AvgPoolGrad' "
-      " attr { key: 'T'            value { type: DT_FLOAT } }"
-      " attr { key: 'data_format'  value { s: 'NCHW' } }"
-      " attr { key: 'ksize'        value { list: {i: 1, i:1, i:3, i:3} } }"
-      " attr { key: 'padding'      value { s: 'VALID' } }"
-      " attr { key: 'strides'      value { list: {i: 1, i:1, i:2, i:2} } }"
-      " input: ['A', 'B'] }"
-      "node { name: 'D' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
-      " input: ['B', 'C'] }");
-  EXPECT_EQ(DoMklLayoutOptimizationPass(),
-            "A(Int32Input);B(Input);C(_MklAvgPoolGrad);D(Zeta);DMT/_0(Const);"
-            "DMT/_1(Const)|A->C;A:control->DMT/_0:control;"
-            "A:control->DMT/_1:control;B->C:1;B->D;C->D:1;DMT/_0->C:2;"
-            "DMT/_1->C:3");
-}
-
-TEST_F(MklLayoutPassTest, NodeRewrite_AvgPoolAvgPoolGrad_Positive) {
-  InitGraph(
-      "node { name: 'A' op: 'Input'}"
-      "node { name: 'I' op: 'Int32Input'}"
-      "node { name: 'B' op: 'AvgPool'"
-      " attr { key: 'T'            value { type: DT_FLOAT } }"
-      " attr { key: 'data_format'  value { s: 'NCHW' } }"
-      " attr { key: 'ksize'        value { list: {i: 1, i:1, i:3, i:3} } }"
-      " attr { key: 'padding'      value { s: 'VALID' } }"
-      " attr { key: 'strides'      value { list: {i: 1, i:1, i:2, i:2} } }"
-      " input: ['A'] }"
-      "node { name: 'C' op: 'AvgPoolGrad' "
-      " attr { key: 'T'            value { type: DT_FLOAT } }"
-      " attr { key: 'data_format'  value { s: 'NCHW' } }"
-      " attr { key: 'ksize'        value { list: {i: 1, i:1, i:3, i:3} } }"
-      " attr { key: 'padding'      value { s: 'VALID' } }"
-      " attr { key: 'strides'      value { list: {i: 1, i:1, i:2, i:2} } }"
-      " input: ['I', 'B'] }"
-      "node { name: 'D' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
-      " input: ['A', 'C'] }");
-  EXPECT_EQ(DoMklLayoutOptimizationPass(),
-            "A(Input);B(_MklAvgPool);C(_MklAvgPoolGrad);D(Zeta);DMT/_0(Const);"
-            "DMT/_1(Const);I(Int32Input)|A->B;A->D;A:control->DMT/_0:control;"
-            "B->C:1;B:1->C:3;C->D:1;DMT/_0->B:1;DMT/_1->C:2;I->C;"
-            "I:control->DMT/_1:control");
-}
-
-TEST_F(MklLayoutPassTest, NodeRewrite_FusedBatchNormGrad_Positive) {
-  InitGraph(
-      "node { name: 'A' op: 'Input'}"
-      "node { name: 'B' op: 'Input'}"
-      "node { name: 'C' op: 'Input'}"
-      "node { name: 'D' op: 'Input'}"
-      "node { name: 'E' op: 'Input'}"
-      "node { name: 'F' op: 'FusedBatchNormGrad'"
-      " attr { key: 'T'            value { type: DT_FLOAT } }"
-      " attr { key: 'data_format'  value { s: 'NCHW' } }"
-      " attr { key: 'epsilon'      value { f: 0.0001 } }"
-      " attr { key: 'is_training'  value { b: true } }"
-      " input: ['A', 'B', 'C', 'D', 'E'] }"
-      "node { name: 'G' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
-      " input: ['A', 'F'] }");
-  EXPECT_EQ(DoMklLayoutOptimizationPass(),
-            "A(Input);B(Input);C(Input);D(Input);DMT/_0(Const);DMT/_1(Const);"
-            "DMT/_2(Const);DMT/_3(Const);DMT/_4(Const);E(Input);"
-            "F(_MklFusedBatchNormGrad);G(Zeta)|A->F;A->G;"
-            "A:control->DMT/_0:control;A:control->DMT/_1:control;"
-            "A:control->DMT/_2:control;A:control->DMT/_3:control;"
-            "A:control->DMT/_4:control;B->F:1;C->F:2;D->F:3;"
-            "DMT/_0->F:5;DMT/_1->F:6;DMT/_2->F:7;DMT/_3->F:8;DMT/_4->F:9;"
-            "E->F:4;F->G:1");
-}
-
-TEST_F(MklLayoutPassTest, NodeRewrite_FusedBatchNorm_Positive) {
-  InitGraph(
-      "node { name: 'A' op: 'Input'}"
-      "node { name: 'B' op: 'Input'}"
-      "node { name: 'C' op: 'Input'}"
-      "node { name: 'D' op: 'Input'}"
-      "node { name: 'E' op: 'Input'}"
-      "node { name: 'F' op: 'FusedBatchNorm'"
-      " attr { key: 'T'            value { type: DT_FLOAT } }"
-      " attr { key: 'data_format'  value { s: 'NCHW' } }"
-      " attr { key: 'epsilon'      value { f: 0.0001 } }"
-      " attr { key: 'is_training'  value { b: true } }"
-      " input: ['A', 'B', 'C', 'D', 'E'] }"
-      "node { name: 'G' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
-      " input: ['A', 'F'] }");
-  EXPECT_EQ(DoMklLayoutOptimizationPass(),
-            "A(Input);B(Input);C(Input);D(Input);DMT/_0(Const);DMT/_1(Const);"
-            "DMT/_2(Const);DMT/_3(Const);DMT/_4(Const);E(Input);"
-            "F(_MklFusedBatchNorm);G(Zeta)|A->F;A->G;"
-            "A:control->DMT/_0:control;A:control->DMT/_1:control;"
-            "A:control->DMT/_2:control;A:control->DMT/_3:control;"
-            "A:control->DMT/_4:control;B->F:1;C->F:2;D->F:3;"
-            "DMT/_0->F:5;DMT/_1->F:6;DMT/_2->F:7;DMT/_3->F:8;DMT/_4->F:9;"
-            "E->F:4;F->G:1");
-}
-
-/////////////////////////////////////////////////////////////////////
-//  Unit tests related to rewriting node for workspace edges
-/////////////////////////////////////////////////////////////////////
-
-/* Test LRN->MaxPool->MaxPoolGrad->LRNGrad replacement by workspace nodes. */
-TEST_F(MklLayoutPassTest, MaxPoolLRN_Positive) {
-  InitGraph(
-      "node { name: 'A' op: 'Input'}"
-      "node { name: 'B' op: 'LRN'"
-      " attr { key: 'T'            value { type: DT_FLOAT } }"
-      " attr { key: 'alpha'        value { f: 0.001 } }"
-      " attr { key: 'beta'         value { f: 0.75 } }"
-      " attr { key: 'bias'         value { f: 1.0 } }"
-      " attr { key: 'data_format'  value { s: 'NCHW' } }"
-      " attr { key: 'depth_radius' value { i: 2 } }"
-      " input: ['A'] }"
-      "node { name: 'C' op: 'MaxPool'"
-      " attr { key: 'T'            value { type: DT_FLOAT } }"
-      " attr { key: 'data_format'  value { s: 'NCHW' } }"
-      " attr { key: 'ksize'        value { list: {i: 1, i:1, i:3, i:3} } }"
-      " attr { key: 'padding'      value { s: 'VALID' } }"
-      " attr { key: 'strides'      value { list: {i: 1, i:1, i:2, i:2} } }"
-      " input: ['B'] }"
-      "node { name: 'D' op: 'Input'}"
-      "node { name: 'E' op: 'MaxPoolGrad'"
-      " attr { key: 'T'            value { type: DT_FLOAT } }"
-      " attr { key: 'data_format'  value { s: 'NCHW' } }"
-      " attr { key: 'ksize'        value { list: {i: 1, i:1, i:3, i:3} } }"
-      " attr { key: 'padding'      value { s: 'VALID' } }"
-      " attr { key: 'strides'      value { list: {i: 1, i:1, i:2, i:2} } }"
-      " input: ['B', 'C', 'D'] }"
-      "node { name: 'F' op: 'Input'}"
-      "node { name: 'G' op: 'LRNGrad'"
-      " attr { key: 'T'            value { type: DT_FLOAT } }"
-      " attr { key: 'alpha'        value { f: 0.001 } }"
-      " attr { key: 'beta'         value { f: 0.75 } }"
-      " attr { key: 'bias'         value { f: 1.0 } }"
-      " attr { key: 'data_format'  value { s: 'NCHW' } }"
-      " attr { key: 'depth_radius' value { i: 2 } }"
-      " input: ['E', 'F', 'B'] }"
-      "node { name: 'H' op: 'Input'}"
-      "node { name: 'I' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
-      " input: ['H', 'G'] }");
-  EXPECT_EQ(DoMklLayoutOptimizationPass(),
-      "A(Input);B(_MklLRN);C(_MklMaxPool);D(Input);DMT/_0(Const);DMT/_1(Const);"
-      "DMT/_2(Const);E(_MklMaxPoolGrad);F(Input);G(_MklLRNGrad);H(Input);"
-      "I(Zeta)|A->B;A:control->DMT/_0:control;B->C;B->E;B->G:2;B:1->G:3;"
-      "B:2->C:1;B:2->E:4;B:2->G:6;B:3->G:7;B:control->DMT/_1:control;C->E:1;"
-      "C:1->E:3;C:2->E:5;C:3->E:7;D->E:2;DMT/_0->B:1;DMT/_1->E:6;DMT/_2->G:5;"
-      "E->G;E:1->G:4;E:control->DMT/_2:control;F->G:1;G->I:1;H->I");
-}
-
-/* Test LRN->LRNGrad replacement by workspace nodes. */
-TEST_F(MklLayoutPassTest, LRN_Positive) {
-  InitGraph(
-      "node { name: 'A' op: 'Input'}"
-      "node { name: 'B' op: 'LRN'"
-      " attr { key: 'T'            value { type: DT_FLOAT } }"
-      " attr { key: 'alpha'        value { f: 0.001 } }"
-      " attr { key: 'beta'         value { f: 0.75 } }"
-      " attr { key: 'bias'         value { f: 1.0 } }"
-      " attr { key: 'data_format'  value { s: 'NCHW' } }"
-      " attr { key: 'depth_radius' value { i: 2 } }"
-      " input: ['A'] }"
-      "node { name: 'C' op: 'Input'}"
-      "node { name: 'D' op: 'Input'}"
-      "node { name: 'E' op: 'LRNGrad'"
-      " attr { key: 'T'            value { type: DT_FLOAT } }"
-      " attr { key: 'alpha'        value { f: 0.001 } }"
-      " attr { key: 'beta'         value { f: 0.75 } }"
-      " attr { key: 'bias'         value { f: 1.0 } }"
-      " attr { key: 'data_format'  value { s: 'NCHW' } }"
-      " attr { key: 'depth_radius' value { i: 2 } }"
-      " input: ['C', 'D', 'B'] }"
-      "node { name: 'F' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
-      " input: ['C', 'E'] }");
-  EXPECT_EQ(DoMklLayoutOptimizationPass(),
-            "A(Input);B(_MklLRN);C(Input);D(Input);DMT/_0(Const);DMT/_1(Const);"
-            "DMT/_2(Const);E(_MklLRNGrad);F(Zeta)|"
-            "A->B;A:control->DMT/_0:control;B->E:2;B:1->E:3;B:2->E:6;B:3->E:7;"
-            "C->E;C->F;C:control->DMT/_1:control;C:control->DMT/_2:control;"
-            "D->E:1;DMT/_0->B:1;DMT/_1->E:4;DMT/_2->E:5;E->F:1");
-}
-
-/* Test LRN->LRNGrad replacement when only one of them is present. */
-TEST_F(MklLayoutPassTest, LRN_Negative1) {
-  InitGraph(
-      "node { name: 'A' op: 'Input'}"
-      "node { name: 'B' op: 'LRN'"
-      " attr { key: 'T'            value { type: DT_FLOAT } }"
-      " attr { key: 'alpha'        value { f: 0.001 } }"
-      " attr { key: 'beta'         value { f: 0.75 } }"
-      " attr { key: 'bias'         value { f: 1.0 } }"
-      " attr { key: 'data_format'  value { s: 'NCHW' } }"
-      " attr { key: 'depth_radius' value { i: 2 } }"
-      " input: ['A'] }"
-      "node { name: 'C' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
-      " input: ['A', 'B'] }");
-  EXPECT_EQ(DoMklLayoutOptimizationPass(),
-            "A(Input);B(_MklLRN);C(Zeta);DMT/_0(Const)|"
-            "A->B;A->C;A:control->DMT/_0:control;B->C:1;DMT/_0->B:1");
-}
-
-/* Test LRN->LRNGrad replacement when only one of them is present. */
-TEST_F(MklLayoutPassTest, LRN_Negative2) {
-  InitGraph(
-      "node { name: 'A' op: 'Input'}"
-      "node { name: 'B' op: 'Input'}"
-      "node { name: 'C' op: 'Input'}"
-      "node { name: 'D' op: 'LRNGrad'"
-      " attr { key: 'T'            value { type: DT_FLOAT } }"
-      " attr { key: 'alpha'        value { f: 0.001 } }"
-      " attr { key: 'beta'         value { f: 0.75 } }"
-      " attr { key: 'bias'         value { f: 1.0 } }"
-      " attr { key: 'data_format'  value { s: 'NCHW' } }"
-      " attr { key: 'depth_radius' value { i: 2 } }"
-      " input: ['A', 'B', 'C'] }"
-      "node { name: 'E' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
-      " input: ['A', 'D'] }");
-  EXPECT_EQ(DoMklLayoutOptimizationPass(),
-            "A(Input);B(Input);C(Input);D(_MklLRNGrad);DMT/_0(Const);"
-            "DMT/_1(Const);DMT/_2(Const);DMT/_3(Const);DMT/_4(Const);E(Zeta)|"
-            "A->D;A->E;A:control->DMT/_0:control;A:control->DMT/_1:control;"
-            "A:control->DMT/_2:control;A:control->DMT/_3:control;"
-            "A:control->DMT/_4:control;B->D:1;C->D:2;D->E:1;DMT/_0->D:3;"
-            "DMT/_1->D:7;DMT/_2->D:4;DMT/_3->D:5;DMT/_4->D:6");
-}
-
-/* Test LRN->LRNGrad negative case, where single LRN feeds
-   2 LRNGrad nodes at different slots. */
-TEST_F(MklLayoutPassTest, LRN_Negative3) {
-  InitGraph(
-      "node { name: 'A' op: 'Input'}"
-      "node { name: 'B' op: 'LRN'"
-      " attr { key: 'T'            value { type: DT_FLOAT } }"
-      " attr { key: 'alpha'        value { f: 0.001 } }"
-      " attr { key: 'beta'         value { f: 0.75 } }"
-      " attr { key: 'bias'         value { f: 1.0 } }"
-      " attr { key: 'data_format'  value { s: 'NCHW' } }"
-      " attr { key: 'depth_radius' value { i: 2 } }"
-      " input: ['A'] }"
-      "node { name: 'C' op: 'Input'}"
-      "node { name: 'D' op: 'Input'}"
-      "node { name: 'E' op: 'LRNGrad'"
-      " attr { key: 'T'            value { type: DT_FLOAT } }"
-      " attr { key: 'alpha'        value { f: 0.001 } }"
-      " attr { key: 'beta'         value { f: 0.75 } }"
-      " attr { key: 'bias'         value { f: 1.0 } }"
-      " attr { key: 'data_format'  value { s: 'NCHW' } }"
-      " attr { key: 'depth_radius' value { i: 2 } }"
-      " input: ['C', 'D', 'B'] }"
-      "node { name: 'F' op: 'LRNGrad'"
-      " attr { key: 'T'            value { type: DT_FLOAT } }"
-      " attr { key: 'alpha'        value { f: 0.001 } }"
-      " attr { key: 'beta'         value { f: 0.75 } }"
-      " attr { key: 'bias'         value { f: 1.0 } }"
-      " attr { key: 'data_format'  value { s: 'NCHW' } }"
-      " attr { key: 'depth_radius' value { i: 2 } }"
-      " input: ['C', 'B', 'D'] }"
-      "node { name: 'G' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
-      " input: ['E', 'F'] }");
-  EXPECT_EQ(DoMklLayoutOptimizationPass(),
-            "A(Input);B(_MklLRN);C(Input);D(Input);DMT/_0(Const);DMT/_1(Const);"
-            "DMT/_2(Const);DMT/_3(Const);DMT/_4(Const);DMT/_5(Const);"
-            "DMT/_6(Const);E(_MklLRNGrad);F(_MklLRNGrad);G(Zeta)|A->B;"
-            "A:control->DMT/_0:control;B->E:2;"
-            "B->F:1;B:1->E:3;B:2->E:6;B:2->F:5;B:3->E:7;C->E;C->F;"
-            "C:control->DMT/_1:control;C:control->DMT/_2:control;"
-            "C:control->DMT/_3:control;C:control->DMT/_4:control;"
-            "C:control->DMT/_5:control;C:control->DMT/_6:control;"
-            "D->E:1;D->F:2;DMT/_0->B:1;DMT/_1->F:3;DMT/_2->F:7;DMT/_3->F:4;"
-            "DMT/_4->F:6;DMT/_5->E:4;DMT/_6->E:5;E->G;F->G:1");
-}
-
-/* Test MaxPool->MaxPoolGrad replacement by workspace+rewrite nodes. */
-TEST_F(MklLayoutPassTest, NodeWorkspace_MaxPool_Positive) {
-  InitGraph(
-      "node { name: 'A' op: 'Input'}"
-      "node { name: 'B' op: 'MaxPool'"
-      " attr { key: 'T'            value { type: DT_FLOAT } }"
-      " attr { key: 'data_format'  value { s: 'NCHW' } }"
-      " attr { key: 'ksize'        value { list: {i: 1, i:1, i:3, i:3} } }"
-      " attr { key: 'padding'      value { s: 'VALID' } }"
-      " attr { key: 'strides'      value { list: {i: 1, i:1, i:2, i:2} } }"
-      " input: ['A'] }"
-      "node { name: 'C' op: 'Input'}"
-      "node { name: 'D' op: 'Input'}"
-      "node { name: 'E' op: 'MaxPoolGrad'"
-      " attr { key: 'T'            value { type: DT_FLOAT } }"
-      " attr { key: 'data_format'  value { s: 'NCHW' } }"
-      " attr { key: 'ksize'        value { list: {i: 1, i:1, i:3, i:3} } }"
-      " attr { key: 'padding'      value { s: 'VALID' } }"
-      " attr { key: 'strides'      value { list: {i: 1, i:1, i:2, i:2} } }"
-      " input: ['C', 'B', 'D'] }"
-      "node { name: 'F' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
-      " input: ['C', 'E'] }");
-  EXPECT_EQ(DoMklLayoutOptimizationPass(),
-            "A(Input);B(_MklMaxPool);C(Input);D(Input);DMT/_0(Const);"
-            "DMT/_1(Const);DMT/_2(Const);E(_MklMaxPoolGrad);F(Zeta)|"
-            "A->B;A:control->DMT/_0:control;B->E:1;B:1->E:3;B:2->E:5;B:3->E:7;"
-            "C->E;C->F;C:control->DMT/_1:control;C:control->DMT/_2:control;"
-            "D->E:2;DMT/_0->B:1;DMT/_1->E:4;DMT/_2->E:6;E->F:1");
-}
-
-// Test MaxPool>MaxPoolGrad replacement when only one of them is present.
-// In this case, we will rewrite MaxPool node but workspace edges will not
-// be present.
-TEST_F(MklLayoutPassTest, NodeWorkspace_MaxPool_Negative1) {
-  InitGraph(
-      "node { name: 'A' op: 'Input'}"
-      "node { name: 'B' op: 'MaxPool'"
-      " attr { key: 'T'            value { type: DT_FLOAT } }"
-      " attr { key: 'data_format'  value { s: 'NCHW' } }"
-      " attr { key: 'ksize'        value { list: {i: 1, i:1, i:3, i:3} } }"
-      " attr { key: 'padding'      value { s: 'VALID' } }"
-      " attr { key: 'strides'      value { list: {i: 1, i:1, i:2, i:2} } }"
-      " input: ['A'] }"
-      "node { name: 'C' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
-      " input: ['A', 'B'] }");
-  EXPECT_EQ(DoMklLayoutOptimizationPass(),
-            "A(Input);B(_MklMaxPool);C(Zeta);DMT/_0(Const)|"
-            "A->B;A->C;A:control->DMT/_0:control;B->C:1;DMT/_0->B:1");
-}
-
-// Test MaxPoolGrad replacement when only one of them is present.
-// In this case, we will rewrite MaxPoolGrad and for workspace tensor and
-// its Mkl part, we will generate dummy tensor.
-TEST_F(MklLayoutPassTest, NodeWorkspace_MaxPool_Negative2) {
-  InitGraph(
-      "node { name: 'A' op: 'Input'}"
-      "node { name: 'B' op: 'Input'}"
-      "node { name: 'C' op: 'Input'}"
-      "node { name: 'D' op: 'MaxPoolGrad'"
-      " attr { key: 'T'            value { type: DT_FLOAT } }"
-      " attr { key: 'data_format'  value { s: 'NCHW' } }"
-      " attr { key: 'ksize'        value { list: {i: 1, i:1, i:3, i:3} } }"
-      " attr { key: 'padding'      value { s: 'VALID' } }"
-      " attr { key: 'strides'      value { list: {i: 1, i:1, i:2, i:2} } }"
-      " input: ['A', 'B', 'C'] }"
-      "node { name: 'E' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
-      " input: ['A', 'D'] }");
-  EXPECT_EQ(DoMklLayoutOptimizationPass(),
-            "A(Input);B(Input);C(Input);D(_MklMaxPoolGrad);DMT/_0(Const);"
-            "DMT/_1(Const);DMT/_2(Const);DMT/_3(Const);DMT/_4(Const);E(Zeta)|"
-            "A->D;A->E;A:control->DMT/_0:control;A:control->DMT/_1:control;"
-            "A:control->DMT/_2:control;A:control->DMT/_3:control;"
-            "A:control->DMT/_4:control;B->D:1;C->D:2;D->E:1;DMT/_0->D:3;"
-            "DMT/_1->D:7;DMT/_2->D:4;DMT/_3->D:5;DMT/_4->D:6");
-}
-
-// Test MaxPool handling for batch-wise pooling (NCHW)
-// No rewrite should take place in such case
-TEST_F(MklLayoutPassTest, NodeWorkspace_MaxPool_Negative3) {
-  InitGraph(
-      "node { name: 'A' op: 'Input'}"
-      "node { name: 'B' op: 'MaxPool'"
-      " attr { key: 'T'            value { type: DT_FLOAT } }"
-      " attr { key: 'data_format'  value { s: 'NCHW' } }"
-      " attr { key: 'ksize'        value { list: {i: 2, i:1, i:1, i:1} } }"
-      " attr { key: 'padding'      value { s: 'VALID' } }"
-      " attr { key: 'strides'      value { list: {i: 1, i:1, i:1, i:1} } }"
-      " input: ['A'] }"
-      "node { name: 'C' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
-      " input: ['A', 'B'] }");
-  EXPECT_EQ(DoMklLayoutOptimizationPass(),
-            "A(Input);B(MaxPool);C(Zeta)|A->B;A->C;B->C:1");
-}
-
-// Test MaxPool handling for batch-wise pooling (NCHW)
-// No rewrite should take place in such case
-TEST_F(MklLayoutPassTest, NodeWorkspace_MaxPool_Negative4) {
-  InitGraph(
-      "node { name: 'A' op: 'Input'}"
-      "node { name: 'B' op: 'MaxPool'"
-      " attr { key: 'T'            value { type: DT_FLOAT } }"
-      " attr { key: 'data_format'  value { s: 'NCHW' } }"
-      " attr { key: 'ksize'        value { list: {i: 1, i:1, i:1, i:1} } }"
-      " attr { key: 'padding'      value { s: 'VALID' } }"
-      " attr { key: 'strides'      value { list: {i: 2, i:1, i:1, i:1} } }"
-      " input: ['A'] }"
-      "node { name: 'C' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
-      " input: ['A', 'B'] }");
-  EXPECT_EQ(DoMklLayoutOptimizationPass(),
-            "A(Input);B(MaxPool);C(Zeta)|A->B;A->C;B->C:1");
-}
-
-// Test MaxPool handling for depth-wise pooling (NHWC)
-// No rewrite should take place in such case
-TEST_F(MklLayoutPassTest, NodeWorkspace_MaxPool_Negative5) {
-  InitGraph(
-      "node { name: 'A' op: 'Input'}"
-      "node { name: 'B' op: 'MaxPool'"
-      " attr { key: 'T'            value { type: DT_FLOAT } }"
-      " attr { key: 'data_format'  value { s: 'NCHW' } }"
-      " attr { key: 'ksize'        value { list: {i: 1, i:2, i:1, i:1} } }"
-      " attr { key: 'padding'      value { s: 'VALID' } }"
-      " attr { key: 'strides'      value { list: {i: 1, i:1, i:1, i:1} } }"
-      " input: ['A'] }"
-      "node { name: 'C' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
-      " input: ['A', 'B'] }");
-  EXPECT_EQ(DoMklLayoutOptimizationPass(),
-            "A(Input);B(MaxPool);C(Zeta)|A->B;A->C;B->C:1");
-}
-
-// Test MaxPool handling for depth-wise pooling (NCHW)
-// No rewrite should take place in such case
-TEST_F(MklLayoutPassTest, NodeWorkspace_MaxPool_Negative6) {
-  InitGraph(
-      "node { name: 'A' op: 'Input'}"
-      "node { name: 'B' op: 'MaxPool'"
-      " attr { key: 'T'            value { type: DT_FLOAT } }"
-      " attr { key: 'data_format'  value { s: 'NCHW' } }"
-      " attr { key: 'ksize'        value { list: {i: 1, i:1, i:1, i:1} } }"
-      " attr { key: 'padding'      value { s: 'VALID' } }"
-      " attr { key: 'strides'      value { list: {i: 1, i:2, i:1, i:1} } }"
-      " input: ['A'] }"
-      "node { name: 'C' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
-      " input: ['A', 'B'] }");
-  EXPECT_EQ(DoMklLayoutOptimizationPass(),
-            "A(Input);B(MaxPool);C(Zeta)|A->B;A->C;B->C:1");
-}
-
-// Test MaxPool handling for batch-wise pooling (NHWC)
-// No rewrite should take place in such case
-TEST_F(MklLayoutPassTest, NodeWorkspace_MaxPool_Negative7) {
-  InitGraph(
-      "node { name: 'A' op: 'Input'}"
-      "node { name: 'B' op: 'MaxPool'"
-      " attr { key: 'T'            value { type: DT_FLOAT } }"
-      " attr { key: 'data_format'  value { s: 'NHWC' } }"
-      " attr { key: 'ksize'        value { list: {i: 2, i:1, i:1, i:1} } }"
-      " attr { key: 'padding'      value { s: 'VALID' } }"
-      " attr { key: 'strides'      value { list: {i: 1, i:1, i:1, i:1} } }"
-      " input: ['A'] }"
-      "node { name: 'C' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
-      " input: ['A', 'B'] }");
-  EXPECT_EQ(DoMklLayoutOptimizationPass(),
-            "A(Input);B(MaxPool);C(Zeta)|A->B;A->C;B->C:1");
-}
-
-// Test MaxPool handling for batch-wise pooling (NHWC)
-// No rewrite should take place in such case
-TEST_F(MklLayoutPassTest, NodeWorkspace_MaxPool_Negative8) {
-  InitGraph(
-      "node { name: 'A' op: 'Input'}"
-      "node { name: 'B' op: 'MaxPool'"
-      " attr { key: 'T'            value { type: DT_FLOAT } }"
-      " attr { key: 'data_format'  value { s: 'NHWC' } }"
-      " attr { key: 'ksize'        value { list: {i: 1, i:1, i:1, i:1} } }"
-      " attr { key: 'padding'      value { s: 'VALID' } }"
-      " attr { key: 'strides'      value { list: {i: 2, i:1, i:1, i:1} } }"
-      " input: ['A'] }"
-      "node { name: 'C' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
-      " input: ['A', 'B'] }");
-  EXPECT_EQ(DoMklLayoutOptimizationPass(),
-            "A(Input);B(MaxPool);C(Zeta)|A->B;A->C;B->C:1");
-}
-
-// Test MaxPool handling for depth-wise pooling (NHWC)
-// No rewrite should take place in such case
-TEST_F(MklLayoutPassTest, NodeWorkspace_MaxPool_Negative9) {
-  InitGraph(
-      "node { name: 'A' op: 'Input'}"
-      "node { name: 'B' op: 'MaxPool'"
-      " attr { key: 'T'            value { type: DT_FLOAT } }"
-      " attr { key: 'data_format'  value { s: 'NHWC' } }"
-      " attr { key: 'ksize'        value { list: {i: 1, i:1, i:1, i:2} } }"
-      " attr { key: 'padding'      value { s: 'VALID' } }"
-      " attr { key: 'strides'      value { list: {i: 1, i:1, i:1, i:1} } }"
-      " input: ['A'] }"
-      "node { name: 'C' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
-      " input: ['A', 'B'] }");
-  EXPECT_EQ(DoMklLayoutOptimizationPass(),
-            "A(Input);B(MaxPool);C(Zeta)|A->B;A->C;B->C:1");
-}
-
-// Test MaxPool handling for depth-wise pooling (NHWC)
-// No rewrite should take place in such case
-TEST_F(MklLayoutPassTest, NodeWorkspace_MaxPool_Negative10) {
-  InitGraph(
-      "node { name: 'A' op: 'Input'}"
-      "node { name: 'B' op: 'MaxPool'"
-      " attr { key: 'T'            value { type: DT_FLOAT } }"
-      " attr { key: 'data_format'  value { s: 'NHWC' } }"
-      " attr { key: 'ksize'        value { list: {i: 1, i:1, i:1, i:1} } }"
-      " attr { key: 'padding'      value { s: 'VALID' } }"
-      " attr { key: 'strides'      value { list: {i: 1, i:1, i:1, i:2} } }"
-      " input: ['A'] }"
-      "node { name: 'C' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
-      " input: ['A', 'B'] }");
-  EXPECT_EQ(DoMklLayoutOptimizationPass(),
-            "A(Input);B(MaxPool);C(Zeta)|A->B;A->C;B->C:1");
-}
-
-/////////////////////////////////////////////////////////////////////
-
-// Single Conv2D Op on GPU device
-// No rewrite should happen
-TEST_F(MklLayoutPassTest, NodeRewrite_Conv2D_DeviceTest) {
-  InitGraph(
-      "node { name: 'A' op: 'Input'}"
-      "node { name: 'B' op: 'Input'}"
-      "node { name: 'C' op: 'Conv2D'"
-      " attr { key: 'T'                value { type: DT_FLOAT } }"
-      " attr { key: 'data_format'      value { s: 'NCHW' } }"
-      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
-      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
-      " attr { key: 'padding'          value { s: 'SAME' } }"
-      " input: ['A', 'B']}"
-      "node { name: 'D' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
-      " input: ['B', 'C'] }", kGPUDevice);
-  EXPECT_EQ(DoMklLayoutOptimizationPass(),
-            "A(Input);B(Input);C(Conv2D);D(Zeta)|A->C;B->C:1;B->D;C->D:1");
-}
-
-TEST_F(MklLayoutPassTest, NodeMerge_Conv2DBackprop_DeviceTest) {
-  InitGraph(
-      "node { name: 'A' op: 'Input'}"
-      "node { name: 'B' op: 'Input'}"
-      "node { name: 'C' op: 'Input'}"
-      "node { name: 'M' op: '_MklInput'}"
-      "node { name: 'N' op: '_MklInput'}"
-      "node { name: 'O' op: '_MklInput'}"
-      "node { name: 'D' op: '_MklConv2DWithBias'"
-      " attr { key: 'T'                value { type: DT_FLOAT } }"
-      " attr { key: 'data_format'      value { s: 'NCHW' } }"
-      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
-      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
-      " attr { key: 'padding'          value { s: 'SAME' } }"
-      " input: ['A', 'B', 'C', 'M', 'N', 'O']}"
-      "node { name: 'E' op: 'Zeta'"
-      " attr {key: 'T'                 value { type: DT_FLOAT } }"
-      " input: ['D', 'A']}"
-      "node { name: 'F' op: 'BiasAddGrad'"
-      " attr { key: 'T'                value { type: DT_FLOAT } }"
-      " attr { key: 'data_format'      value { s: 'NCHW' } }"
-      " input: ['E'] }", kGPUDevice);
-  EXPECT_EQ(DoMklLayoutOptimizationPass(),
-            "A(Input);B(Input);C(Input);D(_MklConv2DWithBias);"
-            "E(Zeta);F(BiasAddGrad);M(_MklInput);N(_MklInput);"
-            "O(_MklInput)|A->D;A->E:1;B->D:1;C->D:2;D->E;E->F;"
-            "M->D:3;N->D:4;O->D:5");
-}
-
-TEST_F(MklLayoutPassTest, NodeRewrite_Conv2DGradFilter_DeviceTest) {
-  InitGraph(
-      "node { name: 'A' op: 'Input'}"
-      "node { name: 'B' op: 'Int32Input'}"
-      "node { name: 'C' op: 'Input'}"
-      "node { name: 'D' op: 'Conv2DBackpropFilter'"
-      " attr { key: 'T'                value { type: DT_FLOAT } }"
-      " attr { key: 'data_format'      value { s: 'NCHW' } }"
-      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
-      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
-      " attr { key: 'padding'          value { s: 'SAME' } }"
-      " input: ['A', 'B', 'C']}"
-      "node { name: 'E' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
-      " input: ['A', 'D'] }", kGPUDevice);
-  EXPECT_EQ(DoMklLayoutOptimizationPass(),
-            "A(Input);B(Int32Input);C(Input);D(Conv2DBackpropFilter);E(Zeta)|"
-            "A->D;A->E;B->D:1;C->D:2;D->E:1");
-}
-
-TEST_F(MklLayoutPassTest, NodeRewrite_Relu_DeviceTest) {
-  InitGraph(
-      "node { name: 'A' op: 'Input'}"
-      "node { name: 'B' op: 'Relu'"
-      " attr { key: 'T'                value { type: DT_FLOAT } }"
-      " input: ['A'] }"
-      "node { name: 'C' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
-      " input: ['A', 'B'] }", kGPUDevice);
-  EXPECT_EQ(DoMklLayoutOptimizationPass(),
-            "A(Input);B(Relu);C(Zeta)|A->B;A->C;B->C:1");
-}
-
-TEST_F(MklLayoutPassTest, NodeRewrite_ReluGrad_DeviceTest) {
-  InitGraph(
-      "node { name: 'A' op: 'Input'}"
-      "node { name: 'B' op: 'Input'}"
-      "node { name: 'C' op: 'ReluGrad'"
-      " attr { key: 'T'                value { type: DT_FLOAT } }"
-      " input: ['A', 'B'] }"
-      "node { name: 'D' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
-      " input: ['A', 'C'] }", kGPUDevice);
-  EXPECT_EQ(DoMklLayoutOptimizationPass(),
-            "A(Input);B(Input);C(ReluGrad);D(Zeta)|A->C;A->D;B->C:1;C->D:1");
-}
-
-TEST_F(MklLayoutPassTest, NodeRewrite_MaxPool_DeviceTest) {
-  InitGraph(
-      "node { name: 'A' op: 'Input'}"
-      "node { name: 'B' op: 'MaxPool'"
-      " attr { key: 'T'            value { type: DT_FLOAT } }"
-      " attr { key: 'data_format'  value { s: 'NHWC' } }"
-      " attr { key: 'ksize'        value { list: {i: 1, i:1, i:1, i:1} } }"
-      " attr { key: 'padding'      value { s: 'VALID' } }"
-      " attr { key: 'strides'      value { list: {i: 1, i:1, i:1, i:1} } }"
-      " input: ['A'] }"
-      "node { name: 'C' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
-      " input: ['A', 'B'] }", kGPUDevice);
-  EXPECT_EQ(DoMklLayoutOptimizationPass(),
-            "A(Input);B(MaxPool);C(Zeta)|A->B;A->C;B->C:1");
-}
-
-TEST_F(MklLayoutPassTest, NodeRewrite_AvgPool_DeviceTest) {
-  InitGraph(
-      "node { name: 'A' op: 'Input'}"
-      "node { name: 'B' op: 'AvgPool'"
-      " attr { key: 'T'            value { type: DT_FLOAT } }"
-      " attr { key: 'data_format'  value { s: 'NHWC' } }"
-      " attr { key: 'ksize'        value { list: {i: 1, i:1, i:1, i:1} } }"
-      " attr { key: 'padding'      value { s: 'VALID' } }"
-      " attr { key: 'strides'      value { list: {i: 1, i:1, i:1, i:1} } }"
-      " input: ['A'] }"
-      "node { name: 'C' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
-      " input: ['A', 'B'] }", kGPUDevice);
-  EXPECT_EQ(DoMklLayoutOptimizationPass(),
-            "A(Input);B(AvgPool);C(Zeta)|A->B;A->C;B->C:1");
-}
-
-// Concat Op test: Concat with no Mkl layer feeding it
-TEST_F(MklLayoutPassTest, NodeRewrite_Concat_DeviceTest) {
-  InitGraph(
-      "node { name: 'A' op: 'Const' "
-      " attr { key: 'dtype' value { type: DT_INT32 } }"
-      " attr { key: 'value' value { "
-      "    tensor { dtype: DT_INT32 tensor_shape { dim { size: 1 } } "
-      "    int_val: 0 } } } }"
-      "node { name: 'B' op: 'InputList'"
-      " attr { key: 'N'                value { i: 2 } }}"
-      "node { name: 'C' op: 'Input'}"
-      "node { name: 'D' op: 'Concat'"
-      " attr { key: 'T'                value { type: DT_FLOAT } }"
-      " attr { key: 'N'                value { i: 2 } }"
-      " input: ['A', 'B:0', 'B:1']}"
-      "node { name: 'E' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
-      " input: ['C', 'D'] }", kGPUDevice);
-  EXPECT_EQ(DoMklLayoutOptimizationPass(),
-            "A(Const);B(InputList);C(Input);D(Concat);E(Zeta)|A->D;"
-            "B->D:1;B:1->D:2;C->E;D->E:1");
-}
-
-TEST_F(MklLayoutPassTest, NodeRewrite_ConcatV2_DeviceTest) {
-  InitGraph(
-      "node { name: 'A' op: 'Const' "
-      " attr { key: 'dtype' value { type: DT_INT32 } }"
-      " attr { key: 'value' value { "
-      "    tensor { dtype: DT_INT32 tensor_shape { dim { size: 1 } } "
-      "    int_val: 0 } } } }"
-      "node { name: 'B' op: 'InputList'"
-      " attr { key: 'N'                value { i: 2 } }}"
-      "node { name: 'C' op: 'Input'}"
-      "node { name: 'D' op: 'ConcatV2'"
-      " attr { key: 'T'                value { type: DT_FLOAT } }"
-      " attr { key: 'Tidx'             value { type: DT_INT32 } }"
-      " attr { key: 'N'                value { i: 2 } }"
-      " input: ['B:0', 'B:1', 'A']}"
-      "node { name: 'E' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
-      " input: ['C', 'D'] }", kGPUDevice);
-  EXPECT_EQ(DoMklLayoutOptimizationPass(),
-            "A(Const);B(InputList);C(Input);D(ConcatV2);E(Zeta)|"
-            "A->D:2;B->D;B:1->D:1;C->E;D->E:1");
-}
-
-TEST_F(MklLayoutPassTest, NodeRewrite_FusedBatchNorm_DeviceTest) {
-  InitGraph(
-      "node { name: 'A' op: 'Input'}"
-      "node { name: 'B' op: 'Input'}"
-      "node { name: 'C' op: 'Input'}"
-      "node { name: 'D' op: 'Input'}"
-      "node { name: 'E' op: 'Input'}"
-      "node { name: 'F' op: 'FusedBatchNorm'"
-      " attr { key: 'T'            value { type: DT_FLOAT } }"
-      " attr { key: 'data_format'  value { s: 'NCHW' } }"
-      " attr { key: 'epsilon'      value { f: 0.0001 } }"
-      " attr { key: 'is_training'  value { b: true } }"
-      " input: ['A', 'B', 'C', 'D', 'E'] }"
-      "node { name: 'G' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
-      " input: ['A', 'F'] }", kGPUDevice);
-  EXPECT_EQ(DoMklLayoutOptimizationPass(),
-            "A(Input);B(Input);C(Input);D(Input);E(Input);"
-            "F(FusedBatchNorm);G(Zeta)|A->F;A->G;B->F:1;C->F:2;D->F:3;"
-            "E->F:4;F->G:1");
-}
-
-TEST_F(MklLayoutPassTest, NodeMerge_Conv2DWithBias_DeviceTest) {
-  CHECK_EQ(kTensorOrdering, MklTfTensorOrdering::TENSORS_CONTIGUOUS);
-  InitGraph(
-      "node { name: 'A' op: 'Input'}"
-      "node { name: 'B' op: 'Input'}"
-      "node { name: 'M' op: '_MklInput'}"
-      "node { name: 'N' op: '_MklInput'}"
-      "node { name: 'C' op: '_MklConv2D'"
-      " attr { key: 'T'                value { type: DT_FLOAT } }"
-      " attr { key: 'data_format'      value { s: 'NCHW' } }"
-      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
-      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
-      " attr { key: 'padding'          value { s: 'SAME' } }"
-      " input: ['A', 'B', 'M', 'N']}"
-      "node { name: 'D' op: 'Input'}"
-      "node { name: 'E' op: 'BiasAdd'"
-      " attr { key: 'T'                value { type: DT_FLOAT } }"
-      " attr { key: 'data_format'      value { s: 'NCHW' } }"
-      " input: ['C', 'D'] }"
-      "node { name: 'Y' op: 'Input'}"
-      "node { name: 'Z' op: 'Zeta'"
-      " attr {key: 'T'                 value { type: DT_FLOAT } }"
-      " input: ['E', 'Y']}", kGPUDevice);
-  EXPECT_EQ(DoMklLayoutOptimizationPass(),
-            "A(Input);B(Input);C(_MklConv2D);D(Input);E(BiasAdd);"
-            "M(_MklInput);N(_MklInput);Y(Input);Z(Zeta)|A->C;"
-            "B->C:1;C->E;D->E:1;E->Z;M->C:2;N->C:3;Y->Z:1");
-}
-
-/////////////////////////////////////////////////////////////////////
-
-static void BM_MklLayoutRewritePass(int iters, int op_nodes) {
-  testing::StopTiming();
-  string s;
-  for (int in = 0; in < 10; in++) {
-    s += strings::Printf("node { name: 'in%04d' op: 'Input'}", in);
-  }
-  random::PhiloxRandom philox(301, 17);
-  random::SimplePhilox rnd(&philox);
-  for (int op = 0; op < op_nodes; op++) {
-    s += strings::Printf(
-        "node { name: 'op%04d' op: 'Zeta' attr { key: 'T' value { "
-        "type: DT_FLOAT } } input: ['in%04d', 'in%04d' ] }",
-        op, rnd.Uniform(10), rnd.Uniform(10));
-  }
-
-  bool first = true;
-  while (iters > 0) {
-    Graph* graph = new Graph(OpRegistry::Global());
-    InitGraph(s, graph);
-    int N = graph->num_node_ids();
-    if (first) {
-      testing::SetLabel(strings::StrCat("Per graph node.  Nodes: ", N));
-      first = false;
-    }
-    {
-      testing::StartTiming();
-      std::unique_ptr<Graph> ug(graph);
-      RunMklLayoutRewritePass(&ug);
-      testing::StopTiming();
-    }
-    iters -= N;  // Our benchmark units are individual graph nodes,
-                 // not whole graphs
-    // delete graph;
-  }
-}
-BENCHMARK(BM_MklLayoutRewritePass)->Arg(1000)->Arg(10000);
-
-}  // namespace
-
-#endif  // INTEL_MKL_DNN
-
 }  // namespace tensorflow
 
 #endif /* INTEL_MKL */
diff --git a/tensorflow/core/kernels/logging_ops.cc b/tensorflow/core/kernels/logging_ops.cc
index bacf3e7740..67d603dd0a 100644
--- a/tensorflow/core/kernels/logging_ops.cc
+++ b/tensorflow/core/kernels/logging_ops.cc
@@ -13,7 +13,6 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#include <iostream>
 #include "tensorflow/core/framework/op_kernel.h"
 #include "tensorflow/core/lib/core/status.h"
 #include "tensorflow/core/lib/strings/str_util.h"
@@ -77,7 +76,7 @@ class PrintOp : public OpKernel {
       strings::StrAppend(&msg, "[", ctx->input(i).SummarizeValue(summarize_),
                          "]");
     }
-    std::cerr << msg << std::endl;
+    LOG(INFO) << msg;
   }
 
  private:
diff --git a/tensorflow/core/kernels/mkl_aggregate_ops.cc b/tensorflow/core/kernels/mkl_aggregate_ops.cc
index 9aabbbdb6b..935eb81dd0 100644
--- a/tensorflow/core/kernels/mkl_aggregate_ops.cc
+++ b/tensorflow/core/kernels/mkl_aggregate_ops.cc
@@ -19,6 +19,7 @@ limitations under the License.
 #define EIGEN_USE_THREADS
 
 #include <numeric>
+
 #include "tensorflow/core/framework/numeric_op.h"
 #include "tensorflow/core/framework/register_types.h"
 #include "tensorflow/core/lib/gtl/inlined_vector.h"
@@ -28,16 +29,9 @@ limitations under the License.
 #include "mkl_dnn_types.h"
 #include "tensorflow/core/util/mkl_util.h"
 
-#ifdef INTEL_MKL_DNN
-#include "mkldnn.hpp"
-using mkldnn::stream;
-using mkldnn::sum;
-#endif
-
 namespace tensorflow {
-typedef Eigen::ThreadPoolDevice CPUDevice;
 
-#ifndef INTEL_MKL_DNN
+typedef Eigen::ThreadPoolDevice CPUDevice;
 
 template <typename Device, typename T>
 class MklAddNOp : public OpKernel {
@@ -47,18 +41,17 @@ class MklAddNOp : public OpKernel {
   void Compute(OpKernelContext* ctx) override {
     const int num = ctx->num_inputs();
     OP_REQUIRES(ctx, num / 2 == 2,
-                errors::InvalidArgument("Only additions of two tensors "
+                errors::InvalidArgument("Only additions of two arguments "
                                         "supported by MKL. Num inputs: ",
                                         num));
 
     MklAddNOpContext mkl_context;
-    size_t src1_idx = 0, src2_idx = 1;
-    const Tensor& input0 = MklGetInput(ctx, src1_idx);
-    GetMklShape(ctx, src1_idx, &(mkl_context.input1_shape));
+    const Tensor& input0 = MklGetInput(ctx, 0);
+    GetMklShape(ctx, 0, &(mkl_context.input1_shape));
     bool input1_in_mkl_format = mkl_context.input1_shape.IsMklTensor();
 
-    const Tensor& input1 = MklGetInput(ctx, src2_idx);
-    GetMklShape(ctx, src2_idx, &(mkl_context.input2_shape));
+    const Tensor& input1 = MklGetInput(ctx, 1);
+    GetMklShape(ctx, 1, &(mkl_context.input2_shape));
     bool input2_in_mkl_format = mkl_context.input2_shape.IsMklTensor();
 
     // handle the case of a scalar
@@ -66,12 +59,13 @@ class MklAddNOp : public OpKernel {
       const TensorShape& o_shape = input0.shape();
       Tensor* out_tensor = nullptr;
       mkl_context.output_shape.SetMklTensor(false);
-      AllocateOutputSetMklShape(ctx, src1_idx, &out_tensor, o_shape,
+      AllocateOutputSetMklShape(ctx, 0, &out_tensor, o_shape,
                                 mkl_context.output_shape);
       float user_i1 = (input0.scalar<T>()());
+      ;
       float user_i2 = (input1.scalar<T>()());
-      out_tensor->scalar<T>()() =
-          std::plus<float>{}(user_i1, user_i2);
+      ;
+      out_tensor->scalar<T>()() = std::plus<float>{}(user_i1, user_i2);
       return;
     }
 
@@ -88,8 +82,8 @@ class MklAddNOp : public OpKernel {
       if (o_shape.num_elements() == 0) {
         Tensor* out_tensor = nullptr;
         mkl_context.output_shape.SetMklTensor(false);
-        AllocateOutputSetMklShape(ctx, src1_idx, &out_tensor, o_shape,
-                                 mkl_context.output_shape);
+        AllocateOutputSetMklShape(ctx, 0, &out_tensor, o_shape,
+                                  mkl_context.output_shape);
         return;
       }
     }
@@ -98,9 +92,9 @@ class MklAddNOp : public OpKernel {
     mkl_context.in_strides = new size_t[mkl_context.in_dims];
     // Generate size, stride for input if input is in MKL format.
     if (input1_in_mkl_format || input2_in_mkl_format) {
-      const MklShape* tmp_mkl_shape =
-        (input1_in_mkl_format) ? &mkl_context.input1_shape :
-        &mkl_context.input2_shape;
+      const MklShape* tmp_mkl_shape = (input1_in_mkl_format)
+                                          ? &mkl_context.input1_shape
+                                          : &mkl_context.input2_shape;
       for (int i = 0; i < mkl_context.in_dims; i++) {
         mkl_context.in_sizes[i] = tmp_mkl_shape->GetSizes()[i];
         mkl_context.in_strides[i] = tmp_mkl_shape->GetStrides()[i];
@@ -116,6 +110,7 @@ class MklAddNOp : public OpKernel {
             mkl_context.in_strides[i - 1] * mkl_context.in_sizes[i - 1];
       }
     }
+
     std::vector<float> coeff(2, 1.0);
     mkl_context.MklCreateInputLayouts(ctx);
     CHECK_EQ(dnnSumCreate_F32(&mkl_context.Eltwise, mkl_context.attributes, 2,
@@ -132,7 +127,7 @@ class MklAddNOp : public OpKernel {
      mkl_context.output_shape.SetMklLayout(mkl_context.Eltwise, dnnResourceDst);
 
      mkl_context.output_shape.SetTfLayout(
-        mkl_context.in_dims, mkl_context.in_sizes, mkl_context.in_strides);
+         mkl_context.in_dims, mkl_context.in_sizes, mkl_context.in_strides);
      if (input1_in_mkl_format == true) {
       mkl_context.output_shape.SetTfDimOrder(mkl_context.in_dims,
       mkl_context.input1_shape.GetTfToMklDimMap());
@@ -144,12 +139,12 @@ class MklAddNOp : public OpKernel {
                         mkl_context.output_shape.GetMklLayout())) /
                     sizeof(T));
 
-     AllocateOutputSetMklShape(ctx, src1_idx, &output, tf_shape,
+     AllocateOutputSetMklShape(ctx, 0, &output, tf_shape,
                               mkl_context.output_shape);
     } else {
      const TensorShape& o_shape = input1.shape();
      mkl_context.output_shape.SetMklTensor(false);
-     AllocateOutputSetMklShape(ctx, src1_idx, &output, o_shape,
+     AllocateOutputSetMklShape(ctx, 0, &output, o_shape,
                                 mkl_context.output_shape);
     }
 
@@ -177,18 +172,16 @@ class MklAddNOp : public OpKernel {
     void MklCreateInputLayouts(OpKernelContext* context) {
       bool input1_in_mkl_format = input1_shape.IsMklTensor();
       if (!input1_in_mkl_format) {
-        CHECK_EQ(
-            dnnLayoutCreate_F32(&lt_input1, in_dims, in_sizes, in_strides),
-            E_SUCCESS);
+        CHECK_EQ(dnnLayoutCreate_F32(&lt_input1, in_dims, in_sizes, in_strides),
+                 E_SUCCESS);
       } else {
         lt_input1 = static_cast<dnnLayout_t>(input1_shape.GetCurLayout());
       }
 
       bool input2_in_mkl_format = input2_shape.IsMklTensor();
       if (!input2_in_mkl_format) {
-        CHECK_EQ(
-            dnnLayoutCreate_F32(&lt_input2, in_dims, in_sizes, in_strides),
-            E_SUCCESS);
+        CHECK_EQ(dnnLayoutCreate_F32(&lt_input2, in_dims, in_sizes, in_strides),
+                 E_SUCCESS);
       } else {
         lt_input2 = static_cast<dnnLayout_t>(input2_shape.GetCurLayout());
       }
@@ -264,8 +257,8 @@ class MklAddNOp : public OpKernel {
       bool input2_in_mkl_format = input2_shape.IsMklTensor();
       dnnDelete_F32(Eltwise);
       if (!input1_in_mkl_format || !input2_in_mkl_format) {
-         delete [] in_sizes;
-         delete [] in_strides;
+        delete[] in_sizes;
+        delete[] in_strides;
       }
       if (!input1_in_mkl_format) {
          dnnLayoutDelete_F32(lt_input1);
@@ -277,151 +270,6 @@ class MklAddNOp : public OpKernel {
   } MklAddNOpContext;
 };
 
-#else  // INTEL_MKL_DNN
-template <typename Device, typename T>
-class MklAddNOp : public OpKernel {
- public:
-  ~MklAddNOp() {}
-  explicit MklAddNOp(OpKernelConstruction* context) : OpKernel(context) {}
-
-  void Compute(OpKernelContext* ctx) override {
-    const int num = ctx->num_inputs();
-    // Only additions of 2 input tensors is supported now
-    OP_REQUIRES(ctx, num / 2 == 2,
-                errors::InvalidArgument("Only additions of two tensors "
-                                        "supported by MKL. Num inputs: ",
-                                        num));
-
-    try {
-      auto cpu_engine = engine(engine::cpu, 0);
-      size_t src1_idx = 0, src2_idx = 1;
-      const Tensor& src1_tensor = MklGetInput(ctx, src1_idx);
-      const Tensor& src2_tensor = MklGetInput(ctx, src2_idx);
-
-      MklDnnShape src1_mkl_shape, src2_mkl_shape;
-      GetMklShape(ctx, src1_idx, &src1_mkl_shape);
-      GetMklShape(ctx, src2_idx, &src2_mkl_shape);
-      bool input1_in_mkl_format = src1_mkl_shape.IsMklTensor();
-      bool input2_in_mkl_format = src2_mkl_shape.IsMklTensor();
-      int src1_dims_size = input1_in_mkl_format?
-       src1_mkl_shape.GetDimension(): src1_tensor.dims();
-      int src2_dims_size = input2_in_mkl_format?
-       src2_mkl_shape.GetDimension(): src2_tensor.dims();
-
-      if (!input1_in_mkl_format && src1_dims_size == 0) {
-         Tensor* dst_tensor = nullptr;
-         MklShape mkl_shape_dst;
-         mkl_shape_dst.SetMklTensor(false);
-         AllocateOutputSetMklShape(ctx, src1_idx, &dst_tensor,
-         src1_tensor.shape(), mkl_shape_dst);
-         float user_i1 = (src1_tensor.scalar<T>()());
-         float user_i2 = (src2_tensor.scalar<T>()());
-         dst_tensor->scalar<T>()() =
-           std::plus<float>{}(user_i1, user_i2);
-         return;
-       }
-
-      // If there is nothing to compute, return.
-      if (!input1_in_mkl_format && !input2_in_mkl_format) {
-        if (src1_tensor.shape().num_elements() == 0) {
-           Tensor* dst_tensor = nullptr;
-           MklShape mkl_shape_dst;
-           mkl_shape_dst.SetMklTensor(false);
-           AllocateOutputSetMklShape(ctx, src1_idx, &dst_tensor,
-           src1_tensor.shape(), mkl_shape_dst);
-           return;
-        }
-      }
-
-      // element-wise add operator for tensor input1 and tensor input2
-      std::vector<double> coeff(2, 1.0);
-      MklDnnData<T> src1(&cpu_engine);
-      MklDnnData<T> src2(&cpu_engine);
-      MklDnnData<T> dst(&cpu_engine);
-
-      int tmp_size = input1_in_mkl_format ? src2_dims_size: src1_dims_size;
-      memory::dims dims(tmp_size);
-      memory::dims strides(tmp_size);
-      memory::desc md1({}, memory::data_undef, memory::format_undef);
-      memory::desc md2({}, memory::data_undef, memory::format_undef);
-
-      if ( input1_in_mkl_format || input2_in_mkl_format ) {
-        if ( input1_in_mkl_format ) {
-          md1 = src1_mkl_shape.GetMklLayout();
-          md2 = md1;
-          dst.SetUsrMem(md1);
-        } else {
-          md2 = src2_mkl_shape.GetMklLayout();
-          md1 = md2;
-          dst.SetUsrMem(md2);
-        }
-      } else {
-         dims = TFShapeToMklDnnDims(src1_tensor.shape());
-         strides = CalculateTFStrides(dims);
-         md1 = MklDnnData<T>::CreateBlockedMemDesc(dims, strides);
-         md2 = md1;
-         dst.SetUsrMem(dims, strides);
-      }
-
-      std::vector<memory::primitive_desc> srcs_pd;
-
-      src1.SetUsrMem(md1, &src1_tensor);
-      auto mpd1 = src1.GetUsrMemPrimDesc();
-      srcs_pd.push_back(mpd1);
-
-      src2.SetUsrMem(md2, &src2_tensor);
-      auto mpd2 = src2.GetUsrMemPrimDesc();
-      srcs_pd.push_back(mpd2);
-
-      std::vector<primitive::at> inputs;
-      inputs.push_back(src1.GetOpMem());
-      inputs.push_back(src2.GetOpMem());
-      auto output_pd = dst.GetUsrMemPrimDesc();
-      Tensor* dst_tensor = nullptr;
-      auto sum_pd = sum::primitive_desc(dst.GetUsrMemDesc(), coeff, srcs_pd);
-      auto sum_op = sum(sum_pd, inputs, dst.GetOpMem());
-      if ( input2_in_mkl_format || input1_in_mkl_format ) {
-         MklDnnShape output_mkl_shape;
-         output_mkl_shape.SetMklTensor(true);
-         output_mkl_shape.SetMklLayout(&output_pd);
-         output_mkl_shape.SetElemType(MklDnnType<T>());
-         if ( input1_in_mkl_format ) {
-          output_mkl_shape.SetTfLayout(src1_dims_size,
-          src1_mkl_shape.GetSizesAsMklDnnDims(),
-          src1_mkl_shape.GetTfDataFormat());
-         } else {
-          output_mkl_shape.SetTfLayout(src2_dims_size,
-          src2_mkl_shape.GetSizesAsMklDnnDims(),
-          src2_mkl_shape.GetTfDataFormat());
-         }
-         TensorShape output_tf_shape;
-         output_tf_shape.AddDim((output_pd.get_size() / sizeof(T))
-         + (output_pd.get_size()%sizeof(T) == 0 ? 0 : 1));
-         AllocateOutputSetMklShape(ctx, src1_idx, &dst_tensor, output_tf_shape,
-                                output_mkl_shape);
-      } else {
-         MklShape mkl_shape_dst;
-         mkl_shape_dst.SetMklTensor(false);
-         AllocateOutputSetMklShape(ctx, src1_idx,
-         &dst_tensor, src1_tensor.shape(), mkl_shape_dst);
-      }
-
-      dst.SetUsrMemDataHandle(dst_tensor);
-      std::vector<primitive> net;
-      net.push_back(sum_op);
-      stream(stream::kind::eager).submit(net).wait();
-    } catch (mkldnn::error &e) {
-      string error_msg = "Status: " + std::to_string(e.status) +
-                       ", message: " + string(e.message) +
-                       ", in file " + string(__FILE__) + ":" +
-                       std::to_string(__LINE__);
-      OP_REQUIRES_OK(ctx, errors::Aborted("Operation received an exception:",
-                                            error_msg));
-    }
-  }
-};
-
-#endif
 #define REGISTER_MKL_CPU(T)                                         \
   REGISTER_KERNEL_BUILDER(Name("_MklAddN")                          \
                               .Device(DEVICE_CPU)                   \
diff --git a/tensorflow/core/kernels/mkl_avgpooling_op.cc b/tensorflow/core/kernels/mkl_avgpooling_op.cc
index d751a70fc8..d90baee069 100644
--- a/tensorflow/core/kernels/mkl_avgpooling_op.cc
+++ b/tensorflow/core/kernels/mkl_avgpooling_op.cc
@@ -24,25 +24,10 @@
 
 #include "tensorflow/core/kernels/mkl_pooling_ops_common.h"
 
-#ifdef INTEL_MKL_DNN
-#include "mkldnn.hpp"
-using mkldnn::memory;
-using mkldnn::error;
-using mkldnn::pooling_forward;
-using mkldnn::pooling_backward;
-using mkldnn::padding_kind;
-using mkldnn::engine;
-using mkldnn::prop_kind;
-using mkldnn::algorithm;
-#endif
-
 namespace tensorflow {
 
 typedef Eigen::ThreadPoolDevice CPUDevice;
 
-// For now, MKL-ML is default. So making MKL-DNN not a default choice.
-#ifndef INTEL_MKL_DNN
-
 template <typename Device, typename T>
 class MklAvgPoolingOp : public OpKernel {
  public:
@@ -147,7 +132,7 @@ class MklAvgPoolingOp : public OpKernel {
         E_SUCCESS);
 
     mkl_context.MklCleanup();
-  }  // Compute
+  }
 
  private:
   typedef struct {
@@ -426,293 +411,7 @@ class MklAvgPoolingGradOp : public OpKernel {
   std::vector<int32> stride_;
   Padding padding_;
   TensorFormat data_format_;
-};  // MklAvgPoolingGradOp
-
-
-#else  // INTEL_MKL_DNN is defined
-
-template <typename Device, typename T>
-class MklAvgPoolingOp : public MklPoolingForwardOpBase<T> {
- public:
-  explicit MklAvgPoolingOp(OpKernelConstruction* context)
-  : MklPoolingForwardOpBase<T>(context) {
-    // Workspace is an MKLDNN construct that is only used in Max Pooling.
-    // So set workspace_enabled_ to false.
-    this->workspace_enabled_ = false;
-  }
-
-  void Compute(OpKernelContext* context) override {
-    try {
-      auto cpu_engine = engine(engine::cpu, 0);
-      const Tensor& input_tensor = MklGetInput(context,
-              this->kInputTensorIndexInput);
-      MklDnnShape dnn_shape_input;
-      GetMklShape(context, this->kInputTensorIndexInput, &dnn_shape_input);
-      this->SanityCheckInput(context, input_tensor, dnn_shape_input);
-      if (!context->status().ok()) return;
-
-      MklDnnData<T> dnn_data_input(&cpu_engine);
-      MklDnnData<T> dnn_data_output(&cpu_engine);
-
-      // initialize variables for the pooling op
-      MklPoolParameters pool_params;
-      // Get the input tensor and initialize the pooling parameters
-      this->ConfigureInput(context, dnn_shape_input,
-                          input_tensor, &pool_params,
-                          &dnn_data_input);
-      OP_REQUIRES_OK(context, context->status());
-
-      // Declare output tensor
-      Tensor* output_tensor = nullptr;
-      memory::dims output_dims_mkl_order;
-      this->GetOutputDims(pool_params, &output_dims_mkl_order);
-
-      // If input is in Mkl layout, then just get the memory format from it
-      // directly, instead of using input data_format to AvgPool.
-      if (dnn_shape_input.IsMklTensor()) {
-        dnn_data_output.SetUsrMem(output_dims_mkl_order,
-                static_cast<memory::format>(dnn_data_input.GetUsrMemDesc()
-                    .data.format));
-
-      } else {
-          dnn_data_output.SetUsrMem(output_dims_mkl_order,
-              this->data_format_mkldnn_);
-      }
-
-        // describe the memory layout
-      dnn_data_output.SetOpMemDesc(output_dims_mkl_order, memory::format::any);
-
-      // 3. create a pooling primitive descriptor
-      auto pool_desc = pooling_forward::desc(prop_kind::forward,
-              algorithm::pooling_avg_exclude_padding,
-              dnn_data_input.GetUsrMemDesc(),
-              dnn_data_output.GetUsrMemDesc(),
-              memory::dims({  pool_params.row_stride,
-                              pool_params.col_stride}),
-              memory::dims({  pool_params.window_rows,
-                              pool_params.window_cols}),
-              memory::dims({  static_cast<int>(pool_params.pad_top),
-                              static_cast<int>(pool_params.pad_left)}),
-              memory::dims({  static_cast<int>(pool_params.pad_bottom),
-                              static_cast<int>(pool_params.pad_right)}),
-              TFPaddingToMklDnnPadding(this->padding_));
-      auto pool_prim_desc = pooling_forward::primitive_desc(pool_desc,
-                                                 cpu_engine);
-
-      this->AllocateOutputTensor(context, pool_prim_desc, output_dims_mkl_order,
-                            this->data_format_mkldnn_, &output_tensor);
-      CHECK_NOTNULL(output_tensor);
-
-      OP_REQUIRES_OK(context, context->status());
-      dnn_data_output.SetUsrMemDataHandle(output_tensor);
-
-      this->PrepareAndExecuteNet(pool_prim_desc,
-                                &dnn_data_input,
-                                &dnn_data_output);
-    } catch (mkldnn::error &e) {
-        string error_msg = "Status: " + std::to_string(e.status) +
-                        ", message: " + string(e.message) +
-                        ", in file " + string(__FILE__) + ":" +
-                        std::to_string(__LINE__);
-        OP_REQUIRES_OK(context,
-                        errors::Aborted("Operation received an exception:",
-                                         error_msg));
-    }
-  }  // Compute
-};  // MklAvgPoolingOp
-
-//-----------------------------------------------------------------------------
-
-template <class Device, class T>
-class MklAvgPoolingGradOp : public MklPoolingBackwardOpBase<T> {
- public:
-  explicit MklAvgPoolingGradOp(OpKernelConstruction* context)
-      : MklPoolingBackwardOpBase<T>(context) {
-  }
-
-  void Compute(OpKernelContext* context) override {
-    try {
-      auto cpu_engine = engine(engine::cpu, 0);
-      MklDnnShape original_input_mkl_shape, input_gradient_mkl_shape;
-      const Tensor& tensor_in_shape = MklGetInput(context,
-          kInputTensorIndexInputShape);
-      const Tensor& input_gradient_tensor = MklGetInput(context,
-          kInputTensorIndexInputGradient);
-      GetMklShape(context, kInputTensorIndexInputShape,
-            &original_input_mkl_shape);
-      GetMklShape(context, kInputTensorIndexInputGradient,
-            &input_gradient_mkl_shape);
-
-
-      SanityCheckInputs(context, tensor_in_shape,
-                        input_gradient_tensor,
-                        original_input_mkl_shape,
-                        input_gradient_mkl_shape);
-      if (!context->status().ok()) return;
-
-      // Used to allocate output_diff_src/diff_src
-      // and create pool_fwd mdm desc
-      // 0. Input("orig_input_shape: int32") //NOT a T Tensor!
-      // 1. Input("grad: T")
-
-      MklDnnData<T> input_gradient_diff_dst(&cpu_engine);
-      MklDnnData<T> output_diff_src(&cpu_engine);
-      Tensor* output_tensor_diff_src = nullptr;
-      TensorShape original_input_shape;
-      MklPoolParameters pool_params;
-      memory::dims output_dims_mkl_order, original_input_dims_nchw;
-      // Configure the original input memory descriptor
-      memory::desc original_input_md = ConfigureOriginalInput(context,
-                                      tensor_in_shape,
-                                      original_input_mkl_shape,
-                                      &original_input_dims_nchw,
-                                      &pool_params,
-                                      &original_input_shape);
-
-      // configure the original output memory descriptor
-      // by definition, the shape of the original output is the same
-      // as the shape of the gradient diff_dst
-      memory::desc original_output_md = this->ConfigureOriginalOutput(
-                pool_params, input_gradient_mkl_shape, output_dims_mkl_order);
-
-      memory::desc target_diff_dst_md = this->ConfigureInputGradient(
-                                    input_gradient_mkl_shape,
-                                    input_gradient_tensor,
-                                    &input_gradient_diff_dst,
-                                    original_output_md);
-      // The shape of the output diff src needs to be the same shape as the
-      // original input. But we will set its format to be same as the format of
-      // input gradient. We won't use format of original input since it will
-      // always be in Tensorflow layout (given that AvgPoolGrad gets shape of
-      // the input rather than actual input).
-      output_diff_src.SetUsrMem(original_input_dims_nchw,
-                                static_cast<memory::format>(
-                                  target_diff_dst_md.data.format));
-
-      // Create the forward pooling primitive descriptor so we can reference it
-      // in the backward pooling primitive descriptor
-      auto pool_fwd_desc = pooling_forward::desc(prop_kind::forward,
-              algorithm::pooling_avg_exclude_padding,
-              original_input_md,
-              original_output_md,
-              memory::dims({  pool_params.row_stride,
-                              pool_params.col_stride}),
-              memory::dims({  pool_params.window_rows,
-                              pool_params.window_cols}),
-              memory::dims({  static_cast<int>(pool_params.pad_top),
-                              static_cast<int>(pool_params.pad_left)}),
-              memory::dims({  static_cast<int>(pool_params.pad_bottom),
-                              static_cast<int>(pool_params.pad_right)}),
-              TFPaddingToMklDnnPadding(this->padding_));
-      auto pool_fwd_prim_desc
-              = pooling_forward::primitive_desc(pool_fwd_desc,
-                                                  cpu_engine);
-
-      auto pool_bkwd_desc = pooling_backward::desc(
-              algorithm::pooling_avg_exclude_padding,
-              output_diff_src.GetUsrMemDesc(),
-              target_diff_dst_md,
-              memory::dims({  pool_params.row_stride,
-                              pool_params.col_stride}),
-              memory::dims({  pool_params.window_rows,
-                              pool_params.window_cols}),
-              memory::dims({  static_cast<int>(pool_params.pad_top),
-                              static_cast<int>(pool_params.pad_left)}),
-              memory::dims({  static_cast<int>(pool_params.pad_bottom),
-                              static_cast<int>(pool_params.pad_right)}),
-              TFPaddingToMklDnnPadding(this->padding_));
-      auto pool_bkwd_prim_desc
-                = pooling_backward::primitive_desc(pool_bkwd_desc,
-                                              cpu_engine,
-                                              pool_fwd_prim_desc);
-      this->AllocateOutputTensor(context, pool_bkwd_prim_desc,
-                      original_input_dims_nchw,
-                      this->data_format_mkldnn_,
-                      &output_tensor_diff_src);
-
-      output_diff_src.SetUsrMemDataHandle(output_tensor_diff_src);
-
-      this->PrepareAndExecuteNet(pool_bkwd_prim_desc,
-                          &input_gradient_diff_dst,
-                          &output_diff_src,
-                          memory::primitive_desc(
-                              target_diff_dst_md,
-                              cpu_engine));
-    } catch (mkldnn::error &e) {
-      string error_msg = "Status: " + std::to_string(e.status) +
-                      ", message: " + string(e.message) +
-                      ", in file " + string(__FILE__) + ":" +
-                      std::to_string(__LINE__);
-      OP_REQUIRES_OK(context,
-                      errors::Aborted("Compute received an exception:",
-                                      error_msg));
-    }
-  }  // Compute
-
- private:
-  // 0. Input("orig_input_shape: int32")
-  // 1. Input("grad: T")
-  const int kInputTensorIndexInputShape = 0;
-  const int kInputTensorIndexInputGradient = 1;
-
-  memory::desc ConfigureOriginalInput(OpKernelContext* context,
-        const Tensor& tensor_original_input_shape,
-        const MklDnnShape& original_input_mkl_shape,
-        memory::dims* original_input_dims_mkl_order,
-        MklPoolParameters* pool_params,
-        TensorShape* input_tensor_shape) {
-    CHECK_NOTNULL(original_input_dims_mkl_order);
-    CHECK_NOTNULL(pool_params);
-    CHECK_NOTNULL(input_tensor_shape);
-    // For AvgPoolGrad, we only get the size of the original input because
-    // The original data is irrelvant.
-    auto shape_vec = tensor_original_input_shape.vec<int32>();
-    for (int64 i = 0; i < tensor_original_input_shape.NumElements(); ++i) {
-      input_tensor_shape->AddDim(shape_vec(i));
-    }
-
-    return MklPoolingBackwardOpBase<T>::ConfigureOriginalInput(
-                                              context,
-                                              tensor_original_input_shape,
-                                              original_input_mkl_shape,
-                                              original_input_dims_mkl_order,
-                                              pool_params,
-                                              *input_tensor_shape);
-}
-
-  void SanityCheckInputs(OpKernelContext* context,
-                        const Tensor& tensor_in_shape,
-                        const Tensor& input_gradient_tensor,
-                        const MklDnnShape& original_input_mkl_shape,
-                        const MklDnnShape& input_gradient_mkl_shape) {
-    if (!original_input_mkl_shape.IsMklTensor()) {
-      OP_REQUIRES(context, tensor_in_shape.dims() == 1 &&
-          tensor_in_shape.NumElements() == 4,
-          errors::InvalidArgument("original input shape must be "
-                "1-dimensional and 4 elements"));
-    } else {
-      OP_REQUIRES(context, original_input_mkl_shape.GetDimension() == 1 &&
-          original_input_mkl_shape.DimSize(0) == 4,
-          errors::InvalidArgument("original input shape must be "
-                "1-dimensional and 4 elements"));
-    }
-
-    if (!input_gradient_mkl_shape.IsMklTensor()) {
-      // For avgpooling, input_gradient_diff_dst should have 4 dimensions.
-      OP_REQUIRES(context, input_gradient_tensor.dims() == 4,
-          errors::InvalidArgument("Gradient shape must be "
-                              "4-dimensional"));
-    } else {
-      OP_REQUIRES(context, input_gradient_mkl_shape.GetDimension() == 4,
-          errors::InvalidArgument("Gradient shape must be "
-                              "4-dimensional"));
-    }
-  }
-};  // MklAvgPoolingGradOp
-
-
-
-#endif  // INTEL_MKL_DNN
+};
 
 REGISTER_KERNEL_BUILDER(Name("_MklAvgPool")
                             .Device(DEVICE_CPU)
@@ -728,4 +427,3 @@ REGISTER_KERNEL_BUILDER(Name("_MklAvgPoolGrad")
 
 }  // namespace tensorflow
 #endif  // INTEL_MKL
-
diff --git a/tensorflow/core/kernels/mkl_concat_op.cc b/tensorflow/core/kernels/mkl_concat_op.cc
index d0175dfd71..e6673b2ffb 100644
--- a/tensorflow/core/kernels/mkl_concat_op.cc
+++ b/tensorflow/core/kernels/mkl_concat_op.cc
@@ -1,8 +1,11 @@
 /* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
+
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
 You may obtain a copy of the License at
+
     http://www.apache.org/licenses/LICENSE-2.0
+
 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@@ -30,22 +33,11 @@ limitations under the License.
 #include "mkl_dnn_types.h"
 #include "tensorflow/core/util/mkl_util.h"
 
-#ifdef INTEL_MKL_DNN
-#include "mkldnn.hpp"
-
-using mkldnn::stream;
-using mkldnn::concat;
-#endif
-
 namespace tensorflow {
 typedef Eigen::ThreadPoolDevice CPUDevice;
 
-// List of TensorShape objects. Used in Concat/Split layers.
-typedef std::vector<TensorShape> TensorShapeList;
-
 enum AxisArgumentName { NAME_IS_AXIS, NAME_IS_CONCAT_DIM };
 
-
 // TODO(intelft) Check if we can reuse existing EigenConcatOp using Mutable
 // reference inputs.
 // --------------------------------------------------------------------------
@@ -63,8 +55,6 @@ class EigenConcatBaseOp : public OpKernel {
   // we need to have empty Compute because Compute is pure virtual function.
   void Compute(OpKernelContext* c) {}
 
-#ifndef INTEL_MKL_DNN
-
   void Compute(OpKernelContext* c, const std::vector<Tensor>& values) {
     const Tensor* concat_dim_tensor;
     const char* axis_attribute_name =
@@ -149,89 +139,8 @@ class EigenConcatBaseOp : public OpKernel {
       ConcatCPU<T>(c->device(), inputs_flat, &output_flat);
     }
   }
-
-#else  // MKL_DNN
-
-void Compute(OpKernelContext* c, const std::vector<Tensor>& values,
-                        const TensorShapeList& input_shapes) {
-    const Tensor* concat_dim_tensor;
-    const char* axis_attribute_name =
-        AxisArgName == NAME_IS_AXIS
-            ? "axis"
-            : AxisArgName == NAME_IS_CONCAT_DIM ? "concat_dim" : "<invalid>";
-    OP_REQUIRES_OK(c, c->input(axis_attribute_name, &concat_dim_tensor));
-    OP_REQUIRES(c, IsLegacyScalar(concat_dim_tensor->shape()),
-                errors::InvalidArgument(
-                    axis_attribute_name,
-                    " tensor should be a scalar integer, but got shape ",
-                    concat_dim_tensor->shape().DebugString()));
-    const int32 concat_dim =
-        internal::SubtleMustCopy(concat_dim_tensor->scalar<int32>()());
-    // Instead of accessing values from context, we use input to Compute.
-    const int N = values.size();
-    const int input_dims = input_shapes[0].dims();
-    const TensorShape& input_shape = input_shapes[0];
-
-    int32 axis = concat_dim < 0 ? concat_dim + input_dims : concat_dim;
-    OP_REQUIRES(c,
-                (0 <= axis && axis < input_dims) ||
-                    (allow_legacy_scalars() && concat_dim == 0),
-                errors::InvalidArgument(
-                    "ConcatOp : Expected concatenating dimensions in the range "
-                    "[",
-                    -input_dims, ", ", input_dims, "), but got ", concat_dim));
-    // Note that we reduce the concat of n-dimensional tensors into a two
-    // dimensional concat. Assuming the dimensions of any input/output
-    // tensor are {x0, x1,...,xn-1, y0, y1,...,ym-1}, where the concat is along
-    // the dimension indicated with size y0, we flatten it to {x, y}, where y =
-    // Prod_i(yi) and x = ((n > 0) ? Prod_i(xi) : 1).
-    ConstMatrixVector inputs_flat;
-    inputs_flat.reserve(N);
-    int64 inputs_flat_dim0 = 1;
-    for (int d = 0; d < axis; ++d) {
-      inputs_flat_dim0 *= input_shape.dim_size(d);
-    }
-    int64 output_concat_dim = 0;
-    const bool input_is_scalar = IsLegacyScalar(input_shape);
-    for (int i = 0; i < N; ++i) {
-      const auto in = values[i];
-      const bool in_is_scalar = IsLegacyScalar(input_shapes[i]);
-      OP_REQUIRES(
-          c, (input_shapes[i].dims() == input_dims) ||
-              (input_is_scalar && in_is_scalar),
-          errors::InvalidArgument(
-              "ConcatOp : Ranks of all input tensors should match: shape[0] = ",
-              input_shape.DebugString(), " vs. shape[", i,
-              "] = ", input_shapes[i].DebugString()));
-      if (in.NumElements() > 0) {
-        int64 inputs_flat_dim1 = in.NumElements() / inputs_flat_dim0;
-        inputs_flat.emplace_back(new typename TTypes<T, 2>::ConstMatrix(
-            in.shaped<T, 2>({inputs_flat_dim0, inputs_flat_dim1})));
-      }
-      output_concat_dim += input_shapes[i].dims() > 0 ?
-                           input_shapes[i].dim_size(axis) : 1;
-    }
-
-    TensorShape output_shape(input_shape);
-    if (output_shape.dims() == 0) {
-      output_shape.AddDim(output_concat_dim);
-    } else {
-      output_shape.set_dim(axis, output_concat_dim);
-    }
-    Tensor* output = nullptr;
-    OP_REQUIRES_OK(c, c->allocate_output(0, output_shape, &output));
-    if (output->NumElements() > 0) {
-      int64 output_dim1 = output->NumElements() / inputs_flat_dim0;
-      auto output_flat = output->shaped<T, 2>({inputs_flat_dim0, output_dim1});
-      ConcatCPU<T>(c->device(), inputs_flat, &output_flat);
-    }
-  }
-
-#endif
 };
 
-#ifndef INTEL_MKL_DNN
-
 // --------------------------------------------------------------------------
 //                      Mkl Concat Op
 // --------------------------------------------------------------------------
@@ -418,7 +327,6 @@ class MklConcatOp : public OpKernel {
     OP_REQUIRES_OK(context, context->status());
   }
 
-
  private:
   typedef struct {
     TensorFormat data_format;
@@ -527,284 +435,8 @@ class MklConcatOp : public OpKernel {
         mkl_tensor->flat<uint8>().data(),
         mkl_tensor->flat<uint8>().size() * sizeof(uint8));
   }
-
-  // overloading methods with input shapes as a list of TensorShape's
-  void CallEigenVersion(OpKernelContext* context, const OpInputList& values,
-                        const TensorShapeList& input_shapes) {
-    CHECK_EQ(values.size(), input_shapes.size());
-
-    std::vector<Tensor> converted_values;
-    for (int i = 0; i < input_shapes.size(); i++) {
-      converted_values.push_back(values[i]);
-    }
-
-    // Call Eigen concat.
-    eigen_concat_op_.Compute(context, converted_values);
-
-    // Set dummy Mkl tensor as output Mkl tensor for this op.
-    MklShape mkl_tensor_mkl_shape;
-    mkl_tensor_mkl_shape.SetMklTensor(false);
-    mkl_tensor_mkl_shape.SetDimensions(4);
-    Tensor* mkl_tensor = nullptr;
-    TensorShape mkl_tensor_tf_shape;
-    mkl_tensor_tf_shape.AddDim(
-        SIZE_OF_MKL_SERIAL_DATA(mkl_tensor_mkl_shape.GetDimension()));
-    int tf_output_index = 0;
-    context->allocate_output(
-        GetTensorMetaDataIndex(tf_output_index, context->num_outputs()),
-        mkl_tensor_tf_shape, &mkl_tensor);
-    mkl_tensor_mkl_shape.SerializeMklShape(
-        mkl_tensor->flat<uint8>().data(),
-        mkl_tensor->flat<uint8>().size() * sizeof(uint8));
-  }
 };
 
-#else
-
-// --------------------------------------------------------------------------
-//                      Mkl Concat Op
-// --------------------------------------------------------------------------
-
-template <typename Device, typename T, AxisArgumentName AxisArgName>
-class MklConcatOp : public OpKernel {
- private:
-  TensorFormat data_format_;
-  EigenConcatBaseOp<Device, T, AxisArgName> eigen_concat_op_;
-
- public:
-  typedef std::vector<std::unique_ptr<typename TTypes<T, 2>::ConstMatrix>>
-      ConstMatrixVector;
-
-  explicit MklConcatOp(OpKernelConstruction* c)
-      : OpKernel(c), eigen_concat_op_(c) {}
-
-  void Compute(OpKernelContext* context) override {
-    try {
-      auto cpu_engine = engine(engine::cpu, 0);
-      OpInputList input_tensors;
-      GetMklInputList(context, "values", &input_tensors);
-      const int N = input_tensors.size();
-
-      // Get Tensor shapes.
-      std::vector<MklDnnShape> input_shapes(N);
-      GetMklShapeList(context, "values", &input_shapes);
-
-      const Tensor& concat_dim_tensor = (AxisArgName == NAME_IS_CONCAT_DIM)
-                    ? MklGetInput(context, 0) : MklGetInput(context, N);
-      // Sanity checks
-      OP_REQUIRES(context, IsLegacyScalar(concat_dim_tensor.shape()),
-        errors::InvalidArgument(
-            "Concat dim tensor should be a scalar integer, but got shape ",
-            concat_dim_tensor.shape().DebugString()));
-      int32 concat_dim = internal::SubtleMustCopy(
-                           concat_dim_tensor.scalar<int32>()());
-      if (concat_dim < 0) concat_dim = N + concat_dim;
-
-      // check that ranks of all tensors match
-      // and that their shapes match except for concat_dim.
-      int i = 0;
-      bool invoke_eigen = false;
-      bool are_all_mkl_inputs = true, are_all_tf_inputs = true;
-      const TensorShape expected_shape = input_shapes[0].IsMklTensor() ?
-                                         input_shapes[0].GetTfShape() :
-                                         input_tensors[0].shape();
-      size_t expected_dims = expected_shape.dims();
-      for (auto& s : input_shapes) {
-        if (s == expected_shape) {++i; continue;}
-
-        TensorShape s_shape = s.IsMklTensor() ? s.GetTfShape() :
-                      input_tensors[i].shape();
-        size_t s_dims = s_shape.dims();
-
-        OP_REQUIRES(context, s_dims == expected_dims,
-                  errors::InvalidArgument(
-                      "_MklConcatOp : Ranks of all input tensors should match:"
-                      " input dimensions = ",
-                      s_dims, " vs. expected rank = ", expected_dims));
-
-        for (int d = 0; d < expected_dims; ++d) {
-          if (d == concat_dim) continue;
-
-          size_t expected_size = expected_shape.dim_size(d);
-          size_t s_size = s_shape.dim_size(d);
-          OP_REQUIRES(
-            context, expected_size == s_size,
-            errors::InvalidArgument("_MklConcatOp : Dimensions of inputs "
-                    "should match: shape[0][", d, "]= ", expected_size,
-                    " vs. shape[", i, "][", d, "] = ", s_size));
-        }
-
-        if (s.IsMklTensor())
-          are_all_tf_inputs = false;
-        else
-          are_all_mkl_inputs = false;
-
-        if (s_dims != 4) invoke_eigen = true;
-        ++i;
-      }
-
-      // All inputs are not in one format (TF or MKL). This is mixed input case.
-      // We can potentially optimize this case by converting all TF inputs
-      // to Mkl format. But currently, we fall to Eigen for this case.
-      // It may be possible to convert inputs that in TF format to Mkl
-      // format and avoid calling eigen version.
-      if (!are_all_tf_inputs && !are_all_mkl_inputs) invoke_eigen = true;
-
-      // Temporary fallback to Eigen until MKLDNN Concat performance
-      // is improved. To be removed.
-      invoke_eigen = true;
-
-      // Call Eigen library
-      if (invoke_eigen) {
-        TensorShapeList tf_input_shapes;
-        i = 0;
-        for (auto& s : input_shapes) {
-          TensorShape s_shape = s.IsMklTensor() ? s.GetTfShape() :
-                                input_tensors[i].shape();
-          tf_input_shapes.push_back(s_shape);
-          ++i;
-        }
-        CallEigenVersion(context, input_tensors, tf_input_shapes);
-        return;
-      }
-
-      memory::dims dst_dims;
-      if (are_all_mkl_inputs)
-        dst_dims = TFShapeToMklDnnDims(input_shapes[0].GetTfShape());
-      else
-        // When all the inputs are in Tensorflow format, we don't know
-        // what is the input data format. In that case, we just use
-        // output format that is same as input formats.
-        dst_dims = TFShapeToMklDnnDims(input_tensors[0].shape());
-
-      std::vector<memory::primitive_desc> srcs_pd;
-      std::vector<MklDnnData<T>> srcs(N, MklDnnData<T>(&cpu_engine));
-      int64 dst_concat_dim_size = 0;
-      for (int k =0; k < N; k++) {
-        bool is_mkl_tensor = input_shapes[k].IsMklTensor();
-        memory::dims src_dims;
-
-        // Same comment as dst_dims for src_dims.
-        src_dims = (is_mkl_tensor) ?
-                   TFShapeToMklDnnDims(input_shapes[k].GetTfShape()) :
-                   TFShapeToMklDnnDims(input_tensors[k].shape());
-
-        dst_concat_dim_size += src_dims[concat_dim];
-        auto src_md = is_mkl_tensor ? input_shapes[k].GetMklLayout() :
-          // It does not matter what data format we use here (NHWC or NCHW).
-          // We just need to ensure that output of Concat uses same data format
-          // as input.
-                  memory::desc(src_dims, MklDnnType<T>(), memory::format::nhwc);
-
-        srcs[k].SetUsrMem(src_md, &input_tensors[k]);
-        auto src_mpd = srcs[k].GetUsrMemPrimDesc();
-        srcs_pd.push_back(src_mpd);
-      }
-      dst_dims[concat_dim] = dst_concat_dim_size;
-
-      MklDnnData<T> dst(&cpu_engine);
-      memory::desc dst_md({}, memory::data_undef, memory::format_undef);
-      memory::dims dst_dims_in_nchw;
-      if (are_all_mkl_inputs) {
-        // Since we are passing a specific format for destination,
-        // we need to have dst_dims in MklDnn order (NCHW).
-        auto orig_tf_format = input_shapes[0].GetTfDataFormat();
-        dst_dims_in_nchw = MklDnnDimsInNCHW(dst_dims,
-                               MklDnnDataFormatToTFDataFormat(orig_tf_format));
-        // We will set the output in the same format as input to avoid layout
-        // conversions.
-        // Currently we are setting dst format same as input format.
-        // See if we can make this choice in a better way.
-        dst_md = memory::desc(dst_dims_in_nchw, MklDnnType<T>(),
-                 (memory::format) input_shapes[0].GetMklLayout().data.format);
-      } else {
-        // Again, format does not matter here. We just need to make it same as
-        // input format.
-        dst_md = memory::desc(dst_dims, MklDnnType<T>(), memory::format::nhwc);
-      }
-
-      std::vector<primitive::at> inputs;
-      for (int k=0; k < input_tensors.size(); k++)
-        inputs.push_back(srcs[k].GetOpMem());
-
-      // If all inputs are in MKL format, then meaning of concat_dim needs to
-      // change. Value of concat_dim is tied to input Tensorflow data format
-      // (NHWC or NCHW). MklDnn dimensions are in NCHW order. So if Tensorflow
-      // tensors are in NCHW order, then concat_dim semantics is preserved.
-      // But ifinput tensors are in NHWC order, then semantics need to change.
-      // E.g., if we are concatinating over Channel (dimension 3 for NHWC),
-      // then since MklDnn order is NCHW, concat_dim needs to be 1.
-      if (are_all_mkl_inputs)
-        concat_dim = input_shapes[0].TfDimIdx(concat_dim);
-
-      auto concat_pd = concat::primitive_desc(dst_md, concat_dim, srcs_pd);
-
-      MklDnnShape dnn_shape_dst;
-      TensorShape tf_shape_dst;
-      Tensor* dst_tensor = nullptr;
-      if (are_all_mkl_inputs) {
-        dnn_shape_dst.SetMklTensor(true);
-        auto dst_pd = concat_pd.dst_primitive_desc();
-        dnn_shape_dst.SetMklLayout(&dst_pd);
-        dnn_shape_dst.SetElemType(MklDnnType<T>());
-        dnn_shape_dst.SetTfLayout(dst_dims.size(), dst_dims_in_nchw,
-                                  input_shapes[0].GetTfDataFormat());
-        tf_shape_dst.AddDim((dst_pd.get_size() / sizeof(T)));
-      } else {
-        dnn_shape_dst.SetMklTensor(false);
-        tf_shape_dst = MklDnnDimsToTFShape(dst_dims);
-      }
-      AllocateOutputSetMklShape(context, 0, &dst_tensor,
-                                tf_shape_dst, dnn_shape_dst);
-      CHECK_NOTNULL(dst_tensor);
-
-      dst_md = dnn_shape_dst.IsMklTensor() ?
-               dnn_shape_dst.GetMklLayout() : dst_md;
-      dst.SetUsrMem(dst_md, dst_tensor);
-
-      auto concat_op = concat(concat_pd, inputs, dst.GetOpMem());
-      std::vector<primitive> net;
-      net.push_back(concat_op);
-      stream(stream::kind::eager).submit(net).wait();
-    } catch (mkldnn::error &e) {
-        string error_msg = "Status: " + std::to_string(e.status) +
-               ", message: " + string(e.message) + ", in file " +
-               string(__FILE__) + ":" + std::to_string(__LINE__);
-        OP_REQUIRES_OK(context, errors::Aborted(
-                "Operation received an exception:", error_msg));
-    }
-  }
-
-  void CallEigenVersion(OpKernelContext* context, const OpInputList& values,
-                        const TensorShapeList& input_shapes) {
-    CHECK_EQ(values.size(), input_shapes.size());
-
-    std::vector<Tensor> converted_values;
-    for (int i = 0; i < input_shapes.size(); i++)
-      converted_values.push_back(values[i]);
-
-    // Call Eigen concat.
-    eigen_concat_op_.Compute(context, converted_values, input_shapes);
-
-    // Set output Mkl tensor for this op.
-    MklDnnShape dnn_shape_output;
-    dnn_shape_output.SetMklTensor(false);
-    dnn_shape_output.SetDimensions(4);
-    Tensor* output_tensor = nullptr;
-    TensorShape tf_shape_output;
-    tf_shape_output.AddDim(
-        dnn_shape_output.GetSerializeBufferSize());
-    context->allocate_output(
-        GetTensorMetaDataIndex(0, context->num_outputs()),
-        tf_shape_output, &output_tensor);
-    dnn_shape_output.SerializeMklDnnShape(
-        output_tensor->flat<uint8>().data(),
-        output_tensor->flat<uint8>().size() * sizeof(uint8));
-  }
-};
-
-#endif
-
 /* Use optimized concat for float type only */
 #define REGISTER_MKL_CPU(type)                                              \
   REGISTER_KERNEL_BUILDER(Name("_MklConcat")                                \
diff --git a/tensorflow/core/kernels/mkl_conv_grad_filter_ops.cc b/tensorflow/core/kernels/mkl_conv_grad_filter_ops.cc
index 793fa24d99..f291281108 100644
--- a/tensorflow/core/kernels/mkl_conv_grad_filter_ops.cc
+++ b/tensorflow/core/kernels/mkl_conv_grad_filter_ops.cc
@@ -47,8 +47,11 @@ limitations under the License.
 
 using mkldnn::stream;
 using mkldnn::prop_kind;
+
+using mkldnn::convolution_forward;
 using mkldnn::convolution_backward_weights;
-using mkldnn::memory;
+using mkldnn::convolution_direct;
+
 #endif
 
 namespace tensorflow {
@@ -423,229 +426,183 @@ class MklConv2DCustomBackpropFilterOp : public OpKernel {
   TensorFormat data_format_;
 };
 
-#define REGISTER_MKL_FILTER_KERNELS(T)                              \
-  REGISTER_KERNEL_BUILDER(Name("_MklConv2DBackpropFilter")          \
-                              .Device(DEVICE_CPU)                   \
-                              .TypeConstraint<T>("T")               \
-                              .Label(mkl_op_registry::kMklOpLabel), \
-              MklConv2DCustomBackpropFilterOp<CPUDevice, T>);
-TF_CALL_float(REGISTER_MKL_FILTER_KERNELS);
-#undef REGISTER_MKL_FILTER_KERNELS
-
 #else
 
-template <typename Device, class T, bool biasEnabled>
-class MklConv2DCustomBackpropFilterOp :
-  public MklConv2DBackpropCommonOp<Device, T> {
+template <typename Device, class T>
+class MklConv2DCustomBackpropFilterOp : public OpKernel {
  public:
   explicit MklConv2DCustomBackpropFilterOp(OpKernelConstruction* context)
-      : MklConv2DBackpropCommonOp<Device, T>(context) { }
-  ~MklConv2DCustomBackpropFilterOp() {}
-
- private:
-  void ValidateMklShapes(const MklDnnShape& input_mkl_shape,
-                         const MklDnnShape& filter_mkl_shape,
-                         const MklDnnShape& obp_mkl_shape) {
-    CHECK(!filter_mkl_shape.IsMklTensor())
-      << "Conv2DBackpropFilter: filter should not be in MKL Layout";
-  }
-
-  size_t GetInputTensorIndexWithSizes() { return 1; /* filter index */ }
-
-  TensorShape MakeInputTfShape(OpKernelContext* context,
-                               const Tensor& input_tensor) {
-    size_t input_idx = 0;
-    return GetTfShape(context, input_idx);
-  }
-
-  TensorShape MakeFilterTfShape(OpKernelContext* context,
-                                const Tensor& filter_tensor) {
-    TensorShape filter_tf_shape;
-    CHECK_EQ(TensorShapeUtils::IsVector(filter_tensor.shape()), true);
-    CHECK_EQ(TensorShapeUtils::MakeShape(
-             filter_tensor.vec<int32>(), &filter_tf_shape).ok(), true);
-    return filter_tf_shape;
-  }
+      : OpKernel(context) {
+    string data_format;
+    OP_REQUIRES_OK(context, context->GetAttr("data_format", &data_format));
+    OP_REQUIRES(context, FormatFromString(data_format, &data_format_),
+                errors::InvalidArgument("Invalid data format"));
 
-  const memory::dims& GetOutputDims(const memory::dims& fwd_input_dims,
-                                    const memory::dims& fwd_filter_dims) {
-    // Shape of output of Conv2DBackpropFilter is same as shape of filter.
-    return fwd_filter_dims;
+    OP_REQUIRES_OK(context, context->GetAttr("strides", &strides_));
+    int stride_n = GetTensorDim(strides_, data_format_, 'N');
+    int stride_c = GetTensorDim(strides_, data_format_, 'C');
+    OP_REQUIRES(
+        context, (stride_n == 1 && stride_c == 1),
+        errors::InvalidArgument("Current implementation does not yet support "
+                                "strides in the batch and depth dimensions."));
+    OP_REQUIRES_OK(context, context->GetAttr("padding", &padding_));
   }
 
-  memory::format GetOutputFormat(const memory::format data_format) {
-    // Output layout is Tensorflow's filter layout (HWIO).
-    return memory::format::hwio;
-  }
+  void Compute(OpKernelContext* context) override {
+    try {
+      auto cpu_engine = engine(engine::cpu, 0);
 
-  void CreatePrimitive(OpKernelContext* context,
-                       const engine& cpu_engine,
-                       const convolution_forward::primitive_desc& conv_fwd_pd,
-                       MklDnnData<T>* input, MklDnnData<T>* filter,
-                       MklDnnData<T>* outbackprop, MklDnnData<T>* output,
-                       Tensor** output_tensor,
-                       const memory::dims& strides,
-                       const memory::dims& padding_l,
-                       const memory::dims& padding_r,
-                       padding_kind padding,
-                       const memory::dims& bwd_output_dims,
-                       memory::format bwd_output_format) {
-    CHECK_NOTNULL(context);
-    CHECK_NOTNULL(input);
-    CHECK_NOTNULL(filter);
-    CHECK_NOTNULL(outbackprop);
-    CHECK_NOTNULL(output);
-    CHECK_NOTNULL(output_tensor);
-
-    MklDnnData<T>* bias_grad = nullptr;
-    int depth = 0;
-    if (biasEnabled) {
-      // Data structure for bias_grad
-      bias_grad = new MklDnnData<T> (&cpu_engine);
-      TensorShape obp_tf_shape = GetTfShape(context, 2);
-      depth = (MklConv2DBackpropCommonOp<Device, T>::GetTFDataFormat()
-                == FORMAT_NCHW) ?
-          obp_tf_shape.dim_size(1) : obp_tf_shape.dim_size(3);
-      memory::dims bias_grad_dims = {depth};
-      bias_grad->SetOpMemDesc(bias_grad_dims, memory::format::x);
-    }
+      MklDnnData<T> input(&cpu_engine);
+      MklDnnData<T> outbackprop(&cpu_engine);
+      MklDnnData<T> output(&cpu_engine);
 
-    // Create convolution backward weights primitive.
-    auto bwd_desc = (biasEnabled && (bias_grad != nullptr))?
-        convolution_backward_weights::desc(convolution_direct,
-                                input->GetOpMemDesc(), output->GetOpMemDesc(),
-                                bias_grad->GetOpMemDesc(),
-                                outbackprop->GetOpMemDesc(), strides, padding_l,
-                                padding_r, padding) :
-        convolution_backward_weights::desc(convolution_direct,
-                          input->GetOpMemDesc(), output->GetOpMemDesc(),
-                          outbackprop->GetOpMemDesc(), strides, padding_l,
-                          padding_r, padding);
-
-    auto bwd_pd = convolution_backward_weights::primitive_desc(bwd_desc,
-                                                            cpu_engine,
-                                                            conv_fwd_pd);
-
-    // Allocate output tensor.
-    AllocateOutputTensor(context, bwd_pd, bwd_output_dims,
-                         bwd_output_format, output_tensor);
-
-    CHECK_NOTNULL(*output_tensor);
-    // Set buffer handle using allocated output tensor.
-    output->SetUsrMemDataHandle(*output_tensor);
-
-    if (biasEnabled && (bias_grad != nullptr)) {
-      // Allocate bias_grad tensor
-      TensorShape bias_grad_shape({depth});
-      Tensor* bias_grad_tensor = nullptr;
-      AllocateBiasGradTensor(context, bias_grad_shape, &bias_grad_tensor);
-      memory::dims bias_grad_dims = {depth};
-      // Since Bias is 1D, we use format::x from MKLDNN to represent it.
-      auto bias_grad_md = memory::desc({bias_grad_dims}, MklDnnType<T>(),
-                                       memory::format::x);
-      bias_grad->SetUsrMem(bias_grad_md, bias_grad_tensor);
-      bias_grad->SetUsrMemDataHandle(bias_grad_tensor);
-    }
+      // Input tensors
+      const Tensor& input_tensor = MklGetInput(context, 0);
+      const Tensor& filter_tensor = MklGetInput(context, 1);
+      const Tensor& obp_tensor = MklGetInput(context, 2);  // Outbackprop
 
-    if (biasEnabled && (bias_grad != nullptr)) {
-      PrepareAndExecutePrimitive(bwd_pd, input, outbackprop, output, bias_grad);
-    } else {
-      PrepareAndExecutePrimitive(bwd_pd, input, outbackprop, output);
+      // Generate input shapes.
+      TensorShape filter_shape;
+      OP_REQUIRES(context, TensorShapeUtils::IsVector(filter_tensor.shape()),
+        errors::InvalidArgument(
+              "Conv2DBackpropFilter: filter_sizes input must be 1-dim, not ",
+              filter_tensor.dims()));
+      OP_REQUIRES_OK(context, TensorShapeUtils::MakeShape(
+                        filter_tensor.vec<int32>(), &filter_shape));
+      TensorShape input_shape = input_tensor.shape();
+      TensorShape obp_shape = obp_tensor.shape();
+
+      // By default, all dims are in MKL order. Only dims in TF order
+      // are those with prefix tf_order.
+      memory::dims obp_dims, fwd_input_dims, fwd_filter_dims;
+      memory::dims padding_l, padding_r, strides, fwd_output_dims;
+      memory::dims fwd_output_dims_tf_order;
+
+      // Get forward convolution parameters.
+      MklDnnConvUtil conv_utl(context, strides_, padding_, data_format_);
+      conv_utl.GetConvFwdSizesInMklOrder(input_shape, filter_shape,
+                                         &fwd_input_dims, &fwd_filter_dims,
+                                         &strides,
+                                         &fwd_output_dims_tf_order,
+                                         &fwd_output_dims,
+                                         &padding_l, &padding_r);
+      if (!context->status().ok()) return;
+
+      // Create Convolution forward descriptor since Convolution backward
+      // API needs it. For that, we first need to create input, filter
+      // and output memory descriptors.
+      auto mkl_data_format = TFDataFormatToMklDnnDataFormat(data_format_);
+      auto fwd_src_md = memory::desc(fwd_input_dims, MklDnnType<T>(),
+                                     mkl_data_format);
+      auto fwd_filter_md = memory::desc(fwd_filter_dims, MklDnnType<T>(),
+                                        memory::format::hwio);
+      auto fwd_out_md = memory::desc(fwd_output_dims, MklDnnType<T>(),
+                                     mkl_data_format);
+      auto fwd_desc = convolution_forward::desc(prop_kind::forward,
+            convolution_direct, fwd_src_md, fwd_filter_md, fwd_out_md,
+            strides, padding_l, padding_r, TFPaddingToMklDnnPadding(padding_));
+      auto fwd_pd = convolution_forward::primitive_desc(fwd_desc, cpu_engine);
+
+      // Allocate output tensor and shape
+      // TODO(nhasabni): Update this when support for MKL layout is added.
+      // Shape of output of Conv2DBackpropInput is same as 'input' of Conv2D.
+      TensorShape tf_output_shape(filter_shape);
+      MklShape mkl_output_mkl_shape;
+      mkl_output_mkl_shape.SetMklTensor(false);
+      Tensor* output_tensor = nullptr;
+      AllocateOutputSetMklShape(context, 0, &output_tensor, tf_output_shape,
+                                mkl_output_mkl_shape);
+
+      // Create memory for user data.
+      // Describe how the inputs and outputs of Convolution look like. Also
+      // specify buffers containing actual input and output data.
+      // Although input shape required is in MKL-DNN order, the layout is
+      // Tensorflow's layout (NHWC or NCHW depending on data format).
+      input.SetUsrMem(fwd_input_dims, mkl_data_format, &input_tensor);
+      // Outbackprop shape is NHWC or NCHW depending on data format. Since
+      // GetInputSizeInMklOrder function returns size in that order we just use
+      // use that function directly.
+      conv_utl.GetInputSizeInMklOrder(obp_shape, &obp_dims);
+      if (!context->status().ok()) return;
+      outbackprop.SetUsrMem(obp_dims, mkl_data_format, &obp_tensor);
+      // Although output shape required is in MKL-DNN order,
+      // layout is Tensorflow's filter layout (HWIO)
+      // Shape of output of Conv2DBackpropInput is same as shape of filter.
+      memory::dims bwd_output_dims = fwd_filter_dims;
+      output.SetUsrMem(bwd_output_dims, memory::format::hwio, output_tensor);
+
+      // Create memory descriptors for convolution data w/ no specified format.
+      input.SetOpMemDesc(fwd_input_dims, memory::format::any);
+      outbackprop.SetOpMemDesc(obp_dims, memory::format::any);
+      output.SetOpMemDesc(bwd_output_dims, memory::format::any);
+
+      // Create convolution backward weights primitive.
+      auto bwd_desc = convolution_backward_weights::desc(convolution_direct,
+                          input.GetOpMemDesc(), output.GetOpMemDesc(),
+                          outbackprop.GetOpMemDesc(), strides, padding_l,
+                          padding_r, TFPaddingToMklDnnPadding(padding_));
+
+      auto bwd_pd = convolution_backward_weights::primitive_desc(bwd_desc,
+                                                              cpu_engine,
+                                                              fwd_pd);
+
+      PrepareAndExecutePrimitive(bwd_pd, &input, &outbackprop, &output);
+    } catch (mkldnn::error &e) {
+     string error_msg = "Status: " + std::to_string(e.status) +
+                       ", message: " + string(e.message) +
+                       ", in file " + string(__FILE__) + ":" +
+                       std::to_string(__LINE__);
+     OP_REQUIRES_OK(context, errors::Aborted("Operation received an exception:",
+                                            error_msg));
     }
   }
 
-  // Allocate output tensor.
-  void AllocateOutputTensor(OpKernelContext* context,
-                  const convolution_backward_weights::primitive_desc& conv_pd,
-                  const memory::dims& output_dims_mkl_order,
-                  memory::format output_tf_format, Tensor** output_tensor) {
-      CHECK_NOTNULL(output_tensor);
-
-      // For BackpropFilter, we convert the output tensor back in Tensorflow
-      // layout. Because typically, BackpropFilter is the last operator in the
-      // graph that emit filter gradient that is provided to ApplyGradient
-      // method to update the filter. But it may be possible to eliminate this
-      // by forwarding filter in MKL layout if we support ApplyGradient method
-      // for MKL layout propagation.
-      MklDnnShape output_mkl_shape;
-      output_mkl_shape.SetMklTensor(false);
-      // output_dims_mkl_order is in OIHW format.
-      // Allocate shape of TF tensor in HWIO format.
-      TensorShape output_tf_shape({output_dims_mkl_order[MklDnnDims::Dim_H],
-                                   output_dims_mkl_order[MklDnnDims::Dim_W],
-                                   output_dims_mkl_order[MklDnnDims::Dim_I],
-                                   output_dims_mkl_order[MklDnnDims::Dim_O]});
-      AllocateOutputSetMklShape(context, 0, output_tensor, output_tf_shape,
-                                output_mkl_shape);
-  }
-
-  // Allocate tensor for bias grad
-  void AllocateBiasGradTensor(OpKernelContext* context,
-                              const TensorShape& bias_grad_shape,
-                              Tensor** bias_grad_tensor) {
-    CHECK_NOTNULL(bias_grad_tensor);
-
-    MklDnnShape bias_grad_mkl_shape;
-    bias_grad_mkl_shape.SetMklTensor(false);
-    AllocateOutputSetMklShape(context, 1, bias_grad_tensor, bias_grad_shape,
-                              bias_grad_mkl_shape);
-  }
+ private:
+  std::vector<int32> strides_;
+  Padding padding_;
+  TensorFormat data_format_;
 
   // Prepare and execute net - checks for input and output reorders.
   void PrepareAndExecutePrimitive(
                   const convolution_backward_weights::primitive_desc& conv_pd,
                   MklDnnData<T>* input, MklDnnData<T>* obp,
-                  MklDnnData<T>* output, MklDnnData<T>* bias_grad = nullptr) {
+                  MklDnnData<T>* output) {
     // Create reorders between user layout and MKL layout if it is needed and
     // add it to the net before convolution.
     std::vector<primitive> net;
     input->CheckReorderToOpMem(conv_pd.src_primitive_desc(), &net);
     obp->CheckReorderToOpMem(conv_pd.diff_dst_primitive_desc(), &net);
 
-    // For BackpropFilter, we convert the output tensor back in Tensorflow
-    // layout.
+    // Memory for output of convolution. Since we may need reorder on the
+    // output side, we will prepare reorder primitive in case output
+    // reorder to user memory is required.
     bool output_reorder_required = output->PrepareReorderToUserMemIfReq(
                                       conv_pd.diff_weights_primitive_desc());
 
-    if (biasEnabled && (bias_grad != nullptr)) {
-      net.push_back(convolution_backward_weights(conv_pd, input->GetOpMem(),
-                                      obp->GetOpMem(), output->GetOpMem(),
-                                      bias_grad->GetOpMem()));
-    } else {
-      net.push_back(convolution_backward_weights(conv_pd, input->GetOpMem(),
-                                      obp->GetOpMem(), output->GetOpMem()));
-    }
+    net.push_back(convolution_backward_weights(conv_pd, input->GetOpMem(),
+                                    obp->GetOpMem(), output->GetOpMem()));
 
+    // Insert reorder primitive in the net for output reorder if reorder is
+    // required.
     if (output_reorder_required) {
       output->InsertReorderToUserMem(&net);
     }
 
+    // Handle output reorder
     stream(stream::kind::eager).submit(net).wait();
   }
 };
+#endif
 
 #define REGISTER_MKL_FILTER_KERNELS(T)                              \
   REGISTER_KERNEL_BUILDER(Name("_MklConv2DBackpropFilter")          \
                               .Device(DEVICE_CPU)                   \
                               .TypeConstraint<T>("T")               \
                               .Label(mkl_op_registry::kMklOpLabel), \
-              MklConv2DCustomBackpropFilterOp<CPUDevice, T, false>);\
-  REGISTER_KERNEL_BUILDER(Name("_MklConv2DBackpropFilterWithBias")  \
-                              .Device(DEVICE_CPU)                   \
-                              .TypeConstraint<T>("T")               \
-                              .Label(mkl_op_registry::kMklOpLabel), \
-              MklConv2DCustomBackpropFilterOp<CPUDevice, T, true>); \
-  REGISTER_KERNEL_BUILDER(Name("__MklDummyConv2DBackpropFilterWithBias")  \
-                              .Device(DEVICE_CPU)                   \
-                              .TypeConstraint<T>("T")               \
-                              .Label(mkl_op_registry::kMklOpLabel), \
-              MklDummyOp<CPUDevice, T>);
+                          MklConv2DCustomBackpropFilterOp<CPUDevice, T>);
 
 TF_CALL_float(REGISTER_MKL_FILTER_KERNELS);
 #undef REGISTER_MKL_FILTER_KERNELS
-
-#endif  // INTEL_MKL_DNN
-
 }  // namespace tensorflow
 
 #endif  // INTEL_MKL
diff --git a/tensorflow/core/kernels/mkl_conv_grad_input_ops.cc b/tensorflow/core/kernels/mkl_conv_grad_input_ops.cc
index df51df9638..4a47d0463e 100644
--- a/tensorflow/core/kernels/mkl_conv_grad_input_ops.cc
+++ b/tensorflow/core/kernels/mkl_conv_grad_input_ops.cc
@@ -49,6 +49,9 @@ limitations under the License.
 
 using mkldnn::stream;
 using mkldnn::prop_kind;
+
+using mkldnn::convolution_forward;
+using mkldnn::convolution_direct;
 using mkldnn::convolution_backward_data;
 #endif
 
@@ -359,117 +362,143 @@ class MklConv2DCustomBackpropInputOp : public OpKernel {
 #else
 
 template <typename Device, class T>
-class MklConv2DCustomBackpropInputOp :
-  public MklConv2DBackpropCommonOp<Device, T> {
+class MklConv2DCustomBackpropInputOp : public OpKernel {
  public:
-  explicit MklConv2DCustomBackpropInputOp(OpKernelConstruction* context)
-      : MklConv2DBackpropCommonOp<Device, T>(context) { }
   ~MklConv2DCustomBackpropInputOp() {}
+  explicit MklConv2DCustomBackpropInputOp(OpKernelConstruction* context)
+      : OpKernel(context) {
+    string data_format_str;
+    OP_REQUIRES_OK(context, context->GetAttr("data_format", &data_format_str));
+    OP_REQUIRES(context, FormatFromString(data_format_str, &data_format_),
+                errors::InvalidArgument("Invalid data format"));
+    OP_REQUIRES_OK(context, context->GetAttr("strides", &strides_));
+    int stride_n = GetTensorDim(strides_, data_format_, 'N');
+    int stride_c = GetTensorDim(strides_, data_format_, 'C');
+    OP_REQUIRES(
+        context, (stride_n == 1 && stride_c == 1),
+        errors::InvalidArgument("Current implementation does not yet support "
+                                "strides in the batch and depth dimensions."));
 
- private:
-  void ValidateMklShapes(const MklDnnShape& input_mkl_shape,
-                         const MklDnnShape& filter_mkl_shape,
-                         const MklDnnShape& obp_mkl_shape) {
-    // Tensor that feeds to 'Input' slot of BackpropInput is always just a shape
-    // of the Tensor and never an actual tensor. So it will never be in MKL
-    // layout.
-    CHECK(!input_mkl_shape.IsMklTensor())
-      << "Conv2DBackpropInput: input should not be in MKL Layout";
-  }
-
-  size_t GetInputTensorIndexWithSizes() { return 0; /* input index */ }
-
-  TensorShape MakeInputTfShape(OpKernelContext* context,
-                               const Tensor& input_tensor) {
-    TensorShape input_tf_shape;
-    CHECK_EQ(TensorShapeUtils::IsVector(input_tensor.shape()), true);
-    CHECK_EQ(TensorShapeUtils::MakeShape(input_tensor.vec<int32>(),
-                                         &input_tf_shape).ok(), true);
-    return input_tf_shape;
+    OP_REQUIRES_OK(context, context->GetAttr("padding", &padding_));
   }
 
-  TensorShape MakeFilterTfShape(OpKernelContext* context,
-                                const Tensor& filter_tensor) {
-    size_t filter_idx = 1;
-    return GetTfShape(context, filter_idx);
-  }
+  void Compute(OpKernelContext* context) override {
+    try {
+      auto cpu_engine = engine(engine::cpu, 0);
 
-  const memory::dims& GetOutputDims(const memory::dims& fwd_input_dims,
-                                    const memory::dims& fwd_filter_dims) {
-    // Output Shape of Conv2DBackpropInput is same as shape of Conv2D 'input'.
-    return fwd_input_dims;
-  }
+      MklDnnData<T> filter(&cpu_engine);
+      MklDnnData<T> outbackprop(&cpu_engine);
+      MklDnnData<T> output(&cpu_engine);
 
-  memory::format GetOutputFormat(const memory::format data_format) {
-    // Output layout is Tensorflow's layout in data format order.
-    return data_format;
-  }
+      // Input tensors
+      const Tensor& input_tensor = MklGetInput(context, 0);
+      const Tensor& filter_tensor = MklGetInput(context, 1);
+      const Tensor& obp_tensor = MklGetInput(context, 2);  // Outbackprop
 
-  void CreatePrimitive(OpKernelContext* context,
-                       const engine& cpu_engine,
-                       const convolution_forward::primitive_desc& conv_fwd_pd,
-                       MklDnnData<T>* input, MklDnnData<T>* filter,
-                       MklDnnData<T>* outbackprop, MklDnnData<T>* output,
-                       Tensor** output_tensor,
-                       const memory::dims& strides,
-                       const memory::dims& padding_l,
-                       const memory::dims& padding_r,
-                       padding_kind padding,
-                       const memory::dims& bwd_output_dims,
-                       memory::format bwd_output_format) {
-    CHECK_NOTNULL(context);
-    CHECK_NOTNULL(input);
-    CHECK_NOTNULL(filter);
-    CHECK_NOTNULL(outbackprop);
-    CHECK_NOTNULL(output);
-    CHECK_NOTNULL(output_tensor);
-
-    // Create convolution backward data primitive.
-    auto bwd_desc = convolution_backward_data::desc(convolution_direct,
-                      output->GetOpMemDesc(), filter->GetOpMemDesc(),
-                      outbackprop->GetOpMemDesc(), strides, padding_l,
-                      padding_r, padding);
-
-    auto bwd_pd = convolution_backward_data::primitive_desc(bwd_desc,
-                                                          cpu_engine,
-                                                          conv_fwd_pd);
-
-
-    // Allocate output tensor in TensorFlow and MKL layout.
-    AllocateOutputTensor(context, bwd_pd, bwd_output_dims,
-                         bwd_output_format, output_tensor);
-    CHECK_NOTNULL(*output_tensor);
-    // Set buffer handle using allocated output tensor.
-    output->SetUsrMemDataHandle(*output_tensor);
-
-    PrepareAndExecutePrimitive(bwd_pd, filter, outbackprop, output);
+      // Generate input shape.
+      TensorShape input_shape;
+      OP_REQUIRES(context, TensorShapeUtils::IsVector(input_tensor.shape()),
+        errors::InvalidArgument(
+              "Conv2DBackpropInput: input_sizes input must be 1-dim, not ",
+              input_tensor.dims()));
+      OP_REQUIRES_OK(context, TensorShapeUtils::MakeShape(
+                        input_tensor.vec<int32>(), &input_shape));
+      TensorShape filter_shape = filter_tensor.shape();
+      TensorShape obp_shape = obp_tensor.shape();
+
+      // By default, all dims are in MKL order. Only dims in TF order
+      // are those with prefix tf_order.
+      memory::dims obp_dims, fwd_input_dims, fwd_filter_dims;
+      memory::dims padding_l, padding_r, strides, fwd_output_dims;
+      memory::dims fwd_output_dims_tf_order;
+
+      // Get forward convolution parameters.
+      MklDnnConvUtil conv_utl(context, strides_, padding_, data_format_);
+      conv_utl.GetConvFwdSizesInMklOrder(input_shape, filter_shape,
+                                         &fwd_input_dims, &fwd_filter_dims,
+                                         &strides,
+                                         &fwd_output_dims_tf_order,
+                                         &fwd_output_dims,
+                                         &padding_l, &padding_r);
+      if (!context->status().ok()) return;
+
+      // Create Convolution forward descriptor since Convolution backward
+      // API needs it. For that, we first need to create input, filter
+      // and output memory descriptors.
+      auto mkl_data_format = TFDataFormatToMklDnnDataFormat(data_format_);
+      auto fwd_src_md = memory::desc(fwd_input_dims, MklDnnType<T>(),
+                                     mkl_data_format);
+      auto fwd_filter_md = memory::desc(fwd_filter_dims, MklDnnType<T>(),
+                                        memory::format::hwio);
+      auto fwd_out_md = memory::desc(fwd_output_dims, MklDnnType<T>(),
+                                     mkl_data_format);
+      auto fwd_desc = convolution_forward::desc(prop_kind::forward,
+            convolution_direct, fwd_src_md, fwd_filter_md, fwd_out_md,
+            strides, padding_l, padding_r, TFPaddingToMklDnnPadding(padding_));
+      auto fwd_pd = convolution_forward::primitive_desc(fwd_desc, cpu_engine);
+
+      // Allocate output tensor and shape
+      // TODO(nhasabni): Update this when support for MKL layout is added.
+      // Shape of output of Conv2DBackpropInput is same as 'input' of Conv2D.
+      TensorShape tf_output_shape(input_shape);
+      MklShape mkl_output_mkl_shape;
+      mkl_output_mkl_shape.SetMklTensor(false);
+      Tensor* output_tensor = nullptr;
+      AllocateOutputSetMklShape(context, 0, &output_tensor, tf_output_shape,
+                                mkl_output_mkl_shape);
+
+      // Create memory for user data.
+      // Describe how the inputs and outputs of Convolution look like. Also
+      // specify buffers containing actual input and output data.
+      // Although input shape required is in MKL-DNN order, the layout is
+      // Tensorflow's layout (NHWC or NCHW depending on data format).
+      // Although filter shape (filter_dims) required is in MKL-DNN order,
+      // the layout is Tensorflow's layout (HWIO).
+      // Shape of Conv2DBackpropInput's filter is same as that of Conv2D filter.
+      filter.SetUsrMem(fwd_filter_dims, memory::format::hwio, &filter_tensor);
+      // Outbackprop shape is NHWC or NCHW depending on data format. Since
+      // GetInputSizeInMklOrder function returns size in that order we just use
+      // use that function directly.
+      conv_utl.GetInputSizeInMklOrder(obp_shape, &obp_dims);
+      if (!context->status().ok()) return;
+      outbackprop.SetUsrMem(obp_dims, mkl_data_format, &obp_tensor);
+      // Although output shape required is in MKL-DNN order,
+      // layout is Tensorflow's layout (NHWC or NCHW depending on data format).
+      // Shape of output of Conv2DBackpropInput is same as shape of 'input'
+      // of Conv2D.
+      memory::dims bwd_output_dims = fwd_input_dims;
+      output.SetUsrMem(bwd_output_dims, mkl_data_format, output_tensor);
+
+      // Create memory descriptors for convolution data w/ no specified format.
+      filter.SetOpMemDesc(fwd_filter_dims, memory::format::any);
+      outbackprop.SetOpMemDesc(obp_dims, memory::format::any);
+      output.SetOpMemDesc(bwd_output_dims, memory::format::any);
+
+      // Create convolution backward data primitive.
+      auto bwd_desc = convolution_backward_data::desc(convolution_direct,
+                          output.GetOpMemDesc(), filter.GetOpMemDesc(),
+                          outbackprop.GetOpMemDesc(), strides, padding_l,
+                          padding_r, TFPaddingToMklDnnPadding(padding_));
+
+      auto bwd_pd = convolution_backward_data::primitive_desc(bwd_desc,
+                                                              cpu_engine,
+                                                              fwd_pd);
+
+      PrepareAndExecutePrimitive(bwd_pd, &filter, &outbackprop, &output);
+    } catch (mkldnn::error &e) {
+     string error_msg = "Status: " + std::to_string(e.status) +
+                       ", message: " + string(e.message) +
+                       ", in file " + string(__FILE__) + ":" +
+                       std::to_string(__LINE__);
+     OP_REQUIRES_OK(context, errors::Aborted("Operation received an exception:",
+                                            error_msg));
+    }
   }
 
-  // Allocate output tensor.
-  void AllocateOutputTensor(OpKernelContext* context,
-                  const convolution_backward_data::primitive_desc& conv_pd,
-                  const memory::dims& output_dims_mkl_order,
-                  memory::format output_tf_format, Tensor** output_tensor) {
-      CHECK_NOTNULL(output_tensor);
-
-      // Output primitive descriptor for backward data is diff_src.
-      auto dst_pd = conv_pd.diff_src_primitive_desc();
-
-      // Allocate shape of Mkl tensor.
-      MklDnnShape output_mkl_shape;
-      output_mkl_shape.SetMklTensor(true);
-      output_mkl_shape.SetMklLayout(&dst_pd);
-      output_mkl_shape.SetElemType(MklDnnType<T>());
-      output_mkl_shape.SetTfLayout(output_dims_mkl_order.size(),
-                                   output_dims_mkl_order, output_tf_format);
-
-      // Allocate shape of TF tensor.
-      TensorShape output_tf_shape;
-      output_tf_shape.AddDim(dst_pd.get_size() / sizeof(T));
-
-      AllocateOutputSetMklShape(context, 0, output_tensor, output_tf_shape,
-                                output_mkl_shape);
-  }
+ private:
+  std::vector<int32> strides_;
+  Padding padding_;
+  TensorFormat data_format_;
 
   // Prepare and execute net - checks for input and output reorders.
   void PrepareAndExecutePrimitive(
@@ -482,9 +511,22 @@ class MklConv2DCustomBackpropInputOp :
     filter->CheckReorderToOpMem(conv_pd.weights_primitive_desc(), &net);
     obp->CheckReorderToOpMem(conv_pd.diff_dst_primitive_desc(), &net);
 
+    // Memory for output of convolution. Since we may need reorder on the
+    // output side, we will prepare reorder primitive in case output
+    // reorder to user memory is required.
+    bool output_reorder_required = output->PrepareReorderToUserMemIfReq(
+                                      conv_pd.diff_src_primitive_desc());
+
     net.push_back(convolution_backward_data(conv_pd, obp->GetOpMem(),
                                     filter->GetOpMem(), output->GetOpMem()));
 
+    // Insert reorder primitive in the net for output reorder if reorder is
+    // required.
+    if (output_reorder_required) {
+      output->InsertReorderToUserMem(&net);
+    }
+
+    // Handle output reorder
     stream(stream::kind::eager).submit(net).wait();
   }
 };
diff --git a/tensorflow/core/kernels/mkl_conv_ops.cc b/tensorflow/core/kernels/mkl_conv_ops.cc
index 04268f23bb..a9872b8d6d 100644
--- a/tensorflow/core/kernels/mkl_conv_ops.cc
+++ b/tensorflow/core/kernels/mkl_conv_ops.cc
@@ -40,7 +40,8 @@ limitations under the License.
 #include "tensorflow/core/util/tensor_format.h"
 
 #include "tensorflow/core/util/mkl_util.h"
-
+#include "mkl_dnn.h"
+#include "mkl_dnn_types.h"
 
 #ifdef INTEL_MKL_DNN
 #include "mkldnn.hpp"
@@ -50,9 +51,6 @@ using mkldnn::prop_kind;
 
 using mkldnn::convolution_forward;
 using mkldnn::convolution_direct;
-#else
-#include "mkl_dnn.h"
-#include "mkl_dnn_types.h"
 #endif
 
 namespace tensorflow {
@@ -290,8 +288,10 @@ class MklConv2DOp : public OpKernel {
     mkl_filter_output_mkl_shape.SetMklLayout(mkl_context.prim_fwd,
                                              dnnResourceFilter);
 
-    size_t filter_sizes[4] = {filter.dim_size(0), filter.dim_size(1),
-                              filter.dim_size(2), filter.dim_size(3)};
+    size_t filter_sizes[4] = {static_cast<size_t>(filter.dim_size(0)),
+                              static_cast<size_t>(filter.dim_size(1)),
+                              static_cast<size_t>(filter.dim_size(2)),
+                              static_cast<size_t>(filter.dim_size(3))};
     mkl_filter_output_mkl_shape.SetTfLayout(filter.dims(), filter_sizes,
                                             mkl_context.filter_strides);
 
@@ -514,12 +514,6 @@ class MklConv2DOp : public OpKernel {
       const Tensor& src_tensor = MklGetInput(context, src_idx);
       const Tensor& filter_tensor = MklGetInput(context, filter_idx);
 
-      MklDnnShape src_mkl_shape, filter_mkl_shape;
-      GetMklShape(context, src_idx, &src_mkl_shape);
-      GetMklShape(context, filter_idx, &filter_mkl_shape);
-      CHECK(!filter_mkl_shape.IsMklTensor())
-        << "Conv2D filter should not be in MKL Layout";
-
       MklDnnData<T> src(&cpu_engine);
       MklDnnData<T> filter(&cpu_engine);
       MklDnnData<T> output(&cpu_engine);
@@ -529,9 +523,8 @@ class MklConv2DOp : public OpKernel {
 
       // Get shapes of input tensors in MKL-DNN order
       MklDnnConvUtil conv_utl(context, strides_, padding_, data_format_);
-      auto src_tf_shape = GetTfShape(context, src_idx);
-      auto filter_tf_shape = GetTfShape(context, filter_idx);
-      conv_utl.GetConvFwdSizesInMklOrder(src_tf_shape, filter_tf_shape,
+      conv_utl.GetConvFwdSizesInMklOrder(src_tensor.shape(),
+                                         filter_tensor.shape(),
                                          &src_dims, &filter_dims, &strides,
                                          &output_dims_tf_order,
                                          &output_dims_mkl_order, &padding_l,
@@ -539,47 +532,58 @@ class MklConv2DOp : public OpKernel {
       if (!context->status().ok()) return;
 
       // Check for corner case - if there is nothing to compute, return.
-      TensorShape output_tf_shape = MklDnnDimsToTFShape(output_dims_tf_order);
+      TensorShape tf_output_shape({output_dims_tf_order[0],
+                                output_dims_tf_order[1],
+                                output_dims_tf_order[2],
+                                output_dims_tf_order[3]});
+      Tensor* output_tensor = nullptr;
+      MklShape mkl_output_mkl_shape;
+      mkl_output_mkl_shape.SetMklTensor(false);
+      AllocateOutputSetMklShape(context, 0, &output_tensor, tf_output_shape,
+                                mkl_output_mkl_shape);
 
       // Forward filter in TF format from input at index 1 to output at index 1.
       ForwardTfTensorInToOut(context, 1, 1);
 
-      // Corner cases: output with 0 elements and 0 batch size.
-      Tensor* output_tensor = nullptr;
-      if (output_tf_shape.num_elements() == 0 ||
-          output_dims_tf_order[0] == 0) {
+      if (tf_output_shape.num_elements() == 0) {
         // TODO(jbobba): Verify correctness here
         //               Need semantics for Null MKL tensor
-        MklDnnShape output_mkl_shape;
-        output_mkl_shape.SetMklTensor(false);
-        AllocateOutputSetMklShape(context, 0, &output_tensor, src_tf_shape,
-                                output_mkl_shape);
         return;
       }
 
+      // Corner case to handle 0 batch size.
+      if (output_dims_tf_order[0] == 0) {
+        // Nothing to do, allocate output tensor and return
+        // TODO(nhasabni): remove this code later once serialization
+        // in MKL-DNN is supported.
+        AllocateOutputSetMklShape(context, 0, &output_tensor,
+                                  src_tensor.shape(), mkl_output_mkl_shape);
+        return;
+      } else {
+        // Otherwise regular output tensor allocation
+        // Allocate output tensor.
+      }
+      CHECK_NOTNULL(output_tensor);
+
       // Create memory for user data.
       // Describe how the inputs and outputs of Convolution look like. Also
       // specify buffers containing actual input and output data.
-      auto tf_fmt = TFDataFormatToMklDnnDataFormat(data_format_);
-      // If input is in MKL layout, then simply grab input layout; otherwise,
-      // construct input Tf layout. For TF layout, although input shape
-      // (src_dims) required is in MKL-DNN order, the layout is Tensorflow's
-      // layout (NHWC or NCHW depending on data format).
-      auto src_md = src_mkl_shape.IsMklTensor()
-                    ? src_mkl_shape.GetMklLayout()
-                    : memory::desc(src_dims, MklDnnType<T>(), tf_fmt);
-      src.SetUsrMem(src_md, &src_tensor);
+      // Although input shape (src_dims) required is in MKL-DNN order,
+      // the layout is Tensorflow's layout (NHWC or NCHW depending on data
+      // format).
+      src.SetUsrMem(src_dims, TFDataFormatToMklDnnDataFormat(data_format_),
+                    const_cast<void*>(static_cast<const void*>(
+                    src_tensor.flat<T>().data())));
       // Although filter shape (filter_dims) required is in MKL-DNN order,
       // the layout is Tensorflow's layout (HWIO).
-      auto filter_md = filter_mkl_shape.IsMklTensor()
-                    ? filter_mkl_shape.GetMklLayout()
-          : memory::desc(filter_dims, MklDnnType<T>(), memory::format::hwio);
-      filter.SetUsrMem(filter_md, &filter_tensor);
-      // Set output shape (output_dims) required in MKL-DNN order.
-      // Currently, we set output layout as Tensorflow's layout (NHWC or NCHW
-      // depending on data format). But later we propagate Mkl layout of the
-      // output to the next op directly.
-      output.SetUsrMem(output_dims_mkl_order, tf_fmt);
+      filter.SetUsrMem(filter_dims, memory::format::hwio,
+                       const_cast<void*>(static_cast<const void*>(
+                       filter_tensor.flat<T>().data())));
+      // Although output shape (output_dims) required is in MKL-DNN order,
+      // layout is Tensorflow's layout (NHWC or NCHW depending on data format).
+      output.SetUsrMem(output_dims_mkl_order,
+                       TFDataFormatToMklDnnDataFormat(data_format_),
+                       output_tensor->flat<T>().data());
 
       // Create memory descriptors for convolution data w/ no specified format.
       src.SetOpMemDesc(src_dims, memory::format::any);
@@ -592,7 +596,9 @@ class MklConv2DOp : public OpKernel {
         memory::dims bias_size;
         conv_utl.GetBiasSizeInMklOrder(2 /* bias idx */, &bias_size);
         const Tensor& bias_tensor = MklGetInput(context, 2);
-        bias.SetUsrMem(bias_size, memory::format::x, &bias_tensor);
+        bias.SetUsrMem(bias_size, memory::format::x,
+                       const_cast<void*>(static_cast<const void*>(
+                       bias_tensor.flat<T>().data())));
         bias.SetOpMemDesc(bias_size, memory::format::any);
 
         // Create convolution primitive with Bias.
@@ -603,10 +609,6 @@ class MklConv2DOp : public OpKernel {
 
         auto conv_prim_desc = convolution_forward::primitive_desc(conv_desc,
                                                                 cpu_engine);
-        AllocateOutputTensor(context, conv_prim_desc,
-                             output_dims_mkl_order, tf_fmt, &output_tensor);
-        // Set data handle for output.
-        output.SetUsrMemDataHandle(output_tensor);
         PrepareAndExecuteNet(conv_prim_desc, &src, &filter, &bias, &output);
       } else {
         // Create convolution primitive without Bias.
@@ -617,10 +619,6 @@ class MklConv2DOp : public OpKernel {
 
         auto conv_prim_desc = convolution_forward::primitive_desc(conv_desc,
                                                                 cpu_engine);
-        AllocateOutputTensor(context, conv_prim_desc, output_dims_mkl_order,
-                             tf_fmt, &output_tensor);
-        // Set data handle for output.
-        output.SetUsrMemDataHandle(output_tensor);
         PrepareAndExecuteNet(conv_prim_desc, &src, &filter, nullptr, &output);
       }
     } catch (mkldnn::error &e) {
@@ -638,44 +636,23 @@ class MklConv2DOp : public OpKernel {
   Padding padding_;
   TensorFormat data_format_;
 
-  // Allocate output tensor.
-  void AllocateOutputTensor(
-                  OpKernelContext* context,
-                  const convolution_forward::primitive_desc& conv_prim_desc,
-                  const memory::dims& output_dims_mkl_order,
-                  memory::format output_tf_format, Tensor** output_tensor) {
-      CHECK_NOTNULL(output_tensor);
-      auto dst_pd = conv_prim_desc.dst_primitive_desc();
-
-      // Allocate shape of Mkl tensor.
-      MklDnnShape output_mkl_shape;
-      output_mkl_shape.SetMklTensor(true);
-      output_mkl_shape.SetMklLayout(&dst_pd);
-      output_mkl_shape.SetElemType(MklDnnType<T>());
-      output_mkl_shape.SetTfLayout(output_dims_mkl_order.size(),
-                                   output_dims_mkl_order, output_tf_format);
-
-      // Allocate shape of TF tensor.
-      TensorShape output_tf_shape;
-      output_tf_shape.AddDim((dst_pd.get_size() / sizeof(T)));
-
-      const int kOutputSlotIdx = 0;
-      AllocateOutputSetMklShape(context, kOutputSlotIdx, output_tensor,
-                                output_tf_shape, output_mkl_shape);
-  }
-
   // Prepare and execute net - checks for input and output reorders.
   void PrepareAndExecuteNet(
                   const convolution_forward::primitive_desc& conv_prim_desc,
                   MklDnnData<T>* src, MklDnnData<T>* filter,
                   MklDnnData<T>* bias, MklDnnData<T>* output) {
     // Create reorders between user layout and MKL layout if it is needed and
-    // add it to the net before convolution. No need to check for output
-    // reorder as we propagate output layout to the next layer.
+    // add it to the net before convolution.
     std::vector<primitive> net;
     src->CheckReorderToOpMem(conv_prim_desc.src_primitive_desc(), &net);
     filter->CheckReorderToOpMem(conv_prim_desc.weights_primitive_desc(), &net);
 
+    // Memory for output of convolution. Since we may need reorder on the
+    // output side, we will prepare reorder primitive in case output
+    // reorder to user memory is required.
+    bool output_reorder_required = output->PrepareReorderToUserMemIfReq(
+                                      conv_prim_desc.dst_primitive_desc());
+
     // Create convolution primitive and add it to net.
     if (bias) {
       CHECK_EQ(biasEnabled, true);
@@ -688,6 +665,13 @@ class MklConv2DOp : public OpKernel {
                                     filter->GetOpMem(), output->GetOpMem()));
     }
 
+    // Insert reorder primitive in the net for output reorder if reorder is
+    // required.
+    if (output_reorder_required) {
+      output->InsertReorderToUserMem(&net);
+    }
+
+    // Handle output reorder
     stream(stream::kind::eager).submit(net).wait();
   }
 };
@@ -704,12 +688,7 @@ class MklConv2DOp : public OpKernel {
                               .Device(DEVICE_CPU)                   \
                               .TypeConstraint<T>("T")               \
                               .Label(mkl_op_registry::kMklOpLabel), \
-                          MklConv2DOp<CPUDevice, T, true>);         \
-  REGISTER_KERNEL_BUILDER(Name("__MklDummyConv2DWithBias")          \
-                              .Device(DEVICE_CPU)                   \
-                              .TypeConstraint<T>("T")               \
-                              .Label(mkl_op_registry::kMklOpLabel), \
-                          MklDummyOp<CPUDevice, T>);
+                          MklConv2DOp<CPUDevice, T, true>);
 
 TF_CALL_float(REGISTER_MKL_CPU);
 
diff --git a/tensorflow/core/kernels/mkl_conv_ops.h b/tensorflow/core/kernels/mkl_conv_ops.h
index 47a9b4bfc7..f0cb37f8a4 100644
--- a/tensorflow/core/kernels/mkl_conv_ops.h
+++ b/tensorflow/core/kernels/mkl_conv_ops.h
@@ -41,12 +41,6 @@ limitations under the License.
 
 #ifdef INTEL_MKL_DNN
 #include "mkldnn.hpp"
-
-using mkldnn::stream;
-using mkldnn::prop_kind;
-
-using mkldnn::convolution_forward;
-using mkldnn::convolution_direct;
 #endif
 
 namespace tensorflow {
@@ -114,13 +108,7 @@ class MklDnnConvUtil {
   #undef CHECK_BOUNDS
 
     // MKL-DNN always requires input in NCHW format.
-    std::vector<int> mkldnn_sizes(4, -1);
-    mkldnn_sizes[MklDnnDims::Dim_N] = input_batch;
-    mkldnn_sizes[MklDnnDims::Dim_C] = input_depth;
-    mkldnn_sizes[MklDnnDims::Dim_H] = input_rows;
-    mkldnn_sizes[MklDnnDims::Dim_W] = input_cols;
-
-    *input_dims = mkldnn_sizes;
+    *input_dims = {input_batch, input_depth, input_rows, input_cols};
   }
 
   // Calculate Convolution filter size in MKL-DNN order. MKL-DNN
@@ -168,13 +156,7 @@ class MklDnnConvUtil {
 
     // MKL-DNN always needs filter in OIHW format.
     // OIHW = (out_depth, in_depth, rows, cols)
-    std::vector<int> mkldnn_sizes(4, -1);
-    mkldnn_sizes[MklDnnDims::Dim_O] = out_depth;
-    mkldnn_sizes[MklDnnDims::Dim_I] = in_depth;
-    mkldnn_sizes[MklDnnDims::Dim_H] = filter_rows;
-    mkldnn_sizes[MklDnnDims::Dim_W] = filter_cols;
-
-    *filter_dims = mkldnn_sizes;
+    *filter_dims = {out_depth, in_depth, filter_rows, filter_cols};
   }
 
   // Calculate Convolution filter size in MKL-DNN order. MKL-DNN
@@ -185,9 +167,9 @@ class MklDnnConvUtil {
   GetFilterSizeInMklOrder(size_t src_index, size_t filter_index,
                           memory::dims *filter_dims) {
     CHECK_NOTNULL(filter_dims);
-    GetFilterSizeInMklOrder(GetTfShape(context_, src_index),
-                            GetTfShape(context_, filter_index),
-                            filter_dims);
+    const Tensor& input = MklGetInput(context_, src_index);
+    const Tensor& filter = MklGetInput(context_, filter_index);
+    GetFilterSizeInMklOrder(input.shape(), filter.shape(), filter_dims);
   }
 
   // Calculate Bias size for 2D Convolution. Function does not return
@@ -256,12 +238,8 @@ class MklDnnConvUtil {
     *output_dims_tf_order = TFShapeToMklDnnDims(out_shape);
 
     // MKL-DNN always needs output in NCHW format.
-    std::vector<int> mkldnn_sizes(4, -1);
-    mkldnn_sizes[MklDnnDims::Dim_N] = out_batch;
-    mkldnn_sizes[MklDnnDims::Dim_C] = out_depth;
-    mkldnn_sizes[MklDnnDims::Dim_H] = static_cast<int>(out_rows);
-    mkldnn_sizes[MklDnnDims::Dim_W] = static_cast<int>(out_cols);
-    *output_dims_mkl_order = mkldnn_sizes;
+    *output_dims_mkl_order = {out_batch, out_depth, static_cast<int>(out_rows),
+                   static_cast<int>(out_cols)};
 
     // Now handle padding. MKL-DNN uses asymetric padding.
     *pad_l = {static_cast<int>(pad_top), static_cast<int>(pad_left)};
@@ -283,14 +261,14 @@ class MklDnnConvUtil {
     CHECK_NOTNULL(pad_l);
     CHECK_NOTNULL(pad_r);
 
-    auto input_tf_shape = GetTfShape(context_, src_index);
-    auto filter_tf_shape = GetTfShape(context_, filter_index);
+    const Tensor& input = MklGetInput(context_, src_index);
+    const Tensor& filter = MklGetInput(context_, filter_index);
 
-    OP_REQUIRES(context_, input_tf_shape.dims() == 4,
+    OP_REQUIRES(context_, input.dims() == 4,
                 errors::InvalidArgument("input must be 4-dimensional",
-                                        input_tf_shape.DebugString()));
+                                          input.shape().DebugString()));
 
-    GetOutputAndPadSizeInMklOrder(input_tf_shape, filter_tf_shape,
+    GetOutputAndPadSizeInMklOrder(input.shape(), filter.shape(),
                                   strides, output_dims_tf_order,
                                   output_dims_mkl_order, pad_l, pad_r);
   }
@@ -331,231 +309,8 @@ class MklDnnConvUtil {
   }
 };
 
-/////////////////////////////////////////////////////////////////////
-///  Common class that implements Conv2DBackpropFilter and Input
-/////////////////////////////////////////////////////////////////////
-
-template <typename Device, class T>
-class MklConv2DBackpropCommonOp :  public OpKernel {
- public:
-  ~MklConv2DBackpropCommonOp() {}
-  explicit MklConv2DBackpropCommonOp(OpKernelConstruction* context)
-      : OpKernel(context) {
-    string data_format_str;
-    OP_REQUIRES_OK(context, context->GetAttr("data_format", &data_format_str));
-    OP_REQUIRES(context, FormatFromString(data_format_str, &data_format_),
-                errors::InvalidArgument("Invalid data format"));
-    OP_REQUIRES_OK(context, context->GetAttr("strides", &strides_));
-    int stride_n = GetTensorDim(strides_, data_format_, 'N');
-    int stride_c = GetTensorDim(strides_, data_format_, 'C');
-    OP_REQUIRES(
-        context, (stride_n == 1 && stride_c == 1),
-        errors::InvalidArgument("Current implementation does not yet support "
-                                "strides in the batch and depth dimensions."));
-
-    OP_REQUIRES_OK(context, context->GetAttr("padding", &padding_));
-  }
-
-  void Compute(OpKernelContext* context) override {
-    try {
-      auto cpu_engine = engine(engine::cpu, 0);
-
-      // Prepare common tensors for Conv2DBackpropInput and
-      // Conv2DBackpropFilter.
-      MklDnnData<T> input(&cpu_engine);
-      MklDnnData<T> filter(&cpu_engine);
-      MklDnnData<T> outbackprop(&cpu_engine);
-      MklDnnData<T> output(&cpu_engine);
-
-      // Input tensors
-      const int kInputIdx = 0, kFilterIdx = 1, kOutbpropIdx = 2;
-      const Tensor& input_tensor = MklGetInput(context, kInputIdx);
-      const Tensor& filter_tensor = MklGetInput(context, kFilterIdx);
-      const Tensor& outbprop_tensor = MklGetInput(context, kOutbpropIdx);
-
-      MklDnnShape input_mkl_shape, filter_mkl_shape, outbprop_mkl_shape;
-      GetMklShape(context, kInputIdx, &input_mkl_shape);
-      GetMklShape(context, kFilterIdx, &filter_mkl_shape);
-      GetMklShape(context, kOutbpropIdx, &outbprop_mkl_shape);
-      // Allow operator-specific sanity checking of shapes.
-      ValidateMklShapes(input_mkl_shape, filter_mkl_shape, outbprop_mkl_shape);
-
-      // Allow operator-specific generation of shapes.
-      // E.g., Conv2DBackpropFilter gets filter as filter_sizes. It is a
-      // tensor containing shape of filter. So filter.shape() is not
-      // a correct way to get filter shape. These operator-specific calls
-      // allow this class to handle this case.
-      TensorShape input_tf_shape = MakeInputTfShape(context, input_tensor);
-      TensorShape filter_tf_shape = MakeFilterTfShape(context, filter_tensor);
-      TensorShape outbprop_tf_shape = GetTfShape(context, kOutbpropIdx);
-
-      // By default, all dims are in MKL order. Only dims in TF order
-      // are those with prefix tf_order.
-      memory::dims outbprop_dims, fwd_input_dims, fwd_filter_dims;
-      memory::dims padding_l, padding_r, strides, fwd_output_dims;
-      memory::dims fwd_output_dims_tf_order;
-
-      // Get forward convolution parameters.
-      MklDnnConvUtil conv_utl(context, strides_, padding_, data_format_);
-      conv_utl.GetConvFwdSizesInMklOrder(input_tf_shape, filter_tf_shape,
-                                         &fwd_input_dims, &fwd_filter_dims,
-                                         &strides,
-                                         &fwd_output_dims_tf_order,
-                                         &fwd_output_dims,
-                                         &padding_l, &padding_r);
-      if (!context->status().ok()) return;
-
-      // Create Convolution forward descriptor since Convolution backward
-      // API needs it. For that, we first need to create input, filter
-      // and output memory descriptors.
-      auto tf_fmt = TFDataFormatToMklDnnDataFormat(data_format_);
-      // If input is in MKL layout, then simply grab input layout; otherwise,
-      // construct input TF layout. For TF layout, although input shape
-      // required is in MKL-DNN order, the layout is Tensorflow's layout
-      // (NHWC or NCHW depending on data format).
-      auto fwd_input_md = input_mkl_shape.IsMklTensor() ?
-                          input_mkl_shape.GetMklLayout() :
-                       memory::desc(fwd_input_dims, MklDnnType<T>(), tf_fmt);
-      // If filter is in MKL layout, then simply grab filter layout; otherwise
-      // construct filter in TF layout. For TF layout, filter is in HWIO format.
-      auto fwd_filter_md = filter_mkl_shape.IsMklTensor() ?
-                          filter_mkl_shape.GetMklLayout() :
-                          memory::desc(fwd_filter_dims, MklDnnType<T>(),
-                                       memory::format::hwio);
-      // Tensorflow Output of Conv2D is in data_format order.
-      auto fwd_out_md = memory::desc(fwd_output_dims, MklDnnType<T>(), tf_fmt);
-      auto fwd_desc = convolution_forward::desc(prop_kind::forward,
-            convolution_direct, fwd_input_md, fwd_filter_md, fwd_out_md,
-            strides, padding_l, padding_r, TFPaddingToMklDnnPadding(padding_));
-      auto fwd_pd = convolution_forward::primitive_desc(fwd_desc, cpu_engine);
-
-      // Create memory for user data. Describe how the inputs and outputs of
-      // Convolution look like. Also specify buffers containing actual input
-      // and output data.
-
-      // Since this is a common class for both Conv2DBackpropFilter and
-      // Conv2DBackpropInput, we skip SetUsrMem call for input tensor (for
-      // Conv2DBackpropInput) and for filter tensor (for
-      // conv2DBackpropFilter) depending on which tensor is int32 type.
-      size_t input_with_sizes = GetInputTensorIndexWithSizes();
-      if (input_with_sizes != kInputIdx) {
-        // Shape of Conv2DBackpropFilter's input is same as Conv2D input.
-        input.SetUsrMem(fwd_input_md, &input_tensor);
-      } else if (input_with_sizes != kFilterIdx) {
-        // Shape of Conv2DBackpropInput's filter is same as Conv2D filter.
-        filter.SetUsrMem(fwd_filter_md, &filter_tensor);
-      }
-
-      conv_utl.GetInputSizeInMklOrder(outbprop_tf_shape, &outbprop_dims);
-      if (!context->status().ok()) return;
-      if (outbprop_mkl_shape.IsMklTensor()) {
-        // If outbackprop is in Mkl layout, then simply grab it.
-        auto outbprop_md = outbprop_mkl_shape.GetMklLayout();
-        outbackprop.SetUsrMem(outbprop_md, &outbprop_tensor);
-      } else {
-        // If outbackprop is in TensorFlow layout, then we need to create memory
-        // descriptor for it. Outbackprop shape is data format order.
-        outbackprop.SetUsrMem(outbprop_dims, tf_fmt, &outbprop_tensor);
-      }
-
-      // Operator specific call to get output shape and data_format.
-      auto bwd_output_dims = GetOutputDims(fwd_input_dims, fwd_filter_dims);
-      auto bwd_output_format = GetOutputFormat(tf_fmt);
-      output.SetUsrMem(bwd_output_dims, bwd_output_format);
-
-      // Create memory descriptors for convolution data w/ no specified format.
-      input.SetOpMemDesc(fwd_input_dims, memory::format::any);
-      filter.SetOpMemDesc(fwd_filter_dims, memory::format::any);
-      outbackprop.SetOpMemDesc(outbprop_dims, memory::format::any);
-      output.SetOpMemDesc(bwd_output_dims, memory::format::any);
-
-      // Operator-specific call to create and execute primitive.
-      Tensor* output_tensor = nullptr;
-      CreatePrimitive(context, cpu_engine, fwd_pd, &input, &filter,
-                      &outbackprop, &output, &output_tensor,
-                      strides, padding_l, padding_r,
-                      TFPaddingToMklDnnPadding(padding_),
-                      bwd_output_dims, bwd_output_format);
-    } catch (mkldnn::error &e) {
-     string error_msg = "Status: " + std::to_string(e.status) +
-                       ", message: " + string(e.message) +
-                       ", in file " + string(__FILE__) + ":" +
-                       std::to_string(__LINE__);
-     OP_REQUIRES_OK(context, errors::Aborted("Operation received an exception:",
-                                            error_msg));
-    }
-  }
-
-  /// Pure virtual function to allow operator to check for validity of input
-  /// shapes. Function asserts that input shapes are valid.
-  virtual void ValidateMklShapes(const MklDnnShape& input_mkl_shape,
-                                 const MklDnnShape& filter_mkl_shape,
-                                 const MklDnnShape& outbprop_mkl_shape) = 0;
-
-  /// Operator-specific function that returns index of input that is
-  /// representing input sizes. For Conv2DBackpropFilter it returns 1 since
-  /// filter for this operator is filter shape. For Conv2DBackpropInput it
-  /// returns 0 (for input).
-  virtual size_t GetInputTensorIndexWithSizes() = 0;
-
-  /// Get TensorFlow shape of input tensor.
-  virtual TensorShape MakeInputTfShape(OpKernelContext* context,
-                                      const Tensor& input_tensor) = 0;
-
-  /// Get TensorFlow shape of filter tensor.
-  virtual TensorShape MakeFilterTfShape(OpKernelContext* context,
-                                       const Tensor& filter_tensor) = 0;
-
-  /// Get shape of output in MKL-DNN order. Computes shape of output from
-  /// input shape (fwd_input_dims) and filter shape (fwd_filter_dims).
-  virtual
-  const memory::dims& GetOutputDims(const memory::dims& fwd_input_dims,
-                                    const memory::dims& fwd_filter_dims) = 0;
-
-  /// Get data_format of output in MKL-DNN order. If output data format is
-  /// same as input data format, then it simply returns value of data_format
-  /// parameter as it is.
-  virtual memory::format GetOutputFormat(const memory::format data_format) = 0;
-
-  /// Create and execute the primitive storing output in the output_tensor.
-  virtual void CreatePrimitive(OpKernelContext* context,
-    const engine& cpu_engine,
-    const convolution_forward::primitive_desc& conv_fwd_pd,
-    MklDnnData<T>* input, MklDnnData<T>* filter, MklDnnData<T>* outbackprop,
-    MklDnnData<T>* output, Tensor** output_tensor, const memory::dims& strides,
-    const memory::dims& padding_l, const memory::dims& padding_r,
-    padding_kind padding, const memory::dims& bwd_output_dims,
-    memory::format bwd_output_format) = 0;
-
-  // Get the data_format {NCHW, NHWC}
-  TensorFormat GetTFDataFormat () { return data_format_; }
-
- private:
-  std::vector<int32> strides_;
-  Padding padding_;
-  TensorFormat data_format_;
-};
 #endif  // INTEL_MKL_DNN
 
-/////////////////////////////////////////////////////////////////////
-///  Dummy Mkl op that is just used for operators that are intermediate
-///  output of node fusion in the graph
-/////////////////////////////////////////////////////////////////////
-
-template <typename Device, typename T>
-class MklDummyOp : public OpKernel {
- public:
-  ~MklDummyOp() {}
-
-  explicit MklDummyOp(OpKernelConstruction* context) :
-    OpKernel(context) {}
-
-  void Compute(OpKernelContext* context) override {
-    TF_CHECK_OK(errors::Unimplemented("This is a dummy op."
-                                      "It should not have been invoked."));
-  }
-};
-
 }  // namespace tensorflow
 
 #endif  // TENSORFLOW_CORE_KERNELS_MKL_CONV_OPS_H_
diff --git a/tensorflow/core/kernels/mkl_fused_batch_norm_op.cc b/tensorflow/core/kernels/mkl_fused_batch_norm_op.cc
index a761562a4b..bc9e906c39 100644
--- a/tensorflow/core/kernels/mkl_fused_batch_norm_op.cc
+++ b/tensorflow/core/kernels/mkl_fused_batch_norm_op.cc
@@ -25,24 +25,10 @@ limitations under the License.
 #include "mkl_dnn_types.h"
 #include "tensorflow/core/util/mkl_util.h"
 
-#ifdef INTEL_MKL_DNN
-#include "mkldnn.hpp"
-
-using mkldnn::stream;
-using mkldnn::prop_kind;
-using mkldnn::use_scale_shift;
-using mkldnn::use_global_stats;
-using mkldnn::batch_normalization_forward;
-using mkldnn::batch_normalization_backward;
-#endif
-
 // TODO(inteltf) Address comments from PR 8968.
 
 namespace tensorflow {
 using CPUDevice = Eigen::ThreadPoolDevice;
-
-#ifndef INTEL_MKL_DNN
-
 template <typename Device, typename T>
 class MklFusedBatchNormOp : public OpKernel {
  public:
@@ -60,6 +46,7 @@ class MklFusedBatchNormOp : public OpKernel {
 
   void Compute(OpKernelContext* context) override {
     MklFusedBatchNormOpContext mkl_context;
+
     const Tensor& input = MklGetInput(context, 0);
     const Tensor& scale = MklGetInput(context, 1);
     const Tensor& shift = MklGetInput(context, 2);
@@ -68,7 +55,6 @@ class MklFusedBatchNormOp : public OpKernel {
 
     GetMklShape(context, 0, &(mkl_context.mkl_shape_input_shape));
     bool input_in_mkl_format = mkl_context.mkl_shape_input_shape.IsMklTensor();
-
     if (!input_in_mkl_format) {
       OP_REQUIRES(context, input.dims() == 4,
                   errors::InvalidArgument("input must be 4-dimensional",
@@ -83,12 +69,10 @@ class MklFusedBatchNormOp : public OpKernel {
     OP_REQUIRES(context, est_mean.dims() == 1,
                 errors::InvalidArgument("estimated_mean must be 1-dimensional",
                                         est_mean.shape().DebugString()));
-
     OP_REQUIRES(
         context, est_variance.dims() == 1,
         errors::InvalidArgument("estimated_variance must be 1-dimensional",
                                 est_variance.shape().DebugString()));
-
     if (is_training_) {
       OP_REQUIRES(context, est_mean.dim_size(0) == 0,
                   errors::InvalidArgument("estimated_mean empty for training",
@@ -274,6 +258,7 @@ class MklFusedBatchNormOp : public OpKernel {
             E_SUCCESS);
       }
     }
+
     void MklPrepareContextInputs(OpKernelContext* context,
                                  Tensor* mkl_tmp_input_buf_tensor,
                                  Tensor* mkl_tmp_scale_shift_buf_tensor) {
@@ -340,6 +325,15 @@ class MklFusedBatchNormOp : public OpKernel {
   } MklFusedBatchNormOpContext;
 };
 
+#define REGISTER_MKL_CPU(T)                                         \
+  REGISTER_KERNEL_BUILDER(Name("_MklFusedBatchNorm")                \
+                              .Device(DEVICE_CPU)                   \
+                              .TypeConstraint<T>("T")               \
+                              .Label(mkl_op_registry::kMklOpLabel), \
+                          MklFusedBatchNormOp<CPUDevice, T>);
+TF_CALL_float(REGISTER_MKL_CPU);
+#undef REGISTER_MKL_CPU
+
 template <typename Device, typename T>
 class MklFusedBatchNormGradOp : public OpKernel {
  public:
@@ -601,7 +595,7 @@ class MklFusedBatchNormGradOp : public OpKernel {
       mkl_res_batchnorm_bwd[dnnResourceSrc] =
           (mkl_convert_input) ? mkl_buf_converted_input : mkl_buf_input;
 
-     bool mkl_convert_out_backprop;
+      bool mkl_convert_out_backprop;
       dnnPrimitive_t mkl_prim_convert_out_backprop = nullptr;
       dnnLayout_t mkl_lt_internal_out_backprop = nullptr;
       void* mkl_buf_converted_out_backprop = nullptr;
@@ -681,628 +675,6 @@ class MklFusedBatchNormGradOp : public OpKernel {
     }
   } MklFusedBatchNormGradOpContext;
 };
-#endif
-
-#ifdef INTEL_MKL_DNN
-
-template <typename Device, typename T>
-class MklFusedBatchNormOp : public OpKernel {
- public:
-  explicit MklFusedBatchNormOp(OpKernelConstruction* context)
-      : OpKernel(context) {
-    float epsilon;
-    OP_REQUIRES_OK(context, context->GetAttr("epsilon", &epsilon));
-    epsilon_ = T(epsilon);
-    string tensor_format;
-    OP_REQUIRES_OK(context, context->GetAttr("data_format", &tensor_format));
-    OP_REQUIRES(context, FormatFromString(tensor_format, &tensor_format_),
-                errors::InvalidArgument("Invalid data format"));
-    OP_REQUIRES_OK(context, context->GetAttr("is_training", &is_training_));
-  }
-
-  void Compute(OpKernelContext* context) override {
-    try {
-      auto cpu_engine = engine(engine::cpu, 0);
-      const size_t src_index = 0;    // index of src input tensor
-      const size_t scale_index = 1;  // index of scale tensor
-      const size_t shift_index = 2;  // index of shift tensor
-      const size_t mean_index = 3;   // index of est_mean tensor
-      const size_t var_index = 4;    // index of est_variance tensor
-
-      const Tensor& src_tensor          = MklGetInput(context, src_index);
-      const Tensor& scale_tensor        = MklGetInput(context, scale_index);
-      const Tensor& shift_tensor        = MklGetInput(context, shift_index);
-      const Tensor& est_mean_tensor     = MklGetInput(context, mean_index);
-      const Tensor& est_variance_tensor = MklGetInput(context, var_index);
-
-      MklDnnShape dnn_shape_src;
-      GetMklShape(context, src_index, &dnn_shape_src);
-
-      if (dnn_shape_src.IsMklTensor()) {
-        OP_REQUIRES(context, dnn_shape_src.GetDimension() == 4,
-                    errors::InvalidArgument(
-                        "input must be 4-dimensional",
-                        src_tensor.shape().DebugString()));
-      } else {
-        OP_REQUIRES(context, src_tensor.dims() == 4,
-                    errors::InvalidArgument(
-                        "input must be 4-dimensional",
-                        src_tensor.shape().DebugString()));
-      }
-      OP_REQUIRES(context, scale_tensor.dims() == 1,
-                  errors::InvalidArgument(
-                      "scale must be 1-dimensional",
-                      scale_tensor.shape().DebugString()));
-      OP_REQUIRES(context, shift_tensor.dims() == 1,
-                  errors::InvalidArgument("offset must be 1-dimensional",
-                                        shift_tensor.shape().DebugString()));
-      OP_REQUIRES(context, est_mean_tensor.dims() == 1,
-                  errors::InvalidArgument(
-                      "estimated_mean must be 1-dimensional",
-                      est_mean_tensor.shape().DebugString()));
-      OP_REQUIRES(context, est_variance_tensor.dims() == 1,
-                  errors::InvalidArgument(
-                      "estimated_variance must be 1-dimensional",
-                      est_variance_tensor.shape().DebugString()));
-
-      if (is_training_) {
-        OP_REQUIRES(context, est_mean_tensor.dim_size(0) == 0,
-                    errors::InvalidArgument(
-                        "estimated_mean must be empty for training",
-                        est_mean_tensor.shape().DebugString()));
-        OP_REQUIRES(context, est_variance_tensor.dim_size(0) == 0,
-                    errors::InvalidArgument(
-                        "estimated_variance must be empty for training",
-                        est_variance_tensor.shape().DebugString()));
-      }
-
-      if (dnn_shape_src.IsMklTensor())
-        depth_ = dnn_shape_src.DimSize(MklDnnDims::Dim_C);
-      else
-        ExtractParams(context);
-
-      // Indices of output tensors
-      const size_t dst_index = 0;
-      const size_t batch_mean_index = 1;
-      const size_t batch_variance_index = 2;
-      const size_t saved_mean_index = 3;
-      const size_t saved_variance_index = 4;
-
-      // allocate batch mean output tensor
-      Tensor* batch_mean_tensor = nullptr;
-      MklDnnShape mkl_shape_batch_mean;
-      mkl_shape_batch_mean.SetMklTensor(false);
-      AllocateOutputSetMklShape(context,
-                                batch_mean_index,
-                                &batch_mean_tensor,
-                                scale_tensor.shape(),
-                                mkl_shape_batch_mean);
-      CHECK_NOTNULL(batch_mean_tensor);
-
-      // Batch variance
-      Tensor* batch_variance_tensor = nullptr;
-      MklDnnShape mkl_shape_batch_variance;
-      mkl_shape_batch_variance.SetMklTensor(false);
-      AllocateOutputSetMklShape(context,
-                                batch_variance_index,
-                                &batch_variance_tensor,
-                                scale_tensor.shape(),
-                                mkl_shape_batch_variance);
-      CHECK_NOTNULL(batch_variance_tensor);
-
-      if (is_training_)
-        SetMeanVariance(*batch_mean_tensor, *batch_variance_tensor);
-      else
-        SetMeanVariance(est_mean_tensor, est_variance_tensor);
-
-      MklDnnData<T> src(&cpu_engine);
-      MklDnnData<T> dst(&cpu_engine);
-
-      memory::format format_m;
-      if (dnn_shape_src.IsMklTensor()) {
-        if (dnn_shape_src.IsTensorInNCHWFormat()) {
-          format_m = memory::format::nchw;
-        } else {
-          format_m = memory::format::nhwc;
-        }
-      } else {
-        format_m = TFDataFormatToMklDnnDataFormat(tensor_format_);
-      }
-
-      // set src primitive
-      memory::dims src_dims;
-      if (dnn_shape_src.IsMklTensor()) {
-        src_dims = TFShapeToMklDnnDimsInNCHW(dnn_shape_src.GetTfShape(),
-                                             tensor_format_);
-      } else {
-        src_dims = TFShapeToMklDnnDimsInNCHW(src_tensor.shape(),
-                                             tensor_format_);
-      }
-
-      auto src_md = dnn_shape_src.IsMklTensor()
-                    ? dnn_shape_src.GetMklLayout()
-                    : memory::desc(src_dims, MklDnnType<T>(), format_m);
-      src.SetUsrMem(src_md, &src_tensor);
-
-      // set weights primitive
-      // MKL-DNN packs scale & shift as "weights":
-      // <scale>...<scale><shift>...<shift>
-      auto weights_desc = memory::desc({2, depth_},
-                                       MklDnnType<T>(),
-                                       memory::format::nc);
-      auto weights_pd = memory::primitive_desc(weights_desc, cpu_engine);
-      auto weights_m = memory(weights_pd);
-      T* weights_data = reinterpret_cast<T*>(
-                        weights_m.get_data_handle());
-      T* scale_tf = reinterpret_cast<T*>(
-                    const_cast<T*>(scale_tensor.flat<T>().data()));
-      T* shift_tf = reinterpret_cast<T*>(
-                    const_cast<T*>(shift_tensor.flat<T>().data()));
-
-      for (int k=0; k < depth_; k++) {
-        weights_data[k] = scale_tf[k];
-        weights_data[k + depth_] = shift_tf[k];
-      }
-
-      // Mean and variance (without Bessel's correction) saved for backward
-      // computation to serve as pre-computed mean and variance.
-      Tensor* saved_mean_tensor = nullptr;
-      MklDnnShape mkl_shape_saved_mean;
-      mkl_shape_saved_mean.SetMklTensor(false);
-      AllocateOutputSetMklShape(context, saved_mean_index,
-                                &saved_mean_tensor,
-                                scale_tensor.shape(),
-                                mkl_shape_saved_mean);
-      CHECK_NOTNULL(saved_mean_tensor);
-
-      Tensor* saved_variance_tensor = nullptr;
-      MklDnnShape mkl_shape_saved_variance;
-      mkl_shape_saved_variance.SetMklTensor(false);
-      AllocateOutputSetMklShape(context, saved_variance_index,
-                                &saved_variance_tensor,
-                                scale_tensor.shape(),
-                                mkl_shape_saved_variance);
-      CHECK_NOTNULL(saved_variance_tensor);
-
-      // set mean primitive
-      auto mean_desc = memory::desc({1, depth_},
-                                    MklDnnType<T>(),
-                                    memory::format::nc);
-      auto mean_pd = memory::primitive_desc(mean_desc, cpu_engine);
-      char* saved_mean_data_tf = reinterpret_cast<char*>
-                                 (saved_mean_tensor->flat<T>().data());
-      std::memcpy(saved_mean_data_tf,
-                  reinterpret_cast<char*>(mean_values_),
-                  depth_*sizeof(T));
-      auto mean_m = memory(mean_pd,
-                           reinterpret_cast<void*>(saved_mean_data_tf));
-
-      // set variance primitive
-      auto variance_desc = memory::desc({1, depth_},
-                                    MklDnnType<T>(),
-                                    memory::format::nc);
-      auto variance_pd = memory::primitive_desc(variance_desc, cpu_engine);
-      char* saved_variance_data_tf = reinterpret_cast<char*>
-                  (saved_variance_tensor->flat<T>().data());
-      std::memcpy(saved_variance_data_tf,
-                  reinterpret_cast<char*>(variance_values_),
-                  depth_*sizeof(T));
-      auto variance_m = memory(variance_pd, saved_variance_data_tf);
-
-      prop_kind pk = (is_training_) ?
-                     prop_kind::forward_training :
-                     prop_kind::forward_scoring;
-      auto bnrm_fwd_desc = batch_normalization_forward::desc(
-                               pk, src.GetUsrMemDesc(), epsilon_,
-                               is_training_ ? use_scale_shift :
-                               (use_scale_shift | use_global_stats));
-      auto bnrm_fwd_pd = batch_normalization_forward::primitive_desc(
-                             bnrm_fwd_desc, cpu_engine);
-
-      // allocate dst tensor
-      MklDnnShape dnn_shape_dst;
-      TensorShape tf_shape_dst;
-      Tensor* dst_tensor = nullptr;
-      if (dnn_shape_src.IsMklTensor()) {
-        dnn_shape_dst.SetMklTensor(true);
-        auto dst_pd = bnrm_fwd_pd.dst_primitive_desc();
-        dnn_shape_dst.SetMklLayout(&dst_pd);
-        dnn_shape_dst.SetElemType(MklDnnType<T>());
-        dnn_shape_dst.SetTfLayout(dnn_shape_src.GetDimension(),
-                                  src_dims, format_m);
-        tf_shape_dst.AddDim(dst_pd.get_size()/sizeof(T));
-      } else {
-        dnn_shape_dst.SetMklTensor(false);
-        tf_shape_dst = src_tensor.shape();
-      }
-      AllocateOutputSetMklShape(context, dst_index, &dst_tensor,
-                                tf_shape_dst, dnn_shape_dst);
-
-      // Output of batchnorm has same shape as input.
-      dst.SetUsrMem(src_md, dst_tensor);
-
-      primitive bnrm_fwd_op;
-      if (is_training_) {
-        bnrm_fwd_op = batch_normalization_forward(
-                          bnrm_fwd_pd,
-                          src.GetOpMem(),
-                          weights_m,
-                          dst.GetOpMem(),
-                          mean_m,
-                          variance_m);
-      } else {
-        bnrm_fwd_op = batch_normalization_forward(
-                          bnrm_fwd_pd,
-                          src.GetOpMem(),
-                          mean_m,
-                          variance_m,
-                          (const primitive::at) weights_m,
-                          dst.GetOpMem());
-      }
-      std::vector<primitive> net;
-      net.push_back(bnrm_fwd_op);
-      stream(stream::kind::eager).submit(net).wait();
-
-      // copy batch_mean data
-      T* batch_mean_data_tf = reinterpret_cast<T*>(
-                                batch_mean_tensor->flat<T>().data());
-      std::memcpy(reinterpret_cast<char*>(batch_mean_data_tf),
-                  reinterpret_cast<char*>(mean_m.get_data_handle()),
-                  depth_*sizeof(T));
-
-      // copy batch_variance data with Bessel's correction
-      // if training mode is on
-      float adjust_factor = 1.0;
-      if (is_training_) {
-        size_t orig_size = src_dims[0] * src_dims[2] * src_dims[3];
-        size_t adjust_size = orig_size - 1;
-        adjust_factor = (static_cast<float>(orig_size)) / adjust_size;
-      }
-      T* batch_variance_data_tf = reinterpret_cast<T*>(
-                                  batch_variance_tensor->flat<T>().data());
-      for (int k=0; k < depth_; k++)
-        batch_variance_data_tf[k] =
-            (reinterpret_cast<T*>(variance_m.get_data_handle()))[k]
-            * adjust_factor;
-    } catch (mkldnn::error &e) {
-      string error_msg = "Status: " + std::to_string(e.status) +
-                         ", message: " + string(e.message) +
-                         ", in file " + string(__FILE__) + ":" +
-                         std::to_string(__LINE__);
-      OP_REQUIRES_OK(context,
-                     errors::Aborted("Operation received an exception:",
-                     error_msg));
-    }
-  }
-
- private:
-  T epsilon_;
-  TensorFormat tensor_format_;
-  bool is_training_;
-  T* mean_values_;
-  T* variance_values_;
-  size_t depth_;          // batch normalization is done for per channel.
-
-  void ExtractParams(OpKernelContext* context) {
-    const Tensor& input = MklGetInput(context, 0);
-    depth_ = static_cast<int>(GetTensorDim(input, tensor_format_, 'C'));
-  }
-
-  void SetMeanVariance(const Tensor& mean, const Tensor& variance) {
-    mean_values_ = reinterpret_cast<T*>(
-                       const_cast<T*>(mean.flat<T>().data()));
-    variance_values_ = reinterpret_cast<T*>(
-                       const_cast<T*>(variance.flat<T>().data()));
-  }
-};
-
-
-template <typename Device, typename T>
-class MklFusedBatchNormGradOp : public OpKernel {
- public:
-  explicit MklFusedBatchNormGradOp(OpKernelConstruction* context)
-      : OpKernel(context) {
-    float epsilon;
-    OP_REQUIRES_OK(context, context->GetAttr("epsilon", &epsilon));
-    epsilon_ = T(epsilon);
-    string tensor_format;
-    OP_REQUIRES_OK(context, context->GetAttr("data_format", &tensor_format));
-    OP_REQUIRES(context, FormatFromString(tensor_format, &tensor_format_),
-                errors::InvalidArgument("Invalid data format"));
-  }
-
-  void Compute(OpKernelContext* context) override {
-    try {
-      auto cpu_engine = engine(engine::cpu, 0);
-
-      const size_t diff_dst_index = 0;  // index of diff_dst tensor
-      const size_t src_index = 1;       // index of src input tensor
-      const size_t scale_index = 2;     // index of scale tensor
-      const size_t mean_index = 3;      // index of saved_mean tensor
-      const size_t variance_index = 4;  // index of saved_variance tensor
-      const Tensor& diff_dst_tensor = MklGetInput(context, diff_dst_index);
-      const Tensor& src_tensor = MklGetInput(context, src_index);
-      const Tensor& scale_tensor = MklGetInput(context, scale_index);
-      const Tensor& saved_mean_tensor = MklGetInput(context, mean_index);
-      const Tensor& saved_variance_tensor = MklGetInput(context,
-                                            variance_index);
-
-      MklDnnShape dnn_shape_src, dnn_shape_diff_dst;
-      GetMklShape(context, src_index, &dnn_shape_src);
-      GetMklShape(context, diff_dst_index, &dnn_shape_diff_dst);
-
-      if (dnn_shape_diff_dst.IsMklTensor()) {
-        OP_REQUIRES(context, dnn_shape_diff_dst.GetDimension() == 4,
-                    errors::InvalidArgument(
-                        "input must be 4-dimensional",
-                        diff_dst_tensor.shape().DebugString()));
-      } else {
-        OP_REQUIRES(context, diff_dst_tensor.dims() == 4,
-                    errors::InvalidArgument(
-                        "input must be 4-dimensional",
-                        diff_dst_tensor.shape().DebugString()));
-      }
-
-      if (dnn_shape_src.IsMklTensor()) {
-        OP_REQUIRES(context, dnn_shape_src.GetDimension() == 4,
-                    errors::InvalidArgument(
-                        "input must be 4-dimensional",
-                         src_tensor.shape().DebugString()));
-      } else {
-        OP_REQUIRES(context, src_tensor.dims() == 4,
-                    errors::InvalidArgument(
-                        "input must be 4-dimensional",
-                        src_tensor.shape().DebugString()));
-      }
-
-      OP_REQUIRES(context, scale_tensor.dims() == 1,
-                  errors::InvalidArgument(
-                      "scale must be 1-dimensional",
-                      scale_tensor.shape().DebugString()));
-      OP_REQUIRES(context, saved_mean_tensor.dims() == 1,
-                  errors::InvalidArgument(
-                      "saved mean must be 1-dimensional",
-                       saved_mean_tensor.shape().DebugString()));
-
-      OP_REQUIRES(context, saved_variance_tensor.dims() == 1,
-                  errors::InvalidArgument(
-                      "saved variance must be 1-dimensional",
-                      saved_variance_tensor.shape().DebugString()));
-
-      if (dnn_shape_src.IsMklTensor())
-        depth_ = dnn_shape_src.DimSize(MklDnnDims::Dim_C);
-      else
-        ExtractParams(context);
-
-      memory::format format_m;
-      if (dnn_shape_src.IsMklTensor()) {
-        if (dnn_shape_src.IsTensorInNCHWFormat())
-          format_m = memory::format::nchw;
-        else
-          format_m = memory::format::nhwc;
-      } else {
-        format_m = TFDataFormatToMklDnnDataFormat(tensor_format_);
-      }
-
-      MklDnnData<T> src(&cpu_engine);
-      MklDnnData<T> mean(&cpu_engine);
-      MklDnnData<T> variance(&cpu_engine);
-      MklDnnData<T> diff_dst(&cpu_engine);
-      MklDnnData<T> diff_src(&cpu_engine);
-
-      memory::dims src_dims, diff_dst_dims;
-      if (dnn_shape_src.IsMklTensor())
-        src_dims = TFShapeToMklDnnDimsInNCHW(
-                       dnn_shape_src.GetTfShape(), tensor_format_);
-      else
-        src_dims = TFShapeToMklDnnDimsInNCHW(
-                       src_tensor.shape(), tensor_format_);
-
-      if (dnn_shape_diff_dst.IsMklTensor())
-        diff_dst_dims = TFShapeToMklDnnDimsInNCHW(
-                            dnn_shape_diff_dst.GetTfShape(),
-                            tensor_format_);
-      else
-        diff_dst_dims = TFShapeToMklDnnDimsInNCHW(
-                            diff_dst_tensor.shape(),
-                            tensor_format_);
-
-      // set src and diff_dst primitives
-      memory::desc src_md({}, memory::data_undef, memory::format_undef);
-      memory::desc diff_dst_md({}, memory::data_undef, memory::format_undef);
-      if (dnn_shape_src.IsMklTensor() || dnn_shape_diff_dst.IsMklTensor()) {
-        if (dnn_shape_src.IsMklTensor()) {
-          src_md = dnn_shape_src.GetMklLayout();
-          diff_dst_md = src_md;
-        } else {
-          diff_dst_md = dnn_shape_diff_dst.GetMklLayout();
-          src_md = diff_dst_md;
-        }
-      } else {
-        src_md =  memory::desc(src_dims, MklDnnType<T>(), format_m);
-        diff_dst_md = src_md;
-      }
-      src.SetUsrMem(src_md, &src_tensor);
-      diff_dst.SetUsrMem(diff_dst_md, &diff_dst_tensor);
-
-      // weights -- DNN packs scales/shifts as weights in order of
-      // scale, ..., scale, shift, ..., shift
-      auto weights_desc = memory::desc({2, depth_},
-                                       MklDnnType<T>(),
-                                       memory::format::nc);
-      auto weights_pd = memory::primitive_desc(weights_desc, cpu_engine);
-      auto weights_m = memory(weights_pd);
-      T* weights_data = reinterpret_cast<T*>(weights_m.get_data_handle());
-      T* scale_tf = reinterpret_cast<T*>(const_cast<T*>
-                                        (scale_tensor.flat<T>().data()));
-      for (int k=0; k < depth_; k++) {
-        weights_data[k] = scale_tf[k];
-        weights_data[k + depth_] = 0;
-      }
-
-      // set mean primitive
-      memory::dims mv_dims = GetMeanVarianceDims();
-      mean.SetUsrMem(mv_dims,
-                     memory::format::nc,
-                     const_cast<void*>(static_cast<const void*>
-                     (saved_mean_tensor.flat<T>().data())));
-      mean.SetOpMemDesc(mv_dims, memory::format::nc);
-
-      // set variance primitive
-      variance.SetUsrMem(mv_dims,  memory::format::nc,
-                         const_cast<void*>(static_cast<const void*>
-                         (saved_variance_tensor.flat<T>().data())));
-      variance.SetOpMemDesc(mv_dims, memory::format::nc);
-
-      // set diff_weight primitive
-      auto diff_weights_desc = memory::desc(
-                                 {2, depth_},
-                                 MklDnnType<T>(),
-                                 memory::format::nc);
-      auto diff_weights_pd = memory::primitive_desc(
-                                diff_weights_desc,
-                                cpu_engine);
-      auto diff_weights_m = memory(diff_weights_pd);
-
-      auto bnrm_fwd_desc = batch_normalization_forward::desc(
-                                prop_kind::forward_training,
-                                src.GetUsrMemDesc(),
-                                epsilon_,
-                                use_scale_shift);
-      auto bnrm_fwd_pd = batch_normalization_forward::primitive_desc(
-                                bnrm_fwd_desc,
-                                cpu_engine);
-
-      // Indices of output tensors
-      const size_t diff_src_index = 0;    // index of diff_src tensor
-      const size_t diff_scale_index = 1;  // index of diff_scale tensor
-      const size_t diff_shift_index = 2;  // index of diff_shift tensor
-      const size_t p1_index = 3;  // index of 1st placeholder tensor
-      const size_t p2_index = 4;  // index of 2nd placeholder tensor
-
-      // allocate diff_src tensor
-      MklDnnShape dnn_shape_diff_src;
-      TensorShape tf_shape_diff_src;
-      Tensor* diff_src_tensor = nullptr;
-      if (dnn_shape_src.IsMklTensor()) {
-        dnn_shape_diff_src.SetMklTensor(true);
-        auto diff_src_pd = bnrm_fwd_pd.dst_primitive_desc();
-        dnn_shape_diff_src.SetMklLayout(&diff_src_pd);
-        dnn_shape_diff_src.SetElemType(MklDnnType<T>());
-        dnn_shape_diff_src.SetTfLayout(
-                              dnn_shape_src.GetDimension(),
-                              src_dims,
-                              format_m);
-        dnn_shape_diff_src.SetTfDimOrder(
-                              dnn_shape_src.GetDimension(),
-                              tensor_format_);
-        tf_shape_diff_src.AddDim(diff_src_pd.get_size()/sizeof(T));
-      } else {
-        dnn_shape_diff_src.SetMklTensor(false);
-        tf_shape_diff_src = src_tensor.shape();
-      }
-      AllocateOutputSetMklShape(context, diff_src_index, &diff_src_tensor,
-                                tf_shape_diff_src, dnn_shape_diff_src);
-
-      diff_src.SetUsrMem(src_md, diff_src_tensor);
-
-      prop_kind pk = prop_kind::backward;
-      auto bnrm_bwd_desc = batch_normalization_backward::desc(
-                               pk,
-                               diff_src.GetUsrMemDesc(),
-                               src.GetUsrMemDesc(),
-                               epsilon_,
-                               use_scale_shift);
-      auto bnrm_bwd_pd = batch_normalization_backward::primitive_desc(
-                               bnrm_bwd_desc,
-                               cpu_engine,
-                               bnrm_fwd_pd);
-
-      auto bnrm_bwd_op = batch_normalization_backward(
-                               bnrm_bwd_pd,
-                               src.GetOpMem(),
-                               mean.GetOpMem(),
-                               variance.GetOpMem(),
-                               diff_dst.GetOpMem(),
-                               weights_m,
-                               diff_src.GetOpMem(),
-                               diff_weights_m);
-
-      std::vector<primitive> net;
-      net.push_back(bnrm_bwd_op);
-      stream(stream::kind::eager).submit(net).wait();
-
-      // separate out scale and shift grad and copy to individual tensors
-      const TensorShape& tf_shape_scale_shift = scale_tensor.shape();
-      Tensor* diff_scale_tensor = nullptr;
-      MklDnnShape mkl_shape_diff_scale;
-      mkl_shape_diff_scale.SetMklTensor(false);
-      AllocateOutputSetMklShape(context, diff_scale_index, &diff_scale_tensor,
-                                tf_shape_scale_shift, mkl_shape_diff_scale);
-
-      Tensor* diff_shift_tensor = nullptr;
-      MklDnnShape mkl_shape_diff_shift;
-      mkl_shape_diff_shift.SetMklTensor(false);
-      AllocateOutputSetMklShape(context, diff_shift_index, &diff_shift_tensor,
-                                tf_shape_scale_shift, mkl_shape_diff_shift);
-
-      // copy data: diff_scale and diff_shift
-      T* diff_weights_data_dnn = reinterpret_cast<T*>
-                                 (diff_weights_m.get_data_handle());
-      float* diff_scale_data_tf = const_cast<float*>(
-             static_cast<const float*>(diff_scale_tensor->flat<T>().data()));
-      float* diff_shift_data_tf = const_cast<float*>(
-             static_cast<const float*>(diff_shift_tensor->flat<T>().data()));
-      for (int i = 0; i < depth_; i++) {
-        diff_scale_data_tf[i] = diff_weights_data_dnn[i];
-        diff_shift_data_tf[i] = diff_weights_data_dnn[i + depth_];
-      }
-
-      // Placeholders for estimated_mean and estimated_variance, which are
-      // used for inference and thus not needed here for gradient computation.
-      Tensor* p1_tensor = nullptr, *p2_tensor = nullptr;
-      MklDnnShape mkl_shape_p;
-      mkl_shape_p.SetMklTensor(false);
-      AllocateOutputSetMklShape(context, p1_index, &p1_tensor,
-                                TensorShape({}), mkl_shape_p);
-      AllocateOutputSetMklShape(context, p2_index, &p2_tensor,
-                                TensorShape({}), mkl_shape_p);
-    } catch (mkldnn::error &e) {
-      string error_msg = "Status: " + std::to_string(e.status) +
-                          ", message: " + string(e.message) +
-                          ", in file " + string(__FILE__) + ":" +
-                          std::to_string(__LINE__);
-      OP_REQUIRES_OK(context,
-                     errors::Aborted("Operation received an exception:",
-                     error_msg));
-    }
-  }
-
- private:
-  T epsilon_;
-  TensorFormat tensor_format_;
-  int depth_;             // batch normalization is done for per channel.
-
-  void ExtractParams(OpKernelContext* context) {
-      const Tensor& input = MklGetInput(context, 0);
-      depth_ = static_cast<int>(GetTensorDim(input, tensor_format_, 'C'));
-  }
-
-  memory::dims GetMeanVarianceDims() {
-    return memory::dims({1, depth_});
-  }
-};
-
-#endif
-
-#define REGISTER_MKL_CPU(T)                                         \
-  REGISTER_KERNEL_BUILDER(Name("_MklFusedBatchNorm")                \
-                              .Device(DEVICE_CPU)                   \
-                              .TypeConstraint<T>("T")               \
-                              .Label(mkl_op_registry::kMklOpLabel), \
-                          MklFusedBatchNormOp<CPUDevice, T>);
-TF_CALL_float(REGISTER_MKL_CPU);
-#undef REGISTER_MKL_CPU
 
 #define REGISTER_MKL_CPU(T)                                         \
   REGISTER_KERNEL_BUILDER(Name("_MklFusedBatchNormGrad")            \
diff --git a/tensorflow/core/kernels/mkl_identity_op.cc b/tensorflow/core/kernels/mkl_identity_op.cc
index 9ee27ee21c..f31e7afd46 100644
--- a/tensorflow/core/kernels/mkl_identity_op.cc
+++ b/tensorflow/core/kernels/mkl_identity_op.cc
@@ -28,15 +28,8 @@ limitations under the License.
 #include "mkl_dnn_types.h"
 #include "tensorflow/core/util/mkl_util.h"
 
-#ifdef INTEL_MKL_DNN
-#include "mkldnn.hpp"
-#endif
-
 namespace tensorflow {
 typedef Eigen::ThreadPoolDevice CPUDevice;
-
-#ifndef INTEL_MKL_DNN
-
 template <typename Device, typename T>
 class MklIdentityOp : public OpKernel {
  public:
@@ -57,32 +50,6 @@ class MklIdentityOp : public OpKernel {
   bool IsExpensive() override { return false; }
 };
 
-#else
-
-template <typename Device, typename T>
-class MklIdentityOp : public OpKernel {
- public:
-  explicit MklIdentityOp(OpKernelConstruction* context) : OpKernel(context) {}
-
-  void Compute(OpKernelContext* context) override {
-    MklDnnShape dnn_shape_input;
-    const int kInputIdx = 0, kOutputIdx = 0;
-    GetMklShape(context, kInputIdx, &dnn_shape_input);
-
-    if (dnn_shape_input.IsMklTensor()) {
-      ForwardMklTensorInToOut(context, kInputIdx, kOutputIdx);
-    } else {
-      ForwardTfTensorInToOut(context, kInputIdx, kOutputIdx);
-    }
-  }
-
-  // TensorFlow's IdentityOp has the following member function, so kept it
-  // as it is.
-  bool IsExpensive() override { return false; }
-};
-
-#endif
-
 #define REGISTER_MKL_CPU(T)                                         \
   REGISTER_KERNEL_BUILDER(Name("_MklIdentity")                      \
                               .Device(DEVICE_CPU)                   \
diff --git a/tensorflow/core/kernels/mkl_input_conversion_op.cc b/tensorflow/core/kernels/mkl_input_conversion_op.cc
index 001834b13b..b58e44e398 100644
--- a/tensorflow/core/kernels/mkl_input_conversion_op.cc
+++ b/tensorflow/core/kernels/mkl_input_conversion_op.cc
@@ -31,12 +31,6 @@ limitations under the License.
 #include "tensorflow/core/kernels/mkl_tfconv_op.h"
 #include "tensorflow/core/util/mkl_util.h"
 
-#ifdef INTEL_MKL_DNN
-#include "mkldnn.hpp"
-
-using mkldnn::stream;
-#endif
-
 namespace tensorflow {
 typedef Eigen::ThreadPoolDevice CPUDevice;
 
@@ -50,16 +44,15 @@ typedef Eigen::ThreadPoolDevice CPUDevice;
 // else if both inputs are in mkl format:
 //   if both have the same shape:
 //     pass the inputs through to the output
-//   else:
-//     convert both to TF
+// 	else:
+// 		convert both to TF
 // else if one is TF and one is MKL:
-//   if broadcast is needed:
-//     convert the MKL format input to TF format
-//   else:
-//     convert the TF format input to MKL format
+// 	if broadcast is needed:
+// 		convert the MKL format input to TF format
+// 	else:
+// 		convert the TF format input to MKL format
 ///////////////////////////////////////////////////////////
 
-#ifndef INTEL_MKL_DNN
 template <typename Device, typename T>
 class MklInputConversionOp : public OpKernel {
  public:
@@ -249,199 +242,6 @@ class MklInputConversionOp : public OpKernel {
   bool has_avx512f_ = false;
 };
 
-#else
-
-template <typename Device, typename T>
-class MklInputConversionOp : public OpKernel {
- public:
-  explicit MklInputConversionOp(OpKernelConstruction* context)
-      : OpKernel(context) {
-    OP_REQUIRES_OK(context, context->GetAttr("data_format", &data_format_str));
-    OP_REQUIRES_OK(context, context->GetAttr("T", &op_data_type));
-    has_avx512f_ = port::TestCPUFeature(port::CPUFeature::AVX512F);
-  }
-
- private:
-  void Compute(OpKernelContext* context) override {
-    const Tensor& input_tensor_0 = MklGetInput(context, 0);
-    MklDnnShape input_shape_0;
-    GetMklShape(context, 0, &input_shape_0);
-
-    const Tensor& input_tensor_1 = MklGetInput(context, 1);
-    MklDnnShape input_shape_1;
-    GetMklShape(context, 1, &input_shape_1);
-
-    bool tf_shapes_are_same = context->input(0).shape() ==
-                              context->input(1).shape();
-
-    VLOG(1) << "MklInputConversionOp: Input shapes are "
-            << (tf_shapes_are_same ? "*same*" : "*different*") << ": "
-            << context->input(0).shape().DebugString() << " and "
-            << context->input(1).shape().DebugString();
-
-    // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-    // if both inputs are in TF format, just copy input tensors to output.
-    if (!input_shape_0.IsMklTensor() && !input_shape_1.IsMklTensor()) {
-      VLOG(1) << "MklInputConversionOp: No conversion needed, "
-              << "copying TF inputs to output";
-
-      ForwardTfTensorInToOut(context, 0, 0);
-      ForwardTfTensorInToOut(context, 1, 1);
-      return;
-    }
-
-    // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-    // If both inputs are in MKL format
-    if (input_shape_0.IsMklTensor() && input_shape_1.IsMklTensor()) {
-      // If both have the same shape, pass them through
-      if (tf_shapes_are_same) {
-        VLOG(1) << "MklInputConversionOp: No conversion needed, "
-                << "copying MKL inputs with identical shapes to output";
-
-        ForwardMklTensorInToOut(context, 0, 0);
-        ForwardMklTensorInToOut(context, 1, 1);
-        return;
-      }
-
-      // Sanity check
-      bool mkl_shapes_are_same = input_shape_0 == input_shape_1;
-      if (mkl_shapes_are_same) {
-        CHECK(false) << "MklInputConversionOp: Unexpected: TF shapes are "
-                        "different but MKL shapes are same";
-      }
-
-      // Both have different shapes, so broadcast will be necessary.
-      // Convert to TF and pass both tensors through (we can't do broadcast
-      // with MKL tensors)
-      VLOG(1) << "MklInputConversionOp: Broadcast needed, "
-              << "converted MKL inputs to TF format";
-
-      MklToTfOp<Device, T>::ConvertMklToTf(this, context, data_format_str,
-                                           op_data_type, has_avx512f_, 0);
-      MklToTfOp<Device, T>::ConvertMklToTf(this, context, data_format_str,
-                                           op_data_type, has_avx512f_, 1);
-      SetDummyMklShapeOutput(context, 0);
-      SetDummyMklShapeOutput(context, 1);
-      return;
-    }
-
-    // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-    // One input is MKL and one is TF. If no broadcast is needed, convert
-    // the TF tensor to MKL, otherwise convert the MKL tensor to TF format
-    VLOG(1) << "MklInputConversionOp: Inputs in different formats (MKL/TF)";
-
-    const Tensor* mkl_tensor;
-    const MklDnnShape* mkl_shape;
-    const Tensor* tf_tensor;
-    MklDnnShape* tf_mkl_shape;
-    uint mkl_tensor_index;
-    uint tf_tensor_index;
-    if (input_shape_0.IsMklTensor() && !input_shape_1.IsMklTensor()) {
-      mkl_tensor = &input_tensor_0;
-      mkl_shape = &input_shape_0;
-      mkl_tensor_index = 0;
-      tf_tensor = &input_tensor_1;
-      tf_mkl_shape = &input_shape_1;
-      tf_tensor_index = 1;
-    } else if (!input_shape_0.IsMklTensor() && input_shape_1.IsMklTensor()) {
-      mkl_tensor = &input_tensor_1;
-      mkl_shape = &input_shape_1;
-      mkl_tensor_index = 1;
-      tf_tensor = &input_tensor_0;
-      tf_mkl_shape = &input_shape_0;
-      tf_tensor_index = 0;
-    } else {
-      CHECK(false) << "MklInputConversionOp: Unexpected combination of input "
-                      "shapes for MKL "
-                   << "element-wise op";
-    }
-
-    // Broadcast is needed if the shapes are not the same
-    bool broadcast_needed;
-
-    size_t in0_size = 1;
-    for (size_t i = 0; i < mkl_shape->GetDimension(); ++i)
-      in0_size *= mkl_shape->TfDimSize(i);
-
-    size_t in1_size = 1;
-    for (size_t i = 0; i < tf_tensor->shape().dims(); ++i)
-      in1_size *= tf_tensor->shape().dim_size(i);
-
-    broadcast_needed = (in0_size != in1_size);
-
-    if (!broadcast_needed) {
-      // Both shapes are same, convert the TF input to MKL
-      VLOG(1) << "MklInputConversionOp: No broadcast needed.";
-      VLOG(1) << "MklInputConversionOp: Converting input " << tf_tensor_index
-              << " to MKL format";
-
-      // Create MklDnnShape for output Mkl tensor.
-      Tensor* tensor_out;
-      MklDnnShape mkl_output_mkl_shape;
-      mkl_output_mkl_shape.SetMklTensor(true);
-      mkl_output_mkl_shape.SetElemType(MklDnnType<T>());
-      mkl_output_mkl_shape.SetTfLayout(mkl_shape->GetDimension(),
-                                       mkl_shape->GetSizesAsMklDnnDims(),
-                                       mkl_shape->GetTfDataFormat());
-      // ** Temporarily borrow the layout from the MKL input **
-      auto output_mkl_md = mkl_shape->GetMklLayout();
-      mkl_output_mkl_shape.SetMklLayout(&output_mkl_md);
-
-      // Create output Mkl tensor
-      AllocateOutputSetMklShape(context, tf_tensor_index, &tensor_out,
-                                mkl_tensor->shape(), mkl_output_mkl_shape);
-
-      // Create MklDnnData object for input tensor. Input tensor is in
-      // Tensorflow layout.
-      auto cpu_engine = engine(engine::cpu, 0);
-      MklDnnData<T> tf_input(&cpu_engine);
-      auto input_tf_md = mkl_output_mkl_shape.GetTfLayout();
-      tf_input.SetUsrMem(input_tf_md, &tf_tensor);
-
-      // Create reorder between tensorflow layout and Mkl layout.
-      std::vector<primitive> net;
-      CHECK_EQ(tf_input.CheckReorderToOpMem(memory::primitive_desc(
-                                            output_mkl_md, cpu_engine),
-                                            tensor_out, &net),
-               true);
-      stream(stream::kind::eager).submit(net).wait();
-
-      // -- The tensor in MKL format passes through --
-      ForwardMklTensorInToOut(context, mkl_tensor_index, mkl_tensor_index);
-    } else {
-      // Broadcast is needed, so convert the MKL input to TF
-      VLOG(1) << "MklInputConversionOp: Broadcast needed.";
-      VLOG(1) << "MklInputConversionOp: Converting input " << mkl_tensor_index
-              << " to TF format";
-      MklToTfOp<Device, T>::ConvertMklToTf(this, context, data_format_str,
-                                           op_data_type, has_avx512f_,
-                                           mkl_tensor_index);
-      SetDummyMklShapeOutput(context, mkl_tensor_index);
-
-      // The tensor in TF format passes through
-      ForwardTfTensorInToOut(context, tf_tensor_index, tf_tensor_index);
-    }
-
-    VLOG(1) << "MklInputConversionOp: Shapes (output): "
-            << context->mutable_output(0)->shape().DebugString() << " and "
-            << context->mutable_output(1)->shape().DebugString();
-
-    VLOG(1) << "MklInputConversion completed successfully.";
-  }
-
- private:
-  /// Data format of the operation
-  string data_format_str;
-
-  /// Data type of the operation
-  DataType op_data_type;
-
-  /// CPUIDInfo
-  bool has_avx512f_ = false;
-};
-
-#endif
-
 ///////////////////////////////////////////////////////////
 //               Register kernel
 ///////////////////////////////////////////////////////////
@@ -453,10 +253,7 @@ class MklInputConversionOp : public OpKernel {
                               .Label(mkl_op_registry::kMklOpLabel), \
                           MklInputConversionOp<CPUDevice, T>);
 
-// TODO(nhasabni): We cannot support all number types since MklDnn does
-// not support types.
-// TF_CALL_NUMBER_TYPES(REGISTER_CPU);
-TF_CALL_float(REGISTER_CPU);
+TF_CALL_NUMBER_TYPES(REGISTER_CPU);
 #undef REGISTER_CPU
 }  // namespace tensorflow
 #endif  // INTEL_MKL
diff --git a/tensorflow/core/kernels/mkl_lrn_op.cc b/tensorflow/core/kernels/mkl_lrn_op.cc
index 227765e46d..aa08e93924 100644
--- a/tensorflow/core/kernels/mkl_lrn_op.cc
+++ b/tensorflow/core/kernels/mkl_lrn_op.cc
@@ -17,7 +17,7 @@ limitations under the License.
 // See docs in ../ops/nn_ops.cc. This opkernel uses MKL library, create MKL
 // layout and primitives, use MKL dnn primitives to compute local
 // response normalization
-#undef INTEL_MKL
+
 #ifdef INTEL_MKL
 
 #define EIGEN_USE_THREADS
diff --git a/tensorflow/core/kernels/mkl_maxpooling_op.cc b/tensorflow/core/kernels/mkl_maxpooling_op.cc
index de4d7d2e72..846bb5710d 100644
--- a/tensorflow/core/kernels/mkl_maxpooling_op.cc
+++ b/tensorflow/core/kernels/mkl_maxpooling_op.cc
@@ -16,32 +16,17 @@ limitations under the License.
 // See docs in ../ops/nn_ops.cc.
 #ifdef INTEL_MKL
 #define EIGEN_USE_THREADS
+
 #include "tensorflow/core/framework/op_kernel.h"
 #include "tensorflow/core/kernels/mkl_pooling_ops_common.h"
 #include "tensorflow/core/lib/core/errors.h"
 #include "tensorflow/core/util/mkl_util.h"
 #include "tensorflow/core/util/padding.h"
 
-#ifdef INTEL_MKL_DNN
-#include <algorithm>
-#include "mkldnn.hpp"
-using mkldnn::memory;
-using mkldnn::error;
-using mkldnn::pooling_forward;
-using mkldnn::pooling_backward;
-using mkldnn::padding_kind;
-using mkldnn::engine;
-using mkldnn::prop_kind;
-using mkldnn::algorithm;
-#endif
-
 namespace tensorflow {
 
 typedef Eigen::ThreadPoolDevice CPUDevice;
 
-// For now, MKL-ML is default. So making MKL-DNN not a default choice.
-#ifndef INTEL_MKL_DNN
-
 // An implementation of MaxPooling (forward).
 template <typename Device, typename T>
 class MklMaxPoolingOp : public OpKernel {
@@ -490,348 +475,8 @@ class MklMaxPoolingGradOp : public OpKernel {
   TensorFormat data_format_;
 
   bool workspace_enabled_;
-};  // MklMaxPoolingGradOp
-
-#else  // INTEL_MKL_DNN is defined
-
-// An implementation of MaxPooling (forward).
-template <typename Device, typename T>
-class MklMaxPoolingOp : public MklPoolingForwardOpBase<T> {
- public:
-  explicit MklMaxPoolingOp(OpKernelConstruction* context)
-            : MklPoolingForwardOpBase<T>(context) {
-    // In Max Pooling, MKLDNN does not allow passing workspace as NULL.
-    // So we set workspace_enabled_ to true.
-    this->workspace_enabled_ = true;
-  }
-
-  void Compute(OpKernelContext* context) override {
-    try {
-      auto cpu_engine = engine(engine::cpu, 0);
-      const Tensor& input_tensor = MklGetInput(context,
-                this->kInputTensorIndexInput);
-      MklDnnShape dnn_shape_input;
-      GetMklShape(context, this->kInputTensorIndexInput, &dnn_shape_input);
-      this->SanityCheckInput(context, input_tensor, dnn_shape_input);
-      if (!context->status().ok()) return;
-
-      MklDnnData<T> dnn_data_input(&cpu_engine);
-      MklDnnData<T> dnn_data_output(&cpu_engine);
-      MklDnnData<T> dnn_data_wksp(&cpu_engine);
-
-      // initialize variables for the pooling op
-      MklPoolParameters pool_params;
-      // Get the input tensor and initialize the pooling parameters
-      this->ConfigureInput(context, dnn_shape_input,
-                        input_tensor, &pool_params,
-                        &dnn_data_input);
-      OP_REQUIRES_OK(context, context->status());
-
-      // Declare output tensor
-      Tensor* output_tensor = nullptr;
-      memory::dims output_dims_mkl_order;
-      this->GetOutputDims(pool_params, &output_dims_mkl_order);
-
-      // If input is in Mkl layout, then just get the memory format from it
-      // directly, instead of using input data_format to MaxPool.
-      if (dnn_shape_input.IsMklTensor()) {
-        dnn_data_output.SetUsrMem(output_dims_mkl_order,
-                                  static_cast<memory::format>(
-              dnn_data_input.GetUsrMemDesc().data.format));
-      } else {
-        dnn_data_output.SetUsrMem(output_dims_mkl_order,
-                                  this->data_format_mkldnn_);
-      }
-
-      // describe the memory layout; let mkl-dnn choose the best for the op
-      dnn_data_output.SetOpMemDesc(output_dims_mkl_order, memory::format::any);
-
-      auto pool_desc = pooling_forward::desc(prop_kind::forward,
-            algorithm::pooling_max,
-            dnn_data_input.GetUsrMemDesc(),
-            dnn_data_output.GetUsrMemDesc(),
-            memory::dims({  pool_params.row_stride,
-                            pool_params.col_stride}),
-            memory::dims({  pool_params.window_rows,
-                            pool_params.window_cols}),
-            memory::dims({  static_cast<int>(pool_params.pad_top),
-                            static_cast<int>(pool_params.pad_left)}),
-            memory::dims({  static_cast<int>(pool_params.pad_bottom),
-                            static_cast<int>(pool_params.pad_right)}),
-            TFPaddingToMklDnnPadding(this->padding_));
-        auto pool_fwd_desc = pooling_forward::primitive_desc(pool_desc,
-            cpu_engine);
-
-      this->AllocateOutputTensor(context, pool_fwd_desc, output_dims_mkl_order,
-                            this->data_format_mkldnn_, &output_tensor);
-      OP_REQUIRES_OK(context, context->status());
-      dnn_data_output.SetUsrMemDataHandle(output_tensor);
-
-      AllocateWorkspaceTensor(context, pool_fwd_desc, &dnn_data_wksp);
-      OP_REQUIRES_OK(context, context->status());
-
-      this->PrepareAndExecuteNet(pool_fwd_desc, &dnn_data_input,
-                        &dnn_data_output, &dnn_data_wksp);
-    } catch (mkldnn::error &e) {
-        string error_msg = "Status: " + std::to_string(e.status) +
-                        ", message: " + string(e.message) +
-                        ", in file " + string(__FILE__) + ":" +
-                        std::to_string(__LINE__);
-        OP_REQUIRES_OK(context,
-                        errors::Aborted("Compute received an exception:",
-                                         error_msg));
-    }
-  }  // Compute
-
- private:
-    const int kOutputTensorIndexWorkspace = 1;
-
-    void AllocateWorkspaceTensor(OpKernelContext* context,
-                const pooling_forward::primitive_desc& pool_fwd_prim_desc,
-                MklDnnData<T>* dnn_data_wksp) {
-        CHECK_NOTNULL(dnn_data_wksp);
-        Tensor* workspace_tensor = nullptr;
-        memory::primitive_desc workspace_pd
-                    = pool_fwd_prim_desc.workspace_primitive_desc();
-        size_t workspace_t_elems = this->GetNumTElements(workspace_pd);
-        MklDnnShape workspace_mkl_shape;
-        workspace_mkl_shape.SetMklTensor(false);
-        TensorShape workspace_tf_shape;
-        workspace_tf_shape.AddDim(workspace_t_elems);
-        AllocateOutputSetMklShape(context, kOutputTensorIndexWorkspace,
-                                &workspace_tensor,
-                                workspace_tf_shape, workspace_mkl_shape);
-        CHECK_NOTNULL(workspace_tensor);
-        dnn_data_wksp->SetUsrMem(workspace_pd, workspace_tensor);
-    }
 };
 
-// The operation to compute MaxPool gradients.
-// It takes three inputs:
-//   - The original input tensor
-//   - The original output tensor
-//   - Backprop tensor for output
-// It produces one output: backprop tensor for input.
-template <class Device, class T>
-class MklMaxPoolingGradOp : public MklPoolingBackwardOpBase<T> {
- public:
-  explicit MklMaxPoolingGradOp(OpKernelConstruction* context)
-      : MklPoolingBackwardOpBase<T>(context) {
-  }
-
-  void Compute(OpKernelContext* context) override {
-    try {
-        auto cpu_engine = engine(engine::cpu, 0);
-        const Tensor& orig_input_tensor = MklGetInput(context,
-            kInputTensorIndexOrigInput);
-        const Tensor& orig_output_tensor = MklGetInput(context,
-            kInputTensorIndexOrigOutput);
-        const Tensor& grad_tensor = MklGetInput(context,
-            kInputTensorIndexGradient);
-        const Tensor& workspace_tensor = MklGetInput(context,
-            kInputTensorIndexWorkspace);
-        MklDnnShape orig_input_mkl_shape,
-                    orig_output_mkl_shape,
-                    grad_mkl_shape,
-                    workspace_mkl_shape;
-        GetMklShape(context, kInputTensorIndexOrigInput,
-            &orig_input_mkl_shape);
-        GetMklShape(context, kInputTensorIndexOrigOutput,
-            &orig_output_mkl_shape);
-        GetMklShape(context, kInputTensorIndexGradient,
-            &grad_mkl_shape);
-        GetMklShape(context, kInputTensorIndexWorkspace,
-            &workspace_mkl_shape);
-
-        SanityCheckInputs(context,
-                            orig_input_tensor, orig_output_tensor,
-                            grad_tensor, workspace_tensor,
-                            orig_input_mkl_shape, orig_output_mkl_shape,
-                            grad_mkl_shape, workspace_mkl_shape);
-        if (!context->status().ok()) return;
-
-        MklDnnData<T> grad_dnn_data(&cpu_engine);
-        MklDnnData<T> workspace_dnn_data(&cpu_engine);
-        MklDnnData<T> output_dnn_data(&cpu_engine);
-        Tensor* output_tensor = nullptr;
-        MklPoolParameters pool_params;
-        TensorShape orig_input_shape;
-        memory::dims output_dims_mkl_order, orig_input_dims_mkl_order;
-        memory::desc original_input_md = ConfigureOriginalInput(context,
-                                orig_input_tensor,
-                                orig_input_mkl_shape,
-                                &orig_input_dims_mkl_order,
-                                &pool_params,
-                                &orig_input_shape);
-
-        memory::desc original_output_md = this->ConfigureOriginalOutput(
-                                pool_params,
-                                orig_output_mkl_shape,
-                                output_dims_mkl_order);
-
-        memory::desc target_diff_dst_md =  this->ConfigureInputGradient(
-                                        grad_mkl_shape,
-                                        grad_tensor,
-                                        &grad_dnn_data,
-                                        original_output_md);
-
-        output_dnn_data.SetUsrMem(original_input_md);
-
-        // Create the forward pooling primitive descriptor so we can
-        // pass it as a hint to the backward pooling primitive descriptor
-        auto pool_fwd_desc = pooling_forward::desc(prop_kind::forward,
-                algorithm::pooling_max,
-                original_input_md,
-                original_output_md,
-                memory::dims({  pool_params.row_stride,
-                                pool_params.col_stride}),
-                memory::dims({  pool_params.window_rows,
-                                pool_params.window_cols}),
-                memory::dims({  static_cast<int>(pool_params.pad_top),
-                                static_cast<int>(pool_params.pad_left)}),
-                memory::dims({  static_cast<int>(pool_params.pad_bottom),
-                                static_cast<int>(pool_params.pad_right)}),
-                TFPaddingToMklDnnPadding(this->padding_));
-        auto pool_fwd_prim_desc
-                = pooling_forward::primitive_desc(pool_fwd_desc,
-                                                    cpu_engine);
-
-        auto pool_bkwd_desc = pooling_backward::desc(
-                algorithm::pooling_max,
-                output_dnn_data.GetUsrMemDesc(),
-                target_diff_dst_md,
-                memory::dims({  pool_params.row_stride,
-                                pool_params.col_stride}),
-                memory::dims({  pool_params.window_rows,
-                                pool_params.window_cols}),
-                memory::dims({  static_cast<int>(pool_params.pad_top),
-                                static_cast<int>(pool_params.pad_left)}),
-                memory::dims({  static_cast<int>(pool_params.pad_bottom),
-                                static_cast<int>(pool_params.pad_right)}),
-                TFPaddingToMklDnnPadding(this->padding_));
-        auto pool_bkwd_prim_desc
-            = pooling_backward::primitive_desc(pool_bkwd_desc,
-                                                cpu_engine,
-                                                pool_fwd_prim_desc);
-
-        this->AllocateOutputTensor(context, pool_bkwd_prim_desc,
-            orig_input_dims_mkl_order,
-            this->data_format_mkldnn_,
-            &output_tensor);
-        output_dnn_data.SetUsrMemDataHandle(output_tensor);
-
-        ConfigureWorkspace(workspace_tensor,
-                pool_fwd_prim_desc.workspace_primitive_desc(),
-                &workspace_dnn_data);
-        this->PrepareAndExecuteNet(pool_bkwd_prim_desc,
-                            &grad_dnn_data,
-                            &output_dnn_data,
-                            memory::primitive_desc(
-                                target_diff_dst_md,
-                                cpu_engine),
-                            &workspace_dnn_data);
-    } catch (mkldnn::error &e) {
-        string error_msg = "Status: " + std::to_string(e.status) +
-                        ", message: " + string(e.message) +
-                        ", in file " + string(__FILE__) + ":" +
-                        std::to_string(__LINE__);
-        OP_REQUIRES_OK(context,
-                        errors::Aborted("Compute received an exception:",
-                                         error_msg));
-    }
-  }  // Compute
-
- private:
-    // .Input("orig_input: T")
-    // .Input("orig_output: T")
-    // .Input("grad: T")
-    // .Input("workspace: T")
-    const int kInputTensorIndexOrigInput = 0;
-    const int kInputTensorIndexOrigOutput = 1;
-    const int kInputTensorIndexGradient = 2;
-    const int kInputTensorIndexWorkspace = 3;
-    //  Output("output: T") in Base Class
-
-    memory::desc ConfigureOriginalInput(OpKernelContext* context,
-                                const Tensor& tensor_original_input,
-                                const MklDnnShape& original_input_mkl_shape,
-                                memory::dims* original_input_dims_mkl_order,
-                                MklPoolParameters* pool_params,
-                                TensorShape* input_tensor_shape) {
-        *input_tensor_shape = tensor_original_input.shape();
-        return MklPoolingBackwardOpBase<T>::ConfigureOriginalInput(
-                                        context,
-                                        tensor_original_input,
-                                        original_input_mkl_shape,
-                                        original_input_dims_mkl_order,
-                                        pool_params,
-                                        *input_tensor_shape);
-    }
-
-    void ConfigureWorkspace(const Tensor& workspace_tensor,
-                        memory::primitive_desc workspace_pd,
-                        MklDnnData<T> *workspace_dnn_data) {
-        CHECK_NOTNULL(workspace_dnn_data);
-
-        workspace_dnn_data->SetUsrMem(workspace_pd, &workspace_tensor);
-    }
-
-    void SanityCheckInputs(OpKernelContext* context,
-                            const Tensor& orig_input_tensor,
-                            const Tensor& orig_output_tensor,
-                            const Tensor& grad_tensor,
-                            const Tensor& workspace_tensor,
-                            const MklDnnShape& orig_input_mkl_shape,
-                            const MklDnnShape& orig_output_mkl_shape,
-                            const MklDnnShape& grad_mkl_shape,
-                            const MklDnnShape& workspace_mkl_shape) {
-        if (!orig_input_mkl_shape.IsMklTensor()) {
-            OP_REQUIRES(context, orig_input_tensor.dims() == 4,
-                errors::InvalidArgument("Original input shape must be "
-                "4-dimensional"));
-        } else {
-            OP_REQUIRES(context, orig_input_mkl_shape.GetDimension() == 4,
-                    errors::InvalidArgument("Original input shape must be "
-                    "4-dimensional"));
-        }
-        if (!orig_output_mkl_shape.IsMklTensor()) {
-            OP_REQUIRES(context, orig_output_tensor.dims() == 4,
-                errors::InvalidArgument("Original output must be "
-                        "4-dimensional"));
-        } else {
-            OP_REQUIRES(context, orig_output_mkl_shape.GetDimension() == 4,
-                    errors::InvalidArgument("Original output must be "
-                    "4-dimensional"));
-        }
-        if (!grad_mkl_shape.IsMklTensor()) {
-            OP_REQUIRES(context, grad_tensor.dims() == 4,
-                errors::InvalidArgument("Gradient must be 4-dimensional"));
-        } else {
-            OP_REQUIRES(context, grad_mkl_shape.GetDimension() == 4,
-                    errors::InvalidArgument("Gradient must be "
-                    "4-dimensional"));
-        }
-        if (this->workspace_enabled_){
-            // The workspace should not be an MKL tensor
-            OP_REQUIRES(context, workspace_mkl_shape.IsMklTensor() == false,
-                    errors::InvalidArgument("Workspace tensor should not"
-                                            " be an MKL Tensor."));
-            // It should only have one dimension
-            OP_REQUIRES(context, workspace_tensor.dims() == 1,
-                    errors::InvalidArgument("Workspace tensor must be "
-                                "1-dimensional"));
-        } else {
-            OP_REQUIRES(context, this->workspace_enabled_,
-                    errors::Unimplemented("MKL-DNN Max Pooling does not "
-                                "yet support the use case "
-                                "where MaxPoolGrad is called without first"
-                                " calling MaxPool."));
-        }
-    }
-};  // MklMaxPoolingGradOp
-
-#endif  // INTEL_MKL_DNN
-
 REGISTER_KERNEL_BUILDER(Name("_MklMaxPool")
                             .Device(DEVICE_CPU)
                             .TypeConstraint<float>("T")
diff --git a/tensorflow/core/kernels/mkl_pooling_ops_common.cc b/tensorflow/core/kernels/mkl_pooling_ops_common.cc
index f7cadffd39..65e8852cfb 100644
--- a/tensorflow/core/kernels/mkl_pooling_ops_common.cc
+++ b/tensorflow/core/kernels/mkl_pooling_ops_common.cc
@@ -14,13 +14,10 @@ limitations under the License.
 ==============================================================================*/
 
 #ifdef INTEL_MKL
-
 #include <vector>
-#include <limits>
 #include "tensorflow/core/kernels/mkl_pooling_ops_common.h"
 #include "tensorflow/core/common_runtime/device.h"
 #include "tensorflow/core/framework/common_shape_fns.h"
-#include "tensorflow/core/kernels/bounds_check.h"
 
 namespace tensorflow {
 
@@ -42,7 +39,6 @@ void MklPoolParameters::Init(OpKernelContext* context,
   Init(context, ksize, stride, padding, data_format);
 }
 
-#ifndef INTEL_MKL_DNN
 // Initialization for MKL format
 void MklPoolParameters::Init(OpKernelContext* context,
                              const std::vector<int32>& ksize,
@@ -57,22 +53,7 @@ void MklPoolParameters::Init(OpKernelContext* context,
 
   Init(context, ksize, stride, padding, data_format);
 }
-#else
-// Initialization for MKL format
-void MklPoolParameters::Init(OpKernelContext* context,
-                             const std::vector<int32>& ksize,
-                             const std::vector<int32>& stride, Padding padding,
-                             TensorFormat data_format,
-                             const MklDnnShape* mklInputShape) {
-  // Get the input sizes
-  depth = mklInputShape->GetDimension('C');
-  tensor_in_cols = mklInputShape->GetDimension('W');
-  tensor_in_rows = mklInputShape->GetDimension('H');
-  tensor_in_batch = mklInputShape->GetDimension('N');
 
-  Init(context, ksize, stride, padding, data_format);
-}
-#endif  // INTEL_MKL_DNN
 // Common Initialization for TensorFlow and MKL formats
 void MklPoolParameters::Init(OpKernelContext* context,
                              const std::vector<int32>& ksize,
@@ -99,7 +80,7 @@ void MklPoolParameters::Init(OpKernelContext* context,
                   "MaxPooling supports exactly one of pooling across depth "
                   "or pooling across width/height."));
 
-  if (depth_window == 1) {  // we are pooling in the H and W
+  if (depth_window == 1) {
     OP_REQUIRES_OK(context, GetWindowedOutputSizeVerbose(
                                 tensor_in_rows, window_rows, row_stride,
                                 padding, &out_height, &pad_top, &pad_bottom));
@@ -107,21 +88,7 @@ void MklPoolParameters::Init(OpKernelContext* context,
     OP_REQUIRES_OK(context, GetWindowedOutputSizeVerbose(
                                 tensor_in_cols, window_cols, col_stride,
                                 padding, &out_width, &pad_left, &pad_right));
-#ifdef INTEL_MKL_DNN
-    // TF can work with int64, but mkldnn only supports int32
-    // Fail if the height or width are greater than MAX_INT
-
-    OP_REQUIRES(context, FastBoundsCheck(out_height,
-                                         std::numeric_limits<int>::max()),
-                errors::InvalidArgument("output height is too large"));
-
-    OP_REQUIRES(context, FastBoundsCheck(out_width,
-                                         std::numeric_limits<int>::max()),
-                errors::InvalidArgument("output width is too large"));
-
-#endif
-    out_depth = depth;  // output will have the same depth as the input
-  } else {  // we are pooling in the depth dimension
+  } else {
     // Our current version of depthwise max pooling does not support
     // any padding, and expects the depth_window to equal the depth
     // stride (no overlapping).
@@ -142,6 +109,7 @@ void MklPoolParameters::Init(OpKernelContext* context,
                 errors::Unimplemented("Depthwise max pooling is currently "
                                       "only implemented for CPU devices."));
 
+    pad_depth = 0;
     out_depth = depth / depth_window;
   }
 }
diff --git a/tensorflow/core/kernels/mkl_pooling_ops_common.h b/tensorflow/core/kernels/mkl_pooling_ops_common.h
index d33e91a15d..92ea2beb25 100644
--- a/tensorflow/core/kernels/mkl_pooling_ops_common.h
+++ b/tensorflow/core/kernels/mkl_pooling_ops_common.h
@@ -18,18 +18,9 @@ limitations under the License.
 
 #ifdef INTEL_MKL
 #include <vector>
-#include <string>
 #include "tensorflow/core/util/mkl_util.h"
 #include "tensorflow/core/util/padding.h"
 
-#ifdef INTEL_MKL_DNN
-#include "mkldnn.hpp"
-using mkldnn::memory;
-using mkldnn::pooling_forward;
-using mkldnn::pooling_backward;
-using mkldnn::stream;
-#endif
-
 namespace tensorflow {
 
 typedef Eigen::ThreadPoolDevice CPUDevice;
@@ -60,28 +51,14 @@ struct MklPoolParameters {
   int pad_depth;
 
   TensorFormat data_format;
-  MklPoolParameters()
-    : depth(0)
-    , tensor_in_cols(0), tensor_in_rows(0), tensor_in_batch(0)
-    , window_rows(0), window_cols(0), depth_window(0)
-    , row_stride(0), col_stride(0), depth_stride(0)
-    , out_height(0), out_width(0), out_depth(0)
-    , pad_left(0), pad_right(0), pad_top(0), pad_bottom(0), pad_depth(0)
-    , data_format(TensorFormat::FORMAT_NCHW) {}
 
   // Updates context->status if there is an invalid input.
   void Init(OpKernelContext* context, const std::vector<int32>& ksize,
             const std::vector<int32>& stride, Padding padding,
             TensorFormat data_format, const TensorShape& tensor_in_shape);
-#ifndef INTEL_MKL_DNN
   void Init(OpKernelContext* context, const std::vector<int32>& ksize,
             const std::vector<int32>& stride, Padding padding,
             TensorFormat data_format, const MklShape* mkl_in_shape);
-#else
-  void Init(OpKernelContext* context, const std::vector<int32>& ksize,
-            const std::vector<int32>& stride, Padding padding,
-            TensorFormat data_format, const MklDnnShape* mkl_in_shape);
-#endif
 
  private:
   // Common initialization for TensorFlow and MKL formats
@@ -90,325 +67,6 @@ struct MklPoolParameters {
             TensorFormat data_format);
 };
 
-#ifdef INTEL_MKL_DNN
-
-template <class T>
-class MklPoolingOpBase : public OpKernel {
- public:
-  explicit MklPoolingOpBase(OpKernelConstruction* context)
-            : OpKernel(context)
-            , workspace_enabled_(false) {
-      string data_format;
-      OP_REQUIRES_OK(context, context->GetAttr("data_format", &data_format));
-      OP_REQUIRES(context,
-            FormatFromString(data_format, &this->data_format_tf_),
-            errors::InvalidArgument("Invalid data format"));
-      this->data_format_mkldnn_
-                = TFDataFormatToMklDnnDataFormat(this->data_format_tf_);
-      OP_REQUIRES_OK(context, context->GetAttr("ksize", &this->ksize_));
-      OP_REQUIRES(context, this->ksize_.size() == 4,
-                  errors::InvalidArgument("Sliding window ksize field must "
-                                          "specify 4 dimensions"));
-      OP_REQUIRES_OK(context, context->GetAttr("strides", &this->stride_));
-      OP_REQUIRES(context, this->stride_.size() == 4,
-                  errors::InvalidArgument("Sliding window strides field must "
-                                          "specify 4 dimensions"));
-      OP_REQUIRES_OK(context, context->GetAttr("padding", &this->padding_));
-      OP_REQUIRES(context, this->ksize_[0] == 1 && this->stride_[0] == 1,
-                  errors::Unimplemented("Pooling is not yet supported on the "
-                                        "batch dimension."));
-
-      // We may not get this attribute for this node if it does not go through
-      // graph rewrite pass. So we do not check for error while retrieving this
-      // attribute value.
-      context->GetAttr("workspace_enabled", &this->workspace_enabled_);
-    }
-  void Compute(OpKernelContext* context) override = 0;
-
- protected:
-  // Calculate output shape of pooling op in MKL-DNN and TensorFlow order.
-  // MKL-DNN uses NCHW for output order. But TensorFlow output will be in
-  // NHWC or NCHW format depending on data format. Function expects
-  // output height and output width to have already been int32
-  // bounds-checked
-  void GetOutputDims(const MklPoolParameters& mkl_pool_params,
-                    memory::dims* output_dims_mkl_order) {
-    // MKL-DNN always needs output in NCHW format.
-    *output_dims_mkl_order = { mkl_pool_params.tensor_in_batch,
-                              mkl_pool_params.out_depth,
-                              static_cast<int>(mkl_pool_params.out_height),
-                              static_cast<int>(mkl_pool_params.out_width)};
-  }
-
-  void InitMklPoolParameters(OpKernelContext* context,
-                      MklPoolParameters* pool_params,
-                      const MklDnnShape& original_input_mkl_shape,
-                      const TensorShape& input_tensor_shape) {
-    if (!original_input_mkl_shape.IsMklTensor()) {
-      pool_params->Init(context, this->ksize_, this->stride_, this->padding_,
-          this->data_format_tf_, input_tensor_shape);
-    } else {
-      pool_params->Init(context, this->ksize_, this->stride_, this->padding_,
-          this->data_format_tf_, &original_input_mkl_shape);
-    }
-  }
-
-  // Checks to make sure that the memory we need to allocate
-  // is a multiple of sizeof(T)
-  // returns the number of elements
-  size_t GetNumTElements(const memory::primitive_desc& pd) {
-    size_t num_bytes = pd.get_size();
-    size_t ret_val = num_bytes / sizeof(T);
-    if ( num_bytes % sizeof(T) != 0 ) {
-        ret_val++;
-    }
-    return ret_val;
-  }
-
-
-  std::vector<int32> ksize_;
-  std::vector<int32> stride_;
-  Padding padding_;
-  TensorFormat data_format_tf_;
-  memory::format data_format_mkldnn_;
-  bool workspace_enabled_;
-};
-
-template <class T>
-class MklPoolingForwardOpBase : public MklPoolingOpBase<T> {
- public:
-  explicit MklPoolingForwardOpBase<T>(OpKernelConstruction* context)
-      : MklPoolingOpBase<T>(context) {}
-  void Compute(OpKernelContext* context) override = 0;
-
- protected:
-  void ConfigureInput(OpKernelContext* context,
-                    const MklDnnShape& input_mkl_shape,
-                    const Tensor& input_tensor,
-                    MklPoolParameters* pool_params,
-                    MklDnnData<T>* dnn_data_input) {
-    CHECK_NOTNULL(pool_params);
-    CHECK_NOTNULL(dnn_data_input);
-    TensorShape input_tensor_shape = input_tensor.shape();
-    memory::desc input_md = input_mkl_shape.IsMklTensor()
-                        ? input_mkl_shape.GetMklLayout()
-                        : memory::desc(
-                              TFShapeToMklDnnDimsInNCHW(
-                                  input_tensor_shape, this->data_format_tf_),
-                              MklDnnType<T>(),
-                              this->data_format_mkldnn_);
-    dnn_data_input->SetUsrMem(input_md, &input_tensor);
-    this->InitMklPoolParameters(context, pool_params,
-                      input_mkl_shape, input_tensor_shape);
-  }
-
-  void AllocateOutputTensor(OpKernelContext* context,
-            const pooling_forward::primitive_desc& pool_fwd_prim_desc,
-            const memory::dims output_dims_mkl_order,
-            const memory::format& output_tf_format,
-            Tensor** output_tensor) {
-    CHECK_NOTNULL(output_tensor);
-    memory::primitive_desc dst_pd = pool_fwd_prim_desc.dst_primitive_desc();
-
-    MklDnnShape output_mkl_shape;
-    output_mkl_shape.SetMklTensor(true);
-    output_mkl_shape.SetMklLayout(&dst_pd);
-    output_mkl_shape.SetElemType(MklDnnType<T>());
-    output_mkl_shape.SetTfLayout(output_dims_mkl_order.size(),
-                              output_dims_mkl_order,
-                              output_tf_format);
-    TensorShape output_tf_shape;
-
-    // only allocate enough space for the elements we need.
-    output_tf_shape.AddDim(this->GetNumTElements(dst_pd));
-    AllocateOutputSetMklShape(context, kOutputTensorIndexOutput,
-                            output_tensor,
-                            output_tf_shape, output_mkl_shape);
-    CHECK_NOTNULL(*output_tensor);
-  }
-
-  void PrepareAndExecuteNet(
-                  const pooling_forward::primitive_desc& pool_fwd_desc,
-                  const MklDnnData<T>* src,
-                  MklDnnData<T>* dst,
-                  MklDnnData<T>* wksp = nullptr) {
-    std::vector<primitive> net;
-
-    // Create pooling primitive and add it to net
-    if (wksp != nullptr) {
-        net.push_back(pooling_forward(pool_fwd_desc,
-                        src->GetOpMem(),
-                        dst->GetOpMem(),
-                        wksp->GetOpMem()));
-    } else {
-        net.push_back(pooling_forward(pool_fwd_desc,
-            src->GetOpMem(),
-            dst->GetOpMem()));
-    }
-    stream(stream::kind::eager).submit(net).wait();
-  }
-
-
-  void SanityCheckInput(OpKernelContext* context,
-                  const Tensor& input_tensor,
-                  const MklDnnShape& input_mkl_shape) {
-    if (!input_mkl_shape.IsMklTensor()) {
-      OP_REQUIRES(context, input_tensor.dims() == 4,
-          errors::InvalidArgument("Input must be 4-dimensional"));
-    } else {
-        OP_REQUIRES(context, input_mkl_shape.GetDimension() == 4,
-                errors::InvalidArgument("Input shape must be "
-                "4-dimensional"));
-    }
-  }
-  // .Input("value: T")
-  // .Output("output: T")
-  const int kInputTensorIndexInput = 0;
-  const int kOutputTensorIndexOutput = 0;
-};  // MklPoolingForwardBaseOp
-
-
-template <class T>
-class MklPoolingBackwardOpBase : public MklPoolingOpBase<T> {
- public:
-  explicit MklPoolingBackwardOpBase<T>(OpKernelConstruction* context)
-          : MklPoolingOpBase<T>(context) { }
-  void Compute(OpKernelContext* context) override = 0;
-
- protected:
-  const int kOutputTensorIndexOutput = 0;
-
-  void AllocateOutputTensor(OpKernelContext* context,
-            const pooling_backward::primitive_desc& pool_bkwd_prim_desc,
-            const memory::dims output_dims_mkl_order,
-            const memory::format& output_tf_format,
-            Tensor** output_tensor) {
-    CHECK_NOTNULL(output_tensor);
-    memory::primitive_desc dst_pd
-                = pool_bkwd_prim_desc.diff_src_primitive_desc();
-    MklDnnShape output_mkl_shape;
-    output_mkl_shape.SetMklTensor(true);
-    output_mkl_shape.SetMklLayout(&dst_pd);
-    output_mkl_shape.SetElemType(MklDnnType<T>());
-    output_mkl_shape.SetTfLayout(output_dims_mkl_order.size(),
-                              output_dims_mkl_order,
-                              output_tf_format);
-
-    TensorShape output_tf_shape;
-    output_tf_shape.AddDim(this->GetNumTElements(dst_pd));
-    AllocateOutputSetMklShape(context, kOutputTensorIndexOutput,
-                            output_tensor,
-                            output_tf_shape, output_mkl_shape);
-    CHECK_NOTNULL(*output_tensor);
-  }
-
-  void PrepareAndExecuteNet(
-    const pooling_backward::primitive_desc& pool_bkwd_desc,
-    MklDnnData<T>* input_gradient_diff_dst,
-    MklDnnData<T>* output_diff_src,
-    const memory::primitive_desc& target_diff_dst_pd,
-    const MklDnnData<T>* workspace = nullptr) {
-
-    std::vector<primitive> net;
-
-    // If the input gradient isn't in the same format as the output
-    // reorder it to the same format as the output
-    input_gradient_diff_dst->CheckReorderToOpMem(
-            target_diff_dst_pd,
-            &net);
-
-    // Create pooling primitive and add it to net
-    if (nullptr == workspace) {
-      net.push_back(pooling_backward(pool_bkwd_desc,
-                              input_gradient_diff_dst->GetOpMem(),
-                              output_diff_src->GetOpMem()));
-    } else {
-      net.push_back(pooling_backward(pool_bkwd_desc,
-                                  input_gradient_diff_dst->GetOpMem(),
-                                  workspace->GetOpMem(),
-                                  output_diff_src->GetOpMem()));
-    }
-    stream(stream::kind::eager).submit(net).wait();
-  }
-
-  // Max Pooling and Avg Pooling have slightly different implementations
-  // Takes the Tensor containing original input data and the original
-  // mkl Dnn Shape and populates other data
-  memory::desc ConfigureOriginalInput(OpKernelContext* context,
-                              const Tensor& tensor_original_input_shape,
-                              const MklDnnShape& original_input_mkl_shape,
-                              memory::dims* original_input_dims_nchw,
-                              MklPoolParameters* pool_params,
-                              const TensorShape& input_tensor_shape) {
-    CHECK_NOTNULL(original_input_dims_nchw);
-    CHECK_NOTNULL(pool_params);
-    this->InitMklPoolParameters(context, pool_params,
-                          original_input_mkl_shape,
-                          input_tensor_shape);
-
-    *original_input_dims_nchw
-          = original_input_mkl_shape.IsMklTensor()
-          ? original_input_mkl_shape.GetSizesAsMklDnnDims()
-          : TFShapeToMklDnnDimsInNCHW(input_tensor_shape,
-        this->data_format_tf_);
-
-    return  original_input_mkl_shape.IsMklTensor()
-      ? original_input_mkl_shape.GetMklLayout()
-      : memory::desc(*original_input_dims_nchw,
-                      MklDnnType<T>(),
-                      this->data_format_mkldnn_);
-  }
-
-  memory::desc ConfigureOriginalOutput(const MklPoolParameters& pool_params,
-                                const MklDnnShape& original_output_mkl_shape,
-                                      memory::dims output_dims_mkl_order) {
-    this->GetOutputDims(pool_params, &output_dims_mkl_order);
-
-    return original_output_mkl_shape.IsMklTensor()
-            ? original_output_mkl_shape.GetMklLayout()
-            : memory::desc(output_dims_mkl_order,
-                         MklDnnType<T>(),
-                         this->data_format_mkldnn_);
-  }
-
-  memory::desc ConfigureInputGradient(
-        const MklDnnShape& input_gradient_mkl_shape,
-        const Tensor& input_gradient_tensor,
-        MklDnnData<T>* input_gradient_dnn_data,
-        const memory::desc& original_output_md) {
-    // Configure the gradient as is
-    memory::desc original_input_grad_md
-          = input_gradient_mkl_shape.IsMklTensor()
-          ? input_gradient_mkl_shape.GetMklLayout()
-          : memory::desc(TFShapeToMklDnnDimsInNCHW(
-                    input_gradient_tensor.shape(),
-                    this->data_format_tf_),
-                    MklDnnType<T>(), this->data_format_mkldnn_);
-
-    input_gradient_dnn_data->SetUsrMem(original_input_grad_md,
-                &input_gradient_tensor);
-
-    // Check to see if input grad diff dst is in the right format
-    // Create a new memory descriptor with the same shape as the
-    // original, but the format of the other tensors.
-    memory::format original_output_format =
-            static_cast<memory::format>(original_output_md.data.format);
-    bool grad_reorder_needed = input_gradient_dnn_data->IsReorderNeeded(
-                                    original_output_format);
-    memory::dims diff_dst_dims = input_gradient_mkl_shape.IsMklTensor()
-        ? input_gradient_mkl_shape.GetSizesAsMklDnnDims()
-        : TFShapeToMklDnnDimsInNCHW(input_gradient_tensor.shape(),
-                    this->data_format_tf_);
-    memory::desc target_diff_dst_md = memory::desc(diff_dst_dims,
-        MklDnnType<T>(), original_output_format);
-
-    return grad_reorder_needed
-            ? target_diff_dst_md
-            : original_input_grad_md;
-  }
-};
-#endif  // INTEL_MKL_DNN
-
 //-------------------------------------------------------------------
 // Utility functions
 
diff --git a/tensorflow/core/kernels/mkl_relu_op.cc b/tensorflow/core/kernels/mkl_relu_op.cc
index 45bdd0ad5c..86a77d769a 100644
--- a/tensorflow/core/kernels/mkl_relu_op.cc
+++ b/tensorflow/core/kernels/mkl_relu_op.cc
@@ -28,19 +28,6 @@ limitations under the License.
 #include "mkl_dnn.h"
 #include "mkl_dnn_types.h"
 
-#ifdef INTEL_MKL_DNN
-#include "mkldnn.hpp"
-
-using mkldnn::stream;
-using mkldnn::prop_kind;
-using mkldnn::algorithm;
-using mkldnn::relu_forward;
-using mkldnn::relu_backward;
-using mkldnn::eltwise_relu;
-using mkldnn::eltwise_elu;
-using mkldnn::eltwise_tanh;
-#endif
-
 namespace tensorflow {
 
 typedef Eigen::ThreadPoolDevice CPUDevice;
@@ -58,8 +45,6 @@ struct MklReluHelpers {
   }
 };
 
-#ifndef INTEL_MKL_DNN
-
 template <typename Device, typename T>
 class MklReluOp : public OpKernel {
  public:
@@ -74,7 +59,6 @@ class MklReluOp : public OpKernel {
     GetMklShape(context, 0, &mkl_context.input_shape);
     void* user_i = static_cast<void*>(const_cast<T*>(input.flat<T>().data()));
     bool input_in_mkl_format = mkl_context.input_shape.IsMklTensor();
-
     if (!input_in_mkl_format && !input.dims()) {  // handle the case of a scalar
       const TensorShape& o_shape = input.shape();
       Tensor* out_tensor = nullptr;
@@ -180,7 +164,6 @@ class MklReluOp : public OpKernel {
   } MklReluOpContext;
 };
 
-
 template <typename Device, typename T>
 class MklReluGradOp : public OpKernel {
  public:
@@ -206,18 +189,18 @@ class MklReluGradOp : public OpKernel {
       const Tensor& a = MklGetInput(context, 1);
       void* buf_input = static_cast<void*>(const_cast<T*>(a.flat<T>().data()));
       void* mkl_buffer_convert = nullptr;
-
       dnnPrimitive_t cv_input_to_grad = nullptr;
 
-      // if input and grad are not in the same layout,
-      // do a conversion between them.
+      // if input and grad are not in the same layout, do a conversion between
+      // them.
       if (!dnnLayoutCompare_F32(lt_input, lt_grad)) {
         AllocTmpBuffer(context, mkl_tmp_input_buf_tensor, lt_grad,
                        &mkl_buffer_convert);
         CHECK_EQ(dnnConversionCreate_F32(&cv_input_to_grad, lt_input,
                    lt_grad), E_SUCCESS);
         CHECK_EQ(dnnConversionExecute_F32(cv_input_to_grad, buf_input,
-                                          mkl_buffer_convert), E_SUCCESS);
+                                          mkl_buffer_convert),
+                 E_SUCCESS);
         relu_res[dnnResourceSrc] = mkl_buffer_convert;
         dnnDelete_F32(cv_input_to_grad);
       } else {
@@ -263,6 +246,7 @@ class MklReluGradOp : public OpKernel {
 };
 
 template <typename Device, typename T>
+
 void MklReluGradOp<Device, T>::Compute(OpKernelContext* context) {
   MklReluGradOpContext mkl_context;
   const Tensor& g = MklGetInput(context, 0);
@@ -280,21 +264,20 @@ void MklReluGradOp<Device, T>::Compute(OpKernelContext* context) {
       !MklReluHelpers::ValidateSameSize(context, g, a))
     return;
   Tensor* output = nullptr;
-
-  if (!input_is_mkl && !grad_is_mkl && !a.dims()) {
-    // handle the scalar case
+  if (!input_is_mkl && !grad_is_mkl &&
+      !a.dims()) {  // handle the case of a scalar
+    // Allocate space for g and
     const TensorShape& g_shape = g.shape();
     mkl_context.output_shape.SetMklTensor(false);
     AllocateOutputSetMklShape(context, 0, &output, g_shape,
                               mkl_context.output_shape);
-
     void* out_o = static_cast<void*>(output->flat<T>().data());
     (static_cast<T*>(out_o))[0] =
         (static_cast<T*>(user_g))[0] * ((static_cast<T*>(user_i))[0] > 0);
     return;
   }
 
-  // generate size, stride for input if input/grad is in mkl format.
+  // Generate size, stride for input if input/grad is in MKL format.
   if (grad_is_mkl || input_is_mkl) {
     const MklShape* tmp_mkl_shape =
         (grad_is_mkl) ? &mkl_context.grad_shape : &mkl_context.input_shape;
@@ -325,20 +308,21 @@ void MklReluGradOp<Device, T>::Compute(OpKernelContext* context) {
   float negative_slope = 0.0;
   CHECK_EQ(dnnReLUCreateBackward_F32(&mkl_context.prim_relu_bwd, NULL,
                                      mkl_context.lt_grad, mkl_context.lt_grad,
-                                     negative_slope), E_SUCCESS);
+                                     negative_slope),
+           E_SUCCESS);
   Tensor mkl_tmp_input_buf_tensor;
   mkl_context.MklPrepareReluGradInputs(context, &mkl_tmp_input_buf_tensor);
 
   if (input_is_mkl ||
-      grad_is_mkl) { /*if  grad or input are mkl leave it in mkl*/
+      grad_is_mkl) { /*if  grad or input are MKL leave it in MKL*/
     TensorShape tf_shape;
     mkl_context.output_shape.SetMklTensor(true);
     mkl_context.output_shape.SetMklLayout(mkl_context.prim_relu_bwd,
                                           dnnResourceDiffSrc);
     mkl_context.output_shape.SetTfLayout(
         mkl_context.in_dims, mkl_context.in_sizes, mkl_context.in_strides);
-    // if input_is_mkl or grad_is_mkl, then we copy strides and sizes from mkl
-    // shape of one that is in mkl layout.
+    // If input_is_mkl or grad_is_mkl, then we copy strides and sizes from Mkl
+    // shape of one that is in MKL layout.
     if (grad_is_mkl == true) {
       mkl_context.output_shape.SetTfDimOrder(
           mkl_context.in_dims, mkl_context.grad_shape.GetTfToMklDimMap());
@@ -348,9 +332,11 @@ void MklReluGradOp<Device, T>::Compute(OpKernelContext* context) {
     }
 
     tf_shape.AddDim(dnnLayoutGetMemorySize_F32(static_cast<dnnLayout_t>(
-                    mkl_context.output_shape.GetMklLayout())) / sizeof(T));
+                        mkl_context.output_shape.GetMklLayout())) /
+                    sizeof(T));
     AllocateOutputSetMklShape(context, 0, &output, tf_shape,
                               mkl_context.output_shape);
+
   } else {
     const TensorShape& o_shape = g.shape();
     mkl_context.output_shape.SetMklTensor(false);
@@ -361,430 +347,13 @@ void MklReluGradOp<Device, T>::Compute(OpKernelContext* context) {
   mkl_context.relu_res[dnnResourceDiffSrc] =
       static_cast<void*>(output->flat<T>().data());
 
-  CHECK_EQ(dnnExecute_F32(mkl_context.prim_relu_bwd,
-                          mkl_context.relu_res),
-                          E_SUCCESS);
+  CHECK_EQ(dnnExecute_F32(mkl_context.prim_relu_bwd, mkl_context.relu_res),
+           E_SUCCESS);
   mkl_context.MklCleanup();
 }
 
-
-#else  // INTEL_MKL_DNN
-
-template <typename Device, typename T, algorithm alg_kind>
-class MklReluOpBase : public OpKernel {
- public:
-  ~MklReluOpBase() {}
-
-  explicit MklReluOpBase(OpKernelConstruction* context) : OpKernel(context) {
-  }
-
-  virtual void Compute_Scalar(OpKernelContext* context) = 0;
-
-  void Compute(OpKernelContext* context) override {
-    try {
-      auto cpu_engine = engine(engine::cpu, 0);
-      const size_t src_index = 0;  // index of src input tensor
-      const size_t dst_index = 0;  // index of dst output tensor
-      const Tensor& src_tensor = MklGetInput(context, src_index);
-      MklDnnShape dnn_shape_src;
-      GetMklShape(context, src_index, &dnn_shape_src);
-
-      Tensor* dst_tensor = nullptr;
-      if (src_tensor.dims() == 0) {
-        Compute_Scalar(context);
-        return;
-      }
-
-      // Create relu primitive.
-      MklDnnData<T> src(&cpu_engine);
-      MklDnnData<T> dst(&cpu_engine);
-
-      // Set DNN primitive - src
-      memory::desc src_md({}, memory::data_undef, memory::format_undef);
-      if (dnn_shape_src.IsMklTensor()) {
-        src_md = dnn_shape_src.GetMklLayout();
-      } else {
-        auto src_dims = TFShapeToMklDnnDims(src_tensor.shape());
-        auto src_strides = CalculateTFStrides(src_dims);
-        // Create blocked memory descriptor
-        src_md = MklDnnData<T>::CreateBlockedMemDesc(src_dims, src_strides);
-      }
-      src.SetUsrMem(src_md, &src_tensor);
-
-      T alpha = 0, beta = 0;
-      std::shared_ptr<relu_forward::primitive_desc> relu_fwd_pd;
-      auto relu_fwd_desc = relu_forward::desc(prop_kind::forward_training,
-          // Operator memory descriptor is same as user memory descriptor.
-                                              alg_kind, src.GetUsrMemDesc(),
-                                              alpha, beta);
-      relu_fwd_pd.reset(new relu_forward::primitive_desc(relu_fwd_desc,
-                                                         cpu_engine));
-
-      // allocate dst tensor
-      MklDnnShape dnn_shape_dst;
-      TensorShape tf_shape_dst;
-      if (dnn_shape_src.IsMklTensor()) {
-        dnn_shape_dst.SetMklTensor(true);
-        auto dst_pd = relu_fwd_pd->dst_primitive_desc();
-        dnn_shape_dst.SetMklLayout(&dst_pd);
-        dnn_shape_dst.SetElemType(MklDnnType<T>());
-        dnn_shape_dst.SetTfLayout(dnn_shape_src.GetDimension(),
-                                  dnn_shape_src.GetSizesAsMklDnnDims(),
-                                  dnn_shape_src.GetTfDataFormat());
-        tf_shape_dst.AddDim(dst_pd.get_size()/sizeof(T));
-      } else {
-        dnn_shape_dst.SetMklTensor(false);
-        tf_shape_dst = src_tensor.shape();
-      }
-      AllocateOutputSetMklShape(context, dst_index, &dst_tensor, tf_shape_dst,
-                                dnn_shape_dst);
-
-      // Destination memory descriptor is same as source memory descriptor.
-      auto dst_md = src_md;
-      dst.SetUsrMem(dst_md, dst_tensor);
-
-      // execute net
-      std::vector<primitive> net;
-      auto relu_fwd = relu_forward(*relu_fwd_pd, src.GetOpMem(),
-                                   dst.GetOpMem());
-      net.push_back(relu_fwd);
-      stream(stream::kind::eager).submit(net).wait();
-    } catch (mkldnn::error &e) {
-      string error_msg = "Status: " + std::to_string(e.status) +
-                         ", message: " + string(e.message) +
-                         ", in file " + string(__FILE__) + ":" +
-                         std::to_string(__LINE__);
-      OP_REQUIRES_OK(context,
-                     errors::Aborted("Operation received an exception:",
-                        error_msg));
-    }
-  }
-};
-
-
-template <typename Device, typename T, algorithm alg_kind>
-class MklReluGradOpBase : public OpKernel {
- public:
-  ~MklReluGradOpBase() {}
-
-  explicit MklReluGradOpBase(OpKernelConstruction* context) :
-    OpKernel(context) {}
-
-  virtual void Compute_Scalar(OpKernelContext* context) = 0;
-
-  void Compute(OpKernelContext* context)  {
-    try {
-      auto cpu_engine = engine(engine::cpu, 0);
-      MklDnnData<T> src(&cpu_engine);
-      MklDnnData<T> diff_dst(&cpu_engine);
-      MklDnnData<T> diff_src(&cpu_engine);
-
-      const size_t diff_dst_index = 0;  // index of diff_dst input tensor
-      const size_t src_index = 1;       // index of src input tensor
-      const size_t diff_src_index = 0;  // index of diff_src output tensor
-
-      const Tensor& src_tensor      = MklGetInput(context, src_index);
-      const Tensor& diff_dst_tensor = MklGetInput(context, diff_dst_index);
-      Tensor* diff_src_tensor       = nullptr;
-
-      MklDnnShape dnn_shape_src, dnn_shape_diff_dst;
-      GetMklShape(context, src_index, &dnn_shape_src);
-      GetMklShape(context, diff_dst_index, &dnn_shape_diff_dst);
-
-      int src_dims_size = src_tensor.dims();
-      if (src_dims_size == 0) {
-        Compute_Scalar(context);
-        return;
-      }
-
-      // Set DNN primitives for src & diff_dst
-      memory::desc src_md({}, memory::data_undef, memory::format_undef);
-      memory::desc diff_dst_md({}, memory::data_undef, memory::format_undef);
-      if (dnn_shape_src.IsMklTensor() || dnn_shape_diff_dst.IsMklTensor()) {
-        if (dnn_shape_diff_dst.IsMklTensor()) {
-          diff_dst_md = dnn_shape_diff_dst.GetMklLayout();
-          src_md = diff_dst_md;
-        } else {
-          src_md = dnn_shape_src.GetMklLayout();
-          diff_dst_md = src_md;
-        }
-      } else {
-        auto src_dims = TFShapeToMklDnnDims(src_tensor.shape());
-        auto src_strides = CalculateTFStrides(src_dims);
-        src_md = MklDnnData<T>::CreateBlockedMemDesc(src_dims, src_strides);
-        diff_dst_md = src_md;
-      }
-      src.SetUsrMem(src_md, &src_tensor);
-      diff_dst.SetUsrMem(diff_dst_md, &diff_dst_tensor);
-
-      T alpha = 0, beta = 0;
-      std::shared_ptr<relu_forward::primitive_desc> relu_fwd_pd;
-      auto relu_fwd_desc = relu_forward::desc(prop_kind::forward_training,
-                                              alg_kind, src_md, alpha, beta);
-      relu_fwd_pd.reset(new relu_forward::primitive_desc(relu_fwd_desc,
-                                                         cpu_engine));
-      auto relu_bwd_desc = relu_backward::desc(alg_kind, diff_dst_md, src_md,
-                                                alpha, beta);
-      auto relu_bwd_pd  = relu_backward::primitive_desc(relu_bwd_desc,
-                                                cpu_engine, *relu_fwd_pd);
-
-      // allocate diff_src tensor
-      MklDnnShape dnn_shape_diff_src;
-      TensorShape tf_shape_diff_src;
-      if (dnn_shape_src.IsMklTensor()) {
-        dnn_shape_diff_src.SetMklTensor(true);
-        auto diff_src_pd = relu_bwd_pd.diff_src_primitive_desc();
-        dnn_shape_diff_src.SetMklLayout(&diff_src_pd);
-        dnn_shape_diff_src.SetElemType(MklDnnType<T>());
-        dnn_shape_diff_src.SetTfLayout(dnn_shape_src.GetDimension(),
-                                       dnn_shape_src.GetSizesAsMklDnnDims(),
-                                       dnn_shape_src.GetTfDataFormat());
-        tf_shape_diff_src.AddDim(diff_src_pd.get_size()/sizeof(T));
-      } else {
-        dnn_shape_diff_src.SetMklTensor(false);
-        tf_shape_diff_src = src_tensor.shape();
-      }
-      AllocateOutputSetMklShape(context, diff_src_index, &diff_src_tensor,
-                                 tf_shape_diff_src, dnn_shape_diff_src);
-
-      // diff_src memory descriptor is same as diff_dst memory descriptor.
-      auto diff_src_md = diff_dst_md;
-      diff_src.SetUsrMem(diff_src_md, diff_src_tensor);
-
-      PrepareAndExecuteNet(relu_bwd_pd, &src, &diff_src, &diff_dst);
-     } catch (mkldnn::error &e) {
-       string error_msg = "Status: " + std::to_string(e.status) +
-                          ", message: " + string(e.message) +
-                          ", in file " + string(__FILE__) + ":" +
-                          std::to_string(__LINE__);
-       OP_REQUIRES_OK(context,
-                      errors::Aborted("Operation received an exception:",
-                                      error_msg));
-    }
-  }
-
-  void PrepareAndExecuteNet(const relu_backward::primitive_desc& relu_prim_desc,
-                  MklDnnData<T>* src, MklDnnData<T>* diff_src, MklDnnData<T>*
-                  diff_dst) {
-    std::vector<primitive> net;
-    net.push_back(relu_backward(relu_prim_desc, src->GetOpMem(),
-                                diff_dst->GetOpMem(), diff_src->GetOpMem()));
-    stream(stream::kind::eager).submit(net).wait();
-  }
-};
-
-
-template <typename Device, typename T>
-class MklReluOp : public MklReluOpBase<Device, T, eltwise_relu> {
- public:
-  ~MklReluOp() {}
-
-  explicit MklReluOp(OpKernelConstruction* context) :
-  MklReluOpBase<Device, T, eltwise_relu>(context) {}
-
-  virtual void Compute_Scalar(OpKernelContext* context) {
-    const size_t src_index = 0;  // index of src input tensor
-    const size_t dst_index = 0;  // index of dst output tensor
-    const Tensor& src_tensor = MklGetInput(context, src_index);
-    MklDnnShape dnn_shape_src;
-    GetMklShape(context, src_index, &dnn_shape_src);
-
-    Tensor* dst_tensor = nullptr;
-    void* user_i = static_cast<void*>(const_cast<T*>(
-                         src_tensor.flat<T>().data()));
-    MklDnnShape dnn_shape_dst;
-    dnn_shape_dst.SetMklTensor(false);
-    AllocateOutputSetMklShape(context, dst_index, &dst_tensor,
-                              src_tensor.shape(), dnn_shape_dst);
-    void* out_o = static_cast<void*>(dst_tensor->flat<T>().data());
-    (static_cast<T*>(out_o))[0] =
-              std::max((static_cast<T*>(user_i))[0], static_cast<T>(0));
-    return;
-  }
-};
-
-template <typename Device, typename T>
-class MklReluGradOp : public MklReluGradOpBase<Device, T, eltwise_relu> {
- public:
-  ~MklReluGradOp() {}
-
-  explicit MklReluGradOp(OpKernelConstruction* context) :
-  MklReluGradOpBase<Device, T, eltwise_relu>(context) {}
-
-  virtual void Compute_Scalar(OpKernelContext* context) {
-    const size_t diff_dst_index = 0;  // index of diff_dst input tensor
-    const size_t src_index = 1;       // index of src input tensor
-    const size_t diff_src_index = 0;  // index of diff_src output tensor
-    const Tensor& src_tensor    = MklGetInput(context, src_index);
-    const Tensor& diff_dst_tensor = MklGetInput(context, diff_dst_index);
-    Tensor* diff_src_tensor = nullptr;
-
-    MklDnnShape dnn_shape_diff_dst;
-    GetMklShape(context, diff_dst_index, &dnn_shape_diff_dst);
-
-    int src_dims_size = src_tensor.dims();
-    MklDnnShape dnn_shape_diff_src;
-    dnn_shape_diff_src.SetMklTensor(false);
-    AllocateOutputSetMklShape(context, diff_src_index, &diff_src_tensor,
-                              diff_dst_tensor.shape(), dnn_shape_diff_src);
-    void* out_o = static_cast<void*>(diff_src_tensor->flat<T>().data());
-    void* user_i =
-          static_cast<void*>(const_cast<T*>(src_tensor.flat<T>().data()));
-    void* user_g =
-          static_cast<void*>(const_cast<T*>(diff_dst_tensor.flat<T>().data()));
-    (static_cast<T*>(out_o))[0] = (static_cast<T*>(user_g))[0] *
-                                  ((static_cast<T*>(user_i))[0] > 0);
-    return;
-  }
-};
-
-template <typename Device, typename T>
-class MklEluOp : public MklReluOpBase<Device, T, eltwise_elu> {
- public:
-  ~MklEluOp() {}
-
-  explicit MklEluOp(OpKernelConstruction* context) :
-  MklReluOpBase<Device, T, eltwise_elu>(context) {}
-
-  virtual void Compute_Scalar(OpKernelContext* context) {
-    const size_t src_index = 0;  // index of src input tensor
-    const size_t dst_index = 0;  // index of dst output tensor
-    const Tensor& src_tensor = MklGetInput(context, src_index);
-    MklDnnShape dnn_shape_src;
-    GetMklShape(context, src_index, &dnn_shape_src);
-
-    Tensor* dst_tensor = nullptr;
-    void* user_i = static_cast<void*>(const_cast<T*>(
-                         src_tensor.flat<T>().data()));
-    MklDnnShape dnn_shape_dst;
-    dnn_shape_dst.SetMklTensor(false);
-    AllocateOutputSetMklShape(context, dst_index, &dst_tensor,
-                              src_tensor.shape(), dnn_shape_dst);
-    void* out_o = static_cast<void*>(dst_tensor->flat<T>().data());
-    // return exp(feature) - 1 if feature > 0; feature otherwise
-    T feature = (static_cast<T*>(user_i))[0];
-    if (feature < 0)
-      (static_cast<T*>(out_o))[0] = std::exp(feature);
-    else
-      (static_cast<T*>(out_o))[0] = feature;
-    return;
-  }
-};
-
-template <typename Device, typename T>
-class MklEluGradOp : public MklReluGradOpBase<Device, T, eltwise_elu> {
- public:
-  ~MklEluGradOp() {}
-
-  explicit MklEluGradOp(OpKernelConstruction* context) :
-  MklReluGradOpBase<Device, T, eltwise_elu>(context) {}
-
-  virtual void Compute_Scalar(OpKernelContext* context) {
-    const size_t diff_dst_index = 0;  // index of diff_dst input tensor
-    const size_t src_index = 1;       // index of src input tensor
-    const size_t diff_src_index = 0;  // index of diff_src output tensor
-    const Tensor& src_tensor    = MklGetInput(context, src_index);
-    const Tensor& diff_dst_tensor = MklGetInput(context, diff_dst_index);
-    Tensor* diff_src_tensor = nullptr;
-
-    MklDnnShape dnn_shape_diff_dst;
-    GetMklShape(context, diff_dst_index, &dnn_shape_diff_dst);
-
-    int src_dims_size = src_tensor.dims();
-    MklDnnShape dnn_shape_diff_src;
-    dnn_shape_diff_src.SetMklTensor(false);
-    AllocateOutputSetMklShape(context, diff_src_index, &diff_src_tensor,
-                              diff_dst_tensor.shape(), dnn_shape_diff_src);
-    void* out_o = static_cast<void*>(diff_src_tensor->flat<T>().data());
-    void* user_i =
-          static_cast<void*>(const_cast<T*>(src_tensor.flat<T>().data()));
-    void* user_g =
-          static_cast<void*>(const_cast<T*>(diff_dst_tensor.flat<T>().data()));
-    // gradient of elu(x) = 1 if x > 0; elu(x) + 1 otherwise
-    T feature = (static_cast<T*>(user_i))[0];
-    if (feature > 0) {
-      (static_cast<T*>(out_o))[0] = (static_cast<T*>(user_g))[0];
-    } else {
-      T elu = std::exp(feature) - 1;
-      (static_cast<T*>(out_o))[0] = (static_cast<T*>(user_g))[0] * (elu + 1);
-    }
-  }
-};
-
-template <typename Device, typename T>
-class MklTanhOp : public MklReluOpBase<Device, T, eltwise_tanh> {
- public:
-  ~MklTanhOp() {}
-
-  explicit MklTanhOp(OpKernelConstruction* context) :
-  MklReluOpBase<Device, T, eltwise_tanh>(context) {}
-
-  virtual void Compute_Scalar(OpKernelContext* context) {
-    const size_t src_index = 0;  // index of src input tensor
-    const size_t dst_index = 0;  // index of dst output tensor
-    const Tensor& src_tensor = MklGetInput(context, src_index);
-    MklDnnShape dnn_shape_src;
-    GetMklShape(context, src_index, &dnn_shape_src);
-
-    Tensor* dst_tensor = nullptr;
-    void* user_i = static_cast<void*>(const_cast<T*>(
-                         src_tensor.flat<T>().data()));
-    MklDnnShape dnn_shape_dst;
-    dnn_shape_dst.SetMklTensor(false);
-    AllocateOutputSetMklShape(context, dst_index, &dst_tensor,
-                              src_tensor.shape(), dnn_shape_dst);
-    void* out_o = static_cast<void*>(dst_tensor->flat<T>().data());
-    // tanh(x) = (e^x - e^(-x))/ (e^x + e^(-x))
-    T feature = (static_cast<T*>(user_i))[0];
-    T e1 = std::exp(feature);
-    T e2 = std::exp(-feature);
-    (static_cast<T*>(out_o))[0] = (e1 - e2)/(e1 + e2);
-    return;
-  }
-};
-
-template <typename Device, typename T>
-class MklTanhGradOp : public MklReluGradOpBase<Device, T, eltwise_tanh> {
- public:
-  ~MklTanhGradOp() {}
-
-  explicit MklTanhGradOp(OpKernelConstruction* context) :
-  MklReluGradOpBase<Device, T, eltwise_tanh>(context) {}
-
-  virtual void Compute_Scalar(OpKernelContext* context) {
-    const size_t diff_dst_index = 0;  // index of diff_dst input tensor
-    const size_t src_index = 1;       // index of src input tensor
-    const size_t diff_src_index = 0;  // index of diff_src output tensor
-    const Tensor& src_tensor    = MklGetInput(context, src_index);
-    const Tensor& diff_dst_tensor = MklGetInput(context, diff_dst_index);
-    Tensor* diff_src_tensor = nullptr;
-
-    MklDnnShape dnn_shape_diff_dst;
-    GetMklShape(context, diff_dst_index, &dnn_shape_diff_dst);
-
-    int src_dims_size = src_tensor.dims();
-    MklDnnShape dnn_shape_diff_src;
-    dnn_shape_diff_src.SetMklTensor(false);
-    AllocateOutputSetMklShape(context, diff_src_index, &diff_src_tensor,
-                              diff_dst_tensor.shape(), dnn_shape_diff_src);
-    void* out_o = static_cast<void*>(diff_src_tensor->flat<T>().data());
-    void* user_i =
-          static_cast<void*>(const_cast<T*>(src_tensor.flat<T>().data()));
-    // gradient of tanh(x) = 1 - tanh(x)^2
-    T feature = (static_cast<T*>(user_i))[0];
-    T e1 = std::exp(feature);
-    T e2 = std::exp(-feature);
-    T tanh = (e1 - e2)/(e1 + e2);
-    void* user_g =
-          static_cast<void*>(const_cast<T*>(diff_dst_tensor.flat<T>().data()));
-    (static_cast<T*>(out_o))[0] = (static_cast<T*>(user_g))[0] *
-                                  (1 - tanh * tanh);
-  }
-};
-
-#endif
-
-// register dnn kernels for supported operations and supported types
+/* Register DNN kernels for supported operations and supported types - right now
+ * it is only Relu and f32*/
 #define REGISTER_RELU_MKL_SUPPORTED_KERNELS_TYPES(type)             \
   REGISTER_KERNEL_BUILDER(Name("_MklRelu")                          \
                               .Device(DEVICE_CPU)                   \
@@ -798,38 +367,6 @@ class MklTanhGradOp : public MklReluGradOpBase<Device, T, eltwise_tanh> {
                           MklReluGradOp<CPUDevice, type>);
 TF_CALL_float(REGISTER_RELU_MKL_SUPPORTED_KERNELS_TYPES);
 
-#ifdef INTEL_MKL_DNN
-
-// register dnn kernels for supported operations and supported types
-#define REGISTER_ELU_MKL_SUPPORTED_KERNELS_TYPES(type)             \
-  REGISTER_KERNEL_BUILDER(Name("_MklElu")                          \
-                              .Device(DEVICE_CPU)                   \
-                              .TypeConstraint<type>("T")            \
-                              .Label(mkl_op_registry::kMklOpLabel), \
-                          MklEluOp<CPUDevice, type>);              \
-  REGISTER_KERNEL_BUILDER(Name("_MklEluGrad")                      \
-                              .Device(DEVICE_CPU)                   \
-                              .TypeConstraint<type>("T")            \
-                              .Label(mkl_op_registry::kMklOpLabel), \
-                          MklEluGradOp<CPUDevice, type>);
-TF_CALL_float(REGISTER_ELU_MKL_SUPPORTED_KERNELS_TYPES);
-
-#define REGISTER_TANH_MKL_SUPPORTED_KERNELS_TYPES(type)             \
-  REGISTER_KERNEL_BUILDER(Name("_MklTanh")                          \
-                              .Device(DEVICE_CPU)                   \
-                              .TypeConstraint<type>("T")            \
-                              .Label(mkl_op_registry::kMklOpLabel), \
-                          MklTanhOp<CPUDevice, type>);              \
-  REGISTER_KERNEL_BUILDER(Name("_MklTanhGrad")                      \
-                              .Device(DEVICE_CPU)                   \
-                              .TypeConstraint<type>("T")            \
-                              .Label(mkl_op_registry::kMklOpLabel), \
-                          MklTanhGradOp<CPUDevice, type>);
-TF_CALL_float(REGISTER_TANH_MKL_SUPPORTED_KERNELS_TYPES);
-
-#endif
-
 }  // namespace tensorflow
 
 #endif  // INTEL_MKL
-
diff --git a/tensorflow/core/kernels/mkl_reshape_op.cc b/tensorflow/core/kernels/mkl_reshape_op.cc
index 11c92ebdb4..5e98582475 100644
--- a/tensorflow/core/kernels/mkl_reshape_op.cc
+++ b/tensorflow/core/kernels/mkl_reshape_op.cc
@@ -28,11 +28,6 @@ limitations under the License.
 #include "mkl_dnn_types.h"
 #include "tensorflow/core/util/mkl_util.h"
 
-#ifdef INTEL_MKL_DNN
-#include "mkldnn.hpp"
-using mkldnn::stream;
-#endif
-
 namespace tensorflow {
 using CPUDevice = Eigen::ThreadPoolDevice;
 template <typename Device, typename T>
@@ -40,7 +35,6 @@ class MklReshapeOp : public OpKernel {
  public:
   explicit MklReshapeOp(OpKernelConstruction* context) : OpKernel(context) {}
 
-#ifndef INTEL_MKL_DNN
   void Compute(OpKernelContext* context) override {
     const Tensor& input = MklGetInput(context, 0);
     const Tensor& sizes = MklGetInput(context, 1);
@@ -135,183 +129,7 @@ class MklReshapeOp : public OpKernel {
     }
   }
 
-#else
-
  private:
-  // When the input tensor is in MKL layout and we are reshaping the tensor to a
-  // different shape than its actual shape, then we use MKLDNN reorder primitive
-  // to put tensor back in Tensorflow layout. But we can skip this reordering
-  // some times. This function checks for all such cases.
-  bool SkipReorder(const MklDnnShape& mkl_shape_input,
-                   const TensorShape& reshape_to) {
-    CHECK_EQ(mkl_shape_input.IsMklTensor(), true);
-    bool ret = false;
-
-    // If Tensorflow's data format and the underlying format maintained by
-    // MKLDNN are equivalent (both are NHWC or both are NCHW), then we can
-    // safely return true.
-    auto input_mkl_md = mkl_shape_input.GetMklLayout();
-    if (mkl_shape_input.GetTfDataFormat() == input_mkl_md.data.format) {
-      ret = true;
-    }
-
-    return ret;
-  }
-
- public:
-  void Compute(OpKernelContext* context) override {
-    const Tensor& input_tensor = MklGetInput(context, 0);
-    const Tensor& sizes = MklGetInput(context, 1);
-
-    MklDnnShape mkl_shape_input;
-    GetMklShape(context, kInputSlotIdx, &mkl_shape_input);
-    bool input_in_mkl_format = mkl_shape_input.IsMklTensor();
-    const int64 nelems = input_in_mkl_format ?
-                         mkl_shape_input.GetTfShape().num_elements()
-                         : input_tensor.NumElements();
-
-    // Preliminary validation of sizes.
-    OP_REQUIRES(context, IsLegacyVector(sizes.shape()),
-                errors::InvalidArgument("sizes input must be 1-D, not shape ",
-                                        sizes.shape().DebugString()));
-
-    // Compute the output shape.  Determine product of specified
-    // dimensions, and find the index of the unspecified one.
-    TensorShape shape;
-    int64 product = 1;
-    int unknown_index = -1;
-    switch (sizes.dtype()) {
-      case DT_INT32:
-        OP_REQUIRES_OK(context, ValidateSizes<int32>(sizes, &product,
-                                                     &unknown_index, &shape));
-        break;
-      case DT_INT64:
-        OP_REQUIRES_OK(context, ValidateSizes<int64>(sizes, &product,
-                                                     &unknown_index, &shape));
-        break;
-      default:
-        context->CtxFailure(errors::InvalidArgument(
-            "desired shape must be a DT_INT32 or DT_INT64 vector, not a ",
-            DataTypeString(sizes.dtype())));
-        return;
-    }
-    if (unknown_index != -1) {
-      OP_REQUIRES(
-          context, product > 0,
-          errors::InvalidArgument("Reshape cannot infer the missing input size "
-                                  "for an empty tensor unless all specified "
-                                  "input sizes are non-zero"));
-      const int64 missing = nelems / product;
-      OP_REQUIRES(
-          context, product * missing == nelems,
-          errors::InvalidArgument(
-              "Input to reshape is a tensor with ", nelems,
-              " values, but the requested shape requires a multiple of ",
-              product));
-      shape.set_dim(unknown_index, missing);
-    }
-    OP_REQUIRES(context, shape.num_elements() == nelems,
-                errors::InvalidArgument("Input to reshape is a tensor with ",
-                                        nelems,
-                                        " values, but the requested shape has ",
-                                        shape.num_elements()));
-
-    if (input_in_mkl_format) {
-      TensorShape& shape_to = shape;
-      TensorShape shape_from = mkl_shape_input.GetTfShape();
-      if (shape_from == shape_to) {
-        CopyMklTensorInToOut(context, kInputSlotIdx, kOutputSlotIdx);
-        return;
-      } else {
-        try {
-          auto cpu_engine = engine(engine::cpu, 0);
-          MklDnnData<T> dnn_data_input(&cpu_engine);
-          // Reshape is just a logical view change operation for a tensor.
-          // It does not change underlying layout. But MKLDNN may maintain
-          // tensor data in different layout than that specified by Tensorflow.
-          // If MKLDNN maintains input tensor in different layout than that
-          // specified by Tensorflow, we will need to reorder tensor and then
-          // put it in the shape expected by Tensorflow. But if MKLDNN has
-          // maintained input tensor in the same layout as it is expected by
-          // Tensorflow, we don't need to reorder tensor contents, we just
-          // need to update MklDnnShape object associated with the input
-          // tensor to reflect the shape change expected by reshape.
-          if (!SkipReorder(mkl_shape_input, shape_to)) {
-              // If dimensions that are being expanded or collapsed are not
-              // maintained contiguously by MKLDNN, then we use reorder.
-
-              // Get Mkl layout of input tensor.
-              auto input_mkl_md = mkl_shape_input.GetMklLayout();
-              // Set input Mkl layout as the user layout.
-              dnn_data_input.SetUsrMem(input_mkl_md, &input_tensor);
-              // Get expected Tensorflow layout of input tensor.
-              auto output_tf_md = mkl_shape_input.GetTfLayout();
-              auto output_tf_pd = memory::primitive_desc(output_tf_md,
-                                                         cpu_engine);
-
-              Tensor* output_tensor = nullptr;
-              MklShape mkl_shape_output;
-              mkl_shape_output.SetMklTensor(false);
-              // We allocate output tensor in the shape expected by Reshape.
-              AllocateOutputSetMklShape(context, kOutputSlotIdx, &output_tensor,
-                                        shape_to, mkl_shape_output);
-
-              // Insert reorder between Mkl layout and TensorFlow layout.
-              std::vector<primitive> net;
-              CHECK_EQ(dnn_data_input.CheckReorderToOpMem(output_tf_pd,
-                       output_tensor, &net), true);
-              stream(stream::kind::eager).submit(net).wait();
-              return;
-          } else {
-            // If dimensions that are being expanded or collapsed are
-            // maintained contiguously by MKLDNN, then we skip reorder, just
-            // update MklDnnShape object for the tensorflow tensor, and forward
-            // Tensorflow tensor as it is to the output.
-            auto output_dims = TFShapeToMklDnnDims(shape_to);
-            auto output_strides = CalculateTFStrides(output_dims);
-            auto output_tf_md = MklDnnData<T>::CreateBlockedMemDesc(output_dims,
-                                                               output_strides);
-            auto output_tf_pd = memory::primitive_desc(output_tf_md,
-                                                       cpu_engine);
-
-            // Set MklDnnShape
-            MklDnnShape mkl_shape_output;
-            mkl_shape_output.SetMklTensor(true);
-            mkl_shape_output.SetMklLayout(&output_tf_pd);
-            mkl_shape_output.SetElemType(MklDnnType<T>());
-            mkl_shape_output.SetTfLayout(output_dims.size(), output_dims,
-                                         memory::format::blocked);
-
-            // We now simply forward input Mkl tensor to output and change its
-            // output MklDnnShape object.
-            ForwardMklTensorInToOutWithMklShape(context, kInputSlotIdx,
-                                              kOutputSlotIdx, mkl_shape_output);
-            return;
-          }
-        } catch (mkldnn::error &e) {
-          string error_msg = "Status: " + std::to_string(e.status) +
-                       ", message: " + string(e.message) +
-                       ", in file " + string(__FILE__) + ":" +
-                       std::to_string(__LINE__);
-          OP_REQUIRES_OK(context,
-                   errors::Aborted("Operation received an exception:",
-                      error_msg));
-        }
-      }
-    } else {
-      // If input tensor is not in Mkl format, then just copy Tensorflow tensor
-      // to output with specified shape.
-      CopyTfTensorInToOutWithShape(context, kInputSlotIdx, kOutputSlotIdx,
-                                   shape);
-    }
-  }
-
-#endif  // INTEL_MKL_DNN
-
- private:
-  const int kInputSlotIdx = 0;
-  const int kOutputSlotIdx = 0;
-
   template <typename Tshape>
   Status ValidateSizes(const Tensor& sizes, int64* product, int* unknown_index,
                        TensorShape* shape) {
diff --git a/tensorflow/core/kernels/quantized_conv_ops.cc b/tensorflow/core/kernels/quantized_conv_ops.cc
index 1921b83d12..f83998e0c1 100644
--- a/tensorflow/core/kernels/quantized_conv_ops.cc
+++ b/tensorflow/core/kernels/quantized_conv_ops.cc
@@ -268,13 +268,6 @@ class Im2ColConvFunctor {
     Im2ColBufferResource<T1, chunk_value_count>* im2col_buffer_resource;
     std::function<Status(Im2ColBufferResource<T1, chunk_value_count>**)>
         creator = [](Im2ColBufferResource<T1, chunk_value_count>** resource) {
-#ifdef _MSC_VER
-          // MSVC complains about the capture of chunk_value_count which oddly
-          // works fine in conv_ops_using_gemm.cc for example.
-          // Define chunk_value_count inside the lambda for now.
-          const int64 chunk_value_count =
-              (kMaxChunkSize + (sizeof(T1) - 1)) / sizeof(T1);
-#endif
           *resource = new Im2ColBufferResource<T1, chunk_value_count>();
           return Status::OK();
         };
diff --git a/tensorflow/core/lib/io/snappy/snappy_outputbuffer.cc b/tensorflow/core/lib/io/snappy/snappy_outputbuffer.cc
index 3c31016732..be1fa22c69 100644
--- a/tensorflow/core/lib/io/snappy/snappy_outputbuffer.cc
+++ b/tensorflow/core/lib/io/snappy/snappy_outputbuffer.cc
@@ -161,7 +161,7 @@ Status SnappyOutputBuffer::Deflate() {
   }
 
   // Write length of compressed block to output buffer.
-  char compressed_length_array[4];
+  char* compressed_length_array = new char[4];
   std::fill(compressed_length_array, compressed_length_array + 4, 0);
   for (int i = 0; i < 4; i++) {
     // Little endian.
@@ -173,6 +173,7 @@ Status SnappyOutputBuffer::Deflate() {
   TF_RETURN_IF_ERROR(AddToOutputBuffer(output.data(), output.size()));
   next_in_ += avail_in_;
   avail_in_ = 0;
+  delete[] compressed_length_array;
 
   return Status::OK();
 }
diff --git a/tensorflow/core/ops/nn_ops.cc b/tensorflow/core/ops/nn_ops.cc
index 15122afd23..980d0c31a3 100644
--- a/tensorflow/core/ops/nn_ops.cc
+++ b/tensorflow/core/ops/nn_ops.cc
@@ -2958,25 +2958,6 @@ NOTE Do not invoke this operator directly in Python. Graph rewrite pass is
 expected to invoke these operators.
 )doc");
 
-REGISTER_OP("__MklDummyConv2DWithBias")
-    .Input("input: T")
-    .Input("filter: T")
-    .Input("bias: T")
-    .Output("output: T")
-    .Attr("T: {half, float, double}")
-    .Attr("strides: list(int)")
-    .Attr("use_cudnn_on_gpu: bool = true")
-    .Attr(GetPaddingAttrString())
-    .Attr(GetConvnetDataFormatAttrString())
-    .Doc(R"doc(
-Dummy node that enables fusing Conv2D and BiasAdd operator for MKL. This node
-does not perform anything. It is just created as an intermediate output of
-merging Conv2D and BiasAdd.
-
-NOTE Do not invoke this operator directly in Python. Graph rewrite pass is
-expected to invoke these operators.
-)doc");
-
 REGISTER_OP("_MklConv2DWithBias")
     .Input("input: T")
     .Input("filter: T")
@@ -3030,88 +3011,6 @@ NOTE Do not invoke this operator directly in Python. Graph rewrite pass is
 expected to invoke these operators.
 )doc");
 
-REGISTER_OP("__MklDummyConv2DBackpropFilterWithBias")
-    .Input("input: T")
-    .Input("filter_sizes: int32")
-    .Input("out_backprop: T")
-    .Output("output: T")
-    .Output("bias_grad: T")
-    .Attr("T: {half, float, double}")
-    .Attr("strides: list(int)")
-    .Attr("use_cudnn_on_gpu: bool = true")
-    .Attr(GetPaddingAttrString())
-    .Attr(GetConvnetDataFormatAttrString())
-    .SetShapeFn([](InferenceContext* c) {
-      ShapeHandle input_shape;
-      // Fetch the data_format attribute, which may not exist.
-      string data_format;
-      Status s = c->GetAttr("data_format", &data_format);
-
-      if (s.ok() && data_format == "NCHW") {
-        TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 4, &input_shape));
-        c->set_output(1, c->Vector(c->Dim(input_shape, -3)));
-      } else {
-        TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 4, &input_shape));
-        c->set_output(1, c->Vector(c->Dim(input_shape, -1)));
-      }
-      ShapeHandle sh;
-      TF_RETURN_IF_ERROR(c->MakeShapeFromShapeTensor(1, &sh));
-      TF_RETURN_IF_ERROR(c->WithRank(sh, 4, &sh));
-      c->set_output(0, sh);
-      return Status::OK();
-    })
-    .Doc(R"doc(
-Dummy node that enables fusing Conv2DBackpropFilter and BiasAddGrad operator
-for MKL. This node does not perform anything. It is just created as an
-intermediate output of merging Conv2DBackpropFilter and BiasAddGrad.
-
-NOTE Do not invoke this operator directly in Python. Graph rewrite pass is
-expected to invoke these operators.
-)doc");
-
-REGISTER_OP("_MklConv2DBackpropFilterWithBias")
-    .Input("input: T")
-    .Input("filter_sizes: int32")
-    .Input("out_backprop: T")
-    .Input("mkl_input: uint8")
-    .Input("mkl_filter_size: uint8")
-    .Input("mkl_out_backprop: uint8")
-    .Output("output: T")
-    .Output("bias_grad: T")
-    .Output("mkl_output: uint8")
-    .Output("mkl_bias_grad: uint8")
-    .Attr("T: {half, float, double}")
-    .Attr("strides: list(int)")
-    .Attr("use_cudnn_on_gpu: bool = true")
-    .Attr(GetPaddingAttrString())
-    .Attr(GetConvnetDataFormatAttrString())
-    .SetShapeFn([](InferenceContext* c) {
-      ShapeHandle input_shape;
-      // Fetch the data_format attribute, which may not exist.
-      string data_format;
-      Status s = c->GetAttr("data_format", &data_format);
-
-      if (s.ok() && data_format == "NCHW") {
-        TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 4, &input_shape));
-        c->set_output(1, c->Vector(c->Dim(input_shape, -3)));
-      } else {
-        TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 4, &input_shape));
-        c->set_output(1, c->Vector(c->Dim(input_shape, -1)));
-      }
-      ShapeHandle sh;
-      TF_RETURN_IF_ERROR(c->MakeShapeFromShapeTensor(1, &sh));
-      TF_RETURN_IF_ERROR(c->WithRank(sh, 4, &sh));
-      c->set_output(0, sh);
-      return Status::OK();
-    })
-    .Doc(R"doc(
-MKL version of Conv2DBackpropFilterWithBias. Uses MKL DNN APIs to compute the
-gradients of convolution with respect to the filter.
-
-NOTE Do not invoke this operator directly in Python. Graph rewrite pass is
-expected to invoke these operators.
-)doc");
-
 REGISTER_OP("_MklConv2DWithBiasBackpropBias")
     .Input("out_backprop: T")
     .Input("mkl_out_backprop: uint8")
@@ -3188,78 +3087,6 @@ NOTE Do not invoke this operator directly in Python. Graph rewrite pass is
 expected to invoke these operators.
 )doc");
 
-REGISTER_OP("_MklElu")
-    .Input("features: T")
-    .Input("mkl_features: uint8")
-    .Output("activations: T")
-    .Output("mkl_activations: uint8")
-    .Attr("T: realnumbertype")
-    .SetShapeFn(shape_inference::UnchangedShape)
-    .Doc(R"doc(
-MKL version of Elu operator. Uses MKL DNN APIs to implement Elu operator.
-NOTE Do not invoke this operator directly in Python. Graph rewrite pass is
-expected to invoke these operators.
-)doc");
-
-REGISTER_OP("_MklEluGrad")
-    .Input("gradients: T")
-    .Input("features: T")
-    .Input("mkl_gradients: uint8")
-    .Input("mkl_features: uint8")
-    .Output("backprops: T")
-    .Output("mkl_backprops: uint8")
-    .Attr("T: realnumbertype")
-    .SetShapeFn(shape_inference::MergeBothInputsShapeFn)
-    .Doc(R"doc(
-MKL version of EluGrad operator. Uses MKL DNN APIs to compute Elu
-gradients for Elu operation.
-NOTE Do not invoke this operator directly in Python. Graph rewrite pass is
-expected to invoke these operators.
-)doc");
-
-REGISTER_OP("_MklSoftmax")
-    .Input("logits: T")
-    .Input("mkl_logits: uint8")
-    .Output("softmax: T")
-    .Output("mkl_softmax: uint8")
-    .Attr("T: {half, float, double}")
-    .SetShapeFn([](InferenceContext* c) {
-      return shape_inference::UnchangedShapeWithRankAtLeast(c, 1);
-    })
-    .Doc(R"doc(
-MKL version of ReluGrad operator. Uses MKL DNN APIs to compute rectified
-linear gradients for Relu operation.
-)doc");
-
-REGISTER_OP("_MklTanh")
-    .Input("features: T")
-    .Input("mkl_features: uint8")
-    .Output("activations: T")
-    .Output("mkl_activations: uint8")
-    .Attr("T: realnumbertype")
-    .SetShapeFn(shape_inference::UnchangedShape)
-    .Doc(R"doc(
-MKL version of Tanh operator. Uses MKL DNN APIs to implement Tanh operator.
-NOTE Do not invoke this operator directly in Python. Graph rewrite pass is
-expected to invoke these operators.
-)doc");
-
-REGISTER_OP("_MklTanhGrad")
-    .Input("gradients: T")
-    .Input("features: T")
-    .Input("mkl_gradients: uint8")
-    .Input("mkl_features: uint8")
-    .Output("backprops: T")
-    .Output("mkl_backprops: uint8")
-    .Attr("T: realnumbertype")
-    .SetShapeFn(shape_inference::MergeBothInputsShapeFn)
-    .Doc(R"doc(
-MKL version of TanhGrad operator. Uses MKL DNN APIs to compute tanh
-gradients for Tanh operation.
-NOTE Do not invoke this operator directly in Python. Graph rewrite pass is
-expected to invoke these operators.
-)doc");
-
 REGISTER_OP("_MklMaxPool")
     .Attr("T: {float, half} = DT_FLOAT")
     .Attr("ksize: list(int) >= 4")
diff --git a/tensorflow/core/platform/cloud/BUILD b/tensorflow/core/platform/cloud/BUILD
index aaeccc8324..624145da75 100644
--- a/tensorflow/core/platform/cloud/BUILD
+++ b/tensorflow/core/platform/cloud/BUILD
@@ -10,7 +10,6 @@ licenses(["notice"])  # Apache 2.0
 load(
     "//tensorflow:tensorflow.bzl",
     "tf_cc_test",
-    "tf_copts",
 )
 
 filegroup(
@@ -30,7 +29,6 @@ filegroup(
 cc_library(
     name = "expiring_lru_cache",
     hdrs = ["expiring_lru_cache.h"],
-    copts = tf_copts(),
     visibility = ["//tensorflow:__subpackages__"],
     deps = ["//tensorflow/core:lib"],
 )
@@ -39,7 +37,6 @@ cc_library(
     name = "file_block_cache",
     srcs = ["file_block_cache.cc"],
     hdrs = ["file_block_cache.h"],
-    copts = tf_copts(),
     visibility = ["//tensorflow:__subpackages__"],
     deps = ["//tensorflow/core:lib"],
 )
@@ -48,7 +45,6 @@ cc_library(
     name = "gcs_dns_cache",
     srcs = ["gcs_dns_cache.cc"],
     hdrs = ["gcs_dns_cache.h"],
-    copts = tf_copts(),
     visibility = ["//tensorflow:__subpackages__"],
     deps = [
         ":http_request",
@@ -60,7 +56,6 @@ cc_library(
     name = "gcs_file_system",
     srcs = ["gcs_file_system.cc"],
     hdrs = ["gcs_file_system.h"],
-    copts = tf_copts(),
     linkstatic = 1,  # Needed since alwayslink is broken in bazel b/27630669
     visibility = ["//visibility:public"],
     deps = [
@@ -83,7 +78,6 @@ cc_library(
 cc_library(
     name = "http_request",
     hdrs = ["http_request.h"],
-    copts = tf_copts(),
     visibility = ["//tensorflow:__subpackages__"],
     deps = [
         "//tensorflow/core:framework_headers_lib",
@@ -95,7 +89,6 @@ cc_library(
     name = "curl_http_request",
     srcs = ["curl_http_request.cc"],
     hdrs = ["curl_http_request.h"],
-    copts = tf_copts(),
     visibility = ["//tensorflow:__subpackages__"],
     deps = [
         ":http_request",
@@ -111,7 +104,6 @@ cc_library(
     hdrs = [
         "http_request_fake.h",
     ],
-    copts = tf_copts(),
     visibility = ["//tensorflow:__subpackages__"],
     deps = [
         ":curl_http_request",
@@ -129,7 +121,6 @@ cc_library(
         "auth_provider.h",
         "google_auth_provider.h",
     ],
-    copts = tf_copts(),
     visibility = ["//tensorflow:__subpackages__"],
     deps = [
         ":curl_http_request",
@@ -145,7 +136,6 @@ cc_library(
     name = "now_seconds_env",
     testonly = 1,
     hdrs = ["now_seconds_env.h"],
-    copts = tf_copts(),
     visibility = ["//tensorflow:__subpackages__"],
     deps = [
         "//tensorflow/core:lib",
@@ -161,7 +151,6 @@ cc_library(
     hdrs = [
         "oauth_client.h",
     ],
-    copts = tf_copts(),
     deps = [
         ":curl_http_request",
         ":http_request",
@@ -180,7 +169,6 @@ cc_library(
     hdrs = [
         "retrying_utils.h",
     ],
-    copts = tf_copts(),
     deps = [
         "//tensorflow/core:framework_headers_lib",
         "//tensorflow/core:lib_internal",
@@ -195,7 +183,6 @@ cc_library(
     hdrs = [
         "retrying_file_system.h",
     ],
-    copts = tf_copts(),
     deps = [
         ":retrying_utils",
         "//tensorflow/core:framework_headers_lib",
@@ -211,7 +198,6 @@ cc_library(
     hdrs = [
         "time_util.h",
     ],
-    copts = tf_copts(),
     deps = [
         "//tensorflow/core:framework_headers_lib",
         "//tensorflow/core:lib_internal",
diff --git a/tensorflow/core/platform/cloud/gcs_dns_cache.cc b/tensorflow/core/platform/cloud/gcs_dns_cache.cc
index 87b0dde136..78bf680317 100644
--- a/tensorflow/core/platform/cloud/gcs_dns_cache.cc
+++ b/tensorflow/core/platform/cloud/gcs_dns_cache.cc
@@ -14,14 +14,9 @@ limitations under the License.
 ==============================================================================*/
 
 #include "tensorflow/core/platform/cloud/gcs_dns_cache.h"
-#ifndef _WIN32
+
 #include <arpa/inet.h>
 #include <netdb.h>
-#else
-#include <winsock2.h>
-#include <ws2tcpip.h>
-#include <Windows.h>
-#endif
 #include <sys/types.h>
 
 namespace tensorflow {
@@ -31,21 +26,6 @@ namespace {
 const std::vector<string>& kCachedDomainNames =
     *new std::vector<string>{"www.googleapis.com", "storage.googleapis.com"};
 
-inline void print_getaddrinfo_error(const string& name, int error_code) {
-#ifndef _WIN32
-  if (error_code == EAI_SYSTEM) {
-    LOG(ERROR) << "Error resolving " << name
-               << " (EAI_SYSTEM): " << strerror(errno);
-  } else {
-    LOG(ERROR) << "Error resolving " << name << ": "
-               << gai_strerror(error_code);
-  }
-#else
-  // TODO:WSAGetLastError is better than gai_strerror
-  LOG(ERROR) << "Error resolving " << name << ": " << gai_strerror(error_code);
-#endif
-}
-
 // Selects one item at random from a vector of items, using a uniform
 // distribution.
 template <typename T>
@@ -106,7 +86,7 @@ Status GcsDnsCache::AnnotateRequest(HttpRequest* request) {
 
   std::vector<string> output;
   if (return_code == 0) {
-    for (const addrinfo* i = result; i != nullptr; i = i->ai_next) {
+    for (addrinfo* i = result; i != nullptr; i = i->ai_next) {
       if (i->ai_family != AF_INET || i->ai_addr->sa_family != AF_INET) {
         LOG(WARNING) << "Non-IPv4 address returned. ai_family: " << i->ai_family
                      << ". sa_family: " << i->ai_addr->sa_family << ".";
@@ -126,7 +106,13 @@ Status GcsDnsCache::AnnotateRequest(HttpRequest* request) {
       }
     }
   } else {
-    print_getaddrinfo_error(name, return_code);
+    if (return_code == EAI_SYSTEM) {
+      LOG(ERROR) << "Error resolving " << name
+                 << " (EAI_SYSTEM): " << strerror(errno);
+    } else {
+      LOG(ERROR) << "Error resolving " << name << ": "
+                 << gai_strerror(return_code);
+    }
   }
   if (result != nullptr) {
     freeaddrinfo(result);
diff --git a/tensorflow/core/platform/cloud/gcs_file_system.cc b/tensorflow/core/platform/cloud/gcs_file_system.cc
index a183fe6fa8..f80cbf7626 100644
--- a/tensorflow/core/platform/cloud/gcs_file_system.cc
+++ b/tensorflow/core/platform/cloud/gcs_file_system.cc
@@ -22,9 +22,6 @@ limitations under the License.
 #include <cstring>
 #include <fstream>
 #include <vector>
-#ifdef _WIN32
-#include <io.h>  //for _mktemp
-#endif
 #include "include/json/json.h"
 #include "tensorflow/core/lib/core/errors.h"
 #include "tensorflow/core/lib/gtl/map_util.h"
@@ -43,12 +40,6 @@ limitations under the License.
 #include "tensorflow/core/platform/protobuf.h"
 #include "tensorflow/core/platform/thread_annotations.h"
 
-#ifdef _WIN32
-#ifdef DeleteFile
-#undef DeleteFile
-#endif
-#endif
-
 namespace tensorflow {
 
 namespace {
@@ -118,25 +109,16 @@ constexpr char kReadRequestTimeout[] = "GCS_READ_REQUEST_TIMEOUT_SECS";
 // upload requests.
 constexpr char kWriteRequestTimeout[] = "GCS_WRITE_REQUEST_TIMEOUT_SECS";
 
-// TODO: DO NOT use a hardcoded path
 Status GetTmpFilename(string* filename) {
   if (!filename) {
     return errors::Internal("'filename' cannot be nullptr.");
   }
-#ifndef _WIN32
   char buffer[] = "/tmp/gcs_filesystem_XXXXXX";
   int fd = mkstemp(buffer);
   if (fd < 0) {
     return errors::Internal("Failed to create a temporary file.");
   }
   close(fd);
-#else
-  char buffer[] = "/tmp/gcs_filesystem_XXXXXX";
-  char* ret = _mktemp(buffer);
-  if (ret == nullptr) {
-    return errors::Internal("Failed to create a temporary file.");
-  }
-#endif
   *filename = buffer;
   return Status::OK();
 }
@@ -324,7 +306,6 @@ class GcsWritableFile : public WritableFile {
         file_cache_erase_(std::move(file_cache_erase)),
         sync_needed_(true),
         initial_retry_delay_usec_(initial_retry_delay_usec) {
-    // TODO: to make it safer, outfile_ should be constructed from an FD
     if (GetTmpFilename(&tmp_content_filename_).ok()) {
       outfile_.open(tmp_content_filename_,
                     std::ofstream::binary | std::ofstream::app);
@@ -448,7 +429,7 @@ class GcsWritableFile : public WritableFile {
       return errors::Internal("'size' cannot be nullptr");
     }
     const auto tellp = outfile_.tellp();
-    if (tellp == static_cast<std::streampos>(-1)) {
+    if (tellp == -1) {
       return errors::Internal(
           "Could not get the size of the internal temporary file.");
     }
diff --git a/tensorflow/core/platform/cloud/google_auth_provider.cc b/tensorflow/core/platform/cloud/google_auth_provider.cc
index d77f439c5a..f6fd8373cd 100644
--- a/tensorflow/core/platform/cloud/google_auth_provider.cc
+++ b/tensorflow/core/platform/cloud/google_auth_provider.cc
@@ -14,12 +14,9 @@ limitations under the License.
 ==============================================================================*/
 
 #include "tensorflow/core/platform/cloud/google_auth_provider.h"
-#ifndef _WIN32
 #include <pwd.h>
-#include <unistd.h>
-#else
 #include <sys/types.h>
-#endif
+#include <unistd.h>
 #include <fstream>
 #include "include/json/json.h"
 #include "tensorflow/core/lib/core/errors.h"
diff --git a/tensorflow/core/platform/cloud/oauth_client.cc b/tensorflow/core/platform/cloud/oauth_client.cc
index 3c2830ccd9..c700b97dc9 100644
--- a/tensorflow/core/platform/cloud/oauth_client.cc
+++ b/tensorflow/core/platform/cloud/oauth_client.cc
@@ -14,13 +14,9 @@ limitations under the License.
 ==============================================================================*/
 
 #include "tensorflow/core/platform/cloud/oauth_client.h"
-#ifndef _WIN32
 #include <pwd.h>
 #include <sys/types.h>
 #include <unistd.h>
-#else
-#include <sys/types.h>
-#endif
 #include <fstream>
 #include <openssl/bio.h>
 #include <openssl/evp.h>
diff --git a/tensorflow/core/platform/cloud/time_util.cc b/tensorflow/core/platform/cloud/time_util.cc
index 0587a65c29..2f8643f3c7 100644
--- a/tensorflow/core/platform/cloud/time_util.cc
+++ b/tensorflow/core/platform/cloud/time_util.cc
@@ -18,9 +18,6 @@ limitations under the License.
 #include <cmath>
 #include <cstdio>
 #include <ctime>
-#ifdef _WIN32
-#define timegm _mkgmtime
-#endif
 #include "tensorflow/core/lib/core/errors.h"
 
 namespace tensorflow {
diff --git a/tensorflow/core/platform/default/build_config.bzl b/tensorflow/core/platform/default/build_config.bzl
index 948334d27b..0f8cf8f122 100644
--- a/tensorflow/core/platform/default/build_config.bzl
+++ b/tensorflow/core/platform/default/build_config.bzl
@@ -458,6 +458,7 @@ def tf_additional_lib_deps():
 
 def tf_additional_core_deps():
   return select({
+      "//tensorflow:with_gcp_support_windows_override": [],
       "//tensorflow:with_gcp_support_android_override": [],
       "//tensorflow:with_gcp_support_ios_override": [],
       "//tensorflow:with_gcp_support": [
diff --git a/tensorflow/core/platform/profile_utils/android_armv7a_cpu_utils_helper.cc b/tensorflow/core/platform/profile_utils/android_armv7a_cpu_utils_helper.cc
index 12dc9c58b3..fb1955edde 100644
--- a/tensorflow/core/platform/profile_utils/android_armv7a_cpu_utils_helper.cc
+++ b/tensorflow/core/platform/profile_utils/android_armv7a_cpu_utils_helper.cc
@@ -118,10 +118,9 @@ int64 AndroidArmV7ACpuUtilsHelper::ReadCpuFrequencyFile(
   const int retval = fscanf(fp, "%lld", &freq_in_khz);
   if (retval < 0) {
     LOG(WARNING) << "Failed to \"" << file_path << "\"";
-    fclose(fp);
     return INVALID_CPU_FREQUENCY;
   }
-  fclose(fp);
+  pclose(fp);
   return freq_in_khz * 1000;  // The file contains cpu frequency in khz
 }
 
diff --git a/tensorflow/core/platform/s3/s3_file_system.cc b/tensorflow/core/platform/s3/s3_file_system.cc
index 682ad97eec..234f3c3aed 100644
--- a/tensorflow/core/platform/s3/s3_file_system.cc
+++ b/tensorflow/core/platform/s3/s3_file_system.cc
@@ -12,9 +12,9 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
-#include "tensorflow/core/platform/s3/s3_file_system.h"
 #include "tensorflow/core/lib/io/path.h"
 #include "tensorflow/core/platform/mutex.h"
+#include "tensorflow/core/platform/s3/s3_file_system.h"
 #include "tensorflow/core/platform/s3/s3_crypto.h"
 
 #include <aws/core/Aws.h>
@@ -49,15 +49,9 @@ Aws::Client::ClientConfiguration& GetDefaultClientConfig() {
     if (endpoint) {
       cfg.endpointOverride = Aws::String(endpoint);
     }
-    const char* region = getenv("AWS_REGION");
+    const char* region = getenv("S3_REGION");
     if (region) {
       cfg.region = Aws::String(region);
-    } else {
-      // TODO (yongtang): `S3_REGION` should be deprecated after 2.0.
-      const char* region = getenv("S3_REGION");
-      if (region) {
-        cfg.region = Aws::String(region);
-      }
     }
     const char* use_https = getenv("S3_USE_HTTPS");
     if (use_https) {
diff --git a/tensorflow/core/util/mkl_util.h b/tensorflow/core/util/mkl_util.h
index 2caf5fc56d..148c7851bd 100644
--- a/tensorflow/core/util/mkl_util.h
+++ b/tensorflow/core/util/mkl_util.h
@@ -328,10 +328,6 @@ class MklShape {
 
 // Forward decl
 TensorFormat MklDnnDataFormatToTFDataFormat(memory::format format);
-memory::dims CalculateTFStrides(const memory::dims& dims_tf_order);
-memory::desc CreateBlockedMemDescHelper(const memory::dims& dim,
-                                        const memory::dims& strides,
-                                        memory::data_type dtype);
 
 class MklDnnShape {
  private:
@@ -368,52 +364,6 @@ class MklDnnShape {
   ~MklDnnShape() {}
   TF_DISALLOW_COPY_AND_ASSIGN(MklDnnShape);  // Cannot copy
 
-  /// Helper function to compare memory::desc objects for MklDnn.
-  /// May be this should go into MklDnn directly.
-  inline bool CompareMklDnnLayouts(const memory::desc& md1,
-                                   const memory::desc& md2) const {
-    mkldnn_memory_desc_t mdd1 = md1.data;
-    mkldnn_memory_desc_t mdd2 = md2.data;
-    const char* d1 = reinterpret_cast<const char*>(&mdd1);
-    const char* d2 = reinterpret_cast<const char*>(&mdd2);
-
-    size_t md_size = sizeof(mdd1);
-    for (size_t i = 0; i < md_size; i++) {
-      if (*d1++ != *d2++) {
-        return false;
-      }
-    }
-    return true;
-  }
-
-  /// Equality function for MklDnnShape objects
-  /// @return true if both are equal; false otherwise.
-  inline bool operator == (const MklDnnShape& input_shape) const {
-    if (this->IsMklTensor() != input_shape.IsMklTensor()) {
-      return false;
-    }
-
-    // If input tensors are in Mkl layout, then we check for dimensions and
-    // sizes.
-    if (this->IsMklTensor()) {
-      return this->GetTfShape() == input_shape.GetTfShape() &&
-             CompareMklDnnLayouts(this->GetMklLayout(),
-                                  input_shape.GetMklLayout());
-    }
-
-    return true;
-  }
-
-  /// Equality operator for MklDnnShape and TFShape.
-  /// Returns: true if TF shapes for both are the same, false otherwise
-  inline bool operator == (const TensorShape& input_shape) const {
-    if (!this->IsMklTensor()) {
-      return false;
-    }
-
-    return this->GetTfShape() == input_shape;
-  }
-
   inline const bool IsMklTensor() const { return data_.is_mkl_tensor_; }
   inline void SetMklTensor(bool is_mkl_tensor) {
     data_.is_mkl_tensor_ = is_mkl_tensor;
@@ -425,7 +375,7 @@ class MklDnnShape {
   inline size_t GetDimension(char dimension) const {
     int index = GetMklDnnTensorDimIndex(dimension);
     CHECK(index >= 0 && index < this->GetDimension())
-      << "Invalid index from the dimension: " << index << ", " << dimension;
+        << "Invalid index from the dimension: " << index << ", " << dimension;
     return this->DimSize(index);
   }
 
@@ -455,7 +405,7 @@ class MklDnnShape {
   inline memory::dims GetSizesAsMklDnnDims() const {
     memory::dims retVal;
     if (data_.is_mkl_tensor_) {
-      size_t dimensions = sizeof(data_.sizes_) / sizeof(data_.sizes_[0]);
+      int dimensions = sizeof(data_.sizes_) / sizeof(data_.sizes_[0]);
       for (size_t i = 0; i < dimensions; i++) {
         if (data_.sizes_[i] != INVALID_DIM_SIZE)
           retVal.push_back(data_.sizes_[i]);
@@ -473,21 +423,12 @@ class MklDnnShape {
 
   /// Return TensorShape that describes the Tensorflow shape of the tensor
   /// represented by this MklShape.
-  inline TensorShape GetTfShape() const {
+  inline TensorShape GetTfShape() {
     CHECK_EQ(data_.is_mkl_tensor_, true);
 
     std::vector<int32> shape(data_.dimension_, -1);
-    if (data_.tf_data_format_ != memory::format::blocked) {
-      for (size_t idx = 0; idx < data_.dimension_; ++idx) {
-        shape[idx] = data_.sizes_[TfDimIdx(idx)];
-      }
-    } else {
-      // If Tensorflow shape is in Blocked format, then we don't have dimension
-      // map for it. So we just create Tensorflow shape from sizes in the
-      // specified order.
-      for (size_t idx = 0; idx < data_.dimension_; ++idx) {
-        shape[idx] = data_.sizes_[idx];
-      }
+    for (size_t idx = 0; idx < data_.dimension_; ++idx) {
+      shape[idx] = data_.sizes_[TfDimIdx(idx)];
     }
 
     TensorShape ts;
@@ -503,12 +444,6 @@ class MklDnnShape {
     CHECK_NOTNULL(pd);
     data_.mkl_md_ = pd->desc().data;
   }
-
-  inline void SetMklLayout(memory::desc* md) {
-    CHECK_NOTNULL(md);
-    data_.mkl_md_ = md->data;
-  }
-
   inline const memory::desc GetMklLayout() const {
     return memory::desc(data_.mkl_md_);
   }
@@ -517,8 +452,7 @@ class MklDnnShape {
     return data_.tf_data_format_;
   }
   /// We don't create primitive_descriptor for TensorFlow layout now.
-  /// We use lazy evaluation and create it only when needed. Input format can
-  /// also be Blocked format.
+  /// We use lazy evaluation and create it only when needed.
   inline void SetTfLayout(size_t dims, const memory::dims& sizes,
                           memory::format format) {
     CHECK_EQ(dims, sizes.size());
@@ -527,26 +461,15 @@ class MklDnnShape {
       data_.sizes_[ii] = sizes[ii];
     }
     data_.tf_data_format_ = format;
-    if (format != memory::format::blocked) {
-      SetTfDimOrder(dims, format);
-    }
+    SetTfDimOrder(dims, format);
   }
-
   inline const memory::desc GetTfLayout() const {
     memory::dims dims;
     for (size_t ii = 0; ii < data_.dimension_; ii++) {
       dims.push_back(data_.sizes_[ii]);
     }
-
-    // Create Blocked memory desc if input TF format was set like that.
-    if (data_.tf_data_format_ == memory::format::blocked) {
-      auto strides = CalculateTFStrides(dims);
-      return CreateBlockedMemDescHelper(dims, strides, data_.T_);
-    } else {
-      return memory::desc(dims, data_.T_, data_.tf_data_format_);
-    }
+    return memory::desc(dims, data_.T_, data_.tf_data_format_);
   }
-
   inline const memory::desc GetCurLayout() const {
     return IsMklTensor() ? GetMklLayout() : GetTfLayout();
   }
@@ -656,13 +579,8 @@ class MklDnnShape {
 #endif
 
 // List of MklShape objects. Used in Concat/Split layers.
-
 typedef std::vector<MklShape> MklShapeList;
 
-#ifdef INTEL_MKL_DNN
-typedef std::vector<MklDnnShape> MklDnnShapeList;
-#endif
-
 // Check if all tensors specified by MklShapes are MKL tensors.
 inline bool AreAllMklTensors(const MklShapeList& shapes) {
   for (auto& s : shapes) {
@@ -673,7 +591,6 @@ inline bool AreAllMklTensors(const MklShapeList& shapes) {
   return true;
 }
 
-#ifndef INTEL_MKL_DNN
 template <typename T>
 inline Tensor ConvertMklToTF(OpKernelContext* context, const Tensor& mkl_tensor,
                              const MklShape& mkl_shape) {
@@ -698,15 +615,32 @@ inline Tensor ConvertMklToTF(OpKernelContext* context, const Tensor& mkl_tensor,
 
   return output_tensor;
 }
-#else
+
+#ifdef INTEL_MKL_DNN
 template <typename T>
 inline Tensor ConvertMklToTF(OpKernelContext* context, const Tensor& mkl_tensor,
                              const MklDnnShape& mkl_shape) {
   Tensor output_tensor;
   TensorShape output_shape;
 
-  TF_CHECK_OK(Status(error::Code::UNIMPLEMENTED,
-                     "Unimplemented conversion function"));
+#if 0
+  // TODO(nhasabni): need to implement
+  for (size_t j = 0; j < mkl_shape.GetDimension(); j++) {
+    // Outermost to innermost dimension
+    output_shape.AddDim(mkl_shape.GetSizes()[mkl_shape.tf_dim_idx(j)]);
+  }
+
+  // Allocate output tensor.
+  context->allocate_temp(DataTypeToEnum<T>::v(), output_shape, &output_tensor);
+
+  dnnLayout_t output_layout = static_cast<dnnLayout_t>(mkl_shape.GetTfLayout());
+  void* input_buffer = const_cast<T*>(mkl_tensor.flat<T>().data());
+  void* output_buffer = const_cast<T*>(output_tensor.flat<T>().data());
+
+  if (mkl_tensor.NumElements() != 0) {
+    mkl_shape.GetConvertedFlatData(output_layout, input_buffer, output_buffer);
+  }
+#endif
 
   return output_tensor;
 }
@@ -748,9 +682,6 @@ inline void GetMklInputList(OpKernelContext* ctext, StringPiece name,
   ctext->input_list(name, input_tensors);
 }
 
-
-#ifndef INTEL_MKL_DNN
-
 inline void GetMklShapeList(OpKernelContext* ctext, StringPiece name,
                             MklShapeList* mkl_shapes) {
   OpInputList input_mkl_tensors;
@@ -763,22 +694,6 @@ inline void GetMklShapeList(OpKernelContext* ctext, StringPiece name,
   }
 }
 
-#else
-
-inline void GetMklShapeList(OpKernelContext* ctext, StringPiece name,
-                            MklDnnShapeList* mkl_shapes) {
-  OpInputList input_mkl_tensors;
-  GetMklInputList(ctext, strings::StrCat("mkl_", name), &input_mkl_tensors);
-
-  for (int i = 0; i < input_mkl_tensors.size(); i++) {
-    (*mkl_shapes)[i].DeSerializeMklDnnShape(
-        input_mkl_tensors[i].flat<uint8>().data(),
-        input_mkl_tensors[i].flat<uint8>().size() * sizeof(uint8));
-  }
-}
-
-#endif
-
 #ifdef INTEL_MKL_DNN
 /// Get shape of input tensor pointed by 'input_idx' in TensorShape format.
 /// If the input tensor is in MKL layout, then obtains TensorShape from
@@ -994,7 +909,6 @@ inline void CopyMklTensorInToOut(OpKernelContext* context,
   context->set_output(idx_meta_out, meta_output);
 }
 
-#ifndef INTEL_MKL_DNN
 inline void CopyTfTensorInToOutWithShape(OpKernelContext* context,
                                          int idx_in, int idx_out,
                                          const TensorShape& shape) {
@@ -1012,27 +926,6 @@ inline void CopyTfTensorInToOutWithShape(OpKernelContext* context,
   CHECK(output.CopyFrom(data, shape));
   context->set_output(idx_data_out, output);
 }
-#else
-inline void CopyTfTensorInToOutWithShape(OpKernelContext* context,
-                                         int idx_in, int idx_out,
-                                         const TensorShape& shape) {
-  int num_inputs = context->num_inputs();
-  int num_outputs = context->num_outputs();
-  int idx_data_in = GetTensorDataIndex(idx_in, num_inputs);
-  int idx_data_out = GetTensorDataIndex(idx_out, num_outputs);
-
-  const Tensor& data = context->input(idx_data_in);
-  MklDnnShape mkl_shape_output;
-  mkl_shape_output.SetMklTensor(false);
-  AllocateOutputSetMklShape(context, idx_out, mkl_shape_output);
-  Tensor output(data.dtype());
-  // TODO(intel_tf): alternatively, call forward_input_to_output_with_shape(...)
-  CHECK(output.CopyFrom(data, shape));
-  context->set_output(idx_data_out, output);
-}
-#endif
-
-#ifndef INTEL_MKL_DNN
 
 inline void ForwardTfTensorInToOut(OpKernelContext* context,
                                   int idx_in, int idx_out) {
@@ -1051,27 +944,6 @@ inline void ForwardTfTensorInToOut(OpKernelContext* context,
   }
 }
 
-#else
-
-inline void ForwardTfTensorInToOut(OpKernelContext* context,
-                                  int idx_in, int idx_out) {
-  int num_inputs = context->num_inputs();
-  int num_outputs = context->num_outputs();
-  int idx_data_in = GetTensorDataIndex(idx_in, num_inputs);
-  int idx_data_out = GetTensorDataIndex(idx_out, num_outputs);
-
-  MklDnnShape dnn_shape_output;
-  dnn_shape_output.SetMklTensor(false);
-  AllocateOutputSetMklShape(context, idx_out, dnn_shape_output);
-  if (IsRefType(context->input_dtype(idx_data_in))) {
-    context->forward_ref_input_to_ref_output(idx_data_in, idx_data_out);
-  } else {
-    context->set_output(idx_data_out, context->input(idx_data_in));
-  }
-}
-
-#endif
-
 inline void ForwardMklTensorInToOut(OpKernelContext* context,
                                    int idx_in, int idx_out) {
   int num_inputs = context->num_inputs();
@@ -1090,25 +962,6 @@ inline void ForwardMklTensorInToOut(OpKernelContext* context,
   }
 }
 
-#ifdef INTEL_MKL_DNN
-inline void ForwardMklTensorInToOutWithMklShape(OpKernelContext* context,
-                                             int idx_in, int idx_out,
-                                             const MklDnnShape& mkl_shape) {
-  int num_inputs = context->num_inputs();
-  int num_outputs = context->num_outputs();
-  int idx_data_in = GetTensorDataIndex(idx_in, num_inputs);
-  int idx_data_out = GetTensorDataIndex(idx_out, num_outputs);
-
-  AllocateOutputSetMklShape(context, idx_out, mkl_shape);
-
-  if (IsRefType(context->input_dtype(idx_data_in))) {
-    context->forward_ref_input_to_ref_output(idx_data_in, idx_data_out);
-  } else {
-    context->set_output(idx_data_out, context->input(idx_data_in));
-  }
-}
-#endif
-
 // Forward the MKL shape ONLY (used in elementwise and other ops where
 // we call the eigen implementation and MKL shape is not used)
 inline void ForwardMklMetaDataInToOut(OpKernelContext* context,
@@ -1132,10 +985,6 @@ inline void SetDummyMklShapeOutput(OpKernelContext* context,
   AllocateOutputSetMklShape(context, idx_data_out, mkl_shape_output);
 }
 
-#ifndef INTEL_MKL_DNN
-// We don't need these functions in MKLDNN. We have defined equality operator
-// on MklDnnShape class directly.
-
 // Checks if the TF shape for both MKL tensors is the same or not
 // Returns: true if both TF shapes are the same, false otherwise
 inline bool MklCompareShapes(const MklShape* input_shape_0,
@@ -1202,7 +1051,6 @@ inline bool MklCompareShapes(const TensorShape* input_shape_0,
 
   return true;
 }
-#endif
 
 // These functions do not compile with MKL-DNN since mkl.h is missing.
 // We may need to remove them later.
@@ -1279,14 +1127,11 @@ inline memory::format TFDataFormatToMklDnnDataFormat(TensorFormat format) {
 /// @return: Tensorflow data format corresponding to memory::format
 ///          Fails with an error if invalid data format.
 inline TensorFormat MklDnnDataFormatToTFDataFormat(memory::format format) {
-  if (format == memory::format::nhwc) return FORMAT_NHWC;
-  else if (format == memory::format::nchw) return FORMAT_NCHW;
-  TF_CHECK_OK(Status(error::Code::INVALID_ARGUMENT,
-                     "Unsupported data format"));
-
-  // Return to prevent compiler warnings, otherwise TF_CHECK_OK will ensure
-  // that we don't come here.
-  return FORMAT_NHWC;
+  if (format == memory::format::nhwc)
+    return FORMAT_NHWC;
+  else if (format == memory::format::nchw)
+    return FORMAT_NCHW;
+  TF_CHECK_OK(Status(error::Code::INVALID_ARGUMENT, "Unsupported data format"));
 }
 
 /// Map TensorShape object into memory::dims required by MKL-DNN
@@ -1330,23 +1175,6 @@ inline memory::dims TFShapeToMklDnnDimsInNCHW(const TensorShape& shape,
   return memory::dims({n, c, h, w});
 }
 
-/// Overloaded version of function above. Input parameters are
-/// self-explanatory.
-inline memory::dims MklDnnDimsInNCHW(const memory::dims& in_dims,
-                                     TensorFormat format) {
-  // Check validity of format.
-  CHECK_NE(TFDataFormatToMklDnnDataFormat(format),
-           memory::format::format_undef);
-
-  int n = in_dims[GetTensorDimIndex(format, 'N')];
-  int c = in_dims[GetTensorDimIndex(format, 'C')];
-  int h = in_dims[GetTensorDimIndex(format, 'H')];
-  int w = in_dims[GetTensorDimIndex(format, 'W')];
-
-  // MKL-DNN requires dimensions in NCHW format.
-  return memory::dims({n, c, h, w});
-}
-
 /// Map MklDnn memory::dims object into TensorShape object.
 ///
 /// This function will simply map input shape in MKL-DNN memory::dims format
@@ -1389,43 +1217,6 @@ inline padding_kind TFPaddingToMklDnnPadding(Padding pad) {
   return padding_kind::zero;
 }
 
-/// Helper function to create memory descriptor in Blocked format
-///
-/// @input: Tensor dimensions
-/// @input: strides corresponding to dimensions. One can use utility
-///         function such as CalculateTFStrides to compute strides
-///         for given dimensions.
-/// @return: memory::desc object corresponding to blocked memory format
-///          for given dimensions and strides.
-inline memory::desc CreateBlockedMemDescHelper(const memory::dims& dim,
-                                               const memory::dims& strides,
-                                               memory::data_type dtype) {
-  CHECK_EQ(dim.size(), strides.size());
-
-  // We have to construct memory descriptor in a C style. This is not at all
-  // ideal but MKLDNN does not offer any API to construct descriptor in
-  // blocked format except a copy constructor that accepts
-  // mkldnn_memory_desc_t.
-  mkldnn_memory_desc_t md;
-  md.primitive_kind = mkldnn_memory;
-  md.ndims = dim.size();
-  md.format = mkldnn_blocked;
-  md.data_type = memory::convert_to_c(dtype);
-
-  for (size_t i = 0; i < dim.size(); i++) {
-    md.layout_desc.blocking.block_dims[i] = 1;
-    md.layout_desc.blocking.strides[1][i] = 1;
-    md.layout_desc.blocking.strides[0][i] = strides[i];
-    md.layout_desc.blocking.padding_dims[i] = dim[i];
-    md.layout_desc.blocking.offset_padding_to_data[i] = 0;
-    md.dims[i] = dim[i];
-  }
-  md.layout_desc.blocking.offset_padding = 0;
-
-  return memory::desc(md);
-}
-
-
 /*
  * Class to represent all the resources corresponding to a tensor in TensorFlow
  * that are required to execute an operation (such as Convolution).
@@ -1494,8 +1285,30 @@ class MklDnnData {
   /// @return: memory::desc object corresponding to blocked memory format
   ///          for given dimensions and strides.
   static inline memory::desc CreateBlockedMemDesc(const memory::dims& dim,
-                                                 const memory::dims& strides) {
-    return CreateBlockedMemDescHelper(dim, strides, MklDnnType<T>());
+                                                  const memory::dims& strides) {
+    CHECK_EQ(dim.size(), strides.size());
+
+    // We have to construct memory descriptor in a C style. This is not at all
+    // ideal but MKLDNN does not offer any API to construct descriptor in
+    // blocked format except a copy constructor that accepts
+    // mkldnn_memory_desc_t.
+    mkldnn_memory_desc_t md;
+    md.primitive_kind = mkldnn_memory;
+    md.ndims = dim.size();
+    md.format = mkldnn_blocked;
+    md.data_type = memory::convert_to_c(MklDnnType<T>());
+
+    for (size_t i = 0; i < dim.size(); i++) {
+      md.layout_desc.blocking.block_dims[i] = 1;
+      md.layout_desc.blocking.strides[1][i] = 1;
+      md.layout_desc.blocking.strides[0][i] = strides[i];
+      md.layout_desc.blocking.padding_dims[i] = dim[i];
+      md.layout_desc.blocking.offset_padding_to_data[i] = 0;
+      md.dims[i] = dim[i];
+    }
+    md.layout_desc.blocking.offset_padding = 0;
+
+    return memory::desc(md);
   }
 
   /// A version of SetUsrMem call that allows user to create memory in blocked
@@ -1563,7 +1376,6 @@ class MklDnnData {
     return user_memory_->get_primitive_desc();
   }
 
-
   /// Get function for descriptor of user memory.
   inline memory::desc GetUsrMemDesc() {
     // This is ugly. Why MKL-DNN does not provide desc() method of const type??
@@ -1626,17 +1438,6 @@ class MklDnnData {
     return op_pd != user_memory_->get_primitive_desc();
   }
 
-  /// Predicate that checks if we need to reorder user's memory into memory
-  /// based on the provided format.
-  ///
-  /// @input: target_format - memory format of the given input of an
-  ///               operation
-  /// @return: true in case reorder of input is needed; false, otherwise.
-  inline bool IsReorderNeeded(const memory::format& target_format) const {
-    CHECK_NOTNULL(user_memory_);
-    return target_format != user_memory_->get_primitive_desc().desc().data.format;
-  }
-
   /// Function to create a reorder from memory pointed by from to memory pointed
   /// by to. Returns created primitive.
   inline primitive CreateReorder(const memory* from, const memory* to) const {
diff --git a/tensorflow/docs_src/api_guides/python/image.md b/tensorflow/docs_src/api_guides/python/image.md
index 051e4547ee..a2c8c3c3c9 100644
--- a/tensorflow/docs_src/api_guides/python/image.md
+++ b/tensorflow/docs_src/api_guides/python/image.md
@@ -19,7 +19,6 @@ Note: The PNG encode and decode Ops support RGBA, but the conversions Ops
 presently only support RGB, HSV, and GrayScale. Presently, the alpha channel has
 to be stripped from the image and re-attached using slicing ops.
 
-*   @{tf.image.decode_bmp}
 *   @{tf.image.decode_gif}
 *   @{tf.image.decode_jpeg}
 *   @{tf.image.encode_jpeg}
diff --git a/tensorflow/docs_src/api_guides/python/reading_data.md b/tensorflow/docs_src/api_guides/python/reading_data.md
index f316cce953..4594887349 100644
--- a/tensorflow/docs_src/api_guides/python/reading_data.md
+++ b/tensorflow/docs_src/api_guides/python/reading_data.md
@@ -175,25 +175,14 @@ For example,
 [`tensorflow/examples/how_tos/reading_data/convert_to_records.py`](https://www.tensorflow.org/code/tensorflow/examples/how_tos/reading_data/convert_to_records.py)
 converts MNIST data to this format.
 
-The recommended way to read a TFRecord file is with a @{tf.data.TFRecordDataset}, [as in this example](https://www.tensorflow.org/code/tensorflow/examples/how_tos/reading_data/fully_connected_reader.py):
-
-``` python
-    dataset = tf.data.TFRecordDataset(filename)
-    dataset = dataset.repeat(num_epochs)
-
-    # map takes a python function and applies it to every sample
-    dataset = dataset.map(decode)
-```
-
-To acomplish the same task with a queue based input pipeline requires the following code 
-(using the same `decode` function from the above example): 
-
-``` python
-  filename_queue = tf.train.string_input_producer([filename], num_epochs=num_epochs)
-  reader = tf.TFRecordReader()
-  _, serialized_example = reader.read(filename_queue)
-  image,label = decode(serialized_example)
-```
+To read a file of TFRecords, use
+@{tf.TFRecordReader} with
+the @{tf.parse_single_example}
+decoder. The `parse_single_example` op decodes the example protocol buffers into
+tensors. An MNIST example using the data produced by `convert_to_records` can be
+found in
+[`tensorflow/examples/how_tos/reading_data/fully_connected_reader.py`](https://www.tensorflow.org/code/tensorflow/examples/how_tos/reading_data/fully_connected_reader.py),
+which you can compare with the `fully_connected_feed` version.
 
 ### Preprocessing
 
diff --git a/tensorflow/docs_src/get_started/mnist/mechanics.md b/tensorflow/docs_src/get_started/mnist/mechanics.md
index dac00498e1..71eee4291e 100644
--- a/tensorflow/docs_src/get_started/mnist/mechanics.md
+++ b/tensorflow/docs_src/get_started/mnist/mechanics.md
@@ -47,7 +47,7 @@ training folder and then unpack that data to return a dictionary of `DataSet`
 instances.
 
 ```python
-data_sets = input_data.read_data_sets(FLAGS.input_data_dir, FLAGS.fake_data)
+data_sets = input_data.read_data_sets(FLAGS.train_dir, FLAGS.fake_data)
 ```
 
 **NOTE**: The `fake_data` flag is used for unit-testing purposes and may be
@@ -364,7 +364,7 @@ may be instantiated to write the events files, which
 contain both the graph itself and the values of the summaries.
 
 ```python
-summary_writer = tf.summary.FileWriter(FLAGS.log_dir, sess.graph)
+summary_writer = tf.summary.FileWriter(FLAGS.train_dir, sess.graph)
 ```
 
 Lastly, the events file will be updated with new summary values every time the
@@ -398,7 +398,7 @@ method will periodically be called to write a checkpoint file to the training
 directory with the current values of all the trainable variables.
 
 ```python
-saver.save(sess, checkpoint_file, global_step=step)
+saver.save(sess, FLAGS.train_dir, global_step=step)
 ```
 
 At some later point in the future, training might be resumed by using the
@@ -406,7 +406,7 @@ At some later point in the future, training might be resumed by using the
 method to reload the model parameters.
 
 ```python
-saver.restore(sess, checkpoint_file)
+saver.restore(sess, FLAGS.train_dir)
 ```
 
 ## Evaluate the Model
diff --git a/tensorflow/docs_src/programmers_guide/estimators.md b/tensorflow/docs_src/programmers_guide/estimators.md
index 8b6cbbcd17..6544a16f2b 100644
--- a/tensorflow/docs_src/programmers_guide/estimators.md
+++ b/tensorflow/docs_src/programmers_guide/estimators.md
@@ -187,7 +187,7 @@ est_inception_v3.train(input_fn=train_input_fn, steps=2000)
 Note that the names of feature columns and labels of a keras estimator come from
 the corresponding compiled keras model. For example, the input key names for
 @{$get_started/input_fn} in above `est_inception_v3` estimator can be obtained
-from `keras_inception_v3.input_names`, and similarly, the predicted output
+from `keras_inception_v3.input_names`, and similarily, the predicted output
 names can be obtained from `keras_inception_v3.output_names`.
 
 For more details, please refer to the documentation for
diff --git a/tensorflow/docs_src/programmers_guide/variables.md b/tensorflow/docs_src/programmers_guide/variables.md
index bac385c02c..16753c931f 100644
--- a/tensorflow/docs_src/programmers_guide/variables.md
+++ b/tensorflow/docs_src/programmers_guide/variables.md
@@ -205,7 +205,7 @@ methods:
 v = tf.get_variable("v", shape=(), initializer=tf.zeros_initializer())
 assignment = v.assign_add(1)
 tf.global_variables_initializer().run()
-sess.run(assignment)  # or assignment.op.run()
+assignment.run()
 ```
 
 Most TensorFlow optimizers have specialized ops that efficiently update the
diff --git a/tensorflow/examples/android/build.gradle b/tensorflow/examples/android/build.gradle
index f7bdf8b816..48f566f825 100644
--- a/tensorflow/examples/android/build.gradle
+++ b/tensorflow/examples/android/build.gradle
@@ -28,8 +28,8 @@ buildscript {
     }
 
     dependencies {
-        classpath 'com.android.tools.build:gradle:3.0.1'
-        classpath 'org.apache.httpcomponents:httpclient:4.5.4'
+        classpath 'com.android.tools.build:gradle:2.3.0'
+        classpath 'org.apache.httpcomponents:httpclient:4.5.2'
     }
 }
 
@@ -75,7 +75,7 @@ apply plugin: 'com.android.application'
 
 android {
     compileSdkVersion 23
-    buildToolsVersion '26.0.2'
+    buildToolsVersion "25.0.2"
 
     if (nativeBuildSystem == 'cmake') {
         defaultConfig {
diff --git a/tensorflow/examples/android/gradle/wrapper/gradle-wrapper.jar b/tensorflow/examples/android/gradle/wrapper/gradle-wrapper.jar
deleted file mode 100644
index 13372aef5e24af05341d49695ee84e5f9b594659..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 53636
zcmWIWW@h1HVBp|j(AqTBoq>UYfeAz~Ffed3FfjPKhB)ea`nl;dGoUKGK6OTrJp%(n
zC<6n72(m(7M?X(D*WeI6U$@V`XHNTg>*`(P_14uocjo-&AcHH$51xKHqkF>htnXQE
zPaQ_CS8XZNo-B#d+##;I?8%a(6Nk1+y_977`l*N!$wDzSm$5J~Fyt4dqc{p(4L4Lx
zdQoCZPAXod!l+8iixLY8Qj0LOWHhCuEoNX~xXQr5pp0FMOMZD?PJUvFilJU|PGWI!
zZI3V4Ap?Qd`x&ND+GYAp+}GRY9h5In)U$ESan9lN^jx)fHaGu+g-1jRU)wyhl{-_j
z{`+H21?NKtB$AwJwwX^qUAs~>ao5(h7sEted);A8+-AIU+dh+58najHN~pE8mUd~Y
zbLm#Tc8T>qUSGeuhry}Hz-?Er*gbE54{NFGhcxPTg&5^?e72uZA}L^7vs}LAf)bCD
zn*^JDd+%Z1QeD|vv`{{Jul1S;69Yp53j>1-0Y3&;7MG;v1{CENq!yKArWOYj<yTZX
zmX>6s=9Oe7Czj+FK>}fEaBjYkqd=XQM*H!Lk5(qEoqFq9Rmjt>{EG=voV}f#hQ6zO
zee81+nbX&mr{qukwEjzXuE|ICAB%f7J4{tz6n=mB+#8$EXKbu@e}DY^`g?{`6<JEX
zO`P1U%-NcE%r^G*ikJmQTze-g9K9o3iSPJ<ZH?7DC&l9)tc-2j5p0v<aplmmgostu
zjTM(|EyX_R+JDnbx_+4LJO9ovU+P4BR%{bDTqFNQS=H}b>J7o5N+qTDTjJlY+Qu>E
z`U9;gJl9?;2yA-xSwrYZWh8&<>0HlyUs`WZ-s2onuK$L$?!@fbpSi;%rbRow4c-~2
zF*|vNI2(_&-^PE9^-uB?rdZpqOnP9)?6J={*n8(9c46zgi5h1-Qw;bIt;^{MpC{-4
zj7cj~Q_Lu0iBxflq0Y9cuclc2I`Ljfk=JHJK$Ca*2lgEgvQmya+&tuLb4gZCy5i7=
z-}Q%6Py7wDWL$G@4uf-<!<WhnMeVf`mwwG_yZehL+OJl})l)wsw5z^dVxF((ANQ;>
zBR8dxi0tCXsgovmv;^C%GJLXpi^Zy!uj6JG`mDJapXOL!c;w64OUhA;=6X+y6A{~X
zUf{QnA5Ub|<YtBAsZnbanQ~&3XFKry6x`_**!y(9;=g--bCM7G-8yC&6kqU?=h^bk
zqNFu3brF3Bc=CEq*{VxRXC_|W`lO9hS@%}-!Ly9Hr4#D8!%yCNvNZ6B=!_kHVmGho
z2f#}mP`-WsCZc#10|Uct1_lNt?5WNZmbD;h%C+|(*C7L*w*O~dJ`)k#!xi^6>EoW&
zT+EIEU*fhKnR!{iDzTf*E`Ok1$a<<I$I0n`etxdGE6*VIqWVP@2S?o^ZB<i$j>#|8
zo`;{CqH!}(yfyxmNpyNp%VuT8h`ULFQ)Y&x^V^=Tt~K1a;(LqZmOqhaUppU)m_21_
zn|jcOQy&%yw3$jB`jD)@)9dE9Dcw6%dj2hb7ToSo5Wv2(MJ(oYu6OC%8((tXAE;+W
z^(o8Bs=F~v3=DxR3=F#1ed-P_utJMdi#+mkQ+@LDvr7vgp6(5f_Lp)L`R6t<_S)B=
zH5yyDWks_@-;8=2diludRh=Om3Y`8at(@!hcBe$<{0ZNyl9hGoxZM|bQ`^Hq+9#TK
zpWJ!2d|&aiGiTo0+t)Eg99q>Bc;;lqq6eDuJUcWw65`xqesl`BZBTf)vuOguUV*AW
zx1bE)L*AU;Jd>k;ef#C86@TmGy8NYIE;;(0pH_Bxeo0%ws~Z-kCZ{*7jyhNDyL;Cu
zgS)-jsheK*%sVUYU0pRXG2`+^>3NCOtcSCT9_??v{+-*-WJlVqW#1<*w4bM8z+yI+
zOSNZ-&+PZ++3k1EO1F!N*IJ|=){<DK`KQd_f$U7)HkVU{tD~-(YKdL9+wHdZTx{<0
zr*bAccE7$-ajMX7R%ur4j$4B1620nwbZ7N#%Z-|2@G0tMR{7E=bCyLvOKMcS)orXf
zDb&TlF~+WPlV`^hK}VH6K^uES*}kUnGS>Tkc(-vyMYE{rcY(ASFXsJrySzo~PAW(2
z<<%A4<yY2E`@+4tqMx7VbGZzA>4*Iuzdp>EzPB&n-I|9Vr4A=dt(Ls5DD}QaQEY#$
zs?grNlNI_3a|<WD*iq}%mT`8SfzG|oO|}e$4)Y$jOT0T?edU<W-Cu!KvVS|eGAD^1
zmB>6a`L27t$Yb+A3W_r=oF`2dx#(ehiJ4nGAwGJ7#>oV2i#~RhQ27Q;8x`3@CURRR
z`$wgGF*#g2{e}L!zfD!G4ii_f?Q3bCKEcZQ&=0oA8P{8tTzHhiI?8zZJ<ln$?R<Sm
zNK8ZRo9g9P``1m^jIId|=MdFj(89j*pXHSg0-eP?_3b*}4&}X3bt+mGTJ{YsL&`jg
zy?vCGf#DYy1A{U4Wa?UxoKsqyS(fUSnUm_6oSa%*?3<XEn4StP^{0k;7YL`y{Lh`G
zmboie(LpgFhoN;#K$C{p<)&#jk}{$e@GfN)-G0(6cGunB*}mSj3U;r4mwtFLcd?PO
zZUp-=Nq6_-%~p>aKOQ<NsXtFbU+&2L?X#!tljZv<_y76(|G&)a|NXwo%fSD5vck57
zo0$Zc9J%z=y<8wp=KEt80lSi})1HN|I2VM!$#k?-owiTGZ}I*9Muu}Nfyy1G7SczI
zE6hBC&$AZto&UX%#Xv*Em@%bwmXwQbo7Tj`1p-p%!jo8b$uF*xsGA{lZAqWH)%jAD
z;J44K{7Xwzeyg?wJ)df*x=h<m?^<^BtclqZ1Ew1EHm#K?o4-grlv8w@*{WHtyH_hX
zdU>rlH+$DSlbowk!6lAgwq~`Z$xjQA^Hr1nrBWfBHm%E4^K8KG?#0ckriQVVhW;=%
z?z*=;V%Co79dXqWi>F4M+ETP}&eD`pQ?oqHpL?>7-qKI`lH@2IY;6*odphNc(?V~<
zDRZ&}SNIol2;EwgCo@Y@xJdWi+KlWfon)z*i&9s%$$D$-(zDrSa;E#&j|Y{e$(Lu$
zSBx&3eO97haB^Ono641)lh}CE+e8lsd~JH%nAp?dv-0ypLu2m5jtf(m&R+S?ufWOp
z@Vx1!V?jM@EMKv`o@93Z=G(|4!Uw!hJiDXgIJ-c9%G%f&|2zNW@XE!=CT3gREt>Jm
z@tX75+fvdkwHH@E<vFSsb>)4Q@A2trUq94dJz9F{K<J@cfvHIWzAw@oZ6mLQM9)|F
zQGHFOE!unA?0yTM#-zhG9K{})Yn6<(^%L(Jxk*H<kKAIk)acs!JEhgFn|d@p8Oa^;
zlia-_xXH%eOH%*9qz#21Q)D%>s=~IWh$NiWNa}gJG5mbb2?_BQ4ZRaH4s_S;QF)xY
zM=9~Lm+SNg8;_;N99dl#GwGvL-1LbDg6sAuKKAw56#wq$J58yin&+Do7aG0oD?TFA
z#UYbl_(f^js+6{?IVmw0bCP;_W`?D#+o}9+j%$vwk<0hOReRq$)gA9IPu<i0{qGNl
zW9=t?f1KFUKQVpYbj{Y5Wxr<?@*RFOZ&HAmsOkDsE5C8Ne(}%DTEyGke%^m#Uh0%H
zUoW;jm9v`vAwQ*4YuA*$A&WyC4A*XZIZ5SB#I~d25<;?8R+Fx-X=46&rfEf$I@dO<
zQ*HXb%^H5wm%W~)eP+4F2?L?c%KLK7CWW@uyk5n0raRYn$(-kJ8%5VnmD^G2aQW~X
z!Dp-XELg4e`gO0I@WiXGlUJv%YI|^-jeA+#maD>Dwr3<`^{&@$UJ+g;{i2_L`o0}g
zir0UCHuZ4AfoZE&do{XU)0<h#_QZzoPlWpq=2HU4%HA;Cce!W7p`@ZV_xS{gGIyt>
z*X9MePjVZKQtxC-Emt(n+sWmhd^dDTY~hr0>#VqSw|C^Zv+{;t{va2mv`%W)2Jr&F
zL%mPtv?%kRH+-{L<Zi{Q9}A~5o-}5Df8d?wl=BD74S4PQ8#O~(nqrm(Sn}@N-FZhd
zWbU*_ahh>;yjRcKY07c1?{jC@-@5OZ{==Cec7J)!8n2Md;95KL*sUGrZ{|8_#K|pt
zW##_s-T6eV_59U41D57|k}`|gXRZ9k<m`g71uP0%CeCX;shPvoTB)|^?B;}RckUF+
zmI^%*O<Z*6m4u4gakYnciv`yGP?C#@Q>!qJv-IEaNzM4D<+8%=nY&u=&#jfOJ0$!3
z-}cSZEobiUs9Lvs#&?b48wY&MgEn!p&-mDEv-F4gTKV5U^Ir+_Otfk`KEd7i{*mS#
zEnz;71Z=ln3}=d8EPH<e^ZiE}bq6kgx!xI{@ldRC`^n$Ud+bmC?%rel<gfF*6-E3^
zTWXr;ecRl`Zs;7}Kk1m)xv+W5nZM{*E>AmE(ecAfPJL~U(SF4~v15Dq_X}NCobuRU
z>J!VK>Kgx5G~<F^b%uSpE4WQ}KgUur+tYU+Z+TI^B5O|{yGl%(d1-5kKI`fg&$eah
z_A;w`zE$1ZKgZ<u*87}GrWswmb6ax7?IrG#T80WQci4QBM{T^E;f-^f$i%?#lMQF%
zB^=Usa?a07%S<mVN-W9D&(i~Ug?cCF`U^V>9RGj&`L$-driMj}l-?@NS@6J^lRwlj
zlt<K~#(7=AH07s#cVbOH24^Z6er%OLz@Iv2Z}fsSw~v?`Z#Mos|L@zF-@nhFk7s!H
zL8IM!#>oV;ZkZmQcZK%H&)S%O-&uL!tdmrCrcKI)Z@Km#-h`QNh;2{YP?%9?#Cx2p
zXYJXq%XwPw{pxxd&9ijH@?@*r#h#|ib-(X5Zf0&<crWDhfjRLJIc5D4m*U?%&JgNf
zbnlAs?TyW<Z53O$ot+R-`hDfR{2P5=w^d)+wC8);YT<it-_4wxdwKKENEP9Ojc<*w
zNUghWc5B|w`x|@C&#SD-sXXTQBs}){;=Avv)Y292)ZFH}rnhHLz}l1f=YJOkZ>-wb
zzIocxz>Al)mOs{8xAM*ES?#O-e_Q3|zI#%-%T2+R+tVK^*WC~53z7G;cR%;u)@5qO
zjujowm)}-ZU*n$rzAj|zgUEkUyrMi8<Kpx5)|<ZC9ewuKvv;d|%}aTkPZ;yesZ;!?
z{n1op!u5bh%g*$+G4i_Jv-mnce(E}__(yK9FWllbVEot45Y+LpQ{?E6w+r<-;zjnU
z)L4FDPM9<QjltY9jq@k^U)vw}67+_paPJfgN28wL#5w=OPU%mUb(@sK%Gp&kMKfW_
zBEz15ux_@%v>uzoqA{Y=C7x93uB>-|_WI_ADFSo4C#OFuj1hjM`l|0^UhOZInZ;9L
zX3SMMd$!<E!4*}t4<6k2PZh|TizpOIrUY>sIX)IuD3n~{CbgHJ<G;b5N6}srmhc!Y
zX4$!x&GVE(@sgSEnS%rrCVNd-!?S4t%g)7YCr>H#dwyQR?-AJGqv;rAbfQ?*Bb0a2
zB*$r<n=i31QE|xfny`f@D7@)K)`TrQoBU>7uXQ-Zlwx(|&R=fSv>jM0oB5Q5fkBds
zfx!TKiwC{G>yen3k^^h*^oG4I77i8ppF2C{aM|g_0bX9M988(NIvPZCxU?>KE#+4U
z&}rJ`n?JcV#XQGsi>T=QA8vmS@$?-Ix3g?MEOFr6iSqv%U(3xGp6$MsR`qW0^V0J_
z=N6xTbN~OJpW+RFe?%VE>se9Z>@4hNGUv#OE_2f#s-LH@i6(zC&?^_#QVX3E_HkNU
zSPx5~+d59qb6!P$&3!8s&Sh6fU%vFliTmEcqhc|Kc2C-)u5Ycr^nJF0fwE`NVV#zc
zdD)I!nWD?OPIECiFH7C!E^6YwYq`jezT2Cf4z?Ly77}iK(^GIx%2=}K*0DEBCeAdH
z37&V_XZ~!n&s<f<bJRt>Esy269eF0JJYCpNXXA`=*Kb>rKR=kIRDWrMM%g1Sv*j((
z+MyeajmmEBNIT+VZI+bgvS)XR&W<!Sv%Du6;wEcbUUMj~RtQ;Yo%NI}&(p=UwQ;Nd
zX?OK@@kvV0XKizh{4Barqxj<z$-`bpGt#0oyRNKxsCM~KWZApBZ#t$#Jor1$<5+^l
zN~f@hysgtu&OEL3GU#5)z6A;1t<8bq7H!$yx4-UuT9SGtB_w^#f~9LVo@hxG%o4m%
zB-)U>R$x7YrtwKhz5K0GsYSa~y1YNHYSMc)PpovouUiYNHoUTQe_0TF$S1Nby5L1}
z#n+^DH<wS_!0i__<-w~eH}9|B!Lqv@Uzo4GQ+(;f^2+I5+Ezvhmvb^(rZ@GaZ<+KZ
z<K>2lmkUFG-Mh13xk$-6mbzsLJJZ5KIIOcaTZI`IZ{BpWjNiKNZOik^hq%&nlBaF@
z;8l9KxaVmHAA6-|j4W%+WX7}FH)h1QJxr=!Vt;FGlH+kDy9<S~@~eJ$96!Ee;Ui($
z`Av~EZRbm64v1^*yHXfjwC~V$t$i0i>h8Pz(Y7w2(EE7*4^PLExBN33uYB9jv{%TR
zPki&8moj?d5p!;>o51X@wm#u@*NRW3-n{ij$<LP>OZK_m-RDuex#arBQy#~&zns=D
zHT}CFdwN99Opm=vri*x$Ry>&FYyE9cjZfSk+4B|6H7h)=XMg?>sk*eS-~52(G=X(3
zTY`FZxT04}6y;4z7m%-=T6HuhYxQ%ZeOo5Z7vL^=xOvY~slKNR%QqPPdYpOKvtu=D
zqN;1NOqg4ytlndP_6_;xrj&kLF0=c9<qbK>+=$lRtu~J)35K2Wt>!iNnl<U?`@i#l
z=|2x-oMYZF`(_6hpSeN&WM`Z6_w`QfSJNo6oA~ab=CRO+ulns;KJ8Y&=PmVN#_f&$
zA^MsMO?m5H3%K%c%~)g8!0-6!Uxd@;FU}_YuH7I0GS#2|tNP<4M~}Ya+-1)nxa7wR
zzW%ZPZnNEs$+IWCKWW>dK5c=&{=%uZUf(to**H<pO!VTl?*g;uZ^^A}`PO+)X`7u}
zdy`}QiSH7(MeZEr+{r!h>3i;<-7j>KekUl1Xe@fo_pOC#pVB<(5a%pG>vxOxsxSHG
zwRUbuO;3UGx%<Dr2cEAr5uRB4BAw}P#l>i6ix$T%0ZNWDoUC8+E!!b1EpSZjReQ%B
zJ+JSAeR52f&VLoC$`o~-Gw1V@7^PL$Uw%}x-<DT+=k77bJ4d~D_@8CBc++Ub{xk84
z-)XyhV(#K^lkVJgQ`i0e`SNQuTe$_P4g1(KY!3)6Ddf_>-0k1w=fA#y_mTNMx%7!n
zKSZ4^4n2x_^!-}k9<TcKQ@JB9Rn6U<95=DQxcty<-v8_YD5Ew@D-Cvqfd-3N85rcT
z7nwf!>F^F=>*U}0mjXnN@1K5LYrWyI0-qVebGcl@+wu+7+EO?L<vG*Tey^P}d!2XY
zLXqD8UG=R}M~}$=Vg9qY=lM2Xp@_{B=FKa9Z#n<7R{g(!zjz-A_)qqDkj6B<K)dnw
z3bnvy9hcY+Yo|L?3ltnDUP$QZU(Qx8d4u!tE#t#Rcb@ZXeC7FeMqF<70m)6Vhaat9
ze%P-3-pbm^erCJ3Et_?GRWgUQMbFOV$66L%*(w`*QMNRp=;`9ceQtLGcWql&y!@zu
zY3|&G0dME6ln*%?q8s~iXJ6t=-H8EGpH8}5-OHoR7c##glDX|zQqAv>(yAJpr_+u_
zZ*A=spSNAZPvx3ga$8b_jI;5Sn=e~}9t($;9{FN5<)C)%@>_gg)1+*3+fvTYvy!qm
zI&vfEUk;y(kCdd$vWZSnPR6~}JM<qqO7RDJEbef3vJrd4`IK|6(cxqN|K$Hlk6FyL
z{hW4Z*1E2Z`aizg`Nr5S;jZ=yWZf@(IPBgfu9zs6@b)Vn|K=p$y5|wG?C64+E%QYm
zJv@3jzEks@``(gPuG+tx7Z$v{De`zpz1y5GJl8zaPFgv&)HwwG<5Zhor|@a&8V)m$
zb;@3!jFN7sc?nvc|Foxb8oP~eK<3;VPKpWpcxFWYv{srw@lRVuZQBi_9W0!wm)7ew
zY%VUC=C{qZK+u};M<~O&pAV++_wAp)UuU6q%RWEF-`p?m$jm(~>h*5%X~w9<ddXkT
z{eIEoFugoIBDyu4Gfvs#Zu9B78#43wA2{7!dQWNIHutD2-W9rgreET@lx5X=$y8&B
z<R$M`E784oK0NyNpAj`lo-2CYCCbjgkjKZspoTq31|*hbI2ISD<|e`G?x|rvlcmE2
z|6RK#S~=OGtN6y&<|OrAott8w!5W^zAFjHrUH?nV@79j&Y~_5d_q%qlKGIestns~;
zg=xxfHt{IUBU>2jp7b;AyBQn3P3!VE_PyrsGw+=-H@-i6`~QFE|C%$-Ikx|iMb{y@
zIY)zJ*z%5O?p1IOY<sHVb8}nDMxmqKg+~G>$VRKEvxPqkk_o&s<!!Xl8n=yN#~o`_
z`ub1Rr+*CMk?(%=xUhz4e$T_;!Wyah6OX35?vg$FJmp`F%<&JKc;Y)AbqCgHw&@?g
z*t-76htR-(vv}ggXWtiiIoq(t`&s_+4|65<Ngx0Iq4r>*dYk-_56+gA>Rr3J_<AC`
zqY6(H#N3Y1*x8n<6;Qlv<?UwIvxOz@2Pb+@{_CuK__J~F;mMz;A9rtmzI>T_aq@H#
z6}P+$^Q}37#@shGr3|>Ai@uTF;&c3}PkEU3v=Y|eYHG*l%gD{SXSH>GLiW!^c8_0t
zOU;fnsWEPwT6;+2m5E^G9RuF)i<*vwd|%UaVZqepxBdwpzH;2_H1{sswYnmuAN3!k
zo8F#Kx<RsNI?tkaG8^AGT`QluH#J*Zd)M5#7w#;rbKC0UJG+l>N!c#3{NLQWb{oro
zURkwm-p6#e>bn|Ka(qk9m+xPDT{z!!$CI4G-J-0eXRnm5P}sJ>^4AQt&Yq{4*TiS%
zoKBM1w!iC?d<^R)(PY<y6|dG_trjo5wz6hRinxCEIU&_&7pBXtd%X1B!@Z|>J<edQ
zJ)!ZUZ}o!1;YDALPRiYpee>em`e6G#8=i4lORwY82#|QZFgL6{T>R@)1$)8c#Rm`F
z`n9PvSU#?_^zz3`GDdsfXq;)Alzgt~)NjkzJQHT`nmu>snYTilmJ}b;%bQpcJo{L9
zL>X(Llx#xlo;T(1=Jjg*b`A=Vo3qL5+akT$DZlvmzMkY16)y-pT4^J9v_FLB@o}wr
zhfPI`E254DOFVYHmlt`||3g<7tIj8{>x(2bB#-XNTm56@i$6T}>28~}>J(RV|5(};
z-}xnb&!PuscBk*S@xxbSf2d*O^2lA2{l2MqUypG$+*jZnU$Xee+^+kN`S<)-?A-76
z(OtCu(EQLfk3&W455|jbsc@h6QqMX;q-fb{ah9*yiwYRmb3fYD)Lc4a!Gg8BtRszL
zlOykcOaHwfRO{K~Wlz<nd)!%)xNgdwCH`~mHpV8pY1}z{KjhBwwyyo&ZtuExKfA@Y
z@)7SI&mxw&{o<RCq+OqG{zgM?#gBH87fk;=^SF%9se0~R@=(S58TYc%?u1GC)00@2
z-+Sn*x+|ny*OdR-mK}`!1-dJDiL4Ku_aQ@8Fgz{J@xAq~2aNld#O}NnQlPjyYgfLk
z^X7l7NryifvT@5h*WcZ@XY=vvpEvBiz4~_W=H^MWHI%RG&Ta|*&}cO=PIl$fg1>ej
zFZ|C;t9yL1vj3S<vd!LVxAM1bhA)lkoOzb+TB_>irCoV;>CS>zuNqvWPO{%qS~+Kb
zvBuQZ+-9-wQ&+t6(Z6|3w=#P#i`hrx#s{B&z4}pV7|Cw^dDn4G-JEX^53h`=-Tx_z
z<;|sdolMu|$L^&yt$(xT@~p?NPS**k=X8q{TrXoZ>VEv^p4F-R-ELbR{jK0xqZQ73
zqM$bK_O9vnWnrfb-)+2D^hY7QzaY$_{j#XW`l?m(k0hS+lqei|zq!t9)%q4jo+2@3
z=f+0$6`H#W4JVx2)7Q3YYHbUvO6F6GZwoGOF1c}?Pp4C51IrPE+@EIuz5N5vZNEIz
zNJ^WdcUiR-NB`1lV^NoLt!4sqC+|-ZeRI>HW?hrox+_NC)V>vF@D^$$*R!5|erWbC
z_OcghWiP6N{J#dIFtb0uwD#dO4_4jR(GRyM9x^-{uxq1yrcB~A_KVG-Y2i-0-!$Lj
zdTXXv*dKGy`@}{uUYk;3|DdcVk(2#zCHtIJ`075R$>8mqqZ&&&On=^CJ-Opxz0;ij
zrJWl#_Be~m$a%|f?r&UN+&gpS&Vct<Y`3htKINT>ciA!Lso(tb7g{}246yP1U>acD
zSjZpRyeGZ!{%#v}_1{9d-#=W?n0(Fd+U01s<7|CrTRWcGimJ@^pHf&;Tf!V$+$p@~
z;z{;-Pao@)J?~rdwOGbM*wAFM)m)u2@zc6L&Kd3FxMkR>IWHqAZJD97PVAKQk3HhP
z_r-qM)!cBtuMp45cq#je*rKf|Lj2PUUpf7bZ83g5^Vm0|psRaS_GHXU>UGPgoU&8y
z`ychco1^8T3XXmjSb8kf=Jmzd^A#KQXWvR%-uv@~+*`qCb9UWuY`1-FE4%RdX^(JQ
zxwMpokVLm7M$6KUO;-4q6?$vio^6S8=P$cUv@fWP39PI=vOlEtzD48z>6Ly)MpHZm
z9M8)hxwL*^qV41tU&GEieRsKUo1$Iue%BP$2Q`mOUd|O<H~;37pUV7M6YRW~*}1R3
z9?@JmN1^XktL!Z&tCpGo#=F+_vhP1f9B-exwMOLBf3$gFi+kJdN((bEywPM}P{3ZX
zdgc|EB<AEmMtkN)a@UAF-D<DC`{qrZ!`cFePG!3&RkoW;3GjMOxFIm%gMq@Ul=4YE
zZ<ODio8+=Ks@v=8zJTb9VQc!6bVCC@FKFDzs@?W&;nuBNuZCskzka)S@Aq%_ZvWQ$
zUY}NGK08I=k^cU_Y0s+XJpVuEeNBAa-`|(rJ2MpP1s2$yn|@5aSE)CCRuhv6W9G4w
z%cT|=$*upu!vAshc~*@B!l^lvOeQnOoCsbrQ|f`zJh27VJ0fPi<YH$wnVNp$&rGiK
zpFY1~IG^bI@G^6W)Op?yk~aQTUP~;4AO2*Vb@(xJ%&AnyRga5)u9>hbXx6I2n6su_
z`X4?!wThqYIm$Tey!Vp&mGh^cEZ*5sJ*{W1?1H3qUq5qIuG4u?ToKxE{^@6?iq*jt
zr47@X#gDyOnmJ)vpvD?OuIIli*M2e4tZ6(n|5Nei`PV(&y&KLw{(32g<3sD7z=ziu
z);|4McTeT~*-Ig;72NruKa^T;r(D{`u=e>|#;k|kqW1(Bggv}{X!?(zK|go+%+hN8
z%CNO?_o0OhxzEe&+yCbL*y;S_t<D2IGuPFp|LzH%Cn%`4szAp<c7w{S9~SSPX)p2A
zn8S0yFK^1RNu8Rl<(hYd7v#;9ejXQjLA@(Z^1<pKSwHtg-LuoR+ZpvRdqxv)Z}uxQ
z^ZZ>~7OpJ4d~Dg(+2*Ee^LLgw-(9xVr}gag*So*W>X3WACUdJ$+L~h4%Rhcq=w!Y$
z%<h)+bo`V(vu0vN`tpehvo6Z`ms~4R`%-XZ#Y@i@ckj9_T6R<Jh*!PEJ>l*nUkxtj
zZ142teOdVM(%R_Q(vwRBbNe}-?#$yhdvfmJqRTz^0#v4&luKrx=?E0(l05mu%68f9
zO;>jWX4pJrn(%q8N%FF-T`wnH>c9B<v8m<G8>!JV_I&-g;h@RJn>~`cD>HUays;!X
zz(#=o<wV`8LsQ<DEWiHBXtIxStYvq;lAwn?hbqrqBdIe>j+Sg&%@#dZUFKF|*)pMB
zsmGGFPsuEw{ZVRT=MJSQ*5^XXPAypwDDg%(t=em!xw?{%u*@gJ!>exY*~l5(RkVCd
z*~{)}dfC3NGu7N(zP8PAY+dhM{`o|n+Ez_5=Jy?Ziw_xi`(=HpT-@Ox;L4I|ARH;#
z?z7<5rX1tlPo<Z!=Je!6P7C>A{6UD#c_qi1xyt|VTzVLC>Q2+i%c)<CHg?7f%iK<!
ze7;-BDb4coWH*he25a}tDs50JxS3EU<>?((#=+LL$!1p4T}|#1UfZs~w$PflEBp8y
z>+XHK%CYa8U4!HHl4VQ=r78)t?7UW8{ibzc#?8!#(v2U)loQLoJiV2bY{TcFE3xo?
z##cARl@`~p^12yKG1x83Uv)T*>6K4WSb|N^<(ykpe$sM`wRdM~TyQ%RYdPn(R^;x}
z6K!RCzQ(#`qzBb(-cqt<i~3cz!c{J^`Ul#oPI$c(ynUJVWQCTXQBFm_an<hL38^h_
z1?!JAiiBLg^KioIRpI_B&lJ<P^>3MNo^-@uLi!@j3{&gmT~oaz7RMS)TGs8kOr3Z2
zM)3ukj&w=)xLom3V?EKaX5y7?9H&#nlwXu@NNh2es=DDh(NkN-KQdC`)|%@_gahwx
zdc%2-BS`8+$#!Mi=MtsPN_r>DN>BAzOxdvT;*^WRDMuvQK7YOWAbKKe<K9J@Y|}bJ
z%zNfrt3FU^+WJxBMNhfitH&2t$T?4086oJu=5*CnpFGp#8_O1KUA`qpTBJukx+F4E
zz)Y!a$>o;rr3QhX%ig?Y{`Bay{HvxUPUZ{qHr+M-aM(%1UUc`RmPf@g$(q*%f4ZIV
z`jT3kcw5ptd%|R?RF_F2G84}-3Ry@hy)ORMy7lZOX|t5%fR=Tdk(({ri;{IjO*eVQ
z>@=GEqszV6-?=17<KErrlbdI(@{TQ@ygP*NT7qtWubRJ$j`)S#?sL%_5*ULwzvi2E
z^WCvIU7xDNJvIl-a&=xK#-;uJtkL09rN2*KvygP^Zn<!3qwuM<jYeC<=Fhq2S?t|*
zO=w}LsaVwQHZlFFN5$tIdbIJ`)jju)I>$`ooGzMNR-U&d#Y0zq^R`VpuKGRadvx{7
zo~)V6E`9#e<h6fcd8n1oU#-7$>K0y7uw8tq^o!)wbqljYt6JnjU%Ks+>p31$aB9){
zRa<7~1-z{KwdT<7(0z^hAwSjbE&lNQ$>X%C`46K*|JmH{di(Q;*IDK@o-b-0pXz^_
zzQA?eB#sx?ww8vj2p0Lb=~VSkGnTS<EprN31*;q%-VWW@ynoTB@SlEDzkiH9q4zGe
z`B3@lKc@e7o%;TvZtDA|`5wjdPkrwe;$QJc;V+N$!3&RiSI$Uv>&-cow(Ctzw7Ty0
zunW(ot31zsxMSj#q(X0J$E=rWK9`egXG}X#Cp}#zY^~`s^Z2t}*|Hu_W?k&AwYs!i
z_sqH1tKDWA9;vVW{q#lH&)GE&Jl`gqy8q~}kF}qM%dB!Cokvk$CVMXUbofv1)OU(A
zUMSSNKXp$KeYZ(l*EnT)tnR&qJ5OF%c-ZmbW$z@_jb;a~l=_9dzr5>9Y}>Uw3B$9O
z7T?{Jo^8AAYG|y`X74X9A)9xVaQ7}eTdsTHdG_5-3$dtLDV@9Odt^3*E<G4FSFZim
ztSf70&1@+YTBIx^JWX}hyoQXIEo?nE-dsApwa>+7)yr8^Jyegz`SFJ8c4h87x~y%}
zsxWQOX-AA^22Sn@Xq)qH%~9u<pVnO~6FHGlTGnfp<?z&<wYPs|O^1B=56-FcMUKlq
zt9x_JdFE!li5GsW)UL8|un+#pVZZd#<xgfD^DoYBdg8EK(wlwZf#;Gr7t~i3wjI<9
zu6y3TYf<49#d%M?r?2{>^IlNPJ=E^ldCfnuQ}?SU=*Y=tXLd!cPGn4+S>*RxZq@TG
zeHDwsCcmhTdng|A&(3~I=dHk2ncxeltILjs)J=>RS{XO(+0;a_Q({$G`%hH|1b5i0
zOp811BOSM4<sasmX1^b2n=SkFt3pd?o3m}D5bMV1{0@n#pn_#5cSa-$`MDpNDQm`7
z{bh#g#TkcW4#rz0nVBmJYJ5y7<#{^yl>L+IqW>nIvVS&z#h>6h=44wx)fJy+^UU9W
z)ZSh4Rn)%~Uljj4J=`ApZ|Q=^;eY&&U0Qs~WNJOH{O5cYYh!n-<i8t!<yPf+os$)^
z-=m#Z>A%$eqC40Bu&4Jwe%0E4zFcYhyozXtpR-?1oZx<Td&V2~r#<Pp-=F^AdAe!J
z%B8-(pXSteg|leL6rPgts}IV&dBVEsrLx*nna#_m?UGxwb#mshl9v(hT<k9PIz8mr
z`1Y`K*xrRnCJmK0Pq*HkzvGi+-qzr-b9+@jzkK=FXYT8pN8HT59SY!*v`@Qw#o40s
zLgICqbI-U~(`(MevYk5<aQ~XVSkpi5*mKWP)pzpDd^Bm_guV|P+-lp`>t#2&t$ep3
z&+}(9AN#U>1yL;dnKNT1`N++i`KJ1CVOY)E3$>nsnUl5e)mUsSiHa?g;n3f>S;ioJ
z%C-J^^Y<pLwAr{W@ABU;8|C9I4@K>!7)|cIn8)qz65l>|+S}qY8n14<?>-(nU&;LB
zlVwJkah$?c4>koJJF)9_dQ79p92rN|!+w5O&n4dK{I}~{ip``=Je|uQ&Q6inV*YWl
zSy$;hR|4}ANvm^yg)iGIcRkfs3iozfpILq5qs6{ASHcy-jhQljKU<u0W$tMU<C_yS
zZzwd$wHcqc_&HTP!l%jaYn6_iv!;0Alv!b~iZ~OVdoQp{j5fMH!MR+1vU};NV@qa>
zZHxPzQtb8N<eiNy{uM=2mCIuiQrnaKctoG4tbQ2YbNTN1?~ATZS-9|v@5C>^RApwr
zeZ636veUB_-l`|3-?WxKDJOktsn+G0%Tp$aO3txlR9P9Vx6*8T+W+ShjW--R$9VRm
zoQj`_l(S9I^Btc80vsjwzP9!g@bmrlHR4j?{@1#J8%(aV9L`VeieVQBimfV&wMjH_
z>(MYWW-!)E`x5o?{6y(Avoz5e7k9q=<XUdEed9qz^S)hcR-W8?a6|VB;fJZKq;*#|
zC>I)L)$tqcc3d7TKUFZ$j`gM2uC5vDj)lwB&f<Dn(fi%WGW)jwoJnuh`_5GO9lRs`
zQf*81hf-OMijQ-`?HpL2Hj89v%b3rMlT-L=(5t`R!pE!fY2v4;;j+s(+jIElJ6B8n
zym0QQSMRb*k`of;&Mp($GuP8RdC8rHp4%_1b@?{?#F1~aJ*KAb?~IZAQa5j3gWSYt
z9IPG(i$h)QePi0wg1moDHeuACoW0jE-(K|Ev(w35Yx(~^Zr<lO>sFZD7uL_(+N;=B
zteC&d*8OOI?}W`fR@0h`R8!V2k__QJDSo49UFnY0r!lVaOY|KV)mY8m=Tq&d>frst
z?8f!xtxrp=?{c0^b2wjHBA#}3>C6)Ido^E7*L+KQdVM3?^H6vBJg0aDWpfcfd#8h?
z%_ULMM;Xp$eAM~uH_zq#kL=Igdn$itriB{ppP#1pS*-HC<-h5t680_U@H%Hz@Td7y
z-K<z{-RE0)Cxor8nD(>#kK_G|zcUV|Sjl}6{V5(_+~T)hmqYB!<(T(-P4rgyL|yPt
z&*9a&WSajv&tBf*Usb@%<&zXQuUU99W4qIv?hK`^j8eBB9C~ZguD9&x7Us5HE$5VE
zpK)CLDYC3L<wEKeN43&Rq8Yz0l<T_Y2$wk=JiXypoW#loy#-6Z@b2pWv#8|S8uJT(
z4TO}~R?975S3K%+%k};eW`k@+)t8EEJ~X+t>~rNmyk_-*ueU2cKit{Hcd7irvKVH2
zr~YM(pLyI^vUHzr$gcCg8eF1w*!(hkW?gjR%I}d{*`GtU$tf<_{Yb)1d!pC3<?XB8
z_^RhEdS&Cqd}1>50__WG3dKSTd>QR_ub5X=+;DHw)h$Qa*ly?dA5^N0*g1{w;^PJ1
zSx%-3FRz~G;=j(XeWm@{%bPbe6y&a{n)vFpHv5WY*P>Nd#21L`vMGq=>BcO%u_%1Y
zw#thecWl+&ziR7Q@h@FkTYvUn7CLx2=)GW+naJki+~qO6ZPzbYSR`pF8Fob2a4(wQ
zE|J6VDSun?fp)v8YzlRDY4dtl>^T*-*>H85vTf>fnM&b5&u`}Y-!7cA`ig?g6OFIJ
zHmcWNv1M9rsF}v{{pRlsL%FTGq3+WwZBKvd7U2CpF)Vb-{uS56zkI#iRl6jNt=9kI
zDVtlntmQrGwilf+-<;dD-Er;hhqJ9$Oxf48;hz@EzUFhS5i=R|S#Q1H-dv&;dSI9P
z#yYElx-)Kfd47Dc-1D@%Ad>a<rMfT8vnurUU)wH!?)jzWy4l<BAFAiY6<pHX&#cG#
z;=e$LRl0m7KW|WBwClrFj8^5c+j%Zk=I%>hx$bQ64ykq8YuX<_*xz=IF{|X$ZK)^b
zJKp`-{enluB6Y&GLlM5M;$JQ=KXS3+OK`FMT5E@Fwv|hBCtTZp@((X_$wP+RDLxmw
zSZ8O68s2}lE$V-Fh)f9Ac8dW1U(vraUxz&r_2dxWDzhMy?{;p5?DIWwcbINWeQI>y
zaMDg5vACw`&KLG_eEcI&)L}E<fv@mD8K>ytyZo>3b9diu_kCVheJshPR`*ez>9sA&
zhpGjhUsv3h@96X1{fJG6Mg_;WI*BcEhkpxi_>%aCm#ObK!;x}^J@+4b*WBhU_@*GW
z-(UaGtLursD?8)Zj@z1a$1#fheK2i7_)F7@ozu!L@xN&QuqkEg^_T81taJANz0vlq
zJ@R}#$BM4!%P&ZO36{R)`l9l1TiLvrw58_fa(@Wjm>*X0QgGLs!b`tb?%lb_;EMml
zJ?t9yt1lEANLRI*^)Ehf-f$M%7GL%(m*z~qR+IdxY8Rr*BA%XXt(<kfc|y>JoqG=T
zwP)G%l+Fv?thJGIPF&vM)kW8o9%MJ{+0>X2{YmD|+ldheue3TlS$zDj#PdZcVsB4E
z1*=-MsOB3}l|R!n1+R!KoYoM_nYPz=f6EPF0l7&RY$J-*{&MjNOq`y6cI%wF+}Yax
zdA~33zUTTt#c@OV1gn(&uS+IeRegDL%9or+Pm9_V%d86e-z-kLxFpDK(S@b+lN?@3
zH;b?z*?qgMm-m7w^Arb0fiGU+MlYllydTW7a(FEJV0qK2jm*dM#h2|~ayfrL>)+#i
zj*DeWEE61mM1C`Fh^Y{sXMWJjy!VFE{X;Kqn{ZaKFZ^lqB7#%*{KD@O4DYiqz2AII
zUc>J{gIB#n-e2X6+7_$V@{TW+Lj>y2sV%I$k#&FZ{X^TVH=O;)w_5e+`?I$S^=l{G
zv-sAu>Oh{*tHu0|SD7WhH5`^#tXA8;E%A^4g9lHpMPE=C(46_k)%RjhU~AN(+?n6=
zW(7p1h6~xU_x+blc;PCt+)b>o-tl*Sz_;}pssDtZ-np;vsy?-mVNs-@_kwGT(MN<d
zzU|w){p`l_9R7qLpZ_V(FR$sC{O8O01@Ag;bXdySO#Q@_lksn1(2;cEIf=$s=4gJ=
z&uruRn|(`-%Vo-0lik}Je#F1Le_1i8dHI=B(~msyiz-#KShDel{essoj_wPN{$nn2
zA~I&SACukgKLzn`S$68Lo_hWwSINKG|Bm<eeSaYF@{j)C_*-YB6J$M2>t;xP-SB*Z
zw(9bXkLj8}9Z%jo`**|U&9i^Me1EvS_=DxSC$G-gl$*)FKexN+4(A$!;(4<TZ_NG1
z+2dHLWAph&x0S=EJh{6ET~}}ytl9MZ;(un;8LPVod^(S?FfdHz#JQ%*DL=oYxTGkt
zz&9~7FSsPJs3aA-#<n*sGWvFyfbF48tgD0$<QTFPFp11Q>KE)Ky}*RCb%VsDglSew
zx;N;%xlCK3Bm82TUgmG<U$>_3Gt+NdU*miKxBjmy^%u+5r8Jv=y2@WZbEb9ixifo{
z|NcCmug|!rQ9kuYVYr)qn#vyi#4tDQXDN3M@9AOH`IP9{#%?HiI^yBszg0F5CbfT!
zm?|*cKxMIDw$QcIoP^**>|Z?!%=>?AINE;v>yLnA?mr?9secXlSrqLiK1nEusU~>V
z!kd?wY!-fKV<<f4W60QRcs4<M=NYrj*RJHwRrxn#X2@6GtCxQ4QxJI@tE;UbW4yM+
zBzL9eT-W(gum0I>UR3UT`P{87kzuCFd8?<LUE|4kPbZ%11%K|*Z&#R2SFLullG?6&
z*2907(Al1InO3t)KCB3vrL^UWftbQ>#ph>v5_iVbML0CCjt$Xuns358TlSXmu32Y~
z`IsK{kSOq%IMcUo{V}hQKLu?8)=G?LRX55C@}K>B`pi93&GxyfY+|=BMqcXMWwj|s
z-~Z0VI>XAfQ}dR$E!+A0=3$lF+f6T@@OrTH#>JixpR<xDW78~Cs+>eCSEcB^%kV1=
zIsHnVJvy{%uH+H{F_tTtTP8eCJLk2@@_ERDu!l-_nwu2XX;!drXDNx9V8!UQ((NGM
zC809Wj2n|bxHR_lFg5c#&Jg-0TEpe-C|06g^+L+^&MMb0Opj04rk~%aapGaah0j-n
z*LJ<ptmtxP^nL#5_wDKsPEL0&yTi|2Q(8?fv|itu+xLi@HU5z{>-i6+ABzt6iyiKF
zYm*bM^gd+HweM(o$eYg$t1S;-GkUWofhjHh%$#{#Qp;qotbAj}eQUSQiy#iMS~l_b
zYuHQ|YxreO$<4JsZp*^XzNuM|_54TM_HWG%2mk-v@}hSRv$o@tt!KZ6YLx_Q#8llC
zS39k}HDE(eXymaIo7O5_KN_`t*-TmcQ@3=2|83}!)AF}iKj|iaNVjCj`+|+^BFeD^
zUFJ=bcCC9G<+C)sTyy5;9g83B+p4oh`{{$tXXdT^_{Z9LJ;(J0mv?>m$L@6X?DDRJ
z#;A>|ch(iAt$yY6BeeJNDOankTeAui>=s(rtTySXzJL0Vh|9yQi4FU@%z~PiOy<}%
zFXp^-qDIVBZxyAzi$1Z%%=@=l?QV$XVq?jx%i0~}Zf7n1CMbBvP}gXcLWFX^DaSn5
zbuVj}ufO<zjsM?ghX=a7$6jygzNB;EjjUDMhbFPo7mF`G=dixuzxa_<0>k>tKK;Uq
zn;y0QSF`SXc6Q;!<ST3E9GWap-#+6*&$EYT4~yG3PI3QhD5Z8p_ynV2|8=HC@h2Z&
zYB7KCHD2xYv6khmz8`I7sn;;hcMz&SWbE^M+F5bFD8A*#IfWM#%-!<-kiohnp1WVp
zUT~IY*vogOQrwGCExy(8iNk%B--1nV4m<Z;>iR9?+_IzF<oiRm+Nl*M!-ex6`EHWZ
zo={erKQV8PijwC&HT^IC0VnPnx_-I1r=;uJkq!IbO1!Ns(dpka=UY#AZqoPjFJv=?
z^NUth#(ZS|^x_ZazJ~ADpD+4wdVl@gMcNPdN2$~_y-WV`r!cIsjEkE~c*$K2!99+z
z3s2cDxAIMFW+`tvQBgSam0_o*)pegI_f^-n+cm6vY#Y@nb1%|s+8mb;P4~+l-TBFn
zS{!;@+pcn%i-AE|f`LH;`#eK9WQnP3MQU;>e2>}G@X7?4P|5$kn@;ALU2e&nv`te}
zM>Y4-S=ZBdrzv{|OP#oyb7|J1YbW2CMBaGQa`UDf2bYG1)*=}t5f|6B+Z|uBbu=sz
zJ1YJDx827>%=sU_pY<`@d^?x7{>|*~d%y2}|D*PJ_3tOe{PtW4dxbtd)~&lEcp$o2
z<KW!7JEAU<d~6FJ<VEg0-oKr-V7+`k|0CIobOF&F`W;>}UmnLghCEJSwY;x;xH`o`
z<#6@WFTYbN_HU?BXy8Bf-Fefe#F@1!4U#|a=^u7y{xPFQe!|<sMyJ^fwC}U0R%kS|
zKebjreEe^PN`w2+uH!9#g&pVg`FG9jKm8`y=ywUjd_noXA0Jyj$V{C3JLOxXy`A`x
zn-&rWCV%={!w}E$pqsHqvq7Kz!&HWSTBrSge%z|CtL5$MPk-BZ{6inj{xQwt=QjEK
zoQuToi>lt|e)RiC<BV^I73x~#**^;ZF=?2ux#IpMX<5m4Z>3~k%F5iZwYsytI-{as
zMS4YwkK>B>M!D;+O}r8$EE{;*XNj7bX2hl3C6}g6+f<fuvS(dF>b!<^=Q8@;o&G%w
zcz(j{{Ijk-XJ2ePcUStHO?G+7miU98V)r`lUaZ`F*tS*3)|&NE)8%D`YiEZ!`D;f!
zn=x;r(8<1qcdX8?k9u%xmdB-kTJ6S(B^<hZ?=GJasg*qwaamGZ{gm6n;;JbBqgxAS
zZ?s8U@=ZleWX)^iGo5{$!QJX#`)ZfDsclYP{`u{;N~2|Isy!n7SKk==NR`Z#+q_N1
zr?0Kj;o#A;Kf^@AD{X9Te}9c=JGA1^_1q&bBTD0ymhz`8J62lk(&uf{J7e0Ng)=!9
z-gRv}yR}VmVd=U93z_DeGFK1cd|ku(Ty*PJftSe@8=M=R5=-a3o%h$U_Ll4MbYC@z
zy~mDP?#j7!Ww~ka{Yf6i*2j&NQm*)DSS+lZyI5gqwCv);dxh3)nP`~I{W0za=QAFU
z-ye&Q-M_MRhwdc_)5&KZ+EjJ>Ej2uQ>9Msb+q$L(-^^)CXIn1MQQ3A*KiJW=#xWxF
z(uGIP^IUSHL}oQbD@;rhe03|tPx$<oWiu9^`=n-HcW{OOi)rHgO$Vp$(FwV5>5o&;
zwe`0r2=Dfut#7+aRb=uNZC$0AI=il?N{GF^yzIB>M-O9G)xrzQX9~M4S=PZPHL<`&
zE5l-*6YJK;u219_yLHbx)KhR{n`H2pos&N7oS3lZx|w|Lon1-aR^`pNG+59%rMPQO
z^qM6*_PL){dH(zD&hANDR<BsmurhvP;`PqWT;-Y?w_kUxNtTpcc6GkT@{`*i<QQaK
z?+O0DmG$d&y-%WR{{5foCB8><-GZGB_Z4r)Y^q)8yn4nF{gt~Sd{5k4pW?CNhiTnm
ztJs1Hi^-dWf~(eT5Z$uNtW~_F{riO(1(#=)1Y6`y@R1CQxPSgqX8Kv36H+qM-@Q1V
zA8ONkzw^-j%~|4+wpwM6Q?-t5@IJb<Vq@3z2TQg19(ajrN@)FSun&s76TCh)Dk}V=
zy3OK8r8_*2&UcNKJ3b-Jch!&Xf<K{VulLNp=-E3x!^C*$qx`;}*`YStIj^R6zSLN}
z&|2!B-tPY=ZWplp4>tR!DsAdoWis)NnbFA|z0<yI`mj&wz^r3acyo3i_?j;|?Z+>9
z!9Vdf%O6#q6FHEuz3chbcS|Jx{Q6l_7`>&dXM>*pndj?#lq9EL`j&p?iGg-xsL0Zm
znRf#jW&}5SN!xzXdw=2VmbVv;-e;8uC5yiC>_~{WUKltv$i|r8tu8<O!;iwtUGJH@
zdsSuop6m5)n72r)$aJTh`PR1CT)nz6#k#p%*()ploom|a`}W~PPusQa-&H4_{IEmL
z?5k$7zUVSu<*RaXUaxqnEhd-wPR=`(y|$J&_)F%|=i#SV_7;h5=lXYg@7Gs5lO+6(
zuGeB*dT2dM=+Ypr526Ntx>jj<Ptn>Rv{p-9!&|h3Pa<*m&I>ZxMm0O@4g}7gcP%+7
zZw{Nt**TSOBAV5!CM#4m1ibscB+>UBo6MIr^QzvIU#aPS>Kc4YCwXV=o6OmFriOpw
z)3%&*dT+(~ulIud`ZezDd);+cA|!sU-kufH&7Rx)wya+Ez-s!VROcMMRjbrudv*r2
zO!;|n&$(A!O!s1}ABO#!5$!nd*3=ngS6DB{9lBZAvfG+Z(DI+DnNg<H?;`(WTwR^V
z)Jj)ARxOhMd~y1wIj2>=UyH6dc>U>(#eVyaRx~8)Z031%YDd<$?hwb@i}H%MUp(<Q
zQ}Xl9jXznJ9tq4pbSvq6GVAinrDgtEX;Z|v&FgwunfCNAcVyIrPv5+~tc;zn#eA8Y
z@jP@QFO!J3l+d=R$LkUedkl@Dzb(GKuIH$oN<UZdM(-^*x_io&>RLDSy5679`tGsF
zzu#+F1YB&lCC><%62f_HvfRfD+8+H?FW4?w+k82>q`c}yu;>00&g)%0^2*LR7wx<q
zfA9Ub>DTS{hpuR7E1cIcQOd#6pHchyf}XPrQ*?y1=C<bZr6zCI$e-C-e{7oc^N4fC
z4#p*Yr{@IUNPazGW=3LYf|9y5<NmW(#ZD>}h=-k7n4}}}#kT3x#!DVw99L|P_%!21
zuSjTkYf#@>kxOn8RyJ9;W3Hz>ThY2_r_9z<W%D#1pUO!Q3l)j=y{Im8?2^ap#1<vp
z_CVoG>z-R;Ql;v7*_pKhr4!kA*mkEz>TF-WQ1sKuMRj7@n=LLzW*1xwyP2(SnXPV}
zotRYSCANC*(crm9-DVf|Zr*k+GJDhYS^NtV7oTraestiXQo)tqX%|1;_*;_9`|gl!
z#B-*NqTZHy4gIgLTF7p-Xb!X&R)46oZca1DIn}L#dvCZ{m>oV+xr6a}u$%hQn1<>?
z#Z51^G1@d=xYPY~2ha3iIY%d<V6!Qk%~o{TPUg|tn9h{1`MgH5Xv&IXm44r(I3u#<
z19PgGzLq_+X_%#RYv-kdJf|LQnK`XBp)DsjdgWiG6<bP7^**v}xwv&v&rjQ4?Tl8>
zQ^)gX{SNc{URKR3aKm}w*T@`;rlrLeb0c@xiKU-;bVf3&G%S&iH}GlMvKJj|gGya?
zXzlGXvUELfbo1#GUa$Ky=hB$w3Uki&=8eqZ$$ccinOnB<bIz+jHm{hb@0#E&Hdo}L
ztKi+gYZvOK`h62#^<swp7Q0!l=lz1Vu8qv8<q)5m@LVzE+wFHbw>9T&?FinozGYfk
zk><u}t!!d-oXl4b3w+(Y<FQWu<m9%ed#A_6_~swI&++$l`l2^S?LNIvE%o@o`b|29
z$LmAZo9(ktFiAQ7bbqkuqgRsR!)Vj?dxlIkh4tKT_qOcc=gh%(Vta38wA12M0q)1F
zj&0QwZ<)#xyvvdGYFJ=m$IX!A`OFW+m=Bv>SzYA1_tZK2m#SMUEZ1#{EkCnsmi@lB
zcm8c^jOR=}uyXCcLw-{~wpc&dwd%sm>^q-j-~BYQjIa2ib^hU|I}fu9=XQHea`T?f
zxz*|xyV<W>QLkoxYyK{ta^LjGr>JQkN_R3%DVm|bTOiK$i)eM3t#VM(`MBompK>Oo
z-S-Hnv1CyB_s9H^)rOn8M;(^9Ccd{=dZGQbt*Y3GOr8h4Wouk7`L0{Cc1p-Ak1Soa
zmu~(G`lhLDHZKZGI=$!S)JpD^M!N$yMBEfL%lUHkci)0N_slO>v81jwUj5F+wdLpD
zT(`&z+h=WZ*xfWG!Z%WJ&rG>Hhq^zSshxRyXaC{m<m7YHj?K~SU8ieQE$q+Tr>&WI
z!|nAonJVXZU#}#XKU!$CU9kV>n^z_4mov${`=)Sf!sWXUUWH{W{&4@FJ!-?aMQX|F
zO<W8NP7(|ZTG$)Lkd5Qe<}qlj0J;<>g1tiIxadE2zgw5HLM=F#9G=3=ePzPAhTe?_
z1R{kLCI~D@Ys}p<N9^T?mtl9_20ogWwERZrWsi-MEo}WBiOjK3zuD#cMnv7Oyz2eg
zmwT;$rk9sm-PLu9-^YLU?VifNdn@1b`rCc}u$bXmcdypAhkByYAB=YHiP2jh@WD%z
zRVbz*T+`z5@zsATdhhHFJNmj{yZrYbAFEjN0|M1Hp8DWcS+Ty${NY)ydy5ubV%#5k
zly8skr<&a{_Y|j>uiC-?KIlUp>-&$7>o}{{cRj6`-}HTTL@WP_h_=mKash$LJMXmf
zYyFrY&%bKL(e}_KhKl>uo_?ATBlj)&RDb`9503?R)`uLl6qP@ktR;8+cu0-R{D6<1
zHVYpFbJ?v;GM*RmQL^S*|9Q_@=YM}>WsRRQKYmiX|H;Gp-fsRsKZ>4wvAV9t_u%!V
zy5%RIO4r@dtKa|f`1X(*>HC2nV(s^-ckW;Ri2cXHn7Sp8j{oTFn*S)Bt8U#Q{`~?g
zZdESWx_9;Cg8Ka9jfZzv?Xu?k{^s@TH#fBfURW)gf8ns9t)0-jtwI-noDgfAWM({X
z%a<Ld3yuf1IcW6n*}HiC*3GLI+a6wi$o1y^`!_F6Z`7O3dRAHMj#RJv+FT~?*RR-F
zYI2_$ES*~F`uNzrD>np{FD{qwlnnATUUZ!8i%ZD6*ahdMdevC3mCfv&_`$<TzEASu
zWX{@}J030Nw{AP8UtjC~*yNP?vG_*S()Lfj+m^oW6#TUMgN0Z??77`vTO3w43z{Cb
zUS!z3eCcD($?aiRJiIy@CK>pyw)MJmsL*zz$&Zaz(dRvMr1Ff9AN%rSgGt%^US~H~
zLF2ln9+iWsJ6ZNFpFGWQo4&GEx8rRS$x4l9J493e*tpy(YTNvxBW~WAL~n7+Z%fx?
z=-4n%FSFY7Yvmig;+`_GV`nA`u6*Mf7x-cmzi0R}Co4b6iBnRA_8;S}FEE={Qhj`7
zhheT?tDVi9*UXJmOE)hztX5}UZIvX`8~$X$_2<WmCa(6~*7IqXjfrF2Y1xE38xH+t
zx2pC(uKPE{E%DKhWS{7tb3G-+Lw8*gHBL~OWajpCOV6Cu6^9%P-Z6)IN_ZMi?T}dO
z`AFi|j02B)^5%a0IOS=B=PFrG*W|<OJspCw+l?~Je&^_#Ux~Hw{dDHOo&KVoMNV(G
zo-KHHdeR>5+d8aSJJ_ygNgULb$~)#O)%Q>>Y%zDN3h(|SmrFv|SxcJ!n`*tpS2M}1
z*GSXk57+*%57NB*kG}6Hj#==aLM!V+y7d2k?G+Ed|7dQrf8@{gZ_PvfAKH_%o;kg}
zG<oseUpw0NKYTCsudzPM;}IY4@{c*ZV(&se==1(RR?jl6YRCHEYfmG3#Lqq6aMwn%
zf95ZP?1ti^pu?XFXMcHQy!oz~??(UaZqbuHlY2x>6+iwv-KyuXmi<%cnoQ4bY03F<
z-rIXcopcgbr7mYbTdFZT{nVnA-2sAjz9Jn?z8XC%J_MTvw%m?zTbH;)*p%t$O%*56
zqm%rSch2zd4R`5O^KzZ3bjoz22a8DH!cMP^5rW<(fvT%sZn1o){nvJ~K6_B1wpCgR
z-%=?@SDmXzT!Z582hLjAnVPioz&0~mIrFs_HaM2|7z#Q~oH5U*@ABq_i_?^ztddT8
z&LQhNBY7dm>^p+CnJrn@RkCNNFU>qZG1=|Nr6XJ$gS>b$lctJ@^V~MysrfMGX{240
zu3lQk_gRY4s!tZREL+VI>T$GPbLpkkOR^Wat+{w4Do}evr_G%x;jY_9)uzapD_vY1
zm~0`^JHNetv(hV*!2ayWdA6sxm9M8Q^Dw{deA%w>si}9ew8+I3ou!L9uLn-ZjS!yx
zz|3{c^zV<wtRC(;>JpSc;jN;WvH!Q@`5m97zWA8;&f4|sr^!91%U8b_o&T7?CuV!O
zw4`s-mB&W<a#ts>?VNaJhTwL+B~fkb=WzD9xd(B^tud`kHNP)$^Ap#bmC5_ARrIk&
zrj%Vw)|N6{^JZOy?cI%^*jbOxO=ej)%YE^QM~6=BSZbH9-@anXRjt)=3-rXNA6vKY
zgN9>jr8s-B_3q%cWz~KH+g`3c($9ao^78K4PyKZ^zdif1>gUD}a<1Y}`7~Q%&DE3F
zhZ)}Gv+++4t&46*x)F9kv3<{`W5F$#du`6|+10B1{L$y#)~8u{<#dX7zt>X>@Zob6
zJ#&WJzwKvbYWkLW4{}S47HVvsu6t#}w^v0eSA>pL_)K5yrF63SZ0TpN#n%nOt?h$m
z#hWqbPJVl2<HT!`IRb0A-!A(+E7G=>eR5B%Qr29)=Z^y3bnP^hu|KhWy78osH>Vvh
zeKzIKs`KT?xZM3>XRb6iHn-N@o_+euk_Yc1SLjS$Yr!AvcjD^JbFZX*XQ<DuwlJ9O
zxm<YLy%Q@E_j~j_*0go2*FC3JF7<lauV$A7MTyIj$F*&m3Qsg2)1J_6oAK5<MSX(l
ztCrUe_g2KccP*dy@$U1UqN}Czo(DyJ^l`L1+y3Ea<@vCEhnnS;1v_VMN;Nq%ckyQC
ziMKz0%Cc@gJkR{@f1A6<X5@Sg`RlaDShncxAA_F?ix1!Vf9Zo%LHW6=yJa(<@vba9
zw(woTUxu(m=53506gwDsid^>1+Rdu*s7@n$eNnwZcKo5g3)j?YYPQr|E7;pN!Qjab
zUJ(-?2b+Y&GS8win8P1bt((=%AnX1kx`m^y(>1BE?NJwJb&U}BE-Cem8)kRszZc<Z
zQwzB9i2K*20Ji3RP3{lmqa-w5$$nzJa?hme@`bbmszNbZ2b+U6lv69c9XB{!(&PFi
zb%|q(h~J_`uAEveVtxxJX>?zXnB*2ZLH(-kzXkg<g&cQA-U`~H7h%%b9d%{FErZrn
z=Yxwhb)HK_7<Nt-=I~9t8gRkB@v88S3NF(pTh)5E8c%!g*6>=&s!BI^?b5@q0$bU1
z+?Dl?TD@^H{&qPxJlS^dW<j=v%QtXtPcqJP=Z<SyYtgLzQ6eh$eXq5g(!D_In*L9g
zmR|j04;5X0sycsCke{|V!o%|WW40q5Tjh@%wpr}0v)!9F$y?Df#yf+FD|Kt3hHUHn
zOJ*N0PE*=$eC0KZpOMn?980FVf1a&A$6L~}N9@D)Y5V3ly$x_)kjM0O@`C2oF#*Y9
z8OIcthVW##vs%5FB)cupTPAGBM6Uz?tkR^Ho)X`r{lUn5bJxjr7k`$UsqeSi{Au-#
zZ`>yyJ>%&P6JLKS%%ZpSPd1kc`=QJjqsFg0-|X6aBQ*8K$FJ`;?Vi4Fa@G5F2ii=`
zmKA17O<%e(^L(ww%UOq>Ej-2<Q`lz|#T0$%Miu`mRfB@1?*((Dqi$TV?tCj;am_M(
z<~7ck(reQ~{qD!S%i7dr)DU-wjpMDe>aUqr%|44(SJv~V?Fm0Q;cnOS3H#=FevYx}
zFW6tByL*@2Spl&R<qf;oZe7Z_fB6H8S5B>!jB1%dX?dAx(B#<O#&afR<?Cl3yP<4&
z?d{XR@XI&FD%*E8?JKT+vw82H7q-t&)T(Z~aPZQ}+;93D=AOKN_Wj*&>yyv3{SyA8
zwdk3u4r8d#lBqsNrIr;d*Gey&zCPi~c7xn<<HYX<kxP4dr(E#x&R_m^lh@WqtQCq6
zwK;1ACr1cuo*`GUQFgJ)yMnj((wFP~y&<6Cm3;s1ijqlFXaCKVnP4~H<-O07xo@^;
zD7G&3`9G5<A~9b2<lRGa^}e6g`+iCD%fw7hRi6d+=KXnR9tFKB65Nv|^m-}!4B5?d
zf)xwc7#OzjG2lEL3bOPZcIw*P@a%HwtD<+WOGS3sc*-y`2DY;pC}`jC;^9>Ba}u_0
za5Pl(dvwe<`q(9fe&cz8?#k*5`4{Xtq<n#C>5OuDfedfys5KT#c>BxNuDe?I#=VZU
zenF~yZ_4RSkJP?K*?qrf{CwW#_m;oyzMTJGQzUo5?N6=buEK~T<t{bhN4~q*C>-io
z$et{ilH8#r-YDtJlWbHGQ1w&e*wU>o#)^+RIHHfF@GLG^RFE1l;o$ehACByjXzrhs
zvFuiX$nhf&cJRn&9p?XV*z6C7z1YVBk^YWf7bWVNjwe;bJnDJWsWGqTk><;J0w1@W
zsaJE`a^CGj42!yQMcl&LQyZ*v&9iST4twZ#+DtSoMXg6uId^BK<af<2oxbO^elbs5
zUJ#l4Br;j`e$ia!dA8;!4gY>=tZkmN#p}$bb)OeZ)vs*3p;?j|qd)&#wc9zf%kH*|
z0}S?l@wo8q`s|{*MTTcS?6|l)^K(IFkfI#RW?uFAa?2VgKW_=EzoS)_7w{rOEcJrs
zueRLoyJn|1{*mSCSD$VaGovMK@vSKRs=)5Ra@mjzzwXLx-e;6-`79u_=**XI(#b*X
z6>g`BE_~3D44*n{O%Nm3uLX|{nsP<D)l9GJY@BhfH)gwAx=u*-rqvtw8P;zy2>%=4
z*L5eUR4YbK!svHd)dlmNT-Sv^dmgq<s%rdMbljx)DC74P$-CYB9D9Xkz85okHYIbi
zzRR_^=W2(WxWvwv*0x^d4zJh~5dCylo91mXr+bPTW%8$Iv7XE}Y!c_#wr!nP-FxZT
z2AhR4Z*G`c&a?l?#n<PKvXvH|xSDhFv(It=@5aGQOCoP1Zhw+`HsNEH>3-X!V?yuB
zHf@VeN&G(J-;UG850+ciwTYMJ91N3{-Vo@%z9YI|b)WBzHIL2itXs^le=)pPLsma|
z`=P15EjL3qy_xg!<qylaiyyi7oT~b|!@b1k^bY>@FDqp8UsMDiiC$DK{a&M`nyc;k
z>mN&3)VIbPN3BgN+2v>RSFNeMbWeM*)xG2Ff`7t}zyDAzEB}b!>R-oY!I~wH{eKj*
z$iJQ;XrKD#d7?qWt!4Ut9@1)8zV=<dD|yDPAndrhh4{hvl0Tx)XYOP^=Nz_BVVlYU
ztNTkIF8|?k{P_>3<M)p|ZP3}3tSL9yx7RczVsnZhXHnfLnM{#&`wC>5&Q>jZIGg#&
zyhGELvAbT@-ItNyI%(7K>Cb)Z*DG#ddgNjuXQX!4?bbQ@v`4oe*1hsGvosZ3xoXdY
zBP-ssX|=wdeAaL6vB%5LYVlq13U55kdnZejd5(5x@$-m_TfO(~y#C?%ObO-LrrkE_
z<{y@vjrqiRD@F42&aAVXGtc}~kXH5h{N{R-$=)ENJiGXl$C}=RZvPW>(>vyZWN`9s
zpI4%1UlkU}6|Hx;SN3uOd$#nZ_e|Ca|2le~J=>K%nK%4mQxDfV4Nvz)Z_6u{%|AG5
z#WvTT{@-@jpz`CweMt-3Hm-fO^x$N>n^*rBb?RR$s_v0Ks`yzVHp6e}%7c4C6UySb
zeAazj)_rFW<6ZVw^JGf5KmBvqZN2<?=i4e5-tFhe_<CmN4Bja24UtS={@%}@Ru#TI
zwn*wJ4|m@Y#m_5NZ0O@ZDQD+=DI+(3`flEY+tIm|heM;{il$Ec9<%hR>xt(j9sg@@
z9J_h@=Iw4VmmBwHMA<V>-Brn^yYii6OAu!+Ta=dIJV%SE8FL-8U-2l(U);oMtr2Z0
z-&4cBH16g7j{HUPpDZ5vI%!wse-vD%dL{a~fbEoD61u56*6lYI8fS33RY{(HtgUe>
zqOz@DG|M@4&8%ayzNQ{ozHt_p>Zu2ZQ#bGUQ(|EJPfF>QX2!y|vX^H>T>39s{I7l2
z=?GgTyY#};s|IzV_Q6Z1PFxTw8P0xxzv8ZUn_qA&v@Y0|wxlgh>^jfK%TcXsqnxa{
z<5pe1p|>|%)Xupidxzq&(w>PN#f*$ovL?7xh+XfW^XvWc(ETPabRM&H9{YbSGAwoB
z?Ac5!bQ>2}7)i8A{SA{9NDGhsx8s2HjQwk)RJWLQixlsvQ&~87-3F&sR$W)8?Dd%B
z9DQOD&(7i*d)ZIS5l%Vl`RSQ+*f*QC{~PE2FfV(k{bjWUTL}Ao{_Mi(yJVZQn7*%g
zd}77q8;j!(PN<q|BfRX*<Mj)K-`w?F)@D&``z}epNB`u^-G&^#JA$4D2Khdes|&SQ
zQ}!?}DKl|TeIkFGz@q1&JNn~ItN0`&YIe`B`yRMB(D=^2$#vh^W1hqXiq2~bPQ6o8
z#i`W3n&H`<0J#-6<d1kP{VT>7y07=qZ6Ago#S^WirtV*lwr4p@A#1`GMh1p3W(Ece
z1_lPu!KTndOUofso%$$ymQ{T6lM{1XGK))!GLuS6;0NqX4T|=bb`<${F758slGTM<
zx7~`~QrVIH`qrapZ$}{(#VxI3@--ShlN9}S-Q=C@+x_3`V8Dlk_6OucZH(J4tq`cW
zF|+u$Uh$37`^D=S_8eVpc_v{k2fx4XmoqZwYxjH!RQr)@dZIsMzQO$$UGtT~BKNEp
z`dN2pmA3DPPsye$-UygU&3t!6R`*tCPkrXa_ujJgf<HgrOHimj>ymY>Qa)tWr~a0`
zufBZrym&I_^MnW2Hf>YUF$vMK=dJc#xu26w=#bkrp_2<QoO<$jQFf1zSnm3B-?nD)
z6;EB-@<73N={nV>rIjh=tG1rf6{?HAG4bbr)!p-gN<Tk)*JX1&+Eh|Ke)Gb-ldo0g
z2;4N=!Qg7cA&_8uKqig($BeHV9V^P~``;Xk@^rb?X>Y!J(^O%u(9NelnV7BlJpbWG
z`(TF*n*tFjXNQ$3I=A@ph4_EYdH%*)W=D;_d$e-E7iqsQRV_|m(pbLtyyg?rX`dN*
z=1S@nejP=}PVLmRWF4`e&AmO-l9d<dZIMe%T_gJ0x8}UdbB#Iwl)vW$Mjv`mmEhx1
zQKNoiFQ?s3<~I*IZNqQs{&;(2uJ|9JT}%Pqu)}vj$6!uM_L7$5U|{g(XJAmoo)TQ3
z2h2i-_FBU`>qXKf>(VywHn!%m6cdWjn6QjD=baMQq%BueCY0SaIjF-K8&J5(`lim^
zGdbyLoFTb;SAEUfDi^vWPBglI+O`~x3y(rWY@*hNMgI~1BYx%i?7MFS!_Mgc?EC)v
z-s9@|&#RxGo?1V@p7lW4pWxKw)(y;-7w_mxMW;U2C<uL0=yPll7whrRIR}`v>{|G{
zpD8DuZ`P7K<epmNbL@Di$zx%ySsy$_`5y--{k+U27x+=KCd_1h@W;v;$D`sORWI5|
zEqZ%gq$cuc_=g=m>$QH&>N@+Qis9aQm)%YID=L`ohkVqX6S5)LXP<7*9HB|UT=POc
z$g<uK{b0*_Kl}qTtN+O#3%f4=nAo#C)kgX8aUliA?wQLH`_oE~8_ICXeiZJ@)_Jpi
z?VQBjl7}C@_|f@rk9GcxJhhd(r|B*%{D0+%Z1C?#AC(uE`}yC|b?owtu+5s;w|e%w
zo_S}bd6m34SGz4ce(amCZ0B*lw~I5PH@75Q^IFaub<-m6=F7${YnN|2c3{!k=PwEq
z?nKK>x^Sd2%;HSMvzC1c9vuDhSNmTwN8a_}KN4%5ylkg#%+}mv8)VB$3bT)$*-@?1
z<Ffwo<EY~6Uc1s%ez>m{xoh5=?vt#v-yr)2`<I(*#3xL>ve|goT*1$iF1$aNKI6FW
zSDE))#m=r-x?JKrujlDGr<Z)*w$bR|Jx-}bhc-{r()73?q|25r_3przkToJ%$D|}q
zr||0gMEWnyIlS=wv0H0h6jLqoj_KUYTXS{lf_K*QOp1~&PW&j$dG>T(^0Mq3%jTup
zR9f#o^ufYZ`g30*|MFg;7ia$D)aCg#ytcMEG~vvjLsJwF<)6A47Ug1|^|UoG=$T&B
z)=%qNwiVe-t~8%sKC{X>enF{y@sWz_*DF*{`zUKRJ>`pUd%ZUG?~KT`a%vVQYr@KV
zUq01PUDI23b@4>6hErbxkG4r3_TgN&UdzwRTX*VGuG>1xJuXam8p7TF_K8jJ(Y1T#
znz*d0be|@9LH|w0jqXC;?T>Ax+6)&ZhM1Hm&*_|7emADxFTj~a|M7CEy0&!HH#_;(
z=KcF$9=@Z-@0cX-`Nyo1`dJp;O>L@kE*tc6O6q4?^m9LXUgYO)<Io#oYIbDFET^m9
zhYq@4+@teYly~}vVqVP;FIHK&&u?$)iO=|<TDSD!^@`AAsl59S=BNJgJ1qJq?y$Uz
zf&2manZJ7;bj>}#)6MJHe3LzG*HzbO9=45CTi0@;NbHt)>GHi^8{=J^O;^Srn{WDu
zHUGqq)ouG9`tsHve=YS-QRv;lJN0W=c7@HFdLT#3Ph3mrX3;NS{*tMSr8d4hxNqZS
z$rmNJ7SBC5;rs1S(<i*E0}6`1X}r{VtjnAKSW9$0vq9p@%~Gp%@^{HuZ_{6X?sN3i
zU8^#>c#^CxwM9R=$jKSKxKt~7Z}yeI?f1fzK5&Ib|Fl~FW6g@bhwn_EeW{ryUYZ<w
zaP`gSH!rOEdAwTn(AC^`n&)Ou&#zp1f>r#TcWTaY?X~UGc&B(RX=%vbxGh98EGx$Q
z%dIHs&TH!<r(W>-wdbVIqDajsA+t3Ffx#DUy;{FvhM{@zvA>mKpSbjHzj_e#U;5e!
zme5dl&eyAqLZZ#CUlH6ab~{d%--W-w<Y<;gal*bGTdZboXJtD5`IcAoluLhJ^$4$y
zIrr+--uV}=aW3}q`EO~sILy|KE8*Ys=ks_3f+y?cuMCr$d;RSWk2Nz*vOH|xycLr#
zb=$m6?ha4-#dABJ%s#W_NRm<m+rB&v|1{m>EA~dlP3TEFw@JK2F0p*BvW#4Oq{OSc
z#&=G9e_Xcj?!8V1vjlTCOSwn8FSJKn?q8d=k3I2Y?L!M4JHt5pS6><5-R!Q-Ic`zB
z^ZNO^+V4V3-p$+mq4fJaSLgEkkv}xK=6qap_h1=wQ;vDz_q3J`ti2h>rQEjg$?lk}
zeb?2x<DuUBwY%<p)~{3+Y&<XBDIsO??D#s7gA&h^uHP#7AoJvO#vTS$MP0=bzO19w
z%H@o6Yl`xhUSIJ%Q2W(X8?PPg!gq9Y{ED~VxPK-3z$&IK&3A40dPnTN6vKb^YtfzO
zGqd$qP2s=L;`!!#jLE~jGZv-INw!yIx0n}KruMg6GxqG%)t6dKBwlDPC^V>G^zG!%
z@Vk(GTIi7Q#017{HdnXne5VBKcbF=PUl98!z9G-x!lt!97a6T+DO)+w!9_EnHYxqY
zcNMksia`h3)Gz$9F>A}$NVv?J<JYoXXW`$V*|(&sBJMBUwlcf$vD1r>!5MGs1owUL
zWxZ2#L0D?{m3<wTE=XxD`23}Q-nX#rVjFaYcJQbQX(-$(m%LlcS~B;TZ*$b&(#SRL
zTfEaNt}yt0YGwEr`{h@XlDvZSk$N9HBjbkhMA5P&<+6#w@726^?=(7bQDA3*^u+sz
zTkiF-R4$w>{Cy%X-_9QjH-B&h|I|pd@JzFD&9iYfnXk&AAMn^Fb$w7->!#}()y<o>
zbG9+)y7#aCAdyxu;ZM(`^WQJ^`{=)4KHr(8V$N=U<1T@x?H}w;g))XoK3knSWBs&*
zZx40tockv5$yRX6r>v=)_ty7}(#d_ZSZW?jiO}&_u;KblkN*`rW{2!kzjL@u>CWl0
z3-a%TV-@b`?@Fj}C@K0eE$wM)j=xF3YQ=dUz8?{8^iY_`;nwuAMP}k-&ZXw_GWfo2
zRLp(!{6pXKQ!C$pkU4kZ<6a*}CS99*RdS`={5z)}fAd(;rE6JN<5WJG2d`v;5Bl4d
zUcPrUCz0vd-fqox`EAPfYkMAO{Ab!?>axOZ#yvZ+d26P}$&2l=@Nv@h>h8bro?({g
zuUn1>uKq`BR~(SEI~Ksez~BcxxE|8F)P=S#b-~9o79^Hr=oP@vat}Ut)?f3iXQ+p#
z@98tAbxxi+|2XK|+4I2#p1xrQMj-}9n~Y308yN-`l<@`{7#Wy6)zUb1-pf}{bDOWe
zm(NMhtG?$>KGD)Tt9|l}@7AYGNT&&KZJO$?a&ubhIR*v>8Ab*M2?hp+q|(fs6y2iK
z<kZZ95+qlc_V3L*Y{1h}zRye0aZ0^$#>ub+CoV0@cJ$t|_`oC{PqSY?w$5(3UHjzY
ziNw;)d)L)HSmMmXbC_}8oaZqo^rJj;0yIN9de_{45|^;(`?@{RYj>4xIJW7nmZh0!
zV7@^6F6;Tfr)4ZzmL6idwV-<^%cnKE`5Mby%F+Zcv*^{cMSap0v%NG&^4*q;=F8tC
zUC^*ntY6YJV}1M2!>wP-#n;!VZV>u(W!2>)jgyTxEU8Ej{-Vq{sj`kO``MRz2g5D=
z;w^7CK5?44RY)WL!Apw^NQY^H!T@y10_gB85Y3O;*h|jIM7ewbq<q4OFVk$87#M<C
z85j&Pl{+SZR){B;IOpf)Cg!F1Wagzh=jWBBBG!se4T;Scaul&;=I++s){(pR{ssQ0
zi%V|X&2;kMxR~LzO7PN=<EfI@a}%cXI4`b0bbpb)$;Bh`5BiPnMNSBqw&hFW^Lv)n
z?{^kIyLZ>VzWyIuL5DIk=bINdOa%`1G0L8w`rWD0Qnpf-Ro1nw;^^0y$kdK!pO*7P
z-F;uscD3#G$H|H7&$^z@D(CbxuetqdUeuw`-<&H>o`2V{@XCkQWUE83N_J+J$8NqR
z=aG1G)4T6g)2;`ml-{>~?o<?*c4NnxuUr4mfB!<wWop>!7*(^oa>_i%cOIKrnm04{
zbn%((B8wtU8{gRdO>%pxrF@%5x2nvgP1`z})_Gpa*1Kw}bE5aVuHl2G#O$~OXAa(H
zHx$|XV#-v>HJYujEgQFKWTqIz)o!ee-?h*D!p7qTt6y5pQ*Ccr_a-6JY|$z95_zq)
zk2iE>N$hg+S;6ggS)Ob5{<2W{&Ea2j<F9%iI*{9Tru6z$7tY_FGb}tF$?-{cna>bt
zyRt{E?X9Xy>*+HtS6htV6#DzkcH&(q_~(h0qjOpD_45tCE+rj2W3VU2V$09UNeY{d
zon6cSsfB3CY`yX&JMx#dii6XO`+|zzXBV1!?v{#lGYi|n@Y~T(q&%aSJwZx)i`sF<
z2-mJRcfCDk=`23CHgMje-)>>byMJ;|>=*BtC*q}?(Hd0Pa^oJy$M=`Hv{M`_IoDjS
zQ(h4w@!U^KGk@;A4F`7n`?o9#u4rhVvijUaP75>j=Mu;5XC1R&_Cx(ic8kr?%??#<
z^EOUD8kVpf`H(G8QZ$NB?O(>g!0>_rV~s8-<sv1)fW)HW)FRlhY-^7r=rRn~{~24O
zxXYgI>d1CJA0WcwdQkXC*$UlECeN=0dNvL1LcuY+3hX}H%ctFY{kMYYgg`rovrEg1
zoV;xha@Wq-aKiVtSM0m&v+9|$w@a1ZPcpdw!bvjx^{WGSuTD`+YgxK+>WVcwqTNnc
zA3d*}D(KKr9<}-G)RI0~#g`MEJYzc_FMYU3dC7kd&sUbrHR&%{?=$t(ut`+U|JD14
z_p9#A8nlus$ca;S1rr0qcQytF8_aBn=~v&(;^NG_bpHZS$K0_fJ-8&XBo)$In>#hu
z|8j!J@&D(|cCCG#vOr;<k@%Fo3dtP}4QfsLhJGdo6Pm*L*UU7%8+=LZzW$z%$E5!%
z#Ca@I{&<N0LA%ks<+{bv-p8h?y|wf||8ws3E#PzT7PN?p8a@d<64)`>MD^-wEuk6H
z(p@#rx)@3yHDx<`tCZK<#qhYnmm8Dk7u>%1mUsGYtLASG9Czc4cRqf>cR1$erqxGR
zoz-3zcgL{z(X7zbg@KyVPT&8k^*Kz_Jk_Ph^Ihz)(>8gByvd52(n2-nJ7(_K7k>NN
z+vPR4mrQ=RY3t#$zIoC?{CvvqxSKD@R;F0=DXy~nv@Mo*Rn(h#uI~O?s-chbx0hbZ
zxnA!cxasNiKcf1xVnVv5*ZJn1-l^MWuzj`Ber0{hNz*bMb4ndb8opis+oF0+w@^as
zqS5`Ueml$gFH0$;cB)O@y~t^6dhzVQm~@82pZB%hySw~}uG@>VJ$t4~sP#QQv>-2{
z`deS!`3c=dRY%gIzBwOO2>N8U-tj0WFSo?y4IR=_=N|59Q|)ngT=Vnaw~oYB)5VO>
ze7on+=JUPGz(+3GqHb|=Tk#I(RpuXJgQq8^ZL-zxd9^iZ{W9+9ig(XSX8vk8su-Rb
z;n%Ffs%WWbC8_k*E>p2e?b92Eg*6Y=5B*-!c0wjaPVIcTM8f-poA@m}(--fv^qa`y
zKPkfPdS(yng{2Iuo^Y4C>@%(4dJ<Hh^GJiYugTcrbzz1#(>doZe@TUz{~HTrnOj#{
zO?jWY!(UGCt+ZLeNd|S*sY?V-iW|r^<g;-eQ=L)Nv(4Gw{Ha8~!{c6=%R59Ch}qaW
z1ooZ(x7~m0yEoiPZ&tTou5p&1&g7e@)Nn`f^oIKzm^b|0mJ(+kdgIQ9({YcNmatBa
zbB#Q2>~6{|>5^fgnj!d8SUZvBlX%!6*GtA<T(rI+t;h%E_C0HSjX;M!H?uM@*b<Wi
zAW1*Cq$o2l-8nzEAh9SBzD$2^$a#NZN0EPNyHXQ#7k*ltu(c^_VL*zef`?bfl1|Qz
zg2FtV_m;g~#uHg~^4+u_>kpiq&0hVHsZ;IP=K3ig&z7xq%FePVxi|a!zRLHv^Y48*
zeqR4RgUH7-2Lzt+N@QK!xFGprL!0W>8CKImXXOMQ@9GO}Yt(&tB4M`TwE6Z47f$8f
zO_EsYalGoP&AO<2(t%qwjiL&(57`OK3;C_Fg0=lpy2tyuESE~ZG5cHfZmO<PvJu=N
z=bUuCP-lG(_x<f0+anA2w%z>Ta(COAS-QT5<hYlwo3&SLUVi1ydsDr5s&wxJUfx;r
z)MF0UWih|QIm+I5C;p7JdfaxktE*Og!Q?na!9UK|xjByC`+ILqO!k&=+f7-HE3Fqv
zef)kpyxPTbs^Ze?5(3j+|NeP&(yo>}{~09PzNH=(bPVp?^i_Y=S%+KC<W;xtYQ3>6
z=-9s7U$U-ougo*Odo|Bf)=Qx=g~@;8y5#1EmGOP452JVY<y-FKd9w1Vt=#2WfmM$e
zv#3o-^N4+Zc0=Le%w0iibj1!==x>#f`?jBRmc;EMse_3>+A^fle$72-Wb4%I_DIY&
zk~fCadw1fknJJl30`WUo{`>G9=UB_Ebt2ef;`|Ke0_n#A#jc-R+U*tH3KNzWG|u?O
zxa*r>R8IBB_jh+qvu!q9I9K^x^v&X@^Ay)@-RD$&T2D}4plZR@%$oWCoDaAgo;g0@
z>Ejtsg-?9;6!|F<R;i}4<yxMUV9-64Wloc<cR0(b7p*?=K=0ydrS|G4nJZ<L=Jj^$
zU*gk~Q?&4d>@TIidImRM{rqa2o3K1`l7?UV*-2AohZ&ff20rxNWHV{<*J;Lg*-E&M
ztxV|qWN5?paq|rBT)E^FV-a~L#>G!;6}qn|+)ZHgpYW3vF#rcj>o1gfKe;n9FnBXz
zZDb*p_KrpArManjC9W08so<g>Qp8W4c-D*AP^8uWm+|7Q+oE!{Umbe4C6xX8)TWLm
zgB=@|{w|SPHgTqlk@2hSomx9SFxF@odU}iMHbvffGVjjsyTYITef-7tfak0b*A1h@
z+pc$An-5q%Z(45K^lM?gzx+z^K*6fNA%%iv{cS9*-j-=Bq4T-RW*=2v##SPdx%AoT
z1~=zbJ5D}NQk?st<-w8ZT3?)8j|+Zo>ZqP>)VESX`a~<&<{6z|URfsaYffL}{ot66
zXGX@#{V@VtrdRx4#-rwCH}mJU0>cH7i#)$B6=hkv?@Y*nbvr-a{&o7sO2#RHEy+%;
z3qJRU3%;p4yZ^MyspM@tT(~R$a;H6YIKv_vJNIkJyvw^Uzw>d7d-J!ePj$6f=~KS2
zuPY5#U-~s`sf9*ztTE4Oqx8EkBrEssetYht$j<p2Z0~N#Ph{V`iS^9f!)I1@@n#oZ
zF1v1rG(ZlD6?^fF|Hs)G7}|x2EP=f9^2_re#So+gf>a6LjmWMLc`91BKV@>H8gpAK
zLu1(TSrbx<`5C(}Y*Adaq|HIvr)SHp#Bv`tbK~1HZ?e2O>J|O=Zrla4u<DH*x2hVw
zgq-$ViQBN~>$OGx|G!oKvZ?)bfBstA^t7_gv+r)`wEz8l_UF0p@9Zo7eeT`S|MTh@
z9jxz7-N;(LZh@-eV&P;VA4$cC9?wsoD&unEWa46$`Fl$6Cp#Syl)V061rKYv;g?o}
zeG!fN4^R7)Z1;Jz|IqDfm736FRW-If)u}m$YmN3b=6^c5J*7tae&V6`Z69rq-HC5}
z|CoKxpMXRCKYm!q+bcYlkel)$S+aD)p&y*}>XnrmJ@Y?(Pn7I`knZ#QGuI!c@7_OD
zS67A{TCXxkAYjds?1*O#+0xHCS|+#LoZh-HcYWroGJmIHnY)4+Z#Tc0cug?6Tm8$m
zh(j4~Hy%4?Vtg-s#;>}wWhJ}SX4khn>|F2o;MM63zieD$1!ud9C43AyvRR^Y>6t>)
z@aY9zJ8Fg5Bor?M-MOYyWmX#Fz2wKVskQA}3)h-HmFm1^cewCn&d~<Lq^!JCY6nfZ
z`^ta)Zb|Fg!uI`LQsKK553g*A{r7AitIyuc2E5z7A2`((eoA#)xZ`=ZbyelL90?JQ
zuMvfxu8I0wnlEL&<Tm>jXU;<|zt+r{CUGUF`8%W0k$XZB<&!ggXY32Lym%+>PQ#%!
zPoi%c?S4D)%8a~SOuJuwK74h<oe<7!$;yc}g-ok&e4q99+iUljAl(aL`*UPhyOosp
zKRXj07j?g2b-CmtAGMNiUNfg}Vp4VMJ#cHyo;RhX(Z#yw)^MGcz52G+QBuoOf6L3<
zxJ^%8?M?`A_H#Qtxc{I3W#pV)cfB7@QqJA8<@2S3r^Vu|DsP^uP2fz@NMH^-)%Vg|
zV$QSIpE9B>GPu4i2tUZXW8y+}mTPC)H{IU0b@83_A1|(Rl`V8X7?u-WAX}z!qqAT0
zh12#XM;=ZSy}V$n$z}4LFCBkuXE}bB(<=OL*xGAXtxB`*T#~)ICFbhYcUMBshFq4s
z@AS9O?(uh7t2M_@Y`Xuz)9k$_tN&xk#<_Qwt?F03yI9h!)T#K&lH6YDLfdus5^rYZ
z2wv3{^3%3xzOS`|)m*zm%$e(4lE<Mb2evKJyPs{rmpXO6Wy#Y8^&z51HlMw$vi1Hi
zg*L4j2Q4`_bDc}}J6K%dcIY!#+<|Vcy5k35+?wsRgY9aT+pfiaFShMV`KkZ#yU@P{
z5BlFU+dp*bd;j-S;0L4Kf6rIA9(3KKwV_V&=(;HrJ6@Z8y}`VA*Qy7me_E$sXEj@L
zRkBa&`s8hL%JYw0O;f63Uhh#6d@x$)Pwt=4L-$#(Bu!oQK<J*}j&IE0vKsqTxAn;^
zyUJQ{_Vg=9Cg0PU3SX9q>(@5!dbHOl!{o}XOx@)<5|>w8&e#9oCi^BjqV!9SXhqd3
zuD$s>HVbDfzgw|<_4K&ucO=^`MA__XuwL61H<!cK?QUm_>BP$Z7aOPXq??QXOcg%7
zfBAufsus%sKgO+M*4`4~Q$A<bw{vD$H*LMIO_gNeswjH6^i!Oy6!*!@m&?CwW9rlY
zx8-RsYq#xzkbQlH;xF&)F<~?6OEQz_*)s80+n<GH$!aTJ+D#R`vq7*&;7i6+gY;gp
zi8VT@M^`-iaL1W#_PzAnTIaf;!V`}^{(QKsD`%%&hGWX#32C$AVxJkMCr$4B9@h8$
zn<|fLqr~B?yZ%OZ6lcFb-n@O9&nva1b;Sn{7EDarmCX2ek*KA=eA(KP&5^+eM2cc2
zm2=8nIw5I(D@@BWrQqngb07aVoVyo2vspnbT7L5}<BjY0&T+rA(wt*z-K4Th{`ZPp
zDwZf{913pzzVzUs-<NV8)@Ht2@bi6DllO+*n<ll3_HS-U7q|MU@>b48GqGV=#$yq$
zMM-Q+FIu*=r3aX`e#*EkvLxfP$f1j`E{KG=tmf37|54?h%j>BVGLG0VM%r{~eUvck
zn!Lknj?>x-nchd7Uj%*CPes^$OOk)!m{Z=#zVYc&xg*=!Z93NKE5H8sI9O6zwSS+z
zRc}}OhD^Rw1u3a7D%VKq_0DTGDc}n~v*6RwW0`CG<lR5@bL!>KIXX*-XX1jb%Z<0+
zI3XwC?<nLy`RnqHk#iiUSBUL>w5Ghdbl<l#yuUtX@=VBOVlOC8cq|a<|JW?c)4M_{
zbn=R{+RYK_2AxxjBka$dyIWut{#9$=zJyhu1CAEYvpOm+vO?HKmCeh_X6b^AA71m$
zoX||)Q=!>yKj-0o)!Zc&4Kkj7VySOVcwR}3d{Mo2*MjR|AO6Xh{faXG`ZGs<(Zy0J
z`EL#9Tm6>g*QHu9g^Ttb(77vIJv;5AU|6*4fvZhv>$87J&t4u_{OWK0&!jB<1<ea4
z72mLFxAHpv%j2-gBAr~R-id1RX--Yc9QWw{+gB=<8_8w1M0Csd&%Pb&9$)$WUnO83
z|DjS5whrzH<+x>Q_Ihlr7v0`{-9Rz!c;QDjwWG5uBKEXSev}&{H2uT83ikhk?}K#y
zy|diYeW!j-QQ3@5i$uOB2<IJjob*?Y^`UWG+y7>*qsJ$HZaz?QQQ}AB9kxKuRqnbR
z8+|va@Fi{8xN@geNURxiVVdI%`Hx%PeSB#Yx$=R))$Jb^L<A&qJ#uW-ywJ=QT+kk#
z$kUr}<u!ZT@`na?uT&!~w>^|t*V@y6`Ma><Kj}9TskNMW%k{MmNBG}g-}pb%y6w2r
zC*;#fK`jKCY_W}JnHU&uvoSE36W>Qc@10By&Go+=AX1li{bHb%M1JrGRu>-W{L}^o
z9>F<DV&Vof6+b3Rp4E97b*b#g{D=GxyW<adB>i~PuQczrS@8z1htAdKZJ%47-}!yd
zmil#df7t>K^!1oLOZoCCod21s&Iy5y8p)ieh2p#-CQf|xTO`gi;_-)4PFGH8p*Z7`
zT#0E9bB$Dre%7*WOp$-ZQ!9G-Qu*v&=DR=F$j(?Ix#EWD**n)|n%_GweNk%~({|?G
z#`GIuC-Y>o`<~t8YYy6`b60cbtk<tvB39k-i(PAdYvIb(rk4XaZ(msBEca~kvmLvP
zD<>~Lp>Xw&EbFfcyIU1L=p0l2l=^5(?al)Rg6}Uh>0gSKX8SI+LMBsvR#sM6?Aofn
z#H-zny$ZoQ%^u%7I5F;X*IcPOu7|e|m9DyQOqZ22gzrbm#z<|E%ByPcnNnV?lPdN8
zJj-VB)c>+uUrH|fwVp@%W8^KBtesirb7$>K{;=&tS<C$ASGbH`gnwX*7TP8m_Wp*D
z<FwnewWV_({0SDir={^ww)E*)C&QehZ919j;<m-yE#f_IanU-t|EPjU&jZ`T6*1Q=
z@}?{-c)xQ;*~SG$JkEhj7mM}hb8E%Mes5ARS@CygO|*1K>AgwY@2DmuO62wD&nj8g
zpy$}^n0Y>Q(VL6B;b+`?k8Zul^?Y6MWc}pEeffJ$Y-+!hiYR`Vw)if?+mNWtbra^;
zwSQ=4VQl{L?vYnntKy`o3=N4kEb|?-0$==@dt%a-b!X-%7BYV1ws!xdzDrN>?YXiQ
z*N$>8`LVA$&&8&>=2q9WZWeJvjSF*G4)*VoF=!9m`GuqZ@8mTPI6d!tww#yLQ?vX<
z(qom*nYmVXH-9LRKk2l`!~cBDqV#F=j5U@|442_=SeuaPBVhS?spq5?o*xpeN3yqY
zC7z2YIk$;%f$I#FLozpoPyGIodFID!9k~luj+H*anwzHF6yKsWS*h2{epBEt$CVLB
zU&N#KY@Q@tRe#9Dz>vqzz+g{I4hhZ6#?y-48+tlO*ippR_M}RpkpqXo$JP#wlL8zM
z7b<lqEnONa64kmX;kruj;}<DyvwWt+e~6Fq5bf6zuWR`K;Mny;z0&5pHteyT$tCEd
zTKMqUy?3|2&)HY|?(CDl@3+5acv2Yc$n%)X%HX2o>Ny6J4?Q`0a7t6=8J9xSu56y7
zn~&$o9hGtZBUpZV*KRc?o>$DbuiZ`Uo_@M&W@6IO!rnO#KP_7NoNL<5=rS+WJn?X!
z==G1>UOo8CqxepW=j7D%Kdk2?@0b;3&&$5WCVP~-J)-U5;xl&s8~f%fOk}-mw%3)-
zwsy`&OULS?>DMF=o%Wm9^;%4G_0&tt-q}5yJxwp}iO7zv=VF83G$-7s&YI-G?EmKZ
zw$GR9c08Tg_jmdn-6;h^Gv?h-eQ~cgF0QE8%X7PJkonx33pOMNx4mBb>h`@!6RM5o
zW^VWIN|wKLFLOcO?H<Fp$I3;g?rkbDOfr`^@Zpuc$%e|Zd-<ET*lPO3&CpC-Cg~Q%
zc=`0TIbG2g-p^e0cV)7r@S|R>s*CDp_H^Z`gnUt0>Et-QE0M=-<AO(OVvBbiHTZu1
z@<D^EBDWc?TvnKt>z?xKp_zDBqRHJX<F;A6jLz!%V(;^+_2Qq0wSN2Cdu%J?)s5PQ
zF7k)?enqrR5aC@I8nora_baNO1U(kjnr!^LMf;+j&%XF%_6b`y+O}+YBX{W554k6@
zRud*D&2I8B{wHYko29kZ&HQ(h-8a$YFML)DB`~Bj)T`>H{PT*@{HY!2)3GSMP;ZBo
zcU+3+!|PkCm6Q(c`?)9UtI2~@i~fTQGr1>h+SY$!w(^7br3pLP_BosiIavH;>Er|U
zH3h3?1yoP$>HgO}L*jiy+QgzGqMW(YuiSFYkPj^Sq_|iq;)ef`o9lN?5(*7_<)N>~
z9wWU(;br#>vuU#S47V+VuCmM92cKOTDtdUT!*dBY=DEopzpc7MkG=0{F`Td4?f2=h
zNN2KGR>Ys=bB|0yD?4q3l5)}tZP)xk>wzj&os62q%D^y_7vow9v=MJeuM;vDFgGl^
z_;Q%QzbO)@H@XV(9C;|G(2y*AM~rF8k^qkF4jv6lJe?P~ZVYt~nHUrK?9%VOf4TR3
zS^IP;|CZoGOU{)1I^vxEOZ}JgtvdIxl$)Epmrd?GH}C4ayZPn&Z9mD^|NkZ3F#Th$
zjJ1JkvQ?j%Picbny%z^~(ptYPt2m=t!2Fr>&Jo>@72d~gbKYswN_KR!>pS;qWzz%`
z#l6Z)7FFmRTX4L<UE_Z5L4j4Oh1W#*z1=*IRSTu8e)3{k<g^Q>b?$ptKhx^Vd$P<V
zJ;LeulsJ>n<w@B>->&VKU75#wRp;3<nP~Iw!|Ihzfp3#iJ&otSWLK5DC9pxt^h(i8
zwb@do%94dfffr6yMwcJ-@Myam9C^WO+qCf3lWVeOU-9aAowje2!`vGl<{Y~+qNiS3
z^G?Uh;tt2HP2D>etrg?Zoo@c+&Wf#Dmdt7`yn99M?uk61i-LEr%$+P0^pc^+(0%h&
zR%dCs850F0w{p$Wko5NpnI>jZ9koaJz}h`oXI5;}`!>%gt1@%z%fEGuF#+p>k8hGR
zeWdg1&7pbX$!pdx?Dux^uPxYifcb(>znJWqbvMJVD8^lxb25}E*Mq-BW6S!$>q<FE
zfx7d*?NCnCkX1T&AnH|-O^M%xY+v`^m$Qw6nJi`&seiVYR(ZGVYR&7F(p$F{9Zg7!
zcFI2=QnGjfCtJy-^0}KAzud6ibfJ&XwamjC-mz}nww&EUF8XhHIERP6WYhtbeO8H4
zj)&6Bw|_XIc_n_u*&6{n4rkBWl(MT)r^(;_^8drry^m<FPI|ue^R5Ne{wH2d_^|4M
zLe8ZvVh<NZ#ifX_T5t4ri)?J(%k{0p-7Q0l>)oSyJ-@l+5_}H+{^8(Mp6s7ev*Lkr
z%^ALHG5xPow^?6Zberp2;{GQ84=bCPoqqAm?|v)(#V|hG>3)K|{?=%&OA-kWpJ$iL
zmc}QA91Q;vb<kX??0&aaXnRf3!@g<dF+pe6JSvMn6Jb>?KXcyhsy<t;cvhjLUFDe@
zFO=@(k4ZbZVcLADPt{Jl&#w@?9Q&$$DOYFv_3MER5?7lRBwOWZbI68ztDaqQFyV4$
zrfpuJvvGIR!8HeGw7h+J@&Bei%dk*YB{41;MZV$*YZp&T6qvkV3HRx1)7I>kD*OC-
zck}&U8k3q*6Bh5tUcAp&;OzbHpXbjhc*=b5)Vg9{F1=r6k#)yZg(@d}Qp~Dzw^oYL
zPo3v<%EtNjo!~9@jw+vxuh}|oSvGOAc8BMVq5@V~A-N>!)*VMacXpo9@8DVIka_sc
z;`)ETI`7?@__&xqspj31|026Cbjv;4G&`cL^Qp~J!>Bzj-8{N&6Cb>v`PXLVv)SPV
zGk$b%xf`(Txa23J`YCye=(^Ceh4$;iHRmpvo)u>KuH~#1<J*nd6N<Yvru@FMAXNSJ
z9)?hL<tuyJCeNFk9mC3DIazdS#hd4%<yV<)<`l+$4LMu0X4CbVQeg{Ue&0B+=<uF%
z%Q=dBq!KSryxDOiYr!^$1<p3}xD(!eVDhxMd*$S>G(W!GKW154HV2+RIX~{uE#s0+
zFJm9CiThrx5xG$>`LNFQC&f!PpT6-!_R-CE`%-geomDgME<dF4lIx`0d9$wqdwaL-
zU3b!G+ZBd+3aSUgj@>w-_pa(@d#uidy0WARl5&aj)(bw1{1<!7GDf}fS;55pPWr5;
z)>m>KE$A+L9>X-}III5F;Dd2x?W>Fr#`$e~v2=#k!#7Glqe{yTX_uK7_Z0c<YnQ2P
z@%Y`(>oS|`%I@3izb}1OcVoMJ)Q_3Dubvluells{lV>yUoE1AM`(cvzZy%{o4ku@x
z5MJVRF7NV8sRPQtD~{F5u`buYX;B+B`6cr@6WQ3EA2-}9Dp9{vopPyoQKhISr}~~9
z+dgFpC8e?6*0{EAh2OdZLi+NPuWn=t_4;`u=-$z2>GXvse?6|5ddW0=*FU67jzPVK
zIXN-smoqXjEW=!Qi&m;bMiEsYg*$|UG^98+C>FHv)^>B`<}zR3Ewdh#_h`Cl=843s
zC}lW^G;vL|`C;bS_Vz?tSem-(|Hb>4__x~qVE(nZtK5uHkUO(X?%dqn&*z=CE&uoD
z?_Zt=0`WB|8(tnhx~zRtdv4pCl_8hr7^bYM&Sl#!_?^XhpXSsZl3%Qy?tk`knRztr
z&eOoz*_rPjFUj6_vL*51x05F#?iSAFxNCIh!umT~gPhEj=BYGk96uY;?zo~-U&zsX
zlB4pw?>k@Jv|GIEl=Vcb!!>I(t~_(&IlGh3c8cV)dtX+DepmC}FZwmvHL7`EK-<Gb
zw`!i;uM{X0Wz$`}l<Re{_PO>$F(03VPCZlOAszTMSf%H~hQkbPcVDHvJ-q8~i}4!6
zkbmvIy-^<>%8xGH;;p&wvCIw5;u((DoLe7e{0cP?iHqI!)}~93_h+fpbcs`w-4iu@
zm3FFD9nyO<y)`*qDcp7!YfFk@+PC_HKRp|pZ%#?6{olwh^4yK(#}AGjTg-Hp?C9{<
z)JwEHy6CL9PI0PEozsh+qc`^-?N&bIVbdG7aoV)C%m46Z@t;b4k&}4t>qK=OfjTvp
z#BXRr{m~V9Gp4XJFdP@eTBU;{IR$CJA5x`H4Zm409WMIM?e>XFE*nFS_uW+sy=*Zp
z)v`_HLdDAN$7;SZw>1kr3!=D$vz7cOq))kcqBGjdd*OxFrAt|7O{+ETZd_u*sb)XT
z{Kv<0()oKngbVDmHkR_fw!={0`1w7{@7B+2(sy36|M&a2J@XHPd;SZA+jxu{1$xAu
z1Wqf|Y+SK>!;!^Ghy4~Dy}MFvW6NpJn1h=tY?L2IO3wFQAnL|+`(upI{Nt&T=hZ(l
z?GrfOZM1L3gN^#C7y72B)L1s=Nfg$&?uqYvEc&r7G-HpNU!7^^`iGk$>S7scoR7UO
z+%wUEf8Wd_vn#w`)Og$M)0N9V`J=DpZQ9LizKe@eDz9xTS$tyd%I;T_tRl94z4Ty9
zl-$G%Mythnuda)l)iSv^-u-T>r)gKS-hmTSGDX?Dr$uRGHh)&F{ZM#yRYogkabch-
zSMaWdbBlEPdIT*)w{7W4yYVh;>BS#w(t@8JJT{l1nQy{U-C5pI2id(c!*jf4w|8B2
zeN?=7D_^?TdX5)+rMH<K?CX8ime=q3;><P8IU7v9ZR0Pw{W|B-;WLBPx8nSVv{l{D
zO<!|<^mVE_H7DFVJ;Yl)JAd=hr*qU=oF@BnD%P%V`DXQACV5-t$Js$;KT4RVPPnUe
zx99Meb8l`|sCif?>A#rDa96G7)v2`JZ{HN!R7y-PZHQSHSZLu;|K;7g&MiGGsmb2y
zuE!p2i`4${Ehj!eXo`2}<!i@!4=q{Ay{LJze2G!m#|f$7D>&Y*F58*q{UXb%V9BcK
zT(dmZt_kmZ8P*zg$#UY&1s|BSOG-}fYW%lNXT{Vg+qWmovcFuMeU0_%>YbPKmQPfW
zlQy50t-gCxDXVqjG_PBG4sW;EyEjE_`4-i+Prj$SZ`jll^Pt_ANuFbyq|C3Kye?N4
z-L7KaDr)-sMcd9v>q7nirrf-~Wv3^@?JG_P9c>m*TXjqA%jTHH-tWGPy}Hg(Z{_g)
z-P8Z|Evt7pZ%tdjJo6gMg3H&Btcl$EY-{qaj1PTkKmH!dWNrUw#u=;raz|9;gVsd}
z48LdF6@1X%lh3bV(ZVeBF7m%sUgWDy3K0k0KLkpKi|%PIPpt_*`29mItNj6f)s%P7
z%#F91hKc;)u%Gr(*zAbgyVO4+2hV>f=49VnXffyI$|uIX;uCjYuZZs4%XT36x&EdJ
zqGujVX%hX@6n<VUxVhZbZt;WZKSbNz);DSTXwP8y`PVco^^e;@b;IQkw*QcIue{rn
zuw3M7(Am|C!=IV<x6fVYKl9!^ul!>MZ+#qhA9%t2+x3{<i5<%xScQr1X<sh#M{f1m
zCuWA~igQFuC+5uyoO5_r`PPjgQnQx5JLPTuz9^#T*O>{?>~q)38qRTjY*5~*;?gwt
z_4#?**qq;;@5$wr=FJXM?X|f4yI`B-o=Wu#%{N#tM}>46id@*{UuSUQ*V#qWPR|OA
zkgw@&&iTAr)h=IZ+p6vt<t#@OICA)BE#cXt@#2PfiH63RkjFP3SNiEZtXOCerxjS>
zEzkcUJWIXa#eIU9EI(&+OzQ{5bvIx9`=xNb-E0krp{VlHu1E3I;RcfigOqoBChca`
zESFxmGn(VRLrc<>q&I9&N)7!swkADdoAft3K*i_z^$os<Lgy@*FzG{h&itLdVXwCE
zy0hge?QN@8;c?%+XkMI~M_<zK0-f)6O0gT)oscSPDJ*+>ZhzRjJ>_%HwHO|H&ZtpS
zX8c6<&)WNrD<|8ZEjM~nZP@wSpgVTMMrr*=b$eQP;?z7B+Z4`Wyy@{Uu}86O`NeMc
z4~GAa&rp42(I=2U>4$K|lrp(P$LFqBbY$#%B=NwbU(ods=P9oH@6Ijpf8^;kN2X?y
zQ%zG?i(maN<!|aG=UR(<y!W-v-CQiFs3vQub!3OY|Auwhd_5eOY;QfbcsJ|S_d6V&
zh0oGB|39=o_WkAU7i!zT`?(*B`PZ)8WZQnz+3C}a-gi~(oi)Gq&Aw4;Hvit+J-Zf_
zuKsZ2)ZG2g^B?`ow!8a#?x`2fGL|YYT&IanPhTsWa^bK_?VQrGIU;^V>9b7LY{Xd7
zH?sE!cWt`lCS#<0D0u#v%S$3ANIpM#*~h8l#qp0UncUX}b+(-Q!7_j85yv-zHBHt^
zj=4Mk%|0<l^o`LkcBhE1nLZX=hvvG=9Jji|yiYjy%y)^0Dsz4_+^8^kqmt-<kM*0U
z&C{o^eD|?3*RtFWzOHz8D_`dPlN*iY_H<>Wls-MT|M2QME-Q~kE0*U4ZERP0vvmF5
zFA+1&32vFq$Jx9n)wu5Nx1aC6JPcm*{nsv0>-pa@|MIBDmBsxLUDVRrm*H%++uY@O
z)bh{uhtJ)9G1L2fx5d>v`dr_{s{T)kUw><haAKqHvcP=~>f$o$#(D{Jo%QTBx2?(8
zEAaI5jPEm~N@g%GTI6ZA&1K~})kSg(q+J{Rd)@A8$bH?kf<wS<V(QnJ`x6crdH)WI
zWfHyb$m)9B(SYHuv&kNocX<y|-KNCeo0(B-Vz}X5{wdBVpE+8~FHM@StMsF+%=)rN
z(Gt$%&qbnSy^h@M{;?+gH}Cmx0rk0MzrIdhyIOvi(T28|j@z1n+he}8A7T0Xyy``s
zq3vPCpE`Fd`?eqWJm<Imk9Th$Y1H1(T*gwpKg`E@-)ZN*#Y)bq?8P7JQ3txZ9Zhp;
z7#SG2nHd<YFbBE<it@8klS=~N3tf?_c(etRb0>NGA94_BJ<rPGQez)*c+r~!EK<Uy
zkCg*^v=^yJe0R2<W|-T~!_4{LD_l+Q592?L$0Dl?Z@4_xdG~UT<^3;Z%ktmruV?tS
zG1@^)x-#e4#;NX_?QdnuIBp6Ha@D$dglCtx_yq5ov*C`A;(j@{*mZf4*UisGd0U;?
zpX}xG`No+S^CSfNqJk>7EuZna`1Z%vyJ4Rit#5hE*Vg^9K(o1_HE&CHaGP>+QK?VF
z*B|K_HJ7hDwYBtSt^Iny?aa5x7dK_sy1bbA;#~g=)$2Y#?x;=?zQ)!k^s%qCPe!Xf
zzH`OXz4l+Pby#UAwc9k_YDud1y4w4VQ(Si4+8bRZpVD5(X?cI&aaES<{?=u@5ux`!
zNQLc{?OA*$(?HI!{zNwKp@a2&y#*G{bIaMIHr}@gF-U)BFC`i9C2`rg*)wye^qhNZ
z+dS`#GZ$;{X5nI4?qy=j85eg8*KnRTjOl;)CF)fnn{KW1<@gs~Ota>>p6+~+#WZWC
zt2XC~pXKT+zoIQcGRg_x=EcOoz{1MF;D#BeNKpuGwiT&3Cl;rA<`t*r6=#-YmZf4}
zi?lQtvLM#DM(07&#BDz!k9JMd-@Wu#l0vUnl9Ni7)8+4sY;)3X=9W!Aw*OK6gQL@D
z{Ya?!<}UeON7{#*P1t!x_49K(pP#cW|Mq-+{(VN14>ruq{ze`zRU2*hPT>tH*^#v%
z@4?E(+O`yLOD&6}j;Y$}JRi4NPvMb{eR`t&vS)nN@kRN;D)&p-57`OMyYyQ_g5OSS
z{zJ#;ADbTkxb!h}!MRlZ4^xcuc272b*m>Zq#F@Uw9_w?t?@yodVb|-b<m@+{3w75v
zuGKl@*J6An)Slz~^tepk)X4`WTC8^+i`!YPb*?p)J3Z&@#IsL5mO5L$i(C-1eBtj%
zCY?V1m75nz^-4rM=lcEo-%1&lvrjwHp3mpg6LwGkwIuOCl7q+gZ9m<D&oBC-@6cA_
z-zzKVvsGyA{@79}joZJJ<Fj}ER$g@K%bk_x6Y{6X<t47Xv(L|H;z3hk_KX=8ESfWS
zd!;4`N3+c{-1FzYVSLhj^CKI!u2OEh`!&lj$F2X_jki6r*C+3?JN&JSeF^)lxCN43
zuU~iyHQSk=TEuU_x2|;2Tu<w69bWm)SK%M@pA@8I`o2=|e`NLZnM?!klZEp+Jtb88
zIb_vmufM7<V14Un&yjOJzdzn=(p)cS)2iaHeAw7vr{t!v=Q9OYf7=$wq5ZUQvW;K4
zK_T~c!P9#e%_?6sD_?N2{DigF>J$Q(|7V@Fd8)YnxyEjx$U8^(Zg3H^e)3Vqqch3n
zPm@mnl!Vn&HO??{A7T--Tk=_=%=wS<6sO-3=7;PoPR2R+y{ntAQ?&9vY6;Z3bc=Yp
z5Cg+8Ev!8YjMNJ0S<H>(t_ZohHBS4p#pKD%Diax+ZIt*=CQo8;YWQH}5;BFwfTOLw
z!ozP;<4K<xGZ_L`EV`ogHE{K!gxc_xYr|YUI|A3NeV>%Oy7sPJ&il7t??vCP&ab{*
zl^y+Z|F@Yl&-n1EeS7@)jcxV%z1MEN{`h`t`Mp0sb_%<gbo)QM#P_TI=O0%)`IFDr
zOa0ZI{?AYT^^=9cR&Nt7t?Z0^EHk$xGuBG-&9$3{<7Qqn`Ib3n_HnD{FGCKr`TewU
zdOXoe^y3PfbI+S>^qv?$F^+nyG?(v#gna+iOq>48H`ZTPk$eBx`000+pFSd8o>sAk
zC)dX4D5sxH@9OdS`q3og<mK9$ZO4pDxD2|V2=#iGu1fk=ICIO>Pn;*ZxPFFkHETVz
z4z}Cu;=eVoOm6<+;wQliKE})yE8cqSb4k>x&!%w;3VZ!dS1k0~y2qnKEPId3^3O-P
zyi0clq=|jrDRuo-$wq^iW5qQw>S31tY14jZEcdXCw-bL+qqu&;Jy)stIqt73miE6k
z&71H}OK|6{<Jm6ny=(4h?7yB>v(hh|<8f(>!rrfsx*EPPNyR^VtakG{+uW$i#b3;R
zR&0)VUNm?62T`lpd#)ZoyNL78#YLNsuC>dXe6p;!+2=^g8jkk08L=mNj;@ioJR@}O
z#{`u-68&t4b4w@o`59*&Qx5FiZs@ztIQd#v!5OaOO&=flY0vJ^ydO7rU!L{l9o~m;
zAD$SSSY5PLvN!VLu^p1zP52xi9bG>2b%Iub1fQ|!RrX}Vj&14Li*vKMs#oUrMh8}#
zEGulu+#+RZFVTBcdiOS`^?b&6nAlFQx#n@@l+^Vd#;FarV$-gin$T@?;<Utpy55p(
z!?_!?zTOgDEw#<$$~FD_n|77Gz4dguiNVkCJmd0Np|KwmR%m5yjoiIDY<6LWXxs@C
ztt9Q;YphQuhe(y~y~#Q2&as5(8$IQ&8xKF7xw-qzd`06sVX4LuJB);KS7&w4(p6l&
z%2H&F*PGl_gPfbE^kSIiv8HK=+zge!le<;r%+Vv#8<<V!J=8xVvMpk6;Od(p`x-<v
z_@!@cYM!WNy2VcFh(W}BLtiJ&Rnz8)_sooT7SnkbE5gJ6*n38d>eAOU?vzdx^ETt&
zDz(dMThzs^P8@;S%4gFu=1TIOdVger^*y;3sbIUr{B7wjHJ)DUTfCI??W-r8ysYUx
z<Jr_4nVh4Sass>Ry=OTToXD2@!FiA`hhgsNa`Whu4M*gjO<xkYwn1&v;fI~ZQx0*~
z8=jTCBz2iBm48RY+5&DPws&!j7rIm)hwHpwu&<#dP5A7gTOLl^PK)qa9?U$kVM|BI
zZ^vu;KD_C9!iCpo?lolJZj~Ds&MjOSXZhutvAM09@v2?clP5lX!rQ)N(v(QcoLh;N
z#h)D5m>Pv9imc}4fAV<CB*&gPOmX2oVTyaB+ojL=<=ja#;VR$d946uGSZtP4Q}j!9
zfvTtulT>rW5u=N1FLos#w4ABj-PRsowLIb6Qp3%>eqWYXFWa_RSTHYLROo(3io%x{
z9$SufsZBm1C%Tm(f9)yJlN0}#Yz<>eUbHfM_f_5-H!|CHDO^czeH{36@14AEqluNX
zq^5;VT(^T|!Q!?P4s)DkUs;?;DXc4!Fi|$1(pEH2U`^w-tBLQ@X3ffVJpN(sw5<y-
zPtYy7)pptN@s6r@7miPUZ1sJchk)R=6)L&4u__{)W=%al*Xb7H8XF6>BPU;aY|DCW
zW*~Scc=E<6+M7}jh#G2#o~YPfwsl*V|1n;nEk0*Ec7$ol8=e2o*w%e2CE0s!xyt0j
zcdjfy_aa2=Ra)_8(Qe<_*2QX<zf4-_xIl!};oQ5K%-YXSpDJ$KToiC|Qp!e$h$pIb
zXM$GgNZ1{AJldKs_~NaTQ+`j0>+Hb8oPGV0M-~N1NR)`*&As5b>+%Bq3%P7}9m|b+
z{XfNB6@CAv>+rH^Hxs)rSma&c-*I}u;XT6gRRMP283fqPDi%s+A6F}UWc@;eyF7Ex
z?FV`LE-h$(X}Ijs!oFt5HxrLZ#ixFdyQj@#uRMSC%vJ3+-(+4#Mb`En`^~d&+WN~C
z`wz~4`kMEj%zcJAr~7o9^PezxG<R_|+n+Z#4CT=@+51W?()_W+iWN61em*+;l<oc@
zcAkAx_a}VF{o~r4|J<4(L)ANZDQDNwsi|!DPqMeZ&})AG%$XtARR6M(cl(jd%Fc~l
zXQ!rmoHnuW4V!&>@kNg-4-PN-a^qm6G|yHKC*yCW{PiavRR6SVelPB9bD&(}kAi*5
zhr=}|9_U}TWwSrjpH*|@!FrDEIqZC=%$nsNuUBJM65g*`w{Pyl2U`PAH#N^p<K5gK
zy~D4wbidmC<GgiE`{&(BXS#Cu_S$J71?QyIKELsvbI|`&uw?YgUbnM(mPRIZQjLNd
zW-q3H(QfYVVx0dlo!9P|f99R!3;sK=KX@E@I(T`<!CIlhcP}@-eAshfv*E1=&wsEr
z|NdFYF>~FXFHB6DVKF+~Ja&qi7RMxeP3r<*>~B<3UF*htYuijkk=?Pf7HfiAEwc2h
zUmrDIc05glJ^5%F2gij=v3Hu36mqtPbcyMA&fK5*VY`ZS)~hoG7DD=F(NSH>nc2Q;
zc-7r!<<5$@({rr5ob~bBqsCfkAyOsWcUOHp&9?v0`m8^*QjUD}k#0Y^(L#u?RNUPC
zP@%Cz+oHK|(<E+pvd!+Eef#0Yw3Y~-kD8l8HmABhRcBlCwx&1U{0DDfiqpwuORLRF
z<3gpG?(dZj-rJ=+`?MEt-DJT%!EHUNF?{D_CuN*?-puN}$>Of}p@K(uf6i^p{I-0*
zJ1={e|7SPD@`?hh7blZ;{G9W*;GoD4lj%zzKB#@R*!}0rm$E_nfA_rXJG!>dk)QKh
z;hus8YtCD(2#vI_x}aO%yz5!|La|e}>m`NXOybIT{9xCHl7gA`O3b?sIEemvV6tw>
zmy)H6|C_X~eqnO$=du2oZx3s<wC(fas}-NObcrb6HKwm228F5YThem$-)hBMREX@j
zlXpyN&E3MU3z~Oblnt|Yn?A)#^!`uTwC`a%_qsnmc{?Sr(>Zdt<307Vl9>x5#2hch
zDnuBb$ojv>*JRGcvu_{Wxf5~yT7=S~Zm%$lX^)guUR>U`;b0>7|1)oIf1I0^;xt1d
za_{vsGRKM?u5E4?<Jlgu`)TZfUfs0<^6Lxk&0446rK)||Nd0<E)XmCHW9_nycV_Kf
zw(AR1tW4$nWl@ux?BzV~-u0FVmA{#GH1I3WiuJcegHH&Vd~RD)5WOe1Rg-6eIH$SG
zAy4*a+NYkFP3x{(XSVXpxw0<X<p=zjxkE4QESRS}<JO{y>&|_3oESZWkN49P#X|Gs
z7iaukGQ>8iocA()-n!vdzzg9A>@OE3JFjeSZt9WQE}!6*rnDgU!sj&+#U?Xq-oBoq
z-t&{sOmf-OJ-659rOunkJo)Rgh~vtPocn)V$?jV8(eaPf@7|-`>tyY|X`ej6?O4UZ
z!}$88zNYXTqiTg4HftBIlh`8lHpN$F$>E;6M}8k@@?V;r7GS+dIcDAJS4L^=VNoCF
ze=KhmS?7B7ZEZuKtkyBN2dhQ%wi)H5=&#={u3PdTX8sd%t=`#x&T`DSlytNyaJ6bF
zzc9;-LoL^O3PQJk_V=i2-`{m1a!d29juYOGmsYsxyXU<>^2PP)e%b$p6T|;DuhN}T
zZ5Q~@VyE__?-~~#-RgTk{X@3l&WndLBJC?@^-bD7eM{UY+qDvRy4Uwj%h2pyQvA~H
z3(xF1@0PQ7h^%H8USl@d{i2`F>!mRpF3+ygd@r%{g6XVNclVk+lK9B**vUgJs_i|~
zhSiHXOXdj~97~%dps;uM3&oBb?);ah8>VN^UA&^`^r`;KC*-8QhIFkBf4N<&H#Dy+
zRL|C_Kj|OGuT5{)Klp$6i^R^Bm9G`PoM)CUw|{E>HP|uu=H@4_7CSAAvsyge?fUoN
zz~IP+>P#Q!O8MTSSC79f|9;V8nR880Ro$}u3qPd%Tkl_XPM3S6{%77HPFs_Y0#CnR
zUZNIhweumz{qBd4<g8u`^s+C#&-_T!)@19k*WbTgFcEmec(BAZ;N|xx`tKHQUcJ{W
zRZt_1W9_r+e||jO=d@P$;3Dn~D?)Uwyf5yu<qX>JwR(@{u9NO7-x!EC?vlCEuDO0b
z(}$3w-4RTSq}J!$e*bpyDH)eD5)YsHdW77%xSR8y>8wRpgr@w~`n8g)Cr#|!tA#h6
zY<9Pwn^>0Bd;b0Fg_i9rF1+{76JK?KQF7kx2P@C%o)VoLYVY!SqxPRkwP#mcG>&}5
zIjySe>7BN}J8Pa?IqjaIS#r`d+}yCfWf}j{=>jiLrGA#`tFhU<sQnqc%Dfn@d3`|)
z_j^tH%r1r;snCt}6Tc`H<sCkuv;KCbc4@G2tgmi~aHPk^FqcSaTkp4Dgjiqh-qKsg
zw(7fvi+m9K+(icc3po0h`pxsO-Z5iQj91}2_t-lg)|JX{ALU*-JGVsj{nFJx7~Vc~
z&(yc?yMH`G-roQE$JJZr-(M#EgL&7)dVllJ@-M^wY2Q62fBCKDq<=!cZ9O*Rr5p}_
znf>FQ=j4}#RZM%`C*&9{b2L6YuT{@%E?Z{si`QRNUdlM1vu_pMc_C}{tgnuFd5glm
z)?dE7;PQgzz1Q03{$;#YCA2^Lr)3qJE&tTd)*Fr(dVZBDH{!oo&GPHvwFCQI^JLwp
z`8|yfl(%J{{axMAEzoxU#I<He_BQ<EtTJr0DmGM-Tt9WE;<6kDj+_U(q`hRknDum7
zW7y|LKH*6!)Q`CH{6LcYq`#`OS$8ko*|yQQNqX;=1^f0aeB`qAcIaEpPa-?zoxH><
z)2>%ApS-i`<iRrWXBU3`l{@lYpQ(0B$I69=*o38`Cmxt|w8@}RE~@CsmEWn%hL)_?
z)D&j&Z=1=o`;1F8ul|}w`D=&IOFuBbST6j>l(R6Q+SOg9UU+rPflore#4_%LI9z@>
zUwy-2(*+eX;||_Pa#;7{%u)-Ljm5%yS8cf5exjcD-g(c;Q*xEgk{^A4ZOawBuJwU=
z)~D#dTehs+GyNQE%{8X5>86aVud80}Y3mZ%_w8cQj79U$wZGI`Q5dd~eTCt$&^HUs
zy)BM=%w}`9p5@vQ=+V)$U*qJHB!kG#GuLwTcK*A=GQISYx5E}ag`l@}4a<Ji`bk`N
z|C&(NqQ>vzFJV*rDscb$4G%5WHi@j*5OrgpS7q1t#7kV)6ArvR|MhikNvHRVbR~r+
z3o@qIc9+F*vMs%<@=|rn<u02h|EtF{bGNcTRP@%9-Lu4aYs%b+h5gr7KlS=Cv+>aB
zJNf~^lbJHE?CQ1MA}Adt;PEb9cv}$5h1)R|JNIh(vsF&EK6o*%;+0y<!)eQd6)RMk
zydBiUIp4T*O?TRT)8oNa4sPL0X}4MLoMw4VyUV}UPvOfyR>p6|Ef;@tOuw(#V&`<B
z#z#GJ`F4iR`wwfXW<S4B{xjy6-1Up!clO?U82d%<zL))z@E6DbSp9ABZ}Jwp$G0fS
zLNmejI>QXrlw!r-hE?2qOg;9`-RGRIY;%8kCc|!Z!{62|-@aezx_aTO#-Y33rYX)-
z*f;UA)=BvXWft;WxWTsLzun&MjiwJ)98*~_IkWK||BH5xpD}ea>s;*Jgns;t{KaWf
za4}~e`>lI!d2imZ{#xl}>^aLaBiFWm+NICW8A7A4=^Lbdu24Su*mlWMJ^j<2zn)Le
zSjZlG#XtPO%(T7Lo<Fmn=*Dc`w5EUCGI2E%+rSxDD%9kb-C;QQ=h;_A%QsO=o-=Lw
z!!d6w+kvG)m%_d@swow#ED^h~o>A^U--1echtKmhSpRcRy`7(7S-W7*MwVMF0eOG5
z|4M&*=5EjU%R;NZw{%I>`@`Fq#81DlKcLKhr}g~YmkZXvsBga3_KTlkll#l<oD*`}
zQ&X<1pL)%#K5fUUyRPp#`7f?>m{?ub>#n$3J+S-7@0D9uZqS{`Fz?voEBCw|Lo6me
zxjpIg?aj~ogFJLDUvz$Wm~)ng+1WaChTq5PqVA|HP1i5I#xC+>$>Z-!x<a&ePmO!$
zAjEi&n@2|fstMD5`->UBj1~Sm&+@Nt%(Yu|`bEBm(m%gygL2)TB5q^ba~6g-T+csV
z;py}{V~T|PRm0iMJOUZ#9qZROm5INpS=v-)&etNSweGG3@A9P!{<?78@>f~#SL{c4
zk&^oTcAuZ`kGza%4YN@eE3=;$njXG<ZnxYo`NWQ+|GoE@N4z<ucWJKp|AT===RT<X
zm;ZY)(U9NS=*`#VSJtkGIxEm?-(2=3KqTUwom1X*<B%{3Yrl`_8eg`oQe^9D$$E0a
z=F7h|DzzPJ#Cl|0V|T3YOcGyox7Rw_{Fe4bJ?CoCTecbBn~tYnh+F$-NAtIxrMtx5
zmP*WTDYL(Ie$igJ&#oEv?cdn8nC~in!Eoz*^FOsM=FNGs7rw~mzq3c%-5k$vcWE&b
z1A`_z1A{RG%F$!VIhpzxLscq{X(g#ekYy0Xka4TEQ^9MdkN@8;{rUx)v_{8y?j{HB
zDGL{M7zWmGoOBT`>D*&jo-KQIooTu2N97-}HHw{Hi#p{G@Tb~rU%hzK#3n=Es`HlL
z@7%Y1ey{lU{Qdj>G6Wp!ViUbAwQ0snb<c3a)r!59ylP@0Q$lr&p1i8teA0IHqYAUb
z6-UK+H!JRVdMoX2=^6EyEjihx_tRSyi%)-?^T6!zlafupeWohzeHG23?{WB7+2*wF
z<$In@O5O43`p)R5hbC5~_^?O&bt~3CPfh4Pp3ijr`;uMHDz`<HR<2H+oF&&I(=Dl{
zsP^c!=If-%``6!;W)ye4eZs&fw({uxqRUU?4&UKud2AY%W+oXFW0F!7JDG=hpSbgi
z9fu6AOuHSJX|zDfta?KA5ih+(-MZiJ_kTM7^WLuae^;t~4`(Z{zB)}tU(WMrD4W3=
z*@rVGpM1JObjn=TpIXK~GO3)++aBh3@A`XS;#$L7PmT4v&nUO=sfj(kSbkQQYTHwn
zU&imZKHvYjYR<hIiNQw4N=3~-MI7^O+nM}T;qaTnZSxa9{<!OCZc!O7xonr;mkpn_
zRDT-Qytw-M|H8yaYk#bNS$1@(^cmgP|NImxrtZG|O;Ns4%j?V0Gfvz7Z+N3Q>DPi2
zcFLz2GWk7&GtRus;qS_;am<<?!rJ^r&ZR5t+J)n<F3f0Uy6JZNoyH>;0WYD`FDyGJ
za_n+h{3R$z-g*ts6V*FihEw>Q6;H36VA!@<@Uh_91urye+<)y{SnuYQcH{d?h7FQT
z-nrNLHV7~-E$LHfee!vS#KQM(rrDFSYFyp!FV8A!<4lRvp0?_mVs>s(pqQQ9$9EI^
zd*)y4wcT)+S#n=T>Wr5KMmt3EW=~NwkW4GFjInf8XMIuPr^UlF@p`7z`6W;JFC1W%
z@-|sCV`=?@M$3%-Xlps$FQ&bj#Kgcbhn2`Q4@vHjIRr?WpBntu548IEGXJ(ITVqd6
z*O=85dP_G|^O1VTF)3kXMqwVd|GaM_rsb5+zI*h=;vX#krn)FxJi`8AVNdxoZKvBT
zKZ>8<vo3yT`~2LQ_xAR6j4K}9R9LrQidyoriH?zN#hM|PX0$I&ROIDN&0KX!XIVk`
zyx2<x@rRk)EWXd?*}2vB-^6IX^4ZtYUfj+ro|$+cyM5d9Nt=^)m+d%Z5dFsX`fSZy
zLzS#JXM>dMIpJ&GZ!c^4KkdDRQ`x6icWyq~r~6oACP(?(2FLpWyw<52HBWaPT5oB?
z!SXgLd*1ymv+VLM4z04@7Wz3#_nlcvxA`6ZmUxR^b;g;?IO1<O-hMd$dS&;o9x?B~
zEz6wNKhCZ=!O(VH{8FXe{8h7D-&}4o3C$Ff<Ptw<$n&u(mc?=5t}PB9qCQS~^_$B#
zzv_2g?4B)8FC4GdI^*5ZmndOXBr);X#6BIKq_~wC6>E|uwRwc^E}j4Wwx9D4t9c2b
z$9a7WH+1#Ao^k2Jl!Zl_uXv9A|0Qi*y-3dJ%p7~;s?)BPd5dSuzT|Uy*M892`cA6E
z>*AeDzkfMZxm))R+r_u#3)Dq3m#utu{&Abl-yJ-EuSxh$Ip!JX=Qs1(l2g4GW=k*4
zn7zniZRBT(!p$`uEtcgRWeXN0b4^?D-Li${vf+sp8mANHeEP}9BfNu6!+%@ismZ-b
zPcG-W<l3!}h%nr*JkeV&@d5X;I;Y!qZeDH8Q_{D{DE<|l{NJqYTuzPkLlcGUjfqE{
z-(TsiF!(95Xr9>GBK=G`$M<ag^O@AG6P&xwx<BBZ<bG)3%%?k(kMQk!Fso_yTxE6R
z3kI@h9S--*YW$M7uwlo<L%DZeCb~V^H8V4PhVSZ2hG(>*k1W|WN#gE1+Zb0V^UT(X
zzqnCnAS%=zg(R>rFi3GTFj!+2>#(z8h?xXk8x~zI6)N(NkH^Po=Ch`SrfO?Fc|@6b
z*KW(yHSxN9CE=v&%FayfvIfqXB54I_9WMWy->-XG9sim;H^w`B{$JUDe5Lan6;d`V
zjXd-y=G)Em_xbg|=hXgqygt63y@1u;!=nAR(lpgMM`iD=@z~MruewI*?DF{^FWPcS
zuJMdGeoVmZ<4=Ko4%5uPvwv91!oQ7egKbM;%~5shJ(UWN6z~7{IC5R7i09vuW!c|^
zn$2ZyCJ9XIU3AJ|(#4X}y<sO;KdV)ZSMyXz7Mb#CT2fZcJt3}d^U@4s&P$|e#u-gb
zitH6X6=gGZdrq+CjoHfrLw%!z*QPEG3jCsEzB}gyyGqDemctRAwqaIxR@|Dk(72~v
zd1mcXhtBDudqvLQTDEMuN9m!ZRqk6w*RBzhTvoLvk~KIxQbtGO*SfG(Gg&M}>^QeE
z-&&Rw)zU5eaJ^M>+s*5*CvUr0^i}Jvn$aaOZeK2myxkWp)VgJN3;7ylZ42vLuX)ww
z$&oom;iej#I=-G&i{$b&pU&D9w0_pARLcoWdvAQnC_YlszS2b1Wh1X#&kdequLOIg
zWt&5C(`U-hnf&5hnf#o?uRcE9d2Q{ilEWq5({}8iTIHClB=V6r)qM5TlA~ptem|D}
zd~^Bj6PgFted9fGbLYhS;oN0PkHi8`y_#NGywrN}+ozA}V-LlJ>sPKSkomUR?X1p<
za?^Q-GU~-^8*fkgXr?CN=6`GIN4;gKXA+{0Bul<`7MwUe&>`@cPJG}-?L|kzzqw=>
zef#L*66waei^JJX^!_P>gX<6IN}6TyOek!4pK_(}#O(Ky=Sq%hD>qeIH7?hR7WpP^
z@b=of3tlsqM!8;n`>iu=MqTXIg+<bJFV6WbnSbar*W=??!|w&JNse%N8M8zG@Zl}1
zS3l8f`>)=y?)9MwZ;o%vj=z2Q=s%N{8!oJV6cYSMQQ+Mg2FbIJ!t-`SA2L|5b(O}{
zz3X^oubMuX7Iaee-?gi|-r5}If01}~uIKmQ8~a`@S|)z=*D*ck4dSAi|5UDI{O;XQ
zT|WP4+J#jgoqfMmXuK>ruWl*vPE|bZ#s8z0ANo0t<=bw3e?iC3_O9=bXY-r?Ix+LB
zt^8_zNMYH5_iGn7eBQyp!KBaYXMg!sdZoYB`^0srTfA<?z0!B9-G6ZN<GJ|<X5STD
zA-swA$F(HhAJ>yiM5QHe{JWmbz_fKTOG($h=YmT8G6zd8M+r9?b6xD8c#k7=fx+9f
zJrxtgzW-CN{2n~NyJ$ZzkC}pOVn^cNOM)qF^EWs(8}sNt+^Olq(fE+fO{~`O{&|V+
zizQz;lH+phkE)#A;~3A}$f@{3SkC-ISoX{J;v3VS9DKcCpTm@<8yted+(!<zA93#b
z@K#`xdrP<b2A2*ofu;F97g`HMr)TIT@O;-`oj&(R(c2$IU-CaQw||w1-0k%-Hv4&_
zx0q+q++$3hc@LD^_s?Fe`Dy9`!RHB6b&6(RsxkQ{^q7}r{?{|PlOtEYjda@dH1d$H
z)7_WHFMs-CXjk~_3eqJ<pcT23=k5{R!^FU#z{<d&gIREaXZMj8WkRZENTu8x91T8N
zWZQP#w@OQ6L>x857wVqs65kWBn#E8t^T-mm=6q-C$Tm==tSea0bzW`Xj|H7x6I^P)
zxl5jZqqkJVvu5U8%inX~*W9->-v7VmKVyQRwq{B|K%kFelN0|hgZYb|wsp<+)LAm2
zVcs!a9ks&fy7wz~Xz&02WYdyI`?}b<gsRWv6oxx4+jRQdjC-G2R))8{*42KolIw6%
z-06p0X}jvCH*G$t<GjAAZ$r;hqopcdhlJuH9ZlkIga|}$eLdw)xS&}5#jKp-=c2dG
zG$*aSVVag_<bI-9qNmcjY04ArZOJd%4!=^np1tfsywUNeUqg3aObCk&$~Cb~j1JMu
zEXWU*3NV<VKWDm&oS%Tp9NWf{S#xZ~_Z*0tb@jD&-^Hwg)4>K)?E5G6O<u!$@}IYV
zQTxt?Nm&=ASy!&QTa@)@=LA`a-3MMZrCc&-Nn~YSo)s?B{?_}X)USzG3uHHQsrjv5
zH(SkoM&#ES$FJYJw&T2OM}tZD_MNjg=jd-g{H>xd^0(!GBb^gY!CG=z6)cl%qz-vj
zuDfVtd(ot_p@cK)Q1ULDyMCrqKDrzZ*39_!|5#;PYI61T>nqx)pX+OT*P(rnb#L_Y
zscqWs8}C{EQkYueo|zEa%=Q1|zSW0fvrYEa`dpfQ@<Q{X#*=!N${YLyd6fLBvn<z6
z*9>x-Qs%I%f@5h#&xvb_i{b@;zgfKgg4TUS4P`e=<t>+)Y}O@Rc`ko=>x9*kMY^K<
z73&)AC97=P^h$A(eRsTowG+Q&t$bUs`okB3mjwSbeOnn@IK!kVYh}pd`9F@YbdnR6
zy*mBY)#I9SA6LIFne_kBq_0zS9<ndDp7c1e=)we<qiTktQ59~2nc<UFKD)o@eNbAi
z{cM7orSiT2dFf9l@0z65NS35K9}CjZmj3c!QWNuvjr=KplBXa0$=7>x+d)hDssGWc
z?`;P*6whE{U|7Y*z(B%@JC5avMXBH{QiQZPeQoGlKjA<D+wHNt-j;0?U0C;kJxOqv
z2$$)J<y=CBD!j9<m>jI%{x(W{`|fA&uJyZr<oxrP-`AyA#jb%rbx!oOs7te4AI(2^
z?##^MW#!M_{rmIvFMC7f8ubZBczLZheaduNJb$*7n&DDKcITs8*hG!B(o9thmM6uz
zPE?F6p6&bYiubf+qn_z87d1>+*56KBm=@i7Jf<S8_A`IlRK;iZ+xNVQkuMQn7ptLC
zb+$<7zuw`)D+?-i)T}$SAZJ%;mYFKE_1d(nBK{Rumz@YWxwdfkp;pzu3M)>ONgDOX
z{hIbVXU{XG0PWv*mhaj3)?(R>EjypjY>DQ3Yjk^l=fCV-C+~;(erl=jNR*hV_4Z@<
zo`mA%ff~OL6-lk$xtu3ZPioV{c`Cg}O3PxNcPLb@x%vC=i$m6S_m+i~MK4Ori~Lk!
zWcwm=<pSl_Eq$9KE*N$gTjeE(99`6>s<S%c;>Ir1w$^~Bz3KZjx9L3m`83Sb!<gqg
zxA#_S9%J>#Ha7B7Gw!|h;=8r=Xz;lSivRV~S3Z(smQXt!Zf$)(?(UAKvb<$FIoBoG
zD);_-(5qk3dRzCi{^^jHOJ4;ppZPiJ?3X=n4#~vc=@gD}-MK{n-|4BgFK_02Uom%H
zwvEj4o{Q2!%gfVQ4i_D5EQ!*ybzKyY(chM_>G=t5K_xG<T%Y&Nl5ez?-0I{n`a9~b
znD$ABzxS-d(KJJ+jXY0Rw7qd%@K?oD=}0B#V#d#0KZQh%m+g1>dEsHV<GRp|-RjpK
zc0M#(7clRk)-k4ewme$`T^}1Saxrz7R_pvm)`&fze5tzemC8T1LF^uH*w%g9xOk-}
z+qMpi=vS9?%SyT%^H{FmHhsLq;hMrzmD>1o+JaW6W!7na(w^)*Pk?h1>!ZD{VlRK>
zqqbPpSv+_o85tPTL4y&P1wYb21ll6K-audP!wv$qm#-bWvVN8Nc_kArxAnf79)TiE
zT%7l|zucAMX};<1lz(RY{}}40?C@R4GTBLd+T6;`@AoD?d;a~k^a0y*vL&)}gyyIO
zbFbrFc%OTD&cQ!ZG#~DlSa;LnsUv5#tbb(^(|RsJtNzn1^-?oe?rb)0lJ;CU<D{wj
zovWPuUJ)r<LX;%f3S}z33C*icT-K^@GJDA#|Dc!q4Q-!K4m6u>Xv(?t*3QkZWtW)k
zTQ~PuyNK)UaM7gBpo|v>s@1=+74FZnSu;0i%2TU{{zhAWiL36sSj8E0e_ce&>9^-*
zuWxx*&wc1@*`sE$pFP@}Z-&fv$Ve!irRyZDJNxa;HK`xXWTZ_u#E0CO<#jxauOQ+`
z_7jfdN2@=vqlT~ZMX_hEm>C#qxEY8$#?C1<Ex#z$EwiY&B)BvQ8vc-q_GnmcuyD9Y
zUEZdG!p#{Tyi>Q`@Mw03<8WN<Aiym+L8vo#irJ0rz7dzNrd+?7v#Nff{lfo;&TVO)
zw(?ta`md$`7uzqoTwI>ap|;^*;j;L5r+1#WeZKSjyy|cNzP``bXIN4ge=PFM+(l_|
zZW6^Zl9EDVsX7m$7`%6!``9@{r+5v=4PHs3Fw1Lf8_pF5A7itdYS`i1H>XEno5d%Y
z?UKLVzW-WP(w3-`9yK-OXi)j;s8umngCjSG1^3>alb5kQboIJONhkeRZF%~JYy7JY
z$GclRyK5GcmNnb))U4f7hb0qlWL@!^RqC!AdQ#}r%+#&B)<*4`Vct7w!D)fGi|=Mn
z+qNvaboT`f(^?}9>9&QDH&;ci&t{qZSw-^gs}4UY4S9#%9hWP`+8i%y+*p<RT>j>+
z%z~P822c9#Mr_HLyngP<JIr6%O+!O_y;dbe?+QFw618m4%JLs-e05h>alEwpb8)+|
ztWD<=zB{b5p7Hwz?`6xgl*^i|b-$-D#%$*d9`{B@9{&4YT9upqtY1E>Hd3CoB_mC8
z(cU-76S&h3YX#5hkdb+Jz=wOM=$ykHO^JI>7q%-_DoGxFwwr6)N7m!hxw>v7c2sz-
zcp$egX~y@SlIs;2Gx~e=PFB2Ip~@<5pcYYkTEO1wz^2g5D<+5IYJ;xLn*Yz~a_W+q
zVkfm8D&?JXRXg@t_%hGI=09hq%Vng_er(OE<)xG~^Wm!J6V3W9E);xceK`Hq;i=}U
z+4l+*IqTVn>Ap2sxL7RBzwY46xXR_PH+DX5k}#{*a@rfG6{$OYYn82)x%2ebLHyq<
zPxW5i@HOkzv|k<~=bhQKQbf<D?YQB0`RtVN>f65TJkMC0KgLPCDVQ*?(fQTC8@^Mw
z2UOk4E?GZ|Ek4~zbWYIbWs`J1$5!04j$0YFm9PA!_M^X_*-~#jzW(4Z|BwAgeCHg|
zR{FAl>#}tZYxAN#mrwA|U-9kB_p1Ih^UQb4<2sMlx9W@ezd88d_-t*({9BikdSjjC
zqn#RJkF%#pFEc%qJf-HaWZ+g+>BH_jZhZM=&UV3l?>7FG68;-(zvLdRYkV*HOZfc3
z+8@l!+?P7Os9#$5^NK&mTW7svnNl5sah1FJ4<^0X^2Pd`$lsHPj9I=Ld?}J&xah`2
znTc_V_bYELs+ODcBCYckZ_lRCo{gp|H>`|un(uX6w*O0=9?`w8jnhXc*KpRNiTsRj
z6xvv1Yo|Q%Op*!`P>I}Cxx;3y$CYAt-LeH?`wI?>%{4F-zr=Oe(EF8fddbO)-aUGc
zlb=_|6xPHXZNIX9LRtDt%bIyQfv=9MevFB8m=kwY^Oc}XLxo<tqTFHb9j-eJzRvfw
ztMUD7KIsqIfwhwvtY?B&R{vpQV6en2qk}7pOHy-*8B>@$>9p5j2a&e(H&*9v-F7YO
zs_Pw7Ct;T(t>P2J#Dnw$Ue?W$yF5`vSnaaYJ+JU4ZutZ4Lh6|UD;<wS&N2Rc{{PuI
zJNMM@*~fT*F=x60f3H%G$im2$vrIXn3tScIPuZxQ=S;cvYl6eJ)5kp)+)9_b=W|!E
ztJ<n)O3?e_ZYyW42ot{}YvycdI1o6W`}P)AS?i1_K4bPog^PQI@3<>Uvc<mJbw%2}
zyKJX<uu_*Wlc{uS5?6lh$sO(sE?)cHU*hK5UbW3~LR00oP21`ssx>r=3w#pHQ}s5m
zc!%lm{s@VvIJv0n>;5TeF03I77+jf_xG#`75^y|Rr13GMSMIuux=m}3$(vX@zRjvw
zyY=D*-nEyD4`+Nk!<&9(=At|Si$BLrSKf|Ypy$>oa%Ix~;tlMUUmpaXX6ih*=+P#r
znOeeAi+}4o%nMED^wN5kypb<eRJ*wOl(ykMuB$BXR0@{u`?96MZ2!WU`X#!oXLS}?
zuguV8J*&4Uifff!_M52xEU49ju4qORI}-!LZWaaxBg`m;7W+P#dC+#H3P$sCYViI1
zO93MP{4_<lix1ohF%dSGF)({oV=UpTGHb?!T+TH9>fH{9cTI^dzODPq{^z4^HM<||
zKinnH=iW?dX34xXPyYLy_chPw+`eC5_m8Q7;kcXBZ8M`A=N4r>mMqh`qHDr2Gko5c
zMb}$G_7*xBSRcHgx~9zbrCeOk?{&X4e%V!f-Az#VBfQWi*MCl0$+Om$I~xNQuCAKc
zabxCxg{7;l_Hj5*=a0VY&^u?Mkn{agwtde_nm$IHSh#M&lhEatZnqqYsCl_NXXT3*
z!B1ExZHRk6uiN|K52wC4aZ7A0v&{WCBlY)id2*Uw(n){Bq1`ygY4Zke4Ygmq_m&rT
zozdLAW3FV*vkBZ!5*5~;*f;n3`AfxIPeM*uTW-0a(h#s(zs)M+b*}XM`NFII3;S-U
zKG-U?<Gt<PAH6eYOv}^Y=Z<U&trs@?7VXPjUHN8l#zNN{(aUV#y}RGg_v`sP1*JU^
zb)Q<+9zA<kV_HsgwM2JD!7}}VRKcdmhx@ge|3(;m;i%v&zflzN$!)^Dzl@Kiwb^q1
zrEQsf>cqx*{!9_;nAD6N4cW9d`Z(O0lbZ1&P4lblv+L0YvklL<2~9e1<d)RCMu`Gh
z{-g)(8qV>3?$_VU+IaSTZkl!Q8I4Arj1y-)A9+tZwq)(hC!a*-e=(hJGH-#<a@*d=
z+Y6?rN^07zOgy^9a&g;l9r?{JioeBCla<>xj_<b23=F3@v5r4tCM)o?5{|~k>!8a4
z0{{G?IhHVbu^zEbN?oA4g;j7vg=VN;kVb^-#rTCsq%=4uvwak;VYttnUJ@{~@Xzy}
z{zuLh?>xCgyk{RWcsze|b=mit^zzNSE%*Pc`OT-$<|=r$!{m~X=*6ZEk4=vHZ3W&3
zHZ<IhG1hWAFr&em>6mLpp?^!qZzeeo4}&YS^RGB5oVqagQg4h(`ia$*s*}#Hp15S+
zM#b#anxVmBDT^0K6mF6WH@&*&xWU!eIsewTs>bPV?EbE6STX6>szVnJv3`!0*6#Wk
z5_fX)k@=T47u9*1h3wPkvb_`Aw(UrEu%TM$;)JwqVO<k=F7+(abTiTy=i-X&wfH4r
z!PoxiOB%m!zo&1d3XkuuX@;qXee!3mTF2>OwB^>GT2W2kLsd6tM@_zeYT}+XPn9Gu
z2W*Ym6LabLn@UNupAV+>%J|1f%GFxl-K4(U%gI|j_3FO8xx(TfCNT26PXF|9?&0pp
zqYtaZpL+M`H}f<`oipr^tUY}~YqHk*8GS~x8RyAy*3Fsxz<G6pyO5fpYC_Z@9y#L@
z%hcw?GZ!bO-`ce-Kl}I;hji7cE#5oM$j)ATt8`DoHl`YPRoD7n)70xcrNJ+!?KV9>
z-Q@6xmA6(+oAE|k`1YoSGQBG&C4G2O>-6+pjB9B30kcI`lQ%~$4dCc;Tj|xFmC<9j
z+jzT=sr<~z=0C4kC@tVSr>W-b|8j!I`Zb!7#s70wdK_TtU8THmuYKO0IgZxT&drRP
zUA}EuV7T_J_rW~#UWvw?-DC0fU-+v->W;4Bf1gIbusu*?XLq5a`0&x^(q~_r-SWMB
z&o_jt&F-)$Piy(Ze;(E1w=#2|S#9gE`o<DB&5`}2m52K`i~jD0qZd<@BoB-3I_vhg
zfvuuLt!e&)Urhc#v|q*iX#Dl_-~12n9V7TAE>k>Fc5%mM_QILB-yYm8!!_CC)%|d@
zw=z2(J#Tnp8FiC;%jQ&;8``JmE~vfOXw4bpZoc#2PU#~%HC|{i?cm(rzxaz-tWR*p
zpCt)L&+sL$d)|K2iE~E*gWiJv2Q!Ycu3cd%?sVf+*8Wz*25uL14d%079=%dI8M^4Y
zjZ#&|q3aInNfM0pSw{ub_fCHi`@ubmM_5igt#g^pqj=O@nR%}kynDulfq_Aufq@~t
zC^01`RW~^&Q#Uy$vADQ^oErd)0x)j?c#3lFKO>U}GZzB`2M2@Jrm5}>3=9mgQ|~!J
z955DOc<Tsa__~HT>U#RQ=?8eDYP&vlMiA)OqEH3~1`&`-C|=U2$-uw>*XHZ!=jrAe
z9HQszhHL`pZY7Wgn8hq04j2nC{B@iKt@!Zi;0CJ(5g;A=pgJHSh~ijutAs&%z!;?Q
zC8|c~1q>JlWHhCuEoNX~xC*_m0j!^afniG{=)wdn_k+Q1g;FtubkWega`ay7H4!ET
zh5{A_22k`P3<O06J_FHPbpb{B6_t*qB^jxCC7H>HgPqZBeEue)cohQ!!)*r0rJV>H
z%UDp>8NkB86Xpl(K44i{bvK5IfgzBEfk78$7&u50Sh1St4o=;n#i>Oe`MIe+`T5zU
z1=x+2c@%s5C@TZQFD}T0FwFEVjqDs)jdra_&M7U<EK7CE%t>`jPEIW@_D#%7OiwMs
zZowJeI5*G<+CSOQ-7d?A)q-%ywQtV(d1;yHrA3J)@H^hngE_EPHuEV91A`<N1A_s~
z;oxBTDS*{z^!W5h%uC6^<?yAI2D`#QnT8coV!$le(x@Yb)pVcybey4ZuIP1_C_4i~
z9v=gP8q6fHQ};_?H4Np_JM1BAac|pQX<-J2H<}C#3MghuC}1_y6LL{3cGK=2@aa6l
z!oV<<6Fr^^jj@{Ml%HP$x^|<$H!(9WxFoTt1XoCUT-&a4nTvrzS%QH<1I3{Y7Fdmj
zCIQ!q)a25Vd>jd@MQX|FO<W8NP7(|ZS}3N@bjE5bBvU}m200pgK%n2a3p094<77Xq
z=0bxNSMh^>!y&@F;!v#Sp+vn3j{6SL^8)(qc?er-!?D_ezr;bmGzVea)p&U22`#;_
zR-nl415IwBUwVTuFfD-r=M^~Urhs&TvLuKGT^j?U85ls7OI9L^FF^~q&^1p$yF><}
z8C0>%#MBI1CV<pjM;m-aHy-_B5QOn6DHv`;G8WRS#FlzM;f8(%2f|c=R3c2pb0G)1
z`_ZpDK-eUdL4-|^*+|GjA@rj~(XB#1X&zzKzbt%K;W&35-FWmP(-Fo`$-!qlG$!yJ
zppI^p4BFYN2&=Z05MdQ0&<F(=`tg+rOKy}CVF~`jEYW?0e%c?xGO=oWmO;FNS)_sz
zG5T3k2s7U_;4>2%ArPY>9HhxTbeE$aYJ{-mWfO)iVAGIi)3Lj}8|^>^gqa7sFwG?X
zcm;H~8=-CTN7ys92g4rB!o!)EE&u3twxVs-M%d}xPuNcE;f21Y7GdVQg@nz-vceYK
z(dY{p5f(gJLWBj7qy#CYu=@ag`53~2qsuWYfYlPj4E&<|1bw*&!lJr07#3k9SBOWj
zdjWk49bt0IdJK~>6BEQ}>=Wwfo<N_vL|9e637=I2>QZO489Rir*Ei!c7931PDtPAa
z&^>@Yu8*)udIyG0==}$zup{6h^s#1yHScy}Sc7CN)I$&oPuYh)=!h`ibq^8d;~s)U
m53fwLaTJ6_xA(&vhPXym0=!w-KpG4g3>gFj85rUYfOr7Y2S`i+

diff --git a/tensorflow/examples/android/gradle/wrapper/gradle-wrapper.properties b/tensorflow/examples/android/gradle/wrapper/gradle-wrapper.properties
deleted file mode 100644
index bd9ee87db3..0000000000
--- a/tensorflow/examples/android/gradle/wrapper/gradle-wrapper.properties
+++ /dev/null
@@ -1,6 +0,0 @@
-#Sat Nov 18 15:06:47 CET 2017
-distributionBase=GRADLE_USER_HOME
-distributionPath=wrapper/dists
-zipStoreBase=GRADLE_USER_HOME
-zipStorePath=wrapper/dists
-distributionUrl=https\://services.gradle.org/distributions/gradle-4.1-all.zip
diff --git a/tensorflow/examples/android/gradlew b/tensorflow/examples/android/gradlew
deleted file mode 100644
index 9d82f78915..0000000000
--- a/tensorflow/examples/android/gradlew
+++ /dev/null
@@ -1,160 +0,0 @@
-#!/usr/bin/env bash
-
-##############################################################################
-##
-##  Gradle start up script for UN*X
-##
-##############################################################################
-
-# Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
-DEFAULT_JVM_OPTS=""
-
-APP_NAME="Gradle"
-APP_BASE_NAME=`basename "$0"`
-
-# Use the maximum available, or set MAX_FD != -1 to use that value.
-MAX_FD="maximum"
-
-warn ( ) {
-    echo "$*"
-}
-
-die ( ) {
-    echo
-    echo "$*"
-    echo
-    exit 1
-}
-
-# OS specific support (must be 'true' or 'false').
-cygwin=false
-msys=false
-darwin=false
-case "`uname`" in
-  CYGWIN* )
-    cygwin=true
-    ;;
-  Darwin* )
-    darwin=true
-    ;;
-  MINGW* )
-    msys=true
-    ;;
-esac
-
-# Attempt to set APP_HOME
-# Resolve links: $0 may be a link
-PRG="$0"
-# Need this for relative symlinks.
-while [ -h "$PRG" ] ; do
-    ls=`ls -ld "$PRG"`
-    link=`expr "$ls" : '.*-> \(.*\)$'`
-    if expr "$link" : '/.*' > /dev/null; then
-        PRG="$link"
-    else
-        PRG=`dirname "$PRG"`"/$link"
-    fi
-done
-SAVED="`pwd`"
-cd "`dirname \"$PRG\"`/" >/dev/null
-APP_HOME="`pwd -P`"
-cd "$SAVED" >/dev/null
-
-CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar
-
-# Determine the Java command to use to start the JVM.
-if [ -n "$JAVA_HOME" ] ; then
-    if [ -x "$JAVA_HOME/jre/sh/java" ] ; then
-        # IBM's JDK on AIX uses strange locations for the executables
-        JAVACMD="$JAVA_HOME/jre/sh/java"
-    else
-        JAVACMD="$JAVA_HOME/bin/java"
-    fi
-    if [ ! -x "$JAVACMD" ] ; then
-        die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME
-
-Please set the JAVA_HOME variable in your environment to match the
-location of your Java installation."
-    fi
-else
-    JAVACMD="java"
-    which java >/dev/null 2>&1 || die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
-
-Please set the JAVA_HOME variable in your environment to match the
-location of your Java installation."
-fi
-
-# Increase the maximum file descriptors if we can.
-if [ "$cygwin" = "false" -a "$darwin" = "false" ] ; then
-    MAX_FD_LIMIT=`ulimit -H -n`
-    if [ $? -eq 0 ] ; then
-        if [ "$MAX_FD" = "maximum" -o "$MAX_FD" = "max" ] ; then
-            MAX_FD="$MAX_FD_LIMIT"
-        fi
-        ulimit -n $MAX_FD
-        if [ $? -ne 0 ] ; then
-            warn "Could not set maximum file descriptor limit: $MAX_FD"
-        fi
-    else
-        warn "Could not query maximum file descriptor limit: $MAX_FD_LIMIT"
-    fi
-fi
-
-# For Darwin, add options to specify how the application appears in the dock
-if $darwin; then
-    GRADLE_OPTS="$GRADLE_OPTS \"-Xdock:name=$APP_NAME\" \"-Xdock:icon=$APP_HOME/media/gradle.icns\""
-fi
-
-# For Cygwin, switch paths to Windows format before running java
-if $cygwin ; then
-    APP_HOME=`cygpath --path --mixed "$APP_HOME"`
-    CLASSPATH=`cygpath --path --mixed "$CLASSPATH"`
-    JAVACMD=`cygpath --unix "$JAVACMD"`
-
-    # We build the pattern for arguments to be converted via cygpath
-    ROOTDIRSRAW=`find -L / -maxdepth 1 -mindepth 1 -type d 2>/dev/null`
-    SEP=""
-    for dir in $ROOTDIRSRAW ; do
-        ROOTDIRS="$ROOTDIRS$SEP$dir"
-        SEP="|"
-    done
-    OURCYGPATTERN="(^($ROOTDIRS))"
-    # Add a user-defined pattern to the cygpath arguments
-    if [ "$GRADLE_CYGPATTERN" != "" ] ; then
-        OURCYGPATTERN="$OURCYGPATTERN|($GRADLE_CYGPATTERN)"
-    fi
-    # Now convert the arguments - kludge to limit ourselves to /bin/sh
-    i=0
-    for arg in "$@" ; do
-        CHECK=`echo "$arg"|egrep -c "$OURCYGPATTERN" -`
-        CHECK2=`echo "$arg"|egrep -c "^-"`                                 ### Determine if an option
-
-        if [ $CHECK -ne 0 ] && [ $CHECK2 -eq 0 ] ; then                    ### Added a condition
-            eval `echo args$i`=`cygpath --path --ignore --mixed "$arg"`
-        else
-            eval `echo args$i`="\"$arg\""
-        fi
-        i=$((i+1))
-    done
-    case $i in
-        (0) set -- ;;
-        (1) set -- "$args0" ;;
-        (2) set -- "$args0" "$args1" ;;
-        (3) set -- "$args0" "$args1" "$args2" ;;
-        (4) set -- "$args0" "$args1" "$args2" "$args3" ;;
-        (5) set -- "$args0" "$args1" "$args2" "$args3" "$args4" ;;
-        (6) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" ;;
-        (7) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" ;;
-        (8) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" ;;
-        (9) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" "$args8" ;;
-    esac
-fi
-
-# Split up the JVM_OPTS And GRADLE_OPTS values into an array, following the shell quoting and substitution rules
-function splitJvmOpts() {
-    JVM_OPTS=("$@")
-}
-eval splitJvmOpts $DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS
-JVM_OPTS[${#JVM_OPTS[*]}]="-Dorg.gradle.appname=$APP_BASE_NAME"
-
-exec "$JAVACMD" "${JVM_OPTS[@]}" -classpath "$CLASSPATH" org.gradle.wrapper.GradleWrapperMain "$@"
diff --git a/tensorflow/examples/android/gradlew.bat b/tensorflow/examples/android/gradlew.bat
deleted file mode 100644
index 8a0b282aa6..0000000000
--- a/tensorflow/examples/android/gradlew.bat
+++ /dev/null
@@ -1,90 +0,0 @@
-@if "%DEBUG%" == "" @echo off
-@rem ##########################################################################
-@rem
-@rem  Gradle startup script for Windows
-@rem
-@rem ##########################################################################
-
-@rem Set local scope for the variables with windows NT shell
-if "%OS%"=="Windows_NT" setlocal
-
-@rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
-set DEFAULT_JVM_OPTS=
-
-set DIRNAME=%~dp0
-if "%DIRNAME%" == "" set DIRNAME=.
-set APP_BASE_NAME=%~n0
-set APP_HOME=%DIRNAME%
-
-@rem Find java.exe
-if defined JAVA_HOME goto findJavaFromJavaHome
-
-set JAVA_EXE=java.exe
-%JAVA_EXE% -version >NUL 2>&1
-if "%ERRORLEVEL%" == "0" goto init
-
-echo.
-echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
-echo.
-echo Please set the JAVA_HOME variable in your environment to match the
-echo location of your Java installation.
-
-goto fail
-
-:findJavaFromJavaHome
-set JAVA_HOME=%JAVA_HOME:"=%
-set JAVA_EXE=%JAVA_HOME%/bin/java.exe
-
-if exist "%JAVA_EXE%" goto init
-
-echo.
-echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME%
-echo.
-echo Please set the JAVA_HOME variable in your environment to match the
-echo location of your Java installation.
-
-goto fail
-
-:init
-@rem Get command-line arguments, handling Windowz variants
-
-if not "%OS%" == "Windows_NT" goto win9xME_args
-if "%@eval[2+2]" == "4" goto 4NT_args
-
-:win9xME_args
-@rem Slurp the command line arguments.
-set CMD_LINE_ARGS=
-set _SKIP=2
-
-:win9xME_args_slurp
-if "x%~1" == "x" goto execute
-
-set CMD_LINE_ARGS=%*
-goto execute
-
-:4NT_args
-@rem Get arguments from the 4NT Shell from JP Software
-set CMD_LINE_ARGS=%$
-
-:execute
-@rem Setup the command line
-
-set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar
-
-@rem Execute Gradle
-"%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %CMD_LINE_ARGS%
-
-:end
-@rem End local scope for the variables with windows NT shell
-if "%ERRORLEVEL%"=="0" goto mainEnd
-
-:fail
-rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of
-rem the _cmd.exe /c_ return code!
-if  not "" == "%GRADLE_EXIT_CONSOLE%" exit 1
-exit /b 1
-
-:mainEnd
-if "%OS%"=="Windows_NT" endlocal
-
-:omega
diff --git a/tensorflow/examples/android/src/org/tensorflow/demo/CameraActivity.java b/tensorflow/examples/android/src/org/tensorflow/demo/CameraActivity.java
index 8bd4abb154..4e45f42d0c 100644
--- a/tensorflow/examples/android/src/org/tensorflow/demo/CameraActivity.java
+++ b/tensorflow/examples/android/src/org/tensorflow/demo/CameraActivity.java
@@ -333,12 +333,8 @@ public abstract class CameraActivity extends Activity
           continue;
         }
 
-        // Fallback to camera1 API for internal cameras that don't have full support.
-        // This should help with legacy situations where using the camera2 API causes
-        // distorted or otherwise broken previews.
-        useCamera2API = (facing == CameraCharacteristics.LENS_FACING_EXTERNAL)
-            || isHardwareLevelSupported(characteristics, 
-                                        CameraCharacteristics.INFO_SUPPORTED_HARDWARE_LEVEL_FULL);
+        useCamera2API = isHardwareLevelSupported(characteristics,
+            CameraCharacteristics.INFO_SUPPORTED_HARDWARE_LEVEL_FULL);
         LOGGER.i("Camera API lv2?: %s", useCamera2API);
         return cameraId;
       }
diff --git a/tensorflow/examples/how_tos/reading_data/fully_connected_reader.py b/tensorflow/examples/how_tos/reading_data/fully_connected_reader.py
index 9db8835d92..a9ed02dd1a 100644
--- a/tensorflow/examples/how_tos/reading_data/fully_connected_reader.py
+++ b/tensorflow/examples/how_tos/reading_data/fully_connected_reader.py
@@ -45,7 +45,9 @@ TRAIN_FILE = 'train.tfrecords'
 VALIDATION_FILE = 'validation.tfrecords'
 
 
-def decode(serialized_example):
+def read_and_decode(filename_queue):
+  reader = tf.TFRecordReader()
+  _, serialized_example = reader.read(filename_queue)
   features = tf.parse_single_example(
       serialized_example,
       # Defaults are not specified since both keys are required.
@@ -58,26 +60,22 @@ def decode(serialized_example):
   # length mnist.IMAGE_PIXELS) to a uint8 tensor with shape
   # [mnist.IMAGE_PIXELS].
   image = tf.decode_raw(features['image_raw'], tf.uint8)
-  image.set_shape((mnist.IMAGE_PIXELS))
+  image.set_shape([mnist.IMAGE_PIXELS])
 
-  # Convert label from a scalar uint8 tensor to an int32 scalar.
-  label = tf.cast(features['label'], tf.int32)
-  
-  return image, label
-
-def augment(image, label):
   # OPTIONAL: Could reshape into a 28x28 image and apply distortions
   # here.  Since we are not applying any distortions in this
   # example, and the next step expects the image to be flattened
   # into a vector, we don't bother.
-  return image, label
 
-def normalize(image, label):
   # Convert from [0, 255] -> [-0.5, 0.5] floats.
   image = tf.cast(image, tf.float32) * (1. / 255) - 0.5
 
+  # Convert label from a scalar uint8 tensor to an int32 scalar.
+  label = tf.cast(features['label'], tf.int32)
+
   return image, label
 
+
 def inputs(train, batch_size, num_epochs):
   """Reads input data num_epochs times.
 
@@ -93,32 +91,31 @@ def inputs(train, batch_size, num_epochs):
       in the range [-0.5, 0.5].
     * labels is an int32 tensor with shape [batch_size] with the true label,
       a number in the range [0, mnist.NUM_CLASSES).
-
-    This function creates a one_shot_iterator, meaning that it will only iterate
-    over the dataset once. On the other hand there is no special initialization
-    required.
+    Note that an tf.train.QueueRunner is added to the graph, which
+    must be run using e.g. tf.train.start_queue_runners().
   """
   if not num_epochs: num_epochs = None
   filename = os.path.join(FLAGS.train_dir,
                           TRAIN_FILE if train else VALIDATION_FILE)
 
   with tf.name_scope('input'):
-    # TFRecordDataset opens a protobuf and reads entries line by line
-    # could also be [list, of, filenames]
-    dataset = tf.data.TFRecordDataset(filename)
-    dataset = dataset.repeat(num_epochs)
+    filename_queue = tf.train.string_input_producer(
+        [filename], num_epochs=num_epochs)
 
-    # map takes a python function and applies it to every sample
-    dataset = dataset.map(decode)
-    dataset = dataset.map(augment)
-    dataset = dataset.map(normalize)
+    # Even when reading in multiple threads, share the filename
+    # queue.
+    image, label = read_and_decode(filename_queue)
 
-    #the parameter is the queue size
-    dataset = dataset.shuffle(1000 + 3 * batch_size)
-    dataset = dataset.batch(batch_size)
+    # Shuffle the examples and collect them into batch_size batches.
+    # (Internally uses a RandomShuffleQueue.)
+    # We run this in two threads to avoid being a bottleneck.
+    images, sparse_labels = tf.train.shuffle_batch(
+        [image, label], batch_size=batch_size, num_threads=2,
+        capacity=1000 + 3 * batch_size,
+        # Ensures a minimum amount of shuffling of examples.
+        min_after_dequeue=1000)
 
-    iterator = dataset.make_one_shot_iterator()
-  return iterator.get_next()
+    return images, sparse_labels
 
 
 def run_training():
@@ -127,16 +124,16 @@ def run_training():
   # Tell TensorFlow that the model will be built into the default Graph.
   with tf.Graph().as_default():
     # Input images and labels.
-    image_batch, label_batch = inputs(train=True, batch_size=FLAGS.batch_size,
-                               num_epochs=FLAGS.num_epochs)
+    images, labels = inputs(train=True, batch_size=FLAGS.batch_size,
+                            num_epochs=FLAGS.num_epochs)
 
     # Build a Graph that computes predictions from the inference model.
-    logits = mnist.inference(image_batch,
+    logits = mnist.inference(images,
                              FLAGS.hidden1,
                              FLAGS.hidden2)
 
     # Add to the Graph the loss calculation.
-    loss = mnist.loss(logits, label_batch)
+    loss = mnist.loss(logits, labels)
 
     # Add to the Graph operations that train the model.
     train_op = mnist.training(loss, FLAGS.learning_rate)
@@ -146,33 +143,47 @@ def run_training():
                        tf.local_variables_initializer())
 
     # Create a session for running operations in the Graph.
-    with tf.Session() as sess:
-      # Initialize the variables (the trained variables and the
-      # epoch counter).
-      sess.run(init_op)
-      try:
-        step = 0
-        while True: #train until OutOfRangeError
-          start_time = time.time()
-
-          # Run one step of the model.  The return values are
-          # the activations from the `train_op` (which is
-          # discarded) and the `loss` op.  To inspect the values
-          # of your ops or variables, you may include them in
-          # the list passed to sess.run() and the value tensors
-          # will be returned in the tuple from the call.
-          _, loss_value = sess.run([train_op, loss])
-
-          duration = time.time() - start_time
-
-          # Print an overview fairly often.
-          if step % 100 == 0:
-            print('Step %d: loss = %.2f (%.3f sec)' % (step, loss_value,
+    sess = tf.Session()
+
+    # Initialize the variables (the trained variables and the
+    # epoch counter).
+    sess.run(init_op)
+
+    # Start input enqueue threads.
+    coord = tf.train.Coordinator()
+    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
+
+    try:
+      step = 0
+      while not coord.should_stop():
+        start_time = time.time()
+
+        # Run one step of the model.  The return values are
+        # the activations from the `train_op` (which is
+        # discarded) and the `loss` op.  To inspect the values
+        # of your ops or variables, you may include them in
+        # the list passed to sess.run() and the value tensors
+        # will be returned in the tuple from the call.
+        _, loss_value = sess.run([train_op, loss])
+
+        duration = time.time() - start_time
+
+        # Print an overview fairly often.
+        if step % 100 == 0:
+          print('Step %d: loss = %.2f (%.3f sec)' % (step, loss_value,
                                                      duration))
-          step += 1
-      except tf.errors.OutOfRangeError:
-        print('Done training for %d epochs, %d steps.' % (FLAGS.num_epochs, step))
-      
+        step += 1
+    except tf.errors.OutOfRangeError:
+      print('Done training for %d epochs, %d steps.' % (FLAGS.num_epochs, step))
+    finally:
+      # When done, ask the threads to stop.
+      coord.request_stop()
+
+    # Wait for threads to finish.
+    coord.join(threads)
+    sess.close()
+
+
 def main(_):
   run_training()
 
diff --git a/tensorflow/examples/wav_to_spectrogram/wav_to_spectrogram.cc b/tensorflow/examples/wav_to_spectrogram/wav_to_spectrogram.cc
index 4a429837b7..1e375ed48e 100644
--- a/tensorflow/examples/wav_to_spectrogram/wav_to_spectrogram.cc
+++ b/tensorflow/examples/wav_to_spectrogram/wav_to_spectrogram.cc
@@ -53,8 +53,7 @@ tensorflow::Status WavToSpectrogram(const tensorflow::string& input_wav,
   //  - Scales, clamps, and converts that spectrogram to 0 to 255 uint8's.
   //  - Reshapes the tensor so that it's [height, width, 1] for imaging.
   //  - Encodes it as a PNG stream and saves it out to a file.
-  Output file_reader =
-      tensorflow::ops::ReadFile(root.WithOpName("input_wav"), input_wav);
+  Output file_reader = ReadFile(root.WithOpName("input_wav"), input_wav);
   DecodeWav wav_decoder =
       DecodeWav(root.WithOpName("wav_decoder"), file_reader);
   Output spectrogram = AudioSpectrogram(root.WithOpName("spectrogram"),
@@ -72,8 +71,8 @@ tensorflow::Status WavToSpectrogram(const tensorflow::string& input_wav,
   Output squeeze = Squeeze(root.WithOpName("squeeze"), expand_dims,
                            Squeeze::Attrs().Axis({0}));
   Output png_encoder = EncodePng(root.WithOpName("png_encoder"), squeeze);
-  tensorflow::ops::WriteFile file_writer = tensorflow::ops::WriteFile(
-      root.WithOpName("output_image"), output_image, png_encoder);
+  WriteFile file_writer =
+      WriteFile(root.WithOpName("output_image"), output_image, png_encoder);
   tensorflow::GraphDef graph;
   TF_RETURN_IF_ERROR(root.ToGraphDef(&graph));
 
diff --git a/tensorflow/go/graph.go b/tensorflow/go/graph.go
index fc087d9d99..f200a8e00a 100644
--- a/tensorflow/go/graph.go
+++ b/tensorflow/go/graph.go
@@ -28,8 +28,7 @@ package tensorflow
 //                                 int num_shapes) {
 //  const int64_t** dims =
 //    (const int64_t**)malloc(sizeof(const int64_t*) * num_shapes);
-//  int i = 0;
-//  for (i = 0; i < num_shapes; i++) {
+//  for (int i = 0; i < num_shapes; i++) {
 //    dims[i] = flat_dims;
 //    if (num_dims[i] > 0) {
 //      // flat_dims will be NULL iff num_shapes is 0 or all elements in num_dims are <= 0.
@@ -133,20 +132,6 @@ func (g *Graph) Operation(name string) *Operation {
 	return &Operation{cop, g}
 }
 
-// Operations returns a list of all operations in the graph
-func (g *Graph) Operations() []Operation {
-	var pos C.size_t = 0
-	ops := []Operation{}
-	for {
-		cop := C.TF_GraphNextOperation(g.c, &pos)
-		if cop == nil {
-			break
-		}
-		ops = append(ops, Operation{cop, g})
-	}
-	return ops
-}
-
 // OpSpec is the specification of an Operation to be added to a Graph
 // (using Graph.AddOperation).
 type OpSpec struct {
diff --git a/tensorflow/go/graph_test.go b/tensorflow/go/graph_test.go
index b8d65c54f6..c3120bc720 100644
--- a/tensorflow/go/graph_test.go
+++ b/tensorflow/go/graph_test.go
@@ -29,26 +29,10 @@ func hasOperations(g *Graph, ops ...string) error {
 			missing = append(missing, op)
 		}
 	}
-	if len(missing) != 0 {
-		return fmt.Errorf("Graph does not have the operations %v", missing)
+	if len(missing) == 0 {
+		return nil
 	}
-
-	inList := map[string]bool{}
-	for _, op := range g.Operations() {
-		inList[op.Name()] = true
-	}
-
-	for _, op := range ops {
-		if !inList[op] {
-			missing = append(missing, op)
-		}
-	}
-
-	if len(missing) != 0 {
-		return fmt.Errorf("Operations %v are missing from graph.Operations()", missing)
-	}
-
-	return nil
+	return fmt.Errorf("Graph does not have the operations %v", missing)
 }
 
 func TestGraphWriteToAndImport(t *testing.T) {
diff --git a/tensorflow/python/client/session.py b/tensorflow/python/client/session.py
index 1481a4d035..017bef99ce 100644
--- a/tensorflow/python/client/session.py
+++ b/tensorflow/python/client/session.py
@@ -126,12 +126,6 @@ _REGISTERED_EXPANSIONS = [
      lambda feed: [feed])]
 # pylint: enable=g-long-lambda
 
-
-def _convert_to_numpy_obj(numpy_dtype, obj):
-  """Explicitly convert obj based on numpy type except for string type."""
-  return numpy_dtype(obj) if numpy_dtype is not object else str(obj)
-
-
 def register_session_run_conversion_functions(tensor_type, fetch_function,
     feed_function=None, feed_function_for_partial_run=None):
   """Register fetch and feed conversion functions for `tf.Session.run()`.
@@ -1078,14 +1072,12 @@ class BaseSession(SessionInterface):
                             'strings, lists, numpy ndarrays, or TensorHandles.')
 
           subfeed_dtype = subfeed_t.dtype.as_numpy_dtype
-          if isinstance(subfeed_val, int) and _convert_to_numpy_obj(
-              subfeed_dtype, subfeed_val) != subfeed_val:
+          if isinstance(subfeed_val,
+                        int) and subfeed_dtype(subfeed_val) != subfeed_val:
             raise TypeError(
-                'Type of feed value ' + str(subfeed_val) + ' with type ' +
-                str(type(subfeed_val)) +
-                ' is not compatible with Tensor type ' +
-                str(subfeed_dtype) +
-                '. Try explicitly setting the type of the feed tensor'
+                'Type of feed value ' + str(subfeed_val) + ' is not'
+                ' compatible with Tensor type ' + str(subfeed_dtype) + '.'
+                ' Try explicitly setting the type of the feed tensor'
                 ' to a larger type (e.g. int64).')
 
           is_tensor_handle_feed = isinstance(subfeed_val,
diff --git a/tensorflow/python/client/session_test.py b/tensorflow/python/client/session_test.py
index a563f5ef4a..3da03a7b0f 100644
--- a/tensorflow/python/client/session_test.py
+++ b/tensorflow/python/client/session_test.py
@@ -1737,12 +1737,6 @@ class SessionTest(test_util.TensorFlowTestCase):
     server = server_lib.Server.create_local_server()
     self.runTestAddFunctionToSession(server.target)
 
-  def testAutoConvertAndCheckData(self):
-    with self.test_session() as sess:
-      a = array_ops.placeholder(dtype=dtypes.string)
-      with self.assertRaisesRegexp(
-          TypeError, 'Type of feed value 1 with type <(\w+) \'int\'> is not'):
-        sess.run(a, feed_dict={a: 1})
 
 class GraphMutationTest(test_util.TensorFlowTestCase):
 
diff --git a/tensorflow/python/debug/BUILD b/tensorflow/python/debug/BUILD
index 789771508e..2315ad4653 100644
--- a/tensorflow/python/debug/BUILD
+++ b/tensorflow/python/debug/BUILD
@@ -535,7 +535,6 @@ py_test(
     srcs_version = "PY2AND3",
     tags = [
         "no_windows",
-        "nomac",
         "oss_serial",
     ],
     deps = [
diff --git a/tensorflow/python/estimator/training_test.py b/tensorflow/python/estimator/training_test.py
index 285671f99f..d72b95dbdd 100644
--- a/tensorflow/python/estimator/training_test.py
+++ b/tensorflow/python/estimator/training_test.py
@@ -626,7 +626,7 @@ class _TrainingExecutorTrainingTest(object):
 
     self._run_task(training._TrainingExecutor(mock_est, mock_train_spec,
                                               mock_eval_spec))
-    self.assertTrue(mock_est.train.called)
+    mock_est.train.assert_called()
     mock_server.assert_not_called()
 
   def test_fail_with_empty_task_type(self):
@@ -836,7 +836,7 @@ class TrainingExecutorRunMasterTest(test.TestCase):
     executor.run_master()
 
     mock_server.assert_not_called()
-    self.assertTrue(mock_est.train.called)
+    mock_est.train.assert_called()
 
   def test_fail_with_empty_task_type(self):
     mock_est = test.mock.Mock(spec=estimator_lib.Estimator)
diff --git a/tensorflow/python/estimator/util.py b/tensorflow/python/estimator/util.py
index b31486dfa1..12f2592d84 100644
--- a/tensorflow/python/estimator/util.py
+++ b/tensorflow/python/estimator/util.py
@@ -52,7 +52,7 @@ def fn_args(fn):
   else:
     if _is_callable_object(fn):
       fn = fn.__call__
-    args = tf_inspect.getfullargspec(fn).args
+    args = tf_inspect.getargspec(fn).args
     if _is_bounded_method(fn):
       args.remove('self')
   return tuple(args)
diff --git a/tensorflow/python/framework/function.py b/tensorflow/python/framework/function.py
index e06899f81d..366025a0d8 100644
--- a/tensorflow/python/framework/function.py
+++ b/tensorflow/python/framework/function.py
@@ -82,8 +82,8 @@ class Defun(object):
     return x + y, x - y
 
   # Building the graph.
-  a = tf.constant([1.0])
-  b = tf.constant([2.0])
+  a = tf.Constant([1.0])
+  b = tf.Constant([2.0])
   c, d = MyFunc(a, b, name='mycall')
   ```
   """
diff --git a/tensorflow/python/keras/_impl/keras/applications/inception_resnet_v2.py b/tensorflow/python/keras/_impl/keras/applications/inception_resnet_v2.py
index 2e73cefb6c..c66b4b395e 100644
--- a/tensorflow/python/keras/_impl/keras/applications/inception_resnet_v2.py
+++ b/tensorflow/python/keras/_impl/keras/applications/inception_resnet_v2.py
@@ -211,7 +211,7 @@ def InceptionResNetV2(include_top=True,  # pylint: disable=invalid-name
       include_top: whether to include the fully-connected
           layer at the top of the network.
       weights: one of `None` (random initialization),
-          'imagenet' (pre-training on ImageNet),
+          "imagenet" (pre-training on ImageNet),
           or the path to the weights file to be loaded.
       input_tensor: optional Keras tensor (i.e. output of `layers.Input()`)
           to use as image input for the model.
diff --git a/tensorflow/python/keras/_impl/keras/applications/mobilenet.py b/tensorflow/python/keras/_impl/keras/applications/mobilenet.py
index 5f97c138fc..4d5ac72604 100644
--- a/tensorflow/python/keras/_impl/keras/applications/mobilenet.py
+++ b/tensorflow/python/keras/_impl/keras/applications/mobilenet.py
@@ -350,7 +350,7 @@ def MobileNet(input_shape=None,  # pylint: disable=invalid-name
       include_top: whether to include the fully-connected
           layer at the top of the network.
       weights: one of `None` (random initialization),
-          'imagenet' (pre-training on ImageNet),
+          "imagenet" (pre-training on ImageNet),
           or the path to the weights file to be loaded.
       input_tensor: optional Keras tensor (i.e. output of
           `layers.Input()`)
@@ -536,8 +536,6 @@ def MobileNet(input_shape=None,  # pylint: disable=invalid-name
 
   if old_data_format:
     K.set_image_data_format(old_data_format)
-  elif weights is not None:
-    model.load_weights(weights)
   return model
 
 
diff --git a/tensorflow/python/keras/_impl/keras/applications/resnet50.py b/tensorflow/python/keras/_impl/keras/applications/resnet50.py
index 8ab46693aa..f7cdf2be99 100644
--- a/tensorflow/python/keras/_impl/keras/applications/resnet50.py
+++ b/tensorflow/python/keras/_impl/keras/applications/resnet50.py
@@ -164,7 +164,7 @@ def ResNet50(include_top=True,
       include_top: whether to include the fully-connected
           layer at the top of the network.
       weights: one of `None` (random initialization),
-          'imagenet' (pre-training on ImageNet),
+          "imagenet" (pre-training on ImageNet),
           or the path to the weights file to be loaded.
       input_tensor: optional Keras tensor (i.e. output of `layers.Input()`)
           to use as image input for the model.
diff --git a/tensorflow/python/keras/_impl/keras/applications/vgg16.py b/tensorflow/python/keras/_impl/keras/applications/vgg16.py
index 38dbbdc809..ab205aa689 100644
--- a/tensorflow/python/keras/_impl/keras/applications/vgg16.py
+++ b/tensorflow/python/keras/_impl/keras/applications/vgg16.py
@@ -70,8 +70,8 @@ def VGG16(include_top=True,
   Arguments:
       include_top: whether to include the 3 fully-connected
           layers at the top of the network.
-      weights: one of `None` (random initialization),
-          'imagenet' (pre-training on ImageNet),
+     weights: one of `None` (random initialization),
+          "imagenet" (pre-training on ImageNet),
           or the path to the weights file to be loaded.
       input_tensor: optional Keras tensor (i.e. output of `layers.Input()`)
           to use as image input for the model.
diff --git a/tensorflow/python/keras/_impl/keras/applications/vgg19.py b/tensorflow/python/keras/_impl/keras/applications/vgg19.py
index 126c64260b..5e5179f332 100644
--- a/tensorflow/python/keras/_impl/keras/applications/vgg19.py
+++ b/tensorflow/python/keras/_impl/keras/applications/vgg19.py
@@ -71,8 +71,8 @@ def VGG19(include_top=True,
       include_top: whether to include the 3 fully-connected
           layers at the top of the network.
       weights: one of `None` (random initialization),
-         'imagenet' (pre-training on ImageNet),
-         or the path to the weights file to be loaded.
+          "imagenet" (pre-training on ImageNet),
+          or the path to the weights file to be loaded.
       input_tensor: optional Keras tensor (i.e. output of `layers.Input()`)
           to use as image input for the model.
       input_shape: optional shape tuple, only to be specified
diff --git a/tensorflow/python/keras/_impl/keras/applications/xception.py b/tensorflow/python/keras/_impl/keras/applications/xception.py
index 8219831408..a9efd5d64c 100644
--- a/tensorflow/python/keras/_impl/keras/applications/xception.py
+++ b/tensorflow/python/keras/_impl/keras/applications/xception.py
@@ -83,7 +83,7 @@ def Xception(include_top=True,
       include_top: whether to include the fully-connected
           layer at the top of the network.
       weights: one of `None` (random initialization),
-          'imagenet' (pre-training on ImageNet),
+          "imagenet" (pre-training on ImageNet),
           or the path to the weights file to be loaded.
       input_tensor: optional Keras tensor (i.e. output of `layers.Input()`)
           to use as image input for the model.
@@ -303,8 +303,6 @@ def Xception(include_top=True,
 
   if old_data_format:
     K.set_image_data_format(old_data_format)
-  elif weights is not None:
-    model.load_weights(weights)
   return model
 
 
diff --git a/tensorflow/python/keras/_impl/keras/layers/core.py b/tensorflow/python/keras/_impl/keras/layers/core.py
index 6a745844b2..712db33c69 100644
--- a/tensorflow/python/keras/_impl/keras/layers/core.py
+++ b/tensorflow/python/keras/_impl/keras/layers/core.py
@@ -104,13 +104,13 @@ class Dropout(tf_core_layers.Dropout, Layer):
   """
 
   def __init__(self, rate, noise_shape=None, seed=None, **kwargs):
+    self.supports_masking = True
     # Inheritance call order:
     # 1) tf.layers.Dropout, 2) keras.layers.Layer, 3) tf.layers.Layer
     super(Dropout, self).__init__(rate=rate,
                                   noise_shape=noise_shape,
                                   seed=seed,
                                   **kwargs)
-    self.supports_masking = True
 
   def call(self, inputs, training=None):
     if training is None:
diff --git a/tensorflow/python/keras/_impl/keras/layers/core_test.py b/tensorflow/python/keras/_impl/keras/layers/core_test.py
index bdb99c91c2..1fe043561d 100644
--- a/tensorflow/python/keras/_impl/keras/layers/core_test.py
+++ b/tensorflow/python/keras/_impl/keras/layers/core_test.py
@@ -47,11 +47,6 @@ class CoreLayersTest(test.TestCase):
                   'noise_shape': [3, 1]},
           input_shape=(3, 2))
 
-    # https://github.com/tensorflow/tensorflow/issues/14819
-    with self.test_session():
-      dropout = keras.layers.Dropout(0.5)
-      self.assertEqual(True, dropout.supports_masking)
-
     with self.test_session():
       testing_utils.layer_test(
           keras.layers.SpatialDropout1D,
diff --git a/tensorflow/python/kernel_tests/BUILD b/tensorflow/python/kernel_tests/BUILD
index 31d3bd1b74..1d8ca99c07 100644
--- a/tensorflow/python/kernel_tests/BUILD
+++ b/tensorflow/python/kernel_tests/BUILD
@@ -2084,10 +2084,6 @@ cuda_py_test(
         "//tensorflow/python:framework_for_generated_wrappers",
     ],
     shard_count = 2,
-    tags = [
-        "no_gpu",
-        "no_oss",
-    ],
 )
 
 cuda_py_test(
diff --git a/tensorflow/python/kernel_tests/summary_image_op_test.py b/tensorflow/python/kernel_tests/summary_image_op_test.py
index 4718827e88..d2152ab560 100644
--- a/tensorflow/python/kernel_tests/summary_image_op_test.py
+++ b/tensorflow/python/kernel_tests/summary_image_op_test.py
@@ -50,6 +50,7 @@ class SummaryImageOpTest(test.TestCase):
     self.assertProtoEquals(expected, image_summ)
 
   def testImageSummary(self):
+    np.random.seed(7)
     for depth in (1, 3, 4):
       for positive in False, True:
         with self.test_session(graph=ops.Graph()) as sess:
diff --git a/tensorflow/python/ops/image_ops_impl.py b/tensorflow/python/ops/image_ops_impl.py
index 21561f3689..b9c89d62d5 100644
--- a/tensorflow/python/ops/image_ops_impl.py
+++ b/tensorflow/python/ops/image_ops_impl.py
@@ -1168,7 +1168,7 @@ def random_hue(image, max_delta, seed=None):
       set_random_seed for its interaction with the graph-level random seed.
 
   Returns:
-    Adjusted image(s), same shape and DType as `image`.
+    3-D float tensor of shape `[height, width, channels]`.
 
   Raises:
     ValueError: if `max_delta` is invalid.
@@ -1275,9 +1275,30 @@ def adjust_saturation(image, saturation_factor, name=None):
     orig_dtype = image.dtype
     flt_image = convert_image_dtype(image, dtypes.float32)
 
-    return convert_image_dtype(
-        gen_image_ops.adjust_saturation(flt_image, saturation_factor),
-        orig_dtype)
+    # TODO(zhengxq): we will switch to the fused version after we add a GPU
+    # kernel for that.
+    fused = os.environ.get('TF_ADJUST_SATURATION_FUSED', '')
+    fused = fused.lower() in ('true', 't', '1')
+
+    if fused:
+      return convert_image_dtype(
+          gen_image_ops.adjust_saturation(flt_image, saturation_factor),
+          orig_dtype)
+
+    hsv = gen_image_ops.rgb_to_hsv(flt_image)
+
+    hue = array_ops.slice(hsv, [0, 0, 0], [-1, -1, 1])
+    saturation = array_ops.slice(hsv, [0, 0, 1], [-1, -1, 1])
+    value = array_ops.slice(hsv, [0, 0, 2], [-1, -1, 1])
+
+    saturation *= saturation_factor
+    saturation = clip_ops.clip_by_value(saturation, 0.0, 1.0)
+
+    hsv_altered = array_ops.concat([hue, saturation, value], 2)
+    rgb_altered = gen_image_ops.hsv_to_rgb(hsv_altered)
+
+    return convert_image_dtype(rgb_altered, orig_dtype)
+
 
 def decode_image(contents, channels=None, name=None):
   """Convenience function for `decode_bmp`, `decode_gif`, `decode_jpeg`,
diff --git a/tensorflow/python/ops/image_ops_test.py b/tensorflow/python/ops/image_ops_test.py
index 4af9bd2a00..d1554b399f 100644
--- a/tensorflow/python/ops/image_ops_test.py
+++ b/tensorflow/python/ops/image_ops_test.py
@@ -281,21 +281,6 @@ class AdjustHueTest(test_util.TensorFlowTestCase):
       y_tf = y.eval()
       self.assertAllEqual(y_tf, y_np)
 
-  def testBatchAdjustHue(self):
-    x_shape = [2, 1, 2, 3]
-    x_data = [0, 5, 13, 54, 135, 226, 37, 8, 234, 90, 255, 1]
-    x_np = np.array(x_data, dtype=np.uint8).reshape(x_shape)
-
-    delta = 0.25
-    y_data = [13, 0, 11, 226, 54, 221, 234, 8, 92, 1, 217, 255]
-    y_np = np.array(y_data, dtype=np.uint8).reshape(x_shape)
-
-    with self.test_session(use_gpu=True):
-      x = constant_op.constant(x_np, shape=x_shape)
-      y = image_ops.adjust_hue(x, delta)
-      y_tf = y.eval()
-      self.assertAllEqual(y_tf, y_np)
-
   def _adjustHueNp(self, x_np, delta_h):
     self.assertEqual(x_np.shape[-1], 3)
     x_v = x_np.reshape([-1, 3])
@@ -374,89 +359,6 @@ class AdjustHueTest(test_util.TensorFlowTestCase):
       self._adjustHueTf(x_np, delta_h)
 
 
-class FlipImageBenchmark(test.Benchmark):
-
-  def _benchmarkFlipLeftRight(self, device, cpu_count):
-    image_shape = [299, 299, 3]
-    warmup_rounds = 100
-    benchmark_rounds = 1000
-    config = config_pb2.ConfigProto()
-    if cpu_count is not None:
-      config.inter_op_parallelism_threads = 1
-      config.intra_op_parallelism_threads = cpu_count
-    with session.Session("", graph=ops.Graph(), config=config) as sess:
-      with ops.device(device):
-        inputs = variables.Variable(
-            random_ops.random_uniform(
-                image_shape, dtype=dtypes.float32) * 255,
-            trainable=False,
-            dtype=dtypes.float32)
-        run_op = image_ops.flip_left_right(inputs)
-        sess.run(variables.global_variables_initializer())
-        for i in xrange(warmup_rounds + benchmark_rounds):
-          if i == warmup_rounds:
-            start = time.time()
-          sess.run(run_op)
-    end = time.time()
-    step_time = (end - start) / benchmark_rounds
-    tag = device + "_%s" % (cpu_count if cpu_count is not None else "_all")
-    print("benchmarkFlipLeftRight_299_299_3_%s step_time: %.2f us" %
-          (tag, step_time * 1e6))
-    self.report_benchmark(
-        name="benchmarkFlipLeftRight_299_299_3_%s" % (tag),
-        iters=benchmark_rounds,
-        wall_time=step_time)
-
-  def _benchmarkRandomFlipLeftRight(self, device, cpu_count):
-    image_shape = [299, 299, 3]
-    warmup_rounds = 100
-    benchmark_rounds = 1000
-    config = config_pb2.ConfigProto()
-    if cpu_count is not None:
-      config.inter_op_parallelism_threads = 1
-      config.intra_op_parallelism_threads = cpu_count
-    with session.Session("", graph=ops.Graph(), config=config) as sess:
-      with ops.device(device):
-        inputs = variables.Variable(
-            random_ops.random_uniform(
-                image_shape, dtype=dtypes.float32) * 255,
-            trainable=False,
-            dtype=dtypes.float32)
-        run_op = image_ops.random_flip_left_right(inputs)
-        sess.run(variables.global_variables_initializer())
-        for i in xrange(warmup_rounds + benchmark_rounds):
-          if i == warmup_rounds:
-            start = time.time()
-          sess.run(run_op)
-    end = time.time()
-    step_time = (end - start) / benchmark_rounds
-    tag = device + "_%s" % (cpu_count if cpu_count is not None else "_all")
-    print("benchmarkRandomFlipLeftRight_299_299_3_%s step_time: %.2f us" %
-          (tag, step_time * 1e6))
-    self.report_benchmark(
-        name="benchmarkRandomFlipLeftRight_299_299_3_%s" % (tag),
-        iters=benchmark_rounds,
-        wall_time=step_time)
-
-  def benchmarkFlipLeftRightCpu1(self):
-    self._benchmarkFlipLeftRight("/cpu:0", 1)
-
-  def benchmarkFlipLeftRightCpuAll(self):
-    self._benchmarkFlipLeftRight("/cpu:0", None)
-
-  def benchmarkFlipLeftRightGpu(self):
-    self._benchmarkFlipLeftRight(test.gpu_device_name(), None)
-
-  def benchmarkRandomFlipLeftRightCpu1(self):
-    self._benchmarkRandomFlipLeftRight("/cpu:0", 1)
-
-  def benchmarkRandomFlipLeftRightCpuAll(self):
-    self._benchmarkRandomFlipLeftRight("/cpu:0", None)
-
-  def benchmarkRandomFlipLeftRightGpu(self):
-    self._benchmarkRandomFlipLeftRight(test.gpu_device_name(), None)
-
-
 class AdjustHueBenchmark(test.Benchmark):
 
   def _benchmarkAdjustHue(self, device, cpu_count):
@@ -730,21 +632,6 @@ class AdjustSaturationTest(test_util.TensorFlowTestCase):
       y_tf = y.eval()
       self.assertAllEqual(y_tf, y_np)
 
-  def testBatchSaturation(self):
-    x_shape = [2, 1, 2, 3]
-    x_data = [0, 5, 13, 54, 135, 226, 37, 8, 234, 90, 255, 1]
-    x_np = np.array(x_data, dtype=np.uint8).reshape(x_shape)
-
-    saturation_factor = 0.5
-    y_data = [6, 9, 13, 140, 180, 226, 135, 121, 234, 172, 255, 128]
-    y_np = np.array(y_data, dtype=np.uint8).reshape(x_shape)
-
-    with self.test_session(use_gpu=True):
-      x = constant_op.constant(x_np, shape=x_shape)
-      y = image_ops.adjust_saturation(x, saturation_factor)
-      y_tf = y.eval()
-      self.assertAllEqual(y_tf, y_np)
-
   def _adjust_saturation(self, image, saturation_factor):
     image = ops.convert_to_tensor(image, name="image")
     orig_dtype = image.dtype
diff --git a/tensorflow/python/ops/logging_ops.py b/tensorflow/python/ops/logging_ops.py
index 51ab2aec22..08e3f83a0b 100644
--- a/tensorflow/python/ops/logging_ops.py
+++ b/tensorflow/python/ops/logging_ops.py
@@ -39,8 +39,8 @@ def Print(input_, data, message=None, first_n=None, summarize=None,
           name=None):
   """Prints a list of tensors.
 
-  This is an identity op (behaves like `tf.identity`) with the side effect
-  of printing `data` when evaluating.
+  This is an identity op with the side effect of printing `data` when
+  evaluating.
 
   Note: This op prints to the standard error. It is not currently compatible
     with jupyter notebook (printing to the notebook *server's* output, not into
@@ -57,7 +57,7 @@ def Print(input_, data, message=None, first_n=None, summarize=None,
     name: A name for the operation (optional).
 
   Returns:
-    A `Tensor`. Has the same type and contents as `input_`.
+    Same tensor as `input_`.
   """
   return gen_logging_ops._print(input_, data, message, first_n, summarize, name)
 
diff --git a/tensorflow/python/ops/nn_impl.py b/tensorflow/python/ops/nn_impl.py
index fd96f7b8fc..19a86df6a9 100644
--- a/tensorflow/python/ops/nn_impl.py
+++ b/tensorflow/python/ops/nn_impl.py
@@ -27,7 +27,6 @@ from tensorflow.python.framework import ops
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import candidate_sampling_ops
 from tensorflow.python.ops import embedding_ops
-from tensorflow.python.ops import gen_array_ops
 from tensorflow.python.ops import gen_nn_ops
 from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import nn_ops
@@ -982,11 +981,10 @@ def _compute_sampled_logits(weights,
         Default is `"mod"`. See `tf.nn.embedding_lookup` for more details.
     name: A name for the operation (optional).
   Returns:
-    out_logits: `Tensor` object with shape
+    out_logits, out_labels: `Tensor` objects each with shape
         `[batch_size, num_true + num_sampled]`, for passing to either
         `nn.sigmoid_cross_entropy_with_logits` (NCE) or
         `nn.softmax_cross_entropy_with_logits` (sampled softmax).
-    out_labels: A Tensor object with the same shape as `out_logits`.
   """
 
   if isinstance(weights, variables.PartitionedVariable):
@@ -1097,16 +1095,15 @@ def _compute_sampled_logits(weights,
 
     # Construct output logits and labels. The true labels/logits start at col 0.
     out_logits = array_ops.concat([true_logits, sampled_logits], 1)
-
-    # true_logits is a float tensor, ones_like(true_logits) is a float
-    # tensor of ones. We then divide by num_true to ensure the per-example
-    # labels sum to 1.0, i.e. form a proper probability distribution.
+    # true_logits is a float tensor, ones_like(true_logits) is a float tensor
+    # of ones. We then divide by num_true to ensure the per-example labels sum
+    # to 1.0, i.e. form a proper probability distribution.
     out_labels = array_ops.concat([
         array_ops.ones_like(true_logits) / num_true,
         array_ops.zeros_like(sampled_logits)
     ], 1)
 
-    return out_logits, out_labels
+  return out_logits, out_labels
 
 
 def nce_loss(weights,
diff --git a/tensorflow/python/ops/quantized_conv_ops_test.py b/tensorflow/python/ops/quantized_conv_ops_test.py
index 5e9e710027..5ea47ea40e 100644
--- a/tensorflow/python/ops/quantized_conv_ops_test.py
+++ b/tensorflow/python/ops/quantized_conv_ops_test.py
@@ -93,7 +93,7 @@ class Conv2DTest(test.TestCase):
     quantized_range = ((quantized_max - quantized_min) * range_adjust)
     range_scale = (quantized_range / number_of_steps)
     lowest_quantized = -(1 << (number_of_bits - 1))
-    result = np.array([(quantized_min + ((float(x) - lowest_quantized) * range_scale))
+    result = np.array([(quantized_min + ((x - lowest_quantized) * range_scale))
                        for x in quantized.flatten()])
     return result
 
diff --git a/tensorflow/python/ops/quantized_ops_test.py b/tensorflow/python/ops/quantized_ops_test.py
deleted file mode 100644
index 4bf3b35e13..0000000000
--- a/tensorflow/python/ops/quantized_ops_test.py
+++ /dev/null
@@ -1,57 +0,0 @@
-# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Functional tests for quantized operations."""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import numpy as np
-
-from tensorflow.python.framework import constant_op
-from tensorflow.python.framework import dtypes
-from tensorflow.python.ops import array_ops
-from tensorflow.python.platform import test
-
-
-class QuantizedOpsTest(test.TestCase):
-
-  def __init__(self, method_name="runTest"):
-    super(QuantizedOpsTest, self).__init__(method_name)
-
-  def testQuantizeOp(self):
-    expected_output = [1, 1, 2, 127, 255, 255]
-    with self.test_session(use_gpu=False) as sess:
-      x = constant_op.constant([1.0, 1.25, 1.75, 127.0, 255.0, 500.0], shape=[6], dtype=dtypes.float32)
-      x_min = 0.0
-      x_max = 255.0
-      op = array_ops.quantize(x, x_min, x_max, dtypes.quint8, mode="MIN_FIRST")
-      value = sess.run(op)
-      self.assertArrayNear(expected_output, value.output, 0.1)
-
-  def testDequantizeOp(self):
-    expected_output = [1.0, 2.0, 4.0, 8.0, 16.0, 255.0]
-    inp = np.array([1, 2, 4, 8, 16, 255]).astype(np.uint8)
-    with self.test_session(use_gpu=False) as sess:
-      x = constant_op.constant(inp, shape=[6], dtype=dtypes.quint8)
-      x_min = 0.0
-      x_max = 255.0
-      op = array_ops.dequantize(x, x_min, x_max, mode="MIN_FIRST")
-      value = sess.run(op)
-      self.assertArrayNear(expected_output, value, 0.1)
-
-
-if __name__ == "__main__":
-  test.main()
diff --git a/tensorflow/python/training/learning_rate_decay.py b/tensorflow/python/training/learning_rate_decay.py
index f0c28e7b89..802b930b0e 100644
--- a/tensorflow/python/training/learning_rate_decay.py
+++ b/tensorflow/python/training/learning_rate_decay.py
@@ -362,13 +362,7 @@ def inverse_time_decay(learning_rate, global_step, decay_steps, decay_rate,
   The function returns the decayed learning rate.  It is computed as:
 
   ```python
-  decayed_learning_rate = learning_rate / (1 + decay_rate * global_step / decay_step)
-  ```
-
-  or, if `staircase` is `True`, as:
-
-  ```python
-  decayed_learning_rate = learning_rate / (1 + decay_rate * floor(global_step / decay_step))
+  decayed_learning_rate = learning_rate / (1 + decay_rate * t)
   ```
 
   Example: decay 1/t with a rate of 0.5:
@@ -377,9 +371,8 @@ def inverse_time_decay(learning_rate, global_step, decay_steps, decay_rate,
   ...
   global_step = tf.Variable(0, trainable=False)
   learning_rate = 0.1
-  decay_steps = 1.0
-  decay_rate = 0.5
-  learning_rate = tf.train.inverse_time_decay(learning_rate, global_step, decay_steps, decay_rate)
+  k = 0.5
+  learning_rate = tf.train.inverse_time_decay(learning_rate, global_step, k)
 
   # Passing global_step to minimize() will increment it at each step.
   learning_step = (
diff --git a/tensorflow/python/util/tf_inspect.py b/tensorflow/python/util/tf_inspect.py
index d14e710388..9ed125704b 100644
--- a/tensorflow/python/util/tf_inspect.py
+++ b/tensorflow/python/util/tf_inspect.py
@@ -45,26 +45,6 @@ def getargspec(object):  # pylint: disable=redefined-builtin
                if d.decorator_argspec is not None), _inspect.getargspec(target))
 
 
-def getfullargspec(obj):  # pylint: disable=redefined-builtin
-  """TFDecorator-aware replacement for inspect.getfullargspec and fallback to
-  inspect.getargspec in Python 2.
-
-  Args:
-    obj: A callable, possibly decorated.
-
-  Returns:
-    The `FullArgSpec` (`ArgSpec` in Python 2) that describes the signature of
-    the outermost decorator that changes the callable's signature. If the
-    callable is not decorated, `inspect.getfullargspec()`
-    (`inspect.getargspec()` in Python 2) will be called directly on the
-    callable.
-  """
-  spec_fn = getattr(_inspect, 'getfullargspec', getattr(_inspect, 'getargspec'))
-  decorators, target = tf_decorator.unwrap(obj)
-  return next((d.decorator_argspec for d in decorators
-               if d.decorator_argspec is not None), spec_fn(target))
-
-
 def getcallargs(func, *positional, **named):
   """TFDecorator-aware replacement for inspect.getcallargs.
 
diff --git a/tensorflow/stream_executor/dnn.h b/tensorflow/stream_executor/dnn.h
index 73b96de438..0d2cd4a9f2 100644
--- a/tensorflow/stream_executor/dnn.h
+++ b/tensorflow/stream_executor/dnn.h
@@ -1132,7 +1132,7 @@ class DnnSupport {
   //    space in order to speed up the convolution operation.
   //  algorithm: an integer to specify which algorithm should be used for the
   //    operation. kDefaultAlgorithm means the system will pick an algorithm
-  //    by default. The coding of the algorithm is be interpreted by the
+  //    by default. The coding of the algorithm is be interpretted by the
   //    underlying implementation.
   //  output_profile_result: the output profile result for this call. The
   //    profiling is only enabled when this is not nullptr.
diff --git a/tensorflow/tensorflow.bzl b/tensorflow/tensorflow.bzl
index 9b13a86ed3..611d50bc52 100644
--- a/tensorflow/tensorflow.bzl
+++ b/tensorflow/tensorflow.bzl
@@ -130,13 +130,6 @@ def if_not_windows(a):
       "//conditions:default": a,
   })
 
-def if_windows(a):
-  return select({
-      clean_dep("//tensorflow:windows"): a,
-      clean_dep("//tensorflow:windows_msvc"): a,
-      "//conditions:default": [],
-  })
-
 def if_linux_x86_64(a):
   return select({
       clean_dep("//tensorflow:linux_x86_64"): a,
@@ -1332,32 +1325,11 @@ def tf_py_wrap_cc(name,
           "//conditions:default": [":" + cc_library_name],
       }))
 
-# This macro is for running python tests against system installed pip package
-# on Windows.
-#
-# py_test is built as an exectuable python zip file on Windows, which contains all
-# dependencies of the target. Because of the C++ extensions, it would be very
-# inefficient if the py_test zips all runfiles, plus we don't need them when running
-# tests against system installed pip package. So we'd like to get rid of the deps
-# of py_test in this case.
-#
-# In order to trigger the tests without bazel clean after getting rid of deps,
-# we introduce the following :
-# 1. When --define=no_tensorflow_py_deps=true, the py_test depends on a marker
-#    file of the pip package, the test gets to rerun when the pip package change.
-#    Note that this only works on Windows. See the definition of
-#    //tensorflow/tools/pip_package:win_pip_package_marker for specific reasons.
-# 2. When --define=no_tensorflow_py_deps=false (by default), it's a normal py_test.
-def py_test(deps=[], data=[], **kwargs):
+def py_test(deps=[], **kwargs):
   native.py_test(
       deps=select({
           "//conditions:default": deps,
-          clean_dep("//tensorflow:no_tensorflow_py_deps"): [],
-      }),
-      data = data + select({
-          "//conditions:default": [],
-          clean_dep("//tensorflow:no_tensorflow_py_deps"):
-          ["//tensorflow/tools/pip_package:win_pip_package_marker"],
+          clean_dep("//tensorflow:no_tensorflow_py_deps"): []
       }),
       **kwargs)
 
@@ -1382,7 +1354,7 @@ def tf_py_test(name,
     additional_deps = additional_deps + tf_additional_xla_deps_py()
   if grpc_enabled:
     additional_deps = additional_deps + tf_additional_grpc_deps_py()
-  py_test(
+  native.py_test(
       name=name,
       size=size,
       srcs=srcs,
@@ -1392,10 +1364,13 @@ def tf_py_test(name,
       visibility=[clean_dep("//tensorflow:internal")],
       shard_count=shard_count,
       data=data,
-      deps=[
-            clean_dep("//tensorflow/python:extra_py_tests_deps"),
-            clean_dep("//tensorflow/python:gradient_checker"),
+      deps=select({
+          "//conditions:default": [
+              clean_dep("//tensorflow/python:extra_py_tests_deps"),
+              clean_dep("//tensorflow/python:gradient_checker"),
           ] + additional_deps,
+          clean_dep("//tensorflow:no_tensorflow_py_deps"): []
+      }),
       flaky=flaky,
       srcs_version="PY2AND3")
 
diff --git a/tensorflow/tools/benchmark/benchmark_model.cc b/tensorflow/tools/benchmark/benchmark_model.cc
index ecab6f8769..9809ad52de 100644
--- a/tensorflow/tools/benchmark/benchmark_model.cc
+++ b/tensorflow/tools/benchmark/benchmark_model.cc
@@ -530,7 +530,7 @@ int Main(int argc, char** argv) {
   }
 
   // Capture overall inference time without stat logging overhead. This is the
-  // timing data that can be compared to other libraries.
+  // timing data that can be compared to other libaries.
   SleepSeconds(inter_benchmark_sleep_seconds);
   int64 no_stat_time_us = 0;
   int64 no_stat_num_runs = 0;
diff --git a/tensorflow/tools/ci_build/Dockerfile.gpu b/tensorflow/tools/ci_build/Dockerfile.gpu
index 7591ecc04e..2d46ccb6b1 100644
--- a/tensorflow/tools/ci_build/Dockerfile.gpu
+++ b/tensorflow/tools/ci_build/Dockerfile.gpu
@@ -1,8 +1,8 @@
-FROM nvidia/cuda:9.0-cudnn7-devel-ubuntu16.04
+FROM nvidia/cuda:8.0-cudnn6-devel-ubuntu14.04
 
 LABEL maintainer="Jan Prach <jendap@google.com>"
 
-# In the Ubuntu 16.04 images, cudnn is placed in system paths. Move them to
+# In the Ubuntu 14.04 images, cudnn is placed in system paths. Move them to
 # /usr/local/cuda
 RUN cp -P /usr/include/cudnn.h /usr/local/cuda/include
 RUN cp -P /usr/lib/x86_64-linux-gnu/libcudnn* /usr/local/cuda/lib64
diff --git a/tensorflow/tools/ci_build/Dockerfile.gpu_clang b/tensorflow/tools/ci_build/Dockerfile.gpu_clang
index 438a7ec532..0ecd8c75e0 100644
--- a/tensorflow/tools/ci_build/Dockerfile.gpu_clang
+++ b/tensorflow/tools/ci_build/Dockerfile.gpu_clang
@@ -1,8 +1,8 @@
-FROM nvidia/cuda:9.0-cudnn7-devel-ubuntu16.04
+FROM nvidia/cuda:8.0-cudnn6-devel-ubuntu14.04
 
 LABEL maintainer="Ilya Biryukov <ibiryukov@google.com>"
 
-# In the Ubuntu 16.04 images, cudnn is placed in system paths. Move them to
+# In the Ubuntu 14.04 images, cudnn is placed in system paths. Move them to
 # /usr/local/cuda
 RUN cp /usr/include/cudnn.h /usr/local/cuda/include
 RUN cp /usr/lib/x86_64-linux-gnu/libcudnn* /usr/local/cuda/lib64
diff --git a/tensorflow/tools/ci_build/builds/pip.sh b/tensorflow/tools/ci_build/builds/pip.sh
index 82042b93c0..a37cf226f9 100755
--- a/tensorflow/tools/ci_build/builds/pip.sh
+++ b/tensorflow/tools/ci_build/builds/pip.sh
@@ -296,12 +296,19 @@ create_activate_virtualenv_and_install_tensorflow() {
     die "FAILED to create virtualenv directory: ${VIRTUALENV_DIR}"
   fi
 
-  # Use the virtualenv from the default python version (i.e., python-virtualenv)
-  # to create the virtualenv directory for testing. Use the -p flag to specify
-  # the python version inside the to-be-created virtualenv directory.
-  ${PYTHON_BIN_PATH} -m virtualenv -p "${PYTHON_BIN_PATH}" ${VIRTUALENV_FLAGS} \
-    "${VIRTUALENV_DIR}" || \
-    die "FAILED: Unable to create virtualenv"
+  if [[ ${PYTHON_BIN_PATH} == *"python3.6"* ]]; then
+    "${PYTHON_BIN_PATH}" -m venv "${VIRTUALENV_FLAGS}" \
+      "${VIRTUALENV_DIR}" || \
+      die "FAILED: Unable to create virtualenv"
+  else
+    # Verify that virtualenv exists
+    if [[ -z $(which virtualenv) ]]; then
+      die "FAILED: virtualenv not available on path"
+    fi
+    virtualenv ${VIRTUALENV_FLAGS} \
+      -p "${PYTHON_BIN_PATH}" "${VIRTUALENV_DIR}" || \
+      die "FAILED: Unable to create virtualenv"
+  fi
 
   source "${VIRTUALENV_DIR}/bin/activate" || \
     die "FAILED: Unable to activate virtualenv in ${VIRTUALENV_DIR}"
@@ -343,7 +350,7 @@ do_clean_virtualenv_smoke_test() {
   then
     echo "Smoke test of tensorflow install in clean virtualenv PASSED."
   else
-    echo "Smoke test of tensorflow install in clean virtualenv FAILED."
+    echo "Smoke test of tensroflow install in clean virtualenv FAILED."
     return 1
   fi
 
diff --git a/tensorflow/tools/ci_build/builds/print_build_info.sh b/tensorflow/tools/ci_build/builds/print_build_info.sh
index e366abf8bb..7c43419a76 100755
--- a/tensorflow/tools/ci_build/builds/print_build_info.sh
+++ b/tensorflow/tools/ci_build/builds/print_build_info.sh
@@ -88,7 +88,7 @@ fi
 # Print info
 echo "TF_BUILD_INFO = {"\
 "container_type: \"${CONTAINER_TYPE}\", "\
-"command: \"${COMMAND[*]}\", "\
+"command: \"${COMMAND[@]}\", "\
 "source_HEAD: \"${TF_HEAD}\", "\
 "source_remote_origin: \"${TF_FETCH_URL}\", "\
 "OS: \"${OS}\", "\
diff --git a/tensorflow/tools/ci_build/builds/test_user_ops.sh b/tensorflow/tools/ci_build/builds/test_user_ops.sh
index caa3a40817..358f82ac5d 100755
--- a/tensorflow/tools/ci_build/builds/test_user_ops.sh
+++ b/tensorflow/tools/ci_build/builds/test_user_ops.sh
@@ -82,11 +82,11 @@ TF_CFLAGS=( $("${PYTHON_BIN_PATH}" \
 TF_LFLAGS=( $("${PYTHON_BIN_PATH}" \
 	      -c 'import tensorflow as tf; print(" ".join(tf.sysconfig.get_link_flags()))') )
 
-if [[ -z "${TF_CFLAGS[*]}" || -z "${TF_LFLAGS[*]}" ]]; then
+if [[ -z "${TF_CFLAGS}" || -z "${TF_LFLAGS}" ]]; then
   die "FAILED to determine TensorFlow compilation or linking flags"
 else
-  echo "TensorFlow compile flags: ${TF_CFLAGS[*]}"
-  echo "TensorFlow link flags: ${TF_LFLAGS[*]}"
+  echo "TensorFlow compile flags: ${TF_CFLAGS[@]}"
+  echo "TensorFlow link flags: ${TF_LFLAGS[@]}"
 fi
 
 # Check g++ availability
diff --git a/tensorflow/tools/ci_build/gpu_build/parallel_gpu_execute.sh b/tensorflow/tools/ci_build/gpu_build/parallel_gpu_execute.sh
index cfeaebdbf5..6e7b752c06 100755
--- a/tensorflow/tools/ci_build/gpu_build/parallel_gpu_execute.sh
+++ b/tensorflow/tools/ci_build/gpu_build/parallel_gpu_execute.sh
@@ -45,7 +45,7 @@ for i in `seq 0 $((TF_GPU_COUNT-1))`; do
       # This export only works within the brackets, so it is isolated to one
       # single command.
       export CUDA_VISIBLE_DEVICES=$i
-      echo "Running test $* on GPU $CUDA_VISIBLE_DEVICES"
+      echo "Running test $@ on GPU $CUDA_VISIBLE_DEVICES"
       $@
     )
     return_code=$?
diff --git a/tensorflow/tools/ci_build/install/install_deb_packages.sh b/tensorflow/tools/ci_build/install/install_deb_packages.sh
index 9640810533..4ab307c925 100755
--- a/tensorflow/tools/ci_build/install/install_deb_packages.sh
+++ b/tensorflow/tools/ci_build/install/install_deb_packages.sh
@@ -48,7 +48,6 @@ apt-get install -y --no-install-recommends \
     git \
     libcurl4-openssl-dev \
     libtool \
-    libssl-dev \
     mlocate \
     openjdk-8-jdk \
     openjdk-8-jre-headless \
diff --git a/tensorflow/tools/ci_build/install/install_pip_packages.sh b/tensorflow/tools/ci_build/install/install_pip_packages.sh
index da58ac2407..b8ed1ab767 100755
--- a/tensorflow/tools/ci_build/install/install_pip_packages.sh
+++ b/tensorflow/tools/ci_build/install/install_pip_packages.sh
@@ -27,9 +27,6 @@ easy_install3 -U pip
 pip2 install wheel
 pip3 install wheel
 
-pip2 install virtualenv
-pip3 install virtualenv
-
 # Install six.
 pip2 install --upgrade six==1.10.0
 pip3 install --upgrade six==1.10.0
diff --git a/tensorflow/tools/ci_build/install/install_python3.5_pip_packages.sh b/tensorflow/tools/ci_build/install/install_python3.5_pip_packages.sh
index 9881bd99c3..479242aa43 100755
--- a/tensorflow/tools/ci_build/install/install_python3.5_pip_packages.sh
+++ b/tensorflow/tools/ci_build/install/install_python3.5_pip_packages.sh
@@ -39,8 +39,6 @@ if [[ -z $pip35_version ]]; then
 fi
 
 set -e
-pip3.5 install --upgrade virtualenv
-
 # Install six.
 pip3.5 install --upgrade absl-py
 pip3.5 install --upgrade six==1.10.0
diff --git a/tensorflow/tools/ci_build/install/install_python3.6_pip_packages.sh b/tensorflow/tools/ci_build/install/install_python3.6_pip_packages.sh
index 1ca12c6c60..ec7d9bf195 100755
--- a/tensorflow/tools/ci_build/install/install_python3.6_pip_packages.sh
+++ b/tensorflow/tools/ci_build/install/install_python3.6_pip_packages.sh
@@ -36,8 +36,6 @@ pip3.6 -V
 which pip3.6
 ln -s /usr/local/bin/pip3.6 /usr/local/bin/pip3
 
-pip3 install --upgrade virtualenv
-
 set -e
 # Install six.
 pip3 install --upgrade absl-py
diff --git a/tensorflow/tools/ci_build/linux/gpu/run_cc_core.sh b/tensorflow/tools/ci_build/linux/gpu/run_cc_core.sh
index ac83e90f76..df196f829c 100755
--- a/tensorflow/tools/ci_build/linux/gpu/run_cc_core.sh
+++ b/tensorflow/tools/ci_build/linux/gpu/run_cc_core.sh
@@ -28,8 +28,6 @@ echo ""
 export PYTHON_BIN_PATH=`which python3`
 
 export TF_NEED_CUDA=1
-export TF_CUDA_VERSION=8.0
-export TF_CUDNN_VERSION=6
 export TF_CUDA_COMPUTE_CAPABILITIES=3.7
 
 yes "" | $PYTHON_BIN_PATH configure.py
diff --git a/tensorflow/tools/ci_build/linux/gpu/run_py3_core.sh b/tensorflow/tools/ci_build/linux/gpu/run_py3_core.sh
index 6b80f44729..abd256a895 100755
--- a/tensorflow/tools/ci_build/linux/gpu/run_py3_core.sh
+++ b/tensorflow/tools/ci_build/linux/gpu/run_py3_core.sh
@@ -28,8 +28,6 @@ echo ""
 export PYTHON_BIN_PATH=`which python3`
 
 export TF_NEED_CUDA=1
-export TF_CUDA_VERSION=8.0
-export TF_CUDNN_VERSION=6
 export TF_CUDA_COMPUTE_CAPABILITIES=3.7
 
 yes "" | $PYTHON_BIN_PATH configure.py
diff --git a/tensorflow/tools/ci_build/pi/build_raspberry_pi.sh b/tensorflow/tools/ci_build/pi/build_raspberry_pi.sh
index 1bd1852ffc..88116d9f24 100755
--- a/tensorflow/tools/ci_build/pi/build_raspberry_pi.sh
+++ b/tensorflow/tools/ci_build/pi/build_raspberry_pi.sh
@@ -82,7 +82,6 @@ if [[ $1 == "PI_ONE" ]]; then
 else
   PI_COPTS='--copt=-march=armv7-a --copt=-mfpu=neon-vfpv4
   --copt=-std=gnu11 --copt=-DS_IREAD=S_IRUSR --copt=-DS_IWRITE=S_IWUSR
-  --copt=-O3
   --copt=-U__GCC_HAVE_SYNC_COMPARE_AND_SWAP_1
   --copt=-U__GCC_HAVE_SYNC_COMPARE_AND_SWAP_2
   --copt=-U__GCC_HAVE_SYNC_COMPARE_AND_SWAP_8'
diff --git a/tensorflow/tools/ci_build/remote/remote_docker_build.sh b/tensorflow/tools/ci_build/remote/remote_docker_build.sh
index e00a66aaba..3ac6840f4e 100755
--- a/tensorflow/tools/ci_build/remote/remote_docker_build.sh
+++ b/tensorflow/tools/ci_build/remote/remote_docker_build.sh
@@ -124,7 +124,7 @@ function build_tf_image {
 
 
 function publish_tf_image {
-  gcr_tf_image="gcr.io/tensorflow/${tf_image}"
+  $gcr_tf_image="gcr.io/tensorflow/${tf_image}"
   docker tag $tf_image $gcr_tf_image
   gcloud docker -- push $gcr_tf_image
 }
diff --git a/tensorflow/tools/ci_build/windows/bazel/bazel_test_lib.sh b/tensorflow/tools/ci_build/windows/bazel/bazel_test_lib.sh
index 8d50250c3a..44b6d52952 100644
--- a/tensorflow/tools/ci_build/windows/bazel/bazel_test_lib.sh
+++ b/tensorflow/tools/ci_build/windows/bazel/bazel_test_lib.sh
@@ -96,6 +96,10 @@ exclude_cpu_cc_tests="${failing_cpu_cc_tests} + ${broken_cpu_cc_tests}"
 
 exclude_gpu_cc_tests="${extra_failing_gpu_cc_tests} + ${exclude_cpu_cc_tests}"
 
+function clean_output_base() {
+  bazel clean --expunge
+}
+
 function run_configure_for_cpu_build {
   # Due to a bug in Bazel: https://github.com/bazelbuild/bazel/issues/2182
   # yes "" | ./configure doesn't work on Windows, so we set all the
@@ -111,7 +115,7 @@ function run_configure_for_cpu_build {
     export TF_NEED_MKL=0
   fi
   export TF_NEED_VERBS=0
-  export TF_NEED_GCP=1
+  export TF_NEED_GCP=0
   export TF_NEED_HDFS=0
   export TF_NEED_OPENCL_SYCL=0
   echo "" | ./configure
diff --git a/tensorflow/tools/ci_build/windows/bazel/common_env.sh b/tensorflow/tools/ci_build/windows/bazel/common_env.sh
index f88e7176f0..4a653698a2 100644
--- a/tensorflow/tools/ci_build/windows/bazel/common_env.sh
+++ b/tensorflow/tools/ci_build/windows/bazel/common_env.sh
@@ -36,6 +36,12 @@ export BAZEL_SH=${BAZEL_SH:-"C:/tools/msys64/usr/bin/bash"}
 export PYTHON_BIN_PATH="C:/Program Files/Anaconda3/python.exe"
 export PYTHON_LIB_PATH="C:/Program Files/Anaconda3/lib/site-packages"
 
+# Set Python path for cc_configure.bzl
+export BAZEL_PYTHON="C:/Program Files/Anaconda3/python.exe"
+
+# Set Visual Studio path
+export BAZEL_VS="C:/Program Files (x86)/Microsoft Visual Studio 14.0"
+
 # Add python into PATH, it's needed because gen_git_source.py uses
 # '/usr/bin/env python' as a shebang
 export PATH="/c/Program Files/Anaconda3:$PATH"
@@ -47,3 +53,13 @@ export PATH="/c/Program Files/Anaconda3/Scripts:$PATH"
 export PATH="/c/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v8.0/bin:$PATH"
 export PATH="/c/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v8.0/extras/CUPTI/libx64:$PATH"
 export PATH="/c/tools/cuda/bin:$PATH"
+
+# Set the common build options on Windows
+export BUILD_OPTS='--config=monolithic --copt=-w --host_copt=-w --verbose_failures --experimental_ui'
+
+# Build TF with wrapper-less CROSSTOOL
+# TODO(pcloudy): Remove this after wrapper-less CROSSTOOL becomes default
+export NO_MSVC_WRAPPER=1
+
+export USE_DYNAMIC_CRT=1
+
diff --git a/tensorflow/tools/ci_build/windows/cpu/bazel/run_cc_test_windows.sh b/tensorflow/tools/ci_build/windows/cpu/bazel/run_cc_test_windows.sh
index 748a961e44..8c419347d6 100644
--- a/tensorflow/tools/ci_build/windows/cpu/bazel/run_cc_test_windows.sh
+++ b/tensorflow/tools/ci_build/windows/cpu/bazel/run_cc_test_windows.sh
@@ -42,6 +42,8 @@ source "tensorflow/tools/ci_build/windows/bazel/common_env.sh" \
 source "tensorflow/tools/ci_build/windows/bazel/bazel_test_lib.sh" \
   || { echo "Failed to source bazel_test_lib.sh" >&2; exit 1; }
 
+clean_output_base
+
 run_configure_for_cpu_build
 
 # Compliling the following test is extremely slow with -c opt
@@ -52,5 +54,5 @@ passing_tests=$(bazel query "kind(cc_test, //tensorflow/cc/... + //tensorflow/co
   # We need to strip \r so that the result could be store into a variable under MSYS
   tr '\r' ' ')
 
-bazel test -k $slow_compiling_test --test_output=errors
-bazel test -c opt -k $passing_tests --test_output=errors
+bazel test $BUILD_OPTS -k $slow_compiling_test --test_output=errors
+bazel test -c opt $BUILD_OPTS -k $passing_tests --test_output=errors
diff --git a/tensorflow/tools/ci_build/windows/cpu/pip/build_tf_windows.sh b/tensorflow/tools/ci_build/windows/cpu/pip/build_tf_windows.sh
index 31b4226a30..8520ca898f 100644
--- a/tensorflow/tools/ci_build/windows/cpu/pip/build_tf_windows.sh
+++ b/tensorflow/tools/ci_build/windows/cpu/pip/build_tf_windows.sh
@@ -44,7 +44,9 @@ source "tensorflow/tools/ci_build/windows/bazel/bazel_test_lib.sh" \
 
 run_configure_for_cpu_build
 
-bazel build -c opt tensorflow/tools/pip_package:build_pip_package || exit $?
+clean_output_base
+
+bazel build -c opt $BUILD_OPTS tensorflow/tools/pip_package:build_pip_package || exit $?
 
 # Create a python test directory to avoid package name conflict
 PY_TEST_DIR="py_test_dir"
@@ -58,8 +60,11 @@ reinstall_tensorflow_pip ${PIP_NAME}
 
 # Define no_tensorflow_py_deps=true so that every py_test has no deps anymore,
 # which will result testing system installed tensorflow
-bazel test -c opt -k --test_output=errors \
+# TODO(pcloudy): Remove TF_SAVER_LENIENT_NAMES after
+# https://github.com/tensorflow/tensorflow/issues/12844 is fixed.
+bazel test -c opt $BUILD_OPTS -k --test_output=errors \
   --define=no_tensorflow_py_deps=true --test_lang_filters=py \
   --test_tag_filters=-no_pip,-no_windows,-no_oss \
   --build_tag_filters=-no_pip,-no_windows,-no_oss --build_tests_only \
+  --test_env=TF_SAVER_LENIENT_NAMES=True \
   //${PY_TEST_DIR}/tensorflow/python/...
diff --git a/tensorflow/tools/ci_build/windows/gpu/bazel/run_cc_test_windows.sh b/tensorflow/tools/ci_build/windows/gpu/bazel/run_cc_test_windows.sh
index f26f8727e5..3fd960deab 100644
--- a/tensorflow/tools/ci_build/windows/gpu/bazel/run_cc_test_windows.sh
+++ b/tensorflow/tools/ci_build/windows/gpu/bazel/run_cc_test_windows.sh
@@ -56,5 +56,5 @@ passing_tests=$(bazel query "kind(cc_test, //tensorflow/cc/... + //tensorflow/co
 
 # TODO(pcloudy): There is a bug in Bazel preventing build with GPU support without -c opt
 # Re-enable this test after it is fixed.
-# bazel test --config=win-cuda -k $slow_compiling_test --test_output=errors
-bazel test -c opt --config=win-cuda -k $passing_tests --test_output=errors
+# bazel test --config=win-cuda $BUILD_OPTS -k $slow_compiling_test --test_output=errors
+bazel test -c opt --config=win-cuda $BUILD_OPTS -k $passing_tests --test_output=errors
diff --git a/tensorflow/tools/ci_build/windows/gpu/pip/build_tf_windows.sh b/tensorflow/tools/ci_build/windows/gpu/pip/build_tf_windows.sh
index 922bb67bbf..47ca42d642 100644
--- a/tensorflow/tools/ci_build/windows/gpu/pip/build_tf_windows.sh
+++ b/tensorflow/tools/ci_build/windows/gpu/pip/build_tf_windows.sh
@@ -44,7 +44,9 @@ source "tensorflow/tools/ci_build/windows/bazel/bazel_test_lib.sh" \
 
 run_configure_for_gpu_build
 
-bazel build -c opt tensorflow/tools/pip_package:build_pip_package || exit $?
+clean_output_base
+
+bazel build -c opt $BUILD_OPTS tensorflow/tools/pip_package:build_pip_package || exit $?
 
 # Create a python test directory to avoid package name conflict
 PY_TEST_DIR="py_test_dir"
@@ -59,8 +61,11 @@ reinstall_tensorflow_pip ${PIP_NAME}
 # Define no_tensorflow_py_deps=true so that every py_test has no deps anymore,
 # which will result testing system installed tensorflow
 # GPU tests are very flaky when running concurrently, so set local_test_jobs=1
-bazel test -c opt -k --test_output=errors \
+# TODO(pcloudy): Remove TF_SAVER_LENIENT_NAMES after
+# https://github.com/tensorflow/tensorflow/issues/12844 is fixed.
+bazel test -c opt $BUILD_OPTS -k --test_output=errors \
   --define=no_tensorflow_py_deps=true --test_lang_filters=py \
   --test_tag_filters=-no_pip,-no_windows,-no_windows_gpu,-no_gpu,-no_pip_gpu,no_oss \
   --build_tag_filters=-no_pip,-no_windows,-no_windows_gpu,-no_gpu,-no_pip_gpu,no_oss \
+  --test_env=TF_SAVER_LENIENT_NAMES=True \
   --local_test_jobs=1 --build_tests_only //${PY_TEST_DIR}/tensorflow/python/...
diff --git a/tensorflow/tools/ci_build/windows/libtensorflow_cpu.sh b/tensorflow/tools/ci_build/windows/libtensorflow_cpu.sh
index 80f2b590c9..9ac3613f27 100755
--- a/tensorflow/tools/ci_build/windows/libtensorflow_cpu.sh
+++ b/tensorflow/tools/ci_build/windows/libtensorflow_cpu.sh
@@ -44,12 +44,13 @@ export TF_BAZEL_TARGETS="${TF_BAZEL_TARGETS} //tensorflow/tools/lib_package:clic
 export TF_BAZEL_TARGETS="${TF_BAZEL_TARGETS} //tensorflow/java:libtensorflow_jni.so"
 export TF_BAZEL_TARGETS="${TF_BAZEL_TARGETS} //tensorflow/tools/lib_package:jnilicenses_generate"
 
+clean_output_base
 run_configure_for_cpu_build
 
 # build_libtensorflow_tarball in ../builds/libtensorflow.sh
 # cannot be used on Windows since it relies on pkg_tar rules.
 # So we do something special here
-bazel build -c opt \
+bazel build -c opt ${BUILD_OPTS} \
   tensorflow:libtensorflow.so \
   tensorflow/tools/lib_package:clicenses_generate \
   tensorflow/java:libtensorflow_jni.so \
diff --git a/tensorflow/tools/ci_build/xla/linux/gpu/run_py3.sh b/tensorflow/tools/ci_build/xla/linux/gpu/run_py3.sh
index 88333de856..a94a627dfb 100755
--- a/tensorflow/tools/ci_build/xla/linux/gpu/run_py3.sh
+++ b/tensorflow/tools/ci_build/xla/linux/gpu/run_py3.sh
@@ -28,8 +28,6 @@ echo ""
 export PYTHON_BIN_PATH=`which python3`
 
 export TF_NEED_CUDA=1
-export TF_CUDA_VERSION=8.0
-export TF_CUDNN_VERSION=6
 export TF_CUDA_COMPUTE_CAPABILITIES=3.7
 
 yes "" | $PYTHON_BIN_PATH configure.py
diff --git a/tensorflow/tools/docker/Dockerfile.devel b/tensorflow/tools/docker/Dockerfile.devel
index 0a6860e791..3525c7524f 100644
--- a/tensorflow/tools/docker/Dockerfile.devel
+++ b/tensorflow/tools/docker/Dockerfile.devel
@@ -69,8 +69,11 @@ RUN mkdir /bazel && \
     rm -f /bazel/bazel-$BAZEL_VERSION-installer-linux-x86_64.sh
 
 # Download and build TensorFlow.
+
+RUN git clone https://github.com/tensorflow/tensorflow.git && \
+    cd tensorflow && \
+    git checkout r1.4
 WORKDIR /tensorflow
-RUN git clone --branch=r1.4 --depth=1 https://github.com/tensorflow/tensorflow.git .
 
 # TODO(craigcitro): Don't install the pip package, since it makes it
 # more difficult to experiment with local changes. Instead, just add
diff --git a/tensorflow/tools/docker/Dockerfile.devel-gpu b/tensorflow/tools/docker/Dockerfile.devel-gpu
index 4164cc3f88..041f45971b 100644
--- a/tensorflow/tools/docker/Dockerfile.devel-gpu
+++ b/tensorflow/tools/docker/Dockerfile.devel-gpu
@@ -1,20 +1,11 @@
-FROM nvidia/cuda:9.0-base-ubuntu16.04
+FROM nvidia/cuda:8.0-cudnn6-devel-ubuntu16.04
 
 LABEL maintainer="Craig Citro <craigcitro@google.com>"
 
 RUN apt-get update && apt-get install -y --no-install-recommends \
         build-essential \
-        cuda-command-line-tools-9-0 \
-        cuda-cublas-dev-9-0 \
-        cuda-cudart-dev-9-0 \
-        cuda-cufft-dev-9-0 \
-        cuda-curand-dev-9-0 \
-        cuda-cusolver-dev-9-0 \
-        cuda-cusparse-dev-9-0 \
         curl \
         git \
-        libcudnn7=7.0.5.15-1+cuda9.0 \
-        libcudnn7-dev=7.0.5.15-1+cuda9.0 \
         libcurl3-dev \
         libfreetype6-dev \
         libpng12-dev \
@@ -26,11 +17,12 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
         unzip \
         zip \
         zlib1g-dev \
+        openjdk-8-jdk \
+        openjdk-8-jre-headless \
         wget \
         && \
-    rm -rf /var/lib/apt/lists/* && \
-    find /usr/local/cuda-9.0/lib64/ -type f -name 'lib*_static.a' -not -name 'libcudart_static.a' -delete && \
-    rm /usr/lib/x86_64-linux-gnu/libcudnn_static_v7.a
+    apt-get clean && \
+    rm -rf /var/lib/apt/lists/*
 
 RUN curl -fSsL -O https://bootstrap.pypa.io/get-pip.py && \
     python get-pip.py && \
@@ -78,16 +70,18 @@ RUN mkdir /bazel && \
     rm -f /bazel/bazel-$BAZEL_VERSION-installer-linux-x86_64.sh
 
 # Download and build TensorFlow.
+
+RUN git clone https://github.com/tensorflow/tensorflow.git && \
+    cd tensorflow && \
+    git checkout r1.4
 WORKDIR /tensorflow
-RUN git clone --branch=r1.4 --depth=1 https://github.com/tensorflow/tensorflow.git .
 
 # Configure the build for our CUDA configuration.
 ENV CI_BUILD_PYTHON python
 ENV LD_LIBRARY_PATH /usr/local/cuda/extras/CUPTI/lib64:$LD_LIBRARY_PATH
 ENV TF_NEED_CUDA 1
 ENV TF_CUDA_COMPUTE_CAPABILITIES=3.0,3.5,5.2,6.0,6.1
-ENV TF_CUDA_VERSION=9.0
-ENV TF_CUDNN_VERSION=7
+
 
 RUN ln -s /usr/local/cuda/lib64/stubs/libcuda.so /usr/local/cuda/lib64/stubs/libcuda.so.1 && \
     LD_LIBRARY_PATH=/usr/local/cuda/lib64/stubs:${LD_LIBRARY_PATH} \
diff --git a/tensorflow/tools/docker/Dockerfile.gpu b/tensorflow/tools/docker/Dockerfile.gpu
index b6682cd681..e212d10290 100644
--- a/tensorflow/tools/docker/Dockerfile.gpu
+++ b/tensorflow/tools/docker/Dockerfile.gpu
@@ -1,4 +1,4 @@
-FROM nvidia/cuda:9.0-cudnn7-runtime-ubuntu16.04
+FROM nvidia/cuda:8.0-cudnn6-runtime-ubuntu16.04
 
 LABEL maintainer="Craig Citro <craigcitro@google.com>"
 
diff --git a/tensorflow/tools/docker/parameterized_docker_build.sh b/tensorflow/tools/docker/parameterized_docker_build.sh
index e7de7df856..80a07b9b3b 100755
--- a/tensorflow/tools/docker/parameterized_docker_build.sh
+++ b/tensorflow/tools/docker/parameterized_docker_build.sh
@@ -265,7 +265,7 @@ else
   DOCKERFILE="${TMP_DIR}/Dockerfile"
 
   # Modify the devel Dockerfile to specify the git branch
-  sed "s/^RUN git clone --branch=.* --depth=1/RUN git clone --branch=${TF_DOCKER_BUILD_DEVEL_BRANCH} --depth=1/" \
+  sed -r "s/([\s]*git checkout )(.*)/\1${TF_DOCKER_BUILD_DEVEL_BRANCH}/g" \
       "${ORIG_DOCKERFILE}" > "${DOCKERFILE}"
 
   # Modify python/pip version if necessary.
diff --git a/tensorflow/tools/docs/generate_lib.py b/tensorflow/tools/docs/generate_lib.py
index 003f972070..f950f19a7c 100644
--- a/tensorflow/tools/docs/generate_lib.py
+++ b/tensorflow/tools/docs/generate_lib.py
@@ -199,12 +199,12 @@ def add_dict_to_dict(add_from, add_to):
       add_to[key] = add_from[key]
 
 
-# Exclude some libraries in contrib from the documentation altogether.
+# Exclude some libaries in contrib from the documentation altogether.
 def _get_default_private_map():
   return {'tf.test': ['mock']}
 
 
-# Exclude members of some libraries.
+# Exclude members of some libaries.
 def _get_default_do_not_descend_map():
   # TODO(wicke): Shrink this list once the modules get sealed.
   return {
diff --git a/tensorflow/tools/pip_package/BUILD b/tensorflow/tools/pip_package/BUILD
index d80d5ecc6a..33af4532c8 100644
--- a/tensorflow/tools/pip_package/BUILD
+++ b/tensorflow/tools/pip_package/BUILD
@@ -6,7 +6,6 @@ package(default_visibility = ["//visibility:private"])
 load(
     "//tensorflow:tensorflow.bzl",
     "if_not_windows",
-    "if_windows",
     "transitive_hdrs",
 )
 load("//third_party/mkl:build_defs.bzl", "if_mkl")
@@ -195,23 +194,3 @@ sh_binary(
         ],
     }) + if_mkl(["//third_party/mkl:intel_binary_blob"]),
 )
-
-# A genrule for generating a marker file for the pip package on Windows
-#
-# This only works on Windows, because :simple_console_for_windows is a
-# python zip file containing everything we need for building the pip package.
-# However, on other platforms, due to https://github.com/bazelbuild/bazel/issues/4223,
-# when C++ extensions change, this generule doesn't rebuild.
-genrule(
-    name = "win_pip_package_marker",
-    srcs = if_windows([
-        ":build_pip_package",
-        ":simple_console_for_windows",
-    ]),
-    outs = ["win_pip_package_marker_file"],
-    cmd = select({
-        "//conditions:default": "touch $@",
-        "//tensorflow:windows": "md5sum $(locations :build_pip_package) $(locations :simple_console_for_windows) > $@",
-    }),
-    visibility = ["//visibility:public"],
-)
diff --git a/tensorflow/tools/pip_package/build_pip_package.sh b/tensorflow/tools/pip_package/build_pip_package.sh
index f5203bc544..8249703ba7 100755
--- a/tensorflow/tools/pip_package/build_pip_package.sh
+++ b/tensorflow/tools/pip_package/build_pip_package.sh
@@ -24,7 +24,7 @@ function real_path() {
 function cp_external() {
   local src_dir=$1
   local dest_dir=$2
-  for f in `find "$src_dir" -maxdepth 1 -mindepth 1 ! -name '*local_config_cuda*' ! -name '*org_tensorflow*'`; do
+  for f in `find "$src_dir" -maxdepth 1 -mindepth 1 ! -name '*local_config_cuda*'`; do
     cp -R "$f" "$dest_dir"
   done
 }
@@ -92,6 +92,7 @@ function main() {
       bazel-bin/tensorflow/tools/pip_package/simple_console_for_window_unzip/runfiles/org_tensorflow/tensorflow \
       "${TMPDIR}"
     mkdir "${TMPDIR}/external"
+    # Note: this makes an extra copy of org_tensorflow.
     cp_external \
       bazel-bin/tensorflow/tools/pip_package/simple_console_for_window_unzip/runfiles \
       "${TMPDIR}/external"
@@ -122,6 +123,7 @@ function main() {
         bazel-bin/tensorflow/tools/pip_package/build_pip_package.runfiles/org_tensorflow/tensorflow \
         "${TMPDIR}"
       mkdir "${TMPDIR}/external"
+      # Note: this makes an extra copy of org_tensorflow.
       cp_external \
         bazel-bin/tensorflow/tools/pip_package/build_pip_package.runfiles \
         "${TMPDIR}/external"
diff --git a/tensorflow/tools/pip_package/pip_smoke_test.py b/tensorflow/tools/pip_package/pip_smoke_test.py
index 22e1584b78..cc46dd5162 100644
--- a/tensorflow/tools/pip_package/pip_smoke_test.py
+++ b/tensorflow/tools/pip_package/pip_smoke_test.py
@@ -42,7 +42,6 @@ BLACKLIST = [
     "//tensorflow/python:extra_py_tests_deps",
     "//tensorflow/cc/saved_model:saved_model_half_plus_two",
     "//tensorflow:no_tensorflow_py_deps",
-    "//tensorflow/tools/pip_package:win_pip_package_marker",
     "//tensorflow/python:test_ops_2",
     "//tensorflow/python:tf_optimizer",
     "//tensorflow/python:compare_test_proto_py",
diff --git a/tensorflow/tools/proto_text/gen_proto_text_functions.cc b/tensorflow/tools/proto_text/gen_proto_text_functions.cc
index f0bb59acf8..ecb29a65a0 100644
--- a/tensorflow/tools/proto_text/gen_proto_text_functions.cc
+++ b/tensorflow/tools/proto_text/gen_proto_text_functions.cc
@@ -132,7 +132,6 @@ int MainImpl(int argc, char** argv) {
       FILE* f = fopen(path.c_str(), "w");
       if (f == nullptr) return -1;
       if (fwrite(data.c_str(), 1, data.size(), f) != data.size()) {
-        fclose(f);
         return -1;
       }
       if (fclose(f) != 0) {
diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl
index 046c2b2391..b71f5dc4e5 100644
--- a/tensorflow/workspace.bzl
+++ b/tensorflow/workspace.bzl
@@ -74,11 +74,11 @@ def tf_workspace(path_prefix="", tf_repo_name=""):
   tf_http_archive(
       name = "mkl_dnn",
       urls = [
-          "https://mirror.bazel.build/github.com/01org/mkl-dnn/archive/aab753280e83137ba955f8f19d72cb6aaba545ef.tar.gz",
-          "https://github.com/01org/mkl-dnn/archive/aab753280e83137ba955f8f19d72cb6aaba545ef.tar.gz",
+          "https://mirror.bazel.build/github.com/01org/mkl-dnn/archive/b01e3a55a07be62172e713bcd2644c5176360212.tar.gz",
+          "https://github.com/01org/mkl-dnn/archive/b01e3a55a07be62172e713bcd2644c5176360212.tar.gz",
       ],
-      sha256 = "fb67f255a96bd4ad39b8dd104eca5aa92200c95c1ed36e59641e6c0478eefd11",
-      strip_prefix = "mkl-dnn-aab753280e83137ba955f8f19d72cb6aaba545ef",
+      sha256 = "0d529ad4c49dc799e6df07c2b88b115d0668735da15fb3b3862d28d33fa68165",
+      strip_prefix = "mkl-dnn-b01e3a55a07be62172e713bcd2644c5176360212",
       build_file = str(Label("//third_party/mkl_dnn:mkldnn.BUILD")),
   )
 
@@ -95,11 +95,11 @@ def tf_workspace(path_prefix="", tf_repo_name=""):
   tf_http_archive(
       name = "eigen_archive",
       urls = [
-          "https://mirror.bazel.build/bitbucket.org/eigen/eigen/get/b6e6d0cf6a77.tar.gz",
-          "https://bitbucket.org/eigen/eigen/get/b6e6d0cf6a77.tar.gz",
+          "https://mirror.bazel.build/bitbucket.org/eigen/eigen/get/429aa5254200.tar.gz",
+          "https://bitbucket.org/eigen/eigen/get/429aa5254200.tar.gz",
       ],
-      sha256 = "0840c497f2749b5e90bda666aab96be6da90dc75b4e21ca9843cae69b7fed52a",
-      strip_prefix = "eigen-eigen-b6e6d0cf6a77",
+      sha256 = "61d8b6fc4279dd1dda986fb1677d15e3d641c07a3ea5abe255790b1f0c0c14e9",
+      strip_prefix = "eigen-eigen-429aa5254200",
       build_file = str(Label("//third_party:eigen.BUILD")),
   )
 
diff --git a/third_party/curl.BUILD b/third_party/curl.BUILD
index 4def6f9489..e311c7e758 100644
--- a/third_party/curl.BUILD
+++ b/third_party/curl.BUILD
@@ -10,7 +10,6 @@ CURL_WIN_COPTS = [
     "/DHAVE_CONFIG_H",
     "/DCURL_DISABLE_FTP",
     "/DCURL_DISABLE_NTLM",
-    "/DCURL_DISABLE_PROXY",
     "/DHAVE_LIBZ",
     "/DHAVE_ZLIB_H",
     # Defining _USING_V110_SDK71_ is hackery to defeat curl's incorrect
@@ -24,8 +23,6 @@ CURL_WIN_SRCS = [
     "lib/asyn-thread.c",
     "lib/inet_ntop.c",
     "lib/system_win32.c",
-    "lib/vtls/schannel.c",
-    "lib/idn_win32.c",
 ]
 
 cc_library(
@@ -279,7 +276,6 @@ cc_library(
             "-DCURL_MAX_WRITE_SIZE=65536",
         ],
     }),
-    defines = ["CURL_STATICLIB"],
     includes = ["include"],
     linkopts = select({
         "@org_tensorflow//tensorflow:android": [
@@ -293,16 +289,10 @@ cc_library(
         ],
         "@org_tensorflow//tensorflow:ios": [],
         "@org_tensorflow//tensorflow:windows": [
-            "-DEFAULTLIB:ws2_32.lib",
-            "-DEFAULTLIB:advapi32.lib",
-            "-DEFAULTLIB:crypt32.lib",
-            "-DEFAULTLIB:Normaliz.lib",
+            "-Wl,ws2_32.lib",
         ],
         "@org_tensorflow//tensorflow:windows_msvc": [
-            "-DEFAULTLIB:ws2_32.lib",
-            "-DEFAULTLIB:advapi32.lib",
-            "-DEFAULTLIB:crypt32.lib",
-            "-DEFAULTLIB:Normaliz.lib",
+            "-Wl,ws2_32.lib",
         ],
         "//conditions:default": [
             "-lrt",
@@ -448,22 +438,12 @@ genrule(
         "#  include \"lib/config-win32.h\"",
         "#  define BUILDING_LIBCURL 1",
         "#  define CURL_DISABLE_CRYPTO_AUTH 1",
-        "#  define CURL_DISABLE_DICT 1",
-        "#  define CURL_DISABLE_FILE 1",
-        "#  define CURL_DISABLE_GOPHER 1",
         "#  define CURL_DISABLE_IMAP 1",
         "#  define CURL_DISABLE_LDAP 1",
         "#  define CURL_DISABLE_LDAPS 1",
         "#  define CURL_DISABLE_POP3 1",
         "#  define CURL_PULL_WS2TCPIP_H 1",
-        "#  define CURL_DISABLE_SMTP 1",
-        "#  define CURL_DISABLE_TELNET 1",
-        "#  define CURL_DISABLE_TFTP 1",
-        "#  define CURL_PULL_WS2TCPIP_H 1",
-        "#  define USE_WINDOWS_SSPI 1",
-        "#  define USE_WIN32_IDN 1",
-        "#  define USE_SCHANNEL 1",
-        "#  define WANT_IDN_PROTOTYPES 1",
+        "#  define HTTP_ONLY 1",
         "#elif defined(__APPLE__)",
         "#  define HAVE_FSETXATTR_6 1",
         "#  define HAVE_SETMODE 1",
diff --git a/third_party/pcre.BUILD b/third_party/pcre.BUILD
index e2cdec4029..68aadd1d40 100644
--- a/third_party/pcre.BUILD
+++ b/third_party/pcre.BUILD
@@ -50,12 +50,12 @@ cc_library(
         "-DNEWLINE=10",
         "-DNO_RECURSE",
         "-DPARENS_NEST_LIMIT=50",
+        "-DPCRE_STATIC=1",
         "-DPOSIX_MALLOC_THRESHOLD=10",
         "-DSTDC_HEADERS=1",
         "-DSUPPORT_UCP",
         "-DSUPPORT_UTF",
     ],
-    defines = ["PCRE_STATIC=1"],
     includes = ["."],
     visibility = ["@swig//:__pkg__"],  # Please use RE2
     alwayslink = 1,
-- 
GitLab


From 713d45278491d792c525344de6038a61ebcb2136 Mon Sep 17 00:00:00 2001
From: Kay Zhu <kayzhu@google.com>
Date: Fri, 15 Dec 2017 18:11:47 -0800
Subject: [PATCH 1114/1225] [XLA] Support Map in HloEvaluator, enable
 Interpreter to run xla/tests:map_map_test which tests this change.

Additionally:
- templatize Evaluate* methods to specialize on both std::unique_ptr<Literal> and const Literal* type of input literal arguments.
- add ResetVisitState to DfsHloVisitor such that a visitor instance can traverse the same HLO graph more than once.
PiperOrigin-RevId: 179263540
---
 tensorflow/compiler/xla/reference_util.cc     |   2 +-
 .../compiler/xla/service/dfs_hlo_visitor.h    |   5 +
 .../compiler/xla/service/hlo_evaluator.cc     | 177 +++++++++++++++---
 .../compiler/xla/service/hlo_evaluator.h      |  29 ++-
 .../xla/service/interpreter/executable.cc     |   7 +-
 tensorflow/compiler/xla/service/service.cc    |   5 +-
 tensorflow/compiler/xla/tests/BUILD           |   1 +
 7 files changed, 189 insertions(+), 37 deletions(-)

diff --git a/tensorflow/compiler/xla/reference_util.cc b/tensorflow/compiler/xla/reference_util.cc
index bdf92eaed1..0a15540015 100644
--- a/tensorflow/compiler/xla/reference_util.cc
+++ b/tensorflow/compiler/xla/reference_util.cc
@@ -532,7 +532,7 @@ ReferenceUtil::ConvArray4DGeneralDimensionsDilated(
 
   HloEvaluator evaluator;
   std::unique_ptr<Literal> result_literal =
-      evaluator.Evaluate(*computation, {}).ConsumeValueOrDie();
+      evaluator.Evaluate<const Literal*>(*computation, {}).ConsumeValueOrDie();
 
   CHECK_EQ(ShapeUtil::Rank(result_literal->shape()), 4);
   auto result =
diff --git a/tensorflow/compiler/xla/service/dfs_hlo_visitor.h b/tensorflow/compiler/xla/service/dfs_hlo_visitor.h
index 91086fd4a5..0d54e325e6 100644
--- a/tensorflow/compiler/xla/service/dfs_hlo_visitor.h
+++ b/tensorflow/compiler/xla/service/dfs_hlo_visitor.h
@@ -247,6 +247,10 @@ class DfsHloVisitorBase {
   // affecting correctness.
   void ReserveVisitStates(int num) { visit_state_.Reserve(num); }
 
+  // Useful when we want to visit the same computation more than once with the
+  // same visitor.
+  void ResetVisitStates() { visit_state_.Reset(); }
+
   void SetVisitState(int id, VisitState state) {
     visit_state_.SetState(id, state);
   }
@@ -326,6 +330,7 @@ class DfsHloVisitorBase {
       *w = (*w & ~mask) | (static_cast<uint64>(state) << shift);
       DCHECK_EQ(GetState(id), state);
     }
+    void Reset() { states_.clear(); }
 
    private:
     static const uint32 kStatesPerWord = sizeof(uint64) / 2 /*bits per entry*/;
diff --git a/tensorflow/compiler/xla/service/hlo_evaluator.cc b/tensorflow/compiler/xla/service/hlo_evaluator.cc
index 150f9f2d6e..173f0e2c42 100644
--- a/tensorflow/compiler/xla/service/hlo_evaluator.cc
+++ b/tensorflow/compiler/xla/service/hlo_evaluator.cc
@@ -29,6 +29,7 @@ limitations under the License.
 #include "tensorflow/compiler/xla/map_util.h"
 #include "tensorflow/compiler/xla/primitive_util.h"
 #include "tensorflow/compiler/xla/ptr_util.h"
+#include "tensorflow/compiler/xla/service/hlo_instruction.h"
 #include "tensorflow/compiler/xla/service/hlo_opcode.h"
 #include "tensorflow/compiler/xla/service/hlo_query.h"
 #include "tensorflow/compiler/xla/service/shape_inference.h"
@@ -1210,6 +1211,97 @@ class HloEvaluator::TypedVisitor : public DfsHloVisitorWithDefault {
     return Status::OK();
   }
 
+  template <typename NativeT>
+  StatusOr<std::unique_ptr<Literal>> MapImpl(HloInstruction* map) {
+    auto operands = map->operands();
+    HloComputation* computation = map->to_apply();
+
+    auto result = Literal::CreateFromShape(map->shape());
+
+    HloEvaluator embedded_evaluator;
+    TF_RETURN_IF_ERROR(result->Populate<ReturnT>(
+        [&](tensorflow::gtl::ArraySlice<int64> multi_index) {
+          std::vector<std::unique_ptr<Literal>> arg_literals;
+          arg_literals.reserve(operands.size());
+
+          // Construct scalar literal parameters to be passed to the map
+          // computation.
+          for (auto operand : operands) {
+            const Literal& arg_literal =
+                parent_->GetEvaluatedLiteralFor(operand);
+
+            auto curr_val = arg_literal.Get<NativeT>(multi_index);
+            auto curr_val_literal = Literal::CreateR0<NativeT>(curr_val);
+
+            arg_literals.push_back(std::move(curr_val_literal));
+          }
+
+          std::unique_ptr<Literal> computed_result =
+              embedded_evaluator
+                  .Evaluate<std::unique_ptr<Literal>>(*computation,
+                                                      arg_literals)
+                  .ConsumeValueOrDie();
+          // Clear visit states so that the we can use the evaluate again on
+          // the same computation.
+          embedded_evaluator.ResetVisitStates();
+
+          return computed_result->Get<ReturnT>({});
+        }));
+    return std::move(result);
+  }
+
+  Status HandleMap(HloInstruction* map) override {
+    switch (map->operand(0)->shape().element_type()) {
+      case PRED: {
+        TF_ASSIGN_OR_RETURN(parent_->evaluated_[map], MapImpl<bool>(map));
+        break;
+      }
+      case U8: {
+        TF_ASSIGN_OR_RETURN(parent_->evaluated_[map], MapImpl<uint8>(map));
+        break;
+      }
+      case U32: {
+        TF_ASSIGN_OR_RETURN(parent_->evaluated_[map], MapImpl<uint32>(map));
+        break;
+      }
+      case U64: {
+        TF_ASSIGN_OR_RETURN(parent_->evaluated_[map], MapImpl<uint64>(map));
+        break;
+      }
+      case S8: {
+        TF_ASSIGN_OR_RETURN(parent_->evaluated_[map], MapImpl<int8>(map));
+        break;
+      }
+      case S32: {
+        TF_ASSIGN_OR_RETURN(parent_->evaluated_[map], MapImpl<int32>(map));
+        break;
+      }
+      case S64: {
+        TF_ASSIGN_OR_RETURN(parent_->evaluated_[map], MapImpl<int64>(map));
+        break;
+      }
+      case F32: {
+        TF_ASSIGN_OR_RETURN(parent_->evaluated_[map], MapImpl<float>(map));
+        break;
+      }
+      case F64: {
+        TF_ASSIGN_OR_RETURN(parent_->evaluated_[map], MapImpl<double>(map));
+        break;
+      }
+      case C64: {
+        TF_ASSIGN_OR_RETURN(parent_->evaluated_[map], MapImpl<complex64>(map));
+        break;
+      }
+      default:
+        LOG(FATAL) << "HandleMap: unhandled primitive type for "
+                      "input operand: "
+                   << PrimitiveType_Name(
+                          map->operand(0)->shape().element_type());
+    }
+
+    return Status::OK();
+  }
+
   Status HandleReduce(HloInstruction* reduce) override {
     auto arg = reduce->operand(0);
     auto init_value = reduce->operand(1);
@@ -1256,6 +1348,7 @@ class HloEvaluator::TypedVisitor : public DfsHloVisitorWithDefault {
       }
     }
 
+    HloEvaluator embedded_evaluator;
     // For each resulting dimension, calculate and assign computed value.
     TF_RETURN_IF_ERROR(result->Populate<ReturnT>(
         [&](tensorflow::gtl::ArraySlice<int64> multi_index) {
@@ -1275,13 +1368,12 @@ class HloEvaluator::TypedVisitor : public DfsHloVisitorWithDefault {
             std::vector<const Literal*> args = {curr_val_literal.get(),
                                                 result_val_literal.get()};
 
-            // We need a new visitor for each evaluation, so that the same
-            // computation can be visited more than once (with different
-            // inputs).
-            HloEvaluator embedded_evaluator;
             std::unique_ptr<Literal> computed_result =
-                embedded_evaluator.Evaluate(*function, args)
+                embedded_evaluator.Evaluate<const Literal*>(*function, args)
                     .ConsumeValueOrDie();
+            // Clear visit states so that the we can use the evaluate again on
+            // the same computation.
+            embedded_evaluator.ResetVisitStates();
 
             // Assign computed result to result_val.
             result_val = computed_result->Get<ReturnT>({});
@@ -1338,6 +1430,7 @@ class HloEvaluator::TypedVisitor : public DfsHloVisitorWithDefault {
     DimensionVector window_index(window.dimensions_size());
     DimensionVector operand_index(ShapeUtil::Rank(operand_literal.shape()));
 
+    HloEvaluator embedded_evaluator;
     // For each resulting dimension, calculate and assign computed value.
     TF_RETURN_IF_ERROR(result->Populate<ReturnT>(
         [&](tensorflow::gtl::ArraySlice<int64> output_index) {
@@ -1369,14 +1462,14 @@ class HloEvaluator::TypedVisitor : public DfsHloVisitorWithDefault {
                 Literal::CreateR0<ReturnT>(result_val);
             const std::vector<const Literal*> args = {curr_val_literal.get(),
                                                       result_val_literal.get()};
-            // We need a new visitor for each evaluation, so that the same
-            // computation can be visited more than once (with different
-            // inputs).
-            HloEvaluator embedded_evaluator;
             std::unique_ptr<Literal> computed_result =
-                embedded_evaluator.Evaluate(*function, args)
+                embedded_evaluator.Evaluate<const Literal*>(*function, args)
                     .ConsumeValueOrDie();
 
+            // Clear visit states so that the we can use the evaluate again on
+            // the same computation.
+            embedded_evaluator.ResetVisitStates();
+
             result_val = computed_result->Get<ReturnT>({});
           } while (IndexUtil::BumpIndices(window_shape, &window_index));
 
@@ -1542,8 +1635,8 @@ class HloEvaluator::TypedVisitor : public DfsHloVisitorWithDefault {
     const auto* lhs = instruction->operand(0);
     const auto* rhs = instruction->operand(1);
 
-    // TODO(b/35950897, b/27796129): add DCHECK back once implicit broadcast is
-    // removed.
+    // TODO(b/35950897, b/27796129): add DCHECK back once implicit broadcast
+    // is removed.
     if (!(ShapeUtil::SameDimensions(shape, rhs->shape()) &&
           ShapeUtil::SameDimensions(lhs->shape(), rhs->shape()))) {
       return Unimplemented(
@@ -1644,13 +1737,17 @@ HloEvaluator::HloEvaluator() {
   });
 }
 
+template <typename LiteralPtr>
 StatusOr<std::unique_ptr<Literal>> HloEvaluator::Evaluate(
     const HloModule& module,
-    tensorflow::gtl::ArraySlice<const Literal*> arg_literals) {
+    tensorflow::gtl::ArraySlice<LiteralPtr> arg_literals) {
   XLA_VLOG_LINES(2, "HloEvaluator::Evaluate module:\n" + module.ToString());
 
-  arg_literals_ = arg_literals;
   evaluated_.clear();
+  arg_literals_.clear();
+  for (const auto& literal_ptr : arg_literals) {
+    arg_literals_.push_back(&*literal_ptr);
+  }
 
   TF_RETURN_IF_ERROR(module.entry_computation()->Accept(this));
 
@@ -1658,27 +1755,36 @@ StatusOr<std::unique_ptr<Literal>> HloEvaluator::Evaluate(
       GetEvaluatedLiteralFor(module.entry_computation()->root_instruction()));
 }
 
+template <typename LiteralPtr>
 StatusOr<std::unique_ptr<Literal>> HloEvaluator::Evaluate(
     const HloComputation& computation,
-    tensorflow::gtl::ArraySlice<const Literal*> arg_literals) {
+    tensorflow::gtl::ArraySlice<LiteralPtr> arg_literals) {
   XLA_VLOG_LINES(
       2, "HloEvaluator::Evaluate computation:\n" + computation.ToString());
-  arg_literals_ = arg_literals;
+
   evaluated_.clear();
+  arg_literals_.clear();
+  for (const auto& literal_ptr : arg_literals) {
+    arg_literals_.push_back(&*literal_ptr);
+  }
 
   TF_RETURN_IF_ERROR(computation.Accept(this));
   return MakeUnique<Literal>(
       GetEvaluatedLiteralFor(computation.root_instruction()));
 }
 
+template <typename LiteralPtr>
 StatusOr<std::unique_ptr<Literal>> HloEvaluator::Evaluate(
     HloInstruction* instruction,
-    tensorflow::gtl::ArraySlice<const Literal*> operands) {
+    tensorflow::gtl::ArraySlice<LiteralPtr> arg_literals) {
   TF_RET_CHECK(hlo_query::AllOperandsAreParametersOrConstants(*instruction));
   TF_RETURN_IF_ERROR(ShapeUtil::ValidateShape(instruction->shape()));
 
-  arg_literals_ = operands;
   evaluated_.clear();
+  arg_literals_.clear();
+  for (const auto& literal_ptr : arg_literals) {
+    arg_literals_.push_back(&*literal_ptr);
+  }
 
   // Evaluate operands of Parameter type against the input literals which
   // caches the evaluated literal results.
@@ -1769,7 +1875,10 @@ Status HloEvaluator::HandleParameter(HloInstruction* parameter) {
   CHECK_LT(parameter->parameter_number(), arg_literals_.size());
   const Literal* input_literal = arg_literals_[parameter->parameter_number()];
   VLOG(2) << "Parameter evaluated to: " << input_literal->ToString();
-  DCHECK(ShapeUtil::Equal(parameter->shape(), input_literal->shape()));
+  DCHECK(ShapeUtil::Equal(parameter->shape(), input_literal->shape()))
+      << "parameter shape is: " << ShapeUtil::HumanString(parameter->shape())
+      << ", but input literal shape is: "
+      << ShapeUtil::HumanString(input_literal->shape());
 
   evaluated_[parameter] = MakeUnique<Literal>(*input_literal);
   return Status::OK();
@@ -1794,8 +1903,8 @@ Status HloEvaluator::HandleTranspose(HloInstruction* transpose) {
 Status HloEvaluator::HandleConcatenate(HloInstruction* concatenate) {
   tensorflow::gtl::ArraySlice<HloInstruction*> operands(
       concatenate->operands());
-  // The result concatenate dimension is going to be the sum of all concatenate
-  // dimensions of the operands taking part of the operation.
+  // The result concatenate dimension is going to be the sum of all
+  // concatenate dimensions of the operands taking part of the operation.
   const Shape& reference_shape = operands[0]->shape();
   CHECK(!ShapeUtil::IsTuple(reference_shape));
   const int64 rank = ShapeUtil::Rank(reference_shape);
@@ -2005,4 +2114,30 @@ Status HloEvaluator::Postprocess(HloInstruction* hlo) {
   return Status::OK();
 }
 
+// Explicit instantiation of templatized Evaluate* methods.
+//
+template StatusOr<std::unique_ptr<Literal>> HloEvaluator::Evaluate<
+    const Literal*>(const HloModule& module,
+                    tensorflow::gtl::ArraySlice<const Literal*> arg_literals);
+template StatusOr<std::unique_ptr<Literal>>
+HloEvaluator::Evaluate<std::unique_ptr<Literal>>(
+    const HloModule& module,
+    tensorflow::gtl::ArraySlice<std::unique_ptr<Literal>> arg_literals);
+
+template StatusOr<std::unique_ptr<Literal>> HloEvaluator::Evaluate<
+    const Literal*>(const HloComputation& computation,
+                    tensorflow::gtl::ArraySlice<const Literal*> arg_literals);
+template StatusOr<std::unique_ptr<Literal>>
+HloEvaluator::Evaluate<std::unique_ptr<Literal>>(
+    const HloComputation& computation,
+    tensorflow::gtl::ArraySlice<std::unique_ptr<Literal>> arg_literals);
+
+template StatusOr<std::unique_ptr<Literal>> HloEvaluator::Evaluate<
+    const Literal*>(HloInstruction* instruction,
+                    tensorflow::gtl::ArraySlice<const Literal*> arg_literals);
+template StatusOr<std::unique_ptr<Literal>>
+HloEvaluator::Evaluate<std::unique_ptr<Literal>>(
+    HloInstruction* instruction,
+    tensorflow::gtl::ArraySlice<std::unique_ptr<Literal>> arg_literals);
+
 }  // namespace xla
diff --git a/tensorflow/compiler/xla/service/hlo_evaluator.h b/tensorflow/compiler/xla/service/hlo_evaluator.h
index e7f6254a0c..02bb8b0a47 100644
--- a/tensorflow/compiler/xla/service/hlo_evaluator.h
+++ b/tensorflow/compiler/xla/service/hlo_evaluator.h
@@ -42,9 +42,12 @@ class HloEvaluator : public DfsHloVisitorWithDefault {
   // Precondition: The indices of arg_literals correspond to the parameter
   // numbers of the HLO parameters in the computation. See comment below for an
   // example.
+  // `LiteralPtr` accepts either std::unique_ptr<Literal> or const Literal*
+  // type.
+  template <typename LiteralPtr>
   StatusOr<std::unique_ptr<Literal>> Evaluate(
       const HloModule& module,
-      tensorflow::gtl::ArraySlice<const Literal*> arg_literals);
+      tensorflow::gtl::ArraySlice<LiteralPtr> arg_literals);
 
   // Evaluates an HLO computation and an array of pointers to literals.
   // Returns the evaluated result as a literal if successful.
@@ -62,9 +65,12 @@ class HloEvaluator : public DfsHloVisitorWithDefault {
   // where Parameter0 has parameter_number 0 and Parameter1 has parameter_number
   // 1 in this computation. The input literals array will then have its first
   // literal map to Parameter0 and the second map to Parameter1.
+  // `LiteralPtr` accepts either std::unique_ptr<Literal> or const Literal*
+  // type.
+  template <typename LiteralPtr>
   StatusOr<std::unique_ptr<Literal>> Evaluate(
       const HloComputation& computation,
-      tensorflow::gtl::ArraySlice<const Literal*> arg_literals);
+      tensorflow::gtl::ArraySlice<LiteralPtr> arg_literals);
 
   // Evaluates a single HLO instruction and an array of pointers to literals.
   // Return the evaluated result as literal if successful.
@@ -72,10 +78,12 @@ class HloEvaluator : public DfsHloVisitorWithDefault {
   // 1. argument literals correspond to the input instruction's parameters in
   // their post-ordering.
   // 2. the instruction's operands must be of either Parameter or Constant type.
-  // TODO(b/35950897): implement more ops other than element-wise ops.
+  // `LiteralPtr` accepts either std::unique_ptr<Literal> or const Literal*
+  // type.
+  template <typename LiteralPtr>
   StatusOr<std::unique_ptr<Literal>> Evaluate(
       HloInstruction* instruction,
-      tensorflow::gtl::ArraySlice<const Literal*> arg_literals);
+      tensorflow::gtl::ArraySlice<LiteralPtr> arg_literals);
 
   // Evaluates a single HLO instruction with constant operands.
   // Returns the evaluated result as literal if successful.
@@ -100,7 +108,7 @@ class HloEvaluator : public DfsHloVisitorWithDefault {
  protected:
   // Templated DfsHloVisitor. Typically ReturnT here indicates the resulting
   // literal type of each evaluated Handle* method of a TypedVisitor.
-  // There are however a few notable exceptions to this is rule, notably:
+  // There are however a few notable exceptions to this rule, notably:
   // - HandleCompare and HandleIsFinite: where the resulting literal type is
   // always boolean.
   // These operations are handled outside of the parent HloEvaluator handlers
@@ -138,6 +146,7 @@ class HloEvaluator : public DfsHloVisitorWithDefault {
   Status HandleIsFinite(HloInstruction* is_finite) override;
 
   Status HandleCompare(HloInstruction* compare) override;
+
   Status HandleTuple(HloInstruction* tuple) override;
 
   Status HandleGetTupleElement(HloInstruction* get_tuple_element) override;
@@ -171,13 +180,15 @@ class HloEvaluator : public DfsHloVisitorWithDefault {
   // TODO(b/35950897): have better memory management here to free instructions
   // that are no longer a parent for any other subsequent instruction in
   // post-orderring.
+  // Must be cleared for each evaluation.
   tensorflow::gtl::FlatMap<const HloInstruction*, std::unique_ptr<Literal>>
       evaluated_;
 
-  // Stores input literals, assuming they are in post-order. Literals are not
-  // owned by this class, and they must outlive the lifetime of the instance of
-  // this class.
-  tensorflow::gtl::ArraySlice<const Literal*> arg_literals_;
+  // Caches pointers to input literals, assuming they are in post-order.
+  // Literals are not owned by this class, and they must outlive the lifetime of
+  // each invocation to the Evaluate* method.
+  // Must be cleared for each evaluation.
+  std::vector<const Literal*> arg_literals_;
 
   TF_DISALLOW_COPY_AND_ASSIGN(HloEvaluator);
 };
diff --git a/tensorflow/compiler/xla/service/interpreter/executable.cc b/tensorflow/compiler/xla/service/interpreter/executable.cc
index 9183a1d1bf..293cc2007e 100644
--- a/tensorflow/compiler/xla/service/interpreter/executable.cc
+++ b/tensorflow/compiler/xla/service/interpreter/executable.cc
@@ -98,12 +98,10 @@ StatusOr<se::DeviceMemoryBase> InterpreterExecutable::ExecuteOnStream(
 
   // Create the arguments as an vector of XLA literals
   std::vector<std::unique_ptr<Literal>> arg_literals;
-  std::vector<Literal*> arg_literals_ptrs;
   for (int64 p = 0; p < computation->num_parameters(); ++p) {
     // Create the input literal for the parameter
     HloInstruction* param = computation->parameter_instruction(p);
     arg_literals.emplace_back(Literal::CreateFromShape(param->shape()));
-    arg_literals_ptrs.push_back(arg_literals.back().get());
 
     // Copy in the data from the stream_executor buffers
     void* buffer = arg_literals.back()->MutableInternalData();
@@ -113,8 +111,9 @@ StatusOr<se::DeviceMemoryBase> InterpreterExecutable::ExecuteOnStream(
 
   // Execute the graph using the HloEvaluator.
   HloEvaluator evaluator;
-  TF_ASSIGN_OR_RETURN(std::unique_ptr<Literal> output,
-                      evaluator.Evaluate(*computation, arg_literals_ptrs));
+  TF_ASSIGN_OR_RETURN(
+      std::unique_ptr<Literal> output,
+      evaluator.Evaluate<std::unique_ptr<Literal>>(*computation, arg_literals));
 
   // Copy the result into the return buffer
   perftools::gputools::StreamExecutor* executor(stream->parent());
diff --git a/tensorflow/compiler/xla/service/service.cc b/tensorflow/compiler/xla/service/service.cc
index ecc3c0ff12..9d78e6a2b2 100644
--- a/tensorflow/compiler/xla/service/service.cc
+++ b/tensorflow/compiler/xla/service/service.cc
@@ -1267,8 +1267,9 @@ tensorflow::Status Service::ComputeConstant(const ComputeConstantRequest* arg,
                  [](const Literal& literal) { return &literal; });
 
   HloEvaluator evaluator;
-  TF_ASSIGN_OR_RETURN(auto result_literal,
-                      evaluator.Evaluate(*module, parameter_ptrs));
+  TF_ASSIGN_OR_RETURN(auto result_literal, evaluator.Evaluate<const Literal*>(
+                                               *module, parameter_ptrs));
+
   // Since the shape_with_output_layout option in ExecutionOption is
   // non-effective to the Evaluator results, explicit relayout here.
   if (arg->has_output_layout()) {
diff --git a/tensorflow/compiler/xla/tests/BUILD b/tensorflow/compiler/xla/tests/BUILD
index 7d7b13da84..9ce9b7aae4 100644
--- a/tensorflow/compiler/xla/tests/BUILD
+++ b/tensorflow/compiler/xla/tests/BUILD
@@ -361,6 +361,7 @@ xla_test(
 xla_test(
     name = "map_test",
     srcs = ["map_test.cc"],
+    tags = ["enable_for_xla_interpreter"],
     deps = [
         "//tensorflow/compiler/xla:array2d",
         "//tensorflow/compiler/xla:literal_util",
-- 
GitLab


From 90e42f3ac8c43474633136af4242dca04b6a1e09 Mon Sep 17 00:00:00 2001
From: Dandelion Man? <dandelion@google.com>
Date: Fri, 15 Dec 2017 18:15:07 -0800
Subject: [PATCH 1115/1225] Automated g4 rollback of changelist 179260538

PiperOrigin-RevId: 179263865
---
 .gitignore                                    |   11 +-
 configure.py                                  |   35 +-
 tensorflow/c/c_api.cc                         |    2 +
 tensorflow/c/c_api_function.cc                |    4 +-
 .../tf2xla/kernels/tensor_array_ops.cc        |   87 +-
 tensorflow/contrib/BUILD                      |    7 +-
 tensorflow/contrib/cmake/CMakeLists.txt       |   34 +-
 tensorflow/contrib/cmake/README.md            |   17 -
 .../contrib/cmake/external/gemmlowp.cmake     |    4 +-
 .../contrib/cmake/tf_core_kernels.cmake       |    3 -
 tensorflow/contrib/cmake/tf_tests.cmake       |    2 +
 .../contrib/factorization/python/ops/gmm.py   |    4 +-
 tensorflow/contrib/ffmpeg/__init__.py         |    1 +
 .../contrib/ffmpeg/default/ffmpeg_lib.cc      |    6 +-
 .../ffmpeg/default/ffmpeg_lib_utility_test.cc |    2 +
 tensorflow/contrib/ffmpeg/ffmpeg_ops.py       |    1 +
 .../estimator/python/gan_estimator_impl.py    |    7 +-
 .../gan/python/estimator/python/head_impl.py  |    6 +-
 .../layers/python/layers/initializers.py      |    3 +-
 .../learn/estimators/composable_model_test.py |    4 +-
 .../learn/python/learn/estimators/dnn.py      |    4 +-
 .../learn/estimators/estimator_input_test.py  |   10 +-
 .../python/learn/estimators/estimator_test.py |   26 +-
 .../learn/estimators/estimators_test.py       |    8 +-
 .../learn/python/learn/estimators/kmeans.py   |    4 +-
 .../learn/python/learn/estimators/linear.py   |    6 +-
 .../estimators/logistic_regressor_test.py     |    4 +-
 .../learn/python/learn/utils/export.py        |    6 +-
 .../linear_optimizer/python/sdca_estimator.py |    4 +-
 tensorflow/contrib/lite/README.md             |    5 +
 tensorflow/contrib/lite/ios_makefile.inc      |   78 +-
 .../contrib/lite/nnapi/NeuralNetworksShim.h   |    4 +-
 tensorflow/contrib/makefile/Makefile          |    2 +-
 tensorflow/contrib/nn/__init__.py             |    1 +
 .../contrib/nn/python/ops/sampling_ops.py     |  100 +
 tensorflow/contrib/opt/BUILD                  |   19 +
 tensorflow/contrib/opt/__init__.py            |    5 +-
 .../training/elastic_average_optimizer.py     |  345 +++
 .../elastic_average_optimizer_test.py         |  225 ++
 .../python/kernel_tests/core_rnn_cell_test.py |    3 +
 tensorflow/contrib/rnn/python/ops/rnn_cell.py |    1 +
 .../contrib/seq2seq/python/ops/helper.py      |    6 +-
 .../python/slim/data/dataset_data_provider.py |    4 +-
 .../timeseries/python/timeseries/head.py      |    4 +-
 tensorflow/core/common_runtime/function.cc    |    2 +-
 tensorflow/core/framework/numeric_types.h     |    2 +-
 .../core/framework/tensor_shape_test.cc       |    3 +-
 tensorflow/core/graph/mkl_graph_util.h        |   10 +-
 tensorflow/core/graph/mkl_layout_pass.cc      | 2083 +++++++++++++++++
 tensorflow/core/graph/mkl_layout_pass_test.cc | 1624 +++++++++++++
 tensorflow/core/kernels/logging_ops.cc        |    3 +-
 tensorflow/core/kernels/mkl_aggregate_ops.cc  |  204 +-
 tensorflow/core/kernels/mkl_avgpooling_op.cc  |  306 ++-
 tensorflow/core/kernels/mkl_concat_op.cc      |  374 ++-
 .../core/kernels/mkl_conv_grad_filter_ops.cc  |  317 +--
 .../core/kernels/mkl_conv_grad_input_ops.cc   |  244 +-
 tensorflow/core/kernels/mkl_conv_ops.cc       |  149 +-
 tensorflow/core/kernels/mkl_conv_ops.h        |  269 ++-
 .../core/kernels/mkl_fused_batch_norm_op.cc   |  652 +++++-
 tensorflow/core/kernels/mkl_identity_op.cc    |   33 +
 .../core/kernels/mkl_input_conversion_op.cc   |  217 +-
 tensorflow/core/kernels/mkl_lrn_op.cc         |    2 +-
 tensorflow/core/kernels/mkl_maxpooling_op.cc  |  357 ++-
 .../core/kernels/mkl_pooling_ops_common.cc    |   38 +-
 .../core/kernels/mkl_pooling_ops_common.h     |  342 +++
 tensorflow/core/kernels/mkl_relu_op.cc        |  505 +++-
 tensorflow/core/kernels/mkl_reshape_op.cc     |  182 ++
 tensorflow/core/kernels/quantized_conv_ops.cc |    7 +
 .../core/lib/io/snappy/snappy_outputbuffer.cc |    3 +-
 tensorflow/core/ops/nn_ops.cc                 |  173 ++
 tensorflow/core/platform/cloud/BUILD          |   14 +
 .../core/platform/cloud/gcs_dns_cache.cc      |   32 +-
 .../core/platform/cloud/gcs_file_system.cc    |   21 +-
 .../platform/cloud/google_auth_provider.cc    |    5 +-
 .../core/platform/cloud/oauth_client.cc       |    4 +
 tensorflow/core/platform/cloud/time_util.cc   |    3 +
 .../core/platform/default/build_config.bzl    |    1 -
 .../android_armv7a_cpu_utils_helper.cc        |    3 +-
 tensorflow/core/platform/s3/s3_file_system.cc |   10 +-
 tensorflow/core/util/mkl_util.h               |  313 ++-
 .../docs_src/api_guides/python/image.md       |    1 +
 .../api_guides/python/reading_data.md         |   27 +-
 .../docs_src/get_started/mnist/mechanics.md   |    8 +-
 .../docs_src/programmers_guide/estimators.md  |    2 +-
 .../docs_src/programmers_guide/variables.md   |    2 +-
 tensorflow/examples/android/build.gradle      |    6 +-
 .../android/gradle/wrapper/gradle-wrapper.jar |  Bin 0 -> 53636 bytes
 .../gradle/wrapper/gradle-wrapper.properties  |    6 +
 tensorflow/examples/android/gradlew           |  160 ++
 tensorflow/examples/android/gradlew.bat       |   90 +
 .../org/tensorflow/demo/CameraActivity.java   |    8 +-
 .../reading_data/fully_connected_reader.py    |  125 +-
 .../wav_to_spectrogram/wav_to_spectrogram.cc  |    7 +-
 tensorflow/go/graph.go                        |   17 +-
 tensorflow/go/graph_test.go                   |   22 +-
 tensorflow/python/client/session.py           |   18 +-
 tensorflow/python/client/session_test.py      |    6 +
 tensorflow/python/debug/BUILD                 |    1 +
 tensorflow/python/estimator/training_test.py  |    4 +-
 tensorflow/python/estimator/util.py           |    2 +-
 tensorflow/python/framework/function.py       |    4 +-
 .../keras/applications/inception_resnet_v2.py |    2 +-
 .../_impl/keras/applications/mobilenet.py     |    4 +-
 .../_impl/keras/applications/resnet50.py      |    2 +-
 .../keras/_impl/keras/applications/vgg16.py   |    4 +-
 .../keras/_impl/keras/applications/vgg19.py   |    4 +-
 .../_impl/keras/applications/xception.py      |    4 +-
 .../python/keras/_impl/keras/layers/core.py   |    2 +-
 .../keras/_impl/keras/layers/core_test.py     |    5 +
 tensorflow/python/kernel_tests/BUILD          |    4 +
 .../kernel_tests/summary_image_op_test.py     |    1 -
 tensorflow/python/ops/image_ops_impl.py       |   29 +-
 tensorflow/python/ops/image_ops_test.py       |  113 +
 tensorflow/python/ops/logging_ops.py          |    6 +-
 tensorflow/python/ops/nn_impl.py              |   13 +-
 .../python/ops/quantized_conv_ops_test.py     |    2 +-
 tensorflow/python/ops/quantized_ops_test.py   |   57 +
 .../python/training/learning_rate_decay.py    |   13 +-
 tensorflow/python/util/tf_inspect.py          |   20 +
 tensorflow/stream_executor/dnn.h              |    2 +-
 tensorflow/tensorflow.bzl                     |   43 +-
 tensorflow/tools/benchmark/benchmark_model.cc |    2 +-
 tensorflow/tools/ci_build/Dockerfile.gpu      |    4 +-
 .../tools/ci_build/Dockerfile.gpu_clang       |    4 +-
 tensorflow/tools/ci_build/builds/pip.sh       |   21 +-
 .../tools/ci_build/builds/print_build_info.sh |    2 +-
 .../tools/ci_build/builds/test_user_ops.sh    |    6 +-
 .../gpu_build/parallel_gpu_execute.sh         |    2 +-
 .../ci_build/install/install_deb_packages.sh  |    1 +
 .../ci_build/install/install_pip_packages.sh  |    3 +
 .../install/install_python3.5_pip_packages.sh |    2 +
 .../install/install_python3.6_pip_packages.sh |    2 +
 .../tools/ci_build/linux/gpu/run_cc_core.sh   |    2 +
 .../tools/ci_build/linux/gpu/run_py3_core.sh  |    2 +
 .../tools/ci_build/pi/build_raspberry_pi.sh   |    1 +
 .../ci_build/remote/remote_docker_build.sh    |    2 +-
 .../ci_build/windows/bazel/bazel_test_lib.sh  |    6 +-
 .../ci_build/windows/bazel/common_env.sh      |   16 -
 .../windows/cpu/bazel/run_cc_test_windows.sh  |    6 +-
 .../windows/cpu/pip/build_tf_windows.sh       |    9 +-
 .../windows/gpu/bazel/run_cc_test_windows.sh  |    4 +-
 .../windows/gpu/pip/build_tf_windows.sh       |    9 +-
 .../ci_build/windows/libtensorflow_cpu.sh     |    3 +-
 .../tools/ci_build/xla/linux/gpu/run_py3.sh   |    2 +
 tensorflow/tools/docker/Dockerfile.devel      |    5 +-
 tensorflow/tools/docker/Dockerfile.devel-gpu  |   26 +-
 tensorflow/tools/docker/Dockerfile.gpu        |    2 +-
 .../docker/parameterized_docker_build.sh      |    2 +-
 tensorflow/tools/docs/generate_lib.py         |    4 +-
 tensorflow/tools/pip_package/BUILD            |   21 +
 .../tools/pip_package/build_pip_package.sh    |    4 +-
 .../tools/pip_package/pip_smoke_test.py       |    1 +
 .../proto_text/gen_proto_text_functions.cc    |    1 +
 tensorflow/workspace.bzl                      |   16 +-
 third_party/curl.BUILD                        |   26 +-
 third_party/pcre.BUILD                        |    2 +-
 156 files changed, 9755 insertions(+), 933 deletions(-)
 create mode 100644 tensorflow/contrib/opt/python/training/elastic_average_optimizer.py
 create mode 100644 tensorflow/contrib/opt/python/training/elastic_average_optimizer_test.py
 create mode 100644 tensorflow/examples/android/gradle/wrapper/gradle-wrapper.jar
 create mode 100644 tensorflow/examples/android/gradle/wrapper/gradle-wrapper.properties
 create mode 100644 tensorflow/examples/android/gradlew
 create mode 100644 tensorflow/examples/android/gradlew.bat
 create mode 100644 tensorflow/python/ops/quantized_ops_test.py

diff --git a/.gitignore b/.gitignore
index d11a504bdc..be75938ec4 100644
--- a/.gitignore
+++ b/.gitignore
@@ -6,7 +6,7 @@ node_modules
 /bazel-*
 /bazel_pip
 /tools/python_bin_path.sh
-/tools/git/gen
+/tensorflow/tools/git/gen
 /pip_test
 /_python_build
 *.pyc
@@ -26,4 +26,11 @@ Podfile.lock
 /tensorflow/contrib/lite/gen/**
 /tensorflow/contrib/lite/examples/ios/simple/data/*.txt
 /tensorflow/contrib/lite/examples/ios/simple/data/*.tflite
-xcuserdata/**
\ No newline at end of file
+xcuserdata/**
+
+# Android
+.gradle
+.idea
+*.iml
+local.properties
+gradleBuild
diff --git a/configure.py b/configure.py
index 3d553e1c14..336559532d 100644
--- a/configure.py
+++ b/configure.py
@@ -36,8 +36,8 @@ _TF_BAZELRC = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                            '.tf_configure.bazelrc')
 _TF_WORKSPACE = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                              'WORKSPACE')
-_DEFAULT_CUDA_VERSION = '8.0'
-_DEFAULT_CUDNN_VERSION = '6'
+_DEFAULT_CUDA_VERSION = '9.0'
+_DEFAULT_CUDNN_VERSION = '7'
 _DEFAULT_CUDA_COMPUTE_CAPABILITIES = '3.5,5.2'
 _DEFAULT_CUDA_PATH = '/usr/local/cuda'
 _DEFAULT_CUDA_PATH_LINUX = '/opt/cuda'
@@ -1096,6 +1096,27 @@ def set_computecpp_toolkit_path(environ_cp):
   write_action_env_to_bazelrc('COMPUTECPP_TOOLKIT_PATH',
                               computecpp_toolkit_path)
 
+def set_trisycl_include_dir(environ_cp):
+  """Set TRISYCL_INCLUDE_DIR"""
+  ask_trisycl_include_dir = ('Please specify the location of the triSYCL '
+                             'include directory. (Use --config=sycl_trisycl '
+                             'when building with Bazel) '
+                             '[Default is %s]: '
+                             ) % (_DEFAULT_TRISYCL_INCLUDE_DIR)
+  while True:
+    trisycl_include_dir = get_from_env_or_user_or_default(
+      environ_cp, 'TRISYCL_INCLUDE_DIR', ask_trisycl_include_dir,
+      _DEFAULT_TRISYCL_INCLUDE_DIR)
+    if os.path.exists(trisycl_include_dir):
+      break
+
+    print('Invalid triSYCL include directory, %s cannot be found'
+          % (trisycl_include_dir))
+
+  # Set TRISYCL_INCLUDE_DIR
+  environ_cp['TRISYCL_INCLUDE_DIR'] = trisycl_include_dir
+  write_action_env_to_bazelrc('TRISYCL_INCLUDE_DIR',
+                              trisycl_include_dir)
 
 def set_trisycl_include_dir(environ_cp):
   """Set TRISYCL_INCLUDE_DIR."""
@@ -1211,6 +1232,15 @@ def create_android_bazelrc_configs():
 def set_grpc_build_flags():
   write_to_bazelrc('build --define grpc_no_ares=true')
 
+def set_windows_build_flags():
+  if is_windows():
+    # The non-monolithic build is not supported yet
+    write_to_bazelrc('build --config monolithic')
+    # Suppress warning messages
+    write_to_bazelrc('build --copt=-w --host_copt=-w')
+    # Output more verbose information when something goes wrong
+    write_to_bazelrc('build --verbose_failures')
+
 
 def main():
   # Make a copy of os.environ to be clear when functions and getting and setting
@@ -1289,6 +1319,7 @@ def main():
   set_cc_opt_flags(environ_cp)
   set_mkl()
   set_monolithic()
+  set_windows_build_flags()
   create_android_bazelrc_configs()
 
   if workspace_has_any_android_rule():
diff --git a/tensorflow/c/c_api.cc b/tensorflow/c/c_api.cc
index 6f5abd074c..9b57047028 100644
--- a/tensorflow/c/c_api.cc
+++ b/tensorflow/c/c_api.cc
@@ -579,6 +579,7 @@ TF_Tensor* TF_TensorFromTensor(const tensorflow::Tensor& src,
       status->status = InvalidArgument(
           "invalid string tensor encoding (string #", i, " of ",
           srcarray.size(), "): ", status->status.error_message());
+      delete[] base;
       return nullptr;
     }
     dst += consumed;
@@ -588,6 +589,7 @@ TF_Tensor* TF_TensorFromTensor(const tensorflow::Tensor& src,
     status->status = InvalidArgument(
         "invalid string tensor encoding (decoded ", (dst - base),
         " bytes, but the tensor is encoded in ", size, " bytes");
+    delete[] base;
     return nullptr;
   }
 
diff --git a/tensorflow/c/c_api_function.cc b/tensorflow/c/c_api_function.cc
index b9312c2974..d60d1de315 100644
--- a/tensorflow/c/c_api_function.cc
+++ b/tensorflow/c/c_api_function.cc
@@ -68,7 +68,7 @@ class NodeNameMapping {
   // This is a superset of values in name_mapping_.
   std::unordered_set<string> used_names_;
   // Mapping from original node name from the graph to the normalized
-  // and uniqified version of it.
+  // and uniquified version of it.
   std::unordered_map<string, string> name_mapping_;
 };
 
@@ -236,7 +236,7 @@ Status FillFunctionBody(
 }
 
 // Graph to FunctionDef conversion. This code is closely modeled on the Python
-// code in third_party/tensorflow/python/framework/function.py.
+// code in tensorflow/python/framework/function.py.
 Status GraphToFunctionDef(const Graph& fn_body, const string& fn_name,
                           bool append_hash_to_fn_name,
                           const std::vector<const Node*>& body_nodes,
diff --git a/tensorflow/compiler/tf2xla/kernels/tensor_array_ops.cc b/tensorflow/compiler/tf2xla/kernels/tensor_array_ops.cc
index 351fda2517..03c22354a9 100644
--- a/tensorflow/compiler/tf2xla/kernels/tensor_array_ops.cc
+++ b/tensorflow/compiler/tf2xla/kernels/tensor_array_ops.cc
@@ -311,6 +311,32 @@ class TensorArrayGatherOp : public XlaOpKernel {
 
     xla::ComputationDataHandle ta = resource->value;
 
+    // Look for the case where the gather takes a simple slice from the
+    // tensor array (0, 1, 2, 3, 4, ..., N)
+    std::vector<int64> const_indices;
+    Status status = ctx->ConstantInputAsIntVector(1, &const_indices);
+    if (status.ok()) {
+      bool gather_is_dense_slice = true;
+      for (auto i = 0; i < const_indices.size(); i++) {
+        if (const_indices[i] != i) {
+          gather_is_dense_slice = false;
+          break;
+        }
+      }
+
+      if (gather_is_dense_slice) {
+        std::vector<int64> begin(ta_shape.dims(), 0);
+        std::vector<int64> strides(ta_shape.dims(), 1);
+        std::vector<int64> end(ta_shape.dims(), 1);
+        end[0] = const_indices.size();
+        for (auto i = 1; i < ta_shape.dims(); i++) {
+          end[i] = ta_shape.dim_size(i);
+        }
+        ctx->SetOutput(0, b->Slice(ta, begin, end, strides));
+        return;
+      }
+    }
+
     xla::ComputationDataHandle gather = XlaComputeGatherDynamicSlice(
         ctx, ta, ta_shape, indices, indices_shape, 0, dtype_, index_type, b);
     ctx->SetOutput(0, gather);
@@ -352,28 +378,47 @@ class TensorArrayScatterOp : public XlaOpKernel {
     const xla::ComputationDataHandle value = ctx->Input(2);
     const xla::ComputationDataHandle flow = ctx->Input(3);
 
-    auto slice_dims = value_shape.dim_sizes();
-    slice_dims[0] = 1LL;
-
-    std::vector<int64> value_starts(value_shape.dims(), 0);
-    auto value_ends = value_shape.dim_sizes();
-
-    std::vector<int64> value_strides(value_shape.dims(), 1);
-
-    // For every (index, value) pair, update the corresponding TensorArray
-    // storage.
-    for (int i = 0; i < num_indices; ++i) {
-      // Slice out part of the value.
-      value_starts[0] = i;
-      value_ends[0] = i + 1;
-      auto slice = b->Slice(value, value_starts, value_ends, value_strides);
+    // Look for the case where the scatter is for each sub-tensor in order. The
+    // tensor array implementation allows for this to be a straight addition.
+    bool scatter_all_elements_in_order = false;
+    std::vector<int64> const_indices;
+    Status status = ctx->ConstantInputAsIntVector(1, &const_indices);
+    if (status.ok() && num_indices == value_shape.dim_size(0)) {
+      scatter_all_elements_in_order = true;
+      for (auto i = 0; i < num_indices; i++) {
+        if (const_indices[i] != i) {
+          scatter_all_elements_in_order = false;
+          break;
+        }
+      }
+    }
 
-      // start_indices of the DynamicUpdateSlice are [index, 0, 0, ..., 0].
-      auto index = b->Slice(indices, {i}, {i + 1}, {1});
-      auto start_indices =
-          b->Pad(b->Reshape(index, {1}), b->ConstantR0<int32>(0),
-                 xla::MakeEdgePaddingConfig({{0, elem_shape.dims()}}));
-      ta = DynamicAddSlice(b, ta, slice, slice_dims, start_indices);
+    if (scatter_all_elements_in_order) {
+      ta = b->Add(ta, value);
+    } else {
+      auto slice_dims = value_shape.dim_sizes();
+      slice_dims[0] = 1LL;
+
+      std::vector<int64> value_starts(value_shape.dims(), 0);
+      auto value_ends = value_shape.dim_sizes();
+
+      std::vector<int64> value_strides(value_shape.dims(), 1);
+
+      // For every (index, value) pair, update the corresponding TensorArray
+      // storage.
+      for (int i = 0; i < num_indices; ++i) {
+        // Slice out part of the value.
+        value_starts[0] = i;
+        value_ends[0] = i + 1;
+        auto slice = b->Slice(value, value_starts, value_ends, value_strides);
+
+        // start_indices of the DynamicUpdateSlice are [index, 0, 0, ..., 0].
+        auto index = b->Slice(indices, {i}, {i + 1}, {1});
+        auto start_indices =
+                b->Pad(b->Reshape(index, {1}), b->ConstantR0<int32>(0),
+                       xla::MakeEdgePaddingConfig({{0, elem_shape.dims()}}));
+        ta = DynamicAddSlice(b, ta, slice, slice_dims, start_indices);
+      }
     }
 
     resource->value = ta;
diff --git a/tensorflow/contrib/BUILD b/tensorflow/contrib/BUILD
index 61f7821519..604c41bf8a 100644
--- a/tensorflow/contrib/BUILD
+++ b/tensorflow/contrib/BUILD
@@ -9,7 +9,12 @@ load("//third_party/mpi:mpi.bzl", "if_mpi")
 
 py_library(
     name = "contrib_py",
-    srcs = glob(["**/*.py"]),
+    srcs = glob(
+        ["**/*.py"],
+        exclude = [
+            "**/*_test.py",
+        ],
+    ),
     srcs_version = "PY2AND3",
     visibility = ["//visibility:public"],
     deps = [
diff --git a/tensorflow/contrib/cmake/CMakeLists.txt b/tensorflow/contrib/cmake/CMakeLists.txt
index 77a3fc0c83..481caf6bb0 100644
--- a/tensorflow/contrib/cmake/CMakeLists.txt
+++ b/tensorflow/contrib/cmake/CMakeLists.txt
@@ -18,7 +18,6 @@ cmake_policy(SET CMP0022 NEW)
 
 # Options
 option(tensorflow_VERBOSE "Enable for verbose output" OFF)
-option(tensorflow_ENABLE_GPU "Enable GPU support" OFF)
 option(tensorflow_ENABLE_SSL_SUPPORT "Enable boringssl support" OFF)
 option(tensorflow_ENABLE_GRPC_SUPPORT "Enable gRPC support" ON)
 option(tensorflow_ENABLE_HDFS_SUPPORT "Enable HDFS support" OFF)
@@ -34,6 +33,12 @@ option(tensorflow_BUILD_SHARED_LIB "Build TensorFlow as a shared library" OFF)
 option(tensorflow_OPTIMIZE_FOR_NATIVE_ARCH "Enable compiler optimizations for the native processor architecture (if available)" ON)
 option(tensorflow_WIN_CPU_SIMD_OPTIONS "Enables CPU SIMD instructions")
 option(tensorflow_ENABLE_SNAPPY_SUPPORT "Enable SNAPPY compression support" ON)
+
+# GPU, CUDA and cuDNN options
+option(tensorflow_ENABLE_GPU "Enable GPU support" OFF)
+option(tensorflow_CUDA_VERSION "CUDA version to build against" 9.0)
+option(tensorflow_CUDNN_VERSION "cuDNN version to build against" 7)
+
 if(HAIKU)
 	option(tensorflow_ENABLE_POSITION_INDEPENDENT_CODE "Enable PIE support" OFF)
 else()
@@ -262,7 +267,7 @@ if (tensorflow_ENABLE_GPU)
     list(APPEND CMAKE_LIBRARY_PATH "${tensorflow_CUDA_LIBRARY_PATH}/stubs")
   endif (NOT WIN32)
 
-  find_package(CUDA 8.0 REQUIRED)
+  find_package(CUDA ${tensorflow_CUDA_VERSION} REQUIRED)
 
   # by default we assume compute cabability 3.5 and 5.2. If you change this change it in
   # CUDA_NVCC_FLAGS and cuda_config.h below
@@ -316,13 +321,16 @@ if (tensorflow_ENABLE_GPU)
       ${CUDA_curand_LIBRARY} ${CUDA_cupti_LIBRARY} ${CUDA_cusolver_LIBRARY} ${cudnn_STATIC_LIBRARY} ${culibos_STATIC_LIBRARY} ${nccl_STATIC_LIBRARY})
   endif (WIN32)
 
+  # Remove "." from CUDA version variable.
+  string(REPLACE "." "" short_CUDA_VER ${tensorflow_CUDA_VERSION})
+
   # create cuda_config.h
   FILE(WRITE ${tensorflow_source_dir}/third_party/gpus/cuda/cuda_config.h
     "#ifndef CUDA_CUDA_CONFIG_H_\n"
     "#define CUDA_CUDA_CONFIG_H_\n"
     "#define TF_CUDA_CAPABILITIES CudaVersion(\"3.0\"),CudaVersion(\"3.5\"),CudaVersion(\"5.2\")\n"
-    "#define TF_CUDA_VERSION \"64_80\"\n"
-    "#define TF_CUDNN_VERSION \"64_6\"\n"
+    "#define TF_CUDA_VERSION \"64_${short_CUDA_VER}\"\n"
+    "#define TF_CUDNN_VERSION \"64_${tensorflow_CUDNN_VERSION}\"\n"
     "#define TF_CUDA_TOOLKIT_PATH \"${CUDA_TOOLKIT_ROOT_DIR}\"\n"
     "#endif  // CUDA_CUDA_CONFIG_H_\n"
   )
@@ -360,15 +368,15 @@ if (tensorflow_ENABLE_GPU)
   if(WIN32)
     set(tensorflow_BUILD_INFO_FLAGS --build_config cuda --key_value
       msvcp_dll_name=msvcp140.dll
-      cudart_dll_name=cudart64_80.dll
-      cuda_version_number=8.0
+      cudart_dll_name=cudart64_${short_CUDA_VER}.dll
+      cuda_version_number=${tensorflow_CUDA_VERSION}
       nvcuda_dll_name=nvcuda.dll
-      cudnn_dll_name=cudnn64_6.dll
-      cudnn_version_number=6)
+      cudnn_dll_name=cudnn64_${tensorflow_CUDNN_VERSION}.dll
+      cudnn_version_number=${tensorflow_CUDNN_VERSION})
   else(WIN32)
     set(tensorflow_BUILD_INFO_FLAGS --build_config cuda --key_value
-      cuda_version_number=8.0
-      cudnn_version_number=6)
+	    cuda_version_number=${tensorflow_CUDA_VERSION}
+	    cudnn_version_number=${tensorflow_CUDNN_VERSION})
   endif(WIN32)
 else(tensorflow_ENABLE_GPU)
   set(tensorflow_BUILD_INFO_FLAGS --build_config cpu --key_value
@@ -383,11 +391,7 @@ endif()
 
 # Let's get to work!
 include(tf_core_framework.cmake)
-# NOTE: Disabled until issue #3996 is fixed.
-# include(tf_stream_executor.cmake)
-if (tensorflow_ENABLE_GPU)
-    include(tf_stream_executor.cmake)
-endif()
+include(tf_stream_executor.cmake)
 
 include(tf_core_cpu.cmake)
 include(tf_core_ops.cmake)
diff --git a/tensorflow/contrib/cmake/README.md b/tensorflow/contrib/cmake/README.md
index 4ddfec5960..4be733a280 100644
--- a/tensorflow/contrib/cmake/README.md
+++ b/tensorflow/contrib/cmake/README.md
@@ -19,23 +19,6 @@ for instructions on how to install a pre-built TensorFlow package on Windows.
 ### Current known limitations
 * It is not possible to load a custom Op library.
 * GCS file system is not supported.
-* The following Ops are not currently implemented:
- - Dequantize
- - QuantizeAndDequantize
- - QuantizedAvgPool
- - QuantizedBatchNomWithGlobalNormalization
- - QuantizedBiasAdd
- - QuantizedConcat
- - QuantizedConv2D
- - QuantizedMatmul
- - QuantizedMaxPoo
- - QuantizeDownAndShrinkRange
- - QuantizedRelu
- - QuantizedRelu6
- - QuantizedReshape
- - QuantizeV2
- - RequantizationRange
- - Requantize
 
 ## Building with CMake
 
diff --git a/tensorflow/contrib/cmake/external/gemmlowp.cmake b/tensorflow/contrib/cmake/external/gemmlowp.cmake
index 3b146657bf..a235442dc5 100644
--- a/tensorflow/contrib/cmake/external/gemmlowp.cmake
+++ b/tensorflow/contrib/cmake/external/gemmlowp.cmake
@@ -14,8 +14,8 @@
 # ==============================================================================
 include (ExternalProject)
 
-set(gemmlowp_URL https://mirror.bazel.build/github.com/google/gemmlowp/archive/010bb3e71a26ca1d0884a167081d092b43563996.zip)
-set(gemmlowp_HASH SHA256=dd2557072bde12141419cb8320a9c25e6ec41a8ae53c2ac78c076a347bb46d9d)
+set(gemmlowp_URL https://github.com/google/gemmlowp/archive/6a2a90822e8546fc2bfa7044de0faf1c1cb4862f.zip)
+set(gemmlowp_HASH SHA256=3447948d219f3270383766bbe08942888c0eb4e0ca6663c0e0548502ec5bb77d)
 set(gemmlowp_BUILD ${CMAKE_CURRENT_BINARY_DIR}/gemmlowp/src/gemmlowp)
 set(gemmlowp_INCLUDE_DIR ${CMAKE_CURRENT_BINARY_DIR}/gemmlowp/src/gemmlowp)
 
diff --git a/tensorflow/contrib/cmake/tf_core_kernels.cmake b/tensorflow/contrib/cmake/tf_core_kernels.cmake
index 2d015908a8..eb6bf567aa 100644
--- a/tensorflow/contrib/cmake/tf_core_kernels.cmake
+++ b/tensorflow/contrib/cmake/tf_core_kernels.cmake
@@ -150,9 +150,6 @@ list(REMOVE_ITEM tf_core_kernels_srcs ${tf_core_kernels_exclude_srcs})
 if(WIN32)
   file(GLOB_RECURSE tf_core_kernels_windows_exclude_srcs
       # not working on windows yet
-      "${tensorflow_source_dir}/tensorflow/core/kernels/meta_support.*"
-      "${tensorflow_source_dir}/tensorflow/core/kernels/*quantiz*.h"
-      "${tensorflow_source_dir}/tensorflow/core/kernels/*quantiz*.cc"
       "${tensorflow_source_dir}/tensorflow/core/kernels/neon/*"
       # not in core - those are loaded dynamically as dll
       "${tensorflow_source_dir}/tensorflow/contrib/nearest_neighbor/kernels/hyperplane_lsh_probes.cc"
diff --git a/tensorflow/contrib/cmake/tf_tests.cmake b/tensorflow/contrib/cmake/tf_tests.cmake
index 9ed5b4b9de..94ca4b0017 100644
--- a/tensorflow/contrib/cmake/tf_tests.cmake
+++ b/tensorflow/contrib/cmake/tf_tests.cmake
@@ -146,6 +146,8 @@ if (tensorflow_BUILD_PYTHON_TESTS)
     "${tensorflow_source_dir}/tensorflow/contrib/estimator/python/estimator/*_test.py"
     "${tensorflow_source_dir}/tensorflow/python/kernel_tests/*.py"
     "${tensorflow_source_dir}/tensorflow/python/meta_graph_transform/*_test.py"
+    "${tensorflow_source_dir}/tensorflow/python/ops/quantized_conv_ops_test.py"
+    "${tensorflow_source_dir}/tensorflow/python/ops/quantized_ops_test.py"
     "${tensorflow_source_dir}/tensorflow/python/platform/build_info_test.py"
     "${tensorflow_source_dir}/tensorflow/python/profiler/*_test.py"
     "${tensorflow_source_dir}/tensorflow/python/profiler/internal/*_test.py"
diff --git a/tensorflow/contrib/factorization/python/ops/gmm.py b/tensorflow/contrib/factorization/python/ops/gmm.py
index 0d67e09f81..f72280c4ec 100644
--- a/tensorflow/contrib/factorization/python/ops/gmm.py
+++ b/tensorflow/contrib/factorization/python/ops/gmm.py
@@ -24,7 +24,7 @@ import numpy as np
 from tensorflow.contrib import framework
 from tensorflow.contrib.factorization.python.ops import gmm_ops
 from tensorflow.contrib.framework.python.framework import checkpoint_utils
-from tensorflow.contrib.framework.python.ops import variables
+from tensorflow.python.training import training_util
 from tensorflow.contrib.learn.python.learn.estimators import estimator
 from tensorflow.contrib.learn.python.learn.estimators import model_fn as model_fn_lib
 from tensorflow.python.framework import constant_op
@@ -167,7 +167,7 @@ class GMM(estimator.Estimator):
                                      self._num_clusters, self._random_seed,
                                      self._covariance_type,
                                      self._params)
-      incr_step = state_ops.assign_add(variables.get_global_step(), 1)
+      incr_step = state_ops.assign_add(training_util.get_global_step(), 1)
       loss = math_ops.reduce_sum(losses)
       training_op = with_dependencies([training_op, incr_step], loss)
       training_hooks = [_InitializeClustersHook(
diff --git a/tensorflow/contrib/ffmpeg/__init__.py b/tensorflow/contrib/ffmpeg/__init__.py
index 484ffee3e7..daba965a98 100644
--- a/tensorflow/contrib/ffmpeg/__init__.py
+++ b/tensorflow/contrib/ffmpeg/__init__.py
@@ -28,6 +28,7 @@ from __future__ import print_function
 from tensorflow.contrib.ffmpeg.ffmpeg_ops import decode_audio
 from tensorflow.contrib.ffmpeg.ffmpeg_ops import decode_video
 from tensorflow.contrib.ffmpeg.ffmpeg_ops import encode_audio
+from tensorflow.contrib.ffmpeg.ffmpeg_ops import decode_video
 
 from tensorflow.python.util.all_util import remove_undocumented
 
diff --git a/tensorflow/contrib/ffmpeg/default/ffmpeg_lib.cc b/tensorflow/contrib/ffmpeg/default/ffmpeg_lib.cc
index 1245f515fe..1e8af1458c 100644
--- a/tensorflow/contrib/ffmpeg/default/ffmpeg_lib.cc
+++ b/tensorflow/contrib/ffmpeg/default/ffmpeg_lib.cc
@@ -49,7 +49,8 @@ std::vector<string> FfmpegAudioCommandLine(const string& input_filename,
           "-nostdin",             // No interactive commands accepted.
           "-f", input_format_id,  // eg: "mp3"
           "-probesize", StrCat(kDefaultProbeSize), "-i", input_filename,
-          "-loglevel", "info",  // Enable verbose logging to support debugging.
+          "-loglevel", "error",   // Print errors only.
+          "-hide_banner",         // Skip printing build options, version, etc.
           "-map_metadata", "-1",  // Copy global metadata from input to output.
           "-vn",                  // No video recording.
           "-ac:a:0", StrCat(channel_count), "-ar:a:0",
@@ -72,7 +73,8 @@ std::vector<string> FfmpegVideoCommandLine(const string& input_filename,
           "-probesize",
           StrCat(kDefaultProbeSize),
           "-loglevel",
-          "info",  // Enable verbose logging to support debugging.
+          "error",  // Print errors only.
+          "-hide_banner",  // Skip printing build options, version, etc.
           "-vcodec",
           "rawvideo",
           "-pix_fmt",
diff --git a/tensorflow/contrib/ffmpeg/default/ffmpeg_lib_utility_test.cc b/tensorflow/contrib/ffmpeg/default/ffmpeg_lib_utility_test.cc
index d6c885a324..36fc71794b 100644
--- a/tensorflow/contrib/ffmpeg/default/ffmpeg_lib_utility_test.cc
+++ b/tensorflow/contrib/ffmpeg/default/ffmpeg_lib_utility_test.cc
@@ -20,6 +20,8 @@
 #include <string>
 #include <vector>
 
+
+#include "tensorflow/core/lib/io/path.h"
 #include "tensorflow/core/lib/core/threadpool.h"
 #include "tensorflow/core/lib/io/path.h"
 #include "tensorflow/core/platform/env.h"
diff --git a/tensorflow/contrib/ffmpeg/ffmpeg_ops.py b/tensorflow/contrib/ffmpeg/ffmpeg_ops.py
index 5bb011f41c..08b5a6ea48 100644
--- a/tensorflow/contrib/ffmpeg/ffmpeg_ops.py
+++ b/tensorflow/contrib/ffmpeg/ffmpeg_ops.py
@@ -21,6 +21,7 @@ from __future__ import print_function
 from tensorflow.contrib.ffmpeg.ops import gen_decode_audio_op_py
 from tensorflow.contrib.ffmpeg.ops import gen_decode_video_op_py
 from tensorflow.contrib.ffmpeg.ops import gen_encode_audio_op_py
+from tensorflow.contrib.ffmpeg.ops import gen_decode_video_op_py
 from tensorflow.contrib.util import loader
 from tensorflow.python.framework import ops
 from tensorflow.python.platform import resource_loader
diff --git a/tensorflow/contrib/gan/python/estimator/python/gan_estimator_impl.py b/tensorflow/contrib/gan/python/estimator/python/gan_estimator_impl.py
index eef66af7f9..d3dca3d9e7 100644
--- a/tensorflow/contrib/gan/python/estimator/python/gan_estimator_impl.py
+++ b/tensorflow/contrib/gan/python/estimator/python/gan_estimator_impl.py
@@ -107,6 +107,7 @@ class GANEstimator(estimator.Estimator):
                discriminator_loss_fn=None,
                generator_optimizer=None,
                discriminator_optimizer=None,
+               get_hooks_fn=None,
                add_summaries=None,
                use_loss_summaries=True,
                config=None):
@@ -137,6 +138,10 @@ class GANEstimator(estimator.Estimator):
         work.
       discriminator_optimizer: Same as `generator_optimizer`, but for the
         discriminator updates.
+      get_hooks_fn: A function that takes a `GANTrainOps` tuple and returns a
+        list of hooks. These hooks are run on the generator and discriminator
+        train ops, and can be used to implement the GAN training scheme.
+        Defaults to `train.get_sequential_train_hooks()`.
       add_summaries: `None`, a single `SummaryType`, or a list of `SummaryType`.
       use_loss_summaries: If `True`, add loss summaries. If `False`, does not.
         If `None`, uses defaults.
@@ -151,7 +156,7 @@ class GANEstimator(estimator.Estimator):
               else discriminator_optimizer)
       gan_head = head_lib.gan_head(
           generator_loss_fn, discriminator_loss_fn, gopt, dopt,
-          use_loss_summaries)
+          use_loss_summaries, get_hooks_fn=get_hooks_fn)
       return _gan_model_fn(
           features, labels, mode, generator_fn, discriminator_fn, gan_head,
           add_summaries)
diff --git a/tensorflow/contrib/gan/python/estimator/python/head_impl.py b/tensorflow/contrib/gan/python/estimator/python/head_impl.py
index 204c646e19..a21358c50b 100644
--- a/tensorflow/contrib/gan/python/estimator/python/head_impl.py
+++ b/tensorflow/contrib/gan/python/estimator/python/head_impl.py
@@ -71,7 +71,7 @@ class GANHead(head._Head):  # pylint: disable=protected-access
   def __init__(self, generator_loss_fn, discriminator_loss_fn,
                generator_optimizer, discriminator_optimizer,
                use_loss_summaries=True,
-               get_hooks_fn=tfgan_train.get_sequential_train_hooks(),
+               get_hooks_fn=None,
                name=None):
     """`Head` for GAN training.
 
@@ -86,10 +86,12 @@ class GANHead(head._Head):  # pylint: disable=protected-access
       use_loss_summaries: If `True`, add loss summaries. If `False`, does not.
         If `None`, uses defaults.
       get_hooks_fn: A function that takes a GANTrainOps tuple and returns a list
-        of hooks.
+        of hooks. Defaults to `train.get_sequential_train_hooks()`
       name: name of the head. If provided, summary and metrics keys will be
         suffixed by `"/" + name`.
     """
+    if get_hooks_fn is None:
+      get_hooks_fn = tfgan_train.get_sequential_train_hooks()
     # TODO(joelshor): Validate inputs.
 
     if use_loss_summaries in [True, False]:
diff --git a/tensorflow/contrib/layers/python/layers/initializers.py b/tensorflow/contrib/layers/python/layers/initializers.py
index b12a882d9a..51610f21b2 100644
--- a/tensorflow/contrib/layers/python/layers/initializers.py
+++ b/tensorflow/contrib/layers/python/layers/initializers.py
@@ -79,7 +79,8 @@ def variance_scaling_initializer(factor=2.0, mode='FAN_IN', uniform=False,
   ```
 
   * To get [Delving Deep into Rectifiers](
-     http://arxiv.org/pdf/1502.01852v1.pdf), use (Default):<br/>
+     http://arxiv.org/pdf/1502.01852v1.pdf) (also know as the "MSRA 
+     initialization"), use (Default):<br/>
     `factor=2.0 mode='FAN_IN' uniform=False`
   * To get [Convolutional Architecture for Fast Feature Embedding](
      http://arxiv.org/abs/1408.5093), use:<br/>
diff --git a/tensorflow/contrib/learn/python/learn/estimators/composable_model_test.py b/tensorflow/contrib/learn/python/learn/estimators/composable_model_test.py
index 14750961ef..ef5e620e8f 100644
--- a/tensorflow/contrib/learn/python/learn/estimators/composable_model_test.py
+++ b/tensorflow/contrib/learn/python/learn/estimators/composable_model_test.py
@@ -18,7 +18,7 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-from tensorflow.contrib.framework.python.ops import variables as contrib_variables
+from tensorflow.python.training import training_util
 from tensorflow.contrib.layers.python.layers import feature_column
 from tensorflow.contrib.learn.python.learn.datasets import base
 from tensorflow.contrib.learn.python.learn.estimators import composable_model
@@ -55,7 +55,7 @@ def _base_model_fn(features, labels, mode, params):
     raise NotImplementedError
 
   def _train_op_fn(loss):
-    global_step = contrib_variables.get_global_step()
+    global_step = training_util.get_global_step()
     assert global_step
     train_step = model.get_train_step(loss)
 
diff --git a/tensorflow/contrib/learn/python/learn/estimators/dnn.py b/tensorflow/contrib/learn/python/learn/estimators/dnn.py
index cb15ef23e9..c17b41c0f7 100644
--- a/tensorflow/contrib/learn/python/learn/estimators/dnn.py
+++ b/tensorflow/contrib/learn/python/learn/estimators/dnn.py
@@ -23,7 +23,7 @@ import six
 from tensorflow.contrib import layers
 from tensorflow.contrib.framework import deprecated
 from tensorflow.contrib.framework import deprecated_arg_values
-from tensorflow.contrib.framework.python.ops import variables as contrib_variables
+from tensorflow.python.training import training_util
 from tensorflow.contrib.layers.python.layers import feature_column
 from tensorflow.contrib.layers.python.layers import optimizers
 from tensorflow.contrib.learn.python.learn import metric_spec
@@ -189,7 +189,7 @@ def _dnn_model_fn(features, labels, mode, params, config=None):
       """Returns the op to optimize the loss."""
       return optimizers.optimize_loss(
           loss=loss,
-          global_step=contrib_variables.get_global_step(),
+          global_step=training_util.get_global_step(),
           learning_rate=_LEARNING_RATE,
           optimizer=_get_optimizer(optimizer),
           gradient_multipliers=(
diff --git a/tensorflow/contrib/learn/python/learn/estimators/estimator_input_test.py b/tensorflow/contrib/learn/python/learn/estimators/estimator_input_test.py
index 248c6c733f..9d7c1a099a 100644
--- a/tensorflow/contrib/learn/python/learn/estimators/estimator_input_test.py
+++ b/tensorflow/contrib/learn/python/learn/estimators/estimator_input_test.py
@@ -23,7 +23,7 @@ import tempfile
 
 import numpy as np
 
-from tensorflow.contrib.framework.python.ops import variables
+from tensorflow.python.training import training_util
 from tensorflow.contrib.layers.python.layers import optimizers
 from tensorflow.contrib.learn.python.learn import metric_spec
 from tensorflow.contrib.learn.python.learn import models
@@ -114,7 +114,7 @@ def linear_model_params_fn(features, labels, mode, params):
   prediction, loss = (models.linear_regression_zero_init(features, labels))
   train_op = optimizers.optimize_loss(
       loss,
-      variables.get_global_step(),
+      training_util.get_global_step(),
       optimizer='Adagrad',
       learning_rate=params['learning_rate'])
   return prediction, loss, train_op
@@ -129,7 +129,7 @@ def linear_model_fn(features, labels, mode):
     (_, features), = features.items()
   prediction, loss = (models.linear_regression_zero_init(features, labels))
   train_op = optimizers.optimize_loss(
-      loss, variables.get_global_step(), optimizer='Adagrad', learning_rate=0.1)
+      loss, training_util.get_global_step(), optimizer='Adagrad', learning_rate=0.1)
   return prediction, loss, train_op
 
 
@@ -139,7 +139,7 @@ def linear_model_fn_with_model_fn_ops(features, labels, mode):
                   model_fn.ModeKeys.INFER)
   prediction, loss = (models.linear_regression_zero_init(features, labels))
   train_op = optimizers.optimize_loss(
-      loss, variables.get_global_step(), optimizer='Adagrad', learning_rate=0.1)
+      loss, training_util.get_global_step(), optimizer='Adagrad', learning_rate=0.1)
   return model_fn.ModelFnOps(
       mode=mode, predictions=prediction, loss=loss, train_op=train_op)
 
@@ -150,7 +150,7 @@ def logistic_model_no_mode_fn(features, labels):
   labels = array_ops.one_hot(labels, 3, 1, 0)
   prediction, loss = (models.logistic_regression_zero_init(features, labels))
   train_op = optimizers.optimize_loss(
-      loss, variables.get_global_step(), optimizer='Adagrad', learning_rate=0.1)
+      loss, training_util.get_global_step(), optimizer='Adagrad', learning_rate=0.1)
   return {
       'class': math_ops.argmax(prediction, 1),
       'prob': prediction
diff --git a/tensorflow/contrib/learn/python/learn/estimators/estimator_test.py b/tensorflow/contrib/learn/python/learn/estimators/estimator_test.py
index be2b0cb3ca..2a13a84627 100644
--- a/tensorflow/contrib/learn/python/learn/estimators/estimator_test.py
+++ b/tensorflow/contrib/learn/python/learn/estimators/estimator_test.py
@@ -32,7 +32,7 @@ from google.protobuf import text_format
 
 from tensorflow.contrib import learn
 from tensorflow.contrib import lookup
-from tensorflow.contrib.framework.python.ops import variables
+from tensorflow.python.training import training_util
 from tensorflow.contrib.layers.python.layers import feature_column as feature_column_lib
 from tensorflow.contrib.layers.python.layers import optimizers
 from tensorflow.contrib.learn.python.learn import experiment
@@ -132,7 +132,7 @@ def linear_model_params_fn(features, labels, mode, params):
   prediction, loss = (models.linear_regression_zero_init(features, labels))
   train_op = optimizers.optimize_loss(
       loss,
-      variables.get_global_step(),
+      training_util.get_global_step(),
       optimizer='Adagrad',
       learning_rate=params['learning_rate'])
   return prediction, loss, train_op
@@ -147,7 +147,7 @@ def linear_model_fn(features, labels, mode):
     (_, features), = features.items()
   prediction, loss = (models.linear_regression_zero_init(features, labels))
   train_op = optimizers.optimize_loss(
-      loss, variables.get_global_step(), optimizer='Adagrad', learning_rate=0.1)
+      loss, training_util.get_global_step(), optimizer='Adagrad', learning_rate=0.1)
   return prediction, loss, train_op
 
 
@@ -157,7 +157,7 @@ def linear_model_fn_with_model_fn_ops(features, labels, mode):
                   model_fn.ModeKeys.INFER)
   prediction, loss = (models.linear_regression_zero_init(features, labels))
   train_op = optimizers.optimize_loss(
-      loss, variables.get_global_step(), optimizer='Adagrad', learning_rate=0.1)
+      loss, training_util.get_global_step(), optimizer='Adagrad', learning_rate=0.1)
   return model_fn.ModelFnOps(
       mode=mode, predictions=prediction, loss=loss, train_op=train_op)
 
@@ -168,7 +168,7 @@ def logistic_model_no_mode_fn(features, labels):
   labels = array_ops.one_hot(labels, 3, 1, 0)
   prediction, loss = (models.logistic_regression_zero_init(features, labels))
   train_op = optimizers.optimize_loss(
-      loss, variables.get_global_step(), optimizer='Adagrad', learning_rate=0.1)
+      loss, training_util.get_global_step(), optimizer='Adagrad', learning_rate=0.1)
   return {
       'class': math_ops.argmax(prediction, 1),
       'prob': prediction
@@ -241,7 +241,7 @@ def _build_estimator_for_resource_export_test():
     const = constant_op.constant(-1, dtype=dtypes.int64)
     table = lookup.MutableHashTable(
         dtypes.string, dtypes.int64, const, name='LookupTableModel')
-    update_global_step = variables.get_global_step().assign_add(1)
+    update_global_step = training_util.get_global_step().assign_add(1)
     if mode in (model_fn.ModeKeys.TRAIN, model_fn.ModeKeys.EVAL):
       key = constant_op.constant(['key'])
       value = constant_op.constant([42], dtype=dtypes.int64)
@@ -306,7 +306,7 @@ def _model_fn_ops(
         mode=mode,
         predictions=constant_op.constant(0.),
         loss=constant_op.constant(0.),
-        train_op=variables.get_global_step().assign_add(1))
+        train_op=training_util.get_global_step().assign_add(1))
 
 
 def _make_input_fn(features, labels):
@@ -389,7 +389,7 @@ class EstimatorModelFnTest(test.TestCase):
       self.assertEqual(expected_param, params)
       self.assertEqual(model_dir, expected_model_dir)
       return (constant_op.constant(0.), constant_op.constant(0.),
-              variables.get_global_step().assign_add(1))
+              training_util.get_global_step().assign_add(1))
     est = estimator.Estimator(model_fn=_argument_checker,
                               params=expected_param,
                               model_dir=expected_model_dir)
@@ -400,7 +400,7 @@ class EstimatorModelFnTest(test.TestCase):
     def _invalid_model_fn(features, labels):
       # pylint: disable=unused-argument
       w = variables_lib.Variable(42.0, 'weight')
-      update_global_step = variables.get_global_step().assign_add(1)
+      update_global_step = training_util.get_global_step().assign_add(1)
       with ops.control_dependencies([update_global_step]):
         loss = 100.0 - w
       return None, loss, None
@@ -415,7 +415,7 @@ class EstimatorModelFnTest(test.TestCase):
       # pylint: disable=unused-argument
       w = variables_lib.Variable(42.0, 'weight')
       loss = 100.0 - w
-      update_global_step = variables.get_global_step().assign_add(1)
+      update_global_step = training_util.get_global_step().assign_add(1)
       with ops.control_dependencies([update_global_step]):
         train_op = w.assign_add(loss / 100.0)
       predictions = loss
@@ -434,7 +434,7 @@ class EstimatorModelFnTest(test.TestCase):
       # pylint: disable=unused-argument
       w = variables_lib.Variable(42.0, 'weight')
       loss = 100.0 - w
-      update_global_step = variables.get_global_step().assign_add(1)
+      update_global_step = training_util.get_global_step().assign_add(1)
       with ops.control_dependencies([update_global_step]):
         train_op = w.assign_add(loss / 100.0)
       return None, loss, train_op
@@ -464,7 +464,7 @@ class EstimatorModelFnTest(test.TestCase):
           mode=mode,
           predictions=constant_op.constant(0.),
           loss=constant_op.constant(0.),
-          train_op=variables.get_global_step().assign_add(1),
+          train_op=training_util.get_global_step().assign_add(1),
           scaffold=monitored_session.Scaffold(init_fn=_init_fn))
 
     est = estimator.Estimator(model_fn=_model_fn_scaffold)
@@ -483,7 +483,7 @@ class EstimatorModelFnTest(test.TestCase):
           mode=mode,
           predictions=constant_op.constant([[1.]]),
           loss=constant_op.constant(0.),
-          train_op=variables.get_global_step().assign_add(1),
+          train_op=training_util.get_global_step().assign_add(1),
           scaffold=monitored_session.Scaffold(saver=self.mock_saver))
 
     def input_fn():
diff --git a/tensorflow/contrib/learn/python/learn/estimators/estimators_test.py b/tensorflow/contrib/learn/python/learn/estimators/estimators_test.py
index 1d89dfb55b..8131e0fde6 100644
--- a/tensorflow/contrib/learn/python/learn/estimators/estimators_test.py
+++ b/tensorflow/contrib/learn/python/learn/estimators/estimators_test.py
@@ -22,7 +22,7 @@ import random
 
 import numpy as np
 
-from tensorflow.contrib.framework.python.ops import variables
+from tensorflow.python.training import training_util
 from tensorflow.contrib.learn.python import learn
 from tensorflow.contrib.learn.python.learn import datasets
 from tensorflow.contrib.learn.python.learn import metric_spec
@@ -62,7 +62,7 @@ class FeatureEngineeringFunctionTest(test.TestCase):
       _ = labels
       predictions = features["transformed_x"]
       loss = constant_op.constant([2.])
-      update_global_step = variables.get_global_step().assign_add(1)
+      update_global_step = training_util.get_global_step().assign_add(1)
       return predictions, loss, update_global_step
 
     estimator = estimator_lib.Estimator(
@@ -100,7 +100,7 @@ class FeatureEngineeringFunctionTest(test.TestCase):
       _ = labels
       predictions = features["x"]
       loss = constant_op.constant([2.])
-      update_global_step = variables.get_global_step().assign_add(1)
+      update_global_step = training_util.get_global_step().assign_add(1)
       return predictions, loss, update_global_step
 
     estimator = estimator_lib.Estimator(
@@ -139,7 +139,7 @@ class FeatureEngineeringFunctionTest(test.TestCase):
       _ = labels
       predictions = features["x"]
       loss = constant_op.constant([2.])
-      update_global_step = variables.get_global_step().assign_add(1)
+      update_global_step = training_util.get_global_step().assign_add(1)
       return predictions, loss, update_global_step
 
     estimator_with_fe_fn = estimator_lib.Estimator(
diff --git a/tensorflow/contrib/learn/python/learn/estimators/kmeans.py b/tensorflow/contrib/learn/python/learn/estimators/kmeans.py
index 992b804f59..8f9d6fc318 100644
--- a/tensorflow/contrib/learn/python/learn/estimators/kmeans.py
+++ b/tensorflow/contrib/learn/python/learn/estimators/kmeans.py
@@ -28,7 +28,7 @@ import time
 import numpy as np
 
 from tensorflow.contrib.factorization.python.ops import clustering_ops
-from tensorflow.contrib.framework.python.ops import variables
+from tensorflow.python.training import training_util
 from tensorflow.contrib.learn.python.learn.estimators import estimator
 from tensorflow.contrib.learn.python.learn.estimators.model_fn import ModelFnOps
 from tensorflow.python.framework import ops
@@ -128,7 +128,7 @@ def _kmeans_clustering_model_fn(features, labels, mode, params, config):
        random_seed=params.get('random_seed'),
        kmeans_plus_plus_num_retries=params.get(
            'kmeans_plus_plus_num_retries')).training_graph()
-  incr_step = state_ops.assign_add(variables.get_global_step(), 1)
+  incr_step = state_ops.assign_add(training_util.get_global_step(), 1)
   loss = math_ops.reduce_sum(losses, name=KMeansClustering.LOSS_OP_NAME)
   summary.scalar('loss/raw', loss)
   training_op = with_dependencies([training_op, incr_step], loss)
diff --git a/tensorflow/contrib/learn/python/learn/estimators/linear.py b/tensorflow/contrib/learn/python/learn/estimators/linear.py
index f5445ad4e7..37aa8b3396 100644
--- a/tensorflow/contrib/learn/python/learn/estimators/linear.py
+++ b/tensorflow/contrib/learn/python/learn/estimators/linear.py
@@ -26,7 +26,7 @@ import six
 from tensorflow.contrib import layers
 from tensorflow.contrib.framework import deprecated
 from tensorflow.contrib.framework import deprecated_arg_values
-from tensorflow.contrib.framework.python.ops import variables as contrib_variables
+from tensorflow.python.training import training_util
 from tensorflow.contrib.layers.python.layers import feature_column
 from tensorflow.contrib.learn.python.learn.estimators import estimator
 from tensorflow.contrib.learn.python.learn.estimators import head as head_lib
@@ -170,7 +170,7 @@ def _linear_model_fn(features, labels, mode, params, config=None):
           weight_collections=[parent_scope])
 
     def _train_op_fn(loss):
-      global_step = contrib_variables.get_global_step()
+      global_step = training_util.get_global_step()
       my_vars = ops.get_collection(parent_scope)
       grads = gradients.gradients(loss, my_vars)
       if gradient_clip_norm:
@@ -252,7 +252,7 @@ def sdca_model_fn(features, labels, mode, params):
     _add_bias_column(feature_columns, features, bias, columns_to_variables)
 
   def _train_op_fn(unused_loss):
-    global_step = contrib_variables.get_global_step()
+    global_step = training_util.get_global_step()
     sdca_model, train_op = optimizer.get_train_step(columns_to_variables,
                                                     weight_column_name,
                                                     loss_type, features,
diff --git a/tensorflow/contrib/learn/python/learn/estimators/logistic_regressor_test.py b/tensorflow/contrib/learn/python/learn/estimators/logistic_regressor_test.py
index 93c62f87e8..656d68b768 100644
--- a/tensorflow/contrib/learn/python/learn/estimators/logistic_regressor_test.py
+++ b/tensorflow/contrib/learn/python/learn/estimators/logistic_regressor_test.py
@@ -21,7 +21,7 @@ from __future__ import print_function
 import numpy as np
 
 from tensorflow.contrib import layers
-from tensorflow.contrib.framework.python.ops import variables
+from tensorflow.python.training import training_util
 from tensorflow.contrib.layers.python.layers import optimizers
 from tensorflow.contrib.learn.python.learn.datasets import base
 from tensorflow.contrib.learn.python.learn.estimators import logistic_regressor
@@ -57,7 +57,7 @@ def _logistic_regression_model_fn(features, labels, mode):
   predictions = math_ops.sigmoid(logits)
   loss = losses.sigmoid_cross_entropy(labels, logits)
   train_op = optimizers.optimize_loss(
-      loss, variables.get_global_step(), optimizer='Adagrad', learning_rate=0.1)
+      loss, training_util.get_global_step(), optimizer='Adagrad', learning_rate=0.1)
   return predictions, loss, train_op
 
 
diff --git a/tensorflow/contrib/learn/python/learn/utils/export.py b/tensorflow/contrib/learn/python/learn/utils/export.py
index 6af2287761..cb34cb1d26 100644
--- a/tensorflow/contrib/learn/python/learn/utils/export.py
+++ b/tensorflow/contrib/learn/python/learn/utils/export.py
@@ -20,7 +20,7 @@ from __future__ import division
 from __future__ import print_function
 
 from tensorflow.contrib.framework import deprecated
-from tensorflow.contrib.framework.python.ops import variables as contrib_variables
+from tensorflow.python.training import training_util
 from tensorflow.contrib.session_bundle import exporter
 from tensorflow.contrib.session_bundle import gc
 from tensorflow.python.client import session as tf_session
@@ -78,7 +78,7 @@ def _export_graph(graph, saver, checkpoint_path, export_dir,
           default_graph_signature=default_graph_signature,
           named_graph_signatures=named_graph_signatures,
           assets_collection=ops.get_collection(ops.GraphKeys.ASSET_FILEPATHS))
-      return export.export(export_dir, contrib_variables.get_global_step(),
+      return export.export(export_dir, training_util.get_global_step(),
                            session, exports_to_keep=exports_to_keep)
 
 
@@ -295,7 +295,7 @@ def _export_estimator(estimator,
   checkpoint_path = (checkpoint_path or
                      tf_saver.latest_checkpoint(estimator._model_dir))
   with ops.Graph().as_default() as g:
-    contrib_variables.create_global_step(g)
+    training_util.create_global_step(g)
 
     if use_deprecated_input_fn:
       examples = array_ops.placeholder(dtype=dtypes.string,
diff --git a/tensorflow/contrib/linear_optimizer/python/sdca_estimator.py b/tensorflow/contrib/linear_optimizer/python/sdca_estimator.py
index 701fc1c059..05794a42c5 100644
--- a/tensorflow/contrib/linear_optimizer/python/sdca_estimator.py
+++ b/tensorflow/contrib/linear_optimizer/python/sdca_estimator.py
@@ -19,7 +19,7 @@ from __future__ import division
 from __future__ import print_function
 
 from tensorflow.contrib import layers
-from tensorflow.contrib.framework.python.ops import variables as contrib_variables
+from tensorflow.python.training import training_util
 from tensorflow.contrib.learn.python.learn.estimators import estimator
 from tensorflow.contrib.learn.python.learn.estimators import head as head_lib
 from tensorflow.contrib.learn.python.learn.estimators import prediction_key
@@ -154,7 +154,7 @@ def sdca_model_fn(features, labels, mode, params, config=None):
     _add_bias_column(feature_columns, features, bias, columns_to_variables)
 
   def _train_op_fn(unused_loss):
-    global_step = contrib_variables.get_global_step()
+    global_step = training_util.get_global_step()
     sdca_model, train_op = optimizer.get_train_step(
         columns_to_variables, weight_column_name, loss_type, features, labels,
         global_step)
diff --git a/tensorflow/contrib/lite/README.md b/tensorflow/contrib/lite/README.md
index fc9144d5fc..2fb40070cb 100644
--- a/tensorflow/contrib/lite/README.md
+++ b/tensorflow/contrib/lite/README.md
@@ -167,6 +167,7 @@ graphviz, or [in tensorboard](https://codelabs.developers.google.com/codelabs/te
 This frozen Graphdef is now ready to be converted to flatbuffer format (.lite) for use on Android or iOS.  On Android users have the flexibility to use either the float or quantized versions of the frozen graphdef, if available, using the Tensorflow Optimizing Converter tool.
 
 Here is a sample command line to convert the frozen Graphdef to '.lite' format for  The Tensorflow Optimizing Converter supports both float and quantized models, however, different configuration parameters are needed depending on whether a FLOAT or QUANTIZED mode is being used.
+(Here is a link to the pb [file](https://storage.googleapis.com/download.tensorflow.org/models/mobilenet_v1_1.0_224_frozen.tgz)).
 
 ```
 bazel build tensorflow/contrib/lite/toco:toco
@@ -215,3 +216,7 @@ Note that you'd need to follow instructions for installing TensorFlow on Android
 
 ### For iOS
 Follow the documentation [here](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/lite/g3doc/ios.md) to get integrate a TFLite model into your app.
+
+## Core ML support
+
+Core ML is a machine learning framework used across Apple products. In addition to using Tensorflow Lite models directly in their applications, developers have the option to convert their trained Tensorflow models to the [CoreML](https://developer.apple.com/machine-learning/) format for use on Apple devices. For information on how to use the converter please refer to the [Tensorflow-CoreML converter documentation](https://github.com/tf-coreml/tf-coreml).
diff --git a/tensorflow/contrib/lite/ios_makefile.inc b/tensorflow/contrib/lite/ios_makefile.inc
index 345ed26212..bcff7ed988 100644
--- a/tensorflow/contrib/lite/ios_makefile.inc
+++ b/tensorflow/contrib/lite/ios_makefile.inc
@@ -1,31 +1,47 @@
-#Settings for iOS.
-ifeq($(TARGET), IOS) BUILD_FOR_IOS_SIMULATOR
-    : = false ifeq($(IOS_ARCH), x86_64) BUILD_FOR_IOS_SIMULATOR
-    : = true endif ifeq($(IOS_ARCH), i386) BUILD_FOR_IOS_SIMULATOR
-    : = true endif ifeq($(BUILD_FOR_IOS_SIMULATOR), true) IPHONEOS_PLATFORM
-    : = $(shell xcrun-- sdk iphonesimulator-- show - sdk - platform -
-          path) IPHONEOS_SYSROOT
-    : = $(shell xcrun-- sdk iphonesimulator-- show - sdk -
-          path) else IPHONEOS_PLATFORM
-    : = $(shell xcrun-- sdk iphoneos-- show - sdk - platform -
-          path) IPHONEOS_SYSROOT
-    : = $(shell xcrun-- sdk iphoneos-- show - sdk - path) endif IOS_SDK_VERSION
-    : = $(shell xcrun-- sdk iphoneos-- show - sdk - version) MIN_SDK_VERSION
-    : = 9.0
-#Override IOS_ARCH with armv7, armv7s, arm64, i386, or x86_64.
-      IOS_ARCH
-    : = x86_64 CXXFLAGS
-      += -miphoneos - version
-         - min = $(MIN_SDK_VERSION) - DGEMMLOWP_ALLOW_SLOW_SCALAR_FALLBACK
-                 - fembed - bitcode - Wno - c++ 11 - narrowing - mno - thumb
-                 - fno - exceptions
-                 - isysroot ${IPHONEOS_SYSROOT} - arch $(IOS_ARCH) - O3 CCFLAGS
-      += -miphoneos - version
-         - min = $(MIN_SDK_VERSION) - fembed - bitcode - mno - thumb
-                 - isysroot ${IPHONEOS_SYSROOT} - arch $(IOS_ARCH) -
-                 O3 LDFLAGS
-    : = -fembed - bitcode - miphoneos - version
-        - min = ${MIN_SDK_VERSION} - arch $(IOS_ARCH) OBJDIR
-    : = $(OBJDIR) ios_$(IOS_ARCH) / LIBDIR
-    : = $(LIBDIR) ios_$(IOS_ARCH) / BINDIR
-    : = $(BINDIR) ios_$(IOS_ARCH) / DEPDIR : = $(DEPDIR) ios_$(IOS_ARCH) / endif
+# Settings for iOS.
+ifeq ($(TARGET), IOS)
+        BUILD_FOR_IOS_SIMULATOR := false
+	ifeq ($(IOS_ARCH), x86_64)
+	     	BUILD_FOR_IOS_SIMULATOR := true
+	endif
+	ifeq ($(IOS_ARCH), i386)
+	     	BUILD_FOR_IOS_SIMULATOR := true
+	endif
+	ifeq ($(BUILD_FOR_IOS_SIMULATOR), true)
+		IPHONEOS_PLATFORM := $(shell xcrun --sdk iphonesimulator \
+			--show-sdk-platform-path)
+		IPHONEOS_SYSROOT := $(shell xcrun --sdk iphonesimulator \
+			--show-sdk-path)
+	else
+		IPHONEOS_PLATFORM := $(shell xcrun --sdk iphoneos --show-sdk-platform-path)
+		IPHONEOS_SYSROOT := $(shell xcrun --sdk iphoneos --show-sdk-path)
+	endif
+	IOS_SDK_VERSION := $(shell xcrun --sdk iphoneos --show-sdk-version)
+	MIN_SDK_VERSION := 9.0
+	# Override IOS_ARCH with armv7, armv7s, arm64, i386, or x86_64.
+	IOS_ARCH := x86_64
+	CXXFLAGS += -miphoneos-version-min=$(MIN_SDK_VERSION) \
+		-DGEMMLOWP_ALLOW_SLOW_SCALAR_FALLBACK \
+		-fembed-bitcode \
+		-Wno-c++11-narrowing \
+		-mno-thumb \
+		-fno-exceptions \
+		-isysroot \
+		${IPHONEOS_SYSROOT} \
+		-arch $(IOS_ARCH) \
+		-O3
+	CCFLAGS += -miphoneos-version-min=$(MIN_SDK_VERSION) \
+		-fembed-bitcode \
+		-mno-thumb \
+		-isysroot \
+		${IPHONEOS_SYSROOT} \
+		-arch $(IOS_ARCH) \
+		-O3
+	LDFLAGS := -fembed-bitcode \
+		-miphoneos-version-min=${MIN_SDK_VERSION} \
+		-arch $(IOS_ARCH)
+	OBJDIR := $(OBJDIR)ios_$(IOS_ARCH)/
+	LIBDIR := $(LIBDIR)ios_$(IOS_ARCH)/
+	BINDIR := $(BINDIR)ios_$(IOS_ARCH)/
+	DEPDIR := $(DEPDIR)ios_$(IOS_ARCH)/
+endif
diff --git a/tensorflow/contrib/lite/nnapi/NeuralNetworksShim.h b/tensorflow/contrib/lite/nnapi/NeuralNetworksShim.h
index bdb5e01538..8066889078 100644
--- a/tensorflow/contrib/lite/nnapi/NeuralNetworksShim.h
+++ b/tensorflow/contrib/lite/nnapi/NeuralNetworksShim.h
@@ -108,7 +108,7 @@ enum {
  * The type of operations that can be added to a model.
  */
 enum {
-  /** Adds two tensors, elment-wise.
+  /** Adds two tensors, element-wise.
    *
    * Takes two input tensors of identical type and compatible dimensions. The
    * output is the sum of both input tensors, optionally modified by an
@@ -743,7 +743,7 @@ enum {
    */
   ANEURALNETWORKS_MAX_POOL_2D = 17,
 
-  /** Multiplies two tensors, elment-wise.
+  /** Multiplies two tensors, element-wise.
    *
    * Takes two input tensors of identical type and compatible dimensions. The
    * output is the product of both input tensors, optionally modified by an
diff --git a/tensorflow/contrib/makefile/Makefile b/tensorflow/contrib/makefile/Makefile
index e2e6c05591..ee84b5b4c8 100644
--- a/tensorflow/contrib/makefile/Makefile
+++ b/tensorflow/contrib/makefile/Makefile
@@ -300,7 +300,7 @@ ifeq ($(TARGET),ANDROID)
 	ifeq ($(ANDROID_ARCH),x86_64)
 		TOOLCHAIN := x86_64-4.9
 		SYSROOT_ARCH := x86_64
-		BIN_PREFIX := x86-64-linux-android
+		BIN_PREFIX := x86_64-linux-android
 		MARCH_OPTION :=
 	endif
     
diff --git a/tensorflow/contrib/nn/__init__.py b/tensorflow/contrib/nn/__init__.py
index 0bc133a00e..96d60e1498 100644
--- a/tensorflow/contrib/nn/__init__.py
+++ b/tensorflow/contrib/nn/__init__.py
@@ -21,6 +21,7 @@
 @@deprecated_flipped_sigmoid_cross_entropy_with_logits
 @@nth_element
 @@rank_sampled_softmax_loss
+@@sampled_sparse_softmax_loss
 @@scaled_softplus
 """
 
diff --git a/tensorflow/contrib/nn/python/ops/sampling_ops.py b/tensorflow/contrib/nn/python/ops/sampling_ops.py
index 98749cff7e..63fc487dca 100644
--- a/tensorflow/contrib/nn/python/ops/sampling_ops.py
+++ b/tensorflow/contrib/nn/python/ops/sampling_ops.py
@@ -24,6 +24,8 @@ from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import embedding_ops
 from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import nn
+from tensorflow.python.ops import nn_impl
+from tensorflow.python.ops import nn_ops
 
 
 def _rank_resample(weights, biases, inputs, sampled_values, num_resampled,
@@ -240,3 +242,101 @@ def rank_sampled_softmax_loss(weights,
         remove_accidental_hits=remove_accidental_hits,
         partition_strategy=partition_strategy,
         name=name)
+
+
+def sampled_sparse_softmax_loss(weights,
+                                biases,
+                                labels,
+                                inputs,
+                                num_sampled,
+                                num_classes,
+                                sampled_values=None,
+                                remove_accidental_hits=True,
+                                partition_strategy="mod",
+                                name="sampled_sparse_softmax_loss"):
+  """Computes and returns the sampled sparse softmax training loss.
+
+  This is a faster way to train a softmax classifier over a huge number of
+  classes.
+
+  This operation is for training only.  It is generally an underestimate of
+  the full softmax loss.
+
+  A common use case is to use this method for training, and calculate the full
+  softmax loss for evaluation or inference. In this case, you must set
+  `partition_strategy="div"` for the two losses to be consistent, as in the
+  following example:
+
+  ```python
+  if mode == "train":
+    loss = tf.nn.sampled_sparse_softmax_loss(
+        weights=weights,
+        biases=biases,
+        labels=labels,
+        inputs=inputs,
+        ...,
+        partition_strategy="div")
+  elif mode == "eval":
+    logits = tf.matmul(inputs, tf.transpose(weights))
+    logits = tf.nn.bias_add(logits, biases)
+    loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
+        labels=tf.squeeze(labels),
+        logits=logits)
+  ```
+
+  See our [Candidate Sampling Algorithms Reference]
+  (https://www.tensorflow.org/extras/candidate_sampling.pdf)
+
+  Also see Section 3 of [Jean et al., 2014](http://arxiv.org/abs/1412.2007)
+  ([pdf](http://arxiv.org/pdf/1412.2007.pdf)) for the math.
+
+  Args:
+    weights: A `Tensor` of shape `[num_classes, dim]`, or a list of `Tensor`
+        objects whose concatenation along dimension 0 has shape
+        [num_classes, dim].  The (possibly-sharded) class embeddings.
+    biases: A `Tensor` of shape `[num_classes]`.  The class biases.
+    labels: A `Tensor` of type `int64` and shape `[batch_size, 1]`.
+        The index of the single target class for each row of logits.  Note that
+        this format differs from the `labels` argument of
+        `nn.sparse_softmax_cross_entropy_with_logits`.
+    inputs: A `Tensor` of shape `[batch_size, dim]`.  The forward
+        activations of the input network.
+    num_sampled: An `int`.  The number of classes to randomly sample per batch.
+    num_classes: An `int`. The number of possible classes.
+    sampled_values: a tuple of (`sampled_candidates`, `true_expected_count`,
+        `sampled_expected_count`) returned by a `*_candidate_sampler` function.
+        (if None, we default to `log_uniform_candidate_sampler`)
+    remove_accidental_hits:  A `bool`.  whether to remove "accidental hits"
+        where a sampled class equals one of the target classes.  Default is
+        True.
+    partition_strategy: A string specifying the partitioning strategy, relevant
+        if `len(weights) > 1`. Currently `"div"` and `"mod"` are supported.
+        Default is `"mod"`. See `tf.nn.embedding_lookup` for more details.
+    name: A name for the operation (optional).
+
+  Returns:
+    A `batch_size` 1-D tensor of per-example sampled softmax losses.
+
+  """
+  logits, _ = nn_impl._compute_sampled_logits(
+      weights=weights,
+      biases=biases,
+      labels=labels,
+      inputs=inputs,
+      num_sampled=num_sampled,
+      num_classes=num_classes,
+      num_true=1,
+      sampled_values=sampled_values,
+      subtract_log_q=True,
+      remove_accidental_hits=remove_accidental_hits,
+      partition_strategy=partition_strategy,
+      name=name)
+
+  # There is only one true label. _compute_sampled_logits puts the true logit
+  # at index 0.
+  labels = array_ops.zeros([array_ops.shape(logits)[0], 1], dtype=dtypes.int64)
+
+  sampled_losses = nn_ops.sparse_softmax_cross_entropy_with_logits(
+      labels=array_ops.squeeze(labels), logits=logits)
+  # sampled_losses is a [batch_size] tensor.
+  return sampled_losses
diff --git a/tensorflow/contrib/opt/BUILD b/tensorflow/contrib/opt/BUILD
index d2811f21af..9c961f2b9c 100644
--- a/tensorflow/contrib/opt/BUILD
+++ b/tensorflow/contrib/opt/BUILD
@@ -16,6 +16,7 @@ py_library(
         "__init__.py",
         "python/training/addsign.py",
         "python/training/drop_stale_gradient_optimizer.py",
+        "python/training/elastic_average_optimizer.py",
         "python/training/external_optimizer.py",
         "python/training/lazy_adam_optimizer.py",
         "python/training/moving_average_optimizer.py",
@@ -174,6 +175,24 @@ tf_py_test(
     ],
 )
 
+tf_py_test(
+    name = "elastic_average_optimizer_test",
+    srcs = ["python/training/elastic_average_optimizer_test.py"],
+    additional_deps = [
+        ":opt_py",
+        "//tensorflow/python:client",
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:array_ops",
+        "//tensorflow/python:variables",
+        "//tensorflow/python:framework",
+        "//tensorflow/python:platform",
+        "//tensorflow/python:training",
+        "//tensorflow/python:ops",
+        "//tensorflow/python:framework_for_generated_wrappers",
+        "//third_party/py/numpy",
+    ],
+)
+
 py_test(
     name = "sign_decay_test",
     srcs = ["python/training/sign_decay_test.py"],
diff --git a/tensorflow/contrib/opt/__init__.py b/tensorflow/contrib/opt/__init__.py
index 04643a6058..2025e8b4fc 100644
--- a/tensorflow/contrib/opt/__init__.py
+++ b/tensorflow/contrib/opt/__init__.py
@@ -1,4 +1,4 @@
-# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+    # Copyright 2016 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -28,6 +28,7 @@ from tensorflow.contrib.opt.python.training.multitask_optimizer_wrapper import *
 from tensorflow.contrib.opt.python.training.nadam_optimizer import *
 from tensorflow.contrib.opt.python.training.powersign import *
 from tensorflow.contrib.opt.python.training.variable_clipping_optimizer import *
+from tensorflow.contrib.opt.python.training.elastic_average_optimizer import *
 # pylint: enable=wildcard-import
 
 from tensorflow.python.util.all_util import remove_undocumented
@@ -46,6 +47,8 @@ _allowed_symbols = [
     'VariableClippingOptimizer',
     'MultitaskOptimizerWrapper',
     'clip_gradients_by_global_norm',
+    'ElasticAverageOptimizer', 
+    'ElasticAverageCustomGetter'
 ]
 
 remove_undocumented(__name__, _allowed_symbols)
diff --git a/tensorflow/contrib/opt/python/training/elastic_average_optimizer.py b/tensorflow/contrib/opt/python/training/elastic_average_optimizer.py
new file mode 100644
index 0000000000..9941f22b1f
--- /dev/null
+++ b/tensorflow/contrib/opt/python/training/elastic_average_optimizer.py
@@ -0,0 +1,345 @@
+# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Wrapper optimizer for Elastic Average SGD """
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from tensorflow.python.framework import ops
+from tensorflow.python.ops import math_ops
+
+from tensorflow.python.ops import gen_nn_ops
+from tensorflow.python.ops import control_flow_ops
+from tensorflow.python.ops import variable_scope
+from tensorflow.python.ops import variables
+from tensorflow.python.training import optimizer
+from tensorflow.python.training import session_run_hook
+from tensorflow.python.ops import state_ops
+from tensorflow.python.ops import data_flow_ops
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import constant_op
+
+LOCAL_VARIABLE_NAME = 'local_center_variable'
+GLOBAL_VARIABLE_NAME = 'global_center_variable'
+
+
+class ElasticAverageCustomGetter(object):
+  """Custom_getter class is used to do:
+  1. Change trainable variables to local collection and place them at worker
+    device
+  2. Generate global variables(global center variables)
+  3. Generate local variables(local center variables) which record the global
+    variables and place them at worker device
+    Notice that the class should be used with tf.replica_device_setter,
+    so that the global center variables and global step variable can be placed
+    at ps device. Besides, use 'tf.get_variable' instead of 'tf.Variable' to
+    use this custom getter.
+
+  For example,
+  ea_custom_getter = ElasticAverageCustomGetter(worker_device)
+  with tf.device(
+    tf.train.replica_device_setter(
+      worker_device=worker_device,
+      ps_device="/job:ps/cpu:0",
+      cluster=cluster)),
+    tf.variable_scope('',custom_getter=ea_custom_getter):
+    hid_w = tf.get_variable(
+      initializer=tf.truncated_normal(
+          [IMAGE_PIXELS * IMAGE_PIXELS, FLAGS.hidden_units],
+          stddev=1.0 / IMAGE_PIXELS),
+      name="hid_w")
+    hid_b = tf.get_variable(initializer=tf.zeros([FLAGS.hidden_units]),
+                            name="hid_b")
+  """
+
+  def __init__(self, worker_device):
+    """Create a new `ElasticAverageCustomGetter`.
+
+    Args:
+      worker_device: String.  Name of the `worker` job.
+    """
+    self._worker_device = worker_device
+    self._local_map = {}
+    self._global_map = {}
+
+  def __call__(self, getter, name, trainable, collections, *args, **kwargs):
+    if trainable:
+      with ops.device(self._worker_device):
+        local_var = getter(name, trainable=True,
+                           collections=[ops.GraphKeys.LOCAL_VARIABLES], 
+                           *args, **kwargs)
+        
+      global_center_variable = variable_scope.variable(
+        name='%s/%s' %
+             (GLOBAL_VARIABLE_NAME,
+              name),
+        initial_value=local_var.initialized_value(),
+        trainable=False,
+        collections=[ops.GraphKeys.GLOBAL_VARIABLES])
+
+      with ops.device(self._worker_device):
+        local_center_variable = variable_scope.variable(
+          name='%s/%s' % (LOCAL_VARIABLE_NAME, name),
+          initial_value=local_var.initialized_value(),
+          trainable=False,
+          collections=[ops.GraphKeys.LOCAL_VARIABLES])
+        
+      self._local_map[local_var] = local_center_variable
+      self._global_map[local_var] = global_center_variable
+      return local_var
+    else:
+      return getter(name, trainable, collections, *args, **kwargs)
+
+
+class ElasticAverageOptimizer(optimizer.Optimizer):
+  """Wrapper optimizer that implements the Elastic Average SGD algorithm.
+  This is an async optimizer. During the training, Each worker will update
+  the local variables and maintains its own local_step, which starts from 0
+  and is incremented by 1 after each update of local variables. Whenever
+  the communication period divides the local step, the worker requests
+  the current global center variables and then computed the elastic difference
+  between global center variables and local variables. The elastic difference
+  then be used to update both local variables and global variables.
+  """
+
+  # Default value as paper described
+  BETA = 0.9
+
+  def __init__(
+      self,
+      opt,
+      num_worker,
+      ea_custom_getter,
+      communication_period=10,
+      moving_rate=None,
+      rho=None,
+      use_locking=True,
+      name="ElasticAverageOptimizer"):
+    """Construct a new gradient descent optimizer.
+
+    Args:
+      opt: The actual optimizer that will be used to update local variables.
+        Must be one of the Optimizer classes.
+      num_worker: The number of workers
+      ea_custom_getter: The ElasticAverageCustomGetter
+      communication_period: An int point value to controls the frequency
+        of the communication between every worker and the ps.
+      moving_rate: A floating point value to control the elastic difference.
+      rho: the amount of exploration we allow ine the model. The default
+        value is moving_rate/learning_rate
+      use_locking: If True use locks for update operations.
+      name: Optional name prefix for the operations created when applying
+        gradients. Defaults to "ElasticAverageOptimizer".
+    """
+    super(ElasticAverageOptimizer, self).__init__(use_locking, name)
+    self._opt = opt
+    self._num_worker = num_worker
+    self._period = communication_period
+    self._local_map = ea_custom_getter._local_map
+    self._global_map = ea_custom_getter._global_map
+
+    if moving_rate is None:
+      self._moving_rate = BETA / communication_period / num_worker
+    else:
+      self._moving_rate = moving_rate
+    if rho is None:
+      self._rho = self._moving_rate / self._opt._learning_rate
+    else:
+      self._rho = rho
+
+    self._local_step = variable_scope.get_variable(
+      initializer=0,
+      trainable=False,
+      collections=[ops.GraphKeys.LOCAL_VARIABLES],
+      name="local_step")
+    self._opt._prepare()
+
+  def compute_gradients(self, loss, var_list=None,
+                        gate_gradients=optimizer.Optimizer.GATE_OP,
+                        aggregation_method=None,
+                        colocate_gradients_with_ops=False,
+                        grad_loss=None):
+    """Compute gradients of `loss` for the variables in `var_list`.
+    
+    Add rho*elastic_difference to loss to control the exploration
+    This is the first part of `minimize()`.  It returns a list
+    of (gradient, variable) pairs where "gradient" is the gradient
+    for "variable".  Note that "gradient" can be a `Tensor`, an
+    `IndexedSlices`, or `None` if there is no gradient for the
+    given variable.
+
+    Args:
+      loss: A Tensor containing the value to minimize.
+      var_list: Optional list or tuple of `tf.Variable` to update to minimize
+        `loss`.  Defaults to the list of variables collected in the graph
+        under the key `GraphKey.TRAINABLE_VARIABLES`.
+      gate_gradients: How to gate the computation of gradients.  Can be
+        `GATE_NONE`, `GATE_OP`, or `GATE_GRAPH`.
+      aggregation_method: Specifies the method used to combine gradient terms.
+        Valid values are defined in the class `AggregationMethod`.
+      colocate_gradients_with_ops: If True, try colocating gradients with
+        the corresponding op.
+      grad_loss: Optional. A `Tensor` holding the gradient computed for `loss`.
+
+    Returns:
+      A list of (gradient, variable) pairs. Variable is always present, but
+      gradient can be `None`.
+
+    Raises:
+      TypeError: If `var_list` contains anything else than `Variable` objects.
+      ValueError: If some arguments are invalid.
+    """
+    if not var_list:
+      var_list = variables.trainable_variables()
+      
+    elastic_difference = [math_ops.subtract(v, lv) for v, lv in zip(
+      variables.trainable_variables(),
+      [self._local_map[var] for var in var_list])]
+
+    distance_loss = self._rho * math_ops.add_n(
+                      [gen_nn_ops.l2_loss(ed) for ed in elastic_difference])
+
+    total_loss = loss + distance_loss
+    return self._opt.compute_gradients(total_loss, var_list,
+                                       gate_gradients, aggregation_method,
+                                       colocate_gradients_with_ops, grad_loss)
+
+  def apply_gradients(self, grads_and_vars, global_step=None, name=None):
+    """Apply gradients to global variables.
+
+    This is the second part of `minimize()`. It returns an `Operation` that
+    applies gradients.
+
+    Args:
+      grads_and_vars: List of (gradient, variable) pairs as returned by
+        `compute_gradients()`.
+      global_step: Optional `Variable` to increment by one after the
+        variables have been updated.
+      name: Optional name for the returned operation.  Default to the
+        name passed to the `Optimizer` constructor.
+
+    Returns:
+      An `Operation` that applies the specified gradients. If `global_step`
+      was not None, that operation also increments `global_step`.
+
+    Raises:
+      TypeError: If `grads_and_vars` is malformed.
+      ValueError: If none of the variables have gradients.
+    """
+    apply_updates = self._opt.apply_gradients(grads_and_vars)
+    with ops.control_dependencies([apply_updates]):
+      local_update = state_ops.assign_add(
+        self._local_step, 1, name='local_step_update').op
+
+    # update global variables.
+    def _Update_global_variables():
+      local_vars = [v for g, v in grads_and_vars if g is not None]
+      global_center_vars = [self._global_map[var] for var in local_vars]
+      local_center_vars = [self._local_map[var] for var in local_vars]
+      local_center_vars_update = []
+      for lvar, var in zip(local_center_vars, global_center_vars):
+        local_center_vars_update.append(lvar.assign(var))
+      update_ops = []
+      differences = []
+      with ops.control_dependencies(local_center_vars_update):
+        for v, lv in zip(local_vars, local_center_vars):
+          with ops.device(v.device):
+            differences.append(math_ops.subtract(v, lv))
+        for lvar, diff in zip(local_vars, differences):
+          with ops.device(lvar.device):
+            update_ops.append(state_ops.assign_sub(lvar, math_ops.multiply(
+              self._moving_rate, diff)))
+        for var, diff in zip(global_center_vars, differences):
+          with ops.device(var.device):
+            update_ops.append(state_ops.assign_add(var, math_ops.multiply(
+              self._moving_rate, diff)))
+        if global_step:
+          with ops.colocate_with(global_step):
+            update_ops.append(state_ops.assign_add(global_step, 1))
+      variable_update = control_flow_ops.group(*(update_ops))
+      return variable_update
+
+    with ops.control_dependencies([local_update]):
+      condition = math_ops.equal(math_ops.mod(
+        self._local_step, self._period), 0)
+      conditional_update = control_flow_ops.cond(
+        condition, _Update_global_variables, control_flow_ops.no_op)
+    return conditional_update
+
+  def get_init_op(self, task_index):
+    """Returns the op to let all the local variables and local center
+    variables equal to the global center variables before the training begins"""
+
+    def _Add_sync_queues_and_barrier(enqueue_after_list):
+      """Adds ops to enqueu on all worker queues"""
+      sync_queues = [
+        data_flow_ops.FIFOQueue(self._num_worker, [dtypes.bool], shapes=[[]],
+                                shared_name='%s%s' % (
+                                  'variable_init_sync_queue', i)) for i in
+        range(self._num_worker)]
+      queue_ops = []
+      # For each other worker, add an entry in a queue
+      token = constant_op.constant(False)
+      with ops.control_dependencies(enqueue_after_list):
+        for i, q in enumerate(sync_queues):
+          if i == task_index:
+            queue_ops.append(control_flow_ops.no_op())
+          else:
+            queue_ops.append(q.enqueue(token))
+      queue_ops.append(
+        sync_queues[task_index].dequeue_many(len(sync_queues) - 1))
+      return control_flow_ops.group(*queue_ops)
+
+    init_ops = []
+    local_vars = variables.trainable_variables()
+    global_center_vars = [self._global_map[var] for var in local_vars]
+    local_center_vars = [self._local_map[var] for var in local_vars]
+    if not (local_vars and global_center_vars and local_center_vars):
+      raise ValueError(
+        'The lists of local_variables, global_center_variables, '
+        'local_center_variables should not be empty  ')
+    for lvar, gc_var, lc_var in zip(
+        local_vars, global_center_vars, local_center_vars):
+      init_ops.append(state_ops.assign(lvar, gc_var))
+      init_ops.append(state_ops.assign(lc_var, gc_var))
+
+    init_op = control_flow_ops.group(*(init_ops))
+    sync_queue_op = _Add_sync_queues_and_barrier([init_op])
+    return sync_queue_op
+
+  def make_session_run_hook(self, is_chief, task_index):
+    """Creates a hook to handle ElasticAverageOptimizerHook ops such as initialization."""
+    return _ElasticAverageOptimizerHook(self, is_chief, task_index)
+
+
+class _ElasticAverageOptimizerHook(session_run_hook.SessionRunHook):
+  def __init__(self, ea_optimizer, is_chief, task_index):
+    """Creates hook to handle ElasticAverageOptimizer initialization ops.
+
+    Args:
+      ea_optimizer: `ElasticAverageOptimizer` which this hook will initialize.
+      is_chief: `Bool`, whether is this a chief replica or not.
+    """
+    self._ea_optimizer = ea_optimizer
+    self._is_chief = is_chief
+    self._task_index = task_index
+
+  def begin(self):
+    self._local_init_op = variables.local_variables_initializer()
+    self._global_init_op = None
+    if self._is_chief:
+      self._global_init_op = variables.global_variables_initializer()
+    self._variable_init_op = self._ea_optimizer.get_init_op(self._task_index)
diff --git a/tensorflow/contrib/opt/python/training/elastic_average_optimizer_test.py b/tensorflow/contrib/opt/python/training/elastic_average_optimizer_test.py
new file mode 100644
index 0000000000..59e55fceee
--- /dev/null
+++ b/tensorflow/contrib/opt/python/training/elastic_average_optimizer_test.py
@@ -0,0 +1,225 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for ElasticAverageOptimizer."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import portpicker
+from tensorflow.python.framework import constant_op
+from tensorflow.python.framework import ops
+from tensorflow.python.ops import variables
+from tensorflow.python.platform import test
+from tensorflow.python.training import gradient_descent
+from tensorflow.python.training import server_lib
+from tensorflow.python.training import training
+from tensorflow.python.training import training_util
+from tensorflow.python.ops import variable_scope
+from tensorflow.python.training import device_setter
+
+from tensorflow.contrib.opt.python.training.elastic_average_optimizer import \
+  ElasticAverageOptimizer, ElasticAverageCustomGetter, GLOBAL_VARIABLE_NAME
+
+
+def create_local_cluster(num_workers, num_ps, protocol="grpc"):
+  """Create local GRPC servers and return them."""
+  worker_ports = [portpicker.pick_unused_port() for _ in range(num_workers)]
+  ps_ports = [portpicker.pick_unused_port() for _ in range(num_ps)]
+  cluster_dict = {
+    "worker": ["localhost:%s" % port for port in worker_ports],
+    "ps": ["localhost:%s" % port for port in ps_ports]
+  }
+  cs = server_lib.ClusterSpec(cluster_dict)
+
+  workers = [
+    server_lib.Server(
+      cs, job_name="worker", protocol=protocol, task_index=ix, start=True)
+    for ix in range(num_workers)
+  ]
+  ps_servers = [
+    server_lib.Server(
+      cs, job_name="ps", protocol=protocol, task_index=ix, start=True)
+    for ix in range(num_ps)
+  ]
+
+  return cluster_dict, workers, ps_servers
+
+
+# Creates the workers and return their sessions, graphs, train_ops.
+# Cheif worker will update at last
+def _get_workers(num_workers, period, workers, moving_rate):
+  sessions = []
+  graphs = []
+  train_ops = []
+  for worker_id in range(num_workers):
+    graph = ops.Graph()
+    is_chief = (worker_id == 0)
+    with graph.as_default():
+      worker_device = "/job:worker/task:%d/cpu:0" % (worker_id)
+      ea_coustom = ElasticAverageCustomGetter(
+        worker_device=worker_device)
+      with variable_scope.variable_scope('',
+                                         custom_getter=ea_coustom), ops.device(
+        device_setter.replica_device_setter(worker_device=worker_device,
+                                            ps_device="/job:ps/task:0/cpu:0",
+                                            ps_tasks=1)):
+        global_step = variables.Variable(0, name='global_step',
+                                         trainable=False)
+        var_0 = variable_scope.get_variable(initializer=0.0, name="v0")
+        var_1 = variable_scope.get_variable(initializer=1.0, name="v1")
+
+      with ops.device("/job:worker/task:" + str(worker_id)):
+        grads_0 = constant_op.constant(-1.0)
+        grads_1 = constant_op.constant(-1.0)
+
+        sgd_opt = gradient_descent.GradientDescentOptimizer(1.0)
+        opt = ElasticAverageOptimizer(
+          opt=sgd_opt,
+          num_worker=num_workers,
+          moving_rate=moving_rate,
+          communication_period=period,
+          ea_custom_getter=ea_coustom
+        )
+        train_op = [
+          opt.apply_gradients(
+            ([grads_0, var_0],
+             [grads_1, var_1]), global_step)
+        ]
+        easgd_hook = opt.make_session_run_hook(is_chief, worker_id)
+      # Creates MonitoredSession
+      sess = training.MonitoredTrainingSession(workers[worker_id].target,
+                                               hooks=[easgd_hook])
+
+    sessions.append(sess)
+    graphs.append(graph)
+    train_ops.append(train_op)
+
+  return sessions, graphs, train_ops
+
+
+class ElasticAverageOptimizerTest(test.TestCase):
+  def _run(self, train_op, sess):
+    sess.run(train_op)
+
+  def test1Workers2Period(self):
+    num_workers = 1
+    communication_period = 2
+    num_ps = 1
+    cluster, workers, _ = create_local_cluster(num_workers=num_workers,
+                                               num_ps=num_ps)
+
+    sessions, graphs, train_ops = _get_workers(num_workers,
+                                               communication_period,
+                                               workers, 1.0)
+
+    var_0 = graphs[0].get_tensor_by_name('v0:0')
+    var_1 = graphs[0].get_tensor_by_name('v1:0')
+    global_step = training_util.get_global_step(graphs[0])
+    var_0_g = graphs[0].get_tensor_by_name(GLOBAL_VARIABLE_NAME + "/v0:0")
+    var_1_g = graphs[0].get_tensor_by_name(GLOBAL_VARIABLE_NAME + "/v1:0")
+    # Verify the initialized value.
+    self.assertAllEqual(0.0, sessions[0].run(var_0))
+    self.assertAllEqual(1.0, sessions[0].run(var_1))
+    self.assertAllEqual(0.0, sessions[0].run(var_0_g))
+    self.assertAllEqual(1.0, sessions[0].run(var_1_g))
+    self.assertAllEqual(0, sessions[0].run(global_step))
+
+    sessions[0].run(train_ops[0])
+
+    self.assertAllEqual(1.0, sessions[0].run(var_0))
+    self.assertAllEqual(2.0, sessions[0].run(var_1))
+    self.assertAllEqual(0.0, sessions[0].run(var_0_g))
+    self.assertAllEqual(1.0, sessions[0].run(var_1_g))
+    self.assertAllEqual(0, sessions[0].run(global_step))
+
+    # iteration 2, global varibale update
+    sessions[0].run(train_ops[0])
+
+    self.assertAllEqual(0.0, sessions[0].run(var_0))
+    self.assertAllEqual(1.0, sessions[0].run(var_1))
+    self.assertAllEqual(2.0, sessions[0].run(var_0_g))
+    self.assertAllEqual(3.0, sessions[0].run(var_1_g))
+    self.assertAllEqual(1, sessions[0].run(global_step))
+
+    # iteration 3
+    sessions[0].run(train_ops[0])
+
+    self.assertAllEqual(1.0, sessions[0].run(var_0))
+    self.assertAllEqual(2.0, sessions[0].run(var_1))
+    self.assertAllEqual(2.0, sessions[0].run(var_0_g))
+    self.assertAllEqual(3.0, sessions[0].run(var_1_g))
+    self.assertAllEqual(1, sessions[0].run(global_step))
+
+  def test2Worker1Period(self):
+    num_workers = 2
+    communication_period = 1
+    num_ps = 2
+    cluster, workers, _ = create_local_cluster(num_workers=num_workers,
+                                               num_ps=num_ps)
+
+    sessions, graphs, train_ops = _get_workers(num_workers,
+                                               communication_period,
+                                               workers, 0.5)
+
+    var_0 = graphs[0].get_tensor_by_name('v0:0')
+    var_1 = graphs[0].get_tensor_by_name('v1:0')
+
+    var_0_1 = graphs[1].get_tensor_by_name('v0:0')
+    var_1_1 = graphs[1].get_tensor_by_name('v1:0')
+
+    var_0_g = graphs[0].get_tensor_by_name(GLOBAL_VARIABLE_NAME + "/v0:0")
+    var_1_g = graphs[0].get_tensor_by_name(GLOBAL_VARIABLE_NAME + "/v1:0")
+    # Verify the initialized value.
+    self.assertAllEqual(0.0, sessions[0].run(var_0))
+    self.assertAllEqual(1.0, sessions[0].run(var_1))
+    self.assertAllEqual(0.0, sessions[1].run(var_0_1))
+    self.assertAllEqual(1.0, sessions[1].run(var_1_1))
+    self.assertAllEqual(0.0, sessions[0].run(var_0_g))
+    self.assertAllEqual(1.0, sessions[0].run(var_1_g))
+
+    sessions[0].run(train_ops[0])
+    sessions[1].run(train_ops[1])
+
+    self.assertAllEqual(0.5, sessions[0].run(var_0))
+    self.assertAllEqual(1.5, sessions[0].run(var_1))
+    self.assertAllEqual(0.75, sessions[0].run(var_0_g))
+    self.assertAllEqual(1.75, sessions[0].run(var_1_g))
+    self.assertAllEqual(0.75, sessions[1].run(var_0_1))
+    self.assertAllEqual(1.75, sessions[1].run(var_1_1))
+
+  def testPS2TasksWithClusterSpecClass(self):
+    cluster_spec = server_lib.ClusterSpec({
+      "ps": ["ps0:2222", "ps1:2222"],
+      "worker": ["worker0:2222", "worker1:2222", "worker2:2222"]
+    })
+    ea_coustom = ElasticAverageCustomGetter(
+      worker_device="/job:worker/task:0")
+    from tensorflow.python.training import device_setter
+    with ops.device(
+        device_setter.replica_device_setter(cluster=cluster_spec,
+                                            worker_device="/job:worker/task:0",
+                                            ps_device="/job:ps")), \
+         variable_scope.variable_scope('', custom_getter=ea_coustom):
+      v = variable_scope.get_variable(initializer=[1, 2], name="v")
+      w = variable_scope.get_variable(initializer=[2, 1], name='w')
+      v_g, w_g = ea_coustom._global_map[v],ea_coustom._global_map[w]
+      self.assertDeviceEqual("/job:worker/task:0", v.device)
+      self.assertDeviceEqual("job:ps/task:0", v_g.device)
+      self.assertDeviceEqual("/job:worker/task:0", w.device)
+      self.assertDeviceEqual("job:ps/task:1", w_g.device)
+
+
+if __name__ == '__main__':
+  test.main()
diff --git a/tensorflow/contrib/rnn/python/kernel_tests/core_rnn_cell_test.py b/tensorflow/contrib/rnn/python/kernel_tests/core_rnn_cell_test.py
index 84fcf733c1..63155faf1e 100644
--- a/tensorflow/contrib/rnn/python/kernel_tests/core_rnn_cell_test.py
+++ b/tensorflow/contrib/rnn/python/kernel_tests/core_rnn_cell_test.py
@@ -39,6 +39,9 @@ from tensorflow.python.ops import rnn_cell_impl
 from tensorflow.python.ops import variable_scope
 from tensorflow.python.ops import variables as variables_lib
 from tensorflow.python.platform import test
+from tensorflow.python.framework import test_util
+from tensorflow.contrib.rnn.python.ops import rnn_cell as contrib_rnn_cell
+
 
 # pylint: enable=protected-access
 Linear = core_rnn_cell._Linear  # pylint: disable=invalid-name
diff --git a/tensorflow/contrib/rnn/python/ops/rnn_cell.py b/tensorflow/contrib/rnn/python/ops/rnn_cell.py
index 5a6d287c68..c6b1316043 100644
--- a/tensorflow/contrib/rnn/python/ops/rnn_cell.py
+++ b/tensorflow/contrib/rnn/python/ops/rnn_cell.py
@@ -36,6 +36,7 @@ from tensorflow.python.ops import nn_ops
 from tensorflow.python.ops import random_ops
 from tensorflow.python.ops import rnn_cell_impl
 from tensorflow.python.ops import variable_scope as vs
+from tensorflow.python.ops import partitioned_variables
 from tensorflow.python.platform import tf_logging as logging
 from tensorflow.python.util import nest
 
diff --git a/tensorflow/contrib/seq2seq/python/ops/helper.py b/tensorflow/contrib/seq2seq/python/ops/helper.py
index b55d90cbab..dec03ce43f 100644
--- a/tensorflow/contrib/seq2seq/python/ops/helper.py
+++ b/tensorflow/contrib/seq2seq/python/ops/helper.py
@@ -223,8 +223,7 @@ class TrainingHelper(Helper):
 
   def sample(self, time, outputs, name=None, **unused_kwargs):
     with ops.name_scope(name, "TrainingHelperSample", [time, outputs]):
-      sample_ids = math_ops.cast(
-          math_ops.argmax(outputs, axis=-1), dtypes.int32)
+      sample_ids = math_ops.argmax(outputs, axis=-1, output_type=dtypes.int32)
       return sample_ids
 
   def next_inputs(self, time, outputs, state, name=None, **unused_kwargs):
@@ -540,8 +539,7 @@ class GreedyEmbeddingHelper(Helper):
     if not isinstance(outputs, ops.Tensor):
       raise TypeError("Expected outputs to be a single Tensor, got: %s" %
                       type(outputs))
-    sample_ids = math_ops.cast(
-        math_ops.argmax(outputs, axis=-1), dtypes.int32)
+    sample_ids = math_ops.argmax(outputs, axis=-1, output_type=dtypes.int32)
     return sample_ids
 
   def next_inputs(self, time, outputs, state, sample_ids, name=None):
diff --git a/tensorflow/contrib/slim/python/slim/data/dataset_data_provider.py b/tensorflow/contrib/slim/python/slim/data/dataset_data_provider.py
index a781c647a1..c42c7b3391 100644
--- a/tensorflow/contrib/slim/python/slim/data/dataset_data_provider.py
+++ b/tensorflow/contrib/slim/python/slim/data/dataset_data_provider.py
@@ -62,7 +62,9 @@ class DatasetDataProvider(data_provider.DataProvider):
                seed=None,
                scope=None):
     """Creates a DatasetDataProvider.
-
+    Note: if `num_epochs` is not `None`,  local counter `epochs` will be created
+    by relevant function. Use `local_variables_initializer()` to initialize
+    local variables.
     Args:
       dataset: An instance of the Dataset class.
       num_readers: The number of parallel readers to use.
diff --git a/tensorflow/contrib/timeseries/python/timeseries/head.py b/tensorflow/contrib/timeseries/python/timeseries/head.py
index 5896fc2a20..f0330bfbbd 100644
--- a/tensorflow/contrib/timeseries/python/timeseries/head.py
+++ b/tensorflow/contrib/timeseries/python/timeseries/head.py
@@ -19,7 +19,7 @@ from __future__ import print_function
 
 import re
 
-from tensorflow.contrib.framework.python.ops import variables
+from tensorflow.python.training import training_util
 from tensorflow.contrib.layers.python.layers import optimizers
 
 from tensorflow.contrib.timeseries.python.timeseries import feature_keys
@@ -79,7 +79,7 @@ class _TimeSeriesRegressionHead(head_lib._Head):  # pylint:disable=protected-acc
 
     train_op = optimizers.optimize_loss(
         model_outputs.loss,
-        global_step=variables.get_global_step(),
+        global_step=training_util.get_global_step(),
         optimizer=self.optimizer,
         # Learning rate is set in the Optimizer object
         learning_rate=None)
diff --git a/tensorflow/core/common_runtime/function.cc b/tensorflow/core/common_runtime/function.cc
index c51b172066..ee9988f0b7 100644
--- a/tensorflow/core/common_runtime/function.cc
+++ b/tensorflow/core/common_runtime/function.cc
@@ -348,7 +348,7 @@ Status FunctionLibraryRuntimeImpl::CreateKernel(const NodeDef& ndef,
                                  kernel);
   }
 
-  // Try to instantiate this function for the func/attr. Maybe its
+  // Try to instantiate this function for the func/attr. Maybe it's
   // cached already.
   Handle handle;
   TF_RETURN_IF_ERROR(Instantiate(ndef.op(), AttrSlice(&ndef.attr()), &handle));
diff --git a/tensorflow/core/framework/numeric_types.h b/tensorflow/core/framework/numeric_types.h
index edd952b824..8514d7c474 100644
--- a/tensorflow/core/framework/numeric_types.h
+++ b/tensorflow/core/framework/numeric_types.h
@@ -46,7 +46,7 @@ struct bfloat16 {
   EIGEN_DEVICE_FUNC bfloat16() {}
 
   EIGEN_DEVICE_FUNC explicit bfloat16(const float v) {
-    if (isnan(v)) {
+    if (Eigen::numext::isnan(v)) {
       value = NAN_VALUE;
       return;
     }
diff --git a/tensorflow/core/framework/tensor_shape_test.cc b/tensorflow/core/framework/tensor_shape_test.cc
index 06c576c7d4..d8a9c0bac5 100644
--- a/tensorflow/core/framework/tensor_shape_test.cc
+++ b/tensorflow/core/framework/tensor_shape_test.cc
@@ -359,7 +359,8 @@ Status TensorShapeOld::IsValidShape(const TensorShapeProto& proto) {
   for (const auto& d : proto.dim()) {
     if (d.size() < 0) {
       return errors::InvalidArgument("Shape ", DebugString(proto),
-                                     " has negative dimensions");
+                                     " has negative dimensions; ",
+                                     "perhaps an un-fed placeholder?");
     }
     num_elements *= d.size();
     if (num_elements > kMaxElements) {
diff --git a/tensorflow/core/graph/mkl_graph_util.h b/tensorflow/core/graph/mkl_graph_util.h
index 880e4e712e..3df981437a 100644
--- a/tensorflow/core/graph/mkl_graph_util.h
+++ b/tensorflow/core/graph/mkl_graph_util.h
@@ -76,12 +76,12 @@ namespace tensorflow {
 namespace mkl_op_registry {
   static const char* kMklOpLabel = "MklOp";
   static const char* kMklOpLabelPattern = "label='MklOp'";
+  // Prefix that we add to Tensorflow op name to construct Mkl op name.
+  static const char* const kMklOpPrefix = "_Mkl";
 
   // Get the name of Mkl op from original TensorFlow op
   // We prefix 'Mkl' to the original op to get Mkl op.
   inline string GetMklOpName(const string& name) {
-    // Prefix that we add to Tensorflow op name to construct Mkl op name.
-    const char* const kMklOpPrefix = "_Mkl";
     return string(kMklOpPrefix) + name;
   }
 
@@ -94,9 +94,6 @@ namespace mkl_op_registry {
     string kernel = KernelsRegisteredForOp(op_name);
     bool result =
         kernel.find(kMklOpLabelPattern) != string::npos && (T == DT_FLOAT);
-    if (result) {
-      VLOG(1) << "mkl_op_registry::" << op_name << " is " << kMklOpLabel;
-    }
     return result;
   }
 
@@ -112,15 +109,12 @@ namespace mkl_op_registry {
     if (!IsMklOp(op_name, T)) {
       return false;
     }
-
     bool result = (0 == op_name.compare(GetMklOpName("Add")) ||
                     0 == op_name.compare(GetMklOpName("Sub")) ||
                     0 == op_name.compare(GetMklOpName("Mul")) ||
                     0 == op_name.compare(GetMklOpName("Maximum")) ||
                     0 == op_name.compare(GetMklOpName("SquaredDifference")));
 
-    VLOG(1) << "mkl_op_registry::" << op_name
-            << " is elementwise MKL op: " << result;
     return result;
   }
 }  // namespace mkl_op_registry
diff --git a/tensorflow/core/graph/mkl_layout_pass.cc b/tensorflow/core/graph/mkl_layout_pass.cc
index 912075aa28..3beca1e5d2 100644
--- a/tensorflow/core/graph/mkl_layout_pass.cc
+++ b/tensorflow/core/graph/mkl_layout_pass.cc
@@ -42,6 +42,8 @@ limitations under the License.
 
 namespace tensorflow {
 
+#ifndef INTEL_MKL_DNN
+
 // This pass implements rewriting of graph to support following scenarios:
 // (A) Merging nodes in the graph
 // (B) Rewriting a node in the graph to a new node
@@ -2213,6 +2215,2087 @@ Status MklLayoutRewritePass::Run(
   return Status::OK();
 }
 
+#else  // INTEL_MKL_DNN
+
+// This pass implements rewriting of graph to support following scenarios:
+// (A) Merging nodes in the graph
+// (B) Rewriting a node in the graph to a new node
+//     Rewrite happens under following scenario:
+//     - Propagating Mkl layout as an additional output tensor
+//        (we will loosely call a tensor that carries Mkl layout as Mkl tensor
+//         henceforth.) from every Mkl supported NN layer.
+//
+// Example of A : Merging nodes in the graph
+// -----------------------------------------
+// Currently, we merge Conv2D+AddBias together. Consider Conv2D and BiasAdd as:
+//
+//           O = Conv2D(A, B)
+//           P = BiasAdd(O, C)
+//
+// We merge them into Conv2DWithBias as:
+//           P = _MklConv2DWithBias(A, A_m, B, B_m, C, C_m)
+//
+// The meaning of A_m, B_m and C_m is explained in B.1.
+//
+// Merge rules:
+//  - The merge for Conv2D and BiasAdd happens when the output of Conv2D _only_
+//    goes to BiasAdd.
+//  - Also, the intersection of attributes of both the nodes must have same
+//    values.
+//  - Both the nodes must have been assigned to same device (if any).
+//
+// Example of B.1 : Rewriting nodes to Mkl nodes
+// ---------------------------------------------
+// Consider a Relu node. Current definition of Relu node looks like:
+//
+//           O = Relu(A)
+//
+// Relu has 1 input (A), and 1 output (O).
+//
+// This rewrite pass will generate a new graph node for Relu (new node is
+// called MklRelu) as:
+//
+//          O, O_m = MklRelu(A, A_m)
+//
+// MklRelu has 2 inputs (A and A_m) and 2 outputs (O and O_m). Here input A is
+// same as input A of Relu; output O is same as output O of Relu. O_m is the
+// additional output tensor that will be set by MklRelu, and it represents
+// Mkl tensor corresponding to O -- in other words, O_m is some kind of
+// metadata for O. A_m is additional input of Relu, and it represents metadata
+// for A - as O_m is metadata for O, A_m is metadata for A. MklRelu receives
+// this metadata from previous node in the graph.
+//
+// When a previous node in the graph is an Mkl node, A_m will represent a valid
+// Mkl tensor. But when a previous node is not an Mkl node, A_m will represent
+// a dummy Mkl tensor.
+//
+// Rewriting rules:
+//  - Selection of a node for rewriting happens by registering the op type of
+//    the node with the rewriting pass. If the op type is not registered, then
+//    all nodes of this op type will not be rewritten.
+//  - Number of inputs after rewriting:
+//      Since for every input Tensorflow tensor, the rewritten node gets Mkl
+//      tensor(s), rewritten node gets 2*N inputs, where N is the number of
+//      inputs for the original node.
+//  - Number of outputs after rewriting:
+//      Since for every output Tensorflow tensor, the rewritten node generates
+//      Mkl tensor(s), the rewritten node generates 2*N outputs, where N is the
+//      number of outputs of the original node.
+//  - Ordering of Tensorflow tensors and Mkl tensors:
+//      Since every rewritten node generates twice the number of inputs and
+//      outputs, one could imagine various orderings among Tensorflow tensors
+//      and Mkl tensors. E.g., assume an op 'Conv2D' that takes (A, B) as
+//      inputs, then the new op '_MklConv2D' can take inputs A, B, A_m and B_m
+//      in A, A_m, B, B_m order or it can also take them in A, B, A_m, B_m
+//      order. Among N inputs one can get N! permutations.
+//
+//      So the question is: which order do we follow? We support 2 types of
+//      orderings: (1) interleaved, and (2) contiguous. Interleaved ordering
+//      follows an intuitive order where an Mkl tensor follows the
+//      corresponding Tensorflow tensor immediately. In the context of the
+//      above example, it will be: A, A_m, B, B_m. Note that the ordering rule
+//      applies to both the inputs and outputs. Contiguous ordering means
+//      all the Tensorflow tensors are contiguous followed by all the Mkl
+//      tensors. We use contiguous ordering as default.
+//
+// Graph rewrite algorithm:
+//      Algorithm: Graph Rewrite
+//      Input: Graph G, Names of the nodes to rewrite and their new names
+//      Output: Modified Graph G' if the nodes are modified, G otherwise.
+//      Start:
+//        N = Topological_Sort(G) // N is a set of nodes in toposort order.
+//        foreach node n in N
+//        do
+//          if (Is_MKL_Op(n))  // Can this node accept an Mkl layout as input.
+//          then
+//            E = set of <incoming edge and its src_output slot> of n
+//            E' = {}   // a new set of edges for rewritten node
+//            foreach <e,s> in E
+//            do
+//              E' U {<e,s>}  // First copy edge which generates Tensorflow
+//                            // tensor as it is
+//              m = Source node of edge e
+//              if Is_Rewritten(m)  // Did we rewrite this node in this pass?
+//              then
+//                E' U {<m,s+1>}    // If yes, then m will generate an Mkl
+//                                  // tensor as an additional output.
+//              else
+//                d = Generate_Dummy_Mkl_Tensor()  // If not, generate a dummy
+//                                                 // Mkl tensor.
+//                E' U {<d,0>}  // The dummy Mkl tensor has only 1 output slot.
+//              fi
+//            done
+//            n' = Build_New_Node(G,new_name,E')
+//            Mark_Rewritten(n')  // Mark the new node as being rewritten.
+//          fi
+//        done
+//
+//      Explanation:
+//        For graph rewrite, we visit nodes of the input graph in the
+//        topological sort order. With this ordering, we visit nodes in the
+//        top-to-bottom fashion. We need this order because while visiting a
+//        node we want that all of its input nodes are visited and rewritten if
+//        applicable. This is because if we need to rewrite a given node
+//        then all of its input nodes need to be fixed (in other words they
+//        cannot be deleted later.)
+//
+//        While visiting a node, we first check if the op type of the node is
+//        an Mkl op. If it is, then we rewrite that node after constructing
+//        new inputs to the node. If the op type of the node is not Mkl op,
+//        then we do not rewrite that node.
+//
+// Handling workspace propagation for certain ops:
+//
+//        Certain backward ops in MKL (MaxPool, LRN and BatchNorm) require
+//        passing of a workspace from their respective forward ops. Workspace
+//        tensors provide memory for storing results of intermediate operations
+//        which are helpful in backward propagation. TensorFlow does not have
+//        a notion of a workspace and as a result does not allow producing
+//        additional outputs from these forward ops. For these ops, we need
+//        to add 2 extra edges between forward ops and their corresponding
+//        backward ops - the first extra edge carries a workspace tensor and
+//        the second one carries an Mkl tensor for the workspace tensor.
+//
+//        Example:
+//
+//        Typical graph for MaxPool and its gradient looks like:
+//
+//        A = MaxPool(T)
+//        B = MaxPoolGrad(X, A, Y)
+//
+//        We will transform this graph to propagate the workspace as:
+//        (with the contiguous ordering)
+//
+//        A, W, A_m, W_m = MklMaxPool(T, T_m)
+//        B, B_m = MklMaxPoolGrad(X, A, Y, W, X_m, A_m, Y_m, W_m)
+//
+//        Here W is the workspace tensor. Transformed tensor names with the
+//        suffix _m are Mkl tensors, and this transformation has been done
+//        using the algorithm discussed earlier. The transformation for
+//        workspace propagation only adds extra outputs (W, W_m) for a forward
+//        op and connects them to the corresponding backward ops.
+//
+//        Terms:
+//
+//        Forward op name = name of the op in the forward pass
+//          where a workspace tensor originates (MaxPool in this example)
+//        Backward op name = name of the op in the backward pass that receives
+//          a workspace tensor from the forward op (MaxPoolGrad in the example)
+//        Slot = Position of the output or input slot that will be
+//               used by the workspace tensor (1 for MklMaxPool as W is the 2nd
+//               output of MaxPool (0 is 1st); 3 for MklMaxPoolGrad)
+//
+//        Question:
+//
+//        How do we associate a backward op to a forward op? There can be more
+//        than one op with the exact same name.
+//
+//        In this example, we associate MaxPoolGrad with MaxPool. But there
+//        could be more than one MaxPool ops. To solve this problem, we look
+//        for _direct_ edge between a forward op and a backward op (tensor A is
+//        flowing along this edge in the example).
+//
+//        How do we transform forward and backward ops when there is no direct
+//        edge between them? In such a case, we generate dummy tensors for
+//        workspace tensors. For the example, transformation of MaxPool will
+//        be exactly same as it would be when there is a direct edge between
+//        the forward and the backward op --- it is just that MaxPool won't
+//        generate any workspace tensor. For MaxPoolGrad, the transformation
+//        will also be same, but instead of connecting W and W_m with the
+//        outputs of MaxPool, we will produce dummy tensors for them, and we
+//        will set workspace_enabled attribute to false.
+//
+class MklLayoutRewritePass : public GraphOptimizationPass {
+ public:
+  MklLayoutRewritePass() {
+    // NOTE: names are alphabetically sorted.
+    csinfo_.addn = "AddN";
+    csinfo_.avg_pool = "AvgPool";
+    csinfo_.avg_pool_grad = "AvgPoolGrad";
+    csinfo_.bias_add = "BiasAdd";
+    csinfo_.bias_add_grad = "BiasAddGrad";
+    csinfo_.concat = "Concat";
+    csinfo_.concatv2 = "ConcatV2";
+    csinfo_.conv2d = "Conv2D";
+    csinfo_.conv2d_with_bias = "__MklDummyConv2DWithBias";
+    csinfo_.conv2d_grad_input = "Conv2DBackpropInput";
+    csinfo_.conv2d_grad_filter = "Conv2DBackpropFilter";
+    csinfo_.conv2d_grad_filter_with_bias =
+                              "__MklDummyConv2DBackpropFilterWithBias";
+    csinfo_.fused_batch_norm = "FusedBatchNorm";
+    csinfo_.fused_batch_norm_grad = "FusedBatchNormGrad";
+    csinfo_.identity = "Identity";
+    csinfo_.lrn = "LRN";
+    csinfo_.lrn_grad = "LRNGrad";
+    csinfo_.matmul = "MatMul";
+    csinfo_.max_pool = "MaxPool";
+    csinfo_.max_pool_grad = "MaxPoolGrad";
+    csinfo_.mkl_conv2d = "_MklConv2D";
+    csinfo_.mkl_conv2d_grad_input = "_MklConv2DBackpropInput";
+    csinfo_.mkl_conv2d_grad_filter = "_MklConv2DBackpropFilter";
+    csinfo_.mkl_conv2d_with_bias = "_MklConv2DWithBias";
+    csinfo_.mkl_conv2d_grad_filter_with_bias =
+                                   "_MklConv2DBackpropFilterWithBias";
+    csinfo_.relu = "Relu";
+    csinfo_.relu_grad = "ReluGrad";
+    csinfo_.tanh       = "Tanh";
+    csinfo_.tanh_grad  = "TanhGrad";
+    csinfo_.reshape = "Reshape";
+    csinfo_.softmax = "Softmax";
+    csinfo_.split = "Split";
+    // Element-wise ops. Ensure you also add any new ops to IsOpElementWise
+    // in the MklUtil.h (IsMklElementWiseOp method) to ensure that the
+    // MklInputConversion op is added before it.
+    csinfo_.add = "Add";
+    csinfo_.maximum = "Maximum";
+    csinfo_.mul = "Mul";
+    csinfo_.squared_difference = "SquaredDifference";
+    csinfo_.sub = "Sub";
+    // End - element-wise ops. See note above.
+
+    // NOTE: names are alphabetically sorted.
+    rinfo_.push_back({csinfo_.addn, mkl_op_registry::GetMklOpName(csinfo_.addn),
+                      CopyAttrsAddN, AddNRewrite});
+    rinfo_.push_back({csinfo_.add,
+                      mkl_op_registry::GetMklOpName(csinfo_.add),
+                      CopyAttrsDataType, AlwaysRewrite});
+    rinfo_.push_back({csinfo_.avg_pool,
+                      mkl_op_registry::GetMklOpName(csinfo_.avg_pool),
+                      CopyAttrsPooling, AlwaysRewrite});
+    rinfo_.push_back({csinfo_.avg_pool_grad,
+                      mkl_op_registry::GetMklOpName(csinfo_.avg_pool_grad),
+                      CopyAttrsPooling, AlwaysRewrite});
+    rinfo_.push_back({csinfo_.concat,
+                      mkl_op_registry::GetMklOpName(csinfo_.concat),
+                      CopyAttrsConcat, AlwaysRewrite});
+    rinfo_.push_back({csinfo_.concatv2,
+                      mkl_op_registry::GetMklOpName(csinfo_.concatv2),
+                      CopyAttrsConcatV2, AlwaysRewrite});
+    rinfo_.push_back({csinfo_.conv2d,
+                      mkl_op_registry::GetMklOpName(csinfo_.conv2d),
+                      CopyAttrsConv2D, AlwaysRewrite});
+    rinfo_.push_back({csinfo_.conv2d_with_bias,
+                      csinfo_.mkl_conv2d_with_bias,
+                      CopyAttrsConv2D, AlwaysRewrite});
+    rinfo_.push_back({csinfo_.conv2d_grad_filter,
+                      mkl_op_registry::GetMklOpName(csinfo_.conv2d_grad_filter),
+                      CopyAttrsConv2D, AlwaysRewrite});
+    rinfo_.push_back({csinfo_.conv2d_grad_filter_with_bias,
+                      csinfo_.mkl_conv2d_grad_filter_with_bias,
+                      CopyAttrsConv2D, AlwaysRewrite});
+    rinfo_.push_back({csinfo_.conv2d_grad_input,
+                      mkl_op_registry::GetMklOpName(csinfo_.conv2d_grad_input),
+                      CopyAttrsConv2D, AlwaysRewrite});
+    rinfo_.push_back({csinfo_.fused_batch_norm,
+                      mkl_op_registry::GetMklOpName(csinfo_.fused_batch_norm),
+                      CopyAttrsFusedBatchNorm, AlwaysRewrite});
+    rinfo_.push_back({csinfo_.fused_batch_norm_grad,
+                      mkl_op_registry::GetMklOpName(csinfo_.fused_batch_norm_grad),
+                      CopyAttrsFusedBatchNorm, AlwaysRewrite});
+    rinfo_.push_back({csinfo_.identity,
+                      mkl_op_registry::GetMklOpName(csinfo_.identity),
+                      CopyAttrsDataType, AlwaysRewrite});
+    /*
+    rinfo_.push_back({csinfo_.lrn,
+                      mkl_op_registry::GetMklOpName(csinfo_.lrn),
+                      CopyAttrsLRN, AlwaysRewrite});
+    rinfo_.push_back({csinfo_.lrn_grad,
+                      mkl_op_registry::GetMklOpName(csinfo_.lrn_grad),
+                      CopyAttrsLRN, AlwaysRewrite});
+    */
+    rinfo_.push_back({csinfo_.max_pool,
+                      mkl_op_registry::GetMklOpName(csinfo_.max_pool),
+                      CopyAttrsPooling, NonDepthBatchWisePoolRewrite});
+    rinfo_.push_back({csinfo_.max_pool_grad,
+                      mkl_op_registry::GetMklOpName(csinfo_.max_pool_grad),
+                      CopyAttrsPooling, AlwaysRewrite});
+    rinfo_.push_back({csinfo_.maximum,
+                      mkl_op_registry::GetMklOpName(csinfo_.maximum),
+                      CopyAttrsDataType, AlwaysRewrite});
+    rinfo_.push_back({csinfo_.mul,
+                      mkl_op_registry::GetMklOpName(csinfo_.mul),
+                      CopyAttrsDataType, AlwaysRewrite});
+    rinfo_.push_back({csinfo_.relu,
+                      mkl_op_registry::GetMklOpName(csinfo_.relu),
+                      CopyAttrsDataType, AlwaysRewrite});
+    rinfo_.push_back({csinfo_.relu_grad,
+                      mkl_op_registry::GetMklOpName(csinfo_.relu_grad),
+                      CopyAttrsDataType, AlwaysRewrite});
+    rinfo_.push_back({csinfo_.tanh,
+                      mkl_op_registry::GetMklOpName(csinfo_.tanh),
+                      CopyAttrsDataType, AlwaysRewrite});
+    rinfo_.push_back({csinfo_.tanh_grad,
+                      mkl_op_registry::GetMklOpName(csinfo_.tanh_grad),
+                      CopyAttrsDataType, AlwaysRewrite});
+    rinfo_.push_back({csinfo_.reshape,
+                      mkl_op_registry::GetMklOpName(csinfo_.reshape),
+                      CopyAttrsReshape, AlwaysRewrite});
+    rinfo_.push_back({csinfo_.softmax,
+                      mkl_op_registry::GetMklOpName(csinfo_.softmax),
+                      CopyAttrsDataType, AlwaysRewrite});
+    rinfo_.push_back({csinfo_.squared_difference,
+                      mkl_op_registry::GetMklOpName(csinfo_.squared_difference),
+                      CopyAttrsDataType, AlwaysRewrite});
+    rinfo_.push_back({csinfo_.sub,
+                      mkl_op_registry::GetMklOpName(csinfo_.sub),
+                      CopyAttrsDataType, AlwaysRewrite});
+
+    // Add info about which ops to add workspace edge to and the slots.
+    wsinfo_.push_back({csinfo_.lrn, csinfo_.lrn_grad, 0, 2, 1, 3});
+    wsinfo_.push_back({csinfo_.max_pool, csinfo_.max_pool_grad, 0, 1, 1, 3});
+
+    // Add a rule for merging nodes
+    minfo_.push_back({csinfo_.conv2d, csinfo_.bias_add,
+                      csinfo_.conv2d_with_bias,
+                      GetConv2DOrBiasAdd});
+
+    minfo_.push_back({csinfo_.conv2d_grad_filter, csinfo_.bias_add_grad,
+                      csinfo_.conv2d_grad_filter_with_bias,
+                      GetConv2DBackpropFilterOrBiasAddGrad});
+  }
+
+  // Standard interface to run pass
+  Status Run(const GraphOptimizationPassOptions& options);
+
+  // Helper function which does most of heavy lifting for rewriting
+  // Mkl nodes to propagate Mkl tensor as additional output
+  //
+  // Extracts common functionality between Run public interface and
+  // test interface.
+  //
+  // @return true, if and only if graph is mutated; false otherwise.
+  bool RunPass(std::unique_ptr<Graph>* g);
+
+  /// Structure to specify the name of an original node, its new name after
+  /// rewrite, the number of inputs to the original node, the function to
+  /// be used to copy attributes for the op, and the rule (if any) which
+  /// must hold for rewriting the node
+  typedef struct {
+    string name;      // Original name of op of the node in the graph
+    string new_name;  // New name of the op of the node in the graph
+    // A function handler to copy attributes from an old node to a new node.
+    std::function<void(const Node*, NodeBuilder*)> copy_attrs;
+    // A rule under which to rewrite this node
+    std::function<bool(const Node*)> rewrite_rule;
+  } RewriteInfo;
+
+  /// Structure to specify a forward op, a backward op, and the slot numbers
+  /// in the forward and backward ops where we will add a workspace edge.
+  typedef struct {
+    string fwd_op;    // Name of a forward op in the graph
+    string bwd_op;    // Name of a backward op in the graph
+    int fwd_slot;     // Output slot in the forward op node where actual
+                      // output tensor resides
+    int bwd_slot;     // Input slot in the backward op node where actual
+                      // input tensor resides
+    int ws_fwd_slot;  // Output slot in the forward op node where workspace
+                      // edge is added
+    int ws_bwd_slot;  // Input slot in the backward op node where workspace
+                      // edge is added
+  } WorkSpaceInfo;
+
+  /// Structure to specify information used in node merge of 2 operators
+  typedef struct {
+    string op1;       // Node string for one operator.
+    string op2;       // Node string for second operator.
+    string new_node;  // Name of the node after merge
+    // Function that enables user of the node merger to specify how to find
+    // second operator given the first operator.
+    std::function<Node*(const Node*)> get_node_to_be_merged;
+  } MergeInfo;
+
+  /// Structure to store all constant strings
+  /// NOTE: names are alphabetically sorted.
+  typedef struct {
+    string addn;
+    string add;
+    string avg_pool;
+    string avg_pool_grad;
+    string bias_add;
+    string bias_add_grad;
+    string concat;
+    string concatv2;
+    string conv2d;
+    string conv2d_with_bias;
+    string conv2d_grad_input;
+    string conv2d_grad_filter;
+    string conv2d_grad_filter_with_bias;
+    string fused_batch_norm;
+    string fused_batch_norm_grad;
+    string identity;
+    string lrn;
+    string lrn_grad;
+    string matmul;
+    string max_pool;
+    string max_pool_grad;
+    string maximum;
+    string mkl_conv2d;
+    string mkl_conv2d_grad_input;
+    string mkl_conv2d_grad_filter;
+    string mkl_conv2d_grad_filter_with_bias;
+    string mkl_conv2d_with_bias;
+    string mul;
+    string relu;
+    string relu_grad;
+    string tanh;
+    string tanh_grad;
+    string reshape;
+    string softmax;
+    string split;
+    string squared_difference;
+    string sub;
+  } ConstStringsInfo;
+
+ private:
+  /// Maintain info about nodes to rewrite
+  std::vector<RewriteInfo> rinfo_;
+
+  /// Maintain info about nodes to add workspace edge
+  std::vector<WorkSpaceInfo> wsinfo_;
+
+  /// Maintain info about nodes to be merged
+  std::vector<MergeInfo> minfo_;
+
+  /// Maintain structure of constant strings
+  static ConstStringsInfo csinfo_;
+
+ private:
+  // Is OpDef::ArgDef a list type? It could be N * T or list(type).
+  // Refer to opdef.proto for details of list type.
+  inline bool ArgIsList(const OpDef::ArgDef& arg) const {
+    return !arg.type_list_attr().empty() || !arg.number_attr().empty();
+  }
+
+  // Get length of a list in 'n' if 'arg' is of list type. Refer to
+  // description of ArgIsList for definition of list type.
+  inline int GetTensorListLength(const OpDef::ArgDef& arg, Node* n) {
+    CHECK_EQ(ArgIsList(arg), true);
+    int N = 0;
+    const string attr_name = !arg.type_list_attr().empty()
+                                 ? arg.type_list_attr()
+                                 : arg.number_attr();
+    if (!arg.type_list_attr().empty()) {
+      std::vector<DataType> value;
+      TF_CHECK_OK(GetNodeAttr(n->def(), attr_name, &value));
+      N = value.size();
+    } else {
+      TF_CHECK_OK(GetNodeAttr(n->def(), attr_name, &N));
+    }
+    return N;
+  }
+
+  // Can op represented by node 'n' run on DEVICE_CPU?
+  // Op can run on CPU with MKL if the runtime assigned device or the
+  // user requested device contains device CPU, or both are empty.
+  bool CanOpRunOnCPUDevice(const Node* n) {
+    bool result = true;
+    string reason;
+
+    // Substring that should be checked for in device name for CPU device.
+    const char* const kCPUDeviceSubStr = "CPU";
+
+    // If Op has been specifically assigned to a non-CPU device, then No.
+    if (!n->assigned_device_name().empty() &&
+        !StringPiece(n->assigned_device_name()).contains(kCPUDeviceSubStr)) {
+      result = false;
+      reason = "Op has been assigned a runtime device that is not CPU.";
+    }
+
+    // If user has specifically assigned this op to a non-CPU device, then No.
+    if (!n->def().device().empty() &&
+        !StringPiece(n->def().device()).contains(kCPUDeviceSubStr)) {
+      result = false;
+      reason = "User has assigned a device that is not CPU.";
+    }
+
+    if (result == false) {
+      VLOG(1) << "MklLayoutRewritePass: Skipping rewriting of the node "
+              << n->type_string() << ", reason: " << reason;
+    }
+
+    // Otherwise Yes.
+    return result;
+  }
+
+  // Return a node that can be merged with input node 'n'
+  //
+  // @return pointer to the node if we can find such a
+  // node. Otherwise, it returns nullptr.
+  Node* CheckForNodeMerge(const Node* n) const;
+
+  // Merge node 'm' with node 'n'.
+  // Currently, we merge (1) Conv2D with BiasAdd, and (2) BiasAddGrad with
+  // Conv2DBackpropFilter.
+  //
+  // Input nodes m and n may be deleted if the call to
+  // this function is successful. Attempt to use the pointers
+  // after the call to function may result in undefined behaviors.
+  //
+  // @input g - input graph, m - graph node, n - graph node to be merged with m
+  // @return Status::OK(), if merging is successful and supported.
+  //         Returns appropriate Status error code otherwise.
+  //         Graph is updated in case nodes are merged. Otherwise, it is
+  //         not updated.
+  Status MergeNode(std::unique_ptr<Graph>* g, Node* m, Node* n);
+
+  // Helper function to merge different nodes
+  Status MergeConv2DWithBiasAdd(std::unique_ptr<Graph>* g, Node* m, Node* n);
+  Status MergeConv2DBackpropFilterWithBiasAddGrad(std::unique_ptr<Graph>* g,
+                                                  Node* m, Node* n);
+
+  // Find BiasAdd or Conv2D node that can be merged with input node 'm'.
+  // If input 'm' is BiasAdd, then check if there exists Conv2D node that can be
+  // merged with 'm'. If input 'm' is Conv2D, then check if there exists BiasAdd
+  // node that can be merged with 'm'.
+  static Node* GetConv2DOrBiasAdd(const Node* m) {
+    CHECK_NOTNULL(m);
+    Node* n = nullptr;
+
+    if (m->type_string() == csinfo_.bias_add) {
+      // If a is BiasAdd, then Conv2D is 0th input of BiasAdd.
+      TF_CHECK_OK(m->input_node(0, &n));
+    } else {
+      CHECK_EQ(m->type_string(), csinfo_.conv2d);
+      // Go over all output edges and search for BiasAdd Node.
+      // 0th input of BiasAdd is Conv2D.
+      for (const Edge* e : m->out_edges()) {
+        if (!e->IsControlEdge() &&
+            e->dst()->type_string() == csinfo_.bias_add &&
+            e->dst_input() == 0) {
+          n = e->dst();
+          break;
+        }
+      }
+    }
+
+    if (n == nullptr) {
+      VLOG(1) << "MklLayoutRewritePass: Could not find matching "
+              << "Conv2D and BiasAdd node for merging. Input node: "
+              << m->DebugString();
+    }
+
+    return n;
+  }
+
+  // Find Conv2DBackpropFilter or BiasAddGrad node that can be merged with input
+  // node 'm'. If input 'm' is Conv2DBackpropFilter, then check if there exists
+  // BiasAddGrad node that can be merged with 'm'. If input 'm' is BiasAddGrad,
+  // then check if there exists Conv2DBackpropFilter node that can be merged
+  // with 'm'.
+  //
+  // Graph that will allow us to connect Conv2DBackpropFilter with BiasAddGrad
+  // would look like:
+  //
+  // _ = Conv2DBackpropFilter(F, _, G)
+  // _ = BiasAddGrad(G)
+  //
+  // So 1st input of BiasAddGrad connects with 3rd input of
+  // Conv2DBackpropFilter and vice versa.
+  static Node* GetConv2DBackpropFilterOrBiasAddGrad(const Node* m) {
+    CHECK_NOTNULL(m);
+    Node* n = nullptr;
+
+    if (m->type_string() == csinfo_.bias_add_grad) {
+      // Get 1st input 'g' of BiasAddGrad.
+      Node* g = nullptr;
+      TF_CHECK_OK(m->input_node(0, &g));
+      // Now traverse all outgoing edges from g that have destination node as
+      // Conv2DBackpropFilter.
+      for (const Edge* e : g->out_edges()) {
+        if (!e->IsControlEdge() &&
+            e->dst()->type_string() == csinfo_.conv2d_grad_filter &&
+            e->dst_input() == 2 /* 3rd input of BackpropFilter */) {
+          n = e->dst();
+          break;
+        }
+      }
+    } else {
+      CHECK_EQ(m->type_string(), csinfo_.conv2d_grad_filter);
+      // Get 3rd input 'g' of Conv2DBackpropFilter.
+      Node* g = nullptr;
+      TF_CHECK_OK(m->input_node(2, &g));
+      // Now traverse all outgoing edges from g that have destination node as
+      // BiasAddGrad.
+      for (const Edge* e : g->out_edges()) {
+        if (!e->IsControlEdge() &&
+            e->dst()->type_string() == csinfo_.bias_add_grad &&
+            e->dst_input() == 0 /* 1st input of BiasAddGrad */) {
+          n = e->dst();
+          break;
+        }
+      }
+    }
+
+    if (n == nullptr) {
+      VLOG(1) << "MklLayoutRewritePass: Could not find matching "
+              << "Conv2DBackpropFilter and BiasAddGrad node for merging. "
+              << "Input node: " << m->DebugString();
+    }
+    return n;
+  }
+
+  // Check if the node 'n' has any applicable rewrite rule
+  // We check for 2 scenarios for rewrite.
+  //
+  // @return RewriteInfo* for the applicable rewrite rule
+  const RewriteInfo* CheckForNodeRewrite(const Node* n) const;
+
+  // Default rewrite rule to be used in scenario 1 for rewrite.
+  // @return - true (since we want to always rewrite)
+  static bool AlwaysRewrite(const Node* n) {
+    return true;
+  }
+
+  // Check if we are performing pooling on depth or batch. If it is, then we
+  // do not rewrite MaxPool node to Mkl version.
+  // @return - true (if it is not a depth/batch wise pooling case);
+  //           false otherwise.
+  static bool NonDepthBatchWisePoolRewrite(const Node* n) {
+    CHECK_NOTNULL(n);
+
+    string data_format_str;
+    TensorFormat data_format;
+    std::vector<int32> ksize, strides;
+    CHECK_EQ(GetNodeAttr(n->def(), "ksize", &ksize).ok(), true);
+    CHECK_EQ(GetNodeAttr(n->def(), "strides", &strides).ok(), true);
+    CHECK_EQ(GetNodeAttr(n->def(), "data_format", &data_format_str).ok(),
+             true);
+    CHECK_EQ(FormatFromString(data_format_str, &data_format), true);
+
+    // Condition that specifies non-batch-wise and non-depth-wise pooling.
+    if (GetTensorDim(ksize,   data_format, 'N') == 1 &&
+        GetTensorDim(strides, data_format, 'N') == 1 &&
+        GetTensorDim(ksize,   data_format, 'C') == 1 &&
+        GetTensorDim(strides, data_format, 'C') == 1) {
+      return true;
+    }
+
+    return false;
+  }
+
+  static bool AddNRewrite(const Node* n) {
+    CHECK_NOTNULL(n);
+
+    int num;
+    CHECK_EQ(GetNodeAttr(n->def(), "N", &num).ok(), true);
+
+    // Condition that specifies non-batch-wise and non-depth-wise pooling.
+    if (num == 2) {
+      return true;
+    }
+
+    return false;
+  }
+
+  // Rewrites input node to a new node specified by its matching rewrite info.
+  //
+  // Method first searches matching rewrite info for input node and then
+  // uses that info to rewrite.
+  //
+  // Input node may be deleted in case of rewrite. Attempt to use the node
+  // after the call can result in undefined behaviors.
+  //
+  // @input  g - input graph, n - Node to be rewritten,
+  //         ri - matching rewriteinfo
+  // @return Status::OK(), if the input node is rewritten;
+  //         Returns appropriate Status error code otherwise.
+  //         Graph is updated in case the input node is rewritten.
+  //         Otherwise, it is not updated.
+  Status RewriteNode(std::unique_ptr<Graph>* g, Node* n, const RewriteInfo* ri);
+
+  // Get nodes that will feed a list of TF tensors to the new
+  // node that we are constructing.
+  //
+  // @input g - input graph,
+  // @input inputs - inputs to old node that we are using for constructing
+  //                 new inputs,
+  // @input input_idx - the index in the 'inputs' vector pointing to the
+  //                    current input that we have processed so far
+  // @output input_idx - index will be incremented by the number of nodes
+  //                     from 'inputs' that are processed
+  // @input list_length - The expected length of list of TF tensors
+  // @output output_nodes - the list of new nodes creating TF tensors
+  //
+  // @return None
+  void GetNodesProducingTFTensorList(
+      const gtl::InlinedVector<std::pair<Node*, int>, 4>& inputs,
+      int* input_idx, int list_length,
+      std::vector<NodeBuilder::NodeOut>* output_nodes);
+
+  // Get nodes that will feed a list of Mkl tensors to the new
+  // node that we are constructing.
+  //
+  // @input g - input graph,
+  // @input orig_node - Original node that we are rewriting
+  // @input inputs - inputs to old node that we are using for constructing
+  //                 new inputs,
+  // @input input_idx - the index in the 'inputs' vector pointing to the
+  //                    current input that we have processed so far
+  // @output input_idx - index will be incremented by the number of nodes
+  //                     from 'inputs' that are processed
+  // @input list_length - The expected length of list of Mkl tensors
+  // @output output_nodes - the list of new nodes creating Mkl tensors
+  //
+  // @return None
+  void GetNodesProducingMklTensorList(std::unique_ptr<Graph>* g,
+    Node* orig_node, const gtl::InlinedVector<std::pair<Node*, int>, 4>& inputs,
+    int* input_idx, int list_length,
+    std::vector<NodeBuilder::NodeOut>* output_nodes);
+
+  // Get a node that will feed an Mkl tensor to the new
+  // node that we are constructing. The output node could be (1) 'n'
+  // if it is Mkl layer, or (2) a dummy node producing dummy Mkl tensor
+  // if 'n' is not an Mkl layer.
+  //
+  // @input g - input graph,
+  // @input orig_node - Original node that we are rewriting,
+  // @input n - Node based on which we are creating Mkl node,
+  // @input n_output_slot - the output slot of node 'n'
+  //            which is feeding to the node that we are constructing
+  // @output mkl_node - the new node that will feed Mkl tensor
+  // @output mkl_node_output_slot - the slot number of mkl_node that
+  //                                will feed the tensor
+  // @return None
+  void GetNodeProducingMklTensor(std::unique_ptr<Graph>* g, Node* orig_node,
+    Node* n, int n_output_slot, Node** mkl_node, int* mkl_node_output_slot);
+
+  // Setup new inputs using old inputs 'inputs' for the rewritten node in 'nb'
+  // in graph 'g'. Original node is input in 'old_node'. Inputs to 'nb' are
+  // set up in contiguous fashion. 'workspace_tensors' carry graph nodes
+  // producing workspace edges if 'are_workspace_tensors_available' is true.
+  // Otherwise, 'workspace_tensors' is empty vector.
+  //
+  // For details, refer to 'Ordering of inputs after rewriting' section in the
+  // documentation above.
+  //
+  // Returns Status::OK() if setting up inputs is successful, otherwise
+  // returns appropriate status code.
+  int SetUpContiguousInputs(
+      std::unique_ptr<Graph>* g,
+      const gtl::InlinedVector<std::pair<Node*, int>, 4>& old_node_inputs,
+      NodeBuilder* nb, Node* old_node,
+      std::vector<NodeBuilder::NodeOut>* workspace_tensors,
+      bool are_workspace_tensors_available);
+
+  // Setup new inputs using old inputs 'inputs' for the rewritten node in 'nb'
+  // in graph 'g'. Original node is input in 'orig_node'.
+  //
+  // For details, refer to 'Ordering of Tensorflow tensors and Mkl tensors'
+  // section in the documentation above.
+  //
+  // Returns Status::OK() if setting up inputs is successful, otherwise
+  // returns appropriate status code.
+  Status SetUpInputs(std::unique_ptr<Graph>* g,
+                     const gtl::InlinedVector<std::pair<Node*, int>, 4>& inputs,
+                     NodeBuilder* nb, Node* orig_node);
+
+  // Add workspace edge on the input or output side of Node 'orig_node' by using
+  // NodeBuilder 'nb' for the new node provided. If 'orig_node' does not dictate
+  // adding workspace edge then do not add it. Workspace Tensorflow and Mkl
+  // tensors, if they need to be added, will be set into these tensors.
+  // If we set workspace tensors, then are_ws_tensors_added should be true.
+  void AddWorkSpaceEdgeIfNeeded(std::unique_ptr<Graph>* g, Node* orig_node,
+                                NodeBuilder* nb,
+                                std::vector<NodeBuilder::NodeOut>* ws_tensors,
+                                bool* are_ws_tensors_added);
+
+  // Functions specific to operators to copy attributes
+  // We need operator-specific function to copy attributes because the framework
+  // does not provide any generic function for it.
+  // NOTE: names are alphabetically sorted.
+  static void CopyAttrsAddN(const Node* orig_node, NodeBuilder* nb);
+  static void CopyAttrsBiasAddGrad(const Node* orig_node, NodeBuilder* nb);
+  static void CopyAttrsConcat(const Node* orig_node, NodeBuilder* nb);
+  static void CopyAttrsConcatV2(const Node* orig_node, NodeBuilder* nb);
+  static void CopyAttrsConv2D(const Node* orig_node, NodeBuilder* nb);
+  static void CopyAttrsDataType(const Node* orig_node, NodeBuilder* nb);
+  static void CopyAttrsFusedBatchNorm(const Node* orig_node, NodeBuilder* nb);
+  static void CopyAttrsLRN(const Node* orig_node, NodeBuilder* nb);
+  static void CopyAttrsPooling(const Node* orig_node, NodeBuilder* nb);
+  static void CopyAttrsReshape(const Node* orig_node, NodeBuilder* nb);
+  static void CopyAttrsSplit(const Node* orig_node, NodeBuilder* nb);
+
+  // Generate a graph node in graph 'g' representing a dummy Mkl tensor node,
+  // using node for original node 'orig_node' and return it in '*out'.
+  // TODO(nhasabni) We should move this to mkl_util.h
+  void GetDummyMklTensorNode(std::unique_ptr<Graph>* g, Node** out,
+                             Node* orig_node);
+  void GetDummyWorkspaceTensorNode(std::unique_ptr<Graph>* g, Node** out,
+                                   Node* orig_node);
+};
+
+MklLayoutRewritePass::ConstStringsInfo MklLayoutRewritePass::csinfo_;
+
+// We register Mkl rewrite pass for phase 1 in post partitioning group.
+// We register it here so that we get a complete picture of all users of Mkl
+// nodes. Do not change the ordering of the Mkl passes.
+const OptimizationPassRegistry::Grouping kMklLayoutRewritePassGroup =
+    OptimizationPassRegistry::POST_PARTITIONING;
+REGISTER_OPTIMIZATION(kMklLayoutRewritePassGroup, 1, MklLayoutRewritePass);
+
+//////////////////////////////////////////////////////////////////////////
+//           Helper functions for creating new node
+//////////////////////////////////////////////////////////////////////////
+
+static void FillInputs(const Node* n,
+                       gtl::InlinedVector<Node*, 4>* control_edges,
+                       gtl::InlinedVector<std::pair<Node*, int>, 4>* in) {
+  control_edges->clear();
+  for (const Edge* e : n->in_edges()) {
+    if (e->IsControlEdge()) {
+      control_edges->push_back(e->src());
+    } else {
+      (*in)[e->dst_input()] = std::make_pair(e->src(), e->src_output());
+    }
+  }
+  std::sort(control_edges->begin(), control_edges->end());
+  if (n->op_def().is_commutative()) {
+    // For commutative inputs, we sort the input by the input Node*
+    // to get a canonical ordering (so that add(a,b) and add(b, a) will
+    // hash to the same value if is_commutative is true for 'add').
+    std::sort(in->begin(), in->end());
+  }
+}
+
+void MklLayoutRewritePass::GetNodesProducingTFTensorList(
+    const gtl::InlinedVector<std::pair<Node*, int>, 4>& inputs, int* input_idx,
+    int list_length, std::vector<NodeBuilder::NodeOut>* output_nodes) {
+  CHECK_LT(*input_idx, inputs.size());
+  CHECK_GT(list_length, 0);
+  CHECK_NOTNULL(output_nodes);
+  output_nodes->reserve(list_length);
+
+  while (list_length != 0) {
+    CHECK_GT(list_length, 0);
+    CHECK_LT(*input_idx, inputs.size());
+    Node* n = inputs[*input_idx].first;
+    int slot = inputs[*input_idx].second;
+    // If input node 'n' is just producing a single tensor at
+    // output slot 'slot' then we just add that single node.
+    output_nodes->push_back(NodeBuilder::NodeOut(n, slot));
+    (*input_idx)++;
+    list_length--;
+  }
+}
+
+// TODO(nhasabni) We should move this to mkl_util.h.
+void MklLayoutRewritePass::GetDummyMklTensorNode(std::unique_ptr<Graph>* g,
+                                                 Node** out, Node* orig_node) {
+  // We use a tensor of shape {8} and value 0,0,0,0,0,0,0,0 to represent
+  // dummy Mkl tensor. 8 = 2*size_t.
+  const DataType dt = DataTypeToEnum<uint8>::v();
+  TensorProto proto;
+  proto.set_dtype(dt);
+  uint8 zero[8] = {0, 0, 0, 0, 0, 0, 0, 0};
+  proto.set_tensor_content(const_cast<const void*>(static_cast<void*>(&zero)),
+                           8);
+  TensorShape dummy_shape({8});
+  dummy_shape.AsProto(proto.mutable_tensor_shape());
+  TF_CHECK_OK(NodeBuilder((*g)->NewName("DMT"), "Const")
+               .Attr("value", proto)
+               .Attr("dtype", dt)
+               .Device(orig_node->def().device())  // We place this node on
+                                                   // the same device as the
+                                                   // device of the original
+                                                   // node.
+               .Finalize(&**g, out));
+
+  // If number of inputs to the original node is > 0, then we add
+  // control dependency between 1st input (index 0) of the original node and
+  // the dummy Mkl node. This is needed because control-flow ops such as Enter,
+  // Merge, etc, require frame_name of the dummy Mkl node to be same as the
+  // rewritten node. Adding control edge between 1st input of the original node
+  // and the dummy Mkl node ensures that the dummy node is in the same frame
+  // as the original node. Choosing 1st input is not necessary - any input of
+  // the original node is fine because all the inputs of a node are always in
+  // the same frame.
+  if (orig_node->num_inputs() > 0) {
+    Node* orig_input0 = nullptr;
+    TF_CHECK_OK(orig_node->input_node(0,
+                                      const_cast<const Node**>(&orig_input0)));
+    CHECK_NOTNULL((*g)->AddControlEdge(orig_input0, *out));
+  }
+
+  (*out)->set_assigned_device_name(orig_node->assigned_device_name());
+}
+
+void MklLayoutRewritePass::GetNodesProducingMklTensorList(
+    std::unique_ptr<Graph>* g,
+    Node* orig_node,
+    const gtl::InlinedVector<std::pair<Node*, int>, 4>& inputs,
+    int* input_idx, int list_length,
+    std::vector<NodeBuilder::NodeOut>* output_nodes) {
+  CHECK_LT(*input_idx, inputs.size());
+  CHECK_GT(list_length, 0);
+  CHECK_NOTNULL(output_nodes);
+  output_nodes->reserve(list_length);
+
+  while (list_length != 0) {
+    CHECK_GT(list_length, 0);
+    CHECK_LT(*input_idx, inputs.size());
+    Node* n = inputs[*input_idx].first;
+    int slot = inputs[*input_idx].second;
+    // If 'n' is producing a single tensor, then create a single Mkl tensor
+    // node.
+    Node* mkl_node = nullptr;
+    int mkl_node_output_slot = 0;
+    GetNodeProducingMklTensor(g, orig_node, n, slot, &mkl_node,
+                              &mkl_node_output_slot);
+    output_nodes->push_back(NodeBuilder::NodeOut(mkl_node,
+                                                mkl_node_output_slot));
+    (*input_idx)++;
+    list_length--;
+  }
+}
+
+// Get an input node that will feed Mkl tensor to the new
+// node that we are constructing. An input node could be (1) 'n'
+// if it is Mkl layer, or (2) a dummy node producing dummy Mkl tensor
+// if 'n' is not an Mkl layer.
+void MklLayoutRewritePass::GetNodeProducingMklTensor(std::unique_ptr<Graph>* g,
+    Node* orig_node, Node* n,
+    int n_output_slot, Node** mkl_node, int* mkl_node_output_slot) {
+  CHECK_NOTNULL(n);
+  CHECK_NOTNULL(mkl_node);
+  CHECK_NOTNULL(mkl_node_output_slot);
+
+  // If this is an MKL op, then it will create extra output for MKL layout.
+  DataType T;
+  if (GetNodeAttr(n->def(), "T", &T).ok() &&
+      mkl_op_registry::IsMklOp(n->type_string(), T)) {
+    // If this is an MKL op, then it will generate an edge that will receive
+    // Mkl tensor from a node.
+    // output slot number for Mkl tensor would be N+slot number of TensorFlow
+    // tensor, where N is total number of TensorFlow tensors.
+    *mkl_node = n;
+    *mkl_node_output_slot =
+        GetTensorMetaDataIndex(n_output_slot, n->num_outputs());
+  } else {
+    // If we have not visited the node and rewritten it, then we need
+    // to create a dummy node that will feed a dummy Mkl tensor to this node.
+    // DummyMklTensor node has no input and generates only 1 output
+    // (dummy Mkl tensor) as output slot number 0.
+    GetDummyMklTensorNode(g, mkl_node, orig_node);
+    CHECK_NOTNULL(*mkl_node);
+    *mkl_node_output_slot = 0;
+  }
+}
+
+int MklLayoutRewritePass::SetUpContiguousInputs(
+    std::unique_ptr<Graph>* g,
+    const gtl::InlinedVector<std::pair<Node*, int>, 4>& old_node_inputs,
+    NodeBuilder* nb, Node* old_node,
+    std::vector<NodeBuilder::NodeOut>* workspace_tensors,
+    bool are_workspace_tensors_available) {
+  CHECK_NOTNULL(workspace_tensors);
+  CHECK_EQ(kTensorOrdering, MklTfTensorOrdering::TENSORS_CONTIGUOUS);
+
+  // TODO(nhasabni): Temporary solution to connect filter input of
+  // BackpropInput with the converted filter from Conv2D.
+  bool do_connect_conv2d_backprop_input_filter = false;
+  Node* conv2d_node = nullptr;
+  // Filter node is 2nd input (slot index 1) of Conv2D.
+  int kConv2DFilterInputSlotIdx = 1;
+  int kConv2DBackpropInputFilterInputSlotIdx = 1;
+  int kConv2DFilterOutputSlotIdx = 1;
+  if (old_node->type_string() == csinfo_.conv2d_grad_input) {
+    // We need to find Conv2D node from Conv2DBackpropInput.
+    // For that let's first find filter node that is 2nd input (slot 1)
+    // of BackpropInput.
+    Node* filter_node = nullptr;
+    old_node->input_node(kConv2DBackpropInputFilterInputSlotIdx, &filter_node);
+    CHECK_NOTNULL(filter_node);
+
+    // Now check which nodes receive from filter_node. Filter feeds as
+    // 2nd input (slot 1) of _MklConv2D and _MklConv2DWithBias.
+    for (const Edge* e : filter_node->out_edges()) {
+      if ((e->dst()->type_string() == csinfo_.mkl_conv2d ||
+           e->dst()->type_string() == csinfo_.mkl_conv2d_with_bias) &&
+          e->dst_input() == kConv2DFilterInputSlotIdx
+          /* filter is 2nd input of Conv2D and _MklConv2D. */) {
+        if (conv2d_node != nullptr) {
+          VLOG(1) << "MklLayoutRewritePass: unusual case of same filter"
+                  << " feeding multiple Conv2D nodes: "
+                  << filter_node->DebugString();
+          // We will not connect filter input of Conv2DBackpropInput
+          // to be safe here.
+          do_connect_conv2d_backprop_input_filter = false;
+          break;
+        } else {
+          conv2d_node = e->dst();
+          do_connect_conv2d_backprop_input_filter = true;
+        }
+      }
+    }
+  }
+
+  // Number of input slots to original op
+  // Input slots are represented by .Input() calls in REGISTER_OP.
+  int old_node_input_slots = old_node->op_def().input_arg_size();
+  // Actual number of inputs can be greater than or equal to number
+  // of Input slots because inputs of type list could be unfolded.
+  CHECK_GE(old_node_inputs.size(), old_node_input_slots);
+  int nn_slot_idx = 0;  // slot index for inputs of new node
+
+  // Let's copy all inputs (TF tensors) of original node to new node.
+  int iidx = 0;
+  for (int on_slot_idx = 0; on_slot_idx < old_node_input_slots; on_slot_idx++) {
+    // An input slot could be a single tensor or a list. We need
+    // to handle this case accordingly.
+    CHECK_LT(iidx, old_node_inputs.size());
+    const OpDef::ArgDef& arg = old_node->op_def().input_arg(on_slot_idx);
+    if (ArgIsList(arg)) {
+      std::vector<NodeBuilder::NodeOut> new_node_inputs;
+      int N = GetTensorListLength(arg, old_node);
+      GetNodesProducingTFTensorList(old_node_inputs, &iidx, N,
+                                    &new_node_inputs);
+      nb->Input(new_node_inputs);
+      nn_slot_idx++;
+    } else {
+      // Special case for connecting filter input of Conv2DBackpropInput
+      if (do_connect_conv2d_backprop_input_filter &&
+          iidx == kConv2DBackpropInputFilterInputSlotIdx) {
+        nb->Input(conv2d_node, kConv2DFilterOutputSlotIdx);
+      } else {
+        nb->Input(old_node_inputs[iidx].first, old_node_inputs[iidx].second);
+      }
+      iidx++;
+      nn_slot_idx++;
+    }
+  }
+
+  // If workspace tensors are available for this op and we are using
+  // contiguous ordering then we need to add Tensorflow tensor for
+  // workspace here because Tensorflow tensor for workspace is the
+  // last tensor in the list of Tensorflow tensors.
+  if (are_workspace_tensors_available) {
+    CHECK_EQ(workspace_tensors->size(), 2);
+    // Tensorflow tensor
+    nb->Input((*workspace_tensors)[0].node, (*workspace_tensors)[0].index);
+    nn_slot_idx++;
+  }
+
+  // Let's now setup all Mkl inputs to a new node.
+  // Number of Mkl inputs must be same as number of TF inputs.
+  iidx = 0;
+  for (int on_slot_idx = 0; on_slot_idx < old_node_input_slots; on_slot_idx++) {
+    // An input slot could be a single tensor or a list. We need
+    // to handle this case accordingly.
+    CHECK_LT(iidx, old_node_inputs.size());
+    const OpDef::ArgDef& arg = old_node->op_def().input_arg(on_slot_idx);
+    if (ArgIsList(arg)) {
+      std::vector<NodeBuilder::NodeOut> new_node_inputs;
+      int N = GetTensorListLength(arg, old_node);
+      GetNodesProducingMklTensorList(g, old_node, old_node_inputs, &iidx,
+                                     N, &new_node_inputs);
+      nb->Input(new_node_inputs);
+      nn_slot_idx++;
+    } else {
+      Node* mkl_node = nullptr;
+      int mkl_node_output_slot = 0;
+      // Special case for connecting filter input of Conv2DBackpropInput
+      if (do_connect_conv2d_backprop_input_filter &&
+          iidx == kConv2DBackpropInputFilterInputSlotIdx) {
+        GetNodeProducingMklTensor(g, old_node, conv2d_node,
+                                  kConv2DFilterOutputSlotIdx, &mkl_node,
+                                  &mkl_node_output_slot);
+      } else {
+        GetNodeProducingMklTensor(g, old_node, old_node_inputs[iidx].first,
+                                  old_node_inputs[iidx].second, &mkl_node,
+                                  &mkl_node_output_slot);
+      }
+      nb->Input(mkl_node, mkl_node_output_slot);
+      iidx++;
+      nn_slot_idx++;
+    }
+  }
+
+  // If workspace tensors are available for this op and we are using
+  // contiguous ordering then we need to add Mkl tensor for
+  // workspace here because Mkl tensor for workspace is the
+  // last tensor in the list of Mkl tensors.
+  if (are_workspace_tensors_available) {
+    CHECK_EQ(workspace_tensors->size(), 2);
+    // Mkl tensor
+    nb->Input((*workspace_tensors)[1].node, (*workspace_tensors)[1].index);
+    nn_slot_idx++;
+  }
+
+  return nn_slot_idx;
+}
+
+Status MklLayoutRewritePass::SetUpInputs(
+    std::unique_ptr<Graph>* g,
+    const gtl::InlinedVector<std::pair<Node*, int>, 4>& old_node_inputs,
+    NodeBuilder* nb, Node* old_node) {
+  // Let's check if we need to add workspace tensors for this node.
+  // We add workspace edge only for MaxPool, LRN and BatchNorm.
+  std::vector<NodeBuilder::NodeOut> workspace_tensors;
+  bool are_workspace_tensors_available = false;
+  AddWorkSpaceEdgeIfNeeded(g, old_node, nb, &workspace_tensors,
+                           &are_workspace_tensors_available);
+
+  int new_node_input_slots = 0;
+  if (kTensorOrdering == MklTfTensorOrdering::TENSORS_INTERLEAVED) {
+    // TODO(nhasabni): implement this function just for same of completion.
+    // We do not use interleaved ordering right now.
+    return Status(
+        error::Code::UNIMPLEMENTED,
+        "Interleaved ordering of tensors is currently not supported.");
+  } else {
+    CHECK_EQ(kTensorOrdering, MklTfTensorOrdering::TENSORS_CONTIGUOUS);
+    new_node_input_slots = SetUpContiguousInputs(
+        g, old_node_inputs, nb, old_node, &workspace_tensors,
+        are_workspace_tensors_available);
+  }
+
+  // Sanity check
+  int old_node_input_slots = old_node->op_def().input_arg_size();
+  if (!are_workspace_tensors_available) {
+    // If we are not adding workspace tensors for this op, then the total
+    // number of input slots to the new node _must_ be 2 times the number
+    // of input slots to the original node: N original Tensorflow tensors and
+    // N for Mkl tensors corresponding to each Tensorflow tensors.
+    CHECK_EQ(new_node_input_slots, old_node_input_slots * 2);
+  } else {
+    // If we are adding workspace tensors for this op, then the total
+    // The total number of input slots to new node _must_ be 2 times the number
+    // of input slots to the original node: N original Tensorflow tensors and
+    // N for Mkl tensors corresponding to each Tensorflow tensors plus 2
+    // (for workspace Tensorflow tensor and workspace Mkl tensor).
+    CHECK_EQ(new_node_input_slots, old_node_input_slots * 2 + 2);
+  }
+
+  return Status::OK();
+}
+
+//////////////////////////////////////////////////////////////////////////
+//           Helper functions related to workspace pass
+//////////////////////////////////////////////////////////////////////////
+
+// TODO(nhasabni) We should move this to mkl_util.h.
+void MklLayoutRewritePass::GetDummyWorkspaceTensorNode(
+    std::unique_ptr<Graph>* g, Node** out, Node* orig_node) {
+  // We use a tensor of shape {1} and value 0 to represent
+  // dummy float tensor. We need this as a dummy workspace tensor.
+  // Workspace tensor has type float.
+  const DataType dt = DataTypeToEnum<float>::v();
+  TensorProto proto;
+  proto.set_dtype(dt);
+  float zero[1] = {0};
+  proto.set_tensor_content(const_cast<const void*>(static_cast<void*>(&zero)),
+                           4);
+  TensorShape dummy_shape({1});
+  dummy_shape.AsProto(proto.mutable_tensor_shape());
+  TF_CHECK_OK(NodeBuilder((*g)->NewName("DMT"), "Const")
+                .Attr("value", proto)
+                .Attr("dtype", dt)
+                .Device(orig_node->def().device())  // We place this node on
+                                                    // same the device as the
+                                                    // device of the original
+                                                    // node.
+                .Finalize(&**g, out));
+
+  // If number of inputs to the original node is > 0, then we add
+  // control dependency between 1st input (index 0) of the original node and
+  // the dummy Mkl node. This is needed because control-flow ops such as Enter,
+  // Merge, etc, require frame_name of the dummy Mkl node to be same as the
+  // rewritten node. Adding control edge between 1st input of the original node
+  // and the dummy Mkl node ensures that the dummy node is in the same frame
+  // as the original node. Choosing 1st input is not necessary - any input of
+  // the original node is fine because all the inputs of a node are always in
+  // the same frame.
+  if (orig_node->num_inputs() > 0) {
+    Node* orig_input0 = nullptr;
+    TF_CHECK_OK(orig_node->input_node(0,
+                                      const_cast<const Node**>(&orig_input0)));
+    CHECK_NOTNULL((*g)->AddControlEdge(orig_input0, *out));
+  }
+
+  (*out)->set_assigned_device_name(orig_node->assigned_device_name());
+}
+
+void MklLayoutRewritePass::AddWorkSpaceEdgeIfNeeded(
+    std::unique_ptr<Graph>* g, Node* orig_node, NodeBuilder* nb,
+    std::vector<NodeBuilder::NodeOut>* ws_tensors, bool* are_ws_tensors_added) {
+  bool workspace_edge_added = false;  // Default initializer
+  CHECK_NOTNULL(are_ws_tensors_added);
+  *are_ws_tensors_added = false;  // Default initializer
+
+  DataType T;
+  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "T", &T));
+  for (auto ws : wsinfo_) {
+    if (orig_node->type_string() == ws.fwd_op &&
+        mkl_op_registry::IsMklOp(mkl_op_registry::GetMklOpName(
+          orig_node->type_string()), T)) {
+      // If this op is a fwd op, then we need to check if there is an
+      // edge from this node's fwd_slot to bwdop's bwd_slot. If there is
+      // an edge, then we just add an attribute on this node for setting
+      // workspace_passed to true. We don't add actual workspace edge
+      // in this node. Actual workspace edge gets added in the backward
+      // op for this node.
+      for (const Edge* e : orig_node->out_edges()) {
+        if (e->src_output() == ws.fwd_slot &&
+            e->dst()->type_string() == ws.bwd_op &&
+            e->dst_input() == ws.bwd_slot) {
+          nb->Attr("workspace_enabled", true);
+          VLOG(1) << "MklLayoutRewritePass: workspace_enabled for "
+                  << orig_node->type_string();
+          workspace_edge_added = true;
+          // We found the edge that we were looking for, so break.
+          break;
+        }
+      }
+
+      if (!workspace_edge_added) {
+        // If we are here, then we did not find backward operator for this
+        // node.
+        nb->Attr("workspace_enabled", false);
+      }
+    } else if (orig_node->type_string() == ws.bwd_op &&
+               mkl_op_registry::IsMklOp(mkl_op_registry::GetMklOpName(
+                                          orig_node->type_string()), T)) {
+      // If this op is a bwd op, then we need to add workspace edge and
+      // it's Mkl tensor edge between its corresponding fwd op and this
+      // op. Corresponding fwd op is specified in 'fwd_op' field of
+      // workspace info. fwd_slot and bwd_slot in workspace info specify
+      // an edge between which slots connect forward and backward op.
+      // Once all these criteria match, we add a workspace edge between
+      // ws_fwd_slot and ws_bwd_slot. Its corresponding Mkl tensor is
+      // determined by interleaved/contiguous ordering. Function
+      // DataIndexToMetaDataIndex tells us the location of Mkl tensor
+      // from the location of the Tensorflow tensor.
+      for (const Edge* e : orig_node->in_edges()) {
+        if (e->src_output() == ws.fwd_slot &&
+            // We would have rewritten the forward op, so we need to use
+            // GetMklOpName call to get its Mkl name.
+            e->src()->type_string() == mkl_op_registry::GetMklOpName(
+                                                          ws.fwd_op) &&
+            e->dst_input() == ws.bwd_slot) {
+          nb->Attr("workspace_enabled", true);
+          CHECK_NOTNULL(ws_tensors);
+          // Add workspace edge between fwd op and bwd op.
+          ws_tensors->push_back(NodeBuilder::NodeOut(e->src(), ws.ws_fwd_slot));
+          // Add Mkl tensor edge for workspace edge between fwd op and bwd op.
+          ws_tensors->push_back(NodeBuilder::NodeOut(
+              e->src(), DataIndexToMetaDataIndex(ws.ws_fwd_slot,
+                                                 e->src()->num_outputs())));
+          *are_ws_tensors_added = true;
+          // In terms of input ordering, we add these calls to add Input
+          // here because workspace edge (and its Mkl tensor) is the last
+          // edge in the fwdop and bwdop. So all inputs before workspace
+          // tensor have been added by SetUpInputs function.
+          VLOG(1) << "MklLayoutRewritePass: workspace_enabled for "
+                  << orig_node->type_string();
+          workspace_edge_added = true;
+          // We found the edge that we were looking for, so break.
+          break;
+        }
+      }
+
+      // If we are here means we did not find fwd op that feeds to this
+      // bwd op. So in this case, we need to generate dummy tensors for
+      // workspace input and Mkl tensor for workspace, and set
+      // workspace_enabled to false.
+      if (!workspace_edge_added) {
+        nb->Attr("workspace_enabled", false);
+        Node* dmt_ws = nullptr;      // Dummy tensor for workspace
+        Node* dmt_mkl_ws = nullptr;  // Dummy Mkl tensor for workspace
+        GetDummyWorkspaceTensorNode(g, &dmt_ws, orig_node);
+        GetDummyMklTensorNode(g, &dmt_mkl_ws, orig_node);
+        CHECK_NOTNULL(dmt_ws);
+        CHECK_NOTNULL(dmt_mkl_ws);
+        CHECK_NOTNULL(ws_tensors);
+        // We add dummy tensor as workspace tensor.
+        ws_tensors->push_back(NodeBuilder::NodeOut(dmt_ws, 0));
+        // We add dummy tensor as Mkl tensor for workspace tensor.
+        ws_tensors->push_back(NodeBuilder::NodeOut(dmt_mkl_ws, 0));
+        *are_ws_tensors_added = true;
+        VLOG(1) << "MklLayoutRewritePass: dummy workspace_enabled for "
+                << orig_node->type_string();
+      }
+    } else {
+      // If this node does not match any workspace info, then we do not
+      // do anything special for workspace propagation for it.
+    }
+  }
+}
+
+//////////////////////////////////////////////////////////////////////////
+// Op-specific functions to copy attributes from old node to new node
+//////////////////////////////////////////////////////////////////////////
+
+void MklLayoutRewritePass::CopyAttrsConv2D(const Node* orig_node,
+                                           NodeBuilder* nb) {
+  DataType T;
+  string data_format;
+  string padding;
+  std::vector<int32> strides;
+  bool use_cudnn_on_gpu;
+
+  // Get all attributes from old node.
+  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "T", &T));
+  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "strides", &strides));
+  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "padding", &padding));
+  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "data_format", &data_format));
+  TF_CHECK_OK(
+      GetNodeAttr(orig_node->def(), "use_cudnn_on_gpu", &use_cudnn_on_gpu));
+
+  // Add attributes to new node.
+  nb->Attr("T", T);
+  nb->Attr("strides", strides);
+  nb->Attr("padding", padding);
+  nb->Attr("data_format", data_format);
+  nb->Attr("use_cudnn_on_gpu", use_cudnn_on_gpu);
+}
+
+void MklLayoutRewritePass::CopyAttrsAddN(const Node* orig_node,
+                                         NodeBuilder* nb) {
+  DataType T;
+  int N;
+
+  // Get all attributes from old node.
+  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "T", &T));
+  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "N", &N));
+
+  // Add attributes to new node.
+  nb->Attr("T", T);
+  nb->Attr("N", N);
+}
+
+void MklLayoutRewritePass::CopyAttrsBiasAddGrad(const Node* orig_node,
+                                                NodeBuilder* nb) {
+  DataType T;
+  string data_format;
+  std::vector<int32> strides;
+
+  // Get all attributes from old node.
+  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "T", &T));
+  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "strides", &strides));
+  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "data_format", &data_format));
+
+  // Add attributes to new node.
+  nb->Attr("T", T);
+  nb->Attr("strides", strides);
+  nb->Attr("data_format", data_format);
+}
+
+void MklLayoutRewritePass::CopyAttrsLRN(const Node* orig_node,
+                                        NodeBuilder* nb) {
+  DataType T;
+  int depth_radius;
+  float bias;
+  float alpha;
+  float beta;
+
+  // Get all attributes from old node.
+  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "T", &T));
+  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "depth_radius", &depth_radius));
+  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "bias", &bias));
+  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "alpha", &alpha));
+  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "beta", &beta));
+
+  // Add attributes to new node.
+  nb->Attr("T", T);
+  nb->Attr("depth_radius", depth_radius);
+  nb->Attr("bias", bias);
+  nb->Attr("alpha", alpha);
+  nb->Attr("beta", beta);
+}
+
+void MklLayoutRewritePass::CopyAttrsPooling(const Node* orig_node,
+                                            NodeBuilder* nb) {
+  DataType T;
+  string data_format;
+  string padding;
+  std::vector<int32> ksize, strides;
+
+  // Get all attributes from old node.
+  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "T", &T));
+  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "ksize", &ksize));
+  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "strides", &strides));
+  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "padding", &padding));
+  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "data_format", &data_format));
+
+  // Add attributes to new node.
+  nb->Attr("T", T);
+  nb->Attr("ksize", ksize);
+  nb->Attr("strides", strides);
+  nb->Attr("padding", padding);
+  nb->Attr("data_format", data_format);
+}
+
+void MklLayoutRewritePass::CopyAttrsDataType(const Node* orig_node,
+                                             NodeBuilder* nb) {
+  DataType T;
+
+  // Get all attributes from old node.
+  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "T", &T));
+
+  // Add attributes to new node.
+  nb->Attr("T", T);
+}
+
+void MklLayoutRewritePass::CopyAttrsReshape(const Node* orig_node,
+                                           NodeBuilder* nb) {
+  DataType T;
+  DataType Tshape;
+
+  // Get all attributes from old node.
+  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "T", &T));
+  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "Tshape", &Tshape));
+  // Add attributes to new node.
+  nb->Attr("T", T);
+  nb->Attr("Tshape", Tshape);
+}
+
+void MklLayoutRewritePass::CopyAttrsSplit(const Node* orig_node,
+                                          NodeBuilder* nb) {
+  DataType T;
+  string data_format;
+  int num_split;
+
+  // Get all attributes from old node.
+  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "T", &T));
+  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "num_split", &num_split));
+  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "data_format", &data_format));
+
+  // Add attributes to new node.
+  nb->Attr("T", T);
+  nb->Attr("num_split", num_split);
+  nb->Attr("data_format", data_format);
+}
+
+void MklLayoutRewritePass::CopyAttrsConcat(const Node* orig_node,
+                                           NodeBuilder* nb) {
+  DataType T;
+  int N;
+
+  // Get all attributes from old node.
+  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "T", &T));
+  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "N", &N));
+
+  // Add attributes to new node.
+  nb->Attr("T", T);
+  nb->Attr("N", N);
+}
+
+void MklLayoutRewritePass::CopyAttrsConcatV2(const Node* orig_node,
+                                             NodeBuilder* nb) {
+  DataType T;
+  int N;
+  DataType tidx;
+
+  // Get all attributes from old node.
+  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "T", &T));
+  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "N", &N));
+  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "Tidx", &tidx));
+
+  // Add attributes to new node.
+  nb->Attr("T", T);
+  nb->Attr("N", N);
+  nb->Attr("Tidx", tidx);
+}
+
+void MklLayoutRewritePass::CopyAttrsFusedBatchNorm(const Node* orig_node,
+                                                   NodeBuilder* nb) {
+  DataType T;
+  float epsilon;
+  string data_format;
+  bool is_training;
+
+  // Get all attributes from old node.
+  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "T", &T));
+  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "epsilon", &epsilon));
+  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "data_format", &data_format));
+  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "is_training", &is_training));
+
+  // Add attributes to new node.
+  nb->Attr("T", T);
+  nb->Attr("epsilon", epsilon);
+  nb->Attr("data_format", data_format);
+  nb->Attr("is_training", is_training);
+}
+
+//////////////////////////////////////////////////////////////////////////
+//           Helper functions related to node merge pass
+//////////////////////////////////////////////////////////////////////////
+
+Node* MklLayoutRewritePass::CheckForNodeMerge(const Node* a) const {
+  // TODO(nhasabni) Add check for type of node similar to CheckForNodeRewrite
+  // once we support BiasAddGrad as Mkl layer.
+
+  // Search for all matching mergeinfo.
+  // We allow more than one match for extensibility.
+  std::vector<const MergeInfo*> matching_mi;
+  for (auto mi = minfo_.cbegin(); mi != minfo_.cend(); ++mi) {
+    if (a->type_string() == mi->op1 || a->type_string() == mi->op2) {
+      matching_mi.push_back(&*mi);
+    }
+  }
+
+  for (const MergeInfo* mi : matching_mi) {
+    // Get the operand with which 'a' can be merged.
+    Node* b = nullptr;
+    if ((b = mi->get_node_to_be_merged(a)) == nullptr) {
+      continue;
+    }
+
+    // Get the control edges and input of node
+    const int N_in = a->num_inputs();
+    gtl::InlinedVector<Node*, 4> a_control_edges;
+    gtl::InlinedVector<std::pair<Node*, int>, 4> a_in(N_in);
+    FillInputs(a, &a_control_edges, &a_in);
+
+    const int B_in = b->num_inputs();
+    gtl::InlinedVector<Node*, 4> b_control_edges;
+    gtl::InlinedVector<std::pair<Node*, int>, 4> b_in(B_in);
+    FillInputs(b, &b_control_edges, &b_in);
+
+    // Shouldn't merge if a and b have different control edges.
+    if (a_control_edges != b_control_edges) {
+      continue;
+    } else {
+      // We found a match.
+      return b;
+    }
+  }
+
+  return nullptr;
+}
+
+Status MklLayoutRewritePass::MergeConv2DWithBiasAdd(std::unique_ptr<Graph>* g,
+                                                    Node* m, Node* n) {
+  CHECK_EQ(((m->type_string() == csinfo_.bias_add &&
+             n->type_string() == csinfo_.conv2d)) ||
+           ((n->type_string() == csinfo_.bias_add &&
+             m->type_string() == csinfo_.conv2d)), true);
+
+  // If 'm' is BiasAdd, then 'n' is Conv2D. Since Conv2D feeds BiasAdd,
+  // BiasAdd is successor node, and Conv2D predecessor node.
+  Node* pred = m->type_string() == csinfo_.bias_add ? n : m;
+  Node* succ = m->type_string() == csinfo_.bias_add ? m : n;
+
+  // 1. Get all attributes from input nodes.
+  DataType T_pred, T_succ;
+  string padding;
+  std::vector<int32> strides;
+  string data_format_pred, data_format_succ;
+  bool use_cudnn_on_gnu;
+  TF_CHECK_OK(GetNodeAttr(pred->def(), "T", &T_pred));
+  TF_CHECK_OK(GetNodeAttr(succ->def(), "T", &T_succ));
+  TF_CHECK_OK(GetNodeAttr(pred->def(), "padding", &padding));
+  TF_CHECK_OK(GetNodeAttr(pred->def(), "strides", &strides));
+  TF_CHECK_OK(GetNodeAttr(pred->def(), "data_format", &data_format_pred));
+  TF_CHECK_OK(GetNodeAttr(succ->def(), "data_format", &data_format_succ));
+  TF_CHECK_OK(
+      GetNodeAttr(pred->def(), "use_cudnn_on_gpu", &use_cudnn_on_gnu));
+  // We check to ensure that data formats of both succ and pred are same.
+  // We expect them to be same, so we can enforce this as assert.
+  // But assert can be too strict, so we enforce this as a check.
+  // If the check fails, then we do not merge two nodes.
+  // We also do same check for devices.
+  if (data_format_pred != data_format_succ || T_pred != T_succ ||
+      pred->assigned_device_name() != succ->assigned_device_name() ||
+      pred->def().device() != succ->def().device()) {
+    return Status(error::Code::INVALID_ARGUMENT,
+                  "data_format or T attribute or devices of Conv2D and "
+                  "BiasAdd do not match. Will skip node merge optimization");
+  }
+
+  const int succ_num = succ->num_inputs();
+  gtl::InlinedVector<Node*, 4> succ_control_edges;
+  gtl::InlinedVector<std::pair<Node*, int>, 4> succ_in(succ_num);
+  FillInputs(succ, &succ_control_edges, &succ_in);
+
+  const int pred_num = pred->num_inputs();
+  gtl::InlinedVector<Node*, 4> pred_control_edges;
+  gtl::InlinedVector<std::pair<Node*, int>, 4> pred_in(pred_num);
+  FillInputs(pred, &pred_control_edges, &pred_in);
+
+  // We need to ensure that Conv2D only feeds to BiasAdd (some other operator is
+  // not expecting output of Conv2D). If this is not the case, then we cannot
+  // merge Conv2D with BiasAdd.
+  const int kFirstOutputSlot = 0;
+  for (const Edge* e : pred->out_edges()) {
+    if (e->src_output() == kFirstOutputSlot && e->dst() != succ) {
+      return Status(error::Code::INVALID_ARGUMENT,
+                    "Conv2D does not feed to BiasAdd, or "
+                    "it feeds BiasAdd but has multiple outputs. "
+                    "Will skip node merge optimization");
+    }
+  }
+
+  // 2. Get inputs from both the nodes.
+  // Find the 2 inputs from the conv and the bias from the add Bias.
+  // Get operand 0, 1 of conv2D.
+  CHECK_EQ(pred->in_edges().size(), 2);  // Conv2D must have 2 inputs.
+  // Get operand 1 of add_bias
+  // BiasAdd must have 2 inputs: Conv, bias
+  CHECK_EQ(succ->in_edges().size(), 2);
+
+  // We will use the node name of BiasAdd as the name of new node
+  // Build new node. We use same name as original node, but change the op
+  // name.
+  NodeBuilder nb(succ->name(), csinfo_.conv2d_with_bias);
+  nb.Input(pred_in[0].first, pred_in[0].second);  // In1 of Conv2D
+  // pred_in[1] will be 2nd Tensorflow tensor for Conv2D.
+  nb.Input(pred_in[1].first, pred_in[1].second);  // In2 of Conv2D
+  // In1 of BiasAdd is same as output of Conv2D.
+  nb.Input(succ_in[1].first, succ_in[1].second);  // In2 of BiasAdd
+
+  // Copy attributes from Conv2D to Conv2DWithBias.
+  CopyAttrsConv2D(const_cast<const Node*>(pred), &nb);
+
+  // Copy the device assigned to old node to new node.
+  nb.Device(succ->def().device());
+
+  // Create node.
+  Node* new_node;
+  nb.Finalize(&**g, &new_node);
+  CHECK_NOTNULL(new_node);
+
+  // Incoming data edges from 'pred' node and 'succ' node to new 'new_node'
+  // node are already copied in BuildNode. We handle control edges now.
+  for (const Edge* e : pred->in_edges()) {
+    if (e->IsControlEdge()) {
+      CHECK_NOTNULL((*g)->AddControlEdge(e->src(), new_node));
+    }
+  }
+  for (const Edge* e : succ->in_edges()) {
+    if (e->IsControlEdge()) {
+      CHECK_NOTNULL((*g)->AddControlEdge(e->src(), new_node));
+    }
+  }
+
+  // Incoming edges are fixed, we will fix the outgoing edges now.
+  // First, we will fix outgoing control edges from 'pred' node.
+  for (const Edge* e : pred->out_edges()) {
+    if (e->IsControlEdge()) {
+      CHECK_NOTNULL((*g)->AddControlEdge(new_node, e->dst()));
+    }
+  }
+
+  // Second, we will fix outgoing control and data edges from 'succ' node.
+  for (const Edge* e : succ->out_edges()) {
+    if (e->IsControlEdge()) {
+      CHECK_NOTNULL((*g)->AddControlEdge(new_node, e->dst()));
+    } else {
+      // BiasAdd has only 1 output (at slot 0) and merged node also has only 1
+      // output (at slot 0).
+      const int kConv2DWithBiasOutputSlot = 0;
+      CHECK_NOTNULL((*g)->AddEdge(new_node, kConv2DWithBiasOutputSlot,
+                                    e->dst(), e->dst_input()));
+    }
+  }
+
+  // Copy device assigned to old node to new node.
+  // It's ok to use pred or succ as we have enforced a check that
+  // both have same device assigned.
+  new_node->set_assigned_device_name(pred->assigned_device_name());
+
+  VLOG(1) << "MklLayoutRewritePass: Merged old node:" << pred->DebugString()
+          << ", and node: " << succ->DebugString()
+          << ", into node:" << new_node->DebugString();
+
+  (*g)->RemoveNode(succ);
+  (*g)->RemoveNode(pred);
+
+  return Status::OK();
+}
+
+Status MklLayoutRewritePass::MergeConv2DBackpropFilterWithBiasAddGrad(
+    std::unique_ptr<Graph>* g, Node* m, Node* n) {
+  CHECK_EQ(((m->type_string() == csinfo_.bias_add_grad &&
+             n->type_string() == csinfo_.conv2d_grad_filter)) ||
+           ((n->type_string() == csinfo_.bias_add_grad &&
+             m->type_string() == csinfo_.conv2d_grad_filter)), true);
+
+  // If 'm' is BiasAddGrad, then 'n' is BackpropFilter.
+  Node* badd = m->type_string() == csinfo_.bias_add_grad ? m : n;
+  Node* fltr = m->type_string() == csinfo_.bias_add_grad ? n : m;
+
+  // Sanity check for attributes from input nodes.
+  DataType T_b, T_f;
+  string data_format_b, data_format_f;
+  TF_CHECK_OK(GetNodeAttr(badd->def(), "T", &T_b));
+  TF_CHECK_OK(GetNodeAttr(fltr->def(), "T", &T_f));
+  TF_CHECK_OK(GetNodeAttr(badd->def(), "data_format", &data_format_b));
+  TF_CHECK_OK(GetNodeAttr(fltr->def(), "data_format", &data_format_f));
+  if (data_format_b != data_format_f || T_b != T_f ||
+      badd->assigned_device_name() != fltr->assigned_device_name() ||
+      badd->def().device() != fltr->def().device()) {
+    return Status(error::Code::INVALID_ARGUMENT,
+                  "data_format or T attribute or devices of "
+                  "Conv2DBackpropFilter and BiasAddGrad do not match. "
+                  "Will skip node merge optimization");
+  }
+
+  // We will use the node name of Conv2DBackpropFilter as the name of new node.
+  // This is because BackpropFilterWithBias is going to emit bias output also.
+  NodeBuilder nb(fltr->name(), csinfo_.conv2d_grad_filter_with_bias);
+  // Since Conv2DBackpropFilterWithBias has same number of inputs as
+  // Conv2DBackpropFilter, we can just copy input edges directly. We dont need
+  // to copy any data input of BiasAddGrad because that input also goes to
+  // Conv2DBackpropFilter.
+  const int fltr_ins = fltr->num_inputs();
+  gtl::InlinedVector<Node*, 4> fltr_control_edges;
+  gtl::InlinedVector<std::pair<Node*, int>, 4> fltr_in_edges(fltr_ins);
+  FillInputs(fltr, &fltr_control_edges, &fltr_in_edges);
+  for (int idx = 0; idx < fltr_ins; idx++) {
+    nb.Input(fltr_in_edges[idx].first, fltr_in_edges[idx].second);
+  }
+
+  // Copy attributes from Conv2DBackpropFilter.
+  CopyAttrsConv2D(const_cast<const Node*>(fltr), &nb);
+
+  // Copy the device assigned to old node to new node.
+  nb.Device(fltr->def().device());
+
+  // Create node.
+  Node* new_node;
+  nb.Finalize(&**g, &new_node);
+  CHECK_NOTNULL(new_node);
+
+  // Incoming data edges from BiasAddGrad node and Conv2DBackpropFilter node to
+  // new 'new_node' node are already copied in BuildNode. We handle control
+  // edges now.
+  for (const Edge* e : badd->in_edges()) {
+    if (e->IsControlEdge()) {
+      CHECK_NOTNULL((*g)->AddControlEdge(e->src(), new_node));
+    }
+  }
+  for (const Edge* e : fltr->in_edges()) {
+    if (e->IsControlEdge()) {
+      CHECK_NOTNULL((*g)->AddControlEdge(e->src(), new_node));
+    }
+  }
+
+  // Incoming edges are fixed, we will fix the outgoing edges now.
+  // First, we will fix outgoing control edges from 'badd' node.
+  // Conv2DBackpropFilter has 1 output -- filter_grad.
+  // Conv2DBackpropFilterWithBias has 2 outputs -- filter_grad and
+  // bias_grad. But filter_grad is at same slot number (0) in both the
+  // nodes. bias_grad is at slot number 1 in Conv2DBackpropFilterWithBias, while
+  // it is at slot number 0 in BiasAddGrad.
+  const int kMergedNodeFilterGradOutputIdx = 0;
+  const int kMergedNodeBiasGradOutputIdx = 1;
+
+  for (const Edge* e : badd->out_edges()) {
+    if (e->IsControlEdge()) {
+      CHECK_NOTNULL((*g)->AddControlEdge(new_node, e->dst()));
+    } else {
+      CHECK_NOTNULL((*g)->AddEdge(new_node, kMergedNodeBiasGradOutputIdx,
+                                  e->dst(), e->dst_input()));
+    }
+  }
+
+  // Second, we will fix outgoing control and data edges from 'fltr' node.
+  for (const Edge* e : fltr->out_edges()) {
+    if (e->IsControlEdge()) {
+      CHECK_NOTNULL((*g)->AddControlEdge(new_node, e->dst()));
+    } else {
+      CHECK_NOTNULL((*g)->AddEdge(new_node, kMergedNodeFilterGradOutputIdx,
+                                  e->dst(), e->dst_input()));
+    }
+  }
+
+  // Copy device assigned to old node to new node.
+  // It's ok to use badd or fltr as we have enforced a check that
+  // both have same device assigned.
+  new_node->set_assigned_device_name(badd->assigned_device_name());
+
+  VLOG(1) << "MklLayoutRewritePass: Merged old node:" << badd->DebugString()
+          << ", and node: " << fltr->DebugString()
+          << ", into node:" << new_node->DebugString();
+
+  (*g)->RemoveNode(badd);
+  (*g)->RemoveNode(fltr);
+
+  return Status::OK();
+}
+
+Status MklLayoutRewritePass::MergeNode(std::unique_ptr<Graph>* g, Node* m,
+                                       Node* n) {
+  CHECK_NOTNULL(m);
+  CHECK_NOTNULL(n);
+
+  if (((m->type_string() == csinfo_.bias_add &&
+        n->type_string() == csinfo_.conv2d)) ||
+      ((n->type_string() == csinfo_.bias_add &&
+        m->type_string() == csinfo_.conv2d))) {
+    return this->MergeConv2DWithBiasAdd(g, m, n);
+  }
+
+  if (((m->type_string() == csinfo_.bias_add_grad &&
+        n->type_string() == csinfo_.conv2d_grad_filter)) ||
+      ((n->type_string() == csinfo_.bias_add_grad &&
+        m->type_string() == csinfo_.conv2d_grad_filter))) {
+    return this->MergeConv2DBackpropFilterWithBiasAddGrad(g, m, n);
+  }
+
+  return Status(error::Code::UNIMPLEMENTED,
+                "Unimplemented case for node merge optimization.");
+}
+
+//////////////////////////////////////////////////////////////////////////
+//           Helper functions for node rewrite
+//////////////////////////////////////////////////////////////////////////
+
+Status MklLayoutRewritePass::RewriteNode(std::unique_ptr<Graph>* g,
+                                         Node* orig_node,
+                                         const RewriteInfo* ri) {
+  CHECK_NOTNULL(ri);
+  CHECK_NOTNULL(orig_node);
+
+  VLOG(1) << "MklLayoutRewritePass: Original node:" << orig_node->DebugString();
+
+  // Get all inputs.
+  int num_inputs = orig_node->in_edges().size();
+
+  // Drop count for control edges from inputs
+  for (const Edge* e : orig_node->in_edges()) {
+    if (e->IsControlEdge()) {
+      num_inputs--;
+    }
+  }
+
+  gtl::InlinedVector<Node*, 4> control_edges;
+  gtl::InlinedVector<std::pair<Node*, int>, 4> inputs(num_inputs);
+  FillInputs(orig_node, &control_edges, &inputs);
+
+  // Build new node. We use same name as original node, but change the op name.
+  NodeBuilder nb(orig_node->name().c_str(), ri->new_name.c_str());
+  // Copy user-specified device assigned to original node to new node.
+  nb.Device(orig_node->def().device());
+  // Set up new inputs to the rewritten node.
+  Status s = SetUpInputs(g, inputs, &nb, orig_node);
+  if (s != Status::OK()) {
+    return s;
+  }
+
+  ri->copy_attrs(const_cast<const Node*>(orig_node), &nb);
+  // Set the Mkl layer label for this op.
+  nb.Attr("_kernel", mkl_op_registry::kMklOpLabel);
+
+  // Finalize graph and get new node.
+  Node* new_node = nullptr;
+  TF_CHECK_OK(nb.Finalize(&**g, &new_node));
+  CHECK_NOTNULL(new_node);
+
+  // Incoming data edges from 'orig_node' node to new 'new_node' node are
+  // already copied in BuildNode. We need to handle control edges now.
+  for (const Edge* e : orig_node->in_edges()) {
+    if (e->IsControlEdge()) {
+      CHECK_NOTNULL((*g)->AddControlEdge(e->src(), new_node));
+    }
+  }
+
+  // Copy outgoing edges from 'orig_node' node to new
+  // 'new_node' node, since the output also follows same ordering among
+  // Tensorflow tensors and Mkl tensors. We need to connect Tensorflow
+  // tensors appropriately. Specifically, nth output of the original node
+  // will become 2*nth output of the Mkl node for the interleaved ordering
+  // of the tensors. For the contiguous ordering of the tensors, it will be n.
+  // GetTensorDataIndex provides this mapping function.
+  for (const Edge* e : orig_node->out_edges()) {
+    if (e->IsControlEdge()) {
+      CHECK_NOTNULL((*g)->AddControlEdge(new_node, e->dst()));
+    } else {
+      CHECK_NOTNULL((*g)->AddEdge(new_node, GetTensorDataIndex(e->src_output(),
+                            e->src()->num_outputs()),
+                    e->dst(), e->dst_input()));
+    }
+  }
+
+  // Copy the runtime device assigned from original code to new node.
+  new_node->set_assigned_device_name(orig_node->assigned_device_name());
+
+  // Delete original node and mark new node as rewritten.
+  (*g)->RemoveNode(orig_node);
+
+  VLOG(1) << "MklLayoutRewritePass: New node:" << new_node->DebugString();
+  return Status::OK();
+}
+
+const MklLayoutRewritePass::RewriteInfo*
+MklLayoutRewritePass::CheckForNodeRewrite(const Node* n) const {
+  CHECK_NOTNULL(n);
+
+  // First check if node along with its type is supported by MKL layer.
+  // We do not want to rewrite an op into Mkl op if types are not supported.
+  // E.g., MklRelu does not support INT32. So we cannot rewrite Relu to
+  // MklRelu if type is INT32.
+  DataType T;
+  if (!GetNodeAttr(n->def(), "T", &T).ok()) {
+    return nullptr;
+  }
+
+  // We make an exception for __MklDummyConv2DWithBias and
+  // __MklConv2DBackpropFilterWithBias since their names do not match Mkl node
+  // names.
+  if (n->type_string() != csinfo_.conv2d_with_bias &&
+      n->type_string() != csinfo_.conv2d_grad_filter_with_bias &&
+      !mkl_op_registry::IsMklOp(mkl_op_registry::GetMklOpName(
+                                        n->type_string()), T)) {
+      return nullptr;
+  }
+
+  // For elementwise node, we reuse the Eigen implementation and pass the MKL
+  // metadata tensor through so we can avoid conversions. However, if all
+  // incoming edges are in TF format, we don't need all this overhead, so
+  // replace the elementwise node only if at least one of its parents is a MKL
+  // node.
+  //
+  // Identity nodes can also skip replacement if they are not being served by
+  // any MKL nodes.
+  //
+  // TODO(vrane): Add implementation for element-wise ops that doesn't reuse
+  // eigen code to reduce cross-library dependency.
+  VLOG(1) << "ELEMENTWISE: checking op: " << n->type_string();
+  if (mkl_op_registry::IsMklElementWiseOp(
+        mkl_op_registry::GetMklOpName(n->type_string()), T) ||
+      n->type_string().find("Identity") != string::npos) {
+    VLOG(1) << "ELEMENTWISE: op is elementwise: " << n->type_string();
+    bool incoming_mkl_edge = false;
+    int num_parent = 0;
+    for (auto parent : n->in_edges()) {
+      if (mkl_op_registry::IsMklOp(parent->src()->type_string(), T)) {
+        VLOG(1) << "ELEMENTWISE: parent " << num_parent++ << " is MKL op: "
+                << parent->src()->type_string();
+        incoming_mkl_edge = true;
+        break;
+      } else {
+        VLOG(1) << "ELEMENTWISE: parent " << num_parent++ << " is NON-MKL op: "
+                << parent->src()->type_string();
+      }
+    }
+    if (incoming_mkl_edge == false) {
+      VLOG(1) << "ELEMENTWISE: Skipping replacement of elementwise node which has no MKL "
+                 "parents.";
+      return nullptr;
+    } else {
+      VLOG(1) << "ELEMENTWISE: Replacing elementwise node " << n->type_string() <<
+        " which has MKL parents";
+    }
+  }
+
+  // We now check if rewrite rule applies for this op. If rewrite rule passes
+  // for this op, then we rewrite it to Mkl op.
+  // Find matching RewriteInfo and then check that rewrite rule applies.
+  for (auto ri = rinfo_.cbegin(); ri != rinfo_.cend(); ++ri) {
+    if (n->type_string().compare(ri->name) == 0 &&
+        ri->rewrite_rule(n)) {
+      return &*ri;
+    }
+  }
+
+  // Else return not found.
+  return nullptr;
+}
+
+///////////////////////////////////////////////////////////////////////////////
+//              Run function for the pass
+///////////////////////////////////////////////////////////////////////////////
+
+bool MklLayoutRewritePass::RunPass(std::unique_ptr<Graph>* g) {
+  bool result = false;
+  CHECK_NOTNULL(g);
+
+  DumpGraph("Before running MklLayoutRewritePass", &**g);
+
+  std::vector<Node*> order;
+  GetReversePostOrder(**g, &order);  // This will give us topological sort.
+  for (Node* n : order) {
+    // If node is not an op or it cannot run on CPU device, then skip.
+    if (!n->IsOp() || !CanOpRunOnCPUDevice(n)) {
+      continue;
+    }
+
+    Node* m = nullptr;
+    if ((m = CheckForNodeMerge(n)) != nullptr && CanOpRunOnCPUDevice(m)) {
+      // Check if the node 'n' can be merged with any other node. If it can
+      // be 'm' contains the node with which it can be merged.
+      string n1_name = n->name();
+      string n2_name = m->name();
+
+      VLOG(1) << "MklLayoutRewritePass: Scheduled nodes " << n1_name << " and "
+              << n2_name << " for merging";
+
+      if (MergeNode(g, n, m) == Status::OK()) {
+        VLOG(1) << "MklLayoutRewritePass: Merged nodes " << n1_name << " and "
+                << n2_name;
+        result = true;
+      }
+    }
+  }
+
+  DumpGraph("After running MklLayoutRewritePass(NodeMerge)", &**g);
+
+  order.clear();
+  GetReversePostOrder(**g, &order);  // This will give us topological sort.
+  for (Node* n : order) {
+    // If node is not an op or it cannot run on CPU device, then skip.
+    if (!n->IsOp() || !CanOpRunOnCPUDevice(n)) {
+      continue;
+    }
+
+    const RewriteInfo* ri = nullptr;
+    // We will first search if node is to be rewritten.
+    if ((ri = CheckForNodeRewrite(n)) != nullptr) {
+      string node_name = n->name();
+      string op_name = n->type_string();
+
+      VLOG(1) << "MklLayoutRewritePass: Scheduled node " << node_name
+              << " with op " << op_name << " for rewrite using"
+              << " layout optimization.";
+
+      if (RewriteNode(g, n, ri) == Status::OK()) {
+        VLOG(1) << "MklLayoutRewritePass: rewrote node " << node_name
+                << " with op " << op_name << " for Mkl layout optimization.";
+        result = true;
+      }
+    }
+  }
+
+  DumpGraph("After running MklLayoutRewritePass(NodeMerge+Rewrite)", &**g);
+
+  return result;
+}
+
+bool RunMklLayoutRewritePass(std::unique_ptr<Graph>* g) {
+  return MklLayoutRewritePass().RunPass(g);
+}
+
+Status MklLayoutRewritePass::Run(
+  const GraphOptimizationPassOptions& options) {
+  if (options.graph == nullptr && options.partition_graphs == nullptr) {
+    return Status::OK();
+  }
+
+  auto process_graph = [&](std::unique_ptr<Graph>* g) {
+    // Get the ownership of a graph
+    std::unique_ptr<Graph>* ng = std::move(g);
+    RunPass(ng);
+    // Return the ownership of a graph back
+    g->reset(ng->release());
+  };
+
+  if (kMklLayoutRewritePassGroup !=
+      OptimizationPassRegistry::POST_PARTITIONING) {
+    // For any pre-partitioning phase, a graph is stored in options.graph.
+    process_graph(options.graph);
+  } else {
+    // For post partitioning phase, graphs are stored in
+    // options.partition_graphs.
+    for (auto& pg : *options.partition_graphs) {
+      process_graph(&pg.second);
+    }
+  }
+
+  return Status::OK();
+}
+#endif  // INTEL_MKL_DNN
 }  // namespace tensorflow
 
 #endif
diff --git a/tensorflow/core/graph/mkl_layout_pass_test.cc b/tensorflow/core/graph/mkl_layout_pass_test.cc
index abc63e4f35..75f7ca2d4d 100644
--- a/tensorflow/core/graph/mkl_layout_pass_test.cc
+++ b/tensorflow/core/graph/mkl_layout_pass_test.cc
@@ -37,6 +37,9 @@ limitations under the License.
 #include "tensorflow/core/platform/test_benchmark.h"
 
 namespace tensorflow {
+
+#ifndef INTEL_MKL_DNN
+
 namespace {
 
 const char kCPUDevice[] = "/job:a/replica:0/task:0/device:CPU:0";
@@ -1881,6 +1884,1627 @@ static void BM_MklLayoutRewritePass(int iters, int op_nodes) {
 BENCHMARK(BM_MklLayoutRewritePass)->Arg(1000)->Arg(10000);
 
 }  // namespace
+
+#else  // INTEL_MKL_DNN
+
+namespace {
+
+const char kCPUDevice[] = "/job:a/replica:0/task:0/device:CPU:0";
+const char kGPUDevice[] = "/job:a/replica:0/task:0/device:GPU:0";
+
+static void InitGraph(const string& s, Graph* graph,
+                      const string& device = kCPUDevice) {
+  GraphDef graph_def;
+
+  auto parser = protobuf::TextFormat::Parser();
+  //  parser.AllowRelaxedWhitespace(true);
+  CHECK(parser.MergeFromString(s, &graph_def)) << s;
+  GraphConstructorOptions opts;
+  TF_CHECK_OK(ConvertGraphDefToGraph(opts, graph_def, graph));
+
+  for (Node* node : graph->nodes()) {
+    node->set_assigned_device_name(device);
+  }
+}
+
+class MklLayoutPassTest : public ::testing::Test {
+ public:
+  MklLayoutPassTest() : graph_(OpRegistry::Global()) {}
+
+  void InitGraph(const string& s, const string& device = kCPUDevice) {
+    ::tensorflow::InitGraph(s, &graph_, device);
+    original_ = CanonicalGraphString(&graph_);
+  }
+
+  static bool IncludeNode(const Node* n) { return n->IsOp(); }
+
+  static string EdgeId(const Node* n, int index) {
+    if (index == 0) {
+      return n->name();
+    } else if (index == Graph::kControlSlot) {
+      return strings::StrCat(n->name(), ":control");
+    } else {
+      return strings::StrCat(n->name(), ":", index);
+    }
+  }
+
+  string CanonicalGraphString(Graph* g) {
+    std::vector<string> nodes;
+    std::vector<string> edges;
+    for (const Node* n : g->nodes()) {
+      if (IncludeNode(n)) {
+        nodes.push_back(strings::StrCat(n->name(), "(", n->type_string(), ")"));
+      }
+    }
+    for (const Edge* e : g->edges()) {
+      if (IncludeNode(e->src()) && IncludeNode(e->dst())) {
+        edges.push_back(strings::StrCat(EdgeId(e->src(), e->src_output()), "->",
+                                        EdgeId(e->dst(), e->dst_input())));
+      }
+    }
+    // Canonicalize
+    std::sort(nodes.begin(), nodes.end());
+    std::sort(edges.begin(), edges.end());
+    return strings::StrCat(str_util::Join(nodes, ";"), "|",
+                           str_util::Join(edges, ";"));
+  }
+
+  string DoMklLayoutOptimizationPass() {
+    string before = CanonicalGraphString(&graph_);
+    LOG(ERROR) << "Before MKL layout rewrite pass: " << before;
+
+    std::unique_ptr<Graph>* ug = new std::unique_ptr<Graph>(&graph_);
+    RunMklLayoutRewritePass(ug);
+
+    string result = CanonicalGraphString(&graph_);
+    LOG(ERROR) << "After MKL layout rewrite pass:  " << result;
+    return result;
+  }
+
+  const string& OriginalGraph() const { return original_; }
+
+  Graph graph_;
+  string original_;
+};
+
+REGISTER_OP("Input").Output("o: float").SetIsStateful();
+REGISTER_OP("InputList").Output("o: N * float").Attr("N: int").SetIsStateful();
+REGISTER_OP("HalfInput").Output("o: half").SetIsStateful();
+REGISTER_OP("Int32Input").Output("o: int32").SetIsStateful();
+REGISTER_OP("_MklInput").Output("o: uint8").SetIsStateful();
+REGISTER_OP("_MklInput2").Output("o: uint8")
+                        .Output("o1: uint8").SetIsStateful();
+
+/////////////////////////////////////////////////////////////////////
+//  Unit tests related to node merge optiimization
+/////////////////////////////////////////////////////////////////////
+
+TEST_F(MklLayoutPassTest, Basic) {
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'B' op: 'Input'}"
+      "node { name: 'C' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
+      " input: ['A', 'B'] }"
+      "node { name: 'D' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
+      " input: ['A', 'B'] }");
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Input);B(Input);C(Zeta);D(Zeta)|"
+            "A->C;A->D;B->C:1;B->D:1");
+}
+
+// Test set 1: Conv2D + AddBias
+
+// C=Conv2D(A,B); E=BiasAdd(C,D); Z=Zeta(E,Y)
+TEST_F(MklLayoutPassTest, NodeMerge_Conv2DWithBias_Positive) {
+  CHECK_EQ(kTensorOrdering, MklTfTensorOrdering::TENSORS_CONTIGUOUS);
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'B' op: 'Input'}"
+      "node { name: 'C' op: 'Conv2D'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'      value { s: 'NCHW' } }"
+      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
+      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
+      " attr { key: 'padding'          value { s: 'SAME' } }"
+      " input: ['A', 'B']}"
+      "node { name: 'D' op: 'Input'}"
+      "node { name: 'E' op: 'BiasAdd'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'      value { s: 'NCHW' } }"
+      " input: ['C', 'D'] }"
+      "node { name: 'Y' op: 'Input'}"
+      "node { name: 'Z' op: 'Zeta'"
+      " attr {key: 'T'                 value { type: DT_FLOAT } }"
+      " input: ['E', 'Y']}");
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Input);B(Input);D(Input);DMT/_0(Const);DMT/_1(Const);"
+            "DMT/_2(Const);E(_MklConv2DWithBias);Y(Input);Z(Zeta)|A->E;"
+            "A:control->DMT/_0:control;A:control->DMT/_1:control;"
+            "A:control->DMT/_2:control;B->E:1;D->E:2;DMT/_0->E:3;DMT/_1->E:4;"
+            "DMT/_2->E:5;E->Z;Y->Z:1");
+}
+
+// Graph contains only Conv2D, no AddBias.
+TEST_F(MklLayoutPassTest, NodeMerge_Conv2DWithBias_Negative_NoAddBias) {
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'B' op: 'Input'}"
+      "node { name: 'C' op: 'Conv2D'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'      value { s: 'NCHW' } }"
+      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
+      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
+      " attr { key: 'padding'          value { s: 'SAME' } }"
+      " input: ['A', 'B']}");
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Input);B(Input);C(_MklConv2D);DMT/_0(Const);DMT/_1(Const)|"
+            "A->C;A:control->DMT/_0:control;A:control->DMT/_1:control;B->C:1;"
+            "DMT/_0->C:2;DMT/_1->C:3");
+}
+
+// Conv2D output does not go to BiasAdd.
+TEST_F(MklLayoutPassTest, NodeMerge_Conv2DWithBias_Negative_Dataflow1) {
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'B' op: 'Input'}"
+      "node { name: 'C' op: 'Conv2D'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'      value { s: 'NCHW' } }"
+      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
+      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
+      " attr { key: 'padding'          value { s: 'SAME' } }"
+      " input: ['A', 'B']}"
+      "node { name: 'D' op: 'Input'}"
+      "node { name: 'E' op: 'Input'}"
+      "node { name: 'F' op: 'BiasAdd'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'      value { s: 'NCHW' } }"
+      " input: ['D', 'E'] }");  // Output of _MklConv2D does not go to BiasAdd.
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Input);B(Input);C(_MklConv2D);D(Input);DMT/_0(Const);"
+            "DMT/_1(Const);E(Input);F(BiasAdd)|A->C;A:control->DMT/_0:control;"
+            "A:control->DMT/_1:control;B->C:1;D->F;DMT/_0->C:2;DMT/_1->C:3;"
+            "E->F:1");
+}
+
+// Conv2D has two outgoing edges: BiasAdd and some other dummy node (Zeta).
+// Merge should not be done in such case.
+TEST_F(MklLayoutPassTest, NodeMerge_Conv2DWithBias_Negative_Dataflow2) {
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'B' op: 'Input'}"
+      "node { name: 'C' op: 'Conv2D'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'      value { s: 'NCHW' } }"
+      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
+      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
+      " attr { key: 'padding'          value { s: 'SAME' } }"
+      " input: ['A', 'B']}"
+      "node { name: 'D' op: 'Input'}"
+      "node { name: 'E' op: 'Input'}"
+      "node { name: 'F' op: 'BiasAdd'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'      value { s: 'NCHW' } }"
+      " input: ['D', 'E'] }"  // Conv2D has two outputs.
+                              // No merge should happen.
+      "node { name: 'G' op: 'Zeta'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " input: ['C', 'E'] }");
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Input);B(Input);C(_MklConv2D);D(Input);DMT/_0(Const);"
+            "DMT/_1(Const);E(Input);F(BiasAdd);G(Zeta)|A->C;"
+            "A:control->DMT/_0:control;A:control->DMT/_1:control;B->C:1;C->G;"
+            "D->F;DMT/_0->C:2;DMT/_1->C:3;E->F:1;E->G:1");
+}
+
+// data_format attribute value mismatch. Merge should not be done
+// in such case.
+TEST_F(MklLayoutPassTest, NodeMerge_Conv2DWithBias_Negative_AttrMismatch) {
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'B' op: 'Input'}"
+      "node { name: 'C' op: 'Conv2D'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'      value { s: 'NCHW' } }"
+      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
+      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
+      " attr { key: 'padding'          value { s: 'SAME' } }"
+      " input: ['A', 'B']}"
+      "node { name: 'D' op: 'Input'}"
+      "node { name: 'E' op: 'BiasAdd'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'      value { s: 'NHCW' } }"
+      " input: ['C', 'D'] }");
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Input);B(Input);C(_MklConv2D);D(Input);DMT/_0(Const);"
+            "DMT/_1(Const);E(BiasAdd)|A->C;A:control->DMT/_0:control;"
+            "A:control->DMT/_1:control;B->C:1;C->E;D->E:1;DMT/_0->C:2;"
+            "DMT/_1->C:3");
+}
+
+// Test set 2: BiasAddGrad + Conv2DBackpropFilter fusion tests
+
+TEST_F(MklLayoutPassTest, NodeMerge_Conv2DBackpropFilterFusion_Positive) {
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'B' op: 'Int32Input'}"
+      "node { name: 'C' op: 'Input'}"
+      "node { name: 'D' op: 'Conv2DBackpropFilter'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'      value { s: 'NCHW' } }"
+      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
+      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
+      " attr { key: 'padding'          value { s: 'SAME' } }"
+      " input: ['A', 'B', 'C'] }"
+      "node { name: 'E' op: 'BiasAddGrad'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'      value { s: 'NCHW' } }"
+      " input: ['C'] }");
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Input);B(Int32Input);C(Input);"
+            "D(_MklConv2DBackpropFilterWithBias);DMT/_0(Const);DMT/_1(Const);"
+            "DMT/_2(Const)|A->D;A:control->DMT/_0:control;"
+            "A:control->DMT/_1:control;A:control->DMT/_2:control;B->D:1;C->D:2;"
+            "DMT/_0->D:3;DMT/_1->D:4;DMT/_2->D:5");
+}
+
+// BiasAddGrad fusion in the presence of BackpropFilter. But nodes do not match
+// criteria for rewrite. So rewrite should not happen. 3rd input of
+// Conv2DBackpropFilter is different than input to BiasAddGrad.
+TEST_F(MklLayoutPassTest, NodeMerge_Conv2DBackpropFilterFusion_Negative1) {
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'B' op: 'Int32Input'}"
+      "node { name: 'C' op: 'Input'}"
+      "node { name: 'D' op: 'Conv2DBackpropFilter'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'      value { s: 'NCHW' } }"
+      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
+      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
+      " attr { key: 'padding'          value { s: 'SAME' } }"
+      " input: ['A', 'B', 'C'] }"
+      "node { name: 'E' op: 'BiasAddGrad'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'      value { s: 'NCHW' } }"
+      " input: ['A'] }");
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Input);B(Int32Input);C(Input);"
+            "D(_MklConv2DBackpropFilter);DMT/_0(Const);DMT/_1(Const);"
+            "DMT/_2(Const);E(BiasAddGrad)|A->D;A->E;A:control->DMT/_0:control;"
+            "A:control->DMT/_1:control;A:control->DMT/_2:control;B->D:1;C->D:2;"
+            "DMT/_0->D:3;DMT/_1->D:4;DMT/_2->D:5");
+}
+
+// BiasAddGrad fusion, but nodes do not match criteria for fusion.
+// Different input formats.
+TEST_F(MklLayoutPassTest, NodeMerge_Conv2DBackpropFilterFusion_Negative2) {
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'B' op: 'Int32Input'}"
+      "node { name: 'C' op: 'Input'}"
+      "node { name: 'D' op: 'Conv2DBackpropFilter'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'      value { s: 'NCHW' } }"
+      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
+      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
+      " attr { key: 'padding'          value { s: 'SAME' } }"
+      " input: ['A', 'B', 'C'] }"
+      "node { name: 'E' op: 'BiasAddGrad'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'      value { s: 'NHWC' } }"
+      " input: ['A'] }");
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Input);B(Int32Input);C(Input);"
+            "D(_MklConv2DBackpropFilter);DMT/_0(Const);DMT/_1(Const);"
+            "DMT/_2(Const);E(BiasAddGrad)|A->D;A->E;A:control->DMT/_0:control;"
+            "A:control->DMT/_1:control;A:control->DMT/_2:control;B->D:1;C->D:2;"
+            "DMT/_0->D:3;DMT/_1->D:4;DMT/_2->D:5");
+}
+
+// BiasAddGrad fusion in the presence of BackpropFilter only. Fusion is done
+// before node rewrite. Check this ordering.
+TEST_F(MklLayoutPassTest, NodeMerge_Conv2DBackpropFilterFusion_Negative3) {
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'B' op: 'Input'}"
+      "node { name: 'C' op: 'Input'}"
+      "node { name: 'M' op: '_MklInput'}"
+      "node { name: 'N' op: '_MklInput'}"
+      "node { name: 'O' op: '_MklInput'}"
+      "node { name: 'D' op: '_MklConv2DWithBias'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'      value { s: 'NCHW' } }"
+      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
+      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
+      " attr { key: 'padding'          value { s: 'SAME' } }"
+      " input: ['A', 'B', 'C', 'M', 'N', 'O']}"
+      "node { name: 'E' op: 'Zeta'"
+      " attr {key: 'T'                 value { type: DT_FLOAT } }"
+      " input: ['D', 'A']}"
+      "node { name: 'F' op: 'Int32Input'}"
+      "node { name: 'G' op: '_MklConv2DBackpropFilter'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'      value { s: 'NCHW' } }"
+      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
+      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
+      " attr { key: 'padding'          value { s: 'SAME' } }"
+      " input: ['E', 'F', 'A', 'M', 'N', 'O'] }"
+      "node { name: 'H' op: 'BiasAddGrad'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'      value { s: 'NCHW' } }"
+      " input: ['E'] }");
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Input);B(Input);C(Input);D(_MklConv2DWithBias);"
+            "E(Zeta);F(Int32Input);G(_MklConv2DBackpropFilter);H(BiasAddGrad);"
+            "M(_MklInput);N(_MklInput);O(_MklInput)|A->D;A->E:1;A->G:2;B->D:1;"
+            "C->D:2;D->E;E->G;E->H;F->G:1;M->D:3;M->G:3;N->D:4;N->G:4;O->D:5;"
+            "O->G:5");
+}
+
+// C=Conv2D(A,B); E=BiasAdd(C,D); Y=Zeta(E,X);
+// G=Conv2DBackpropInput(F,B,E)
+// This is a case of node rewrite followed by node merge followed by connecting
+// filter output of Conv2DWithBias to filter input of Conv2DBackpropInput.
+TEST_F(MklLayoutPassTest, NodeMerge_Conv2DWithBias_ConvBpropInput_FilterFwd) {
+  CHECK_EQ(kTensorOrdering, MklTfTensorOrdering::TENSORS_CONTIGUOUS);
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'B' op: 'Input'}"
+      "node { name: 'C' op: 'Conv2D'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'      value { s: 'NCHW' } }"
+      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
+      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
+      " attr { key: 'padding'          value { s: 'SAME' } }"
+      " input: ['A', 'B']}"
+      "node { name: 'D' op: 'Input'}"
+      "node { name: 'E' op: 'BiasAdd'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'      value { s: 'NCHW' } }"
+      " input: ['C', 'D'] }"
+      "node { name: 'X' op: 'Input'}"
+      "node { name: 'Y' op: 'Zeta'"
+      " attr {key: 'T'                 value { type: DT_FLOAT } }"
+      " input: ['E', 'X']}"
+      "node { name: 'F' op: 'Int32Input'}"
+      "node { name: 'G' op: 'Conv2DBackpropInput'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'      value { s: 'NCHW' } }"
+      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
+      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
+      " attr { key: 'padding'          value { s: 'SAME' } }"
+      " input: ['F', 'B', 'E']}"
+      "node { name: 'Z' op: 'Zeta'"
+      " attr {key: 'T'                 value { type: DT_FLOAT } }"
+      " input: ['G', 'X']}");
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Input);B(Input);D(Input);DMT/_0(Const);DMT/_1(Const);"
+            "DMT/_2(Const);DMT/_3(Const);E(_MklConv2DWithBias);F(Int32Input);"
+            "G(_MklConv2DBackpropInput);X(Input);Y(Zeta);Z(Zeta)|"
+            "A->E;A:control->DMT/_0:control;A:control->DMT/_1:control;"
+            "A:control->DMT/_2:control;B->E:1;D->E:2;DMT/_0->E:3;"
+            "DMT/_1->E:4;DMT/_2->E:5;DMT/_3->G:3;E->G:2;E->Y;E:1->G:1;E:2->G:5;"
+            "E:3->G:4;F->G;F:control->DMT/_3:control;G->Z;X->Y:1;X->Z:1");
+}
+
+/////////////////////////////////////////////////////////////////////
+//  Unit tests related to rewriting node to Mkl node
+/////////////////////////////////////////////////////////////////////
+
+// Single Conv2D Op; No Mkl layer on the input and on the output.
+// We will generate dummy Mkl tensor as 2nd input of Conv2D.
+TEST_F(MklLayoutPassTest, NodeRewrite_Conv2D_Basic) {
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'B' op: 'Input'}"
+      "node { name: 'C' op: 'Conv2D'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'      value { s: 'NCHW' } }"
+      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
+      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
+      " attr { key: 'padding'          value { s: 'SAME' } }"
+      " input: ['A', 'B']}"
+      "node { name: 'D' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
+      " input: ['B', 'C'] }");
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Input);B(Input);C(_MklConv2D);D(Zeta);DMT/_0(Const);"
+            "DMT/_1(Const)|A->C;A:control->DMT/_0:control;"
+            "A:control->DMT/_1:control;B->C:1;B->D;C->D:1;DMT/_0->C:2;"
+            "DMT/_1->C:3");
+}
+
+// 2 Conv2D Ops in sequence. Both should get transformed and 1st Conv2D will
+// have 2 outputs, both of which will be inputs to next Conv2D.
+TEST_F(MklLayoutPassTest, NodeRewrite_Conv2D_Positive1) {
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'B' op: 'Input'}"
+      "node { name: 'C' op: 'Conv2D'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'      value { s: 'NCHW' } }"
+      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
+      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
+      " attr { key: 'padding'          value { s: 'SAME' } }"
+      " input: ['A', 'B']}"
+      "node { name: 'D' op: 'Conv2D'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'      value { s: 'NCHW' } }"
+      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
+      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
+      " attr { key: 'padding'          value { s: 'SAME' } }"
+      " input: ['A', 'C']}"
+      "node { name: 'E' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
+      " input: ['C', 'D'] }");
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Input);B(Input);C(_MklConv2D);D(_MklConv2D);DMT/_0(Const);"
+            "DMT/_1(Const);DMT/_2(Const);E(Zeta)|A->C;A->D;"
+            "A:control->DMT/_0:control;A:control->DMT/_1:control;"
+            "A:control->DMT/_2:control;B->C:1;C->D:1;C->E;"
+            "C:2->D:3;D->E:1;DMT/_0->C:2;DMT/_1->C:3;DMT/_2->D:2");
+}
+
+// Conv2D with INT32 which is not supported by Mkl
+TEST_F(MklLayoutPassTest, NodeRewrite_Conv2D_Negative_UnsupportedType) {
+  InitGraph(
+      "node { name: 'A' op: 'HalfInput'}"
+      "node { name: 'B' op: 'HalfInput'}"
+      "node { name: 'C' op: 'Conv2D'"
+      " attr { key: 'T'                value { type: DT_HALF } }"
+      " attr { key: 'data_format'      value { s: 'NCHW' } }"
+      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
+      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
+      " attr { key: 'padding'          value { s: 'SAME' } }"
+      " input: ['A', 'B']}"
+      "node { name: 'D' op: 'Zeta' attr { key: 'T' value { type: DT_HALF } }"
+      " input: ['B', 'C'] }");
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(HalfInput);B(HalfInput);C(Conv2D);D(Zeta)|"
+            "A->C;B->C:1;B->D;C->D:1");
+}
+
+TEST_F(MklLayoutPassTest, NodeRewrite_Conv2DGradFilter_Positive) {
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'B' op: 'Int32Input'}"
+      "node { name: 'C' op: 'Input'}"
+      "node { name: 'D' op: 'Conv2DBackpropFilter'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'      value { s: 'NCHW' } }"
+      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
+      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
+      " attr { key: 'padding'          value { s: 'SAME' } }"
+      " input: ['A', 'B', 'C']}"
+      "node { name: 'E' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
+      " input: ['A', 'D'] }");
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Input);B(Int32Input);C(Input);D(_MklConv2DBackpropFilter);"
+            "DMT/_0(Const);DMT/_1(Const);DMT/_2(Const);E(Zeta)|"
+            "A->D;A->E;A:control->DMT/_0:control;A:control->DMT/_1:control;"
+            "A:control->DMT/_2:control;B->D:1;C->D:2;D->E:1;DMT/_0->D:3;"
+            "DMT/_1->D:4;DMT/_2->D:5");
+}
+
+TEST_F(MklLayoutPassTest, NodeRewrite_Conv2DGradInput_Positive) {
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'B' op: 'Int32Input'}"
+      "node { name: 'C' op: 'Input'}"
+      "node { name: 'D' op: 'Conv2DBackpropInput'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'      value { s: 'NCHW' } }"
+      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
+      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
+      " attr { key: 'padding'          value { s: 'SAME' } }"
+      " input: ['B', 'A', 'C']}"
+      "node { name: 'E' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
+      " input: ['A', 'D'] }");
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Input);B(Int32Input);C(Input);D(_MklConv2DBackpropInput);"
+            "DMT/_0(Const);DMT/_1(Const);DMT/_2(Const);E(Zeta)|"
+            "A->D:1;A->E;B->D;B:control->DMT/_0:control;"
+            "B:control->DMT/_1:control;B:control->DMT/_2:control;C->D:2;"
+            "D->E:1;DMT/_0->D:3;DMT/_1->D:4;DMT/_2->D:5");
+}
+
+// Check that we never rewrite BiasAddGrad.
+TEST_F(MklLayoutPassTest, NodeRewrite_BiasAddGrad_Positive) {
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'B' op: 'Input'}"
+      "node { name: 'C' op: 'Polygamma'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " input: ['A', 'B']}"
+      "node { name: 'D' op: 'Zeta'"
+      " attr {key: 'T'                 value { type: DT_FLOAT } }"
+      " input: ['C', 'A']}"
+      "node { name: 'E' op: 'BiasAddGrad'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'      value { s: 'NCHW' } }"
+      " input: ['D'] }");
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Input);B(Input);C(Polygamma);D(Zeta);E(BiasAddGrad)|"
+            "A->C;A->D:1;B->C:1;C->D;D->E");
+}
+
+// Check that we never rewrite BiasAddGrad.
+TEST_F(MklLayoutPassTest, NodeRewrite_BiasAddGrad_Positive1) {
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'B' op: 'Input'}"
+      "node { name: 'C' op: 'MatMul'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " attr { key: 'transpose_a'      value { b: false } }"
+      " attr { key: 'transpose_b'      value { b: false } }"
+      " input: ['A', 'B']}"
+      "node { name: 'D' op: 'Zeta'"
+      " attr {key: 'T'                 value { type: DT_FLOAT } }"
+      " input: ['C', 'A']}"
+      "node { name: 'E' op: 'BiasAddGrad'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'      value { s: 'NCHW' } }"
+      " input: ['D'] }");
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Input);B(Input);C(MatMul);D(Zeta);E(BiasAddGrad)|"
+            "A->C;A->D:1;B->C:1;C->D;D->E");
+}
+
+// Check that we never rewrite BiasAddGrad.
+TEST_F(MklLayoutPassTest, NodeRewrite_BiasAddGrad_Positive2) {
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'B' op: 'Input'}"
+      "node { name: 'M' op: '_MklInput'}"
+      "node { name: 'N' op: '_MklInput'}"
+      "node { name: 'C' op: '_MklConv2D'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'      value { s: 'NCHW' } }"
+      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
+      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
+      " attr { key: 'padding'          value { s: 'SAME' } }"
+      " input: ['A', 'B', 'M', 'N']}"
+      "node { name: 'D' op: 'Zeta'"
+      " attr {key: 'T'                 value { type: DT_FLOAT } }"
+      " input: ['C', 'A']}"
+      "node { name: 'E' op: 'BiasAddGrad'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'      value { s: 'NCHW' } }"
+      " input: ['D'] }");
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Input);B(Input);C(_MklConv2D);D(Zeta);E(BiasAddGrad);"
+            "M(_MklInput);N(_MklInput)|A->C;A->D:1;B->C:1;C->D;D->E;"
+            "M->C:2;N->C:3");
+}
+
+// Concat Op test: Concat with no Mkl layer feeding it
+TEST_F(MklLayoutPassTest, NodeRewrite_Concat_Basic) {
+  InitGraph(
+      "node { name: 'A' op: 'Const' "
+      " attr { key: 'dtype' value { type: DT_INT32 } }"
+      " attr { key: 'value' value { "
+      "    tensor { dtype: DT_INT32 tensor_shape { dim { size: 1 } } "
+      "    int_val: 0 } } } }"
+      "node { name: 'B' op: 'InputList'"
+      " attr { key: 'N'                value { i: 2 } }}"
+      "node { name: 'C' op: 'Input'}"
+      "node { name: 'D' op: 'Concat'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " attr { key: 'N'                value { i: 2 } }"
+      " input: ['A', 'B:0', 'B:1']}"
+      "node { name: 'E' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
+      " input: ['C', 'D'] }");
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Const);B(InputList);C(Input);D(_MklConcat);DMT/_0(Const);"
+            "DMT/_1(Const);DMT/_2(Const);E(Zeta)|A->D;A:control->DMT/_0:control;"
+            "A:control->DMT/_1:control;A:control->DMT/_2:control;B->D:1;"
+            "B:1->D:2;C->E;D->E:1;DMT/_0->D:3;DMT/_1->D:4;DMT/_2->D:5");
+}
+
+// Concat with 2 Mkl layers feeding it
+TEST_F(MklLayoutPassTest, NodeRewrite_Concat_Input_Mkl) {
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'B' op: 'Input'}"
+      "node { name: 'C' op: 'Input'}"
+      "node { name: 'D' op: 'Input'}"
+      "node { name: 'E' op: 'Conv2D'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'      value { s: 'NCHW' } }"
+      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
+      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
+      " attr { key: 'padding'          value { s: 'SAME' } }"
+      " input: ['A', 'B']}"
+      "node { name: 'F' op: 'Conv2D'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'      value { s: 'NCHW' } }"
+      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
+      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
+      " attr { key: 'padding'          value { s: 'SAME' } }"
+      " input: ['C', 'D']}"
+      "node { name: 'G' op: 'Const' "
+      " attr { key: 'dtype' value { type: DT_INT32 } }"
+      " attr { key: 'value' value { "
+      "    tensor { dtype: DT_INT32 tensor_shape { dim { size: 1 } } "
+      "    int_val: 0 } } } }"
+      "node { name: 'H' op: 'Concat'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " attr { key: 'N'                value { i: 2 } }"
+      " input: ['G', 'E', 'F']}"
+      "node { name: 'I' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
+      " input: ['A', 'H'] }");
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Input);B(Input);C(Input);D(Input);DMT/_0(Const);DMT/_1(Const);"
+            "DMT/_2(Const);DMT/_3(Const);DMT/_4(Const);E(_MklConv2D);"
+            "F(_MklConv2D);G(Const);H(_MklConcat);I(Zeta)|A->E;A->I;"
+            "A:control->DMT/_2:control;A:control->DMT/_3:control;"
+            "B->E:1;C->F;C:control->DMT/_0:control;C:control->DMT/_1:control;"
+            "D->F:1;DMT/_0->F:2;DMT/_1->F:3;DMT/_2->E:2;DMT/_3->E:3;"
+            "DMT/_4->H:3;E->H:1;E:2->H:4;F->H:2;F:2->H:5;G->H;"
+            "G:control->DMT/_4:control;H->I:1");
+}
+
+// Concat with 1 Mkl and 1 non-Mkl layer feeding it
+TEST_F(MklLayoutPassTest, NodeRewrite_Concat_Input_MixedMkl) {
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'B' op: 'Input'}"
+      "node { name: 'C' op: 'Input'}"
+      "node { name: 'D' op: 'Input'}"
+      "node { name: 'E' op: 'Conv2D'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'      value { s: 'NCHW' } }"
+      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
+      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
+      " attr { key: 'padding'          value { s: 'SAME' } }"
+      " input: ['A', 'B']}"
+      "node { name: 'F' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
+      " input: ['C', 'D']}"
+      "node { name: 'G' op: 'Const' "
+      " attr { key: 'dtype' value { type: DT_INT32 } }"
+      " attr { key: 'value' value { "
+      "    tensor { dtype: DT_INT32 tensor_shape { dim { size: 1 } } "
+      "    int_val: 0 } } } }"
+      "node { name: 'H' op: 'Concat'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " attr { key: 'N'                value { i: 2 } }"
+      " input: ['G', 'E', 'F']}"
+      "node { name: 'I' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
+      " input: ['A', 'H'] }");
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Input);B(Input);C(Input);D(Input);DMT/_0(Const);DMT/_1(Const);"
+            "DMT/_2(Const);DMT/_3(Const);E(_MklConv2D);F(Zeta);G(Const);"
+            "H(_MklConcat);I(Zeta)|A->E;A->I;A:control->DMT/_0:control;"
+            "A:control->DMT/_1:control;B->E:1;C->F;D->F:1;DMT/_0->E:2;"
+            "DMT/_1->E:3;DMT/_2->H:3;DMT/_3->H:5;E->H:1;E:2->H:4;F->H:2;"
+            "G->H;G:control->DMT/_2:control;G:control->DMT/_3:control;H->I:1");
+}
+
+// ConcatV2 Op test: ConcatV2 with no Mkl layer feeding it
+TEST_F(MklLayoutPassTest, NodeRewrite_ConcatV2_Basic) {
+  InitGraph(
+      "node { name: 'A' op: 'Const' "
+      " attr { key: 'dtype' value { type: DT_INT32 } }"
+      " attr { key: 'value' value { "
+      "    tensor { dtype: DT_INT32 tensor_shape { dim { size: 1 } } "
+      "    int_val: 0 } } } }"
+      "node { name: 'B' op: 'InputList'"
+      " attr { key: 'N'                value { i: 2 } }}"
+      "node { name: 'C' op: 'Input'}"
+      "node { name: 'D' op: 'ConcatV2'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " attr { key: 'Tidx'             value { type: DT_INT32 } }"
+      " attr { key: 'N'                value { i: 2 } }"
+      " input: ['B:0', 'B:1', 'A']}"
+      "node { name: 'E' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
+      " input: ['C', 'D'] }");
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Const);B(InputList);C(Input);D(_MklConcatV2);DMT/_0(Const);"
+            "DMT/_1(Const);DMT/_2(Const);E(Zeta)|A->D:2;B->D;B:1->D:1;"
+            "B:control->DMT/_0:control;B:control->DMT/_1:control;"
+            "B:control->DMT/_2:control;C->E;D->E:1;DMT/_0->D:3;"
+            "DMT/_1->D:4;DMT/_2->D:5");
+}
+
+// ConcatV2 with 2 Mkl layers feeding it
+TEST_F(MklLayoutPassTest, NodeRewrite_ConcatV2_Input_Mkl) {
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'B' op: 'Input'}"
+      "node { name: 'C' op: 'Input'}"
+      "node { name: 'D' op: 'Input'}"
+      "node { name: 'E' op: 'Conv2D'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'      value { s: 'NCHW' } }"
+      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
+      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
+      " attr { key: 'padding'          value { s: 'SAME' } }"
+      " input: ['A', 'B']}"
+      "node { name: 'F' op: 'Conv2D'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'      value { s: 'NCHW' } }"
+      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
+      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
+      " attr { key: 'padding'          value { s: 'SAME' } }"
+      " input: ['C', 'D']}"
+      "node { name: 'G' op: 'Const' "
+      " attr { key: 'dtype' value { type: DT_INT32 } }"
+      " attr { key: 'value' value { "
+      "    tensor { dtype: DT_INT32 tensor_shape { dim { size: 1 } } "
+      "    int_val: 0 } } } }"
+      "node { name: 'H' op: 'ConcatV2'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " attr { key: 'Tidx'             value { type: DT_INT32 } }"
+      " attr { key: 'N'                value { i: 2 } }"
+      " input: ['E', 'F', 'G']}"
+      "node { name: 'I' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
+      " input: ['A', 'H'] }");
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Input);B(Input);C(Input);D(Input);DMT/_0(Const);DMT/_1(Const);"
+            "DMT/_2(Const);DMT/_3(Const);DMT/_4(Const);E(_MklConv2D);"
+            "F(_MklConv2D);G(Const);H(_MklConcatV2);I(Zeta)|A->E;A->I;"
+            "A:control->DMT/_2:control;A:control->DMT/_3:control;B->E:1;C->F;"
+            "C:control->DMT/_0:control;C:control->DMT/_1:control;"
+            "D->F:1;DMT/_0->F:2;DMT/_1->F:3;DMT/_2->E:2;DMT/_3->E:3;"
+            "DMT/_4->H:5;E->H;E:2->H:3;E:control->DMT/_4:control;F->H:1;"
+            "F:2->H:4;G->H:2;H->I:1");
+}
+
+// ConcatV2 with 1 Mkl and 1 non-Mkl layer feeding it
+TEST_F(MklLayoutPassTest, NodeRewrite_ConcatV2_Input_MixedMkl) {
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'B' op: 'Input'}"
+      "node { name: 'C' op: 'Input'}"
+      "node { name: 'D' op: 'Input'}"
+      "node { name: 'E' op: 'Conv2D'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'      value { s: 'NCHW' } }"
+      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
+      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
+      " attr { key: 'padding'          value { s: 'SAME' } }"
+      " input: ['A', 'B']}"
+      "node { name: 'F' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
+      " input: ['C', 'D']}"
+      "node { name: 'G' op: 'Const' "
+      " attr { key: 'dtype' value { type: DT_INT32 } }"
+      " attr { key: 'value' value { "
+      "    tensor { dtype: DT_INT32 tensor_shape { dim { size: 1 } } "
+      "    int_val: 0 } } } }"
+      "node { name: 'H' op: 'ConcatV2'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " attr { key: 'Tidx'             value { type: DT_INT32 } }"
+      " attr { key: 'N'                value { i: 2 } }"
+      " input: ['E', 'F', 'G']}"
+      "node { name: 'I' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
+      " input: ['A', 'H'] }");
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Input);B(Input);C(Input);D(Input);DMT/_0(Const);DMT/_1(Const);"
+            "DMT/_2(Const);DMT/_3(Const);E(_MklConv2D);F(Zeta);G(Const);"
+            "H(_MklConcatV2);I(Zeta)|A->E;A->I;A:control->DMT/_0:control;"
+            "A:control->DMT/_1:control;B->E:1;C->F;D->F:1;DMT/_0->E:2;"
+            "DMT/_1->E:3;DMT/_2->H:4;DMT/_3->H:5;E->H;E:2->H:3;"
+            "E:control->DMT/_2:control;E:control->DMT/_3:control;F->H:1;"
+            "G->H:2;H->I:1");
+}
+
+TEST_F(MklLayoutPassTest, NodeRewrite_Relu_Positive) {
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'B' op: 'Relu'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " input: ['A'] }"
+      "node { name: 'C' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
+      " input: ['A', 'B'] }");
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Input);B(_MklRelu);C(Zeta);DMT/_0(Const)|A->B;A->C;"
+            "A:control->DMT/_0:control;B->C:1;DMT/_0->B:1");
+}
+
+TEST_F(MklLayoutPassTest, NodeRewrite_ReluGrad_Positive) {
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'B' op: 'Input'}"
+      "node { name: 'C' op: 'ReluGrad'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " input: ['A', 'B'] }"
+      "node { name: 'D' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
+      " input: ['A', 'C'] }");
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Input);B(Input);C(_MklReluGrad);D(Zeta);DMT/_0(Const);"
+            "DMT/_1(Const)|A->C;A->D;A:control->DMT/_0:control;"
+            "A:control->DMT/_1:control;B->C:1;C->D:1;DMT/_0->C:2;DMT/_1->C:3");
+}
+
+TEST_F(MklLayoutPassTest, NodeRewrite_ReluReluGrad_Positive) {
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'B' op: 'Relu'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " input: ['A'] }"
+      "node { name: 'C' op: 'ReluGrad'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " input: ['A', 'B'] }"
+      "node { name: 'D' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
+      " input: ['A', 'C'] }");
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Input);B(_MklRelu);C(_MklReluGrad);D(Zeta);DMT/_0(Const);"
+            "DMT/_1(Const)|A->B;A->C;A->D;A:control->DMT/_0:control;"
+            "A:control->DMT/_1:control;B->C:1;B:1->C:3;C->D:1;DMT/_0->B:1;"
+            "DMT/_1->C:2");
+}
+
+TEST_F(MklLayoutPassTest, NodeRewrite_AvgPool_Positive) {
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'B' op: 'AvgPool'"
+      " attr { key: 'T'            value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'  value { s: 'NCHW' } }"
+      " attr { key: 'ksize'        value { list: {i: 1, i:1, i:3, i:3} } }"
+      " attr { key: 'padding'      value { s: 'VALID' } }"
+      " attr { key: 'strides'      value { list: {i: 1, i:1, i:2, i:2} } }"
+      " input: ['A'] }"
+      "node { name: 'C' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
+      " input: ['A', 'B'] }");
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Input);B(_MklAvgPool);C(Zeta);DMT/_0(Const)|A->B;A->C;"
+            "A:control->DMT/_0:control;B->C:1;DMT/_0->B:1");
+}
+
+TEST_F(MklLayoutPassTest, NodeRewrite_AvgPoolGrad_Positive) {
+  InitGraph(
+      "node { name: 'A' op: 'Int32Input'}"
+      "node { name: 'B' op: 'Input'}"
+      "node { name: 'C' op: 'AvgPoolGrad' "
+      " attr { key: 'T'            value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'  value { s: 'NCHW' } }"
+      " attr { key: 'ksize'        value { list: {i: 1, i:1, i:3, i:3} } }"
+      " attr { key: 'padding'      value { s: 'VALID' } }"
+      " attr { key: 'strides'      value { list: {i: 1, i:1, i:2, i:2} } }"
+      " input: ['A', 'B'] }"
+      "node { name: 'D' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
+      " input: ['B', 'C'] }");
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Int32Input);B(Input);C(_MklAvgPoolGrad);D(Zeta);DMT/_0(Const);"
+            "DMT/_1(Const)|A->C;A:control->DMT/_0:control;"
+            "A:control->DMT/_1:control;B->C:1;B->D;C->D:1;DMT/_0->C:2;"
+            "DMT/_1->C:3");
+}
+
+TEST_F(MklLayoutPassTest, NodeRewrite_AvgPoolAvgPoolGrad_Positive) {
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'I' op: 'Int32Input'}"
+      "node { name: 'B' op: 'AvgPool'"
+      " attr { key: 'T'            value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'  value { s: 'NCHW' } }"
+      " attr { key: 'ksize'        value { list: {i: 1, i:1, i:3, i:3} } }"
+      " attr { key: 'padding'      value { s: 'VALID' } }"
+      " attr { key: 'strides'      value { list: {i: 1, i:1, i:2, i:2} } }"
+      " input: ['A'] }"
+      "node { name: 'C' op: 'AvgPoolGrad' "
+      " attr { key: 'T'            value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'  value { s: 'NCHW' } }"
+      " attr { key: 'ksize'        value { list: {i: 1, i:1, i:3, i:3} } }"
+      " attr { key: 'padding'      value { s: 'VALID' } }"
+      " attr { key: 'strides'      value { list: {i: 1, i:1, i:2, i:2} } }"
+      " input: ['I', 'B'] }"
+      "node { name: 'D' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
+      " input: ['A', 'C'] }");
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Input);B(_MklAvgPool);C(_MklAvgPoolGrad);D(Zeta);DMT/_0(Const);"
+            "DMT/_1(Const);I(Int32Input)|A->B;A->D;A:control->DMT/_0:control;"
+            "B->C:1;B:1->C:3;C->D:1;DMT/_0->B:1;DMT/_1->C:2;I->C;"
+            "I:control->DMT/_1:control");
+}
+
+TEST_F(MklLayoutPassTest, NodeRewrite_FusedBatchNormGrad_Positive) {
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'B' op: 'Input'}"
+      "node { name: 'C' op: 'Input'}"
+      "node { name: 'D' op: 'Input'}"
+      "node { name: 'E' op: 'Input'}"
+      "node { name: 'F' op: 'FusedBatchNormGrad'"
+      " attr { key: 'T'            value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'  value { s: 'NCHW' } }"
+      " attr { key: 'epsilon'      value { f: 0.0001 } }"
+      " attr { key: 'is_training'  value { b: true } }"
+      " input: ['A', 'B', 'C', 'D', 'E'] }"
+      "node { name: 'G' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
+      " input: ['A', 'F'] }");
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Input);B(Input);C(Input);D(Input);DMT/_0(Const);DMT/_1(Const);"
+            "DMT/_2(Const);DMT/_3(Const);DMT/_4(Const);E(Input);"
+            "F(_MklFusedBatchNormGrad);G(Zeta)|A->F;A->G;"
+            "A:control->DMT/_0:control;A:control->DMT/_1:control;"
+            "A:control->DMT/_2:control;A:control->DMT/_3:control;"
+            "A:control->DMT/_4:control;B->F:1;C->F:2;D->F:3;"
+            "DMT/_0->F:5;DMT/_1->F:6;DMT/_2->F:7;DMT/_3->F:8;DMT/_4->F:9;"
+            "E->F:4;F->G:1");
+}
+
+TEST_F(MklLayoutPassTest, NodeRewrite_FusedBatchNorm_Positive) {
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'B' op: 'Input'}"
+      "node { name: 'C' op: 'Input'}"
+      "node { name: 'D' op: 'Input'}"
+      "node { name: 'E' op: 'Input'}"
+      "node { name: 'F' op: 'FusedBatchNorm'"
+      " attr { key: 'T'            value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'  value { s: 'NCHW' } }"
+      " attr { key: 'epsilon'      value { f: 0.0001 } }"
+      " attr { key: 'is_training'  value { b: true } }"
+      " input: ['A', 'B', 'C', 'D', 'E'] }"
+      "node { name: 'G' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
+      " input: ['A', 'F'] }");
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Input);B(Input);C(Input);D(Input);DMT/_0(Const);DMT/_1(Const);"
+            "DMT/_2(Const);DMT/_3(Const);DMT/_4(Const);E(Input);"
+            "F(_MklFusedBatchNorm);G(Zeta)|A->F;A->G;"
+            "A:control->DMT/_0:control;A:control->DMT/_1:control;"
+            "A:control->DMT/_2:control;A:control->DMT/_3:control;"
+            "A:control->DMT/_4:control;B->F:1;C->F:2;D->F:3;"
+            "DMT/_0->F:5;DMT/_1->F:6;DMT/_2->F:7;DMT/_3->F:8;DMT/_4->F:9;"
+            "E->F:4;F->G:1");
+}
+
+/////////////////////////////////////////////////////////////////////
+//  Unit tests related to rewriting node for workspace edges
+/////////////////////////////////////////////////////////////////////
+
+/* Test LRN->MaxPool->MaxPoolGrad->LRNGrad replacement by workspace nodes. */
+TEST_F(MklLayoutPassTest, MaxPoolLRN_Positive) {
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'B' op: 'LRN'"
+      " attr { key: 'T'            value { type: DT_FLOAT } }"
+      " attr { key: 'alpha'        value { f: 0.001 } }"
+      " attr { key: 'beta'         value { f: 0.75 } }"
+      " attr { key: 'bias'         value { f: 1.0 } }"
+      " attr { key: 'data_format'  value { s: 'NCHW' } }"
+      " attr { key: 'depth_radius' value { i: 2 } }"
+      " input: ['A'] }"
+      "node { name: 'C' op: 'MaxPool'"
+      " attr { key: 'T'            value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'  value { s: 'NCHW' } }"
+      " attr { key: 'ksize'        value { list: {i: 1, i:1, i:3, i:3} } }"
+      " attr { key: 'padding'      value { s: 'VALID' } }"
+      " attr { key: 'strides'      value { list: {i: 1, i:1, i:2, i:2} } }"
+      " input: ['B'] }"
+      "node { name: 'D' op: 'Input'}"
+      "node { name: 'E' op: 'MaxPoolGrad'"
+      " attr { key: 'T'            value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'  value { s: 'NCHW' } }"
+      " attr { key: 'ksize'        value { list: {i: 1, i:1, i:3, i:3} } }"
+      " attr { key: 'padding'      value { s: 'VALID' } }"
+      " attr { key: 'strides'      value { list: {i: 1, i:1, i:2, i:2} } }"
+      " input: ['B', 'C', 'D'] }"
+      "node { name: 'F' op: 'Input'}"
+      "node { name: 'G' op: 'LRNGrad'"
+      " attr { key: 'T'            value { type: DT_FLOAT } }"
+      " attr { key: 'alpha'        value { f: 0.001 } }"
+      " attr { key: 'beta'         value { f: 0.75 } }"
+      " attr { key: 'bias'         value { f: 1.0 } }"
+      " attr { key: 'data_format'  value { s: 'NCHW' } }"
+      " attr { key: 'depth_radius' value { i: 2 } }"
+      " input: ['E', 'F', 'B'] }"
+      "node { name: 'H' op: 'Input'}"
+      "node { name: 'I' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
+      " input: ['H', 'G'] }");
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+      "A(Input);B(_MklLRN);C(_MklMaxPool);D(Input);DMT/_0(Const);DMT/_1(Const);"
+      "DMT/_2(Const);E(_MklMaxPoolGrad);F(Input);G(_MklLRNGrad);H(Input);"
+      "I(Zeta)|A->B;A:control->DMT/_0:control;B->C;B->E;B->G:2;B:1->G:3;"
+      "B:2->C:1;B:2->E:4;B:2->G:6;B:3->G:7;B:control->DMT/_1:control;C->E:1;"
+      "C:1->E:3;C:2->E:5;C:3->E:7;D->E:2;DMT/_0->B:1;DMT/_1->E:6;DMT/_2->G:5;"
+      "E->G;E:1->G:4;E:control->DMT/_2:control;F->G:1;G->I:1;H->I");
+}
+
+/* Test LRN->LRNGrad replacement by workspace nodes. */
+TEST_F(MklLayoutPassTest, LRN_Positive) {
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'B' op: 'LRN'"
+      " attr { key: 'T'            value { type: DT_FLOAT } }"
+      " attr { key: 'alpha'        value { f: 0.001 } }"
+      " attr { key: 'beta'         value { f: 0.75 } }"
+      " attr { key: 'bias'         value { f: 1.0 } }"
+      " attr { key: 'data_format'  value { s: 'NCHW' } }"
+      " attr { key: 'depth_radius' value { i: 2 } }"
+      " input: ['A'] }"
+      "node { name: 'C' op: 'Input'}"
+      "node { name: 'D' op: 'Input'}"
+      "node { name: 'E' op: 'LRNGrad'"
+      " attr { key: 'T'            value { type: DT_FLOAT } }"
+      " attr { key: 'alpha'        value { f: 0.001 } }"
+      " attr { key: 'beta'         value { f: 0.75 } }"
+      " attr { key: 'bias'         value { f: 1.0 } }"
+      " attr { key: 'data_format'  value { s: 'NCHW' } }"
+      " attr { key: 'depth_radius' value { i: 2 } }"
+      " input: ['C', 'D', 'B'] }"
+      "node { name: 'F' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
+      " input: ['C', 'E'] }");
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Input);B(_MklLRN);C(Input);D(Input);DMT/_0(Const);DMT/_1(Const);"
+            "DMT/_2(Const);E(_MklLRNGrad);F(Zeta)|"
+            "A->B;A:control->DMT/_0:control;B->E:2;B:1->E:3;B:2->E:6;B:3->E:7;"
+            "C->E;C->F;C:control->DMT/_1:control;C:control->DMT/_2:control;"
+            "D->E:1;DMT/_0->B:1;DMT/_1->E:4;DMT/_2->E:5;E->F:1");
+}
+
+/* Test LRN->LRNGrad replacement when only one of them is present. */
+TEST_F(MklLayoutPassTest, LRN_Negative1) {
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'B' op: 'LRN'"
+      " attr { key: 'T'            value { type: DT_FLOAT } }"
+      " attr { key: 'alpha'        value { f: 0.001 } }"
+      " attr { key: 'beta'         value { f: 0.75 } }"
+      " attr { key: 'bias'         value { f: 1.0 } }"
+      " attr { key: 'data_format'  value { s: 'NCHW' } }"
+      " attr { key: 'depth_radius' value { i: 2 } }"
+      " input: ['A'] }"
+      "node { name: 'C' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
+      " input: ['A', 'B'] }");
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Input);B(_MklLRN);C(Zeta);DMT/_0(Const)|"
+            "A->B;A->C;A:control->DMT/_0:control;B->C:1;DMT/_0->B:1");
+}
+
+/* Test LRN->LRNGrad replacement when only one of them is present. */
+TEST_F(MklLayoutPassTest, LRN_Negative2) {
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'B' op: 'Input'}"
+      "node { name: 'C' op: 'Input'}"
+      "node { name: 'D' op: 'LRNGrad'"
+      " attr { key: 'T'            value { type: DT_FLOAT } }"
+      " attr { key: 'alpha'        value { f: 0.001 } }"
+      " attr { key: 'beta'         value { f: 0.75 } }"
+      " attr { key: 'bias'         value { f: 1.0 } }"
+      " attr { key: 'data_format'  value { s: 'NCHW' } }"
+      " attr { key: 'depth_radius' value { i: 2 } }"
+      " input: ['A', 'B', 'C'] }"
+      "node { name: 'E' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
+      " input: ['A', 'D'] }");
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Input);B(Input);C(Input);D(_MklLRNGrad);DMT/_0(Const);"
+            "DMT/_1(Const);DMT/_2(Const);DMT/_3(Const);DMT/_4(Const);E(Zeta)|"
+            "A->D;A->E;A:control->DMT/_0:control;A:control->DMT/_1:control;"
+            "A:control->DMT/_2:control;A:control->DMT/_3:control;"
+            "A:control->DMT/_4:control;B->D:1;C->D:2;D->E:1;DMT/_0->D:3;"
+            "DMT/_1->D:7;DMT/_2->D:4;DMT/_3->D:5;DMT/_4->D:6");
+}
+
+/* Test LRN->LRNGrad negative case, where single LRN feeds
+   2 LRNGrad nodes at different slots. */
+TEST_F(MklLayoutPassTest, LRN_Negative3) {
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'B' op: 'LRN'"
+      " attr { key: 'T'            value { type: DT_FLOAT } }"
+      " attr { key: 'alpha'        value { f: 0.001 } }"
+      " attr { key: 'beta'         value { f: 0.75 } }"
+      " attr { key: 'bias'         value { f: 1.0 } }"
+      " attr { key: 'data_format'  value { s: 'NCHW' } }"
+      " attr { key: 'depth_radius' value { i: 2 } }"
+      " input: ['A'] }"
+      "node { name: 'C' op: 'Input'}"
+      "node { name: 'D' op: 'Input'}"
+      "node { name: 'E' op: 'LRNGrad'"
+      " attr { key: 'T'            value { type: DT_FLOAT } }"
+      " attr { key: 'alpha'        value { f: 0.001 } }"
+      " attr { key: 'beta'         value { f: 0.75 } }"
+      " attr { key: 'bias'         value { f: 1.0 } }"
+      " attr { key: 'data_format'  value { s: 'NCHW' } }"
+      " attr { key: 'depth_radius' value { i: 2 } }"
+      " input: ['C', 'D', 'B'] }"
+      "node { name: 'F' op: 'LRNGrad'"
+      " attr { key: 'T'            value { type: DT_FLOAT } }"
+      " attr { key: 'alpha'        value { f: 0.001 } }"
+      " attr { key: 'beta'         value { f: 0.75 } }"
+      " attr { key: 'bias'         value { f: 1.0 } }"
+      " attr { key: 'data_format'  value { s: 'NCHW' } }"
+      " attr { key: 'depth_radius' value { i: 2 } }"
+      " input: ['C', 'B', 'D'] }"
+      "node { name: 'G' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
+      " input: ['E', 'F'] }");
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Input);B(_MklLRN);C(Input);D(Input);DMT/_0(Const);DMT/_1(Const);"
+            "DMT/_2(Const);DMT/_3(Const);DMT/_4(Const);DMT/_5(Const);"
+            "DMT/_6(Const);E(_MklLRNGrad);F(_MklLRNGrad);G(Zeta)|A->B;"
+            "A:control->DMT/_0:control;B->E:2;"
+            "B->F:1;B:1->E:3;B:2->E:6;B:2->F:5;B:3->E:7;C->E;C->F;"
+            "C:control->DMT/_1:control;C:control->DMT/_2:control;"
+            "C:control->DMT/_3:control;C:control->DMT/_4:control;"
+            "C:control->DMT/_5:control;C:control->DMT/_6:control;"
+            "D->E:1;D->F:2;DMT/_0->B:1;DMT/_1->F:3;DMT/_2->F:7;DMT/_3->F:4;"
+            "DMT/_4->F:6;DMT/_5->E:4;DMT/_6->E:5;E->G;F->G:1");
+}
+
+/* Test MaxPool->MaxPoolGrad replacement by workspace+rewrite nodes. */
+TEST_F(MklLayoutPassTest, NodeWorkspace_MaxPool_Positive) {
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'B' op: 'MaxPool'"
+      " attr { key: 'T'            value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'  value { s: 'NCHW' } }"
+      " attr { key: 'ksize'        value { list: {i: 1, i:1, i:3, i:3} } }"
+      " attr { key: 'padding'      value { s: 'VALID' } }"
+      " attr { key: 'strides'      value { list: {i: 1, i:1, i:2, i:2} } }"
+      " input: ['A'] }"
+      "node { name: 'C' op: 'Input'}"
+      "node { name: 'D' op: 'Input'}"
+      "node { name: 'E' op: 'MaxPoolGrad'"
+      " attr { key: 'T'            value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'  value { s: 'NCHW' } }"
+      " attr { key: 'ksize'        value { list: {i: 1, i:1, i:3, i:3} } }"
+      " attr { key: 'padding'      value { s: 'VALID' } }"
+      " attr { key: 'strides'      value { list: {i: 1, i:1, i:2, i:2} } }"
+      " input: ['C', 'B', 'D'] }"
+      "node { name: 'F' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
+      " input: ['C', 'E'] }");
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Input);B(_MklMaxPool);C(Input);D(Input);DMT/_0(Const);"
+            "DMT/_1(Const);DMT/_2(Const);E(_MklMaxPoolGrad);F(Zeta)|"
+            "A->B;A:control->DMT/_0:control;B->E:1;B:1->E:3;B:2->E:5;B:3->E:7;"
+            "C->E;C->F;C:control->DMT/_1:control;C:control->DMT/_2:control;"
+            "D->E:2;DMT/_0->B:1;DMT/_1->E:4;DMT/_2->E:6;E->F:1");
+}
+
+// Test MaxPool>MaxPoolGrad replacement when only one of them is present.
+// In this case, we will rewrite MaxPool node but workspace edges will not
+// be present.
+TEST_F(MklLayoutPassTest, NodeWorkspace_MaxPool_Negative1) {
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'B' op: 'MaxPool'"
+      " attr { key: 'T'            value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'  value { s: 'NCHW' } }"
+      " attr { key: 'ksize'        value { list: {i: 1, i:1, i:3, i:3} } }"
+      " attr { key: 'padding'      value { s: 'VALID' } }"
+      " attr { key: 'strides'      value { list: {i: 1, i:1, i:2, i:2} } }"
+      " input: ['A'] }"
+      "node { name: 'C' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
+      " input: ['A', 'B'] }");
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Input);B(_MklMaxPool);C(Zeta);DMT/_0(Const)|"
+            "A->B;A->C;A:control->DMT/_0:control;B->C:1;DMT/_0->B:1");
+}
+
+// Test MaxPoolGrad replacement when only one of them is present.
+// In this case, we will rewrite MaxPoolGrad and for workspace tensor and
+// its Mkl part, we will generate dummy tensor.
+TEST_F(MklLayoutPassTest, NodeWorkspace_MaxPool_Negative2) {
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'B' op: 'Input'}"
+      "node { name: 'C' op: 'Input'}"
+      "node { name: 'D' op: 'MaxPoolGrad'"
+      " attr { key: 'T'            value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'  value { s: 'NCHW' } }"
+      " attr { key: 'ksize'        value { list: {i: 1, i:1, i:3, i:3} } }"
+      " attr { key: 'padding'      value { s: 'VALID' } }"
+      " attr { key: 'strides'      value { list: {i: 1, i:1, i:2, i:2} } }"
+      " input: ['A', 'B', 'C'] }"
+      "node { name: 'E' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
+      " input: ['A', 'D'] }");
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Input);B(Input);C(Input);D(_MklMaxPoolGrad);DMT/_0(Const);"
+            "DMT/_1(Const);DMT/_2(Const);DMT/_3(Const);DMT/_4(Const);E(Zeta)|"
+            "A->D;A->E;A:control->DMT/_0:control;A:control->DMT/_1:control;"
+            "A:control->DMT/_2:control;A:control->DMT/_3:control;"
+            "A:control->DMT/_4:control;B->D:1;C->D:2;D->E:1;DMT/_0->D:3;"
+            "DMT/_1->D:7;DMT/_2->D:4;DMT/_3->D:5;DMT/_4->D:6");
+}
+
+// Test MaxPool handling for batch-wise pooling (NCHW)
+// No rewrite should take place in such case
+TEST_F(MklLayoutPassTest, NodeWorkspace_MaxPool_Negative3) {
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'B' op: 'MaxPool'"
+      " attr { key: 'T'            value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'  value { s: 'NCHW' } }"
+      " attr { key: 'ksize'        value { list: {i: 2, i:1, i:1, i:1} } }"
+      " attr { key: 'padding'      value { s: 'VALID' } }"
+      " attr { key: 'strides'      value { list: {i: 1, i:1, i:1, i:1} } }"
+      " input: ['A'] }"
+      "node { name: 'C' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
+      " input: ['A', 'B'] }");
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Input);B(MaxPool);C(Zeta)|A->B;A->C;B->C:1");
+}
+
+// Test MaxPool handling for batch-wise pooling (NCHW)
+// No rewrite should take place in such case
+TEST_F(MklLayoutPassTest, NodeWorkspace_MaxPool_Negative4) {
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'B' op: 'MaxPool'"
+      " attr { key: 'T'            value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'  value { s: 'NCHW' } }"
+      " attr { key: 'ksize'        value { list: {i: 1, i:1, i:1, i:1} } }"
+      " attr { key: 'padding'      value { s: 'VALID' } }"
+      " attr { key: 'strides'      value { list: {i: 2, i:1, i:1, i:1} } }"
+      " input: ['A'] }"
+      "node { name: 'C' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
+      " input: ['A', 'B'] }");
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Input);B(MaxPool);C(Zeta)|A->B;A->C;B->C:1");
+}
+
+// Test MaxPool handling for depth-wise pooling (NHWC)
+// No rewrite should take place in such case
+TEST_F(MklLayoutPassTest, NodeWorkspace_MaxPool_Negative5) {
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'B' op: 'MaxPool'"
+      " attr { key: 'T'            value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'  value { s: 'NCHW' } }"
+      " attr { key: 'ksize'        value { list: {i: 1, i:2, i:1, i:1} } }"
+      " attr { key: 'padding'      value { s: 'VALID' } }"
+      " attr { key: 'strides'      value { list: {i: 1, i:1, i:1, i:1} } }"
+      " input: ['A'] }"
+      "node { name: 'C' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
+      " input: ['A', 'B'] }");
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Input);B(MaxPool);C(Zeta)|A->B;A->C;B->C:1");
+}
+
+// Test MaxPool handling for depth-wise pooling (NCHW)
+// No rewrite should take place in such case
+TEST_F(MklLayoutPassTest, NodeWorkspace_MaxPool_Negative6) {
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'B' op: 'MaxPool'"
+      " attr { key: 'T'            value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'  value { s: 'NCHW' } }"
+      " attr { key: 'ksize'        value { list: {i: 1, i:1, i:1, i:1} } }"
+      " attr { key: 'padding'      value { s: 'VALID' } }"
+      " attr { key: 'strides'      value { list: {i: 1, i:2, i:1, i:1} } }"
+      " input: ['A'] }"
+      "node { name: 'C' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
+      " input: ['A', 'B'] }");
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Input);B(MaxPool);C(Zeta)|A->B;A->C;B->C:1");
+}
+
+// Test MaxPool handling for batch-wise pooling (NHWC)
+// No rewrite should take place in such case
+TEST_F(MklLayoutPassTest, NodeWorkspace_MaxPool_Negative7) {
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'B' op: 'MaxPool'"
+      " attr { key: 'T'            value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'  value { s: 'NHWC' } }"
+      " attr { key: 'ksize'        value { list: {i: 2, i:1, i:1, i:1} } }"
+      " attr { key: 'padding'      value { s: 'VALID' } }"
+      " attr { key: 'strides'      value { list: {i: 1, i:1, i:1, i:1} } }"
+      " input: ['A'] }"
+      "node { name: 'C' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
+      " input: ['A', 'B'] }");
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Input);B(MaxPool);C(Zeta)|A->B;A->C;B->C:1");
+}
+
+// Test MaxPool handling for batch-wise pooling (NHWC)
+// No rewrite should take place in such case
+TEST_F(MklLayoutPassTest, NodeWorkspace_MaxPool_Negative8) {
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'B' op: 'MaxPool'"
+      " attr { key: 'T'            value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'  value { s: 'NHWC' } }"
+      " attr { key: 'ksize'        value { list: {i: 1, i:1, i:1, i:1} } }"
+      " attr { key: 'padding'      value { s: 'VALID' } }"
+      " attr { key: 'strides'      value { list: {i: 2, i:1, i:1, i:1} } }"
+      " input: ['A'] }"
+      "node { name: 'C' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
+      " input: ['A', 'B'] }");
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Input);B(MaxPool);C(Zeta)|A->B;A->C;B->C:1");
+}
+
+// Test MaxPool handling for depth-wise pooling (NHWC)
+// No rewrite should take place in such case
+TEST_F(MklLayoutPassTest, NodeWorkspace_MaxPool_Negative9) {
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'B' op: 'MaxPool'"
+      " attr { key: 'T'            value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'  value { s: 'NHWC' } }"
+      " attr { key: 'ksize'        value { list: {i: 1, i:1, i:1, i:2} } }"
+      " attr { key: 'padding'      value { s: 'VALID' } }"
+      " attr { key: 'strides'      value { list: {i: 1, i:1, i:1, i:1} } }"
+      " input: ['A'] }"
+      "node { name: 'C' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
+      " input: ['A', 'B'] }");
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Input);B(MaxPool);C(Zeta)|A->B;A->C;B->C:1");
+}
+
+// Test MaxPool handling for depth-wise pooling (NHWC)
+// No rewrite should take place in such case
+TEST_F(MklLayoutPassTest, NodeWorkspace_MaxPool_Negative10) {
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'B' op: 'MaxPool'"
+      " attr { key: 'T'            value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'  value { s: 'NHWC' } }"
+      " attr { key: 'ksize'        value { list: {i: 1, i:1, i:1, i:1} } }"
+      " attr { key: 'padding'      value { s: 'VALID' } }"
+      " attr { key: 'strides'      value { list: {i: 1, i:1, i:1, i:2} } }"
+      " input: ['A'] }"
+      "node { name: 'C' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
+      " input: ['A', 'B'] }");
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Input);B(MaxPool);C(Zeta)|A->B;A->C;B->C:1");
+}
+
+/////////////////////////////////////////////////////////////////////
+
+// Single Conv2D Op on GPU device
+// No rewrite should happen
+TEST_F(MklLayoutPassTest, NodeRewrite_Conv2D_DeviceTest) {
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'B' op: 'Input'}"
+      "node { name: 'C' op: 'Conv2D'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'      value { s: 'NCHW' } }"
+      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
+      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
+      " attr { key: 'padding'          value { s: 'SAME' } }"
+      " input: ['A', 'B']}"
+      "node { name: 'D' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
+      " input: ['B', 'C'] }", kGPUDevice);
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Input);B(Input);C(Conv2D);D(Zeta)|A->C;B->C:1;B->D;C->D:1");
+}
+
+TEST_F(MklLayoutPassTest, NodeMerge_Conv2DBackprop_DeviceTest) {
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'B' op: 'Input'}"
+      "node { name: 'C' op: 'Input'}"
+      "node { name: 'M' op: '_MklInput'}"
+      "node { name: 'N' op: '_MklInput'}"
+      "node { name: 'O' op: '_MklInput'}"
+      "node { name: 'D' op: '_MklConv2DWithBias'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'      value { s: 'NCHW' } }"
+      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
+      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
+      " attr { key: 'padding'          value { s: 'SAME' } }"
+      " input: ['A', 'B', 'C', 'M', 'N', 'O']}"
+      "node { name: 'E' op: 'Zeta'"
+      " attr {key: 'T'                 value { type: DT_FLOAT } }"
+      " input: ['D', 'A']}"
+      "node { name: 'F' op: 'BiasAddGrad'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'      value { s: 'NCHW' } }"
+      " input: ['E'] }", kGPUDevice);
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Input);B(Input);C(Input);D(_MklConv2DWithBias);"
+            "E(Zeta);F(BiasAddGrad);M(_MklInput);N(_MklInput);"
+            "O(_MklInput)|A->D;A->E:1;B->D:1;C->D:2;D->E;E->F;"
+            "M->D:3;N->D:4;O->D:5");
+}
+
+TEST_F(MklLayoutPassTest, NodeRewrite_Conv2DGradFilter_DeviceTest) {
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'B' op: 'Int32Input'}"
+      "node { name: 'C' op: 'Input'}"
+      "node { name: 'D' op: 'Conv2DBackpropFilter'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'      value { s: 'NCHW' } }"
+      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
+      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
+      " attr { key: 'padding'          value { s: 'SAME' } }"
+      " input: ['A', 'B', 'C']}"
+      "node { name: 'E' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
+      " input: ['A', 'D'] }", kGPUDevice);
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Input);B(Int32Input);C(Input);D(Conv2DBackpropFilter);E(Zeta)|"
+            "A->D;A->E;B->D:1;C->D:2;D->E:1");
+}
+
+TEST_F(MklLayoutPassTest, NodeRewrite_Relu_DeviceTest) {
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'B' op: 'Relu'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " input: ['A'] }"
+      "node { name: 'C' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
+      " input: ['A', 'B'] }", kGPUDevice);
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Input);B(Relu);C(Zeta)|A->B;A->C;B->C:1");
+}
+
+TEST_F(MklLayoutPassTest, NodeRewrite_ReluGrad_DeviceTest) {
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'B' op: 'Input'}"
+      "node { name: 'C' op: 'ReluGrad'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " input: ['A', 'B'] }"
+      "node { name: 'D' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
+      " input: ['A', 'C'] }", kGPUDevice);
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Input);B(Input);C(ReluGrad);D(Zeta)|A->C;A->D;B->C:1;C->D:1");
+}
+
+TEST_F(MklLayoutPassTest, NodeRewrite_MaxPool_DeviceTest) {
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'B' op: 'MaxPool'"
+      " attr { key: 'T'            value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'  value { s: 'NHWC' } }"
+      " attr { key: 'ksize'        value { list: {i: 1, i:1, i:1, i:1} } }"
+      " attr { key: 'padding'      value { s: 'VALID' } }"
+      " attr { key: 'strides'      value { list: {i: 1, i:1, i:1, i:1} } }"
+      " input: ['A'] }"
+      "node { name: 'C' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
+      " input: ['A', 'B'] }", kGPUDevice);
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Input);B(MaxPool);C(Zeta)|A->B;A->C;B->C:1");
+}
+
+TEST_F(MklLayoutPassTest, NodeRewrite_AvgPool_DeviceTest) {
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'B' op: 'AvgPool'"
+      " attr { key: 'T'            value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'  value { s: 'NHWC' } }"
+      " attr { key: 'ksize'        value { list: {i: 1, i:1, i:1, i:1} } }"
+      " attr { key: 'padding'      value { s: 'VALID' } }"
+      " attr { key: 'strides'      value { list: {i: 1, i:1, i:1, i:1} } }"
+      " input: ['A'] }"
+      "node { name: 'C' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
+      " input: ['A', 'B'] }", kGPUDevice);
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Input);B(AvgPool);C(Zeta)|A->B;A->C;B->C:1");
+}
+
+// Concat Op test: Concat with no Mkl layer feeding it
+TEST_F(MklLayoutPassTest, NodeRewrite_Concat_DeviceTest) {
+  InitGraph(
+      "node { name: 'A' op: 'Const' "
+      " attr { key: 'dtype' value { type: DT_INT32 } }"
+      " attr { key: 'value' value { "
+      "    tensor { dtype: DT_INT32 tensor_shape { dim { size: 1 } } "
+      "    int_val: 0 } } } }"
+      "node { name: 'B' op: 'InputList'"
+      " attr { key: 'N'                value { i: 2 } }}"
+      "node { name: 'C' op: 'Input'}"
+      "node { name: 'D' op: 'Concat'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " attr { key: 'N'                value { i: 2 } }"
+      " input: ['A', 'B:0', 'B:1']}"
+      "node { name: 'E' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
+      " input: ['C', 'D'] }", kGPUDevice);
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Const);B(InputList);C(Input);D(Concat);E(Zeta)|A->D;"
+            "B->D:1;B:1->D:2;C->E;D->E:1");
+}
+
+TEST_F(MklLayoutPassTest, NodeRewrite_ConcatV2_DeviceTest) {
+  InitGraph(
+      "node { name: 'A' op: 'Const' "
+      " attr { key: 'dtype' value { type: DT_INT32 } }"
+      " attr { key: 'value' value { "
+      "    tensor { dtype: DT_INT32 tensor_shape { dim { size: 1 } } "
+      "    int_val: 0 } } } }"
+      "node { name: 'B' op: 'InputList'"
+      " attr { key: 'N'                value { i: 2 } }}"
+      "node { name: 'C' op: 'Input'}"
+      "node { name: 'D' op: 'ConcatV2'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " attr { key: 'Tidx'             value { type: DT_INT32 } }"
+      " attr { key: 'N'                value { i: 2 } }"
+      " input: ['B:0', 'B:1', 'A']}"
+      "node { name: 'E' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
+      " input: ['C', 'D'] }", kGPUDevice);
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Const);B(InputList);C(Input);D(ConcatV2);E(Zeta)|"
+            "A->D:2;B->D;B:1->D:1;C->E;D->E:1");
+}
+
+TEST_F(MklLayoutPassTest, NodeRewrite_FusedBatchNorm_DeviceTest) {
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'B' op: 'Input'}"
+      "node { name: 'C' op: 'Input'}"
+      "node { name: 'D' op: 'Input'}"
+      "node { name: 'E' op: 'Input'}"
+      "node { name: 'F' op: 'FusedBatchNorm'"
+      " attr { key: 'T'            value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'  value { s: 'NCHW' } }"
+      " attr { key: 'epsilon'      value { f: 0.0001 } }"
+      " attr { key: 'is_training'  value { b: true } }"
+      " input: ['A', 'B', 'C', 'D', 'E'] }"
+      "node { name: 'G' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
+      " input: ['A', 'F'] }", kGPUDevice);
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Input);B(Input);C(Input);D(Input);E(Input);"
+            "F(FusedBatchNorm);G(Zeta)|A->F;A->G;B->F:1;C->F:2;D->F:3;"
+            "E->F:4;F->G:1");
+}
+
+TEST_F(MklLayoutPassTest, NodeMerge_Conv2DWithBias_DeviceTest) {
+  CHECK_EQ(kTensorOrdering, MklTfTensorOrdering::TENSORS_CONTIGUOUS);
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'B' op: 'Input'}"
+      "node { name: 'M' op: '_MklInput'}"
+      "node { name: 'N' op: '_MklInput'}"
+      "node { name: 'C' op: '_MklConv2D'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'      value { s: 'NCHW' } }"
+      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
+      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
+      " attr { key: 'padding'          value { s: 'SAME' } }"
+      " input: ['A', 'B', 'M', 'N']}"
+      "node { name: 'D' op: 'Input'}"
+      "node { name: 'E' op: 'BiasAdd'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'      value { s: 'NCHW' } }"
+      " input: ['C', 'D'] }"
+      "node { name: 'Y' op: 'Input'}"
+      "node { name: 'Z' op: 'Zeta'"
+      " attr {key: 'T'                 value { type: DT_FLOAT } }"
+      " input: ['E', 'Y']}", kGPUDevice);
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Input);B(Input);C(_MklConv2D);D(Input);E(BiasAdd);"
+            "M(_MklInput);N(_MklInput);Y(Input);Z(Zeta)|A->C;"
+            "B->C:1;C->E;D->E:1;E->Z;M->C:2;N->C:3;Y->Z:1");
+}
+
+/////////////////////////////////////////////////////////////////////
+
+static void BM_MklLayoutRewritePass(int iters, int op_nodes) {
+  testing::StopTiming();
+  string s;
+  for (int in = 0; in < 10; in++) {
+    s += strings::Printf("node { name: 'in%04d' op: 'Input'}", in);
+  }
+  random::PhiloxRandom philox(301, 17);
+  random::SimplePhilox rnd(&philox);
+  for (int op = 0; op < op_nodes; op++) {
+    s += strings::Printf(
+        "node { name: 'op%04d' op: 'Zeta' attr { key: 'T' value { "
+        "type: DT_FLOAT } } input: ['in%04d', 'in%04d' ] }",
+        op, rnd.Uniform(10), rnd.Uniform(10));
+  }
+
+  bool first = true;
+  while (iters > 0) {
+    Graph* graph = new Graph(OpRegistry::Global());
+    InitGraph(s, graph);
+    int N = graph->num_node_ids();
+    if (first) {
+      testing::SetLabel(strings::StrCat("Per graph node.  Nodes: ", N));
+      first = false;
+    }
+    {
+      testing::StartTiming();
+      std::unique_ptr<Graph> ug(graph);
+      RunMklLayoutRewritePass(&ug);
+      testing::StopTiming();
+    }
+    iters -= N;  // Our benchmark units are individual graph nodes,
+                 // not whole graphs
+    // delete graph;
+  }
+}
+BENCHMARK(BM_MklLayoutRewritePass)->Arg(1000)->Arg(10000);
+
+}  // namespace
+
+#endif  // INTEL_MKL_DNN
+
 }  // namespace tensorflow
 
 #endif /* INTEL_MKL */
diff --git a/tensorflow/core/kernels/logging_ops.cc b/tensorflow/core/kernels/logging_ops.cc
index 67d603dd0a..bacf3e7740 100644
--- a/tensorflow/core/kernels/logging_ops.cc
+++ b/tensorflow/core/kernels/logging_ops.cc
@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
+#include <iostream>
 #include "tensorflow/core/framework/op_kernel.h"
 #include "tensorflow/core/lib/core/status.h"
 #include "tensorflow/core/lib/strings/str_util.h"
@@ -76,7 +77,7 @@ class PrintOp : public OpKernel {
       strings::StrAppend(&msg, "[", ctx->input(i).SummarizeValue(summarize_),
                          "]");
     }
-    LOG(INFO) << msg;
+    std::cerr << msg << std::endl;
   }
 
  private:
diff --git a/tensorflow/core/kernels/mkl_aggregate_ops.cc b/tensorflow/core/kernels/mkl_aggregate_ops.cc
index 935eb81dd0..9aabbbdb6b 100644
--- a/tensorflow/core/kernels/mkl_aggregate_ops.cc
+++ b/tensorflow/core/kernels/mkl_aggregate_ops.cc
@@ -19,7 +19,6 @@ limitations under the License.
 #define EIGEN_USE_THREADS
 
 #include <numeric>
-
 #include "tensorflow/core/framework/numeric_op.h"
 #include "tensorflow/core/framework/register_types.h"
 #include "tensorflow/core/lib/gtl/inlined_vector.h"
@@ -29,10 +28,17 @@ limitations under the License.
 #include "mkl_dnn_types.h"
 #include "tensorflow/core/util/mkl_util.h"
 
-namespace tensorflow {
+#ifdef INTEL_MKL_DNN
+#include "mkldnn.hpp"
+using mkldnn::stream;
+using mkldnn::sum;
+#endif
 
+namespace tensorflow {
 typedef Eigen::ThreadPoolDevice CPUDevice;
 
+#ifndef INTEL_MKL_DNN
+
 template <typename Device, typename T>
 class MklAddNOp : public OpKernel {
  public:
@@ -41,17 +47,18 @@ class MklAddNOp : public OpKernel {
   void Compute(OpKernelContext* ctx) override {
     const int num = ctx->num_inputs();
     OP_REQUIRES(ctx, num / 2 == 2,
-                errors::InvalidArgument("Only additions of two arguments "
+                errors::InvalidArgument("Only additions of two tensors "
                                         "supported by MKL. Num inputs: ",
                                         num));
 
     MklAddNOpContext mkl_context;
-    const Tensor& input0 = MklGetInput(ctx, 0);
-    GetMklShape(ctx, 0, &(mkl_context.input1_shape));
+    size_t src1_idx = 0, src2_idx = 1;
+    const Tensor& input0 = MklGetInput(ctx, src1_idx);
+    GetMklShape(ctx, src1_idx, &(mkl_context.input1_shape));
     bool input1_in_mkl_format = mkl_context.input1_shape.IsMklTensor();
 
-    const Tensor& input1 = MklGetInput(ctx, 1);
-    GetMklShape(ctx, 1, &(mkl_context.input2_shape));
+    const Tensor& input1 = MklGetInput(ctx, src2_idx);
+    GetMklShape(ctx, src2_idx, &(mkl_context.input2_shape));
     bool input2_in_mkl_format = mkl_context.input2_shape.IsMklTensor();
 
     // handle the case of a scalar
@@ -59,13 +66,12 @@ class MklAddNOp : public OpKernel {
       const TensorShape& o_shape = input0.shape();
       Tensor* out_tensor = nullptr;
       mkl_context.output_shape.SetMklTensor(false);
-      AllocateOutputSetMklShape(ctx, 0, &out_tensor, o_shape,
+      AllocateOutputSetMklShape(ctx, src1_idx, &out_tensor, o_shape,
                                 mkl_context.output_shape);
       float user_i1 = (input0.scalar<T>()());
-      ;
       float user_i2 = (input1.scalar<T>()());
-      ;
-      out_tensor->scalar<T>()() = std::plus<float>{}(user_i1, user_i2);
+      out_tensor->scalar<T>()() =
+          std::plus<float>{}(user_i1, user_i2);
       return;
     }
 
@@ -82,8 +88,8 @@ class MklAddNOp : public OpKernel {
       if (o_shape.num_elements() == 0) {
         Tensor* out_tensor = nullptr;
         mkl_context.output_shape.SetMklTensor(false);
-        AllocateOutputSetMklShape(ctx, 0, &out_tensor, o_shape,
-                                  mkl_context.output_shape);
+        AllocateOutputSetMklShape(ctx, src1_idx, &out_tensor, o_shape,
+                                 mkl_context.output_shape);
         return;
       }
     }
@@ -92,9 +98,9 @@ class MklAddNOp : public OpKernel {
     mkl_context.in_strides = new size_t[mkl_context.in_dims];
     // Generate size, stride for input if input is in MKL format.
     if (input1_in_mkl_format || input2_in_mkl_format) {
-      const MklShape* tmp_mkl_shape = (input1_in_mkl_format)
-                                          ? &mkl_context.input1_shape
-                                          : &mkl_context.input2_shape;
+      const MklShape* tmp_mkl_shape =
+        (input1_in_mkl_format) ? &mkl_context.input1_shape :
+        &mkl_context.input2_shape;
       for (int i = 0; i < mkl_context.in_dims; i++) {
         mkl_context.in_sizes[i] = tmp_mkl_shape->GetSizes()[i];
         mkl_context.in_strides[i] = tmp_mkl_shape->GetStrides()[i];
@@ -110,7 +116,6 @@ class MklAddNOp : public OpKernel {
             mkl_context.in_strides[i - 1] * mkl_context.in_sizes[i - 1];
       }
     }
-
     std::vector<float> coeff(2, 1.0);
     mkl_context.MklCreateInputLayouts(ctx);
     CHECK_EQ(dnnSumCreate_F32(&mkl_context.Eltwise, mkl_context.attributes, 2,
@@ -127,7 +132,7 @@ class MklAddNOp : public OpKernel {
      mkl_context.output_shape.SetMklLayout(mkl_context.Eltwise, dnnResourceDst);
 
      mkl_context.output_shape.SetTfLayout(
-         mkl_context.in_dims, mkl_context.in_sizes, mkl_context.in_strides);
+        mkl_context.in_dims, mkl_context.in_sizes, mkl_context.in_strides);
      if (input1_in_mkl_format == true) {
       mkl_context.output_shape.SetTfDimOrder(mkl_context.in_dims,
       mkl_context.input1_shape.GetTfToMklDimMap());
@@ -139,12 +144,12 @@ class MklAddNOp : public OpKernel {
                         mkl_context.output_shape.GetMklLayout())) /
                     sizeof(T));
 
-     AllocateOutputSetMklShape(ctx, 0, &output, tf_shape,
+     AllocateOutputSetMklShape(ctx, src1_idx, &output, tf_shape,
                               mkl_context.output_shape);
     } else {
      const TensorShape& o_shape = input1.shape();
      mkl_context.output_shape.SetMklTensor(false);
-     AllocateOutputSetMklShape(ctx, 0, &output, o_shape,
+     AllocateOutputSetMklShape(ctx, src1_idx, &output, o_shape,
                                 mkl_context.output_shape);
     }
 
@@ -172,16 +177,18 @@ class MklAddNOp : public OpKernel {
     void MklCreateInputLayouts(OpKernelContext* context) {
       bool input1_in_mkl_format = input1_shape.IsMklTensor();
       if (!input1_in_mkl_format) {
-        CHECK_EQ(dnnLayoutCreate_F32(&lt_input1, in_dims, in_sizes, in_strides),
-                 E_SUCCESS);
+        CHECK_EQ(
+            dnnLayoutCreate_F32(&lt_input1, in_dims, in_sizes, in_strides),
+            E_SUCCESS);
       } else {
         lt_input1 = static_cast<dnnLayout_t>(input1_shape.GetCurLayout());
       }
 
       bool input2_in_mkl_format = input2_shape.IsMklTensor();
       if (!input2_in_mkl_format) {
-        CHECK_EQ(dnnLayoutCreate_F32(&lt_input2, in_dims, in_sizes, in_strides),
-                 E_SUCCESS);
+        CHECK_EQ(
+            dnnLayoutCreate_F32(&lt_input2, in_dims, in_sizes, in_strides),
+            E_SUCCESS);
       } else {
         lt_input2 = static_cast<dnnLayout_t>(input2_shape.GetCurLayout());
       }
@@ -257,8 +264,8 @@ class MklAddNOp : public OpKernel {
       bool input2_in_mkl_format = input2_shape.IsMklTensor();
       dnnDelete_F32(Eltwise);
       if (!input1_in_mkl_format || !input2_in_mkl_format) {
-        delete[] in_sizes;
-        delete[] in_strides;
+         delete [] in_sizes;
+         delete [] in_strides;
       }
       if (!input1_in_mkl_format) {
          dnnLayoutDelete_F32(lt_input1);
@@ -270,6 +277,151 @@ class MklAddNOp : public OpKernel {
   } MklAddNOpContext;
 };
 
+#else  // INTEL_MKL_DNN
+template <typename Device, typename T>
+class MklAddNOp : public OpKernel {
+ public:
+  ~MklAddNOp() {}
+  explicit MklAddNOp(OpKernelConstruction* context) : OpKernel(context) {}
+
+  void Compute(OpKernelContext* ctx) override {
+    const int num = ctx->num_inputs();
+    // Only additions of 2 input tensors is supported now
+    OP_REQUIRES(ctx, num / 2 == 2,
+                errors::InvalidArgument("Only additions of two tensors "
+                                        "supported by MKL. Num inputs: ",
+                                        num));
+
+    try {
+      auto cpu_engine = engine(engine::cpu, 0);
+      size_t src1_idx = 0, src2_idx = 1;
+      const Tensor& src1_tensor = MklGetInput(ctx, src1_idx);
+      const Tensor& src2_tensor = MklGetInput(ctx, src2_idx);
+
+      MklDnnShape src1_mkl_shape, src2_mkl_shape;
+      GetMklShape(ctx, src1_idx, &src1_mkl_shape);
+      GetMklShape(ctx, src2_idx, &src2_mkl_shape);
+      bool input1_in_mkl_format = src1_mkl_shape.IsMklTensor();
+      bool input2_in_mkl_format = src2_mkl_shape.IsMklTensor();
+      int src1_dims_size = input1_in_mkl_format?
+       src1_mkl_shape.GetDimension(): src1_tensor.dims();
+      int src2_dims_size = input2_in_mkl_format?
+       src2_mkl_shape.GetDimension(): src2_tensor.dims();
+
+      if (!input1_in_mkl_format && src1_dims_size == 0) {
+         Tensor* dst_tensor = nullptr;
+         MklShape mkl_shape_dst;
+         mkl_shape_dst.SetMklTensor(false);
+         AllocateOutputSetMklShape(ctx, src1_idx, &dst_tensor,
+         src1_tensor.shape(), mkl_shape_dst);
+         float user_i1 = (src1_tensor.scalar<T>()());
+         float user_i2 = (src2_tensor.scalar<T>()());
+         dst_tensor->scalar<T>()() =
+           std::plus<float>{}(user_i1, user_i2);
+         return;
+       }
+
+      // If there is nothing to compute, return.
+      if (!input1_in_mkl_format && !input2_in_mkl_format) {
+        if (src1_tensor.shape().num_elements() == 0) {
+           Tensor* dst_tensor = nullptr;
+           MklShape mkl_shape_dst;
+           mkl_shape_dst.SetMklTensor(false);
+           AllocateOutputSetMklShape(ctx, src1_idx, &dst_tensor,
+           src1_tensor.shape(), mkl_shape_dst);
+           return;
+        }
+      }
+
+      // element-wise add operator for tensor input1 and tensor input2
+      std::vector<double> coeff(2, 1.0);
+      MklDnnData<T> src1(&cpu_engine);
+      MklDnnData<T> src2(&cpu_engine);
+      MklDnnData<T> dst(&cpu_engine);
+
+      int tmp_size = input1_in_mkl_format ? src2_dims_size: src1_dims_size;
+      memory::dims dims(tmp_size);
+      memory::dims strides(tmp_size);
+      memory::desc md1({}, memory::data_undef, memory::format_undef);
+      memory::desc md2({}, memory::data_undef, memory::format_undef);
+
+      if ( input1_in_mkl_format || input2_in_mkl_format ) {
+        if ( input1_in_mkl_format ) {
+          md1 = src1_mkl_shape.GetMklLayout();
+          md2 = md1;
+          dst.SetUsrMem(md1);
+        } else {
+          md2 = src2_mkl_shape.GetMklLayout();
+          md1 = md2;
+          dst.SetUsrMem(md2);
+        }
+      } else {
+         dims = TFShapeToMklDnnDims(src1_tensor.shape());
+         strides = CalculateTFStrides(dims);
+         md1 = MklDnnData<T>::CreateBlockedMemDesc(dims, strides);
+         md2 = md1;
+         dst.SetUsrMem(dims, strides);
+      }
+
+      std::vector<memory::primitive_desc> srcs_pd;
+
+      src1.SetUsrMem(md1, &src1_tensor);
+      auto mpd1 = src1.GetUsrMemPrimDesc();
+      srcs_pd.push_back(mpd1);
+
+      src2.SetUsrMem(md2, &src2_tensor);
+      auto mpd2 = src2.GetUsrMemPrimDesc();
+      srcs_pd.push_back(mpd2);
+
+      std::vector<primitive::at> inputs;
+      inputs.push_back(src1.GetOpMem());
+      inputs.push_back(src2.GetOpMem());
+      auto output_pd = dst.GetUsrMemPrimDesc();
+      Tensor* dst_tensor = nullptr;
+      auto sum_pd = sum::primitive_desc(dst.GetUsrMemDesc(), coeff, srcs_pd);
+      auto sum_op = sum(sum_pd, inputs, dst.GetOpMem());
+      if ( input2_in_mkl_format || input1_in_mkl_format ) {
+         MklDnnShape output_mkl_shape;
+         output_mkl_shape.SetMklTensor(true);
+         output_mkl_shape.SetMklLayout(&output_pd);
+         output_mkl_shape.SetElemType(MklDnnType<T>());
+         if ( input1_in_mkl_format ) {
+          output_mkl_shape.SetTfLayout(src1_dims_size,
+          src1_mkl_shape.GetSizesAsMklDnnDims(),
+          src1_mkl_shape.GetTfDataFormat());
+         } else {
+          output_mkl_shape.SetTfLayout(src2_dims_size,
+          src2_mkl_shape.GetSizesAsMklDnnDims(),
+          src2_mkl_shape.GetTfDataFormat());
+         }
+         TensorShape output_tf_shape;
+         output_tf_shape.AddDim((output_pd.get_size() / sizeof(T))
+         + (output_pd.get_size()%sizeof(T) == 0 ? 0 : 1));
+         AllocateOutputSetMklShape(ctx, src1_idx, &dst_tensor, output_tf_shape,
+                                output_mkl_shape);
+      } else {
+         MklShape mkl_shape_dst;
+         mkl_shape_dst.SetMklTensor(false);
+         AllocateOutputSetMklShape(ctx, src1_idx,
+         &dst_tensor, src1_tensor.shape(), mkl_shape_dst);
+      }
+
+      dst.SetUsrMemDataHandle(dst_tensor);
+      std::vector<primitive> net;
+      net.push_back(sum_op);
+      stream(stream::kind::eager).submit(net).wait();
+    } catch (mkldnn::error &e) {
+      string error_msg = "Status: " + std::to_string(e.status) +
+                       ", message: " + string(e.message) +
+                       ", in file " + string(__FILE__) + ":" +
+                       std::to_string(__LINE__);
+      OP_REQUIRES_OK(ctx, errors::Aborted("Operation received an exception:",
+                                            error_msg));
+    }
+  }
+};
+
+#endif
 #define REGISTER_MKL_CPU(T)                                         \
   REGISTER_KERNEL_BUILDER(Name("_MklAddN")                          \
                               .Device(DEVICE_CPU)                   \
diff --git a/tensorflow/core/kernels/mkl_avgpooling_op.cc b/tensorflow/core/kernels/mkl_avgpooling_op.cc
index d90baee069..d751a70fc8 100644
--- a/tensorflow/core/kernels/mkl_avgpooling_op.cc
+++ b/tensorflow/core/kernels/mkl_avgpooling_op.cc
@@ -24,10 +24,25 @@
 
 #include "tensorflow/core/kernels/mkl_pooling_ops_common.h"
 
+#ifdef INTEL_MKL_DNN
+#include "mkldnn.hpp"
+using mkldnn::memory;
+using mkldnn::error;
+using mkldnn::pooling_forward;
+using mkldnn::pooling_backward;
+using mkldnn::padding_kind;
+using mkldnn::engine;
+using mkldnn::prop_kind;
+using mkldnn::algorithm;
+#endif
+
 namespace tensorflow {
 
 typedef Eigen::ThreadPoolDevice CPUDevice;
 
+// For now, MKL-ML is default. So making MKL-DNN not a default choice.
+#ifndef INTEL_MKL_DNN
+
 template <typename Device, typename T>
 class MklAvgPoolingOp : public OpKernel {
  public:
@@ -132,7 +147,7 @@ class MklAvgPoolingOp : public OpKernel {
         E_SUCCESS);
 
     mkl_context.MklCleanup();
-  }
+  }  // Compute
 
  private:
   typedef struct {
@@ -411,7 +426,293 @@ class MklAvgPoolingGradOp : public OpKernel {
   std::vector<int32> stride_;
   Padding padding_;
   TensorFormat data_format_;
-};
+};  // MklAvgPoolingGradOp
+
+
+#else  // INTEL_MKL_DNN is defined
+
+template <typename Device, typename T>
+class MklAvgPoolingOp : public MklPoolingForwardOpBase<T> {
+ public:
+  explicit MklAvgPoolingOp(OpKernelConstruction* context)
+  : MklPoolingForwardOpBase<T>(context) {
+    // Workspace is an MKLDNN construct that is only used in Max Pooling.
+    // So set workspace_enabled_ to false.
+    this->workspace_enabled_ = false;
+  }
+
+  void Compute(OpKernelContext* context) override {
+    try {
+      auto cpu_engine = engine(engine::cpu, 0);
+      const Tensor& input_tensor = MklGetInput(context,
+              this->kInputTensorIndexInput);
+      MklDnnShape dnn_shape_input;
+      GetMklShape(context, this->kInputTensorIndexInput, &dnn_shape_input);
+      this->SanityCheckInput(context, input_tensor, dnn_shape_input);
+      if (!context->status().ok()) return;
+
+      MklDnnData<T> dnn_data_input(&cpu_engine);
+      MklDnnData<T> dnn_data_output(&cpu_engine);
+
+      // initialize variables for the pooling op
+      MklPoolParameters pool_params;
+      // Get the input tensor and initialize the pooling parameters
+      this->ConfigureInput(context, dnn_shape_input,
+                          input_tensor, &pool_params,
+                          &dnn_data_input);
+      OP_REQUIRES_OK(context, context->status());
+
+      // Declare output tensor
+      Tensor* output_tensor = nullptr;
+      memory::dims output_dims_mkl_order;
+      this->GetOutputDims(pool_params, &output_dims_mkl_order);
+
+      // If input is in Mkl layout, then just get the memory format from it
+      // directly, instead of using input data_format to AvgPool.
+      if (dnn_shape_input.IsMklTensor()) {
+        dnn_data_output.SetUsrMem(output_dims_mkl_order,
+                static_cast<memory::format>(dnn_data_input.GetUsrMemDesc()
+                    .data.format));
+
+      } else {
+          dnn_data_output.SetUsrMem(output_dims_mkl_order,
+              this->data_format_mkldnn_);
+      }
+
+        // describe the memory layout
+      dnn_data_output.SetOpMemDesc(output_dims_mkl_order, memory::format::any);
+
+      // 3. create a pooling primitive descriptor
+      auto pool_desc = pooling_forward::desc(prop_kind::forward,
+              algorithm::pooling_avg_exclude_padding,
+              dnn_data_input.GetUsrMemDesc(),
+              dnn_data_output.GetUsrMemDesc(),
+              memory::dims({  pool_params.row_stride,
+                              pool_params.col_stride}),
+              memory::dims({  pool_params.window_rows,
+                              pool_params.window_cols}),
+              memory::dims({  static_cast<int>(pool_params.pad_top),
+                              static_cast<int>(pool_params.pad_left)}),
+              memory::dims({  static_cast<int>(pool_params.pad_bottom),
+                              static_cast<int>(pool_params.pad_right)}),
+              TFPaddingToMklDnnPadding(this->padding_));
+      auto pool_prim_desc = pooling_forward::primitive_desc(pool_desc,
+                                                 cpu_engine);
+
+      this->AllocateOutputTensor(context, pool_prim_desc, output_dims_mkl_order,
+                            this->data_format_mkldnn_, &output_tensor);
+      CHECK_NOTNULL(output_tensor);
+
+      OP_REQUIRES_OK(context, context->status());
+      dnn_data_output.SetUsrMemDataHandle(output_tensor);
+
+      this->PrepareAndExecuteNet(pool_prim_desc,
+                                &dnn_data_input,
+                                &dnn_data_output);
+    } catch (mkldnn::error &e) {
+        string error_msg = "Status: " + std::to_string(e.status) +
+                        ", message: " + string(e.message) +
+                        ", in file " + string(__FILE__) + ":" +
+                        std::to_string(__LINE__);
+        OP_REQUIRES_OK(context,
+                        errors::Aborted("Operation received an exception:",
+                                         error_msg));
+    }
+  }  // Compute
+};  // MklAvgPoolingOp
+
+//-----------------------------------------------------------------------------
+
+template <class Device, class T>
+class MklAvgPoolingGradOp : public MklPoolingBackwardOpBase<T> {
+ public:
+  explicit MklAvgPoolingGradOp(OpKernelConstruction* context)
+      : MklPoolingBackwardOpBase<T>(context) {
+  }
+
+  void Compute(OpKernelContext* context) override {
+    try {
+      auto cpu_engine = engine(engine::cpu, 0);
+      MklDnnShape original_input_mkl_shape, input_gradient_mkl_shape;
+      const Tensor& tensor_in_shape = MklGetInput(context,
+          kInputTensorIndexInputShape);
+      const Tensor& input_gradient_tensor = MklGetInput(context,
+          kInputTensorIndexInputGradient);
+      GetMklShape(context, kInputTensorIndexInputShape,
+            &original_input_mkl_shape);
+      GetMklShape(context, kInputTensorIndexInputGradient,
+            &input_gradient_mkl_shape);
+
+
+      SanityCheckInputs(context, tensor_in_shape,
+                        input_gradient_tensor,
+                        original_input_mkl_shape,
+                        input_gradient_mkl_shape);
+      if (!context->status().ok()) return;
+
+      // Used to allocate output_diff_src/diff_src
+      // and create pool_fwd mdm desc
+      // 0. Input("orig_input_shape: int32") //NOT a T Tensor!
+      // 1. Input("grad: T")
+
+      MklDnnData<T> input_gradient_diff_dst(&cpu_engine);
+      MklDnnData<T> output_diff_src(&cpu_engine);
+      Tensor* output_tensor_diff_src = nullptr;
+      TensorShape original_input_shape;
+      MklPoolParameters pool_params;
+      memory::dims output_dims_mkl_order, original_input_dims_nchw;
+      // Configure the original input memory descriptor
+      memory::desc original_input_md = ConfigureOriginalInput(context,
+                                      tensor_in_shape,
+                                      original_input_mkl_shape,
+                                      &original_input_dims_nchw,
+                                      &pool_params,
+                                      &original_input_shape);
+
+      // configure the original output memory descriptor
+      // by definition, the shape of the original output is the same
+      // as the shape of the gradient diff_dst
+      memory::desc original_output_md = this->ConfigureOriginalOutput(
+                pool_params, input_gradient_mkl_shape, output_dims_mkl_order);
+
+      memory::desc target_diff_dst_md = this->ConfigureInputGradient(
+                                    input_gradient_mkl_shape,
+                                    input_gradient_tensor,
+                                    &input_gradient_diff_dst,
+                                    original_output_md);
+      // The shape of the output diff src needs to be the same shape as the
+      // original input. But we will set its format to be same as the format of
+      // input gradient. We won't use format of original input since it will
+      // always be in Tensorflow layout (given that AvgPoolGrad gets shape of
+      // the input rather than actual input).
+      output_diff_src.SetUsrMem(original_input_dims_nchw,
+                                static_cast<memory::format>(
+                                  target_diff_dst_md.data.format));
+
+      // Create the forward pooling primitive descriptor so we can reference it
+      // in the backward pooling primitive descriptor
+      auto pool_fwd_desc = pooling_forward::desc(prop_kind::forward,
+              algorithm::pooling_avg_exclude_padding,
+              original_input_md,
+              original_output_md,
+              memory::dims({  pool_params.row_stride,
+                              pool_params.col_stride}),
+              memory::dims({  pool_params.window_rows,
+                              pool_params.window_cols}),
+              memory::dims({  static_cast<int>(pool_params.pad_top),
+                              static_cast<int>(pool_params.pad_left)}),
+              memory::dims({  static_cast<int>(pool_params.pad_bottom),
+                              static_cast<int>(pool_params.pad_right)}),
+              TFPaddingToMklDnnPadding(this->padding_));
+      auto pool_fwd_prim_desc
+              = pooling_forward::primitive_desc(pool_fwd_desc,
+                                                  cpu_engine);
+
+      auto pool_bkwd_desc = pooling_backward::desc(
+              algorithm::pooling_avg_exclude_padding,
+              output_diff_src.GetUsrMemDesc(),
+              target_diff_dst_md,
+              memory::dims({  pool_params.row_stride,
+                              pool_params.col_stride}),
+              memory::dims({  pool_params.window_rows,
+                              pool_params.window_cols}),
+              memory::dims({  static_cast<int>(pool_params.pad_top),
+                              static_cast<int>(pool_params.pad_left)}),
+              memory::dims({  static_cast<int>(pool_params.pad_bottom),
+                              static_cast<int>(pool_params.pad_right)}),
+              TFPaddingToMklDnnPadding(this->padding_));
+      auto pool_bkwd_prim_desc
+                = pooling_backward::primitive_desc(pool_bkwd_desc,
+                                              cpu_engine,
+                                              pool_fwd_prim_desc);
+      this->AllocateOutputTensor(context, pool_bkwd_prim_desc,
+                      original_input_dims_nchw,
+                      this->data_format_mkldnn_,
+                      &output_tensor_diff_src);
+
+      output_diff_src.SetUsrMemDataHandle(output_tensor_diff_src);
+
+      this->PrepareAndExecuteNet(pool_bkwd_prim_desc,
+                          &input_gradient_diff_dst,
+                          &output_diff_src,
+                          memory::primitive_desc(
+                              target_diff_dst_md,
+                              cpu_engine));
+    } catch (mkldnn::error &e) {
+      string error_msg = "Status: " + std::to_string(e.status) +
+                      ", message: " + string(e.message) +
+                      ", in file " + string(__FILE__) + ":" +
+                      std::to_string(__LINE__);
+      OP_REQUIRES_OK(context,
+                      errors::Aborted("Compute received an exception:",
+                                      error_msg));
+    }
+  }  // Compute
+
+ private:
+  // 0. Input("orig_input_shape: int32")
+  // 1. Input("grad: T")
+  const int kInputTensorIndexInputShape = 0;
+  const int kInputTensorIndexInputGradient = 1;
+
+  memory::desc ConfigureOriginalInput(OpKernelContext* context,
+        const Tensor& tensor_original_input_shape,
+        const MklDnnShape& original_input_mkl_shape,
+        memory::dims* original_input_dims_mkl_order,
+        MklPoolParameters* pool_params,
+        TensorShape* input_tensor_shape) {
+    CHECK_NOTNULL(original_input_dims_mkl_order);
+    CHECK_NOTNULL(pool_params);
+    CHECK_NOTNULL(input_tensor_shape);
+    // For AvgPoolGrad, we only get the size of the original input because
+    // The original data is irrelvant.
+    auto shape_vec = tensor_original_input_shape.vec<int32>();
+    for (int64 i = 0; i < tensor_original_input_shape.NumElements(); ++i) {
+      input_tensor_shape->AddDim(shape_vec(i));
+    }
+
+    return MklPoolingBackwardOpBase<T>::ConfigureOriginalInput(
+                                              context,
+                                              tensor_original_input_shape,
+                                              original_input_mkl_shape,
+                                              original_input_dims_mkl_order,
+                                              pool_params,
+                                              *input_tensor_shape);
+}
+
+  void SanityCheckInputs(OpKernelContext* context,
+                        const Tensor& tensor_in_shape,
+                        const Tensor& input_gradient_tensor,
+                        const MklDnnShape& original_input_mkl_shape,
+                        const MklDnnShape& input_gradient_mkl_shape) {
+    if (!original_input_mkl_shape.IsMklTensor()) {
+      OP_REQUIRES(context, tensor_in_shape.dims() == 1 &&
+          tensor_in_shape.NumElements() == 4,
+          errors::InvalidArgument("original input shape must be "
+                "1-dimensional and 4 elements"));
+    } else {
+      OP_REQUIRES(context, original_input_mkl_shape.GetDimension() == 1 &&
+          original_input_mkl_shape.DimSize(0) == 4,
+          errors::InvalidArgument("original input shape must be "
+                "1-dimensional and 4 elements"));
+    }
+
+    if (!input_gradient_mkl_shape.IsMklTensor()) {
+      // For avgpooling, input_gradient_diff_dst should have 4 dimensions.
+      OP_REQUIRES(context, input_gradient_tensor.dims() == 4,
+          errors::InvalidArgument("Gradient shape must be "
+                              "4-dimensional"));
+    } else {
+      OP_REQUIRES(context, input_gradient_mkl_shape.GetDimension() == 4,
+          errors::InvalidArgument("Gradient shape must be "
+                              "4-dimensional"));
+    }
+  }
+};  // MklAvgPoolingGradOp
+
+
+
+#endif  // INTEL_MKL_DNN
 
 REGISTER_KERNEL_BUILDER(Name("_MklAvgPool")
                             .Device(DEVICE_CPU)
@@ -427,3 +728,4 @@ REGISTER_KERNEL_BUILDER(Name("_MklAvgPoolGrad")
 
 }  // namespace tensorflow
 #endif  // INTEL_MKL
+
diff --git a/tensorflow/core/kernels/mkl_concat_op.cc b/tensorflow/core/kernels/mkl_concat_op.cc
index e6673b2ffb..d0175dfd71 100644
--- a/tensorflow/core/kernels/mkl_concat_op.cc
+++ b/tensorflow/core/kernels/mkl_concat_op.cc
@@ -1,11 +1,8 @@
 /* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
-
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
 You may obtain a copy of the License at
-
     http://www.apache.org/licenses/LICENSE-2.0
-
 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@@ -33,11 +30,22 @@ limitations under the License.
 #include "mkl_dnn_types.h"
 #include "tensorflow/core/util/mkl_util.h"
 
+#ifdef INTEL_MKL_DNN
+#include "mkldnn.hpp"
+
+using mkldnn::stream;
+using mkldnn::concat;
+#endif
+
 namespace tensorflow {
 typedef Eigen::ThreadPoolDevice CPUDevice;
 
+// List of TensorShape objects. Used in Concat/Split layers.
+typedef std::vector<TensorShape> TensorShapeList;
+
 enum AxisArgumentName { NAME_IS_AXIS, NAME_IS_CONCAT_DIM };
 
+
 // TODO(intelft) Check if we can reuse existing EigenConcatOp using Mutable
 // reference inputs.
 // --------------------------------------------------------------------------
@@ -55,6 +63,8 @@ class EigenConcatBaseOp : public OpKernel {
   // we need to have empty Compute because Compute is pure virtual function.
   void Compute(OpKernelContext* c) {}
 
+#ifndef INTEL_MKL_DNN
+
   void Compute(OpKernelContext* c, const std::vector<Tensor>& values) {
     const Tensor* concat_dim_tensor;
     const char* axis_attribute_name =
@@ -139,8 +149,89 @@ class EigenConcatBaseOp : public OpKernel {
       ConcatCPU<T>(c->device(), inputs_flat, &output_flat);
     }
   }
+
+#else  // MKL_DNN
+
+void Compute(OpKernelContext* c, const std::vector<Tensor>& values,
+                        const TensorShapeList& input_shapes) {
+    const Tensor* concat_dim_tensor;
+    const char* axis_attribute_name =
+        AxisArgName == NAME_IS_AXIS
+            ? "axis"
+            : AxisArgName == NAME_IS_CONCAT_DIM ? "concat_dim" : "<invalid>";
+    OP_REQUIRES_OK(c, c->input(axis_attribute_name, &concat_dim_tensor));
+    OP_REQUIRES(c, IsLegacyScalar(concat_dim_tensor->shape()),
+                errors::InvalidArgument(
+                    axis_attribute_name,
+                    " tensor should be a scalar integer, but got shape ",
+                    concat_dim_tensor->shape().DebugString()));
+    const int32 concat_dim =
+        internal::SubtleMustCopy(concat_dim_tensor->scalar<int32>()());
+    // Instead of accessing values from context, we use input to Compute.
+    const int N = values.size();
+    const int input_dims = input_shapes[0].dims();
+    const TensorShape& input_shape = input_shapes[0];
+
+    int32 axis = concat_dim < 0 ? concat_dim + input_dims : concat_dim;
+    OP_REQUIRES(c,
+                (0 <= axis && axis < input_dims) ||
+                    (allow_legacy_scalars() && concat_dim == 0),
+                errors::InvalidArgument(
+                    "ConcatOp : Expected concatenating dimensions in the range "
+                    "[",
+                    -input_dims, ", ", input_dims, "), but got ", concat_dim));
+    // Note that we reduce the concat of n-dimensional tensors into a two
+    // dimensional concat. Assuming the dimensions of any input/output
+    // tensor are {x0, x1,...,xn-1, y0, y1,...,ym-1}, where the concat is along
+    // the dimension indicated with size y0, we flatten it to {x, y}, where y =
+    // Prod_i(yi) and x = ((n > 0) ? Prod_i(xi) : 1).
+    ConstMatrixVector inputs_flat;
+    inputs_flat.reserve(N);
+    int64 inputs_flat_dim0 = 1;
+    for (int d = 0; d < axis; ++d) {
+      inputs_flat_dim0 *= input_shape.dim_size(d);
+    }
+    int64 output_concat_dim = 0;
+    const bool input_is_scalar = IsLegacyScalar(input_shape);
+    for (int i = 0; i < N; ++i) {
+      const auto in = values[i];
+      const bool in_is_scalar = IsLegacyScalar(input_shapes[i]);
+      OP_REQUIRES(
+          c, (input_shapes[i].dims() == input_dims) ||
+              (input_is_scalar && in_is_scalar),
+          errors::InvalidArgument(
+              "ConcatOp : Ranks of all input tensors should match: shape[0] = ",
+              input_shape.DebugString(), " vs. shape[", i,
+              "] = ", input_shapes[i].DebugString()));
+      if (in.NumElements() > 0) {
+        int64 inputs_flat_dim1 = in.NumElements() / inputs_flat_dim0;
+        inputs_flat.emplace_back(new typename TTypes<T, 2>::ConstMatrix(
+            in.shaped<T, 2>({inputs_flat_dim0, inputs_flat_dim1})));
+      }
+      output_concat_dim += input_shapes[i].dims() > 0 ?
+                           input_shapes[i].dim_size(axis) : 1;
+    }
+
+    TensorShape output_shape(input_shape);
+    if (output_shape.dims() == 0) {
+      output_shape.AddDim(output_concat_dim);
+    } else {
+      output_shape.set_dim(axis, output_concat_dim);
+    }
+    Tensor* output = nullptr;
+    OP_REQUIRES_OK(c, c->allocate_output(0, output_shape, &output));
+    if (output->NumElements() > 0) {
+      int64 output_dim1 = output->NumElements() / inputs_flat_dim0;
+      auto output_flat = output->shaped<T, 2>({inputs_flat_dim0, output_dim1});
+      ConcatCPU<T>(c->device(), inputs_flat, &output_flat);
+    }
+  }
+
+#endif
 };
 
+#ifndef INTEL_MKL_DNN
+
 // --------------------------------------------------------------------------
 //                      Mkl Concat Op
 // --------------------------------------------------------------------------
@@ -327,6 +418,7 @@ class MklConcatOp : public OpKernel {
     OP_REQUIRES_OK(context, context->status());
   }
 
+
  private:
   typedef struct {
     TensorFormat data_format;
@@ -435,8 +527,284 @@ class MklConcatOp : public OpKernel {
         mkl_tensor->flat<uint8>().data(),
         mkl_tensor->flat<uint8>().size() * sizeof(uint8));
   }
+
+  // overloading methods with input shapes as a list of TensorShape's
+  void CallEigenVersion(OpKernelContext* context, const OpInputList& values,
+                        const TensorShapeList& input_shapes) {
+    CHECK_EQ(values.size(), input_shapes.size());
+
+    std::vector<Tensor> converted_values;
+    for (int i = 0; i < input_shapes.size(); i++) {
+      converted_values.push_back(values[i]);
+    }
+
+    // Call Eigen concat.
+    eigen_concat_op_.Compute(context, converted_values);
+
+    // Set dummy Mkl tensor as output Mkl tensor for this op.
+    MklShape mkl_tensor_mkl_shape;
+    mkl_tensor_mkl_shape.SetMklTensor(false);
+    mkl_tensor_mkl_shape.SetDimensions(4);
+    Tensor* mkl_tensor = nullptr;
+    TensorShape mkl_tensor_tf_shape;
+    mkl_tensor_tf_shape.AddDim(
+        SIZE_OF_MKL_SERIAL_DATA(mkl_tensor_mkl_shape.GetDimension()));
+    int tf_output_index = 0;
+    context->allocate_output(
+        GetTensorMetaDataIndex(tf_output_index, context->num_outputs()),
+        mkl_tensor_tf_shape, &mkl_tensor);
+    mkl_tensor_mkl_shape.SerializeMklShape(
+        mkl_tensor->flat<uint8>().data(),
+        mkl_tensor->flat<uint8>().size() * sizeof(uint8));
+  }
 };
 
+#else
+
+// --------------------------------------------------------------------------
+//                      Mkl Concat Op
+// --------------------------------------------------------------------------
+
+template <typename Device, typename T, AxisArgumentName AxisArgName>
+class MklConcatOp : public OpKernel {
+ private:
+  TensorFormat data_format_;
+  EigenConcatBaseOp<Device, T, AxisArgName> eigen_concat_op_;
+
+ public:
+  typedef std::vector<std::unique_ptr<typename TTypes<T, 2>::ConstMatrix>>
+      ConstMatrixVector;
+
+  explicit MklConcatOp(OpKernelConstruction* c)
+      : OpKernel(c), eigen_concat_op_(c) {}
+
+  void Compute(OpKernelContext* context) override {
+    try {
+      auto cpu_engine = engine(engine::cpu, 0);
+      OpInputList input_tensors;
+      GetMklInputList(context, "values", &input_tensors);
+      const int N = input_tensors.size();
+
+      // Get Tensor shapes.
+      std::vector<MklDnnShape> input_shapes(N);
+      GetMklShapeList(context, "values", &input_shapes);
+
+      const Tensor& concat_dim_tensor = (AxisArgName == NAME_IS_CONCAT_DIM)
+                    ? MklGetInput(context, 0) : MklGetInput(context, N);
+      // Sanity checks
+      OP_REQUIRES(context, IsLegacyScalar(concat_dim_tensor.shape()),
+        errors::InvalidArgument(
+            "Concat dim tensor should be a scalar integer, but got shape ",
+            concat_dim_tensor.shape().DebugString()));
+      int32 concat_dim = internal::SubtleMustCopy(
+                           concat_dim_tensor.scalar<int32>()());
+      if (concat_dim < 0) concat_dim = N + concat_dim;
+
+      // check that ranks of all tensors match
+      // and that their shapes match except for concat_dim.
+      int i = 0;
+      bool invoke_eigen = false;
+      bool are_all_mkl_inputs = true, are_all_tf_inputs = true;
+      const TensorShape expected_shape = input_shapes[0].IsMklTensor() ?
+                                         input_shapes[0].GetTfShape() :
+                                         input_tensors[0].shape();
+      size_t expected_dims = expected_shape.dims();
+      for (auto& s : input_shapes) {
+        if (s == expected_shape) {++i; continue;}
+
+        TensorShape s_shape = s.IsMklTensor() ? s.GetTfShape() :
+                      input_tensors[i].shape();
+        size_t s_dims = s_shape.dims();
+
+        OP_REQUIRES(context, s_dims == expected_dims,
+                  errors::InvalidArgument(
+                      "_MklConcatOp : Ranks of all input tensors should match:"
+                      " input dimensions = ",
+                      s_dims, " vs. expected rank = ", expected_dims));
+
+        for (int d = 0; d < expected_dims; ++d) {
+          if (d == concat_dim) continue;
+
+          size_t expected_size = expected_shape.dim_size(d);
+          size_t s_size = s_shape.dim_size(d);
+          OP_REQUIRES(
+            context, expected_size == s_size,
+            errors::InvalidArgument("_MklConcatOp : Dimensions of inputs "
+                    "should match: shape[0][", d, "]= ", expected_size,
+                    " vs. shape[", i, "][", d, "] = ", s_size));
+        }
+
+        if (s.IsMklTensor())
+          are_all_tf_inputs = false;
+        else
+          are_all_mkl_inputs = false;
+
+        if (s_dims != 4) invoke_eigen = true;
+        ++i;
+      }
+
+      // All inputs are not in one format (TF or MKL). This is mixed input case.
+      // We can potentially optimize this case by converting all TF inputs
+      // to Mkl format. But currently, we fall to Eigen for this case.
+      // It may be possible to convert inputs that in TF format to Mkl
+      // format and avoid calling eigen version.
+      if (!are_all_tf_inputs && !are_all_mkl_inputs) invoke_eigen = true;
+
+      // Temporary fallback to Eigen until MKLDNN Concat performance
+      // is improved. To be removed.
+      invoke_eigen = true;
+
+      // Call Eigen library
+      if (invoke_eigen) {
+        TensorShapeList tf_input_shapes;
+        i = 0;
+        for (auto& s : input_shapes) {
+          TensorShape s_shape = s.IsMklTensor() ? s.GetTfShape() :
+                                input_tensors[i].shape();
+          tf_input_shapes.push_back(s_shape);
+          ++i;
+        }
+        CallEigenVersion(context, input_tensors, tf_input_shapes);
+        return;
+      }
+
+      memory::dims dst_dims;
+      if (are_all_mkl_inputs)
+        dst_dims = TFShapeToMklDnnDims(input_shapes[0].GetTfShape());
+      else
+        // When all the inputs are in Tensorflow format, we don't know
+        // what is the input data format. In that case, we just use
+        // output format that is same as input formats.
+        dst_dims = TFShapeToMklDnnDims(input_tensors[0].shape());
+
+      std::vector<memory::primitive_desc> srcs_pd;
+      std::vector<MklDnnData<T>> srcs(N, MklDnnData<T>(&cpu_engine));
+      int64 dst_concat_dim_size = 0;
+      for (int k =0; k < N; k++) {
+        bool is_mkl_tensor = input_shapes[k].IsMklTensor();
+        memory::dims src_dims;
+
+        // Same comment as dst_dims for src_dims.
+        src_dims = (is_mkl_tensor) ?
+                   TFShapeToMklDnnDims(input_shapes[k].GetTfShape()) :
+                   TFShapeToMklDnnDims(input_tensors[k].shape());
+
+        dst_concat_dim_size += src_dims[concat_dim];
+        auto src_md = is_mkl_tensor ? input_shapes[k].GetMklLayout() :
+          // It does not matter what data format we use here (NHWC or NCHW).
+          // We just need to ensure that output of Concat uses same data format
+          // as input.
+                  memory::desc(src_dims, MklDnnType<T>(), memory::format::nhwc);
+
+        srcs[k].SetUsrMem(src_md, &input_tensors[k]);
+        auto src_mpd = srcs[k].GetUsrMemPrimDesc();
+        srcs_pd.push_back(src_mpd);
+      }
+      dst_dims[concat_dim] = dst_concat_dim_size;
+
+      MklDnnData<T> dst(&cpu_engine);
+      memory::desc dst_md({}, memory::data_undef, memory::format_undef);
+      memory::dims dst_dims_in_nchw;
+      if (are_all_mkl_inputs) {
+        // Since we are passing a specific format for destination,
+        // we need to have dst_dims in MklDnn order (NCHW).
+        auto orig_tf_format = input_shapes[0].GetTfDataFormat();
+        dst_dims_in_nchw = MklDnnDimsInNCHW(dst_dims,
+                               MklDnnDataFormatToTFDataFormat(orig_tf_format));
+        // We will set the output in the same format as input to avoid layout
+        // conversions.
+        // Currently we are setting dst format same as input format.
+        // See if we can make this choice in a better way.
+        dst_md = memory::desc(dst_dims_in_nchw, MklDnnType<T>(),
+                 (memory::format) input_shapes[0].GetMklLayout().data.format);
+      } else {
+        // Again, format does not matter here. We just need to make it same as
+        // input format.
+        dst_md = memory::desc(dst_dims, MklDnnType<T>(), memory::format::nhwc);
+      }
+
+      std::vector<primitive::at> inputs;
+      for (int k=0; k < input_tensors.size(); k++)
+        inputs.push_back(srcs[k].GetOpMem());
+
+      // If all inputs are in MKL format, then meaning of concat_dim needs to
+      // change. Value of concat_dim is tied to input Tensorflow data format
+      // (NHWC or NCHW). MklDnn dimensions are in NCHW order. So if Tensorflow
+      // tensors are in NCHW order, then concat_dim semantics is preserved.
+      // But ifinput tensors are in NHWC order, then semantics need to change.
+      // E.g., if we are concatinating over Channel (dimension 3 for NHWC),
+      // then since MklDnn order is NCHW, concat_dim needs to be 1.
+      if (are_all_mkl_inputs)
+        concat_dim = input_shapes[0].TfDimIdx(concat_dim);
+
+      auto concat_pd = concat::primitive_desc(dst_md, concat_dim, srcs_pd);
+
+      MklDnnShape dnn_shape_dst;
+      TensorShape tf_shape_dst;
+      Tensor* dst_tensor = nullptr;
+      if (are_all_mkl_inputs) {
+        dnn_shape_dst.SetMklTensor(true);
+        auto dst_pd = concat_pd.dst_primitive_desc();
+        dnn_shape_dst.SetMklLayout(&dst_pd);
+        dnn_shape_dst.SetElemType(MklDnnType<T>());
+        dnn_shape_dst.SetTfLayout(dst_dims.size(), dst_dims_in_nchw,
+                                  input_shapes[0].GetTfDataFormat());
+        tf_shape_dst.AddDim((dst_pd.get_size() / sizeof(T)));
+      } else {
+        dnn_shape_dst.SetMklTensor(false);
+        tf_shape_dst = MklDnnDimsToTFShape(dst_dims);
+      }
+      AllocateOutputSetMklShape(context, 0, &dst_tensor,
+                                tf_shape_dst, dnn_shape_dst);
+      CHECK_NOTNULL(dst_tensor);
+
+      dst_md = dnn_shape_dst.IsMklTensor() ?
+               dnn_shape_dst.GetMklLayout() : dst_md;
+      dst.SetUsrMem(dst_md, dst_tensor);
+
+      auto concat_op = concat(concat_pd, inputs, dst.GetOpMem());
+      std::vector<primitive> net;
+      net.push_back(concat_op);
+      stream(stream::kind::eager).submit(net).wait();
+    } catch (mkldnn::error &e) {
+        string error_msg = "Status: " + std::to_string(e.status) +
+               ", message: " + string(e.message) + ", in file " +
+               string(__FILE__) + ":" + std::to_string(__LINE__);
+        OP_REQUIRES_OK(context, errors::Aborted(
+                "Operation received an exception:", error_msg));
+    }
+  }
+
+  void CallEigenVersion(OpKernelContext* context, const OpInputList& values,
+                        const TensorShapeList& input_shapes) {
+    CHECK_EQ(values.size(), input_shapes.size());
+
+    std::vector<Tensor> converted_values;
+    for (int i = 0; i < input_shapes.size(); i++)
+      converted_values.push_back(values[i]);
+
+    // Call Eigen concat.
+    eigen_concat_op_.Compute(context, converted_values, input_shapes);
+
+    // Set output Mkl tensor for this op.
+    MklDnnShape dnn_shape_output;
+    dnn_shape_output.SetMklTensor(false);
+    dnn_shape_output.SetDimensions(4);
+    Tensor* output_tensor = nullptr;
+    TensorShape tf_shape_output;
+    tf_shape_output.AddDim(
+        dnn_shape_output.GetSerializeBufferSize());
+    context->allocate_output(
+        GetTensorMetaDataIndex(0, context->num_outputs()),
+        tf_shape_output, &output_tensor);
+    dnn_shape_output.SerializeMklDnnShape(
+        output_tensor->flat<uint8>().data(),
+        output_tensor->flat<uint8>().size() * sizeof(uint8));
+  }
+};
+
+#endif
+
 /* Use optimized concat for float type only */
 #define REGISTER_MKL_CPU(type)                                              \
   REGISTER_KERNEL_BUILDER(Name("_MklConcat")                                \
diff --git a/tensorflow/core/kernels/mkl_conv_grad_filter_ops.cc b/tensorflow/core/kernels/mkl_conv_grad_filter_ops.cc
index f291281108..793fa24d99 100644
--- a/tensorflow/core/kernels/mkl_conv_grad_filter_ops.cc
+++ b/tensorflow/core/kernels/mkl_conv_grad_filter_ops.cc
@@ -47,11 +47,8 @@ limitations under the License.
 
 using mkldnn::stream;
 using mkldnn::prop_kind;
-
-using mkldnn::convolution_forward;
 using mkldnn::convolution_backward_weights;
-using mkldnn::convolution_direct;
-
+using mkldnn::memory;
 #endif
 
 namespace tensorflow {
@@ -426,183 +423,229 @@ class MklConv2DCustomBackpropFilterOp : public OpKernel {
   TensorFormat data_format_;
 };
 
+#define REGISTER_MKL_FILTER_KERNELS(T)                              \
+  REGISTER_KERNEL_BUILDER(Name("_MklConv2DBackpropFilter")          \
+                              .Device(DEVICE_CPU)                   \
+                              .TypeConstraint<T>("T")               \
+                              .Label(mkl_op_registry::kMklOpLabel), \
+              MklConv2DCustomBackpropFilterOp<CPUDevice, T>);
+TF_CALL_float(REGISTER_MKL_FILTER_KERNELS);
+#undef REGISTER_MKL_FILTER_KERNELS
+
 #else
 
-template <typename Device, class T>
-class MklConv2DCustomBackpropFilterOp : public OpKernel {
+template <typename Device, class T, bool biasEnabled>
+class MklConv2DCustomBackpropFilterOp :
+  public MklConv2DBackpropCommonOp<Device, T> {
  public:
   explicit MklConv2DCustomBackpropFilterOp(OpKernelConstruction* context)
-      : OpKernel(context) {
-    string data_format;
-    OP_REQUIRES_OK(context, context->GetAttr("data_format", &data_format));
-    OP_REQUIRES(context, FormatFromString(data_format, &data_format_),
-                errors::InvalidArgument("Invalid data format"));
+      : MklConv2DBackpropCommonOp<Device, T>(context) { }
+  ~MklConv2DCustomBackpropFilterOp() {}
 
-    OP_REQUIRES_OK(context, context->GetAttr("strides", &strides_));
-    int stride_n = GetTensorDim(strides_, data_format_, 'N');
-    int stride_c = GetTensorDim(strides_, data_format_, 'C');
-    OP_REQUIRES(
-        context, (stride_n == 1 && stride_c == 1),
-        errors::InvalidArgument("Current implementation does not yet support "
-                                "strides in the batch and depth dimensions."));
-    OP_REQUIRES_OK(context, context->GetAttr("padding", &padding_));
+ private:
+  void ValidateMklShapes(const MklDnnShape& input_mkl_shape,
+                         const MklDnnShape& filter_mkl_shape,
+                         const MklDnnShape& obp_mkl_shape) {
+    CHECK(!filter_mkl_shape.IsMklTensor())
+      << "Conv2DBackpropFilter: filter should not be in MKL Layout";
   }
 
-  void Compute(OpKernelContext* context) override {
-    try {
-      auto cpu_engine = engine(engine::cpu, 0);
+  size_t GetInputTensorIndexWithSizes() { return 1; /* filter index */ }
 
-      MklDnnData<T> input(&cpu_engine);
-      MklDnnData<T> outbackprop(&cpu_engine);
-      MklDnnData<T> output(&cpu_engine);
+  TensorShape MakeInputTfShape(OpKernelContext* context,
+                               const Tensor& input_tensor) {
+    size_t input_idx = 0;
+    return GetTfShape(context, input_idx);
+  }
 
-      // Input tensors
-      const Tensor& input_tensor = MklGetInput(context, 0);
-      const Tensor& filter_tensor = MklGetInput(context, 1);
-      const Tensor& obp_tensor = MklGetInput(context, 2);  // Outbackprop
+  TensorShape MakeFilterTfShape(OpKernelContext* context,
+                                const Tensor& filter_tensor) {
+    TensorShape filter_tf_shape;
+    CHECK_EQ(TensorShapeUtils::IsVector(filter_tensor.shape()), true);
+    CHECK_EQ(TensorShapeUtils::MakeShape(
+             filter_tensor.vec<int32>(), &filter_tf_shape).ok(), true);
+    return filter_tf_shape;
+  }
 
-      // Generate input shapes.
-      TensorShape filter_shape;
-      OP_REQUIRES(context, TensorShapeUtils::IsVector(filter_tensor.shape()),
-        errors::InvalidArgument(
-              "Conv2DBackpropFilter: filter_sizes input must be 1-dim, not ",
-              filter_tensor.dims()));
-      OP_REQUIRES_OK(context, TensorShapeUtils::MakeShape(
-                        filter_tensor.vec<int32>(), &filter_shape));
-      TensorShape input_shape = input_tensor.shape();
-      TensorShape obp_shape = obp_tensor.shape();
-
-      // By default, all dims are in MKL order. Only dims in TF order
-      // are those with prefix tf_order.
-      memory::dims obp_dims, fwd_input_dims, fwd_filter_dims;
-      memory::dims padding_l, padding_r, strides, fwd_output_dims;
-      memory::dims fwd_output_dims_tf_order;
-
-      // Get forward convolution parameters.
-      MklDnnConvUtil conv_utl(context, strides_, padding_, data_format_);
-      conv_utl.GetConvFwdSizesInMklOrder(input_shape, filter_shape,
-                                         &fwd_input_dims, &fwd_filter_dims,
-                                         &strides,
-                                         &fwd_output_dims_tf_order,
-                                         &fwd_output_dims,
-                                         &padding_l, &padding_r);
-      if (!context->status().ok()) return;
-
-      // Create Convolution forward descriptor since Convolution backward
-      // API needs it. For that, we first need to create input, filter
-      // and output memory descriptors.
-      auto mkl_data_format = TFDataFormatToMklDnnDataFormat(data_format_);
-      auto fwd_src_md = memory::desc(fwd_input_dims, MklDnnType<T>(),
-                                     mkl_data_format);
-      auto fwd_filter_md = memory::desc(fwd_filter_dims, MklDnnType<T>(),
-                                        memory::format::hwio);
-      auto fwd_out_md = memory::desc(fwd_output_dims, MklDnnType<T>(),
-                                     mkl_data_format);
-      auto fwd_desc = convolution_forward::desc(prop_kind::forward,
-            convolution_direct, fwd_src_md, fwd_filter_md, fwd_out_md,
-            strides, padding_l, padding_r, TFPaddingToMklDnnPadding(padding_));
-      auto fwd_pd = convolution_forward::primitive_desc(fwd_desc, cpu_engine);
-
-      // Allocate output tensor and shape
-      // TODO(nhasabni): Update this when support for MKL layout is added.
-      // Shape of output of Conv2DBackpropInput is same as 'input' of Conv2D.
-      TensorShape tf_output_shape(filter_shape);
-      MklShape mkl_output_mkl_shape;
-      mkl_output_mkl_shape.SetMklTensor(false);
-      Tensor* output_tensor = nullptr;
-      AllocateOutputSetMklShape(context, 0, &output_tensor, tf_output_shape,
-                                mkl_output_mkl_shape);
-
-      // Create memory for user data.
-      // Describe how the inputs and outputs of Convolution look like. Also
-      // specify buffers containing actual input and output data.
-      // Although input shape required is in MKL-DNN order, the layout is
-      // Tensorflow's layout (NHWC or NCHW depending on data format).
-      input.SetUsrMem(fwd_input_dims, mkl_data_format, &input_tensor);
-      // Outbackprop shape is NHWC or NCHW depending on data format. Since
-      // GetInputSizeInMklOrder function returns size in that order we just use
-      // use that function directly.
-      conv_utl.GetInputSizeInMklOrder(obp_shape, &obp_dims);
-      if (!context->status().ok()) return;
-      outbackprop.SetUsrMem(obp_dims, mkl_data_format, &obp_tensor);
-      // Although output shape required is in MKL-DNN order,
-      // layout is Tensorflow's filter layout (HWIO)
-      // Shape of output of Conv2DBackpropInput is same as shape of filter.
-      memory::dims bwd_output_dims = fwd_filter_dims;
-      output.SetUsrMem(bwd_output_dims, memory::format::hwio, output_tensor);
-
-      // Create memory descriptors for convolution data w/ no specified format.
-      input.SetOpMemDesc(fwd_input_dims, memory::format::any);
-      outbackprop.SetOpMemDesc(obp_dims, memory::format::any);
-      output.SetOpMemDesc(bwd_output_dims, memory::format::any);
-
-      // Create convolution backward weights primitive.
-      auto bwd_desc = convolution_backward_weights::desc(convolution_direct,
-                          input.GetOpMemDesc(), output.GetOpMemDesc(),
-                          outbackprop.GetOpMemDesc(), strides, padding_l,
-                          padding_r, TFPaddingToMklDnnPadding(padding_));
-
-      auto bwd_pd = convolution_backward_weights::primitive_desc(bwd_desc,
-                                                              cpu_engine,
-                                                              fwd_pd);
-
-      PrepareAndExecutePrimitive(bwd_pd, &input, &outbackprop, &output);
-    } catch (mkldnn::error &e) {
-     string error_msg = "Status: " + std::to_string(e.status) +
-                       ", message: " + string(e.message) +
-                       ", in file " + string(__FILE__) + ":" +
-                       std::to_string(__LINE__);
-     OP_REQUIRES_OK(context, errors::Aborted("Operation received an exception:",
-                                            error_msg));
+  const memory::dims& GetOutputDims(const memory::dims& fwd_input_dims,
+                                    const memory::dims& fwd_filter_dims) {
+    // Shape of output of Conv2DBackpropFilter is same as shape of filter.
+    return fwd_filter_dims;
+  }
+
+  memory::format GetOutputFormat(const memory::format data_format) {
+    // Output layout is Tensorflow's filter layout (HWIO).
+    return memory::format::hwio;
+  }
+
+  void CreatePrimitive(OpKernelContext* context,
+                       const engine& cpu_engine,
+                       const convolution_forward::primitive_desc& conv_fwd_pd,
+                       MklDnnData<T>* input, MklDnnData<T>* filter,
+                       MklDnnData<T>* outbackprop, MklDnnData<T>* output,
+                       Tensor** output_tensor,
+                       const memory::dims& strides,
+                       const memory::dims& padding_l,
+                       const memory::dims& padding_r,
+                       padding_kind padding,
+                       const memory::dims& bwd_output_dims,
+                       memory::format bwd_output_format) {
+    CHECK_NOTNULL(context);
+    CHECK_NOTNULL(input);
+    CHECK_NOTNULL(filter);
+    CHECK_NOTNULL(outbackprop);
+    CHECK_NOTNULL(output);
+    CHECK_NOTNULL(output_tensor);
+
+    MklDnnData<T>* bias_grad = nullptr;
+    int depth = 0;
+    if (biasEnabled) {
+      // Data structure for bias_grad
+      bias_grad = new MklDnnData<T> (&cpu_engine);
+      TensorShape obp_tf_shape = GetTfShape(context, 2);
+      depth = (MklConv2DBackpropCommonOp<Device, T>::GetTFDataFormat()
+                == FORMAT_NCHW) ?
+          obp_tf_shape.dim_size(1) : obp_tf_shape.dim_size(3);
+      memory::dims bias_grad_dims = {depth};
+      bias_grad->SetOpMemDesc(bias_grad_dims, memory::format::x);
+    }
+
+    // Create convolution backward weights primitive.
+    auto bwd_desc = (biasEnabled && (bias_grad != nullptr))?
+        convolution_backward_weights::desc(convolution_direct,
+                                input->GetOpMemDesc(), output->GetOpMemDesc(),
+                                bias_grad->GetOpMemDesc(),
+                                outbackprop->GetOpMemDesc(), strides, padding_l,
+                                padding_r, padding) :
+        convolution_backward_weights::desc(convolution_direct,
+                          input->GetOpMemDesc(), output->GetOpMemDesc(),
+                          outbackprop->GetOpMemDesc(), strides, padding_l,
+                          padding_r, padding);
+
+    auto bwd_pd = convolution_backward_weights::primitive_desc(bwd_desc,
+                                                            cpu_engine,
+                                                            conv_fwd_pd);
+
+    // Allocate output tensor.
+    AllocateOutputTensor(context, bwd_pd, bwd_output_dims,
+                         bwd_output_format, output_tensor);
+
+    CHECK_NOTNULL(*output_tensor);
+    // Set buffer handle using allocated output tensor.
+    output->SetUsrMemDataHandle(*output_tensor);
+
+    if (biasEnabled && (bias_grad != nullptr)) {
+      // Allocate bias_grad tensor
+      TensorShape bias_grad_shape({depth});
+      Tensor* bias_grad_tensor = nullptr;
+      AllocateBiasGradTensor(context, bias_grad_shape, &bias_grad_tensor);
+      memory::dims bias_grad_dims = {depth};
+      // Since Bias is 1D, we use format::x from MKLDNN to represent it.
+      auto bias_grad_md = memory::desc({bias_grad_dims}, MklDnnType<T>(),
+                                       memory::format::x);
+      bias_grad->SetUsrMem(bias_grad_md, bias_grad_tensor);
+      bias_grad->SetUsrMemDataHandle(bias_grad_tensor);
+    }
+
+    if (biasEnabled && (bias_grad != nullptr)) {
+      PrepareAndExecutePrimitive(bwd_pd, input, outbackprop, output, bias_grad);
+    } else {
+      PrepareAndExecutePrimitive(bwd_pd, input, outbackprop, output);
     }
   }
 
- private:
-  std::vector<int32> strides_;
-  Padding padding_;
-  TensorFormat data_format_;
+  // Allocate output tensor.
+  void AllocateOutputTensor(OpKernelContext* context,
+                  const convolution_backward_weights::primitive_desc& conv_pd,
+                  const memory::dims& output_dims_mkl_order,
+                  memory::format output_tf_format, Tensor** output_tensor) {
+      CHECK_NOTNULL(output_tensor);
+
+      // For BackpropFilter, we convert the output tensor back in Tensorflow
+      // layout. Because typically, BackpropFilter is the last operator in the
+      // graph that emit filter gradient that is provided to ApplyGradient
+      // method to update the filter. But it may be possible to eliminate this
+      // by forwarding filter in MKL layout if we support ApplyGradient method
+      // for MKL layout propagation.
+      MklDnnShape output_mkl_shape;
+      output_mkl_shape.SetMklTensor(false);
+      // output_dims_mkl_order is in OIHW format.
+      // Allocate shape of TF tensor in HWIO format.
+      TensorShape output_tf_shape({output_dims_mkl_order[MklDnnDims::Dim_H],
+                                   output_dims_mkl_order[MklDnnDims::Dim_W],
+                                   output_dims_mkl_order[MklDnnDims::Dim_I],
+                                   output_dims_mkl_order[MklDnnDims::Dim_O]});
+      AllocateOutputSetMklShape(context, 0, output_tensor, output_tf_shape,
+                                output_mkl_shape);
+  }
+
+  // Allocate tensor for bias grad
+  void AllocateBiasGradTensor(OpKernelContext* context,
+                              const TensorShape& bias_grad_shape,
+                              Tensor** bias_grad_tensor) {
+    CHECK_NOTNULL(bias_grad_tensor);
+
+    MklDnnShape bias_grad_mkl_shape;
+    bias_grad_mkl_shape.SetMklTensor(false);
+    AllocateOutputSetMklShape(context, 1, bias_grad_tensor, bias_grad_shape,
+                              bias_grad_mkl_shape);
+  }
 
   // Prepare and execute net - checks for input and output reorders.
   void PrepareAndExecutePrimitive(
                   const convolution_backward_weights::primitive_desc& conv_pd,
                   MklDnnData<T>* input, MklDnnData<T>* obp,
-                  MklDnnData<T>* output) {
+                  MklDnnData<T>* output, MklDnnData<T>* bias_grad = nullptr) {
     // Create reorders between user layout and MKL layout if it is needed and
     // add it to the net before convolution.
     std::vector<primitive> net;
     input->CheckReorderToOpMem(conv_pd.src_primitive_desc(), &net);
     obp->CheckReorderToOpMem(conv_pd.diff_dst_primitive_desc(), &net);
 
-    // Memory for output of convolution. Since we may need reorder on the
-    // output side, we will prepare reorder primitive in case output
-    // reorder to user memory is required.
+    // For BackpropFilter, we convert the output tensor back in Tensorflow
+    // layout.
     bool output_reorder_required = output->PrepareReorderToUserMemIfReq(
                                       conv_pd.diff_weights_primitive_desc());
 
-    net.push_back(convolution_backward_weights(conv_pd, input->GetOpMem(),
-                                    obp->GetOpMem(), output->GetOpMem()));
+    if (biasEnabled && (bias_grad != nullptr)) {
+      net.push_back(convolution_backward_weights(conv_pd, input->GetOpMem(),
+                                      obp->GetOpMem(), output->GetOpMem(),
+                                      bias_grad->GetOpMem()));
+    } else {
+      net.push_back(convolution_backward_weights(conv_pd, input->GetOpMem(),
+                                      obp->GetOpMem(), output->GetOpMem()));
+    }
 
-    // Insert reorder primitive in the net for output reorder if reorder is
-    // required.
     if (output_reorder_required) {
       output->InsertReorderToUserMem(&net);
     }
 
-    // Handle output reorder
     stream(stream::kind::eager).submit(net).wait();
   }
 };
-#endif
 
 #define REGISTER_MKL_FILTER_KERNELS(T)                              \
   REGISTER_KERNEL_BUILDER(Name("_MklConv2DBackpropFilter")          \
                               .Device(DEVICE_CPU)                   \
                               .TypeConstraint<T>("T")               \
                               .Label(mkl_op_registry::kMklOpLabel), \
-                          MklConv2DCustomBackpropFilterOp<CPUDevice, T>);
+              MklConv2DCustomBackpropFilterOp<CPUDevice, T, false>);\
+  REGISTER_KERNEL_BUILDER(Name("_MklConv2DBackpropFilterWithBias")  \
+                              .Device(DEVICE_CPU)                   \
+                              .TypeConstraint<T>("T")               \
+                              .Label(mkl_op_registry::kMklOpLabel), \
+              MklConv2DCustomBackpropFilterOp<CPUDevice, T, true>); \
+  REGISTER_KERNEL_BUILDER(Name("__MklDummyConv2DBackpropFilterWithBias")  \
+                              .Device(DEVICE_CPU)                   \
+                              .TypeConstraint<T>("T")               \
+                              .Label(mkl_op_registry::kMklOpLabel), \
+              MklDummyOp<CPUDevice, T>);
 
 TF_CALL_float(REGISTER_MKL_FILTER_KERNELS);
 #undef REGISTER_MKL_FILTER_KERNELS
+
+#endif  // INTEL_MKL_DNN
+
 }  // namespace tensorflow
 
 #endif  // INTEL_MKL
diff --git a/tensorflow/core/kernels/mkl_conv_grad_input_ops.cc b/tensorflow/core/kernels/mkl_conv_grad_input_ops.cc
index 4a47d0463e..df51df9638 100644
--- a/tensorflow/core/kernels/mkl_conv_grad_input_ops.cc
+++ b/tensorflow/core/kernels/mkl_conv_grad_input_ops.cc
@@ -49,9 +49,6 @@ limitations under the License.
 
 using mkldnn::stream;
 using mkldnn::prop_kind;
-
-using mkldnn::convolution_forward;
-using mkldnn::convolution_direct;
 using mkldnn::convolution_backward_data;
 #endif
 
@@ -362,143 +359,117 @@ class MklConv2DCustomBackpropInputOp : public OpKernel {
 #else
 
 template <typename Device, class T>
-class MklConv2DCustomBackpropInputOp : public OpKernel {
+class MklConv2DCustomBackpropInputOp :
+  public MklConv2DBackpropCommonOp<Device, T> {
  public:
-  ~MklConv2DCustomBackpropInputOp() {}
   explicit MklConv2DCustomBackpropInputOp(OpKernelConstruction* context)
-      : OpKernel(context) {
-    string data_format_str;
-    OP_REQUIRES_OK(context, context->GetAttr("data_format", &data_format_str));
-    OP_REQUIRES(context, FormatFromString(data_format_str, &data_format_),
-                errors::InvalidArgument("Invalid data format"));
-    OP_REQUIRES_OK(context, context->GetAttr("strides", &strides_));
-    int stride_n = GetTensorDim(strides_, data_format_, 'N');
-    int stride_c = GetTensorDim(strides_, data_format_, 'C');
-    OP_REQUIRES(
-        context, (stride_n == 1 && stride_c == 1),
-        errors::InvalidArgument("Current implementation does not yet support "
-                                "strides in the batch and depth dimensions."));
+      : MklConv2DBackpropCommonOp<Device, T>(context) { }
+  ~MklConv2DCustomBackpropInputOp() {}
 
-    OP_REQUIRES_OK(context, context->GetAttr("padding", &padding_));
+ private:
+  void ValidateMklShapes(const MklDnnShape& input_mkl_shape,
+                         const MklDnnShape& filter_mkl_shape,
+                         const MklDnnShape& obp_mkl_shape) {
+    // Tensor that feeds to 'Input' slot of BackpropInput is always just a shape
+    // of the Tensor and never an actual tensor. So it will never be in MKL
+    // layout.
+    CHECK(!input_mkl_shape.IsMklTensor())
+      << "Conv2DBackpropInput: input should not be in MKL Layout";
   }
 
-  void Compute(OpKernelContext* context) override {
-    try {
-      auto cpu_engine = engine(engine::cpu, 0);
+  size_t GetInputTensorIndexWithSizes() { return 0; /* input index */ }
 
-      MklDnnData<T> filter(&cpu_engine);
-      MklDnnData<T> outbackprop(&cpu_engine);
-      MklDnnData<T> output(&cpu_engine);
+  TensorShape MakeInputTfShape(OpKernelContext* context,
+                               const Tensor& input_tensor) {
+    TensorShape input_tf_shape;
+    CHECK_EQ(TensorShapeUtils::IsVector(input_tensor.shape()), true);
+    CHECK_EQ(TensorShapeUtils::MakeShape(input_tensor.vec<int32>(),
+                                         &input_tf_shape).ok(), true);
+    return input_tf_shape;
+  }
 
-      // Input tensors
-      const Tensor& input_tensor = MklGetInput(context, 0);
-      const Tensor& filter_tensor = MklGetInput(context, 1);
-      const Tensor& obp_tensor = MklGetInput(context, 2);  // Outbackprop
+  TensorShape MakeFilterTfShape(OpKernelContext* context,
+                                const Tensor& filter_tensor) {
+    size_t filter_idx = 1;
+    return GetTfShape(context, filter_idx);
+  }
 
-      // Generate input shape.
-      TensorShape input_shape;
-      OP_REQUIRES(context, TensorShapeUtils::IsVector(input_tensor.shape()),
-        errors::InvalidArgument(
-              "Conv2DBackpropInput: input_sizes input must be 1-dim, not ",
-              input_tensor.dims()));
-      OP_REQUIRES_OK(context, TensorShapeUtils::MakeShape(
-                        input_tensor.vec<int32>(), &input_shape));
-      TensorShape filter_shape = filter_tensor.shape();
-      TensorShape obp_shape = obp_tensor.shape();
-
-      // By default, all dims are in MKL order. Only dims in TF order
-      // are those with prefix tf_order.
-      memory::dims obp_dims, fwd_input_dims, fwd_filter_dims;
-      memory::dims padding_l, padding_r, strides, fwd_output_dims;
-      memory::dims fwd_output_dims_tf_order;
-
-      // Get forward convolution parameters.
-      MklDnnConvUtil conv_utl(context, strides_, padding_, data_format_);
-      conv_utl.GetConvFwdSizesInMklOrder(input_shape, filter_shape,
-                                         &fwd_input_dims, &fwd_filter_dims,
-                                         &strides,
-                                         &fwd_output_dims_tf_order,
-                                         &fwd_output_dims,
-                                         &padding_l, &padding_r);
-      if (!context->status().ok()) return;
-
-      // Create Convolution forward descriptor since Convolution backward
-      // API needs it. For that, we first need to create input, filter
-      // and output memory descriptors.
-      auto mkl_data_format = TFDataFormatToMklDnnDataFormat(data_format_);
-      auto fwd_src_md = memory::desc(fwd_input_dims, MklDnnType<T>(),
-                                     mkl_data_format);
-      auto fwd_filter_md = memory::desc(fwd_filter_dims, MklDnnType<T>(),
-                                        memory::format::hwio);
-      auto fwd_out_md = memory::desc(fwd_output_dims, MklDnnType<T>(),
-                                     mkl_data_format);
-      auto fwd_desc = convolution_forward::desc(prop_kind::forward,
-            convolution_direct, fwd_src_md, fwd_filter_md, fwd_out_md,
-            strides, padding_l, padding_r, TFPaddingToMklDnnPadding(padding_));
-      auto fwd_pd = convolution_forward::primitive_desc(fwd_desc, cpu_engine);
-
-      // Allocate output tensor and shape
-      // TODO(nhasabni): Update this when support for MKL layout is added.
-      // Shape of output of Conv2DBackpropInput is same as 'input' of Conv2D.
-      TensorShape tf_output_shape(input_shape);
-      MklShape mkl_output_mkl_shape;
-      mkl_output_mkl_shape.SetMklTensor(false);
-      Tensor* output_tensor = nullptr;
-      AllocateOutputSetMklShape(context, 0, &output_tensor, tf_output_shape,
-                                mkl_output_mkl_shape);
-
-      // Create memory for user data.
-      // Describe how the inputs and outputs of Convolution look like. Also
-      // specify buffers containing actual input and output data.
-      // Although input shape required is in MKL-DNN order, the layout is
-      // Tensorflow's layout (NHWC or NCHW depending on data format).
-      // Although filter shape (filter_dims) required is in MKL-DNN order,
-      // the layout is Tensorflow's layout (HWIO).
-      // Shape of Conv2DBackpropInput's filter is same as that of Conv2D filter.
-      filter.SetUsrMem(fwd_filter_dims, memory::format::hwio, &filter_tensor);
-      // Outbackprop shape is NHWC or NCHW depending on data format. Since
-      // GetInputSizeInMklOrder function returns size in that order we just use
-      // use that function directly.
-      conv_utl.GetInputSizeInMklOrder(obp_shape, &obp_dims);
-      if (!context->status().ok()) return;
-      outbackprop.SetUsrMem(obp_dims, mkl_data_format, &obp_tensor);
-      // Although output shape required is in MKL-DNN order,
-      // layout is Tensorflow's layout (NHWC or NCHW depending on data format).
-      // Shape of output of Conv2DBackpropInput is same as shape of 'input'
-      // of Conv2D.
-      memory::dims bwd_output_dims = fwd_input_dims;
-      output.SetUsrMem(bwd_output_dims, mkl_data_format, output_tensor);
-
-      // Create memory descriptors for convolution data w/ no specified format.
-      filter.SetOpMemDesc(fwd_filter_dims, memory::format::any);
-      outbackprop.SetOpMemDesc(obp_dims, memory::format::any);
-      output.SetOpMemDesc(bwd_output_dims, memory::format::any);
-
-      // Create convolution backward data primitive.
-      auto bwd_desc = convolution_backward_data::desc(convolution_direct,
-                          output.GetOpMemDesc(), filter.GetOpMemDesc(),
-                          outbackprop.GetOpMemDesc(), strides, padding_l,
-                          padding_r, TFPaddingToMklDnnPadding(padding_));
-
-      auto bwd_pd = convolution_backward_data::primitive_desc(bwd_desc,
-                                                              cpu_engine,
-                                                              fwd_pd);
-
-      PrepareAndExecutePrimitive(bwd_pd, &filter, &outbackprop, &output);
-    } catch (mkldnn::error &e) {
-     string error_msg = "Status: " + std::to_string(e.status) +
-                       ", message: " + string(e.message) +
-                       ", in file " + string(__FILE__) + ":" +
-                       std::to_string(__LINE__);
-     OP_REQUIRES_OK(context, errors::Aborted("Operation received an exception:",
-                                            error_msg));
-    }
+  const memory::dims& GetOutputDims(const memory::dims& fwd_input_dims,
+                                    const memory::dims& fwd_filter_dims) {
+    // Output Shape of Conv2DBackpropInput is same as shape of Conv2D 'input'.
+    return fwd_input_dims;
   }
 
- private:
-  std::vector<int32> strides_;
-  Padding padding_;
-  TensorFormat data_format_;
+  memory::format GetOutputFormat(const memory::format data_format) {
+    // Output layout is Tensorflow's layout in data format order.
+    return data_format;
+  }
+
+  void CreatePrimitive(OpKernelContext* context,
+                       const engine& cpu_engine,
+                       const convolution_forward::primitive_desc& conv_fwd_pd,
+                       MklDnnData<T>* input, MklDnnData<T>* filter,
+                       MklDnnData<T>* outbackprop, MklDnnData<T>* output,
+                       Tensor** output_tensor,
+                       const memory::dims& strides,
+                       const memory::dims& padding_l,
+                       const memory::dims& padding_r,
+                       padding_kind padding,
+                       const memory::dims& bwd_output_dims,
+                       memory::format bwd_output_format) {
+    CHECK_NOTNULL(context);
+    CHECK_NOTNULL(input);
+    CHECK_NOTNULL(filter);
+    CHECK_NOTNULL(outbackprop);
+    CHECK_NOTNULL(output);
+    CHECK_NOTNULL(output_tensor);
+
+    // Create convolution backward data primitive.
+    auto bwd_desc = convolution_backward_data::desc(convolution_direct,
+                      output->GetOpMemDesc(), filter->GetOpMemDesc(),
+                      outbackprop->GetOpMemDesc(), strides, padding_l,
+                      padding_r, padding);
+
+    auto bwd_pd = convolution_backward_data::primitive_desc(bwd_desc,
+                                                          cpu_engine,
+                                                          conv_fwd_pd);
+
+
+    // Allocate output tensor in TensorFlow and MKL layout.
+    AllocateOutputTensor(context, bwd_pd, bwd_output_dims,
+                         bwd_output_format, output_tensor);
+    CHECK_NOTNULL(*output_tensor);
+    // Set buffer handle using allocated output tensor.
+    output->SetUsrMemDataHandle(*output_tensor);
+
+    PrepareAndExecutePrimitive(bwd_pd, filter, outbackprop, output);
+  }
+
+  // Allocate output tensor.
+  void AllocateOutputTensor(OpKernelContext* context,
+                  const convolution_backward_data::primitive_desc& conv_pd,
+                  const memory::dims& output_dims_mkl_order,
+                  memory::format output_tf_format, Tensor** output_tensor) {
+      CHECK_NOTNULL(output_tensor);
+
+      // Output primitive descriptor for backward data is diff_src.
+      auto dst_pd = conv_pd.diff_src_primitive_desc();
+
+      // Allocate shape of Mkl tensor.
+      MklDnnShape output_mkl_shape;
+      output_mkl_shape.SetMklTensor(true);
+      output_mkl_shape.SetMklLayout(&dst_pd);
+      output_mkl_shape.SetElemType(MklDnnType<T>());
+      output_mkl_shape.SetTfLayout(output_dims_mkl_order.size(),
+                                   output_dims_mkl_order, output_tf_format);
+
+      // Allocate shape of TF tensor.
+      TensorShape output_tf_shape;
+      output_tf_shape.AddDim(dst_pd.get_size() / sizeof(T));
+
+      AllocateOutputSetMklShape(context, 0, output_tensor, output_tf_shape,
+                                output_mkl_shape);
+  }
 
   // Prepare and execute net - checks for input and output reorders.
   void PrepareAndExecutePrimitive(
@@ -511,22 +482,9 @@ class MklConv2DCustomBackpropInputOp : public OpKernel {
     filter->CheckReorderToOpMem(conv_pd.weights_primitive_desc(), &net);
     obp->CheckReorderToOpMem(conv_pd.diff_dst_primitive_desc(), &net);
 
-    // Memory for output of convolution. Since we may need reorder on the
-    // output side, we will prepare reorder primitive in case output
-    // reorder to user memory is required.
-    bool output_reorder_required = output->PrepareReorderToUserMemIfReq(
-                                      conv_pd.diff_src_primitive_desc());
-
     net.push_back(convolution_backward_data(conv_pd, obp->GetOpMem(),
                                     filter->GetOpMem(), output->GetOpMem()));
 
-    // Insert reorder primitive in the net for output reorder if reorder is
-    // required.
-    if (output_reorder_required) {
-      output->InsertReorderToUserMem(&net);
-    }
-
-    // Handle output reorder
     stream(stream::kind::eager).submit(net).wait();
   }
 };
diff --git a/tensorflow/core/kernels/mkl_conv_ops.cc b/tensorflow/core/kernels/mkl_conv_ops.cc
index a9872b8d6d..04268f23bb 100644
--- a/tensorflow/core/kernels/mkl_conv_ops.cc
+++ b/tensorflow/core/kernels/mkl_conv_ops.cc
@@ -40,8 +40,7 @@ limitations under the License.
 #include "tensorflow/core/util/tensor_format.h"
 
 #include "tensorflow/core/util/mkl_util.h"
-#include "mkl_dnn.h"
-#include "mkl_dnn_types.h"
+
 
 #ifdef INTEL_MKL_DNN
 #include "mkldnn.hpp"
@@ -51,6 +50,9 @@ using mkldnn::prop_kind;
 
 using mkldnn::convolution_forward;
 using mkldnn::convolution_direct;
+#else
+#include "mkl_dnn.h"
+#include "mkl_dnn_types.h"
 #endif
 
 namespace tensorflow {
@@ -288,10 +290,8 @@ class MklConv2DOp : public OpKernel {
     mkl_filter_output_mkl_shape.SetMklLayout(mkl_context.prim_fwd,
                                              dnnResourceFilter);
 
-    size_t filter_sizes[4] = {static_cast<size_t>(filter.dim_size(0)),
-                              static_cast<size_t>(filter.dim_size(1)),
-                              static_cast<size_t>(filter.dim_size(2)),
-                              static_cast<size_t>(filter.dim_size(3))};
+    size_t filter_sizes[4] = {filter.dim_size(0), filter.dim_size(1),
+                              filter.dim_size(2), filter.dim_size(3)};
     mkl_filter_output_mkl_shape.SetTfLayout(filter.dims(), filter_sizes,
                                             mkl_context.filter_strides);
 
@@ -514,6 +514,12 @@ class MklConv2DOp : public OpKernel {
       const Tensor& src_tensor = MklGetInput(context, src_idx);
       const Tensor& filter_tensor = MklGetInput(context, filter_idx);
 
+      MklDnnShape src_mkl_shape, filter_mkl_shape;
+      GetMklShape(context, src_idx, &src_mkl_shape);
+      GetMklShape(context, filter_idx, &filter_mkl_shape);
+      CHECK(!filter_mkl_shape.IsMklTensor())
+        << "Conv2D filter should not be in MKL Layout";
+
       MklDnnData<T> src(&cpu_engine);
       MklDnnData<T> filter(&cpu_engine);
       MklDnnData<T> output(&cpu_engine);
@@ -523,8 +529,9 @@ class MklConv2DOp : public OpKernel {
 
       // Get shapes of input tensors in MKL-DNN order
       MklDnnConvUtil conv_utl(context, strides_, padding_, data_format_);
-      conv_utl.GetConvFwdSizesInMklOrder(src_tensor.shape(),
-                                         filter_tensor.shape(),
+      auto src_tf_shape = GetTfShape(context, src_idx);
+      auto filter_tf_shape = GetTfShape(context, filter_idx);
+      conv_utl.GetConvFwdSizesInMklOrder(src_tf_shape, filter_tf_shape,
                                          &src_dims, &filter_dims, &strides,
                                          &output_dims_tf_order,
                                          &output_dims_mkl_order, &padding_l,
@@ -532,58 +539,47 @@ class MklConv2DOp : public OpKernel {
       if (!context->status().ok()) return;
 
       // Check for corner case - if there is nothing to compute, return.
-      TensorShape tf_output_shape({output_dims_tf_order[0],
-                                output_dims_tf_order[1],
-                                output_dims_tf_order[2],
-                                output_dims_tf_order[3]});
-      Tensor* output_tensor = nullptr;
-      MklShape mkl_output_mkl_shape;
-      mkl_output_mkl_shape.SetMklTensor(false);
-      AllocateOutputSetMklShape(context, 0, &output_tensor, tf_output_shape,
-                                mkl_output_mkl_shape);
+      TensorShape output_tf_shape = MklDnnDimsToTFShape(output_dims_tf_order);
 
       // Forward filter in TF format from input at index 1 to output at index 1.
       ForwardTfTensorInToOut(context, 1, 1);
 
-      if (tf_output_shape.num_elements() == 0) {
+      // Corner cases: output with 0 elements and 0 batch size.
+      Tensor* output_tensor = nullptr;
+      if (output_tf_shape.num_elements() == 0 ||
+          output_dims_tf_order[0] == 0) {
         // TODO(jbobba): Verify correctness here
         //               Need semantics for Null MKL tensor
+        MklDnnShape output_mkl_shape;
+        output_mkl_shape.SetMklTensor(false);
+        AllocateOutputSetMklShape(context, 0, &output_tensor, src_tf_shape,
+                                output_mkl_shape);
         return;
       }
 
-      // Corner case to handle 0 batch size.
-      if (output_dims_tf_order[0] == 0) {
-        // Nothing to do, allocate output tensor and return
-        // TODO(nhasabni): remove this code later once serialization
-        // in MKL-DNN is supported.
-        AllocateOutputSetMklShape(context, 0, &output_tensor,
-                                  src_tensor.shape(), mkl_output_mkl_shape);
-        return;
-      } else {
-        // Otherwise regular output tensor allocation
-        // Allocate output tensor.
-      }
-      CHECK_NOTNULL(output_tensor);
-
       // Create memory for user data.
       // Describe how the inputs and outputs of Convolution look like. Also
       // specify buffers containing actual input and output data.
-      // Although input shape (src_dims) required is in MKL-DNN order,
-      // the layout is Tensorflow's layout (NHWC or NCHW depending on data
-      // format).
-      src.SetUsrMem(src_dims, TFDataFormatToMklDnnDataFormat(data_format_),
-                    const_cast<void*>(static_cast<const void*>(
-                    src_tensor.flat<T>().data())));
+      auto tf_fmt = TFDataFormatToMklDnnDataFormat(data_format_);
+      // If input is in MKL layout, then simply grab input layout; otherwise,
+      // construct input Tf layout. For TF layout, although input shape
+      // (src_dims) required is in MKL-DNN order, the layout is Tensorflow's
+      // layout (NHWC or NCHW depending on data format).
+      auto src_md = src_mkl_shape.IsMklTensor()
+                    ? src_mkl_shape.GetMklLayout()
+                    : memory::desc(src_dims, MklDnnType<T>(), tf_fmt);
+      src.SetUsrMem(src_md, &src_tensor);
       // Although filter shape (filter_dims) required is in MKL-DNN order,
       // the layout is Tensorflow's layout (HWIO).
-      filter.SetUsrMem(filter_dims, memory::format::hwio,
-                       const_cast<void*>(static_cast<const void*>(
-                       filter_tensor.flat<T>().data())));
-      // Although output shape (output_dims) required is in MKL-DNN order,
-      // layout is Tensorflow's layout (NHWC or NCHW depending on data format).
-      output.SetUsrMem(output_dims_mkl_order,
-                       TFDataFormatToMklDnnDataFormat(data_format_),
-                       output_tensor->flat<T>().data());
+      auto filter_md = filter_mkl_shape.IsMklTensor()
+                    ? filter_mkl_shape.GetMklLayout()
+          : memory::desc(filter_dims, MklDnnType<T>(), memory::format::hwio);
+      filter.SetUsrMem(filter_md, &filter_tensor);
+      // Set output shape (output_dims) required in MKL-DNN order.
+      // Currently, we set output layout as Tensorflow's layout (NHWC or NCHW
+      // depending on data format). But later we propagate Mkl layout of the
+      // output to the next op directly.
+      output.SetUsrMem(output_dims_mkl_order, tf_fmt);
 
       // Create memory descriptors for convolution data w/ no specified format.
       src.SetOpMemDesc(src_dims, memory::format::any);
@@ -596,9 +592,7 @@ class MklConv2DOp : public OpKernel {
         memory::dims bias_size;
         conv_utl.GetBiasSizeInMklOrder(2 /* bias idx */, &bias_size);
         const Tensor& bias_tensor = MklGetInput(context, 2);
-        bias.SetUsrMem(bias_size, memory::format::x,
-                       const_cast<void*>(static_cast<const void*>(
-                       bias_tensor.flat<T>().data())));
+        bias.SetUsrMem(bias_size, memory::format::x, &bias_tensor);
         bias.SetOpMemDesc(bias_size, memory::format::any);
 
         // Create convolution primitive with Bias.
@@ -609,6 +603,10 @@ class MklConv2DOp : public OpKernel {
 
         auto conv_prim_desc = convolution_forward::primitive_desc(conv_desc,
                                                                 cpu_engine);
+        AllocateOutputTensor(context, conv_prim_desc,
+                             output_dims_mkl_order, tf_fmt, &output_tensor);
+        // Set data handle for output.
+        output.SetUsrMemDataHandle(output_tensor);
         PrepareAndExecuteNet(conv_prim_desc, &src, &filter, &bias, &output);
       } else {
         // Create convolution primitive without Bias.
@@ -619,6 +617,10 @@ class MklConv2DOp : public OpKernel {
 
         auto conv_prim_desc = convolution_forward::primitive_desc(conv_desc,
                                                                 cpu_engine);
+        AllocateOutputTensor(context, conv_prim_desc, output_dims_mkl_order,
+                             tf_fmt, &output_tensor);
+        // Set data handle for output.
+        output.SetUsrMemDataHandle(output_tensor);
         PrepareAndExecuteNet(conv_prim_desc, &src, &filter, nullptr, &output);
       }
     } catch (mkldnn::error &e) {
@@ -636,23 +638,44 @@ class MklConv2DOp : public OpKernel {
   Padding padding_;
   TensorFormat data_format_;
 
+  // Allocate output tensor.
+  void AllocateOutputTensor(
+                  OpKernelContext* context,
+                  const convolution_forward::primitive_desc& conv_prim_desc,
+                  const memory::dims& output_dims_mkl_order,
+                  memory::format output_tf_format, Tensor** output_tensor) {
+      CHECK_NOTNULL(output_tensor);
+      auto dst_pd = conv_prim_desc.dst_primitive_desc();
+
+      // Allocate shape of Mkl tensor.
+      MklDnnShape output_mkl_shape;
+      output_mkl_shape.SetMklTensor(true);
+      output_mkl_shape.SetMklLayout(&dst_pd);
+      output_mkl_shape.SetElemType(MklDnnType<T>());
+      output_mkl_shape.SetTfLayout(output_dims_mkl_order.size(),
+                                   output_dims_mkl_order, output_tf_format);
+
+      // Allocate shape of TF tensor.
+      TensorShape output_tf_shape;
+      output_tf_shape.AddDim((dst_pd.get_size() / sizeof(T)));
+
+      const int kOutputSlotIdx = 0;
+      AllocateOutputSetMklShape(context, kOutputSlotIdx, output_tensor,
+                                output_tf_shape, output_mkl_shape);
+  }
+
   // Prepare and execute net - checks for input and output reorders.
   void PrepareAndExecuteNet(
                   const convolution_forward::primitive_desc& conv_prim_desc,
                   MklDnnData<T>* src, MklDnnData<T>* filter,
                   MklDnnData<T>* bias, MklDnnData<T>* output) {
     // Create reorders between user layout and MKL layout if it is needed and
-    // add it to the net before convolution.
+    // add it to the net before convolution. No need to check for output
+    // reorder as we propagate output layout to the next layer.
     std::vector<primitive> net;
     src->CheckReorderToOpMem(conv_prim_desc.src_primitive_desc(), &net);
     filter->CheckReorderToOpMem(conv_prim_desc.weights_primitive_desc(), &net);
 
-    // Memory for output of convolution. Since we may need reorder on the
-    // output side, we will prepare reorder primitive in case output
-    // reorder to user memory is required.
-    bool output_reorder_required = output->PrepareReorderToUserMemIfReq(
-                                      conv_prim_desc.dst_primitive_desc());
-
     // Create convolution primitive and add it to net.
     if (bias) {
       CHECK_EQ(biasEnabled, true);
@@ -665,13 +688,6 @@ class MklConv2DOp : public OpKernel {
                                     filter->GetOpMem(), output->GetOpMem()));
     }
 
-    // Insert reorder primitive in the net for output reorder if reorder is
-    // required.
-    if (output_reorder_required) {
-      output->InsertReorderToUserMem(&net);
-    }
-
-    // Handle output reorder
     stream(stream::kind::eager).submit(net).wait();
   }
 };
@@ -688,7 +704,12 @@ class MklConv2DOp : public OpKernel {
                               .Device(DEVICE_CPU)                   \
                               .TypeConstraint<T>("T")               \
                               .Label(mkl_op_registry::kMklOpLabel), \
-                          MklConv2DOp<CPUDevice, T, true>);
+                          MklConv2DOp<CPUDevice, T, true>);         \
+  REGISTER_KERNEL_BUILDER(Name("__MklDummyConv2DWithBias")          \
+                              .Device(DEVICE_CPU)                   \
+                              .TypeConstraint<T>("T")               \
+                              .Label(mkl_op_registry::kMklOpLabel), \
+                          MklDummyOp<CPUDevice, T>);
 
 TF_CALL_float(REGISTER_MKL_CPU);
 
diff --git a/tensorflow/core/kernels/mkl_conv_ops.h b/tensorflow/core/kernels/mkl_conv_ops.h
index f0cb37f8a4..47a9b4bfc7 100644
--- a/tensorflow/core/kernels/mkl_conv_ops.h
+++ b/tensorflow/core/kernels/mkl_conv_ops.h
@@ -41,6 +41,12 @@ limitations under the License.
 
 #ifdef INTEL_MKL_DNN
 #include "mkldnn.hpp"
+
+using mkldnn::stream;
+using mkldnn::prop_kind;
+
+using mkldnn::convolution_forward;
+using mkldnn::convolution_direct;
 #endif
 
 namespace tensorflow {
@@ -108,7 +114,13 @@ class MklDnnConvUtil {
   #undef CHECK_BOUNDS
 
     // MKL-DNN always requires input in NCHW format.
-    *input_dims = {input_batch, input_depth, input_rows, input_cols};
+    std::vector<int> mkldnn_sizes(4, -1);
+    mkldnn_sizes[MklDnnDims::Dim_N] = input_batch;
+    mkldnn_sizes[MklDnnDims::Dim_C] = input_depth;
+    mkldnn_sizes[MklDnnDims::Dim_H] = input_rows;
+    mkldnn_sizes[MklDnnDims::Dim_W] = input_cols;
+
+    *input_dims = mkldnn_sizes;
   }
 
   // Calculate Convolution filter size in MKL-DNN order. MKL-DNN
@@ -156,7 +168,13 @@ class MklDnnConvUtil {
 
     // MKL-DNN always needs filter in OIHW format.
     // OIHW = (out_depth, in_depth, rows, cols)
-    *filter_dims = {out_depth, in_depth, filter_rows, filter_cols};
+    std::vector<int> mkldnn_sizes(4, -1);
+    mkldnn_sizes[MklDnnDims::Dim_O] = out_depth;
+    mkldnn_sizes[MklDnnDims::Dim_I] = in_depth;
+    mkldnn_sizes[MklDnnDims::Dim_H] = filter_rows;
+    mkldnn_sizes[MklDnnDims::Dim_W] = filter_cols;
+
+    *filter_dims = mkldnn_sizes;
   }
 
   // Calculate Convolution filter size in MKL-DNN order. MKL-DNN
@@ -167,9 +185,9 @@ class MklDnnConvUtil {
   GetFilterSizeInMklOrder(size_t src_index, size_t filter_index,
                           memory::dims *filter_dims) {
     CHECK_NOTNULL(filter_dims);
-    const Tensor& input = MklGetInput(context_, src_index);
-    const Tensor& filter = MklGetInput(context_, filter_index);
-    GetFilterSizeInMklOrder(input.shape(), filter.shape(), filter_dims);
+    GetFilterSizeInMklOrder(GetTfShape(context_, src_index),
+                            GetTfShape(context_, filter_index),
+                            filter_dims);
   }
 
   // Calculate Bias size for 2D Convolution. Function does not return
@@ -238,8 +256,12 @@ class MklDnnConvUtil {
     *output_dims_tf_order = TFShapeToMklDnnDims(out_shape);
 
     // MKL-DNN always needs output in NCHW format.
-    *output_dims_mkl_order = {out_batch, out_depth, static_cast<int>(out_rows),
-                   static_cast<int>(out_cols)};
+    std::vector<int> mkldnn_sizes(4, -1);
+    mkldnn_sizes[MklDnnDims::Dim_N] = out_batch;
+    mkldnn_sizes[MklDnnDims::Dim_C] = out_depth;
+    mkldnn_sizes[MklDnnDims::Dim_H] = static_cast<int>(out_rows);
+    mkldnn_sizes[MklDnnDims::Dim_W] = static_cast<int>(out_cols);
+    *output_dims_mkl_order = mkldnn_sizes;
 
     // Now handle padding. MKL-DNN uses asymetric padding.
     *pad_l = {static_cast<int>(pad_top), static_cast<int>(pad_left)};
@@ -261,14 +283,14 @@ class MklDnnConvUtil {
     CHECK_NOTNULL(pad_l);
     CHECK_NOTNULL(pad_r);
 
-    const Tensor& input = MklGetInput(context_, src_index);
-    const Tensor& filter = MklGetInput(context_, filter_index);
+    auto input_tf_shape = GetTfShape(context_, src_index);
+    auto filter_tf_shape = GetTfShape(context_, filter_index);
 
-    OP_REQUIRES(context_, input.dims() == 4,
+    OP_REQUIRES(context_, input_tf_shape.dims() == 4,
                 errors::InvalidArgument("input must be 4-dimensional",
-                                          input.shape().DebugString()));
+                                        input_tf_shape.DebugString()));
 
-    GetOutputAndPadSizeInMklOrder(input.shape(), filter.shape(),
+    GetOutputAndPadSizeInMklOrder(input_tf_shape, filter_tf_shape,
                                   strides, output_dims_tf_order,
                                   output_dims_mkl_order, pad_l, pad_r);
   }
@@ -309,8 +331,231 @@ class MklDnnConvUtil {
   }
 };
 
+/////////////////////////////////////////////////////////////////////
+///  Common class that implements Conv2DBackpropFilter and Input
+/////////////////////////////////////////////////////////////////////
+
+template <typename Device, class T>
+class MklConv2DBackpropCommonOp :  public OpKernel {
+ public:
+  ~MklConv2DBackpropCommonOp() {}
+  explicit MklConv2DBackpropCommonOp(OpKernelConstruction* context)
+      : OpKernel(context) {
+    string data_format_str;
+    OP_REQUIRES_OK(context, context->GetAttr("data_format", &data_format_str));
+    OP_REQUIRES(context, FormatFromString(data_format_str, &data_format_),
+                errors::InvalidArgument("Invalid data format"));
+    OP_REQUIRES_OK(context, context->GetAttr("strides", &strides_));
+    int stride_n = GetTensorDim(strides_, data_format_, 'N');
+    int stride_c = GetTensorDim(strides_, data_format_, 'C');
+    OP_REQUIRES(
+        context, (stride_n == 1 && stride_c == 1),
+        errors::InvalidArgument("Current implementation does not yet support "
+                                "strides in the batch and depth dimensions."));
+
+    OP_REQUIRES_OK(context, context->GetAttr("padding", &padding_));
+  }
+
+  void Compute(OpKernelContext* context) override {
+    try {
+      auto cpu_engine = engine(engine::cpu, 0);
+
+      // Prepare common tensors for Conv2DBackpropInput and
+      // Conv2DBackpropFilter.
+      MklDnnData<T> input(&cpu_engine);
+      MklDnnData<T> filter(&cpu_engine);
+      MklDnnData<T> outbackprop(&cpu_engine);
+      MklDnnData<T> output(&cpu_engine);
+
+      // Input tensors
+      const int kInputIdx = 0, kFilterIdx = 1, kOutbpropIdx = 2;
+      const Tensor& input_tensor = MklGetInput(context, kInputIdx);
+      const Tensor& filter_tensor = MklGetInput(context, kFilterIdx);
+      const Tensor& outbprop_tensor = MklGetInput(context, kOutbpropIdx);
+
+      MklDnnShape input_mkl_shape, filter_mkl_shape, outbprop_mkl_shape;
+      GetMklShape(context, kInputIdx, &input_mkl_shape);
+      GetMklShape(context, kFilterIdx, &filter_mkl_shape);
+      GetMklShape(context, kOutbpropIdx, &outbprop_mkl_shape);
+      // Allow operator-specific sanity checking of shapes.
+      ValidateMklShapes(input_mkl_shape, filter_mkl_shape, outbprop_mkl_shape);
+
+      // Allow operator-specific generation of shapes.
+      // E.g., Conv2DBackpropFilter gets filter as filter_sizes. It is a
+      // tensor containing shape of filter. So filter.shape() is not
+      // a correct way to get filter shape. These operator-specific calls
+      // allow this class to handle this case.
+      TensorShape input_tf_shape = MakeInputTfShape(context, input_tensor);
+      TensorShape filter_tf_shape = MakeFilterTfShape(context, filter_tensor);
+      TensorShape outbprop_tf_shape = GetTfShape(context, kOutbpropIdx);
+
+      // By default, all dims are in MKL order. Only dims in TF order
+      // are those with prefix tf_order.
+      memory::dims outbprop_dims, fwd_input_dims, fwd_filter_dims;
+      memory::dims padding_l, padding_r, strides, fwd_output_dims;
+      memory::dims fwd_output_dims_tf_order;
+
+      // Get forward convolution parameters.
+      MklDnnConvUtil conv_utl(context, strides_, padding_, data_format_);
+      conv_utl.GetConvFwdSizesInMklOrder(input_tf_shape, filter_tf_shape,
+                                         &fwd_input_dims, &fwd_filter_dims,
+                                         &strides,
+                                         &fwd_output_dims_tf_order,
+                                         &fwd_output_dims,
+                                         &padding_l, &padding_r);
+      if (!context->status().ok()) return;
+
+      // Create Convolution forward descriptor since Convolution backward
+      // API needs it. For that, we first need to create input, filter
+      // and output memory descriptors.
+      auto tf_fmt = TFDataFormatToMklDnnDataFormat(data_format_);
+      // If input is in MKL layout, then simply grab input layout; otherwise,
+      // construct input TF layout. For TF layout, although input shape
+      // required is in MKL-DNN order, the layout is Tensorflow's layout
+      // (NHWC or NCHW depending on data format).
+      auto fwd_input_md = input_mkl_shape.IsMklTensor() ?
+                          input_mkl_shape.GetMklLayout() :
+                       memory::desc(fwd_input_dims, MklDnnType<T>(), tf_fmt);
+      // If filter is in MKL layout, then simply grab filter layout; otherwise
+      // construct filter in TF layout. For TF layout, filter is in HWIO format.
+      auto fwd_filter_md = filter_mkl_shape.IsMklTensor() ?
+                          filter_mkl_shape.GetMklLayout() :
+                          memory::desc(fwd_filter_dims, MklDnnType<T>(),
+                                       memory::format::hwio);
+      // Tensorflow Output of Conv2D is in data_format order.
+      auto fwd_out_md = memory::desc(fwd_output_dims, MklDnnType<T>(), tf_fmt);
+      auto fwd_desc = convolution_forward::desc(prop_kind::forward,
+            convolution_direct, fwd_input_md, fwd_filter_md, fwd_out_md,
+            strides, padding_l, padding_r, TFPaddingToMklDnnPadding(padding_));
+      auto fwd_pd = convolution_forward::primitive_desc(fwd_desc, cpu_engine);
+
+      // Create memory for user data. Describe how the inputs and outputs of
+      // Convolution look like. Also specify buffers containing actual input
+      // and output data.
+
+      // Since this is a common class for both Conv2DBackpropFilter and
+      // Conv2DBackpropInput, we skip SetUsrMem call for input tensor (for
+      // Conv2DBackpropInput) and for filter tensor (for
+      // conv2DBackpropFilter) depending on which tensor is int32 type.
+      size_t input_with_sizes = GetInputTensorIndexWithSizes();
+      if (input_with_sizes != kInputIdx) {
+        // Shape of Conv2DBackpropFilter's input is same as Conv2D input.
+        input.SetUsrMem(fwd_input_md, &input_tensor);
+      } else if (input_with_sizes != kFilterIdx) {
+        // Shape of Conv2DBackpropInput's filter is same as Conv2D filter.
+        filter.SetUsrMem(fwd_filter_md, &filter_tensor);
+      }
+
+      conv_utl.GetInputSizeInMklOrder(outbprop_tf_shape, &outbprop_dims);
+      if (!context->status().ok()) return;
+      if (outbprop_mkl_shape.IsMklTensor()) {
+        // If outbackprop is in Mkl layout, then simply grab it.
+        auto outbprop_md = outbprop_mkl_shape.GetMklLayout();
+        outbackprop.SetUsrMem(outbprop_md, &outbprop_tensor);
+      } else {
+        // If outbackprop is in TensorFlow layout, then we need to create memory
+        // descriptor for it. Outbackprop shape is data format order.
+        outbackprop.SetUsrMem(outbprop_dims, tf_fmt, &outbprop_tensor);
+      }
+
+      // Operator specific call to get output shape and data_format.
+      auto bwd_output_dims = GetOutputDims(fwd_input_dims, fwd_filter_dims);
+      auto bwd_output_format = GetOutputFormat(tf_fmt);
+      output.SetUsrMem(bwd_output_dims, bwd_output_format);
+
+      // Create memory descriptors for convolution data w/ no specified format.
+      input.SetOpMemDesc(fwd_input_dims, memory::format::any);
+      filter.SetOpMemDesc(fwd_filter_dims, memory::format::any);
+      outbackprop.SetOpMemDesc(outbprop_dims, memory::format::any);
+      output.SetOpMemDesc(bwd_output_dims, memory::format::any);
+
+      // Operator-specific call to create and execute primitive.
+      Tensor* output_tensor = nullptr;
+      CreatePrimitive(context, cpu_engine, fwd_pd, &input, &filter,
+                      &outbackprop, &output, &output_tensor,
+                      strides, padding_l, padding_r,
+                      TFPaddingToMklDnnPadding(padding_),
+                      bwd_output_dims, bwd_output_format);
+    } catch (mkldnn::error &e) {
+     string error_msg = "Status: " + std::to_string(e.status) +
+                       ", message: " + string(e.message) +
+                       ", in file " + string(__FILE__) + ":" +
+                       std::to_string(__LINE__);
+     OP_REQUIRES_OK(context, errors::Aborted("Operation received an exception:",
+                                            error_msg));
+    }
+  }
+
+  /// Pure virtual function to allow operator to check for validity of input
+  /// shapes. Function asserts that input shapes are valid.
+  virtual void ValidateMklShapes(const MklDnnShape& input_mkl_shape,
+                                 const MklDnnShape& filter_mkl_shape,
+                                 const MklDnnShape& outbprop_mkl_shape) = 0;
+
+  /// Operator-specific function that returns index of input that is
+  /// representing input sizes. For Conv2DBackpropFilter it returns 1 since
+  /// filter for this operator is filter shape. For Conv2DBackpropInput it
+  /// returns 0 (for input).
+  virtual size_t GetInputTensorIndexWithSizes() = 0;
+
+  /// Get TensorFlow shape of input tensor.
+  virtual TensorShape MakeInputTfShape(OpKernelContext* context,
+                                      const Tensor& input_tensor) = 0;
+
+  /// Get TensorFlow shape of filter tensor.
+  virtual TensorShape MakeFilterTfShape(OpKernelContext* context,
+                                       const Tensor& filter_tensor) = 0;
+
+  /// Get shape of output in MKL-DNN order. Computes shape of output from
+  /// input shape (fwd_input_dims) and filter shape (fwd_filter_dims).
+  virtual
+  const memory::dims& GetOutputDims(const memory::dims& fwd_input_dims,
+                                    const memory::dims& fwd_filter_dims) = 0;
+
+  /// Get data_format of output in MKL-DNN order. If output data format is
+  /// same as input data format, then it simply returns value of data_format
+  /// parameter as it is.
+  virtual memory::format GetOutputFormat(const memory::format data_format) = 0;
+
+  /// Create and execute the primitive storing output in the output_tensor.
+  virtual void CreatePrimitive(OpKernelContext* context,
+    const engine& cpu_engine,
+    const convolution_forward::primitive_desc& conv_fwd_pd,
+    MklDnnData<T>* input, MklDnnData<T>* filter, MklDnnData<T>* outbackprop,
+    MklDnnData<T>* output, Tensor** output_tensor, const memory::dims& strides,
+    const memory::dims& padding_l, const memory::dims& padding_r,
+    padding_kind padding, const memory::dims& bwd_output_dims,
+    memory::format bwd_output_format) = 0;
+
+  // Get the data_format {NCHW, NHWC}
+  TensorFormat GetTFDataFormat () { return data_format_; }
+
+ private:
+  std::vector<int32> strides_;
+  Padding padding_;
+  TensorFormat data_format_;
+};
 #endif  // INTEL_MKL_DNN
 
+/////////////////////////////////////////////////////////////////////
+///  Dummy Mkl op that is just used for operators that are intermediate
+///  output of node fusion in the graph
+/////////////////////////////////////////////////////////////////////
+
+template <typename Device, typename T>
+class MklDummyOp : public OpKernel {
+ public:
+  ~MklDummyOp() {}
+
+  explicit MklDummyOp(OpKernelConstruction* context) :
+    OpKernel(context) {}
+
+  void Compute(OpKernelContext* context) override {
+    TF_CHECK_OK(errors::Unimplemented("This is a dummy op."
+                                      "It should not have been invoked."));
+  }
+};
+
 }  // namespace tensorflow
 
 #endif  // TENSORFLOW_CORE_KERNELS_MKL_CONV_OPS_H_
diff --git a/tensorflow/core/kernels/mkl_fused_batch_norm_op.cc b/tensorflow/core/kernels/mkl_fused_batch_norm_op.cc
index bc9e906c39..a761562a4b 100644
--- a/tensorflow/core/kernels/mkl_fused_batch_norm_op.cc
+++ b/tensorflow/core/kernels/mkl_fused_batch_norm_op.cc
@@ -25,10 +25,24 @@ limitations under the License.
 #include "mkl_dnn_types.h"
 #include "tensorflow/core/util/mkl_util.h"
 
+#ifdef INTEL_MKL_DNN
+#include "mkldnn.hpp"
+
+using mkldnn::stream;
+using mkldnn::prop_kind;
+using mkldnn::use_scale_shift;
+using mkldnn::use_global_stats;
+using mkldnn::batch_normalization_forward;
+using mkldnn::batch_normalization_backward;
+#endif
+
 // TODO(inteltf) Address comments from PR 8968.
 
 namespace tensorflow {
 using CPUDevice = Eigen::ThreadPoolDevice;
+
+#ifndef INTEL_MKL_DNN
+
 template <typename Device, typename T>
 class MklFusedBatchNormOp : public OpKernel {
  public:
@@ -46,7 +60,6 @@ class MklFusedBatchNormOp : public OpKernel {
 
   void Compute(OpKernelContext* context) override {
     MklFusedBatchNormOpContext mkl_context;
-
     const Tensor& input = MklGetInput(context, 0);
     const Tensor& scale = MklGetInput(context, 1);
     const Tensor& shift = MklGetInput(context, 2);
@@ -55,6 +68,7 @@ class MklFusedBatchNormOp : public OpKernel {
 
     GetMklShape(context, 0, &(mkl_context.mkl_shape_input_shape));
     bool input_in_mkl_format = mkl_context.mkl_shape_input_shape.IsMklTensor();
+
     if (!input_in_mkl_format) {
       OP_REQUIRES(context, input.dims() == 4,
                   errors::InvalidArgument("input must be 4-dimensional",
@@ -69,10 +83,12 @@ class MklFusedBatchNormOp : public OpKernel {
     OP_REQUIRES(context, est_mean.dims() == 1,
                 errors::InvalidArgument("estimated_mean must be 1-dimensional",
                                         est_mean.shape().DebugString()));
+
     OP_REQUIRES(
         context, est_variance.dims() == 1,
         errors::InvalidArgument("estimated_variance must be 1-dimensional",
                                 est_variance.shape().DebugString()));
+
     if (is_training_) {
       OP_REQUIRES(context, est_mean.dim_size(0) == 0,
                   errors::InvalidArgument("estimated_mean empty for training",
@@ -258,7 +274,6 @@ class MklFusedBatchNormOp : public OpKernel {
             E_SUCCESS);
       }
     }
-
     void MklPrepareContextInputs(OpKernelContext* context,
                                  Tensor* mkl_tmp_input_buf_tensor,
                                  Tensor* mkl_tmp_scale_shift_buf_tensor) {
@@ -325,15 +340,6 @@ class MklFusedBatchNormOp : public OpKernel {
   } MklFusedBatchNormOpContext;
 };
 
-#define REGISTER_MKL_CPU(T)                                         \
-  REGISTER_KERNEL_BUILDER(Name("_MklFusedBatchNorm")                \
-                              .Device(DEVICE_CPU)                   \
-                              .TypeConstraint<T>("T")               \
-                              .Label(mkl_op_registry::kMklOpLabel), \
-                          MklFusedBatchNormOp<CPUDevice, T>);
-TF_CALL_float(REGISTER_MKL_CPU);
-#undef REGISTER_MKL_CPU
-
 template <typename Device, typename T>
 class MklFusedBatchNormGradOp : public OpKernel {
  public:
@@ -595,7 +601,7 @@ class MklFusedBatchNormGradOp : public OpKernel {
       mkl_res_batchnorm_bwd[dnnResourceSrc] =
           (mkl_convert_input) ? mkl_buf_converted_input : mkl_buf_input;
 
-      bool mkl_convert_out_backprop;
+     bool mkl_convert_out_backprop;
       dnnPrimitive_t mkl_prim_convert_out_backprop = nullptr;
       dnnLayout_t mkl_lt_internal_out_backprop = nullptr;
       void* mkl_buf_converted_out_backprop = nullptr;
@@ -675,6 +681,628 @@ class MklFusedBatchNormGradOp : public OpKernel {
     }
   } MklFusedBatchNormGradOpContext;
 };
+#endif
+
+#ifdef INTEL_MKL_DNN
+
+template <typename Device, typename T>
+class MklFusedBatchNormOp : public OpKernel {
+ public:
+  explicit MklFusedBatchNormOp(OpKernelConstruction* context)
+      : OpKernel(context) {
+    float epsilon;
+    OP_REQUIRES_OK(context, context->GetAttr("epsilon", &epsilon));
+    epsilon_ = T(epsilon);
+    string tensor_format;
+    OP_REQUIRES_OK(context, context->GetAttr("data_format", &tensor_format));
+    OP_REQUIRES(context, FormatFromString(tensor_format, &tensor_format_),
+                errors::InvalidArgument("Invalid data format"));
+    OP_REQUIRES_OK(context, context->GetAttr("is_training", &is_training_));
+  }
+
+  void Compute(OpKernelContext* context) override {
+    try {
+      auto cpu_engine = engine(engine::cpu, 0);
+      const size_t src_index = 0;    // index of src input tensor
+      const size_t scale_index = 1;  // index of scale tensor
+      const size_t shift_index = 2;  // index of shift tensor
+      const size_t mean_index = 3;   // index of est_mean tensor
+      const size_t var_index = 4;    // index of est_variance tensor
+
+      const Tensor& src_tensor          = MklGetInput(context, src_index);
+      const Tensor& scale_tensor        = MklGetInput(context, scale_index);
+      const Tensor& shift_tensor        = MklGetInput(context, shift_index);
+      const Tensor& est_mean_tensor     = MklGetInput(context, mean_index);
+      const Tensor& est_variance_tensor = MklGetInput(context, var_index);
+
+      MklDnnShape dnn_shape_src;
+      GetMklShape(context, src_index, &dnn_shape_src);
+
+      if (dnn_shape_src.IsMklTensor()) {
+        OP_REQUIRES(context, dnn_shape_src.GetDimension() == 4,
+                    errors::InvalidArgument(
+                        "input must be 4-dimensional",
+                        src_tensor.shape().DebugString()));
+      } else {
+        OP_REQUIRES(context, src_tensor.dims() == 4,
+                    errors::InvalidArgument(
+                        "input must be 4-dimensional",
+                        src_tensor.shape().DebugString()));
+      }
+      OP_REQUIRES(context, scale_tensor.dims() == 1,
+                  errors::InvalidArgument(
+                      "scale must be 1-dimensional",
+                      scale_tensor.shape().DebugString()));
+      OP_REQUIRES(context, shift_tensor.dims() == 1,
+                  errors::InvalidArgument("offset must be 1-dimensional",
+                                        shift_tensor.shape().DebugString()));
+      OP_REQUIRES(context, est_mean_tensor.dims() == 1,
+                  errors::InvalidArgument(
+                      "estimated_mean must be 1-dimensional",
+                      est_mean_tensor.shape().DebugString()));
+      OP_REQUIRES(context, est_variance_tensor.dims() == 1,
+                  errors::InvalidArgument(
+                      "estimated_variance must be 1-dimensional",
+                      est_variance_tensor.shape().DebugString()));
+
+      if (is_training_) {
+        OP_REQUIRES(context, est_mean_tensor.dim_size(0) == 0,
+                    errors::InvalidArgument(
+                        "estimated_mean must be empty for training",
+                        est_mean_tensor.shape().DebugString()));
+        OP_REQUIRES(context, est_variance_tensor.dim_size(0) == 0,
+                    errors::InvalidArgument(
+                        "estimated_variance must be empty for training",
+                        est_variance_tensor.shape().DebugString()));
+      }
+
+      if (dnn_shape_src.IsMklTensor())
+        depth_ = dnn_shape_src.DimSize(MklDnnDims::Dim_C);
+      else
+        ExtractParams(context);
+
+      // Indices of output tensors
+      const size_t dst_index = 0;
+      const size_t batch_mean_index = 1;
+      const size_t batch_variance_index = 2;
+      const size_t saved_mean_index = 3;
+      const size_t saved_variance_index = 4;
+
+      // allocate batch mean output tensor
+      Tensor* batch_mean_tensor = nullptr;
+      MklDnnShape mkl_shape_batch_mean;
+      mkl_shape_batch_mean.SetMklTensor(false);
+      AllocateOutputSetMklShape(context,
+                                batch_mean_index,
+                                &batch_mean_tensor,
+                                scale_tensor.shape(),
+                                mkl_shape_batch_mean);
+      CHECK_NOTNULL(batch_mean_tensor);
+
+      // Batch variance
+      Tensor* batch_variance_tensor = nullptr;
+      MklDnnShape mkl_shape_batch_variance;
+      mkl_shape_batch_variance.SetMklTensor(false);
+      AllocateOutputSetMklShape(context,
+                                batch_variance_index,
+                                &batch_variance_tensor,
+                                scale_tensor.shape(),
+                                mkl_shape_batch_variance);
+      CHECK_NOTNULL(batch_variance_tensor);
+
+      if (is_training_)
+        SetMeanVariance(*batch_mean_tensor, *batch_variance_tensor);
+      else
+        SetMeanVariance(est_mean_tensor, est_variance_tensor);
+
+      MklDnnData<T> src(&cpu_engine);
+      MklDnnData<T> dst(&cpu_engine);
+
+      memory::format format_m;
+      if (dnn_shape_src.IsMklTensor()) {
+        if (dnn_shape_src.IsTensorInNCHWFormat()) {
+          format_m = memory::format::nchw;
+        } else {
+          format_m = memory::format::nhwc;
+        }
+      } else {
+        format_m = TFDataFormatToMklDnnDataFormat(tensor_format_);
+      }
+
+      // set src primitive
+      memory::dims src_dims;
+      if (dnn_shape_src.IsMklTensor()) {
+        src_dims = TFShapeToMklDnnDimsInNCHW(dnn_shape_src.GetTfShape(),
+                                             tensor_format_);
+      } else {
+        src_dims = TFShapeToMklDnnDimsInNCHW(src_tensor.shape(),
+                                             tensor_format_);
+      }
+
+      auto src_md = dnn_shape_src.IsMklTensor()
+                    ? dnn_shape_src.GetMklLayout()
+                    : memory::desc(src_dims, MklDnnType<T>(), format_m);
+      src.SetUsrMem(src_md, &src_tensor);
+
+      // set weights primitive
+      // MKL-DNN packs scale & shift as "weights":
+      // <scale>...<scale><shift>...<shift>
+      auto weights_desc = memory::desc({2, depth_},
+                                       MklDnnType<T>(),
+                                       memory::format::nc);
+      auto weights_pd = memory::primitive_desc(weights_desc, cpu_engine);
+      auto weights_m = memory(weights_pd);
+      T* weights_data = reinterpret_cast<T*>(
+                        weights_m.get_data_handle());
+      T* scale_tf = reinterpret_cast<T*>(
+                    const_cast<T*>(scale_tensor.flat<T>().data()));
+      T* shift_tf = reinterpret_cast<T*>(
+                    const_cast<T*>(shift_tensor.flat<T>().data()));
+
+      for (int k=0; k < depth_; k++) {
+        weights_data[k] = scale_tf[k];
+        weights_data[k + depth_] = shift_tf[k];
+      }
+
+      // Mean and variance (without Bessel's correction) saved for backward
+      // computation to serve as pre-computed mean and variance.
+      Tensor* saved_mean_tensor = nullptr;
+      MklDnnShape mkl_shape_saved_mean;
+      mkl_shape_saved_mean.SetMklTensor(false);
+      AllocateOutputSetMklShape(context, saved_mean_index,
+                                &saved_mean_tensor,
+                                scale_tensor.shape(),
+                                mkl_shape_saved_mean);
+      CHECK_NOTNULL(saved_mean_tensor);
+
+      Tensor* saved_variance_tensor = nullptr;
+      MklDnnShape mkl_shape_saved_variance;
+      mkl_shape_saved_variance.SetMklTensor(false);
+      AllocateOutputSetMklShape(context, saved_variance_index,
+                                &saved_variance_tensor,
+                                scale_tensor.shape(),
+                                mkl_shape_saved_variance);
+      CHECK_NOTNULL(saved_variance_tensor);
+
+      // set mean primitive
+      auto mean_desc = memory::desc({1, depth_},
+                                    MklDnnType<T>(),
+                                    memory::format::nc);
+      auto mean_pd = memory::primitive_desc(mean_desc, cpu_engine);
+      char* saved_mean_data_tf = reinterpret_cast<char*>
+                                 (saved_mean_tensor->flat<T>().data());
+      std::memcpy(saved_mean_data_tf,
+                  reinterpret_cast<char*>(mean_values_),
+                  depth_*sizeof(T));
+      auto mean_m = memory(mean_pd,
+                           reinterpret_cast<void*>(saved_mean_data_tf));
+
+      // set variance primitive
+      auto variance_desc = memory::desc({1, depth_},
+                                    MklDnnType<T>(),
+                                    memory::format::nc);
+      auto variance_pd = memory::primitive_desc(variance_desc, cpu_engine);
+      char* saved_variance_data_tf = reinterpret_cast<char*>
+                  (saved_variance_tensor->flat<T>().data());
+      std::memcpy(saved_variance_data_tf,
+                  reinterpret_cast<char*>(variance_values_),
+                  depth_*sizeof(T));
+      auto variance_m = memory(variance_pd, saved_variance_data_tf);
+
+      prop_kind pk = (is_training_) ?
+                     prop_kind::forward_training :
+                     prop_kind::forward_scoring;
+      auto bnrm_fwd_desc = batch_normalization_forward::desc(
+                               pk, src.GetUsrMemDesc(), epsilon_,
+                               is_training_ ? use_scale_shift :
+                               (use_scale_shift | use_global_stats));
+      auto bnrm_fwd_pd = batch_normalization_forward::primitive_desc(
+                             bnrm_fwd_desc, cpu_engine);
+
+      // allocate dst tensor
+      MklDnnShape dnn_shape_dst;
+      TensorShape tf_shape_dst;
+      Tensor* dst_tensor = nullptr;
+      if (dnn_shape_src.IsMklTensor()) {
+        dnn_shape_dst.SetMklTensor(true);
+        auto dst_pd = bnrm_fwd_pd.dst_primitive_desc();
+        dnn_shape_dst.SetMklLayout(&dst_pd);
+        dnn_shape_dst.SetElemType(MklDnnType<T>());
+        dnn_shape_dst.SetTfLayout(dnn_shape_src.GetDimension(),
+                                  src_dims, format_m);
+        tf_shape_dst.AddDim(dst_pd.get_size()/sizeof(T));
+      } else {
+        dnn_shape_dst.SetMklTensor(false);
+        tf_shape_dst = src_tensor.shape();
+      }
+      AllocateOutputSetMklShape(context, dst_index, &dst_tensor,
+                                tf_shape_dst, dnn_shape_dst);
+
+      // Output of batchnorm has same shape as input.
+      dst.SetUsrMem(src_md, dst_tensor);
+
+      primitive bnrm_fwd_op;
+      if (is_training_) {
+        bnrm_fwd_op = batch_normalization_forward(
+                          bnrm_fwd_pd,
+                          src.GetOpMem(),
+                          weights_m,
+                          dst.GetOpMem(),
+                          mean_m,
+                          variance_m);
+      } else {
+        bnrm_fwd_op = batch_normalization_forward(
+                          bnrm_fwd_pd,
+                          src.GetOpMem(),
+                          mean_m,
+                          variance_m,
+                          (const primitive::at) weights_m,
+                          dst.GetOpMem());
+      }
+      std::vector<primitive> net;
+      net.push_back(bnrm_fwd_op);
+      stream(stream::kind::eager).submit(net).wait();
+
+      // copy batch_mean data
+      T* batch_mean_data_tf = reinterpret_cast<T*>(
+                                batch_mean_tensor->flat<T>().data());
+      std::memcpy(reinterpret_cast<char*>(batch_mean_data_tf),
+                  reinterpret_cast<char*>(mean_m.get_data_handle()),
+                  depth_*sizeof(T));
+
+      // copy batch_variance data with Bessel's correction
+      // if training mode is on
+      float adjust_factor = 1.0;
+      if (is_training_) {
+        size_t orig_size = src_dims[0] * src_dims[2] * src_dims[3];
+        size_t adjust_size = orig_size - 1;
+        adjust_factor = (static_cast<float>(orig_size)) / adjust_size;
+      }
+      T* batch_variance_data_tf = reinterpret_cast<T*>(
+                                  batch_variance_tensor->flat<T>().data());
+      for (int k=0; k < depth_; k++)
+        batch_variance_data_tf[k] =
+            (reinterpret_cast<T*>(variance_m.get_data_handle()))[k]
+            * adjust_factor;
+    } catch (mkldnn::error &e) {
+      string error_msg = "Status: " + std::to_string(e.status) +
+                         ", message: " + string(e.message) +
+                         ", in file " + string(__FILE__) + ":" +
+                         std::to_string(__LINE__);
+      OP_REQUIRES_OK(context,
+                     errors::Aborted("Operation received an exception:",
+                     error_msg));
+    }
+  }
+
+ private:
+  T epsilon_;
+  TensorFormat tensor_format_;
+  bool is_training_;
+  T* mean_values_;
+  T* variance_values_;
+  size_t depth_;          // batch normalization is done for per channel.
+
+  void ExtractParams(OpKernelContext* context) {
+    const Tensor& input = MklGetInput(context, 0);
+    depth_ = static_cast<int>(GetTensorDim(input, tensor_format_, 'C'));
+  }
+
+  void SetMeanVariance(const Tensor& mean, const Tensor& variance) {
+    mean_values_ = reinterpret_cast<T*>(
+                       const_cast<T*>(mean.flat<T>().data()));
+    variance_values_ = reinterpret_cast<T*>(
+                       const_cast<T*>(variance.flat<T>().data()));
+  }
+};
+
+
+template <typename Device, typename T>
+class MklFusedBatchNormGradOp : public OpKernel {
+ public:
+  explicit MklFusedBatchNormGradOp(OpKernelConstruction* context)
+      : OpKernel(context) {
+    float epsilon;
+    OP_REQUIRES_OK(context, context->GetAttr("epsilon", &epsilon));
+    epsilon_ = T(epsilon);
+    string tensor_format;
+    OP_REQUIRES_OK(context, context->GetAttr("data_format", &tensor_format));
+    OP_REQUIRES(context, FormatFromString(tensor_format, &tensor_format_),
+                errors::InvalidArgument("Invalid data format"));
+  }
+
+  void Compute(OpKernelContext* context) override {
+    try {
+      auto cpu_engine = engine(engine::cpu, 0);
+
+      const size_t diff_dst_index = 0;  // index of diff_dst tensor
+      const size_t src_index = 1;       // index of src input tensor
+      const size_t scale_index = 2;     // index of scale tensor
+      const size_t mean_index = 3;      // index of saved_mean tensor
+      const size_t variance_index = 4;  // index of saved_variance tensor
+      const Tensor& diff_dst_tensor = MklGetInput(context, diff_dst_index);
+      const Tensor& src_tensor = MklGetInput(context, src_index);
+      const Tensor& scale_tensor = MklGetInput(context, scale_index);
+      const Tensor& saved_mean_tensor = MklGetInput(context, mean_index);
+      const Tensor& saved_variance_tensor = MklGetInput(context,
+                                            variance_index);
+
+      MklDnnShape dnn_shape_src, dnn_shape_diff_dst;
+      GetMklShape(context, src_index, &dnn_shape_src);
+      GetMklShape(context, diff_dst_index, &dnn_shape_diff_dst);
+
+      if (dnn_shape_diff_dst.IsMklTensor()) {
+        OP_REQUIRES(context, dnn_shape_diff_dst.GetDimension() == 4,
+                    errors::InvalidArgument(
+                        "input must be 4-dimensional",
+                        diff_dst_tensor.shape().DebugString()));
+      } else {
+        OP_REQUIRES(context, diff_dst_tensor.dims() == 4,
+                    errors::InvalidArgument(
+                        "input must be 4-dimensional",
+                        diff_dst_tensor.shape().DebugString()));
+      }
+
+      if (dnn_shape_src.IsMklTensor()) {
+        OP_REQUIRES(context, dnn_shape_src.GetDimension() == 4,
+                    errors::InvalidArgument(
+                        "input must be 4-dimensional",
+                         src_tensor.shape().DebugString()));
+      } else {
+        OP_REQUIRES(context, src_tensor.dims() == 4,
+                    errors::InvalidArgument(
+                        "input must be 4-dimensional",
+                        src_tensor.shape().DebugString()));
+      }
+
+      OP_REQUIRES(context, scale_tensor.dims() == 1,
+                  errors::InvalidArgument(
+                      "scale must be 1-dimensional",
+                      scale_tensor.shape().DebugString()));
+      OP_REQUIRES(context, saved_mean_tensor.dims() == 1,
+                  errors::InvalidArgument(
+                      "saved mean must be 1-dimensional",
+                       saved_mean_tensor.shape().DebugString()));
+
+      OP_REQUIRES(context, saved_variance_tensor.dims() == 1,
+                  errors::InvalidArgument(
+                      "saved variance must be 1-dimensional",
+                      saved_variance_tensor.shape().DebugString()));
+
+      if (dnn_shape_src.IsMklTensor())
+        depth_ = dnn_shape_src.DimSize(MklDnnDims::Dim_C);
+      else
+        ExtractParams(context);
+
+      memory::format format_m;
+      if (dnn_shape_src.IsMklTensor()) {
+        if (dnn_shape_src.IsTensorInNCHWFormat())
+          format_m = memory::format::nchw;
+        else
+          format_m = memory::format::nhwc;
+      } else {
+        format_m = TFDataFormatToMklDnnDataFormat(tensor_format_);
+      }
+
+      MklDnnData<T> src(&cpu_engine);
+      MklDnnData<T> mean(&cpu_engine);
+      MklDnnData<T> variance(&cpu_engine);
+      MklDnnData<T> diff_dst(&cpu_engine);
+      MklDnnData<T> diff_src(&cpu_engine);
+
+      memory::dims src_dims, diff_dst_dims;
+      if (dnn_shape_src.IsMklTensor())
+        src_dims = TFShapeToMklDnnDimsInNCHW(
+                       dnn_shape_src.GetTfShape(), tensor_format_);
+      else
+        src_dims = TFShapeToMklDnnDimsInNCHW(
+                       src_tensor.shape(), tensor_format_);
+
+      if (dnn_shape_diff_dst.IsMklTensor())
+        diff_dst_dims = TFShapeToMklDnnDimsInNCHW(
+                            dnn_shape_diff_dst.GetTfShape(),
+                            tensor_format_);
+      else
+        diff_dst_dims = TFShapeToMklDnnDimsInNCHW(
+                            diff_dst_tensor.shape(),
+                            tensor_format_);
+
+      // set src and diff_dst primitives
+      memory::desc src_md({}, memory::data_undef, memory::format_undef);
+      memory::desc diff_dst_md({}, memory::data_undef, memory::format_undef);
+      if (dnn_shape_src.IsMklTensor() || dnn_shape_diff_dst.IsMklTensor()) {
+        if (dnn_shape_src.IsMklTensor()) {
+          src_md = dnn_shape_src.GetMklLayout();
+          diff_dst_md = src_md;
+        } else {
+          diff_dst_md = dnn_shape_diff_dst.GetMklLayout();
+          src_md = diff_dst_md;
+        }
+      } else {
+        src_md =  memory::desc(src_dims, MklDnnType<T>(), format_m);
+        diff_dst_md = src_md;
+      }
+      src.SetUsrMem(src_md, &src_tensor);
+      diff_dst.SetUsrMem(diff_dst_md, &diff_dst_tensor);
+
+      // weights -- DNN packs scales/shifts as weights in order of
+      // scale, ..., scale, shift, ..., shift
+      auto weights_desc = memory::desc({2, depth_},
+                                       MklDnnType<T>(),
+                                       memory::format::nc);
+      auto weights_pd = memory::primitive_desc(weights_desc, cpu_engine);
+      auto weights_m = memory(weights_pd);
+      T* weights_data = reinterpret_cast<T*>(weights_m.get_data_handle());
+      T* scale_tf = reinterpret_cast<T*>(const_cast<T*>
+                                        (scale_tensor.flat<T>().data()));
+      for (int k=0; k < depth_; k++) {
+        weights_data[k] = scale_tf[k];
+        weights_data[k + depth_] = 0;
+      }
+
+      // set mean primitive
+      memory::dims mv_dims = GetMeanVarianceDims();
+      mean.SetUsrMem(mv_dims,
+                     memory::format::nc,
+                     const_cast<void*>(static_cast<const void*>
+                     (saved_mean_tensor.flat<T>().data())));
+      mean.SetOpMemDesc(mv_dims, memory::format::nc);
+
+      // set variance primitive
+      variance.SetUsrMem(mv_dims,  memory::format::nc,
+                         const_cast<void*>(static_cast<const void*>
+                         (saved_variance_tensor.flat<T>().data())));
+      variance.SetOpMemDesc(mv_dims, memory::format::nc);
+
+      // set diff_weight primitive
+      auto diff_weights_desc = memory::desc(
+                                 {2, depth_},
+                                 MklDnnType<T>(),
+                                 memory::format::nc);
+      auto diff_weights_pd = memory::primitive_desc(
+                                diff_weights_desc,
+                                cpu_engine);
+      auto diff_weights_m = memory(diff_weights_pd);
+
+      auto bnrm_fwd_desc = batch_normalization_forward::desc(
+                                prop_kind::forward_training,
+                                src.GetUsrMemDesc(),
+                                epsilon_,
+                                use_scale_shift);
+      auto bnrm_fwd_pd = batch_normalization_forward::primitive_desc(
+                                bnrm_fwd_desc,
+                                cpu_engine);
+
+      // Indices of output tensors
+      const size_t diff_src_index = 0;    // index of diff_src tensor
+      const size_t diff_scale_index = 1;  // index of diff_scale tensor
+      const size_t diff_shift_index = 2;  // index of diff_shift tensor
+      const size_t p1_index = 3;  // index of 1st placeholder tensor
+      const size_t p2_index = 4;  // index of 2nd placeholder tensor
+
+      // allocate diff_src tensor
+      MklDnnShape dnn_shape_diff_src;
+      TensorShape tf_shape_diff_src;
+      Tensor* diff_src_tensor = nullptr;
+      if (dnn_shape_src.IsMklTensor()) {
+        dnn_shape_diff_src.SetMklTensor(true);
+        auto diff_src_pd = bnrm_fwd_pd.dst_primitive_desc();
+        dnn_shape_diff_src.SetMklLayout(&diff_src_pd);
+        dnn_shape_diff_src.SetElemType(MklDnnType<T>());
+        dnn_shape_diff_src.SetTfLayout(
+                              dnn_shape_src.GetDimension(),
+                              src_dims,
+                              format_m);
+        dnn_shape_diff_src.SetTfDimOrder(
+                              dnn_shape_src.GetDimension(),
+                              tensor_format_);
+        tf_shape_diff_src.AddDim(diff_src_pd.get_size()/sizeof(T));
+      } else {
+        dnn_shape_diff_src.SetMklTensor(false);
+        tf_shape_diff_src = src_tensor.shape();
+      }
+      AllocateOutputSetMklShape(context, diff_src_index, &diff_src_tensor,
+                                tf_shape_diff_src, dnn_shape_diff_src);
+
+      diff_src.SetUsrMem(src_md, diff_src_tensor);
+
+      prop_kind pk = prop_kind::backward;
+      auto bnrm_bwd_desc = batch_normalization_backward::desc(
+                               pk,
+                               diff_src.GetUsrMemDesc(),
+                               src.GetUsrMemDesc(),
+                               epsilon_,
+                               use_scale_shift);
+      auto bnrm_bwd_pd = batch_normalization_backward::primitive_desc(
+                               bnrm_bwd_desc,
+                               cpu_engine,
+                               bnrm_fwd_pd);
+
+      auto bnrm_bwd_op = batch_normalization_backward(
+                               bnrm_bwd_pd,
+                               src.GetOpMem(),
+                               mean.GetOpMem(),
+                               variance.GetOpMem(),
+                               diff_dst.GetOpMem(),
+                               weights_m,
+                               diff_src.GetOpMem(),
+                               diff_weights_m);
+
+      std::vector<primitive> net;
+      net.push_back(bnrm_bwd_op);
+      stream(stream::kind::eager).submit(net).wait();
+
+      // separate out scale and shift grad and copy to individual tensors
+      const TensorShape& tf_shape_scale_shift = scale_tensor.shape();
+      Tensor* diff_scale_tensor = nullptr;
+      MklDnnShape mkl_shape_diff_scale;
+      mkl_shape_diff_scale.SetMklTensor(false);
+      AllocateOutputSetMklShape(context, diff_scale_index, &diff_scale_tensor,
+                                tf_shape_scale_shift, mkl_shape_diff_scale);
+
+      Tensor* diff_shift_tensor = nullptr;
+      MklDnnShape mkl_shape_diff_shift;
+      mkl_shape_diff_shift.SetMklTensor(false);
+      AllocateOutputSetMklShape(context, diff_shift_index, &diff_shift_tensor,
+                                tf_shape_scale_shift, mkl_shape_diff_shift);
+
+      // copy data: diff_scale and diff_shift
+      T* diff_weights_data_dnn = reinterpret_cast<T*>
+                                 (diff_weights_m.get_data_handle());
+      float* diff_scale_data_tf = const_cast<float*>(
+             static_cast<const float*>(diff_scale_tensor->flat<T>().data()));
+      float* diff_shift_data_tf = const_cast<float*>(
+             static_cast<const float*>(diff_shift_tensor->flat<T>().data()));
+      for (int i = 0; i < depth_; i++) {
+        diff_scale_data_tf[i] = diff_weights_data_dnn[i];
+        diff_shift_data_tf[i] = diff_weights_data_dnn[i + depth_];
+      }
+
+      // Placeholders for estimated_mean and estimated_variance, which are
+      // used for inference and thus not needed here for gradient computation.
+      Tensor* p1_tensor = nullptr, *p2_tensor = nullptr;
+      MklDnnShape mkl_shape_p;
+      mkl_shape_p.SetMklTensor(false);
+      AllocateOutputSetMklShape(context, p1_index, &p1_tensor,
+                                TensorShape({}), mkl_shape_p);
+      AllocateOutputSetMklShape(context, p2_index, &p2_tensor,
+                                TensorShape({}), mkl_shape_p);
+    } catch (mkldnn::error &e) {
+      string error_msg = "Status: " + std::to_string(e.status) +
+                          ", message: " + string(e.message) +
+                          ", in file " + string(__FILE__) + ":" +
+                          std::to_string(__LINE__);
+      OP_REQUIRES_OK(context,
+                     errors::Aborted("Operation received an exception:",
+                     error_msg));
+    }
+  }
+
+ private:
+  T epsilon_;
+  TensorFormat tensor_format_;
+  int depth_;             // batch normalization is done for per channel.
+
+  void ExtractParams(OpKernelContext* context) {
+      const Tensor& input = MklGetInput(context, 0);
+      depth_ = static_cast<int>(GetTensorDim(input, tensor_format_, 'C'));
+  }
+
+  memory::dims GetMeanVarianceDims() {
+    return memory::dims({1, depth_});
+  }
+};
+
+#endif
+
+#define REGISTER_MKL_CPU(T)                                         \
+  REGISTER_KERNEL_BUILDER(Name("_MklFusedBatchNorm")                \
+                              .Device(DEVICE_CPU)                   \
+                              .TypeConstraint<T>("T")               \
+                              .Label(mkl_op_registry::kMklOpLabel), \
+                          MklFusedBatchNormOp<CPUDevice, T>);
+TF_CALL_float(REGISTER_MKL_CPU);
+#undef REGISTER_MKL_CPU
 
 #define REGISTER_MKL_CPU(T)                                         \
   REGISTER_KERNEL_BUILDER(Name("_MklFusedBatchNormGrad")            \
diff --git a/tensorflow/core/kernels/mkl_identity_op.cc b/tensorflow/core/kernels/mkl_identity_op.cc
index f31e7afd46..9ee27ee21c 100644
--- a/tensorflow/core/kernels/mkl_identity_op.cc
+++ b/tensorflow/core/kernels/mkl_identity_op.cc
@@ -28,8 +28,15 @@ limitations under the License.
 #include "mkl_dnn_types.h"
 #include "tensorflow/core/util/mkl_util.h"
 
+#ifdef INTEL_MKL_DNN
+#include "mkldnn.hpp"
+#endif
+
 namespace tensorflow {
 typedef Eigen::ThreadPoolDevice CPUDevice;
+
+#ifndef INTEL_MKL_DNN
+
 template <typename Device, typename T>
 class MklIdentityOp : public OpKernel {
  public:
@@ -50,6 +57,32 @@ class MklIdentityOp : public OpKernel {
   bool IsExpensive() override { return false; }
 };
 
+#else
+
+template <typename Device, typename T>
+class MklIdentityOp : public OpKernel {
+ public:
+  explicit MklIdentityOp(OpKernelConstruction* context) : OpKernel(context) {}
+
+  void Compute(OpKernelContext* context) override {
+    MklDnnShape dnn_shape_input;
+    const int kInputIdx = 0, kOutputIdx = 0;
+    GetMklShape(context, kInputIdx, &dnn_shape_input);
+
+    if (dnn_shape_input.IsMklTensor()) {
+      ForwardMklTensorInToOut(context, kInputIdx, kOutputIdx);
+    } else {
+      ForwardTfTensorInToOut(context, kInputIdx, kOutputIdx);
+    }
+  }
+
+  // TensorFlow's IdentityOp has the following member function, so kept it
+  // as it is.
+  bool IsExpensive() override { return false; }
+};
+
+#endif
+
 #define REGISTER_MKL_CPU(T)                                         \
   REGISTER_KERNEL_BUILDER(Name("_MklIdentity")                      \
                               .Device(DEVICE_CPU)                   \
diff --git a/tensorflow/core/kernels/mkl_input_conversion_op.cc b/tensorflow/core/kernels/mkl_input_conversion_op.cc
index b58e44e398..001834b13b 100644
--- a/tensorflow/core/kernels/mkl_input_conversion_op.cc
+++ b/tensorflow/core/kernels/mkl_input_conversion_op.cc
@@ -31,6 +31,12 @@ limitations under the License.
 #include "tensorflow/core/kernels/mkl_tfconv_op.h"
 #include "tensorflow/core/util/mkl_util.h"
 
+#ifdef INTEL_MKL_DNN
+#include "mkldnn.hpp"
+
+using mkldnn::stream;
+#endif
+
 namespace tensorflow {
 typedef Eigen::ThreadPoolDevice CPUDevice;
 
@@ -44,15 +50,16 @@ typedef Eigen::ThreadPoolDevice CPUDevice;
 // else if both inputs are in mkl format:
 //   if both have the same shape:
 //     pass the inputs through to the output
-// 	else:
-// 		convert both to TF
+//   else:
+//     convert both to TF
 // else if one is TF and one is MKL:
-// 	if broadcast is needed:
-// 		convert the MKL format input to TF format
-// 	else:
-// 		convert the TF format input to MKL format
+//   if broadcast is needed:
+//     convert the MKL format input to TF format
+//   else:
+//     convert the TF format input to MKL format
 ///////////////////////////////////////////////////////////
 
+#ifndef INTEL_MKL_DNN
 template <typename Device, typename T>
 class MklInputConversionOp : public OpKernel {
  public:
@@ -242,6 +249,199 @@ class MklInputConversionOp : public OpKernel {
   bool has_avx512f_ = false;
 };
 
+#else
+
+template <typename Device, typename T>
+class MklInputConversionOp : public OpKernel {
+ public:
+  explicit MklInputConversionOp(OpKernelConstruction* context)
+      : OpKernel(context) {
+    OP_REQUIRES_OK(context, context->GetAttr("data_format", &data_format_str));
+    OP_REQUIRES_OK(context, context->GetAttr("T", &op_data_type));
+    has_avx512f_ = port::TestCPUFeature(port::CPUFeature::AVX512F);
+  }
+
+ private:
+  void Compute(OpKernelContext* context) override {
+    const Tensor& input_tensor_0 = MklGetInput(context, 0);
+    MklDnnShape input_shape_0;
+    GetMklShape(context, 0, &input_shape_0);
+
+    const Tensor& input_tensor_1 = MklGetInput(context, 1);
+    MklDnnShape input_shape_1;
+    GetMklShape(context, 1, &input_shape_1);
+
+    bool tf_shapes_are_same = context->input(0).shape() ==
+                              context->input(1).shape();
+
+    VLOG(1) << "MklInputConversionOp: Input shapes are "
+            << (tf_shapes_are_same ? "*same*" : "*different*") << ": "
+            << context->input(0).shape().DebugString() << " and "
+            << context->input(1).shape().DebugString();
+
+    // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+    // if both inputs are in TF format, just copy input tensors to output.
+    if (!input_shape_0.IsMklTensor() && !input_shape_1.IsMklTensor()) {
+      VLOG(1) << "MklInputConversionOp: No conversion needed, "
+              << "copying TF inputs to output";
+
+      ForwardTfTensorInToOut(context, 0, 0);
+      ForwardTfTensorInToOut(context, 1, 1);
+      return;
+    }
+
+    // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+    // If both inputs are in MKL format
+    if (input_shape_0.IsMklTensor() && input_shape_1.IsMklTensor()) {
+      // If both have the same shape, pass them through
+      if (tf_shapes_are_same) {
+        VLOG(1) << "MklInputConversionOp: No conversion needed, "
+                << "copying MKL inputs with identical shapes to output";
+
+        ForwardMklTensorInToOut(context, 0, 0);
+        ForwardMklTensorInToOut(context, 1, 1);
+        return;
+      }
+
+      // Sanity check
+      bool mkl_shapes_are_same = input_shape_0 == input_shape_1;
+      if (mkl_shapes_are_same) {
+        CHECK(false) << "MklInputConversionOp: Unexpected: TF shapes are "
+                        "different but MKL shapes are same";
+      }
+
+      // Both have different shapes, so broadcast will be necessary.
+      // Convert to TF and pass both tensors through (we can't do broadcast
+      // with MKL tensors)
+      VLOG(1) << "MklInputConversionOp: Broadcast needed, "
+              << "converted MKL inputs to TF format";
+
+      MklToTfOp<Device, T>::ConvertMklToTf(this, context, data_format_str,
+                                           op_data_type, has_avx512f_, 0);
+      MklToTfOp<Device, T>::ConvertMklToTf(this, context, data_format_str,
+                                           op_data_type, has_avx512f_, 1);
+      SetDummyMklShapeOutput(context, 0);
+      SetDummyMklShapeOutput(context, 1);
+      return;
+    }
+
+    // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+    // One input is MKL and one is TF. If no broadcast is needed, convert
+    // the TF tensor to MKL, otherwise convert the MKL tensor to TF format
+    VLOG(1) << "MklInputConversionOp: Inputs in different formats (MKL/TF)";
+
+    const Tensor* mkl_tensor;
+    const MklDnnShape* mkl_shape;
+    const Tensor* tf_tensor;
+    MklDnnShape* tf_mkl_shape;
+    uint mkl_tensor_index;
+    uint tf_tensor_index;
+    if (input_shape_0.IsMklTensor() && !input_shape_1.IsMklTensor()) {
+      mkl_tensor = &input_tensor_0;
+      mkl_shape = &input_shape_0;
+      mkl_tensor_index = 0;
+      tf_tensor = &input_tensor_1;
+      tf_mkl_shape = &input_shape_1;
+      tf_tensor_index = 1;
+    } else if (!input_shape_0.IsMklTensor() && input_shape_1.IsMklTensor()) {
+      mkl_tensor = &input_tensor_1;
+      mkl_shape = &input_shape_1;
+      mkl_tensor_index = 1;
+      tf_tensor = &input_tensor_0;
+      tf_mkl_shape = &input_shape_0;
+      tf_tensor_index = 0;
+    } else {
+      CHECK(false) << "MklInputConversionOp: Unexpected combination of input "
+                      "shapes for MKL "
+                   << "element-wise op";
+    }
+
+    // Broadcast is needed if the shapes are not the same
+    bool broadcast_needed;
+
+    size_t in0_size = 1;
+    for (size_t i = 0; i < mkl_shape->GetDimension(); ++i)
+      in0_size *= mkl_shape->TfDimSize(i);
+
+    size_t in1_size = 1;
+    for (size_t i = 0; i < tf_tensor->shape().dims(); ++i)
+      in1_size *= tf_tensor->shape().dim_size(i);
+
+    broadcast_needed = (in0_size != in1_size);
+
+    if (!broadcast_needed) {
+      // Both shapes are same, convert the TF input to MKL
+      VLOG(1) << "MklInputConversionOp: No broadcast needed.";
+      VLOG(1) << "MklInputConversionOp: Converting input " << tf_tensor_index
+              << " to MKL format";
+
+      // Create MklDnnShape for output Mkl tensor.
+      Tensor* tensor_out;
+      MklDnnShape mkl_output_mkl_shape;
+      mkl_output_mkl_shape.SetMklTensor(true);
+      mkl_output_mkl_shape.SetElemType(MklDnnType<T>());
+      mkl_output_mkl_shape.SetTfLayout(mkl_shape->GetDimension(),
+                                       mkl_shape->GetSizesAsMklDnnDims(),
+                                       mkl_shape->GetTfDataFormat());
+      // ** Temporarily borrow the layout from the MKL input **
+      auto output_mkl_md = mkl_shape->GetMklLayout();
+      mkl_output_mkl_shape.SetMklLayout(&output_mkl_md);
+
+      // Create output Mkl tensor
+      AllocateOutputSetMklShape(context, tf_tensor_index, &tensor_out,
+                                mkl_tensor->shape(), mkl_output_mkl_shape);
+
+      // Create MklDnnData object for input tensor. Input tensor is in
+      // Tensorflow layout.
+      auto cpu_engine = engine(engine::cpu, 0);
+      MklDnnData<T> tf_input(&cpu_engine);
+      auto input_tf_md = mkl_output_mkl_shape.GetTfLayout();
+      tf_input.SetUsrMem(input_tf_md, &tf_tensor);
+
+      // Create reorder between tensorflow layout and Mkl layout.
+      std::vector<primitive> net;
+      CHECK_EQ(tf_input.CheckReorderToOpMem(memory::primitive_desc(
+                                            output_mkl_md, cpu_engine),
+                                            tensor_out, &net),
+               true);
+      stream(stream::kind::eager).submit(net).wait();
+
+      // -- The tensor in MKL format passes through --
+      ForwardMklTensorInToOut(context, mkl_tensor_index, mkl_tensor_index);
+    } else {
+      // Broadcast is needed, so convert the MKL input to TF
+      VLOG(1) << "MklInputConversionOp: Broadcast needed.";
+      VLOG(1) << "MklInputConversionOp: Converting input " << mkl_tensor_index
+              << " to TF format";
+      MklToTfOp<Device, T>::ConvertMklToTf(this, context, data_format_str,
+                                           op_data_type, has_avx512f_,
+                                           mkl_tensor_index);
+      SetDummyMklShapeOutput(context, mkl_tensor_index);
+
+      // The tensor in TF format passes through
+      ForwardTfTensorInToOut(context, tf_tensor_index, tf_tensor_index);
+    }
+
+    VLOG(1) << "MklInputConversionOp: Shapes (output): "
+            << context->mutable_output(0)->shape().DebugString() << " and "
+            << context->mutable_output(1)->shape().DebugString();
+
+    VLOG(1) << "MklInputConversion completed successfully.";
+  }
+
+ private:
+  /// Data format of the operation
+  string data_format_str;
+
+  /// Data type of the operation
+  DataType op_data_type;
+
+  /// CPUIDInfo
+  bool has_avx512f_ = false;
+};
+
+#endif
+
 ///////////////////////////////////////////////////////////
 //               Register kernel
 ///////////////////////////////////////////////////////////
@@ -253,7 +453,10 @@ class MklInputConversionOp : public OpKernel {
                               .Label(mkl_op_registry::kMklOpLabel), \
                           MklInputConversionOp<CPUDevice, T>);
 
-TF_CALL_NUMBER_TYPES(REGISTER_CPU);
+// TODO(nhasabni): We cannot support all number types since MklDnn does
+// not support types.
+// TF_CALL_NUMBER_TYPES(REGISTER_CPU);
+TF_CALL_float(REGISTER_CPU);
 #undef REGISTER_CPU
 }  // namespace tensorflow
 #endif  // INTEL_MKL
diff --git a/tensorflow/core/kernels/mkl_lrn_op.cc b/tensorflow/core/kernels/mkl_lrn_op.cc
index aa08e93924..227765e46d 100644
--- a/tensorflow/core/kernels/mkl_lrn_op.cc
+++ b/tensorflow/core/kernels/mkl_lrn_op.cc
@@ -17,7 +17,7 @@ limitations under the License.
 // See docs in ../ops/nn_ops.cc. This opkernel uses MKL library, create MKL
 // layout and primitives, use MKL dnn primitives to compute local
 // response normalization
-
+#undef INTEL_MKL
 #ifdef INTEL_MKL
 
 #define EIGEN_USE_THREADS
diff --git a/tensorflow/core/kernels/mkl_maxpooling_op.cc b/tensorflow/core/kernels/mkl_maxpooling_op.cc
index 846bb5710d..de4d7d2e72 100644
--- a/tensorflow/core/kernels/mkl_maxpooling_op.cc
+++ b/tensorflow/core/kernels/mkl_maxpooling_op.cc
@@ -16,17 +16,32 @@ limitations under the License.
 // See docs in ../ops/nn_ops.cc.
 #ifdef INTEL_MKL
 #define EIGEN_USE_THREADS
-
 #include "tensorflow/core/framework/op_kernel.h"
 #include "tensorflow/core/kernels/mkl_pooling_ops_common.h"
 #include "tensorflow/core/lib/core/errors.h"
 #include "tensorflow/core/util/mkl_util.h"
 #include "tensorflow/core/util/padding.h"
 
+#ifdef INTEL_MKL_DNN
+#include <algorithm>
+#include "mkldnn.hpp"
+using mkldnn::memory;
+using mkldnn::error;
+using mkldnn::pooling_forward;
+using mkldnn::pooling_backward;
+using mkldnn::padding_kind;
+using mkldnn::engine;
+using mkldnn::prop_kind;
+using mkldnn::algorithm;
+#endif
+
 namespace tensorflow {
 
 typedef Eigen::ThreadPoolDevice CPUDevice;
 
+// For now, MKL-ML is default. So making MKL-DNN not a default choice.
+#ifndef INTEL_MKL_DNN
+
 // An implementation of MaxPooling (forward).
 template <typename Device, typename T>
 class MklMaxPoolingOp : public OpKernel {
@@ -475,8 +490,348 @@ class MklMaxPoolingGradOp : public OpKernel {
   TensorFormat data_format_;
 
   bool workspace_enabled_;
+};  // MklMaxPoolingGradOp
+
+#else  // INTEL_MKL_DNN is defined
+
+// An implementation of MaxPooling (forward).
+template <typename Device, typename T>
+class MklMaxPoolingOp : public MklPoolingForwardOpBase<T> {
+ public:
+  explicit MklMaxPoolingOp(OpKernelConstruction* context)
+            : MklPoolingForwardOpBase<T>(context) {
+    // In Max Pooling, MKLDNN does not allow passing workspace as NULL.
+    // So we set workspace_enabled_ to true.
+    this->workspace_enabled_ = true;
+  }
+
+  void Compute(OpKernelContext* context) override {
+    try {
+      auto cpu_engine = engine(engine::cpu, 0);
+      const Tensor& input_tensor = MklGetInput(context,
+                this->kInputTensorIndexInput);
+      MklDnnShape dnn_shape_input;
+      GetMklShape(context, this->kInputTensorIndexInput, &dnn_shape_input);
+      this->SanityCheckInput(context, input_tensor, dnn_shape_input);
+      if (!context->status().ok()) return;
+
+      MklDnnData<T> dnn_data_input(&cpu_engine);
+      MklDnnData<T> dnn_data_output(&cpu_engine);
+      MklDnnData<T> dnn_data_wksp(&cpu_engine);
+
+      // initialize variables for the pooling op
+      MklPoolParameters pool_params;
+      // Get the input tensor and initialize the pooling parameters
+      this->ConfigureInput(context, dnn_shape_input,
+                        input_tensor, &pool_params,
+                        &dnn_data_input);
+      OP_REQUIRES_OK(context, context->status());
+
+      // Declare output tensor
+      Tensor* output_tensor = nullptr;
+      memory::dims output_dims_mkl_order;
+      this->GetOutputDims(pool_params, &output_dims_mkl_order);
+
+      // If input is in Mkl layout, then just get the memory format from it
+      // directly, instead of using input data_format to MaxPool.
+      if (dnn_shape_input.IsMklTensor()) {
+        dnn_data_output.SetUsrMem(output_dims_mkl_order,
+                                  static_cast<memory::format>(
+              dnn_data_input.GetUsrMemDesc().data.format));
+      } else {
+        dnn_data_output.SetUsrMem(output_dims_mkl_order,
+                                  this->data_format_mkldnn_);
+      }
+
+      // describe the memory layout; let mkl-dnn choose the best for the op
+      dnn_data_output.SetOpMemDesc(output_dims_mkl_order, memory::format::any);
+
+      auto pool_desc = pooling_forward::desc(prop_kind::forward,
+            algorithm::pooling_max,
+            dnn_data_input.GetUsrMemDesc(),
+            dnn_data_output.GetUsrMemDesc(),
+            memory::dims({  pool_params.row_stride,
+                            pool_params.col_stride}),
+            memory::dims({  pool_params.window_rows,
+                            pool_params.window_cols}),
+            memory::dims({  static_cast<int>(pool_params.pad_top),
+                            static_cast<int>(pool_params.pad_left)}),
+            memory::dims({  static_cast<int>(pool_params.pad_bottom),
+                            static_cast<int>(pool_params.pad_right)}),
+            TFPaddingToMklDnnPadding(this->padding_));
+        auto pool_fwd_desc = pooling_forward::primitive_desc(pool_desc,
+            cpu_engine);
+
+      this->AllocateOutputTensor(context, pool_fwd_desc, output_dims_mkl_order,
+                            this->data_format_mkldnn_, &output_tensor);
+      OP_REQUIRES_OK(context, context->status());
+      dnn_data_output.SetUsrMemDataHandle(output_tensor);
+
+      AllocateWorkspaceTensor(context, pool_fwd_desc, &dnn_data_wksp);
+      OP_REQUIRES_OK(context, context->status());
+
+      this->PrepareAndExecuteNet(pool_fwd_desc, &dnn_data_input,
+                        &dnn_data_output, &dnn_data_wksp);
+    } catch (mkldnn::error &e) {
+        string error_msg = "Status: " + std::to_string(e.status) +
+                        ", message: " + string(e.message) +
+                        ", in file " + string(__FILE__) + ":" +
+                        std::to_string(__LINE__);
+        OP_REQUIRES_OK(context,
+                        errors::Aborted("Compute received an exception:",
+                                         error_msg));
+    }
+  }  // Compute
+
+ private:
+    const int kOutputTensorIndexWorkspace = 1;
+
+    void AllocateWorkspaceTensor(OpKernelContext* context,
+                const pooling_forward::primitive_desc& pool_fwd_prim_desc,
+                MklDnnData<T>* dnn_data_wksp) {
+        CHECK_NOTNULL(dnn_data_wksp);
+        Tensor* workspace_tensor = nullptr;
+        memory::primitive_desc workspace_pd
+                    = pool_fwd_prim_desc.workspace_primitive_desc();
+        size_t workspace_t_elems = this->GetNumTElements(workspace_pd);
+        MklDnnShape workspace_mkl_shape;
+        workspace_mkl_shape.SetMklTensor(false);
+        TensorShape workspace_tf_shape;
+        workspace_tf_shape.AddDim(workspace_t_elems);
+        AllocateOutputSetMklShape(context, kOutputTensorIndexWorkspace,
+                                &workspace_tensor,
+                                workspace_tf_shape, workspace_mkl_shape);
+        CHECK_NOTNULL(workspace_tensor);
+        dnn_data_wksp->SetUsrMem(workspace_pd, workspace_tensor);
+    }
 };
 
+// The operation to compute MaxPool gradients.
+// It takes three inputs:
+//   - The original input tensor
+//   - The original output tensor
+//   - Backprop tensor for output
+// It produces one output: backprop tensor for input.
+template <class Device, class T>
+class MklMaxPoolingGradOp : public MklPoolingBackwardOpBase<T> {
+ public:
+  explicit MklMaxPoolingGradOp(OpKernelConstruction* context)
+      : MklPoolingBackwardOpBase<T>(context) {
+  }
+
+  void Compute(OpKernelContext* context) override {
+    try {
+        auto cpu_engine = engine(engine::cpu, 0);
+        const Tensor& orig_input_tensor = MklGetInput(context,
+            kInputTensorIndexOrigInput);
+        const Tensor& orig_output_tensor = MklGetInput(context,
+            kInputTensorIndexOrigOutput);
+        const Tensor& grad_tensor = MklGetInput(context,
+            kInputTensorIndexGradient);
+        const Tensor& workspace_tensor = MklGetInput(context,
+            kInputTensorIndexWorkspace);
+        MklDnnShape orig_input_mkl_shape,
+                    orig_output_mkl_shape,
+                    grad_mkl_shape,
+                    workspace_mkl_shape;
+        GetMklShape(context, kInputTensorIndexOrigInput,
+            &orig_input_mkl_shape);
+        GetMklShape(context, kInputTensorIndexOrigOutput,
+            &orig_output_mkl_shape);
+        GetMklShape(context, kInputTensorIndexGradient,
+            &grad_mkl_shape);
+        GetMklShape(context, kInputTensorIndexWorkspace,
+            &workspace_mkl_shape);
+
+        SanityCheckInputs(context,
+                            orig_input_tensor, orig_output_tensor,
+                            grad_tensor, workspace_tensor,
+                            orig_input_mkl_shape, orig_output_mkl_shape,
+                            grad_mkl_shape, workspace_mkl_shape);
+        if (!context->status().ok()) return;
+
+        MklDnnData<T> grad_dnn_data(&cpu_engine);
+        MklDnnData<T> workspace_dnn_data(&cpu_engine);
+        MklDnnData<T> output_dnn_data(&cpu_engine);
+        Tensor* output_tensor = nullptr;
+        MklPoolParameters pool_params;
+        TensorShape orig_input_shape;
+        memory::dims output_dims_mkl_order, orig_input_dims_mkl_order;
+        memory::desc original_input_md = ConfigureOriginalInput(context,
+                                orig_input_tensor,
+                                orig_input_mkl_shape,
+                                &orig_input_dims_mkl_order,
+                                &pool_params,
+                                &orig_input_shape);
+
+        memory::desc original_output_md = this->ConfigureOriginalOutput(
+                                pool_params,
+                                orig_output_mkl_shape,
+                                output_dims_mkl_order);
+
+        memory::desc target_diff_dst_md =  this->ConfigureInputGradient(
+                                        grad_mkl_shape,
+                                        grad_tensor,
+                                        &grad_dnn_data,
+                                        original_output_md);
+
+        output_dnn_data.SetUsrMem(original_input_md);
+
+        // Create the forward pooling primitive descriptor so we can
+        // pass it as a hint to the backward pooling primitive descriptor
+        auto pool_fwd_desc = pooling_forward::desc(prop_kind::forward,
+                algorithm::pooling_max,
+                original_input_md,
+                original_output_md,
+                memory::dims({  pool_params.row_stride,
+                                pool_params.col_stride}),
+                memory::dims({  pool_params.window_rows,
+                                pool_params.window_cols}),
+                memory::dims({  static_cast<int>(pool_params.pad_top),
+                                static_cast<int>(pool_params.pad_left)}),
+                memory::dims({  static_cast<int>(pool_params.pad_bottom),
+                                static_cast<int>(pool_params.pad_right)}),
+                TFPaddingToMklDnnPadding(this->padding_));
+        auto pool_fwd_prim_desc
+                = pooling_forward::primitive_desc(pool_fwd_desc,
+                                                    cpu_engine);
+
+        auto pool_bkwd_desc = pooling_backward::desc(
+                algorithm::pooling_max,
+                output_dnn_data.GetUsrMemDesc(),
+                target_diff_dst_md,
+                memory::dims({  pool_params.row_stride,
+                                pool_params.col_stride}),
+                memory::dims({  pool_params.window_rows,
+                                pool_params.window_cols}),
+                memory::dims({  static_cast<int>(pool_params.pad_top),
+                                static_cast<int>(pool_params.pad_left)}),
+                memory::dims({  static_cast<int>(pool_params.pad_bottom),
+                                static_cast<int>(pool_params.pad_right)}),
+                TFPaddingToMklDnnPadding(this->padding_));
+        auto pool_bkwd_prim_desc
+            = pooling_backward::primitive_desc(pool_bkwd_desc,
+                                                cpu_engine,
+                                                pool_fwd_prim_desc);
+
+        this->AllocateOutputTensor(context, pool_bkwd_prim_desc,
+            orig_input_dims_mkl_order,
+            this->data_format_mkldnn_,
+            &output_tensor);
+        output_dnn_data.SetUsrMemDataHandle(output_tensor);
+
+        ConfigureWorkspace(workspace_tensor,
+                pool_fwd_prim_desc.workspace_primitive_desc(),
+                &workspace_dnn_data);
+        this->PrepareAndExecuteNet(pool_bkwd_prim_desc,
+                            &grad_dnn_data,
+                            &output_dnn_data,
+                            memory::primitive_desc(
+                                target_diff_dst_md,
+                                cpu_engine),
+                            &workspace_dnn_data);
+    } catch (mkldnn::error &e) {
+        string error_msg = "Status: " + std::to_string(e.status) +
+                        ", message: " + string(e.message) +
+                        ", in file " + string(__FILE__) + ":" +
+                        std::to_string(__LINE__);
+        OP_REQUIRES_OK(context,
+                        errors::Aborted("Compute received an exception:",
+                                         error_msg));
+    }
+  }  // Compute
+
+ private:
+    // .Input("orig_input: T")
+    // .Input("orig_output: T")
+    // .Input("grad: T")
+    // .Input("workspace: T")
+    const int kInputTensorIndexOrigInput = 0;
+    const int kInputTensorIndexOrigOutput = 1;
+    const int kInputTensorIndexGradient = 2;
+    const int kInputTensorIndexWorkspace = 3;
+    //  Output("output: T") in Base Class
+
+    memory::desc ConfigureOriginalInput(OpKernelContext* context,
+                                const Tensor& tensor_original_input,
+                                const MklDnnShape& original_input_mkl_shape,
+                                memory::dims* original_input_dims_mkl_order,
+                                MklPoolParameters* pool_params,
+                                TensorShape* input_tensor_shape) {
+        *input_tensor_shape = tensor_original_input.shape();
+        return MklPoolingBackwardOpBase<T>::ConfigureOriginalInput(
+                                        context,
+                                        tensor_original_input,
+                                        original_input_mkl_shape,
+                                        original_input_dims_mkl_order,
+                                        pool_params,
+                                        *input_tensor_shape);
+    }
+
+    void ConfigureWorkspace(const Tensor& workspace_tensor,
+                        memory::primitive_desc workspace_pd,
+                        MklDnnData<T> *workspace_dnn_data) {
+        CHECK_NOTNULL(workspace_dnn_data);
+
+        workspace_dnn_data->SetUsrMem(workspace_pd, &workspace_tensor);
+    }
+
+    void SanityCheckInputs(OpKernelContext* context,
+                            const Tensor& orig_input_tensor,
+                            const Tensor& orig_output_tensor,
+                            const Tensor& grad_tensor,
+                            const Tensor& workspace_tensor,
+                            const MklDnnShape& orig_input_mkl_shape,
+                            const MklDnnShape& orig_output_mkl_shape,
+                            const MklDnnShape& grad_mkl_shape,
+                            const MklDnnShape& workspace_mkl_shape) {
+        if (!orig_input_mkl_shape.IsMklTensor()) {
+            OP_REQUIRES(context, orig_input_tensor.dims() == 4,
+                errors::InvalidArgument("Original input shape must be "
+                "4-dimensional"));
+        } else {
+            OP_REQUIRES(context, orig_input_mkl_shape.GetDimension() == 4,
+                    errors::InvalidArgument("Original input shape must be "
+                    "4-dimensional"));
+        }
+        if (!orig_output_mkl_shape.IsMklTensor()) {
+            OP_REQUIRES(context, orig_output_tensor.dims() == 4,
+                errors::InvalidArgument("Original output must be "
+                        "4-dimensional"));
+        } else {
+            OP_REQUIRES(context, orig_output_mkl_shape.GetDimension() == 4,
+                    errors::InvalidArgument("Original output must be "
+                    "4-dimensional"));
+        }
+        if (!grad_mkl_shape.IsMklTensor()) {
+            OP_REQUIRES(context, grad_tensor.dims() == 4,
+                errors::InvalidArgument("Gradient must be 4-dimensional"));
+        } else {
+            OP_REQUIRES(context, grad_mkl_shape.GetDimension() == 4,
+                    errors::InvalidArgument("Gradient must be "
+                    "4-dimensional"));
+        }
+        if (this->workspace_enabled_){
+            // The workspace should not be an MKL tensor
+            OP_REQUIRES(context, workspace_mkl_shape.IsMklTensor() == false,
+                    errors::InvalidArgument("Workspace tensor should not"
+                                            " be an MKL Tensor."));
+            // It should only have one dimension
+            OP_REQUIRES(context, workspace_tensor.dims() == 1,
+                    errors::InvalidArgument("Workspace tensor must be "
+                                "1-dimensional"));
+        } else {
+            OP_REQUIRES(context, this->workspace_enabled_,
+                    errors::Unimplemented("MKL-DNN Max Pooling does not "
+                                "yet support the use case "
+                                "where MaxPoolGrad is called without first"
+                                " calling MaxPool."));
+        }
+    }
+};  // MklMaxPoolingGradOp
+
+#endif  // INTEL_MKL_DNN
+
 REGISTER_KERNEL_BUILDER(Name("_MklMaxPool")
                             .Device(DEVICE_CPU)
                             .TypeConstraint<float>("T")
diff --git a/tensorflow/core/kernels/mkl_pooling_ops_common.cc b/tensorflow/core/kernels/mkl_pooling_ops_common.cc
index 65e8852cfb..f7cadffd39 100644
--- a/tensorflow/core/kernels/mkl_pooling_ops_common.cc
+++ b/tensorflow/core/kernels/mkl_pooling_ops_common.cc
@@ -14,10 +14,13 @@ limitations under the License.
 ==============================================================================*/
 
 #ifdef INTEL_MKL
+
 #include <vector>
+#include <limits>
 #include "tensorflow/core/kernels/mkl_pooling_ops_common.h"
 #include "tensorflow/core/common_runtime/device.h"
 #include "tensorflow/core/framework/common_shape_fns.h"
+#include "tensorflow/core/kernels/bounds_check.h"
 
 namespace tensorflow {
 
@@ -39,6 +42,7 @@ void MklPoolParameters::Init(OpKernelContext* context,
   Init(context, ksize, stride, padding, data_format);
 }
 
+#ifndef INTEL_MKL_DNN
 // Initialization for MKL format
 void MklPoolParameters::Init(OpKernelContext* context,
                              const std::vector<int32>& ksize,
@@ -53,7 +57,22 @@ void MklPoolParameters::Init(OpKernelContext* context,
 
   Init(context, ksize, stride, padding, data_format);
 }
+#else
+// Initialization for MKL format
+void MklPoolParameters::Init(OpKernelContext* context,
+                             const std::vector<int32>& ksize,
+                             const std::vector<int32>& stride, Padding padding,
+                             TensorFormat data_format,
+                             const MklDnnShape* mklInputShape) {
+  // Get the input sizes
+  depth = mklInputShape->GetDimension('C');
+  tensor_in_cols = mklInputShape->GetDimension('W');
+  tensor_in_rows = mklInputShape->GetDimension('H');
+  tensor_in_batch = mklInputShape->GetDimension('N');
 
+  Init(context, ksize, stride, padding, data_format);
+}
+#endif  // INTEL_MKL_DNN
 // Common Initialization for TensorFlow and MKL formats
 void MklPoolParameters::Init(OpKernelContext* context,
                              const std::vector<int32>& ksize,
@@ -80,7 +99,7 @@ void MklPoolParameters::Init(OpKernelContext* context,
                   "MaxPooling supports exactly one of pooling across depth "
                   "or pooling across width/height."));
 
-  if (depth_window == 1) {
+  if (depth_window == 1) {  // we are pooling in the H and W
     OP_REQUIRES_OK(context, GetWindowedOutputSizeVerbose(
                                 tensor_in_rows, window_rows, row_stride,
                                 padding, &out_height, &pad_top, &pad_bottom));
@@ -88,7 +107,21 @@ void MklPoolParameters::Init(OpKernelContext* context,
     OP_REQUIRES_OK(context, GetWindowedOutputSizeVerbose(
                                 tensor_in_cols, window_cols, col_stride,
                                 padding, &out_width, &pad_left, &pad_right));
-  } else {
+#ifdef INTEL_MKL_DNN
+    // TF can work with int64, but mkldnn only supports int32
+    // Fail if the height or width are greater than MAX_INT
+
+    OP_REQUIRES(context, FastBoundsCheck(out_height,
+                                         std::numeric_limits<int>::max()),
+                errors::InvalidArgument("output height is too large"));
+
+    OP_REQUIRES(context, FastBoundsCheck(out_width,
+                                         std::numeric_limits<int>::max()),
+                errors::InvalidArgument("output width is too large"));
+
+#endif
+    out_depth = depth;  // output will have the same depth as the input
+  } else {  // we are pooling in the depth dimension
     // Our current version of depthwise max pooling does not support
     // any padding, and expects the depth_window to equal the depth
     // stride (no overlapping).
@@ -109,7 +142,6 @@ void MklPoolParameters::Init(OpKernelContext* context,
                 errors::Unimplemented("Depthwise max pooling is currently "
                                       "only implemented for CPU devices."));
 
-    pad_depth = 0;
     out_depth = depth / depth_window;
   }
 }
diff --git a/tensorflow/core/kernels/mkl_pooling_ops_common.h b/tensorflow/core/kernels/mkl_pooling_ops_common.h
index 92ea2beb25..d33e91a15d 100644
--- a/tensorflow/core/kernels/mkl_pooling_ops_common.h
+++ b/tensorflow/core/kernels/mkl_pooling_ops_common.h
@@ -18,9 +18,18 @@ limitations under the License.
 
 #ifdef INTEL_MKL
 #include <vector>
+#include <string>
 #include "tensorflow/core/util/mkl_util.h"
 #include "tensorflow/core/util/padding.h"
 
+#ifdef INTEL_MKL_DNN
+#include "mkldnn.hpp"
+using mkldnn::memory;
+using mkldnn::pooling_forward;
+using mkldnn::pooling_backward;
+using mkldnn::stream;
+#endif
+
 namespace tensorflow {
 
 typedef Eigen::ThreadPoolDevice CPUDevice;
@@ -51,14 +60,28 @@ struct MklPoolParameters {
   int pad_depth;
 
   TensorFormat data_format;
+  MklPoolParameters()
+    : depth(0)
+    , tensor_in_cols(0), tensor_in_rows(0), tensor_in_batch(0)
+    , window_rows(0), window_cols(0), depth_window(0)
+    , row_stride(0), col_stride(0), depth_stride(0)
+    , out_height(0), out_width(0), out_depth(0)
+    , pad_left(0), pad_right(0), pad_top(0), pad_bottom(0), pad_depth(0)
+    , data_format(TensorFormat::FORMAT_NCHW) {}
 
   // Updates context->status if there is an invalid input.
   void Init(OpKernelContext* context, const std::vector<int32>& ksize,
             const std::vector<int32>& stride, Padding padding,
             TensorFormat data_format, const TensorShape& tensor_in_shape);
+#ifndef INTEL_MKL_DNN
   void Init(OpKernelContext* context, const std::vector<int32>& ksize,
             const std::vector<int32>& stride, Padding padding,
             TensorFormat data_format, const MklShape* mkl_in_shape);
+#else
+  void Init(OpKernelContext* context, const std::vector<int32>& ksize,
+            const std::vector<int32>& stride, Padding padding,
+            TensorFormat data_format, const MklDnnShape* mkl_in_shape);
+#endif
 
  private:
   // Common initialization for TensorFlow and MKL formats
@@ -67,6 +90,325 @@ struct MklPoolParameters {
             TensorFormat data_format);
 };
 
+#ifdef INTEL_MKL_DNN
+
+template <class T>
+class MklPoolingOpBase : public OpKernel {
+ public:
+  explicit MklPoolingOpBase(OpKernelConstruction* context)
+            : OpKernel(context)
+            , workspace_enabled_(false) {
+      string data_format;
+      OP_REQUIRES_OK(context, context->GetAttr("data_format", &data_format));
+      OP_REQUIRES(context,
+            FormatFromString(data_format, &this->data_format_tf_),
+            errors::InvalidArgument("Invalid data format"));
+      this->data_format_mkldnn_
+                = TFDataFormatToMklDnnDataFormat(this->data_format_tf_);
+      OP_REQUIRES_OK(context, context->GetAttr("ksize", &this->ksize_));
+      OP_REQUIRES(context, this->ksize_.size() == 4,
+                  errors::InvalidArgument("Sliding window ksize field must "
+                                          "specify 4 dimensions"));
+      OP_REQUIRES_OK(context, context->GetAttr("strides", &this->stride_));
+      OP_REQUIRES(context, this->stride_.size() == 4,
+                  errors::InvalidArgument("Sliding window strides field must "
+                                          "specify 4 dimensions"));
+      OP_REQUIRES_OK(context, context->GetAttr("padding", &this->padding_));
+      OP_REQUIRES(context, this->ksize_[0] == 1 && this->stride_[0] == 1,
+                  errors::Unimplemented("Pooling is not yet supported on the "
+                                        "batch dimension."));
+
+      // We may not get this attribute for this node if it does not go through
+      // graph rewrite pass. So we do not check for error while retrieving this
+      // attribute value.
+      context->GetAttr("workspace_enabled", &this->workspace_enabled_);
+    }
+  void Compute(OpKernelContext* context) override = 0;
+
+ protected:
+  // Calculate output shape of pooling op in MKL-DNN and TensorFlow order.
+  // MKL-DNN uses NCHW for output order. But TensorFlow output will be in
+  // NHWC or NCHW format depending on data format. Function expects
+  // output height and output width to have already been int32
+  // bounds-checked
+  void GetOutputDims(const MklPoolParameters& mkl_pool_params,
+                    memory::dims* output_dims_mkl_order) {
+    // MKL-DNN always needs output in NCHW format.
+    *output_dims_mkl_order = { mkl_pool_params.tensor_in_batch,
+                              mkl_pool_params.out_depth,
+                              static_cast<int>(mkl_pool_params.out_height),
+                              static_cast<int>(mkl_pool_params.out_width)};
+  }
+
+  void InitMklPoolParameters(OpKernelContext* context,
+                      MklPoolParameters* pool_params,
+                      const MklDnnShape& original_input_mkl_shape,
+                      const TensorShape& input_tensor_shape) {
+    if (!original_input_mkl_shape.IsMklTensor()) {
+      pool_params->Init(context, this->ksize_, this->stride_, this->padding_,
+          this->data_format_tf_, input_tensor_shape);
+    } else {
+      pool_params->Init(context, this->ksize_, this->stride_, this->padding_,
+          this->data_format_tf_, &original_input_mkl_shape);
+    }
+  }
+
+  // Checks to make sure that the memory we need to allocate
+  // is a multiple of sizeof(T)
+  // returns the number of elements
+  size_t GetNumTElements(const memory::primitive_desc& pd) {
+    size_t num_bytes = pd.get_size();
+    size_t ret_val = num_bytes / sizeof(T);
+    if ( num_bytes % sizeof(T) != 0 ) {
+        ret_val++;
+    }
+    return ret_val;
+  }
+
+
+  std::vector<int32> ksize_;
+  std::vector<int32> stride_;
+  Padding padding_;
+  TensorFormat data_format_tf_;
+  memory::format data_format_mkldnn_;
+  bool workspace_enabled_;
+};
+
+template <class T>
+class MklPoolingForwardOpBase : public MklPoolingOpBase<T> {
+ public:
+  explicit MklPoolingForwardOpBase<T>(OpKernelConstruction* context)
+      : MklPoolingOpBase<T>(context) {}
+  void Compute(OpKernelContext* context) override = 0;
+
+ protected:
+  void ConfigureInput(OpKernelContext* context,
+                    const MklDnnShape& input_mkl_shape,
+                    const Tensor& input_tensor,
+                    MklPoolParameters* pool_params,
+                    MklDnnData<T>* dnn_data_input) {
+    CHECK_NOTNULL(pool_params);
+    CHECK_NOTNULL(dnn_data_input);
+    TensorShape input_tensor_shape = input_tensor.shape();
+    memory::desc input_md = input_mkl_shape.IsMklTensor()
+                        ? input_mkl_shape.GetMklLayout()
+                        : memory::desc(
+                              TFShapeToMklDnnDimsInNCHW(
+                                  input_tensor_shape, this->data_format_tf_),
+                              MklDnnType<T>(),
+                              this->data_format_mkldnn_);
+    dnn_data_input->SetUsrMem(input_md, &input_tensor);
+    this->InitMklPoolParameters(context, pool_params,
+                      input_mkl_shape, input_tensor_shape);
+  }
+
+  void AllocateOutputTensor(OpKernelContext* context,
+            const pooling_forward::primitive_desc& pool_fwd_prim_desc,
+            const memory::dims output_dims_mkl_order,
+            const memory::format& output_tf_format,
+            Tensor** output_tensor) {
+    CHECK_NOTNULL(output_tensor);
+    memory::primitive_desc dst_pd = pool_fwd_prim_desc.dst_primitive_desc();
+
+    MklDnnShape output_mkl_shape;
+    output_mkl_shape.SetMklTensor(true);
+    output_mkl_shape.SetMklLayout(&dst_pd);
+    output_mkl_shape.SetElemType(MklDnnType<T>());
+    output_mkl_shape.SetTfLayout(output_dims_mkl_order.size(),
+                              output_dims_mkl_order,
+                              output_tf_format);
+    TensorShape output_tf_shape;
+
+    // only allocate enough space for the elements we need.
+    output_tf_shape.AddDim(this->GetNumTElements(dst_pd));
+    AllocateOutputSetMklShape(context, kOutputTensorIndexOutput,
+                            output_tensor,
+                            output_tf_shape, output_mkl_shape);
+    CHECK_NOTNULL(*output_tensor);
+  }
+
+  void PrepareAndExecuteNet(
+                  const pooling_forward::primitive_desc& pool_fwd_desc,
+                  const MklDnnData<T>* src,
+                  MklDnnData<T>* dst,
+                  MklDnnData<T>* wksp = nullptr) {
+    std::vector<primitive> net;
+
+    // Create pooling primitive and add it to net
+    if (wksp != nullptr) {
+        net.push_back(pooling_forward(pool_fwd_desc,
+                        src->GetOpMem(),
+                        dst->GetOpMem(),
+                        wksp->GetOpMem()));
+    } else {
+        net.push_back(pooling_forward(pool_fwd_desc,
+            src->GetOpMem(),
+            dst->GetOpMem()));
+    }
+    stream(stream::kind::eager).submit(net).wait();
+  }
+
+
+  void SanityCheckInput(OpKernelContext* context,
+                  const Tensor& input_tensor,
+                  const MklDnnShape& input_mkl_shape) {
+    if (!input_mkl_shape.IsMklTensor()) {
+      OP_REQUIRES(context, input_tensor.dims() == 4,
+          errors::InvalidArgument("Input must be 4-dimensional"));
+    } else {
+        OP_REQUIRES(context, input_mkl_shape.GetDimension() == 4,
+                errors::InvalidArgument("Input shape must be "
+                "4-dimensional"));
+    }
+  }
+  // .Input("value: T")
+  // .Output("output: T")
+  const int kInputTensorIndexInput = 0;
+  const int kOutputTensorIndexOutput = 0;
+};  // MklPoolingForwardBaseOp
+
+
+template <class T>
+class MklPoolingBackwardOpBase : public MklPoolingOpBase<T> {
+ public:
+  explicit MklPoolingBackwardOpBase<T>(OpKernelConstruction* context)
+          : MklPoolingOpBase<T>(context) { }
+  void Compute(OpKernelContext* context) override = 0;
+
+ protected:
+  const int kOutputTensorIndexOutput = 0;
+
+  void AllocateOutputTensor(OpKernelContext* context,
+            const pooling_backward::primitive_desc& pool_bkwd_prim_desc,
+            const memory::dims output_dims_mkl_order,
+            const memory::format& output_tf_format,
+            Tensor** output_tensor) {
+    CHECK_NOTNULL(output_tensor);
+    memory::primitive_desc dst_pd
+                = pool_bkwd_prim_desc.diff_src_primitive_desc();
+    MklDnnShape output_mkl_shape;
+    output_mkl_shape.SetMklTensor(true);
+    output_mkl_shape.SetMklLayout(&dst_pd);
+    output_mkl_shape.SetElemType(MklDnnType<T>());
+    output_mkl_shape.SetTfLayout(output_dims_mkl_order.size(),
+                              output_dims_mkl_order,
+                              output_tf_format);
+
+    TensorShape output_tf_shape;
+    output_tf_shape.AddDim(this->GetNumTElements(dst_pd));
+    AllocateOutputSetMklShape(context, kOutputTensorIndexOutput,
+                            output_tensor,
+                            output_tf_shape, output_mkl_shape);
+    CHECK_NOTNULL(*output_tensor);
+  }
+
+  void PrepareAndExecuteNet(
+    const pooling_backward::primitive_desc& pool_bkwd_desc,
+    MklDnnData<T>* input_gradient_diff_dst,
+    MklDnnData<T>* output_diff_src,
+    const memory::primitive_desc& target_diff_dst_pd,
+    const MklDnnData<T>* workspace = nullptr) {
+
+    std::vector<primitive> net;
+
+    // If the input gradient isn't in the same format as the output
+    // reorder it to the same format as the output
+    input_gradient_diff_dst->CheckReorderToOpMem(
+            target_diff_dst_pd,
+            &net);
+
+    // Create pooling primitive and add it to net
+    if (nullptr == workspace) {
+      net.push_back(pooling_backward(pool_bkwd_desc,
+                              input_gradient_diff_dst->GetOpMem(),
+                              output_diff_src->GetOpMem()));
+    } else {
+      net.push_back(pooling_backward(pool_bkwd_desc,
+                                  input_gradient_diff_dst->GetOpMem(),
+                                  workspace->GetOpMem(),
+                                  output_diff_src->GetOpMem()));
+    }
+    stream(stream::kind::eager).submit(net).wait();
+  }
+
+  // Max Pooling and Avg Pooling have slightly different implementations
+  // Takes the Tensor containing original input data and the original
+  // mkl Dnn Shape and populates other data
+  memory::desc ConfigureOriginalInput(OpKernelContext* context,
+                              const Tensor& tensor_original_input_shape,
+                              const MklDnnShape& original_input_mkl_shape,
+                              memory::dims* original_input_dims_nchw,
+                              MklPoolParameters* pool_params,
+                              const TensorShape& input_tensor_shape) {
+    CHECK_NOTNULL(original_input_dims_nchw);
+    CHECK_NOTNULL(pool_params);
+    this->InitMklPoolParameters(context, pool_params,
+                          original_input_mkl_shape,
+                          input_tensor_shape);
+
+    *original_input_dims_nchw
+          = original_input_mkl_shape.IsMklTensor()
+          ? original_input_mkl_shape.GetSizesAsMklDnnDims()
+          : TFShapeToMklDnnDimsInNCHW(input_tensor_shape,
+        this->data_format_tf_);
+
+    return  original_input_mkl_shape.IsMklTensor()
+      ? original_input_mkl_shape.GetMklLayout()
+      : memory::desc(*original_input_dims_nchw,
+                      MklDnnType<T>(),
+                      this->data_format_mkldnn_);
+  }
+
+  memory::desc ConfigureOriginalOutput(const MklPoolParameters& pool_params,
+                                const MklDnnShape& original_output_mkl_shape,
+                                      memory::dims output_dims_mkl_order) {
+    this->GetOutputDims(pool_params, &output_dims_mkl_order);
+
+    return original_output_mkl_shape.IsMklTensor()
+            ? original_output_mkl_shape.GetMklLayout()
+            : memory::desc(output_dims_mkl_order,
+                         MklDnnType<T>(),
+                         this->data_format_mkldnn_);
+  }
+
+  memory::desc ConfigureInputGradient(
+        const MklDnnShape& input_gradient_mkl_shape,
+        const Tensor& input_gradient_tensor,
+        MklDnnData<T>* input_gradient_dnn_data,
+        const memory::desc& original_output_md) {
+    // Configure the gradient as is
+    memory::desc original_input_grad_md
+          = input_gradient_mkl_shape.IsMklTensor()
+          ? input_gradient_mkl_shape.GetMklLayout()
+          : memory::desc(TFShapeToMklDnnDimsInNCHW(
+                    input_gradient_tensor.shape(),
+                    this->data_format_tf_),
+                    MklDnnType<T>(), this->data_format_mkldnn_);
+
+    input_gradient_dnn_data->SetUsrMem(original_input_grad_md,
+                &input_gradient_tensor);
+
+    // Check to see if input grad diff dst is in the right format
+    // Create a new memory descriptor with the same shape as the
+    // original, but the format of the other tensors.
+    memory::format original_output_format =
+            static_cast<memory::format>(original_output_md.data.format);
+    bool grad_reorder_needed = input_gradient_dnn_data->IsReorderNeeded(
+                                    original_output_format);
+    memory::dims diff_dst_dims = input_gradient_mkl_shape.IsMklTensor()
+        ? input_gradient_mkl_shape.GetSizesAsMklDnnDims()
+        : TFShapeToMklDnnDimsInNCHW(input_gradient_tensor.shape(),
+                    this->data_format_tf_);
+    memory::desc target_diff_dst_md = memory::desc(diff_dst_dims,
+        MklDnnType<T>(), original_output_format);
+
+    return grad_reorder_needed
+            ? target_diff_dst_md
+            : original_input_grad_md;
+  }
+};
+#endif  // INTEL_MKL_DNN
+
 //-------------------------------------------------------------------
 // Utility functions
 
diff --git a/tensorflow/core/kernels/mkl_relu_op.cc b/tensorflow/core/kernels/mkl_relu_op.cc
index 86a77d769a..45bdd0ad5c 100644
--- a/tensorflow/core/kernels/mkl_relu_op.cc
+++ b/tensorflow/core/kernels/mkl_relu_op.cc
@@ -28,6 +28,19 @@ limitations under the License.
 #include "mkl_dnn.h"
 #include "mkl_dnn_types.h"
 
+#ifdef INTEL_MKL_DNN
+#include "mkldnn.hpp"
+
+using mkldnn::stream;
+using mkldnn::prop_kind;
+using mkldnn::algorithm;
+using mkldnn::relu_forward;
+using mkldnn::relu_backward;
+using mkldnn::eltwise_relu;
+using mkldnn::eltwise_elu;
+using mkldnn::eltwise_tanh;
+#endif
+
 namespace tensorflow {
 
 typedef Eigen::ThreadPoolDevice CPUDevice;
@@ -45,6 +58,8 @@ struct MklReluHelpers {
   }
 };
 
+#ifndef INTEL_MKL_DNN
+
 template <typename Device, typename T>
 class MklReluOp : public OpKernel {
  public:
@@ -59,6 +74,7 @@ class MklReluOp : public OpKernel {
     GetMklShape(context, 0, &mkl_context.input_shape);
     void* user_i = static_cast<void*>(const_cast<T*>(input.flat<T>().data()));
     bool input_in_mkl_format = mkl_context.input_shape.IsMklTensor();
+
     if (!input_in_mkl_format && !input.dims()) {  // handle the case of a scalar
       const TensorShape& o_shape = input.shape();
       Tensor* out_tensor = nullptr;
@@ -164,6 +180,7 @@ class MklReluOp : public OpKernel {
   } MklReluOpContext;
 };
 
+
 template <typename Device, typename T>
 class MklReluGradOp : public OpKernel {
  public:
@@ -189,18 +206,18 @@ class MklReluGradOp : public OpKernel {
       const Tensor& a = MklGetInput(context, 1);
       void* buf_input = static_cast<void*>(const_cast<T*>(a.flat<T>().data()));
       void* mkl_buffer_convert = nullptr;
+
       dnnPrimitive_t cv_input_to_grad = nullptr;
 
-      // if input and grad are not in the same layout, do a conversion between
-      // them.
+      // if input and grad are not in the same layout,
+      // do a conversion between them.
       if (!dnnLayoutCompare_F32(lt_input, lt_grad)) {
         AllocTmpBuffer(context, mkl_tmp_input_buf_tensor, lt_grad,
                        &mkl_buffer_convert);
         CHECK_EQ(dnnConversionCreate_F32(&cv_input_to_grad, lt_input,
                    lt_grad), E_SUCCESS);
         CHECK_EQ(dnnConversionExecute_F32(cv_input_to_grad, buf_input,
-                                          mkl_buffer_convert),
-                 E_SUCCESS);
+                                          mkl_buffer_convert), E_SUCCESS);
         relu_res[dnnResourceSrc] = mkl_buffer_convert;
         dnnDelete_F32(cv_input_to_grad);
       } else {
@@ -246,7 +263,6 @@ class MklReluGradOp : public OpKernel {
 };
 
 template <typename Device, typename T>
-
 void MklReluGradOp<Device, T>::Compute(OpKernelContext* context) {
   MklReluGradOpContext mkl_context;
   const Tensor& g = MklGetInput(context, 0);
@@ -264,20 +280,21 @@ void MklReluGradOp<Device, T>::Compute(OpKernelContext* context) {
       !MklReluHelpers::ValidateSameSize(context, g, a))
     return;
   Tensor* output = nullptr;
-  if (!input_is_mkl && !grad_is_mkl &&
-      !a.dims()) {  // handle the case of a scalar
-    // Allocate space for g and
+
+  if (!input_is_mkl && !grad_is_mkl && !a.dims()) {
+    // handle the scalar case
     const TensorShape& g_shape = g.shape();
     mkl_context.output_shape.SetMklTensor(false);
     AllocateOutputSetMklShape(context, 0, &output, g_shape,
                               mkl_context.output_shape);
+
     void* out_o = static_cast<void*>(output->flat<T>().data());
     (static_cast<T*>(out_o))[0] =
         (static_cast<T*>(user_g))[0] * ((static_cast<T*>(user_i))[0] > 0);
     return;
   }
 
-  // Generate size, stride for input if input/grad is in MKL format.
+  // generate size, stride for input if input/grad is in mkl format.
   if (grad_is_mkl || input_is_mkl) {
     const MklShape* tmp_mkl_shape =
         (grad_is_mkl) ? &mkl_context.grad_shape : &mkl_context.input_shape;
@@ -308,21 +325,20 @@ void MklReluGradOp<Device, T>::Compute(OpKernelContext* context) {
   float negative_slope = 0.0;
   CHECK_EQ(dnnReLUCreateBackward_F32(&mkl_context.prim_relu_bwd, NULL,
                                      mkl_context.lt_grad, mkl_context.lt_grad,
-                                     negative_slope),
-           E_SUCCESS);
+                                     negative_slope), E_SUCCESS);
   Tensor mkl_tmp_input_buf_tensor;
   mkl_context.MklPrepareReluGradInputs(context, &mkl_tmp_input_buf_tensor);
 
   if (input_is_mkl ||
-      grad_is_mkl) { /*if  grad or input are MKL leave it in MKL*/
+      grad_is_mkl) { /*if  grad or input are mkl leave it in mkl*/
     TensorShape tf_shape;
     mkl_context.output_shape.SetMklTensor(true);
     mkl_context.output_shape.SetMklLayout(mkl_context.prim_relu_bwd,
                                           dnnResourceDiffSrc);
     mkl_context.output_shape.SetTfLayout(
         mkl_context.in_dims, mkl_context.in_sizes, mkl_context.in_strides);
-    // If input_is_mkl or grad_is_mkl, then we copy strides and sizes from Mkl
-    // shape of one that is in MKL layout.
+    // if input_is_mkl or grad_is_mkl, then we copy strides and sizes from mkl
+    // shape of one that is in mkl layout.
     if (grad_is_mkl == true) {
       mkl_context.output_shape.SetTfDimOrder(
           mkl_context.in_dims, mkl_context.grad_shape.GetTfToMklDimMap());
@@ -332,11 +348,9 @@ void MklReluGradOp<Device, T>::Compute(OpKernelContext* context) {
     }
 
     tf_shape.AddDim(dnnLayoutGetMemorySize_F32(static_cast<dnnLayout_t>(
-                        mkl_context.output_shape.GetMklLayout())) /
-                    sizeof(T));
+                    mkl_context.output_shape.GetMklLayout())) / sizeof(T));
     AllocateOutputSetMklShape(context, 0, &output, tf_shape,
                               mkl_context.output_shape);
-
   } else {
     const TensorShape& o_shape = g.shape();
     mkl_context.output_shape.SetMklTensor(false);
@@ -347,13 +361,430 @@ void MklReluGradOp<Device, T>::Compute(OpKernelContext* context) {
   mkl_context.relu_res[dnnResourceDiffSrc] =
       static_cast<void*>(output->flat<T>().data());
 
-  CHECK_EQ(dnnExecute_F32(mkl_context.prim_relu_bwd, mkl_context.relu_res),
-           E_SUCCESS);
+  CHECK_EQ(dnnExecute_F32(mkl_context.prim_relu_bwd,
+                          mkl_context.relu_res),
+                          E_SUCCESS);
   mkl_context.MklCleanup();
 }
 
-/* Register DNN kernels for supported operations and supported types - right now
- * it is only Relu and f32*/
+
+#else  // INTEL_MKL_DNN
+
+template <typename Device, typename T, algorithm alg_kind>
+class MklReluOpBase : public OpKernel {
+ public:
+  ~MklReluOpBase() {}
+
+  explicit MklReluOpBase(OpKernelConstruction* context) : OpKernel(context) {
+  }
+
+  virtual void Compute_Scalar(OpKernelContext* context) = 0;
+
+  void Compute(OpKernelContext* context) override {
+    try {
+      auto cpu_engine = engine(engine::cpu, 0);
+      const size_t src_index = 0;  // index of src input tensor
+      const size_t dst_index = 0;  // index of dst output tensor
+      const Tensor& src_tensor = MklGetInput(context, src_index);
+      MklDnnShape dnn_shape_src;
+      GetMklShape(context, src_index, &dnn_shape_src);
+
+      Tensor* dst_tensor = nullptr;
+      if (src_tensor.dims() == 0) {
+        Compute_Scalar(context);
+        return;
+      }
+
+      // Create relu primitive.
+      MklDnnData<T> src(&cpu_engine);
+      MklDnnData<T> dst(&cpu_engine);
+
+      // Set DNN primitive - src
+      memory::desc src_md({}, memory::data_undef, memory::format_undef);
+      if (dnn_shape_src.IsMklTensor()) {
+        src_md = dnn_shape_src.GetMklLayout();
+      } else {
+        auto src_dims = TFShapeToMklDnnDims(src_tensor.shape());
+        auto src_strides = CalculateTFStrides(src_dims);
+        // Create blocked memory descriptor
+        src_md = MklDnnData<T>::CreateBlockedMemDesc(src_dims, src_strides);
+      }
+      src.SetUsrMem(src_md, &src_tensor);
+
+      T alpha = 0, beta = 0;
+      std::shared_ptr<relu_forward::primitive_desc> relu_fwd_pd;
+      auto relu_fwd_desc = relu_forward::desc(prop_kind::forward_training,
+          // Operator memory descriptor is same as user memory descriptor.
+                                              alg_kind, src.GetUsrMemDesc(),
+                                              alpha, beta);
+      relu_fwd_pd.reset(new relu_forward::primitive_desc(relu_fwd_desc,
+                                                         cpu_engine));
+
+      // allocate dst tensor
+      MklDnnShape dnn_shape_dst;
+      TensorShape tf_shape_dst;
+      if (dnn_shape_src.IsMklTensor()) {
+        dnn_shape_dst.SetMklTensor(true);
+        auto dst_pd = relu_fwd_pd->dst_primitive_desc();
+        dnn_shape_dst.SetMklLayout(&dst_pd);
+        dnn_shape_dst.SetElemType(MklDnnType<T>());
+        dnn_shape_dst.SetTfLayout(dnn_shape_src.GetDimension(),
+                                  dnn_shape_src.GetSizesAsMklDnnDims(),
+                                  dnn_shape_src.GetTfDataFormat());
+        tf_shape_dst.AddDim(dst_pd.get_size()/sizeof(T));
+      } else {
+        dnn_shape_dst.SetMklTensor(false);
+        tf_shape_dst = src_tensor.shape();
+      }
+      AllocateOutputSetMklShape(context, dst_index, &dst_tensor, tf_shape_dst,
+                                dnn_shape_dst);
+
+      // Destination memory descriptor is same as source memory descriptor.
+      auto dst_md = src_md;
+      dst.SetUsrMem(dst_md, dst_tensor);
+
+      // execute net
+      std::vector<primitive> net;
+      auto relu_fwd = relu_forward(*relu_fwd_pd, src.GetOpMem(),
+                                   dst.GetOpMem());
+      net.push_back(relu_fwd);
+      stream(stream::kind::eager).submit(net).wait();
+    } catch (mkldnn::error &e) {
+      string error_msg = "Status: " + std::to_string(e.status) +
+                         ", message: " + string(e.message) +
+                         ", in file " + string(__FILE__) + ":" +
+                         std::to_string(__LINE__);
+      OP_REQUIRES_OK(context,
+                     errors::Aborted("Operation received an exception:",
+                        error_msg));
+    }
+  }
+};
+
+
+template <typename Device, typename T, algorithm alg_kind>
+class MklReluGradOpBase : public OpKernel {
+ public:
+  ~MklReluGradOpBase() {}
+
+  explicit MklReluGradOpBase(OpKernelConstruction* context) :
+    OpKernel(context) {}
+
+  virtual void Compute_Scalar(OpKernelContext* context) = 0;
+
+  void Compute(OpKernelContext* context)  {
+    try {
+      auto cpu_engine = engine(engine::cpu, 0);
+      MklDnnData<T> src(&cpu_engine);
+      MklDnnData<T> diff_dst(&cpu_engine);
+      MklDnnData<T> diff_src(&cpu_engine);
+
+      const size_t diff_dst_index = 0;  // index of diff_dst input tensor
+      const size_t src_index = 1;       // index of src input tensor
+      const size_t diff_src_index = 0;  // index of diff_src output tensor
+
+      const Tensor& src_tensor      = MklGetInput(context, src_index);
+      const Tensor& diff_dst_tensor = MklGetInput(context, diff_dst_index);
+      Tensor* diff_src_tensor       = nullptr;
+
+      MklDnnShape dnn_shape_src, dnn_shape_diff_dst;
+      GetMklShape(context, src_index, &dnn_shape_src);
+      GetMklShape(context, diff_dst_index, &dnn_shape_diff_dst);
+
+      int src_dims_size = src_tensor.dims();
+      if (src_dims_size == 0) {
+        Compute_Scalar(context);
+        return;
+      }
+
+      // Set DNN primitives for src & diff_dst
+      memory::desc src_md({}, memory::data_undef, memory::format_undef);
+      memory::desc diff_dst_md({}, memory::data_undef, memory::format_undef);
+      if (dnn_shape_src.IsMklTensor() || dnn_shape_diff_dst.IsMklTensor()) {
+        if (dnn_shape_diff_dst.IsMklTensor()) {
+          diff_dst_md = dnn_shape_diff_dst.GetMklLayout();
+          src_md = diff_dst_md;
+        } else {
+          src_md = dnn_shape_src.GetMklLayout();
+          diff_dst_md = src_md;
+        }
+      } else {
+        auto src_dims = TFShapeToMklDnnDims(src_tensor.shape());
+        auto src_strides = CalculateTFStrides(src_dims);
+        src_md = MklDnnData<T>::CreateBlockedMemDesc(src_dims, src_strides);
+        diff_dst_md = src_md;
+      }
+      src.SetUsrMem(src_md, &src_tensor);
+      diff_dst.SetUsrMem(diff_dst_md, &diff_dst_tensor);
+
+      T alpha = 0, beta = 0;
+      std::shared_ptr<relu_forward::primitive_desc> relu_fwd_pd;
+      auto relu_fwd_desc = relu_forward::desc(prop_kind::forward_training,
+                                              alg_kind, src_md, alpha, beta);
+      relu_fwd_pd.reset(new relu_forward::primitive_desc(relu_fwd_desc,
+                                                         cpu_engine));
+      auto relu_bwd_desc = relu_backward::desc(alg_kind, diff_dst_md, src_md,
+                                                alpha, beta);
+      auto relu_bwd_pd  = relu_backward::primitive_desc(relu_bwd_desc,
+                                                cpu_engine, *relu_fwd_pd);
+
+      // allocate diff_src tensor
+      MklDnnShape dnn_shape_diff_src;
+      TensorShape tf_shape_diff_src;
+      if (dnn_shape_src.IsMklTensor()) {
+        dnn_shape_diff_src.SetMklTensor(true);
+        auto diff_src_pd = relu_bwd_pd.diff_src_primitive_desc();
+        dnn_shape_diff_src.SetMklLayout(&diff_src_pd);
+        dnn_shape_diff_src.SetElemType(MklDnnType<T>());
+        dnn_shape_diff_src.SetTfLayout(dnn_shape_src.GetDimension(),
+                                       dnn_shape_src.GetSizesAsMklDnnDims(),
+                                       dnn_shape_src.GetTfDataFormat());
+        tf_shape_diff_src.AddDim(diff_src_pd.get_size()/sizeof(T));
+      } else {
+        dnn_shape_diff_src.SetMklTensor(false);
+        tf_shape_diff_src = src_tensor.shape();
+      }
+      AllocateOutputSetMklShape(context, diff_src_index, &diff_src_tensor,
+                                 tf_shape_diff_src, dnn_shape_diff_src);
+
+      // diff_src memory descriptor is same as diff_dst memory descriptor.
+      auto diff_src_md = diff_dst_md;
+      diff_src.SetUsrMem(diff_src_md, diff_src_tensor);
+
+      PrepareAndExecuteNet(relu_bwd_pd, &src, &diff_src, &diff_dst);
+     } catch (mkldnn::error &e) {
+       string error_msg = "Status: " + std::to_string(e.status) +
+                          ", message: " + string(e.message) +
+                          ", in file " + string(__FILE__) + ":" +
+                          std::to_string(__LINE__);
+       OP_REQUIRES_OK(context,
+                      errors::Aborted("Operation received an exception:",
+                                      error_msg));
+    }
+  }
+
+  void PrepareAndExecuteNet(const relu_backward::primitive_desc& relu_prim_desc,
+                  MklDnnData<T>* src, MklDnnData<T>* diff_src, MklDnnData<T>*
+                  diff_dst) {
+    std::vector<primitive> net;
+    net.push_back(relu_backward(relu_prim_desc, src->GetOpMem(),
+                                diff_dst->GetOpMem(), diff_src->GetOpMem()));
+    stream(stream::kind::eager).submit(net).wait();
+  }
+};
+
+
+template <typename Device, typename T>
+class MklReluOp : public MklReluOpBase<Device, T, eltwise_relu> {
+ public:
+  ~MklReluOp() {}
+
+  explicit MklReluOp(OpKernelConstruction* context) :
+  MklReluOpBase<Device, T, eltwise_relu>(context) {}
+
+  virtual void Compute_Scalar(OpKernelContext* context) {
+    const size_t src_index = 0;  // index of src input tensor
+    const size_t dst_index = 0;  // index of dst output tensor
+    const Tensor& src_tensor = MklGetInput(context, src_index);
+    MklDnnShape dnn_shape_src;
+    GetMklShape(context, src_index, &dnn_shape_src);
+
+    Tensor* dst_tensor = nullptr;
+    void* user_i = static_cast<void*>(const_cast<T*>(
+                         src_tensor.flat<T>().data()));
+    MklDnnShape dnn_shape_dst;
+    dnn_shape_dst.SetMklTensor(false);
+    AllocateOutputSetMklShape(context, dst_index, &dst_tensor,
+                              src_tensor.shape(), dnn_shape_dst);
+    void* out_o = static_cast<void*>(dst_tensor->flat<T>().data());
+    (static_cast<T*>(out_o))[0] =
+              std::max((static_cast<T*>(user_i))[0], static_cast<T>(0));
+    return;
+  }
+};
+
+template <typename Device, typename T>
+class MklReluGradOp : public MklReluGradOpBase<Device, T, eltwise_relu> {
+ public:
+  ~MklReluGradOp() {}
+
+  explicit MklReluGradOp(OpKernelConstruction* context) :
+  MklReluGradOpBase<Device, T, eltwise_relu>(context) {}
+
+  virtual void Compute_Scalar(OpKernelContext* context) {
+    const size_t diff_dst_index = 0;  // index of diff_dst input tensor
+    const size_t src_index = 1;       // index of src input tensor
+    const size_t diff_src_index = 0;  // index of diff_src output tensor
+    const Tensor& src_tensor    = MklGetInput(context, src_index);
+    const Tensor& diff_dst_tensor = MklGetInput(context, diff_dst_index);
+    Tensor* diff_src_tensor = nullptr;
+
+    MklDnnShape dnn_shape_diff_dst;
+    GetMklShape(context, diff_dst_index, &dnn_shape_diff_dst);
+
+    int src_dims_size = src_tensor.dims();
+    MklDnnShape dnn_shape_diff_src;
+    dnn_shape_diff_src.SetMklTensor(false);
+    AllocateOutputSetMklShape(context, diff_src_index, &diff_src_tensor,
+                              diff_dst_tensor.shape(), dnn_shape_diff_src);
+    void* out_o = static_cast<void*>(diff_src_tensor->flat<T>().data());
+    void* user_i =
+          static_cast<void*>(const_cast<T*>(src_tensor.flat<T>().data()));
+    void* user_g =
+          static_cast<void*>(const_cast<T*>(diff_dst_tensor.flat<T>().data()));
+    (static_cast<T*>(out_o))[0] = (static_cast<T*>(user_g))[0] *
+                                  ((static_cast<T*>(user_i))[0] > 0);
+    return;
+  }
+};
+
+template <typename Device, typename T>
+class MklEluOp : public MklReluOpBase<Device, T, eltwise_elu> {
+ public:
+  ~MklEluOp() {}
+
+  explicit MklEluOp(OpKernelConstruction* context) :
+  MklReluOpBase<Device, T, eltwise_elu>(context) {}
+
+  virtual void Compute_Scalar(OpKernelContext* context) {
+    const size_t src_index = 0;  // index of src input tensor
+    const size_t dst_index = 0;  // index of dst output tensor
+    const Tensor& src_tensor = MklGetInput(context, src_index);
+    MklDnnShape dnn_shape_src;
+    GetMklShape(context, src_index, &dnn_shape_src);
+
+    Tensor* dst_tensor = nullptr;
+    void* user_i = static_cast<void*>(const_cast<T*>(
+                         src_tensor.flat<T>().data()));
+    MklDnnShape dnn_shape_dst;
+    dnn_shape_dst.SetMklTensor(false);
+    AllocateOutputSetMklShape(context, dst_index, &dst_tensor,
+                              src_tensor.shape(), dnn_shape_dst);
+    void* out_o = static_cast<void*>(dst_tensor->flat<T>().data());
+    // return exp(feature) - 1 if feature > 0; feature otherwise
+    T feature = (static_cast<T*>(user_i))[0];
+    if (feature < 0)
+      (static_cast<T*>(out_o))[0] = std::exp(feature);
+    else
+      (static_cast<T*>(out_o))[0] = feature;
+    return;
+  }
+};
+
+template <typename Device, typename T>
+class MklEluGradOp : public MklReluGradOpBase<Device, T, eltwise_elu> {
+ public:
+  ~MklEluGradOp() {}
+
+  explicit MklEluGradOp(OpKernelConstruction* context) :
+  MklReluGradOpBase<Device, T, eltwise_elu>(context) {}
+
+  virtual void Compute_Scalar(OpKernelContext* context) {
+    const size_t diff_dst_index = 0;  // index of diff_dst input tensor
+    const size_t src_index = 1;       // index of src input tensor
+    const size_t diff_src_index = 0;  // index of diff_src output tensor
+    const Tensor& src_tensor    = MklGetInput(context, src_index);
+    const Tensor& diff_dst_tensor = MklGetInput(context, diff_dst_index);
+    Tensor* diff_src_tensor = nullptr;
+
+    MklDnnShape dnn_shape_diff_dst;
+    GetMklShape(context, diff_dst_index, &dnn_shape_diff_dst);
+
+    int src_dims_size = src_tensor.dims();
+    MklDnnShape dnn_shape_diff_src;
+    dnn_shape_diff_src.SetMklTensor(false);
+    AllocateOutputSetMklShape(context, diff_src_index, &diff_src_tensor,
+                              diff_dst_tensor.shape(), dnn_shape_diff_src);
+    void* out_o = static_cast<void*>(diff_src_tensor->flat<T>().data());
+    void* user_i =
+          static_cast<void*>(const_cast<T*>(src_tensor.flat<T>().data()));
+    void* user_g =
+          static_cast<void*>(const_cast<T*>(diff_dst_tensor.flat<T>().data()));
+    // gradient of elu(x) = 1 if x > 0; elu(x) + 1 otherwise
+    T feature = (static_cast<T*>(user_i))[0];
+    if (feature > 0) {
+      (static_cast<T*>(out_o))[0] = (static_cast<T*>(user_g))[0];
+    } else {
+      T elu = std::exp(feature) - 1;
+      (static_cast<T*>(out_o))[0] = (static_cast<T*>(user_g))[0] * (elu + 1);
+    }
+  }
+};
+
+template <typename Device, typename T>
+class MklTanhOp : public MklReluOpBase<Device, T, eltwise_tanh> {
+ public:
+  ~MklTanhOp() {}
+
+  explicit MklTanhOp(OpKernelConstruction* context) :
+  MklReluOpBase<Device, T, eltwise_tanh>(context) {}
+
+  virtual void Compute_Scalar(OpKernelContext* context) {
+    const size_t src_index = 0;  // index of src input tensor
+    const size_t dst_index = 0;  // index of dst output tensor
+    const Tensor& src_tensor = MklGetInput(context, src_index);
+    MklDnnShape dnn_shape_src;
+    GetMklShape(context, src_index, &dnn_shape_src);
+
+    Tensor* dst_tensor = nullptr;
+    void* user_i = static_cast<void*>(const_cast<T*>(
+                         src_tensor.flat<T>().data()));
+    MklDnnShape dnn_shape_dst;
+    dnn_shape_dst.SetMklTensor(false);
+    AllocateOutputSetMklShape(context, dst_index, &dst_tensor,
+                              src_tensor.shape(), dnn_shape_dst);
+    void* out_o = static_cast<void*>(dst_tensor->flat<T>().data());
+    // tanh(x) = (e^x - e^(-x))/ (e^x + e^(-x))
+    T feature = (static_cast<T*>(user_i))[0];
+    T e1 = std::exp(feature);
+    T e2 = std::exp(-feature);
+    (static_cast<T*>(out_o))[0] = (e1 - e2)/(e1 + e2);
+    return;
+  }
+};
+
+template <typename Device, typename T>
+class MklTanhGradOp : public MklReluGradOpBase<Device, T, eltwise_tanh> {
+ public:
+  ~MklTanhGradOp() {}
+
+  explicit MklTanhGradOp(OpKernelConstruction* context) :
+  MklReluGradOpBase<Device, T, eltwise_tanh>(context) {}
+
+  virtual void Compute_Scalar(OpKernelContext* context) {
+    const size_t diff_dst_index = 0;  // index of diff_dst input tensor
+    const size_t src_index = 1;       // index of src input tensor
+    const size_t diff_src_index = 0;  // index of diff_src output tensor
+    const Tensor& src_tensor    = MklGetInput(context, src_index);
+    const Tensor& diff_dst_tensor = MklGetInput(context, diff_dst_index);
+    Tensor* diff_src_tensor = nullptr;
+
+    MklDnnShape dnn_shape_diff_dst;
+    GetMklShape(context, diff_dst_index, &dnn_shape_diff_dst);
+
+    int src_dims_size = src_tensor.dims();
+    MklDnnShape dnn_shape_diff_src;
+    dnn_shape_diff_src.SetMklTensor(false);
+    AllocateOutputSetMklShape(context, diff_src_index, &diff_src_tensor,
+                              diff_dst_tensor.shape(), dnn_shape_diff_src);
+    void* out_o = static_cast<void*>(diff_src_tensor->flat<T>().data());
+    void* user_i =
+          static_cast<void*>(const_cast<T*>(src_tensor.flat<T>().data()));
+    // gradient of tanh(x) = 1 - tanh(x)^2
+    T feature = (static_cast<T*>(user_i))[0];
+    T e1 = std::exp(feature);
+    T e2 = std::exp(-feature);
+    T tanh = (e1 - e2)/(e1 + e2);
+    void* user_g =
+          static_cast<void*>(const_cast<T*>(diff_dst_tensor.flat<T>().data()));
+    (static_cast<T*>(out_o))[0] = (static_cast<T*>(user_g))[0] *
+                                  (1 - tanh * tanh);
+  }
+};
+
+#endif
+
+// register dnn kernels for supported operations and supported types
 #define REGISTER_RELU_MKL_SUPPORTED_KERNELS_TYPES(type)             \
   REGISTER_KERNEL_BUILDER(Name("_MklRelu")                          \
                               .Device(DEVICE_CPU)                   \
@@ -367,6 +798,38 @@ void MklReluGradOp<Device, T>::Compute(OpKernelContext* context) {
                           MklReluGradOp<CPUDevice, type>);
 TF_CALL_float(REGISTER_RELU_MKL_SUPPORTED_KERNELS_TYPES);
 
+#ifdef INTEL_MKL_DNN
+
+// register dnn kernels for supported operations and supported types
+#define REGISTER_ELU_MKL_SUPPORTED_KERNELS_TYPES(type)             \
+  REGISTER_KERNEL_BUILDER(Name("_MklElu")                          \
+                              .Device(DEVICE_CPU)                   \
+                              .TypeConstraint<type>("T")            \
+                              .Label(mkl_op_registry::kMklOpLabel), \
+                          MklEluOp<CPUDevice, type>);              \
+  REGISTER_KERNEL_BUILDER(Name("_MklEluGrad")                      \
+                              .Device(DEVICE_CPU)                   \
+                              .TypeConstraint<type>("T")            \
+                              .Label(mkl_op_registry::kMklOpLabel), \
+                          MklEluGradOp<CPUDevice, type>);
+TF_CALL_float(REGISTER_ELU_MKL_SUPPORTED_KERNELS_TYPES);
+
+#define REGISTER_TANH_MKL_SUPPORTED_KERNELS_TYPES(type)             \
+  REGISTER_KERNEL_BUILDER(Name("_MklTanh")                          \
+                              .Device(DEVICE_CPU)                   \
+                              .TypeConstraint<type>("T")            \
+                              .Label(mkl_op_registry::kMklOpLabel), \
+                          MklTanhOp<CPUDevice, type>);              \
+  REGISTER_KERNEL_BUILDER(Name("_MklTanhGrad")                      \
+                              .Device(DEVICE_CPU)                   \
+                              .TypeConstraint<type>("T")            \
+                              .Label(mkl_op_registry::kMklOpLabel), \
+                          MklTanhGradOp<CPUDevice, type>);
+TF_CALL_float(REGISTER_TANH_MKL_SUPPORTED_KERNELS_TYPES);
+
+#endif
+
 }  // namespace tensorflow
 
 #endif  // INTEL_MKL
+
diff --git a/tensorflow/core/kernels/mkl_reshape_op.cc b/tensorflow/core/kernels/mkl_reshape_op.cc
index 5e98582475..11c92ebdb4 100644
--- a/tensorflow/core/kernels/mkl_reshape_op.cc
+++ b/tensorflow/core/kernels/mkl_reshape_op.cc
@@ -28,6 +28,11 @@ limitations under the License.
 #include "mkl_dnn_types.h"
 #include "tensorflow/core/util/mkl_util.h"
 
+#ifdef INTEL_MKL_DNN
+#include "mkldnn.hpp"
+using mkldnn::stream;
+#endif
+
 namespace tensorflow {
 using CPUDevice = Eigen::ThreadPoolDevice;
 template <typename Device, typename T>
@@ -35,6 +40,7 @@ class MklReshapeOp : public OpKernel {
  public:
   explicit MklReshapeOp(OpKernelConstruction* context) : OpKernel(context) {}
 
+#ifndef INTEL_MKL_DNN
   void Compute(OpKernelContext* context) override {
     const Tensor& input = MklGetInput(context, 0);
     const Tensor& sizes = MklGetInput(context, 1);
@@ -129,7 +135,183 @@ class MklReshapeOp : public OpKernel {
     }
   }
 
+#else
+
  private:
+  // When the input tensor is in MKL layout and we are reshaping the tensor to a
+  // different shape than its actual shape, then we use MKLDNN reorder primitive
+  // to put tensor back in Tensorflow layout. But we can skip this reordering
+  // some times. This function checks for all such cases.
+  bool SkipReorder(const MklDnnShape& mkl_shape_input,
+                   const TensorShape& reshape_to) {
+    CHECK_EQ(mkl_shape_input.IsMklTensor(), true);
+    bool ret = false;
+
+    // If Tensorflow's data format and the underlying format maintained by
+    // MKLDNN are equivalent (both are NHWC or both are NCHW), then we can
+    // safely return true.
+    auto input_mkl_md = mkl_shape_input.GetMklLayout();
+    if (mkl_shape_input.GetTfDataFormat() == input_mkl_md.data.format) {
+      ret = true;
+    }
+
+    return ret;
+  }
+
+ public:
+  void Compute(OpKernelContext* context) override {
+    const Tensor& input_tensor = MklGetInput(context, 0);
+    const Tensor& sizes = MklGetInput(context, 1);
+
+    MklDnnShape mkl_shape_input;
+    GetMklShape(context, kInputSlotIdx, &mkl_shape_input);
+    bool input_in_mkl_format = mkl_shape_input.IsMklTensor();
+    const int64 nelems = input_in_mkl_format ?
+                         mkl_shape_input.GetTfShape().num_elements()
+                         : input_tensor.NumElements();
+
+    // Preliminary validation of sizes.
+    OP_REQUIRES(context, IsLegacyVector(sizes.shape()),
+                errors::InvalidArgument("sizes input must be 1-D, not shape ",
+                                        sizes.shape().DebugString()));
+
+    // Compute the output shape.  Determine product of specified
+    // dimensions, and find the index of the unspecified one.
+    TensorShape shape;
+    int64 product = 1;
+    int unknown_index = -1;
+    switch (sizes.dtype()) {
+      case DT_INT32:
+        OP_REQUIRES_OK(context, ValidateSizes<int32>(sizes, &product,
+                                                     &unknown_index, &shape));
+        break;
+      case DT_INT64:
+        OP_REQUIRES_OK(context, ValidateSizes<int64>(sizes, &product,
+                                                     &unknown_index, &shape));
+        break;
+      default:
+        context->CtxFailure(errors::InvalidArgument(
+            "desired shape must be a DT_INT32 or DT_INT64 vector, not a ",
+            DataTypeString(sizes.dtype())));
+        return;
+    }
+    if (unknown_index != -1) {
+      OP_REQUIRES(
+          context, product > 0,
+          errors::InvalidArgument("Reshape cannot infer the missing input size "
+                                  "for an empty tensor unless all specified "
+                                  "input sizes are non-zero"));
+      const int64 missing = nelems / product;
+      OP_REQUIRES(
+          context, product * missing == nelems,
+          errors::InvalidArgument(
+              "Input to reshape is a tensor with ", nelems,
+              " values, but the requested shape requires a multiple of ",
+              product));
+      shape.set_dim(unknown_index, missing);
+    }
+    OP_REQUIRES(context, shape.num_elements() == nelems,
+                errors::InvalidArgument("Input to reshape is a tensor with ",
+                                        nelems,
+                                        " values, but the requested shape has ",
+                                        shape.num_elements()));
+
+    if (input_in_mkl_format) {
+      TensorShape& shape_to = shape;
+      TensorShape shape_from = mkl_shape_input.GetTfShape();
+      if (shape_from == shape_to) {
+        CopyMklTensorInToOut(context, kInputSlotIdx, kOutputSlotIdx);
+        return;
+      } else {
+        try {
+          auto cpu_engine = engine(engine::cpu, 0);
+          MklDnnData<T> dnn_data_input(&cpu_engine);
+          // Reshape is just a logical view change operation for a tensor.
+          // It does not change underlying layout. But MKLDNN may maintain
+          // tensor data in different layout than that specified by Tensorflow.
+          // If MKLDNN maintains input tensor in different layout than that
+          // specified by Tensorflow, we will need to reorder tensor and then
+          // put it in the shape expected by Tensorflow. But if MKLDNN has
+          // maintained input tensor in the same layout as it is expected by
+          // Tensorflow, we don't need to reorder tensor contents, we just
+          // need to update MklDnnShape object associated with the input
+          // tensor to reflect the shape change expected by reshape.
+          if (!SkipReorder(mkl_shape_input, shape_to)) {
+              // If dimensions that are being expanded or collapsed are not
+              // maintained contiguously by MKLDNN, then we use reorder.
+
+              // Get Mkl layout of input tensor.
+              auto input_mkl_md = mkl_shape_input.GetMklLayout();
+              // Set input Mkl layout as the user layout.
+              dnn_data_input.SetUsrMem(input_mkl_md, &input_tensor);
+              // Get expected Tensorflow layout of input tensor.
+              auto output_tf_md = mkl_shape_input.GetTfLayout();
+              auto output_tf_pd = memory::primitive_desc(output_tf_md,
+                                                         cpu_engine);
+
+              Tensor* output_tensor = nullptr;
+              MklShape mkl_shape_output;
+              mkl_shape_output.SetMklTensor(false);
+              // We allocate output tensor in the shape expected by Reshape.
+              AllocateOutputSetMklShape(context, kOutputSlotIdx, &output_tensor,
+                                        shape_to, mkl_shape_output);
+
+              // Insert reorder between Mkl layout and TensorFlow layout.
+              std::vector<primitive> net;
+              CHECK_EQ(dnn_data_input.CheckReorderToOpMem(output_tf_pd,
+                       output_tensor, &net), true);
+              stream(stream::kind::eager).submit(net).wait();
+              return;
+          } else {
+            // If dimensions that are being expanded or collapsed are
+            // maintained contiguously by MKLDNN, then we skip reorder, just
+            // update MklDnnShape object for the tensorflow tensor, and forward
+            // Tensorflow tensor as it is to the output.
+            auto output_dims = TFShapeToMklDnnDims(shape_to);
+            auto output_strides = CalculateTFStrides(output_dims);
+            auto output_tf_md = MklDnnData<T>::CreateBlockedMemDesc(output_dims,
+                                                               output_strides);
+            auto output_tf_pd = memory::primitive_desc(output_tf_md,
+                                                       cpu_engine);
+
+            // Set MklDnnShape
+            MklDnnShape mkl_shape_output;
+            mkl_shape_output.SetMklTensor(true);
+            mkl_shape_output.SetMklLayout(&output_tf_pd);
+            mkl_shape_output.SetElemType(MklDnnType<T>());
+            mkl_shape_output.SetTfLayout(output_dims.size(), output_dims,
+                                         memory::format::blocked);
+
+            // We now simply forward input Mkl tensor to output and change its
+            // output MklDnnShape object.
+            ForwardMklTensorInToOutWithMklShape(context, kInputSlotIdx,
+                                              kOutputSlotIdx, mkl_shape_output);
+            return;
+          }
+        } catch (mkldnn::error &e) {
+          string error_msg = "Status: " + std::to_string(e.status) +
+                       ", message: " + string(e.message) +
+                       ", in file " + string(__FILE__) + ":" +
+                       std::to_string(__LINE__);
+          OP_REQUIRES_OK(context,
+                   errors::Aborted("Operation received an exception:",
+                      error_msg));
+        }
+      }
+    } else {
+      // If input tensor is not in Mkl format, then just copy Tensorflow tensor
+      // to output with specified shape.
+      CopyTfTensorInToOutWithShape(context, kInputSlotIdx, kOutputSlotIdx,
+                                   shape);
+    }
+  }
+
+#endif  // INTEL_MKL_DNN
+
+ private:
+  const int kInputSlotIdx = 0;
+  const int kOutputSlotIdx = 0;
+
   template <typename Tshape>
   Status ValidateSizes(const Tensor& sizes, int64* product, int* unknown_index,
                        TensorShape* shape) {
diff --git a/tensorflow/core/kernels/quantized_conv_ops.cc b/tensorflow/core/kernels/quantized_conv_ops.cc
index f83998e0c1..1921b83d12 100644
--- a/tensorflow/core/kernels/quantized_conv_ops.cc
+++ b/tensorflow/core/kernels/quantized_conv_ops.cc
@@ -268,6 +268,13 @@ class Im2ColConvFunctor {
     Im2ColBufferResource<T1, chunk_value_count>* im2col_buffer_resource;
     std::function<Status(Im2ColBufferResource<T1, chunk_value_count>**)>
         creator = [](Im2ColBufferResource<T1, chunk_value_count>** resource) {
+#ifdef _MSC_VER
+          // MSVC complains about the capture of chunk_value_count which oddly
+          // works fine in conv_ops_using_gemm.cc for example.
+          // Define chunk_value_count inside the lambda for now.
+          const int64 chunk_value_count =
+              (kMaxChunkSize + (sizeof(T1) - 1)) / sizeof(T1);
+#endif
           *resource = new Im2ColBufferResource<T1, chunk_value_count>();
           return Status::OK();
         };
diff --git a/tensorflow/core/lib/io/snappy/snappy_outputbuffer.cc b/tensorflow/core/lib/io/snappy/snappy_outputbuffer.cc
index be1fa22c69..3c31016732 100644
--- a/tensorflow/core/lib/io/snappy/snappy_outputbuffer.cc
+++ b/tensorflow/core/lib/io/snappy/snappy_outputbuffer.cc
@@ -161,7 +161,7 @@ Status SnappyOutputBuffer::Deflate() {
   }
 
   // Write length of compressed block to output buffer.
-  char* compressed_length_array = new char[4];
+  char compressed_length_array[4];
   std::fill(compressed_length_array, compressed_length_array + 4, 0);
   for (int i = 0; i < 4; i++) {
     // Little endian.
@@ -173,7 +173,6 @@ Status SnappyOutputBuffer::Deflate() {
   TF_RETURN_IF_ERROR(AddToOutputBuffer(output.data(), output.size()));
   next_in_ += avail_in_;
   avail_in_ = 0;
-  delete[] compressed_length_array;
 
   return Status::OK();
 }
diff --git a/tensorflow/core/ops/nn_ops.cc b/tensorflow/core/ops/nn_ops.cc
index 980d0c31a3..15122afd23 100644
--- a/tensorflow/core/ops/nn_ops.cc
+++ b/tensorflow/core/ops/nn_ops.cc
@@ -2958,6 +2958,25 @@ NOTE Do not invoke this operator directly in Python. Graph rewrite pass is
 expected to invoke these operators.
 )doc");
 
+REGISTER_OP("__MklDummyConv2DWithBias")
+    .Input("input: T")
+    .Input("filter: T")
+    .Input("bias: T")
+    .Output("output: T")
+    .Attr("T: {half, float, double}")
+    .Attr("strides: list(int)")
+    .Attr("use_cudnn_on_gpu: bool = true")
+    .Attr(GetPaddingAttrString())
+    .Attr(GetConvnetDataFormatAttrString())
+    .Doc(R"doc(
+Dummy node that enables fusing Conv2D and BiasAdd operator for MKL. This node
+does not perform anything. It is just created as an intermediate output of
+merging Conv2D and BiasAdd.
+
+NOTE Do not invoke this operator directly in Python. Graph rewrite pass is
+expected to invoke these operators.
+)doc");
+
 REGISTER_OP("_MklConv2DWithBias")
     .Input("input: T")
     .Input("filter: T")
@@ -3011,6 +3030,88 @@ NOTE Do not invoke this operator directly in Python. Graph rewrite pass is
 expected to invoke these operators.
 )doc");
 
+REGISTER_OP("__MklDummyConv2DBackpropFilterWithBias")
+    .Input("input: T")
+    .Input("filter_sizes: int32")
+    .Input("out_backprop: T")
+    .Output("output: T")
+    .Output("bias_grad: T")
+    .Attr("T: {half, float, double}")
+    .Attr("strides: list(int)")
+    .Attr("use_cudnn_on_gpu: bool = true")
+    .Attr(GetPaddingAttrString())
+    .Attr(GetConvnetDataFormatAttrString())
+    .SetShapeFn([](InferenceContext* c) {
+      ShapeHandle input_shape;
+      // Fetch the data_format attribute, which may not exist.
+      string data_format;
+      Status s = c->GetAttr("data_format", &data_format);
+
+      if (s.ok() && data_format == "NCHW") {
+        TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 4, &input_shape));
+        c->set_output(1, c->Vector(c->Dim(input_shape, -3)));
+      } else {
+        TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 4, &input_shape));
+        c->set_output(1, c->Vector(c->Dim(input_shape, -1)));
+      }
+      ShapeHandle sh;
+      TF_RETURN_IF_ERROR(c->MakeShapeFromShapeTensor(1, &sh));
+      TF_RETURN_IF_ERROR(c->WithRank(sh, 4, &sh));
+      c->set_output(0, sh);
+      return Status::OK();
+    })
+    .Doc(R"doc(
+Dummy node that enables fusing Conv2DBackpropFilter and BiasAddGrad operator
+for MKL. This node does not perform anything. It is just created as an
+intermediate output of merging Conv2DBackpropFilter and BiasAddGrad.
+
+NOTE Do not invoke this operator directly in Python. Graph rewrite pass is
+expected to invoke these operators.
+)doc");
+
+REGISTER_OP("_MklConv2DBackpropFilterWithBias")
+    .Input("input: T")
+    .Input("filter_sizes: int32")
+    .Input("out_backprop: T")
+    .Input("mkl_input: uint8")
+    .Input("mkl_filter_size: uint8")
+    .Input("mkl_out_backprop: uint8")
+    .Output("output: T")
+    .Output("bias_grad: T")
+    .Output("mkl_output: uint8")
+    .Output("mkl_bias_grad: uint8")
+    .Attr("T: {half, float, double}")
+    .Attr("strides: list(int)")
+    .Attr("use_cudnn_on_gpu: bool = true")
+    .Attr(GetPaddingAttrString())
+    .Attr(GetConvnetDataFormatAttrString())
+    .SetShapeFn([](InferenceContext* c) {
+      ShapeHandle input_shape;
+      // Fetch the data_format attribute, which may not exist.
+      string data_format;
+      Status s = c->GetAttr("data_format", &data_format);
+
+      if (s.ok() && data_format == "NCHW") {
+        TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 4, &input_shape));
+        c->set_output(1, c->Vector(c->Dim(input_shape, -3)));
+      } else {
+        TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 4, &input_shape));
+        c->set_output(1, c->Vector(c->Dim(input_shape, -1)));
+      }
+      ShapeHandle sh;
+      TF_RETURN_IF_ERROR(c->MakeShapeFromShapeTensor(1, &sh));
+      TF_RETURN_IF_ERROR(c->WithRank(sh, 4, &sh));
+      c->set_output(0, sh);
+      return Status::OK();
+    })
+    .Doc(R"doc(
+MKL version of Conv2DBackpropFilterWithBias. Uses MKL DNN APIs to compute the
+gradients of convolution with respect to the filter.
+
+NOTE Do not invoke this operator directly in Python. Graph rewrite pass is
+expected to invoke these operators.
+)doc");
+
 REGISTER_OP("_MklConv2DWithBiasBackpropBias")
     .Input("out_backprop: T")
     .Input("mkl_out_backprop: uint8")
@@ -3087,6 +3188,78 @@ NOTE Do not invoke this operator directly in Python. Graph rewrite pass is
 expected to invoke these operators.
 )doc");
 
+REGISTER_OP("_MklElu")
+    .Input("features: T")
+    .Input("mkl_features: uint8")
+    .Output("activations: T")
+    .Output("mkl_activations: uint8")
+    .Attr("T: realnumbertype")
+    .SetShapeFn(shape_inference::UnchangedShape)
+    .Doc(R"doc(
+MKL version of Elu operator. Uses MKL DNN APIs to implement Elu operator.
+NOTE Do not invoke this operator directly in Python. Graph rewrite pass is
+expected to invoke these operators.
+)doc");
+
+REGISTER_OP("_MklEluGrad")
+    .Input("gradients: T")
+    .Input("features: T")
+    .Input("mkl_gradients: uint8")
+    .Input("mkl_features: uint8")
+    .Output("backprops: T")
+    .Output("mkl_backprops: uint8")
+    .Attr("T: realnumbertype")
+    .SetShapeFn(shape_inference::MergeBothInputsShapeFn)
+    .Doc(R"doc(
+MKL version of EluGrad operator. Uses MKL DNN APIs to compute Elu
+gradients for Elu operation.
+NOTE Do not invoke this operator directly in Python. Graph rewrite pass is
+expected to invoke these operators.
+)doc");
+
+REGISTER_OP("_MklSoftmax")
+    .Input("logits: T")
+    .Input("mkl_logits: uint8")
+    .Output("softmax: T")
+    .Output("mkl_softmax: uint8")
+    .Attr("T: {half, float, double}")
+    .SetShapeFn([](InferenceContext* c) {
+      return shape_inference::UnchangedShapeWithRankAtLeast(c, 1);
+    })
+    .Doc(R"doc(
+MKL version of ReluGrad operator. Uses MKL DNN APIs to compute rectified
+linear gradients for Relu operation.
+)doc");
+
+REGISTER_OP("_MklTanh")
+    .Input("features: T")
+    .Input("mkl_features: uint8")
+    .Output("activations: T")
+    .Output("mkl_activations: uint8")
+    .Attr("T: realnumbertype")
+    .SetShapeFn(shape_inference::UnchangedShape)
+    .Doc(R"doc(
+MKL version of Tanh operator. Uses MKL DNN APIs to implement Tanh operator.
+NOTE Do not invoke this operator directly in Python. Graph rewrite pass is
+expected to invoke these operators.
+)doc");
+
+REGISTER_OP("_MklTanhGrad")
+    .Input("gradients: T")
+    .Input("features: T")
+    .Input("mkl_gradients: uint8")
+    .Input("mkl_features: uint8")
+    .Output("backprops: T")
+    .Output("mkl_backprops: uint8")
+    .Attr("T: realnumbertype")
+    .SetShapeFn(shape_inference::MergeBothInputsShapeFn)
+    .Doc(R"doc(
+MKL version of TanhGrad operator. Uses MKL DNN APIs to compute tanh
+gradients for Tanh operation.
+NOTE Do not invoke this operator directly in Python. Graph rewrite pass is
+expected to invoke these operators.
+)doc");
+
 REGISTER_OP("_MklMaxPool")
     .Attr("T: {float, half} = DT_FLOAT")
     .Attr("ksize: list(int) >= 4")
diff --git a/tensorflow/core/platform/cloud/BUILD b/tensorflow/core/platform/cloud/BUILD
index 624145da75..aaeccc8324 100644
--- a/tensorflow/core/platform/cloud/BUILD
+++ b/tensorflow/core/platform/cloud/BUILD
@@ -10,6 +10,7 @@ licenses(["notice"])  # Apache 2.0
 load(
     "//tensorflow:tensorflow.bzl",
     "tf_cc_test",
+    "tf_copts",
 )
 
 filegroup(
@@ -29,6 +30,7 @@ filegroup(
 cc_library(
     name = "expiring_lru_cache",
     hdrs = ["expiring_lru_cache.h"],
+    copts = tf_copts(),
     visibility = ["//tensorflow:__subpackages__"],
     deps = ["//tensorflow/core:lib"],
 )
@@ -37,6 +39,7 @@ cc_library(
     name = "file_block_cache",
     srcs = ["file_block_cache.cc"],
     hdrs = ["file_block_cache.h"],
+    copts = tf_copts(),
     visibility = ["//tensorflow:__subpackages__"],
     deps = ["//tensorflow/core:lib"],
 )
@@ -45,6 +48,7 @@ cc_library(
     name = "gcs_dns_cache",
     srcs = ["gcs_dns_cache.cc"],
     hdrs = ["gcs_dns_cache.h"],
+    copts = tf_copts(),
     visibility = ["//tensorflow:__subpackages__"],
     deps = [
         ":http_request",
@@ -56,6 +60,7 @@ cc_library(
     name = "gcs_file_system",
     srcs = ["gcs_file_system.cc"],
     hdrs = ["gcs_file_system.h"],
+    copts = tf_copts(),
     linkstatic = 1,  # Needed since alwayslink is broken in bazel b/27630669
     visibility = ["//visibility:public"],
     deps = [
@@ -78,6 +83,7 @@ cc_library(
 cc_library(
     name = "http_request",
     hdrs = ["http_request.h"],
+    copts = tf_copts(),
     visibility = ["//tensorflow:__subpackages__"],
     deps = [
         "//tensorflow/core:framework_headers_lib",
@@ -89,6 +95,7 @@ cc_library(
     name = "curl_http_request",
     srcs = ["curl_http_request.cc"],
     hdrs = ["curl_http_request.h"],
+    copts = tf_copts(),
     visibility = ["//tensorflow:__subpackages__"],
     deps = [
         ":http_request",
@@ -104,6 +111,7 @@ cc_library(
     hdrs = [
         "http_request_fake.h",
     ],
+    copts = tf_copts(),
     visibility = ["//tensorflow:__subpackages__"],
     deps = [
         ":curl_http_request",
@@ -121,6 +129,7 @@ cc_library(
         "auth_provider.h",
         "google_auth_provider.h",
     ],
+    copts = tf_copts(),
     visibility = ["//tensorflow:__subpackages__"],
     deps = [
         ":curl_http_request",
@@ -136,6 +145,7 @@ cc_library(
     name = "now_seconds_env",
     testonly = 1,
     hdrs = ["now_seconds_env.h"],
+    copts = tf_copts(),
     visibility = ["//tensorflow:__subpackages__"],
     deps = [
         "//tensorflow/core:lib",
@@ -151,6 +161,7 @@ cc_library(
     hdrs = [
         "oauth_client.h",
     ],
+    copts = tf_copts(),
     deps = [
         ":curl_http_request",
         ":http_request",
@@ -169,6 +180,7 @@ cc_library(
     hdrs = [
         "retrying_utils.h",
     ],
+    copts = tf_copts(),
     deps = [
         "//tensorflow/core:framework_headers_lib",
         "//tensorflow/core:lib_internal",
@@ -183,6 +195,7 @@ cc_library(
     hdrs = [
         "retrying_file_system.h",
     ],
+    copts = tf_copts(),
     deps = [
         ":retrying_utils",
         "//tensorflow/core:framework_headers_lib",
@@ -198,6 +211,7 @@ cc_library(
     hdrs = [
         "time_util.h",
     ],
+    copts = tf_copts(),
     deps = [
         "//tensorflow/core:framework_headers_lib",
         "//tensorflow/core:lib_internal",
diff --git a/tensorflow/core/platform/cloud/gcs_dns_cache.cc b/tensorflow/core/platform/cloud/gcs_dns_cache.cc
index 78bf680317..87b0dde136 100644
--- a/tensorflow/core/platform/cloud/gcs_dns_cache.cc
+++ b/tensorflow/core/platform/cloud/gcs_dns_cache.cc
@@ -14,9 +14,14 @@ limitations under the License.
 ==============================================================================*/
 
 #include "tensorflow/core/platform/cloud/gcs_dns_cache.h"
-
+#ifndef _WIN32
 #include <arpa/inet.h>
 #include <netdb.h>
+#else
+#include <winsock2.h>
+#include <ws2tcpip.h>
+#include <Windows.h>
+#endif
 #include <sys/types.h>
 
 namespace tensorflow {
@@ -26,6 +31,21 @@ namespace {
 const std::vector<string>& kCachedDomainNames =
     *new std::vector<string>{"www.googleapis.com", "storage.googleapis.com"};
 
+inline void print_getaddrinfo_error(const string& name, int error_code) {
+#ifndef _WIN32
+  if (error_code == EAI_SYSTEM) {
+    LOG(ERROR) << "Error resolving " << name
+               << " (EAI_SYSTEM): " << strerror(errno);
+  } else {
+    LOG(ERROR) << "Error resolving " << name << ": "
+               << gai_strerror(error_code);
+  }
+#else
+  // TODO:WSAGetLastError is better than gai_strerror
+  LOG(ERROR) << "Error resolving " << name << ": " << gai_strerror(error_code);
+#endif
+}
+
 // Selects one item at random from a vector of items, using a uniform
 // distribution.
 template <typename T>
@@ -86,7 +106,7 @@ Status GcsDnsCache::AnnotateRequest(HttpRequest* request) {
 
   std::vector<string> output;
   if (return_code == 0) {
-    for (addrinfo* i = result; i != nullptr; i = i->ai_next) {
+    for (const addrinfo* i = result; i != nullptr; i = i->ai_next) {
       if (i->ai_family != AF_INET || i->ai_addr->sa_family != AF_INET) {
         LOG(WARNING) << "Non-IPv4 address returned. ai_family: " << i->ai_family
                      << ". sa_family: " << i->ai_addr->sa_family << ".";
@@ -106,13 +126,7 @@ Status GcsDnsCache::AnnotateRequest(HttpRequest* request) {
       }
     }
   } else {
-    if (return_code == EAI_SYSTEM) {
-      LOG(ERROR) << "Error resolving " << name
-                 << " (EAI_SYSTEM): " << strerror(errno);
-    } else {
-      LOG(ERROR) << "Error resolving " << name << ": "
-                 << gai_strerror(return_code);
-    }
+    print_getaddrinfo_error(name, return_code);
   }
   if (result != nullptr) {
     freeaddrinfo(result);
diff --git a/tensorflow/core/platform/cloud/gcs_file_system.cc b/tensorflow/core/platform/cloud/gcs_file_system.cc
index f80cbf7626..a183fe6fa8 100644
--- a/tensorflow/core/platform/cloud/gcs_file_system.cc
+++ b/tensorflow/core/platform/cloud/gcs_file_system.cc
@@ -22,6 +22,9 @@ limitations under the License.
 #include <cstring>
 #include <fstream>
 #include <vector>
+#ifdef _WIN32
+#include <io.h>  //for _mktemp
+#endif
 #include "include/json/json.h"
 #include "tensorflow/core/lib/core/errors.h"
 #include "tensorflow/core/lib/gtl/map_util.h"
@@ -40,6 +43,12 @@ limitations under the License.
 #include "tensorflow/core/platform/protobuf.h"
 #include "tensorflow/core/platform/thread_annotations.h"
 
+#ifdef _WIN32
+#ifdef DeleteFile
+#undef DeleteFile
+#endif
+#endif
+
 namespace tensorflow {
 
 namespace {
@@ -109,16 +118,25 @@ constexpr char kReadRequestTimeout[] = "GCS_READ_REQUEST_TIMEOUT_SECS";
 // upload requests.
 constexpr char kWriteRequestTimeout[] = "GCS_WRITE_REQUEST_TIMEOUT_SECS";
 
+// TODO: DO NOT use a hardcoded path
 Status GetTmpFilename(string* filename) {
   if (!filename) {
     return errors::Internal("'filename' cannot be nullptr.");
   }
+#ifndef _WIN32
   char buffer[] = "/tmp/gcs_filesystem_XXXXXX";
   int fd = mkstemp(buffer);
   if (fd < 0) {
     return errors::Internal("Failed to create a temporary file.");
   }
   close(fd);
+#else
+  char buffer[] = "/tmp/gcs_filesystem_XXXXXX";
+  char* ret = _mktemp(buffer);
+  if (ret == nullptr) {
+    return errors::Internal("Failed to create a temporary file.");
+  }
+#endif
   *filename = buffer;
   return Status::OK();
 }
@@ -306,6 +324,7 @@ class GcsWritableFile : public WritableFile {
         file_cache_erase_(std::move(file_cache_erase)),
         sync_needed_(true),
         initial_retry_delay_usec_(initial_retry_delay_usec) {
+    // TODO: to make it safer, outfile_ should be constructed from an FD
     if (GetTmpFilename(&tmp_content_filename_).ok()) {
       outfile_.open(tmp_content_filename_,
                     std::ofstream::binary | std::ofstream::app);
@@ -429,7 +448,7 @@ class GcsWritableFile : public WritableFile {
       return errors::Internal("'size' cannot be nullptr");
     }
     const auto tellp = outfile_.tellp();
-    if (tellp == -1) {
+    if (tellp == static_cast<std::streampos>(-1)) {
       return errors::Internal(
           "Could not get the size of the internal temporary file.");
     }
diff --git a/tensorflow/core/platform/cloud/google_auth_provider.cc b/tensorflow/core/platform/cloud/google_auth_provider.cc
index f6fd8373cd..d77f439c5a 100644
--- a/tensorflow/core/platform/cloud/google_auth_provider.cc
+++ b/tensorflow/core/platform/cloud/google_auth_provider.cc
@@ -14,9 +14,12 @@ limitations under the License.
 ==============================================================================*/
 
 #include "tensorflow/core/platform/cloud/google_auth_provider.h"
+#ifndef _WIN32
 #include <pwd.h>
-#include <sys/types.h>
 #include <unistd.h>
+#else
+#include <sys/types.h>
+#endif
 #include <fstream>
 #include "include/json/json.h"
 #include "tensorflow/core/lib/core/errors.h"
diff --git a/tensorflow/core/platform/cloud/oauth_client.cc b/tensorflow/core/platform/cloud/oauth_client.cc
index c700b97dc9..3c2830ccd9 100644
--- a/tensorflow/core/platform/cloud/oauth_client.cc
+++ b/tensorflow/core/platform/cloud/oauth_client.cc
@@ -14,9 +14,13 @@ limitations under the License.
 ==============================================================================*/
 
 #include "tensorflow/core/platform/cloud/oauth_client.h"
+#ifndef _WIN32
 #include <pwd.h>
 #include <sys/types.h>
 #include <unistd.h>
+#else
+#include <sys/types.h>
+#endif
 #include <fstream>
 #include <openssl/bio.h>
 #include <openssl/evp.h>
diff --git a/tensorflow/core/platform/cloud/time_util.cc b/tensorflow/core/platform/cloud/time_util.cc
index 2f8643f3c7..0587a65c29 100644
--- a/tensorflow/core/platform/cloud/time_util.cc
+++ b/tensorflow/core/platform/cloud/time_util.cc
@@ -18,6 +18,9 @@ limitations under the License.
 #include <cmath>
 #include <cstdio>
 #include <ctime>
+#ifdef _WIN32
+#define timegm _mkgmtime
+#endif
 #include "tensorflow/core/lib/core/errors.h"
 
 namespace tensorflow {
diff --git a/tensorflow/core/platform/default/build_config.bzl b/tensorflow/core/platform/default/build_config.bzl
index 0f8cf8f122..948334d27b 100644
--- a/tensorflow/core/platform/default/build_config.bzl
+++ b/tensorflow/core/platform/default/build_config.bzl
@@ -458,7 +458,6 @@ def tf_additional_lib_deps():
 
 def tf_additional_core_deps():
   return select({
-      "//tensorflow:with_gcp_support_windows_override": [],
       "//tensorflow:with_gcp_support_android_override": [],
       "//tensorflow:with_gcp_support_ios_override": [],
       "//tensorflow:with_gcp_support": [
diff --git a/tensorflow/core/platform/profile_utils/android_armv7a_cpu_utils_helper.cc b/tensorflow/core/platform/profile_utils/android_armv7a_cpu_utils_helper.cc
index fb1955edde..12dc9c58b3 100644
--- a/tensorflow/core/platform/profile_utils/android_armv7a_cpu_utils_helper.cc
+++ b/tensorflow/core/platform/profile_utils/android_armv7a_cpu_utils_helper.cc
@@ -118,9 +118,10 @@ int64 AndroidArmV7ACpuUtilsHelper::ReadCpuFrequencyFile(
   const int retval = fscanf(fp, "%lld", &freq_in_khz);
   if (retval < 0) {
     LOG(WARNING) << "Failed to \"" << file_path << "\"";
+    fclose(fp);
     return INVALID_CPU_FREQUENCY;
   }
-  pclose(fp);
+  fclose(fp);
   return freq_in_khz * 1000;  // The file contains cpu frequency in khz
 }
 
diff --git a/tensorflow/core/platform/s3/s3_file_system.cc b/tensorflow/core/platform/s3/s3_file_system.cc
index 234f3c3aed..682ad97eec 100644
--- a/tensorflow/core/platform/s3/s3_file_system.cc
+++ b/tensorflow/core/platform/s3/s3_file_system.cc
@@ -12,9 +12,9 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
+#include "tensorflow/core/platform/s3/s3_file_system.h"
 #include "tensorflow/core/lib/io/path.h"
 #include "tensorflow/core/platform/mutex.h"
-#include "tensorflow/core/platform/s3/s3_file_system.h"
 #include "tensorflow/core/platform/s3/s3_crypto.h"
 
 #include <aws/core/Aws.h>
@@ -49,9 +49,15 @@ Aws::Client::ClientConfiguration& GetDefaultClientConfig() {
     if (endpoint) {
       cfg.endpointOverride = Aws::String(endpoint);
     }
-    const char* region = getenv("S3_REGION");
+    const char* region = getenv("AWS_REGION");
     if (region) {
       cfg.region = Aws::String(region);
+    } else {
+      // TODO (yongtang): `S3_REGION` should be deprecated after 2.0.
+      const char* region = getenv("S3_REGION");
+      if (region) {
+        cfg.region = Aws::String(region);
+      }
     }
     const char* use_https = getenv("S3_USE_HTTPS");
     if (use_https) {
diff --git a/tensorflow/core/util/mkl_util.h b/tensorflow/core/util/mkl_util.h
index 148c7851bd..2caf5fc56d 100644
--- a/tensorflow/core/util/mkl_util.h
+++ b/tensorflow/core/util/mkl_util.h
@@ -328,6 +328,10 @@ class MklShape {
 
 // Forward decl
 TensorFormat MklDnnDataFormatToTFDataFormat(memory::format format);
+memory::dims CalculateTFStrides(const memory::dims& dims_tf_order);
+memory::desc CreateBlockedMemDescHelper(const memory::dims& dim,
+                                        const memory::dims& strides,
+                                        memory::data_type dtype);
 
 class MklDnnShape {
  private:
@@ -364,6 +368,52 @@ class MklDnnShape {
   ~MklDnnShape() {}
   TF_DISALLOW_COPY_AND_ASSIGN(MklDnnShape);  // Cannot copy
 
+  /// Helper function to compare memory::desc objects for MklDnn.
+  /// May be this should go into MklDnn directly.
+  inline bool CompareMklDnnLayouts(const memory::desc& md1,
+                                   const memory::desc& md2) const {
+    mkldnn_memory_desc_t mdd1 = md1.data;
+    mkldnn_memory_desc_t mdd2 = md2.data;
+    const char* d1 = reinterpret_cast<const char*>(&mdd1);
+    const char* d2 = reinterpret_cast<const char*>(&mdd2);
+
+    size_t md_size = sizeof(mdd1);
+    for (size_t i = 0; i < md_size; i++) {
+      if (*d1++ != *d2++) {
+        return false;
+      }
+    }
+    return true;
+  }
+
+  /// Equality function for MklDnnShape objects
+  /// @return true if both are equal; false otherwise.
+  inline bool operator == (const MklDnnShape& input_shape) const {
+    if (this->IsMklTensor() != input_shape.IsMklTensor()) {
+      return false;
+    }
+
+    // If input tensors are in Mkl layout, then we check for dimensions and
+    // sizes.
+    if (this->IsMklTensor()) {
+      return this->GetTfShape() == input_shape.GetTfShape() &&
+             CompareMklDnnLayouts(this->GetMklLayout(),
+                                  input_shape.GetMklLayout());
+    }
+
+    return true;
+  }
+
+  /// Equality operator for MklDnnShape and TFShape.
+  /// Returns: true if TF shapes for both are the same, false otherwise
+  inline bool operator == (const TensorShape& input_shape) const {
+    if (!this->IsMklTensor()) {
+      return false;
+    }
+
+    return this->GetTfShape() == input_shape;
+  }
+
   inline const bool IsMklTensor() const { return data_.is_mkl_tensor_; }
   inline void SetMklTensor(bool is_mkl_tensor) {
     data_.is_mkl_tensor_ = is_mkl_tensor;
@@ -375,7 +425,7 @@ class MklDnnShape {
   inline size_t GetDimension(char dimension) const {
     int index = GetMklDnnTensorDimIndex(dimension);
     CHECK(index >= 0 && index < this->GetDimension())
-        << "Invalid index from the dimension: " << index << ", " << dimension;
+      << "Invalid index from the dimension: " << index << ", " << dimension;
     return this->DimSize(index);
   }
 
@@ -405,7 +455,7 @@ class MklDnnShape {
   inline memory::dims GetSizesAsMklDnnDims() const {
     memory::dims retVal;
     if (data_.is_mkl_tensor_) {
-      int dimensions = sizeof(data_.sizes_) / sizeof(data_.sizes_[0]);
+      size_t dimensions = sizeof(data_.sizes_) / sizeof(data_.sizes_[0]);
       for (size_t i = 0; i < dimensions; i++) {
         if (data_.sizes_[i] != INVALID_DIM_SIZE)
           retVal.push_back(data_.sizes_[i]);
@@ -423,12 +473,21 @@ class MklDnnShape {
 
   /// Return TensorShape that describes the Tensorflow shape of the tensor
   /// represented by this MklShape.
-  inline TensorShape GetTfShape() {
+  inline TensorShape GetTfShape() const {
     CHECK_EQ(data_.is_mkl_tensor_, true);
 
     std::vector<int32> shape(data_.dimension_, -1);
-    for (size_t idx = 0; idx < data_.dimension_; ++idx) {
-      shape[idx] = data_.sizes_[TfDimIdx(idx)];
+    if (data_.tf_data_format_ != memory::format::blocked) {
+      for (size_t idx = 0; idx < data_.dimension_; ++idx) {
+        shape[idx] = data_.sizes_[TfDimIdx(idx)];
+      }
+    } else {
+      // If Tensorflow shape is in Blocked format, then we don't have dimension
+      // map for it. So we just create Tensorflow shape from sizes in the
+      // specified order.
+      for (size_t idx = 0; idx < data_.dimension_; ++idx) {
+        shape[idx] = data_.sizes_[idx];
+      }
     }
 
     TensorShape ts;
@@ -444,6 +503,12 @@ class MklDnnShape {
     CHECK_NOTNULL(pd);
     data_.mkl_md_ = pd->desc().data;
   }
+
+  inline void SetMklLayout(memory::desc* md) {
+    CHECK_NOTNULL(md);
+    data_.mkl_md_ = md->data;
+  }
+
   inline const memory::desc GetMklLayout() const {
     return memory::desc(data_.mkl_md_);
   }
@@ -452,7 +517,8 @@ class MklDnnShape {
     return data_.tf_data_format_;
   }
   /// We don't create primitive_descriptor for TensorFlow layout now.
-  /// We use lazy evaluation and create it only when needed.
+  /// We use lazy evaluation and create it only when needed. Input format can
+  /// also be Blocked format.
   inline void SetTfLayout(size_t dims, const memory::dims& sizes,
                           memory::format format) {
     CHECK_EQ(dims, sizes.size());
@@ -461,15 +527,26 @@ class MklDnnShape {
       data_.sizes_[ii] = sizes[ii];
     }
     data_.tf_data_format_ = format;
-    SetTfDimOrder(dims, format);
+    if (format != memory::format::blocked) {
+      SetTfDimOrder(dims, format);
+    }
   }
+
   inline const memory::desc GetTfLayout() const {
     memory::dims dims;
     for (size_t ii = 0; ii < data_.dimension_; ii++) {
       dims.push_back(data_.sizes_[ii]);
     }
-    return memory::desc(dims, data_.T_, data_.tf_data_format_);
+
+    // Create Blocked memory desc if input TF format was set like that.
+    if (data_.tf_data_format_ == memory::format::blocked) {
+      auto strides = CalculateTFStrides(dims);
+      return CreateBlockedMemDescHelper(dims, strides, data_.T_);
+    } else {
+      return memory::desc(dims, data_.T_, data_.tf_data_format_);
+    }
   }
+
   inline const memory::desc GetCurLayout() const {
     return IsMklTensor() ? GetMklLayout() : GetTfLayout();
   }
@@ -579,8 +656,13 @@ class MklDnnShape {
 #endif
 
 // List of MklShape objects. Used in Concat/Split layers.
+
 typedef std::vector<MklShape> MklShapeList;
 
+#ifdef INTEL_MKL_DNN
+typedef std::vector<MklDnnShape> MklDnnShapeList;
+#endif
+
 // Check if all tensors specified by MklShapes are MKL tensors.
 inline bool AreAllMklTensors(const MklShapeList& shapes) {
   for (auto& s : shapes) {
@@ -591,6 +673,7 @@ inline bool AreAllMklTensors(const MklShapeList& shapes) {
   return true;
 }
 
+#ifndef INTEL_MKL_DNN
 template <typename T>
 inline Tensor ConvertMklToTF(OpKernelContext* context, const Tensor& mkl_tensor,
                              const MklShape& mkl_shape) {
@@ -615,32 +698,15 @@ inline Tensor ConvertMklToTF(OpKernelContext* context, const Tensor& mkl_tensor,
 
   return output_tensor;
 }
-
-#ifdef INTEL_MKL_DNN
+#else
 template <typename T>
 inline Tensor ConvertMklToTF(OpKernelContext* context, const Tensor& mkl_tensor,
                              const MklDnnShape& mkl_shape) {
   Tensor output_tensor;
   TensorShape output_shape;
 
-#if 0
-  // TODO(nhasabni): need to implement
-  for (size_t j = 0; j < mkl_shape.GetDimension(); j++) {
-    // Outermost to innermost dimension
-    output_shape.AddDim(mkl_shape.GetSizes()[mkl_shape.tf_dim_idx(j)]);
-  }
-
-  // Allocate output tensor.
-  context->allocate_temp(DataTypeToEnum<T>::v(), output_shape, &output_tensor);
-
-  dnnLayout_t output_layout = static_cast<dnnLayout_t>(mkl_shape.GetTfLayout());
-  void* input_buffer = const_cast<T*>(mkl_tensor.flat<T>().data());
-  void* output_buffer = const_cast<T*>(output_tensor.flat<T>().data());
-
-  if (mkl_tensor.NumElements() != 0) {
-    mkl_shape.GetConvertedFlatData(output_layout, input_buffer, output_buffer);
-  }
-#endif
+  TF_CHECK_OK(Status(error::Code::UNIMPLEMENTED,
+                     "Unimplemented conversion function"));
 
   return output_tensor;
 }
@@ -682,6 +748,9 @@ inline void GetMklInputList(OpKernelContext* ctext, StringPiece name,
   ctext->input_list(name, input_tensors);
 }
 
+
+#ifndef INTEL_MKL_DNN
+
 inline void GetMklShapeList(OpKernelContext* ctext, StringPiece name,
                             MklShapeList* mkl_shapes) {
   OpInputList input_mkl_tensors;
@@ -694,6 +763,22 @@ inline void GetMklShapeList(OpKernelContext* ctext, StringPiece name,
   }
 }
 
+#else
+
+inline void GetMklShapeList(OpKernelContext* ctext, StringPiece name,
+                            MklDnnShapeList* mkl_shapes) {
+  OpInputList input_mkl_tensors;
+  GetMklInputList(ctext, strings::StrCat("mkl_", name), &input_mkl_tensors);
+
+  for (int i = 0; i < input_mkl_tensors.size(); i++) {
+    (*mkl_shapes)[i].DeSerializeMklDnnShape(
+        input_mkl_tensors[i].flat<uint8>().data(),
+        input_mkl_tensors[i].flat<uint8>().size() * sizeof(uint8));
+  }
+}
+
+#endif
+
 #ifdef INTEL_MKL_DNN
 /// Get shape of input tensor pointed by 'input_idx' in TensorShape format.
 /// If the input tensor is in MKL layout, then obtains TensorShape from
@@ -909,6 +994,7 @@ inline void CopyMklTensorInToOut(OpKernelContext* context,
   context->set_output(idx_meta_out, meta_output);
 }
 
+#ifndef INTEL_MKL_DNN
 inline void CopyTfTensorInToOutWithShape(OpKernelContext* context,
                                          int idx_in, int idx_out,
                                          const TensorShape& shape) {
@@ -926,6 +1012,27 @@ inline void CopyTfTensorInToOutWithShape(OpKernelContext* context,
   CHECK(output.CopyFrom(data, shape));
   context->set_output(idx_data_out, output);
 }
+#else
+inline void CopyTfTensorInToOutWithShape(OpKernelContext* context,
+                                         int idx_in, int idx_out,
+                                         const TensorShape& shape) {
+  int num_inputs = context->num_inputs();
+  int num_outputs = context->num_outputs();
+  int idx_data_in = GetTensorDataIndex(idx_in, num_inputs);
+  int idx_data_out = GetTensorDataIndex(idx_out, num_outputs);
+
+  const Tensor& data = context->input(idx_data_in);
+  MklDnnShape mkl_shape_output;
+  mkl_shape_output.SetMklTensor(false);
+  AllocateOutputSetMklShape(context, idx_out, mkl_shape_output);
+  Tensor output(data.dtype());
+  // TODO(intel_tf): alternatively, call forward_input_to_output_with_shape(...)
+  CHECK(output.CopyFrom(data, shape));
+  context->set_output(idx_data_out, output);
+}
+#endif
+
+#ifndef INTEL_MKL_DNN
 
 inline void ForwardTfTensorInToOut(OpKernelContext* context,
                                   int idx_in, int idx_out) {
@@ -944,6 +1051,27 @@ inline void ForwardTfTensorInToOut(OpKernelContext* context,
   }
 }
 
+#else
+
+inline void ForwardTfTensorInToOut(OpKernelContext* context,
+                                  int idx_in, int idx_out) {
+  int num_inputs = context->num_inputs();
+  int num_outputs = context->num_outputs();
+  int idx_data_in = GetTensorDataIndex(idx_in, num_inputs);
+  int idx_data_out = GetTensorDataIndex(idx_out, num_outputs);
+
+  MklDnnShape dnn_shape_output;
+  dnn_shape_output.SetMklTensor(false);
+  AllocateOutputSetMklShape(context, idx_out, dnn_shape_output);
+  if (IsRefType(context->input_dtype(idx_data_in))) {
+    context->forward_ref_input_to_ref_output(idx_data_in, idx_data_out);
+  } else {
+    context->set_output(idx_data_out, context->input(idx_data_in));
+  }
+}
+
+#endif
+
 inline void ForwardMklTensorInToOut(OpKernelContext* context,
                                    int idx_in, int idx_out) {
   int num_inputs = context->num_inputs();
@@ -962,6 +1090,25 @@ inline void ForwardMklTensorInToOut(OpKernelContext* context,
   }
 }
 
+#ifdef INTEL_MKL_DNN
+inline void ForwardMklTensorInToOutWithMklShape(OpKernelContext* context,
+                                             int idx_in, int idx_out,
+                                             const MklDnnShape& mkl_shape) {
+  int num_inputs = context->num_inputs();
+  int num_outputs = context->num_outputs();
+  int idx_data_in = GetTensorDataIndex(idx_in, num_inputs);
+  int idx_data_out = GetTensorDataIndex(idx_out, num_outputs);
+
+  AllocateOutputSetMklShape(context, idx_out, mkl_shape);
+
+  if (IsRefType(context->input_dtype(idx_data_in))) {
+    context->forward_ref_input_to_ref_output(idx_data_in, idx_data_out);
+  } else {
+    context->set_output(idx_data_out, context->input(idx_data_in));
+  }
+}
+#endif
+
 // Forward the MKL shape ONLY (used in elementwise and other ops where
 // we call the eigen implementation and MKL shape is not used)
 inline void ForwardMklMetaDataInToOut(OpKernelContext* context,
@@ -985,6 +1132,10 @@ inline void SetDummyMklShapeOutput(OpKernelContext* context,
   AllocateOutputSetMklShape(context, idx_data_out, mkl_shape_output);
 }
 
+#ifndef INTEL_MKL_DNN
+// We don't need these functions in MKLDNN. We have defined equality operator
+// on MklDnnShape class directly.
+
 // Checks if the TF shape for both MKL tensors is the same or not
 // Returns: true if both TF shapes are the same, false otherwise
 inline bool MklCompareShapes(const MklShape* input_shape_0,
@@ -1051,6 +1202,7 @@ inline bool MklCompareShapes(const TensorShape* input_shape_0,
 
   return true;
 }
+#endif
 
 // These functions do not compile with MKL-DNN since mkl.h is missing.
 // We may need to remove them later.
@@ -1127,11 +1279,14 @@ inline memory::format TFDataFormatToMklDnnDataFormat(TensorFormat format) {
 /// @return: Tensorflow data format corresponding to memory::format
 ///          Fails with an error if invalid data format.
 inline TensorFormat MklDnnDataFormatToTFDataFormat(memory::format format) {
-  if (format == memory::format::nhwc)
-    return FORMAT_NHWC;
-  else if (format == memory::format::nchw)
-    return FORMAT_NCHW;
-  TF_CHECK_OK(Status(error::Code::INVALID_ARGUMENT, "Unsupported data format"));
+  if (format == memory::format::nhwc) return FORMAT_NHWC;
+  else if (format == memory::format::nchw) return FORMAT_NCHW;
+  TF_CHECK_OK(Status(error::Code::INVALID_ARGUMENT,
+                     "Unsupported data format"));
+
+  // Return to prevent compiler warnings, otherwise TF_CHECK_OK will ensure
+  // that we don't come here.
+  return FORMAT_NHWC;
 }
 
 /// Map TensorShape object into memory::dims required by MKL-DNN
@@ -1175,6 +1330,23 @@ inline memory::dims TFShapeToMklDnnDimsInNCHW(const TensorShape& shape,
   return memory::dims({n, c, h, w});
 }
 
+/// Overloaded version of function above. Input parameters are
+/// self-explanatory.
+inline memory::dims MklDnnDimsInNCHW(const memory::dims& in_dims,
+                                     TensorFormat format) {
+  // Check validity of format.
+  CHECK_NE(TFDataFormatToMklDnnDataFormat(format),
+           memory::format::format_undef);
+
+  int n = in_dims[GetTensorDimIndex(format, 'N')];
+  int c = in_dims[GetTensorDimIndex(format, 'C')];
+  int h = in_dims[GetTensorDimIndex(format, 'H')];
+  int w = in_dims[GetTensorDimIndex(format, 'W')];
+
+  // MKL-DNN requires dimensions in NCHW format.
+  return memory::dims({n, c, h, w});
+}
+
 /// Map MklDnn memory::dims object into TensorShape object.
 ///
 /// This function will simply map input shape in MKL-DNN memory::dims format
@@ -1217,6 +1389,43 @@ inline padding_kind TFPaddingToMklDnnPadding(Padding pad) {
   return padding_kind::zero;
 }
 
+/// Helper function to create memory descriptor in Blocked format
+///
+/// @input: Tensor dimensions
+/// @input: strides corresponding to dimensions. One can use utility
+///         function such as CalculateTFStrides to compute strides
+///         for given dimensions.
+/// @return: memory::desc object corresponding to blocked memory format
+///          for given dimensions and strides.
+inline memory::desc CreateBlockedMemDescHelper(const memory::dims& dim,
+                                               const memory::dims& strides,
+                                               memory::data_type dtype) {
+  CHECK_EQ(dim.size(), strides.size());
+
+  // We have to construct memory descriptor in a C style. This is not at all
+  // ideal but MKLDNN does not offer any API to construct descriptor in
+  // blocked format except a copy constructor that accepts
+  // mkldnn_memory_desc_t.
+  mkldnn_memory_desc_t md;
+  md.primitive_kind = mkldnn_memory;
+  md.ndims = dim.size();
+  md.format = mkldnn_blocked;
+  md.data_type = memory::convert_to_c(dtype);
+
+  for (size_t i = 0; i < dim.size(); i++) {
+    md.layout_desc.blocking.block_dims[i] = 1;
+    md.layout_desc.blocking.strides[1][i] = 1;
+    md.layout_desc.blocking.strides[0][i] = strides[i];
+    md.layout_desc.blocking.padding_dims[i] = dim[i];
+    md.layout_desc.blocking.offset_padding_to_data[i] = 0;
+    md.dims[i] = dim[i];
+  }
+  md.layout_desc.blocking.offset_padding = 0;
+
+  return memory::desc(md);
+}
+
+
 /*
  * Class to represent all the resources corresponding to a tensor in TensorFlow
  * that are required to execute an operation (such as Convolution).
@@ -1285,30 +1494,8 @@ class MklDnnData {
   /// @return: memory::desc object corresponding to blocked memory format
   ///          for given dimensions and strides.
   static inline memory::desc CreateBlockedMemDesc(const memory::dims& dim,
-                                                  const memory::dims& strides) {
-    CHECK_EQ(dim.size(), strides.size());
-
-    // We have to construct memory descriptor in a C style. This is not at all
-    // ideal but MKLDNN does not offer any API to construct descriptor in
-    // blocked format except a copy constructor that accepts
-    // mkldnn_memory_desc_t.
-    mkldnn_memory_desc_t md;
-    md.primitive_kind = mkldnn_memory;
-    md.ndims = dim.size();
-    md.format = mkldnn_blocked;
-    md.data_type = memory::convert_to_c(MklDnnType<T>());
-
-    for (size_t i = 0; i < dim.size(); i++) {
-      md.layout_desc.blocking.block_dims[i] = 1;
-      md.layout_desc.blocking.strides[1][i] = 1;
-      md.layout_desc.blocking.strides[0][i] = strides[i];
-      md.layout_desc.blocking.padding_dims[i] = dim[i];
-      md.layout_desc.blocking.offset_padding_to_data[i] = 0;
-      md.dims[i] = dim[i];
-    }
-    md.layout_desc.blocking.offset_padding = 0;
-
-    return memory::desc(md);
+                                                 const memory::dims& strides) {
+    return CreateBlockedMemDescHelper(dim, strides, MklDnnType<T>());
   }
 
   /// A version of SetUsrMem call that allows user to create memory in blocked
@@ -1376,6 +1563,7 @@ class MklDnnData {
     return user_memory_->get_primitive_desc();
   }
 
+
   /// Get function for descriptor of user memory.
   inline memory::desc GetUsrMemDesc() {
     // This is ugly. Why MKL-DNN does not provide desc() method of const type??
@@ -1438,6 +1626,17 @@ class MklDnnData {
     return op_pd != user_memory_->get_primitive_desc();
   }
 
+  /// Predicate that checks if we need to reorder user's memory into memory
+  /// based on the provided format.
+  ///
+  /// @input: target_format - memory format of the given input of an
+  ///               operation
+  /// @return: true in case reorder of input is needed; false, otherwise.
+  inline bool IsReorderNeeded(const memory::format& target_format) const {
+    CHECK_NOTNULL(user_memory_);
+    return target_format != user_memory_->get_primitive_desc().desc().data.format;
+  }
+
   /// Function to create a reorder from memory pointed by from to memory pointed
   /// by to. Returns created primitive.
   inline primitive CreateReorder(const memory* from, const memory* to) const {
diff --git a/tensorflow/docs_src/api_guides/python/image.md b/tensorflow/docs_src/api_guides/python/image.md
index a2c8c3c3c9..051e4547ee 100644
--- a/tensorflow/docs_src/api_guides/python/image.md
+++ b/tensorflow/docs_src/api_guides/python/image.md
@@ -19,6 +19,7 @@ Note: The PNG encode and decode Ops support RGBA, but the conversions Ops
 presently only support RGB, HSV, and GrayScale. Presently, the alpha channel has
 to be stripped from the image and re-attached using slicing ops.
 
+*   @{tf.image.decode_bmp}
 *   @{tf.image.decode_gif}
 *   @{tf.image.decode_jpeg}
 *   @{tf.image.encode_jpeg}
diff --git a/tensorflow/docs_src/api_guides/python/reading_data.md b/tensorflow/docs_src/api_guides/python/reading_data.md
index 4594887349..f316cce953 100644
--- a/tensorflow/docs_src/api_guides/python/reading_data.md
+++ b/tensorflow/docs_src/api_guides/python/reading_data.md
@@ -175,14 +175,25 @@ For example,
 [`tensorflow/examples/how_tos/reading_data/convert_to_records.py`](https://www.tensorflow.org/code/tensorflow/examples/how_tos/reading_data/convert_to_records.py)
 converts MNIST data to this format.
 
-To read a file of TFRecords, use
-@{tf.TFRecordReader} with
-the @{tf.parse_single_example}
-decoder. The `parse_single_example` op decodes the example protocol buffers into
-tensors. An MNIST example using the data produced by `convert_to_records` can be
-found in
-[`tensorflow/examples/how_tos/reading_data/fully_connected_reader.py`](https://www.tensorflow.org/code/tensorflow/examples/how_tos/reading_data/fully_connected_reader.py),
-which you can compare with the `fully_connected_feed` version.
+The recommended way to read a TFRecord file is with a @{tf.data.TFRecordDataset}, [as in this example](https://www.tensorflow.org/code/tensorflow/examples/how_tos/reading_data/fully_connected_reader.py):
+
+``` python
+    dataset = tf.data.TFRecordDataset(filename)
+    dataset = dataset.repeat(num_epochs)
+
+    # map takes a python function and applies it to every sample
+    dataset = dataset.map(decode)
+```
+
+To acomplish the same task with a queue based input pipeline requires the following code 
+(using the same `decode` function from the above example): 
+
+``` python
+  filename_queue = tf.train.string_input_producer([filename], num_epochs=num_epochs)
+  reader = tf.TFRecordReader()
+  _, serialized_example = reader.read(filename_queue)
+  image,label = decode(serialized_example)
+```
 
 ### Preprocessing
 
diff --git a/tensorflow/docs_src/get_started/mnist/mechanics.md b/tensorflow/docs_src/get_started/mnist/mechanics.md
index 71eee4291e..dac00498e1 100644
--- a/tensorflow/docs_src/get_started/mnist/mechanics.md
+++ b/tensorflow/docs_src/get_started/mnist/mechanics.md
@@ -47,7 +47,7 @@ training folder and then unpack that data to return a dictionary of `DataSet`
 instances.
 
 ```python
-data_sets = input_data.read_data_sets(FLAGS.train_dir, FLAGS.fake_data)
+data_sets = input_data.read_data_sets(FLAGS.input_data_dir, FLAGS.fake_data)
 ```
 
 **NOTE**: The `fake_data` flag is used for unit-testing purposes and may be
@@ -364,7 +364,7 @@ may be instantiated to write the events files, which
 contain both the graph itself and the values of the summaries.
 
 ```python
-summary_writer = tf.summary.FileWriter(FLAGS.train_dir, sess.graph)
+summary_writer = tf.summary.FileWriter(FLAGS.log_dir, sess.graph)
 ```
 
 Lastly, the events file will be updated with new summary values every time the
@@ -398,7 +398,7 @@ method will periodically be called to write a checkpoint file to the training
 directory with the current values of all the trainable variables.
 
 ```python
-saver.save(sess, FLAGS.train_dir, global_step=step)
+saver.save(sess, checkpoint_file, global_step=step)
 ```
 
 At some later point in the future, training might be resumed by using the
@@ -406,7 +406,7 @@ At some later point in the future, training might be resumed by using the
 method to reload the model parameters.
 
 ```python
-saver.restore(sess, FLAGS.train_dir)
+saver.restore(sess, checkpoint_file)
 ```
 
 ## Evaluate the Model
diff --git a/tensorflow/docs_src/programmers_guide/estimators.md b/tensorflow/docs_src/programmers_guide/estimators.md
index 6544a16f2b..8b6cbbcd17 100644
--- a/tensorflow/docs_src/programmers_guide/estimators.md
+++ b/tensorflow/docs_src/programmers_guide/estimators.md
@@ -187,7 +187,7 @@ est_inception_v3.train(input_fn=train_input_fn, steps=2000)
 Note that the names of feature columns and labels of a keras estimator come from
 the corresponding compiled keras model. For example, the input key names for
 @{$get_started/input_fn} in above `est_inception_v3` estimator can be obtained
-from `keras_inception_v3.input_names`, and similarily, the predicted output
+from `keras_inception_v3.input_names`, and similarly, the predicted output
 names can be obtained from `keras_inception_v3.output_names`.
 
 For more details, please refer to the documentation for
diff --git a/tensorflow/docs_src/programmers_guide/variables.md b/tensorflow/docs_src/programmers_guide/variables.md
index 16753c931f..bac385c02c 100644
--- a/tensorflow/docs_src/programmers_guide/variables.md
+++ b/tensorflow/docs_src/programmers_guide/variables.md
@@ -205,7 +205,7 @@ methods:
 v = tf.get_variable("v", shape=(), initializer=tf.zeros_initializer())
 assignment = v.assign_add(1)
 tf.global_variables_initializer().run()
-assignment.run()
+sess.run(assignment)  # or assignment.op.run()
 ```
 
 Most TensorFlow optimizers have specialized ops that efficiently update the
diff --git a/tensorflow/examples/android/build.gradle b/tensorflow/examples/android/build.gradle
index 48f566f825..f7bdf8b816 100644
--- a/tensorflow/examples/android/build.gradle
+++ b/tensorflow/examples/android/build.gradle
@@ -28,8 +28,8 @@ buildscript {
     }
 
     dependencies {
-        classpath 'com.android.tools.build:gradle:2.3.0'
-        classpath 'org.apache.httpcomponents:httpclient:4.5.2'
+        classpath 'com.android.tools.build:gradle:3.0.1'
+        classpath 'org.apache.httpcomponents:httpclient:4.5.4'
     }
 }
 
@@ -75,7 +75,7 @@ apply plugin: 'com.android.application'
 
 android {
     compileSdkVersion 23
-    buildToolsVersion "25.0.2"
+    buildToolsVersion '26.0.2'
 
     if (nativeBuildSystem == 'cmake') {
         defaultConfig {
diff --git a/tensorflow/examples/android/gradle/wrapper/gradle-wrapper.jar b/tensorflow/examples/android/gradle/wrapper/gradle-wrapper.jar
new file mode 100644
index 0000000000000000000000000000000000000000..13372aef5e24af05341d49695ee84e5f9b594659
GIT binary patch
literal 53636
zcmWIWW@h1HVBp|j(AqTBoq>UYfeAz~Ffed3FfjPKhB)ea`nl;dGoUKGK6OTrJp%(n
zC<6n72(m(7M?X(D*WeI6U$@V`XHNTg>*`(P_14uocjo-&AcHH$51xKHqkF>htnXQE
zPaQ_CS8XZNo-B#d+##;I?8%a(6Nk1+y_977`l*N!$wDzSm$5J~Fyt4dqc{p(4L4Lx
zdQoCZPAXod!l+8iixLY8Qj0LOWHhCuEoNX~xXQr5pp0FMOMZD?PJUvFilJU|PGWI!
zZI3V4Ap?Qd`x&ND+GYAp+}GRY9h5In)U$ESan9lN^jx)fHaGu+g-1jRU)wyhl{-_j
z{`+H21?NKtB$AwJwwX^qUAs~>ao5(h7sEted);A8+-AIU+dh+58najHN~pE8mUd~Y
zbLm#Tc8T>qUSGeuhry}Hz-?Er*gbE54{NFGhcxPTg&5^?e72uZA}L^7vs}LAf)bCD
zn*^JDd+%Z1QeD|vv`{{Jul1S;69Yp53j>1-0Y3&;7MG;v1{CENq!yKArWOYj<yTZX
zmX>6s=9Oe7Czj+FK>}fEaBjYkqd=XQM*H!Lk5(qEoqFq9Rmjt>{EG=voV}f#hQ6zO
zee81+nbX&mr{qukwEjzXuE|ICAB%f7J4{tz6n=mB+#8$EXKbu@e}DY^`g?{`6<JEX
zO`P1U%-NcE%r^G*ikJmQTze-g9K9o3iSPJ<ZH?7DC&l9)tc-2j5p0v<aplmmgostu
zjTM(|EyX_R+JDnbx_+4LJO9ovU+P4BR%{bDTqFNQS=H}b>J7o5N+qTDTjJlY+Qu>E
z`U9;gJl9?;2yA-xSwrYZWh8&<>0HlyUs`WZ-s2onuK$L$?!@fbpSi;%rbRow4c-~2
zF*|vNI2(_&-^PE9^-uB?rdZpqOnP9)?6J={*n8(9c46zgi5h1-Qw;bIt;^{MpC{-4
zj7cj~Q_Lu0iBxflq0Y9cuclc2I`Ljfk=JHJK$Ca*2lgEgvQmya+&tuLb4gZCy5i7=
z-}Q%6Py7wDWL$G@4uf-<!<WhnMeVf`mwwG_yZehL+OJl})l)wsw5z^dVxF((ANQ;>
zBR8dxi0tCXsgovmv;^C%GJLXpi^Zy!uj6JG`mDJapXOL!c;w64OUhA;=6X+y6A{~X
zUf{QnA5Ub|<YtBAsZnbanQ~&3XFKry6x`_**!y(9;=g--bCM7G-8yC&6kqU?=h^bk
zqNFu3brF3Bc=CEq*{VxRXC_|W`lO9hS@%}-!Ly9Hr4#D8!%yCNvNZ6B=!_kHVmGho
z2f#}mP`-WsCZc#10|Uct1_lNt?5WNZmbD;h%C+|(*C7L*w*O~dJ`)k#!xi^6>EoW&
zT+EIEU*fhKnR!{iDzTf*E`Ok1$a<<I$I0n`etxdGE6*VIqWVP@2S?o^ZB<i$j>#|8
zo`;{CqH!}(yfyxmNpyNp%VuT8h`ULFQ)Y&x^V^=Tt~K1a;(LqZmOqhaUppU)m_21_
zn|jcOQy&%yw3$jB`jD)@)9dE9Dcw6%dj2hb7ToSo5Wv2(MJ(oYu6OC%8((tXAE;+W
z^(o8Bs=F~v3=DxR3=F#1ed-P_utJMdi#+mkQ+@LDvr7vgp6(5f_Lp)L`R6t<_S)B=
zH5yyDWks_@-;8=2diludRh=Om3Y`8at(@!hcBe$<{0ZNyl9hGoxZM|bQ`^Hq+9#TK
zpWJ!2d|&aiGiTo0+t)Eg99q>Bc;;lqq6eDuJUcWw65`xqesl`BZBTf)vuOguUV*AW
zx1bE)L*AU;Jd>k;ef#C86@TmGy8NYIE;;(0pH_Bxeo0%ws~Z-kCZ{*7jyhNDyL;Cu
zgS)-jsheK*%sVUYU0pRXG2`+^>3NCOtcSCT9_??v{+-*-WJlVqW#1<*w4bM8z+yI+
zOSNZ-&+PZ++3k1EO1F!N*IJ|=){<DK`KQd_f$U7)HkVU{tD~-(YKdL9+wHdZTx{<0
zr*bAccE7$-ajMX7R%ur4j$4B1620nwbZ7N#%Z-|2@G0tMR{7E=bCyLvOKMcS)orXf
zDb&TlF~+WPlV`^hK}VH6K^uES*}kUnGS>Tkc(-vyMYE{rcY(ASFXsJrySzo~PAW(2
z<<%A4<yY2E`@+4tqMx7VbGZzA>4*Iuzdp>EzPB&n-I|9Vr4A=dt(Ls5DD}QaQEY#$
zs?grNlNI_3a|<WD*iq}%mT`8SfzG|oO|}e$4)Y$jOT0T?edU<W-Cu!KvVS|eGAD^1
zmB>6a`L27t$Yb+A3W_r=oF`2dx#(ehiJ4nGAwGJ7#>oV2i#~RhQ27Q;8x`3@CURRR
z`$wgGF*#g2{e}L!zfD!G4ii_f?Q3bCKEcZQ&=0oA8P{8tTzHhiI?8zZJ<ln$?R<Sm
zNK8ZRo9g9P``1m^jIId|=MdFj(89j*pXHSg0-eP?_3b*}4&}X3bt+mGTJ{YsL&`jg
zy?vCGf#DYy1A{U4Wa?UxoKsqyS(fUSnUm_6oSa%*?3<XEn4StP^{0k;7YL`y{Lh`G
zmboie(LpgFhoN;#K$C{p<)&#jk}{$e@GfN)-G0(6cGunB*}mSj3U;r4mwtFLcd?PO
zZUp-=Nq6_-%~p>aKOQ<NsXtFbU+&2L?X#!tljZv<_y76(|G&)a|NXwo%fSD5vck57
zo0$Zc9J%z=y<8wp=KEt80lSi})1HN|I2VM!$#k?-owiTGZ}I*9Muu}Nfyy1G7SczI
zE6hBC&$AZto&UX%#Xv*Em@%bwmXwQbo7Tj`1p-p%!jo8b$uF*xsGA{lZAqWH)%jAD
z;J44K{7Xwzeyg?wJ)df*x=h<m?^<^BtclqZ1Ew1EHm#K?o4-grlv8w@*{WHtyH_hX
zdU>rlH+$DSlbowk!6lAgwq~`Z$xjQA^Hr1nrBWfBHm%E4^K8KG?#0ckriQVVhW;=%
z?z*=;V%Co79dXqWi>F4M+ETP}&eD`pQ?oqHpL?>7-qKI`lH@2IY;6*odphNc(?V~<
zDRZ&}SNIol2;EwgCo@Y@xJdWi+KlWfon)z*i&9s%$$D$-(zDrSa;E#&j|Y{e$(Lu$
zSBx&3eO97haB^Ono641)lh}CE+e8lsd~JH%nAp?dv-0ypLu2m5jtf(m&R+S?ufWOp
z@Vx1!V?jM@EMKv`o@93Z=G(|4!Uw!hJiDXgIJ-c9%G%f&|2zNW@XE!=CT3gREt>Jm
z@tX75+fvdkwHH@E<vFSsb>)4Q@A2trUq94dJz9F{K<J@cfvHIWzAw@oZ6mLQM9)|F
zQGHFOE!unA?0yTM#-zhG9K{})Yn6<(^%L(Jxk*H<kKAIk)acs!JEhgFn|d@p8Oa^;
zlia-_xXH%eOH%*9qz#21Q)D%>s=~IWh$NiWNa}gJG5mbb2?_BQ4ZRaH4s_S;QF)xY
zM=9~Lm+SNg8;_;N99dl#GwGvL-1LbDg6sAuKKAw56#wq$J58yin&+Do7aG0oD?TFA
z#UYbl_(f^js+6{?IVmw0bCP;_W`?D#+o}9+j%$vwk<0hOReRq$)gA9IPu<i0{qGNl
zW9=t?f1KFUKQVpYbj{Y5Wxr<?@*RFOZ&HAmsOkDsE5C8Ne(}%DTEyGke%^m#Uh0%H
zUoW;jm9v`vAwQ*4YuA*$A&WyC4A*XZIZ5SB#I~d25<;?8R+Fx-X=46&rfEf$I@dO<
zQ*HXb%^H5wm%W~)eP+4F2?L?c%KLK7CWW@uyk5n0raRYn$(-kJ8%5VnmD^G2aQW~X
z!Dp-XELg4e`gO0I@WiXGlUJv%YI|^-jeA+#maD>Dwr3<`^{&@$UJ+g;{i2_L`o0}g
zir0UCHuZ4AfoZE&do{XU)0<h#_QZzoPlWpq=2HU4%HA;Cce!W7p`@ZV_xS{gGIyt>
z*X9MePjVZKQtxC-Emt(n+sWmhd^dDTY~hr0>#VqSw|C^Zv+{;t{va2mv`%W)2Jr&F
zL%mPtv?%kRH+-{L<Zi{Q9}A~5o-}5Df8d?wl=BD74S4PQ8#O~(nqrm(Sn}@N-FZhd
zWbU*_ahh>;yjRcKY07c1?{jC@-@5OZ{==Cec7J)!8n2Md;95KL*sUGrZ{|8_#K|pt
zW##_s-T6eV_59U41D57|k}`|gXRZ9k<m`g71uP0%CeCX;shPvoTB)|^?B;}RckUF+
zmI^%*O<Z*6m4u4gakYnciv`yGP?C#@Q>!qJv-IEaNzM4D<+8%=nY&u=&#jfOJ0$!3
z-}cSZEobiUs9Lvs#&?b48wY&MgEn!p&-mDEv-F4gTKV5U^Ir+_Otfk`KEd7i{*mS#
zEnz;71Z=ln3}=d8EPH<e^ZiE}bq6kgx!xI{@ldRC`^n$Ud+bmC?%rel<gfF*6-E3^
zTWXr;ecRl`Zs;7}Kk1m)xv+W5nZM{*E>AmE(ecAfPJL~U(SF4~v15Dq_X}NCobuRU
z>J!VK>Kgx5G~<F^b%uSpE4WQ}KgUur+tYU+Z+TI^B5O|{yGl%(d1-5kKI`fg&$eah
z_A;w`zE$1ZKgZ<u*87}GrWswmb6ax7?IrG#T80WQci4QBM{T^E;f-^f$i%?#lMQF%
zB^=Usa?a07%S<mVN-W9D&(i~Ug?cCF`U^V>9RGj&`L$-driMj}l-?@NS@6J^lRwlj
zlt<K~#(7=AH07s#cVbOH24^Z6er%OLz@Iv2Z}fsSw~v?`Z#Mos|L@zF-@nhFk7s!H
zL8IM!#>oV;ZkZmQcZK%H&)S%O-&uL!tdmrCrcKI)Z@Km#-h`QNh;2{YP?%9?#Cx2p
zXYJXq%XwPw{pxxd&9ijH@?@*r#h#|ib-(X5Zf0&<crWDhfjRLJIc5D4m*U?%&JgNf
zbnlAs?TyW<Z53O$ot+R-`hDfR{2P5=w^d)+wC8);YT<it-_4wxdwKKENEP9Ojc<*w
zNUghWc5B|w`x|@C&#SD-sXXTQBs}){;=Avv)Y292)ZFH}rnhHLz}l1f=YJOkZ>-wb
zzIocxz>Al)mOs{8xAM*ES?#O-e_Q3|zI#%-%T2+R+tVK^*WC~53z7G;cR%;u)@5qO
zjujowm)}-ZU*n$rzAj|zgUEkUyrMi8<Kpx5)|<ZC9ewuKvv;d|%}aTkPZ;yesZ;!?
z{n1op!u5bh%g*$+G4i_Jv-mnce(E}__(yK9FWllbVEot45Y+LpQ{?E6w+r<-;zjnU
z)L4FDPM9<QjltY9jq@k^U)vw}67+_paPJfgN28wL#5w=OPU%mUb(@sK%Gp&kMKfW_
zBEz15ux_@%v>uzoqA{Y=C7x93uB>-|_WI_ADFSo4C#OFuj1hjM`l|0^UhOZInZ;9L
zX3SMMd$!<E!4*}t4<6k2PZh|TizpOIrUY>sIX)IuD3n~{CbgHJ<G;b5N6}srmhc!Y
zX4$!x&GVE(@sgSEnS%rrCVNd-!?S4t%g)7YCr>H#dwyQR?-AJGqv;rAbfQ?*Bb0a2
zB*$r<n=i31QE|xfny`f@D7@)K)`TrQoBU>7uXQ-Zlwx(|&R=fSv>jM0oB5Q5fkBds
zfx!TKiwC{G>yen3k^^h*^oG4I77i8ppF2C{aM|g_0bX9M988(NIvPZCxU?>KE#+4U
z&}rJ`n?JcV#XQGsi>T=QA8vmS@$?-Ix3g?MEOFr6iSqv%U(3xGp6$MsR`qW0^V0J_
z=N6xTbN~OJpW+RFe?%VE>se9Z>@4hNGUv#OE_2f#s-LH@i6(zC&?^_#QVX3E_HkNU
zSPx5~+d59qb6!P$&3!8s&Sh6fU%vFliTmEcqhc|Kc2C-)u5Ycr^nJF0fwE`NVV#zc
zdD)I!nWD?OPIECiFH7C!E^6YwYq`jezT2Cf4z?Ly77}iK(^GIx%2=}K*0DEBCeAdH
z37&V_XZ~!n&s<f<bJRt>Esy269eF0JJYCpNXXA`=*Kb>rKR=kIRDWrMM%g1Sv*j((
z+MyeajmmEBNIT+VZI+bgvS)XR&W<!Sv%Du6;wEcbUUMj~RtQ;Yo%NI}&(p=UwQ;Nd
zX?OK@@kvV0XKizh{4Barqxj<z$-`bpGt#0oyRNKxsCM~KWZApBZ#t$#Jor1$<5+^l
zN~f@hysgtu&OEL3GU#5)z6A;1t<8bq7H!$yx4-UuT9SGtB_w^#f~9LVo@hxG%o4m%
zB-)U>R$x7YrtwKhz5K0GsYSa~y1YNHYSMc)PpovouUiYNHoUTQe_0TF$S1Nby5L1}
z#n+^DH<wS_!0i__<-w~eH}9|B!Lqv@Uzo4GQ+(;f^2+I5+Ezvhmvb^(rZ@GaZ<+KZ
z<K>2lmkUFG-Mh13xk$-6mbzsLJJZ5KIIOcaTZI`IZ{BpWjNiKNZOik^hq%&nlBaF@
z;8l9KxaVmHAA6-|j4W%+WX7}FH)h1QJxr=!Vt;FGlH+kDy9<S~@~eJ$96!Ee;Ui($
z`Av~EZRbm64v1^*yHXfjwC~V$t$i0i>h8Pz(Y7w2(EE7*4^PLExBN33uYB9jv{%TR
zPki&8moj?d5p!;>o51X@wm#u@*NRW3-n{ij$<LP>OZK_m-RDuex#arBQy#~&zns=D
zHT}CFdwN99Opm=vri*x$Ry>&FYyE9cjZfSk+4B|6H7h)=XMg?>sk*eS-~52(G=X(3
zTY`FZxT04}6y;4z7m%-=T6HuhYxQ%ZeOo5Z7vL^=xOvY~slKNR%QqPPdYpOKvtu=D
zqN;1NOqg4ytlndP_6_;xrj&kLF0=c9<qbK>+=$lRtu~J)35K2Wt>!iNnl<U?`@i#l
z=|2x-oMYZF`(_6hpSeN&WM`Z6_w`QfSJNo6oA~ab=CRO+ulns;KJ8Y&=PmVN#_f&$
zA^MsMO?m5H3%K%c%~)g8!0-6!Uxd@;FU}_YuH7I0GS#2|tNP<4M~}Ya+-1)nxa7wR
zzW%ZPZnNEs$+IWCKWW>dK5c=&{=%uZUf(to**H<pO!VTl?*g;uZ^^A}`PO+)X`7u}
zdy`}QiSH7(MeZEr+{r!h>3i;<-7j>KekUl1Xe@fo_pOC#pVB<(5a%pG>vxOxsxSHG
zwRUbuO;3UGx%<Dr2cEAr5uRB4BAw}P#l>i6ix$T%0ZNWDoUC8+E!!b1EpSZjReQ%B
zJ+JSAeR52f&VLoC$`o~-Gw1V@7^PL$Uw%}x-<DT+=k77bJ4d~D_@8CBc++Ub{xk84
z-)XyhV(#K^lkVJgQ`i0e`SNQuTe$_P4g1(KY!3)6Ddf_>-0k1w=fA#y_mTNMx%7!n
zKSZ4^4n2x_^!-}k9<TcKQ@JB9Rn6U<95=DQxcty<-v8_YD5Ew@D-Cvqfd-3N85rcT
z7nwf!>F^F=>*U}0mjXnN@1K5LYrWyI0-qVebGcl@+wu+7+EO?L<vG*Tey^P}d!2XY
zLXqD8UG=R}M~}$=Vg9qY=lM2Xp@_{B=FKa9Z#n<7R{g(!zjz-A_)qqDkj6B<K)dnw
z3bnvy9hcY+Yo|L?3ltnDUP$QZU(Qx8d4u!tE#t#Rcb@ZXeC7FeMqF<70m)6Vhaat9
ze%P-3-pbm^erCJ3Et_?GRWgUQMbFOV$66L%*(w`*QMNRp=;`9ceQtLGcWql&y!@zu
zY3|&G0dME6ln*%?q8s~iXJ6t=-H8EGpH8}5-OHoR7c##glDX|zQqAv>(yAJpr_+u_
zZ*A=spSNAZPvx3ga$8b_jI;5Sn=e~}9t($;9{FN5<)C)%@>_gg)1+*3+fvTYvy!qm
zI&vfEUk;y(kCdd$vWZSnPR6~}JM<qqO7RDJEbef3vJrd4`IK|6(cxqN|K$Hlk6FyL
z{hW4Z*1E2Z`aizg`Nr5S;jZ=yWZf@(IPBgfu9zs6@b)Vn|K=p$y5|wG?C64+E%QYm
zJv@3jzEks@``(gPuG+tx7Z$v{De`zpz1y5GJl8zaPFgv&)HwwG<5Zhor|@a&8V)m$
zb;@3!jFN7sc?nvc|Foxb8oP~eK<3;VPKpWpcxFWYv{srw@lRVuZQBi_9W0!wm)7ew
zY%VUC=C{qZK+u};M<~O&pAV++_wAp)UuU6q%RWEF-`p?m$jm(~>h*5%X~w9<ddXkT
z{eIEoFugoIBDyu4Gfvs#Zu9B78#43wA2{7!dQWNIHutD2-W9rgreET@lx5X=$y8&B
z<R$M`E784oK0NyNpAj`lo-2CYCCbjgkjKZspoTq31|*hbI2ISD<|e`G?x|rvlcmE2
z|6RK#S~=OGtN6y&<|OrAott8w!5W^zAFjHrUH?nV@79j&Y~_5d_q%qlKGIestns~;
zg=xxfHt{IUBU>2jp7b;AyBQn3P3!VE_PyrsGw+=-H@-i6`~QFE|C%$-Ikx|iMb{y@
zIY)zJ*z%5O?p1IOY<sHVb8}nDMxmqKg+~G>$VRKEvxPqkk_o&s<!!Xl8n=yN#~o`_
z`ub1Rr+*CMk?(%=xUhz4e$T_;!Wyah6OX35?vg$FJmp`F%<&JKc;Y)AbqCgHw&@?g
z*t-76htR-(vv}ggXWtiiIoq(t`&s_+4|65<Ngx0Iq4r>*dYk-_56+gA>Rr3J_<AC`
zqY6(H#N3Y1*x8n<6;Qlv<?UwIvxOz@2Pb+@{_CuK__J~F;mMz;A9rtmzI>T_aq@H#
z6}P+$^Q}37#@shGr3|>Ai@uTF;&c3}PkEU3v=Y|eYHG*l%gD{SXSH>GLiW!^c8_0t
zOU;fnsWEPwT6;+2m5E^G9RuF)i<*vwd|%UaVZqepxBdwpzH;2_H1{sswYnmuAN3!k
zo8F#Kx<RsNI?tkaG8^AGT`QluH#J*Zd)M5#7w#;rbKC0UJG+l>N!c#3{NLQWb{oro
zURkwm-p6#e>bn|Ka(qk9m+xPDT{z!!$CI4G-J-0eXRnm5P}sJ>^4AQt&Yq{4*TiS%
zoKBM1w!iC?d<^R)(PY<y6|dG_trjo5wz6hRinxCEIU&_&7pBXtd%X1B!@Z|>J<edQ
zJ)!ZUZ}o!1;YDALPRiYpee>em`e6G#8=i4lORwY82#|QZFgL6{T>R@)1$)8c#Rm`F
z`n9PvSU#?_^zz3`GDdsfXq;)Alzgt~)NjkzJQHT`nmu>snYTilmJ}b;%bQpcJo{L9
zL>X(Llx#xlo;T(1=Jjg*b`A=Vo3qL5+akT$DZlvmzMkY16)y-pT4^J9v_FLB@o}wr
zhfPI`E254DOFVYHmlt`||3g<7tIj8{>x(2bB#-XNTm56@i$6T}>28~}>J(RV|5(};
z-}xnb&!PuscBk*S@xxbSf2d*O^2lA2{l2MqUypG$+*jZnU$Xee+^+kN`S<)-?A-76
z(OtCu(EQLfk3&W455|jbsc@h6QqMX;q-fb{ah9*yiwYRmb3fYD)Lc4a!Gg8BtRszL
zlOykcOaHwfRO{K~Wlz<nd)!%)xNgdwCH`~mHpV8pY1}z{KjhBwwyyo&ZtuExKfA@Y
z@)7SI&mxw&{o<RCq+OqG{zgM?#gBH87fk;=^SF%9se0~R@=(S58TYc%?u1GC)00@2
z-+Sn*x+|ny*OdR-mK}`!1-dJDiL4Ku_aQ@8Fgz{J@xAq~2aNld#O}NnQlPjyYgfLk
z^X7l7NryifvT@5h*WcZ@XY=vvpEvBiz4~_W=H^MWHI%RG&Ta|*&}cO=PIl$fg1>ej
zFZ|C;t9yL1vj3S<vd!LVxAM1bhA)lkoOzb+TB_>irCoV;>CS>zuNqvWPO{%qS~+Kb
zvBuQZ+-9-wQ&+t6(Z6|3w=#P#i`hrx#s{B&z4}pV7|Cw^dDn4G-JEX^53h`=-Tx_z
z<;|sdolMu|$L^&yt$(xT@~p?NPS**k=X8q{TrXoZ>VEv^p4F-R-ELbR{jK0xqZQ73
zqM$bK_O9vnWnrfb-)+2D^hY7QzaY$_{j#XW`l?m(k0hS+lqei|zq!t9)%q4jo+2@3
z=f+0$6`H#W4JVx2)7Q3YYHbUvO6F6GZwoGOF1c}?Pp4C51IrPE+@EIuz5N5vZNEIz
zNJ^WdcUiR-NB`1lV^NoLt!4sqC+|-ZeRI>HW?hrox+_NC)V>vF@D^$$*R!5|erWbC
z_OcghWiP6N{J#dIFtb0uwD#dO4_4jR(GRyM9x^-{uxq1yrcB~A_KVG-Y2i-0-!$Lj
zdTXXv*dKGy`@}{uUYk;3|DdcVk(2#zCHtIJ`075R$>8mqqZ&&&On=^CJ-Opxz0;ij
zrJWl#_Be~m$a%|f?r&UN+&gpS&Vct<Y`3htKINT>ciA!Lso(tb7g{}246yP1U>acD
zSjZpRyeGZ!{%#v}_1{9d-#=W?n0(Fd+U01s<7|CrTRWcGimJ@^pHf&;Tf!V$+$p@~
z;z{;-Pao@)J?~rdwOGbM*wAFM)m)u2@zc6L&Kd3FxMkR>IWHqAZJD97PVAKQk3HhP
z_r-qM)!cBtuMp45cq#je*rKf|Lj2PUUpf7bZ83g5^Vm0|psRaS_GHXU>UGPgoU&8y
z`ychco1^8T3XXmjSb8kf=Jmzd^A#KQXWvR%-uv@~+*`qCb9UWuY`1-FE4%RdX^(JQ
zxwMpokVLm7M$6KUO;-4q6?$vio^6S8=P$cUv@fWP39PI=vOlEtzD48z>6Ly)MpHZm
z9M8)hxwL*^qV41tU&GEieRsKUo1$Iue%BP$2Q`mOUd|O<H~;37pUV7M6YRW~*}1R3
z9?@JmN1^XktL!Z&tCpGo#=F+_vhP1f9B-exwMOLBf3$gFi+kJdN((bEywPM}P{3ZX
zdgc|EB<AEmMtkN)a@UAF-D<DC`{qrZ!`cFePG!3&RkoW;3GjMOxFIm%gMq@Ul=4YE
zZ<ODio8+=Ks@v=8zJTb9VQc!6bVCC@FKFDzs@?W&;nuBNuZCskzka)S@Aq%_ZvWQ$
zUY}NGK08I=k^cU_Y0s+XJpVuEeNBAa-`|(rJ2MpP1s2$yn|@5aSE)CCRuhv6W9G4w
z%cT|=$*upu!vAshc~*@B!l^lvOeQnOoCsbrQ|f`zJh27VJ0fPi<YH$wnVNp$&rGiK
zpFY1~IG^bI@G^6W)Op?yk~aQTUP~;4AO2*Vb@(xJ%&AnyRga5)u9>hbXx6I2n6su_
z`X4?!wThqYIm$Tey!Vp&mGh^cEZ*5sJ*{W1?1H3qUq5qIuG4u?ToKxE{^@6?iq*jt
zr47@X#gDyOnmJ)vpvD?OuIIli*M2e4tZ6(n|5Nei`PV(&y&KLw{(32g<3sD7z=ziu
z);|4McTeT~*-Ig;72NruKa^T;r(D{`u=e>|#;k|kqW1(Bggv}{X!?(zK|go+%+hN8
z%CNO?_o0OhxzEe&+yCbL*y;S_t<D2IGuPFp|LzH%Cn%`4szAp<c7w{S9~SSPX)p2A
zn8S0yFK^1RNu8Rl<(hYd7v#;9ejXQjLA@(Z^1<pKSwHtg-LuoR+ZpvRdqxv)Z}uxQ
z^ZZ>~7OpJ4d~Dg(+2*Ee^LLgw-(9xVr}gag*So*W>X3WACUdJ$+L~h4%Rhcq=w!Y$
z%<h)+bo`V(vu0vN`tpehvo6Z`ms~4R`%-XZ#Y@i@ckj9_T6R<Jh*!PEJ>l*nUkxtj
zZ142teOdVM(%R_Q(vwRBbNe}-?#$yhdvfmJqRTz^0#v4&luKrx=?E0(l05mu%68f9
zO;>jWX4pJrn(%q8N%FF-T`wnH>c9B<v8m<G8>!JV_I&-g;h@RJn>~`cD>HUays;!X
zz(#=o<wV`8LsQ<DEWiHBXtIxStYvq;lAwn?hbqrqBdIe>j+Sg&%@#dZUFKF|*)pMB
zsmGGFPsuEw{ZVRT=MJSQ*5^XXPAypwDDg%(t=em!xw?{%u*@gJ!>exY*~l5(RkVCd
z*~{)}dfC3NGu7N(zP8PAY+dhM{`o|n+Ez_5=Jy?Ziw_xi`(=HpT-@Ox;L4I|ARH;#
z?z7<5rX1tlPo<Z!=Je!6P7C>A{6UD#c_qi1xyt|VTzVLC>Q2+i%c)<CHg?7f%iK<!
ze7;-BDb4coWH*he25a}tDs50JxS3EU<>?((#=+LL$!1p4T}|#1UfZs~w$PflEBp8y
z>+XHK%CYa8U4!HHl4VQ=r78)t?7UW8{ibzc#?8!#(v2U)loQLoJiV2bY{TcFE3xo?
z##cARl@`~p^12yKG1x83Uv)T*>6K4WSb|N^<(ykpe$sM`wRdM~TyQ%RYdPn(R^;x}
z6K!RCzQ(#`qzBb(-cqt<i~3cz!c{J^`Ul#oPI$c(ynUJVWQCTXQBFm_an<hL38^h_
z1?!JAiiBLg^KioIRpI_B&lJ<P^>3MNo^-@uLi!@j3{&gmT~oaz7RMS)TGs8kOr3Z2
zM)3ukj&w=)xLom3V?EKaX5y7?9H&#nlwXu@NNh2es=DDh(NkN-KQdC`)|%@_gahwx
zdc%2-BS`8+$#!Mi=MtsPN_r>DN>BAzOxdvT;*^WRDMuvQK7YOWAbKKe<K9J@Y|}bJ
z%zNfrt3FU^+WJxBMNhfitH&2t$T?4086oJu=5*CnpFGp#8_O1KUA`qpTBJukx+F4E
zz)Y!a$>o;rr3QhX%ig?Y{`Bay{HvxUPUZ{qHr+M-aM(%1UUc`RmPf@g$(q*%f4ZIV
z`jT3kcw5ptd%|R?RF_F2G84}-3Ry@hy)ORMy7lZOX|t5%fR=Tdk(({ri;{IjO*eVQ
z>@=GEqszV6-?=17<KErrlbdI(@{TQ@ygP*NT7qtWubRJ$j`)S#?sL%_5*ULwzvi2E
z^WCvIU7xDNJvIl-a&=xK#-;uJtkL09rN2*KvygP^Zn<!3qwuM<jYeC<=Fhq2S?t|*
zO=w}LsaVwQHZlFFN5$tIdbIJ`)jju)I>$`ooGzMNR-U&d#Y0zq^R`VpuKGRadvx{7
zo~)V6E`9#e<h6fcd8n1oU#-7$>K0y7uw8tq^o!)wbqljYt6JnjU%Ks+>p31$aB9){
zRa<7~1-z{KwdT<7(0z^hAwSjbE&lNQ$>X%C`46K*|JmH{di(Q;*IDK@o-b-0pXz^_
zzQA?eB#sx?ww8vj2p0Lb=~VSkGnTS<EprN31*;q%-VWW@ynoTB@SlEDzkiH9q4zGe
z`B3@lKc@e7o%;TvZtDA|`5wjdPkrwe;$QJc;V+N$!3&RiSI$Uv>&-cow(Ctzw7Ty0
zunW(ot31zsxMSj#q(X0J$E=rWK9`egXG}X#Cp}#zY^~`s^Z2t}*|Hu_W?k&AwYs!i
z_sqH1tKDWA9;vVW{q#lH&)GE&Jl`gqy8q~}kF}qM%dB!Cokvk$CVMXUbofv1)OU(A
zUMSSNKXp$KeYZ(l*EnT)tnR&qJ5OF%c-ZmbW$z@_jb;a~l=_9dzr5>9Y}>Uw3B$9O
z7T?{Jo^8AAYG|y`X74X9A)9xVaQ7}eTdsTHdG_5-3$dtLDV@9Odt^3*E<G4FSFZim
ztSf70&1@+YTBIx^JWX}hyoQXIEo?nE-dsApwa>+7)yr8^Jyegz`SFJ8c4h87x~y%}
zsxWQOX-AA^22Sn@Xq)qH%~9u<pVnO~6FHGlTGnfp<?z&<wYPs|O^1B=56-FcMUKlq
zt9x_JdFE!li5GsW)UL8|un+#pVZZd#<xgfD^DoYBdg8EK(wlwZf#;Gr7t~i3wjI<9
zu6y3TYf<49#d%M?r?2{>^IlNPJ=E^ldCfnuQ}?SU=*Y=tXLd!cPGn4+S>*RxZq@TG
zeHDwsCcmhTdng|A&(3~I=dHk2ncxeltILjs)J=>RS{XO(+0;a_Q({$G`%hH|1b5i0
zOp811BOSM4<sasmX1^b2n=SkFt3pd?o3m}D5bMV1{0@n#pn_#5cSa-$`MDpNDQm`7
z{bh#g#TkcW4#rz0nVBmJYJ5y7<#{^yl>L+IqW>nIvVS&z#h>6h=44wx)fJy+^UU9W
z)ZSh4Rn)%~Uljj4J=`ApZ|Q=^;eY&&U0Qs~WNJOH{O5cYYh!n-<i8t!<yPf+os$)^
z-=m#Z>A%$eqC40Bu&4Jwe%0E4zFcYhyozXtpR-?1oZx<Td&V2~r#<Pp-=F^AdAe!J
z%B8-(pXSteg|leL6rPgts}IV&dBVEsrLx*nna#_m?UGxwb#mshl9v(hT<k9PIz8mr
z`1Y`K*xrRnCJmK0Pq*HkzvGi+-qzr-b9+@jzkK=FXYT8pN8HT59SY!*v`@Qw#o40s
zLgICqbI-U~(`(MevYk5<aQ~XVSkpi5*mKWP)pzpDd^Bm_guV|P+-lp`>t#2&t$ep3
z&+}(9AN#U>1yL;dnKNT1`N++i`KJ1CVOY)E3$>nsnUl5e)mUsSiHa?g;n3f>S;ioJ
z%C-J^^Y<pLwAr{W@ABU;8|C9I4@K>!7)|cIn8)qz65l>|+S}qY8n14<?>-(nU&;LB
zlVwJkah$?c4>koJJF)9_dQ79p92rN|!+w5O&n4dK{I}~{ip``=Je|uQ&Q6inV*YWl
zSy$;hR|4}ANvm^yg)iGIcRkfs3iozfpILq5qs6{ASHcy-jhQljKU<u0W$tMU<C_yS
zZzwd$wHcqc_&HTP!l%jaYn6_iv!;0Alv!b~iZ~OVdoQp{j5fMH!MR+1vU};NV@qa>
zZHxPzQtb8N<eiNy{uM=2mCIuiQrnaKctoG4tbQ2YbNTN1?~ATZS-9|v@5C>^RApwr
zeZ636veUB_-l`|3-?WxKDJOktsn+G0%Tp$aO3txlR9P9Vx6*8T+W+ShjW--R$9VRm
zoQj`_l(S9I^Btc80vsjwzP9!g@bmrlHR4j?{@1#J8%(aV9L`VeieVQBimfV&wMjH_
z>(MYWW-!)E`x5o?{6y(Avoz5e7k9q=<XUdEed9qz^S)hcR-W8?a6|VB;fJZKq;*#|
zC>I)L)$tqcc3d7TKUFZ$j`gM2uC5vDj)lwB&f<Dn(fi%WGW)jwoJnuh`_5GO9lRs`
zQf*81hf-OMijQ-`?HpL2Hj89v%b3rMlT-L=(5t`R!pE!fY2v4;;j+s(+jIElJ6B8n
zym0QQSMRb*k`of;&Mp($GuP8RdC8rHp4%_1b@?{?#F1~aJ*KAb?~IZAQa5j3gWSYt
z9IPG(i$h)QePi0wg1moDHeuACoW0jE-(K|Ev(w35Yx(~^Zr<lO>sFZD7uL_(+N;=B
zteC&d*8OOI?}W`fR@0h`R8!V2k__QJDSo49UFnY0r!lVaOY|KV)mY8m=Tq&d>frst
z?8f!xtxrp=?{c0^b2wjHBA#}3>C6)Ido^E7*L+KQdVM3?^H6vBJg0aDWpfcfd#8h?
z%_ULMM;Xp$eAM~uH_zq#kL=Igdn$itriB{ppP#1pS*-HC<-h5t680_U@H%Hz@Td7y
z-K<z{-RE0)Cxor8nD(>#kK_G|zcUV|Sjl}6{V5(_+~T)hmqYB!<(T(-P4rgyL|yPt
z&*9a&WSajv&tBf*Usb@%<&zXQuUU99W4qIv?hK`^j8eBB9C~ZguD9&x7Us5HE$5VE
zpK)CLDYC3L<wEKeN43&Rq8Yz0l<T_Y2$wk=JiXypoW#loy#-6Z@b2pWv#8|S8uJT(
z4TO}~R?975S3K%+%k};eW`k@+)t8EEJ~X+t>~rNmyk_-*ueU2cKit{Hcd7irvKVH2
zr~YM(pLyI^vUHzr$gcCg8eF1w*!(hkW?gjR%I}d{*`GtU$tf<_{Yb)1d!pC3<?XB8
z_^RhEdS&Cqd}1>50__WG3dKSTd>QR_ub5X=+;DHw)h$Qa*ly?dA5^N0*g1{w;^PJ1
zSx%-3FRz~G;=j(XeWm@{%bPbe6y&a{n)vFpHv5WY*P>Nd#21L`vMGq=>BcO%u_%1Y
zw#thecWl+&ziR7Q@h@FkTYvUn7CLx2=)GW+naJki+~qO6ZPzbYSR`pF8Fob2a4(wQ
zE|J6VDSun?fp)v8YzlRDY4dtl>^T*-*>H85vTf>fnM&b5&u`}Y-!7cA`ig?g6OFIJ
zHmcWNv1M9rsF}v{{pRlsL%FTGq3+WwZBKvd7U2CpF)Vb-{uS56zkI#iRl6jNt=9kI
zDVtlntmQrGwilf+-<;dD-Er;hhqJ9$Oxf48;hz@EzUFhS5i=R|S#Q1H-dv&;dSI9P
z#yYElx-)Kfd47Dc-1D@%Ad>a<rMfT8vnurUU)wH!?)jzWy4l<BAFAiY6<pHX&#cG#
z;=e$LRl0m7KW|WBwClrFj8^5c+j%Zk=I%>hx$bQ64ykq8YuX<_*xz=IF{|X$ZK)^b
zJKp`-{enluB6Y&GLlM5M;$JQ=KXS3+OK`FMT5E@Fwv|hBCtTZp@((X_$wP+RDLxmw
zSZ8O68s2}lE$V-Fh)f9Ac8dW1U(vraUxz&r_2dxWDzhMy?{;p5?DIWwcbINWeQI>y
zaMDg5vACw`&KLG_eEcI&)L}E<fv@mD8K>ytyZo>3b9diu_kCVheJshPR`*ez>9sA&
zhpGjhUsv3h@96X1{fJG6Mg_;WI*BcEhkpxi_>%aCm#ObK!;x}^J@+4b*WBhU_@*GW
z-(UaGtLursD?8)Zj@z1a$1#fheK2i7_)F7@ozu!L@xN&QuqkEg^_T81taJANz0vlq
zJ@R}#$BM4!%P&ZO36{R)`l9l1TiLvrw58_fa(@Wjm>*X0QgGLs!b`tb?%lb_;EMml
zJ?t9yt1lEANLRI*^)Ehf-f$M%7GL%(m*z~qR+IdxY8Rr*BA%XXt(<kfc|y>JoqG=T
zwP)G%l+Fv?thJGIPF&vM)kW8o9%MJ{+0>X2{YmD|+ldheue3TlS$zDj#PdZcVsB4E
z1*=-MsOB3}l|R!n1+R!KoYoM_nYPz=f6EPF0l7&RY$J-*{&MjNOq`y6cI%wF+}Yax
zdA~33zUTTt#c@OV1gn(&uS+IeRegDL%9or+Pm9_V%d86e-z-kLxFpDK(S@b+lN?@3
zH;b?z*?qgMm-m7w^Arb0fiGU+MlYllydTW7a(FEJV0qK2jm*dM#h2|~ayfrL>)+#i
zj*DeWEE61mM1C`Fh^Y{sXMWJjy!VFE{X;Kqn{ZaKFZ^lqB7#%*{KD@O4DYiqz2AII
zUc>J{gIB#n-e2X6+7_$V@{TW+Lj>y2sV%I$k#&FZ{X^TVH=O;)w_5e+`?I$S^=l{G
zv-sAu>Oh{*tHu0|SD7WhH5`^#tXA8;E%A^4g9lHpMPE=C(46_k)%RjhU~AN(+?n6=
zW(7p1h6~xU_x+blc;PCt+)b>o-tl*Sz_;}pssDtZ-np;vsy?-mVNs-@_kwGT(MN<d
zzU|w){p`l_9R7qLpZ_V(FR$sC{O8O01@Ag;bXdySO#Q@_lksn1(2;cEIf=$s=4gJ=
z&uruRn|(`-%Vo-0lik}Je#F1Le_1i8dHI=B(~msyiz-#KShDel{essoj_wPN{$nn2
zA~I&SACukgKLzn`S$68Lo_hWwSINKG|Bm<eeSaYF@{j)C_*-YB6J$M2>t;xP-SB*Z
zw(9bXkLj8}9Z%jo`**|U&9i^Me1EvS_=DxSC$G-gl$*)FKexN+4(A$!;(4<TZ_NG1
z+2dHLWAph&x0S=EJh{6ET~}}ytl9MZ;(un;8LPVod^(S?FfdHz#JQ%*DL=oYxTGkt
zz&9~7FSsPJs3aA-#<n*sGWvFyfbF48tgD0$<QTFPFp11Q>KE)Ky}*RCb%VsDglSew
zx;N;%xlCK3Bm82TUgmG<U$>_3Gt+NdU*miKxBjmy^%u+5r8Jv=y2@WZbEb9ixifo{
z|NcCmug|!rQ9kuYVYr)qn#vyi#4tDQXDN3M@9AOH`IP9{#%?HiI^yBszg0F5CbfT!
zm?|*cKxMIDw$QcIoP^**>|Z?!%=>?AINE;v>yLnA?mr?9secXlSrqLiK1nEusU~>V
z!kd?wY!-fKV<<f4W60QRcs4<M=NYrj*RJHwRrxn#X2@6GtCxQ4QxJI@tE;UbW4yM+
zBzL9eT-W(gum0I>UR3UT`P{87kzuCFd8?<LUE|4kPbZ%11%K|*Z&#R2SFLullG?6&
z*2907(Al1InO3t)KCB3vrL^UWftbQ>#ph>v5_iVbML0CCjt$Xuns358TlSXmu32Y~
z`IsK{kSOq%IMcUo{V}hQKLu?8)=G?LRX55C@}K>B`pi93&GxyfY+|=BMqcXMWwj|s
z-~Z0VI>XAfQ}dR$E!+A0=3$lF+f6T@@OrTH#>JixpR<xDW78~Cs+>eCSEcB^%kV1=
zIsHnVJvy{%uH+H{F_tTtTP8eCJLk2@@_ERDu!l-_nwu2XX;!drXDNx9V8!UQ((NGM
zC809Wj2n|bxHR_lFg5c#&Jg-0TEpe-C|06g^+L+^&MMb0Opj04rk~%aapGaah0j-n
z*LJ<ptmtxP^nL#5_wDKsPEL0&yTi|2Q(8?fv|itu+xLi@HU5z{>-i6+ABzt6iyiKF
zYm*bM^gd+HweM(o$eYg$t1S;-GkUWofhjHh%$#{#Qp;qotbAj}eQUSQiy#iMS~l_b
zYuHQ|YxreO$<4JsZp*^XzNuM|_54TM_HWG%2mk-v@}hSRv$o@tt!KZ6YLx_Q#8llC
zS39k}HDE(eXymaIo7O5_KN_`t*-TmcQ@3=2|83}!)AF}iKj|iaNVjCj`+|+^BFeD^
zUFJ=bcCC9G<+C)sTyy5;9g83B+p4oh`{{$tXXdT^_{Z9LJ;(J0mv?>m$L@6X?DDRJ
z#;A>|ch(iAt$yY6BeeJNDOankTeAui>=s(rtTySXzJL0Vh|9yQi4FU@%z~PiOy<}%
zFXp^-qDIVBZxyAzi$1Z%%=@=l?QV$XVq?jx%i0~}Zf7n1CMbBvP}gXcLWFX^DaSn5
zbuVj}ufO<zjsM?ghX=a7$6jygzNB;EjjUDMhbFPo7mF`G=dixuzxa_<0>k>tKK;Uq
zn;y0QSF`SXc6Q;!<ST3E9GWap-#+6*&$EYT4~yG3PI3QhD5Z8p_ynV2|8=HC@h2Z&
zYB7KCHD2xYv6khmz8`I7sn;;hcMz&SWbE^M+F5bFD8A*#IfWM#%-!<-kiohnp1WVp
zUT~IY*vogOQrwGCExy(8iNk%B--1nV4m<Z;>iR9?+_IzF<oiRm+Nl*M!-ex6`EHWZ
zo={erKQV8PijwC&HT^IC0VnPnx_-I1r=;uJkq!IbO1!Ns(dpka=UY#AZqoPjFJv=?
z^NUth#(ZS|^x_ZazJ~ADpD+4wdVl@gMcNPdN2$~_y-WV`r!cIsjEkE~c*$K2!99+z
z3s2cDxAIMFW+`tvQBgSam0_o*)pegI_f^-n+cm6vY#Y@nb1%|s+8mb;P4~+l-TBFn
zS{!;@+pcn%i-AE|f`LH;`#eK9WQnP3MQU;>e2>}G@X7?4P|5$kn@;ALU2e&nv`te}
zM>Y4-S=ZBdrzv{|OP#oyb7|J1YbW2CMBaGQa`UDf2bYG1)*=}t5f|6B+Z|uBbu=sz
zJ1YJDx827>%=sU_pY<`@d^?x7{>|*~d%y2}|D*PJ_3tOe{PtW4dxbtd)~&lEcp$o2
z<KW!7JEAU<d~6FJ<VEg0-oKr-V7+`k|0CIobOF&F`W;>}UmnLghCEJSwY;x;xH`o`
z<#6@WFTYbN_HU?BXy8Bf-Fefe#F@1!4U#|a=^u7y{xPFQe!|<sMyJ^fwC}U0R%kS|
zKebjreEe^PN`w2+uH!9#g&pVg`FG9jKm8`y=ywUjd_noXA0Jyj$V{C3JLOxXy`A`x
zn-&rWCV%={!w}E$pqsHqvq7Kz!&HWSTBrSge%z|CtL5$MPk-BZ{6inj{xQwt=QjEK
zoQuToi>lt|e)RiC<BV^I73x~#**^;ZF=?2ux#IpMX<5m4Z>3~k%F5iZwYsytI-{as
zMS4YwkK>B>M!D;+O}r8$EE{;*XNj7bX2hl3C6}g6+f<fuvS(dF>b!<^=Q8@;o&G%w
zcz(j{{Ijk-XJ2ePcUStHO?G+7miU98V)r`lUaZ`F*tS*3)|&NE)8%D`YiEZ!`D;f!
zn=x;r(8<1qcdX8?k9u%xmdB-kTJ6S(B^<hZ?=GJasg*qwaamGZ{gm6n;;JbBqgxAS
zZ?s8U@=ZleWX)^iGo5{$!QJX#`)ZfDsclYP{`u{;N~2|Isy!n7SKk==NR`Z#+q_N1
zr?0Kj;o#A;Kf^@AD{X9Te}9c=JGA1^_1q&bBTD0ymhz`8J62lk(&uf{J7e0Ng)=!9
z-gRv}yR}VmVd=U93z_DeGFK1cd|ku(Ty*PJftSe@8=M=R5=-a3o%h$U_Ll4MbYC@z
zy~mDP?#j7!Ww~ka{Yf6i*2j&NQm*)DSS+lZyI5gqwCv);dxh3)nP`~I{W0za=QAFU
z-ye&Q-M_MRhwdc_)5&KZ+EjJ>Ej2uQ>9Msb+q$L(-^^)CXIn1MQQ3A*KiJW=#xWxF
z(uGIP^IUSHL}oQbD@;rhe03|tPx$<oWiu9^`=n-HcW{OOi)rHgO$Vp$(FwV5>5o&;
zwe`0r2=Dfut#7+aRb=uNZC$0AI=il?N{GF^yzIB>M-O9G)xrzQX9~M4S=PZPHL<`&
zE5l-*6YJK;u219_yLHbx)KhR{n`H2pos&N7oS3lZx|w|Lon1-aR^`pNG+59%rMPQO
z^qM6*_PL){dH(zD&hANDR<BsmurhvP;`PqWT;-Y?w_kUxNtTpcc6GkT@{`*i<QQaK
z?+O0DmG$d&y-%WR{{5foCB8><-GZGB_Z4r)Y^q)8yn4nF{gt~Sd{5k4pW?CNhiTnm
ztJs1Hi^-dWf~(eT5Z$uNtW~_F{riO(1(#=)1Y6`y@R1CQxPSgqX8Kv36H+qM-@Q1V
zA8ONkzw^-j%~|4+wpwM6Q?-t5@IJb<Vq@3z2TQg19(ajrN@)FSun&s76TCh)Dk}V=
zy3OK8r8_*2&UcNKJ3b-Jch!&Xf<K{VulLNp=-E3x!^C*$qx`;}*`YStIj^R6zSLN}
z&|2!B-tPY=ZWplp4>tR!DsAdoWis)NnbFA|z0<yI`mj&wz^r3acyo3i_?j;|?Z+>9
z!9Vdf%O6#q6FHEuz3chbcS|Jx{Q6l_7`>&dXM>*pndj?#lq9EL`j&p?iGg-xsL0Zm
znRf#jW&}5SN!xzXdw=2VmbVv;-e;8uC5yiC>_~{WUKltv$i|r8tu8<O!;iwtUGJH@
zdsSuop6m5)n72r)$aJTh`PR1CT)nz6#k#p%*()ploom|a`}W~PPusQa-&H4_{IEmL
z?5k$7zUVSu<*RaXUaxqnEhd-wPR=`(y|$J&_)F%|=i#SV_7;h5=lXYg@7Gs5lO+6(
zuGeB*dT2dM=+Ypr526Ntx>jj<Ptn>Rv{p-9!&|h3Pa<*m&I>ZxMm0O@4g}7gcP%+7
zZw{Nt**TSOBAV5!CM#4m1ibscB+>UBo6MIr^QzvIU#aPS>Kc4YCwXV=o6OmFriOpw
z)3%&*dT+(~ulIud`ZezDd);+cA|!sU-kufH&7Rx)wya+Ez-s!VROcMMRjbrudv*r2
zO!;|n&$(A!O!s1}ABO#!5$!nd*3=ngS6DB{9lBZAvfG+Z(DI+DnNg<H?;`(WTwR^V
z)Jj)ARxOhMd~y1wIj2>=UyH6dc>U>(#eVyaRx~8)Z031%YDd<$?hwb@i}H%MUp(<Q
zQ}Xl9jXznJ9tq4pbSvq6GVAinrDgtEX;Z|v&FgwunfCNAcVyIrPv5+~tc;zn#eA8Y
z@jP@QFO!J3l+d=R$LkUedkl@Dzb(GKuIH$oN<UZdM(-^*x_io&>RLDSy5679`tGsF
zzu#+F1YB&lCC><%62f_HvfRfD+8+H?FW4?w+k82>q`c}yu;>00&g)%0^2*LR7wx<q
zfA9Ub>DTS{hpuR7E1cIcQOd#6pHchyf}XPrQ*?y1=C<bZr6zCI$e-C-e{7oc^N4fC
z4#p*Yr{@IUNPazGW=3LYf|9y5<NmW(#ZD>}h=-k7n4}}}#kT3x#!DVw99L|P_%!21
zuSjTkYf#@>kxOn8RyJ9;W3Hz>ThY2_r_9z<W%D#1pUO!Q3l)j=y{Im8?2^ap#1<vp
z_CVoG>z-R;Ql;v7*_pKhr4!kA*mkEz>TF-WQ1sKuMRj7@n=LLzW*1xwyP2(SnXPV}
zotRYSCANC*(crm9-DVf|Zr*k+GJDhYS^NtV7oTraestiXQo)tqX%|1;_*;_9`|gl!
z#B-*NqTZHy4gIgLTF7p-Xb!X&R)46oZca1DIn}L#dvCZ{m>oV+xr6a}u$%hQn1<>?
z#Z51^G1@d=xYPY~2ha3iIY%d<V6!Qk%~o{TPUg|tn9h{1`MgH5Xv&IXm44r(I3u#<
z19PgGzLq_+X_%#RYv-kdJf|LQnK`XBp)DsjdgWiG6<bP7^**v}xwv&v&rjQ4?Tl8>
zQ^)gX{SNc{URKR3aKm}w*T@`;rlrLeb0c@xiKU-;bVf3&G%S&iH}GlMvKJj|gGya?
zXzlGXvUELfbo1#GUa$Ky=hB$w3Uki&=8eqZ$$ccinOnB<bIz+jHm{hb@0#E&Hdo}L
ztKi+gYZvOK`h62#^<swp7Q0!l=lz1Vu8qv8<q)5m@LVzE+wFHbw>9T&?FinozGYfk
zk><u}t!!d-oXl4b3w+(Y<FQWu<m9%ed#A_6_~swI&++$l`l2^S?LNIvE%o@o`b|29
z$LmAZo9(ktFiAQ7bbqkuqgRsR!)Vj?dxlIkh4tKT_qOcc=gh%(Vta38wA12M0q)1F
zj&0QwZ<)#xyvvdGYFJ=m$IX!A`OFW+m=Bv>SzYA1_tZK2m#SMUEZ1#{EkCnsmi@lB
zcm8c^jOR=}uyXCcLw-{~wpc&dwd%sm>^q-j-~BYQjIa2ib^hU|I}fu9=XQHea`T?f
zxz*|xyV<W>QLkoxYyK{ta^LjGr>JQkN_R3%DVm|bTOiK$i)eM3t#VM(`MBompK>Oo
z-S-Hnv1CyB_s9H^)rOn8M;(^9Ccd{=dZGQbt*Y3GOr8h4Wouk7`L0{Cc1p-Ak1Soa
zmu~(G`lhLDHZKZGI=$!S)JpD^M!N$yMBEfL%lUHkci)0N_slO>v81jwUj5F+wdLpD
zT(`&z+h=WZ*xfWG!Z%WJ&rG>Hhq^zSshxRyXaC{m<m7YHj?K~SU8ieQE$q+Tr>&WI
z!|nAonJVXZU#}#XKU!$CU9kV>n^z_4mov${`=)Sf!sWXUUWH{W{&4@FJ!-?aMQX|F
zO<W8NP7(|ZTG$)Lkd5Qe<}qlj0J;<>g1tiIxadE2zgw5HLM=F#9G=3=ePzPAhTe?_
z1R{kLCI~D@Ys}p<N9^T?mtl9_20ogWwERZrWsi-MEo}WBiOjK3zuD#cMnv7Oyz2eg
zmwT;$rk9sm-PLu9-^YLU?VifNdn@1b`rCc}u$bXmcdypAhkByYAB=YHiP2jh@WD%z
zRVbz*T+`z5@zsATdhhHFJNmj{yZrYbAFEjN0|M1Hp8DWcS+Ty${NY)ydy5ubV%#5k
zly8skr<&a{_Y|j>uiC-?KIlUp>-&$7>o}{{cRj6`-}HTTL@WP_h_=mKash$LJMXmf
zYyFrY&%bKL(e}_KhKl>uo_?ATBlj)&RDb`9503?R)`uLl6qP@ktR;8+cu0-R{D6<1
zHVYpFbJ?v;GM*RmQL^S*|9Q_@=YM}>WsRRQKYmiX|H;Gp-fsRsKZ>4wvAV9t_u%!V
zy5%RIO4r@dtKa|f`1X(*>HC2nV(s^-ckW;Ri2cXHn7Sp8j{oTFn*S)Bt8U#Q{`~?g
zZdESWx_9;Cg8Ka9jfZzv?Xu?k{^s@TH#fBfURW)gf8ns9t)0-jtwI-noDgfAWM({X
z%a<Ld3yuf1IcW6n*}HiC*3GLI+a6wi$o1y^`!_F6Z`7O3dRAHMj#RJv+FT~?*RR-F
zYI2_$ES*~F`uNzrD>np{FD{qwlnnATUUZ!8i%ZD6*ahdMdevC3mCfv&_`$<TzEASu
zWX{@}J030Nw{AP8UtjC~*yNP?vG_*S()Lfj+m^oW6#TUMgN0Z??77`vTO3w43z{Cb
zUS!z3eCcD($?aiRJiIy@CK>pyw)MJmsL*zz$&Zaz(dRvMr1Ff9AN%rSgGt%^US~H~
zLF2ln9+iWsJ6ZNFpFGWQo4&GEx8rRS$x4l9J493e*tpy(YTNvxBW~WAL~n7+Z%fx?
z=-4n%FSFY7Yvmig;+`_GV`nA`u6*Mf7x-cmzi0R}Co4b6iBnRA_8;S}FEE={Qhj`7
zhheT?tDVi9*UXJmOE)hztX5}UZIvX`8~$X$_2<WmCa(6~*7IqXjfrF2Y1xE38xH+t
zx2pC(uKPE{E%DKhWS{7tb3G-+Lw8*gHBL~OWajpCOV6Cu6^9%P-Z6)IN_ZMi?T}dO
z`AFi|j02B)^5%a0IOS=B=PFrG*W|<OJspCw+l?~Je&^_#Ux~Hw{dDHOo&KVoMNV(G
zo-KHHdeR>5+d8aSJJ_ygNgULb$~)#O)%Q>>Y%zDN3h(|SmrFv|SxcJ!n`*tpS2M}1
z*GSXk57+*%57NB*kG}6Hj#==aLM!V+y7d2k?G+Ed|7dQrf8@{gZ_PvfAKH_%o;kg}
zG<oseUpw0NKYTCsudzPM;}IY4@{c*ZV(&se==1(RR?jl6YRCHEYfmG3#Lqq6aMwn%
zf95ZP?1ti^pu?XFXMcHQy!oz~??(UaZqbuHlY2x>6+iwv-KyuXmi<%cnoQ4bY03F<
z-rIXcopcgbr7mYbTdFZT{nVnA-2sAjz9Jn?z8XC%J_MTvw%m?zTbH;)*p%t$O%*56
zqm%rSch2zd4R`5O^KzZ3bjoz22a8DH!cMP^5rW<(fvT%sZn1o){nvJ~K6_B1wpCgR
z-%=?@SDmXzT!Z582hLjAnVPioz&0~mIrFs_HaM2|7z#Q~oH5U*@ABq_i_?^ztddT8
z&LQhNBY7dm>^p+CnJrn@RkCNNFU>qZG1=|Nr6XJ$gS>b$lctJ@^V~MysrfMGX{240
zu3lQk_gRY4s!tZREL+VI>T$GPbLpkkOR^Wat+{w4Do}evr_G%x;jY_9)uzapD_vY1
zm~0`^JHNetv(hV*!2ayWdA6sxm9M8Q^Dw{deA%w>si}9ew8+I3ou!L9uLn-ZjS!yx
zz|3{c^zV<wtRC(;>JpSc;jN;WvH!Q@`5m97zWA8;&f4|sr^!91%U8b_o&T7?CuV!O
zw4`s-mB&W<a#ts>?VNaJhTwL+B~fkb=WzD9xd(B^tud`kHNP)$^Ap#bmC5_ARrIk&
zrj%Vw)|N6{^JZOy?cI%^*jbOxO=ej)%YE^QM~6=BSZbH9-@anXRjt)=3-rXNA6vKY
zgN9>jr8s-B_3q%cWz~KH+g`3c($9ao^78K4PyKZ^zdif1>gUD}a<1Y}`7~Q%&DE3F
zhZ)}Gv+++4t&46*x)F9kv3<{`W5F$#du`6|+10B1{L$y#)~8u{<#dX7zt>X>@Zob6
zJ#&WJzwKvbYWkLW4{}S47HVvsu6t#}w^v0eSA>pL_)K5yrF63SZ0TpN#n%nOt?h$m
z#hWqbPJVl2<HT!`IRb0A-!A(+E7G=>eR5B%Qr29)=Z^y3bnP^hu|KhWy78osH>Vvh
zeKzIKs`KT?xZM3>XRb6iHn-N@o_+euk_Yc1SLjS$Yr!AvcjD^JbFZX*XQ<DuwlJ9O
zxm<YLy%Q@E_j~j_*0go2*FC3JF7<lauV$A7MTyIj$F*&m3Qsg2)1J_6oAK5<MSX(l
ztCrUe_g2KccP*dy@$U1UqN}Czo(DyJ^l`L1+y3Ea<@vCEhnnS;1v_VMN;Nq%ckyQC
ziMKz0%Cc@gJkR{@f1A6<X5@Sg`RlaDShncxAA_F?ix1!Vf9Zo%LHW6=yJa(<@vba9
zw(woTUxu(m=53506gwDsid^>1+Rdu*s7@n$eNnwZcKo5g3)j?YYPQr|E7;pN!Qjab
zUJ(-?2b+Y&GS8win8P1bt((=%AnX1kx`m^y(>1BE?NJwJb&U}BE-Cem8)kRszZc<Z
zQwzB9i2K*20Ji3RP3{lmqa-w5$$nzJa?hme@`bbmszNbZ2b+U6lv69c9XB{!(&PFi
zb%|q(h~J_`uAEveVtxxJX>?zXnB*2ZLH(-kzXkg<g&cQA-U`~H7h%%b9d%{FErZrn
z=Yxwhb)HK_7<Nt-=I~9t8gRkB@v88S3NF(pTh)5E8c%!g*6>=&s!BI^?b5@q0$bU1
z+?Dl?TD@^H{&qPxJlS^dW<j=v%QtXtPcqJP=Z<SyYtgLzQ6eh$eXq5g(!D_In*L9g
zmR|j04;5X0sycsCke{|V!o%|WW40q5Tjh@%wpr}0v)!9F$y?Df#yf+FD|Kt3hHUHn
zOJ*N0PE*=$eC0KZpOMn?980FVf1a&A$6L~}N9@D)Y5V3ly$x_)kjM0O@`C2oF#*Y9
z8OIcthVW##vs%5FB)cupTPAGBM6Uz?tkR^Ho)X`r{lUn5bJxjr7k`$UsqeSi{Au-#
zZ`>yyJ>%&P6JLKS%%ZpSPd1kc`=QJjqsFg0-|X6aBQ*8K$FJ`;?Vi4Fa@G5F2ii=`
zmKA17O<%e(^L(ww%UOq>Ej-2<Q`lz|#T0$%Miu`mRfB@1?*((Dqi$TV?tCj;am_M(
z<~7ck(reQ~{qD!S%i7dr)DU-wjpMDe>aUqr%|44(SJv~V?Fm0Q;cnOS3H#=FevYx}
zFW6tByL*@2Spl&R<qf;oZe7Z_fB6H8S5B>!jB1%dX?dAx(B#<O#&afR<?Cl3yP<4&
z?d{XR@XI&FD%*E8?JKT+vw82H7q-t&)T(Z~aPZQ}+;93D=AOKN_Wj*&>yyv3{SyA8
zwdk3u4r8d#lBqsNrIr;d*Gey&zCPi~c7xn<<HYX<kxP4dr(E#x&R_m^lh@WqtQCq6
zwK;1ACr1cuo*`GUQFgJ)yMnj((wFP~y&<6Cm3;s1ijqlFXaCKVnP4~H<-O07xo@^;
zD7G&3`9G5<A~9b2<lRGa^}e6g`+iCD%fw7hRi6d+=KXnR9tFKB65Nv|^m-}!4B5?d
zf)xwc7#OzjG2lEL3bOPZcIw*P@a%HwtD<+WOGS3sc*-y`2DY;pC}`jC;^9>Ba}u_0
za5Pl(dvwe<`q(9fe&cz8?#k*5`4{Xtq<n#C>5OuDfedfys5KT#c>BxNuDe?I#=VZU
zenF~yZ_4RSkJP?K*?qrf{CwW#_m;oyzMTJGQzUo5?N6=buEK~T<t{bhN4~q*C>-io
z$et{ilH8#r-YDtJlWbHGQ1w&e*wU>o#)^+RIHHfF@GLG^RFE1l;o$ehACByjXzrhs
zvFuiX$nhf&cJRn&9p?XV*z6C7z1YVBk^YWf7bWVNjwe;bJnDJWsWGqTk><;J0w1@W
zsaJE`a^CGj42!yQMcl&LQyZ*v&9iST4twZ#+DtSoMXg6uId^BK<af<2oxbO^elbs5
zUJ#l4Br;j`e$ia!dA8;!4gY>=tZkmN#p}$bb)OeZ)vs*3p;?j|qd)&#wc9zf%kH*|
z0}S?l@wo8q`s|{*MTTcS?6|l)^K(IFkfI#RW?uFAa?2VgKW_=EzoS)_7w{rOEcJrs
zueRLoyJn|1{*mSCSD$VaGovMK@vSKRs=)5Ra@mjzzwXLx-e;6-`79u_=**XI(#b*X
z6>g`BE_~3D44*n{O%Nm3uLX|{nsP<D)l9GJY@BhfH)gwAx=u*-rqvtw8P;zy2>%=4
z*L5eUR4YbK!svHd)dlmNT-Sv^dmgq<s%rdMbljx)DC74P$-CYB9D9Xkz85okHYIbi
zzRR_^=W2(WxWvwv*0x^d4zJh~5dCylo91mXr+bPTW%8$Iv7XE}Y!c_#wr!nP-FxZT
z2AhR4Z*G`c&a?l?#n<PKvXvH|xSDhFv(It=@5aGQOCoP1Zhw+`HsNEH>3-X!V?yuB
zHf@VeN&G(J-;UG850+ciwTYMJ91N3{-Vo@%z9YI|b)WBzHIL2itXs^le=)pPLsma|
z`=P15EjL3qy_xg!<qylaiyyi7oT~b|!@b1k^bY>@FDqp8UsMDiiC$DK{a&M`nyc;k
z>mN&3)VIbPN3BgN+2v>RSFNeMbWeM*)xG2Ff`7t}zyDAzEB}b!>R-oY!I~wH{eKj*
z$iJQ;XrKD#d7?qWt!4Ut9@1)8zV=<dD|yDPAndrhh4{hvl0Tx)XYOP^=Nz_BVVlYU
ztNTkIF8|?k{P_>3<M)p|ZP3}3tSL9yx7RczVsnZhXHnfLnM{#&`wC>5&Q>jZIGg#&
zyhGELvAbT@-ItNyI%(7K>Cb)Z*DG#ddgNjuXQX!4?bbQ@v`4oe*1hsGvosZ3xoXdY
zBP-ssX|=wdeAaL6vB%5LYVlq13U55kdnZejd5(5x@$-m_TfO(~y#C?%ObO-LrrkE_
z<{y@vjrqiRD@F42&aAVXGtc}~kXH5h{N{R-$=)ENJiGXl$C}=RZvPW>(>vyZWN`9s
zpI4%1UlkU}6|Hx;SN3uOd$#nZ_e|Ca|2le~J=>K%nK%4mQxDfV4Nvz)Z_6u{%|AG5
z#WvTT{@-@jpz`CweMt-3Hm-fO^x$N>n^*rBb?RR$s_v0Ks`yzVHp6e}%7c4C6UySb
zeAazj)_rFW<6ZVw^JGf5KmBvqZN2<?=i4e5-tFhe_<CmN4Bja24UtS={@%}@Ru#TI
zwn*wJ4|m@Y#m_5NZ0O@ZDQD+=DI+(3`flEY+tIm|heM;{il$Ec9<%hR>xt(j9sg@@
z9J_h@=Iw4VmmBwHMA<V>-Brn^yYii6OAu!+Ta=dIJV%SE8FL-8U-2l(U);oMtr2Z0
z-&4cBH16g7j{HUPpDZ5vI%!wse-vD%dL{a~fbEoD61u56*6lYI8fS33RY{(HtgUe>
zqOz@DG|M@4&8%ayzNQ{ozHt_p>Zu2ZQ#bGUQ(|EJPfF>QX2!y|vX^H>T>39s{I7l2
z=?GgTyY#};s|IzV_Q6Z1PFxTw8P0xxzv8ZUn_qA&v@Y0|wxlgh>^jfK%TcXsqnxa{
z<5pe1p|>|%)Xupidxzq&(w>PN#f*$ovL?7xh+XfW^XvWc(ETPabRM&H9{YbSGAwoB
z?Ac5!bQ>2}7)i8A{SA{9NDGhsx8s2HjQwk)RJWLQixlsvQ&~87-3F&sR$W)8?Dd%B
z9DQOD&(7i*d)ZIS5l%Vl`RSQ+*f*QC{~PE2FfV(k{bjWUTL}Ao{_Mi(yJVZQn7*%g
zd}77q8;j!(PN<q|BfRX*<Mj)K-`w?F)@D&``z}epNB`u^-G&^#JA$4D2Khdes|&SQ
zQ}!?}DKl|TeIkFGz@q1&JNn~ItN0`&YIe`B`yRMB(D=^2$#vh^W1hqXiq2~bPQ6o8
z#i`W3n&H`<0J#-6<d1kP{VT>7y07=qZ6Ago#S^WirtV*lwr4p@A#1`GMh1p3W(Ece
z1_lPu!KTndOUofso%$$ymQ{T6lM{1XGK))!GLuS6;0NqX4T|=bb`<${F758slGTM<
zx7~`~QrVIH`qrapZ$}{(#VxI3@--ShlN9}S-Q=C@+x_3`V8Dlk_6OucZH(J4tq`cW
zF|+u$Uh$37`^D=S_8eVpc_v{k2fx4XmoqZwYxjH!RQr)@dZIsMzQO$$UGtT~BKNEp
z`dN2pmA3DPPsye$-UygU&3t!6R`*tCPkrXa_ujJgf<HgrOHimj>ymY>Qa)tWr~a0`
zufBZrym&I_^MnW2Hf>YUF$vMK=dJc#xu26w=#bkrp_2<QoO<$jQFf1zSnm3B-?nD)
z6;EB-@<73N={nV>rIjh=tG1rf6{?HAG4bbr)!p-gN<Tk)*JX1&+Eh|Ke)Gb-ldo0g
z2;4N=!Qg7cA&_8uKqig($BeHV9V^P~``;Xk@^rb?X>Y!J(^O%u(9NelnV7BlJpbWG
z`(TF*n*tFjXNQ$3I=A@ph4_EYdH%*)W=D;_d$e-E7iqsQRV_|m(pbLtyyg?rX`dN*
z=1S@nejP=}PVLmRWF4`e&AmO-l9d<dZIMe%T_gJ0x8}UdbB#Iwl)vW$Mjv`mmEhx1
zQKNoiFQ?s3<~I*IZNqQs{&;(2uJ|9JT}%Pqu)}vj$6!uM_L7$5U|{g(XJAmoo)TQ3
z2h2i-_FBU`>qXKf>(VywHn!%m6cdWjn6QjD=baMQq%BueCY0SaIjF-K8&J5(`lim^
zGdbyLoFTb;SAEUfDi^vWPBglI+O`~x3y(rWY@*hNMgI~1BYx%i?7MFS!_Mgc?EC)v
z-s9@|&#RxGo?1V@p7lW4pWxKw)(y;-7w_mxMW;U2C<uL0=yPll7whrRIR}`v>{|G{
zpD8DuZ`P7K<epmNbL@Di$zx%ySsy$_`5y--{k+U27x+=KCd_1h@W;v;$D`sORWI5|
zEqZ%gq$cuc_=g=m>$QH&>N@+Qis9aQm)%YID=L`ohkVqX6S5)LXP<7*9HB|UT=POc
z$g<uK{b0*_Kl}qTtN+O#3%f4=nAo#C)kgX8aUliA?wQLH`_oE~8_ICXeiZJ@)_Jpi
z?VQBjl7}C@_|f@rk9GcxJhhd(r|B*%{D0+%Z1C?#AC(uE`}yC|b?owtu+5s;w|e%w
zo_S}bd6m34SGz4ce(amCZ0B*lw~I5PH@75Q^IFaub<-m6=F7${YnN|2c3{!k=PwEq
z?nKK>x^Sd2%;HSMvzC1c9vuDhSNmTwN8a_}KN4%5ylkg#%+}mv8)VB$3bT)$*-@?1
z<Ffwo<EY~6Uc1s%ez>m{xoh5=?vt#v-yr)2`<I(*#3xL>ve|goT*1$iF1$aNKI6FW
zSDE))#m=r-x?JKrujlDGr<Z)*w$bR|Jx-}bhc-{r()73?q|25r_3przkToJ%$D|}q
zr||0gMEWnyIlS=wv0H0h6jLqoj_KUYTXS{lf_K*QOp1~&PW&j$dG>T(^0Mq3%jTup
zR9f#o^ufYZ`g30*|MFg;7ia$D)aCg#ytcMEG~vvjLsJwF<)6A47Ug1|^|UoG=$T&B
z)=%qNwiVe-t~8%sKC{X>enF{y@sWz_*DF*{`zUKRJ>`pUd%ZUG?~KT`a%vVQYr@KV
zUq01PUDI23b@4>6hErbxkG4r3_TgN&UdzwRTX*VGuG>1xJuXam8p7TF_K8jJ(Y1T#
znz*d0be|@9LH|w0jqXC;?T>Ax+6)&ZhM1Hm&*_|7emADxFTj~a|M7CEy0&!HH#_;(
z=KcF$9=@Z-@0cX-`Nyo1`dJp;O>L@kE*tc6O6q4?^m9LXUgYO)<Io#oYIbDFET^m9
zhYq@4+@teYly~}vVqVP;FIHK&&u?$)iO=|<TDSD!^@`AAsl59S=BNJgJ1qJq?y$Uz
zf&2manZJ7;bj>}#)6MJHe3LzG*HzbO9=45CTi0@;NbHt)>GHi^8{=J^O;^Srn{WDu
zHUGqq)ouG9`tsHve=YS-QRv;lJN0W=c7@HFdLT#3Ph3mrX3;NS{*tMSr8d4hxNqZS
z$rmNJ7SBC5;rs1S(<i*E0}6`1X}r{VtjnAKSW9$0vq9p@%~Gp%@^{HuZ_{6X?sN3i
zU8^#>c#^CxwM9R=$jKSKxKt~7Z}yeI?f1fzK5&Ib|Fl~FW6g@bhwn_EeW{ryUYZ<w
zaP`gSH!rOEdAwTn(AC^`n&)Ou&#zp1f>r#TcWTaY?X~UGc&B(RX=%vbxGh98EGx$Q
z%dIHs&TH!<r(W>-wdbVIqDajsA+t3Ffx#DUy;{FvhM{@zvA>mKpSbjHzj_e#U;5e!
zme5dl&eyAqLZZ#CUlH6ab~{d%--W-w<Y<;gal*bGTdZboXJtD5`IcAoluLhJ^$4$y
zIrr+--uV}=aW3}q`EO~sILy|KE8*Ys=ks_3f+y?cuMCr$d;RSWk2Nz*vOH|xycLr#
zb=$m6?ha4-#dABJ%s#W_NRm<m+rB&v|1{m>EA~dlP3TEFw@JK2F0p*BvW#4Oq{OSc
z#&=G9e_Xcj?!8V1vjlTCOSwn8FSJKn?q8d=k3I2Y?L!M4JHt5pS6><5-R!Q-Ic`zB
z^ZNO^+V4V3-p$+mq4fJaSLgEkkv}xK=6qap_h1=wQ;vDz_q3J`ti2h>rQEjg$?lk}
zeb?2x<DuUBwY%<p)~{3+Y&<XBDIsO??D#s7gA&h^uHP#7AoJvO#vTS$MP0=bzO19w
z%H@o6Yl`xhUSIJ%Q2W(X8?PPg!gq9Y{ED~VxPK-3z$&IK&3A40dPnTN6vKb^YtfzO
zGqd$qP2s=L;`!!#jLE~jGZv-INw!yIx0n}KruMg6GxqG%)t6dKBwlDPC^V>G^zG!%
z@Vk(GTIi7Q#017{HdnXne5VBKcbF=PUl98!z9G-x!lt!97a6T+DO)+w!9_EnHYxqY
zcNMksia`h3)Gz$9F>A}$NVv?J<JYoXXW`$V*|(&sBJMBUwlcf$vD1r>!5MGs1owUL
zWxZ2#L0D?{m3<wTE=XxD`23}Q-nX#rVjFaYcJQbQX(-$(m%LlcS~B;TZ*$b&(#SRL
zTfEaNt}yt0YGwEr`{h@XlDvZSk$N9HBjbkhMA5P&<+6#w@726^?=(7bQDA3*^u+sz
zTkiF-R4$w>{Cy%X-_9QjH-B&h|I|pd@JzFD&9iYfnXk&AAMn^Fb$w7->!#}()y<o>
zbG9+)y7#aCAdyxu;ZM(`^WQJ^`{=)4KHr(8V$N=U<1T@x?H}w;g))XoK3knSWBs&*
zZx40tockv5$yRX6r>v=)_ty7}(#d_ZSZW?jiO}&_u;KblkN*`rW{2!kzjL@u>CWl0
z3-a%TV-@b`?@Fj}C@K0eE$wM)j=xF3YQ=dUz8?{8^iY_`;nwuAMP}k-&ZXw_GWfo2
zRLp(!{6pXKQ!C$pkU4kZ<6a*}CS99*RdS`={5z)}fAd(;rE6JN<5WJG2d`v;5Bl4d
zUcPrUCz0vd-fqox`EAPfYkMAO{Ab!?>axOZ#yvZ+d26P}$&2l=@Nv@h>h8bro?({g
zuUn1>uKq`BR~(SEI~Ksez~BcxxE|8F)P=S#b-~9o79^Hr=oP@vat}Ut)?f3iXQ+p#
z@98tAbxxi+|2XK|+4I2#p1xrQMj-}9n~Y308yN-`l<@`{7#Wy6)zUb1-pf}{bDOWe
zm(NMhtG?$>KGD)Tt9|l}@7AYGNT&&KZJO$?a&ubhIR*v>8Ab*M2?hp+q|(fs6y2iK
z<kZZ95+qlc_V3L*Y{1h}zRye0aZ0^$#>ub+CoV0@cJ$t|_`oC{PqSY?w$5(3UHjzY
ziNw;)d)L)HSmMmXbC_}8oaZqo^rJj;0yIN9de_{45|^;(`?@{RYj>4xIJW7nmZh0!
zV7@^6F6;Tfr)4ZzmL6idwV-<^%cnKE`5Mby%F+Zcv*^{cMSap0v%NG&^4*q;=F8tC
zUC^*ntY6YJV}1M2!>wP-#n;!VZV>u(W!2>)jgyTxEU8Ej{-Vq{sj`kO``MRz2g5D=
z;w^7CK5?44RY)WL!Apw^NQY^H!T@y10_gB85Y3O;*h|jIM7ewbq<q4OFVk$87#M<C
z85j&Pl{+SZR){B;IOpf)Cg!F1Wagzh=jWBBBG!se4T;Scaul&;=I++s){(pR{ssQ0
zi%V|X&2;kMxR~LzO7PN=<EfI@a}%cXI4`b0bbpb)$;Bh`5BiPnMNSBqw&hFW^Lv)n
z?{^kIyLZ>VzWyIuL5DIk=bINdOa%`1G0L8w`rWD0Qnpf-Ro1nw;^^0y$kdK!pO*7P
z-F;uscD3#G$H|H7&$^z@D(CbxuetqdUeuw`-<&H>o`2V{@XCkQWUE83N_J+J$8NqR
z=aG1G)4T6g)2;`ml-{>~?o<?*c4NnxuUr4mfB!<wWop>!7*(^oa>_i%cOIKrnm04{
zbn%((B8wtU8{gRdO>%pxrF@%5x2nvgP1`z})_Gpa*1Kw}bE5aVuHl2G#O$~OXAa(H
zHx$|XV#-v>HJYujEgQFKWTqIz)o!ee-?h*D!p7qTt6y5pQ*Ccr_a-6JY|$z95_zq)
zk2iE>N$hg+S;6ggS)Ob5{<2W{&Ea2j<F9%iI*{9Tru6z$7tY_FGb}tF$?-{cna>bt
zyRt{E?X9Xy>*+HtS6htV6#DzkcH&(q_~(h0qjOpD_45tCE+rj2W3VU2V$09UNeY{d
zon6cSsfB3CY`yX&JMx#dii6XO`+|zzXBV1!?v{#lGYi|n@Y~T(q&%aSJwZx)i`sF<
z2-mJRcfCDk=`23CHgMje-)>>byMJ;|>=*BtC*q}?(Hd0Pa^oJy$M=`Hv{M`_IoDjS
zQ(h4w@!U^KGk@;A4F`7n`?o9#u4rhVvijUaP75>j=Mu;5XC1R&_Cx(ic8kr?%??#<
z^EOUD8kVpf`H(G8QZ$NB?O(>g!0>_rV~s8-<sv1)fW)HW)FRlhY-^7r=rRn~{~24O
zxXYgI>d1CJA0WcwdQkXC*$UlECeN=0dNvL1LcuY+3hX}H%ctFY{kMYYgg`rovrEg1
zoV;xha@Wq-aKiVtSM0m&v+9|$w@a1ZPcpdw!bvjx^{WGSuTD`+YgxK+>WVcwqTNnc
zA3d*}D(KKr9<}-G)RI0~#g`MEJYzc_FMYU3dC7kd&sUbrHR&%{?=$t(ut`+U|JD14
z_p9#A8nlus$ca;S1rr0qcQytF8_aBn=~v&(;^NG_bpHZS$K0_fJ-8&XBo)$In>#hu
z|8j!J@&D(|cCCG#vOr;<k@%Fo3dtP}4QfsLhJGdo6Pm*L*UU7%8+=LZzW$z%$E5!%
z#Ca@I{&<N0LA%ks<+{bv-p8h?y|wf||8ws3E#PzT7PN?p8a@d<64)`>MD^-wEuk6H
z(p@#rx)@3yHDx<`tCZK<#qhYnmm8Dk7u>%1mUsGYtLASG9Czc4cRqf>cR1$erqxGR
zoz-3zcgL{z(X7zbg@KyVPT&8k^*Kz_Jk_Ph^Ihz)(>8gByvd52(n2-nJ7(_K7k>NN
z+vPR4mrQ=RY3t#$zIoC?{CvvqxSKD@R;F0=DXy~nv@Mo*Rn(h#uI~O?s-chbx0hbZ
zxnA!cxasNiKcf1xVnVv5*ZJn1-l^MWuzj`Ber0{hNz*bMb4ndb8opis+oF0+w@^as
zqS5`Ueml$gFH0$;cB)O@y~t^6dhzVQm~@82pZB%hySw~}uG@>VJ$t4~sP#QQv>-2{
z`deS!`3c=dRY%gIzBwOO2>N8U-tj0WFSo?y4IR=_=N|59Q|)ngT=Vnaw~oYB)5VO>
ze7on+=JUPGz(+3GqHb|=Tk#I(RpuXJgQq8^ZL-zxd9^iZ{W9+9ig(XSX8vk8su-Rb
z;n%Ffs%WWbC8_k*E>p2e?b92Eg*6Y=5B*-!c0wjaPVIcTM8f-poA@m}(--fv^qa`y
zKPkfPdS(yng{2Iuo^Y4C>@%(4dJ<Hh^GJiYugTcrbzz1#(>doZe@TUz{~HTrnOj#{
zO?jWY!(UGCt+ZLeNd|S*sY?V-iW|r^<g;-eQ=L)Nv(4Gw{Ha8~!{c6=%R59Ch}qaW
z1ooZ(x7~m0yEoiPZ&tTou5p&1&g7e@)Nn`f^oIKzm^b|0mJ(+kdgIQ9({YcNmatBa
zbB#Q2>~6{|>5^fgnj!d8SUZvBlX%!6*GtA<T(rI+t;h%E_C0HSjX;M!H?uM@*b<Wi
zAW1*Cq$o2l-8nzEAh9SBzD$2^$a#NZN0EPNyHXQ#7k*ltu(c^_VL*zef`?bfl1|Qz
zg2FtV_m;g~#uHg~^4+u_>kpiq&0hVHsZ;IP=K3ig&z7xq%FePVxi|a!zRLHv^Y48*
zeqR4RgUH7-2Lzt+N@QK!xFGprL!0W>8CKImXXOMQ@9GO}Yt(&tB4M`TwE6Z47f$8f
zO_EsYalGoP&AO<2(t%qwjiL&(57`OK3;C_Fg0=lpy2tyuESE~ZG5cHfZmO<PvJu=N
z=bUuCP-lG(_x<f0+anA2w%z>Ta(COAS-QT5<hYlwo3&SLUVi1ydsDr5s&wxJUfx;r
z)MF0UWih|QIm+I5C;p7JdfaxktE*Og!Q?na!9UK|xjByC`+ILqO!k&=+f7-HE3Fqv
zef)kpyxPTbs^Ze?5(3j+|NeP&(yo>}{~09PzNH=(bPVp?^i_Y=S%+KC<W;xtYQ3>6
z=-9s7U$U-ougo*Odo|Bf)=Qx=g~@;8y5#1EmGOP452JVY<y-FKd9w1Vt=#2WfmM$e
zv#3o-^N4+Zc0=Le%w0iibj1!==x>#f`?jBRmc;EMse_3>+A^fle$72-Wb4%I_DIY&
zk~fCadw1fknJJl30`WUo{`>G9=UB_Ebt2ef;`|Ke0_n#A#jc-R+U*tH3KNzWG|u?O
zxa*r>R8IBB_jh+qvu!q9I9K^x^v&X@^Ay)@-RD$&T2D}4plZR@%$oWCoDaAgo;g0@
z>Ejtsg-?9;6!|F<R;i}4<yxMUV9-64Wloc<cR0(b7p*?=K=0ydrS|G4nJZ<L=Jj^$
zU*gk~Q?&4d>@TIidImRM{rqa2o3K1`l7?UV*-2AohZ&ff20rxNWHV{<*J;Lg*-E&M
ztxV|qWN5?paq|rBT)E^FV-a~L#>G!;6}qn|+)ZHgpYW3vF#rcj>o1gfKe;n9FnBXz
zZDb*p_KrpArManjC9W08so<g>Qp8W4c-D*AP^8uWm+|7Q+oE!{Umbe4C6xX8)TWLm
zgB=@|{w|SPHgTqlk@2hSomx9SFxF@odU}iMHbvffGVjjsyTYITef-7tfak0b*A1h@
z+pc$An-5q%Z(45K^lM?gzx+z^K*6fNA%%iv{cS9*-j-=Bq4T-RW*=2v##SPdx%AoT
z1~=zbJ5D}NQk?st<-w8ZT3?)8j|+Zo>ZqP>)VESX`a~<&<{6z|URfsaYffL}{ot66
zXGX@#{V@VtrdRx4#-rwCH}mJU0>cH7i#)$B6=hkv?@Y*nbvr-a{&o7sO2#RHEy+%;
z3qJRU3%;p4yZ^MyspM@tT(~R$a;H6YIKv_vJNIkJyvw^Uzw>d7d-J!ePj$6f=~KS2
zuPY5#U-~s`sf9*ztTE4Oqx8EkBrEssetYht$j<p2Z0~N#Ph{V`iS^9f!)I1@@n#oZ
zF1v1rG(ZlD6?^fF|Hs)G7}|x2EP=f9^2_re#So+gf>a6LjmWMLc`91BKV@>H8gpAK
zLu1(TSrbx<`5C(}Y*Adaq|HIvr)SHp#Bv`tbK~1HZ?e2O>J|O=Zrla4u<DH*x2hVw
zgq-$ViQBN~>$OGx|G!oKvZ?)bfBstA^t7_gv+r)`wEz8l_UF0p@9Zo7eeT`S|MTh@
z9jxz7-N;(LZh@-eV&P;VA4$cC9?wsoD&unEWa46$`Fl$6Cp#Syl)V061rKYv;g?o}
zeG!fN4^R7)Z1;Jz|IqDfm736FRW-If)u}m$YmN3b=6^c5J*7tae&V6`Z69rq-HC5}
z|CoKxpMXRCKYm!q+bcYlkel)$S+aD)p&y*}>XnrmJ@Y?(Pn7I`knZ#QGuI!c@7_OD
zS67A{TCXxkAYjds?1*O#+0xHCS|+#LoZh-HcYWroGJmIHnY)4+Z#Tc0cug?6Tm8$m
zh(j4~Hy%4?Vtg-s#;>}wWhJ}SX4khn>|F2o;MM63zieD$1!ud9C43AyvRR^Y>6t>)
z@aY9zJ8Fg5Bor?M-MOYyWmX#Fz2wKVskQA}3)h-HmFm1^cewCn&d~<Lq^!JCY6nfZ
z`^ta)Zb|Fg!uI`LQsKK553g*A{r7AitIyuc2E5z7A2`((eoA#)xZ`=ZbyelL90?JQ
zuMvfxu8I0wnlEL&<Tm>jXU;<|zt+r{CUGUF`8%W0k$XZB<&!ggXY32Lym%+>PQ#%!
zPoi%c?S4D)%8a~SOuJuwK74h<oe<7!$;yc}g-ok&e4q99+iUljAl(aL`*UPhyOosp
zKRXj07j?g2b-CmtAGMNiUNfg}Vp4VMJ#cHyo;RhX(Z#yw)^MGcz52G+QBuoOf6L3<
zxJ^%8?M?`A_H#Qtxc{I3W#pV)cfB7@QqJA8<@2S3r^Vu|DsP^uP2fz@NMH^-)%Vg|
zV$QSIpE9B>GPu4i2tUZXW8y+}mTPC)H{IU0b@83_A1|(Rl`V8X7?u-WAX}z!qqAT0
zh12#XM;=ZSy}V$n$z}4LFCBkuXE}bB(<=OL*xGAXtxB`*T#~)ICFbhYcUMBshFq4s
z@AS9O?(uh7t2M_@Y`Xuz)9k$_tN&xk#<_Qwt?F03yI9h!)T#K&lH6YDLfdus5^rYZ
z2wv3{^3%3xzOS`|)m*zm%$e(4lE<Mb2evKJyPs{rmpXO6Wy#Y8^&z51HlMw$vi1Hi
zg*L4j2Q4`_bDc}}J6K%dcIY!#+<|Vcy5k35+?wsRgY9aT+pfiaFShMV`KkZ#yU@P{
z5BlFU+dp*bd;j-S;0L4Kf6rIA9(3KKwV_V&=(;HrJ6@Z8y}`VA*Qy7me_E$sXEj@L
zRkBa&`s8hL%JYw0O;f63Uhh#6d@x$)Pwt=4L-$#(Bu!oQK<J*}j&IE0vKsqTxAn;^
zyUJQ{_Vg=9Cg0PU3SX9q>(@5!dbHOl!{o}XOx@)<5|>w8&e#9oCi^BjqV!9SXhqd3
zuD$s>HVbDfzgw|<_4K&ucO=^`MA__XuwL61H<!cK?QUm_>BP$Z7aOPXq??QXOcg%7
zfBAufsus%sKgO+M*4`4~Q$A<bw{vD$H*LMIO_gNeswjH6^i!Oy6!*!@m&?CwW9rlY
zx8-RsYq#xzkbQlH;xF&)F<~?6OEQz_*)s80+n<GH$!aTJ+D#R`vq7*&;7i6+gY;gp
zi8VT@M^`-iaL1W#_PzAnTIaf;!V`}^{(QKsD`%%&hGWX#32C$AVxJkMCr$4B9@h8$
zn<|fLqr~B?yZ%OZ6lcFb-n@O9&nva1b;Sn{7EDarmCX2ek*KA=eA(KP&5^+eM2cc2
zm2=8nIw5I(D@@BWrQqngb07aVoVyo2vspnbT7L5}<BjY0&T+rA(wt*z-K4Th{`ZPp
zDwZf{913pzzVzUs-<NV8)@Ht2@bi6DllO+*n<ll3_HS-U7q|MU@>b48GqGV=#$yq$
zMM-Q+FIu*=r3aX`e#*EkvLxfP$f1j`E{KG=tmf37|54?h%j>BVGLG0VM%r{~eUvck
zn!Lknj?>x-nchd7Uj%*CPes^$OOk)!m{Z=#zVYc&xg*=!Z93NKE5H8sI9O6zwSS+z
zRc}}OhD^Rw1u3a7D%VKq_0DTGDc}n~v*6RwW0`CG<lR5@bL!>KIXX*-XX1jb%Z<0+
zI3XwC?<nLy`RnqHk#iiUSBUL>w5Ghdbl<l#yuUtX@=VBOVlOC8cq|a<|JW?c)4M_{
zbn=R{+RYK_2AxxjBka$dyIWut{#9$=zJyhu1CAEYvpOm+vO?HKmCeh_X6b^AA71m$
zoX||)Q=!>yKj-0o)!Zc&4Kkj7VySOVcwR}3d{Mo2*MjR|AO6Xh{faXG`ZGs<(Zy0J
z`EL#9Tm6>g*QHu9g^Ttb(77vIJv;5AU|6*4fvZhv>$87J&t4u_{OWK0&!jB<1<ea4
z72mLFxAHpv%j2-gBAr~R-id1RX--Yc9QWw{+gB=<8_8w1M0Csd&%Pb&9$)$WUnO83
z|DjS5whrzH<+x>Q_Ihlr7v0`{-9Rz!c;QDjwWG5uBKEXSev}&{H2uT83ikhk?}K#y
zy|diYeW!j-QQ3@5i$uOB2<IJjob*?Y^`UWG+y7>*qsJ$HZaz?QQQ}AB9kxKuRqnbR
z8+|va@Fi{8xN@geNURxiVVdI%`Hx%PeSB#Yx$=R))$Jb^L<A&qJ#uW-ywJ=QT+kk#
z$kUr}<u!ZT@`na?uT&!~w>^|t*V@y6`Ma><Kj}9TskNMW%k{MmNBG}g-}pb%y6w2r
zC*;#fK`jKCY_W}JnHU&uvoSE36W>Qc@10By&Go+=AX1li{bHb%M1JrGRu>-W{L}^o
z9>F<DV&Vof6+b3Rp4E97b*b#g{D=GxyW<adB>i~PuQczrS@8z1htAdKZJ%47-}!yd
zmil#df7t>K^!1oLOZoCCod21s&Iy5y8p)ieh2p#-CQf|xTO`gi;_-)4PFGH8p*Z7`
zT#0E9bB$Dre%7*WOp$-ZQ!9G-Qu*v&=DR=F$j(?Ix#EWD**n)|n%_GweNk%~({|?G
z#`GIuC-Y>o`<~t8YYy6`b60cbtk<tvB39k-i(PAdYvIb(rk4XaZ(msBEca~kvmLvP
zD<>~Lp>Xw&EbFfcyIU1L=p0l2l=^5(?al)Rg6}Uh>0gSKX8SI+LMBsvR#sM6?Aofn
z#H-zny$ZoQ%^u%7I5F;X*IcPOu7|e|m9DyQOqZ22gzrbm#z<|E%ByPcnNnV?lPdN8
zJj-VB)c>+uUrH|fwVp@%W8^KBtesirb7$>K{;=&tS<C$ASGbH`gnwX*7TP8m_Wp*D
z<FwnewWV_({0SDir={^ww)E*)C&QehZ919j;<m-yE#f_IanU-t|EPjU&jZ`T6*1Q=
z@}?{-c)xQ;*~SG$JkEhj7mM}hb8E%Mes5ARS@CygO|*1K>AgwY@2DmuO62wD&nj8g
zpy$}^n0Y>Q(VL6B;b+`?k8Zul^?Y6MWc}pEeffJ$Y-+!hiYR`Vw)if?+mNWtbra^;
zwSQ=4VQl{L?vYnntKy`o3=N4kEb|?-0$==@dt%a-b!X-%7BYV1ws!xdzDrN>?YXiQ
z*N$>8`LVA$&&8&>=2q9WZWeJvjSF*G4)*VoF=!9m`GuqZ@8mTPI6d!tww#yLQ?vX<
z(qom*nYmVXH-9LRKk2l`!~cBDqV#F=j5U@|442_=SeuaPBVhS?spq5?o*xpeN3yqY
zC7z2YIk$;%f$I#FLozpoPyGIodFID!9k~luj+H*anwzHF6yKsWS*h2{epBEt$CVLB
zU&N#KY@Q@tRe#9Dz>vqzz+g{I4hhZ6#?y-48+tlO*ippR_M}RpkpqXo$JP#wlL8zM
z7b<lqEnONa64kmX;kruj;}<DyvwWt+e~6Fq5bf6zuWR`K;Mny;z0&5pHteyT$tCEd
zTKMqUy?3|2&)HY|?(CDl@3+5acv2Yc$n%)X%HX2o>Ny6J4?Q`0a7t6=8J9xSu56y7
zn~&$o9hGtZBUpZV*KRc?o>$DbuiZ`Uo_@M&W@6IO!rnO#KP_7NoNL<5=rS+WJn?X!
z==G1>UOo8CqxepW=j7D%Kdk2?@0b;3&&$5WCVP~-J)-U5;xl&s8~f%fOk}-mw%3)-
zwsy`&OULS?>DMF=o%Wm9^;%4G_0&tt-q}5yJxwp}iO7zv=VF83G$-7s&YI-G?EmKZ
zw$GR9c08Tg_jmdn-6;h^Gv?h-eQ~cgF0QE8%X7PJkonx33pOMNx4mBb>h`@!6RM5o
zW^VWIN|wKLFLOcO?H<Fp$I3;g?rkbDOfr`^@Zpuc$%e|Zd-<ET*lPO3&CpC-Cg~Q%
zc=`0TIbG2g-p^e0cV)7r@S|R>s*CDp_H^Z`gnUt0>Et-QE0M=-<AO(OVvBbiHTZu1
z@<D^EBDWc?TvnKt>z?xKp_zDBqRHJX<F;A6jLz!%V(;^+_2Qq0wSN2Cdu%J?)s5PQ
zF7k)?enqrR5aC@I8nora_baNO1U(kjnr!^LMf;+j&%XF%_6b`y+O}+YBX{W554k6@
zRud*D&2I8B{wHYko29kZ&HQ(h-8a$YFML)DB`~Bj)T`>H{PT*@{HY!2)3GSMP;ZBo
zcU+3+!|PkCm6Q(c`?)9UtI2~@i~fTQGr1>h+SY$!w(^7br3pLP_BosiIavH;>Er|U
zH3h3?1yoP$>HgO}L*jiy+QgzGqMW(YuiSFYkPj^Sq_|iq;)ef`o9lN?5(*7_<)N>~
z9wWU(;br#>vuU#S47V+VuCmM92cKOTDtdUT!*dBY=DEopzpc7MkG=0{F`Td4?f2=h
zNN2KGR>Ys=bB|0yD?4q3l5)}tZP)xk>wzj&os62q%D^y_7vow9v=MJeuM;vDFgGl^
z_;Q%QzbO)@H@XV(9C;|G(2y*AM~rF8k^qkF4jv6lJe?P~ZVYt~nHUrK?9%VOf4TR3
zS^IP;|CZoGOU{)1I^vxEOZ}JgtvdIxl$)Epmrd?GH}C4ayZPn&Z9mD^|NkZ3F#Th$
zjJ1JkvQ?j%Picbny%z^~(ptYPt2m=t!2Fr>&Jo>@72d~gbKYswN_KR!>pS;qWzz%`
z#l6Z)7FFmRTX4L<UE_Z5L4j4Oh1W#*z1=*IRSTu8e)3{k<g^Q>b?$ptKhx^Vd$P<V
zJ;LeulsJ>n<w@B>->&VKU75#wRp;3<nP~Iw!|Ihzfp3#iJ&otSWLK5DC9pxt^h(i8
zwb@do%94dfffr6yMwcJ-@Myam9C^WO+qCf3lWVeOU-9aAowje2!`vGl<{Y~+qNiS3
z^G?Uh;tt2HP2D>etrg?Zoo@c+&Wf#Dmdt7`yn99M?uk61i-LEr%$+P0^pc^+(0%h&
zR%dCs850F0w{p$Wko5NpnI>jZ9koaJz}h`oXI5;}`!>%gt1@%z%fEGuF#+p>k8hGR
zeWdg1&7pbX$!pdx?Dux^uPxYifcb(>znJWqbvMJVD8^lxb25}E*Mq-BW6S!$>q<FE
zfx7d*?NCnCkX1T&AnH|-O^M%xY+v`^m$Qw6nJi`&seiVYR(ZGVYR&7F(p$F{9Zg7!
zcFI2=QnGjfCtJy-^0}KAzud6ibfJ&XwamjC-mz}nww&EUF8XhHIERP6WYhtbeO8H4
zj)&6Bw|_XIc_n_u*&6{n4rkBWl(MT)r^(;_^8drry^m<FPI|ue^R5Ne{wH2d_^|4M
zLe8ZvVh<NZ#ifX_T5t4ri)?J(%k{0p-7Q0l>)oSyJ-@l+5_}H+{^8(Mp6s7ev*Lkr
z%^ALHG5xPow^?6Zberp2;{GQ84=bCPoqqAm?|v)(#V|hG>3)K|{?=%&OA-kWpJ$iL
zmc}QA91Q;vb<kX??0&aaXnRf3!@g<dF+pe6JSvMn6Jb>?KXcyhsy<t;cvhjLUFDe@
zFO=@(k4ZbZVcLADPt{Jl&#w@?9Q&$$DOYFv_3MER5?7lRBwOWZbI68ztDaqQFyV4$
zrfpuJvvGIR!8HeGw7h+J@&Bei%dk*YB{41;MZV$*YZp&T6qvkV3HRx1)7I>kD*OC-
zck}&U8k3q*6Bh5tUcAp&;OzbHpXbjhc*=b5)Vg9{F1=r6k#)yZg(@d}Qp~Dzw^oYL
zPo3v<%EtNjo!~9@jw+vxuh}|oSvGOAc8BMVq5@V~A-N>!)*VMacXpo9@8DVIka_sc
z;`)ETI`7?@__&xqspj31|026Cbjv;4G&`cL^Qp~J!>Bzj-8{N&6Cb>v`PXLVv)SPV
zGk$b%xf`(Txa23J`YCye=(^Ceh4$;iHRmpvo)u>KuH~#1<J*nd6N<Yvru@FMAXNSJ
z9)?hL<tuyJCeNFk9mC3DIazdS#hd4%<yV<)<`l+$4LMu0X4CbVQeg{Ue&0B+=<uF%
z%Q=dBq!KSryxDOiYr!^$1<p3}xD(!eVDhxMd*$S>G(W!GKW154HV2+RIX~{uE#s0+
zFJm9CiThrx5xG$>`LNFQC&f!PpT6-!_R-CE`%-geomDgME<dF4lIx`0d9$wqdwaL-
zU3b!G+ZBd+3aSUgj@>w-_pa(@d#uidy0WARl5&aj)(bw1{1<!7GDf}fS;55pPWr5;
z)>m>KE$A+L9>X-}III5F;Dd2x?W>Fr#`$e~v2=#k!#7Glqe{yTX_uK7_Z0c<YnQ2P
z@%Y`(>oS|`%I@3izb}1OcVoMJ)Q_3Dubvluells{lV>yUoE1AM`(cvzZy%{o4ku@x
z5MJVRF7NV8sRPQtD~{F5u`buYX;B+B`6cr@6WQ3EA2-}9Dp9{vopPyoQKhISr}~~9
z+dgFpC8e?6*0{EAh2OdZLi+NPuWn=t_4;`u=-$z2>GXvse?6|5ddW0=*FU67jzPVK
zIXN-smoqXjEW=!Qi&m;bMiEsYg*$|UG^98+C>FHv)^>B`<}zR3Ewdh#_h`Cl=843s
zC}lW^G;vL|`C;bS_Vz?tSem-(|Hb>4__x~qVE(nZtK5uHkUO(X?%dqn&*z=CE&uoD
z?_Zt=0`WB|8(tnhx~zRtdv4pCl_8hr7^bYM&Sl#!_?^XhpXSsZl3%Qy?tk`knRztr
z&eOoz*_rPjFUj6_vL*51x05F#?iSAFxNCIh!umT~gPhEj=BYGk96uY;?zo~-U&zsX
zlB4pw?>k@Jv|GIEl=Vcb!!>I(t~_(&IlGh3c8cV)dtX+DepmC}FZwmvHL7`EK-<Gb
zw`!i;uM{X0Wz$`}l<Re{_PO>$F(03VPCZlOAszTMSf%H~hQkbPcVDHvJ-q8~i}4!6
zkbmvIy-^<>%8xGH;;p&wvCIw5;u((DoLe7e{0cP?iHqI!)}~93_h+fpbcs`w-4iu@
zm3FFD9nyO<y)`*qDcp7!YfFk@+PC_HKRp|pZ%#?6{olwh^4yK(#}AGjTg-Hp?C9{<
z)JwEHy6CL9PI0PEozsh+qc`^-?N&bIVbdG7aoV)C%m46Z@t;b4k&}4t>qK=OfjTvp
z#BXRr{m~V9Gp4XJFdP@eTBU;{IR$CJA5x`H4Zm409WMIM?e>XFE*nFS_uW+sy=*Zp
z)v`_HLdDAN$7;SZw>1kr3!=D$vz7cOq))kcqBGjdd*OxFrAt|7O{+ETZd_u*sb)XT
z{Kv<0()oKngbVDmHkR_fw!={0`1w7{@7B+2(sy36|M&a2J@XHPd;SZA+jxu{1$xAu
z1Wqf|Y+SK>!;!^Ghy4~Dy}MFvW6NpJn1h=tY?L2IO3wFQAnL|+`(upI{Nt&T=hZ(l
z?GrfOZM1L3gN^#C7y72B)L1s=Nfg$&?uqYvEc&r7G-HpNU!7^^`iGk$>S7scoR7UO
z+%wUEf8Wd_vn#w`)Og$M)0N9V`J=DpZQ9LizKe@eDz9xTS$tyd%I;T_tRl94z4Ty9
zl-$G%Mythnuda)l)iSv^-u-T>r)gKS-hmTSGDX?Dr$uRGHh)&F{ZM#yRYogkabch-
zSMaWdbBlEPdIT*)w{7W4yYVh;>BS#w(t@8JJT{l1nQy{U-C5pI2id(c!*jf4w|8B2
zeN?=7D_^?TdX5)+rMH<K?CX8ime=q3;><P8IU7v9ZR0Pw{W|B-;WLBPx8nSVv{l{D
zO<!|<^mVE_H7DFVJ;Yl)JAd=hr*qU=oF@BnD%P%V`DXQACV5-t$Js$;KT4RVPPnUe
zx99Meb8l`|sCif?>A#rDa96G7)v2`JZ{HN!R7y-PZHQSHSZLu;|K;7g&MiGGsmb2y
zuE!p2i`4${Ehj!eXo`2}<!i@!4=q{Ay{LJze2G!m#|f$7D>&Y*F58*q{UXb%V9BcK
zT(dmZt_kmZ8P*zg$#UY&1s|BSOG-}fYW%lNXT{Vg+qWmovcFuMeU0_%>YbPKmQPfW
zlQy50t-gCxDXVqjG_PBG4sW;EyEjE_`4-i+Prj$SZ`jll^Pt_ANuFbyq|C3Kye?N4
z-L7KaDr)-sMcd9v>q7nirrf-~Wv3^@?JG_P9c>m*TXjqA%jTHH-tWGPy}Hg(Z{_g)
z-P8Z|Evt7pZ%tdjJo6gMg3H&Btcl$EY-{qaj1PTkKmH!dWNrUw#u=;raz|9;gVsd}
z48LdF6@1X%lh3bV(ZVeBF7m%sUgWDy3K0k0KLkpKi|%PIPpt_*`29mItNj6f)s%P7
z%#F91hKc;)u%Gr(*zAbgyVO4+2hV>f=49VnXffyI$|uIX;uCjYuZZs4%XT36x&EdJ
zqGujVX%hX@6n<VUxVhZbZt;WZKSbNz);DSTXwP8y`PVco^^e;@b;IQkw*QcIue{rn
zuw3M7(Am|C!=IV<x6fVYKl9!^ul!>MZ+#qhA9%t2+x3{<i5<%xScQr1X<sh#M{f1m
zCuWA~igQFuC+5uyoO5_r`PPjgQnQx5JLPTuz9^#T*O>{?>~q)38qRTjY*5~*;?gwt
z_4#?**qq;;@5$wr=FJXM?X|f4yI`B-o=Wu#%{N#tM}>46id@*{UuSUQ*V#qWPR|OA
zkgw@&&iTAr)h=IZ+p6vt<t#@OICA)BE#cXt@#2PfiH63RkjFP3SNiEZtXOCerxjS>
zEzkcUJWIXa#eIU9EI(&+OzQ{5bvIx9`=xNb-E0krp{VlHu1E3I;RcfigOqoBChca`
zESFxmGn(VRLrc<>q&I9&N)7!swkADdoAft3K*i_z^$os<Lgy@*FzG{h&itLdVXwCE
zy0hge?QN@8;c?%+XkMI~M_<zK0-f)6O0gT)oscSPDJ*+>ZhzRjJ>_%HwHO|H&ZtpS
zX8c6<&)WNrD<|8ZEjM~nZP@wSpgVTMMrr*=b$eQP;?z7B+Z4`Wyy@{Uu}86O`NeMc
z4~GAa&rp42(I=2U>4$K|lrp(P$LFqBbY$#%B=NwbU(ods=P9oH@6Ijpf8^;kN2X?y
zQ%zG?i(maN<!|aG=UR(<y!W-v-CQiFs3vQub!3OY|Auwhd_5eOY;QfbcsJ|S_d6V&
zh0oGB|39=o_WkAU7i!zT`?(*B`PZ)8WZQnz+3C}a-gi~(oi)Gq&Aw4;Hvit+J-Zf_
zuKsZ2)ZG2g^B?`ow!8a#?x`2fGL|YYT&IanPhTsWa^bK_?VQrGIU;^V>9b7LY{Xd7
zH?sE!cWt`lCS#<0D0u#v%S$3ANIpM#*~h8l#qp0UncUX}b+(-Q!7_j85yv-zHBHt^
zj=4Mk%|0<l^o`LkcBhE1nLZX=hvvG=9Jji|yiYjy%y)^0Dsz4_+^8^kqmt-<kM*0U
z&C{o^eD|?3*RtFWzOHz8D_`dPlN*iY_H<>Wls-MT|M2QME-Q~kE0*U4ZERP0vvmF5
zFA+1&32vFq$Jx9n)wu5Nx1aC6JPcm*{nsv0>-pa@|MIBDmBsxLUDVRrm*H%++uY@O
z)bh{uhtJ)9G1L2fx5d>v`dr_{s{T)kUw><haAKqHvcP=~>f$o$#(D{Jo%QTBx2?(8
zEAaI5jPEm~N@g%GTI6ZA&1K~})kSg(q+J{Rd)@A8$bH?kf<wS<V(QnJ`x6crdH)WI
zWfHyb$m)9B(SYHuv&kNocX<y|-KNCeo0(B-Vz}X5{wdBVpE+8~FHM@StMsF+%=)rN
z(Gt$%&qbnSy^h@M{;?+gH}Cmx0rk0MzrIdhyIOvi(T28|j@z1n+he}8A7T0Xyy``s
zq3vPCpE`Fd`?eqWJm<Imk9Th$Y1H1(T*gwpKg`E@-)ZN*#Y)bq?8P7JQ3txZ9Zhp;
z7#SG2nHd<YFbBE<it@8klS=~N3tf?_c(etRb0>NGA94_BJ<rPGQez)*c+r~!EK<Uy
zkCg*^v=^yJe0R2<W|-T~!_4{LD_l+Q592?L$0Dl?Z@4_xdG~UT<^3;Z%ktmruV?tS
zG1@^)x-#e4#;NX_?QdnuIBp6Ha@D$dglCtx_yq5ov*C`A;(j@{*mZf4*UisGd0U;?
zpX}xG`No+S^CSfNqJk>7EuZna`1Z%vyJ4Rit#5hE*Vg^9K(o1_HE&CHaGP>+QK?VF
z*B|K_HJ7hDwYBtSt^Iny?aa5x7dK_sy1bbA;#~g=)$2Y#?x;=?zQ)!k^s%qCPe!Xf
zzH`OXz4l+Pby#UAwc9k_YDud1y4w4VQ(Si4+8bRZpVD5(X?cI&aaES<{?=u@5ux`!
zNQLc{?OA*$(?HI!{zNwKp@a2&y#*G{bIaMIHr}@gF-U)BFC`i9C2`rg*)wye^qhNZ
z+dS`#GZ$;{X5nI4?qy=j85eg8*KnRTjOl;)CF)fnn{KW1<@gs~Ota>>p6+~+#WZWC
zt2XC~pXKT+zoIQcGRg_x=EcOoz{1MF;D#BeNKpuGwiT&3Cl;rA<`t*r6=#-YmZf4}
zi?lQtvLM#DM(07&#BDz!k9JMd-@Wu#l0vUnl9Ni7)8+4sY;)3X=9W!Aw*OK6gQL@D
z{Ya?!<}UeON7{#*P1t!x_49K(pP#cW|Mq-+{(VN14>ruq{ze`zRU2*hPT>tH*^#v%
z@4?E(+O`yLOD&6}j;Y$}JRi4NPvMb{eR`t&vS)nN@kRN;D)&p-57`OMyYyQ_g5OSS
z{zJ#;ADbTkxb!h}!MRlZ4^xcuc272b*m>Zq#F@Uw9_w?t?@yodVb|-b<m@+{3w75v
zuGKl@*J6An)Slz~^tepk)X4`WTC8^+i`!YPb*?p)J3Z&@#IsL5mO5L$i(C-1eBtj%
zCY?V1m75nz^-4rM=lcEo-%1&lvrjwHp3mpg6LwGkwIuOCl7q+gZ9m<D&oBC-@6cA_
z-zzKVvsGyA{@79}joZJJ<Fj}ER$g@K%bk_x6Y{6X<t47Xv(L|H;z3hk_KX=8ESfWS
zd!;4`N3+c{-1FzYVSLhj^CKI!u2OEh`!&lj$F2X_jki6r*C+3?JN&JSeF^)lxCN43
zuU~iyHQSk=TEuU_x2|;2Tu<w69bWm)SK%M@pA@8I`o2=|e`NLZnM?!klZEp+Jtb88
zIb_vmufM7<V14Un&yjOJzdzn=(p)cS)2iaHeAw7vr{t!v=Q9OYf7=$wq5ZUQvW;K4
zK_T~c!P9#e%_?6sD_?N2{DigF>J$Q(|7V@Fd8)YnxyEjx$U8^(Zg3H^e)3Vqqch3n
zPm@mnl!Vn&HO??{A7T--Tk=_=%=wS<6sO-3=7;PoPR2R+y{ntAQ?&9vY6;Z3bc=Yp
z5Cg+8Ev!8YjMNJ0S<H>(t_ZohHBS4p#pKD%Diax+ZIt*=CQo8;YWQH}5;BFwfTOLw
z!ozP;<4K<xGZ_L`EV`ogHE{K!gxc_xYr|YUI|A3NeV>%Oy7sPJ&il7t??vCP&ab{*
zl^y+Z|F@Yl&-n1EeS7@)jcxV%z1MEN{`h`t`Mp0sb_%<gbo)QM#P_TI=O0%)`IFDr
zOa0ZI{?AYT^^=9cR&Nt7t?Z0^EHk$xGuBG-&9$3{<7Qqn`Ib3n_HnD{FGCKr`TewU
zdOXoe^y3PfbI+S>^qv?$F^+nyG?(v#gna+iOq>48H`ZTPk$eBx`000+pFSd8o>sAk
zC)dX4D5sxH@9OdS`q3og<mK9$ZO4pDxD2|V2=#iGu1fk=ICIO>Pn;*ZxPFFkHETVz
z4z}Cu;=eVoOm6<+;wQliKE})yE8cqSb4k>x&!%w;3VZ!dS1k0~y2qnKEPId3^3O-P
zyi0clq=|jrDRuo-$wq^iW5qQw>S31tY14jZEcdXCw-bL+qqu&;Jy)stIqt73miE6k
z&71H}OK|6{<Jm6ny=(4h?7yB>v(hh|<8f(>!rrfsx*EPPNyR^VtakG{+uW$i#b3;R
zR&0)VUNm?62T`lpd#)ZoyNL78#YLNsuC>dXe6p;!+2=^g8jkk08L=mNj;@ioJR@}O
z#{`u-68&t4b4w@o`59*&Qx5FiZs@ztIQd#v!5OaOO&=flY0vJ^ydO7rU!L{l9o~m;
zAD$SSSY5PLvN!VLu^p1zP52xi9bG>2b%Iub1fQ|!RrX}Vj&14Li*vKMs#oUrMh8}#
zEGulu+#+RZFVTBcdiOS`^?b&6nAlFQx#n@@l+^Vd#;FarV$-gin$T@?;<Utpy55p(
z!?_!?zTOgDEw#<$$~FD_n|77Gz4dguiNVkCJmd0Np|KwmR%m5yjoiIDY<6LWXxs@C
ztt9Q;YphQuhe(y~y~#Q2&as5(8$IQ&8xKF7xw-qzd`06sVX4LuJB);KS7&w4(p6l&
z%2H&F*PGl_gPfbE^kSIiv8HK=+zge!le<;r%+Vv#8<<V!J=8xVvMpk6;Od(p`x-<v
z_@!@cYM!WNy2VcFh(W}BLtiJ&Rnz8)_sooT7SnkbE5gJ6*n38d>eAOU?vzdx^ETt&
zDz(dMThzs^P8@;S%4gFu=1TIOdVger^*y;3sbIUr{B7wjHJ)DUTfCI??W-r8ysYUx
z<Jr_4nVh4Sass>Ry=OTToXD2@!FiA`hhgsNa`Whu4M*gjO<xkYwn1&v;fI~ZQx0*~
z8=jTCBz2iBm48RY+5&DPws&!j7rIm)hwHpwu&<#dP5A7gTOLl^PK)qa9?U$kVM|BI
zZ^vu;KD_C9!iCpo?lolJZj~Ds&MjOSXZhutvAM09@v2?clP5lX!rQ)N(v(QcoLh;N
z#h)D5m>Pv9imc}4fAV<CB*&gPOmX2oVTyaB+ojL=<=ja#;VR$d946uGSZtP4Q}j!9
zfvTtulT>rW5u=N1FLos#w4ABj-PRsowLIb6Qp3%>eqWYXFWa_RSTHYLROo(3io%x{
z9$SufsZBm1C%Tm(f9)yJlN0}#Yz<>eUbHfM_f_5-H!|CHDO^czeH{36@14AEqluNX
zq^5;VT(^T|!Q!?P4s)DkUs;?;DXc4!Fi|$1(pEH2U`^w-tBLQ@X3ffVJpN(sw5<y-
zPtYy7)pptN@s6r@7miPUZ1sJchk)R=6)L&4u__{)W=%al*Xb7H8XF6>BPU;aY|DCW
zW*~Scc=E<6+M7}jh#G2#o~YPfwsl*V|1n;nEk0*Ec7$ol8=e2o*w%e2CE0s!xyt0j
zcdjfy_aa2=Ra)_8(Qe<_*2QX<zf4-_xIl!};oQ5K%-YXSpDJ$KToiC|Qp!e$h$pIb
zXM$GgNZ1{AJldKs_~NaTQ+`j0>+Hb8oPGV0M-~N1NR)`*&As5b>+%Bq3%P7}9m|b+
z{XfNB6@CAv>+rH^Hxs)rSma&c-*I}u;XT6gRRMP283fqPDi%s+A6F}UWc@;eyF7Ex
z?FV`LE-h$(X}Ijs!oFt5HxrLZ#ixFdyQj@#uRMSC%vJ3+-(+4#Mb`En`^~d&+WN~C
z`wz~4`kMEj%zcJAr~7o9^PezxG<R_|+n+Z#4CT=@+51W?()_W+iWN61em*+;l<oc@
zcAkAx_a}VF{o~r4|J<4(L)ANZDQDNwsi|!DPqMeZ&})AG%$XtARR6M(cl(jd%Fc~l
zXQ!rmoHnuW4V!&>@kNg-4-PN-a^qm6G|yHKC*yCW{PiavRR6SVelPB9bD&(}kAi*5
zhr=}|9_U}TWwSrjpH*|@!FrDEIqZC=%$nsNuUBJM65g*`w{Pyl2U`PAH#N^p<K5gK
zy~D4wbidmC<GgiE`{&(BXS#Cu_S$J71?QyIKELsvbI|`&uw?YgUbnM(mPRIZQjLNd
zW-q3H(QfYVVx0dlo!9P|f99R!3;sK=KX@E@I(T`<!CIlhcP}@-eAshfv*E1=&wsEr
z|NdFYF>~FXFHB6DVKF+~Ja&qi7RMxeP3r<*>~B<3UF*htYuijkk=?Pf7HfiAEwc2h
zUmrDIc05glJ^5%F2gij=v3Hu36mqtPbcyMA&fK5*VY`ZS)~hoG7DD=F(NSH>nc2Q;
zc-7r!<<5$@({rr5ob~bBqsCfkAyOsWcUOHp&9?v0`m8^*QjUD}k#0Y^(L#u?RNUPC
zP@%Cz+oHK|(<E+pvd!+Eef#0Yw3Y~-kD8l8HmABhRcBlCwx&1U{0DDfiqpwuORLRF
z<3gpG?(dZj-rJ=+`?MEt-DJT%!EHUNF?{D_CuN*?-puN}$>Of}p@K(uf6i^p{I-0*
zJ1={e|7SPD@`?hh7blZ;{G9W*;GoD4lj%zzKB#@R*!}0rm$E_nfA_rXJG!>dk)QKh
z;hus8YtCD(2#vI_x}aO%yz5!|La|e}>m`NXOybIT{9xCHl7gA`O3b?sIEemvV6tw>
zmy)H6|C_X~eqnO$=du2oZx3s<wC(fas}-NObcrb6HKwm228F5YThem$-)hBMREX@j
zlXpyN&E3MU3z~Oblnt|Yn?A)#^!`uTwC`a%_qsnmc{?Sr(>Zdt<307Vl9>x5#2hch
zDnuBb$ojv>*JRGcvu_{Wxf5~yT7=S~Zm%$lX^)guUR>U`;b0>7|1)oIf1I0^;xt1d
za_{vsGRKM?u5E4?<Jlgu`)TZfUfs0<^6Lxk&0446rK)||Nd0<E)XmCHW9_nycV_Kf
zw(AR1tW4$nWl@ux?BzV~-u0FVmA{#GH1I3WiuJcegHH&Vd~RD)5WOe1Rg-6eIH$SG
zAy4*a+NYkFP3x{(XSVXpxw0<X<p=zjxkE4QESRS}<JO{y>&|_3oESZWkN49P#X|Gs
z7iaukGQ>8iocA()-n!vdzzg9A>@OE3JFjeSZt9WQE}!6*rnDgU!sj&+#U?Xq-oBoq
z-t&{sOmf-OJ-659rOunkJo)Rgh~vtPocn)V$?jV8(eaPf@7|-`>tyY|X`ej6?O4UZ
z!}$88zNYXTqiTg4HftBIlh`8lHpN$F$>E;6M}8k@@?V;r7GS+dIcDAJS4L^=VNoCF
ze=KhmS?7B7ZEZuKtkyBN2dhQ%wi)H5=&#={u3PdTX8sd%t=`#x&T`DSlytNyaJ6bF
zzc9;-LoL^O3PQJk_V=i2-`{m1a!d29juYOGmsYsxyXU<>^2PP)e%b$p6T|;DuhN}T
zZ5Q~@VyE__?-~~#-RgTk{X@3l&WndLBJC?@^-bD7eM{UY+qDvRy4Uwj%h2pyQvA~H
z3(xF1@0PQ7h^%H8USl@d{i2`F>!mRpF3+ygd@r%{g6XVNclVk+lK9B**vUgJs_i|~
zhSiHXOXdj~97~%dps;uM3&oBb?);ah8>VN^UA&^`^r`;KC*-8QhIFkBf4N<&H#Dy+
zRL|C_Kj|OGuT5{)Klp$6i^R^Bm9G`PoM)CUw|{E>HP|uu=H@4_7CSAAvsyge?fUoN
zz~IP+>P#Q!O8MTSSC79f|9;V8nR880Ro$}u3qPd%Tkl_XPM3S6{%77HPFs_Y0#CnR
zUZNIhweumz{qBd4<g8u`^s+C#&-_T!)@19k*WbTgFcEmec(BAZ;N|xx`tKHQUcJ{W
zRZt_1W9_r+e||jO=d@P$;3Dn~D?)Uwyf5yu<qX>JwR(@{u9NO7-x!EC?vlCEuDO0b
z(}$3w-4RTSq}J!$e*bpyDH)eD5)YsHdW77%xSR8y>8wRpgr@w~`n8g)Cr#|!tA#h6
zY<9Pwn^>0Bd;b0Fg_i9rF1+{76JK?KQF7kx2P@C%o)VoLYVY!SqxPRkwP#mcG>&}5
zIjySe>7BN}J8Pa?IqjaIS#r`d+}yCfWf}j{=>jiLrGA#`tFhU<sQnqc%Dfn@d3`|)
z_j^tH%r1r;snCt}6Tc`H<sCkuv;KCbc4@G2tgmi~aHPk^FqcSaTkp4Dgjiqh-qKsg
zw(7fvi+m9K+(icc3po0h`pxsO-Z5iQj91}2_t-lg)|JX{ALU*-JGVsj{nFJx7~Vc~
z&(yc?yMH`G-roQE$JJZr-(M#EgL&7)dVllJ@-M^wY2Q62fBCKDq<=!cZ9O*Rr5p}_
znf>FQ=j4}#RZM%`C*&9{b2L6YuT{@%E?Z{si`QRNUdlM1vu_pMc_C}{tgnuFd5glm
z)?dE7;PQgzz1Q03{$;#YCA2^Lr)3qJE&tTd)*Fr(dVZBDH{!oo&GPHvwFCQI^JLwp
z`8|yfl(%J{{axMAEzoxU#I<He_BQ<EtTJr0DmGM-Tt9WE;<6kDj+_U(q`hRknDum7
zW7y|LKH*6!)Q`CH{6LcYq`#`OS$8ko*|yQQNqX;=1^f0aeB`qAcIaEpPa-?zoxH><
z)2>%ApS-i`<iRrWXBU3`l{@lYpQ(0B$I69=*o38`Cmxt|w8@}RE~@CsmEWn%hL)_?
z)D&j&Z=1=o`;1F8ul|}w`D=&IOFuBbST6j>l(R6Q+SOg9UU+rPflore#4_%LI9z@>
zUwy-2(*+eX;||_Pa#;7{%u)-Ljm5%yS8cf5exjcD-g(c;Q*xEgk{^A4ZOawBuJwU=
z)~D#dTehs+GyNQE%{8X5>86aVud80}Y3mZ%_w8cQj79U$wZGI`Q5dd~eTCt$&^HUs
zy)BM=%w}`9p5@vQ=+V)$U*qJHB!kG#GuLwTcK*A=GQISYx5E}ag`l@}4a<Ji`bk`N
z|C&(NqQ>vzFJV*rDscb$4G%5WHi@j*5OrgpS7q1t#7kV)6ArvR|MhikNvHRVbR~r+
z3o@qIc9+F*vMs%<@=|rn<u02h|EtF{bGNcTRP@%9-Lu4aYs%b+h5gr7KlS=Cv+>aB
zJNf~^lbJHE?CQ1MA}Adt;PEb9cv}$5h1)R|JNIh(vsF&EK6o*%;+0y<!)eQd6)RMk
zydBiUIp4T*O?TRT)8oNa4sPL0X}4MLoMw4VyUV}UPvOfyR>p6|Ef;@tOuw(#V&`<B
z#z#GJ`F4iR`wwfXW<S4B{xjy6-1Up!clO?U82d%<zL))z@E6DbSp9ABZ}Jwp$G0fS
zLNmejI>QXrlw!r-hE?2qOg;9`-RGRIY;%8kCc|!Z!{62|-@aezx_aTO#-Y33rYX)-
z*f;UA)=BvXWft;WxWTsLzun&MjiwJ)98*~_IkWK||BH5xpD}ea>s;*Jgns;t{KaWf
za4}~e`>lI!d2imZ{#xl}>^aLaBiFWm+NICW8A7A4=^Lbdu24Su*mlWMJ^j<2zn)Le
zSjZlG#XtPO%(T7Lo<Fmn=*Dc`w5EUCGI2E%+rSxDD%9kb-C;QQ=h;_A%QsO=o-=Lw
z!!d6w+kvG)m%_d@swow#ED^h~o>A^U--1echtKmhSpRcRy`7(7S-W7*MwVMF0eOG5
z|4M&*=5EjU%R;NZw{%I>`@`Fq#81DlKcLKhr}g~YmkZXvsBga3_KTlkll#l<oD*`}
zQ&X<1pL)%#K5fUUyRPp#`7f?>m{?ub>#n$3J+S-7@0D9uZqS{`Fz?voEBCw|Lo6me
zxjpIg?aj~ogFJLDUvz$Wm~)ng+1WaChTq5PqVA|HP1i5I#xC+>$>Z-!x<a&ePmO!$
zAjEi&n@2|fstMD5`->UBj1~Sm&+@Nt%(Yu|`bEBm(m%gygL2)TB5q^ba~6g-T+csV
z;py}{V~T|PRm0iMJOUZ#9qZROm5INpS=v-)&etNSweGG3@A9P!{<?78@>f~#SL{c4
zk&^oTcAuZ`kGza%4YN@eE3=;$njXG<ZnxYo`NWQ+|GoE@N4z<ucWJKp|AT===RT<X
zm;ZY)(U9NS=*`#VSJtkGIxEm?-(2=3KqTUwom1X*<B%{3Yrl`_8eg`oQe^9D$$E0a
z=F7h|DzzPJ#Cl|0V|T3YOcGyox7Rw_{Fe4bJ?CoCTecbBn~tYnh+F$-NAtIxrMtx5
zmP*WTDYL(Ie$igJ&#oEv?cdn8nC~in!Eoz*^FOsM=FNGs7rw~mzq3c%-5k$vcWE&b
z1A`_z1A{RG%F$!VIhpzxLscq{X(g#ekYy0Xka4TEQ^9MdkN@8;{rUx)v_{8y?j{HB
zDGL{M7zWmGoOBT`>D*&jo-KQIooTu2N97-}HHw{Hi#p{G@Tb~rU%hzK#3n=Es`HlL
z@7%Y1ey{lU{Qdj>G6Wp!ViUbAwQ0snb<c3a)r!59ylP@0Q$lr&p1i8teA0IHqYAUb
z6-UK+H!JRVdMoX2=^6EyEjihx_tRSyi%)-?^T6!zlafupeWohzeHG23?{WB7+2*wF
z<$In@O5O43`p)R5hbC5~_^?O&bt~3CPfh4Pp3ijr`;uMHDz`<HR<2H+oF&&I(=Dl{
zsP^c!=If-%``6!;W)ye4eZs&fw({uxqRUU?4&UKud2AY%W+oXFW0F!7JDG=hpSbgi
z9fu6AOuHSJX|zDfta?KA5ih+(-MZiJ_kTM7^WLuae^;t~4`(Z{zB)}tU(WMrD4W3=
z*@rVGpM1JObjn=TpIXK~GO3)++aBh3@A`XS;#$L7PmT4v&nUO=sfj(kSbkQQYTHwn
zU&imZKHvYjYR<hIiNQw4N=3~-MI7^O+nM}T;qaTnZSxa9{<!OCZc!O7xonr;mkpn_
zRDT-Qytw-M|H8yaYk#bNS$1@(^cmgP|NImxrtZG|O;Ns4%j?V0Gfvz7Z+N3Q>DPi2
zcFLz2GWk7&GtRus;qS_;am<<?!rJ^r&ZR5t+J)n<F3f0Uy6JZNoyH>;0WYD`FDyGJ
za_n+h{3R$z-g*ts6V*FihEw>Q6;H36VA!@<@Uh_91urye+<)y{SnuYQcH{d?h7FQT
z-nrNLHV7~-E$LHfee!vS#KQM(rrDFSYFyp!FV8A!<4lRvp0?_mVs>s(pqQQ9$9EI^
zd*)y4wcT)+S#n=T>Wr5KMmt3EW=~NwkW4GFjInf8XMIuPr^UlF@p`7z`6W;JFC1W%
z@-|sCV`=?@M$3%-Xlps$FQ&bj#Kgcbhn2`Q4@vHjIRr?WpBntu548IEGXJ(ITVqd6
z*O=85dP_G|^O1VTF)3kXMqwVd|GaM_rsb5+zI*h=;vX#krn)FxJi`8AVNdxoZKvBT
zKZ>8<vo3yT`~2LQ_xAR6j4K}9R9LrQidyoriH?zN#hM|PX0$I&ROIDN&0KX!XIVk`
zyx2<x@rRk)EWXd?*}2vB-^6IX^4ZtYUfj+ro|$+cyM5d9Nt=^)m+d%Z5dFsX`fSZy
zLzS#JXM>dMIpJ&GZ!c^4KkdDRQ`x6icWyq~r~6oACP(?(2FLpWyw<52HBWaPT5oB?
z!SXgLd*1ymv+VLM4z04@7Wz3#_nlcvxA`6ZmUxR^b;g;?IO1<O-hMd$dS&;o9x?B~
zEz6wNKhCZ=!O(VH{8FXe{8h7D-&}4o3C$Ff<Ptw<$n&u(mc?=5t}PB9qCQS~^_$B#
zzv_2g?4B)8FC4GdI^*5ZmndOXBr);X#6BIKq_~wC6>E|uwRwc^E}j4Wwx9D4t9c2b
z$9a7WH+1#Ao^k2Jl!Zl_uXv9A|0Qi*y-3dJ%p7~;s?)BPd5dSuzT|Uy*M892`cA6E
z>*AeDzkfMZxm))R+r_u#3)Dq3m#utu{&Abl-yJ-EuSxh$Ip!JX=Qs1(l2g4GW=k*4
zn7zniZRBT(!p$`uEtcgRWeXN0b4^?D-Li${vf+sp8mANHeEP}9BfNu6!+%@ismZ-b
zPcG-W<l3!}h%nr*JkeV&@d5X;I;Y!qZeDH8Q_{D{DE<|l{NJqYTuzPkLlcGUjfqE{
z-(TsiF!(95Xr9>GBK=G`$M<ag^O@AG6P&xwx<BBZ<bG)3%%?k(kMQk!Fso_yTxE6R
z3kI@h9S--*YW$M7uwlo<L%DZeCb~V^H8V4PhVSZ2hG(>*k1W|WN#gE1+Zb0V^UT(X
zzqnCnAS%=zg(R>rFi3GTFj!+2>#(z8h?xXk8x~zI6)N(NkH^Po=Ch`SrfO?Fc|@6b
z*KW(yHSxN9CE=v&%FayfvIfqXB54I_9WMWy->-XG9sim;H^w`B{$JUDe5Lan6;d`V
zjXd-y=G)Em_xbg|=hXgqygt63y@1u;!=nAR(lpgMM`iD=@z~MruewI*?DF{^FWPcS
zuJMdGeoVmZ<4=Ko4%5uPvwv91!oQ7egKbM;%~5shJ(UWN6z~7{IC5R7i09vuW!c|^
zn$2ZyCJ9XIU3AJ|(#4X}y<sO;KdV)ZSMyXz7Mb#CT2fZcJt3}d^U@4s&P$|e#u-gb
zitH6X6=gGZdrq+CjoHfrLw%!z*QPEG3jCsEzB}gyyGqDemctRAwqaIxR@|Dk(72~v
zd1mcXhtBDudqvLQTDEMuN9m!ZRqk6w*RBzhTvoLvk~KIxQbtGO*SfG(Gg&M}>^QeE
z-&&Rw)zU5eaJ^M>+s*5*CvUr0^i}Jvn$aaOZeK2myxkWp)VgJN3;7ylZ42vLuX)ww
z$&oom;iej#I=-G&i{$b&pU&D9w0_pARLcoWdvAQnC_YlszS2b1Wh1X#&kdequLOIg
zWt&5C(`U-hnf&5hnf#o?uRcE9d2Q{ilEWq5({}8iTIHClB=V6r)qM5TlA~ptem|D}
zd~^Bj6PgFted9fGbLYhS;oN0PkHi8`y_#NGywrN}+ozA}V-LlJ>sPKSkomUR?X1p<
za?^Q-GU~-^8*fkgXr?CN=6`GIN4;gKXA+{0Bul<`7MwUe&>`@cPJG}-?L|kzzqw=>
zef#L*66waei^JJX^!_P>gX<6IN}6TyOek!4pK_(}#O(Ky=Sq%hD>qeIH7?hR7WpP^
z@b=of3tlsqM!8;n`>iu=MqTXIg+<bJFV6WbnSbar*W=??!|w&JNse%N8M8zG@Zl}1
zS3l8f`>)=y?)9MwZ;o%vj=z2Q=s%N{8!oJV6cYSMQQ+Mg2FbIJ!t-`SA2L|5b(O}{
zz3X^oubMuX7Iaee-?gi|-r5}If01}~uIKmQ8~a`@S|)z=*D*ck4dSAi|5UDI{O;XQ
zT|WP4+J#jgoqfMmXuK>ruWl*vPE|bZ#s8z0ANo0t<=bw3e?iC3_O9=bXY-r?Ix+LB
zt^8_zNMYH5_iGn7eBQyp!KBaYXMg!sdZoYB`^0srTfA<?z0!B9-G6ZN<GJ|<X5STD
zA-swA$F(HhAJ>yiM5QHe{JWmbz_fKTOG($h=YmT8G6zd8M+r9?b6xD8c#k7=fx+9f
zJrxtgzW-CN{2n~NyJ$ZzkC}pOVn^cNOM)qF^EWs(8}sNt+^Olq(fE+fO{~`O{&|V+
zizQz;lH+phkE)#A;~3A}$f@{3SkC-ISoX{J;v3VS9DKcCpTm@<8yted+(!<zA93#b
z@K#`xdrP<b2A2*ofu;F97g`HMr)TIT@O;-`oj&(R(c2$IU-CaQw||w1-0k%-Hv4&_
zx0q+q++$3hc@LD^_s?Fe`Dy9`!RHB6b&6(RsxkQ{^q7}r{?{|PlOtEYjda@dH1d$H
z)7_WHFMs-CXjk~_3eqJ<pcT23=k5{R!^FU#z{<d&gIREaXZMj8WkRZENTu8x91T8N
zWZQP#w@OQ6L>x857wVqs65kWBn#E8t^T-mm=6q-C$Tm==tSea0bzW`Xj|H7x6I^P)
zxl5jZqqkJVvu5U8%inX~*W9->-v7VmKVyQRwq{B|K%kFelN0|hgZYb|wsp<+)LAm2
zVcs!a9ks&fy7wz~Xz&02WYdyI`?}b<gsRWv6oxx4+jRQdjC-G2R))8{*42KolIw6%
z-06p0X}jvCH*G$t<GjAAZ$r;hqopcdhlJuH9ZlkIga|}$eLdw)xS&}5#jKp-=c2dG
zG$*aSVVag_<bI-9qNmcjY04ArZOJd%4!=^np1tfsywUNeUqg3aObCk&$~Cb~j1JMu
zEXWU*3NV<VKWDm&oS%Tp9NWf{S#xZ~_Z*0tb@jD&-^Hwg)4>K)?E5G6O<u!$@}IYV
zQTxt?Nm&=ASy!&QTa@)@=LA`a-3MMZrCc&-Nn~YSo)s?B{?_}X)USzG3uHHQsrjv5
zH(SkoM&#ES$FJYJw&T2OM}tZD_MNjg=jd-g{H>xd^0(!GBb^gY!CG=z6)cl%qz-vj
zuDfVtd(ot_p@cK)Q1ULDyMCrqKDrzZ*39_!|5#;PYI61T>nqx)pX+OT*P(rnb#L_Y
zscqWs8}C{EQkYueo|zEa%=Q1|zSW0fvrYEa`dpfQ@<Q{X#*=!N${YLyd6fLBvn<z6
z*9>x-Qs%I%f@5h#&xvb_i{b@;zgfKgg4TUS4P`e=<t>+)Y}O@Rc`ko=>x9*kMY^K<
z73&)AC97=P^h$A(eRsTowG+Q&t$bUs`okB3mjwSbeOnn@IK!kVYh}pd`9F@YbdnR6
zy*mBY)#I9SA6LIFne_kBq_0zS9<ndDp7c1e=)we<qiTktQ59~2nc<UFKD)o@eNbAi
z{cM7orSiT2dFf9l@0z65NS35K9}CjZmj3c!QWNuvjr=KplBXa0$=7>x+d)hDssGWc
z?`;P*6whE{U|7Y*z(B%@JC5avMXBH{QiQZPeQoGlKjA<D+wHNt-j;0?U0C;kJxOqv
z2$$)J<y=CBD!j9<m>jI%{x(W{`|fA&uJyZr<oxrP-`AyA#jb%rbx!oOs7te4AI(2^
z?##^MW#!M_{rmIvFMC7f8ubZBczLZheaduNJb$*7n&DDKcITs8*hG!B(o9thmM6uz
zPE?F6p6&bYiubf+qn_z87d1>+*56KBm=@i7Jf<S8_A`IlRK;iZ+xNVQkuMQn7ptLC
zb+$<7zuw`)D+?-i)T}$SAZJ%;mYFKE_1d(nBK{Rumz@YWxwdfkp;pzu3M)>ONgDOX
z{hIbVXU{XG0PWv*mhaj3)?(R>EjypjY>DQ3Yjk^l=fCV-C+~;(erl=jNR*hV_4Z@<
zo`mA%ff~OL6-lk$xtu3ZPioV{c`Cg}O3PxNcPLb@x%vC=i$m6S_m+i~MK4Ori~Lk!
zWcwm=<pSl_Eq$9KE*N$gTjeE(99`6>s<S%c;>Ir1w$^~Bz3KZjx9L3m`83Sb!<gqg
zxA#_S9%J>#Ha7B7Gw!|h;=8r=Xz;lSivRV~S3Z(smQXt!Zf$)(?(UAKvb<$FIoBoG
zD);_-(5qk3dRzCi{^^jHOJ4;ppZPiJ?3X=n4#~vc=@gD}-MK{n-|4BgFK_02Uom%H
zwvEj4o{Q2!%gfVQ4i_D5EQ!*ybzKyY(chM_>G=t5K_xG<T%Y&Nl5ez?-0I{n`a9~b
znD$ABzxS-d(KJJ+jXY0Rw7qd%@K?oD=}0B#V#d#0KZQh%m+g1>dEsHV<GRp|-RjpK
zc0M#(7clRk)-k4ewme$`T^}1Saxrz7R_pvm)`&fze5tzemC8T1LF^uH*w%g9xOk-}
z+qMpi=vS9?%SyT%^H{FmHhsLq;hMrzmD>1o+JaW6W!7na(w^)*Pk?h1>!ZD{VlRK>
zqqbPpSv+_o85tPTL4y&P1wYb21ll6K-audP!wv$qm#-bWvVN8Nc_kArxAnf79)TiE
zT%7l|zucAMX};<1lz(RY{}}40?C@R4GTBLd+T6;`@AoD?d;a~k^a0y*vL&)}gyyIO
zbFbrFc%OTD&cQ!ZG#~DlSa;LnsUv5#tbb(^(|RsJtNzn1^-?oe?rb)0lJ;CU<D{wj
zovWPuUJ)r<LX;%f3S}z33C*icT-K^@GJDA#|Dc!q4Q-!K4m6u>Xv(?t*3QkZWtW)k
zTQ~PuyNK)UaM7gBpo|v>s@1=+74FZnSu;0i%2TU{{zhAWiL36sSj8E0e_ce&>9^-*
zuWxx*&wc1@*`sE$pFP@}Z-&fv$Ve!irRyZDJNxa;HK`xXWTZ_u#E0CO<#jxauOQ+`
z_7jfdN2@=vqlT~ZMX_hEm>C#qxEY8$#?C1<Ex#z$EwiY&B)BvQ8vc-q_GnmcuyD9Y
zUEZdG!p#{Tyi>Q`@Mw03<8WN<Aiym+L8vo#irJ0rz7dzNrd+?7v#Nff{lfo;&TVO)
zw(?ta`md$`7uzqoTwI>ap|;^*;j;L5r+1#WeZKSjyy|cNzP``bXIN4ge=PFM+(l_|
zZW6^Zl9EDVsX7m$7`%6!``9@{r+5v=4PHs3Fw1Lf8_pF5A7itdYS`i1H>XEno5d%Y
z?UKLVzW-WP(w3-`9yK-OXi)j;s8umngCjSG1^3>alb5kQboIJONhkeRZF%~JYy7JY
z$GclRyK5GcmNnb))U4f7hb0qlWL@!^RqC!AdQ#}r%+#&B)<*4`Vct7w!D)fGi|=Mn
z+qNvaboT`f(^?}9>9&QDH&;ci&t{qZSw-^gs}4UY4S9#%9hWP`+8i%y+*p<RT>j>+
z%z~P822c9#Mr_HLyngP<JIr6%O+!O_y;dbe?+QFw618m4%JLs-e05h>alEwpb8)+|
ztWD<=zB{b5p7Hwz?`6xgl*^i|b-$-D#%$*d9`{B@9{&4YT9upqtY1E>Hd3CoB_mC8
z(cU-76S&h3YX#5hkdb+Jz=wOM=$ykHO^JI>7q%-_DoGxFwwr6)N7m!hxw>v7c2sz-
zcp$egX~y@SlIs;2Gx~e=PFB2Ip~@<5pcYYkTEO1wz^2g5D<+5IYJ;xLn*Yz~a_W+q
zVkfm8D&?JXRXg@t_%hGI=09hq%Vng_er(OE<)xG~^Wm!J6V3W9E);xceK`Hq;i=}U
z+4l+*IqTVn>Ap2sxL7RBzwY46xXR_PH+DX5k}#{*a@rfG6{$OYYn82)x%2ebLHyq<
zPxW5i@HOkzv|k<~=bhQKQbf<D?YQB0`RtVN>f65TJkMC0KgLPCDVQ*?(fQTC8@^Mw
z2UOk4E?GZ|Ek4~zbWYIbWs`J1$5!04j$0YFm9PA!_M^X_*-~#jzW(4Z|BwAgeCHg|
zR{FAl>#}tZYxAN#mrwA|U-9kB_p1Ih^UQb4<2sMlx9W@ezd88d_-t*({9BikdSjjC
zqn#RJkF%#pFEc%qJf-HaWZ+g+>BH_jZhZM=&UV3l?>7FG68;-(zvLdRYkV*HOZfc3
z+8@l!+?P7Os9#$5^NK&mTW7svnNl5sah1FJ4<^0X^2Pd`$lsHPj9I=Ld?}J&xah`2
znTc_V_bYELs+ODcBCYckZ_lRCo{gp|H>`|un(uX6w*O0=9?`w8jnhXc*KpRNiTsRj
z6xvv1Yo|Q%Op*!`P>I}Cxx;3y$CYAt-LeH?`wI?>%{4F-zr=Oe(EF8fddbO)-aUGc
zlb=_|6xPHXZNIX9LRtDt%bIyQfv=9MevFB8m=kwY^Oc}XLxo<tqTFHb9j-eJzRvfw
ztMUD7KIsqIfwhwvtY?B&R{vpQV6en2qk}7pOHy-*8B>@$>9p5j2a&e(H&*9v-F7YO
zs_Pw7Ct;T(t>P2J#Dnw$Ue?W$yF5`vSnaaYJ+JU4ZutZ4Lh6|UD;<wS&N2Rc{{PuI
zJNMM@*~fT*F=x60f3H%G$im2$vrIXn3tScIPuZxQ=S;cvYl6eJ)5kp)+)9_b=W|!E
ztJ<n)O3?e_ZYyW42ot{}YvycdI1o6W`}P)AS?i1_K4bPog^PQI@3<>Uvc<mJbw%2}
zyKJX<uu_*Wlc{uS5?6lh$sO(sE?)cHU*hK5UbW3~LR00oP21`ssx>r=3w#pHQ}s5m
zc!%lm{s@VvIJv0n>;5TeF03I77+jf_xG#`75^y|Rr13GMSMIuux=m}3$(vX@zRjvw
zyY=D*-nEyD4`+Nk!<&9(=At|Si$BLrSKf|Ypy$>oa%Ix~;tlMUUmpaXX6ih*=+P#r
znOeeAi+}4o%nMED^wN5kypb<eRJ*wOl(ykMuB$BXR0@{u`?96MZ2!WU`X#!oXLS}?
zuguV8J*&4Uifff!_M52xEU49ju4qORI}-!LZWaaxBg`m;7W+P#dC+#H3P$sCYViI1
zO93MP{4_<lix1ohF%dSGF)({oV=UpTGHb?!T+TH9>fH{9cTI^dzODPq{^z4^HM<||
zKinnH=iW?dX34xXPyYLy_chPw+`eC5_m8Q7;kcXBZ8M`A=N4r>mMqh`qHDr2Gko5c
zMb}$G_7*xBSRcHgx~9zbrCeOk?{&X4e%V!f-Az#VBfQWi*MCl0$+Om$I~xNQuCAKc
zabxCxg{7;l_Hj5*=a0VY&^u?Mkn{agwtde_nm$IHSh#M&lhEatZnqqYsCl_NXXT3*
z!B1ExZHRk6uiN|K52wC4aZ7A0v&{WCBlY)id2*Uw(n){Bq1`ygY4Zke4Ygmq_m&rT
zozdLAW3FV*vkBZ!5*5~;*f;n3`AfxIPeM*uTW-0a(h#s(zs)M+b*}XM`NFII3;S-U
zKG-U?<Gt<PAH6eYOv}^Y=Z<U&trs@?7VXPjUHN8l#zNN{(aUV#y}RGg_v`sP1*JU^
zb)Q<+9zA<kV_HsgwM2JD!7}}VRKcdmhx@ge|3(;m;i%v&zflzN$!)^Dzl@Kiwb^q1
zrEQsf>cqx*{!9_;nAD6N4cW9d`Z(O0lbZ1&P4lblv+L0YvklL<2~9e1<d)RCMu`Gh
z{-g)(8qV>3?$_VU+IaSTZkl!Q8I4Arj1y-)A9+tZwq)(hC!a*-e=(hJGH-#<a@*d=
z+Y6?rN^07zOgy^9a&g;l9r?{JioeBCla<>xj_<b23=F3@v5r4tCM)o?5{|~k>!8a4
z0{{G?IhHVbu^zEbN?oA4g;j7vg=VN;kVb^-#rTCsq%=4uvwak;VYttnUJ@{~@Xzy}
z{zuLh?>xCgyk{RWcsze|b=mit^zzNSE%*Pc`OT-$<|=r$!{m~X=*6ZEk4=vHZ3W&3
zHZ<IhG1hWAFr&em>6mLpp?^!qZzeeo4}&YS^RGB5oVqagQg4h(`ia$*s*}#Hp15S+
zM#b#anxVmBDT^0K6mF6WH@&*&xWU!eIsewTs>bPV?EbE6STX6>szVnJv3`!0*6#Wk
z5_fX)k@=T47u9*1h3wPkvb_`Aw(UrEu%TM$;)JwqVO<k=F7+(abTiTy=i-X&wfH4r
z!PoxiOB%m!zo&1d3XkuuX@;qXee!3mTF2>OwB^>GT2W2kLsd6tM@_zeYT}+XPn9Gu
z2W*Ym6LabLn@UNupAV+>%J|1f%GFxl-K4(U%gI|j_3FO8xx(TfCNT26PXF|9?&0pp
zqYtaZpL+M`H}f<`oipr^tUY}~YqHk*8GS~x8RyAy*3Fsxz<G6pyO5fpYC_Z@9y#L@
z%hcw?GZ!bO-`ce-Kl}I;hji7cE#5oM$j)ATt8`DoHl`YPRoD7n)70xcrNJ+!?KV9>
z-Q@6xmA6(+oAE|k`1YoSGQBG&C4G2O>-6+pjB9B30kcI`lQ%~$4dCc;Tj|xFmC<9j
z+jzT=sr<~z=0C4kC@tVSr>W-b|8j!I`Zb!7#s70wdK_TtU8THmuYKO0IgZxT&drRP
zUA}EuV7T_J_rW~#UWvw?-DC0fU-+v->W;4Bf1gIbusu*?XLq5a`0&x^(q~_r-SWMB
z&o_jt&F-)$Piy(Ze;(E1w=#2|S#9gE`o<DB&5`}2m52K`i~jD0qZd<@BoB-3I_vhg
zfvuuLt!e&)Urhc#v|q*iX#Dl_-~12n9V7TAE>k>Fc5%mM_QILB-yYm8!!_CC)%|d@
zw=z2(J#Tnp8FiC;%jQ&;8``JmE~vfOXw4bpZoc#2PU#~%HC|{i?cm(rzxaz-tWR*p
zpCt)L&+sL$d)|K2iE~E*gWiJv2Q!Ycu3cd%?sVf+*8Wz*25uL14d%079=%dI8M^4Y
zjZ#&|q3aInNfM0pSw{ub_fCHi`@ubmM_5igt#g^pqj=O@nR%}kynDulfq_Aufq@~t
zC^01`RW~^&Q#Uy$vADQ^oErd)0x)j?c#3lFKO>U}GZzB`2M2@Jrm5}>3=9mgQ|~!J
z955DOc<Tsa__~HT>U#RQ=?8eDYP&vlMiA)OqEH3~1`&`-C|=U2$-uw>*XHZ!=jrAe
z9HQszhHL`pZY7Wgn8hq04j2nC{B@iKt@!Zi;0CJ(5g;A=pgJHSh~ijutAs&%z!;?Q
zC8|c~1q>JlWHhCuEoNX~xC*_m0j!^afniG{=)wdn_k+Q1g;FtubkWega`ay7H4!ET
zh5{A_22k`P3<O06J_FHPbpb{B6_t*qB^jxCC7H>HgPqZBeEue)cohQ!!)*r0rJV>H
z%UDp>8NkB86Xpl(K44i{bvK5IfgzBEfk78$7&u50Sh1St4o=;n#i>Oe`MIe+`T5zU
z1=x+2c@%s5C@TZQFD}T0FwFEVjqDs)jdra_&M7U<EK7CE%t>`jPEIW@_D#%7OiwMs
zZowJeI5*G<+CSOQ-7d?A)q-%ywQtV(d1;yHrA3J)@H^hngE_EPHuEV91A`<N1A_s~
z;oxBTDS*{z^!W5h%uC6^<?yAI2D`#QnT8coV!$le(x@Yb)pVcybey4ZuIP1_C_4i~
z9v=gP8q6fHQ};_?H4Np_JM1BAac|pQX<-J2H<}C#3MghuC}1_y6LL{3cGK=2@aa6l
z!oV<<6Fr^^jj@{Ml%HP$x^|<$H!(9WxFoTt1XoCUT-&a4nTvrzS%QH<1I3{Y7Fdmj
zCIQ!q)a25Vd>jd@MQX|FO<W8NP7(|ZS}3N@bjE5bBvU}m200pgK%n2a3p094<77Xq
z=0bxNSMh^>!y&@F;!v#Sp+vn3j{6SL^8)(qc?er-!?D_ezr;bmGzVea)p&U22`#;_
zR-nl415IwBUwVTuFfD-r=M^~Urhs&TvLuKGT^j?U85ls7OI9L^FF^~q&^1p$yF><}
z8C0>%#MBI1CV<pjM;m-aHy-_B5QOn6DHv`;G8WRS#FlzM;f8(%2f|c=R3c2pb0G)1
z`_ZpDK-eUdL4-|^*+|GjA@rj~(XB#1X&zzKzbt%K;W&35-FWmP(-Fo`$-!qlG$!yJ
zppI^p4BFYN2&=Z05MdQ0&<F(=`tg+rOKy}CVF~`jEYW?0e%c?xGO=oWmO;FNS)_sz
zG5T3k2s7U_;4>2%ArPY>9HhxTbeE$aYJ{-mWfO)iVAGIi)3Lj}8|^>^gqa7sFwG?X
zcm;H~8=-CTN7ys92g4rB!o!)EE&u3twxVs-M%d}xPuNcE;f21Y7GdVQg@nz-vceYK
z(dY{p5f(gJLWBj7qy#CYu=@ag`53~2qsuWYfYlPj4E&<|1bw*&!lJr07#3k9SBOWj
zdjWk49bt0IdJK~>6BEQ}>=Wwfo<N_vL|9e637=I2>QZO489Rir*Ei!c7931PDtPAa
z&^>@Yu8*)udIyG0==}$zup{6h^s#1yHScy}Sc7CN)I$&oPuYh)=!h`ibq^8d;~s)U
m53fwLaTJ6_xA(&vhPXym0=!w-KpG4g3>gFj85rUYfOr7Y2S`i+

literal 0
HcmV?d00001

diff --git a/tensorflow/examples/android/gradle/wrapper/gradle-wrapper.properties b/tensorflow/examples/android/gradle/wrapper/gradle-wrapper.properties
new file mode 100644
index 0000000000..bd9ee87db3
--- /dev/null
+++ b/tensorflow/examples/android/gradle/wrapper/gradle-wrapper.properties
@@ -0,0 +1,6 @@
+#Sat Nov 18 15:06:47 CET 2017
+distributionBase=GRADLE_USER_HOME
+distributionPath=wrapper/dists
+zipStoreBase=GRADLE_USER_HOME
+zipStorePath=wrapper/dists
+distributionUrl=https\://services.gradle.org/distributions/gradle-4.1-all.zip
diff --git a/tensorflow/examples/android/gradlew b/tensorflow/examples/android/gradlew
new file mode 100644
index 0000000000..9d82f78915
--- /dev/null
+++ b/tensorflow/examples/android/gradlew
@@ -0,0 +1,160 @@
+#!/usr/bin/env bash
+
+##############################################################################
+##
+##  Gradle start up script for UN*X
+##
+##############################################################################
+
+# Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
+DEFAULT_JVM_OPTS=""
+
+APP_NAME="Gradle"
+APP_BASE_NAME=`basename "$0"`
+
+# Use the maximum available, or set MAX_FD != -1 to use that value.
+MAX_FD="maximum"
+
+warn ( ) {
+    echo "$*"
+}
+
+die ( ) {
+    echo
+    echo "$*"
+    echo
+    exit 1
+}
+
+# OS specific support (must be 'true' or 'false').
+cygwin=false
+msys=false
+darwin=false
+case "`uname`" in
+  CYGWIN* )
+    cygwin=true
+    ;;
+  Darwin* )
+    darwin=true
+    ;;
+  MINGW* )
+    msys=true
+    ;;
+esac
+
+# Attempt to set APP_HOME
+# Resolve links: $0 may be a link
+PRG="$0"
+# Need this for relative symlinks.
+while [ -h "$PRG" ] ; do
+    ls=`ls -ld "$PRG"`
+    link=`expr "$ls" : '.*-> \(.*\)$'`
+    if expr "$link" : '/.*' > /dev/null; then
+        PRG="$link"
+    else
+        PRG=`dirname "$PRG"`"/$link"
+    fi
+done
+SAVED="`pwd`"
+cd "`dirname \"$PRG\"`/" >/dev/null
+APP_HOME="`pwd -P`"
+cd "$SAVED" >/dev/null
+
+CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar
+
+# Determine the Java command to use to start the JVM.
+if [ -n "$JAVA_HOME" ] ; then
+    if [ -x "$JAVA_HOME/jre/sh/java" ] ; then
+        # IBM's JDK on AIX uses strange locations for the executables
+        JAVACMD="$JAVA_HOME/jre/sh/java"
+    else
+        JAVACMD="$JAVA_HOME/bin/java"
+    fi
+    if [ ! -x "$JAVACMD" ] ; then
+        die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME
+
+Please set the JAVA_HOME variable in your environment to match the
+location of your Java installation."
+    fi
+else
+    JAVACMD="java"
+    which java >/dev/null 2>&1 || die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
+
+Please set the JAVA_HOME variable in your environment to match the
+location of your Java installation."
+fi
+
+# Increase the maximum file descriptors if we can.
+if [ "$cygwin" = "false" -a "$darwin" = "false" ] ; then
+    MAX_FD_LIMIT=`ulimit -H -n`
+    if [ $? -eq 0 ] ; then
+        if [ "$MAX_FD" = "maximum" -o "$MAX_FD" = "max" ] ; then
+            MAX_FD="$MAX_FD_LIMIT"
+        fi
+        ulimit -n $MAX_FD
+        if [ $? -ne 0 ] ; then
+            warn "Could not set maximum file descriptor limit: $MAX_FD"
+        fi
+    else
+        warn "Could not query maximum file descriptor limit: $MAX_FD_LIMIT"
+    fi
+fi
+
+# For Darwin, add options to specify how the application appears in the dock
+if $darwin; then
+    GRADLE_OPTS="$GRADLE_OPTS \"-Xdock:name=$APP_NAME\" \"-Xdock:icon=$APP_HOME/media/gradle.icns\""
+fi
+
+# For Cygwin, switch paths to Windows format before running java
+if $cygwin ; then
+    APP_HOME=`cygpath --path --mixed "$APP_HOME"`
+    CLASSPATH=`cygpath --path --mixed "$CLASSPATH"`
+    JAVACMD=`cygpath --unix "$JAVACMD"`
+
+    # We build the pattern for arguments to be converted via cygpath
+    ROOTDIRSRAW=`find -L / -maxdepth 1 -mindepth 1 -type d 2>/dev/null`
+    SEP=""
+    for dir in $ROOTDIRSRAW ; do
+        ROOTDIRS="$ROOTDIRS$SEP$dir"
+        SEP="|"
+    done
+    OURCYGPATTERN="(^($ROOTDIRS))"
+    # Add a user-defined pattern to the cygpath arguments
+    if [ "$GRADLE_CYGPATTERN" != "" ] ; then
+        OURCYGPATTERN="$OURCYGPATTERN|($GRADLE_CYGPATTERN)"
+    fi
+    # Now convert the arguments - kludge to limit ourselves to /bin/sh
+    i=0
+    for arg in "$@" ; do
+        CHECK=`echo "$arg"|egrep -c "$OURCYGPATTERN" -`
+        CHECK2=`echo "$arg"|egrep -c "^-"`                                 ### Determine if an option
+
+        if [ $CHECK -ne 0 ] && [ $CHECK2 -eq 0 ] ; then                    ### Added a condition
+            eval `echo args$i`=`cygpath --path --ignore --mixed "$arg"`
+        else
+            eval `echo args$i`="\"$arg\""
+        fi
+        i=$((i+1))
+    done
+    case $i in
+        (0) set -- ;;
+        (1) set -- "$args0" ;;
+        (2) set -- "$args0" "$args1" ;;
+        (3) set -- "$args0" "$args1" "$args2" ;;
+        (4) set -- "$args0" "$args1" "$args2" "$args3" ;;
+        (5) set -- "$args0" "$args1" "$args2" "$args3" "$args4" ;;
+        (6) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" ;;
+        (7) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" ;;
+        (8) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" ;;
+        (9) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" "$args8" ;;
+    esac
+fi
+
+# Split up the JVM_OPTS And GRADLE_OPTS values into an array, following the shell quoting and substitution rules
+function splitJvmOpts() {
+    JVM_OPTS=("$@")
+}
+eval splitJvmOpts $DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS
+JVM_OPTS[${#JVM_OPTS[*]}]="-Dorg.gradle.appname=$APP_BASE_NAME"
+
+exec "$JAVACMD" "${JVM_OPTS[@]}" -classpath "$CLASSPATH" org.gradle.wrapper.GradleWrapperMain "$@"
diff --git a/tensorflow/examples/android/gradlew.bat b/tensorflow/examples/android/gradlew.bat
new file mode 100644
index 0000000000..8a0b282aa6
--- /dev/null
+++ b/tensorflow/examples/android/gradlew.bat
@@ -0,0 +1,90 @@
+@if "%DEBUG%" == "" @echo off
+@rem ##########################################################################
+@rem
+@rem  Gradle startup script for Windows
+@rem
+@rem ##########################################################################
+
+@rem Set local scope for the variables with windows NT shell
+if "%OS%"=="Windows_NT" setlocal
+
+@rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
+set DEFAULT_JVM_OPTS=
+
+set DIRNAME=%~dp0
+if "%DIRNAME%" == "" set DIRNAME=.
+set APP_BASE_NAME=%~n0
+set APP_HOME=%DIRNAME%
+
+@rem Find java.exe
+if defined JAVA_HOME goto findJavaFromJavaHome
+
+set JAVA_EXE=java.exe
+%JAVA_EXE% -version >NUL 2>&1
+if "%ERRORLEVEL%" == "0" goto init
+
+echo.
+echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
+echo.
+echo Please set the JAVA_HOME variable in your environment to match the
+echo location of your Java installation.
+
+goto fail
+
+:findJavaFromJavaHome
+set JAVA_HOME=%JAVA_HOME:"=%
+set JAVA_EXE=%JAVA_HOME%/bin/java.exe
+
+if exist "%JAVA_EXE%" goto init
+
+echo.
+echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME%
+echo.
+echo Please set the JAVA_HOME variable in your environment to match the
+echo location of your Java installation.
+
+goto fail
+
+:init
+@rem Get command-line arguments, handling Windowz variants
+
+if not "%OS%" == "Windows_NT" goto win9xME_args
+if "%@eval[2+2]" == "4" goto 4NT_args
+
+:win9xME_args
+@rem Slurp the command line arguments.
+set CMD_LINE_ARGS=
+set _SKIP=2
+
+:win9xME_args_slurp
+if "x%~1" == "x" goto execute
+
+set CMD_LINE_ARGS=%*
+goto execute
+
+:4NT_args
+@rem Get arguments from the 4NT Shell from JP Software
+set CMD_LINE_ARGS=%$
+
+:execute
+@rem Setup the command line
+
+set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar
+
+@rem Execute Gradle
+"%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %CMD_LINE_ARGS%
+
+:end
+@rem End local scope for the variables with windows NT shell
+if "%ERRORLEVEL%"=="0" goto mainEnd
+
+:fail
+rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of
+rem the _cmd.exe /c_ return code!
+if  not "" == "%GRADLE_EXIT_CONSOLE%" exit 1
+exit /b 1
+
+:mainEnd
+if "%OS%"=="Windows_NT" endlocal
+
+:omega
diff --git a/tensorflow/examples/android/src/org/tensorflow/demo/CameraActivity.java b/tensorflow/examples/android/src/org/tensorflow/demo/CameraActivity.java
index 4e45f42d0c..8bd4abb154 100644
--- a/tensorflow/examples/android/src/org/tensorflow/demo/CameraActivity.java
+++ b/tensorflow/examples/android/src/org/tensorflow/demo/CameraActivity.java
@@ -333,8 +333,12 @@ public abstract class CameraActivity extends Activity
           continue;
         }
 
-        useCamera2API = isHardwareLevelSupported(characteristics,
-            CameraCharacteristics.INFO_SUPPORTED_HARDWARE_LEVEL_FULL);
+        // Fallback to camera1 API for internal cameras that don't have full support.
+        // This should help with legacy situations where using the camera2 API causes
+        // distorted or otherwise broken previews.
+        useCamera2API = (facing == CameraCharacteristics.LENS_FACING_EXTERNAL)
+            || isHardwareLevelSupported(characteristics, 
+                                        CameraCharacteristics.INFO_SUPPORTED_HARDWARE_LEVEL_FULL);
         LOGGER.i("Camera API lv2?: %s", useCamera2API);
         return cameraId;
       }
diff --git a/tensorflow/examples/how_tos/reading_data/fully_connected_reader.py b/tensorflow/examples/how_tos/reading_data/fully_connected_reader.py
index a9ed02dd1a..9db8835d92 100644
--- a/tensorflow/examples/how_tos/reading_data/fully_connected_reader.py
+++ b/tensorflow/examples/how_tos/reading_data/fully_connected_reader.py
@@ -45,9 +45,7 @@ TRAIN_FILE = 'train.tfrecords'
 VALIDATION_FILE = 'validation.tfrecords'
 
 
-def read_and_decode(filename_queue):
-  reader = tf.TFRecordReader()
-  _, serialized_example = reader.read(filename_queue)
+def decode(serialized_example):
   features = tf.parse_single_example(
       serialized_example,
       # Defaults are not specified since both keys are required.
@@ -60,22 +58,26 @@ def read_and_decode(filename_queue):
   # length mnist.IMAGE_PIXELS) to a uint8 tensor with shape
   # [mnist.IMAGE_PIXELS].
   image = tf.decode_raw(features['image_raw'], tf.uint8)
-  image.set_shape([mnist.IMAGE_PIXELS])
+  image.set_shape((mnist.IMAGE_PIXELS))
 
+  # Convert label from a scalar uint8 tensor to an int32 scalar.
+  label = tf.cast(features['label'], tf.int32)
+  
+  return image, label
+
+def augment(image, label):
   # OPTIONAL: Could reshape into a 28x28 image and apply distortions
   # here.  Since we are not applying any distortions in this
   # example, and the next step expects the image to be flattened
   # into a vector, we don't bother.
+  return image, label
 
+def normalize(image, label):
   # Convert from [0, 255] -> [-0.5, 0.5] floats.
   image = tf.cast(image, tf.float32) * (1. / 255) - 0.5
 
-  # Convert label from a scalar uint8 tensor to an int32 scalar.
-  label = tf.cast(features['label'], tf.int32)
-
   return image, label
 
-
 def inputs(train, batch_size, num_epochs):
   """Reads input data num_epochs times.
 
@@ -91,31 +93,32 @@ def inputs(train, batch_size, num_epochs):
       in the range [-0.5, 0.5].
     * labels is an int32 tensor with shape [batch_size] with the true label,
       a number in the range [0, mnist.NUM_CLASSES).
-    Note that an tf.train.QueueRunner is added to the graph, which
-    must be run using e.g. tf.train.start_queue_runners().
+
+    This function creates a one_shot_iterator, meaning that it will only iterate
+    over the dataset once. On the other hand there is no special initialization
+    required.
   """
   if not num_epochs: num_epochs = None
   filename = os.path.join(FLAGS.train_dir,
                           TRAIN_FILE if train else VALIDATION_FILE)
 
   with tf.name_scope('input'):
-    filename_queue = tf.train.string_input_producer(
-        [filename], num_epochs=num_epochs)
+    # TFRecordDataset opens a protobuf and reads entries line by line
+    # could also be [list, of, filenames]
+    dataset = tf.data.TFRecordDataset(filename)
+    dataset = dataset.repeat(num_epochs)
 
-    # Even when reading in multiple threads, share the filename
-    # queue.
-    image, label = read_and_decode(filename_queue)
+    # map takes a python function and applies it to every sample
+    dataset = dataset.map(decode)
+    dataset = dataset.map(augment)
+    dataset = dataset.map(normalize)
 
-    # Shuffle the examples and collect them into batch_size batches.
-    # (Internally uses a RandomShuffleQueue.)
-    # We run this in two threads to avoid being a bottleneck.
-    images, sparse_labels = tf.train.shuffle_batch(
-        [image, label], batch_size=batch_size, num_threads=2,
-        capacity=1000 + 3 * batch_size,
-        # Ensures a minimum amount of shuffling of examples.
-        min_after_dequeue=1000)
+    #the parameter is the queue size
+    dataset = dataset.shuffle(1000 + 3 * batch_size)
+    dataset = dataset.batch(batch_size)
 
-    return images, sparse_labels
+    iterator = dataset.make_one_shot_iterator()
+  return iterator.get_next()
 
 
 def run_training():
@@ -124,16 +127,16 @@ def run_training():
   # Tell TensorFlow that the model will be built into the default Graph.
   with tf.Graph().as_default():
     # Input images and labels.
-    images, labels = inputs(train=True, batch_size=FLAGS.batch_size,
-                            num_epochs=FLAGS.num_epochs)
+    image_batch, label_batch = inputs(train=True, batch_size=FLAGS.batch_size,
+                               num_epochs=FLAGS.num_epochs)
 
     # Build a Graph that computes predictions from the inference model.
-    logits = mnist.inference(images,
+    logits = mnist.inference(image_batch,
                              FLAGS.hidden1,
                              FLAGS.hidden2)
 
     # Add to the Graph the loss calculation.
-    loss = mnist.loss(logits, labels)
+    loss = mnist.loss(logits, label_batch)
 
     # Add to the Graph operations that train the model.
     train_op = mnist.training(loss, FLAGS.learning_rate)
@@ -143,47 +146,33 @@ def run_training():
                        tf.local_variables_initializer())
 
     # Create a session for running operations in the Graph.
-    sess = tf.Session()
-
-    # Initialize the variables (the trained variables and the
-    # epoch counter).
-    sess.run(init_op)
-
-    # Start input enqueue threads.
-    coord = tf.train.Coordinator()
-    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
-
-    try:
-      step = 0
-      while not coord.should_stop():
-        start_time = time.time()
-
-        # Run one step of the model.  The return values are
-        # the activations from the `train_op` (which is
-        # discarded) and the `loss` op.  To inspect the values
-        # of your ops or variables, you may include them in
-        # the list passed to sess.run() and the value tensors
-        # will be returned in the tuple from the call.
-        _, loss_value = sess.run([train_op, loss])
-
-        duration = time.time() - start_time
-
-        # Print an overview fairly often.
-        if step % 100 == 0:
-          print('Step %d: loss = %.2f (%.3f sec)' % (step, loss_value,
+    with tf.Session() as sess:
+      # Initialize the variables (the trained variables and the
+      # epoch counter).
+      sess.run(init_op)
+      try:
+        step = 0
+        while True: #train until OutOfRangeError
+          start_time = time.time()
+
+          # Run one step of the model.  The return values are
+          # the activations from the `train_op` (which is
+          # discarded) and the `loss` op.  To inspect the values
+          # of your ops or variables, you may include them in
+          # the list passed to sess.run() and the value tensors
+          # will be returned in the tuple from the call.
+          _, loss_value = sess.run([train_op, loss])
+
+          duration = time.time() - start_time
+
+          # Print an overview fairly often.
+          if step % 100 == 0:
+            print('Step %d: loss = %.2f (%.3f sec)' % (step, loss_value,
                                                      duration))
-        step += 1
-    except tf.errors.OutOfRangeError:
-      print('Done training for %d epochs, %d steps.' % (FLAGS.num_epochs, step))
-    finally:
-      # When done, ask the threads to stop.
-      coord.request_stop()
-
-    # Wait for threads to finish.
-    coord.join(threads)
-    sess.close()
-
-
+          step += 1
+      except tf.errors.OutOfRangeError:
+        print('Done training for %d epochs, %d steps.' % (FLAGS.num_epochs, step))
+      
 def main(_):
   run_training()
 
diff --git a/tensorflow/examples/wav_to_spectrogram/wav_to_spectrogram.cc b/tensorflow/examples/wav_to_spectrogram/wav_to_spectrogram.cc
index 1e375ed48e..4a429837b7 100644
--- a/tensorflow/examples/wav_to_spectrogram/wav_to_spectrogram.cc
+++ b/tensorflow/examples/wav_to_spectrogram/wav_to_spectrogram.cc
@@ -53,7 +53,8 @@ tensorflow::Status WavToSpectrogram(const tensorflow::string& input_wav,
   //  - Scales, clamps, and converts that spectrogram to 0 to 255 uint8's.
   //  - Reshapes the tensor so that it's [height, width, 1] for imaging.
   //  - Encodes it as a PNG stream and saves it out to a file.
-  Output file_reader = ReadFile(root.WithOpName("input_wav"), input_wav);
+  Output file_reader =
+      tensorflow::ops::ReadFile(root.WithOpName("input_wav"), input_wav);
   DecodeWav wav_decoder =
       DecodeWav(root.WithOpName("wav_decoder"), file_reader);
   Output spectrogram = AudioSpectrogram(root.WithOpName("spectrogram"),
@@ -71,8 +72,8 @@ tensorflow::Status WavToSpectrogram(const tensorflow::string& input_wav,
   Output squeeze = Squeeze(root.WithOpName("squeeze"), expand_dims,
                            Squeeze::Attrs().Axis({0}));
   Output png_encoder = EncodePng(root.WithOpName("png_encoder"), squeeze);
-  WriteFile file_writer =
-      WriteFile(root.WithOpName("output_image"), output_image, png_encoder);
+  tensorflow::ops::WriteFile file_writer = tensorflow::ops::WriteFile(
+      root.WithOpName("output_image"), output_image, png_encoder);
   tensorflow::GraphDef graph;
   TF_RETURN_IF_ERROR(root.ToGraphDef(&graph));
 
diff --git a/tensorflow/go/graph.go b/tensorflow/go/graph.go
index f200a8e00a..fc087d9d99 100644
--- a/tensorflow/go/graph.go
+++ b/tensorflow/go/graph.go
@@ -28,7 +28,8 @@ package tensorflow
 //                                 int num_shapes) {
 //  const int64_t** dims =
 //    (const int64_t**)malloc(sizeof(const int64_t*) * num_shapes);
-//  for (int i = 0; i < num_shapes; i++) {
+//  int i = 0;
+//  for (i = 0; i < num_shapes; i++) {
 //    dims[i] = flat_dims;
 //    if (num_dims[i] > 0) {
 //      // flat_dims will be NULL iff num_shapes is 0 or all elements in num_dims are <= 0.
@@ -132,6 +133,20 @@ func (g *Graph) Operation(name string) *Operation {
 	return &Operation{cop, g}
 }
 
+// Operations returns a list of all operations in the graph
+func (g *Graph) Operations() []Operation {
+	var pos C.size_t = 0
+	ops := []Operation{}
+	for {
+		cop := C.TF_GraphNextOperation(g.c, &pos)
+		if cop == nil {
+			break
+		}
+		ops = append(ops, Operation{cop, g})
+	}
+	return ops
+}
+
 // OpSpec is the specification of an Operation to be added to a Graph
 // (using Graph.AddOperation).
 type OpSpec struct {
diff --git a/tensorflow/go/graph_test.go b/tensorflow/go/graph_test.go
index c3120bc720..b8d65c54f6 100644
--- a/tensorflow/go/graph_test.go
+++ b/tensorflow/go/graph_test.go
@@ -29,10 +29,26 @@ func hasOperations(g *Graph, ops ...string) error {
 			missing = append(missing, op)
 		}
 	}
-	if len(missing) == 0 {
-		return nil
+	if len(missing) != 0 {
+		return fmt.Errorf("Graph does not have the operations %v", missing)
 	}
-	return fmt.Errorf("Graph does not have the operations %v", missing)
+
+	inList := map[string]bool{}
+	for _, op := range g.Operations() {
+		inList[op.Name()] = true
+	}
+
+	for _, op := range ops {
+		if !inList[op] {
+			missing = append(missing, op)
+		}
+	}
+
+	if len(missing) != 0 {
+		return fmt.Errorf("Operations %v are missing from graph.Operations()", missing)
+	}
+
+	return nil
 }
 
 func TestGraphWriteToAndImport(t *testing.T) {
diff --git a/tensorflow/python/client/session.py b/tensorflow/python/client/session.py
index 017bef99ce..1481a4d035 100644
--- a/tensorflow/python/client/session.py
+++ b/tensorflow/python/client/session.py
@@ -126,6 +126,12 @@ _REGISTERED_EXPANSIONS = [
      lambda feed: [feed])]
 # pylint: enable=g-long-lambda
 
+
+def _convert_to_numpy_obj(numpy_dtype, obj):
+  """Explicitly convert obj based on numpy type except for string type."""
+  return numpy_dtype(obj) if numpy_dtype is not object else str(obj)
+
+
 def register_session_run_conversion_functions(tensor_type, fetch_function,
     feed_function=None, feed_function_for_partial_run=None):
   """Register fetch and feed conversion functions for `tf.Session.run()`.
@@ -1072,12 +1078,14 @@ class BaseSession(SessionInterface):
                             'strings, lists, numpy ndarrays, or TensorHandles.')
 
           subfeed_dtype = subfeed_t.dtype.as_numpy_dtype
-          if isinstance(subfeed_val,
-                        int) and subfeed_dtype(subfeed_val) != subfeed_val:
+          if isinstance(subfeed_val, int) and _convert_to_numpy_obj(
+              subfeed_dtype, subfeed_val) != subfeed_val:
             raise TypeError(
-                'Type of feed value ' + str(subfeed_val) + ' is not'
-                ' compatible with Tensor type ' + str(subfeed_dtype) + '.'
-                ' Try explicitly setting the type of the feed tensor'
+                'Type of feed value ' + str(subfeed_val) + ' with type ' +
+                str(type(subfeed_val)) +
+                ' is not compatible with Tensor type ' +
+                str(subfeed_dtype) +
+                '. Try explicitly setting the type of the feed tensor'
                 ' to a larger type (e.g. int64).')
 
           is_tensor_handle_feed = isinstance(subfeed_val,
diff --git a/tensorflow/python/client/session_test.py b/tensorflow/python/client/session_test.py
index 3da03a7b0f..a563f5ef4a 100644
--- a/tensorflow/python/client/session_test.py
+++ b/tensorflow/python/client/session_test.py
@@ -1737,6 +1737,12 @@ class SessionTest(test_util.TensorFlowTestCase):
     server = server_lib.Server.create_local_server()
     self.runTestAddFunctionToSession(server.target)
 
+  def testAutoConvertAndCheckData(self):
+    with self.test_session() as sess:
+      a = array_ops.placeholder(dtype=dtypes.string)
+      with self.assertRaisesRegexp(
+          TypeError, 'Type of feed value 1 with type <(\w+) \'int\'> is not'):
+        sess.run(a, feed_dict={a: 1})
 
 class GraphMutationTest(test_util.TensorFlowTestCase):
 
diff --git a/tensorflow/python/debug/BUILD b/tensorflow/python/debug/BUILD
index 2315ad4653..789771508e 100644
--- a/tensorflow/python/debug/BUILD
+++ b/tensorflow/python/debug/BUILD
@@ -535,6 +535,7 @@ py_test(
     srcs_version = "PY2AND3",
     tags = [
         "no_windows",
+        "nomac",
         "oss_serial",
     ],
     deps = [
diff --git a/tensorflow/python/estimator/training_test.py b/tensorflow/python/estimator/training_test.py
index d72b95dbdd..285671f99f 100644
--- a/tensorflow/python/estimator/training_test.py
+++ b/tensorflow/python/estimator/training_test.py
@@ -626,7 +626,7 @@ class _TrainingExecutorTrainingTest(object):
 
     self._run_task(training._TrainingExecutor(mock_est, mock_train_spec,
                                               mock_eval_spec))
-    mock_est.train.assert_called()
+    self.assertTrue(mock_est.train.called)
     mock_server.assert_not_called()
 
   def test_fail_with_empty_task_type(self):
@@ -836,7 +836,7 @@ class TrainingExecutorRunMasterTest(test.TestCase):
     executor.run_master()
 
     mock_server.assert_not_called()
-    mock_est.train.assert_called()
+    self.assertTrue(mock_est.train.called)
 
   def test_fail_with_empty_task_type(self):
     mock_est = test.mock.Mock(spec=estimator_lib.Estimator)
diff --git a/tensorflow/python/estimator/util.py b/tensorflow/python/estimator/util.py
index 12f2592d84..b31486dfa1 100644
--- a/tensorflow/python/estimator/util.py
+++ b/tensorflow/python/estimator/util.py
@@ -52,7 +52,7 @@ def fn_args(fn):
   else:
     if _is_callable_object(fn):
       fn = fn.__call__
-    args = tf_inspect.getargspec(fn).args
+    args = tf_inspect.getfullargspec(fn).args
     if _is_bounded_method(fn):
       args.remove('self')
   return tuple(args)
diff --git a/tensorflow/python/framework/function.py b/tensorflow/python/framework/function.py
index 366025a0d8..e06899f81d 100644
--- a/tensorflow/python/framework/function.py
+++ b/tensorflow/python/framework/function.py
@@ -82,8 +82,8 @@ class Defun(object):
     return x + y, x - y
 
   # Building the graph.
-  a = tf.Constant([1.0])
-  b = tf.Constant([2.0])
+  a = tf.constant([1.0])
+  b = tf.constant([2.0])
   c, d = MyFunc(a, b, name='mycall')
   ```
   """
diff --git a/tensorflow/python/keras/_impl/keras/applications/inception_resnet_v2.py b/tensorflow/python/keras/_impl/keras/applications/inception_resnet_v2.py
index c66b4b395e..2e73cefb6c 100644
--- a/tensorflow/python/keras/_impl/keras/applications/inception_resnet_v2.py
+++ b/tensorflow/python/keras/_impl/keras/applications/inception_resnet_v2.py
@@ -211,7 +211,7 @@ def InceptionResNetV2(include_top=True,  # pylint: disable=invalid-name
       include_top: whether to include the fully-connected
           layer at the top of the network.
       weights: one of `None` (random initialization),
-          "imagenet" (pre-training on ImageNet),
+          'imagenet' (pre-training on ImageNet),
           or the path to the weights file to be loaded.
       input_tensor: optional Keras tensor (i.e. output of `layers.Input()`)
           to use as image input for the model.
diff --git a/tensorflow/python/keras/_impl/keras/applications/mobilenet.py b/tensorflow/python/keras/_impl/keras/applications/mobilenet.py
index 4d5ac72604..5f97c138fc 100644
--- a/tensorflow/python/keras/_impl/keras/applications/mobilenet.py
+++ b/tensorflow/python/keras/_impl/keras/applications/mobilenet.py
@@ -350,7 +350,7 @@ def MobileNet(input_shape=None,  # pylint: disable=invalid-name
       include_top: whether to include the fully-connected
           layer at the top of the network.
       weights: one of `None` (random initialization),
-          "imagenet" (pre-training on ImageNet),
+          'imagenet' (pre-training on ImageNet),
           or the path to the weights file to be loaded.
       input_tensor: optional Keras tensor (i.e. output of
           `layers.Input()`)
@@ -536,6 +536,8 @@ def MobileNet(input_shape=None,  # pylint: disable=invalid-name
 
   if old_data_format:
     K.set_image_data_format(old_data_format)
+  elif weights is not None:
+    model.load_weights(weights)
   return model
 
 
diff --git a/tensorflow/python/keras/_impl/keras/applications/resnet50.py b/tensorflow/python/keras/_impl/keras/applications/resnet50.py
index f7cdf2be99..8ab46693aa 100644
--- a/tensorflow/python/keras/_impl/keras/applications/resnet50.py
+++ b/tensorflow/python/keras/_impl/keras/applications/resnet50.py
@@ -164,7 +164,7 @@ def ResNet50(include_top=True,
       include_top: whether to include the fully-connected
           layer at the top of the network.
       weights: one of `None` (random initialization),
-          "imagenet" (pre-training on ImageNet),
+          'imagenet' (pre-training on ImageNet),
           or the path to the weights file to be loaded.
       input_tensor: optional Keras tensor (i.e. output of `layers.Input()`)
           to use as image input for the model.
diff --git a/tensorflow/python/keras/_impl/keras/applications/vgg16.py b/tensorflow/python/keras/_impl/keras/applications/vgg16.py
index ab205aa689..38dbbdc809 100644
--- a/tensorflow/python/keras/_impl/keras/applications/vgg16.py
+++ b/tensorflow/python/keras/_impl/keras/applications/vgg16.py
@@ -70,8 +70,8 @@ def VGG16(include_top=True,
   Arguments:
       include_top: whether to include the 3 fully-connected
           layers at the top of the network.
-     weights: one of `None` (random initialization),
-          "imagenet" (pre-training on ImageNet),
+      weights: one of `None` (random initialization),
+          'imagenet' (pre-training on ImageNet),
           or the path to the weights file to be loaded.
       input_tensor: optional Keras tensor (i.e. output of `layers.Input()`)
           to use as image input for the model.
diff --git a/tensorflow/python/keras/_impl/keras/applications/vgg19.py b/tensorflow/python/keras/_impl/keras/applications/vgg19.py
index 5e5179f332..126c64260b 100644
--- a/tensorflow/python/keras/_impl/keras/applications/vgg19.py
+++ b/tensorflow/python/keras/_impl/keras/applications/vgg19.py
@@ -71,8 +71,8 @@ def VGG19(include_top=True,
       include_top: whether to include the 3 fully-connected
           layers at the top of the network.
       weights: one of `None` (random initialization),
-          "imagenet" (pre-training on ImageNet),
-          or the path to the weights file to be loaded.
+         'imagenet' (pre-training on ImageNet),
+         or the path to the weights file to be loaded.
       input_tensor: optional Keras tensor (i.e. output of `layers.Input()`)
           to use as image input for the model.
       input_shape: optional shape tuple, only to be specified
diff --git a/tensorflow/python/keras/_impl/keras/applications/xception.py b/tensorflow/python/keras/_impl/keras/applications/xception.py
index a9efd5d64c..8219831408 100644
--- a/tensorflow/python/keras/_impl/keras/applications/xception.py
+++ b/tensorflow/python/keras/_impl/keras/applications/xception.py
@@ -83,7 +83,7 @@ def Xception(include_top=True,
       include_top: whether to include the fully-connected
           layer at the top of the network.
       weights: one of `None` (random initialization),
-          "imagenet" (pre-training on ImageNet),
+          'imagenet' (pre-training on ImageNet),
           or the path to the weights file to be loaded.
       input_tensor: optional Keras tensor (i.e. output of `layers.Input()`)
           to use as image input for the model.
@@ -303,6 +303,8 @@ def Xception(include_top=True,
 
   if old_data_format:
     K.set_image_data_format(old_data_format)
+  elif weights is not None:
+    model.load_weights(weights)
   return model
 
 
diff --git a/tensorflow/python/keras/_impl/keras/layers/core.py b/tensorflow/python/keras/_impl/keras/layers/core.py
index 712db33c69..6a745844b2 100644
--- a/tensorflow/python/keras/_impl/keras/layers/core.py
+++ b/tensorflow/python/keras/_impl/keras/layers/core.py
@@ -104,13 +104,13 @@ class Dropout(tf_core_layers.Dropout, Layer):
   """
 
   def __init__(self, rate, noise_shape=None, seed=None, **kwargs):
-    self.supports_masking = True
     # Inheritance call order:
     # 1) tf.layers.Dropout, 2) keras.layers.Layer, 3) tf.layers.Layer
     super(Dropout, self).__init__(rate=rate,
                                   noise_shape=noise_shape,
                                   seed=seed,
                                   **kwargs)
+    self.supports_masking = True
 
   def call(self, inputs, training=None):
     if training is None:
diff --git a/tensorflow/python/keras/_impl/keras/layers/core_test.py b/tensorflow/python/keras/_impl/keras/layers/core_test.py
index 1fe043561d..bdb99c91c2 100644
--- a/tensorflow/python/keras/_impl/keras/layers/core_test.py
+++ b/tensorflow/python/keras/_impl/keras/layers/core_test.py
@@ -47,6 +47,11 @@ class CoreLayersTest(test.TestCase):
                   'noise_shape': [3, 1]},
           input_shape=(3, 2))
 
+    # https://github.com/tensorflow/tensorflow/issues/14819
+    with self.test_session():
+      dropout = keras.layers.Dropout(0.5)
+      self.assertEqual(True, dropout.supports_masking)
+
     with self.test_session():
       testing_utils.layer_test(
           keras.layers.SpatialDropout1D,
diff --git a/tensorflow/python/kernel_tests/BUILD b/tensorflow/python/kernel_tests/BUILD
index 1d8ca99c07..31d3bd1b74 100644
--- a/tensorflow/python/kernel_tests/BUILD
+++ b/tensorflow/python/kernel_tests/BUILD
@@ -2084,6 +2084,10 @@ cuda_py_test(
         "//tensorflow/python:framework_for_generated_wrappers",
     ],
     shard_count = 2,
+    tags = [
+        "no_gpu",
+        "no_oss",
+    ],
 )
 
 cuda_py_test(
diff --git a/tensorflow/python/kernel_tests/summary_image_op_test.py b/tensorflow/python/kernel_tests/summary_image_op_test.py
index d2152ab560..4718827e88 100644
--- a/tensorflow/python/kernel_tests/summary_image_op_test.py
+++ b/tensorflow/python/kernel_tests/summary_image_op_test.py
@@ -50,7 +50,6 @@ class SummaryImageOpTest(test.TestCase):
     self.assertProtoEquals(expected, image_summ)
 
   def testImageSummary(self):
-    np.random.seed(7)
     for depth in (1, 3, 4):
       for positive in False, True:
         with self.test_session(graph=ops.Graph()) as sess:
diff --git a/tensorflow/python/ops/image_ops_impl.py b/tensorflow/python/ops/image_ops_impl.py
index b9c89d62d5..21561f3689 100644
--- a/tensorflow/python/ops/image_ops_impl.py
+++ b/tensorflow/python/ops/image_ops_impl.py
@@ -1168,7 +1168,7 @@ def random_hue(image, max_delta, seed=None):
       set_random_seed for its interaction with the graph-level random seed.
 
   Returns:
-    3-D float tensor of shape `[height, width, channels]`.
+    Adjusted image(s), same shape and DType as `image`.
 
   Raises:
     ValueError: if `max_delta` is invalid.
@@ -1275,30 +1275,9 @@ def adjust_saturation(image, saturation_factor, name=None):
     orig_dtype = image.dtype
     flt_image = convert_image_dtype(image, dtypes.float32)
 
-    # TODO(zhengxq): we will switch to the fused version after we add a GPU
-    # kernel for that.
-    fused = os.environ.get('TF_ADJUST_SATURATION_FUSED', '')
-    fused = fused.lower() in ('true', 't', '1')
-
-    if fused:
-      return convert_image_dtype(
-          gen_image_ops.adjust_saturation(flt_image, saturation_factor),
-          orig_dtype)
-
-    hsv = gen_image_ops.rgb_to_hsv(flt_image)
-
-    hue = array_ops.slice(hsv, [0, 0, 0], [-1, -1, 1])
-    saturation = array_ops.slice(hsv, [0, 0, 1], [-1, -1, 1])
-    value = array_ops.slice(hsv, [0, 0, 2], [-1, -1, 1])
-
-    saturation *= saturation_factor
-    saturation = clip_ops.clip_by_value(saturation, 0.0, 1.0)
-
-    hsv_altered = array_ops.concat([hue, saturation, value], 2)
-    rgb_altered = gen_image_ops.hsv_to_rgb(hsv_altered)
-
-    return convert_image_dtype(rgb_altered, orig_dtype)
-
+    return convert_image_dtype(
+        gen_image_ops.adjust_saturation(flt_image, saturation_factor),
+        orig_dtype)
 
 def decode_image(contents, channels=None, name=None):
   """Convenience function for `decode_bmp`, `decode_gif`, `decode_jpeg`,
diff --git a/tensorflow/python/ops/image_ops_test.py b/tensorflow/python/ops/image_ops_test.py
index d1554b399f..4af9bd2a00 100644
--- a/tensorflow/python/ops/image_ops_test.py
+++ b/tensorflow/python/ops/image_ops_test.py
@@ -281,6 +281,21 @@ class AdjustHueTest(test_util.TensorFlowTestCase):
       y_tf = y.eval()
       self.assertAllEqual(y_tf, y_np)
 
+  def testBatchAdjustHue(self):
+    x_shape = [2, 1, 2, 3]
+    x_data = [0, 5, 13, 54, 135, 226, 37, 8, 234, 90, 255, 1]
+    x_np = np.array(x_data, dtype=np.uint8).reshape(x_shape)
+
+    delta = 0.25
+    y_data = [13, 0, 11, 226, 54, 221, 234, 8, 92, 1, 217, 255]
+    y_np = np.array(y_data, dtype=np.uint8).reshape(x_shape)
+
+    with self.test_session(use_gpu=True):
+      x = constant_op.constant(x_np, shape=x_shape)
+      y = image_ops.adjust_hue(x, delta)
+      y_tf = y.eval()
+      self.assertAllEqual(y_tf, y_np)
+
   def _adjustHueNp(self, x_np, delta_h):
     self.assertEqual(x_np.shape[-1], 3)
     x_v = x_np.reshape([-1, 3])
@@ -359,6 +374,89 @@ class AdjustHueTest(test_util.TensorFlowTestCase):
       self._adjustHueTf(x_np, delta_h)
 
 
+class FlipImageBenchmark(test.Benchmark):
+
+  def _benchmarkFlipLeftRight(self, device, cpu_count):
+    image_shape = [299, 299, 3]
+    warmup_rounds = 100
+    benchmark_rounds = 1000
+    config = config_pb2.ConfigProto()
+    if cpu_count is not None:
+      config.inter_op_parallelism_threads = 1
+      config.intra_op_parallelism_threads = cpu_count
+    with session.Session("", graph=ops.Graph(), config=config) as sess:
+      with ops.device(device):
+        inputs = variables.Variable(
+            random_ops.random_uniform(
+                image_shape, dtype=dtypes.float32) * 255,
+            trainable=False,
+            dtype=dtypes.float32)
+        run_op = image_ops.flip_left_right(inputs)
+        sess.run(variables.global_variables_initializer())
+        for i in xrange(warmup_rounds + benchmark_rounds):
+          if i == warmup_rounds:
+            start = time.time()
+          sess.run(run_op)
+    end = time.time()
+    step_time = (end - start) / benchmark_rounds
+    tag = device + "_%s" % (cpu_count if cpu_count is not None else "_all")
+    print("benchmarkFlipLeftRight_299_299_3_%s step_time: %.2f us" %
+          (tag, step_time * 1e6))
+    self.report_benchmark(
+        name="benchmarkFlipLeftRight_299_299_3_%s" % (tag),
+        iters=benchmark_rounds,
+        wall_time=step_time)
+
+  def _benchmarkRandomFlipLeftRight(self, device, cpu_count):
+    image_shape = [299, 299, 3]
+    warmup_rounds = 100
+    benchmark_rounds = 1000
+    config = config_pb2.ConfigProto()
+    if cpu_count is not None:
+      config.inter_op_parallelism_threads = 1
+      config.intra_op_parallelism_threads = cpu_count
+    with session.Session("", graph=ops.Graph(), config=config) as sess:
+      with ops.device(device):
+        inputs = variables.Variable(
+            random_ops.random_uniform(
+                image_shape, dtype=dtypes.float32) * 255,
+            trainable=False,
+            dtype=dtypes.float32)
+        run_op = image_ops.random_flip_left_right(inputs)
+        sess.run(variables.global_variables_initializer())
+        for i in xrange(warmup_rounds + benchmark_rounds):
+          if i == warmup_rounds:
+            start = time.time()
+          sess.run(run_op)
+    end = time.time()
+    step_time = (end - start) / benchmark_rounds
+    tag = device + "_%s" % (cpu_count if cpu_count is not None else "_all")
+    print("benchmarkRandomFlipLeftRight_299_299_3_%s step_time: %.2f us" %
+          (tag, step_time * 1e6))
+    self.report_benchmark(
+        name="benchmarkRandomFlipLeftRight_299_299_3_%s" % (tag),
+        iters=benchmark_rounds,
+        wall_time=step_time)
+
+  def benchmarkFlipLeftRightCpu1(self):
+    self._benchmarkFlipLeftRight("/cpu:0", 1)
+
+  def benchmarkFlipLeftRightCpuAll(self):
+    self._benchmarkFlipLeftRight("/cpu:0", None)
+
+  def benchmarkFlipLeftRightGpu(self):
+    self._benchmarkFlipLeftRight(test.gpu_device_name(), None)
+
+  def benchmarkRandomFlipLeftRightCpu1(self):
+    self._benchmarkRandomFlipLeftRight("/cpu:0", 1)
+
+  def benchmarkRandomFlipLeftRightCpuAll(self):
+    self._benchmarkRandomFlipLeftRight("/cpu:0", None)
+
+  def benchmarkRandomFlipLeftRightGpu(self):
+    self._benchmarkRandomFlipLeftRight(test.gpu_device_name(), None)
+
+
 class AdjustHueBenchmark(test.Benchmark):
 
   def _benchmarkAdjustHue(self, device, cpu_count):
@@ -632,6 +730,21 @@ class AdjustSaturationTest(test_util.TensorFlowTestCase):
       y_tf = y.eval()
       self.assertAllEqual(y_tf, y_np)
 
+  def testBatchSaturation(self):
+    x_shape = [2, 1, 2, 3]
+    x_data = [0, 5, 13, 54, 135, 226, 37, 8, 234, 90, 255, 1]
+    x_np = np.array(x_data, dtype=np.uint8).reshape(x_shape)
+
+    saturation_factor = 0.5
+    y_data = [6, 9, 13, 140, 180, 226, 135, 121, 234, 172, 255, 128]
+    y_np = np.array(y_data, dtype=np.uint8).reshape(x_shape)
+
+    with self.test_session(use_gpu=True):
+      x = constant_op.constant(x_np, shape=x_shape)
+      y = image_ops.adjust_saturation(x, saturation_factor)
+      y_tf = y.eval()
+      self.assertAllEqual(y_tf, y_np)
+
   def _adjust_saturation(self, image, saturation_factor):
     image = ops.convert_to_tensor(image, name="image")
     orig_dtype = image.dtype
diff --git a/tensorflow/python/ops/logging_ops.py b/tensorflow/python/ops/logging_ops.py
index 08e3f83a0b..51ab2aec22 100644
--- a/tensorflow/python/ops/logging_ops.py
+++ b/tensorflow/python/ops/logging_ops.py
@@ -39,8 +39,8 @@ def Print(input_, data, message=None, first_n=None, summarize=None,
           name=None):
   """Prints a list of tensors.
 
-  This is an identity op with the side effect of printing `data` when
-  evaluating.
+  This is an identity op (behaves like `tf.identity`) with the side effect
+  of printing `data` when evaluating.
 
   Note: This op prints to the standard error. It is not currently compatible
     with jupyter notebook (printing to the notebook *server's* output, not into
@@ -57,7 +57,7 @@ def Print(input_, data, message=None, first_n=None, summarize=None,
     name: A name for the operation (optional).
 
   Returns:
-    Same tensor as `input_`.
+    A `Tensor`. Has the same type and contents as `input_`.
   """
   return gen_logging_ops._print(input_, data, message, first_n, summarize, name)
 
diff --git a/tensorflow/python/ops/nn_impl.py b/tensorflow/python/ops/nn_impl.py
index 19a86df6a9..fd96f7b8fc 100644
--- a/tensorflow/python/ops/nn_impl.py
+++ b/tensorflow/python/ops/nn_impl.py
@@ -27,6 +27,7 @@ from tensorflow.python.framework import ops
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import candidate_sampling_ops
 from tensorflow.python.ops import embedding_ops
+from tensorflow.python.ops import gen_array_ops
 from tensorflow.python.ops import gen_nn_ops
 from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import nn_ops
@@ -981,10 +982,11 @@ def _compute_sampled_logits(weights,
         Default is `"mod"`. See `tf.nn.embedding_lookup` for more details.
     name: A name for the operation (optional).
   Returns:
-    out_logits, out_labels: `Tensor` objects each with shape
+    out_logits: `Tensor` object with shape
         `[batch_size, num_true + num_sampled]`, for passing to either
         `nn.sigmoid_cross_entropy_with_logits` (NCE) or
         `nn.softmax_cross_entropy_with_logits` (sampled softmax).
+    out_labels: A Tensor object with the same shape as `out_logits`.
   """
 
   if isinstance(weights, variables.PartitionedVariable):
@@ -1095,15 +1097,16 @@ def _compute_sampled_logits(weights,
 
     # Construct output logits and labels. The true labels/logits start at col 0.
     out_logits = array_ops.concat([true_logits, sampled_logits], 1)
-    # true_logits is a float tensor, ones_like(true_logits) is a float tensor
-    # of ones. We then divide by num_true to ensure the per-example labels sum
-    # to 1.0, i.e. form a proper probability distribution.
+
+    # true_logits is a float tensor, ones_like(true_logits) is a float
+    # tensor of ones. We then divide by num_true to ensure the per-example
+    # labels sum to 1.0, i.e. form a proper probability distribution.
     out_labels = array_ops.concat([
         array_ops.ones_like(true_logits) / num_true,
         array_ops.zeros_like(sampled_logits)
     ], 1)
 
-  return out_logits, out_labels
+    return out_logits, out_labels
 
 
 def nce_loss(weights,
diff --git a/tensorflow/python/ops/quantized_conv_ops_test.py b/tensorflow/python/ops/quantized_conv_ops_test.py
index 5ea47ea40e..5e9e710027 100644
--- a/tensorflow/python/ops/quantized_conv_ops_test.py
+++ b/tensorflow/python/ops/quantized_conv_ops_test.py
@@ -93,7 +93,7 @@ class Conv2DTest(test.TestCase):
     quantized_range = ((quantized_max - quantized_min) * range_adjust)
     range_scale = (quantized_range / number_of_steps)
     lowest_quantized = -(1 << (number_of_bits - 1))
-    result = np.array([(quantized_min + ((x - lowest_quantized) * range_scale))
+    result = np.array([(quantized_min + ((float(x) - lowest_quantized) * range_scale))
                        for x in quantized.flatten()])
     return result
 
diff --git a/tensorflow/python/ops/quantized_ops_test.py b/tensorflow/python/ops/quantized_ops_test.py
new file mode 100644
index 0000000000..4bf3b35e13
--- /dev/null
+++ b/tensorflow/python/ops/quantized_ops_test.py
@@ -0,0 +1,57 @@
+# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Functional tests for quantized operations."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import numpy as np
+
+from tensorflow.python.framework import constant_op
+from tensorflow.python.framework import dtypes
+from tensorflow.python.ops import array_ops
+from tensorflow.python.platform import test
+
+
+class QuantizedOpsTest(test.TestCase):
+
+  def __init__(self, method_name="runTest"):
+    super(QuantizedOpsTest, self).__init__(method_name)
+
+  def testQuantizeOp(self):
+    expected_output = [1, 1, 2, 127, 255, 255]
+    with self.test_session(use_gpu=False) as sess:
+      x = constant_op.constant([1.0, 1.25, 1.75, 127.0, 255.0, 500.0], shape=[6], dtype=dtypes.float32)
+      x_min = 0.0
+      x_max = 255.0
+      op = array_ops.quantize(x, x_min, x_max, dtypes.quint8, mode="MIN_FIRST")
+      value = sess.run(op)
+      self.assertArrayNear(expected_output, value.output, 0.1)
+
+  def testDequantizeOp(self):
+    expected_output = [1.0, 2.0, 4.0, 8.0, 16.0, 255.0]
+    inp = np.array([1, 2, 4, 8, 16, 255]).astype(np.uint8)
+    with self.test_session(use_gpu=False) as sess:
+      x = constant_op.constant(inp, shape=[6], dtype=dtypes.quint8)
+      x_min = 0.0
+      x_max = 255.0
+      op = array_ops.dequantize(x, x_min, x_max, mode="MIN_FIRST")
+      value = sess.run(op)
+      self.assertArrayNear(expected_output, value, 0.1)
+
+
+if __name__ == "__main__":
+  test.main()
diff --git a/tensorflow/python/training/learning_rate_decay.py b/tensorflow/python/training/learning_rate_decay.py
index 802b930b0e..f0c28e7b89 100644
--- a/tensorflow/python/training/learning_rate_decay.py
+++ b/tensorflow/python/training/learning_rate_decay.py
@@ -362,7 +362,13 @@ def inverse_time_decay(learning_rate, global_step, decay_steps, decay_rate,
   The function returns the decayed learning rate.  It is computed as:
 
   ```python
-  decayed_learning_rate = learning_rate / (1 + decay_rate * t)
+  decayed_learning_rate = learning_rate / (1 + decay_rate * global_step / decay_step)
+  ```
+
+  or, if `staircase` is `True`, as:
+
+  ```python
+  decayed_learning_rate = learning_rate / (1 + decay_rate * floor(global_step / decay_step))
   ```
 
   Example: decay 1/t with a rate of 0.5:
@@ -371,8 +377,9 @@ def inverse_time_decay(learning_rate, global_step, decay_steps, decay_rate,
   ...
   global_step = tf.Variable(0, trainable=False)
   learning_rate = 0.1
-  k = 0.5
-  learning_rate = tf.train.inverse_time_decay(learning_rate, global_step, k)
+  decay_steps = 1.0
+  decay_rate = 0.5
+  learning_rate = tf.train.inverse_time_decay(learning_rate, global_step, decay_steps, decay_rate)
 
   # Passing global_step to minimize() will increment it at each step.
   learning_step = (
diff --git a/tensorflow/python/util/tf_inspect.py b/tensorflow/python/util/tf_inspect.py
index 9ed125704b..d14e710388 100644
--- a/tensorflow/python/util/tf_inspect.py
+++ b/tensorflow/python/util/tf_inspect.py
@@ -45,6 +45,26 @@ def getargspec(object):  # pylint: disable=redefined-builtin
                if d.decorator_argspec is not None), _inspect.getargspec(target))
 
 
+def getfullargspec(obj):  # pylint: disable=redefined-builtin
+  """TFDecorator-aware replacement for inspect.getfullargspec and fallback to
+  inspect.getargspec in Python 2.
+
+  Args:
+    obj: A callable, possibly decorated.
+
+  Returns:
+    The `FullArgSpec` (`ArgSpec` in Python 2) that describes the signature of
+    the outermost decorator that changes the callable's signature. If the
+    callable is not decorated, `inspect.getfullargspec()`
+    (`inspect.getargspec()` in Python 2) will be called directly on the
+    callable.
+  """
+  spec_fn = getattr(_inspect, 'getfullargspec', getattr(_inspect, 'getargspec'))
+  decorators, target = tf_decorator.unwrap(obj)
+  return next((d.decorator_argspec for d in decorators
+               if d.decorator_argspec is not None), spec_fn(target))
+
+
 def getcallargs(func, *positional, **named):
   """TFDecorator-aware replacement for inspect.getcallargs.
 
diff --git a/tensorflow/stream_executor/dnn.h b/tensorflow/stream_executor/dnn.h
index 0d2cd4a9f2..73b96de438 100644
--- a/tensorflow/stream_executor/dnn.h
+++ b/tensorflow/stream_executor/dnn.h
@@ -1132,7 +1132,7 @@ class DnnSupport {
   //    space in order to speed up the convolution operation.
   //  algorithm: an integer to specify which algorithm should be used for the
   //    operation. kDefaultAlgorithm means the system will pick an algorithm
-  //    by default. The coding of the algorithm is be interpretted by the
+  //    by default. The coding of the algorithm is be interpreted by the
   //    underlying implementation.
   //  output_profile_result: the output profile result for this call. The
   //    profiling is only enabled when this is not nullptr.
diff --git a/tensorflow/tensorflow.bzl b/tensorflow/tensorflow.bzl
index 611d50bc52..9b13a86ed3 100644
--- a/tensorflow/tensorflow.bzl
+++ b/tensorflow/tensorflow.bzl
@@ -130,6 +130,13 @@ def if_not_windows(a):
       "//conditions:default": a,
   })
 
+def if_windows(a):
+  return select({
+      clean_dep("//tensorflow:windows"): a,
+      clean_dep("//tensorflow:windows_msvc"): a,
+      "//conditions:default": [],
+  })
+
 def if_linux_x86_64(a):
   return select({
       clean_dep("//tensorflow:linux_x86_64"): a,
@@ -1325,11 +1332,32 @@ def tf_py_wrap_cc(name,
           "//conditions:default": [":" + cc_library_name],
       }))
 
-def py_test(deps=[], **kwargs):
+# This macro is for running python tests against system installed pip package
+# on Windows.
+#
+# py_test is built as an exectuable python zip file on Windows, which contains all
+# dependencies of the target. Because of the C++ extensions, it would be very
+# inefficient if the py_test zips all runfiles, plus we don't need them when running
+# tests against system installed pip package. So we'd like to get rid of the deps
+# of py_test in this case.
+#
+# In order to trigger the tests without bazel clean after getting rid of deps,
+# we introduce the following :
+# 1. When --define=no_tensorflow_py_deps=true, the py_test depends on a marker
+#    file of the pip package, the test gets to rerun when the pip package change.
+#    Note that this only works on Windows. See the definition of
+#    //tensorflow/tools/pip_package:win_pip_package_marker for specific reasons.
+# 2. When --define=no_tensorflow_py_deps=false (by default), it's a normal py_test.
+def py_test(deps=[], data=[], **kwargs):
   native.py_test(
       deps=select({
           "//conditions:default": deps,
-          clean_dep("//tensorflow:no_tensorflow_py_deps"): []
+          clean_dep("//tensorflow:no_tensorflow_py_deps"): [],
+      }),
+      data = data + select({
+          "//conditions:default": [],
+          clean_dep("//tensorflow:no_tensorflow_py_deps"):
+          ["//tensorflow/tools/pip_package:win_pip_package_marker"],
       }),
       **kwargs)
 
@@ -1354,7 +1382,7 @@ def tf_py_test(name,
     additional_deps = additional_deps + tf_additional_xla_deps_py()
   if grpc_enabled:
     additional_deps = additional_deps + tf_additional_grpc_deps_py()
-  native.py_test(
+  py_test(
       name=name,
       size=size,
       srcs=srcs,
@@ -1364,13 +1392,10 @@ def tf_py_test(name,
       visibility=[clean_dep("//tensorflow:internal")],
       shard_count=shard_count,
       data=data,
-      deps=select({
-          "//conditions:default": [
-              clean_dep("//tensorflow/python:extra_py_tests_deps"),
-              clean_dep("//tensorflow/python:gradient_checker"),
+      deps=[
+            clean_dep("//tensorflow/python:extra_py_tests_deps"),
+            clean_dep("//tensorflow/python:gradient_checker"),
           ] + additional_deps,
-          clean_dep("//tensorflow:no_tensorflow_py_deps"): []
-      }),
       flaky=flaky,
       srcs_version="PY2AND3")
 
diff --git a/tensorflow/tools/benchmark/benchmark_model.cc b/tensorflow/tools/benchmark/benchmark_model.cc
index 9809ad52de..ecab6f8769 100644
--- a/tensorflow/tools/benchmark/benchmark_model.cc
+++ b/tensorflow/tools/benchmark/benchmark_model.cc
@@ -530,7 +530,7 @@ int Main(int argc, char** argv) {
   }
 
   // Capture overall inference time without stat logging overhead. This is the
-  // timing data that can be compared to other libaries.
+  // timing data that can be compared to other libraries.
   SleepSeconds(inter_benchmark_sleep_seconds);
   int64 no_stat_time_us = 0;
   int64 no_stat_num_runs = 0;
diff --git a/tensorflow/tools/ci_build/Dockerfile.gpu b/tensorflow/tools/ci_build/Dockerfile.gpu
index 2d46ccb6b1..7591ecc04e 100644
--- a/tensorflow/tools/ci_build/Dockerfile.gpu
+++ b/tensorflow/tools/ci_build/Dockerfile.gpu
@@ -1,8 +1,8 @@
-FROM nvidia/cuda:8.0-cudnn6-devel-ubuntu14.04
+FROM nvidia/cuda:9.0-cudnn7-devel-ubuntu16.04
 
 LABEL maintainer="Jan Prach <jendap@google.com>"
 
-# In the Ubuntu 14.04 images, cudnn is placed in system paths. Move them to
+# In the Ubuntu 16.04 images, cudnn is placed in system paths. Move them to
 # /usr/local/cuda
 RUN cp -P /usr/include/cudnn.h /usr/local/cuda/include
 RUN cp -P /usr/lib/x86_64-linux-gnu/libcudnn* /usr/local/cuda/lib64
diff --git a/tensorflow/tools/ci_build/Dockerfile.gpu_clang b/tensorflow/tools/ci_build/Dockerfile.gpu_clang
index 0ecd8c75e0..438a7ec532 100644
--- a/tensorflow/tools/ci_build/Dockerfile.gpu_clang
+++ b/tensorflow/tools/ci_build/Dockerfile.gpu_clang
@@ -1,8 +1,8 @@
-FROM nvidia/cuda:8.0-cudnn6-devel-ubuntu14.04
+FROM nvidia/cuda:9.0-cudnn7-devel-ubuntu16.04
 
 LABEL maintainer="Ilya Biryukov <ibiryukov@google.com>"
 
-# In the Ubuntu 14.04 images, cudnn is placed in system paths. Move them to
+# In the Ubuntu 16.04 images, cudnn is placed in system paths. Move them to
 # /usr/local/cuda
 RUN cp /usr/include/cudnn.h /usr/local/cuda/include
 RUN cp /usr/lib/x86_64-linux-gnu/libcudnn* /usr/local/cuda/lib64
diff --git a/tensorflow/tools/ci_build/builds/pip.sh b/tensorflow/tools/ci_build/builds/pip.sh
index a37cf226f9..82042b93c0 100755
--- a/tensorflow/tools/ci_build/builds/pip.sh
+++ b/tensorflow/tools/ci_build/builds/pip.sh
@@ -296,19 +296,12 @@ create_activate_virtualenv_and_install_tensorflow() {
     die "FAILED to create virtualenv directory: ${VIRTUALENV_DIR}"
   fi
 
-  if [[ ${PYTHON_BIN_PATH} == *"python3.6"* ]]; then
-    "${PYTHON_BIN_PATH}" -m venv "${VIRTUALENV_FLAGS}" \
-      "${VIRTUALENV_DIR}" || \
-      die "FAILED: Unable to create virtualenv"
-  else
-    # Verify that virtualenv exists
-    if [[ -z $(which virtualenv) ]]; then
-      die "FAILED: virtualenv not available on path"
-    fi
-    virtualenv ${VIRTUALENV_FLAGS} \
-      -p "${PYTHON_BIN_PATH}" "${VIRTUALENV_DIR}" || \
-      die "FAILED: Unable to create virtualenv"
-  fi
+  # Use the virtualenv from the default python version (i.e., python-virtualenv)
+  # to create the virtualenv directory for testing. Use the -p flag to specify
+  # the python version inside the to-be-created virtualenv directory.
+  ${PYTHON_BIN_PATH} -m virtualenv -p "${PYTHON_BIN_PATH}" ${VIRTUALENV_FLAGS} \
+    "${VIRTUALENV_DIR}" || \
+    die "FAILED: Unable to create virtualenv"
 
   source "${VIRTUALENV_DIR}/bin/activate" || \
     die "FAILED: Unable to activate virtualenv in ${VIRTUALENV_DIR}"
@@ -350,7 +343,7 @@ do_clean_virtualenv_smoke_test() {
   then
     echo "Smoke test of tensorflow install in clean virtualenv PASSED."
   else
-    echo "Smoke test of tensroflow install in clean virtualenv FAILED."
+    echo "Smoke test of tensorflow install in clean virtualenv FAILED."
     return 1
   fi
 
diff --git a/tensorflow/tools/ci_build/builds/print_build_info.sh b/tensorflow/tools/ci_build/builds/print_build_info.sh
index 7c43419a76..e366abf8bb 100755
--- a/tensorflow/tools/ci_build/builds/print_build_info.sh
+++ b/tensorflow/tools/ci_build/builds/print_build_info.sh
@@ -88,7 +88,7 @@ fi
 # Print info
 echo "TF_BUILD_INFO = {"\
 "container_type: \"${CONTAINER_TYPE}\", "\
-"command: \"${COMMAND[@]}\", "\
+"command: \"${COMMAND[*]}\", "\
 "source_HEAD: \"${TF_HEAD}\", "\
 "source_remote_origin: \"${TF_FETCH_URL}\", "\
 "OS: \"${OS}\", "\
diff --git a/tensorflow/tools/ci_build/builds/test_user_ops.sh b/tensorflow/tools/ci_build/builds/test_user_ops.sh
index 358f82ac5d..caa3a40817 100755
--- a/tensorflow/tools/ci_build/builds/test_user_ops.sh
+++ b/tensorflow/tools/ci_build/builds/test_user_ops.sh
@@ -82,11 +82,11 @@ TF_CFLAGS=( $("${PYTHON_BIN_PATH}" \
 TF_LFLAGS=( $("${PYTHON_BIN_PATH}" \
 	      -c 'import tensorflow as tf; print(" ".join(tf.sysconfig.get_link_flags()))') )
 
-if [[ -z "${TF_CFLAGS}" || -z "${TF_LFLAGS}" ]]; then
+if [[ -z "${TF_CFLAGS[*]}" || -z "${TF_LFLAGS[*]}" ]]; then
   die "FAILED to determine TensorFlow compilation or linking flags"
 else
-  echo "TensorFlow compile flags: ${TF_CFLAGS[@]}"
-  echo "TensorFlow link flags: ${TF_LFLAGS[@]}"
+  echo "TensorFlow compile flags: ${TF_CFLAGS[*]}"
+  echo "TensorFlow link flags: ${TF_LFLAGS[*]}"
 fi
 
 # Check g++ availability
diff --git a/tensorflow/tools/ci_build/gpu_build/parallel_gpu_execute.sh b/tensorflow/tools/ci_build/gpu_build/parallel_gpu_execute.sh
index 6e7b752c06..cfeaebdbf5 100755
--- a/tensorflow/tools/ci_build/gpu_build/parallel_gpu_execute.sh
+++ b/tensorflow/tools/ci_build/gpu_build/parallel_gpu_execute.sh
@@ -45,7 +45,7 @@ for i in `seq 0 $((TF_GPU_COUNT-1))`; do
       # This export only works within the brackets, so it is isolated to one
       # single command.
       export CUDA_VISIBLE_DEVICES=$i
-      echo "Running test $@ on GPU $CUDA_VISIBLE_DEVICES"
+      echo "Running test $* on GPU $CUDA_VISIBLE_DEVICES"
       $@
     )
     return_code=$?
diff --git a/tensorflow/tools/ci_build/install/install_deb_packages.sh b/tensorflow/tools/ci_build/install/install_deb_packages.sh
index 4ab307c925..9640810533 100755
--- a/tensorflow/tools/ci_build/install/install_deb_packages.sh
+++ b/tensorflow/tools/ci_build/install/install_deb_packages.sh
@@ -48,6 +48,7 @@ apt-get install -y --no-install-recommends \
     git \
     libcurl4-openssl-dev \
     libtool \
+    libssl-dev \
     mlocate \
     openjdk-8-jdk \
     openjdk-8-jre-headless \
diff --git a/tensorflow/tools/ci_build/install/install_pip_packages.sh b/tensorflow/tools/ci_build/install/install_pip_packages.sh
index b8ed1ab767..da58ac2407 100755
--- a/tensorflow/tools/ci_build/install/install_pip_packages.sh
+++ b/tensorflow/tools/ci_build/install/install_pip_packages.sh
@@ -27,6 +27,9 @@ easy_install3 -U pip
 pip2 install wheel
 pip3 install wheel
 
+pip2 install virtualenv
+pip3 install virtualenv
+
 # Install six.
 pip2 install --upgrade six==1.10.0
 pip3 install --upgrade six==1.10.0
diff --git a/tensorflow/tools/ci_build/install/install_python3.5_pip_packages.sh b/tensorflow/tools/ci_build/install/install_python3.5_pip_packages.sh
index 479242aa43..9881bd99c3 100755
--- a/tensorflow/tools/ci_build/install/install_python3.5_pip_packages.sh
+++ b/tensorflow/tools/ci_build/install/install_python3.5_pip_packages.sh
@@ -39,6 +39,8 @@ if [[ -z $pip35_version ]]; then
 fi
 
 set -e
+pip3.5 install --upgrade virtualenv
+
 # Install six.
 pip3.5 install --upgrade absl-py
 pip3.5 install --upgrade six==1.10.0
diff --git a/tensorflow/tools/ci_build/install/install_python3.6_pip_packages.sh b/tensorflow/tools/ci_build/install/install_python3.6_pip_packages.sh
index ec7d9bf195..1ca12c6c60 100755
--- a/tensorflow/tools/ci_build/install/install_python3.6_pip_packages.sh
+++ b/tensorflow/tools/ci_build/install/install_python3.6_pip_packages.sh
@@ -36,6 +36,8 @@ pip3.6 -V
 which pip3.6
 ln -s /usr/local/bin/pip3.6 /usr/local/bin/pip3
 
+pip3 install --upgrade virtualenv
+
 set -e
 # Install six.
 pip3 install --upgrade absl-py
diff --git a/tensorflow/tools/ci_build/linux/gpu/run_cc_core.sh b/tensorflow/tools/ci_build/linux/gpu/run_cc_core.sh
index df196f829c..ac83e90f76 100755
--- a/tensorflow/tools/ci_build/linux/gpu/run_cc_core.sh
+++ b/tensorflow/tools/ci_build/linux/gpu/run_cc_core.sh
@@ -28,6 +28,8 @@ echo ""
 export PYTHON_BIN_PATH=`which python3`
 
 export TF_NEED_CUDA=1
+export TF_CUDA_VERSION=8.0
+export TF_CUDNN_VERSION=6
 export TF_CUDA_COMPUTE_CAPABILITIES=3.7
 
 yes "" | $PYTHON_BIN_PATH configure.py
diff --git a/tensorflow/tools/ci_build/linux/gpu/run_py3_core.sh b/tensorflow/tools/ci_build/linux/gpu/run_py3_core.sh
index abd256a895..6b80f44729 100755
--- a/tensorflow/tools/ci_build/linux/gpu/run_py3_core.sh
+++ b/tensorflow/tools/ci_build/linux/gpu/run_py3_core.sh
@@ -28,6 +28,8 @@ echo ""
 export PYTHON_BIN_PATH=`which python3`
 
 export TF_NEED_CUDA=1
+export TF_CUDA_VERSION=8.0
+export TF_CUDNN_VERSION=6
 export TF_CUDA_COMPUTE_CAPABILITIES=3.7
 
 yes "" | $PYTHON_BIN_PATH configure.py
diff --git a/tensorflow/tools/ci_build/pi/build_raspberry_pi.sh b/tensorflow/tools/ci_build/pi/build_raspberry_pi.sh
index 88116d9f24..1bd1852ffc 100755
--- a/tensorflow/tools/ci_build/pi/build_raspberry_pi.sh
+++ b/tensorflow/tools/ci_build/pi/build_raspberry_pi.sh
@@ -82,6 +82,7 @@ if [[ $1 == "PI_ONE" ]]; then
 else
   PI_COPTS='--copt=-march=armv7-a --copt=-mfpu=neon-vfpv4
   --copt=-std=gnu11 --copt=-DS_IREAD=S_IRUSR --copt=-DS_IWRITE=S_IWUSR
+  --copt=-O3
   --copt=-U__GCC_HAVE_SYNC_COMPARE_AND_SWAP_1
   --copt=-U__GCC_HAVE_SYNC_COMPARE_AND_SWAP_2
   --copt=-U__GCC_HAVE_SYNC_COMPARE_AND_SWAP_8'
diff --git a/tensorflow/tools/ci_build/remote/remote_docker_build.sh b/tensorflow/tools/ci_build/remote/remote_docker_build.sh
index 3ac6840f4e..e00a66aaba 100755
--- a/tensorflow/tools/ci_build/remote/remote_docker_build.sh
+++ b/tensorflow/tools/ci_build/remote/remote_docker_build.sh
@@ -124,7 +124,7 @@ function build_tf_image {
 
 
 function publish_tf_image {
-  $gcr_tf_image="gcr.io/tensorflow/${tf_image}"
+  gcr_tf_image="gcr.io/tensorflow/${tf_image}"
   docker tag $tf_image $gcr_tf_image
   gcloud docker -- push $gcr_tf_image
 }
diff --git a/tensorflow/tools/ci_build/windows/bazel/bazel_test_lib.sh b/tensorflow/tools/ci_build/windows/bazel/bazel_test_lib.sh
index 44b6d52952..8d50250c3a 100644
--- a/tensorflow/tools/ci_build/windows/bazel/bazel_test_lib.sh
+++ b/tensorflow/tools/ci_build/windows/bazel/bazel_test_lib.sh
@@ -96,10 +96,6 @@ exclude_cpu_cc_tests="${failing_cpu_cc_tests} + ${broken_cpu_cc_tests}"
 
 exclude_gpu_cc_tests="${extra_failing_gpu_cc_tests} + ${exclude_cpu_cc_tests}"
 
-function clean_output_base() {
-  bazel clean --expunge
-}
-
 function run_configure_for_cpu_build {
   # Due to a bug in Bazel: https://github.com/bazelbuild/bazel/issues/2182
   # yes "" | ./configure doesn't work on Windows, so we set all the
@@ -115,7 +111,7 @@ function run_configure_for_cpu_build {
     export TF_NEED_MKL=0
   fi
   export TF_NEED_VERBS=0
-  export TF_NEED_GCP=0
+  export TF_NEED_GCP=1
   export TF_NEED_HDFS=0
   export TF_NEED_OPENCL_SYCL=0
   echo "" | ./configure
diff --git a/tensorflow/tools/ci_build/windows/bazel/common_env.sh b/tensorflow/tools/ci_build/windows/bazel/common_env.sh
index 4a653698a2..f88e7176f0 100644
--- a/tensorflow/tools/ci_build/windows/bazel/common_env.sh
+++ b/tensorflow/tools/ci_build/windows/bazel/common_env.sh
@@ -36,12 +36,6 @@ export BAZEL_SH=${BAZEL_SH:-"C:/tools/msys64/usr/bin/bash"}
 export PYTHON_BIN_PATH="C:/Program Files/Anaconda3/python.exe"
 export PYTHON_LIB_PATH="C:/Program Files/Anaconda3/lib/site-packages"
 
-# Set Python path for cc_configure.bzl
-export BAZEL_PYTHON="C:/Program Files/Anaconda3/python.exe"
-
-# Set Visual Studio path
-export BAZEL_VS="C:/Program Files (x86)/Microsoft Visual Studio 14.0"
-
 # Add python into PATH, it's needed because gen_git_source.py uses
 # '/usr/bin/env python' as a shebang
 export PATH="/c/Program Files/Anaconda3:$PATH"
@@ -53,13 +47,3 @@ export PATH="/c/Program Files/Anaconda3/Scripts:$PATH"
 export PATH="/c/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v8.0/bin:$PATH"
 export PATH="/c/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v8.0/extras/CUPTI/libx64:$PATH"
 export PATH="/c/tools/cuda/bin:$PATH"
-
-# Set the common build options on Windows
-export BUILD_OPTS='--config=monolithic --copt=-w --host_copt=-w --verbose_failures --experimental_ui'
-
-# Build TF with wrapper-less CROSSTOOL
-# TODO(pcloudy): Remove this after wrapper-less CROSSTOOL becomes default
-export NO_MSVC_WRAPPER=1
-
-export USE_DYNAMIC_CRT=1
-
diff --git a/tensorflow/tools/ci_build/windows/cpu/bazel/run_cc_test_windows.sh b/tensorflow/tools/ci_build/windows/cpu/bazel/run_cc_test_windows.sh
index 8c419347d6..748a961e44 100644
--- a/tensorflow/tools/ci_build/windows/cpu/bazel/run_cc_test_windows.sh
+++ b/tensorflow/tools/ci_build/windows/cpu/bazel/run_cc_test_windows.sh
@@ -42,8 +42,6 @@ source "tensorflow/tools/ci_build/windows/bazel/common_env.sh" \
 source "tensorflow/tools/ci_build/windows/bazel/bazel_test_lib.sh" \
   || { echo "Failed to source bazel_test_lib.sh" >&2; exit 1; }
 
-clean_output_base
-
 run_configure_for_cpu_build
 
 # Compliling the following test is extremely slow with -c opt
@@ -54,5 +52,5 @@ passing_tests=$(bazel query "kind(cc_test, //tensorflow/cc/... + //tensorflow/co
   # We need to strip \r so that the result could be store into a variable under MSYS
   tr '\r' ' ')
 
-bazel test $BUILD_OPTS -k $slow_compiling_test --test_output=errors
-bazel test -c opt $BUILD_OPTS -k $passing_tests --test_output=errors
+bazel test -k $slow_compiling_test --test_output=errors
+bazel test -c opt -k $passing_tests --test_output=errors
diff --git a/tensorflow/tools/ci_build/windows/cpu/pip/build_tf_windows.sh b/tensorflow/tools/ci_build/windows/cpu/pip/build_tf_windows.sh
index 8520ca898f..31b4226a30 100644
--- a/tensorflow/tools/ci_build/windows/cpu/pip/build_tf_windows.sh
+++ b/tensorflow/tools/ci_build/windows/cpu/pip/build_tf_windows.sh
@@ -44,9 +44,7 @@ source "tensorflow/tools/ci_build/windows/bazel/bazel_test_lib.sh" \
 
 run_configure_for_cpu_build
 
-clean_output_base
-
-bazel build -c opt $BUILD_OPTS tensorflow/tools/pip_package:build_pip_package || exit $?
+bazel build -c opt tensorflow/tools/pip_package:build_pip_package || exit $?
 
 # Create a python test directory to avoid package name conflict
 PY_TEST_DIR="py_test_dir"
@@ -60,11 +58,8 @@ reinstall_tensorflow_pip ${PIP_NAME}
 
 # Define no_tensorflow_py_deps=true so that every py_test has no deps anymore,
 # which will result testing system installed tensorflow
-# TODO(pcloudy): Remove TF_SAVER_LENIENT_NAMES after
-# https://github.com/tensorflow/tensorflow/issues/12844 is fixed.
-bazel test -c opt $BUILD_OPTS -k --test_output=errors \
+bazel test -c opt -k --test_output=errors \
   --define=no_tensorflow_py_deps=true --test_lang_filters=py \
   --test_tag_filters=-no_pip,-no_windows,-no_oss \
   --build_tag_filters=-no_pip,-no_windows,-no_oss --build_tests_only \
-  --test_env=TF_SAVER_LENIENT_NAMES=True \
   //${PY_TEST_DIR}/tensorflow/python/...
diff --git a/tensorflow/tools/ci_build/windows/gpu/bazel/run_cc_test_windows.sh b/tensorflow/tools/ci_build/windows/gpu/bazel/run_cc_test_windows.sh
index 3fd960deab..f26f8727e5 100644
--- a/tensorflow/tools/ci_build/windows/gpu/bazel/run_cc_test_windows.sh
+++ b/tensorflow/tools/ci_build/windows/gpu/bazel/run_cc_test_windows.sh
@@ -56,5 +56,5 @@ passing_tests=$(bazel query "kind(cc_test, //tensorflow/cc/... + //tensorflow/co
 
 # TODO(pcloudy): There is a bug in Bazel preventing build with GPU support without -c opt
 # Re-enable this test after it is fixed.
-# bazel test --config=win-cuda $BUILD_OPTS -k $slow_compiling_test --test_output=errors
-bazel test -c opt --config=win-cuda $BUILD_OPTS -k $passing_tests --test_output=errors
+# bazel test --config=win-cuda -k $slow_compiling_test --test_output=errors
+bazel test -c opt --config=win-cuda -k $passing_tests --test_output=errors
diff --git a/tensorflow/tools/ci_build/windows/gpu/pip/build_tf_windows.sh b/tensorflow/tools/ci_build/windows/gpu/pip/build_tf_windows.sh
index 47ca42d642..922bb67bbf 100644
--- a/tensorflow/tools/ci_build/windows/gpu/pip/build_tf_windows.sh
+++ b/tensorflow/tools/ci_build/windows/gpu/pip/build_tf_windows.sh
@@ -44,9 +44,7 @@ source "tensorflow/tools/ci_build/windows/bazel/bazel_test_lib.sh" \
 
 run_configure_for_gpu_build
 
-clean_output_base
-
-bazel build -c opt $BUILD_OPTS tensorflow/tools/pip_package:build_pip_package || exit $?
+bazel build -c opt tensorflow/tools/pip_package:build_pip_package || exit $?
 
 # Create a python test directory to avoid package name conflict
 PY_TEST_DIR="py_test_dir"
@@ -61,11 +59,8 @@ reinstall_tensorflow_pip ${PIP_NAME}
 # Define no_tensorflow_py_deps=true so that every py_test has no deps anymore,
 # which will result testing system installed tensorflow
 # GPU tests are very flaky when running concurrently, so set local_test_jobs=1
-# TODO(pcloudy): Remove TF_SAVER_LENIENT_NAMES after
-# https://github.com/tensorflow/tensorflow/issues/12844 is fixed.
-bazel test -c opt $BUILD_OPTS -k --test_output=errors \
+bazel test -c opt -k --test_output=errors \
   --define=no_tensorflow_py_deps=true --test_lang_filters=py \
   --test_tag_filters=-no_pip,-no_windows,-no_windows_gpu,-no_gpu,-no_pip_gpu,no_oss \
   --build_tag_filters=-no_pip,-no_windows,-no_windows_gpu,-no_gpu,-no_pip_gpu,no_oss \
-  --test_env=TF_SAVER_LENIENT_NAMES=True \
   --local_test_jobs=1 --build_tests_only //${PY_TEST_DIR}/tensorflow/python/...
diff --git a/tensorflow/tools/ci_build/windows/libtensorflow_cpu.sh b/tensorflow/tools/ci_build/windows/libtensorflow_cpu.sh
index 9ac3613f27..80f2b590c9 100755
--- a/tensorflow/tools/ci_build/windows/libtensorflow_cpu.sh
+++ b/tensorflow/tools/ci_build/windows/libtensorflow_cpu.sh
@@ -44,13 +44,12 @@ export TF_BAZEL_TARGETS="${TF_BAZEL_TARGETS} //tensorflow/tools/lib_package:clic
 export TF_BAZEL_TARGETS="${TF_BAZEL_TARGETS} //tensorflow/java:libtensorflow_jni.so"
 export TF_BAZEL_TARGETS="${TF_BAZEL_TARGETS} //tensorflow/tools/lib_package:jnilicenses_generate"
 
-clean_output_base
 run_configure_for_cpu_build
 
 # build_libtensorflow_tarball in ../builds/libtensorflow.sh
 # cannot be used on Windows since it relies on pkg_tar rules.
 # So we do something special here
-bazel build -c opt ${BUILD_OPTS} \
+bazel build -c opt \
   tensorflow:libtensorflow.so \
   tensorflow/tools/lib_package:clicenses_generate \
   tensorflow/java:libtensorflow_jni.so \
diff --git a/tensorflow/tools/ci_build/xla/linux/gpu/run_py3.sh b/tensorflow/tools/ci_build/xla/linux/gpu/run_py3.sh
index a94a627dfb..88333de856 100755
--- a/tensorflow/tools/ci_build/xla/linux/gpu/run_py3.sh
+++ b/tensorflow/tools/ci_build/xla/linux/gpu/run_py3.sh
@@ -28,6 +28,8 @@ echo ""
 export PYTHON_BIN_PATH=`which python3`
 
 export TF_NEED_CUDA=1
+export TF_CUDA_VERSION=8.0
+export TF_CUDNN_VERSION=6
 export TF_CUDA_COMPUTE_CAPABILITIES=3.7
 
 yes "" | $PYTHON_BIN_PATH configure.py
diff --git a/tensorflow/tools/docker/Dockerfile.devel b/tensorflow/tools/docker/Dockerfile.devel
index 3525c7524f..0a6860e791 100644
--- a/tensorflow/tools/docker/Dockerfile.devel
+++ b/tensorflow/tools/docker/Dockerfile.devel
@@ -69,11 +69,8 @@ RUN mkdir /bazel && \
     rm -f /bazel/bazel-$BAZEL_VERSION-installer-linux-x86_64.sh
 
 # Download and build TensorFlow.
-
-RUN git clone https://github.com/tensorflow/tensorflow.git && \
-    cd tensorflow && \
-    git checkout r1.4
 WORKDIR /tensorflow
+RUN git clone --branch=r1.4 --depth=1 https://github.com/tensorflow/tensorflow.git .
 
 # TODO(craigcitro): Don't install the pip package, since it makes it
 # more difficult to experiment with local changes. Instead, just add
diff --git a/tensorflow/tools/docker/Dockerfile.devel-gpu b/tensorflow/tools/docker/Dockerfile.devel-gpu
index 041f45971b..4164cc3f88 100644
--- a/tensorflow/tools/docker/Dockerfile.devel-gpu
+++ b/tensorflow/tools/docker/Dockerfile.devel-gpu
@@ -1,11 +1,20 @@
-FROM nvidia/cuda:8.0-cudnn6-devel-ubuntu16.04
+FROM nvidia/cuda:9.0-base-ubuntu16.04
 
 LABEL maintainer="Craig Citro <craigcitro@google.com>"
 
 RUN apt-get update && apt-get install -y --no-install-recommends \
         build-essential \
+        cuda-command-line-tools-9-0 \
+        cuda-cublas-dev-9-0 \
+        cuda-cudart-dev-9-0 \
+        cuda-cufft-dev-9-0 \
+        cuda-curand-dev-9-0 \
+        cuda-cusolver-dev-9-0 \
+        cuda-cusparse-dev-9-0 \
         curl \
         git \
+        libcudnn7=7.0.5.15-1+cuda9.0 \
+        libcudnn7-dev=7.0.5.15-1+cuda9.0 \
         libcurl3-dev \
         libfreetype6-dev \
         libpng12-dev \
@@ -17,12 +26,11 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
         unzip \
         zip \
         zlib1g-dev \
-        openjdk-8-jdk \
-        openjdk-8-jre-headless \
         wget \
         && \
-    apt-get clean && \
-    rm -rf /var/lib/apt/lists/*
+    rm -rf /var/lib/apt/lists/* && \
+    find /usr/local/cuda-9.0/lib64/ -type f -name 'lib*_static.a' -not -name 'libcudart_static.a' -delete && \
+    rm /usr/lib/x86_64-linux-gnu/libcudnn_static_v7.a
 
 RUN curl -fSsL -O https://bootstrap.pypa.io/get-pip.py && \
     python get-pip.py && \
@@ -70,18 +78,16 @@ RUN mkdir /bazel && \
     rm -f /bazel/bazel-$BAZEL_VERSION-installer-linux-x86_64.sh
 
 # Download and build TensorFlow.
-
-RUN git clone https://github.com/tensorflow/tensorflow.git && \
-    cd tensorflow && \
-    git checkout r1.4
 WORKDIR /tensorflow
+RUN git clone --branch=r1.4 --depth=1 https://github.com/tensorflow/tensorflow.git .
 
 # Configure the build for our CUDA configuration.
 ENV CI_BUILD_PYTHON python
 ENV LD_LIBRARY_PATH /usr/local/cuda/extras/CUPTI/lib64:$LD_LIBRARY_PATH
 ENV TF_NEED_CUDA 1
 ENV TF_CUDA_COMPUTE_CAPABILITIES=3.0,3.5,5.2,6.0,6.1
-
+ENV TF_CUDA_VERSION=9.0
+ENV TF_CUDNN_VERSION=7
 
 RUN ln -s /usr/local/cuda/lib64/stubs/libcuda.so /usr/local/cuda/lib64/stubs/libcuda.so.1 && \
     LD_LIBRARY_PATH=/usr/local/cuda/lib64/stubs:${LD_LIBRARY_PATH} \
diff --git a/tensorflow/tools/docker/Dockerfile.gpu b/tensorflow/tools/docker/Dockerfile.gpu
index e212d10290..b6682cd681 100644
--- a/tensorflow/tools/docker/Dockerfile.gpu
+++ b/tensorflow/tools/docker/Dockerfile.gpu
@@ -1,4 +1,4 @@
-FROM nvidia/cuda:8.0-cudnn6-runtime-ubuntu16.04
+FROM nvidia/cuda:9.0-cudnn7-runtime-ubuntu16.04
 
 LABEL maintainer="Craig Citro <craigcitro@google.com>"
 
diff --git a/tensorflow/tools/docker/parameterized_docker_build.sh b/tensorflow/tools/docker/parameterized_docker_build.sh
index 80a07b9b3b..e7de7df856 100755
--- a/tensorflow/tools/docker/parameterized_docker_build.sh
+++ b/tensorflow/tools/docker/parameterized_docker_build.sh
@@ -265,7 +265,7 @@ else
   DOCKERFILE="${TMP_DIR}/Dockerfile"
 
   # Modify the devel Dockerfile to specify the git branch
-  sed -r "s/([\s]*git checkout )(.*)/\1${TF_DOCKER_BUILD_DEVEL_BRANCH}/g" \
+  sed "s/^RUN git clone --branch=.* --depth=1/RUN git clone --branch=${TF_DOCKER_BUILD_DEVEL_BRANCH} --depth=1/" \
       "${ORIG_DOCKERFILE}" > "${DOCKERFILE}"
 
   # Modify python/pip version if necessary.
diff --git a/tensorflow/tools/docs/generate_lib.py b/tensorflow/tools/docs/generate_lib.py
index f950f19a7c..003f972070 100644
--- a/tensorflow/tools/docs/generate_lib.py
+++ b/tensorflow/tools/docs/generate_lib.py
@@ -199,12 +199,12 @@ def add_dict_to_dict(add_from, add_to):
       add_to[key] = add_from[key]
 
 
-# Exclude some libaries in contrib from the documentation altogether.
+# Exclude some libraries in contrib from the documentation altogether.
 def _get_default_private_map():
   return {'tf.test': ['mock']}
 
 
-# Exclude members of some libaries.
+# Exclude members of some libraries.
 def _get_default_do_not_descend_map():
   # TODO(wicke): Shrink this list once the modules get sealed.
   return {
diff --git a/tensorflow/tools/pip_package/BUILD b/tensorflow/tools/pip_package/BUILD
index 33af4532c8..d80d5ecc6a 100644
--- a/tensorflow/tools/pip_package/BUILD
+++ b/tensorflow/tools/pip_package/BUILD
@@ -6,6 +6,7 @@ package(default_visibility = ["//visibility:private"])
 load(
     "//tensorflow:tensorflow.bzl",
     "if_not_windows",
+    "if_windows",
     "transitive_hdrs",
 )
 load("//third_party/mkl:build_defs.bzl", "if_mkl")
@@ -194,3 +195,23 @@ sh_binary(
         ],
     }) + if_mkl(["//third_party/mkl:intel_binary_blob"]),
 )
+
+# A genrule for generating a marker file for the pip package on Windows
+#
+# This only works on Windows, because :simple_console_for_windows is a
+# python zip file containing everything we need for building the pip package.
+# However, on other platforms, due to https://github.com/bazelbuild/bazel/issues/4223,
+# when C++ extensions change, this generule doesn't rebuild.
+genrule(
+    name = "win_pip_package_marker",
+    srcs = if_windows([
+        ":build_pip_package",
+        ":simple_console_for_windows",
+    ]),
+    outs = ["win_pip_package_marker_file"],
+    cmd = select({
+        "//conditions:default": "touch $@",
+        "//tensorflow:windows": "md5sum $(locations :build_pip_package) $(locations :simple_console_for_windows) > $@",
+    }),
+    visibility = ["//visibility:public"],
+)
diff --git a/tensorflow/tools/pip_package/build_pip_package.sh b/tensorflow/tools/pip_package/build_pip_package.sh
index 8249703ba7..f5203bc544 100755
--- a/tensorflow/tools/pip_package/build_pip_package.sh
+++ b/tensorflow/tools/pip_package/build_pip_package.sh
@@ -24,7 +24,7 @@ function real_path() {
 function cp_external() {
   local src_dir=$1
   local dest_dir=$2
-  for f in `find "$src_dir" -maxdepth 1 -mindepth 1 ! -name '*local_config_cuda*'`; do
+  for f in `find "$src_dir" -maxdepth 1 -mindepth 1 ! -name '*local_config_cuda*' ! -name '*org_tensorflow*'`; do
     cp -R "$f" "$dest_dir"
   done
 }
@@ -92,7 +92,6 @@ function main() {
       bazel-bin/tensorflow/tools/pip_package/simple_console_for_window_unzip/runfiles/org_tensorflow/tensorflow \
       "${TMPDIR}"
     mkdir "${TMPDIR}/external"
-    # Note: this makes an extra copy of org_tensorflow.
     cp_external \
       bazel-bin/tensorflow/tools/pip_package/simple_console_for_window_unzip/runfiles \
       "${TMPDIR}/external"
@@ -123,7 +122,6 @@ function main() {
         bazel-bin/tensorflow/tools/pip_package/build_pip_package.runfiles/org_tensorflow/tensorflow \
         "${TMPDIR}"
       mkdir "${TMPDIR}/external"
-      # Note: this makes an extra copy of org_tensorflow.
       cp_external \
         bazel-bin/tensorflow/tools/pip_package/build_pip_package.runfiles \
         "${TMPDIR}/external"
diff --git a/tensorflow/tools/pip_package/pip_smoke_test.py b/tensorflow/tools/pip_package/pip_smoke_test.py
index cc46dd5162..22e1584b78 100644
--- a/tensorflow/tools/pip_package/pip_smoke_test.py
+++ b/tensorflow/tools/pip_package/pip_smoke_test.py
@@ -42,6 +42,7 @@ BLACKLIST = [
     "//tensorflow/python:extra_py_tests_deps",
     "//tensorflow/cc/saved_model:saved_model_half_plus_two",
     "//tensorflow:no_tensorflow_py_deps",
+    "//tensorflow/tools/pip_package:win_pip_package_marker",
     "//tensorflow/python:test_ops_2",
     "//tensorflow/python:tf_optimizer",
     "//tensorflow/python:compare_test_proto_py",
diff --git a/tensorflow/tools/proto_text/gen_proto_text_functions.cc b/tensorflow/tools/proto_text/gen_proto_text_functions.cc
index ecb29a65a0..f0bb59acf8 100644
--- a/tensorflow/tools/proto_text/gen_proto_text_functions.cc
+++ b/tensorflow/tools/proto_text/gen_proto_text_functions.cc
@@ -132,6 +132,7 @@ int MainImpl(int argc, char** argv) {
       FILE* f = fopen(path.c_str(), "w");
       if (f == nullptr) return -1;
       if (fwrite(data.c_str(), 1, data.size(), f) != data.size()) {
+        fclose(f);
         return -1;
       }
       if (fclose(f) != 0) {
diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl
index b71f5dc4e5..046c2b2391 100644
--- a/tensorflow/workspace.bzl
+++ b/tensorflow/workspace.bzl
@@ -74,11 +74,11 @@ def tf_workspace(path_prefix="", tf_repo_name=""):
   tf_http_archive(
       name = "mkl_dnn",
       urls = [
-          "https://mirror.bazel.build/github.com/01org/mkl-dnn/archive/b01e3a55a07be62172e713bcd2644c5176360212.tar.gz",
-          "https://github.com/01org/mkl-dnn/archive/b01e3a55a07be62172e713bcd2644c5176360212.tar.gz",
+          "https://mirror.bazel.build/github.com/01org/mkl-dnn/archive/aab753280e83137ba955f8f19d72cb6aaba545ef.tar.gz",
+          "https://github.com/01org/mkl-dnn/archive/aab753280e83137ba955f8f19d72cb6aaba545ef.tar.gz",
       ],
-      sha256 = "0d529ad4c49dc799e6df07c2b88b115d0668735da15fb3b3862d28d33fa68165",
-      strip_prefix = "mkl-dnn-b01e3a55a07be62172e713bcd2644c5176360212",
+      sha256 = "fb67f255a96bd4ad39b8dd104eca5aa92200c95c1ed36e59641e6c0478eefd11",
+      strip_prefix = "mkl-dnn-aab753280e83137ba955f8f19d72cb6aaba545ef",
       build_file = str(Label("//third_party/mkl_dnn:mkldnn.BUILD")),
   )
 
@@ -95,11 +95,11 @@ def tf_workspace(path_prefix="", tf_repo_name=""):
   tf_http_archive(
       name = "eigen_archive",
       urls = [
-          "https://mirror.bazel.build/bitbucket.org/eigen/eigen/get/429aa5254200.tar.gz",
-          "https://bitbucket.org/eigen/eigen/get/429aa5254200.tar.gz",
+          "https://mirror.bazel.build/bitbucket.org/eigen/eigen/get/b6e6d0cf6a77.tar.gz",
+          "https://bitbucket.org/eigen/eigen/get/b6e6d0cf6a77.tar.gz",
       ],
-      sha256 = "61d8b6fc4279dd1dda986fb1677d15e3d641c07a3ea5abe255790b1f0c0c14e9",
-      strip_prefix = "eigen-eigen-429aa5254200",
+      sha256 = "0840c497f2749b5e90bda666aab96be6da90dc75b4e21ca9843cae69b7fed52a",
+      strip_prefix = "eigen-eigen-b6e6d0cf6a77",
       build_file = str(Label("//third_party:eigen.BUILD")),
   )
 
diff --git a/third_party/curl.BUILD b/third_party/curl.BUILD
index e311c7e758..4def6f9489 100644
--- a/third_party/curl.BUILD
+++ b/third_party/curl.BUILD
@@ -10,6 +10,7 @@ CURL_WIN_COPTS = [
     "/DHAVE_CONFIG_H",
     "/DCURL_DISABLE_FTP",
     "/DCURL_DISABLE_NTLM",
+    "/DCURL_DISABLE_PROXY",
     "/DHAVE_LIBZ",
     "/DHAVE_ZLIB_H",
     # Defining _USING_V110_SDK71_ is hackery to defeat curl's incorrect
@@ -23,6 +24,8 @@ CURL_WIN_SRCS = [
     "lib/asyn-thread.c",
     "lib/inet_ntop.c",
     "lib/system_win32.c",
+    "lib/vtls/schannel.c",
+    "lib/idn_win32.c",
 ]
 
 cc_library(
@@ -276,6 +279,7 @@ cc_library(
             "-DCURL_MAX_WRITE_SIZE=65536",
         ],
     }),
+    defines = ["CURL_STATICLIB"],
     includes = ["include"],
     linkopts = select({
         "@org_tensorflow//tensorflow:android": [
@@ -289,10 +293,16 @@ cc_library(
         ],
         "@org_tensorflow//tensorflow:ios": [],
         "@org_tensorflow//tensorflow:windows": [
-            "-Wl,ws2_32.lib",
+            "-DEFAULTLIB:ws2_32.lib",
+            "-DEFAULTLIB:advapi32.lib",
+            "-DEFAULTLIB:crypt32.lib",
+            "-DEFAULTLIB:Normaliz.lib",
         ],
         "@org_tensorflow//tensorflow:windows_msvc": [
-            "-Wl,ws2_32.lib",
+            "-DEFAULTLIB:ws2_32.lib",
+            "-DEFAULTLIB:advapi32.lib",
+            "-DEFAULTLIB:crypt32.lib",
+            "-DEFAULTLIB:Normaliz.lib",
         ],
         "//conditions:default": [
             "-lrt",
@@ -438,12 +448,22 @@ genrule(
         "#  include \"lib/config-win32.h\"",
         "#  define BUILDING_LIBCURL 1",
         "#  define CURL_DISABLE_CRYPTO_AUTH 1",
+        "#  define CURL_DISABLE_DICT 1",
+        "#  define CURL_DISABLE_FILE 1",
+        "#  define CURL_DISABLE_GOPHER 1",
         "#  define CURL_DISABLE_IMAP 1",
         "#  define CURL_DISABLE_LDAP 1",
         "#  define CURL_DISABLE_LDAPS 1",
         "#  define CURL_DISABLE_POP3 1",
         "#  define CURL_PULL_WS2TCPIP_H 1",
-        "#  define HTTP_ONLY 1",
+        "#  define CURL_DISABLE_SMTP 1",
+        "#  define CURL_DISABLE_TELNET 1",
+        "#  define CURL_DISABLE_TFTP 1",
+        "#  define CURL_PULL_WS2TCPIP_H 1",
+        "#  define USE_WINDOWS_SSPI 1",
+        "#  define USE_WIN32_IDN 1",
+        "#  define USE_SCHANNEL 1",
+        "#  define WANT_IDN_PROTOTYPES 1",
         "#elif defined(__APPLE__)",
         "#  define HAVE_FSETXATTR_6 1",
         "#  define HAVE_SETMODE 1",
diff --git a/third_party/pcre.BUILD b/third_party/pcre.BUILD
index 68aadd1d40..e2cdec4029 100644
--- a/third_party/pcre.BUILD
+++ b/third_party/pcre.BUILD
@@ -50,12 +50,12 @@ cc_library(
         "-DNEWLINE=10",
         "-DNO_RECURSE",
         "-DPARENS_NEST_LIMIT=50",
-        "-DPCRE_STATIC=1",
         "-DPOSIX_MALLOC_THRESHOLD=10",
         "-DSTDC_HEADERS=1",
         "-DSUPPORT_UCP",
         "-DSUPPORT_UTF",
     ],
+    defines = ["PCRE_STATIC=1"],
     includes = ["."],
     visibility = ["@swig//:__pkg__"],  # Please use RE2
     alwayslink = 1,
-- 
GitLab


From 7aa64c7ccbb0e09b5ff196109c15eb63bdc185b4 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 15 Dec 2017 18:22:06 -0800
Subject: [PATCH 1116/1225]    [tpu:profiler] Remove unused fields in
 ProfileResponse.

PiperOrigin-RevId: 179264385
---
 tensorflow/contrib/tpu/profiler/tpu_profiler.proto | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/contrib/tpu/profiler/tpu_profiler.proto b/tensorflow/contrib/tpu/profiler/tpu_profiler.proto
index 9c3fd45fd1..bf30d2ce09 100644
--- a/tensorflow/contrib/tpu/profiler/tpu_profiler.proto
+++ b/tensorflow/contrib/tpu/profiler/tpu_profiler.proto
@@ -40,7 +40,7 @@ message ProfileToolData {
 }
 
 message ProfileResponse {
-  uint64 xprof_response_size = 1;  // Placeholder: return something meaningful.
+  reserved 1;  // was uint64 placeholder for returning something meaningful.
   // Graphs of programs executed on TPUs during the profiling period.
   repeated GraphDef computation_graph = 2;
 
-- 
GitLab


From f2996ec7a1e0e9f4d07637889475681a7432375c Mon Sep 17 00:00:00 2001
From: Eugene Brevdo <ebrevdo@google.com>
Date: Fri, 15 Dec 2017 18:22:15 -0800
Subject: [PATCH 1117/1225] [tf.contrib.rnn] Fix regression in variable
 creation in fused LSTM/GRU cells.

PiperOrigin-RevId: 179264398
---
 .../rnn/python/kernel_tests/lstm_ops_test.py  |  9 ++---
 tensorflow/contrib/rnn/python/ops/gru_ops.py  | 35 ++++++++++---------
 tensorflow/contrib/rnn/python/ops/lstm_ops.py | 27 +++++++-------
 3 files changed, 38 insertions(+), 33 deletions(-)

diff --git a/tensorflow/contrib/rnn/python/kernel_tests/lstm_ops_test.py b/tensorflow/contrib/rnn/python/kernel_tests/lstm_ops_test.py
index 4ada2118ba..7957edf68c 100644
--- a/tensorflow/contrib/rnn/python/kernel_tests/lstm_ops_test.py
+++ b/tensorflow/contrib/rnn/python/kernel_tests/lstm_ops_test.py
@@ -98,7 +98,8 @@ def blocks_match(sess, use_peephole):
           cell_clip=0)
 
     fused_cell = lstm_ops.LSTMBlockFusedCell(
-        cell_size, cell_clip=0, use_peephole=use_peephole, reuse=True)
+        cell_size, cell_clip=0, use_peephole=use_peephole, reuse=True,
+        name="rnn/lstm_cell")
     fused_outputs_op, fused_state_op = fused_cell(
         stacked_inputs, dtype=dtypes.float32)
 
@@ -368,8 +369,7 @@ class LSTMBlockCellTest(test.TestCase):
       initializer = init_ops.random_uniform_initializer(
           -0.01, 0.01, seed=19890213)
 
-      with variable_scope.variable_scope(
-          "rnn/lstm_cell", initializer=initializer):
+      with variable_scope.variable_scope("lstm_cell", initializer=initializer):
         # magic naming so that the cells pick up these variables and reuse them
         variable_scope.get_variable(
             "kernel",
@@ -383,7 +383,8 @@ class LSTMBlockCellTest(test.TestCase):
             initializer=init_ops.zeros_initializer())
 
       cell = lstm_ops.LSTMBlockFusedCell(
-          cell_size, cell_clip=0, use_peephole=False, reuse=True)
+          cell_size, cell_clip=0, use_peephole=False, reuse=True,
+          name="lstm_cell")
 
       fused_outputs_op, fused_state_op = cell(
           cell_inputs, dtype=dtypes.float32, sequence_length=seq_lengths)
diff --git a/tensorflow/contrib/rnn/python/ops/gru_ops.py b/tensorflow/contrib/rnn/python/ops/gru_ops.py
index 8e4b60451e..4c964ec201 100644
--- a/tensorflow/contrib/rnn/python/ops/gru_ops.py
+++ b/tensorflow/contrib/rnn/python/ops/gru_ops.py
@@ -26,7 +26,6 @@ from tensorflow.python.ops import init_ops
 from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import nn_ops
 from tensorflow.python.ops import rnn_cell_impl
-from tensorflow.python.ops import variable_scope as vs
 from tensorflow.python.platform import resource_loader
 from tensorflow.python.util.deprecation import deprecated_args
 
@@ -181,17 +180,19 @@ class GRUBlockCell(LayerRNNCell):
     if input_size is None:
       raise ValueError("Expecting input_size to be set.")
 
-    self._gate_kernel = vs.get_variable(
+    self._gate_kernel = self.add_variable(
         "w_ru", [input_size + self._cell_size, self._cell_size * 2])
-    self._gate_bias = vs.get_variable(
+    self._gate_bias = self.add_variable(
         "b_ru", [self._cell_size * 2],
         initializer=init_ops.constant_initializer(1.0))
-    self._candidate_kernel = vs.get_variable(
+    self._candidate_kernel = self.add_variable(
         "w_c", [input_size + self._cell_size, self._cell_size])
-    self._candidate_bias = vs.get_variable(
+    self._candidate_bias = self.add_variable(
         "b_c", [self._cell_size],
         initializer=init_ops.constant_initializer(0.0))
 
+    self.built = True
+
   def call(self, inputs, h_prev):
     """GRU cell."""
     # Check cell_size == state_size from h_prev.
@@ -224,15 +225,15 @@ class GRUBlockCellV2(GRUBlockCell):
     if input_size is None:
       raise ValueError("Expecting input_size to be set.")
 
-    with vs.variable_scope("gates"):
-      self._gate_kernel = vs.get_variable(
-          "kernel", [input_size + self._cell_size, self._cell_size * 2])
-      self._gate_bias = vs.get_variable(
-          "bias", [self._cell_size * 2],
-          initializer=init_ops.constant_initializer(1.0))
-    with vs.variable_scope("candidate"):
-      self._candidate_kernel = vs.get_variable(
-          "kernel", [input_size + self._cell_size, self._cell_size])
-      self._candidate_bias = vs.get_variable(
-          "bias", [self._cell_size],
-          initializer=init_ops.constant_initializer(0.0))
+    self._gate_kernel = self.add_variable(
+        "gates/kernel", [input_size + self._cell_size, self._cell_size * 2])
+    self._gate_bias = self.add_variable(
+        "gates/bias", [self._cell_size * 2],
+        initializer=init_ops.constant_initializer(1.0))
+    self._candidate_kernel = self.add_variable(
+        "candidate/kernel", [input_size + self._cell_size, self._cell_size])
+    self._candidate_bias = self.add_variable(
+        "candidate/bias", [self._cell_size],
+        initializer=init_ops.constant_initializer(0.0))
+
+    self.built = True
diff --git a/tensorflow/contrib/rnn/python/ops/lstm_ops.py b/tensorflow/contrib/rnn/python/ops/lstm_ops.py
index 9217617e77..04f342cd18 100644
--- a/tensorflow/contrib/rnn/python/ops/lstm_ops.py
+++ b/tensorflow/contrib/rnn/python/ops/lstm_ops.py
@@ -29,7 +29,6 @@ from tensorflow.python.ops import init_ops
 from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import nn_ops
 from tensorflow.python.ops import rnn_cell_impl
-from tensorflow.python.ops import variable_scope as vs
 from tensorflow.python.platform import resource_loader
 
 _lstm_ops_so = loader.load_op_library(
@@ -396,15 +395,17 @@ class LSTMBlockCell(LayerRNNCell):
       raise ValueError(
           "Expecting inputs_shape[1] to be set: %s" % str(inputs_shape))
     input_size = inputs_shape[1].value
-    self._kernel = vs.get_variable(
+    self._kernel = self.add_variable(
         self._names["W"], [input_size + self._num_units, self._num_units * 4])
-    self._bias = vs.get_variable(
+    self._bias = self.add_variable(
         self._names["b"], [self._num_units * 4],
         initializer=init_ops.constant_initializer(0.0))
     if self._use_peephole:
-      self._w_i_diag = vs.get_variable(self._names["wci"], [self._num_units])
-      self._w_f_diag = vs.get_variable(self._names["wcf"], [self._num_units])
-      self._w_o_diag = vs.get_variable(self._names["wco"], [self._num_units])
+      self._w_i_diag = self.add_variable(self._names["wci"], [self._num_units])
+      self._w_f_diag = self.add_variable(self._names["wcf"], [self._num_units])
+      self._w_o_diag = self.add_variable(self._names["wco"], [self._num_units])
+
+    self.built = True
 
   def call(self, inputs, state):
     """Long short-term memory cell (LSTM)."""
@@ -597,7 +598,7 @@ class LSTMBlockFusedCell(LSTMBlockWrapper):
                cell_clip=None,
                use_peephole=False,
                reuse=None,
-               name="rnn/lstm_cell"):
+               name="lstm_fused_cell"):
     """Initialize the LSTM cell.
 
     Args:
@@ -629,15 +630,17 @@ class LSTMBlockFusedCell(LSTMBlockWrapper):
 
   def build(self, input_shape):
     input_size = input_shape[2].value
-    self._kernel = vs.get_variable(
+    self._kernel = self.add_variable(
         "kernel", [input_size + self._num_units, self._num_units * 4])
-    self._bias = vs.get_variable(
+    self._bias = self.add_variable(
         "bias", [self._num_units * 4],
         initializer=init_ops.constant_initializer(0.0))
     if self._use_peephole:
-      self._w_i_diag = vs.get_variable("w_i_diag", [self._num_units])
-      self._w_f_diag = vs.get_variable("w_f_diag", [self._num_units])
-      self._w_o_diag = vs.get_variable("w_o_diag", [self._num_units])
+      self._w_i_diag = self.add_variable("w_i_diag", [self._num_units])
+      self._w_f_diag = self.add_variable("w_f_diag", [self._num_units])
+      self._w_o_diag = self.add_variable("w_o_diag", [self._num_units])
+
+    self.built = True
 
   def _call_cell(self,
                  inputs,
-- 
GitLab


From 71d3c8e636efcf0bc3f87ed9888cca71fde8cc8e Mon Sep 17 00:00:00 2001
From: Yong Tang <yong.tang.github@outlook.com>
Date: Sat, 16 Dec 2017 17:02:58 -0600
Subject: [PATCH 1118/1225] Add customized kernel implementation for
 clip_by_value (#13998)

This fix tries to address the issue raised in 7225 where
`tf.clip_by_value` does not have a custom kernel and reused
`tf.maximum` and `tf.mimimum`. In case scalar values are passed
to `tf.clip_by_value`, unnecessary memory might incur.
---
 .../base_api/api_def_ClipByValue.pbtxt        |  36 +++
 tensorflow/core/kernels/cwise_op_clip.cc      | 224 ++++++++++++++++++
 tensorflow/core/kernels/cwise_op_clip.h       |  61 +++++
 .../core/kernels/cwise_op_clip_gpu.cu.cc      | 134 +++++++++++
 tensorflow/core/ops/math_ops.cc               |  23 ++
 .../python/kernel_tests/clip_ops_test.py      | 132 +++++++++--
 tensorflow/python/ops/clip_ops.py             |  44 +++-
 tensorflow/python/ops/hidden_ops.txt          |   1 +
 8 files changed, 618 insertions(+), 37 deletions(-)
 create mode 100644 tensorflow/core/api_def/base_api/api_def_ClipByValue.pbtxt
 create mode 100644 tensorflow/core/kernels/cwise_op_clip.cc
 create mode 100644 tensorflow/core/kernels/cwise_op_clip.h
 create mode 100644 tensorflow/core/kernels/cwise_op_clip_gpu.cu.cc

diff --git a/tensorflow/core/api_def/base_api/api_def_ClipByValue.pbtxt b/tensorflow/core/api_def/base_api/api_def_ClipByValue.pbtxt
new file mode 100644
index 0000000000..803d8970ab
--- /dev/null
+++ b/tensorflow/core/api_def/base_api/api_def_ClipByValue.pbtxt
@@ -0,0 +1,36 @@
+op {
+  graph_op_name: "ClipByValue"
+  in_arg {
+    name: "t"
+    description: <<END
+A `Tensor`.
+END
+  }
+  in_arg {
+    name: "clip_value_min"
+    description: <<END
+A 0-D (scalar) `Tensor`, or a `Tensor` with the same shape
+as `t`. The minimum value to clip by.
+END
+  }
+  in_arg {
+    name: "clip_value_max"
+    description: <<END
+A 0-D (scalar) `Tensor`, or a `Tensor` with the same shape
+as `t`. The maximum value to clip by.
+END
+  }
+  out_arg {
+    name: "output"
+    description: <<END
+A clipped `Tensor` with the same shape as input 't'.
+END
+  }
+  summary: "Clips tensor values to a specified min and max."
+  description: <<END
+Given a tensor `t`, this operation returns a tensor of the same type and
+shape as `t` with its values clipped to `clip_value_min` and `clip_value_max`.
+Any values less than `clip_value_min` are set to `clip_value_min`. Any values
+greater than `clip_value_max` are set to `clip_value_max`.
+END
+}
diff --git a/tensorflow/core/kernels/cwise_op_clip.cc b/tensorflow/core/kernels/cwise_op_clip.cc
new file mode 100644
index 0000000000..bd22f5777c
--- /dev/null
+++ b/tensorflow/core/kernels/cwise_op_clip.cc
@@ -0,0 +1,224 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/core/kernels/cwise_op_clip.h"
+
+namespace tensorflow {
+
+typedef Eigen::ThreadPoolDevice CPUDevice;
+typedef Eigen::GpuDevice GPUDevice;
+
+// Basic coefficient-wise tenary operations.
+// This is the case for example of the clip_by_value.
+//   Device: E.g., CPUDevice, GPUDevice.
+//   Functor: defined above. E.g., functor::clip.
+template <typename Device, typename T>
+class ClipOp : public OpKernel {
+ public:
+  explicit ClipOp(OpKernelConstruction* ctx) : OpKernel(ctx) {}
+
+  void Compute(OpKernelContext* ctx) override {
+    const Tensor& in0 = ctx->input(0);
+    const Tensor& in1 = ctx->input(1);
+    const Tensor& in2 = ctx->input(2);
+
+    auto in0_flat = in0.flat<T>();
+    auto in1_flat = in1.flat<T>();
+    auto in2_flat = in2.flat<T>();
+    const Device& d = ctx->eigen_device<Device>();
+
+    Tensor* out = nullptr;
+    OP_REQUIRES_OK(
+        ctx, ctx->forward_input_or_allocate_output({0}, 0, in0.shape(), &out));
+    auto out_flat = out->flat<T>();
+    if (in1.shape() == in2.shape()) {
+      if (in0.shape() == in1.shape()) {
+        functor::TernaryClipOp<Device, T>()(d, in0_flat, in1_flat, in2_flat,
+                                            out_flat);
+      } else {
+        OP_REQUIRES(ctx, TensorShapeUtils::IsScalar(in1.shape()),
+                    errors::InvalidArgument(
+                        "clip_value_min and clip_value_max must be either of "
+                        "the same shape as input, or a scalar. ",
+                        "input shape: ", in0.shape().DebugString(),
+                        "clip_value_min shape: ", in1.shape().DebugString(),
+                        "clip_value_max shape: ", in2.shape().DebugString()));
+        functor::UnaryClipOp<Device, T>()(d, in0_flat, in1_flat, in2_flat,
+                                          out_flat);
+      }
+    } else {
+      if (in0.shape() == in1.shape()) {
+        OP_REQUIRES(ctx, TensorShapeUtils::IsScalar(in2.shape()),
+                    errors::InvalidArgument(
+                        "clip_value_min and clip_value_max must be either of "
+                        "the same shape as input, or a scalar. ",
+                        "input shape: ", in0.shape().DebugString(),
+                        "clip_value_min shape: ", in1.shape().DebugString(),
+                        "clip_value_max shape: ", in2.shape().DebugString()));
+        functor::BinaryLeftClipOp<Device, T>()(d, in0_flat, in1_flat, in2_flat,
+                                               out_flat);
+      } else {
+        OP_REQUIRES(ctx, (in0.shape() == in2.shape() &&
+                          TensorShapeUtils::IsScalar(in1.shape())),
+                    errors::InvalidArgument(
+                        "clip_value_min and clip_value_max must be either of "
+                        "the same shape as input, or a scalar. ",
+                        "input shape: ", in0.shape().DebugString(),
+                        "clip_value_min shape: ", in1.shape().DebugString(),
+                        "clip_value_max shape: ", in2.shape().DebugString()));
+        functor::BinaryRightClipOp<Device, T>()(d, in0_flat, in1_flat, in2_flat,
+                                                out_flat);
+      }
+    }
+  }
+};
+
+namespace functor {
+// Unary functor for clip [Tensor, Scalar, Scalar]
+template <typename T>
+struct UnaryClipFunc {
+  UnaryClipFunc(const T& value_min, const T& value_max)
+      : value_min_(value_min), value_max_(value_max) {}
+  const T operator()(const T& value) const {
+    return std::max(std::min(value, value_max_), value_min_);
+  }
+  T value_min_;
+  T value_max_;
+};
+template <typename T>
+struct UnaryClipOp<CPUDevice, T> {
+  void operator()(const CPUDevice& d, typename TTypes<T>::ConstFlat& in0_flat,
+                  typename TTypes<T>::ConstFlat& in1_flat,
+                  typename TTypes<T>::ConstFlat& in2_flat,
+                  typename TTypes<T>::Flat& out_flat) const {
+    out_flat = in0_flat.unaryExpr(UnaryClipFunc<T>(in1_flat(0), in2_flat(0)));
+  }
+};
+
+// Binary functor for clip [Tensor, Scalar, Tensor]
+template <typename T>
+struct BinaryRightClipFunc {
+  BinaryRightClipFunc(const T& value_min) : value_min_(value_min) {}
+  const T operator()(const T& value, const T& value_max) const {
+    return std::max(std::min(value, value_max), value_min_);
+  }
+  T value_min_;
+};
+template <typename T>
+struct BinaryRightClipOp<CPUDevice, T> {
+  void operator()(const CPUDevice& d, typename TTypes<T>::ConstFlat& in0_flat,
+                  typename TTypes<T>::ConstFlat& in1_flat,
+                  typename TTypes<T>::ConstFlat& in2_flat,
+                  typename TTypes<T>::Flat& out_flat) const {
+    out_flat =
+        in0_flat.binaryExpr(in2_flat, BinaryRightClipFunc<T>(in1_flat(0)));
+  }
+};
+
+// Binary functor for clip [Tensor, Tensor, Scalar]
+template <typename T>
+struct BinaryLeftClipFunc {
+  BinaryLeftClipFunc(const T& value_max) : value_max_(value_max) {}
+  const T operator()(const T& value, const T& value_min) const {
+    return std::max(std::min(value, value_max_), value_min);
+  }
+  T value_max_;
+};
+template <typename T>
+struct BinaryLeftClipOp<CPUDevice, T> {
+  void operator()(const CPUDevice& d, typename TTypes<T>::ConstFlat& in0_flat,
+                  typename TTypes<T>::ConstFlat& in1_flat,
+                  typename TTypes<T>::ConstFlat& in2_flat,
+                  typename TTypes<T>::Flat& out_flat) const {
+    out_flat =
+        in0_flat.binaryExpr(in1_flat, BinaryLeftClipFunc<T>(in2_flat(0)));
+  }
+};
+
+// Ternary functor for clip [Tensor, Tensor, Tensor]
+template <typename T>
+struct TernaryClipOp<CPUDevice, T> {
+  void operator()(const CPUDevice& d, typename TTypes<T>::ConstFlat& in0_flat,
+                  typename TTypes<T>::ConstFlat& in1_flat,
+                  typename TTypes<T>::ConstFlat& in2_flat,
+                  typename TTypes<T>::Flat& out_flat) const {
+    out_flat.device(d) = in0_flat.cwiseMin(in2_flat).cwiseMax(in1_flat);
+  }
+};
+
+#define INSTANTIATE_CPU(T)                         \
+  template struct UnaryClipOp<CPUDevice, T>;       \
+  template struct BinaryRightClipOp<CPUDevice, T>; \
+  template struct BinaryLeftClipOp<CPUDevice, T>;  \
+  template struct TernaryClipOp<CPUDevice, T>;
+INSTANTIATE_CPU(Eigen::half);
+INSTANTIATE_CPU(float);
+INSTANTIATE_CPU(double);
+INSTANTIATE_CPU(int8);
+INSTANTIATE_CPU(int16);
+INSTANTIATE_CPU(int32);
+INSTANTIATE_CPU(int64);
+INSTANTIATE_CPU(uint8);
+INSTANTIATE_CPU(uint16);
+#undef INSTANTIATE_CPU
+}  // namespace functor
+
+#define REGISTER_CPU_KERNEL(type)                                       \
+  REGISTER_KERNEL_BUILDER(                                              \
+      Name("ClipByValue").Device(DEVICE_CPU).TypeConstraint<type>("T"), \
+      ClipOp<CPUDevice, type>);
+
+REGISTER_CPU_KERNEL(Eigen::half);
+REGISTER_CPU_KERNEL(float);
+REGISTER_CPU_KERNEL(double);
+REGISTER_CPU_KERNEL(int8);
+REGISTER_CPU_KERNEL(int16);
+REGISTER_CPU_KERNEL(int32);
+REGISTER_CPU_KERNEL(int64);
+REGISTER_CPU_KERNEL(uint8);
+REGISTER_CPU_KERNEL(uint16);
+#undef REGISTER_CPU_KERNEL
+
+#if GOOGLE_CUDA
+
+#define REGISTER_GPU_KERNEL(type)                                       \
+  REGISTER_KERNEL_BUILDER(                                              \
+      Name("ClipByValue").Device(DEVICE_GPU).TypeConstraint<type>("T"), \
+      ClipOp<GPUDevice, type>);
+REGISTER_GPU_KERNEL(Eigen::half);
+REGISTER_GPU_KERNEL(float);
+REGISTER_GPU_KERNEL(double);
+REGISTER_GPU_KERNEL(int8);
+REGISTER_GPU_KERNEL(int16);
+REGISTER_GPU_KERNEL(int64);
+REGISTER_GPU_KERNEL(uint8);
+REGISTER_GPU_KERNEL(uint16);
+
+// A special GPU kernel for int32.
+// TODO(b/25387198): Also enable int32 in device memory. This kernel
+// registration requires all int32 inputs and outputs to be in host memory.
+REGISTER_KERNEL_BUILDER(Name("ClipByValue")
+                            .Device(DEVICE_GPU)
+                            .HostMemory("t")
+                            .HostMemory("clip_value_min")
+                            .HostMemory("clip_value_max")
+                            .HostMemory("output")
+                            .TypeConstraint<int32>("T"),
+                        ClipOp<CPUDevice, int32>);
+
+#undef REGISTER_GPU_KERNEL
+#endif
+
+}  // namespace tensorflow
diff --git a/tensorflow/core/kernels/cwise_op_clip.h b/tensorflow/core/kernels/cwise_op_clip.h
new file mode 100644
index 0000000000..1a4bf8cf1d
--- /dev/null
+++ b/tensorflow/core/kernels/cwise_op_clip.h
@@ -0,0 +1,61 @@
+/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_KERNELS_CWISE_OP_CLIP_H_
+#define TENSORFLOW_KERNELS_CWISE_OP_CLIP_H_
+
+#include "tensorflow/core/kernels/cwise_ops_common.h"
+
+namespace tensorflow {
+namespace functor {
+// Unary functor for clip [Tensor, Scalar, Scalar]
+template <typename Device, typename T>
+struct UnaryClipOp {
+  void operator()(const Device &d, typename TTypes<T>::ConstFlat &in0_flat,
+                  typename TTypes<T>::ConstFlat &in1_flat,
+                  typename TTypes<T>::ConstFlat &in2_flat,
+                  typename TTypes<T>::Flat &out_flat) const;
+};
+
+// Binary functor for clip [Tensor, Scalar, Tensor]
+template <typename Device, typename T>
+struct BinaryRightClipOp {
+  void operator()(const Device &d, typename TTypes<T>::ConstFlat &in0_flat,
+                  typename TTypes<T>::ConstFlat &in1_flat,
+                  typename TTypes<T>::ConstFlat &in2_flat,
+                  typename TTypes<T>::Flat &out_flat) const;
+};
+
+// Binary functor for clip [Tensor, Tensor, Scalar]
+template <typename Device, typename T>
+struct BinaryLeftClipOp {
+  void operator()(const Device &d, typename TTypes<T>::ConstFlat &in0_flat,
+                  typename TTypes<T>::ConstFlat &in1_flat,
+                  typename TTypes<T>::ConstFlat &in2_flat,
+                  typename TTypes<T>::Flat &out_flat) const;
+};
+
+// Ternary functor for clip [Tensor, Tensor, Tensor]
+template <typename Device, typename T>
+struct TernaryClipOp {
+  void operator()(const Device &d, typename TTypes<T>::ConstFlat &in0_flat,
+                  typename TTypes<T>::ConstFlat &in1_flat,
+                  typename TTypes<T>::ConstFlat &in2_flat,
+                  typename TTypes<T>::Flat &out_flat) const;
+};
+}
+}  // namespace tensorflow
+
+#endif  // TENSORFLOW_KERNELS_CWISE_OP_CLIP_H_
diff --git a/tensorflow/core/kernels/cwise_op_clip_gpu.cu.cc b/tensorflow/core/kernels/cwise_op_clip_gpu.cu.cc
new file mode 100644
index 0000000000..5c07847548
--- /dev/null
+++ b/tensorflow/core/kernels/cwise_op_clip_gpu.cu.cc
@@ -0,0 +1,134 @@
+/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#if GOOGLE_CUDA
+
+#define EIGEN_USE_GPU
+
+#include "tensorflow/core/kernels/cwise_op_clip.h"
+#include "tensorflow/core/kernels/cwise_ops_gpu_common.cu.h"
+#include "tensorflow/core/util/cuda_kernel_helper.h"
+
+namespace tensorflow {
+
+template <typename T>
+__global__ void UnaryClipCustomKernel(const int32 size_in, const T *in0,
+                                      const T *in1, const T *in2, T *out) {
+  CUDA_1D_KERNEL_LOOP(i, size_in) {
+    T value = in2[0] < in0[i] ? in2[0] : in0[i];
+    out[i] = value < in1[0] ? in1[0] : value;
+  }
+}
+
+template <typename T>
+__global__ void BinaryRightClipCustomKernel(const int32 size_in, const T *in0,
+                                            const T *in1, const T *in2,
+                                            T *out) {
+  CUDA_1D_KERNEL_LOOP(i, size_in) {
+    T value = in2[i] < in0[i] ? in2[i] : in0[i];
+    out[i] = value < in1[0] ? in1[0] : value;
+  }
+}
+
+template <typename T>
+__global__ void BinaryLeftClipCustomKernel(const int32 size_in, const T *in0,
+                                           const T *in1, const T *in2, T *out) {
+  CUDA_1D_KERNEL_LOOP(i, size_in) {
+    T value = in2[0] < in0[i] ? in2[0] : in0[i];
+    out[i] = value < in1[i] ? in1[i] : value;
+  }
+}
+
+namespace functor {
+
+// Unary functor for clip [Tensor, Scalar, Scalar]
+template <typename T>
+struct UnaryClipOp<GPUDevice, T> {
+  void operator()(const GPUDevice &d, typename TTypes<T>::ConstFlat &in0_flat,
+                  typename TTypes<T>::ConstFlat &in1_flat,
+                  typename TTypes<T>::ConstFlat &in2_flat,
+                  typename TTypes<T>::Flat &out_flat) const {
+    CudaLaunchConfig config = GetCudaLaunchConfig(in0_flat.size(), d);
+
+    UnaryClipCustomKernel<
+        T><<<config.block_count, config.thread_per_block, 0, d.stream()>>>(
+        in0_flat.size(), in0_flat.data(), in1_flat.data(), in2_flat.data(),
+        out_flat.data());
+  }
+};
+
+// Binary functor for clip [Tensor, Scalar, Tensor]
+template <typename T>
+struct BinaryRightClipOp<GPUDevice, T> {
+  void operator()(const GPUDevice &d, typename TTypes<T>::ConstFlat &in0_flat,
+                  typename TTypes<T>::ConstFlat &in1_flat,
+                  typename TTypes<T>::ConstFlat &in2_flat,
+                  typename TTypes<T>::Flat &out_flat) const {
+    CudaLaunchConfig config = GetCudaLaunchConfig(in0_flat.size(), d);
+
+    BinaryRightClipCustomKernel<
+        T><<<config.block_count, config.thread_per_block, 0, d.stream()>>>(
+        in0_flat.size(), in0_flat.data(), in1_flat.data(), in2_flat.data(),
+        out_flat.data());
+  }
+};
+
+// Binary functor for clip [Tensor, Tensor, Scalar]
+template <typename T>
+struct BinaryLeftClipOp<GPUDevice, T> {
+  void operator()(const GPUDevice &d, typename TTypes<T>::ConstFlat &in0_flat,
+                  typename TTypes<T>::ConstFlat &in1_flat,
+                  typename TTypes<T>::ConstFlat &in2_flat,
+                  typename TTypes<T>::Flat &out_flat) const {
+    CudaLaunchConfig config = GetCudaLaunchConfig(in0_flat.size(), d);
+
+    BinaryLeftClipCustomKernel<
+        T><<<config.block_count, config.thread_per_block, 0, d.stream()>>>(
+        in0_flat.size(), in0_flat.data(), in1_flat.data(), in2_flat.data(),
+        out_flat.data());
+  }
+};
+
+// Ternary functor for clip [Tensor, Tensor, Tensor]
+template <typename T>
+struct TernaryClipOp<GPUDevice, T> {
+  void operator()(const GPUDevice &d, typename TTypes<T>::ConstFlat &in0_flat,
+                  typename TTypes<T>::ConstFlat &in1_flat,
+                  typename TTypes<T>::ConstFlat &in2_flat,
+                  typename TTypes<T>::Flat &out_flat) const {
+    out_flat.device(d) = in0_flat.cwiseMin(in2_flat).cwiseMax(in1_flat);
+  }
+};
+
+#define INSTANTIATE_GPU(T)                         \
+  template struct UnaryClipOp<GPUDevice, T>;       \
+  template struct BinaryRightClipOp<GPUDevice, T>; \
+  template struct BinaryLeftClipOp<GPUDevice, T>;  \
+  template struct TernaryClipOp<GPUDevice, T>;
+INSTANTIATE_GPU(Eigen::half);
+INSTANTIATE_GPU(float);
+INSTANTIATE_GPU(double);
+INSTANTIATE_GPU(int8);
+INSTANTIATE_GPU(int16);
+INSTANTIATE_GPU(int32);
+INSTANTIATE_GPU(int64);
+INSTANTIATE_GPU(uint8);
+INSTANTIATE_GPU(uint16);
+#undef INSTANTIATE_GPU
+
+}  // namespace functor
+}  // namespace tensorflow
+
+#endif  // GOOGLE_CUDA
diff --git a/tensorflow/core/ops/math_ops.cc b/tensorflow/core/ops/math_ops.cc
index 8ea170ba14..e3add98f89 100644
--- a/tensorflow/core/ops/math_ops.cc
+++ b/tensorflow/core/ops/math_ops.cc
@@ -3031,6 +3031,29 @@ Equivalent to np.digitize.
 @end_compatibility
 )doc");
 
+REGISTER_OP("ClipByValue")
+    .Input("t: T")
+    .Input("clip_value_min: T")
+    .Input("clip_value_max: T")
+    .Output("output: T")
+    .Attr("T: numbertype")
+    .SetShapeFn(shape_inference::UnchangedShape)
+    .Doc(R"doc(
+Clips tensor values to a specified min and max.
+
+Given a tensor `t`, this operation returns a tensor of the same type and
+shape as `t` with its values clipped to `clip_value_min` and `clip_value_max`.
+Any values less than `clip_value_min` are set to `clip_value_min`. Any values
+greater than `clip_value_max` are set to `clip_value_max`.
+
+t: A `Tensor`.
+clip_value_min: A 0-D (scalar) `Tensor`, or a `Tensor` with the same shape
+  as `t`. The minimum value to clip by.
+clip_value_max: A 0-D (scalar) `Tensor`, or a `Tensor` with the same shape
+  as `t`. The maximum value to clip by.
+output: A clipped `Tensor` with the same shape as input 't'.
+)doc");
+
 #ifdef INTEL_MKL
 REGISTER_OP("_MklAddN")
     .Input("inputs: N * T")
diff --git a/tensorflow/python/kernel_tests/clip_ops_test.py b/tensorflow/python/kernel_tests/clip_ops_test.py
index 5c8b71da17..cb1359be15 100644
--- a/tensorflow/python/kernel_tests/clip_ops_test.py
+++ b/tensorflow/python/kernel_tests/clip_ops_test.py
@@ -19,16 +19,34 @@ from __future__ import division
 from __future__ import print_function
 
 from tensorflow.python.framework import constant_op
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import errors_impl
 from tensorflow.python.framework import ops
 from tensorflow.python.ops import clip_ops
+from tensorflow.python.ops import gradient_checker
 from tensorflow.python.platform import test
 
 
 class ClipTest(test.TestCase):
 
+  def testClipByValueGradient(self):
+    inputs = constant_op.constant([1.0, 2.0, 3.0, 4.0], dtype=dtypes.float32)
+    outputs_1 = clip_ops.clip_by_value(inputs, 0.5, 3.5)
+    min_val = constant_op.constant([0.5, 0.5, 0.5, 0.5], dtype=dtypes.float32)
+    max_val = constant_op.constant([3.5, 3.5, 3.5, 3.5], dtype=dtypes.float32)
+    outputs_2 = clip_ops.clip_by_value(inputs, min_val, max_val)
+    with self.test_session():
+      error_1 = gradient_checker.compute_gradient_error(inputs, [4],
+                                                        outputs_1, [4])
+      self.assertLess(error_1, 1e-4)
+
+      error_2 = gradient_checker.compute_gradient_error(inputs, [4],
+                                                        outputs_2, [4])
+      self.assertLess(error_2, 1e-4)
+
   # ClipByValue test
   def testClipByValue(self):
-    with self.test_session():
+    with self.test_session(use_gpu=True):
       x = constant_op.constant([-5.0, 2.0, 3.0, 4.0, 5.0, 6.0], shape=[2, 3])
       np_ans = [[-4.4, 2.0, 3.0], [4.0, 4.4, 4.4]]
       clip_value = 4.4
@@ -37,18 +55,84 @@ class ClipTest(test.TestCase):
 
     self.assertAllClose(np_ans, tf_ans)
 
+  # [Tensor, Scalar, Scalar]
+  def testClipByValue0Type(self):
+    for dtype in [dtypes.float16, dtypes.float32, dtypes.float64,
+                  dtypes.int8, dtypes.int16, dtypes.int32, dtypes.int64,
+                  dtypes.uint8, dtypes.uint16]:
+      with self.test_session(use_gpu=True):
+        x = constant_op.constant([1, 2, 3, 4, 5, 6], shape=[2, 3], dtype=dtype)
+        np_ans = [[2, 2, 3], [4, 4, 4]]
+        clip_value_min = 2
+        clip_value_max = 4
+        ans = clip_ops.clip_by_value(x, clip_value_min, clip_value_max)
+        tf_ans = ans.eval()
+
+      self.assertAllClose(np_ans, tf_ans)
+
+  # [Tensor, Tensor, Scalar]
+  def testClipByValue1Type(self):
+    for dtype in [dtypes.float16, dtypes.float32, dtypes.float64,
+                  dtypes.int8, dtypes.int16, dtypes.int32, dtypes.int64,
+                  dtypes.uint8, dtypes.uint16]:
+      with self.test_session(use_gpu=True):
+        x = constant_op.constant([1, 2, 3, 4, 5, 6], shape=[2, 3], dtype=dtype)
+        np_ans = [[2, 2, 3], [4, 4, 4]]
+        clip_value_min = constant_op.constant([2, 2, 2, 3, 3, 3], shape=[2, 3],
+                                              dtype=dtype)
+        clip_value_max = 4
+        ans = clip_ops.clip_by_value(x, clip_value_min, clip_value_max)
+        tf_ans = ans.eval()
+
+      self.assertAllClose(np_ans, tf_ans)
+
+  # [Tensor, Scalar, Tensor]
+  def testClipByValue2Type(self):
+    for dtype in [dtypes.float16, dtypes.float32, dtypes.float64,
+                  dtypes.int8, dtypes.int16, dtypes.int32, dtypes.int64,
+                  dtypes.uint8, dtypes.uint16]:
+      with self.test_session(use_gpu=True):
+        x = constant_op.constant([1, 2, 3, 4, 5, 6], shape=[2, 3], dtype=dtype)
+        np_ans = [[4, 4, 4], [4, 5, 6]]
+        clip_value_min = 4
+        clip_value_max = constant_op.constant([6, 6, 6, 6, 6, 6], shape=[2, 3],
+                                              dtype=dtype)
+        ans = clip_ops.clip_by_value(x, clip_value_min, clip_value_max)
+        tf_ans = ans.eval()
+
+      self.assertAllClose(np_ans, tf_ans)
+
+  # [Tensor, Tensor, Tensor]
+  def testClipByValue3Type(self):
+    for dtype in [dtypes.float16, dtypes.float32, dtypes.float64,
+                  dtypes.int8, dtypes.int16, dtypes.int32, dtypes.int64,
+                  dtypes.uint8, dtypes.uint16]:
+      with self.test_session(use_gpu=True):
+        x = constant_op.constant([1, 2, 3, 4, 5, 6], shape=[2, 3], dtype=dtype)
+        np_ans = [[2, 2, 3], [5, 5, 6]]
+        clip_value_min = constant_op.constant([2, 2, 2, 5, 5, 5], shape=[2, 3],
+                                              dtype=dtype)
+        clip_value_max = constant_op.constant([5, 5, 5, 7, 7, 7], shape=[2, 3],
+                                              dtype=dtype)
+        ans = clip_ops.clip_by_value(x, clip_value_min, clip_value_max)
+        tf_ans = ans.eval()
+
+      self.assertAllClose(np_ans, tf_ans)
+
   def testClipByValueBadShape(self):
-    with self.test_session():
+    with self.test_session(use_gpu=True):
       x = constant_op.constant([-5.0, 2.0, 3.0, 4.0, 5.0, 6.0], shape=[2, 3, 1])
       # Use a nonsensical shape.
       clip = constant_op.constant([1.0, 2.0])
-      with self.assertRaises(ValueError):
-        _ = clip_ops.clip_by_value(x, -clip, clip)
-      with self.assertRaises(ValueError):
-        _ = clip_ops.clip_by_value(x, 1.0, clip)
+      with self.assertRaises(errors_impl.InvalidArgumentError):
+        ans = clip_ops.clip_by_value(x, -clip, clip)
+        tf_ans = ans.eval()
+      with self.assertRaises(errors_impl.InvalidArgumentError):
+        ans = clip_ops.clip_by_value(x, 1.0, clip)
+        tf_ans = ans.eval()
 
   def testClipByValueNonFinite(self):
-    with self.test_session():
+    with self.test_session(use_gpu=True):
       x = constant_op.constant([float('NaN'), float('Inf'), -float('Inf')])
       np_ans = [float('NaN'), 4.0, -4.0]
       clip_value = 4.0
@@ -60,7 +144,7 @@ class ClipTest(test.TestCase):
   # ClipByNorm tests
   def testClipByNormClipped(self):
     # Norm clipping when clip_norm < 5
-    with self.test_session():
+    with self.test_session(use_gpu=True):
       x = constant_op.constant([-3.0, 0.0, 0.0, 4.0, 0.0, 0.0], shape=[2, 3])
       # Norm of x = sqrt(3^2 + 4^2) = 5
       np_ans = [[-2.4, 0.0, 0.0], [3.2, 0.0, 0.0]]
@@ -76,7 +160,7 @@ class ClipTest(test.TestCase):
     self.assertAllClose(np_ans, tf_ans_tensor)
 
   def testClipByNormBadShape(self):
-    with self.test_session():
+    with self.test_session(use_gpu=True):
       x = constant_op.constant([-3.0, 0.0, 0.0, 4.0, 0.0, 0.0], shape=[2, 3, 1])
       # Use a nonsensical shape.
       clip = constant_op.constant([1.0, 2.0])
@@ -85,7 +169,7 @@ class ClipTest(test.TestCase):
 
   def testClipByNormNotClipped(self):
     # No norm clipping when clip_norm >= 5
-    with self.test_session():
+    with self.test_session(use_gpu=True):
       x = constant_op.constant([-3.0, 0.0, 0.0, 4.0, 0.0, 0.0], shape=[2, 3])
       # Norm of x = sqrt(3^2 + 4^2) = 5
       np_ans = [[-3.0, 0.0, 0.0], [4.0, 0.0, 0.0]]
@@ -97,7 +181,7 @@ class ClipTest(test.TestCase):
 
   def testClipByNormZero(self):
     # No norm clipping when norm = 0
-    with self.test_session():
+    with self.test_session(use_gpu=True):
       x = constant_op.constant([0.0, 0.0, 0.0, 0.0, 0.0, 0.0], shape=[2, 3])
       # Norm = 0, no changes
       np_ans = [[0.0, 0.0, 0.0], [0.0, 0.0, 0.0]]
@@ -109,7 +193,7 @@ class ClipTest(test.TestCase):
 
   def testClipByNormClippedWithDim0(self):
     # Norm clipping when clip_norm < 5
-    with self.test_session():
+    with self.test_session(use_gpu=True):
       x = constant_op.constant([-3.0, 0.0, 0.0, 4.0, 0.0, 3.0], shape=[2, 3])
       # Norm of x[:, 0] = sqrt(3^2 + 4^2) = 5, x[:, 2] = 3
       np_ans = [[-2.4, 0.0, 0.0], [3.2, 0.0, 3.0]]
@@ -121,7 +205,7 @@ class ClipTest(test.TestCase):
 
   def testClipByNormClippedWithDim1(self):
     # Norm clipping when clip_norm < 5
-    with self.test_session():
+    with self.test_session(use_gpu=True):
       x = constant_op.constant([-3.0, 0.0, 0.0, 4.0, 0.0, 3.0], shape=[2, 3])
       # Norm of x[0, :] = 3, x[1, :] = sqrt(3^2 + 4^2) = 5
       np_ans = [[-3.0, 0.0, 0.0], [3.2, 0.0, 2.4]]
@@ -133,7 +217,7 @@ class ClipTest(test.TestCase):
 
   def testClipByNormNotClippedWithAxes(self):
     # No norm clipping when clip_norm >= 5
-    with self.test_session():
+    with self.test_session(use_gpu=True):
       x = constant_op.constant([-3.0, 0.0, 0.0, 4.0, 0.0, 3.0], shape=[2, 3])
       # Norm of x[0, :] = 3, x[1, :] = sqrt(3^2 + 4^2) = 5
       np_ans = [[-3.0, 0.0, 0.0], [4.0, 0.0, 3.0]]
@@ -146,7 +230,7 @@ class ClipTest(test.TestCase):
   # ClipByGlobalNorm tests
   def testClipByGlobalNormClipped(self):
     # Norm clipping when clip_norm < 5
-    with self.test_session():
+    with self.test_session(use_gpu=True):
       x0 = constant_op.constant([-2.0, 0.0, 0.0, 4.0, 0.0, 0.0], shape=[2, 3])
       x1 = constant_op.constant([1.0, -2.0])
       # Global norm of x0 and x1 = sqrt(1 + 4^2 + 2^2 + 2^2) = 5
@@ -167,7 +251,7 @@ class ClipTest(test.TestCase):
 
   def testClipByGlobalNormClippedTensor(self):
     # Norm clipping when clip_norm < 5
-    with self.test_session():
+    with self.test_session(use_gpu=True):
       x0 = constant_op.constant([-2.0, 0.0, 0.0, 4.0, 0.0, 0.0], shape=[2, 3])
       x1 = constant_op.constant([1.0, -2.0])
       # Global norm of x0 and x1 = sqrt(1 + 4^2 + 2^2 + 2^2) = 5
@@ -188,7 +272,7 @@ class ClipTest(test.TestCase):
 
   def testClipByGlobalNormSupportsNone(self):
     # Norm clipping when clip_norm < 5
-    with self.test_session():
+    with self.test_session(use_gpu=True):
       x0 = constant_op.constant([-2.0, 0.0, 0.0, 4.0, 0.0, 0.0], shape=[2, 3])
       x1 = constant_op.constant([1.0, -2.0])
       # Global norm of x0 and x1 = sqrt(1 + 4^2 + 2^2 + 2^2) = 5
@@ -211,7 +295,7 @@ class ClipTest(test.TestCase):
 
   def testClipByGlobalNormWithIndexedSlicesClipped(self):
     # Norm clipping when clip_norm < 5
-    with self.test_session():
+    with self.test_session(use_gpu=True):
       x0 = constant_op.constant([-2.0, 0.0, 0.0, 4.0, 0.0, 0.0], shape=[2, 3])
       x1 = ops.IndexedSlices(
           constant_op.constant([1.0, -2.0]), constant_op.constant([3, 4]))
@@ -244,7 +328,7 @@ class ClipTest(test.TestCase):
 
   def testClipByGlobalNormNotClipped(self):
     # No norm clipping when clip_norm >= 5
-    with self.test_session():
+    with self.test_session(use_gpu=True):
       x0 = constant_op.constant([-2.0, 0.0, 0.0, 4.0, 0.0, 0.0], shape=[2, 3])
       x1 = constant_op.constant([1.0, -2.0])
       # Global norm of x0 and x1 = sqrt(1 + 4^2 + 2^2 + 2^2) = 5
@@ -263,7 +347,7 @@ class ClipTest(test.TestCase):
 
   def testClipByGlobalNormZero(self):
     # No norm clipping when norm = 0
-    with self.test_session():
+    with self.test_session(use_gpu=True):
       x0 = constant_op.constant([0.0, 0.0, 0.0, 0.0, 0.0, 0.0], shape=[2, 3])
       x1 = constant_op.constant([0.0, 0.0])
       # Norm = 0, no changes
@@ -282,7 +366,7 @@ class ClipTest(test.TestCase):
 
   def testClipByAverageNormClipped(self):
     # Norm clipping when average clip_norm < 0.83333333
-    with self.test_session():
+    with self.test_session(use_gpu=True):
       x = constant_op.constant([-3.0, 0.0, 0.0, 4.0, 0.0, 0.0], shape=[2, 3])
       # Average norm of x = sqrt(3^2 + 4^2) / 6 = 0.83333333
       np_ans = [[-2.88, 0.0, 0.0], [3.84, 0.0, 0.0]]
@@ -294,7 +378,7 @@ class ClipTest(test.TestCase):
 
   def testClipByAverageNormClippedTensor(self):
     # Norm clipping when average clip_norm < 0.83333333
-    with self.test_session():
+    with self.test_session(use_gpu=True):
       x = constant_op.constant([-3.0, 0.0, 0.0, 4.0, 0.0, 0.0], shape=[2, 3])
       # Average norm of x = sqrt(3^2 + 4^2) / 6 = 0.83333333
       np_ans = [[-2.88, 0.0, 0.0], [3.84, 0.0, 0.0]]
@@ -306,7 +390,7 @@ class ClipTest(test.TestCase):
 
   def testClipByAverageNormNotClipped(self):
     # No norm clipping when average clip_norm >= 0.83333333
-    with self.test_session():
+    with self.test_session(use_gpu=True):
       x = constant_op.constant([-3.0, 0.0, 0.0, 4.0, 0.0, 0.0], shape=[2, 3])
       # Average norm of x = sqrt(3^2 + 4^2) / 6 = 0.83333333
       np_ans = [[-3.0, 0.0, 0.0], [4.0, 0.0, 0.0]]
@@ -318,7 +402,7 @@ class ClipTest(test.TestCase):
 
   def testClipByAverageNormZero(self):
     # No norm clipping when average clip_norm = 0
-    with self.test_session():
+    with self.test_session(use_gpu=True):
       x = constant_op.constant([0.0, 0.0, 0.0, 0.0, 0.0, 0.0], shape=[2, 3])
       # Average norm = 0, no changes
       np_ans = [[0.0, 0.0, 0.0], [0.0, 0.0, 0.0]]
diff --git a/tensorflow/python/ops/clip_ops.py b/tensorflow/python/ops/clip_ops.py
index 80803530c1..b53134a5ba 100644
--- a/tensorflow/python/ops/clip_ops.py
+++ b/tensorflow/python/ops/clip_ops.py
@@ -26,10 +26,11 @@ from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
 from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import gen_array_ops
+from tensorflow.python.ops import gen_math_ops
 from tensorflow.python.ops import gen_nn_ops
 from tensorflow.python.ops import math_ops
 
-
 def clip_by_value(t, clip_value_min, clip_value_max,
                   name=None):
   """Clips tensor values to a specified min and max.
@@ -56,18 +57,35 @@ def clip_by_value(t, clip_value_min, clip_value_max,
   """
   with ops.name_scope(name, "clip_by_value",
                       [t, clip_value_min, clip_value_max]) as name:
-    t = ops.convert_to_tensor(t, name="t")
-
-    # Go through list of tensors, for each value in each tensor clip
-    t_min = math_ops.minimum(t, clip_value_max)
-    # Assert that the shape is compatible with the initial shape,
-    # to prevent unintentional broadcasting.
-    _ = t.shape.merge_with(t_min.shape)
-
-    t_max = math_ops.maximum(t_min, clip_value_min, name=name)
-    _ = t.shape.merge_with(t_max.shape)
-
-  return t_max
+    return gen_math_ops._clip_by_value(t,
+                                       clip_value_min,
+                                       clip_value_max,
+                                       name=name)
+
+
+@ops.RegisterGradient("ClipByValue")
+def _ClipByValueGrad(op, grad):
+  """Returns grad of clip_by_value."""
+  x = op.inputs[0]
+  y = op.inputs[1]
+  z = op.inputs[2]
+  gdtype = grad.dtype
+  sx = array_ops.shape(x)
+  sy = array_ops.shape(y)
+  sz = array_ops.shape(z)
+  gradshape = array_ops.shape(grad)
+  zeros = array_ops.zeros(gradshape, gdtype)
+  xymask = math_ops.less(x, y)
+  xzmask = math_ops.greater(x, z)
+  rx, ry = gen_array_ops._broadcast_gradient_args(sx, sy)
+  rx, rz = gen_array_ops._broadcast_gradient_args(sx, sz)
+  xgrad = array_ops.where(math_ops.logical_or(xymask, xzmask), zeros, grad)
+  ygrad = array_ops.where(xymask, grad, zeros)
+  zgrad = array_ops.where(xzmask, grad, zeros)
+  gx = array_ops.reshape(math_ops.reduce_sum(xgrad, rx), sx)
+  gy = array_ops.reshape(math_ops.reduce_sum(ygrad, ry), sy)
+  gz = array_ops.reshape(math_ops.reduce_sum(zgrad, rz), sz)
+  return (gx, gy, gz)
 
 
 def clip_by_norm(t, clip_norm, axes=None, name=None):
diff --git a/tensorflow/python/ops/hidden_ops.txt b/tensorflow/python/ops/hidden_ops.txt
index ec0890c016..51e349c29d 100644
--- a/tensorflow/python/ops/hidden_ops.txt
+++ b/tensorflow/python/ops/hidden_ops.txt
@@ -257,6 +257,7 @@ BatchIFFT
 BatchIFFT2D
 BatchIFFT3D
 Bucketize
+ClipByValue
 Complex
 ComplexAbs
 Conj
-- 
GitLab


From e3e2ac9181c42eb82548726d8a250944b56180fd Mon Sep 17 00:00:00 2001
From: Tian Jin <tjingrant@gmail.com>
Date: Sat, 16 Dec 2017 20:00:27 -0500
Subject: [PATCH 1119/1225] Initial SRU Implementation (#13978)

* initial tf sru implementation

* fix equation

* Refactor to BasicLSTM like

* Refactor to BasicLSTM like

* address comments and explain constraint

* address pylint concern

* fix wrong merge
---
 .../python/kernel_tests/core_rnn_cell_test.py | 14 ++++
 tensorflow/python/ops/rnn_cell.py             |  1 +
 tensorflow/python/ops/rnn_cell_impl.py        | 81 ++++++++++++++++++-
 3 files changed, 95 insertions(+), 1 deletion(-)

diff --git a/tensorflow/contrib/rnn/python/kernel_tests/core_rnn_cell_test.py b/tensorflow/contrib/rnn/python/kernel_tests/core_rnn_cell_test.py
index 63155faf1e..e47755e2fe 100644
--- a/tensorflow/contrib/rnn/python/kernel_tests/core_rnn_cell_test.py
+++ b/tensorflow/contrib/rnn/python/kernel_tests/core_rnn_cell_test.py
@@ -140,6 +140,20 @@ class RNNCellTest(test.TestCase):
         # Smoke test
         self.assertAllClose(res[0], [[0.156736, 0.156736]])
 
+  def testSRUCell(self):
+    with self.test_session() as sess:
+      with variable_scope.variable_scope(
+          "root", initializer=init_ops.constant_initializer(0.5)):
+        x = array_ops.zeros([1, 2])
+        m = array_ops.zeros([1, 2])
+        g, _ = rnn_cell_impl.SRUCell(2)(x, m)
+        sess.run([variables_lib.global_variables_initializer()])
+        res = sess.run(
+            [g], {x.name: np.array([[1., 1.]]),
+                  m.name: np.array([[0.1, 0.1]])})
+        # Smoke test
+        self.assertAllClose(res[0], [[0.509682,  0.509682]])
+
   def testBasicLSTMCell(self):
     for dtype in [dtypes.float16, dtypes.float32]:
       np_dtype = dtype.as_numpy_dtype
diff --git a/tensorflow/python/ops/rnn_cell.py b/tensorflow/python/ops/rnn_cell.py
index c0dac8fb01..35dbbf34c4 100644
--- a/tensorflow/python/ops/rnn_cell.py
+++ b/tensorflow/python/ops/rnn_cell.py
@@ -24,6 +24,7 @@
 @@BasicLSTMCell
 @@GRUCell
 @@LSTMCell
+@@SRUCell
 
 ## Classes storing split `RNNCell` state
 
diff --git a/tensorflow/python/ops/rnn_cell_impl.py b/tensorflow/python/ops/rnn_cell_impl.py
index 7cb9f7762d..10284a91be 100644
--- a/tensorflow/python/ops/rnn_cell_impl.py
+++ b/tensorflow/python/ops/rnn_cell_impl.py
@@ -250,7 +250,6 @@ class RNNCell(base_layer.Layer):
     self._last_zero_state = (state_size, batch_size, dtype, output)
     return output
 
-
 class _LayerRNNCell(RNNCell):
   """Subclass of RNNCells that act like proper `tf.Layer` objects.
 
@@ -291,6 +290,86 @@ class _LayerRNNCell(RNNCell):
     return base_layer.Layer.__call__(self, inputs, state, scope=scope,
                                      *args, **kwargs)
 
+class SRUCell(_LayerRNNCell):
+  """Training RNNs as Fast as CNNs (cf. https://arxiv.org/abs/1709.02755).
+
+  Args:
+    num_units: int, The number of units in the SRU cell.
+    activation: Nonlinearity to use.  Default: `tanh`.
+    reuse: (optional) Python boolean describing whether to reuse variables
+      in an existing scope.  If not `True`, and the existing scope already has
+      the given variables, an error is raised.
+    name: (optional) String, the name of the layer. Layers with the same name
+      will share weights, but to avoid mistakes we require reuse=True in such
+      cases.
+  """
+  def __init__(self, num_units,
+               activation=None, reuse=None, name=None):
+    super(SRUCell, self).__init__(_reuse=reuse, name=name)
+    self._num_units = num_units
+    self._activation = activation or math_ops.tanh
+
+    # Restrict inputs to be 2-dimensional matrices
+    self.input_spec = base_layer.InputSpec(ndim=2)
+
+  @property
+  def state_size(self):
+    return self._num_units
+
+  @property
+  def output_size(self):
+    return self._num_units
+
+  def build(self, inputs_shape):
+    if inputs_shape[1].value is None:
+      raise ValueError("Expected inputs.shape[-1] to be known, saw shape: %s"
+                       % inputs_shape)
+
+    input_depth = inputs_shape[1].value
+
+    # Here the contributor believes that the following constraints
+    # are implied. The reasoning is explained here with reference to
+    # the paper https://arxiv.org/pdf/1709.02755.pdf upon which this
+    # implementation is based.
+    # In section 2.1 Equation 5, specifically:
+    # h_t = r_t \odot g(c_t) + (1 - r_t) \odot x_t
+    # the pointwise operation between r_t and x_t means they have
+    # the same shape (since we are implementing an RNN cell, braodcasting
+    # does not happen to input of a single timestep); by the same
+    # reasons, x_t has the same shape as h_t, essentially mandating that
+    # input_depth = unit_num.
+    if input_depth != self._num_units:
+      raise ValueError("SRU requires input_depth == num_units, got "
+                       "input_depth = %s, num_units = %s" % (input_depth,
+                                                             self._num_units))
+
+    self._kernel = self.add_variable(
+        _WEIGHTS_VARIABLE_NAME,
+        shape=[input_depth, 3 * self._num_units])
+
+    self._bias = self.add_variable(
+        _BIAS_VARIABLE_NAME,
+        shape=[2 * self._num_units],
+        initializer=init_ops.constant_initializer(0.0, dtype=self.dtype))
+
+    self._built = True
+
+  def call(self, inputs, state):
+    """Simple recurrent unit (SRU) with num_units cells."""
+
+    U = math_ops.matmul(inputs, self._kernel)
+    x_bar, f_intermediate, r_intermediate = array_ops.split(value=U,
+                                                            num_or_size_splits=3,
+                                                            axis=1)
+
+    f_r = math_ops.sigmoid(nn_ops.bias_add(array_ops.concat(
+        [f_intermediate, r_intermediate], 1), self._bias))
+    f, r = array_ops.split(value=f_r, num_or_size_splits=2, axis=1)
+
+    c = f * state + (1.0 - f) * x_bar
+    h = r * self._activation(c) + (1.0 - r) * inputs
+
+    return h, c
 
 class BasicRNNCell(_LayerRNNCell):
   """The most basic RNN cell.
-- 
GitLab


From 92233820e6256ffb428650e67dc8c6dc4bbc7074 Mon Sep 17 00:00:00 2001
From: Yong Tang <yong.tang.github@outlook.com>
Date: Sat, 16 Dec 2017 19:04:59 -0600
Subject: [PATCH 1120/1225] Fix issues in doc `tf.Placeholder` should be
 `tf.placeholder` (#15330)

This fix fixes issues in the doc (data_feeder.py) where
`tf.Placeholder` should be `tf.placeholder`

Signed-off-by: Yong Tang <yong.tang.github@outlook.com>
---
 tensorflow/contrib/learn/python/learn/learn_io/data_feeder.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tensorflow/contrib/learn/python/learn/learn_io/data_feeder.py b/tensorflow/contrib/learn/python/learn/learn_io/data_feeder.py
index 86fad4c553..f36a778b52 100644
--- a/tensorflow/contrib/learn/python/learn/learn_io/data_feeder.py
+++ b/tensorflow/contrib/learn/python/learn/learn_io/data_feeder.py
@@ -857,8 +857,8 @@ class DaskDataFeeder(object):
     """Returns a function, that will sample data and provide it to placeholders.
 
     Args:
-      input_placeholder: tf.Placeholder for input features mini batch.
-      output_placeholder: tf.Placeholder for output labels.
+      input_placeholder: tf.placeholder for input features mini batch.
+      output_placeholder: tf.placeholder for output labels.
 
     Returns:
       A function that when called samples a random subset of batch size
-- 
GitLab


From 715b40154e27475328fb7e2f60c01ccd84b835db Mon Sep 17 00:00:00 2001
From: Mike Knapp <mikeknapp@users.noreply.github.com>
Date: Sun, 17 Dec 2017 11:12:11 +1000
Subject: [PATCH 1121/1225] Add common error documentation

See https://github.com/tensorflow/tensorflow/issues/15258
---
 tensorflow/contrib/lite/g3doc/ios.md | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/tensorflow/contrib/lite/g3doc/ios.md b/tensorflow/contrib/lite/g3doc/ios.md
index ce8b37fbf9..a359b8d4b4 100644
--- a/tensorflow/contrib/lite/g3doc/ios.md
+++ b/tensorflow/contrib/lite/g3doc/ios.md
@@ -45,6 +45,10 @@ into a universal file containing armv7, armv7s, arm64, i386, and x86_64
 architectures. The resulting library is in
 `tensorflow/contrib/lite/gen/lib/libtensorflow-lite.a`.
 
+If you get an error such as `no such file or directory: 'x86_64'` when running 
+`build_ios_universal_lib.sh`: open Xcode > Preferences > Locations, and ensure 
+a value is selected in the "Command Line Tools" dropdown.
+
 ## Using in your own application
 
 You'll need to update various settings in your app to link against TensorFlow
-- 
GitLab


From 2853a0d4a5b0d1d11aa4c68548a250b1b8084bb4 Mon Sep 17 00:00:00 2001
From: Gunhan Gulsoy <gunan@google.com>
Date: Sun, 17 Dec 2017 21:29:01 -0800
Subject: [PATCH 1122/1225] Revert "Initial SRU Implementation (#13978)"

This reverts commit e3e2ac9181c42eb82548726d8a250944b56180fd.
---
 .../python/kernel_tests/core_rnn_cell_test.py | 14 ----
 tensorflow/python/ops/rnn_cell.py             |  1 -
 tensorflow/python/ops/rnn_cell_impl.py        | 81 +------------------
 3 files changed, 1 insertion(+), 95 deletions(-)

diff --git a/tensorflow/contrib/rnn/python/kernel_tests/core_rnn_cell_test.py b/tensorflow/contrib/rnn/python/kernel_tests/core_rnn_cell_test.py
index e47755e2fe..63155faf1e 100644
--- a/tensorflow/contrib/rnn/python/kernel_tests/core_rnn_cell_test.py
+++ b/tensorflow/contrib/rnn/python/kernel_tests/core_rnn_cell_test.py
@@ -140,20 +140,6 @@ class RNNCellTest(test.TestCase):
         # Smoke test
         self.assertAllClose(res[0], [[0.156736, 0.156736]])
 
-  def testSRUCell(self):
-    with self.test_session() as sess:
-      with variable_scope.variable_scope(
-          "root", initializer=init_ops.constant_initializer(0.5)):
-        x = array_ops.zeros([1, 2])
-        m = array_ops.zeros([1, 2])
-        g, _ = rnn_cell_impl.SRUCell(2)(x, m)
-        sess.run([variables_lib.global_variables_initializer()])
-        res = sess.run(
-            [g], {x.name: np.array([[1., 1.]]),
-                  m.name: np.array([[0.1, 0.1]])})
-        # Smoke test
-        self.assertAllClose(res[0], [[0.509682,  0.509682]])
-
   def testBasicLSTMCell(self):
     for dtype in [dtypes.float16, dtypes.float32]:
       np_dtype = dtype.as_numpy_dtype
diff --git a/tensorflow/python/ops/rnn_cell.py b/tensorflow/python/ops/rnn_cell.py
index 35dbbf34c4..c0dac8fb01 100644
--- a/tensorflow/python/ops/rnn_cell.py
+++ b/tensorflow/python/ops/rnn_cell.py
@@ -24,7 +24,6 @@
 @@BasicLSTMCell
 @@GRUCell
 @@LSTMCell
-@@SRUCell
 
 ## Classes storing split `RNNCell` state
 
diff --git a/tensorflow/python/ops/rnn_cell_impl.py b/tensorflow/python/ops/rnn_cell_impl.py
index 10284a91be..7cb9f7762d 100644
--- a/tensorflow/python/ops/rnn_cell_impl.py
+++ b/tensorflow/python/ops/rnn_cell_impl.py
@@ -250,6 +250,7 @@ class RNNCell(base_layer.Layer):
     self._last_zero_state = (state_size, batch_size, dtype, output)
     return output
 
+
 class _LayerRNNCell(RNNCell):
   """Subclass of RNNCells that act like proper `tf.Layer` objects.
 
@@ -290,86 +291,6 @@ class _LayerRNNCell(RNNCell):
     return base_layer.Layer.__call__(self, inputs, state, scope=scope,
                                      *args, **kwargs)
 
-class SRUCell(_LayerRNNCell):
-  """Training RNNs as Fast as CNNs (cf. https://arxiv.org/abs/1709.02755).
-
-  Args:
-    num_units: int, The number of units in the SRU cell.
-    activation: Nonlinearity to use.  Default: `tanh`.
-    reuse: (optional) Python boolean describing whether to reuse variables
-      in an existing scope.  If not `True`, and the existing scope already has
-      the given variables, an error is raised.
-    name: (optional) String, the name of the layer. Layers with the same name
-      will share weights, but to avoid mistakes we require reuse=True in such
-      cases.
-  """
-  def __init__(self, num_units,
-               activation=None, reuse=None, name=None):
-    super(SRUCell, self).__init__(_reuse=reuse, name=name)
-    self._num_units = num_units
-    self._activation = activation or math_ops.tanh
-
-    # Restrict inputs to be 2-dimensional matrices
-    self.input_spec = base_layer.InputSpec(ndim=2)
-
-  @property
-  def state_size(self):
-    return self._num_units
-
-  @property
-  def output_size(self):
-    return self._num_units
-
-  def build(self, inputs_shape):
-    if inputs_shape[1].value is None:
-      raise ValueError("Expected inputs.shape[-1] to be known, saw shape: %s"
-                       % inputs_shape)
-
-    input_depth = inputs_shape[1].value
-
-    # Here the contributor believes that the following constraints
-    # are implied. The reasoning is explained here with reference to
-    # the paper https://arxiv.org/pdf/1709.02755.pdf upon which this
-    # implementation is based.
-    # In section 2.1 Equation 5, specifically:
-    # h_t = r_t \odot g(c_t) + (1 - r_t) \odot x_t
-    # the pointwise operation between r_t and x_t means they have
-    # the same shape (since we are implementing an RNN cell, braodcasting
-    # does not happen to input of a single timestep); by the same
-    # reasons, x_t has the same shape as h_t, essentially mandating that
-    # input_depth = unit_num.
-    if input_depth != self._num_units:
-      raise ValueError("SRU requires input_depth == num_units, got "
-                       "input_depth = %s, num_units = %s" % (input_depth,
-                                                             self._num_units))
-
-    self._kernel = self.add_variable(
-        _WEIGHTS_VARIABLE_NAME,
-        shape=[input_depth, 3 * self._num_units])
-
-    self._bias = self.add_variable(
-        _BIAS_VARIABLE_NAME,
-        shape=[2 * self._num_units],
-        initializer=init_ops.constant_initializer(0.0, dtype=self.dtype))
-
-    self._built = True
-
-  def call(self, inputs, state):
-    """Simple recurrent unit (SRU) with num_units cells."""
-
-    U = math_ops.matmul(inputs, self._kernel)
-    x_bar, f_intermediate, r_intermediate = array_ops.split(value=U,
-                                                            num_or_size_splits=3,
-                                                            axis=1)
-
-    f_r = math_ops.sigmoid(nn_ops.bias_add(array_ops.concat(
-        [f_intermediate, r_intermediate], 1), self._bias))
-    f, r = array_ops.split(value=f_r, num_or_size_splits=2, axis=1)
-
-    c = f * state + (1.0 - f) * x_bar
-    h = r * self._activation(c) + (1.0 - r) * inputs
-
-    return h, c
 
 class BasicRNNCell(_LayerRNNCell):
   """The most basic RNN cell.
-- 
GitLab


From 943201bf1a959acf6a08b88a488b3db55404835c Mon Sep 17 00:00:00 2001
From: Gunhan Gulsoy <gunan@google.com>
Date: Sun, 17 Dec 2017 21:44:42 -0800
Subject: [PATCH 1123/1225] Revert "Add customized kernel implementation for
 clip_by_value (#13998)"

This reverts commit 71d3c8e636efcf0bc3f87ed9888cca71fde8cc8e.
---
 .../base_api/api_def_ClipByValue.pbtxt        |  36 ---
 tensorflow/core/kernels/cwise_op_clip.cc      | 224 ------------------
 tensorflow/core/kernels/cwise_op_clip.h       |  61 -----
 .../core/kernels/cwise_op_clip_gpu.cu.cc      | 134 -----------
 tensorflow/core/ops/math_ops.cc               |  23 --
 .../python/kernel_tests/clip_ops_test.py      | 132 ++---------
 tensorflow/python/ops/clip_ops.py             |  44 +---
 tensorflow/python/ops/hidden_ops.txt          |   1 -
 8 files changed, 37 insertions(+), 618 deletions(-)
 delete mode 100644 tensorflow/core/api_def/base_api/api_def_ClipByValue.pbtxt
 delete mode 100644 tensorflow/core/kernels/cwise_op_clip.cc
 delete mode 100644 tensorflow/core/kernels/cwise_op_clip.h
 delete mode 100644 tensorflow/core/kernels/cwise_op_clip_gpu.cu.cc

diff --git a/tensorflow/core/api_def/base_api/api_def_ClipByValue.pbtxt b/tensorflow/core/api_def/base_api/api_def_ClipByValue.pbtxt
deleted file mode 100644
index 803d8970ab..0000000000
--- a/tensorflow/core/api_def/base_api/api_def_ClipByValue.pbtxt
+++ /dev/null
@@ -1,36 +0,0 @@
-op {
-  graph_op_name: "ClipByValue"
-  in_arg {
-    name: "t"
-    description: <<END
-A `Tensor`.
-END
-  }
-  in_arg {
-    name: "clip_value_min"
-    description: <<END
-A 0-D (scalar) `Tensor`, or a `Tensor` with the same shape
-as `t`. The minimum value to clip by.
-END
-  }
-  in_arg {
-    name: "clip_value_max"
-    description: <<END
-A 0-D (scalar) `Tensor`, or a `Tensor` with the same shape
-as `t`. The maximum value to clip by.
-END
-  }
-  out_arg {
-    name: "output"
-    description: <<END
-A clipped `Tensor` with the same shape as input 't'.
-END
-  }
-  summary: "Clips tensor values to a specified min and max."
-  description: <<END
-Given a tensor `t`, this operation returns a tensor of the same type and
-shape as `t` with its values clipped to `clip_value_min` and `clip_value_max`.
-Any values less than `clip_value_min` are set to `clip_value_min`. Any values
-greater than `clip_value_max` are set to `clip_value_max`.
-END
-}
diff --git a/tensorflow/core/kernels/cwise_op_clip.cc b/tensorflow/core/kernels/cwise_op_clip.cc
deleted file mode 100644
index bd22f5777c..0000000000
--- a/tensorflow/core/kernels/cwise_op_clip.cc
+++ /dev/null
@@ -1,224 +0,0 @@
-/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-#include "tensorflow/core/kernels/cwise_op_clip.h"
-
-namespace tensorflow {
-
-typedef Eigen::ThreadPoolDevice CPUDevice;
-typedef Eigen::GpuDevice GPUDevice;
-
-// Basic coefficient-wise tenary operations.
-// This is the case for example of the clip_by_value.
-//   Device: E.g., CPUDevice, GPUDevice.
-//   Functor: defined above. E.g., functor::clip.
-template <typename Device, typename T>
-class ClipOp : public OpKernel {
- public:
-  explicit ClipOp(OpKernelConstruction* ctx) : OpKernel(ctx) {}
-
-  void Compute(OpKernelContext* ctx) override {
-    const Tensor& in0 = ctx->input(0);
-    const Tensor& in1 = ctx->input(1);
-    const Tensor& in2 = ctx->input(2);
-
-    auto in0_flat = in0.flat<T>();
-    auto in1_flat = in1.flat<T>();
-    auto in2_flat = in2.flat<T>();
-    const Device& d = ctx->eigen_device<Device>();
-
-    Tensor* out = nullptr;
-    OP_REQUIRES_OK(
-        ctx, ctx->forward_input_or_allocate_output({0}, 0, in0.shape(), &out));
-    auto out_flat = out->flat<T>();
-    if (in1.shape() == in2.shape()) {
-      if (in0.shape() == in1.shape()) {
-        functor::TernaryClipOp<Device, T>()(d, in0_flat, in1_flat, in2_flat,
-                                            out_flat);
-      } else {
-        OP_REQUIRES(ctx, TensorShapeUtils::IsScalar(in1.shape()),
-                    errors::InvalidArgument(
-                        "clip_value_min and clip_value_max must be either of "
-                        "the same shape as input, or a scalar. ",
-                        "input shape: ", in0.shape().DebugString(),
-                        "clip_value_min shape: ", in1.shape().DebugString(),
-                        "clip_value_max shape: ", in2.shape().DebugString()));
-        functor::UnaryClipOp<Device, T>()(d, in0_flat, in1_flat, in2_flat,
-                                          out_flat);
-      }
-    } else {
-      if (in0.shape() == in1.shape()) {
-        OP_REQUIRES(ctx, TensorShapeUtils::IsScalar(in2.shape()),
-                    errors::InvalidArgument(
-                        "clip_value_min and clip_value_max must be either of "
-                        "the same shape as input, or a scalar. ",
-                        "input shape: ", in0.shape().DebugString(),
-                        "clip_value_min shape: ", in1.shape().DebugString(),
-                        "clip_value_max shape: ", in2.shape().DebugString()));
-        functor::BinaryLeftClipOp<Device, T>()(d, in0_flat, in1_flat, in2_flat,
-                                               out_flat);
-      } else {
-        OP_REQUIRES(ctx, (in0.shape() == in2.shape() &&
-                          TensorShapeUtils::IsScalar(in1.shape())),
-                    errors::InvalidArgument(
-                        "clip_value_min and clip_value_max must be either of "
-                        "the same shape as input, or a scalar. ",
-                        "input shape: ", in0.shape().DebugString(),
-                        "clip_value_min shape: ", in1.shape().DebugString(),
-                        "clip_value_max shape: ", in2.shape().DebugString()));
-        functor::BinaryRightClipOp<Device, T>()(d, in0_flat, in1_flat, in2_flat,
-                                                out_flat);
-      }
-    }
-  }
-};
-
-namespace functor {
-// Unary functor for clip [Tensor, Scalar, Scalar]
-template <typename T>
-struct UnaryClipFunc {
-  UnaryClipFunc(const T& value_min, const T& value_max)
-      : value_min_(value_min), value_max_(value_max) {}
-  const T operator()(const T& value) const {
-    return std::max(std::min(value, value_max_), value_min_);
-  }
-  T value_min_;
-  T value_max_;
-};
-template <typename T>
-struct UnaryClipOp<CPUDevice, T> {
-  void operator()(const CPUDevice& d, typename TTypes<T>::ConstFlat& in0_flat,
-                  typename TTypes<T>::ConstFlat& in1_flat,
-                  typename TTypes<T>::ConstFlat& in2_flat,
-                  typename TTypes<T>::Flat& out_flat) const {
-    out_flat = in0_flat.unaryExpr(UnaryClipFunc<T>(in1_flat(0), in2_flat(0)));
-  }
-};
-
-// Binary functor for clip [Tensor, Scalar, Tensor]
-template <typename T>
-struct BinaryRightClipFunc {
-  BinaryRightClipFunc(const T& value_min) : value_min_(value_min) {}
-  const T operator()(const T& value, const T& value_max) const {
-    return std::max(std::min(value, value_max), value_min_);
-  }
-  T value_min_;
-};
-template <typename T>
-struct BinaryRightClipOp<CPUDevice, T> {
-  void operator()(const CPUDevice& d, typename TTypes<T>::ConstFlat& in0_flat,
-                  typename TTypes<T>::ConstFlat& in1_flat,
-                  typename TTypes<T>::ConstFlat& in2_flat,
-                  typename TTypes<T>::Flat& out_flat) const {
-    out_flat =
-        in0_flat.binaryExpr(in2_flat, BinaryRightClipFunc<T>(in1_flat(0)));
-  }
-};
-
-// Binary functor for clip [Tensor, Tensor, Scalar]
-template <typename T>
-struct BinaryLeftClipFunc {
-  BinaryLeftClipFunc(const T& value_max) : value_max_(value_max) {}
-  const T operator()(const T& value, const T& value_min) const {
-    return std::max(std::min(value, value_max_), value_min);
-  }
-  T value_max_;
-};
-template <typename T>
-struct BinaryLeftClipOp<CPUDevice, T> {
-  void operator()(const CPUDevice& d, typename TTypes<T>::ConstFlat& in0_flat,
-                  typename TTypes<T>::ConstFlat& in1_flat,
-                  typename TTypes<T>::ConstFlat& in2_flat,
-                  typename TTypes<T>::Flat& out_flat) const {
-    out_flat =
-        in0_flat.binaryExpr(in1_flat, BinaryLeftClipFunc<T>(in2_flat(0)));
-  }
-};
-
-// Ternary functor for clip [Tensor, Tensor, Tensor]
-template <typename T>
-struct TernaryClipOp<CPUDevice, T> {
-  void operator()(const CPUDevice& d, typename TTypes<T>::ConstFlat& in0_flat,
-                  typename TTypes<T>::ConstFlat& in1_flat,
-                  typename TTypes<T>::ConstFlat& in2_flat,
-                  typename TTypes<T>::Flat& out_flat) const {
-    out_flat.device(d) = in0_flat.cwiseMin(in2_flat).cwiseMax(in1_flat);
-  }
-};
-
-#define INSTANTIATE_CPU(T)                         \
-  template struct UnaryClipOp<CPUDevice, T>;       \
-  template struct BinaryRightClipOp<CPUDevice, T>; \
-  template struct BinaryLeftClipOp<CPUDevice, T>;  \
-  template struct TernaryClipOp<CPUDevice, T>;
-INSTANTIATE_CPU(Eigen::half);
-INSTANTIATE_CPU(float);
-INSTANTIATE_CPU(double);
-INSTANTIATE_CPU(int8);
-INSTANTIATE_CPU(int16);
-INSTANTIATE_CPU(int32);
-INSTANTIATE_CPU(int64);
-INSTANTIATE_CPU(uint8);
-INSTANTIATE_CPU(uint16);
-#undef INSTANTIATE_CPU
-}  // namespace functor
-
-#define REGISTER_CPU_KERNEL(type)                                       \
-  REGISTER_KERNEL_BUILDER(                                              \
-      Name("ClipByValue").Device(DEVICE_CPU).TypeConstraint<type>("T"), \
-      ClipOp<CPUDevice, type>);
-
-REGISTER_CPU_KERNEL(Eigen::half);
-REGISTER_CPU_KERNEL(float);
-REGISTER_CPU_KERNEL(double);
-REGISTER_CPU_KERNEL(int8);
-REGISTER_CPU_KERNEL(int16);
-REGISTER_CPU_KERNEL(int32);
-REGISTER_CPU_KERNEL(int64);
-REGISTER_CPU_KERNEL(uint8);
-REGISTER_CPU_KERNEL(uint16);
-#undef REGISTER_CPU_KERNEL
-
-#if GOOGLE_CUDA
-
-#define REGISTER_GPU_KERNEL(type)                                       \
-  REGISTER_KERNEL_BUILDER(                                              \
-      Name("ClipByValue").Device(DEVICE_GPU).TypeConstraint<type>("T"), \
-      ClipOp<GPUDevice, type>);
-REGISTER_GPU_KERNEL(Eigen::half);
-REGISTER_GPU_KERNEL(float);
-REGISTER_GPU_KERNEL(double);
-REGISTER_GPU_KERNEL(int8);
-REGISTER_GPU_KERNEL(int16);
-REGISTER_GPU_KERNEL(int64);
-REGISTER_GPU_KERNEL(uint8);
-REGISTER_GPU_KERNEL(uint16);
-
-// A special GPU kernel for int32.
-// TODO(b/25387198): Also enable int32 in device memory. This kernel
-// registration requires all int32 inputs and outputs to be in host memory.
-REGISTER_KERNEL_BUILDER(Name("ClipByValue")
-                            .Device(DEVICE_GPU)
-                            .HostMemory("t")
-                            .HostMemory("clip_value_min")
-                            .HostMemory("clip_value_max")
-                            .HostMemory("output")
-                            .TypeConstraint<int32>("T"),
-                        ClipOp<CPUDevice, int32>);
-
-#undef REGISTER_GPU_KERNEL
-#endif
-
-}  // namespace tensorflow
diff --git a/tensorflow/core/kernels/cwise_op_clip.h b/tensorflow/core/kernels/cwise_op_clip.h
deleted file mode 100644
index 1a4bf8cf1d..0000000000
--- a/tensorflow/core/kernels/cwise_op_clip.h
+++ /dev/null
@@ -1,61 +0,0 @@
-/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-#ifndef TENSORFLOW_KERNELS_CWISE_OP_CLIP_H_
-#define TENSORFLOW_KERNELS_CWISE_OP_CLIP_H_
-
-#include "tensorflow/core/kernels/cwise_ops_common.h"
-
-namespace tensorflow {
-namespace functor {
-// Unary functor for clip [Tensor, Scalar, Scalar]
-template <typename Device, typename T>
-struct UnaryClipOp {
-  void operator()(const Device &d, typename TTypes<T>::ConstFlat &in0_flat,
-                  typename TTypes<T>::ConstFlat &in1_flat,
-                  typename TTypes<T>::ConstFlat &in2_flat,
-                  typename TTypes<T>::Flat &out_flat) const;
-};
-
-// Binary functor for clip [Tensor, Scalar, Tensor]
-template <typename Device, typename T>
-struct BinaryRightClipOp {
-  void operator()(const Device &d, typename TTypes<T>::ConstFlat &in0_flat,
-                  typename TTypes<T>::ConstFlat &in1_flat,
-                  typename TTypes<T>::ConstFlat &in2_flat,
-                  typename TTypes<T>::Flat &out_flat) const;
-};
-
-// Binary functor for clip [Tensor, Tensor, Scalar]
-template <typename Device, typename T>
-struct BinaryLeftClipOp {
-  void operator()(const Device &d, typename TTypes<T>::ConstFlat &in0_flat,
-                  typename TTypes<T>::ConstFlat &in1_flat,
-                  typename TTypes<T>::ConstFlat &in2_flat,
-                  typename TTypes<T>::Flat &out_flat) const;
-};
-
-// Ternary functor for clip [Tensor, Tensor, Tensor]
-template <typename Device, typename T>
-struct TernaryClipOp {
-  void operator()(const Device &d, typename TTypes<T>::ConstFlat &in0_flat,
-                  typename TTypes<T>::ConstFlat &in1_flat,
-                  typename TTypes<T>::ConstFlat &in2_flat,
-                  typename TTypes<T>::Flat &out_flat) const;
-};
-}
-}  // namespace tensorflow
-
-#endif  // TENSORFLOW_KERNELS_CWISE_OP_CLIP_H_
diff --git a/tensorflow/core/kernels/cwise_op_clip_gpu.cu.cc b/tensorflow/core/kernels/cwise_op_clip_gpu.cu.cc
deleted file mode 100644
index 5c07847548..0000000000
--- a/tensorflow/core/kernels/cwise_op_clip_gpu.cu.cc
+++ /dev/null
@@ -1,134 +0,0 @@
-/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-#if GOOGLE_CUDA
-
-#define EIGEN_USE_GPU
-
-#include "tensorflow/core/kernels/cwise_op_clip.h"
-#include "tensorflow/core/kernels/cwise_ops_gpu_common.cu.h"
-#include "tensorflow/core/util/cuda_kernel_helper.h"
-
-namespace tensorflow {
-
-template <typename T>
-__global__ void UnaryClipCustomKernel(const int32 size_in, const T *in0,
-                                      const T *in1, const T *in2, T *out) {
-  CUDA_1D_KERNEL_LOOP(i, size_in) {
-    T value = in2[0] < in0[i] ? in2[0] : in0[i];
-    out[i] = value < in1[0] ? in1[0] : value;
-  }
-}
-
-template <typename T>
-__global__ void BinaryRightClipCustomKernel(const int32 size_in, const T *in0,
-                                            const T *in1, const T *in2,
-                                            T *out) {
-  CUDA_1D_KERNEL_LOOP(i, size_in) {
-    T value = in2[i] < in0[i] ? in2[i] : in0[i];
-    out[i] = value < in1[0] ? in1[0] : value;
-  }
-}
-
-template <typename T>
-__global__ void BinaryLeftClipCustomKernel(const int32 size_in, const T *in0,
-                                           const T *in1, const T *in2, T *out) {
-  CUDA_1D_KERNEL_LOOP(i, size_in) {
-    T value = in2[0] < in0[i] ? in2[0] : in0[i];
-    out[i] = value < in1[i] ? in1[i] : value;
-  }
-}
-
-namespace functor {
-
-// Unary functor for clip [Tensor, Scalar, Scalar]
-template <typename T>
-struct UnaryClipOp<GPUDevice, T> {
-  void operator()(const GPUDevice &d, typename TTypes<T>::ConstFlat &in0_flat,
-                  typename TTypes<T>::ConstFlat &in1_flat,
-                  typename TTypes<T>::ConstFlat &in2_flat,
-                  typename TTypes<T>::Flat &out_flat) const {
-    CudaLaunchConfig config = GetCudaLaunchConfig(in0_flat.size(), d);
-
-    UnaryClipCustomKernel<
-        T><<<config.block_count, config.thread_per_block, 0, d.stream()>>>(
-        in0_flat.size(), in0_flat.data(), in1_flat.data(), in2_flat.data(),
-        out_flat.data());
-  }
-};
-
-// Binary functor for clip [Tensor, Scalar, Tensor]
-template <typename T>
-struct BinaryRightClipOp<GPUDevice, T> {
-  void operator()(const GPUDevice &d, typename TTypes<T>::ConstFlat &in0_flat,
-                  typename TTypes<T>::ConstFlat &in1_flat,
-                  typename TTypes<T>::ConstFlat &in2_flat,
-                  typename TTypes<T>::Flat &out_flat) const {
-    CudaLaunchConfig config = GetCudaLaunchConfig(in0_flat.size(), d);
-
-    BinaryRightClipCustomKernel<
-        T><<<config.block_count, config.thread_per_block, 0, d.stream()>>>(
-        in0_flat.size(), in0_flat.data(), in1_flat.data(), in2_flat.data(),
-        out_flat.data());
-  }
-};
-
-// Binary functor for clip [Tensor, Tensor, Scalar]
-template <typename T>
-struct BinaryLeftClipOp<GPUDevice, T> {
-  void operator()(const GPUDevice &d, typename TTypes<T>::ConstFlat &in0_flat,
-                  typename TTypes<T>::ConstFlat &in1_flat,
-                  typename TTypes<T>::ConstFlat &in2_flat,
-                  typename TTypes<T>::Flat &out_flat) const {
-    CudaLaunchConfig config = GetCudaLaunchConfig(in0_flat.size(), d);
-
-    BinaryLeftClipCustomKernel<
-        T><<<config.block_count, config.thread_per_block, 0, d.stream()>>>(
-        in0_flat.size(), in0_flat.data(), in1_flat.data(), in2_flat.data(),
-        out_flat.data());
-  }
-};
-
-// Ternary functor for clip [Tensor, Tensor, Tensor]
-template <typename T>
-struct TernaryClipOp<GPUDevice, T> {
-  void operator()(const GPUDevice &d, typename TTypes<T>::ConstFlat &in0_flat,
-                  typename TTypes<T>::ConstFlat &in1_flat,
-                  typename TTypes<T>::ConstFlat &in2_flat,
-                  typename TTypes<T>::Flat &out_flat) const {
-    out_flat.device(d) = in0_flat.cwiseMin(in2_flat).cwiseMax(in1_flat);
-  }
-};
-
-#define INSTANTIATE_GPU(T)                         \
-  template struct UnaryClipOp<GPUDevice, T>;       \
-  template struct BinaryRightClipOp<GPUDevice, T>; \
-  template struct BinaryLeftClipOp<GPUDevice, T>;  \
-  template struct TernaryClipOp<GPUDevice, T>;
-INSTANTIATE_GPU(Eigen::half);
-INSTANTIATE_GPU(float);
-INSTANTIATE_GPU(double);
-INSTANTIATE_GPU(int8);
-INSTANTIATE_GPU(int16);
-INSTANTIATE_GPU(int32);
-INSTANTIATE_GPU(int64);
-INSTANTIATE_GPU(uint8);
-INSTANTIATE_GPU(uint16);
-#undef INSTANTIATE_GPU
-
-}  // namespace functor
-}  // namespace tensorflow
-
-#endif  // GOOGLE_CUDA
diff --git a/tensorflow/core/ops/math_ops.cc b/tensorflow/core/ops/math_ops.cc
index e3add98f89..8ea170ba14 100644
--- a/tensorflow/core/ops/math_ops.cc
+++ b/tensorflow/core/ops/math_ops.cc
@@ -3031,29 +3031,6 @@ Equivalent to np.digitize.
 @end_compatibility
 )doc");
 
-REGISTER_OP("ClipByValue")
-    .Input("t: T")
-    .Input("clip_value_min: T")
-    .Input("clip_value_max: T")
-    .Output("output: T")
-    .Attr("T: numbertype")
-    .SetShapeFn(shape_inference::UnchangedShape)
-    .Doc(R"doc(
-Clips tensor values to a specified min and max.
-
-Given a tensor `t`, this operation returns a tensor of the same type and
-shape as `t` with its values clipped to `clip_value_min` and `clip_value_max`.
-Any values less than `clip_value_min` are set to `clip_value_min`. Any values
-greater than `clip_value_max` are set to `clip_value_max`.
-
-t: A `Tensor`.
-clip_value_min: A 0-D (scalar) `Tensor`, or a `Tensor` with the same shape
-  as `t`. The minimum value to clip by.
-clip_value_max: A 0-D (scalar) `Tensor`, or a `Tensor` with the same shape
-  as `t`. The maximum value to clip by.
-output: A clipped `Tensor` with the same shape as input 't'.
-)doc");
-
 #ifdef INTEL_MKL
 REGISTER_OP("_MklAddN")
     .Input("inputs: N * T")
diff --git a/tensorflow/python/kernel_tests/clip_ops_test.py b/tensorflow/python/kernel_tests/clip_ops_test.py
index cb1359be15..5c8b71da17 100644
--- a/tensorflow/python/kernel_tests/clip_ops_test.py
+++ b/tensorflow/python/kernel_tests/clip_ops_test.py
@@ -19,34 +19,16 @@ from __future__ import division
 from __future__ import print_function
 
 from tensorflow.python.framework import constant_op
-from tensorflow.python.framework import dtypes
-from tensorflow.python.framework import errors_impl
 from tensorflow.python.framework import ops
 from tensorflow.python.ops import clip_ops
-from tensorflow.python.ops import gradient_checker
 from tensorflow.python.platform import test
 
 
 class ClipTest(test.TestCase):
 
-  def testClipByValueGradient(self):
-    inputs = constant_op.constant([1.0, 2.0, 3.0, 4.0], dtype=dtypes.float32)
-    outputs_1 = clip_ops.clip_by_value(inputs, 0.5, 3.5)
-    min_val = constant_op.constant([0.5, 0.5, 0.5, 0.5], dtype=dtypes.float32)
-    max_val = constant_op.constant([3.5, 3.5, 3.5, 3.5], dtype=dtypes.float32)
-    outputs_2 = clip_ops.clip_by_value(inputs, min_val, max_val)
-    with self.test_session():
-      error_1 = gradient_checker.compute_gradient_error(inputs, [4],
-                                                        outputs_1, [4])
-      self.assertLess(error_1, 1e-4)
-
-      error_2 = gradient_checker.compute_gradient_error(inputs, [4],
-                                                        outputs_2, [4])
-      self.assertLess(error_2, 1e-4)
-
   # ClipByValue test
   def testClipByValue(self):
-    with self.test_session(use_gpu=True):
+    with self.test_session():
       x = constant_op.constant([-5.0, 2.0, 3.0, 4.0, 5.0, 6.0], shape=[2, 3])
       np_ans = [[-4.4, 2.0, 3.0], [4.0, 4.4, 4.4]]
       clip_value = 4.4
@@ -55,84 +37,18 @@ class ClipTest(test.TestCase):
 
     self.assertAllClose(np_ans, tf_ans)
 
-  # [Tensor, Scalar, Scalar]
-  def testClipByValue0Type(self):
-    for dtype in [dtypes.float16, dtypes.float32, dtypes.float64,
-                  dtypes.int8, dtypes.int16, dtypes.int32, dtypes.int64,
-                  dtypes.uint8, dtypes.uint16]:
-      with self.test_session(use_gpu=True):
-        x = constant_op.constant([1, 2, 3, 4, 5, 6], shape=[2, 3], dtype=dtype)
-        np_ans = [[2, 2, 3], [4, 4, 4]]
-        clip_value_min = 2
-        clip_value_max = 4
-        ans = clip_ops.clip_by_value(x, clip_value_min, clip_value_max)
-        tf_ans = ans.eval()
-
-      self.assertAllClose(np_ans, tf_ans)
-
-  # [Tensor, Tensor, Scalar]
-  def testClipByValue1Type(self):
-    for dtype in [dtypes.float16, dtypes.float32, dtypes.float64,
-                  dtypes.int8, dtypes.int16, dtypes.int32, dtypes.int64,
-                  dtypes.uint8, dtypes.uint16]:
-      with self.test_session(use_gpu=True):
-        x = constant_op.constant([1, 2, 3, 4, 5, 6], shape=[2, 3], dtype=dtype)
-        np_ans = [[2, 2, 3], [4, 4, 4]]
-        clip_value_min = constant_op.constant([2, 2, 2, 3, 3, 3], shape=[2, 3],
-                                              dtype=dtype)
-        clip_value_max = 4
-        ans = clip_ops.clip_by_value(x, clip_value_min, clip_value_max)
-        tf_ans = ans.eval()
-
-      self.assertAllClose(np_ans, tf_ans)
-
-  # [Tensor, Scalar, Tensor]
-  def testClipByValue2Type(self):
-    for dtype in [dtypes.float16, dtypes.float32, dtypes.float64,
-                  dtypes.int8, dtypes.int16, dtypes.int32, dtypes.int64,
-                  dtypes.uint8, dtypes.uint16]:
-      with self.test_session(use_gpu=True):
-        x = constant_op.constant([1, 2, 3, 4, 5, 6], shape=[2, 3], dtype=dtype)
-        np_ans = [[4, 4, 4], [4, 5, 6]]
-        clip_value_min = 4
-        clip_value_max = constant_op.constant([6, 6, 6, 6, 6, 6], shape=[2, 3],
-                                              dtype=dtype)
-        ans = clip_ops.clip_by_value(x, clip_value_min, clip_value_max)
-        tf_ans = ans.eval()
-
-      self.assertAllClose(np_ans, tf_ans)
-
-  # [Tensor, Tensor, Tensor]
-  def testClipByValue3Type(self):
-    for dtype in [dtypes.float16, dtypes.float32, dtypes.float64,
-                  dtypes.int8, dtypes.int16, dtypes.int32, dtypes.int64,
-                  dtypes.uint8, dtypes.uint16]:
-      with self.test_session(use_gpu=True):
-        x = constant_op.constant([1, 2, 3, 4, 5, 6], shape=[2, 3], dtype=dtype)
-        np_ans = [[2, 2, 3], [5, 5, 6]]
-        clip_value_min = constant_op.constant([2, 2, 2, 5, 5, 5], shape=[2, 3],
-                                              dtype=dtype)
-        clip_value_max = constant_op.constant([5, 5, 5, 7, 7, 7], shape=[2, 3],
-                                              dtype=dtype)
-        ans = clip_ops.clip_by_value(x, clip_value_min, clip_value_max)
-        tf_ans = ans.eval()
-
-      self.assertAllClose(np_ans, tf_ans)
-
   def testClipByValueBadShape(self):
-    with self.test_session(use_gpu=True):
+    with self.test_session():
       x = constant_op.constant([-5.0, 2.0, 3.0, 4.0, 5.0, 6.0], shape=[2, 3, 1])
       # Use a nonsensical shape.
       clip = constant_op.constant([1.0, 2.0])
-      with self.assertRaises(errors_impl.InvalidArgumentError):
-        ans = clip_ops.clip_by_value(x, -clip, clip)
-        tf_ans = ans.eval()
-      with self.assertRaises(errors_impl.InvalidArgumentError):
-        ans = clip_ops.clip_by_value(x, 1.0, clip)
-        tf_ans = ans.eval()
+      with self.assertRaises(ValueError):
+        _ = clip_ops.clip_by_value(x, -clip, clip)
+      with self.assertRaises(ValueError):
+        _ = clip_ops.clip_by_value(x, 1.0, clip)
 
   def testClipByValueNonFinite(self):
-    with self.test_session(use_gpu=True):
+    with self.test_session():
       x = constant_op.constant([float('NaN'), float('Inf'), -float('Inf')])
       np_ans = [float('NaN'), 4.0, -4.0]
       clip_value = 4.0
@@ -144,7 +60,7 @@ class ClipTest(test.TestCase):
   # ClipByNorm tests
   def testClipByNormClipped(self):
     # Norm clipping when clip_norm < 5
-    with self.test_session(use_gpu=True):
+    with self.test_session():
       x = constant_op.constant([-3.0, 0.0, 0.0, 4.0, 0.0, 0.0], shape=[2, 3])
       # Norm of x = sqrt(3^2 + 4^2) = 5
       np_ans = [[-2.4, 0.0, 0.0], [3.2, 0.0, 0.0]]
@@ -160,7 +76,7 @@ class ClipTest(test.TestCase):
     self.assertAllClose(np_ans, tf_ans_tensor)
 
   def testClipByNormBadShape(self):
-    with self.test_session(use_gpu=True):
+    with self.test_session():
       x = constant_op.constant([-3.0, 0.0, 0.0, 4.0, 0.0, 0.0], shape=[2, 3, 1])
       # Use a nonsensical shape.
       clip = constant_op.constant([1.0, 2.0])
@@ -169,7 +85,7 @@ class ClipTest(test.TestCase):
 
   def testClipByNormNotClipped(self):
     # No norm clipping when clip_norm >= 5
-    with self.test_session(use_gpu=True):
+    with self.test_session():
       x = constant_op.constant([-3.0, 0.0, 0.0, 4.0, 0.0, 0.0], shape=[2, 3])
       # Norm of x = sqrt(3^2 + 4^2) = 5
       np_ans = [[-3.0, 0.0, 0.0], [4.0, 0.0, 0.0]]
@@ -181,7 +97,7 @@ class ClipTest(test.TestCase):
 
   def testClipByNormZero(self):
     # No norm clipping when norm = 0
-    with self.test_session(use_gpu=True):
+    with self.test_session():
       x = constant_op.constant([0.0, 0.0, 0.0, 0.0, 0.0, 0.0], shape=[2, 3])
       # Norm = 0, no changes
       np_ans = [[0.0, 0.0, 0.0], [0.0, 0.0, 0.0]]
@@ -193,7 +109,7 @@ class ClipTest(test.TestCase):
 
   def testClipByNormClippedWithDim0(self):
     # Norm clipping when clip_norm < 5
-    with self.test_session(use_gpu=True):
+    with self.test_session():
       x = constant_op.constant([-3.0, 0.0, 0.0, 4.0, 0.0, 3.0], shape=[2, 3])
       # Norm of x[:, 0] = sqrt(3^2 + 4^2) = 5, x[:, 2] = 3
       np_ans = [[-2.4, 0.0, 0.0], [3.2, 0.0, 3.0]]
@@ -205,7 +121,7 @@ class ClipTest(test.TestCase):
 
   def testClipByNormClippedWithDim1(self):
     # Norm clipping when clip_norm < 5
-    with self.test_session(use_gpu=True):
+    with self.test_session():
       x = constant_op.constant([-3.0, 0.0, 0.0, 4.0, 0.0, 3.0], shape=[2, 3])
       # Norm of x[0, :] = 3, x[1, :] = sqrt(3^2 + 4^2) = 5
       np_ans = [[-3.0, 0.0, 0.0], [3.2, 0.0, 2.4]]
@@ -217,7 +133,7 @@ class ClipTest(test.TestCase):
 
   def testClipByNormNotClippedWithAxes(self):
     # No norm clipping when clip_norm >= 5
-    with self.test_session(use_gpu=True):
+    with self.test_session():
       x = constant_op.constant([-3.0, 0.0, 0.0, 4.0, 0.0, 3.0], shape=[2, 3])
       # Norm of x[0, :] = 3, x[1, :] = sqrt(3^2 + 4^2) = 5
       np_ans = [[-3.0, 0.0, 0.0], [4.0, 0.0, 3.0]]
@@ -230,7 +146,7 @@ class ClipTest(test.TestCase):
   # ClipByGlobalNorm tests
   def testClipByGlobalNormClipped(self):
     # Norm clipping when clip_norm < 5
-    with self.test_session(use_gpu=True):
+    with self.test_session():
       x0 = constant_op.constant([-2.0, 0.0, 0.0, 4.0, 0.0, 0.0], shape=[2, 3])
       x1 = constant_op.constant([1.0, -2.0])
       # Global norm of x0 and x1 = sqrt(1 + 4^2 + 2^2 + 2^2) = 5
@@ -251,7 +167,7 @@ class ClipTest(test.TestCase):
 
   def testClipByGlobalNormClippedTensor(self):
     # Norm clipping when clip_norm < 5
-    with self.test_session(use_gpu=True):
+    with self.test_session():
       x0 = constant_op.constant([-2.0, 0.0, 0.0, 4.0, 0.0, 0.0], shape=[2, 3])
       x1 = constant_op.constant([1.0, -2.0])
       # Global norm of x0 and x1 = sqrt(1 + 4^2 + 2^2 + 2^2) = 5
@@ -272,7 +188,7 @@ class ClipTest(test.TestCase):
 
   def testClipByGlobalNormSupportsNone(self):
     # Norm clipping when clip_norm < 5
-    with self.test_session(use_gpu=True):
+    with self.test_session():
       x0 = constant_op.constant([-2.0, 0.0, 0.0, 4.0, 0.0, 0.0], shape=[2, 3])
       x1 = constant_op.constant([1.0, -2.0])
       # Global norm of x0 and x1 = sqrt(1 + 4^2 + 2^2 + 2^2) = 5
@@ -295,7 +211,7 @@ class ClipTest(test.TestCase):
 
   def testClipByGlobalNormWithIndexedSlicesClipped(self):
     # Norm clipping when clip_norm < 5
-    with self.test_session(use_gpu=True):
+    with self.test_session():
       x0 = constant_op.constant([-2.0, 0.0, 0.0, 4.0, 0.0, 0.0], shape=[2, 3])
       x1 = ops.IndexedSlices(
           constant_op.constant([1.0, -2.0]), constant_op.constant([3, 4]))
@@ -328,7 +244,7 @@ class ClipTest(test.TestCase):
 
   def testClipByGlobalNormNotClipped(self):
     # No norm clipping when clip_norm >= 5
-    with self.test_session(use_gpu=True):
+    with self.test_session():
       x0 = constant_op.constant([-2.0, 0.0, 0.0, 4.0, 0.0, 0.0], shape=[2, 3])
       x1 = constant_op.constant([1.0, -2.0])
       # Global norm of x0 and x1 = sqrt(1 + 4^2 + 2^2 + 2^2) = 5
@@ -347,7 +263,7 @@ class ClipTest(test.TestCase):
 
   def testClipByGlobalNormZero(self):
     # No norm clipping when norm = 0
-    with self.test_session(use_gpu=True):
+    with self.test_session():
       x0 = constant_op.constant([0.0, 0.0, 0.0, 0.0, 0.0, 0.0], shape=[2, 3])
       x1 = constant_op.constant([0.0, 0.0])
       # Norm = 0, no changes
@@ -366,7 +282,7 @@ class ClipTest(test.TestCase):
 
   def testClipByAverageNormClipped(self):
     # Norm clipping when average clip_norm < 0.83333333
-    with self.test_session(use_gpu=True):
+    with self.test_session():
       x = constant_op.constant([-3.0, 0.0, 0.0, 4.0, 0.0, 0.0], shape=[2, 3])
       # Average norm of x = sqrt(3^2 + 4^2) / 6 = 0.83333333
       np_ans = [[-2.88, 0.0, 0.0], [3.84, 0.0, 0.0]]
@@ -378,7 +294,7 @@ class ClipTest(test.TestCase):
 
   def testClipByAverageNormClippedTensor(self):
     # Norm clipping when average clip_norm < 0.83333333
-    with self.test_session(use_gpu=True):
+    with self.test_session():
       x = constant_op.constant([-3.0, 0.0, 0.0, 4.0, 0.0, 0.0], shape=[2, 3])
       # Average norm of x = sqrt(3^2 + 4^2) / 6 = 0.83333333
       np_ans = [[-2.88, 0.0, 0.0], [3.84, 0.0, 0.0]]
@@ -390,7 +306,7 @@ class ClipTest(test.TestCase):
 
   def testClipByAverageNormNotClipped(self):
     # No norm clipping when average clip_norm >= 0.83333333
-    with self.test_session(use_gpu=True):
+    with self.test_session():
       x = constant_op.constant([-3.0, 0.0, 0.0, 4.0, 0.0, 0.0], shape=[2, 3])
       # Average norm of x = sqrt(3^2 + 4^2) / 6 = 0.83333333
       np_ans = [[-3.0, 0.0, 0.0], [4.0, 0.0, 0.0]]
@@ -402,7 +318,7 @@ class ClipTest(test.TestCase):
 
   def testClipByAverageNormZero(self):
     # No norm clipping when average clip_norm = 0
-    with self.test_session(use_gpu=True):
+    with self.test_session():
       x = constant_op.constant([0.0, 0.0, 0.0, 0.0, 0.0, 0.0], shape=[2, 3])
       # Average norm = 0, no changes
       np_ans = [[0.0, 0.0, 0.0], [0.0, 0.0, 0.0]]
diff --git a/tensorflow/python/ops/clip_ops.py b/tensorflow/python/ops/clip_ops.py
index b53134a5ba..80803530c1 100644
--- a/tensorflow/python/ops/clip_ops.py
+++ b/tensorflow/python/ops/clip_ops.py
@@ -26,11 +26,10 @@ from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
 from tensorflow.python.ops import array_ops
-from tensorflow.python.ops import gen_array_ops
-from tensorflow.python.ops import gen_math_ops
 from tensorflow.python.ops import gen_nn_ops
 from tensorflow.python.ops import math_ops
 
+
 def clip_by_value(t, clip_value_min, clip_value_max,
                   name=None):
   """Clips tensor values to a specified min and max.
@@ -57,35 +56,18 @@ def clip_by_value(t, clip_value_min, clip_value_max,
   """
   with ops.name_scope(name, "clip_by_value",
                       [t, clip_value_min, clip_value_max]) as name:
-    return gen_math_ops._clip_by_value(t,
-                                       clip_value_min,
-                                       clip_value_max,
-                                       name=name)
-
-
-@ops.RegisterGradient("ClipByValue")
-def _ClipByValueGrad(op, grad):
-  """Returns grad of clip_by_value."""
-  x = op.inputs[0]
-  y = op.inputs[1]
-  z = op.inputs[2]
-  gdtype = grad.dtype
-  sx = array_ops.shape(x)
-  sy = array_ops.shape(y)
-  sz = array_ops.shape(z)
-  gradshape = array_ops.shape(grad)
-  zeros = array_ops.zeros(gradshape, gdtype)
-  xymask = math_ops.less(x, y)
-  xzmask = math_ops.greater(x, z)
-  rx, ry = gen_array_ops._broadcast_gradient_args(sx, sy)
-  rx, rz = gen_array_ops._broadcast_gradient_args(sx, sz)
-  xgrad = array_ops.where(math_ops.logical_or(xymask, xzmask), zeros, grad)
-  ygrad = array_ops.where(xymask, grad, zeros)
-  zgrad = array_ops.where(xzmask, grad, zeros)
-  gx = array_ops.reshape(math_ops.reduce_sum(xgrad, rx), sx)
-  gy = array_ops.reshape(math_ops.reduce_sum(ygrad, ry), sy)
-  gz = array_ops.reshape(math_ops.reduce_sum(zgrad, rz), sz)
-  return (gx, gy, gz)
+    t = ops.convert_to_tensor(t, name="t")
+
+    # Go through list of tensors, for each value in each tensor clip
+    t_min = math_ops.minimum(t, clip_value_max)
+    # Assert that the shape is compatible with the initial shape,
+    # to prevent unintentional broadcasting.
+    _ = t.shape.merge_with(t_min.shape)
+
+    t_max = math_ops.maximum(t_min, clip_value_min, name=name)
+    _ = t.shape.merge_with(t_max.shape)
+
+  return t_max
 
 
 def clip_by_norm(t, clip_norm, axes=None, name=None):
diff --git a/tensorflow/python/ops/hidden_ops.txt b/tensorflow/python/ops/hidden_ops.txt
index 51e349c29d..ec0890c016 100644
--- a/tensorflow/python/ops/hidden_ops.txt
+++ b/tensorflow/python/ops/hidden_ops.txt
@@ -257,7 +257,6 @@ BatchIFFT
 BatchIFFT2D
 BatchIFFT3D
 Bucketize
-ClipByValue
 Complex
 ComplexAbs
 Conj
-- 
GitLab


From 0f2f5b978524f3306e415d18701ea64bd2c688b3 Mon Sep 17 00:00:00 2001
From: Taehoon Lee <me@taehoonlee.com>
Date: Mon, 18 Dec 2017 15:03:46 +0900
Subject: [PATCH 1124/1225] Fix PEP8 (#15378)

---
 tensorflow/contrib/opt/__init__.py                     |  2 +-
 .../opt/python/training/elastic_average_optimizer.py   | 10 +++++-----
 tensorflow/contrib/tpu/profiler/pip_package/setup.py   |  2 +-
 .../how_tos/reading_data/fully_connected_reader.py     |  4 ++--
 4 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/tensorflow/contrib/opt/__init__.py b/tensorflow/contrib/opt/__init__.py
index 2025e8b4fc..ef20a132fb 100644
--- a/tensorflow/contrib/opt/__init__.py
+++ b/tensorflow/contrib/opt/__init__.py
@@ -47,7 +47,7 @@ _allowed_symbols = [
     'VariableClippingOptimizer',
     'MultitaskOptimizerWrapper',
     'clip_gradients_by_global_norm',
-    'ElasticAverageOptimizer', 
+    'ElasticAverageOptimizer',
     'ElasticAverageCustomGetter'
 ]
 
diff --git a/tensorflow/contrib/opt/python/training/elastic_average_optimizer.py b/tensorflow/contrib/opt/python/training/elastic_average_optimizer.py
index 9941f22b1f..56da39c369 100644
--- a/tensorflow/contrib/opt/python/training/elastic_average_optimizer.py
+++ b/tensorflow/contrib/opt/python/training/elastic_average_optimizer.py
@@ -79,9 +79,9 @@ class ElasticAverageCustomGetter(object):
     if trainable:
       with ops.device(self._worker_device):
         local_var = getter(name, trainable=True,
-                           collections=[ops.GraphKeys.LOCAL_VARIABLES], 
+                           collections=[ops.GraphKeys.LOCAL_VARIABLES],
                            *args, **kwargs)
-        
+
       global_center_variable = variable_scope.variable(
         name='%s/%s' %
              (GLOBAL_VARIABLE_NAME,
@@ -96,7 +96,7 @@ class ElasticAverageCustomGetter(object):
           initial_value=local_var.initialized_value(),
           trainable=False,
           collections=[ops.GraphKeys.LOCAL_VARIABLES])
-        
+
       self._local_map[local_var] = local_center_variable
       self._global_map[local_var] = global_center_variable
       return local_var
@@ -173,7 +173,7 @@ class ElasticAverageOptimizer(optimizer.Optimizer):
                         colocate_gradients_with_ops=False,
                         grad_loss=None):
     """Compute gradients of `loss` for the variables in `var_list`.
-    
+
     Add rho*elastic_difference to loss to control the exploration
     This is the first part of `minimize()`.  It returns a list
     of (gradient, variable) pairs where "gradient" is the gradient
@@ -204,7 +204,7 @@ class ElasticAverageOptimizer(optimizer.Optimizer):
     """
     if not var_list:
       var_list = variables.trainable_variables()
-      
+
     elastic_difference = [math_ops.subtract(v, lv) for v, lv in zip(
       variables.trainable_variables(),
       [self._local_map[var] for var in var_list])]
diff --git a/tensorflow/contrib/tpu/profiler/pip_package/setup.py b/tensorflow/contrib/tpu/profiler/pip_package/setup.py
index ee6950699e..179d29602b 100644
--- a/tensorflow/contrib/tpu/profiler/pip_package/setup.py
+++ b/tensorflow/contrib/tpu/profiler/pip_package/setup.py
@@ -70,7 +70,7 @@ setup(
         'Topic :: Scientific/Engineering :: Mathematics',
         'Topic :: Scientific/Engineering :: Artificial Intelligence',
         'Topic :: Software Development',
-        'Topic :: Software Development :: Libraries',  
+        'Topic :: Software Development :: Libraries',
         'Topic :: Software Development :: Libraries :: Python Modules',
     ],
     license='Apache 2.0',
diff --git a/tensorflow/examples/how_tos/reading_data/fully_connected_reader.py b/tensorflow/examples/how_tos/reading_data/fully_connected_reader.py
index 9db8835d92..fa4c1c0da5 100644
--- a/tensorflow/examples/how_tos/reading_data/fully_connected_reader.py
+++ b/tensorflow/examples/how_tos/reading_data/fully_connected_reader.py
@@ -62,7 +62,7 @@ def decode(serialized_example):
 
   # Convert label from a scalar uint8 tensor to an int32 scalar.
   label = tf.cast(features['label'], tf.int32)
-  
+
   return image, label
 
 def augment(image, label):
@@ -172,7 +172,7 @@ def run_training():
           step += 1
       except tf.errors.OutOfRangeError:
         print('Done training for %d epochs, %d steps.' % (FLAGS.num_epochs, step))
-      
+
 def main(_):
   run_training()
 
-- 
GitLab


From fdf34a88bec9645473f10ba2d52df4cfcb80d582 Mon Sep 17 00:00:00 2001
From: Changming Sun <chasun@microsoft.com>
Date: Mon, 18 Dec 2017 14:05:28 +0800
Subject: [PATCH 1125/1225] Fix a bug: bfloat16 is unsigned on Windows (#15302)

---
 tensorflow/core/framework/numeric_types.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tensorflow/core/framework/numeric_types.h b/tensorflow/core/framework/numeric_types.h
index e7268fd7a7..5985579803 100644
--- a/tensorflow/core/framework/numeric_types.h
+++ b/tensorflow/core/framework/numeric_types.h
@@ -25,6 +25,7 @@ limitations under the License.
 #include "third_party/eigen3/unsupported/Eigen/CXX11/FixedPoint"
 // clang-format on
 
+#include "tensorflow/core/platform/cpu_info.h"
 #include "tensorflow/core/platform/types.h"
 
 namespace tensorflow {
-- 
GitLab


From 1aa3b549770d05a665e8075322e84e8b8c787088 Mon Sep 17 00:00:00 2001
From: Changming Sun <chasun@microsoft.com>
Date: Mon, 18 Dec 2017 16:53:20 +0800
Subject: [PATCH 1126/1225] Add an is_external arg to tf_copts

---
 tensorflow/core/BUILD     |  1 +
 tensorflow/tensorflow.bzl | 70 ++++++++++++++++++++++-----------------
 2 files changed, 40 insertions(+), 31 deletions(-)

diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD
index a280444121..2956aae2e9 100644
--- a/tensorflow/core/BUILD
+++ b/tensorflow/core/BUILD
@@ -575,6 +575,7 @@ cc_library(
 
 # Generates library per group of ops.
 tf_gen_op_libs(
+    is_external = False,
     op_lib_names = [
         "bitwise_ops",
         "candidate_sampling_ops",
diff --git a/tensorflow/tensorflow.bzl b/tensorflow/tensorflow.bzl
index fcefe23d76..67d23bfe71 100644
--- a/tensorflow/tensorflow.bzl
+++ b/tensorflow/tensorflow.bzl
@@ -19,6 +19,7 @@ load(
     "//third_party/mkl:build_defs.bzl",
     "if_mkl",
 )
+
 def register_extension_info(**kwargs):
     pass
 
@@ -143,24 +144,28 @@ def if_darwin(a):
       "//conditions:default": [],
   })
 
-WIN_COPTS = [
-    "/DLANG_CXX11",
-    "/D__VERSION__=\\\"MSVC\\\"",
-    "/DPLATFORM_WINDOWS",
-    "/DTF_COMPILE_LIBRARY",
-    "/DEIGEN_HAS_C99_MATH",
-    "/DTENSORFLOW_USE_EIGEN_THREADPOOL",
-    "/DEIGEN_AVOID_STL_ARRAY",
-    "/Iexternal/gemmlowp",
-    "/wd4018",  # -Wno-sign-compare
-    "/U_HAS_EXCEPTIONS",
-    "/D_HAS_EXCEPTIONS=1",
-    "/EHsc",  # -fno-exceptions
-    "/DNOGDI",
-]
+def get_win_copts(is_external=False):
+    WINDOWS_COPTS = [
+        "/DLANG_CXX11",
+        "/D__VERSION__=\\\"MSVC\\\"",
+        "/DPLATFORM_WINDOWS",
+        "/DEIGEN_HAS_C99_MATH",
+        "/DTENSORFLOW_USE_EIGEN_THREADPOOL",
+        "/DEIGEN_AVOID_STL_ARRAY",
+        "/Iexternal/gemmlowp",
+        "/wd4018",  # -Wno-sign-compare
+        "/U_HAS_EXCEPTIONS",
+        "/D_HAS_EXCEPTIONS=1",
+        "/EHsc",  # -fno-exceptions
+        "/DNOGDI",
+    ]
+    if is_external:
+      return WINDOWS_COPTS + ["/UTF_COMPILE_LIBRARY"]
+    else:
+      return WINDOWS_COPTS + ["/DTF_COMPILE_LIBRARY"]
 
 # LINT.IfChange
-def tf_copts(android_optimization_level_override="-O2"):
+def tf_copts(android_optimization_level_override="-O2", is_external=False):
   # For compatibility reasons, android_optimization_level_override
   # is currently only being set for Android.
   # To clear this value, and allow the CROSSTOOL default
@@ -191,8 +196,8 @@ def tf_copts(android_optimization_level_override="-O2"):
       + select({
             clean_dep("//tensorflow:android"): android_copts,
             clean_dep("//tensorflow:darwin"): [],
-            clean_dep("//tensorflow:windows"): WIN_COPTS,
-            clean_dep("//tensorflow:windows_msvc"): WIN_COPTS,
+            clean_dep("//tensorflow:windows"): get_win_copts(is_external),
+            clean_dep("//tensorflow:windows_msvc"): get_win_copts(is_external),
             clean_dep("//tensorflow:ios"): ["-std=c++11"],
             "//conditions:default": ["-pthread"]
       }))
@@ -208,7 +213,7 @@ def tf_opts_nortti_if_android():
 
 # Given a list of "op_lib_names" (a list of files in the ops directory
 # without their .cc extensions), generate a library for that file.
-def tf_gen_op_libs(op_lib_names, deps=None):
+def tf_gen_op_libs(op_lib_names, deps=None, is_external=True):
   # Make library out of each op so it can also be used to generate wrappers
   # for various languages.
   if not deps:
@@ -216,7 +221,7 @@ def tf_gen_op_libs(op_lib_names, deps=None):
   for n in op_lib_names:
     native.cc_library(
         name=n + "_op_lib",
-        copts=tf_copts(),
+        copts=tf_copts(is_external=is_external),
         srcs=["ops/" + n + ".cc"],
         deps=deps + [clean_dep("//tensorflow/core:framework")],
         visibility=["//visibility:public"],
@@ -289,9 +294,11 @@ def tf_cc_binary(name,
                  srcs=[],
                  deps=[],
                  linkopts=[],
+                 copts=tf_copts(),
                  **kwargs):
   native.cc_binary(
       name=name,
+      copts=copts,
       srcs=srcs + tf_binary_additional_srcs(),
       deps=deps + if_mkl(
           [
@@ -322,7 +329,7 @@ def tf_gen_op_wrapper_cc(name,
   tf_cc_binary(
       name=tool,
       copts=tf_copts(),
-      linkopts=["-lm"],
+      linkopts=if_not_windows(["-lm"]),
       linkstatic=1,  # Faster to link this one-time-use binary dynamically
       deps=[op_gen] + deps)
 
@@ -493,7 +500,7 @@ def tf_gen_op_wrapper_py(name,
     deps = [str(Label("//tensorflow/core:" + name + "_op_lib"))]
   tf_cc_binary(
       name=tool_name,
-      linkopts=["-lm"] + cc_linkopts,
+      linkopts=if_not_windows(["-lm"]) + cc_linkopts,
       copts=tf_copts(),
       linkstatic=1,  # Faster to link this one-time-use binary dynamically
       deps=([
@@ -586,7 +593,7 @@ def tf_cc_test(name,
       name="%s%s" % (name, suffix),
       srcs=srcs + tf_binary_additional_srcs(),
       copts=tf_copts() + extra_copts,
-      linkopts=["-lpthread", "-lm"] + linkopts + _rpath_linkopts(name),
+      linkopts=if_not_windows(["-lpthread", "-lm"]) + linkopts + _rpath_linkopts(name),
       deps=deps + if_mkl(
           [
               "//third_party/mkl:intel_binary_blob",
@@ -700,7 +707,7 @@ def tf_cuda_only_cc_test(name,
       deps=deps + if_cuda([
           clean_dep("//tensorflow/core:cuda"),
           clean_dep("//tensorflow/core:gpu_lib")]),
-      linkopts=["-lpthread", "-lm"] + linkopts + _rpath_linkopts(name),
+      linkopts=if_not_windows(["-lpthread", "-lm"]) + linkopts + _rpath_linkopts(name),
       linkstatic=linkstatic or select({
           # cc_tests with ".so"s in srcs incorrectly link on Darwin
           # unless linkstatic=1.
@@ -838,7 +845,7 @@ register_extension_info(
     label_regex_for_dep = "{extension_name}",
 )
 
-def tf_cuda_library(deps=None, cuda_deps=None, copts=None, **kwargs):
+def tf_cuda_library(deps=None, cuda_deps=None, copts=tf_copts(), **kwargs):
   """Generate a cc_library with a conditional set of CUDA dependencies.
 
   When the library is built with --config=cuda:
@@ -858,8 +865,6 @@ def tf_cuda_library(deps=None, cuda_deps=None, copts=None, **kwargs):
     deps = []
   if not cuda_deps:
     cuda_deps = []
-  if not copts:
-    copts = []
 
   native.cc_library(
       deps=deps + if_cuda(cuda_deps + [
@@ -881,7 +886,8 @@ def tf_kernel_library(name,
                       hdrs=None,
                       deps=None,
                       alwayslink=1,
-                      copts=tf_copts(),
+                      copts=None,
+                      is_external=False,
                       **kwargs):
   """A rule to build a TensorFlow OpKernel.
 
@@ -910,7 +916,8 @@ def tf_kernel_library(name,
     hdrs = []
   if not deps:
     deps = []
-
+  if not copts:
+    copts = tf_copts(is_external=is_external)
   if prefix:
     if native.glob([prefix + "*.cu.cc"], exclude=["*test*"]):
       if not gpu_srcs:
@@ -1222,7 +1229,7 @@ def tf_custom_op_library(name, srcs=[], gpu_srcs=[], deps=[], linkopts=[]):
       srcs=srcs,
       deps=deps + if_cuda(cuda_deps),
       data=[name + "_check_deps"],
-      copts=tf_copts(),
+      copts=tf_copts(is_external=True),
       linkopts=linkopts + select({
           "//conditions:default": [
               "-lm",
@@ -1566,9 +1573,10 @@ def tf_py_build_info_genrule():
 def cc_library_with_android_deps(deps,
                                  android_deps=[],
                                  common_deps=[],
+                                 copts=tf_copts(),
                                  **kwargs):
   deps = if_not_android(deps) + if_android(android_deps) + common_deps
-  native.cc_library(deps=deps, **kwargs)
+  native.cc_library(deps=deps, copts=copts, **kwargs)
 
 register_extension_info(
     extension_name = "cc_library_with_android_deps",
-- 
GitLab


From 12899384a56a796c032d573957da08bbf642c0de Mon Sep 17 00:00:00 2001
From: Daniel Ylitalo <daniel.ylitalo@mytaste.com>
Date: Mon, 18 Dec 2017 10:37:02 +0100
Subject: [PATCH 1127/1225] Return empty string when path resolver fails

---
 tensorflow/core/platform/env.cc | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tensorflow/core/platform/env.cc b/tensorflow/core/platform/env.cc
index 4b5bfeab0f..316919aa1c 100644
--- a/tensorflow/core/platform/env.cc
+++ b/tensorflow/core/platform/env.cc
@@ -275,7 +275,8 @@ string Env::GetExecutablePath() {
   size_t exe_path_size = PATH_MAX;
 
   if (sysctl(mib, 4, exe_path, &exe_path_size, NULL, 0) != 0) {
-    return "Resolving ", exe_path, " failed";
+    // Resolution of path failed
+    return "";
   }
 #elif defined(PLATFORM_WINDOWS)
   HMODULE hModule = GetModuleHandleW(NULL);
-- 
GitLab


From b9e021d07eb92c3f9fa6302224da7ecca66d7545 Mon Sep 17 00:00:00 2001
From: Changming Sun <chasun@microsoft.com>
Date: Mon, 18 Dec 2017 19:40:16 +0800
Subject: [PATCH 1128/1225] Fix lib_strings_str_util_test on Windows

---
 tensorflow/core/lib/strings/str_util_test.cc              | 2 +-
 tensorflow/tools/ci_build/windows/bazel/bazel_test_lib.sh | 1 -
 2 files changed, 1 insertion(+), 2 deletions(-)

diff --git a/tensorflow/core/lib/strings/str_util_test.cc b/tensorflow/core/lib/strings/str_util_test.cc
index d5909d17aa..6d461241f7 100644
--- a/tensorflow/core/lib/strings/str_util_test.cc
+++ b/tensorflow/core/lib/strings/str_util_test.cc
@@ -305,7 +305,7 @@ TEST(SplitAndParseAsInts, Int64) {
   EXPECT_EQ(nums[0], 134);
   EXPECT_EQ(nums[1], 2);
   EXPECT_EQ(nums[2], 13);
-  EXPECT_EQ(nums[3], -4000000000);
+  EXPECT_EQ(nums[3], static_cast<int64>(-4000000000ull));
 
   EXPECT_FALSE(str_util::SplitAndParseAsInts("abc", ',', &nums));
 
diff --git a/tensorflow/tools/ci_build/windows/bazel/bazel_test_lib.sh b/tensorflow/tools/ci_build/windows/bazel/bazel_test_lib.sh
index 8d50250c3a..0c9f3bb5b3 100644
--- a/tensorflow/tools/ci_build/windows/bazel/bazel_test_lib.sh
+++ b/tensorflow/tools/ci_build/windows/bazel/bazel_test_lib.sh
@@ -21,7 +21,6 @@ failing_cpu_cc_tests="\
     //tensorflow/core:lib_core_status_test + \
     //tensorflow/core:lib_monitoring_collection_registry_test + \
     //tensorflow/core:lib_strings_numbers_test + \
-    //tensorflow/core:lib_strings_str_util_test + \
     //tensorflow/core/platform/hadoop:hadoop_file_system_test + \
     //tensorflow/core:platform_file_system_test + \
     //tensorflow/core:platform_logging_test + \
-- 
GitLab


From 3a74cfd8645269ab6a903094ff5be67d9b59049b Mon Sep 17 00:00:00 2001
From: Andrei Costinescu <AndreiCostinescu@users.noreply.github.com>
Date: Mon, 18 Dec 2017 18:10:57 +0100
Subject: [PATCH 1129/1225] Update math_ops.py

Corrected documentation of tf.reduce_mean()
---
 tensorflow/python/ops/math_ops.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/python/ops/math_ops.py b/tensorflow/python/ops/math_ops.py
index 6af36343d5..363c7b8a97 100644
--- a/tensorflow/python/ops/math_ops.py
+++ b/tensorflow/python/ops/math_ops.py
@@ -1437,7 +1437,7 @@ def reduce_mean(input_tensor,
     input_tensor: The tensor to reduce. Should have numeric type.
     axis: The dimensions to reduce. If `None` (the default),
       reduces all dimensions. Must be in the range
-      `[-rank(input_tensor), rank(input_tensor))`.
+      `[-rank(input_tensor), rank(input_tensor)]`.
     keepdims: If true, retains reduced dimensions with length 1.
     name: A name for the operation (optional).
     reduction_indices: The old (deprecated) name for axis.
-- 
GitLab


From bcb326c26e7986e7bb411e639ee3fb2fd9f53c04 Mon Sep 17 00:00:00 2001
From: Andrei Costinescu <AndreiCostinescu@users.noreply.github.com>
Date: Mon, 18 Dec 2017 18:14:04 +0100
Subject: [PATCH 1130/1225] Update core.py

Corrected the documentation of the Dense layer, regarding the computation performed by the layer
---
 tensorflow/python/layers/core.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/python/layers/core.py b/tensorflow/python/layers/core.py
index 7be1fa5cfe..44016d5eda 100644
--- a/tensorflow/python/layers/core.py
+++ b/tensorflow/python/layers/core.py
@@ -43,7 +43,7 @@ class Dense(base.Layer):
   """Densely-connected layer class.
 
   This layer implements the operation:
-  `outputs = activation(inputs.kernel + bias)`
+  `outputs = activation(inputs * kernel + bias)`
   Where `activation` is the activation function passed as the `activation`
   argument (if not `None`), `kernel` is a weights matrix created by the layer,
   and `bias` is a bias vector created by the layer
-- 
GitLab


From aed215d72110269fa4ba513092717a62bbbb0af5 Mon Sep 17 00:00:00 2001
From: Neal Wu <neal@nealwu.com>
Date: Mon, 18 Dec 2017 09:28:58 -0800
Subject: [PATCH 1131/1225] Fix the CODEOWNERS file syntax (#15411)

---
 CODEOWNERS | 94 +++++++++++++++++++++++++++---------------------------
 1 file changed, 47 insertions(+), 47 deletions(-)

diff --git a/CODEOWNERS b/CODEOWNERS
index 57a4df40e6..007a304c3e 100644
--- a/CODEOWNERS
+++ b/CODEOWNERS
@@ -1,53 +1,53 @@
 # NOTE: Disabled temporarily because it's too noisy on pushes.
 # Where component owners are known, add them here.
 
-#tensorflow/core/platform/windows/* @mrry
-#tensorflow/java/* @asimshankar
-#tensorflow/tensorboard/* @jart @dandelionmane
-#tensorflow/tools/docs/* @markdaoust
+# /tensorflow/core/platform/windows/ @mrry
+# /tensorflow/java/ @asimshankar
+# /tensorflow/tensorboard/ @jart @dandelionmane
+# /tensorflow/tools/docs/ @markdaoust
 
 # contrib
 
-# NEED OWNER: tensorflow/contrib/avro/*
-#tensorflow/contrib/batching/* @alextp @chrisolston
-#tensorflow/contrib/bayesflow/* @ebrevdo @rsepassi @jvdillon
-#tensorflow/contrib/boosted_trees/* @sshrdp @yk5 @nataliaponomareva
-#tensorflow/contrib/cmake/* @mrry @benoitsteiner
-#tensorflow/contrib/copy_graph/* @tucker @poxvoculi
-#tensorflow/contrib/crf/* @kentonl
-#tensorflow/contrib/data/* @mrry
-#tensorflow/contrib/distributions/* @jvdillon @langmore @rsepassi
-#tensorflow/contrib/factorization/* @agarwal-ashish @xavigonzalvo
-#tensorflow/contrib/ffmpeg/* @fredbertsch
-# NEED OWNER: tensorflow/contrib/framework/*
-#tensorflow/contrib/graph_editor/* @purpledog
-# NEED OWNER: tensorflow/contrib/grid_rnn/*
-#tensorflow/contrib/hvx/* @satok16
-#tensorflow/contrib/integrate/* @shoyer
-#tensorflow/contrib/kernel_methods/* @petrosmol
-#tensorflow/contrib/ios_examples/* @petewarden
-#tensorflow/contrib/labeled_tensor/* @shoyer
-#tensorflow/contrib/layers/* @fchollet @martinwicke
-#tensorflow/contrib/learn/* @martinwicke @ispirmustafa @alextp
-#tensorflow/contrib/linalg/* @langmore
-#tensorflow/contrib/linear_optimizer/* @petrosmol @andreasst @katsiapis
-#tensorflow/contrib/lookup/* @ysuematsu @andreasst
-#tensorflow/contrib/losses/* @alextp @ispirmustafa
-#tensorflow/contrib/makefile/* @petewarden @satok16 @wolffg
-#tensorflow/contrib/metrics/* @alextp @honkentuber @ispirmustafa
-#tensorflow/contrib/nccl/* @cwhipkey @zheng-xq
-#tensorflow/contrib/opt/* @strategist333
-#tensorflow/contrib/pi_examples/* @maciekcc
-#tensorflow/contrib/quantization/* @petewarden @cwhipkey @keveman
-#tensorflow/contrib/rnn/* @ebrevdo
-#tensorflow/contrib/saved_model/* @nfiedel @sukritiramesh
-#tensorflow/contrib/seq2seq/* @lukaszkaiser
-#tensorflow/contrib/session_bundle/* @nfiedel @sukritiramesh
-#tensorflow/contrib/slim/* @sguada @thenbasilmanran
-#tensorflow/contrib/stateless/* @girving
-#tensorflow/contrib/tensor_forest/* @gilberthendry @thomascolthurst
-#tensorflow/contrib/testing/* @dandelionmane
-#tensorflow/contrib/timeseries/* @allenlavoie
-#tensorflow/contrib/tpu/* @frankchn @saeta @jhseu
-#tensorflow/contrib/training/* @joel-shor @ebrevdo
-#tensorflow/contrib/util/* @sherrym
+# NEED OWNER: /tensorflow/contrib/avro/
+# /tensorflow/contrib/batching/ @alextp @chrisolston
+# /tensorflow/contrib/bayesflow/ @ebrevdo @rsepassi @jvdillon
+# /tensorflow/contrib/boosted_trees/ @sshrdp @yk5 @nataliaponomareva
+# /tensorflow/contrib/cmake/ @mrry @benoitsteiner
+# /tensorflow/contrib/copy_graph/ @tucker @poxvoculi
+# /tensorflow/contrib/crf/ @kentonl
+# /tensorflow/contrib/data/ @mrry
+# /tensorflow/contrib/distributions/ @jvdillon @langmore @rsepassi
+# /tensorflow/contrib/factorization/ @agarwal-ashish @xavigonzalvo
+# /tensorflow/contrib/ffmpeg/ @fredbertsch
+# NEED OWNER: /tensorflow/contrib/framework/
+# /tensorflow/contrib/graph_editor/ @purpledog
+# NEED OWNER: /tensorflow/contrib/grid_rnn/
+# /tensorflow/contrib/hvx/ @satok16
+# /tensorflow/contrib/integrate/ @shoyer
+# /tensorflow/contrib/kernel_methods/ @petrosmol
+# /tensorflow/contrib/ios_examples/ @petewarden
+# /tensorflow/contrib/labeled_tensor/ @shoyer
+# /tensorflow/contrib/layers/ @fchollet @martinwicke
+# /tensorflow/contrib/learn/ @martinwicke @ispirmustafa @alextp
+# /tensorflow/contrib/linalg/ @langmore
+# /tensorflow/contrib/linear_optimizer/ @petrosmol @andreasst @katsiapis
+# /tensorflow/contrib/lookup/ @ysuematsu @andreasst
+# /tensorflow/contrib/losses/ @alextp @ispirmustafa
+# /tensorflow/contrib/makefile/ @petewarden @satok16 @wolffg
+# /tensorflow/contrib/metrics/ @alextp @honkentuber @ispirmustafa
+# /tensorflow/contrib/nccl/ @cwhipkey @zheng-xq
+# /tensorflow/contrib/opt/ @strategist333
+# /tensorflow/contrib/pi_examples/ @maciekcc
+# /tensorflow/contrib/quantization/ @petewarden @cwhipkey @keveman
+# /tensorflow/contrib/rnn/ @ebrevdo
+# /tensorflow/contrib/saved_model/ @nfiedel @sukritiramesh
+# /tensorflow/contrib/seq2seq/ @lukaszkaiser
+# /tensorflow/contrib/session_bundle/ @nfiedel @sukritiramesh
+# /tensorflow/contrib/slim/ @sguada @thenbasilmanran
+# /tensorflow/contrib/stateless/ @girving
+# /tensorflow/contrib/tensor_forest/ @gilberthendry @thomascolthurst
+# /tensorflow/contrib/testing/ @dandelionmane
+# /tensorflow/contrib/timeseries/ @allenlavoie
+# /tensorflow/contrib/tpu/ @frankchn @saeta @jhseu
+# /tensorflow/contrib/training/ @joel-shor @ebrevdo
+# /tensorflow/contrib/util/ @sherrym
-- 
GitLab


From fc2526a8c1cf0bc2a93c8cc819ff7209eb4628c9 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 15 Dec 2017 23:38:01 -0800
Subject: [PATCH 1132/1225] Merged commit includes the following changes:
 179277894  by gunan:

    Run buildifier on build file.

--
179275101  by meheff:

    Replace DeviceMemoryBase with ShapedBuffer in XLA interfaces.
    Executable, TransferManager, and AllocationTracker now use ShapedBuffer to hold device memory addresses holding XLA data. Most of the change is straight-forward with the exception of AllocationTracker which was mostly rewritten (and simplified) and some refactoring in the CPU executable.

    Also, have ShapedBuffer hold on-host and on-device Shapes which are the shapes of the representation of the data on the host and device, respectively. This is necessary because with cl/178624364 the on-host and on-device shape may no longer be equal.

--
179265385  by A. Unique TensorFlower:

    Return error rather than CHECK fail in Executable::ExecuteOnStreamWrapper

--
179264551  by dandelion:

    Internal fixes.

--

PiperOrigin-RevId: 179277894
---
 .../compiler/jit/kernels/xla_launch_op.cc     |  17 +-
 .../compiler/xla/client/local_client.cc       |  14 +-
 tensorflow/compiler/xla/literal_util.cc       |  21 ++
 tensorflow/compiler/xla/literal_util.h        |   4 +
 .../xla/service/allocation_tracker.cc         | 228 +++++++--------
 .../compiler/xla/service/allocation_tracker.h | 179 ++++--------
 .../xla/service/cpu/cpu_executable.cc         | 200 +++++--------
 .../compiler/xla/service/cpu/cpu_executable.h |  30 +-
 .../service/cpu/parallel_cpu_executable.cc    | 146 +++-------
 .../xla/service/cpu/parallel_cpu_executable.h |  18 +-
 tensorflow/compiler/xla/service/executable.cc |  14 +-
 tensorflow/compiler/xla/service/executable.h  |  23 +-
 .../xla/service/generic_transfer_manager.cc   | 132 ++-------
 .../xla/service/generic_transfer_manager.h    |  24 +-
 .../xla/service/gpu/gpu_executable.cc         | 142 ++--------
 .../compiler/xla/service/gpu/gpu_executable.h |  16 +-
 tensorflow/compiler/xla/service/hlo_runner.cc |  98 ++-----
 tensorflow/compiler/xla/service/hlo_runner.h  |  45 +--
 .../compiler/xla/service/interpreter/BUILD    |   1 +
 .../xla/service/interpreter/executable.cc     |  86 ++----
 .../xla/service/interpreter/executable.h      |  11 +-
 .../compiler/xla/service/local_service.cc     |   4 +-
 tensorflow/compiler/xla/service/service.cc    | 267 ++++++++----------
 tensorflow/compiler/xla/service/service.h     |  19 +-
 .../compiler/xla/service/shaped_buffer.cc     | 120 ++------
 .../compiler/xla/service/shaped_buffer.h      | 100 +++----
 .../compiler/xla/service/transfer_manager.cc  | 107 +++++--
 .../compiler/xla/service/transfer_manager.h   | 100 +++----
 tensorflow/compiler/xla/tests/copy_test.cc    |  11 +-
 .../compiler/xla/tests/dynamic_ops_test.cc    |  16 +-
 tensorflow/compiler/xla/tests/fusion_test.cc  |   3 +-
 .../compiler/xla/tests/hlo_test_base.cc       |  22 +-
 tensorflow/compiler/xla/tests/hlo_test_base.h |  21 +-
 .../xla/tests/local_client_execute_test.cc    |  36 ++-
 .../xla/tests/local_client_test_base.cc       |   2 +-
 .../xla/tests/multioutput_fusion_test.cc      |  13 +-
 .../xla/tests/transfer_manager_test.cc        |  41 ++-
 37 files changed, 866 insertions(+), 1465 deletions(-)

diff --git a/tensorflow/compiler/jit/kernels/xla_launch_op.cc b/tensorflow/compiler/jit/kernels/xla_launch_op.cc
index 39a770ab7b..4f3f17df9c 100644
--- a/tensorflow/compiler/jit/kernels/xla_launch_op.cc
+++ b/tensorflow/compiler/jit/kernels/xla_launch_op.cc
@@ -287,10 +287,17 @@ void XlaLocalLaunchOp::Compute(OpKernelContext* ctx) {
     gpu::DeviceMemoryBase dmem = gpu::DeviceMemoryBase(
         const_cast<char*>(t->tensor_data().data()), t->tensor_data().size());
 
-    arg_buffers[i] =
-        xla::ShapedBuffer::MakeArrayShapedBuffer(
-            shape, client->platform(), client->default_device_ordinal(), dmem)
-            .ConsumeValueOrDie();
+    const xla::Shape on_device_shape =
+        client->backend().transfer_manager()->HostShapeToDeviceShape(shape);
+    CHECK(xla::ShapeUtil::Equal(shape, on_device_shape))
+        << "On-device shape "
+        << xla::ShapeUtil::HumanStringWithLayout(on_device_shape)
+        << " not the same as on-host shape "
+        << xla::ShapeUtil::HumanStringWithLayout(shape);
+    arg_buffers[i] = xla::MakeUnique<xla::ShapedBuffer>(
+        /*on_host_shape=*/shape, /*on_device_shape=*/shape, client->platform(),
+        client->default_device_ordinal());
+    arg_buffers[i]->set_buffer(dmem, /*index=*/{});
     arg_ptrs[i] = arg_buffers[i].get();
 
     OP_REQUIRES_OK(ctx, xla_allocator.RegisterArgument(t));
@@ -313,7 +320,7 @@ void XlaLocalLaunchOp::Compute(OpKernelContext* ctx) {
 
   // Computation output should always be a tuple.
   if (VLOG_IS_ON(2)) {
-    VLOG(2) << "Result tuple shape: " << output->shape().DebugString();
+    VLOG(2) << "Result tuple shape: " << output->on_host_shape().DebugString();
   }
   CHECK_EQ(ctx->num_outputs(), kernel->outputs.size());
 
diff --git a/tensorflow/compiler/xla/client/local_client.cc b/tensorflow/compiler/xla/client/local_client.cc
index b051955f0f..7900246a49 100644
--- a/tensorflow/compiler/xla/client/local_client.cc
+++ b/tensorflow/compiler/xla/client/local_client.cc
@@ -78,14 +78,14 @@ tensorflow::Status LocalExecutable::ValidateExecutionOptions(
   }
   for (int i = 0; i < arguments.size(); ++i) {
     if (!computation_layout.parameter_layout(i).MatchesLayoutInShape(
-            arguments[i]->shape())) {
+            arguments[i]->on_host_shape())) {
       return InvalidArgument(
           "argument does not match shape or layout of computation parameter "
           "%d: expected %s, got %s",
           i,
           ShapeUtil::HumanString(computation_layout.parameter_layout(i).shape())
               .c_str(),
-          ShapeUtil::HumanString(arguments[i]->shape()).c_str());
+          ShapeUtil::HumanString(arguments[i]->on_host_shape()).c_str());
     }
   }
 
@@ -281,13 +281,9 @@ LocalClient::LiteralToShapedBuffer(const Literal& literal, int device_ordinal,
   if (allocator == nullptr) {
     allocator = backend().memory_allocator();
   }
-  TF_ASSIGN_OR_RETURN(
-      auto scoped_buffer,
-      ScopedShapedBuffer::Allocate(
-          literal.shape(), allocator, device_ordinal,
-          [this](const Shape& shape) {
-            return backend().transfer_manager()->GetByteSizeRequirement(shape);
-          }));
+  TF_ASSIGN_OR_RETURN(auto scoped_buffer,
+                      backend().transfer_manager()->AllocateScopedShapedBuffer(
+                          literal.shape(), allocator, device_ordinal));
   TF_ASSIGN_OR_RETURN(se::StreamExecutor * executor,
                       backend().stream_executor(device_ordinal));
   TF_RETURN_IF_ERROR(backend().transfer_manager()->TransferLiteralToDevice(
diff --git a/tensorflow/compiler/xla/literal_util.cc b/tensorflow/compiler/xla/literal_util.cc
index 42c9d21149..3ae356bc11 100644
--- a/tensorflow/compiler/xla/literal_util.cc
+++ b/tensorflow/compiler/xla/literal_util.cc
@@ -404,6 +404,27 @@ std::unique_ptr<Literal> Literal::Relayout(
   return outer_result;
 }
 
+std::unique_ptr<Literal> Literal::Relayout(
+    const Shape& shape_with_layout) const {
+  CHECK(ShapeUtil::Compatible(shape_with_layout, shape()))
+      << "Given shape_with_layout " << ShapeUtil::HumanString(shape_with_layout)
+      << " not compatible with literal shape "
+      << ShapeUtil::HumanString(shape());
+  std::unique_ptr<Literal> result = CreateFromShape(shape_with_layout);
+  ShapeUtil::ForEachSubshape(
+      result->shape(),
+      [this, &result](const Shape& subshape, const ShapeIndex& index) {
+        if (ShapeUtil::IsArray(subshape)) {
+          DimensionVector base(ShapeUtil::Rank(subshape), 0);
+          DimensionVector copy_size(subshape.dimensions().begin(),
+                                    subshape.dimensions().end());
+          TF_CHECK_OK(result->GetSubliteral(index).Copy(GetSubliteral(index),
+                                                        base, base, copy_size));
+        }
+      });
+  return result;
+}
+
 StatusOr<std::unique_ptr<Literal>> Literal::Reshape(
     tensorflow::gtl::ArraySlice<int64> dimensions) const {
   if (ShapeUtil::IsTuple(shape())) {
diff --git a/tensorflow/compiler/xla/literal_util.h b/tensorflow/compiler/xla/literal_util.h
index 2981f9f875..9b9972725b 100644
--- a/tensorflow/compiler/xla/literal_util.h
+++ b/tensorflow/compiler/xla/literal_util.h
@@ -286,6 +286,10 @@ class Literal {
   std::unique_ptr<Literal> Relayout(const Layout& new_layout,
                                     const ShapeIndex& shape_index = {}) const;
 
+  // An overload of Relayout which changes the layout of the entire shape rather
+  // than being limited to a single array within the shape.
+  std::unique_ptr<Literal> Relayout(const Shape& shape_with_layout) const;
+
   // Creates a new literal by reshaping this literal to have the given
   // dimensions. The total number of elements must not change; The
   // implementation currently only supports monotonic dim0-major layouts.
diff --git a/tensorflow/compiler/xla/service/allocation_tracker.cc b/tensorflow/compiler/xla/service/allocation_tracker.cc
index ad2fee2d39..b69a6e730f 100644
--- a/tensorflow/compiler/xla/service/allocation_tracker.cc
+++ b/tensorflow/compiler/xla/service/allocation_tracker.cc
@@ -27,191 +27,163 @@ limitations under the License.
 #include "tensorflow/compiler/xla/util.h"
 #include "tensorflow/core/lib/strings/strcat.h"
 #include "tensorflow/core/platform/logging.h"
-#include "tensorflow/core/platform/stream_executor_no_cuda.h"
 
 namespace se = ::perftools::gputools;
 
 namespace xla {
 
-AllocationTracker::AllocationTracker() : next_handle_(1) {}
-
-GlobalDataHandle AllocationTracker::Register(Backend* backend,
-                                             int device_ordinal,
-                                             se::DeviceMemoryBase device_memory,
-                                             const Shape& shape,
-                                             const string& tag) {
-  tensorflow::mutex_lock lock(allocation_mutex_);
+StatusOr<GlobalDataHandle> AllocationTracker::Register(
+    std::unique_ptr<ShapedBuffer> shaped_buffer, const string& tag) {
+  tensorflow::mutex_lock lock(mutex_);
   VLOG(2) << "Register";
-  return RegisterInternal(backend, device_ordinal, device_memory, shape, tag,
-                          /*initial_ref_count=*/1);
+  return RegisterInternal(std::move(shaped_buffer), tag);
 }
 
-GlobalDataHandle AllocationTracker::RegisterInternal(
-    Backend* backend, int device_ordinal, se::DeviceMemoryBase device_memory,
-    const Shape& shape, const string& tag, int initial_ref_count) {
+StatusOr<GlobalDataHandle> AllocationTracker::RegisterInternal(
+    std::unique_ptr<ShapedBuffer> shaped_buffer, const string& tag) {
   VLOG(2) << "RegisterInternal("
           << "tag: \"" << tag << "\" "
-          << "device_ordinal: " << device_ordinal << " "
-          << "device_memory: " << device_memory.opaque() << " "
-          << "shape: " << shape.ShortDebugString() << ")";
-  TF_CHECK_OK(ShapeUtil::ValidateShape(shape));
-
-  int64 handle;
-  HandleMap& handle_map = GetOrCreateOpaqueToHandleMap(device_ordinal);
-  auto handle_it = handle_map.find(device_memory.opaque());
-  if (handle_it != handle_map.end()) {
-    handle = handle_it->second;
-    auto& allocation = FindOrDie(handle_to_allocation_, handle);
-    int ref_count = allocation->ref_count();
-    CHECK_GT(ref_count, 0);
-    VLOG(2) << "ref_count: " << ref_count << " -> " <<
-            (ref_count + initial_ref_count);
-    allocation->increment_ref_count(initial_ref_count);
-  } else {
-    handle = next_handle_++;
-    VLOG(2) << "ref_count: " << initial_ref_count;
-    InsertOrDie(&handle_map, device_memory.opaque(), handle);
-    auto inserted = handle_to_allocation_.emplace(
-        handle, MakeUnique<Allocation>(backend, device_ordinal, device_memory,
-                                       shape, tag, initial_ref_count));
-    CHECK(inserted.second);
+          << "shaped_buffer: " << *shaped_buffer;
+  if (shaped_buffer->platform() != backend_->platform()) {
+    return InvalidArgument(
+        "AllocationTracker for platform %s cannot register buffer from "
+        "platform %s",
+        backend_->platform()->Name().c_str(),
+        shaped_buffer->platform()->Name().c_str());
   }
 
+  int64 handle = next_handle_++;
+  std::vector<ShapeIndex> shape_indices;
+  ShapeUtil::ForEachSubshape(shaped_buffer->on_device_shape(),
+                             [this, &shape_indices](const Shape& /*subshape*/,
+                                                    const ShapeIndex& index) {
+                               shape_indices.push_back(index);
+                             });
+  for (const ShapeIndex& index : shape_indices) {
+    AddAllocationOrIncrementRefCount(shaped_buffer->buffer(index),
+                                     shaped_buffer->device_ordinal());
+  }
   GlobalDataHandle result;
   result.set_handle(handle);
+
+  handle_to_shaped_buffer_[handle] = std::move(shaped_buffer);
+
   VLOG(2) << "handle: " << handle;
 
   return result;
 }
 
 tensorflow::Status AllocationTracker::Unregister(const GlobalDataHandle& data) {
-  tensorflow::mutex_lock lock(allocation_mutex_);
-  TF_ASSIGN_OR_RETURN(Allocation * allocation, ResolveInternal(data));
-  std::set<void*> deallocated_buffers;
-  TF_RETURN_IF_ERROR(
-      DeallocateShape(allocation->backend(), allocation->device_ordinal(),
-                      allocation->mutable_device_memory(), allocation->shape(),
-                      &deallocated_buffers));
-  return tensorflow::Status::OK();
-}
-
-tensorflow::Status AllocationTracker::DeallocateShape(
-    Backend* backend, int device_ordinal, se::DeviceMemoryBase* device_memory,
-    const Shape& shape, std::set<void*>* deallocated_buffers) {
-  VLOG(2) << "DeallocateShape("
-          << "shape: \"" << shape.ShortDebugString() << "\" "
-          << "device_memory: " << device_memory->opaque() << ")";
-  if (ContainsKey(*deallocated_buffers, device_memory->opaque())) {
-    // Buffer has already been deallocated. Nothing to do.
-    VLOG(2) << "already deallocated";
-    return tensorflow::Status::OK();
+  tensorflow::mutex_lock lock(mutex_);
+  VLOG(2) << "Unregister("
+          << "handle: " << data.handle() << ")";
+  TF_ASSIGN_OR_RETURN(ShapedBuffer * shaped_buffer, ResolveInternal(data));
+  std::vector<ShapeIndex> shape_indices;
+  ShapeUtil::ForEachSubshape(shaped_buffer->on_device_shape(),
+                             [this, &shape_indices](const Shape& /*subshape*/,
+                                                    const ShapeIndex& index) {
+                               shape_indices.push_back(index);
+                             });
+  for (const ShapeIndex& index : shape_indices) {
+    TF_RETURN_IF_ERROR(DecrementRefCount(shaped_buffer->buffer(index),
+                                         shaped_buffer->device_ordinal()));
   }
 
-  // Add buffer to deallocated set so we do not try to deallocate it again
-  // if it is encountered again while traversing a tuple.
-  deallocated_buffers->insert(device_memory->opaque());
-
-  HandleMap& handle_map = GetOrCreateOpaqueToHandleMap(device_ordinal);
-  auto handle_it = handle_map.find(device_memory->opaque());
-  if (handle_it != handle_map.end()) {
-    int64 handle = handle_it->second;
-    auto& allocation = FindOrDie(handle_to_allocation_, handle);
-    int ref_count = allocation->ref_count();
-    VLOG(2) << "ref_count: " << ref_count << " -> " << ref_count - 1;
-    allocation->decrement_ref_count();
-    if (allocation->ref_count() > 0) {
-      // Buffer is referred to by another allocation. Don't deallocate it.
-      return tensorflow::Status::OK();
-    }
-    handle_map.erase(device_memory->opaque());
-  }
+  // Keep a nullptr as a tombstone for unregistered handles. This enables better
+  // error messages. That is, "handle has been deallocated" versus "handle does
+  // not exist".
+  handle_to_shaped_buffer_.at(data.handle()).reset();
 
-  if (ShapeUtil::IsTuple(shape)) {
-    // Traverse into tuple recursively deallocating buffers.
-    TF_ASSIGN_OR_RETURN(se::StreamExecutor * executor,
-                        backend->stream_executor(device_ordinal));
-    TF_ASSIGN_OR_RETURN(std::vector<se::DeviceMemoryBase> elements,
-                        backend->transfer_manager()->ShallowCopyTupleFromDevice(
-                            executor, *device_memory, shape));
-
-    TF_RET_CHECK(ShapeUtil::TupleElementCount(shape) == elements.size())
-        << "tuple has unexpected number of elements: " << elements.size()
-        << " != " << ShapeUtil::TupleElementCount(shape);
-    for (size_t i = 0; i < elements.size(); ++i) {
-      VLOG(2) << "recursing onto the tuple elements";
-      TF_RETURN_IF_ERROR(DeallocateShape(backend, device_ordinal, &elements[i],
-                                         shape.tuple_shapes(i),
-                                         deallocated_buffers));
-    }
-  }
-
-  return backend->memory_allocator()->Deallocate(device_ordinal, device_memory);
+  return tensorflow::Status::OK();
 }
 
 StatusOr<std::vector<GlobalDataHandle>> AllocationTracker::DeconstructTuple(
     const GlobalDataHandle& data) {
-  tensorflow::mutex_lock lock(allocation_mutex_);
-  TF_ASSIGN_OR_RETURN(Allocation * allocation, ResolveInternal(data));
+  tensorflow::mutex_lock lock(mutex_);
 
-  if (!ShapeUtil::IsTuple(allocation->shape())) {
+  TF_ASSIGN_OR_RETURN(ShapedBuffer * shaped_buffer, ResolveInternal(data));
+  if (!ShapeUtil::IsTuple(shaped_buffer->on_host_shape())) {
     return InvalidArgument("global data handle %lld is not a tuple",
                            data.handle());
   }
+  // If the on-host representation is a tuple, then the on-device one should be
+  // as well.
+  TF_RET_CHECK(ShapeUtil::IsTuple(shaped_buffer->on_device_shape()));
 
-  if (ShapeUtil::IsNestedTuple(allocation->shape())) {
+  if (ShapeUtil::IsNestedTuple(shaped_buffer->on_device_shape())) {
     return Unimplemented("deconstructing nested tuples not yet supported");
   }
 
-  TF_ASSIGN_OR_RETURN(
-      se::StreamExecutor * executor,
-      allocation->backend()->stream_executor(allocation->device_ordinal()));
-  TF_ASSIGN_OR_RETURN(
-      std::vector<se::DeviceMemoryBase> element_bases,
-      allocation->backend()->transfer_manager()->ShallowCopyTupleFromDevice(
-          executor, allocation->device_memory(), allocation->shape()));
-
   std::vector<GlobalDataHandle> element_handles;
-  element_handles.reserve(element_bases.size());
-  for (int i = 0; i < element_bases.size(); ++i) {
-    element_handles.push_back(RegisterInternal(
-        allocation->backend(), allocation->device_ordinal(), element_bases[i],
-        ShapeUtil::GetSubshape(allocation->shape(), {i}),
-        tensorflow::strings::StrCat(allocation->tag(), ".element_", i),
-        /*initial_ref_count=*/2));
+  for (int i = 0;
+       i < ShapeUtil::TupleElementCount(shaped_buffer->on_device_shape());
+       ++i) {
+    auto element_buffer = MakeUnique<ShapedBuffer>(
+        ShapeUtil::GetTupleElementShape(shaped_buffer->on_host_shape(), i),
+        ShapeUtil::GetTupleElementShape(shaped_buffer->on_device_shape(), i),
+        shaped_buffer->platform(), shaped_buffer->device_ordinal());
+    element_buffer->set_buffer(shaped_buffer->buffer(/*index=*/{i}),
+                               /*index=*/{});
+    TF_ASSIGN_OR_RETURN(
+        GlobalDataHandle element_handle,
+        RegisterInternal(std::move(element_buffer), "deconstructed tuple"));
+
+    element_handles.push_back(element_handle);
   }
   return std::move(element_handles);
 }
 
-StatusOr<const Allocation*> AllocationTracker::Resolve(
+StatusOr<const ShapedBuffer*> AllocationTracker::Resolve(
     const GlobalDataHandle& data) {
-  tensorflow::mutex_lock lock(allocation_mutex_);
+  tensorflow::mutex_lock lock(mutex_);
   return AllocationTracker::ResolveInternal(data);
 }
 
-StatusOr<Allocation*> AllocationTracker::ResolveInternal(
+StatusOr<ShapedBuffer*> AllocationTracker::ResolveInternal(
     const GlobalDataHandle& data) {
   VLOG(2) << "resolve:" << data.handle();
-  auto it = handle_to_allocation_.find(data.handle());
-  if (it == handle_to_allocation_.end()) {
+  auto it = handle_to_shaped_buffer_.find(data.handle());
+  if (it == handle_to_shaped_buffer_.end()) {
     return NotFound("no allocation record for global data handle: %lld",
                     data.handle());
   }
-  Allocation* allocation = it->second.get();
+  ShapedBuffer* shaped_buffer = it->second.get();
 
-  if (allocation->is_deallocated()) {
+  if (shaped_buffer == nullptr) {
     return InvalidArgument("global data handle %lld was previously deallocated",
                            data.handle());
   }
 
-  return allocation;
+  return shaped_buffer;
 }
 
-AllocationTracker::HandleMap& AllocationTracker::GetOrCreateOpaqueToHandleMap(
-    int device_ordinal) {
-  if (opaque_to_handle_.size() <= device_ordinal) {
-    opaque_to_handle_.resize(device_ordinal + 1);
+void AllocationTracker::AddAllocationOrIncrementRefCount(
+    perftools::gputools::DeviceMemoryBase device_memory, int device_ordinal) {
+  AllocationMap& allocation_map = opaque_to_allocation_map_[device_ordinal];
+  auto it = allocation_map.find(device_memory.opaque());
+  if (it == allocation_map.end()) {
+    allocation_map[device_memory.opaque()] = {device_memory, device_ordinal,
+                                              /*ref_count=*/1};
+  } else {
+    it->second.ref_count++;
   }
-  return opaque_to_handle_[device_ordinal];
+}
+
+Status AllocationTracker::DecrementRefCount(
+    perftools::gputools::DeviceMemoryBase device_memory, int device_ordinal) {
+  AllocationMap& allocation_map = opaque_to_allocation_map_[device_ordinal];
+  auto it = allocation_map.find(device_memory.opaque());
+  TF_RET_CHECK(it != allocation_map.end());
+  Allocation& allocation = it->second;
+  TF_RET_CHECK(allocation.ref_count >= 1);
+  if (allocation.ref_count == 1) {
+    TF_RETURN_IF_ERROR(backend_->memory_allocator()->Deallocate(
+        device_ordinal, &device_memory));
+    allocation_map.erase(it);
+  } else {
+    allocation.ref_count--;
+  }
+  return tensorflow::Status::OK();
 }
 
 }  // namespace xla
diff --git a/tensorflow/compiler/xla/service/allocation_tracker.h b/tensorflow/compiler/xla/service/allocation_tracker.h
index ebbf35b6fe..8b25cbb482 100644
--- a/tensorflow/compiler/xla/service/allocation_tracker.h
+++ b/tensorflow/compiler/xla/service/allocation_tracker.h
@@ -28,147 +28,92 @@ limitations under the License.
 #include "tensorflow/compiler/xla/xla_data.pb.h"
 #include "tensorflow/core/platform/macros.h"
 #include "tensorflow/core/platform/mutex.h"
-#include "tensorflow/core/platform/stream_executor_no_cuda.h"
 #include "tensorflow/core/platform/thread_annotations.h"
 #include "tensorflow/core/platform/types.h"
 
 namespace xla {
 
-// A global allocation in device space, tracked by the XLA service.
-class Allocation {
- public:
-  Allocation(Backend* backend, int device_ordinal,
-             perftools::gputools::DeviceMemoryBase device_memory,
-             const Shape& shape, const string& tag, int initial_ref_count)
-      : backend_(backend),
-        device_ordinal_(device_ordinal),
-        device_memory_(device_memory),
-        shape_(shape),
-        tag_(tag),
-        ref_count_(initial_ref_count) {}
-
-  Backend* backend() const { return backend_; }
-  int device_ordinal() const { return device_ordinal_; }
-  perftools::gputools::DeviceMemoryBase device_memory() const {
-    return device_memory_;
-  }
-  const Shape& shape() const { return shape_; }
-  const string& tag() const { return tag_; }
-
-  bool is_deallocated() const {
-    CHECK_GE(ref_count_, 0);
-    return ref_count_ == 0;
-  }
-  int ref_count() const {
-    CHECK_GE(ref_count_, 0);
-    return ref_count_;
-  }
-  void increment_ref_count(int inc) {
-    CHECK_GT(ref_count_, 0);
-    CHECK_LE(ref_count_, INT_MAX - inc);
-    ref_count_ += inc;
-  }
-  void decrement_ref_count() {
-    CHECK_GT(ref_count_, 0);
-    --ref_count_;
-  }
-  perftools::gputools::DeviceMemoryBase* mutable_device_memory() {
-    return &device_memory_;
-  }
-
- private:
-  // The backend that the memory is allocated on.
-  Backend* backend_;
-
-  // The device that the memory is allocated on.
-  int device_ordinal_;
-
-  // The pointer to this allocation.
-  perftools::gputools::DeviceMemoryBase device_memory_;
-
-  // The shape of this allocation.
-  Shape shape_;
-
-  // An informal description of this allocation shown in tools.
-  string tag_;
-
-  // This is the number of Allocation objects which refer to this memory
-  // allocation.
-  int ref_count_;
-
-  // Return a string representation of this allocation for debugging or logging
-  // purposes.
-  string ToString() const;
-};
-
 // Tracks allocations for the XLA service; allocations can be registered
 // with shape/device/tag and resolved from a handle for later use.
 class AllocationTracker {
  public:
-  AllocationTracker();
+  // The allocator is used for deallocating memory when allocations are
+  // deregistered. All registered allocations must have the same platform as the
+  // allocator.
+  AllocationTracker(Backend* backend) : backend_(backend), next_handle_(1) {}
 
-  // Registers device memory with a given shape, device identifier, and tag, and
-  // returns a corresponding handle that can be used for talking to XLA
-  // clients.
-  GlobalDataHandle Register(Backend* backend, int device_ordinal,
-                            perftools::gputools::DeviceMemoryBase device_memory,
-                            const Shape& shape, const string& tag);
+  // Registers a shaped buffer of device memory, and returns a corresponding
+  // handle that can be used for talking to XLA clients.
+  StatusOr<GlobalDataHandle> Register(
+      std::unique_ptr<ShapedBuffer> shaped_buffer, const string& tag);
 
   // Unregister the allocation for the given data handle.
-  tensorflow::Status Unregister(const GlobalDataHandle& data);
+  Status Unregister(const GlobalDataHandle& data);
 
   // Returns a vector of global data handles that point to the tuple elements.
   StatusOr<std::vector<GlobalDataHandle>> DeconstructTuple(
       const GlobalDataHandle& Data);
 
-  // Resolve a handle from an XLA client to an allocation, or provide an
-  // error status to say whether it was not found (or found, but found
-  // deallocated).
-  StatusOr<const Allocation*> Resolve(const GlobalDataHandle& data);
+  // Resolve a handle from an XLA client to a shaped buffer, or provide an error
+  // status to say whether it was not found (or found, but found deallocated).
+  StatusOr<const ShapedBuffer*> Resolve(const GlobalDataHandle& data);
 
  private:
-  // Internal helper which resolves the given GlobalDataHandle to an Allocation.
-  StatusOr<Allocation*> ResolveInternal(const GlobalDataHandle& data)
-      EXCLUSIVE_LOCKS_REQUIRED(allocation_mutex_);
-
-  GlobalDataHandle RegisterInternal(
-      Backend* backend, int device_ordinal,
-      perftools::gputools::DeviceMemoryBase device_memory, const Shape& shape,
-      const string& tag, int initial_ref_count)
-      EXCLUSIVE_LOCKS_REQUIRED(allocation_mutex_);
-
-  // Helper function which deallocates the memory buffer containing the given
-  // shape referred to by device_memory. Tuples are traversed recursively
-  // deallocating all nested buffers. The parameter deallocated_buffers contains
-  // the set of buffers deallocated so far stored as opaque values (void *) from
-  // DeviceMemoryBase. Keeping track of deallocated buffers prevents
-  // double-freeing of buffers which may be referred to more than once in a
-  // nested tuple.
-  tensorflow::Status DeallocateShape(
-      Backend* backend, int device_ordinal,
-      perftools::gputools::DeviceMemoryBase* device_memory, const Shape& shape,
-      std::set<void*>* deallocated_buffers)
-      EXCLUSIVE_LOCKS_REQUIRED(allocation_mutex_);
-
-  // Returns the opaque_to_handle_ map for the given device_ordinal, creating
-  // a new map if there is not one for the device_ordinal.
-  using HandleMap = std::map<void*, int64>;
-  HandleMap& GetOrCreateOpaqueToHandleMap(int device_ordinal)
-      EXCLUSIVE_LOCKS_REQUIRED(allocation_mutex_);
-
-  tensorflow::mutex allocation_mutex_;  // Guards the allocation mapping.
+  // Data structure encapsulating single memory allocation on the device.
+  struct Allocation {
+    // The pointer to this allocation.
+    perftools::gputools::DeviceMemoryBase device_memory;
+
+    // The device that the memory is allocated on.
+    int device_ordinal;
+
+    // This is the number of times this memory allocation is refered to by
+    // registered data handles.
+    int ref_count;
+  };
+
+  // Internal helper which resolves the given GlobalDataHandle to a
+  // ShapedBuffer.
+  StatusOr<ShapedBuffer*> ResolveInternal(const GlobalDataHandle& data)
+      EXCLUSIVE_LOCKS_REQUIRED(mutex_);
+
+  // Internal helper which registers a shaped buffer.
+  StatusOr<GlobalDataHandle> RegisterInternal(
+      std::unique_ptr<ShapedBuffer> shaped_buffer, const string& tag)
+      EXCLUSIVE_LOCKS_REQUIRED(mutex_);
+
+  // Adds the given device address to the allocation tracker, or if it already
+  // exists, then increment it's reference count.
+  void AddAllocationOrIncrementRefCount(
+      perftools::gputools::DeviceMemoryBase device_memory, int device_ordinal)
+      EXCLUSIVE_LOCKS_REQUIRED(mutex_);
+
+  // Decrements the reference count of the given device memory. Then, if it is
+  // zero, deallocate the memory.
+  Status DecrementRefCount(perftools::gputools::DeviceMemoryBase device_memory,
+                           int device_ordinal) EXCLUSIVE_LOCKS_REQUIRED(mutex_);
+
+  // A map from device memory opaque value to allocation. One such map is
+  // maintained per device ordinal.
+  using AllocationMap = tensorflow::gtl::FlatMap<const void*, Allocation>;
+
+  tensorflow::mutex mutex_;
+
+  // Backend to use with this tracker. The backend supplies the memory allocator
+  // to use when deallocating memory.
+  Backend* backend_;
 
   // The next handle to assign to an allocation, guarded by the same mutex as
   // the mapping as they'll be mutated at the same time.
-  int64 next_handle_ GUARDED_BY(allocation_mutex_);
+  int64 next_handle_ GUARDED_BY(mutex_);
 
-  // A map from DeviceMemoryBase to handle for each device_ordinal.
-  std::vector<HandleMap> opaque_to_handle_ GUARDED_BY(allocation_mutex_);
+  // A map from device ordinal to AllocationMap.
+  tensorflow::gtl::FlatMap<int, AllocationMap> opaque_to_allocation_map_
+      GUARDED_BY(mutex_);
 
-  // Mapping from GlobalDataHandle handle to the corresponding registered
-  // Allocation object.
-  std::map<int64, std::unique_ptr<Allocation>> handle_to_allocation_
-      GUARDED_BY(allocation_mutex_);
+  // A map from data handle to ShapedBuffer.
+  tensorflow::gtl::FlatMap<int64, std::unique_ptr<ShapedBuffer>>
+      handle_to_shaped_buffer_ GUARDED_BY(mutex_);
 
   TF_DISALLOW_COPY_AND_ASSIGN(AllocationTracker);
 };
diff --git a/tensorflow/compiler/xla/service/cpu/cpu_executable.cc b/tensorflow/compiler/xla/service/cpu/cpu_executable.cc
index e956f478b8..028f827337 100644
--- a/tensorflow/compiler/xla/service/cpu/cpu_executable.cc
+++ b/tensorflow/compiler/xla/service/cpu/cpu_executable.cc
@@ -73,28 +73,6 @@ CpuExecutable::CpuExecutable(
       reinterpret_cast<ComputeFunctionType>(cantFail(sym.getAddress()));
 }
 
-// Given a pointer to an output buffer (following the CPU JIT calling
-// conventions), mark addresses that are "live". The initial pointer itself is
-// trivially live. If the shape of the buffer is a tuple, this analysis looks
-// into the tuple's elements and marks them live as well (since tuples keep
-// pointers to buffers) and also works recursively.  address is an in-memory
-// buffer address that contains some runtime XLA object.  shape is its
-// shape. marked_addresses is the set of live addresses to populate.
-static void MarkLiveAddressesInOutput(
-    const void* address, const Shape& shape,
-    std::unordered_set<const void*>* marked_addresses) {
-  marked_addresses->insert(address);
-  const uintptr_t* address_buffer = static_cast<const uintptr_t*>(address);
-  if (ShapeUtil::IsTuple(shape)) {
-    for (int i = 0; i < ShapeUtil::TupleElementCount(shape); ++i) {
-      const uintptr_t* element_address = address_buffer + i;
-      const void* element = reinterpret_cast<const void*>(*element_address);
-      MarkLiveAddressesInOutput(
-          element, ShapeUtil::GetTupleElementShape(shape, i), marked_addresses);
-    }
-  }
-}
-
 Status CpuExecutable::AllocateBuffers(
     DeviceMemoryAllocator* memory_allocator, int device_ordinal,
     std::vector<perftools::gputools::DeviceMemoryBase>* buffers) {
@@ -148,20 +126,6 @@ Status CpuExecutable::ExecuteComputeFunction(
     tensorflow::gtl::ArraySlice<const ShapedBuffer*> arguments,
     tensorflow::gtl::ArraySlice<se::DeviceMemoryBase> buffers,
     HloExecutionProfile* hlo_execution_profile) {
-  std::vector<se::DeviceMemoryBase> argument_buffers;
-  argument_buffers.reserve(arguments.size());
-  for (const auto* argument : arguments) {
-    argument_buffers.push_back(argument->buffer(/*index=*/{}));
-  }
-  return ExecuteComputeFunction(run_options, argument_buffers, buffers,
-                                hlo_execution_profile);
-}
-
-Status CpuExecutable::ExecuteComputeFunction(
-    const ExecutableRunOptions* run_options,
-    tensorflow::gtl::ArraySlice<se::DeviceMemoryBase> arguments,
-    tensorflow::gtl::ArraySlice<se::DeviceMemoryBase> buffers,
-    HloExecutionProfile* hlo_execution_profile) {
   // The calling convention for JITed functions is:
   //
   //  void function(void* result, const void* run_options, void** args_array,
@@ -177,8 +141,8 @@ Status CpuExecutable::ExecuteComputeFunction(
   //               determined by buffer analysis.
   //
   std::vector<const void*> args_array;
-  for (se::DeviceMemoryBase arg_mem : arguments) {
-    args_array.push_back(arg_mem.opaque());
+  for (const ShapedBuffer* argument : arguments) {
+    args_array.push_back(argument->root_buffer().opaque());
   }
 
   uint64 start_micros = tensorflow::Env::Default()->NowMicros();
@@ -246,11 +210,23 @@ Status CpuExecutable::ExecuteComputeFunction(
 }
 
 static void LogLiveAddresses(
-    const std::unordered_set<const void*>& marked_addresses) {
+    tensorflow::gtl::ArraySlice<se::DeviceMemoryBase> buffers,
+    const std::vector<bool>& buffers_in_result) {
+  if (!VLOG_IS_ON(3)) {
+    return;
+  }
+
+  CHECK_EQ(buffers.size(), buffers_in_result.size());
+  std::vector<const void*> live_out_buffers;
+  for (int i = 0; i < buffers.size(); ++i) {
+    if (buffers_in_result[i]) {
+      live_out_buffers.push_back(buffers[i].opaque());
+    }
+  }
   VLOG(3) << "Live addresses in output marking found "
-          << marked_addresses.size() << " addresses:\n"
+          << live_out_buffers.size() << " addresses:\n"
           << tensorflow::str_util::Join(
-                 marked_addresses, ", ", [](string* out, const void* address) {
+                 live_out_buffers, ", ", [](string* out, const void* address) {
                    tensorflow::strings::StrAppend(
                        out, tensorflow::strings::Printf("%p", address));
                  });
@@ -259,13 +235,12 @@ static void LogLiveAddresses(
 static Status DeallocateTempBuffers(
     DeviceMemoryAllocator* allocator, se::Stream* stream,
     tensorflow::gtl::ArraySlice<se::DeviceMemoryBase> buffers,
-    const std::unordered_set<const void*>& marked_addresses) {
-  // Keep those marked live because they are referenced by the output of the
-  // computation and are needed by the service. They will be deallocated by the
-  // service.
+    const std::vector<bool>& buffers_in_result) {
+  // Keep those buffers in the output of the marked live because they are needed
+  // by the service. They will be deallocated by the service.
   for (size_t i = 0; i < buffers.size(); ++i) {
     se::DeviceMemoryBase alloc = buffers[i];
-    if (marked_addresses.count(alloc.opaque()) == 0 && !alloc.is_null()) {
+    if (!buffers_in_result[i] && !alloc.is_null()) {
       VLOG(3) << "CpuExecutable deallocating buffer #" << i << " ["
               << alloc.opaque() << "]";
       TF_RETURN_IF_ERROR(
@@ -276,33 +251,43 @@ static Status DeallocateTempBuffers(
   return Status::OK();
 }
 
-StatusOr<perftools::gputools::DeviceMemoryBase> CpuExecutable::ExecuteOnStream(
+StatusOr<std::unique_ptr<ShapedBuffer>> CpuExecutable::CreateResultShapedBuffer(
     const ServiceExecutableRunOptions* run_options,
-    tensorflow::gtl::ArraySlice<se::DeviceMemoryBase> arguments,
-    HloExecutionProfile* hlo_execution_profile) {
+    tensorflow::gtl::ArraySlice<perftools::gputools::DeviceMemoryBase>
+        allocated_buffers,
+    std::vector<bool>* buffers_in_result) {
   se::Stream* stream = run_options->stream();
-  DeviceMemoryAllocator* memory_allocator = run_options->allocator();
-  std::vector<se::DeviceMemoryBase> buffers(assignment_->Allocations().size());
-
-  TF_RETURN_IF_ERROR(AllocateBuffers(
-      memory_allocator, stream->parent()->device_ordinal(), &buffers));
-  TF_RETURN_IF_ERROR(ExecuteComputeFunction(
-      &run_options->run_options(), arguments, buffers, hlo_execution_profile));
-
-  // Mark the buffers that are actually live (used in the output) when the
-  // computation finishes executing.
-  std::unordered_set<const void*> marked_addresses;
-  TF_ASSIGN_OR_RETURN(const BufferAllocation::Slice result_slice,
-                      assignment_->GetUniqueTopLevelOutputSlice());
-  se::DeviceMemoryBase top_level_output = buffers[result_slice.index()];
-  MarkLiveAddressesInOutput(top_level_output.opaque(), result_shape(),
-                            &marked_addresses);
-
-  LogLiveAddresses(marked_addresses);
-  TF_RETURN_IF_ERROR(DeallocateTempBuffers(memory_allocator, stream, buffers,
-                                           marked_addresses));
+  auto result_buffer = MakeUnique<ShapedBuffer>(
+      /*on_host_shape=*/result_shape(), /*on_device_shape=*/result_shape(),
+      stream->parent()->platform(), stream->parent()->device_ordinal());
 
-  return top_level_output;
+  // Copy DeviceMemoryBase values which contain the array(s) of the result into
+  // the respective location in ShapedBuffer which is returned to the caller.
+  TF_RETURN_IF_ERROR(result_buffer->buffers().ForEachMutableElementWithStatus(
+      [&](const ShapeIndex& index, se::DeviceMemoryBase* device_memory) {
+        const auto& sources = this->GetRootPointsToSet().element(index);
+        // The points to set is unambiguous so the set should be a
+        // singleton.
+        CHECK_EQ(1, sources.size());
+        const LogicalBuffer* buffer_source = sources[0];
+        HloInstruction* src = buffer_source->instruction();
+
+        // The source for this result buffer can be a nested buffer such as
+        // a tuple element. The source instruction should have a
+        // non-parameter buffer assigned.
+        TF_ASSIGN_OR_RETURN(
+            const BufferAllocation::Slice slice,
+            this->assignment_->GetUniqueSlice(src, buffer_source->index()));
+        CHECK(!slice.allocation()->is_entry_computation_parameter());
+
+        const BufferAllocation::Index buffer_index = slice.index();
+        const se::DeviceMemoryBase& buffer = allocated_buffers[buffer_index];
+        CHECK(!buffer.is_null() || buffer.size() == 0);
+        *device_memory = buffer;
+        (*buffers_in_result)[buffer_index] = true;
+        return Status::OK();
+      }));
+  return std::move(result_buffer);
 }
 
 StatusOr<std::unique_ptr<ShapedBuffer>> CpuExecutable::ExecuteOnStream(
@@ -317,67 +302,26 @@ StatusOr<std::unique_ptr<ShapedBuffer>> CpuExecutable::ExecuteOnStream(
   DeviceMemoryAllocator* memory_allocator = run_options->allocator();
   std::vector<se::DeviceMemoryBase> buffers(assignment_->Allocations().size());
 
-  auto result_buffer =
-      MakeUnique<ShapedBuffer>(result_shape(), stream->parent()->platform(),
-                               stream->parent()->device_ordinal());
-
   TF_RETURN_IF_ERROR(AllocateBuffers(
       memory_allocator, stream->parent()->device_ordinal(), &buffers));
   TF_RETURN_IF_ERROR(ExecuteComputeFunction(
       &run_options->run_options(), arguments, buffers, hlo_execution_profile));
 
-  // Copy DeviceMemoryBase values which contain the array(s) of the result into
-  // the respective location in ShapedBuffer which is returned to the caller.
   std::vector<bool> buffers_in_result(assignment_->Allocations().size(), false);
-  TF_RETURN_IF_ERROR(
-      result_buffer->mutable_shape_index_to_buffer_entry()
-          ->ForEachMutableElementWithStatus(
-              [&buffers, &buffers_in_result, &result_buffer, this](
-                  const ShapeIndex& index, size_t* buffer_entry) {
-                const auto& sources = this->GetRootPointsToSet().element(index);
-                // The points to set is unambiguous so the set should be a
-                // singleton.
-                CHECK_EQ(1, sources.size());
-                const LogicalBuffer* buffer_source = sources[0];
-                HloInstruction* src = buffer_source->instruction();
-
-                // The source for this result buffer can be a nested buffer
-                // such as a tuple element.
-
-                // The source instruction should have a non-parameter buffer
-                // assigned.
-                TF_ASSIGN_OR_RETURN(const BufferAllocation::Slice slice,
-                                    this->assignment_->GetUniqueSlice(
-                                        src, buffer_source->index()));
-                CHECK(!slice.allocation()->is_entry_computation_parameter());
-
-                const BufferAllocation::Index buffer_index = slice.index();
-                const se::DeviceMemoryBase& buffer = buffers[buffer_index];
-                CHECK(!buffer.is_null() || buffer.size() == 0);
-                *buffer_entry = result_buffer->mutable_buffers()->size();
-                result_buffer->mutable_buffers()->push_back(buffer);
-                buffers_in_result[buffer_index] = true;
-                return Status::OK();
-              }));
+  TF_ASSIGN_OR_RETURN(
+      std::unique_ptr<ShapedBuffer> result_buffer,
+      CreateResultShapedBuffer(run_options, buffers, &buffers_in_result));
 
   // Free all buffers not in the result.
-  for (size_t i = 0; i < buffers.size(); ++i) {
-    se::DeviceMemoryBase alloc = buffers[i];
-    if (!buffers_in_result[i] && !alloc.is_null()) {
-      VLOG(3) << "CpuExecutable deallocating buffer #" << i << " ["
-              << alloc.opaque() << "]";
-      TF_RETURN_IF_ERROR(memory_allocator->Deallocate(
-          stream->parent()->device_ordinal(), &alloc));
-    }
-  }
+  TF_RETURN_IF_ERROR(DeallocateTempBuffers(memory_allocator, stream, buffers,
+                                           buffers_in_result));
 
   return std::move(result_buffer);
 }
 
-StatusOr<perftools::gputools::DeviceMemoryBase>
-CpuExecutable::ExecuteAsyncOnStream(
+StatusOr<std::unique_ptr<ShapedBuffer>> CpuExecutable::ExecuteAsyncOnStream(
     const ServiceExecutableRunOptions* run_options,
-    tensorflow::gtl::ArraySlice<se::DeviceMemoryBase> arguments) {
+    tensorflow::gtl::ArraySlice<const ShapedBuffer*> arguments) {
   if (hlo_profiling_enabled()) {
     return Unimplemented(
         "Asynchronous execution on stream with hlo profiling is not yet "
@@ -393,29 +337,25 @@ CpuExecutable::ExecuteAsyncOnStream(
   TF_RETURN_IF_ERROR(AllocateBuffers(
       memory_allocator, stream->parent()->device_ordinal(), &buffers));
 
-  // Mark the buffers that are actually live (used in the output) when the
-  // computation finishes executing.
-  std::unordered_set<const void*> marked_addresses;
-  TF_ASSIGN_OR_RETURN(const BufferAllocation::Slice result_slice,
-                      assignment_->GetUniqueTopLevelOutputSlice());
-  se::DeviceMemoryBase top_level_output = buffers[result_slice.index()];
-  MarkLiveAddressesInOutput(top_level_output.opaque(), result_shape(),
-                            &marked_addresses);
+  std::vector<bool> buffers_in_result(assignment_->Allocations().size(), false);
+  TF_ASSIGN_OR_RETURN(
+      std::unique_ptr<ShapedBuffer> result_buffer,
+      CreateResultShapedBuffer(run_options, buffers, &buffers_in_result));
 
-  LogLiveAddresses(marked_addresses);
+  LogLiveAddresses(buffers, buffers_in_result);
 
   host_stream->EnqueueTask([this, run_options, arguments, buffers,
-                            marked_addresses, memory_allocator, stream]() {
+                            buffers_in_result, memory_allocator, stream]() {
     // Failing a CHECK here is not great, but I don't see an obvious way to
     // return a failed Status asynchronously.
     TF_CHECK_OK(ExecuteComputeFunction(&run_options->run_options(), arguments,
                                        buffers,
                                        /*hlo_execution_profile=*/nullptr));
     TF_CHECK_OK(DeallocateTempBuffers(memory_allocator, stream, buffers,
-                                      marked_addresses));
+                                      buffers_in_result));
   });
 
-  return top_level_output;
+  return std::move(result_buffer);
 }
 
 /*static*/ int64 CpuExecutable::ShapeSizeBytes(const Shape& shape) {
diff --git a/tensorflow/compiler/xla/service/cpu/cpu_executable.h b/tensorflow/compiler/xla/service/cpu/cpu_executable.h
index 17ee2d673e..50443a5995 100644
--- a/tensorflow/compiler/xla/service/cpu/cpu_executable.h
+++ b/tensorflow/compiler/xla/service/cpu/cpu_executable.h
@@ -55,21 +55,14 @@ class CpuExecutable : public Executable {
                 std::unique_ptr<HloProfileIndexMap> hlo_profile_index_map);
   ~CpuExecutable() override {}
 
-  StatusOr<perftools::gputools::DeviceMemoryBase> ExecuteOnStream(
-      const ServiceExecutableRunOptions* run_options,
-      tensorflow::gtl::ArraySlice<perftools::gputools::DeviceMemoryBase>
-          arguments,
-      HloExecutionProfile* hlo_execution_profile) override;
-
   StatusOr<std::unique_ptr<ShapedBuffer>> ExecuteOnStream(
       const ServiceExecutableRunOptions* run_options,
       tensorflow::gtl::ArraySlice<const ShapedBuffer*> arguments,
       HloExecutionProfile* hlo_execution_profile) override;
 
-  StatusOr<perftools::gputools::DeviceMemoryBase> ExecuteAsyncOnStream(
+  StatusOr<std::unique_ptr<ShapedBuffer>> ExecuteAsyncOnStream(
       const ServiceExecutableRunOptions* run_options,
-      tensorflow::gtl::ArraySlice<perftools::gputools::DeviceMemoryBase>
-          arguments) override;
+      tensorflow::gtl::ArraySlice<const ShapedBuffer*> arguments) override;
 
   // This should be called after set_ir_module_string.
   const string& ir_module_string() const { return ir_module_string_; }
@@ -108,13 +101,6 @@ class CpuExecutable : public Executable {
 
   // Calls the generated function performing the computation with the given
   // arguments using the supplied buffers.
-  Status ExecuteComputeFunction(
-      const ExecutableRunOptions* run_options,
-      tensorflow::gtl::ArraySlice<perftools::gputools::DeviceMemoryBase>
-          arguments,
-      tensorflow::gtl::ArraySlice<perftools::gputools::DeviceMemoryBase>
-          buffers,
-      HloExecutionProfile* hlo_execution_profile);
   Status ExecuteComputeFunction(
       const ExecutableRunOptions* run_options,
       tensorflow::gtl::ArraySlice<const ShapedBuffer*> arguments,
@@ -122,6 +108,18 @@ class CpuExecutable : public Executable {
           buffers,
       HloExecutionProfile* hlo_execution_profile);
 
+  // Create a ShapedBuffer for holding the result of the computation. The
+  // addresses (DeviceMemoryBases) are set according to buffer assignment.
+  // 'buffers_in_result' should point to a vector of the same size as
+  // 'allocated_buffers'. An element in buffers_in_result is set to true if the
+  // corresponding buffer is live out of the computation (and thus contained in
+  // the returned ShapedBuffer).
+  StatusOr<std::unique_ptr<ShapedBuffer>> CreateResultShapedBuffer(
+      const ServiceExecutableRunOptions* run_options,
+      tensorflow::gtl::ArraySlice<perftools::gputools::DeviceMemoryBase>
+          allocated_buffers,
+      std::vector<bool>* buffers_in_result);
+
   // Returns the points-to set of the root instruction of the entry
   // computation. Uses points-to analysis from buffer assignment.
   const PointsToSet& GetRootPointsToSet() const;
diff --git a/tensorflow/compiler/xla/service/cpu/parallel_cpu_executable.cc b/tensorflow/compiler/xla/service/cpu/parallel_cpu_executable.cc
index 0077e344e2..d1b88b27f0 100644
--- a/tensorflow/compiler/xla/service/cpu/parallel_cpu_executable.cc
+++ b/tensorflow/compiler/xla/service/cpu/parallel_cpu_executable.cc
@@ -376,19 +376,6 @@ Status ParallelCpuExecutable::ExecuteComputeFunctions(
     tensorflow::gtl::ArraySlice<const ShapedBuffer*> arguments,
     tensorflow::gtl::ArraySlice<se::DeviceMemoryBase> buffers,
     HloExecutionProfile* hlo_execution_profile) {
-  std::vector<se::DeviceMemoryBase> argument_buffers(arguments.size());
-  for (int i = 0; i < arguments.size(); ++i) {
-    argument_buffers[i] = arguments[i]->buffer(/*index=*/{});
-  }
-  return ExecuteComputeFunctions(run_options, argument_buffers, buffers,
-                                 hlo_execution_profile);
-}
-
-Status ParallelCpuExecutable::ExecuteComputeFunctions(
-    const ServiceExecutableRunOptions* run_options,
-    tensorflow::gtl::ArraySlice<se::DeviceMemoryBase> arguments,
-    tensorflow::gtl::ArraySlice<se::DeviceMemoryBase> buffers,
-    HloExecutionProfile* hlo_execution_profile) {
   // Allocate profiling counters for each hlo instruction that we would like to
   // profile.
   std::vector<int64>* profile_counters = nullptr;
@@ -428,8 +415,9 @@ Status ParallelCpuExecutable::ExecuteComputeFunctions(
     // just copy the existing buffer into the map containing instruction
     // results..
     if (instruction->opcode() == HloOpcode::kParameter) {
-      InsertOrDie(&results, instruction,
-                  arguments[instruction->parameter_number()].opaque());
+      InsertOrDie(
+          &results, instruction,
+          arguments[instruction->parameter_number()]->root_buffer().opaque());
     } else if (instruction->opcode() == HloOpcode::kConstant) {
       unsigned char* aligned_data =
           FindOrDie(aligned_constants_, instruction).get();
@@ -461,69 +449,6 @@ Status ParallelCpuExecutable::ExecuteComputeFunctions(
   return Status::OK();
 }
 
-StatusOr<perftools::gputools::DeviceMemoryBase>
-ParallelCpuExecutable::ExecuteOnStream(
-    const ServiceExecutableRunOptions* run_options,
-    tensorflow::gtl::ArraySlice<se::DeviceMemoryBase> arguments,
-    HloExecutionProfile* hlo_execution_profile) {
-  se::Stream* stream = run_options->stream();
-  DeviceMemoryAllocator* memory_allocator = run_options->allocator();
-  VLOG(3) << "ExecuteOnStream arg size: " << arguments.size();
-  if (!arguments.empty()) {
-    VLOG(3) << "ExecuteOnStream arg[0]: " << arguments.at(0).opaque();
-  }
-
-  // Allocate the temporary buffers required for the computation.
-  se::StreamExecutor* stream_executor = stream->parent();
-  int device_ordinal = stream_executor->device_ordinal();
-  int64 buffer_count = assignment_->Allocations().size();
-  VLOG(3) << "temp buffer count: " << buffer_count;
-
-  std::vector<se::DeviceMemoryBase> device_allocations(
-      assignment_->Allocations().size());
-  TF_RETURN_IF_ERROR(AllocateBuffers(memory_allocator,
-                                     stream->parent()->device_ordinal(),
-                                     &device_allocations));
-
-  TF_ASSIGN_OR_RETURN(const BufferAllocation::Slice result_slice,
-                      assignment_->GetUniqueTopLevelOutputSlice());
-  const BufferAllocation::Index result_index = result_slice.index();
-  VLOG(3) << "result index: " << result_index;
-
-  TF_RETURN_IF_ERROR(ExecuteComputeFunctions(
-      run_options, arguments, device_allocations, hlo_execution_profile));
-
-  // Mark the buffers that are actually live (used in the output) when the
-  // computation finishes executing.
-  std::unordered_set<const void*> marked_addresses;
-  MarkLiveAddressesInOutput(device_allocations[result_index].opaque(),
-                            result_shape(), &marked_addresses);
-
-  VLOG(3) << "Live addresses in output marking found "
-          << marked_addresses.size() << " addresses:\n"
-          << tensorflow::str_util::Join(
-                 marked_addresses, ", ", [](string* out, const void* address) {
-                   tensorflow::strings::StrAppend(
-                       out, tensorflow::strings::Printf("%p", address));
-                 });
-
-  // Computation is done - deallocate temp buffers. Keep those marked
-  // live because they are referenced by the output of the computation
-  // and are needed by the service. They will be deallocated by the
-  // service.
-  for (size_t i = 0; i < device_allocations.size(); ++i) {
-    auto alloc = device_allocations[i];
-    if (marked_addresses.count(alloc.opaque()) == 0 &&
-        alloc.opaque() != nullptr) {
-      VLOG(3) << "ParallelCpuExecutable deallocating buffer #" << i << " ["
-              << alloc.opaque() << "]";
-      TF_RETURN_IF_ERROR(memory_allocator->Deallocate(device_ordinal, &alloc));
-    }
-  }
-
-  return device_allocations[result_index];
-}
-
 StatusOr<std::unique_ptr<ShapedBuffer>> ParallelCpuExecutable::ExecuteOnStream(
     const ServiceExecutableRunOptions* run_options,
     tensorflow::gtl::ArraySlice<const ShapedBuffer*> arguments,
@@ -536,9 +461,9 @@ StatusOr<std::unique_ptr<ShapedBuffer>> ParallelCpuExecutable::ExecuteOnStream(
   DeviceMemoryAllocator* memory_allocator = run_options->allocator();
   std::vector<se::DeviceMemoryBase> buffers(assignment_->Allocations().size());
 
-  auto result_buffer =
-      MakeUnique<ShapedBuffer>(result_shape(), stream->parent()->platform(),
-                               stream->parent()->device_ordinal());
+  auto result_buffer = MakeUnique<ShapedBuffer>(
+      /*on_host_shape=*/result_shape(), /*on_device_shape=*/result_shape(),
+      stream->parent()->platform(), stream->parent()->device_ordinal());
 
   TF_RETURN_IF_ERROR(AllocateBuffers(
       memory_allocator, stream->parent()->device_ordinal(), &buffers));
@@ -549,37 +474,30 @@ StatusOr<std::unique_ptr<ShapedBuffer>> ParallelCpuExecutable::ExecuteOnStream(
   // Copy DeviceMemoryBase values which into the respective location in
   // ShapedBuffer which is returned to the caller.
   std::vector<bool> buffers_in_result(assignment_->Allocations().size(), false);
-  TF_RETURN_IF_ERROR(
-      result_buffer->mutable_shape_index_to_buffer_entry()
-          ->ForEachMutableElementWithStatus(
-              [&buffers, &buffers_in_result, &result_buffer, this](
-                  const ShapeIndex& index, size_t* buffer_entry) {
-                  const auto& sources =
-                      this->GetRootPointsToSet().element(index);
-                  // The points to set is unambiguous so the set should be a
-                  // singleton.
-                  CHECK_EQ(1, sources.size());
-                  const LogicalBuffer* buffer_source = sources[0];
-                  HloInstruction* src = buffer_source->instruction();
-
-                  // The source for this result buffer can be a nested buffer
-                  // such as a tuple element.
-
-                  // The source instruction should have a non-parameter buffer
-                  // assigned.
-                  TF_ASSIGN_OR_RETURN(const BufferAllocation::Slice slice,
-                                      this->assignment_->GetUniqueSlice(
-                                          src, buffer_source->index()));
-                  CHECK(!slice.allocation()->is_entry_computation_parameter());
-
-                  const BufferAllocation::Index buffer_index = slice.index();
-                  const se::DeviceMemoryBase& buffer = buffers[buffer_index];
-                  CHECK(!buffer.is_null() || buffer.size() == 0);
-                  *buffer_entry = result_buffer->mutable_buffers()->size();
-                  result_buffer->mutable_buffers()->push_back(buffer);
-                  buffers_in_result[buffer_index] = true;
-                return Status::OK();
-              }));
+  TF_RETURN_IF_ERROR(result_buffer->buffers().ForEachMutableElementWithStatus(
+      [&](const ShapeIndex& index, se::DeviceMemoryBase* device_memory) {
+        const auto& sources = this->GetRootPointsToSet().element(index);
+
+        // The points to set is unambiguous so the set should be a singleton.
+        CHECK_EQ(1, sources.size());
+        const LogicalBuffer* buffer_source = sources[0];
+        HloInstruction* src = buffer_source->instruction();
+
+        // The source for this result buffer can be a nested buffer such as a
+        // tuple element. The source instruction should have a non-parameter
+        // buffer assigned.
+        TF_ASSIGN_OR_RETURN(
+            const BufferAllocation::Slice slice,
+            this->assignment_->GetUniqueSlice(src, buffer_source->index()));
+        CHECK(!slice.allocation()->is_entry_computation_parameter());
+
+        const BufferAllocation::Index buffer_index = slice.index();
+        const se::DeviceMemoryBase& buffer = buffers[buffer_index];
+        CHECK(!buffer.is_null() || buffer.size() == 0);
+        *device_memory = buffer;
+        buffers_in_result[buffer_index] = true;
+        return Status::OK();
+      }));
 
   // Free all buffers not in the result.
   for (size_t i = 0; i < buffers.size(); ++i) {
@@ -595,10 +513,10 @@ StatusOr<std::unique_ptr<ShapedBuffer>> ParallelCpuExecutable::ExecuteOnStream(
   return std::move(result_buffer);
 }
 
-StatusOr<perftools::gputools::DeviceMemoryBase>
+StatusOr<std::unique_ptr<ShapedBuffer>>
 ParallelCpuExecutable::ExecuteAsyncOnStream(
     const ServiceExecutableRunOptions* run_options,
-    tensorflow::gtl::ArraySlice<se::DeviceMemoryBase> arguments) {
+    tensorflow::gtl::ArraySlice<const ShapedBuffer*> arguments) {
   // TODO(b/30671675): Implement asynchronous execution mode.
   return Unimplemented(
       "Asynchronous execution on stream is not yet supported on CPU.");
diff --git a/tensorflow/compiler/xla/service/cpu/parallel_cpu_executable.h b/tensorflow/compiler/xla/service/cpu/parallel_cpu_executable.h
index d65e3f42f3..90ac94ef92 100644
--- a/tensorflow/compiler/xla/service/cpu/parallel_cpu_executable.h
+++ b/tensorflow/compiler/xla/service/cpu/parallel_cpu_executable.h
@@ -59,21 +59,14 @@ class ParallelCpuExecutable : public Executable {
       std::unique_ptr<HloProfileIndexMap> hlo_profile_index_map);
   ~ParallelCpuExecutable() override {}
 
-  StatusOr<perftools::gputools::DeviceMemoryBase> ExecuteOnStream(
-      const ServiceExecutableRunOptions* run_options,
-      tensorflow::gtl::ArraySlice<perftools::gputools::DeviceMemoryBase>
-          arguments,
-      HloExecutionProfile* hlo_execution_profile) override;
-
   StatusOr<std::unique_ptr<ShapedBuffer>> ExecuteOnStream(
       const ServiceExecutableRunOptions* run_options,
       tensorflow::gtl::ArraySlice<const ShapedBuffer*> arguments,
       HloExecutionProfile* hlo_execution_profile) override;
 
-  StatusOr<perftools::gputools::DeviceMemoryBase> ExecuteAsyncOnStream(
+  StatusOr<std::unique_ptr<ShapedBuffer>> ExecuteAsyncOnStream(
       const ServiceExecutableRunOptions* run_options,
-      tensorflow::gtl::ArraySlice<perftools::gputools::DeviceMemoryBase>
-          arguments) override;
+      tensorflow::gtl::ArraySlice<const ShapedBuffer*> arguments) override;
 
   // This should be called after set_ir_module_string.
   const string& ir_module_string() const { return ir_module_string_; }
@@ -108,13 +101,6 @@ class ParallelCpuExecutable : public Executable {
 
   // Calls the generated functions in 'function_names_', performing the
   // computation with the given arguments using the supplied buffers.
-  Status ExecuteComputeFunctions(
-      const ServiceExecutableRunOptions* run_options,
-      tensorflow::gtl::ArraySlice<perftools::gputools::DeviceMemoryBase>
-          arguments,
-      tensorflow::gtl::ArraySlice<perftools::gputools::DeviceMemoryBase>
-          buffers,
-      HloExecutionProfile* hlo_execution_profile);
   Status ExecuteComputeFunctions(
       const ServiceExecutableRunOptions* run_options,
       tensorflow::gtl::ArraySlice<const ShapedBuffer*> arguments,
diff --git a/tensorflow/compiler/xla/service/executable.cc b/tensorflow/compiler/xla/service/executable.cc
index ad5d5ead00..c50aaec572 100644
--- a/tensorflow/compiler/xla/service/executable.cc
+++ b/tensorflow/compiler/xla/service/executable.cc
@@ -26,23 +26,23 @@ limitations under the License.
 
 namespace xla {
 
-StatusOr<std::vector<perftools::gputools::DeviceMemoryBase>>
+StatusOr<std::vector<std::unique_ptr<ShapedBuffer>>>
 Executable::ExecuteOnStreams(
     tensorflow::gtl::ArraySlice<const ServiceExecutableRunOptions> run_options,
     tensorflow::gtl::ArraySlice<
-        tensorflow::gtl::ArraySlice<perftools::gputools::DeviceMemoryBase>>
+        tensorflow::gtl::ArraySlice<const ShapedBuffer*>>
         arguments) {
   TF_RET_CHECK(run_options.size() == arguments.size());
 
+  std::vector<std::unique_ptr<ShapedBuffer>> return_values(run_options.size());
+
   if (run_options.size() == 1) {
-    TF_ASSIGN_OR_RETURN(auto result,
+    TF_ASSIGN_OR_RETURN(return_values[0],
                         ExecuteOnStream(&run_options[0], arguments[0],
                                         /*hlo_execution_profile=*/nullptr));
-    return std::vector<perftools::gputools::DeviceMemoryBase>({result});
+    return std::move(return_values);
   }
 
-  std::vector<perftools::gputools::DeviceMemoryBase> return_values(
-      run_options.size());
   for (size_t i = 0; i < run_options.size(); ++i) {
     // We cannot BlockHostUntilDone() on the already-launched executions in case
     // of error, since if the executions communicate, the initially launched
@@ -54,7 +54,7 @@ Executable::ExecuteOnStreams(
     TF_RET_CHECK(options.stream() != nullptr);
     TF_RETURN_IF_ERROR(options.stream()->BlockHostUntilDone());
   }
-  return return_values;
+  return std::move(return_values);
 }
 
 Status Executable::DumpSessionModule() {
diff --git a/tensorflow/compiler/xla/service/executable.h b/tensorflow/compiler/xla/service/executable.h
index cb9ee47dc6..23864dda78 100644
--- a/tensorflow/compiler/xla/service/executable.h
+++ b/tensorflow/compiler/xla/service/executable.h
@@ -61,16 +61,7 @@ class Executable {
   // If the hlo_execution_profile is provided as non-nullptr, profiling will be
   // enabled.
   //
-  // Returns the device memory region that a successful execution would
-  // populate.
-  virtual StatusOr<perftools::gputools::DeviceMemoryBase> ExecuteOnStream(
-      const ServiceExecutableRunOptions* run_options,
-      tensorflow::gtl::ArraySlice<perftools::gputools::DeviceMemoryBase>
-          arguments,
-      HloExecutionProfile* hlo_execution_profile) = 0;
-
-  // Overload of ExecuteOnStream which returns and takes arguments as
-  // ShapedBuffers. Used for LocalService execution.
+  // Returns a shaped buffer containing the result of the computation.
   virtual StatusOr<std::unique_ptr<ShapedBuffer>> ExecuteOnStream(
       const ServiceExecutableRunOptions* run_options,
       tensorflow::gtl::ArraySlice<const ShapedBuffer*> arguments,
@@ -78,21 +69,19 @@ class Executable {
 
   // Same as ExecuteOnStream(), but this call is non-blocking and returns as
   // soon as all of the operations are enqueued for launch on the stream.
-  virtual StatusOr<perftools::gputools::DeviceMemoryBase> ExecuteAsyncOnStream(
+  virtual StatusOr<std::unique_ptr<ShapedBuffer>> ExecuteAsyncOnStream(
       const ServiceExecutableRunOptions* run_options,
-      tensorflow::gtl::ArraySlice<perftools::gputools::DeviceMemoryBase>
-          arguments) = 0;
+      tensorflow::gtl::ArraySlice<const ShapedBuffer*> arguments) = 0;
 
   // Same as ExecuteOnStream(), but runs this executable on multiple
   // streams. arguments[i] contains the arguments to the execution on
   // run_options[i]->stream() and the returned value is at index i of the
   // returned vector.
-  virtual StatusOr<std::vector<perftools::gputools::DeviceMemoryBase>>
-  ExecuteOnStreams(
+  virtual StatusOr<std::vector<std::unique_ptr<ShapedBuffer>>> ExecuteOnStreams(
       tensorflow::gtl::ArraySlice<const ServiceExecutableRunOptions>
           run_options,
       tensorflow::gtl::ArraySlice<
-          tensorflow::gtl::ArraySlice<perftools::gputools::DeviceMemoryBase>>
+          tensorflow::gtl::ArraySlice<const ShapedBuffer*>>
           arguments);
 
   // Populates `hlo_execution_profile` from `executor`. This is implicit in any
@@ -224,7 +213,7 @@ StatusOr<ReturnT> Executable::ExecuteOnStreamWrapper(
   if (profile != nullptr) {
     VLOG(1) << "enqueueing 'stop timer' and blocking host until done...";
     stream->ThenStopTimer(timer.get());
-    SE_CHECK_OK(stream->BlockHostUntilDone());
+    TF_RETURN_IF_ERROR(stream->BlockHostUntilDone());
     VLOG(1) << "done with block-host-until-done";
 
     // Merge in run-time profile information from execution_profile.
diff --git a/tensorflow/compiler/xla/service/generic_transfer_manager.cc b/tensorflow/compiler/xla/service/generic_transfer_manager.cc
index 74aa77b4f1..271a856efd 100644
--- a/tensorflow/compiler/xla/service/generic_transfer_manager.cc
+++ b/tensorflow/compiler/xla/service/generic_transfer_manager.cc
@@ -51,83 +51,7 @@ se::Platform::Id GenericTransferManager::PlatformId() const {
   return platform_id_;
 }
 
-Status GenericTransferManager::TransferLiteralFromDevice(
-    se::StreamExecutor* executor, const se::DeviceMemoryBase& source,
-    const Shape& device_shape, const Shape& literal_shape, Literal* literal) {
-  VLOG(2) << "transferring literal shape from device: "
-          << ShapeUtil::HumanString(literal_shape)
-          << "; device location: " << source.opaque();
-  TF_RET_CHECK(ShapeUtil::Compatible(device_shape, literal_shape));
-
-  // Tuples are a special case and contain one or more shapes inside of them to
-  // an arbitrary nesting depth.
-  if (device_shape.element_type() == TUPLE) {
-    *literal->mutable_shape() = literal_shape;
-    TF_ASSIGN_OR_RETURN(
-        std::vector<se::DeviceMemoryBase> element_buffers,
-        ShallowCopyTupleFromDevice(executor, source, device_shape));
-    TF_RET_CHECK(element_buffers.size() ==
-                 ShapeUtil::TupleElementCount(device_shape));
-    for (int64 i = 0; i < element_buffers.size(); ++i) {
-      const Shape& element_device_shape = device_shape.tuple_shapes(i);
-      const Shape& element_literal_shape = literal_shape.tuple_shapes(i);
-      Literal* element_literal = literal->add_tuple_literals();
-      // Recursively call TransferFromDevice to copy over the data in the
-      // element array.
-      TF_RETURN_IF_ERROR(TransferLiteralFromDevice(
-          executor, element_buffers[i], /*device_shape=*/element_device_shape,
-          /*literal_shape=*/element_literal_shape, element_literal));
-    }
-    return Status::OK();
-  }
-
-  *literal->mutable_shape() = device_shape;
-  literal->Reserve(ShapeUtil::ElementsIn(device_shape));
-  TF_RETURN_IF_ERROR(TransferBufferFromDevice(
-      executor, source, /*size=*/ShapeUtil::ByteSizeOf(device_shape),
-      /*destination=*/literal->MutableInternalData()));
-  if (!ShapeUtil::Equal(literal_shape, device_shape)) {
-    *literal = std::move(*literal->Relayout(literal_shape.layout()));
-  }
-  TF_RET_CHECK(ShapeUtil::Equal(literal_shape, literal->shape()));
-  return Status::OK();
-}
-
-StatusOr<std::vector<se::DeviceMemoryBase>>
-GenericTransferManager::ShallowCopyTupleFromDevice(
-    se::StreamExecutor* executor, const se::DeviceMemoryBase& source,
-    const Shape& shape) {
-  TF_RET_CHECK(ShapeUtil::IsTuple(shape));
-
-  // For devices which use the GenericTransferManager, a tuple is stored as an
-  // array of pointers to buffers. Copy the contents of the tuple buffer into
-  // a vector of void* pointers.
-  std::vector<void*> element_pointers(ShapeUtil::TupleElementCount(shape),
-                                      nullptr);
-  int64 tuple_size = ShapeUtil::ByteSizeOf(shape, pointer_size_);
-  auto copy_status = executor->SynchronousMemcpyD2H(source, tuple_size,
-                                                    element_pointers.data());
-  if (!copy_status.ok()) {
-    return AddStatus(
-        Status(static_cast<tensorflow::error::Code>(copy_status.code()),
-               copy_status.error_message()),
-        "failed transfer of tuple buffer " + ShapeUtil::HumanString(shape));
-  }
-
-  // Create a DeviceMemoryBase from each void* pointer.
-  std::vector<se::DeviceMemoryBase> destination;
-  for (size_t i = 0; i < element_pointers.size(); ++i) {
-    if (element_pointers[i] == nullptr &&
-        !ShapeUtil::HasZeroElements(shape.tuple_shapes(i))) {
-      return FailedPrecondition("tuple contains nullptr at element %lu", i);
-    }
-    destination.emplace_back(element_pointers[i],
-                             GetByteSizeRequirement(shape.tuple_shapes(i)));
-  }
-  return std::move(destination);
-}
-
-Status GenericTransferManager::WriteTuplePointersToDevice(
+Status GenericTransferManager::WriteSingleTupleIndexTable(
     perftools::gputools::StreamExecutor* executor,
     tensorflow::gtl::ArraySlice<se::DeviceMemoryBase> elements,
     const Shape& shape, perftools::gputools::DeviceMemoryBase* region) {
@@ -145,16 +69,19 @@ StatusOr<std::unique_ptr<Literal>>
 GenericTransferManager::TransferLiteralFromDevice(
     se::StreamExecutor* executor, const ShapedBuffer& device_buffer) {
   VLOG(2) << "transferring literal from device ordinal "
-          << executor->device_ordinal() << "; device shape: "
-          << ShapeUtil::HumanStringWithLayout(device_buffer.shape())
-          << "; opaque: " << device_buffer.buffer(/*index=*/{}).opaque();
+          << executor->device_ordinal() << "; device buffer: " << device_buffer;
   TF_RET_CHECK(executor->device_ordinal() == device_buffer.device_ordinal());
 
+  // The on-host and on-device shape should always be the same for the generic
+  // transfer manager.
+  TF_RET_CHECK(ShapeUtil::Equal(device_buffer.on_device_shape(),
+                                device_buffer.on_host_shape()));
+
   std::unique_ptr<Literal> literal =
-      Literal::CreateFromShape(device_buffer.shape());
+      Literal::CreateFromShape(device_buffer.on_host_shape());
 
   TF_RETURN_IF_ERROR(ShapeUtil::ForEachSubshapeWithStatus(
-      device_buffer.shape(),
+      device_buffer.on_host_shape(),
       [&](const Shape& subshape, const ShapeIndex& index) -> Status {
         if (!ShapeUtil::IsTuple(subshape)) {
           TF_RETURN_IF_ERROR(TransferBufferFromDevice(
@@ -175,16 +102,22 @@ Status GenericTransferManager::TransferLiteralToDevice(
     const ShapedBuffer& device_buffer) {
   const Shape& shape = literal.shape();
   VLOG(2) << "transferring literal shape to device: "
-          << ShapeUtil::HumanString(shape) << "; device location: "
-          << device_buffer.buffer(/*index=*/{}).opaque();
+          << ShapeUtil::HumanString(shape)
+          << "; device buffer: " << device_buffer;
+
+  // The on-host and on-device shape should always be the same for the generic
+  // transfer manager.
+  TF_RET_CHECK(ShapeUtil::Equal(device_buffer.on_device_shape(),
+                                device_buffer.on_host_shape()));
 
-  TF_RET_CHECK(ShapeUtil::Compatible(literal.shape(), device_buffer.shape()));
+  TF_RET_CHECK(
+      ShapeUtil::Compatible(literal.shape(), device_buffer.on_host_shape()));
   TF_RET_CHECK(executor->device_ordinal() == device_buffer.device_ordinal());
 
   TF_RETURN_IF_ERROR(WriteTupleIndexTables(executor, device_buffer));
 
   return ShapeUtil::ForEachSubshapeWithStatus(
-      device_buffer.shape(),
+      device_buffer.on_host_shape(),
       [&](const Shape& device_subshape, const ShapeIndex& index) -> Status {
         se::DeviceMemoryBase device_memory = device_buffer.buffer(index);
         if (ShapeUtil::IsArray(device_subshape)) {
@@ -212,33 +145,6 @@ Status GenericTransferManager::TransferLiteralToDevice(
       });
 }
 
-Status GenericTransferManager::TransferLiteralToDevice(
-    se::StreamExecutor* executor, const Literal& literal,
-    se::DeviceMemoryBase* destination) {
-  const Shape& shape = literal.shape();
-  VLOG(2) << "transferring literal shape to device: "
-          << ShapeUtil::HumanString(shape)
-          << "; device location: " << destination->opaque();
-
-  if (ShapeUtil::IsTuple(literal.shape())) {
-    std::vector<void*> tuple_elements_on_device;
-    for (const Literal& tuple_element : literal.tuple_literals()) {
-      se::DeviceMemoryBase allocation = executor->AllocateArray<uint8>(
-          GetByteSizeRequirement(tuple_element.shape()));
-      TF_RETURN_IF_ERROR(
-          TransferLiteralToDevice(executor, tuple_element, &allocation));
-      tuple_elements_on_device.push_back(allocation.opaque());
-    }
-    return TransferBufferToDevice(
-        executor, tuple_elements_on_device.size() * sizeof(void*),
-        tuple_elements_on_device.data(), destination);
-  }
-
-  return TransferBufferToDevice(executor,
-                                /*size=*/GetByteSizeRequirement(shape),
-                                /*source=*/literal.InternalData(), destination);
-}
-
 Status GenericTransferManager::TransferLiteralToInfeed(
     se::StreamExecutor* executor, const Literal& literal) {
   return Unimplemented("Generic transfer to Infeed");
diff --git a/tensorflow/compiler/xla/service/generic_transfer_manager.h b/tensorflow/compiler/xla/service/generic_transfer_manager.h
index 50dca6aec5..63a7c820cf 100644
--- a/tensorflow/compiler/xla/service/generic_transfer_manager.h
+++ b/tensorflow/compiler/xla/service/generic_transfer_manager.h
@@ -42,16 +42,6 @@ class GenericTransferManager : public TransferManager {
 
   perftools::gputools::Platform::Id PlatformId() const override;
 
-  Status TransferLiteralFromDevice(
-      perftools::gputools::StreamExecutor* executor,
-      const perftools::gputools::DeviceMemoryBase& source,
-      const Shape& device_shape, const Shape& literal_shape,
-      Literal* literal) override;
-
-  Status TransferLiteralToDevice(
-      perftools::gputools::StreamExecutor* executor, const Literal& literal,
-      perftools::gputools::DeviceMemoryBase* destination) override;
-
   StatusOr<std::unique_ptr<Literal>> TransferLiteralFromDevice(
       perftools::gputools::StreamExecutor* executor,
       const ShapedBuffer& device_buffer) override;
@@ -62,9 +52,6 @@ class GenericTransferManager : public TransferManager {
 
   Status TransferLiteralToInfeed(perftools::gputools::StreamExecutor* executor,
                                  const Literal& literal) override;
-  Status TransferBufferToInfeed(perftools::gputools::StreamExecutor* executor,
-                                int64 size, const void* source) override;
-
   Status TransferLiteralFromOutfeed(
       perftools::gputools::StreamExecutor* executor, const Shape& literal_shape,
       Literal* literal) override;
@@ -73,16 +60,13 @@ class GenericTransferManager : public TransferManager {
       tensorflow::gtl::ArraySlice<perftools::gputools::StreamExecutor*>
           executors) override;
 
-  StatusOr<std::vector<perftools::gputools::DeviceMemoryBase>>
-  ShallowCopyTupleFromDevice(
-      perftools::gputools::StreamExecutor* executor,
-      const perftools::gputools::DeviceMemoryBase& source,
-      const Shape& shape) override;
-
   int64 GetByteSizeRequirement(const Shape& shape) const override;
 
  protected:
-  Status WriteTuplePointersToDevice(
+  Status TransferBufferToInfeed(perftools::gputools::StreamExecutor* executor,
+                                int64 size, const void* source) override;
+
+  Status WriteSingleTupleIndexTable(
       perftools::gputools::StreamExecutor* executor,
       tensorflow::gtl::ArraySlice<perftools::gputools::DeviceMemoryBase>
           elements,
diff --git a/tensorflow/compiler/xla/service/gpu/gpu_executable.cc b/tensorflow/compiler/xla/service/gpu/gpu_executable.cc
index b802ae9c7a..366d87e9c3 100644
--- a/tensorflow/compiler/xla/service/gpu/gpu_executable.cc
+++ b/tensorflow/compiler/xla/service/gpu/gpu_executable.cc
@@ -203,84 +203,6 @@ Status GpuExecutable::ExecuteThunks(
   return Status::OK();
 }
 
-StatusOr<se::DeviceMemoryBase> GpuExecutable::ExecuteOnStream(
-    const ServiceExecutableRunOptions* run_options,
-    tensorflow::gtl::ArraySlice<se::DeviceMemoryBase> arguments,
-    HloExecutionProfile* hlo_execution_profile) {
-  se::Stream* stream = run_options->stream();
-  DeviceMemoryAllocator* memory_allocator = run_options->allocator();
-
-  BufferAllocations::Builder buffer_allocations_builder;
-  for (BufferAllocation::Index i = 0; i < assignment_->Allocations().size();
-       ++i) {
-    const BufferAllocation& allocation = assignment_->GetAllocation(i);
-    if (allocation.is_entry_computation_parameter()) {
-      buffer_allocations_builder.RegisterBuffer(
-          i, arguments[allocation.parameter_number()]);
-    }
-  }
-  se::StreamExecutor* executor = stream->parent();
-  TF_ASSIGN_OR_RETURN(
-      auto buffer_allocations,
-      buffer_allocations_builder.Build(*assignment_, executor->device_ordinal(),
-                                       memory_allocator));
-
-  bool block_host_until_done =
-      !memory_allocator->AllowsAsynchronousDeallocation();
-  TF_RETURN_IF_ERROR(ExecuteThunks(run_options, *buffer_allocations,
-                                   block_host_until_done,
-                                   hlo_execution_profile));
-
-  HloInstruction* root = hlo_module_->entry_computation()->root_instruction();
-  TF_ASSIGN_OR_RETURN(const BufferAllocation::Slice output_slice,
-                      assignment_->GetUniqueTopLevelOutputSlice());
-  se::DeviceMemoryBase output_buffer_address =
-      buffer_allocations->GetDeviceAddress(output_slice.index());
-
-  if (ShapeUtil::IsTuple(root->shape())) {
-    std::set<se::DeviceMemoryBase> referred_by_output;
-    if (GetRootPointsToSet().IsAmbiguous()) {
-      // The points-to set of the root is ambiguous so we need to examine the
-      // result data to determine which buffers are contained in the result.
-      TF_ASSIGN_OR_RETURN(
-          TransferManager * transfer_manager,
-          TransferManager::GetForPlatform(executor->platform()));
-      TF_ASSIGN_OR_RETURN(referred_by_output,
-                          transfer_manager->GatherBufferPointersFromTuple(
-                              executor, output_buffer_address, root->shape()));
-    } else {
-      // The points-to set of the root is unambiguous so it's known statically
-      // which buffers are in the result. Gather these buffers using the root's
-      // points-to set.
-      TF_RETURN_IF_ERROR(GetRootPointsToSet().ForEachElementWithStatus(
-          [&referred_by_output, &buffer_allocations, this](
-              const ShapeIndex& /*index*/,
-              const PointsToSet::BufferList& buffers) {
-            // The points to set is unambiguous so the set should be a
-            // singleton. That is, we know exactly which instruction produced
-            // the array at this element.
-            CHECK_EQ(1, buffers.size());
-            HloInstruction* hlo = buffers[0]->instruction();
-            TF_ASSIGN_OR_RETURN(
-                const BufferAllocation::Slice slice,
-                this->assignment_->GetUniqueSlice(hlo, buffers[0]->index()));
-            CHECK(!slice.allocation()->is_entry_computation_parameter());
-            referred_by_output.insert(
-                buffer_allocations->GetDeviceAddress(slice.index()));
-            return Status::OK();
-          }));
-    }
-    TF_RETURN_IF_ERROR(
-        buffer_allocations->TearDown(referred_by_output, *assignment_));
-  } else {
-    // If the computation result is not a tuple, we can delete all temporary
-    // buffers that are not the output.
-    TF_RETURN_IF_ERROR(
-        buffer_allocations->TearDown({output_buffer_address}, *assignment_));
-  }
-  return output_buffer_address;
-}
-
 StatusOr<std::unique_ptr<ShapedBuffer>> GpuExecutable::ExecuteOnStream(
     const ServiceExecutableRunOptions* run_options,
     tensorflow::gtl::ArraySlice<const ShapedBuffer*> arguments,
@@ -298,7 +220,7 @@ StatusOr<std::unique_ptr<ShapedBuffer>> GpuExecutable::ExecuteOnStream(
     if (allocation.is_entry_computation_parameter()) {
       auto param_no = allocation.parameter_number();
       buffer_allocations_builder.RegisterBuffer(
-          i, arguments[param_no]->buffer(/*index=*/{}));
+          i, arguments[param_no]->root_buffer());
     }
   }
   se::StreamExecutor* executor = run_options->stream()->parent();
@@ -316,50 +238,46 @@ StatusOr<std::unique_ptr<ShapedBuffer>> GpuExecutable::ExecuteOnStream(
   HloInstruction* root = hlo_module_->entry_computation()->root_instruction();
   auto device_ordinal = executor->device_ordinal();
   auto shaped_buffer = MakeUnique<ShapedBuffer>(
-      root->shape(), executor->platform(), device_ordinal);
+      root->shape(), root->shape(), executor->platform(), device_ordinal);
 
   // Copy DeviceMemoryBase values which contain the array(s) of the result into
   // the respective location in ShapedBuffer.
   std::set<se::DeviceMemoryBase> buffers_in_result;
-  TF_RETURN_IF_ERROR(
-      shaped_buffer->mutable_shape_index_to_buffer_entry()
-          ->ForEachMutableElementWithStatus(
-              [&buffer_allocations, &buffers_in_result, &shaped_buffer, this](
-                  const ShapeIndex& index, size_t* buffer_entry) {
-                const auto& sources = this->GetRootPointsToSet().element(index);
-                // The points-to set is unambiguous so the set should be a
-                // singleton. That is, we know exactly which instruction
-                // produced the array at this element.
-                CHECK_EQ(1, sources.size());
-                auto src_hlo = sources[0]->instruction();
-
-                VLOG(4) << "Looking at: " << sources[0];
-
-                // The source instruction should have a non-parameter buffer
-                // assigned.
-                TF_ASSIGN_OR_RETURN(const BufferAllocation::Slice slice,
-                                    this->assignment_->GetUniqueSlice(
-                                        src_hlo, sources[0]->index()));
-                CHECK(!slice.allocation()->is_entry_computation_parameter());
-
-                perftools::gputools::DeviceMemoryBase src_base =
-                    buffer_allocations->GetDeviceAddress(slice.index());
-                CHECK(!src_base.is_null() || src_base.size() == 0);
-                shaped_buffer->mutable_buffers()->push_back(src_base);
-                *buffer_entry = shaped_buffer->mutable_buffers()->size() - 1;
-
-                buffers_in_result.insert(src_base);
-                return Status::OK();
-              }));
+  TF_RETURN_IF_ERROR(shaped_buffer->buffers().ForEachMutableElementWithStatus(
+      [&buffer_allocations, &buffers_in_result, &shaped_buffer, this](
+          const ShapeIndex& index, se::DeviceMemoryBase* device_memory) {
+        const auto& sources = this->GetRootPointsToSet().element(index);
+        // The points-to set is unambiguous so the set should be a
+        // singleton. That is, we know exactly which instruction
+        // produced the array at this element.
+        CHECK_EQ(1, sources.size());
+        auto src_hlo = sources[0]->instruction();
+
+        VLOG(4) << "Looking at: " << sources[0];
+
+        // The source instruction should have a non-parameter buffer
+        // assigned.
+        TF_ASSIGN_OR_RETURN(
+            const BufferAllocation::Slice slice,
+            this->assignment_->GetUniqueSlice(src_hlo, sources[0]->index()));
+        CHECK(!slice.allocation()->is_entry_computation_parameter());
+
+        perftools::gputools::DeviceMemoryBase src_base =
+            buffer_allocations->GetDeviceAddress(slice.index());
+        CHECK(!src_base.is_null() || src_base.size() == 0);
+        *device_memory = src_base;
+        buffers_in_result.insert(src_base);
+        return Status::OK();
+      }));
   TF_RETURN_IF_ERROR(
       buffer_allocations->TearDown(buffers_in_result, *assignment_));
 
   return std::move(shaped_buffer);
 }
 
-StatusOr<se::DeviceMemoryBase> GpuExecutable::ExecuteAsyncOnStream(
+StatusOr<std::unique_ptr<ShapedBuffer>> GpuExecutable::ExecuteAsyncOnStream(
     const ServiceExecutableRunOptions* run_options,
-    tensorflow::gtl::ArraySlice<se::DeviceMemoryBase> arguments) {
+    tensorflow::gtl::ArraySlice<const ShapedBuffer*> arguments) {
   // TODO(b/30671675): Implement asynchronous execution mode.
   return Unimplemented(
       "Asynchronous execution on stream is not yet supported on GPU.");
diff --git a/tensorflow/compiler/xla/service/gpu/gpu_executable.h b/tensorflow/compiler/xla/service/gpu/gpu_executable.h
index e7307e07c0..00da64dfad 100644
--- a/tensorflow/compiler/xla/service/gpu/gpu_executable.h
+++ b/tensorflow/compiler/xla/service/gpu/gpu_executable.h
@@ -72,24 +72,16 @@ class GpuExecutable : public Executable {
   // empty, in which case compilation is left up to the GPU driver.
   const std::vector<uint8>& cubin() const { return cubin_; }
 
-  // Both overloads of ExecuteOnStream will fail if the compute capability of
-  // the stream doesn't match the compute capability passed to this object's
-  // constructor.
-  StatusOr<perftools::gputools::DeviceMemoryBase> ExecuteOnStream(
-      const ServiceExecutableRunOptions* run_options,
-      tensorflow::gtl::ArraySlice<perftools::gputools::DeviceMemoryBase>
-          arguments,
-      HloExecutionProfile* hlo_execution_profile) override;
-
+  // ExecuteOnStream will fail if the compute capability of the stream doesn't
+  // match the compute capability passed to this object's constructor.
   StatusOr<std::unique_ptr<ShapedBuffer>> ExecuteOnStream(
       const ServiceExecutableRunOptions* run_options,
       tensorflow::gtl::ArraySlice<const ShapedBuffer*> arguments,
       HloExecutionProfile* hlo_execution_profile) override;
 
-  StatusOr<perftools::gputools::DeviceMemoryBase> ExecuteAsyncOnStream(
+  StatusOr<std::unique_ptr<ShapedBuffer>> ExecuteAsyncOnStream(
       const ServiceExecutableRunOptions* run_options,
-      tensorflow::gtl::ArraySlice<perftools::gputools::DeviceMemoryBase>
-          arguments) override;
+      tensorflow::gtl::ArraySlice<const ShapedBuffer*> arguments) override;
 
   const Status EqualOrFail(const Executable& executable) {
     // TODO(b/62952745) Implement equality test on GPU executable.
diff --git a/tensorflow/compiler/xla/service/hlo_runner.cc b/tensorflow/compiler/xla/service/hlo_runner.cc
index a6101bbe60..7b3a8cef97 100644
--- a/tensorflow/compiler/xla/service/hlo_runner.cc
+++ b/tensorflow/compiler/xla/service/hlo_runner.cc
@@ -112,17 +112,12 @@ HloRunner::HloRunner(se::Platform* platform) {
   VLOG(1) << "Created HloRunner for platform: " << platform->Name();
 }
 
-HloRunner::~HloRunner() {
-  // Deallocate all the memory allocated during the tests.
-  for (auto& allocation : allocations_) {
-    backend().default_stream_executor()->Deallocate(&allocation);
-  }
-}
+HloRunner::~HloRunner() {}
 
-StatusOr<se::DeviceMemoryBase> HloRunner::Execute(
+StatusOr<std::unique_ptr<Literal>> HloRunner::ExecuteInternal(
     std::unique_ptr<HloModule> module,
-    tensorflow::gtl::ArraySlice<se::DeviceMemoryBase> arguments,
-    Shape* result_shape, bool run_hlo_passes) {
+    const tensorflow::gtl::ArraySlice<Literal*> arguments,
+    bool run_hlo_passes) {
   if (run_hlo_passes) {
     TF_ASSIGN_OR_RETURN(
         module, backend().compiler()->RunHloPasses(
@@ -137,6 +132,7 @@ StatusOr<se::DeviceMemoryBase> HloRunner::Execute(
   stream.Init();
 
   ExecutableRunOptions run_options;
+  run_options.set_device_ordinal(backend().default_device_ordinal());
   run_options.set_stream(&stream);
   run_options.set_allocator(backend().memory_allocator());
   run_options.set_inter_op_thread_pool(backend().inter_op_thread_pool());
@@ -146,73 +142,35 @@ StatusOr<se::DeviceMemoryBase> HloRunner::Execute(
   ServiceExecutableRunOptions service_run_options(
       run_options, backend().StreamBorrower(),
       backend().inter_op_thread_pool());
-  TF_ASSIGN_OR_RETURN(
-      se::DeviceMemoryBase result,
-      executable->ExecuteOnStream(&service_run_options, arguments,
-                                  /*hlo_execution_profile=*/nullptr));
-  TF_RETURN_IF_ERROR(stream.BlockHostUntilDone());
 
-  allocations_.push_back(result);
-
-  *result_shape = executable->result_shape();
-
-  if (ShapeUtil::IsTuple(*result_shape)) {
-    // We must record element buffers of tuples as well to avoid leaks.
-    DCHECK(!ShapeUtil::IsNestedTuple(*result_shape));
+  // Copy arguments to device.
+  std::vector<std::unique_ptr<ScopedShapedBuffer>> argument_buffers;
+  std::vector<ShapedBuffer*> argument_buffer_ptrs;
+  for (Literal* argument : arguments) {
     TF_ASSIGN_OR_RETURN(
-        std::vector<se::DeviceMemoryBase> element_buffers,
-        backend().transfer_manager()->ShallowCopyTupleFromDevice(
-            backend().default_stream_executor(), result, *result_shape));
-
-    // A tuple may contain the same buffer in more than one element. Keep track
-    // of the buffers already added to avoid duplicates in allocations_.
-    std::set<void*> added_opaques;
-    for (auto element_buffer : element_buffers) {
-      if (added_opaques.count(element_buffer.opaque()) == 0) {
-        CHECK(element_buffer.opaque() != nullptr);
-        added_opaques.insert(element_buffer.opaque());
-        allocations_.push_back(element_buffer);
-      }
-    }
+        std::unique_ptr<ScopedShapedBuffer> argument_buffer,
+        backend().transfer_manager()->AllocateScopedShapedBuffer(
+            argument->shape(), run_options.allocator(),
+            run_options.device_ordinal()));
+    TF_RETURN_IF_ERROR(backend().transfer_manager()->TransferLiteralToDevice(
+        stream.parent(), *argument, *argument_buffer));
+    argument_buffers.push_back(std::move(argument_buffer));
+    argument_buffer_ptrs.push_back(argument_buffers.back().get());
   }
 
-  return result;
-}
-
-StatusOr<se::DeviceMemoryBase> HloRunner::TransferToDevice(
-    const Literal& literal) {
-  // Allocate memory on the device using the stream executor.
-  int64 allocation_size =
-      backend().transfer_manager()->GetByteSizeRequirement(literal.shape());
-  se::DeviceMemoryBase allocation =
-      backend().default_stream_executor()->AllocateArray<uint8>(
-          allocation_size);
-  allocations_.push_back(allocation);
-
-  TF_RETURN_IF_ERROR(backend().transfer_manager()->TransferLiteralToDevice(
-      backend().default_stream_executor(), literal, &allocation));
-
-  return allocation;
-}
-
-StatusOr<std::unique_ptr<Literal>> HloRunner::TransferFromDevice(
-    const Shape& shape, se::DeviceMemoryBase device_base) {
-  auto literal = MakeUnique<Literal>();
-  TF_RETURN_IF_ERROR(backend().transfer_manager()->TransferLiteralFromDevice(
-      backend().default_stream_executor(), device_base, shape, shape,
-      literal.get()));
-  return std::move(literal);
-}
+  TF_ASSIGN_OR_RETURN(
+      std::unique_ptr<ShapedBuffer> result,
+      executable->ExecuteOnStream(&service_run_options, argument_buffer_ptrs,
+                                  /*hlo_execution_profile=*/nullptr));
 
-StatusOr<std::unique_ptr<Literal>> HloRunner::ExecuteAndTransfer(
-    std::unique_ptr<HloModule> module,
-    tensorflow::gtl::ArraySlice<se::DeviceMemoryBase> arguments,
-    bool run_hlo_passes) {
-  Shape result_shape;
+  // Create a ScopedShapedBuffer of the result to manage deallocation. This will
+  // deallocate all the device memory when it goes out of scope.
   TF_ASSIGN_OR_RETURN(
-      se::DeviceMemoryBase device_base,
-      Execute(std::move(module), arguments, &result_shape, run_hlo_passes));
-  return TransferFromDevice(result_shape, device_base);
+      std::unique_ptr<ScopedShapedBuffer> scoped_result,
+      ScopedShapedBuffer::MakeScoped(result.get(), run_options.allocator()));
+
+  return backend().transfer_manager()->TransferLiteralFromDevice(
+      stream.parent(), *scoped_result);
 }
 
 Backend& HloRunner::backend() {
diff --git a/tensorflow/compiler/xla/service/hlo_runner.h b/tensorflow/compiler/xla/service/hlo_runner.h
index a65c66fd4b..d4b221fb52 100644
--- a/tensorflow/compiler/xla/service/hlo_runner.h
+++ b/tensorflow/compiler/xla/service/hlo_runner.h
@@ -78,30 +78,7 @@ class HloRunner {
   template <typename LiteralPtr>
   StatusOr<std::unique_ptr<Literal>> Execute(
       std::unique_ptr<HloModule> module,
-      const tensorflow::gtl::ArraySlice<LiteralPtr> literals,
-      bool run_hlo_passes = true);
-
-  // Executes the given module and returns a global data handle.
-  StatusOr<perftools::gputools::DeviceMemoryBase> Execute(
-      std::unique_ptr<HloModule> module,
-      tensorflow::gtl::ArraySlice<perftools::gputools::DeviceMemoryBase>
-          arguments,
-      Shape* result_shape, bool run_hlo_passes = true);
-
-  // Transfers the given literal to the device and returns the data handle.
-  StatusOr<perftools::gputools::DeviceMemoryBase> TransferToDevice(
-      const Literal& literal);
-
-  // Transfers the array referred to by the given handle from the device and
-  // returns as a Literal.
-  StatusOr<std::unique_ptr<Literal>> TransferFromDevice(
-      const Shape& shape, perftools::gputools::DeviceMemoryBase device_base);
-
-  // Executes the given module and return the result as a Literal.
-  StatusOr<std::unique_ptr<Literal>> ExecuteAndTransfer(
-      std::unique_ptr<HloModule> module,
-      tensorflow::gtl::ArraySlice<perftools::gputools::DeviceMemoryBase>
-          arguments,
+      const tensorflow::gtl::ArraySlice<LiteralPtr> arguments,
       bool run_hlo_passes = true);
 
   // If backend is not created in the constructor, creates and returns the
@@ -112,9 +89,12 @@ class HloRunner {
   Backend& backend();
 
  private:
-  struct EigenThreadPoolWrapper;
+  StatusOr<std::unique_ptr<Literal>> ExecuteInternal(
+      std::unique_ptr<HloModule> module,
+      const tensorflow::gtl::ArraySlice<Literal*> arguments,
+      bool run_hlo_passes = true);
 
-  std::vector<perftools::gputools::DeviceMemoryBase> allocations_;
+  struct EigenThreadPoolWrapper;
 
   std::unique_ptr<EigenThreadPoolWrapper> thread_pool_wrapper_;
 
@@ -124,15 +104,14 @@ class HloRunner {
 template <typename LiteralPtr>
 StatusOr<std::unique_ptr<Literal>> HloRunner::Execute(
     std::unique_ptr<HloModule> module,
-    const tensorflow::gtl::ArraySlice<LiteralPtr> literals,
+    const tensorflow::gtl::ArraySlice<LiteralPtr> arguments,
     bool run_hlo_passes) {
-  std::vector<perftools::gputools::DeviceMemoryBase> arguments;
-  for (const auto& literal : literals) {
-    TF_ASSIGN_OR_RETURN(perftools::gputools::DeviceMemoryBase argument,
-                        TransferToDevice(*literal));
-    arguments.push_back(argument);
+  // Construct a vector of plain pointers for the arguments.
+  std::vector<Literal*> argument_pointers;
+  for (const auto& argument : arguments) {
+    argument_pointers.push_back(&*argument);
   }
-  return ExecuteAndTransfer(std::move(module), arguments, run_hlo_passes);
+  return ExecuteInternal(std::move(module), argument_pointers, run_hlo_passes);
 }
 
 }  // namespace xla
diff --git a/tensorflow/compiler/xla/service/interpreter/BUILD b/tensorflow/compiler/xla/service/interpreter/BUILD
index 2704a805a9..0819ab3b90 100644
--- a/tensorflow/compiler/xla/service/interpreter/BUILD
+++ b/tensorflow/compiler/xla/service/interpreter/BUILD
@@ -92,6 +92,7 @@ cc_library(
         "//tensorflow/compiler/xla/service:hlo_execution_profile",
         "//tensorflow/compiler/xla/service:hlo_module_config",
         "//tensorflow/compiler/xla/service:shaped_buffer",
+        "//tensorflow/compiler/xla/service:transfer_manager",
         "//tensorflow/core:lib",
         "//tensorflow/core:stream_executor_no_cuda",
     ],
diff --git a/tensorflow/compiler/xla/service/interpreter/executable.cc b/tensorflow/compiler/xla/service/interpreter/executable.cc
index 293cc2007e..b01fcccdb4 100644
--- a/tensorflow/compiler/xla/service/interpreter/executable.cc
+++ b/tensorflow/compiler/xla/service/interpreter/executable.cc
@@ -27,6 +27,7 @@ limitations under the License.
 #include "tensorflow/compiler/xla/service/hlo_evaluator.h"
 #include "tensorflow/compiler/xla/service/hlo_instruction.h"
 #include "tensorflow/compiler/xla/service/interpreter/executor.h"
+#include "tensorflow/compiler/xla/service/transfer_manager.h"
 #include "tensorflow/compiler/xla/shape_util.h"
 #include "tensorflow/compiler/xla/status_macros.h"
 #include "tensorflow/core/lib/core/errors.h"
@@ -47,44 +48,18 @@ InterpreterExecutable::InterpreterExecutable(
 
 InterpreterExecutable::~InterpreterExecutable() {}
 
-static se::DeviceMemoryBase AllocateSingleOutput(
-    sep::InterpreterExecutor* executor, const Literal& literal) {
-  int64 size(xla::ShapeUtil::ByteSizeOf(literal.shape()));
-  void* buf = executor->Allocate(size);
-  const void* src = literal.InternalData();
-  memcpy(buf, src, size);
-  return se::DeviceMemoryBase(buf, size);
-}
-
-static se::DeviceMemoryBase AllocateOutputBuffer(
-    sep::InterpreterExecutor* executor, const Literal& literal) {
-  const Shape& shape = literal.shape();
-  if (shape.element_type() != xla::TUPLE) {
-    return AllocateSingleOutput(executor, literal);
-  } else {
-    int64 size(xla::ShapeUtil::ByteSizeOf(shape, sizeof(void*)));
-    void** buf = reinterpret_cast<void**>(executor->Allocate(size));
-    void** buf_rc = buf;
-    for (int64 n = 0; n < xla::ShapeUtil::TupleElementCount(shape); n++) {
-      se::DeviceMemoryBase out =
-          AllocateSingleOutput(executor, literal.tuple_literals(n));
-      *buf++ = out.opaque();
-    }
-
-    return se::DeviceMemoryBase(buf_rc, size);
-  }
-}
-
-StatusOr<se::DeviceMemoryBase> InterpreterExecutable::ExecuteOnStream(
+StatusOr<std::unique_ptr<ShapedBuffer>> InterpreterExecutable::ExecuteOnStream(
     const ServiceExecutableRunOptions* run_options,
-    tensorflow::gtl::ArraySlice<se::DeviceMemoryBase> arguments,
+    tensorflow::gtl::ArraySlice<const ShapedBuffer*> arguments,
     HloExecutionProfile* hlo_execution_profile) {
   se::Stream* stream = run_options->stream();
+  se::StreamExecutor* executor = stream->parent();
+  const se::Platform* platform = executor->platform();
 
   VLOG(1) << "Execute " << module().name();
   if (VLOG_IS_ON(2)) {
     for (const auto& a : arguments) {
-      VLOG(2) << "-- argument " << a.opaque();
+      VLOG(2) << "-- argument " << *a;
     }
   }
 
@@ -96,32 +71,32 @@ StatusOr<se::DeviceMemoryBase> InterpreterExecutable::ExecuteOnStream(
         "Mismatch between argument count and graph parameter count.");
   }
 
-  // Create the arguments as an vector of XLA literals
+  TF_ASSIGN_OR_RETURN(TransferManager * transfer_manager,
+                      TransferManager::GetForPlatform(platform));
+
+  // Transform the ShapedBuffer arguments into literals which the evaluator
+  // consumes.
   std::vector<std::unique_ptr<Literal>> arg_literals;
   for (int64 p = 0; p < computation->num_parameters(); ++p) {
-    // Create the input literal for the parameter
-    HloInstruction* param = computation->parameter_instruction(p);
-    arg_literals.emplace_back(Literal::CreateFromShape(param->shape()));
-
-    // Copy in the data from the stream_executor buffers
-    void* buffer = arg_literals.back()->MutableInternalData();
-    memcpy(buffer, arguments[p].opaque(),
-           ShapeUtil::ByteSizeOf(param->shape()));
+    TF_ASSIGN_OR_RETURN(
+        std::unique_ptr<Literal> arg_literal,
+        transfer_manager->TransferLiteralFromDevice(executor, *arguments[p]));
+    arg_literals.push_back(std::move(arg_literal));
   }
 
   // Execute the graph using the HloEvaluator.
   HloEvaluator evaluator;
   TF_ASSIGN_OR_RETURN(
-      std::unique_ptr<Literal> output,
+      std::unique_ptr<Literal> result_literal,
       evaluator.Evaluate<std::unique_ptr<Literal>>(*computation, arg_literals));
 
-  // Copy the result into the return buffer
-  perftools::gputools::StreamExecutor* executor(stream->parent());
-  sep::InterpreterExecutor* interpreter_executor(
-      static_cast<sep::InterpreterExecutor*>(executor->implementation()));
-
-  se::DeviceMemoryBase ret =
-      AllocateOutputBuffer(interpreter_executor, *(output.get()));
+  // Transform the result literal back into a ShapedBuffer.
+  TF_ASSIGN_OR_RETURN(std::unique_ptr<ShapedBuffer> result,
+                      transfer_manager->AllocateShapedBuffer(
+                          result_literal->shape(), run_options->allocator(),
+                          run_options->device_ordinal()));
+  TF_RETURN_IF_ERROR(transfer_manager->TransferLiteralToDevice(
+      executor, *result_literal, *result));
 
   uint64 end_micros = tensorflow::Env::Default()->NowMicros();
 
@@ -131,20 +106,13 @@ StatusOr<se::DeviceMemoryBase> InterpreterExecutable::ExecuteOnStream(
     execution_profile_.set_compute_time_ns(std::max(nanoseconds, 1.0));
   }
 
-  return ret;
-}
-
-StatusOr<std::unique_ptr<ShapedBuffer>> InterpreterExecutable::ExecuteOnStream(
-    const ServiceExecutableRunOptions* run_options,
-    tensorflow::gtl::ArraySlice<const ShapedBuffer*> arguments,
-    HloExecutionProfile* hlo_execution_profile) {
-  return tensorflow::errors::Unimplemented(
-      "ExecuteOnStream is not yet supported on Interpreter.");
+  return std::move(result);
 }
 
-StatusOr<se::DeviceMemoryBase> InterpreterExecutable::ExecuteAsyncOnStream(
+StatusOr<std::unique_ptr<ShapedBuffer>>
+InterpreterExecutable::ExecuteAsyncOnStream(
     const ServiceExecutableRunOptions* run_options,
-    tensorflow::gtl::ArraySlice<se::DeviceMemoryBase> arguments) {
+    tensorflow::gtl::ArraySlice<const ShapedBuffer*> arguments) {
   return tensorflow::errors::Unimplemented(
       "ExecuteAsyncOnStream is not yet supported on Interpreter.");
 }
diff --git a/tensorflow/compiler/xla/service/interpreter/executable.h b/tensorflow/compiler/xla/service/interpreter/executable.h
index 0e87eb90bf..410110a1ad 100644
--- a/tensorflow/compiler/xla/service/interpreter/executable.h
+++ b/tensorflow/compiler/xla/service/interpreter/executable.h
@@ -43,21 +43,14 @@ class InterpreterExecutable : public Executable {
   InterpreterExecutable(std::unique_ptr<const HloModule> hlo_module);
   ~InterpreterExecutable() override;
 
-  StatusOr<perftools::gputools::DeviceMemoryBase> ExecuteOnStream(
-      const ServiceExecutableRunOptions* run_options,
-      tensorflow::gtl::ArraySlice<perftools::gputools::DeviceMemoryBase>
-          arguments,
-      HloExecutionProfile* hlo_execution_profile) override;
-
   StatusOr<std::unique_ptr<ShapedBuffer>> ExecuteOnStream(
       const ServiceExecutableRunOptions* run_options,
       tensorflow::gtl::ArraySlice<const ShapedBuffer*> arguments,
       HloExecutionProfile* hlo_execution_profile) override;
 
-  StatusOr<perftools::gputools::DeviceMemoryBase> ExecuteAsyncOnStream(
+  StatusOr<std::unique_ptr<ShapedBuffer>> ExecuteAsyncOnStream(
       const ServiceExecutableRunOptions* run_options,
-      tensorflow::gtl::ArraySlice<perftools::gputools::DeviceMemoryBase>
-          arguments) override;
+      tensorflow::gtl::ArraySlice<const ShapedBuffer*> arguments) override;
 
   static int64 ShapeSizeBytes(const Shape& shape);
 
diff --git a/tensorflow/compiler/xla/service/local_service.cc b/tensorflow/compiler/xla/service/local_service.cc
index 06f43bd3cb..4071b948a5 100644
--- a/tensorflow/compiler/xla/service/local_service.cc
+++ b/tensorflow/compiler/xla/service/local_service.cc
@@ -118,10 +118,8 @@ StatusOr<std::unique_ptr<Executable>> LocalService::CompileExecutable(
   TF_ASSIGN_OR_RETURN(se::StreamExecutor * executor,
                       execute_backend_->stream_executor(device_ordinal));
 
-  std::vector<perftools::gputools::DeviceMemoryBase> argument_buffers(
-      argument_layouts.size());
   return BuildExecutable(versioned_handle, std::move(module_config),
-                         argument_buffers, execute_backend_.get(), executor);
+                         execute_backend_.get(), executor);
 }
 
 }  // namespace xla
diff --git a/tensorflow/compiler/xla/service/service.cc b/tensorflow/compiler/xla/service/service.cc
index 9d78e6a2b2..e77a46128b 100644
--- a/tensorflow/compiler/xla/service/service.cc
+++ b/tensorflow/compiler/xla/service/service.cc
@@ -60,41 +60,32 @@ namespace xla {
 
 namespace {
 
-// Copies the contents of an Allocation into a Literal proto.
-tensorflow::Status LiteralFromAllocation(const Allocation* allocation,
-                                         const Shape& literal_shape,
-                                         Literal* literal) {
-  TF_ASSIGN_OR_RETURN(
-      se::StreamExecutor * executor,
-      allocation->backend()->stream_executor(allocation->device_ordinal()));
-  return allocation->backend()->transfer_manager()->TransferLiteralFromDevice(
-      executor, allocation->device_memory(), allocation->shape(), literal_shape,
-      literal);
-}
-
 // Records the arguments used to invoke a computation in a SessionModule
 // proto.
 tensorflow::Status RecordArguments(
-    const tensorflow::gtl::ArraySlice<const Allocation*> arg_allocations,
+    const tensorflow::gtl::ArraySlice<const ShapedBuffer*> arguments,
+    se::StreamExecutor* executor, TransferManager* transfer_manager,
     SessionModule* module) {
   module->clear_arguments();
-  for (const Allocation* allocation : arg_allocations) {
-    Literal argument;
-    TF_RETURN_IF_ERROR(
-        LiteralFromAllocation(allocation, allocation->shape(), &argument));
-    *module->add_arguments() = argument.ToProto();
+  for (const ShapedBuffer* argument : arguments) {
+    TF_ASSIGN_OR_RETURN(
+        std::unique_ptr<Literal> literal,
+        transfer_manager->TransferLiteralFromDevice(executor, *argument));
+    *module->add_arguments() = literal->ToProto();
   }
   return tensorflow::Status::OK();
 }
 
 // Records the result of a computation in a SessionModule proto.
-tensorflow::Status RecordResult(const Allocation* result_allocation,
+tensorflow::Status RecordResult(const ShapedBuffer& result,
+                                se::StreamExecutor* executor,
+                                TransferManager* transfer_manager,
                                 SessionModule* module) {
   module->clear_result();
-  Literal result;
-  TF_RETURN_IF_ERROR(LiteralFromAllocation(
-      result_allocation, result_allocation->shape(), &result));
-  *module->mutable_result() = result.ToProto();
+  TF_ASSIGN_OR_RETURN(
+      std::unique_ptr<Literal> literal,
+      transfer_manager->TransferLiteralFromDevice(executor, result));
+  *module->mutable_result() = literal->ToProto();
   return tensorflow::Status::OK();
 }
 
@@ -152,7 +143,9 @@ int ServiceOptions::intra_op_parallelism_threads() const {
 
 Service::Service(const ServiceOptions& options,
                  std::unique_ptr<Backend> execute_backend)
-    : options_(options), execute_backend_(std::move(execute_backend)) {
+    : options_(options),
+      allocation_tracker_(execute_backend.get()),
+      execute_backend_(std::move(execute_backend)) {
   CHECK_GT(options_.number_of_replicas(), 0);
   if (execute_backend_) {
     if (execute_backend_->device_count() > 0) {
@@ -235,35 +228,33 @@ tensorflow::Status Service::ValidateResultShapeWithLayout(
   return ShapeUtil::ValidateShape(shape_with_layout);
 }
 
-StatusOr<std::vector<const Allocation*>> Service::ResolveAndValidateArguments(
+StatusOr<std::vector<const ShapedBuffer*>> Service::ResolveAndValidateArguments(
     tensorflow::gtl::ArraySlice<const GlobalDataHandle*> arguments,
-    const Backend* backend, int device_ordinal) {
-  std::vector<const Allocation*> allocations;
+    int device_ordinal) {
+  std::vector<const ShapedBuffer*> shaped_buffers;
   for (size_t i = 0; i < arguments.size(); ++i) {
-    auto allocation_status = allocation_tracker_.Resolve(*arguments[i]);
-    if (!allocation_status.ok()) {
-      return Status(allocation_status.status().code(),
-                    StrCat(allocation_status.status().error_message(), ", ",
+    auto buffer_status = allocation_tracker_.Resolve(*arguments[i]);
+    if (!buffer_status.ok()) {
+      return Status(buffer_status.status().code(),
+                    StrCat(buffer_status.status().error_message(), ", ",
                            "failed to resolve allocation for parameter ", i));
     }
-    const Allocation* allocation = allocation_status.ValueOrDie();
+    const ShapedBuffer* shaped_buffer = buffer_status.ValueOrDie();
 
     // Verify allocation is same platform and device as the execution.
-    if (allocation->backend() != backend ||
-        allocation->device_ordinal() != device_ordinal) {
+    if (shaped_buffer->platform() != execute_backend_->platform() ||
+        shaped_buffer->device_ordinal() != device_ordinal) {
       return InvalidArgument(
-          "argument %lu is on device %s but computation will be executed "
+          "argument %lu is on device %s:%d but computation will be executed "
           "on device %s",
-          i,
-          allocation->backend()
-              ->device_name(allocation->device_ordinal())
-              .c_str(),
-          backend->device_name(device_ordinal).c_str());
+          i, shaped_buffer->platform()->Name().c_str(),
+          shaped_buffer->device_ordinal(),
+          execute_backend_->device_name(device_ordinal).c_str());
     }
 
-    allocations.push_back(allocation);
+    shaped_buffers.push_back(shaped_buffer);
   }
-  return allocations;
+  return shaped_buffers;
 }
 
 StatusOr<std::unique_ptr<HloModuleConfig>> Service::CreateModuleConfig(
@@ -325,11 +316,11 @@ StatusOr<std::unique_ptr<HloModuleConfig>> Service::CreateModuleConfig(
 
 StatusOr<std::unique_ptr<HloModuleConfig>> Service::CreateModuleConfig(
     const ProgramShape& program_shape,
-    tensorflow::gtl::ArraySlice<const Allocation*> arguments,
+    tensorflow::gtl::ArraySlice<const ShapedBuffer*> arguments,
     const ExecutionOptions& execution_options) {
   std::vector<const Shape*> argument_shapes;
   for (const auto* arg : arguments) {
-    argument_shapes.push_back(&arg->shape());
+    argument_shapes.push_back(&arg->on_host_shape());
   }
   return CreateModuleConfig(program_shape, argument_shapes, &execution_options);
 }
@@ -398,8 +389,6 @@ StatusOr<std::vector<std::unique_ptr<Executable>>> Service::BuildExecutables(
 StatusOr<std::unique_ptr<Executable>> Service::BuildExecutable(
     const VersionedComputationHandle& versioned_handle,
     std::unique_ptr<HloModuleConfig> module_config,
-    const tensorflow::gtl::ArraySlice<perftools::gputools::DeviceMemoryBase>
-        arguments,
     Backend* backend, se::StreamExecutor* executor) {
   VLOG(1) << Printf("BuildExecutable on service %p with handle %s", this,
                     versioned_handle.ToString().c_str());
@@ -447,8 +436,6 @@ StatusOr<std::unique_ptr<Executable>> Service::BuildExecutable(
 StatusOr<std::shared_ptr<Executable>> Service::BuildAndCacheExecutable(
     const VersionedComputationHandle& versioned_handle,
     std::unique_ptr<HloModuleConfig> module_config,
-    const tensorflow::gtl::ArraySlice<perftools::gputools::DeviceMemoryBase>
-        arguments,
     Backend* backend, perftools::gputools::StreamExecutor* executor,
     ExecutionProfile* profile) {
   std::shared_ptr<Executable> executable =
@@ -471,8 +458,8 @@ StatusOr<std::shared_ptr<Executable>> Service::BuildAndCacheExecutable(
   HloModuleConfig original_module_config = *module_config;
   TF_ASSIGN_OR_RETURN(
       std::unique_ptr<Executable> executable_unique_ptr,
-      BuildExecutable(versioned_handle, std::move(module_config), arguments,
-                      backend, executor));
+      BuildExecutable(versioned_handle, std::move(module_config), backend,
+                      executor));
 
   if (profile != nullptr) {
     uint64 end_micros = tensorflow::Env::Default()->NowMicros();
@@ -489,9 +476,7 @@ StatusOr<std::shared_ptr<Executable>> Service::BuildAndCacheExecutable(
 StatusOr<std::vector<GlobalDataHandle>>
 Service::ExecuteParallelAndRegisterResult(
     tensorflow::gtl::ArraySlice<Executable*> executables,
-    tensorflow::gtl::ArraySlice<
-        std::vector<perftools::gputools::DeviceMemoryBase>>
-        arguments,
+    tensorflow::gtl::ArraySlice<std::vector<const ShapedBuffer*>> arguments,
     Backend* backend, tensorflow::gtl::ArraySlice<DeviceHandle> device_handles,
     tensorflow::gtl::ArraySlice<string> result_tags,
     ExecutionProfile* profile) {
@@ -547,7 +532,7 @@ Service::ExecuteParallelAndRegisterResult(
 
       // Asynchronously launch the computation.
       TF_ASSIGN_OR_RETURN(
-          perftools::gputools::DeviceMemoryBase result,
+          std::unique_ptr<ShapedBuffer> result,
           executables[i]->ExecuteAsyncOnStream(&run_options, arguments[i]));
 
       if (replica == 0 && profile != nullptr) {
@@ -557,9 +542,10 @@ Service::ExecuteParallelAndRegisterResult(
       // All replicas share the same device address for the result allocation,
       // so only one of the replicas need to register the result handle.
       if (replica == 0) {
-        result_handles.push_back(allocation_tracker_.Register(
-            backend, replicas[0]->device_ordinal(), result,
-            executables[i]->result_shape(), result_tags[i]));
+        TF_ASSIGN_OR_RETURN(
+            GlobalDataHandle handle,
+            allocation_tracker_.Register(std::move(result), result_tags[i]));
+        result_handles.push_back(handle);
       }
     }
   }
@@ -627,8 +613,7 @@ Service::ExecuteParallelAndRegisterResult(
 
 StatusOr<GlobalDataHandle> Service::ExecuteAndRegisterResult(
     Executable* executable,
-    const tensorflow::gtl::ArraySlice<perftools::gputools::DeviceMemoryBase>
-        arguments,
+    const tensorflow::gtl::ArraySlice<const ShapedBuffer*> arguments,
     Backend* backend, perftools::gputools::StreamExecutor* executor,
     const string& result_tag, ExecutionProfile* profile) {
   // Set up streams.
@@ -653,6 +638,7 @@ StatusOr<GlobalDataHandle> Service::ExecuteAndRegisterResult(
   for (const Pool<se::Stream>::SmartPtr& stream : streams) {
     ExecutableRunOptions options;
     options.set_stream(stream.get());
+    options.set_device_ordinal(stream->parent()->device_ordinal());
     options.set_allocator(backend->memory_allocator());
     options.set_inter_op_thread_pool(backend->inter_op_thread_pool());
     options.set_intra_op_thread_pool(
@@ -662,25 +648,23 @@ StatusOr<GlobalDataHandle> Service::ExecuteAndRegisterResult(
                              backend->inter_op_thread_pool());
   }
 
-  perftools::gputools::DeviceMemoryBase result;
+  std::unique_ptr<ShapedBuffer> result;
   if (options_.number_of_replicas() == 1) {
     TF_ASSIGN_OR_RETURN(
-        result, executable->ExecuteOnStreamWrapper<se::DeviceMemoryBase>(
-                    &run_options[0], profile, arguments));
+        result,
+        executable->ExecuteOnStreamWrapper<std::unique_ptr<ShapedBuffer>>(
+            &run_options[0], profile, arguments));
   } else {
     // TODO(b/69985541): Support profiling also on this path.
-    std::vector<
-        tensorflow::gtl::ArraySlice<perftools::gputools::DeviceMemoryBase>>
+    std::vector<tensorflow::gtl::ArraySlice<const ShapedBuffer*>>
         repeated_arguments(options_.number_of_replicas(), arguments);
 
     TF_ASSIGN_OR_RETURN(auto results, executable->ExecuteOnStreams(
                                           run_options, repeated_arguments));
     TF_RET_CHECK(!results.empty());
-    result = results[0];
+    result = std::move(results[0]);
   }
-  return allocation_tracker_.Register(backend, executor->device_ordinal(),
-                                      result, executable->result_shape(),
-                                      result_tag);
+  return allocation_tracker_.Register(std::move(result), result_tag);
 }
 
 tensorflow::Status Service::SetReturnValue(const SetReturnValueRequest* arg,
@@ -694,7 +678,7 @@ tensorflow::Status Service::ExecuteParallel(const ExecuteParallelRequest* arg,
                                             ExecuteParallelResponse* result) {
   VLOG(1) << "running execute-parallel request: " << arg->ShortDebugString();
 
-  std::vector<std::vector<se::DeviceMemoryBase>> all_arguments;
+  std::vector<std::vector<const ShapedBuffer*>> all_arguments;
   std::vector<std::vector<perftools::gputools::StreamExecutor*>> all_executors;
   std::vector<VersionedComputationHandle> versioned_handles;
   std::vector<std::unique_ptr<HloModuleConfig>> module_configs;
@@ -751,19 +735,14 @@ tensorflow::Status Service::ExecuteParallel(const ExecuteParallelRequest* arg,
     // In the case of partitioned computations, assume all arguments go on the
     // zeroth core.
     TF_ASSIGN_OR_RETURN(
-        std::vector<const Allocation*> arg_allocations,
-        ResolveAndValidateArguments(request.arguments(), execute_backend_.get(),
+        std::vector<const ShapedBuffer*> arguments,
+        ResolveAndValidateArguments(request.arguments(),
                                     executors[0]->device_ordinal()));
-    std::vector<se::DeviceMemoryBase> arguments;
-    arguments.reserve(arg_allocations.size());
-    for (const Allocation* allocation : arg_allocations) {
-      arguments.push_back(allocation->device_memory());
-    }
 
     // Create an HloModuleConfig object for the computation, given the shape of
     // the program and the argument allocations.
     TF_ASSIGN_OR_RETURN(std::unique_ptr<HloModuleConfig> module_config,
-                        CreateModuleConfig(*program_shape, arg_allocations,
+                        CreateModuleConfig(*program_shape, arguments,
                                            request.execution_options()));
     VLOG(3) << "ExecuteParallel created HloModuleConfig computation layout: "
             << module_config->entry_computation_layout().ToString();
@@ -866,35 +845,30 @@ tensorflow::Status Service::Execute(const ExecuteRequest* arg,
       user_computation->ComputeProgramShape(versioned_handle.version));
 
   TF_ASSIGN_OR_RETURN(
-      std::vector<const Allocation*> arg_allocations,
-      ResolveAndValidateArguments(arg->arguments(), execute_backend_.get(),
+      std::vector<const ShapedBuffer*> arguments,
+      ResolveAndValidateArguments(arg->arguments(),
                                   execute_backend_->default_device_ordinal()));
 
-  TF_ASSIGN_OR_RETURN(std::unique_ptr<HloModuleConfig> module_config,
-                      CreateModuleConfig(*program_shape, arg_allocations,
-                                         arg->execution_options()));
+  TF_ASSIGN_OR_RETURN(
+      std::unique_ptr<HloModuleConfig> module_config,
+      CreateModuleConfig(*program_shape, arguments, arg->execution_options()));
 
   VLOG(3) << "Execute created HloModuleConfig computation layout: "
           << module_config->entry_computation_layout().ToString();
 
-  std::vector<se::DeviceMemoryBase> arguments;
-  arguments.reserve(arg_allocations.size());
-  for (const Allocation* allocation : arg_allocations) {
-    arguments.push_back(allocation->device_memory());
-  }
-
   TF_ASSIGN_OR_RETURN(
       std::shared_ptr<Executable> executable,
       BuildAndCacheExecutable(versioned_handle, std::move(module_config),
-                              arguments, execute_backend_.get(),
+                              execute_backend_.get(),
                               execute_backend_->default_stream_executor(),
                               result->mutable_profile()));
 
   if (executable->dumping()) {
     executable->session_module()->set_execution_platform(
         execute_backend_->platform()->Name());
-    TF_RETURN_IF_ERROR(
-        RecordArguments(arg_allocations, executable->session_module()));
+    TF_RETURN_IF_ERROR(RecordArguments(
+        arguments, execute_backend_->default_stream_executor(),
+        execute_backend_->transfer_manager(), executable->session_module()));
   }
 
   TF_ASSIGN_OR_RETURN(
@@ -905,10 +879,11 @@ tensorflow::Status Service::Execute(const ExecuteRequest* arg,
           "result of " + user_computation->name(), result->mutable_profile()));
 
   if (executable->dumping()) {
-    TF_ASSIGN_OR_RETURN(const Allocation* result_allocation,
+    TF_ASSIGN_OR_RETURN(const ShapedBuffer* result_buffer,
                         allocation_tracker_.Resolve(result->output()));
-    TF_RETURN_IF_ERROR(
-        RecordResult(result_allocation, executable->session_module()));
+    TF_RETURN_IF_ERROR(RecordResult(
+        *result_buffer, execute_backend_->default_stream_executor(),
+        execute_backend_->transfer_manager(), executable->session_module()));
     TF_RETURN_IF_ERROR(executable->DumpSessionModule());
   }
 
@@ -934,31 +909,24 @@ tensorflow::Status Service::ExecuteAsync(const ExecuteAsyncRequest* arg,
       user_computation->ComputeProgramShape(versioned_handle.version));
 
   TF_ASSIGN_OR_RETURN(
-      std::vector<const Allocation*> arg_allocations,
-      ResolveAndValidateArguments(arg->arguments(), execute_backend_.get(),
+      std::vector<const ShapedBuffer*> arguments,
+      ResolveAndValidateArguments(arg->arguments(),
                                   execute_backend_->default_device_ordinal()));
 
-  TF_ASSIGN_OR_RETURN(std::unique_ptr<HloModuleConfig> module_config,
-                      CreateModuleConfig(*program_shape, arg_allocations,
-                                         arg->execution_options()));
+  TF_ASSIGN_OR_RETURN(
+      std::unique_ptr<HloModuleConfig> module_config,
+      CreateModuleConfig(*program_shape, arguments, arg->execution_options()));
 
   VLOG(3) << "ExecuteAsync created HloModuleConfig computation layout: "
           << module_config->entry_computation_layout().ToString();
 
-  std::vector<se::DeviceMemoryBase> arguments;
-  arguments.reserve(arg_allocations.size());
-  for (const Allocation* allocation : arg_allocations) {
-    arguments.push_back(allocation->device_memory());
-  }
-
   ExecutionProfile profile;
 
   TF_ASSIGN_OR_RETURN(
       std::shared_ptr<Executable> executable,
-      BuildAndCacheExecutable(versioned_handle, std::move(module_config),
-                              arguments, execute_backend_.get(),
-                              execute_backend_->default_stream_executor(),
-                              &profile));
+      BuildAndCacheExecutable(
+          versioned_handle, std::move(module_config), execute_backend_.get(),
+          execute_backend_->default_stream_executor(), &profile));
 
   TF_ASSIGN_OR_RETURN(auto replicas, Replicas(*execute_backend_,
                                               SingleComputationDeviceHandle()));
@@ -973,7 +941,7 @@ tensorflow::Status Service::ExecuteAsync(const ExecuteAsyncRequest* arg,
     streams.push_back(std::move(stream));
   }
 
-  perftools::gputools::DeviceMemoryBase result_data;
+  std::unique_ptr<ShapedBuffer> result_buffer;
   for (const Pool<se::Stream>::SmartPtr& stream : streams) {
     ExecutableRunOptions options;
     options.set_stream(stream.get());
@@ -986,19 +954,19 @@ tensorflow::Status Service::ExecuteAsync(const ExecuteAsyncRequest* arg,
         options, execute_backend_->StreamBorrower());
 
     TF_ASSIGN_OR_RETURN(
-        perftools::gputools::DeviceMemoryBase this_result_data,
+        std::unique_ptr<ShapedBuffer> this_result_buffer,
         executable->ExecuteAsyncOnStream(&service_options, arguments));
 
     // Take the first result.
-    if (result_data == nullptr) {
-      result_data = this_result_data;
+    if (result_buffer == nullptr) {
+      result_buffer = std::move(this_result_buffer);
     }
   }
 
-  auto output = allocation_tracker_.Register(
-      execute_backend_.get(), execute_backend_->default_device_ordinal(),
-      result_data, executable->result_shape(),
-      "result of " + user_computation->name());
+  TF_ASSIGN_OR_RETURN(
+      GlobalDataHandle output,
+      allocation_tracker_.Register(std::move(result_buffer),
+                                   "result of " + user_computation->name()));
 
   *result->mutable_execution() = execution_tracker_.Register(
       execute_backend_.get(), std::move(streams), profile, output);
@@ -1025,23 +993,35 @@ tensorflow::Status Service::WaitForExecution(const WaitForExecutionRequest* arg,
 
 tensorflow::Status Service::TransferToClient(const TransferToClientRequest* arg,
                                              TransferToClientResponse* result) {
-  TF_ASSIGN_OR_RETURN(const Allocation* allocation,
+  TF_ASSIGN_OR_RETURN(const ShapedBuffer* shaped_buffer,
                       allocation_tracker_.Resolve(arg->data()));
 
-  const Shape* literal_shape;
+  const Shape* return_shape;
   if (arg->has_shape_with_layout()) {
     if (!LayoutUtil::HasLayout(arg->shape_with_layout())) {
       return InvalidArgument("shape_with_layout must have layout if present.");
     }
-    literal_shape = &arg->shape_with_layout();
+    return_shape = &arg->shape_with_layout();
   } else {
-    literal_shape = &allocation->shape();
+    return_shape = &shaped_buffer->on_host_shape();
   }
 
-  Literal literal;
-  TF_RETURN_IF_ERROR(
-      LiteralFromAllocation(allocation, *literal_shape, &literal));
-  *result->mutable_literal() = literal.ToProto();
+  TF_ASSIGN_OR_RETURN(
+      se::StreamExecutor * executor,
+      execute_backend_->stream_executor(shaped_buffer->device_ordinal()));
+
+  TF_ASSIGN_OR_RETURN(
+      std::unique_ptr<Literal> result_literal,
+      execute_backend_->transfer_manager()->TransferLiteralFromDevice(
+          executor, *shaped_buffer));
+
+  if (LayoutUtil::LayoutsInShapesEqual(*return_shape,
+                                       result_literal->shape())) {
+    *result->mutable_literal() = result_literal->ToProto();
+  } else {
+    *result->mutable_literal() =
+        result_literal->Relayout(*return_shape)->ToProto();
+  }
   return tensorflow::Status::OK();
 }
 
@@ -1052,12 +1032,9 @@ namespace {
 std::unique_ptr<ShapedBuffer> CloneShapedBufferOnDevice(
     const ShapedBuffer& shaped_buffer, int device_ordinal) {
   auto clone = MakeUnique<ShapedBuffer>(
-      shaped_buffer.shape(), shaped_buffer.platform(), device_ordinal);
-  ShapeUtil::ForEachSubshape(
-      shaped_buffer.shape(), [&clone, &shaped_buffer](const Shape& /*subshape*/,
-                                                      const ShapeIndex& index) {
-        clone->AddBufferAtIndex(shaped_buffer.buffer(index), index);
-      });
+      shaped_buffer.on_host_shape(), shaped_buffer.on_device_shape(),
+      shaped_buffer.platform(), device_ordinal);
+  clone->buffers() = shaped_buffer.buffers();
   return clone;
 }
 
@@ -1082,22 +1059,8 @@ tensorflow::Status Service::TransferToServer(const TransferToServerRequest* arg,
   int master_device_ordinal = replicas[0]->device_ordinal();
   TF_ASSIGN_OR_RETURN(
       std::unique_ptr<ShapedBuffer> shaped_buffer,
-      ShapedBuffer::Allocate(
-          execute_backend_->transfer_manager()->HostShapeToDeviceShape(shape),
-          execute_backend_->memory_allocator(), master_device_ordinal,
-          [this](const Shape& shape) {
-            return execute_backend_->transfer_manager()->GetByteSizeRequirement(
-                shape);
-          }));
-
-  // The allocation tracker only keeps track of the top-level buffer of the
-  // shape so pass in the buffer at shape index {}.
-  // TODO(b/37515654): Allocation tracker should hold a ShapedBuffer.
-  *result->mutable_data() = allocation_tracker_.Register(
-      execute_backend_.get(), master_device_ordinal,
-      shaped_buffer->buffer(/*index=*/{}), shape,
-      StrCat("TransferToServer literal of shape ",
-             ShapeUtil::HumanString(shape)));
+      execute_backend_->transfer_manager()->AllocateShapedBuffer(
+          shape, execute_backend_->memory_allocator(), master_device_ordinal));
 
   // Transfer the data to the replicas.
   for (se::StreamExecutor* executor : replicas) {
@@ -1117,6 +1080,12 @@ tensorflow::Status Service::TransferToServer(const TransferToServerRequest* arg,
               executor, literal, *clone));
     }
   }
+  TF_ASSIGN_OR_RETURN(
+      *result->mutable_data(),
+      allocation_tracker_.Register(std::move(shaped_buffer),
+                                   StrCat("TransferToServer literal of shape ",
+                                          ShapeUtil::HumanString(shape))));
+
   return tensorflow::Status::OK();
 }
 
@@ -1282,9 +1251,9 @@ tensorflow::Status Service::ComputeConstant(const ComputeConstantRequest* arg,
 
 tensorflow::Status Service::GetShape(const GetShapeRequest* arg,
                                      GetShapeResponse* result) {
-  TF_ASSIGN_OR_RETURN(const Allocation* allocation,
+  TF_ASSIGN_OR_RETURN(const ShapedBuffer* buffer,
                       allocation_tracker_.Resolve(arg->data()));
-  *result->mutable_shape() = allocation->shape();
+  *result->mutable_shape() = buffer->on_host_shape();
   return tensorflow::Status::OK();
 }
 
diff --git a/tensorflow/compiler/xla/service/service.h b/tensorflow/compiler/xla/service/service.h
index 47f4f0ade5..f962d0cdc7 100644
--- a/tensorflow/compiler/xla/service/service.h
+++ b/tensorflow/compiler/xla/service/service.h
@@ -250,7 +250,7 @@ class Service : public ServiceInterface {
   // class.
   StatusOr<std::unique_ptr<HloModuleConfig>> CreateModuleConfig(
       const ProgramShape& program_shape,
-      tensorflow::gtl::ArraySlice<const Allocation*> arguments,
+      tensorflow::gtl::ArraySlice<const ShapedBuffer*> arguments,
       const ExecutionOptions& execution_options);
 
  protected:
@@ -265,10 +265,10 @@ class Service : public ServiceInterface {
 
   // Resolves the given argument handles in the allocation tracker and returns
   // the corresponding allocations. The function also verifies that each
-  // allocation matches the given backend and device ordinal.
-  StatusOr<std::vector<const Allocation*>> ResolveAndValidateArguments(
+  // allocation matches the execution platform and device ordinal.
+  StatusOr<std::vector<const ShapedBuffer*>> ResolveAndValidateArguments(
       tensorflow::gtl::ArraySlice<const GlobalDataHandle*> arguments,
-      const Backend* backend, int device_ordinal);
+      int device_ordinal);
 
   // Create a Hlo module config for the given program shape and arguments.
   // execution_options is optional; if not given a default is used.
@@ -281,8 +281,6 @@ class Service : public ServiceInterface {
   StatusOr<std::unique_ptr<Executable>> BuildExecutable(
       const VersionedComputationHandle& versioned_handle,
       std::unique_ptr<HloModuleConfig> module_config,
-      const tensorflow::gtl::ArraySlice<perftools::gputools::DeviceMemoryBase>
-          arguments,
       Backend* backend, perftools::gputools::StreamExecutor* executor);
 
   // Same as BuildExecutable() above, but builds a list of Executables for the
@@ -299,8 +297,6 @@ class Service : public ServiceInterface {
   StatusOr<std::shared_ptr<Executable>> BuildAndCacheExecutable(
       const VersionedComputationHandle& versioned_handle,
       std::unique_ptr<HloModuleConfig> module_config,
-      const tensorflow::gtl::ArraySlice<perftools::gputools::DeviceMemoryBase>
-          arguments,
       Backend* backend, perftools::gputools::StreamExecutor* executor,
       ExecutionProfile* profile);
 
@@ -310,8 +306,7 @@ class Service : public ServiceInterface {
   // ExecutionProfile object which will be filled in with profile data.
   StatusOr<GlobalDataHandle> ExecuteAndRegisterResult(
       Executable* executable,
-      const tensorflow::gtl::ArraySlice<perftools::gputools::DeviceMemoryBase>
-          arguments,
+      const tensorflow::gtl::ArraySlice<const ShapedBuffer*> arguments,
       Backend* backend, perftools::gputools::StreamExecutor* executor,
       const string& result_tag, ExecutionProfile* profile);
 
@@ -320,9 +315,7 @@ class Service : public ServiceInterface {
   // from the tracker are returned.
   StatusOr<std::vector<GlobalDataHandle>> ExecuteParallelAndRegisterResult(
       tensorflow::gtl::ArraySlice<Executable*> executables,
-      tensorflow::gtl::ArraySlice<
-          std::vector<perftools::gputools::DeviceMemoryBase>>
-          arguments,
+      tensorflow::gtl::ArraySlice<std::vector<const ShapedBuffer*>> arguments,
       Backend* backend,
       tensorflow::gtl::ArraySlice<DeviceHandle> device_handles,
       tensorflow::gtl::ArraySlice<string> result_tags,
diff --git a/tensorflow/compiler/xla/service/shaped_buffer.cc b/tensorflow/compiler/xla/service/shaped_buffer.cc
index aa0a24a283..c679d401c3 100644
--- a/tensorflow/compiler/xla/service/shaped_buffer.cc
+++ b/tensorflow/compiler/xla/service/shaped_buffer.cc
@@ -34,86 +34,32 @@ namespace xla {
 
 using ::tensorflow::strings::Appendf;
 
-/* static */ StatusOr<std::unique_ptr<ShapedBuffer>>
-ShapedBuffer::MakeArrayShapedBuffer(const Shape& shape,
-                                    const se::Platform* platform,
-                                    int device_ordinal,
-                                    const se::DeviceMemoryBase& buffer) {
-  if (ShapeUtil::IsTuple(shape)) {
-    return InvalidArgument("Shape must be an array: %s",
-                           ShapeUtil::HumanStringWithLayout(shape).c_str());
-  }
-  auto shaped_buffer =
-      MakeUnique<ShapedBuffer>(shape, platform, device_ordinal);
-  *shaped_buffer->mutable_shape_index_to_buffer_entry()->mutable_element({}) =
-      0;
-  *shaped_buffer->mutable_buffers() = {buffer};
-  return std::move(shaped_buffer);
-}
-
-/* static */ StatusOr<std::unique_ptr<ShapedBuffer>> ShapedBuffer::Allocate(
-    const Shape& shape, DeviceMemoryAllocator* allocator, int device_ordinal,
-    const std::function<int64(const Shape&)>& shape_size_fn) {
-  if (!LayoutUtil::HasLayout(shape)) {
-    return InvalidArgument("Shape must have a layout: %s",
-                           ShapeUtil::HumanStringWithLayout(shape).c_str());
-  }
-  TF_RETURN_IF_ERROR(ShapeUtil::ValidateShape(shape));
-  auto shaped_buffer = WrapUnique(
-      new ShapedBuffer(shape, allocator->platform(), device_ordinal));
-
-  // Allocate an appropriate sized buffer for each element in the shape
-  // including the tuple pointer arrays.
-  for (auto& pair : shaped_buffer->shape_index_to_buffer_entry_) {
-    const ShapeIndex& index = pair.first;
-    size_t& buffer_entry = pair.second;
-    TF_ASSIGN_OR_RETURN(
-        se::DeviceMemoryBase memory_base,
-        allocator->Allocate(shaped_buffer->device_ordinal(),
-                            shape_size_fn(ShapeUtil::GetSubshape(
-                                shaped_buffer->shape(), index))));
-    shaped_buffer->buffers_.push_back(memory_base);
-    buffer_entry = shaped_buffer->buffers_.size() - 1;
-  }
-
-  return std::move(shaped_buffer);
-}
-
-ShapedBuffer::ShapedBuffer(const Shape& shape, const se::Platform* platform,
-                           int device_ordinal)
-    : shape_(shape),
+ShapedBuffer::ShapedBuffer(const Shape& on_host_shape,
+                           const Shape& on_device_shape,
+                           const se::Platform* platform, int device_ordinal)
+    : on_host_shape_(on_host_shape),
+      on_device_shape_(on_device_shape),
       platform_(platform),
       device_ordinal_(device_ordinal),
-      shape_index_to_buffer_entry_(shape) {}
+      buffers_(on_device_shape) {}
 
 void ShapedBuffer::clear() {
-  for (se::DeviceMemoryBase& memory_base : buffers_) {
+  for (auto& pair : buffers_) {
     // A default constructed DeviceMemoryBase is a null pointer.
-    memory_base = se::DeviceMemoryBase();
+    pair.second = se::DeviceMemoryBase();
   }
 }
 
-void ShapedBuffer::AddBufferAtIndex(
-    const perftools::gputools::DeviceMemoryBase& buffer,
-    const ShapeIndex& shape_index) {
-  *mutable_shape_index_to_buffer_entry()->mutable_element(shape_index) =
-      buffers().size();
-  mutable_buffers()->push_back(buffer);
-}
-
-const se::DeviceMemoryBase& ShapedBuffer::buffer(
-    const ShapeIndex& index) const {
-  return buffers_[shape_index_to_buffer_entry_.element(index)];
-}
-
-se::DeviceMemoryBase* ShapedBuffer::mutable_buffer(const ShapeIndex& index) {
-  return &buffers_[shape_index_to_buffer_entry_.element(index)];
-}
-
 string ShapedBuffer::ToString() const {
-  string s = "ShapedBuffer(" + platform_->Name() + "):\n";
+  string s = tensorflow::strings::StrCat(
+      "ShapedBuffer(", platform_->Name(), ":", device_ordinal(),
+      "), on-host shape=" + ShapeUtil::HumanStringWithLayout(on_host_shape()),
+      ", on-device shape=" +
+          ShapeUtil::HumanStringWithLayout(on_device_shape()),
+      ":\n");
   ShapeUtil::ForEachSubshape(
-      shape(), [this, &s](const Shape& subshape, const ShapeIndex& index) {
+      on_device_shape(),
+      [this, &s](const Shape& subshape, const ShapeIndex& index) {
         string shape_str;
         if (ShapeUtil::IsTuple(subshape)) {
           shape_str = "tuple";
@@ -133,34 +79,24 @@ std::ostream& operator<<(std::ostream& out, const ShapedBuffer& buffer) {
   return out;
 }
 
-/* static */ StatusOr<std::unique_ptr<ScopedShapedBuffer>>
-ScopedShapedBuffer::Allocate(
-    const Shape& shape, DeviceMemoryAllocator* allocator, int device_ordinal,
-    const std::function<int64(const Shape&)>& shape_size_fn) {
-  TF_ASSIGN_OR_RETURN(
-      std::unique_ptr<ShapedBuffer> unscoped_buffer,
-      ShapedBuffer::Allocate(shape, allocator, device_ordinal, shape_size_fn));
-  return MakeScoped(unscoped_buffer.get(), allocator);
-}
-
 /* static */
 StatusOr<std::unique_ptr<ScopedShapedBuffer>> ScopedShapedBuffer::MakeScoped(
     ShapedBuffer* shaped_buffer, DeviceMemoryAllocator* allocator) {
   auto scoped_buffer = WrapUnique(new ScopedShapedBuffer(
-      shaped_buffer->shape(), allocator, shaped_buffer->device_ordinal()));
+      shaped_buffer->on_host_shape(), shaped_buffer->on_device_shape(),
+      allocator, shaped_buffer->device_ordinal()));
   scoped_buffer->buffers_ = shaped_buffer->buffers();
-  scoped_buffer->shape_index_to_buffer_entry_ =
-      shaped_buffer->shape_index_to_buffer_entry();
-
   shaped_buffer->clear();
 
   return std::move(scoped_buffer);
 }
 
-ScopedShapedBuffer::ScopedShapedBuffer(const Shape& shape,
+ScopedShapedBuffer::ScopedShapedBuffer(const Shape& on_host_shape,
+                                       const Shape& on_device_shape,
                                        DeviceMemoryAllocator* allocator,
                                        int device_ordinal)
-    : ShapedBuffer(shape, allocator->platform(), device_ordinal),
+    : ShapedBuffer(on_host_shape, on_device_shape, allocator->platform(),
+                   device_ordinal),
       allocator_(allocator) {}
 
 ScopedShapedBuffer::~ScopedShapedBuffer() {
@@ -168,7 +104,8 @@ ScopedShapedBuffer::~ScopedShapedBuffer() {
   // in the shape (eg, a tuple with a repeated element) so keep track of what
   // has been deallocated.
   std::set<void*> deallocated_opaques;
-  for (se::DeviceMemoryBase& memory_base : buffers_) {
+  for (auto& pair : buffers_) {
+    se::DeviceMemoryBase& memory_base = pair.second;
     if (!memory_base.is_null() &&
         deallocated_opaques.count(memory_base.opaque()) == 0) {
       deallocated_opaques.insert(memory_base.opaque());
@@ -179,13 +116,10 @@ ScopedShapedBuffer::~ScopedShapedBuffer() {
 }
 
 std::unique_ptr<ShapedBuffer> ScopedShapedBuffer::release() {
-  auto shaped_buffer =
-      MakeUnique<ShapedBuffer>(shape(), platform(), device_ordinal());
-
-  *shaped_buffer->mutable_buffers() = buffers();
-  *shaped_buffer->mutable_shape_index_to_buffer_entry() =
-      shape_index_to_buffer_entry();
+  auto shaped_buffer = MakeUnique<ShapedBuffer>(
+      on_host_shape(), on_device_shape(), platform(), device_ordinal());
 
+  shaped_buffer->buffers() = buffers();
   clear();
 
   return shaped_buffer;
diff --git a/tensorflow/compiler/xla/service/shaped_buffer.h b/tensorflow/compiler/xla/service/shaped_buffer.h
index ca8bfff674..f570ebb9cb 100644
--- a/tensorflow/compiler/xla/service/shaped_buffer.h
+++ b/tensorflow/compiler/xla/service/shaped_buffer.h
@@ -31,69 +31,68 @@ limitations under the License.
 namespace xla {
 
 // Class which encapsulates a buffer or set of buffers containing data of a
-// particular XLA shape. Used for zero-copy execution interface for a
-// XLA client running in the same process as the service (LocalClient),
+// particular XLA shape.
 class ShapedBuffer {
  public:
-  // Convenience method which creates a ShapedBuffer of array shape (not a
-  // tuple). Its single buffer pointer is set to the given value "buffer". The
-  // given buffer must be large enough to store the given shape as given by
-  // ShapeUtil::ByteSizeOf.
-  static StatusOr<std::unique_ptr<ShapedBuffer>> MakeArrayShapedBuffer(
-      const Shape& shape, const perftools::gputools::Platform* platform,
-      int device_ordinal, const perftools::gputools::DeviceMemoryBase& buffer);
-
-  // Return a newly allocated ShapedBuffer of an arbitrary shape. Array buffers
-  // (leaves in the shape) are allocated and uninitialized. Tuple buffers (if
-  // any) are allocated and initialized to the backend-specific representation
-  // of an array of pointers to the tuple elements.
-  static StatusOr<std::unique_ptr<ShapedBuffer>> Allocate(
-      const Shape& shape, DeviceMemoryAllocator* allocator, int device_ordinal,
-      const std::function<int64(const Shape&)>& shape_size_fn);
-
-  ShapedBuffer(const Shape& shape,
+  // Construct a ScopedShapedBuffer with null DeviceMemoryBases at each
+  // index. The shape of the data on the host and the device may differ because
+  // the device may have a different representation for different data
+  // types. Therefore, both the on-host and on-device shape are required. The
+  // on-device shape determines the number of device allocations
+  // (DeviceMemoryBase) held by the ShapedBuffer.
+  ShapedBuffer(const Shape& on_host_shape, const Shape& on_device_shape,
                const perftools::gputools::Platform* platform,
                int device_ordinal);
 
-  const Shape& shape() const { return shape_; }
+  // Returns the shape of the on-host representation of the data held by this
+  // ShapedBuffer.
+  const Shape& on_host_shape() const { return on_host_shape_; }
+
+  // Returns the shape of the on-device representation of the data held by this
+  // ShapedBuffer.
+  const Shape& on_device_shape() const { return on_device_shape_; }
+
   const perftools::gputools::Platform* platform() const { return platform_; }
   int device_ordinal() const { return device_ordinal_; }
 
+  // Return the root buffer of the shape (shape index {}).
+  const perftools::gputools::DeviceMemoryBase& root_buffer() const {
+    return buffer(/*index=*/{});
+  }
+
   // Returns the buffer at the given shape index where index is defined as in
   // ShapeUtil::GetSubshape.
   const perftools::gputools::DeviceMemoryBase& buffer(
-      const ShapeIndex& index) const;
-  perftools::gputools::DeviceMemoryBase* mutable_buffer(
-      const ShapeIndex& index);
-
-  // Returns the underlying structure which stores the buffer pointers.
-  const std::vector<perftools::gputools::DeviceMemoryBase>& buffers() const {
-    return buffers_;
+      const ShapeIndex& index) const {
+    return buffers_.element(index);
   }
-  std::vector<perftools::gputools::DeviceMemoryBase>* mutable_buffers() {
-    return &buffers_;
+
+  // Sets the device memory buffer at the given index.
+  void set_buffer(const perftools::gputools::DeviceMemoryBase& buffer,
+                  const ShapeIndex& index) {
+    *buffers_.mutable_element(index) = buffer;
   }
 
-  // Returns the tree of indices which map to buffer pointers.
-  const ShapeTree<size_t>& shape_index_to_buffer_entry() const {
-    return shape_index_to_buffer_entry_;
+  // Returns the underlying ShapeTree containing all the device addresses in the
+  // ShapedBuffer.
+  const ShapeTree<perftools::gputools::DeviceMemoryBase>& buffers() const {
+    return buffers_;
   }
-  ShapeTree<size_t>* mutable_shape_index_to_buffer_entry() {
-    return &shape_index_to_buffer_entry_;
+  ShapeTree<perftools::gputools::DeviceMemoryBase>& buffers() {
+    return buffers_;
   }
 
   // Set all device memory pointers in the object to null.
   void clear();
 
-  // Adds a new buffer at the given shape index.
-  void AddBufferAtIndex(const perftools::gputools::DeviceMemoryBase& buffer,
-                        const ShapeIndex& shape_index);
-
   string ToString() const;
 
  protected:
-  // The shape of the device buffer with layout.
-  const Shape shape_;
+  // The shape of the data when represented on the host.
+  const Shape on_host_shape_;
+
+  // The shape of the data on the device.
+  const Shape on_device_shape_;
 
   // The platform the memory is allocated on.
   const perftools::gputools::Platform* platform_;
@@ -101,14 +100,8 @@ class ShapedBuffer {
   // The device the memory is allocated on.
   const int device_ordinal_;
 
-  // The list of DeviceMemoryBase pointers representing this shape.
-  // Note that there can be a many to one relationship between tuple elements
-  // and buffers.  To account for this, shape_index_to_buffer_entry_ allows us
-  // to make from a position in a shape to an index into this list.
-  std::vector<perftools::gputools::DeviceMemoryBase> buffers_;
-
-  // The tree of indices into buffers_.
-  ShapeTree<size_t> shape_index_to_buffer_entry_;
+  // The tree of device buffers. Its shape is on_device_shape().
+  ShapeTree<perftools::gputools::DeviceMemoryBase> buffers_;
 };
 
 std::ostream& operator<<(std::ostream& out, const ShapedBuffer& buffer);
@@ -118,17 +111,16 @@ std::ostream& operator<<(std::ostream& out, const ShapedBuffer& buffer);
 // destructed.
 class ScopedShapedBuffer : public ShapedBuffer {
  public:
-  // Identical to ShapedBuffer::Allocate.
-  static StatusOr<std::unique_ptr<ScopedShapedBuffer>> Allocate(
-      const Shape& shape, DeviceMemoryAllocator* allocator, int device_ordinal,
-      const std::function<int64(const Shape&)>& shape_size_fn);
-
   // Takes a ShapedBuffer and returns a ScopedShapedBuffer which manages the
   // deallocation of the device memory held in the shaped buffer. All device
   // memory pointers in the given ShapedBuffer are set to null.
   static StatusOr<std::unique_ptr<ScopedShapedBuffer>> MakeScoped(
       ShapedBuffer* shaped_buffer, DeviceMemoryAllocator* allocator);
 
+  // Create a ScopedShapedBuffer with null DeviceMemoryBases at each index.
+  ScopedShapedBuffer(const Shape& on_host_shape, const Shape& on_device_shape,
+                     DeviceMemoryAllocator* allocator, int device_ordinal);
+
   // Return the allocator used to allocate the device memory held in this
   // ScopedShapedBuffer.
   DeviceMemoryAllocator* memory_allocator() const { return allocator_; }
@@ -143,8 +135,6 @@ class ScopedShapedBuffer : public ShapedBuffer {
   virtual ~ScopedShapedBuffer();
 
  protected:
-  ScopedShapedBuffer(const Shape& shape, DeviceMemoryAllocator* allocator,
-                     int device_ordinal);
   ScopedShapedBuffer(const ScopedShapedBuffer&) = delete;
   void operator=(const ScopedShapedBuffer&) = delete;
 
diff --git a/tensorflow/compiler/xla/service/transfer_manager.cc b/tensorflow/compiler/xla/service/transfer_manager.cc
index d5f53ad56f..2f36e2b16e 100644
--- a/tensorflow/compiler/xla/service/transfer_manager.cc
+++ b/tensorflow/compiler/xla/service/transfer_manager.cc
@@ -40,6 +40,45 @@ TransferManager::GetPlatformTransferManagers() {
   return r;
 }
 
+Status TransferManager::TransferArrayToDevice(
+    perftools::gputools::StreamExecutor* executor, const Literal& literal,
+    const perftools::gputools::DeviceMemoryBase& dest) {
+  const Shape on_device_shape = HostShapeToDeviceShape(literal.shape());
+  TF_RET_CHECK(ShapeUtil::IsArray(on_device_shape))
+      << "On-device representation of "
+      << ShapeUtil::HumanString(literal.shape())
+      << " is not an array: " << ShapeUtil::HumanString(on_device_shape);
+  if (dest.size() < GetByteSizeRequirement(on_device_shape)) {
+    return FailedPrecondition(
+        "Allocation on device not large enough for array: "
+        "%lld < %lld",
+        dest.size(), GetByteSizeRequirement(on_device_shape));
+  }
+  ShapedBuffer shaped_buffer(/*on_host_shape=*/literal.shape(), on_device_shape,
+                             executor->platform(), executor->device_ordinal());
+  shaped_buffer.set_buffer(dest, /*index=*/{});
+  return TransferLiteralToDevice(executor, literal, shaped_buffer);
+}
+
+StatusOr<std::unique_ptr<Literal>> TransferManager::TransferArrayFromDevice(
+    perftools::gputools::StreamExecutor* executor, const Shape& shape,
+    const perftools::gputools::DeviceMemoryBase& source) {
+  TF_RET_CHECK(ShapeUtil::Equal(HostShapeToDeviceShape(shape), shape))
+      << "Shape " << ShapeUtil::HumanString(shape)
+      << " has a differently shaped representation on-device: "
+      << ShapeUtil::HumanString(HostShapeToDeviceShape(shape));
+  if (source.size() < GetByteSizeRequirement(shape)) {
+    return FailedPrecondition(
+        "Allocation on device not large enough for array: "
+        "%lld < %lld",
+        source.size(), GetByteSizeRequirement(shape));
+  }
+  ShapedBuffer shaped_buffer(/*on_host_shape=*/shape, shape,
+                             executor->platform(), executor->device_ordinal());
+  shaped_buffer.set_buffer(source, /*index=*/{});
+  return TransferLiteralFromDevice(executor, shaped_buffer);
+}
+
 /* static */ void TransferManager::RegisterTransferManager(
     se::Platform::Id platform_id,
     TransferManagerCreationFunction creation_function) {
@@ -75,14 +114,12 @@ TransferManager::GetPlatformTransferManagers() {
 Status TransferManager::WriteTupleIndexTables(
     perftools::gputools::StreamExecutor* executor,
     const ShapedBuffer& device_buffer) {
-  VLOG(2) << "Writing tuple index tables to ShapedBuffer rooted at "
-          << device_buffer.buffer(/*index=*/{}).opaque()
-          << "; shape: " << ShapeUtil::HumanString(device_buffer.shape());
+  VLOG(2) << "Writing tuple index tables for " << device_buffer;
 
   TF_RET_CHECK(executor->device_ordinal() == device_buffer.device_ordinal());
 
   return ShapeUtil::ForEachSubshapeWithStatus(
-      device_buffer.shape(),
+      device_buffer.on_device_shape(),
       [&](const Shape& device_subshape, const ShapeIndex& index) -> Status {
         if (ShapeUtil::IsTuple(device_subshape)) {
           se::DeviceMemoryBase device_memory = device_buffer.buffer(index);
@@ -97,7 +134,7 @@ Status TransferManager::WriteTupleIndexTables(
             elements.push_back(device_buffer.buffer(element_index));
             element_index.pop_back();
           }
-          return WriteTuplePointersToDevice(executor, elements, device_subshape,
+          return WriteSingleTupleIndexTable(executor, elements, device_subshape,
                                             &device_memory);
         }
 
@@ -143,31 +180,43 @@ Status TransferManager::TransferBufferToDevice(
   return Status::OK();
 }
 
-StatusOr<std::set<se::DeviceMemoryBase>>
-TransferManager::GatherBufferPointersFromTuple(
-    se::StreamExecutor* executor, const se::DeviceMemoryBase& source,
-    const Shape& shape) {
-  TF_RET_CHECK(ShapeUtil::IsTuple(shape));
-
-  std::set<se::DeviceMemoryBase> buffer_pointers;
-  buffer_pointers.insert(source);
-
-  TF_ASSIGN_OR_RETURN(std::vector<se::DeviceMemoryBase> tuple_elements,
-                      ShallowCopyTupleFromDevice(executor, source, shape));
-  for (auto i = 0; i < tuple_elements.size(); ++i) {
-    const Shape& element_shape = shape.tuple_shapes(i);
-    if (ShapeUtil::IsTuple(element_shape)) {
-      TF_ASSIGN_OR_RETURN(
-          std::set<se::DeviceMemoryBase> buffer_pointers_in_element,
-          GatherBufferPointersFromTuple(executor, tuple_elements[i],
-                                        element_shape));
-      buffer_pointers.insert(buffer_pointers_in_element.begin(),
-                             buffer_pointers_in_element.end());
-    } else {
-      buffer_pointers.insert(tuple_elements[i]);
-    }
+StatusOr<std::unique_ptr<ShapedBuffer>> TransferManager::AllocateShapedBuffer(
+    const Shape& on_host_shape, DeviceMemoryAllocator* allocator,
+    int device_ordinal) {
+  if (!LayoutUtil::HasLayout(on_host_shape)) {
+    return InvalidArgument(
+        "Shape must have a layout: %s",
+        ShapeUtil::HumanStringWithLayout(on_host_shape).c_str());
+  }
+  TF_RETURN_IF_ERROR(ShapeUtil::ValidateShape(on_host_shape));
+  const Shape on_device_shape = HostShapeToDeviceShape(on_host_shape);
+  TF_RET_CHECK(LayoutUtil::HasLayout(on_device_shape));
+
+  auto shaped_buffer = WrapUnique(new ShapedBuffer(
+      on_host_shape, on_device_shape, allocator->platform(), device_ordinal));
+
+  // Allocate an appropriate sized buffer for each element in the shape
+  // including the tuple pointer arrays.
+  for (auto& pair : shaped_buffer->buffers()) {
+    const ShapeIndex& index = pair.first;
+    se::DeviceMemoryBase& memory_base = pair.second;
+    const Shape& subshape = ShapeUtil::GetSubshape(on_device_shape, index);
+    TF_ASSIGN_OR_RETURN(memory_base,
+                        allocator->Allocate(shaped_buffer->device_ordinal(),
+                                            GetByteSizeRequirement(subshape)));
   }
-  return std::move(buffer_pointers);
+
+  return std::move(shaped_buffer);
+}
+
+StatusOr<std::unique_ptr<ScopedShapedBuffer>>
+TransferManager::AllocateScopedShapedBuffer(const Shape& on_host_shape,
+                                            DeviceMemoryAllocator* allocator,
+                                            int device_ordinal) {
+  TF_ASSIGN_OR_RETURN(
+      std::unique_ptr<ShapedBuffer> unscoped_buffer,
+      AllocateShapedBuffer(on_host_shape, allocator, device_ordinal));
+  return ScopedShapedBuffer::MakeScoped(unscoped_buffer.get(), allocator);
 }
 
 }  // namespace xla
diff --git a/tensorflow/compiler/xla/service/transfer_manager.h b/tensorflow/compiler/xla/service/transfer_manager.h
index be9b769ac8..9f2b5c4aec 100644
--- a/tensorflow/compiler/xla/service/transfer_manager.h
+++ b/tensorflow/compiler/xla/service/transfer_manager.h
@@ -44,24 +44,6 @@ class TransferManager {
   // Returns the ID of the platform that this transfer manager acts on.
   virtual perftools::gputools::Platform::Id PlatformId() const = 0;
 
-  // Transfers the region into the provided literal using the provided
-  // executor. device_shape is the shape, including layout, of the data on the
-  // device, while literal_shape will be the shape for the literal. device_shape
-  // and literal_shape must be compatible, but need not have the same layout.
-  // TODO(b/66694934): Remove TransferLiteral* methods which accept bare
-  // DeviceMemoryBase.
-  virtual Status TransferLiteralFromDevice(
-      perftools::gputools::StreamExecutor* executor,
-      const perftools::gputools::DeviceMemoryBase& region,
-      const Shape& device_shape, const Shape& literal_shape,
-      Literal* literal) = 0;
-
-  // Transfers the given literal into the provided region output parameter,
-  // using the given executor.
-  virtual Status TransferLiteralToDevice(
-      perftools::gputools::StreamExecutor* executor, const Literal& literal,
-      perftools::gputools::DeviceMemoryBase* region) = 0;
-
   // Returns the shape of the on-device representation for the given shape on
   // the host. This is intended for use with ShapedBuffer where buffers are
   // pre-allocated by the host, e.g. TransferLiteralToDevice, without the user
@@ -70,37 +52,39 @@ class TransferManager {
     return host_shape;
   }
 
-  // Transfers the data held in the given ShapedBuffer into the provided literal
-  // using the provided executor. literal_shape will be the shape for the
-  // literal. The shape of the ShapedBuffer and DeviceShape(literal_shape) must
-  // be compatible, but need not have the same layout.
+  // Returns a literal containing the data held in the given ShapedBuffer.
+  // using the provided executor. The optional literal_shape will be the shape
+  // for the literal. The shape of the ShapedBuffer and
+  // DeviceShape(literal_shape) must be compatible, but need not have the same
+  // layout.
   virtual StatusOr<std::unique_ptr<Literal>> TransferLiteralFromDevice(
       perftools::gputools::StreamExecutor* executor,
       const ShapedBuffer& device_buffer) = 0;
 
   // Transfers the given literal into the previously allocated device memory
-  // represented by the given ShapedBuffer using the given executor.
+  // represented by the given ShapedBuffer using the given executor. The shape
+  // of the ShapedBuffer and DeviceShape(literal.shape()) must be compatible,
+  // but need not have the same layout
   virtual Status TransferLiteralToDevice(
       perftools::gputools::StreamExecutor* executor, const Literal& literal,
       const ShapedBuffer& device_buffer) = 0;
 
+  // Convenience methods for transferring an array to or from the device at a
+  // known address. This avoids having to construct a ShapedBuffer just to
+  // transfer an array at a known address.
+  Status TransferArrayToDevice(
+      perftools::gputools::StreamExecutor* executor, const Literal& literal,
+      const perftools::gputools::DeviceMemoryBase& dest);
+  StatusOr<std::unique_ptr<Literal>> TransferArrayFromDevice(
+      perftools::gputools::StreamExecutor* executor, const Shape& shape,
+      const perftools::gputools::DeviceMemoryBase& source);
+
   // Transfers the given literal into the Infeed interface of the device,
   // using the given executor.
   virtual Status TransferLiteralToInfeed(
       perftools::gputools::StreamExecutor* executor,
       const Literal& literal) = 0;
 
-  // Transfer a memory block of the given size from 'source' buffer to the
-  // Infeed interface of the device using the given executor.
-  //
-  // size is the size to transfer from source in bytes.
-  //
-  // source is the source data that must be in the target-dependent layout that
-  // the Infeed HLO used in the computation expects.
-  virtual Status TransferBufferToInfeed(
-      perftools::gputools::StreamExecutor* executor, int64 size,
-      const void* source) = 0;
-
   // Transfers the given literal from the Outfeed interface of the device,
   // using the given executor.
   virtual Status TransferLiteralFromOutfeed(
@@ -112,37 +96,26 @@ class TransferManager {
       tensorflow::gtl::ArraySlice<perftools::gputools::StreamExecutor*>
           executor) = 0;
 
-  // Shallow copy a tuple from the device and create a DeviceMemoryBase object
-  // for each element in the tuple. A DeviceMemoryBase object refers to the
-  // buffer containing the data of that element. The DeviceMemoryBase objects
-  // are returned as a vector.
-  virtual StatusOr<std::vector<perftools::gputools::DeviceMemoryBase>>
-  ShallowCopyTupleFromDevice(
-      perftools::gputools::StreamExecutor* executor,
-      const perftools::gputools::DeviceMemoryBase& source,
-      const Shape& shape) = 0;
-
   // Given an allocated ShapedBuffer, constructs the tuple index table(s) in
   // each buffer of the given ShapedBuffer corresponding to tuple shapes. If the
   // ShapedBuffer is array-shaped this method does nothing.
   Status WriteTupleIndexTables(perftools::gputools::StreamExecutor* executor,
                                const ShapedBuffer& device_buffer);
 
-  // Returns all buffer pointers that the tuple `source` refers to. Unlike
-  // ShallowCopyTupleFromDevice, this function gather buffer pointers in nested
-  // tuples as well. Also, the returned DeviceMemoryBase objects are
-  // deduplicated.
-  StatusOr<std::set<perftools::gputools::DeviceMemoryBase>>
-  GatherBufferPointersFromTuple(
-      perftools::gputools::StreamExecutor* executor,
-      const perftools::gputools::DeviceMemoryBase& source, const Shape& shape);
-
   // Determines the byte size requirement for the given shape on the underlying
   // architecture. This will be used to allocate an appropriately sized memory
   // region for a host-to-device transfer.
   virtual int64 GetByteSizeRequirement(const Shape& shape) const = 0;
 
-  typedef std::unique_ptr<TransferManager> (*TransferManagerCreationFunction)();
+  // Allocate a ShapedBuffer which can hold data with the given on-host
+  // shape. The on-device shape may be different as indicated by
+  // HostShapeToDeviceShape.
+  StatusOr<std::unique_ptr<ShapedBuffer>> AllocateShapedBuffer(
+      const Shape& on_host_shape, DeviceMemoryAllocator* allocator,
+      int device_ordinal);
+  StatusOr<std::unique_ptr<ScopedShapedBuffer>> AllocateScopedShapedBuffer(
+      const Shape& on_host_shape, DeviceMemoryAllocator* allocator,
+      int device_ordinal);
 
   /////
   // The TransferManager class also serves as a point to register objects for
@@ -152,6 +125,7 @@ class TransferManager {
   // assumed to be a singleton, so no ownership is transferred.
   //
   // Precondition: a platform kind must not be registered more than once.
+  typedef std::unique_ptr<TransferManager> (*TransferManagerCreationFunction)();
   static void RegisterTransferManager(
       perftools::gputools::Platform::Id platform_id,
       TransferManagerCreationFunction transfer_manager);
@@ -162,6 +136,17 @@ class TransferManager {
       const perftools::gputools::Platform* platform);
 
  protected:
+  // Transfer a memory block of the given size from 'source' buffer to the
+  // Infeed interface of the device using the given executor.
+  //
+  // size is the size to transfer from source in bytes.
+  //
+  // source is the source data that must be in the target-dependent layout that
+  // the Infeed HLO used in the computation expects.
+  virtual Status TransferBufferToInfeed(
+      perftools::gputools::StreamExecutor* executor, int64 size,
+      const void* source) = 0;
+
   // Transfer a memory block of the given size from the device source into the
   // 'destination' buffer.
   //
@@ -180,10 +165,9 @@ class TransferManager {
       const void* source, perftools::gputools::DeviceMemoryBase* destination);
 
   // Writes the given device-memory pointers in 'elements' to the given region
-  // to construct a tuple in the platform-specific tuple representation. This
-  // can handle nested tuples as well. In the nested case, the element
-  // DeviceMemoryBase points to another array of pointers on the device.
-  virtual Status WriteTuplePointersToDevice(
+  // to construct a tuple index table in the platform-specific tuple
+  // representation.
+  virtual Status WriteSingleTupleIndexTable(
       perftools::gputools::StreamExecutor* executor,
       tensorflow::gtl::ArraySlice<perftools::gputools::DeviceMemoryBase>
           elements,
diff --git a/tensorflow/compiler/xla/tests/copy_test.cc b/tensorflow/compiler/xla/tests/copy_test.cc
index bcb85b04ee..d64bf0aa5b 100644
--- a/tensorflow/compiler/xla/tests/copy_test.cc
+++ b/tensorflow/compiler/xla/tests/copy_test.cc
@@ -56,9 +56,13 @@ class CopyOpTest : public HloTestBase {
                                 tensorflow::gtl::ArraySlice<int64> permutation);
 };
 
-XLA_TEST_F(CopyOpTest, CopyR0Bool) { TestCopyOp(*Literal::CreateR0<bool>(true)); }
+XLA_TEST_F(CopyOpTest, CopyR0Bool) {
+  TestCopyOp(*Literal::CreateR0<bool>(true));
+}
 
-XLA_TEST_F(CopyOpTest, CopyR1S0U32) { TestCopyOp(*Literal::CreateR1<uint32>({})); }
+XLA_TEST_F(CopyOpTest, CopyR1S0U32) {
+  TestCopyOp(*Literal::CreateR1<uint32>({}));
+}
 
 XLA_TEST_F(CopyOpTest, CopyR1S3U32) {
   TestCopyOp(*Literal::CreateR1<uint32>({1, 2, 3}));
@@ -85,7 +89,6 @@ XLA_TEST_F(CopyOpTest, CopyParameterScalar) {
   // Copy literal to device to use as parameter.
   auto literal = Literal::CreateR0<float>(42.0);
   Shape shape = literal->shape();
-  auto constant_device_base = TransferToDevice(*literal);
 
   auto param0 = builder.AddInstruction(
       HloInstruction::CreateParameter(0, shape, "param0"));
@@ -98,7 +101,7 @@ XLA_TEST_F(CopyOpTest, CopyParameterScalar) {
   module->AddEntryComputation(std::move(computation));
 
   std::unique_ptr<Literal> result =
-      ExecuteAndTransfer(std::move(module), {constant_device_base});
+      ExecuteAndTransfer(std::move(module), {literal.get()});
   LiteralTestUtil::ExpectR0Near<float>(42.0f, *result, error_spec_);
 }
 
diff --git a/tensorflow/compiler/xla/tests/dynamic_ops_test.cc b/tensorflow/compiler/xla/tests/dynamic_ops_test.cc
index 8baaf39e3c..59be32a8ff 100644
--- a/tensorflow/compiler/xla/tests/dynamic_ops_test.cc
+++ b/tensorflow/compiler/xla/tests/dynamic_ops_test.cc
@@ -559,20 +559,20 @@ void BM_DynamicSlice(int num_iters) {
   auto computation = builder.Build().ConsumeValueOrDie();
 
   // Initialize and transfer parameter buffer.
-  auto shape_size_fn = [client](const Shape& shape) {
-    return client->backend().transfer_manager()->GetByteSizeRequirement(shape);
-  };
-  auto buffer = ScopedShapedBuffer::Allocate(start_indices_shape, &allocator, 0,
-                                             shape_size_fn)
+  auto buffer = client->backend()
+                    .transfer_manager()
+                    ->AllocateScopedShapedBuffer(
+                        start_indices_shape, &allocator, /*device_ordinal=*/0)
                     .ConsumeValueOrDie();
 
   auto start_indices_literal = Literal::CreateR1<int32>({0, 1, 2, 3});
   ASSERT_IS_OK(transfer_manager->TransferLiteralToDevice(
-      executors[device_ordinal], *start_indices_literal,
-      buffer->mutable_buffer({})));
+      executors[device_ordinal], *start_indices_literal, *buffer));
 
   std::unique_ptr<LocalExecutable> executable =
-      client->Compile(computation, {&buffer->shape()}, ExecutableBuildOptions())
+      client
+          ->Compile(computation, {&buffer->on_host_shape()},
+                    ExecutableBuildOptions())
           .ConsumeValueOrDie();
 
   // Run some warm-up executions.
diff --git a/tensorflow/compiler/xla/tests/fusion_test.cc b/tensorflow/compiler/xla/tests/fusion_test.cc
index 2686afccc2..a292eab1d1 100644
--- a/tensorflow/compiler/xla/tests/fusion_test.cc
+++ b/tensorflow/compiler/xla/tests/fusion_test.cc
@@ -816,7 +816,8 @@ void BM_ParallelFusion(int num_iters) {
   std::unique_ptr<LocalExecutable> executable =
       client
           ->Compile(computation,
-                    {&buffer0->shape(), &buffer1->shape(), &buffer2->shape()},
+                    {&buffer0->on_host_shape(), &buffer1->on_host_shape(),
+                     &buffer2->on_host_shape()},
                     ExecutableBuildOptions())
           .ConsumeValueOrDie();
 
diff --git a/tensorflow/compiler/xla/tests/hlo_test_base.cc b/tensorflow/compiler/xla/tests/hlo_test_base.cc
index f9458f5b74..a27e0f2c10 100644
--- a/tensorflow/compiler/xla/tests/hlo_test_base.cc
+++ b/tensorflow/compiler/xla/tests/hlo_test_base.cc
@@ -111,28 +111,16 @@ std::unique_ptr<HloModule> HloTestBase::CreateNewModule() {
   return debug_options;
 }
 
-StatusOr<perftools::gputools::DeviceMemoryBase> HloTestBase::Execute(
+StatusOr<std::unique_ptr<Literal>> HloTestBase::Execute(
     std::unique_ptr<HloModule> module,
-    tensorflow::gtl::ArraySlice<perftools::gputools::DeviceMemoryBase>
-        arguments,
-    Shape* result_shape) {
-  return test_runner_.Execute(std::move(module), arguments, result_shape);
-}
-
-se::DeviceMemoryBase HloTestBase::TransferToDevice(const Literal& literal) {
-  return test_runner_.TransferToDevice(literal).ValueOrDie();
-}
-
-std::unique_ptr<Literal> HloTestBase::TransferFromDevice(
-    const Shape& shape, se::DeviceMemoryBase device_base) {
-  return test_runner_.TransferFromDevice(shape, device_base).ValueOrDie();
+    tensorflow::gtl::ArraySlice<Literal*> arguments) {
+  return test_runner_.Execute(std::move(module), arguments);
 }
 
 std::unique_ptr<Literal> HloTestBase::ExecuteAndTransfer(
     std::unique_ptr<HloModule> module,
-    tensorflow::gtl::ArraySlice<se::DeviceMemoryBase> arguments) {
-  return test_runner_.ExecuteAndTransfer(std::move(module), arguments)
-      .ValueOrDie();
+    tensorflow::gtl::ArraySlice<Literal*> arguments) {
+  return test_runner_.Execute(std::move(module), arguments).ValueOrDie();
 }
 
 StatusOr<std::unique_ptr<HloModule>> HloTestBase::MakeReferenceModule(
diff --git a/tensorflow/compiler/xla/tests/hlo_test_base.h b/tensorflow/compiler/xla/tests/hlo_test_base.h
index 2c5ce04402..4aea9fc9fd 100644
--- a/tensorflow/compiler/xla/tests/hlo_test_base.h
+++ b/tensorflow/compiler/xla/tests/hlo_test_base.h
@@ -93,27 +93,14 @@ class HloTestBase : public ::testing::Test {
   // DebugOptions, e.g. when creating a module from a string or a file.
   static DebugOptions GetDebugOptionsForTest();
 
-  // Executes the given module and returns a global data handle.
-  StatusOr<perftools::gputools::DeviceMemoryBase> Execute(
+  // Executes the given module and return the result as a Literal.
+  StatusOr<std::unique_ptr<Literal>> Execute(
       std::unique_ptr<HloModule> module,
-      tensorflow::gtl::ArraySlice<perftools::gputools::DeviceMemoryBase>
-          arguments,
-      Shape* result_shape);
-
-  // Transfers the given literal to the device and returns the data handle.
-  perftools::gputools::DeviceMemoryBase TransferToDevice(
-      const Literal& literal);
+      tensorflow::gtl::ArraySlice<Literal*> arguments);
 
-  // Transfers the array referred to by the given handle from the device and
-  // returns as a Literal.
-  std::unique_ptr<Literal> TransferFromDevice(
-      const Shape& shape, perftools::gputools::DeviceMemoryBase device_base);
-
-  // Executes the given module and return the result as a Literal.
   std::unique_ptr<Literal> ExecuteAndTransfer(
       std::unique_ptr<HloModule> module,
-      tensorflow::gtl::ArraySlice<perftools::gputools::DeviceMemoryBase>
-          arguments);
+      tensorflow::gtl::ArraySlice<Literal*> arguments);
 
   // Executes the given hlo module on two backends and compares results.
   //
diff --git a/tensorflow/compiler/xla/tests/local_client_execute_test.cc b/tensorflow/compiler/xla/tests/local_client_execute_test.cc
index ad71d40197..e3298e98c6 100644
--- a/tensorflow/compiler/xla/tests/local_client_execute_test.cc
+++ b/tensorflow/compiler/xla/tests/local_client_execute_test.cc
@@ -138,13 +138,13 @@ XLA_TEST_F(LocalClientExecuteTest, AddArraysWithDifferentInputLayouts) {
   // Create x as a col-major array.
   auto x_array = LiteralToShapedBuffer(*Literal::CreateR2WithLayout(
       {{1.0f, 2.0f}, {3.0f, 4.0f}}, LayoutUtil::MakeLayout({0, 1})));
-  EXPECT_TRUE(LayoutUtil::Equal(x_array->shape().layout(),
+  EXPECT_TRUE(LayoutUtil::Equal(x_array->on_device_shape().layout(),
                                 LayoutUtil::MakeLayout({0, 1})));
 
   // Create y as a row-major array.
   auto y_array = LiteralToShapedBuffer(*Literal::CreateR2WithLayout(
       {{10.0f, 20.0f}, {30.0f, 40.0f}}, LayoutUtil::MakeLayout({1, 0})));
-  EXPECT_TRUE(LayoutUtil::Equal(y_array->shape().layout(),
+  EXPECT_TRUE(LayoutUtil::Equal(y_array->on_device_shape().layout(),
                                 LayoutUtil::MakeLayout({1, 0})));
 
   std::unique_ptr<ScopedShapedBuffer> result_colmaj =
@@ -179,7 +179,7 @@ XLA_TEST_F(LocalClientExecuteTest, AddArraysWithDifferentOutputLayouts) {
       DefaultExecutableBuildOptions().set_result_layout(
           ShapeUtil::MakeShapeWithLayout(F32, /*dimensions=*/{2, 2}, {0, 1})),
       DefaultExecutableRunOptions());
-  EXPECT_TRUE(LayoutUtil::Equal(result_colmaj->shape().layout(),
+  EXPECT_TRUE(LayoutUtil::Equal(result_colmaj->on_device_shape().layout(),
                                 LayoutUtil::MakeLayout({0, 1})));
   LiteralTestUtil::ExpectR2Near<float>({{11.0f, 22.0f}, {33.0f, 44.0f}},
                                        *ShapedBufferToLiteral(*result_colmaj),
@@ -191,7 +191,7 @@ XLA_TEST_F(LocalClientExecuteTest, AddArraysWithDifferentOutputLayouts) {
       DefaultExecutableBuildOptions().set_result_layout(
           ShapeUtil::MakeShapeWithLayout(F32, /*dimensions=*/{2, 2}, {1, 0})),
       DefaultExecutableRunOptions());
-  EXPECT_TRUE(LayoutUtil::Equal(result_rowmaj->shape().layout(),
+  EXPECT_TRUE(LayoutUtil::Equal(result_rowmaj->on_device_shape().layout(),
                                 LayoutUtil::MakeLayout({1, 0})));
   LiteralTestUtil::ExpectR2Near<float>({{11.0f, 22.0f}, {33.0f, 44.0f}},
                                        *ShapedBufferToLiteral(*result_rowmaj),
@@ -213,8 +213,8 @@ XLA_TEST_F(LocalClientExecuteTest, TupleResult) {
   std::unique_ptr<ScopedShapedBuffer> result =
       ExecuteLocallyOrDie(computation, {x_array.get(), y_array.get()});
 
-  EXPECT_TRUE(ShapeUtil::IsTuple(result->shape()));
-  EXPECT_EQ(3, ShapeUtil::TupleElementCount(result->shape()));
+  EXPECT_TRUE(ShapeUtil::IsTuple(result->on_host_shape()));
+  EXPECT_EQ(3, ShapeUtil::TupleElementCount(result->on_host_shape()));
 
   std::unique_ptr<Literal> result_literal = ShapedBufferToLiteral(*result);
   LiteralTestUtil::ExpectR2Equal<float>({{1.0f, 2.0f}, {3.0f, 4.0f}},
@@ -241,8 +241,8 @@ XLA_TEST_F(LocalClientExecuteTest, NestedTupleResult) {
   std::unique_ptr<ScopedShapedBuffer> result =
       ExecuteLocallyOrDie(computation, {x_array.get(), y_array.get()});
 
-  EXPECT_TRUE(ShapeUtil::IsTuple(result->shape()));
-  EXPECT_EQ(2, ShapeUtil::TupleElementCount(result->shape()));
+  EXPECT_TRUE(ShapeUtil::IsTuple(result->on_host_shape()));
+  EXPECT_EQ(2, ShapeUtil::TupleElementCount(result->on_host_shape()));
 
   std::unique_ptr<Literal> result_literal = ShapedBufferToLiteral(*result);
   LiteralTestUtil::ExpectR2Equal<float>({{1.0f, 2.0f}, {3.0f, 4.0f}},
@@ -320,8 +320,8 @@ XLA_TEST_F(LocalClientExecuteTest, TupleArguments) {
   std::unique_ptr<ScopedShapedBuffer> result =
       ExecuteLocallyOrDie(computation, {x_buffer.get(), y_buffer.get()});
 
-  EXPECT_TRUE(ShapeUtil::IsTuple(result->shape()));
-  EXPECT_EQ(2, ShapeUtil::TupleElementCount(result->shape()));
+  EXPECT_TRUE(ShapeUtil::IsTuple(result->on_host_shape()));
+  EXPECT_EQ(2, ShapeUtil::TupleElementCount(result->on_host_shape()));
 
   std::unique_ptr<Literal> result_literal = ShapedBufferToLiteral(*result);
   LiteralTestUtil::ExpectR2Equal<float>({{56.0f, 46.0f}, {36.0f, 26.0f}},
@@ -906,20 +906,18 @@ void BM_LocalClientOverhead(int num_iters) {
   builder.Add(x, x);
   auto computation = builder.Build().ConsumeValueOrDie();
 
-  auto shape_size_fn = [client](const Shape& shape) {
-    return client->backend().transfer_manager()->GetByteSizeRequirement(shape);
-  };
-  auto buffer = ScopedShapedBuffer::Allocate(
-                    shape, &allocator, /*device_ordinal=*/0, shape_size_fn)
-                    .ConsumeValueOrDie();
+  auto buffer =
+      transfer_manager
+          ->AllocateScopedShapedBuffer(shape, &allocator, /*device_ordinal=*/0)
+          .ConsumeValueOrDie();
   auto literal = Literal::CreateR2<float>({{0, 0, 0}, {0, 0, 0}});
   ASSERT_IS_OK(transfer_manager->TransferLiteralToDevice(
-      executors[device_ordinal], *literal, buffer->mutable_buffer({})));
+      executors[device_ordinal], *literal, *buffer));
 
   const int kWarmups = 2;
 
-  auto executable_status = client->Compile(computation, {&buffer->shape()},
-                                           ExecutableBuildOptions());
+  auto executable_status = client->Compile(
+      computation, {&buffer->on_host_shape()}, ExecutableBuildOptions());
   ASSERT_IS_OK(executable_status);
   std::unique_ptr<LocalExecutable> executable =
       executable_status.ConsumeValueOrDie();
diff --git a/tensorflow/compiler/xla/tests/local_client_test_base.cc b/tensorflow/compiler/xla/tests/local_client_test_base.cc
index 062a9246e4..96b976d25d 100644
--- a/tensorflow/compiler/xla/tests/local_client_test_base.cc
+++ b/tensorflow/compiler/xla/tests/local_client_test_base.cc
@@ -188,7 +188,7 @@ LocalClientTestBase::ExecuteLocally(
     const ExecutableRunOptions& run_options) {
   std::vector<const Shape*> argument_layouts(arguments.size());
   for (int i = 0; i < arguments.size(); ++i) {
-    argument_layouts[i] = &arguments[i]->shape();
+    argument_layouts[i] = &arguments[i]->on_host_shape();
   }
   TF_ASSIGN_OR_RETURN(
       std::unique_ptr<LocalExecutable> executable,
diff --git a/tensorflow/compiler/xla/tests/multioutput_fusion_test.cc b/tensorflow/compiler/xla/tests/multioutput_fusion_test.cc
index 89fa6ed9f7..62d24a11fd 100644
--- a/tensorflow/compiler/xla/tests/multioutput_fusion_test.cc
+++ b/tensorflow/compiler/xla/tests/multioutput_fusion_test.cc
@@ -99,14 +99,13 @@ class MultiOutputFusionTest : public HloTestBase {
           nullptr);
     }
 
-    Literal input;
-    input.PopulateWithValue<float>(2.5f, {size, size});
-    auto p1 = TransferToDevice(input);
-    auto p0 = TransferToDevice(*Literal::CreateR0<float>(-9.0f));
+    Literal arg1;
+    arg1.PopulateWithValue<float>(2.5f, {size, size});
 
     Literal expect;
     expect.PopulateWithValue<float>(size * 1.5f * 3.5f, {size, size});
-    auto actual = ExecuteAndTransfer(std::move(hlo_module), {p0, p1});
+    auto actual = ExecuteAndTransfer(
+        std::move(hlo_module), {Literal::CreateR0<float>(-9.0f).get(), &arg1});
     LiteralTestUtil::ExpectNear(expect, *actual, error_spec_);
   }
 
@@ -163,11 +162,9 @@ class MultiOutputFusionTest : public HloTestBase {
     Literal input0, input1;
     input0.PopulateWithValue<float>(2.5f, {size});
     input1.PopulateWithValue<double>(1, {size});
-    auto p0 = TransferToDevice(input0);
-    auto p1 = TransferToDevice(input1);
 
     Literal expect = *Literal::CreateR1<float>({size * 1.5f * 3.5f});
-    auto actual = ExecuteAndTransfer(std::move(hlo_module), {p0, p1});
+    auto actual = ExecuteAndTransfer(std::move(hlo_module), {&input0, &input1});
     LiteralTestUtil::ExpectNear(expect, *actual, error_spec_);
   }
 };
diff --git a/tensorflow/compiler/xla/tests/transfer_manager_test.cc b/tensorflow/compiler/xla/tests/transfer_manager_test.cc
index f2a6474948..ed556fafb1 100644
--- a/tensorflow/compiler/xla/tests/transfer_manager_test.cc
+++ b/tensorflow/compiler/xla/tests/transfer_manager_test.cc
@@ -46,9 +46,10 @@ class TransferManagerTest : public LocalClientTestBase {
   ~TransferManagerTest() override = default;
 
   std::unique_ptr<ScopedShapedBuffer> AllocateDeviceBuffer(const Shape& shape) {
-    return ScopedShapedBuffer::Allocate(
-               shape, GetOrCreateAllocator(local_client_->platform()),
-               /*device_ordinal=*/0, shape_size_fn_)
+    return transfer_manager_
+        ->AllocateScopedShapedBuffer(
+            shape, GetOrCreateAllocator(local_client_->platform()),
+            /*device_ordinal=*/0)
         .ValueOrDie();
   }
 
@@ -211,5 +212,39 @@ XLA_TEST_F(TransferManagerTest, TransferNestedTuple) {
   LiteralTestUtil::ExpectEqual(*literal, *result);
 }
 
+XLA_TEST_F(TransferManagerTest, TransferComplexValue) {
+  std::unique_ptr<Literal> literal = Literal::CreateR1<complex64>(
+      {complex64(1.0f, 2.0f), complex64(42.0f, -123.4f)});
+  auto device_buffer = AllocateDeviceBuffer(literal->shape());
+
+  // Round trip literal through device.
+  ASSERT_IS_OK(transfer_manager_->TransferLiteralToDevice(
+      stream_executor_, *literal, *device_buffer));
+  TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Literal> result,
+                          transfer_manager_->TransferLiteralFromDevice(
+                              stream_executor_, *device_buffer));
+
+  LiteralTestUtil::ExpectEqual(*literal, *result);
+}
+
+XLA_TEST_F(TransferManagerTest, TransferComplexValueInTuple) {
+  std::unique_ptr<Literal> literal = Literal::MakeTuple(
+      {Literal::CreateR1<complex64>(
+           {complex64(1.0f, 2.0f), complex64(42.0f, -123.4f)})
+           .get(),
+       Literal::CreateR1<int32>({1, 2, 3, 4, 5, 6}).get(),
+       Literal::CreateR0<complex64>(complex64(0.3f, -0.4f)).get()});
+  auto device_buffer = AllocateDeviceBuffer(literal->shape());
+
+  // Round trip literal through device.
+  ASSERT_IS_OK(transfer_manager_->TransferLiteralToDevice(
+      stream_executor_, *literal, *device_buffer));
+  TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Literal> result,
+                          transfer_manager_->TransferLiteralFromDevice(
+                              stream_executor_, *device_buffer));
+
+  LiteralTestUtil::ExpectEqual(*literal, *result);
+}
+
 }  // namespace
 }  // namespace xla
-- 
GitLab


From 9b46a03d0cad50b3b15280cf389858513c4cecc0 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Sat, 16 Dec 2017 05:54:26 -0800
Subject: [PATCH 1133/1225] Update ops-related pbtxt files.

PiperOrigin-RevId: 179293176
---
 tensorflow/core/ops/ops.pbtxt | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tensorflow/core/ops/ops.pbtxt b/tensorflow/core/ops/ops.pbtxt
index 438c2dc13b..c0ca309a7b 100644
--- a/tensorflow/core/ops/ops.pbtxt
+++ b/tensorflow/core/ops/ops.pbtxt
@@ -6439,12 +6439,12 @@ op {
   name: "DataFormatVecPermute"
   input_arg {
     name: "x"
-    description: "Vector in source data format. Must be of size 4."
+    description: "Vector of size 4 or Tensor of shape (2, 4) in source data format."
     type_attr: "T"
   }
   output_arg {
     name: "y"
-    description: "Vector in destination data format. Must be of size 4."
+    description: "Vector of size 4 or Tensor of shape (2, 4) in destination data format."
     type_attr: "T"
   }
   attr {
@@ -6476,7 +6476,7 @@ op {
     }
     description: "destination data format."
   }
-  summary: "Returns the permuted vector in the destination data format given the one in"
+  summary: "Returns the permuted vector/tensor in the destination data format given the one in"
   description: "the source data format."
 }
 op {
-- 
GitLab


From b22ee3053e6c64d9b7d8187e07d736709e4f7db5 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Sat, 16 Dec 2017 06:11:24 -0800
Subject: [PATCH 1134/1225] Go: Update generated wrapper functions for
 TensorFlow ops.

PiperOrigin-RevId: 179294028
---
 tensorflow/go/op/wrappers.go | 3955 ++++++++++++++++++++--------------
 1 file changed, 2311 insertions(+), 1644 deletions(-)

diff --git a/tensorflow/go/op/wrappers.go b/tensorflow/go/op/wrappers.go
index 664e37d3a1..7d4671dcb8 100644
--- a/tensorflow/go/op/wrappers.go
+++ b/tensorflow/go/op/wrappers.go
@@ -1484,6 +1484,61 @@ func Slice(scope *Scope, input tf.Output, begin tf.Output, size tf.Output) (outp
 	return op.Output(0)
 }
 
+// UniqueV2Attr is an optional argument to UniqueV2.
+type UniqueV2Attr func(optionalAttr)
+
+// UniqueV2OutIdx sets the optional out_idx attribute to value.
+// If not specified, defaults to DT_INT32
+func UniqueV2OutIdx(value tf.DataType) UniqueV2Attr {
+	return func(m optionalAttr) {
+		m["out_idx"] = value
+	}
+}
+
+// Finds unique elements in a 1-D tensor.
+//
+// This operation returns a tensor `y` containing all of the unique elements of `x`
+// sorted in the same order that they occur in `x`. This operation also returns a
+// tensor `idx` the same size as `x` that contains the index of each value of `x`
+// in the unique output `y`. In other words:
+//
+// `y[idx[i]] = x[i] for i in [0, 1,...,rank(x) - 1]`
+//
+// For example:
+//
+// ```
+// # tensor 'x' is [1, 1, 2, 4, 4, 4, 7, 8, 8]
+// y, idx = unique(x)
+// y ==> [1, 2, 4, 7, 8]
+// idx ==> [0, 0, 1, 2, 2, 2, 3, 4, 4]
+// ```
+//
+// Arguments:
+//	x: A `Tensor`.
+//	axis: A `Tensor` of type `int64` (default: 0). The axis of the Tensor to
+// find the unique elements.
+//
+// Returns A `Tensor`. Unique elements along the `axis` of `Tensor` x.A 1-D Tensor. Has the same type as x that contains the index of each
+// value of x in the output y.
+func UniqueV2(scope *Scope, x tf.Output, axis tf.Output, optional ...UniqueV2Attr) (y tf.Output, idx tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "UniqueV2",
+		Input: []tf.Input{
+			x, axis,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0), op.Output(1)
+}
+
 // Shuffle dimensions of x according to a permutation and conjugate the result.
 //
 // The output `y` has the same rank as `x`. The shapes of `x` and `y` satisfy:
@@ -2019,6 +2074,28 @@ func ZerosLike(scope *Scope, x tf.Output) (y tf.Output) {
 	return op.Output(0)
 }
 
+// Gives a guarantee to the TF runtime that the input tensor is a constant.
+//
+// The runtime is then free to make optimizations based on this.
+//
+// Only accepts value typed tensors as inputs and rejects resource variable handles
+// as input.
+//
+// Returns the input tensor without modification.
+func GuaranteeConst(scope *Scope, input tf.Output) (output tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "GuaranteeConst",
+		Input: []tf.Input{
+			input,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
 // Splits a tensor into `num_split` tensors along one dimension.
 //
 // Arguments:
@@ -2711,7 +2788,7 @@ func SpaceToDepthDataFormat(value string) SpaceToDepthAttr {
 //   "NHWC": `[ batch, height, width, channels ]`
 //   "NCHW": `[ batch, channels, height, width ]`
 //   "NCHW_VECT_C":
-//       `qint8 [ batch, channels / 4, height, width, channels % 4 ]`
+//       `qint8 [ batch, channels / 4, height, width, 4 ]`
 //
 // It is useful to consider the operation as transforming a 6-D Tensor.
 // e.g. for data_format = NHWC,
@@ -3641,7 +3718,7 @@ func DepthToSpaceDataFormat(value string) DepthToSpaceAttr {
 //   "NHWC": `[ batch, height, width, channels ]`
 //   "NCHW": `[ batch, channels, height, width ]`
 //   "NCHW_VECT_C":
-//       `qint8 [ batch, channels / 4, height, width, channels % 4 ]`
+//       `qint8 [ batch, channels / 4, height, width, 4 ]`
 //
 // It is useful to consider the operation as transforming a 6-D Tensor.
 // e.g. for data_format = NHWC,
@@ -5807,6 +5884,31 @@ func CacheDataset(scope *Scope, input_dataset tf.Output, filename tf.Output, out
 	return op.Output(0)
 }
 
+// Creates a Dataset that returns pseudorandom numbers.
+//
+// Arguments:
+//	seed: A scalar seed for the random number generator. If either seed or
+// seed2 is set to be non-zero, the random number generator is seeded
+// by the given seed.  Otherwise, a random seed is used.
+//	seed2: A second scalar seed to avoid seed collision.
+//
+//
+func RandomDataset(scope *Scope, seed tf.Output, seed2 tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes}
+	opspec := tf.OpSpec{
+		Type: "RandomDataset",
+		Input: []tf.Input{
+			seed, seed2,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
 // Identity op for gradient debugging.
 //
 // This op is hidden from public in Python. It is used by TensorFlow Debugger to
@@ -8191,48 +8293,85 @@ func AudioSummaryV2(scope *Scope, tag tf.Output, tensor tf.Output, sample_rate t
 	return op.Output(0)
 }
 
-// Conv3DBackpropInputV2Attr is an optional argument to Conv3DBackpropInputV2.
-type Conv3DBackpropInputV2Attr func(optionalAttr)
+// ImageSummaryAttr is an optional argument to ImageSummary.
+type ImageSummaryAttr func(optionalAttr)
 
-// Conv3DBackpropInputV2DataFormat sets the optional data_format attribute to value.
+// ImageSummaryMaxImages sets the optional max_images attribute to value.
 //
-// value: The data format of the input and output data. With the
-// default format "NDHWC", the data is stored in the order of:
-//     [batch, in_depth, in_height, in_width, in_channels].
-// Alternatively, the format could be "NCDHW", the data storage order is:
-//     [batch, in_channels, in_depth, in_height, in_width].
-// If not specified, defaults to "NDHWC"
-func Conv3DBackpropInputV2DataFormat(value string) Conv3DBackpropInputV2Attr {
+// value: Max number of batch elements to generate images for.
+// If not specified, defaults to 3
+//
+// REQUIRES: value >= 1
+func ImageSummaryMaxImages(value int64) ImageSummaryAttr {
 	return func(m optionalAttr) {
-		m["data_format"] = value
+		m["max_images"] = value
 	}
 }
 
-// Computes the gradients of 3-D convolution with respect to the input.
+// ImageSummaryBadColor sets the optional bad_color attribute to value.
+//
+// value: Color to use for pixels with non-finite values.
+// If not specified, defaults to <dtype:DT_UINT8 tensor_shape:<dim:<size:4 > > int_val:255 int_val:0 int_val:0 int_val:255 >
+func ImageSummaryBadColor(value tf.Tensor) ImageSummaryAttr {
+	return func(m optionalAttr) {
+		m["bad_color"] = value
+	}
+}
+
+// Outputs a `Summary` protocol buffer with images.
+//
+// The summary has up to `max_images` summary values containing images. The
+// images are built from `tensor` which must be 4-D with shape `[batch_size,
+// height, width, channels]` and where `channels` can be:
+//
+// *  1: `tensor` is interpreted as Grayscale.
+// *  3: `tensor` is interpreted as RGB.
+// *  4: `tensor` is interpreted as RGBA.
+//
+// The images have the same number of channels as the input tensor. For float
+// input, the values are normalized one image at a time to fit in the range
+// `[0, 255]`.  `uint8` values are unchanged.  The op uses two different
+// normalization algorithms:
+//
+// *  If the input values are all positive, they are rescaled so the largest one
+//    is 255.
+//
+// *  If any input value is negative, the values are shifted so input value 0.0
+//    is at 127.  They are then rescaled so that either the smallest value is 0,
+//    or the largest one is 255.
+//
+// The `tag` argument is a scalar `Tensor` of type `string`.  It is used to
+// build the `tag` of the summary values:
+//
+// *  If `max_images` is 1, the summary value tag is '*tag*/image'.
+// *  If `max_images` is greater than 1, the summary value tags are
+//    generated sequentially as '*tag*/image/0', '*tag*/image/1', etc.
+//
+// The `bad_color` argument is the color to use in the generated images for
+// non-finite input values.  It is a `unit8` 1-D tensor of length `channels`.
+// Each element must be in the range `[0, 255]` (It represents the value of a
+// pixel in the output image).  Non-finite values in the input tensor are
+// replaced by this tensor in the output image.  The default value is the color
+// red.
 //
 // Arguments:
-//	input_sizes: An integer vector representing the tensor shape of `input`,
-// where `input` is a 5-D
-// `[batch, depth, rows, cols, in_channels]` tensor.
-//	filter: Shape `[depth, rows, cols, in_channels, out_channels]`.
-// `in_channels` must match between `input` and `filter`.
-//	out_backprop: Backprop signal of shape `[batch, out_depth, out_rows, out_cols,
-// out_channels]`.
-//	strides: 1-D tensor of length 5. The stride of the sliding window for each
-// dimension of `input`. Must have `strides[0] = strides[4] = 1`.
-//	padding: The type of padding algorithm to use.
-func Conv3DBackpropInputV2(scope *Scope, input_sizes tf.Output, filter tf.Output, out_backprop tf.Output, strides []int64, padding string, optional ...Conv3DBackpropInputV2Attr) (output tf.Output) {
+//	tag: Scalar. Used to build the `tag` attribute of the summary values.
+//	tensor: 4-D of shape `[batch_size, height, width, channels]` where
+// `channels` is 1, 3, or 4.
+//
+// Returns Scalar. Serialized `Summary` protocol buffer.
+func ImageSummary(scope *Scope, tag tf.Output, tensor tf.Output, optional ...ImageSummaryAttr) (summary tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{"strides": strides, "padding": padding}
+	attrs := map[string]interface{}{}
 	for _, a := range optional {
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "Conv3DBackpropInputV2",
+		Type: "ImageSummary",
 		Input: []tf.Input{
-			input_sizes, filter, out_backprop,
+			tag, tensor,
 		},
 		Attrs: attrs,
 	}
@@ -8240,308 +8379,237 @@ func Conv3DBackpropInputV2(scope *Scope, input_sizes tf.Output, filter tf.Output
 	return op.Output(0)
 }
 
-// Returns a tensor of ones with the same shape and type as x.
+// Computes the number of elements in the given queue.
 //
 // Arguments:
-//	x: a tensor of type T.
+//	handle: The handle to a queue.
 //
-// Returns a tensor of the same shape and type as x but filled with ones.
-func OnesLike(scope *Scope, x tf.Output) (y tf.Output) {
+// Returns The number of elements in the given queue.
+func QueueSizeV2(scope *Scope, handle tf.Output) (size tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
 	opspec := tf.OpSpec{
-		Type: "OnesLike",
+		Type: "QueueSizeV2",
 		Input: []tf.Input{
-			x,
+			handle,
 		},
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// Returns element-wise remainder of division. This emulates C semantics in that
+// Outputs a `Summary` protocol buffer with a histogram.
 //
-// the result here is consistent with a truncating divide. E.g.
-// `tf.truncatediv(x, y) * y + truncate_mod(x, y) = x`.
+// The generated
+// [`Summary`](https://www.tensorflow.org/code/tensorflow/core/framework/summary.proto)
+// has one summary value containing a histogram for `values`.
 //
-// *NOTE*: `Mod` supports broadcasting. More about broadcasting
-// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)
-func Mod(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) {
+// This op reports an `InvalidArgument` error if any value is not finite.
+//
+// Arguments:
+//	tag: Scalar.  Tag to use for the `Summary.Value`.
+//	values: Any shape. Values to use to build the histogram.
+//
+// Returns Scalar. Serialized `Summary` protocol buffer.
+func HistogramSummary(scope *Scope, tag tf.Output, values tf.Output) (summary tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
 	opspec := tf.OpSpec{
-		Type: "Mod",
+		Type: "HistogramSummary",
 		Input: []tf.Input{
-			x, y,
+			tag, values,
 		},
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// Computes the gradients of 3-D convolution with respect to the filter.
+// RandomShuffleQueueV2Attr is an optional argument to RandomShuffleQueueV2.
+type RandomShuffleQueueV2Attr func(optionalAttr)
+
+// RandomShuffleQueueV2Shapes sets the optional shapes attribute to value.
 //
-// DEPRECATED at GraphDef version 10: Use Conv3DBackpropFilterV2
+// value: The shape of each component in a value. The length of this attr must
+// be either 0 or the same as the length of component_types. If the length of
+// this attr is 0, the shapes of queue elements are not constrained, and
+// only one element may be dequeued at a time.
+// If not specified, defaults to <>
 //
-// Arguments:
-//	input: Shape `[batch, depth, rows, cols, in_channels]`.
-//	filter: Shape `[depth, rows, cols, in_channels, out_channels]`.
-// `in_channels` must match between `input` and `filter`.
-//	out_backprop: Backprop signal of shape `[batch, out_depth, out_rows, out_cols,
-// out_channels]`.
-//	strides: 1-D tensor of length 5. The stride of the sliding window for each
-// dimension of `input`. Must have `strides[0] = strides[4] = 1`.
-//	padding: The type of padding algorithm to use.
-func Conv3DBackpropFilter(scope *Scope, input tf.Output, filter tf.Output, out_backprop tf.Output, strides []int64, padding string) (output tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{"strides": strides, "padding": padding}
-	opspec := tf.OpSpec{
-		Type: "Conv3DBackpropFilter",
-		Input: []tf.Input{
-			input, filter, out_backprop,
-		},
-		Attrs: attrs,
+// REQUIRES: len(value) >= 0
+func RandomShuffleQueueV2Shapes(value []tf.Shape) RandomShuffleQueueV2Attr {
+	return func(m optionalAttr) {
+		m["shapes"] = value
 	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
 }
 
-// Computes the gradients of 3-D convolution with respect to the input.
+// RandomShuffleQueueV2Capacity sets the optional capacity attribute to value.
 //
-// DEPRECATED at GraphDef version 10: Use Conv3DBackpropInputV2
+// value: The upper bound on the number of elements in this queue.
+// Negative numbers mean no limit.
+// If not specified, defaults to -1
+func RandomShuffleQueueV2Capacity(value int64) RandomShuffleQueueV2Attr {
+	return func(m optionalAttr) {
+		m["capacity"] = value
+	}
+}
+
+// RandomShuffleQueueV2MinAfterDequeue sets the optional min_after_dequeue attribute to value.
 //
-// Arguments:
-//	input: Shape `[batch, depth, rows, cols, in_channels]`.
-//	filter: Shape `[depth, rows, cols, in_channels, out_channels]`.
-// `in_channels` must match between `input` and `filter`.
-//	out_backprop: Backprop signal of shape `[batch, out_depth, out_rows, out_cols,
-// out_channels]`.
-//	strides: 1-D tensor of length 5. The stride of the sliding window for each
-// dimension of `input`. Must have `strides[0] = strides[4] = 1`.
-//	padding: The type of padding algorithm to use.
-func Conv3DBackpropInput(scope *Scope, input tf.Output, filter tf.Output, out_backprop tf.Output, strides []int64, padding string) (output tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{"strides": strides, "padding": padding}
-	opspec := tf.OpSpec{
-		Type: "Conv3DBackpropInput",
-		Input: []tf.Input{
-			input, filter, out_backprop,
-		},
-		Attrs: attrs,
+// value: Dequeue will block unless there would be this
+// many elements after the dequeue or the queue is closed. This
+// ensures a minimum level of mixing of elements.
+// If not specified, defaults to 0
+func RandomShuffleQueueV2MinAfterDequeue(value int64) RandomShuffleQueueV2Attr {
+	return func(m optionalAttr) {
+		m["min_after_dequeue"] = value
 	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
 }
 
-// ReverseSequenceAttr is an optional argument to ReverseSequence.
-type ReverseSequenceAttr func(optionalAttr)
-
-// ReverseSequenceBatchDim sets the optional batch_dim attribute to value.
+// RandomShuffleQueueV2Seed sets the optional seed attribute to value.
 //
-// value: The dimension along which reversal is performed.
+// value: If either seed or seed2 is set to be non-zero, the random number
+// generator is seeded by the given seed.  Otherwise, a random seed is used.
 // If not specified, defaults to 0
-func ReverseSequenceBatchDim(value int64) ReverseSequenceAttr {
+func RandomShuffleQueueV2Seed(value int64) RandomShuffleQueueV2Attr {
 	return func(m optionalAttr) {
-		m["batch_dim"] = value
+		m["seed"] = value
 	}
 }
 
-// Reverses variable length slices.
-//
-// This op first slices `input` along the dimension `batch_dim`, and for each
-// slice `i`, reverses the first `seq_lengths[i]` elements along
-// the dimension `seq_dim`.
-//
-// The elements of `seq_lengths` must obey `seq_lengths[i] <= input.dims[seq_dim]`,
-// and `seq_lengths` must be a vector of length `input.dims[batch_dim]`.
-//
-// The output slice `i` along dimension `batch_dim` is then given by input
-// slice `i`, with the first `seq_lengths[i]` slices along dimension
-// `seq_dim` reversed.
-//
-// For example:
-//
-// ```
-// # Given this:
-// batch_dim = 0
-// seq_dim = 1
-// input.dims = (4, 8, ...)
-// seq_lengths = [7, 2, 3, 5]
-//
-// # then slices of input are reversed on seq_dim, but only up to seq_lengths:
-// output[0, 0:7, :, ...] = input[0, 7:0:-1, :, ...]
-// output[1, 0:2, :, ...] = input[1, 2:0:-1, :, ...]
-// output[2, 0:3, :, ...] = input[2, 3:0:-1, :, ...]
-// output[3, 0:5, :, ...] = input[3, 5:0:-1, :, ...]
-//
-// # while entries past seq_lens are copied through:
-// output[0, 7:, :, ...] = input[0, 7:, :, ...]
-// output[1, 2:, :, ...] = input[1, 2:, :, ...]
-// output[2, 3:, :, ...] = input[2, 3:, :, ...]
-// output[3, 2:, :, ...] = input[3, 2:, :, ...]
-// ```
-//
-// In contrast, if:
+// RandomShuffleQueueV2Seed2 sets the optional seed2 attribute to value.
 //
-// ```
-// # Given this:
-// batch_dim = 2
-// seq_dim = 0
-// input.dims = (8, ?, 4, ...)
-// seq_lengths = [7, 2, 3, 5]
+// value: A second seed to avoid seed collision.
+// If not specified, defaults to 0
+func RandomShuffleQueueV2Seed2(value int64) RandomShuffleQueueV2Attr {
+	return func(m optionalAttr) {
+		m["seed2"] = value
+	}
+}
+
+// RandomShuffleQueueV2Container sets the optional container attribute to value.
 //
-// # then slices of input are reversed on seq_dim, but only up to seq_lengths:
-// output[0:7, :, 0, :, ...] = input[7:0:-1, :, 0, :, ...]
-// output[0:2, :, 1, :, ...] = input[2:0:-1, :, 1, :, ...]
-// output[0:3, :, 2, :, ...] = input[3:0:-1, :, 2, :, ...]
-// output[0:5, :, 3, :, ...] = input[5:0:-1, :, 3, :, ...]
+// value: If non-empty, this queue is placed in the given container.
+// Otherwise, a default container is used.
+// If not specified, defaults to ""
+func RandomShuffleQueueV2Container(value string) RandomShuffleQueueV2Attr {
+	return func(m optionalAttr) {
+		m["container"] = value
+	}
+}
+
+// RandomShuffleQueueV2SharedName sets the optional shared_name attribute to value.
 //
-// # while entries past seq_lens are copied through:
-// output[7:, :, 0, :, ...] = input[7:, :, 0, :, ...]
-// output[2:, :, 1, :, ...] = input[2:, :, 1, :, ...]
-// output[3:, :, 2, :, ...] = input[3:, :, 2, :, ...]
-// output[2:, :, 3, :, ...] = input[2:, :, 3, :, ...]
-// ```
+// value: If non-empty, this queue will be shared under the given name
+// across multiple sessions.
+// If not specified, defaults to ""
+func RandomShuffleQueueV2SharedName(value string) RandomShuffleQueueV2Attr {
+	return func(m optionalAttr) {
+		m["shared_name"] = value
+	}
+}
+
+// A queue that randomizes the order of elements.
 //
 // Arguments:
-//	input: The input to reverse.
-//	seq_lengths: 1-D with length `input.dims(batch_dim)` and
-// `max(seq_lengths) <= input.dims(seq_dim)`
-//	seq_dim: The dimension which is partially reversed.
+//	component_types: The type of each component in a value.
 //
-// Returns The partially reversed input. It has the same shape as `input`.
-func ReverseSequence(scope *Scope, input tf.Output, seq_lengths tf.Output, seq_dim int64, optional ...ReverseSequenceAttr) (output tf.Output) {
+// Returns The handle to the queue.
+func RandomShuffleQueueV2(scope *Scope, component_types []tf.DataType, optional ...RandomShuffleQueueV2Attr) (handle tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{"seq_dim": seq_dim}
+	attrs := map[string]interface{}{"component_types": component_types}
 	for _, a := range optional {
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "ReverseSequence",
-		Input: []tf.Input{
-			input, seq_lengths,
-		},
+		Type: "RandomShuffleQueueV2",
+
 		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// Computes the gradient for the rsqrt of `x` wrt its input.
+// Outputs a `Summary` protocol buffer with scalar values.
 //
-// Specifically, `grad = dy * -0.5 * y^3`, where `y = rsqrt(x)`, and `dy`
-// is the corresponding input gradient.
-func RsqrtGrad(scope *Scope, y tf.Output, dy tf.Output) (z tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "RsqrtGrad",
-		Input: []tf.Input{
-			y, dy,
-		},
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// Makes its input available to the next iteration.
+// The input `tags` and `values` must have the same shape.  The generated summary
+// has a summary value for each tag-value pair in `tags` and `values`.
 //
 // Arguments:
-//	data: The tensor to be made available to the next iteration.
+//	tags: Tags for the summary.
+//	values: Same shape as `tags.  Values for the summary.
 //
-// Returns The same tensor as `data`.
-func NextIteration(scope *Scope, data tf.Output) (output tf.Output) {
+// Returns Scalar.  Serialized `Summary` protocol buffer.
+func ScalarSummary(scope *Scope, tags tf.Output, values tf.Output) (summary tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
 	opspec := tf.OpSpec{
-		Type: "NextIteration",
+		Type: "ScalarSummary",
 		Input: []tf.Input{
-			data,
+			tags, values,
 		},
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// Does nothing. Only useful as a placeholder for control edges.
+// TensorSummaryAttr is an optional argument to TensorSummary.
+type TensorSummaryAttr func(optionalAttr)
+
+// TensorSummaryDescription sets the optional description attribute to value.
 //
-// Returns the created operation.
-func NoOp(scope *Scope) (o *tf.Operation) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "NoOp",
+// value: A json-encoded SummaryDescription proto.
+// If not specified, defaults to ""
+func TensorSummaryDescription(value string) TensorSummaryAttr {
+	return func(m optionalAttr) {
+		m["description"] = value
 	}
-	return scope.AddOperation(opspec)
 }
 
-// DepthwiseConv2dNativeAttr is an optional argument to DepthwiseConv2dNative.
-type DepthwiseConv2dNativeAttr func(optionalAttr)
-
-// DepthwiseConv2dNativeDataFormat sets the optional data_format attribute to value.
+// TensorSummaryLabels sets the optional labels attribute to value.
 //
-// value: Specify the data format of the input and output data. With the
-// default format "NHWC", the data is stored in the order of:
-//     [batch, height, width, channels].
-// Alternatively, the format could be "NCHW", the data storage order of:
-//     [batch, channels, height, width].
-// If not specified, defaults to "NHWC"
-func DepthwiseConv2dNativeDataFormat(value string) DepthwiseConv2dNativeAttr {
+// value: An unused list of strings.
+// If not specified, defaults to <>
+func TensorSummaryLabels(value []string) TensorSummaryAttr {
 	return func(m optionalAttr) {
-		m["data_format"] = value
+		m["labels"] = value
 	}
 }
 
-// Computes a 2-D depthwise convolution given 4-D `input` and `filter` tensors.
-//
-// Given an input tensor of shape `[batch, in_height, in_width, in_channels]`
-// and a filter / kernel tensor of shape
-// `[filter_height, filter_width, in_channels, channel_multiplier]`, containing
-// `in_channels` convolutional filters of depth 1, `depthwise_conv2d` applies
-// a different filter to each input channel (expanding from 1 channel to
-// `channel_multiplier` channels for each), then concatenates the results
-// together. Thus, the output has `in_channels * channel_multiplier` channels.
+// TensorSummaryDisplayName sets the optional display_name attribute to value.
 //
-// ```
-// for k in 0..in_channels-1
-//   for q in 0..channel_multiplier-1
-//     output[b, i, j, k * channel_multiplier + q] =
-//       sum_{di, dj} input[b, strides[1] * i + di, strides[2] * j + dj, k] *
-//                         filter[di, dj, k, q]
-// ```
+// value: An unused string.
+// If not specified, defaults to ""
+func TensorSummaryDisplayName(value string) TensorSummaryAttr {
+	return func(m optionalAttr) {
+		m["display_name"] = value
+	}
+}
+
+// Outputs a `Summary` protocol buffer with a tensor.
 //
-// Must have `strides[0] = strides[3] = 1`.  For the most common case of the same
-// horizontal and vertices strides, `strides = [1, stride, stride, 1]`.
+// This op is being phased out in favor of TensorSummaryV2, which lets callers pass
+// a tag as well as a serialized SummaryMetadata proto string that contains
+// plugin-specific data. We will keep this op to maintain backwards compatibility.
 //
 // Arguments:
-//
-//
-//	strides: 1-D of length 4.  The stride of the sliding window for each dimension
-// of `input`.
-//	padding: The type of padding algorithm to use.
-func DepthwiseConv2dNative(scope *Scope, input tf.Output, filter tf.Output, strides []int64, padding string, optional ...DepthwiseConv2dNativeAttr) (output tf.Output) {
+//	tensor: A tensor to serialize.
+func TensorSummary(scope *Scope, tensor tf.Output, optional ...TensorSummaryAttr) (summary tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{"strides": strides, "padding": padding}
+	attrs := map[string]interface{}{}
 	for _, a := range optional {
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "DepthwiseConv2dNative",
+		Type: "TensorSummary",
 		Input: []tf.Input{
-			input, filter,
+			tensor,
 		},
 		Attrs: attrs,
 	}
@@ -8549,66 +8617,94 @@ func DepthwiseConv2dNative(scope *Scope, input tf.Output, filter tf.Output, stri
 	return op.Output(0)
 }
 
-// CropAndResizeAttr is an optional argument to CropAndResize.
-type CropAndResizeAttr func(optionalAttr)
-
-// CropAndResizeMethod sets the optional method attribute to value.
+// Creates a dataset that asynchronously prefetches elements from `input_dataset`.
 //
-// value: A string specifying the interpolation method. Only 'bilinear' is
-// supported for now.
-// If not specified, defaults to "bilinear"
-func CropAndResizeMethod(value string) CropAndResizeAttr {
+// Arguments:
+//
+//	buffer_size: The maximum number of elements to buffer in an iterator over
+// this dataset.
+//
+//
+func PrefetchDataset(scope *Scope, input_dataset tf.Output, buffer_size tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes}
+	opspec := tf.OpSpec{
+		Type: "PrefetchDataset",
+		Input: []tf.Input{
+			input_dataset, buffer_size,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// Outputs a `Summary` protocol buffer with a tensor and per-plugin data.
+//
+// Arguments:
+//	tag: A string attached to this summary. Used for organization in TensorBoard.
+//	tensor: A tensor to serialize.
+//	serialized_summary_metadata: A serialized SummaryMetadata proto. Contains plugin
+// data.
+func TensorSummaryV2(scope *Scope, tag tf.Output, tensor tf.Output, serialized_summary_metadata tf.Output) (summary tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "TensorSummaryV2",
+		Input: []tf.Input{
+			tag, tensor, serialized_summary_metadata,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// PrintAttr is an optional argument to Print.
+type PrintAttr func(optionalAttr)
+
+// PrintMessage sets the optional message attribute to value.
+//
+// value: A string, prefix of the error message.
+// If not specified, defaults to ""
+func PrintMessage(value string) PrintAttr {
 	return func(m optionalAttr) {
-		m["method"] = value
+		m["message"] = value
 	}
 }
 
-// CropAndResizeExtrapolationValue sets the optional extrapolation_value attribute to value.
+// PrintFirstN sets the optional first_n attribute to value.
 //
-// value: Value used for extrapolation, when applicable.
-// If not specified, defaults to 0
-func CropAndResizeExtrapolationValue(value float32) CropAndResizeAttr {
+// value: Only log `first_n` number of times. -1 disables logging.
+// If not specified, defaults to -1
+func PrintFirstN(value int64) PrintAttr {
 	return func(m optionalAttr) {
-		m["extrapolation_value"] = value
+		m["first_n"] = value
 	}
 }
 
-// Extracts crops from the input image tensor and bilinearly resizes them (possibly
+// PrintSummarize sets the optional summarize attribute to value.
 //
-// with aspect ratio change) to a common output size specified by `crop_size`. This
-// is more general than the `crop_to_bounding_box` op which extracts a fixed size
-// slice from the input image and does not allow resizing or aspect ratio change.
+// value: Only print this many entries of each tensor.
+// If not specified, defaults to 3
+func PrintSummarize(value int64) PrintAttr {
+	return func(m optionalAttr) {
+		m["summarize"] = value
+	}
+}
+
+// Prints a list of tensors.
 //
-// Returns a tensor with `crops` from the input `image` at positions defined at the
-// bounding box locations in `boxes`. The cropped boxes are all resized (with
-// bilinear interpolation) to a fixed `size = [crop_height, crop_width]`. The
-// result is a 4-D tensor `[num_boxes, crop_height, crop_width, depth]`. The
-// resizing is corner aligned. In particular, if `boxes = [[0, 0, 1, 1]]`, the
-// method will give identical results to using `tf.image.resize_bilinear()`
-// with `align_corners=True`.
+// Passes `input` through to `output` and prints `data` when evaluating.
 //
 // Arguments:
-//	image: A 4-D tensor of shape `[batch, image_height, image_width, depth]`.
-// Both `image_height` and `image_width` need to be positive.
-//	boxes: A 2-D tensor of shape `[num_boxes, 4]`. The `i`-th row of the tensor
-// specifies the coordinates of a box in the `box_ind[i]` image and is specified
-// in normalized coordinates `[y1, x1, y2, x2]`. A normalized coordinate value of
-// `y` is mapped to the image coordinate at `y * (image_height - 1)`, so as the
-// `[0, 1]` interval of normalized image height is mapped to
-// `[0, image_height - 1]` in image height coordinates. We do allow `y1` > `y2`, in
-// which case the sampled crop is an up-down flipped version of the original
-// image. The width dimension is treated similarly. Normalized coordinates
-// outside the `[0, 1]` range are allowed, in which case we use
-// `extrapolation_value` to extrapolate the input image values.
-//	box_ind: A 1-D tensor of shape `[num_boxes]` with int32 values in `[0, batch)`.
-// The value of `box_ind[i]` specifies the image that the `i`-th box refers to.
-//	crop_size: A 1-D tensor of 2 elements, `size = [crop_height, crop_width]`. All
-// cropped image patches are resized to this size. The aspect ratio of the image
-// content is not preserved. Both `crop_height` and `crop_width` need to be
-// positive.
+//	input: The tensor passed to `output`
+//	data: A list of tensors to print out when op is evaluated.
 //
-// Returns A 4-D tensor of shape `[num_boxes, crop_height, crop_width, depth]`.
-func CropAndResize(scope *Scope, image tf.Output, boxes tf.Output, box_ind tf.Output, crop_size tf.Output, optional ...CropAndResizeAttr) (crops tf.Output) {
+// Returns = The unmodified `input` tensor
+func Print(scope *Scope, input tf.Output, data []tf.Output, optional ...PrintAttr) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
@@ -8617,9 +8713,9 @@ func CropAndResize(scope *Scope, image tf.Output, boxes tf.Output, box_ind tf.Ou
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "CropAndResize",
+		Type: "Print",
 		Input: []tf.Input{
-			image, boxes, box_ind, crop_size,
+			input, tf.OutputList(data),
 		},
 		Attrs: attrs,
 	}
@@ -8627,47 +8723,109 @@ func CropAndResize(scope *Scope, image tf.Output, boxes tf.Output, box_ind tf.Ou
 	return op.Output(0)
 }
 
-// MaxPoolGradAttr is an optional argument to MaxPoolGrad.
-type MaxPoolGradAttr func(optionalAttr)
+// Makes its input available to the next iteration.
+//
+// Arguments:
+//	data: The tensor to be made available to the next iteration.
+//
+// Returns The same tensor as `data`.
+func NextIteration(scope *Scope, data tf.Output) (output tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "NextIteration",
+		Input: []tf.Input{
+			data,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
 
-// MaxPoolGradDataFormat sets the optional data_format attribute to value.
+// Does nothing. Only useful as a placeholder for control edges.
+//
+// Returns the created operation.
+func NoOp(scope *Scope) (o *tf.Operation) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "NoOp",
+	}
+	return scope.AddOperation(opspec)
+}
+
+// DepthwiseConv2dNativeAttr is an optional argument to DepthwiseConv2dNative.
+type DepthwiseConv2dNativeAttr func(optionalAttr)
+
+// DepthwiseConv2dNativeDataFormat sets the optional data_format attribute to value.
 //
 // value: Specify the data format of the input and output data. With the
 // default format "NHWC", the data is stored in the order of:
-//     [batch, in_height, in_width, in_channels].
+//     [batch, height, width, channels].
 // Alternatively, the format could be "NCHW", the data storage order of:
-//     [batch, in_channels, in_height, in_width].
+//     [batch, channels, height, width].
 // If not specified, defaults to "NHWC"
-func MaxPoolGradDataFormat(value string) MaxPoolGradAttr {
+func DepthwiseConv2dNativeDataFormat(value string) DepthwiseConv2dNativeAttr {
 	return func(m optionalAttr) {
 		m["data_format"] = value
 	}
 }
 
-// Computes gradients of the maxpooling function.
+// DepthwiseConv2dNativeDilations sets the optional dilations attribute to value.
+//
+// value: 1-D tensor of length 4.  The dilation factor for each dimension of
+// `input`. If set to k > 1, there will be k-1 skipped cells between each filter
+// element on that dimension. The dimension order is determined by the value of
+// `data_format`, see above for details. Dilations in the batch and depth
+// dimensions must be 1.
+// If not specified, defaults to <i:1 i:1 i:1 i:1 >
+func DepthwiseConv2dNativeDilations(value []int64) DepthwiseConv2dNativeAttr {
+	return func(m optionalAttr) {
+		m["dilations"] = value
+	}
+}
+
+// Computes a 2-D depthwise convolution given 4-D `input` and `filter` tensors.
+//
+// Given an input tensor of shape `[batch, in_height, in_width, in_channels]`
+// and a filter / kernel tensor of shape
+// `[filter_height, filter_width, in_channels, channel_multiplier]`, containing
+// `in_channels` convolutional filters of depth 1, `depthwise_conv2d` applies
+// a different filter to each input channel (expanding from 1 channel to
+// `channel_multiplier` channels for each), then concatenates the results
+// together. Thus, the output has `in_channels * channel_multiplier` channels.
+//
+// ```
+// for k in 0..in_channels-1
+//   for q in 0..channel_multiplier-1
+//     output[b, i, j, k * channel_multiplier + q] =
+//       sum_{di, dj} input[b, strides[1] * i + di, strides[2] * j + dj, k] *
+//                         filter[di, dj, k, q]
+// ```
+//
+// Must have `strides[0] = strides[3] = 1`.  For the most common case of the same
+// horizontal and vertices strides, `strides = [1, stride, stride, 1]`.
 //
 // Arguments:
-//	orig_input: The original input tensor.
-//	orig_output: The original output tensor.
-//	grad: 4-D.  Gradients w.r.t. the output of `max_pool`.
-//	ksize: The size of the window for each dimension of the input tensor.
-//	strides: The stride of the sliding window for each dimension of the
-// input tensor.
-//	padding: The type of padding algorithm to use.
 //
-// Returns Gradients w.r.t. the input to `max_pool`.
-func MaxPoolGrad(scope *Scope, orig_input tf.Output, orig_output tf.Output, grad tf.Output, ksize []int64, strides []int64, padding string, optional ...MaxPoolGradAttr) (output tf.Output) {
+//
+//	strides: 1-D of length 4.  The stride of the sliding window for each dimension
+// of `input`.
+//	padding: The type of padding algorithm to use.
+func DepthwiseConv2dNative(scope *Scope, input tf.Output, filter tf.Output, strides []int64, padding string, optional ...DepthwiseConv2dNativeAttr) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{"ksize": ksize, "strides": strides, "padding": padding}
+	attrs := map[string]interface{}{"strides": strides, "padding": padding}
 	for _, a := range optional {
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "MaxPoolGrad",
+		Type: "DepthwiseConv2dNative",
 		Input: []tf.Input{
-			orig_input, orig_output, grad,
+			input, filter,
 		},
 		Attrs: attrs,
 	}
@@ -8675,46 +8833,271 @@ func MaxPoolGrad(scope *Scope, orig_input tf.Output, orig_output tf.Output, grad
 	return op.Output(0)
 }
 
-// EncodeJpegAttr is an optional argument to EncodeJpeg.
-type EncodeJpegAttr func(optionalAttr)
+// DataFormatDimMapAttr is an optional argument to DataFormatDimMap.
+type DataFormatDimMapAttr func(optionalAttr)
 
-// EncodeJpegFormat sets the optional format attribute to value.
+// DataFormatDimMapSrcFormat sets the optional src_format attribute to value.
 //
-// value: Per pixel image format.
-// If not specified, defaults to ""
-func EncodeJpegFormat(value string) EncodeJpegAttr {
+// value: source data format.
+// If not specified, defaults to "NHWC"
+func DataFormatDimMapSrcFormat(value string) DataFormatDimMapAttr {
 	return func(m optionalAttr) {
-		m["format"] = value
+		m["src_format"] = value
 	}
 }
 
-// EncodeJpegQuality sets the optional quality attribute to value.
+// DataFormatDimMapDstFormat sets the optional dst_format attribute to value.
 //
-// value: Quality of the compression from 0 to 100 (higher is better and slower).
-// If not specified, defaults to 95
-func EncodeJpegQuality(value int64) EncodeJpegAttr {
+// value: destination data format.
+// If not specified, defaults to "NCHW"
+func DataFormatDimMapDstFormat(value string) DataFormatDimMapAttr {
 	return func(m optionalAttr) {
-		m["quality"] = value
+		m["dst_format"] = value
 	}
 }
 
-// EncodeJpegProgressive sets the optional progressive attribute to value.
+// Returns the dimension index in the destination data format given the one in
 //
-// value: If True, create a JPEG that loads progressively (coarse to fine).
-// If not specified, defaults to false
-func EncodeJpegProgressive(value bool) EncodeJpegAttr {
-	return func(m optionalAttr) {
-		m["progressive"] = value
+// the source data format.
+//
+// Arguments:
+//	x: Scalar. Dimension index in source data format. Must be in the range [-4, 4).
+//
+// Returns Scalar. Dimension index in destination data format.
+func DataFormatDimMap(scope *Scope, x tf.Output, optional ...DataFormatDimMapAttr) (y tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "DataFormatDimMap",
+		Input: []tf.Input{
+			x,
+		},
+		Attrs: attrs,
 	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
 }
 
-// EncodeJpegOptimizeSize sets the optional optimize_size attribute to value.
+// ResourceApplyPowerSignAttr is an optional argument to ResourceApplyPowerSign.
+type ResourceApplyPowerSignAttr func(optionalAttr)
+
+// ResourceApplyPowerSignUseLocking sets the optional use_locking attribute to value.
 //
-// value: If True, spend CPU/RAM to reduce size with no quality change.
+// value: If `True`, updating of the var and m tensors is
+// protected by a lock; otherwise the behavior is undefined, but may exhibit less
+// contention.
 // If not specified, defaults to false
-func EncodeJpegOptimizeSize(value bool) EncodeJpegAttr {
+func ResourceApplyPowerSignUseLocking(value bool) ResourceApplyPowerSignAttr {
 	return func(m optionalAttr) {
-		m["optimize_size"] = value
+		m["use_locking"] = value
+	}
+}
+
+// Update '*var' according to the AddSign update.
+//
+// m_t <- beta1 * m_{t-1} + (1 - beta1) * g
+// update <- exp(logbase * sign_decay * sign(g) * sign(m_t)) * g
+// variable <- variable - lr_t * update
+//
+// Arguments:
+//	var_: Should be from a Variable().
+//	m: Should be from a Variable().
+//	lr: Scaling factor. Must be a scalar.
+//	logbase: Must be a scalar.
+//	sign_decay: Must be a scalar.
+//	beta: Must be a scalar.
+//	grad: The gradient.
+//
+// Returns the created operation.
+func ResourceApplyPowerSign(scope *Scope, var_ tf.Output, m tf.Output, lr tf.Output, logbase tf.Output, sign_decay tf.Output, beta tf.Output, grad tf.Output, optional ...ResourceApplyPowerSignAttr) (o *tf.Operation) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "ResourceApplyPowerSign",
+		Input: []tf.Input{
+			var_, m, lr, logbase, sign_decay, beta, grad,
+		},
+		Attrs: attrs,
+	}
+	return scope.AddOperation(opspec)
+}
+
+// CropAndResizeAttr is an optional argument to CropAndResize.
+type CropAndResizeAttr func(optionalAttr)
+
+// CropAndResizeMethod sets the optional method attribute to value.
+//
+// value: A string specifying the interpolation method. Only 'bilinear' is
+// supported for now.
+// If not specified, defaults to "bilinear"
+func CropAndResizeMethod(value string) CropAndResizeAttr {
+	return func(m optionalAttr) {
+		m["method"] = value
+	}
+}
+
+// CropAndResizeExtrapolationValue sets the optional extrapolation_value attribute to value.
+//
+// value: Value used for extrapolation, when applicable.
+// If not specified, defaults to 0
+func CropAndResizeExtrapolationValue(value float32) CropAndResizeAttr {
+	return func(m optionalAttr) {
+		m["extrapolation_value"] = value
+	}
+}
+
+// Extracts crops from the input image tensor and bilinearly resizes them (possibly
+//
+// with aspect ratio change) to a common output size specified by `crop_size`. This
+// is more general than the `crop_to_bounding_box` op which extracts a fixed size
+// slice from the input image and does not allow resizing or aspect ratio change.
+//
+// Returns a tensor with `crops` from the input `image` at positions defined at the
+// bounding box locations in `boxes`. The cropped boxes are all resized (with
+// bilinear interpolation) to a fixed `size = [crop_height, crop_width]`. The
+// result is a 4-D tensor `[num_boxes, crop_height, crop_width, depth]`. The
+// resizing is corner aligned. In particular, if `boxes = [[0, 0, 1, 1]]`, the
+// method will give identical results to using `tf.image.resize_bilinear()`
+// with `align_corners=True`.
+//
+// Arguments:
+//	image: A 4-D tensor of shape `[batch, image_height, image_width, depth]`.
+// Both `image_height` and `image_width` need to be positive.
+//	boxes: A 2-D tensor of shape `[num_boxes, 4]`. The `i`-th row of the tensor
+// specifies the coordinates of a box in the `box_ind[i]` image and is specified
+// in normalized coordinates `[y1, x1, y2, x2]`. A normalized coordinate value of
+// `y` is mapped to the image coordinate at `y * (image_height - 1)`, so as the
+// `[0, 1]` interval of normalized image height is mapped to
+// `[0, image_height - 1]` in image height coordinates. We do allow `y1` > `y2`, in
+// which case the sampled crop is an up-down flipped version of the original
+// image. The width dimension is treated similarly. Normalized coordinates
+// outside the `[0, 1]` range are allowed, in which case we use
+// `extrapolation_value` to extrapolate the input image values.
+//	box_ind: A 1-D tensor of shape `[num_boxes]` with int32 values in `[0, batch)`.
+// The value of `box_ind[i]` specifies the image that the `i`-th box refers to.
+//	crop_size: A 1-D tensor of 2 elements, `size = [crop_height, crop_width]`. All
+// cropped image patches are resized to this size. The aspect ratio of the image
+// content is not preserved. Both `crop_height` and `crop_width` need to be
+// positive.
+//
+// Returns A 4-D tensor of shape `[num_boxes, crop_height, crop_width, depth]`.
+func CropAndResize(scope *Scope, image tf.Output, boxes tf.Output, box_ind tf.Output, crop_size tf.Output, optional ...CropAndResizeAttr) (crops tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "CropAndResize",
+		Input: []tf.Input{
+			image, boxes, box_ind, crop_size,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// MaxPoolGradAttr is an optional argument to MaxPoolGrad.
+type MaxPoolGradAttr func(optionalAttr)
+
+// MaxPoolGradDataFormat sets the optional data_format attribute to value.
+//
+// value: Specify the data format of the input and output data. With the
+// default format "NHWC", the data is stored in the order of:
+//     [batch, in_height, in_width, in_channels].
+// Alternatively, the format could be "NCHW", the data storage order of:
+//     [batch, in_channels, in_height, in_width].
+// If not specified, defaults to "NHWC"
+func MaxPoolGradDataFormat(value string) MaxPoolGradAttr {
+	return func(m optionalAttr) {
+		m["data_format"] = value
+	}
+}
+
+// Computes gradients of the maxpooling function.
+//
+// Arguments:
+//	orig_input: The original input tensor.
+//	orig_output: The original output tensor.
+//	grad: 4-D.  Gradients w.r.t. the output of `max_pool`.
+//	ksize: The size of the window for each dimension of the input tensor.
+//	strides: The stride of the sliding window for each dimension of the
+// input tensor.
+//	padding: The type of padding algorithm to use.
+//
+// Returns Gradients w.r.t. the input to `max_pool`.
+func MaxPoolGrad(scope *Scope, orig_input tf.Output, orig_output tf.Output, grad tf.Output, ksize []int64, strides []int64, padding string, optional ...MaxPoolGradAttr) (output tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{"ksize": ksize, "strides": strides, "padding": padding}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "MaxPoolGrad",
+		Input: []tf.Input{
+			orig_input, orig_output, grad,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// EncodeJpegAttr is an optional argument to EncodeJpeg.
+type EncodeJpegAttr func(optionalAttr)
+
+// EncodeJpegFormat sets the optional format attribute to value.
+//
+// value: Per pixel image format.
+// If not specified, defaults to ""
+func EncodeJpegFormat(value string) EncodeJpegAttr {
+	return func(m optionalAttr) {
+		m["format"] = value
+	}
+}
+
+// EncodeJpegQuality sets the optional quality attribute to value.
+//
+// value: Quality of the compression from 0 to 100 (higher is better and slower).
+// If not specified, defaults to 95
+func EncodeJpegQuality(value int64) EncodeJpegAttr {
+	return func(m optionalAttr) {
+		m["quality"] = value
+	}
+}
+
+// EncodeJpegProgressive sets the optional progressive attribute to value.
+//
+// value: If True, create a JPEG that loads progressively (coarse to fine).
+// If not specified, defaults to false
+func EncodeJpegProgressive(value bool) EncodeJpegAttr {
+	return func(m optionalAttr) {
+		m["progressive"] = value
+	}
+}
+
+// EncodeJpegOptimizeSize sets the optional optimize_size attribute to value.
+//
+// value: If True, spend CPU/RAM to reduce size with no quality change.
+// If not specified, defaults to false
+func EncodeJpegOptimizeSize(value bool) EncodeJpegAttr {
+	return func(m optionalAttr) {
+		m["optimize_size"] = value
 	}
 }
 
@@ -8947,6 +9330,20 @@ func Conv2DBackpropInputDataFormat(value string) Conv2DBackpropInputAttr {
 	}
 }
 
+// Conv2DBackpropInputDilations sets the optional dilations attribute to value.
+//
+// value: 1-D tensor of length 4.  The dilation factor for each dimension of
+// `input`. If set to k > 1, there will be k-1 skipped cells between each filter
+// element on that dimension. The dimension order is determined by the value of
+// `data_format`, see above for details. Dilations in the batch and depth
+// dimensions must be 1.
+// If not specified, defaults to <i:1 i:1 i:1 i:1 >
+func Conv2DBackpropInputDilations(value []int64) Conv2DBackpropInputAttr {
+	return func(m optionalAttr) {
+		m["dilations"] = value
+	}
+}
+
 // Computes the gradients of convolution with respect to the input.
 //
 // Arguments:
@@ -10035,120 +10432,50 @@ func RandomUniformInt(scope *Scope, shape tf.Output, minval tf.Output, maxval tf
 	return op.Output(0)
 }
 
-// Convert JSON-encoded Example records to binary protocol buffer strings.
+// Delete the TensorArray from its resource container.
 //
-// This op translates a tensor containing Example records, encoded using
-// the [standard JSON
-// mapping](https://developers.google.com/protocol-buffers/docs/proto3#json),
-// into a tensor containing the same records encoded as binary protocol
-// buffers. The resulting tensor can then be fed to any of the other
-// Example-parsing ops.
+// This enables the user to close and release the resource in the middle
+// of a step/run.
 //
 // Arguments:
-//	json_examples: Each string is a JSON object serialized according to the JSON
-// mapping of the Example proto.
+//	handle: The handle to a TensorArray (output of TensorArray or TensorArrayGrad).
 //
-// Returns Each string is a binary Example protocol buffer corresponding
-// to the respective element of `json_examples`.
-func DecodeJSONExample(scope *Scope, json_examples tf.Output) (binary_examples tf.Output) {
+// Returns the created operation.
+func TensorArrayCloseV3(scope *Scope, handle tf.Output) (o *tf.Operation) {
 	if scope.Err() != nil {
 		return
 	}
 	opspec := tf.OpSpec{
-		Type: "DecodeJSONExample",
+		Type: "TensorArrayCloseV3",
 		Input: []tf.Input{
-			json_examples,
+			handle,
 		},
 	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
+	return scope.AddOperation(opspec)
 }
 
-// Adds sparse updates to the variable referenced by `resource`.
+// ResourceGatherAttr is an optional argument to ResourceGather.
+type ResourceGatherAttr func(optionalAttr)
+
+// ResourceGatherValidateIndices sets the optional validate_indices attribute to value.
+// If not specified, defaults to true
+func ResourceGatherValidateIndices(value bool) ResourceGatherAttr {
+	return func(m optionalAttr) {
+		m["validate_indices"] = value
+	}
+}
+
+// Gather slices from the variable pointed to by `resource` according to `indices`.
 //
-// This operation computes
+// `indices` must be an integer tensor of any dimension (usually 0-D or 1-D).
+// Produces an output tensor with shape `indices.shape + params.shape[1:]` where:
 //
+// ```python
 //     # Scalar indices
-//     ref[indices, ...] += updates[...]
+//     output[:, ..., :] = params[indices, :, ... :]
 //
-//     # Vector indices (for each i)
-//     ref[indices[i], ...] += updates[i, ...]
-//
-//     # High rank indices (for each i, ..., j)
-//     ref[indices[i, ..., j], ...] += updates[i, ..., j, ...]
-//
-// Duplicate entries are handled correctly: if multiple `indices` reference
-// the same location, their contributions add.
-//
-// Requires `updates.shape = indices.shape + ref.shape[1:]`.
-//
-// <div style="width:70%; margin:auto; margin-bottom:10px; margin-top:20px;">
-// <img style="width:100%" src='https://www.tensorflow.org/images/ScatterAdd.png' alt>
-// </div>
-//
-// Arguments:
-//	resource: Should be from a `Variable` node.
-//	indices: A tensor of indices into the first dimension of `ref`.
-//	updates: A tensor of updated values to add to `ref`.
-//
-// Returns the created operation.
-func ResourceScatterAdd(scope *Scope, resource tf.Output, indices tf.Output, updates tf.Output) (o *tf.Operation) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "ResourceScatterAdd",
-		Input: []tf.Input{
-			resource, indices, updates,
-		},
-	}
-	return scope.AddOperation(opspec)
-}
-
-// Delete the TensorArray from its resource container.
-//
-// This enables the user to close and release the resource in the middle
-// of a step/run.
-//
-// Arguments:
-//	handle: The handle to a TensorArray (output of TensorArray or TensorArrayGrad).
-//
-// Returns the created operation.
-func TensorArrayCloseV3(scope *Scope, handle tf.Output) (o *tf.Operation) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "TensorArrayCloseV3",
-		Input: []tf.Input{
-			handle,
-		},
-	}
-	return scope.AddOperation(opspec)
-}
-
-// ResourceGatherAttr is an optional argument to ResourceGather.
-type ResourceGatherAttr func(optionalAttr)
-
-// ResourceGatherValidateIndices sets the optional validate_indices attribute to value.
-// If not specified, defaults to true
-func ResourceGatherValidateIndices(value bool) ResourceGatherAttr {
-	return func(m optionalAttr) {
-		m["validate_indices"] = value
-	}
-}
-
-// Gather slices from the variable pointed to by `resource` according to `indices`.
-//
-// `indices` must be an integer tensor of any dimension (usually 0-D or 1-D).
-// Produces an output tensor with shape `indices.shape + params.shape[1:]` where:
-//
-// ```python
-//     # Scalar indices
-//     output[:, ..., :] = params[indices, :, ... :]
-//
-//     # Vector indices
-//     output[i, :, ..., :] = params[indices[i], :, ... :]
+//     # Vector indices
+//     output[i, :, ..., :] = params[indices[i], :, ... :]
 //
 //     # Higher rank indices
 //     output[i, ..., j, :, ... :] = params[indices[i, ..., j], :, ..., :]
@@ -10183,6 +10510,20 @@ func QuantizedConv2DOutType(value tf.DataType) QuantizedConv2DAttr {
 	}
 }
 
+// QuantizedConv2DDilations sets the optional dilations attribute to value.
+//
+// value: 1-D tensor of length 4.  The dilation factor for each dimension of
+// `input`. If set to k > 1, there will be k-1 skipped cells between each
+// filter element on that dimension. The dimension order is determined by the
+// value of `data_format`, see above for details. Dilations in the batch and
+// depth dimensions must be 1.
+// If not specified, defaults to <i:1 i:1 i:1 i:1 >
+func QuantizedConv2DDilations(value []int64) QuantizedConv2DAttr {
+	return func(m optionalAttr) {
+		m["dilations"] = value
+	}
+}
+
 // Computes a 2D convolution given quantized 4D input and filter tensors.
 //
 // The inputs are quantized tensors where the lowest value represents the real
@@ -10549,6 +10890,175 @@ func ReciprocalGrad(scope *Scope, y tf.Output, dy tf.Output) (z tf.Output) {
 	return op.Output(0)
 }
 
+// Convert JSON-encoded Example records to binary protocol buffer strings.
+//
+// This op translates a tensor containing Example records, encoded using
+// the [standard JSON
+// mapping](https://developers.google.com/protocol-buffers/docs/proto3#json),
+// into a tensor containing the same records encoded as binary protocol
+// buffers. The resulting tensor can then be fed to any of the other
+// Example-parsing ops.
+//
+// Arguments:
+//	json_examples: Each string is a JSON object serialized according to the JSON
+// mapping of the Example proto.
+//
+// Returns Each string is a binary Example protocol buffer corresponding
+// to the respective element of `json_examples`.
+func DecodeJSONExample(scope *Scope, json_examples tf.Output) (binary_examples tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "DecodeJSONExample",
+		Input: []tf.Input{
+			json_examples,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// Adds sparse updates to the variable referenced by `resource`.
+//
+// This operation computes
+//
+//     # Scalar indices
+//     ref[indices, ...] += updates[...]
+//
+//     # Vector indices (for each i)
+//     ref[indices[i], ...] += updates[i, ...]
+//
+//     # High rank indices (for each i, ..., j)
+//     ref[indices[i, ..., j], ...] += updates[i, ..., j, ...]
+//
+// Duplicate entries are handled correctly: if multiple `indices` reference
+// the same location, their contributions add.
+//
+// Requires `updates.shape = indices.shape + ref.shape[1:]`.
+//
+// <div style="width:70%; margin:auto; margin-bottom:10px; margin-top:20px;">
+// <img style="width:100%" src='https://www.tensorflow.org/images/ScatterAdd.png' alt>
+// </div>
+//
+// Arguments:
+//	resource: Should be from a `Variable` node.
+//	indices: A tensor of indices into the first dimension of `ref`.
+//	updates: A tensor of updated values to add to `ref`.
+//
+// Returns the created operation.
+func ResourceScatterAdd(scope *Scope, resource tf.Output, indices tf.Output, updates tf.Output) (o *tf.Operation) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "ResourceScatterAdd",
+		Input: []tf.Input{
+			resource, indices, updates,
+		},
+	}
+	return scope.AddOperation(opspec)
+}
+
+// Eagerly executes a python function to compute func(input)->output. The
+//
+// semantics of the input, output, and attributes are the same as those for
+// PyFunc.
+func EagerPyFunc(scope *Scope, input []tf.Output, token string, Tout []tf.DataType) (output []tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{"token": token, "Tout": Tout}
+	opspec := tf.OpSpec{
+		Type: "EagerPyFunc",
+		Input: []tf.Input{
+			tf.OutputList(input),
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	if scope.Err() != nil {
+		return
+	}
+	var idx int
+	var err error
+	if output, idx, err = makeOutputList(op, idx, "output"); err != nil {
+		scope.UpdateErr("EagerPyFunc", err)
+		return
+	}
+	return output
+}
+
+// DepthwiseConv2dNativeBackpropInputAttr is an optional argument to DepthwiseConv2dNativeBackpropInput.
+type DepthwiseConv2dNativeBackpropInputAttr func(optionalAttr)
+
+// DepthwiseConv2dNativeBackpropInputDataFormat sets the optional data_format attribute to value.
+//
+// value: Specify the data format of the input and output data. With the
+// default format "NHWC", the data is stored in the order of:
+//     [batch, height, width, channels].
+// Alternatively, the format could be "NCHW", the data storage order of:
+//     [batch, channels, height, width].
+// If not specified, defaults to "NHWC"
+func DepthwiseConv2dNativeBackpropInputDataFormat(value string) DepthwiseConv2dNativeBackpropInputAttr {
+	return func(m optionalAttr) {
+		m["data_format"] = value
+	}
+}
+
+// DepthwiseConv2dNativeBackpropInputDilations sets the optional dilations attribute to value.
+//
+// value: 1-D tensor of length 4.  The dilation factor for each dimension of
+// `input`. If set to k > 1, there will be k-1 skipped cells between each filter
+// element on that dimension. The dimension order is determined by the value of
+// `data_format`, see above for details. Dilations in the batch and depth
+// dimensions must be 1.
+// If not specified, defaults to <i:1 i:1 i:1 i:1 >
+func DepthwiseConv2dNativeBackpropInputDilations(value []int64) DepthwiseConv2dNativeBackpropInputAttr {
+	return func(m optionalAttr) {
+		m["dilations"] = value
+	}
+}
+
+// Computes the gradients of depthwise convolution with respect to the input.
+//
+// Arguments:
+//	input_sizes: An integer vector representing the shape of `input`, based
+// on `data_format`.  For example, if `data_format` is 'NHWC' then
+//  `input` is a 4-D `[batch, height, width, channels]` tensor.
+//	filter: 4-D with shape
+// `[filter_height, filter_width, in_channels, depthwise_multiplier]`.
+//	out_backprop: 4-D with shape  based on `data_format`.
+// For example, if `data_format` is 'NHWC' then
+// out_backprop shape is `[batch, out_height, out_width, out_channels]`.
+// Gradients w.r.t. the output of the convolution.
+//	strides: The stride of the sliding window for each dimension of the input
+// of the convolution.
+//	padding: The type of padding algorithm to use.
+//
+// Returns 4-D with shape according to `data_format`.  For example, if
+// `data_format` is 'NHWC', output shape is `[batch, in_height,
+// in_width, in_channels]`.  Gradient w.r.t. the input of the
+// convolution.
+func DepthwiseConv2dNativeBackpropInput(scope *Scope, input_sizes tf.Output, filter tf.Output, out_backprop tf.Output, strides []int64, padding string, optional ...DepthwiseConv2dNativeBackpropInputAttr) (output tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{"strides": strides, "padding": padding}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "DepthwiseConv2dNativeBackpropInput",
+		Input: []tf.Input{
+			input_sizes, filter, out_backprop,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
 // Creates a dataset with a range of values. Corresponds to python's xrange.
 //
 // Arguments:
@@ -12140,189 +12650,17 @@ func SigmoidGrad(scope *Scope, y tf.Output, dy tf.Output) (z tf.Output) {
 	return op.Output(0)
 }
 
-// Subtracts a value from the current value of a variable.
-//
-// Any ReadVariableOp which depends directly or indirectly on this assign is
-// guaranteed to see the incremented value or a subsequent newer one.
-//
-// Outputs the incremented value, which can be used to totally order the
-// increments to this variable.
-//
-// Arguments:
-//	resource: handle to the resource in which to store the variable.
-//	value: the value by which the variable will be incremented.
+// Computes numerical negative value element-wise.
 //
-// Returns the created operation.
-func AssignSubVariableOp(scope *Scope, resource tf.Output, value tf.Output) (o *tf.Operation) {
+// I.e., \\(y = -x\\).
+func Neg(scope *Scope, x tf.Output) (y tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
 	opspec := tf.OpSpec{
-		Type: "AssignSubVariableOp",
+		Type: "Neg",
 		Input: []tf.Input{
-			resource, value,
-		},
-	}
-	return scope.AddOperation(opspec)
-}
-
-// SparseReduceMaxAttr is an optional argument to SparseReduceMax.
-type SparseReduceMaxAttr func(optionalAttr)
-
-// SparseReduceMaxKeepDims sets the optional keep_dims attribute to value.
-//
-// value: If true, retain reduced dimensions with length 1.
-// If not specified, defaults to false
-func SparseReduceMaxKeepDims(value bool) SparseReduceMaxAttr {
-	return func(m optionalAttr) {
-		m["keep_dims"] = value
-	}
-}
-
-// Computes the max of elements across dimensions of a SparseTensor.
-//
-// This Op takes a SparseTensor and is the sparse counterpart to
-// `tf.reduce_max()`.  In particular, this Op also returns a dense `Tensor`
-// instead of a sparse one.
-//
-// Reduces `sp_input` along the dimensions given in `reduction_axes`.  Unless
-// `keep_dims` is true, the rank of the tensor is reduced by 1 for each entry in
-// `reduction_axes`. If `keep_dims` is true, the reduced dimensions are retained
-// with length 1.
-//
-// If `reduction_axes` has no entries, all dimensions are reduced, and a tensor
-// with a single element is returned.  Additionally, the axes can be negative,
-// which are interpreted according to the indexing rules in Python.
-//
-// Arguments:
-//	input_indices: 2-D.  `N x R` matrix with the indices of non-empty values in a
-// SparseTensor, possibly not in canonical ordering.
-//	input_values: 1-D.  `N` non-empty values corresponding to `input_indices`.
-//	input_shape: 1-D.  Shape of the input SparseTensor.
-//	reduction_axes: 1-D.  Length-`K` vector containing the reduction axes.
-//
-// Returns `R-K`-D.  The reduced Tensor.
-func SparseReduceMax(scope *Scope, input_indices tf.Output, input_values tf.Output, input_shape tf.Output, reduction_axes tf.Output, optional ...SparseReduceMaxAttr) (output tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{}
-	for _, a := range optional {
-		a(attrs)
-	}
-	opspec := tf.OpSpec{
-		Type: "SparseReduceMax",
-		Input: []tf.Input{
-			input_indices, input_values, input_shape, reduction_axes,
-		},
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// Conv3DBackpropFilterV2Attr is an optional argument to Conv3DBackpropFilterV2.
-type Conv3DBackpropFilterV2Attr func(optionalAttr)
-
-// Conv3DBackpropFilterV2DataFormat sets the optional data_format attribute to value.
-//
-// value: The data format of the input and output data. With the
-// default format "NDHWC", the data is stored in the order of:
-//     [batch, in_depth, in_height, in_width, in_channels].
-// Alternatively, the format could be "NCDHW", the data storage order is:
-//     [batch, in_channels, in_depth, in_height, in_width].
-// If not specified, defaults to "NDHWC"
-func Conv3DBackpropFilterV2DataFormat(value string) Conv3DBackpropFilterV2Attr {
-	return func(m optionalAttr) {
-		m["data_format"] = value
-	}
-}
-
-// Computes the gradients of 3-D convolution with respect to the filter.
-//
-// Arguments:
-//	input: Shape `[batch, depth, rows, cols, in_channels]`.
-//	filter_sizes: An integer vector representing the tensor shape of `filter`,
-// where `filter` is a 5-D
-// `[filter_depth, filter_height, filter_width, in_channels, out_channels]`
-// tensor.
-//	out_backprop: Backprop signal of shape `[batch, out_depth, out_rows, out_cols,
-// out_channels]`.
-//	strides: 1-D tensor of length 5. The stride of the sliding window for each
-// dimension of `input`. Must have `strides[0] = strides[4] = 1`.
-//	padding: The type of padding algorithm to use.
-func Conv3DBackpropFilterV2(scope *Scope, input tf.Output, filter_sizes tf.Output, out_backprop tf.Output, strides []int64, padding string, optional ...Conv3DBackpropFilterV2Attr) (output tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{"strides": strides, "padding": padding}
-	for _, a := range optional {
-		a(attrs)
-	}
-	opspec := tf.OpSpec{
-		Type: "Conv3DBackpropFilterV2",
-		Input: []tf.Input{
-			input, filter_sizes, out_backprop,
-		},
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// Execute a sub graph on a remote processor.
-//
-// The graph specifications(such as graph itself, input tensors and output names)
-// are stored as a serialized protocol buffer of RemoteFusedGraphExecuteInfo
-// as serialized_remote_fused_graph_execute_info.
-// The specifications will be passed to a dedicated registered
-// remote fused graph executor.  The executor will send the graph specifications
-// to a remote processor and execute that graph.  The execution results
-// will be passed to consumer nodes as outputs of this node.
-//
-// Arguments:
-//	inputs: Arbitrary number of tensors with arbitrary data types
-//
-//	serialized_remote_fused_graph_execute_info: Serialized protocol buffer
-// of RemoteFusedGraphExecuteInfo which contains graph specifications.
-//
-// Returns Arbitrary number of tensors with arbitrary data types
-func RemoteFusedGraphExecute(scope *Scope, inputs []tf.Output, Toutputs []tf.DataType, serialized_remote_fused_graph_execute_info string) (outputs []tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{"Toutputs": Toutputs, "serialized_remote_fused_graph_execute_info": serialized_remote_fused_graph_execute_info}
-	opspec := tf.OpSpec{
-		Type: "RemoteFusedGraphExecute",
-		Input: []tf.Input{
-			tf.OutputList(inputs),
-		},
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	if scope.Err() != nil {
-		return
-	}
-	var idx int
-	var err error
-	if outputs, idx, err = makeOutputList(op, idx, "outputs"); err != nil {
-		scope.UpdateErr("RemoteFusedGraphExecute", err)
-		return
-	}
-	return outputs
-}
-
-// Computes numerical negative value element-wise.
-//
-// I.e., \\(y = -x\\).
-func Neg(scope *Scope, x tf.Output) (y tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "Neg",
-		Input: []tf.Input{
-			x,
+			x,
 		},
 	}
 	op := scope.AddOperation(opspec)
@@ -13168,53 +13506,6 @@ func Pad(scope *Scope, input tf.Output, paddings tf.Output) (output tf.Output) {
 	return op.Output(0)
 }
 
-// Computes the number of elements in the given queue.
-//
-// Arguments:
-//	handle: The handle to a queue.
-//
-// Returns The number of elements in the given queue.
-func QueueSizeV2(scope *Scope, handle tf.Output) (size tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "QueueSizeV2",
-		Input: []tf.Input{
-			handle,
-		},
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// Outputs a `Summary` protocol buffer with a histogram.
-//
-// The generated
-// [`Summary`](https://www.tensorflow.org/code/tensorflow/core/framework/summary.proto)
-// has one summary value containing a histogram for `values`.
-//
-// This op reports an `InvalidArgument` error if any value is not finite.
-//
-// Arguments:
-//	tag: Scalar.  Tag to use for the `Summary.Value`.
-//	values: Any shape. Values to use to build the histogram.
-//
-// Returns Scalar. Serialized `Summary` protocol buffer.
-func HistogramSummary(scope *Scope, tag tf.Output, values tf.Output) (summary tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "HistogramSummary",
-		Input: []tf.Input{
-			tag, values,
-		},
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
 // Creates a dataset that emits the lines of one or more text files.
 //
 // Arguments:
@@ -13359,52 +13650,238 @@ func MaxPoolV2(scope *Scope, input tf.Output, ksize tf.Output, strides tf.Output
 	return op.Output(0)
 }
 
-// OrderedMapUnstageNoKeyAttr is an optional argument to OrderedMapUnstageNoKey.
-type OrderedMapUnstageNoKeyAttr func(optionalAttr)
+// SparseReduceMaxAttr is an optional argument to SparseReduceMax.
+type SparseReduceMaxAttr func(optionalAttr)
 
-// OrderedMapUnstageNoKeyCapacity sets the optional capacity attribute to value.
-// If not specified, defaults to 0
+// SparseReduceMaxKeepDims sets the optional keep_dims attribute to value.
 //
-// REQUIRES: value >= 0
-func OrderedMapUnstageNoKeyCapacity(value int64) OrderedMapUnstageNoKeyAttr {
+// value: If true, retain reduced dimensions with length 1.
+// If not specified, defaults to false
+func SparseReduceMaxKeepDims(value bool) SparseReduceMaxAttr {
 	return func(m optionalAttr) {
-		m["capacity"] = value
+		m["keep_dims"] = value
 	}
 }
 
-// OrderedMapUnstageNoKeyMemoryLimit sets the optional memory_limit attribute to value.
-// If not specified, defaults to 0
+// Computes the max of elements across dimensions of a SparseTensor.
 //
-// REQUIRES: value >= 0
-func OrderedMapUnstageNoKeyMemoryLimit(value int64) OrderedMapUnstageNoKeyAttr {
-	return func(m optionalAttr) {
-		m["memory_limit"] = value
-	}
-}
-
-// OrderedMapUnstageNoKeyContainer sets the optional container attribute to value.
-// If not specified, defaults to ""
-func OrderedMapUnstageNoKeyContainer(value string) OrderedMapUnstageNoKeyAttr {
-	return func(m optionalAttr) {
-		m["container"] = value
-	}
-}
-
-// OrderedMapUnstageNoKeySharedName sets the optional shared_name attribute to value.
-// If not specified, defaults to ""
-func OrderedMapUnstageNoKeySharedName(value string) OrderedMapUnstageNoKeyAttr {
-	return func(m optionalAttr) {
-		m["shared_name"] = value
-	}
-}
-
-// Op removes and returns the (key, value) element with the smallest
+// This Op takes a SparseTensor and is the sparse counterpart to
+// `tf.reduce_max()`.  In particular, this Op also returns a dense `Tensor`
+// instead of a sparse one.
 //
-// key from the underlying container.   If the underlying container
-// does not contain elements, the op will block until it does.
-func OrderedMapUnstageNoKey(scope *Scope, indices tf.Output, dtypes []tf.DataType, optional ...OrderedMapUnstageNoKeyAttr) (key tf.Output, values []tf.Output) {
-	if scope.Err() != nil {
-		return
+// Reduces `sp_input` along the dimensions given in `reduction_axes`.  Unless
+// `keep_dims` is true, the rank of the tensor is reduced by 1 for each entry in
+// `reduction_axes`. If `keep_dims` is true, the reduced dimensions are retained
+// with length 1.
+//
+// If `reduction_axes` has no entries, all dimensions are reduced, and a tensor
+// with a single element is returned.  Additionally, the axes can be negative,
+// which are interpreted according to the indexing rules in Python.
+//
+// Arguments:
+//	input_indices: 2-D.  `N x R` matrix with the indices of non-empty values in a
+// SparseTensor, possibly not in canonical ordering.
+//	input_values: 1-D.  `N` non-empty values corresponding to `input_indices`.
+//	input_shape: 1-D.  Shape of the input SparseTensor.
+//	reduction_axes: 1-D.  Length-`K` vector containing the reduction axes.
+//
+// Returns `R-K`-D.  The reduced Tensor.
+func SparseReduceMax(scope *Scope, input_indices tf.Output, input_values tf.Output, input_shape tf.Output, reduction_axes tf.Output, optional ...SparseReduceMaxAttr) (output tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "SparseReduceMax",
+		Input: []tf.Input{
+			input_indices, input_values, input_shape, reduction_axes,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// Subtracts a value from the current value of a variable.
+//
+// Any ReadVariableOp which depends directly or indirectly on this assign is
+// guaranteed to see the incremented value or a subsequent newer one.
+//
+// Outputs the incremented value, which can be used to totally order the
+// increments to this variable.
+//
+// Arguments:
+//	resource: handle to the resource in which to store the variable.
+//	value: the value by which the variable will be incremented.
+//
+// Returns the created operation.
+func AssignSubVariableOp(scope *Scope, resource tf.Output, value tf.Output) (o *tf.Operation) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "AssignSubVariableOp",
+		Input: []tf.Input{
+			resource, value,
+		},
+	}
+	return scope.AddOperation(opspec)
+}
+
+// Execute a sub graph on a remote processor.
+//
+// The graph specifications(such as graph itself, input tensors and output names)
+// are stored as a serialized protocol buffer of RemoteFusedGraphExecuteInfo
+// as serialized_remote_fused_graph_execute_info.
+// The specifications will be passed to a dedicated registered
+// remote fused graph executor.  The executor will send the graph specifications
+// to a remote processor and execute that graph.  The execution results
+// will be passed to consumer nodes as outputs of this node.
+//
+// Arguments:
+//	inputs: Arbitrary number of tensors with arbitrary data types
+//
+//	serialized_remote_fused_graph_execute_info: Serialized protocol buffer
+// of RemoteFusedGraphExecuteInfo which contains graph specifications.
+//
+// Returns Arbitrary number of tensors with arbitrary data types
+func RemoteFusedGraphExecute(scope *Scope, inputs []tf.Output, Toutputs []tf.DataType, serialized_remote_fused_graph_execute_info string) (outputs []tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{"Toutputs": Toutputs, "serialized_remote_fused_graph_execute_info": serialized_remote_fused_graph_execute_info}
+	opspec := tf.OpSpec{
+		Type: "RemoteFusedGraphExecute",
+		Input: []tf.Input{
+			tf.OutputList(inputs),
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	if scope.Err() != nil {
+		return
+	}
+	var idx int
+	var err error
+	if outputs, idx, err = makeOutputList(op, idx, "outputs"); err != nil {
+		scope.UpdateErr("RemoteFusedGraphExecute", err)
+		return
+	}
+	return outputs
+}
+
+// Conv3DBackpropFilterV2Attr is an optional argument to Conv3DBackpropFilterV2.
+type Conv3DBackpropFilterV2Attr func(optionalAttr)
+
+// Conv3DBackpropFilterV2DataFormat sets the optional data_format attribute to value.
+//
+// value: The data format of the input and output data. With the
+// default format "NDHWC", the data is stored in the order of:
+//     [batch, in_depth, in_height, in_width, in_channels].
+// Alternatively, the format could be "NCDHW", the data storage order is:
+//     [batch, in_channels, in_depth, in_height, in_width].
+// If not specified, defaults to "NDHWC"
+func Conv3DBackpropFilterV2DataFormat(value string) Conv3DBackpropFilterV2Attr {
+	return func(m optionalAttr) {
+		m["data_format"] = value
+	}
+}
+
+// Conv3DBackpropFilterV2Dilations sets the optional dilations attribute to value.
+//
+// value: 1-D tensor of length 5.  The dilation factor for each dimension of
+// `input`. If set to k > 1, there will be k-1 skipped cells between each
+// filter element on that dimension. The dimension order is determined by the
+// value of `data_format`, see above for details. Dilations in the batch and
+// depth dimensions must be 1.
+// If not specified, defaults to <i:1 i:1 i:1 i:1 i:1 >
+func Conv3DBackpropFilterV2Dilations(value []int64) Conv3DBackpropFilterV2Attr {
+	return func(m optionalAttr) {
+		m["dilations"] = value
+	}
+}
+
+// Computes the gradients of 3-D convolution with respect to the filter.
+//
+// Arguments:
+//	input: Shape `[batch, depth, rows, cols, in_channels]`.
+//	filter_sizes: An integer vector representing the tensor shape of `filter`,
+// where `filter` is a 5-D
+// `[filter_depth, filter_height, filter_width, in_channels, out_channels]`
+// tensor.
+//	out_backprop: Backprop signal of shape `[batch, out_depth, out_rows, out_cols,
+// out_channels]`.
+//	strides: 1-D tensor of length 5. The stride of the sliding window for each
+// dimension of `input`. Must have `strides[0] = strides[4] = 1`.
+//	padding: The type of padding algorithm to use.
+func Conv3DBackpropFilterV2(scope *Scope, input tf.Output, filter_sizes tf.Output, out_backprop tf.Output, strides []int64, padding string, optional ...Conv3DBackpropFilterV2Attr) (output tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{"strides": strides, "padding": padding}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "Conv3DBackpropFilterV2",
+		Input: []tf.Input{
+			input, filter_sizes, out_backprop,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// OrderedMapUnstageNoKeyAttr is an optional argument to OrderedMapUnstageNoKey.
+type OrderedMapUnstageNoKeyAttr func(optionalAttr)
+
+// OrderedMapUnstageNoKeyCapacity sets the optional capacity attribute to value.
+// If not specified, defaults to 0
+//
+// REQUIRES: value >= 0
+func OrderedMapUnstageNoKeyCapacity(value int64) OrderedMapUnstageNoKeyAttr {
+	return func(m optionalAttr) {
+		m["capacity"] = value
+	}
+}
+
+// OrderedMapUnstageNoKeyMemoryLimit sets the optional memory_limit attribute to value.
+// If not specified, defaults to 0
+//
+// REQUIRES: value >= 0
+func OrderedMapUnstageNoKeyMemoryLimit(value int64) OrderedMapUnstageNoKeyAttr {
+	return func(m optionalAttr) {
+		m["memory_limit"] = value
+	}
+}
+
+// OrderedMapUnstageNoKeyContainer sets the optional container attribute to value.
+// If not specified, defaults to ""
+func OrderedMapUnstageNoKeyContainer(value string) OrderedMapUnstageNoKeyAttr {
+	return func(m optionalAttr) {
+		m["container"] = value
+	}
+}
+
+// OrderedMapUnstageNoKeySharedName sets the optional shared_name attribute to value.
+// If not specified, defaults to ""
+func OrderedMapUnstageNoKeySharedName(value string) OrderedMapUnstageNoKeyAttr {
+	return func(m optionalAttr) {
+		m["shared_name"] = value
+	}
+}
+
+// Op removes and returns the (key, value) element with the smallest
+//
+// key from the underlying container.   If the underlying container
+// does not contain elements, the op will block until it does.
+func OrderedMapUnstageNoKey(scope *Scope, indices tf.Output, dtypes []tf.DataType, optional ...OrderedMapUnstageNoKeyAttr) (key tf.Output, values []tf.Output) {
+	if scope.Err() != nil {
+		return
 	}
 	attrs := map[string]interface{}{"dtypes": dtypes}
 	for _, a := range optional {
@@ -13431,6 +13908,56 @@ func OrderedMapUnstageNoKey(scope *Scope, indices tf.Output, dtypes []tf.DataTyp
 	return key, values
 }
 
+// DataFormatVecPermuteAttr is an optional argument to DataFormatVecPermute.
+type DataFormatVecPermuteAttr func(optionalAttr)
+
+// DataFormatVecPermuteSrcFormat sets the optional src_format attribute to value.
+//
+// value: source data format.
+// If not specified, defaults to "NHWC"
+func DataFormatVecPermuteSrcFormat(value string) DataFormatVecPermuteAttr {
+	return func(m optionalAttr) {
+		m["src_format"] = value
+	}
+}
+
+// DataFormatVecPermuteDstFormat sets the optional dst_format attribute to value.
+//
+// value: destination data format.
+// If not specified, defaults to "NCHW"
+func DataFormatVecPermuteDstFormat(value string) DataFormatVecPermuteAttr {
+	return func(m optionalAttr) {
+		m["dst_format"] = value
+	}
+}
+
+// Returns the permuted vector/tensor in the destination data format given the one in
+//
+// the source data format.
+//
+// Arguments:
+//	x: Vector of size 4 or Tensor of shape (2, 4) in source data format.
+//
+// Returns Vector of size 4 or Tensor of shape (2, 4) in destination data format.
+func DataFormatVecPermute(scope *Scope, x tf.Output, optional ...DataFormatVecPermuteAttr) (y tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "DataFormatVecPermute",
+		Input: []tf.Input{
+			x,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
 // Read an element from the TensorArray into output `value`.
 //
 // Arguments:
@@ -13506,6 +14033,20 @@ func Conv3DDataFormat(value string) Conv3DAttr {
 	}
 }
 
+// Conv3DDilations sets the optional dilations attribute to value.
+//
+// value: 1-D tensor of length 5.  The dilation factor for each dimension of
+// `input`. If set to k > 1, there will be k-1 skipped cells between each
+// filter element on that dimension. The dimension order is determined by the
+// value of `data_format`, see above for details. Dilations in the batch and
+// depth dimensions must be 1.
+// If not specified, defaults to <i:1 i:1 i:1 i:1 i:1 >
+func Conv3DDilations(value []int64) Conv3DAttr {
+	return func(m optionalAttr) {
+		m["dilations"] = value
+	}
+}
+
 // Computes a 3-D convolution given 5-D `input` and `filter` tensors.
 //
 // In signal processing, cross-correlation is a measure of similarity of
@@ -13800,6 +14341,20 @@ func DepthwiseConv2dNativeBackpropFilterDataFormat(value string) DepthwiseConv2d
 	}
 }
 
+// DepthwiseConv2dNativeBackpropFilterDilations sets the optional dilations attribute to value.
+//
+// value: 1-D tensor of length 4.  The dilation factor for each dimension of
+// `input`. If set to k > 1, there will be k-1 skipped cells between each filter
+// element on that dimension. The dimension order is determined by the value of
+// `data_format`, see above for details. Dilations in the batch and depth
+// dimensions must be 1.
+// If not specified, defaults to <i:1 i:1 i:1 i:1 >
+func DepthwiseConv2dNativeBackpropFilterDilations(value []int64) DepthwiseConv2dNativeBackpropFilterAttr {
+	return func(m optionalAttr) {
+		m["dilations"] = value
+	}
+}
+
 // Computes the gradients of depthwise convolution with respect to the filter.
 //
 // Arguments:
@@ -14678,66 +15233,10 @@ func LookupTableFindV2(scope *Scope, table_handle tf.Output, keys tf.Output, def
 	return op.Output(0)
 }
 
-// DepthwiseConv2dNativeBackpropInputAttr is an optional argument to DepthwiseConv2dNativeBackpropInput.
-type DepthwiseConv2dNativeBackpropInputAttr func(optionalAttr)
+// MatrixSolveAttr is an optional argument to MatrixSolve.
+type MatrixSolveAttr func(optionalAttr)
 
-// DepthwiseConv2dNativeBackpropInputDataFormat sets the optional data_format attribute to value.
-//
-// value: Specify the data format of the input and output data. With the
-// default format "NHWC", the data is stored in the order of:
-//     [batch, height, width, channels].
-// Alternatively, the format could be "NCHW", the data storage order of:
-//     [batch, channels, height, width].
-// If not specified, defaults to "NHWC"
-func DepthwiseConv2dNativeBackpropInputDataFormat(value string) DepthwiseConv2dNativeBackpropInputAttr {
-	return func(m optionalAttr) {
-		m["data_format"] = value
-	}
-}
-
-// Computes the gradients of depthwise convolution with respect to the input.
-//
-// Arguments:
-//	input_sizes: An integer vector representing the shape of `input`, based
-// on `data_format`.  For example, if `data_format` is 'NHWC' then
-//  `input` is a 4-D `[batch, height, width, channels]` tensor.
-//	filter: 4-D with shape
-// `[filter_height, filter_width, in_channels, depthwise_multiplier]`.
-//	out_backprop: 4-D with shape  based on `data_format`.
-// For example, if `data_format` is 'NHWC' then
-// out_backprop shape is `[batch, out_height, out_width, out_channels]`.
-// Gradients w.r.t. the output of the convolution.
-//	strides: The stride of the sliding window for each dimension of the input
-// of the convolution.
-//	padding: The type of padding algorithm to use.
-//
-// Returns 4-D with shape according to `data_format`.  For example, if
-// `data_format` is 'NHWC', output shape is `[batch, in_height,
-// in_width, in_channels]`.  Gradient w.r.t. the input of the
-// convolution.
-func DepthwiseConv2dNativeBackpropInput(scope *Scope, input_sizes tf.Output, filter tf.Output, out_backprop tf.Output, strides []int64, padding string, optional ...DepthwiseConv2dNativeBackpropInputAttr) (output tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{"strides": strides, "padding": padding}
-	for _, a := range optional {
-		a(attrs)
-	}
-	opspec := tf.OpSpec{
-		Type: "DepthwiseConv2dNativeBackpropInput",
-		Input: []tf.Input{
-			input_sizes, filter, out_backprop,
-		},
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// MatrixSolveAttr is an optional argument to MatrixSolve.
-type MatrixSolveAttr func(optionalAttr)
-
-// MatrixSolveAdjoint sets the optional adjoint attribute to value.
+// MatrixSolveAdjoint sets the optional adjoint attribute to value.
 //
 // value: Boolean indicating whether to solve with `matrix` or its (block-wise)
 // adjoint.
@@ -14998,6 +15497,21 @@ func Sub(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) {
 	return op.Output(0)
 }
 
+// Returns a copy of the input tensor.
+func Snapshot(scope *Scope, input tf.Output) (output tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "Snapshot",
+		Input: []tf.Input{
+			input,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
 // Get the value of the tensor specified by its handle.
 //
 // Arguments:
@@ -15312,6 +15826,14 @@ func MultinomialSeed2(value int64) MultinomialAttr {
 	}
 }
 
+// MultinomialOutputDtype sets the optional output_dtype attribute to value.
+// If not specified, defaults to DT_INT64
+func MultinomialOutputDtype(value tf.DataType) MultinomialAttr {
+	return func(m optionalAttr) {
+		m["output_dtype"] = value
+	}
+}
+
 // Draws samples from a multinomial distribution.
 //
 // Arguments:
@@ -15415,66 +15937,6 @@ func SparseSoftmaxCrossEntropyWithLogits(scope *Scope, features tf.Output, label
 	return op.Output(0), op.Output(1)
 }
 
-// TensorSummaryAttr is an optional argument to TensorSummary.
-type TensorSummaryAttr func(optionalAttr)
-
-// TensorSummaryDescription sets the optional description attribute to value.
-//
-// value: A json-encoded SummaryDescription proto.
-// If not specified, defaults to ""
-func TensorSummaryDescription(value string) TensorSummaryAttr {
-	return func(m optionalAttr) {
-		m["description"] = value
-	}
-}
-
-// TensorSummaryLabels sets the optional labels attribute to value.
-//
-// value: An unused list of strings.
-// If not specified, defaults to <>
-func TensorSummaryLabels(value []string) TensorSummaryAttr {
-	return func(m optionalAttr) {
-		m["labels"] = value
-	}
-}
-
-// TensorSummaryDisplayName sets the optional display_name attribute to value.
-//
-// value: An unused string.
-// If not specified, defaults to ""
-func TensorSummaryDisplayName(value string) TensorSummaryAttr {
-	return func(m optionalAttr) {
-		m["display_name"] = value
-	}
-}
-
-// Outputs a `Summary` protocol buffer with a tensor.
-//
-// This op is being phased out in favor of TensorSummaryV2, which lets callers pass
-// a tag as well as a serialized SummaryMetadata proto string that contains
-// plugin-specific data. We will keep this op to maintain backwards compatibility.
-//
-// Arguments:
-//	tensor: A tensor to serialize.
-func TensorSummary(scope *Scope, tensor tf.Output, optional ...TensorSummaryAttr) (summary tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{}
-	for _, a := range optional {
-		a(attrs)
-	}
-	opspec := tf.OpSpec{
-		Type: "TensorSummary",
-		Input: []tf.Input{
-			tensor,
-		},
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
 // Gradient op for `MirrorPad` op. This op folds a mirror-padded tensor.
 //
 // This operation folds the padded areas of `input` by `MirrorPad` according to the
@@ -15709,6 +16171,20 @@ func Conv2DDataFormat(value string) Conv2DAttr {
 	}
 }
 
+// Conv2DDilations sets the optional dilations attribute to value.
+//
+// value: 1-D tensor of length 4.  The dilation factor for each dimension of
+// `input`. If set to k > 1, there will be k-1 skipped cells between each
+// filter element on that dimension. The dimension order is determined by the
+// value of `data_format`, see above for details. Dilations in the batch and
+// depth dimensions must be 1.
+// If not specified, defaults to <i:1 i:1 i:1 i:1 >
+func Conv2DDilations(value []int64) Conv2DAttr {
+	return func(m optionalAttr) {
+		m["dilations"] = value
+	}
+}
+
 // Computes a 2-D convolution given 4-D `input` and `filter` tensors.
 //
 // Given an input tensor of shape `[batch, in_height, in_width, in_channels]`
@@ -15740,7 +16216,7 @@ func Conv2DDataFormat(value string) Conv2DAttr {
 // `[filter_height, filter_width, in_channels, out_channels]`
 //	strides: 1-D tensor of length 4.  The stride of the sliding window for each
 // dimension of `input`. The dimension order is determined by the value of
-//   `data_format`, see below for details.
+// `data_format`, see below for details.
 //	padding: The type of padding algorithm to use.
 //
 // Returns A 4-D tensor. The dimension order is determined by the value of
@@ -16931,6 +17407,109 @@ func StageSize(scope *Scope, dtypes []tf.DataType, optional ...StageSizeAttr) (s
 	return op.Output(0)
 }
 
+// ResourceScatterNdUpdateAttr is an optional argument to ResourceScatterNdUpdate.
+type ResourceScatterNdUpdateAttr func(optionalAttr)
+
+// ResourceScatterNdUpdateUseLocking sets the optional use_locking attribute to value.
+//
+// value: An optional bool. Defaults to True. If True, the assignment will
+// be protected by a lock; otherwise the behavior is undefined,
+// but may exhibit less contention.
+// If not specified, defaults to true
+func ResourceScatterNdUpdateUseLocking(value bool) ResourceScatterNdUpdateAttr {
+	return func(m optionalAttr) {
+		m["use_locking"] = value
+	}
+}
+
+// Applies sparse `updates` to individual values or slices within a given
+//
+// variable according to `indices`.
+//
+// `ref` is a `Tensor` with rank `P` and `indices` is a `Tensor` of rank `Q`.
+//
+// `indices` must be integer tensor, containing indices into `ref`.
+// It must be shape `[d_0, ..., d_{Q-2}, K]` where `0 < K <= P`.
+//
+// The innermost dimension of `indices` (with length `K`) corresponds to
+// indices into elements (if `K = P`) or slices (if `K < P`) along the `K`th
+// dimension of `ref`.
+//
+// `updates` is `Tensor` of rank `Q-1+P-K` with shape:
+//
+// ```
+// [d_0, ..., d_{Q-2}, ref.shape[K], ..., ref.shape[P-1]].
+// ```
+//
+// For example, say we want to update 4 scattered elements to a rank-1 tensor to
+// 8 elements. In Python, that update would look like this:
+//
+// ```python
+//     ref = tfe.Variable([1, 2, 3, 4, 5, 6, 7, 8])
+//     indices = tf.constant([[4], [3], [1] ,[7]])
+//     updates = tf.constant([9, 10, 11, 12])
+//     update = tf.scatter_nd_update(ref, indices, updates)
+//     with tf.Session() as sess:
+//       print sess.run(update)
+// ```
+//
+// The resulting update to ref would look like this:
+//
+//     [1, 11, 3, 10, 9, 6, 7, 12]
+//
+// See @{tf.scatter_nd} for more details about how to make updates to
+// slices.
+//
+// Arguments:
+//	ref: A resource handle. Must be from a VarHandleOp.
+//	indices: A Tensor. Must be one of the following types: int32, int64.
+// A tensor of indices into ref.
+//	updates: A Tensor. Must have the same type as ref. A tensor of updated
+// values to add to ref.
+//
+// Returns the created operation.
+func ResourceScatterNdUpdate(scope *Scope, ref tf.Output, indices tf.Output, updates tf.Output, optional ...ResourceScatterNdUpdateAttr) (o *tf.Operation) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "ResourceScatterNdUpdate",
+		Input: []tf.Input{
+			ref, indices, updates,
+		},
+		Attrs: attrs,
+	}
+	return scope.AddOperation(opspec)
+}
+
+// Computes the power of one value to another.
+//
+// Given a tensor `x` and a tensor `y`, this operation computes \\(x^y\\) for
+// corresponding elements in `x` and `y`. For example:
+//
+// ```
+// # tensor 'x' is [[2, 2]], [3, 3]]
+// # tensor 'y' is [[8, 16], [2, 3]]
+// tf.pow(x, y) ==> [[256, 65536], [9, 27]]
+// ```
+func Pow(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "Pow",
+		Input: []tf.Input{
+			x, y,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
 // SizeAttr is an optional argument to Size.
 type SizeAttr func(optionalAttr)
 
@@ -17120,13 +17699,54 @@ func FFT3D(scope *Scope, input tf.Output) (output tf.Output) {
 	return op.Output(0)
 }
 
-// Deserialize `SparseTensor` from a (serialized) string 3-vector (1-D `Tensor`)
+// Deserialize `SparseTensor` objects.
+//
+// The input `serialized_sparse` must have the shape `[?, ?, ..., ?, 3]` where
+// the last dimension stores serialized `SparseTensor` objects and the other N
+// dimensions (N >= 0) correspond to a batch. The ranks of the original
+// `SparseTensor` objects must all match. When the final `SparseTensor` is
+// created, its rank is the rank of the incoming `SparseTensor` objects plus N;
+// the sparse tensors have been concatenated along new dimensions, one for each
+// batch.
+//
+// The output `SparseTensor` object's shape values for the original dimensions
+// are the max across the input `SparseTensor` objects' shape values for the
+// corresponding dimensions. The new dimensions match the size of the batch.
+//
+// The input `SparseTensor` objects' indices are assumed ordered in
+// standard lexicographic order.  If this is not the case, after this
+// step run `SparseReorder` to restore index ordering.
+//
+// For example, if the serialized input is a `[2 x 3]` matrix representing two
+// original `SparseTensor` objects:
+//
+//     index = [ 0]
+//             [10]
+//             [20]
+//     values = [1, 2, 3]
+//     shape = [50]
+//
+// and
+//
+//     index = [ 2]
+//             [10]
+//     values = [4, 5]
+//     shape = [30]
+//
+// then the final deserialized `SparseTensor` will be:
 //
-// object.
+//     index = [0  0]
+//             [0 10]
+//             [0 20]
+//             [1  2]
+//             [1 10]
+//     values = [1, 2, 3, 4, 5]
+//     shape = [2 50]
 //
 // Arguments:
-//	serialized_sparse: 1-D, The serialized `SparseTensor` object. Must have 3 columns.
-//	dtype: The `dtype` of the serialized `SparseTensor` object.
+//	serialized_sparse: The serialized `SparseTensor` objects. The last dimension
+// must have 3 columns.
+//	dtype: The `dtype` of the serialized `SparseTensor` objects.
 func DeserializeSparse(scope *Scope, serialized_sparse tf.Output, dtype tf.DataType) (sparse_indices tf.Output, sparse_values tf.Output, sparse_shape tf.Output) {
 	if scope.Err() != nil {
 		return
@@ -17332,79 +17952,18 @@ func StatelessRandomUniform(scope *Scope, shape tf.Output, seed tf.Output, optio
 	return op.Output(0)
 }
 
-// PrintAttr is an optional argument to Print.
-type PrintAttr func(optionalAttr)
+// LoadAndRemapMatrixAttr is an optional argument to LoadAndRemapMatrix.
+type LoadAndRemapMatrixAttr func(optionalAttr)
 
-// PrintMessage sets the optional message attribute to value.
+// LoadAndRemapMatrixMaxRowsInMemory sets the optional max_rows_in_memory attribute to value.
 //
-// value: A string, prefix of the error message.
-// If not specified, defaults to ""
-func PrintMessage(value string) PrintAttr {
+// value: The maximum number of rows to load from the checkpoint at
+// once. If less than or equal to 0, the entire matrix will be loaded into
+// memory. Setting this arg trades increased disk reads for lower memory usage.
+// If not specified, defaults to -1
+func LoadAndRemapMatrixMaxRowsInMemory(value int64) LoadAndRemapMatrixAttr {
 	return func(m optionalAttr) {
-		m["message"] = value
-	}
-}
-
-// PrintFirstN sets the optional first_n attribute to value.
-//
-// value: Only log `first_n` number of times. -1 disables logging.
-// If not specified, defaults to -1
-func PrintFirstN(value int64) PrintAttr {
-	return func(m optionalAttr) {
-		m["first_n"] = value
-	}
-}
-
-// PrintSummarize sets the optional summarize attribute to value.
-//
-// value: Only print this many entries of each tensor.
-// If not specified, defaults to 3
-func PrintSummarize(value int64) PrintAttr {
-	return func(m optionalAttr) {
-		m["summarize"] = value
-	}
-}
-
-// Prints a list of tensors.
-//
-// Passes `input` through to `output` and prints `data` when evaluating.
-//
-// Arguments:
-//	input: The tensor passed to `output`
-//	data: A list of tensors to print out when op is evaluated.
-//
-// Returns = The unmodified `input` tensor
-func Print(scope *Scope, input tf.Output, data []tf.Output, optional ...PrintAttr) (output tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{}
-	for _, a := range optional {
-		a(attrs)
-	}
-	opspec := tf.OpSpec{
-		Type: "Print",
-		Input: []tf.Input{
-			input, tf.OutputList(data),
-		},
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// LoadAndRemapMatrixAttr is an optional argument to LoadAndRemapMatrix.
-type LoadAndRemapMatrixAttr func(optionalAttr)
-
-// LoadAndRemapMatrixMaxRowsInMemory sets the optional max_rows_in_memory attribute to value.
-//
-// value: The maximum number of rows to load from the checkpoint at
-// once. If less than or equal to 0, the entire matrix will be loaded into
-// memory. Setting this arg trades increased disk reads for lower memory usage.
-// If not specified, defaults to -1
-func LoadAndRemapMatrixMaxRowsInMemory(value int64) LoadAndRemapMatrixAttr {
-	return func(m optionalAttr) {
-		m["max_rows_in_memory"] = value
+		m["max_rows_in_memory"] = value
 	}
 }
 
@@ -18129,55 +18688,6 @@ func Cumprod(scope *Scope, x tf.Output, axis tf.Output, optional ...CumprodAttr)
 	return op.Output(0)
 }
 
-// ResourceApplyPowerSignAttr is an optional argument to ResourceApplyPowerSign.
-type ResourceApplyPowerSignAttr func(optionalAttr)
-
-// ResourceApplyPowerSignUseLocking sets the optional use_locking attribute to value.
-//
-// value: If `True`, updating of the var and m tensors is
-// protected by a lock; otherwise the behavior is undefined, but may exhibit less
-// contention.
-// If not specified, defaults to false
-func ResourceApplyPowerSignUseLocking(value bool) ResourceApplyPowerSignAttr {
-	return func(m optionalAttr) {
-		m["use_locking"] = value
-	}
-}
-
-// Update '*var' according to the AddSign update.
-//
-// m_t <- beta1 * m_{t-1} + (1 - beta1) * g
-// update <- exp(logbase * sign_decay * sign(g) * sign(m_t)) * g
-// variable <- variable - lr_t * update
-//
-// Arguments:
-//	var_: Should be from a Variable().
-//	m: Should be from a Variable().
-//	lr: Scaling factor. Must be a scalar.
-//	logbase: Must be a scalar.
-//	sign_decay: Must be a scalar.
-//	beta: Must be a scalar.
-//	grad: The gradient.
-//
-// Returns the created operation.
-func ResourceApplyPowerSign(scope *Scope, var_ tf.Output, m tf.Output, lr tf.Output, logbase tf.Output, sign_decay tf.Output, beta tf.Output, grad tf.Output, optional ...ResourceApplyPowerSignAttr) (o *tf.Operation) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{}
-	for _, a := range optional {
-		a(attrs)
-	}
-	opspec := tf.OpSpec{
-		Type: "ResourceApplyPowerSign",
-		Input: []tf.Input{
-			var_, m, lr, logbase, sign_decay, beta, grad,
-		},
-		Attrs: attrs,
-	}
-	return scope.AddOperation(opspec)
-}
-
 // DestroyResourceOpAttr is an optional argument to DestroyResourceOp.
 type DestroyResourceOpAttr func(optionalAttr)
 
@@ -20766,41 +21276,28 @@ func RFFT3D(scope *Scope, input tf.Output, fft_length tf.Output) (output tf.Outp
 	return op.Output(0)
 }
 
-// QuantizeAndDequantizeV3Attr is an optional argument to QuantizeAndDequantizeV3.
-type QuantizeAndDequantizeV3Attr func(optionalAttr)
-
-// QuantizeAndDequantizeV3SignedInput sets the optional signed_input attribute to value.
-// If not specified, defaults to true
-func QuantizeAndDequantizeV3SignedInput(value bool) QuantizeAndDequantizeV3Attr {
-	return func(m optionalAttr) {
-		m["signed_input"] = value
-	}
-}
-
-// QuantizeAndDequantizeV3RangeGiven sets the optional range_given attribute to value.
-// If not specified, defaults to true
-func QuantizeAndDequantizeV3RangeGiven(value bool) QuantizeAndDequantizeV3Attr {
-	return func(m optionalAttr) {
-		m["range_given"] = value
-	}
-}
-
-// Quantizes then dequantizes a tensor.
+// Computes the gradients of 3-D convolution with respect to the input.
 //
-// This is almost identical to QuantizeAndDequantizeV2, except that num_bits is a
-// tensor, so its value can change during training.
-func QuantizeAndDequantizeV3(scope *Scope, input tf.Output, input_min tf.Output, input_max tf.Output, num_bits tf.Output, optional ...QuantizeAndDequantizeV3Attr) (output tf.Output) {
+// DEPRECATED at GraphDef version 10: Use Conv3DBackpropInputV2
+//
+// Arguments:
+//	input: Shape `[batch, depth, rows, cols, in_channels]`.
+//	filter: Shape `[depth, rows, cols, in_channels, out_channels]`.
+// `in_channels` must match between `input` and `filter`.
+//	out_backprop: Backprop signal of shape `[batch, out_depth, out_rows, out_cols,
+// out_channels]`.
+//	strides: 1-D tensor of length 5. The stride of the sliding window for each
+// dimension of `input`. Must have `strides[0] = strides[4] = 1`.
+//	padding: The type of padding algorithm to use.
+func Conv3DBackpropInput(scope *Scope, input tf.Output, filter tf.Output, out_backprop tf.Output, strides []int64, padding string) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{}
-	for _, a := range optional {
-		a(attrs)
-	}
+	attrs := map[string]interface{}{"strides": strides, "padding": padding}
 	opspec := tf.OpSpec{
-		Type: "QuantizeAndDequantizeV3",
+		Type: "Conv3DBackpropInput",
 		Input: []tf.Input{
-			input, input_min, input_max, num_bits,
+			input, filter, out_backprop,
 		},
 		Attrs: attrs,
 	}
@@ -20808,46 +21305,95 @@ func QuantizeAndDequantizeV3(scope *Scope, input tf.Output, input_min tf.Output,
 	return op.Output(0)
 }
 
-// AvgPool3DAttr is an optional argument to AvgPool3D.
-type AvgPool3DAttr func(optionalAttr)
+// ReverseSequenceAttr is an optional argument to ReverseSequence.
+type ReverseSequenceAttr func(optionalAttr)
 
-// AvgPool3DDataFormat sets the optional data_format attribute to value.
+// ReverseSequenceBatchDim sets the optional batch_dim attribute to value.
 //
-// value: The data format of the input and output data. With the
-// default format "NDHWC", the data is stored in the order of:
-//     [batch, in_depth, in_height, in_width, in_channels].
-// Alternatively, the format could be "NCDHW", the data storage order is:
-//     [batch, in_channels, in_depth, in_height, in_width].
-// If not specified, defaults to "NDHWC"
-func AvgPool3DDataFormat(value string) AvgPool3DAttr {
+// value: The dimension along which reversal is performed.
+// If not specified, defaults to 0
+func ReverseSequenceBatchDim(value int64) ReverseSequenceAttr {
 	return func(m optionalAttr) {
-		m["data_format"] = value
+		m["batch_dim"] = value
 	}
 }
 
-// Performs 3D average pooling on the input.
+// Reverses variable length slices.
+//
+// This op first slices `input` along the dimension `batch_dim`, and for each
+// slice `i`, reverses the first `seq_lengths[i]` elements along
+// the dimension `seq_dim`.
+//
+// The elements of `seq_lengths` must obey `seq_lengths[i] <= input.dims[seq_dim]`,
+// and `seq_lengths` must be a vector of length `input.dims[batch_dim]`.
+//
+// The output slice `i` along dimension `batch_dim` is then given by input
+// slice `i`, with the first `seq_lengths[i]` slices along dimension
+// `seq_dim` reversed.
+//
+// For example:
+//
+// ```
+// # Given this:
+// batch_dim = 0
+// seq_dim = 1
+// input.dims = (4, 8, ...)
+// seq_lengths = [7, 2, 3, 5]
+//
+// # then slices of input are reversed on seq_dim, but only up to seq_lengths:
+// output[0, 0:7, :, ...] = input[0, 7:0:-1, :, ...]
+// output[1, 0:2, :, ...] = input[1, 2:0:-1, :, ...]
+// output[2, 0:3, :, ...] = input[2, 3:0:-1, :, ...]
+// output[3, 0:5, :, ...] = input[3, 5:0:-1, :, ...]
+//
+// # while entries past seq_lens are copied through:
+// output[0, 7:, :, ...] = input[0, 7:, :, ...]
+// output[1, 2:, :, ...] = input[1, 2:, :, ...]
+// output[2, 3:, :, ...] = input[2, 3:, :, ...]
+// output[3, 2:, :, ...] = input[3, 2:, :, ...]
+// ```
+//
+// In contrast, if:
+//
+// ```
+// # Given this:
+// batch_dim = 2
+// seq_dim = 0
+// input.dims = (8, ?, 4, ...)
+// seq_lengths = [7, 2, 3, 5]
+//
+// # then slices of input are reversed on seq_dim, but only up to seq_lengths:
+// output[0:7, :, 0, :, ...] = input[7:0:-1, :, 0, :, ...]
+// output[0:2, :, 1, :, ...] = input[2:0:-1, :, 1, :, ...]
+// output[0:3, :, 2, :, ...] = input[3:0:-1, :, 2, :, ...]
+// output[0:5, :, 3, :, ...] = input[5:0:-1, :, 3, :, ...]
+//
+// # while entries past seq_lens are copied through:
+// output[7:, :, 0, :, ...] = input[7:, :, 0, :, ...]
+// output[2:, :, 1, :, ...] = input[2:, :, 1, :, ...]
+// output[3:, :, 2, :, ...] = input[3:, :, 2, :, ...]
+// output[2:, :, 3, :, ...] = input[2:, :, 3, :, ...]
+// ```
 //
 // Arguments:
-//	input: Shape `[batch, depth, rows, cols, channels]` tensor to pool over.
-//	ksize: 1-D tensor of length 5. The size of the window for each dimension of
-// the input tensor. Must have `ksize[0] = ksize[4] = 1`.
-//	strides: 1-D tensor of length 5. The stride of the sliding window for each
-// dimension of `input`. Must have `strides[0] = strides[4] = 1`.
-//	padding: The type of padding algorithm to use.
+//	input: The input to reverse.
+//	seq_lengths: 1-D with length `input.dims(batch_dim)` and
+// `max(seq_lengths) <= input.dims(seq_dim)`
+//	seq_dim: The dimension which is partially reversed.
 //
-// Returns The average pooled output tensor.
-func AvgPool3D(scope *Scope, input tf.Output, ksize []int64, strides []int64, padding string, optional ...AvgPool3DAttr) (output tf.Output) {
+// Returns The partially reversed input. It has the same shape as `input`.
+func ReverseSequence(scope *Scope, input tf.Output, seq_lengths tf.Output, seq_dim int64, optional ...ReverseSequenceAttr) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{"ksize": ksize, "strides": strides, "padding": padding}
+	attrs := map[string]interface{}{"seq_dim": seq_dim}
 	for _, a := range optional {
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "AvgPool3D",
+		Type: "ReverseSequence",
 		Input: []tf.Input{
-			input,
+			input, seq_lengths,
 		},
 		Attrs: attrs,
 	}
@@ -20855,10 +21401,250 @@ func AvgPool3D(scope *Scope, input tf.Output, ksize []int64, strides []int64, pa
 	return op.Output(0)
 }
 
-// Produces the max pool of the input tensor for quantized types.
+// Computes the gradient for the rsqrt of `x` wrt its input.
 //
-// Arguments:
-//	input: The 4D (batch x rows x cols x depth) Tensor to MaxReduce over.
+// Specifically, `grad = dy * -0.5 * y^3`, where `y = rsqrt(x)`, and `dy`
+// is the corresponding input gradient.
+func RsqrtGrad(scope *Scope, y tf.Output, dy tf.Output) (z tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "RsqrtGrad",
+		Input: []tf.Input{
+			y, dy,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// Computes the gradients of 3-D convolution with respect to the filter.
+//
+// DEPRECATED at GraphDef version 10: Use Conv3DBackpropFilterV2
+//
+// Arguments:
+//	input: Shape `[batch, depth, rows, cols, in_channels]`.
+//	filter: Shape `[depth, rows, cols, in_channels, out_channels]`.
+// `in_channels` must match between `input` and `filter`.
+//	out_backprop: Backprop signal of shape `[batch, out_depth, out_rows, out_cols,
+// out_channels]`.
+//	strides: 1-D tensor of length 5. The stride of the sliding window for each
+// dimension of `input`. Must have `strides[0] = strides[4] = 1`.
+//	padding: The type of padding algorithm to use.
+func Conv3DBackpropFilter(scope *Scope, input tf.Output, filter tf.Output, out_backprop tf.Output, strides []int64, padding string) (output tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{"strides": strides, "padding": padding}
+	opspec := tf.OpSpec{
+		Type: "Conv3DBackpropFilter",
+		Input: []tf.Input{
+			input, filter, out_backprop,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// Conv3DBackpropInputV2Attr is an optional argument to Conv3DBackpropInputV2.
+type Conv3DBackpropInputV2Attr func(optionalAttr)
+
+// Conv3DBackpropInputV2DataFormat sets the optional data_format attribute to value.
+//
+// value: The data format of the input and output data. With the
+// default format "NDHWC", the data is stored in the order of:
+//     [batch, in_depth, in_height, in_width, in_channels].
+// Alternatively, the format could be "NCDHW", the data storage order is:
+//     [batch, in_channels, in_depth, in_height, in_width].
+// If not specified, defaults to "NDHWC"
+func Conv3DBackpropInputV2DataFormat(value string) Conv3DBackpropInputV2Attr {
+	return func(m optionalAttr) {
+		m["data_format"] = value
+	}
+}
+
+// Conv3DBackpropInputV2Dilations sets the optional dilations attribute to value.
+//
+// value: 1-D tensor of length 5.  The dilation factor for each dimension of
+// `input`. If set to k > 1, there will be k-1 skipped cells between each
+// filter element on that dimension. The dimension order is determined by the
+// value of `data_format`, see above for details. Dilations in the batch and
+// depth dimensions must be 1.
+// If not specified, defaults to <i:1 i:1 i:1 i:1 i:1 >
+func Conv3DBackpropInputV2Dilations(value []int64) Conv3DBackpropInputV2Attr {
+	return func(m optionalAttr) {
+		m["dilations"] = value
+	}
+}
+
+// Computes the gradients of 3-D convolution with respect to the input.
+//
+// Arguments:
+//	input_sizes: An integer vector representing the tensor shape of `input`,
+// where `input` is a 5-D
+// `[batch, depth, rows, cols, in_channels]` tensor.
+//	filter: Shape `[depth, rows, cols, in_channels, out_channels]`.
+// `in_channels` must match between `input` and `filter`.
+//	out_backprop: Backprop signal of shape `[batch, out_depth, out_rows, out_cols,
+// out_channels]`.
+//	strides: 1-D tensor of length 5. The stride of the sliding window for each
+// dimension of `input`. Must have `strides[0] = strides[4] = 1`.
+//	padding: The type of padding algorithm to use.
+func Conv3DBackpropInputV2(scope *Scope, input_sizes tf.Output, filter tf.Output, out_backprop tf.Output, strides []int64, padding string, optional ...Conv3DBackpropInputV2Attr) (output tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{"strides": strides, "padding": padding}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "Conv3DBackpropInputV2",
+		Input: []tf.Input{
+			input_sizes, filter, out_backprop,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// Returns a tensor of ones with the same shape and type as x.
+//
+// Arguments:
+//	x: a tensor of type T.
+//
+// Returns a tensor of the same shape and type as x but filled with ones.
+func OnesLike(scope *Scope, x tf.Output) (y tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "OnesLike",
+		Input: []tf.Input{
+			x,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// Returns element-wise remainder of division. This emulates C semantics in that
+//
+// the result here is consistent with a truncating divide. E.g.
+// `tf.truncatediv(x, y) * y + truncate_mod(x, y) = x`.
+//
+// *NOTE*: `Mod` supports broadcasting. More about broadcasting
+// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)
+func Mod(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "Mod",
+		Input: []tf.Input{
+			x, y,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// QuantizeAndDequantizeV3Attr is an optional argument to QuantizeAndDequantizeV3.
+type QuantizeAndDequantizeV3Attr func(optionalAttr)
+
+// QuantizeAndDequantizeV3SignedInput sets the optional signed_input attribute to value.
+// If not specified, defaults to true
+func QuantizeAndDequantizeV3SignedInput(value bool) QuantizeAndDequantizeV3Attr {
+	return func(m optionalAttr) {
+		m["signed_input"] = value
+	}
+}
+
+// QuantizeAndDequantizeV3RangeGiven sets the optional range_given attribute to value.
+// If not specified, defaults to true
+func QuantizeAndDequantizeV3RangeGiven(value bool) QuantizeAndDequantizeV3Attr {
+	return func(m optionalAttr) {
+		m["range_given"] = value
+	}
+}
+
+// Quantizes then dequantizes a tensor.
+//
+// This is almost identical to QuantizeAndDequantizeV2, except that num_bits is a
+// tensor, so its value can change during training.
+func QuantizeAndDequantizeV3(scope *Scope, input tf.Output, input_min tf.Output, input_max tf.Output, num_bits tf.Output, optional ...QuantizeAndDequantizeV3Attr) (output tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "QuantizeAndDequantizeV3",
+		Input: []tf.Input{
+			input, input_min, input_max, num_bits,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// AvgPool3DAttr is an optional argument to AvgPool3D.
+type AvgPool3DAttr func(optionalAttr)
+
+// AvgPool3DDataFormat sets the optional data_format attribute to value.
+//
+// value: The data format of the input and output data. With the
+// default format "NDHWC", the data is stored in the order of:
+//     [batch, in_depth, in_height, in_width, in_channels].
+// Alternatively, the format could be "NCDHW", the data storage order is:
+//     [batch, in_channels, in_depth, in_height, in_width].
+// If not specified, defaults to "NDHWC"
+func AvgPool3DDataFormat(value string) AvgPool3DAttr {
+	return func(m optionalAttr) {
+		m["data_format"] = value
+	}
+}
+
+// Performs 3D average pooling on the input.
+//
+// Arguments:
+//	input: Shape `[batch, depth, rows, cols, channels]` tensor to pool over.
+//	ksize: 1-D tensor of length 5. The size of the window for each dimension of
+// the input tensor. Must have `ksize[0] = ksize[4] = 1`.
+//	strides: 1-D tensor of length 5. The stride of the sliding window for each
+// dimension of `input`. Must have `strides[0] = strides[4] = 1`.
+//	padding: The type of padding algorithm to use.
+//
+// Returns The average pooled output tensor.
+func AvgPool3D(scope *Scope, input tf.Output, ksize []int64, strides []int64, padding string, optional ...AvgPool3DAttr) (output tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{"ksize": ksize, "strides": strides, "padding": padding}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "AvgPool3D",
+		Input: []tf.Input{
+			input,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// Produces the max pool of the input tensor for quantized types.
+//
+// Arguments:
+//	input: The 4D (batch x rows x cols x depth) Tensor to MaxReduce over.
 //	min_input: The float value that the lowest quantized input value represents.
 //	max_input: The float value that the highest quantized input value represents.
 //	ksize: The size of the window for each dimension of the input tensor.
@@ -22195,144 +22981,13 @@ func Softmax(scope *Scope, logits tf.Output) (softmax tf.Output) {
 	return op.Output(0)
 }
 
-// RandomShuffleQueueV2Attr is an optional argument to RandomShuffleQueueV2.
-type RandomShuffleQueueV2Attr func(optionalAttr)
-
-// RandomShuffleQueueV2Shapes sets the optional shapes attribute to value.
-//
-// value: The shape of each component in a value. The length of this attr must
-// be either 0 or the same as the length of component_types. If the length of
-// this attr is 0, the shapes of queue elements are not constrained, and
-// only one element may be dequeued at a time.
-// If not specified, defaults to <>
+// Returns the truth value of (x <= y) element-wise.
 //
-// REQUIRES: len(value) >= 0
-func RandomShuffleQueueV2Shapes(value []tf.Shape) RandomShuffleQueueV2Attr {
-	return func(m optionalAttr) {
-		m["shapes"] = value
-	}
-}
-
-// RandomShuffleQueueV2Capacity sets the optional capacity attribute to value.
-//
-// value: The upper bound on the number of elements in this queue.
-// Negative numbers mean no limit.
-// If not specified, defaults to -1
-func RandomShuffleQueueV2Capacity(value int64) RandomShuffleQueueV2Attr {
-	return func(m optionalAttr) {
-		m["capacity"] = value
-	}
-}
-
-// RandomShuffleQueueV2MinAfterDequeue sets the optional min_after_dequeue attribute to value.
-//
-// value: Dequeue will block unless there would be this
-// many elements after the dequeue or the queue is closed. This
-// ensures a minimum level of mixing of elements.
-// If not specified, defaults to 0
-func RandomShuffleQueueV2MinAfterDequeue(value int64) RandomShuffleQueueV2Attr {
-	return func(m optionalAttr) {
-		m["min_after_dequeue"] = value
-	}
-}
-
-// RandomShuffleQueueV2Seed sets the optional seed attribute to value.
-//
-// value: If either seed or seed2 is set to be non-zero, the random number
-// generator is seeded by the given seed.  Otherwise, a random seed is used.
-// If not specified, defaults to 0
-func RandomShuffleQueueV2Seed(value int64) RandomShuffleQueueV2Attr {
-	return func(m optionalAttr) {
-		m["seed"] = value
-	}
-}
-
-// RandomShuffleQueueV2Seed2 sets the optional seed2 attribute to value.
-//
-// value: A second seed to avoid seed collision.
-// If not specified, defaults to 0
-func RandomShuffleQueueV2Seed2(value int64) RandomShuffleQueueV2Attr {
-	return func(m optionalAttr) {
-		m["seed2"] = value
-	}
-}
-
-// RandomShuffleQueueV2Container sets the optional container attribute to value.
-//
-// value: If non-empty, this queue is placed in the given container.
-// Otherwise, a default container is used.
-// If not specified, defaults to ""
-func RandomShuffleQueueV2Container(value string) RandomShuffleQueueV2Attr {
-	return func(m optionalAttr) {
-		m["container"] = value
-	}
-}
-
-// RandomShuffleQueueV2SharedName sets the optional shared_name attribute to value.
-//
-// value: If non-empty, this queue will be shared under the given name
-// across multiple sessions.
-// If not specified, defaults to ""
-func RandomShuffleQueueV2SharedName(value string) RandomShuffleQueueV2Attr {
-	return func(m optionalAttr) {
-		m["shared_name"] = value
-	}
-}
-
-// A queue that randomizes the order of elements.
-//
-// Arguments:
-//	component_types: The type of each component in a value.
-//
-// Returns The handle to the queue.
-func RandomShuffleQueueV2(scope *Scope, component_types []tf.DataType, optional ...RandomShuffleQueueV2Attr) (handle tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{"component_types": component_types}
-	for _, a := range optional {
-		a(attrs)
-	}
-	opspec := tf.OpSpec{
-		Type: "RandomShuffleQueueV2",
-
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// Outputs a `Summary` protocol buffer with scalar values.
-//
-// The input `tags` and `values` must have the same shape.  The generated summary
-// has a summary value for each tag-value pair in `tags` and `values`.
-//
-// Arguments:
-//	tags: Tags for the summary.
-//	values: Same shape as `tags.  Values for the summary.
-//
-// Returns Scalar.  Serialized `Summary` protocol buffer.
-func ScalarSummary(scope *Scope, tags tf.Output, values tf.Output) (summary tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "ScalarSummary",
-		Input: []tf.Input{
-			tags, values,
-		},
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// Returns the truth value of (x <= y) element-wise.
-//
-// *NOTE*: `LessEqual` supports broadcasting. More about broadcasting
-// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)
-func LessEqual(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) {
-	if scope.Err() != nil {
-		return
+// *NOTE*: `LessEqual` supports broadcasting. More about broadcasting
+// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)
+func LessEqual(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) {
+	if scope.Err() != nil {
+		return
 	}
 	opspec := tf.OpSpec{
 		Type: "LessEqual",
@@ -24034,7 +24689,21 @@ func TensorArrayV2(scope *Scope, size tf.Output, dtype tf.DataType, optional ...
 	return op.Output(0)
 }
 
-// Serialize an `N`-minibatch `SparseTensor` into an `[N, 3]` string `Tensor`.
+// SerializeManySparseAttr is an optional argument to SerializeManySparse.
+type SerializeManySparseAttr func(optionalAttr)
+
+// SerializeManySparseOutType sets the optional out_type attribute to value.
+//
+// value: The `dtype` to use for serialization; the supported types are `string`
+// (default) and `variant`.
+// If not specified, defaults to DT_STRING
+func SerializeManySparseOutType(value tf.DataType) SerializeManySparseAttr {
+	return func(m optionalAttr) {
+		m["out_type"] = value
+	}
+}
+
+// Serialize an `N`-minibatch `SparseTensor` into an `[N, 3]` `Tensor` object.
 //
 // The `SparseTensor` must have rank `R` greater than 1, and the first dimension
 // is treated as the minibatch dimension.  Elements of the `SparseTensor`
@@ -24048,15 +24717,20 @@ func TensorArrayV2(scope *Scope, size tf.Output, dtype tf.DataType, optional ...
 //	sparse_indices: 2-D.  The `indices` of the minibatch `SparseTensor`.
 //	sparse_values: 1-D.  The `values` of the minibatch `SparseTensor`.
 //	sparse_shape: 1-D.  The `shape` of the minibatch `SparseTensor`.
-func SerializeManySparse(scope *Scope, sparse_indices tf.Output, sparse_values tf.Output, sparse_shape tf.Output) (serialized_sparse tf.Output) {
+func SerializeManySparse(scope *Scope, sparse_indices tf.Output, sparse_values tf.Output, sparse_shape tf.Output, optional ...SerializeManySparseAttr) (serialized_sparse tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
+	attrs := map[string]interface{}{}
+	for _, a := range optional {
+		a(attrs)
+	}
 	opspec := tf.OpSpec{
 		Type: "SerializeManySparse",
 		Input: []tf.Input{
 			sparse_indices, sparse_values, sparse_shape,
 		},
+		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
@@ -24198,6 +24872,20 @@ func Conv2DBackpropFilterDataFormat(value string) Conv2DBackpropFilterAttr {
 	}
 }
 
+// Conv2DBackpropFilterDilations sets the optional dilations attribute to value.
+//
+// value: 1-D tensor of length 4.  The dilation factor for each dimension of
+// `input`. If set to k > 1, there will be k-1 skipped cells between each filter
+// element on that dimension. The dimension order is determined by the value of
+// `data_format`, see above for details. Dilations in the batch and depth
+// dimensions must be 1.
+// If not specified, defaults to <i:1 i:1 i:1 i:1 >
+func Conv2DBackpropFilterDilations(value []int64) Conv2DBackpropFilterAttr {
+	return func(m optionalAttr) {
+		m["dilations"] = value
+	}
+}
+
 // Computes the gradients of convolution with respect to the filter.
 //
 // Arguments:
@@ -24442,280 +25130,46 @@ func Cos(scope *Scope, x tf.Output) (y tf.Output) {
 	return op.Output(0)
 }
 
-// BatchToSpace for 4-D tensors of type T.
+// Convert the quantized 'input' tensor into a lower-precision 'output', using the
 //
-// This is a legacy version of the more general BatchToSpaceND.
+// output range specified with 'requested_output_min' and 'requested_output_max'.
 //
-// Rearranges (permutes) data from batch into blocks of spatial data, followed by
-// cropping. This is the reverse transformation of SpaceToBatch. More specifically,
-// this op outputs a copy of the input tensor where values from the `batch`
-// dimension are moved in spatial blocks to the `height` and `width` dimensions,
-// followed by cropping along the `height` and `width` dimensions.
+// [input_min, input_max] are scalar floats that specify the range for the float
+// interpretation of the 'input' data. For example, if input_min is -1.0f and
+// input_max is 1.0f, and we are dealing with quint16 quantized data, then a 0
+// value in the 16-bit data should be interpreted as -1.0f, and a 65535 means 1.0f.
 //
 // Arguments:
-//	input: 4-D tensor with shape
-// `[batch*block_size*block_size, height_pad/block_size, width_pad/block_size,
-//   depth]`. Note that the batch size of the input tensor must be divisible by
-// `block_size * block_size`.
-//	crops: 2-D tensor of non-negative integers with shape `[2, 2]`. It specifies
-// how many elements to crop from the intermediate result across the spatial
-// dimensions as follows:
-//
-//     crops = [[crop_top, crop_bottom], [crop_left, crop_right]]
-//
-//
-// Returns 4-D with shape `[batch, height, width, depth]`, where:
-//
-//       height = height_pad - crop_top - crop_bottom
-//       width = width_pad - crop_left - crop_right
-//
-// The attr `block_size` must be greater than one. It indicates the block size.
-//
-// Some examples:
-//
-// (1) For the following input of shape `[4, 1, 1, 1]` and block_size of 2:
-//
-// ```
-// [[[[1]]], [[[2]]], [[[3]]], [[[4]]]]
-// ```
-//
-// The output tensor has shape `[1, 2, 2, 1]` and value:
-//
-// ```
-// x = [[[[1], [2]], [[3], [4]]]]
-// ```
-//
-// (2) For the following input of shape `[4, 1, 1, 3]` and block_size of 2:
-//
-// ```
-// [[[1, 2, 3]], [[4, 5, 6]], [[7, 8, 9]], [[10, 11, 12]]]
-// ```
-//
-// The output tensor has shape `[1, 2, 2, 3]` and value:
-//
-// ```
-// x = [[[[1, 2, 3], [4, 5, 6]],
-//       [[7, 8, 9], [10, 11, 12]]]]
-// ```
-//
-// (3) For the following input of shape `[4, 2, 2, 1]` and block_size of 2:
-//
-// ```
-// x = [[[[1], [3]], [[9], [11]]],
-//      [[[2], [4]], [[10], [12]]],
-//      [[[5], [7]], [[13], [15]]],
-//      [[[6], [8]], [[14], [16]]]]
-// ```
-//
-// The output tensor has shape `[1, 4, 4, 1]` and value:
-//
-// ```
-// x = [[[1],   [2],  [3],  [4]],
-//      [[5],   [6],  [7],  [8]],
-//      [[9],  [10], [11],  [12]],
-//      [[13], [14], [15],  [16]]]
-// ```
-//
-// (4) For the following input of shape `[8, 1, 2, 1]` and block_size of 2:
-//
-// ```
-// x = [[[[1], [3]]], [[[9], [11]]], [[[2], [4]]], [[[10], [12]]],
-//      [[[5], [7]]], [[[13], [15]]], [[[6], [8]]], [[[14], [16]]]]
-// ```
 //
-// The output tensor has shape `[2, 2, 4, 1]` and value:
+//	input_min: The float value that the minimum quantized input value represents.
+//	input_max: The float value that the maximum quantized input value represents.
+//	requested_output_min: The float value that the minimum quantized output value represents.
+//	requested_output_max: The float value that the maximum quantized output value represents.
+//	out_type: The type of the output. Should be a lower bit depth than Tinput.
 //
-// ```
-// x = [[[[1], [3]], [[5], [7]]],
-//      [[[2], [4]], [[10], [12]]],
-//      [[[5], [7]], [[13], [15]]],
-//      [[[6], [8]], [[14], [16]]]]
-// ```
-func BatchToSpace(scope *Scope, input tf.Output, crops tf.Output, block_size int64) (output tf.Output) {
+// Returns The requested_output_min value is copied into this output.The requested_output_max value is copied into this output.
+func Requantize(scope *Scope, input tf.Output, input_min tf.Output, input_max tf.Output, requested_output_min tf.Output, requested_output_max tf.Output, out_type tf.DataType) (output tf.Output, output_min tf.Output, output_max tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{"block_size": block_size}
+	attrs := map[string]interface{}{"out_type": out_type}
 	opspec := tf.OpSpec{
-		Type: "BatchToSpace",
+		Type: "Requantize",
 		Input: []tf.Input{
-			input, crops,
+			input, input_min, input_max, requested_output_min, requested_output_max,
 		},
 		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
-	return op.Output(0)
+	return op.Output(0), op.Output(1), op.Output(2)
 }
 
-// SparseToDenseAttr is an optional argument to SparseToDense.
-type SparseToDenseAttr func(optionalAttr)
+// ArgMinAttr is an optional argument to ArgMin.
+type ArgMinAttr func(optionalAttr)
 
-// SparseToDenseValidateIndices sets the optional validate_indices attribute to value.
-//
-// value: If true, indices are checked to make sure they are sorted in
-// lexicographic order and that there are no repeats.
-// If not specified, defaults to true
-func SparseToDenseValidateIndices(value bool) SparseToDenseAttr {
-	return func(m optionalAttr) {
-		m["validate_indices"] = value
-	}
-}
-
-// Converts a sparse representation into a dense tensor.
-//
-// Builds an array `dense` with shape `output_shape` such that
-//
-// ```
-// # If sparse_indices is scalar
-// dense[i] = (i == sparse_indices ? sparse_values : default_value)
-//
-// # If sparse_indices is a vector, then for each i
-// dense[sparse_indices[i]] = sparse_values[i]
-//
-// # If sparse_indices is an n by d matrix, then for each i in [0, n)
-// dense[sparse_indices[i][0], ..., sparse_indices[i][d-1]] = sparse_values[i]
-// ```
-//
-// All other values in `dense` are set to `default_value`.  If `sparse_values` is a
-// scalar, all sparse indices are set to this single value.
-//
-// Indices should be sorted in lexicographic order, and indices must not
-// contain any repeats. If `validate_indices` is true, these properties
-// are checked during execution.
-//
-// Arguments:
-//	sparse_indices: 0-D, 1-D, or 2-D.  `sparse_indices[i]` contains the complete
-// index where `sparse_values[i]` will be placed.
-//	output_shape: 1-D.  Shape of the dense output tensor.
-//	sparse_values: 1-D.  Values corresponding to each row of `sparse_indices`,
-// or a scalar value to be used for all sparse indices.
-//	default_value: Scalar value to set for indices not specified in
-// `sparse_indices`.
-//
-// Returns Dense output tensor of shape `output_shape`.
-func SparseToDense(scope *Scope, sparse_indices tf.Output, output_shape tf.Output, sparse_values tf.Output, default_value tf.Output, optional ...SparseToDenseAttr) (dense tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{}
-	for _, a := range optional {
-		a(attrs)
-	}
-	opspec := tf.OpSpec{
-		Type: "SparseToDense",
-		Input: []tf.Input{
-			sparse_indices, output_shape, sparse_values, default_value,
-		},
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// NthElementAttr is an optional argument to NthElement.
-type NthElementAttr func(optionalAttr)
-
-// NthElementReverse sets the optional reverse attribute to value.
-//
-// value: When set to True, find the nth-largest value in the vector and vice
-// versa.
-// If not specified, defaults to false
-func NthElementReverse(value bool) NthElementAttr {
-	return func(m optionalAttr) {
-		m["reverse"] = value
-	}
-}
-
-// Finds values of the `n`-th order statistic for the last dimension.
-//
-// If the input is a vector (rank-1), finds the entries which is the nth-smallest
-// value in the vector and outputs their values as scalar tensor.
-//
-// For matrices (resp. higher rank input), computes the entries which is the
-// nth-smallest value in each row (resp. vector along the last dimension). Thus,
-//
-//     values.shape = input.shape[:-1]
-//
-// Arguments:
-//	input: 1-D or higher with last dimension at least `n+1`.
-//	n: 0-D. Position of sorted vector to select along the last dimension (along
-// each row for matrices). Valid range of n is `[0, input.shape[:-1])`
-//
-// Returns The `n`-th order statistic along each last dimensional slice.
-func NthElement(scope *Scope, input tf.Output, n tf.Output, optional ...NthElementAttr) (values tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{}
-	for _, a := range optional {
-		a(attrs)
-	}
-	opspec := tf.OpSpec{
-		Type: "NthElement",
-		Input: []tf.Input{
-			input, n,
-		},
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// Computes asin of x element-wise.
-func Asin(scope *Scope, x tf.Output) (y tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "Asin",
-		Input: []tf.Input{
-			x,
-		},
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// Convert the quantized 'input' tensor into a lower-precision 'output', using the
-//
-// output range specified with 'requested_output_min' and 'requested_output_max'.
-//
-// [input_min, input_max] are scalar floats that specify the range for the float
-// interpretation of the 'input' data. For example, if input_min is -1.0f and
-// input_max is 1.0f, and we are dealing with quint16 quantized data, then a 0
-// value in the 16-bit data should be interpreted as -1.0f, and a 65535 means 1.0f.
-//
-// Arguments:
-//
-//	input_min: The float value that the minimum quantized input value represents.
-//	input_max: The float value that the maximum quantized input value represents.
-//	requested_output_min: The float value that the minimum quantized output value represents.
-//	requested_output_max: The float value that the maximum quantized output value represents.
-//	out_type: The type of the output. Should be a lower bit depth than Tinput.
-//
-// Returns The requested_output_min value is copied into this output.The requested_output_max value is copied into this output.
-func Requantize(scope *Scope, input tf.Output, input_min tf.Output, input_max tf.Output, requested_output_min tf.Output, requested_output_max tf.Output, out_type tf.DataType) (output tf.Output, output_min tf.Output, output_max tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{"out_type": out_type}
-	opspec := tf.OpSpec{
-		Type: "Requantize",
-		Input: []tf.Input{
-			input, input_min, input_max, requested_output_min, requested_output_max,
-		},
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0), op.Output(1), op.Output(2)
-}
-
-// ArgMinAttr is an optional argument to ArgMin.
-type ArgMinAttr func(optionalAttr)
-
-// ArgMinOutputType sets the optional output_type attribute to value.
-// If not specified, defaults to DT_INT64
-func ArgMinOutputType(value tf.DataType) ArgMinAttr {
+// ArgMinOutputType sets the optional output_type attribute to value.
+// If not specified, defaults to DT_INT64
+func ArgMinOutputType(value tf.DataType) ArgMinAttr {
 	return func(m optionalAttr) {
 		m["output_type"] = value
 	}
@@ -25595,30 +26049,6 @@ func BiasAddGrad(scope *Scope, out_backprop tf.Output, optional ...BiasAddGradAt
 	return op.Output(0)
 }
 
-// Computes the power of one value to another.
-//
-// Given a tensor `x` and a tensor `y`, this operation computes \\(x^y\\) for
-// corresponding elements in `x` and `y`. For example:
-//
-// ```
-// # tensor 'x' is [[2, 2]], [3, 3]]
-// # tensor 'y' is [[8, 16], [2, 3]]
-// tf.pow(x, y) ==> [[256, 65536], [9, 27]]
-// ```
-func Pow(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "Pow",
-		Input: []tf.Input{
-			x, y,
-		},
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
 // Compute the upper regularized incomplete Gamma function `Q(a, x)`.
 //
 // The upper regularized incomplete Gamma function is defined as:
@@ -25997,256 +26427,491 @@ func SegmentSum(scope *Scope, data tf.Output, segment_ids tf.Output) (output tf.
 	return op.Output(0)
 }
 
-// ImageSummaryAttr is an optional argument to ImageSummary.
-type ImageSummaryAttr func(optionalAttr)
-
-// ImageSummaryMaxImages sets the optional max_images attribute to value.
+// Bucketizes 'input' based on 'boundaries'.
 //
-// value: Max number of batch elements to generate images for.
-// If not specified, defaults to 3
+// For example, if the inputs are
+//     boundaries = [0, 10, 100]
+//     input = [[-5, 10000]
+//              [150,   10]
+//              [5,    100]]
 //
-// REQUIRES: value >= 1
-func ImageSummaryMaxImages(value int64) ImageSummaryAttr {
-	return func(m optionalAttr) {
-		m["max_images"] = value
-	}
-}
-
-// ImageSummaryBadColor sets the optional bad_color attribute to value.
+// then the output will be
+//     output = [[0, 3]
+//               [3, 2]
+//               [1, 3]]
 //
-// value: Color to use for pixels with non-finite values.
-// If not specified, defaults to <dtype:DT_UINT8 tensor_shape:<dim:<size:4 > > int_val:255 int_val:0 int_val:0 int_val:255 >
-func ImageSummaryBadColor(value tf.Tensor) ImageSummaryAttr {
-	return func(m optionalAttr) {
-		m["bad_color"] = value
+// Arguments:
+//	input: Any shape of Tensor contains with int or float type.
+//	boundaries: A sorted list of floats gives the boundary of the buckets.
+//
+// Returns Same shape with 'input', each value of input replaced with bucket index.
+//
+// @compatibility(numpy)
+// Equivalent to np.digitize.
+// @end_compatibility
+func Bucketize(scope *Scope, input tf.Output, boundaries []float32) (output tf.Output) {
+	if scope.Err() != nil {
+		return
 	}
+	attrs := map[string]interface{}{"boundaries": boundaries}
+	opspec := tf.OpSpec{
+		Type: "Bucketize",
+		Input: []tf.Input{
+			input,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
 }
 
-// Outputs a `Summary` protocol buffer with images.
+// Reshapes a SparseTensor to represent values in a new dense shape.
 //
-// The summary has up to `max_images` summary values containing images. The
-// images are built from `tensor` which must be 4-D with shape `[batch_size,
-// height, width, channels]` and where `channels` can be:
+// This operation has the same semantics as reshape on the represented dense
+// tensor.  The `input_indices` are recomputed based on the requested `new_shape`.
 //
-// *  1: `tensor` is interpreted as Grayscale.
-// *  3: `tensor` is interpreted as RGB.
-// *  4: `tensor` is interpreted as RGBA.
+// If one component of `new_shape` is the special value -1, the size of that
+// dimension is computed so that the total dense size remains constant.  At
+// most one component of `new_shape` can be -1.  The number of dense elements
+// implied by `new_shape` must be the same as the number of dense elements
+// originally implied by `input_shape`.
 //
-// The images have the same number of channels as the input tensor. For float
-// input, the values are normalized one image at a time to fit in the range
-// `[0, 255]`.  `uint8` values are unchanged.  The op uses two different
-// normalization algorithms:
+// Reshaping does not affect the order of values in the SparseTensor.
 //
-// *  If the input values are all positive, they are rescaled so the largest one
-//    is 255.
+// If the input tensor has rank `R_in` and `N` non-empty values, and `new_shape`
+// has length `R_out`, then `input_indices` has shape `[N, R_in]`,
+// `input_shape` has length `R_in`, `output_indices` has shape `[N, R_out]`, and
+// `output_shape` has length `R_out`.
 //
-// *  If any input value is negative, the values are shifted so input value 0.0
-//    is at 127.  They are then rescaled so that either the smallest value is 0,
-//    or the largest one is 255.
+// Arguments:
+//	input_indices: 2-D.  `N x R_in` matrix with the indices of non-empty values in a
+// SparseTensor.
+//	input_shape: 1-D.  `R_in` vector with the input SparseTensor's dense shape.
+//	new_shape: 1-D.  `R_out` vector with the requested new dense shape.
 //
-// The `tag` argument is a scalar `Tensor` of type `string`.  It is used to
-// build the `tag` of the summary values:
+// Returns 2-D.  `N x R_out` matrix with the updated indices of non-empty
+// values in the output SparseTensor.1-D.  `R_out` vector with the full dense shape of the output
+// SparseTensor.  This is the same as `new_shape` but with any -1 dimensions
+// filled in.
+func SparseReshape(scope *Scope, input_indices tf.Output, input_shape tf.Output, new_shape tf.Output) (output_indices tf.Output, output_shape tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "SparseReshape",
+		Input: []tf.Input{
+			input_indices, input_shape, new_shape,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0), op.Output(1)
+}
+
+// Computes the product along segments of a tensor.
 //
-// *  If `max_images` is 1, the summary value tag is '*tag*/image'.
-// *  If `max_images` is greater than 1, the summary value tags are
-//    generated sequentially as '*tag*/image/0', '*tag*/image/1', etc.
+// Read @{$math_ops#segmentation$the section on segmentation} for an explanation of
+// segments.
 //
-// The `bad_color` argument is the color to use in the generated images for
-// non-finite input values.  It is a `unit8` 1-D tensor of length `channels`.
-// Each element must be in the range `[0, 255]` (It represents the value of a
-// pixel in the output image).  Non-finite values in the input tensor are
-// replaced by this tensor in the output image.  The default value is the color
-// red.
+// Computes a tensor such that
+// \\(output_i = \prod_j data_j\\) where the product is over `j` such
+// that `segment_ids[j] == i`.
+//
+// If the product is empty for a given segment ID `i`, `output[i] = 1`.
+//
+// <div style="width:70%; margin:auto; margin-bottom:10px; margin-top:20px;">
+// <img style="width:100%" src="https://www.tensorflow.org/images/SegmentProd.png" alt>
+// </div>
 //
 // Arguments:
-//	tag: Scalar. Used to build the `tag` attribute of the summary values.
-//	tensor: 4-D of shape `[batch_size, height, width, channels]` where
-// `channels` is 1, 3, or 4.
 //
-// Returns Scalar. Serialized `Summary` protocol buffer.
-func ImageSummary(scope *Scope, tag tf.Output, tensor tf.Output, optional ...ImageSummaryAttr) (summary tf.Output) {
+//	segment_ids: A 1-D tensor whose rank is equal to the rank of `data`'s
+// first dimension.  Values should be sorted and can be repeated.
+//
+// Returns Has same shape as data, except for dimension 0 which
+// has size `k`, the number of segments.
+func SegmentProd(scope *Scope, data tf.Output, segment_ids tf.Output) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{}
-	for _, a := range optional {
-		a(attrs)
+	opspec := tf.OpSpec{
+		Type: "SegmentProd",
+		Input: []tf.Input{
+			data, segment_ids,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// Computes the sum along segments of a tensor.
+//
+// Read @{$math_ops#segmentation$the section on segmentation} for an explanation of
+// segments.
+//
+// Computes a tensor such that
+// `(output[i] = sum_{j...} data[j...]` where the sum is over tuples `j...` such
+// that `segment_ids[j...] == i`.  Unlike `SegmentSum`, `segment_ids`
+// need not be sorted and need not cover all values in the full
+// range of valid values.
+//
+// If the sum is empty for a given segment ID `i`, `output[i] = 0`.
+// If the given segment ID `i` is negative, the value is dropped and will not be
+// added to the sum of the segment.
+//
+// `num_segments` should equal the number of distinct segment IDs.
+//
+// <div style="width:70%; margin:auto; margin-bottom:10px; margin-top:20px;">
+// <img style="width:100%" src="https://www.tensorflow.org/images/UnsortedSegmentSum.png" alt>
+// </div>
+//
+// Arguments:
+//
+//	segment_ids: A tensor whose shape is a prefix of `data.shape`.
+//
+//
+// Returns Has same shape as data, except for the first `segment_ids.rank`
+// dimensions, which are replaced with a single dimension which has size
+// `num_segments`.
+func UnsortedSegmentSum(scope *Scope, data tf.Output, segment_ids tf.Output, num_segments tf.Output) (output tf.Output) {
+	if scope.Err() != nil {
+		return
 	}
 	opspec := tf.OpSpec{
-		Type: "ImageSummary",
+		Type: "UnsortedSegmentSum",
 		Input: []tf.Input{
-			tag, tensor,
+			data, segment_ids, num_segments,
 		},
-		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// Bucketizes 'input' based on 'boundaries'.
+// Computes hyperbolic sine of x element-wise.
+func Sinh(scope *Scope, x tf.Output) (y tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "Sinh",
+		Input: []tf.Input{
+			x,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// Computes the sum along sparse segments of a tensor.
 //
-// For example, if the inputs are
-//     boundaries = [0, 10, 100]
-//     input = [[-5, 10000]
-//              [150,   10]
-//              [5,    100]]
+// Read @{$math_ops#segmentation$the section on segmentation} for an explanation of
+// segments.
 //
-// then the output will be
-//     output = [[0, 3]
-//               [3, 2]
-//               [1, 3]]
+// Like `SegmentSum`, but `segment_ids` can have rank less than `data`'s first
+// dimension, selecting a subset of dimension 0, specified by `indices`.
+//
+// For example:
+//
+// ```python
+// c = tf.constant([[1,2,3,4], [-1,-2,-3,-4], [5,6,7,8]])
+//
+// # Select two rows, one segment.
+// tf.sparse_segment_sum(c, tf.constant([0, 1]), tf.constant([0, 0]))
+// # => [[0 0 0 0]]
+//
+// # Select two rows, two segment.
+// tf.sparse_segment_sum(c, tf.constant([0, 1]), tf.constant([0, 1]))
+// # => [[ 1  2  3  4]
+// #     [-1 -2 -3 -4]]
+//
+// # Select all rows, two segments.
+// tf.sparse_segment_sum(c, tf.constant([0, 1, 2]), tf.constant([0, 0, 1]))
+// # => [[0 0 0 0]
+// #     [5 6 7 8]]
+//
+// # Which is equivalent to:
+// tf.segment_sum(c, tf.constant([0, 0, 1]))
+// ```
 //
 // Arguments:
-//	input: Any shape of Tensor contains with int or float type.
-//	boundaries: A sorted list of floats gives the boundary of the buckets.
 //
-// Returns Same shape with 'input', each value of input replaced with bucket index.
+//	indices: A 1-D tensor. Has same rank as `segment_ids`.
+//	segment_ids: A 1-D tensor. Values should be sorted and can be repeated.
 //
-// @compatibility(numpy)
-// Equivalent to np.digitize.
-// @end_compatibility
-func Bucketize(scope *Scope, input tf.Output, boundaries []float32) (output tf.Output) {
+// Returns Has same shape as data, except for dimension 0 which
+// has size `k`, the number of segments.
+func SparseSegmentSum(scope *Scope, data tf.Output, indices tf.Output, segment_ids tf.Output) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{"boundaries": boundaries}
 	opspec := tf.OpSpec{
-		Type: "Bucketize",
+		Type: "SparseSegmentSum",
 		Input: []tf.Input{
-			input,
+			data, indices, segment_ids,
 		},
-		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// Reshapes a SparseTensor to represent values in a new dense shape.
+// Counts the number of occurrences of each value in an integer array.
 //
-// This operation has the same semantics as reshape on the represented dense
-// tensor.  The `input_indices` are recomputed based on the requested `new_shape`.
+// Outputs a vector with length `size` and the same dtype as `weights`. If
+// `weights` are empty, then index `i` stores the number of times the value `i` is
+// counted in `arr`. If `weights` are non-empty, then index `i` stores the sum of
+// the value in `weights` at each index where the corresponding value in `arr` is
+// `i`.
 //
-// If one component of `new_shape` is the special value -1, the size of that
-// dimension is computed so that the total dense size remains constant.  At
-// most one component of `new_shape` can be -1.  The number of dense elements
-// implied by `new_shape` must be the same as the number of dense elements
-// originally implied by `input_shape`.
+// Values in `arr` outside of the range [0, size) are ignored.
 //
-// Reshaping does not affect the order of values in the SparseTensor.
+// Arguments:
+//	arr: int32 `Tensor`.
+//	size: non-negative int32 scalar `Tensor`.
+//	weights: is an int32, int64, float32, or float64 `Tensor` with the same
+// shape as `arr`, or a length-0 `Tensor`, in which case it acts as all weights
+// equal to 1.
 //
-// If the input tensor has rank `R_in` and `N` non-empty values, and `new_shape`
-// has length `R_out`, then `input_indices` has shape `[N, R_in]`,
-// `input_shape` has length `R_in`, `output_indices` has shape `[N, R_out]`, and
-// `output_shape` has length `R_out`.
+// Returns 1D `Tensor` with length equal to `size`. The counts or summed weights for
+// each value in the range [0, size).
+func Bincount(scope *Scope, arr tf.Output, size tf.Output, weights tf.Output) (bins tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "Bincount",
+		Input: []tf.Input{
+			arr, size, weights,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// BatchToSpace for 4-D tensors of type T.
+//
+// This is a legacy version of the more general BatchToSpaceND.
+//
+// Rearranges (permutes) data from batch into blocks of spatial data, followed by
+// cropping. This is the reverse transformation of SpaceToBatch. More specifically,
+// this op outputs a copy of the input tensor where values from the `batch`
+// dimension are moved in spatial blocks to the `height` and `width` dimensions,
+// followed by cropping along the `height` and `width` dimensions.
 //
 // Arguments:
-//	input_indices: 2-D.  `N x R_in` matrix with the indices of non-empty values in a
-// SparseTensor.
-//	input_shape: 1-D.  `R_in` vector with the input SparseTensor's dense shape.
-//	new_shape: 1-D.  `R_out` vector with the requested new dense shape.
+//	input: 4-D tensor with shape
+// `[batch*block_size*block_size, height_pad/block_size, width_pad/block_size,
+//   depth]`. Note that the batch size of the input tensor must be divisible by
+// `block_size * block_size`.
+//	crops: 2-D tensor of non-negative integers with shape `[2, 2]`. It specifies
+// how many elements to crop from the intermediate result across the spatial
+// dimensions as follows:
 //
-// Returns 2-D.  `N x R_out` matrix with the updated indices of non-empty
-// values in the output SparseTensor.1-D.  `R_out` vector with the full dense shape of the output
-// SparseTensor.  This is the same as `new_shape` but with any -1 dimensions
-// filled in.
-func SparseReshape(scope *Scope, input_indices tf.Output, input_shape tf.Output, new_shape tf.Output) (output_indices tf.Output, output_shape tf.Output) {
+//     crops = [[crop_top, crop_bottom], [crop_left, crop_right]]
+//
+//
+// Returns 4-D with shape `[batch, height, width, depth]`, where:
+//
+//       height = height_pad - crop_top - crop_bottom
+//       width = width_pad - crop_left - crop_right
+//
+// The attr `block_size` must be greater than one. It indicates the block size.
+//
+// Some examples:
+//
+// (1) For the following input of shape `[4, 1, 1, 1]` and block_size of 2:
+//
+// ```
+// [[[[1]]], [[[2]]], [[[3]]], [[[4]]]]
+// ```
+//
+// The output tensor has shape `[1, 2, 2, 1]` and value:
+//
+// ```
+// x = [[[[1], [2]], [[3], [4]]]]
+// ```
+//
+// (2) For the following input of shape `[4, 1, 1, 3]` and block_size of 2:
+//
+// ```
+// [[[1, 2, 3]], [[4, 5, 6]], [[7, 8, 9]], [[10, 11, 12]]]
+// ```
+//
+// The output tensor has shape `[1, 2, 2, 3]` and value:
+//
+// ```
+// x = [[[[1, 2, 3], [4, 5, 6]],
+//       [[7, 8, 9], [10, 11, 12]]]]
+// ```
+//
+// (3) For the following input of shape `[4, 2, 2, 1]` and block_size of 2:
+//
+// ```
+// x = [[[[1], [3]], [[9], [11]]],
+//      [[[2], [4]], [[10], [12]]],
+//      [[[5], [7]], [[13], [15]]],
+//      [[[6], [8]], [[14], [16]]]]
+// ```
+//
+// The output tensor has shape `[1, 4, 4, 1]` and value:
+//
+// ```
+// x = [[[1],   [2],  [3],  [4]],
+//      [[5],   [6],  [7],  [8]],
+//      [[9],  [10], [11],  [12]],
+//      [[13], [14], [15],  [16]]]
+// ```
+//
+// (4) For the following input of shape `[8, 1, 2, 1]` and block_size of 2:
+//
+// ```
+// x = [[[[1], [3]]], [[[9], [11]]], [[[2], [4]]], [[[10], [12]]],
+//      [[[5], [7]]], [[[13], [15]]], [[[6], [8]]], [[[14], [16]]]]
+// ```
+//
+// The output tensor has shape `[2, 2, 4, 1]` and value:
+//
+// ```
+// x = [[[[1], [3]], [[5], [7]]],
+//      [[[2], [4]], [[10], [12]]],
+//      [[[5], [7]], [[13], [15]]],
+//      [[[6], [8]], [[14], [16]]]]
+// ```
+func BatchToSpace(scope *Scope, input tf.Output, crops tf.Output, block_size int64) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
+	attrs := map[string]interface{}{"block_size": block_size}
 	opspec := tf.OpSpec{
-		Type: "SparseReshape",
+		Type: "BatchToSpace",
 		Input: []tf.Input{
-			input_indices, input_shape, new_shape,
+			input, crops,
 		},
+		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
-	return op.Output(0), op.Output(1)
+	return op.Output(0)
 }
 
-// Computes the product along segments of a tensor.
+// SparseToDenseAttr is an optional argument to SparseToDense.
+type SparseToDenseAttr func(optionalAttr)
+
+// SparseToDenseValidateIndices sets the optional validate_indices attribute to value.
 //
-// Read @{$math_ops#segmentation$the section on segmentation} for an explanation of
-// segments.
+// value: If true, indices are checked to make sure they are sorted in
+// lexicographic order and that there are no repeats.
+// If not specified, defaults to true
+func SparseToDenseValidateIndices(value bool) SparseToDenseAttr {
+	return func(m optionalAttr) {
+		m["validate_indices"] = value
+	}
+}
+
+// Converts a sparse representation into a dense tensor.
 //
-// Computes a tensor such that
-// \\(output_i = \prod_j data_j\\) where the product is over `j` such
-// that `segment_ids[j] == i`.
+// Builds an array `dense` with shape `output_shape` such that
+//
+// ```
+// # If sparse_indices is scalar
+// dense[i] = (i == sparse_indices ? sparse_values : default_value)
+//
+// # If sparse_indices is a vector, then for each i
+// dense[sparse_indices[i]] = sparse_values[i]
+//
+// # If sparse_indices is an n by d matrix, then for each i in [0, n)
+// dense[sparse_indices[i][0], ..., sparse_indices[i][d-1]] = sparse_values[i]
+// ```
 //
-// If the product is empty for a given segment ID `i`, `output[i] = 1`.
+// All other values in `dense` are set to `default_value`.  If `sparse_values` is a
+// scalar, all sparse indices are set to this single value.
 //
-// <div style="width:70%; margin:auto; margin-bottom:10px; margin-top:20px;">
-// <img style="width:100%" src="https://www.tensorflow.org/images/SegmentProd.png" alt>
-// </div>
+// Indices should be sorted in lexicographic order, and indices must not
+// contain any repeats. If `validate_indices` is true, these properties
+// are checked during execution.
 //
 // Arguments:
+//	sparse_indices: 0-D, 1-D, or 2-D.  `sparse_indices[i]` contains the complete
+// index where `sparse_values[i]` will be placed.
+//	output_shape: 1-D.  Shape of the dense output tensor.
+//	sparse_values: 1-D.  Values corresponding to each row of `sparse_indices`,
+// or a scalar value to be used for all sparse indices.
+//	default_value: Scalar value to set for indices not specified in
+// `sparse_indices`.
 //
-//	segment_ids: A 1-D tensor whose rank is equal to the rank of `data`'s
-// first dimension.  Values should be sorted and can be repeated.
-//
-// Returns Has same shape as data, except for dimension 0 which
-// has size `k`, the number of segments.
-func SegmentProd(scope *Scope, data tf.Output, segment_ids tf.Output) (output tf.Output) {
+// Returns Dense output tensor of shape `output_shape`.
+func SparseToDense(scope *Scope, sparse_indices tf.Output, output_shape tf.Output, sparse_values tf.Output, default_value tf.Output, optional ...SparseToDenseAttr) (dense tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
+	attrs := map[string]interface{}{}
+	for _, a := range optional {
+		a(attrs)
+	}
 	opspec := tf.OpSpec{
-		Type: "SegmentProd",
+		Type: "SparseToDense",
 		Input: []tf.Input{
-			data, segment_ids,
+			sparse_indices, output_shape, sparse_values, default_value,
 		},
+		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// Computes the sum along segments of a tensor.
-//
-// Read @{$math_ops#segmentation$the section on segmentation} for an explanation of
-// segments.
+// NthElementAttr is an optional argument to NthElement.
+type NthElementAttr func(optionalAttr)
+
+// NthElementReverse sets the optional reverse attribute to value.
 //
-// Computes a tensor such that
-// `(output[i] = sum_{j...} data[j...]` where the sum is over tuples `j...` such
-// that `segment_ids[j...] == i`.  Unlike `SegmentSum`, `segment_ids`
-// need not be sorted and need not cover all values in the full
-// range of valid values.
+// value: When set to True, find the nth-largest value in the vector and vice
+// versa.
+// If not specified, defaults to false
+func NthElementReverse(value bool) NthElementAttr {
+	return func(m optionalAttr) {
+		m["reverse"] = value
+	}
+}
+
+// Finds values of the `n`-th order statistic for the last dimension.
 //
-// If the sum is empty for a given segment ID `i`, `output[i] = 0`.
+// If the input is a vector (rank-1), finds the entries which is the nth-smallest
+// value in the vector and outputs their values as scalar tensor.
 //
-// `num_segments` should equal the number of distinct segment IDs.
+// For matrices (resp. higher rank input), computes the entries which is the
+// nth-smallest value in each row (resp. vector along the last dimension). Thus,
 //
-// <div style="width:70%; margin:auto; margin-bottom:10px; margin-top:20px;">
-// <img style="width:100%" src="https://www.tensorflow.org/images/UnsortedSegmentSum.png" alt>
-// </div>
+//     values.shape = input.shape[:-1]
 //
 // Arguments:
+//	input: 1-D or higher with last dimension at least `n+1`.
+//	n: 0-D. Position of sorted vector to select along the last dimension (along
+// each row for matrices). Valid range of n is `[0, input.shape[:-1])`
 //
-//	segment_ids: A tensor whose shape is a prefix of `data.shape`.
-//
-//
-// Returns Has same shape as data, except for the first `segment_ids.rank`
-// dimensions, which are replaced with a single dimension which has size
-// `num_segments`.
-func UnsortedSegmentSum(scope *Scope, data tf.Output, segment_ids tf.Output, num_segments tf.Output) (output tf.Output) {
+// Returns The `n`-th order statistic along each last dimensional slice.
+func NthElement(scope *Scope, input tf.Output, n tf.Output, optional ...NthElementAttr) (values tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
+	attrs := map[string]interface{}{}
+	for _, a := range optional {
+		a(attrs)
+	}
 	opspec := tf.OpSpec{
-		Type: "UnsortedSegmentSum",
+		Type: "NthElement",
 		Input: []tf.Input{
-			data, segment_ids, num_segments,
+			input, n,
 		},
+		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// Computes hyperbolic sine of x element-wise.
-func Sinh(scope *Scope, x tf.Output) (y tf.Output) {
+// Computes asin of x element-wise.
+func Asin(scope *Scope, x tf.Output) (y tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
 	opspec := tf.OpSpec{
-		Type: "Sinh",
+		Type: "Asin",
 		Input: []tf.Input{
 			x,
 		},
@@ -26257,83 +26922,49 @@ func Sinh(scope *Scope, x tf.Output) (y tf.Output) {
 
 // Computes the sum along sparse segments of a tensor.
 //
+// Like `SparseSegmentSum`, but allows missing ids in `segment_ids`. If an id is
+// misisng, the `output` tensor at that position will be zeroed.
+//
 // Read @{$math_ops#segmentation$the section on segmentation} for an explanation of
 // segments.
 //
-// Like `SegmentSum`, but `segment_ids` can have rank less than `data`'s first
-// dimension, selecting a subset of dimension 0, specified by `indices`.
-//
 // For example:
 //
 // ```python
 // c = tf.constant([[1,2,3,4], [-1,-2,-3,-4], [5,6,7,8]])
 //
-// # Select two rows, one segment.
-// tf.sparse_segment_sum(c, tf.constant([0, 1]), tf.constant([0, 0]))
-// # => [[0 0 0 0]]
-//
-// # Select two rows, two segment.
-// tf.sparse_segment_sum(c, tf.constant([0, 1]), tf.constant([0, 1]))
-// # => [[ 1  2  3  4]
-// #     [-1 -2 -3 -4]]
-//
-// # Select all rows, two segments.
-// tf.sparse_segment_sum(c, tf.constant([0, 1, 2]), tf.constant([0, 0, 1]))
+// tf.sparse_segment_sum_with_num_segments(
+//     c, tf.constant([0, 1]), tf.constant([0, 0]), num_segments=3)
 // # => [[0 0 0 0]
-// #     [5 6 7 8]]
+// #     [0 0 0 0]
+// #     [0 0 0 0]]
 //
-// # Which is equivalent to:
-// tf.segment_sum(c, tf.constant([0, 0, 1]))
+// tf.sparse_segment_sum_with_num_segments(c,
+//                                         tf.constant([0, 1]),
+//                                         tf.constant([0, 2],
+//                                         num_segments=4))
+// # => [[ 1  2  3  4]
+// #     [ 0  0  0  0]
+// #     [-1 -2 -3 -4]
+// #     [ 0  0  0  0]]
 // ```
 //
 // Arguments:
 //
 //	indices: A 1-D tensor. Has same rank as `segment_ids`.
 //	segment_ids: A 1-D tensor. Values should be sorted and can be repeated.
+//	num_segments: Should equal the number of distinct segment IDs.
 //
 // Returns Has same shape as data, except for dimension 0 which
-// has size `k`, the number of segments.
-func SparseSegmentSum(scope *Scope, data tf.Output, indices tf.Output, segment_ids tf.Output) (output tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "SparseSegmentSum",
-		Input: []tf.Input{
-			data, indices, segment_ids,
-		},
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// Counts the number of occurrences of each value in an integer array.
-//
-// Outputs a vector with length `size` and the same dtype as `weights`. If
-// `weights` are empty, then index `i` stores the number of times the value `i` is
-// counted in `arr`. If `weights` are non-empty, then index `i` stores the sum of
-// the value in `weights` at each index where the corresponding value in `arr` is
-// `i`.
-//
-// Values in `arr` outside of the range [0, size) are ignored.
-//
-// Arguments:
-//	arr: int32 `Tensor`.
-//	size: non-negative int32 scalar `Tensor`.
-//	weights: is an int32, int64, float32, or float64 `Tensor` with the same
-// shape as `arr`, or a length-0 `Tensor`, in which case it acts as all weights
-// equal to 1.
-//
-// Returns 1D `Tensor` with length equal to `size`. The counts or summed weights for
-// each value in the range [0, size).
-func Bincount(scope *Scope, arr tf.Output, size tf.Output, weights tf.Output) (bins tf.Output) {
+// has size `num_segments`.
+func SparseSegmentSumWithNumSegments(scope *Scope, data tf.Output, indices tf.Output, segment_ids tf.Output, num_segments tf.Output) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
 	opspec := tf.OpSpec{
-		Type: "Bincount",
+		Type: "SparseSegmentSumWithNumSegments",
 		Input: []tf.Input{
-			arr, size, weights,
+			data, indices, segment_ids, num_segments,
 		},
 	}
 	op := scope.AddOperation(opspec)
@@ -26440,6 +27071,68 @@ func SparseSegmentMean(scope *Scope, data tf.Output, indices tf.Output, segment_
 	return op.Output(0)
 }
 
+// Computes the mean along sparse segments of a tensor.
+//
+// Like `SparseSegmentMean`, but allows missing ids in `segment_ids`. If an id is
+// misisng, the `output` tensor at that position will be zeroed.
+//
+// Read @{$math_ops#segmentation$the section on segmentation} for an explanation of
+// segments.
+//
+// Arguments:
+//
+//	indices: A 1-D tensor. Has same rank as `segment_ids`.
+//	segment_ids: A 1-D tensor. Values should be sorted and can be repeated.
+//	num_segments: Should equal the number of distinct segment IDs.
+//
+// Returns Has same shape as data, except for dimension 0 which has size
+// `num_segments`.
+func SparseSegmentMeanWithNumSegments(scope *Scope, data tf.Output, indices tf.Output, segment_ids tf.Output, num_segments tf.Output) (output tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "SparseSegmentMeanWithNumSegments",
+		Input: []tf.Input{
+			data, indices, segment_ids, num_segments,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// Computes the sum along sparse segments of a tensor divided by the sqrt of N.
+//
+// N is the size of the segment being reduced.
+//
+// Like `SparseSegmentSqrtN`, but allows missing ids in `segment_ids`. If an id is
+// misisng, the `output` tensor at that position will be zeroed.
+//
+// Read @{$math_ops#segmentation$the section on segmentation} for an explanation of
+// segments.
+//
+// Arguments:
+//
+//	indices: A 1-D tensor. Has same rank as `segment_ids`.
+//	segment_ids: A 1-D tensor. Values should be sorted and can be repeated.
+//	num_segments: Should equal the number of distinct segment IDs.
+//
+// Returns Has same shape as data, except for dimension 0 which
+// has size `k`, the number of segments.
+func SparseSegmentSqrtNWithNumSegments(scope *Scope, data tf.Output, indices tf.Output, segment_ids tf.Output, num_segments tf.Output) (output tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "SparseSegmentSqrtNWithNumSegments",
+		Input: []tf.Input{
+			data, indices, segment_ids, num_segments,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
 // Reshapes a quantized tensor as per the Reshape op.
 //
 // ```
@@ -27368,21 +28061,40 @@ func DynamicPartition(scope *Scope, data tf.Output, partitions tf.Output, num_pa
 	return outputs
 }
 
-// Serialize a `SparseTensor` into a string 3-vector (1-D `Tensor`) object.
+// SerializeSparseAttr is an optional argument to SerializeSparse.
+type SerializeSparseAttr func(optionalAttr)
+
+// SerializeSparseOutType sets the optional out_type attribute to value.
+//
+// value: The `dtype` to use for serialization; the supported types are `string`
+// (default) and `variant`.
+// If not specified, defaults to DT_STRING
+func SerializeSparseOutType(value tf.DataType) SerializeSparseAttr {
+	return func(m optionalAttr) {
+		m["out_type"] = value
+	}
+}
+
+// Serialize a `SparseTensor` into a `[3]` `Tensor` object.
 //
 // Arguments:
 //	sparse_indices: 2-D.  The `indices` of the `SparseTensor`.
 //	sparse_values: 1-D.  The `values` of the `SparseTensor`.
 //	sparse_shape: 1-D.  The `shape` of the `SparseTensor`.
-func SerializeSparse(scope *Scope, sparse_indices tf.Output, sparse_values tf.Output, sparse_shape tf.Output) (serialized_sparse tf.Output) {
+func SerializeSparse(scope *Scope, sparse_indices tf.Output, sparse_values tf.Output, sparse_shape tf.Output, optional ...SerializeSparseAttr) (serialized_sparse tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
+	attrs := map[string]interface{}{}
+	for _, a := range optional {
+		a(attrs)
+	}
 	opspec := tf.OpSpec{
 		Type: "SerializeSparse",
 		Input: []tf.Input{
 			sparse_indices, sparse_values, sparse_shape,
 		},
+		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
@@ -27408,48 +28120,3 @@ func InitializeTableV2(scope *Scope, table_handle tf.Output, keys tf.Output, val
 	}
 	return scope.AddOperation(opspec)
 }
-
-// Creates a dataset that asynchronously prefetches elements from `input_dataset`.
-//
-// Arguments:
-//
-//	buffer_size: The maximum number of elements to buffer in an iterator over
-// this dataset.
-//
-//
-func PrefetchDataset(scope *Scope, input_dataset tf.Output, buffer_size tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes}
-	opspec := tf.OpSpec{
-		Type: "PrefetchDataset",
-		Input: []tf.Input{
-			input_dataset, buffer_size,
-		},
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// Outputs a `Summary` protocol buffer with a tensor and per-plugin data.
-//
-// Arguments:
-//	tag: A string attached to this summary. Used for organization in TensorBoard.
-//	tensor: A tensor to serialize.
-//	serialized_summary_metadata: A serialized SummaryMetadata proto. Contains plugin
-// data.
-func TensorSummaryV2(scope *Scope, tag tf.Output, tensor tf.Output, serialized_summary_metadata tf.Output) (summary tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "TensorSummaryV2",
-		Input: []tf.Input{
-			tag, tensor, serialized_summary_metadata,
-		},
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-- 
GitLab


From 04a14ef4f98ffa921095590d7b86490b3d2b19c6 Mon Sep 17 00:00:00 2001
From: Jianwei Xie <xiejw@google.com>
Date: Sat, 16 Dec 2017 10:09:18 -0800
Subject: [PATCH 1135/1225] Removes the TpuContext from non-TPU mode.

PiperOrigin-RevId: 179302096
---
 tensorflow/contrib/tpu/python/tpu/tpu_estimator.py | 6 +-----
 tensorflow/contrib/tpu/python/tpu/tpu_optimizer.py | 7 +++++--
 2 files changed, 6 insertions(+), 7 deletions(-)

diff --git a/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py b/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py
index d66abd7b66..6bf11e1ae5 100644
--- a/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py
+++ b/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py
@@ -32,7 +32,6 @@ from tensorflow.contrib.tpu.python.ops import tpu_ops
 from tensorflow.contrib.tpu.python.tpu import tpu
 from tensorflow.contrib.tpu.python.tpu import tpu_config
 from tensorflow.contrib.tpu.python.tpu import tpu_feed
-from tensorflow.contrib.tpu.python.tpu import tpu_function
 from tensorflow.contrib.tpu.python.tpu import training_loop
 from tensorflow.contrib.tpu.python.tpu import util as util_lib
 
@@ -1012,10 +1011,7 @@ class _ModelFnWrapper(object):
     self._ctx = ctx
 
   def call_without_tpu(self, features, labels):
-    # Let CrossShardOptimizer be called without TPU in model_fn, since it's
-    # common to set the train_op even when running evaluate() or predict().
-    with tpu_function.tpu_shard_context(1):
-      return self._call_model_fn(features, labels)
+    return self._call_model_fn(features, labels)
 
   def convert_to_single_tpu_train_step(self, dequeue_fn):
     """Converts user provided model_fn` as a single train step on TPU.
diff --git a/tensorflow/contrib/tpu/python/tpu/tpu_optimizer.py b/tensorflow/contrib/tpu/python/tpu/tpu_optimizer.py
index a00fd1d086..e76cf83e4d 100644
--- a/tensorflow/contrib/tpu/python/tpu/tpu_optimizer.py
+++ b/tensorflow/contrib/tpu/python/tpu/tpu_optimizer.py
@@ -22,6 +22,7 @@ from __future__ import print_function
 from tensorflow.contrib.tpu.python.ops import tpu_ops
 from tensorflow.contrib.tpu.python.tpu import tpu_function
 from tensorflow.python.ops.losses import losses
+from tensorflow.python.platform import tf_logging as logging
 from tensorflow.python.training import optimizer
 
 
@@ -74,8 +75,10 @@ class CrossShardOptimizer(optimizer.Optimizer):
     """
     num_shards = tpu_function.get_tpu_context().number_of_shards
     if num_shards is None:
-      raise ValueError("CrossShardOptimizer must be used within a "
-                       "tpu_shard_context.")
+      logging.warning(
+          "CrossShardOptimizer should be used within a tpu_shard_context, but "
+          "got unset number_of_shards. Assuming 1.")
+      num_shards = 1
     if num_shards > 1 and self._reduction == losses.Reduction.MEAN:
       scale = 1.0 / num_shards
       loss *= scale
-- 
GitLab


From 2ea90304ffb2cd338b1dfc5a3e26a3373ce1fe98 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Sat, 16 Dec 2017 12:24:36 -0800
Subject: [PATCH 1136/1225] Updating sparsify gather to work with core
 estimators.

PiperOrigin-RevId: 179306398
---
 .../learn/utils/saved_model_export_utils.py   | 31 +++++++++++++------
 .../utils/saved_model_export_utils_test.py    | 18 ++++++-----
 .../tools/graph_transforms/sparsify_gather.cc |  5 ++-
 3 files changed, 36 insertions(+), 18 deletions(-)

diff --git a/tensorflow/contrib/learn/python/learn/utils/saved_model_export_utils.py b/tensorflow/contrib/learn/python/learn/utils/saved_model_export_utils.py
index 6ffd2a1339..4b404a8e20 100644
--- a/tensorflow/contrib/learn/python/learn/utils/saved_model_export_utils.py
+++ b/tensorflow/contrib/learn/python/learn/utils/saved_model_export_utils.py
@@ -33,7 +33,6 @@ from __future__ import division
 from __future__ import print_function
 
 import os
-import tempfile
 import time
 
 from tensorflow.contrib.layers.python.layers import feature_column
@@ -682,22 +681,36 @@ def extend_export_strategy(base_export_strategy,
       ValueError: If `estimator` is a ${tf.estimator.Estimator} instance
         and `default_output_alternative_key` was specified or if post_export_fn
         does not return a valid directory.
+      RuntimeError: If unable to create temporary or final export directory.
     """
-    tmp_base_export_dir = tempfile.mkdtemp()
+    tmp_base_export_folder = 'temp-base-export-' + str(int(time.time()))
+    tmp_base_export_dir = os.path.join(export_dir_base, tmp_base_export_folder)
+    if gfile.Exists(tmp_base_export_dir):
+      raise RuntimeError('Failed to obtain base export directory')
+    gfile.MakeDirs(tmp_base_export_dir)
     tmp_base_export = base_export_strategy.export(
         estimator, tmp_base_export_dir, checkpoint_path)
-    tmp_post_export_dir = tempfile.mkdtemp()
+
+    tmp_post_export_folder = 'temp-post-export-' + str(int(time.time()))
+    tmp_post_export_dir = os.path.join(export_dir_base, tmp_post_export_folder)
+    if gfile.Exists(tmp_post_export_dir):
+      raise RuntimeError('Failed to obtain temp export directory')
+
+    gfile.MakeDirs(tmp_post_export_dir)
     tmp_post_export = post_export_fn(tmp_base_export, tmp_post_export_dir)
 
     if not tmp_post_export.startswith(tmp_post_export_dir):
       raise ValueError('post_export_fn must return a sub-directory of {}'
                        .format(tmp_post_export_dir))
-    export_relpath = os.path.relpath(tmp_post_export, tmp_post_export_dir)
-
-    gfile.Rename(
-        os.path.join(tmp_post_export_dir, export_relpath),
-        os.path.join(export_dir_base, export_relpath))
-    return os.path.join(export_dir_base, export_relpath)
+    post_export_relpath = os.path.relpath(tmp_post_export, tmp_post_export_dir)
+    post_export = os.path.join(export_dir_base, post_export_relpath)
+    if gfile.Exists(post_export):
+      raise RuntimeError('Failed to obtain final export directory')
+    gfile.Rename(tmp_post_export, post_export)
+
+    gfile.DeleteRecursively(tmp_base_export_dir)
+    gfile.DeleteRecursively(tmp_post_export_dir)
+    return post_export
 
   name = post_export_name if post_export_name else base_export_strategy.name
   return export_strategy.ExportStrategy(name, export_fn)
diff --git a/tensorflow/contrib/learn/python/learn/utils/saved_model_export_utils_test.py b/tensorflow/contrib/learn/python/learn/utils/saved_model_export_utils_test.py
index ec3a88003f..628eb254c3 100644
--- a/tensorflow/contrib/learn/python/learn/utils/saved_model_export_utils_test.py
+++ b/tensorflow/contrib/learn/python/learn/utils/saved_model_export_utils_test.py
@@ -766,10 +766,11 @@ class SavedModelExportUtilsTest(test.TestCase):
 
     test_estimator = TestEstimator()
     tmpdir = tempfile.mkdtemp()
-    final_path = final_export_strategy.export(test_estimator, tmpdir,
-                                              os.path.join(
-                                                  tmpdir, "checkpoint"))
-    self.assertEqual(os.path.join(tmpdir, "rewrite"), final_path)
+    export_model_dir = os.path.join(tmpdir, "model")
+    checkpoint_path = os.path.join(tmpdir, "checkpoint")
+    final_path = final_export_strategy.export(test_estimator, export_model_dir,
+                                              checkpoint_path)
+    self.assertEqual(os.path.join(export_model_dir, "rewrite"), final_path)
 
   def test_extend_export_strategy_same_name(self):
 
@@ -795,10 +796,11 @@ class SavedModelExportUtilsTest(test.TestCase):
 
     test_estimator = TestEstimator()
     tmpdir = tempfile.mkdtemp()
-    final_path = final_export_strategy.export(test_estimator, tmpdir,
-                                              os.path.join(
-                                                  tmpdir, "checkpoint"))
-    self.assertEqual(os.path.join(tmpdir, "rewrite"), final_path)
+    export_model_dir = os.path.join(tmpdir, "model")
+    checkpoint_path = os.path.join(tmpdir, "checkpoint")
+    final_path = final_export_strategy.export(test_estimator, export_model_dir,
+                                              checkpoint_path)
+    self.assertEqual(os.path.join(export_model_dir, "rewrite"), final_path)
 
   def test_extend_export_strategy_raises_error(self):
 
diff --git a/tensorflow/tools/graph_transforms/sparsify_gather.cc b/tensorflow/tools/graph_transforms/sparsify_gather.cc
index 20d443c7e9..96324d0dea 100644
--- a/tensorflow/tools/graph_transforms/sparsify_gather.cc
+++ b/tensorflow/tools/graph_transforms/sparsify_gather.cc
@@ -89,7 +89,10 @@ Status ObtainTensorSlice(const GraphDef& input_graph_def,
                          string* shape_slice_string) {
   string restore_node_name;
   for (const auto& node : input_graph_def.node()) {
-    if (StringPiece(node.name()).starts_with("save/Assign") &&
+    std::vector<string> node_name_parts = str_util::Split(node.name(), "/");
+    if (node_name_parts.size() == 2 &&
+        StringPiece(node_name_parts[0]).starts_with("save") &&
+        StringPiece(node_name_parts[1]).starts_with("Assign") &&
         node.input(0) == tensor_name) {
       restore_node_name = node.input(1);
       break;
-- 
GitLab


From 9383f0b4dd7d06cd4a54f8e4df07182a90530690 Mon Sep 17 00:00:00 2001
From: Yao Zhang <yaozhang@google.com>
Date: Sat, 16 Dec 2017 13:21:31 -0800
Subject: [PATCH 1137/1225] First and second dimension should be reversed.

PiperOrigin-RevId: 179308396
---
 .../api_def_DataFormatVecPermute.pbtxt        |  8 ++--
 tensorflow/core/kernels/data_format_ops.cc    |  8 ++--
 tensorflow/core/kernels/data_format_ops.h     | 42 ++++++++++++-------
 tensorflow/core/ops/nn_ops.cc                 |  8 ++--
 tensorflow/python/ops/nn_test.py              | 13 +++---
 5 files changed, 44 insertions(+), 35 deletions(-)

diff --git a/tensorflow/core/api_def/base_api/api_def_DataFormatVecPermute.pbtxt b/tensorflow/core/api_def/base_api/api_def_DataFormatVecPermute.pbtxt
index d41f4df304..d87c088899 100644
--- a/tensorflow/core/api_def/base_api/api_def_DataFormatVecPermute.pbtxt
+++ b/tensorflow/core/api_def/base_api/api_def_DataFormatVecPermute.pbtxt
@@ -3,13 +3,13 @@ op {
   in_arg {
     name: "x"
     description: <<END
-Vector of size 4 or Tensor of shape (2, 4) in source data format.
+Vector of size 4 or Tensor of shape (4, 2) in source data format.
 END
   }
   out_arg {
     name: "y"
     description: <<END
-Vector of size 4 or Tensor of shape (2, 4) in destination data format.
+Vector of size 4 or Tensor of shape (4, 2) in destination data format.
 END
   }
   attr {
@@ -24,8 +24,8 @@ END
 destination data format.
 END
   }
-  summary: "Returns the permuted vector/tensor in the destination data format given the one in"
+  summary: "Returns the permuted vector/tensor in the destination data format given the"
   description: <<END
-the source data format.
+one in the source data format.
 END
 }
diff --git a/tensorflow/core/kernels/data_format_ops.cc b/tensorflow/core/kernels/data_format_ops.cc
index 7a0b44dfe7..e32d6545b8 100644
--- a/tensorflow/core/kernels/data_format_ops.cc
+++ b/tensorflow/core/kernels/data_format_ops.cc
@@ -95,14 +95,14 @@ class DataFormatVecPermuteOp : public OpKernel {
                                   input.shape().DebugString()));
     } else if (input.dims() == 2) {
       OP_REQUIRES(
-          context, input.dim_size(0) == 2,
+          context, input.dim_size(0) == 4,
           errors::InvalidArgument(
-              "First dimension of 2D input must be of size 2, but got shape ",
+              "First dimension of 2D input must be of size 4, but got shape ",
               input.shape().DebugString()));
       OP_REQUIRES(
-          context, input.dim_size(1) == 4,
+          context, input.dim_size(1) == 2,
           errors::InvalidArgument(
-              "Second dimension of 2D input must be of size 4, but got shape ",
+              "Second dimension of 2D input must be of size 2, but got shape ",
               input.shape().DebugString()));
     }
 
diff --git a/tensorflow/core/kernels/data_format_ops.h b/tensorflow/core/kernels/data_format_ops.h
index d69f0326a4..01b7bff1eb 100644
--- a/tensorflow/core/kernels/data_format_ops.h
+++ b/tensorflow/core/kernels/data_format_ops.h
@@ -50,15 +50,20 @@ struct VecPermuteNHWCToNCHW {
   template <typename Output, typename Device>
   void eval(typename TTypes<T>::ConstFlat input, Output& output,
             const Device& d) const {
-    output.template chip<0>(0).device(d) = input.template chip<0>(0);
-    output.template chip<0>(1).device(d) = input.template chip<0>(3);
-    output.template chip<0>(2).device(d) = input.template chip<0>(1);
-    output.template chip<0>(3).device(d) = input.template chip<0>(2);
     if (input.size() == 8) {
-      output.template chip<0>(4).device(d) = input.template chip<0>(4);
-      output.template chip<0>(5).device(d) = input.template chip<0>(7);
-      output.template chip<0>(6).device(d) = input.template chip<0>(5);
-      output.template chip<0>(7).device(d) = input.template chip<0>(6);
+      output.template chip<0>(0).device(d) = input.template chip<0>(0);
+      output.template chip<0>(1).device(d) = input.template chip<0>(1);
+      output.template chip<0>(2).device(d) = input.template chip<0>(6);
+      output.template chip<0>(3).device(d) = input.template chip<0>(7);
+      output.template chip<0>(4).device(d) = input.template chip<0>(2);
+      output.template chip<0>(5).device(d) = input.template chip<0>(3);
+      output.template chip<0>(6).device(d) = input.template chip<0>(4);
+      output.template chip<0>(7).device(d) = input.template chip<0>(5);
+    } else {
+      output.template chip<0>(0).device(d) = input.template chip<0>(0);
+      output.template chip<0>(1).device(d) = input.template chip<0>(3);
+      output.template chip<0>(2).device(d) = input.template chip<0>(1);
+      output.template chip<0>(3).device(d) = input.template chip<0>(2);
     }
   }
 };
@@ -74,15 +79,20 @@ struct VecPermuteNCHWToNHWC {
   template <typename Output, typename Device>
   void eval(typename TTypes<T>::ConstFlat input, Output& output,
             const Device& d) const {
-    output.template chip<0>(0).device(d) = input.template chip<0>(0);
-    output.template chip<0>(1).device(d) = input.template chip<0>(2);
-    output.template chip<0>(2).device(d) = input.template chip<0>(3);
-    output.template chip<0>(3).device(d) = input.template chip<0>(1);
     if (input.size() == 8) {
-      output.template chip<0>(4).device(d) = input.template chip<0>(4);
-      output.template chip<0>(5).device(d) = input.template chip<0>(6);
-      output.template chip<0>(6).device(d) = input.template chip<0>(7);
-      output.template chip<0>(7).device(d) = input.template chip<0>(5);
+      output.template chip<0>(0).device(d) = input.template chip<0>(0);
+      output.template chip<0>(1).device(d) = input.template chip<0>(1);
+      output.template chip<0>(2).device(d) = input.template chip<0>(4);
+      output.template chip<0>(3).device(d) = input.template chip<0>(5);
+      output.template chip<0>(4).device(d) = input.template chip<0>(6);
+      output.template chip<0>(5).device(d) = input.template chip<0>(7);
+      output.template chip<0>(6).device(d) = input.template chip<0>(2);
+      output.template chip<0>(7).device(d) = input.template chip<0>(3);
+    } else {
+      output.template chip<0>(0).device(d) = input.template chip<0>(0);
+      output.template chip<0>(1).device(d) = input.template chip<0>(2);
+      output.template chip<0>(2).device(d) = input.template chip<0>(3);
+      output.template chip<0>(3).device(d) = input.template chip<0>(1);
     }
   }
 };
diff --git a/tensorflow/core/ops/nn_ops.cc b/tensorflow/core/ops/nn_ops.cc
index 15122afd23..df2d4a7123 100644
--- a/tensorflow/core/ops/nn_ops.cc
+++ b/tensorflow/core/ops/nn_ops.cc
@@ -776,11 +776,11 @@ REGISTER_OP("DataFormatVecPermute")
     .Attr("dst_format: string = 'NCHW'")
     .SetShapeFn(shape_inference::UnchangedShape)
     .Doc(R"doc(
-Returns the permuted vector/tensor in the destination data format given the one in
-the source data format.
+Returns the permuted vector/tensor in the destination data format given the
+one in the source data format.
 
-x: Vector of size 4 or Tensor of shape (2, 4) in source data format.
-y: Vector of size 4 or Tensor of shape (2, 4) in destination data format.
+x: Vector of size 4 or Tensor of shape (4, 2) in source data format.
+y: Vector of size 4 or Tensor of shape (4, 2) in destination data format.
 src_format: source data format.
 dst_format: destination data format.
 )doc");
diff --git a/tensorflow/python/ops/nn_test.py b/tensorflow/python/ops/nn_test.py
index b87aef6585..d391e345fe 100644
--- a/tensorflow/python/ops/nn_test.py
+++ b/tensorflow/python/ops/nn_test.py
@@ -991,22 +991,21 @@ class DataFormatVectorPermuteTest(test_lib.TestCase):
       y_val = sess.run(y)
       self.assertAllEqual(y_val, [7, 9, 3, 4])
 
-  def testNCHWToNHWC2D(self):
-    x_val = [[7, 4, 9, 3], [4, 3, 5, 1]]
+  def testNHWCToNCHW2D(self):
+    x_val = [[7, 4], [9, 3], [4, 5], [5, 1]]
     x = constant_op.constant(x_val)
     y = nn_ops.data_format_vec_permute(x)
     with self.test_session(use_gpu=test_lib.is_gpu_available()) as sess:
       y_val = sess.run(y)
-      print(y_val)
-      self.assertAllEqual(y_val, [[7, 3, 4, 9], [4, 1, 3, 5]])
+      self.assertAllEqual(y_val, [[7, 4], [5, 1], [9, 3], [4, 5]])
 
-  def testNHWCToNCHW2D(self):
-    x_val = [[7, 4, 9, 3], [4, 3, 5, 1]]
+  def testNCHWToNHWC2D(self):
+    x_val = [[7, 4], [9, 3], [4, 5], [5, 1]]
     x = constant_op.constant(x_val)
     y = nn_ops.data_format_vec_permute(x, src_format="NCHW", dst_format="NHWC")
     with self.test_session(use_gpu=test_lib.is_gpu_available()) as sess:
       y_val = sess.run(y)
-      self.assertAllEqual(y_val, [[7, 9, 3, 4], [4, 5, 1, 3]])
+      self.assertAllEqual(y_val, [[7, 4], [4, 5], [5, 1], [9, 3]])
 
 
 if __name__ == "__main__":
-- 
GitLab


From cdf7b94b9aee89812d55cc477bdcad1ff36c02fb Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Sat, 16 Dec 2017 14:03:34 -0800
Subject: [PATCH 1138/1225] nit: it's --> its

PiperOrigin-RevId: 179309732
---
 tensorflow/python/saved_model/README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/python/saved_model/README.md b/tensorflow/python/saved_model/README.md
index 8213e52ce9..8c78013ffd 100644
--- a/tensorflow/python/saved_model/README.md
+++ b/tensorflow/python/saved_model/README.md
@@ -93,7 +93,7 @@ with an asset of the same name, only the first version is retained.
 Each meta graph added to the SavedModel must be annotated with user specified
 tags. The tags provide a means to identify the specific meta graph to load and
 restore, along with the shared set of variables and assets. These tags
-typically annotate a MetaGraph with it's functionality (e.g. serving or
+typically annotate a MetaGraph with its functionality (e.g. serving or
 training), and possibly hardware specific aspects such as GPU.
 
 #### Usage
-- 
GitLab


From 845fe8d9b5d6ef89efe091b0ea4b1169d29d3d67 Mon Sep 17 00:00:00 2001
From: Yao Zhang <yaozhang@google.com>
Date: Sat, 16 Dec 2017 14:21:18 -0800
Subject: [PATCH 1139/1225] Support non-const paddings.

PiperOrigin-RevId: 179310587
---
 .../grappler/optimizers/layout_optimizer.cc   | 29 +++-----
 .../python/grappler/layout_optimizer_test.py  | 70 +++++++++++++++++++
 2 files changed, 78 insertions(+), 21 deletions(-)

diff --git a/tensorflow/core/grappler/optimizers/layout_optimizer.cc b/tensorflow/core/grappler/optimizers/layout_optimizer.cc
index cb848dfce5..74c0a14d67 100644
--- a/tensorflow/core/grappler/optimizers/layout_optimizer.cc
+++ b/tensorflow/core/grappler/optimizers/layout_optimizer.cc
@@ -1205,28 +1205,15 @@ class PadProcessor : public AgnosticNodeProcessor {
       : AgnosticNodeProcessor(opt_cxt) {}
 
  protected:
-  bool ShouldProcess() const override {
-    return !MustPreserve() && IsPortZeroDimsFour(*node_) && HasOutputs() &&
-           IsNodeAfterNCHWToNHWC() && PaddingSupported() && IsOnGPU();
-  }
-  Status CustomizedProcessing() override { return UpdateAttrValueOfInput(1); }
-
- private:
-  bool PaddingSupported() const {
-    auto pad_const = node_map_->GetNode(node_->input(1));
-    bool is_const = IsConstant(*pad_const);
-    bool is_4D = false;
-    if (HasAttribute(*pad_const, "value").ok()) {
-      Tensor tensor;
-      if (tensor.FromProto(pad_const->mutable_attr()->at({"value"}).tensor())) {
-        if (tensor.dims() == 2) {
-          if (tensor.dim_size(0) == 4 && tensor.dim_size(1) == 2) {
-            is_4D = true;
-          }
-        }
-      }
+  Status CustomizedProcessing() override {
+    auto index_node = node_map_->GetNode(node_->input(1));
+    if (IsConstant(*index_node)) {
+      TF_RETURN_IF_ERROR(UpdateAttrValueOfInput(1));
+    } else {
+      DataType dtype = node_->attr().at("Tpaddings").type();
+      AddDataFormatTranformToInput("DataFormatVecPermute", 1, dtype);
     }
-    return is_const && is_4D;
+    return Status::OK();
   }
 };
 
diff --git a/tensorflow/python/grappler/layout_optimizer_test.py b/tensorflow/python/grappler/layout_optimizer_test.py
index d9b979e623..d677385ebe 100644
--- a/tensorflow/python/grappler/layout_optimizer_test.py
+++ b/tensorflow/python/grappler/layout_optimizer_test.py
@@ -279,6 +279,76 @@ class LayoutOptimizerTest(test.TestCase):
       self.assertIn('LayoutOptimizerVecPermuteNHWCToNCHW_split_0', nodes)
       self.assertAllClose(output_val_ref, output_val, atol=1e-3)
 
+  def testPadWithConstPaddings(self):
+    if test.is_gpu_available(cuda_only=True):
+      random_seed.set_random_seed(0)
+      x = random_ops.truncated_normal([1, 784], seed=0)
+      conv = _two_layer_model(x)
+      paddings_val = [[1, 2], [3, 4], [5, 6], [7, 8]]
+      paddings = constant_op.constant(
+          paddings_val, dtype='int32', name='PaddingsConst')
+      pad = array_ops.pad(conv, paddings)
+      output = array_ops.identity(pad)
+
+      with session.Session() as sess:
+        output_val_ref = sess.run(output)
+
+      with session.Session(config=_get_config()) as sess:
+        metadata = config_pb2.RunMetadata()
+        output_val = sess.run(output, run_metadata=metadata)
+
+      nodes = []
+      num_transposes = 0
+      for node in metadata.cost_graph.node:
+        if node.name.startswith('LayoutOptimizerTranspose'):
+          num_transposes += 1
+        nodes.append(node.name)
+
+      # Four transposes were initially added in the Expand phase of
+      # LayoutOptimizer; two of them are cancelled out in the Collapse phase.
+      expected_num_transposes = 2
+      self.assertEqual(expected_num_transposes, num_transposes)
+      self.assertIn('LayoutOptimizerTransposeNHWCToNCHW-Conv2D-0', nodes)
+      self.assertIn('LayoutOptimizerTransposeNCHWToNHWC-Pad-0-0', nodes)
+      self.assertIn('LayoutOptimizer-Pad-PaddingsConst', nodes)
+      self.assertAllClose(output_val_ref, output_val, atol=1e-3)
+
+  def testPadWithNonConstPaddings(self):
+    if test.is_gpu_available(cuda_only=True):
+      random_seed.set_random_seed(0)
+      x = random_ops.truncated_normal([1, 784], seed=0)
+      conv = _two_layer_model(x)
+      paddings = array_ops.placeholder(dtype='int32')
+      pad = array_ops.pad(conv, paddings)
+      output = array_ops.identity(pad)
+
+      paddings_val = [[1, 2], [3, 4], [5, 6], [7, 8]]
+      with session.Session() as sess:
+        output_val_ref = sess.run(output, feed_dict={paddings: paddings_val})
+
+      with session.Session(config=_get_config()) as sess:
+        metadata = config_pb2.RunMetadata()
+        output_val = sess.run(
+            output, run_metadata=metadata, feed_dict={
+                paddings: paddings_val
+            })
+
+      nodes = []
+      num_transposes = 0
+      for node in metadata.cost_graph.node:
+        if node.name.startswith('LayoutOptimizerTranspose'):
+          num_transposes += 1
+        nodes.append(node.name)
+
+      # Four transposes were initially added in the Expand phase of
+      # LayoutOptimizer; two of them are cancelled out in the Collapse phase.
+      expected_num_transposes = 2
+      self.assertEqual(expected_num_transposes, num_transposes)
+      self.assertIn('LayoutOptimizerTransposeNHWCToNCHW-Conv2D-0', nodes)
+      self.assertIn('LayoutOptimizerTransposeNCHWToNHWC-Pad-0-0', nodes)
+      self.assertIn('LayoutOptimizerVecPermuteNHWCToNCHW_Pad_1', nodes)
+      self.assertAllClose(output_val_ref, output_val, atol=1e-3)
+
   def testSliceWithNonConstAxis(self):
     if test.is_gpu_available(cuda_only=True):
       random_seed.set_random_seed(0)
-- 
GitLab


From b15e4ed61efdccc349ebf596c23e2aa465c6f908 Mon Sep 17 00:00:00 2001
From: Dustin Tran <trandustin@google.com>
Date: Sat, 16 Dec 2017 14:29:08 -0800
Subject: [PATCH 1140/1225] Add DenseFlipout probabilistic layer.

PiperOrigin-RevId: 179310861
---
 .../layers_dense_variational_test.py          | 311 +++++--
 .../contrib/bayesflow/python/ops/layers.py    |   8 +-
 .../ops/layers_dense_variational_impl.py      | 841 ++++++++++++++++--
 3 files changed, 987 insertions(+), 173 deletions(-)

diff --git a/tensorflow/contrib/bayesflow/python/kernel_tests/layers_dense_variational_test.py b/tensorflow/contrib/bayesflow/python/kernel_tests/layers_dense_variational_test.py
index 7b5b2fec1e..5371e912ed 100644
--- a/tensorflow/contrib/bayesflow/python/kernel_tests/layers_dense_variational_test.py
+++ b/tensorflow/contrib/bayesflow/python/kernel_tests/layers_dense_variational_test.py
@@ -18,12 +18,17 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
+import numpy as np
+
 from tensorflow.contrib.bayesflow.python.ops import layers_dense_variational_impl as prob_layers_lib
 from tensorflow.contrib.distributions.python.ops import independent as independent_lib
+from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
+from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import random_ops
 from tensorflow.python.ops.distributions import normal as normal_lib
+from tensorflow.python.ops.distributions import util as distribution_util
 from tensorflow.python.platform import test
 
 
@@ -43,7 +48,7 @@ class Counter(object):
 
 
 class MockDistribution(independent_lib.Independent):
-  """Monitors DenseVariational calls to the underlying distribution."""
+  """Monitors layer calls to the underlying distribution."""
 
   def __init__(self, result_sample, result_log_prob, loc=None, scale=None):
     self.result_sample = result_sample
@@ -83,7 +88,7 @@ class MockDistribution(independent_lib.Independent):
 
 
 class MockKLDivergence(object):
-  """Monitors DenseVariational calls to the divergence implementation."""
+  """Monitors layer calls to the divergence implementation."""
 
   def __init__(self, result):
     self.result = result
@@ -96,31 +101,31 @@ class MockKLDivergence(object):
     return self.result
 
 
-class DenseVariationalLocalReparametrization(test.TestCase):
+class DenseVariational(test.TestCase):
 
-  def testKLPenaltyKernel(self):
+  def _testKLPenaltyKernel(self, layer_class):
     with self.test_session():
-      dense_vi = prob_layers_lib.DenseVariational(units=2)
+      layer = layer_class(units=2)
       inputs = random_ops.random_uniform([2, 3], seed=1)
 
       # No keys.
       losses = ops.get_collection(ops.GraphKeys.REGULARIZATION_LOSSES)
       self.assertEqual(len(losses), 0)
-      self.assertListEqual(dense_vi.losses, losses)
+      self.assertListEqual(layer.losses, losses)
 
-      _ = dense_vi(inputs)
+      _ = layer(inputs)
 
       # Yes keys.
       losses = ops.get_collection(ops.GraphKeys.REGULARIZATION_LOSSES)
       self.assertEqual(len(losses), 1)
-      self.assertListEqual(dense_vi.losses, losses)
+      self.assertListEqual(layer.losses, losses)
 
-  def testKLPenaltyBoth(self):
+  def _testKLPenaltyBoth(self, layer_class):
     def _make_normal(dtype, *args):  # pylint: disable=unused-argument
       return normal_lib.Normal(
           loc=dtype.as_numpy_dtype(0.), scale=dtype.as_numpy_dtype(1.))
     with self.test_session():
-      dense_vi = prob_layers_lib.DenseVariational(
+      layer = layer_class(
           units=2,
           bias_posterior_fn=prob_layers_lib.default_mean_field_normal_fn(),
           bias_prior_fn=_make_normal)
@@ -129,61 +134,92 @@ class DenseVariationalLocalReparametrization(test.TestCase):
       # No keys.
       losses = ops.get_collection(ops.GraphKeys.REGULARIZATION_LOSSES)
       self.assertEqual(len(losses), 0)
-      self.assertListEqual(dense_vi.losses, losses)
+      self.assertListEqual(layer.losses, losses)
 
-      _ = dense_vi(inputs)
+      _ = layer(inputs)
 
       # Yes keys.
       losses = ops.get_collection(ops.GraphKeys.REGULARIZATION_LOSSES)
       self.assertEqual(len(losses), 2)
-      self.assertListEqual(dense_vi.losses, losses)
-
-  def testVariationalNonLocal(self):
+      self.assertListEqual(layer.losses, losses)
+
+  def _testDenseSetUp(self, layer_class, batch_size, in_size, out_size,
+                      **kwargs):
+    seed = Counter()
+    inputs = random_ops.random_uniform([batch_size, in_size], seed=seed())
+
+    kernel_size = [in_size, out_size]
+    kernel_posterior = MockDistribution(
+        loc=random_ops.random_uniform(kernel_size, seed=seed()),
+        scale=random_ops.random_uniform(kernel_size, seed=seed()),
+        result_log_prob=random_ops.random_uniform(kernel_size, seed=seed()),
+        result_sample=random_ops.random_uniform(kernel_size, seed=seed()))
+    kernel_prior = MockDistribution(
+        result_log_prob=random_ops.random_uniform(kernel_size, seed=seed()),
+        result_sample=random_ops.random_uniform(kernel_size, seed=seed()))
+    kernel_divergence = MockKLDivergence(
+        result=random_ops.random_uniform(kernel_size, seed=seed()))
+
+    bias_size = [out_size]
+    bias_posterior = MockDistribution(
+        result_log_prob=random_ops.random_uniform(bias_size, seed=seed()),
+        result_sample=random_ops.random_uniform(bias_size, seed=seed()))
+    bias_prior = MockDistribution(
+        result_log_prob=random_ops.random_uniform(bias_size, seed=seed()),
+        result_sample=random_ops.random_uniform(bias_size, seed=seed()))
+    bias_divergence = MockKLDivergence(
+        result=random_ops.random_uniform(bias_size, seed=seed()))
+
+    layer = layer_class(
+        units=out_size,
+        kernel_posterior_fn=lambda *args: kernel_posterior,
+        kernel_posterior_tensor_fn=lambda d: d.sample(seed=42),
+        kernel_prior_fn=lambda *args: kernel_prior,
+        kernel_divergence_fn=kernel_divergence,
+        bias_posterior_fn=lambda *args: bias_posterior,
+        bias_posterior_tensor_fn=lambda d: d.sample(seed=43),
+        bias_prior_fn=lambda *args: bias_prior,
+        bias_divergence_fn=bias_divergence,
+        **kwargs)
+
+    outputs = layer(inputs)
+
+    kl_penalty = ops.get_collection(ops.GraphKeys.REGULARIZATION_LOSSES)
+    return (kernel_posterior, kernel_prior, kernel_divergence,
+            bias_posterior, bias_prior, bias_divergence,
+            layer, inputs, outputs, kl_penalty)
+
+  def testKLPenaltyKernelReparameterization(self):
+    self._testKLPenaltyKernel(prob_layers_lib.DenseReparameterization)
+
+  def testKLPenaltyKernelLocalReparameterization(self):
+    self._testKLPenaltyKernel(prob_layers_lib.DenseLocalReparameterization)
+
+  def testKLPenaltyKernelFlipout(self):
+    self._testKLPenaltyKernel(prob_layers_lib.DenseFlipout)
+
+  def testKLPenaltyBothReparameterization(self):
+    self._testKLPenaltyBoth(prob_layers_lib.DenseReparameterization)
+
+  def testKLPenaltyBothLocalReparameterization(self):
+    self._testKLPenaltyBoth(prob_layers_lib.DenseLocalReparameterization)
+
+  def testKLPenaltyBothFlipout(self):
+    self._testKLPenaltyBoth(prob_layers_lib.DenseFlipout)
+
+  def testDenseReparameterization(self):
     batch_size, in_size, out_size = 2, 3, 4
     with self.test_session() as sess:
-      seed = Counter()
-      inputs = random_ops.random_uniform([batch_size, in_size], seed=seed())
-
-      kernel_size = [in_size, out_size]
-      kernel_posterior = MockDistribution(
-          result_log_prob=random_ops.random_uniform(kernel_size, seed=seed()),
-          result_sample=random_ops.random_uniform(kernel_size, seed=seed()))
-      kernel_prior = MockDistribution(
-          result_log_prob=random_ops.random_uniform(kernel_size, seed=seed()),
-          result_sample=random_ops.random_uniform(kernel_size, seed=seed()))
-      kernel_divergence = MockKLDivergence(
-          result=random_ops.random_uniform(kernel_size, seed=seed()))
-
-      bias_size = [out_size]
-      bias_posterior = MockDistribution(
-          result_log_prob=random_ops.random_uniform(bias_size, seed=seed()),
-          result_sample=random_ops.random_uniform(bias_size, seed=seed()))
-      bias_prior = MockDistribution(
-          result_log_prob=random_ops.random_uniform(bias_size, seed=seed()),
-          result_sample=random_ops.random_uniform(bias_size, seed=seed()))
-      bias_divergence = MockKLDivergence(
-          result=random_ops.random_uniform(bias_size, seed=seed()))
+      (kernel_posterior, kernel_prior, kernel_divergence,
+       bias_posterior, bias_prior, bias_divergence, layer, inputs,
+       outputs, kl_penalty) = self._testDenseSetUp(
+           prob_layers_lib.DenseReparameterization,
+           batch_size, in_size, out_size)
 
       expected_outputs = (
           math_ops.matmul(inputs, kernel_posterior.result_sample) +
           bias_posterior.result_sample)
 
-      dense_vi = prob_layers_lib.DenseVariational(
-          units=2,
-          kernel_use_local_reparameterization=False,
-          kernel_posterior_fn=lambda *args: kernel_posterior,
-          kernel_posterior_tensor_fn=lambda d: d.sample(seed=42),
-          kernel_prior_fn=lambda *args: kernel_prior,
-          kernel_divergence_fn=kernel_divergence,
-          bias_posterior_fn=lambda *args: bias_posterior,
-          bias_posterior_tensor_fn=lambda d: d.sample(seed=43),
-          bias_prior_fn=lambda *args: bias_prior,
-          bias_divergence_fn=bias_divergence)
-
-      outputs = dense_vi(inputs)
-
-      kl_penalty = ops.get_collection(ops.GraphKeys.REGULARIZATION_LOSSES)
-
       [
           expected_outputs_, actual_outputs_,
           expected_kernel_, actual_kernel_,
@@ -192,9 +228,9 @@ class DenseVariationalLocalReparametrization(test.TestCase):
           expected_bias_divergence_, actual_bias_divergence_,
       ] = sess.run([
           expected_outputs, outputs,
-          kernel_posterior.result_sample, dense_vi.kernel_posterior_tensor,
+          kernel_posterior.result_sample, layer.kernel_posterior_tensor,
           kernel_divergence.result, kl_penalty[0],
-          bias_posterior.result_sample, dense_vi.bias_posterior_tensor,
+          bias_posterior.result_sample, layer.bias_posterior_tensor,
           bias_divergence.result, kl_penalty[1],
       ])
 
@@ -226,33 +262,14 @@ class DenseVariationalLocalReparametrization(test.TestCase):
             bias_posterior.result_sample]],
           bias_divergence.args)
 
-  def testVariationalLocal(self):
+  def testDenseLocalReparameterization(self):
     batch_size, in_size, out_size = 2, 3, 4
     with self.test_session() as sess:
-      seed = Counter()
-      inputs = random_ops.random_uniform([batch_size, in_size], seed=seed())
-
-      kernel_size = [in_size, out_size]
-      kernel_posterior = MockDistribution(
-          loc=random_ops.random_uniform(kernel_size, seed=seed()),
-          scale=random_ops.random_uniform(kernel_size, seed=seed()),
-          result_log_prob=random_ops.random_uniform(kernel_size, seed=seed()),
-          result_sample=random_ops.random_uniform(kernel_size, seed=seed()))
-      kernel_prior = MockDistribution(
-          result_log_prob=random_ops.random_uniform(kernel_size, seed=seed()),
-          result_sample=random_ops.random_uniform(kernel_size, seed=seed()))
-      kernel_divergence = MockKLDivergence(
-          result=random_ops.random_uniform(kernel_size, seed=seed()))
-
-      bias_size = [out_size]
-      bias_posterior = MockDistribution(
-          result_log_prob=random_ops.random_uniform(bias_size, seed=seed()),
-          result_sample=random_ops.random_uniform(bias_size, seed=seed()))
-      bias_prior = MockDistribution(
-          result_log_prob=random_ops.random_uniform(bias_size, seed=seed()),
-          result_sample=random_ops.random_uniform(bias_size, seed=seed()))
-      bias_divergence = MockKLDivergence(
-          result=random_ops.random_uniform(bias_size, seed=seed()))
+      (kernel_posterior, kernel_prior, kernel_divergence,
+       bias_posterior, bias_prior, bias_divergence, layer, inputs,
+       outputs, kl_penalty) = self._testDenseSetUp(
+           prob_layers_lib.DenseLocalReparameterization,
+           batch_size, in_size, out_size)
 
       expected_kernel_posterior_affine = normal_lib.Normal(
           loc=math_ops.matmul(inputs, kernel_posterior.result_loc),
@@ -263,21 +280,80 @@ class DenseVariationalLocalReparametrization(test.TestCase):
       expected_outputs = (expected_kernel_posterior_affine_tensor +
                           bias_posterior.result_sample)
 
-      dense_vi = prob_layers_lib.DenseVariational(
-          units=2,
-          kernel_use_local_reparameterization=True,
-          kernel_posterior_fn=lambda *args: kernel_posterior,
-          kernel_posterior_tensor_fn=lambda d: d.sample(seed=42),
-          kernel_prior_fn=lambda *args: kernel_prior,
-          kernel_divergence_fn=kernel_divergence,
-          bias_posterior_fn=lambda *args: bias_posterior,
-          bias_posterior_tensor_fn=lambda d: d.sample(seed=43),
-          bias_prior_fn=lambda *args: bias_prior,
-          bias_divergence_fn=bias_divergence)
+      [
+          expected_outputs_, actual_outputs_,
+          expected_kernel_divergence_, actual_kernel_divergence_,
+          expected_bias_, actual_bias_,
+          expected_bias_divergence_, actual_bias_divergence_,
+      ] = sess.run([
+          expected_outputs, outputs,
+          kernel_divergence.result, kl_penalty[0],
+          bias_posterior.result_sample, layer.bias_posterior_tensor,
+          bias_divergence.result, kl_penalty[1],
+      ])
+
+      self.assertAllClose(
+          expected_bias_, actual_bias_,
+          rtol=1e-6, atol=0.)
+      self.assertAllClose(
+          expected_outputs_, actual_outputs_,
+          rtol=1e-6, atol=0.)
+      self.assertAllClose(
+          expected_kernel_divergence_, actual_kernel_divergence_,
+          rtol=1e-6, atol=0.)
+      self.assertAllClose(
+          expected_bias_divergence_, actual_bias_divergence_,
+          rtol=1e-6, atol=0.)
+
+      self.assertAllEqual(
+          [[kernel_posterior.distribution,
+            kernel_prior.distribution,
+            None]],
+          kernel_divergence.args)
 
-      outputs = dense_vi(inputs)
+      self.assertAllEqual(
+          [[bias_posterior.distribution,
+            bias_prior.distribution,
+            bias_posterior.result_sample]],
+          bias_divergence.args)
+
+  def testDenseFlipout(self):
+    batch_size, in_size, out_size = 2, 3, 4
+    with self.test_session() as sess:
+      (kernel_posterior, kernel_prior, kernel_divergence,
+       bias_posterior, bias_prior, bias_divergence, layer, inputs,
+       outputs, kl_penalty) = self._testDenseSetUp(
+           prob_layers_lib.DenseFlipout,
+           batch_size, in_size, out_size, seed=44)
+
+      expected_kernel_posterior_affine = normal_lib.Normal(
+          loc=array_ops.zeros_like(kernel_posterior.result_loc),
+          scale=kernel_posterior.result_scale)
+      expected_kernel_posterior_affine_tensor = (
+          expected_kernel_posterior_affine.sample(seed=42))
 
-      kl_penalty = ops.get_collection(ops.GraphKeys.REGULARIZATION_LOSSES)
+      sign_input = random_ops.random_uniform(
+          [batch_size, in_size],
+          minval=0,
+          maxval=2,
+          dtype=dtypes.int32,
+          seed=layer.seed)
+      sign_input = math_ops.cast(2 * sign_input - 1, inputs.dtype)
+      sign_output = random_ops.random_uniform(
+          [batch_size, out_size],
+          minval=0,
+          maxval=2,
+          dtype=dtypes.int32,
+          seed=distribution_util.gen_new_seed(
+              layer.seed, salt="conv_variational"))
+      sign_output = math_ops.cast(2 * sign_output - 1, inputs.dtype)
+      perturbed_inputs = math_ops.matmul(
+          inputs * sign_input, expected_kernel_posterior_affine_tensor)
+      perturbed_inputs *= sign_output
+
+      expected_outputs = math_ops.matmul(inputs, kernel_posterior.result_loc)
+      expected_outputs += perturbed_inputs
+      expected_outputs += bias_posterior.result_sample
 
       [
           expected_outputs_, actual_outputs_,
@@ -287,7 +363,7 @@ class DenseVariationalLocalReparametrization(test.TestCase):
       ] = sess.run([
           expected_outputs, outputs,
           kernel_divergence.result, kl_penalty[0],
-          bias_posterior.result_sample, dense_vi.bias_posterior_tensor,
+          bias_posterior.result_sample, layer.bias_posterior_tensor,
           bias_divergence.result, kl_penalty[1],
       ])
 
@@ -314,6 +390,53 @@ class DenseVariationalLocalReparametrization(test.TestCase):
             bias_posterior.result_sample]],
           bias_divergence.args)
 
+  def testRandomDenseFlipout(self):
+    batch_size, in_size, out_size = 2, 3, 4
+    with self.test_session() as sess:
+      seed = Counter()
+      inputs = random_ops.random_uniform([batch_size, in_size], seed=seed())
+
+      kernel_posterior = MockDistribution(
+          loc=random_ops.random_uniform(
+              [in_size, out_size], seed=seed()),
+          scale=random_ops.random_uniform(
+              [in_size, out_size], seed=seed()),
+          result_log_prob=random_ops.random_uniform(
+              [in_size, out_size], seed=seed()),
+          result_sample=random_ops.random_uniform(
+              [in_size, out_size], seed=seed()))
+      bias_posterior = MockDistribution(
+          loc=random_ops.random_uniform(
+              [out_size], seed=seed()),
+          scale=random_ops.random_uniform(
+              [out_size], seed=seed()),
+          result_log_prob=random_ops.random_uniform(
+              [out_size], seed=seed()),
+          result_sample=random_ops.random_uniform(
+              [out_size], seed=seed()))
+      layer_one = prob_layers_lib.DenseFlipout(
+          units=out_size,
+          kernel_posterior_fn=lambda *args: kernel_posterior,
+          kernel_posterior_tensor_fn=lambda d: d.sample(seed=42),
+          bias_posterior_fn=lambda *args: bias_posterior,
+          bias_posterior_tensor_fn=lambda d: d.sample(seed=43),
+          seed=44)
+      layer_two = prob_layers_lib.DenseFlipout(
+          units=out_size,
+          kernel_posterior_fn=lambda *args: kernel_posterior,
+          kernel_posterior_tensor_fn=lambda d: d.sample(seed=42),
+          bias_posterior_fn=lambda *args: bias_posterior,
+          bias_posterior_tensor_fn=lambda d: d.sample(seed=43),
+          seed=45)
+
+      outputs_one = layer_one(inputs)
+      outputs_two = layer_two(inputs)
+
+      outputs_one_, outputs_two_ = sess.run([
+          outputs_one, outputs_two])
+
+      self.assertLess(np.sum(np.isclose(outputs_one_, outputs_two_)), out_size)
+
 
 if __name__ == "__main__":
   test.main()
diff --git a/tensorflow/contrib/bayesflow/python/ops/layers.py b/tensorflow/contrib/bayesflow/python/ops/layers.py
index dcead38af8..121f36ec4e 100644
--- a/tensorflow/contrib/bayesflow/python/ops/layers.py
+++ b/tensorflow/contrib/bayesflow/python/ops/layers.py
@@ -28,8 +28,12 @@ from tensorflow.contrib.bayesflow.python.ops.layers_dense_variational_impl impor
 from tensorflow.python.util.all_util import remove_undocumented
 
 _allowed_symbols = [
-    'DenseVariational',
-    'dense_variational',
+    'DenseReparameterization',
+    'DenseLocalReparameterization',
+    'DenseFlipout',
+    'dense_reparameterization',
+    'dense_local_reparameterization',
+    'dense_flipout',
     'default_loc_scale_fn',
     'default_mean_field_normal_fn',
 ]
diff --git a/tensorflow/contrib/bayesflow/python/ops/layers_dense_variational_impl.py b/tensorflow/contrib/bayesflow/python/ops/layers_dense_variational_impl.py
index a3b22f334a..2a260405d0 100644
--- a/tensorflow/contrib/bayesflow/python/ops/layers_dense_variational_impl.py
+++ b/tensorflow/contrib/bayesflow/python/ops/layers_dense_variational_impl.py
@@ -14,8 +14,12 @@
 # ==============================================================================
 """Dense Bayesian layer using KL-divergence based variational inference.
 
-@@DenseVariational
-@@dense_variational
+@@DenseReparameterization
+@@DenseLocalReparameterization
+@@DenseFlipout
+@@dense_reparameterization
+@@dense_local_reparameterization
+@@dense_flipout
 
 @@default_loc_scale_fn
 @@default_mean_field_normal_fn
@@ -35,16 +39,23 @@ from tensorflow.python.framework import tensor_shape
 from tensorflow.python.layers import base as layers_lib
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import init_ops
+from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import nn
 from tensorflow.python.ops import nn_ops
+from tensorflow.python.ops import random_ops
 from tensorflow.python.ops import standard_ops
 from tensorflow.python.ops.distributions import kullback_leibler as kl_lib
 from tensorflow.python.ops.distributions import normal as normal_lib
+from tensorflow.python.ops.distributions import util as distribution_util
 
 
 __all__ = [
-    "DenseVariational",
-    "dense_variational",
+    "DenseReparameterization",
+    "DenseLocalReparameterization",
+    "DenseFlipout",
+    "dense_reparameterization",
+    "dense_local_reparameterization",
+    "dense_flipout",
     "default_loc_scale_fn",
     "default_mean_field_normal_fn",
 ]
@@ -201,8 +212,8 @@ def default_mean_field_normal_fn(
   return _fn
 
 
-class DenseVariational(layers_lib.Layer):
-  """Densely-connected variational class.
+class _DenseVariational(layers_lib.Layer):
+  """Abstract densely-connected class (private, used as implementation base).
 
   This layer implements the Bayesian variational inference analogue to
   a dense layer by assuming the `kernel` and/or the `bias` are drawn
@@ -225,10 +236,6 @@ class DenseVariational(layers_lib.Layer):
     activity_regularizer: Regularizer function for the output.
     trainable: Boolean, if `True` also add variables to the graph collection
       `GraphKeys.TRAINABLE_VARIABLES` (see `tf.Variable`).
-    kernel_use_local_reparameterization: Python `bool` indicating whether
-      `kernel` calculation should employ the Local Reparameterization Trick.
-      When `True`, `kernel_posterior_fn` must create an instance of
-      `tf.distributions.Normal`.
     kernel_posterior_fn: Python `callable` which creates
       `tf.distributions.Distribution` instance representing the surrogate
       posterior of the `kernel` parameter. Default value:
@@ -271,8 +278,6 @@ class DenseVariational(layers_lib.Layer):
     units: Python integer, dimensionality of the output space.
     activation: Activation function (`callable`).
     activity_regularizer: Regularizer function for the output.
-    kernel_use_local_reparameterization: Python `bool` indicating whether
-      `kernel` calculation should employ the Local Reparameterization Trick.
     kernel_posterior_fn: `callable` returning posterior.
     kernel_posterior_tensor_fn: `callable` operating on posterior.
     kernel_prior_fn: `callable` returning prior.
@@ -281,31 +286,6 @@ class DenseVariational(layers_lib.Layer):
     bias_posterior_tensor_fn: `callable` operating on posterior.
     bias_prior_fn: `callable` returning prior.
     bias_divergence_fn: `callable` returning divergence.
-
-  #### Examples
-
-  We illustrate a Bayesian neural network with [variational inference](
-  https://en.wikipedia.org/wiki/Variational_Bayesian_methods),
-  assuming a dataset of `features` and `labels`.
-
-  ```python
-  tfp = tf.contrib.bayesflow
-
-  net = tfp.layers.DenseVariational(512, activation=tf.nn.relu)(features)
-  logits = tfp.layers.DenseVariational(10)(net)
-  neg_log_likelihood = tf.nn.softmax_cross_entropy_with_logits(
-      labels=labels, logits=logits)
-  kl = sum(tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))
-  loss = neg_log_likelihood + kl
-  train_op = tf.train.AdamOptimizer().minimize(loss)
-  ```
-
-  It uses reparameterization gradients to minimize the
-  Kullback-Leibler divergence up to a constant, also known as the
-  negative Evidence Lower Bound. It consists of the sum of two terms:
-  the expected negative log-likelihood, which we approximate via
-  Monte Carlo; and the KL divergence, which is added via regularizer
-  terms which are arguments to the layer.
   """
 
   def __init__(
@@ -314,7 +294,6 @@ class DenseVariational(layers_lib.Layer):
       activation=None,
       activity_regularizer=None,
       trainable=True,
-      kernel_use_local_reparameterization=True,
       kernel_posterior_fn=default_mean_field_normal_fn(),
       kernel_posterior_tensor_fn=lambda d: d.sample(),
       kernel_prior_fn=lambda dtype, *args: normal_lib.Normal(  # pylint: disable=g-long-lambda
@@ -326,7 +305,7 @@ class DenseVariational(layers_lib.Layer):
       bias_divergence_fn=lambda q, p, ignore: kl_lib.kl_divergence(q, p),
       name=None,
       **kwargs):
-    super(DenseVariational, self).__init__(
+    super(_DenseVariational, self).__init__(
         trainable=trainable,
         name=name,
         activity_regularizer=activity_regularizer,
@@ -334,8 +313,6 @@ class DenseVariational(layers_lib.Layer):
     self.units = units
     self.activation = activation
     self.input_spec = layers_lib.InputSpec(min_ndim=2)
-    self.kernel_use_local_reparameterization = (
-        kernel_use_local_reparameterization)
     self.kernel_posterior_fn = kernel_posterior_fn
     self.kernel_posterior_tensor_fn = kernel_posterior_tensor_fn
     self.kernel_prior_fn = kernel_prior_fn
@@ -419,30 +396,6 @@ class DenseVariational(layers_lib.Layer):
       self._built_bias_divergence = True
     return outputs
 
-  def _apply_variational_kernel(self, inputs):
-    if not self.kernel_use_local_reparameterization:
-      self.kernel_posterior_tensor = self.kernel_posterior_tensor_fn(
-          self.kernel_posterior)
-      self.kernel_posterior_affine = None
-      self.kernel_posterior_affine_tensor = None
-      return self._matmul(inputs, self.kernel_posterior_tensor)
-    if (not isinstance(self.kernel_posterior, independent_lib.Independent) or
-        not isinstance(self.kernel_posterior.distribution, normal_lib.Normal)):
-      raise TypeError(
-          "`kernel_use_local_reparameterization=True` requires "
-          "`kernel_posterior_fn` produce an instance of "
-          "`tf.distributions.Independent(tf.distributions.Normal)` "
-          "(saw: \"{}\").".format(type(self.kernel_posterior).__name__))
-    self.kernel_posterior_affine = normal_lib.Normal(
-        loc=self._matmul(inputs, self.kernel_posterior.distribution.loc),
-        scale=standard_ops.sqrt(self._matmul(
-            standard_ops.square(inputs),
-            standard_ops.square(self.kernel_posterior.distribution.scale))))
-    self.kernel_posterior_affine_tensor = (
-        self.kernel_posterior_tensor_fn(self.kernel_posterior_affine))
-    self.kernel_posterior_tensor = None
-    return self.kernel_posterior_affine_tensor
-
   def _apply_variational_bias(self, inputs):
     if self.bias_posterior is None:
       self.bias_posterior_tensor = None
@@ -479,13 +432,155 @@ class DenseVariational(layers_lib.Layer):
     return input_shape[:-1].concatenate(self.units)
 
 
-def dense_variational(
+class DenseReparameterization(_DenseVariational):
+  """Densely-connected layer class with reparameterization estimator.
+
+  This layer implements the Bayesian variational inference analogue to
+  a dense layer by assuming the `kernel` and/or the `bias` are drawn
+  from distributions. By default, the layer implements a stochastic
+  forward pass via sampling from the kernel and bias posteriors,
+
+  ```none
+  kernel, bias ~ posterior
+  outputs = activation(matmul(inputs, kernel) + bias)
+  ```
+
+  The arguments permit separate specification of the surrogate posterior
+  (`q(W|x)`), prior (`p(W)`), and divergence for both the `kernel` and `bias`
+  distributions.
+
+  Args:
+    units: Integer or Long, dimensionality of the output space.
+    activation: Activation function (`callable`). Set it to None to maintain a
+      linear activation.
+    activity_regularizer: Regularizer function for the output.
+    trainable: Boolean, if `True` also add variables to the graph collection
+      `GraphKeys.TRAINABLE_VARIABLES` (see `tf.Variable`).
+    kernel_posterior_fn: Python `callable` which creates
+      `tf.distributions.Distribution` instance representing the surrogate
+      posterior of the `kernel` parameter. Default value:
+      `default_mean_field_normal_fn()`.
+    kernel_posterior_tensor_fn: Python `callable` which takes a
+      `tf.distributions.Distribution` instance and returns a representative
+      value. Default value: `lambda d: d.sample()`.
+    kernel_prior_fn: Python `callable` which creates `tf.distributions`
+      instance. See `default_mean_field_normal_fn` docstring for required
+      parameter signature.
+      Default value: `tf.distributions.Normal(loc=0., scale=1.)`.
+    kernel_divergence_fn: Python `callable` which takes the surrogate posterior
+      distribution, prior distribution and random variate sample(s) from the
+      surrogate posterior and computes or approximates the KL divergence. The
+      distributions are `tf.distributions.Distribution`-like instances and the
+      sample is a `Tensor`.
+    bias_posterior_fn: Python `callable` which creates
+      `tf.distributions.Distribution` instance representing the surrogate
+      posterior of the `bias` parameter. Default value:
+      `default_mean_field_normal_fn(is_singular=True)` (which creates an
+      instance of `tf.distributions.Deterministic`).
+    bias_posterior_tensor_fn: Python `callable` which takes a
+      `tf.distributions.Distribution` instance and returns a representative
+      value. Default value: `lambda d: d.sample()`.
+    bias_prior_fn: Python `callable` which creates `tf.distributions` instance.
+      See `default_mean_field_normal_fn` docstring for required parameter
+      signature. Default value: `None` (no prior, no variational inference)
+    bias_divergence_fn: Python `callable` which takes the surrogate posterior
+      distribution, prior distribution and random variate sample(s) from the
+      surrogate posterior and computes or approximates the KL divergence. The
+      distributions are `tf.distributions.Distribution`-like instances and the
+      sample is a `Tensor`.
+    name: Python `str`, the name of the layer. Layers with the same name will
+      share `tf.Variable`s, but to avoid mistakes we require `reuse=True` in
+      such cases.
+    reuse: Python `bool`, whether to reuse the `tf.Variable`s of a previous
+      layer by the same name.
+
+  Properties:
+    units: Python integer, dimensionality of the output space.
+    activation: Activation function (`callable`).
+    activity_regularizer: Regularizer function for the output.
+    kernel_posterior_fn: `callable` returning posterior.
+    kernel_posterior_tensor_fn: `callable` operating on posterior.
+    kernel_prior_fn: `callable` returning prior.
+    kernel_divergence_fn: `callable` returning divergence.
+    bias_posterior_fn: `callable` returning posterior.
+    bias_posterior_tensor_fn: `callable` operating on posterior.
+    bias_prior_fn: `callable` returning prior.
+    bias_divergence_fn: `callable` returning divergence.
+
+  #### Examples
+
+  We illustrate a Bayesian neural network with [variational inference](
+  https://en.wikipedia.org/wiki/Variational_Bayesian_methods),
+  assuming a dataset of `features` and `labels`.
+
+  ```python
+  tfp = tf.contrib.bayesflow
+
+  net = tfp.layers.DenseReparameterization(
+      512, activation=tf.nn.relu)(features)
+  logits = tfp.layers.DenseReparameterization(10)(net)
+  neg_log_likelihood = tf.nn.softmax_cross_entropy_with_logits(
+      labels=labels, logits=logits)
+  kl = sum(tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))
+  loss = neg_log_likelihood + kl
+  train_op = tf.train.AdamOptimizer().minimize(loss)
+  ```
+
+  It uses reparameterization gradients to minimize the
+  Kullback-Leibler divergence up to a constant, also known as the
+  negative Evidence Lower Bound. It consists of the sum of two terms:
+  the expected negative log-likelihood, which we approximate via
+  Monte Carlo; and the KL divergence, which is added via regularizer
+  terms which are arguments to the layer.
+  """
+
+  def __init__(
+      self,
+      units,
+      activation=None,
+      activity_regularizer=None,
+      trainable=True,
+      kernel_posterior_fn=default_mean_field_normal_fn(),
+      kernel_posterior_tensor_fn=lambda d: d.sample(),
+      kernel_prior_fn=lambda dtype, *args: normal_lib.Normal(  # pylint: disable=g-long-lambda
+          loc=dtype.as_numpy_dtype(0.), scale=dtype.as_numpy_dtype(1.)),
+      kernel_divergence_fn=lambda q, p, ignore: kl_lib.kl_divergence(q, p),
+      bias_posterior_fn=default_mean_field_normal_fn(is_singular=True),
+      bias_posterior_tensor_fn=lambda d: d.sample(),
+      bias_prior_fn=None,
+      bias_divergence_fn=lambda q, p, ignore: kl_lib.kl_divergence(q, p),
+      name=None,
+      **kwargs):
+    super(DenseReparameterization, self).__init__(
+        units=units,
+        activation=activation,
+        activity_regularizer=activity_regularizer,
+        trainable=trainable,
+        kernel_posterior_fn=kernel_posterior_fn,
+        kernel_posterior_tensor_fn=kernel_posterior_tensor_fn,
+        kernel_prior_fn=kernel_prior_fn,
+        kernel_divergence_fn=kernel_divergence_fn,
+        bias_posterior_fn=bias_posterior_fn,
+        bias_posterior_tensor_fn=bias_posterior_tensor_fn,
+        bias_prior_fn=bias_prior_fn,
+        bias_divergence_fn=bias_divergence_fn,
+        name=name,
+        **kwargs)
+
+  def _apply_variational_kernel(self, inputs):
+    self.kernel_posterior_tensor = self.kernel_posterior_tensor_fn(
+        self.kernel_posterior)
+    self.kernel_posterior_affine = None
+    self.kernel_posterior_affine_tensor = None
+    return self._matmul(inputs, self.kernel_posterior_tensor)
+
+
+def dense_reparameterization(
     inputs,
     units,
     activation=None,
     activity_regularizer=None,
     trainable=True,
-    kernel_use_local_reparameterization=True,
     kernel_posterior_fn=default_mean_field_normal_fn(),
     kernel_posterior_tensor_fn=lambda d: d.sample(),
     kernel_prior_fn=lambda dtype, *args: normal_lib.Normal(  # pylint: disable=g-long-lambda
@@ -497,7 +592,7 @@ def dense_variational(
     bias_divergence_fn=lambda q, p, ignore: kl_lib.kl_divergence(q, p),
     name=None,
     reuse=None):
-  """Densely-connected variational layer.
+  """Densely-connected layer with reparameterization estimator.
 
   This layer implements the Bayesian variational inference analogue to
   a dense layer by assuming the `kernel` and/or the `bias` are drawn
@@ -521,10 +616,6 @@ def dense_variational(
     activity_regularizer: Regularizer function for the output.
     trainable: Boolean, if `True` also add variables to the graph collection
       `GraphKeys.TRAINABLE_VARIABLES` (see `tf.Variable`).
-    kernel_use_local_reparameterization: Python `bool` indicating whether
-      `kernel` calculation should employ the Local Reparameterization Trick.
-      When `True`, `kernel_posterior_fn` must create an instance of
-      `tf.distributions.Normal`.
     kernel_posterior_fn: Python `callable` which creates
       `tf.distributions.Distribution` instance representing the surrogate
       posterior of the `kernel` parameter. Default value:
@@ -576,8 +667,9 @@ def dense_variational(
   ```python
   tfp = tf.contrib.bayesflow
 
-  net = tfp.layers.dense_variational(features, 512, activation=tf.nn.relu)
-  logits = tfp.layers.dense_variational(net, 10)
+  net = tfp.layers.dense_reparameterization(
+      features, 512, activation=tf.nn.relu)
+  logits = tfp.layers.dense_reparameterization(net, 10)
   neg_log_likelihood = tf.nn.softmax_cross_entropy_with_logits(
       labels=labels, logits=logits)
   kl = sum(tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))
@@ -592,13 +684,11 @@ def dense_variational(
   Monte Carlo; and the KL divergence, which is added via regularizer
   terms which are arguments to the layer.
   """
-  layer = DenseVariational(
+  layer = DenseReparameterization(
       units,
       activation=activation,
       activity_regularizer=activity_regularizer,
       trainable=trainable,
-      kernel_use_local_reparameterization=(
-          kernel_use_local_reparameterization),
       kernel_posterior_fn=kernel_posterior_fn,
       kernel_posterior_tensor_fn=kernel_posterior_tensor_fn,
       kernel_prior_fn=kernel_prior_fn,
@@ -612,3 +702,600 @@ def dense_variational(
       _scope=name,
       _reuse=reuse)
   return layer.apply(inputs)
+
+
+class DenseLocalReparameterization(_DenseVariational):
+  """Densely-connected layer class with local reparameterization estimator.
+
+  This layer implements the Bayesian variational inference analogue to
+  a dense layer by assuming the `kernel` and/or the `bias` are drawn
+  from distributions. By default, the layer implements a stochastic
+  forward pass via sampling from the kernel and bias posteriors,
+
+  ```none
+  kernel, bias ~ posterior
+  outputs = activation(matmul(inputs, kernel) + bias)
+  ```
+
+  The arguments permit separate specification of the surrogate posterior
+  (`q(W|x)`), prior (`p(W)`), and divergence for both the `kernel` and `bias`
+  distributions.
+
+  Args:
+    units: Integer or Long, dimensionality of the output space.
+    activation: Activation function (`callable`). Set it to None to maintain a
+      linear activation.
+    activity_regularizer: Regularizer function for the output.
+    trainable: Boolean, if `True` also add variables to the graph collection
+      `GraphKeys.TRAINABLE_VARIABLES` (see `tf.Variable`).
+    kernel_posterior_fn: Python `callable` which creates
+      `tf.distributions.Distribution` instance representing the surrogate
+      posterior of the `kernel` parameter. Default value:
+      `default_mean_field_normal_fn()`.
+    kernel_posterior_tensor_fn: Python `callable` which takes a
+      `tf.distributions.Distribution` instance and returns a representative
+      value. Default value: `lambda d: d.sample()`.
+    kernel_prior_fn: Python `callable` which creates `tf.distributions`
+      instance. See `default_mean_field_normal_fn` docstring for required
+      parameter signature.
+      Default value: `tf.distributions.Normal(loc=0., scale=1.)`.
+    kernel_divergence_fn: Python `callable` which takes the surrogate posterior
+      distribution, prior distribution and random variate sample(s) from the
+      surrogate posterior and computes or approximates the KL divergence. The
+      distributions are `tf.distributions.Distribution`-like instances and the
+      sample is a `Tensor`.
+    bias_posterior_fn: Python `callable` which creates
+      `tf.distributions.Distribution` instance representing the surrogate
+      posterior of the `bias` parameter. Default value:
+      `default_mean_field_normal_fn(is_singular=True)` (which creates an
+      instance of `tf.distributions.Deterministic`).
+    bias_posterior_tensor_fn: Python `callable` which takes a
+      `tf.distributions.Distribution` instance and returns a representative
+      value. Default value: `lambda d: d.sample()`.
+    bias_prior_fn: Python `callable` which creates `tf.distributions` instance.
+      See `default_mean_field_normal_fn` docstring for required parameter
+      signature. Default value: `None` (no prior, no variational inference)
+    bias_divergence_fn: Python `callable` which takes the surrogate posterior
+      distribution, prior distribution and random variate sample(s) from the
+      surrogate posterior and computes or approximates the KL divergence. The
+      distributions are `tf.distributions.Distribution`-like instances and the
+      sample is a `Tensor`.
+    name: Python `str`, the name of the layer. Layers with the same name will
+      share `tf.Variable`s, but to avoid mistakes we require `reuse=True` in
+      such cases.
+    reuse: Python `bool`, whether to reuse the `tf.Variable`s of a previous
+      layer by the same name.
+
+  Properties:
+    units: Python integer, dimensionality of the output space.
+    activation: Activation function (`callable`).
+    activity_regularizer: Regularizer function for the output.
+    kernel_posterior_fn: `callable` returning posterior.
+    kernel_posterior_tensor_fn: `callable` operating on posterior.
+    kernel_prior_fn: `callable` returning prior.
+    kernel_divergence_fn: `callable` returning divergence.
+    bias_posterior_fn: `callable` returning posterior.
+    bias_posterior_tensor_fn: `callable` operating on posterior.
+    bias_prior_fn: `callable` returning prior.
+    bias_divergence_fn: `callable` returning divergence.
+
+  #### Examples
+
+  We illustrate a Bayesian neural network with [variational inference](
+  https://en.wikipedia.org/wiki/Variational_Bayesian_methods),
+  assuming a dataset of `features` and `labels`.
+
+  ```python
+  tfp = tf.contrib.bayesflow
+
+  net = tfp.layers.DenseLocalReparameterization(
+      512, activation=tf.nn.relu)(features)
+  logits = tfp.layers.DenseLocalReparameterization(10)(net)
+  neg_log_likelihood = tf.nn.softmax_cross_entropy_with_logits(
+      labels=labels, logits=logits)
+  kl = sum(tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))
+  loss = neg_log_likelihood + kl
+  train_op = tf.train.AdamOptimizer().minimize(loss)
+  ```
+
+  It uses local reparameterization gradients to minimize the
+  Kullback-Leibler divergence up to a constant, also known as the
+  negative Evidence Lower Bound. It consists of the sum of two terms:
+  the expected negative log-likelihood, which we approximate via
+  Monte Carlo; and the KL divergence, which is added via regularizer
+  terms which are arguments to the layer.
+  """
+
+  def __init__(
+      self,
+      units,
+      activation=None,
+      activity_regularizer=None,
+      trainable=True,
+      kernel_posterior_fn=default_mean_field_normal_fn(),
+      kernel_posterior_tensor_fn=lambda d: d.sample(),
+      kernel_prior_fn=lambda dtype, *args: normal_lib.Normal(  # pylint: disable=g-long-lambda
+          loc=dtype.as_numpy_dtype(0.), scale=dtype.as_numpy_dtype(1.)),
+      kernel_divergence_fn=lambda q, p, ignore: kl_lib.kl_divergence(q, p),
+      bias_posterior_fn=default_mean_field_normal_fn(is_singular=True),
+      bias_posterior_tensor_fn=lambda d: d.sample(),
+      bias_prior_fn=None,
+      bias_divergence_fn=lambda q, p, ignore: kl_lib.kl_divergence(q, p),
+      name=None,
+      **kwargs):
+    super(DenseLocalReparameterization, self).__init__(
+        units=units,
+        activation=activation,
+        activity_regularizer=activity_regularizer,
+        trainable=trainable,
+        kernel_posterior_fn=kernel_posterior_fn,
+        kernel_posterior_tensor_fn=kernel_posterior_tensor_fn,
+        kernel_prior_fn=kernel_prior_fn,
+        kernel_divergence_fn=kernel_divergence_fn,
+        bias_posterior_fn=bias_posterior_fn,
+        bias_posterior_tensor_fn=bias_posterior_tensor_fn,
+        bias_prior_fn=bias_prior_fn,
+        bias_divergence_fn=bias_divergence_fn,
+        name=name,
+        **kwargs)
+
+  def _apply_variational_kernel(self, inputs):
+    if (not isinstance(self.kernel_posterior, independent_lib.Independent) or
+        not isinstance(self.kernel_posterior.distribution, normal_lib.Normal)):
+      raise TypeError(
+          "`DenseLocalReparameterization` requires "
+          "`kernel_posterior_fn` produce an instance of "
+          "`tf.distributions.Independent(tf.distributions.Normal)` "
+          "(saw: \"{}\").".format(type(self.kernel_posterior).__name__))
+    self.kernel_posterior_affine = normal_lib.Normal(
+        loc=self._matmul(inputs, self.kernel_posterior.distribution.loc),
+        scale=standard_ops.sqrt(self._matmul(
+            standard_ops.square(inputs),
+            standard_ops.square(self.kernel_posterior.distribution.scale))))
+    self.kernel_posterior_affine_tensor = (
+        self.kernel_posterior_tensor_fn(self.kernel_posterior_affine))
+    self.kernel_posterior_tensor = None
+    return self.kernel_posterior_affine_tensor
+
+
+def dense_local_reparameterization(
+    inputs,
+    units,
+    activation=None,
+    activity_regularizer=None,
+    trainable=True,
+    kernel_posterior_fn=default_mean_field_normal_fn(),
+    kernel_posterior_tensor_fn=lambda d: d.sample(),
+    kernel_prior_fn=lambda dtype, *args: normal_lib.Normal(  # pylint: disable=g-long-lambda
+        loc=dtype.as_numpy_dtype(0.), scale=dtype.as_numpy_dtype(1.)),
+    kernel_divergence_fn=lambda q, p, ignore: kl_lib.kl_divergence(q, p),
+    bias_posterior_fn=default_mean_field_normal_fn(is_singular=True),
+    bias_posterior_tensor_fn=lambda d: d.sample(),
+    bias_prior_fn=None,
+    bias_divergence_fn=lambda q, p, ignore: kl_lib.kl_divergence(q, p),
+    name=None,
+    reuse=None):
+  """Densely-connected layer with local reparameterization estimator.
+
+  This layer implements the Bayesian variational inference analogue to
+  a dense layer by assuming the `kernel` and/or the `bias` are drawn
+  from distributions. By default, the layer implements a stochastic
+  forward pass via sampling from the kernel and bias posteriors,
+
+  ```none
+  kernel, bias ~ posterior
+  outputs = activation(matmul(inputs, kernel) + bias)
+  ```
+
+  The arguments permit separate specification of the surrogate posterior
+  (`q(W|x)`), prior (`p(W)`), and divergence for both the `kernel` and `bias`
+  distributions.
+
+  Args:
+    inputs: Tensor input.
+    units: Integer or Long, dimensionality of the output space.
+    activation: Activation function (`callable`). Set it to None to maintain a
+      linear activation.
+    activity_regularizer: Regularizer function for the output.
+    trainable: Boolean, if `True` also add variables to the graph collection
+      `GraphKeys.TRAINABLE_VARIABLES` (see `tf.Variable`).
+    kernel_posterior_fn: Python `callable` which creates
+      `tf.distributions.Distribution` instance representing the surrogate
+      posterior of the `kernel` parameter. Default value:
+      `default_mean_field_normal_fn()`.
+    kernel_posterior_tensor_fn: Python `callable` which takes a
+      `tf.distributions.Distribution` instance and returns a representative
+      value. Default value: `lambda d: d.sample()`.
+    kernel_prior_fn: Python `callable` which creates `tf.distributions`
+      instance. See `default_mean_field_normal_fn` docstring for required
+      parameter signature.
+      Default value: `tf.distributions.Normal(loc=0., scale=1.)`.
+    kernel_divergence_fn: Python `callable` which takes the surrogate posterior
+      distribution, prior distribution and random variate sample(s) from the
+      surrogate posterior and computes or approximates the KL divergence. The
+      distributions are `tf.distributions.Distribution`-like instances and the
+      sample is a `Tensor`.
+    bias_posterior_fn: Python `callable` which creates
+      `tf.distributions.Distribution` instance representing the surrogate
+      posterior of the `bias` parameter. Default value:
+      `default_mean_field_normal_fn(is_singular=True)` (which creates an
+      instance of `tf.distributions.Deterministic`).
+    bias_posterior_tensor_fn: Python `callable` which takes a
+      `tf.distributions.Distribution` instance and returns a representative
+      value. Default value: `lambda d: d.sample()`.
+    bias_prior_fn: Python `callable` which creates `tf.distributions` instance.
+      See `default_mean_field_normal_fn` docstring for required parameter
+      signature. Default value: `None` (no prior, no variational inference)
+    bias_divergence_fn: Python `callable` which takes the surrogate posterior
+      distribution, prior distribution and random variate sample(s) from the
+      surrogate posterior and computes or approximates the KL divergence. The
+      distributions are `tf.distributions.Distribution`-like instances and the
+      sample is a `Tensor`.
+    name: Python `str`, the name of the layer. Layers with the same name will
+      share `tf.Variable`s, but to avoid mistakes we require `reuse=True` in
+      such cases.
+    reuse: Python `bool`, whether to reuse the `tf.Variable`s of a previous
+      layer by the same name.
+
+  Returns:
+    output: `Tensor` representing a the affine transformed input under a random
+      draw from the surrogate posterior distribution.
+
+  #### Examples
+
+  We illustrate a Bayesian neural network with [variational inference](
+  https://en.wikipedia.org/wiki/Variational_Bayesian_methods),
+  assuming a dataset of `features` and `labels`.
+
+  ```python
+  tfp = tf.contrib.bayesflow
+
+  net = tfp.layers.dense_local_reparameterization(
+      features, 512, activation=tf.nn.relu)
+  logits = tfp.layers.dense_local_reparameterization(net, 10)
+  neg_log_likelihood = tf.nn.softmax_cross_entropy_with_logits(
+      labels=labels, logits=logits)
+  kl = sum(tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))
+  loss = neg_log_likelihood + kl
+  train_op = tf.train.AdamOptimizer().minimize(loss)
+  ```
+
+  It uses local reparameterization gradients to minimize the
+  Kullback-Leibler divergence up to a constant, also known as the
+  negative Evidence Lower Bound. It consists of the sum of two terms:
+  the expected negative log-likelihood, which we approximate via
+  Monte Carlo; and the KL divergence, which is added via regularizer
+  terms which are arguments to the layer.
+  """
+  layer = DenseLocalReparameterization(
+      units,
+      activation=activation,
+      activity_regularizer=activity_regularizer,
+      trainable=trainable,
+      kernel_posterior_fn=kernel_posterior_fn,
+      kernel_posterior_tensor_fn=kernel_posterior_tensor_fn,
+      kernel_prior_fn=kernel_prior_fn,
+      kernel_divergence_fn=kernel_divergence_fn,
+      bias_posterior_fn=bias_posterior_fn,
+      bias_posterior_tensor_fn=bias_posterior_tensor_fn,
+      bias_prior_fn=bias_prior_fn,
+      bias_divergence_fn=bias_divergence_fn,
+      name=name,
+      dtype=inputs.dtype.base_dtype,
+      _scope=name,
+      _reuse=reuse)
+  return layer.apply(inputs)
+
+
+class DenseFlipout(_DenseVariational):
+  """Densely-connected layer class with Flipout estimator.
+
+  This layer implements the Bayesian variational inference analogue to
+  a dense layer by assuming the `kernel` and/or the `bias` are drawn
+  from distributions. By default, the layer implements a stochastic
+  forward pass via sampling from the kernel and bias posteriors,
+
+  ```none
+  kernel, bias ~ posterior
+  outputs = activation(matmul(inputs, kernel) + bias)
+  ```
+
+  The arguments permit separate specification of the surrogate posterior
+  (`q(W|x)`), prior (`p(W)`), and divergence for both the `kernel` and `bias`
+  distributions.
+
+  Args:
+    units: Integer or Long, dimensionality of the output space.
+    activation: Activation function (`callable`). Set it to None to maintain a
+      linear activation.
+    activity_regularizer: Regularizer function for the output.
+    trainable: Boolean, if `True` also add variables to the graph collection
+      `GraphKeys.TRAINABLE_VARIABLES` (see `tf.Variable`).
+    kernel_posterior_fn: Python `callable` which creates
+      `tf.distributions.Distribution` instance representing the surrogate
+      posterior of the `kernel` parameter. Default value:
+      `default_mean_field_normal_fn()`.
+    kernel_posterior_tensor_fn: Python `callable` which takes a
+      `tf.distributions.Distribution` instance and returns a representative
+      value. Default value: `lambda d: d.sample()`.
+    kernel_prior_fn: Python `callable` which creates `tf.distributions`
+      instance. See `default_mean_field_normal_fn` docstring for required
+      parameter signature.
+      Default value: `tf.distributions.Normal(loc=0., scale=1.)`.
+    kernel_divergence_fn: Python `callable` which takes the surrogate posterior
+      distribution, prior distribution and random variate sample(s) from the
+      surrogate posterior and computes or approximates the KL divergence. The
+      distributions are `tf.distributions.Distribution`-like instances and the
+      sample is a `Tensor`.
+    bias_posterior_fn: Python `callable` which creates
+      `tf.distributions.Distribution` instance representing the surrogate
+      posterior of the `bias` parameter. Default value:
+      `default_mean_field_normal_fn(is_singular=True)` (which creates an
+      instance of `tf.distributions.Deterministic`).
+    bias_posterior_tensor_fn: Python `callable` which takes a
+      `tf.distributions.Distribution` instance and returns a representative
+      value. Default value: `lambda d: d.sample()`.
+    bias_prior_fn: Python `callable` which creates `tf.distributions` instance.
+      See `default_mean_field_normal_fn` docstring for required parameter
+      signature. Default value: `None` (no prior, no variational inference)
+    bias_divergence_fn: Python `callable` which takes the surrogate posterior
+      distribution, prior distribution and random variate sample(s) from the
+      surrogate posterior and computes or approximates the KL divergence. The
+      distributions are `tf.distributions.Distribution`-like instances and the
+      sample is a `Tensor`.
+    seed: Python scalar `int` which initializes the random number
+      generator. Default value: `None` (i.e., use global seed).
+    name: Python `str`, the name of the layer. Layers with the same name will
+      share `tf.Variable`s, but to avoid mistakes we require `reuse=True` in
+      such cases.
+    reuse: Python `bool`, whether to reuse the `tf.Variable`s of a previous
+      layer by the same name.
+
+  Properties:
+    units: Python integer, dimensionality of the output space.
+    activation: Activation function (`callable`).
+    activity_regularizer: Regularizer function for the output.
+    kernel_posterior_fn: `callable` returning posterior.
+    kernel_posterior_tensor_fn: `callable` operating on posterior.
+    kernel_prior_fn: `callable` returning prior.
+    kernel_divergence_fn: `callable` returning divergence.
+    bias_posterior_fn: `callable` returning posterior.
+    bias_posterior_tensor_fn: `callable` operating on posterior.
+    bias_prior_fn: `callable` returning prior.
+    bias_divergence_fn: `callable` returning divergence.
+    seed: Python integer, used to create random seeds.
+
+  #### Examples
+
+  We illustrate a Bayesian neural network with [variational inference](
+  https://en.wikipedia.org/wiki/Variational_Bayesian_methods),
+  assuming a dataset of `features` and `labels`.
+
+  ```python
+  tfp = tf.contrib.bayesflow
+
+  net = tfp.layers.DenseFlipout(
+      512, activation=tf.nn.relu)(features)
+  logits = tfp.layers.DenseFlipout(10)(net)
+  neg_log_likelihood = tf.nn.softmax_cross_entropy_with_logits(
+      labels=labels, logits=logits)
+  kl = sum(tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))
+  loss = neg_log_likelihood + kl
+  train_op = tf.train.AdamOptimizer().minimize(loss)
+  ```
+
+  It uses the Flipout gradient estimator to minimize the
+  Kullback-Leibler divergence up to a constant, also known as the
+  negative Evidence Lower Bound. It consists of the sum of two terms:
+  the expected negative log-likelihood, which we approximate via
+  Monte Carlo; and the KL divergence, which is added via regularizer
+  terms which are arguments to the layer.
+  """
+
+  def __init__(
+      self,
+      units,
+      activation=None,
+      activity_regularizer=None,
+      trainable=True,
+      kernel_posterior_fn=default_mean_field_normal_fn(),
+      kernel_posterior_tensor_fn=lambda d: d.sample(),
+      kernel_prior_fn=lambda dtype, *args: normal_lib.Normal(  # pylint: disable=g-long-lambda
+          loc=dtype.as_numpy_dtype(0.), scale=dtype.as_numpy_dtype(1.)),
+      kernel_divergence_fn=lambda q, p, ignore: kl_lib.kl_divergence(q, p),
+      bias_posterior_fn=default_mean_field_normal_fn(is_singular=True),
+      bias_posterior_tensor_fn=lambda d: d.sample(),
+      bias_prior_fn=None,
+      bias_divergence_fn=lambda q, p, ignore: kl_lib.kl_divergence(q, p),
+      seed=None,
+      name=None,
+      **kwargs):
+    super(DenseFlipout, self).__init__(
+        units=units,
+        activation=activation,
+        activity_regularizer=activity_regularizer,
+        trainable=trainable,
+        kernel_posterior_fn=kernel_posterior_fn,
+        kernel_posterior_tensor_fn=kernel_posterior_tensor_fn,
+        kernel_prior_fn=kernel_prior_fn,
+        kernel_divergence_fn=kernel_divergence_fn,
+        bias_posterior_fn=bias_posterior_fn,
+        bias_posterior_tensor_fn=bias_posterior_tensor_fn,
+        bias_prior_fn=bias_prior_fn,
+        bias_divergence_fn=bias_divergence_fn,
+        name=name,
+        **kwargs)
+    self.seed = seed
+
+  def _apply_variational_kernel(self, inputs):
+    if (not isinstance(self.kernel_posterior, independent_lib.Independent) or
+        not isinstance(self.kernel_posterior.distribution, normal_lib.Normal)):
+      raise TypeError(
+          "`DenseFlipout` requires "
+          "`kernel_posterior_fn` produce an instance of "
+          "`tf.distributions.Independent(tf.distributions.Normal)` "
+          "(saw: \"{}\").".format(type(self.kernel_posterior).__name__))
+    self.kernel_posterior_affine = normal_lib.Normal(
+        loc=array_ops.zeros_like(self.kernel_posterior.distribution.loc),
+        scale=self.kernel_posterior.distribution.scale)
+    self.kernel_posterior_affine_tensor = (
+        self.kernel_posterior_tensor_fn(self.kernel_posterior_affine))
+    self.kernel_posterior_tensor = None
+
+    input_shape = array_ops.shape(inputs)
+    batch_shape = input_shape[:-1]
+
+    sign_input = random_sign(input_shape, dtype=inputs.dtype, seed=self.seed)
+    sign_output = random_sign(
+        array_ops.concat([batch_shape,
+                          array_ops.expand_dims(self.units, 0)], 0),
+        dtype=inputs.dtype,
+        seed=distribution_util.gen_new_seed(
+            self.seed, salt="conv_variational"))
+    perturbed_inputs = self._matmul(
+        inputs * sign_input, self.kernel_posterior_affine_tensor) * sign_output
+
+    outputs = self._matmul(inputs, self.kernel_posterior.distribution.loc)
+    outputs += perturbed_inputs
+    return outputs
+
+
+def dense_flipout(
+    inputs,
+    units,
+    activation=None,
+    activity_regularizer=None,
+    trainable=True,
+    kernel_posterior_fn=default_mean_field_normal_fn(),
+    kernel_posterior_tensor_fn=lambda d: d.sample(),
+    kernel_prior_fn=lambda dtype, *args: normal_lib.Normal(  # pylint: disable=g-long-lambda
+        loc=dtype.as_numpy_dtype(0.), scale=dtype.as_numpy_dtype(1.)),
+    kernel_divergence_fn=lambda q, p, ignore: kl_lib.kl_divergence(q, p),
+    bias_posterior_fn=default_mean_field_normal_fn(is_singular=True),
+    bias_posterior_tensor_fn=lambda d: d.sample(),
+    bias_prior_fn=None,
+    bias_divergence_fn=lambda q, p, ignore: kl_lib.kl_divergence(q, p),
+    seed=None,
+    name=None,
+    reuse=None):
+  """Densely-connected layer with Flipout estimator.
+
+  This layer implements the Bayesian variational inference analogue to
+  a dense layer by assuming the `kernel` and/or the `bias` are drawn
+  from distributions. By default, the layer implements a stochastic
+  forward pass via sampling from the kernel and bias posteriors,
+
+  ```none
+  kernel, bias ~ posterior
+  outputs = activation(matmul(inputs, kernel) + bias)
+  ```
+
+  The arguments permit separate specification of the surrogate posterior
+  (`q(W|x)`), prior (`p(W)`), and divergence for both the `kernel` and `bias`
+  distributions.
+
+  Args:
+    inputs: Tensor input.
+    units: Integer or Long, dimensionality of the output space.
+    activation: Activation function (`callable`). Set it to None to maintain a
+      linear activation.
+    activity_regularizer: Regularizer function for the output.
+    trainable: Boolean, if `True` also add variables to the graph collection
+      `GraphKeys.TRAINABLE_VARIABLES` (see `tf.Variable`).
+    kernel_posterior_fn: Python `callable` which creates
+      `tf.distributions.Distribution` instance representing the surrogate
+      posterior of the `kernel` parameter. Default value:
+      `default_mean_field_normal_fn()`.
+    kernel_posterior_tensor_fn: Python `callable` which takes a
+      `tf.distributions.Distribution` instance and returns a representative
+      value. Default value: `lambda d: d.sample()`.
+    kernel_prior_fn: Python `callable` which creates `tf.distributions`
+      instance. See `default_mean_field_normal_fn` docstring for required
+      parameter signature.
+      Default value: `tf.distributions.Normal(loc=0., scale=1.)`.
+    kernel_divergence_fn: Python `callable` which takes the surrogate posterior
+      distribution, prior distribution and random variate sample(s) from the
+      surrogate posterior and computes or approximates the KL divergence. The
+      distributions are `tf.distributions.Distribution`-like instances and the
+      sample is a `Tensor`.
+    bias_posterior_fn: Python `callable` which creates
+      `tf.distributions.Distribution` instance representing the surrogate
+      posterior of the `bias` parameter. Default value:
+      `default_mean_field_normal_fn(is_singular=True)` (which creates an
+      instance of `tf.distributions.Deterministic`).
+    bias_posterior_tensor_fn: Python `callable` which takes a
+      `tf.distributions.Distribution` instance and returns a representative
+      value. Default value: `lambda d: d.sample()`.
+    bias_prior_fn: Python `callable` which creates `tf.distributions` instance.
+      See `default_mean_field_normal_fn` docstring for required parameter
+      signature. Default value: `None` (no prior, no variational inference)
+    bias_divergence_fn: Python `callable` which takes the surrogate posterior
+      distribution, prior distribution and random variate sample(s) from the
+      surrogate posterior and computes or approximates the KL divergence. The
+      distributions are `tf.distributions.Distribution`-like instances and the
+      sample is a `Tensor`.
+    seed: Python scalar `int` which initializes the random number
+      generator. Default value: `None` (i.e., use global seed).
+    name: Python `str`, the name of the layer. Layers with the same name will
+      share `tf.Variable`s, but to avoid mistakes we require `reuse=True` in
+      such cases.
+    reuse: Python `bool`, whether to reuse the `tf.Variable`s of a previous
+      layer by the same name.
+
+  Returns:
+    output: `Tensor` representing a the affine transformed input under a random
+      draw from the surrogate posterior distribution.
+
+  #### Examples
+
+  We illustrate a Bayesian neural network with [variational inference](
+  https://en.wikipedia.org/wiki/Variational_Bayesian_methods),
+  assuming a dataset of `features` and `labels`.
+
+  ```python
+  tfp = tf.contrib.bayesflow
+
+  net = tfp.layers.dense_flipout(
+      features, 512, activation=tf.nn.relu)
+  logits = tfp.layers.dense_flipout(net, 10)
+  neg_log_likelihood = tf.nn.softmax_cross_entropy_with_logits(
+      labels=labels, logits=logits)
+  kl = sum(tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))
+  loss = neg_log_likelihood + kl
+  train_op = tf.train.AdamOptimizer().minimize(loss)
+  ```
+
+  It uses the Flipout gradient estimator to minimize the
+  Kullback-Leibler divergence up to a constant, also known as the
+  negative Evidence Lower Bound. It consists of the sum of two terms:
+  the expected negative log-likelihood, which we approximate via
+  Monte Carlo; and the KL divergence, which is added via regularizer
+  terms which are arguments to the layer.
+  """
+  layer = DenseFlipout(
+      units,
+      activation=activation,
+      activity_regularizer=activity_regularizer,
+      trainable=trainable,
+      kernel_posterior_fn=kernel_posterior_fn,
+      kernel_posterior_tensor_fn=kernel_posterior_tensor_fn,
+      kernel_prior_fn=kernel_prior_fn,
+      kernel_divergence_fn=kernel_divergence_fn,
+      bias_posterior_fn=bias_posterior_fn,
+      bias_posterior_tensor_fn=bias_posterior_tensor_fn,
+      bias_prior_fn=bias_prior_fn,
+      bias_divergence_fn=bias_divergence_fn,
+      seed=seed,
+      name=name,
+      dtype=inputs.dtype.base_dtype,
+      _scope=name,
+      _reuse=reuse)
+  return layer.apply(inputs)
+
+
+def random_sign(shape, dtype=dtypes.float32, seed=None):
+  """Draw values from {-1, 1} uniformly, i.e., Rademacher distribution."""
+  random_bernoulli = random_ops.random_uniform(shape, minval=0, maxval=2,
+                                               dtype=dtypes.int32,
+                                               seed=seed)
+  return math_ops.cast(2 * random_bernoulli - 1, dtype)
-- 
GitLab


From d234325e2b82174e203cbdb8f19dfb86bbad7bec Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Sat, 16 Dec 2017 16:23:27 -0800
Subject: [PATCH 1141/1225] Create the contrib directory for the Eager to graph
 compiler.

PiperOrigin-RevId: 179314481
---
 tensorflow/contrib/py2tf/README.md   |  4 ++++
 tensorflow/contrib/py2tf/__init__.py | 29 ++++++++++++++++++++++++++++
 2 files changed, 33 insertions(+)
 create mode 100644 tensorflow/contrib/py2tf/README.md
 create mode 100644 tensorflow/contrib/py2tf/__init__.py

diff --git a/tensorflow/contrib/py2tf/README.md b/tensorflow/contrib/py2tf/README.md
new file mode 100644
index 0000000000..cd50675ad5
--- /dev/null
+++ b/tensorflow/contrib/py2tf/README.md
@@ -0,0 +1,4 @@
+# Py2TF
+
+A compiler for generating TensorFlow numeric and control flow ops from Python
+code.
diff --git a/tensorflow/contrib/py2tf/__init__.py b/tensorflow/contrib/py2tf/__init__.py
new file mode 100644
index 0000000000..f7fd8725e0
--- /dev/null
+++ b/tensorflow/contrib/py2tf/__init__.py
@@ -0,0 +1,29 @@
+# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Py2TF compiles Python code into equivalent TensorFlow code.
+
+Equivalent here means that they have the same effect when executed.
+"""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from tensorflow.python.util.all_util import remove_undocumented
+
+
+_allowed_symbols = []
+
+remove_undocumented(__name__, _allowed_symbols)
-- 
GitLab


From c11e07925a2c40ee220b9a3d76f82dc6ef17b87a Mon Sep 17 00:00:00 2001
From: Jianwei Xie <xiejw@google.com>
Date: Sat, 16 Dec 2017 18:57:31 -0800
Subject: [PATCH 1142/1225] Introduce the ContinuousEvalListener

PiperOrigin-RevId: 179319836
---
 tensorflow/python/estimator/training.py      | 238 +++++++++++++++----
 tensorflow/python/estimator/training_test.py | 151 +++++++++++-
 2 files changed, 335 insertions(+), 54 deletions(-)

diff --git a/tensorflow/python/estimator/training.py b/tensorflow/python/estimator/training.py
index 58fccc3a29..569ea04f01 100644
--- a/tensorflow/python/estimator/training.py
+++ b/tensorflow/python/estimator/training.py
@@ -488,7 +488,11 @@ class _TrainingExecutor(object):
   training and evaluation based on the setting in `tf.estimator.RunConfig`.
   """
 
-  def __init__(self, estimator, train_spec, eval_spec):
+  def __init__(self,
+               estimator,
+               train_spec,
+               eval_spec,
+               continuous_eval_listener=None):
     if not isinstance(estimator, estimator_lib.Estimator):
       raise TypeError('`estimator` must have type `tf.estimator.Estimator`.')
     self._estimator = estimator
@@ -501,6 +505,13 @@ class _TrainingExecutor(object):
       raise TypeError('`eval_spec` must have type `tf.estimator.EvalSpec`.')
     self._eval_spec = eval_spec
 
+    if (continuous_eval_listener and
+        not isinstance(continuous_eval_listener, _ContinuousEvalListener)):
+      raise TypeError('`continuous_eval_listener` must have type '
+                      '`_ContinuousEvalListener`.')
+    self._continuous_eval_listener = (
+        continuous_eval_listener or _ContinuousEvalListener())
+
   @property
   def estimator(self):
     return self._estimator
@@ -615,13 +626,16 @@ class _TrainingExecutor(object):
       # _should_stop_local_train will then end the while True as the stopping
       # condition is satisfied (both checks use the same global_step value,
       # i.e., no race condition)
-      metrics = evaluator.evaluate_and_export()
+      eval_result = evaluator.evaluate_and_export()
 
-      if not metrics:
-        #  This is unexpected. Training should always end with a new checkpoint.
-        raise RuntimeError('There was no new checkpoint after the training.')
+      if eval_result.status != _EvalStatus.EVALUATED:
+        #  This is unexpected; should never happen.
+        #  Training should always end with a new checkpoint.
+        raise RuntimeError('There was no new checkpoint after the training. '
+                           'Eval status: {}'.format(eval_result.status))
 
-      if _should_stop_local_train(metrics[ops.GraphKeys.GLOBAL_STEP]):
+      if _should_stop_local_train(
+          eval_result.metrics[ops.GraphKeys.GLOBAL_STEP]):
         break
 
   def _start_std_server(self, config):
@@ -697,9 +711,11 @@ class _TrainingExecutor(object):
     evaluator = _TrainingExecutor._Evaluator(self._estimator, self._eval_spec,
                                              self._train_spec.max_steps)
 
-    while True:
-      if latest_eval_result:
-        global_step = latest_eval_result.get(ops.GraphKeys.GLOBAL_STEP)
+    should_early_stop = False
+    while not should_early_stop:
+      if (latest_eval_result and
+          latest_eval_result.status == _EvalStatus.EVALUATED):
+        global_step = latest_eval_result.metrics.get(ops.GraphKeys.GLOBAL_STEP)
         if (global_step and self._train_spec.max_steps and
             global_step >= self._train_spec.max_steps):
           logging.info(
@@ -708,21 +724,46 @@ class _TrainingExecutor(object):
               self._train_spec.max_steps)
           return
 
-      # Final export signal: For any eval result with global_step >= train
-      # max_steps, the evaluator will send the final export signal. The next
-      # iteration of while loop will end the continuous eval as the stopping
-      # condition is satisfied (both checks use the same global_step value,
-      # i.e., no race condition)
-      start = time.time()
-      latest_eval_result = evaluator.evaluate_and_export()
+      latest_eval_result, should_early_stop = self._execute_evaluator_once(
+          evaluator, self._continuous_eval_listener,
+          self._eval_spec.throttle_secs)
+
+  def _execute_evaluator_once(self, evaluator, continuous_eval_listener,
+                              throttle_secs):
+    """Executes the `evaluator`."""
+    start = time.time()
+
+    eval_result = None
+    should_early_stop = False
 
-      # Throttle if necessary.
-      elapsed_time = time.time() - start
-      difference = self._eval_spec.throttle_secs  - elapsed_time
-      if difference > 0:
-        logging.info('Waiting %f secs before starting next eval run.',
-                     difference)
-        time.sleep(difference)
+    if not continuous_eval_listener.before_eval():
+      logging.info('Exiting evaluation, as requested by '
+                   '_ContinuousEvalListener.before_eval.')
+      should_early_stop = True
+      return (eval_result, should_early_stop)
+
+    # Final export signal: For any eval result with global_step >= train
+    # max_steps, the evaluator will send the final export signal. The next
+    # iteration of while loop will end the continuous eval as the stopping
+    # condition is satisfied (both checks use the same global_step value,
+    # i.e., no race condition)
+    eval_result = evaluator.evaluate_and_export()
+
+    if not self._continuous_eval_listener.after_eval(eval_result):
+      logging.info('Exiting evaluation, as requested by '
+                   '_ContinuousEvalListener.after_eval.')
+      should_early_stop = True
+      return (eval_result, should_early_stop)
+
+    # Throttle if necessary.
+    elapsed_time = time.time() - start
+    difference = throttle_secs  - elapsed_time
+    if difference > 0:
+      logging.info('Waiting %f secs before starting next eval run.',
+                   difference)
+      time.sleep(difference)
+
+    return (eval_result, should_early_stop)
 
   class _Evaluator(object):
     """A helper class to call `Estimator.evaluate` and export model."""
@@ -743,8 +784,7 @@ class _TrainingExecutor(object):
       """Evaluate and (maybe) export the current model.
 
       Returns:
-        Evaluation results. Returns `None` if current round of evaluation is
-        skipped.
+        An `EvalResult` instance.
 
       Raises:
         RuntimeError: for any unexpected internal error.
@@ -754,39 +794,32 @@ class _TrainingExecutor(object):
       if not latest_ckpt_path:
         self._log_err_msg('Estimator is not trained yet. Will start an '
                           'evaluation when a checkpoint is ready.')
-        return None
+        return _EvalResult(status=_EvalStatus.MISSING_CHECKPOINT)
 
       if latest_ckpt_path == self._previous_ckpt_path:
         self._log_err_msg(
             'No new checkpoint ready for evaluation. Skip the current '
             'evaluation pass as evaluation results are expected to be same '
             'for the same checkpoint.')
-        return None
-      eval_result = self._estimator.evaluate(
+        return _EvalResult(status=_EvalStatus.NO_NEW_CHECKPOINT)
+
+      metrics = self._estimator.evaluate(
           input_fn=self._eval_spec.input_fn,
           steps=self._eval_spec.steps,
           name=self._eval_spec.name,
           checkpoint_path=latest_ckpt_path,
           hooks=self._eval_spec.hooks)
 
-      if not eval_result:
-        raise RuntimeError(
-            'Internal error: `Estimator.evaluate` should never return empty '
-            'result.')
-      if not isinstance(eval_result, dict):
-        raise TypeError(
-            '`Estimator.evaluate` should return dict. Given {}.'.format(
-                type(eval_result)))
-      if ops.GraphKeys.GLOBAL_STEP not in eval_result:
-        raise RuntimeError(
-            'Internal error: `Estimator.evaluate` result should have '
-            '`global_step` in result. Given {}'.format(eval_result))
+      # _EvalResult validates the metrics.
+      eval_result = _EvalResult(
+          status=_EvalStatus.EVALUATED,
+          metrics=metrics,
+          checkpoint_path=latest_ckpt_path)
 
-      is_the_final_export = (eval_result[ops.GraphKeys.GLOBAL_STEP] >=
-                             self._max_training_steps
-                             if self._max_training_steps else False)
-      self._export_eval_result(eval_result, latest_ckpt_path,
-                               is_the_final_export)
+      is_the_final_export = (
+          eval_result.metrics[ops.GraphKeys.GLOBAL_STEP] >=
+          self._max_training_steps if self._max_training_steps else False)
+      self._export_eval_result(eval_result, is_the_final_export)
 
       if is_the_final_export:
         logging.debug('Calling exporter with the `is_the_final_export=True`.')
@@ -803,8 +836,7 @@ class _TrainingExecutor(object):
         logging.warning(message)
         self._last_warning_time = current_time
 
-    def _export_eval_result(self, eval_result, checkpoint_path,
-                            is_the_final_export):
+    def _export_eval_result(self, eval_result, is_the_final_export):
       """Export `eval_result` according to exporters in `EvalSpec`."""
       export_dir_base = os.path.join(
           compat.as_str_any(self._estimator.model_dir),
@@ -816,6 +848,114 @@ class _TrainingExecutor(object):
             export_path=os.path.join(
                 compat.as_str_any(export_dir_base),
                 compat.as_str_any(exporter.name)),
-            checkpoint_path=checkpoint_path,
-            eval_result=eval_result,
+            checkpoint_path=eval_result.checkpoint_path,
+            eval_result=eval_result.metrics,
             is_the_final_export=is_the_final_export)
+
+
+class _EvalStatus(object):
+  """The status of an evaluation event.
+
+  For local training and evaluation, the status can only be `EVALUATED` as
+  `Estimator.train` always generates a new checkpoint.
+
+  For distributed training and evaluation, a separated evaluator keeps looking
+  for new checkpoint. So, multiple situations might occur:
+
+  - EVALUATED: A new checkpoint is found since last evaluation.
+      `Estimator.evaluate` will be invoked.
+  - MISSING_CHECKPOINT: No checkpoint can be found. Typically, this means
+      the trainer has not yet produced any checkpoint.
+  - NO_NEW_CHECKPOINT: No new checkpoint can be found since last evaluation.
+      Typically, this means the trainer has not yet produced any new checkpoint.
+  """
+
+  EVALUATED = 'evaluated'
+  MISSING_CHECKPOINT = 'missing checkpoint'
+  NO_NEW_CHECKPOINT = 'no new checkpoint'
+
+
+class _EvalResult(
+    collections.namedtuple('EvalResult',
+                           ['status', 'metrics', 'checkpoint_path'])):
+  """_EvalResult holds the result of an evaluation event."""
+
+  def __new__(cls, status, metrics=None, checkpoint_path=None):
+    """Creates a validated `_EvalResult`.
+
+    Args:
+      status: See `_EvalStatus`.
+      metrics: The evaluation results returned by `Estimator.evaluate`. Only set
+          if status is `EVALUATED`.
+      checkpoint_path: The corresponding checkpoint path for the `metrics`. Only
+          set if status is `EVALUATED`.
+    Returns:
+      A validated `_EvalResult` object.
+
+    Raises:
+      ValueError: If validation fails.
+      TypeError: If any of the arguments is not the expected type.
+    """
+
+    if status != _EvalStatus.EVALUATED:
+      if metrics:
+        raise ValueError(
+            'metrics must be `None` if status is not {}; got status {},'
+            ' metrics {}'.format(_EvalStatus.EVALUATED, status, metrics))
+      if checkpoint_path:
+        raise ValueError(
+            'checkpoint must be `None` if status is not {}; got status {}, '
+            'checkpoint_path {}'.format(
+                _EvalStatus.EVALUATED, status, checkpoint_path))
+      return super(_EvalResult, cls).__new__(cls, status, metrics,
+                                             checkpoint_path)
+
+    # Now, evaluated case.
+    assert status == _EvalStatus.EVALUATED
+
+    # Validates metrics.
+    if not metrics:
+      raise ValueError(
+          'Internal error: `Estimator.evaluate` should never return empty '
+          'metrics.')
+    if not isinstance(metrics, dict):
+      raise TypeError(
+          '`Estimator.evaluate` should return dict. Given {}.'.format(
+              type(metrics)))
+    if ops.GraphKeys.GLOBAL_STEP not in metrics:
+      raise ValueError(
+          'Internal error: `Estimator.evaluate` result should have '
+          '`global_step` in result. Given {}'.format(metrics))
+
+    # Validates checkpoint_path.
+    if not checkpoint_path:
+      raise ValueError(
+          'Internal error: `checkpoint_path` should never be empty.')
+
+    return super(_EvalResult, cls).__new__(cls, status, metrics,
+                                           checkpoint_path)
+
+
+class _ContinuousEvalListener(object):
+  """Interface for listeners that take action before or after evaluation."""
+
+  def before_eval(self):
+    """Called before evaluation.
+
+    Returns:
+      `False` if you want to skip the current evaluation and early stop the
+      continuous evaluation; `True` otherwise.
+    """
+    return True
+
+  def after_eval(self, eval_result):
+    """Called after the evaluation is executed.
+
+    Args:
+      eval_result: An `_EvalResult` instance.
+
+    Returns:
+      False if you want to early stop continuous evaluation; `True` otherwise.
+    """
+    del eval_result
+    return True
diff --git a/tensorflow/python/estimator/training_test.py b/tensorflow/python/estimator/training_test.py
index 285671f99f..6390a67762 100644
--- a/tensorflow/python/estimator/training_test.py
+++ b/tensorflow/python/estimator/training_test.py
@@ -81,7 +81,7 @@ _INVALID_TASK_TYPE = '`estimator.config` must have task_type set.'
 _INVALID_TASK_TO_RUN = (
     'Task type .* is not supported. Supported task types are ((?!local).)*$')
 _INVALID_EMPTY_EVAL_RESULT_ERR = (
-    'Internal error: `Estimator.evaluate` should never return empty result')
+    'Internal error: `Estimator.evaluate` should never return empty metrics')
 _INVALID_EVAL_RESULT_TYPE_ERR = '`Estimator.evaluate` should return dict.'
 _MISSING_GLOBAL_STEP_IN_EVAL_RESULT_ERR = (
     'Internal error: `Estimator.evaluate` result should have `global_step`')
@@ -1082,6 +1082,86 @@ class TrainingExecutorRunEvaluatorTest(test.TestCase):
     self.assertEqual(2, mock_est.times_export_was_called)
     self.assertEqual(1, mock_est.times_final_export_was_called)
 
+  def test_evaluate_listener_before_eval(self):
+    training_max_step = 200
+
+    mock_est = test.mock.Mock(spec=estimator_lib.Estimator)
+    mock_est.model_dir = compat.as_bytes(test.get_temp_dir())
+    # Without early stopping, this eval will be run twice.
+    mock_est.evaluate.side_effect = [{
+        _GLOBAL_STEP_KEY: training_max_step // 2
+    }, {
+        _GLOBAL_STEP_KEY: training_max_step
+    }]
+    mock_est.latest_checkpoint.side_effect = ['path_1', 'path_2']
+
+    mock_train_spec = test.mock.Mock(spec=training.TrainSpec)
+    mock_train_spec.max_steps = training_max_step
+
+    class _Listener(training._ContinuousEvalListener):
+
+      def __init__(self):
+        self.call_count = 0
+
+      def before_eval(self):
+        self.call_count += 1
+        return  self.call_count == 1
+
+    listener = _Listener()
+
+    eval_spec = training.EvalSpec(
+        input_fn=lambda: 1, start_delay_secs=0, throttle_secs=0)
+
+    training._TrainingExecutor(mock_est, mock_train_spec, eval_spec,
+                               listener).run_evaluator()
+
+    # Before_eval returns False during the second time, so, evaluate will be
+    # called once.
+    self.assertEqual(1, mock_est.evaluate.call_count)
+    self.assertEqual(2, listener.call_count)
+
+  def test_evaluate_listener_after_eval(self):
+    training_max_step = 200
+
+    mock_est = test.mock.Mock(spec=estimator_lib.Estimator)
+    mock_est.model_dir = compat.as_bytes(test.get_temp_dir())
+    # Without early stopping, this eval will be run twice.
+    expected_eval_metrics = [{
+        _GLOBAL_STEP_KEY: training_max_step // 2
+    }, {
+        _GLOBAL_STEP_KEY: training_max_step
+    }]
+    mock_est.evaluate.side_effect = expected_eval_metrics
+    mock_est.latest_checkpoint.side_effect = ['path_1', 'path_2']
+
+    mock_train_spec = test.mock.Mock(spec=training.TrainSpec)
+    mock_train_spec.max_steps = training_max_step
+
+    class _Listener(training._ContinuousEvalListener):
+
+      def __init__(self):
+        self.call_count = 0
+
+      def after_eval(self, eval_result):
+        self.call_count += 1
+        self.eval_result = eval_result
+        return False
+
+    listener = _Listener()
+
+    eval_spec = training.EvalSpec(
+        input_fn=lambda: 1, start_delay_secs=0, throttle_secs=0)
+
+    training._TrainingExecutor(mock_est, mock_train_spec, eval_spec,
+                               listener).run_evaluator()
+
+    # after_eval returns False during the first time, so, evaluate will be
+    # called once.
+    self.assertEqual(1, mock_est.evaluate.call_count)
+    self.assertEqual(1, listener.call_count)
+    self.assertAllEqual(expected_eval_metrics[0], listener.eval_result.metrics)
+    self.assertEqual('path_1', listener.eval_result.checkpoint_path)
+
   def test_final_export_is_true_in_the_end(self):
     training_max_step = 200
 
@@ -1154,6 +1234,67 @@ class TrainingExecutorRunEvaluatorTest(test.TestCase):
     # successuful evaluation)
     self.assertEqual(2, mock_log.call_count)
 
+  def test_continuous_eval_listener_eval_result(self):
+    training_max_step = 200
+    mock_est = test.mock.Mock(spec=estimator_lib.Estimator)
+    expected_eval_metrics = [{
+        _GLOBAL_STEP_KEY: training_max_step // 2
+    }, {
+        _GLOBAL_STEP_KEY: training_max_step
+    }]
+    mock_est.evaluate.side_effect = expected_eval_metrics
+    mock_train_spec = test.mock.Mock(spec=training.TrainSpec)
+    mock_train_spec.max_steps = training_max_step
+
+    class _Listener(training._ContinuousEvalListener):
+
+      def __init__(self):
+        self.eval_results = []
+
+      def after_eval(self, eval_result):
+        self.eval_results.append(eval_result)
+        return True
+
+    continuous_eval_listener = _Listener()
+
+    self._set_up_mock_est_to_train_and_evaluate_once(mock_est, mock_train_spec)
+
+    # First two items are invalid, next two items are same.
+    mock_est.latest_checkpoint.side_effect = [
+        None, '', 'same', 'same', 'path_2'
+    ]
+    expected_eval_results = [
+        training._EvalResult(training._EvalStatus.MISSING_CHECKPOINT),
+        training._EvalResult(training._EvalStatus.MISSING_CHECKPOINT),
+        training._EvalResult(
+            training._EvalStatus.EVALUATED,
+            metrics=expected_eval_metrics[0],
+            checkpoint_path='same'),
+        training._EvalResult(training._EvalStatus.NO_NEW_CHECKPOINT),
+        training._EvalResult(
+            training._EvalStatus.EVALUATED,
+            metrics=expected_eval_metrics[1],
+            checkpoint_path='path_2'),
+    ]
+
+    eval_spec = training.EvalSpec(
+        input_fn=lambda: 1, start_delay_secs=0, throttle_secs=0)
+
+    executor = training._TrainingExecutor(mock_est, mock_train_spec, eval_spec,
+                                          continuous_eval_listener)
+    executor.run_evaluator()
+
+    # Three checkpoint paths are invalid.
+    self.assertEqual(5, mock_est.latest_checkpoint.call_count)
+    self.assertEqual(2, mock_est.evaluate.call_count)
+
+    self.assertEqual(5, len(continuous_eval_listener.eval_results))
+    for i, result in enumerate(continuous_eval_listener.eval_results):
+      self.assertEqual(expected_eval_results[i].status, result.status)
+      self.assertAllEqual(expected_eval_results[i].metrics, result.metrics)
+      self.assertEqual(expected_eval_results[i].checkpoint_path,
+                       result.checkpoint_path)
+
   def test_sleep_start_delay_secs(self):
     training_max_step = 200
     start_delay_secs = 123
@@ -1230,7 +1371,7 @@ class TrainingExecutorRunEvaluatorTest(test.TestCase):
     mock_est.evaluate.return_value = {}
 
     executor = training._TrainingExecutor(mock_est, train_spec, eval_spec)
-    with self.assertRaisesRegexp(RuntimeError, _INVALID_EMPTY_EVAL_RESULT_ERR):
+    with self.assertRaisesRegexp(ValueError, _INVALID_EMPTY_EVAL_RESULT_ERR):
       executor.run_evaluator()
 
   def test_errors_out_if_evaluate_returns_non_dict(self):
@@ -1252,7 +1393,7 @@ class TrainingExecutorRunEvaluatorTest(test.TestCase):
     mock_est.evaluate.return_value = {'loss': 123}
 
     executor = training._TrainingExecutor(mock_est, train_spec, eval_spec)
-    with self.assertRaisesRegexp(RuntimeError,
+    with self.assertRaisesRegexp(ValueError,
                                  _MISSING_GLOBAL_STEP_IN_EVAL_RESULT_ERR):
       executor.run_evaluator()
 
@@ -1573,7 +1714,7 @@ class TrainingExecutorRunLocalTest(test.TestCase):
     mock_est.evaluate.return_value = {}
 
     executor = training._TrainingExecutor(mock_est, train_spec, eval_spec)
-    with self.assertRaisesRegexp(RuntimeError, _INVALID_EMPTY_EVAL_RESULT_ERR):
+    with self.assertRaisesRegexp(ValueError, _INVALID_EMPTY_EVAL_RESULT_ERR):
       executor.run_local()
 
   def test_errors_out_if_evaluate_returns_non_dict(self):
@@ -1593,7 +1734,7 @@ class TrainingExecutorRunLocalTest(test.TestCase):
     mock_est.evaluate.return_value = {'loss': 123}
 
     executor = training._TrainingExecutor(mock_est, train_spec, eval_spec)
-    with self.assertRaisesRegexp(RuntimeError,
+    with self.assertRaisesRegexp(ValueError,
                                  _MISSING_GLOBAL_STEP_IN_EVAL_RESULT_ERR):
       executor.run_local()
 
-- 
GitLab


From 26640bba1f697d592ae0a95013fe61cbe2c527ac Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Sun, 17 Dec 2017 02:16:16 -0800
Subject: [PATCH 1143/1225] Update ops-related pbtxt files.

PiperOrigin-RevId: 179335105
---
 tensorflow/core/ops/ops.pbtxt | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/tensorflow/core/ops/ops.pbtxt b/tensorflow/core/ops/ops.pbtxt
index c0ca309a7b..284455ee54 100644
--- a/tensorflow/core/ops/ops.pbtxt
+++ b/tensorflow/core/ops/ops.pbtxt
@@ -6439,12 +6439,12 @@ op {
   name: "DataFormatVecPermute"
   input_arg {
     name: "x"
-    description: "Vector of size 4 or Tensor of shape (2, 4) in source data format."
+    description: "Vector of size 4 or Tensor of shape (4, 2) in source data format."
     type_attr: "T"
   }
   output_arg {
     name: "y"
-    description: "Vector of size 4 or Tensor of shape (2, 4) in destination data format."
+    description: "Vector of size 4 or Tensor of shape (4, 2) in destination data format."
     type_attr: "T"
   }
   attr {
@@ -6476,8 +6476,8 @@ op {
     }
     description: "destination data format."
   }
-  summary: "Returns the permuted vector/tensor in the destination data format given the one in"
-  description: "the source data format."
+  summary: "Returns the permuted vector/tensor in the destination data format given the"
+  description: "one in the source data format."
 }
 op {
   name: "DatasetToSingleElement"
-- 
GitLab


From 7d34e8ce7a01f41bf70c12bc7f8ce08ba2cd17cf Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Sun, 17 Dec 2017 02:26:46 -0800
Subject: [PATCH 1144/1225] Go: Update generated wrapper functions for
 TensorFlow ops.

PiperOrigin-RevId: 179335476
---
 tensorflow/go/op/wrappers.go | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/tensorflow/go/op/wrappers.go b/tensorflow/go/op/wrappers.go
index 7d4671dcb8..314663a814 100644
--- a/tensorflow/go/op/wrappers.go
+++ b/tensorflow/go/op/wrappers.go
@@ -13931,14 +13931,14 @@ func DataFormatVecPermuteDstFormat(value string) DataFormatVecPermuteAttr {
 	}
 }
 
-// Returns the permuted vector/tensor in the destination data format given the one in
+// Returns the permuted vector/tensor in the destination data format given the
 //
-// the source data format.
+// one in the source data format.
 //
 // Arguments:
-//	x: Vector of size 4 or Tensor of shape (2, 4) in source data format.
+//	x: Vector of size 4 or Tensor of shape (4, 2) in source data format.
 //
-// Returns Vector of size 4 or Tensor of shape (2, 4) in destination data format.
+// Returns Vector of size 4 or Tensor of shape (4, 2) in destination data format.
 func DataFormatVecPermute(scope *Scope, x tf.Output, optional ...DataFormatVecPermuteAttr) (y tf.Output) {
 	if scope.Err() != nil {
 		return
-- 
GitLab


From 897ee02ee5aca8803d7a1ab217d8aeffdebd1473 Mon Sep 17 00:00:00 2001
From: Justin Lebar <jlebar@google.com>
Date: Sun, 17 Dec 2017 07:54:51 -0800
Subject: [PATCH 1145/1225] [XLA] Shorten "custom_call_target" to "target" in
 XLA graph dumper.

Make CustomCall nodes a bit smaller by shortening
"custom_call_target=" to "target=".

PiperOrigin-RevId: 179347188
---
 tensorflow/compiler/xla/service/hlo_graph_dumper.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/compiler/xla/service/hlo_graph_dumper.cc b/tensorflow/compiler/xla/service/hlo_graph_dumper.cc
index 142e2066c8..943679784f 100644
--- a/tensorflow/compiler/xla/service/hlo_graph_dumper.cc
+++ b/tensorflow/compiler/xla/service/hlo_graph_dumper.cc
@@ -1056,7 +1056,7 @@ string HloDotDumper::GetInstructionNodeExtraInfo(const HloInstruction* instr) {
       case HloOpcode::kBatchNormGrad:
         return Printf("feature_index=%lld", instr->feature_index());
       case HloOpcode::kCustomCall:
-        return Printf("custom_call_target=%s", instr->custom_call_target());
+        return Printf("target=%s", instr->custom_call_target());
       case HloOpcode::kSlice:
         return std::all_of(instr->slice_strides().begin(),
                            instr->slice_strides().end(),
-- 
GitLab


From 6c55eb1c61d645b214cb9deb97d1659001dadf34 Mon Sep 17 00:00:00 2001
From: Justin Lebar <jlebar@google.com>
Date: Sun, 17 Dec 2017 08:37:31 -0800
Subject: [PATCH 1146/1225] [XLA] Fix bugs and improve error messages in tuple
 comparisons in literal_test_util.

 * Fix bug where EqualsTuple() only compared the first tuple element (!).
 * Compare all tuple elements, even when the first one fails.
 * Print out which tuple element we're looking at when we get a
   mismatch.

PiperOrigin-RevId: 179348614
---
 .../compiler/xla/tests/literal_test_util.cc   | 58 +++++++++++--------
 1 file changed, 34 insertions(+), 24 deletions(-)

diff --git a/tensorflow/compiler/xla/tests/literal_test_util.cc b/tensorflow/compiler/xla/tests/literal_test_util.cc
index bf6631a431..fb425fe6f3 100644
--- a/tensorflow/compiler/xla/tests/literal_test_util.cc
+++ b/tensorflow/compiler/xla/tests/literal_test_util.cc
@@ -57,7 +57,8 @@ namespace xla {
     }
     for (int i = 0; i < expected.tuple_shapes_size(); ++i) {
       ::testing::AssertionResult result =
-          EqualShapes(expected.tuple_shapes(i), actual.tuple_shapes(i));
+          EqualShapes(expected.tuple_shapes(i), actual.tuple_shapes(i))
+          << "mismatch in tuple index " << i;
       if (!result) {
         return result;
       }
@@ -345,20 +346,28 @@ bool ExpectLiteralsEqual(const Literal& expected, const Literal& actual,
            << " actual shape = " << actual.shape().ShortDebugString();
   }
   AssertEqualShapes(expected.shape(), actual.shape());
+
+  ::testing::AssertionResult err = ::testing::AssertionSuccess();
   for (uint64 i = 0; i < expected.tuple_literals_size(); ++i) {
+    SCOPED_TRACE(tensorflow::strings::StrCat(
+        "Tuple index ", i, " in ", ShapeUtil::HumanString(expected.shape())));
     const auto& expected_element = expected.tuple_literals(i);
     const auto& actual_element = actual.tuple_literals(i);
-    if (ShapeUtil::IsTuple(expected_element.shape())) {
-      auto ret = EqualTuple(expected_element, actual_element);
-      if (!ret) {
-        return ret;
+
+    ::testing::AssertionResult res = [&] {
+      if (ShapeUtil::IsTuple(expected_element.shape())) {
+        return EqualTuple(expected_element, actual_element);
+      } else {
+        return Equal(expected_element, actual_element);
       }
-    } else {
-      return Equal(expected_element, actual_element);
+    }();
+
+    if (!res && err) {
+      err = res;
     }
   }
 
-  return ::testing::AssertionSuccess();
+  return err;
 }
 
 /* static */ void LiteralTestUtil::ExpectEqualTuple(const Literal& expected,
@@ -633,28 +642,29 @@ bool NearComparator::ExpectValuesNear<bfloat16>(bfloat16 expected,
            << " actual shape = " << actual.shape().ShortDebugString();
   }
   AssertEqualShapes(expected.shape(), actual.shape());
+
+  ::testing::AssertionResult err = ::testing::AssertionSuccess();
   for (uint64 i = 0; i < expected.tuple_literals_size(); ++i) {
+    SCOPED_TRACE(tensorflow::strings::StrCat(
+        "Tuple index ", i, " in ", ShapeUtil::HumanString(expected.shape())));
     const auto& expected_element = expected.tuple_literals(i);
     const auto& actual_element = actual.tuple_literals(i);
-    if (ShapeUtil::IsTuple(expected_element.shape())) {
-      auto ret = NearTuple(expected_element, actual_element, error);
-      if (!ret) {
-        return ret;
-      }
-    } else if (ShapeUtil::ElementIsFloating(expected_element.shape())) {
-      auto ret = Near(expected_element, actual_element, error);
-      if (!ret) {
-        return ret;
-      }
-    } else {
-      auto ret = Equal(expected_element, actual_element);
-      if (!ret) {
-        return ret;
+
+    ::testing::AssertionResult res = [&] {
+      if (ShapeUtil::IsTuple(expected_element.shape())) {
+        return NearTuple(expected_element, actual_element, error);
+      } else if (ShapeUtil::ElementIsFloating(expected_element.shape())) {
+        return Near(expected_element, actual_element, error);
+      } else {
+        return Equal(expected_element, actual_element);
       }
+    }();
+
+    if (err && !res) {
+      err = res;
     }
   }
-
-  return ::testing::AssertionSuccess();
+  return err;
 }
 
 /* static */ void LiteralTestUtil::ExpectNearTuple(const Literal& expected,
-- 
GitLab


From 01da208158687c575a9c459cb62e3c5f90968bd2 Mon Sep 17 00:00:00 2001
From: Justin Lebar <jlebar@google.com>
Date: Sun, 17 Dec 2017 09:43:37 -0800
Subject: [PATCH 1147/1225] [StreamExecutor] Allow null batch_mean/batch_var in
 calls to BatchNormalizationForward.

cudnn allows these pointers to be null, so we should too.  This lets us
avoid an unnecessary ThenMemZero, and lets callers avoid an unnecessary
allocation.

PiperOrigin-RevId: 179350594
---
 tensorflow/stream_executor/cuda/cuda_dnn.cc | 21 +++++++++++++++++----
 1 file changed, 17 insertions(+), 4 deletions(-)

diff --git a/tensorflow/stream_executor/cuda/cuda_dnn.cc b/tensorflow/stream_executor/cuda/cuda_dnn.cc
index daeb9a4b77..9856a0bcbb 100644
--- a/tensorflow/stream_executor/cuda/cuda_dnn.cc
+++ b/tensorflow/stream_executor/cuda/cuda_dnn.cc
@@ -2761,14 +2761,27 @@ bool CudnnSupport::DoBatchNormalizationForwardImpl(
   float zero = 0.0;
 
   if (is_training) {
-    stream->ThenMemZero(batch_mean, batch_mean->size());
-    stream->ThenMemZero(batch_var, batch_var->size());
+    CHECK_EQ(batch_mean->is_null(), batch_var->is_null())
+        << "batch_mean and batch_var must both be null or both be non-null";
+
+    void* batch_mean_opaque;
+    void* batch_var_opaque;
+    if (!batch_mean->is_null() && !batch_var->is_null()) {
+      stream->ThenMemZero(batch_mean, batch_mean->size());
+      stream->ThenMemZero(batch_var, batch_var->size());
+      batch_mean_opaque = batch_mean->opaque();
+      batch_var_opaque = batch_var->opaque();
+    } else {
+      batch_mean_opaque = nullptr;
+      batch_var_opaque = nullptr;
+    }
+
     status = wrap::cudnnBatchNormalizationForwardTraining(
         parent_, ToHandle(dnn_handle_), mode, &one, &zero,
         x_descriptor.handle(), x.opaque(), x_descriptor.handle(), y->opaque(),
         scale_offset_descriptor.handle(), scale.opaque(), offset.opaque(), 1.0,
-        batch_mean->opaque(), batch_var->opaque(), epsilon,
-        saved_mean->opaque(), saved_inv_var->opaque());
+        batch_mean_opaque, batch_var_opaque, epsilon, saved_mean->opaque(),
+        saved_inv_var->opaque());
 #if CUDNN_VERSION < 5000
     CHECK(inv_var_to_var);
     inv_var_to_var();
-- 
GitLab


From 483e439c7494dbe30a660b90a3bca1349a1bf8fd Mon Sep 17 00:00:00 2001
From: Justin Lebar <jlebar@google.com>
Date: Sun, 17 Dec 2017 10:43:11 -0800
Subject: [PATCH 1148/1225] [StreamExecutor] Change "variance" to "inv_var" in
 BatchNormalizationBackward.

This parameter is not the variance of the data, but rather is
1/(sqrt(variance + epsilon).  Neglecting epsilon, this is the inverse
standard deviation.

"inv_stddev" might be a better name, but "inv_var" is certainly better
than plain "variance", and it matches nvidia's name for this parameter,
which I think may override the desire for a more precise name.

No functional change.

PiperOrigin-RevId: 179352839
---
 tensorflow/stream_executor/cuda/cuda_dnn.cc | 12 ++++++------
 tensorflow/stream_executor/cuda/cuda_dnn.h  |  6 +++---
 tensorflow/stream_executor/dnn.h            |  9 +++++----
 tensorflow/stream_executor/stream.cc        |  8 ++++----
 tensorflow/stream_executor/stream.h         |  4 ++--
 5 files changed, 20 insertions(+), 19 deletions(-)

diff --git a/tensorflow/stream_executor/cuda/cuda_dnn.cc b/tensorflow/stream_executor/cuda/cuda_dnn.cc
index 9856a0bcbb..2cba59e44a 100644
--- a/tensorflow/stream_executor/cuda/cuda_dnn.cc
+++ b/tensorflow/stream_executor/cuda/cuda_dnn.cc
@@ -2810,28 +2810,28 @@ bool CudnnSupport::DoBatchNormalizationForwardImpl(
 bool CudnnSupport::DoBatchNormalizationBackward(
     Stream* stream, const DeviceMemory<float>& y_backprop,
     const DeviceMemory<float>& x, const DeviceMemory<float>& scale,
-    const DeviceMemory<float>& mean, const DeviceMemory<float>& variance,
+    const DeviceMemory<float>& mean, const DeviceMemory<float>& inv_var,
     const dnn::BatchDescriptor& x_desc,
     const dnn::BatchDescriptor& scale_offset_desc, const double epsilon,
     DeviceMemory<float>* x_backprop, DeviceMemory<float>* scale_backprop,
     DeviceMemory<float>* offset_backprop) {
   return DoBatchNormalizationBackwardImpl(
       stream, CUDNN_DATA_FLOAT, CUDNN_DATA_FLOAT, y_backprop, x, scale, mean,
-      variance, x_desc, scale_offset_desc, epsilon, x_backprop, scale_backprop,
+      inv_var, x_desc, scale_offset_desc, epsilon, x_backprop, scale_backprop,
       offset_backprop);
 }
 
 bool CudnnSupport::DoBatchNormalizationBackward(
     Stream* stream, const DeviceMemory<Eigen::half>& y_backprop,
     const DeviceMemory<Eigen::half>& x, const DeviceMemory<float>& scale,
-    const DeviceMemory<float>& mean, const DeviceMemory<float>& variance,
+    const DeviceMemory<float>& mean, const DeviceMemory<float>& inv_var,
     const dnn::BatchDescriptor& x_desc,
     const dnn::BatchDescriptor& scale_offset_desc, const double epsilon,
     DeviceMemory<Eigen::half>* x_backprop, DeviceMemory<float>* scale_backprop,
     DeviceMemory<float>* offset_backprop) {
   return DoBatchNormalizationBackwardImpl(
       stream, CUDNN_DATA_HALF, CUDNN_DATA_FLOAT, y_backprop, x, scale, mean,
-      variance, x_desc, scale_offset_desc, epsilon, x_backprop, scale_backprop,
+      inv_var, x_desc, scale_offset_desc, epsilon, x_backprop, scale_backprop,
       offset_backprop);
 }
 
@@ -2840,7 +2840,7 @@ bool CudnnSupport::DoBatchNormalizationBackwardImpl(
     Stream* stream, int cudnn_input_type, int cudnn_scale_type,
     const DeviceMemory<T>& y_backprop, const DeviceMemory<T>& x,
     const DeviceMemory<U>& scale, const DeviceMemory<U>& mean,
-    const DeviceMemory<U>& variance, const dnn::BatchDescriptor& x_desc,
+    const DeviceMemory<U>& inv_var, const dnn::BatchDescriptor& x_desc,
     const dnn::BatchDescriptor& scale_offset_desc, const double epsilon,
     DeviceMemory<T>* x_backprop, DeviceMemory<U>* scale_backprop,
     DeviceMemory<U>* offset_backprop) {
@@ -2867,7 +2867,7 @@ bool CudnnSupport::DoBatchNormalizationBackwardImpl(
       y_backprop.opaque(), x_descriptor.handle(), x_backprop->opaque(),
       scale_offset_descriptor.handle(), scale.opaque(),
       scale_backprop->opaque(), offset_backprop->opaque(), epsilon,
-      mean.opaque(), variance.opaque());
+      mean.opaque(), inv_var.opaque());
   if (status != CUDNN_STATUS_SUCCESS) {
     LOG(ERROR) << "failed to enqueue backward batch normalization on stream: "
                << ToString(status);
diff --git a/tensorflow/stream_executor/cuda/cuda_dnn.h b/tensorflow/stream_executor/cuda/cuda_dnn.h
index 14986286f1..ee28c0bf57 100644
--- a/tensorflow/stream_executor/cuda/cuda_dnn.h
+++ b/tensorflow/stream_executor/cuda/cuda_dnn.h
@@ -226,7 +226,7 @@ class CudnnSupport : public dnn::DnnSupport {
   bool DoBatchNormalizationBackward(
       Stream* stream, const DeviceMemory<float>& y_backprop,
       const DeviceMemory<float>& x, const DeviceMemory<float>& scale,
-      const DeviceMemory<float>& mean, const DeviceMemory<float>& variance,
+      const DeviceMemory<float>& mean, const DeviceMemory<float>& inv_var,
       const dnn::BatchDescriptor& x_desc,
       const dnn::BatchDescriptor& scale_offset_desc, const double epsilon,
       DeviceMemory<float>* x_backprop, DeviceMemory<float>* scale_backprop,
@@ -235,7 +235,7 @@ class CudnnSupport : public dnn::DnnSupport {
   bool DoBatchNormalizationBackward(
       Stream* stream, const DeviceMemory<Eigen::half>& y_backprop,
       const DeviceMemory<Eigen::half>& x, const DeviceMemory<float>& scale,
-      const DeviceMemory<float>& mean, const DeviceMemory<float>& variance,
+      const DeviceMemory<float>& mean, const DeviceMemory<float>& inv_var,
       const dnn::BatchDescriptor& x_desc,
       const dnn::BatchDescriptor& scale_offset_desc, const double epsilon,
       DeviceMemory<Eigen::half>* x_backprop,
@@ -637,7 +637,7 @@ class CudnnSupport : public dnn::DnnSupport {
       Stream* stream, int cudnn_input_type, int cudnn_scale_type,
       const DeviceMemory<T>& y_backprop, const DeviceMemory<T>& x,
       const DeviceMemory<U>& scale, const DeviceMemory<U>& mean,
-      const DeviceMemory<U>& variance, const dnn::BatchDescriptor& x_desc,
+      const DeviceMemory<U>& inv_var, const dnn::BatchDescriptor& x_desc,
       const dnn::BatchDescriptor& scale_offset_desc, const double epsilon,
       DeviceMemory<T>* x_backprop, DeviceMemory<U>* scale_backprop,
       DeviceMemory<U>* offset_backprop);
diff --git a/tensorflow/stream_executor/dnn.h b/tensorflow/stream_executor/dnn.h
index 73b96de438..07314a0ff7 100644
--- a/tensorflow/stream_executor/dnn.h
+++ b/tensorflow/stream_executor/dnn.h
@@ -908,8 +908,8 @@ class DnnSupport {
   //    the running variance.
   //  reserve_space_1: saved mean, to be reused in the backward gradient
   //    computation.
-  //  reserve_space_2: saved variance, to be reused in the backward gradient
-  //    computation.
+  //  reserve_space_2: saved inv_var (1/sqrt(epsilon + variance), to be reused
+  //    in the backward gradient computation.
   //  is_training: Set to true for training, false for inference.
   //  var_to_inv_var: a function to convert the variance to inverted variance
   //    for cuDNN v4 forward inference.
@@ -957,6 +957,7 @@ class DnnSupport {
   //  y_backprop: gradient with regard to output y.
   //  x: input data.
   //  scale: scaling parameters.
+  //  inv_var: 1/sqrt(epsilon + variance) of x.
   //  x_desc: dimensions of the input data, which is the same as the dimensions
   //    of the output.
   //  scale_offset_desc: dimensions of scale and offset.
@@ -967,7 +968,7 @@ class DnnSupport {
   virtual bool DoBatchNormalizationBackward(
       Stream* stream, const DeviceMemory<float>& y_backprop,
       const DeviceMemory<float>& x, const DeviceMemory<float>& scale,
-      const DeviceMemory<float>& mean, const DeviceMemory<float>& variance,
+      const DeviceMemory<float>& mean, const DeviceMemory<float>& inv_var,
       const dnn::BatchDescriptor& x_desc,
       const dnn::BatchDescriptor& scale_offset_desc, const double epsilon,
       DeviceMemory<float>* x_backprop, DeviceMemory<float>* scale_backprop,
@@ -981,7 +982,7 @@ class DnnSupport {
   virtual bool DoBatchNormalizationBackward(
       Stream* stream, const DeviceMemory<Eigen::half>& y_backprop,
       const DeviceMemory<Eigen::half>& x, const DeviceMemory<float>& scale,
-      const DeviceMemory<float>& mean, const DeviceMemory<float>& variance,
+      const DeviceMemory<float>& mean, const DeviceMemory<float>& inv_var,
       const dnn::BatchDescriptor& x_desc,
       const dnn::BatchDescriptor& scale_offset_desc, const double epsilon,
       DeviceMemory<Eigen::half>* x_backprop,
diff --git a/tensorflow/stream_executor/stream.cc b/tensorflow/stream_executor/stream.cc
index e92ed14779..ba5001e273 100644
--- a/tensorflow/stream_executor/stream.cc
+++ b/tensorflow/stream_executor/stream.cc
@@ -342,7 +342,7 @@ Stream &Stream::ThenBatchNormalizationForward(
 Stream &Stream::ThenBatchNormalizationBackward(
     const DeviceMemory<float> &y_backprop, const DeviceMemory<float> &x,
     const DeviceMemory<float> &scale, const DeviceMemory<float> &mean,
-    const DeviceMemory<float> &variance, const dnn::BatchDescriptor &x_desc,
+    const DeviceMemory<float> &inv_var, const dnn::BatchDescriptor &x_desc,
     const dnn::BatchDescriptor &scale_offset_desc, const double epsilon,
     DeviceMemory<float> *x_backprop, DeviceMemory<float> *scale_backprop,
     DeviceMemory<float> *offset_backprop) {
@@ -352,7 +352,7 @@ Stream &Stream::ThenBatchNormalizationBackward(
   if (ok()) {
     if (dnn::DnnSupport *dnn = parent_->AsDnn()) {
       CheckError(dnn->DoBatchNormalizationBackward(
-          this, y_backprop, x, scale, mean, variance, x_desc, scale_offset_desc,
+          this, y_backprop, x, scale, mean, inv_var, x_desc, scale_offset_desc,
           epsilon, x_backprop, scale_backprop, offset_backprop));
     } else {
       SetErrorAndLogNoDnnSupport();
@@ -392,7 +392,7 @@ Stream &Stream::ThenBatchNormalizationForward(
 Stream &Stream::ThenBatchNormalizationBackward(
     const DeviceMemory<Eigen::half> &y_backprop,
     const DeviceMemory<Eigen::half> &x, const DeviceMemory<float> &scale,
-    const DeviceMemory<float> &mean, const DeviceMemory<float> &variance,
+    const DeviceMemory<float> &mean, const DeviceMemory<float> &inv_var,
     const dnn::BatchDescriptor &x_desc,
     const dnn::BatchDescriptor &scale_offset_desc, const double epsilon,
     DeviceMemory<Eigen::half> *x_backprop, DeviceMemory<float> *scale_backprop,
@@ -403,7 +403,7 @@ Stream &Stream::ThenBatchNormalizationBackward(
   if (ok()) {
     if (dnn::DnnSupport *dnn = parent_->AsDnn()) {
       CheckError(dnn->DoBatchNormalizationBackward(
-          this, y_backprop, x, scale, mean, variance, x_desc, scale_offset_desc,
+          this, y_backprop, x, scale, mean, inv_var, x_desc, scale_offset_desc,
           epsilon, x_backprop, scale_backprop, offset_backprop));
     } else {
       SetErrorAndLogNoDnnSupport();
diff --git a/tensorflow/stream_executor/stream.h b/tensorflow/stream_executor/stream.h
index 37828d9882..a2fb2ea237 100644
--- a/tensorflow/stream_executor/stream.h
+++ b/tensorflow/stream_executor/stream.h
@@ -234,7 +234,7 @@ class Stream {
   Stream &ThenBatchNormalizationBackward(
       const DeviceMemory<float> &y_backprop, const DeviceMemory<float> &x,
       const DeviceMemory<float> &scale, const DeviceMemory<float> &mean,
-      const DeviceMemory<float> &variance, const dnn::BatchDescriptor &x_desc,
+      const DeviceMemory<float> &inv_var, const dnn::BatchDescriptor &x_desc,
       const dnn::BatchDescriptor &scale_offset_desc, const double epsilon,
       DeviceMemory<float> *x_backprop, DeviceMemory<float> *scale_backprop,
       DeviceMemory<float> *offset_backprop);
@@ -255,7 +255,7 @@ class Stream {
   Stream &ThenBatchNormalizationBackward(
       const DeviceMemory<Eigen::half> &y_backprop,
       const DeviceMemory<Eigen::half> &x, const DeviceMemory<float> &scale,
-      const DeviceMemory<float> &mean, const DeviceMemory<float> &variance,
+      const DeviceMemory<float> &mean, const DeviceMemory<float> &inv_var,
       const dnn::BatchDescriptor &x_desc,
       const dnn::BatchDescriptor &scale_offset_desc, const double epsilon,
       DeviceMemory<Eigen::half> *x_backprop,
-- 
GitLab


From f83e9d1ca0e5a78e33ee7fc905742c4e471ce3a6 Mon Sep 17 00:00:00 2001
From: Ian Langmore <langmore@google.com>
Date: Sun, 17 Dec 2017 14:16:48 -0800
Subject: [PATCH 1149/1225] auto_correlation added to tf.contrib.distributions

PiperOrigin-RevId: 179359575
---
 tensorflow/contrib/distributions/BUILD        |   2 +
 .../python/kernel_tests/sample_stats_test.py  | 233 ++++++++++++++++++
 .../distributions/python/ops/sample_stats.py  | 178 +++++++++++++
 3 files changed, 413 insertions(+)

diff --git a/tensorflow/contrib/distributions/BUILD b/tensorflow/contrib/distributions/BUILD
index b2c641f8ab..95848af699 100644
--- a/tensorflow/contrib/distributions/BUILD
+++ b/tensorflow/contrib/distributions/BUILD
@@ -60,6 +60,7 @@ py_library(
         "//tensorflow/python:nn",
         "//tensorflow/python:nn_ops",
         "//tensorflow/python:random_ops",
+        "//tensorflow/python:spectral_ops",
         "//tensorflow/python:state_ops",
         "//tensorflow/python:tensor_util",
         "//tensorflow/python:util",
@@ -437,6 +438,7 @@ cuda_py_test(
         "//tensorflow/python:framework",
         "//tensorflow/python:framework_for_generated_wrappers",
         "//tensorflow/python:framework_test_lib",
+        "//tensorflow/python:spectral_ops_test_util",
         "//tensorflow/python:math_ops",
         "//tensorflow/python:nn_ops",
         "//tensorflow/python:platform_test",
diff --git a/tensorflow/contrib/distributions/python/kernel_tests/sample_stats_test.py b/tensorflow/contrib/distributions/python/kernel_tests/sample_stats_test.py
index 595d9f5df7..4186cf129d 100644
--- a/tensorflow/contrib/distributions/python/kernel_tests/sample_stats_test.py
+++ b/tensorflow/contrib/distributions/python/kernel_tests/sample_stats_test.py
@@ -23,11 +23,244 @@ import numpy as np
 from tensorflow.contrib.distributions.python.ops import sample_stats
 from tensorflow.python.framework import dtypes
 from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import spectral_ops_test_util
 from tensorflow.python.platform import test
 
 rng = np.random.RandomState(0)
 
 
+class _AutoCorrelationTest(object):
+
+  @property
+  def use_static_shape(self):
+    raise NotImplementedError("Subclass failed to implement `use_static_shape`")
+
+  @property
+  def dtype(self):
+    raise NotImplementedError("Subclass failed to implement `dtype`.")
+
+  def test_constant_sequence_axis_0_max_lags_none_center_false(self):
+    x_ = np.array([[0., 0., 0.],
+                   [1., 1., 1.]]).astype(self.dtype)
+    x_ph = array_ops.placeholder_with_default(
+        input=x_,
+        shape=x_.shape if self.use_static_shape else None)
+    with spectral_ops_test_util.fft_kernel_label_map():
+      with self.test_session() as sess:
+        # Setting normalize = True means we divide by zero.
+        auto_corr = sample_stats.auto_correlation(
+            x_ph, axis=1, center=False, normalize=False)
+        if self.use_static_shape:
+          self.assertEqual((2, 3), auto_corr.shape)
+        auto_corr_ = sess.run(auto_corr)
+        self.assertAllClose(
+            [[0., 0., 0.],
+             [1., 1., 1.]], auto_corr_)
+
+  def test_constant_sequence_axis_0_max_lags_none_center_true(self):
+    x_ = np.array([[0., 0., 0.],
+                   [1., 1., 1.]]).astype(self.dtype)
+    x_ph = array_ops.placeholder_with_default(
+        input=x_,
+        shape=x_.shape if self.use_static_shape else None)
+    with spectral_ops_test_util.fft_kernel_label_map():
+      with self.test_session() as sess:
+        # Setting normalize = True means we divide by zero.
+        auto_corr = sample_stats.auto_correlation(
+            x_ph, axis=1, normalize=False, center=True)
+        if self.use_static_shape:
+          self.assertEqual((2, 3), auto_corr.shape)
+        auto_corr_ = sess.run(auto_corr)
+        self.assertAllClose(
+            [[0., 0., 0.],
+             [0., 0., 0.]], auto_corr_)
+
+  def check_results_versus_brute_force(
+      self, x, axis, max_lags, center, normalize):
+    """Compute auto-correlation by brute force, then compare to tf result."""
+    # Brute for auto-corr -- avoiding fft and transpositions.
+    axis_len = x.shape[axis]
+    if max_lags is None:
+      max_lags = axis_len - 1
+    else:
+      max_lags = min(axis_len - 1, max_lags)
+    auto_corr_at_lag = []
+    if center:
+      x -= x.mean(axis=axis, keepdims=True)
+    for m in range(max_lags + 1):
+      auto_corr_at_lag.append((
+          np.take(x, indices=range(0, axis_len - m), axis=axis) *
+          np.conj(np.take(x, indices=range(m, axis_len), axis=axis))
+      ).mean(axis=axis, keepdims=True))
+    rxx = np.concatenate(auto_corr_at_lag, axis=axis)
+    if normalize:
+      rxx /= np.take(rxx, [0], axis=axis)
+
+    x_ph = array_ops.placeholder_with_default(
+        x, shape=x.shape if self.use_static_shape else None)
+    with spectral_ops_test_util.fft_kernel_label_map():
+      with self.test_session():
+        auto_corr = sample_stats.auto_correlation(
+            x_ph, axis=axis, max_lags=max_lags, center=center,
+            normalize=normalize)
+        if self.use_static_shape:
+          output_shape = list(x.shape)
+          output_shape[axis] = max_lags + 1
+          self.assertAllEqual(output_shape, auto_corr.shape)
+        self.assertAllClose(rxx, auto_corr.eval(), rtol=1e-5, atol=1e-5)
+
+  def test_axis_n1_center_false_max_lags_none(self):
+    x = rng.randn(2, 3, 4).astype(self.dtype)
+    if self.dtype in [np.complex64]:
+      x = 1j * rng.randn(2, 3, 4).astype(self.dtype)
+    self.check_results_versus_brute_force(
+        x, axis=-1, max_lags=None, center=False, normalize=False)
+
+  def test_axis_n2_center_false_max_lags_none(self):
+    x = rng.randn(3, 4, 5).astype(self.dtype)
+    if self.dtype in [np.complex64]:
+      x = 1j * rng.randn(3, 4, 5).astype(self.dtype)
+    self.check_results_versus_brute_force(
+        x, axis=-2, max_lags=None, center=False, normalize=False)
+
+  def test_axis_n1_center_false_max_lags_none_normalize_true(self):
+    x = rng.randn(2, 3, 4).astype(self.dtype)
+    if self.dtype in [np.complex64]:
+      x = 1j * rng.randn(2, 3, 4).astype(self.dtype)
+    self.check_results_versus_brute_force(
+        x, axis=-1, max_lags=None, center=False, normalize=True)
+
+  def test_axis_n2_center_false_max_lags_none_normalize_true(self):
+    x = rng.randn(3, 4, 5).astype(self.dtype)
+    if self.dtype in [np.complex64]:
+      x = 1j * rng.randn(3, 4, 5).astype(self.dtype)
+    self.check_results_versus_brute_force(
+        x, axis=-2, max_lags=None, center=False, normalize=True)
+
+  def test_axis_0_center_true_max_lags_none(self):
+    x = rng.randn(3, 4, 5).astype(self.dtype)
+    if self.dtype in [np.complex64]:
+      x = 1j * rng.randn(3, 4, 5).astype(self.dtype)
+    self.check_results_versus_brute_force(
+        x, axis=0, max_lags=None, center=True, normalize=False)
+
+  def test_axis_2_center_true_max_lags_1(self):
+    x = rng.randn(3, 4, 5).astype(self.dtype)
+    if self.dtype in [np.complex64]:
+      x = 1j * rng.randn(3, 4, 5).astype(self.dtype)
+    self.check_results_versus_brute_force(
+        x, axis=2, max_lags=1, center=True, normalize=False)
+
+  def test_axis_2_center_true_max_lags_100(self):
+    # There are less than 100 elements in axis 2, so expect we get back an array
+    # the same size as x, despite having asked for 100 lags.
+    x = rng.randn(3, 4, 5).astype(self.dtype)
+    if self.dtype in [np.complex64]:
+      x = 1j * rng.randn(3, 4, 5).astype(self.dtype)
+    self.check_results_versus_brute_force(
+        x, axis=2, max_lags=100, center=True, normalize=False)
+
+  def test_long_orthonormal_sequence_has_corr_length_0(self):
+    l = 10000
+    x = rng.randn(l).astype(self.dtype)
+    x_ph = array_ops.placeholder_with_default(
+        x, shape=(l,) if self.use_static_shape else None)
+    with spectral_ops_test_util.fft_kernel_label_map():
+      with self.test_session():
+        rxx = sample_stats.auto_correlation(
+            x_ph, max_lags=l // 2, center=True, normalize=False)
+        if self.use_static_shape:
+          self.assertAllEqual((l // 2 + 1,), rxx.shape)
+        rxx_ = rxx.eval()
+        # OSS CPU FFT has some accuracy issues is not the most accurate.
+        # So this tolerance is a bit bad.
+        self.assertAllClose(1., rxx_[0], rtol=0.05)
+        # The maximal error in the rest of the sequence is not great.
+        self.assertAllClose(np.zeros(l // 2), rxx_[1:], atol=0.1)
+        # The mean error in the rest is ok, actually 0.008 when I tested it.
+        self.assertLess(np.abs(rxx_[1:]).mean(), 0.02)
+
+  def test_step_function_sequence(self):
+    # x jumps to new random value every 10 steps.  So correlation length = 10.
+    x = (rng.randint(-10, 10, size=(1000, 1))
+         * np.ones((1, 10))).ravel().astype(self.dtype)
+    x_ph = array_ops.placeholder_with_default(
+        x, shape=(1000 * 10,) if self.use_static_shape else None)
+    with spectral_ops_test_util.fft_kernel_label_map():
+      with self.test_session():
+        rxx = sample_stats.auto_correlation(
+            x_ph, max_lags=1000 * 10 // 2, center=True, normalize=False)
+        if self.use_static_shape:
+          self.assertAllEqual((1000 * 10 // 2 + 1,), rxx.shape)
+        rxx_ = rxx.eval()
+        rxx_ /= rxx_[0]
+        # Expect positive correlation for the first 10 lags, then significantly
+        # smaller negative.
+        self.assertGreater(rxx_[:10].min(), 0)
+        self.assertGreater(rxx_[9], 5 * rxx_[10:20].mean())
+        # RXX should be decreasing for the first 10 lags.
+        diff = np.diff(rxx_)
+        self.assertLess(diff[:10].max(), 0)
+
+  def test_normalization(self):
+    l = 10000
+    x = 3 * rng.randn(l).astype(self.dtype)
+    x_ph = array_ops.placeholder_with_default(
+        x, shape=(l,) if self.use_static_shape else None)
+    with spectral_ops_test_util.fft_kernel_label_map():
+      with self.test_session():
+        rxx = sample_stats.auto_correlation(
+            x_ph, max_lags=l // 2, center=True, normalize=True)
+        if self.use_static_shape:
+          self.assertAllEqual((l // 2 + 1,), rxx.shape)
+        rxx_ = rxx.eval()
+        # Note that RXX[0] = 1, despite the fact that E[X^2] = 9, and this is
+        # due to normalize=True.
+        # OSS CPU FFT has some accuracy issues is not the most accurate.
+        # So this tolerance is a bit bad.
+        self.assertAllClose(1., rxx_[0], rtol=0.05)
+        # The maximal error in the rest of the sequence is not great.
+        self.assertAllClose(np.zeros(l // 2), rxx_[1:], atol=0.1)
+        # The mean error in the rest is ok, actually 0.008 when I tested it.
+        self.assertLess(np.abs(rxx_[1:]).mean(), 0.02)
+
+
+class AutoCorrelationTestStaticShapeFloat32(test.TestCase,
+                                            _AutoCorrelationTest):
+
+  @property
+  def dtype(self):
+    return np.float32
+
+  @property
+  def use_static_shape(self):
+    return True
+
+
+class AutoCorrelationTestStaticShapeComplex64(test.TestCase,
+                                              _AutoCorrelationTest):
+
+  @property
+  def dtype(self):
+    return np.complex64
+
+  @property
+  def use_static_shape(self):
+    return True
+
+
+class AutoCorrelationTestDynamicShapeFloat32(test.TestCase,
+                                             _AutoCorrelationTest):
+
+  @property
+  def dtype(self):
+    return np.float32
+
+  @property
+  def use_static_shape(self):
+    return False
+
+
 class PercentileTestWithLowerInterpolation(test.TestCase):
 
   _interpolation = "lower"
diff --git a/tensorflow/contrib/distributions/python/ops/sample_stats.py b/tensorflow/contrib/distributions/python/ops/sample_stats.py
index 2a4b92c729..dfc8133619 100644
--- a/tensorflow/contrib/distributions/python/ops/sample_stats.py
+++ b/tensorflow/contrib/distributions/python/ops/sample_stats.py
@@ -28,12 +28,190 @@ from tensorflow.python.ops import check_ops
 from tensorflow.python.ops import control_flow_ops
 from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import nn_ops
+from tensorflow.python.ops import spectral_ops
+from tensorflow.python.ops.distributions import util
 
 __all__ = [
+    "auto_correlation",
     "percentile",
 ]
 
 
+# TODO(langmore) Write separate versions of this for real/complex dtype, taking
+# advantage of optimized real-fft ops.
+def auto_correlation(
+    x,
+    axis=-1,
+    max_lags=None,
+    center=True,
+    normalize=True,
+    name="auto_correlation"):
+  """Auto correlation along one axis.
+
+  Given a `1-D` wide sense stationary (WSS) sequence `X`, the auto correlation
+  `RXX` may be defined as  (with `E` expectation and `Conj` complex conjugate)
+
+  ```
+  RXX[m] := E{ W[m] Conj(W[0]) } = E{ W[0] Conj(W[-m]) },
+  W[n]   := (X[n] - MU) / S,
+  MU     := E{ X[0] },
+  S**2   := E{ (X[0] - MU) Conj(X[0] - MU) }.
+  ```
+
+  This function takes the viewpoint that `x` is (along one axis) a finite
+  sub-sequence of a realization of (WSS) `X`, and then uses `x` to produce an
+  estimate of `RXX[m]` as follows:
+
+  After extending `x` from length `L` to `inf` by zero padding, the auto
+  correlation estimate `rxx[m]` is computed for `m = 0, 1, ..., max_lags` as
+
+  ```
+  rxx[m] := (L - m)**-1 sum_n w[n + m] Conj(w[n]),
+  w[n]   := (x[n] - mu) / s,
+  mu     := L**-1 sum_n x[n],
+  s**2   := L**-1 sum_n (x[n] - mu) Conj(x[n] - mu)
+  ```
+
+  The error in this estimate is proportional to `1 / sqrt(len(x) - m)`, so users
+  often set `max_lags` small enough so that the entire output is meaningful.
+
+  Note that since `mu` is an imperfect estimate of `E{ X[0] }`, and we divide by
+  `len(x) - m` rather than `len(x) - m - 1`, our estimate of auto correlation
+  contains a slight bias, which goes to zero as `len(x) - m --> infinity`.
+
+  Args:
+    x:  `float32` or `complex64` `Tensor`.
+    axis:  Python `int`. The axis number along which to compute correlation.
+      Other dimensions index different batch members.
+    max_lags:  Positive `int` tensor.  The maximum value of `m` to consider
+      (in equation above).  If `max_lags >= x.shape[axis]`, we effectively
+      re-set `max_lags` to `x.shape[axis] - 1`.
+    center:  Python `bool`.  If `False`, do not subtract the mean estimate `mu`
+      from `x[n]` when forming `w[n]`.
+    normalize:  Python `bool`.  If `False`, do not divide by the variance
+      estimate `s**2` when forming `w[n]`.
+    name:  `String` name to prepend to created ops.
+
+  Returns:
+    `rxx`: `Tensor` of same `dtype` as `x`.  `rxx.shape[i] = x.shape[i]` for
+      `i != axis`, and `rxx.shape[axis] = max_lags + 1`.
+
+  Raises:
+    TypeError:  If `x` is not a supported type.
+  """
+  # Implementation details:
+  # Extend length N / 2 1-D array x to length N by zero padding onto the end.
+  # Then, set
+  #   F[x]_k := sum_n x_n exp{-i 2 pi k n / N }.
+  # It is not hard to see that
+  #   F[x]_k Conj(F[x]_k) = F[R]_k, where
+  #   R_m := sum_n x_n Conj(x_{(n - m) mod N}).
+  # One can also check that R_m / (N / 2 - m) is an unbiased estimate of RXX[m].
+
+  # Since F[x] is the DFT of x, this leads us to a zero-padding and FFT/IFFT
+  # based version of estimating RXX.
+  # Note that this is a special case of the Wiener-Khinchin Theorem.
+  with ops.name_scope(name, values=[x]):
+    x = ops.convert_to_tensor(x, name="x")
+
+    # Rotate dimensions of x in order to put axis at the rightmost dim.
+    # FFT op requires this.
+    rank = util.prefer_static_rank(x)
+    if axis < 0:
+      axis = rank + axis
+    shift = rank - 1 - axis
+    # Suppose x.shape[axis] = T, so there are T "time" steps.
+    #   ==> x_rotated.shape = B + [T],
+    # where B is x_rotated's batch shape.
+    x_rotated = util.rotate_transpose(x, shift)
+
+    if center:
+      x_rotated -= math_ops.reduce_mean(x_rotated, axis=-1, keepdims=True)
+
+    # x_len = N / 2 from above explanation.  The length of x along axis.
+    # Get a value for x_len that works in all cases.
+    x_len = util.prefer_static_shape(x_rotated)[-1]
+
+    # TODO(langmore) Investigate whether this zero padding helps or hurts.  At
+    # the moment is is necessary so that all FFT implementations work.
+    # Zero pad to the next power of 2 greater than 2 * x_len, which equals
+    # 2**(ceil(Log_2(2 * x_len))).  Note: Log_2(X) = Log_e(X) / Log_e(2).
+    x_len_float64 = math_ops.cast(x_len, np.float64)
+    target_length = math_ops.pow(
+        np.float64(2.),
+        math_ops.ceil(math_ops.log(x_len_float64 * 2) / np.log(2.)))
+    pad_length = math_ops.cast(target_length - x_len_float64, np.int32)
+
+    # We should have:
+    # x_rotated_pad.shape = x_rotated.shape[:-1] + [T + pad_length]
+    #                     = B + [T + pad_length]
+    x_rotated_pad = util.pad(x_rotated, axis=-1, back=True, count=pad_length)
+
+    dtype = x.dtype
+    if not dtype.is_complex:
+      if not dtype.is_floating:
+        raise TypeError("Argument x must have either float or complex dtype"
+                        " found: {}".format(dtype))
+      x_rotated_pad = math_ops.complex(x_rotated_pad,
+                                       dtype.real_dtype.as_numpy_dtype(0.))
+
+    # Autocorrelation is IFFT of power-spectral density (up to some scaling).
+    fft_x_rotated_pad = spectral_ops.fft(x_rotated_pad)
+    spectral_density = fft_x_rotated_pad * math_ops.conj(fft_x_rotated_pad)
+    # shifted_product is R[m] from above detailed explanation.
+    # It is the inner product sum_n X[n] * Conj(X[n - m]).
+    shifted_product = spectral_ops.ifft(spectral_density)
+
+    # Cast back to real-valued if x was real to begin with.
+    shifted_product = math_ops.cast(shifted_product, dtype)
+
+    # Figure out if we can deduce the final static shape, and set max_lags.
+    # Use x_rotated as a reference, because it has the time dimension in the far
+    # right, and was created before we performed all sorts of crazy shape
+    # manipulations.
+    know_static_shape = True
+    if not x_rotated.shape.is_fully_defined():
+      know_static_shape = False
+    if max_lags is None:
+      max_lags = x_len - 1
+    else:
+      max_lags = ops.convert_to_tensor(max_lags, name="max_lags")
+      max_lags_ = tensor_util.constant_value(max_lags)
+      if max_lags_ is None or not know_static_shape:
+        know_static_shape = False
+        max_lags = math_ops.minimum(x_len - 1, max_lags)
+      else:
+        max_lags = min(x_len - 1, max_lags_)
+
+    # Chop off the padding.
+    # We allow users to provide a huge max_lags, but cut it off here.
+    # shifted_product_chopped.shape = x_rotated.shape[:-1] + [max_lags]
+    shifted_product_chopped = shifted_product[..., :max_lags + 1]
+
+    # If possible, set shape.
+    if know_static_shape:
+      chopped_shape = x_rotated.shape.as_list()
+      chopped_shape[-1] = min(x_len, max_lags + 1)
+      shifted_product_chopped.set_shape(chopped_shape)
+
+    # Recall R[m] is a sum of N / 2 - m nonzero terms x[n] Conj(x[n - m]).  The
+    # other terms were zeros arising only due to zero padding.
+    # `denominator = (N / 2 - m)` (defined below) is the proper term to
+    # divide by by to make this an unbiased estimate of the expectation
+    # E[X[n] Conj(X[n - m])].
+    x_len = math_ops.cast(x_len, dtype.real_dtype)
+    max_lags = math_ops.cast(max_lags, dtype.real_dtype)
+    denominator = x_len - math_ops.range(0., max_lags + 1.)
+    denominator = math_ops.cast(denominator, dtype)
+    shifted_product_rotated = shifted_product_chopped / denominator
+
+    if normalize:
+      shifted_product_rotated /= shifted_product_rotated[..., :1]
+
+    # Transpose dimensions back to those of x.
+    return util.rotate_transpose(shifted_product_rotated, -shift)
+
+
 # TODO(langmore) To make equivalent to numpy.percentile:
 #  Make work with a sequence of floats or single float for 'q'.
 #  Make work with "linear", "midpoint" interpolation. (linear should be default)
-- 
GitLab


From d2355fcee9f47cc2e8225f8ff54f7c12fa8045f0 Mon Sep 17 00:00:00 2001
From: Justin Lebar <jlebar@google.com>
Date: Sun, 17 Dec 2017 16:02:29 -0800
Subject: [PATCH 1150/1225] [XLA] Fix comments on HloInstruction::epsilon() and
 HloInstruction::feature_index().

These functions can be called for kBatchNorm{Training,Inference,Grad},
not just kBatchNormTraining.

No functional change.

PiperOrigin-RevId: 179363059
---
 tensorflow/compiler/xla/service/hlo_instruction.h | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/tensorflow/compiler/xla/service/hlo_instruction.h b/tensorflow/compiler/xla/service/hlo_instruction.h
index 7139db1f81..2083c1b81d 100644
--- a/tensorflow/compiler/xla/service/hlo_instruction.h
+++ b/tensorflow/compiler/xla/service/hlo_instruction.h
@@ -765,13 +765,15 @@ class HloInstruction {
   // Returns feature_index field associated with the instruction. The index
   // represents the index of the feature dimension.
   //
-  // Precondition: opcode() == HloOpcode::kBatchNormTraining
+  // Precondition: opcode() is one of kBatchNormTraining, kBatchNormInference,
+  // or kBatchNormGrad.
   int64 feature_index() const { return feature_index_; }
 
   // Returns a epsilon value associated with the instruction. The is a small
   // number added to the variance to avoid divide-by-zero error.
   //
-  // Precondition: opcode() == HloOpcode::kBatchNormTraining
+  // Precondition: opcode() is one of kBatchNormTraining, kBatchNormInference,
+  // or kBatchNormGrad.
   float epsilon() const { return epsilon_; }
 
   // Returns the infeed configuration string. The infeed configuration includes
-- 
GitLab


From e7bee822fe6bfa7ca861b09ba10769339692cf86 Mon Sep 17 00:00:00 2001
From: Justin Lebar <jlebar@google.com>
Date: Sun, 17 Dec 2017 21:10:28 -0800
Subject: [PATCH 1151/1225] [XLA] Don't reimplement
 ShapeUtil::MakeShapeWithMonotonicDim0MajorLayout inside of layout_assignment.

No functional change.

PiperOrigin-RevId: 179376105
---
 .../compiler/xla/service/layout_assignment.cc | 19 ++++++++-----------
 1 file changed, 8 insertions(+), 11 deletions(-)

diff --git a/tensorflow/compiler/xla/service/layout_assignment.cc b/tensorflow/compiler/xla/service/layout_assignment.cc
index af726271ae..b598c765fc 100644
--- a/tensorflow/compiler/xla/service/layout_assignment.cc
+++ b/tensorflow/compiler/xla/service/layout_assignment.cc
@@ -477,16 +477,10 @@ Status LayoutAssignment::AddMandatoryConstraints(
           /*mandatory=*/true));
     } else if (instruction->opcode() == HloOpcode::kCustomCall) {
       // Add constraints for kCustomCall instruction operands and instructions.
-      // For now we only support row major layouts for all inputs and outputs.
-      auto row_major_shape = [](const Shape& old_shape) {
-        Shape new_shape(old_shape);
-        std::vector<int64> dimension_order(new_shape.dimensions_size());
-        std::iota(dimension_order.rbegin(), dimension_order.rend(), 0);
-        *new_shape.mutable_layout() = LayoutUtil::MakeLayout(dimension_order);
-        return new_shape;
-      };
-
-      Shape result_shape(row_major_shape(instruction->shape()));
+      // For now we only support major-first layouts for all inputs and outputs.
+      Shape result_shape = ShapeUtil::MakeShapeWithMonotonicDim0MajorLayout(
+          instruction->shape().element_type(),
+          AsInt64Slice(instruction->shape().dimensions()));
       TF_RETURN_IF_ERROR(
           constraints->SetInstructionLayout(result_shape, instruction));
       for (int64 i = 0; i < instruction->operand_count(); ++i) {
@@ -496,7 +490,10 @@ Status LayoutAssignment::AddMandatoryConstraints(
           continue;
         }
 
-        Shape row_major_operand_shape(row_major_shape(operand_shape));
+        Shape row_major_operand_shape =
+            ShapeUtil::MakeShapeWithMonotonicDim0MajorLayout(
+                operand_shape.element_type(),
+                AsInt64Slice(operand_shape.dimensions()));
         TF_RETURN_IF_ERROR(constraints->SetOperandLayout(
             row_major_operand_shape, instruction, i, /*mandatory=*/true));
       }
-- 
GitLab


From 04df827cfd9d0409d85c1f4359c97262f08a245b Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Sun, 17 Dec 2017 22:38:49 -0800
Subject: [PATCH 1152/1225] Update readme.

PiperOrigin-RevId: 179380041
---
 tensorflow/compiler/xla/tools/parser/README.md | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/tensorflow/compiler/xla/tools/parser/README.md b/tensorflow/compiler/xla/tools/parser/README.md
index 9acdfd4b02..93bbc7d659 100644
--- a/tensorflow/compiler/xla/tools/parser/README.md
+++ b/tensorflow/compiler/xla/tools/parser/README.md
@@ -1,9 +1,5 @@
 # HloModule string syntax
 
-TODO: Support all subcomputations (for fusion, reduce, ...).
-
-TODO: Support all extra attributes, e.g. dimensions, strides.
-
 ```yacc
 hlo_module
   : 'HloModule' name computations
-- 
GitLab


From 119f5d477b82fe7e37bbcc28309b3b822a50789e Mon Sep 17 00:00:00 2001
From: Mark Daoust <markdaoust@google.com>
Date: Mon, 18 Dec 2017 04:12:29 -0800
Subject: [PATCH 1153/1225] Sync Premade and Custom estimator docs with example
 code.

PiperOrigin-RevId: 179404175
---
 .../docs_src/get_started/custom_estimators.md | 140 ++++++++++--------
 .../get_started/premade_estimators.md         | 107 +++++++------
 .../docs_src/get_started/saving_models.md     |  15 +-
 3 files changed, 147 insertions(+), 115 deletions(-)

diff --git a/tensorflow/docs_src/get_started/custom_estimators.md b/tensorflow/docs_src/get_started/custom_estimators.md
index ae9e107e56..81ab68a803 100644
--- a/tensorflow/docs_src/get_started/custom_estimators.md
+++ b/tensorflow/docs_src/get_started/custom_estimators.md
@@ -4,13 +4,31 @@ This document introduces custom Estimators. In particular, this document
 demonstrates how to create a custom @{tf.estimator.Estimator$Estimator} that
 mimics the behavior of the pre-made Estimator
 @{tf.estimator.DNNClassifier$`DNNClassifier`} in solving the Iris problem. See
-the @{$get_started/estimator$Pre-Made Estimators chapter} for details.
+the @{$get_started/premade_estimators$Pre-Made Estimators chapter} for details
+on the Iris problem.
+
+To download and access the example code invoke the following two commands:
+
+```shell
+git clone https://github.com/tensorflow/models/
+cd models/samples/core/get_started
+```
+
+In this document we wil be looking at
+[`custom_estimator.py`](https://github.com/tensorflow/models/blob/master/samples/core/get_started/custom_estimator.py).
+You can run it with the following command:
+
+```bsh
+python custom_estimator.py
+```
+
+If you are feeling impatient, feel free to compare and contrast
+[`custom_estimatr.py`](https://github.com/tensorflow/models/blob/master/samples/core/get_started/custom_estimator.py)
+with
+[`premade_estimatr.py`](https://github.com/tensorflow/models/blob/master/samples/core/get_started/premade_estimator.py).
+(which is in the same directory).
 
-If you are feeling impatient, feel free to compare and contrast the following
-full programs:
 
-* Iris implemented with the [pre-made DNNClassifier Estimator](https://github.com/tensorflow/models/blob/master/samples/core/get_started/premade_estimator.py).
-* Iris implemented with a [custom Estimator](https://github.com/tensorflow/models/blob/master/samples/core/get_started/custom_estimator.py).
 
 ## Pre-made vs. custom
 
@@ -64,14 +82,16 @@ and a logits output layer.
 
 ## Write an Input function
 
-In our custom Estimator implementation, we'll reuse the input function we used
-in the pre-made Estimator implementation. Namely:
+Our custom Estimator implementation uses the same input function as our
+@{$get_started/premade_estimators$pre-made Estimator implementation}, from
+[`iris_data.py`](https://github.com/tensorflow/models/blob/master/samples/core/get_started/iris_data.py).
+Namely:
 
 ```python
 def train_input_fn(features, labels, batch_size):
     """An input function for training"""
     # Convert the inputs to a Dataset.
-    dataset = tf.data.Dataset.from_tensor_slices((features, labels))
+    dataset = tf.data.Dataset.from_tensor_slices((dict(features), labels))
 
     # Shuffle, repeat, and batch the examples.
     dataset = dataset.shuffle(1000).repeat().batch(batch_size)
@@ -85,8 +105,8 @@ This input function builds an input pipeline that yields batches of
 
 ## Create feature columns
 
-<!-- TODO(markdaoust): link to feature_columns when it exists-->
-As detailed in @{$get_started/estimator$Premade Estimators}, you must define
+As detailed in the @{$get_started/estimator$Premade Estimators} and
+@{$get_started/feature_columns$Feature Columns} chapters, you must define
 your model's feature columns to specify how the model should use each feature.
 Whether working with pre-made Estimators or custom Estimators, you define
 feature columns in the same fashion.
@@ -119,20 +139,23 @@ the input function; that is, `features` and `labels` are the handles to the
 data your model will use. The `mode` argument indicates whether the caller is
 requesting training, predicting, or evaluation.
 
-The caller may pass `params` to an Estimator's constructor. The `params` passed
-to the constructor become the `params` passed to `model_fn`.
+The caller may pass `params` to an Estimator's constructor. Any `params` passed
+to the constructor are in turn passed on to the `model_fn`. In
+[`custom_estimator.py`](https://github.com/tensorflow/models/blob/master/samples/core/get_started/custom_estimator.py)
+the following lines create the estimator and set the params to configure the
+model. This configuration step is similar to how we configured the @{tf.estimator.DNNClassifier} in
+@{$get_started/premade_estimators}.
 
 ```python
-    # Build 2 hidden layer DNN with 10, 10 units respectively.
-    classifier = tf.estimator.Estimator(
-        model_fn=my_model,
-        params={
-            'feature_columns': my_feature_columns,
-            # Two hidden layers of 10 nodes each.
-            'hidden_units': [10, 10],
-            # The model must choose between 3 classes.
-            'n_classes': 3,
-        })
+classifier = tf.estimator.Estimator(
+    model_fn=my_model,
+    params={
+        'feature_columns': my_feature_columns,
+        # Two hidden layers of 10 nodes each.
+        'hidden_units': [10, 10],
+        # The model must choose between 3 classes.
+        'n_classes': 3,
+    })
 ```
 
 To implement a typical model function, you must do the following:
@@ -163,7 +186,7 @@ feature columns into input for your model. For example:
 ```
 
 The preceding line applies the transformations defined by your feature columns,
-creating the input layer of our model.
+creating the model's input layer.
 
 <div style="width:100%; margin:auto; margin-bottom:10px; margin-top:20px;">
 <img style="height:260px"
@@ -186,6 +209,7 @@ is connected to every node in the preceding layer.  Here's the relevant code:
     for units in params['hidden_units']:
         net = tf.layers.dense(net, units=units, activation=tf.nn.relu)
 ```
+
 * The `units` parameter defines the number of output neurons in a given layer.
 * The `activation` parameter defines the [activation function](https://developers.google.com/machine-learning/glossary/#a) —
   [Relu](https://developers.google.com/machine-learning/glossary/#ReLU) in this
@@ -193,12 +217,11 @@ is connected to every node in the preceding layer.  Here's the relevant code:
 
 The variable `net` here signifies the current top layer of the network. During
 the first iteration, `net` signifies the input layer. On each loop iteration
-`tf.layers.dense` creates a new layer, which takes the previous layer as its
-input. So, the loop uses `net` to pass the previously created layer as input
-to the layer being created.
+`tf.layers.dense` creates a new layer, which takes the previous layer's output
+as its input, using the variable `net`.
 
 After creating two hidden layers, our network looks as follows. For
-simplicity, the figure only shows four hidden units in each layer.
+simplicity, the figure does not show all the units in each layer.
 
 <div style="width:100%; margin:auto; margin-bottom:10px; margin-top:20px;">
 <img style="height:260px"
@@ -235,8 +258,8 @@ The final hidden layer feeds into the output layer.
 
 When defining an output layer, the `units` parameter specifies the number of
 outputs. So, by setting `units` to `params['n_classes']`, the model produces
-one output value per class. Each element of the output vector will contains the
-score, or "logit", calculated to the associated class of Iris: Setosa,
+one output value per class. Each element of the output vector will contain the
+score, or "logit", calculated for the associated class of Iris: Setosa,
 Versicolor, or Virginica, respectively.
 
 Later on, these logits will be transformed into probabilities by the
@@ -255,11 +278,12 @@ function looks like this:
 def my_model_fn(
    features, # This is batch_features from input_fn
    labels,   # This is batch_labels from input_fn
-   mode):    # An instance of tf.estimator.ModeKeys, see below
+   mode,     # An instance of tf.estimator.ModeKeys, see below
+   params):  # Additional configuration
 ```
 
 Focus on that third argument, mode. As the following table shows, when someone
-calls train, evaluate, or predict, the Estimator framework invokes your model
+calls `train`, `evaluate`, or `predict`, the Estimator framework invokes your model
 function with the mode parameter set as follows:
 
 | Estimator method                 |    Estimator Mode |
@@ -344,8 +368,8 @@ decreases.
 This function returns the average over the whole batch.
 
 ```python
-    # Compute loss.
-    loss = tf.losses.sparse_softmax_cross_entropy(labels=labels, logits=logits)
+# Compute loss.
+loss = tf.losses.sparse_softmax_cross_entropy(labels=labels, logits=logits)
 ```
 
 ### Evaluate
@@ -364,10 +388,10 @@ true values, that is, against the labels provided by the input function. The
 same shape. Here's the call to @{tf.metrics.accuracy}:
 
 ``` python
-    # Compute evaluation metrics.
-    accuracy = tf.metrics.accuracy(labels=labels,
-                                   predictions=predicted_classes,
-                                   name='acc_op')
+# Compute evaluation metrics.
+accuracy = tf.metrics.accuracy(labels=labels,
+                               predictions=predicted_classes,
+                               name='acc_op')
 ```
 
 The @{tf.estimator.EstimatorSpec$`EstimatorSpec`} returned for evaluation
@@ -382,16 +406,16 @@ same dictionary.  Then, we'll pass that dictionary in the `eval_metric_ops`
 argument of `tf.estimator.EstimatorSpec`. Here's the code:
 
 ```python
-    metrics = {'accuracy': accuracy}
-    tf.summary.scalar('accuracy', accuracy[1])
+metrics = {'accuracy': accuracy}
+tf.summary.scalar('accuracy', accuracy[1])
 
-    if mode == tf.estimator.ModeKeys.EVAL:
-        return tf.estimator.EstimatorSpec(
-            mode, loss=loss, eval_metric_ops=metrics)
+if mode == tf.estimator.ModeKeys.EVAL:
+    return tf.estimator.EstimatorSpec(
+        mode, loss=loss, eval_metric_ops=metrics)
 ```
 
-The @{tf.summary.scalar} will make accuracy available to TensorBoard (more on
-this later).
+The @{tf.summary.scalar} will make accuracy available to TensorBoard
+in both `TRAIN` and `EVAL` modes. (More on this later).
 
 ### Train
 
@@ -407,11 +431,10 @@ optimizers—feel free to experiment with them.
 Here is the code that builds the optimizer:
 
 ``` python
-  # Instantiate an optimizer.
-  optimizer = tf.train.AdagradOptimizer(learning_rate=0.1)
+optimizer = tf.train.AdagradOptimizer(learning_rate=0.1)
 ```
 
-Next, we train the model using the optimizer's
+Next, we build the training operation using the optimizer's
 @{tf.train.Optimizer.minimize$`minimize`} method on the loss we calculated
 earlier.
 
@@ -425,9 +448,7 @@ argument of `minimize`.
 Here's the code to train the model:
 
 ``` python
-  # Train the model by establishing an objective, which is to
-  # minimize loss using that optimizer.
-  train_op = optimizer.minimize(loss, global_step=tf.train.get_global_step())
+train_op = optimizer.minimize(loss, global_step=tf.train.get_global_step())
 ```
 
 The @{tf.estimator.EstimatorSpec$`EstimatorSpec`} returned for training
@@ -439,11 +460,7 @@ must have the following fields set:
 Here's our code to call `EstimatorSpec`:
 
 ```python
-    # Return training information.
-    return tf.estimator.EstimatorSpec(
-        mode=tf.estimator.ModeKeys.TRAIN,
-        loss=loss,
-        train_op=train_op)
+return tf.estimator.EstimatorSpec(mode, loss=loss, train_op=train_op)
 ```
 
 The model function is now complete.
@@ -469,14 +486,15 @@ arguments of `DNNClassifier`; that is, the `params` dictionary lets you
 configure your Estimator without modifying the code in the `model_fn`.
 
 The rest of the code to train, evaluate, and generate predictions using our
-Estimator is the same as for the pre-made `DNNClassifier`. For example, the
-following line will train the model:
+Estimator is the same as in the
+@{$get_started/premade_estimators$Premade Estimators} chapter. For
+example, the following line will train the model:
 
 ```python
-    # Train the Model.
-    classifier.train(
-        input_fn=lambda:train_input_fn(train_x, train_y, args.batch_size),
-        steps=args.train_steps)
+# Train the Model.
+classifier.train(
+    input_fn=lambda:iris_data.train_input_fn(train_x, train_y, args.batch_size),
+    steps=args.train_steps)
 ```
 
 ## TensorBoard
diff --git a/tensorflow/docs_src/get_started/premade_estimators.md b/tensorflow/docs_src/get_started/premade_estimators.md
index d6fc1643f0..00b936905d 100644
--- a/tensorflow/docs_src/get_started/premade_estimators.md
+++ b/tensorflow/docs_src/get_started/premade_estimators.md
@@ -6,7 +6,7 @@ how to write the Iris classification problem in TensorFlow.
 
 Prior to reading this document, do the following:
 
-* [Install TensorFlow](install/index.md).
+* @{$install$Install TensorFlow}.
 * If you installed TensorFlow with virtualenv or Anaconda, activate your
   TensorFlow environment.
 * To keep the data import simple, our Iris example uses Pandas. You can
@@ -28,7 +28,11 @@ Take the following steps to get the sample code for this program:
 
        `cd models/samples/core/get_started/`
 
-The program described in this document is called `premade_estimator.py`.
+The program described in this document is
+[`premade_estimator.py`](https://github.com/tensorflow/models/blob/master/samples/core/get_started/premade_estimator.py).
+This program uses
+[`iris_data.py`](https://github.com/tensorflow/models/blob/master/samples/core/get_started/iris_data.py)
+To fetch its training data.
 
 ### Running the program
 
@@ -38,15 +42,15 @@ You run TensorFlow programs as you would run any Python program. For example:
 python premade_estimator.py
 ```
 
-The program should output training logs and some predictions against a test
-set. For example, the first line in the following output shows that the model
-thinks there is a 99.6% chance that the first example in the test set is a
-Sentosa. Since the test set `expected "Setosa"`, this appears to be a good
-prediction.
+The program should output training logs followed by some predictions against
+the test set. For example, the first line in the following output shows that
+the model thinks there is a 99.6% chance that the first example in the test
+set is a Setosa. Since the test set `expected "Setosa"`, this appears to be
+a good prediction.
 
 ``` None
 ...
-Prediction is "Sentosa" (99.6%), expected "Setosa"
+Prediction is "Setosa" (99.6%), expected "Setosa"
 
 Prediction is "Versicolor" (99.8%), expected "Versicolor"
 
@@ -76,12 +80,12 @@ The TensorFlow Programming Environment
 
 We strongly recommend writing TensorFlow programs with the following APIs:
 
-* Estimators, which represent a complete model. The Estimator API provides
-  methods to train the model, to judge the model's accuracy, and to generate
-  predictions.
-* Datasets, which build a data input pipeline. The Dataset API has methods to
-  load and manipulate data, and feed it into your model. The Datasets API meshes
-  well with the Estimators API.
+* @{tf.estimator$Estimators}, which represent a complete model.
+  The Estimator API provides methods to train the model, to judge the model's
+  accuracy, and to generate predictions.
+* @{$get_started/datasets_quickstart$Datasets}, which build a data input
+  pipeline. The Dataset API has methods to load and manipulate data, and feed
+  it into your model. The Datasets API meshes well with the Estimators API.
 
 ## Classifying irises: an overview
 
@@ -130,7 +134,7 @@ The following table shows three examples in the data set:
 
 |sepal length | sepal width | petal length | petal width| species (label) |
 |------------:|------------:|-------------:|-----------:|:---------------:|
-|         5.1 |         3.3 |          1.7 |        0.5 |   0 (Sentosa)   |
+|         5.1 |         3.3 |          1.7 |        0.5 |   0 (Setosa)   |
 |         5.0 |         2.3 |          3.3 |        1.0 |   1 (versicolor)|
 |         6.4 |         2.8 |          5.6 |        2.2 |   2 (virginica) |
 
@@ -145,11 +149,10 @@ topology:
 The following figure illustrates the features, hidden layers, and predictions
 (not all of the nodes in the hidden layers are shown):
 
-
 <div style="width:80%; margin:auto; margin-bottom:10px; margin-top:20px;">
 <img style="width:100%"
   alt="A diagram of the network architecture: Inputs, 2 hidden layers, and outputs"
-  src="../images/iris_model.png">
+  src="../images/custom_estimators/full_network.png">
 </div>
 <div style="text-align: center">
 The Model.
@@ -252,9 +255,11 @@ The Dataset API can handle a lot of common cases for you. For example,
 using the Dataset API, you can easily read in records from a large collection
 of files in parallel and join them into a single stream.
 
-To keep things simple in this example we are going to load the data with pandas, and build our input pipeline from this in-memory data.
+To keep things simple in this example we are going to load the data with pandas,
+and build our input pipeline from this in-memory data.
 
-Here is the input function used for training in this program:
+Here is the input function used for training in this program, which is available
+in [`iris_data.py`](https://github.com/tensorflow/models/blob/master/samples/core/get_started/iris_data.py):
 
 ``` python
 def train_input_fn(features, labels, batch_size):
@@ -272,14 +277,14 @@ def train_input_fn(features, labels, batch_size):
 ## Define the Feature Columns
 
 A [**Feature Column**](https://developers.google.com/machine-learning/glossary/#feature_columns)
-is an object describing how the model should use raw input features from the
+is an object describing how the model should use raw input data from the
 features dictionary. When you build an Estimator model, you pass it a list of
 feature columns that describes each of the features you want the model to use.
-
-These objects are created by functions in the @{tf.feature_column} module. `tf.feature_column` methods provide many different ways to represent data.
+The @{tf.feature_column} module provides many options for representing data
+to the model.
 
 For Iris, the 4 raw features are numeric values, so we'll build a list of
-feature columns, to tell the Estimator model to represent each of the four
+feature columns to tell the Estimator model to represent each of the four
 features as 32-bit floating-point values. Therefore, the code to create the
 Feature Column is simply:
 
@@ -291,7 +296,8 @@ for key in train_x.keys():
 ```
 
 Feature Columns can be far more sophisticated than those we're showing here.
-<!--TODO(markdaoust) add link to feature_columns doc when it exists.-->
+We detail feature columns @{$get_started/feature_columns$later on} in
+getting started.
 
 Now that we have the description of how we want the model to represent the raw
 features, we can build the estimator.
@@ -305,8 +311,7 @@ provides several pre-made classifier Estimators, including:
 * @{tf.estimator.DNNClassifier}—for deep models that perform multi-class
   classification.
 * @{tf.estimator.DNNLinearCombinedClassifier}—for wide-n-deep models.
-* @{tf.estimator.LinearClassifier}—for linear models that feed results into
-  binary classifiers.
+* @{tf.estimator.LinearClassifier}— for classifiers based on linear models.
 
 For the Iris problem, `tf.estimator.DNNClassifier` seems like the best choice.
 Here's how we instantiated this Estimator:
@@ -336,14 +341,15 @@ Train the model by calling the Estimator's `train` method as follows:
 ```python
 # Train the Model.
 classifier.train(
-    input_fn=lambda:train_input_fn(train_x, train_y, args.batch_size),
+    input_fn=lambda:iris_data.train_input_fn(train_x, train_y, args.batch_size),
     steps=args.train_steps)
 ```
 
-Here we wrap up our `input_fn` call in a [`lambda`](https://docs.python.org/3/tutorial/controlflow.html)
-to allow the Estimator to call it, at the correct time, with no arguments.
-The `steps` argument tells the method to stop training after a number of
-training steps.
+Here we wrap up our `input_fn` call in a
+[`lambda`](https://docs.python.org/3/tutorial/controlflow.html)
+to capture the arguments while providing an input function that takes no
+arguments, as expected by the Estimator. The `steps` argument tells the method
+to stop training after a number of training steps.
 
 ### Evaluate the trained model
 
@@ -354,14 +360,14 @@ model on the test data:
 ```python
 # Evaluate the model.
 eval_result = classifier.evaluate(
-    input_fn=lambda:eval_input_fn(test_x, test_y, args.batch_size))
+    input_fn=lambda:iris_data.eval_input_fn(test_x, test_y, args.batch_size))
 
 print('\nTest set accuracy: {accuracy:0.3f}\n'.format(**eval_result))
 ```
 
-Note how unlike our call to the `train` method, we did not pass the `steps`
-argument to evaluate. Our `eval_input_fn` doesn't use the `repeat` method on
-the dataset, so evaluation just runs to the end of the data.
+Unlike our call to the `train` method, we did not pass the `steps`
+argument to evaluate. Our `eval_input_fn` only yields a single
+[epoch](https://developers.google.com/machine-learning/glossary/#epoch) of data.
 
 Running this code yields the following output (or something similar):
 
@@ -387,7 +393,8 @@ predict_x = {
 }
 
 predictions = classifier.predict(
-    input_fn=lambda:eval_input_fn(predict_x, batch_size=args.batch_size))
+    input_fn=lambda:iris_data.eval_input_fn(predict_x,
+                                            batch_size=args.batch_size))
 ```
 
 The `predict` method returns a Python iterable, yielding a dictionary of
@@ -401,29 +408,35 @@ for pred_dict, expec in zip(predictions, expected):
 
     class_id = pred_dict['class_ids'][0]
     probability = pred_dict['probabilities'][class_id]
-    print(template.format(SPECIES[class_id], 100 * probability, expec))
+
+    print(template.format(iris_data.SPECIES[class_id],
+                          100 * probability, expec))
 ```
 
 Running the preceding code yields the following output:
 
 ``` None
 ...
-Prediction is "Sentosa" (99.6%), expected "Setosa"
+Prediction is "Setosa" (99.6%), expected "Setosa"
 
 Prediction is "Versicolor" (99.8%), expected "Versicolor"
 
 Prediction is "Virginica" (97.9%), expected "Virginica"
 ```
 
-## Next
 
-Now that you've gotten started writing TensorFlow programs.
+## Summary
+
+Pre-made Estimators are an effective way to quickly create standard models.
+
+Now that you've gotten started writing TensorFlow programs, consider the
+following material:
 
-* For more on Datasets, see the
-  @{$programmers_guide/datasets$Programmer's guide} and
-  @{tf.data$reference documentation}.
-* For more on Estimators, see the
-  @{$programmers_guide/estimators$Programmer's guide} and
-  @{tf.estimator$reference documentation}.
-<!--TODO(markdaoust) add links to next get_started section when it exists.-->
+* @{$get_started/saving_models$Checkpoints} to learn how to save and restore
+  models.
+* @{$get_started/datasets_quickstart$Datasets} to learn more about importing
+  data into your
+  model.
+* @{$get_started/custom_estimators$Creating Custom Estimators} to learn how to
+  write your own Estimator, customized for a particular problem.
 
diff --git a/tensorflow/docs_src/get_started/saving_models.md b/tensorflow/docs_src/get_started/saving_models.md
index 056263c157..680e1c0d3f 100644
--- a/tensorflow/docs_src/get_started/saving_models.md
+++ b/tensorflow/docs_src/get_started/saving_models.md
@@ -15,9 +15,8 @@ This document focuses on checkpoints. For details on SavedModel, see the
 
 ## Sample code
 
-This document relies on the same Iris classification example detailed in
-<!-- TODO (barryr): fill in link when module settles down. --> 
-@{$premade_estimators$Getting Started with TensorFlow}.
+This document relies on the same
+[https://github.com/tensorflow/models/blob/master/samples/core/get_started/premade_estimator.py](Iris classification example) detailed in @{$premade_estimators$Getting Started with TensorFlow}.
 To download and access the example, invoke the following two commands:
 
 ```shell
@@ -228,10 +227,12 @@ This separation will keep your checkpoints recoverable.
 
 ## Summary
 
-Checkpoints provide an easy automatic mechanism for storing and restoring
-models created by Estimators.  See the @{$saved_model$Saving and Restoring}
+Checkpoints provide an easy automatic mechanism for saving and restoring
+models created by Estimators.
+
+See the @{$saved_model$Saving and Restoring}
 chapter of the *TensorFlow Programmer's Guide* for details on:
 
-*   Saving and restoring models created by low-level TensorFlow APIs.
-*   Saving and restoring models in the SavedModel format, which is a
+*   Saving and restoring models using low-level TensorFlow APIs.
+*   Exporting and importing models in the SavedModel format, which is a
     language-neutral, recoverable, serialization format.
-- 
GitLab


From 13a8558846a1f0a821f3ee1f147fae833b00f088 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 18 Dec 2017 07:42:47 -0800
Subject: [PATCH 1154/1225] This is a bug fix for multi-replica training. When
 there is any parameter servers and more than one replica, replicas except the
 chief replica would complain un-initialized stale_counter variable since it
 doesn't live in parameter server.

PiperOrigin-RevId: 179421368
---
 .../opt/python/training/drop_stale_gradient_optimizer.py | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/tensorflow/contrib/opt/python/training/drop_stale_gradient_optimizer.py b/tensorflow/contrib/opt/python/training/drop_stale_gradient_optimizer.py
index f20c172ee3..4a905b1b2a 100644
--- a/tensorflow/contrib/opt/python/training/drop_stale_gradient_optimizer.py
+++ b/tensorflow/contrib/opt/python/training/drop_stale_gradient_optimizer.py
@@ -78,10 +78,11 @@ class DropStaleGradientOptimizer(optimizer.Optimizer):
   def apply_gradients(self, grads_and_vars, global_step=None, name=None):
     gradients = []
     # Number of stale gradients.
-    stale_counter = variable_scope.get_variable(
-        "stale_counter", [],
-        initializer=init_ops.zeros_initializer(),
-        trainable=False)
+    with ops.colocate_with(global_step):
+      stale_counter = variable_scope.get_variable(
+          "stale_counter", [],
+          initializer=init_ops.zeros_initializer(),
+          trainable=False)
 
     def _AcceptGradientOp():
       with ops.control_dependencies(
-- 
GitLab


From 566df46de272827a20915f0dd470d062a458e78f Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 18 Dec 2017 09:03:42 -0800
Subject: [PATCH 1155/1225] Fix support for functions to grappler items.

PiperOrigin-RevId: 179429486
---
 tensorflow/core/grappler/BUILD                |  1 +
 .../core/grappler/grappler_item_builder.cc    |  7 +++--
 .../grappler/grappler_item_builder_test.cc    | 26 +++++++++++++++++++
 3 files changed, 32 insertions(+), 2 deletions(-)

diff --git a/tensorflow/core/grappler/BUILD b/tensorflow/core/grappler/BUILD
index 99f1318072..2ca9b720ee 100644
--- a/tensorflow/core/grappler/BUILD
+++ b/tensorflow/core/grappler/BUILD
@@ -159,6 +159,7 @@ tf_cc_test(
         "//tensorflow/core:protos_all_cc",
         "//tensorflow/core:test",
         "//tensorflow/core:test_main",
+        "//tensorflow/core:testlib",
         "//tensorflow/core/grappler/inputs:trivial_test_graph_input_yielder",
     ],
 )
diff --git a/tensorflow/core/grappler/grappler_item_builder.cc b/tensorflow/core/grappler/grappler_item_builder.cc
index ca3c1a6667..866f87688c 100644
--- a/tensorflow/core/grappler/grappler_item_builder.cc
+++ b/tensorflow/core/grappler/grappler_item_builder.cc
@@ -450,8 +450,11 @@ std::unique_ptr<GrapplerItem> GrapplerItemFromMetaGraphDef(
   }
 
   // Instantiate all the missing attributes with their default values.
-  Status attr_status =
-      AddDefaultAttrsToGraphDef(&new_item->graph, *OpRegistry::Global(), 0);
+  Status attr_status = AddDefaultAttrsToGraphDef(
+      &new_item->graph,
+      FunctionLibraryDefinition(OpRegistry::Global(),
+                                new_item->graph.library()),
+      0);
   if (!attr_status.ok()) {
     LOG(ERROR) << "Failed to instantiate default attribute values: "
                << attr_status.error_message();
diff --git a/tensorflow/core/grappler/grappler_item_builder_test.cc b/tensorflow/core/grappler/grappler_item_builder_test.cc
index 4272179d3c..09d9aa4ef1 100644
--- a/tensorflow/core/grappler/grappler_item_builder_test.cc
+++ b/tensorflow/core/grappler/grappler_item_builder_test.cc
@@ -19,6 +19,7 @@ limitations under the License.
 #include "tensorflow/cc/gradients/grad_testutil.h"
 #include "tensorflow/cc/ops/functional_ops.h"
 #include "tensorflow/cc/ops/standard_ops.h"
+#include "tensorflow/core/framework/function_testlib.h"
 #include "tensorflow/core/framework/node_def.pb.h"
 #include "tensorflow/core/framework/node_def_util.h"
 #include "tensorflow/core/grappler/inputs/trivial_test_graph_input_yielder.h"
@@ -253,6 +254,31 @@ TEST_F(GrapplerItemBuilderTest, AssetFilepathOverrideTest_FileNotAccessible) {
   ASSERT_TRUE(item == nullptr);
 }
 
+TEST_F(GrapplerItemBuilderTest, GraphWithFunctions) {
+  MetaGraphDef meta_graph;
+  // y = XTimesTwo(x)
+  constexpr char device[] = "/cpu:0";
+  *meta_graph.mutable_graph_def() = test::function::GDef(
+      {test::function::NDef("x", "Const", {}, {{"dtype", DT_FLOAT}}, device),
+       test::function::NDef("y", "XTimesTwo", {"x"}, {{"T", DT_FLOAT}},
+                            device)},
+      // FunctionLib
+      {
+          test::function::XTimesTwo(),
+      });
+
+  CollectionDef train_op;
+  train_op.mutable_node_list()->add_value("y");
+  (*meta_graph.mutable_collection_def())["train_op"] = train_op;
+
+  ItemConfig cfg;
+  cfg.inline_functions = false;
+
+  std::unique_ptr<GrapplerItem> item =
+      GrapplerItemFromMetaGraphDef("0", meta_graph, cfg);
+  ASSERT_TRUE(item != nullptr);
+}
+
 }  // namespace
 }  // namespace grappler
 }  // namespace tensorflow
-- 
GitLab


From 4b056849f8f142ceb9e0472634bea41ac35850b2 Mon Sep 17 00:00:00 2001
From: Martin Wicke <wicke@google.com>
Date: Mon, 18 Dec 2017 09:04:02 -0800
Subject: [PATCH 1156/1225] Re-enable adaptive_shared_batch_scheduler_test
 after potential bugfix.

PiperOrigin-RevId: 179429524
---
 tensorflow/contrib/batching/BUILD | 1 -
 1 file changed, 1 deletion(-)

diff --git a/tensorflow/contrib/batching/BUILD b/tensorflow/contrib/batching/BUILD
index a111cfecb3..8b7df4a84c 100644
--- a/tensorflow/contrib/batching/BUILD
+++ b/tensorflow/contrib/batching/BUILD
@@ -82,7 +82,6 @@ cc_library(
 tf_cc_test(
     name = "adaptive_shared_batch_scheduler_test",
     srcs = ["adaptive_shared_batch_scheduler_test.cc"],
-    tags = ["manual"],  # b/69013768
     deps = [
         ":adaptive_shared_batch_scheduler",
         "//tensorflow/contrib/batching/test_util:fake_clock_env",
-- 
GitLab


From 03fd17915630933211a7784466dbcdfaa3ec7c83 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 18 Dec 2017 09:03:42 -0800
Subject: [PATCH 1157/1225] Fix support for functions to grappler items.

PiperOrigin-RevId: 179429486
---
 tensorflow/contrib/batching/BUILD | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tensorflow/contrib/batching/BUILD b/tensorflow/contrib/batching/BUILD
index 8b7df4a84c..a111cfecb3 100644
--- a/tensorflow/contrib/batching/BUILD
+++ b/tensorflow/contrib/batching/BUILD
@@ -82,6 +82,7 @@ cc_library(
 tf_cc_test(
     name = "adaptive_shared_batch_scheduler_test",
     srcs = ["adaptive_shared_batch_scheduler_test.cc"],
+    tags = ["manual"],  # b/69013768
     deps = [
         ":adaptive_shared_batch_scheduler",
         "//tensorflow/contrib/batching/test_util:fake_clock_env",
-- 
GitLab


From d9d50b1346827e2ace6686989f6c50f78609285b Mon Sep 17 00:00:00 2001
From: Martin Wicke <wicke@google.com>
Date: Mon, 18 Dec 2017 09:04:02 -0800
Subject: [PATCH 1158/1225] Re-enable adaptive_shared_batch_scheduler_test
 after potential bugfix.

PiperOrigin-RevId: 179429524
---
 tensorflow/contrib/batching/BUILD | 1 -
 1 file changed, 1 deletion(-)

diff --git a/tensorflow/contrib/batching/BUILD b/tensorflow/contrib/batching/BUILD
index a111cfecb3..8b7df4a84c 100644
--- a/tensorflow/contrib/batching/BUILD
+++ b/tensorflow/contrib/batching/BUILD
@@ -82,7 +82,6 @@ cc_library(
 tf_cc_test(
     name = "adaptive_shared_batch_scheduler_test",
     srcs = ["adaptive_shared_batch_scheduler_test.cc"],
-    tags = ["manual"],  # b/69013768
     deps = [
         ":adaptive_shared_batch_scheduler",
         "//tensorflow/contrib/batching/test_util:fake_clock_env",
-- 
GitLab


From 511181cc1c4e70330ad46f4dbcabc511d1a9af4a Mon Sep 17 00:00:00 2001
From: Gunhan Gulsoy <gunan@google.com>
Date: Mon, 18 Dec 2017 09:11:46 -0800
Subject: [PATCH 1159/1225] Provide an option to disable forceinline in cmake
 build

PiperOrigin-RevId: 179430327
---
 tensorflow/contrib/cmake/CMakeLists.txt                   | 5 +++++
 tensorflow/tools/ci_build/windows/cpu/cmake/run_build.bat | 4 +++-
 tensorflow/tools/ci_build/windows/gpu/cmake/run_build.bat | 4 +++-
 tensorflow/workspace.bzl                                  | 8 ++++----
 4 files changed, 15 insertions(+), 6 deletions(-)

diff --git a/tensorflow/contrib/cmake/CMakeLists.txt b/tensorflow/contrib/cmake/CMakeLists.txt
index 481caf6bb0..7392daf66b 100644
--- a/tensorflow/contrib/cmake/CMakeLists.txt
+++ b/tensorflow/contrib/cmake/CMakeLists.txt
@@ -33,6 +33,7 @@ option(tensorflow_BUILD_SHARED_LIB "Build TensorFlow as a shared library" OFF)
 option(tensorflow_OPTIMIZE_FOR_NATIVE_ARCH "Enable compiler optimizations for the native processor architecture (if available)" ON)
 option(tensorflow_WIN_CPU_SIMD_OPTIONS "Enables CPU SIMD instructions")
 option(tensorflow_ENABLE_SNAPPY_SUPPORT "Enable SNAPPY compression support" ON)
+option(tensorflow_DISABLE_EIGEN_FORCEINLINE "Disable forceinline, to speed up build on windows." OFF)
 
 # GPU, CUDA and cuDNN options
 option(tensorflow_ENABLE_GPU "Enable GPU support" OFF)
@@ -97,6 +98,10 @@ else()
 	set(CMAKE_POSITION_INDEPENDENT_CODE OFF)
 endif()
 
+if (tensorflow_DISABLE_EIGEN_FORCEINLINE)
+  add_definitions(-DEIGEN_STRONG_INLINE=inline)
+endif()
+
 add_definitions(-DEIGEN_AVOID_STL_ARRAY)
 if(WIN32)
   add_definitions(-DNOMINMAX -D_WIN32_WINNT=0x0A00 -DLANG_CXX11 -DCOMPILER_MSVC)
diff --git a/tensorflow/tools/ci_build/windows/cpu/cmake/run_build.bat b/tensorflow/tools/ci_build/windows/cpu/cmake/run_build.bat
index 56bff07774..957729bb37 100644
--- a/tensorflow/tools/ci_build/windows/cpu/cmake/run_build.bat
+++ b/tensorflow/tools/ci_build/windows/cpu/cmake/run_build.bat
@@ -30,11 +30,13 @@ IF DEFINED SWIG_EXE (ECHO SWIG_EXE is set to %SWIG_EXE%) ELSE (SET SWIG_EXE="C:\
 IF DEFINED PY_EXE (ECHO PY_EXE is set to %PY_EXE%) ELSE (SET PY_EXE="C:\Program Files\Anaconda3\python.exe")
 IF DEFINED PY_LIB (ECHO PY_LIB is set to %PY_LIB%) ELSE (SET PY_LIB="C:\Program Files\Anaconda3\libs\python35.lib")
 
+IF DEFINED DISABLE_FORCEINLINE (ECHO DISABLE_FORCEINLINE is set to %DISABLE_FORCEINLINE%) ELSE (SET DISABLE_FORCEINLINE="OFF")
+
 SET CMAKE_DIR=%REPO_ROOT%\tensorflow\contrib\cmake
 SET MSBUILD_EXE="C:\Program Files (x86)\MSBuild\14.0\Bin\msbuild.exe"
 
 :: Run cmake to create Visual Studio Project files.
-%CMAKE_EXE% %CMAKE_DIR% -A x64 -DSWIG_EXECUTABLE=%SWIG_EXE% -DPYTHON_EXECUTABLE=%PY_EXE% -DCMAKE_BUILD_TYPE=Release -DPYTHON_LIBRARIES=%PY_LIB% -Dtensorflow_BUILD_PYTHON_TESTS=%BUILD_PYTHON_TESTS% -Dtensorflow_BUILD_CC_TESTS=%BUILD_CC_TESTS% -Dtensorflow_TF_NIGHTLY=%TF_NIGHTLY%
+%CMAKE_EXE% %CMAKE_DIR% -A x64 -DSWIG_EXECUTABLE=%SWIG_EXE% -DPYTHON_EXECUTABLE=%PY_EXE% -DCMAKE_BUILD_TYPE=Release -DPYTHON_LIBRARIES=%PY_LIB% -Dtensorflow_BUILD_PYTHON_TESTS=%BUILD_PYTHON_TESTS% -Dtensorflow_BUILD_CC_TESTS=%BUILD_CC_TESTS% -Dtensorflow_TF_NIGHTLY=%TF_NIGHTLY% -Dtensorflow_DISABLE_EIGEN_FORCEINLINE=%DISABLE_FORCEINLINE%
 
 :: Run msbuild in the resulting VS project files to build a pip package.
 %MSBUILD_EXE% /p:Configuration=Release /maxcpucount:32 tf_python_build_pip_package.vcxproj
diff --git a/tensorflow/tools/ci_build/windows/gpu/cmake/run_build.bat b/tensorflow/tools/ci_build/windows/gpu/cmake/run_build.bat
index 832943ad6c..5a362de399 100644
--- a/tensorflow/tools/ci_build/windows/gpu/cmake/run_build.bat
+++ b/tensorflow/tools/ci_build/windows/gpu/cmake/run_build.bat
@@ -31,11 +31,13 @@ IF DEFINED PY_EXE (ECHO PY_EXE is set to %PY_EXE%) ELSE (SET PY_EXE="C:\Program
 IF DEFINED PY_LIB (ECHO PY_LIB is set to %PY_LIB%) ELSE (SET PY_LIB="C:\Program Files\Anaconda3\libs\python35.lib")
 IF DEFINED CUDNN_HOME (ECHO CUDNN_HOME is set to %CUDNN_HOME%) ELSE (SET CUDNN_HOME="c:\tools\cuda")
 verbosity:quiet
+IF DEFINED DISABLE_FORCEINLINE (ECHO DISABLE_FORCEINLINE is set to %DISABLE_FORCEINLINE%) ELSE (SET DISABLE_FORCEINLINE="OFF")
+
 SET CMAKE_DIR=%REPO_ROOT%\tensorflow\contrib\cmake
 SET MSBUILD_EXE="C:\Program Files (x86)\MSBuild\14.0\Bin\msbuild.exe"
 
 :: Run cmake to create Visual Studio Project files.
-%CMAKE_EXE% %CMAKE_DIR% -A x64 -DSWIG_EXECUTABLE=%SWIG_EXE% -DPYTHON_EXECUTABLE=%PY_EXE% -DCMAKE_BUILD_TYPE=Release -DPYTHON_LIBRARIES=%PY_LIB% -Dtensorflow_BUILD_PYTHON_TESTS=%BUILD_PYTHON_TESTS% -Dtensorflow_BUILD_CC_TESTS=%BUILD_CC_TESTS% -Dtensorflow_ENABLE_GPU=ON -DCUDNN_HOME=%CUDNN_HOME% -Dtensorflow_TF_NIGHTLY=%TF_NIGHTLY%
+%CMAKE_EXE% %CMAKE_DIR% -A x64 -DSWIG_EXECUTABLE=%SWIG_EXE% -DPYTHON_EXECUTABLE=%PY_EXE% -DCMAKE_BUILD_TYPE=Release -DPYTHON_LIBRARIES=%PY_LIB% -Dtensorflow_BUILD_PYTHON_TESTS=%BUILD_PYTHON_TESTS% -Dtensorflow_BUILD_CC_TESTS=%BUILD_CC_TESTS% -Dtensorflow_ENABLE_GPU=ON -DCUDNN_HOME=%CUDNN_HOME% -Dtensorflow_TF_NIGHTLY=%TF_NIGHTLY% -Dtensorflow_DISABLE_EIGEN_FORCEINLINE=%DISABLE_FORCEINLINE%
 
 :: Run msbuild in the resulting VS project files to build a pip package.
 %MSBUILD_EXE% /p:Configuration=Release /maxcpucount:32 tf_python_build_pip_package.vcxproj
diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl
index 046c2b2391..846b9bc645 100644
--- a/tensorflow/workspace.bzl
+++ b/tensorflow/workspace.bzl
@@ -95,11 +95,11 @@ def tf_workspace(path_prefix="", tf_repo_name=""):
   tf_http_archive(
       name = "eigen_archive",
       urls = [
-          "https://mirror.bazel.build/bitbucket.org/eigen/eigen/get/b6e6d0cf6a77.tar.gz",
-          "https://bitbucket.org/eigen/eigen/get/b6e6d0cf6a77.tar.gz",
+          "https://mirror.bazel.build/bitbucket.org/eigen/eigen/get/c2947c341c68.tar.gz",
+          "https://bitbucket.org/eigen/eigen/get/c2947c341c68.tar.gz",
       ],
-      sha256 = "0840c497f2749b5e90bda666aab96be6da90dc75b4e21ca9843cae69b7fed52a",
-      strip_prefix = "eigen-eigen-b6e6d0cf6a77",
+      sha256 = "f21f8ab8a8dbcb91cd0deeade19a043f47708d0da7a4000164cdf203b4a71e34",
+      strip_prefix = "eigen-eigen-c2947c341c68",
       build_file = str(Label("//third_party:eigen.BUILD")),
   )
 
-- 
GitLab


From 6548e417f8d26e81d10ee577f8575b1cebc443a8 Mon Sep 17 00:00:00 2001
From: Derek Murray <mrry@google.com>
Date: Mon, 18 Dec 2017 09:16:40 -0800
Subject: [PATCH 1160/1225] Prune unused stateless nodes from function bodies.

Previously, all nodes in a TensorFlow function would be executed
unconditionally, which led to surprising performance issues (such as
executing a expensive image summary op that was created but unused in
a preprocessing function). We can prune nodes that are not
reverse-reachable from the return values of a function if they are
stateless and are not reverse-reachable from a stateful node.

PiperOrigin-RevId: 179430810
---
 tensorflow/core/BUILD                         |  1 +
 tensorflow/core/common_runtime/function.cc    | 27 +++++++++
 .../core/common_runtime/function_test.cc      | 60 +++++++++++++++++++
 3 files changed, 88 insertions(+)

diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD
index a280444121..de074cc33c 100644
--- a/tensorflow/core/BUILD
+++ b/tensorflow/core/BUILD
@@ -3164,6 +3164,7 @@ tf_cc_test(
         "//tensorflow/core/kernels:cwise_op",
         "//tensorflow/core/kernels:function_ops",
         "//tensorflow/core/kernels:matmul_op",
+        "//tensorflow/core/kernels:random_ops",
         "//tensorflow/core/kernels:shape_ops",
         "//third_party/eigen3",
     ],
diff --git a/tensorflow/core/common_runtime/function.cc b/tensorflow/core/common_runtime/function.cc
index ee9988f0b7..b921cbcafc 100644
--- a/tensorflow/core/common_runtime/function.cc
+++ b/tensorflow/core/common_runtime/function.cc
@@ -535,12 +535,39 @@ void OptimizeGraph(FunctionLibraryRuntime* lib, std::unique_ptr<Graph>* g) {
   optimizer.Optimize(lib, lib->env(), lib->device(), g, /*shape_map=*/nullptr);
 }
 
+namespace {
+// Removes all stateless nodes that do not contribute to a return
+// value from the function body.  Unlike `RemoveDeadNodes()`, which is
+// triggered by `OptimizerOptions.do_function_inlining`, this pass
+// ignores the SINK node, from which (by definition) all nodes are
+// reverse reachable.
+void PruneFunctionBody(Graph* g) {
+  VLOG(2) << "Pruning function body";
+  std::unordered_set<const Node*> nodes;
+  for (auto n : g->nodes()) {
+    // NOTE(mrry): "_Retval" nodes are stateful, and so will be added
+    // to the seed set of `nodes`.
+    // TODO(mrry): Investigate whether the `n->IsControlFlow()` test is
+    // still needed. It would be preferable to prune entire loops and/or
+    // conditionals if they are not used in the graph.
+    if (n->IsControlFlow() || n->op_def().is_stateful()) {
+      nodes.insert(n);
+    }
+  }
+  bool changed = PruneForReverseReachability(g, std::move(nodes));
+  if (changed) {
+    FixupSourceAndSinkEdges(g);
+  }
+}
+}  // namespace
+
 Status FunctionLibraryRuntimeImpl::CreateItem(Handle handle, Item** item) {
   const FunctionBody* fbody = GetFunctionBody(handle);
   CHECK_NOTNULL(fbody);
   std::unique_ptr<Graph> g(new Graph(lib_def_));
   CopyGraph(*fbody->graph, g.get());
 
+  PruneFunctionBody(g.get());
   optimizer_.Optimize(this, env(), device(), &g, /*shape_map=*/nullptr);
   TF_RETURN_IF_ERROR(EnsureMemoryTypes(DeviceType(device()->device_type()),
                                        device()->name(), g.get()));
diff --git a/tensorflow/core/common_runtime/function_test.cc b/tensorflow/core/common_runtime/function_test.cc
index 52bfb9e0ed..7b553c2dcd 100644
--- a/tensorflow/core/common_runtime/function_test.cc
+++ b/tensorflow/core/common_runtime/function_test.cc
@@ -27,6 +27,7 @@ limitations under the License.
 #include "tensorflow/core/common_runtime/executor.h"
 #include "tensorflow/core/common_runtime/function_testlib.h"
 #include "tensorflow/core/common_runtime/rendezvous_mgr.h"
+#include "tensorflow/core/common_runtime/step_stats_collector.h"
 #include "tensorflow/core/framework/function.h"
 #include "tensorflow/core/framework/function_testlib.h"
 #include "tensorflow/core/framework/op.h"
@@ -566,6 +567,65 @@ TEST_F(FunctionLibraryRuntimeTest, ExpandInlineFunctionsWithControlDeps) {
   }
 }
 
+TEST_F(FunctionLibraryRuntimeTest, PruneBody) {
+  auto T = DT_INT32;
+  FunctionDef stateful_func = FDH::Define(
+      // Name
+      "SquareAndAddOneWithStatefulNodes",
+      // Args
+      {"x: int32"},
+      // Return values
+      {"y: int32"},
+      // Attrs
+      {},
+      // Nodes
+      {// a = Square<T>(x)
+       {{"a"}, "Square", {"x"}, {{"T", T}}},
+       // 1
+       FDH::Const("o", 1),
+       // A bunch of extra arithmetic that y doesn't depend on
+       {{"x1"}, "Add", {"o", "o"}, {{"T", T}}},
+       {{"x2"}, "Mul", {"a", "x1"}, {{"T", T}}},
+       {{"x3"}, "Mul", {"x1", "x2"}, {{"T", T}}},
+       FDH::Const<int32>("shape", {1, 2}),
+       // A stateful node.
+       {{"keep_me"},
+        "RandomUniform",
+        {"shape"},
+        {{"T", T}, {"dtype", DT_FLOAT}}},
+       // y = Add<T>(a, o)
+       {{"y"}, "Add", {"a", "o"}, {{"T", T}}}});
+  Init({stateful_func});
+
+  auto x = test::AsTensor<int32>({1, 2, 3, 4});
+  Tensor y;
+
+  FunctionLibraryRuntime::Handle handle;
+  TF_CHECK_OK(
+      Instantiate(flr0_, "SquareAndAddOneWithStatefulNodes", {}, &handle));
+
+  StepStats stats;
+  StepStatsCollector stats_collector(&stats);
+  FunctionLibraryRuntime::Options opts;
+  opts.stats_collector = &stats_collector;
+  TF_CHECK_OK(Run(flr0_, handle, opts, {x}, {&y}));
+
+  TF_CHECK_OK(InstantiateAndRun(flr0_, "SquareAndAddOneWithStatefulNodes", {},
+                                {x}, {&y}));
+  test::ExpectTensorEqual<int>(y, test::AsTensor<int32>({2, 5, 10, 17}));
+
+  stats_collector.FinalizeAndSwap(&stats);
+
+  // Note that we do not expect the nodes named "x1", "x2", or "x3" to execute.
+  std::set<string> expected_node_names(
+      {"_SOURCE", "shape", "x", "o", "a", "keep_me", "y", "y_RetVal"});
+  std::set<string> executed_node_names;
+  for (const auto& node_stats : stats.dev_stats()[0].node_stats()) {
+    executed_node_names.insert(node_stats.node_name());
+  }
+  EXPECT_EQ(expected_node_names, executed_node_names);
+}
+
 TEST_F(FunctionLibraryRuntimeTest, OptimizeGraph) {
   Init({test::function::XTimesTwo(), test::function::XTimesFour(),
         test::function::XTimes16()});
-- 
GitLab


From a386aff3dd7383d576bfde3a83b00ca79698d398 Mon Sep 17 00:00:00 2001
From: Jiri Simsa <jsimsa@google.com>
Date: Mon, 18 Dec 2017 09:24:44 -0800
Subject: [PATCH 1161/1225] Implementing a fused version of shuffle and repeat.
 The logic is incorporated into the existing
 `tf.data.Dataset.shuffle_and_repeat` transformation.

Fusing of shuffle and repeat is expected to benefit the performance of pipelines with deep shuffle buffers and/or short epochs as it removes the per epoch initialization of the shuffle buffer.

PiperOrigin-RevId: 179431675
---
 .../kernel_tests/shuffle_dataset_op_test.py   |  22 +-
 .../contrib/data/python/ops/shuffle_ops.py    |  83 +++-
 .../api_def_ShuffleAndRepeatDataset.pbtxt     |  36 ++
 .../base_api/api_def_ShuffleDataset.pbtxt     |   4 +-
 .../core/kernels/data/repeat_dataset_op.cc    |  33 +-
 .../core/kernels/data/shuffle_dataset_op.cc   | 368 +++++++++++++-----
 tensorflow/core/ops/dataset_ops.cc            |  29 +-
 tensorflow/python/data/ops/dataset_ops.py     |  12 +-
 8 files changed, 434 insertions(+), 153 deletions(-)
 create mode 100644 tensorflow/core/api_def/base_api/api_def_ShuffleAndRepeatDataset.pbtxt

diff --git a/tensorflow/contrib/data/python/kernel_tests/shuffle_dataset_op_test.py b/tensorflow/contrib/data/python/kernel_tests/shuffle_dataset_op_test.py
index ba1be0690f..72745ec752 100644
--- a/tensorflow/contrib/data/python/kernel_tests/shuffle_dataset_op_test.py
+++ b/tensorflow/contrib/data/python/kernel_tests/shuffle_dataset_op_test.py
@@ -158,13 +158,6 @@ class ShuffleDatasetTest(test.TestCase):
     for i in range(5):
       self.assertEqual(10, counts[i])
 
-  def testSeedNoneSeed2NonNone(self):
-    with self.assertRaises(ValueError):
-      dataset_ops.ShuffleDataset(dataset_ops.Dataset.range(5),
-                                 buffer_size=1,
-                                 seed=None,
-                                 seed2=10)
-
 
 class ShuffleDatasetSerializationTest(test.TestCase):
 
@@ -486,8 +479,8 @@ class ShuffleDatasetSerializationTest(test.TestCase):
 class ShuffleAndRepeatTest(
     dataset_serialization_test_base.DatasetSerializationTestBase):
 
-  def _build_ds(self, seed, count=5):
-    return dataset_ops.Dataset.range(20).apply(
+  def _build_ds(self, seed, count=5, num_elements=20):
+    return dataset_ops.Dataset.range(num_elements).apply(
         shuffle_ops.shuffle_and_repeat(buffer_size=5, count=count, seed=seed))
 
   def testCorrectOutput(self):
@@ -534,13 +527,20 @@ class ShuffleAndRepeatTest(
     self.assertEqual(sorted(output1), sorted(output2))
 
   def testInfiniteOutputs(self):
-    # Asserting that the iterator is exhausted after producing 100 items should
-    # fail.
+    # Asserting the iterator is exhausted after producing 100 items should fail.
     with self.assertRaises(AssertionError):
       self.gen_outputs(lambda: self._build_ds(10, count=None), [], 100)
     with self.assertRaises(AssertionError):
       self.gen_outputs(lambda: self._build_ds(10, count=-1), [], 100)
 
+  def testInfiniteEmpty(self):
+    with self.assertRaises(errors.OutOfRangeError):
+      self.gen_outputs(lambda: self._build_ds(10, count=None, num_elements=0),
+                       [], 100)
+    with self.assertRaises(errors.OutOfRangeError):
+      self.gen_outputs(lambda: self._build_ds(10, count=-1, num_elements=0), [],
+                       100)
+
 
 class ShuffleAndRepeatSerializationTest(
     dataset_serialization_test_base.DatasetSerializationTestBase):
diff --git a/tensorflow/contrib/data/python/ops/shuffle_ops.py b/tensorflow/contrib/data/python/ops/shuffle_ops.py
index 460732d65e..410989fad4 100644
--- a/tensorflow/contrib/data/python/ops/shuffle_ops.py
+++ b/tensorflow/contrib/data/python/ops/shuffle_ops.py
@@ -17,9 +17,72 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-from tensorflow.contrib.data.python.ops import batching
-from tensorflow.contrib.data.python.ops import random_ops
 from tensorflow.python.data.ops import dataset_ops
+from tensorflow.python.data.util import nest
+from tensorflow.python.data.util import sparse
+from tensorflow.python.framework import constant_op
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import ops
+from tensorflow.python.framework import random_seed
+from tensorflow.python.ops import gen_dataset_ops
+
+
+class _ShuffleAndRepeatDataset(dataset_ops.Dataset):
+  """A `Dataset` that fuses `shuffle` and `repeat`."""
+
+  def __init__(self,
+               input_dataset,
+               buffer_size,
+               count=None,
+               seed=None):
+    """See `Dataset.map()` for details."""
+    super(_ShuffleAndRepeatDataset, self).__init__()
+    self._input_dataset = input_dataset
+    self._buffer_size = ops.convert_to_tensor(
+        buffer_size, dtype=dtypes.int64, name="buffer_size")
+    if count is None:
+      self._count = constant_op.constant(-1, dtype=dtypes.int64, name="count")
+    else:
+      self._count = ops.convert_to_tensor(
+          count, dtype=dtypes.int64, name="count")
+
+    seed, seed2 = random_seed.get_seed(seed)
+    if seed is None:
+      self._seed = constant_op.constant(0, dtype=dtypes.int64, name="seed")
+    else:
+      self._seed = ops.convert_to_tensor(seed, dtype=dtypes.int64, name="seed")
+    if seed2 is None:
+      self._seed2 = constant_op.constant(0, dtype=dtypes.int64, name="seed2")
+    else:
+      self._seed2 = ops.convert_to_tensor(
+          seed2, dtype=dtypes.int64, name="seed2")
+
+  def _as_variant_tensor(self):
+    # pylint: disable=protected-access
+    input_resource = self._input_dataset._as_variant_tensor()
+    return gen_dataset_ops.shuffle_and_repeat_dataset(
+        input_resource,
+        buffer_size=self._buffer_size,
+        count=self._count,
+        seed=self._seed,
+        seed2=self._seed2,
+        output_types=nest.flatten(
+            sparse.as_dense_types(self.output_types, self.output_classes)),
+        output_shapes=nest.flatten(
+            sparse.as_dense_shapes(self.output_shapes, self.output_classes)))
+    # pylint: enable=protected-access
+
+  @property
+  def output_classes(self):
+    return self._input_dataset.output_classes
+
+  @property
+  def output_shapes(self):
+    return self._input_dataset.output_shapes
+
+  @property
+  def output_types(self):
+    return self._input_dataset.output_types
 
 
 def shuffle_and_repeat(buffer_size, count=None, seed=None):
@@ -50,20 +113,8 @@ def shuffle_and_repeat(buffer_size, count=None, seed=None):
     A `Dataset` transformation function, which can be passed to
     @{tf.contrib.data.Dataset.apply}.
   """
+
   def _apply_fn(dataset):  # pylint: disable=missing-docstring
-    random_ds = random_ops.RandomDataset(seed).apply(
-        batching.batch_and_drop_remainder(2))
-    if count is not None and count is not -1:
-      random_ds = random_ds.take(count)
-
-    def map_fn(seeds):
-      return dataset_ops.ShuffleDataset(
-          input_dataset=dataset,
-          buffer_size=buffer_size,
-          seed=seeds[0],
-          reshuffle_each_iteration=False,
-          seed2=seeds[1])
-
-    return random_ds.flat_map(map_fn)
+    return _ShuffleAndRepeatDataset(dataset, buffer_size, count, seed)
 
   return _apply_fn
diff --git a/tensorflow/core/api_def/base_api/api_def_ShuffleAndRepeatDataset.pbtxt b/tensorflow/core/api_def/base_api/api_def_ShuffleAndRepeatDataset.pbtxt
new file mode 100644
index 0000000000..fb425b24a4
--- /dev/null
+++ b/tensorflow/core/api_def/base_api/api_def_ShuffleAndRepeatDataset.pbtxt
@@ -0,0 +1,36 @@
+op {
+  graph_op_name: "ShuffleAndRepeatDataset"
+  in_arg {
+    name: "buffer_size"
+    description: <<END
+The number of output elements to buffer in an iterator over
+this dataset. Compare with the `min_after_dequeue` attr when creating a
+`RandomShuffleQueue`.
+END
+  }
+  in_arg {
+    name: "seed"
+    description: <<END
+A scalar seed for the random number generator. If either `seed` or
+`seed2` is set to be non-zero, the random number generator is seeded
+by the given seed.  Otherwise, a random seed is used.
+END
+  }
+  in_arg {
+    name: "seed2"
+    description: <<END
+A second scalar seed to avoid seed collision.
+END
+  }
+  in_arg {
+    name: "count"
+    description: <<END
+A scalar representing the number of times the underlying dataset
+should be repeated. The default is `-1`, which results in infinite repetition.
+END
+  }
+  summary: "Creates a dataset that shuffles and repeats elements from `input_dataset`"
+  description: <<END
+pseudorandomly.
+END
+}
diff --git a/tensorflow/core/api_def/base_api/api_def_ShuffleDataset.pbtxt b/tensorflow/core/api_def/base_api/api_def_ShuffleDataset.pbtxt
index b12d3af9d7..ea5c52c0ee 100644
--- a/tensorflow/core/api_def/base_api/api_def_ShuffleDataset.pbtxt
+++ b/tensorflow/core/api_def/base_api/api_def_ShuffleDataset.pbtxt
@@ -11,8 +11,8 @@ END
   in_arg {
     name: "seed"
     description: <<END
-A scalar seed for the random number generator. If either seed or
-seed2 is set to be non-zero, the random number generator is seeded
+A scalar seed for the random number generator. If either `seed` or
+`seed2` is set to be non-zero, the random number generator is seeded
 by the given seed.  Otherwise, a random seed is used.
 END
   }
diff --git a/tensorflow/core/kernels/data/repeat_dataset_op.cc b/tensorflow/core/kernels/data/repeat_dataset_op.cc
index f5c686dfc8..0e4f92a8fd 100644
--- a/tensorflow/core/kernels/data/repeat_dataset_op.cc
+++ b/tensorflow/core/kernels/data/repeat_dataset_op.cc
@@ -175,30 +175,25 @@ class RepeatDatasetOp : public UnaryDatasetOpKernel {
                              bool* end_of_sequence) override {
         mutex_lock l(mu_);  // TODO(mrry): Make locking less conservative.
         do {
+          bool first_call = false;
           if (!input_impl_) {
+            first_call = true;
             input_impl_ = dataset()->input_->MakeIterator(prefix());
-            TF_RETURN_IF_ERROR(
-                input_impl_->GetNext(ctx, out_tensors, end_of_sequence));
-            // If the first call to GetNext() fails because the end of
-            // sequence has been reached, we return an OutOfRange
-            // error to terminate the iteration. (Otherwise, this
-            // iterator would loop infinitely and never produce a
-            // value.)
-            if (!*end_of_sequence) {
-              return Status::OK();
-            } else {
-              input_impl_.reset();
+          }
+          TF_RETURN_IF_ERROR(
+              input_impl_->GetNext(ctx, out_tensors, end_of_sequence));
+          if (!*end_of_sequence) {
+            return Status::OK();
+          } else {
+            input_impl_.reset();
+            if (first_call) {
+              // If the first call to GetNext() fails because the end of
+              // sequence has been reached, we return an OutOfRange error to
+              // terminate the iteration. (Otherwise, this iterator would loop
+              // infinitely and never produce a value.)
               return errors::OutOfRange(
                   "Attempted to repeat an empty dataset infinitely.");
             }
-          } else {
-            TF_RETURN_IF_ERROR(
-                input_impl_->GetNext(ctx, out_tensors, end_of_sequence));
-            if (!*end_of_sequence) {
-              return Status::OK();
-            } else {
-              input_impl_.reset();
-            }
           }
         } while (true);
       }
diff --git a/tensorflow/core/kernels/data/shuffle_dataset_op.cc b/tensorflow/core/kernels/data/shuffle_dataset_op.cc
index 4ac4ab0f5a..caef449b8e 100644
--- a/tensorflow/core/kernels/data/shuffle_dataset_op.cc
+++ b/tensorflow/core/kernels/data/shuffle_dataset_op.cc
@@ -12,6 +12,10 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
+
+#include <deque>
+#include <vector>
+
 #include "tensorflow/core/framework/partial_tensor_shape.h"
 #include "tensorflow/core/framework/tensor.h"
 #include "tensorflow/core/kernels/data/dataset.h"
@@ -28,50 +32,21 @@ const int64 kLogIntervalMicros = 10 * 1000000;  // 10 seconds.
 // See documentation in ../ops/dataset_ops.cc for a high-level
 // description of the following op.
 
-class ShuffleDatasetOp : public UnaryDatasetOpKernel {
+class ShuffleDatasetOpBase : public UnaryDatasetOpKernel {
  public:
-  explicit ShuffleDatasetOp(OpKernelConstruction* ctx)
-      : UnaryDatasetOpKernel(ctx) {
-    OP_REQUIRES_OK(ctx, ctx->GetAttr("reshuffle_each_iteration",
-                                     &reshuffle_each_iteration_));
-  }
-
-  void MakeDataset(OpKernelContext* ctx, DatasetBase* input,
-                   DatasetBase** output) override {
-    int64 buffer_size;
-    OP_REQUIRES_OK(
-        ctx, ParseScalarArgument<int64>(ctx, "buffer_size", &buffer_size));
-    OP_REQUIRES(
-        ctx, buffer_size > 0,
-        errors::InvalidArgument("buffer_size must be greater than zero."));
-
-    int64 seed;
-    OP_REQUIRES_OK(ctx, ParseScalarArgument<int64>(ctx, "seed", &seed));
-
-    int64 seed2;
-    OP_REQUIRES_OK(ctx, ParseScalarArgument<int64>(ctx, "seed2", &seed2));
+  explicit ShuffleDatasetOpBase(OpKernelConstruction* ctx)
+      : UnaryDatasetOpKernel(ctx) {}
 
-    // By TensorFlow convention, passing 0 for both seeds indicates
-    // that the shuffling should be seeded non-deterministically.
-    if (seed == 0 && seed2 == 0) {
-      seed = random::New64();
-      seed2 = random::New64();
-    }
-
-    if (reshuffle_each_iteration_) {
-      *output = new ReshufflingDataset(ctx, input, buffer_size, seed, seed2);
-    } else {
-      *output = new FixedSeedDataset(ctx, input, buffer_size, seed, seed2);
-    }
-  }
-
- private:
+ protected:
   // Abstract base dataset that implements a shuffling iterator.
   class ShuffleDatasetBase : public GraphDatasetBase {
    public:
     ShuffleDatasetBase(OpKernelContext* ctx, const DatasetBase* input,
-                       int64 buffer_size)
-        : GraphDatasetBase(ctx), input_(input), buffer_size_(buffer_size) {
+                       int64 buffer_size, int64 count)
+        : GraphDatasetBase(ctx),
+          input_(input),
+          buffer_size_(buffer_size),
+          count_(count) {
       input_->Ref();
     }
 
@@ -90,12 +65,15 @@ class ShuffleDatasetOp : public UnaryDatasetOpKernel {
      public:
       explicit Iterator(const Params& params, int64 seed, int64 seed2)
           : DatasetIterator<ShuffleDatasetBase>(params),
-            input_impl_(params.dataset->input_->MakeIterator(params.prefix)),
+            input_impl_(nullptr),
             seed_(seed),
             seed2_(seed2),
+            epoch_(0),
+            num_elements_(0),
             parent_generator_(seed, seed2),
             generator_(&parent_generator_) {
-        buffer_.reserve(params.dataset->buffer_size_);
+        buffer_.reset(new std::vector<Tensor>[params.dataset->buffer_size_]);
+        slices_.emplace_back(new Slice{0, 0});
       }
 
       Status GetNextInternal(IteratorContext* ctx,
@@ -104,19 +82,44 @@ class ShuffleDatasetOp : public UnaryDatasetOpKernel {
         mutex_lock l(mu_);
         int64 start_micros = ctx->env()->NowMicros();
         int64 num_log_entries = 0;
-        while (input_impl_ && buffer_.size() < dataset()->buffer_size_) {
+        bool first_call = false;
+        if (!input_impl_ && epoch_ == 0) {
+          first_call = true;
+          input_impl_ = dataset()->input_->MakeIterator(prefix());
+        }
+        while (input_impl_ && num_elements_ < dataset()->buffer_size_) {
           if (ctx->env()->NowMicros() >
               ((num_log_entries + 1) * kLogIntervalMicros) + start_micros) {
             num_log_entries++;
             LOG(INFO) << "Filling up shuffle buffer (this may take a while): "
-                      << buffer_.size() << " of " << dataset()->buffer_size_;
+                      << num_elements_ << " of " << dataset()->buffer_size_;
           }
           std::vector<Tensor> input_element;
-          bool end_of_input_sequence;
-          TF_RETURN_IF_ERROR(input_impl_->GetNext(ctx, &input_element,
-                                                  &end_of_input_sequence));
+          bool end_of_input_sequence = false;
+          while (dataset()->count_ == -1 || epoch_ < dataset()->count_) {
+            TF_RETURN_IF_ERROR(input_impl_->GetNext(ctx, &input_element,
+                                                    &end_of_input_sequence));
+            if (!end_of_input_sequence) {
+              break;
+            }
+            if (first_call && dataset()->count_ == -1) {
+              // If the first call to GetNext() fails because the end of
+              // sequence has been reached, we return an OutOfRange error to
+              // terminate the iteration. (Otherwise, this iterator may loop
+              // infinitely and never produce a value.)
+              return errors::OutOfRange(
+                  "Attempted to repeat an empty dataset infinitely.");
+            }
+            epoch_++;
+            int64 n = slices_.back()->end;
+            slices_.emplace_back(new Slice{n, n});
+            input_impl_ = dataset()->input_->MakeIterator(prefix());
+          }
           if (!end_of_input_sequence) {
-            buffer_.emplace_back(std::move(input_element));
+            buffer_[slices_.back()->end % dataset()->buffer_size_] =
+                std::move(input_element);
+            num_elements_++;
+            slices_.back()->end++;
           } else {
             input_impl_.reset();
           }
@@ -125,14 +128,25 @@ class ShuffleDatasetOp : public UnaryDatasetOpKernel {
           LOG(INFO) << "Shuffle buffer filled.";
         }
 
-        if (!buffer_.empty()) {
+        if (num_elements_ > 0) {
           *end_of_sequence = false;
-          // Choose an element to produce uniformly at random, and
-          // swap the last element into its place in the buffer.
-          int64 index = Random() % buffer_.size();
+          // Garbage collect all empty slices.
+          while (!slices_.empty() &&
+                 slices_.front()->start == slices_.front()->end) {
+            slices_.pop_front();
+          }
+          DCHECK(!slices_.empty());
+          // Choose an element to produce uniformly at random from the first
+          // slice, and then remove the element from the slice.
+          int64 offset =
+              Random() % (slices_.front()->end - slices_.front()->start);
+          int64 index =
+              (slices_.front()->start + offset) % dataset()->buffer_size_;
           *out_tensors = std::move(buffer_[index]);
-          std::swap(buffer_[index], buffer_.back());
-          buffer_.pop_back();
+          std::swap(buffer_[index],
+                    buffer_[slices_.front()->start % dataset()->buffer_size_]);
+          slices_.front()->start++;
+          num_elements_--;
         } else {
           DCHECK(input_impl_ == nullptr);
           *end_of_sequence = true;
@@ -144,20 +158,6 @@ class ShuffleDatasetOp : public UnaryDatasetOpKernel {
       Status SaveInternal(IteratorStateWriter* writer) override {
         mutex_lock l(mu_);
 
-        // Save the tensors in the buffer.
-        TF_RETURN_IF_ERROR(
-            writer->WriteScalar(full_name("buffer_size"), buffer_.size()));
-        for (size_t i = 0; i < buffer_.size(); i++) {
-          TF_RETURN_IF_ERROR(writer->WriteScalar(
-              full_name(strings::StrCat("buffer_", i, "_size")),
-              buffer_[i].size()));
-          for (size_t j = 0; j < buffer_[i].size(); j++) {
-            TF_RETURN_IF_ERROR(writer->WriteTensor(
-                full_name(strings::StrCat("buffer_", i, "_", j)),
-                buffer_[i][j]));
-          }
-        }
-
         // Save state needed to restore the random number generators.
         TF_RETURN_IF_ERROR(writer->WriteScalar(full_name("num_random_samples"),
                                                num_random_samples_));
@@ -170,34 +170,38 @@ class ShuffleDatasetOp : public UnaryDatasetOpKernel {
         } else {
           TF_RETURN_IF_ERROR(SaveParent(writer, input_impl_));
         }
+
+        // Save the epoch counter, buffer, and buffer slices.
+        TF_RETURN_IF_ERROR(writer->WriteScalar(full_name("epoch"), epoch_));
+        TF_RETURN_IF_ERROR(
+            writer->WriteScalar(full_name("num_elements"), num_elements_));
+        TF_RETURN_IF_ERROR(
+            writer->WriteScalar(full_name("slices_size"), slices_.size()));
+        for (size_t i = 0; i < slices_.size(); ++i) {
+          TF_RETURN_IF_ERROR(writer->WriteScalar(
+              full_name(strings::StrCat("slices_start_", i)),
+              slices_[i]->start));
+          TF_RETURN_IF_ERROR(writer->WriteScalar(
+              full_name(strings::StrCat("slices_end_", i)), slices_[i]->end));
+          for (size_t j = slices_[i]->start; j < slices_[i]->end; ++j) {
+            size_t index = j % dataset()->buffer_size_;
+            TF_RETURN_IF_ERROR(writer->WriteScalar(
+                full_name(strings::StrCat("buffer_", index, "_size")),
+                buffer_[index].size()));
+            for (size_t k = 0; k < buffer_[index].size(); ++k) {
+              TF_RETURN_IF_ERROR(writer->WriteTensor(
+                  full_name(strings::StrCat("buffer_", index, "_", k)),
+                  buffer_[index][k]));
+            }
+          }
+        }
+
         return Status::OK();
       }
 
       Status RestoreInternal(OpKernelContext* ctx,
                              IteratorStateReader* reader) override {
         mutex_lock l(mu_);
-        buffer_.clear();
-
-        // Restore the buffer.
-        size_t buffer_size;
-        {
-          int64 temp;
-          TF_RETURN_IF_ERROR(
-              reader->ReadScalar(full_name("buffer_size"), &temp));
-          buffer_size = static_cast<size_t>(temp);
-        }
-        buffer_.reserve(buffer_size);
-        for (size_t i = 0; i < buffer_size; i++) {
-          int64 list_size;
-          TF_RETURN_IF_ERROR(reader->ReadScalar(
-              full_name(strings::StrCat("buffer_", i, "_size")), &list_size));
-          buffer_.emplace_back(std::vector<Tensor>(list_size));
-          for (int j = 0; j < list_size; j++) {
-            TF_RETURN_IF_ERROR(reader->ReadTensor(
-                full_name(strings::StrCat("buffer_", i, "_", j)),
-                &buffer_[i][j]));
-          }
-        }
 
         // Restore the random number generators.
         TF_RETURN_IF_ERROR(reader->ReadScalar(full_name("num_random_samples"),
@@ -211,10 +215,58 @@ class ShuffleDatasetOp : public UnaryDatasetOpKernel {
         } else {
           input_impl_.reset();
         }
+
+        // Restore the epoch counter, buffer, and buffer slices.
+        TF_RETURN_IF_ERROR(reader->ReadScalar(full_name("epoch"), &epoch_));
+        TF_RETURN_IF_ERROR(
+            reader->ReadScalar(full_name("num_elements"), &num_elements_));
+        size_t slices_size;
+        {
+          int64 temp;
+          TF_RETURN_IF_ERROR(
+              reader->ReadScalar(full_name("slices_size"), &temp));
+          slices_size = static_cast<size_t>(temp);
+        }
+        buffer_.reset(new std::vector<Tensor>[dataset()->buffer_size_]);
+        for (size_t i = 0; i < slices_size; ++i) {
+          int64 start;
+          TF_RETURN_IF_ERROR(reader->ReadScalar(
+              full_name(strings::StrCat("slices_start_", i)), &start));
+          int64 end;
+          TF_RETURN_IF_ERROR(reader->ReadScalar(
+              full_name(strings::StrCat("slices_end_", i)), &end));
+          slices_.emplace_back(new Slice{start, end});
+          for (size_t j = start; j < end; ++j) {
+            size_t index = j % dataset()->buffer_size_;
+            int64 list_size;
+            TF_RETURN_IF_ERROR(reader->ReadScalar(
+                full_name(strings::StrCat("buffer_", index, "_size")),
+                &list_size));
+            buffer_[index] = std::vector<Tensor>(list_size);
+            for (int k = 0; k < list_size; ++k) {
+              TF_RETURN_IF_ERROR(reader->ReadTensor(
+                  full_name(strings::StrCat("buffer_", index, "_", k)),
+                  &buffer_[index][k]));
+            }
+          }
+        }
+
         return Status::OK();
       }
 
      private:
+      // Used to represent slices of `buffer_` that belong to different epochs.
+      // The invariant maintained by the implementation is: `start` <= `end`.
+      // When using `start` and `end` to index into `buffer_`, their values
+      // should be taken modulo the size of `buffer_` as their absolute value
+      // can be greater than the range of `buffer_`.
+      struct Slice {
+        Slice(int64 start, int64 end) : start(start), end(end) {}
+
+        int64 start;
+        int64 end;
+      };
+
       random::SingleSampleAdapter<random::PhiloxRandom>::ResultType Random()
           EXCLUSIVE_LOCKS_REQUIRED(mu_) {
         num_random_samples_++;
@@ -231,10 +283,13 @@ class ShuffleDatasetOp : public UnaryDatasetOpKernel {
       }
 
       mutex mu_;
-      std::vector<std::vector<Tensor>> buffer_ GUARDED_BY(mu_);
+      std::unique_ptr<std::vector<Tensor>[]> buffer_ GUARDED_BY(mu_);
       std::unique_ptr<IteratorBase> input_impl_ GUARDED_BY(mu_);
       const int64 seed_ GUARDED_BY(mu_);
       const int64 seed2_ GUARDED_BY(mu_);
+      int64 epoch_ GUARDED_BY(mu_);
+      int64 num_elements_ GUARDED_BY(mu_);
+      std::deque<std::unique_ptr<Slice>> slices_ GUARDED_BY(mu_);
       random::PhiloxRandom parent_generator_ GUARDED_BY(mu_);
       random::SingleSampleAdapter<random::PhiloxRandom> generator_
           GUARDED_BY(mu_);
@@ -243,15 +298,58 @@ class ShuffleDatasetOp : public UnaryDatasetOpKernel {
 
     const DatasetBase* const input_;
     const int64 buffer_size_;
+    const int64 count_;
   };
+};
 
+class ShuffleDatasetOp : public ShuffleDatasetOpBase {
+ public:
+  explicit ShuffleDatasetOp(OpKernelConstruction* ctx)
+      : ShuffleDatasetOpBase(ctx) {
+    OP_REQUIRES_OK(ctx, ctx->GetAttr("reshuffle_each_iteration",
+                                     &reshuffle_each_iteration_));
+  }
+
+  void MakeDataset(OpKernelContext* ctx, DatasetBase* input,
+                   DatasetBase** output) override {
+    int64 buffer_size;
+    OP_REQUIRES_OK(
+        ctx, ParseScalarArgument<int64>(ctx, "buffer_size", &buffer_size));
+    OP_REQUIRES(
+        ctx, buffer_size > 0,
+        errors::InvalidArgument("buffer_size must be greater than zero."));
+
+    int64 seed;
+    OP_REQUIRES_OK(ctx, ParseScalarArgument<int64>(ctx, "seed", &seed));
+
+    int64 seed2;
+    OP_REQUIRES_OK(ctx, ParseScalarArgument<int64>(ctx, "seed2", &seed2));
+
+    // By TensorFlow convention, passing 0 for both seeds indicates
+    // that the shuffling should be seeded non-deterministically.
+    if (seed == 0 && seed2 == 0) {
+      seed = random::New64();
+      seed2 = random::New64();
+    }
+
+    int64 count = 1;
+    if (reshuffle_each_iteration_) {
+      *output =
+          new ReshufflingDataset(ctx, input, buffer_size, seed, seed2, count);
+    } else {
+      *output =
+          new FixedSeedDataset(ctx, input, buffer_size, seed, seed2, count);
+    }
+  }
+
+ private:
   // A dataset that uses a pseduorandom sequence of seeds for the iterators
   // created from it. Used when `reshuffle_each_iteration` is true.
   class ReshufflingDataset : public ShuffleDatasetBase {
    public:
     ReshufflingDataset(OpKernelContext* ctx, const DatasetBase* input,
-                       int64 buffer_size, int64 seed, int64 seed2)
-        : ShuffleDatasetBase(ctx, input, buffer_size),
+                       int64 buffer_size, int64 seed, int64 seed2, int64 count)
+        : ShuffleDatasetBase(ctx, input, buffer_size, count),
           seed_(seed),
           seed2_(seed2),
           parent_generator_(seed, seed2),
@@ -290,8 +388,8 @@ class ShuffleDatasetOp : public UnaryDatasetOpKernel {
   class FixedSeedDataset : public ShuffleDatasetBase {
    public:
     FixedSeedDataset(OpKernelContext* ctx, const DatasetBase* input,
-                     int64 buffer_size, int64 seed, int64 seed2)
-        : ShuffleDatasetBase(ctx, input, buffer_size),
+                     int64 buffer_size, int64 seed, int64 seed2, int64 count)
+        : ShuffleDatasetBase(ctx, input, buffer_size, count),
           seed_(seed),
           seed2_(seed) {}
 
@@ -336,9 +434,93 @@ class ShuffleDatasetOp : public UnaryDatasetOpKernel {
   bool reshuffle_each_iteration_;
 };
 
+class ShuffleAndRepeatDatasetOp : public ShuffleDatasetOpBase {
+ public:
+  explicit ShuffleAndRepeatDatasetOp(OpKernelConstruction* ctx)
+      : ShuffleDatasetOpBase(ctx) {}
+
+  void MakeDataset(OpKernelContext* ctx, DatasetBase* input,
+                   DatasetBase** output) override {
+    int64 buffer_size;
+    OP_REQUIRES_OK(
+        ctx, ParseScalarArgument<int64>(ctx, "buffer_size", &buffer_size));
+    OP_REQUIRES(
+        ctx, buffer_size > 0,
+        errors::InvalidArgument("buffer_size must be greater than zero."));
+
+    int64 seed;
+    OP_REQUIRES_OK(ctx, ParseScalarArgument<int64>(ctx, "seed", &seed));
+
+    int64 seed2;
+    OP_REQUIRES_OK(ctx, ParseScalarArgument<int64>(ctx, "seed2", &seed2));
+
+    int64 count;
+    OP_REQUIRES_OK(ctx, ParseScalarArgument<int64>(ctx, "count", &count));
+
+    // By TensorFlow convention, if both seeds are 0, then shuffling should be
+    // seeded non-deterministically.
+    if (seed == 0 && seed2 == 0) {
+      seed = random::New64();
+      seed2 = random::New64();
+    }
+
+    *output = new Dataset(ctx, input, buffer_size, seed, seed2, count);
+  }
+
+ private:
+  class Dataset : public ShuffleDatasetBase {
+   public:
+    Dataset(OpKernelContext* ctx, const DatasetBase* input, int64 buffer_size,
+            int64 seed, int64 seed2, int64 count)
+        : ShuffleDatasetBase(ctx, input, buffer_size, count),
+          seed_(seed),
+          seed2_(seed2) {}
+
+    string DebugString() override {
+      return strings::StrCat("ShuffleAndRepeatDatasetOp(", buffer_size_, ", ",
+                             seed_, ", ", seed2_, ", ", count_, ")::Dataset");
+    }
+
+    std::unique_ptr<IteratorBase> MakeIterator(
+        const string& prefix) const override {
+      return std::unique_ptr<IteratorBase>(new ShuffleDatasetBase::Iterator(
+          {this, strings::StrCat(prefix, "::ShuffleAndRepeat")}, seed_,
+          seed2_));
+    }
+
+   protected:
+    Status AsGraphDefInternal(OpKernelContext* ctx, DatasetGraphDefBuilder* b,
+                              Node** output) const override {
+      Node* input_graph_node = nullptr;
+      TF_RETURN_IF_ERROR(b->AddParentDataset(ctx, input_, &input_graph_node));
+      Node* buffer_size = nullptr;
+      Node* seed = nullptr;
+      Node* seed2 = nullptr;
+      Node* count = nullptr;
+
+      TF_RETURN_IF_ERROR(b->AddScalar(buffer_size_, &buffer_size));
+      TF_RETURN_IF_ERROR(b->AddScalar(seed_, &seed));
+      TF_RETURN_IF_ERROR(b->AddScalar(seed2_, &seed2));
+      TF_RETURN_IF_ERROR(b->AddScalar(count_, &count));
+      TF_RETURN_IF_ERROR(b->AddDataset(
+          this, {input_graph_node, buffer_size, seed, seed2, count},  // Inputs
+          {},                                                         // Attrs
+          output));
+      return Status::OK();
+    }
+
+   private:
+    const int64 seed_;
+    const int64 seed2_;
+  };
+};
+
 REGISTER_KERNEL_BUILDER(Name("ShuffleDataset").Device(DEVICE_CPU),
                         ShuffleDatasetOp);
 
+REGISTER_KERNEL_BUILDER(Name("ShuffleAndRepeatDataset").Device(DEVICE_CPU),
+                        ShuffleAndRepeatDatasetOp);
+
 }  // namespace
 
 }  // namespace tensorflow
diff --git a/tensorflow/core/ops/dataset_ops.cc b/tensorflow/core/ops/dataset_ops.cc
index be41531347..2072e0df57 100644
--- a/tensorflow/core/ops/dataset_ops.cc
+++ b/tensorflow/core/ops/dataset_ops.cc
@@ -508,8 +508,33 @@ reshuffle_each_iteration: If true, each iterator over this dataset will be given
   `seed` and `seed2` inputs. If false, each iterator will be given the same
   seed, and repeated iteration over this dataset will yield the exact same
   sequence of results.
-seed: A scalar seed for the random number generator. If either seed or
-  seed2 is set to be non-zero, the random number generator is seeded
+seed: A scalar seed for the random number generator. If either `seed` or
+  `seed2` is set to be non-zero, the random number generator is seeded
+  by the given seed.  Otherwise, a random seed is used.
+seed2: A second scalar seed to avoid seed collision.
+)doc");
+
+REGISTER_OP("ShuffleAndRepeatDataset")
+    .Input("input_dataset: variant")
+    .Input("buffer_size: int64")
+    .Input("seed: int64")
+    .Input("seed2: int64")
+    .Input("count: int64")
+    .Output("handle: variant")
+    .Attr("output_types: list(type) >= 1")
+    .Attr("output_shapes: list(shape) >= 1")
+    .SetShapeFn(shape_inference::ScalarShape)
+    .Doc(R"doc(
+Creates a dataset that shuffles and repeats elements from `input_dataset`
+pseudorandomly.
+
+buffer_size: The number of output elements to buffer in an iterator over
+  this dataset. Compare with the `min_after_dequeue` attr when creating a
+  `RandomShuffleQueue`.
+count: A scalar representing the number of times the underlying dataset
+  should be repeated. The default is `-1`, which results in infinite repetition.
+seed: A scalar seed for the random number generator. If either `seed` or
+  `seed2` is set to be non-zero, the random number generator is seeded
   by the given seed.  Otherwise, a random seed is used.
 seed2: A second scalar seed to avoid seed collision.
 )doc");
diff --git a/tensorflow/python/data/ops/dataset_ops.py b/tensorflow/python/data/ops/dataset_ops.py
index 76398beaa8..eba9637bdc 100644
--- a/tensorflow/python/data/ops/dataset_ops.py
+++ b/tensorflow/python/data/ops/dataset_ops.py
@@ -1246,8 +1246,7 @@ class ShuffleDataset(Dataset):
                input_dataset,
                buffer_size,
                seed=None,
-               reshuffle_each_iteration=None,
-               seed2=None):
+               reshuffle_each_iteration=None):
     """Randomly shuffles the elements of this dataset.
 
     Args:
@@ -1261,10 +1260,6 @@ class ShuffleDataset(Dataset):
       reshuffle_each_iteration: (Optional.) A boolean, which if true indicates
         that the dataset should be pseudorandomly reshuffled each time it is
         iterated over. (Defaults to `True`.)
-      seed2: (Optional.) A `tf.int64` scalar `tf.Tensor` used to avoid seed
-        collision. Users should generally not need to specify this. This is
-        supposed to be used when both the seeds for the Dataset op need to be
-        manually specified. If not None, seed must also be non-None.
 
     Returns:
       A `Dataset`.
@@ -1276,10 +1271,7 @@ class ShuffleDataset(Dataset):
     self._input_dataset = input_dataset
     self._buffer_size = ops.convert_to_tensor(
         buffer_size, dtype=dtypes.int64, name="buffer_size")
-    if seed2 is None:
-      seed, seed2 = random_seed.get_seed(seed)
-    elif seed is None:
-      raise ValueError("seed must be non-None if seed2 is non-None.")
+    seed, seed2 = random_seed.get_seed(seed)
     if seed is None:
       self._seed = constant_op.constant(0, dtype=dtypes.int64, name="seed")
     else:
-- 
GitLab


From 36a24ce9950e8cbc9fad6c05991d6edab34b7c9d Mon Sep 17 00:00:00 2001
From: Justin Lebar <jlebar@google.com>
Date: Mon, 18 Dec 2017 10:26:35 -0800
Subject: [PATCH 1162/1225] [XLA] Rename BatchNormRewriter ->
 BatchNormExpander.

PiperOrigin-RevId: 179439609
---
 tensorflow/compiler/xla/service/BUILD         | 12 ++++----
 ...norm_rewriter.cc => batchnorm_expander.cc} | 30 +++++++++----------
 ...chnorm_rewriter.h => batchnorm_expander.h} | 14 ++++-----
 ...ter_test.cc => batchnorm_expander_test.cc} | 12 ++++----
 tensorflow/compiler/xla/service/cpu/BUILD     |  2 +-
 .../compiler/xla/service/cpu/cpu_compiler.cc  |  4 +--
 tensorflow/compiler/xla/service/gpu/BUILD     |  2 +-
 .../compiler/xla/service/gpu/gpu_compiler.cc  |  6 ++--
 8 files changed, 41 insertions(+), 41 deletions(-)
 rename tensorflow/compiler/xla/service/{batchnorm_rewriter.cc => batchnorm_expander.cc} (96%)
 rename tensorflow/compiler/xla/service/{batchnorm_rewriter.h => batchnorm_expander.h} (83%)
 rename tensorflow/compiler/xla/service/{batchnorm_rewriter_test.cc => batchnorm_expander_test.cc} (93%)

diff --git a/tensorflow/compiler/xla/service/BUILD b/tensorflow/compiler/xla/service/BUILD
index 07ef98076e..99ef9adf88 100644
--- a/tensorflow/compiler/xla/service/BUILD
+++ b/tensorflow/compiler/xla/service/BUILD
@@ -1013,9 +1013,9 @@ tf_cc_test(
 )
 
 cc_library(
-    name = "batchnorm_rewriter",
-    srcs = ["batchnorm_rewriter.cc"],
-    hdrs = ["batchnorm_rewriter.h"],
+    name = "batchnorm_expander",
+    srcs = ["batchnorm_expander.cc"],
+    hdrs = ["batchnorm_expander.h"],
     deps = [
         ":hlo",
         ":hlo_pass",
@@ -1033,11 +1033,11 @@ cc_library(
 )
 
 tf_cc_test(
-    name = "batchnorm_rewriter_test",
+    name = "batchnorm_expander_test",
     size = "small",
-    srcs = ["batchnorm_rewriter_test.cc"],
+    srcs = ["batchnorm_expander_test.cc"],
     deps = [
-        ":batchnorm_rewriter",
+        ":batchnorm_expander",
         ":hlo",
         ":hlo_matchers",
         ":hlo_pass",
diff --git a/tensorflow/compiler/xla/service/batchnorm_rewriter.cc b/tensorflow/compiler/xla/service/batchnorm_expander.cc
similarity index 96%
rename from tensorflow/compiler/xla/service/batchnorm_rewriter.cc
rename to tensorflow/compiler/xla/service/batchnorm_expander.cc
index 2bbae25aee..b806d61663 100644
--- a/tensorflow/compiler/xla/service/batchnorm_rewriter.cc
+++ b/tensorflow/compiler/xla/service/batchnorm_expander.cc
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#include "tensorflow/compiler/xla/service/batchnorm_rewriter.h"
+#include "tensorflow/compiler/xla/service/batchnorm_expander.h"
 
 #include <algorithm>
 #include <memory>
@@ -45,9 +45,9 @@ limitations under the License.
 
 namespace xla {
 
-// BatchNormRewriterVisitor traverses the HLO computation and rewrites BatchNorm
+// BatchNormExpanderVisitor traverses the HLO computation and rewrites BatchNorm
 // operations into smaller operations.
-class BatchNormRewriterVisitor : public DfsHloVisitorWithDefault {
+class BatchNormExpanderVisitor : public DfsHloVisitorWithDefault {
  public:
   // Default visitor action is to do nothing and return OK.
   Status DefaultAction(HloInstruction* /*hlo_instruction*/) override {
@@ -68,10 +68,10 @@ class BatchNormRewriterVisitor : public DfsHloVisitorWithDefault {
   // Returns whether any batch norm ops were rewritten.
   const bool changed() const { return changed_; }
 
-  ~BatchNormRewriterVisitor() override = default;
+  ~BatchNormExpanderVisitor() override = default;
 
  private:
-  explicit BatchNormRewriterVisitor(HloComputation* computation,
+  explicit BatchNormExpanderVisitor(HloComputation* computation,
                                     bool rewrite_training_op,
                                     bool rewrite_inference_op,
                                     bool rewrite_grad_op, bool use_fusion)
@@ -94,7 +94,7 @@ class BatchNormRewriterVisitor : public DfsHloVisitorWithDefault {
     return computation_->parent()->AddEmbeddedComputation(b.Build(scalar_op));
   }
 
-  // Current HloComputation instance the BatchNormRewriter is
+  // Current HloComputation instance the BatchNormExpander is
   // traversing.
   HloComputation* computation_;
 
@@ -130,11 +130,11 @@ class BatchNormRewriterVisitor : public DfsHloVisitorWithDefault {
   }
 };
 
-bool BatchNormRewriterVisitor::Run(HloComputation* computation,
+bool BatchNormExpanderVisitor::Run(HloComputation* computation,
                                    bool rewrite_training_op,
                                    bool rewrite_inference_op,
                                    bool rewrite_grad_op, bool use_fusion) {
-  BatchNormRewriterVisitor visitor(
+  BatchNormExpanderVisitor visitor(
       computation,
       /*rewrite_training_op=*/rewrite_training_op,
       /*rewrite_inference_op=*/rewrite_inference_op,
@@ -144,7 +144,7 @@ bool BatchNormRewriterVisitor::Run(HloComputation* computation,
   return visitor.changed_;
 }
 
-Status BatchNormRewriterVisitor::HandleBatchNormTraining(
+Status BatchNormExpanderVisitor::HandleBatchNormTraining(
     HloInstruction* batch_norm) {
   if (!rewrite_training_op_) {
     return Status::OK();
@@ -299,7 +299,7 @@ Status BatchNormRewriterVisitor::HandleBatchNormTraining(
   return Status::OK();
 }
 
-Status BatchNormRewriterVisitor::HandleBatchNormInference(
+Status BatchNormExpanderVisitor::HandleBatchNormInference(
     HloInstruction* batch_norm) {
   if (!rewrite_inference_op_) {
     return Status::OK();
@@ -397,7 +397,7 @@ Status BatchNormRewriterVisitor::HandleBatchNormInference(
   return Status::OK();
 }
 
-Status BatchNormRewriterVisitor::HandleBatchNormGrad(
+Status BatchNormExpanderVisitor::HandleBatchNormGrad(
     HloInstruction* batch_norm) {
   // Use the following formulas to calculate gradients:
   // scale_grad =
@@ -593,17 +593,17 @@ Status BatchNormRewriterVisitor::HandleBatchNormGrad(
   return Status::OK();
 }
 
-StatusOr<bool> BatchNormRewriter::Run(HloModule* module) {
-  XLA_VLOG_LINES(2, "BatchNormRewriter::Run(), before:\n" + module->ToString());
+StatusOr<bool> BatchNormExpander::Run(HloModule* module) {
+  XLA_VLOG_LINES(2, "BatchNormExpander::Run(), before:\n" + module->ToString());
   bool changed = false;
   for (auto* comp : module->MakeNonfusionComputations()) {
-    if (BatchNormRewriterVisitor::Run(comp, rewrite_training_op_,
+    if (BatchNormExpanderVisitor::Run(comp, rewrite_training_op_,
                                       rewrite_inference_op_, rewrite_grad_op_,
                                       use_fusion_)) {
       changed = true;
     }
   }
-  XLA_VLOG_LINES(2, "BatchNormRewriter::Run(), after:\n" + module->ToString());
+  XLA_VLOG_LINES(2, "BatchNormExpander::Run(), after:\n" + module->ToString());
   return changed;
 }
 
diff --git a/tensorflow/compiler/xla/service/batchnorm_rewriter.h b/tensorflow/compiler/xla/service/batchnorm_expander.h
similarity index 83%
rename from tensorflow/compiler/xla/service/batchnorm_rewriter.h
rename to tensorflow/compiler/xla/service/batchnorm_expander.h
index f601741d96..4ad987085d 100644
--- a/tensorflow/compiler/xla/service/batchnorm_rewriter.h
+++ b/tensorflow/compiler/xla/service/batchnorm_expander.h
@@ -13,8 +13,8 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_BATCHNORM_REWRITER_H_
-#define TENSORFLOW_COMPILER_XLA_SERVICE_BATCHNORM_REWRITER_H_
+#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_BATCHNORM_EXPANDER_H_
+#define TENSORFLOW_COMPILER_XLA_SERVICE_BATCHNORM_EXPANDER_H_
 
 #include <utility>
 
@@ -26,18 +26,18 @@ namespace xla {
 // A pass which rewrites batch norm operations into more operations. Breaking a
 // big operation into smaller operations helps leverage our generic fusion
 // logic.
-class BatchNormRewriter : public HloPassInterface {
+class BatchNormExpander : public HloPassInterface {
  public:
   // When use_fusion is set, a multi-output fusion node is created.
-  BatchNormRewriter(bool rewrite_training_op = false,
+  BatchNormExpander(bool rewrite_training_op = false,
                     bool rewrite_inference_op = false,
                     bool rewrite_grad_op = false, bool use_fusion = true)
       : rewrite_training_op_(rewrite_training_op),
         rewrite_inference_op_(rewrite_inference_op),
         rewrite_grad_op_(rewrite_grad_op),
         use_fusion_(use_fusion) {}
-  ~BatchNormRewriter() = default;
-  tensorflow::StringPiece name() const override { return "batchnorm_rewriter"; }
+  ~BatchNormExpander() = default;
+  tensorflow::StringPiece name() const override { return "batchnorm_expander"; }
 
   // Run operation expander on the given computation. Returns whether the
   // computation was changed.
@@ -52,4 +52,4 @@ class BatchNormRewriter : public HloPassInterface {
 
 }  // namespace xla
 
-#endif  // TENSORFLOW_COMPILER_XLA_SERVICE_BATCHNORM_REWRITER_H_
+#endif  // TENSORFLOW_COMPILER_XLA_SERVICE_BATCHNORM_EXPANDER_H_
diff --git a/tensorflow/compiler/xla/service/batchnorm_rewriter_test.cc b/tensorflow/compiler/xla/service/batchnorm_expander_test.cc
similarity index 93%
rename from tensorflow/compiler/xla/service/batchnorm_rewriter_test.cc
rename to tensorflow/compiler/xla/service/batchnorm_expander_test.cc
index 590f79aee5..aa36e64b07 100644
--- a/tensorflow/compiler/xla/service/batchnorm_rewriter_test.cc
+++ b/tensorflow/compiler/xla/service/batchnorm_expander_test.cc
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#include "tensorflow/compiler/xla/service/batchnorm_rewriter.h"
+#include "tensorflow/compiler/xla/service/batchnorm_expander.h"
 
 #include <memory>
 #include <utility>
@@ -36,10 +36,10 @@ limitations under the License.
 namespace xla {
 namespace {
 
-using BatchNormRewriterTest = HloTestBase;
+using BatchNormExpanderTest = HloTestBase;
 
 // Test that we expand BatchNormTraining.
-TEST_F(BatchNormRewriterTest, BatchNormTraining) {
+TEST_F(BatchNormExpanderTest, BatchNormTraining) {
   Shape input_shape = ShapeUtil::MakeShape(F32, {2, 2, 2, 2});
   Shape scale_shape = ShapeUtil::MakeShape(F32, {2});
   Shape offset_shape = ShapeUtil::MakeShape(F32, {2});
@@ -63,7 +63,7 @@ TEST_F(BatchNormRewriterTest, BatchNormTraining) {
   auto computation = module->AddEntryComputation(builder.Build());
   HloInstruction* root = computation->root_instruction();
   EXPECT_EQ(root->opcode(), HloOpcode::kBatchNormTraining);
-  BatchNormRewriter rewriter(/*rewrite_training_op=*/true,
+  BatchNormExpander rewriter(/*rewrite_training_op=*/true,
                              /*rewrite_inference_op=*/true,
                              /*rewrite_grad_op=*/true);
   ASSERT_TRUE(rewriter.Run(module.get()).ValueOrDie());
@@ -73,7 +73,7 @@ TEST_F(BatchNormRewriterTest, BatchNormTraining) {
 }
 
 // Test that we expand BatchNormGrad.
-TEST_F(BatchNormRewriterTest, BatchNormGrad) {
+TEST_F(BatchNormExpanderTest, BatchNormGrad) {
   Shape input_shape = ShapeUtil::MakeShape(F32, {2, 2, 2, 2});
   Shape scale_shape = ShapeUtil::MakeShape(F32, {2});
   Shape mean_shape = ShapeUtil::MakeShape(F32, {2});
@@ -105,7 +105,7 @@ TEST_F(BatchNormRewriterTest, BatchNormGrad) {
   auto computation = module->AddEntryComputation(builder.Build());
   HloInstruction* root = computation->root_instruction();
   EXPECT_EQ(root->opcode(), HloOpcode::kBatchNormGrad);
-  BatchNormRewriter rewriter(/*rewrite_training_op=*/true,
+  BatchNormExpander rewriter(/*rewrite_training_op=*/true,
                              /*rewrite_inference_op=*/true,
                              /*rewrite_grad_op=*/true);
   ASSERT_TRUE(rewriter.Run(module.get()).ValueOrDie());
diff --git a/tensorflow/compiler/xla/service/cpu/BUILD b/tensorflow/compiler/xla/service/cpu/BUILD
index b0c959f40b..ed142bd077 100644
--- a/tensorflow/compiler/xla/service/cpu/BUILD
+++ b/tensorflow/compiler/xla/service/cpu/BUILD
@@ -100,7 +100,7 @@ cc_library(
         "//tensorflow/compiler/xla:util",
         "//tensorflow/compiler/xla:xla_data_proto",
         "//tensorflow/compiler/xla/service:algebraic_simplifier",
-        "//tensorflow/compiler/xla/service:batchnorm_rewriter",
+        "//tensorflow/compiler/xla/service:batchnorm_expander",
         "//tensorflow/compiler/xla/service:buffer_assignment",
         "//tensorflow/compiler/xla/service:buffer_liveness",
         "//tensorflow/compiler/xla/service:call_inliner",
diff --git a/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc b/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc
index 55e7c7bc2c..6dc30bfe2c 100644
--- a/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc
+++ b/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc
@@ -42,7 +42,7 @@ limitations under the License.
 #include "tensorflow/compiler/xla/protobuf_util.h"
 #include "tensorflow/compiler/xla/ptr_util.h"
 #include "tensorflow/compiler/xla/service/algebraic_simplifier.h"
-#include "tensorflow/compiler/xla/service/batchnorm_rewriter.h"
+#include "tensorflow/compiler/xla/service/batchnorm_expander.h"
 #include "tensorflow/compiler/xla/service/buffer_assignment.h"
 #include "tensorflow/compiler/xla/service/buffer_liveness.h"
 #include "tensorflow/compiler/xla/service/call_inliner.h"
@@ -281,7 +281,7 @@ Status CpuCompiler::RunHloPasses(HloModule* module, bool is_aot_compile) {
         pipeline.AddPass<HloPassFix<HloPassPipeline>>("simplification");
     pass.AddInvariantChecker<HloVerifier>(ShapeSizeBytesFunction());
 
-    pass.AddPass<BatchNormRewriter>(
+    pass.AddPass<BatchNormExpander>(
         /*rewrite_training_op=*/true,
         /*rewrite_inference_op=*/true,
         /*rewrite_grad_op=*/true,
diff --git a/tensorflow/compiler/xla/service/gpu/BUILD b/tensorflow/compiler/xla/service/gpu/BUILD
index 4a72f87efd..f673f0cbd0 100644
--- a/tensorflow/compiler/xla/service/gpu/BUILD
+++ b/tensorflow/compiler/xla/service/gpu/BUILD
@@ -445,7 +445,7 @@ cc_library(
         "//tensorflow/compiler/xla:types",
         "//tensorflow/compiler/xla:util",
         "//tensorflow/compiler/xla/service:algebraic_simplifier",
-        "//tensorflow/compiler/xla/service:batchnorm_rewriter",
+        "//tensorflow/compiler/xla/service:batchnorm_expander",
         "//tensorflow/compiler/xla/service:buffer_assignment",
         "//tensorflow/compiler/xla/service:buffer_liveness",
         "//tensorflow/compiler/xla/service:call_inliner",
diff --git a/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc b/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc
index 1ccfe323c5..fc3b299936 100644
--- a/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc
+++ b/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc
@@ -27,7 +27,7 @@ limitations under the License.
 #include "tensorflow/compiler/xla/protobuf_util.h"
 #include "tensorflow/compiler/xla/ptr_util.h"
 #include "tensorflow/compiler/xla/service/algebraic_simplifier.h"
-#include "tensorflow/compiler/xla/service/batchnorm_rewriter.h"
+#include "tensorflow/compiler/xla/service/batchnorm_expander.h"
 #include "tensorflow/compiler/xla/service/buffer_assignment.h"
 #include "tensorflow/compiler/xla/service/buffer_liveness.h"
 #include "tensorflow/compiler/xla/service/call_inliner.h"
@@ -144,9 +144,9 @@ tensorflow::Status OptimizeHloModule(
           pipeline.AddPass<HloPassFix<HloPassPipeline>>("simplification");
       pass.AddInvariantChecker<HloVerifier>(shape_size_function);
 
-      // TODO(b/62764704): Do not rewrite on GPU, use cuDNN's BatchNorm APIs
+      // TODO(b/62764704): Do not expand on GPU, use cuDNN's BatchNorm APIs
       // instead.
-      pass.AddPass<BatchNormRewriter>(
+      pass.AddPass<BatchNormExpander>(
           /*rewrite_training_op=*/true,
           /*rewrite_inference_op=*/true,
           /*rewrite_grad_op=*/true,
-- 
GitLab


From bacd47b8e14c38b146bc5303ae093e2af378afac Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 18 Dec 2017 10:27:34 -0800
Subject: [PATCH 1163/1225] A fast "pow" function for positive integer
 exponents

PiperOrigin-RevId: 179439743
---
 tensorflow/core/lib/math/math_util.h       |  34 +++++++
 tensorflow/core/lib/math/math_util_test.cc | 113 +++++++++++++++++++++
 2 files changed, 147 insertions(+)

diff --git a/tensorflow/core/lib/math/math_util.h b/tensorflow/core/lib/math/math_util.h
index 9e71598622..41d486f2bd 100644
--- a/tensorflow/core/lib/math/math_util.h
+++ b/tensorflow/core/lib/math/math_util.h
@@ -64,6 +64,26 @@ class MathUtil {
 
   template <typename IntegralType>
   static IntegralType GCD(IntegralType x, IntegralType y);
+
+  // ----------------------------------------------------------------------
+  // IPow<T>
+  //   Computes the result of raising a number to a non-negative integral power.
+  //
+  //  * T: An integral type, floating-point type, or user-defined type for which
+  //    operator*= is defined.
+  //  * base: the base "v" of the operation
+  //  * exp: the exponent "i" of the operation; must be non-negative.
+  //
+  // Computes v^i, in a way that is faster than std::pow (which supports
+  // arbitrary real exponents).
+  //
+  // When T is a floating point type, this has the same semantics as std::pow,
+  // but it is much faster. When T is an integral type, computations are
+  // performed in the value domain of T, and overflow semantics are those of T.
+  //
+  // Input validity is DCHECKed.
+  template <typename T>
+  static T IPow(T base, int exp);
 };
 
 // ---- CeilOrFloorOfRatio ----
@@ -124,6 +144,20 @@ IntegralType MathUtil::GCD(IntegralType a, IntegralType b) {
   return a;
 }
 
+// ---- IPow ----
+// Implemented with the squared exponentiation method (a.k.a. double-and-add).
+//
+// Note that "exp >>= 1" is faster than "exp /= 2" on at least one platform.
+template <typename T>
+T MathUtil::IPow(T base, int exp) {
+  DCHECK_GE(exp, 0);
+  for (T result(1);; base *= base) {
+    if ((exp & 1) != 0) result *= base;
+    exp >>= 1;
+    if (exp == 0) return result;
+  }
+}
+
 }  // namespace tensorflow
 
 #endif  // TENSORFLOW_LIB_MATH_MATH_UTIL_H_
diff --git a/tensorflow/core/lib/math/math_util_test.cc b/tensorflow/core/lib/math/math_util_test.cc
index a96e5467c3..cad5d0d899 100644
--- a/tensorflow/core/lib/math/math_util_test.cc
+++ b/tensorflow/core/lib/math/math_util_test.cc
@@ -15,12 +15,17 @@ limitations under the License.
 
 #include "tensorflow/core/lib/math/math_util.h"
 
+#include <cmath>
+#include <limits>
 #include <vector>
+
 #include "tensorflow/core/platform/logging.h"
 #include "tensorflow/core/platform/test.h"
+#include "tensorflow/core/platform/test_benchmark.h"
 #include "tensorflow/core/platform/types.h"
 
 namespace tensorflow {
+namespace {
 
 // Number of arguments for each test of the CeilOrRatio method
 const int kNumTestArguments = 4;
@@ -224,4 +229,112 @@ TEST(MathUtil, GCD) {
             MathUtil::GCD<uint64>(biggish_prime * 3, biggish_prime * 4));
 }
 
+template <typename T>
+void TestOneIPowN() {
+  const T one{1};
+  for (int i = 0; i < 1024; ++i) {
+    // Computations are exact.
+    EXPECT_EQ(MathUtil::IPow(one, i), one);
+  }
+}
+
+template <typename T>
+void TestTwoIPowN() {
+  int limit = std::is_integral<T>::value ? std::numeric_limits<T>::digits : 63;
+  for (int i = 0; i < limit; ++i) {
+    // Computations are exact.
+    EXPECT_EQ(MathUtil::IPow(T{2}, i), static_cast<T>(1ull << i));
+  }
+}
+
+template <typename T>
+void TestFloatIPow(const int max_exponent, const T start, const T end,
+                   const T step) {
+  for (T f = start; f < end; f += step) {
+    for (int i = 0; i < max_exponent; ++i) {
+      EXPECT_FLOAT_EQ(MathUtil::IPow(f, i), pow(f, i));
+    }
+  }
+}
+
+TEST(MathUtil, IPow) {
+  TestOneIPowN<double>();
+  TestOneIPowN<float>();
+  TestOneIPowN<int>();
+  TestOneIPowN<int64>();
+  TestTwoIPowN<double>();
+  TestTwoIPowN<float>();
+  TestTwoIPowN<int>();
+  TestTwoIPowN<int64>();
+
+  EXPECT_EQ(MathUtil::IPow(3, 0), 1);
+  EXPECT_EQ(MathUtil::IPow(3, 1), 3);
+  EXPECT_EQ(MathUtil::IPow(3, 2), 9);
+  EXPECT_EQ(MathUtil::IPow(3, 3), 27);
+  EXPECT_EQ(MathUtil::IPow(3, 4), 81);
+  EXPECT_EQ(MathUtil::IPow(3, 5), 243);
+
+  TestFloatIPow<float>(13, -16.0f, 16.0f, 1.0f / 8);
+  TestFloatIPow<double>(13, -16.0, 16.0, 1.0 / 8);
+
+  TestFloatIPow<float>(13, -1.0f / (1 << 12), -1.0f / (1 << 12),
+                       1.0f / (1 << 16));
+  TestFloatIPow<double>(13, -1.0 / (1 << 12), -1.0 / (1 << 12),
+                        1.0 / (1 << 16));
+}
+
+TEST(MathUtil, IPowEdgeCases) {
+  constexpr const double kInf = std::numeric_limits<double>::infinity();
+
+  EXPECT_EQ(MathUtil::IPow(-12345.0, 79), -kInf);
+  EXPECT_EQ(MathUtil::IPow(-12345.0, 80), +kInf);
+
+  // The semantics of the edge cases that follow  are defined in the standard:
+  // http://en.cppreference.com/w/cpp/numeric/math/pow for a summary.
+
+  // 1 - These edge cases apply.
+  // pow(+0, exp), where exp is a positive odd integer, returns +0
+  EXPECT_EQ(MathUtil::IPow(+0.0, 3), +0.0);
+  // pow(-0, exp), where exp is a positive odd integer, returns -0
+  EXPECT_EQ(MathUtil::IPow(-0.0, 3), -0.0);
+  // pow(±0, exp), where exp is positive non-integer or a positive even integer,
+  // returns +0
+  EXPECT_EQ(MathUtil::IPow(+0.0, 42), +0.0);
+  EXPECT_EQ(MathUtil::IPow(-0.0, 42), +0.0);
+  // pow(base, ±0) returns 1 for any base, even when base is NaN
+  EXPECT_EQ(MathUtil::IPow(-kInf, 0.0), 1.0);
+  EXPECT_EQ(MathUtil::IPow(-2.0, 0.0), 1.0);
+  EXPECT_EQ(MathUtil::IPow(-1.0, 0.0), 1.0);
+  EXPECT_EQ(MathUtil::IPow(-0.0, 0.0), 1.0);
+  EXPECT_EQ(MathUtil::IPow(+0.0, 0.0), 1.0);
+  EXPECT_EQ(MathUtil::IPow(+1.0, 0.0), 1.0);
+  EXPECT_EQ(MathUtil::IPow(+2.0, 0.0), 1.0);
+  EXPECT_EQ(MathUtil::IPow(+kInf, 0.0), 1.0);
+  EXPECT_EQ(MathUtil::IPow(std::numeric_limits<double>::quiet_NaN(), 0.0), 1.0);
+  // pow(-∞, exp) returns -∞ if exp is a positive odd integer
+  EXPECT_EQ(MathUtil::IPow(-kInf, 43), -kInf);
+  // pow(-∞, exp) returns +∞ if exp is a positive non-integer or even integer
+  EXPECT_EQ(MathUtil::IPow(-kInf, 42), +kInf);
+  // pow(+∞, exp) returns +∞ for any positive exp
+  EXPECT_EQ(MathUtil::IPow(+kInf, 42), +kInf);
+  EXPECT_EQ(MathUtil::IPow(+kInf, 43), +kInf);
+
+  // 2 - These do not apply due to the restricted exp range.
+  // pow(+0, exp), where exp is a negative odd integer, returns +∞ and raises
+  // FE_DIVBYZERO pow(-0, exp), where exp is a negative odd integer, returns -∞
+  // and raises FE_DIVBYZERO pow(±0, exp), where exp is negative, finite, and is
+  // an even integer or a non-integer, returns +∞ and raises FE_DIVBYZERO
+  // pow(-1, ±∞) returns 1
+  // pow(+1, exp) returns 1 for any exp, even when exp is NaN
+  // pow(±0, -∞) returns +∞ and may raise FE_DIVBYZERO
+  // pow(base, exp) returns NaN and raises FE_INVALID if base is finite and
+  // negative and exp is finite and non-integer. pow(base, -∞) returns +∞ for
+  // any |base|<1 pow(base, -∞) returns +0 for any |base|>1 pow(base, +∞)
+  // returns +0 for any |base|<1 pow(base, +∞) returns +∞ for any |base|>1
+  // pow(-∞, exp) returns -0 if exp is a negative odd integer
+  // pow(-∞, exp) returns +0 if exp is a negative non-integer or even integer
+  // pow(+∞, exp) returns +0 for any negative exp
+}
+
+}  // namespace
 }  // namespace tensorflow
-- 
GitLab


From cc6a1d343940e155be4fe2668c95dd30127dcffa Mon Sep 17 00:00:00 2001
From: Sanjoy Das <sanjoy@google.com>
Date: Mon, 18 Dec 2017 10:57:22 -0800
Subject: [PATCH 1164/1225] [XLA:CPU] Parameters and temps for embedded
 computations are not invariant

PiperOrigin-RevId: 179443961
---
 .../compiler/xla/service/cpu/ir_emitter.cc    | 19 ++++++++++++++-----
 .../compiler/xla/service/hlo_instruction.cc   |  5 +++++
 tensorflow/compiler/xla/tests/BUILD           |  1 +
 3 files changed, 20 insertions(+), 5 deletions(-)

diff --git a/tensorflow/compiler/xla/service/cpu/ir_emitter.cc b/tensorflow/compiler/xla/service/cpu/ir_emitter.cc
index 4bf3e22751..a433debbf5 100644
--- a/tensorflow/compiler/xla/service/cpu/ir_emitter.cc
+++ b/tensorflow/compiler/xla/service/cpu/ir_emitter.cc
@@ -1386,9 +1386,14 @@ Status IrEmitter::HandleParameter(HloInstruction* parameter) {
   llvm::LoadInst* param_address_untyped =
       ir_builder_.CreateLoad(param_address_offset);
   param_address_untyped->setName(AsStringRef(IrName(parameter, "untyped")));
-  if (hlo_module_config_.debug_options()
+  if (is_top_level_computation_ &&
+      hlo_module_config_.debug_options()
           .xla_llvm_enable_invariant_load_metadata()) {
-    // We never reassign parameters, so this load is invariant.
+    // In the entry computation the parameter slots in the %params argument are
+    // invariant through program execution.  In computations that are called
+    // from the entry computation (via kWhile, kCall and kConditional) the
+    // parameter slots are *not* invariant since they're written to by their
+    // callers.
     param_address_untyped->setMetadata(
         llvm::LLVMContext::MD_invariant_load,
         llvm::MDNode::get(param_address_untyped->getContext(), /*MDs=*/{}));
@@ -2829,10 +2834,14 @@ llvm::Value* IrEmitter::EmitTempBufferPointer(
       GetTempBuffersArgument(), slice.index(), &ir_builder_);
   llvm::LoadInst* tempbuf_address_base =
       ir_builder_.CreateLoad(tempbuf_address_ptr);
-  if (hlo_module_config_.debug_options()
+  if (is_top_level_computation_ &&
+      hlo_module_config_.debug_options()
           .xla_llvm_enable_invariant_load_metadata()) {
-    // Loading the address of a buffer is invariant of the point at which the
-    // load is executed in the program because we never reassign buffers.
+    // In the entry computation the parameter slots in the %params argument are
+    // invariant through program execution.  In computations that are called
+    // from the entry computation (via kWhile, kCall and kConditional) the
+    // parameter slots are *not* invariant since they're written to by their
+    // callers.
     tempbuf_address_base->setMetadata(
         llvm::LLVMContext::MD_invariant_load,
         llvm::MDNode::get(tempbuf_address_base->getContext(), /*MDs=*/{}));
diff --git a/tensorflow/compiler/xla/service/hlo_instruction.cc b/tensorflow/compiler/xla/service/hlo_instruction.cc
index 79855a1393..89a95b2b99 100644
--- a/tensorflow/compiler/xla/service/hlo_instruction.cc
+++ b/tensorflow/compiler/xla/service/hlo_instruction.cc
@@ -1272,6 +1272,11 @@ std::unique_ptr<HloInstruction> HloInstruction::CloneWithNewOperands(
                                   new_operands[4], epsilon(), feature_index());
       break;
     case HloOpcode::kConditional:
+      CHECK_EQ(new_operands.size(), 3);
+      clone = CreateConditional(shape, new_operands[0], new_operands[1],
+                                true_computation(), new_operands[2],
+                                false_computation());
+      break;
     case HloOpcode::kRecv:
     case HloOpcode::kRecvDone:
     case HloOpcode::kSend:
diff --git a/tensorflow/compiler/xla/tests/BUILD b/tensorflow/compiler/xla/tests/BUILD
index 9ce9b7aae4..d8c0584d10 100644
--- a/tensorflow/compiler/xla/tests/BUILD
+++ b/tensorflow/compiler/xla/tests/BUILD
@@ -446,6 +446,7 @@ xla_test(
     backends = [
         "cpu",
         "gpu",
+        "cpu_parallel",
     ],
     deps = [
         "//tensorflow/compiler/xla:xla_data_proto",
-- 
GitLab


From 3338e27c3b31160f5a14a7822e1d116c76091543 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 18 Dec 2017 12:42:48 -0800
Subject: [PATCH 1165/1225] Small doc fix in kfac loss_functions: `evaluate()`
 returns *negative* log probabilities.

PiperOrigin-RevId: 179456448
---
 tensorflow/contrib/kfac/python/ops/loss_functions.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tensorflow/contrib/kfac/python/ops/loss_functions.py b/tensorflow/contrib/kfac/python/ops/loss_functions.py
index e2e5bc3ffe..d449abcfa7 100644
--- a/tensorflow/contrib/kfac/python/ops/loss_functions.py
+++ b/tensorflow/contrib/kfac/python/ops/loss_functions.py
@@ -91,13 +91,13 @@ class LossFunction(object):
 
   @abc.abstractmethod
   def _evaluate(self, targets):
-    """Evaluates the log probability of the targets.
+    """Evaluates the negative log probability of the targets.
 
     Args:
       targets: Tensor that distribution can calculate log_prob() of.
 
     Returns:
-      log probability of each target, summed across all targets.
+      negative log probability of each target, summed across all targets.
     """
     pass
 
-- 
GitLab


From 768729ac47fd8bb84a195a1c32954de84434977b Mon Sep 17 00:00:00 2001
From: Gunhan Gulsoy <gunan@google.com>
Date: Mon, 18 Dec 2017 13:09:23 -0800
Subject: [PATCH 1166/1225] Enable api compatibility test to also run on macos.

---
 tensorflow/tools/api/tests/api_compatibility_test.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tensorflow/tools/api/tests/api_compatibility_test.py b/tensorflow/tools/api/tests/api_compatibility_test.py
index a8fdf4c9a0..88d14f3867 100644
--- a/tensorflow/tools/api/tests/api_compatibility_test.py
+++ b/tensorflow/tools/api/tests/api_compatibility_test.py
@@ -248,8 +248,8 @@ class ApiCompatibilityTest(test.TestCase):
       logging.info('No differences found between API and golden.')
 
   @unittest.skipUnless(
-      sys.version_info.major == 2 and os.uname()[0] == 'Linux',
-      'API compabitility test goldens are generated using python2 on Linux.')
+      sys.version_info.major == 2,
+      'API compabitility test goldens are generated using python2.')
   def testAPIBackwardsCompatibility(self):
     # Extract all API stuff.
     visitor = python_object_to_proto_visitor.PythonObjectToProtoVisitor()
-- 
GitLab


From 4abda8cf06d0aab3a42c2bcce162272055a6ee8b Mon Sep 17 00:00:00 2001
From: Sukriti Ramesh <sukritiramesh@google.com>
Date: Mon, 18 Dec 2017 13:20:19 -0800
Subject: [PATCH 1167/1225] SavedModel tag logging.

PiperOrigin-RevId: 179460064
---
 tensorflow/cc/saved_model/loader.cc | 22 ++++++++++++++--------
 1 file changed, 14 insertions(+), 8 deletions(-)

diff --git a/tensorflow/cc/saved_model/loader.cc b/tensorflow/cc/saved_model/loader.cc
index f98abc8a81..acef098c7d 100644
--- a/tensorflow/cc/saved_model/loader.cc
+++ b/tensorflow/cc/saved_model/loader.cc
@@ -62,6 +62,15 @@ Status ReadSavedModel(const string& export_dir, SavedModel* saved_model_proto) {
                     export_dir);
 }
 
+string GetTagsAsString(const std::unordered_set<string>& tags) {
+  string tags_as_string = "{ ";
+  for (const string& tag : tags) {
+    tags_as_string = strings::StrCat(tags_as_string, tag, " ");
+  }
+  tags_as_string = strings::StrCat(tags_as_string, "}");
+  return tags_as_string;
+}
+
 Status FindMetaGraphDefToLoad(const SavedModel& saved_model_proto,
                               const std::unordered_set<string>& tags,
                               MetaGraphDef* meta_graph_def_to_load) {
@@ -77,14 +86,9 @@ Status FindMetaGraphDefToLoad(const SavedModel& saved_model_proto,
       return Status::OK();
     }
   }
-  string tags_as_string = "{ ";
-  for (const string& tag : tags) {
-    tags_as_string = strings::StrCat(tags_as_string, tag, " ");
-  }
-  tags_as_string = strings::StrCat(tags_as_string, "}");
   return Status(error::Code::NOT_FOUND,
                 "Could not find meta graph def matching supplied tags: " +
-                    tags_as_string +
+                    GetTagsAsString(tags) +
                     ". To inspect available tag-sets in the SavedModel, please "
                     "use the SavedModel CLI: `saved_model_cli`");
 }
@@ -233,7 +237,8 @@ Status LoadSavedModelInternal(const SessionOptions& session_options,
     return Status(error::Code::NOT_FOUND,
                   "SavedModel not found in export directory: " + export_dir);
   }
-  LOG(INFO) << "Loading SavedModel from: " << export_dir;
+  LOG(INFO) << "Loading SavedModel with tags: " << GetTagsAsString(tags)
+            << "; from: " << export_dir;
 
   SavedModel saved_model_proto;
   TF_RETURN_IF_ERROR(ReadSavedModel(export_dir, &saved_model_proto));
@@ -281,7 +286,8 @@ Status LoadSavedModel(const SessionOptions& session_options,
     return end_microseconds - start_microseconds;
   }();
   auto log_and_count = [&](const string& status_str) {
-    LOG(INFO) << "Loading SavedModel: " << status_str << ". Took "
+    LOG(INFO) << "SavedModel load for tags " << GetTagsAsString(tags)
+              << "; Status: " << status_str << ". Took "
               << load_latency_microsecs << " microseconds.";
     load_attempt_count->GetCell(export_dir, status_str)->IncrementBy(1);
   };
-- 
GitLab


From 2daf4aa01b3d1d837eaaaebcbe4527b521cca7a9 Mon Sep 17 00:00:00 2001
From: Jonathan Hseu <jhseu@google.com>
Date: Mon, 18 Dec 2017 14:01:58 -0800
Subject: [PATCH 1168/1225] Delete the instructions regarding manually
 installing protobuf.

PiperOrigin-RevId: 179464468
---
 tensorflow/docs_src/install/install_linux.md | 41 --------------------
 tensorflow/docs_src/install/install_mac.md   | 41 --------------------
 2 files changed, 82 deletions(-)

diff --git a/tensorflow/docs_src/install/install_linux.md b/tensorflow/docs_src/install/install_linux.md
index 28b04bab95..e3d5b80aa7 100644
--- a/tensorflow/docs_src/install/install_linux.md
+++ b/tensorflow/docs_src/install/install_linux.md
@@ -718,44 +718,3 @@ https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.4.0-cp36-cp
 
 Note that GPU support requires the NVIDIA hardware and software described in
 [NVIDIA requirements to run TensorFlow with GPU support](#NVIDIARequirements).
-
-<a name="Protobuf31"></a>
-## Protobuf pip package 3.1
-
-You can skip this section unless you are seeing problems related
-to the protobuf pip package.
-
-**NOTE:** If your TensorFlow programs are running slowly, you might
-have a problem related to the protobuf pip package.
-
-The TensorFlow pip package depends on protobuf pip package version 3.1. The
-protobuf pip package downloaded from PyPI (when invoking
-<tt>pip install protobuf</tt>) is a Python-only library containing
-Python implementations of proto serialization/deserialization that can run
-**10x-50x slower** than the C++ implementation. Protobuf also supports a
-binary extension for the Python package that contains fast
-C++ based proto parsing.  This extension is not available in the
-standard Python-only pip package.  We have created a custom binary
-pip package for protobuf that contains the binary extension. To install
-the custom binary protobuf pip package, invoke one of the following commands:
-
-  * for Python 2.7:
-
-  <pre>
-  $ <b>pip install --upgrade \
-  https://storage.googleapis.com/tensorflow/linux/cpu/protobuf-3.1.0-cp27-none-linux_x86_64.whl</b></pre>
-
-  * for Python 3.5:
-
-  <pre>
-  $ <b>pip3 install --upgrade \
-  https://storage.googleapis.com/tensorflow/linux/cpu/protobuf-3.1.0-cp35-none-linux_x86_64.whl</b></pre>
-
-Installing this protobuf package will overwrite the existing protobuf package.
-Note that the binary pip package already has support for protobufs
-larger than 64MB, which should fix errors such as these:
-
-<pre>[libprotobuf ERROR google/protobuf/src/google/protobuf/io/coded_stream.cc:207]
-A protocol message was rejected because it was too big (more than 67108864 bytes).
-To increase the limit (or to disable these warnings), see
-CodedInputStream::SetTotalBytesLimit() in google/protobuf/io/coded_stream.h.</pre>
diff --git a/tensorflow/docs_src/install/install_mac.md b/tensorflow/docs_src/install/install_mac.md
index 3afd0aec0f..d4ab5475fa 100644
--- a/tensorflow/docs_src/install/install_mac.md
+++ b/tensorflow/docs_src/install/install_mac.md
@@ -530,44 +530,3 @@ https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.4.0-py2-none-any.
 <pre>
 https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.4.0-py3-none-any.whl
 </pre>
-
-
-
-<a name="Protobuf31"></a>
-## Protobuf pip package 3.1
-
-You can skip this section unless you are seeing problems related
-to the protobuf pip package.
-
-**NOTE:** If your TensorFlow programs are running slowly, you might
-have a problem related to the protobuf pip package.
-
-The TensorFlow pip package depends on protobuf pip package version 3.1. The
-protobuf pip package downloaded from PyPI (when invoking
-<tt>pip install protobuf</tt>) is a Python-only library containing
-Python implementations of proto serialization/deserialization that can run
-**10x-50x slower** than the C++ implementation. Protobuf also supports a
-binary extension for the Python package that contains fast
-C++ based proto parsing.  This extension is not available in the
-standard Python-only pip package.  We have created a custom binary
-pip package for protobuf that contains the binary extension. To install
-the custom binary protobuf pip package, invoke one of the following commands:
-
-  * for Python 2.7:
-
-    <pre>$ <b>pip install --upgrade \
-    https://storage.googleapis.com/tensorflow/mac/cpu/protobuf-3.1.0-cp27-none-macosx_10_11_x86_64.whl</b></pre>
-
-  * for Python 3.n:
-
-    <pre>$ <b>pip3 install --upgrade \
-    https://storage.googleapis.com/tensorflow/mac/cpu/protobuf-3.1.0-cp35-none-macosx_10_11_x86_64.whl</b></pre>
-
-Installing this protobuf package will overwrite the existing protobuf package.
-Note that the binary pip package already has support for protobufs
-larger than 64MB, which should fix errors such as these:
-
-<pre>[libprotobuf ERROR google/protobuf/src/google/protobuf/io/coded_stream.cc:207]
-A protocol message was rejected because it was too big (more than 67108864 bytes).
-To increase the limit (or to disable these warnings), see
-CodedInputStream::SetTotalBytesLimit() in google/protobuf/io/coded_stream.h.</pre>
-- 
GitLab


From 7fd2c7a7f8650a128213b19b13cb6ced65e87696 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 18 Dec 2017 15:14:59 -0800
Subject: [PATCH 1169/1225] [XLA] Add format field to layout

Format will describe the method used to store array data in memory. Currently
only DENSE is supported, which represents the way XLA currently stores arrays.

Scalars have a DENSE format. Tuples and opaque shapes use INVALID_FORMAT.

Adds checks to code that uses minor_to_major to ensure the layout is dense.

PiperOrigin-RevId: 179475450
---
 tensorflow/compiler/aot/codegen_test_h.golden |   4 +-
 tensorflow/compiler/tf2xla/xla_compiler.cc    |  14 +-
 tensorflow/compiler/xla/index_util.cc         |  11 +-
 tensorflow/compiler/xla/layout_util.cc        | 131 +++++++++++++-----
 tensorflow/compiler/xla/layout_util.h         |  24 ++++
 tensorflow/compiler/xla/literal_util.cc       |  23 ++-
 tensorflow/compiler/xla/literal_util.h        |   5 +-
 .../xla/service/algebraic_simplifier.cc       |  16 +--
 .../xla/service/cpu/dot_op_emitter.cc         |   8 +-
 .../compiler/xla/service/cpu/ir_emitter.cc    |  27 ++--
 .../xla/service/cpu/parallel_loop_emitter.cc  |   4 +-
 .../compiler/xla/service/gpu/gemm_thunk.cc    |   8 +-
 .../compiler/xla/service/gpu/ir_emitter.cc    |   4 +-
 .../xla/service/gpu/ir_emitter_unnested.cc    |  28 ++--
 .../compiler/xla/service/hlo_graph_dumper.cc  |   2 +-
 .../xla/service/hlo_tfgraph_builder.cc        |   2 +-
 .../compiler/xla/service/layout_assignment.cc |  14 +-
 .../compiler/xla/service/llvm_ir/ir_array.cc  |   6 +-
 .../compiler/xla/service/llvm_ir/llvm_util.cc |   8 +-
 .../xla/service/llvm_ir/loop_emitter.cc       |   4 +-
 .../xla/service/user_computation_test.cc      |   1 +
 tensorflow/compiler/xla/shape_util.cc         |  42 +++---
 tensorflow/compiler/xla/shape_util.h          |   9 +-
 tensorflow/compiler/xla/shape_util_test.cc    |  32 ++---
 tensorflow/compiler/xla/xla_data.proto        |  26 +++-
 25 files changed, 269 insertions(+), 184 deletions(-)

diff --git a/tensorflow/compiler/aot/codegen_test_h.golden b/tensorflow/compiler/aot/codegen_test_h.golden
index 35e50433d6..95ab3a7332 100644
--- a/tensorflow/compiler/aot/codegen_test_h.golden
+++ b/tensorflow/compiler/aot/codegen_test_h.golden
@@ -235,8 +235,8 @@ class MyClass : public tensorflow::XlaCompiledCpuFunction {
   // Shape of the args and results.
   static const xla::ProgramShape* StaticProgramShape() {
     static const xla::ProgramShape* kShape = []() {
-      static const char kProto[] = {10,12,16,11,26,2,1,2,42,4,10,2,1,0,10,12,16,5,26,2,3,4,42,4,10,2,1,0,18,16,16,13,34,12,16,8,26,2,5,6,42,4,10,2,1,0};
-      static constexpr int kProtoSize = 46;
+      static const char kProto[] = {10,14,16,11,26,2,1,2,42,6,10,2,1,0,32,1,10,14,16,5,26,2,3,4,42,6,10,2,1,0,32,1,18,18,16,13,34,14,16,8,26,2,5,6,42,6,10,2,1,0,32,1};
+      static constexpr int kProtoSize = 52;
       xla::ProgramShape* shape = new xla::ProgramShape;
       shape->ParseFromArray(kProto, kProtoSize);
       return shape;
diff --git a/tensorflow/compiler/tf2xla/xla_compiler.cc b/tensorflow/compiler/tf2xla/xla_compiler.cc
index 4c01e67321..50da76e514 100644
--- a/tensorflow/compiler/tf2xla/xla_compiler.cc
+++ b/tensorflow/compiler/tf2xla/xla_compiler.cc
@@ -502,18 +502,6 @@ Status BuildComputation(
   return Status::OK();
 }
 
-void AssignMajorToMinorLayout(xla::Shape* shape) {
-  if (xla::ShapeUtil::IsTuple(*shape)) {
-    for (xla::Shape& elem_shape : *shape->mutable_tuple_shapes()) {
-      AssignMajorToMinorLayout(&elem_shape);
-    }
-  } else {
-    auto& minor_to_major = *shape->mutable_layout()->mutable_minor_to_major();
-    minor_to_major.Resize(xla::ShapeUtil::Rank(*shape), 0);
-    std::iota(minor_to_major.rbegin(), minor_to_major.rend(), 0);
-  }
-}
-
 }  // namespace
 
 Status XlaCompiler::CompileGraph(const XlaCompiler::CompileOptions& options,
@@ -589,7 +577,7 @@ Status XlaCompiler::CompileGraph(const XlaCompiler::CompileOptions& options,
           << xla::ShapeUtil::HumanString(result->xla_output_shape);
 
   // Tensorflow expects a major-to-minor order of results.
-  AssignMajorToMinorLayout(&result->xla_output_shape);
+  xla::LayoutUtil::SetToDefaultLayout(&result->xla_output_shape);
 
   // Converts the output shapes to TensorShapes.
   int computation_output = 0;
diff --git a/tensorflow/compiler/xla/index_util.cc b/tensorflow/compiler/xla/index_util.cc
index 76c0168f37..2ee23927d8 100644
--- a/tensorflow/compiler/xla/index_util.cc
+++ b/tensorflow/compiler/xla/index_util.cc
@@ -78,7 +78,7 @@ namespace xla {
   int64 scale = 1;
   int64 linear_index = 0;
   bool first = true;
-  for (auto dimension : shape.layout().minor_to_major()) {
+  for (auto dimension : LayoutUtil::MinorToMajor(shape)) {
     if (first) {
       // Avoid two multiplies on the first loop iteration
       linear_index = multi_index[dimension];
@@ -110,7 +110,7 @@ namespace xla {
 
   // Accumulated product D{L(0)} * D{L(1)} * ...
   int64 divisor = 1;
-  for (auto dimension : shape.layout().minor_to_major()) {
+  for (auto dimension : LayoutUtil::MinorToMajor(shape)) {
     multi_index[dimension] =
         (linear_index / divisor) % shape.dimensions(dimension);
     divisor *= shape.dimensions(dimension);
@@ -133,18 +133,17 @@ namespace xla {
 
 /* static */ int64 IndexUtil::GetDimensionStride(const Shape& shape,
                                                  int64 dimension) {
-  const Layout& layout = shape.layout();
-  int64 pdim_size = layout.padded_dimensions_size();
+  int64 pdim_size = LayoutUtil::PaddedDimensions(shape).size();
   int64 stride = 1;
   DCHECK(pdim_size == 0 || pdim_size == shape.dimensions_size());
-  for (auto dim : layout.minor_to_major()) {
+  for (auto dim : LayoutUtil::MinorToMajor(shape)) {
     if (dim == dimension) {
       break;
     }
     if (pdim_size == 0) {
       stride *= shape.dimensions(dim);
     } else {
-      stride *= layout.padded_dimensions(dim);
+      stride *= LayoutUtil::PaddedDimension(shape, dim);
     }
   }
   return stride;
diff --git a/tensorflow/compiler/xla/layout_util.cc b/tensorflow/compiler/xla/layout_util.cc
index 5c2cc2a7a9..f9803be32f 100644
--- a/tensorflow/compiler/xla/layout_util.cc
+++ b/tensorflow/compiler/xla/layout_util.cc
@@ -57,6 +57,7 @@ void SetDefaultLayoutToContainer(
 /* static */ Layout LayoutUtil::MakeLayout(
     tensorflow::gtl::ArraySlice<int64> minor_to_major) {
   Layout layout;
+  layout.set_format(DENSE);
   for (int64 dimension_number : minor_to_major) {
     layout.add_minor_to_major(dimension_number);
   }
@@ -68,6 +69,7 @@ namespace {
 // Internal helper that creates a default layout for an array of the given rank.
 Layout CreateDefaultLayoutForRank(int64 rank) {
   Layout layout;
+  layout.set_format(DENSE);
   tensorflow::protobuf::RepeatedField<tensorflow::protobuf_int64>*
       minor_to_major = layout.mutable_minor_to_major();
   minor_to_major->Resize(rank, 0);
@@ -105,7 +107,11 @@ Layout CreateDefaultLayoutForRank(int64 rank) {
     for (auto& element_shape : *shape->mutable_tuple_shapes()) {
       SetToDefaultLayout(&element_shape);
     }
+    shape->clear_layout();
+  } else if (ShapeUtil::IsOpaque(*shape)) {
+    shape->clear_layout();
   } else {
+    shape->mutable_layout()->set_format(DENSE);
     tensorflow::protobuf::RepeatedField<tensorflow::protobuf_int64>*
         minor_to_major = shape->mutable_layout()->mutable_minor_to_major();
     minor_to_major->Resize(shape->dimensions_size(), 0);
@@ -137,8 +143,10 @@ Layout CreateDefaultLayoutForRank(int64 rank) {
       TF_RETURN_IF_ERROR(ValidateLayoutInShape(element_shape));
     }
     return tensorflow::Status::OK();
-  } else if (ShapeUtil::Rank(shape) == 0 && !shape.has_layout()) {
-    // A scalar without a layout is ok.
+  } else if (ShapeUtil::IsOpaque(shape)) {
+    if (shape.has_layout()) {
+      return InvalidArgument("opaque should not have a layout field");
+    }
     return tensorflow::Status::OK();
   } else {
     // Array shape.
@@ -156,46 +164,59 @@ Layout CreateDefaultLayoutForRank(int64 rank) {
     return InvalidArgument("a single Layout is not valid for tuple shapes");
   }
 
-  if (layout.minor_to_major_size() != ShapeUtil::Rank(shape)) {
+  if (ShapeUtil::IsOpaque(shape)) {
+    return tensorflow::Status::OK();
+  }
+
+  if (layout.format() == INVALID_FORMAT) {
     return InvalidArgument(
-        "layout minor_to_major field contains %d elements, "
-        "but shape is rank %lld: {%s}; shape: %s",
-        layout.minor_to_major_size(), ShapeUtil::Rank(shape),
-        tensorflow::str_util::Join(layout.minor_to_major(), ", ").c_str(),
-        shape.ShortDebugString().c_str());
+        "Layout does not have a valid format: layout {%s}, shape {%s}",
+        layout.ShortDebugString().c_str(), shape.ShortDebugString().c_str());
   }
 
-  std::vector<bool> dimensions_in_layout(ShapeUtil::Rank(shape), false);
-  for (int64 i = 0; i < ShapeUtil::Rank(shape); ++i) {
-    int64 dim = layout.minor_to_major(i);
-    if (dim < 0 || dim >= ShapeUtil::Rank(shape)) {
+  if (layout.format() == DENSE) {
+    if (layout.minor_to_major_size() != ShapeUtil::Rank(shape)) {
       return InvalidArgument(
-          "layout minor_to_major field has out-of-bounds value: %s",
-          HumanString(layout).c_str());
+          "layout minor_to_major field contains %d elements, "
+          "but shape is rank %lld: {%s}; shape: %s",
+          layout.minor_to_major_size(), ShapeUtil::Rank(shape),
+          tensorflow::str_util::Join(layout.minor_to_major(), ", ").c_str(),
+          shape.ShortDebugString().c_str());
     }
-    if (dimensions_in_layout[dim]) {
-      return InvalidArgument(
-          "layout minor_to_major field has duplicate values: {%s}",
-          HumanString(layout).c_str());
-    }
-    dimensions_in_layout[dim] = true;
-  }
 
-  if (layout.padded_dimensions_size() > 0) {
-    if (layout.padded_dimensions_size() != ShapeUtil::Rank(shape)) {
-      return InvalidArgument(
-          "layout has %d padded dimensions, but shape is rank %lld",
-          layout.padded_dimensions_size(), ShapeUtil::Rank(shape));
+    std::vector<bool> dimensions_in_layout(ShapeUtil::Rank(shape), false);
+    for (int64 i = 0; i < ShapeUtil::Rank(shape); ++i) {
+      int64 dim = layout.minor_to_major(i);
+      if (dim < 0 || dim >= ShapeUtil::Rank(shape)) {
+        return InvalidArgument(
+            "layout minor_to_major field has out-of-bounds value: %s",
+            HumanString(layout).c_str());
+      }
+      if (dimensions_in_layout[dim]) {
+        return InvalidArgument(
+            "layout minor_to_major field has duplicate values: {%s}",
+            HumanString(layout).c_str());
+      }
+      dimensions_in_layout[dim] = true;
     }
-    for (int i = 0; i < layout.padded_dimensions_size(); ++i) {
-      if (layout.padded_dimensions(i) < shape.dimensions(i)) {
+
+    if (layout.padded_dimensions_size() > 0) {
+      if (layout.padded_dimensions_size() != ShapeUtil::Rank(shape)) {
         return InvalidArgument(
-            "for dimension %d, dimension padding (%lld) is smaller than "
-            "the dimension size (%lld) of the shape",
-            i, layout.padded_dimensions(i), shape.dimensions(i));
+            "layout has %d padded dimensions, but shape is rank %lld",
+            layout.padded_dimensions_size(), ShapeUtil::Rank(shape));
+      }
+      for (int i = 0; i < layout.padded_dimensions_size(); ++i) {
+        if (layout.padded_dimensions(i) < shape.dimensions(i)) {
+          return InvalidArgument(
+              "for dimension %d, dimension padding (%lld) is smaller than "
+              "the dimension size (%lld) of the shape",
+              i, layout.padded_dimensions(i), shape.dimensions(i));
+        }
       }
     }
   }
+
   return tensorflow::Status::OK();
 }
 
@@ -213,12 +234,23 @@ Layout CreateDefaultLayoutForRank(int64 rank) {
   LayoutUtil::ClearLayout(program_shape->mutable_result());
 }
 
+/* static */ bool LayoutUtil::IsDense(const Shape& shape) {
+  return ShapeUtil::IsArray(shape) && shape.has_layout() &&
+         IsDense(shape.layout());
+}
+
+/* static */ bool LayoutUtil::IsDense(const Layout& layout) {
+  return layout.format() == DENSE;
+}
+
 /* static */ bool LayoutUtil::IsMonotonicWithDim0Minor(const Layout& layout) {
+  CHECK(layout.format() == DENSE);
   return std::is_sorted(layout.minor_to_major().begin(),
                         layout.minor_to_major().end());
 }
 
 /* static */ bool LayoutUtil::IsMonotonicWithDim0Major(const Layout& layout) {
+  CHECK(layout.format() == DENSE);
   return std::is_sorted(layout.minor_to_major().begin(),
                         layout.minor_to_major().end(), std::greater<int64>());
 }
@@ -228,6 +260,7 @@ Layout CreateDefaultLayoutForRank(int64 rank) {
       shape.layout().padded_dimensions_size() == 0) {
     return false;
   }
+  CHECK(IsDense(shape));
   CHECK_EQ(shape.dimensions_size(), shape.layout().padded_dimensions_size());
   for (int64 i = 0; i < shape.dimensions_size(); ++i) {
     if (shape.layout().padded_dimensions(i) > shape.dimensions(i)) {
@@ -237,15 +270,32 @@ Layout CreateDefaultLayoutForRank(int64 rank) {
   return false;
 }
 
+/* static */ tensorflow::gtl::ArraySlice<const int64>
+LayoutUtil::PaddedDimensions(const Shape& shape) {
+  CHECK(IsDense(shape));
+  return AsInt64Slice(shape.layout().padded_dimensions());
+}
+
+/* static */ int64 LayoutUtil::PaddedDimension(const Shape& shape,
+                                               int64 index) {
+  CHECK(IsDense(shape));
+  return shape.layout().padded_dimensions(index);
+}
+
+/* static */ PaddingValue LayoutUtil::GetPaddingValue(const Shape& shape) {
+  CHECK(IsDense(shape));
+  return shape.layout().padding_value();
+}
+
 /* static */ bool LayoutUtil::HasLayout(const Shape& shape) {
   if (ShapeUtil::IsTuple(shape)) {
     // Tuple shape: all subshapes must have a layout.
     return std::all_of(shape.tuple_shapes().begin(), shape.tuple_shapes().end(),
                        [](const Shape& s) { return HasLayout(s); });
+  } else if (ShapeUtil::IsOpaque(shape)) {
+    return true;
   }
-  // A scalar trivially always has a layout.
-  return (ShapeUtil::Rank(shape) == 0 ||
-          (shape.has_layout() && (shape.layout().minor_to_major_size() > 0)));
+  return shape.has_layout() && shape.layout().format() != INVALID_FORMAT;
 }
 
 /* static */ bool LayoutUtil::HasLayout(const ProgramShape& program_shape) {
@@ -261,6 +311,18 @@ Layout CreateDefaultLayoutForRank(int64 rank) {
   return protobuf_util::ProtobufEquals(lhs, rhs);
 }
 
+/* static */ tensorflow::gtl::ArraySlice<int64> LayoutUtil::MinorToMajor(
+    const Shape& shape) {
+  CHECK(IsDense(shape));
+  return AsInt64Slice(shape.layout().minor_to_major());
+}
+
+/* static */ tensorflow::gtl::ArraySlice<int64> LayoutUtil::MinorToMajor(
+    const Layout& layout) {
+  CHECK(layout.format() == DENSE);
+  return AsInt64Slice(layout.minor_to_major());
+}
+
 /* static */ int64 LayoutUtil::Major(const Layout& layout,
                                      int64 physical_dimension_number) {
   CHECK_LE(0, physical_dimension_number);
@@ -271,6 +333,7 @@ Layout CreateDefaultLayoutForRank(int64 rank) {
 
 /* static */ int64 LayoutUtil::Minor(const Layout& layout,
                                      int64 physical_dimension_number) {
+  CHECK_EQ(layout.format(), DENSE);
   CHECK_LE(0, physical_dimension_number);
   CHECK_LT(physical_dimension_number, layout.minor_to_major_size());
   return layout.minor_to_major(physical_dimension_number);
diff --git a/tensorflow/compiler/xla/layout_util.h b/tensorflow/compiler/xla/layout_util.h
index bc42e22229..d00cd03756 100644
--- a/tensorflow/compiler/xla/layout_util.h
+++ b/tensorflow/compiler/xla/layout_util.h
@@ -71,6 +71,12 @@ class LayoutUtil {
   // Clears the layout on all Shapes within the given ProgramShape.
   static void ClearLayout(ProgramShape* program_shape);
 
+  // Returns whether the given Shape is an array and has a dense format layout.
+  static bool IsDense(const Shape& shape);
+
+  // Returns whether the given Layout has a dense format.
+  static bool IsDense(const Layout& layout);
+
   // Returns whether the layout is monotonic and dim 0 is minor in the layout.
   // * R0 and R1: this is always trivially true.
   // * R2+: equivalent to column-major. Dimension 0 is the minor, dimension 1 is
@@ -88,6 +94,19 @@ class LayoutUtil {
   // dimension size).
   static bool IsPadded(const Shape& shape);
 
+  // Returns the padded_dimensions array for the given Shape.  Requires that the
+  // shape is an array and has a dense layout.
+  static tensorflow::gtl::ArraySlice<const int64> PaddedDimensions(
+      const Shape& shape);
+
+  // Returns the given index of the padded_dimensions array for the given Shape.
+  // Requires that the shape is an array and has a dense layout.
+  static int64 PaddedDimension(const Shape& shape, int64 index);
+
+  // Returns the padding_value for the given Shape.  Requires that the shape is
+  // an array and has a dense layout.
+  static PaddingValue GetPaddingValue(const Shape& shape);
+
   // Returns whether the given shape has a layout. For tuple shapes, true is
   // returned only if all elements have layouts.
   static bool HasLayout(const Shape& shape);
@@ -98,6 +117,11 @@ class LayoutUtil {
   // Returns whether lhs and rhs are identical.
   static bool Equal(const Layout& lhs, const Layout& rhs);
 
+  // Returns the minor_to_major array for the given Shape.  Requires that the
+  // shape is an array and has a dense layout.
+  static tensorflow::gtl::ArraySlice<int64> MinorToMajor(const Shape& shape);
+  static tensorflow::gtl::ArraySlice<int64> MinorToMajor(const Layout& layout);
+
   // Major(0) is the most major logical dimension number, major(1) is the
   // second-most-major logical dimension number and so on.
   //
diff --git a/tensorflow/compiler/xla/literal_util.cc b/tensorflow/compiler/xla/literal_util.cc
index 3ae356bc11..f493460e79 100644
--- a/tensorflow/compiler/xla/literal_util.cc
+++ b/tensorflow/compiler/xla/literal_util.cc
@@ -64,12 +64,12 @@ Literal::StrideConfig::StrideConfig(
   if (!dimensions.empty()) {
     // Selects the shape with the largest minor dimension as the one upon
     // which to run the tight stride loop.
-    if (dimensions[source_shape.layout().minor_to_major()[0]] >=
-        dimensions[dest_shape.layout().minor_to_major()[0]]) {
-      minor_dimension = source_shape.layout().minor_to_major()[0];
+    if (dimensions[LayoutUtil::Minor(source_shape.layout(), 0)] >=
+        dimensions[LayoutUtil::Minor(dest_shape.layout(), 0)]) {
+      minor_dimension = LayoutUtil::Minor(source_shape.layout(), 0);
       dest_stride = IndexUtil::GetDimensionStride(dest_shape, minor_dimension);
     } else {
-      minor_dimension = dest_shape.layout().minor_to_major()[0];
+      minor_dimension = LayoutUtil::Minor(dest_shape.layout(), 0);
       source_stride =
           IndexUtil::GetDimensionStride(source_shape, minor_dimension);
     }
@@ -432,10 +432,8 @@ StatusOr<std::unique_ptr<Literal>> Literal::Reshape(
   }
   std::unique_ptr<Literal> output;
   if (!LayoutUtil::IsMonotonicWithDim0Major(shape().layout())) {
-    std::vector<int64> minor_to_major(ShapeUtil::Rank(shape()));
-    std::iota(minor_to_major.rbegin(), minor_to_major.rend(),
-              static_cast<int64>(0));
-    output = Relayout(LayoutUtil::MakeLayout(minor_to_major));
+    output =
+        Relayout(LayoutUtil::GetDefaultLayoutForRank(ShapeUtil::Rank(shape())));
   } else {
     output = CloneToUnique();
   }
@@ -481,9 +479,10 @@ std::unique_ptr<Literal> Literal::Transpose(
   // dimension has within the transposed array, a layout is affine if
   // MinMaj(Di) == TMinMaj(T(Di)), with TMinMaj() being the minor to major
   // vector of the affine layout.
+  CHECK(LayoutUtil::IsDense(permuted_shape));
   Layout* layout = permuted_shape.mutable_layout();
   layout->clear_minor_to_major();
-  for (auto index : shape().layout().minor_to_major()) {
+  for (auto index : LayoutUtil::MinorToMajor(shape())) {
     layout->add_minor_to_major(inverse_permutation[index]);
   }
   std::unique_ptr<Literal> new_literal = CreateFromShape(permuted_shape);
@@ -507,9 +506,9 @@ std::unique_ptr<Literal> Literal::Slice(
     CHECK_GT(dimension, 0);
     result_dimensions.push_back(dimension);
   }
-  const auto result_shape = ShapeUtil::MakeShapeWithLayout(
-      shape().element_type(), result_dimensions,
-      AsInt64Slice(shape().layout().minor_to_major()));
+  const auto result_shape =
+      ShapeUtil::MakeShapeWithLayout(shape().element_type(), result_dimensions,
+                                     LayoutUtil::MinorToMajor(shape()));
 
   auto result_literal = MakeUnique<Literal>();
   *result_literal->mutable_shape() = result_shape;
diff --git a/tensorflow/compiler/xla/literal_util.h b/tensorflow/compiler/xla/literal_util.h
index 9b9972725b..c782e0f19e 100644
--- a/tensorflow/compiler/xla/literal_util.h
+++ b/tensorflow/compiler/xla/literal_util.h
@@ -1111,7 +1111,7 @@ void Literal::PopulateR2WithLayout(
       primitive_util::NativeToPrimitiveType<NativeT>(),
       {static_cast<int64>(values.size()),
        static_cast<int64>(values.begin()->size())},
-      AsInt64Slice(layout.minor_to_major()));
+      LayoutUtil::MinorToMajor(layout));
 
   const int64 dim0_size = values.size();
   const int64 dim1_size = values.begin()->size();
@@ -1142,9 +1142,10 @@ void Literal::PopulateR2(
 template <typename NativeT>
 void Literal::PopulateFromArrayWithLayout(const Array<NativeT>& values,
                                           const Layout& layout) {
+  CHECK_EQ(layout.format(), DENSE);
   *mutable_shape() = ShapeUtil::MakeShapeWithLayout(
       primitive_util::NativeToPrimitiveType<NativeT>(), values.dimensions(),
-      AsInt64Slice(layout.minor_to_major()));
+      LayoutUtil::MinorToMajor(layout));
   Reserve(values.num_elements());
   values.Each([this](tensorflow::gtl::ArraySlice<int64> indices,
                      NativeT value) { this->Set(indices, value); });
diff --git a/tensorflow/compiler/xla/service/algebraic_simplifier.cc b/tensorflow/compiler/xla/service/algebraic_simplifier.cc
index d7bf4f37af..b16c742081 100644
--- a/tensorflow/compiler/xla/service/algebraic_simplifier.cc
+++ b/tensorflow/compiler/xla/service/algebraic_simplifier.cc
@@ -1375,7 +1375,7 @@ StatusOr<bool> AlgebraicSimplifierVisitor::
         ShapeUtil::MakeShapeWithLayout(
             user->shape().element_type(),
             AsInt64Slice(operand->shape().dimensions()),
-            AsInt64Slice(operand->shape().layout().minor_to_major())),
+            LayoutUtil::MinorToMajor(operand->shape())),
         new_user_operands));
     VLOG(4) << "  new user: " << new_user->ToString();
     HloInstruction* new_reshape_or_broadcast = nullptr;
@@ -1385,8 +1385,7 @@ StatusOr<bool> AlgebraicSimplifierVisitor::
               ShapeUtil::MakeShapeWithLayout(
                   user->shape().element_type(),
                   AsInt64Slice(reshape_or_broadcast->shape().dimensions()),
-                  AsInt64Slice(
-                      reshape_or_broadcast->shape().layout().minor_to_major())),
+                  LayoutUtil::MinorToMajor(reshape_or_broadcast->shape())),
               new_user));
     } else {
       TF_RET_CHECK(reshape_or_broadcast->opcode() == HloOpcode::kBroadcast);
@@ -1395,8 +1394,7 @@ StatusOr<bool> AlgebraicSimplifierVisitor::
               ShapeUtil::MakeShapeWithLayout(
                   user->shape().element_type(),
                   AsInt64Slice(reshape_or_broadcast->shape().dimensions()),
-                  AsInt64Slice(
-                      reshape_or_broadcast->shape().layout().minor_to_major())),
+                  LayoutUtil::MinorToMajor(reshape_or_broadcast->shape())),
               new_user, reshape_or_broadcast->dimensions()));
     }
     VLOG(4) << "  new reshape/broadcast: "
@@ -1758,15 +1756,15 @@ Status AlgebraicSimplifierVisitor::HandleConvolution(
   // still convert Conv into more efficient Matmul with operand transposition
   // (such as the transposition flags in cuBLAS SGEMM).
   if (!LayoutUtil::Equal(input_shape.layout(), convolution_shape.layout()) ||
-      input_shape.layout().minor_to_major(0) !=
+      LayoutUtil::Minor(input_shape.layout(), 0) !=
           dnums.input_feature_dimension() ||
-      convolution_shape.layout().minor_to_major(0) !=
+      LayoutUtil::Minor(convolution_shape.layout(), 0) !=
           dnums.output_feature_dimension() ||
       // The input feature dimension should come later in the minor-to-major
       // order.
-      (PositionInContainer(filter_shape.layout().minor_to_major(),
+      (PositionInContainer(LayoutUtil::MinorToMajor(filter_shape),
                            dnums.kernel_input_feature_dimension()) <
-       PositionInContainer(filter_shape.layout().minor_to_major(),
+       PositionInContainer(LayoutUtil::MinorToMajor(filter_shape),
                            dnums.kernel_output_feature_dimension()))) {
     return Status::OK();
   }
diff --git a/tensorflow/compiler/xla/service/cpu/dot_op_emitter.cc b/tensorflow/compiler/xla/service/cpu/dot_op_emitter.cc
index 0631454d5c..74f71e5ad5 100644
--- a/tensorflow/compiler/xla/service/cpu/dot_op_emitter.cc
+++ b/tensorflow/compiler/xla/service/cpu/dot_op_emitter.cc
@@ -988,8 +988,8 @@ DotOpEmitter::MatMultDims DotOpEmitter::GetMatMultDims() const {
   return {lhs_shape.dimensions(transpose_lhs_ ? 1 : 0),
           lhs_shape.dimensions(transpose_lhs_ ? 0 : 1),
           rhs_shape.dimensions(transpose_rhs_ ? 0 : 1),
-          lhs_shape.layout().minor_to_major(0) == 0,
-          rhs_shape.layout().minor_to_major(0) == 0};
+          LayoutUtil::Minor(lhs_shape.layout(), 0) == 0,
+          LayoutUtil::Minor(rhs_shape.layout(), 0) == 0};
 }
 
 llvm_ir::IrArray::Index DotOpEmitter::EmitOperandArrayLoopNest(
@@ -1000,8 +1000,8 @@ llvm_ir::IrArray::Index DotOpEmitter::EmitOperandArrayLoopNest(
   // reduction dimension.
   std::vector<int64> dimensions;
   const Shape& shape = operand_array.GetShape();
-  for (int i = shape.layout().minor_to_major_size() - 1; i >= 0; --i) {
-    int64 dimension = shape.layout().minor_to_major(i);
+  for (int i = LayoutUtil::MinorToMajor(shape).size() - 1; i >= 0; --i) {
+    int64 dimension = LayoutUtil::Minor(shape.layout(), i);
     if (dimension != reduction_dimension) {
       dimensions.push_back(dimension);
     }
diff --git a/tensorflow/compiler/xla/service/cpu/ir_emitter.cc b/tensorflow/compiler/xla/service/cpu/ir_emitter.cc
index a433debbf5..ef33260c17 100644
--- a/tensorflow/compiler/xla/service/cpu/ir_emitter.cc
+++ b/tensorflow/compiler/xla/service/cpu/ir_emitter.cc
@@ -1697,7 +1697,8 @@ StatusOr<bool> IrEmitter::EmitVectorizedReduce(
 
   bool is_reduction_over_minor_dimension =
       std::find(dimensions.begin(), dimensions.end(),
-                arg->shape().layout().minor_to_major(0)) != dimensions.end();
+                LayoutUtil::Minor(arg->shape().layout(), 0)) !=
+      dimensions.end();
 
   unsigned element_alignment = tensorflow::MathUtil::GCD<unsigned>(
       ShapeUtil::ByteSizeOfPrimitiveType(reduce->shape().element_type()),
@@ -1734,8 +1735,9 @@ StatusOr<bool> IrEmitter::EmitVectorizedReduce(
 
   llvm_ir::ForLoopNest loop_nest(IrName(reduce), &ir_builder_);
   llvm_ir::IrArray::Index array_index(reduce->shape().dimensions_size());
-  for (int i = reduce->shape().layout().minor_to_major_size() - 1; i > 0; --i) {
-    int64 dimension = reduce->shape().layout().minor_to_major(i);
+  for (int i = LayoutUtil::MinorToMajor(reduce->shape()).size() - 1; i > 0;
+       --i) {
+    int64 dimension = LayoutUtil::Minor(reduce->shape().layout(), i);
     int64 start_index = 0;
     int64 end_index = reduce->shape().dimensions(dimension);
     std::unique_ptr<llvm_ir::ForLoop> loop =
@@ -1744,7 +1746,7 @@ StatusOr<bool> IrEmitter::EmitVectorizedReduce(
     array_index[dimension] = loop->GetIndVarValue();
   }
 
-  int64 innermost_dimension = reduce->shape().layout().minor_to_major(0);
+  int64 innermost_dimension = LayoutUtil::Minor(reduce->shape().layout(), 0);
   int64 innermost_dimension_size =
       reduce->shape().dimensions(innermost_dimension);
 
@@ -1780,10 +1782,10 @@ StatusOr<bool> IrEmitter::EmitVectorizedReduce(
                            target_array);
 
     if (auto exit_terminator = loop->GetExitBasicBlock()->getTerminator()) {
-      CHECK_GT(reduce->shape().layout().minor_to_major_size(), 1);
+      CHECK_GT(LayoutUtil::MinorToMajor(reduce->shape()).size(), 1);
       ir_builder_.SetInsertPoint(exit_terminator);
     } else {
-      CHECK_EQ(reduce->shape().layout().minor_to_major_size(), 1);
+      CHECK_EQ(LayoutUtil::MinorToMajor(reduce->shape()).size(), 1);
       ir_builder_.SetInsertPoint(loop->GetExitBasicBlock());
     }
   }
@@ -1943,7 +1945,7 @@ Status IrEmitter::HandleSlice(HloInstruction* slice) {
   // * Implement the memcpy within the innermost loop.
 
   tensorflow::gtl::FlatSet<int64> inner_dims;
-  for (int64 dim : layout.minor_to_major()) {
+  for (int64 dim : LayoutUtil::MinorToMajor(layout)) {
     if (operand->shape().dimensions(dim) != slice->shape().dimensions(dim)) {
       break;
     }
@@ -1970,7 +1972,7 @@ Status IrEmitter::HandleSlice(HloInstruction* slice) {
 
   // memcpy_dim is the innermost (in terms of layout) dimension for which the
   // slice does *not* just copy all the elements along the dimension.
-  const int64 memcpy_dim = layout.minor_to_major(inner_dims.size());
+  const int64 memcpy_dim = LayoutUtil::Minor(layout, inner_dims.size());
 
   const bool memcpy_is_contiguous = slice->slice_strides(memcpy_dim) == 1;
   // The number of logical elements that can be copied in a single call
@@ -2431,14 +2433,13 @@ StatusOr<bool> IrEmitter::EmitFastConcatenate(
 
   int64 concat_dim = concatenate->dimensions(0);
   const Layout& output_layout = output_shape.layout();
+  auto output_min2maj = LayoutUtil::MinorToMajor(output_layout);
   auto concat_dim_layout_itr =
-      std::find(output_layout.minor_to_major().begin(),
-                output_layout.minor_to_major().end(), concat_dim);
+      std::find(output_min2maj.begin(), output_min2maj.end(), concat_dim);
 
-  std::vector<int64> inner_dims(output_layout.minor_to_major().begin(),
-                                concat_dim_layout_itr);
+  std::vector<int64> inner_dims(output_min2maj.begin(), concat_dim_layout_itr);
   std::vector<int64> outer_dims(std::next(concat_dim_layout_itr),
-                                output_layout.minor_to_major().end());
+                                output_min2maj.end());
 
   llvm::Type* i8_ptr_type = ir_builder_.getInt8PtrTy();
   llvm::Type* i8_type = ir_builder_.getInt8Ty();
diff --git a/tensorflow/compiler/xla/service/cpu/parallel_loop_emitter.cc b/tensorflow/compiler/xla/service/cpu/parallel_loop_emitter.cc
index a3c3c1e5ef..1e439cde11 100644
--- a/tensorflow/compiler/xla/service/cpu/parallel_loop_emitter.cc
+++ b/tensorflow/compiler/xla/service/cpu/parallel_loop_emitter.cc
@@ -39,8 +39,8 @@ llvm_ir::IrArray::Index ParallelLoopEmitter::EmitIndexAndSetExitBasicBlock(
   llvm_ir::IrArray::Index array_index(num_dims);
 
   // Add loops from outer-most to inner-most dimensions.
-  for (int i = shape_.layout().minor_to_major_size() - 1; i >= 0; --i) {
-    const int64 dimension = shape_.layout().minor_to_major(i);
+  for (int i = LayoutUtil::MinorToMajor(shape_).size() - 1; i >= 0; --i) {
+    const int64 dimension = LayoutUtil::Minor(shape_.layout(), i);
     const int bounds_index = num_dims - 1 - i;
     if (bounds_index < dynamic_loop_bounds_->size()) {
       // Emit dynamic loop bounds for this dimension. Dynamic loop bounds
diff --git a/tensorflow/compiler/xla/service/gpu/gemm_thunk.cc b/tensorflow/compiler/xla/service/gpu/gemm_thunk.cc
index e784046450..8e3aebbc12 100644
--- a/tensorflow/compiler/xla/service/gpu/gemm_thunk.cc
+++ b/tensorflow/compiler/xla/service/gpu/gemm_thunk.cc
@@ -264,9 +264,9 @@ tensorflow::Status GemmThunk::ExecuteOnStream(
 
   auto make_descriptor = [this](se::DeviceMemoryBase data, const Shape& shape,
                                 bool transpose) -> MatrixDescriptor {
-    bool is_row_major = shape.layout().minor_to_major(0) != 0;
-    bool layout_mismatch = shape.layout().minor_to_major(0) !=
-                           output_shape_.layout().minor_to_major(0);
+    bool is_row_major = LayoutUtil::Minor(shape.layout(), 0) != 0;
+    bool layout_mismatch = LayoutUtil::Minor(shape.layout(), 0) !=
+                           LayoutUtil::Minor(output_shape_.layout(), 0);
     return MatrixDescriptor(data, transpose ^ layout_mismatch,
                             shape.dimensions(is_row_major),
                             shape.dimensions(!is_row_major));
@@ -320,7 +320,7 @@ tensorflow::Status GemmThunk::ExecuteOnStream(
   };
 
   bool launch_ok;
-  if (output_shape_.layout().minor_to_major(0) == 0) {
+  if (LayoutUtil::Minor(output_shape_.layout(), 0) == 0) {
     launch_ok = launch(
         lhs_descriptor, rhs_descriptor,
         MatrixDescriptor(output_data, false, output_num_rows, output_num_cols),
diff --git a/tensorflow/compiler/xla/service/gpu/ir_emitter.cc b/tensorflow/compiler/xla/service/gpu/ir_emitter.cc
index f64e93024f..e71aa0d133 100644
--- a/tensorflow/compiler/xla/service/gpu/ir_emitter.cc
+++ b/tensorflow/compiler/xla/service/gpu/ir_emitter.cc
@@ -766,8 +766,8 @@ llvm_ir::IrArray::Index IrEmitter::EmitOperandArrayLoopNest(
   // reduction dimension.
   std::vector<int64> dimensions;
   const Shape& shape = operand_array.GetShape();
-  for (int i = shape.layout().minor_to_major_size() - 1; i >= 0; --i) {
-    int64 dimension = shape.layout().minor_to_major(i);
+  for (int i = 0; i < LayoutUtil::MinorToMajor(shape).size(); ++i) {
+    int64 dimension = LayoutUtil::Major(shape.layout(), i);
     if (dimension != reduction_dimension) {
       dimensions.push_back(dimension);
     }
diff --git a/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc b/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc
index 8dbc90ee1f..022c63de8d 100644
--- a/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc
+++ b/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc
@@ -433,10 +433,10 @@ std::tuple<bool, Shape, Shape> IsTranspose021(const Shape& a, const Shape& b) {
   CHECK(ShapeUtil::Compatible(a, b));
   std::vector<int64> perm(a.dimensions().size());
   {
-    std::vector<int64> layout_a(a.layout().minor_to_major().rbegin(),
-                                a.layout().minor_to_major().rend());
-    std::vector<int64> layout_b(b.layout().minor_to_major().rbegin(),
-                                b.layout().minor_to_major().rend());
+    auto layout_a_orig = LayoutUtil::MinorToMajor(a);
+    std::vector<int64> layout_a(layout_a_orig.rbegin(), layout_a_orig.rend());
+    auto layout_b_orig = LayoutUtil::MinorToMajor(b);
+    std::vector<int64> layout_b(layout_b_orig.rbegin(), layout_b_orig.rend());
     for (size_t i = 0; i < perm.size(); ++i) {
       perm[i] = PositionInContainer(layout_b, layout_a[i]);
     }
@@ -812,9 +812,9 @@ Status IrEmitterUnnested::EmitColumnReduction(
         // normalized_input_shape to input_matrix_shape.
         const Shape normalized_input_shape =
             ShapeUtil::NormalizeShapeToMonotonicDim0MajorLayout(input_shape);
+        auto input_shape_min2maj = LayoutUtil::MinorToMajor(input_shape);
         const std::vector<int64> transpose_dimension_mapping(
-            input_shape.layout().minor_to_major().rbegin(),
-            input_shape.layout().minor_to_major().rend());
+            input_shape_min2maj.rbegin(), input_shape_min2maj.rend());
 
         const Shape input_matrix_shape =
             ShapeUtil::MakeShapeWithMonotonicDim0MajorLayout(
@@ -1055,9 +1055,9 @@ Status IrEmitterUnnested::EmitRowReduction(
         // normalized_input_shape to input_3d_tensor_shape.
         const Shape normalized_input_shape =
             ShapeUtil::NormalizeShapeToMonotonicDim0MajorLayout(input_shape);
+        auto input_shape_min2maj = LayoutUtil::MinorToMajor(input_shape);
         const std::vector<int64> transpose_dimension_mapping(
-            input_shape.layout().minor_to_major().rbegin(),
-            input_shape.layout().minor_to_major().rend());
+            input_shape_min2maj.rbegin(), input_shape_min2maj.rend());
         const Shape input_3d_tensor_shape =
             ShapeUtil::MakeShapeWithMonotonicDim0MajorLayout(
                 input_shape.element_type(), {depth, height, width});
@@ -1189,9 +1189,9 @@ Status IrEmitterUnnested::EmitReductionToVector(
   // whether another dimension is major or minor of them.
   std::sort(input_dims_to_keep.begin(), input_dims_to_keep.end(),
             [&input_shape](int64 dim_a, int64 dim_b) {
-              return PositionInContainer(input_shape.layout().minor_to_major(),
+              return PositionInContainer(LayoutUtil::MinorToMajor(input_shape),
                                          dim_a) <
-                     PositionInContainer(input_shape.layout().minor_to_major(),
+                     PositionInContainer(LayoutUtil::MinorToMajor(input_shape),
                                          dim_b);
             });
   // Now, if output rank is at least 1, `input_dims_to_keep.front()` is
@@ -1236,14 +1236,14 @@ Status IrEmitterUnnested::EmitReductionToVector(
     int64 width = 1;
     for (int64 input_dim = 0; input_dim < ShapeUtil::Rank(input_shape);
          ++input_dim) {
-      if (PositionInContainer(input_shape.layout().minor_to_major(),
+      if (PositionInContainer(LayoutUtil::MinorToMajor(input_shape),
                               input_dim) >
-          PositionInContainer(input_shape.layout().minor_to_major(),
+          PositionInContainer(LayoutUtil::MinorToMajor(input_shape),
                               input_dims_to_keep.back())) {
         depth *= input_shape.dimensions(input_dim);
-      } else if (PositionInContainer(input_shape.layout().minor_to_major(),
+      } else if (PositionInContainer(LayoutUtil::MinorToMajor(input_shape),
                                      input_dim) <
-                 PositionInContainer(input_shape.layout().minor_to_major(),
+                 PositionInContainer(LayoutUtil::MinorToMajor(input_shape),
                                      input_dims_to_keep.front())) {
         width *= input_shape.dimensions(input_dim);
       }
diff --git a/tensorflow/compiler/xla/service/hlo_graph_dumper.cc b/tensorflow/compiler/xla/service/hlo_graph_dumper.cc
index 943679784f..e534cb2140 100644
--- a/tensorflow/compiler/xla/service/hlo_graph_dumper.cc
+++ b/tensorflow/compiler/xla/service/hlo_graph_dumper.cc
@@ -1091,7 +1091,7 @@ string HloDotDumper::GetInstructionNodeExtraInfo(const HloInstruction* instr) {
         instr->shape().dimensions_size() > 1 &&
         !ShapeUtil::IsTuple(instr->shape())) {
       StrAppend(&instr_shape, "{",
-                Join(instr->shape().layout().minor_to_major(), ","), "}");
+                Join(LayoutUtil::MinorToMajor(instr->shape()), ","), "}");
     }
 
     // Some instructions have giant tuples as their shapes, so truncate the
diff --git a/tensorflow/compiler/xla/service/hlo_tfgraph_builder.cc b/tensorflow/compiler/xla/service/hlo_tfgraph_builder.cc
index 101a710d1c..3dc733940f 100644
--- a/tensorflow/compiler/xla/service/hlo_tfgraph_builder.cc
+++ b/tensorflow/compiler/xla/service/hlo_tfgraph_builder.cc
@@ -166,7 +166,7 @@ void HloTfGraphBuilder::SetNodeAttrs(const HloInstruction* instruction,
       layout_string = ShapeUtil::HumanStringWithLayout(instruction->shape());
     } else {
       layout_string = StrCat(
-          "{", Join(instruction->shape().layout().minor_to_major(), ","), "}");
+          "{", Join(LayoutUtil::MinorToMajor(instruction->shape()), ","), "}");
     }
     attrs["layout"].set_s(layout_string);
   }
diff --git a/tensorflow/compiler/xla/service/layout_assignment.cc b/tensorflow/compiler/xla/service/layout_assignment.cc
index b598c765fc..641712fdfa 100644
--- a/tensorflow/compiler/xla/service/layout_assignment.cc
+++ b/tensorflow/compiler/xla/service/layout_assignment.cc
@@ -527,9 +527,11 @@ Status CheckCallLayout(HloInstruction* call,
 Status CheckCustomCallLayout(HloInstruction* custom_call) {
   for (const HloInstruction* operand : custom_call->operands()) {
     TF_RET_CHECK(
+        ShapeUtil::IsOpaque(operand->shape()) ||
         LayoutUtil::IsMonotonicWithDim0Major(operand->shape().layout()));
   }
   TF_RET_CHECK(
+      ShapeUtil::IsOpaque(custom_call->shape()) ||
       LayoutUtil::IsMonotonicWithDim0Major(custom_call->shape().layout()));
   return Status::OK();
 }
@@ -708,8 +710,8 @@ std::unique_ptr<Layout> LayoutAssignment::ChooseOperandLayoutFromOutputLayout(
     int64 operand_no) {
   const HloInstruction* operand = instruction->operand(operand_no);
 
-  CHECK(ShapeUtil::IsArray(instruction->shape()) &&
-        ShapeUtil::IsArray(operand->shape()));
+  CHECK(ShapeUtil::IsArray(instruction->shape()));
+  CHECK(ShapeUtil::IsArray(operand->shape()));
 
   if (instruction->IsElementwiseOnOperand(operand_no) &&
       !ShapeUtil::IsScalar(operand->shape()) &&
@@ -739,7 +741,7 @@ std::unique_ptr<Layout> LayoutAssignment::ChooseOperandLayoutFromOutputLayout(
     const Shape& output_shape = instruction->shape();
     Shape output_shape_with_layout = ShapeUtil::MakeShapeWithLayout(
         output_shape.element_type(), AsInt64Slice(output_shape.dimensions()),
-        AsInt64Slice(output_layout.minor_to_major()));
+        LayoutUtil::MinorToMajor(output_layout));
     Shape operand_shape = operand->shape();
     *operand_shape.mutable_layout() =
         LayoutUtil::GetDefaultLayoutForShape(operand_shape);
@@ -768,7 +770,7 @@ std::unique_ptr<Layout> LayoutAssignment::ChooseOperandLayoutFromOutputLayout(
     int64 rank = ShapeUtil::Rank(instruction->shape());
     std::vector<int64> new_minor_to_major(rank);
     for (int64 i = 0; i < rank; ++i) {
-      int64 output_dim = output_layout.minor_to_major(i);
+      int64 output_dim = LayoutUtil::Minor(output_layout, i);
       int64 operand_dim = instruction->dimensions(output_dim);
       new_minor_to_major[i] = operand_dim;
     }
@@ -811,7 +813,7 @@ std::unique_ptr<Layout> LayoutAssignment::ChooseOutputLayoutFromOperandLayout(
     Shape operand_shape_with_layout = ShapeUtil::MakeShapeWithLayout(
         operand->shape().element_type(),
         AsInt64Slice(operand->shape().dimensions()),
-        AsInt64Slice(operand_layout.minor_to_major()));
+        LayoutUtil::MinorToMajor(operand_layout));
     Shape output_shape = user->shape();
     *output_shape.mutable_layout() =
         LayoutUtil::GetDefaultLayoutForShape(output_shape);
@@ -841,7 +843,7 @@ std::unique_ptr<Layout> LayoutAssignment::ChooseOutputLayoutFromOperandLayout(
     std::vector<int64> new_minor_to_major(rank);
     auto inverse_dimensions = InversePermutation(user->dimensions());
     for (int64 i = 0; i < rank; ++i) {
-      int64 operand_dim = operand_layout.minor_to_major(i);
+      int64 operand_dim = LayoutUtil::Minor(operand_layout, i);
       int64 user_dim = inverse_dimensions[operand_dim];
       new_minor_to_major[i] = user_dim;
     }
diff --git a/tensorflow/compiler/xla/service/llvm_ir/ir_array.cc b/tensorflow/compiler/xla/service/llvm_ir/ir_array.cc
index 7224bd6898..c558f7388c 100644
--- a/tensorflow/compiler/xla/service/llvm_ir/ir_array.cc
+++ b/tensorflow/compiler/xla/service/llvm_ir/ir_array.cc
@@ -39,7 +39,7 @@ IrArray::Index::Index(llvm::Value* linear, const Shape& shape,
       << "Shape " << ShapeUtil::HumanStringWithLayout(shape)
       << " should have a layout.";
   int64 divisor = 1;
-  for (int64 dimension : layout_.minor_to_major()) {
+  for (int64 dimension : LayoutUtil::MinorToMajor(layout_)) {
     int64 size_of_current_dimension = shape.dimensions(dimension);
     // Emit IR instructions that compute
     //   (linear_index / divisor) % current_dimension
@@ -244,8 +244,8 @@ llvm::Value* IrArray::EmitArrayElementAddress(
   //
   //   getelementptr base_ptr_, 0, most major index, ..., most minor index
   std::vector<llvm::Value*> gep_indices(1, ir_builder->getInt64(0));
-  for (int64 i = shape_->layout().minor_to_major_size() - 1; i >= 0; --i) {
-    int64 dimension = shape_->layout().minor_to_major(i);
+  for (int64 i = 0; i < LayoutUtil::MinorToMajor(*shape_).size(); ++i) {
+    int64 dimension = LayoutUtil::Major(shape_->layout(), i);
     gep_indices.push_back(actual_index[dimension]);
   }
   return ir_builder->CreateInBoundsGEP(base_ptr_, gep_indices,
diff --git a/tensorflow/compiler/xla/service/llvm_ir/llvm_util.cc b/tensorflow/compiler/xla/service/llvm_ir/llvm_util.cc
index 9a0c94b1c7..61c47a0b6e 100644
--- a/tensorflow/compiler/xla/service/llvm_ir/llvm_util.cc
+++ b/tensorflow/compiler/xla/service/llvm_ir/llvm_util.cc
@@ -207,8 +207,8 @@ llvm::Type* ShapeToIrType(const Shape& shape, llvm::Module* module) {
   if (ShapeUtil::IsTuple(shape)) {
     // A tuple buffer is an array of pointers.
     result_type = llvm::ArrayType::get(result_type, shape.tuple_shapes_size());
-  } else {
-    for (int64 dimension : shape.layout().minor_to_major()) {
+  } else if (ShapeUtil::IsArray(shape)) {
+    for (int64 dimension : LayoutUtil::MinorToMajor(shape)) {
       result_type =
           llvm::ArrayType::get(result_type, shape.dimensions(dimension));
     }
@@ -316,7 +316,7 @@ llvm::Constant* LiteralToConstant(const Literal& literal, int64 dimension_index,
   // decrements with each recursive call. We want to iterate through the
   // dimensions in major-to-minor order as we recurse so just index into
   // minor_to_major to get the dimension number for this level of the recursion.
-  int64 dimension = shape.layout().minor_to_major(dimension_index);
+  int64 dimension = LayoutUtil::Minor(shape.layout(), dimension_index);
 
   // Recursively call LiteralToConstant to construct subarrays for the
   // more-minor dimensions. Gather the subarrays into a vector for bundling into
@@ -332,7 +332,7 @@ llvm::Constant* LiteralToConstant(const Literal& literal, int64 dimension_index,
   if (elements.empty()) {
     element_type = ir_element_type;
     for (int i = 0; i < dimension_index; ++i) {
-      int64 index = shape.layout().minor_to_major(i);
+      int64 index = LayoutUtil::Minor(shape.layout(), i);
       element_type =
           llvm::ArrayType::get(element_type, shape.dimensions(index));
     }
diff --git a/tensorflow/compiler/xla/service/llvm_ir/loop_emitter.cc b/tensorflow/compiler/xla/service/llvm_ir/loop_emitter.cc
index 6fa4cd08c9..a5f7c850c3 100644
--- a/tensorflow/compiler/xla/service/llvm_ir/loop_emitter.cc
+++ b/tensorflow/compiler/xla/service/llvm_ir/loop_emitter.cc
@@ -99,8 +99,8 @@ IrArray::Index LoopEmitter::EmitIndexAndSetExitBasicBlock(
   // dimension (of the target shape).
   ForLoopNest loop_nest(loop_name, ir_builder_);
   IrArray::Index array_index(shape_.dimensions_size());
-  for (int i = shape_.layout().minor_to_major_size() - 1; i >= 0; --i) {
-    int64 dimension = shape_.layout().minor_to_major(i);
+  for (int i = 0; i < LayoutUtil::MinorToMajor(shape_).size(); ++i) {
+    int64 dimension = LayoutUtil::Major(shape_.layout(), i);
     std::unique_ptr<ForLoop> loop = loop_nest.AddLoop(
         /*start_index=*/0,
         /*end_index=*/shape_.dimensions(dimension),
diff --git a/tensorflow/compiler/xla/service/user_computation_test.cc b/tensorflow/compiler/xla/service/user_computation_test.cc
index e45673300b..ca02115863 100644
--- a/tensorflow/compiler/xla/service/user_computation_test.cc
+++ b/tensorflow/compiler/xla/service/user_computation_test.cc
@@ -65,6 +65,7 @@ TEST_F(UserComputationTest, SimpleComputation) {
 
   OutfeedRequest outfeed_request;
   *outfeed_request.mutable_operand() = constant_handle;
+  *outfeed_request.mutable_shape() = kVectorShape;
   outfeed_request.set_outfeed_config("abc");
   TF_ASSERT_OK(computation.AddOutfeedInstruction(outfeed_request));
 
diff --git a/tensorflow/compiler/xla/shape_util.cc b/tensorflow/compiler/xla/shape_util.cc
index fe5166643d..ead9f5c4ce 100644
--- a/tensorflow/compiler/xla/shape_util.cc
+++ b/tensorflow/compiler/xla/shape_util.cc
@@ -65,30 +65,36 @@ namespace {
 // the shapes are the same. If compare_layouts is true, then layouts must also
 // match.
 bool CompareShapes(const Shape& lhs, const Shape& rhs, bool compare_layouts) {
-  if (ShapeUtil::IsTuple(lhs)) {
-    return ShapeUtil::IsTuple(rhs) &&
+  if (ShapeUtil::IsTuple(lhs) || ShapeUtil::IsTuple(rhs)) {
+    return ShapeUtil::IsTuple(lhs) && ShapeUtil::IsTuple(rhs) &&
            ContainersEqual(lhs.tuple_shapes(), rhs.tuple_shapes(),
                            [=](const Shape& l, const Shape& r) {
                              return CompareShapes(l, r, compare_layouts);
                            });
+  } else if (ShapeUtil::IsOpaque(lhs) || ShapeUtil::IsOpaque(rhs)) {
+    return ShapeUtil::IsOpaque(lhs) && ShapeUtil::IsOpaque(rhs);
   }
-  // Explicitly compare the fields rather than using MessageDifferencer because
-  // we want empty layouts to be treated identically to missing layouts.
+
   if (compare_layouts) {
-    if (!ContainersEqual(lhs.layout().minor_to_major(),
-                         rhs.layout().minor_to_major())) {
-      VLOG(3) << "CompareShapes: lhs layout != rhs layout";
-      return false;
-    }
-    if (!ContainersEqual(lhs.layout().padded_dimensions(),
-                         rhs.layout().padded_dimensions())) {
-      VLOG(3)
-          << "CompareShapes: lhs padded_dimensions != rhs padded_dimensions";
+    if (lhs.layout().format() != rhs.layout().format()) {
       return false;
     }
-    if (lhs.layout().padding_value() != rhs.layout().padding_value()) {
-      VLOG(3) << "CompareShapes: lhs padding value != rhs padding_value";
-      return false;
+    if (LayoutUtil::IsDense(lhs)) {
+      if (!ContainersEqual(LayoutUtil::MinorToMajor(lhs),
+                           LayoutUtil::MinorToMajor(rhs))) {
+        VLOG(3) << "CompareShapes: lhs layout != rhs layout";
+        return false;
+      }
+      if (!ContainersEqual(lhs.layout().padded_dimensions(),
+                           rhs.layout().padded_dimensions())) {
+        VLOG(3)
+            << "CompareShapes: lhs padded_dimensions != rhs padded_dimensions";
+        return false;
+      }
+      if (lhs.layout().padding_value() != rhs.layout().padding_value()) {
+        VLOG(3) << "CompareShapes: lhs padding value != rhs padding_value";
+        return false;
+      }
     }
   }
 
@@ -236,6 +242,7 @@ StatusOr<Shape> MakeShapeWithLayoutInternal(
 }
 
 /* static */ void ShapeUtil::AppendMajorDimension(int bound, Shape* shape) {
+  CHECK(LayoutUtil::IsDense(*shape));
   shape->mutable_layout()->add_minor_to_major(Rank(*shape));
   shape->add_dimensions(bound);
   TF_DCHECK_OK(ValidateShape(*shape));
@@ -885,7 +892,9 @@ Status ForEachMutableSubshapeHelper(
     new_shape.add_dimensions(dim);
   }
   if (shape.has_layout()) {
+    CHECK(LayoutUtil::IsDense(shape));
     Layout* new_layout = new_shape.mutable_layout();
+    new_layout->set_format(DENSE);
     new_layout->clear_minor_to_major();
     for (auto index : Permute(permutation, shape.layout().minor_to_major())) {
       new_layout->add_minor_to_major(index);
@@ -1312,6 +1321,7 @@ ShapeUtil::DimensionsUnmodifiedByReshape(const Shape& input_shape,
   shape.mutable_dimensions()->erase(shape.dimensions().begin() + dim_to_delete);
   if (LayoutUtil::HasLayout(shape)) {
     Layout* layout = shape.mutable_layout();
+    layout->set_format(DENSE);
     for (size_t i = 0; i < layout->minor_to_major().size();) {
       if (layout->minor_to_major(i) == dim_to_delete) {
         layout->mutable_minor_to_major()->erase(
diff --git a/tensorflow/compiler/xla/shape_util.h b/tensorflow/compiler/xla/shape_util.h
index 666c7da697..301247d61c 100644
--- a/tensorflow/compiler/xla/shape_util.h
+++ b/tensorflow/compiler/xla/shape_util.h
@@ -22,6 +22,7 @@ limitations under the License.
 #include <initializer_list>
 #include <string>
 
+#include "tensorflow/compiler/xla/layout_util.h"
 #include "tensorflow/compiler/xla/statusor.h"
 #include "tensorflow/compiler/xla/types.h"
 #include "tensorflow/compiler/xla/xla_data.pb.h"
@@ -324,7 +325,8 @@ class ShapeUtil {
     return shape.element_type() == OPAQUE;
   }
 
-  // Returns whether the shape is an array.
+  // Returns whether the shape is an array.  Note that scalars are considered
+  // arrays.
   static bool IsArray(const Shape& shape) {
     return !IsTuple(shape) && !IsOpaque(shape);
   }
@@ -506,8 +508,7 @@ class ShapeUtil {
     CHECK_EQ(Rank(shape), base.size());
     CHECK_EQ(incr.size(), base.size());
     CHECK_EQ(count.size(), base.size());
-    const Layout& layout = shape.layout();
-    const int64 rank = layout.minor_to_major_size();
+    const int64 rank = LayoutUtil::MinorToMajor(shape).size();
     // Allows handling R0 arrays, such that the visitor function will be called
     // once with the proper empty indexes.
     int64 n = -1;
@@ -515,7 +516,7 @@ class ShapeUtil {
     while (n < rank && visitor_function(indexes)) {
       // Increments dimensions in minor to major order.
       for (n = 0; n < rank; ++n) {
-        int64 dim = layout.minor_to_major(n);
+        int64 dim = LayoutUtil::Minor(shape.layout(), n);
         indexes[dim] += incr[dim];
         if (indexes[dim] < base[dim] + count[dim]) {
           break;
diff --git a/tensorflow/compiler/xla/shape_util_test.cc b/tensorflow/compiler/xla/shape_util_test.cc
index 4bce7ca51d..3be6d6c429 100644
--- a/tensorflow/compiler/xla/shape_util_test.cc
+++ b/tensorflow/compiler/xla/shape_util_test.cc
@@ -165,20 +165,6 @@ TEST(ShapeUtilTest, IncompatibleTuplesWithDifferentDimensions) {
   EXPECT_FALSE(ShapeUtil::Compatible(tuple1, tuple2));
 }
 
-TEST(ShapeUtilTest, EmptyLayoutEqualsMissingLayout) {
-  // A shape with a missing layout should be equal to a shape with an empty
-  // layout.
-  Shape scalar1 = ShapeUtil::MakeShape(F32, {});
-  Shape scalar2 = ShapeUtil::MakeShape(F32, {});
-
-  EXPECT_TRUE(ShapeUtil::Equal(scalar1, scalar2));
-
-  scalar1.clear_layout();    // Remove layout field.
-  scalar2.mutable_layout();  // Create empty layout field.
-
-  EXPECT_TRUE(ShapeUtil::Equal(scalar1, scalar2));
-}
-
 TEST(ShapeUtilTest, CompareShapesWithPaddedDimensionsMismatch) {
   Shape shape1 = ShapeUtil::MakeShape(F32, {20, 30});
   shape1.mutable_layout()->add_padded_dimensions(10);
@@ -199,17 +185,17 @@ TEST(ShapeUtilTest, CompareShapesWithPaddingValueMismatch) {
   EXPECT_FALSE(ShapeUtil::Equal(shape1, shape2));
 }
 
-TEST(ShapeUtilTest, ScalarUnpopulatedLayoutEqualsScalarLayout) {
-  Shape scalar_unpopulated = ShapeUtil::MakeShape(F32, {});
-  scalar_unpopulated.clear_layout();
-  ASSERT_FALSE(scalar_unpopulated.has_layout())
-      << ShapeUtil::HumanStringWithLayout(scalar_unpopulated);
+TEST(ShapeUtilTest, ScalarDefaultLayoutEqualsScalarEmptyMin2Maj) {
+  Shape scalar_default_layout = ShapeUtil::MakeShape(F32, {});
+  ASSERT_TRUE(scalar_default_layout.has_layout())
+      << ShapeUtil::HumanStringWithLayout(scalar_default_layout);
 
-  const Shape scalar_populated = ShapeUtil::MakeShapeWithLayout(F32, {}, {});
-  ASSERT_TRUE(scalar_populated.has_layout())
-      << ShapeUtil::HumanStringWithLayout(scalar_populated);
+  const Shape scalar_empty_min2maj =
+      ShapeUtil::MakeShapeWithLayout(F32, {}, {});
+  ASSERT_TRUE(scalar_empty_min2maj.has_layout())
+      << ShapeUtil::HumanStringWithLayout(scalar_empty_min2maj);
 
-  EXPECT_TRUE(ShapeUtil::Equal(scalar_unpopulated, scalar_populated));
+  EXPECT_TRUE(ShapeUtil::Equal(scalar_default_layout, scalar_empty_min2maj));
 }
 
 TEST(ShapeUtilTest, ByteSizeOfWithoutPadding) {
diff --git a/tensorflow/compiler/xla/xla_data.proto b/tensorflow/compiler/xla/xla_data.proto
index 215707634b..95045d5e28 100644
--- a/tensorflow/compiler/xla/xla_data.proto
+++ b/tensorflow/compiler/xla/xla_data.proto
@@ -114,6 +114,14 @@ message PaddingConfig {
   repeated PaddingConfigDimension dimensions = 1;
 }
 
+// A format specifies the method used by a layout to store an array in memory.
+enum Format {
+  INVALID_FORMAT = 0;
+  // The default layout, with exactly one storage location per element (ignoring
+  // padding).
+  DENSE = 1;
+}
+
 // A layout describes how the array is placed in (1D) memory space.  This
 // includes the minor-to-major ordering of dimensions within a shape, as well as
 // any padding present in those dimensions.
@@ -124,19 +132,23 @@ message PaddingConfig {
 //
 // See the XLA documentation for more information on shapes and layouts.
 message Layout {
+  // The method used to store the data in memory. The format determines which of
+  // the other fields are used by the layout.
+  Format format = 4;
+
   // Sequence of dimension numbers, from minor (fastest varying index) to major
   // (slowest varying index). This field is required.
   repeated int64 minor_to_major = 1;
 
-  // The width to which the layout of each dimension is padded up
-  // to. If present, the size of the padded_dimensions must equal the
-  // rank of the shape. The padding appears at the end of a dimension,
-  // not at the beginning. This kind of padding, unlike padding in
-  // e.g. convolution, is not part of the shape.
+  // The width to which the layout of each dimension is padded up to. If
+  // present, the size of the padded_dimensions must equal the rank of the
+  // shape. The padding appears at the end of a dimension, not at the
+  // beginning. This kind of padding, unlike padding in e.g. convolution, is not
+  // part of the shape. This field must be unset unless the format is DENSE.
   repeated int64 padded_dimensions = 2;
 
-  // Describes the values in the padding specified by
-  // padded_dimensions.
+  // Describes the values in the padding specified by padded_dimensions. This
+  // field must be unset unless the format is DENSE.
   PaddingValue padding_value = 3;
 
   // Important: if any field is added, be sure to modify ShapeUtil::Equal()
-- 
GitLab


From 77bbce2fe0e82e712cf44fed510e616c10c0e7de Mon Sep 17 00:00:00 2001
From: Erich Elsen <eriche@google.com>
Date: Mon, 18 Dec 2017 15:25:46 -0800
Subject: [PATCH 1170/1225] disable failing test

---
 tensorflow/contrib/batching/BUILD | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/tensorflow/contrib/batching/BUILD b/tensorflow/contrib/batching/BUILD
index 8b7df4a84c..5f25a01144 100644
--- a/tensorflow/contrib/batching/BUILD
+++ b/tensorflow/contrib/batching/BUILD
@@ -89,6 +89,10 @@ tf_cc_test(
         "//tensorflow/core:test",
         "//tensorflow/core:test_main",
     ],
+    tags = [
+        "local",
+        "manual",
+    ],
 )
 
 cc_library(
-- 
GitLab


From 82bf45c534fcc84a38d3402b1f3e1aec7cd64444 Mon Sep 17 00:00:00 2001
From: Erich Elsen <eriche@google.com>
Date: Mon, 18 Dec 2017 15:32:36 -0800
Subject: [PATCH 1171/1225] disable failing test, fix formatting

---
 tensorflow/contrib/batching/BUILD | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/tensorflow/contrib/batching/BUILD b/tensorflow/contrib/batching/BUILD
index 5f25a01144..ea8ac2c680 100644
--- a/tensorflow/contrib/batching/BUILD
+++ b/tensorflow/contrib/batching/BUILD
@@ -82,6 +82,10 @@ cc_library(
 tf_cc_test(
     name = "adaptive_shared_batch_scheduler_test",
     srcs = ["adaptive_shared_batch_scheduler_test.cc"],
+    tags = [
+        "local",
+        "manual",
+    ],
     deps = [
         ":adaptive_shared_batch_scheduler",
         "//tensorflow/contrib/batching/test_util:fake_clock_env",
@@ -89,10 +93,6 @@ tf_cc_test(
         "//tensorflow/core:test",
         "//tensorflow/core:test_main",
     ],
-    tags = [
-        "local",
-        "manual",
-    ],
 )
 
 cc_library(
-- 
GitLab


From 6655b6bfebb4ae480602cb156755f9bf6107d909 Mon Sep 17 00:00:00 2001
From: Bhavani Subramanian <bhavani1.subramanian@intel.com>
Date: Mon, 18 Dec 2017 16:16:38 -0800
Subject: [PATCH 1172/1225] Updating MKL to the latest release.

---
 tensorflow/workspace.bzl | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl
index 846b9bc645..8683d60ddf 100644
--- a/tensorflow/workspace.bzl
+++ b/tensorflow/workspace.bzl
@@ -59,11 +59,11 @@ def tf_workspace(path_prefix="", tf_repo_name=""):
   mkl_repository(
       name = "mkl",
       urls = [
-          "https://mirror.bazel.build/github.com/01org/mkl-dnn/releases/download/v0.9/mklml_lnx_2018.0.20170720.tgz",
-          "https://github.com/01org/mkl-dnn/releases/download/v0.9/mklml_lnx_2018.0.20170720.tgz",
+          "https://mirror.bazel.build/github.com/01org/mkl-dnn/releases/download/v0.11/mklml_lnx_2018.0.1.20171007.tgz",
+          "https://github.com/01org/mkl-dnn/releases/download/v0.11/mklml_lnx_2018.0.1.20171007.tgz",
       ],
-      sha256 = "57ba56c4c243f403ff78f417ff854ef50b9eddf4a610a917b7c95e7fa8553a4b",
-      strip_prefix = "mklml_lnx_2018.0.20170720",
+      sha256 = "6b07cb7e5451db67c2e31e785ae458b18f7f363c60a61685488f69e9ae7199d4",
+      strip_prefix = "mklml_lnx_2018.0.1.20171007",
       build_file = str(Label("//third_party/mkl:mkl.BUILD")),
   )
 
-- 
GitLab


From 707171494b1571695a2611ccb1e3f91951178a64 Mon Sep 17 00:00:00 2001
From: Yifei Feng <fengyifei2026@gmail.com>
Date: Mon, 18 Dec 2017 16:37:46 -0800
Subject: [PATCH 1173/1225] Add no_pip tags to stats_dataset_ops_test

---
 tensorflow/contrib/data/python/kernel_tests/BUILD | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tensorflow/contrib/data/python/kernel_tests/BUILD b/tensorflow/contrib/data/python/kernel_tests/BUILD
index e0d0759567..d5ad145327 100644
--- a/tensorflow/contrib/data/python/kernel_tests/BUILD
+++ b/tensorflow/contrib/data/python/kernel_tests/BUILD
@@ -481,6 +481,7 @@ py_test(
     size = "small",
     srcs = ["stats_dataset_ops_test.py"],
     srcs_version = "PY2AND3",
+    tags = ["no_pip"],
     deps = [
         ":dataset_serialization_test",
         "//tensorflow/contrib/data/python/ops:dataset_ops",
-- 
GitLab


From e289dfd636bfab31232a511b0e96a785571ada92 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 18 Dec 2017 16:37:40 -0800
Subject: [PATCH 1174/1225] Fix typo in comment for GetRoot().

PiperOrigin-RevId: 179486882
---
 tensorflow/compiler/xla/service/user_computation.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/compiler/xla/service/user_computation.cc b/tensorflow/compiler/xla/service/user_computation.cc
index 4a4f00f4ea..066ffcd7e9 100644
--- a/tensorflow/compiler/xla/service/user_computation.cc
+++ b/tensorflow/compiler/xla/service/user_computation.cc
@@ -1506,7 +1506,7 @@ StatusOr<const OperationRequest*> LookUpRequest(
   return &session_computation.requests().at(handle_value);
 }
 
-// Returns the OperationRequestion corresponding to the root (result) of the
+// Returns the OperationRequest corresponding to the root (result) of the
 // session computation.
 StatusOr<const OperationRequest*> GetRoot(
     VersionedComputationHandle::Version version,
-- 
GitLab


From fb81f1927924c1e3d82a2673c04f8918004cca9e Mon Sep 17 00:00:00 2001
From: Justin Lebar <jlebar@google.com>
Date: Mon, 18 Dec 2017 16:43:08 -0800
Subject: [PATCH 1175/1225] [XLA] Add pow(pow(A, X), Y) => pow(A, X*Y)
 simplification.

PiperOrigin-RevId: 179487434
---
 .../xla/service/algebraic_simplifier.cc       | 21 ++++++++
 .../xla/service/algebraic_simplifier_test.cc  | 49 +++++++++++++++++++
 2 files changed, 70 insertions(+)

diff --git a/tensorflow/compiler/xla/service/algebraic_simplifier.cc b/tensorflow/compiler/xla/service/algebraic_simplifier.cc
index b16c742081..7dc09a8cbd 100644
--- a/tensorflow/compiler/xla/service/algebraic_simplifier.cc
+++ b/tensorflow/compiler/xla/service/algebraic_simplifier.cc
@@ -1322,6 +1322,27 @@ Status AlgebraicSimplifierVisitor::HandlePower(HloInstruction* power) {
         power, HloInstruction::CreateBinary(power->shape(), HloOpcode::kDivide,
                                             broadcast_one, lhs));
   }
+
+  VLOG(10) << "trying transform [pow(pow(A, X), Y) => pow(A, X*Y)]: "
+           << power->ToString();
+
+  // Don't perform this optimization if either of the exponents is complex; this
+  // identity is true only for real-valued exponents.  In addition, we cowardly
+  // refuse to do this transformation if the two expontents have different
+  // element types.
+  if (lhs->opcode() == HloOpcode::kPower &&
+      !ShapeUtil::ElementIsComplex(lhs->operand(1)->shape()) &&
+      !ShapeUtil::ElementIsComplex(rhs->shape()) &&
+      ShapeUtil::SameElementType(lhs->operand(1)->shape(), rhs->shape())) {
+    auto exponent_product =
+        computation_->AddInstruction(HloInstruction::CreateBinary(
+            rhs->shape(), HloOpcode::kMultiply, lhs->mutable_operand(1), rhs));
+    return ReplaceWithNewInstruction(
+        power, HloInstruction::CreateBinary(power->shape(), HloOpcode::kPower,
+                                            lhs->mutable_operand(0),
+                                            exponent_product));
+  }
+
   return Status::OK();
 }
 
diff --git a/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc b/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc
index d0b659eec3..d4739ca113 100644
--- a/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc
+++ b/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc
@@ -327,6 +327,55 @@ TEST_F(AlgebraicSimplifierTest, DivOfBroadcastingPower) {
   EXPECT_EQ(0, negate_shape.dimensions_size());
 }
 
+// pow(pow(A, X), Y) => pow(A, X*Y)
+TEST_F(AlgebraicSimplifierTest, PowerOfPower) {
+  Shape r0f32 = ShapeUtil::MakeShape(F32, {});
+  Shape r1f32 = ShapeUtil::MakeShape(F32, {7});
+  HloComputation::Builder builder(TestName());
+  HloInstruction* base = builder.AddInstruction(
+      HloInstruction::CreateParameter(0, r1f32, "param0"));
+  HloInstruction* exp1 = builder.AddInstruction(
+      HloInstruction::CreateParameter(1, r0f32, "param1"));
+  HloInstruction* exp2 = builder.AddInstruction(
+      HloInstruction::CreateParameter(2, r0f32, "param2"));
+  HloInstruction* inner_power = builder.AddInstruction(
+      HloInstruction::CreateBinary(r1f32, HloOpcode::kPower, base, exp1));
+  builder.AddInstruction(HloInstruction::CreateBinary(r1f32, HloOpcode::kPower,
+                                                      inner_power, exp2));
+
+  auto module = CreateNewModule();
+  auto computation = module->AddEntryComputation(builder.Build());
+  AlgebraicSimplifier simplifier(/*is_layout_sensitive=*/false,
+                                 non_bitcasting_callback());
+  ASSERT_TRUE(simplifier.Run(module.get()).ValueOrDie());
+  EXPECT_THAT(computation->root_instruction(),
+              op::Power(base, op::Multiply(exp1, exp2)));
+}
+
+// Don't simplify pow(pow(A, X), Y) => pow(A, X*Y) if X and Y are complex
+// numbers.
+TEST_F(AlgebraicSimplifierTest, PowerOfPowerComplex) {
+  Shape r0c64 = ShapeUtil::MakeShape(C64, {});
+  Shape r1f32 = ShapeUtil::MakeShape(F32, {7});
+  HloComputation::Builder builder(TestName());
+  HloInstruction* base = builder.AddInstruction(
+      HloInstruction::CreateParameter(0, r1f32, "param0"));
+  HloInstruction* exp1 = builder.AddInstruction(
+      HloInstruction::CreateParameter(1, r0c64, "param1"));
+  HloInstruction* exp2 = builder.AddInstruction(
+      HloInstruction::CreateParameter(2, r0c64, "param2"));
+  HloInstruction* inner_power = builder.AddInstruction(
+      HloInstruction::CreateBinary(r1f32, HloOpcode::kPower, base, exp1));
+  builder.AddInstruction(HloInstruction::CreateBinary(r1f32, HloOpcode::kPower,
+                                                      inner_power, exp2));
+
+  auto module = CreateNewModule();
+  module->AddEntryComputation(builder.Build());
+  AlgebraicSimplifier simplifier(/*is_layout_sensitive=*/false,
+                                 non_bitcasting_callback());
+  ASSERT_FALSE(simplifier.Run(module.get()).ValueOrDie());
+}
+
 // Test that A/1 is simplified to A for a scalar.
 TEST_F(AlgebraicSimplifierTest, DivOneScalar) {
   Shape r0f32 = ShapeUtil::MakeShape(F32, {});
-- 
GitLab


From 0d3713e808ac27b547619bd850713eb28ff26eb4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Yan=20Facai=20=28=E9=A2=9C=E5=8F=91=E6=89=8D=29?=
 <facai.yan@gmail.com>
Date: Tue, 19 Dec 2017 08:52:09 +0800
Subject: [PATCH 1176/1225] `variable_scope` use `auxiliary_name_scope` to
 control whether to create new name scope (#14390)

* TST: add test case

* ENH: variable_scope supports auxiliary_name_scope

* DOC: add document

* ENH: support to reuse name scope given by user

* TST: add test for name scope

* DOC: revise docment for name scope

* CLN: revise TypeError for None

* TST: add test for reentering variable_scope

* BUG: root scope reuse name scope

* TST: add test case for root scope

* TST: remove name scope tests

* ENH: remove name scope support

* DOC: remove name scope support

* TST: split and add more test cases

* BUG: root name scope is empty string

* CLN: remove comment
---
 .../kernel_tests/variable_scope_test.py       | 109 ++++++++++++++++++
 tensorflow/python/ops/variable_scope.py       |  36 +++++-
 .../golden/tensorflow.variable_scope.pbtxt    |   2 +-
 3 files changed, 141 insertions(+), 6 deletions(-)

diff --git a/tensorflow/python/kernel_tests/variable_scope_test.py b/tensorflow/python/kernel_tests/variable_scope_test.py
index 8491171923..8d57ff03c8 100644
--- a/tensorflow/python/kernel_tests/variable_scope_test.py
+++ b/tensorflow/python/kernel_tests/variable_scope_test.py
@@ -746,6 +746,115 @@ class VariableScopeTest(test.TestCase):
           with ops.name_scope("scope2") as sc2:
             self.assertEqual(sc2, "outer_1/default/scope2/")
 
+  def testBasicWhenAuxiliaryNameScopeIsFalse(self):
+    with self.test_session():
+      with variable_scope.variable_scope("scope",
+                                         auxiliary_name_scope=False) as scope:
+        self.assertEqual(scope.original_name_scope, "")
+        self.assertEqual(variable_scope.get_variable("w", []).name, "scope/w:0")
+        self.assertEqual(constant_op.constant([], name="c").name, "c:0")
+      with variable_scope.variable_scope(scope,
+                                         auxiliary_name_scope=False) as scope1:
+        self.assertEqual(scope.original_name_scope, "")
+        self.assertEqual(variable_scope.get_variable("w1", []).name, "scope/w1:0")
+        self.assertEqual(constant_op.constant([], name="c1").name, "c1:0")
+      # Recheck: new name scope is NOT created before
+      with ops.name_scope("scope"):
+        self.assertEqual(constant_op.constant([], name="c").name, "scope/c:0")
+
+      with variable_scope.variable_scope("outer"):
+        with variable_scope.variable_scope("inner",
+                                           auxiliary_name_scope=False) as inner:
+          self.assertEqual(inner.original_name_scope, "outer/")
+          self.assertEqual(variable_scope.get_variable("w", []).name, "outer/inner/w:0")
+          self.assertEqual(constant_op.constant([], name="c").name, "outer/c:0")
+        with variable_scope.variable_scope(inner,
+                                           auxiliary_name_scope=False) as inner1:
+          self.assertEqual(inner1.original_name_scope, "outer/")
+          self.assertEqual(variable_scope.get_variable("w1", []).name, "outer/inner/w1:0")
+          self.assertEqual(constant_op.constant([], name="c1").name, "outer/c1:0")
+        # Recheck: new name scope is NOT created before
+        with ops.name_scope("inner"):
+          self.assertEqual(constant_op.constant([], name="c").name, "outer/inner/c:0")
+
+  def testCreatedByDefaultNameWhenAuxiliaryNameScopeIsFalse(self):
+    with self.test_session():
+      with variable_scope.variable_scope(None, default_name="default",
+                                         auxiliary_name_scope=False) as scope:
+        self.assertEqual(scope.original_name_scope, "")
+        self.assertEqual(variable_scope.get_variable("w", []).name, "default/w:0")
+        self.assertEqual(constant_op.constant([], name="c").name, "c:0")
+      # Recheck: new name scope is NOT created before
+      with ops.name_scope("default"):
+        self.assertEqual(constant_op.constant([], name="c").name, "default/c:0")
+
+      with variable_scope.variable_scope("outer"):
+        with variable_scope.variable_scope(None, default_name="default",
+                                           auxiliary_name_scope=False) as inner:
+          self.assertEqual(inner.original_name_scope, "outer/")
+          self.assertEqual(variable_scope.get_variable("w", []).name, "outer/default/w:0")
+          self.assertEqual(constant_op.constant([], name="c").name, "outer/c:0")
+        # Recheck: new name scope is NOT created before
+        with ops.name_scope("default"):
+          self.assertEqual(constant_op.constant([], name="c").name, "outer/default/c:0")
+
+  def testReenterRootScopeWhenAuxiliaryNameScopeIsFalse(self):
+    with self.test_session():
+      root_scope = variable_scope.get_variable_scope()
+      with variable_scope.variable_scope(root_scope,
+                                         auxiliary_name_scope=False) as scope:
+        self.assertEqual(scope.original_name_scope, "")
+        self.assertEqual(variable_scope.get_variable("w", []).name, "w:0")
+        self.assertEqual(constant_op.constant([], name="c").name, "c:0")
+
+      with variable_scope.variable_scope("outer"):
+        with variable_scope.variable_scope(root_scope,
+                                           auxiliary_name_scope=False) as inner:
+          self.assertEqual(inner.original_name_scope, "")
+          self.assertEqual(variable_scope.get_variable("w1", []).name, "w1:0")
+          self.assertEqual(constant_op.constant([], name="c1").name, "outer/c1:0")
+
+  def testAuxiliaryNameScopeIsInvalid(self):
+    with self.test_session():
+      with self.assertRaisesRegexp(TypeError, "auxiliary_name_scope"):
+        with variable_scope.variable_scope(None, default_name="scope",
+                                           auxiliary_name_scope="invalid"):
+          pass
+
+      with self.assertRaisesRegexp(TypeError, "auxiliary_name_scope"):
+        with variable_scope.variable_scope("scope", auxiliary_name_scope="invalid"):
+          pass
+
+      with variable_scope.variable_scope("scope") as scope:
+        pass
+      with self.assertRaisesRegexp(TypeError, "auxiliary_name_scope"):
+        with variable_scope.variable_scope(scope, auxiliary_name_scope="invalid"):
+          pass
+
+  def testReuseScopeWithoutNameScopeCollision(self):
+    # Github issue: #13429
+    with self.test_session():
+      with variable_scope.variable_scope("outer"):
+        with variable_scope.variable_scope("inner") as inner:
+          pass
+
+      with variable_scope.variable_scope(inner,
+                                         auxiliary_name_scope=False) as scope:
+        with ops.name_scope(scope.original_name_scope):
+          self.assertEqual(variable_scope.get_variable("w", []).name, "outer/inner/w:0")
+          self.assertEqual(constant_op.constant([], name="c").name, "outer/inner/c:0")
+        with ops.name_scope("inner"):
+          self.assertEqual(constant_op.constant([], name="c").name, "inner/c:0")
+
+      with variable_scope.variable_scope("another"):
+        with variable_scope.variable_scope(inner,
+                                           auxiliary_name_scope=False) as scope1:
+          with ops.name_scope(scope1.original_name_scope):
+            self.assertEqual(variable_scope.get_variable("w1", []).name, "outer/inner/w1:0")
+            self.assertEqual(constant_op.constant([], name="c1").name, "outer/inner/c1:0")
+          with ops.name_scope("inner"):
+            self.assertEqual(constant_op.constant([], name="c").name, "another/inner/c:0")
+
   @test_util.run_in_graph_and_eager_modes()
   def testGetLocalVar(self):
     # Check that local variable respects naming.
diff --git a/tensorflow/python/ops/variable_scope.py b/tensorflow/python/ops/variable_scope.py
index 4a23d96721..ac6173c260 100644
--- a/tensorflow/python/ops/variable_scope.py
+++ b/tensorflow/python/ops/variable_scope.py
@@ -1584,6 +1584,10 @@ class _pure_variable_scope(object):  # pylint: disable=invalid-name
           else self._name_or_scope)
       self._reuse = (self._reuse
                      or self._old.reuse)  # Re-using is inherited by sub-scopes.
+      if self._old_name_scope is None:
+        name_scope = self._name_or_scope
+      else:
+        name_scope = self._old_name_scope
       variable_scope_object = VariableScope(
           self._reuse,
           name=self._new_name,
@@ -1594,7 +1598,7 @@ class _pure_variable_scope(object):  # pylint: disable=invalid-name
           dtype=self._old.dtype,
           use_resource=self._old.use_resource,
           custom_getter=self._old.custom_getter,
-          name_scope=self._old_name_scope or self._name_or_scope,
+          name_scope=name_scope,
           constraint=self._constraint)
       if self._initializer is not None:
         variable_scope_object.set_initializer(self._initializer)
@@ -1763,7 +1767,8 @@ class variable_scope(object):  # pylint: disable=invalid-name
                reuse=None,
                dtype=None,
                use_resource=None,
-               constraint=None):
+               constraint=None,
+               auxiliary_name_scope=True):
     """Initialize the context manager.
 
     Args:
@@ -1795,6 +1800,8 @@ class variable_scope(object):  # pylint: disable=invalid-name
         variable and return the Tensor for the projected value
         (which must have the same shape). Constraints are not safe to
         use when doing asynchronous distributed training.
+      auxiliary_name_scope: If `True`, we create an auxiliary name scope with
+        the scope. If `False`, we don't touch name scope.
 
     Returns:
       A scope that can be captured and reused.
@@ -1832,6 +1839,10 @@ class variable_scope(object):  # pylint: disable=invalid-name
       self._graph = ops._get_graph_from_inputs(self._values)  # pylint: disable=protected-access
     self._cached_pure_variable_scope = None
     self._current_name_scope = None
+    if not isinstance(auxiliary_name_scope, bool):
+      raise TypeError("The auxiliary_name_scope must be `True` or `False`, "
+                      "while get {}".format(auxiliary_name_scope))
+    self._auxiliary_name_scope = auxiliary_name_scope
 
   def __enter__(self):
     # If the default graph is building a function, then we should not replace it
@@ -1850,6 +1861,21 @@ class variable_scope(object):  # pylint: disable=invalid-name
       if self._current_name_scope is not None:
         self._current_name_scope.__enter__()
       return self._cached_pure_variable_scope.__enter__()
+
+    if self._auxiliary_name_scope:
+      # Create a new name scope later
+      current_name_scope = None
+    else:
+      # Reenter the current name scope
+      name_scope = ops.get_name_scope()
+      if name_scope:
+        # Hack to reenter
+        name_scope = name_scope + "/"
+        current_name_scope = ops.name_scope(name_scope)
+      else:
+        # Root scope
+        current_name_scope = ops.name_scope(name_scope)
+
     if self._name_or_scope is not None:
       if not isinstance(self._name_or_scope,
                         (VariableScope,) + six.string_types):
@@ -1859,8 +1885,8 @@ class variable_scope(object):  # pylint: disable=invalid-name
         name_scope = self._name_or_scope
       else:
         name_scope = self._name_or_scope.name.split("/")[-1]
-      if name_scope:
-        self._current_name_scope = ops.name_scope(name_scope)
+      if name_scope or current_name_scope:
+        self._current_name_scope = current_name_scope or ops.name_scope(name_scope)
         current_name_scope_name = self._current_name_scope.__enter__()
         if isinstance(self._name_or_scope, six.string_types):
           old_name_scope = current_name_scope_name
@@ -1898,7 +1924,7 @@ class variable_scope(object):  # pylint: disable=invalid-name
     else:  # Here name_or_scope is None. Using default name, but made unique.
       if self._reuse:
         raise ValueError("reuse=True cannot be used without a name_or_scope")
-      self._current_name_scope = ops.name_scope(self._default_name)
+      self._current_name_scope = current_name_scope or ops.name_scope(self._default_name)
       current_name_scope_name = self._current_name_scope.__enter__()
       unique_default_name = _get_unique_variable_scope(self._default_name)
       self._cached_pure_variable_scope = _pure_variable_scope(
diff --git a/tensorflow/tools/api/golden/tensorflow.variable_scope.pbtxt b/tensorflow/tools/api/golden/tensorflow.variable_scope.pbtxt
index de1ad7e860..e62dec93e6 100644
--- a/tensorflow/tools/api/golden/tensorflow.variable_scope.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.variable_scope.pbtxt
@@ -4,6 +4,6 @@ tf_class {
   is_instance: "<type \'object\'>"
   member_method {
     name: "__init__"
-    argspec: "args=[\'self\', \'name_or_scope\', \'default_name\', \'values\', \'initializer\', \'regularizer\', \'caching_device\', \'partitioner\', \'custom_getter\', \'reuse\', \'dtype\', \'use_resource\', \'constraint\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\'], "
+    argspec: "args=[\'self\', \'name_or_scope\', \'default_name\', \'values\', \'initializer\', \'regularizer\', \'caching_device\', \'partitioner\', \'custom_getter\', \'reuse\', \'dtype\', \'use_resource\', \'constraint\', \'auxiliary_name_scope\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'True\'], "
   }
 }
-- 
GitLab


From a57ba6e9337e217ed159378aabe8c851d7770aca Mon Sep 17 00:00:00 2001
From: Gunhan Gulsoy <gunan@google.com>
Date: Mon, 18 Dec 2017 16:56:32 -0800
Subject: [PATCH 1177/1225] Disable failing testcases on windows. (#15462)

---
 tensorflow/python/kernel_tests/dynamic_partition_op_test.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/tensorflow/python/kernel_tests/dynamic_partition_op_test.py b/tensorflow/python/kernel_tests/dynamic_partition_op_test.py
index b4fb5aa411..fedbf9e696 100644
--- a/tensorflow/python/kernel_tests/dynamic_partition_op_test.py
+++ b/tensorflow/python/kernel_tests/dynamic_partition_op_test.py
@@ -18,6 +18,8 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
+import unittest
+
 import numpy as np
 from six.moves import xrange  # pylint: disable=redefined-builtin
 
@@ -219,6 +221,7 @@ class DynamicPartitionTest(test.TestCase):
     self.assertAllEqual([], partition_vals[0])
     self.assertAllEqual([], partition_vals[1])
 
+  @unittest.skip("Fails on windows.")
   def testGPUTooManyParts(self):
     # This test only makes sense on the GPU. There we do not check
     # for errors. In this case, we should discard all but the first
@@ -239,6 +242,7 @@ class DynamicPartitionTest(test.TestCase):
     self.assertAllEqual([6], partition_vals[0])
     self.assertAllEqual([5], partition_vals[1])
 
+  @unittest.skip("Fails on windows.")
   def testGPUPartsTooLarge(self):
     # This test only makes sense on the GPU. There we do not check
     # for errors. In this case, we should discard all the values
@@ -262,6 +266,7 @@ class DynamicPartitionTest(test.TestCase):
     self.assertAllEqual([], partition_vals[3])
     self.assertAllEqual([], partition_vals[4])
 
+  @unittest.skip("Fails on windows.")
   def testGPUAllIndicesBig(self):
     # This test only makes sense on the GPU. There we do not check
     # for errors. In this case, we should discard all the values
-- 
GitLab


From 94159844e0bab1dc39fe694a3226e6d93b546848 Mon Sep 17 00:00:00 2001
From: Ted Ying <yingted@gmail.com>
Date: Mon, 18 Dec 2017 17:09:37 -0800
Subject: [PATCH 1178/1225] Include _solib_local for MKL-DNN libs (#14709)

* Include _solib_local for MKL-DNN libs

`_solib_local/libmklml_intel.so` is not getting included in the package.
`build_pip_package.sh` has been updated to copy `*solib*` instead of just `_solib_k8`, so just update `setup.py` to include it.
See https://github.com/tensorflow/tensorflow/issues/13711 for details.

* Don't glob for `_solib_local` and `_solib_k8`

`find_files` doesn't accept globs for `root`.
---
 tensorflow/tools/pip_package/setup.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tensorflow/tools/pip_package/setup.py b/tensorflow/tools/pip_package/setup.py
index 1b2e007f9d..8fa39b6248 100644
--- a/tensorflow/tools/pip_package/setup.py
+++ b/tensorflow/tools/pip_package/setup.py
@@ -177,6 +177,7 @@ def find_files(pattern, root):
 
 matches = ['../' + x for x in find_files('*', 'external') if '.py' not in x]
 matches += ['../' + x for x in find_files('*', '_solib_k8') if '.py' not in x]
+matches += ['../' + x for x in find_files('*', '_solib_local') if '.py' not in x]
 
 if os.name == 'nt':
   EXTENSION_NAME = 'python/_pywrap_tensorflow_internal.pyd'
-- 
GitLab


From f44f6fedb6a7a50837803e94ab7a7d76b273cdc8 Mon Sep 17 00:00:00 2001
From: Dustin Tran <trandustin@google.com>
Date: Mon, 18 Dec 2017 17:15:01 -0800
Subject: [PATCH 1179/1225] Add probabilistic convolutional layers.

PiperOrigin-RevId: 179490700
---
 tensorflow/contrib/bayesflow/BUILD            |   19 +
 .../layers_conv_variational_test.py           |  289 ++++
 .../layers_dense_variational_test.py          |    5 +-
 .../contrib/bayesflow/python/ops/layers.py    |   14 +
 .../python/ops/layers_conv_variational.py     | 1415 +++++++++++++++++
 .../ops/layers_dense_variational_impl.py      |  197 +--
 .../bayesflow/python/ops/layers_util.py       |  180 +++
 7 files changed, 1941 insertions(+), 178 deletions(-)
 create mode 100644 tensorflow/contrib/bayesflow/python/kernel_tests/layers_conv_variational_test.py
 create mode 100644 tensorflow/contrib/bayesflow/python/ops/layers_conv_variational.py
 create mode 100644 tensorflow/contrib/bayesflow/python/ops/layers_util.py

diff --git a/tensorflow/contrib/bayesflow/BUILD b/tensorflow/contrib/bayesflow/BUILD
index a262d4aecd..4e0520fa33 100644
--- a/tensorflow/contrib/bayesflow/BUILD
+++ b/tensorflow/contrib/bayesflow/BUILD
@@ -99,6 +99,25 @@ cuda_py_test(
     ],
 )
 
+cuda_py_test(
+    name = "layers_conv_variational_test",
+    size = "small",
+    srcs = ["python/kernel_tests/layers_conv_variational_test.py"],
+    additional_deps = [
+        ":bayesflow_py",
+        "//third_party/py/numpy",
+        "//tensorflow/contrib/distributions:distributions_py",
+        "//tensorflow/python/ops/distributions",
+        "//tensorflow/python:array_ops",
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:framework_for_generated_wrappers",
+        "//tensorflow/python:gradients",
+        "//tensorflow/python:linalg_ops",
+        "//tensorflow/python:math_ops",
+        "//tensorflow/python:nn_ops",
+    ],
+)
+
 cuda_py_test(
     name = "layers_dense_variational_test",
     size = "small",
diff --git a/tensorflow/contrib/bayesflow/python/kernel_tests/layers_conv_variational_test.py b/tensorflow/contrib/bayesflow/python/kernel_tests/layers_conv_variational_test.py
new file mode 100644
index 0000000000..57f44aef1a
--- /dev/null
+++ b/tensorflow/contrib/bayesflow/python/kernel_tests/layers_conv_variational_test.py
@@ -0,0 +1,289 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for convolutional Bayesian layers."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from tensorflow.contrib.bayesflow.python.ops import layers_conv_variational as prob_layers_lib
+from tensorflow.contrib.bayesflow.python.ops import layers_util as prob_layers_util
+from tensorflow.contrib.distributions.python.ops import independent as independent_lib
+from tensorflow.python.framework import ops
+from tensorflow.python.framework import tensor_shape
+from tensorflow.python.ops import nn
+from tensorflow.python.ops import nn_ops
+from tensorflow.python.ops import random_ops
+from tensorflow.python.ops.distributions import normal as normal_lib
+from tensorflow.python.platform import test
+
+
+class Counter(object):
+  """Helper class to manage incrementing a counting `int`."""
+
+  def __init__(self):
+    self._value = -1
+
+  @property
+  def value(self):
+    return self._value
+
+  def __call__(self):
+    self._value += 1
+    return self._value
+
+
+class MockDistribution(independent_lib.Independent):
+  """Monitors DenseVariational calls to the underlying distribution."""
+
+  def __init__(self, result_sample, result_log_prob, loc=None, scale=None):
+    self.result_sample = result_sample
+    self.result_log_prob = result_log_prob
+    self.result_loc = loc
+    self.result_scale = scale
+    self.result_distribution = normal_lib.Normal(loc=0.0, scale=1.0)
+    if loc is not None and scale is not None:
+      self.result_distribution = normal_lib.Normal(loc=self.result_loc,
+                                                   scale=self.result_scale)
+    self.called_log_prob = Counter()
+    self.called_sample = Counter()
+    self.called_loc = Counter()
+    self.called_scale = Counter()
+
+  def log_prob(self, *args, **kwargs):
+    self.called_log_prob()
+    return self.result_log_prob
+
+  def sample(self, *args, **kwargs):
+    self.called_sample()
+    return self.result_sample
+
+  @property
+  def distribution(self):  # for dummy check on Independent(Normal)
+    return self.result_distribution
+
+  @property
+  def loc(self):
+    self.called_loc()
+    return self.result_loc
+
+  @property
+  def scale(self):
+    self.called_scale()
+    return self.result_scale
+
+
+class MockKLDivergence(object):
+  """Monitors layer calls to the divergence implementation."""
+
+  def __init__(self, result):
+    self.result = result
+    self.args = []
+    self.called = Counter()
+
+  def __call__(self, *args, **kwargs):
+    self.called()
+    self.args.append(args)
+    return self.result
+
+
+class ConvVariational(test.TestCase):
+
+  def _testKLPenaltyKernel(self, layer_class):
+    with self.test_session():
+      layer = layer_class(filters=2, kernel_size=3)
+      if layer_class == prob_layers_lib.Conv1DVariational:
+        inputs = random_ops.random_uniform([2, 3, 1], seed=1)
+      elif layer_class == prob_layers_lib.Conv2DVariational:
+        inputs = random_ops.random_uniform([2, 3, 3, 1], seed=1)
+      elif layer_class == prob_layers_lib.Conv3DVariational:
+        inputs = random_ops.random_uniform([2, 3, 3, 3, 1], seed=1)
+
+      # No keys.
+      losses = ops.get_collection(ops.GraphKeys.REGULARIZATION_LOSSES)
+      self.assertEqual(len(losses), 0)
+      self.assertListEqual(layer.losses, losses)
+
+      _ = layer(inputs)
+
+      # Yes keys.
+      losses = ops.get_collection(ops.GraphKeys.REGULARIZATION_LOSSES)
+      self.assertEqual(len(losses), 1)
+      self.assertListEqual(layer.losses, losses)
+
+  def _testKLPenaltyBoth(self, layer_class):
+    def _make_normal(dtype, *args):  # pylint: disable=unused-argument
+      return normal_lib.Normal(
+          loc=dtype.as_numpy_dtype(0.), scale=dtype.as_numpy_dtype(1.))
+    with self.test_session():
+      layer = layer_class(
+          filters=2,
+          kernel_size=3,
+          bias_posterior_fn=prob_layers_util.default_mean_field_normal_fn(),
+          bias_prior_fn=_make_normal)
+      if layer_class == prob_layers_lib.Conv1DVariational:
+        inputs = random_ops.random_uniform([2, 3, 1], seed=1)
+      elif layer_class == prob_layers_lib.Conv2DVariational:
+        inputs = random_ops.random_uniform([2, 3, 3, 1], seed=1)
+      elif layer_class == prob_layers_lib.Conv3DVariational:
+        inputs = random_ops.random_uniform([2, 3, 3, 3, 1], seed=1)
+
+      # No keys.
+      losses = ops.get_collection(ops.GraphKeys.REGULARIZATION_LOSSES)
+      self.assertEqual(len(losses), 0)
+      self.assertListEqual(layer.losses, losses)
+
+      _ = layer(inputs)
+
+      # Yes keys.
+      losses = ops.get_collection(ops.GraphKeys.REGULARIZATION_LOSSES)
+      self.assertEqual(len(losses), 2)
+      self.assertListEqual(layer.losses, losses)
+
+  def _testConvVariational(self, layer_class):
+    batch_size, depth, height, width, channels, filters = 2, 4, 4, 4, 3, 5
+    with self.test_session() as sess:
+      seed = Counter()
+      if layer_class == prob_layers_lib.Conv1DVariational:
+        inputs = random_ops.random_uniform(
+            [batch_size, width, channels], seed=seed())
+        kernel_size = (2,)
+      elif layer_class == prob_layers_lib.Conv2DVariational:
+        inputs = random_ops.random_uniform(
+            [batch_size, height, width, channels], seed=seed())
+        kernel_size = (2, 2)
+      elif layer_class == prob_layers_lib.Conv3DVariational:
+        inputs = random_ops.random_uniform(
+            [batch_size, depth, height, width, channels], seed=seed())
+        kernel_size = (2, 2, 2)
+
+      kernel_shape = kernel_size + (channels, filters)
+      kernel_posterior = MockDistribution(
+          result_log_prob=random_ops.random_uniform(kernel_shape, seed=seed()),
+          result_sample=random_ops.random_uniform(kernel_shape, seed=seed()))
+      kernel_prior = MockDistribution(
+          result_log_prob=random_ops.random_uniform(kernel_shape, seed=seed()),
+          result_sample=random_ops.random_uniform(kernel_shape, seed=seed()))
+      kernel_divergence = MockKLDivergence(
+          result=random_ops.random_uniform(kernel_shape, seed=seed()))
+
+      bias_size = (filters,)
+      bias_posterior = MockDistribution(
+          result_log_prob=random_ops.random_uniform(bias_size, seed=seed()),
+          result_sample=random_ops.random_uniform(bias_size, seed=seed()))
+      bias_prior = MockDistribution(
+          result_log_prob=random_ops.random_uniform(bias_size, seed=seed()),
+          result_sample=random_ops.random_uniform(bias_size, seed=seed()))
+      bias_divergence = MockKLDivergence(
+          result=random_ops.random_uniform(bias_size, seed=seed()))
+
+      convolution_op = nn_ops.Convolution(
+          tensor_shape.TensorShape(inputs.shape),
+          filter_shape=tensor_shape.TensorShape(kernel_shape),
+          padding="SAME")
+      expected_outputs = convolution_op(inputs, kernel_posterior.result_sample)
+      expected_outputs = nn.bias_add(expected_outputs,
+                                     bias_posterior.result_sample,
+                                     data_format="NHWC")
+
+      layer = layer_class(
+          filters=filters,
+          kernel_size=kernel_size,
+          padding="SAME",
+          kernel_posterior_fn=lambda *args: kernel_posterior,
+          kernel_posterior_tensor_fn=lambda d: d.sample(seed=42),
+          kernel_prior_fn=lambda *args: kernel_prior,
+          kernel_divergence_fn=kernel_divergence,
+          bias_posterior_fn=lambda *args: bias_posterior,
+          bias_posterior_tensor_fn=lambda d: d.sample(seed=43),
+          bias_prior_fn=lambda *args: bias_prior,
+          bias_divergence_fn=bias_divergence)
+
+      outputs = layer(inputs)
+
+      kl_penalty = ops.get_collection(ops.GraphKeys.REGULARIZATION_LOSSES)
+
+      [
+          expected_outputs_, actual_outputs_,
+          expected_kernel_, actual_kernel_,
+          expected_kernel_divergence_, actual_kernel_divergence_,
+          expected_bias_, actual_bias_,
+          expected_bias_divergence_, actual_bias_divergence_,
+      ] = sess.run([
+          expected_outputs, outputs,
+          kernel_posterior.result_sample, layer.kernel_posterior_tensor,
+          kernel_divergence.result, kl_penalty[0],
+          bias_posterior.result_sample, layer.bias_posterior_tensor,
+          bias_divergence.result, kl_penalty[1],
+      ])
+
+      self.assertAllClose(
+          expected_kernel_, actual_kernel_,
+          rtol=1e-6, atol=0.)
+      self.assertAllClose(
+          expected_bias_, actual_bias_,
+          rtol=1e-6, atol=0.)
+      self.assertAllClose(
+          expected_outputs_, actual_outputs_,
+          rtol=1e-6, atol=0.)
+      self.assertAllClose(
+          expected_kernel_divergence_, actual_kernel_divergence_,
+          rtol=1e-6, atol=0.)
+      self.assertAllClose(
+          expected_bias_divergence_, actual_bias_divergence_,
+          rtol=1e-6, atol=0.)
+
+      self.assertAllEqual(
+          [[kernel_posterior.distribution,
+            kernel_prior.distribution,
+            kernel_posterior.result_sample]],
+          kernel_divergence.args)
+
+      self.assertAllEqual(
+          [[bias_posterior.distribution,
+            bias_prior.distribution,
+            bias_posterior.result_sample]],
+          bias_divergence.args)
+
+  def testKLPenaltyKernelConv1DVariational(self):
+    self._testKLPenaltyKernel(prob_layers_lib.Conv1DVariational)
+
+  def testKLPenaltyKernelConv2DVariational(self):
+    self._testKLPenaltyKernel(prob_layers_lib.Conv2DVariational)
+
+  def testKLPenaltyKernelConv3DVariational(self):
+    self._testKLPenaltyKernel(prob_layers_lib.Conv3DVariational)
+
+  def testKLPenaltyBothConv1DVariational(self):
+    self._testKLPenaltyBoth(prob_layers_lib.Conv1DVariational)
+
+  def testKLPenaltyBothConv2DVariational(self):
+    self._testKLPenaltyBoth(prob_layers_lib.Conv2DVariational)
+
+  def testKLPenaltyBothConv3DVariational(self):
+    self._testKLPenaltyBoth(prob_layers_lib.Conv3DVariational)
+
+  def testConv1DVariational(self):
+    self._testConvVariational(prob_layers_lib.Conv1DVariational)
+
+  def testConv2DVariational(self):
+    self._testConvVariational(prob_layers_lib.Conv2DVariational)
+
+  def testConv3DVariational(self):
+    self._testConvVariational(prob_layers_lib.Conv3DVariational)
+
+
+if __name__ == "__main__":
+  test.main()
diff --git a/tensorflow/contrib/bayesflow/python/kernel_tests/layers_dense_variational_test.py b/tensorflow/contrib/bayesflow/python/kernel_tests/layers_dense_variational_test.py
index 5371e912ed..4e9f119351 100644
--- a/tensorflow/contrib/bayesflow/python/kernel_tests/layers_dense_variational_test.py
+++ b/tensorflow/contrib/bayesflow/python/kernel_tests/layers_dense_variational_test.py
@@ -21,6 +21,7 @@ from __future__ import print_function
 import numpy as np
 
 from tensorflow.contrib.bayesflow.python.ops import layers_dense_variational_impl as prob_layers_lib
+from tensorflow.contrib.bayesflow.python.ops import layers_util as prob_layers_util
 from tensorflow.contrib.distributions.python.ops import independent as independent_lib
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
@@ -127,7 +128,7 @@ class DenseVariational(test.TestCase):
     with self.test_session():
       layer = layer_class(
           units=2,
-          bias_posterior_fn=prob_layers_lib.default_mean_field_normal_fn(),
+          bias_posterior_fn=prob_layers_util.default_mean_field_normal_fn(),
           bias_prior_fn=_make_normal)
       inputs = random_ops.random_uniform([2, 3], seed=1)
 
@@ -345,7 +346,7 @@ class DenseVariational(test.TestCase):
           maxval=2,
           dtype=dtypes.int32,
           seed=distribution_util.gen_new_seed(
-              layer.seed, salt="conv_variational"))
+              layer.seed, salt="dense_flipout"))
       sign_output = math_ops.cast(2 * sign_output - 1, inputs.dtype)
       perturbed_inputs = math_ops.matmul(
           inputs * sign_input, expected_kernel_posterior_affine_tensor)
diff --git a/tensorflow/contrib/bayesflow/python/ops/layers.py b/tensorflow/contrib/bayesflow/python/ops/layers.py
index 121f36ec4e..93412afae7 100644
--- a/tensorflow/contrib/bayesflow/python/ops/layers.py
+++ b/tensorflow/contrib/bayesflow/python/ops/layers.py
@@ -23,11 +23,25 @@ from __future__ import print_function
 
 # go/tf-wildcard-import
 # pylint: disable=wildcard-import
+from tensorflow.contrib.bayesflow.python.ops.layers_conv_variational import *
 from tensorflow.contrib.bayesflow.python.ops.layers_dense_variational_impl import *
+from tensorflow.contrib.bayesflow.python.ops.layers_util import *
 # pylint: enable=wildcard-import
 from tensorflow.python.util.all_util import remove_undocumented
 
 _allowed_symbols = [
+    'Convolution1DVariational',
+    'Convolution2DVariational',
+    'Convolution3DVariational',
+    'Conv1DVariational',
+    'Conv2DVariational',
+    'Conv3DVariational',
+    'convolution1d_variational',
+    'convolution2d_variational',
+    'convolution3d_variational',
+    'conv1d_variational',
+    'conv2d_variational',
+    'conv3d_variational',
     'DenseReparameterization',
     'DenseLocalReparameterization',
     'DenseFlipout',
diff --git a/tensorflow/contrib/bayesflow/python/ops/layers_conv_variational.py b/tensorflow/contrib/bayesflow/python/ops/layers_conv_variational.py
new file mode 100644
index 0000000000..6ffb55feb1
--- /dev/null
+++ b/tensorflow/contrib/bayesflow/python/ops/layers_conv_variational.py
@@ -0,0 +1,1415 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Convolutional variational layer classes and their functional aliases.
+"""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from tensorflow.contrib.bayesflow.python.ops import layers_util
+from tensorflow.contrib.distributions.python.ops import independent as independent_lib
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import ops
+from tensorflow.python.framework import tensor_shape
+from tensorflow.python.layers import base as layers_lib
+from tensorflow.python.layers import utils
+from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import nn
+from tensorflow.python.ops import nn_ops
+from tensorflow.python.ops import standard_ops
+from tensorflow.python.ops.distributions import kullback_leibler as kl_lib
+from tensorflow.python.ops.distributions import normal as normal_lib
+
+
+class _ConvVariational(layers_lib.Layer):
+  """Abstract nD convolution layer (private, used as implementation base).
+
+  This layer creates a convolution kernel that is convolved
+  (actually cross-correlated) with the layer input to produce a tensor of
+  outputs. It may also include a bias addition and activation function
+  on the outputs. It assumes the `kernel` and/or `bias` are drawn from
+  distributions.
+
+  By default, the layer implements a stochastic forward pass via
+  sampling from the kernel and bias posteriors,
+  ```none
+  outputs = f(inputs; kernel, bias), kernel, bias ~ posterior
+  ```
+  where f denotes the layer's calculation.
+
+  The arguments permit separate specification of the surrogate posterior
+  (`q(W|x)`), prior (`p(W)`), and divergence for both the `kernel` and `bias`
+  distributions.
+
+  Arguments:
+    rank: An integer, the rank of the convolution, e.g. "2" for 2D convolution.
+    filters: Integer, the dimensionality of the output space (i.e. the number
+      of filters in the convolution).
+    kernel_size: An integer or tuple/list of n integers, specifying the
+      length of the convolution window.
+    strides: An integer or tuple/list of n integers,
+      specifying the stride length of the convolution.
+      Specifying any stride value != 1 is incompatible with specifying
+      any `dilation_rate` value != 1.
+    padding: One of `"valid"` or `"same"` (case-insensitive).
+    data_format: A string, one of `channels_last` (default) or `channels_first`.
+      The ordering of the dimensions in the inputs.
+      `channels_last` corresponds to inputs with shape
+      `(batch, ..., channels)` while `channels_first` corresponds to
+      inputs with shape `(batch, channels, ...)`.
+    dilation_rate: An integer or tuple/list of n integers, specifying
+      the dilation rate to use for dilated convolution.
+      Currently, specifying any `dilation_rate` value != 1 is
+      incompatible with specifying any `strides` value != 1.
+    activation: Activation function. Set it to None to maintain a
+      linear activation.
+    activity_regularizer: Optional regularizer function for the output.
+    trainable: Boolean, if `True` also add variables to the graph collection
+      `GraphKeys.TRAINABLE_VARIABLES` (see `tf.Variable`).
+    kernel_posterior_fn: Python `callable` which creates
+      `tf.distributions.Distribution` instance representing the surrogate
+      posterior of the `kernel` parameter. Default value:
+      `default_mean_field_normal_fn()`.
+    kernel_posterior_tensor_fn: Python `callable` which takes a
+      `tf.distributions.Distribution` instance and returns a representative
+      value. Default value: `lambda d: d.sample()`.
+    kernel_prior_fn: Python `callable` which creates `tf.distributions`
+      instance. See `default_mean_field_normal_fn` docstring for required
+      parameter signature.
+      Default value: `tf.distributions.Normal(loc=0., scale=1.)`.
+    kernel_divergence_fn: Python `callable` which takes the surrogate posterior
+      distribution, prior distribution and random variate sample(s) from the
+      surrogate posterior and computes or approximates the KL divergence. The
+      distributions are `tf.distributions.Distribution`-like instances and the
+      sample is a `Tensor`.
+    bias_posterior_fn: Python `callable` which creates
+      `tf.distributions.Distribution` instance representing the surrogate
+      posterior of the `bias` parameter. Default value:
+      `default_mean_field_normal_fn(is_singular=True)` (which creates an
+      instance of `tf.distributions.Deterministic`).
+    bias_posterior_tensor_fn: Python `callable` which takes a
+      `tf.distributions.Distribution` instance and returns a representative
+      value. Default value: `lambda d: d.sample()`.
+    bias_prior_fn: Python `callable` which creates `tf.distributions` instance.
+      See `default_mean_field_normal_fn` docstring for required parameter
+      signature. Default value: `None` (no prior, no variational inference)
+    bias_divergence_fn: Python `callable` which takes the surrogate posterior
+      distribution, prior distribution and random variate sample(s) from the
+      surrogate posterior and computes or approximates the KL divergence. The
+      distributions are `tf.distributions.Distribution`-like instances and the
+      sample is a `Tensor`.
+    name: A string, the name of the layer.
+
+  Properties:
+    rank: Python integer, dimensionality of convolution.
+    filters: Python integer, dimensionality of the output space.
+    kernel_size: Size of the convolution window.
+    strides: Stride length of convolution.
+    padding: Python string describing padding approach.
+    data_format: Python string describing input data's dimensions.
+    dilation_rate: Dilation rate for an atrous convolution.
+    activation: Activation function (`callable`).
+    activity_regularizer: Regularizer function for the output.
+    kernel_use_local_reparameterization: Python `bool` indicating whether
+      `kernel` calculation should employ the Local Reparameterization Trick.
+    kernel_posterior_fn: `callable` returning posterior.
+    kernel_posterior_tensor_fn: `callable` operating on posterior.
+    kernel_prior_fn: `callable` returning prior.
+    kernel_divergence_fn: `callable` returning divergence.
+    bias_posterior_fn: `callable` returning posterior.
+    bias_posterior_tensor_fn: `callable` operating on posterior.
+    bias_prior_fn: `callable` returning prior.
+    bias_divergence_fn: `callable` returning divergence.
+  """
+
+  def __init__(
+      self,
+      rank,
+      filters,
+      kernel_size,
+      strides=1,
+      padding="valid",
+      data_format="channels_last",
+      dilation_rate=1,
+      activation=None,
+      activity_regularizer=None,
+      trainable=True,
+      kernel_posterior_fn=layers_util.default_mean_field_normal_fn(),
+      kernel_posterior_tensor_fn=lambda d: d.sample(),
+      kernel_prior_fn=lambda dtype, *args: normal_lib.Normal(  # pylint: disable=g-long-lambda
+          loc=dtype.as_numpy_dtype(0.), scale=dtype.as_numpy_dtype(1.)),
+      kernel_divergence_fn=lambda q, p, ignore: kl_lib.kl_divergence(q, p),
+      bias_posterior_fn=layers_util.default_mean_field_normal_fn(is_singular=True),  # pylint: disable=line-too-long
+      bias_posterior_tensor_fn=lambda d: d.sample(),
+      bias_prior_fn=None,
+      bias_divergence_fn=lambda q, p, ignore: kl_lib.kl_divergence(q, p),
+      name=None,
+      **kwargs):
+    super(_ConvVariational, self).__init__(
+        trainable=trainable,
+        name=name,
+        activity_regularizer=activity_regularizer,
+        **kwargs)
+    self.rank = rank
+    self.filters = filters
+    self.kernel_size = utils.normalize_tuple(kernel_size, rank, "kernel_size")
+    self.strides = utils.normalize_tuple(strides, rank, "strides")
+    self.padding = utils.normalize_padding(padding)
+    self.data_format = utils.normalize_data_format(data_format)
+    self.dilation_rate = utils.normalize_tuple(
+        dilation_rate, rank, "dilation_rate")
+    self.activation = activation
+    self.input_spec = layers_lib.InputSpec(ndim=self.rank + 2)
+    self.kernel_posterior_fn = kernel_posterior_fn
+    self.kernel_posterior_tensor_fn = kernel_posterior_tensor_fn
+    self.kernel_prior_fn = kernel_prior_fn
+    self.kernel_divergence_fn = kernel_divergence_fn
+    self.bias_posterior_fn = bias_posterior_fn
+    self.bias_posterior_tensor_fn = bias_posterior_tensor_fn
+    self.bias_prior_fn = bias_prior_fn
+    self.bias_divergence_fn = bias_divergence_fn
+
+  def build(self, input_shape):
+    input_shape = tensor_shape.TensorShape(input_shape)
+    if self.data_format == "channels_first":
+      channel_axis = 1
+    else:
+      channel_axis = -1
+    if input_shape[channel_axis].value is None:
+      raise ValueError("The channel dimension of the inputs "
+                       "should be defined. Found `None`.")
+    input_dim = input_shape[channel_axis].value
+    kernel_shape = self.kernel_size + (input_dim, self.filters)
+    dtype = dtypes.as_dtype(self.dtype)
+
+    # Must have a posterior kernel.
+    self.kernel_posterior = self.kernel_posterior_fn(
+        dtype, kernel_shape, "kernel_posterior",
+        self.trainable, self.add_variable)
+
+    if self.kernel_prior_fn is None:
+      self.kernel_prior = None
+    else:
+      self.kernel_prior = self.kernel_prior_fn(
+          dtype, kernel_shape, "kernel_prior",
+          self.trainable, self.add_variable)
+    self._built_kernel_divergence = False
+
+    if self.bias_posterior_fn is None:
+      self.bias_posterior = None
+    else:
+      self.bias_posterior = self.bias_posterior_fn(
+          dtype, (self.filters,), "bias_posterior",
+          self.trainable, self.add_variable)
+
+    if self.bias_prior_fn is None:
+      self.bias_prior = None
+    else:
+      self.bias_prior = self.bias_prior_fn(
+          dtype, (self.filters,), "bias_prior",
+          self.trainable, self.add_variable)
+    self._built_bias_divergence = False
+
+    self.input_spec = layers_lib.InputSpec(ndim=self.rank + 2,
+                                           axes={channel_axis: input_dim})
+    self._convolution_op = nn_ops.Convolution(
+        input_shape,
+        filter_shape=tensor_shape.TensorShape(kernel_shape),
+        dilation_rate=self.dilation_rate,
+        strides=self.strides,
+        padding=self.padding.upper(),
+        data_format=utils.convert_data_format(self.data_format,
+                                              self.rank + 2))
+
+    self.built = True
+
+  def call(self, inputs):
+    inputs = ops.convert_to_tensor(inputs, dtype=self.dtype)
+
+    outputs = self._apply_variational_kernel(inputs)
+    outputs = self._apply_variational_bias(outputs)
+    if self.activation is not None:
+      outputs = self.activation(outputs)
+    if not self._built_kernel_divergence:
+      kernel_posterior = self.kernel_posterior
+      kernel_prior = self.kernel_prior
+      if isinstance(self.kernel_posterior, independent_lib.Independent):
+        kernel_posterior = kernel_posterior.distribution
+      if isinstance(self.kernel_prior, independent_lib.Independent):
+        kernel_prior = kernel_prior.distribution
+      self._apply_divergence(self.kernel_divergence_fn,
+                             kernel_posterior,
+                             kernel_prior,
+                             self.kernel_posterior_tensor,
+                             name="divergence_kernel")
+      self._built_kernel_divergence = True
+    if not self._built_bias_divergence:
+      bias_posterior = self.bias_posterior
+      bias_prior = self.bias_prior
+      if isinstance(self.bias_posterior, independent_lib.Independent):
+        bias_posterior = bias_posterior.distribution
+      if isinstance(self.bias_prior, independent_lib.Independent):
+        bias_prior = bias_prior.distribution
+      self._apply_divergence(self.bias_divergence_fn,
+                             bias_posterior,
+                             bias_prior,
+                             self.bias_posterior_tensor,
+                             name="divergence_bias")
+      self._built_bias_divergence = True
+    return outputs
+
+  def _apply_variational_kernel(self, inputs):
+    self.kernel_posterior_tensor = self.kernel_posterior_tensor_fn(
+        self.kernel_posterior)
+    outputs = self._convolution_op(inputs, self.kernel_posterior_tensor)
+    return outputs
+
+  def _apply_variational_bias(self, inputs):
+    if self.bias_posterior is None:
+      self.bias_posterior_tensor = None
+      return inputs
+    self.bias_posterior_tensor = self.bias_posterior_tensor_fn(
+        self.bias_posterior)
+    outputs = inputs
+    if self.data_format == "channels_first":
+      if self.rank == 1:
+        # nn.bias_add does not accept a 1D input tensor.
+        bias = array_ops.reshape(self.bias_posterior_tensor,
+                                 (1, self.filters, 1))
+        outputs += bias
+      if self.rank == 2:
+        outputs = nn.bias_add(outputs,
+                              self.bias_posterior_tensor,
+                              data_format="NCHW")
+      if self.rank == 3:
+        # As of Mar 2017, direct addition is significantly slower than
+        # bias_add when computing gradients. To use bias_add, we collapse Z
+        # and Y into a single dimension to obtain a 4D input tensor.
+        outputs_shape = outputs.shape.as_list()
+        outputs_4d = array_ops.reshape(outputs,
+                                       [outputs_shape[0], outputs_shape[1],
+                                        outputs_shape[2] * outputs_shape[3],
+                                        outputs_shape[4]])
+        outputs_4d = nn.bias_add(outputs_4d,
+                                 self.bias_posterior_tensor,
+                                 data_format="NCHW")
+        outputs = array_ops.reshape(outputs_4d, outputs_shape)
+    else:
+      outputs = nn.bias_add(outputs,
+                            self.bias_posterior_tensor,
+                            data_format="NHWC")
+    return outputs
+
+  def _apply_divergence(self, divergence_fn, posterior, prior,
+                        posterior_tensor, name):
+    if (divergence_fn is None or
+        posterior is None or
+        prior is None):
+      divergence = None
+      return
+    divergence = standard_ops.identity(
+        divergence_fn(
+            posterior, prior, posterior_tensor),
+        name=name)
+    self.add_loss(divergence)
+
+  def _compute_output_shape(self, input_shape):
+    input_shape = tensor_shape.TensorShape(input_shape).as_list()
+    if self.data_format == "channels_last":
+      space = input_shape[1:-1]
+      new_space = []
+      for i in range(len(space)):
+        new_dim = utils.conv_output_length(
+            space[i],
+            self.kernel_size[i],
+            padding=self.padding,
+            stride=self.strides[i],
+            dilation=self.dilation_rate[i])
+        new_space.append(new_dim)
+      return tensor_shape.TensorShape([input_shape[0]] + new_space +
+                                      [self.filters])
+    else:
+      space = input_shape[2:]
+      new_space = []
+      for i in range(len(space)):
+        new_dim = utils.conv_output_length(
+            space[i],
+            self.kernel_size[i],
+            padding=self.padding,
+            stride=self.strides[i],
+            dilation=self.dilation_rate[i])
+        new_space.append(new_dim)
+      return tensor_shape.TensorShape([input_shape[0], self.filters] +
+                                      new_space)
+
+
+class Conv1DVariational(_ConvVariational):
+  """1D convolution layer (e.g. temporal convolution).
+
+  This layer creates a convolution kernel that is convolved
+  (actually cross-correlated) with the layer input to produce a tensor of
+  outputs. It may also include a bias addition and activation function
+  on the outputs. It assumes the `kernel` and/or `bias` are drawn from
+  distributions.
+
+  By default, the layer implements a stochastic forward pass via
+  sampling from the kernel and bias posteriors,
+  ```none
+  outputs = f(inputs; kernel, bias), kernel, bias ~ posterior
+  ```
+  where f denotes the layer's calculation.
+
+  The arguments permit separate specification of the surrogate posterior
+  (`q(W|x)`), prior (`p(W)`), and divergence for both the `kernel` and `bias`
+  distributions.
+
+  Arguments:
+    filters: Integer, the dimensionality of the output space (i.e. the number
+      of filters in the convolution).
+    kernel_size: An integer or tuple/list of a single integer, specifying the
+      length of the 1D convolution window.
+    strides: An integer or tuple/list of a single integer,
+      specifying the stride length of the convolution.
+      Specifying any stride value != 1 is incompatible with specifying
+      any `dilation_rate` value != 1.
+    padding: One of `"valid"` or `"same"` (case-insensitive).
+    data_format: A string, one of `channels_last` (default) or `channels_first`.
+      The ordering of the dimensions in the inputs.
+      `channels_last` corresponds to inputs with shape
+      `(batch, length, channels)` while `channels_first` corresponds to
+      inputs with shape `(batch, channels, length)`.
+    dilation_rate: An integer or tuple/list of a single integer, specifying
+      the dilation rate to use for dilated convolution.
+      Currently, specifying any `dilation_rate` value != 1 is
+      incompatible with specifying any `strides` value != 1.
+    activation: Activation function. Set it to None to maintain a
+      linear activation.
+    activity_regularizer: Optional regularizer function for the output.
+    trainable: Boolean, if `True` also add variables to the graph collection
+      `GraphKeys.TRAINABLE_VARIABLES` (see `tf.Variable`).
+    kernel_posterior_fn: Python `callable` which creates
+      `tf.distributions.Distribution` instance representing the surrogate
+      posterior of the `kernel` parameter. Default value:
+      `default_mean_field_normal_fn()`.
+    kernel_posterior_tensor_fn: Python `callable` which takes a
+      `tf.distributions.Distribution` instance and returns a representative
+      value. Default value: `lambda d: d.sample()`.
+    kernel_prior_fn: Python `callable` which creates `tf.distributions`
+      instance. See `default_mean_field_normal_fn` docstring for required
+      parameter signature.
+      Default value: `tf.distributions.Normal(loc=0., scale=1.)`.
+    kernel_divergence_fn: Python `callable` which takes the surrogate posterior
+      distribution, prior distribution and random variate sample(s) from the
+      surrogate posterior and computes or approximates the KL divergence. The
+      distributions are `tf.distributions.Distribution`-like instances and the
+      sample is a `Tensor`.
+    bias_posterior_fn: Python `callable` which creates
+      `tf.distributions.Distribution` instance representing the surrogate
+      posterior of the `bias` parameter. Default value:
+      `default_mean_field_normal_fn(is_singular=True)` (which creates an
+      instance of `tf.distributions.Deterministic`).
+    bias_posterior_tensor_fn: Python `callable` which takes a
+      `tf.distributions.Distribution` instance and returns a representative
+      value. Default value: `lambda d: d.sample()`.
+    bias_prior_fn: Python `callable` which creates `tf.distributions` instance.
+      See `default_mean_field_normal_fn` docstring for required parameter
+      signature. Default value: `None` (no prior, no variational inference)
+    bias_divergence_fn: Python `callable` which takes the surrogate posterior
+      distribution, prior distribution and random variate sample(s) from the
+      surrogate posterior and computes or approximates the KL divergence. The
+      distributions are `tf.distributions.Distribution`-like instances and the
+    name: A string, the name of the layer.
+
+  Properties:
+    filters: Python integer, dimensionality of the output space.
+    kernel_size: Size of the convolution window.
+    strides: Stride length of convolution.
+    padding: Python string describing padding approach.
+    data_format: Python string describing input data's dimensions.
+    dilation_rate: Dilation rate for an atrous convolution.
+    activation: Activation function (`callable`).
+    activity_regularizer: Regularizer function for the output.
+    kernel_use_local_reparameterization: Python `bool` indicating whether
+      `kernel` calculation should employ the Local Reparameterization Trick.
+    kernel_posterior_fn: `callable` returning posterior.
+    kernel_posterior_tensor_fn: `callable` operating on posterior.
+    kernel_prior_fn: `callable` returning prior.
+    kernel_divergence_fn: `callable` returning divergence.
+    bias_posterior_fn: `callable` returning posterior.
+    bias_posterior_tensor_fn: `callable` operating on posterior.
+    bias_prior_fn: `callable` returning prior.
+    bias_divergence_fn: `callable` returning divergence.
+
+  #### Examples
+
+  We illustrate a Bayesian neural network with [variational inference](
+  https://en.wikipedia.org/wiki/Variational_Bayesian_methods),
+  assuming a dataset of `features` and `labels`.
+
+  ```python
+  tfp = tf.contrib.bayesflow
+
+  net = tf.reshape(features, [-1, 128, 1])
+  net = tfp.layers.Conv1DVariational(64,
+                                     kernel_size=5,
+                                     padding="SAME",
+                                     activation=tf.nn.relu)(net)
+  net = tf.reshape(net, [-1, 128 * 64])
+  logits = tfp.layers.DenseVariational(10)(net)
+  neg_log_likelihood = tf.nn.softmax_cross_entropy_with_logits(
+      labels=labels, logits=logits)
+  kl = sum(tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))
+  loss = neg_log_likelihood + kl
+  train_op = tf.train.AdamOptimizer().minimize(loss)
+  ```
+
+  It uses reparameterization gradients to minimize the
+  Kullback-Leibler divergence up to a constant, also known as the
+  negative Evidence Lower Bound. It consists of the sum of two terms:
+  the expected negative log-likelihood, which we approximate via
+  Monte Carlo; and the KL divergence, which is added via regularizer
+  terms which are arguments to the layer.
+  """
+
+  def __init__(
+      self,
+      filters,
+      kernel_size,
+      strides=1,
+      padding="valid",
+      data_format="channels_last",
+      dilation_rate=1,
+      activation=None,
+      activity_regularizer=None,
+      trainable=True,
+      kernel_posterior_fn=layers_util.default_mean_field_normal_fn(),
+      kernel_posterior_tensor_fn=lambda d: d.sample(),
+      kernel_prior_fn=lambda dtype, *args: normal_lib.Normal(  # pylint: disable=g-long-lambda
+          loc=dtype.as_numpy_dtype(0.), scale=dtype.as_numpy_dtype(1.)),
+      kernel_divergence_fn=lambda q, p, ignore: kl_lib.kl_divergence(q, p),
+      bias_posterior_fn=layers_util.default_mean_field_normal_fn(is_singular=True),  # pylint: disable=line-too-long
+      bias_posterior_tensor_fn=lambda d: d.sample(),
+      bias_prior_fn=None,
+      bias_divergence_fn=lambda q, p, ignore: kl_lib.kl_divergence(q, p),
+      name=None,
+      **kwargs):
+    super(Conv1DVariational, self).__init__(
+        rank=1,
+        filters=filters,
+        kernel_size=kernel_size,
+        strides=strides,
+        padding=padding,
+        data_format=data_format,
+        dilation_rate=dilation_rate,
+        activation=activation,
+        activity_regularizer=activity_regularizer,
+        trainable=trainable,
+        kernel_posterior_fn=kernel_posterior_fn,
+        kernel_posterior_tensor_fn=kernel_posterior_tensor_fn,
+        kernel_prior_fn=kernel_prior_fn,
+        kernel_divergence_fn=kernel_divergence_fn,
+        bias_posterior_fn=bias_posterior_fn,
+        bias_posterior_tensor_fn=bias_posterior_tensor_fn,
+        bias_prior_fn=bias_prior_fn,
+        bias_divergence_fn=bias_divergence_fn,
+        name=name, **kwargs)
+
+
+def conv1d_variational(
+    inputs,
+    filters,
+    kernel_size,
+    strides=1,
+    padding="valid",
+    data_format="channels_last",
+    dilation_rate=1,
+    activation=None,
+    activity_regularizer=None,
+    trainable=True,
+    kernel_posterior_fn=layers_util.default_mean_field_normal_fn(),
+    kernel_posterior_tensor_fn=lambda d: d.sample(),
+    kernel_prior_fn=lambda dtype, *args: normal_lib.Normal(  # pylint: disable=g-long-lambda
+        loc=dtype.as_numpy_dtype(0.), scale=dtype.as_numpy_dtype(1.)),
+    kernel_divergence_fn=lambda q, p, ignore: kl_lib.kl_divergence(q, p),
+    bias_posterior_fn=layers_util.default_mean_field_normal_fn(is_singular=True),  # pylint: disable=line-too-long
+    bias_posterior_tensor_fn=lambda d: d.sample(),
+    bias_prior_fn=None,
+    bias_divergence_fn=lambda q, p, ignore: kl_lib.kl_divergence(q, p),
+    name=None,
+    reuse=None):
+  """Functional interface for 1D convolution layer (e.g. temporal convolution).
+
+  This layer creates a convolution kernel that is convolved
+  (actually cross-correlated) with the layer input to produce a tensor of
+  outputs. It may also include a bias addition and activation function
+  on the outputs. It assumes the `kernel` and/or `bias` are drawn from
+  distributions.
+
+  By default, the layer implements a stochastic forward pass via
+  sampling from the kernel and bias posteriors,
+  ```none
+  outputs = f(inputs; kernel, bias), kernel, bias ~ posterior
+  ```
+  where f denotes the layer's calculation.
+
+  The arguments permit separate specification of the surrogate posterior
+  (`q(W|x)`), prior (`p(W)`), and divergence for both the `kernel` and `bias`
+  distributions.
+
+  Arguments:
+    inputs: Tensor input.
+    filters: Integer, the dimensionality of the output space (i.e. the number
+      of filters in the convolution).
+    kernel_size: An integer or tuple/list of a single integer, specifying the
+      length of the 1D convolution window.
+    strides: An integer or tuple/list of a single integer,
+      specifying the stride length of the convolution.
+      Specifying any stride value != 1 is incompatible with specifying
+      any `dilation_rate` value != 1.
+    padding: One of `"valid"` or `"same"` (case-insensitive).
+    data_format: A string, one of `channels_last` (default) or `channels_first`.
+      The ordering of the dimensions in the inputs.
+      `channels_last` corresponds to inputs with shape
+      `(batch, length, channels)` while `channels_first` corresponds to
+      inputs with shape `(batch, channels, length)`.
+    dilation_rate: An integer or tuple/list of a single integer, specifying
+      the dilation rate to use for dilated convolution.
+      Currently, specifying any `dilation_rate` value != 1 is
+      incompatible with specifying any `strides` value != 1.
+    activation: Activation function. Set it to None to maintain a
+      linear activation.
+    activity_regularizer: Optional regularizer function for the output.
+    trainable: Boolean, if `True` also add variables to the graph collection
+      `GraphKeys.TRAINABLE_VARIABLES` (see `tf.Variable`).
+    kernel_posterior_fn: Python `callable` which creates
+      `tf.distributions.Distribution` instance representing the surrogate
+      posterior of the `kernel` parameter. Default value:
+      `default_mean_field_normal_fn()`.
+    kernel_posterior_tensor_fn: Python `callable` which takes a
+      `tf.distributions.Distribution` instance and returns a representative
+      value. Default value: `lambda d: d.sample()`.
+    kernel_prior_fn: Python `callable` which creates `tf.distributions`
+      instance. See `default_mean_field_normal_fn` docstring for required
+      parameter signature.
+      Default value: `tf.distributions.Normal(loc=0., scale=1.)`.
+    kernel_divergence_fn: Python `callable` which takes the surrogate posterior
+      distribution, prior distribution and random variate sample(s) from the
+      surrogate posterior and computes or approximates the KL divergence. The
+      distributions are `tf.distributions.Distribution`-like instances and the
+      sample is a `Tensor`.
+    bias_posterior_fn: Python `callable` which creates
+      `tf.distributions.Distribution` instance representing the surrogate
+      posterior of the `bias` parameter. Default value:
+      `default_mean_field_normal_fn(is_singular=True)` (which creates an
+      instance of `tf.distributions.Deterministic`).
+    bias_posterior_tensor_fn: Python `callable` which takes a
+      `tf.distributions.Distribution` instance and returns a representative
+      value. Default value: `lambda d: d.sample()`.
+    bias_prior_fn: Python `callable` which creates `tf.distributions` instance.
+      See `default_mean_field_normal_fn` docstring for required parameter
+      signature. Default value: `None` (no prior, no variational inference)
+    bias_divergence_fn: Python `callable` which takes the surrogate posterior
+      distribution, prior distribution and random variate sample(s) from the
+      surrogate posterior and computes or approximates the KL divergence. The
+      distributions are `tf.distributions.Distribution`-like instances and the
+    name: A string, the name of the layer.
+    reuse: Boolean, whether to reuse the weights of a previous layer
+      by the same name.
+
+  Returns:
+    Output tensor.
+
+  Raises:
+    ValueError: if eager execution is enabled.
+
+  #### Examples
+
+  We illustrate a Bayesian neural network with [variational inference](
+  https://en.wikipedia.org/wiki/Variational_Bayesian_methods),
+  assuming a dataset of `features` and `labels`.
+
+  ```python
+  tfp = tf.contrib.bayesflow
+
+  net = tf.reshape(features, [-1, 128, 1])
+  net = tfp.layers.conv1d_variational(net,
+                                      64,
+                                      kernel_size=5,
+                                      padding="SAME",
+                                      activation=tf.nn.relu)
+  net = tf.reshape(net, [-1, 128 * 64])
+  logits = tfp.layers.dense_variational(net, 10)
+  neg_log_likelihood = tf.nn.softmax_cross_entropy_with_logits(
+      labels=labels, logits=logits)
+  kl = sum(tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))
+  loss = neg_log_likelihood + kl
+  train_op = tf.train.AdamOptimizer().minimize(loss)
+  ```
+
+  It uses reparameterization gradients to minimize the
+  Kullback-Leibler divergence up to a constant, also known as the
+  negative Evidence Lower Bound. It consists of the sum of two terms:
+  the expected negative log-likelihood, which we approximate via
+  Monte Carlo; and the KL divergence, which is added via regularizer
+  terms which are arguments to the layer.
+  """
+  layer = Conv1DVariational(
+      filters=filters,
+      kernel_size=kernel_size,
+      strides=strides,
+      padding=padding,
+      data_format=data_format,
+      dilation_rate=dilation_rate,
+      activation=activation,
+      activity_regularizer=activity_regularizer,
+      trainable=trainable,
+      kernel_posterior_fn=kernel_posterior_fn,
+      kernel_posterior_tensor_fn=kernel_posterior_tensor_fn,
+      kernel_prior_fn=kernel_prior_fn,
+      kernel_divergence_fn=kernel_divergence_fn,
+      bias_posterior_fn=bias_posterior_fn,
+      bias_posterior_tensor_fn=bias_posterior_tensor_fn,
+      bias_prior_fn=bias_prior_fn,
+      bias_divergence_fn=bias_divergence_fn,
+      name=name,
+      dtype=inputs.dtype.base_dtype,
+      _scope=name,
+      _reuse=reuse)
+  return layer.apply(inputs)
+
+
+class Conv2DVariational(_ConvVariational):
+  """2D convolution layer (e.g. spatial convolution over images).
+
+  This layer creates a convolution kernel that is convolved
+  (actually cross-correlated) with the layer input to produce a tensor of
+  outputs. It may also include a bias addition and activation function
+  on the outputs. It assumes the `kernel` and/or `bias` are drawn from
+  distributions.
+
+  By default, the layer implements a stochastic forward pass via
+  sampling from the kernel and bias posteriors,
+  ```none
+  outputs = f(inputs; kernel, bias), kernel, bias ~ posterior
+  ```
+  where f denotes the layer's calculation.
+
+  The arguments permit separate specification of the surrogate posterior
+  (`q(W|x)`), prior (`p(W)`), and divergence for both the `kernel` and `bias`
+  distributions.
+
+  Arguments:
+    filters: Integer, the dimensionality of the output space (i.e. the number
+      of filters in the convolution).
+    kernel_size: An integer or tuple/list of 2 integers, specifying the
+      height and width of the 2D convolution window.
+      Can be a single integer to specify the same value for
+      all spatial dimensions.
+    strides: An integer or tuple/list of 2 integers,
+      specifying the strides of the convolution along the height and width.
+      Can be a single integer to specify the same value for
+      all spatial dimensions.
+      Specifying any stride value != 1 is incompatible with specifying
+      any `dilation_rate` value != 1.
+    padding: One of `"valid"` or `"same"` (case-insensitive).
+    data_format: A string, one of `channels_last` (default) or `channels_first`.
+      The ordering of the dimensions in the inputs.
+      `channels_last` corresponds to inputs with shape
+      `(batch, height, width, channels)` while `channels_first` corresponds to
+      inputs with shape `(batch, channels, height, width)`.
+
+    dilation_rate: An integer or tuple/list of 2 integers, specifying
+      the dilation rate to use for dilated convolution.
+      Can be a single integer to specify the same value for
+      all spatial dimensions.
+      Currently, specifying any `dilation_rate` value != 1 is
+      incompatible with specifying any stride value != 1.
+    activation: Activation function. Set it to None to maintain a
+      linear activation.
+    activity_regularizer: Optional regularizer function for the output.
+    trainable: Boolean, if `True` also add variables to the graph collection
+      `GraphKeys.TRAINABLE_VARIABLES` (see `tf.Variable`).
+    kernel_posterior_fn: Python `callable` which creates
+      `tf.distributions.Distribution` instance representing the surrogate
+      posterior of the `kernel` parameter. Default value:
+      `default_mean_field_normal_fn()`.
+    kernel_posterior_tensor_fn: Python `callable` which takes a
+      `tf.distributions.Distribution` instance and returns a representative
+      value. Default value: `lambda d: d.sample()`.
+    kernel_prior_fn: Python `callable` which creates `tf.distributions`
+      instance. See `default_mean_field_normal_fn` docstring for required
+      parameter signature.
+      Default value: `tf.distributions.Normal(loc=0., scale=1.)`.
+    kernel_divergence_fn: Python `callable` which takes the surrogate posterior
+      distribution, prior distribution and random variate sample(s) from the
+      surrogate posterior and computes or approximates the KL divergence. The
+      distributions are `tf.distributions.Distribution`-like instances and the
+      sample is a `Tensor`.
+    bias_posterior_fn: Python `callable` which creates
+      `tf.distributions.Distribution` instance representing the surrogate
+      posterior of the `bias` parameter. Default value:
+      `default_mean_field_normal_fn(is_singular=True)` (which creates an
+      instance of `tf.distributions.Deterministic`).
+    bias_posterior_tensor_fn: Python `callable` which takes a
+      `tf.distributions.Distribution` instance and returns a representative
+      value. Default value: `lambda d: d.sample()`.
+    bias_prior_fn: Python `callable` which creates `tf.distributions` instance.
+      See `default_mean_field_normal_fn` docstring for required parameter
+      signature. Default value: `None` (no prior, no variational inference)
+    bias_divergence_fn: Python `callable` which takes the surrogate posterior
+      distribution, prior distribution and random variate sample(s) from the
+      surrogate posterior and computes or approximates the KL divergence. The
+      distributions are `tf.distributions.Distribution`-like instances and the
+    name: A string, the name of the layer.
+
+  Properties:
+    filters: Python integer, dimensionality of the output space.
+    kernel_size: Size of the convolution window.
+    strides: Stride length of convolution.
+    padding: Python string describing padding approach.
+    data_format: Python string describing input data's dimensions.
+    dilation_rate: Dilation rate for an atrous convolution.
+    activation: Activation function (`callable`).
+    activity_regularizer: Regularizer function for the output.
+    kernel_use_local_reparameterization: Python `bool` indicating whether
+      `kernel` calculation should employ the Local Reparameterization Trick.
+    kernel_posterior_fn: `callable` returning posterior.
+    kernel_posterior_tensor_fn: `callable` operating on posterior.
+    kernel_prior_fn: `callable` returning prior.
+    kernel_divergence_fn: `callable` returning divergence.
+    bias_posterior_fn: `callable` returning posterior.
+    bias_posterior_tensor_fn: `callable` operating on posterior.
+    bias_prior_fn: `callable` returning prior.
+    bias_divergence_fn: `callable` returning divergence.
+
+  #### Examples
+
+  We illustrate a Bayesian neural network with [variational inference](
+  https://en.wikipedia.org/wiki/Variational_Bayesian_methods),
+  assuming a dataset of `features` and `labels`.
+
+  ```python
+  tfp = tf.contrib.bayesflow
+
+  net = tf.reshape(features, [-1, 32, 32, 3])
+  net = tfp.layers.Conv2DVariational(64,
+                                     kernel_size=5,
+                                     padding="SAME",
+                                     activation=tf.nn.relu)(net)
+  net = tf.layers.MaxPooling2D(pool_size=2,
+                               strides=2,
+                               padding="SAME")(net)
+  net = tf.reshape(net, [-1, 8 * 8 * 64])
+  logits = tfp.layers.DenseVariational(10)(net)
+  neg_log_likelihood = tf.nn.softmax_cross_entropy_with_logits(
+      labels=labels, logits=logits)
+  kl = sum(tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))
+  loss = neg_log_likelihood + kl
+  train_op = tf.train.AdamOptimizer().minimize(loss)
+  ```
+
+  It uses reparameterization gradients to minimize the
+  Kullback-Leibler divergence up to a constant, also known as the
+  negative Evidence Lower Bound. It consists of the sum of two terms:
+  the expected negative log-likelihood, which we approximate via
+  Monte Carlo; and the KL divergence, which is added via regularizer
+  terms which are arguments to the layer.
+  """
+
+  def __init__(
+      self,
+      filters,
+      kernel_size,
+      strides=(1, 1),
+      padding="valid",
+      data_format="channels_last",
+      dilation_rate=(1, 1),
+      activation=None,
+      activity_regularizer=None,
+      trainable=True,
+      kernel_posterior_fn=layers_util.default_mean_field_normal_fn(),
+      kernel_posterior_tensor_fn=lambda d: d.sample(),
+      kernel_prior_fn=lambda dtype, *args: normal_lib.Normal(  # pylint: disable=g-long-lambda
+          loc=dtype.as_numpy_dtype(0.), scale=dtype.as_numpy_dtype(1.)),
+      kernel_divergence_fn=lambda q, p, ignore: kl_lib.kl_divergence(q, p),
+      bias_posterior_fn=layers_util.default_mean_field_normal_fn(is_singular=True),  # pylint: disable=line-too-long
+      bias_posterior_tensor_fn=lambda d: d.sample(),
+      bias_prior_fn=None,
+      bias_divergence_fn=lambda q, p, ignore: kl_lib.kl_divergence(q, p),
+      name=None,
+      **kwargs):
+    super(Conv2DVariational, self).__init__(
+        rank=2,
+        filters=filters,
+        kernel_size=kernel_size,
+        strides=strides,
+        padding=padding,
+        data_format=data_format,
+        dilation_rate=dilation_rate,
+        activation=activation,
+        activity_regularizer=activity_regularizer,
+        trainable=trainable,
+        kernel_posterior_fn=kernel_posterior_fn,
+        kernel_posterior_tensor_fn=kernel_posterior_tensor_fn,
+        kernel_prior_fn=kernel_prior_fn,
+        kernel_divergence_fn=kernel_divergence_fn,
+        bias_posterior_fn=bias_posterior_fn,
+        bias_posterior_tensor_fn=bias_posterior_tensor_fn,
+        bias_prior_fn=bias_prior_fn,
+        bias_divergence_fn=bias_divergence_fn,
+        name=name, **kwargs)
+
+
+def conv2d_variational(
+    inputs,
+    filters,
+    kernel_size,
+    strides=(1, 1),
+    padding="valid",
+    data_format="channels_last",
+    dilation_rate=(1, 1),
+    activation=None,
+    activity_regularizer=None,
+    trainable=True,
+    kernel_posterior_fn=layers_util.default_mean_field_normal_fn(),
+    kernel_posterior_tensor_fn=lambda d: d.sample(),
+    kernel_prior_fn=lambda dtype, *args: normal_lib.Normal(  # pylint: disable=g-long-lambda
+        loc=dtype.as_numpy_dtype(0.), scale=dtype.as_numpy_dtype(1.)),
+    kernel_divergence_fn=lambda q, p, ignore: kl_lib.kl_divergence(q, p),
+    bias_posterior_fn=layers_util.default_mean_field_normal_fn(is_singular=True),  # pylint: disable=line-too-long
+    bias_posterior_tensor_fn=lambda d: d.sample(),
+    bias_prior_fn=None,
+    bias_divergence_fn=lambda q, p, ignore: kl_lib.kl_divergence(q, p),
+    name=None,
+    reuse=None):
+  """Functional interface for the 2D convolution layer.
+
+  This layer creates a convolution kernel that is convolved
+  (actually cross-correlated) with the layer input to produce a tensor of
+  outputs. It may also include a bias addition and activation function
+  on the outputs. It assumes the `kernel` and/or `bias` are drawn from
+  distributions.
+
+  By default, the layer implements a stochastic forward pass via
+  sampling from the kernel and bias posteriors,
+  ```none
+  outputs = f(inputs; kernel, bias), kernel, bias ~ posterior
+  ```
+  where f denotes the layer's calculation.
+
+  The arguments permit separate specification of the surrogate posterior
+  (`q(W|x)`), prior (`p(W)`), and divergence for both the `kernel` and `bias`
+  distributions.
+
+  Arguments:
+    inputs: Tensor input.
+    filters: Integer, the dimensionality of the output space (i.e. the number
+      of filters in the convolution).
+    kernel_size: An integer or tuple/list of 2 integers, specifying the
+      height and width of the 2D convolution window.
+      Can be a single integer to specify the same value for
+      all spatial dimensions.
+    strides: An integer or tuple/list of 2 integers,
+      specifying the strides of the convolution along the height and width.
+      Can be a single integer to specify the same value for
+      all spatial dimensions.
+      Specifying any stride value != 1 is incompatible with specifying
+      any `dilation_rate` value != 1.
+    padding: One of `"valid"` or `"same"` (case-insensitive).
+    data_format: A string, one of `channels_last` (default) or `channels_first`.
+      The ordering of the dimensions in the inputs.
+      `channels_last` corresponds to inputs with shape
+      `(batch, height, width, channels)` while `channels_first` corresponds to
+      inputs with shape `(batch, channels, height, width)`.
+
+    dilation_rate: An integer or tuple/list of 2 integers, specifying
+      the dilation rate to use for dilated convolution.
+      Can be a single integer to specify the same value for
+      all spatial dimensions.
+      Currently, specifying any `dilation_rate` value != 1 is
+      incompatible with specifying any stride value != 1.
+    activation: Activation function. Set it to None to maintain a
+      linear activation.
+    activity_regularizer: Optional regularizer function for the output.
+    trainable: Boolean, if `True` also add variables to the graph collection
+      `GraphKeys.TRAINABLE_VARIABLES` (see `tf.Variable`).
+    kernel_posterior_fn: Python `callable` which creates
+      `tf.distributions.Distribution` instance representing the surrogate
+      posterior of the `kernel` parameter. Default value:
+      `default_mean_field_normal_fn()`.
+    kernel_posterior_tensor_fn: Python `callable` which takes a
+      `tf.distributions.Distribution` instance and returns a representative
+      value. Default value: `lambda d: d.sample()`.
+    kernel_prior_fn: Python `callable` which creates `tf.distributions`
+      instance. See `default_mean_field_normal_fn` docstring for required
+      parameter signature.
+      Default value: `tf.distributions.Normal(loc=0., scale=1.)`.
+    kernel_divergence_fn: Python `callable` which takes the surrogate posterior
+      distribution, prior distribution and random variate sample(s) from the
+      surrogate posterior and computes or approximates the KL divergence. The
+      distributions are `tf.distributions.Distribution`-like instances and the
+      sample is a `Tensor`.
+    bias_posterior_fn: Python `callable` which creates
+      `tf.distributions.Distribution` instance representing the surrogate
+      posterior of the `bias` parameter. Default value:
+      `default_mean_field_normal_fn(is_singular=True)` (which creates an
+      instance of `tf.distributions.Deterministic`).
+    bias_posterior_tensor_fn: Python `callable` which takes a
+      `tf.distributions.Distribution` instance and returns a representative
+      value. Default value: `lambda d: d.sample()`.
+    bias_prior_fn: Python `callable` which creates `tf.distributions` instance.
+      See `default_mean_field_normal_fn` docstring for required parameter
+      signature. Default value: `None` (no prior, no variational inference)
+    bias_divergence_fn: Python `callable` which takes the surrogate posterior
+      distribution, prior distribution and random variate sample(s) from the
+      surrogate posterior and computes or approximates the KL divergence. The
+      distributions are `tf.distributions.Distribution`-like instances and the
+    name: A string, the name of the layer.
+    reuse: Boolean, whether to reuse the weights of a previous layer
+      by the same name.
+
+  Returns:
+    Output tensor.
+
+  Raises:
+    ValueError: if eager execution is enabled.
+
+  #### Examples
+
+  We illustrate a Bayesian neural network with [variational inference](
+  https://en.wikipedia.org/wiki/Variational_Bayesian_methods),
+  assuming a dataset of `features` and `labels`.
+
+  ```python
+  tfp = tf.contrib.bayesflow
+
+  net = tf.reshape(features, [-1, 32, 32, 3])
+  net = tfp.layers.conv2d_variational(net,
+                                      64,
+                                      kernel_size=5,
+                                      padding="SAME",
+                                      activation=tf.nn.relu)
+  net = tf.layers.max_pooling2d(net,
+                                pool_size=2,
+                                strides=2,
+                                padding="SAME")
+  net = tf.reshape(net, [-1, 8 * 8 * 64])
+  logits = tfp.layers.dense_variational(net, 10)
+  neg_log_likelihood = tf.nn.softmax_cross_entropy_with_logits(
+      labels=labels, logits=logits)
+  kl = sum(tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))
+  loss = neg_log_likelihood + kl
+  train_op = tf.train.AdamOptimizer().minimize(loss)
+  ```
+
+  It uses reparameterization gradients to minimize the
+  Kullback-Leibler divergence up to a constant, also known as the
+  negative Evidence Lower Bound. It consists of the sum of two terms:
+  the expected negative log-likelihood, which we approximate via
+  Monte Carlo; and the KL divergence, which is added via regularizer
+  terms which are arguments to the layer.
+  """
+  layer = Conv2DVariational(
+      filters=filters,
+      kernel_size=kernel_size,
+      strides=strides,
+      padding=padding,
+      data_format=data_format,
+      dilation_rate=dilation_rate,
+      activation=activation,
+      activity_regularizer=activity_regularizer,
+      trainable=trainable,
+      kernel_posterior_fn=kernel_posterior_fn,
+      kernel_posterior_tensor_fn=kernel_posterior_tensor_fn,
+      kernel_prior_fn=kernel_prior_fn,
+      kernel_divergence_fn=kernel_divergence_fn,
+      bias_posterior_fn=bias_posterior_fn,
+      bias_posterior_tensor_fn=bias_posterior_tensor_fn,
+      bias_prior_fn=bias_prior_fn,
+      bias_divergence_fn=bias_divergence_fn,
+      name=name,
+      dtype=inputs.dtype.base_dtype,
+      _scope=name,
+      _reuse=reuse)
+  return layer.apply(inputs)
+
+
+class Conv3DVariational(_ConvVariational):
+  """3D convolution layer (e.g. spatial convolution over volumes).
+
+  This layer creates a convolution kernel that is convolved
+  (actually cross-correlated) with the layer input to produce a tensor of
+  outputs. It may also include a bias addition and activation function
+  on the outputs. It assumes the `kernel` and/or `bias` are drawn from
+  distributions.
+
+  By default, the layer implements a stochastic forward pass via
+  sampling from the kernel and bias posteriors,
+  ```none
+  outputs = f(inputs; kernel, bias), kernel, bias ~ posterior
+  ```
+  where f denotes the layer's calculation.
+
+  The arguments permit separate specification of the surrogate posterior
+  (`q(W|x)`), prior (`p(W)`), and divergence for both the `kernel` and `bias`
+  distributions.
+
+  Arguments:
+    filters: Integer, the dimensionality of the output space (i.e. the number
+      of filters in the convolution).
+    kernel_size: An integer or tuple/list of 3 integers, specifying the
+      depth, height and width of the 3D convolution window.
+      Can be a single integer to specify the same value for
+      all spatial dimensions.
+    strides: An integer or tuple/list of 3 integers,
+      specifying the strides of the convolution along the depth,
+      height and width.
+      Can be a single integer to specify the same value for
+      all spatial dimensions.
+      Specifying any stride value != 1 is incompatible with specifying
+      any `dilation_rate` value != 1.
+    padding: One of `"valid"` or `"same"` (case-insensitive).
+    data_format: A string, one of `channels_last` (default) or `channels_first`.
+      The ordering of the dimensions in the inputs.
+      `channels_last` corresponds to inputs with shape
+      `(batch, depth, height, width, channels)` while `channels_first`
+      corresponds to inputs with shape
+      `(batch, channels, depth, height, width)`.
+    dilation_rate: An integer or tuple/list of 3 integers, specifying
+      the dilation rate to use for dilated convolution.
+      Can be a single integer to specify the same value for
+      all spatial dimensions.
+      Currently, specifying any `dilation_rate` value != 1 is
+      incompatible with specifying any stride value != 1.
+    activation: Activation function. Set it to None to maintain a
+      linear activation.
+    activity_regularizer: Optional regularizer function for the output.
+    trainable: Boolean, if `True` also add variables to the graph collection
+      `GraphKeys.TRAINABLE_VARIABLES` (see `tf.Variable`).
+    kernel_posterior_fn: Python `callable` which creates
+      `tf.distributions.Distribution` instance representing the surrogate
+      posterior of the `kernel` parameter. Default value:
+      `default_mean_field_normal_fn()`.
+    kernel_posterior_tensor_fn: Python `callable` which takes a
+      `tf.distributions.Distribution` instance and returns a representative
+      value. Default value: `lambda d: d.sample()`.
+    kernel_prior_fn: Python `callable` which creates `tf.distributions`
+      instance. See `default_mean_field_normal_fn` docstring for required
+      parameter signature.
+      Default value: `tf.distributions.Normal(loc=0., scale=1.)`.
+    kernel_divergence_fn: Python `callable` which takes the surrogate posterior
+      distribution, prior distribution and random variate sample(s) from the
+      surrogate posterior and computes or approximates the KL divergence. The
+      distributions are `tf.distributions.Distribution`-like instances and the
+      sample is a `Tensor`.
+    bias_posterior_fn: Python `callable` which creates
+      `tf.distributions.Distribution` instance representing the surrogate
+      posterior of the `bias` parameter. Default value:
+      `default_mean_field_normal_fn(is_singular=True)` (which creates an
+      instance of `tf.distributions.Deterministic`).
+    bias_posterior_tensor_fn: Python `callable` which takes a
+      `tf.distributions.Distribution` instance and returns a representative
+      value. Default value: `lambda d: d.sample()`.
+    bias_prior_fn: Python `callable` which creates `tf.distributions` instance.
+      See `default_mean_field_normal_fn` docstring for required parameter
+      signature. Default value: `None` (no prior, no variational inference)
+    bias_divergence_fn: Python `callable` which takes the surrogate posterior
+      distribution, prior distribution and random variate sample(s) from the
+      surrogate posterior and computes or approximates the KL divergence. The
+      distributions are `tf.distributions.Distribution`-like instances and the
+    name: A string, the name of the layer.
+
+  Properties:
+    filters: Python integer, dimensionality of the output space.
+    kernel_size: Size of the convolution window.
+    strides: Stride length of convolution.
+    padding: Python string describing padding approach.
+    data_format: Python string describing input data's dimensions.
+    dilation_rate: Dilation rate for an atrous convolution.
+    activation: Activation function (`callable`).
+    activity_regularizer: Regularizer function for the output.
+    kernel_use_local_reparameterization: Python `bool` indicating whether
+      `kernel` calculation should employ the Local Reparameterization Trick.
+    kernel_posterior_fn: `callable` returning posterior.
+    kernel_posterior_tensor_fn: `callable` operating on posterior.
+    kernel_prior_fn: `callable` returning prior.
+    kernel_divergence_fn: `callable` returning divergence.
+    bias_posterior_fn: `callable` returning posterior.
+    bias_posterior_tensor_fn: `callable` operating on posterior.
+    bias_prior_fn: `callable` returning prior.
+    bias_divergence_fn: `callable` returning divergence.
+
+  #### Examples
+
+  We illustrate a Bayesian neural network with [variational inference](
+  https://en.wikipedia.org/wiki/Variational_Bayesian_methods),
+  assuming a dataset of `features` and `labels`.
+
+  ```python
+  tfp = tf.contrib.bayesflow
+
+  net = tf.reshape(features, [-1, 256, 32, 32, 3])
+  net = tfp.layers.Conv3DVariational(64,
+                                     kernel_size=5,
+                                     padding="SAME",
+                                     activation=tf.nn.relu)(net)
+  net = tf.layers.MaxPooling2D(pool_size=2,
+                               strides=2,
+                               padding="SAME")(net)
+  net = tf.reshape(net, [-1, 256 * 8 * 8 * 64])
+  logits = tfp.layers.DenseVariational(10)(net)
+  neg_log_likelihood = tf.nn.softmax_cross_entropy_with_logits(
+      labels=labels, logits=logits)
+  kl = sum(tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))
+  loss = neg_log_likelihood + kl
+  train_op = tf.train.AdamOptimizer().minimize(loss)
+  ```
+
+  It uses reparameterization gradients to minimize the
+  Kullback-Leibler divergence up to a constant, also known as the
+  negative Evidence Lower Bound. It consists of the sum of two terms:
+  the expected negative log-likelihood, which we approximate via
+  Monte Carlo; and the KL divergence, which is added via regularizer
+  terms which are arguments to the layer.
+  """
+
+  def __init__(
+      self,
+      filters,
+      kernel_size,
+      strides=(1, 1, 1),
+      padding="valid",
+      data_format="channels_last",
+      dilation_rate=(1, 1, 1),
+      activation=None,
+      activity_regularizer=None,
+      trainable=True,
+      kernel_posterior_fn=layers_util.default_mean_field_normal_fn(),
+      kernel_posterior_tensor_fn=lambda d: d.sample(),
+      kernel_prior_fn=lambda dtype, *args: normal_lib.Normal(  # pylint: disable=g-long-lambda
+          loc=dtype.as_numpy_dtype(0.), scale=dtype.as_numpy_dtype(1.)),
+      kernel_divergence_fn=lambda q, p, ignore: kl_lib.kl_divergence(q, p),
+      bias_posterior_fn=layers_util.default_mean_field_normal_fn(is_singular=True),  # pylint: disable=line-too-long
+      bias_posterior_tensor_fn=lambda d: d.sample(),
+      bias_prior_fn=None,
+      bias_divergence_fn=lambda q, p, ignore: kl_lib.kl_divergence(q, p),
+      name=None,
+      **kwargs):
+    super(Conv3DVariational, self).__init__(
+        rank=3,
+        filters=filters,
+        kernel_size=kernel_size,
+        strides=strides,
+        padding=padding,
+        data_format=data_format,
+        dilation_rate=dilation_rate,
+        activation=activation,
+        activity_regularizer=activity_regularizer,
+        trainable=trainable,
+        kernel_posterior_fn=kernel_posterior_fn,
+        kernel_posterior_tensor_fn=kernel_posterior_tensor_fn,
+        kernel_prior_fn=kernel_prior_fn,
+        kernel_divergence_fn=kernel_divergence_fn,
+        bias_posterior_fn=bias_posterior_fn,
+        bias_posterior_tensor_fn=bias_posterior_tensor_fn,
+        bias_prior_fn=bias_prior_fn,
+        bias_divergence_fn=bias_divergence_fn,
+        name=name, **kwargs)
+
+
+def conv3d_variational(
+    inputs,
+    filters,
+    kernel_size,
+    strides=(1, 1, 1),
+    padding="valid",
+    data_format="channels_last",
+    dilation_rate=(1, 1, 1),
+    activation=None,
+    activity_regularizer=None,
+    trainable=True,
+    kernel_posterior_fn=layers_util.default_mean_field_normal_fn(),
+    kernel_posterior_tensor_fn=lambda d: d.sample(),
+    kernel_prior_fn=lambda dtype, *args: normal_lib.Normal(  # pylint: disable=g-long-lambda
+        loc=dtype.as_numpy_dtype(0.), scale=dtype.as_numpy_dtype(1.)),
+    kernel_divergence_fn=lambda q, p, ignore: kl_lib.kl_divergence(q, p),
+    bias_posterior_fn=layers_util.default_mean_field_normal_fn(is_singular=True),  # pylint: disable=line-too-long
+    bias_posterior_tensor_fn=lambda d: d.sample(),
+    bias_prior_fn=None,
+    bias_divergence_fn=lambda q, p, ignore: kl_lib.kl_divergence(q, p),
+    name=None,
+    reuse=None):
+  """Functional interface for the 3D convolution layer.
+
+  This layer creates a convolution kernel that is convolved
+  (actually cross-correlated) with the layer input to produce a tensor of
+  outputs. It may also include a bias addition and activation function
+  on the outputs. It assumes the `kernel` and/or `bias` are drawn from
+  distributions.
+
+  By default, the layer implements a stochastic forward pass via
+  sampling from the kernel and bias posteriors,
+  ```none
+  outputs = f(inputs; kernel, bias), kernel, bias ~ posterior
+  ```
+  where f denotes the layer's calculation.
+
+  The arguments permit separate specification of the surrogate posterior
+  (`q(W|x)`), prior (`p(W)`), and divergence for both the `kernel` and `bias`
+  distributions.
+
+  Arguments:
+    inputs: Tensor input.
+    filters: Integer, the dimensionality of the output space (i.e. the number
+      of filters in the convolution).
+    kernel_size: An integer or tuple/list of 3 integers, specifying the
+      depth, height and width of the 3D convolution window.
+      Can be a single integer to specify the same value for
+      all spatial dimensions.
+    strides: An integer or tuple/list of 3 integers,
+      specifying the strides of the convolution along the depth,
+      height and width.
+      Can be a single integer to specify the same value for
+      all spatial dimensions.
+      Specifying any stride value != 1 is incompatible with specifying
+      any `dilation_rate` value != 1.
+    padding: One of `"valid"` or `"same"` (case-insensitive).
+    data_format: A string, one of `channels_last` (default) or `channels_first`.
+      The ordering of the dimensions in the inputs.
+      `channels_last` corresponds to inputs with shape
+      `(batch, depth, height, width, channels)` while `channels_first`
+      corresponds to inputs with shape
+      `(batch, channels, depth, height, width)`.
+    dilation_rate: An integer or tuple/list of 3 integers, specifying
+      the dilation rate to use for dilated convolution.
+      Can be a single integer to specify the same value for
+      all spatial dimensions.
+      Currently, specifying any `dilation_rate` value != 1 is
+      incompatible with specifying any stride value != 1.
+    activation: Activation function. Set it to None to maintain a
+      linear activation.
+    activity_regularizer: Optional regularizer function for the output.
+    trainable: Boolean, if `True` also add variables to the graph collection
+      `GraphKeys.TRAINABLE_VARIABLES` (see `tf.Variable`).
+    kernel_posterior_fn: Python `callable` which creates
+      `tf.distributions.Distribution` instance representing the surrogate
+      posterior of the `kernel` parameter. Default value:
+      `default_mean_field_normal_fn()`.
+    kernel_posterior_tensor_fn: Python `callable` which takes a
+      `tf.distributions.Distribution` instance and returns a representative
+      value. Default value: `lambda d: d.sample()`.
+    kernel_prior_fn: Python `callable` which creates `tf.distributions`
+      instance. See `default_mean_field_normal_fn` docstring for required
+      parameter signature.
+      Default value: `tf.distributions.Normal(loc=0., scale=1.)`.
+    kernel_divergence_fn: Python `callable` which takes the surrogate posterior
+      distribution, prior distribution and random variate sample(s) from the
+      surrogate posterior and computes or approximates the KL divergence. The
+      distributions are `tf.distributions.Distribution`-like instances and the
+      sample is a `Tensor`.
+    bias_posterior_fn: Python `callable` which creates
+      `tf.distributions.Distribution` instance representing the surrogate
+      posterior of the `bias` parameter. Default value:
+      `default_mean_field_normal_fn(is_singular=True)` (which creates an
+      instance of `tf.distributions.Deterministic`).
+    bias_posterior_tensor_fn: Python `callable` which takes a
+      `tf.distributions.Distribution` instance and returns a representative
+      value. Default value: `lambda d: d.sample()`.
+    bias_prior_fn: Python `callable` which creates `tf.distributions` instance.
+      See `default_mean_field_normal_fn` docstring for required parameter
+      signature. Default value: `None` (no prior, no variational inference)
+    bias_divergence_fn: Python `callable` which takes the surrogate posterior
+      distribution, prior distribution and random variate sample(s) from the
+      surrogate posterior and computes or approximates the KL divergence. The
+      distributions are `tf.distributions.Distribution`-like instances and the
+    name: A string, the name of the layer.
+    reuse: Boolean, whether to reuse the weights of a previous layer
+      by the same name.
+
+  Returns:
+    Output tensor.
+
+  Raises:
+    ValueError: if eager execution is enabled.
+
+  #### Examples
+
+  We illustrate a Bayesian neural network with [variational inference](
+  https://en.wikipedia.org/wiki/Variational_Bayesian_methods),
+  assuming a dataset of `features` and `labels`.
+
+  ```python
+  tfp = tf.contrib.bayesflow
+
+  net = tf.reshape(features, [-1, 256, 32, 32, 3])
+  net = tfp.layers.conv3d_variational(net,
+                                      64,
+                                      kernel_size=5,
+                                      padding="SAME",
+                                      activation=tf.nn.relu)
+  net = tf.layers.max_pooling2d(net,
+                                pool_size=2,
+                                strides=2,
+                                padding="SAME")
+  net = tf.reshape(net, [-1, 256 * 8 * 8 * 64])
+  logits = tfp.layers.dense_variational(net, 10)
+  neg_log_likelihood = tf.nn.softmax_cross_entropy_with_logits(
+      labels=labels, logits=logits)
+  kl = sum(tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))
+  loss = neg_log_likelihood + kl
+  train_op = tf.train.AdamOptimizer().minimize(loss)
+  ```
+
+  It uses reparameterization gradients to minimize the
+  Kullback-Leibler divergence up to a constant, also known as the
+  negative Evidence Lower Bound. It consists of the sum of two terms:
+  the expected negative log-likelihood, which we approximate via
+  Monte Carlo; and the KL divergence, which is added via regularizer
+  terms which are arguments to the layer.
+  """
+  layer = Conv3DVariational(
+      filters=filters,
+      kernel_size=kernel_size,
+      strides=strides,
+      padding=padding,
+      data_format=data_format,
+      dilation_rate=dilation_rate,
+      activation=activation,
+      activity_regularizer=activity_regularizer,
+      trainable=trainable,
+      kernel_posterior_fn=kernel_posterior_fn,
+      kernel_posterior_tensor_fn=kernel_posterior_tensor_fn,
+      kernel_prior_fn=kernel_prior_fn,
+      kernel_divergence_fn=kernel_divergence_fn,
+      bias_posterior_fn=bias_posterior_fn,
+      bias_posterior_tensor_fn=bias_posterior_tensor_fn,
+      bias_prior_fn=bias_prior_fn,
+      bias_divergence_fn=bias_divergence_fn,
+      name=name,
+      dtype=inputs.dtype.base_dtype,
+      _scope=name,
+      _reuse=reuse)
+  return layer.apply(inputs)
+
+
+# Aliases
+
+Convolution1DVariational = Conv1DVariational
+Convolution2DVariational = Conv2DVariational
+Convolution3DVariational = Conv3DVariational
+convolution1d_variational = conv1d_variational
+convolution2d_variational = conv2d_variational
+convolution3d_variational = conv3d_variational
diff --git a/tensorflow/contrib/bayesflow/python/ops/layers_dense_variational_impl.py b/tensorflow/contrib/bayesflow/python/ops/layers_dense_variational_impl.py
index 2a260405d0..a749a396f1 100644
--- a/tensorflow/contrib/bayesflow/python/ops/layers_dense_variational_impl.py
+++ b/tensorflow/contrib/bayesflow/python/ops/layers_dense_variational_impl.py
@@ -20,28 +20,21 @@
 @@dense_reparameterization
 @@dense_local_reparameterization
 @@dense_flipout
-
-@@default_loc_scale_fn
-@@default_mean_field_normal_fn
 """
 
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-import numpy as np
-
-from tensorflow.contrib.distributions.python.ops import deterministic as deterministic_lib
+from tensorflow.contrib.bayesflow.python.ops import layers_util
 from tensorflow.contrib.distributions.python.ops import independent as independent_lib
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
 from tensorflow.python.framework import tensor_shape
 from tensorflow.python.layers import base as layers_lib
 from tensorflow.python.ops import array_ops
-from tensorflow.python.ops import init_ops
 from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import nn
-from tensorflow.python.ops import nn_ops
 from tensorflow.python.ops import random_ops
 from tensorflow.python.ops import standard_ops
 from tensorflow.python.ops.distributions import kullback_leibler as kl_lib
@@ -56,162 +49,9 @@ __all__ = [
     "dense_reparameterization",
     "dense_local_reparameterization",
     "dense_flipout",
-    "default_loc_scale_fn",
-    "default_mean_field_normal_fn",
 ]
 
 
-def default_loc_scale_fn(
-    is_singular=False,
-    loc_initializer=init_ops.random_normal_initializer(stddev=0.1),
-    untransformed_scale_initializer=init_ops.random_normal_initializer(
-        mean=-3., stddev=0.1),
-    loc_regularizer=None,
-    untransformed_scale_regularizer=None,
-    loc_constraint=None,
-    untransformed_scale_constraint=None):
-  """Makes closure which creates `loc`, `scale` params from `tf.get_variable`.
-
-  This function produces a closure which produces `loc`, `scale` using
-  `tf.get_variable`. The closure accepts the following arguments:
-
-    dtype: Type of parameter's event.
-    shape: Python `list`-like representing the parameter's event shape.
-    name: Python `str` name prepended to any created (or existing)
-      `tf.Variable`s.
-    trainable: Python `bool` indicating all created `tf.Variable`s should be
-      added to the graph collection `GraphKeys.TRAINABLE_VARIABLES`.
-    add_variable_fn: `tf.get_variable`-like `callable` used to create (or
-      access existing) `tf.Variable`s.
-
-  Args:
-    is_singular: Python `bool` indicating if `scale is None`. Default: `False`.
-    loc_initializer: Initializer function for the `loc` parameters.
-      The default is `tf.random_normal_initializer(mean=0., stddev=0.1)`.
-    untransformed_scale_initializer: Initializer function for the `scale`
-      parameters. Default value: `tf.random_normal_initializer(mean=-3.,
-      stddev=0.1)`. This implies the softplus transformed result has mean
-      approximately `0.05` and std. deviation approximately `0.005`.
-    loc_regularizer: Regularizer function for the `loc` parameters.
-      The default (`None`) is to use the `tf.get_variable` default.
-    untransformed_scale_regularizer: Regularizer function for the `scale`
-      parameters. The default (`None`) is to use the `tf.get_variable` default.
-    loc_constraint: An optional projection function to be applied to the
-      loc after being updated by an `Optimizer`. The function must take as input
-      the unprojected variable and must return the projected variable (which
-      must have the same shape). Constraints are not safe to use when doing
-      asynchronous distributed training.
-      The default (`None`) is to use the `tf.get_variable` default.
-    untransformed_scale_constraint: An optional projection function to be
-      applied to the `scale` parameters after being updated by an `Optimizer`
-      (e.g. used to implement norm constraints or value constraints). The
-      function must take as input the unprojected variable and must return the
-      projected variable (which must have the same shape). Constraints are not
-      safe to use when doing asynchronous distributed training. The default
-      (`None`) is to use the `tf.get_variable` default.
-
-  Returns:
-    default_loc_scale_fn: Python `callable` which instantiates `loc`, `scale`
-    parameters from args: `dtype, shape, name, trainable, add_variable_fn`.
-  """
-  def _fn(dtype, shape, name, trainable, add_variable_fn):
-    """Creates `loc`, `scale` parameters."""
-    loc = add_variable_fn(
-        name=name + "_loc",
-        shape=shape,
-        initializer=loc_initializer,
-        regularizer=loc_regularizer,
-        constraint=loc_constraint,
-        dtype=dtype,
-        trainable=trainable)
-    if is_singular:
-      return loc, None
-    untransformed_scale = add_variable_fn(
-        name=name + "_untransformed_scale",
-        shape=shape,
-        initializer=untransformed_scale_initializer,
-        regularizer=untransformed_scale_regularizer,
-        constraint=untransformed_scale_constraint,
-        dtype=dtype,
-        trainable=trainable)
-    scale = (np.finfo(dtype.as_numpy_dtype).eps +
-             nn_ops.softplus(untransformed_scale))
-    return loc, scale
-  return _fn
-
-
-def default_mean_field_normal_fn(
-    is_singular=False,
-    loc_initializer=None,
-    untransformed_scale_initializer=None,
-    loc_regularizer=None,
-    untransformed_scale_regularizer=None,
-    loc_constraint=None,
-    untransformed_scale_constraint=None):
-  """Creates a function to build Normal distributions with trainable params.
-
-  This function produces a closure which produces `tf.distributions.Normal`
-  parameterized by a loc` and `scale` each created using `tf.get_variable`. The
-  produced closure accepts the following arguments:
-
-    name: Python `str` name prepended to any created (or existing)
-      `tf.Variable`s.
-    shape: Python `list`-like representing the parameter's event shape.
-    dtype: Type of parameter's event.
-    trainable: Python `bool` indicating all created `tf.Variable`s should be
-      added to the graph collection `GraphKeys.TRAINABLE_VARIABLES`.
-    add_variable_fn: `tf.get_variable`-like `callable` used to create (or
-      access existing) `tf.Variable`s.
-
-  Args:
-    is_singular: Python `bool` if `True`, forces the special case limit of
-      `scale->0`, i.e., a `Deterministic` distribution.
-    loc_initializer: Initializer function for the `loc` parameters.
-      If `None` (default), values are initialized using the default
-      initializer used by `tf.get_variable`.
-    untransformed_scale_initializer: Initializer function for the `scale`
-      parameters. If `None` (default), values are initialized using the default
-      initializer used by `tf.get_variable`.
-    loc_regularizer: Regularizer function for the `loc` parameters.
-    untransformed_scale_regularizer: Regularizer function for the `scale`
-      parameters.
-    loc_constraint: An optional projection function to be applied to the
-      loc after being updated by an `Optimizer`. The function must take as input
-      the unprojected variable and must return the projected variable (which
-      must have the same shape). Constraints are not safe to use when doing
-      asynchronous distributed training.
-    untransformed_scale_constraint: An optional projection function to be
-      applied to the `scale` parameters after being updated by an `Optimizer`
-      (e.g. used to implement norm constraints or value constraints). The
-      function must take as input the unprojected variable and must return the
-      projected variable (which must have the same shape). Constraints are not
-      safe to use when doing asynchronous distributed training.
-
-  Returns:
-    make_normal_fn: Python `callable` which creates a `tf.distributions.Normal`
-      using from args: `dtype, shape, name, trainable, add_variable_fn`.
-  """
-  loc_scale_fn_ = default_loc_scale_fn(
-      is_singular,
-      loc_initializer,
-      untransformed_scale_initializer,
-      loc_regularizer,
-      untransformed_scale_regularizer,
-      loc_constraint,
-      untransformed_scale_constraint)
-  def _fn(dtype, shape, name, trainable, add_variable_fn):
-    """Creates multivariate `Deterministic` or `Normal` distribution."""
-    loc, scale = loc_scale_fn_(dtype, shape, name, trainable, add_variable_fn)
-    if scale is None:
-      dist = deterministic_lib.Deterministic(loc=loc)
-    else:
-      dist = normal_lib.Normal(loc=loc, scale=scale)
-    reinterpreted_batch_ndims = array_ops.shape(dist.batch_shape_tensor())[0]
-    return independent_lib.Independent(
-        dist, reinterpreted_batch_ndims=reinterpreted_batch_ndims)
-  return _fn
-
-
 class _DenseVariational(layers_lib.Layer):
   """Abstract densely-connected class (private, used as implementation base).
 
@@ -294,12 +134,12 @@ class _DenseVariational(layers_lib.Layer):
       activation=None,
       activity_regularizer=None,
       trainable=True,
-      kernel_posterior_fn=default_mean_field_normal_fn(),
+      kernel_posterior_fn=layers_util.default_mean_field_normal_fn(),
       kernel_posterior_tensor_fn=lambda d: d.sample(),
       kernel_prior_fn=lambda dtype, *args: normal_lib.Normal(  # pylint: disable=g-long-lambda
           loc=dtype.as_numpy_dtype(0.), scale=dtype.as_numpy_dtype(1.)),
       kernel_divergence_fn=lambda q, p, ignore: kl_lib.kl_divergence(q, p),
-      bias_posterior_fn=default_mean_field_normal_fn(is_singular=True),
+      bias_posterior_fn=layers_util.default_mean_field_normal_fn(is_singular=True),  # pylint: disable=line-too-long
       bias_posterior_tensor_fn=lambda d: d.sample(),
       bias_prior_fn=None,
       bias_divergence_fn=lambda q, p, ignore: kl_lib.kl_divergence(q, p),
@@ -540,12 +380,13 @@ class DenseReparameterization(_DenseVariational):
       activation=None,
       activity_regularizer=None,
       trainable=True,
-      kernel_posterior_fn=default_mean_field_normal_fn(),
+      kernel_posterior_fn=layers_util.default_mean_field_normal_fn(),
       kernel_posterior_tensor_fn=lambda d: d.sample(),
       kernel_prior_fn=lambda dtype, *args: normal_lib.Normal(  # pylint: disable=g-long-lambda
           loc=dtype.as_numpy_dtype(0.), scale=dtype.as_numpy_dtype(1.)),
       kernel_divergence_fn=lambda q, p, ignore: kl_lib.kl_divergence(q, p),
-      bias_posterior_fn=default_mean_field_normal_fn(is_singular=True),
+      bias_posterior_fn=layers_util.default_mean_field_normal_fn(
+          is_singular=True),
       bias_posterior_tensor_fn=lambda d: d.sample(),
       bias_prior_fn=None,
       bias_divergence_fn=lambda q, p, ignore: kl_lib.kl_divergence(q, p),
@@ -581,12 +422,12 @@ def dense_reparameterization(
     activation=None,
     activity_regularizer=None,
     trainable=True,
-    kernel_posterior_fn=default_mean_field_normal_fn(),
+    kernel_posterior_fn=layers_util.default_mean_field_normal_fn(),
     kernel_posterior_tensor_fn=lambda d: d.sample(),
     kernel_prior_fn=lambda dtype, *args: normal_lib.Normal(  # pylint: disable=g-long-lambda
         loc=dtype.as_numpy_dtype(0.), scale=dtype.as_numpy_dtype(1.)),
     kernel_divergence_fn=lambda q, p, ignore: kl_lib.kl_divergence(q, p),
-    bias_posterior_fn=default_mean_field_normal_fn(is_singular=True),
+    bias_posterior_fn=layers_util.default_mean_field_normal_fn(is_singular=True),  # pylint: disable=line-too-long
     bias_posterior_tensor_fn=lambda d: d.sample(),
     bias_prior_fn=None,
     bias_divergence_fn=lambda q, p, ignore: kl_lib.kl_divergence(q, p),
@@ -812,12 +653,13 @@ class DenseLocalReparameterization(_DenseVariational):
       activation=None,
       activity_regularizer=None,
       trainable=True,
-      kernel_posterior_fn=default_mean_field_normal_fn(),
+      kernel_posterior_fn=layers_util.default_mean_field_normal_fn(),
       kernel_posterior_tensor_fn=lambda d: d.sample(),
       kernel_prior_fn=lambda dtype, *args: normal_lib.Normal(  # pylint: disable=g-long-lambda
           loc=dtype.as_numpy_dtype(0.), scale=dtype.as_numpy_dtype(1.)),
       kernel_divergence_fn=lambda q, p, ignore: kl_lib.kl_divergence(q, p),
-      bias_posterior_fn=default_mean_field_normal_fn(is_singular=True),
+      bias_posterior_fn=layers_util.default_mean_field_normal_fn(
+          is_singular=True),
       bias_posterior_tensor_fn=lambda d: d.sample(),
       bias_prior_fn=None,
       bias_divergence_fn=lambda q, p, ignore: kl_lib.kl_divergence(q, p),
@@ -864,12 +706,13 @@ def dense_local_reparameterization(
     activation=None,
     activity_regularizer=None,
     trainable=True,
-    kernel_posterior_fn=default_mean_field_normal_fn(),
+    kernel_posterior_fn=layers_util.default_mean_field_normal_fn(),
     kernel_posterior_tensor_fn=lambda d: d.sample(),
     kernel_prior_fn=lambda dtype, *args: normal_lib.Normal(  # pylint: disable=g-long-lambda
         loc=dtype.as_numpy_dtype(0.), scale=dtype.as_numpy_dtype(1.)),
     kernel_divergence_fn=lambda q, p, ignore: kl_lib.kl_divergence(q, p),
-    bias_posterior_fn=default_mean_field_normal_fn(is_singular=True),
+    bias_posterior_fn=layers_util.default_mean_field_normal_fn(
+        is_singular=True),
     bias_posterior_tensor_fn=lambda d: d.sample(),
     bias_prior_fn=None,
     bias_divergence_fn=lambda q, p, ignore: kl_lib.kl_divergence(q, p),
@@ -1098,12 +941,13 @@ class DenseFlipout(_DenseVariational):
       activation=None,
       activity_regularizer=None,
       trainable=True,
-      kernel_posterior_fn=default_mean_field_normal_fn(),
+      kernel_posterior_fn=layers_util.default_mean_field_normal_fn(),
       kernel_posterior_tensor_fn=lambda d: d.sample(),
       kernel_prior_fn=lambda dtype, *args: normal_lib.Normal(  # pylint: disable=g-long-lambda
           loc=dtype.as_numpy_dtype(0.), scale=dtype.as_numpy_dtype(1.)),
       kernel_divergence_fn=lambda q, p, ignore: kl_lib.kl_divergence(q, p),
-      bias_posterior_fn=default_mean_field_normal_fn(is_singular=True),
+      bias_posterior_fn=layers_util.default_mean_field_normal_fn(
+          is_singular=True),
       bias_posterior_tensor_fn=lambda d: d.sample(),
       bias_prior_fn=None,
       bias_divergence_fn=lambda q, p, ignore: kl_lib.kl_divergence(q, p),
@@ -1151,7 +995,7 @@ class DenseFlipout(_DenseVariational):
                           array_ops.expand_dims(self.units, 0)], 0),
         dtype=inputs.dtype,
         seed=distribution_util.gen_new_seed(
-            self.seed, salt="conv_variational"))
+            self.seed, salt="dense_flipout"))
     perturbed_inputs = self._matmul(
         inputs * sign_input, self.kernel_posterior_affine_tensor) * sign_output
 
@@ -1166,12 +1010,13 @@ def dense_flipout(
     activation=None,
     activity_regularizer=None,
     trainable=True,
-    kernel_posterior_fn=default_mean_field_normal_fn(),
+    kernel_posterior_fn=layers_util.default_mean_field_normal_fn(),
     kernel_posterior_tensor_fn=lambda d: d.sample(),
     kernel_prior_fn=lambda dtype, *args: normal_lib.Normal(  # pylint: disable=g-long-lambda
         loc=dtype.as_numpy_dtype(0.), scale=dtype.as_numpy_dtype(1.)),
     kernel_divergence_fn=lambda q, p, ignore: kl_lib.kl_divergence(q, p),
-    bias_posterior_fn=default_mean_field_normal_fn(is_singular=True),
+    bias_posterior_fn=layers_util.default_mean_field_normal_fn(
+        is_singular=True),
     bias_posterior_tensor_fn=lambda d: d.sample(),
     bias_prior_fn=None,
     bias_divergence_fn=lambda q, p, ignore: kl_lib.kl_divergence(q, p),
diff --git a/tensorflow/contrib/bayesflow/python/ops/layers_util.py b/tensorflow/contrib/bayesflow/python/ops/layers_util.py
new file mode 100644
index 0000000000..9a4fecf4e5
--- /dev/null
+++ b/tensorflow/contrib/bayesflow/python/ops/layers_util.py
@@ -0,0 +1,180 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Utilities for probabilistic layers.
+"""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import numpy as np
+
+from tensorflow.contrib.distributions.python.ops import deterministic as deterministic_lib
+from tensorflow.contrib.distributions.python.ops import independent as independent_lib
+from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import init_ops
+from tensorflow.python.ops import nn_ops
+from tensorflow.python.ops.distributions import normal as normal_lib
+
+
+def default_loc_scale_fn(
+    is_singular=False,
+    loc_initializer=init_ops.random_normal_initializer(stddev=0.1),
+    untransformed_scale_initializer=init_ops.random_normal_initializer(
+        mean=-3., stddev=0.1),
+    loc_regularizer=None,
+    untransformed_scale_regularizer=None,
+    loc_constraint=None,
+    untransformed_scale_constraint=None):
+  """Makes closure which creates `loc`, `scale` params from `tf.get_variable`.
+
+  This function produces a closure which produces `loc`, `scale` using
+  `tf.get_variable`. The closure accepts the following arguments:
+
+    dtype: Type of parameter's event.
+    shape: Python `list`-like representing the parameter's event shape.
+    name: Python `str` name prepended to any created (or existing)
+      `tf.Variable`s.
+    trainable: Python `bool` indicating all created `tf.Variable`s should be
+      added to the graph collection `GraphKeys.TRAINABLE_VARIABLES`.
+    add_variable_fn: `tf.get_variable`-like `callable` used to create (or
+      access existing) `tf.Variable`s.
+
+  Args:
+    is_singular: Python `bool` indicating if `scale is None`. Default: `False`.
+    loc_initializer: Initializer function for the `loc` parameters.
+      The default is `tf.random_normal_initializer(mean=0., stddev=0.1)`.
+    untransformed_scale_initializer: Initializer function for the `scale`
+      parameters. Default value: `tf.random_normal_initializer(mean=-3.,
+      stddev=0.1)`. This implies the softplus transformed result has mean
+      approximately `0.05` and std. deviation approximately `0.005`.
+    loc_regularizer: Regularizer function for the `loc` parameters.
+      The default (`None`) is to use the `tf.get_variable` default.
+    untransformed_scale_regularizer: Regularizer function for the `scale`
+      parameters. The default (`None`) is to use the `tf.get_variable` default.
+    loc_constraint: An optional projection function to be applied to the
+      loc after being updated by an `Optimizer`. The function must take as input
+      the unprojected variable and must return the projected variable (which
+      must have the same shape). Constraints are not safe to use when doing
+      asynchronous distributed training.
+      The default (`None`) is to use the `tf.get_variable` default.
+    untransformed_scale_constraint: An optional projection function to be
+      applied to the `scale` parameters after being updated by an `Optimizer`
+      (e.g. used to implement norm constraints or value constraints). The
+      function must take as input the unprojected variable and must return the
+      projected variable (which must have the same shape). Constraints are not
+      safe to use when doing asynchronous distributed training. The default
+      (`None`) is to use the `tf.get_variable` default.
+
+  Returns:
+    default_loc_scale_fn: Python `callable` which instantiates `loc`, `scale`
+    parameters from args: `dtype, shape, name, trainable, add_variable_fn`.
+  """
+  def _fn(dtype, shape, name, trainable, add_variable_fn):
+    """Creates `loc`, `scale` parameters."""
+    loc = add_variable_fn(
+        name=name + "_loc",
+        shape=shape,
+        initializer=loc_initializer,
+        regularizer=loc_regularizer,
+        constraint=loc_constraint,
+        dtype=dtype,
+        trainable=trainable)
+    if is_singular:
+      return loc, None
+    untransformed_scale = add_variable_fn(
+        name=name + "_untransformed_scale",
+        shape=shape,
+        initializer=untransformed_scale_initializer,
+        regularizer=untransformed_scale_regularizer,
+        constraint=untransformed_scale_constraint,
+        dtype=dtype,
+        trainable=trainable)
+    scale = (np.finfo(dtype.as_numpy_dtype).eps +
+             nn_ops.softplus(untransformed_scale))
+    return loc, scale
+  return _fn
+
+
+def default_mean_field_normal_fn(
+    is_singular=False,
+    loc_initializer=None,
+    untransformed_scale_initializer=None,
+    loc_regularizer=None,
+    untransformed_scale_regularizer=None,
+    loc_constraint=None,
+    untransformed_scale_constraint=None):
+  """Creates a function to build Normal distributions with trainable params.
+
+  This function produces a closure which produces `tf.distributions.Normal`
+  parameterized by a loc` and `scale` each created using `tf.get_variable`. The
+  produced closure accepts the following arguments:
+
+    name: Python `str` name prepended to any created (or existing)
+      `tf.Variable`s.
+    shape: Python `list`-like representing the parameter's event shape.
+    dtype: Type of parameter's event.
+    trainable: Python `bool` indicating all created `tf.Variable`s should be
+      added to the graph collection `GraphKeys.TRAINABLE_VARIABLES`.
+    add_variable_fn: `tf.get_variable`-like `callable` used to create (or
+      access existing) `tf.Variable`s.
+
+  Args:
+    is_singular: Python `bool` if `True`, forces the special case limit of
+      `scale->0`, i.e., a `Deterministic` distribution.
+    loc_initializer: Initializer function for the `loc` parameters.
+      If `None` (default), values are initialized using the default
+      initializer used by `tf.get_variable`.
+    untransformed_scale_initializer: Initializer function for the `scale`
+      parameters. If `None` (default), values are initialized using the default
+      initializer used by `tf.get_variable`.
+    loc_regularizer: Regularizer function for the `loc` parameters.
+    untransformed_scale_regularizer: Regularizer function for the `scale`
+      parameters.
+    loc_constraint: An optional projection function to be applied to the
+      loc after being updated by an `Optimizer`. The function must take as input
+      the unprojected variable and must return the projected variable (which
+      must have the same shape). Constraints are not safe to use when doing
+      asynchronous distributed training.
+    untransformed_scale_constraint: An optional projection function to be
+      applied to the `scale` parameters after being updated by an `Optimizer`
+      (e.g. used to implement norm constraints or value constraints). The
+      function must take as input the unprojected variable and must return the
+      projected variable (which must have the same shape). Constraints are not
+      safe to use when doing asynchronous distributed training.
+
+  Returns:
+    make_normal_fn: Python `callable` which creates a `tf.distributions.Normal`
+      using from args: `dtype, shape, name, trainable, add_variable_fn`.
+  """
+  loc_scale_fn_ = default_loc_scale_fn(
+      is_singular,
+      loc_initializer,
+      untransformed_scale_initializer,
+      loc_regularizer,
+      untransformed_scale_regularizer,
+      loc_constraint,
+      untransformed_scale_constraint)
+  def _fn(dtype, shape, name, trainable, add_variable_fn):
+    """Creates multivariate `Deterministic` or `Normal` distribution."""
+    loc, scale = loc_scale_fn_(dtype, shape, name, trainable, add_variable_fn)
+    if scale is None:
+      dist = deterministic_lib.Deterministic(loc=loc)
+    else:
+      dist = normal_lib.Normal(loc=loc, scale=scale)
+    reinterpreted_batch_ndims = array_ops.shape(dist.batch_shape_tensor())[0]
+    return independent_lib.Independent(
+        dist, reinterpreted_batch_ndims=reinterpreted_batch_ndims)
+  return _fn
-- 
GitLab


From bb3c383ca03e116a3428bbd885be2f1726ff0c54 Mon Sep 17 00:00:00 2001
From: Yao Zhang <yaozhang@google.com>
Date: Mon, 18 Dec 2017 17:45:50 -0800
Subject: [PATCH 1180/1225] Support more activation ops.

PiperOrigin-RevId: 179493169
---
 tensorflow/core/grappler/op_types.cc          | 26 ++++++++++++++++++-
 tensorflow/core/grappler/op_types.h           | 11 ++++++++
 .../grappler/optimizers/layout_optimizer.cc   | 24 ++++++++++++++---
 3 files changed, 56 insertions(+), 5 deletions(-)

diff --git a/tensorflow/core/grappler/op_types.cc b/tensorflow/core/grappler/op_types.cc
index 24c372a7cf..e4c1da52ec 100644
--- a/tensorflow/core/grappler/op_types.cc
+++ b/tensorflow/core/grappler/op_types.cc
@@ -85,6 +85,8 @@ bool IsDequeueOp(const NodeDef& node) {
 
 bool IsDiv(const NodeDef& node) { return node.op() == "Div"; }
 
+bool IsEluGrad(const NodeDef& node) { return node.op() == "EluGrad"; }
+
 bool IsEnter(const NodeDef& node) {
   const auto& op = node.op();
   return op == "Enter" || op == "RefEnter";
@@ -106,6 +108,8 @@ bool IsIdentity(const NodeDef& node) {
   return op == "Identity" || op == "RefIdentity";
 }
 
+bool IsInvGrad(const NodeDef& node) { return node.op() == "InvGrad"; }
+
 bool IsMatMul(const NodeDef& node) {
   const auto& op = node.op();
   return op == "MatMul" || op == "BatchMatMul" || op == "QuantizedMatMul" ||
@@ -136,7 +140,9 @@ bool IsPlaceholder(const NodeDef& node) {
 
 bool IsRealDiv(const NodeDef& node) { return node.op() == "RealDiv"; }
 
-bool IsReluGrad(const NodeDef& node) { return node.op() == "ReluGrad"; }
+bool IsReciprocalGrad(const NodeDef& node) {
+  return node.op() == "ReciprocalGrad";
+}
 
 bool IsRecv(const NodeDef& node) { return node.op() == "_Recv"; }
 
@@ -146,6 +152,10 @@ bool IsReduction(const NodeDef& node) {
          op == "Mean" || op == "Any" || op == "All";
 }
 
+bool IsReluGrad(const NodeDef& node) { return node.op() == "ReluGrad"; }
+
+bool IsRelu6Grad(const NodeDef& node) { return node.op() == "Relu6Grad"; }
+
 bool IsReshape(const NodeDef& node) { return (node.op() == "Reshape"); }
 
 bool IsRestore(const NodeDef& node) {
@@ -153,16 +163,28 @@ bool IsRestore(const NodeDef& node) {
           node.op() == "RestoreSlice");
 }
 
+bool IsRsqrtGrad(const NodeDef& node) { return node.op() == "RsqrtGrad"; }
+
+bool IsSeluGrad(const NodeDef& node) { return node.op() == "SeluGrad"; }
+
 bool IsSend(const NodeDef& node) { return node.op() == "_Send"; }
 
 bool IsShape(const NodeDef& node) { return node.op() == "Shape"; }
 
 bool IsShapeN(const NodeDef& node) { return node.op() == "ShapeN"; }
 
+bool IsSigmoidGrad(const NodeDef& node) { return node.op() == "SigmoidGrad"; }
+
 bool IsSlice(const NodeDef& node) { return node.op() == "Slice"; }
 
+bool IsSoftplusGrad(const NodeDef& node) { return node.op() == "SoftplusGrad"; }
+
+bool IsSoftsignGrad(const NodeDef& node) { return node.op() == "SoftsignGrad"; }
+
 bool IsSplit(const NodeDef& node) { return node.op() == "Split"; }
 
+bool IsSqrtGrad(const NodeDef& node) { return node.op() == "SqrtGrad"; }
+
 bool IsSquaredDifference(const NodeDef& node) {
   return node.op() == "SquaredDifference";
 }
@@ -183,6 +205,8 @@ bool IsSwitch(const NodeDef& node) {
   return op == "Switch" || op == "RefSwitch";
 }
 
+bool IsTanhGrad(const NodeDef& node) { return node.op() == "TanhGrad"; }
+
 bool IsTranspose(const NodeDef& node) { return node.op() == "Transpose"; }
 
 bool IsVariable(const NodeDef& node) {
diff --git a/tensorflow/core/grappler/op_types.h b/tensorflow/core/grappler/op_types.h
index 6a88dc21e0..0e246a661f 100644
--- a/tensorflow/core/grappler/op_types.h
+++ b/tensorflow/core/grappler/op_types.h
@@ -39,11 +39,13 @@ bool IsDepthwiseConv2dNativeBackpropFilter(const NodeDef& node);
 bool IsDepthwiseConv2dNativeBackpropInput(const NodeDef& node);
 bool IsDequeueOp(const NodeDef& node);
 bool IsDiv(const NodeDef& node);
+bool IsEluGrad(const NodeDef& node);
 bool IsEnter(const NodeDef& node);
 bool IsExit(const NodeDef& node);
 bool IsFloorMod(const NodeDef& node);
 bool IsFusedBatchNormGradV1(const NodeDef& node);
 bool IsIdentity(const NodeDef& node);
+bool IsInvGrad(const NodeDef& node);
 bool IsMerge(const NodeDef& node);
 bool IsMul(const NodeDef& node);
 bool IsMatMul(const NodeDef& node);
@@ -52,22 +54,31 @@ bool IsPad(const NodeDef& node);
 bool IsNoOp(const NodeDef& node);
 bool IsPlaceholder(const NodeDef& node);
 bool IsRealDiv(const NodeDef& node);
+bool IsRelu6Grad(const NodeDef& node);
 bool IsReluGrad(const NodeDef& node);
+bool IsReciprocalGrad(const NodeDef& node);
 bool IsRecv(const NodeDef& node);
 bool IsReduction(const NodeDef& node);
 bool IsReshape(const NodeDef& node);
 bool IsRestore(const NodeDef& node);
+bool IsRsqrtGrad(const NodeDef& node);
+bool IsSeluGrad(const NodeDef& node);
 bool IsSend(const NodeDef& node);
 bool IsSlice(const NodeDef& node);
 bool IsShape(const NodeDef& node);
 bool IsShapeN(const NodeDef& node);
+bool IsSigmoidGrad(const NodeDef& node);
+bool IsSoftplusGrad(const NodeDef& node);
+bool IsSoftsignGrad(const NodeDef& node);
 bool IsSplit(const NodeDef& node);
+bool IsSqrtGrad(const NodeDef& node);
 bool IsSquaredDifference(const NodeDef& node);
 bool IsSqueeze(const NodeDef& node);
 bool IsStopGradient(const NodeDef& node);
 bool IsSub(const NodeDef& node);
 bool IsSum(const NodeDef& node);
 bool IsSwitch(const NodeDef& node);
+bool IsTanhGrad(const NodeDef& node);
 bool IsTranspose(const NodeDef& node);
 bool IsVariable(const NodeDef& node);
 
diff --git a/tensorflow/core/grappler/optimizers/layout_optimizer.cc b/tensorflow/core/grappler/optimizers/layout_optimizer.cc
index 74c0a14d67..a998d0513e 100644
--- a/tensorflow/core/grappler/optimizers/layout_optimizer.cc
+++ b/tensorflow/core/grappler/optimizers/layout_optimizer.cc
@@ -86,6 +86,8 @@ std::set<string> GetOpsFormatAgnostic() {
                                           "Concat",
                                           "ConcatV2",
                                           "Digamma",
+                                          "Elu",
+                                          "EluGrad",
                                           "Erf",
                                           "Erfc",
                                           "Exp",
@@ -109,8 +111,11 @@ std::set<string> GetOpsFormatAgnostic() {
                                           "ReciprocalGrad",
                                           "Relu",
                                           "Relu6",
+                                          "Relu6Grad",
                                           "ReluGrad",
                                           "Rint",
+                                          "Selu",
+                                          "SeluGrad",
                                           "Shape",
                                           "ShapeN",
                                           "Sigmoid",
@@ -119,6 +124,8 @@ std::set<string> GetOpsFormatAgnostic() {
                                           "Sin",
                                           "Sinh",
                                           "Slice",
+                                          "Softplus",
+                                          "SoftplusGrad",
                                           "Split",
                                           "Switch",
                                           "RefMerge",
@@ -180,6 +187,15 @@ bool IsMaxPoolGradV1(const NodeDef& node) {
   return op == "MaxPoolGrad";
 }
 
+bool IsUnaryGrad(const NodeDef& node) {
+  bool is_unary_grad =
+      IsEluGrad(node) || IsInvGrad(node) || IsReciprocalGrad(node) ||
+      IsRelu6Grad(node) || IsReluGrad(node) || IsRsqrtGrad(node) ||
+      IsSeluGrad(node) || IsSigmoidGrad(node) || IsSoftplusGrad(node) ||
+      IsSoftsignGrad(node) || IsSqrtGrad(node) || IsTanhGrad(node);
+  return is_unary_grad;
+}
+
 class GraphProcessor {
  public:
   GraphProcessor(const VirtualPlacer& virtual_placer,
@@ -1241,9 +1257,9 @@ class SplitProcessor : public ConcatProcessor {
   }
 };
 
-class ReluGradProcessor : public AgnosticNodeProcessor {
+class UnaryGradProcessor : public AgnosticNodeProcessor {
  public:
-  explicit ReluGradProcessor(const OptimizeContext& opt_cxt)
+  explicit UnaryGradProcessor(const OptimizeContext& opt_cxt)
       : AgnosticNodeProcessor(opt_cxt) {}
 
  protected:
@@ -1524,8 +1540,6 @@ class DataLayoutOptimizer : GraphProcessor {
             node_processor.reset(new MergeProcessor(opt_cxt));
           } else if (IsPad(*node)) {
             node_processor.reset(new PadProcessor(opt_cxt));
-          } else if (IsReluGrad(*node)) {
-            node_processor.reset(new ReluGradProcessor(opt_cxt));
           } else if (IsSlice(*node)) {
             node_processor.reset(new SliceProcessor(opt_cxt));
           } else if (IsShape(*node) || IsShapeN(*node)) {
@@ -1538,6 +1552,8 @@ class DataLayoutOptimizer : GraphProcessor {
             node_processor.reset(new SumProcessor(opt_cxt));
           } else if (IsSwitch(*node)) {
             node_processor.reset(new SwitchProcessor(opt_cxt));
+          } else if (IsUnaryGrad(*node)) {
+            node_processor.reset(new UnaryGradProcessor(opt_cxt));
           } else {
             node_processor.reset(new AgnosticNodeProcessor(opt_cxt));
           }
-- 
GitLab


From 4eb5e6cfcc360de5ada42f9d03d30ac5eab38601 Mon Sep 17 00:00:00 2001
From: Yao Zhang <yaozhang@google.com>
Date: Mon, 18 Dec 2017 18:53:35 -0800
Subject: [PATCH 1181/1225] Support more unary ops.

PiperOrigin-RevId: 179498141
---
 .../grappler/optimizers/layout_optimizer.cc   | 36 ++++++++++++++++---
 1 file changed, 31 insertions(+), 5 deletions(-)

diff --git a/tensorflow/core/grappler/optimizers/layout_optimizer.cc b/tensorflow/core/grappler/optimizers/layout_optimizer.cc
index a998d0513e..bcf785f272 100644
--- a/tensorflow/core/grappler/optimizers/layout_optimizer.cc
+++ b/tensorflow/core/grappler/optimizers/layout_optimizer.cc
@@ -72,19 +72,26 @@ std::set<string> GetOpsFormatSupported() {
 // TODO(yaozhang): enable SumProcessor with auto-tuning. Currently disabled
 // because of the worse performance in some cases.
 std::set<string> GetOpsFormatAgnostic() {
-  std::set<string> ops_format_agnostic = {"Add",
+  std::set<string> ops_format_agnostic = {"Abs",
+                                          "Add",
                                           "AddN",
                                           "Acos",
                                           "Acosh",
+                                          "Angle",
                                           "Asin",
                                           "Asinh",
                                           "Atan",
                                           "Atanh",
+                                          "Bitcast",
+                                          "Cast",
                                           "Ceil",
+                                          "CheckNumerics",
                                           "Cos",
                                           "Cosh",
+                                          "ComplexAbs",
                                           "Concat",
                                           "ConcatV2",
+                                          "Conj",
                                           "Digamma",
                                           "Elu",
                                           "EluGrad",
@@ -93,7 +100,9 @@ std::set<string> GetOpsFormatAgnostic() {
                                           "Exp",
                                           "Expm1",
                                           "Floor",
+                                          "GuaranteeConst",
                                           "Identity",
+                                          "Imag",
                                           "Inv",
                                           "InvGrad",
                                           "IsFinite",
@@ -105,7 +114,10 @@ std::set<string> GetOpsFormatAgnostic() {
                                           "Merge",
                                           "Mul",
                                           "Neg",
+                                          "OnesLike",
                                           "Pad",
+                                          "PreventGradient",
+                                          "Real",
                                           "RealDiv",
                                           "Reciprocal",
                                           "ReciprocalGrad",
@@ -124,10 +136,12 @@ std::set<string> GetOpsFormatAgnostic() {
                                           "Sin",
                                           "Sinh",
                                           "Slice",
+                                          "Snapshot",
                                           "Softplus",
                                           "SoftplusGrad",
                                           "Split",
                                           "Switch",
+                                          "RefIdentity",
                                           "RefMerge",
                                           "RefSwitch",
                                           "Round",
@@ -138,10 +152,12 @@ std::set<string> GetOpsFormatAgnostic() {
                                           "Square",
                                           "SquaredDifference",
                                           "Squeeze",
+                                          "StopGradient",
                                           /*"Sum",*/ "Sub",
                                           "Tan",
                                           "Tanh",
-                                          "TanhGrad"};
+                                          "TanhGrad",
+                                          "ZerosLike"};
   return ops_format_agnostic;
 }
 
@@ -586,11 +602,21 @@ class NodeProcessor : public GraphProcessor {
             if (op == "Transpose") {
               added_node_name = AddPrefixToNodeName(added_node_base_name,
                                                     kTransposeNCHWToNHWC, "-");
-              TF_RETURN_IF_ERROR(HasAttribute(*node_, "T"));
+              DataType dtype;
+              if (op == "Imag" || op == "Real" || op == "Angle" ||
+                  op == "Conj" || op == "ComplexAbs") {
+                TF_RETURN_IF_ERROR(HasAttribute(*node_, "Tout"));
+                dtype = node_->attr().at("Tout").type();
+              } else if (op == "Bitcast") {
+                TF_RETURN_IF_ERROR(HasAttribute(*node_, "type"));
+                dtype = node_->attr().at("type").type();
+              } else {
+                TF_RETURN_IF_ERROR(HasAttribute(*node_, "T"));
+                dtype = node_->attr().at("T").type();
+              }
               TF_RETURN_IF_ERROR(HasAttribute(*node_, "_output_shapes"));
               AddNodeTranspose(
-                  added_node_name, input, const_name,
-                  node_->attr().at("T").type(),
+                  added_node_name, input, const_name, dtype,
                   node_->attr().at("_output_shapes").list().shape(0), false);
             } else if (op == "DataFormatVecPermute") {
               added_node_name = AddPrefixToNodeName(added_node_base_name,
-- 
GitLab


From 395907baba0494d2a59d7fede5b14da1c177802b Mon Sep 17 00:00:00 2001
From: Yong Tang <yong.tang.github@outlook.com>
Date: Mon, 4 Dec 2017 20:31:51 -0800
Subject: [PATCH 1182/1225] Add decode_compressed support

This fix tries to address the issue raised in 14887 to add
decode_compressed support.

The API will take a string Tensor (compressed with either ZLIB
or GZIP) and output a string Tensor of the same shape uncompressed.

This fix fixes 14887.

Signed-off-by: Yong Tang <yong.tang.github@outlook.com>
---
 tensorflow/core/kernels/BUILD                 |   9 ++
 .../core/kernels/decode_compressed_op.cc      | 113 ++++++++++++++++++
 2 files changed, 122 insertions(+)
 create mode 100644 tensorflow/core/kernels/decode_compressed_op.cc

diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD
index a24335e1ea..88e371d111 100644
--- a/tensorflow/core/kernels/BUILD
+++ b/tensorflow/core/kernels/BUILD
@@ -3497,6 +3497,7 @@ cc_library(
     deps = [
         ":decode_csv_op",
         ":decode_raw_op",
+        ":decode_compressed_op",
         ":example_parsing_ops",
         ":parse_tensor_op",
         ":string_to_number_op",
@@ -3523,6 +3524,14 @@ tf_kernel_library(
     deps = PARSING_DEPS,
 )
 
+tf_kernel_library(
+    name = "decode_compressed_op",
+    prefix = "decode_compressed_op",
+    deps = [
+        "//tensorflow/core:lib_internal",
+    ] + PARSING_DEPS,
+)
+
 tf_kernel_library(
     name = "example_parsing_ops",
     prefix = "example_parsing_ops",
diff --git a/tensorflow/core/kernels/decode_compressed_op.cc b/tensorflow/core/kernels/decode_compressed_op.cc
new file mode 100644
index 0000000000..b0abdf655d
--- /dev/null
+++ b/tensorflow/core/kernels/decode_compressed_op.cc
@@ -0,0 +1,113 @@
+/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+// See docs in ../ops/parse_ops.cc.
+
+#include <algorithm>
+#include "tensorflow/core/framework/op_kernel.h"
+#include "tensorflow/core/framework/tensor.h"
+#include "tensorflow/core/framework/tensor_shape.h"
+#include "tensorflow/core/framework/types.h"
+#include "tensorflow/core/lib/core/errors.h"
+#include "tensorflow/core/lib/io/zlib_compression_options.h"
+#include "tensorflow/core/lib/io/zlib_inputstream.h"
+
+namespace tensorflow {
+namespace {
+// Wrap memory buffer into InputStreamInterface
+class MemoryInputStream : public io::InputStreamInterface {
+ public:
+  explicit MemoryInputStream(const char* buffer, size_t length)
+      : buf_(buffer), len_(length), pos_(0) {}
+
+  ~MemoryInputStream(){};
+
+  Status ReadNBytes(int64 bytes_to_read, string* result) override {
+    result->clear();
+    if (bytes_to_read < 0) {
+      return errors::InvalidArgument("Can't read a negative number of bytes: ",
+                                     bytes_to_read);
+    }
+    int64 bytes = bytes_to_read;
+    Status s = Status::OK();
+    if (pos_ + bytes_to_read > len_) {
+      bytes = len_ - pos_;
+      s = errors::OutOfRange("reached end of file");
+    }
+    if (bytes > 0) {
+      result->resize(bytes);
+      memcpy(&(*result)[0], &buf_[pos_], bytes);
+      pos_ += bytes;
+    }
+    return s;
+  }
+
+  int64 Tell() const override { return pos_; }
+
+  Status Reset() override {
+    pos_ = 0;
+    return Status::OK();
+  }
+
+ private:
+  const char* buf_;  // Not owned.
+  int64 len_;
+  int64 pos_ = 0;  // Tracks where we are in the file.
+};
+}  // namespace
+
+class DecodeCompressedOp : public OpKernel {
+ public:
+  explicit DecodeCompressedOp(OpKernelConstruction* context)
+      : OpKernel(context) {
+    OP_REQUIRES_OK(context,
+                   context->GetAttr("compression_type", &compression_type_));
+  }
+
+  void Compute(OpKernelContext* context) override {
+    const Tensor* bytes_tensor;
+    OP_REQUIRES_OK(context, context->input("bytes", &bytes_tensor));
+    const auto& bytes_flat = bytes_tensor->flat<string>();
+
+    Tensor* output_tensor = nullptr;
+    OP_REQUIRES_OK(context,
+                   context->allocate_output("output", bytes_tensor->shape(),
+                                            &output_tensor));
+    auto output_flat = output_tensor->flat<string>();
+    const io::ZlibCompressionOptions zlib_options =
+        compression_type_ == "ZLIB" ? io::ZlibCompressionOptions::DEFAULT()
+                                    : io::ZlibCompressionOptions::GZIP();
+    for (int64 i = 0; i < bytes_flat.size(); i++) {
+      std::unique_ptr<MemoryInputStream> input_stream(
+          new MemoryInputStream(bytes_flat(i).data(), bytes_flat(i).size()));
+      std::unique_ptr<io::ZlibInputStream> zlib_stream(new io::ZlibInputStream(
+          input_stream.get(), static_cast<size_t>(kBufferSize),
+          static_cast<size_t>(kBufferSize), zlib_options));
+      std::string output_string;
+      Status s = zlib_stream->ReadNBytes(INT_MAX, &output_string);
+      OP_REQUIRES(context, (s.ok() || errors::IsOutOfRange(s)), s);
+      output_flat(i) = output_string;
+    }
+  }
+
+ private:
+  enum { kBufferSize = 256 << 10 /* 256 kB */ };
+  std::string compression_type_;
+};
+
+REGISTER_KERNEL_BUILDER(Name("DecodeCompressed").Device(DEVICE_CPU),
+                        DecodeCompressedOp)
+
+}  // namespace tensorflow
-- 
GitLab


From 5f65cc6648be94adf55b9e6b2589ffc669a82c5a Mon Sep 17 00:00:00 2001
From: Yong Tang <yong.tang.github@outlook.com>
Date: Mon, 4 Dec 2017 20:33:34 -0800
Subject: [PATCH 1183/1225] Add DecodeCompressed ops to parsing_ops.cc

Signed-off-by: Yong Tang <yong.tang.github@outlook.com>
---
 tensorflow/core/ops/parsing_ops.cc | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/tensorflow/core/ops/parsing_ops.cc b/tensorflow/core/ops/parsing_ops.cc
index 40ec792ef8..36d360b7cd 100644
--- a/tensorflow/core/ops/parsing_ops.cc
+++ b/tensorflow/core/ops/parsing_ops.cc
@@ -48,6 +48,20 @@ output: A Tensor with one more dimension than the input `bytes`.  The
   of `bytes` divided by the number of bytes to represent `out_type`.
 )doc");
 
+REGISTER_OP("DecodeCompressed")
+    .Input("bytes: string")
+    .Output("output: string")
+    .Attr("compression_type: string = ''")
+    .SetShapeFn(shape_inference::UnchangedShape)
+    .Doc(R"doc(
+Reinterpret the bytes of a string as a vector of numbers.
+
+bytes: All the elements must have the same length.
+compression_type: A scalar containing either (i) the empty string (no
+  compression), (ii) "ZLIB", or (iii) "GZIP".
+output: A Tensor with the same shape as input `bytes`.
+)doc");
+
 REGISTER_OP("ParseExample")
     .Input("serialized: string")
     .Input("names: string")
-- 
GitLab


From 0c64eedbedaccabe1693b35a6a819202b655b8ab Mon Sep 17 00:00:00 2001
From: Yong Tang <yong.tang.github@outlook.com>
Date: Mon, 4 Dec 2017 20:33:57 -0800
Subject: [PATCH 1184/1225] Add test case for decode_compressed support

Signed-off-by: Yong Tang <yong.tang.github@outlook.com>
---
 tensorflow/python/kernel_tests/BUILD          | 13 ++++
 .../kernel_tests/decode_compressed_op_test.py | 60 +++++++++++++++++++
 2 files changed, 73 insertions(+)
 create mode 100644 tensorflow/python/kernel_tests/decode_compressed_op_test.py

diff --git a/tensorflow/python/kernel_tests/BUILD b/tensorflow/python/kernel_tests/BUILD
index 31d3bd1b74..640edb26bc 100644
--- a/tensorflow/python/kernel_tests/BUILD
+++ b/tensorflow/python/kernel_tests/BUILD
@@ -294,6 +294,19 @@ tf_py_test(
     ],
 )
 
+tf_py_test(
+    name = "decode_compressed_op_test",
+    size = "small",
+    srcs = ["decode_compressed_op_test.py"],
+    additional_deps = [
+        "//third_party/py/numpy",
+        "//tensorflow/python:array_ops",
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:framework_for_generated_wrappers",
+        "//tensorflow/python:parsing_ops",
+    ],
+)
+
 cuda_py_test(
     name = "determinant_op_test",
     size = "small",
diff --git a/tensorflow/python/kernel_tests/decode_compressed_op_test.py b/tensorflow/python/kernel_tests/decode_compressed_op_test.py
new file mode 100644
index 0000000000..2c07692398
--- /dev/null
+++ b/tensorflow/python/kernel_tests/decode_compressed_op_test.py
@@ -0,0 +1,60 @@
+# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for DecodeRaw op from parsing_ops."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import numpy as np
+import gzip
+import sys
+import zlib
+
+from six import StringIO
+
+from tensorflow.python.framework import dtypes
+from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import parsing_ops
+from tensorflow.python.platform import test
+
+
+class DecodeCompressedOpTest(test.TestCase):
+
+  def _compress(self, bytes, compression_type):
+    if compression_type == "ZLIB":
+      return zlib.compress(bytes)
+    else:
+      out = StringIO()
+      with gzip.GzipFile(fileobj=out, mode="w") as f:
+        f.write(bytes)
+      return out.getvalue()
+
+  def testDecompress(self):
+    for compression_type in ["ZLIB", "GZIP"]:
+      with self.test_session():
+        in_bytes = array_ops.placeholder(dtypes.string, shape=[2])
+        decompressed = parsing_ops.decode_compressed(
+            in_bytes, compression_type=compression_type)
+        self.assertEqual([2], decompressed.get_shape().as_list())
+
+        result = decompressed.eval(
+            feed_dict={in_bytes: [self._compress("AaAA", compression_type),
+                                  self._compress("bBbb", compression_type)]})
+        self.assertAllEqual(["AaAA", "bBbb"], result)
+
+
+if __name__ == "__main__":
+  test.main()
-- 
GitLab


From e2fb0f1c75252929415a52f18f2d6f9dad9f509f Mon Sep 17 00:00:00 2001
From: Yong Tang <yong.tang.github@outlook.com>
Date: Mon, 4 Dec 2017 20:43:29 -0800
Subject: [PATCH 1185/1225] Add addtional test case to showcase combination of
 decode_compressed + decode_raw

Signed-off-by: Yong Tang <yong.tang.github@outlook.com>
---
 .../kernel_tests/decode_compressed_op_test.py       | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/tensorflow/python/kernel_tests/decode_compressed_op_test.py b/tensorflow/python/kernel_tests/decode_compressed_op_test.py
index 2c07692398..c5f842516b 100644
--- a/tensorflow/python/kernel_tests/decode_compressed_op_test.py
+++ b/tensorflow/python/kernel_tests/decode_compressed_op_test.py
@@ -55,6 +55,19 @@ class DecodeCompressedOpTest(test.TestCase):
                                   self._compress("bBbb", compression_type)]})
         self.assertAllEqual(["AaAA", "bBbb"], result)
 
+  def testDecompressWithRaw(self):
+    for compression_type in ["ZLIB", "GZIP"]:
+      with self.test_session():
+        in_bytes = array_ops.placeholder(dtypes.string, shape=[None])
+        decompressed = parsing_ops.decode_compressed(
+            in_bytes, compression_type=compression_type)
+        decode = parsing_ops.decode_raw(decompressed, out_type=dtypes.int16)
+
+        result = decode.eval(
+            feed_dict={in_bytes: [self._compress("AaBC", compression_type)]})
+        self.assertAllEqual(
+            [[ord("A") + ord("a") * 256, ord("B") + ord("C") * 256]], result)
+
 
 if __name__ == "__main__":
   test.main()
-- 
GitLab


From 0864aded79062b7f344e2c90c06e18d037d4988a Mon Sep 17 00:00:00 2001
From: Yong Tang <yong.tang.github@outlook.com>
Date: Mon, 4 Dec 2017 20:52:23 -0800
Subject: [PATCH 1186/1225] Add NONE ("") compression type support for
 decode_compressed

Signed-off-by: Yong Tang <yong.tang.github@outlook.com>
---
 .../core/kernels/decode_compressed_op.cc      | 38 ++++++++++++-------
 1 file changed, 25 insertions(+), 13 deletions(-)

diff --git a/tensorflow/core/kernels/decode_compressed_op.cc b/tensorflow/core/kernels/decode_compressed_op.cc
index b0abdf655d..3f0c0aef55 100644
--- a/tensorflow/core/kernels/decode_compressed_op.cc
+++ b/tensorflow/core/kernels/decode_compressed_op.cc
@@ -74,6 +74,11 @@ class DecodeCompressedOp : public OpKernel {
       : OpKernel(context) {
     OP_REQUIRES_OK(context,
                    context->GetAttr("compression_type", &compression_type_));
+    OP_REQUIRES(context,
+                (compression_type_ == "" || compression_type_ == "ZLIB" ||
+                 compression_type_ == "GZIP"),
+                errors::InvalidArgument(
+                    "Only ZLIB, GZIP or NONE are supported compressions"));
   }
 
   void Compute(OpKernelContext* context) override {
@@ -86,19 +91,26 @@ class DecodeCompressedOp : public OpKernel {
                    context->allocate_output("output", bytes_tensor->shape(),
                                             &output_tensor));
     auto output_flat = output_tensor->flat<string>();
-    const io::ZlibCompressionOptions zlib_options =
-        compression_type_ == "ZLIB" ? io::ZlibCompressionOptions::DEFAULT()
-                                    : io::ZlibCompressionOptions::GZIP();
-    for (int64 i = 0; i < bytes_flat.size(); i++) {
-      std::unique_ptr<MemoryInputStream> input_stream(
-          new MemoryInputStream(bytes_flat(i).data(), bytes_flat(i).size()));
-      std::unique_ptr<io::ZlibInputStream> zlib_stream(new io::ZlibInputStream(
-          input_stream.get(), static_cast<size_t>(kBufferSize),
-          static_cast<size_t>(kBufferSize), zlib_options));
-      std::string output_string;
-      Status s = zlib_stream->ReadNBytes(INT_MAX, &output_string);
-      OP_REQUIRES(context, (s.ok() || errors::IsOutOfRange(s)), s);
-      output_flat(i) = output_string;
+    if (compression_type_ == "") {
+      for (int64 i = 0; i < bytes_flat.size(); i++) {
+        output_flat(i) = bytes_flat(i);
+      }
+    } else {
+      const io::ZlibCompressionOptions zlib_options =
+          compression_type_ == "ZLIB" ? io::ZlibCompressionOptions::DEFAULT()
+                                      : io::ZlibCompressionOptions::GZIP();
+      for (int64 i = 0; i < bytes_flat.size(); i++) {
+        std::unique_ptr<MemoryInputStream> input_stream(
+            new MemoryInputStream(bytes_flat(i).data(), bytes_flat(i).size()));
+        std::unique_ptr<io::ZlibInputStream> zlib_stream(
+            new io::ZlibInputStream(
+                input_stream.get(), static_cast<size_t>(kBufferSize),
+                static_cast<size_t>(kBufferSize), zlib_options));
+        std::string output_string;
+        Status s = zlib_stream->ReadNBytes(INT_MAX, &output_string);
+        OP_REQUIRES(context, (s.ok() || errors::IsOutOfRange(s)), s);
+        output_flat(i) = output_string;
+      }
     }
   }
 
-- 
GitLab


From 6e3b4092a4cecb38c61c79397c161cc7249d1c68 Mon Sep 17 00:00:00 2001
From: Yong Tang <yong.tang.github@outlook.com>
Date: Mon, 4 Dec 2017 20:52:35 -0800
Subject: [PATCH 1187/1225] Add test case for NONE ("") compression type with
 decode_compressed

Signed-off-by: Yong Tang <yong.tang.github@outlook.com>
---
 .../python/kernel_tests/decode_compressed_op_test.py      | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/tensorflow/python/kernel_tests/decode_compressed_op_test.py b/tensorflow/python/kernel_tests/decode_compressed_op_test.py
index c5f842516b..c10e3f1c33 100644
--- a/tensorflow/python/kernel_tests/decode_compressed_op_test.py
+++ b/tensorflow/python/kernel_tests/decode_compressed_op_test.py
@@ -34,7 +34,9 @@ from tensorflow.python.platform import test
 class DecodeCompressedOpTest(test.TestCase):
 
   def _compress(self, bytes, compression_type):
-    if compression_type == "ZLIB":
+    if compression_type == "":
+      return bytes
+    elif compression_type == "ZLIB":
       return zlib.compress(bytes)
     else:
       out = StringIO()
@@ -43,7 +45,7 @@ class DecodeCompressedOpTest(test.TestCase):
       return out.getvalue()
 
   def testDecompress(self):
-    for compression_type in ["ZLIB", "GZIP"]:
+    for compression_type in ["ZLIB", "GZIP", ""]:
       with self.test_session():
         in_bytes = array_ops.placeholder(dtypes.string, shape=[2])
         decompressed = parsing_ops.decode_compressed(
@@ -56,7 +58,7 @@ class DecodeCompressedOpTest(test.TestCase):
         self.assertAllEqual(["AaAA", "bBbb"], result)
 
   def testDecompressWithRaw(self):
-    for compression_type in ["ZLIB", "GZIP"]:
+    for compression_type in ["ZLIB", "GZIP", ""]:
       with self.test_session():
         in_bytes = array_ops.placeholder(dtypes.string, shape=[None])
         decompressed = parsing_ops.decode_compressed(
-- 
GitLab


From 0d4530104957dd9e3b7ef6cc32d436f35a3e168c Mon Sep 17 00:00:00 2001
From: Yong Tang <yong.tang.github@outlook.com>
Date: Tue, 5 Dec 2017 07:12:08 -0800
Subject: [PATCH 1188/1225] Buildifier tensorflow/core/kernels/BUILD

Signed-off-by: Yong Tang <yong.tang.github@outlook.com>
---
 tensorflow/core/kernels/BUILD | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD
index 88e371d111..ae39c4522d 100644
--- a/tensorflow/core/kernels/BUILD
+++ b/tensorflow/core/kernels/BUILD
@@ -3495,9 +3495,9 @@ tf_kernel_library(
 cc_library(
     name = "parsing",
     deps = [
+        ":decode_compressed_op",
         ":decode_csv_op",
         ":decode_raw_op",
-        ":decode_compressed_op",
         ":example_parsing_ops",
         ":parse_tensor_op",
         ":string_to_number_op",
-- 
GitLab


From cfc8e70b6018174570289373929256817eebeae6 Mon Sep 17 00:00:00 2001
From: Yong Tang <yong.tang.github@outlook.com>
Date: Mon, 18 Dec 2017 21:24:14 +0000
Subject: [PATCH 1189/1225] Update docstring.

Signed-off-by: Yong Tang <yong.tang.github@outlook.com>
---
 tensorflow/core/ops/parsing_ops.cc | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/tensorflow/core/ops/parsing_ops.cc b/tensorflow/core/ops/parsing_ops.cc
index 36d360b7cd..2a5f037373 100644
--- a/tensorflow/core/ops/parsing_ops.cc
+++ b/tensorflow/core/ops/parsing_ops.cc
@@ -54,12 +54,13 @@ REGISTER_OP("DecodeCompressed")
     .Attr("compression_type: string = ''")
     .SetShapeFn(shape_inference::UnchangedShape)
     .Doc(R"doc(
-Reinterpret the bytes of a string as a vector of numbers.
+Decompress the bytes of a string to the output string.
 
-bytes: All the elements must have the same length.
+bytes: A Tensor of string which is compressed.
 compression_type: A scalar containing either (i) the empty string (no
   compression), (ii) "ZLIB", or (iii) "GZIP".
-output: A Tensor with the same shape as input `bytes`.
+output: A Tensor with the same shape as input `bytes`, uncompressed
+  from bytes.
 )doc");
 
 REGISTER_OP("ParseExample")
-- 
GitLab


From 1203dc0832133250de970ce10833ba43b71daf7b Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 18 Dec 2017 19:07:23 -0800
Subject: [PATCH 1190/1225] [XLA] Hlo parser: don't crash if no computation is
 specified as ENTRY. Also disallow multiple entry computations and multiple
 root intructions.

PiperOrigin-RevId: 179498839
---
 tensorflow/compiler/xla/service/hlo_module.h  |  4 +
 .../compiler/xla/tools/parser/README.md       |  4 +
 .../compiler/xla/tools/parser/hlo_parser.cc   | 73 ++++++++++++------
 .../xla/tools/parser/hlo_parser_test.cc       | 77 +++++++++++++++++++
 4 files changed, 136 insertions(+), 22 deletions(-)

diff --git a/tensorflow/compiler/xla/service/hlo_module.h b/tensorflow/compiler/xla/service/hlo_module.h
index f37885d043..d3bb46bffc 100644
--- a/tensorflow/compiler/xla/service/hlo_module.h
+++ b/tensorflow/compiler/xla/service/hlo_module.h
@@ -98,6 +98,10 @@ class HloModule {
     return config_.mutable_entry_computation_layout();
   }
 
+  ComputationLayout entry_computation_layout() const {
+    return config_.entry_computation_layout();
+  }
+
   const VersionedComputationHandle& entry_computation_handle() const {
     return entry_computation_handle_;
   }
diff --git a/tensorflow/compiler/xla/tools/parser/README.md b/tensorflow/compiler/xla/tools/parser/README.md
index 93bbc7d659..2e329cc513 100644
--- a/tensorflow/compiler/xla/tools/parser/README.md
+++ b/tensorflow/compiler/xla/tools/parser/README.md
@@ -5,6 +5,8 @@ hlo_module
   : 'HloModule' name computations
   ;
 
+/* If no computation is marked as ENTRY, the last computation will be the entry
+computation of the module.*/
 computations
   : computation
   | computation computations
@@ -17,6 +19,8 @@ computation
   | name instruction_list
   ;
 
+/* If no instruction is marked as ROOT, the last instruction will be the root of
+its computation. */
 instruction_list
   : '{' instruction_list1 '}'
   ;
diff --git a/tensorflow/compiler/xla/tools/parser/hlo_parser.cc b/tensorflow/compiler/xla/tools/parser/hlo_parser.cc
index 06812f677d..68fb9dd9ec 100644
--- a/tensorflow/compiler/xla/tools/parser/hlo_parser.cc
+++ b/tensorflow/compiler/xla/tools/parser/hlo_parser.cc
@@ -59,7 +59,7 @@ class HloParser {
   // ParseXXX returns false if an error occurred.
   bool ParseHloModule();
   bool ParseComputations();
-  bool ParseComputation();
+  bool ParseComputation(HloComputation** entry_computation);
   bool ParseInstructionList(HloComputation::Builder* builder,
                             string* root_name);
   bool ParseInstruction(HloComputation::Builder* builder, string* root_name);
@@ -218,6 +218,7 @@ class HloParser {
 
   HloLexer lexer_;
   std::unique_ptr<HloModule> module_;
+  std::vector<std::unique_ptr<HloComputation>> computations_;
   const HloModuleConfig config_;
   std::vector<string> error_;
 };
@@ -266,17 +267,52 @@ bool HloParser::ParseHloModule() {
 
 // computations ::= (computation)+
 bool HloParser::ParseComputations() {
+  HloComputation* entry_computation = nullptr;
   do {
-    if (!ParseComputation()) {
+    if (!ParseComputation(&entry_computation)) {
       return false;
     }
   } while (lexer_.GetKind() != TokKind::kEof);
+
+  for (int i = 0; i < computations_.size(); i++) {
+    // If entry_computation is not nullptr, it means the computation it pointed
+    // to is marked with "ENTRY"; otherwise, no computation is marked with
+    // "ENTRY", and we use the last computation as the entry computation. We
+    // add the non-entry computations as embedded computations to the module.
+    if ((entry_computation != nullptr &&
+         computations_[i].get() != entry_computation) ||
+        (entry_computation == nullptr && i != computations_.size() - 1)) {
+      module_->AddEmbeddedComputation(std::move(computations_[i]));
+      continue;
+    }
+    auto computation =
+        module_->AddEntryComputation(std::move(computations_[i]));
+    // The parameters and result layouts were set to default layout. Here we
+    // set the layouts to what the hlo text says.
+    for (int p = 0; p < computation->num_parameters(); p++) {
+      const Shape& param_shape = computation->parameter_instruction(p)->shape();
+      if (param_shape.has_layout()) {
+        module_->mutable_entry_computation_layout()
+            ->mutable_parameter_layout(p)
+            ->ResetLayout(param_shape.layout());
+      }
+    }
+    const Shape& result_shape = computation->root_instruction()->shape();
+    if (result_shape.has_layout()) {
+      module_->mutable_entry_computation_layout()
+          ->mutable_result_layout()
+          ->ResetLayout(result_shape.layout());
+    }
+  }
+
   return true;
 }
 
 // computation ::= ('ENTRY')? name (param_list_to_shape)? instruction_list
-bool HloParser::ParseComputation() {
+bool HloParser::ParseComputation(HloComputation** entry_computation) {
+  LocTy maybe_entry_loc = lexer_.GetLoc();
   const bool is_entry_computation = EatIfPresent(TokKind::kw_ENTRY);
+
   string name;
   LocTy name_loc = lexer_.GetLoc();
   if (!ParseName(&name)) {
@@ -307,10 +343,8 @@ bool HloParser::ParseComputation() {
   // Now root can be either an existing instruction or a nullptr. If it's a
   // nullptr, the implementation of Builder will set the last instruction as
   // root instruction.
-  HloComputation* computation =
-      is_entry_computation
-          ? module_->AddEntryComputation(builder->Build(root))
-          : module_->AddEmbeddedComputation(builder->Build(root));
+  computations_.emplace_back(builder->Build(root));
+  HloComputation* computation = computations_.back().get();
 
   if (!root) {
     root = computation->root_instruction();
@@ -328,24 +362,13 @@ bool HloParser::ParseComputation() {
                root_name, ", ", ShapeUtil::HumanString(root->shape())));
   }
 
-  // The parameters and result layouts were set to default layout. Here we set
-  // the layouts to what the hlo text says.
   if (is_entry_computation) {
-    for (int i = 0; i < computation->num_parameters(); i++) {
-      const Shape& param_shape = computation->parameter_instruction(i)->shape();
-      if (param_shape.has_layout()) {
-        module_->mutable_entry_computation_layout()
-            ->mutable_parameter_layout(i)
-            ->ResetLayout(param_shape.layout());
-      }
-    }
-    const Shape& result_shape = computation->root_instruction()->shape();
-    if (result_shape.has_layout()) {
-      module_->mutable_entry_computation_layout()
-          ->mutable_result_layout()
-          ->ResetLayout(result_shape.layout());
+    if (*entry_computation != nullptr) {
+      return Error(maybe_entry_loc, "expects only one ENTRY");
     }
+    *entry_computation = computation;
   }
+
   return AddComputation(name, computation, name_loc);
 }
 
@@ -373,6 +396,8 @@ bool HloParser::ParseInstruction(HloComputation::Builder* builder,
   Shape shape;
   HloOpcode opcode;
   std::vector<HloInstruction*> operands;
+
+  LocTy maybe_root_loc = lexer_.GetLoc();
   bool is_root = EatIfPresent(TokKind::kw_ROOT);
 
   const LocTy name_loc = lexer_.GetLoc();
@@ -381,7 +406,11 @@ bool HloParser::ParseInstruction(HloComputation::Builder* builder,
       !ParseShape(&shape) || !ParseOpcode(&opcode)) {
     return false;
   }
+
   if (is_root) {
+    if (!root_name->empty()) {
+      return Error(maybe_root_loc, "one computation should have only one ROOT");
+    }
     *root_name = name;
   }
 
diff --git a/tensorflow/compiler/xla/tools/parser/hlo_parser_test.cc b/tensorflow/compiler/xla/tools/parser/hlo_parser_test.cc
index 74a0e35839..e6f7ee7c08 100644
--- a/tensorflow/compiler/xla/tools/parser/hlo_parser_test.cc
+++ b/tensorflow/compiler/xla/tools/parser/hlo_parser_test.cc
@@ -1107,6 +1107,83 @@ ENTRY %CustomCall () -> f32[1] {
                   "with that of its root instruction foo, f32[1,2,3]");
 }
 
+TEST_F(HloParserTest, EntryComputationWithLayout) {
+  const string original = R"(HloModule layout:
+add_F32.v3 {
+  lhs = f32[] parameter(0)
+  rhs = f32[] parameter(1)
+  ROOT add = f32[] add(lhs, rhs)
+}
+
+ENTRY %Reduce (input: f32[8,16,256]) -> f32[8,16] {
+  input = f32[8,16,256]{0,1,2} parameter(0)
+  constant = f32[] constant(0)
+  ROOT reduce = f32[8,16]{0,1} reduce(input, constant), dimensions={2}, to_apply=add_F32.v3
+})";
+
+  auto module = Parse(original);
+  TF_ASSERT_OK(module.status());
+  auto program_layout = module.ValueOrDie()->entry_computation_layout();
+  ASSERT_EQ(program_layout.parameter_count(), 1);
+  auto param_layout = program_layout.parameter_layout(0).layout();
+  auto result_layout = program_layout.result_layout().layout();
+  EXPECT_TRUE(
+      LayoutUtil::Equal(LayoutUtil::MakeLayout({0, 1, 2}), param_layout))
+      << "actual layout of parameter(0) is "
+      << LayoutUtil::HumanString(param_layout);
+  EXPECT_TRUE(LayoutUtil::Equal(LayoutUtil::MakeLayout({0, 1}), result_layout))
+      << "actual layout of result is "
+      << LayoutUtil::HumanString(result_layout);
+}
+
+TEST_F(HloParserTest, NoEntry) {
+  const string original = R"(HloModule no_entry:
+c1 {
+  const1 = f32[1]{0} constant({12345})
+}
+c2 {
+  const2 = f32[1]{0} constant({67890})
+})";
+  auto module = Parse(original);
+  TF_ASSERT_OK(module.status());
+  EXPECT_EQ(module.ValueOrDie()->entry_computation()->name(), "c2");
+}
+
+TEST_F(HloParserTest, NoRoot) {
+  const string original = R"(HloModule no_root:
+ENTRY consts {
+  first = f32[1]{0} constant({12345})
+  last = f32[1]{0} constant({67890})
+})";
+  auto module = Parse(original);
+  TF_ASSERT_OK(module.status());
+  EXPECT_EQ(
+      module.ValueOrDie()->entry_computation()->root_instruction()->name(),
+      "last");
+}
+
+TEST_F(HloParserTest, MultipleEntries) {
+  const string original = R"(HloModule multiple_entries:
+ENTRY c1 {
+  const1 = f32[1]{0} constant({12345})
+}
+ENTRY c2 {
+  const2 = f32[1]{0} constant({67890})
+})";
+  ExpectHasSubstr(Parse(original).status().error_message(),
+                  "expects only one ENTRY");
+}
+
+TEST_F(HloParserTest, MultipleRoots) {
+  const string original = R"(HloModule multiple_roots:
+ENTRY consts {
+  ROOT const1 = f32[1]{0} constant({12345})
+  ROOT const2 = f32[1]{0} constant({12345})
+})";
+  ExpectHasSubstr(Parse(original).status().error_message(),
+                  "one computation should have only one ROOT");
+}
+
 }  // namespace
 }  // namespace tools
 }  // namespace xla
-- 
GitLab


From c26daab335366e87bd495be5e61630716f92163b Mon Sep 17 00:00:00 2001
From: Yong Tang <yong.tang.github@outlook.com>
Date: Mon, 18 Dec 2017 21:49:56 +0000
Subject: [PATCH 1191/1225] Fix python 3 error `a bytes-like object is
 required, not 'str'`

Signed-off-by: Yong Tang <yong.tang.github@outlook.com>
---
 .../kernel_tests/decode_compressed_op_test.py      | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/tensorflow/python/kernel_tests/decode_compressed_op_test.py b/tensorflow/python/kernel_tests/decode_compressed_op_test.py
index c10e3f1c33..0deb24136b 100644
--- a/tensorflow/python/kernel_tests/decode_compressed_op_test.py
+++ b/tensorflow/python/kernel_tests/decode_compressed_op_test.py
@@ -23,7 +23,7 @@ import gzip
 import sys
 import zlib
 
-from six import StringIO
+from six import BytesIO
 
 from tensorflow.python.framework import dtypes
 from tensorflow.python.ops import array_ops
@@ -39,8 +39,8 @@ class DecodeCompressedOpTest(test.TestCase):
     elif compression_type == "ZLIB":
       return zlib.compress(bytes)
     else:
-      out = StringIO()
-      with gzip.GzipFile(fileobj=out, mode="w") as f:
+      out = BytesIO()
+      with gzip.GzipFile(fileobj=out, mode="wb") as f:
         f.write(bytes)
       return out.getvalue()
 
@@ -53,9 +53,9 @@ class DecodeCompressedOpTest(test.TestCase):
         self.assertEqual([2], decompressed.get_shape().as_list())
 
         result = decompressed.eval(
-            feed_dict={in_bytes: [self._compress("AaAA", compression_type),
-                                  self._compress("bBbb", compression_type)]})
-        self.assertAllEqual(["AaAA", "bBbb"], result)
+            feed_dict={in_bytes: [self._compress(b"AaAA", compression_type),
+                                  self._compress(b"bBbb", compression_type)]})
+        self.assertAllEqual([b"AaAA", b"bBbb"], result)
 
   def testDecompressWithRaw(self):
     for compression_type in ["ZLIB", "GZIP", ""]:
@@ -66,7 +66,7 @@ class DecodeCompressedOpTest(test.TestCase):
         decode = parsing_ops.decode_raw(decompressed, out_type=dtypes.int16)
 
         result = decode.eval(
-            feed_dict={in_bytes: [self._compress("AaBC", compression_type)]})
+            feed_dict={in_bytes: [self._compress(b"AaBC", compression_type)]})
         self.assertAllEqual(
             [[ord("A") + ord("a") * 256, ord("B") + ord("C") * 256]], result)
 
-- 
GitLab


From ac55745a63aa3d16c4bca1b9609b63b46f28bf76 Mon Sep 17 00:00:00 2001
From: Yong Tang <yong.tang.github@outlook.com>
Date: Tue, 19 Dec 2017 04:01:11 +0000
Subject: [PATCH 1192/1225] Update api_defs

Signed-off-by: Yong Tang <yong.tang.github@outlook.com>
---
 .../base_api/api_def_DecodeCompressed.pbtxt   | 24 +++++++++++++++++++
 1 file changed, 24 insertions(+)
 create mode 100644 tensorflow/core/api_def/base_api/api_def_DecodeCompressed.pbtxt

diff --git a/tensorflow/core/api_def/base_api/api_def_DecodeCompressed.pbtxt b/tensorflow/core/api_def/base_api/api_def_DecodeCompressed.pbtxt
new file mode 100644
index 0000000000..63ff608630
--- /dev/null
+++ b/tensorflow/core/api_def/base_api/api_def_DecodeCompressed.pbtxt
@@ -0,0 +1,24 @@
+op {
+  graph_op_name: "DecodeCompressed"
+  in_arg {
+    name: "bytes"
+    description: <<END
+A Tensor of string which is compressed.
+END
+  }
+  out_arg {
+    name: "output"
+    description: <<END
+A Tensor with the same shape as input `bytes`, uncompressed
+from bytes.
+END
+  }
+  attr {
+    name: "compression_type"
+    description: <<END
+A scalar containing either (i) the empty string (no
+compression), (ii) "ZLIB", or (iii) "GZIP".
+END
+  }
+  summary: "Decompress the bytes of a string to the output string."
+}
-- 
GitLab


From 14cb8e14a8fb1e78e2ce623e4198972762e6e253 Mon Sep 17 00:00:00 2001
From: Guangda Lai <laigd@google.com>
Date: Mon, 18 Dec 2017 20:16:08 -0800
Subject: [PATCH 1193/1225] Added virtual gpu support.

PiperOrigin-RevId: 179504116
---
 tensorflow/core/BUILD                         |   5 +
 .../common_runtime/gpu/gpu_bfc_allocator.cc   |  17 +-
 .../common_runtime/gpu/gpu_bfc_allocator.h    |  10 +-
 .../gpu/gpu_bfc_allocator_test.cc             |  31 +-
 .../gpu/gpu_cudamalloc_allocator.cc           |   6 +-
 .../gpu/gpu_cudamalloc_allocator.h            |   4 +-
 .../common_runtime/gpu/gpu_debug_allocator.cc |  11 +-
 .../common_runtime/gpu/gpu_debug_allocator.h  |   7 +-
 .../gpu/gpu_debug_allocator_test.cc           |  53 +--
 .../core/common_runtime/gpu/gpu_device.cc     | 325 ++++++++++++------
 .../core/common_runtime/gpu/gpu_device.h      |  42 ++-
 .../common_runtime/gpu/gpu_device_factory.cc  |  14 +-
 .../common_runtime/gpu/gpu_device_test.cc     | 189 ++++++++++
 tensorflow/core/common_runtime/gpu/gpu_id.h   |  88 +++++
 .../core/common_runtime/gpu/gpu_id_utils.cc   |  74 ++++
 .../core/common_runtime/gpu/gpu_id_utils.h    |  61 ++++
 .../common_runtime/gpu/gpu_id_utils_test.cc   |  55 +++
 .../core/common_runtime/gpu/process_state.cc  |  63 ++--
 .../core/common_runtime/gpu/process_state.h   |  10 +-
 tensorflow/core/protobuf/config.proto         |  70 +++-
 tensorflow/python/BUILD                       |  13 +
 tensorflow/python/client/virtual_gpu_test.py  | 245 +++++++++++++
 .../golden/tensorflow.-g-p-u-options.pbtxt    |   8 +
 .../tools/api/tests/api_compatibility_test.py |   1 +
 24 files changed, 1164 insertions(+), 238 deletions(-)
 create mode 100644 tensorflow/core/common_runtime/gpu/gpu_device_test.cc
 create mode 100644 tensorflow/core/common_runtime/gpu/gpu_id.h
 create mode 100644 tensorflow/core/common_runtime/gpu/gpu_id_utils.cc
 create mode 100644 tensorflow/core/common_runtime/gpu/gpu_id_utils.h
 create mode 100644 tensorflow/core/common_runtime/gpu/gpu_id_utils_test.cc
 create mode 100644 tensorflow/python/client/virtual_gpu_test.py

diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD
index de074cc33c..c855bb05bc 100644
--- a/tensorflow/core/BUILD
+++ b/tensorflow/core/BUILD
@@ -2172,6 +2172,8 @@ GPU_RUNTIME_HEADERS = [
     "common_runtime/gpu/gpu_cudamalloc_allocator.h",
     "common_runtime/gpu/gpu_debug_allocator.h",
     "common_runtime/gpu/gpu_device.h",
+    "common_runtime/gpu/gpu_id.h",
+    "common_runtime/gpu/gpu_id_utils.h",
     "common_runtime/gpu/gpu_init.h",
     "common_runtime/gpu/gpu_managed_allocator.h",
     "common_runtime/gpu/gpu_stream_util.h",
@@ -2189,6 +2191,7 @@ tf_cuda_library(
         "common_runtime/gpu/gpu_debug_allocator.cc",
         "common_runtime/gpu/gpu_device.cc",
         "common_runtime/gpu/gpu_device_factory.cc",
+        "common_runtime/gpu/gpu_id_utils.cc",
         "common_runtime/gpu/gpu_managed_allocator.cc",
         "common_runtime/gpu/gpu_stream_util.cc",
         "common_runtime/gpu/gpu_util.cc",
@@ -2800,6 +2803,8 @@ tf_cc_tests_gpu(
     size = "small",
     srcs = glob(["user_ops/**/*_test.cc"]) + [
         "common_runtime/gpu/gpu_bfc_allocator_test.cc",
+        "common_runtime/gpu/gpu_device_test.cc",
+        "common_runtime/gpu/gpu_id_utils_test.cc",
         "common_runtime/gpu/gpu_event_mgr_test.cc",
         "common_runtime/gpu/pool_allocator_test.cc",
     ],
diff --git a/tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.cc b/tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.cc
index 646cd88a3a..2f7fbbbec2 100644
--- a/tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.cc
+++ b/tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.cc
@@ -15,20 +15,23 @@ limitations under the License.
 
 #include "tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.h"
 
+#include "tensorflow/core/common_runtime/gpu/gpu_id.h"
+#include "tensorflow/core/common_runtime/gpu/gpu_id_utils.h"
 #include "tensorflow/core/common_runtime/gpu/gpu_init.h"
 #include "tensorflow/core/lib/strings/strcat.h"
 
 namespace tensorflow {
 
-GPUBFCAllocator::GPUBFCAllocator(int device_id, size_t total_memory)
-    : GPUBFCAllocator(device_id, total_memory, GPUOptions()) {}
+GPUBFCAllocator::GPUBFCAllocator(CudaGpuId cuda_gpu_id, size_t total_memory,
+                                 const string& name)
+    : GPUBFCAllocator(cuda_gpu_id, total_memory, GPUOptions(), name) {}
 
-GPUBFCAllocator::GPUBFCAllocator(int device_id, size_t total_memory,
-                                 const GPUOptions& gpu_options)
+GPUBFCAllocator::GPUBFCAllocator(CudaGpuId cuda_gpu_id, size_t total_memory,
+                                 const GPUOptions& gpu_options,
+                                 const string& name)
     : BFCAllocator(
           new GPUMemAllocator(
-              GPUMachineManager()->ExecutorForDevice(device_id).ValueOrDie()),
-          total_memory, gpu_options.allow_growth(),
-          strings::StrCat("GPU_", device_id, "_bfc")) {}
+              GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie()),
+          total_memory, gpu_options.allow_growth(), name) {}
 
 }  // namespace tensorflow
diff --git a/tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.h b/tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.h
index 2c23340b6d..c2c0b020c7 100644
--- a/tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.h
+++ b/tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.h
@@ -23,6 +23,7 @@ limitations under the License.
 
 #include "tensorflow/core/common_runtime/allocator_retry.h"
 #include "tensorflow/core/common_runtime/bfc_allocator.h"
+#include "tensorflow/core/common_runtime/gpu/gpu_id.h"
 #include "tensorflow/core/platform/stream_executor.h"
 #include "tensorflow/core/platform/thread_annotations.h"
 #include "tensorflow/core/platform/types.h"
@@ -36,11 +37,12 @@ namespace tensorflow {
 // algorithm.
 class GPUBFCAllocator : public BFCAllocator {
  public:
-  // 'device_id' refers to the StreamExecutor ID of the device within
+  // 'cuda_gpu_id' refers to the ID of the GPU device within
   // the process and must reference a valid ID in the process.
-  GPUBFCAllocator(int device_id, size_t total_memory);
-  GPUBFCAllocator(int device_id, size_t total_memory,
-                  const GPUOptions& gpu_options);
+  GPUBFCAllocator(CudaGpuId cuda_gpu_id, size_t total_memory,
+                  const string& name);
+  GPUBFCAllocator(CudaGpuId cuda_gpu_id, size_t total_memory,
+                  const GPUOptions& gpu_options, const string& name);
   virtual ~GPUBFCAllocator() {}
 
   TF_DISALLOW_COPY_AND_ASSIGN(GPUBFCAllocator);
diff --git a/tensorflow/core/common_runtime/gpu/gpu_bfc_allocator_test.cc b/tensorflow/core/common_runtime/gpu/gpu_bfc_allocator_test.cc
index 00ef130d34..9e4b617d2b 100644
--- a/tensorflow/core/common_runtime/gpu/gpu_bfc_allocator_test.cc
+++ b/tensorflow/core/common_runtime/gpu/gpu_bfc_allocator_test.cc
@@ -20,6 +20,7 @@ limitations under the License.
 #include <algorithm>
 #include <vector>
 
+#include "tensorflow/core/common_runtime/gpu/gpu_id.h"
 #include "tensorflow/core/common_runtime/gpu/gpu_init.h"
 #include "tensorflow/core/lib/core/threadpool.h"
 #include "tensorflow/core/lib/gtl/inlined_vector.h"
@@ -45,7 +46,7 @@ static void CheckStats(Allocator* a, int64 num_allocs, int64 bytes_in_use,
 }
 
 TEST(GPUBFCAllocatorTest, NoDups) {
-  GPUBFCAllocator a(0, 1 << 30);
+  GPUBFCAllocator a(CudaGpuId(0), 1 << 30, "GPU_0_bfc");
   CheckStats(&a, 0, 0, 0, 0);
 
   // Allocate a lot of raw pointers
@@ -74,7 +75,7 @@ TEST(GPUBFCAllocatorTest, NoDups) {
 }
 
 TEST(GPUBFCAllocatorTest, AllocationsAndDeallocations) {
-  GPUBFCAllocator a(0, 1 << 30);
+  GPUBFCAllocator a(CudaGpuId(0), 1 << 30, "GPU_0_bfc");
   // Allocate 256 raw pointers of sizes between 100 bytes and about
   // a meg
   random::PhiloxRandom philox(123, 17);
@@ -132,7 +133,7 @@ TEST(GPUBFCAllocatorTest, AllocationsAndDeallocations) {
 }
 
 TEST(GPUBFCAllocatorTest, ExerciseCoalescing) {
-  GPUBFCAllocator a(0, 1 << 30);
+  GPUBFCAllocator a(CudaGpuId(0), 1 << 30, "GPU_0_bfc");
   CheckStats(&a, 0, 0, 0, 0);
 
   float* first_ptr = a.Allocate<float>(1024);
@@ -166,18 +167,18 @@ TEST(GPUBFCAllocatorTest, ExerciseCoalescing) {
 }
 
 TEST(GPUBFCAllocatorTest, AllocateZeroBufSize) {
-  GPUBFCAllocator a(0, 1 << 30);
+  GPUBFCAllocator a(CudaGpuId(0), 1 << 30, "GPU_0_bfc");
   float* ptr = a.Allocate<float>(0);
   EXPECT_EQ(nullptr, ptr);
 }
 
 TEST(GPUBFCAllocatorTest, TracksSizes) {
-  GPUBFCAllocator a(0, 1 << 30);
+  GPUBFCAllocator a(CudaGpuId(0), 1 << 30, "GPU_0_bfc");
   EXPECT_EQ(true, a.TracksAllocationSizes());
 }
 
 TEST(GPUBFCAllocatorTest, AllocatedVsRequested) {
-  GPUBFCAllocator a(0, 1 << 30);
+  GPUBFCAllocator a(CudaGpuId(0), 1 << 30, "GPU_0_bfc");
   float* t1 = a.Allocate<float>(1);
   EXPECT_EQ(4, a.RequestedSize(t1));
   EXPECT_EQ(256, a.AllocatedSize(t1));
@@ -186,7 +187,7 @@ TEST(GPUBFCAllocatorTest, AllocatedVsRequested) {
 
 TEST(GPUBFCAllocatorTest, TestCustomMemoryLimit) {
   // Configure a 1MiB byte limit
-  GPUBFCAllocator a(0, 1 << 20);
+  GPUBFCAllocator a(CudaGpuId(0), 1 << 20, "GPU_0_bfc");
 
   float* first_ptr = a.Allocate<float>(1 << 6);
   float* second_ptr = a.Allocate<float>(1 << 20);
@@ -201,7 +202,7 @@ TEST(GPUBFCAllocatorTest, AllocationsAndDeallocationsWithGrowth) {
   options.set_allow_growth(true);
 
   // Max of 2GiB, but starts out small.
-  GPUBFCAllocator a(0, 1LL << 31, options);
+  GPUBFCAllocator a(CudaGpuId(0), 1LL << 31, options, "GPU_0_bfc");
 
   // Allocate 10 raw pointers of sizes between 100 bytes and about
   // 64 megs.
@@ -262,8 +263,8 @@ TEST(GPUBFCAllocatorTest, AllocationsAndDeallocationsWithGrowth) {
 }
 
 TEST(GPUBFCAllocatorTest, DISABLED_AllocatorReceivesZeroMemory) {
-  GPUBFCAllocator a(0, 1UL << 60);
-  GPUBFCAllocator b(0, 1UL << 60);
+  GPUBFCAllocator a(CudaGpuId(0), 1UL << 60, "GPU_0_bfc");
+  GPUBFCAllocator b(CudaGpuId(0), 1UL << 60, "GPU_0_bfc");
   void* amem = a.AllocateRaw(1, 1);
   void* bmem = b.AllocateRaw(1, 1 << 30);
   a.DeallocateRaw(amem);
@@ -271,7 +272,7 @@ TEST(GPUBFCAllocatorTest, DISABLED_AllocatorReceivesZeroMemory) {
 }
 
 static void BM_Allocation(int iters) {
-  GPUBFCAllocator a(0, 1uLL << 33);
+  GPUBFCAllocator a(CudaGpuId(0), 1uLL << 33, "GPU_0_bfc");
   // Exercise a few different allocation sizes
   std::vector<size_t> sizes = {256,        4096,      16384,    524288,
                                512,        1048576,   10485760, 104857600,
@@ -287,7 +288,7 @@ static void BM_Allocation(int iters) {
 BENCHMARK(BM_Allocation);
 
 static void BM_AllocationThreaded(int iters, int num_threads) {
-  GPUBFCAllocator a(0, 1uLL << 33);
+  GPUBFCAllocator a(CudaGpuId(0), 1uLL << 33, "GPU_0_bfc");
   thread::ThreadPool pool(Env::Default(), "test", num_threads);
   std::atomic_int_fast32_t count(iters);
   mutex done_lock;
@@ -323,7 +324,7 @@ BENCHMARK(BM_AllocationThreaded)->Arg(1)->Arg(4)->Arg(16);
 // A more complex benchmark that defers deallocation of an object for
 // "delay" allocations.
 static void BM_AllocationDelayed(int iters, int delay) {
-  GPUBFCAllocator a(0, 1 << 30);
+  GPUBFCAllocator a(CudaGpuId(0), 1 << 30, "GPU_0_bfc");
   // Exercise a few different allocation sizes
   std::vector<int> sizes = {256, 4096, 16384, 4096, 512, 1024, 1024};
   int size_index = 0;
@@ -361,7 +362,7 @@ class GPUBFCAllocatorPrivateMethodsTest : public ::testing::Test {
   // only methods inside this class can access private members of BFCAllocator.
 
   void TestBinDebugInfo() {
-    GPUBFCAllocator a(0, 1 << 30);
+    GPUBFCAllocator a(CudaGpuId(0), 1 << 30, "GPU_0_bfc");
 
     std::vector<void*> initial_ptrs;
     std::vector<size_t> initial_ptrs_allocated_sizes;
@@ -439,7 +440,7 @@ class GPUBFCAllocatorPrivateMethodsTest : public ::testing::Test {
   }
 
   void TestLog2FloorNonZeroSlow() {
-    GPUBFCAllocator a(0 /* device_id */, 1 /* total_memory */);
+    GPUBFCAllocator a(CudaGpuId(0), 1 /* total_memory */, "GPU_0_bfc");
     EXPECT_EQ(-1, a.Log2FloorNonZeroSlow(0));
     EXPECT_EQ(0, a.Log2FloorNonZeroSlow(1));
     EXPECT_EQ(1, a.Log2FloorNonZeroSlow(2));
diff --git a/tensorflow/core/common_runtime/gpu/gpu_cudamalloc_allocator.cc b/tensorflow/core/common_runtime/gpu/gpu_cudamalloc_allocator.cc
index 70c2d96763..7c09451a8a 100644
--- a/tensorflow/core/common_runtime/gpu/gpu_cudamalloc_allocator.cc
+++ b/tensorflow/core/common_runtime/gpu/gpu_cudamalloc_allocator.cc
@@ -20,6 +20,8 @@ limitations under the License.
 
 #include "tensorflow/core/common_runtime/gpu/gpu_cudamalloc_allocator.h"
 
+#include "tensorflow/core/common_runtime/gpu/gpu_id.h"
+#include "tensorflow/core/common_runtime/gpu/gpu_id_utils.h"
 #include "tensorflow/core/common_runtime/gpu/gpu_init.h"
 #include "tensorflow/core/platform/stream_executor.h"
 
@@ -28,9 +30,9 @@ namespace gpu = ::perftools::gputools;
 namespace tensorflow {
 
 GPUcudaMallocAllocator::GPUcudaMallocAllocator(VisitableAllocator* allocator,
-                                               int device_id)
+                                               CudaGpuId cuda_gpu_id)
     : base_allocator_(allocator) {
-  stream_exec_ = GPUMachineManager()->ExecutorForDevice(device_id).ValueOrDie();
+  stream_exec_ = GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie();
 }
 
 GPUcudaMallocAllocator::~GPUcudaMallocAllocator() { delete base_allocator_; }
diff --git a/tensorflow/core/common_runtime/gpu/gpu_cudamalloc_allocator.h b/tensorflow/core/common_runtime/gpu/gpu_cudamalloc_allocator.h
index 23552b809a..208697361d 100644
--- a/tensorflow/core/common_runtime/gpu/gpu_cudamalloc_allocator.h
+++ b/tensorflow/core/common_runtime/gpu/gpu_cudamalloc_allocator.h
@@ -18,6 +18,7 @@ limitations under the License.
 
 #include <memory>
 
+#include "tensorflow/core/common_runtime/gpu/gpu_id.h"
 #include "tensorflow/core/common_runtime/visitable_allocator.h"
 #include "tensorflow/core/platform/macros.h"
 #include "tensorflow/core/platform/stream_executor.h"
@@ -30,7 +31,8 @@ namespace tensorflow {
 // allocated memory.
 class GPUcudaMallocAllocator : public VisitableAllocator {
  public:
-  explicit GPUcudaMallocAllocator(VisitableAllocator* allocator, int device_id);
+  explicit GPUcudaMallocAllocator(VisitableAllocator* allocator,
+                                  CudaGpuId cuda_gpu_id);
   ~GPUcudaMallocAllocator() override;
   string Name() override { return "gpu_debug"; }
   void* AllocateRaw(size_t alignment, size_t num_bytes) override;
diff --git a/tensorflow/core/common_runtime/gpu/gpu_debug_allocator.cc b/tensorflow/core/common_runtime/gpu/gpu_debug_allocator.cc
index 6480f0b256..45e97fdbf0 100644
--- a/tensorflow/core/common_runtime/gpu/gpu_debug_allocator.cc
+++ b/tensorflow/core/common_runtime/gpu/gpu_debug_allocator.cc
@@ -16,6 +16,9 @@ limitations under the License.
 #include "tensorflow/core/common_runtime/gpu/gpu_debug_allocator.h"
 
 #include <vector>
+
+#include "tensorflow/core/common_runtime/gpu/gpu_id.h"
+#include "tensorflow/core/common_runtime/gpu/gpu_id_utils.h"
 #include "tensorflow/core/common_runtime/gpu/gpu_init.h"
 #include "tensorflow/core/platform/stream_executor.h"
 
@@ -75,9 +78,9 @@ void InitMask(perftools::gputools::StreamExecutor* exec, void* ptr,
 // GPUDebugAllocator
 // -----------------------------------------------------------------------------
 GPUDebugAllocator::GPUDebugAllocator(VisitableAllocator* allocator,
-                                     int device_id)
+                                     CudaGpuId cuda_gpu_id)
     : base_allocator_(allocator) {
-  stream_exec_ = GPUMachineManager()->ExecutorForDevice(device_id).ValueOrDie();
+  stream_exec_ = GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie();
 }
 
 GPUDebugAllocator::~GPUDebugAllocator() { delete base_allocator_; }
@@ -154,9 +157,9 @@ bool GPUDebugAllocator::CheckFooter(void* ptr) {
 // GPUNanResetAllocator
 // -----------------------------------------------------------------------------
 GPUNanResetAllocator::GPUNanResetAllocator(VisitableAllocator* allocator,
-                                           int device_id)
+                                           CudaGpuId cuda_gpu_id)
     : base_allocator_(allocator) {
-  stream_exec_ = GPUMachineManager()->ExecutorForDevice(device_id).ValueOrDie();
+  stream_exec_ = GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie();
 }
 
 GPUNanResetAllocator::~GPUNanResetAllocator() { delete base_allocator_; }
diff --git a/tensorflow/core/common_runtime/gpu/gpu_debug_allocator.h b/tensorflow/core/common_runtime/gpu/gpu_debug_allocator.h
index 9fbaf64f8a..a990f5ce7c 100644
--- a/tensorflow/core/common_runtime/gpu/gpu_debug_allocator.h
+++ b/tensorflow/core/common_runtime/gpu/gpu_debug_allocator.h
@@ -20,6 +20,7 @@ limitations under the License.
 #include <string>
 #include <unordered_map>
 
+#include "tensorflow/core/common_runtime/gpu/gpu_id.h"
 #include "tensorflow/core/common_runtime/visitable_allocator.h"
 #include "tensorflow/core/platform/macros.h"
 #include "tensorflow/core/platform/stream_executor.h"
@@ -32,7 +33,8 @@ namespace tensorflow {
 // allocated memory.
 class GPUDebugAllocator : public VisitableAllocator {
  public:
-  explicit GPUDebugAllocator(VisitableAllocator* allocator, int device_id);
+  explicit GPUDebugAllocator(VisitableAllocator* allocator,
+                             CudaGpuId cuda_gpu_id);
   ~GPUDebugAllocator() override;
   string Name() override { return "gpu_debug"; }
   void* AllocateRaw(size_t alignment, size_t num_bytes) override;
@@ -62,7 +64,8 @@ class GPUDebugAllocator : public VisitableAllocator {
 // user forgets to initialize the memory.
 class GPUNanResetAllocator : public VisitableAllocator {
  public:
-  explicit GPUNanResetAllocator(VisitableAllocator* allocator, int device_id);
+  explicit GPUNanResetAllocator(VisitableAllocator* allocator,
+                                CudaGpuId cuda_gpu_id);
   ~GPUNanResetAllocator() override;
   string Name() override { return "gpu_nan_reset"; }
   void* AllocateRaw(size_t alignment, size_t num_bytes) override;
diff --git a/tensorflow/core/common_runtime/gpu/gpu_debug_allocator_test.cc b/tensorflow/core/common_runtime/gpu/gpu_debug_allocator_test.cc
index 14d8591731..ca4b93815c 100644
--- a/tensorflow/core/common_runtime/gpu/gpu_debug_allocator_test.cc
+++ b/tensorflow/core/common_runtime/gpu/gpu_debug_allocator_test.cc
@@ -21,6 +21,8 @@ limitations under the License.
 #include <vector>
 
 #include "tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.h"
+#include "tensorflow/core/common_runtime/gpu/gpu_id.h"
+#include "tensorflow/core/common_runtime/gpu/gpu_id_utils.h"
 #include "tensorflow/core/common_runtime/gpu/gpu_init.h"
 #include "tensorflow/core/lib/gtl/inlined_vector.h"
 #include "tensorflow/core/platform/logging.h"
@@ -33,10 +35,10 @@ namespace gpu = ::perftools::gputools;
 namespace tensorflow {
 
 TEST(GPUDebugAllocatorTest, OverwriteDetection_None) {
-  const int device_id = 0;
-  GPUDebugAllocator a(new GPUBFCAllocator(device_id, 1 << 30), device_id);
-  auto stream_exec =
-      GPUMachineManager()->ExecutorForDevice(device_id).ValueOrDie();
+  const CudaGpuId cuda_gpu_id(0);
+  GPUDebugAllocator a(new GPUBFCAllocator(cuda_gpu_id, 1 << 30, ""),
+                      cuda_gpu_id);
+  auto stream_exec = GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie();
 
   for (int s : {8}) {
     std::vector<int64> cpu_array(s);
@@ -57,11 +59,11 @@ TEST(GPUDebugAllocatorTest, OverwriteDetection_Header) {
   for (int s : {8, 211}) {
     EXPECT_DEATH(
         {
-          const int device_id = 0;
-          GPUDebugAllocator a(new GPUBFCAllocator(device_id, 1 << 30),
-                              device_id);
+          const CudaGpuId cuda_gpu_id(0);
+          GPUDebugAllocator a(new GPUBFCAllocator(cuda_gpu_id, 1 << 30, ""),
+                              cuda_gpu_id);
           auto stream_exec =
-              GPUMachineManager()->ExecutorForDevice(device_id).ValueOrDie();
+              GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie();
 
           std::vector<int64> cpu_array(s);
           memset(&cpu_array[0], 0, cpu_array.size() * sizeof(int64));
@@ -90,11 +92,11 @@ TEST(GPUDebugAllocatorTest, OverwriteDetection_Footer) {
   for (int s : {8, 22}) {
     EXPECT_DEATH(
         {
-          const int device_id = 0;
-          GPUDebugAllocator a(new GPUBFCAllocator(device_id, 1 << 30),
-                              device_id);
+          const CudaGpuId cuda_gpu_id(0);
+          GPUDebugAllocator a(new GPUBFCAllocator(cuda_gpu_id, 1 << 30, ""),
+                              cuda_gpu_id);
           auto stream_exec =
-              GPUMachineManager()->ExecutorForDevice(device_id).ValueOrDie();
+              GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie();
 
           std::vector<int64> cpu_array(s);
           memset(&cpu_array[0], 0, cpu_array.size() * sizeof(int64));
@@ -120,10 +122,10 @@ TEST(GPUDebugAllocatorTest, OverwriteDetection_Footer) {
 }
 
 TEST(GPUDebugAllocatorTest, ResetToNan) {
-  const int device_id = 0;
-  GPUNanResetAllocator a(new GPUBFCAllocator(device_id, 1 << 30), device_id);
-  auto stream_exec =
-      GPUMachineManager()->ExecutorForDevice(device_id).ValueOrDie();
+  const CudaGpuId cuda_gpu_id(0);
+  GPUNanResetAllocator a(new GPUBFCAllocator(cuda_gpu_id, 1 << 30, ""),
+                         cuda_gpu_id);
+  auto stream_exec = GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie();
 
   std::vector<float> cpu_array(1024);
   std::vector<float> cpu_array_result(1024);
@@ -160,13 +162,13 @@ TEST(GPUDebugAllocatorTest, ResetToNan) {
 }
 
 TEST(GPUDebugAllocatorTest, ResetToNanWithHeaderFooter) {
-  const int device_id = 0;
+  const CudaGpuId cuda_gpu_id(0);
   // NaN reset must be the outer-most allocator.
   GPUNanResetAllocator a(
-      new GPUDebugAllocator(new GPUBFCAllocator(device_id, 1 << 30), device_id),
-      device_id);
-  auto stream_exec =
-      GPUMachineManager()->ExecutorForDevice(device_id).ValueOrDie();
+      new GPUDebugAllocator(new GPUBFCAllocator(cuda_gpu_id, 1 << 30, ""),
+                            cuda_gpu_id),
+      cuda_gpu_id);
+  auto stream_exec = GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie();
 
   std::vector<float> cpu_array(1024);
   std::vector<float> cpu_array_result(1024);
@@ -203,13 +205,18 @@ TEST(GPUDebugAllocatorTest, ResetToNanWithHeaderFooter) {
 }
 
 TEST(GPUDebugAllocatorTest, TracksSizes) {
-  GPUDebugAllocator a(new GPUBFCAllocator(0, 1 << 30), 0);
+  const CudaGpuId cuda_gpu_id(0);
+  GPUDebugAllocator a(new GPUBFCAllocator(cuda_gpu_id, 1 << 30, ""),
+                      cuda_gpu_id);
   EXPECT_EQ(true, a.TracksAllocationSizes());
 }
 
 TEST(GPUDebugAllocatorTest, AllocatedVsRequested) {
+  const CudaGpuId cuda_gpu_id(0);
   GPUNanResetAllocator a(
-      new GPUDebugAllocator(new GPUBFCAllocator(0, 1 << 30), 0), 0);
+      new GPUDebugAllocator(new GPUBFCAllocator(cuda_gpu_id, 1 << 30, ""),
+                            cuda_gpu_id),
+      cuda_gpu_id);
   float* t1 = a.Allocate<float>(1);
   EXPECT_EQ(4, a.RequestedSize(t1));
   EXPECT_EQ(256, a.AllocatedSize(t1));
diff --git a/tensorflow/core/common_runtime/gpu/gpu_device.cc b/tensorflow/core/common_runtime/gpu/gpu_device.cc
index 646568a3e5..1390810c28 100644
--- a/tensorflow/core/common_runtime/gpu/gpu_device.cc
+++ b/tensorflow/core/common_runtime/gpu/gpu_device.cc
@@ -32,6 +32,8 @@ limitations under the License.
 #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
 #include "tensorflow/core/common_runtime/device_factory.h"
 #include "tensorflow/core/common_runtime/gpu/gpu_event_mgr.h"
+#include "tensorflow/core/common_runtime/gpu/gpu_id.h"
+#include "tensorflow/core/common_runtime/gpu/gpu_id_utils.h"
 #include "tensorflow/core/common_runtime/gpu/gpu_init.h"
 #include "tensorflow/core/common_runtime/gpu/gpu_stream_util.h"
 #include "tensorflow/core/common_runtime/gpu/gpu_util.h"
@@ -85,7 +87,8 @@ class EigenCudaStreamDevice : public ::Eigen::StreamInterface {
   }
   ~EigenCudaStreamDevice() override {}
   void Reinitialize(OpKernelContext* context, const cudaStream_t* cuda_stream,
-                    int gpu_id, ::tensorflow::Allocator* alloc, char* scratch) {
+                    TfGpuId tf_gpu_id, ::tensorflow::Allocator* alloc,
+                    char* scratch) {
     if (LogMemory::IsEnabled()) {
       operation_ = context->op_kernel().name() + "/EigenAllocator";
       step_id_ = context->step_id();
@@ -96,7 +99,8 @@ class EigenCudaStreamDevice : public ::Eigen::StreamInterface {
         reinterpret_cast<unsigned int*>(scratch + Eigen::kCudaScratchSize);
     stream_ = cuda_stream;
     allocator_ = alloc;
-    device_prop_ = &Eigen::m_deviceProperties[gpu_id];
+    const int cuda_gpu_id = GpuIdUtil::TfToCudaGpuId(tf_gpu_id).value();
+    device_prop_ = &Eigen::m_deviceProperties[cuda_gpu_id];
   }
 
   const cudaStream_t& stream() const override { return *stream_; }
@@ -186,13 +190,15 @@ class EigenCudaStreamDevice : public ::Eigen::StreamInterface {
 class BaseGPUDevice::StreamGroupFactory {
  public:
   // Returns the unique stream group for use with the stream defined by
-  // {gpu_id, stream_group_within_gpu}, creating it if it does not yet exist.
+  // {tf_gpu_id, stream_group_within_gpu}, creating it if it does not yet
+  // exist.
   // This function is thread safe.
-  BaseGPUDevice::StreamGroup* GetOrCreate(int gpu_id,
+  BaseGPUDevice::StreamGroup* GetOrCreate(TfGpuId tf_gpu_id,
                                           int stream_group_within_gpu,
                                           gpu::StreamExecutor* executor) {
     mutex_lock guard(lock_);
-    StreamGroup* group = &streams_[key_type(gpu_id, stream_group_within_gpu)];
+    StreamGroup* group =
+        &streams_[key_type(tf_gpu_id.value(), stream_group_within_gpu)];
     if (!group->compute) {
       group->compute = new gpu::Stream(executor);
       group->compute->Init();
@@ -237,7 +243,8 @@ class BaseGPUDevice::StreamGroupFactory {
 
 BaseGPUDevice::BaseGPUDevice(const SessionOptions& options, const string& name,
                              Bytes memory_limit, const DeviceLocality& locality,
-                             int gpu_id, const string& physical_device_desc,
+                             TfGpuId tf_gpu_id,
+                             const string& physical_device_desc,
                              Allocator* gpu_allocator, Allocator* cpu_allocator,
                              bool sync_every_op, int32 max_streams)
     : LocalDevice(options, Device::BuildDeviceAttributes(name, DEVICE_GPU,
@@ -245,7 +252,7 @@ BaseGPUDevice::BaseGPUDevice(const SessionOptions& options, const string& name,
                                                          physical_device_desc)),
       gpu_allocator_(gpu_allocator),
       cpu_allocator_(cpu_allocator),
-      gpu_id_(gpu_id),
+      tf_gpu_id_(tf_gpu_id),
       sync_every_op_(sync_every_op),
       max_streams_(max_streams) {
   ProcessState::singleton()->EnableGPUDevice();
@@ -257,10 +264,10 @@ BaseGPUDevice::~BaseGPUDevice() {
 }
 
 Status BaseGPUDevice::Init(const SessionOptions& options) {
-  auto executor_status = GPUMachineManager()->ExecutorForDevice(gpu_id_);
+  auto executor_status = GpuIdUtil::ExecutorForTfGpuId(tf_gpu_id_);
   if (!executor_status.status().ok()) {
     return errors::Internal("Failed to get StreamExecutor for device ",
-                            gpu_id_);
+                            tf_gpu_id_.value());
   }
 
   executor_ = executor_status.ValueOrDie();
@@ -273,14 +280,14 @@ Status BaseGPUDevice::Init(const SessionOptions& options) {
   // Create the specified number of GPU streams
   for (int i = 0; i < max_streams_; i++) {
     streams_.push_back(
-        StreamGroupFactory::Global().GetOrCreate(gpu_id_, i, executor_));
+        StreamGroupFactory::Global().GetOrCreate(tf_gpu_id_, i, executor_));
 
     size_t scratch_buffer_size = Eigen::kCudaScratchSize + sizeof(unsigned int);
     void* scratch_buffer = gpu_allocator_->AllocateRaw(
         Allocator::kAllocatorAlignment, scratch_buffer_size);
     if (scratch_buffer == nullptr) {
       return errors::FailedPrecondition(
-          "Failed to allocate scratch buffer for device ", gpu_id_);
+          "Failed to allocate scratch buffer for device ", tf_gpu_id_.value());
     }
     scratch_.push_back(static_cast<char*>(scratch_buffer));
 
@@ -292,7 +299,8 @@ Status BaseGPUDevice::Init(const SessionOptions& options) {
         &mem, Eigen::kCudaScratchSize + sizeof(unsigned int));
     if (!ok) {
       return errors::FailedPrecondition(
-          "Failed to memcopy into scratch buffer for device ", gpu_id_);
+          "Failed to memcopy into scratch buffer for device ",
+          tf_gpu_id_.value());
     }
 
     device_contexts_.push_back(new GPUDeviceContext(
@@ -303,7 +311,7 @@ Status BaseGPUDevice::Init(const SessionOptions& options) {
   gpu_device_info_->stream = streams_[0]->compute;
   gpu_device_info_->default_context = device_contexts_[0];
   gpu_device_info_->event_mgr = em_.get();
-  gpu_device_info_->gpu_id = gpu_id_;
+  gpu_device_info_->gpu_id = GpuIdUtil::TfToCudaGpuId(tf_gpu_id_).value();
   set_tensorflow_gpu_device_info(gpu_device_info_);
 
   // Whether and how the GPU device uses its own threadpool.
@@ -331,7 +339,7 @@ Status BaseGPUDevice::Init(const SessionOptions& options) {
       //   setting them for each kernel.
       // TODO(zhengxq): pin the thread to the same socket of the target GPU.
       thread_pool_.reset(new thread::ThreadPool(
-          options.env, strings::StrCat("gpu_private_", gpu_id_),
+          options.env, strings::StrCat("gpu_private_", tf_gpu_id_.value()),
           static_cast<int32>(gpu_thread_count)));
       set_tensorflow_device_thread_pool(thread_pool_.get());
     } else if (gpu_thread_mode == "gpu_shared") {
@@ -435,7 +443,7 @@ void BaseGPUDevice::ComputeHelper(OpKernel* op_kernel,
 
   if (vlog_1) {
     VLOG(1) << "GpuDevice::Compute " << op_kernel->name() << " op "
-            << op_kernel->type_string() << " on GPU" << gpu_id_ << " stream["
+            << op_kernel->type_string() << " on GPU" << tf_gpu_id_ << " stream["
             << stream_id << "]";
   }
 
@@ -510,7 +518,7 @@ void BaseGPUDevice::ComputeAsync(AsyncOpKernel* op_kernel,
   const auto stream_id = gpu_device_context->stream_id();
 
   VLOG(1) << "GpuDevice::ComputeAsync " << op_kernel->name() << " op "
-          << op_kernel->type_string() << " on GPU" << gpu_id_ << " stream["
+          << op_kernel->type_string() << " on GPU" << tf_gpu_id_ << " stream["
           << stream_id << "]";
 
   // When TraceMe profiling is off (which is the default), the
@@ -635,8 +643,9 @@ class ConcretePerOpGpuDevice : public PerOpGpuDevice {
   ConcretePerOpGpuDevice() : device_(&stream_device_) {}
 
   void Reinitialize(OpKernelContext* context, const cudaStream_t* cuda_stream,
-                    int gpu_id, Allocator* base_allocator, char* scratch) {
-    stream_device_.Reinitialize(context, cuda_stream, gpu_id, base_allocator,
+                    TfGpuId tf_gpu_id, Allocator* base_allocator,
+                    char* scratch) {
+    stream_device_.Reinitialize(context, cuda_stream, tf_gpu_id, base_allocator,
                                 scratch);
   }
 
@@ -647,8 +656,9 @@ class ConcretePerOpGpuDevice : public PerOpGpuDevice {
   Eigen::GpuDevice device_;
 };
 
+// Parse 'visible_device_list' into a list of CUDA GPU ids.
 Status ParseVisibleDeviceList(const string& visible_device_list,
-                              std::vector<int>* visible_gpu_order) {
+                              std::vector<CudaGpuId>* visible_gpu_order) {
   visible_gpu_order->clear();
   gpu::Platform* gpu_manager = GPUMachineManager();
 
@@ -676,13 +686,13 @@ Status ParseVisibleDeviceList(const string& visible_device_list,
             "' but visible device count is ",
             gpu_manager->VisibleDeviceCount());
       }
-      visible_gpu_order->push_back(cuda_gpu_id);
+      visible_gpu_order->push_back(CudaGpuId(cuda_gpu_id));
     }
   }
 
   // Validate no repeats.
-  std::set<int> visible_device_set(visible_gpu_order->begin(),
-                                   visible_gpu_order->end());
+  std::set<CudaGpuId> visible_device_set(visible_gpu_order->begin(),
+                                         visible_gpu_order->end());
   if (visible_device_set.size() != visible_gpu_order->size()) {
     return errors::InvalidArgument(
         "visible_device_list contained a duplicate entry: ",
@@ -691,6 +701,41 @@ Status ParseVisibleDeviceList(const string& visible_device_list,
   return Status::OK();
 }
 
+Status VerifyVirtualDeviceSettings(
+    const size_t num_gpus_to_use, const GPUOptions& gpu_options,
+    const std::vector<CudaGpuId>& visible_gpu_order,
+    const std::vector<CudaGpuId>& valid_cuda_gpu_ids) {
+  const auto& virtual_devices = gpu_options.experimental().virtual_devices();
+  CHECK(!virtual_devices.empty());
+  if (gpu_options.per_process_gpu_memory_fraction() > 0) {
+    return errors::InvalidArgument(
+        "It's invalid to set per_process_gpu_memory_fraction when "
+        "virtual_devices is set.");
+  }
+  if (num_gpus_to_use < virtual_devices.size()) {
+    return errors::Unknown(
+        "Not enough GPUs to create virtual devices."
+        " num_gpus_to_use: ",
+        num_gpus_to_use, " #virtual_devices: ", virtual_devices.size());
+  }
+  if (!gpu_options.visible_device_list().empty() &&
+      visible_gpu_order.size() != virtual_devices.size()) {
+    return errors::InvalidArgument(
+        "The number of GPUs in visible_device_list doesn't match the number "
+        "of elements in the virtual_devices list.",
+        " #GPUs in visible_device_list: ", visible_gpu_order.size(),
+        " virtual_devices.size(): ", virtual_devices.size());
+  }
+  if (valid_cuda_gpu_ids.size() != virtual_devices.size()) {
+    return errors::Unknown(
+        "The number of valid GPUs doesn't match the number of elements in "
+        "the virtual_devices list.",
+        " #valid GPUs: ", valid_cuda_gpu_ids.size(),
+        " virtual_devices.size(): ", virtual_devices.size());
+  }
+  return Status::OK();
+}
+
 int64 MinSystemMemory(int64 available_memory) {
   // We use the following heuristic for now:
   //
@@ -719,6 +764,37 @@ int64 MinSystemMemory(int64 available_memory) {
 #endif
   return min_system_memory;
 }
+
+// Get the memory limit for the virtual device being created on GPU with
+// 'cuda_gpu_id', when that virtual device is the only virtual device being
+// created on that GPU.
+Status SingleVirtualDeviceMemoryLimit(const GPUOptions& gpu_options,
+                                      CudaGpuId cuda_gpu_id,
+                                      int64* memory_limit) {
+  int64 total_memory = 0;
+  int64 available_memory = 0;
+  gpu::StreamExecutor* se =
+      GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie();
+  if (!se->DeviceMemoryUsage(&available_memory, &total_memory)) {
+    return errors::Unknown("Failed to query available memory for GPU ",
+                           cuda_gpu_id.value());
+  }
+
+  int64 allocated_memory = 0;
+  const double per_process_gpu_memory_fraction =
+      gpu_options.per_process_gpu_memory_fraction();
+  if (per_process_gpu_memory_fraction == 0) {
+    allocated_memory = available_memory;
+    const int64 min_system_memory = MinSystemMemory(available_memory);
+    if (min_system_memory < allocated_memory) {
+      allocated_memory -= min_system_memory;
+    }
+  } else {
+    allocated_memory = total_memory * per_process_gpu_memory_fraction;
+  }
+  *memory_limit = allocated_memory;
+  return Status::OK();
+}
 }  // namespace
 
 void BaseGPUDevice::ReinitializeDevice(OpKernelContext* context,
@@ -729,7 +805,7 @@ void BaseGPUDevice::ReinitializeDevice(OpKernelContext* context,
   DCHECK(concrete_device);
   const cudaStream_t* cuda_stream = reinterpret_cast<const cudaStream_t*>(
       streams_[stream_id]->compute->implementation()->CudaStreamMemberHack());
-  concrete_device->Reinitialize(context, cuda_stream, gpu_id_, allocator,
+  concrete_device->Reinitialize(context, cuda_stream, tf_gpu_id_, allocator,
                                 scratch_[stream_id]);
 }
 
@@ -766,21 +842,22 @@ Status BaseGPUDeviceFactory::CreateDevices(const SessionOptions& options,
     return Status::OK();
   }
 
-  size_t n = INT_MAX;
+  size_t num_gpus_to_use = INT_MAX;
   auto iter = options.config.device_count().find("GPU");
   if (iter != options.config.device_count().end()) {
-    n = iter->second;
+    num_gpus_to_use = iter->second;
   }
   const auto& gpu_options = options.config.gpu_options();
-  std::vector<int> visible_gpu_order;
+  std::vector<CudaGpuId> visible_gpu_order;
   TF_RETURN_IF_ERROR(ParseVisibleDeviceList(gpu_options.visible_device_list(),
                                             &visible_gpu_order));
-  std::vector<int> valid_gpu_ids;
-  TF_RETURN_IF_ERROR(GetValidDeviceIds(visible_gpu_order, &valid_gpu_ids));
-  if (static_cast<size_t>(n) > valid_gpu_ids.size()) {
-    n = valid_gpu_ids.size();
+
+  std::vector<CudaGpuId> valid_cuda_gpu_ids;
+  TF_RETURN_IF_ERROR(GetValidDeviceIds(visible_gpu_order, &valid_cuda_gpu_ids));
+  if (num_gpus_to_use > valid_cuda_gpu_ids.size()) {
+    num_gpus_to_use = valid_cuda_gpu_ids.size();
   }
-  if (!valid_gpu_ids.empty()) {
+  if (!valid_cuda_gpu_ids.empty()) {
     // Save the original device.
     int original_device = 0;
     cudaError_t err = cudaGetDevice(&original_device);
@@ -790,16 +867,16 @@ Status BaseGPUDeviceFactory::CreateDevices(const SessionOptions& options,
     }
     // Force to implicitly initialize CUDA runtime on each valid GPU before
     // CreateGPUDevice().
-    for (int gpu_id : valid_gpu_ids) {
-      err = cudaSetDevice(gpu_id);
+    for (CudaGpuId cuda_gpu_id : valid_cuda_gpu_ids) {
+      err = cudaSetDevice(cuda_gpu_id.value());
       if (err != cudaSuccess) {
-        return errors::Internal("cudaSetDevice() on GPU:", gpu_id,
+        return errors::Internal("cudaSetDevice() on GPU:", cuda_gpu_id.value(),
                                 " failed. Status: ", cudaGetErrorString(err));
       }
       err = cudaFree(nullptr);
       if (err != cudaSuccess) {
         return errors::Internal(
-            "CUDA runtime implicit initialization on GPU:", gpu_id,
+            "CUDA runtime implicit initialization on GPU:", cuda_gpu_id.value(),
             " failed. Status: ", cudaGetErrorString(err));
       }
     }
@@ -810,19 +887,45 @@ Status BaseGPUDeviceFactory::CreateDevices(const SessionOptions& options,
                               " failed. Status: ", cudaGetErrorString(err));
     }
   }
-  for (int i = 0; i < n; i++) {
-    BaseGPUDevice* gpu_device;
-    TF_RETURN_IF_ERROR(CreateGPUDevice(
-        options, strings::StrCat(name_prefix, "/device:GPU:", i),
-        valid_gpu_ids[i], &gpu_device));
-    TF_RETURN_IF_ERROR(gpu_device->Init(options));
-    devices->push_back(gpu_device);
-  }
 
+  const auto& virtual_devices = gpu_options.experimental().virtual_devices();
+  if (!virtual_devices.empty()) {
+    TF_RETURN_IF_ERROR(VerifyVirtualDeviceSettings(
+        num_gpus_to_use, gpu_options, visible_gpu_order, valid_cuda_gpu_ids));
+    // We've verified that num_gpus_to_use >= virtual_devices.size().
+    num_gpus_to_use = virtual_devices.size();
+    CHECK(gpu_options.visible_device_list().empty() ||
+          valid_cuda_gpu_ids == visible_gpu_order);
+  }
+  int next_tf_gpu_id = 0;
+  for (int i = 0; i < num_gpus_to_use; ++i) {
+    const CudaGpuId cuda_gpu_id = valid_cuda_gpu_ids[i];
+    std::vector<int64> memory_limit_bytes;
+    if (virtual_devices.empty() ||
+        virtual_devices.Get(i).memory_limit_mb_size() == 0) {
+      int64 single_virtual_device_memory_limit = 0;
+      TF_RETURN_IF_ERROR(SingleVirtualDeviceMemoryLimit(
+          gpu_options, cuda_gpu_id, &single_virtual_device_memory_limit));
+      memory_limit_bytes.push_back(single_virtual_device_memory_limit);
+    } else {
+      const auto& memory_limit_mb = virtual_devices.Get(i).memory_limit_mb();
+      std::transform(memory_limit_mb.begin(), memory_limit_mb.end(),
+                     std::back_inserter(memory_limit_bytes), [](float mb) {
+                       return static_cast<int64>(mb) * (1ll << 20);
+                     });
+    }
+    for (int64 bytes : memory_limit_bytes) {
+      TfGpuId tf_gpu_id(next_tf_gpu_id);
+      ++next_tf_gpu_id;
+      GpuIdUtil::InsertTfCudaGpuIdPair(tf_gpu_id, cuda_gpu_id);
+      TF_RETURN_IF_ERROR(
+          CreateGPUDevice(options, name_prefix, tf_gpu_id, bytes, devices));
+    }
+  }
   return Status::OK();
 }
 
-static string GetShortDeviceDescription(int device_id,
+static string GetShortDeviceDescription(CudaGpuId cuda_gpu_id,
                                         const gpu::DeviceDescription& desc) {
   int cc_major;
   int cc_minor;
@@ -831,22 +934,26 @@ static string GetShortDeviceDescription(int device_id,
     cc_minor = 0;
   }
   // LINT.IfChange
-  return strings::StrCat("device: ", device_id, ", name: ", desc.name(),
+  return strings::StrCat("device: ", cuda_gpu_id.value(),
+                         ", name: ", desc.name(),
                          ", pci bus id: ", desc.pci_bus_id(),
                          ", compute capability: ", cc_major, ".", cc_minor);
   // LINT.ThenChange(//tensorflow/python/platform/test.py)
 }
 
 Status BaseGPUDeviceFactory::CreateGPUDevice(const SessionOptions& options,
-                                             const string& name, int gpu_id,
-                                             BaseGPUDevice** out_device) {
-  CHECK_GE(gpu_id, 0);
+                                             const string& name_prefix,
+                                             TfGpuId tf_gpu_id,
+                                             int64 memory_limit,
+                                             std::vector<Device*>* devices) {
+  CHECK_GE(tf_gpu_id.value(), 0);
+  const string device_name =
+      strings::StrCat(name_prefix, "/device:GPU:", tf_gpu_id.value());
 
   // Look up the device, to see its attributes.
-  gpu::Platform* gpu_platform = GPUMachineManager();
-  CHECK_LT(gpu_id, gpu_platform->VisibleDeviceCount());
+  GpuIdUtil::CheckValidTfGpuId(tf_gpu_id);
   gpu::StreamExecutor* se =
-      gpu_platform->ExecutorForDevice(gpu_id).ValueOrDie();
+      GpuIdUtil::ExecutorForTfGpuId(tf_gpu_id).ValueOrDie();
   const gpu::DeviceDescription& desc = se->GetDeviceDescription();
   int numa_node = desc.numa_node();
   if (numa_node < 0) {
@@ -856,60 +963,49 @@ Status BaseGPUDeviceFactory::CreateGPUDevice(const SessionOptions& options,
     // may run into trouble later with data transfer operations.  The
     // trouble may manifest as slower than expected performance, or
     // outright failures.
-    LOG(INFO) << "Could not identify NUMA node of " << name
+    LOG(INFO) << "Could not identify NUMA node of " << device_name
               << ", defaulting to 0.  Your kernel may not have been built "
               << "with NUMA support.";
     numa_node = 0;
   }
-
-  int64 total_memory, available_memory;
-  if (!se->DeviceMemoryUsage(&available_memory, &total_memory)) {
-    return errors::Unknown(
-        strings::StrCat("Failed to query available memory for GPU ", gpu_id));
-  }
-
-  int64 allocated_memory;
-  double config_memory_fraction =
-      options.config.gpu_options().per_process_gpu_memory_fraction();
-  if (config_memory_fraction == 0) {
-    allocated_memory = available_memory;
-    const int64 min_system_memory = MinSystemMemory(available_memory);
-    if (min_system_memory < allocated_memory) {
-      allocated_memory -= min_system_memory;
-    }
-  } else {
-    allocated_memory = total_memory * config_memory_fraction;
-  }
-
-  Bytes allocated_bytes = static_cast<Bytes>(allocated_memory);
+  Bytes allocated_bytes = static_cast<Bytes>(memory_limit);
 
   // Get GPU bus_id from its reported NUMA affinity.  Because GPUs are
   // virtualized in some environments, we can't just use the GPU id.
   // NUMA locales are indexed from 0, buses are indexed from 1.
   DeviceLocality dev_locality;
   dev_locality.set_bus_id(numa_node + 1);
-  VLOG(1) << "GPUDevice id " << gpu_id << " on bus " << dev_locality.bus_id()
-          << " numa: " << numa_node << " pci: " << desc.pci_bus_id();
-
+  const CudaGpuId cuda_gpu_id = GpuIdUtil::TfToCudaGpuId(tf_gpu_id);
+  VLOG(1) << "GPUDevice id " << cuda_gpu_id << " on bus "
+          << dev_locality.bus_id() << " numa: " << numa_node
+          << " pci: " << desc.pci_bus_id();
+
+  LOG(INFO) << "Creating TensorFlow device (" << device_name << " with "
+            << (memory_limit >> 20) << " MB memory) -> physical GPU ("
+            << GetShortDeviceDescription(cuda_gpu_id, desc) << ")";
   ProcessState* process_state = ProcessState::singleton();
-  *out_device = CreateGPUDevice(
-      options, name, allocated_bytes, dev_locality, gpu_id,
-      GetShortDeviceDescription(gpu_id, desc),
-      process_state->GetGPUAllocator(options.config.gpu_options(), gpu_id,
-                                     allocated_memory),
+  BaseGPUDevice* gpu_device = CreateGPUDevice(
+      options, device_name, allocated_bytes, dev_locality, tf_gpu_id,
+      GetShortDeviceDescription(cuda_gpu_id, desc),
+      process_state->GetGPUAllocator(options.config.gpu_options(), tf_gpu_id,
+                                     memory_limit),
       process_state->GetCPUAllocator(numa_node));
+  TF_RETURN_IF_ERROR(gpu_device->Init(options));
+  devices->push_back(gpu_device);
 
   return Status::OK();
 }
 
 static int GetDefaultMinGPUMultiprocessorCount(
-    gpu::Platform* gpu_manager, const std::vector<int>& visible_gpu_order) {
+    gpu::Platform* gpu_manager,
+    const std::vector<CudaGpuId>& visible_gpu_order) {
   static const int kDefaultMinGPUMultiprocessorCount = 8;
 
   // Find the highest multi-processor count across all visible GPUs.
   int max_count = -1;
   for (int i = 0; i < visible_gpu_order.size(); ++i) {
-    auto exec_status = gpu_manager->ExecutorForDevice(visible_gpu_order[i]);
+    auto exec_status =
+        GpuIdUtil::ExecutorForCudaGpuId(gpu_manager, visible_gpu_order[i]);
     if (!exec_status.ok()) {
       continue;
     }
@@ -927,7 +1023,8 @@ static int GetDefaultMinGPUMultiprocessorCount(
 }
 
 static int GetMinGPUMultiprocessorCount(
-    gpu::Platform* gpu_manager, const std::vector<int>& visible_gpu_order) {
+    gpu::Platform* gpu_manager,
+    const std::vector<CudaGpuId>& visible_gpu_order) {
   const char* tf_min_gpu_core_count = getenv("TF_MIN_GPU_MULTIPROCESSOR_COUNT");
 
   if (tf_min_gpu_core_count == nullptr ||
@@ -1005,17 +1102,17 @@ std::vector<CudaVersion> GetSupportedCudaComputeCapabilities() {
 }
 
 std::unique_ptr<std::map<std::pair<int, int>, bool>> GetPeerAccessMap(
-    gpu::Platform* platform, const std::vector<int>& visible_gpu_order) {
+    gpu::Platform* platform, const std::vector<CudaGpuId>& visible_gpu_order) {
   std::unique_ptr<std::map<std::pair<int, int>, bool>> map(
       new std::map<std::pair<int, int>, bool>);
   for (int i = 0; i < visible_gpu_order.size(); ++i) {
-    const int i_gpu_id = visible_gpu_order[i];
+    const CudaGpuId i_gpu_id = visible_gpu_order[i];
     for (int j = 0; j < visible_gpu_order.size(); ++j) {
-      const int j_gpu_id = visible_gpu_order[j];
+      const CudaGpuId j_gpu_id = visible_gpu_order[j];
       gpu::StreamExecutor* from =
-          platform->ExecutorForDevice(i_gpu_id).ValueOrDie();
+          GpuIdUtil::ExecutorForCudaGpuId(platform, i_gpu_id).ValueOrDie();
       gpu::StreamExecutor* to =
-          platform->ExecutorForDevice(j_gpu_id).ValueOrDie();
+          GpuIdUtil::ExecutorForCudaGpuId(platform, j_gpu_id).ValueOrDie();
       (*map)[{i, j}] = from->CanEnablePeerAccessTo(to);
     }
   }
@@ -1024,19 +1121,18 @@ std::unique_ptr<std::map<std::pair<int, int>, bool>> GetPeerAccessMap(
 }
 
 Status EnablePeerAccess(gpu::Platform* platform,
-                        const std::vector<int>& visible_gpu_order) {
+                        const std::vector<CudaGpuId>& visible_gpu_order) {
   int possible_peer_count = 0;
   int enabled_peer_count = 0;
   for (int i = 0; i < visible_gpu_order.size(); ++i) {
-    const int i_gpu_id = visible_gpu_order[i];
+    const CudaGpuId i_gpu_id = visible_gpu_order[i];
     for (int j = 0; j < visible_gpu_order.size(); ++j) {
-      const int j_gpu_id = visible_gpu_order[j];
-      // We have already validated that ExecutorForDevice() calls
-      // return OK.
+      const CudaGpuId j_gpu_id = visible_gpu_order[j];
+      // We have already validated that ExecutorForDevice() calls return OK.
       gpu::StreamExecutor* from =
-          platform->ExecutorForDevice(i_gpu_id).ValueOrDie();
+          GpuIdUtil::ExecutorForCudaGpuId(platform, i_gpu_id).ValueOrDie();
       gpu::StreamExecutor* to =
-          platform->ExecutorForDevice(j_gpu_id).ValueOrDie();
+          GpuIdUtil::ExecutorForCudaGpuId(platform, j_gpu_id).ValueOrDie();
 
       if (from->CanEnablePeerAccessTo(to)) {
         ++possible_peer_count;
@@ -1067,21 +1163,22 @@ Status EnablePeerAccess(gpu::Platform* platform,
 }  // namespace
 
 Status BaseGPUDeviceFactory::GetValidDeviceIds(
-    const std::vector<int>& visible_gpu_order, std::vector<int>* ids) {
+    const std::vector<CudaGpuId>& visible_gpu_order,
+    std::vector<CudaGpuId>* ids) {
   gpu::Platform* gpu_manager = GPUMachineManager();
   bool new_gpu_found = false;
   for (int i = 0; i < visible_gpu_order.size(); ++i) {
-    int gpu_id = visible_gpu_order[i];
+    const CudaGpuId cuda_gpu_id = visible_gpu_order[i];
 
-    // Only perform this once per visible gpu id.
-    if (visible_gpu_initialized_[gpu_id]) {
+    // Only perform this once per visible cuda gpu id.
+    if (visible_gpu_initialized_[cuda_gpu_id.value()]) {
       continue;
     }
 
-    visible_gpu_initialized_[gpu_id] = true;
+    visible_gpu_initialized_[cuda_gpu_id.value()] = true;
     new_gpu_found = true;
 
-    auto executor = gpu_manager->ExecutorForDevice(gpu_id);
+    auto executor = GpuIdUtil::ExecutorForCudaGpuId(gpu_manager, cuda_gpu_id);
     if (!executor.ok()) {
       return StreamExecutorUtil::ConvertStatus(executor.status());
     }
@@ -1121,11 +1218,11 @@ Status BaseGPUDeviceFactory::GetValidDeviceIds(
     auto access_map = GetPeerAccessMap(gpu_manager, visible_gpu_order);
     string line_buf = "DMA: ";
     for (int i = 0; i < visible_gpu_order.size(); ++i) {
-      strings::StrAppend(&line_buf, visible_gpu_order[i], " ");
+      strings::StrAppend(&line_buf, visible_gpu_order[i].value(), " ");
     }
     LOG(INFO) << line_buf;
     for (int i = 0; i < visible_gpu_order.size(); ++i) {
-      line_buf = strings::StrCat(visible_gpu_order[i], ":   ");
+      line_buf = strings::StrCat(visible_gpu_order[i].value(), ":   ");
       for (int j = 0; j < visible_gpu_order.size(); ++j) {
         if ((*access_map)[{i, j}]) {
           line_buf.append("Y ");
@@ -1150,9 +1247,13 @@ Status BaseGPUDeviceFactory::GetValidDeviceIds(
 
   // Filter out devices that don't have the right capability or power.
   for (int i = 0; i < visible_gpu_order.size(); ++i) {
-    const int32 visible_gpu_id = visible_gpu_order[i];
-    auto exec_status = gpu_manager->ExecutorForDevice(visible_gpu_id);
+    const CudaGpuId visible_gpu_id = visible_gpu_order[i];
+    auto exec_status =
+        GpuIdUtil::ExecutorForCudaGpuId(gpu_manager, visible_gpu_id);
     if (!exec_status.ok()) {
+      LOG(INFO) << "Ignoring visible gpu device " << visible_gpu_id
+                << " whose executor is in invalid state: "
+                << exec_status.status().ToString();
       continue;
     }
     gpu::StreamExecutor* se = exec_status.ValueOrDie();
@@ -1160,6 +1261,10 @@ Status BaseGPUDeviceFactory::GetValidDeviceIds(
     CudaVersion device_capability;
     if (!desc.cuda_compute_capability(&device_capability.major_part,
                                       &device_capability.minor_part)) {
+      LOG(INFO) << "Ignoring visible gpu device "
+                << "(" << GetShortDeviceDescription(visible_gpu_id, desc)
+                << ") "
+                << "whose CUDA compute capability is not available.";
       continue;
     }
     // Only GPUs with no less than the minimum supported compute capability is
@@ -1179,7 +1284,7 @@ Status BaseGPUDeviceFactory::GetValidDeviceIds(
     // multiprocessors. If the TF_MIN_GPU_MULTIPROCESSOR_COUNT environment
     // variable is set, its value will be used to filter out GPUs.
     if (desc.core_count() < min_gpu_core_count) {
-      LOG(INFO) << "Ignoring gpu device "
+      LOG(INFO) << "Ignoring visible gpu device "
                 << "(" << GetShortDeviceDescription(visible_gpu_id, desc)
                 << ") "
                 << "with Cuda multiprocessor count: " << desc.core_count()
@@ -1188,12 +1293,8 @@ Status BaseGPUDeviceFactory::GetValidDeviceIds(
                    "TF_MIN_GPU_MULTIPROCESSOR_COUNT.";
       continue;
     }
-
-    size_t new_id = ids->size();
+    LOG(INFO) << "Adding visible gpu device " << visible_gpu_id;
     ids->push_back(visible_gpu_id);
-
-    LOG(INFO) << "Creating TensorFlow device (/device:GPU:" << new_id << ") -> "
-              << "(" << GetShortDeviceDescription(visible_gpu_id, desc) << ")";
   }
 
   return Status::OK();
diff --git a/tensorflow/core/common_runtime/gpu/gpu_device.h b/tensorflow/core/common_runtime/gpu/gpu_device.h
index 74176cd448..41e60b4884 100644
--- a/tensorflow/core/common_runtime/gpu/gpu_device.h
+++ b/tensorflow/core/common_runtime/gpu/gpu_device.h
@@ -28,6 +28,8 @@ limitations under the License.
 #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
 #include "tensorflow/core/common_runtime/device_factory.h"
 #include "tensorflow/core/common_runtime/gpu/gpu_event_mgr.h"
+#include "tensorflow/core/common_runtime/gpu/gpu_id.h"
+#include "tensorflow/core/common_runtime/gpu/gpu_id_utils.h"
 #include "tensorflow/core/common_runtime/gpu_device_context.h"
 #include "tensorflow/core/common_runtime/local_device.h"
 #include "tensorflow/core/framework/allocator.h"
@@ -45,10 +47,10 @@ namespace tensorflow {
 class BaseGPUDevice : public LocalDevice {
  public:
   BaseGPUDevice(const SessionOptions& options, const string& name,
-                Bytes memory_limit, const DeviceLocality& locality, int gpu_id,
-                const string& physical_device_desc, Allocator* gpu_allocator,
-                Allocator* cpu_allocator, bool sync_every_op,
-                int32 max_streams);
+                Bytes memory_limit, const DeviceLocality& locality,
+                TfGpuId tf_gpu_id, const string& physical_device_desc,
+                Allocator* gpu_allocator, Allocator* cpu_allocator,
+                bool sync_every_op, int32 max_streams);
 
   ~BaseGPUDevice() override;
 
@@ -84,9 +86,9 @@ class BaseGPUDevice : public LocalDevice {
   void ReinitializeGpuDevice(OpKernelContext* context, PerOpGpuDevice* device,
                              DeviceContext* dc, Allocator* allocator) override;
 
-  // Returns the id of this device within the native driver system; e.g., for
-  // CUDA this is the ordinal of the GPU within the system.
-  int gpu_id() const { return gpu_id_; }
+  // Returns the CUDA GPU id of this device within the native driver system;
+  // e.g., for CUDA this is the ordinal of the GPU within the system.
+  int gpu_id() const { return GpuIdUtil::TfToCudaGpuId(tf_gpu_id_).value(); }
 
   // The executor that provides control for the device; e.g., for CUDA this
   // corresponds to the cuda context.
@@ -112,7 +114,7 @@ class BaseGPUDevice : public LocalDevice {
   std::vector<GPUDeviceContext*> device_contexts_;
   GpuDeviceInfo* gpu_device_info_ = nullptr;
   mutex trace_mu_;
-  int gpu_id_ = -1;
+  TfGpuId tf_gpu_id_;
   const bool sync_every_op_ = false;
   const int32 max_streams_;
   std::unique_ptr<EventMgr> em_;
@@ -139,26 +141,30 @@ class BaseGPUDeviceFactory : public DeviceFactory {
                        std::vector<Device*>* devices) override;
 
  private:
-  Status CreateGPUDevice(const SessionOptions& options, const string& name,
-                         int gpu_id, BaseGPUDevice** out_device);
+  // Creates a BaseGPUDevice associated with 'tf_gpu_id', allocates (strictly)
+  // 'memory_limit' bytes of GPU memory to it, and adds it to the 'devices'
+  // vector.
+  Status CreateGPUDevice(const SessionOptions& options,
+                         const string& name_prefix, TfGpuId tf_gpu_id,
+                         int64 memory_limit, std::vector<Device*>* devices);
 
   virtual BaseGPUDevice* CreateGPUDevice(const SessionOptions& options,
                                          const string& name, Bytes memory_limit,
                                          const DeviceLocality& locality,
-                                         int gpu_id,
+                                         TfGpuId tf_gpu_id,
                                          const string& physical_device_desc,
                                          Allocator* gpu_allocator,
                                          Allocator* cpu_allocator) = 0;
 
-  // Returns into 'ids' the list of valid GPU ids, in the order that
-  // they should map to logical gpu ids "/device:GPU:0", "/device:GPU:1", etc,
+  // Returns into 'ids' the list of valid CUDA GPU ids, in the order that
+  // they should map to TF GPU ids "/device:GPU:0", "/device:GPU:1", etc,
   // based upon 'visible_gpu_order' which was generated by parsing
-  // GPUOptions::visible_device_list which is a comma-separated list of
-  // 'visible gpu ids'.
-  Status GetValidDeviceIds(const std::vector<int>& visible_gpu_order,
-                           std::vector<int>* ids);
+  // GPUOptions::visible_device_list which is a comma-separated list of CUDA GPU
+  // ids.
+  Status GetValidDeviceIds(const std::vector<CudaGpuId>& visible_gpu_order,
+                           std::vector<CudaGpuId>* ids);
 
-  // visible_gpu_initialized_[gpu_id] is true if visible GPU gpu_id
+  // visible_gpu_initialized_[cuda_gpu_id] is true if visible GPU cuda_gpu_id
   // has been initialized by the process.
   std::unordered_map<int, bool> visible_gpu_initialized_;
 };
diff --git a/tensorflow/core/common_runtime/gpu/gpu_device_factory.cc b/tensorflow/core/common_runtime/gpu/gpu_device_factory.cc
index 63ac3daba1..9a000749c6 100644
--- a/tensorflow/core/common_runtime/gpu/gpu_device_factory.cc
+++ b/tensorflow/core/common_runtime/gpu/gpu_device_factory.cc
@@ -18,6 +18,7 @@ limitations under the License.
 #define EIGEN_USE_GPU
 
 #include "tensorflow/core/common_runtime/gpu/gpu_device.h"
+#include "tensorflow/core/common_runtime/gpu/gpu_id.h"
 #include "tensorflow/core/common_runtime/gpu/process_state.h"
 #include "tensorflow/core/common_runtime/threadpool_device.h"
 
@@ -26,10 +27,10 @@ namespace tensorflow {
 class GPUDevice : public BaseGPUDevice {
  public:
   GPUDevice(const SessionOptions& options, const string& name,
-            Bytes memory_limit, const DeviceLocality& locality, int gpu_id,
-            const string& physical_device_desc, Allocator* gpu_allocator,
-            Allocator* cpu_allocator)
-      : BaseGPUDevice(options, name, memory_limit, locality, gpu_id,
+            Bytes memory_limit, const DeviceLocality& locality,
+            TfGpuId tf_gpu_id, const string& physical_device_desc,
+            Allocator* gpu_allocator, Allocator* cpu_allocator)
+      : BaseGPUDevice(options, name, memory_limit, locality, tf_gpu_id,
                       physical_device_desc, gpu_allocator, cpu_allocator,
                       false /* sync every op */, 1 /* max_streams */) {
     if (options.config.has_gpu_options()) {
@@ -59,11 +60,12 @@ class GPUDeviceFactory : public BaseGPUDeviceFactory {
  private:
   BaseGPUDevice* CreateGPUDevice(const SessionOptions& options,
                                  const string& name, Bytes memory_limit,
-                                 const DeviceLocality& locality, int gpu_id,
+                                 const DeviceLocality& locality,
+                                 TfGpuId tf_gpu_id,
                                  const string& physical_device_desc,
                                  Allocator* gpu_allocator,
                                  Allocator* cpu_allocator) override {
-    return new GPUDevice(options, name, memory_limit, locality, gpu_id,
+    return new GPUDevice(options, name, memory_limit, locality, tf_gpu_id,
                          physical_device_desc, gpu_allocator, cpu_allocator);
   }
 };
diff --git a/tensorflow/core/common_runtime/gpu/gpu_device_test.cc b/tensorflow/core/common_runtime/gpu/gpu_device_test.cc
new file mode 100644
index 0000000000..ff46be9c01
--- /dev/null
+++ b/tensorflow/core/common_runtime/gpu/gpu_device_test.cc
@@ -0,0 +1,189 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#if GOOGLE_CUDA
+
+#include "tensorflow/core/common_runtime/gpu/gpu_device.h"
+
+#include "tensorflow/core/common_runtime/gpu/gpu_init.h"
+#include "tensorflow/core/lib/core/errors.h"
+#include "tensorflow/core/lib/core/status.h"
+#include "tensorflow/core/lib/core/status_test_util.h"
+#include "tensorflow/core/platform/test.h"
+
+namespace tensorflow {
+namespace {
+const char* kDeviceNamePrefix = "/job:localhost/replica:0/task:0";
+
+static SessionOptions MakeSessionOptions(
+    const string& visible_device_list = "",
+    double per_process_gpu_memory_fraction = 0, int gpu_device_count = 1,
+    const std::vector<std::vector<float>>& memory_limit_mb = {}) {
+  SessionOptions options;
+  ConfigProto* config = &options.config;
+  (*config->mutable_device_count())["GPU"] = gpu_device_count;
+  GPUOptions* gpu_options = config->mutable_gpu_options();
+  gpu_options->set_visible_device_list(visible_device_list);
+  gpu_options->set_per_process_gpu_memory_fraction(
+      per_process_gpu_memory_fraction);
+  for (const auto& v : memory_limit_mb) {
+    auto virtual_devices =
+        gpu_options->mutable_experimental()->add_virtual_devices();
+    for (float mb : v) {
+      virtual_devices->add_memory_limit_mb(mb);
+    }
+  }
+  return options;
+}
+
+static bool StartsWith(const string& lhs, const string& rhs) {
+  if (rhs.length() > lhs.length()) return false;
+  return lhs.substr(0, rhs.length()) == rhs;
+}
+
+TEST(GPUDeviceTest, FailedToParseVisibleDeviceList) {
+  SessionOptions opts = MakeSessionOptions("0,abc");
+  std::vector<tensorflow::Device*> devices;
+  Status status = DeviceFactory::GetFactory("GPU")->CreateDevices(
+      opts, kDeviceNamePrefix, &devices);
+  EXPECT_EQ(status.code(), error::INVALID_ARGUMENT);
+  EXPECT_TRUE(StartsWith(status.error_message(), "Could not parse entry"))
+      << status;
+}
+
+TEST(GPUDeviceTest, InvalidGpuId) {
+  SessionOptions opts = MakeSessionOptions("100");
+  std::vector<tensorflow::Device*> devices;
+  Status status = DeviceFactory::GetFactory("GPU")->CreateDevices(
+      opts, kDeviceNamePrefix, &devices);
+  EXPECT_EQ(status.code(), error::INVALID_ARGUMENT);
+  EXPECT_TRUE(StartsWith(status.error_message(),
+                         "'visible_device_list' listed an invalid GPU id"))
+      << status;
+}
+
+TEST(GPUDeviceTest, DuplicateEntryInVisibleDeviceList) {
+  SessionOptions opts = MakeSessionOptions("0,0");
+  std::vector<tensorflow::Device*> devices;
+  Status status = DeviceFactory::GetFactory("GPU")->CreateDevices(
+      opts, kDeviceNamePrefix, &devices);
+  EXPECT_EQ(status.code(), error::INVALID_ARGUMENT);
+  EXPECT_TRUE(StartsWith(status.error_message(),
+                         "visible_device_list contained a duplicate entry"))
+      << status;
+}
+
+TEST(GPUDeviceTest, VirtualDeviceConfigConflictsWithMemoryFractionSettings) {
+  SessionOptions opts = MakeSessionOptions("0", 0.1, 1, {{}});
+  std::vector<tensorflow::Device*> devices;
+  Status status = DeviceFactory::GetFactory("GPU")->CreateDevices(
+      opts, kDeviceNamePrefix, &devices);
+  EXPECT_EQ(status.code(), error::INVALID_ARGUMENT);
+  EXPECT_TRUE(StartsWith(status.error_message(),
+                         "It's invalid to set per_process_gpu_memory_fraction"))
+      << status;
+}
+
+TEST(GPUDeviceTest, GpuDeviceCountTooSmall) {
+  // device_count is 0, but with one entry in visible_device_list and one
+  // (empty) VirtualDevices messages.
+  SessionOptions opts = MakeSessionOptions("0", 0, 0, {{}});
+  std::vector<tensorflow::Device*> devices;
+  Status status = DeviceFactory::GetFactory("GPU")->CreateDevices(
+      opts, kDeviceNamePrefix, &devices);
+  EXPECT_EQ(status.code(), error::UNKNOWN);
+  EXPECT_TRUE(StartsWith(status.error_message(),
+                         "Not enough GPUs to create virtual devices."))
+      << status;
+}
+
+TEST(GPUDeviceTest, NotEnoughGpuInVisibleDeviceList) {
+  // Single entry in visible_device_list with two (empty) VirtualDevices
+  // messages.
+  SessionOptions opts = MakeSessionOptions("0", 0, 8, {{}, {}});
+  std::vector<tensorflow::Device*> devices;
+  Status status = DeviceFactory::GetFactory("GPU")->CreateDevices(
+      opts, kDeviceNamePrefix, &devices);
+  EXPECT_EQ(status.code(), error::UNKNOWN);
+  EXPECT_TRUE(StartsWith(status.error_message(),
+                         "Not enough GPUs to create virtual devices."))
+      << status;
+}
+
+TEST(GPUDeviceTest, VirtualDeviceConfigConflictsWithVisibleDeviceList) {
+  // This test requires at least two visible GPU hardware.
+  if (GPUMachineManager()->VisibleDeviceCount() < 2) return;
+  // Three entries in visible_device_list with two (empty) VirtualDevices
+  // messages.
+  SessionOptions opts = MakeSessionOptions("0,1", 0, 8, {{}});
+  std::vector<tensorflow::Device*> devices;
+  Status status = DeviceFactory::GetFactory("GPU")->CreateDevices(
+      opts, kDeviceNamePrefix, &devices);
+  EXPECT_EQ(status.code(), error::INVALID_ARGUMENT);
+  EXPECT_TRUE(StartsWith(status.error_message(),
+                         "The number of GPUs in visible_device_list doesn't "
+                         "match the number of elements in the virtual_devices "
+                         "list."))
+      << status;
+}
+
+TEST(GPUDeviceTest, EmptyVirtualDeviceConfig) {
+  // It'll create single virtual device when the virtual device config is empty.
+  SessionOptions opts = MakeSessionOptions("0");
+  std::vector<tensorflow::Device*> devices;
+  TF_CHECK_OK(DeviceFactory::GetFactory("GPU")->CreateDevices(
+      opts, kDeviceNamePrefix, &devices));
+  EXPECT_EQ(1, devices.size());
+  EXPECT_GE(devices[0]->attributes().memory_limit(), 0);
+  for (auto d : devices) delete d;
+}
+
+TEST(GPUDeviceTest, SingleVirtualDeviceWithNoMemoryLimit) {
+  // It'll create single virtual device for the gpu in question when
+  // memory_limit_mb is unset.
+  SessionOptions opts = MakeSessionOptions("0", 0, 1, {{}});
+  std::vector<tensorflow::Device*> devices;
+  TF_CHECK_OK(DeviceFactory::GetFactory("GPU")->CreateDevices(
+      opts, kDeviceNamePrefix, &devices));
+  EXPECT_EQ(1, devices.size());
+  EXPECT_GE(devices[0]->attributes().memory_limit(), 0);
+  for (auto d : devices) delete d;
+}
+
+TEST(GPUDeviceTest, SingleVirtualDeviceWithMemoryLimit) {
+  SessionOptions opts = MakeSessionOptions("0", 0, 1, {{123}});
+  std::vector<tensorflow::Device*> devices;
+  TF_CHECK_OK(DeviceFactory::GetFactory("GPU")->CreateDevices(
+      opts, kDeviceNamePrefix, &devices));
+  EXPECT_EQ(1, devices.size());
+  EXPECT_EQ(123 << 20, devices[0]->attributes().memory_limit());
+  for (auto d : devices) delete d;
+}
+
+TEST(GPUDeviceTest, MultipleVirtualDevices) {
+  SessionOptions opts = MakeSessionOptions("0", 0, 1, {{123, 456}});
+  std::vector<tensorflow::Device*> devices;
+  TF_CHECK_OK(DeviceFactory::GetFactory("GPU")->CreateDevices(
+      opts, kDeviceNamePrefix, &devices));
+  EXPECT_EQ(2, devices.size());
+  EXPECT_EQ(123 << 20, devices[0]->attributes().memory_limit());
+  EXPECT_EQ(456 << 20, devices[1]->attributes().memory_limit());
+  for (auto d : devices) delete d;
+}
+
+}  // namespace
+}  // namespace tensorflow
+
+#endif
diff --git a/tensorflow/core/common_runtime/gpu/gpu_id.h b/tensorflow/core/common_runtime/gpu/gpu_id.h
new file mode 100644
index 0000000000..ff81ccd432
--- /dev/null
+++ b/tensorflow/core/common_runtime/gpu/gpu_id.h
@@ -0,0 +1,88 @@
+/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef THIRD_PARTY_TENSORFLOW_CORE_COMMON_RUNTIME_GPU_GPU_ID_H_
+#define THIRD_PARTY_TENSORFLOW_CORE_COMMON_RUNTIME_GPU_GPU_ID_H_
+
+#include "tensorflow/core/lib/gtl/int_type.h"
+
+namespace tensorflow {
+
+// There are three types of GPU ids:
+// - *physical* GPU id: this is the integer index of a GPU hardware in the
+//   physical machine, it can be filtered by CUDA environment variable
+//   CUDA_VISIBLE_DEVICES. Note that this id is not visible to Tensorflow, but
+//   result after filtering by CUDA_VISIBLE_DEVICES is visible to TF and is
+//   called CUDA GPU id as below. See
+//   http://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#env-vars
+//   for more details.
+// - CUDA GPU id (also called *visible* GPU id in
+//   third_party/tensorflow/core/protobuf/config.proto): this is the id that is
+//   visible to Tensorflow after filtering by CUDA_VISIBLE_DEVICES, and is
+//   generated by the CUDA GPU driver. It starts from 0 and is used for CUDA API
+//   calls like cuDeviceGet().
+// - TF GPU id (also called *virtual* GPU id in
+//   third_party/tensorflow/core/protobuf/config.proto): this is the id that
+//   Tensorflow generates and exposes to its users. It is the id in the <id>
+//   field of the device name "/device:GPU:<id>", and is also the identifier of
+//   a BaseGPUDevice. Note that the configuration allows us to create multiple
+//   BaseGPUDevice per GPU hardware in order to use multi CUDA streams on the
+//   hardware, so the mapping between TF GPU id and CUDA GPU id is not a 1:1
+//   mappping, see the example below.
+//
+// For example, assuming that in the machine we have GPU device with index 0, 1,
+// 2 and 3 (physical GPU id). Setting "CUDA_VISIBLE_DEVICES=1,2,3" will create
+// the following mapping between CUDA GPU id and physical GPU id:
+//
+//        CUDA GPU id ->  physical GPU id
+//                 0  ->  1
+//                 1  ->  2
+//                 2  ->  3
+//
+// Note that physical GPU id 0 is invisible to TF so there is no mapping entry
+// for it.
+//
+// Assuming we configure the Session to create one BaseGPUDevice per GPU
+// hardware, then setting GPUOptions::visible_device_list to "2,0" will create
+// the following mappting between TF GPU id and CUDA GPU id:
+//
+//                  TF GPU id  ->  CUDA GPU ID
+//      0 (i.e. /device:GPU:0) ->  2
+//      1 (i.e. /device:GPU:1) ->  0
+//
+// Note that CUDA GPU id 1 is filtered out by GPUOptions::visible_device_list,
+// so it won't be used by the TF process.
+//
+// On the other hand, if we configure it to create 2 BaseGPUDevice per GPU
+// hardware, then setting GPUOptions::visible_device_list to "2,0" will create
+// the following mappting between TF GPU id and CUDA GPU id:
+//
+//                  TF GPU id  ->  CUDA GPU ID
+//      0 (i.e. /device:GPU:0) ->  2
+//      1 (i.e. /device:GPU:1) ->  2
+//      2 (i.e. /device:GPU:2) ->  0
+//      3 (i.e. /device:GPU:3) ->  0
+//
+// We create strong-typed integer classes for both TF GPU id and CUDA GPU id to
+// minimize programming errors and improve code readability. Except for the
+// StreamExecutor interface (as we don't change its API), whenever we need a
+// TF GPU id (or CUDA GPU id) we should use TfGpuId (or CudaGpuId) instead of a
+// raw integer.
+TF_LIB_GTL_DEFINE_INT_TYPE(TfGpuId, int32);
+TF_LIB_GTL_DEFINE_INT_TYPE(CudaGpuId, int32);
+
+}  // namespace tensorflow
+
+#endif  // THIRD_PARTY_TENSORFLOW_CORE_COMMON_RUNTIME_GPU_GPU_ID_H_
diff --git a/tensorflow/core/common_runtime/gpu/gpu_id_utils.cc b/tensorflow/core/common_runtime/gpu/gpu_id_utils.cc
new file mode 100644
index 0000000000..92cd19453f
--- /dev/null
+++ b/tensorflow/core/common_runtime/gpu/gpu_id_utils.cc
@@ -0,0 +1,74 @@
+/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/core/common_runtime/gpu/gpu_id_utils.h"
+
+#include <unordered_map>
+
+#include "tensorflow/core/common_runtime/gpu/gpu_id.h"
+#include "tensorflow/core/platform/logging.h"
+#include "tensorflow/core/platform/mutex.h"
+
+namespace tensorflow {
+namespace {
+// Manages the map between TfGpuId and CUDA GPU id.
+class GpuIdManager {
+ public:
+  static GpuIdManager* singleton() {
+    static auto* manager = new GpuIdManager;
+    return manager;
+  }
+
+  void InsertOrDie(TfGpuId tf_gpu_id, CudaGpuId cuda_gpu_id)
+      LOCKS_EXCLUDED(mu_) {
+    std::pair<IdMapType::iterator, bool> result;
+    {
+      mutex_lock lock(mu_);
+      result = id_map_.insert({tf_gpu_id.value(), cuda_gpu_id.value()});
+    }
+    if (!result.second) {
+      CHECK_EQ(cuda_gpu_id.value(), result.first->second)
+          << "Mapping the same TfGpuId to a different CUDA GPU id."
+          << " TfGpuId: " << tf_gpu_id
+          << " Existing mapped CUDA GPU id: " << result.first->second
+          << " CUDA GPU id being tried to map to: " << cuda_gpu_id;
+    }
+  }
+
+  int32 FindOrDie(TfGpuId tf_gpu_id) const LOCKS_EXCLUDED(mu_) {
+    mutex_lock lock(mu_);
+    auto result = id_map_.find(tf_gpu_id.value());
+    CHECK(result != id_map_.end())
+        << "Could not find the mapping for TfGpuId: " << tf_gpu_id;
+    return result->second;
+  }
+
+ private:
+  using IdMapType = std::unordered_map<int32, int32>;
+  mutable mutex mu_;
+  IdMapType id_map_ GUARDED_BY(mu_);
+};
+}  // namespace
+
+void GpuIdUtil::InsertTfCudaGpuIdPair(TfGpuId tf_gpu_id,
+                                      CudaGpuId cuda_gpu_id) {
+  GpuIdManager::singleton()->InsertOrDie(tf_gpu_id, cuda_gpu_id);
+}
+
+CudaGpuId GpuIdUtil::TfToCudaGpuId(TfGpuId tf_gpu_id) {
+  return CudaGpuId(GpuIdManager::singleton()->FindOrDie(tf_gpu_id));
+}
+
+}  // namespace tensorflow
diff --git a/tensorflow/core/common_runtime/gpu/gpu_id_utils.h b/tensorflow/core/common_runtime/gpu/gpu_id_utils.h
new file mode 100644
index 0000000000..78e51c84c1
--- /dev/null
+++ b/tensorflow/core/common_runtime/gpu/gpu_id_utils.h
@@ -0,0 +1,61 @@
+/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef THIRD_PARTY_TENSORFLOW_CORE_COMMON_RUNTIME_GPU_GPU_ID_UTILS_H_
+#define THIRD_PARTY_TENSORFLOW_CORE_COMMON_RUNTIME_GPU_GPU_ID_UTILS_H_
+
+#include "tensorflow/core/common_runtime/gpu/gpu_id.h"
+#include "tensorflow/core/common_runtime/gpu/gpu_init.h"
+#include "tensorflow/core/lib/gtl/int_type.h"
+#include "tensorflow/core/platform/stream_executor.h"
+
+namespace tensorflow {
+namespace gpu = ::perftools::gputools;
+
+// Utility methods for translation between Tensorflow GPU ids and CUDA GPU ids.
+class GpuIdUtil {
+ public:
+  static void InsertTfCudaGpuIdPair(TfGpuId tf_gpu_id, CudaGpuId cuda_gpu_id);
+  static CudaGpuId TfToCudaGpuId(TfGpuId tf_gpu_id);
+
+  // Convenient methods for getting the associated executor given a TfGpuId or
+  // CudaGpuId.
+  static gpu::port::StatusOr<gpu::StreamExecutor*> ExecutorForCudaGpuId(
+      gpu::Platform* gpu_manager, CudaGpuId cuda_gpu_id) {
+    return gpu_manager->ExecutorForDevice(cuda_gpu_id.value());
+  }
+  static gpu::port::StatusOr<gpu::StreamExecutor*> ExecutorForCudaGpuId(
+      CudaGpuId cuda_gpu_id) {
+    return ExecutorForCudaGpuId(GPUMachineManager(), cuda_gpu_id);
+  }
+  static gpu::port::StatusOr<gpu::StreamExecutor*> ExecutorForTfGpuId(
+      TfGpuId tf_gpu_id) {
+    return ExecutorForCudaGpuId(GpuIdUtil::TfToCudaGpuId(tf_gpu_id));
+  }
+
+  // Verify that the cuda_gpu_id associated with a TfGpuId is legitimate.
+  static void CheckValidTfGpuId(TfGpuId tf_gpu_id) {
+    const CudaGpuId cuda_gpu_id = GpuIdUtil::TfToCudaGpuId(tf_gpu_id);
+    const int visible_device_count = GPUMachineManager()->VisibleDeviceCount();
+    CHECK_LT(cuda_gpu_id.value(), visible_device_count)
+        << "cuda_gpu_id is outside discovered device range."
+        << " TF GPU id: " << tf_gpu_id << " CUDA GPU id: " << cuda_gpu_id
+        << " visible device count: " << visible_device_count;
+  }
+};
+
+}  // namespace tensorflow
+
+#endif  // THIRD_PARTY_TENSORFLOW_CORE_COMMON_RUNTIME_GPU_GPU_ID_UTILS_H_
diff --git a/tensorflow/core/common_runtime/gpu/gpu_id_utils_test.cc b/tensorflow/core/common_runtime/gpu/gpu_id_utils_test.cc
new file mode 100644
index 0000000000..bebe00a431
--- /dev/null
+++ b/tensorflow/core/common_runtime/gpu/gpu_id_utils_test.cc
@@ -0,0 +1,55 @@
+/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/core/common_runtime/gpu/gpu_id_utils.h"
+
+#include "tensorflow/core/common_runtime/gpu/gpu_id.h"
+#include "tensorflow/core/platform/test.h"
+
+namespace tensorflow {
+namespace test {
+
+TEST(GpuIdTest, Basics) {
+  TfGpuId key_0(0);
+  CudaGpuId value_0(0);
+  GpuIdUtil::InsertTfCudaGpuIdPair(key_0, value_0);
+  EXPECT_EQ(value_0, GpuIdUtil::TfToCudaGpuId(key_0));
+
+  // Multiple calls to map the same value is ok.
+  GpuIdUtil::InsertTfCudaGpuIdPair(key_0, value_0);
+  EXPECT_EQ(value_0, GpuIdUtil::TfToCudaGpuId(key_0));
+
+  // Map a different TfGpuId to a different value.
+  TfGpuId key_1(3);
+  CudaGpuId value_1(2);
+  GpuIdUtil::InsertTfCudaGpuIdPair(key_1, value_1);
+  EXPECT_EQ(value_1, GpuIdUtil::TfToCudaGpuId(key_1));
+
+  // Mapping a different TfGpuId to the same value is ok.
+  TfGpuId key_2(10);
+  GpuIdUtil::InsertTfCudaGpuIdPair(key_2, value_1);
+  EXPECT_EQ(value_1, GpuIdUtil::TfToCudaGpuId(key_2));
+
+  // Mapping the same TfGpuId to a different value will crash the program.
+  ASSERT_DEATH(GpuIdUtil::InsertTfCudaGpuIdPair(key_2, value_0),
+               "Mapping the same TfGpuId to a different CUDA GPU id");
+
+  // Getting an nonexistent mapping will crash the program.
+  ASSERT_DEATH(GpuIdUtil::TfToCudaGpuId(TfGpuId(100)),
+               "Could not find the mapping for TfGpuId");
+}
+
+}  // namespace test
+}  // namespace tensorflow
diff --git a/tensorflow/core/common_runtime/gpu/process_state.cc b/tensorflow/core/common_runtime/gpu/process_state.cc
index 0675dbf3fc..8a3220ce2b 100644
--- a/tensorflow/core/common_runtime/gpu/process_state.cc
+++ b/tensorflow/core/common_runtime/gpu/process_state.cc
@@ -20,6 +20,8 @@ limitations under the License.
 #include "tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.h"
 #include "tensorflow/core/common_runtime/gpu/gpu_cudamalloc_allocator.h"
 #include "tensorflow/core/common_runtime/gpu/gpu_debug_allocator.h"
+#include "tensorflow/core/common_runtime/gpu/gpu_id.h"
+#include "tensorflow/core/common_runtime/gpu/gpu_id_utils.h"
 #include "tensorflow/core/common_runtime/gpu/gpu_init.h"
 #include "tensorflow/core/common_runtime/gpu/pool_allocator.h"
 #include "tensorflow/core/framework/allocator.h"
@@ -107,23 +109,20 @@ ProcessState::MemDesc ProcessState::PtrType(const void* ptr) {
   return MemDesc();
 }
 
-Allocator* ProcessState::GetGPUAllocator(const GPUOptions& options, int gpu_id,
+Allocator* ProcessState::GetGPUAllocator(const GPUOptions& options,
+                                         TfGpuId tf_gpu_id,
                                          size_t total_bytes) {
 #if GOOGLE_CUDA
   const string& allocator_type = options.allocator_type();
   mutex_lock lock(mu_);
-  gpu::Platform* gpu_platform = GPUMachineManager();
+  GpuIdUtil::CheckValidTfGpuId(tf_gpu_id);
 
-  // Verify that gpu_id is legitimate.
-  CHECK_LT(gpu_id, gpu_platform->VisibleDeviceCount())
-      << "gpu_id is outside discovered device range";
-
-  if (gpu_id >= static_cast<int64>(gpu_allocators_.size())) {
-    gpu_allocators_.resize(gpu_id + 1);
-    if (FLAGS_brain_gpu_record_mem_types) gpu_al_.resize(gpu_id + 1);
+  if (tf_gpu_id.value() >= static_cast<int64>(gpu_allocators_.size())) {
+    gpu_allocators_.resize(tf_gpu_id.value() + 1);
+    if (FLAGS_brain_gpu_record_mem_types) gpu_al_.resize(tf_gpu_id.value() + 1);
   }
 
-  if (gpu_allocators_[gpu_id] == nullptr) {
+  if (gpu_allocators_[tf_gpu_id.value()] == nullptr) {
     VisitableAllocator* gpu_allocator;
 
     // Validate allocator types.
@@ -132,45 +131,49 @@ Allocator* ProcessState::GetGPUAllocator(const GPUOptions& options, int gpu_id,
       return nullptr;
     }
 
-    gpu_allocator = new GPUBFCAllocator(gpu_id, total_bytes, options);
+    const CudaGpuId cuda_gpu_id = GpuIdUtil::TfToCudaGpuId(tf_gpu_id);
+    gpu_allocator =
+        new GPUBFCAllocator(cuda_gpu_id, total_bytes, options,
+                            strings::StrCat("GPU_", tf_gpu_id.value(), "_bfc"));
 
     // If true, checks for memory overwrites by writing
     // distinctive patterns on both ends of allocated memory.
     if (useCudaMemoryGuardAllocator()) {
-      gpu_allocator = new GPUDebugAllocator(gpu_allocator, gpu_id);
-      gpu_allocator = new GPUNanResetAllocator(gpu_allocator, gpu_id);
+      gpu_allocator = new GPUDebugAllocator(gpu_allocator, cuda_gpu_id);
+      gpu_allocator = new GPUNanResetAllocator(gpu_allocator, cuda_gpu_id);
     } else if (useCudaMallocAllocator()) {
       // If true, passes all allocation requests through to cudaMalloc
       // useful for doing memory debugging with tools like cuda-memcheck
       // **WARNING** probably will not work in a multi-gpu scenario
-      gpu_allocator = new GPUcudaMallocAllocator(gpu_allocator, gpu_id);
+      gpu_allocator = new GPUcudaMallocAllocator(gpu_allocator, cuda_gpu_id);
     }
-    gpu_allocators_[gpu_id] = gpu_allocator;
+    gpu_allocators_[tf_gpu_id.value()] = gpu_allocator;
 
     // If there are any pending AllocVisitors for this bus, add
     // them now.
     gpu::StreamExecutor* se =
-        gpu_platform->ExecutorForDevice(gpu_id).ValueOrDie();
+        GpuIdUtil::ExecutorForTfGpuId(tf_gpu_id).ValueOrDie();
     int bus_id = se->GetDeviceDescription().numa_node();
     if (bus_id >= 0 && bus_id < static_cast<int64>(gpu_visitors_.size())) {
       for (const auto& v : gpu_visitors_[bus_id]) {
-        gpu_allocators_[gpu_id]->AddAllocVisitor(v);
+        gpu_allocator->AddAllocVisitor(v);
       }
     }
     if (FLAGS_brain_gpu_record_mem_types) {
       MemDesc md;
       md.loc = MemDesc::GPU;
-      md.dev_index = gpu_id;
+      md.dev_index = cuda_gpu_id.value();
       md.gpu_registered = false;
       md.nic_registered = true;
-      if (static_cast<int64>(gpu_al_.size()) <= gpu_id)
-        gpu_al_.resize(gpu_id + 1);
-      gpu_al_[gpu_id] = new internal::RecordingAllocator(
-          &mem_desc_map_, gpu_allocators_[gpu_id], md, &mu_);
+      if (static_cast<int64>(gpu_al_.size()) <= tf_gpu_id.value()) {
+        gpu_al_.resize(tf_gpu_id.value() + 1);
+      }
+      gpu_al_[tf_gpu_id.value()] = new internal::RecordingAllocator(
+          &mem_desc_map_, gpu_allocator, md, &mu_);
     }
   }
-  if (FLAGS_brain_gpu_record_mem_types) return gpu_al_[gpu_id];
-  return gpu_allocators_[gpu_id];
+  if (FLAGS_brain_gpu_record_mem_types) return gpu_al_[tf_gpu_id.value()];
+  return gpu_allocators_[tf_gpu_id.value()];
 #else
   LOG(FATAL) << "GPUAllocator unavailable. Not compiled with --config=cuda.";
   return nullptr;
@@ -246,7 +249,7 @@ Allocator* ProcessState::GetCUDAHostAllocator(int numa_node) {
   gpu::StreamExecutor* se = nullptr;
   for (int i = 0; i < static_cast<int>(gpu_allocators_.size()); ++i) {
     if (gpu_allocators_[i] != nullptr) {
-      se = GPUMachineManager()->ExecutorForDevice(i).ValueOrDie();
+      se = GpuIdUtil::ExecutorForTfGpuId(TfGpuId(i)).ValueOrDie();
       break;
     }
   }
@@ -290,14 +293,12 @@ Allocator* ProcessState::GetCUDAHostAllocator(int numa_node) {
 void ProcessState::AddGPUAllocVisitor(int bus_id, AllocVisitor visitor) {
 #if GOOGLE_CUDA
   mutex_lock lock(mu_);
-  gpu::Platform* gpu_platform = GPUMachineManager();
-  for (int gpu_id = 0; gpu_id < static_cast<int64>(gpu_allocators_.size());
-       ++gpu_id) {
+  for (int i = 0; i < static_cast<int64>(gpu_allocators_.size()); ++i) {
     gpu::StreamExecutor* se =
-        gpu_platform->ExecutorForDevice(gpu_id).ValueOrDie();
-    if (gpu_allocators_[gpu_id] &&
+        GpuIdUtil::ExecutorForTfGpuId(TfGpuId(i)).ValueOrDie();
+    if (gpu_allocators_[i] &&
         (se->GetDeviceDescription().numa_node() + 1) == bus_id) {
-      gpu_allocators_[gpu_id]->AddAllocVisitor(visitor);
+      gpu_allocators_[i]->AddAllocVisitor(visitor);
     }
   }
   while (bus_id >= static_cast<int64>(gpu_visitors_.size())) {
diff --git a/tensorflow/core/common_runtime/gpu/process_state.h b/tensorflow/core/common_runtime/gpu/process_state.h
index 319c508b92..fa1e3fd785 100644
--- a/tensorflow/core/common_runtime/gpu/process_state.h
+++ b/tensorflow/core/common_runtime/gpu/process_state.h
@@ -17,9 +17,11 @@ limitations under the License.
 #define TENSORFLOW_COMMON_RUNTIME_GPU_PROCESS_STATE_H_
 
 #include <functional>
+#include <map>
 #include <unordered_map>
 #include <vector>
 
+#include "tensorflow/core/common_runtime/gpu/gpu_id.h"
 #include "tensorflow/core/framework/allocator.h"
 #include "tensorflow/core/platform/mutex.h"
 #include "tensorflow/core/platform/thread_annotations.h"
@@ -80,17 +82,17 @@ class ProcessState {
   //
   // 'total_bytes' is the total number of bytes that should be made
   // available to the allocator.  The first call to this function for
-  // a given gpu_id creates the allocator, so only the total_bytes
+  // a given tf_gpu_id creates the allocator, so only the total_bytes
   // used on that first call is used.
   //
   // "Allocator type" describes the type of algorithm to use for the
   // underlying allocator.  REQUIRES: Must be a valid type (see
   // config.proto for the list of supported strings.).
   //
-  // REQUIRES: gpu_id must be a valid ordinal for a GPU available in the
+  // REQUIRES: tf_gpu_id must be a valid id for a BaseGPUDevice available in the
   // current system environment.  Otherwise returns nullptr.
-  virtual Allocator* GetGPUAllocator(const GPUOptions& options, int gpu_id,
-                                     size_t total_bytes);
+  virtual Allocator* GetGPUAllocator(const GPUOptions& options,
+                                     TfGpuId tf_gpu_id, size_t total_bytes);
 
   virtual Allocator* GetCUDAHostAllocator(int numa_node);
 
diff --git a/tensorflow/core/protobuf/config.proto b/tensorflow/core/protobuf/config.proto
index 1916316245..ccab69b9c0 100644
--- a/tensorflow/core/protobuf/config.proto
+++ b/tensorflow/core/protobuf/config.proto
@@ -42,18 +42,24 @@ message GPUOptions {
   // A comma-separated list of GPU ids that determines the 'visible'
   // to 'virtual' mapping of GPU devices.  For example, if TensorFlow
   // can see 8 GPU devices in the process, and one wanted to map
-  // visible GPU devices 5 and 3 as "/device:GPU:0", and "/device:GPU:1", then one
-  // would specify this field as "5,3".  This field is similar in
+  // visible GPU devices 5 and 3 as "/device:GPU:0", and "/device:GPU:1",
+  // then one would specify this field as "5,3".  This field is similar in
   // spirit to the CUDA_VISIBLE_DEVICES environment variable, except
   // it applies to the visible GPU devices in the process.
   //
-  // NOTE: The GPU driver provides the process with the visible GPUs
-  // in an order which is not guaranteed to have any correlation to
-  // the *physical* GPU id in the machine.  This field is used for
-  // remapping "visible" to "virtual", which means this operates only
-  // after the process starts.  Users are required to use vendor
-  // specific mechanisms (e.g., CUDA_VISIBLE_DEVICES) to control the
-  // physical to visible device mapping prior to invoking TensorFlow.
+  // NOTE:
+  // 1. The GPU driver provides the process with the visible GPUs
+  //    in an order which is not guaranteed to have any correlation to
+  //    the *physical* GPU id in the machine.  This field is used for
+  //    remapping "visible" to "virtual", which means this operates only
+  //    after the process starts.  Users are required to use vendor
+  //    specific mechanisms (e.g., CUDA_VISIBLE_DEVICES) to control the
+  //    physical to visible device mapping prior to invoking TensorFlow.
+  // 2. In the code, the ids in this list are also called "CUDA GPU id"s,
+  //    and the 'virtual' ids of GPU devices (i.e. the ids in the device
+  //    name "/device:GPU:<id>") are also called "TF GPU id"s. Please
+  //    refer to third_party/tensorflow/core/common_runtime/gpu/gpu_id.h
+  //    for more information.
   string visible_device_list = 5;
 
   // In the event polling loop sleep this many microseconds between
@@ -77,6 +83,52 @@ message GPUOptions {
   // memory is unpageable, having too much pinned memory might negatively impact
   // the overall host system performance.
   bool force_gpu_compatible = 8;
+
+  // Everything inside Experimental is subject to change and is not subject
+  // to API stability guarantees in
+  // https://www.tensorflow.org/programmers_guide/version_compat.
+  message Experimental {
+    // Configuration for breaking down a visible GPU into multiple "virtual"
+    // devices.
+    message VirtualDevices {
+      // Per "virtual" device memory limit, in MB. The number of elements in
+      // the list is the number of virtual devices to create on the
+      // corresponding visible GPU (see "virtual_devices" below).
+      // If empty, it will create single virtual device taking all available
+      // memory from the device.
+      //
+      // For the concept of "visible" and "virtual" GPU, see the comments for
+      // "visible_device_list" above for more information.
+      repeated float memory_limit_mb = 1;
+    }
+
+    // The multi virtual device settings. If empty (not set), it will create
+    // single virtual device on each visible GPU, according to the settings
+    // in "visible_device_list" above. Otherwise, the number of elements in the
+    // list must be the same as the number of visible GPUs (after
+    // "visible_device_list" filtering if it is set), and the string represented
+    // device names (e.g. /device:GPU:<id>) will refer to the virtual
+    // devices and have the <id> field assigned sequentially starting from 0,
+    // according to the order they appear in this list and the "memory_limit"
+    // list inside each element. For example,
+    //   visible_device_list = "1,0"
+    //   virtual_devices { memory_limit: 1GB memory_limit: 2GB }
+    //   virtual_devices {}
+    // will create three virtual devices as:
+    //   /device:GPU:0 -> visible GPU 1 with 1GB memory
+    //   /device:GPU:1 -> visible GPU 1 with 2GB memory
+    //   /device:GPU:2 -> visible GPU 0 with all available memory
+    //
+    // NOTE:
+    // 1. It's invalid to set both this and "per_process_gpu_memory_fraction"
+    //    at the same time.
+    // 2. Currently this setting is per-process, not per-session. Using
+    //    different settings in different sessions within same process will
+    //    result in undefined behavior.
+    repeated VirtualDevices virtual_devices = 1;
+  }
+
+  Experimental experimental = 9;
 };
 
 // Options passed to the graph optimizer
diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD
index bb47acabf9..a40b87e84b 100644
--- a/tensorflow/python/BUILD
+++ b/tensorflow/python/BUILD
@@ -3468,6 +3468,19 @@ cuda_py_test(
     ],
 )
 
+cuda_py_test(
+    name = "virtual_gpu_test",
+    size = "small",
+    srcs = ["client/virtual_gpu_test.py"],
+    additional_deps = [
+        ":client",
+        ":client_testlib",
+        ":framework_for_generated_wrappers",
+        ":math_ops",
+        "//tensorflow/core:protos_all_py",
+    ],
+)
+
 py_test(
     name = "graph_util_test",
     size = "small",
diff --git a/tensorflow/python/client/virtual_gpu_test.py b/tensorflow/python/client/virtual_gpu_test.py
new file mode 100644
index 0000000000..addf63474c
--- /dev/null
+++ b/tensorflow/python/client/virtual_gpu_test.py
@@ -0,0 +1,245 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for multiple virtual GPU support."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import random
+
+import numpy as np
+
+from google.protobuf import text_format
+from tensorflow.core.protobuf import config_pb2
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import ops
+from tensorflow.python.framework import test_util
+from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import math_ops
+from tensorflow.python.ops import random_ops
+from tensorflow.python.ops import variables
+from tensorflow.python.platform import test
+from tensorflow.python.platform import tf_logging as logging
+
+
+class VirtualGpuTestUtil(object):
+
+  def __init__(self,
+               dim=1000,
+               num_ops=100,
+               virtual_devices_per_gpu=None,
+               device_probabilities=None):
+    self._dim = dim
+    self._num_ops = num_ops
+    if virtual_devices_per_gpu is None:
+      self._virtual_devices_per_gpu = [3]
+    else:
+      self._virtual_devices_per_gpu = virtual_devices_per_gpu
+    self._visible_device_list = [
+        i for i in range(len(self._virtual_devices_per_gpu))
+    ]
+    gpu_devices = [
+        ('/gpu:' + str(i)) for i in range(sum(self._virtual_devices_per_gpu))
+    ]
+    self.devices = ['/cpu:0'] + gpu_devices
+    self._num_devices = len(self.devices)
+    # Each virtual device gets 2GB memory.
+    self._mem_limits_mb = [
+        ([1 << 11] * i) for i in self._virtual_devices_per_gpu
+    ]
+    self.config = self._GetSessionConfig()
+
+    if device_probabilities is not None:
+      self._device_probabilities = list(device_probabilities)  # Deep copy
+      for i in range(1, self._num_devices):
+        self._device_probabilities[i] += self._device_probabilities[i - 1]
+    else:
+      # Each device gets same probability to be assigned an operation.
+      step = 1.0 / self._num_devices
+      self._device_probabilities = [
+          (x + 1) * step for x in range(self._num_devices)
+      ]
+    # To prevent rounding error causing problems.
+    self._device_probabilities[self._num_devices - 1] = 1.1
+
+    logging.info('dim: %d', self._dim)
+    logging.info('num_ops: %d', self._num_ops)
+    logging.info('visible_device_list: %s', str(self._visible_device_list))
+    logging.info('virtual_devices_per_gpu: %s',
+                 str(self._virtual_devices_per_gpu))
+    logging.info('mem_limits: %s', str(self._mem_limits_mb))
+    logging.info('devices: %s', str(self.devices))
+    logging.info('config: %s', text_format.MessageToString(self.config))
+    logging.info('device_probabilities: %s', str(self._device_probabilities))
+
+  # Creates virtual GPU devices
+  def _GetSessionConfig(self):
+    virtual_device_gpu_options = config_pb2.GPUOptions(
+        visible_device_list=','.join(str(d) for d in self._visible_device_list),
+        experimental=config_pb2.GPUOptions.Experimental(virtual_devices=[
+            config_pb2.GPUOptions.Experimental.VirtualDevices(
+                memory_limit_mb=i) for i in self._mem_limits_mb
+        ]))
+    return config_pb2.ConfigProto(gpu_options=virtual_device_gpu_options)
+
+  # Generates a list of 3-tuples, each tuple contains the source and destination
+  # device index for a binary operation like 'add', like:
+  # (src_devcie_1, src_device_2, dst_device)
+  def _GenerateOperationPlacement(self):
+    result = []
+    for unused_i in range(self._num_ops):
+      op_device = ()
+      for unused_j in range(3):
+        random_num = random.random()
+        for device_index in range(self._num_devices):
+          if self._device_probabilities[device_index] > random_num:
+            op_device += (device_index,)
+            break
+      result.append(op_device)
+    return result
+
+  # Logs part of the matrix for debugging purposes.
+  def _LogMatrix(self, mat, dim):
+    logging.info('---- printing the first 10*10 submatrix ----')
+    for i in range(min(10, dim)):
+      row = ''
+      for j in range(min(10, dim)):
+        row += ' ' + str(mat[i][j])
+      logging.info(row)
+
+  # Runs a list of 'add' operations where each operation satisfies the device
+  # placement constraints in `op_placement`, and returns the result.
+  def _TestRandomGraphWithDevices(self,
+                                  sess,
+                                  seed,
+                                  op_placement,
+                                  devices,
+                                  debug_mode=False):
+    data = []
+    shape = (self._dim, self._dim)
+    feed_dict = {}
+    # Initialize the matrices
+    for i in range(len(devices)):
+      with ops.device(devices[i]):
+        var = array_ops.placeholder(dtypes.float32, shape=shape)
+        np.random.seed(seed + i)
+        feed_dict[var] = np.random.uniform(
+            low=0, high=0.1, size=shape).astype(np.float32)
+        data.append(var)
+    # Run the 'add' operations on those matrices
+    for op in op_placement:
+      with ops.device(devices[op[2]]):
+        data[op[2]] = math_ops.add(data[op[0]], data[op[1]])
+    with ops.device('/cpu:0'):
+      s = data[0]
+      for i in range(1, len(data)):
+        s = math_ops.add(s, data[i])
+    if debug_mode:
+      logging.info(ops.get_default_graph().as_graph_def())
+    result = sess.run(s, feed_dict=feed_dict)
+    self._LogMatrix(result, self._dim)
+    return result
+
+  # Generates a random graph with `self._num_ops` 'add' operations with each
+  # operation placed on different virtual device, test that the result is
+  # identical to the result obtained by running the same graph on cpu only.
+  def TestRandomGraph(self, sess, op_placement=None, random_seed=None):
+    debug_mode = False
+    if op_placement is None:
+      op_placement = self._GenerateOperationPlacement()
+    else:
+      debug_mode = True
+    if random_seed is None:
+      random_seed = random.randint(0, 1 << 31)
+    else:
+      debug_mode = True
+    logging.info('Virtual gpu functional test for random graph...')
+    logging.info('operation placement: %s', str(op_placement))
+    logging.info('random seed: %d', random_seed)
+
+    # Run with multiple virtual gpus.
+    result_vgd = self._TestRandomGraphWithDevices(
+        sess, random_seed, op_placement, self.devices, debug_mode=debug_mode)
+    # Run with single cpu.
+    result_cpu = self._TestRandomGraphWithDevices(
+        sess,
+        random_seed,
+        op_placement, ['/cpu:0'] * self._num_devices,
+        debug_mode=debug_mode)
+    # Test the result
+    for i in range(self._dim):
+      for j in range(self._dim):
+        if result_vgd[i][j] != result_cpu[i][j]:
+          logging.error(
+              'Result mismatch at row %d column %d: expected %f, actual %f', i,
+              j, result_cpu[i][j], result_vgd[i][j])
+          logging.error('Devices: %s', self.devices)
+          logging.error('Memory limits (in MB): %s', self._mem_limits_mb)
+          return False
+    return True
+
+
+@test_util.with_c_api
+class VirtualGpuTest(test_util.TensorFlowTestCase):
+
+  def __init__(self, method_name):
+    super(VirtualGpuTest, self).__init__(method_name)
+    self._util = VirtualGpuTestUtil()
+
+  def testStatsContainAllDeviceNames(self):
+    with self.test_session(config=self._util.config) as sess:
+      # TODO(laigd): b/70811538. The is_gpu_available() call will invoke
+      # DeviceFactory::AddDevices() with a default SessionOption, which prevents
+      # adding virtual devices in the future, thus must be called within a
+      # context of a session within which virtual devices are created. Same in
+      # the following test case.
+      if not test.is_gpu_available(cuda_only=True):
+        self.skipTest('No GPU available')
+      run_options = config_pb2.RunOptions(
+          trace_level=config_pb2.RunOptions.FULL_TRACE)
+      run_metadata = config_pb2.RunMetadata()
+
+      mat_shape = [10, 10]
+      data = []
+      for d in self._util.devices:
+        with ops.device(d):
+          var = variables.Variable(random_ops.random_uniform(mat_shape))
+          sess.run(var.initializer)
+          data.append(var)
+      s = data[0]
+      for i in range(1, len(data)):
+        s = math_ops.add(s, data[i])
+      sess.run(s, options=run_options, run_metadata=run_metadata)
+
+    self.assertTrue(run_metadata.HasField('step_stats'))
+    step_stats = run_metadata.step_stats
+    devices = [d.device for d in step_stats.dev_stats]
+    self.assertTrue('/job:localhost/replica:0/task:0/device:CPU:0' in devices)
+    self.assertTrue('/job:localhost/replica:0/task:0/device:GPU:0' in devices)
+    self.assertTrue('/job:localhost/replica:0/task:0/device:GPU:1' in devices)
+    self.assertTrue('/job:localhost/replica:0/task:0/device:GPU:2' in devices)
+
+  def testLargeRandomGraph(self):
+    with self.test_session(config=self._util.config) as sess:
+      if not test.is_gpu_available(cuda_only=True):
+        self.skipTest('No GPU available')
+      for _ in range(10):
+        if not self._util.TestRandomGraph(sess):
+          return
+
+
+if __name__ == '__main__':
+  test.main()
diff --git a/tensorflow/tools/api/golden/tensorflow.-g-p-u-options.pbtxt b/tensorflow/tools/api/golden/tensorflow.-g-p-u-options.pbtxt
index 30f7e4e116..875d802a9c 100644
--- a/tensorflow/tools/api/golden/tensorflow.-g-p-u-options.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.-g-p-u-options.pbtxt
@@ -18,6 +18,14 @@ tf_class {
     name: "DESCRIPTOR"
     mtype: "<type \'google.protobuf.pyext._message.MessageDescriptor\'>"
   }
+  member {
+    name: "EXPERIMENTAL_FIELD_NUMBER"
+    mtype: "<type \'int\'>"
+  }
+  member {
+    name: "Experimental"
+    mtype: "<class \'google.protobuf.pyext.cpp_message.GeneratedProtocolMessageType\'>"
+  }
   member {
     name: "Extensions"
     mtype: "<type \'getset_descriptor\'>"
diff --git a/tensorflow/tools/api/tests/api_compatibility_test.py b/tensorflow/tools/api/tests/api_compatibility_test.py
index a8fdf4c9a0..1fa4e85e02 100644
--- a/tensorflow/tools/api/tests/api_compatibility_test.py
+++ b/tensorflow/tools/api/tests/api_compatibility_test.py
@@ -256,6 +256,7 @@ class ApiCompatibilityTest(test.TestCase):
 
     public_api_visitor = public_api.PublicAPIVisitor(visitor)
     public_api_visitor.do_not_descend_map['tf'].append('contrib')
+    public_api_visitor.do_not_descend_map['tf.GPUOptions'] = ['Experimental']
     traverse.traverse(tf, public_api_visitor)
 
     proto_dict = visitor.GetProtos()
-- 
GitLab


From e1d4eea1ee9a8dd81c2327dd57d3a0eea4099924 Mon Sep 17 00:00:00 2001
From: HyoukJoong Lee <hyouklee@google.com>
Date: Mon, 18 Dec 2017 20:48:26 -0800
Subject: [PATCH 1194/1225] Changed the buffer liveness such that the buffers
 pointed to by the root instruction have liveness to the end of the
 computation.

PiperOrigin-RevId: 179506342
---
 .../compiler/xla/service/buffer_liveness.cc   | 12 +++++-
 .../xla/service/buffer_liveness_test.cc       | 42 +++++++++++++++++++
 2 files changed, 53 insertions(+), 1 deletion(-)

diff --git a/tensorflow/compiler/xla/service/buffer_liveness.cc b/tensorflow/compiler/xla/service/buffer_liveness.cc
index 513bfa3b7f..e7749252ce 100644
--- a/tensorflow/compiler/xla/service/buffer_liveness.cc
+++ b/tensorflow/compiler/xla/service/buffer_liveness.cc
@@ -102,8 +102,8 @@ bool BufferLiveness::live_range_strictly_before(const LogicalBuffer& a,
     return false;
   }
 
-  // Every user of 'a' must be a predecessor of 'b' or 'b' itself.
   for (const BufferAlias& alias : points_to_analysis_->GetBufferAliases(a)) {
+    // Every user of 'a' must be a predecessor of 'b' or 'b' itself.
     for (auto user : alias.instruction()->users()) {
       if (DoesNotUseOperandBuffer(alias.instruction(), alias.index(), user,
                                   points_to_analysis())) {
@@ -114,6 +114,16 @@ bool BufferLiveness::live_range_strictly_before(const LogicalBuffer& a,
         return false;
       }
     }
+
+    // If the root instruction aliases the buffer 'a', the live range of 'a' is
+    // until the end of the computation and can never be strictly before another
+    // buffer. This is needed to prevent the root instruction's buffers from
+    // being reused by later instructions even when the root is not the last
+    // instruction in the schedule.
+    if (alias.instruction()->parent()->root_instruction() ==
+        alias.instruction()) {
+      return false;
+    }
   }
 
   // If 'b' is a user of 'a' then the buffers interfere unless 'a.instruction'
diff --git a/tensorflow/compiler/xla/service/buffer_liveness_test.cc b/tensorflow/compiler/xla/service/buffer_liveness_test.cc
index 13825fe05b..f623aef67a 100644
--- a/tensorflow/compiler/xla/service/buffer_liveness_test.cc
+++ b/tensorflow/compiler/xla/service/buffer_liveness_test.cc
@@ -311,6 +311,48 @@ TEST_F(BufferLivenessTest, OverlappedBuffersSequentialOrder) {
   EXPECT_FALSE(InstructionsMayInterfere(*liveness, add, exp));
 }
 
+TEST_F(BufferLivenessTest, RootInstructionIsNotLastInSequentialOrder) {
+  // Tests that when the root instruction is not the last instruction in the
+  // schedule, the live range of its buffers interfere with the buffers of the
+  // later instructions.
+  //
+  // Two sets of independent instructions are executed in the computation.
+  // param --> add (root)
+  // recv --> recv-done --> send --> send-done
+  //
+  // Sequential order:
+  //  param, add (root), recv, recv-done, send, send-done
+  auto builder = HloComputation::Builder(TestName());
+  auto param =
+      builder.AddInstruction(HloInstruction::CreateParameter(0, vec_, "param"));
+  auto add = builder.AddInstruction(
+      HloInstruction::CreateBinary(vec_, HloOpcode::kAdd, param, param));
+  auto recv = builder.AddInstruction(
+      HloInstruction::CreateRecv(vec_, /*channel_id=*/0));
+  auto recv_done = builder.AddInstruction(HloInstruction::CreateRecvDone(recv));
+  auto send = builder.AddInstruction(
+      HloInstruction::CreateSend(recv_done, /*channel_id=*/1));
+  auto send_done = builder.AddInstruction(HloInstruction::CreateSendDone(send));
+
+  auto module = CreateNewModule();
+  auto computation = module->AddEntryComputation(builder.Build(add));
+
+  SequentialHloOrdering::HloModuleSequence module_sequence;
+  std::vector<const HloInstruction*> order = {param,     add,  recv,
+                                              recv_done, send, send_done};
+  module_sequence.emplace(computation, order);
+  auto liveness =
+      BufferLiveness::Run(module.get(), xla::MakeUnique<SequentialHloOrdering>(
+                                            module.get(), module_sequence))
+          .ConsumeValueOrDie();
+
+  EXPECT_FALSE(InstructionsMayInterfere(*liveness, param, add));
+  // Check the root instruction (add) buffer interferes with the recv buffer.
+  EXPECT_TRUE(
+      liveness->MayInterfere(GetBuffer(*liveness, add, /*index=*/{}),
+                             GetBuffer(*liveness, recv, /*index=*/{0})));
+}
+
 TEST_F(BufferLivenessTest, TupleLiveOut) {
   // Verify MaybeLiveOut with nested tuples. Result of computation looks like:
   //
-- 
GitLab


From 92aafb5f6b833483517306b71d1a76c9edd33bf9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Yan=20Facai=20=28=E9=A2=9C=E5=8F=91=E6=89=8D=29?=
 <facai.yan@gmail.com>
Date: Tue, 19 Dec 2017 13:14:59 +0800
Subject: [PATCH 1195/1225] TST: add test case

---
 tensorflow/python/layers/base_test.py | 60 +++++++++++++++++++++++++++
 1 file changed, 60 insertions(+)

diff --git a/tensorflow/python/layers/base_test.py b/tensorflow/python/layers/base_test.py
index 3e5a51eb62..63b9d3cb38 100644
--- a/tensorflow/python/layers/base_test.py
+++ b/tensorflow/python/layers/base_test.py
@@ -474,6 +474,66 @@ class BaseLayerTest(test.TestCase):
     layer.apply(x)
     self.assertEqual(len(layer.get_losses_for(x)), 1)
 
+  def testNameScopeIsConsistentWithVariableScope(self):
+    # Github issue 13429.
+
+    class MyLayer(base_layers.Layer):
+
+      def build(self, input_shape):
+        self.my_var = self.add_variable('my_var', (), dtypes.float32)
+        self.built = True
+
+      def call(self, inputs):
+        return math_ops.multiply(inputs, self.my_var, name='my_op')
+
+    def _gen_layer(x, name=None):
+      layer = MyLayer(name=name)
+      out = layer.apply(x)
+      return layer, out
+
+    # unnamed layer
+    with ops.Graph().as_default():
+      x = array_ops.placeholder(dtypes.float32, (), 'x')
+
+      layer, op = _gen_layer(x)
+      layer1, op1 = _gen_layer(op)
+      layer2, op2 = _gen_layer(op1)
+
+      self.assertEqual(layer.my_var.name, 'my_layer/my_var:0')
+      self.assertEqual(op.name, 'my_layer/my_op:0')
+      self.assertEqual(layer1.my_var.name, 'my_layer_1/my_var:0')
+      self.assertEqual(op1.name, 'my_layer_1/my_op:0')
+      self.assertEqual(layer2.my_var.name, 'my_layer_2/my_var:0')
+      self.assertEqual(op2.name, 'my_layer_2/my_op:0')
+    # name starts from zero
+    with ops.Graph().as_default():
+      x = array_ops.placeholder(dtypes.float32, (), 'x')
+
+      layer, op = _gen_layer(x, name='name')
+      layer1, op1 = _gen_layer(op, name='name_1')
+      layer2, op2 = _gen_layer(op1, name='name_2')
+
+      self.assertEqual(layer.my_var.name, 'name/my_var:0')
+      self.assertEqual(op.name, 'name/my_op:0')
+      self.assertEqual(layer1.my_var.name, 'name_1/my_var:0')
+      self.assertEqual(op1.name, 'name_1/my_op:0')
+      self.assertEqual(layer2.my_var.name, 'name_2/my_var:0')
+      self.assertEqual(op2.name, 'name_2/my_op:0')
+    # name starts from one
+    with ops.Graph().as_default():
+      x = array_ops.placeholder(dtypes.float32, (), 'x')
+
+      layer, op = _gen_layer(x, name='name_1')
+      layer1, op1 = _gen_layer(op, name='name_2')
+      layer2, op2 = _gen_layer(op1, name='name_3')
+
+      self.assertEqual(layer.my_var.name, 'name_1/my_var:0')
+      self.assertEqual(op.name, 'name_1/my_op:0')
+      self.assertEqual(layer1.my_var.name, 'name_2/my_var:0')
+      self.assertEqual(op1.name, 'name_2/my_op:0')
+      self.assertEqual(layer2.my_var.name, 'name_3/my_var:0')
+      self.assertEqual(op2.name, 'name_3/my_op:0')
+
 
 if __name__ == '__main__':
   test.main()
-- 
GitLab


From a5aa4191f375beb7f21d69c565d2ec13e9adbc0c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Yan=20Facai=20=28=E9=A2=9C=E5=8F=91=E6=89=8D=29?=
 <facai.yan@gmail.com>
Date: Tue, 19 Dec 2017 13:38:23 +0800
Subject: [PATCH 1196/1225] BUG: fix name scope collision in layer

---
 tensorflow/python/layers/base.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/tensorflow/python/layers/base.py b/tensorflow/python/layers/base.py
index 4c92aac915..acbbb21322 100644
--- a/tensorflow/python/layers/base.py
+++ b/tensorflow/python/layers/base.py
@@ -493,7 +493,8 @@ class Layer(object):
 
     self._set_scope(None)
     with vs.variable_scope(
-        self._scope, reuse=(self.built or self._reuse)) as scope:
+        self._scope, reuse=(self.built or self._reuse),
+        auxiliary_name_scope=False) as scope:
       with ops.name_scope(self._name_scope_name(scope)):
         variable = vs.get_variable(name,
                                    shape=shape,
@@ -602,11 +603,11 @@ class Layer(object):
         # variable scope with this setting. We avoid re-creating variable scopes
         # after this point as an optimization.
         self._always_reuse_variable_scope = vs.variable_scope(
-            self._scope, reuse=True)
+            self._scope, reuse=True, auxiliary_name_scope=False)
         scope_context_manager = self._always_reuse_variable_scope
     else:
       scope_context_manager = vs.variable_scope(
-          self._scope, reuse=self._reuse)
+          self._scope, reuse=self._reuse, auxiliary_name_scope=False)
     with scope_context_manager as scope:
       with ops.name_scope(self._name_scope_name(scope)):
         if not self.built:
-- 
GitLab


From 740cb2fa204c5bd6438fec3e9bf61cb65689575c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Yan=20Facai=20=28=E9=A2=9C=E5=8F=91=E6=89=8D=29?=
 <facai.yan@gmail.com>
Date: Tue, 19 Dec 2017 13:45:02 +0800
Subject: [PATCH 1197/1225] CLN: clean code

---
 tensorflow/python/layers/base_test.py | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/tensorflow/python/layers/base_test.py b/tensorflow/python/layers/base_test.py
index 63b9d3cb38..42c3693922 100644
--- a/tensorflow/python/layers/base_test.py
+++ b/tensorflow/python/layers/base_test.py
@@ -476,7 +476,6 @@ class BaseLayerTest(test.TestCase):
 
   def testNameScopeIsConsistentWithVariableScope(self):
     # Github issue 13429.
-
     class MyLayer(base_layers.Layer):
 
       def build(self, input_shape):
@@ -494,7 +493,6 @@ class BaseLayerTest(test.TestCase):
     # unnamed layer
     with ops.Graph().as_default():
       x = array_ops.placeholder(dtypes.float32, (), 'x')
-
       layer, op = _gen_layer(x)
       layer1, op1 = _gen_layer(op)
       layer2, op2 = _gen_layer(op1)
@@ -508,7 +506,6 @@ class BaseLayerTest(test.TestCase):
     # name starts from zero
     with ops.Graph().as_default():
       x = array_ops.placeholder(dtypes.float32, (), 'x')
-
       layer, op = _gen_layer(x, name='name')
       layer1, op1 = _gen_layer(op, name='name_1')
       layer2, op2 = _gen_layer(op1, name='name_2')
@@ -522,7 +519,6 @@ class BaseLayerTest(test.TestCase):
     # name starts from one
     with ops.Graph().as_default():
       x = array_ops.placeholder(dtypes.float32, (), 'x')
-
       layer, op = _gen_layer(x, name='name_1')
       layer1, op1 = _gen_layer(op, name='name_2')
       layer2, op2 = _gen_layer(op1, name='name_3')
-- 
GitLab


From c475eba63a9c0ec9913f1baeecf8a31aca805046 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 18 Dec 2017 23:07:13 -0800
Subject: [PATCH 1198/1225] [XLA] Adding more tests for conditional.

PiperOrigin-RevId: 179514274
---
 .../compiler/xla/tests/conditional_test.cc    | 87 +++++++++++++++++++
 1 file changed, 87 insertions(+)

diff --git a/tensorflow/compiler/xla/tests/conditional_test.cc b/tensorflow/compiler/xla/tests/conditional_test.cc
index cbfacaea53..c8c4932be8 100644
--- a/tensorflow/compiler/xla/tests/conditional_test.cc
+++ b/tensorflow/compiler/xla/tests/conditional_test.cc
@@ -133,6 +133,93 @@ XLA_TEST_F(ConditionalOpTest, Parameters1) {
   ComputeAndCompareR0<float>(&builder, 12.0f, {}, error_spec_);
 }
 
+// Test conditional with two different computations in the true and false cases
+// that take in different arguments.
+XLA_TEST_F(ConditionalOpTest, DiffComputationsDiffArgs) {
+  ComputationBuilder builder(client_, TestName());
+  auto pred = builder.ConstantR0<bool>(false);
+  auto operand1 = builder.ConstantR0<float>(56.4f);
+  auto operand2 = builder.ConstantR0<float>(12.6f);
+  auto result =
+      builder.Conditional(pred, operand1, CreateR0F32CeilComputation(),
+                          operand2, CreateR0F32FloorComputation());
+
+  ComputeAndCompareR0<float>(&builder, 12.0f, {}, error_spec_);
+}
+
+// Test conditional with two different computations in the true and false cases
+// that take in the same arguments.
+XLA_TEST_F(ConditionalOpTest, DiffComputationsSameArg) {
+  ComputationBuilder builder(client_, TestName());
+  auto pred = builder.ConstantR0<bool>(false);
+  auto operand = builder.ConstantR0<float>(12.6f);
+  auto result = builder.Conditional(pred, operand, CreateR0F32CeilComputation(),
+                                    operand, CreateR0F32FloorComputation());
+
+  ComputeAndCompareR0<float>(&builder, 12.0f, {}, error_spec_);
+}
+
+// Test conditional with the same computation in the true and false cases but
+// take in different arguments.
+XLA_TEST_F(ConditionalOpTest, SameComputationDiffArgs) {
+  ComputationBuilder builder(client_, TestName());
+  auto pred = builder.ConstantR0<bool>(false);
+  auto operand1 = builder.ConstantR0<float>(56.4f);
+  auto operand2 = builder.ConstantR0<float>(12.6f);
+  auto floor = CreateR0F32FloorComputation();
+  auto result = builder.Conditional(pred, operand1, floor, operand2, floor);
+
+  ComputeAndCompareR0<float>(&builder, 12.0f, {}, error_spec_);
+}
+
+// Test conditional with the same computation in the true and false cases that
+// take in the same arguments.
+XLA_TEST_F(ConditionalOpTest, SameComputationSameArg) {
+  ComputationBuilder builder(client_, TestName());
+  auto pred = builder.ConstantR0<bool>(false);
+  auto operand = builder.ConstantR0<float>(12.6f);
+  auto floor = CreateR0F32FloorComputation();
+  auto result = builder.Conditional(pred, operand, floor, operand, floor);
+
+  ComputeAndCompareR0<float>(&builder, 12.0f, {}, error_spec_);
+}
+
+// Test conditional with different instances of the same computation in the true
+// and false cases.
+XLA_TEST_F(ConditionalOpTest, SameComputationDiffInstances) {
+  ComputationBuilder builder(client_, TestName());
+  auto pred = builder.ConstantR0<bool>(false);
+  auto operand1 = builder.ConstantR0<float>(56.4f);
+  auto operand2 = builder.ConstantR0<float>(12.6f);
+  auto result =
+      builder.Conditional(pred, operand1, CreateR0F32FloorComputation(),
+                          operand2, CreateR0F32FloorComputation());
+
+  ComputeAndCompareR0<float>(&builder, 12.0f, {}, error_spec_);
+}
+
+// Test the case when a call invokes a computation that contains a conditional.
+XLA_TEST_F(ConditionalOpTest, ConditionalWithCall) {
+  Shape r0bool = ShapeUtil::MakeShape(PRED, {});
+  ComputationBuilder inner_builder(client_, TestName() + ".inner_conditional");
+  auto pred_cond = inner_builder.Parameter(0, r0bool, "param0");
+  auto true_operand = inner_builder.Parameter(1, r0f32_, "param1");
+  auto false_operand = inner_builder.Parameter(2, r0f32_, "param2");
+  inner_builder.Conditional(pred_cond, true_operand,
+                            CreateR0F32CeilComputation(), false_operand,
+                            CreateR0F32FloorComputation());
+  auto inner_builder_result = inner_builder.Build();
+
+  ComputationBuilder builder(client_, TestName());
+  auto pred = builder.ConstantR0<bool>(false);
+  auto operand1 = builder.ConstantR0<float>(56.4f);
+  auto operand2 = builder.ConstantR0<float>(12.6f);
+  builder.Call(inner_builder_result.ConsumeValueOrDie(),
+               {pred, operand1, operand2});
+
+  ComputeAndCompareR0<float>(&builder, 12.0f, {}, error_spec_);
+}
+
 // Test true and false computations that take in 2 parameters and predicate is
 // true.
 XLA_TEST_F(ConditionalOpTest, Parameters2TrueBranch) {
-- 
GitLab


From 1d7d8667d19e61bc65f35a6dae33563b2acadaac Mon Sep 17 00:00:00 2001
From: ManHyuk <manhyuk@kw.ac.kr>
Date: Tue, 19 Dec 2017 19:33:03 +0900
Subject: [PATCH 1199/1225] fix typos

---
 tensorflow/compiler/tf2xla/xla_context.h                 | 2 +-
 tensorflow/compiler/xla/service/gpu/while_transformer.cc | 2 +-
 tensorflow/stream_executor/dnn.h                         | 6 +++---
 3 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/tensorflow/compiler/tf2xla/xla_context.h b/tensorflow/compiler/tf2xla/xla_context.h
index ebd758d154..1a7dafe8cd 100644
--- a/tensorflow/compiler/tf2xla/xla_context.h
+++ b/tensorflow/compiler/tf2xla/xla_context.h
@@ -116,7 +116,7 @@ class XlaContext : public ResourceBase {
   const bool allow_cpu_custom_calls_;
 
   // If true, constant return values are returned as Tensors instead of
-  // run-time computation outptus.
+  // run-time computation outputs.
   const bool resolve_compile_time_constants_;
 
   // Arguments to the Tensorflow graph, indexed by _Arg index.
diff --git a/tensorflow/compiler/xla/service/gpu/while_transformer.cc b/tensorflow/compiler/xla/service/gpu/while_transformer.cc
index ccdd171759..ab94d7d543 100644
--- a/tensorflow/compiler/xla/service/gpu/while_transformer.cc
+++ b/tensorflow/compiler/xla/service/gpu/while_transformer.cc
@@ -44,7 +44,7 @@ namespace {
 //
 //            Parameter
 //               |
-//   Const  GetTupleElemet
+//   Const  GetTupleElement
 //      \   /
 //       Add (root)
 //
diff --git a/tensorflow/stream_executor/dnn.h b/tensorflow/stream_executor/dnn.h
index 07314a0ff7..f4162b0962 100644
--- a/tensorflow/stream_executor/dnn.h
+++ b/tensorflow/stream_executor/dnn.h
@@ -2024,7 +2024,7 @@ class DnnSupport {
   //  output_h_desc: descriptor for the output "h" state.
   //  output_h_data: the memory region that stores the output "h" data.
   //  output_c_desc: descriptor for the output "c" state.
-  //  output_c_data: the memory region that stores the outptu "c" data. This
+  //  output_c_data: the memory region that stores the output "c" data. This
   //    must be specified for LSTM models.
   //  is_training: whether this is used in training or inference. That decides
   //    whether respace_space data need to be produced.
@@ -2033,7 +2033,7 @@ class DnnSupport {
   //  retains the data and feed it to the backward pass.
   //  workspace_allocator: an allocator to create temporary workspace used in
   //    this kernel. The caller is responsible for retaining the memory long
-  //    enough for the lifespan of this operation, and recycles aftewards.
+  //    enough for the lifespan of this operation, and recycles afterwards.
   virtual bool DoRnnForward(Stream* stream, const dnn::RnnDescriptor& rnn_desc,
                             const dnn::RnnSequenceTensorDescriptor& input_desc,
                             const DeviceMemory<Eigen::half>& input_data,
@@ -2112,7 +2112,7 @@ class DnnSupport {
   //  output_h_desc: descriptor for the output "h" state.
   //  output_h_data: the memory region that stores the output "h" data.
   //  output_c_desc: descriptor for the output "c" state.
-  //  output_c_data: the memory region that stores the outptu "c" data. This
+  //  output_c_data: the memory region that stores the output "c" data. This
   //    must be specified for LSTM models.
   //  output_backprop_data: the device memory region that contains the backprop
   //    to the output sequence.
-- 
GitLab


From 3ce38884954e33e249eedf4fda86585f6f263736 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 19 Dec 2017 02:55:10 -0800
Subject: [PATCH 1200/1225] Avoid casting to/from int64 in
 Categorical._sample_n

PiperOrigin-RevId: 179529787
---
 tensorflow/python/ops/distributions/categorical.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/tensorflow/python/ops/distributions/categorical.py b/tensorflow/python/ops/distributions/categorical.py
index 84ca6db4c4..2046a08d61 100644
--- a/tensorflow/python/ops/distributions/categorical.py
+++ b/tensorflow/python/ops/distributions/categorical.py
@@ -263,11 +263,12 @@ class Categorical(distribution.Distribution):
       logits_2d = self.logits
     else:
       logits_2d = array_ops.reshape(self.logits, [-1, self.event_size])
-    draws = random_ops.multinomial(logits_2d, n, seed=seed)
+    draws = random_ops.multinomial(
+        logits_2d, n, seed=seed, output_dtype=self.dtype)
     draws = array_ops.reshape(
         array_ops.transpose(draws),
         array_ops.concat([[n], self.batch_shape_tensor()], 0))
-    return math_ops.cast(draws, self.dtype)
+    return draws
 
   def _cdf(self, k):
     k = ops.convert_to_tensor(k, name="k")
-- 
GitLab


From db76da170ad12e9f197b267282291d18dbf02263 Mon Sep 17 00:00:00 2001
From: Jiongyan Zhang <qmick@live.cn>
Date: Tue, 19 Dec 2017 19:33:42 +0800
Subject: [PATCH 1201/1225] fix issue building memory_stats with opencl

---
 tensorflow/contrib/memory_stats/kernels/memory_stats_ops.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/contrib/memory_stats/kernels/memory_stats_ops.cc b/tensorflow/contrib/memory_stats/kernels/memory_stats_ops.cc
index 7e2e96e160..39c0d5af45 100644
--- a/tensorflow/contrib/memory_stats/kernels/memory_stats_ops.cc
+++ b/tensorflow/contrib/memory_stats/kernels/memory_stats_ops.cc
@@ -59,7 +59,7 @@ REGISTER_KERNEL_BUILDER(Name("BytesInUse").Device(DEVICE_GPU).HostMemory("out"),
 
 #ifdef TENSORFLOW_USE_SYCL
 REGISTER_KERNEL_BUILDER(
-    Name("BytesInUse").Device(DEVICE_SYCL).HostMemory("out"), MaxBytesInUseOp);
+    Name("BytesInUse").Device(DEVICE_SYCL).HostMemory("out"), BytesInUseOp);
 #endif  // TENSORFLOW_USE_SYCL
 
 // Op that measures the total memory (in bytes) of a device.
-- 
GitLab


From 39e0696ae0797f966b6c5221c072619ff2b9b097 Mon Sep 17 00:00:00 2001
From: Changming Sun <chasun@microsoft.com>
Date: Tue, 19 Dec 2017 20:47:56 +0800
Subject: [PATCH 1202/1225] Fix a compile error in file_block_cache_test.cc

---
 tensorflow/core/platform/cloud/file_block_cache_test.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/core/platform/cloud/file_block_cache_test.cc b/tensorflow/core/platform/cloud/file_block_cache_test.cc
index 081b32af64..bebed5af10 100644
--- a/tensorflow/core/platform/cloud/file_block_cache_test.cc
+++ b/tensorflow/core/platform/cloud/file_block_cache_test.cc
@@ -456,7 +456,7 @@ TEST(FileBlockCacheTest, CoalesceConcurrentReads) {
   FileBlockCache cache(block_size, block_size, 0, fetcher);
   // Fork off thread for parallel read.
   std::unique_ptr<Thread> concurrent(
-      Env::Default()->StartThread({}, "concurrent", [&cache] {
+      Env::Default()->StartThread({}, "concurrent", [&cache, block_size] {
         std::vector<char> out;
         TF_EXPECT_OK(cache.Read("", 0, block_size / 2, &out));
         EXPECT_EQ(out.size(), block_size / 2);
-- 
GitLab


From 2ab233c4b9d6c815bb641d0e2ec833931127eddf Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 19 Dec 2017 05:36:22 -0800
Subject: [PATCH 1203/1225] Update ops-related pbtxt files.

PiperOrigin-RevId: 179542009
---
 .../core/ops/compat/ops_history.v1.pbtxt      | 39 +++++++++++++++
 tensorflow/core/ops/ops.pbtxt                 | 47 ++++++++++++++++++-
 2 files changed, 85 insertions(+), 1 deletion(-)

diff --git a/tensorflow/core/ops/compat/ops_history.v1.pbtxt b/tensorflow/core/ops/compat/ops_history.v1.pbtxt
index 713f6842d9..e57f1d7238 100644
--- a/tensorflow/core/ops/compat/ops_history.v1.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history.v1.pbtxt
@@ -42752,6 +42752,45 @@ op {
     type: DT_STRING
   }
 }
+op {
+  name: "ShuffleAndRepeatDataset"
+  input_arg {
+    name: "input_dataset"
+    type: DT_VARIANT
+  }
+  input_arg {
+    name: "buffer_size"
+    type: DT_INT64
+  }
+  input_arg {
+    name: "seed"
+    type: DT_INT64
+  }
+  input_arg {
+    name: "seed2"
+    type: DT_INT64
+  }
+  input_arg {
+    name: "count"
+    type: DT_INT64
+  }
+  output_arg {
+    name: "handle"
+    type: DT_VARIANT
+  }
+  attr {
+    name: "output_types"
+    type: "list(type)"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "output_shapes"
+    type: "list(shape)"
+    has_minimum: true
+    minimum: 1
+  }
+}
 op {
   name: "ShuffleDataset"
   input_arg {
diff --git a/tensorflow/core/ops/ops.pbtxt b/tensorflow/core/ops/ops.pbtxt
index 284455ee54..6382a2fb79 100644
--- a/tensorflow/core/ops/ops.pbtxt
+++ b/tensorflow/core/ops/ops.pbtxt
@@ -26590,6 +26590,51 @@ op {
   }
   summary: "Generate a glob pattern matching all sharded file names."
 }
+op {
+  name: "ShuffleAndRepeatDataset"
+  input_arg {
+    name: "input_dataset"
+    type: DT_VARIANT
+  }
+  input_arg {
+    name: "buffer_size"
+    description: "The number of output elements to buffer in an iterator over\nthis dataset. Compare with the `min_after_dequeue` attr when creating a\n`RandomShuffleQueue`."
+    type: DT_INT64
+  }
+  input_arg {
+    name: "seed"
+    description: "A scalar seed for the random number generator. If either `seed` or\n`seed2` is set to be non-zero, the random number generator is seeded\nby the given seed.  Otherwise, a random seed is used."
+    type: DT_INT64
+  }
+  input_arg {
+    name: "seed2"
+    description: "A second scalar seed to avoid seed collision."
+    type: DT_INT64
+  }
+  input_arg {
+    name: "count"
+    description: "A scalar representing the number of times the underlying dataset\nshould be repeated. The default is `-1`, which results in infinite repetition."
+    type: DT_INT64
+  }
+  output_arg {
+    name: "handle"
+    type: DT_VARIANT
+  }
+  attr {
+    name: "output_types"
+    type: "list(type)"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "output_shapes"
+    type: "list(shape)"
+    has_minimum: true
+    minimum: 1
+  }
+  summary: "Creates a dataset that shuffles and repeats elements from `input_dataset`"
+  description: "pseudorandomly."
+}
 op {
   name: "ShuffleDataset"
   input_arg {
@@ -26603,7 +26648,7 @@ op {
   }
   input_arg {
     name: "seed"
-    description: "A scalar seed for the random number generator. If either seed or\nseed2 is set to be non-zero, the random number generator is seeded\nby the given seed.  Otherwise, a random seed is used."
+    description: "A scalar seed for the random number generator. If either `seed` or\n`seed2` is set to be non-zero, the random number generator is seeded\nby the given seed.  Otherwise, a random seed is used."
     type: DT_INT64
   }
   input_arg {
-- 
GitLab


From 84cef7bfbff4d7dc8e4b34926adfbce4e92df081 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 19 Dec 2017 05:48:13 -0800
Subject: [PATCH 1204/1225] Go: Update generated wrapper functions for
 TensorFlow ops.

PiperOrigin-RevId: 179542713
---
 tensorflow/go/op/wrappers.go | 37 ++++++++++++++++++++++++++++++++++--
 1 file changed, 35 insertions(+), 2 deletions(-)

diff --git a/tensorflow/go/op/wrappers.go b/tensorflow/go/op/wrappers.go
index 314663a814..091f64de70 100644
--- a/tensorflow/go/op/wrappers.go
+++ b/tensorflow/go/op/wrappers.go
@@ -5884,6 +5884,39 @@ func CacheDataset(scope *Scope, input_dataset tf.Output, filename tf.Output, out
 	return op.Output(0)
 }
 
+// Creates a dataset that shuffles and repeats elements from `input_dataset`
+//
+// pseudorandomly.
+//
+// Arguments:
+//
+//	buffer_size: The number of output elements to buffer in an iterator over
+// this dataset. Compare with the `min_after_dequeue` attr when creating a
+// `RandomShuffleQueue`.
+//	seed: A scalar seed for the random number generator. If either `seed` or
+// `seed2` is set to be non-zero, the random number generator is seeded
+// by the given seed.  Otherwise, a random seed is used.
+//	seed2: A second scalar seed to avoid seed collision.
+//	count: A scalar representing the number of times the underlying dataset
+// should be repeated. The default is `-1`, which results in infinite repetition.
+//
+//
+func ShuffleAndRepeatDataset(scope *Scope, input_dataset tf.Output, buffer_size tf.Output, seed tf.Output, seed2 tf.Output, count tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes}
+	opspec := tf.OpSpec{
+		Type: "ShuffleAndRepeatDataset",
+		Input: []tf.Input{
+			input_dataset, buffer_size, seed, seed2, count,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
 // Creates a Dataset that returns pseudorandom numbers.
 //
 // Arguments:
@@ -6379,8 +6412,8 @@ func ShuffleDatasetReshuffleEachIteration(value bool) ShuffleDatasetAttr {
 //	buffer_size: The number of output elements to buffer in an iterator over
 // this dataset. Compare with the `min_after_dequeue` attr when creating a
 // `RandomShuffleQueue`.
-//	seed: A scalar seed for the random number generator. If either seed or
-// seed2 is set to be non-zero, the random number generator is seeded
+//	seed: A scalar seed for the random number generator. If either `seed` or
+// `seed2` is set to be non-zero, the random number generator is seeded
 // by the given seed.  Otherwise, a random seed is used.
 //	seed2: A second scalar seed to avoid seed collision.
 //
-- 
GitLab


From 9e3e6475b97a3ec67ad63e0e39024fd7dd36da9a Mon Sep 17 00:00:00 2001
From: Malcolm Reynolds <mareynolds@google.com>
Date: Tue, 19 Dec 2017 06:36:25 -0800
Subject: [PATCH 1205/1225] Automated g4 rollback of changelist 179529787

PiperOrigin-RevId: 179546698
---
 tensorflow/python/ops/distributions/categorical.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/tensorflow/python/ops/distributions/categorical.py b/tensorflow/python/ops/distributions/categorical.py
index 2046a08d61..84ca6db4c4 100644
--- a/tensorflow/python/ops/distributions/categorical.py
+++ b/tensorflow/python/ops/distributions/categorical.py
@@ -263,12 +263,11 @@ class Categorical(distribution.Distribution):
       logits_2d = self.logits
     else:
       logits_2d = array_ops.reshape(self.logits, [-1, self.event_size])
-    draws = random_ops.multinomial(
-        logits_2d, n, seed=seed, output_dtype=self.dtype)
+    draws = random_ops.multinomial(logits_2d, n, seed=seed)
     draws = array_ops.reshape(
         array_ops.transpose(draws),
         array_ops.concat([[n], self.batch_shape_tensor()], 0))
-    return draws
+    return math_ops.cast(draws, self.dtype)
 
   def _cdf(self, k):
     k = ops.convert_to_tensor(k, name="k")
-- 
GitLab


From b61de902345da2a2a2320159f415a707844b1605 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 19 Dec 2017 07:40:36 -0800
Subject: [PATCH 1206/1225] Run ResizeArea tests on random data rather than all
 0s.

PiperOrigin-RevId: 179552297
---
 tensorflow/core/kernels/resize_area_op_test.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/core/kernels/resize_area_op_test.cc b/tensorflow/core/kernels/resize_area_op_test.cc
index cc5244d3a0..a7e06ef15a 100644
--- a/tensorflow/core/kernels/resize_area_op_test.cc
+++ b/tensorflow/core/kernels/resize_area_op_test.cc
@@ -41,7 +41,7 @@ class ResizeAreaOpTest : public OpsTestBase {
     bool is_ref = IsRefType(input_types_[inputs_.size()]);
     Tensor* input = new Tensor(device_->GetAllocator(AllocatorAttributes()),
                                DataTypeToEnum<float>::v(), shape);
-    input->flat<float>().setZero();
+    input->flat<float>().setRandom();
     tensors_.push_back(input);
     if (is_ref) {
       CHECK_EQ(RemoveRefType(input_types_[inputs_.size()]),
-- 
GitLab


From 2071f7ff4fe13d0b5a7b8d9dceaeb3c211e37199 Mon Sep 17 00:00:00 2001
From: HyoukJoong Lee <hyouklee@google.com>
Date: Tue, 19 Dec 2017 07:43:14 -0800
Subject: [PATCH 1207/1225] Automated g4 rollback of changelist 179506342

PiperOrigin-RevId: 179552496
---
 .../compiler/xla/service/buffer_liveness.cc   | 12 +-----
 .../xla/service/buffer_liveness_test.cc       | 42 -------------------
 2 files changed, 1 insertion(+), 53 deletions(-)

diff --git a/tensorflow/compiler/xla/service/buffer_liveness.cc b/tensorflow/compiler/xla/service/buffer_liveness.cc
index e7749252ce..513bfa3b7f 100644
--- a/tensorflow/compiler/xla/service/buffer_liveness.cc
+++ b/tensorflow/compiler/xla/service/buffer_liveness.cc
@@ -102,8 +102,8 @@ bool BufferLiveness::live_range_strictly_before(const LogicalBuffer& a,
     return false;
   }
 
+  // Every user of 'a' must be a predecessor of 'b' or 'b' itself.
   for (const BufferAlias& alias : points_to_analysis_->GetBufferAliases(a)) {
-    // Every user of 'a' must be a predecessor of 'b' or 'b' itself.
     for (auto user : alias.instruction()->users()) {
       if (DoesNotUseOperandBuffer(alias.instruction(), alias.index(), user,
                                   points_to_analysis())) {
@@ -114,16 +114,6 @@ bool BufferLiveness::live_range_strictly_before(const LogicalBuffer& a,
         return false;
       }
     }
-
-    // If the root instruction aliases the buffer 'a', the live range of 'a' is
-    // until the end of the computation and can never be strictly before another
-    // buffer. This is needed to prevent the root instruction's buffers from
-    // being reused by later instructions even when the root is not the last
-    // instruction in the schedule.
-    if (alias.instruction()->parent()->root_instruction() ==
-        alias.instruction()) {
-      return false;
-    }
   }
 
   // If 'b' is a user of 'a' then the buffers interfere unless 'a.instruction'
diff --git a/tensorflow/compiler/xla/service/buffer_liveness_test.cc b/tensorflow/compiler/xla/service/buffer_liveness_test.cc
index f623aef67a..13825fe05b 100644
--- a/tensorflow/compiler/xla/service/buffer_liveness_test.cc
+++ b/tensorflow/compiler/xla/service/buffer_liveness_test.cc
@@ -311,48 +311,6 @@ TEST_F(BufferLivenessTest, OverlappedBuffersSequentialOrder) {
   EXPECT_FALSE(InstructionsMayInterfere(*liveness, add, exp));
 }
 
-TEST_F(BufferLivenessTest, RootInstructionIsNotLastInSequentialOrder) {
-  // Tests that when the root instruction is not the last instruction in the
-  // schedule, the live range of its buffers interfere with the buffers of the
-  // later instructions.
-  //
-  // Two sets of independent instructions are executed in the computation.
-  // param --> add (root)
-  // recv --> recv-done --> send --> send-done
-  //
-  // Sequential order:
-  //  param, add (root), recv, recv-done, send, send-done
-  auto builder = HloComputation::Builder(TestName());
-  auto param =
-      builder.AddInstruction(HloInstruction::CreateParameter(0, vec_, "param"));
-  auto add = builder.AddInstruction(
-      HloInstruction::CreateBinary(vec_, HloOpcode::kAdd, param, param));
-  auto recv = builder.AddInstruction(
-      HloInstruction::CreateRecv(vec_, /*channel_id=*/0));
-  auto recv_done = builder.AddInstruction(HloInstruction::CreateRecvDone(recv));
-  auto send = builder.AddInstruction(
-      HloInstruction::CreateSend(recv_done, /*channel_id=*/1));
-  auto send_done = builder.AddInstruction(HloInstruction::CreateSendDone(send));
-
-  auto module = CreateNewModule();
-  auto computation = module->AddEntryComputation(builder.Build(add));
-
-  SequentialHloOrdering::HloModuleSequence module_sequence;
-  std::vector<const HloInstruction*> order = {param,     add,  recv,
-                                              recv_done, send, send_done};
-  module_sequence.emplace(computation, order);
-  auto liveness =
-      BufferLiveness::Run(module.get(), xla::MakeUnique<SequentialHloOrdering>(
-                                            module.get(), module_sequence))
-          .ConsumeValueOrDie();
-
-  EXPECT_FALSE(InstructionsMayInterfere(*liveness, param, add));
-  // Check the root instruction (add) buffer interferes with the recv buffer.
-  EXPECT_TRUE(
-      liveness->MayInterfere(GetBuffer(*liveness, add, /*index=*/{}),
-                             GetBuffer(*liveness, recv, /*index=*/{0})));
-}
-
 TEST_F(BufferLivenessTest, TupleLiveOut) {
   // Verify MaybeLiveOut with nested tuples. Result of computation looks like:
   //
-- 
GitLab


From 38aa1352b570255eea597b056f4901ef0a828030 Mon Sep 17 00:00:00 2001
From: Martin Wicke <martin.wicke@gmail.com>
Date: Tue, 19 Dec 2017 09:24:26 -0800
Subject: [PATCH 1208/1225] docstring

---
 tensorflow/core/ops/parsing_ops.cc | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/tensorflow/core/ops/parsing_ops.cc b/tensorflow/core/ops/parsing_ops.cc
index 2a5f037373..ee12c7dfaa 100644
--- a/tensorflow/core/ops/parsing_ops.cc
+++ b/tensorflow/core/ops/parsing_ops.cc
@@ -54,13 +54,20 @@ REGISTER_OP("DecodeCompressed")
     .Attr("compression_type: string = ''")
     .SetShapeFn(shape_inference::UnchangedShape)
     .Doc(R"doc(
-Decompress the bytes of a string to the output string.
+Decompress strings. 
+
+This op decompresses each element of the `bytes` input `Tensor`, which
+is assumed to be compressed using the given `compression_type`. 
+
+The `output` is a string `Tensor` of the same shape as `bytes`, 
+each element containing the decompressed data from the corresponding
+element in `bytes`.
 
 bytes: A Tensor of string which is compressed.
-compression_type: A scalar containing either (i) the empty string (no
-  compression), (ii) "ZLIB", or (iii) "GZIP".
 output: A Tensor with the same shape as input `bytes`, uncompressed
   from bytes.
+compression_type: A scalar containing either (i) the empty string (no
+  compression), (ii) "ZLIB", or (iii) "GZIP".
 )doc");
 
 REGISTER_OP("ParseExample")
-- 
GitLab


From 59f2530bf3889fd84b18e9ea6f0a03bf916637cd Mon Sep 17 00:00:00 2001
From: Yong Tang <yong.tang.github@outlook.com>
Date: Tue, 19 Dec 2017 12:01:40 -0600
Subject: [PATCH 1209/1225] Use Eigen version of the `scalar_fmod_op` for
 `fmod` ops (#15456)

* Use Eigen version of the `scalar_fmod_op` for `fmod` ops

It seems that scalar_fmod_op is supported in Eigen so
this fix changes `scalar_fmod2_op` to use Eigen version
of the `scalar_fmod_op`.

Signed-off-by: Yong Tang <yong.tang.github@outlook.com>

* Remove `scalar_fmod2_op` as ` scalar_fmod_op` is in Eigen

Signed-off-by: Yong Tang <yong.tang.github@outlook.com>
---
 tensorflow/core/kernels/cwise_ops.h | 19 +------------------
 1 file changed, 1 insertion(+), 18 deletions(-)

diff --git a/tensorflow/core/kernels/cwise_ops.h b/tensorflow/core/kernels/cwise_ops.h
index 062487b8c3..24c6e6361d 100644
--- a/tensorflow/core/kernels/cwise_ops.h
+++ b/tensorflow/core/kernels/cwise_ops.h
@@ -47,23 +47,6 @@ std::complex<double> exp(const std::complex<double> &x) {
 
 namespace internal {
 
-// TODO(rmlarsen): Get rid of fmod2 once fmod is upstreamed to Eigen.
-template <typename T>
-struct scalar_fmod2_op {
-  EIGEN_EMPTY_STRUCT_CTOR(scalar_fmod2_op)
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const T operator()(const T& a,
-                                                           const T& b) const {
-    return std::fmod(a, b);
-  }
-};
-template <typename T>
-struct functor_traits<scalar_fmod2_op<T>> {
-  enum {
-    Cost = 13,  // Reciprocal throughput of FPREM on Haswell.
-    PacketAccess = false,
-  };
-};
-
 template <typename T>
 struct scalar_asinh_op {
   EIGEN_EMPTY_STRUCT_CTOR(scalar_asinh_op)
@@ -721,7 +704,7 @@ struct safe_div : base<T, Eigen::internal::safe_div_or_mod_op<
 };
 
 template <typename T>
-struct fmod : base<T, Eigen::internal::scalar_fmod2_op<T>> {};
+struct fmod : base<T, Eigen::internal::scalar_fmod_op<T>> {};
 
 template <typename T>
 struct mod : base<T, Eigen::internal::scalar_mod2_op<T>> {};
-- 
GitLab


From 2368d4114465b3ebd6bd597cd5919b295cd4348b Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 19 Dec 2017 10:11:58 -0800
Subject: [PATCH 1210/1225] [XLA] Add support for atan2 on CPU

This leans on the libm's atan2 for the actual routine but allows us to share
the implementation of other complex operations between CPU and GPU.

PiperOrigin-RevId: 179569666
---
 tensorflow/compiler/tests/binary_ops_test.py  |  65 ++---
 tensorflow/compiler/tests/unary_ops_test.py   |  85 +++---
 .../xla/service/cpu/elemental_ir_emitter.cc   |  33 ++-
 .../xla/service/cpu/elemental_ir_emitter.h    |   2 +
 .../xla/service/elemental_ir_emitter.cc       | 203 ++++++++++---
 .../xla/service/elemental_ir_emitter.h        |  22 +-
 .../xla/service/gpu/elemental_ir_emitter.cc   | 267 +++---------------
 .../xla/service/gpu/elemental_ir_emitter.h    |  25 +-
 8 files changed, 330 insertions(+), 372 deletions(-)

diff --git a/tensorflow/compiler/tests/binary_ops_test.py b/tensorflow/compiler/tests/binary_ops_test.py
index 905dd9fc7b..65706b35d6 100644
--- a/tensorflow/compiler/tests/binary_ops_test.py
+++ b/tensorflow/compiler/tests/binary_ops_test.py
@@ -94,14 +94,12 @@ class BinaryOpsTest(XLATestCase):
           dtype(4),
           expected=np.array([[16], [81]], dtype=dtype))
 
-      atan2_supported = self.device == "XLA_GPU"
-      if atan2_supported:
-        self._testBinary(
-            math_ops.atan2,
-            np.array([0, np.sqrt(2), 1, np.sqrt(2), 0], dtype),
-            np.array([1, np.sqrt(2), 0, -np.sqrt(2), -1], dtype),
-            expected=np.array(
-                [0, np.pi / 4, np.pi / 2, np.pi * 3 / 4, np.pi], dtype=dtype))
+      self._testBinary(
+          math_ops.atan2,
+          np.array([0, np.sqrt(2), 1, np.sqrt(2), 0], dtype),
+          np.array([1, np.sqrt(2), 0, -np.sqrt(2), -1], dtype),
+          expected=np.array(
+              [0, np.pi / 4, np.pi / 2, np.pi * 3 / 4, np.pi], dtype=dtype))
 
       self._testBinary(
           gen_math_ops._reciprocal_grad,
@@ -388,30 +386,28 @@ class BinaryOpsTest(XLATestCase):
               ],
               dtype=dtype))
 
-      atan2_supported = self.device == "XLA_GPU"
-      if atan2_supported:
-        self._testBinary(
-            math_ops.pow,
-            dtype(3 + 2j),
-            dtype(4 - 5j),
-            expected=np.power(dtype(3 + 2j), dtype(4 - 5j)))
-        self._testBinary(  # empty rhs
-            math_ops.pow,
-            np.array([1 + 2j, 2 - 3j], dtype=dtype),
-            np.zeros(shape=[0, 2], dtype=dtype),
-            expected=np.zeros(shape=[0, 2], dtype=dtype))
-        self._testBinary(  # to zero power
-            math_ops.pow,
-            np.array([1 + 2j, 2 - 3j], dtype=dtype),
-            np.zeros(shape=[1, 2], dtype=dtype),
-            expected=np.ones(shape=[1, 2], dtype=dtype))
-        lhs = np.array([1 - 2j, 4 + 3j, 2 - 3j, 3, 2j, 1, 4], dtype=dtype)
-        rhs = np.array([2, 3j, 3 + 4j, 2 + 3j, 3 - 2j, 2, 3 + 3j], dtype=dtype)
-        scalar = dtype(2 + 2j)
-        self._testBinary(math_ops.pow, lhs, rhs, expected=np.power(lhs, rhs))
-        self._testBinary(
-            math_ops.pow, scalar, rhs, expected=np.power(scalar, rhs))
-        self._testBinary(math_ops.pow, lhs, scalar, np.power(lhs, scalar))
+      self._testBinary(
+          math_ops.pow,
+          dtype(3 + 2j),
+          dtype(4 - 5j),
+          expected=np.power(dtype(3 + 2j), dtype(4 - 5j)))
+      self._testBinary(  # empty rhs
+          math_ops.pow,
+          np.array([1 + 2j, 2 - 3j], dtype=dtype),
+          np.zeros(shape=[0, 2], dtype=dtype),
+          expected=np.zeros(shape=[0, 2], dtype=dtype))
+      self._testBinary(  # to zero power
+          math_ops.pow,
+          np.array([1 + 2j, 2 - 3j], dtype=dtype),
+          np.zeros(shape=[1, 2], dtype=dtype),
+          expected=np.ones(shape=[1, 2], dtype=dtype))
+      lhs = np.array([1 - 2j, 4 + 3j, 2 - 3j, 3, 2j, 1, 4], dtype=dtype)
+      rhs = np.array([2, 3j, 3 + 4j, 2 + 3j, 3 - 2j, 2, 3 + 3j], dtype=dtype)
+      scalar = dtype(2 + 2j)
+      self._testBinary(math_ops.pow, lhs, rhs, expected=np.power(lhs, rhs))
+      self._testBinary(
+          math_ops.pow, scalar, rhs, expected=np.power(scalar, rhs))
+      self._testBinary(math_ops.pow, lhs, scalar, np.power(lhs, scalar))
 
       lhs = np.array([4 + 2j, -3 - 1j, 2j, 1], dtype=dtype)
       rhs = np.array([5, -6j, 7 - 3j, -8j], dtype=dtype)
@@ -421,9 +417,8 @@ class BinaryOpsTest(XLATestCase):
       self._testBinary(
           gen_math_ops._sigmoid_grad, lhs, rhs, expected=rhs * lhs * (1 - lhs))
 
-      if atan2_supported:
-        self._testBinary(
-            gen_math_ops._rsqrt_grad, lhs, rhs, expected=lhs**3 * rhs / -2)
+      self._testBinary(
+          gen_math_ops._rsqrt_grad, lhs, rhs, expected=lhs**3 * rhs / -2)
 
       self._testBinary(
           gen_math_ops._sqrt_grad, lhs, rhs, expected=rhs / (2 * lhs))
diff --git a/tensorflow/compiler/tests/unary_ops_test.py b/tensorflow/compiler/tests/unary_ops_test.py
index ecba5a4fb0..0a6fe04d3c 100644
--- a/tensorflow/compiler/tests/unary_ops_test.py
+++ b/tensorflow/compiler/tests/unary_ops_test.py
@@ -363,26 +363,23 @@ class UnaryOpsTest(XLATestCase):
   def testComplexOps(self):
     for dtype in self.complex_types:
 
-      # TODO(b/65408531): Wider support for log (needs atan2).
-      atan2_supported = self.device == "XLA_GPU"
-      if atan2_supported:
-        self._assertOpOutputMatchesExpected(
-            math_ops.acosh,
-            np.array([0.1, 0.2j, 0.3 - 0.1j, 0.4 + 0.5j], dtype=dtype),
-            expected=np.arccosh(
-                np.array([0.1, 0.2j, 0.3 - 0.1j, 0.4 + 0.5j], dtype=dtype)))
+      self._assertOpOutputMatchesExpected(
+          math_ops.acosh,
+          np.array([0.1, 0.2j, 0.3 - 0.1j, 0.4 + 0.5j], dtype=dtype),
+          expected=np.arccosh(
+              np.array([0.1, 0.2j, 0.3 - 0.1j, 0.4 + 0.5j], dtype=dtype)))
 
-        self._assertOpOutputMatchesExpected(
-            math_ops.asinh,
-            np.array([0.1, 0.2j, 0.3 - 0.1j, 0.4 + 0.5j], dtype=dtype),
-            expected=np.arcsinh(
-                np.array([0.1, 0.2j, 0.3 - 0.1j, 0.4 + 0.5j], dtype=dtype)))
+      self._assertOpOutputMatchesExpected(
+          math_ops.asinh,
+          np.array([0.1, 0.2j, 0.3 - 0.1j, 0.4 + 0.5j], dtype=dtype),
+          expected=np.arcsinh(
+              np.array([0.1, 0.2j, 0.3 - 0.1j, 0.4 + 0.5j], dtype=dtype)))
 
-        self._assertOpOutputMatchesExpected(
-            math_ops.atanh,
-            np.array([0.1, 0.2j, 0.3 - 0.1j, 0.4 + 0.5j], dtype=dtype),
-            expected=np.arctanh(
-                np.array([0.1, 0.2j, 0.3 - 0.1j, 0.4 + 0.5j], dtype=dtype)))
+      self._assertOpOutputMatchesExpected(
+          math_ops.atanh,
+          np.array([0.1, 0.2j, 0.3 - 0.1j, 0.4 + 0.5j], dtype=dtype),
+          expected=np.arctanh(
+              np.array([0.1, 0.2j, 0.3 - 0.1j, 0.4 + 0.5j], dtype=dtype)))
 
       self._assertOpOutputMatchesExpected(
           math_ops.cosh,
@@ -409,11 +406,10 @@ class UnaryOpsTest(XLATestCase):
           np.array([[1, 2j, 2 + 3j]], dtype=dtype),
           expected=1.0 / np.array([[1, 2j, 2 + 3j]], dtype=dtype))
 
-      if atan2_supported:
-        self._assertOpOutputMatchesExpected(
-            math_ops.log,
-            np.array([[5j, 3 - 2j]], dtype=dtype),
-            expected=np.log(np.array([[5j, 3 - 2j]], dtype=dtype)))
+      self._assertOpOutputMatchesExpected(
+          math_ops.log,
+          np.array([[5j, 3 - 2j]], dtype=dtype),
+          expected=np.log(np.array([[5j, 3 - 2j]], dtype=dtype)))
 
       self._assertOpOutputMatchesExpected(
           math_ops.sin,
@@ -427,27 +423,26 @@ class UnaryOpsTest(XLATestCase):
 
       # TODO(b/34703906): improve log1p implementation and make tolerance
       # tighter.
-      if atan2_supported:  # TODO(b/34703906): log support
-        self._assertOpOutputMatchesExpected(
-            math_ops.log1p,
-            np.array([[1e-14, 1e-15j, 0.6 - 0.3j]], dtype=dtype),
-            expected=np.log1p(
-                np.array([[1e-14, 1e-15j, 0.6 - 0.3j]], dtype=dtype)))
+      self._assertOpOutputMatchesExpected(
+          math_ops.log1p,
+          np.array([[1e-14, 1e-15j, 0.6 - 0.3j]], dtype=dtype),
+          expected=np.log1p(
+              np.array([[1e-14, 1e-15j, 0.6 - 0.3j]], dtype=dtype)))
 
-        val = np.array([1, 2j, 2 - 3j, 4 + 5j], dtype=dtype)
-        self._assertOpOutputMatchesExpected(
-            math_ops.rsqrt, val, expected=1 / np.sqrt(val))
+      val = np.array([1, 2j, 2 - 3j, 4 + 5j], dtype=dtype)
+      self._assertOpOutputMatchesExpected(
+          math_ops.rsqrt, val, expected=1 / np.sqrt(val))
 
-        self._assertOpOutputMatchesExpected(
-            math_ops.sigmoid, val, expected=1 / (1 + np.exp(-val)))
+      self._assertOpOutputMatchesExpected(
+          math_ops.sigmoid, val, expected=1 / (1 + np.exp(-val)))
 
-        self._assertOpOutputMatchesExpected(
-            math_ops.sqrt, val, expected=np.sqrt(val))
+      self._assertOpOutputMatchesExpected(
+          math_ops.sqrt, val, expected=np.sqrt(val))
 
-        self._assertOpOutputMatchesExpected(
-            math_ops.tanh,
-            np.array([1, 2j, 2 - 3j, 4 + 5j], dtype=dtype),
-            expected=np.tanh(np.array([1, 2j, 2 - 3j, 4 + 5j], dtype=dtype)))
+      self._assertOpOutputMatchesExpected(
+          math_ops.tanh,
+          np.array([1, 2j, 2 - 3j, 4 + 5j], dtype=dtype),
+          expected=np.tanh(np.array([1, 2j, 2 - 3j, 4 + 5j], dtype=dtype)))
 
       self._assertOpOutputMatchesExpected(
           math_ops.tan,
@@ -480,12 +475,10 @@ class UnaryOpsTest(XLATestCase):
           np.array([[-4j, 3 + 2j], [2, -1j]], dtype=dtype),
           expected=np.array([[1, 1], [1, 1]], dtype=dtype))
 
-      if atan2_supported:  # TODO(b/34703906): atan2 support
-        self._assertOpOutputMatchesExpected(
-            math_ops.angle,
-            np.array([1 + 3j, -4 + 7j, 2.7, -3j], dtype=dtype),
-            expected=np.angle(
-                np.array([1 + 3j, -4 + 7j, 2.7, -3j], dtype=dtype)))
+      self._assertOpOutputMatchesExpected(
+          math_ops.angle,
+          np.array([1 + 3j, -4 + 7j, 2.7, -3j], dtype=dtype),
+          expected=np.angle(np.array([1 + 3j, -4 + 7j, 2.7, -3j], dtype=dtype)))
 
       self._assertOpOutputMatchesExpected(
           math_ops.conj,
diff --git a/tensorflow/compiler/xla/service/cpu/elemental_ir_emitter.cc b/tensorflow/compiler/xla/service/cpu/elemental_ir_emitter.cc
index ba693ec89a..ebd96c4c42 100644
--- a/tensorflow/compiler/xla/service/cpu/elemental_ir_emitter.cc
+++ b/tensorflow/compiler/xla/service/cpu/elemental_ir_emitter.cc
@@ -44,15 +44,11 @@ StatusOr<llvm::Value*> CpuElementalIrEmitter::EmitFloatUnaryOp(
         default:
           return Unimplemented("tanh");
       }
-      // Create function type for the function.
-      llvm::FunctionType* function_type = llvm::FunctionType::get(
-          llvm_ir::PrimitiveTypeToIrType(element_type, module_),
-          llvm_ir::PrimitiveTypeToIrType(element_type, module_),
-          /*isVarArg=*/false);
       // Create function declaration for 'tanhf'.
       llvm::Function* function =
           llvm::cast<llvm::Function>(module_->getOrInsertFunction(
-              llvm_ir::AsStringRef(function_name), function_type));
+              llvm_ir::AsStringRef(function_name), operand_value->getType(),
+              operand_value->getType()));
       function->setCallingConv(llvm::CallingConv::C);
       function->setDoesNotThrow();
       function->setDoesNotAccessMemory();
@@ -64,6 +60,31 @@ StatusOr<llvm::Value*> CpuElementalIrEmitter::EmitFloatUnaryOp(
   }
 }
 
+StatusOr<llvm::Value*> CpuElementalIrEmitter::EmitAtan2(
+    PrimitiveType prim_type, llvm::Value* lhs, llvm::Value* rhs) const {
+  string function_name;
+  switch (prim_type) {
+    case F32:
+      function_name = "atan2f";
+      break;
+    case F64:
+      function_name = "atan2";
+      break;
+    default:
+      return Unimplemented("atan2");
+  }
+  // Create function declaration for 'atan2'.
+  llvm::Function* function =
+      llvm::cast<llvm::Function>(module_->getOrInsertFunction(
+          llvm_ir::AsStringRef(function_name), lhs->getType(), lhs->getType(),
+          rhs->getType()));
+  function->setCallingConv(llvm::CallingConv::C);
+  function->setDoesNotThrow();
+  function->setDoesNotAccessMemory();
+  // Create instruction to call 'atan2'.
+  return ir_builder_->CreateCall(function, {lhs, rhs});
+}
+
 llvm_ir::ElementGenerator CpuElementalIrEmitter::MakeElementGenerator(
     const HloInstruction* hlo,
     const HloToElementGeneratorMap& operand_to_generator) const {
diff --git a/tensorflow/compiler/xla/service/cpu/elemental_ir_emitter.h b/tensorflow/compiler/xla/service/cpu/elemental_ir_emitter.h
index 7e9f27befb..4446dfd282 100644
--- a/tensorflow/compiler/xla/service/cpu/elemental_ir_emitter.h
+++ b/tensorflow/compiler/xla/service/cpu/elemental_ir_emitter.h
@@ -41,6 +41,8 @@ class CpuElementalIrEmitter : public ElementalIrEmitter {
  protected:
   StatusOr<llvm::Value*> EmitFloatUnaryOp(
       const HloInstruction* op, llvm::Value* operand_value) const override;
+  StatusOr<llvm::Value*> EmitAtan2(PrimitiveType prim_type, llvm::Value* lhs,
+                                   llvm::Value* rhs) const override;
 
   IrEmitter* ir_emitter_;
 };
diff --git a/tensorflow/compiler/xla/service/elemental_ir_emitter.cc b/tensorflow/compiler/xla/service/elemental_ir_emitter.cc
index 7e88bbd631..3792929432 100644
--- a/tensorflow/compiler/xla/service/elemental_ir_emitter.cc
+++ b/tensorflow/compiler/xla/service/elemental_ir_emitter.cc
@@ -404,21 +404,13 @@ StatusOr<llvm::Value*> ElementalIrEmitter::EmitFloatUnaryOp(
           primitive_util::BitWidth(to_type));
     }
     case HloOpcode::kExp:
-      return llvm_ir::EmitCallToIntrinsic(llvm::Intrinsic::exp, {operand_value},
-                                          {operand_value->getType()},
-                                          ir_builder_);
+      return EmitExp(op->shape().element_type(), operand_value);
     case HloOpcode::kLog:
-      return llvm_ir::EmitCallToIntrinsic(llvm::Intrinsic::log, {operand_value},
-                                          {operand_value->getType()},
-                                          ir_builder_);
+      return EmitLog(op->shape().element_type(), operand_value);
     case HloOpcode::kCos:
-      return llvm_ir::EmitCallToIntrinsic(llvm::Intrinsic::cos, {operand_value},
-                                          {operand_value->getType()},
-                                          ir_builder_);
+      return EmitCos(op->shape().element_type(), operand_value);
     case HloOpcode::kSin:
-      return llvm_ir::EmitCallToIntrinsic(llvm::Intrinsic::sin, {operand_value},
-                                          {operand_value->getType()},
-                                          ir_builder_);
+      return EmitSin(op->shape().element_type(), operand_value);
     case HloOpcode::kFloor:
       return llvm_ir::EmitCallToIntrinsic(
           llvm::Intrinsic::floor, {operand_value}, {operand_value->getType()},
@@ -469,9 +461,25 @@ StatusOr<llvm::Value*> ElementalIrEmitter::EmitFloatUnaryOp(
 
 StatusOr<llvm::Value*> ElementalIrEmitter::EmitComplexUnaryOp(
     const HloInstruction* op, llvm::Value* operand_value) const {
+  PrimitiveType input_type = op->operand(0)->shape().element_type();
+  PrimitiveType component_type =
+      primitive_util::IsComplexType(input_type)
+          ? primitive_util::ComplexComponentType(input_type)
+          : input_type;
   switch (op->opcode()) {
-    // TODO(b/65209142): Angle/Log require atan2.
-    // case HloOpcode::kLog:  // log(a+bi) = .5*log(a^2+b^2) + i*atan2(b, a)
+    case HloOpcode::kLog: {
+      // log(a+bi) = .5*log(a^2+b^2) + i*atan2(b, a)
+      auto a = EmitExtractReal(operand_value);
+      auto b = EmitExtractImag(operand_value);
+      llvm::Type* llvm_ty = a->getType();
+      auto sum_sq = ir_builder_->CreateFAdd(ir_builder_->CreateFMul(a, a),
+                                            ir_builder_->CreateFMul(b, b));
+      TF_ASSIGN_OR_RETURN(auto log_sum_sq, EmitLog(component_type, sum_sq));
+      TF_ASSIGN_OR_RETURN(auto angle, EmitAtan2(component_type, b, a));
+      auto one_half = llvm::ConstantFP::get(llvm_ty, 0.5);
+      return EmitComposeComplex(
+          op, ir_builder_->CreateFMul(one_half, log_sum_sq), angle);
+    }
     case HloOpcode::kConvert: {
       PrimitiveType from_type = op->operand(0)->shape().element_type();
       TF_RET_CHECK(primitive_util::IsComplexType(from_type));
@@ -493,15 +501,12 @@ StatusOr<llvm::Value*> ElementalIrEmitter::EmitComplexUnaryOp(
     }
     case HloOpcode::kExp: {
       // e^(a+bi) = e^a*(cos(b)+sin(b)i)
-      auto exp_a = llvm_ir::EmitCallToIntrinsic(
-          llvm::Intrinsic::exp, {EmitExtractReal(operand_value)},
-          {EmitExtractReal(operand_value)->getType()}, ir_builder_);
-      auto cos_b = llvm_ir::EmitCallToIntrinsic(
-          llvm::Intrinsic::cos, {EmitExtractImag(operand_value)},
-          {EmitExtractImag(operand_value)->getType()}, ir_builder_);
-      auto sin_b = llvm_ir::EmitCallToIntrinsic(
-          llvm::Intrinsic::sin, {EmitExtractImag(operand_value)},
-          {EmitExtractImag(operand_value)->getType()}, ir_builder_);
+      TF_ASSIGN_OR_RETURN(
+          auto exp_a, EmitExp(component_type, EmitExtractReal(operand_value)));
+      TF_ASSIGN_OR_RETURN(
+          auto cos_b, EmitCos(component_type, EmitExtractImag(operand_value)));
+      TF_ASSIGN_OR_RETURN(
+          auto sin_b, EmitSin(component_type, EmitExtractImag(operand_value)));
       return EmitComposeComplex(op, ir_builder_->CreateFMul(exp_a, cos_b),
                                 ir_builder_->CreateFMul(exp_a, sin_b));
     }
@@ -516,16 +521,13 @@ StatusOr<llvm::Value*> ElementalIrEmitter::EmitComplexUnaryOp(
       auto a = EmitExtractReal(operand_value);
       auto b = EmitExtractImag(operand_value);
       auto type = a->getType();
-      auto exp_b = llvm_ir::EmitCallToIntrinsic(llvm::Intrinsic::exp, {b},
-                                                {type}, ir_builder_);
+      TF_ASSIGN_OR_RETURN(auto exp_b, EmitExp(component_type, b));
       auto half_exp_b =
           ir_builder_->CreateFMul(llvm::ConstantFP::get(type, 0.5), exp_b);
       auto half_exp_neg_b =
           ir_builder_->CreateFDiv(llvm::ConstantFP::get(type, 0.5), exp_b);
-      auto cos_a = llvm_ir::EmitCallToIntrinsic(llvm::Intrinsic::cos, {a},
-                                                {type}, ir_builder_);
-      auto sin_a = llvm_ir::EmitCallToIntrinsic(llvm::Intrinsic::sin, {a},
-                                                {type}, ir_builder_);
+      TF_ASSIGN_OR_RETURN(auto cos_a, EmitCos(component_type, a));
+      TF_ASSIGN_OR_RETURN(auto sin_a, EmitSin(component_type, a));
       return EmitComposeComplex(
           op,
           ir_builder_->CreateFMul(
@@ -546,16 +548,13 @@ StatusOr<llvm::Value*> ElementalIrEmitter::EmitComplexUnaryOp(
       auto a = EmitExtractReal(operand_value);
       auto b = EmitExtractImag(operand_value);
       auto type = a->getType();
-      auto exp_b = llvm_ir::EmitCallToIntrinsic(llvm::Intrinsic::exp, {b},
-                                                {type}, ir_builder_);
+      TF_ASSIGN_OR_RETURN(auto exp_b, EmitExp(component_type, b));
       auto half_exp_b =
           ir_builder_->CreateFMul(llvm::ConstantFP::get(type, 0.5), exp_b);
       auto half_exp_neg_b =
           ir_builder_->CreateFDiv(llvm::ConstantFP::get(type, 0.5), exp_b);
-      auto cos_a = llvm_ir::EmitCallToIntrinsic(llvm::Intrinsic::cos, {a},
-                                                {type}, ir_builder_);
-      auto sin_a = llvm_ir::EmitCallToIntrinsic(llvm::Intrinsic::sin, {a},
-                                                {type}, ir_builder_);
+      TF_ASSIGN_OR_RETURN(auto cos_a, EmitCos(component_type, a));
+      TF_ASSIGN_OR_RETURN(auto sin_a, EmitSin(component_type, a));
       return EmitComposeComplex(
           op,
           ir_builder_->CreateFMul(
@@ -563,6 +562,58 @@ StatusOr<llvm::Value*> ElementalIrEmitter::EmitComplexUnaryOp(
           ir_builder_->CreateFMul(
               cos_a, ir_builder_->CreateFSub(half_exp_b, half_exp_neg_b)));
     }
+    case HloOpcode::kTanh: {
+      /*
+      tanh=(exp(x)-exp(-x)) / (exp(x)+exp(-x))
+      e^(a+bi) = e^a*(cos(b)+sin(b)i)
+      so tanh=(((cos(b)+sin(b)i)e^a - (cos(-b)+sin(-b)i)e^-a)) /
+              (((cos(b)+sin(b)i)e^a + (cos(-b)+sin(-b)i)e^-a))
+      cos(b)=cos(-b), sin(-b)=-sin(b)
+      so tanh=(((cos(b)+sin(b)i)e^a - (cos(b)-sin(b)i)e^-a)) /
+              (((cos(b)+sin(b)i)e^a + (cos(b)-sin(b)i)e^-a))
+             =(cos(b)e^a+i*sin(b)e^a + cos(b)(-e^-a)+i*sin(b)e^-a) /
+              (cos(b)e^a+i*sin(b)e^a + cos(b)e^-a+i*sin(b)(-e^-a))
+             =(cos(b)(e^a-e^-a) + i*sin(b)(e^a+e^-a)) /
+              (cos(b)(e^a+e^-a) + i*sin(b)(e^a-e^-a))
+      This is a complex division, so we can multiply by denom_conj/denom_conj
+             =(cos(b)(e^a-e^-a) + i*sin(b)(e^a+e^-a)) *
+              (cos(b)(e^a+e^-a) - i*sin(b)(e^a-e^-a)) /
+              ((cos(b)(e^a+e^-a))^2 + (sin(b)(e^a-e^-a))^2)
+             =(cos(b)^2(e^(2a)-e^(-2a)) + sin(b)^2(e^(2a)-e^(-2a)) +
+               i*(cos(b)sin(b)(e^a+e^-a)^2 - cos(b)sin(b)(e^a-e^-a)^2)) /
+              ((cos(b)(e^a+e^-a))^2 + (sin(b)(e^a-e^-a))^2)
+      */
+      auto a = EmitExtractReal(operand_value);
+      auto b = EmitExtractImag(operand_value);
+      TF_ASSIGN_OR_RETURN(auto exp_a, EmitExp(component_type, a));
+      TF_ASSIGN_OR_RETURN(auto cos_b, EmitCos(component_type, b));
+      TF_ASSIGN_OR_RETURN(auto sin_b, EmitSin(component_type, b));
+      auto exp_neg_a = ir_builder_->CreateFDiv(
+          llvm::ConstantFP::get(exp_a->getType(), 1), exp_a);
+      auto exp_2a_minus_exp_neg_2a = ir_builder_->CreateFSub(
+          ir_builder_->CreateFMul(exp_a, exp_a),
+          ir_builder_->CreateFMul(exp_neg_a, exp_neg_a));
+      auto cos_b_sq = ir_builder_->CreateFMul(cos_b, cos_b);
+      auto sin_b_sq = ir_builder_->CreateFMul(sin_b, sin_b);
+      auto real_num = ir_builder_->CreateFAdd(
+          ir_builder_->CreateFMul(cos_b_sq, exp_2a_minus_exp_neg_2a),
+          ir_builder_->CreateFMul(sin_b_sq, exp_2a_minus_exp_neg_2a));
+      auto cos_b_sin_b = ir_builder_->CreateFMul(cos_b, sin_b);
+      auto exp_a_plus_exp_neg_a = ir_builder_->CreateFAdd(exp_a, exp_neg_a);
+      auto exp_a_plus_exp_neg_a_sq =
+          ir_builder_->CreateFMul(exp_a_plus_exp_neg_a, exp_a_plus_exp_neg_a);
+      auto exp_a_minus_exp_neg_a = ir_builder_->CreateFSub(exp_a, exp_neg_a);
+      auto exp_a_minus_exp_neg_a_sq =
+          ir_builder_->CreateFMul(exp_a_minus_exp_neg_a, exp_a_minus_exp_neg_a);
+      auto imag_num = ir_builder_->CreateFMul(
+          cos_b_sin_b, ir_builder_->CreateFSub(exp_a_plus_exp_neg_a_sq,
+                                               exp_a_minus_exp_neg_a_sq));
+      auto denom = ir_builder_->CreateFAdd(
+          ir_builder_->CreateFMul(cos_b_sq, exp_a_plus_exp_neg_a_sq),
+          ir_builder_->CreateFMul(sin_b_sq, exp_a_minus_exp_neg_a_sq));
+      return EmitComposeComplex(op, ir_builder_->CreateFDiv(real_num, denom),
+                                ir_builder_->CreateFDiv(imag_num, denom));
+    }
     case HloOpcode::kAbs: {
       auto sum_sq = ir_builder_->CreateFAdd(
           ir_builder_->CreateFMul(EmitExtractReal(operand_value),
@@ -625,7 +676,6 @@ StatusOr<llvm::Value*> ElementalIrEmitter::EmitFloatBinaryOp(
     const HloInstruction* op, llvm::Value* lhs_value,
     llvm::Value* rhs_value) const {
   switch (op->opcode()) {
-    // case HloOpcode::kAtan2:  // TODO(b/65209142): CPU atan2 support
     case HloOpcode::kComplex:
       return EmitComposeComplex(op, lhs_value, rhs_value);
     case HloOpcode::kAdd:
@@ -669,10 +719,9 @@ StatusOr<llvm::Value*> ElementalIrEmitter::EmitFloatBinaryOp(
     case HloOpcode::kMinimum:
       return EmitFloatMin(lhs_value, rhs_value);
     case HloOpcode::kPower:
-      return llvm_ir::EmitCallToIntrinsic(llvm::Intrinsic::pow,
-                                          {lhs_value, rhs_value},
-                                          {lhs_value->getType()}, ir_builder_);
-
+      return EmitPow(op->shape().element_type(), lhs_value, rhs_value);
+    case HloOpcode::kAtan2:
+      return EmitAtan2(op->shape().element_type(), lhs_value, rhs_value);
     default:
       return Unimplemented("binary floating point op '%s'",
                            HloOpcodeString(op->opcode()).c_str());
@@ -768,9 +817,40 @@ StatusOr<llvm::Value*> ElementalIrEmitter::EmitComplexBinaryOp(
                                   EmitExtractImag(lhs_value),
                                   EmitExtractImag(rhs_value), ir_builder_));
 
-    // TODO(b/65209142): requires arg(z) -> requires atan|atan2 intrinsic
-    // case HloOpcode::kPower:
-    // // (a+bi)^(c+di) = exp(i(c+di)*arg(a+bi)) * (a*a+b*b)^(c/2+di/2)
+    case HloOpcode::kPower: {
+      // (a+bi)^(c+di) =
+      //    (a*a+b*b)^(0.5c) * exp(-d*atan2(b,a)) * (cos(q) + i*sin(q)),
+      //    where q = c*atan2(b,a)+0.5d*ln(a*a+b*b)
+      PrimitiveType component_type =
+          primitive_util::ComplexComponentType(op->shape().element_type());
+      auto a = EmitExtractReal(lhs_value);
+      auto b = EmitExtractImag(lhs_value);
+      auto c = EmitExtractReal(rhs_value);
+      auto d = EmitExtractImag(rhs_value);
+      auto aa_p_bb = ir_builder_->CreateFAdd(ir_builder_->CreateFMul(a, a),
+                                             ir_builder_->CreateFMul(b, b));
+      auto one_half = llvm::ConstantFP::get(a->getType(), 0.5);
+      auto half_c = ir_builder_->CreateFMul(one_half, c);
+
+      TF_ASSIGN_OR_RETURN(auto aa_p_bb_to_half_c,
+                          EmitPow(component_type, aa_p_bb, half_c));
+      auto neg_d = ir_builder_->CreateFNeg(d);
+      TF_ASSIGN_OR_RETURN(auto arg_lhs, EmitAtan2(component_type, b, a));
+      auto neg_d_arg_lhs = ir_builder_->CreateFMul(neg_d, arg_lhs);
+      TF_ASSIGN_OR_RETURN(auto e_to_neg_d_arg_lhs,
+                          EmitExp(component_type, neg_d_arg_lhs));
+      auto coeff =
+          ir_builder_->CreateFMul(aa_p_bb_to_half_c, e_to_neg_d_arg_lhs);
+      TF_ASSIGN_OR_RETURN(auto ln_aa_p_bb, EmitLog(component_type, aa_p_bb));
+      auto half_d = ir_builder_->CreateFMul(one_half, d);
+      auto q =
+          ir_builder_->CreateFAdd(ir_builder_->CreateFMul(c, arg_lhs),
+                                  ir_builder_->CreateFMul(half_d, ln_aa_p_bb));
+      TF_ASSIGN_OR_RETURN(auto cos_q, EmitCos(component_type, q));
+      TF_ASSIGN_OR_RETURN(auto sin_q, EmitSin(component_type, q));
+      return EmitComposeComplex(op, ir_builder_->CreateFMul(coeff, cos_q),
+                                ir_builder_->CreateFMul(coeff, sin_q));
+    }
     default:
       return Unimplemented("binary complex op '%s'",
                            HloOpcodeString(op->opcode()).c_str());
@@ -873,6 +953,43 @@ StatusOr<llvm::Value*> ElementalIrEmitter::EmitErfcInv(
   return EmitErfInv(prim_type, ir_builder_->CreateFSub(one, value));
 }
 
+StatusOr<llvm::Value*> ElementalIrEmitter::EmitLog(PrimitiveType prim_type,
+                                                   llvm::Value* value) const {
+  return llvm_ir::EmitCallToIntrinsic(llvm::Intrinsic::log, {value},
+                                      {value->getType()}, ir_builder_);
+}
+
+StatusOr<llvm::Value*> ElementalIrEmitter::EmitSin(PrimitiveType prim_type,
+                                                   llvm::Value* value) const {
+  return llvm_ir::EmitCallToIntrinsic(llvm::Intrinsic::sin, {value},
+                                      {value->getType()}, ir_builder_);
+}
+
+StatusOr<llvm::Value*> ElementalIrEmitter::EmitCos(PrimitiveType prim_type,
+                                                   llvm::Value* value) const {
+  return llvm_ir::EmitCallToIntrinsic(llvm::Intrinsic::cos, {value},
+                                      {value->getType()}, ir_builder_);
+}
+
+StatusOr<llvm::Value*> ElementalIrEmitter::EmitExp(PrimitiveType prim_type,
+                                                   llvm::Value* value) const {
+  return llvm_ir::EmitCallToIntrinsic(llvm::Intrinsic::exp, {value},
+                                      {value->getType()}, ir_builder_);
+}
+
+StatusOr<llvm::Value*> ElementalIrEmitter::EmitPow(PrimitiveType prim_type,
+                                                   llvm::Value* lhs,
+                                                   llvm::Value* rhs) const {
+  return llvm_ir::EmitCallToIntrinsic(llvm::Intrinsic::pow, {lhs, rhs},
+                                      {lhs->getType()}, ir_builder_);
+}
+
+StatusOr<llvm::Value*> ElementalIrEmitter::EmitAtan2(PrimitiveType prim_type,
+                                                     llvm::Value* lhs,
+                                                     llvm::Value* rhs) const {
+  return Unimplemented("atan2");
+}
+
 StatusOr<llvm::Value*> ElementalIrEmitter::EmitReducePrecision(
     const HloInstruction* hlo, llvm::Value* x) const {
   if (hlo->operand(0)->shape().element_type() != F32) {
diff --git a/tensorflow/compiler/xla/service/elemental_ir_emitter.h b/tensorflow/compiler/xla/service/elemental_ir_emitter.h
index cccb498f82..1a48eb5fcb 100644
--- a/tensorflow/compiler/xla/service/elemental_ir_emitter.h
+++ b/tensorflow/compiler/xla/service/elemental_ir_emitter.h
@@ -39,7 +39,7 @@ class ElementalIrEmitter {
         module_(module),
         hlo_module_config_(hlo_module_config) {}
 
-  virtual ~ElementalIrEmitter() {}
+  virtual ~ElementalIrEmitter() = default;
 
   virtual StatusOr<llvm::Value*> EmitUnaryOp(const HloInstruction* op,
                                              llvm::Value* operand_value) const;
@@ -92,6 +92,26 @@ class ElementalIrEmitter {
   virtual StatusOr<llvm::Value*> EmitErfcInv(PrimitiveType prim_type,
                                              llvm::Value* value) const;
 
+  virtual StatusOr<llvm::Value*> EmitAtan2(PrimitiveType prim_type,
+                                           llvm::Value* lhs,
+                                           llvm::Value* rhs) const;
+
+  virtual StatusOr<llvm::Value*> EmitLog(PrimitiveType prim_type,
+                                         llvm::Value* value) const;
+
+  virtual StatusOr<llvm::Value*> EmitSin(PrimitiveType prim_type,
+                                         llvm::Value* value) const;
+
+  virtual StatusOr<llvm::Value*> EmitCos(PrimitiveType prim_type,
+                                         llvm::Value* value) const;
+
+  virtual StatusOr<llvm::Value*> EmitExp(PrimitiveType prim_type,
+                                         llvm::Value* value) const;
+
+  virtual StatusOr<llvm::Value*> EmitPow(PrimitiveType prim_type,
+                                         llvm::Value* lhs,
+                                         llvm::Value* rhs) const;
+
   virtual StatusOr<llvm::Value*> EmitReducePrecision(const HloInstruction* hlo,
                                                      llvm::Value* x) const;
 
diff --git a/tensorflow/compiler/xla/service/gpu/elemental_ir_emitter.cc b/tensorflow/compiler/xla/service/gpu/elemental_ir_emitter.cc
index 6bf00cfb8a..4b511cb4bb 100644
--- a/tensorflow/compiler/xla/service/gpu/elemental_ir_emitter.cc
+++ b/tensorflow/compiler/xla/service/gpu/elemental_ir_emitter.cc
@@ -135,10 +135,6 @@ StatusOr<llvm::Value*> GpuElementalIrEmitter::EmitFloatBinaryOp(
   PrimitiveType rhs_input_type = op->operand(1)->shape().element_type();
   PrimitiveType output_type = op->shape().element_type();
   switch (op->opcode()) {
-    case HloOpcode::kAtan2:
-      return EmitLibdeviceMathCall("__nv_atan2", {lhs_value, rhs_value},
-                                   {lhs_input_type, rhs_input_type},
-                                   output_type);
     case HloOpcode::kRemainder: {
       return EmitLibdeviceMathCall("__nv_fmod", {lhs_value, rhs_value},
                                    {lhs_input_type, rhs_input_type},
@@ -199,29 +195,50 @@ StatusOr<llvm::Value*> GpuElementalIrEmitter::EmitErfcInv(
   return EmitLibdeviceMathCall("__nv_erfcinv", {value}, {prim_type}, prim_type);
 }
 
+StatusOr<llvm::Value*> GpuElementalIrEmitter::EmitLog(
+    PrimitiveType prim_type, llvm::Value* value) const {
+  return EmitLibdeviceMathCall("__nv_log", {value}, {prim_type}, prim_type);
+}
+
+StatusOr<llvm::Value*> GpuElementalIrEmitter::EmitSin(
+    PrimitiveType prim_type, llvm::Value* value) const {
+  return EmitLibdeviceMathCall("__nv_sin", {value}, {prim_type}, prim_type);
+}
+
+StatusOr<llvm::Value*> GpuElementalIrEmitter::EmitCos(
+    PrimitiveType prim_type, llvm::Value* value) const {
+  return EmitLibdeviceMathCall("__nv_cos", {value}, {prim_type}, prim_type);
+}
+
+StatusOr<llvm::Value*> GpuElementalIrEmitter::EmitExp(
+    PrimitiveType prim_type, llvm::Value* value) const {
+  return EmitLibdeviceMathCall("__nv_exp", {value}, {prim_type}, prim_type);
+}
+
+StatusOr<llvm::Value*> GpuElementalIrEmitter::EmitPow(PrimitiveType prim_type,
+                                                      llvm::Value* lhs,
+                                                      llvm::Value* rhs) const {
+  return EmitLibdeviceMathCall("__nv_pow", {lhs, rhs}, {prim_type, prim_type},
+                               prim_type);
+}
+
+StatusOr<llvm::Value*> GpuElementalIrEmitter::EmitAtan2(
+    PrimitiveType prim_type, llvm::Value* lhs, llvm::Value* rhs) const {
+  return EmitLibdeviceMathCall("__nv_atan2", {lhs, rhs}, {prim_type, prim_type},
+                               prim_type);
+}
+
 StatusOr<llvm::Value*> GpuElementalIrEmitter::EmitFloatUnaryOp(
     const HloInstruction* op, llvm::Value* operand_value) const {
   PrimitiveType input_type = op->operand(0)->shape().element_type();
   PrimitiveType output_type = op->shape().element_type();
   switch (op->opcode()) {
-    case HloOpcode::kExp:
-      return EmitLibdeviceMathCall("__nv_exp", {operand_value}, {input_type},
-                                   output_type);
     case HloOpcode::kFloor:
       return EmitLibdeviceMathCall("__nv_floor", {operand_value}, {input_type},
                                    output_type);
     case HloOpcode::kCeil:
       return EmitLibdeviceMathCall("__nv_ceil", {operand_value}, {input_type},
                                    output_type);
-    case HloOpcode::kLog:
-      return EmitLibdeviceMathCall("__nv_log", {operand_value}, {input_type},
-                                   output_type);
-    case HloOpcode::kCos:
-      return EmitLibdeviceMathCall("__nv_cos", {operand_value}, {input_type},
-                                   output_type);
-    case HloOpcode::kSin:
-      return EmitLibdeviceMathCall("__nv_sin", {operand_value}, {input_type},
-                                   output_type);
     case HloOpcode::kTanh:
       return EmitLibdeviceMathCall("__nv_tanh", {operand_value}, {input_type},
                                    output_type);
@@ -230,224 +247,6 @@ StatusOr<llvm::Value*> GpuElementalIrEmitter::EmitFloatUnaryOp(
   }
 }
 
-StatusOr<llvm::Value*> GpuElementalIrEmitter::EmitComplexBinaryOp(
-    const HloInstruction* op, llvm::Value* lhs_value,
-    llvm::Value* rhs_value) const {
-  PrimitiveType input_type = op->operand(0)->shape().element_type();
-  TF_RET_CHECK(primitive_util::IsComplexType(input_type));
-  PrimitiveType component_type =
-      primitive_util::ComplexComponentType(input_type);
-  switch (op->opcode()) {
-    case HloOpcode::kPower: {
-      // (a+bi)^(c+di) =
-      //    (a*a+b*b)^(0.5c) * exp(-d*atan2(b,a)) * (cos(q) + i*sin(q)),
-      //    where q = c*atan2(b,a)+0.5d*ln(a*a+b*b)
-      auto a = EmitExtractReal(lhs_value);
-      auto b = EmitExtractImag(lhs_value);
-      auto c = EmitExtractReal(rhs_value);
-      auto d = EmitExtractImag(rhs_value);
-      auto aa_p_bb = ir_builder_->CreateFAdd(ir_builder_->CreateFMul(a, a),
-                                             ir_builder_->CreateFMul(b, b));
-      auto one_half = llvm::ConstantFP::get(a->getType(), 0.5);
-      auto half_c = ir_builder_->CreateFMul(one_half, c);
-
-      TF_ASSIGN_OR_RETURN(
-          auto aa_p_bb_to_half_c,
-          EmitLibdeviceMathCall("__nv_pow", {aa_p_bb, half_c},
-                                {component_type, component_type},
-                                component_type));
-      auto neg_d = ir_builder_->CreateFNeg(d);
-      TF_ASSIGN_OR_RETURN(
-          auto arg_lhs, EmitLibdeviceMathCall("__nv_atan2", {b, a},
-                                              {component_type, component_type},
-                                              component_type));
-      auto neg_d_arg_lhs = ir_builder_->CreateFMul(neg_d, arg_lhs);
-      TF_ASSIGN_OR_RETURN(
-          auto e_to_neg_d_arg_lhs,
-          EmitLibdeviceMathCall("__nv_exp", {neg_d_arg_lhs}, {component_type},
-                                component_type));
-      auto coeff =
-          ir_builder_->CreateFMul(aa_p_bb_to_half_c, e_to_neg_d_arg_lhs);
-      TF_ASSIGN_OR_RETURN(
-          auto ln_aa_p_bb,
-          EmitLibdeviceMathCall("__nv_log", {aa_p_bb}, {component_type},
-                                component_type));
-      auto half_d = ir_builder_->CreateFMul(one_half, d);
-      auto q =
-          ir_builder_->CreateFAdd(ir_builder_->CreateFMul(c, arg_lhs),
-                                  ir_builder_->CreateFMul(half_d, ln_aa_p_bb));
-      TF_ASSIGN_OR_RETURN(
-          auto cos_q, EmitLibdeviceMathCall("__nv_cos", {q}, {component_type},
-                                            component_type));
-      TF_ASSIGN_OR_RETURN(
-          auto sin_q, EmitLibdeviceMathCall("__nv_sin", {q}, {component_type},
-                                            component_type));
-      return EmitComposeComplex(op, ir_builder_->CreateFMul(coeff, cos_q),
-                                ir_builder_->CreateFMul(coeff, sin_q));
-    }
-    default:
-      return ElementalIrEmitter::EmitComplexBinaryOp(op, lhs_value, rhs_value);
-  }
-}
-
-StatusOr<llvm::Value*> GpuElementalIrEmitter::EmitComplexUnaryOp(
-    const HloInstruction* op, llvm::Value* operand_value) const {
-  PrimitiveType input_type = op->operand(0)->shape().element_type();
-  PrimitiveType component_type =
-      primitive_util::IsComplexType(input_type)
-          ? primitive_util::ComplexComponentType(input_type)
-          : input_type;
-
-  switch (op->opcode()) {
-    case HloOpcode::kLog: {
-      // log(a+bi) = .5*log(a^2+b^2) + i*atan2(b, a)
-      auto a = EmitExtractReal(operand_value);
-      auto b = EmitExtractImag(operand_value);
-      llvm::Type* llvm_ty = a->getType();
-      auto sum_sq = ir_builder_->CreateFAdd(ir_builder_->CreateFMul(a, a),
-                                            ir_builder_->CreateFMul(b, b));
-      TF_ASSIGN_OR_RETURN(
-          auto log_sum_sq,
-          EmitLibdeviceMathCall("__nv_log", {sum_sq}, {component_type},
-                                component_type));
-      TF_ASSIGN_OR_RETURN(
-          auto angle, EmitLibdeviceMathCall("__nv_atan2", {b, a},
-                                            {component_type, component_type},
-                                            component_type));
-      auto one_half = llvm::ConstantFP::get(llvm_ty, 0.5);
-      return EmitComposeComplex(
-          op, ir_builder_->CreateFMul(one_half, log_sum_sq), angle);
-    }
-    case HloOpcode::kExp: {
-      // e^(a+bi) = e^a*(cos(b)+sin(b)i)
-      auto b = EmitExtractImag(operand_value);
-      TF_ASSIGN_OR_RETURN(
-          auto exp_a,
-          EmitLibdeviceMathCall("__nv_exp", {EmitExtractReal(operand_value)},
-                                {component_type}, component_type));
-      TF_ASSIGN_OR_RETURN(
-          auto cos_b, EmitLibdeviceMathCall("__nv_cos", {b}, {component_type},
-                                            component_type));
-      TF_ASSIGN_OR_RETURN(
-          auto sin_b, EmitLibdeviceMathCall("__nv_sin", {b}, {component_type},
-                                            component_type));
-      return EmitComposeComplex(op, ir_builder_->CreateFMul(exp_a, cos_b),
-                                ir_builder_->CreateFMul(exp_a, sin_b));
-    }
-    case HloOpcode::kCos: {
-      // cos(a+bi) = .5(cos(a)*(e^-b+e^b) + i*sin(a)*(e^-b-e^b))
-      auto a = EmitExtractReal(operand_value);
-      auto llvm_ty = a->getType();
-      TF_ASSIGN_OR_RETURN(
-          auto exp_b,
-          EmitLibdeviceMathCall("__nv_exp", {EmitExtractImag(operand_value)},
-                                {component_type}, component_type));
-      TF_ASSIGN_OR_RETURN(
-          auto cos_a, EmitLibdeviceMathCall("__nv_cos", {a}, {component_type},
-                                            component_type));
-      TF_ASSIGN_OR_RETURN(
-          auto sin_a, EmitLibdeviceMathCall("__nv_sin", {a}, {component_type},
-                                            component_type));
-      auto half_exp_b =
-          ir_builder_->CreateFMul(llvm::ConstantFP::get(llvm_ty, 0.5), exp_b);
-      auto half_exp_neg_b =
-          ir_builder_->CreateFDiv(llvm::ConstantFP::get(llvm_ty, 0.5), exp_b);
-      return EmitComposeComplex(
-          op,
-          ir_builder_->CreateFMul(
-              cos_a, ir_builder_->CreateFAdd(half_exp_neg_b, half_exp_b)),
-          ir_builder_->CreateFMul(
-              sin_a, ir_builder_->CreateFSub(half_exp_neg_b, half_exp_b)));
-    }
-
-    case HloOpcode::kSin: {
-      // sin(a+bi) = 0.5(sin(a)*(e^b+e^-b) + i*cos(a)*(e^b-e^-b)
-      auto a = EmitExtractReal(operand_value);
-      auto llvm_ty = a->getType();
-      TF_ASSIGN_OR_RETURN(
-          auto exp_b,
-          EmitLibdeviceMathCall("__nv_exp", {EmitExtractImag(operand_value)},
-                                {component_type}, component_type));
-      TF_ASSIGN_OR_RETURN(
-          auto cos_a, EmitLibdeviceMathCall("__nv_cos", {a}, {component_type},
-                                            component_type));
-      TF_ASSIGN_OR_RETURN(
-          auto sin_a, EmitLibdeviceMathCall("__nv_sin", {a}, {component_type},
-                                            component_type));
-      auto half_exp_b =
-          ir_builder_->CreateFMul(llvm::ConstantFP::get(llvm_ty, 0.5), exp_b);
-      auto half_exp_neg_b =
-          ir_builder_->CreateFDiv(llvm::ConstantFP::get(llvm_ty, 0.5), exp_b);
-      return EmitComposeComplex(
-          op,
-          ir_builder_->CreateFMul(
-              sin_a, ir_builder_->CreateFAdd(half_exp_b, half_exp_neg_b)),
-          ir_builder_->CreateFMul(
-              cos_a, ir_builder_->CreateFSub(half_exp_b, half_exp_neg_b)));
-    }
-    case HloOpcode::kTanh: {
-      /*
-      tanh=(exp(x)-exp(-x)) / (exp(x)+exp(-x))
-      e^(a+bi) = e^a*(cos(b)+sin(b)i)
-      so tanh=(((cos(b)+sin(b)i)e^a - (cos(-b)+sin(-b)i)e^-a)) /
-              (((cos(b)+sin(b)i)e^a + (cos(-b)+sin(-b)i)e^-a))
-      cos(b)=cos(-b), sin(-b)=-sin(b)
-      so tanh=(((cos(b)+sin(b)i)e^a - (cos(b)-sin(b)i)e^-a)) /
-              (((cos(b)+sin(b)i)e^a + (cos(b)-sin(b)i)e^-a))
-             =(cos(b)e^a+i*sin(b)e^a + cos(b)(-e^-a)+i*sin(b)e^-a) /
-              (cos(b)e^a+i*sin(b)e^a + cos(b)e^-a+i*sin(b)(-e^-a))
-             =(cos(b)(e^a-e^-a) + i*sin(b)(e^a+e^-a)) /
-              (cos(b)(e^a+e^-a) + i*sin(b)(e^a-e^-a))
-      This is a complex division, so we can multiply by denom_conj/denom_conj
-             =(cos(b)(e^a-e^-a) + i*sin(b)(e^a+e^-a)) *
-              (cos(b)(e^a+e^-a) - i*sin(b)(e^a-e^-a)) /
-              ((cos(b)(e^a+e^-a))^2 + (sin(b)(e^a-e^-a))^2)
-             =(cos(b)^2(e^(2a)-e^(-2a)) + sin(b)^2(e^(2a)-e^(-2a)) +
-               i*(cos(b)sin(b)(e^a+e^-a)^2 - cos(b)sin(b)(e^a-e^-a)^2)) /
-              ((cos(b)(e^a+e^-a))^2 + (sin(b)(e^a-e^-a))^2)
-      */
-      auto a = EmitExtractReal(operand_value);
-      auto b = EmitExtractImag(operand_value);
-      TF_ASSIGN_OR_RETURN(
-          auto exp_a, EmitLibdeviceMathCall("__nv_exp", {a}, {component_type},
-                                            component_type));
-      TF_ASSIGN_OR_RETURN(
-          auto cos_b, EmitLibdeviceMathCall("__nv_cos", {b}, {component_type},
-                                            component_type));
-      TF_ASSIGN_OR_RETURN(
-          auto sin_b, EmitLibdeviceMathCall("__nv_sin", {b}, {component_type},
-                                            component_type));
-      auto exp_neg_a = ir_builder_->CreateFDiv(
-          llvm::ConstantFP::get(exp_a->getType(), 1), exp_a);
-      auto exp_2a_minus_exp_neg_2a = ir_builder_->CreateFSub(
-          ir_builder_->CreateFMul(exp_a, exp_a),
-          ir_builder_->CreateFMul(exp_neg_a, exp_neg_a));
-      auto cos_b_sq = ir_builder_->CreateFMul(cos_b, cos_b);
-      auto sin_b_sq = ir_builder_->CreateFMul(sin_b, sin_b);
-      auto real_num = ir_builder_->CreateFAdd(
-          ir_builder_->CreateFMul(cos_b_sq, exp_2a_minus_exp_neg_2a),
-          ir_builder_->CreateFMul(sin_b_sq, exp_2a_minus_exp_neg_2a));
-      auto cos_b_sin_b = ir_builder_->CreateFMul(cos_b, sin_b);
-      auto exp_a_plus_exp_neg_a = ir_builder_->CreateFAdd(exp_a, exp_neg_a);
-      auto exp_a_plus_exp_neg_a_sq =
-          ir_builder_->CreateFMul(exp_a_plus_exp_neg_a, exp_a_plus_exp_neg_a);
-      auto exp_a_minus_exp_neg_a = ir_builder_->CreateFSub(exp_a, exp_neg_a);
-      auto exp_a_minus_exp_neg_a_sq =
-          ir_builder_->CreateFMul(exp_a_minus_exp_neg_a, exp_a_minus_exp_neg_a);
-      auto imag_num = ir_builder_->CreateFMul(
-          cos_b_sin_b, ir_builder_->CreateFSub(exp_a_plus_exp_neg_a_sq,
-                                               exp_a_minus_exp_neg_a_sq));
-      auto denom = ir_builder_->CreateFAdd(
-          ir_builder_->CreateFMul(cos_b_sq, exp_a_plus_exp_neg_a_sq),
-          ir_builder_->CreateFMul(sin_b_sq, exp_a_minus_exp_neg_a_sq));
-      return EmitComposeComplex(op, ir_builder_->CreateFDiv(real_num, denom),
-                                ir_builder_->CreateFDiv(imag_num, denom));
-    }
-    default:
-      return ElementalIrEmitter::EmitComplexUnaryOp(op, operand_value);
-  }
-}
-
 llvm::Value* GpuElementalIrEmitter::EmitDeviceFunctionCall(
     const string& callee_name,
     tensorflow::gtl::ArraySlice<llvm::Value*> operands,
diff --git a/tensorflow/compiler/xla/service/gpu/elemental_ir_emitter.h b/tensorflow/compiler/xla/service/gpu/elemental_ir_emitter.h
index 6a537d0152..77d4569b1e 100644
--- a/tensorflow/compiler/xla/service/gpu/elemental_ir_emitter.h
+++ b/tensorflow/compiler/xla/service/gpu/elemental_ir_emitter.h
@@ -54,20 +54,31 @@ class GpuElementalIrEmitter : public ElementalIrEmitter {
   StatusOr<llvm::Value*> EmitFloatUnaryOp(
       const HloInstruction* op, llvm::Value* operand_value) const override;
 
-  StatusOr<llvm::Value*> EmitComplexUnaryOp(
-      const HloInstruction* op, llvm::Value* operand_value) const override;
-
   StatusOr<llvm::Value*> EmitFloatBinaryOp(
       const HloInstruction* op, llvm::Value* lhs_value,
       llvm::Value* rhs_value) const override;
 
-  StatusOr<llvm::Value*> EmitComplexBinaryOp(
-      const HloInstruction* op, llvm::Value* lhs_value,
-      llvm::Value* rhs_value) const override;
-
   StatusOr<llvm::Value*> EmitErfcInv(PrimitiveType prim_type,
                                      llvm::Value* value) const override;
 
+  StatusOr<llvm::Value*> EmitLog(PrimitiveType prim_type,
+                                 llvm::Value* value) const override;
+
+  StatusOr<llvm::Value*> EmitSin(PrimitiveType prim_type,
+                                 llvm::Value* value) const override;
+
+  StatusOr<llvm::Value*> EmitCos(PrimitiveType prim_type,
+                                 llvm::Value* value) const override;
+
+  StatusOr<llvm::Value*> EmitExp(PrimitiveType prim_type,
+                                 llvm::Value* value) const override;
+
+  StatusOr<llvm::Value*> EmitPow(PrimitiveType prim_type, llvm::Value* lhs,
+                                 llvm::Value* rhs) const override;
+
+  StatusOr<llvm::Value*> EmitAtan2(PrimitiveType prim_type, llvm::Value* lhs,
+                                   llvm::Value* rhs) const override;
+
   llvm::Value* EmitThreadId() const override;
 
  private:
-- 
GitLab


From 7316cf1bb842212480afac6a3b4ee5c4d8736bfb Mon Sep 17 00:00:00 2001
From: Julian Niedermeier <jpniedermeier@gmail.com>
Date: Tue, 19 Dec 2017 10:30:01 -0800
Subject: [PATCH 1211/1225] Reverting changes in
 69c324591ba4dfeafb403ee59de56ffe063c1e94 to fix no gradient for argmax bug

---
 tensorflow/contrib/seq2seq/python/ops/helper.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tensorflow/contrib/seq2seq/python/ops/helper.py b/tensorflow/contrib/seq2seq/python/ops/helper.py
index dec03ce43f..ef3722ee41 100644
--- a/tensorflow/contrib/seq2seq/python/ops/helper.py
+++ b/tensorflow/contrib/seq2seq/python/ops/helper.py
@@ -223,7 +223,8 @@ class TrainingHelper(Helper):
 
   def sample(self, time, outputs, name=None, **unused_kwargs):
     with ops.name_scope(name, "TrainingHelperSample", [time, outputs]):
-      sample_ids = math_ops.argmax(outputs, axis=-1, output_type=dtypes.int32)
+      sample_ids = math_ops.cast(
+          math_ops.argmax(outputs, axis=-1), dtypes.int32)
       return sample_ids
 
   def next_inputs(self, time, outputs, state, name=None, **unused_kwargs):
-- 
GitLab


From 47c0d45b042714b510eb4be8148968e3536f96f2 Mon Sep 17 00:00:00 2001
From: Yong Tang <yong.tang.github@outlook.com>
Date: Tue, 19 Dec 2017 19:04:56 +0000
Subject: [PATCH 1212/1225] Rerun tensorflow/core/api_def/update_api_def.sh

Signed-off-by: Yong Tang <yong.tang.github@outlook.com>
---
 .../api_def/base_api/api_def_DecodeCompressed.pbtxt    | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/tensorflow/core/api_def/base_api/api_def_DecodeCompressed.pbtxt b/tensorflow/core/api_def/base_api/api_def_DecodeCompressed.pbtxt
index 63ff608630..9babd82293 100644
--- a/tensorflow/core/api_def/base_api/api_def_DecodeCompressed.pbtxt
+++ b/tensorflow/core/api_def/base_api/api_def_DecodeCompressed.pbtxt
@@ -20,5 +20,13 @@ A scalar containing either (i) the empty string (no
 compression), (ii) "ZLIB", or (iii) "GZIP".
 END
   }
-  summary: "Decompress the bytes of a string to the output string."
+  summary: "Decompress strings."
+  description: <<END
+This op decompresses each element of the `bytes` input `Tensor`, which
+is assumed to be compressed using the given `compression_type`.
+
+The `output` is a string `Tensor` of the same shape as `bytes`,
+each element containing the decompressed data from the corresponding
+element in `bytes`.
+END
 }
-- 
GitLab


From 37b5747cc7db07d59a4cac61a4c93ac17bf1579b Mon Sep 17 00:00:00 2001
From: Guangda Lai <laigd@google.com>
Date: Tue, 19 Dec 2017 11:14:38 -0800
Subject: [PATCH 1213/1225] Fix comment.

PiperOrigin-RevId: 179578952
---
 tensorflow/core/grappler/optimizers/graph_optimizer.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/core/grappler/optimizers/graph_optimizer.h b/tensorflow/core/grappler/optimizers/graph_optimizer.h
index 55a90dce88..42d9837312 100644
--- a/tensorflow/core/grappler/optimizers/graph_optimizer.h
+++ b/tensorflow/core/grappler/optimizers/graph_optimizer.h
@@ -41,7 +41,7 @@ class GraphOptimizer {
                           GraphDef* optimized_graph) = 0;
 
   // Method invoked by the framework so that it can provide feedback
-  // on how well the "optimize_output" (produced as *output from a
+  // on how well the "optimized_graph" (produced as *optimized_graph from a
   // call to Optimize) performed.  Lower "result" scores are better.
   virtual void Feedback(Cluster* cluster, const GrapplerItem& item,
                         const GraphDef& optimized_graph, double result) = 0;
-- 
GitLab


From 8698fa938ffdea982062ba7e284df49675f6ae84 Mon Sep 17 00:00:00 2001
From: Loo Rong Jie <loorongjie@gmail.com>
Date: Tue, 19 Dec 2017 18:27:26 +0800
Subject: [PATCH 1214/1225] [XLA/tfcompile] Add Env::CreateTempFilename and use
 it in SaveGraph

---
 .../compiler/xla/service/hlo_graph_dumper.cc  |  9 ++-
 tensorflow/core/platform/env.cc               | 57 +++++++++++--------
 tensorflow/core/platform/env.h                |  7 ++-
 tensorflow/core/platform/env_test.cc          | 15 +++++
 4 files changed, 59 insertions(+), 29 deletions(-)

diff --git a/tensorflow/compiler/xla/service/hlo_graph_dumper.cc b/tensorflow/compiler/xla/service/hlo_graph_dumper.cc
index e534cb2140..c13fcfb4b4 100644
--- a/tensorflow/compiler/xla/service/hlo_graph_dumper.cc
+++ b/tensorflow/compiler/xla/service/hlo_graph_dumper.cc
@@ -1354,18 +1354,17 @@ string SaveGraph(const string& graph,
       break;
   }
   string path = JoinPath(
-      dest_path, StrCat("hlo_graph_", output_num++, ".XXXXXX", file_extension));
+      dest_path, StrCat("hlo_graph_", output_num++, "."));
   auto status = Status::OK();
-  int fd = mkstemps(&path[0], file_extension.length());
-  if (fd < 0) {
+  auto env = tensorflow::Env::Default();
+  if (!env->CreateTempFilename(&path, file_extension)) {
     status =
         Status(tensorflow::error::Code::UNKNOWN,
                StrCat("Failed to create temporary file to dump HLO graph: ",
                       strerror(errno)));
   } else {
     status =
-        tensorflow::WriteStringToFile(tensorflow::Env::Default(), path, graph);
-    close(fd);
+        tensorflow::WriteStringToFile(env, path, graph);
   }
   if (!status.ok()) {
     LOG(WARNING) << "Saving HLO graph failed: " << status;
diff --git a/tensorflow/core/platform/env.cc b/tensorflow/core/platform/env.cc
index 316919aa1c..4ad75165e9 100644
--- a/tensorflow/core/platform/env.cc
+++ b/tensorflow/core/platform/env.cc
@@ -300,36 +300,47 @@ bool Env::LocalTempFilename(string* filename) {
   // Try each directory, as they might be full, have inappropriate
   // permissions or have different problems at times.
   for (const string& dir : dirs) {
+    *filename = io::JoinPath(dir, "tempfile-");
+    if (CreateTempFilename(filename, "")) {
+      return true;
+    }
+  }
+  return false;
+}
+
+bool Env::CreateTempFilename(string* prefix, const string& suffix) {
 #ifdef __APPLE__
-    uint64_t tid64;
-    pthread_threadid_np(nullptr, &tid64);
-    int32 tid = static_cast<int32>(tid64);
-    int32 pid = static_cast<int32>(getpid());
+  uint64_t tid64;
+  pthread_threadid_np(nullptr, &tid64);
+  int32 tid = static_cast<int32>(tid64);
+  int32 pid = static_cast<int32>(getpid());
 #elif defined(__FreeBSD__)
-    // Has to be casted to long first, else this error appears:
-    // static_cast from 'pthread_t' (aka 'pthread *') to 'int32' (aka 'int')
-    // is not allowed
-    int32 tid = static_cast<int32>((long) pthread_self());
-    int32 pid = static_cast<int32>(getpid());
+  // Has to be casted to long first, else this error appears:
+  // static_cast from 'pthread_t' (aka 'pthread *') to 'int32' (aka 'int')
+  // is not allowed
+  int32 tid = static_cast<int32>((long) pthread_self());
+  int32 pid = static_cast<int32>(getpid());
 #elif defined(PLATFORM_WINDOWS)
-    int32 tid = static_cast<int32>(GetCurrentThreadId());
-    int32 pid = static_cast<int32>(GetCurrentProcessId());
+  int32 tid = static_cast<int32>(GetCurrentThreadId());
+  int32 pid = static_cast<int32>(GetCurrentProcessId());
 #else
-    int32 tid = static_cast<int32>(pthread_self());
-    int32 pid = static_cast<int32>(getpid());
+  int32 tid = static_cast<int32>(pthread_self());
+  int32 pid = static_cast<int32>(getpid());
 #endif
-    uint64 now_microsec = NowMicros();
+  uint64 now_microsec = NowMicros();
 
-    *filename = io::JoinPath(
-        dir, strings::Printf("tempfile-%s-%x-%d-%llx", port::Hostname().c_str(),
-                             tid, pid, now_microsec));
-    if (FileExists(*filename).ok()) {
-      filename->clear();
-    } else {
-      return true;
-    }
+  *prefix += strings::Printf("%s-%x-%d-%llx", port::Hostname().c_str(),
+                           tid, pid, now_microsec);
+
+  if (suffix.size()) {
+    *prefix += suffix;
+  }
+  if (FileExists(*prefix).ok()) {
+    prefix->clear();
+    return false;
+  } else {
+    return true;
   }
-  return false;
 }
 
 Thread::~Thread() {}
diff --git a/tensorflow/core/platform/env.h b/tensorflow/core/platform/env.h
index da8c3e2d7e..970a6c54d7 100644
--- a/tensorflow/core/platform/env.h
+++ b/tensorflow/core/platform/env.h
@@ -218,6 +218,10 @@ class Env {
   /// Creates a local unique temporary file name. Returns true if success.
   bool LocalTempFilename(string* filename);
 
+  /// Create a unique file name starts with |prefix| and ends with |suffix|.
+  /// Returns true if success.
+  bool CreateTempFilename(string* prefix, const string& suffix);
+
   // TODO(jeff,sanjay): Add back thread/thread-pool support if needed.
   // TODO(jeff,sanjay): if needed, tighten spec so relative to epoch, or
   // provide a routine to get the absolute time.
@@ -279,7 +283,7 @@ class Env {
   // "version" should be the version of the library or NULL
   // returns the name that LoadLibrary() can use
   virtual string FormatLibraryFileName(const string& name,
-      const string& version) = 0;
+                                       const string& version) = 0;
 
  private:
   // Returns a possible list of local temporary directories.
@@ -346,6 +350,7 @@ class EnvWrapper : public Env {
                                const string& version) override {
     return target_->FormatLibraryFileName(name, version);
   }
+
  private:
   Env* target_;
 };
diff --git a/tensorflow/core/platform/env_test.cc b/tensorflow/core/platform/env_test.cc
index c9b362f182..041b4118ff 100644
--- a/tensorflow/core/platform/env_test.cc
+++ b/tensorflow/core/platform/env_test.cc
@@ -20,6 +20,7 @@ limitations under the License.
 #include "tensorflow/core/framework/graph.pb.h"
 #include "tensorflow/core/framework/node_def.pb.h"
 #include "tensorflow/core/lib/core/status_test_util.h"
+#include "tensorflow/core/lib/core/stringpiece.h"
 #include "tensorflow/core/lib/io/path.h"
 #include "tensorflow/core/lib/strings/str_util.h"
 #include "tensorflow/core/lib/strings/strcat.h"
@@ -340,4 +341,18 @@ TEST_F(DefaultEnvTest, LocalTempFilename) {
   EXPECT_FALSE(env->FileExists(filename).ok());
 }
 
+TEST_F(DefaultEnvTest, CreateTempFilename) {
+  Env* env = Env::Default();
+
+  string prefix = "tempfile-prefix-";
+  string suffix = ".tmp";
+  string filename = prefix;
+
+  EXPECT_TRUE(env->CreateTempFilename(&filename, suffix));
+
+  StringPiece str(filename);
+  EXPECT_TRUE(str.starts_with(prefix));
+  EXPECT_TRUE(str.ends_with(suffix));
+}
+
 }  // namespace tensorflow
-- 
GitLab


From a944978c391189c6852de15c91d86aefd7cbd3ae Mon Sep 17 00:00:00 2001
From: Loo Rong Jie <loorongjie@gmail.com>
Date: Wed, 20 Dec 2017 08:46:11 +0800
Subject: [PATCH 1215/1225] Address comments

---
 tensorflow/compiler/xla/service/hlo_graph_dumper.cc | 2 +-
 tensorflow/core/platform/env.cc                     | 4 ++--
 tensorflow/core/platform/env.h                      | 6 +++---
 tensorflow/core/platform/env_test.cc                | 4 ++--
 4 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/tensorflow/compiler/xla/service/hlo_graph_dumper.cc b/tensorflow/compiler/xla/service/hlo_graph_dumper.cc
index c13fcfb4b4..c33c5af834 100644
--- a/tensorflow/compiler/xla/service/hlo_graph_dumper.cc
+++ b/tensorflow/compiler/xla/service/hlo_graph_dumper.cc
@@ -1357,7 +1357,7 @@ string SaveGraph(const string& graph,
       dest_path, StrCat("hlo_graph_", output_num++, "."));
   auto status = Status::OK();
   auto env = tensorflow::Env::Default();
-  if (!env->CreateTempFilename(&path, file_extension)) {
+  if (!env->CreateLocalFilename(&path, file_extension)) {
     status =
         Status(tensorflow::error::Code::UNKNOWN,
                StrCat("Failed to create temporary file to dump HLO graph: ",
diff --git a/tensorflow/core/platform/env.cc b/tensorflow/core/platform/env.cc
index 4ad75165e9..563c9bee78 100644
--- a/tensorflow/core/platform/env.cc
+++ b/tensorflow/core/platform/env.cc
@@ -301,14 +301,14 @@ bool Env::LocalTempFilename(string* filename) {
   // permissions or have different problems at times.
   for (const string& dir : dirs) {
     *filename = io::JoinPath(dir, "tempfile-");
-    if (CreateTempFilename(filename, "")) {
+    if (CreateLocalFilename(filename, "")) {
       return true;
     }
   }
   return false;
 }
 
-bool Env::CreateTempFilename(string* prefix, const string& suffix) {
+bool Env::CreateLocalFilename(string* prefix, const string& suffix) {
 #ifdef __APPLE__
   uint64_t tid64;
   pthread_threadid_np(nullptr, &tid64);
diff --git a/tensorflow/core/platform/env.h b/tensorflow/core/platform/env.h
index 970a6c54d7..a0d38a0e2f 100644
--- a/tensorflow/core/platform/env.h
+++ b/tensorflow/core/platform/env.h
@@ -218,9 +218,9 @@ class Env {
   /// Creates a local unique temporary file name. Returns true if success.
   bool LocalTempFilename(string* filename);
 
-  /// Create a unique file name starts with |prefix| and ends with |suffix|.
-  /// Returns true if success.
-  bool CreateTempFilename(string* prefix, const string& suffix);
+  /// Creates a local unique file name that starts with |prefix| and ends with
+  /// |suffix|. Returns true if success.
+  bool CreateLocalFilename(string* prefix, const string& suffix);
 
   // TODO(jeff,sanjay): Add back thread/thread-pool support if needed.
   // TODO(jeff,sanjay): if needed, tighten spec so relative to epoch, or
diff --git a/tensorflow/core/platform/env_test.cc b/tensorflow/core/platform/env_test.cc
index 041b4118ff..6a2ae04c16 100644
--- a/tensorflow/core/platform/env_test.cc
+++ b/tensorflow/core/platform/env_test.cc
@@ -341,14 +341,14 @@ TEST_F(DefaultEnvTest, LocalTempFilename) {
   EXPECT_FALSE(env->FileExists(filename).ok());
 }
 
-TEST_F(DefaultEnvTest, CreateTempFilename) {
+TEST_F(DefaultEnvTest, CreateLocalFilename) {
   Env* env = Env::Default();
 
   string prefix = "tempfile-prefix-";
   string suffix = ".tmp";
   string filename = prefix;
 
-  EXPECT_TRUE(env->CreateTempFilename(&filename, suffix));
+  EXPECT_TRUE(env->CreateLocalFilename(&filename, suffix));
 
   StringPiece str(filename);
   EXPECT_TRUE(str.starts_with(prefix));
-- 
GitLab


From 8a7b8fd3cf97682edf681e1927fc996ee70ba7e7 Mon Sep 17 00:00:00 2001
From: Loo Rong Jie <loorongjie@gmail.com>
Date: Wed, 20 Dec 2017 09:08:45 +0800
Subject: [PATCH 1216/1225] Change name to CreateNewFileWithUniqueName

---
 tensorflow/compiler/xla/service/hlo_graph_dumper.cc | 2 +-
 tensorflow/core/platform/env.cc                     | 4 ++--
 tensorflow/core/platform/env.h                      | 2 +-
 tensorflow/core/platform/env_test.cc                | 4 ++--
 4 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/tensorflow/compiler/xla/service/hlo_graph_dumper.cc b/tensorflow/compiler/xla/service/hlo_graph_dumper.cc
index c33c5af834..cf22212c28 100644
--- a/tensorflow/compiler/xla/service/hlo_graph_dumper.cc
+++ b/tensorflow/compiler/xla/service/hlo_graph_dumper.cc
@@ -1357,7 +1357,7 @@ string SaveGraph(const string& graph,
       dest_path, StrCat("hlo_graph_", output_num++, "."));
   auto status = Status::OK();
   auto env = tensorflow::Env::Default();
-  if (!env->CreateLocalFilename(&path, file_extension)) {
+  if (!env->CreateNewFileWithUniqueName(&path, file_extension)) {
     status =
         Status(tensorflow::error::Code::UNKNOWN,
                StrCat("Failed to create temporary file to dump HLO graph: ",
diff --git a/tensorflow/core/platform/env.cc b/tensorflow/core/platform/env.cc
index 563c9bee78..6716f0702c 100644
--- a/tensorflow/core/platform/env.cc
+++ b/tensorflow/core/platform/env.cc
@@ -301,14 +301,14 @@ bool Env::LocalTempFilename(string* filename) {
   // permissions or have different problems at times.
   for (const string& dir : dirs) {
     *filename = io::JoinPath(dir, "tempfile-");
-    if (CreateLocalFilename(filename, "")) {
+    if (CreateNewFileWithUniqueName(filename, "")) {
       return true;
     }
   }
   return false;
 }
 
-bool Env::CreateLocalFilename(string* prefix, const string& suffix) {
+bool Env::CreateNewFileWithUniqueName(string* prefix, const string& suffix) {
 #ifdef __APPLE__
   uint64_t tid64;
   pthread_threadid_np(nullptr, &tid64);
diff --git a/tensorflow/core/platform/env.h b/tensorflow/core/platform/env.h
index a0d38a0e2f..1d3b9afe0c 100644
--- a/tensorflow/core/platform/env.h
+++ b/tensorflow/core/platform/env.h
@@ -220,7 +220,7 @@ class Env {
 
   /// Creates a local unique file name that starts with |prefix| and ends with
   /// |suffix|. Returns true if success.
-  bool CreateLocalFilename(string* prefix, const string& suffix);
+  bool CreateNewFileWithUniqueName(string* prefix, const string& suffix);
 
   // TODO(jeff,sanjay): Add back thread/thread-pool support if needed.
   // TODO(jeff,sanjay): if needed, tighten spec so relative to epoch, or
diff --git a/tensorflow/core/platform/env_test.cc b/tensorflow/core/platform/env_test.cc
index 6a2ae04c16..bf166002d1 100644
--- a/tensorflow/core/platform/env_test.cc
+++ b/tensorflow/core/platform/env_test.cc
@@ -341,14 +341,14 @@ TEST_F(DefaultEnvTest, LocalTempFilename) {
   EXPECT_FALSE(env->FileExists(filename).ok());
 }
 
-TEST_F(DefaultEnvTest, CreateLocalFilename) {
+TEST_F(DefaultEnvTest, CreateNewFileWithUniqueName) {
   Env* env = Env::Default();
 
   string prefix = "tempfile-prefix-";
   string suffix = ".tmp";
   string filename = prefix;
 
-  EXPECT_TRUE(env->CreateLocalFilename(&filename, suffix));
+  EXPECT_TRUE(env->CreateNewFileWithUniqueName(&filename, suffix));
 
   StringPiece str(filename);
   EXPECT_TRUE(str.starts_with(prefix));
-- 
GitLab


From d97d2b2183b6ebc18f03625f10dfab60f6e5134c Mon Sep 17 00:00:00 2001
From: Loo Rong Jie <loorongjie@gmail.com>
Date: Wed, 20 Dec 2017 09:10:17 +0800
Subject: [PATCH 1217/1225] Revert clang-format

---
 tensorflow/core/platform/env.h | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/tensorflow/core/platform/env.h b/tensorflow/core/platform/env.h
index 1d3b9afe0c..b806a79a29 100644
--- a/tensorflow/core/platform/env.h
+++ b/tensorflow/core/platform/env.h
@@ -283,7 +283,7 @@ class Env {
   // "version" should be the version of the library or NULL
   // returns the name that LoadLibrary() can use
   virtual string FormatLibraryFileName(const string& name,
-                                       const string& version) = 0;
+      const string& version) = 0;
 
  private:
   // Returns a possible list of local temporary directories.
@@ -350,7 +350,6 @@ class EnvWrapper : public Env {
                                const string& version) override {
     return target_->FormatLibraryFileName(name, version);
   }
-
  private:
   Env* target_;
 };
-- 
GitLab


From f698dd8b40ae2e602b80aed1f4a8d74d3e878f3d Mon Sep 17 00:00:00 2001
From: Loo Rong Jie <loorongjie@gmail.com>
Date: Wed, 20 Dec 2017 09:53:54 +0800
Subject: [PATCH 1218/1225] Change name to CreateUniqueFileName

---
 tensorflow/compiler/xla/service/hlo_graph_dumper.cc | 2 +-
 tensorflow/core/platform/env.cc                     | 4 ++--
 tensorflow/core/platform/env.h                      | 2 +-
 tensorflow/core/platform/env_test.cc                | 4 ++--
 4 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/tensorflow/compiler/xla/service/hlo_graph_dumper.cc b/tensorflow/compiler/xla/service/hlo_graph_dumper.cc
index cf22212c28..44db092085 100644
--- a/tensorflow/compiler/xla/service/hlo_graph_dumper.cc
+++ b/tensorflow/compiler/xla/service/hlo_graph_dumper.cc
@@ -1357,7 +1357,7 @@ string SaveGraph(const string& graph,
       dest_path, StrCat("hlo_graph_", output_num++, "."));
   auto status = Status::OK();
   auto env = tensorflow::Env::Default();
-  if (!env->CreateNewFileWithUniqueName(&path, file_extension)) {
+  if (!env->CreateUniqueFileName(&path, file_extension)) {
     status =
         Status(tensorflow::error::Code::UNKNOWN,
                StrCat("Failed to create temporary file to dump HLO graph: ",
diff --git a/tensorflow/core/platform/env.cc b/tensorflow/core/platform/env.cc
index 6716f0702c..9a7725da94 100644
--- a/tensorflow/core/platform/env.cc
+++ b/tensorflow/core/platform/env.cc
@@ -301,14 +301,14 @@ bool Env::LocalTempFilename(string* filename) {
   // permissions or have different problems at times.
   for (const string& dir : dirs) {
     *filename = io::JoinPath(dir, "tempfile-");
-    if (CreateNewFileWithUniqueName(filename, "")) {
+    if (CreateUniqueFileName(filename, "")) {
       return true;
     }
   }
   return false;
 }
 
-bool Env::CreateNewFileWithUniqueName(string* prefix, const string& suffix) {
+bool Env::CreateUniqueFileName(string* prefix, const string& suffix) {
 #ifdef __APPLE__
   uint64_t tid64;
   pthread_threadid_np(nullptr, &tid64);
diff --git a/tensorflow/core/platform/env.h b/tensorflow/core/platform/env.h
index b806a79a29..a0adf70ef4 100644
--- a/tensorflow/core/platform/env.h
+++ b/tensorflow/core/platform/env.h
@@ -220,7 +220,7 @@ class Env {
 
   /// Creates a local unique file name that starts with |prefix| and ends with
   /// |suffix|. Returns true if success.
-  bool CreateNewFileWithUniqueName(string* prefix, const string& suffix);
+  bool CreateUniqueFileName(string* prefix, const string& suffix);
 
   // TODO(jeff,sanjay): Add back thread/thread-pool support if needed.
   // TODO(jeff,sanjay): if needed, tighten spec so relative to epoch, or
diff --git a/tensorflow/core/platform/env_test.cc b/tensorflow/core/platform/env_test.cc
index bf166002d1..233c370a5f 100644
--- a/tensorflow/core/platform/env_test.cc
+++ b/tensorflow/core/platform/env_test.cc
@@ -341,14 +341,14 @@ TEST_F(DefaultEnvTest, LocalTempFilename) {
   EXPECT_FALSE(env->FileExists(filename).ok());
 }
 
-TEST_F(DefaultEnvTest, CreateNewFileWithUniqueName) {
+TEST_F(DefaultEnvTest, CreateUniqueFileName) {
   Env* env = Env::Default();
 
   string prefix = "tempfile-prefix-";
   string suffix = ".tmp";
   string filename = prefix;
 
-  EXPECT_TRUE(env->CreateNewFileWithUniqueName(&filename, suffix));
+  EXPECT_TRUE(env->CreateUniqueFileName(&filename, suffix));
 
   StringPiece str(filename);
   EXPECT_TRUE(str.starts_with(prefix));
-- 
GitLab


From dac28e61affcaf7dbeadc3ed3d21257261ad19ee Mon Sep 17 00:00:00 2001
From: Changming Sun <chasun@microsoft.com>
Date: Wed, 20 Dec 2017 15:38:28 +0800
Subject: [PATCH 1219/1225] fix typos

---
 tensorflow/core/grappler/optimizers/dependency_optimizer.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/core/grappler/optimizers/dependency_optimizer.cc b/tensorflow/core/grappler/optimizers/dependency_optimizer.cc
index 6cc50845b3..62cebaef76 100644
--- a/tensorflow/core/grappler/optimizers/dependency_optimizer.cc
+++ b/tensorflow/core/grappler/optimizers/dependency_optimizer.cc
@@ -51,7 +51,7 @@ int RemoveInput(NodeDef* node, const string& input, NodeMap* node_map) {
   return num_removed;
 }
 
-// Remove dulicate control inputs.
+// Remove duplicate control inputs.
 void PruneControlInputs(NodeDef* node) {
   std::unordered_set<string> inputs;
   int pos = 0;
-- 
GitLab


From a29fdd0104748321b60802fd4bfb8a7dd06b0bc5 Mon Sep 17 00:00:00 2001
From: Loo Rong Jie <loorongjie@gmail.com>
Date: Wed, 20 Dec 2017 16:02:27 +0800
Subject: [PATCH 1220/1225] [XLA] Use os.path for path manipulation

---
 tensorflow/compiler/aot/tests/make_test_graphs.py | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/tensorflow/compiler/aot/tests/make_test_graphs.py b/tensorflow/compiler/aot/tests/make_test_graphs.py
index a898eab1d1..89c7cd4507 100644
--- a/tensorflow/compiler/aot/tests/make_test_graphs.py
+++ b/tensorflow/compiler/aot/tests/make_test_graphs.py
@@ -19,6 +19,7 @@ from __future__ import division
 from __future__ import print_function
 
 import argparse
+import os
 import sys
 
 from tensorflow.core.protobuf import saver_pb2
@@ -53,7 +54,7 @@ def tfadd_with_ckpt(out_dir):
     sess.run(init_op)
     sess.run(y.assign(y + 42))
     # Without the checkpoint, the variable won't be set to 42.
-    ckpt = '%s/test_graph_tfadd_with_ckpt.ckpt' % out_dir
+    ckpt = os.path.join(out_dir, 'test_graph_tfadd_with_ckpt.ckpt')
     saver.save(sess, ckpt)
 
 
@@ -68,10 +69,10 @@ def tfadd_with_ckpt_saver(out_dir):
     sess.run(init_op)
     sess.run(y.assign(y + 42))
     # Without the checkpoint, the variable won't be set to 42.
-    ckpt_file = '%s/test_graph_tfadd_with_ckpt_saver.ckpt' % out_dir
+    ckpt_file = os.path.join(out_dir, 'test_graph_tfadd_with_ckpt_saver.ckpt')
     saver.save(sess, ckpt_file)
     # Without the SaverDef, the restore op won't be named correctly.
-    saver_file = '%s/test_graph_tfadd_with_ckpt_saver.saver' % out_dir
+    saver_file = os.path.join(out_dir, 'test_graph_tfadd_with_ckpt_saver.saver')
     with open(saver_file, 'wb') as f:
       f.write(saver.as_saver_def().SerializeToString())
 
@@ -129,7 +130,7 @@ def write_graph(build_graph, out_dir):
   g = ops.Graph()
   with g.as_default():
     build_graph(out_dir)
-    filename = '%s/test_graph_%s.pb' % (out_dir, build_graph.__name__)
+    filename = os.path.join(out_dir, 'test_graph_%s.pb' % build_graph.__name__)
     with open(filename, 'wb') as f:
       f.write(g.as_graph_def().SerializeToString())
 
-- 
GitLab


From b5ee8eb4d8d6f2baa7e602809c6057cda27d77ea Mon Sep 17 00:00:00 2001
From: ManHyuk <manhyuk@kw.ac.kr>
Date: Wed, 20 Dec 2017 19:09:07 +0900
Subject: [PATCH 1221/1225] fix typos

---
 .../gen/java/org/tensorflow/processor/OperatorProcessor.java    | 2 +-
 tensorflow/python/debug/wrappers/grpc_wrapper.py                | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/tensorflow/java/src/gen/java/org/tensorflow/processor/OperatorProcessor.java b/tensorflow/java/src/gen/java/org/tensorflow/processor/OperatorProcessor.java
index 45e42878c7..11fda4fc22 100644
--- a/tensorflow/java/src/gen/java/org/tensorflow/processor/OperatorProcessor.java
+++ b/tensorflow/java/src/gen/java/org/tensorflow/processor/OperatorProcessor.java
@@ -77,7 +77,7 @@ public final class OperatorProcessor extends AbstractProcessor {
     TypeElement annotation = annotations.iterator().next();
     Set<? extends Element> annotated = roundEnv.getElementsAnnotatedWith(annotation);
 
-    // If there are no annotated elements, claim the annotion but do nothing.
+    // If there are no annotated elements, claim the annotation but do nothing.
     if (annotated.size() == 0) {
       return true;
     }
diff --git a/tensorflow/python/debug/wrappers/grpc_wrapper.py b/tensorflow/python/debug/wrappers/grpc_wrapper.py
index 16b2018b41..c3bd1da7aa 100644
--- a/tensorflow/python/debug/wrappers/grpc_wrapper.py
+++ b/tensorflow/python/debug/wrappers/grpc_wrapper.py
@@ -99,7 +99,7 @@ class GrpcDebugWrapperSession(framework.NonInteractiveDebugWrapperSession):
 
 
 class TensorBoardDebugWrapperSession(GrpcDebugWrapperSession):
-  """A tfdbg Session wrapper that can be used with TensroBoard Debugger Plugin.
+  """A tfdbg Session wrapper that can be used with TensorBoard Debugger Plugin.
 
   This wrapper is the same as `GrpcDebugWrapperSession`, except that it uses a
     predefined `watch_fn` that
-- 
GitLab


From 3958c4d8d60af7cbde734bab2c1aa3ab6ee456af Mon Sep 17 00:00:00 2001
From: Alexandre Passos <apassos@google.com>
Date: Wed, 20 Dec 2017 09:50:10 -0800
Subject: [PATCH 1222/1225] Clear the decref cache after calls to py_func to
 ensure no leaks

---
 tensorflow/python/lib/core/py_func.cc | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/tensorflow/python/lib/core/py_func.cc b/tensorflow/python/lib/core/py_func.cc
index eae1c2eea6..dc56b39486 100644
--- a/tensorflow/python/lib/core/py_func.cc
+++ b/tensorflow/python/lib/core/py_func.cc
@@ -432,6 +432,9 @@ class PyFuncOp : public OpKernel {
     py_threadstate = PyGILState_Ensure();
     bool log_on_error;
     Status s = DoCallPyFunc(&call, &log_on_error);
+    // Sometimes py_funcs can be called without a session and leak memory. This
+    // ensures we clear the decref cache so this doesn't happen.
+    ClearDecrefCache();
     PyGILState_Release(py_threadstate);
 
     // Ensures that GIL is released even when !s.ok().
-- 
GitLab


From 54712744d4b8add6c6e910e3acb2d2fff94922bb Mon Sep 17 00:00:00 2001
From: Aiden Scandella <ai@uber.com>
Date: Wed, 20 Dec 2017 13:17:30 -0500
Subject: [PATCH 1223/1225] Fix Golang readme installation instruction
 formatting

This looks like it got pasted in incorrectly
---
 tensorflow/go/README.md | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/tensorflow/go/README.md b/tensorflow/go/README.md
index 376e22b380..b1bd87eb0c 100644
--- a/tensorflow/go/README.md
+++ b/tensorflow/go/README.md
@@ -26,9 +26,12 @@ from source.
     ([Linux](https://www.tensorflow.org/install/install_sources#PrepareLinux)
     or [OS
     X](https://www.tensorflow.org/install/install_sources#PrepareMac)).
-    If you don't need GPU support, then try the following: `sh # Linux sudo
-    apt-get install python swig python-numpy # OS X with homebrew brew install
-    swig`
+    If you don't need GPU support, then try the following:
+
+    ```sh
+    sudo apt-get install python swig python-numpy # Linux
+    brew install swig                             # OS X with homebrew
+    ```
 
 ### Build
 
-- 
GitLab


From dc355dc491836f1202a2c3fcef0a9da6902fd7da Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Yan=20Facai=20=28=E9=A2=9C=E5=8F=91=E6=89=8D=29?=
 <facai.yan@gmail.com>
Date: Thu, 21 Dec 2017 03:40:04 +0800
Subject: [PATCH 1224/1225] C++ gradient for Select (#14862)

* ENH: add SelectGrad

* TST: add test case

* TST: clean code

* CLN: grad_scope -> scope

* CLN: x_shape, y_shape -> shape
---
 tensorflow/cc/gradients/math_grad.cc      | 18 ++++++++++++++++++
 tensorflow/cc/gradients/math_grad_test.cc |  8 ++++++++
 2 files changed, 26 insertions(+)

diff --git a/tensorflow/cc/gradients/math_grad.cc b/tensorflow/cc/gradients/math_grad.cc
index d7446b9560..ebc0c77828 100644
--- a/tensorflow/cc/gradients/math_grad.cc
+++ b/tensorflow/cc/gradients/math_grad.cc
@@ -728,6 +728,24 @@ Status LgammaGrad(const Scope& scope, const Operation& op,
 }
 REGISTER_GRADIENT_OP("Lgamma", LgammaGrad);
 
+Status SelectGrad(const Scope& scope, const Operation& op,
+                  const std::vector<Output>& grad_inputs,
+                  std::vector<Output>* grad_outputs) {
+  auto comparator = op.input(0);
+  auto x = op.input(1);
+  auto zeros = ZerosLike(scope, x);
+  auto grad = grad_inputs[0];
+
+  auto gx_1 = Where3(scope, comparator, grad, zeros);
+  auto gx_2 = Where3(scope, comparator, zeros, grad);
+
+  grad_outputs->push_back(NoGradient());
+  grad_outputs->push_back(gx_1);
+  grad_outputs->push_back(gx_2);
+  return scope.status();
+}
+REGISTER_GRADIENT_OP("Select", SelectGrad);
+
 Status MinOrMaxGrad(const Scope& scope, const Operation& op,
                     const std::vector<Output>& grad_inputs,
                     std::vector<Output>* grad_outputs) {
diff --git a/tensorflow/cc/gradients/math_grad_test.cc b/tensorflow/cc/gradients/math_grad_test.cc
index 6313f41da5..29def3c3ea 100644
--- a/tensorflow/cc/gradients/math_grad_test.cc
+++ b/tensorflow/cc/gradients/math_grad_test.cc
@@ -865,5 +865,13 @@ TEST_F(NaryGradTest, Minimum) {
   RunTest(x, x_init_value, y, shape);
 }
 
+TEST_F(NaryGradTest, Select) {
+  TensorShape shape({3, 4});
+  auto x1 = Placeholder(scope_, DT_FLOAT, Placeholder::Shape(shape));
+  auto x2 = Placeholder(scope_, DT_FLOAT, Placeholder::Shape(shape));
+  auto y = Where3(scope_, Greater(scope_, x1, x2), x1, x2);
+  RunTest({x1, x2}, {shape, shape}, {y}, {shape});
+}
+
 }  // namespace
 }  // namespace tensorflow
-- 
GitLab


From d4c33cfc944cbcac7a31811ba8f86448d8cc2e23 Mon Sep 17 00:00:00 2001
From: Anush Elangovan <anush@nod-labs.com>
Date: Fri, 8 Dec 2017 20:48:23 -0800
Subject: [PATCH 1225/1225] [XLA] Fix XLA/tfcompile compile error on OSX

This fixes issue #15196

kernel_support_library.cc:99:5: error: no matching function
for call to 'transform' std::transform(function->arg_begin(),..

TEST=build tfcompile on OSX (also requires PR#14893)
---
 .../xla/service/llvm_ir/kernel_support_library.cc     | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/tensorflow/compiler/xla/service/llvm_ir/kernel_support_library.cc b/tensorflow/compiler/xla/service/llvm_ir/kernel_support_library.cc
index 5f6f9810c3..23d2d4e87d 100644
--- a/tensorflow/compiler/xla/service/llvm_ir/kernel_support_library.cc
+++ b/tensorflow/compiler/xla/service/llvm_ir/kernel_support_library.cc
@@ -111,8 +111,15 @@ void KernelSupportLibrary::EmitAndCallOutlinedKernel(
     ir_builder->SetInsertPoint(return_inst);
 
     std::vector<llvm::Value*> arg_values;
-    std::transform(function->arg_begin(), function->arg_end(),
-                   std::back_inserter(arg_values), std::addressof<llvm::Value>);
+    /*
+     * clang on OSX doesn't like std::transform or range for loop here.
+     * See https://github.com/tensorflow/tensorflow/issues/15196
+     */
+    for (llvm::Function::arg_iterator arg = function->arg_begin(),
+                                      arg_e = function->arg_end();
+         arg != arg_e; ++arg) {
+      arg_values.push_back(arg);
+    }
     if (null_arg_idx != -1) {
       arg_values.insert(arg_values.begin() + null_arg_idx, nullptr);
     }
-- 
GitLab